diff --git a/chatglm/modeling_chatglm.py b/chatglm/modeling_chatglm.py
index f5c7e03..292861d 100644
--- a/chatglm/modeling_chatglm.py
+++ b/chatglm/modeling_chatglm.py
@@ -19,6 +19,7 @@
 from transformers.configuration_utils import PretrainedConfig
 from transformers.generation import GenerationConfig
 from chatglm import ChatGLMConfig
+from tools import show
 
 WEIGHTS_INDEX_NAME = "pytorch_model.bin.index.json"
 
@@ -502,9 +503,7 @@ class ChatGLMModel(nn.Module):
 
         # Rotary positional embeddings
         rotary_pos_emb = self.rotary_pos_emb(self.seq_length)
-        from tools import show
-
-        show.DumpTensorToImage(rotary_pos_emb[:, :, 0], "plot.png", scale=0.1)
+        # show.DumpTensorToImage(rotary_pos_emb[:, :, 0], "rotary_pos_emb.png", scale=0.1)
 
         if position_ids is not None:
             rotary_pos_emb = rotary_pos_emb[position_ids]
@@ -753,9 +752,8 @@ class ChatGLMForConditionalGeneration(nn.Module):
                 return_dict=True,
                 output_hidden_states=output_hidden_states,
             )
-            next_token_logits = logits[:, -1, :]
-            next_token_scores = next_token_logits
-            probs = nn.functional.softmax(next_token_scores, dim=-1)
+            next_token_logits = logits[:, 0, :]
+            probs = nn.functional.softmax(next_token_logits, dim=-1)
             next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
 
             # finished sentences should have their next token be a padding token
diff --git a/rotary_pos_emb.png b/rotary_pos_emb.png
new file mode 100644
index 0000000..fa2e598
Binary files /dev/null and b/rotary_pos_emb.png differ
diff --git a/tools/show.py b/tools/show.py
index 40d41bc..3117c82 100644
--- a/tools/show.py
+++ b/tools/show.py
@@ -21,32 +21,3 @@ def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0):
     img = cv2.resize(img, [int(srp[0] * scale), int(srp[1] * scale)])
     srp = img.shape
     cv2.imwrite(name, img)
-
-
-# def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0):
-#     if len(tensor.shape) != 2:
-#         raise ("Error input dims")
-#     tensor = tensor.float()
-#     maxv = torch.max(tensor)
-#     minv = torch.min(tensor)
-#     tensor = (((tensor - minv) / (maxv - minv)) * 256).byte().cpu()
-#     srp = tensor.shape
-#     if autoPad and (max(srp) / min(srp) > 16):
-#         if srp[0] == min(srp):
-#             tensor = F.pad(tensor, [max(srp) - min(srp), 0], "replicate")
-#         else:
-#             tensor = F.pad(tensor, [0, max(srp) - min(srp)], "replicate")
-#         srp = tensor.shape
-
-#     tensor = tensor.unsqueeze(0)
-#     if scale != 1.0:
-#         tensor = Vision.resize(tensor, [int(srp[0] * scale), int(srp[1] * scale)])
-#         tensor = tensor.view([int(srp[0] * scale), int(srp[1] * scale)])
-#         srp = tensor.shape
-
-#     w = 1024 if max(srp) > 1024 else max(srp)
-#     scale = max(srp) / w
-#     # img = px.imshow(tensor)
-#     # img.write_image(name)
-#     cv2.imwrite(name, tensor.numpy())
-#     cv2.CreateMat(name, tensor.numpy())