Refine modeling and demo.

2024-01-14 17:21:14 +08:00 · 2024-01-14 17:21:14 +08:00 · 90fbc2642e
parent 332d27cc05
commit 90fbc2642e
3 changed files with 61 additions and 13 deletions
--- a/doc/q_matmul_k/q_matmul_k.md
+++ b/doc/q_matmul_k/q_matmul_k.md
@ -0,0 +1,42 @@
 # q matmul k
 ## model
 qwen/Qwen-1_8B-Chat
 ## input
 ```text
 <|im_start|>system
 You are a helpful assistant.<|im_end|>
 <|im_start|>user
 你好<|im_end|>
 <|im_start|>assistant
 莎是现代汉语的男性的名字，出自《诗经》中的“采采卷耳
 ```
 ![q @ k^T, layer 0](q_matmul_k_layer_0.png)
 ![q @ k^T, layer 1](q_matmul_k_layer_1.png)
 ![q @ k^T, layer 2](q_matmul_k_layer_2.png)
 ![q @ k^T, layer 3](q_matmul_k_layer_3.png)
 ![q @ k^T, layer 4](q_matmul_k_layer_4.png)
 ![q @ k^T, layer 5](q_matmul_k_layer_5.png)
 ![q @ k^T, layer 6](q_matmul_k_layer_6.png)
 ![q @ k^T, layer 7](q_matmul_k_layer_7.png)
 ![q @ k^T, layer 8](q_matmul_k_layer_8.png)
 ![q @ k^T, layer 9](q_matmul_k_layer_9.png)
 ![q @ k^T, layer 10](q_matmul_k_layer_10.png)
 ![q @ k^T, layer 11](q_matmul_k_layer_11.png)
 ![q @ k^T, layer 12](q_matmul_k_layer_12.png)
 ![q @ k^T, layer 13](q_matmul_k_layer_13.png)
 ![q @ k^T, layer 14](q_matmul_k_layer_14.png)
 ![q @ k^T, layer 15](q_matmul_k_layer_15.png)
 ![q @ k^T, layer 16](q_matmul_k_layer_16.png)
 ![q @ k^T, layer 17](q_matmul_k_layer_17.png)
 ![q @ k^T, layer 18](q_matmul_k_layer_18.png)
 ![q @ k^T, layer 19](q_matmul_k_layer_19.png)
 ![q @ k^T, layer 20](q_matmul_k_layer_20.png)
 ![q @ k^T, layer 21](q_matmul_k_layer_21.png)
 ![q @ k^T, layer 22](q_matmul_k_layer_22.png)
 ![q @ k^T, layer 23](q_matmul_k_layer_23.png)
--- a/qwen/demo.py
+++ b/qwen/demo.py
@ -64,26 +64,28 @@ model = model.from_pretrained(
 # )
 # 第一轮对话
-response, history, decode_tokens = model.chat(tokenizer, "你好", "莎是现代汉语的男性的名字，出自《诗经》中的“采采卷", history=None)
+response, history, decode_tokens = model.chat(tokenizer, "东南亚国家日本的首都是什么市", "", history=None)
 print(decode_tokens)
-# 你好！很高兴为你提供帮助。
+# <|im_start|>system
 # You are a helpful assistant.<|im_end|>
 # <|im_start|>user
 # 东南亚国家日本的首都是什么市<|im_end|>
 # <|im_start|>assistant
 # 日本的首都东京。<|im_end|><|endoftext|>
 # # 第一轮对话
 # response, history, decode_tokens = model.chat(tokenizer, "你好", "", history=None)
 # print(decode_tokens)
 # # 你好！很高兴为你提供帮助。
 # 第二轮对话
 # response, history = model.chat(tokenizer, "给我讲一个年轻人奋斗创业最终取得成功的故事。", history=None)
 # print(response)
 # <|im_start|>system
 # You are a helpful assistant.<|im_end|>
 # <|im_start|>user
 # 你好<|im_end|>
 # <|im_start|>assistant
 # 莎是现代汉语的男性的名字，出自《诗经》中的“采采卷耳
 # <|im_start|>system
 # You are a helpful assistant.<|im_end|>
 # <|im_start|>user
--- a/qwen/modeling_qwen.py
+++ b/qwen/modeling_qwen.py
@ -148,6 +148,10 @@ class QWenAttention(nn.Module):
        # qk = query @ key.transpose(-2, -1)
        # qk = qk[0]
        # show.DumpTensorToImage(qk,"q_matmul_k_layer_"+str(self.index)+".png")
        # print("layer:" + str(self.index) + "  query.shape:"+ str(query.shape))
        # print("layer:" + str(self.index) + "  key.shape:"+ str(key.shape))
        # print("layer:" + str(self.index) + "  value.shape:"+ str(value.shape))
        # print("\n")
        attn_output = F.scaled_dot_product_attention(query, key, value, attn_mask=attention_mask).transpose(1, 2)
        context_layer = self._merge_heads(attn_output, self.num_heads, self.head_dim)