Refine modeling and demo.

Colin 2024-01-14 17:21:14 +08:00
parent 332d27cc05
commit 90fbc2642e
3 changed files with 61 additions and 13 deletions


@@ -0,0 +1,42 @@
# q matmul k
## model
qwen/Qwen-1_8B-Chat
## input
```text
<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
你好<|im_end|>
<|im_start|>assistant
莎是现代汉语的男性的名字,出自《诗经》中的“采采卷耳
```
Raw `query @ key.transpose(-2, -1)` score maps for layers 0-23, computed while processing the input above:

![](q_matmul_k_layer_0.png "")
![](q_matmul_k_layer_1.png "")
![](q_matmul_k_layer_2.png "")
![](q_matmul_k_layer_3.png "")
![](q_matmul_k_layer_4.png "")
![](q_matmul_k_layer_5.png "")
![](q_matmul_k_layer_6.png "")
![](q_matmul_k_layer_7.png "")
![](q_matmul_k_layer_8.png "")
![](q_matmul_k_layer_9.png "")
![](q_matmul_k_layer_10.png "")
![](q_matmul_k_layer_11.png "")
![](q_matmul_k_layer_12.png "")
![](q_matmul_k_layer_13.png "")
![](q_matmul_k_layer_14.png "")
![](q_matmul_k_layer_15.png "")
![](q_matmul_k_layer_16.png "")
![](q_matmul_k_layer_17.png "")
![](q_matmul_k_layer_18.png "")
![](q_matmul_k_layer_19.png "")
![](q_matmul_k_layer_20.png "")
![](q_matmul_k_layer_21.png "")
![](q_matmul_k_layer_22.png "")
![](q_matmul_k_layer_23.png "")
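
The images above come from dumping the pre-softmax attention scores (`query @ key.transpose(-2, -1)`) of each of the 24 layers while the prompt is processed. A minimal sketch of such a per-layer dump is shown below; it assumes `query`/`key` shaped `[batch, num_heads, seq_len, head_dim]` inside the attention forward pass and uses matplotlib in place of the repo's `show.DumpTensorToImage` helper, so the names and shapes are illustrative rather than the actual implementation.

```python
# Minimal sketch of a per-layer q @ k^T dump (illustrative, not the repo code).
# Assumes query/key shaped [batch, num_heads, seq_len, head_dim].
import torch
import matplotlib.pyplot as plt


def dump_q_matmul_k(query: torch.Tensor, key: torch.Tensor, layer_index: int) -> None:
    # Pre-softmax attention scores: [batch, num_heads, seq_len, seq_len]
    qk = query @ key.transpose(-2, -1)
    # Keep batch 0 and average over heads to get one 2-D map per layer.
    scores = qk[0].mean(dim=0).detach().float().cpu().numpy()
    plt.imshow(scores)
    plt.colorbar()
    plt.title(f"q_matmul_k_layer_{layer_index}")
    plt.savefig(f"q_matmul_k_layer_{layer_index}.png")
    plt.close()
```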


@@ -64,26 +64,28 @@ model = model.from_pretrained(
# )
# First round of dialogue
response, history, decode_tokens = model.chat(tokenizer, "你好", "莎是现代汉语的男性的名字,出自《诗经》中的“采采卷", history=None)
response, history, decode_tokens = model.chat(tokenizer, "东南亚国家日本的首都是什么市", "", history=None)
print(decode_tokens)
# 你好!很高兴为你提供帮助。
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# 东南亚国家日本的首都是什么市<|im_end|>
# <|im_start|>assistant
# 日本的首都东京。<|im_end|><|endoftext|>   (i.e. "The capital of Japan is Tokyo.")
# # First round of dialogue
# response, history, decode_tokens = model.chat(tokenizer, "你好", "", history=None)
# print(decode_tokens)
# # 你好!很高兴为你提供帮助。
# Second round of dialogue
# response, history = model.chat(tokenizer, "给我讲一个年轻人奋斗创业最终取得成功的故事。", history=None)
# print(response)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# 你好<|im_end|>
# <|im_start|>assistant
# 莎是现代汉语的男性的名字,出自《诗经》中的“采采卷耳
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
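
For reference, the `decode_tokens` transcripts above are just the ChatML-style prompt that `model.chat` assembles (plus the generated reply), and the third positional argument in the calls above appears to act as a pre-filled start of the assistant turn. A hypothetical helper reproducing that layout (not the actual Qwen implementation; the name and signature here are assumptions) could look like this:

```python
# Hypothetical illustration of the ChatML layout seen in decode_tokens above;
# the real prompt construction lives inside Qwen's model.chat.
def build_chatml_prompt(user: str, assistant_prefix: str = "",
                        system: str = "You are a helpful assistant.") -> str:
    prompt = (
        f"<|im_start|>system\n{system}<|im_end|>\n"
        f"<|im_start|>user\n{user}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
    # A non-empty prefix forces the beginning of the assistant's reply,
    # as with the "莎是现代汉语的..." string in the first call above.
    return prompt + assistant_prefix


print(build_chatml_prompt("你好"))
```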


@@ -148,6 +148,10 @@ class QWenAttention(nn.Module):
# qk = query @ key.transpose(-2, -1)
# qk = qk[0]
# show.DumpTensorToImage(qk, "q_matmul_k_layer_" + str(self.index) + ".png")
# print("layer:" + str(self.index) + " query.shape:" + str(query.shape))
# print("layer:" + str(self.index) + " key.shape:" + str(key.shape))
# print("layer:" + str(self.index) + " value.shape:" + str(value.shape))
# print("\n")
attn_output = F.scaled_dot_product_attention(query, key, value, attn_mask=attention_mask).transpose(1, 2)
context_layer = self._merge_heads(attn_output, self.num_heads, self.head_dim)
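
The commented-out dump above only takes the raw `q @ kᵀ` product; the fused `F.scaled_dot_product_attention` call additionally scales by 1/√d, applies the attention mask, and multiplies the softmax weights by `value` before `_merge_heads` flattens the head dimension. A self-contained sanity check of that equivalence, with illustrative shapes rather than Qwen's actual configuration, might look like:

```python
# Standalone sketch: what the fused call above computes, written out by hand.
# Shapes are illustrative, not necessarily Qwen's real configuration.
import math
import torch
import torch.nn.functional as F

batch, num_heads, seq_len, head_dim = 1, 16, 8, 128
query = torch.randn(batch, num_heads, seq_len, head_dim)
key = torch.randn(batch, num_heads, seq_len, head_dim)
value = torch.randn(batch, num_heads, seq_len, head_dim)
# Boolean causal mask: True marks positions that may be attended to.
attention_mask = torch.ones(seq_len, seq_len, dtype=torch.bool).tril()

fused = F.scaled_dot_product_attention(query, key, value, attn_mask=attention_mask)

# Manual equivalent of the fused kernel.
scores = query @ key.transpose(-2, -1) / math.sqrt(head_dim)
scores = scores.masked_fill(~attention_mask, float("-inf"))
manual = torch.softmax(scores, dim=-1) @ value

print(torch.allclose(fused, manual, atol=1e-5))  # expected: True

# transpose(1, 2) followed by _merge_heads then reshapes the output to
# [batch, seq_len, num_heads * head_dim] before the output projection.
merged = fused.transpose(1, 2).reshape(batch, seq_len, num_heads * head_dim)
```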