36 lines
		
	
	
		
			668 B
		
	
	
	
		
			Markdown
		
	
	
	
		
		
			
		
	
	
			36 lines
		
	
	
		
			668 B
		
	
	
	
		
			Markdown
		
	
	
	
| 
								 | 
							
								## 模型
							 | 
						||
| 
								 | 
							
								"qwen/Qwen-1_8B-Chat"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								## 
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								完整的token
							 | 
						||
| 
								 | 
							
								```
							 | 
						||
| 
								 | 
							
								# <|im_start|>system
							 | 
						||
| 
								 | 
							
								# You are a helpful assistant.<|im_end|>
							 | 
						||
| 
								 | 
							
								# <|im_start|>user
							 | 
						||
| 
								 | 
							
								# 东南亚国家日本的首都是什么市<|im_end|>
							 | 
						||
| 
								 | 
							
								# <|im_start|>assistant
							 | 
						||
| 
								 | 
							
								# 日本的首都东京。<|im_end|><|endoftext|>
							 | 
						||
| 
								 | 
							
								```
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								生成的token
							 | 
						||
| 
								 | 
							
								```
							 | 
						||
| 
								 | 
							
								27: 日本
							 | 
						||
| 
								 | 
							
								28: 的
							 | 
						||
| 
								 | 
							
								29: 首都
							 | 
						||
| 
								 | 
							
								30: 东京
							 | 
						||
| 
								 | 
							
								31: 。
							 | 
						||
| 
								 | 
							
								32: <|im_end|>
							 | 
						||
| 
								 | 
							
								33: <|endoftext|>
							 | 
						||
| 
								 | 
							
								```
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								```python
							 | 
						||
| 
								 | 
							
								qk = query @ key.transpose(-2, -1)  # -> [1, 16, 27, 27]
							 | 
						||
| 
								 | 
							
								qk = qk[0]  # -> [16, 27, 27]
							 | 
						||
| 
								 | 
							
								show.DumpTensorToImage(qk,"q_matmul_k_layer_"+str(self.index)+".png")
							 | 
						||
| 
								 | 
							
								```
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sequence : 表示当前生成的是第几个 token
							 | 
						||
| 
								 | 
							
								layer : 表示当前是第几层 decoder
							 |