Add output_layer_weight dump.
|  | @ -74,13 +74,13 @@ Linear(intermediate_parallel)  no bias  ->  [6, 1, 4096] | |||
| [6, 1, 32, 128] <-  q   k   v | ||||
|                    /    |    \ | ||||
|               pos_emb pos_emb \ | ||||
|                   |     |      \ | ||||
|                   |     |      | | ||||
|                   |   expand  expand   -> [6, 1, 32, 128] | ||||
|                    \   /       | | ||||
|                     dot        | | ||||
|                   softmax     / | ||||
|                        \     / | ||||
|                          dot           ->  [1, 32, 6, 128] | ||||
|                          dot           ->  [1, 32, 6, 128]  ->  [6, 1, 4096] | ||||
|                         Linear         ->  [6, 1, 4096] | ||||
| 
 | ||||
| hidden_states: [s, b, h] | ||||
|  |  | |||
|  | @ -452,6 +452,15 @@ class ChatGLMModel(nn.Module): | |||
|         hidden_states = self.encoder(inputs_embeds, rotary_pos_emb) | ||||
|         hidden_states = hidden_states[-1:] | ||||
|         lm_logits = self.output_layer(hidden_states) | ||||
| 
 | ||||
|         # for i in range(16): | ||||
|         #     show.DumpTensorToImage( | ||||
|         #         self.output_layer.weight[ | ||||
|         #             int(i * (65024 / 16)) : int((i + 1) * (65024 / 16)), : | ||||
|         #         ], | ||||
|         #         "generated/output_layer_weight_slice" + str(i) + ".png", | ||||
|         #     ) | ||||
| 
 | ||||
|         lm_logits = lm_logits.transpose(0, 1).contiguous() | ||||
| 
 | ||||
|         return lm_logits | ||||
|  |  | |||
| After Width: | Height: | Size: 13 MiB | 
| After Width: | Height: | Size: 12 MiB | 
| After Width: | Height: | Size: 12 MiB | 
| After Width: | Height: | Size: 11 MiB | 
| After Width: | Height: | Size: 11 MiB | 
| After Width: | Height: | Size: 13 MiB | 
| After Width: | Height: | Size: 12 MiB | 
| After Width: | Height: | Size: 11 MiB | 
| After Width: | Height: | Size: 13 MiB | 
| After Width: | Height: | Size: 12 MiB | 
| After Width: | Height: | Size: 12 MiB | 
| After Width: | Height: | Size: 12 MiB | 
| After Width: | Height: | Size: 12 MiB | 
| After Width: | Height: | Size: 12 MiB | 
| After Width: | Height: | Size: 12 MiB | 
| After Width: | Height: | Size: 12 MiB | 
|  | @ -19,12 +19,12 @@ def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0, auto2d=True): | |||
|     img = tensor.numpy() | ||||
|     srp = img.shape | ||||
| 
 | ||||
|     if auto2d and len(srp) == 1: | ||||
|     if auto2d and len(srp) == 1:  # automatically fold 1D data into a 2D image | ||||
|         ceiled = math.ceil((srp[0]) ** 0.5) | ||||
|         img = cv2.copyMakeBorder(img, 0, ceiled * ceiled - srp[0], 0, 0, 0) | ||||
|         img = img.reshape((ceiled, ceiled)) | ||||
|         srp = img.shape | ||||
|     if autoPad and (max(srp) / min(srp) > 16): | ||||
|     if autoPad and (max(srp) / min(srp) > 16):  # automatically stretch into a square | ||||
|         img = cv2.resize(img, [max(srp), max(srp)]) | ||||
|         srp = img.shape | ||||
|     if scale != 1.0: | ||||
|  |  | |||