diff --git a/Readme.md b/Readme.md index d30d939..ba325c4 100644 --- a/Readme.md +++ b/Readme.md @@ -74,13 +74,13 @@ Linear(intermediate_parallel) no bias -> [6, 1, 4096] [6, 1, 32, 128] <- q k v / | \ pos_emb pos_emb \ - | | \ + | | | | expand expand -> [6, 1, 32, 128] \ / | dot | softmax / \ / - dot -> [1, 32, 6, 128] + dot -> [1, 32, 6, 128] -> [6, 1, 4096] Linear -> [6, 1, 4096] hidden_states: [s, b, h] diff --git a/chatglm/modeling_chatglm.py b/chatglm/modeling_chatglm.py index dfa4194..9729dd3 100644 --- a/chatglm/modeling_chatglm.py +++ b/chatglm/modeling_chatglm.py @@ -452,6 +452,15 @@ class ChatGLMModel(nn.Module): hidden_states = self.encoder(inputs_embeds, rotary_pos_emb) hidden_states = hidden_states[-1:] lm_logits = self.output_layer(hidden_states) + + # for i in range(16): + # show.DumpTensorToImage( + # self.output_layer.weight[ + # int(i * (65024 / 16)) : int((i + 1) * (65024 / 16)), : + # ], + # "generated/output_layer_weight_slice" + str(i) + ".png", + # ) + lm_logits = lm_logits.transpose(0, 1).contiguous() return lm_logits diff --git a/generated/output_layer_weight_slice0.png b/generated/output_layer_weight_slice0.png new file mode 100644 index 0000000..57e6ffb Binary files /dev/null and b/generated/output_layer_weight_slice0.png differ diff --git a/generated/output_layer_weight_slice1.png b/generated/output_layer_weight_slice1.png new file mode 100644 index 0000000..00db2ab Binary files /dev/null and b/generated/output_layer_weight_slice1.png differ diff --git a/generated/output_layer_weight_slice10.png b/generated/output_layer_weight_slice10.png new file mode 100644 index 0000000..433efec Binary files /dev/null and b/generated/output_layer_weight_slice10.png differ diff --git a/generated/output_layer_weight_slice11.png b/generated/output_layer_weight_slice11.png new file mode 100644 index 0000000..7358f57 Binary files /dev/null and b/generated/output_layer_weight_slice11.png differ diff --git a/generated/output_layer_weight_slice12.png b/generated/output_layer_weight_slice12.png new file mode 100644 index 0000000..31f272b Binary files /dev/null and b/generated/output_layer_weight_slice12.png differ diff --git a/generated/output_layer_weight_slice13.png b/generated/output_layer_weight_slice13.png new file mode 100644 index 0000000..4881263 Binary files /dev/null and b/generated/output_layer_weight_slice13.png differ diff --git a/generated/output_layer_weight_slice14.png b/generated/output_layer_weight_slice14.png new file mode 100644 index 0000000..6763aeb Binary files /dev/null and b/generated/output_layer_weight_slice14.png differ diff --git a/generated/output_layer_weight_slice15.png b/generated/output_layer_weight_slice15.png new file mode 100644 index 0000000..0471d9f Binary files /dev/null and b/generated/output_layer_weight_slice15.png differ diff --git a/generated/output_layer_weight_slice2.png b/generated/output_layer_weight_slice2.png new file mode 100644 index 0000000..48f7a21 Binary files /dev/null and b/generated/output_layer_weight_slice2.png differ diff --git a/generated/output_layer_weight_slice3.png b/generated/output_layer_weight_slice3.png new file mode 100644 index 0000000..d6cda71 Binary files /dev/null and b/generated/output_layer_weight_slice3.png differ diff --git a/generated/output_layer_weight_slice4.png b/generated/output_layer_weight_slice4.png new file mode 100644 index 0000000..1189a90 Binary files /dev/null and b/generated/output_layer_weight_slice4.png differ diff --git a/generated/output_layer_weight_slice5.png b/generated/output_layer_weight_slice5.png new file mode 100644 index 0000000..4dd2b9e Binary files /dev/null and b/generated/output_layer_weight_slice5.png differ diff --git a/generated/output_layer_weight_slice6.png b/generated/output_layer_weight_slice6.png new file mode 100644 index 0000000..a72d037 Binary files /dev/null and b/generated/output_layer_weight_slice6.png differ diff --git a/generated/output_layer_weight_slice7.png b/generated/output_layer_weight_slice7.png new file mode 100644 index 0000000..5c5e9f6 Binary files /dev/null and b/generated/output_layer_weight_slice7.png differ diff --git a/generated/output_layer_weight_slice8.png b/generated/output_layer_weight_slice8.png new file mode 100644 index 0000000..ffa010a Binary files /dev/null and b/generated/output_layer_weight_slice8.png differ diff --git a/generated/output_layer_weight_slice9.png b/generated/output_layer_weight_slice9.png new file mode 100644 index 0000000..ba811ca Binary files /dev/null and b/generated/output_layer_weight_slice9.png differ diff --git a/tools/show.py b/tools/show.py index 0306984..3c20d38 100644 --- a/tools/show.py +++ b/tools/show.py @@ -19,12 +19,12 @@ def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0, auto2d=True): img = tensor.numpy() srp = img.shape - if auto2d and len(srp) == 1: + if auto2d and len(srp) == 1: # 1D的数据自动折叠成2D图像 ceiled = math.ceil((srp[0]) ** 0.5) img = cv2.copyMakeBorder(img, 0, ceiled * ceiled - srp[0], 0, 0, 0) img = img.reshape((ceiled, ceiled)) srp = img.shape - if autoPad and (max(srp) / min(srp) > 16): + if autoPad and (max(srp) / min(srp) > 16): # 自动拉伸成正方形 img = cv2.resize(img, [max(srp), max(srp)]) srp = img.shape if scale != 1.0: