Add output_layer_weight dump.

Colin 2023-12-26 18:59:28 +08:00
parent 235f65aa19
commit 0cee40dbb0
19 changed files with 13 additions and 4 deletions


@@ -74,13 +74,13 @@ Linear(intermediate_parallel) no bias -> [6, 1, 4096]
 [6, 1, 32, 128]  <-  q   k   v
            /     |     \
      pos_emb  pos_emb    \
-        |        |        \
+        |        |        |
         |     expand    expand -> [6, 1, 32, 128]
          \     /           |
           dot              |
         softmax           /
              \           /
-              dot -> [1, 32, 6, 128]
+              dot -> [1, 32, 6, 128] -> [6, 1, 4096]
               Linear -> [6, 1, 4096]
 hidden_states: [s, b, h]
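The diagram traces the tensor shapes through self-attention: s=6 tokens, b=1 batch, 32 heads of 128 dims. A minimal PyTorch sketch of that same shape flow (random tensors, not the actual ChatGLM modules) is:

    import torch

    # Shape-flow sketch of the diagram above (illustration only).
    s, b, nh, hd = 6, 1, 32, 128
    q = torch.randn(s, b, nh, hd)   # query after rotary pos_emb
    k = torch.randn(s, b, nh, hd)   # key after rotary pos_emb and expand
    v = torch.randn(s, b, nh, hd)   # value after expand

    # [s, b, nh, hd] -> [b, nh, s, hd] so the two matmuls run per head
    q, k, v = (t.permute(1, 2, 0, 3) for t in (q, k, v))

    scores = q @ k.transpose(-1, -2) / hd**0.5  # dot     -> [1, 32, 6, 6]
    probs = torch.softmax(scores, dim=-1)       # softmax -> [1, 32, 6, 6]
    ctx = probs @ v                             # dot     -> [1, 32, 6, 128]

    # [1, 32, 6, 128] -> [6, 1, 4096]; the output Linear keeps [6, 1, 4096]
    ctx = ctx.permute(2, 0, 1, 3).reshape(s, b, nh * hd)
    out = torch.nn.Linear(nh * hd, nh * hd, bias=False)(ctx)
    print(out.shape)  # torch.Size([6, 1, 4096])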


@@ -452,6 +452,15 @@ class ChatGLMModel(nn.Module):
         hidden_states = self.encoder(inputs_embeds, rotary_pos_emb)
         hidden_states = hidden_states[-1:]
         lm_logits = self.output_layer(hidden_states)
+        # for i in range(16):
+        #     show.DumpTensorToImage(
+        #         self.output_layer.weight[
+        #             int(i * (65024 / 16)) : int((i + 1) * (65024 / 16)), :
+        #         ],
+        #         "generated/output_layer_weight_slice" + str(i) + ".png",
+        #     )
         lm_logits = lm_logits.transpose(0, 1).contiguous()
         return lm_logits

16 binary image files added (contents not shown), 11–13 MiB each — presumably the generated output_layer_weight_slice images referenced above.


@@ -19,12 +19,12 @@ def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0, auto2d=True):
     img = tensor.numpy()
     srp = img.shape
-    if auto2d and len(srp) == 1:
+    if auto2d and len(srp) == 1:  # automatically fold 1D data into a 2D image
         ceiled = math.ceil((srp[0]) ** 0.5)
         img = cv2.copyMakeBorder(img, 0, ceiled * ceiled - srp[0], 0, 0, 0)
         img = img.reshape((ceiled, ceiled))
         srp = img.shape
-    if autoPad and (max(srp) / min(srp) > 16):
+    if autoPad and (max(srp) / min(srp) > 16):  # automatically stretch to a square
         img = cv2.resize(img, [max(srp), max(srp)])
         srp = img.shape
     if scale != 1.0:
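The two new comments label the helper's automatic-reshape branches: a 1D input is padded to a perfect square and folded into a 2D image, and a very skinny 2D input (aspect ratio above 16) is resized to a square before writing. A hypothetical call site (file names invented) showing when each branch fires:

    import torch
    from show import DumpTensorToImage  # assumption: this helper file is importable as `show`

    vec = torch.randn(4096)        # 1D: folded into ceil(sqrt(4096)) = 64 x 64
    DumpTensorToImage(vec, "generated/vec_64x64.png")

    skinny = torch.randn(8, 4096)  # aspect ratio 512 > 16: stretched to 4096 x 4096
    DumpTensorToImage(skinny, "generated/skinny_square.png")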