Add output_layer_weight dump (commented-out code that saves 16 slices of the output layer weight as images).
|
@ -74,13 +74,13 @@ Linear(intermediate_parallel) no bias -> [6, 1, 4096]
|
|||
[6, 1, 32, 128] <- q k v
|
||||
/ | \
|
||||
pos_emb pos_emb \
|
||||
| | \
|
||||
| | |
|
||||
| expand expand -> [6, 1, 32, 128]
|
||||
\ / |
|
||||
dot |
|
||||
softmax /
|
||||
\ /
|
||||
dot -> [1, 32, 6, 128]
|
||||
dot -> [1, 32, 6, 128] -> [6, 1, 4096]
|
||||
Linear -> [6, 1, 4096]
|
||||
|
||||
hidden_states: [s, b, h]
|
||||
|
|
|
@ -452,6 +452,15 @@ class ChatGLMModel(nn.Module):
|
|||
hidden_states = self.encoder(inputs_embeds, rotary_pos_emb)
|
||||
hidden_states = hidden_states[-1:]
|
||||
lm_logits = self.output_layer(hidden_states)
|
||||
|
||||
# for i in range(16):
|
||||
# show.DumpTensorToImage(
|
||||
# self.output_layer.weight[
|
||||
# int(i * (65024 / 16)) : int((i + 1) * (65024 / 16)), :
|
||||
# ],
|
||||
# "generated/output_layer_weight_slice" + str(i) + ".png",
|
||||
# )
|
||||
|
||||
lm_logits = lm_logits.transpose(0, 1).contiguous()
|
||||
|
||||
return lm_logits
|
||||
|
|
After Width: | Height: | Size: 13 MiB |
After Width: | Height: | Size: 12 MiB |
After Width: | Height: | Size: 12 MiB |
After Width: | Height: | Size: 11 MiB |
After Width: | Height: | Size: 11 MiB |
After Width: | Height: | Size: 13 MiB |
After Width: | Height: | Size: 12 MiB |
After Width: | Height: | Size: 11 MiB |
After Width: | Height: | Size: 13 MiB |
After Width: | Height: | Size: 12 MiB |
After Width: | Height: | Size: 12 MiB |
After Width: | Height: | Size: 12 MiB |
After Width: | Height: | Size: 12 MiB |
After Width: | Height: | Size: 12 MiB |
After Width: | Height: | Size: 12 MiB |
After Width: | Height: | Size: 12 MiB |
|
@ -19,12 +19,12 @@ def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0, auto2d=True):
|
|||
img = tensor.numpy()
|
||||
srp = img.shape
|
||||
|
||||
if auto2d and len(srp) == 1:
|
||||
if auto2d and len(srp) == 1: # 1D的数据自动折叠成2D图像
|
||||
ceiled = math.ceil((srp[0]) ** 0.5)
|
||||
img = cv2.copyMakeBorder(img, 0, ceiled * ceiled - srp[0], 0, 0, 0)
|
||||
img = img.reshape((ceiled, ceiled))
|
||||
srp = img.shape
|
||||
if autoPad and (max(srp) / min(srp) > 16):
|
||||
if autoPad and (max(srp) / min(srp) > 16): # 自动拉伸成正方形
|
||||
img = cv2.resize(img, [max(srp), max(srp)])
|
||||
srp = img.shape
|
||||
if scale != 1.0:
|
||||
|
|