Update show and output token images.

Colin 2023-12-25 22:53:53 +08:00
parent 0bc7bc90b1
commit 72c13cde02
35 changed files with 40 additions and 27 deletions

View File

@@ -12,12 +12,12 @@ for:
     hidden_states = inputs_embeds
     for layers : GLMBlock(hidden_states, rotary_pos_emb)
-    hidden_states = RMSNorm(hidden_states)
-    hidden_states = hidden_states[-1:]  # take the last sequence position
-    lm_logits = self.output_layer(hidden_states)
+    hidden_states = RMSNorm(hidden_states)  # final_layernorm -> [6, 1, 4096]
+    hidden_states = hidden_states[-1:]  # take the last sequence position -> [1, 1, 4096]
+    lm_logits = Linear(hidden_states)  # -> [1, 1, 65024]
     lm_logits = lm_logits.transpose(0, 1).contiguous()  # -> [1, 1, 65024]
-    probs = softmax(lm_logits)  # -> [1, 65024]
+    probs = softmax(lm_logits)  # -> [1, 65024], \text{Softmax}(x_i) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
     next_tokens = torch.multinomial(probs, num_samples=1)  # sampling -> [1], 1 = batch_num
     if next_tokens == eos_token_id:  # inference finished, exit the loop
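
For concreteness, a minimal self-contained sketch of the sampling step above (softmax over the last position's logits, then a multinomial draw); the tensors are random stand-ins, and only the vocabulary size 65024 comes from the shapes in the diff:

    import torch

    vocab_size = 65024                                     # from the [1, 1, 65024] logits above
    lm_logits = torch.randn(1, 1, vocab_size)              # stand-in for the model output
    probs = torch.softmax(lm_logits[:, -1, :], dim=-1)     # [1, 65024], each row sums to 1
    next_tokens = torch.multinomial(probs, num_samples=1)  # [1, 1], one sampled token id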

View File

@@ -110,12 +110,6 @@ class CoreAttention(torch.nn.Module):
 class SelfAttention(torch.nn.Module):
-    """Parallel self-attention layer abstract class.
-
-    Self-attention layer takes input with size [s, b, h]
-    and returns output of the same size.
-    """
     def __init__(self, config: ChatGLMConfig, layer_number, device=None):
         super(SelfAttention, self).__init__()
         self.layer_number = max(1, layer_number)
@@ -237,14 +231,8 @@ class SelfAttention(torch.nn.Module):
                 self.hidden_size_per_attention_head,
             )
         )
-        # ==================================
-        # core attention computation
-        # ==================================
         context_layer = self.core_attention(query_layer, key_layer, value_layer)
-        # =================
-        # Output. [sq, b, h]
-        # =================
-        output = self.dense(context_layer)
+        output = self.dense(context_layer)  # [sq, b, h]
         return output
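
A shape-level sketch of the two lines that survive the cleanup, following the [sq, b, h] layout the removed docstring described (sizes are illustrative):

    import torch

    sq, b, h = 6, 1, 4096                  # sequence, batch, hidden (illustrative)
    context_layer = torch.randn(sq, b, h)  # what core_attention returns
    dense = torch.nn.Linear(h, h)          # stand-in for self.dense
    output = dense(context_layer)          # [sq, b, h]: same size in, same size out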
@@ -276,7 +264,6 @@ class MLP(torch.nn.Module):
         self.activation_func = swiglu
-        # Project back to h.
         self.dense_4h_to_h = nn.Linear(
             config.ffn_hidden_size,
             config.hidden_size,
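
Since the "Project back to h." comment is gone, a brief sketch of how swiglu and dense_4h_to_h fit together, assuming the usual gated formulation in ChatGLM-style MLPs (chunk the up-projection in two, SiLU-gate one half with the other); the 13696/4096 sizes are assumptions, not read from this diff:

    import torch
    import torch.nn.functional as F

    def swiglu(x):
        # split the last dimension in half and gate one half with SiLU of the other
        x0, x1 = x.chunk(2, dim=-1)
        return F.silu(x0) * x1

    ffn, h = 13696, 4096              # assumed ChatGLM3-6B sizes
    x = torch.randn(1, 2 * ffn)       # output of the up-projection
    y = swiglu(x)                     # [1, 13696]
    out = torch.nn.Linear(ffn, h)(y)  # dense_4h_to_h: project back to hidden_size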
@@ -595,6 +582,7 @@ class ChatGLMForConditionalGeneration(nn.Module):
         isFinished = torch.zeros(
             input_ids.shape[0], dtype=torch.long, device=input_ids.device
         )
+        token_count = 0
         while True:
             input_ids_in = input_ids
             batch_size, seq_length = input_ids_in.shape
@@ -611,8 +599,11 @@ class ChatGLMForConditionalGeneration(nn.Module):
             )
             next_token_logits = logits[:, -1, :]
             probs = nn.functional.softmax(next_token_logits, dim=-1)
-            next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
+            # show.DumpTensorToImage(next_token_logits[0], "generated/next_tokens"+str(token_count)+".png")
+            # token_count = token_count + 1
+            next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
             # finished sentences should add a padding token to next
             pad_token = pad_token_id * isFinished
             next_tokens = next_tokens * (1 - isFinished) + pad_token
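
The isFinished arithmetic deserves a worked example: rows that already hit EOS keep emitting pad_token_id while the rest keep their sampled tokens. With made-up values:

    import torch

    pad_token_id = 0                         # illustrative
    isFinished = torch.tensor([1, 0])        # sequence 0 is done, sequence 1 is not
    next_tokens = torch.tensor([523, 8841])  # freshly sampled ids (made up)
    pad_token = pad_token_id * isFinished    # [0, 0]
    next_tokens = next_tokens * (1 - isFinished) + pad_token
    print(next_tokens)                       # tensor([   0, 8841])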

demo.py (17 changes)
View File

@@ -4,12 +4,15 @@ import torch
 from chatglm import ChatGLMForConditionalGeneration
 from chatglm import ChatGLMTokenizer
+from tools import show
 from transformers import AutoConfig

 seed = 1234
 torch.manual_seed(seed)
 torch.cuda.manual_seed_all(seed)

 pretrained_model_name_or_path = "../ZhipuAI/chatglm3-6b"
 config, kwargs = AutoConfig.from_pretrained(
     pretrained_model_name_or_path,
@@ -39,17 +42,17 @@ tokenizer = ChatGLMTokenizer(*init_inputs, **init_kwargs)
 glm = glm.from_pretrained(pretrained_model_name_or_path).half().cuda()
 glm = glm.eval()

-query = "colin"
+query = "你好"
 response, history = glm.chat(tokenizer, query, history=[])
 print(response)
-if response[1:] != " Hello! How can I assist you today":
+if response[1:] != " 你好👋!我是人工智能助手 ChatGLM3-6B,很高兴见到你,欢迎问我任何问题。":
     raise ()

-query = "你好"
-response, history = glm.chat(tokenizer, query, history=history)
-print(response)
-if response[1:] != " 你好!有什么我可以帮助你的吗":
-    raise ()
+# query = "colin"
+# response, history = glm.chat(tokenizer, query, history=history)
+# print(response)
+# if response[1:] != " Hello! How can I assist you today":
+#     raise ()

 # response, history = glm.chat(tokenizer, "你是一个心理学专家,请问晚上睡不着应该怎么办", history=history)
 # print(response)

BIN  generated/next_tokens0.png … generated/next_tokens29.png  (30 new binary image files, 46-53 KiB each; binary files not shown)

View File

@@ -5,6 +5,7 @@ import torchvision.transforms.functional as Vision
 import cv2
 import math
 import numpy as np
+import os

 def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0, auto2d=True):
@@ -14,7 +15,7 @@ def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0, auto2d=True):
     tensor = tensor.float()
     maxv = torch.max(tensor)
     minv = torch.min(tensor)
-    tensor = (((tensor - minv) / (maxv - minv)) * 256).byte().cpu()
+    tensor = (((tensor - minv) / (maxv - minv)) * 255).byte().cpu()
     img = tensor.numpy()
     srp = img.shape
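
The 256 -> 255 change is a real fix: .byte() casts to uint8, so the maximum element, which scales to exactly 256, wraps around to 0 on a typical build and the brightest pixel renders as black. A two-line check:

    import torch
    print(torch.tensor([255.0]).byte())  # tensor([255], dtype=torch.uint8)
    print(torch.tensor([256.0]).byte())  # tensor([0], dtype=torch.uint8): wrapped around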
@@ -30,3 +31,18 @@ def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0, auto2d=True):
     img = cv2.resize(img, [int(srp[0] * scale), int(srp[1] * scale)])
     srp = img.shape
     cv2.imwrite(name, img)
+
+def DumpTensorToLog(tensor, name="log"):
+    shape = tensor.shape
+    f = open(name, "w")
+    data = tensor.reshape([-1]).float().cpu().numpy().tolist()
+    for d in data:
+        f.writelines("%s" % d + os.linesep)
+    f.close()
+
+def DumpTensorToFile(tensor, name="tensor.pt"):
+    torch.save(tensor.cpu(), name)
+
+def LoadTensorToFile(name="tensor.pt"):
+    return torch.load(name)
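
A quick round trip through the new save/load pair (note that LoadTensorToFile loads from a file despite the "To" in its name); this sketch assumes show.py is importable as tools.show, the same way demo.py imports it:

    import torch
    from tools import show

    t = torch.randn(4, 4)
    show.DumpTensorToFile(t, "tensor.pt")
    t2 = show.LoadTensorToFile("tensor.pt")
    assert torch.equal(t, t2)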

View File

@@ -8,3 +8,6 @@ import torch
 radata = torch.randn(127)
 show.DumpTensorToImage(radata, "test.png")
+
+radata = torch.randn(127, 127)
+show.DumpTensorToLog(radata, "test.log")