Update show and output tokens image.

This commit is contained in:
Colin 2023-12-25 22:53:53 +08:00
parent 0bc7bc90b1
commit 72c13cde02
35 changed files with 40 additions and 27 deletions

View File

@ -12,12 +12,12 @@ for:
hidden_states = inputs_embeds
for layers : GLMBlock(hidden_states, rotary_pos_emb)
hidden_states = RMSNorm(hidden_states)
hidden_states = hidden_states[-1:] 截取最后一个sequence
lm_logits = self.output_layer(hidden_states)
hidden_states = RMSNorm(hidden_states) # final_layernorm -> [6, 1, 4096]
hidden_states = hidden_states[-1:] 截取最后一个sequence -> [1, 1, 4096]
lm_logits = Linear(hidden_states) -> [1, 1, 65024]
lm_logits = lm_logits.transpose(0, 1).contiguous() -> [1, 1, 65024]
probs = softmax(lm_logits) -> [1, 65024]
probs = softmax(lm_logits) -> [1, 65024] \text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
next_tokens = torch.multinomial(probs, num_samples=1) 采样 -> [1] 1:batch_num
if next_tokens == eos_token_id 推理结束退出循环

View File

@ -110,12 +110,6 @@ class CoreAttention(torch.nn.Module):
class SelfAttention(torch.nn.Module):
"""Parallel self-attention layer abstract class.
Self-attention layer takes input with size [s, b, h]
and returns output of the same size.
"""
def __init__(self, config: ChatGLMConfig, layer_number, device=None):
super(SelfAttention, self).__init__()
self.layer_number = max(1, layer_number)
@ -237,14 +231,8 @@ class SelfAttention(torch.nn.Module):
self.hidden_size_per_attention_head,
)
)
# ==================================
# core attention computation
# ==================================
context_layer = self.core_attention(query_layer, key_layer, value_layer)
# =================
# Output. [sq, b, h]
# =================
output = self.dense(context_layer)
output = self.dense(context_layer) # [sq, b, h]
return output
@ -276,7 +264,6 @@ class MLP(torch.nn.Module):
self.activation_func = swiglu
# Project back to h.
self.dense_4h_to_h = nn.Linear(
config.ffn_hidden_size,
config.hidden_size,
@ -595,6 +582,7 @@ class ChatGLMForConditionalGeneration(nn.Module):
isFinished = torch.zeros(
input_ids.shape[0], dtype=torch.long, device=input_ids.device
)
token_count = 0
while True:
input_ids_in = input_ids
batch_size, seq_length = input_ids_in.shape
@ -611,8 +599,11 @@ class ChatGLMForConditionalGeneration(nn.Module):
)
next_token_logits = logits[:, -1, :]
probs = nn.functional.softmax(next_token_logits, dim=-1)
next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
# show.DumpTensorToImage(next_token_logits[0], "generated/next_tokens"+str(token_count)+".png")
# token_count = token_count + 1
next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
# finished sentences should add a padding token to next
pad_token = pad_token_id * isFinished
next_tokens = next_tokens * (1 - isFinished) + pad_token

17
demo.py
View File

@ -4,12 +4,15 @@ import torch
from chatglm import ChatGLMForConditionalGeneration
from chatglm import ChatGLMTokenizer
from tools import show
from transformers import AutoConfig
seed = 1234
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
pretrained_model_name_or_path = "../ZhipuAI/chatglm3-6b"
config, kwargs = AutoConfig.from_pretrained(
pretrained_model_name_or_path,
@ -39,17 +42,17 @@ tokenizer = ChatGLMTokenizer(*init_inputs, **init_kwargs)
glm = glm.from_pretrained(pretrained_model_name_or_path).half().cuda()
glm = glm.eval()
query = "colin"
query = "你好"
response, history = glm.chat(tokenizer, query, history=[])
print(response)
if response[1:] != " Hello! How can I assist you today":
if response[1:] != " 你好👋!我是人工智能助手 ChatGLM3-6B很高兴见到你欢迎问我任何问题":
raise ()
query = "你好"
response, history = glm.chat(tokenizer, query, history=history)
print(response)
if response[1:] != " 你好!有什么我可以帮助你的吗":
raise ()
# query = "colin"
# response, history = glm.chat(tokenizer, query, history=history)
# print(response)
# if response[1:] != " Hello! How can I assist you today":
# raise ()
# response, history = glm.chat(tokenizer, "你是一个心理学专家,请问晚上睡不着应该怎么办", history=history)
# print(response)

BIN
generated/next_tokens0.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

BIN
generated/next_tokens1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

BIN
generated/next_tokens10.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

BIN
generated/next_tokens11.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

BIN
generated/next_tokens12.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

BIN
generated/next_tokens13.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

BIN
generated/next_tokens14.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

BIN
generated/next_tokens15.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

BIN
generated/next_tokens16.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

BIN
generated/next_tokens17.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

BIN
generated/next_tokens18.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

BIN
generated/next_tokens19.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

BIN
generated/next_tokens2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

BIN
generated/next_tokens20.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

BIN
generated/next_tokens21.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

BIN
generated/next_tokens22.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

BIN
generated/next_tokens23.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

BIN
generated/next_tokens24.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

BIN
generated/next_tokens25.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

BIN
generated/next_tokens26.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

BIN
generated/next_tokens27.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

BIN
generated/next_tokens28.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

BIN
generated/next_tokens29.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

BIN
generated/next_tokens3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

BIN
generated/next_tokens4.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

BIN
generated/next_tokens5.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

BIN
generated/next_tokens6.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

BIN
generated/next_tokens7.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

BIN
generated/next_tokens8.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

BIN
generated/next_tokens9.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

View File

@ -5,6 +5,7 @@ import torchvision.transforms.functional as Vision
import cv2
import math
import numpy as np
import os
def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0, auto2d=True):
@ -14,7 +15,7 @@ def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0, auto2d=True):
tensor = tensor.float()
maxv = torch.max(tensor)
minv = torch.min(tensor)
tensor = (((tensor - minv) / (maxv - minv)) * 256).byte().cpu()
tensor = (((tensor - minv) / (maxv - minv)) * 255).byte().cpu()
img = tensor.numpy()
srp = img.shape
@ -30,3 +31,18 @@ def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0, auto2d=True):
img = cv2.resize(img, [int(srp[0] * scale), int(srp[1] * scale)])
srp = img.shape
cv2.imwrite(name, img)
def DumpTensorToLog(tensor, name="log"):
    """Write every element of ``tensor`` to a text file, one value per line.

    The tensor is flattened with ``reshape([-1])``, converted to float, moved
    to the CPU and turned into a plain Python list; the values are then
    written in flattened order using their default string form.

    Args:
        tensor: Tensor to dump.
        name: Path of the text file to create or overwrite.
    """
    values = tensor.reshape([-1]).float().cpu().numpy().tolist()
    # The file is opened in text mode, where "\n" is already translated to the
    # platform line separator; writing os.linesep explicitly would produce
    # "\r\r\n" on Windows.  The context manager closes the file even if a
    # write fails.
    with open(name, "w") as f:
        f.writelines("%s\n" % v for v in values)
def DumpTensorToFile(tensor, name="tensor.pt"):
    """Serialize a CPU copy of ``tensor`` to ``name`` with ``torch.save``.

    Args:
        tensor: Tensor to store; it is moved to the CPU before saving.
        name: Destination path of the saved file.
    """
    cpu_tensor = tensor.cpu()
    torch.save(cpu_tensor, name)
def LoadTensorToFile(name="tensor.pt"):
    """Load and return the object stored at ``name`` via ``torch.load``.

    Args:
        name: Path of the file to read.

    Returns:
        Whatever ``torch.load`` deserializes from the file.
    """
    # NOTE(review): torch.load is pickle-based; only load files that come
    # from a trusted source.
    loaded = torch.load(name)
    return loaded

View File

@ -8,3 +8,6 @@ import torch
radata = torch.randn(127)
show.DumpTensorToImage(radata, "test.png")
radata = torch.randn(127, 127)
show.DumpTensorToLog(radata, "test.log")