Update show and output tokens image.
@@ -12,12 +12,12 @@ for:
 hidden_states = inputs_embeds
 for layers : GLMBlock(hidden_states, rotary_pos_emb)
-hidden_states = RMSNorm(hidden_states)
-hidden_states = hidden_states[-1:]  take the last sequence position
-lm_logits = self.output_layer(hidden_states)
+hidden_states = RMSNorm(hidden_states)  # final_layernorm -> [6, 1, 4096]
+hidden_states = hidden_states[-1:]  take the last sequence position -> [1, 1, 4096]
+lm_logits = Linear(hidden_states) -> [1, 1, 65024]
 lm_logits = lm_logits.transpose(0, 1).contiguous() -> [1, 1, 65024]

-probs = softmax(lm_logits) -> [1, 65024]
+probs = softmax(lm_logits) -> [1, 65024]   \mathrm{Softmax}(x_i) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
 next_tokens = torch.multinomial(probs, num_samples=1)  sample -> [1]  (1 = batch_num)

 if next_tokens == eos_token_id  inference is finished, exit the loop

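The last three steps above are the whole sampling stage. A minimal, self-contained sketch of that stage (the tensor values and eos_token_id below are placeholders for illustration, not taken from the repo):

    import torch

    vocab_size = 65024
    lm_logits = torch.randn(1, 1, vocab_size)        # [batch, seq=1, vocab] after the transpose above

    next_token_logits = lm_logits[:, -1, :]          # [1, 65024]
    probs = torch.nn.functional.softmax(next_token_logits, dim=-1)
    next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)   # [1]

    eos_token_id = 2                                  # placeholder id, illustration only
    if (next_tokens == eos_token_id).all():
        print("EOS sampled: stop generation")
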
@@ -110,12 +110,6 @@ class CoreAttention(torch.nn.Module):


 class SelfAttention(torch.nn.Module):
-    """Parallel self-attention layer abstract class.
-
-    Self-attention layer takes input with size [s, b, h]
-    and returns output of the same size.
-    """
-
     def __init__(self, config: ChatGLMConfig, layer_number, device=None):
         super(SelfAttention, self).__init__()
         self.layer_number = max(1, layer_number)

@@ -237,14 +231,8 @@ class SelfAttention(torch.nn.Module):
                self.hidden_size_per_attention_head,
            )
        )
-        # ==================================
-        # core attention computation
-        # ==================================
        context_layer = self.core_attention(query_layer, key_layer, value_layer)
-        # =================
-        # Output. [sq, b, h]
-        # =================
-        output = self.dense(context_layer)
+        output = self.dense(context_layer)  # [sq, b, h]
        return output


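For readers following the shapes, here is a minimal sketch of what the core_attention + dense path computes. The plain scaled-dot-product attention and the tensor sizes are assumptions for illustration, not the repo's exact implementation:

    import torch
    import torch.nn as nn

    sq, b, n_heads, head_dim = 6, 1, 32, 128        # assumed: seq, batch, heads, head size
    h = n_heads * head_dim                           # hidden size 4096

    query = torch.randn(sq, b, n_heads, head_dim)
    key = torch.randn(sq, b, n_heads, head_dim)
    value = torch.randn(sq, b, n_heads, head_dim)

    # core attention: scaled dot-product per head, heads merged back to [sq, b, h]
    scores = torch.einsum("sbnh,tbnh->bnst", query, key) / head_dim ** 0.5
    context = torch.einsum("bnst,tbnh->sbnh", scores.softmax(dim=-1), value).reshape(sq, b, h)

    dense = nn.Linear(h, h)                          # output projection
    output = dense(context)                          # [sq, b, h]
    print(output.shape)                              # torch.Size([6, 1, 4096])
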
@@ -276,7 +264,6 @@ class MLP(torch.nn.Module):

        self.activation_func = swiglu

-        # Project back to h.
        self.dense_4h_to_h = nn.Linear(
            config.ffn_hidden_size,
            config.hidden_size,
@@ -595,6 +582,7 @@ class ChatGLMForConditionalGeneration(nn.Module):
        isFinished = torch.zeros(
            input_ids.shape[0], dtype=torch.long, device=input_ids.device
        )
+        token_count = 0
        while True:
            input_ids_in = input_ids
            batch_size, seq_length = input_ids_in.shape
@@ -611,8 +599,11 @@ class ChatGLMForConditionalGeneration(nn.Module):
            )
            next_token_logits = logits[:, -1, :]
            probs = nn.functional.softmax(next_token_logits, dim=-1)
-            next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
+
+            # show.DumpTensorToImage(next_token_logits[0], "generated/next_tokens"+str(token_count)+".png")
+            # token_count = token_count + 1
+
+            next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
            # finished sentences should add a padding token to next
            pad_token = pad_token_id * isFinished
            next_tokens = next_tokens * (1 - isFinished) + pad_token

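The isFinished masking above keeps batched generation in lockstep: rows that have already produced EOS keep emitting the pad id while the others continue. A tiny worked example (pad_token_id and the sampled ids are made-up values):

    import torch

    pad_token_id = 0                          # assumed value, illustration only
    isFinished = torch.tensor([1, 0])         # sentence 0 finished, sentence 1 still generating
    next_tokens = torch.tensor([523, 8841])   # freshly sampled ids (made up)

    pad_token = pad_token_id * isFinished
    next_tokens = next_tokens * (1 - isFinished) + pad_token
    print(next_tokens)                        # tensor([   0, 8841]) -- finished row emits the pad id
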
demo.py
@@ -4,12 +4,15 @@ import torch
 from chatglm import ChatGLMForConditionalGeneration
 from chatglm import ChatGLMTokenizer

+from tools import show
+
 from transformers import AutoConfig

 seed = 1234
 torch.manual_seed(seed)
 torch.cuda.manual_seed_all(seed)


 pretrained_model_name_or_path = "../ZhipuAI/chatglm3-6b"
 config, kwargs = AutoConfig.from_pretrained(
     pretrained_model_name_or_path,
@@ -39,17 +42,17 @@ tokenizer = ChatGLMTokenizer(*init_inputs, **init_kwargs)

 glm = glm.from_pretrained(pretrained_model_name_or_path).half().cuda()
 glm = glm.eval()
-query = "colin"
+query = "你好"
 response, history = glm.chat(tokenizer, query, history=[])
 print(response)
-if response[1:] != " Hello! How can I assist you today":
+if response[1:] != " 你好👋!我是人工智能助手 ChatGLM3-6B,很高兴见到你,欢迎问我任何问题":
     raise ()

-query = "你好"
-response, history = glm.chat(tokenizer, query, history=history)
-print(response)
-if response[1:] != " 你好!有什么我可以帮助你的吗":
-    raise ()
+# query = "colin"
+# response, history = glm.chat(tokenizer, query, history=history)
+# print(response)
+# if response[1:] != " Hello! How can I assist you today":
+#     raise ()

 # response, history = glm.chat(tokenizer, "你是一个心理学专家,请问晚上睡不着应该怎么办", history=history)
 # print(response)

[30 new PNG images added (binary files, 46-53 KiB each)]

@@ -5,6 +5,7 @@ import torchvision.transforms.functional as Vision
 import cv2
 import math
 import numpy as np
+import os


 def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0, auto2d=True):
@@ -14,7 +15,7 @@ def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0, auto2d=True):
    tensor = tensor.float()
    maxv = torch.max(tensor)
    minv = torch.min(tensor)
-    tensor = (((tensor - minv) / (maxv - minv)) * 256).byte().cpu()
+    tensor = (((tensor - minv) / (maxv - minv)) * 255).byte().cpu()
    img = tensor.numpy()
    srp = img.shape

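The 256 -> 255 change matters because the normalization maps the maximum element to exactly 1.0, and multiplying by 256 pushes that value out of uint8 range. A quick check (not from the repo):

    import torch

    t = torch.tensor([0.0, 0.5, 1.0])   # normalized tensor, max is exactly 1.0
    print((t * 256).byte())             # max becomes 256, which overflows uint8 (typically wraps to 0, i.e. black)
    print((t * 255).byte())             # tensor([  0, 127, 255]) -- max maps to pure white
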
@@ -30,3 +31,18 @@ def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0, auto2d=True):
    img = cv2.resize(img, [int(srp[0] * scale), int(srp[1] * scale)])
    srp = img.shape
    cv2.imwrite(name, img)
+
+
+def DumpTensorToLog(tensor, name="log"):
+    shape = tensor.shape
+    f = open(name, "w")
+    data = tensor.reshape([-1]).float().cpu().numpy().tolist()
+    for d in data:
+        f.writelines("%s" % d + os.linesep)
+    f.close()
+
+
+def DumpTensorToFile(tensor, name="tensor.pt"):
+    torch.save(tensor.cpu(), name)
+
+
+def LoadTensorToFile(name="tensor.pt"):
+    return torch.load(name)

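The new save/load helpers round-trip a tensor through a file. A short usage sketch (the import path assumes show.py lives in the tools package, as demo.py's `from tools import show` suggests; the file name is arbitrary):

    import torch
    from tools import show

    t = torch.randn(4, 4)
    show.DumpTensorToFile(t, "tensor.pt")      # saves the CPU copy of the tensor
    restored = show.LoadTensorToFile("tensor.pt")
    assert torch.equal(t, restored)
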
@@ -8,3 +8,6 @@ import torch

 radata = torch.randn(127)
 show.DumpTensorToImage(radata, "test.png")
+
+radata = torch.randn(127, 127)
+show.DumpTensorToLog(radata, "test.log")