Update show and dump output-token images.
@ -12,12 +12,12 @@ for:
 hidden_states = inputs_embeds
 for layer in layers: hidden_states = GLMBlock(hidden_states, rotary_pos_emb)
-hidden_states = RMSNorm(hidden_states)
-hidden_states = hidden_states[-1:]  # keep only the last sequence position
-lm_logits = self.output_layer(hidden_states)
+hidden_states = RMSNorm(hidden_states)  # final_layernorm -> [6, 1, 4096]
+hidden_states = hidden_states[-1:]  # keep only the last sequence position -> [1, 1, 4096]
+lm_logits = Linear(hidden_states)  # -> [1, 1, 65024]
+lm_logits = lm_logits.transpose(0, 1).contiguous()  # -> [1, 1, 65024]
-probs = softmax(lm_logits)  # -> [1, 65024]
+probs = softmax(lm_logits)  # -> [1, 65024], \text{Softmax}(x_i) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
 next_tokens = torch.multinomial(probs, num_samples=1)  # sample -> [1], 1 = batch size
 if next_tokens == eos_token_id: break  # inference finished, exit the loop
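Note: a minimal runnable sketch of the sampling step above (shapes taken from the walkthrough; the real logits come from the model's output layer, a random tensor stands in here):

    import torch

    lm_logits = torch.randn(1, 1, 65024)                   # stand-in for the output_layer result
    probs = torch.softmax(lm_logits[:, -1, :], dim=-1)     # -> [1, 65024], rows sum to 1
    next_tokens = torch.multinomial(probs, num_samples=1)  # one sampled token id per batch row -> [1, 1]
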
@ -110,12 +110,6 @@ class CoreAttention(torch.nn.Module):


 class SelfAttention(torch.nn.Module):
     """Parallel self-attention layer abstract class.

     Self-attention layer takes input with size [s, b, h]
     and returns output of the same size.
     """

     def __init__(self, config: ChatGLMConfig, layer_number, device=None):
         super(SelfAttention, self).__init__()
         self.layer_number = max(1, layer_number)
@ -237,14 +231,8 @@ class SelfAttention(torch.nn.Module):
                 self.hidden_size_per_attention_head,
             )
         )
-        # ==================================
-        # core attention computation
-        # ==================================
         context_layer = self.core_attention(query_layer, key_layer, value_layer)
-        # =================
-        # Output. [sq, b, h]
-        # =================
-        output = self.dense(context_layer)
+        output = self.dense(context_layer)  # [sq, b, h]
         return output
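Note: core_attention is scaled dot-product attention over head-split tensors; a simplified sketch of the computation it performs (not the repo's exact implementation, which also handles attention masks):

    import math
    import torch

    def core_attention_sketch(q, k, v):
        # q, k, v: [sq, b, np, hn] = (sequence, batch, heads, head dim)
        scores = torch.einsum("sbnh,tbnh->bnst", q, k) / math.sqrt(q.shape[-1])
        weights = torch.softmax(scores, dim=-1)               # normalize over key positions
        context = torch.einsum("bnst,tbnh->sbnh", weights, v)
        sq, b, np_, hn = context.shape
        return context.reshape(sq, b, np_ * hn)               # merge heads -> [sq, b, h]
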
@ -276,7 +264,6 @@ class MLP(torch.nn.Module):

         self.activation_func = swiglu

         # Project back to h.
         self.dense_4h_to_h = nn.Linear(
             config.ffn_hidden_size,
             config.hidden_size,
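Note: swiglu gates one half of the doubled FFN projection with SiLU of the other half; a minimal sketch of such an activation (an assumption about the shape of the repo's swiglu, which may differ in detail):

    import torch
    import torch.nn.functional as F

    def swiglu(x):
        # split the [..., 2 * ffn_hidden_size] projection and gate one half
        a, b = torch.chunk(x, 2, dim=-1)
        return F.silu(a) * b
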
@ -595,6 +582,7 @@ class ChatGLMForConditionalGeneration(nn.Module):
         isFinished = torch.zeros(
             input_ids.shape[0], dtype=torch.long, device=input_ids.device
         )
+        token_count = 0
         while True:
             input_ids_in = input_ids
             batch_size, seq_length = input_ids_in.shape
@ -611,8 +599,11 @@ class ChatGLMForConditionalGeneration(nn.Module):
             )
             next_token_logits = logits[:, -1, :]
             probs = nn.functional.softmax(next_token_logits, dim=-1)
-            next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
+
+            # show.DumpTensorToImage(next_token_logits[0], "generated/next_tokens" + str(token_count) + ".png")
+            # token_count = token_count + 1
+
+            next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
             # finished sentences should add a padding token to next
             pad_token = pad_token_id * isFinished
             next_tokens = next_tokens * (1 - isFinished) + pad_token
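Note: the masking arithmetic in the last two lines keeps the sampled token for unfinished rows and forces finished rows to emit padding; a small worked example with hypothetical values:

    import torch

    isFinished = torch.tensor([0, 1, 0])         # row 1 has already produced EOS
    next_tokens = torch.tensor([523, 8841, 17])  # freshly sampled ids
    pad_token_id = 0

    pad_token = pad_token_id * isFinished                     # [0, 0, 0]
    next_tokens = next_tokens * (1 - isFinished) + pad_token  # [523, 0, 17]
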
demo.py
@ -4,12 +4,15 @@ import torch
 from chatglm import ChatGLMForConditionalGeneration
 from chatglm import ChatGLMTokenizer

+from tools import show
+
 from transformers import AutoConfig

 seed = 1234
 torch.manual_seed(seed)
 torch.cuda.manual_seed_all(seed)


 pretrained_model_name_or_path = "../ZhipuAI/chatglm3-6b"
 config, kwargs = AutoConfig.from_pretrained(
     pretrained_model_name_or_path,
@ -39,17 +42,17 @@ tokenizer = ChatGLMTokenizer(*init_inputs, **init_kwargs)

 glm = glm.from_pretrained(pretrained_model_name_or_path).half().cuda()
 glm = glm.eval()
-query = "colin"
+query = "你好"
 response, history = glm.chat(tokenizer, query, history=[])
 print(response)
-if response[1:] != " Hello! How can I assist you today":
+if response[1:] != " 你好👋!我是人工智能助手 ChatGLM3-6B,很高兴见到你,欢迎问我任何问题":
     raise ()

 query = "你好"
 response, history = glm.chat(tokenizer, query, history=history)
 print(response)
 if response[1:] != " 你好!有什么我可以帮助你的吗":
     raise ()
+# query = "colin"
+# response, history = glm.chat(tokenizer, query, history=history)
+# print(response)
+# if response[1:] != " Hello! How can I assist you today":
+#     raise ()

 # response, history = glm.chat(tokenizer, "你是一个心理学专家,请问晚上睡不着应该怎么办", history=history)
 # print(response)
|
After Width: | Height: | Size: 50 KiB |
After Width: | Height: | Size: 46 KiB |
After Width: | Height: | Size: 52 KiB |
After Width: | Height: | Size: 52 KiB |
After Width: | Height: | Size: 52 KiB |
After Width: | Height: | Size: 49 KiB |
After Width: | Height: | Size: 48 KiB |
After Width: | Height: | Size: 50 KiB |
After Width: | Height: | Size: 47 KiB |
After Width: | Height: | Size: 47 KiB |
After Width: | Height: | Size: 48 KiB |
After Width: | Height: | Size: 48 KiB |
After Width: | Height: | Size: 53 KiB |
After Width: | Height: | Size: 51 KiB |
After Width: | Height: | Size: 51 KiB |
After Width: | Height: | Size: 51 KiB |
After Width: | Height: | Size: 50 KiB |
After Width: | Height: | Size: 52 KiB |
After Width: | Height: | Size: 50 KiB |
After Width: | Height: | Size: 50 KiB |
After Width: | Height: | Size: 50 KiB |
After Width: | Height: | Size: 50 KiB |
After Width: | Height: | Size: 50 KiB |
After Width: | Height: | Size: 50 KiB |
After Width: | Height: | Size: 52 KiB |
After Width: | Height: | Size: 51 KiB |
After Width: | Height: | Size: 51 KiB |
After Width: | Height: | Size: 51 KiB |
After Width: | Height: | Size: 50 KiB |
After Width: | Height: | Size: 52 KiB |
|
@ -5,6 +5,7 @@ import torchvision.transforms.functional as Vision
 import cv2
 import math
 import numpy as np
+import os


 def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0, auto2d=True):
@ -14,7 +15,7 @@ def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0, auto2d=True):
     tensor = tensor.float()
     maxv = torch.max(tensor)
     minv = torch.min(tensor)
-    tensor = (((tensor - minv) / (maxv - minv)) * 256).byte().cpu()
+    tensor = (((tensor - minv) / (maxv - minv)) * 255).byte().cpu()
     img = tensor.numpy()
     srp = img.shape
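Note: the 256 -> 255 change matters because the maximum normalized value of 1.0 times 256 overflows uint8 and wraps to 0; a quick check:

    import torch

    x = torch.tensor([0.0, 0.5, 1.0])  # already normalized to [0, 1]
    print((x * 256).byte())            # tensor([  0, 128,   0]) -- the max wraps to 0
    print((x * 255).byte())            # tensor([  0, 127, 255]) -- full range kept
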
@ -30,3 +31,18 @@ def DumpTensorToImage(tensor, name, autoPad=True, scale=1.0, auto2d=True):
     img = cv2.resize(img, [int(srp[0] * scale), int(srp[1] * scale)])
     srp = img.shape
     cv2.imwrite(name, img)
+
+
+def DumpTensorToLog(tensor, name="log"):
+    # write one element per line, flattened in row-major order
+    f = open(name, "w")
+    data = tensor.reshape([-1]).float().cpu().numpy().tolist()
+    for d in data:
+        f.writelines("%s" % d + os.linesep)
+    f.close()
+
+
+def DumpTensorToFile(tensor, name="tensor.pt"):
+    torch.save(tensor.cpu(), name)
+
+
+def LoadTensorToFile(name="tensor.pt"):
+    return torch.load(name)
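Note: the new save/load helpers round-trip a tensor through disk via torch.save/torch.load; a short usage sketch (assuming the module is importable as tools.show, as in demo.py):

    import torch
    from tools import show

    t = torch.randn(4, 4)
    show.DumpTensorToFile(t, "tensor.pt")    # saved on CPU
    t2 = show.LoadTensorToFile("tensor.pt")  # loaded back
    assert torch.equal(t.cpu(), t2)
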
@ -8,3 +8,6 @@ import torch

 radata = torch.randn(127)
 show.DumpTensorToImage(radata, "test.png")
+
+radata = torch.randn(127, 127)
+show.DumpTensorToLog(radata, "test.log")