import torch
from modelscope import snapshot_download
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig

from modeling_qwen import QWenLMHeadModel  # local, modified copy of Qwen's modeling file

# Fix RNG seeds for reproducible sampling.
seed = 4321
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

model_dir = snapshot_download("qwen/Qwen-1_8B-Chat")
# model_dir = "/home/colin/.cache/modelscope/hub/qwen/Qwen-1_8B-Chat"

config, kwargs = AutoConfig.from_pretrained(
    "./",  # expects a config.json in the current directory, next to the local modeling_qwen.py
    return_unused_kwargs=True,
    trust_remote_code=True,
    code_revision=None,
    _commit_hash=None,
)
# Build the model skeleton from the config (randomly initialized weights).
model = QWenLMHeadModel(config)

print(model)

# QWenLMHeadModel(
#   (transformer): QWenModel(
#     (wte): Embedding(151936, 2048)
#     (drop): Dropout(p=0.0, inplace=False)
#     (rotary_emb): RotaryEmbedding()
#     (h): ModuleList(
#       (0-23): 24 x QWenBlock(
#         (ln_1): RMSNorm()
#         (attn): QWenAttention(
#           (c_attn): Linear(in_features=2048, out_features=6144, bias=True)
#           (c_proj): Linear(in_features=2048, out_features=2048, bias=False)
#           (attn_dropout): Dropout(p=0.0, inplace=False)
#         )
#         (ln_2): RMSNorm()
#         (mlp): QWenMLP(
#           (w1): Linear(in_features=2048, out_features=5504, bias=False)
#           (w2): Linear(in_features=2048, out_features=5504, bias=False)
#           (c_proj): Linear(in_features=5504, out_features=2048, bias=False)
#         )
#       )
#     )
#     (ln_f): RMSNorm()
#   )
#   (lm_head): Linear(in_features=2048, out_features=151936, bias=False)
# )
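
# Quick sanity check on the printed shapes (rough arithmetic, a sketch):
# 24 blocks x (c_attn 2048*6144 + c_proj 2048*2048 + w1/w2 at 2048*5504 each
# + mlp c_proj 5504*2048) plus wte and lm_head at 151936*2048 each comes to
# roughly 1.8B parameters, matching the model name.
n_params = sum(p.numel() for p in model.parameters())
print(f"total parameters: {n_params / 1e9:.2f}B")  # expected ~1.8B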

tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
# model = model.from_pretrained(model_dir, config=config, device_map="cuda:1", trust_remote_code=True)
# from_pretrained is a classmethod, so this discards the random-init
# skeleton above and returns a new instance with the downloaded weights.
model = model.from_pretrained(model_dir).cuda()

# model = model.eval()
model = model.train()  # gradients are still disabled by @torch.no_grad() in the generation code
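
# For reference, the stock loading path from the Qwen model card does the
# construct-and-load in one step via AutoModelForCausalLM (which is why it
# is imported above); kept commented out because this script deliberately
# instantiates the local class first:
# model = AutoModelForCausalLM.from_pretrained(
#     model_dir, device_map="auto", trust_remote_code=True
# ).eval()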

# Generation length, top_p, and other sampling hyperparameters can be
# customized via the generation config:
# model.generation_config = GenerationConfig.from_pretrained(
#     model_dir, trust_remote_code=True
# )
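# A minimal sketch of overriding individual sampling knobs after loading
# the shipped config (top_p / max_new_tokens are standard transformers
# GenerationConfig fields; the values here are illustrative only):
# model.generation_config.top_p = 0.8
# model.generation_config.max_new_tokens = 512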

# First dialogue round.
# chat() here comes from the local modeling_qwen and returns the raw decoded
# token string in addition to the usual (response, history).
response, history, decode_tokens = model.chat(tokenizer, "东南亚国家日本的首都是什么市", "", history=None)
print(decode_tokens)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# 东南亚国家日本的首都是什么市<|im_end|>
# <|im_start|>assistant
# 日本的首都东京。<|im_end|><|endoftext|>
# (The user asks, as a trick question, for the capital of "the Southeast
# Asian country Japan"; the model replies that Japan's capital is Tokyo.)
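
# The transcript above is plain ChatML. A minimal sketch of how such a
# prompt string is assembled (the real construction happens inside the
# model code's make_context helper; this only mirrors the printed output):
def chatml_prompt(query, system="You are a helpful assistant."):
    return (
        f"<|im_start|>system\n{system}<|im_end|>\n"
        f"<|im_start|>user\n{query}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )
# e.g. chatml_prompt("东南亚国家日本的首都是什么市") reproduces the prompt printed above.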

# # First dialogue round (alternative greeting)
# response, history, decode_tokens = model.chat(tokenizer, "你好", "", history=None)
# print(decode_tokens)
# # 你好!很高兴为你提供帮助。  ("Hello! Happy to help you.")

# Second dialogue round (still history=None, so in fact another fresh
# single-turn conversation; a true multi-turn sketch follows at the end).
# Query: "Tell me a story about a young person who strove to build a
# business and finally succeeded."
response, history, decode_tokens = model.chat(tokenizer, "给我讲一个年轻人奋斗创业最终取得成功的故事。", "", history=None)
print(decode_tokens)

# Note: the transcript below corresponds to the commented-out "你好" prompt,
# not the story prompt above; the model hallucinates a Shakespeare anecdote
# instead of a plain greeting.
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# 你好<|im_end|>
# <|im_start|>assistant
# 莎士比亚是头一个使用“你好”这个词的文学家,他在《哈姆雷特》中写道:“你是谁?你在哪儿?
# ”他的这一段话,通常被认为是最早的使用“你好”这个词的文学记载。这句话在英国语中非常常见,
# 特别是在正式或礼貌的情况下。<|im_end|><|endoftext|>
# (Rough translation: "Shakespeare was the first writer to use the word
# 'hello'; in Hamlet he wrote: 'Who are you? Where are you?' This passage is
# commonly taken as the earliest literary record of the word. It is very
# common in English, especially in formal or polite situations.")
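
# A sketch of a genuine follow-up turn, feeding the returned history back in
# (assuming this local chat() accepts history the same way the upstream
# implementation does; the follow-up query is illustrative only):
# response, history, decode_tokens = model.chat(
#     tokenizer, "为什么日本不属于东南亚?", "", history=history  # "Why isn't Japan part of Southeast Asia?"
# )
# print(response)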