import torch
from modelscope import snapshot_download
from transformers import AutoConfig, AutoTokenizer
from transformers.generation import GenerationConfig

from modeling_qwen import QWenLMHeadModel

# Fix the random seeds so generation is reproducible.
seed = 4321
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Download the checkpoint from ModelScope (or point at a local cache instead).
model_dir = snapshot_download("qwen/Qwen-1_8B-Chat")
# model_dir = "/home/colin/.cache/modelscope/hub/qwen/Qwen-1_8B-Chat"

# Load the model config from the current directory, which is assumed to hold
# config.json next to the local modeling_qwen.py imported above.
config, kwargs = AutoConfig.from_pretrained(
    "./",
    return_unused_kwargs=True,
    trust_remote_code=True,
    code_revision=None,
    _commit_hash=None,
)

# Build the model from the config first (random weights) just to inspect
# its architecture.
model = QWenLMHeadModel(config)
print(model)
# QWenLMHeadModel(
#   (transformer): QWenModel(
#     (wte): Embedding(151936, 2048)
#     (drop): Dropout(p=0.0, inplace=False)
#     (rotary_emb): RotaryEmbedding()
#     (h): ModuleList(
#       (0-23): 24 x QWenBlock(
#         (ln_1): RMSNorm()
#         (attn): QWenAttention(
#           (c_attn): Linear(in_features=2048, out_features=6144, bias=True)
#           (c_proj): Linear(in_features=2048, out_features=2048, bias=False)
#           (attn_dropout): Dropout(p=0.0, inplace=False)
#         )
#         (ln_2): RMSNorm()
#         (mlp): QWenMLP(
#           (w1): Linear(in_features=2048, out_features=5504, bias=False)
#           (w2): Linear(in_features=2048, out_features=5504, bias=False)
#           (c_proj): Linear(in_features=5504, out_features=2048, bias=False)
#         )
#       )
#     )
#     (ln_f): RMSNorm()
#   )
#   (lm_head): Linear(in_features=2048, out_features=151936, bias=False)
# )

tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)

# from_pretrained is a classmethod, so this replaces the randomly initialized
# instance above with one loaded from the downloaded checkpoint.
model = model.from_pretrained(
    model_dir, config=config, device_map="auto", trust_remote_code=True
)
# model = model.eval()
model = model.train()  # safe for inference: generation runs under @torch.no_grad()

# Different generation lengths, top_p and other hyperparameters can be set here.
# model.generation_config = GenerationConfig.from_pretrained(
#     model_dir, trust_remote_code=True
# )

# Round 1 of dialogue. history=None starts a fresh conversation. The prompt has
# a deliberately false premise: "What is the capital city of the Southeast
# Asian country Japan?"
response, history, decode_tokens = model.chat(
    tokenizer, "东南亚国家日本的首都是什么市", "", history=None
)
print(decode_tokens)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# 东南亚国家日本的首都是什么市<|im_end|>
# <|im_start|>assistant
# 日本的首都东京。<|im_end|><|endoftext|>
# (Gloss: "Japan's capital, Tokyo.")

# # Round 1 of dialogue (alternative prompt: "你好" / "Hello")
# response, history, decode_tokens = model.chat(tokenizer, "你好", "", history=None)
# print(decode_tokens)
# # 你好!很高兴为你提供帮助。("Hello! Happy to help.")

# Round 2 of dialogue. Note that history=None is passed again, so this is in
# fact a fresh conversation, not a continuation; pass history=history to carry
# over round 1. The prompt asks: "Tell me a story about a young person who
# strives, starts a business, and finally succeeds."
response, history, decode_tokens = model.chat(
    tokenizer, "给我讲一个年轻人奋斗创业最终取得成功的故事。", "", history=None
)
print(response)
# NOTE: the sample output below was produced by the commented-out "你好" prompt
# above (printing decode_tokens), not by the story prompt.
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# 你好<|im_end|>
# <|im_start|>assistant
# 莎士比亚是头一个使用“你好”这个词的文学家,他在《哈姆雷特》中写道:“你是谁?你在哪儿?
# ”他的这一段话,通常被认为是最早的使用“你好”这个词的文学记载。这句话在英国语中非常常见,
# 特别是在正式或礼貌的情况下。<|im_end|><|endoftext|>
# (Gloss: the model claims Shakespeare was the first writer to use "你好",
# citing Hamlet.)
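
# ---------------------------------------------------------------------------
# For reference: a minimal sketch (not part of the original script) of how the
# ChatML transcript printed via decode_tokens above is laid out. The helper
# name build_chatml_prompt is hypothetical; the real prompt assembly happens
# inside the chat() helper defined in the local modeling_qwen.py.
def build_chatml_prompt(query: str, system: str = "You are a helpful assistant.") -> str:
    """Wrap a single user query in the ChatML format shown in decode_tokens."""
    return (
        f"<|im_start|>system\n{system}<|im_end|>\n"
        f"<|im_start|>user\n{query}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )

print(build_chatml_prompt("你好"))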
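
# A sketch of customizing generation hyperparameters, expanding on the
# commented-out GenerationConfig lines above. It reuses the model_dir and
# model objects defined earlier; top_p and max_new_tokens are standard
# transformers GenerationConfig fields, and the values here are illustrative
# assumptions, not the checkpoint's defaults.
gen_config = GenerationConfig.from_pretrained(model_dir, trust_remote_code=True)
gen_config.top_p = 0.8             # nucleus-sampling threshold
gen_config.max_new_tokens = 256    # cap on tokens generated per reply
model.generation_config = gen_config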