import torch
from modelscope import snapshot_download
from transformers import AutoConfig, AutoTokenizer
from transformers.generation import GenerationConfig

from modeling_qwen import QWenLMHeadModel, QwenRunner

# Fix the RNG seeds so the generations below are reproducible.
seed = 4321
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

model_dir = snapshot_download("qwen/Qwen-1_8B-Chat")
# model_dir = "/home/colin/.cache/modelscope/hub/qwen/Qwen-1_8B-Chat"

# Load the model config from the current directory (the locally modified
# modeling files), not from the downloaded checkpoint.
config, kwargs = AutoConfig.from_pretrained(
    "./",
    return_unused_kwargs=True,
    trust_remote_code=True,
    code_revision=None,
    _commit_hash=None,
)

# Instantiate from the local config just to print the architecture; the
# actual pretrained weights are loaded below.
model = QWenLMHeadModel(config)
print(model)

tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
model = QWenLMHeadModel.from_pretrained(model_dir).cuda()
model = model.eval()
# model = model.train()  # gradient tracking is controlled by @torch.no_grad()

# Generation length, top_p and other related hyperparameters can be customized:
# model.generation_config = GenerationConfig.from_pretrained(
#     model_dir, trust_remote_code=True
# )

runner = QwenRunner(model)

# First dialogue round. The prompt deliberately contains a wrong premise:
# "What is the capital city of the Southeast Asian country Japan?"
# (Japan is in East Asia; the model answers Tokyo regardless.)
response, history, decode_tokens = runner.Chat(tokenizer, "东南亚国家日本的首都是什么市", "")
print(decode_tokens)
# Expected transcript:
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# 东南亚国家日本的首都是什么市<|im_end|>
# <|im_start|>assistant
# 日本的首都东京。<|im_end|><|endoftext|>

# Second dialogue round: "Tell me a story about a young person who strives,
# starts a business, and finally succeeds."
response, history, decode_tokens = runner.Chat(
    tokenizer, "给我讲一个年轻人奋斗创业最终取得成功的故事。", ""
)
print(decode_tokens)

# With the fixed seed, the story must end with this moral ("This story tells
# us that as long as we have determination and perseverance, we can overcome
# difficulties and realize our dreams."); otherwise generation has diverged.
expected_tail = "这个故事告诉我们,只要我们有决心和毅力,就一定能够克服困难,实现我们的梦想。<|im_end|>"
if decode_tokens.split("\n")[-2] != expected_tail:
    raise RuntimeError(
        "unexpected second-round output: seeded generation did not reproduce the reference text"
    )
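
# --- Optional: override generation hyperparameters (hedged sketch) ----------
# The commented-out GenerationConfig block above notes that generation length,
# top_p and similar hyperparameters are tunable. Below is a minimal sketch of
# doing so with the standard transformers GenerationConfig API. The concrete
# values are illustrative assumptions, and it is likewise an assumption that
# this custom QwenRunner honors model.generation_config when generating.
gen_config = GenerationConfig.from_pretrained(model_dir, trust_remote_code=True)
gen_config.top_p = 0.8            # assumed example value, not a tuned default
gen_config.max_new_tokens = 512   # assumed example value, not a tuned default
model.generation_config = gen_config

response, history, decode_tokens = runner.Chat(tokenizer, "What is the capital of France?", "")
print(decode_tokens)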