"""Minimal chat demo for Qwen-1.8B-Chat.

Downloads the checkpoint from ModelScope, loads it with transformers
(`trust_remote_code=True` is required because Qwen ships custom model code),
and runs a two-turn conversation via the model's `chat` API.
"""
from modelscope import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig

# Fetch (or reuse a cached copy of) the Qwen-1.8B-Chat snapshot.
model_dir = snapshot_download("qwen/Qwen-1_8B-Chat")

tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)

# device_map="auto" lets accelerate place the weights; eval() disables dropout.
model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    device_map="auto",
    trust_remote_code=True,
).eval()

# Generation hyperparameters (max length, top_p, etc.) can be customized here.
model.generation_config = GenerationConfig.from_pretrained(
    model_dir,
    trust_remote_code=True,
)

# First chat turn: no prior history.
response, history = model.chat(tokenizer, "你好", history=None)
print(response)
# 你好!很高兴为你提供帮助。

# Second chat turn: continue from the accumulated history.
response, history = model.chat(
    tokenizer,
    "给我讲一个年轻人奋斗创业最终取得成功的故事。",
    history=history,
)
print(response)