diff --git a/qwen3/qwen3-4b.py b/qwen3/qwen3-4b.py
new file mode 100644
index 0000000..008d55f
--- /dev/null
+++ b/qwen3/qwen3-4b.py
@@ -0,0 +1,26 @@
+from modelscope import AutoModelForCausalLM, AutoTokenizer
+
+model_name = "Qwen/Qwen3-4B-Instruct-2507-FP8"
+
+# load the tokenizer and the model
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")
+
+# prepare the model input
+# prompt = "Give me a short introduction to large language models."
+prompt = "中国的首都在哪里?"  # "Where is the capital of China?"
+messages = [{"role": "user", "content": prompt}]
+text = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True,
+)
+model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+
+# conduct text completion
+generated_ids = model.generate(**model_inputs, max_new_tokens=16384)
+output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+
+content = tokenizer.decode(output_ids, skip_special_tokens=True)
+
+print("content:", content)
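
A possible variant (not part of this diff, shown only as a sketch): stream the completion to stdout as it is generated instead of decoding everything at the end. This assumes the underlying transformers package is installed, since TextStreamer is a transformers class rather than anything the script above imports:

    from transformers import TextStreamer

    # wraps the tokenizer and prints decoded tokens as they arrive,
    # skipping the echoed prompt and any special tokens
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # same generate call as in the script, but output appears incrementally
    model.generate(**model_inputs, max_new_tokens=16384, streamer=streamer)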