Refine the Qwen model and add a runner.
This commit is contained in:
parent
7c047f0b32
commit
9d28280cb1
|
@ -1,5 +0,0 @@
|
||||||
{
|
|
||||||
"framework": "pytorch",
|
|
||||||
"task": "chat",
|
|
||||||
"allow_remote": true
|
|
||||||
}
|
|
|
@ -1,55 +0,0 @@
|
||||||
from torch.utils import cpp_extension
|
|
||||||
import pathlib
|
|
||||||
import os
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
def _get_cuda_bare_metal_version(cuda_dir):
|
|
||||||
raw_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"],
|
|
||||||
universal_newlines=True)
|
|
||||||
output = raw_output.split()
|
|
||||||
release_idx = output.index("release") + 1
|
|
||||||
release = output[release_idx].split(".")
|
|
||||||
bare_metal_major = release[0]
|
|
||||||
bare_metal_minor = release[1][0]
|
|
||||||
|
|
||||||
return raw_output, bare_metal_major, bare_metal_minor
|
|
||||||
|
|
||||||
def _create_build_dir(buildpath):
|
|
||||||
try:
|
|
||||||
os.mkdir(buildpath)
|
|
||||||
except OSError:
|
|
||||||
if not os.path.isdir(buildpath):
|
|
||||||
print(f"Creation of the build directory {buildpath} failed")
|
|
||||||
|
|
||||||
# Pick additional -gencode targets from the installed CUDA toolkit version.
# The comparison is done on the (major, minor) pair: checking the minor
# number on its own wrongly excluded CUDA 12.0-12.6 from sm_90 and enabled it
# for CUDA 11.7, whose nvcc does not know that architecture.
cc_flag = []
_, bare_metal_major, bare_metal_minor = _get_cuda_bare_metal_version(cpp_extension.CUDA_HOME)
_cuda_version = (int(bare_metal_major), int(bare_metal_minor))
if _cuda_version >= (11, 0):
    # compute capability 8.0
    cc_flag.append('-gencode')
    cc_flag.append('arch=compute_80,code=sm_80')
if _cuda_version >= (11, 8):
    # compute capability 9.0
    cc_flag.append('-gencode')
    cc_flag.append('arch=compute_90,code=sm_90')

# Build path: a "build" directory next to this file.
srcpath = pathlib.Path(__file__).parent.absolute()
buildpath = srcpath / 'build'
_create_build_dir(buildpath)
|
|
||||||
|
|
||||||
def _cpp_extention_load_helper(name, sources, extra_cuda_flags):
    """Build and load an extension through ``cpp_extension.load``.

    Args:
        name: Name passed to ``cpp_extension.load``.
        sources: Source file paths passed to ``cpp_extension.load``.
        extra_cuda_flags: List of extra CUDA compiler flags; it is inserted
            between the fixed flags and the module-level ``cc_flag`` list.

    Returns:
        Whatever ``cpp_extension.load`` returns.
    """
    host_flags = ['-O3', ]
    fixed_cuda_flags = ['-O3',
                        '-gencode', 'arch=compute_70,code=sm_70',
                        '--use_fast_math']
    cuda_flags = fixed_cuda_flags + extra_cuda_flags + cc_flag
    return cpp_extension.load(
        name=name,
        sources=sources,
        build_directory=buildpath,
        extra_cflags=host_flags,
        extra_cuda_cflags=cuda_flags,
        verbose=1
    )
|
|
||||||
|
|
||||||
extra_flags = []

# Resolve the kernel sources against this file's directory (srcpath) rather
# than the current working directory, so the build does not depend on where
# the importing process was started. The build directory is already anchored
# to srcpath in the same way.
cache_autogptq_cuda_256_sources = [
    str(srcpath / "cache_autogptq_cuda_256.cpp"),
    str(srcpath / "cache_autogptq_cuda_kernel_256.cu"),
]
cache_autogptq_cuda_256 = _cpp_extention_load_helper(
    "cache_autogptq_cuda_256", cache_autogptq_cuda_256_sources, extra_flags
)
|
|
|
@ -5,6 +5,7 @@ from transformers.generation import GenerationConfig
|
||||||
from transformers import AutoConfig
|
from transformers import AutoConfig
|
||||||
|
|
||||||
from modeling_qwen import QWenLMHeadModel
|
from modeling_qwen import QWenLMHeadModel
|
||||||
|
from modeling_qwen import QwenRunner
|
||||||
|
|
||||||
seed = 4321
|
seed = 4321
|
||||||
torch.manual_seed(seed)
|
torch.manual_seed(seed)
|
||||||
|
@ -35,8 +36,10 @@ model = model.eval()
|
||||||
# model_dir, trust_remote_code=True
|
# model_dir, trust_remote_code=True
|
||||||
# )
|
# )
|
||||||
|
|
||||||
|
runner = QwenRunner(model)
|
||||||
|
|
||||||
# 第一轮对话
|
# 第一轮对话
|
||||||
response, history, decode_tokens = model.chat(tokenizer, "东南亚国家日本的首都是什么市", "", history=None)
|
response, history, decode_tokens = runner.Chat(tokenizer, "东南亚国家日本的首都是什么市", "")
|
||||||
print(decode_tokens)
|
print(decode_tokens)
|
||||||
# <|im_start|>system
|
# <|im_start|>system
|
||||||
# You are a helpful assistant.<|im_end|>
|
# You are a helpful assistant.<|im_end|>
|
||||||
|
@ -46,7 +49,8 @@ print(decode_tokens)
|
||||||
# 日本的首都东京。<|im_end|><|endoftext|>
|
# 日本的首都东京。<|im_end|><|endoftext|>
|
||||||
|
|
||||||
# 第二轮对话
|
# 第二轮对话
|
||||||
response, history, decode_tokens = model.chat(tokenizer, "给我讲一个年轻人奋斗创业最终取得成功的故事。", "", history=None)
|
|
||||||
|
response, history, decode_tokens = runner.Chat(tokenizer, "给我讲一个年轻人奋斗创业最终取得成功的故事。", "")
|
||||||
print(decode_tokens)
|
print(decode_tokens)
|
||||||
|
|
||||||
if decode_tokens.split("\n")[-2] != """这个故事告诉我们,只要我们有决心和毅力,就一定能够克服困难,实现我们的梦想。<|im_end|>""":
|
if decode_tokens.split("\n")[-2] != """这个故事告诉我们,只要我们有决心和毅力,就一定能够克服困难,实现我们的梦想。<|im_end|>""":
|
||||||
|
|
403
qwen/finetune.py
403
qwen/finetune.py
|
@ -1,403 +0,0 @@
|
||||||
# This code is based on the revised code from fastchat based on tatsu-lab/stanford_alpaca.
|
|
||||||
|
|
||||||
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
import json
|
|
||||||
import math
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
from typing import Dict, Optional, List
|
|
||||||
import torch
|
|
||||||
from torch.utils.data import Dataset
|
|
||||||
from deepspeed import zero
|
|
||||||
from deepspeed.runtime.zero.partition_parameters import ZeroParamStatus
|
|
||||||
import transformers
|
|
||||||
from transformers import Trainer, GPTQConfig, deepspeed
|
|
||||||
from transformers.trainer_pt_utils import LabelSmoother
|
|
||||||
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
|
|
||||||
from accelerate.utils import DistributedType
|
|
||||||
from modelscope import snapshot_download
|
|
||||||
|
|
||||||
from modeling_qwen import QWenLMHeadModel
|
|
||||||
|
|
||||||
|
|
||||||
IGNORE_TOKEN_ID = LabelSmoother.ignore_index
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class ModelArguments:
    """Command-line arguments that select the model."""

    # Model identifier or path; train() hands it to snapshot_download().
    model_name_or_path: Optional[str] = "qwen/Qwen-1_8B-Chat"
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class DataArguments:
    """Command-line arguments describing the training/evaluation data."""

    # JSON file with the training conversations (required in practice,
    # although the default is None).
    data_path: str = field(default=None, metadata={"help": "Path to the training data."})
    # Optional JSON file with evaluation conversations.
    eval_data_path: str = field(default=None, metadata={"help": "Path to the evaluation data."})
    # When true, the lazy dataset class is used instead of the eager one.
    lazy_preprocess: bool = field(default=False)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class TrainingArguments(transformers.TrainingArguments):
    """``transformers.TrainingArguments`` extended with script-specific options."""

    # Cache directory forwarded to the from_pretrained() calls in train().
    cache_dir: Optional[str] = None
    optim: str = "adamw_torch"
    model_max_length: int = field(
        default=8192,
        metadata={
            "help": "Maximum sequence length. Sequences will be right padded (and possibly truncated)."
        },
    )
    # Enables the LoRA branch in train() and the LoRA-only state-dict export.
    use_lora: bool = field(default=False)
|
|
||||||
|
|
||||||
|
|
||||||
def _default_lora_target_modules():
    """Return a fresh list of the default LoRA target module names."""
    return ["c_attn", "c_proj", "w1", "w2"]


@dataclass
class LoraArguments:
    """Command-line arguments configuring LoRA / Q-LoRA fine-tuning."""

    lora_r: int = 64
    lora_alpha: int = 16
    lora_dropout: float = 0.05
    # Names of the modules LoRA is applied to (passed to LoraConfig).
    lora_target_modules: List[str] = field(default_factory=_default_lora_target_modules)
    lora_weight_path: str = ""
    # Forwarded to LoraConfig(bias=...) and to the state-dict export.
    lora_bias: str = "none"
    # Switches train() to the GPTQ-quantized Q-LoRA path.
    q_lora: bool = False
|
|
||||||
|
|
||||||
|
|
||||||
def maybe_zero_3(param):
    """Return a detached CPU clone of *param*.

    Parameters that carry a ``ds_id`` attribute (presumably DeepSpeed ZeRO-3
    partitioned parameters -- confirm against DeepSpeed) are cloned inside a
    ``zero.GatheredParameters`` context; all others are cloned directly.
    """
    if not hasattr(param, "ds_id"):
        return param.detach().cpu().clone()

    assert param.ds_status == ZeroParamStatus.NOT_AVAILABLE
    with zero.GatheredParameters([param]):
        gathered = param.data.detach().cpu().clone()
    return gathered
|
|
||||||
|
|
||||||
|
|
||||||
# Borrowed from peft.utils.get_peft_model_state_dict
def get_peft_state_maybe_zero_3(named_params, bias):
    """Select the LoRA-related parameters and return CPU clones of them.

    Args:
        named_params: Iterable of ``(name, parameter)`` pairs, e.g. the
            result of ``model.named_parameters()``. It is consumed once.
        bias: Which bias parameters accompany the LoRA weights:
            ``"none"`` keeps only names containing ``"lora_"``;
            ``"all"`` additionally keeps every name containing ``"bias"``;
            ``"lora_only"`` additionally keeps the bias whose name is the
            LoRA weight's prefix (text before ``"lora_"``) plus ``"bias"``.

    Returns:
        Dict mapping parameter name to the result of ``maybe_zero_3``.

    Raises:
        NotImplementedError: for any other value of *bias*.
    """
    if bias == "none":
        to_return = {k: t for k, t in named_params if "lora_" in k}
    elif bias == "all":
        to_return = {k: t for k, t in named_params if "lora_" in k or "bias" in k}
    elif bias == "lora_only":
        to_return = {}
        maybe_lora_bias = {}
        lora_bias_names = set()
        for k, t in named_params:
            if "lora_" in k:
                to_return[k] = t
                lora_bias_names.add(k.split("lora_")[0] + "bias")
            elif "bias" in k:
                maybe_lora_bias[k] = t
        # Iterate key/value pairs and test each candidate's own name. The
        # previous code unpacked the dict's keys and compared a stale
        # ``bias_name`` left over from the loop above.
        for k, t in maybe_lora_bias.items():
            if k in lora_bias_names:
                to_return[k] = t
    else:
        raise NotImplementedError
    to_return = {k: maybe_zero_3(v) for k, v in to_return.items()}
    return to_return
|
|
||||||
|
|
||||||
|
|
||||||
# Rank of this process; None until train() assigns training_args.local_rank.
local_rank = None


def rank0_print(*args):
    """Print *args* only when the module-level ``local_rank`` equals 0."""
    if local_rank != 0:
        return
    print(*args)
|
|
||||||
|
|
||||||
|
|
||||||
def safe_save_model_for_hf_trainer(
    trainer: transformers.Trainer, output_dir: str, bias="none"
):
    """Collect the model's state dict and write it to *output_dir*.

    The state dict comes from one of three places: the consolidated 16-bit
    dict of ``trainer.model_wrapped`` when ZeRO-3 is enabled, the LoRA-only
    subset when ``trainer.args.use_lora`` is set, or the plain
    ``state_dict()`` otherwise. It is saved only when
    ``trainer.args.should_save`` is true and ``local_rank`` is 0.

    Args:
        trainer: Trainer holding the model to export.
        output_dir: Destination passed to ``trainer._save``.
        bias: LoRA bias mode forwarded to ``get_peft_state_maybe_zero_3``.
    """
    if deepspeed.is_deepspeed_zero3_enabled():
        weights = trainer.model_wrapped._zero3_consolidated_16bit_state_dict()
    elif trainer.args.use_lora:
        weights = get_peft_state_maybe_zero_3(
            trainer.model.named_parameters(), bias
        )
    else:
        weights = trainer.model.state_dict()

    allowed_to_write = trainer.args.should_save and trainer.args.local_rank == 0
    if allowed_to_write:
        trainer._save(output_dir, state_dict=weights)
|
|
||||||
|
|
||||||
|
|
||||||
def preprocess(
    sources,
    tokenizer: transformers.PreTrainedTokenizer,
    max_len: int,
    system_message: str = "You are a helpful assistant.",
) -> Dict:
    """Tokenize conversations into padded model inputs and training labels.

    Every conversation is rendered as a system turn followed by the
    user/assistant turns, each wrapped as
    ``<|im_start|>{role}\\n{text}<|im_end|>\\n``. In the labels, the system
    and user text is replaced by ``IGNORE_TOKEN_ID`` while assistant text is
    kept. Sequences are padded with ``tokenizer.pad_token_id`` (labels with
    ``IGNORE_TOKEN_ID``) and truncated to *max_len*.

    Args:
        sources: List of conversations; each is a non-empty list of dicts
            with keys ``"from"`` (``"user"`` or ``"assistant"``) and
            ``"value"``. A leading non-user turn is dropped.
        tokenizer: Callable returning an object with ``input_ids``; must
            expose ``im_start_id``, ``im_end_id`` and ``pad_token_id``.
        max_len: Length every sequence is padded/truncated to.
        system_message: Text of the system turn.

    Returns:
        Dict with int tensors ``input_ids`` and ``labels`` and the boolean
        ``attention_mask`` (``input_ids != pad_token_id``).

    Raises:
        KeyError: if a turn's ``"from"`` is not a known role.
        AssertionError: if inputs and labels end up with different lengths
            (this happens when ``"\\n"`` is not a single token).
    """
    roles = {"user": "<|im_start|>user", "assistant": "<|im_start|>assistant"}

    im_start = tokenizer.im_start_id
    im_end = tokenizer.im_end_id
    nl_tokens = tokenizer("\n").input_ids

    # Loop-invariant work: the role tags and the system turn tokenize the
    # same way for every conversation, so compute them once.
    role_token_ids = {tag: tokenizer(tag).input_ids for tag in roles.values()}
    system = (
        [im_start]
        + tokenizer("system").input_ids
        + nl_tokens
        + tokenizer(system_message).input_ids
        + [im_end]
        + nl_tokens
    )
    system_target = (
        [im_start] + [IGNORE_TOKEN_ID] * (len(system) - 3) + [im_end] + nl_tokens
    )

    # Apply prompt templates
    input_ids, targets = [], []
    for source in sources:
        if roles[source[0]["from"]] != roles["user"]:
            source = source[1:]

        input_id = list(system)
        target = list(system_target)
        assert len(input_id) == len(target)
        for sentence in source:
            role = roles[sentence["from"]]
            role_ids = role_token_ids[role]
            _input_id = (
                role_ids
                + nl_tokens
                + tokenizer(sentence["value"]).input_ids
                + [im_end]
                + nl_tokens
            )
            input_id += _input_id
            if role == roles["user"]:
                # Mask the whole user turn except its delimiters.
                _target = (
                    [im_start]
                    + [IGNORE_TOKEN_ID] * (len(_input_id) - 3)
                    + [im_end]
                    + nl_tokens
                )
            else:
                # Assistant turn: mask the role tag, keep the reply tokens.
                _target = (
                    [im_start]
                    + [IGNORE_TOKEN_ID] * len(role_ids)
                    + _input_id[len(role_ids) + 1 : -2]
                    + [im_end]
                    + nl_tokens
                )
            target += _target
        assert len(input_id) == len(target)
        input_id += [tokenizer.pad_token_id] * (max_len - len(input_id))
        target += [IGNORE_TOKEN_ID] * (max_len - len(target))
        input_ids.append(input_id[:max_len])
        targets.append(target[:max_len])
    input_ids = torch.tensor(input_ids, dtype=torch.int)
    targets = torch.tensor(targets, dtype=torch.int)

    return dict(
        input_ids=input_ids,
        labels=targets,
        attention_mask=input_ids.ne(tokenizer.pad_token_id),
    )
|
|
||||||
|
|
||||||
|
|
||||||
class SupervisedDataset(Dataset):
    """Supervised fine-tuning dataset that tokenizes everything up front."""

    def __init__(
        self, raw_data, tokenizer: transformers.PreTrainedTokenizer, max_len: int
    ):
        """Run ``preprocess`` over the ``"conversations"`` of every example."""
        super().__init__()

        rank0_print("Formatting inputs...")
        conversations = [example["conversations"] for example in raw_data]
        encoded = preprocess(conversations, tokenizer, max_len)

        self.input_ids = encoded["input_ids"]
        self.labels = encoded["labels"]
        self.attention_mask = encoded["attention_mask"]

    def __len__(self):
        """Number of preprocessed examples."""
        return len(self.input_ids)

    def __getitem__(self, i) -> Dict[str, torch.Tensor]:
        """Return the ``input_ids``/``labels``/``attention_mask`` of example *i*."""
        return {
            "input_ids": self.input_ids[i],
            "labels": self.labels[i],
            "attention_mask": self.attention_mask[i],
        }
|
|
||||||
|
|
||||||
|
|
||||||
class LazySupervisedDataset(Dataset):
    """Supervised fine-tuning dataset that tokenizes examples on first access."""

    def __init__(
        self, raw_data, tokenizer: transformers.PreTrainedTokenizer, max_len: int
    ):
        """Store the raw examples; nothing is tokenized here."""
        super().__init__()
        self.tokenizer = tokenizer
        self.max_len = max_len

        rank0_print("Formatting inputs...Skip in lazy mode")
        self.raw_data = raw_data
        # index -> already preprocessed example
        self.cached_data_dict = {}

    def __len__(self):
        """Number of raw examples."""
        return len(self.raw_data)

    def __getitem__(self, i) -> Dict[str, torch.Tensor]:
        """Return example *i*, preprocessing and caching it on first access."""
        if i not in self.cached_data_dict:
            # preprocess() works on a batch, so wrap the single conversation
            # in a list and unwrap row 0 of each returned tensor.
            batch = preprocess(
                [self.raw_data[i]["conversations"]], self.tokenizer, self.max_len
            )
            self.cached_data_dict[i] = {
                "input_ids": batch["input_ids"][0],
                "labels": batch["labels"][0],
                "attention_mask": batch["attention_mask"][0],
            }
        return self.cached_data_dict[i]
|
|
||||||
|
|
||||||
|
|
||||||
def make_supervised_data_module(
    tokenizer: transformers.PreTrainedTokenizer,
    data_args,
    max_len,
) -> Dict:
    """Make the train (and optional eval) dataset for supervised fine-tuning.

    Args:
        tokenizer: Tokenizer handed to the dataset class.
        data_args: Object providing ``data_path``, ``eval_data_path`` and
            ``lazy_preprocess``.
        max_len: Sequence length handed to the dataset class.

    Returns:
        Dict with keys ``train_dataset`` and ``eval_dataset``; the latter is
        ``None`` when ``data_args.eval_data_path`` is empty.
    """
    dataset_cls = (
        LazySupervisedDataset if data_args.lazy_preprocess else SupervisedDataset
    )
    rank0_print("Loading data...")

    # Use context managers so the file handles are closed promptly, and read
    # as UTF-8 (the JSON interchange encoding) instead of the locale default.
    with open(data_args.data_path, "r", encoding="utf-8") as train_file:
        train_json = json.load(train_file)
    train_dataset = dataset_cls(train_json, tokenizer=tokenizer, max_len=max_len)

    eval_dataset = None
    if data_args.eval_data_path:
        with open(data_args.eval_data_path, "r", encoding="utf-8") as eval_file:
            eval_json = json.load(eval_file)
        eval_dataset = dataset_cls(eval_json, tokenizer=tokenizer, max_len=max_len)

    return dict(train_dataset=train_dataset, eval_dataset=eval_dataset)
|
|
||||||
|
|
||||||
|
|
||||||
def train():
    """Parse CLI arguments, load model/tokenizer/data, train and save.

    Steps, in order: parse the four argument dataclasses; choose the device
    map; download the model snapshot; build config, model and tokenizer;
    optionally wrap the model with LoRA / Q-LoRA; build the datasets; run
    ``Trainer.train``; save trainer state and model weights.

    Side effect: sets the module-level ``local_rank``.
    """
    global local_rank

    arg_parser = transformers.HfArgumentParser(
        (ModelArguments, DataArguments, TrainingArguments, LoraArguments)
    )
    model_args, data_args, training_args, lora_args = (
        arg_parser.parse_args_into_dataclasses()
    )

    world_size = int(os.environ.get("WORLD_SIZE", 1))
    single_process = world_size == 1

    # This serves for single-gpu qlora.
    if getattr(training_args, "deepspeed", None) and single_process:
        training_args.distributed_state.distributed_type = DistributedType.DEEPSPEED

    local_rank = training_args.local_rank

    device_map = "auto"
    if lora_args.q_lora:
        if not single_process:
            # One full model replica per process, placed on its local device.
            device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
        if len(training_args.fsdp) > 0 or deepspeed.is_deepspeed_zero3_enabled():
            logging.warning("FSDP or ZeRO3 are incompatible with QLoRA.")

    model_dir = snapshot_download(model_args.model_name_or_path)

    # Load the model config and disable the KV cache for training.
    config = transformers.AutoConfig.from_pretrained(
        model_dir,
        cache_dir=training_args.cache_dir,
        trust_remote_code=True,
    )
    config.use_cache = False

    # Load model and tokenizer
    model = QWenLMHeadModel(config)
    use_q_lora = training_args.use_lora and lora_args.q_lora
    quantization_config = (
        GPTQConfig(bits=4, disable_exllama=True) if use_q_lora else None
    )
    model = model.from_pretrained(
        model_dir,
        config=config,
        cache_dir=training_args.cache_dir,
        device_map=device_map,
        trust_remote_code=True,
        quantization_config=quantization_config,
    )

    tokenizer = transformers.AutoTokenizer.from_pretrained(
        model_dir,
        cache_dir=training_args.cache_dir,
        model_max_length=training_args.model_max_length,
        padding_side="right",
        use_fast=False,
        trust_remote_code=True,
    )
    tokenizer.pad_token_id = tokenizer.eod_id

    if training_args.use_lora:
        keep_embeddings_frozen = lora_args.q_lora or "chat" in model_dir.lower()
        modules_to_save = None if keep_embeddings_frozen else ["wte", "lm_head"]
        lora_config = LoraConfig(
            r=lora_args.lora_r,
            lora_alpha=lora_args.lora_alpha,
            target_modules=lora_args.lora_target_modules,
            lora_dropout=lora_args.lora_dropout,
            bias=lora_args.lora_bias,
            task_type="CAUSAL_LM",
            modules_to_save=modules_to_save,  # This argument serves for adding new tokens.
        )
        if lora_args.q_lora:
            model = prepare_model_for_kbit_training(
                model, use_gradient_checkpointing=training_args.gradient_checkpointing
            )

        model = get_peft_model(model, lora_config)

        # Print peft trainable params
        model.print_trainable_parameters()

        if training_args.gradient_checkpointing:
            model.enable_input_require_grads()

    # Load data
    data_module = make_supervised_data_module(
        tokenizer=tokenizer, data_args=data_args, max_len=training_args.model_max_length
    )

    # Start trainer
    trainer = Trainer(
        model=model, tokenizer=tokenizer, args=training_args, **data_module
    )

    trainer.train()
    trainer.save_state()

    safe_save_model_for_hf_trainer(
        trainer=trainer, output_dir=training_args.output_dir, bias=lora_args.lora_bias
    )


if __name__ == "__main__":
    train()
|
|
|
@ -1,65 +0,0 @@
|
||||||
#!/bin/bash
# Launch single-GPU LoRA fine-tuning through finetune.py.
export CUDA_DEVICE_MAX_CONNECTIONS=1

MODEL="qwen/Qwen-1_8B-Chat" # Set the path if you do not want to load from huggingface directly
# ATTENTION: specify the path to your training data, which should be a json file consisting of a list of conversations.
# See the section for finetuning in README for more information.
DATA="data.json"

function usage() {
    echo '
Usage: bash finetune/finetune_lora_single_gpu.sh [-m MODEL_PATH] [-d DATA_PATH]
'
}

# Print an error and stop when an option that needs a value is the last
# argument; otherwise the variable would silently become empty.
function require_value() {
    if [[ $# -lt 2 ]]; then
        echo "Missing value for ${1}" >&2
        usage
        exit 1
    fi
}

while [[ "$1" != "" ]]; do
    case "$1" in
        -m | --model )
            require_value "$@"
            shift
            MODEL=$1
            ;;
        -d | --data )
            require_value "$@"
            shift
            DATA=$1
            ;;
        -h | --help )
            usage
            exit 0
            ;;
        * )
            echo "Unknown argument ${1}"
            exit 1
            ;;
    esac
    shift
done

export CUDA_VISIBLE_DEVICES=0

# MODEL and DATA are quoted so paths containing spaces reach finetune.py as
# single arguments.
python finetune.py \
  --model_name_or_path "$MODEL" \
  --data_path "$DATA" \
  --bf16 False \
  --output_dir output_qwen \
  --num_train_epochs 5 \
  --per_device_train_batch_size 2 \
  --per_device_eval_batch_size 1 \
  --gradient_accumulation_steps 8 \
  --evaluation_strategy "no" \
  --save_strategy "steps" \
  --save_steps 1000 \
  --save_total_limit 10 \
  --learning_rate 3e-4 \
  --weight_decay 0.1 \
  --adam_beta2 0.95 \
  --warmup_ratio 0.01 \
  --lr_scheduler_type "cosine" \
  --logging_steps 1 \
  --report_to "none" \
  --model_max_length 512 \
  --lazy_preprocess True \
  --gradient_checkpointing \
  --use_lora

# If you use fp16 instead of bf16, you should use deepspeed
# --fp16 True --deepspeed finetune/ds_config_zero2.json
|
|
|
@ -16,10 +16,7 @@ from torch import nn
|
||||||
from safetensors.torch import load_file as safe_load_file
|
from safetensors.torch import load_file as safe_load_file
|
||||||
from safetensors.torch import save_file as safe_save_file
|
from safetensors.torch import save_file as safe_save_file
|
||||||
|
|
||||||
from transformers.generation.utils import GenerateOutput
|
|
||||||
from configuration_qwen import QWenConfig
|
|
||||||
from qwen_generation_utils import (
|
from qwen_generation_utils import (
|
||||||
HistoryType,
|
|
||||||
make_context,
|
make_context,
|
||||||
decode_tokens,
|
decode_tokens,
|
||||||
)
|
)
|
||||||
|
@ -137,7 +134,6 @@ class QWenLMHeadModel(nn.Module):
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.config = config
|
self.config = config
|
||||||
|
|
||||||
self.transformer = QWenModel(config)
|
self.transformer = QWenModel(config)
|
||||||
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
|
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
|
||||||
|
|
||||||
|
@ -186,65 +182,42 @@ class QWenLMHeadModel(nn.Module):
|
||||||
print(f"All model checkpoint weights were used when initializing {cls.__class__.__name__}.\n")
|
print(f"All model checkpoint weights were used when initializing {cls.__class__.__name__}.\n")
|
||||||
return cls
|
return cls
|
||||||
|
|
||||||
|
|
||||||
|
class QwenRunner:
|
||||||
|
def __init__(self, qwen):
|
||||||
|
self.qwen = qwen
|
||||||
|
|
||||||
@torch.no_grad()
|
@torch.no_grad()
|
||||||
def chat(
|
def Chat(
|
||||||
self,
|
self,
|
||||||
tokenizer,
|
tokenizer,
|
||||||
query: str,
|
query: str,
|
||||||
query_assistant: str,
|
query_assistant: str,
|
||||||
history: Optional[HistoryType],
|
|
||||||
system: str = "You are a helpful assistant.",
|
system: str = "You are a helpful assistant.",
|
||||||
**kwargs,
|
history=[],
|
||||||
) -> Tuple[str, HistoryType]:
|
):
|
||||||
if history is None:
|
qwen = self.qwen
|
||||||
history = []
|
|
||||||
else:
|
|
||||||
history = copy.deepcopy(history)
|
history = copy.deepcopy(history)
|
||||||
|
|
||||||
raw_text, context_tokens = make_context(tokenizer, query, query_assistant, history=history, system=system)
|
raw_text, context_tokens = make_context(tokenizer, query, query_assistant, history=history, system=system)
|
||||||
input_ids = torch.tensor([context_tokens]).to(next(self.parameters()).device)
|
input_ids = torch.tensor([context_tokens]).to(next(qwen.parameters()).device)
|
||||||
outputs = self.generate(
|
eos_token_id_tensor = torch.tensor([qwen.config.eos_token_id]).to(input_ids.device)
|
||||||
input_ids,
|
pad_token_id = qwen.config.pad_token_id
|
||||||
tokenizer=tokenizer,
|
|
||||||
**kwargs,
|
|
||||||
)
|
|
||||||
decoded, response, end_reason = decode_tokens(
|
|
||||||
outputs[0],
|
|
||||||
tokenizer,
|
|
||||||
raw_text_len=len(raw_text),
|
|
||||||
context_length=len(context_tokens),
|
|
||||||
errors="replace",
|
|
||||||
)
|
|
||||||
history.append((query, response))
|
|
||||||
return response, history, decoded
|
|
||||||
|
|
||||||
def generate(
|
|
||||||
self,
|
|
||||||
input_ids: Optional[torch.Tensor] = None,
|
|
||||||
tokenizer=None,
|
|
||||||
) -> Union[GenerateOutput, torch.LongTensor]:
|
|
||||||
pad_token_id = self.config.pad_token_id
|
|
||||||
eos_token_id_tensor = torch.tensor([self.config.eos_token_id]).to(input_ids.device)
|
|
||||||
|
|
||||||
# keep track of which sequences are already finished
|
|
||||||
unfinished_sequences = torch.ones(input_ids.shape[0], dtype=torch.long, device=input_ids.device)
|
unfinished_sequences = torch.ones(input_ids.shape[0], dtype=torch.long, device=input_ids.device)
|
||||||
|
|
||||||
this_peer_finished = False
|
this_peer_finished = False
|
||||||
# auto-regressive generation
|
|
||||||
while True:
|
while True:
|
||||||
# forward pass to get next token
|
outputs = self.forwardQWen(input_ids)
|
||||||
outputs = forwardQWen(self, input_ids)
|
|
||||||
next_token_scores = outputs[:, -1, :]
|
next_token_scores = outputs[:, -1, :]
|
||||||
|
|
||||||
# repetition_penalty
|
# repetition_penalty
|
||||||
penalty = self.config.repetition_penalty
|
penalty = qwen.config.repetition_penalty
|
||||||
score = torch.gather(next_token_scores, 1, input_ids)
|
score = torch.gather(next_token_scores, 1, input_ids)
|
||||||
# if score < 0 then repetition penalty has to be multiplied to reduce the token probabilities
|
# if score < 0 then repetition penalty has to be multiplied to reduce the token probabilities
|
||||||
score = torch.where(score < 0, score * penalty, score / penalty)
|
score = torch.where(score < 0, score * penalty, score / penalty)
|
||||||
next_token_scores = next_token_scores.scatter_(1, input_ids, score)
|
next_token_scores = next_token_scores.scatter_(1, input_ids, score)
|
||||||
|
|
||||||
# top_p
|
# top_p
|
||||||
top_p = self.config.top_p
|
top_p = qwen.config.top_p
|
||||||
filter_value = -float("Inf")
|
filter_value = -float("Inf")
|
||||||
min_tokens_to_keep = 1
|
min_tokens_to_keep = 1
|
||||||
sorted_logits, sorted_indices = torch.sort(next_token_scores, descending=False)
|
sorted_logits, sorted_indices = torch.sort(next_token_scores, descending=False)
|
||||||
|
@ -262,37 +235,33 @@ class QWenLMHeadModel(nn.Module):
|
||||||
next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
|
next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
|
||||||
|
|
||||||
next_tokens = next_tokens * unfinished_sequences + pad_token_id * (1 - unfinished_sequences)
|
next_tokens = next_tokens * unfinished_sequences + pad_token_id * (1 - unfinished_sequences)
|
||||||
|
|
||||||
# update generated ids, model inputs, and length for next step
|
|
||||||
input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
|
input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
|
||||||
|
|
||||||
unfinished_sequences = unfinished_sequences.mul(
|
unfinished_sequences = unfinished_sequences.mul(
|
||||||
next_tokens.tile(eos_token_id_tensor.shape[0], 1).ne(eos_token_id_tensor.unsqueeze(1)).prod(dim=0)
|
next_tokens.tile(eos_token_id_tensor.shape[0], 1).ne(eos_token_id_tensor.unsqueeze(1)).prod(dim=0)
|
||||||
)
|
)
|
||||||
|
|
||||||
# decoded, response, end_reason = decode_tokens(
|
|
||||||
# next_tokens,
|
|
||||||
# tokenizer,
|
|
||||||
# raw_text_len=0,
|
|
||||||
# context_length=0,
|
|
||||||
# errors="replace",
|
|
||||||
# )
|
|
||||||
# print(decoded)
|
|
||||||
|
|
||||||
# stop when each sentence is finished
|
|
||||||
if unfinished_sequences.max() == 0:
|
if unfinished_sequences.max() == 0:
|
||||||
this_peer_finished = True
|
this_peer_finished = True
|
||||||
|
|
||||||
if this_peer_finished:
|
if this_peer_finished:
|
||||||
break
|
break
|
||||||
return input_ids
|
|
||||||
|
|
||||||
|
decoded, response, end_reason = decode_tokens(
|
||||||
|
input_ids[0],
|
||||||
|
tokenizer,
|
||||||
|
raw_text_len=len(raw_text),
|
||||||
|
context_length=len(context_tokens),
|
||||||
|
errors="replace",
|
||||||
|
)
|
||||||
|
history.append((query, response))
|
||||||
|
return response, history, decoded
|
||||||
|
|
||||||
def forwardAttention(
|
def forwardAttention(
|
||||||
|
self,
|
||||||
attention,
|
attention,
|
||||||
hidden_states: Optional[Tuple[torch.FloatTensor]],
|
hidden_states: Optional[Tuple[torch.FloatTensor]],
|
||||||
rotary_pos_emb_list: Optional[List[List[torch.Tensor]]] = None,
|
rotary_pos_emb_list: Optional[List[List[torch.Tensor]]] = None,
|
||||||
):
|
):
|
||||||
def apply_rotary_pos_emb(t, freqs):
|
def apply_rotary_pos_emb(t, freqs):
|
||||||
def _rotate_half(x):
|
def _rotate_half(x):
|
||||||
x = rearrange(x, "... (j d) -> ... j d", j=2)
|
x = rearrange(x, "... (j d) -> ... j d", j=2)
|
||||||
|
@ -340,15 +309,15 @@ def forwardAttention(
|
||||||
|
|
||||||
return attn_output
|
return attn_output
|
||||||
|
|
||||||
|
def forwardQWenBlock(
|
||||||
def forwardQWenBlock(
|
self,
|
||||||
block,
|
block,
|
||||||
hidden_states: Optional[Tuple[torch.FloatTensor]],
|
hidden_states: Optional[Tuple[torch.FloatTensor]],
|
||||||
rotary_pos_emb_list: Optional[List[List[torch.Tensor]]] = None,
|
rotary_pos_emb_list: Optional[List[List[torch.Tensor]]] = None,
|
||||||
):
|
):
|
||||||
layernorm_output = block.ln_1(hidden_states)
|
layernorm_output = block.ln_1(hidden_states)
|
||||||
|
|
||||||
attn_outputs = forwardAttention(block.attn, layernorm_output, rotary_pos_emb_list)
|
attn_outputs = self.forwardAttention(block.attn, layernorm_output, rotary_pos_emb_list)
|
||||||
attn_output = attn_outputs[0]
|
attn_output = attn_outputs[0]
|
||||||
layernorm_input = attn_output + hidden_states
|
layernorm_input = attn_output + hidden_states
|
||||||
|
|
||||||
|
@ -361,13 +330,12 @@ def forwardQWenBlock(
|
||||||
hidden_states = layernorm_input + mlp_output
|
hidden_states = layernorm_input + mlp_output
|
||||||
return hidden_states
|
return hidden_states
|
||||||
|
|
||||||
|
def forwardQWen(
|
||||||
def forwardQWen(
|
self,
|
||||||
qwen,
|
|
||||||
input_ids: Optional[torch.LongTensor] = None,
|
input_ids: Optional[torch.LongTensor] = None,
|
||||||
labels: Optional[torch.LongTensor] = None,
|
labels: Optional[torch.LongTensor] = None,
|
||||||
):
|
):
|
||||||
transfm = qwen.transformer
|
transfm = self.qwen.transformer
|
||||||
input_shape = input_ids.size()
|
input_shape = input_ids.size()
|
||||||
input_ids = input_ids.view(-1, input_shape[-1])
|
input_ids = input_ids.view(-1, input_shape[-1])
|
||||||
hidden_states = transfm.wte(input_ids)
|
hidden_states = transfm.wte(input_ids)
|
||||||
|
@ -381,12 +349,12 @@ def forwardQWen(
|
||||||
output_shape = input_shape + (hidden_states.size(-1),)
|
output_shape = input_shape + (hidden_states.size(-1),)
|
||||||
|
|
||||||
for block in transfm.h:
|
for block in transfm.h:
|
||||||
hidden_states = forwardQWenBlock(block, hidden_states, rotary_pos_emb_list=rotary_pos_emb_list)
|
hidden_states = self.forwardQWenBlock(block, hidden_states, rotary_pos_emb_list=rotary_pos_emb_list)
|
||||||
|
|
||||||
hidden_states = transfm.ln_f(hidden_states)
|
hidden_states = transfm.ln_f(hidden_states)
|
||||||
hidden_states = hidden_states.view(output_shape)
|
hidden_states = hidden_states.view(output_shape)
|
||||||
|
|
||||||
lm_logits = qwen.lm_head(hidden_states)
|
lm_logits = self.qwen.lm_head(hidden_states)
|
||||||
|
|
||||||
loss = None
|
loss = None
|
||||||
if labels is not None:
|
if labels is not None:
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
class People(ABC):
    """Abstract base: subclasses must implement ``eat``; ``walk`` is optional."""

    # NOTE(review): walk() is deliberately left concrete here -- an
    # @abstractmethod marker on it was commented out in the original.
    def walk(self):
        """Default no-op; subclasses may override."""

    @abstractmethod
    def eat(self):
        """Must be provided by every concrete subclass."""

    def auto(self):
        """Call ``walk()`` and then ``eat()`` on this instance."""
        self.walk()
        self.eat()
|
||||||
|
|
||||||
|
class kid1(People):
    """Concrete ``People`` whose ``walk`` and ``eat`` each print a fixed message."""

    def __init__(self):
        """Nothing to initialise."""

    def walk(self):
        """Print the walking message."""
        print('走路')

    def eat(self):
        """Print the eating message."""
        print('吃饭')
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
k = kid1()
|
||||||
|
k.auto()
|
Loading…
Reference in New Issue