Add qwen and refine folders.

parent 0fa38b7815
commit 3a4e99f7e3

@@ -1,19 +1,23 @@
+import sys
+
+sys.path.append("..")
+
 import json
 import torch
 
-from chatglm import ChatGLMForConditionalGeneration
-from chatglm import ChatGLMTokenizer
+from modeling_chatglm import ChatGLMForConditionalGeneration
+from tokenization_chatglm import ChatGLMTokenizer
+from modelscope import snapshot_download
+from transformers import AutoConfig
 
 from tools import show
 
-from transformers import AutoConfig
-
 seed = 4321
 torch.manual_seed(seed)
 torch.cuda.manual_seed_all(seed)
 
 
-pretrained_model_name_or_path = "../ZhipuAI/chatglm3-6b"
+pretrained_model_name_or_path = snapshot_download("ZhipuAI/chatglm3-6b")
 config, kwargs = AutoConfig.from_pretrained(
     pretrained_model_name_or_path,
     return_unused_kwargs=True,
@@ -24,7 +28,7 @@ config, kwargs = AutoConfig.from_pretrained(
 glm = ChatGLMForConditionalGeneration(config)
 
 
-tokenizer_config_file = "./chatglm/tokenizer_config.json"
+tokenizer_config_file = "./tokenizer_config.json"
 if tokenizer_config_file is not None:
     with open(tokenizer_config_file, encoding="utf-8") as tokenizer_config_handle:
         init_kwargs = json.load(tokenizer_config_handle)
@@ -32,7 +36,7 @@ if tokenizer_config_file is not None:
     init_kwargs.pop("tokenizer_file", None)
     saved_init_inputs = init_kwargs.pop("init_inputs", ())
     init_inputs = saved_init_inputs
-    init_kwargs["vocab_file"] = "./chatglm/tokenizer.model"
+    init_kwargs["vocab_file"] = "./tokenizer.model"
     init_kwargs["added_tokens_file"] = None
     init_kwargs["special_tokens_map_file"] = None
     init_kwargs["tokenizer_file"] = None

@@ -19,14 +19,16 @@ from safetensors.torch import storage_ptr, storage_size
 from transformers.configuration_utils import PretrainedConfig
 from transformers.generation import GenerationConfig
 
-from chatglm import ChatGLMConfig
+from configuration_chatglm import ChatGLMConfig
 from tools import show
 
 
 class RotaryEmbedding(nn.Module):
     def __init__(self, dim: int, original_impl=False, device=None, dtype=None):
         super().__init__()
-        inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2, device=device).to(dtype=dtype) / dim))
+        inv_freq = 1.0 / (
+            10000 ** (torch.arange(0, dim, 2, device=device).to(dtype=dtype) / dim)
+        )
         self.register_buffer("inv_freq", inv_freq)
         self.dim = dim
         self.original_impl = original_impl
@@ -35,7 +37,13 @@ class RotaryEmbedding(nn.Module):
         dtype = self.inv_freq.dtype
         device = self.inv_freq.device
         # $\Theta = {\theta_i = 10000^{\frac{2(i-1)}{d}}, i \in [1, 2, ..., \frac{d}{2}]}$
-        theta = 1.0 / (base ** (torch.arange(0, self.dim, 2, dtype=torch.float, device=device) / self.dim))
+        theta = 1.0 / (
+            base
+            ** (
+                torch.arange(0, self.dim, 2, dtype=torch.float, device=device)
+                / self.dim
+            )
+        )
         # Create position indexes `[0, 1, ..., max_seq_len - 1]`
         seq_idx = torch.arange(max_seq_len, dtype=torch.float, device=device)
         # Calculate the product of position index and $\theta_i$
@@ -50,7 +58,9 @@ class RotaryEmbedding(nn.Module):
 class RMSNorm(torch.nn.Module):
     def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None, **kwargs):
         super().__init__()
-        self.weight = torch.nn.Parameter(torch.empty(normalized_shape, device=device, dtype=dtype))
+        self.weight = torch.nn.Parameter(
+            torch.empty(normalized_shape, device=device, dtype=dtype)
+        )
         self.eps = eps
 
     def forward(self, hidden_states: torch.Tensor):
@@ -70,7 +80,9 @@ class CoreAttention(torch.nn.Module):
         projection_size = config.kv_channels * config.num_attention_heads
         # Per attention head and per partition values.
         self.hidden_size_per_partition = projection_size
-        self.hidden_size_per_attention_head = projection_size // config.num_attention_heads
+        self.hidden_size_per_attention_head = (
+            projection_size // config.num_attention_heads
+        )
         self.num_attention_heads_per_partition = config.num_attention_heads
 
         coeff = None
@@ -82,13 +94,17 @@ class CoreAttention(torch.nn.Module):
         self.attention_dropout = torch.nn.Dropout(config.attention_dropout)
 
     def forward(self, query_layer, key_layer, value_layer):
-        query_layer, key_layer, value_layer = [k.permute(1, 2, 0, 3) for k in [query_layer, key_layer, value_layer]]
+        query_layer, key_layer, value_layer = [
+            k.permute(1, 2, 0, 3) for k in [query_layer, key_layer, value_layer]
+        ]
         if query_layer.shape[2] == key_layer.shape[2]:
             context_layer = torch.nn.functional.scaled_dot_product_attention(
                 query_layer, key_layer, value_layer, is_causal=True
             )
         context_layer = context_layer.permute(2, 0, 1, 3)
-        new_context_layer_shape = context_layer.size()[:-2] + (self.hidden_size_per_partition,)
+        new_context_layer_shape = context_layer.size()[:-2] + (
+            self.hidden_size_per_partition,
+        )
         context_layer = context_layer.reshape(*new_context_layer_shape)
         return context_layer
 
@@ -98,13 +114,16 @@ class SelfAttention(torch.nn.Module):
         super(SelfAttention, self).__init__()
         self.layer_number = max(1, layer_number)
         self.projection_size = config.kv_channels * config.num_attention_heads
-        self.hidden_size_per_attention_head = self.projection_size // config.num_attention_heads
+        self.hidden_size_per_attention_head = (
+            self.projection_size // config.num_attention_heads
+        )
         self.num_attention_heads_per_partition = config.num_attention_heads
         self.multi_query_attention = config.multi_query_attention
         self.qkv_hidden_size = 3 * self.projection_size
         self.num_multi_query_groups_per_partition = config.multi_query_group_num
         self.qkv_hidden_size = (
-            self.projection_size + 2 * self.hidden_size_per_attention_head * config.multi_query_group_num
+            self.projection_size
+            + 2 * self.hidden_size_per_attention_head * config.multi_query_group_num
         )
         self.query_key_value = nn.Linear(
             config.hidden_size,
@@ -144,9 +163,12 @@ class SelfAttention(torch.nn.Module):
 
         (query_layer, key_layer, value_layer) = mixed_x_layer.split(
             [
-                self.num_attention_heads_per_partition * self.hidden_size_per_attention_head,
-                self.num_multi_query_groups_per_partition * self.hidden_size_per_attention_head,
-                self.num_multi_query_groups_per_partition * self.hidden_size_per_attention_head,
+                self.num_attention_heads_per_partition
+                * self.hidden_size_per_attention_head,
+                self.num_multi_query_groups_per_partition
+                * self.hidden_size_per_attention_head,
+                self.num_multi_query_groups_per_partition
+                * self.hidden_size_per_attention_head,
             ],
             dim=-1,
         )
@@ -182,7 +204,8 @@ class SelfAttention(torch.nn.Module):
                 -1,
                 -1,
                 -1,
-                self.num_attention_heads_per_partition // self.num_multi_query_groups_per_partition,
+                self.num_attention_heads_per_partition
+                // self.num_multi_query_groups_per_partition,
                 -1,
             )
             key_layer = key_layer.contiguous().view(
@@ -197,7 +220,8 @@ class SelfAttention(torch.nn.Module):
                 -1,
                 -1,
                 -1,
-                self.num_attention_heads_per_partition // self.num_multi_query_groups_per_partition,
+                self.num_attention_heads_per_partition
+                // self.num_multi_query_groups_per_partition,
                 -1,
             )
             value_layer = value_layer.contiguous().view(
@@ -224,9 +248,11 @@ class MLP(torch.nn.Module):
             device=device,
             dtype=config.torch_dtype,
         )
+
         def swiglu(x):
             x = torch.chunk(x, 2, dim=-1)
             return F.silu(x[0]) * x[1]
+
         self.activation_func = swiglu
         self.dense_4h_to_h = nn.Linear(
             config.ffn_hidden_size,
@@ -254,7 +280,9 @@ class GLMBlock(torch.nn.Module):
         super(GLMBlock, self).__init__()
         self.layer_number = layer_number
 
-        self.apply_residual_connection_post_layernorm = config.apply_residual_connection_post_layernorm
+        self.apply_residual_connection_post_layernorm = (
+            config.apply_residual_connection_post_layernorm
+        )
 
         self.fp32_residual_connection = config.fp32_residual_connection
 
@@ -286,7 +314,9 @@ class GLMBlock(torch.nn.Module):
         attention_output = self.self_attention(layernorm_output, rotary_pos_emb)
         residual = hidden_states
 
-        layernorm_input = torch.nn.functional.dropout(attention_output, p=self.hidden_dropout, training=self.training)
+        layernorm_input = torch.nn.functional.dropout(
+            attention_output, p=self.hidden_dropout, training=self.training
+        )
         layernorm_input = residual + layernorm_input
 
         # Layer norm post the self attention.
@@ -297,7 +327,9 @@ class GLMBlock(torch.nn.Module):
 
         residual = layernorm_input
 
-        output = torch.nn.functional.dropout(mlp_output, p=self.hidden_dropout, training=self.training)
+        output = torch.nn.functional.dropout(
+            mlp_output, p=self.hidden_dropout, training=self.training
+        )
         output = residual + output
         return output
 
@@ -365,7 +397,9 @@ class ChatGLMModel(nn.Module):
         # Rotary positional embeddings
         self.seq_length = config.seq_length
         rotary_dim = (
-            config.hidden_size // config.num_attention_heads if config.kv_channels is None else config.kv_channels
+            config.hidden_size // config.num_attention_heads
+            if config.kv_channels is None
+            else config.kv_channels
         )
 
         self.rotary_pos_emb = RotaryEmbedding(
@@ -392,7 +426,9 @@ class ChatGLMModel(nn.Module):
         tokenizer=None,
     ):
         output_hidden_states = (
-            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+            output_hidden_states
+            if output_hidden_states is not None
+            else self.config.output_hidden_states
         )
         inputs_embeds = self.embedding(input_ids)
 
@@ -410,7 +446,7 @@ class ChatGLMModel(nn.Module):
         probs = nn.functional.softmax(next_token_logits, dim=-1)
         next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
 
-        return next_tokens
+        return probs, next_tokens
 
 
 class ChatGLMForConditionalGeneration(nn.Module):
@@ -427,21 +463,26 @@ class ChatGLMForConditionalGeneration(nn.Module):
         self.warnings_issued = {}
         self.generation_config = GenerationConfig.from_model_config(config)
 
-    def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]]):
+    def from_pretrained(
+        cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]]
+    ):
         load_in_8bit = False
         load_in_4bit = False
 
         pretrained_model_name_or_path = str(pretrained_model_name_or_path)
-        resolved_archive_file = os.path.join(pretrained_model_name_or_path, "pytorch_model.bin.index.json")
+        resolved_archive_file = os.path.join(
+            pretrained_model_name_or_path, "pytorch_model.bin.index.json"
+        )
         print(f"loading weights file {resolved_archive_file}")
         with open(resolved_archive_file, "r") as f:
             index = json.loads(f.read())
         shard_filenames = sorted(set(index["weight_map"].values()))
-        resolved_archive_file = [os.path.join(pretrained_model_name_or_path, f) for f in shard_filenames]
+        resolved_archive_file = [
+            os.path.join(pretrained_model_name_or_path, f) for f in shard_filenames
+        ]
         model = cls._load_pretrained_model(resolved_archive_file)
         model.is_loaded_in_4bit = load_in_4bit
         model.is_loaded_in_8bit = load_in_8bit
-        model.eval()  # Set model in evaluation mode to deactivate DropOut modules by default
         return model
 
     def _load_state_dict_into_model(self, model_to_load, state_dict, start_prefix):
@@ -470,15 +511,21 @@ class ChatGLMForConditionalGeneration(nn.Module):
         model_to_load = cls
         error_msgs = []
         if len(resolved_archive_file) > 1:
-            resolved_archive_file = tqdm_lib.tqdm(resolved_archive_file, desc="Loading checkpoint shards")
+            resolved_archive_file = tqdm_lib.tqdm(
+                resolved_archive_file, desc="Loading checkpoint shards"
+            )
         for shard_file in resolved_archive_file:
             state_dict = torch.load(shard_file, map_location="cpu")
 
-            error_msgs += cls._load_state_dict_into_model(model_to_load, state_dict, start_prefix)
+            error_msgs += cls._load_state_dict_into_model(
+                model_to_load, state_dict, start_prefix
+            )
             del state_dict  # force memory release
             gc.collect()
 
-        print(f"All model checkpoint weights were used when initializing {cls.__class__.__name__}.\n")
+        print(
+            f"All model checkpoint weights were used when initializing {cls.__class__.__name__}.\n"
+        )
         return cls
 
     @torch.inference_mode()
@@ -496,7 +543,9 @@ class ChatGLMForConditionalGeneration(nn.Module):
 
         generation_config = copy.deepcopy(self.generation_config)
         inputs_tensor = inputs["input_ids"]
-        input_ids = inputs_tensor.repeat_interleave(generation_config.num_return_sequences, dim=0)
+        input_ids = inputs_tensor.repeat_interleave(
+            generation_config.num_return_sequences, dim=0
+        )
 
         outputs = self.sample(
             input_ids,
@@ -523,17 +572,21 @@ class ChatGLMForConditionalGeneration(nn.Module):
             eos_token_id = [eos_token_id]
         eos_token_id_tensor = torch.tensor(eos_token_id).to(input_ids.device)
 
-        isFinished = torch.zeros(input_ids.shape[0], dtype=torch.long, device=input_ids.device)
+        isFinished = torch.zeros(
+            input_ids.shape[0], dtype=torch.long, device=input_ids.device
+        )
         # token_count = 0
         while True:
             input_ids_in = input_ids
             batch_size, seq_length = input_ids_in.shape
             position_ids_in = (
-                torch.arange(seq_length, dtype=torch.long, device=input_ids.device).unsqueeze(0).repeat(batch_size, 1)
+                torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
+                .unsqueeze(0)
+                .repeat(batch_size, 1)
             )
             model_inputs = {"input_ids": input_ids_in, "position_ids": position_ids_in}
 
-            next_tokens = self.transformer(
+            probs, next_tokens = self.transformer(
                 **model_inputs,
                 output_hidden_states=output_hidden_states,
                 tokenizer=tokenizer,
@@ -549,3 +602,41 @@ class ChatGLMForConditionalGeneration(nn.Module):
 
             input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
         return input_ids
+
+    def backward(
+        self,
+        tokenizer,
+        query: str,
+    ):
+        inputs = tokenizer.build_chat_input(query, history=[], role="user")
+        inputs = inputs.to(next(self.parameters()).device)
+
+        generation_config = copy.deepcopy(self.generation_config)
+        inputs_tensor = inputs["input_ids"]
+        input_ids = inputs_tensor.repeat_interleave(
+            generation_config.num_return_sequences, dim=0
+        )
+
+        input_ids_in = input_ids
+        batch_size, seq_length = input_ids_in.shape
+        position_ids_in = (
+            torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
+            .unsqueeze(0)
+            .repeat(batch_size, 1)
+        )
+        model_inputs = {"input_ids": input_ids_in, "position_ids": position_ids_in}
+
+        probs, next_tokens = self.transformer(
+            **model_inputs,
+            output_hidden_states=None,
+            tokenizer=tokenizer,
+        )
+
+        next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
+        # probs_target = probs
+        # probs_target[0, next_tokens] = probs_target[0, next_tokens] * 1.1
+
+        loss = probs[0, next_tokens]
+        loss.backward()
+
+        return loss

@@ -1,13 +1,18 @@
+import sys
+
+sys.path.append("..")
+
 import json
 import torch
 from tools import show
 
 
 from chatglm import ChatGLMTokenizer
+from modelscope import snapshot_download
 
-pretrained_model_name_or_path = "../ZhipuAI/chatglm3-6b"
+pretrained_model_name_or_path = snapshot_download("ZhipuAI/chatglm3-6b")
 
-tokenizer_config_file = "./chatglm/tokenizer_config.json"
+tokenizer_config_file = "./tokenizer_config.json"
 if tokenizer_config_file is not None:
     with open(tokenizer_config_file, encoding="utf-8") as tokenizer_config_handle:
         init_kwargs = json.load(tokenizer_config_handle)
@@ -15,7 +20,7 @@ if tokenizer_config_file is not None:
     init_kwargs.pop("tokenizer_file", None)
     saved_init_inputs = init_kwargs.pop("init_inputs", ())
     init_inputs = saved_init_inputs
-    init_kwargs["vocab_file"] = "./chatglm/tokenizer.model"
+    init_kwargs["vocab_file"] = "./tokenizer.model"
     init_kwargs["added_tokens_file"] = None
     init_kwargs["special_tokens_map_file"] = None
     init_kwargs["tokenizer_file"] = None
@@ -30,7 +35,7 @@ b = tokenizer.decode([236, 173, 140])
 token = []
 for i in range(64798):
     token.append(str(i) + " : " + tokenizer.decode(i))
-show.DumpListToFile(token, "generated/token.log")
+show.DumpListToFile(token, "../generated/token.log")
 
 # print("=======================")
 # for i in range(hidden_states_en.shape[0]):

@@ -0,0 +1,25 @@
+from modelscope import snapshot_download
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers.generation import GenerationConfig
+
+model_dir = snapshot_download("qwen/Qwen-1_8B-Chat")
+
+tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_dir, device_map="auto", trust_remote_code=True
+).eval()
+
+
+# Generation length, top_p, and other related hyperparameters can be specified here
+model.generation_config = GenerationConfig.from_pretrained(
+    model_dir, trust_remote_code=True
+)
+
+# First round of dialogue
+response, history = model.chat(tokenizer, "你好", history=None)
+print(response)
+# Hello! I'm glad to help you.
+
+# Second round of dialogue
+response, history = model.chat(tokenizer, "给我讲一个年轻人奋斗创业最终取得成功的故事。", history=history)
+print(response)

File diff suppressed because it is too large
@@ -0,0 +1,45 @@
+import json
+import torch
+
+from chatglm import ChatGLMForConditionalGeneration
+from chatglm import ChatGLMTokenizer
+
+from tools import show
+
+from transformers import AutoConfig
+
+seed = 4321
+torch.manual_seed(seed)
+torch.cuda.manual_seed_all(seed)
+
+
+pretrained_model_name_or_path = "../ZhipuAI/chatglm3-6b"
+config, kwargs = AutoConfig.from_pretrained(
+    pretrained_model_name_or_path,
+    return_unused_kwargs=True,
+    trust_remote_code=True,
+    code_revision=None,
+    _commit_hash=None,
+)
+glm = ChatGLMForConditionalGeneration(config)
+
+
+tokenizer_config_file = "./chatglm/tokenizer_config.json"
+if tokenizer_config_file is not None:
+    with open(tokenizer_config_file, encoding="utf-8") as tokenizer_config_handle:
+        init_kwargs = json.load(tokenizer_config_handle)
+    init_kwargs.pop("tokenizer_class", None)
+    init_kwargs.pop("tokenizer_file", None)
+    saved_init_inputs = init_kwargs.pop("init_inputs", ())
+    init_inputs = saved_init_inputs
+    init_kwargs["vocab_file"] = "./chatglm/tokenizer.model"
+    init_kwargs["added_tokens_file"] = None
+    init_kwargs["special_tokens_map_file"] = None
+    init_kwargs["tokenizer_file"] = None
+    init_kwargs["name_or_path"] = pretrained_model_name_or_path
+tokenizer = ChatGLMTokenizer(*init_inputs, **init_kwargs)
+
+
+glm = glm.from_pretrained(pretrained_model_name_or_path).half().cuda()
+query = "你好"
+response = glm.backward(tokenizer, query)