# Copyright (c) Alibaba Cloud.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.


class ModelConfig:
    """Plain container of model and generation hyperparameters.

    Constructing the class with no arguments yields the built-in defaults
    listed in ``__init__``. Any of those defaults can be replaced at
    construction time by passing it as a keyword argument, e.g.
    ``ModelConfig(intermediate_size=11008)``, instead of editing this file.

    The section comments inside ``__init__`` are a reading aid derived from
    the attribute names only; how each value is consumed is decided by the
    code that reads this config.
    """

    def __init__(self, **overrides):
        """Populate the default settings, then apply keyword overrides.

        Args:
            **overrides: Replacement values keyed by attribute name. Only
                names that already exist as defaults are accepted, so a
                misspelled key fails loudly instead of silently creating a
                new, unused attribute.

        Raises:
            TypeError: If ``overrides`` contains a name that is not one of
                the default attributes.
        """
        # --- sizes ---------------------------------------------------------
        self.vocab_size = 4096
        self.hidden_size = 1024
        self.num_hidden_layers = 24
        self.num_attention_heads = 16

        # --- dropout / normalisation / initialisation ----------------------
        self.emb_dropout_prob = 0.0
        self.attn_dropout_prob = 0.0
        self.layer_norm_epsilon = 1e-6
        self.initializer_range = 0.02

        # --- attention and caching switches --------------------------------
        self.max_position_embeddings = 8192
        self.scale_attn_weights = True
        self.use_cache = True

        # --- precision flags (all disabled by default) ---------------------
        self.bf16 = False
        self.fp16 = False
        self.fp32 = False

        # --- rotary embedding / long-context options -----------------------
        self.rotary_pct = 1.0
        self.rotary_emb_base = 10000
        self.use_dynamic_ntk = True
        self.use_logn_attn = True
        self.use_flash_attn = "auto"

        # The original inline note lists 5504 and 11008 as candidate values.
        self.intermediate_size = 5504

        # --- remaining model switches --------------------------------------
        self.no_bias = True
        self.tie_word_embeddings = False
        self.use_cache_quantization = False
        self.use_cache_kernel = False
        self.softmax_in_fp32 = False

        # --- chat / sampling settings --------------------------------------
        # NOTE(review): presumably consumed by generation code - confirm.
        self.chat_format = "chatml"
        self.max_window_size = 6144
        self.max_new_tokens = 512
        self.do_sample = True
        self.top_k = 0
        self.top_p = 0.8
        self.repetition_penalty = 1.1
        self.model_max_length = 8192

        # Validate every override before applying any of them; the defaults
        # assigned above define the complete set of legal names.
        known = vars(self)
        unknown = sorted(name for name in overrides if name not in known)
        if unknown:
            raise TypeError(
                f"{type(self).__name__}() got unexpected keyword "
                f"argument(s): {', '.join(unknown)}"
            )
        for name, value in overrides.items():
            setattr(self, name, value)