# Copyright (c) Alibaba Cloud.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.


class ModelConfig:
    """Plain container of default model and generation settings.

    Instantiating the class takes no arguments and assigns a fixed set of
    attributes on the instance; nothing is validated or computed here.
    Every instance gets its own copy of the defaults, so mutating one
    instance does not affect another.

    NOTE(review): the section comments below are inferred from the
    attribute names only. This file does not show how the values are
    consumed, so confirm their exact semantics against the model code.
    """

    def __init__(self) -> None:
        """Populate the instance with the default settings."""
        # --- Model size (presumably transformer dimensions) ---
        self.vocab_size = 4096
        self.hidden_size = 1024
        self.num_hidden_layers = 24
        self.num_attention_heads = 16

        # --- Dropout, normalisation and weight initialisation ---
        self.emb_dropout_prob = 0.0
        self.attn_dropout_prob = 0.0
        self.layer_norm_epsilon = 1e-6
        self.initializer_range = 0.02

        # --- Sequence length, attention scaling and caching ---
        self.max_position_embeddings = 8192
        self.scale_attn_weights = True
        self.use_cache = True

        # --- Precision flags (all disabled by default) ---
        self.bf16 = False
        self.fp16 = False
        self.fp32 = False

        # --- Rotary embedding and attention options ---
        self.rotary_pct = 1.0
        self.rotary_emb_base = 10000
        self.use_dynamic_ntk = True
        self.use_logn_attn = True
        # A string rather than a bool; "auto" is the only value visible here.
        self.use_flash_attn = "auto"

        # Original author's note listed the values "5504 11008".
        # NOTE(review): presumably candidate sizes - confirm.
        self.intermediate_size = 5504
        self.no_bias = True
        self.tie_word_embeddings = False
        self.use_cache_quantization = False
        self.use_cache_kernel = False
        self.softmax_in_fp32 = False

        # --- Chat / generation defaults ---
        self.chat_format = "chatml"
        self.max_window_size = 6144
        self.max_new_tokens = 512
        self.do_sample = True
        self.top_k = 0
        self.top_p = 0.8
        self.repetition_penalty = 1.1
        self.model_max_length = 8192