43 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			43 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Python
		
	
	
	
# Copyright (c) Alibaba Cloud.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
| 
 | |
| 
 | |
class QWenConfig:
    """Fixed set of configuration values for a QWen model.

    The constructor takes no arguments and only assigns the attributes
    below on the instance. How each value is interpreted is decided by the
    code that consumes this object, which is not part of this file.
    """

    def __init__(self) -> None:
        """Assign every configuration attribute its hard-coded value."""
        # Model / runtime settings (grouping inferred from attribute names).
        # The trailing numbers in some comments are the original author's
        # notes -- presumably alternative values for larger model variants;
        # TODO confirm.
        self.vocab_size = 4096
        self.hidden_size = 128  # 1024 2048
        self.num_hidden_layers = 6  # 12 24
        self.num_attention_heads = 8  # 8 16
        self.emb_dropout_prob = 0.0
        self.attn_dropout_prob = 0.0
        self.layer_norm_epsilon = 1e-6
        self.initializer_range = 0.02
        self.max_position_embeddings = 8192
        self.scale_attn_weights = True
        self.use_cache = True
        # All three precision flags are off here; NOTE(review): presumably
        # the consumer then picks a dtype itself -- confirm against caller.
        self.bf16 = False
        self.fp16 = False
        self.fp32 = False
        self.rotary_pct = 1.0
        self.rotary_emb_base = 10000
        self.use_dynamic_ntk = True
        self.use_logn_attn = True
        # A string rather than a bool, unlike the other use_* flags.
        self.use_flash_attn = "auto"
        self.intermediate_size = 5504  # 5504 11008
        self.no_bias = True
        self.tie_word_embeddings = False
        self.use_cache_quantization = False
        self.use_cache_kernel = False
        self.softmax_in_fp32 = False

        # Chat / text-generation settings (grouping inferred from names).
        self.chat_format = "chatml"
        self.max_window_size = 6144
        self.max_new_tokens = 512
        self.do_sample = True
        self.top_k = 0
        self.top_p = 0.8
        self.repetition_penalty = 1.1
        self.model_max_length = 8192