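# Serialized model/training configuration. The field names largely mirror
# Qwen's QWenConfig (chat_format, use_dynamic_ntk, use_logn_attn, ...),
# mixing architecture, precision, and generation-sampling defaults in a
# single object; keys are emitted in alphabetical order.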
config: !!python/object:wit.configuration.ModelConfig
  attn_dropout_prob: 0.0
  bf16: false
  chat_format: chatml
  do_sample: true
  emb_dropout_prob: 0.0
  fp16: false
  fp32: false
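  # Geometry note: hidden_size 128 split across num_attention_heads 8 gives
  # a 16-dim head (assuming the standard even per-head split), while
  # intermediate_size 5504 is unusually wide relative to the 128-wide
  # residual stream.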
  hidden_size: 128
  initializer_range: 0.02
  intermediate_size: 5504
  layer_norm_epsilon: 1.0e-06
  max_new_tokens: 512
  max_position_embeddings: 8192
  max_window_size: 6144
  model_max_length: 8192
  no_bias: true
  num_attention_heads: 8
  num_hidden_layers: 6
  repetition_penalty: 1.1
  rotary_emb_base: 10000
  rotary_pct: 1.0
  scale_attn_weights: true
  softmax_in_fp32: false
  tie_word_embeddings: false
  top_k: 0
  top_p: 0.8
  use_cache: true
  use_cache_kernel: false
  use_cache_quantization: false
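  # Long-context options carried over from Qwen: NTK-aware RoPE scaling
  # (use_dynamic_ntk) and log-n attention scaling (use_logn_attn).
  # use_flash_attn: auto leaves the kernel choice to the runtime, using
  # flash-attn when it is installed.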
  use_dynamic_ntk: true
  use_flash_attn: auto
  use_logn_attn: true
  vocab_size: 4096
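# Training options kept at the top level of the file, outside the
# ModelConfig object.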
learning_rate: 0.0001
pretrained_model_dir: null
use_tril_attention_mask: null
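# A minimal loading sketch (assumptions: this file is saved as config.yaml
# and the wit package is importable). PyYAML's safe_load rejects the
# !!python/object tag, so an unsafe loader is required, and the object is
# reconstructed without calling ModelConfig.__init__:
#
#   import yaml
#   with open("config.yaml") as f:
#       cfg = yaml.unsafe_load(f)  # or yaml.load(f, Loader=yaml.UnsafeLoader)
#   model_config = cfg["config"]       # wit.configuration.ModelConfig instance
#   print(model_config.hidden_size)    # 128
#   print(cfg["learning_rate"])        # 0.0001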