Witllm/wit/lightning_logs/version_0/hparams.yaml

config: !!python/object:wit.configuration.ModelConfig
  attn_dropout_prob: 0.0
  bf16: false
  chat_format: chatml
  do_sample: true
  emb_dropout_prob: 0.0
  fp16: false
  fp32: false
  hidden_size: 128
  initializer_range: 0.02
  intermediate_size: 5504
  layer_norm_epsilon: 1.0e-06
  max_new_tokens: 512
  max_position_embeddings: 8192
  max_window_size: 6144
  model_max_length: 8192
  no_bias: true
  num_attention_heads: 8
  num_hidden_layers: 6
  repetition_penalty: 1.1
  rotary_emb_base: 10000
  rotary_pct: 1.0
  scale_attn_weights: true
  softmax_in_fp32: false
  tie_word_embeddings: false
  top_k: 0
  top_p: 0.8
  use_cache: true
  use_cache_kernel: false
  use_cache_quantization: false
  use_dynamic_ntk: true
  use_flash_attn: auto
  use_logn_attn: true
  vocab_size: 4096
learning_rate: 0.0001
pretrained_model_dir: null
use_tril_attention_mask: null
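
Note on loading: the config entry carries a !!python/object tag, so yaml.safe_load rejects this file, and yaml.unsafe_load would try to import wit.configuration to rebuild the ModelConfig instance. A minimal sketch of one way to read the values as plain dicts without that import (the helper name _as_dict is illustrative, not part of the repo):

import yaml

def _as_dict(loader, node):
    # Treat the tagged ModelConfig object as an ordinary mapping of its attributes.
    return loader.construct_mapping(node, deep=True)

# "!!python/object:X" expands to the full tag "tag:yaml.org,2002:python/object:X".
yaml.SafeLoader.add_constructor(
    "tag:yaml.org,2002:python/object:wit.configuration.ModelConfig",
    _as_dict,
)

with open("lightning_logs/version_0/hparams.yaml") as f:
    hparams = yaml.load(f, Loader=yaml.SafeLoader)

print(hparams["config"]["hidden_size"])  # 128
print(hparams["learning_rate"])          # 0.0001

Inside the repo itself, where wit.configuration is importable, yaml.unsafe_load (or letting Lightning restore the checkpoint) should reconstruct the actual ModelConfig object instead of a dict.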