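# Serialized model/training configuration. The field names largely mirror
# Qwen's QWenConfig (chat_format, use_dynamic_ntk, use_logn_attn, ...),
# mixing architecture, precision, and generation-sampling defaults in a
# single object; keys are emitted in alphabetical order.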
config: !!python/object:wit.configuration.ModelConfig
  attn_dropout_prob: 0.0
  bf16: false
  chat_format: chatml
  do_sample: true
  emb_dropout_prob: 0.0
  fp16: false
  fp32: false
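  # Geometry note: hidden_size 128 split across num_attention_heads 8 gives
  # a 16-dim head (assuming the standard even per-head split), while
  # intermediate_size 5504 is unusually wide relative to the 128-wide
  # residual stream.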
  hidden_size: 128
  initializer_range: 0.02
  intermediate_size: 5504
  layer_norm_epsilon: 1.0e-06
  max_new_tokens: 512
  max_position_embeddings: 8192
  max_window_size: 6144
  model_max_length: 8192
  no_bias: true
  num_attention_heads: 8
  num_hidden_layers: 6
  repetition_penalty: 1.1
  rotary_emb_base: 10000
  rotary_pct: 1.0
  scale_attn_weights: true
  softmax_in_fp32: false
  tie_word_embeddings: false
  top_k: 0
  top_p: 0.8
  use_cache: true
  use_cache_kernel: false
  use_cache_quantization: false
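  # Long-context options carried over from Qwen: NTK-aware RoPE scaling
  # (use_dynamic_ntk) and log-n attention scaling (use_logn_attn).
  # use_flash_attn: auto leaves the kernel choice to the runtime, using
  # flash-attn when it is installed.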
  use_dynamic_ntk: true
  use_flash_attn: auto
  use_logn_attn: true
  vocab_size: 4096
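# Training options kept at the top level of the file, outside the
# ModelConfig object.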
learning_rate: 0.0001
pretrained_model_dir: null
use_tril_attention_mask: null
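# A minimal loading sketch (assumptions: this file is saved as config.yaml
# and the wit package is importable). PyYAML's safe_load rejects the
# !!python/object tag, so an unsafe loader is required, and the object is
# reconstructed without calling ModelConfig.__init__:
#
#   import yaml
#   with open("config.yaml") as f:
#       cfg = yaml.unsafe_load(f)  # or yaml.load(f, Loader=yaml.UnsafeLoader)
#   model_config = cfg["config"]       # wit.configuration.ModelConfig instance
#   print(model_config.hidden_size)    # 128
#   print(cfg["learning_rate"])        # 0.0001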