config: !!python/object:wit.configuration.ModelConfig
  attn_dropout_prob: 0.0
  bf16: false
  chat_format: chatml
  do_sample: true
  emb_dropout_prob: 0.0
  fp16: false
  fp32: false
  hidden_size: 128
  initializer_range: 0.02
  intermediate_size: 5504
  layer_norm_epsilon: 1.0e-06
  max_new_tokens: 512
  max_position_embeddings: 8192
  max_window_size: 6144
  model_max_length: 8192
  no_bias: true
  num_attention_heads: 8
  num_hidden_layers: 6
  repetition_penalty: 1.1
  rotary_emb_base: 10000
  rotary_pct: 1.0
  scale_attn_weights: true
  softmax_in_fp32: false
  tie_word_embeddings: false
  top_k: 0
  top_p: 0.8
  use_cache: true
  use_cache_kernel: false
  use_cache_quantization: false
  use_dynamic_ntk: true
  use_flash_attn: auto
  use_logn_attn: true
  vocab_size: 4096
learning_rate: 0.0001
pretrained_model_dir: null
use_tril_attention_mask: null
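
Because of the !!python/object:wit.configuration.ModelConfig tag, yaml.safe_load will refuse this dump unless the tag is handled, and yaml.unsafe_load would try to import the real class. Below is a minimal sketch of one way to read it with PyYAML; the ModelConfig stand-in class and the hparams.yaml file name are assumptions for illustration, not the project's actual definitions.

# Minimal sketch: load the dump above with PyYAML by mapping the
# python/object tag onto a local stand-in class. `ModelConfig` here is
# an assumed stand-in, not the real wit.configuration.ModelConfig.
import yaml


class ModelConfig:
    """Stand-in that simply absorbs the dumped attributes."""


class ConfigLoader(yaml.SafeLoader):
    """SafeLoader subclass so the custom tag stays local to this loader."""


def _construct_model_config(loader, node):
    cfg = ModelConfig()
    # The tagged node is a plain mapping of attribute names to values.
    cfg.__dict__.update(loader.construct_mapping(node, deep=True))
    return cfg


# `!!python/object:...` expands to this full tag per the YAML spec.
ConfigLoader.add_constructor(
    "tag:yaml.org,2002:python/object:wit.configuration.ModelConfig",
    _construct_model_config,
)

with open("hparams.yaml") as f:  # assumed file name for the dump above
    hparams = yaml.load(f, Loader=ConfigLoader)

config = hparams["config"]
print(config.hidden_size, config.num_hidden_layers)  # 128 6
print(hparams["learning_rate"])                      # 0.0001

Subclassing SafeLoader keeps the constructor scoped to this one loader instead of mutating the global yaml.SafeLoader, and it avoids unsafe deserialization of arbitrary Python objects.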