Update train code.

This commit is contained in:
Colin 2025-04-09 19:12:21 +08:00
parent cda7f04e49
commit db97131caf
1 changed file with 3 additions and 2 deletions

View File

@@ -17,7 +17,7 @@ if __name__ == "__main__":
conf.name = "bigger" # current train process name
conf.pretrain_model_name = None # "qwen/Qwen-1_8B-Chat"
conf.learning_rate = 0.0001
conf.learning_rate = 0.001
conf.use_tril_attention_mask = None
conf.precision = "16-mixed" # "precision:bf16-mixed,16-mixed,32-true"
conf.train_batch_size = 16
@@ -37,7 +37,8 @@ if __name__ == "__main__":
conf.dataset.meaning.val_mask_idx = [0, 0, -1]
config.vocab_size = 32
config.hidden_size = 128 # 128 1024 2048 32
config.hidden_size = 32 # 128 1024 2048 32
config.intermediate_size = config.hidden_size * 4
config.num_hidden_layers = 3 # 6 12 24 3
config.num_attention_heads = 8 # 8 8 16