Update train code.
This commit is contained in:
parent
cda7f04e49
commit
db97131caf
|
@ -17,7 +17,7 @@ if __name__ == "__main__":
|
|||
|
||||
conf.name = "bigger" # current train process name
|
||||
conf.pretrain_model_name = None # "qwen/Qwen-1_8B-Chat"
|
||||
conf.learning_rate = 0.0001
|
||||
conf.learning_rate = 0.001
|
||||
conf.use_tril_attention_mask = None
|
||||
conf.precision = "16-mixed" # "precision:bf16-mixed,16-mixed,32-true"
|
||||
conf.train_batch_size = 16
|
||||
|
@ -37,7 +37,8 @@ if __name__ == "__main__":
|
|||
conf.dataset.meaning.val_mask_idx = [0, 0, -1]
|
||||
|
||||
config.vocab_size = 32
|
||||
config.hidden_size = 128 # 128 1024 2048 32
|
||||
config.hidden_size = 32 # 128 1024 2048 32
|
||||
config.intermediate_size = config.hidden_size * 4
|
||||
config.num_hidden_layers = 3 # 6 12 24 3
|
||||
config.num_attention_heads = 8 # 8 8 16
|
||||
|
||||
|
|
Loading…
Reference in New Issue