Update train code.
This commit is contained in:
parent
cda7f04e49
commit
db97131caf
|
@ -17,7 +17,7 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
conf.name = "bigger" # current train process name
|
conf.name = "bigger" # current train process name
|
||||||
conf.pretrain_model_name = None # "qwen/Qwen-1_8B-Chat"
|
conf.pretrain_model_name = None # "qwen/Qwen-1_8B-Chat"
|
||||||
conf.learning_rate = 0.0001
|
conf.learning_rate = 0.001
|
||||||
conf.use_tril_attention_mask = None
|
conf.use_tril_attention_mask = None
|
||||||
conf.precision = "16-mixed" # "precision:bf16-mixed,16-mixed,32-true"
|
conf.precision = "16-mixed" # "precision:bf16-mixed,16-mixed,32-true"
|
||||||
conf.train_batch_size = 16
|
conf.train_batch_size = 16
|
||||||
|
@ -37,7 +37,8 @@ if __name__ == "__main__":
|
||||||
conf.dataset.meaning.val_mask_idx = [0, 0, -1]
|
conf.dataset.meaning.val_mask_idx = [0, 0, -1]
|
||||||
|
|
||||||
config.vocab_size = 32
|
config.vocab_size = 32
|
||||||
config.hidden_size = 128 # 128 1024 2048 32
|
config.hidden_size = 32 # 128 1024 2048 32
|
||||||
|
config.intermediate_size = config.hidden_size * 4
|
||||||
config.num_hidden_layers = 3 # 6 12 24 3
|
config.num_hidden_layers = 3 # 6 12 24 3
|
||||||
config.num_attention_heads = 8 # 8 8 16
|
config.num_attention_heads = 8 # 8 8 16
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue