Update train code.
This commit is contained in:
parent cda7f04e49
commit db97131caf
@@ -17,7 +17,7 @@ if __name__ == "__main__":
 
     conf.name = "bigger"  # current train process name
     conf.pretrain_model_name = None  # "qwen/Qwen-1_8B-Chat"
-    conf.learning_rate = 0.0001
+    conf.learning_rate = 0.001
     conf.use_tril_attention_mask = None
     conf.precision = "16-mixed"  # "precision:bf16-mixed,16-mixed,32-true"
     conf.train_batch_size = 16
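The only functional change in this hunk is the learning rate, raised from 0.0001 to 0.001. Below is a hedged sketch of how a conf object like this is typically consumed; TrainConfig, the placeholder model, and the AdamW wiring are illustrative assumptions, not this repository's code.

# Hedged sketch (assumed names): how conf.learning_rate might feed an optimizer.
from dataclasses import dataclass
from typing import Optional

import torch


@dataclass
class TrainConfig:
    name: str = "bigger"
    pretrain_model_name: Optional[str] = None  # e.g. "qwen/Qwen-1_8B-Chat"
    learning_rate: float = 0.001  # raised from 0.0001 in this commit
    use_tril_attention_mask: Optional[bool] = None
    precision: str = "16-mixed"  # bf16-mixed | 16-mixed | 32-true
    train_batch_size: int = 16


conf = TrainConfig()
model = torch.nn.Linear(8, 8)  # placeholder module, only for the sketch
optimizer = torch.optim.AdamW(model.parameters(), lr=conf.learning_rate)
print(optimizer.defaults["lr"])  # 0.001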
@@ -37,7 +37,8 @@ if __name__ == "__main__":
     conf.dataset.meaning.val_mask_idx = [0, 0, -1]
 
     config.vocab_size = 32
-    config.hidden_size = 128  # 128 1024 2048  32
+    config.hidden_size = 32  # 128 1024 2048  32
+    config.intermediate_size = config.hidden_size * 4
     config.num_hidden_layers = 3  # 6 12 24  3
     config.num_attention_heads = 8  # 8 8 16
 
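This hunk shrinks hidden_size from 128 to 32 and newly derives intermediate_size as hidden_size * 4, the common feed-forward expansion ratio in transformer blocks. A minimal sketch of that relationship follows; ModelConfig is an illustrative stand-in for the config object in the diff, not the repository's actual config class.

# Hedged sketch: reproduce the hidden_size -> intermediate_size relationship from the diff.
from dataclasses import dataclass, field


@dataclass
class ModelConfig:
    vocab_size: int = 32
    hidden_size: int = 32  # reduced from 128 in this commit
    num_hidden_layers: int = 3  # 6 12 24  3
    num_attention_heads: int = 8  # 8 8 16
    intermediate_size: int = field(init=False)

    def __post_init__(self) -> None:
        # New in this commit: tie the FFN width to hidden_size by the usual 4x ratio.
        self.intermediate_size = self.hidden_size * 4


config = ModelConfig()
assert config.intermediate_size == 128  # 32 * 4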