Refine llamafactory train.

2025-06-08 16:01:19 +08:00 · 2025-06-08 16:01:19 +08:00 · 710c901f5e
parent 3ceb02a6b0
commit 710c901f5e
2 changed files with 9 additions and 1 deletion
--- a/finetune/llamafactory/speed_test
+++ b/finetune/llamafactory/speed_test
@@ -0,0 +1,8 @@
+
+
+4070tisuper batch=1 bf16=false  1.79s/it
+4070tisuper batch=1 bf16=true  1.8s/it
+4070tisuper batch=4 bf16=true  4.2s/it
+
+v100_32G_PCIE batch=1 bf16=false  5.5s/it
+v100_32G_PCIE batch=1 bf16=true  8.7s/it
--- a/finetune/llamafactory/train_qwen3_lora_sft.yaml
+++ b/finetune/llamafactory/train_qwen3_lora_sft.yaml
@@ -28,7 +28,7 @@ save_only_model: false
 report_to: tensorboard  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
-per_device_train_batch_size: 4
+per_device_train_batch_size: 1
 gradient_accumulation_steps: 8
 learning_rate: 1.0e-4
 num_train_epochs: 5.0