|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"global_step": 12272, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.808099739243807e-05, |
|
"loss": 1.1464, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.604383963494133e-05, |
|
"loss": 1.121, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.400668187744459e-05, |
|
"loss": 1.1114, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.196952411994785e-05, |
|
"loss": 1.1053, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.9932366362451115e-05, |
|
"loss": 1.1036, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.7899282920469364e-05, |
|
"loss": 1.1021, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.3860417727967397, |
|
"eval_loss": 1.0996018648147583, |
|
"eval_runtime": 27.3661, |
|
"eval_samples_per_second": 358.655, |
|
"eval_steps_per_second": 11.218, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.586212516297262e-05, |
|
"loss": 1.0988, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.3824967405475883e-05, |
|
"loss": 1.0957, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.178780964797914e-05, |
|
"loss": 1.0942, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.9754726205997396e-05, |
|
"loss": 1.092, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.7717568448500652e-05, |
|
"loss": 1.0908, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.5680410691003915e-05, |
|
"loss": 1.0902, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.39470198675496687, |
|
"eval_loss": 1.0913997888565063, |
|
"eval_runtime": 27.3848, |
|
"eval_samples_per_second": 358.41, |
|
"eval_steps_per_second": 11.211, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.364325293350717e-05, |
|
"loss": 1.088, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.1610169491525427e-05, |
|
"loss": 1.0875, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.9573011734028683e-05, |
|
"loss": 1.0873, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.7535853976531943e-05, |
|
"loss": 1.0853, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.5498696219035202e-05, |
|
"loss": 1.0865, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.3465612777053455e-05, |
|
"loss": 1.0871, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.3979623025980642, |
|
"eval_loss": 1.0878193378448486, |
|
"eval_runtime": 27.352, |
|
"eval_samples_per_second": 358.84, |
|
"eval_steps_per_second": 11.224, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.1428455019556714e-05, |
|
"loss": 1.0866, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 9.391297262059974e-06, |
|
"loss": 1.0847, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 7.354139504563234e-06, |
|
"loss": 1.0844, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 5.316981747066493e-06, |
|
"loss": 1.0849, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 3.283898305084746e-06, |
|
"loss": 1.0832, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 1.2467405475880053e-06, |
|
"loss": 1.0871, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.4031584309730005, |
|
"eval_loss": 1.0859721899032593, |
|
"eval_runtime": 27.3545, |
|
"eval_samples_per_second": 358.808, |
|
"eval_steps_per_second": 11.223, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 12272, |
|
"total_flos": 5.465758560807813e+17, |
|
"train_loss": 1.0949462632001457, |
|
"train_runtime": 12056.6812, |
|
"train_samples_per_second": 130.285, |
|
"train_steps_per_second": 1.018 |
|
} |
|
], |
|
"max_steps": 12272, |
|
"num_train_epochs": 4, |
|
"total_flos": 5.465758560807813e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|