|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 1548, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19, |
|
"eval_f1": 0.849254555494202, |
|
"eval_loss": 0.43653005974375836, |
|
"eval_runtime": 1.8732, |
|
"eval_samples_per_second": 38.438, |
|
"eval_steps_per_second": 38.438, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_f1": 0.8503301540719003, |
|
"eval_loss": 0.41970557127046154, |
|
"eval_runtime": 2.0707, |
|
"eval_samples_per_second": 34.771, |
|
"eval_steps_per_second": 34.771, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_f1": 0.8297065282491516, |
|
"eval_loss": 0.40088379935422785, |
|
"eval_runtime": 1.8754, |
|
"eval_samples_per_second": 38.393, |
|
"eval_steps_per_second": 38.393, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_f1": 0.8422688422688422, |
|
"eval_loss": 0.3884492161937947, |
|
"eval_runtime": 1.9854, |
|
"eval_samples_per_second": 36.264, |
|
"eval_steps_per_second": 36.264, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.3850129198966415e-05, |
|
"loss": 0.4311, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_f1": 0.8528028933092224, |
|
"eval_loss": 0.38350051814712116, |
|
"eval_runtime": 1.8888, |
|
"eval_samples_per_second": 38.119, |
|
"eval_steps_per_second": 38.119, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_f1": 0.850596842510589, |
|
"eval_loss": 0.37396426411921446, |
|
"eval_runtime": 1.8758, |
|
"eval_samples_per_second": 38.383, |
|
"eval_steps_per_second": 38.383, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_f1": 0.8452932098765432, |
|
"eval_loss": 0.4050087825284831, |
|
"eval_runtime": 1.8815, |
|
"eval_samples_per_second": 38.268, |
|
"eval_steps_per_second": 38.268, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_f1": 0.8532502919423901, |
|
"eval_loss": 0.3930495809773195, |
|
"eval_runtime": 1.8501, |
|
"eval_samples_per_second": 38.918, |
|
"eval_steps_per_second": 38.918, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_f1": 0.8646658074019518, |
|
"eval_loss": 0.37751149353174285, |
|
"eval_runtime": 1.87, |
|
"eval_samples_per_second": 38.504, |
|
"eval_steps_per_second": 38.504, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.7700258397932818e-05, |
|
"loss": 0.3357, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_f1": 0.8435221536735839, |
|
"eval_loss": 0.40316357022531535, |
|
"eval_runtime": 1.9305, |
|
"eval_samples_per_second": 37.296, |
|
"eval_steps_per_second": 37.296, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_f1": 0.8601869158878505, |
|
"eval_loss": 0.37796433318775113, |
|
"eval_runtime": 1.8724, |
|
"eval_samples_per_second": 38.453, |
|
"eval_steps_per_second": 38.453, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_f1": 0.8579914609244479, |
|
"eval_loss": 0.4167007547913436, |
|
"eval_runtime": 1.8767, |
|
"eval_samples_per_second": 38.365, |
|
"eval_steps_per_second": 38.365, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_f1": 0.8557028187418332, |
|
"eval_loss": 0.4009078407659654, |
|
"eval_runtime": 1.8798, |
|
"eval_samples_per_second": 38.302, |
|
"eval_steps_per_second": 38.302, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_f1": 0.853885616568497, |
|
"eval_loss": 0.4169967998459562, |
|
"eval_runtime": 1.8663, |
|
"eval_samples_per_second": 38.579, |
|
"eval_steps_per_second": 38.579, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.550387596899225e-06, |
|
"loss": 0.2747, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_f1": 0.8545181852268466, |
|
"eval_loss": 0.41253904368235106, |
|
"eval_runtime": 1.8671, |
|
"eval_samples_per_second": 38.563, |
|
"eval_steps_per_second": 38.563, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1548, |
|
"total_flos": 1928644274657952.0, |
|
"train_loss": 0.34470706577448884, |
|
"train_runtime": 143.1766, |
|
"train_samples_per_second": 10.812, |
|
"train_steps_per_second": 10.812 |
|
} |
|
], |
|
"max_steps": 1548, |
|
"num_train_epochs": 3, |
|
"total_flos": 1928644274657952.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|