|
{ |
|
"best_metric": 0.6931639313697815, |
|
"best_model_checkpoint": "checkpoints/electra_26_4_1/checkpoint-13858", |
|
"epoch": 13.0, |
|
"global_step": 13858, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.904917901570934e-05, |
|
"loss": 0.7426, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_auc_micro": 0.5212763137512771, |
|
"eval_loss": 0.6989310383796692, |
|
"eval_runtime": 23.3554, |
|
"eval_samples_per_second": 128.835, |
|
"eval_steps_per_second": 8.092, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.5795080764400225e-05, |
|
"loss": 0.6964, |
|
"step": 2132 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_auc_micro": 0.5228480764195904, |
|
"eval_loss": 0.6949135661125183, |
|
"eval_runtime": 23.3685, |
|
"eval_samples_per_second": 128.763, |
|
"eval_steps_per_second": 8.088, |
|
"step": 2132 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.254098251309111e-05, |
|
"loss": 0.6944, |
|
"step": 3198 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_auc_micro": 0.5232832697413072, |
|
"eval_loss": 0.6939617395401001, |
|
"eval_runtime": 23.1376, |
|
"eval_samples_per_second": 130.048, |
|
"eval_steps_per_second": 8.169, |
|
"step": 3198 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.9286884261782e-05, |
|
"loss": 0.6938, |
|
"step": 4264 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_auc_micro": 0.5234944214364559, |
|
"eval_loss": 0.6935874223709106, |
|
"eval_runtime": 23.1396, |
|
"eval_samples_per_second": 130.037, |
|
"eval_steps_per_second": 8.168, |
|
"step": 4264 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.603278601047289e-05, |
|
"loss": 0.6935, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_auc_micro": 0.5237105566327973, |
|
"eval_loss": 0.6934056282043457, |
|
"eval_runtime": 23.0585, |
|
"eval_samples_per_second": 130.494, |
|
"eval_steps_per_second": 8.197, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 2.2778687759163776e-05, |
|
"loss": 0.6934, |
|
"step": 6396 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_auc_micro": 0.5238991498421133, |
|
"eval_loss": 0.6933064460754395, |
|
"eval_runtime": 23.1864, |
|
"eval_samples_per_second": 129.775, |
|
"eval_steps_per_second": 8.151, |
|
"step": 6396 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.952458950785467e-05, |
|
"loss": 0.6933, |
|
"step": 7462 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_auc_micro": 0.5239438807088983, |
|
"eval_loss": 0.6932488679885864, |
|
"eval_runtime": 23.1343, |
|
"eval_samples_per_second": 130.067, |
|
"eval_steps_per_second": 8.17, |
|
"step": 7462 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1.6270491256545556e-05, |
|
"loss": 0.6932, |
|
"step": 8528 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_auc_micro": 0.523929452444219, |
|
"eval_loss": 0.6932142972946167, |
|
"eval_runtime": 23.2347, |
|
"eval_samples_per_second": 129.505, |
|
"eval_steps_per_second": 8.134, |
|
"step": 8528 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 1.3016393005236445e-05, |
|
"loss": 0.6932, |
|
"step": 9594 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_auc_micro": 0.5239810749738886, |
|
"eval_loss": 0.6931925415992737, |
|
"eval_runtime": 23.1045, |
|
"eval_samples_per_second": 130.234, |
|
"eval_steps_per_second": 8.18, |
|
"step": 9594 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 9.762294753927335e-06, |
|
"loss": 0.6932, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_auc_micro": 0.5241205756951521, |
|
"eval_loss": 0.6931790709495544, |
|
"eval_runtime": 23.1973, |
|
"eval_samples_per_second": 129.713, |
|
"eval_steps_per_second": 8.148, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 6.508196502618222e-06, |
|
"loss": 0.6932, |
|
"step": 11726 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_auc_micro": 0.5241602931373088, |
|
"eval_loss": 0.6931705474853516, |
|
"eval_runtime": 23.0891, |
|
"eval_samples_per_second": 130.321, |
|
"eval_steps_per_second": 8.186, |
|
"step": 11726 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.254098251309111e-06, |
|
"loss": 0.6932, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_auc_micro": 0.5242074068777832, |
|
"eval_loss": 0.6931658387184143, |
|
"eval_runtime": 23.1506, |
|
"eval_samples_per_second": 129.975, |
|
"eval_steps_per_second": 8.164, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.6932, |
|
"step": 13858 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_auc_micro": 0.5243155309888969, |
|
"eval_loss": 0.6931639313697815, |
|
"eval_runtime": 23.1954, |
|
"eval_samples_per_second": 129.724, |
|
"eval_steps_per_second": 8.148, |
|
"step": 13858 |
|
} |
|
], |
|
"max_steps": 13858, |
|
"num_train_epochs": 13, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|