|
{ |
|
"best_metric": 0.8666666666666667, |
|
"best_model_checkpoint": "./outputs/xlm-roberta-large-best-model/xnli_16_0.1_0.00001_04-05-23_12-26/checkpoint-73632", |
|
"epoch": 4.0, |
|
"global_step": 98176, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 7.978761445522026e-06, |
|
"loss": 0.548, |
|
"step": 24544 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8526104417670682, |
|
"eval_loss": 0.4045264422893524, |
|
"eval_runtime": 85.4254, |
|
"eval_samples_per_second": 29.148, |
|
"eval_steps_per_second": 3.652, |
|
"step": 24544 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.319174297014683e-06, |
|
"loss": 0.3739, |
|
"step": 49088 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8538152610441767, |
|
"eval_loss": 0.3931969106197357, |
|
"eval_runtime": 85.4189, |
|
"eval_samples_per_second": 29.15, |
|
"eval_steps_per_second": 3.653, |
|
"step": 49088 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 2.6595871485073417e-06, |
|
"loss": 0.2739, |
|
"step": 73632 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8666666666666667, |
|
"eval_loss": 0.457296758890152, |
|
"eval_runtime": 85.1723, |
|
"eval_samples_per_second": 29.235, |
|
"eval_steps_per_second": 3.663, |
|
"step": 73632 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.2075, |
|
"step": 98176 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8626506024096385, |
|
"eval_loss": 0.5882326364517212, |
|
"eval_runtime": 85.1965, |
|
"eval_samples_per_second": 29.227, |
|
"eval_steps_per_second": 3.662, |
|
"step": 98176 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 98176, |
|
"total_flos": 1.4638901871267594e+18, |
|
"train_loss": 0.3508187235567374, |
|
"train_runtime": 165856.5757, |
|
"train_samples_per_second": 9.471, |
|
"train_steps_per_second": 0.592 |
|
} |
|
], |
|
"max_steps": 98176, |
|
"num_train_epochs": 4, |
|
"total_flos": 1.4638901871267594e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|