gpt2-jokes / trainer_state.json
AlekseyKorshuk's picture
End of training
5f65f7f
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 1720,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"eval_accuracy": 0.8732394680670543,
"eval_loss": 0.7285259366035461,
"eval_runtime": 14.7926,
"eval_samples_per_second": 783.026,
"eval_steps_per_second": 6.152,
"step": 100
},
{
"epoch": 0.12,
"eval_accuracy": 0.8747227655273633,
"eval_loss": 0.7140512466430664,
"eval_runtime": 14.7732,
"eval_samples_per_second": 784.057,
"eval_steps_per_second": 6.16,
"step": 200
},
{
"epoch": 0.17,
"eval_accuracy": 0.8756672233683728,
"eval_loss": 0.7055667638778687,
"eval_runtime": 14.8094,
"eval_samples_per_second": 782.138,
"eval_steps_per_second": 6.145,
"step": 300
},
{
"epoch": 0.23,
"eval_accuracy": 0.8763824510950948,
"eval_loss": 0.6991910934448242,
"eval_runtime": 14.8106,
"eval_samples_per_second": 782.075,
"eval_steps_per_second": 6.144,
"step": 400
},
{
"epoch": 0.29,
"learning_rate": 3.5465116279069774e-05,
"loss": 0.7907,
"step": 500
},
{
"epoch": 0.29,
"eval_accuracy": 0.8771028885971415,
"eval_loss": 0.6942312121391296,
"eval_runtime": 14.8425,
"eval_samples_per_second": 780.393,
"eval_steps_per_second": 6.131,
"step": 500
},
{
"epoch": 0.35,
"eval_accuracy": 0.877665544332211,
"eval_loss": 0.6905708909034729,
"eval_runtime": 14.8552,
"eval_samples_per_second": 779.725,
"eval_steps_per_second": 6.126,
"step": 600
},
{
"epoch": 0.41,
"eval_accuracy": 0.8779126365333262,
"eval_loss": 0.6872847676277161,
"eval_runtime": 14.7951,
"eval_samples_per_second": 782.894,
"eval_steps_per_second": 6.151,
"step": 700
},
{
"epoch": 0.47,
"eval_accuracy": 0.8782155477557776,
"eval_loss": 0.6847825646400452,
"eval_runtime": 13.7809,
"eval_samples_per_second": 840.512,
"eval_steps_per_second": 6.603,
"step": 800
},
{
"epoch": 0.52,
"eval_accuracy": 0.8785906515791573,
"eval_loss": 0.6830095648765564,
"eval_runtime": 14.7951,
"eval_samples_per_second": 782.896,
"eval_steps_per_second": 6.151,
"step": 900
},
{
"epoch": 0.58,
"learning_rate": 2.0930232558139536e-05,
"loss": 0.7105,
"step": 1000
},
{
"epoch": 0.58,
"eval_accuracy": 0.8788102063964133,
"eval_loss": 0.6808722019195557,
"eval_runtime": 14.8083,
"eval_samples_per_second": 782.197,
"eval_steps_per_second": 6.145,
"step": 1000
},
{
"epoch": 0.64,
"eval_accuracy": 0.878962034134448,
"eval_loss": 0.6793943643569946,
"eval_runtime": 14.7885,
"eval_samples_per_second": 783.243,
"eval_steps_per_second": 6.153,
"step": 1100
},
{
"epoch": 0.7,
"eval_accuracy": 0.8791533073142268,
"eval_loss": 0.6780144572257996,
"eval_runtime": 13.7848,
"eval_samples_per_second": 840.275,
"eval_steps_per_second": 6.601,
"step": 1200
},
{
"epoch": 0.76,
"eval_accuracy": 0.8792887614726695,
"eval_loss": 0.6770240068435669,
"eval_runtime": 14.7863,
"eval_samples_per_second": 783.358,
"eval_steps_per_second": 6.154,
"step": 1300
},
{
"epoch": 0.81,
"eval_accuracy": 0.8794212386166409,
"eval_loss": 0.6760326027870178,
"eval_runtime": 14.785,
"eval_samples_per_second": 783.432,
"eval_steps_per_second": 6.155,
"step": 1400
},
{
"epoch": 0.87,
"learning_rate": 6.395348837209303e-06,
"loss": 0.7034,
"step": 1500
},
{
"epoch": 0.87,
"eval_accuracy": 0.8794353794353794,
"eval_loss": 0.6754602789878845,
"eval_runtime": 14.7978,
"eval_samples_per_second": 782.753,
"eval_steps_per_second": 6.15,
"step": 1500
},
{
"epoch": 0.93,
"eval_accuracy": 0.8795403191954916,
"eval_loss": 0.6750109195709229,
"eval_runtime": 14.7942,
"eval_samples_per_second": 782.943,
"eval_steps_per_second": 6.151,
"step": 1600
},
{
"epoch": 0.99,
"eval_accuracy": 0.8795477617316698,
"eval_loss": 0.6747931838035583,
"eval_runtime": 14.8037,
"eval_samples_per_second": 782.441,
"eval_steps_per_second": 6.147,
"step": 1700
},
{
"epoch": 1.0,
"step": 1720,
"total_flos": 1.3140586257186816e+16,
"train_loss": 0.730066760750704,
"train_runtime": 657.3049,
"train_samples_per_second": 334.813,
"train_steps_per_second": 2.617
}
],
"max_steps": 1720,
"num_train_epochs": 1,
"total_flos": 1.3140586257186816e+16,
"trial_name": null,
"trial_params": null
}