terminator_finetune / trainer_state.json
echodrift's picture
End of training
698c4fd verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 40.0,
"eval_steps": 60,
"global_step": 2640,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9090909090909091,
"eval_f1": 0.45714285714285713,
"eval_loss": 1.0333150625228882,
"eval_runtime": 0.2764,
"eval_samples_per_second": 542.727,
"eval_steps_per_second": 10.855,
"step": 60
},
{
"epoch": 1.8181818181818183,
"eval_f1": 0.45753512132822477,
"eval_loss": 1.0936825275421143,
"eval_runtime": 0.2573,
"eval_samples_per_second": 582.969,
"eval_steps_per_second": 11.659,
"step": 120
},
{
"epoch": 2.7272727272727275,
"eval_f1": 0.4339920682933387,
"eval_loss": 1.4987602233886719,
"eval_runtime": 0.2565,
"eval_samples_per_second": 584.756,
"eval_steps_per_second": 11.695,
"step": 180
},
{
"epoch": 3.6363636363636362,
"eval_f1": 0.45818280469332856,
"eval_loss": 1.8738014698028564,
"eval_runtime": 0.2578,
"eval_samples_per_second": 581.75,
"eval_steps_per_second": 11.635,
"step": 240
},
{
"epoch": 4.545454545454545,
"eval_f1": 0.4140568475452196,
"eval_loss": 2.733259916305542,
"eval_runtime": 0.2589,
"eval_samples_per_second": 579.265,
"eval_steps_per_second": 11.585,
"step": 300
},
{
"epoch": 5.454545454545454,
"eval_f1": 0.44683464689418967,
"eval_loss": 3.1444668769836426,
"eval_runtime": 0.2571,
"eval_samples_per_second": 583.537,
"eval_steps_per_second": 11.671,
"step": 360
},
{
"epoch": 6.363636363636363,
"eval_f1": 0.5096665580536548,
"eval_loss": 3.21061110496521,
"eval_runtime": 0.2584,
"eval_samples_per_second": 580.495,
"eval_steps_per_second": 11.61,
"step": 420
},
{
"epoch": 7.2727272727272725,
"eval_f1": 0.4878383357764801,
"eval_loss": 3.321902275085449,
"eval_runtime": 0.257,
"eval_samples_per_second": 583.738,
"eval_steps_per_second": 11.675,
"step": 480
},
{
"epoch": 7.575757575757576,
"grad_norm": 0.48220202326774597,
"learning_rate": 4.2745229733103836e-05,
"loss": 0.3564,
"step": 500
},
{
"epoch": 8.181818181818182,
"eval_f1": 0.4492969396195203,
"eval_loss": 4.1565704345703125,
"eval_runtime": 0.2572,
"eval_samples_per_second": 583.148,
"eval_steps_per_second": 11.663,
"step": 540
},
{
"epoch": 9.090909090909092,
"eval_f1": 0.4938118761971973,
"eval_loss": 3.5661263465881348,
"eval_runtime": 0.2566,
"eval_samples_per_second": 584.642,
"eval_steps_per_second": 11.693,
"step": 600
},
{
"epoch": 10.0,
"eval_f1": 0.5015432098765432,
"eval_loss": 3.5243241786956787,
"eval_runtime": 0.2583,
"eval_samples_per_second": 580.781,
"eval_steps_per_second": 11.616,
"step": 660
},
{
"epoch": 10.909090909090908,
"eval_f1": 0.505655364014844,
"eval_loss": 3.75138258934021,
"eval_runtime": 0.2571,
"eval_samples_per_second": 583.337,
"eval_steps_per_second": 11.667,
"step": 720
},
{
"epoch": 11.818181818181818,
"eval_f1": 0.4607972609439961,
"eval_loss": 4.001529693603516,
"eval_runtime": 0.2582,
"eval_samples_per_second": 580.892,
"eval_steps_per_second": 11.618,
"step": 780
},
{
"epoch": 12.727272727272727,
"eval_f1": 0.42777020796344467,
"eval_loss": 4.467741012573242,
"eval_runtime": 0.2583,
"eval_samples_per_second": 580.706,
"eval_steps_per_second": 11.614,
"step": 840
},
{
"epoch": 13.636363636363637,
"eval_f1": 0.4676531781097319,
"eval_loss": 4.075722694396973,
"eval_runtime": 0.2567,
"eval_samples_per_second": 584.371,
"eval_steps_per_second": 11.687,
"step": 900
},
{
"epoch": 14.545454545454545,
"eval_f1": 0.4501031991744066,
"eval_loss": 4.4461283683776855,
"eval_runtime": 0.2603,
"eval_samples_per_second": 576.366,
"eval_steps_per_second": 11.527,
"step": 960
},
{
"epoch": 15.151515151515152,
"grad_norm": 0.0028503022622317076,
"learning_rate": 3.207179384432036e-05,
"loss": 0.0105,
"step": 1000
},
{
"epoch": 15.454545454545455,
"eval_f1": 0.48195172926589097,
"eval_loss": 4.167490005493164,
"eval_runtime": 0.2574,
"eval_samples_per_second": 582.842,
"eval_steps_per_second": 11.657,
"step": 1020
},
{
"epoch": 16.363636363636363,
"eval_f1": 0.4751515639251033,
"eval_loss": 4.203385353088379,
"eval_runtime": 0.2567,
"eval_samples_per_second": 584.443,
"eval_steps_per_second": 11.689,
"step": 1080
},
{
"epoch": 17.272727272727273,
"eval_f1": 0.48195172926589097,
"eval_loss": 4.214394569396973,
"eval_runtime": 0.2567,
"eval_samples_per_second": 584.422,
"eval_steps_per_second": 11.688,
"step": 1140
},
{
"epoch": 18.181818181818183,
"eval_f1": 0.48705198962930923,
"eval_loss": 4.216163158416748,
"eval_runtime": 0.2565,
"eval_samples_per_second": 584.697,
"eval_steps_per_second": 11.694,
"step": 1200
},
{
"epoch": 19.09090909090909,
"eval_f1": 0.4971560846560847,
"eval_loss": 4.0772294998168945,
"eval_runtime": 0.2583,
"eval_samples_per_second": 580.632,
"eval_steps_per_second": 11.613,
"step": 1260
},
{
"epoch": 20.0,
"eval_f1": 0.47333333333333333,
"eval_loss": 4.344212055206299,
"eval_runtime": 0.258,
"eval_samples_per_second": 581.316,
"eval_steps_per_second": 11.626,
"step": 1320
},
{
"epoch": 20.90909090909091,
"eval_f1": 0.49119604831179786,
"eval_loss": 4.21157693862915,
"eval_runtime": 0.2566,
"eval_samples_per_second": 584.581,
"eval_steps_per_second": 11.692,
"step": 1380
},
{
"epoch": 21.818181818181817,
"eval_f1": 0.48603460346034605,
"eval_loss": 4.196824073791504,
"eval_runtime": 0.2579,
"eval_samples_per_second": 581.615,
"eval_steps_per_second": 11.632,
"step": 1440
},
{
"epoch": 22.727272727272727,
"grad_norm": 0.001047088298946619,
"learning_rate": 1.841155861276481e-05,
"loss": 0.0008,
"step": 1500
},
{
"epoch": 22.727272727272727,
"eval_f1": 0.48547152194211013,
"eval_loss": 4.247754096984863,
"eval_runtime": 0.2573,
"eval_samples_per_second": 582.938,
"eval_steps_per_second": 11.659,
"step": 1500
},
{
"epoch": 23.636363636363637,
"eval_f1": 0.5041214040432384,
"eval_loss": 4.301153182983398,
"eval_runtime": 0.2581,
"eval_samples_per_second": 581.072,
"eval_steps_per_second": 11.621,
"step": 1560
},
{
"epoch": 24.545454545454547,
"eval_f1": 0.4779124579124579,
"eval_loss": 4.698268413543701,
"eval_runtime": 0.2579,
"eval_samples_per_second": 581.559,
"eval_steps_per_second": 11.631,
"step": 1620
},
{
"epoch": 25.454545454545453,
"eval_f1": 0.5193776254326713,
"eval_loss": 4.122583866119385,
"eval_runtime": 0.2577,
"eval_samples_per_second": 582.047,
"eval_steps_per_second": 11.641,
"step": 1680
},
{
"epoch": 26.363636363636363,
"eval_f1": 0.5282001115510212,
"eval_loss": 4.1304450035095215,
"eval_runtime": 0.258,
"eval_samples_per_second": 581.428,
"eval_steps_per_second": 11.629,
"step": 1740
},
{
"epoch": 27.272727272727273,
"eval_f1": 0.524983164983165,
"eval_loss": 4.14604377746582,
"eval_runtime": 0.2568,
"eval_samples_per_second": 584.087,
"eval_steps_per_second": 11.682,
"step": 1800
},
{
"epoch": 28.181818181818183,
"eval_f1": 0.5271497584541062,
"eval_loss": 4.162425518035889,
"eval_runtime": 0.2574,
"eval_samples_per_second": 582.788,
"eval_steps_per_second": 11.656,
"step": 1860
},
{
"epoch": 29.09090909090909,
"eval_f1": 0.5210251919339213,
"eval_loss": 4.175820350646973,
"eval_runtime": 0.2579,
"eval_samples_per_second": 581.623,
"eval_steps_per_second": 11.632,
"step": 1920
},
{
"epoch": 30.0,
"eval_f1": 0.5210251919339213,
"eval_loss": 4.181464195251465,
"eval_runtime": 0.2587,
"eval_samples_per_second": 579.804,
"eval_steps_per_second": 11.596,
"step": 1980
},
{
"epoch": 30.303030303030305,
"grad_norm": 0.001946401665918529,
"learning_rate": 6.459566593467505e-06,
"loss": 0.0005,
"step": 2000
},
{
"epoch": 30.90909090909091,
"eval_f1": 0.5153588182386594,
"eval_loss": 4.197451114654541,
"eval_runtime": 0.2601,
"eval_samples_per_second": 576.796,
"eval_steps_per_second": 11.536,
"step": 2040
},
{
"epoch": 31.818181818181817,
"eval_f1": 0.5153588182386594,
"eval_loss": 4.200737953186035,
"eval_runtime": 0.2597,
"eval_samples_per_second": 577.66,
"eval_steps_per_second": 11.553,
"step": 2100
},
{
"epoch": 32.72727272727273,
"eval_f1": 0.515993265993266,
"eval_loss": 4.207859992980957,
"eval_runtime": 0.2565,
"eval_samples_per_second": 584.761,
"eval_steps_per_second": 11.695,
"step": 2160
},
{
"epoch": 33.63636363636363,
"eval_f1": 0.4817174258359735,
"eval_loss": 4.322238445281982,
"eval_runtime": 0.2576,
"eval_samples_per_second": 582.321,
"eval_steps_per_second": 11.646,
"step": 2220
},
{
"epoch": 34.54545454545455,
"eval_f1": 0.4817174258359735,
"eval_loss": 4.3392863273620605,
"eval_runtime": 0.2587,
"eval_samples_per_second": 579.793,
"eval_steps_per_second": 11.596,
"step": 2280
},
{
"epoch": 35.45454545454545,
"eval_f1": 0.4817174258359735,
"eval_loss": 4.341335296630859,
"eval_runtime": 0.2577,
"eval_samples_per_second": 581.997,
"eval_steps_per_second": 11.64,
"step": 2340
},
{
"epoch": 36.36363636363637,
"eval_f1": 0.4817174258359735,
"eval_loss": 4.34324312210083,
"eval_runtime": 0.2588,
"eval_samples_per_second": 579.582,
"eval_steps_per_second": 11.592,
"step": 2400
},
{
"epoch": 37.27272727272727,
"eval_f1": 0.4817174258359735,
"eval_loss": 4.343974590301514,
"eval_runtime": 0.258,
"eval_samples_per_second": 581.478,
"eval_steps_per_second": 11.63,
"step": 2460
},
{
"epoch": 37.878787878787875,
"grad_norm": 0.0017814389429986477,
"learning_rate": 3.2373468513281763e-07,
"loss": 0.0001,
"step": 2500
},
{
"epoch": 38.18181818181818,
"eval_f1": 0.4817174258359735,
"eval_loss": 4.344239711761475,
"eval_runtime": 0.2579,
"eval_samples_per_second": 581.539,
"eval_steps_per_second": 11.631,
"step": 2520
},
{
"epoch": 39.09090909090909,
"eval_f1": 0.4817174258359735,
"eval_loss": 4.344237804412842,
"eval_runtime": 0.257,
"eval_samples_per_second": 583.583,
"eval_steps_per_second": 11.672,
"step": 2580
},
{
"epoch": 40.0,
"eval_f1": 0.4817174258359735,
"eval_loss": 4.345040798187256,
"eval_runtime": 0.2591,
"eval_samples_per_second": 579.035,
"eval_steps_per_second": 11.581,
"step": 2640
},
{
"epoch": 40.0,
"step": 2640,
"total_flos": 2762690886144000.0,
"train_loss": 0.06974004366167003,
"train_runtime": 251.5282,
"train_samples_per_second": 166.979,
"train_steps_per_second": 10.496
}
],
"logging_steps": 500,
"max_steps": 2640,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 1200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2762690886144000.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}