|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 40.0, |
|
"eval_steps": 60, |
|
"global_step": 2640, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"eval_f1": 0.45714285714285713, |
|
"eval_loss": 1.0333150625228882, |
|
"eval_runtime": 0.2764, |
|
"eval_samples_per_second": 542.727, |
|
"eval_steps_per_second": 10.855, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"eval_f1": 0.45753512132822477, |
|
"eval_loss": 1.0936825275421143, |
|
"eval_runtime": 0.2573, |
|
"eval_samples_per_second": 582.969, |
|
"eval_steps_per_second": 11.659, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.7272727272727275, |
|
"eval_f1": 0.4339920682933387, |
|
"eval_loss": 1.4987602233886719, |
|
"eval_runtime": 0.2565, |
|
"eval_samples_per_second": 584.756, |
|
"eval_steps_per_second": 11.695, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.6363636363636362, |
|
"eval_f1": 0.45818280469332856, |
|
"eval_loss": 1.8738014698028564, |
|
"eval_runtime": 0.2578, |
|
"eval_samples_per_second": 581.75, |
|
"eval_steps_per_second": 11.635, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.545454545454545, |
|
"eval_f1": 0.4140568475452196, |
|
"eval_loss": 2.733259916305542, |
|
"eval_runtime": 0.2589, |
|
"eval_samples_per_second": 579.265, |
|
"eval_steps_per_second": 11.585, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.454545454545454, |
|
"eval_f1": 0.44683464689418967, |
|
"eval_loss": 3.1444668769836426, |
|
"eval_runtime": 0.2571, |
|
"eval_samples_per_second": 583.537, |
|
"eval_steps_per_second": 11.671, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.363636363636363, |
|
"eval_f1": 0.5096665580536548, |
|
"eval_loss": 3.21061110496521, |
|
"eval_runtime": 0.2584, |
|
"eval_samples_per_second": 580.495, |
|
"eval_steps_per_second": 11.61, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.2727272727272725, |
|
"eval_f1": 0.4878383357764801, |
|
"eval_loss": 3.321902275085449, |
|
"eval_runtime": 0.257, |
|
"eval_samples_per_second": 583.738, |
|
"eval_steps_per_second": 11.675, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.575757575757576, |
|
"grad_norm": 0.48220202326774597, |
|
"learning_rate": 4.2745229733103836e-05, |
|
"loss": 0.3564, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.181818181818182, |
|
"eval_f1": 0.4492969396195203, |
|
"eval_loss": 4.1565704345703125, |
|
"eval_runtime": 0.2572, |
|
"eval_samples_per_second": 583.148, |
|
"eval_steps_per_second": 11.663, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 9.090909090909092, |
|
"eval_f1": 0.4938118761971973, |
|
"eval_loss": 3.5661263465881348, |
|
"eval_runtime": 0.2566, |
|
"eval_samples_per_second": 584.642, |
|
"eval_steps_per_second": 11.693, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_f1": 0.5015432098765432, |
|
"eval_loss": 3.5243241786956787, |
|
"eval_runtime": 0.2583, |
|
"eval_samples_per_second": 580.781, |
|
"eval_steps_per_second": 11.616, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 10.909090909090908, |
|
"eval_f1": 0.505655364014844, |
|
"eval_loss": 3.75138258934021, |
|
"eval_runtime": 0.2571, |
|
"eval_samples_per_second": 583.337, |
|
"eval_steps_per_second": 11.667, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 11.818181818181818, |
|
"eval_f1": 0.4607972609439961, |
|
"eval_loss": 4.001529693603516, |
|
"eval_runtime": 0.2582, |
|
"eval_samples_per_second": 580.892, |
|
"eval_steps_per_second": 11.618, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 12.727272727272727, |
|
"eval_f1": 0.42777020796344467, |
|
"eval_loss": 4.467741012573242, |
|
"eval_runtime": 0.2583, |
|
"eval_samples_per_second": 580.706, |
|
"eval_steps_per_second": 11.614, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 13.636363636363637, |
|
"eval_f1": 0.4676531781097319, |
|
"eval_loss": 4.075722694396973, |
|
"eval_runtime": 0.2567, |
|
"eval_samples_per_second": 584.371, |
|
"eval_steps_per_second": 11.687, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 14.545454545454545, |
|
"eval_f1": 0.4501031991744066, |
|
"eval_loss": 4.4461283683776855, |
|
"eval_runtime": 0.2603, |
|
"eval_samples_per_second": 576.366, |
|
"eval_steps_per_second": 11.527, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 15.151515151515152, |
|
"grad_norm": 0.0028503022622317076, |
|
"learning_rate": 3.207179384432036e-05, |
|
"loss": 0.0105, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 15.454545454545455, |
|
"eval_f1": 0.48195172926589097, |
|
"eval_loss": 4.167490005493164, |
|
"eval_runtime": 0.2574, |
|
"eval_samples_per_second": 582.842, |
|
"eval_steps_per_second": 11.657, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 16.363636363636363, |
|
"eval_f1": 0.4751515639251033, |
|
"eval_loss": 4.203385353088379, |
|
"eval_runtime": 0.2567, |
|
"eval_samples_per_second": 584.443, |
|
"eval_steps_per_second": 11.689, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 17.272727272727273, |
|
"eval_f1": 0.48195172926589097, |
|
"eval_loss": 4.214394569396973, |
|
"eval_runtime": 0.2567, |
|
"eval_samples_per_second": 584.422, |
|
"eval_steps_per_second": 11.688, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 18.181818181818183, |
|
"eval_f1": 0.48705198962930923, |
|
"eval_loss": 4.216163158416748, |
|
"eval_runtime": 0.2565, |
|
"eval_samples_per_second": 584.697, |
|
"eval_steps_per_second": 11.694, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 19.09090909090909, |
|
"eval_f1": 0.4971560846560847, |
|
"eval_loss": 4.0772294998168945, |
|
"eval_runtime": 0.2583, |
|
"eval_samples_per_second": 580.632, |
|
"eval_steps_per_second": 11.613, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_f1": 0.47333333333333333, |
|
"eval_loss": 4.344212055206299, |
|
"eval_runtime": 0.258, |
|
"eval_samples_per_second": 581.316, |
|
"eval_steps_per_second": 11.626, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 20.90909090909091, |
|
"eval_f1": 0.49119604831179786, |
|
"eval_loss": 4.21157693862915, |
|
"eval_runtime": 0.2566, |
|
"eval_samples_per_second": 584.581, |
|
"eval_steps_per_second": 11.692, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 21.818181818181817, |
|
"eval_f1": 0.48603460346034605, |
|
"eval_loss": 4.196824073791504, |
|
"eval_runtime": 0.2579, |
|
"eval_samples_per_second": 581.615, |
|
"eval_steps_per_second": 11.632, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 22.727272727272727, |
|
"grad_norm": 0.001047088298946619, |
|
"learning_rate": 1.841155861276481e-05, |
|
"loss": 0.0008, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 22.727272727272727, |
|
"eval_f1": 0.48547152194211013, |
|
"eval_loss": 4.247754096984863, |
|
"eval_runtime": 0.2573, |
|
"eval_samples_per_second": 582.938, |
|
"eval_steps_per_second": 11.659, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 23.636363636363637, |
|
"eval_f1": 0.5041214040432384, |
|
"eval_loss": 4.301153182983398, |
|
"eval_runtime": 0.2581, |
|
"eval_samples_per_second": 581.072, |
|
"eval_steps_per_second": 11.621, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 24.545454545454547, |
|
"eval_f1": 0.4779124579124579, |
|
"eval_loss": 4.698268413543701, |
|
"eval_runtime": 0.2579, |
|
"eval_samples_per_second": 581.559, |
|
"eval_steps_per_second": 11.631, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 25.454545454545453, |
|
"eval_f1": 0.5193776254326713, |
|
"eval_loss": 4.122583866119385, |
|
"eval_runtime": 0.2577, |
|
"eval_samples_per_second": 582.047, |
|
"eval_steps_per_second": 11.641, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 26.363636363636363, |
|
"eval_f1": 0.5282001115510212, |
|
"eval_loss": 4.1304450035095215, |
|
"eval_runtime": 0.258, |
|
"eval_samples_per_second": 581.428, |
|
"eval_steps_per_second": 11.629, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 27.272727272727273, |
|
"eval_f1": 0.524983164983165, |
|
"eval_loss": 4.14604377746582, |
|
"eval_runtime": 0.2568, |
|
"eval_samples_per_second": 584.087, |
|
"eval_steps_per_second": 11.682, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 28.181818181818183, |
|
"eval_f1": 0.5271497584541062, |
|
"eval_loss": 4.162425518035889, |
|
"eval_runtime": 0.2574, |
|
"eval_samples_per_second": 582.788, |
|
"eval_steps_per_second": 11.656, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 29.09090909090909, |
|
"eval_f1": 0.5210251919339213, |
|
"eval_loss": 4.175820350646973, |
|
"eval_runtime": 0.2579, |
|
"eval_samples_per_second": 581.623, |
|
"eval_steps_per_second": 11.632, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_f1": 0.5210251919339213, |
|
"eval_loss": 4.181464195251465, |
|
"eval_runtime": 0.2587, |
|
"eval_samples_per_second": 579.804, |
|
"eval_steps_per_second": 11.596, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 30.303030303030305, |
|
"grad_norm": 0.001946401665918529, |
|
"learning_rate": 6.459566593467505e-06, |
|
"loss": 0.0005, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 30.90909090909091, |
|
"eval_f1": 0.5153588182386594, |
|
"eval_loss": 4.197451114654541, |
|
"eval_runtime": 0.2601, |
|
"eval_samples_per_second": 576.796, |
|
"eval_steps_per_second": 11.536, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 31.818181818181817, |
|
"eval_f1": 0.5153588182386594, |
|
"eval_loss": 4.200737953186035, |
|
"eval_runtime": 0.2597, |
|
"eval_samples_per_second": 577.66, |
|
"eval_steps_per_second": 11.553, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 32.72727272727273, |
|
"eval_f1": 0.515993265993266, |
|
"eval_loss": 4.207859992980957, |
|
"eval_runtime": 0.2565, |
|
"eval_samples_per_second": 584.761, |
|
"eval_steps_per_second": 11.695, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 33.63636363636363, |
|
"eval_f1": 0.4817174258359735, |
|
"eval_loss": 4.322238445281982, |
|
"eval_runtime": 0.2576, |
|
"eval_samples_per_second": 582.321, |
|
"eval_steps_per_second": 11.646, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 34.54545454545455, |
|
"eval_f1": 0.4817174258359735, |
|
"eval_loss": 4.3392863273620605, |
|
"eval_runtime": 0.2587, |
|
"eval_samples_per_second": 579.793, |
|
"eval_steps_per_second": 11.596, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 35.45454545454545, |
|
"eval_f1": 0.4817174258359735, |
|
"eval_loss": 4.341335296630859, |
|
"eval_runtime": 0.2577, |
|
"eval_samples_per_second": 581.997, |
|
"eval_steps_per_second": 11.64, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 36.36363636363637, |
|
"eval_f1": 0.4817174258359735, |
|
"eval_loss": 4.34324312210083, |
|
"eval_runtime": 0.2588, |
|
"eval_samples_per_second": 579.582, |
|
"eval_steps_per_second": 11.592, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 37.27272727272727, |
|
"eval_f1": 0.4817174258359735, |
|
"eval_loss": 4.343974590301514, |
|
"eval_runtime": 0.258, |
|
"eval_samples_per_second": 581.478, |
|
"eval_steps_per_second": 11.63, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 37.878787878787875, |
|
"grad_norm": 0.0017814389429986477, |
|
"learning_rate": 3.2373468513281763e-07, |
|
"loss": 0.0001, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 38.18181818181818, |
|
"eval_f1": 0.4817174258359735, |
|
"eval_loss": 4.344239711761475, |
|
"eval_runtime": 0.2579, |
|
"eval_samples_per_second": 581.539, |
|
"eval_steps_per_second": 11.631, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 39.09090909090909, |
|
"eval_f1": 0.4817174258359735, |
|
"eval_loss": 4.344237804412842, |
|
"eval_runtime": 0.257, |
|
"eval_samples_per_second": 583.583, |
|
"eval_steps_per_second": 11.672, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_f1": 0.4817174258359735, |
|
"eval_loss": 4.345040798187256, |
|
"eval_runtime": 0.2591, |
|
"eval_samples_per_second": 579.035, |
|
"eval_steps_per_second": 11.581, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"step": 2640, |
|
"total_flos": 2762690886144000.0, |
|
"train_loss": 0.06974004366167003, |
|
"train_runtime": 251.5282, |
|
"train_samples_per_second": 166.979, |
|
"train_steps_per_second": 10.496 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2640, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 1200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2762690886144000.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|