{ "best_metric": null, "best_model_checkpoint": null, "epoch": 40.0, "eval_steps": 60, "global_step": 2640, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9090909090909091, "eval_f1": 0.45714285714285713, "eval_loss": 1.0333150625228882, "eval_runtime": 0.2764, "eval_samples_per_second": 542.727, "eval_steps_per_second": 10.855, "step": 60 }, { "epoch": 1.8181818181818183, "eval_f1": 0.45753512132822477, "eval_loss": 1.0936825275421143, "eval_runtime": 0.2573, "eval_samples_per_second": 582.969, "eval_steps_per_second": 11.659, "step": 120 }, { "epoch": 2.7272727272727275, "eval_f1": 0.4339920682933387, "eval_loss": 1.4987602233886719, "eval_runtime": 0.2565, "eval_samples_per_second": 584.756, "eval_steps_per_second": 11.695, "step": 180 }, { "epoch": 3.6363636363636362, "eval_f1": 0.45818280469332856, "eval_loss": 1.8738014698028564, "eval_runtime": 0.2578, "eval_samples_per_second": 581.75, "eval_steps_per_second": 11.635, "step": 240 }, { "epoch": 4.545454545454545, "eval_f1": 0.4140568475452196, "eval_loss": 2.733259916305542, "eval_runtime": 0.2589, "eval_samples_per_second": 579.265, "eval_steps_per_second": 11.585, "step": 300 }, { "epoch": 5.454545454545454, "eval_f1": 0.44683464689418967, "eval_loss": 3.1444668769836426, "eval_runtime": 0.2571, "eval_samples_per_second": 583.537, "eval_steps_per_second": 11.671, "step": 360 }, { "epoch": 6.363636363636363, "eval_f1": 0.5096665580536548, "eval_loss": 3.21061110496521, "eval_runtime": 0.2584, "eval_samples_per_second": 580.495, "eval_steps_per_second": 11.61, "step": 420 }, { "epoch": 7.2727272727272725, "eval_f1": 0.4878383357764801, "eval_loss": 3.321902275085449, "eval_runtime": 0.257, "eval_samples_per_second": 583.738, "eval_steps_per_second": 11.675, "step": 480 }, { "epoch": 7.575757575757576, "grad_norm": 0.48220202326774597, "learning_rate": 4.2745229733103836e-05, "loss": 0.3564, "step": 500 }, { "epoch": 8.181818181818182, "eval_f1": 0.4492969396195203, "eval_loss": 4.1565704345703125, "eval_runtime": 0.2572, "eval_samples_per_second": 583.148, "eval_steps_per_second": 11.663, "step": 540 }, { "epoch": 9.090909090909092, "eval_f1": 0.4938118761971973, "eval_loss": 3.5661263465881348, "eval_runtime": 0.2566, "eval_samples_per_second": 584.642, "eval_steps_per_second": 11.693, "step": 600 }, { "epoch": 10.0, "eval_f1": 0.5015432098765432, "eval_loss": 3.5243241786956787, "eval_runtime": 0.2583, "eval_samples_per_second": 580.781, "eval_steps_per_second": 11.616, "step": 660 }, { "epoch": 10.909090909090908, "eval_f1": 0.505655364014844, "eval_loss": 3.75138258934021, "eval_runtime": 0.2571, "eval_samples_per_second": 583.337, "eval_steps_per_second": 11.667, "step": 720 }, { "epoch": 11.818181818181818, "eval_f1": 0.4607972609439961, "eval_loss": 4.001529693603516, "eval_runtime": 0.2582, "eval_samples_per_second": 580.892, "eval_steps_per_second": 11.618, "step": 780 }, { "epoch": 12.727272727272727, "eval_f1": 0.42777020796344467, "eval_loss": 4.467741012573242, "eval_runtime": 0.2583, "eval_samples_per_second": 580.706, "eval_steps_per_second": 11.614, "step": 840 }, { "epoch": 13.636363636363637, "eval_f1": 0.4676531781097319, "eval_loss": 4.075722694396973, "eval_runtime": 0.2567, "eval_samples_per_second": 584.371, "eval_steps_per_second": 11.687, "step": 900 }, { "epoch": 14.545454545454545, "eval_f1": 0.4501031991744066, "eval_loss": 4.4461283683776855, "eval_runtime": 0.2603, "eval_samples_per_second": 576.366, "eval_steps_per_second": 11.527, "step": 960 }, { "epoch": 15.151515151515152, "grad_norm": 0.0028503022622317076, "learning_rate": 3.207179384432036e-05, "loss": 0.0105, "step": 1000 }, { "epoch": 15.454545454545455, "eval_f1": 0.48195172926589097, "eval_loss": 4.167490005493164, "eval_runtime": 0.2574, "eval_samples_per_second": 582.842, "eval_steps_per_second": 11.657, "step": 1020 }, { "epoch": 16.363636363636363, "eval_f1": 0.4751515639251033, "eval_loss": 4.203385353088379, "eval_runtime": 0.2567, "eval_samples_per_second": 584.443, "eval_steps_per_second": 11.689, "step": 1080 }, { "epoch": 17.272727272727273, "eval_f1": 0.48195172926589097, "eval_loss": 4.214394569396973, "eval_runtime": 0.2567, "eval_samples_per_second": 584.422, "eval_steps_per_second": 11.688, "step": 1140 }, { "epoch": 18.181818181818183, "eval_f1": 0.48705198962930923, "eval_loss": 4.216163158416748, "eval_runtime": 0.2565, "eval_samples_per_second": 584.697, "eval_steps_per_second": 11.694, "step": 1200 }, { "epoch": 19.09090909090909, "eval_f1": 0.4971560846560847, "eval_loss": 4.0772294998168945, "eval_runtime": 0.2583, "eval_samples_per_second": 580.632, "eval_steps_per_second": 11.613, "step": 1260 }, { "epoch": 20.0, "eval_f1": 0.47333333333333333, "eval_loss": 4.344212055206299, "eval_runtime": 0.258, "eval_samples_per_second": 581.316, "eval_steps_per_second": 11.626, "step": 1320 }, { "epoch": 20.90909090909091, "eval_f1": 0.49119604831179786, "eval_loss": 4.21157693862915, "eval_runtime": 0.2566, "eval_samples_per_second": 584.581, "eval_steps_per_second": 11.692, "step": 1380 }, { "epoch": 21.818181818181817, "eval_f1": 0.48603460346034605, "eval_loss": 4.196824073791504, "eval_runtime": 0.2579, "eval_samples_per_second": 581.615, "eval_steps_per_second": 11.632, "step": 1440 }, { "epoch": 22.727272727272727, "grad_norm": 0.001047088298946619, "learning_rate": 1.841155861276481e-05, "loss": 0.0008, "step": 1500 }, { "epoch": 22.727272727272727, "eval_f1": 0.48547152194211013, "eval_loss": 4.247754096984863, "eval_runtime": 0.2573, "eval_samples_per_second": 582.938, "eval_steps_per_second": 11.659, "step": 1500 }, { "epoch": 23.636363636363637, "eval_f1": 0.5041214040432384, "eval_loss": 4.301153182983398, "eval_runtime": 0.2581, "eval_samples_per_second": 581.072, "eval_steps_per_second": 11.621, "step": 1560 }, { "epoch": 24.545454545454547, "eval_f1": 0.4779124579124579, "eval_loss": 4.698268413543701, "eval_runtime": 0.2579, "eval_samples_per_second": 581.559, "eval_steps_per_second": 11.631, "step": 1620 }, { "epoch": 25.454545454545453, "eval_f1": 0.5193776254326713, "eval_loss": 4.122583866119385, "eval_runtime": 0.2577, "eval_samples_per_second": 582.047, "eval_steps_per_second": 11.641, "step": 1680 }, { "epoch": 26.363636363636363, "eval_f1": 0.5282001115510212, "eval_loss": 4.1304450035095215, "eval_runtime": 0.258, "eval_samples_per_second": 581.428, "eval_steps_per_second": 11.629, "step": 1740 }, { "epoch": 27.272727272727273, "eval_f1": 0.524983164983165, "eval_loss": 4.14604377746582, "eval_runtime": 0.2568, "eval_samples_per_second": 584.087, "eval_steps_per_second": 11.682, "step": 1800 }, { "epoch": 28.181818181818183, "eval_f1": 0.5271497584541062, "eval_loss": 4.162425518035889, "eval_runtime": 0.2574, "eval_samples_per_second": 582.788, "eval_steps_per_second": 11.656, "step": 1860 }, { "epoch": 29.09090909090909, "eval_f1": 0.5210251919339213, "eval_loss": 4.175820350646973, "eval_runtime": 0.2579, "eval_samples_per_second": 581.623, "eval_steps_per_second": 11.632, "step": 1920 }, { "epoch": 30.0, "eval_f1": 0.5210251919339213, "eval_loss": 4.181464195251465, "eval_runtime": 0.2587, "eval_samples_per_second": 579.804, "eval_steps_per_second": 11.596, "step": 1980 }, { "epoch": 30.303030303030305, "grad_norm": 0.001946401665918529, "learning_rate": 6.459566593467505e-06, "loss": 0.0005, "step": 2000 }, { "epoch": 30.90909090909091, "eval_f1": 0.5153588182386594, "eval_loss": 4.197451114654541, "eval_runtime": 0.2601, "eval_samples_per_second": 576.796, "eval_steps_per_second": 11.536, "step": 2040 }, { "epoch": 31.818181818181817, "eval_f1": 0.5153588182386594, "eval_loss": 4.200737953186035, "eval_runtime": 0.2597, "eval_samples_per_second": 577.66, "eval_steps_per_second": 11.553, "step": 2100 }, { "epoch": 32.72727272727273, "eval_f1": 0.515993265993266, "eval_loss": 4.207859992980957, "eval_runtime": 0.2565, "eval_samples_per_second": 584.761, "eval_steps_per_second": 11.695, "step": 2160 }, { "epoch": 33.63636363636363, "eval_f1": 0.4817174258359735, "eval_loss": 4.322238445281982, "eval_runtime": 0.2576, "eval_samples_per_second": 582.321, "eval_steps_per_second": 11.646, "step": 2220 }, { "epoch": 34.54545454545455, "eval_f1": 0.4817174258359735, "eval_loss": 4.3392863273620605, "eval_runtime": 0.2587, "eval_samples_per_second": 579.793, "eval_steps_per_second": 11.596, "step": 2280 }, { "epoch": 35.45454545454545, "eval_f1": 0.4817174258359735, "eval_loss": 4.341335296630859, "eval_runtime": 0.2577, "eval_samples_per_second": 581.997, "eval_steps_per_second": 11.64, "step": 2340 }, { "epoch": 36.36363636363637, "eval_f1": 0.4817174258359735, "eval_loss": 4.34324312210083, "eval_runtime": 0.2588, "eval_samples_per_second": 579.582, "eval_steps_per_second": 11.592, "step": 2400 }, { "epoch": 37.27272727272727, "eval_f1": 0.4817174258359735, "eval_loss": 4.343974590301514, "eval_runtime": 0.258, "eval_samples_per_second": 581.478, "eval_steps_per_second": 11.63, "step": 2460 }, { "epoch": 37.878787878787875, "grad_norm": 0.0017814389429986477, "learning_rate": 3.2373468513281763e-07, "loss": 0.0001, "step": 2500 }, { "epoch": 38.18181818181818, "eval_f1": 0.4817174258359735, "eval_loss": 4.344239711761475, "eval_runtime": 0.2579, "eval_samples_per_second": 581.539, "eval_steps_per_second": 11.631, "step": 2520 }, { "epoch": 39.09090909090909, "eval_f1": 0.4817174258359735, "eval_loss": 4.344237804412842, "eval_runtime": 0.257, "eval_samples_per_second": 583.583, "eval_steps_per_second": 11.672, "step": 2580 }, { "epoch": 40.0, "eval_f1": 0.4817174258359735, "eval_loss": 4.345040798187256, "eval_runtime": 0.2591, "eval_samples_per_second": 579.035, "eval_steps_per_second": 11.581, "step": 2640 }, { "epoch": 40.0, "step": 2640, "total_flos": 2762690886144000.0, "train_loss": 0.06974004366167003, "train_runtime": 251.5282, "train_samples_per_second": 166.979, "train_steps_per_second": 10.496 } ], "logging_steps": 500, "max_steps": 2640, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 1200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2762690886144000.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }