|
{ |
|
"best_metric": 0.7319819819819819, |
|
"best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-812", |
|
"epoch": 59.00721153846154, |
|
"eval_steps": 500, |
|
"global_step": 832, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01201923076923077, |
|
"grad_norm": 4.912038326263428, |
|
"learning_rate": 5.9523809523809525e-06, |
|
"loss": 4.238, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.016826923076923076, |
|
"eval_accuracy": 0.018018018018018018, |
|
"eval_loss": 4.223487377166748, |
|
"eval_runtime": 70.4378, |
|
"eval_samples_per_second": 6.303, |
|
"eval_steps_per_second": 0.099, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.0072115384615385, |
|
"grad_norm": 5.0958333015441895, |
|
"learning_rate": 1.1904761904761905e-05, |
|
"loss": 4.2658, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.0168269230769231, |
|
"eval_accuracy": 0.02027027027027027, |
|
"eval_loss": 4.207489490509033, |
|
"eval_runtime": 72.9674, |
|
"eval_samples_per_second": 6.085, |
|
"eval_steps_per_second": 0.096, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 2.0024038461538463, |
|
"grad_norm": 3.8229589462280273, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 4.2236, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.014423076923077, |
|
"grad_norm": 3.7527875900268555, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 4.219, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.016826923076923, |
|
"eval_accuracy": 0.015765765765765764, |
|
"eval_loss": 4.205322265625, |
|
"eval_runtime": 70.576, |
|
"eval_samples_per_second": 6.291, |
|
"eval_steps_per_second": 0.099, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 3.0096153846153846, |
|
"grad_norm": 2.5568084716796875, |
|
"learning_rate": 2.9761904761904762e-05, |
|
"loss": 4.2146, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.016826923076923, |
|
"eval_accuracy": 0.02252252252252252, |
|
"eval_loss": 4.209786415100098, |
|
"eval_runtime": 73.3716, |
|
"eval_samples_per_second": 6.051, |
|
"eval_steps_per_second": 0.095, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 4.0048076923076925, |
|
"grad_norm": 2.623307943344116, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 4.2083, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.016826923076923, |
|
"grad_norm": 11.633626937866211, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 4.1925, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 4.016826923076923, |
|
"eval_accuracy": 0.02252252252252252, |
|
"eval_loss": 4.182406902313232, |
|
"eval_runtime": 73.5871, |
|
"eval_samples_per_second": 6.034, |
|
"eval_steps_per_second": 0.095, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 5.012019230769231, |
|
"grad_norm": 4.2888689041137695, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 4.1192, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.016826923076923, |
|
"eval_accuracy": 0.038288288288288286, |
|
"eval_loss": 4.081945896148682, |
|
"eval_runtime": 77.2884, |
|
"eval_samples_per_second": 5.745, |
|
"eval_steps_per_second": 0.091, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 6.007211538461538, |
|
"grad_norm": 3.5548884868621826, |
|
"learning_rate": 4.959893048128342e-05, |
|
"loss": 4.0297, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 6.016826923076923, |
|
"eval_accuracy": 0.06981981981981981, |
|
"eval_loss": 3.981917142868042, |
|
"eval_runtime": 72.62, |
|
"eval_samples_per_second": 6.114, |
|
"eval_steps_per_second": 0.096, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 7.002403846153846, |
|
"grad_norm": 6.507851600646973, |
|
"learning_rate": 4.8930481283422465e-05, |
|
"loss": 3.9499, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 7.014423076923077, |
|
"grad_norm": 5.1739959716796875, |
|
"learning_rate": 4.8262032085561496e-05, |
|
"loss": 3.7134, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 7.016826923076923, |
|
"eval_accuracy": 0.1036036036036036, |
|
"eval_loss": 3.7339625358581543, |
|
"eval_runtime": 73.0624, |
|
"eval_samples_per_second": 6.077, |
|
"eval_steps_per_second": 0.096, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 8.009615384615385, |
|
"grad_norm": 4.7554779052734375, |
|
"learning_rate": 4.759358288770054e-05, |
|
"loss": 3.5289, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 8.016826923076923, |
|
"eval_accuracy": 0.18018018018018017, |
|
"eval_loss": 3.420488119125366, |
|
"eval_runtime": 73.5384, |
|
"eval_samples_per_second": 6.038, |
|
"eval_steps_per_second": 0.095, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 9.004807692307692, |
|
"grad_norm": 6.714512348175049, |
|
"learning_rate": 4.6925133689839576e-05, |
|
"loss": 3.3709, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 9.016826923076923, |
|
"grad_norm": 18.599552154541016, |
|
"learning_rate": 4.625668449197861e-05, |
|
"loss": 3.0625, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 9.016826923076923, |
|
"eval_accuracy": 0.26126126126126126, |
|
"eval_loss": 3.196960687637329, |
|
"eval_runtime": 74.7592, |
|
"eval_samples_per_second": 5.939, |
|
"eval_steps_per_second": 0.094, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 10.01201923076923, |
|
"grad_norm": 7.006440162658691, |
|
"learning_rate": 4.558823529411765e-05, |
|
"loss": 2.8776, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 10.016826923076923, |
|
"eval_accuracy": 0.30180180180180183, |
|
"eval_loss": 2.9350106716156006, |
|
"eval_runtime": 75.4948, |
|
"eval_samples_per_second": 5.881, |
|
"eval_steps_per_second": 0.093, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 11.007211538461538, |
|
"grad_norm": 6.219061374664307, |
|
"learning_rate": 4.491978609625669e-05, |
|
"loss": 2.6375, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 11.016826923076923, |
|
"eval_accuracy": 0.36036036036036034, |
|
"eval_loss": 2.762986660003662, |
|
"eval_runtime": 73.4162, |
|
"eval_samples_per_second": 6.048, |
|
"eval_steps_per_second": 0.095, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 12.002403846153847, |
|
"grad_norm": 6.574148178100586, |
|
"learning_rate": 4.4251336898395724e-05, |
|
"loss": 2.4071, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 12.014423076923077, |
|
"grad_norm": 8.191123008728027, |
|
"learning_rate": 4.358288770053476e-05, |
|
"loss": 2.2954, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 12.016826923076923, |
|
"eval_accuracy": 0.42792792792792794, |
|
"eval_loss": 2.4990594387054443, |
|
"eval_runtime": 80.1492, |
|
"eval_samples_per_second": 5.54, |
|
"eval_steps_per_second": 0.087, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 13.009615384615385, |
|
"grad_norm": 6.4414825439453125, |
|
"learning_rate": 4.29144385026738e-05, |
|
"loss": 2.1337, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 13.016826923076923, |
|
"eval_accuracy": 0.43243243243243246, |
|
"eval_loss": 2.3827102184295654, |
|
"eval_runtime": 73.7035, |
|
"eval_samples_per_second": 6.024, |
|
"eval_steps_per_second": 0.095, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 14.004807692307692, |
|
"grad_norm": 8.87990951538086, |
|
"learning_rate": 4.224598930481284e-05, |
|
"loss": 1.9319, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 14.016826923076923, |
|
"grad_norm": 22.130002975463867, |
|
"learning_rate": 4.157754010695187e-05, |
|
"loss": 1.8195, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 14.016826923076923, |
|
"eval_accuracy": 0.481981981981982, |
|
"eval_loss": 2.2311551570892334, |
|
"eval_runtime": 75.5729, |
|
"eval_samples_per_second": 5.875, |
|
"eval_steps_per_second": 0.093, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 15.01201923076923, |
|
"grad_norm": 7.134392261505127, |
|
"learning_rate": 4.0909090909090915e-05, |
|
"loss": 1.6436, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 15.016826923076923, |
|
"eval_accuracy": 0.44594594594594594, |
|
"eval_loss": 2.3628787994384766, |
|
"eval_runtime": 76.2152, |
|
"eval_samples_per_second": 5.826, |
|
"eval_steps_per_second": 0.092, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 16.00721153846154, |
|
"grad_norm": 11.737651824951172, |
|
"learning_rate": 4.024064171122995e-05, |
|
"loss": 1.6289, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 16.016826923076923, |
|
"eval_accuracy": 0.4752252252252252, |
|
"eval_loss": 2.151165723800659, |
|
"eval_runtime": 72.7861, |
|
"eval_samples_per_second": 6.1, |
|
"eval_steps_per_second": 0.096, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 17.002403846153847, |
|
"grad_norm": 12.988323211669922, |
|
"learning_rate": 3.957219251336899e-05, |
|
"loss": 1.423, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 17.014423076923077, |
|
"grad_norm": 5.394784450531006, |
|
"learning_rate": 3.8903743315508025e-05, |
|
"loss": 1.2957, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 17.016826923076923, |
|
"eval_accuracy": 0.5022522522522522, |
|
"eval_loss": 2.0142312049865723, |
|
"eval_runtime": 74.6687, |
|
"eval_samples_per_second": 5.946, |
|
"eval_steps_per_second": 0.094, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 18.009615384615383, |
|
"grad_norm": 6.641128063201904, |
|
"learning_rate": 3.8235294117647055e-05, |
|
"loss": 1.2761, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 18.016826923076923, |
|
"eval_accuracy": 0.527027027027027, |
|
"eval_loss": 1.906096339225769, |
|
"eval_runtime": 75.2853, |
|
"eval_samples_per_second": 5.898, |
|
"eval_steps_per_second": 0.093, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 19.004807692307693, |
|
"grad_norm": 7.859009742736816, |
|
"learning_rate": 3.75668449197861e-05, |
|
"loss": 1.217, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 19.016826923076923, |
|
"grad_norm": 29.06666374206543, |
|
"learning_rate": 3.6898395721925136e-05, |
|
"loss": 1.1118, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 19.016826923076923, |
|
"eval_accuracy": 0.5495495495495496, |
|
"eval_loss": 1.811281681060791, |
|
"eval_runtime": 81.5227, |
|
"eval_samples_per_second": 5.446, |
|
"eval_steps_per_second": 0.086, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 20.01201923076923, |
|
"grad_norm": 8.949642181396484, |
|
"learning_rate": 3.622994652406417e-05, |
|
"loss": 0.9642, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 20.016826923076923, |
|
"eval_accuracy": 0.6036036036036037, |
|
"eval_loss": 1.727989673614502, |
|
"eval_runtime": 72.1469, |
|
"eval_samples_per_second": 6.154, |
|
"eval_steps_per_second": 0.097, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 21.00721153846154, |
|
"grad_norm": 6.73243522644043, |
|
"learning_rate": 3.556149732620321e-05, |
|
"loss": 0.894, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 21.016826923076923, |
|
"eval_accuracy": 0.5382882882882883, |
|
"eval_loss": 1.8722944259643555, |
|
"eval_runtime": 76.1286, |
|
"eval_samples_per_second": 5.832, |
|
"eval_steps_per_second": 0.092, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 22.002403846153847, |
|
"grad_norm": 8.856271743774414, |
|
"learning_rate": 3.489304812834225e-05, |
|
"loss": 0.8454, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 22.014423076923077, |
|
"grad_norm": 6.268283843994141, |
|
"learning_rate": 3.4224598930481284e-05, |
|
"loss": 0.7974, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 22.016826923076923, |
|
"eval_accuracy": 0.6058558558558559, |
|
"eval_loss": 1.658478021621704, |
|
"eval_runtime": 69.8498, |
|
"eval_samples_per_second": 6.356, |
|
"eval_steps_per_second": 0.1, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 23.009615384615383, |
|
"grad_norm": 6.027498722076416, |
|
"learning_rate": 3.355614973262032e-05, |
|
"loss": 0.833, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 23.016826923076923, |
|
"eval_accuracy": 0.6148648648648649, |
|
"eval_loss": 1.599608302116394, |
|
"eval_runtime": 76.7983, |
|
"eval_samples_per_second": 5.781, |
|
"eval_steps_per_second": 0.091, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 24.004807692307693, |
|
"grad_norm": 5.669070243835449, |
|
"learning_rate": 3.288770053475936e-05, |
|
"loss": 0.6668, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 24.016826923076923, |
|
"grad_norm": 12.669683456420898, |
|
"learning_rate": 3.22192513368984e-05, |
|
"loss": 0.6431, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 24.016826923076923, |
|
"eval_accuracy": 0.6148648648648649, |
|
"eval_loss": 1.560927152633667, |
|
"eval_runtime": 69.5019, |
|
"eval_samples_per_second": 6.388, |
|
"eval_steps_per_second": 0.101, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 25.01201923076923, |
|
"grad_norm": 5.4236626625061035, |
|
"learning_rate": 3.155080213903743e-05, |
|
"loss": 0.5873, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 25.016826923076923, |
|
"eval_accuracy": 0.6171171171171171, |
|
"eval_loss": 1.6108297109603882, |
|
"eval_runtime": 73.8756, |
|
"eval_samples_per_second": 6.01, |
|
"eval_steps_per_second": 0.095, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 26.00721153846154, |
|
"grad_norm": 5.605001449584961, |
|
"learning_rate": 3.0882352941176475e-05, |
|
"loss": 0.5554, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 26.016826923076923, |
|
"eval_accuracy": 0.6531531531531531, |
|
"eval_loss": 1.4013562202453613, |
|
"eval_runtime": 79.7998, |
|
"eval_samples_per_second": 5.564, |
|
"eval_steps_per_second": 0.088, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 27.002403846153847, |
|
"grad_norm": 6.0180277824401855, |
|
"learning_rate": 3.0213903743315508e-05, |
|
"loss": 0.5428, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 27.014423076923077, |
|
"grad_norm": 7.198999404907227, |
|
"learning_rate": 2.954545454545455e-05, |
|
"loss": 0.4786, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 27.016826923076923, |
|
"eval_accuracy": 0.6621621621621622, |
|
"eval_loss": 1.433493733406067, |
|
"eval_runtime": 71.2505, |
|
"eval_samples_per_second": 6.232, |
|
"eval_steps_per_second": 0.098, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 28.009615384615383, |
|
"grad_norm": 4.281993389129639, |
|
"learning_rate": 2.8877005347593582e-05, |
|
"loss": 0.4252, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 28.016826923076923, |
|
"eval_accuracy": 0.6509009009009009, |
|
"eval_loss": 1.444474220275879, |
|
"eval_runtime": 71.5031, |
|
"eval_samples_per_second": 6.21, |
|
"eval_steps_per_second": 0.098, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 29.004807692307693, |
|
"grad_norm": 6.2630534172058105, |
|
"learning_rate": 2.8208556149732622e-05, |
|
"loss": 0.4527, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 29.016826923076923, |
|
"grad_norm": 4.163498878479004, |
|
"learning_rate": 2.754010695187166e-05, |
|
"loss": 0.382, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 29.016826923076923, |
|
"eval_accuracy": 0.6621621621621622, |
|
"eval_loss": 1.3914759159088135, |
|
"eval_runtime": 72.0609, |
|
"eval_samples_per_second": 6.161, |
|
"eval_steps_per_second": 0.097, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 30.01201923076923, |
|
"grad_norm": 5.012676239013672, |
|
"learning_rate": 2.68716577540107e-05, |
|
"loss": 0.365, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 30.016826923076923, |
|
"eval_accuracy": 0.6846846846846847, |
|
"eval_loss": 1.297812819480896, |
|
"eval_runtime": 73.674, |
|
"eval_samples_per_second": 6.027, |
|
"eval_steps_per_second": 0.095, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 31.00721153846154, |
|
"grad_norm": 4.618253707885742, |
|
"learning_rate": 2.6203208556149733e-05, |
|
"loss": 0.319, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 31.016826923076923, |
|
"eval_accuracy": 0.6824324324324325, |
|
"eval_loss": 1.3218427896499634, |
|
"eval_runtime": 74.4745, |
|
"eval_samples_per_second": 5.962, |
|
"eval_steps_per_second": 0.094, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 32.00240384615385, |
|
"grad_norm": 6.823300361633301, |
|
"learning_rate": 2.5534759358288773e-05, |
|
"loss": 0.3829, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 32.01442307692308, |
|
"grad_norm": 5.096348285675049, |
|
"learning_rate": 2.4866310160427807e-05, |
|
"loss": 0.3167, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 32.01682692307692, |
|
"eval_accuracy": 0.6644144144144144, |
|
"eval_loss": 1.3495796918869019, |
|
"eval_runtime": 73.0271, |
|
"eval_samples_per_second": 6.08, |
|
"eval_steps_per_second": 0.096, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 33.00961538461539, |
|
"grad_norm": 6.042089462280273, |
|
"learning_rate": 2.4197860962566847e-05, |
|
"loss": 0.2797, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 33.01682692307692, |
|
"eval_accuracy": 0.6801801801801802, |
|
"eval_loss": 1.2806001901626587, |
|
"eval_runtime": 72.9489, |
|
"eval_samples_per_second": 6.086, |
|
"eval_steps_per_second": 0.096, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 34.00480769230769, |
|
"grad_norm": 4.125849723815918, |
|
"learning_rate": 2.3529411764705884e-05, |
|
"loss": 0.281, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 34.01682692307692, |
|
"grad_norm": 7.1154704093933105, |
|
"learning_rate": 2.286096256684492e-05, |
|
"loss": 0.2864, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 34.01682692307692, |
|
"eval_accuracy": 0.7072072072072072, |
|
"eval_loss": 1.219117283821106, |
|
"eval_runtime": 74.6962, |
|
"eval_samples_per_second": 5.944, |
|
"eval_steps_per_second": 0.094, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 35.01201923076923, |
|
"grad_norm": 6.536057949066162, |
|
"learning_rate": 2.2192513368983957e-05, |
|
"loss": 0.2927, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 35.01682692307692, |
|
"eval_accuracy": 0.7207207207207207, |
|
"eval_loss": 1.2134795188903809, |
|
"eval_runtime": 72.1404, |
|
"eval_samples_per_second": 6.155, |
|
"eval_steps_per_second": 0.097, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 36.00721153846154, |
|
"grad_norm": 4.703156471252441, |
|
"learning_rate": 2.1524064171122994e-05, |
|
"loss": 0.2698, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 36.01682692307692, |
|
"eval_accuracy": 0.6914414414414415, |
|
"eval_loss": 1.250654697418213, |
|
"eval_runtime": 73.2059, |
|
"eval_samples_per_second": 6.065, |
|
"eval_steps_per_second": 0.096, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 37.00240384615385, |
|
"grad_norm": 4.395312786102295, |
|
"learning_rate": 2.0855614973262035e-05, |
|
"loss": 0.256, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 37.01442307692308, |
|
"grad_norm": 3.43831729888916, |
|
"learning_rate": 2.018716577540107e-05, |
|
"loss": 0.2333, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 37.01682692307692, |
|
"eval_accuracy": 0.7094594594594594, |
|
"eval_loss": 1.2037817239761353, |
|
"eval_runtime": 74.757, |
|
"eval_samples_per_second": 5.939, |
|
"eval_steps_per_second": 0.094, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 38.00961538461539, |
|
"grad_norm": 4.047479152679443, |
|
"learning_rate": 1.951871657754011e-05, |
|
"loss": 0.2366, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 38.01682692307692, |
|
"eval_accuracy": 0.7207207207207207, |
|
"eval_loss": 1.1517395973205566, |
|
"eval_runtime": 73.764, |
|
"eval_samples_per_second": 6.019, |
|
"eval_steps_per_second": 0.095, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 39.00480769230769, |
|
"grad_norm": 5.707859516143799, |
|
"learning_rate": 1.8850267379679145e-05, |
|
"loss": 0.1938, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 39.01682692307692, |
|
"grad_norm": 3.7590107917785645, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.1886, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 39.01682692307692, |
|
"eval_accuracy": 0.7094594594594594, |
|
"eval_loss": 1.2073884010314941, |
|
"eval_runtime": 72.0607, |
|
"eval_samples_per_second": 6.161, |
|
"eval_steps_per_second": 0.097, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 40.01201923076923, |
|
"grad_norm": 2.7063074111938477, |
|
"learning_rate": 1.7513368983957222e-05, |
|
"loss": 0.1804, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 40.01682692307692, |
|
"eval_accuracy": 0.7027027027027027, |
|
"eval_loss": 1.1658011674880981, |
|
"eval_runtime": 77.6928, |
|
"eval_samples_per_second": 5.715, |
|
"eval_steps_per_second": 0.09, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 41.00721153846154, |
|
"grad_norm": 6.454692363739014, |
|
"learning_rate": 1.684491978609626e-05, |
|
"loss": 0.1778, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 41.01682692307692, |
|
"eval_accuracy": 0.6824324324324325, |
|
"eval_loss": 1.2350265979766846, |
|
"eval_runtime": 73.7916, |
|
"eval_samples_per_second": 6.017, |
|
"eval_steps_per_second": 0.095, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 42.00240384615385, |
|
"grad_norm": 3.444183349609375, |
|
"learning_rate": 1.6176470588235296e-05, |
|
"loss": 0.2187, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 42.01442307692308, |
|
"grad_norm": 3.9001522064208984, |
|
"learning_rate": 1.5508021390374333e-05, |
|
"loss": 0.1728, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 42.01682692307692, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 1.1637804508209229, |
|
"eval_runtime": 77.911, |
|
"eval_samples_per_second": 5.699, |
|
"eval_steps_per_second": 0.09, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 43.00961538461539, |
|
"grad_norm": 7.290647029876709, |
|
"learning_rate": 1.4839572192513372e-05, |
|
"loss": 0.1998, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 43.01682692307692, |
|
"eval_accuracy": 0.6959459459459459, |
|
"eval_loss": 1.2359126806259155, |
|
"eval_runtime": 72.9535, |
|
"eval_samples_per_second": 6.086, |
|
"eval_steps_per_second": 0.096, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 44.00480769230769, |
|
"grad_norm": 5.636298656463623, |
|
"learning_rate": 1.4171122994652408e-05, |
|
"loss": 0.1639, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 44.01682692307692, |
|
"grad_norm": 3.6947968006134033, |
|
"learning_rate": 1.3502673796791445e-05, |
|
"loss": 0.1727, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 44.01682692307692, |
|
"eval_accuracy": 0.6936936936936937, |
|
"eval_loss": 1.232095718383789, |
|
"eval_runtime": 81.4582, |
|
"eval_samples_per_second": 5.451, |
|
"eval_steps_per_second": 0.086, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 45.01201923076923, |
|
"grad_norm": 3.2019245624542236, |
|
"learning_rate": 1.2834224598930484e-05, |
|
"loss": 0.1564, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 45.01682692307692, |
|
"eval_accuracy": 0.713963963963964, |
|
"eval_loss": 1.1604844331741333, |
|
"eval_runtime": 72.1149, |
|
"eval_samples_per_second": 6.157, |
|
"eval_steps_per_second": 0.097, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 46.00721153846154, |
|
"grad_norm": 1.9841790199279785, |
|
"learning_rate": 1.2165775401069519e-05, |
|
"loss": 0.1888, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 46.01682692307692, |
|
"eval_accuracy": 0.7094594594594594, |
|
"eval_loss": 1.1609092950820923, |
|
"eval_runtime": 71.8777, |
|
"eval_samples_per_second": 6.177, |
|
"eval_steps_per_second": 0.097, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 47.00240384615385, |
|
"grad_norm": 7.085150241851807, |
|
"learning_rate": 1.1497326203208558e-05, |
|
"loss": 0.1618, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 47.01442307692308, |
|
"grad_norm": 5.41819429397583, |
|
"learning_rate": 1.0828877005347594e-05, |
|
"loss": 0.1227, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 47.01682692307692, |
|
"eval_accuracy": 0.7117117117117117, |
|
"eval_loss": 1.1588457822799683, |
|
"eval_runtime": 82.846, |
|
"eval_samples_per_second": 5.359, |
|
"eval_steps_per_second": 0.084, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 48.00961538461539, |
|
"grad_norm": 2.3894031047821045, |
|
"learning_rate": 1.0160427807486631e-05, |
|
"loss": 0.134, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 48.01682692307692, |
|
"eval_accuracy": 0.7072072072072072, |
|
"eval_loss": 1.1698147058486938, |
|
"eval_runtime": 81.0499, |
|
"eval_samples_per_second": 5.478, |
|
"eval_steps_per_second": 0.086, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 49.00480769230769, |
|
"grad_norm": 4.01511287689209, |
|
"learning_rate": 9.49197860962567e-06, |
|
"loss": 0.1234, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 49.01682692307692, |
|
"grad_norm": 7.249420642852783, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 0.1622, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 49.01682692307692, |
|
"eval_accuracy": 0.6981981981981982, |
|
"eval_loss": 1.2014145851135254, |
|
"eval_runtime": 73.38, |
|
"eval_samples_per_second": 6.051, |
|
"eval_steps_per_second": 0.095, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 50.01201923076923, |
|
"grad_norm": 4.699652194976807, |
|
"learning_rate": 8.155080213903744e-06, |
|
"loss": 0.1391, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 50.01682692307692, |
|
"eval_accuracy": 0.7162162162162162, |
|
"eval_loss": 1.1005959510803223, |
|
"eval_runtime": 73.5034, |
|
"eval_samples_per_second": 6.041, |
|
"eval_steps_per_second": 0.095, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 51.00721153846154, |
|
"grad_norm": 3.143523931503296, |
|
"learning_rate": 7.4866310160427806e-06, |
|
"loss": 0.1276, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 51.01682692307692, |
|
"eval_accuracy": 0.6891891891891891, |
|
"eval_loss": 1.1485284566879272, |
|
"eval_runtime": 77.8096, |
|
"eval_samples_per_second": 5.706, |
|
"eval_steps_per_second": 0.09, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 52.00240384615385, |
|
"grad_norm": 1.1826622486114502, |
|
"learning_rate": 6.818181818181818e-06, |
|
"loss": 0.0979, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 52.01442307692308, |
|
"grad_norm": 3.218552589416504, |
|
"learning_rate": 6.149732620320856e-06, |
|
"loss": 0.1222, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 52.01682692307692, |
|
"eval_accuracy": 0.713963963963964, |
|
"eval_loss": 1.0801595449447632, |
|
"eval_runtime": 73.9346, |
|
"eval_samples_per_second": 6.005, |
|
"eval_steps_per_second": 0.095, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 53.00961538461539, |
|
"grad_norm": 4.1740312576293945, |
|
"learning_rate": 5.481283422459893e-06, |
|
"loss": 0.1024, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 53.01682692307692, |
|
"eval_accuracy": 0.704954954954955, |
|
"eval_loss": 1.1242848634719849, |
|
"eval_runtime": 72.7853, |
|
"eval_samples_per_second": 6.1, |
|
"eval_steps_per_second": 0.096, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 54.00480769230769, |
|
"grad_norm": 3.4602904319763184, |
|
"learning_rate": 4.812834224598931e-06, |
|
"loss": 0.1014, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 54.01682692307692, |
|
"grad_norm": 24.67173957824707, |
|
"learning_rate": 4.144385026737968e-06, |
|
"loss": 0.1186, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 54.01682692307692, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 1.0909080505371094, |
|
"eval_runtime": 74.1097, |
|
"eval_samples_per_second": 5.991, |
|
"eval_steps_per_second": 0.094, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 55.01201923076923, |
|
"grad_norm": 2.6615827083587646, |
|
"learning_rate": 3.4759358288770056e-06, |
|
"loss": 0.1121, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 55.01682692307692, |
|
"eval_accuracy": 0.7094594594594594, |
|
"eval_loss": 1.1351189613342285, |
|
"eval_runtime": 73.5115, |
|
"eval_samples_per_second": 6.04, |
|
"eval_steps_per_second": 0.095, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 56.00721153846154, |
|
"grad_norm": 3.412928581237793, |
|
"learning_rate": 2.807486631016043e-06, |
|
"loss": 0.1284, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 56.01682692307692, |
|
"eval_accuracy": 0.7274774774774775, |
|
"eval_loss": 1.1095025539398193, |
|
"eval_runtime": 73.055, |
|
"eval_samples_per_second": 6.078, |
|
"eval_steps_per_second": 0.096, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 57.00240384615385, |
|
"grad_norm": 4.214470386505127, |
|
"learning_rate": 2.1390374331550802e-06, |
|
"loss": 0.1418, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 57.01442307692308, |
|
"grad_norm": 2.0941081047058105, |
|
"learning_rate": 1.4705882352941177e-06, |
|
"loss": 0.0893, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 57.01682692307692, |
|
"eval_accuracy": 0.7319819819819819, |
|
"eval_loss": 1.084416389465332, |
|
"eval_runtime": 71.5145, |
|
"eval_samples_per_second": 6.209, |
|
"eval_steps_per_second": 0.098, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 58.00961538461539, |
|
"grad_norm": 4.0398101806640625, |
|
"learning_rate": 8.021390374331552e-07, |
|
"loss": 0.0878, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 58.01682692307692, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 1.0803806781768799, |
|
"eval_runtime": 71.6842, |
|
"eval_samples_per_second": 6.194, |
|
"eval_steps_per_second": 0.098, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 59.00480769230769, |
|
"grad_norm": 1.0705435276031494, |
|
"learning_rate": 1.3368983957219251e-07, |
|
"loss": 0.0887, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 59.00721153846154, |
|
"eval_accuracy": 0.7297297297297297, |
|
"eval_loss": 1.0809234380722046, |
|
"eval_runtime": 72.7582, |
|
"eval_samples_per_second": 6.102, |
|
"eval_steps_per_second": 0.096, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 59.00721153846154, |
|
"step": 832, |
|
"total_flos": 6.1975283578694664e+19, |
|
"train_loss": 1.175738023289551, |
|
"train_runtime": 12896.1846, |
|
"train_samples_per_second": 4.129, |
|
"train_steps_per_second": 0.065 |
|
}, |
|
{ |
|
"epoch": 59.00721153846154, |
|
"eval_accuracy": 0.7319819819819819, |
|
"eval_loss": 1.084416389465332, |
|
"eval_runtime": 73.94, |
|
"eval_samples_per_second": 6.005, |
|
"eval_steps_per_second": 0.095, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 59.00721153846154, |
|
"eval_accuracy": 0.7319819819819819, |
|
"eval_loss": 1.084416389465332, |
|
"eval_runtime": 75.3126, |
|
"eval_samples_per_second": 5.895, |
|
"eval_steps_per_second": 0.093, |
|
"step": 832 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 832, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.1975283578694664e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|