|
{ |
|
"best_metric": 0.35570469798657717, |
|
"best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-783", |
|
"epoch": 30.026442307692307, |
|
"eval_steps": 500, |
|
"global_step": 832, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01201923076923077, |
|
"grad_norm": 9.181777954101562, |
|
"learning_rate": 5.9523809523809525e-06, |
|
"loss": 4.2532, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02403846153846154, |
|
"grad_norm": 6.192462921142578, |
|
"learning_rate": 1.1904761904761905e-05, |
|
"loss": 4.2583, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03245192307692308, |
|
"eval_accuracy": 0.015659955257270694, |
|
"eval_loss": 4.225129127502441, |
|
"eval_runtime": 113.6438, |
|
"eval_samples_per_second": 3.933, |
|
"eval_steps_per_second": 0.123, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.0036057692307692, |
|
"grad_norm": 6.222796440124512, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 4.2505, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.015625, |
|
"grad_norm": 3.965486526489258, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 4.2489, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.0276442307692308, |
|
"grad_norm": 5.0857744216918945, |
|
"learning_rate": 2.9761904761904762e-05, |
|
"loss": 4.2374, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0324519230769231, |
|
"eval_accuracy": 0.013422818791946308, |
|
"eval_loss": 4.226736068725586, |
|
"eval_runtime": 113.905, |
|
"eval_samples_per_second": 3.924, |
|
"eval_steps_per_second": 0.123, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 2.0072115384615383, |
|
"grad_norm": 3.5929408073425293, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 4.2403, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.019230769230769, |
|
"grad_norm": 3.6417300701141357, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 4.255, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.03125, |
|
"grad_norm": 3.2748305797576904, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 4.2678, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.032451923076923, |
|
"eval_accuracy": 0.013422818791946308, |
|
"eval_loss": 4.2263360023498535, |
|
"eval_runtime": 115.125, |
|
"eval_samples_per_second": 3.883, |
|
"eval_steps_per_second": 0.122, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 3.0108173076923075, |
|
"grad_norm": 3.1406173706054688, |
|
"learning_rate": 4.959893048128342e-05, |
|
"loss": 4.2151, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.0228365384615383, |
|
"grad_norm": 2.8441436290740967, |
|
"learning_rate": 4.8930481283422465e-05, |
|
"loss": 4.2537, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.032451923076923, |
|
"eval_accuracy": 0.015659955257270694, |
|
"eval_loss": 4.221240997314453, |
|
"eval_runtime": 115.863, |
|
"eval_samples_per_second": 3.858, |
|
"eval_steps_per_second": 0.121, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 4.002403846153846, |
|
"grad_norm": 2.7364041805267334, |
|
"learning_rate": 4.8262032085561496e-05, |
|
"loss": 4.2568, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.014423076923077, |
|
"grad_norm": 2.71181058883667, |
|
"learning_rate": 4.759358288770054e-05, |
|
"loss": 4.2288, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.0264423076923075, |
|
"grad_norm": 2.7206692695617676, |
|
"learning_rate": 4.6925133689839576e-05, |
|
"loss": 4.2401, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 4.032451923076923, |
|
"eval_accuracy": 0.015659955257270694, |
|
"eval_loss": 4.206515312194824, |
|
"eval_runtime": 114.2862, |
|
"eval_samples_per_second": 3.911, |
|
"eval_steps_per_second": 0.122, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 5.006009615384615, |
|
"grad_norm": 2.6117472648620605, |
|
"learning_rate": 4.625668449197861e-05, |
|
"loss": 4.2192, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 5.018028846153846, |
|
"grad_norm": 2.90580677986145, |
|
"learning_rate": 4.558823529411765e-05, |
|
"loss": 4.2204, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 5.030048076923077, |
|
"grad_norm": 2.9660141468048096, |
|
"learning_rate": 4.491978609625669e-05, |
|
"loss": 4.2519, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 5.032451923076923, |
|
"eval_accuracy": 0.017897091722595078, |
|
"eval_loss": 4.207516193389893, |
|
"eval_runtime": 117.7024, |
|
"eval_samples_per_second": 3.798, |
|
"eval_steps_per_second": 0.119, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 6.009615384615385, |
|
"grad_norm": 2.8973159790039062, |
|
"learning_rate": 4.4251336898395724e-05, |
|
"loss": 4.2112, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 6.021634615384615, |
|
"grad_norm": 2.734694480895996, |
|
"learning_rate": 4.358288770053476e-05, |
|
"loss": 4.2198, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 6.032451923076923, |
|
"eval_accuracy": 0.013422818791946308, |
|
"eval_loss": 4.205463409423828, |
|
"eval_runtime": 116.7575, |
|
"eval_samples_per_second": 3.828, |
|
"eval_steps_per_second": 0.12, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 7.001201923076923, |
|
"grad_norm": 2.421506881713867, |
|
"learning_rate": 4.29144385026738e-05, |
|
"loss": 4.2292, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 7.013221153846154, |
|
"grad_norm": 2.82832670211792, |
|
"learning_rate": 4.224598930481284e-05, |
|
"loss": 4.2019, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 7.025240384615385, |
|
"grad_norm": 3.0022928714752197, |
|
"learning_rate": 4.157754010695187e-05, |
|
"loss": 4.2111, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 7.032451923076923, |
|
"eval_accuracy": 0.017897091722595078, |
|
"eval_loss": 4.19577693939209, |
|
"eval_runtime": 119.1446, |
|
"eval_samples_per_second": 3.752, |
|
"eval_steps_per_second": 0.118, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 8.004807692307692, |
|
"grad_norm": 2.6406443119049072, |
|
"learning_rate": 4.0909090909090915e-05, |
|
"loss": 4.2068, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 8.016826923076923, |
|
"grad_norm": 3.169625759124756, |
|
"learning_rate": 4.024064171122995e-05, |
|
"loss": 4.1759, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 8.028846153846153, |
|
"grad_norm": 2.8210370540618896, |
|
"learning_rate": 3.957219251336899e-05, |
|
"loss": 4.1871, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 8.032451923076923, |
|
"eval_accuracy": 0.04697986577181208, |
|
"eval_loss": 4.147375583648682, |
|
"eval_runtime": 117.849, |
|
"eval_samples_per_second": 3.793, |
|
"eval_steps_per_second": 0.119, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 9.008413461538462, |
|
"grad_norm": 3.140075206756592, |
|
"learning_rate": 3.8903743315508025e-05, |
|
"loss": 4.1314, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 9.020432692307692, |
|
"grad_norm": 3.0806028842926025, |
|
"learning_rate": 3.8235294117647055e-05, |
|
"loss": 4.0778, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 9.032451923076923, |
|
"grad_norm": 12.136305809020996, |
|
"learning_rate": 3.75668449197861e-05, |
|
"loss": 4.0891, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 9.032451923076923, |
|
"eval_accuracy": 0.0447427293064877, |
|
"eval_loss": 4.032660007476807, |
|
"eval_runtime": 116.5729, |
|
"eval_samples_per_second": 3.835, |
|
"eval_steps_per_second": 0.12, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 10.01201923076923, |
|
"grad_norm": 4.812855243682861, |
|
"learning_rate": 3.6898395721925136e-05, |
|
"loss": 3.9545, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 10.024038461538462, |
|
"grad_norm": 5.218036651611328, |
|
"learning_rate": 3.622994652406417e-05, |
|
"loss": 3.7963, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 10.032451923076923, |
|
"eval_accuracy": 0.08277404921700224, |
|
"eval_loss": 3.82175874710083, |
|
"eval_runtime": 115.9952, |
|
"eval_samples_per_second": 3.854, |
|
"eval_steps_per_second": 0.121, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 11.00360576923077, |
|
"grad_norm": 4.765311241149902, |
|
"learning_rate": 3.556149732620321e-05, |
|
"loss": 3.7306, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 11.015625, |
|
"grad_norm": 6.956701755523682, |
|
"learning_rate": 3.489304812834225e-05, |
|
"loss": 3.5563, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 11.02764423076923, |
|
"grad_norm": 5.394344329833984, |
|
"learning_rate": 3.4224598930481284e-05, |
|
"loss": 3.4787, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 11.032451923076923, |
|
"eval_accuracy": 0.11185682326621924, |
|
"eval_loss": 3.7061688899993896, |
|
"eval_runtime": 113.6071, |
|
"eval_samples_per_second": 3.935, |
|
"eval_steps_per_second": 0.123, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 12.007211538461538, |
|
"grad_norm": 5.70036506652832, |
|
"learning_rate": 3.355614973262032e-05, |
|
"loss": 3.4821, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 12.01923076923077, |
|
"grad_norm": 6.370093822479248, |
|
"learning_rate": 3.288770053475936e-05, |
|
"loss": 3.3756, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 12.03125, |
|
"grad_norm": 5.8564133644104, |
|
"learning_rate": 3.22192513368984e-05, |
|
"loss": 3.1883, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 12.032451923076923, |
|
"eval_accuracy": 0.11185682326621924, |
|
"eval_loss": 3.58866810798645, |
|
"eval_runtime": 112.5771, |
|
"eval_samples_per_second": 3.971, |
|
"eval_steps_per_second": 0.124, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 13.010817307692308, |
|
"grad_norm": 6.884145736694336, |
|
"learning_rate": 3.155080213903743e-05, |
|
"loss": 3.1542, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 13.022836538461538, |
|
"grad_norm": 7.7231059074401855, |
|
"learning_rate": 3.0882352941176475e-05, |
|
"loss": 3.0045, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 13.032451923076923, |
|
"eval_accuracy": 0.14317673378076062, |
|
"eval_loss": 3.380981206893921, |
|
"eval_runtime": 113.3509, |
|
"eval_samples_per_second": 3.944, |
|
"eval_steps_per_second": 0.124, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 14.002403846153847, |
|
"grad_norm": 6.160039901733398, |
|
"learning_rate": 3.0213903743315508e-05, |
|
"loss": 2.9704, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 14.014423076923077, |
|
"grad_norm": 8.257513046264648, |
|
"learning_rate": 2.954545454545455e-05, |
|
"loss": 2.7723, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 14.026442307692308, |
|
"grad_norm": 6.512996196746826, |
|
"learning_rate": 2.8877005347593582e-05, |
|
"loss": 2.8045, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 14.032451923076923, |
|
"eval_accuracy": 0.21923937360178972, |
|
"eval_loss": 3.2211973667144775, |
|
"eval_runtime": 116.514, |
|
"eval_samples_per_second": 3.836, |
|
"eval_steps_per_second": 0.12, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 15.006009615384615, |
|
"grad_norm": 6.627754211425781, |
|
"learning_rate": 2.8208556149732622e-05, |
|
"loss": 2.7247, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 15.018028846153847, |
|
"grad_norm": 6.555057048797607, |
|
"learning_rate": 2.754010695187166e-05, |
|
"loss": 2.5789, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 15.030048076923077, |
|
"grad_norm": 8.675689697265625, |
|
"learning_rate": 2.68716577540107e-05, |
|
"loss": 2.5344, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 15.032451923076923, |
|
"eval_accuracy": 0.15883668903803133, |
|
"eval_loss": 3.270237922668457, |
|
"eval_runtime": 114.1519, |
|
"eval_samples_per_second": 3.916, |
|
"eval_steps_per_second": 0.123, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 16.009615384615383, |
|
"grad_norm": 7.248364448547363, |
|
"learning_rate": 2.6203208556149733e-05, |
|
"loss": 2.3791, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 16.021634615384617, |
|
"grad_norm": 9.409223556518555, |
|
"learning_rate": 2.5534759358288773e-05, |
|
"loss": 2.3725, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 16.032451923076923, |
|
"eval_accuracy": 0.14093959731543623, |
|
"eval_loss": 3.35996413230896, |
|
"eval_runtime": 113.8728, |
|
"eval_samples_per_second": 3.925, |
|
"eval_steps_per_second": 0.123, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 17.001201923076923, |
|
"grad_norm": 7.834836006164551, |
|
"learning_rate": 2.4866310160427807e-05, |
|
"loss": 2.2722, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 17.013221153846153, |
|
"grad_norm": 7.643532752990723, |
|
"learning_rate": 2.4197860962566847e-05, |
|
"loss": 2.1434, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 17.025240384615383, |
|
"grad_norm": 8.425023078918457, |
|
"learning_rate": 2.3529411764705884e-05, |
|
"loss": 2.2074, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 17.032451923076923, |
|
"eval_accuracy": 0.2371364653243848, |
|
"eval_loss": 2.9730582237243652, |
|
"eval_runtime": 113.5853, |
|
"eval_samples_per_second": 3.935, |
|
"eval_steps_per_second": 0.123, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 18.004807692307693, |
|
"grad_norm": 7.840888500213623, |
|
"learning_rate": 2.286096256684492e-05, |
|
"loss": 2.0823, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 18.016826923076923, |
|
"grad_norm": 9.385211944580078, |
|
"learning_rate": 2.2192513368983957e-05, |
|
"loss": 1.9511, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 18.028846153846153, |
|
"grad_norm": 8.72028636932373, |
|
"learning_rate": 2.1524064171122994e-05, |
|
"loss": 2.1094, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 18.032451923076923, |
|
"eval_accuracy": 0.26174496644295303, |
|
"eval_loss": 2.8679935932159424, |
|
"eval_runtime": 113.6275, |
|
"eval_samples_per_second": 3.934, |
|
"eval_steps_per_second": 0.123, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 19.00841346153846, |
|
"grad_norm": 9.284242630004883, |
|
"learning_rate": 2.0855614973262035e-05, |
|
"loss": 2.0278, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 19.020432692307693, |
|
"grad_norm": 6.913205623626709, |
|
"learning_rate": 2.018716577540107e-05, |
|
"loss": 1.8727, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 19.032451923076923, |
|
"grad_norm": 21.923686981201172, |
|
"learning_rate": 1.951871657754011e-05, |
|
"loss": 1.9839, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 19.032451923076923, |
|
"eval_accuracy": 0.27069351230425054, |
|
"eval_loss": 2.8359620571136475, |
|
"eval_runtime": 115.0445, |
|
"eval_samples_per_second": 3.885, |
|
"eval_steps_per_second": 0.122, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 20.01201923076923, |
|
"grad_norm": 8.244462013244629, |
|
"learning_rate": 1.8850267379679145e-05, |
|
"loss": 1.7584, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 20.02403846153846, |
|
"grad_norm": 8.496162414550781, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 1.7354, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 20.032451923076923, |
|
"eval_accuracy": 0.28187919463087246, |
|
"eval_loss": 2.7890186309814453, |
|
"eval_runtime": 111.5479, |
|
"eval_samples_per_second": 4.007, |
|
"eval_steps_per_second": 0.126, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 21.00360576923077, |
|
"grad_norm": 7.818769931793213, |
|
"learning_rate": 1.7513368983957222e-05, |
|
"loss": 1.701, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 21.015625, |
|
"grad_norm": 9.921280860900879, |
|
"learning_rate": 1.684491978609626e-05, |
|
"loss": 1.648, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 21.02764423076923, |
|
"grad_norm": 8.053401947021484, |
|
"learning_rate": 1.6176470588235296e-05, |
|
"loss": 1.6843, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 21.032451923076923, |
|
"eval_accuracy": 0.29977628635346754, |
|
"eval_loss": 2.728635787963867, |
|
"eval_runtime": 114.938, |
|
"eval_samples_per_second": 3.889, |
|
"eval_steps_per_second": 0.122, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 22.00721153846154, |
|
"grad_norm": 10.640948295593262, |
|
"learning_rate": 1.5508021390374333e-05, |
|
"loss": 1.4562, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 22.01923076923077, |
|
"grad_norm": 9.333779335021973, |
|
"learning_rate": 1.4839572192513372e-05, |
|
"loss": 1.5091, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 22.03125, |
|
"grad_norm": 9.433292388916016, |
|
"learning_rate": 1.4171122994652408e-05, |
|
"loss": 1.6266, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 22.032451923076923, |
|
"eval_accuracy": 0.2841163310961969, |
|
"eval_loss": 2.806154489517212, |
|
"eval_runtime": 115.4195, |
|
"eval_samples_per_second": 3.873, |
|
"eval_steps_per_second": 0.121, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 23.010817307692307, |
|
"grad_norm": 8.609794616699219, |
|
"learning_rate": 1.3502673796791445e-05, |
|
"loss": 1.6214, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 23.02283653846154, |
|
"grad_norm": 6.943145275115967, |
|
"learning_rate": 1.2834224598930484e-05, |
|
"loss": 1.4083, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 23.032451923076923, |
|
"eval_accuracy": 0.2595078299776286, |
|
"eval_loss": 2.8204569816589355, |
|
"eval_runtime": 115.0186, |
|
"eval_samples_per_second": 3.886, |
|
"eval_steps_per_second": 0.122, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 24.002403846153847, |
|
"grad_norm": 10.106169700622559, |
|
"learning_rate": 1.2165775401069519e-05, |
|
"loss": 1.3264, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 24.014423076923077, |
|
"grad_norm": 10.27568244934082, |
|
"learning_rate": 1.1497326203208558e-05, |
|
"loss": 1.3662, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 24.026442307692307, |
|
"grad_norm": 10.36124038696289, |
|
"learning_rate": 1.0828877005347594e-05, |
|
"loss": 1.4422, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 24.032451923076923, |
|
"eval_accuracy": 0.30648769574944074, |
|
"eval_loss": 2.6406848430633545, |
|
"eval_runtime": 116.1998, |
|
"eval_samples_per_second": 3.847, |
|
"eval_steps_per_second": 0.12, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 25.006009615384617, |
|
"grad_norm": 8.807711601257324, |
|
"learning_rate": 1.0160427807486631e-05, |
|
"loss": 1.3912, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 25.018028846153847, |
|
"grad_norm": 8.629064559936523, |
|
"learning_rate": 9.49197860962567e-06, |
|
"loss": 1.2904, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 25.030048076923077, |
|
"grad_norm": 9.190649032592773, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 1.3897, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 25.032451923076923, |
|
"eval_accuracy": 0.34675615212527966, |
|
"eval_loss": 2.5948002338409424, |
|
"eval_runtime": 118.4696, |
|
"eval_samples_per_second": 3.773, |
|
"eval_steps_per_second": 0.118, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 26.009615384615383, |
|
"grad_norm": 10.941615104675293, |
|
"learning_rate": 8.155080213903744e-06, |
|
"loss": 1.2647, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 26.021634615384617, |
|
"grad_norm": 9.282670021057129, |
|
"learning_rate": 7.4866310160427806e-06, |
|
"loss": 1.3906, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 26.032451923076923, |
|
"eval_accuracy": 0.31543624161073824, |
|
"eval_loss": 2.629518985748291, |
|
"eval_runtime": 120.316, |
|
"eval_samples_per_second": 3.715, |
|
"eval_steps_per_second": 0.116, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 27.001201923076923, |
|
"grad_norm": 9.254827499389648, |
|
"learning_rate": 6.818181818181818e-06, |
|
"loss": 1.3257, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 27.013221153846153, |
|
"grad_norm": 7.69126558303833, |
|
"learning_rate": 6.149732620320856e-06, |
|
"loss": 1.2581, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 27.025240384615383, |
|
"grad_norm": 10.26952075958252, |
|
"learning_rate": 5.481283422459893e-06, |
|
"loss": 1.2291, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 27.032451923076923, |
|
"eval_accuracy": 0.3378076062639821, |
|
"eval_loss": 2.5539445877075195, |
|
"eval_runtime": 116.6007, |
|
"eval_samples_per_second": 3.834, |
|
"eval_steps_per_second": 0.12, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 28.004807692307693, |
|
"grad_norm": 11.07676887512207, |
|
"learning_rate": 4.812834224598931e-06, |
|
"loss": 1.29, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 28.016826923076923, |
|
"grad_norm": 8.940054893493652, |
|
"learning_rate": 4.144385026737968e-06, |
|
"loss": 1.1383, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 28.028846153846153, |
|
"grad_norm": 9.17213249206543, |
|
"learning_rate": 3.4759358288770056e-06, |
|
"loss": 1.3166, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 28.032451923076923, |
|
"eval_accuracy": 0.35570469798657717, |
|
"eval_loss": 2.519993305206299, |
|
"eval_runtime": 116.4901, |
|
"eval_samples_per_second": 3.837, |
|
"eval_steps_per_second": 0.12, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 29.00841346153846, |
|
"grad_norm": 11.884153366088867, |
|
"learning_rate": 2.807486631016043e-06, |
|
"loss": 1.232, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 29.020432692307693, |
|
"grad_norm": 9.171028137207031, |
|
"learning_rate": 2.1390374331550802e-06, |
|
"loss": 1.1733, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 29.032451923076923, |
|
"grad_norm": 24.7429256439209, |
|
"learning_rate": 1.4705882352941177e-06, |
|
"loss": 1.2619, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 29.032451923076923, |
|
"eval_accuracy": 0.35570469798657717, |
|
"eval_loss": 2.530792236328125, |
|
"eval_runtime": 118.4418, |
|
"eval_samples_per_second": 3.774, |
|
"eval_steps_per_second": 0.118, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 30.01201923076923, |
|
"grad_norm": 8.288737297058105, |
|
"learning_rate": 8.021390374331552e-07, |
|
"loss": 1.1824, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 30.02403846153846, |
|
"grad_norm": 8.749896049499512, |
|
"learning_rate": 1.3368983957219251e-07, |
|
"loss": 1.1393, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 30.026442307692307, |
|
"eval_accuracy": 0.3243847874720358, |
|
"eval_loss": 2.5855681896209717, |
|
"eval_runtime": 118.0615, |
|
"eval_samples_per_second": 3.786, |
|
"eval_steps_per_second": 0.119, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 30.026442307692307, |
|
"step": 832, |
|
"total_flos": 3.2147153612960956e+19, |
|
"train_loss": 2.754230235058528, |
|
"train_runtime": 10713.0029, |
|
"train_samples_per_second": 2.485, |
|
"train_steps_per_second": 0.078 |
|
}, |
|
{ |
|
"epoch": 30.026442307692307, |
|
"eval_accuracy": 0.35570469798657717, |
|
"eval_loss": 2.5194525718688965, |
|
"eval_runtime": 114.2775, |
|
"eval_samples_per_second": 3.912, |
|
"eval_steps_per_second": 0.123, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 30.026442307692307, |
|
"eval_accuracy": 0.35570469798657717, |
|
"eval_loss": 2.520230293273926, |
|
"eval_runtime": 112.9248, |
|
"eval_samples_per_second": 3.958, |
|
"eval_steps_per_second": 0.124, |
|
"step": 832 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 832, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.2147153612960956e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|