{ "best_metric": 0.4882943143812709, "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-806", "epoch": 30.03125, "eval_steps": 500, "global_step": 960, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010416666666666666, "grad_norm": 4.111547470092773, "learning_rate": 5.208333333333334e-06, "loss": 4.2127, "step": 10 }, { "epoch": 0.020833333333333332, "grad_norm": 4.218038082122803, "learning_rate": 1.0416666666666668e-05, "loss": 4.2247, "step": 20 }, { "epoch": 0.03125, "grad_norm": 3.766047954559326, "learning_rate": 1.5625e-05, "loss": 4.2427, "step": 30 }, { "epoch": 0.03229166666666667, "eval_accuracy": 0.0033444816053511705, "eval_loss": 4.22645378112793, "eval_runtime": 157.4916, "eval_samples_per_second": 1.899, "eval_steps_per_second": 0.063, "step": 31 }, { "epoch": 1.009375, "grad_norm": 4.1204304695129395, "learning_rate": 2.0833333333333336e-05, "loss": 4.2374, "step": 40 }, { "epoch": 1.0197916666666667, "grad_norm": 4.202085971832275, "learning_rate": 2.604166666666667e-05, "loss": 4.228, "step": 50 }, { "epoch": 1.0302083333333334, "grad_norm": 3.5194547176361084, "learning_rate": 3.125e-05, "loss": 4.2321, "step": 60 }, { "epoch": 1.0322916666666666, "eval_accuracy": 0.010033444816053512, "eval_loss": 4.223534107208252, "eval_runtime": 156.7976, "eval_samples_per_second": 1.907, "eval_steps_per_second": 0.064, "step": 62 }, { "epoch": 2.0083333333333333, "grad_norm": 3.131030559539795, "learning_rate": 3.6458333333333336e-05, "loss": 4.2293, "step": 70 }, { "epoch": 2.01875, "grad_norm": 3.120128631591797, "learning_rate": 4.166666666666667e-05, "loss": 4.2347, "step": 80 }, { "epoch": 2.029166666666667, "grad_norm": 3.0988192558288574, "learning_rate": 4.6875e-05, "loss": 4.24, "step": 90 }, { "epoch": 2.0322916666666666, "eval_accuracy": 0.010033444816053512, "eval_loss": 4.228224754333496, "eval_runtime": 156.5504, "eval_samples_per_second": 1.91, "eval_steps_per_second": 0.064, "step": 93 }, { "epoch": 3.0072916666666667, "grad_norm": 3.102827310562134, "learning_rate": 4.976851851851852e-05, "loss": 4.2195, "step": 100 }, { "epoch": 3.017708333333333, "grad_norm": 2.8366568088531494, "learning_rate": 4.9189814814814815e-05, "loss": 4.2354, "step": 110 }, { "epoch": 3.028125, "grad_norm": 3.0085482597351074, "learning_rate": 4.8611111111111115e-05, "loss": 4.2445, "step": 120 }, { "epoch": 3.0322916666666666, "eval_accuracy": 0.006688963210702341, "eval_loss": 4.224982261657715, "eval_runtime": 151.1437, "eval_samples_per_second": 1.978, "eval_steps_per_second": 0.066, "step": 124 }, { "epoch": 4.00625, "grad_norm": 3.300618886947632, "learning_rate": 4.803240740740741e-05, "loss": 4.2226, "step": 130 }, { "epoch": 4.016666666666667, "grad_norm": 3.202220916748047, "learning_rate": 4.745370370370371e-05, "loss": 4.2265, "step": 140 }, { "epoch": 4.027083333333334, "grad_norm": 2.977271556854248, "learning_rate": 4.6875e-05, "loss": 4.2327, "step": 150 }, { "epoch": 4.032291666666667, "eval_accuracy": 0.010033444816053512, "eval_loss": 4.2244367599487305, "eval_runtime": 156.5602, "eval_samples_per_second": 1.91, "eval_steps_per_second": 0.064, "step": 155 }, { "epoch": 5.005208333333333, "grad_norm": 3.7291171550750732, "learning_rate": 4.62962962962963e-05, "loss": 4.2106, "step": 160 }, { "epoch": 5.015625, "grad_norm": 3.581210136413574, "learning_rate": 4.5717592592592594e-05, "loss": 4.2112, "step": 170 }, { "epoch": 5.026041666666667, "grad_norm": 3.7153217792510986, "learning_rate": 4.5138888888888894e-05, "loss": 4.2104, "step": 180 }, { "epoch": 5.032291666666667, "eval_accuracy": 0.020066889632107024, "eval_loss": 4.2100114822387695, "eval_runtime": 150.442, "eval_samples_per_second": 1.987, "eval_steps_per_second": 0.066, "step": 186 }, { "epoch": 6.004166666666666, "grad_norm": 3.1238365173339844, "learning_rate": 4.456018518518519e-05, "loss": 4.2205, "step": 190 }, { "epoch": 6.014583333333333, "grad_norm": 3.4233925342559814, "learning_rate": 4.3981481481481486e-05, "loss": 4.2012, "step": 200 }, { "epoch": 6.025, "grad_norm": 3.511300563812256, "learning_rate": 4.340277777777778e-05, "loss": 4.2374, "step": 210 }, { "epoch": 6.032291666666667, "eval_accuracy": 0.006688963210702341, "eval_loss": 4.2022294998168945, "eval_runtime": 153.3689, "eval_samples_per_second": 1.95, "eval_steps_per_second": 0.065, "step": 217 }, { "epoch": 7.003125, "grad_norm": 3.343992233276367, "learning_rate": 4.282407407407408e-05, "loss": 4.2055, "step": 220 }, { "epoch": 7.013541666666667, "grad_norm": 2.911720037460327, "learning_rate": 4.224537037037037e-05, "loss": 4.1687, "step": 230 }, { "epoch": 7.023958333333334, "grad_norm": 3.1043291091918945, "learning_rate": 4.166666666666667e-05, "loss": 4.1597, "step": 240 }, { "epoch": 7.032291666666667, "eval_accuracy": 0.030100334448160536, "eval_loss": 4.118756294250488, "eval_runtime": 152.3194, "eval_samples_per_second": 1.963, "eval_steps_per_second": 0.066, "step": 248 }, { "epoch": 8.002083333333333, "grad_norm": 3.4208502769470215, "learning_rate": 4.1087962962962965e-05, "loss": 4.1418, "step": 250 }, { "epoch": 8.0125, "grad_norm": 4.338245868682861, "learning_rate": 4.0509259259259265e-05, "loss": 4.0753, "step": 260 }, { "epoch": 8.022916666666667, "grad_norm": 4.86409854888916, "learning_rate": 3.993055555555556e-05, "loss": 4.0522, "step": 270 }, { "epoch": 8.032291666666667, "eval_accuracy": 0.07023411371237458, "eval_loss": 3.935123920440674, "eval_runtime": 155.145, "eval_samples_per_second": 1.927, "eval_steps_per_second": 0.064, "step": 279 }, { "epoch": 9.001041666666667, "grad_norm": 4.609180927276611, "learning_rate": 3.935185185185186e-05, "loss": 3.9967, "step": 280 }, { "epoch": 9.011458333333334, "grad_norm": 5.407143592834473, "learning_rate": 3.877314814814815e-05, "loss": 3.8626, "step": 290 }, { "epoch": 9.021875, "grad_norm": 4.2119598388671875, "learning_rate": 3.8194444444444444e-05, "loss": 3.821, "step": 300 }, { "epoch": 9.032291666666667, "grad_norm": 14.079527854919434, "learning_rate": 3.7615740740740744e-05, "loss": 3.768, "step": 310 }, { "epoch": 9.032291666666667, "eval_accuracy": 0.10702341137123746, "eval_loss": 3.680009126663208, "eval_runtime": 152.6981, "eval_samples_per_second": 1.958, "eval_steps_per_second": 0.065, "step": 310 }, { "epoch": 10.010416666666666, "grad_norm": 4.879744529724121, "learning_rate": 3.7037037037037037e-05, "loss": 3.54, "step": 320 }, { "epoch": 10.020833333333334, "grad_norm": 6.769280910491943, "learning_rate": 3.6458333333333336e-05, "loss": 3.6435, "step": 330 }, { "epoch": 10.03125, "grad_norm": 6.974754810333252, "learning_rate": 3.587962962962963e-05, "loss": 3.5147, "step": 340 }, { "epoch": 10.032291666666667, "eval_accuracy": 0.11036789297658862, "eval_loss": 3.541635036468506, "eval_runtime": 156.1418, "eval_samples_per_second": 1.915, "eval_steps_per_second": 0.064, "step": 341 }, { "epoch": 11.009375, "grad_norm": 6.004092216491699, "learning_rate": 3.530092592592593e-05, "loss": 3.2736, "step": 350 }, { "epoch": 11.019791666666666, "grad_norm": 4.7057905197143555, "learning_rate": 3.472222222222222e-05, "loss": 3.2459, "step": 360 }, { "epoch": 11.030208333333333, "grad_norm": 7.564170837402344, "learning_rate": 3.414351851851852e-05, "loss": 3.2878, "step": 370 }, { "epoch": 11.032291666666667, "eval_accuracy": 0.07023411371237458, "eval_loss": 3.707416534423828, "eval_runtime": 151.3197, "eval_samples_per_second": 1.976, "eval_steps_per_second": 0.066, "step": 372 }, { "epoch": 12.008333333333333, "grad_norm": 6.44061279296875, "learning_rate": 3.3564814814814815e-05, "loss": 3.048, "step": 380 }, { "epoch": 12.01875, "grad_norm": 6.303668022155762, "learning_rate": 3.2986111111111115e-05, "loss": 3.0619, "step": 390 }, { "epoch": 12.029166666666667, "grad_norm": 7.074206352233887, "learning_rate": 3.240740740740741e-05, "loss": 2.9491, "step": 400 }, { "epoch": 12.032291666666667, "eval_accuracy": 0.10702341137123746, "eval_loss": 3.3954155445098877, "eval_runtime": 154.7501, "eval_samples_per_second": 1.932, "eval_steps_per_second": 0.065, "step": 403 }, { "epoch": 13.007291666666667, "grad_norm": 6.961267471313477, "learning_rate": 3.182870370370371e-05, "loss": 2.8801, "step": 410 }, { "epoch": 13.017708333333333, "grad_norm": 5.816298007965088, "learning_rate": 3.125e-05, "loss": 2.9025, "step": 420 }, { "epoch": 13.028125, "grad_norm": 10.059369087219238, "learning_rate": 3.06712962962963e-05, "loss": 2.806, "step": 430 }, { "epoch": 13.032291666666667, "eval_accuracy": 0.1705685618729097, "eval_loss": 3.2551913261413574, "eval_runtime": 154.5283, "eval_samples_per_second": 1.935, "eval_steps_per_second": 0.065, "step": 434 }, { "epoch": 14.00625, "grad_norm": 6.142374515533447, "learning_rate": 3.0092592592592593e-05, "loss": 2.6302, "step": 440 }, { "epoch": 14.016666666666667, "grad_norm": 5.806746959686279, "learning_rate": 2.951388888888889e-05, "loss": 2.5778, "step": 450 }, { "epoch": 14.027083333333334, "grad_norm": 6.683168888092041, "learning_rate": 2.8935185185185186e-05, "loss": 2.4568, "step": 460 }, { "epoch": 14.032291666666667, "eval_accuracy": 0.2040133779264214, "eval_loss": 3.0654280185699463, "eval_runtime": 155.0, "eval_samples_per_second": 1.929, "eval_steps_per_second": 0.065, "step": 465 }, { "epoch": 15.005208333333334, "grad_norm": 8.81658935546875, "learning_rate": 2.8356481481481483e-05, "loss": 2.4185, "step": 470 }, { "epoch": 15.015625, "grad_norm": 17.862884521484375, "learning_rate": 2.777777777777778e-05, "loss": 2.3865, "step": 480 }, { "epoch": 15.026041666666666, "grad_norm": 7.982626438140869, "learning_rate": 2.7199074074074076e-05, "loss": 2.3102, "step": 490 }, { "epoch": 15.032291666666667, "eval_accuracy": 0.3010033444816054, "eval_loss": 2.7440292835235596, "eval_runtime": 154.0941, "eval_samples_per_second": 1.94, "eval_steps_per_second": 0.065, "step": 496 }, { "epoch": 16.004166666666666, "grad_norm": 7.09642219543457, "learning_rate": 2.6620370370370372e-05, "loss": 2.1208, "step": 500 }, { "epoch": 16.014583333333334, "grad_norm": 7.21767520904541, "learning_rate": 2.604166666666667e-05, "loss": 2.1481, "step": 510 }, { "epoch": 16.025, "grad_norm": 8.300311088562012, "learning_rate": 2.5462962962962965e-05, "loss": 2.2079, "step": 520 }, { "epoch": 16.032291666666666, "eval_accuracy": 0.31438127090301005, "eval_loss": 2.6789305210113525, "eval_runtime": 157.8279, "eval_samples_per_second": 1.894, "eval_steps_per_second": 0.063, "step": 527 }, { "epoch": 17.003125, "grad_norm": 8.606009483337402, "learning_rate": 2.488425925925926e-05, "loss": 2.0646, "step": 530 }, { "epoch": 17.013541666666665, "grad_norm": 7.775322437286377, "learning_rate": 2.4305555555555558e-05, "loss": 1.9341, "step": 540 }, { "epoch": 17.023958333333333, "grad_norm": 7.849579811096191, "learning_rate": 2.3726851851851854e-05, "loss": 1.9638, "step": 550 }, { "epoch": 17.032291666666666, "eval_accuracy": 0.36789297658862874, "eval_loss": 2.5920491218566895, "eval_runtime": 157.1641, "eval_samples_per_second": 1.902, "eval_steps_per_second": 0.064, "step": 558 }, { "epoch": 18.002083333333335, "grad_norm": 10.023223876953125, "learning_rate": 2.314814814814815e-05, "loss": 2.0018, "step": 560 }, { "epoch": 18.0125, "grad_norm": 7.027205467224121, "learning_rate": 2.2569444444444447e-05, "loss": 1.8571, "step": 570 }, { "epoch": 18.022916666666667, "grad_norm": 9.41115665435791, "learning_rate": 2.1990740740740743e-05, "loss": 1.7914, "step": 580 }, { "epoch": 18.032291666666666, "eval_accuracy": 0.3377926421404682, "eval_loss": 2.6151952743530273, "eval_runtime": 162.3006, "eval_samples_per_second": 1.842, "eval_steps_per_second": 0.062, "step": 589 }, { "epoch": 19.001041666666666, "grad_norm": 9.176055908203125, "learning_rate": 2.141203703703704e-05, "loss": 1.8165, "step": 590 }, { "epoch": 19.011458333333334, "grad_norm": 7.396921157836914, "learning_rate": 2.0833333333333336e-05, "loss": 1.6863, "step": 600 }, { "epoch": 19.021875, "grad_norm": 9.088372230529785, "learning_rate": 2.0254629629629632e-05, "loss": 1.597, "step": 610 }, { "epoch": 19.032291666666666, "grad_norm": 16.782529830932617, "learning_rate": 1.967592592592593e-05, "loss": 1.6925, "step": 620 }, { "epoch": 19.032291666666666, "eval_accuracy": 0.34448160535117056, "eval_loss": 2.5970685482025146, "eval_runtime": 161.5247, "eval_samples_per_second": 1.851, "eval_steps_per_second": 0.062, "step": 620 }, { "epoch": 20.010416666666668, "grad_norm": 10.128305435180664, "learning_rate": 1.9097222222222222e-05, "loss": 1.6303, "step": 630 }, { "epoch": 20.020833333333332, "grad_norm": 8.396921157836914, "learning_rate": 1.8518518518518518e-05, "loss": 1.6306, "step": 640 }, { "epoch": 20.03125, "grad_norm": 8.574676513671875, "learning_rate": 1.7939814814814815e-05, "loss": 1.5124, "step": 650 }, { "epoch": 20.032291666666666, "eval_accuracy": 0.34782608695652173, "eval_loss": 2.5766701698303223, "eval_runtime": 162.6227, "eval_samples_per_second": 1.839, "eval_steps_per_second": 0.061, "step": 651 }, { "epoch": 21.009375, "grad_norm": 8.482582092285156, "learning_rate": 1.736111111111111e-05, "loss": 1.5163, "step": 660 }, { "epoch": 21.019791666666666, "grad_norm": 7.806921482086182, "learning_rate": 1.6782407407407408e-05, "loss": 1.4579, "step": 670 }, { "epoch": 21.030208333333334, "grad_norm": 10.889120101928711, "learning_rate": 1.6203703703703704e-05, "loss": 1.4834, "step": 680 }, { "epoch": 21.032291666666666, "eval_accuracy": 0.3879598662207358, "eval_loss": 2.4438529014587402, "eval_runtime": 163.8828, "eval_samples_per_second": 1.824, "eval_steps_per_second": 0.061, "step": 682 }, { "epoch": 22.008333333333333, "grad_norm": 8.548680305480957, "learning_rate": 1.5625e-05, "loss": 1.3913, "step": 690 }, { "epoch": 22.01875, "grad_norm": 10.439925193786621, "learning_rate": 1.5046296296296297e-05, "loss": 1.4161, "step": 700 }, { "epoch": 22.029166666666665, "grad_norm": 8.317498207092285, "learning_rate": 1.4467592592592593e-05, "loss": 1.4565, "step": 710 }, { "epoch": 22.032291666666666, "eval_accuracy": 0.38461538461538464, "eval_loss": 2.405748128890991, "eval_runtime": 162.732, "eval_samples_per_second": 1.837, "eval_steps_per_second": 0.061, "step": 713 }, { "epoch": 23.007291666666667, "grad_norm": 8.510457038879395, "learning_rate": 1.388888888888889e-05, "loss": 1.4043, "step": 720 }, { "epoch": 23.017708333333335, "grad_norm": 9.268413543701172, "learning_rate": 1.3310185185185186e-05, "loss": 1.2734, "step": 730 }, { "epoch": 23.028125, "grad_norm": 8.000787734985352, "learning_rate": 1.2731481481481482e-05, "loss": 1.279, "step": 740 }, { "epoch": 23.032291666666666, "eval_accuracy": 0.35451505016722407, "eval_loss": 2.550072193145752, "eval_runtime": 160.4305, "eval_samples_per_second": 1.864, "eval_steps_per_second": 0.062, "step": 744 }, { "epoch": 24.00625, "grad_norm": 8.658973693847656, "learning_rate": 1.2152777777777779e-05, "loss": 1.1963, "step": 750 }, { "epoch": 24.016666666666666, "grad_norm": 7.558023929595947, "learning_rate": 1.1574074074074075e-05, "loss": 1.2601, "step": 760 }, { "epoch": 24.027083333333334, "grad_norm": 9.91243839263916, "learning_rate": 1.0995370370370372e-05, "loss": 1.1477, "step": 770 }, { "epoch": 24.032291666666666, "eval_accuracy": 0.44816053511705684, "eval_loss": 2.3246614933013916, "eval_runtime": 157.9743, "eval_samples_per_second": 1.893, "eval_steps_per_second": 0.063, "step": 775 }, { "epoch": 25.005208333333332, "grad_norm": 10.049819946289062, "learning_rate": 1.0416666666666668e-05, "loss": 1.1727, "step": 780 }, { "epoch": 25.015625, "grad_norm": 8.463665008544922, "learning_rate": 9.837962962962964e-06, "loss": 1.1207, "step": 790 }, { "epoch": 25.026041666666668, "grad_norm": 7.52623176574707, "learning_rate": 9.259259259259259e-06, "loss": 1.2573, "step": 800 }, { "epoch": 25.032291666666666, "eval_accuracy": 0.4882943143812709, "eval_loss": 2.1776490211486816, "eval_runtime": 158.1033, "eval_samples_per_second": 1.891, "eval_steps_per_second": 0.063, "step": 806 }, { "epoch": 26.004166666666666, "grad_norm": 9.336162567138672, "learning_rate": 8.680555555555556e-06, "loss": 1.1791, "step": 810 }, { "epoch": 26.014583333333334, "grad_norm": 6.728664875030518, "learning_rate": 8.101851851851852e-06, "loss": 1.1284, "step": 820 }, { "epoch": 26.025, "grad_norm": 7.307468414306641, "learning_rate": 7.523148148148148e-06, "loss": 1.0825, "step": 830 }, { "epoch": 26.032291666666666, "eval_accuracy": 0.4782608695652174, "eval_loss": 2.14430832862854, "eval_runtime": 156.854, "eval_samples_per_second": 1.906, "eval_steps_per_second": 0.064, "step": 837 }, { "epoch": 27.003125, "grad_norm": 8.843693733215332, "learning_rate": 6.944444444444445e-06, "loss": 1.1431, "step": 840 }, { "epoch": 27.013541666666665, "grad_norm": 9.648551940917969, "learning_rate": 6.365740740740741e-06, "loss": 1.0873, "step": 850 }, { "epoch": 27.023958333333333, "grad_norm": 10.551318168640137, "learning_rate": 5.787037037037038e-06, "loss": 1.2121, "step": 860 }, { "epoch": 27.032291666666666, "eval_accuracy": 0.4782608695652174, "eval_loss": 2.149031639099121, "eval_runtime": 156.4789, "eval_samples_per_second": 1.911, "eval_steps_per_second": 0.064, "step": 868 }, { "epoch": 28.002083333333335, "grad_norm": 7.037572383880615, "learning_rate": 5.208333333333334e-06, "loss": 1.0147, "step": 870 }, { "epoch": 28.0125, "grad_norm": 11.031582832336426, "learning_rate": 4.6296296296296296e-06, "loss": 1.0574, "step": 880 }, { "epoch": 28.022916666666667, "grad_norm": 7.408577919006348, "learning_rate": 4.050925925925926e-06, "loss": 1.0887, "step": 890 }, { "epoch": 28.032291666666666, "eval_accuracy": 0.47157190635451507, "eval_loss": 2.151550054550171, "eval_runtime": 157.7417, "eval_samples_per_second": 1.896, "eval_steps_per_second": 0.063, "step": 899 }, { "epoch": 29.001041666666666, "grad_norm": 7.416742324829102, "learning_rate": 3.4722222222222224e-06, "loss": 1.1448, "step": 900 }, { "epoch": 29.011458333333334, "grad_norm": 8.579631805419922, "learning_rate": 2.893518518518519e-06, "loss": 1.051, "step": 910 }, { "epoch": 29.021875, "grad_norm": 7.20076847076416, "learning_rate": 2.3148148148148148e-06, "loss": 0.9936, "step": 920 }, { "epoch": 29.032291666666666, "grad_norm": 20.40438461303711, "learning_rate": 1.7361111111111112e-06, "loss": 1.1127, "step": 930 }, { "epoch": 29.032291666666666, "eval_accuracy": 0.4882943143812709, "eval_loss": 2.1050899028778076, "eval_runtime": 158.1493, "eval_samples_per_second": 1.891, "eval_steps_per_second": 0.063, "step": 930 }, { "epoch": 30.010416666666668, "grad_norm": 7.520814895629883, "learning_rate": 1.1574074074074074e-06, "loss": 1.0214, "step": 940 }, { "epoch": 30.020833333333332, "grad_norm": 7.830852508544922, "learning_rate": 5.787037037037037e-07, "loss": 0.9516, "step": 950 }, { "epoch": 30.03125, "grad_norm": 8.230799674987793, "learning_rate": 0.0, "loss": 0.9905, "step": 960 }, { "epoch": 30.03125, "eval_accuracy": 0.4816053511705686, "eval_loss": 2.117016553878784, "eval_runtime": 175.2626, "eval_samples_per_second": 1.706, "eval_steps_per_second": 0.057, "step": 960 }, { "epoch": 30.03125, "step": 960, "total_flos": 3.733004678582315e+19, "train_loss": 2.590478341778119, "train_runtime": 22304.5183, "train_samples_per_second": 1.377, "train_steps_per_second": 0.043 }, { "epoch": 30.03125, "eval_accuracy": 0.4882943143812709, "eval_loss": 2.1814045906066895, "eval_runtime": 181.2056, "eval_samples_per_second": 1.65, "eval_steps_per_second": 0.055, "step": 960 }, { "epoch": 30.03125, "eval_accuracy": 0.4882943143812709, "eval_loss": 2.180596113204956, "eval_runtime": 179.6038, "eval_samples_per_second": 1.665, "eval_steps_per_second": 0.056, "step": 960 } ], "logging_steps": 10, "max_steps": 960, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.733004678582315e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }