{ "best_metric": 0.5185972369819342, "best_model_checkpoint": "./results_bert-base-uncased_combined_lr1e-05_seed45/checkpoint-1200", "epoch": 39.34426229508197, "eval_steps": 500, "global_step": 1200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.6557377049180327, "grad_norm": 7.196445941925049, "learning_rate": 1.5833333333333333e-06, "loss": 1.8973, "step": 20 }, { "epoch": 0.9836065573770492, "eval_accuracy": 0.12964930924548354, "eval_f1": 0.06598924045051528, "eval_loss": 1.80304753780365, "eval_precision": 0.06377482620857301, "eval_recall": 0.12964930924548354, "eval_runtime": 0.2953, "eval_samples_per_second": 3186.127, "eval_steps_per_second": 16.929, "step": 30 }, { "epoch": 1.3114754098360657, "grad_norm": 7.820695400238037, "learning_rate": 3.2500000000000002e-06, "loss": 1.8091, "step": 40 }, { "epoch": 1.9672131147540983, "grad_norm": 5.692554950714111, "learning_rate": 4.9166666666666665e-06, "loss": 1.7275, "step": 60 }, { "epoch": 2.0, "eval_accuracy": 0.2731137088204038, "eval_f1": 0.22275890306102614, "eval_loss": 1.6800185441970825, "eval_precision": 0.21610225983181644, "eval_recall": 0.2731137088204038, "eval_runtime": 0.2824, "eval_samples_per_second": 3332.104, "eval_steps_per_second": 17.705, "step": 61 }, { "epoch": 2.6229508196721314, "grad_norm": 5.084521770477295, "learning_rate": 6.5000000000000004e-06, "loss": 1.6714, "step": 80 }, { "epoch": 2.9836065573770494, "eval_accuracy": 0.38257173219978746, "eval_f1": 0.33377079740961235, "eval_loss": 1.5590412616729736, "eval_precision": 0.405931226928208, "eval_recall": 0.38257173219978746, "eval_runtime": 0.2862, "eval_samples_per_second": 3287.384, "eval_steps_per_second": 17.468, "step": 91 }, { "epoch": 3.278688524590164, "grad_norm": 7.280013084411621, "learning_rate": 8.166666666666668e-06, "loss": 1.5947, "step": 100 }, { "epoch": 3.9344262295081966, "grad_norm": 7.112355709075928, "learning_rate": 9.833333333333333e-06, "loss": 1.5347, "step": 120 }, { "epoch": 4.0, "eval_accuracy": 0.4059511158342189, "eval_f1": 0.36712885756401803, "eval_loss": 1.4780991077423096, "eval_precision": 0.4071133651031437, "eval_recall": 0.4059511158342189, "eval_runtime": 0.2834, "eval_samples_per_second": 3320.842, "eval_steps_per_second": 17.645, "step": 122 }, { "epoch": 4.590163934426229, "grad_norm": 4.6436381340026855, "learning_rate": 9.833333333333333e-06, "loss": 1.4907, "step": 140 }, { "epoch": 4.983606557377049, "eval_accuracy": 0.4261424017003188, "eval_f1": 0.3946275379809259, "eval_loss": 1.4262386560440063, "eval_precision": 0.39385114675841176, "eval_recall": 0.4261424017003188, "eval_runtime": 0.3059, "eval_samples_per_second": 3075.844, "eval_steps_per_second": 16.343, "step": 152 }, { "epoch": 5.245901639344262, "grad_norm": 6.875060081481934, "learning_rate": 9.64814814814815e-06, "loss": 1.4561, "step": 160 }, { "epoch": 5.901639344262295, "grad_norm": 4.790427207946777, "learning_rate": 9.472222222222223e-06, "loss": 1.4254, "step": 180 }, { "epoch": 6.0, "eval_accuracy": 0.45483528161530284, "eval_f1": 0.4291814703075595, "eval_loss": 1.3784066438674927, "eval_precision": 0.4185165606426249, "eval_recall": 0.45483528161530284, "eval_runtime": 0.295, "eval_samples_per_second": 3189.291, "eval_steps_per_second": 16.946, "step": 183 }, { "epoch": 6.557377049180328, "grad_norm": 5.400150299072266, "learning_rate": 9.296296296296296e-06, "loss": 1.4031, "step": 200 }, { "epoch": 6.983606557377049, "eval_accuracy": 0.46865037194473963, "eval_f1": 0.44252499417149155, "eval_loss": 1.3631744384765625, "eval_precision": 0.49440104417651237, "eval_recall": 0.46865037194473963, "eval_runtime": 0.2976, "eval_samples_per_second": 3161.889, "eval_steps_per_second": 16.801, "step": 213 }, { "epoch": 7.213114754098361, "grad_norm": 10.320267677307129, "learning_rate": 9.111111111111112e-06, "loss": 1.3894, "step": 220 }, { "epoch": 7.868852459016393, "grad_norm": 8.646105766296387, "learning_rate": 8.925925925925927e-06, "loss": 1.3661, "step": 240 }, { "epoch": 8.0, "eval_accuracy": 0.46971307120085015, "eval_f1": 0.4568042833868587, "eval_loss": 1.3476293087005615, "eval_precision": 0.47237771275946455, "eval_recall": 0.46971307120085015, "eval_runtime": 0.294, "eval_samples_per_second": 3200.359, "eval_steps_per_second": 17.005, "step": 244 }, { "epoch": 8.524590163934427, "grad_norm": 4.57098388671875, "learning_rate": 8.740740740740741e-06, "loss": 1.3528, "step": 260 }, { "epoch": 8.98360655737705, "eval_accuracy": 0.4707757704569607, "eval_f1": 0.4585472022799805, "eval_loss": 1.3285961151123047, "eval_precision": 0.47091141121713276, "eval_recall": 0.4707757704569607, "eval_runtime": 0.2823, "eval_samples_per_second": 3333.381, "eval_steps_per_second": 17.712, "step": 274 }, { "epoch": 9.180327868852459, "grad_norm": 5.434332370758057, "learning_rate": 8.555555555555556e-06, "loss": 1.3438, "step": 280 }, { "epoch": 9.836065573770492, "grad_norm": 6.4954938888549805, "learning_rate": 8.37037037037037e-06, "loss": 1.309, "step": 300 }, { "epoch": 10.0, "eval_accuracy": 0.46865037194473963, "eval_f1": 0.4568412527812332, "eval_loss": 1.332553505897522, "eval_precision": 0.46992953260701664, "eval_recall": 0.46865037194473963, "eval_runtime": 0.2864, "eval_samples_per_second": 3285.515, "eval_steps_per_second": 17.458, "step": 305 }, { "epoch": 10.491803278688524, "grad_norm": 6.318108081817627, "learning_rate": 8.185185185185187e-06, "loss": 1.3036, "step": 320 }, { "epoch": 10.98360655737705, "eval_accuracy": 0.4622741764080765, "eval_f1": 0.45324041357026684, "eval_loss": 1.3212019205093384, "eval_precision": 0.47221675701090976, "eval_recall": 0.4622741764080765, "eval_runtime": 0.3006, "eval_samples_per_second": 3130.452, "eval_steps_per_second": 16.634, "step": 335 }, { "epoch": 11.147540983606557, "grad_norm": 8.44897174835205, "learning_rate": 8.000000000000001e-06, "loss": 1.3046, "step": 340 }, { "epoch": 11.80327868852459, "grad_norm": 6.639650821685791, "learning_rate": 7.814814814814816e-06, "loss": 1.2737, "step": 360 }, { "epoch": 12.0, "eval_accuracy": 0.5143464399574921, "eval_f1": 0.5059771351474103, "eval_loss": 1.300374984741211, "eval_precision": 0.5130157529201715, "eval_recall": 0.5143464399574921, "eval_runtime": 0.2984, "eval_samples_per_second": 3153.083, "eval_steps_per_second": 16.754, "step": 366 }, { "epoch": 12.459016393442623, "grad_norm": 5.371426582336426, "learning_rate": 7.62962962962963e-06, "loss": 1.2642, "step": 380 }, { "epoch": 12.98360655737705, "eval_accuracy": 0.48884165781083955, "eval_f1": 0.48178502839341597, "eval_loss": 1.315968632698059, "eval_precision": 0.4952118784240597, "eval_recall": 0.48884165781083955, "eval_runtime": 0.3, "eval_samples_per_second": 3136.49, "eval_steps_per_second": 16.666, "step": 396 }, { "epoch": 13.114754098360656, "grad_norm": 6.083697319030762, "learning_rate": 7.444444444444445e-06, "loss": 1.2688, "step": 400 }, { "epoch": 13.770491803278688, "grad_norm": 7.939155101776123, "learning_rate": 7.2592592592592605e-06, "loss": 1.2395, "step": 420 }, { "epoch": 14.0, "eval_accuracy": 0.5058448459086079, "eval_f1": 0.5012340867825481, "eval_loss": 1.3054472208023071, "eval_precision": 0.5059260195308296, "eval_recall": 0.5058448459086079, "eval_runtime": 0.2951, "eval_samples_per_second": 3189.098, "eval_steps_per_second": 16.945, "step": 427 }, { "epoch": 14.426229508196721, "grad_norm": 5.19802188873291, "learning_rate": 7.074074074074074e-06, "loss": 1.2324, "step": 440 }, { "epoch": 14.98360655737705, "eval_accuracy": 0.4909670563230606, "eval_f1": 0.48389179664553195, "eval_loss": 1.3174266815185547, "eval_precision": 0.4974190684341335, "eval_recall": 0.4909670563230606, "eval_runtime": 0.2911, "eval_samples_per_second": 3232.523, "eval_steps_per_second": 17.176, "step": 457 }, { "epoch": 15.081967213114755, "grad_norm": 14.863390922546387, "learning_rate": 6.88888888888889e-06, "loss": 1.2294, "step": 460 }, { "epoch": 15.737704918032787, "grad_norm": 6.973830699920654, "learning_rate": 6.703703703703704e-06, "loss": 1.2043, "step": 480 }, { "epoch": 16.0, "eval_accuracy": 0.5079702444208289, "eval_f1": 0.5009933439562343, "eval_loss": 1.301389217376709, "eval_precision": 0.5133376615246589, "eval_recall": 0.5079702444208289, "eval_runtime": 0.2897, "eval_samples_per_second": 3248.415, "eval_steps_per_second": 17.26, "step": 488 }, { "epoch": 16.39344262295082, "grad_norm": 4.816354751586914, "learning_rate": 6.51851851851852e-06, "loss": 1.1878, "step": 500 }, { "epoch": 16.983606557377048, "eval_accuracy": 0.5047821466524973, "eval_f1": 0.5026293126534545, "eval_loss": 1.3040825128555298, "eval_precision": 0.5084106817664226, "eval_recall": 0.5047821466524973, "eval_runtime": 0.2972, "eval_samples_per_second": 3166.554, "eval_steps_per_second": 16.825, "step": 518 }, { "epoch": 17.049180327868854, "grad_norm": 9.227392196655273, "learning_rate": 6.333333333333333e-06, "loss": 1.185, "step": 520 }, { "epoch": 17.704918032786885, "grad_norm": 8.637321472167969, "learning_rate": 6.148148148148149e-06, "loss": 1.1744, "step": 540 }, { "epoch": 18.0, "eval_accuracy": 0.49415515409139216, "eval_f1": 0.4884746258823515, "eval_loss": 1.3026150465011597, "eval_precision": 0.4988838007681139, "eval_recall": 0.49415515409139216, "eval_runtime": 0.2868, "eval_samples_per_second": 3280.605, "eval_steps_per_second": 17.431, "step": 549 }, { "epoch": 18.360655737704917, "grad_norm": 10.460613250732422, "learning_rate": 5.962962962962963e-06, "loss": 1.1621, "step": 560 }, { "epoch": 18.983606557377048, "eval_accuracy": 0.5026567481402763, "eval_f1": 0.49433902872030766, "eval_loss": 1.3115041255950928, "eval_precision": 0.5064079316801127, "eval_recall": 0.5026567481402763, "eval_runtime": 0.2887, "eval_samples_per_second": 3259.021, "eval_steps_per_second": 17.317, "step": 579 }, { "epoch": 19.016393442622952, "grad_norm": 9.569828033447266, "learning_rate": 5.777777777777778e-06, "loss": 1.1794, "step": 580 }, { "epoch": 19.672131147540984, "grad_norm": 9.484976768493652, "learning_rate": 5.5925925925925926e-06, "loss": 1.1453, "step": 600 }, { "epoch": 20.0, "eval_accuracy": 0.49946865037194477, "eval_f1": 0.4947569022574746, "eval_loss": 1.3135700225830078, "eval_precision": 0.5051709468240039, "eval_recall": 0.49946865037194477, "eval_runtime": 0.284, "eval_samples_per_second": 3313.863, "eval_steps_per_second": 17.608, "step": 610 }, { "epoch": 20.327868852459016, "grad_norm": 6.509533405303955, "learning_rate": 5.407407407407408e-06, "loss": 1.1435, "step": 620 }, { "epoch": 20.983606557377048, "grad_norm": 6.100685119628906, "learning_rate": 5.2222222222222226e-06, "loss": 1.1546, "step": 640 }, { "epoch": 20.983606557377048, "eval_accuracy": 0.49309245483528164, "eval_f1": 0.4888614092606576, "eval_loss": 1.3327937126159668, "eval_precision": 0.5027325713159697, "eval_recall": 0.49309245483528164, "eval_runtime": 0.288, "eval_samples_per_second": 3267.438, "eval_steps_per_second": 17.362, "step": 640 }, { "epoch": 21.639344262295083, "grad_norm": 10.228110313415527, "learning_rate": 5.037037037037037e-06, "loss": 1.1118, "step": 660 }, { "epoch": 22.0, "eval_accuracy": 0.5037194473963869, "eval_f1": 0.49935836644717385, "eval_loss": 1.3201266527175903, "eval_precision": 0.5068092491618498, "eval_recall": 0.5037194473963869, "eval_runtime": 0.2806, "eval_samples_per_second": 3353.599, "eval_steps_per_second": 17.819, "step": 671 }, { "epoch": 22.295081967213115, "grad_norm": 4.691425800323486, "learning_rate": 4.851851851851852e-06, "loss": 1.121, "step": 680 }, { "epoch": 22.950819672131146, "grad_norm": 5.896801471710205, "learning_rate": 4.666666666666667e-06, "loss": 1.1013, "step": 700 }, { "epoch": 22.983606557377048, "eval_accuracy": 0.5079702444208289, "eval_f1": 0.5056149574862951, "eval_loss": 1.3185617923736572, "eval_precision": 0.5104193389071094, "eval_recall": 0.5079702444208289, "eval_runtime": 0.2841, "eval_samples_per_second": 3312.06, "eval_steps_per_second": 17.599, "step": 701 }, { "epoch": 23.60655737704918, "grad_norm": 9.211535453796387, "learning_rate": 4.481481481481482e-06, "loss": 1.0909, "step": 720 }, { "epoch": 24.0, "eval_accuracy": 0.5047821466524973, "eval_f1": 0.5027837397043571, "eval_loss": 1.3096483945846558, "eval_precision": 0.5132526138930299, "eval_recall": 0.5047821466524973, "eval_runtime": 0.3019, "eval_samples_per_second": 3116.922, "eval_steps_per_second": 16.562, "step": 732 }, { "epoch": 24.262295081967213, "grad_norm": 5.588741302490234, "learning_rate": 4.296296296296296e-06, "loss": 1.0904, "step": 740 }, { "epoch": 24.918032786885245, "grad_norm": 7.369673728942871, "learning_rate": 4.111111111111111e-06, "loss": 1.0765, "step": 760 }, { "epoch": 24.983606557377048, "eval_accuracy": 0.5079702444208289, "eval_f1": 0.504151686335666, "eval_loss": 1.3278100490570068, "eval_precision": 0.5111957998187558, "eval_recall": 0.5079702444208289, "eval_runtime": 0.2837, "eval_samples_per_second": 3316.497, "eval_steps_per_second": 17.622, "step": 762 }, { "epoch": 25.57377049180328, "grad_norm": 9.449226379394531, "learning_rate": 3.925925925925926e-06, "loss": 1.0687, "step": 780 }, { "epoch": 26.0, "eval_accuracy": 0.5037194473963869, "eval_f1": 0.501835797044231, "eval_loss": 1.3304780721664429, "eval_precision": 0.5109551773238672, "eval_recall": 0.5037194473963869, "eval_runtime": 0.2896, "eval_samples_per_second": 3249.747, "eval_steps_per_second": 17.268, "step": 793 }, { "epoch": 26.229508196721312, "grad_norm": 5.422854423522949, "learning_rate": 3.740740740740741e-06, "loss": 1.0579, "step": 800 }, { "epoch": 26.885245901639344, "grad_norm": 5.668990612030029, "learning_rate": 3.555555555555556e-06, "loss": 1.0544, "step": 820 }, { "epoch": 26.983606557377048, "eval_accuracy": 0.5175345377258236, "eval_f1": 0.5164541262908391, "eval_loss": 1.318372130393982, "eval_precision": 0.5223443720891333, "eval_recall": 0.5175345377258236, "eval_runtime": 0.302, "eval_samples_per_second": 3115.635, "eval_steps_per_second": 16.555, "step": 823 }, { "epoch": 27.540983606557376, "grad_norm": 10.203471183776855, "learning_rate": 3.3703703703703705e-06, "loss": 1.0577, "step": 840 }, { "epoch": 28.0, "eval_accuracy": 0.5069075451647184, "eval_f1": 0.5033442589921326, "eval_loss": 1.3318045139312744, "eval_precision": 0.5084800819821859, "eval_recall": 0.5069075451647184, "eval_runtime": 0.2944, "eval_samples_per_second": 3196.236, "eval_steps_per_second": 16.983, "step": 854 }, { "epoch": 28.19672131147541, "grad_norm": 6.67368221282959, "learning_rate": 3.1851851851851855e-06, "loss": 1.0434, "step": 860 }, { "epoch": 28.852459016393443, "grad_norm": 6.8327765464782715, "learning_rate": 3e-06, "loss": 1.0475, "step": 880 }, { "epoch": 28.983606557377048, "eval_accuracy": 0.51009564293305, "eval_f1": 0.5074698366226925, "eval_loss": 1.3202146291732788, "eval_precision": 0.5157834747082791, "eval_recall": 0.51009564293305, "eval_runtime": 0.2822, "eval_samples_per_second": 3334.68, "eval_steps_per_second": 17.719, "step": 884 }, { "epoch": 29.508196721311474, "grad_norm": 10.031432151794434, "learning_rate": 2.814814814814815e-06, "loss": 1.0312, "step": 900 }, { "epoch": 30.0, "eval_accuracy": 0.5090329436769394, "eval_f1": 0.5059785444183024, "eval_loss": 1.343613862991333, "eval_precision": 0.5104547305202234, "eval_recall": 0.5090329436769394, "eval_runtime": 0.2901, "eval_samples_per_second": 3244.1, "eval_steps_per_second": 17.238, "step": 915 }, { "epoch": 30.16393442622951, "grad_norm": 5.074967861175537, "learning_rate": 2.6296296296296297e-06, "loss": 1.0248, "step": 920 }, { "epoch": 30.81967213114754, "grad_norm": 7.508426189422607, "learning_rate": 2.4444444444444447e-06, "loss": 1.0231, "step": 940 }, { "epoch": 30.983606557377048, "eval_accuracy": 0.5143464399574921, "eval_f1": 0.5128246980549828, "eval_loss": 1.3461003303527832, "eval_precision": 0.5172606351162928, "eval_recall": 0.5143464399574921, "eval_runtime": 0.298, "eval_samples_per_second": 3157.909, "eval_steps_per_second": 16.78, "step": 945 }, { "epoch": 31.475409836065573, "grad_norm": 12.130611419677734, "learning_rate": 2.2592592592592592e-06, "loss": 1.0185, "step": 960 }, { "epoch": 32.0, "eval_accuracy": 0.5090329436769394, "eval_f1": 0.5062984065924716, "eval_loss": 1.3429207801818848, "eval_precision": 0.5110311107411546, "eval_recall": 0.5090329436769394, "eval_runtime": 0.2991, "eval_samples_per_second": 3145.81, "eval_steps_per_second": 16.715, "step": 976 }, { "epoch": 32.131147540983605, "grad_norm": 7.62930154800415, "learning_rate": 2.0740740740740742e-06, "loss": 0.9978, "step": 980 }, { "epoch": 32.78688524590164, "grad_norm": 6.595398902893066, "learning_rate": 1.888888888888889e-06, "loss": 1.0102, "step": 1000 }, { "epoch": 32.98360655737705, "eval_accuracy": 0.5143464399574921, "eval_f1": 0.5128260191462034, "eval_loss": 1.3501225709915161, "eval_precision": 0.5164523478379722, "eval_recall": 0.5143464399574921, "eval_runtime": 0.2862, "eval_samples_per_second": 3287.538, "eval_steps_per_second": 17.468, "step": 1006 }, { "epoch": 33.442622950819676, "grad_norm": 14.922042846679688, "learning_rate": 1.7037037037037038e-06, "loss": 1.0024, "step": 1020 }, { "epoch": 34.0, "eval_accuracy": 0.5132837407013815, "eval_f1": 0.5115794743472372, "eval_loss": 1.3449465036392212, "eval_precision": 0.518258292844791, "eval_recall": 0.5132837407013815, "eval_runtime": 0.2827, "eval_samples_per_second": 3328.625, "eval_steps_per_second": 17.687, "step": 1037 }, { "epoch": 34.09836065573771, "grad_norm": 8.07007122039795, "learning_rate": 1.5185185185185186e-06, "loss": 0.997, "step": 1040 }, { "epoch": 34.75409836065574, "grad_norm": 6.781075477600098, "learning_rate": 1.3333333333333334e-06, "loss": 0.9991, "step": 1060 }, { "epoch": 34.98360655737705, "eval_accuracy": 0.5143464399574921, "eval_f1": 0.5114723136353695, "eval_loss": 1.3471170663833618, "eval_precision": 0.5158292841288152, "eval_recall": 0.5143464399574921, "eval_runtime": 0.288, "eval_samples_per_second": 3267.384, "eval_steps_per_second": 17.361, "step": 1067 }, { "epoch": 35.40983606557377, "grad_norm": 11.607426643371582, "learning_rate": 1.1481481481481482e-06, "loss": 0.983, "step": 1080 }, { "epoch": 36.0, "eval_accuracy": 0.5079702444208289, "eval_f1": 0.504843217181714, "eval_loss": 1.3585803508758545, "eval_precision": 0.509438648352314, "eval_recall": 0.5079702444208289, "eval_runtime": 0.2932, "eval_samples_per_second": 3209.163, "eval_steps_per_second": 17.052, "step": 1098 }, { "epoch": 36.0655737704918, "grad_norm": 7.522489547729492, "learning_rate": 9.62962962962963e-07, "loss": 0.9771, "step": 1100 }, { "epoch": 36.721311475409834, "grad_norm": 6.985760688781738, "learning_rate": 7.777777777777779e-07, "loss": 0.9827, "step": 1120 }, { "epoch": 36.98360655737705, "eval_accuracy": 0.5058448459086079, "eval_f1": 0.5029473061894668, "eval_loss": 1.3584290742874146, "eval_precision": 0.5075082082551082, "eval_recall": 0.5058448459086079, "eval_runtime": 0.286, "eval_samples_per_second": 3289.897, "eval_steps_per_second": 17.481, "step": 1128 }, { "epoch": 37.377049180327866, "grad_norm": 15.159732818603516, "learning_rate": 5.925925925925927e-07, "loss": 0.9807, "step": 1140 }, { "epoch": 38.0, "eval_accuracy": 0.5154091392136025, "eval_f1": 0.5134611936291602, "eval_loss": 1.3536242246627808, "eval_precision": 0.5189235103268454, "eval_recall": 0.5154091392136025, "eval_runtime": 0.2878, "eval_samples_per_second": 3269.755, "eval_steps_per_second": 17.374, "step": 1159 }, { "epoch": 38.032786885245905, "grad_norm": 8.501507759094238, "learning_rate": 4.074074074074075e-07, "loss": 0.9819, "step": 1160 }, { "epoch": 38.68852459016394, "grad_norm": 7.350555419921875, "learning_rate": 2.2222222222222224e-07, "loss": 0.9698, "step": 1180 }, { "epoch": 38.98360655737705, "eval_accuracy": 0.5143464399574921, "eval_f1": 0.5126916822770045, "eval_loss": 1.3546310663223267, "eval_precision": 0.5184421351153233, "eval_recall": 0.5143464399574921, "eval_runtime": 0.2918, "eval_samples_per_second": 3225.014, "eval_steps_per_second": 17.136, "step": 1189 }, { "epoch": 39.34426229508197, "grad_norm": 13.357161521911621, "learning_rate": 3.703703703703704e-08, "loss": 0.9792, "step": 1200 }, { "epoch": 39.34426229508197, "eval_accuracy": 0.5185972369819342, "eval_f1": 0.516675376095624, "eval_loss": 1.353948950767517, "eval_precision": 0.5216418875507168, "eval_recall": 0.5185972369819342, "eval_runtime": 0.2709, "eval_samples_per_second": 3473.833, "eval_steps_per_second": 18.458, "step": 1200 } ], "logging_steps": 20, "max_steps": 1200, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.515574105997312e+16, "train_batch_size": 24, "trial_name": null, "trial_params": null }