|
{ |
|
"best_metric": 0.5185972369819342, |
|
"best_model_checkpoint": "./results_bert-base-uncased_combined_lr1e-05_seed45/checkpoint-1200", |
|
"epoch": 39.34426229508197, |
|
"eval_steps": 500, |
|
"global_step": 1200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.6557377049180327, |
|
"grad_norm": 7.196445941925049, |
|
"learning_rate": 1.5833333333333333e-06, |
|
"loss": 1.8973, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.9836065573770492, |
|
"eval_accuracy": 0.12964930924548354, |
|
"eval_f1": 0.06598924045051528, |
|
"eval_loss": 1.80304753780365, |
|
"eval_precision": 0.06377482620857301, |
|
"eval_recall": 0.12964930924548354, |
|
"eval_runtime": 0.2953, |
|
"eval_samples_per_second": 3186.127, |
|
"eval_steps_per_second": 16.929, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.3114754098360657, |
|
"grad_norm": 7.820695400238037, |
|
"learning_rate": 3.2500000000000002e-06, |
|
"loss": 1.8091, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.9672131147540983, |
|
"grad_norm": 5.692554950714111, |
|
"learning_rate": 4.9166666666666665e-06, |
|
"loss": 1.7275, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.2731137088204038, |
|
"eval_f1": 0.22275890306102614, |
|
"eval_loss": 1.6800185441970825, |
|
"eval_precision": 0.21610225983181644, |
|
"eval_recall": 0.2731137088204038, |
|
"eval_runtime": 0.2824, |
|
"eval_samples_per_second": 3332.104, |
|
"eval_steps_per_second": 17.705, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 2.6229508196721314, |
|
"grad_norm": 5.084521770477295, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 1.6714, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.9836065573770494, |
|
"eval_accuracy": 0.38257173219978746, |
|
"eval_f1": 0.33377079740961235, |
|
"eval_loss": 1.5590412616729736, |
|
"eval_precision": 0.405931226928208, |
|
"eval_recall": 0.38257173219978746, |
|
"eval_runtime": 0.2862, |
|
"eval_samples_per_second": 3287.384, |
|
"eval_steps_per_second": 17.468, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 3.278688524590164, |
|
"grad_norm": 7.280013084411621, |
|
"learning_rate": 8.166666666666668e-06, |
|
"loss": 1.5947, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.9344262295081966, |
|
"grad_norm": 7.112355709075928, |
|
"learning_rate": 9.833333333333333e-06, |
|
"loss": 1.5347, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.4059511158342189, |
|
"eval_f1": 0.36712885756401803, |
|
"eval_loss": 1.4780991077423096, |
|
"eval_precision": 0.4071133651031437, |
|
"eval_recall": 0.4059511158342189, |
|
"eval_runtime": 0.2834, |
|
"eval_samples_per_second": 3320.842, |
|
"eval_steps_per_second": 17.645, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 4.590163934426229, |
|
"grad_norm": 4.6436381340026855, |
|
"learning_rate": 9.833333333333333e-06, |
|
"loss": 1.4907, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.983606557377049, |
|
"eval_accuracy": 0.4261424017003188, |
|
"eval_f1": 0.3946275379809259, |
|
"eval_loss": 1.4262386560440063, |
|
"eval_precision": 0.39385114675841176, |
|
"eval_recall": 0.4261424017003188, |
|
"eval_runtime": 0.3059, |
|
"eval_samples_per_second": 3075.844, |
|
"eval_steps_per_second": 16.343, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 5.245901639344262, |
|
"grad_norm": 6.875060081481934, |
|
"learning_rate": 9.64814814814815e-06, |
|
"loss": 1.4561, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 5.901639344262295, |
|
"grad_norm": 4.790427207946777, |
|
"learning_rate": 9.472222222222223e-06, |
|
"loss": 1.4254, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.45483528161530284, |
|
"eval_f1": 0.4291814703075595, |
|
"eval_loss": 1.3784066438674927, |
|
"eval_precision": 0.4185165606426249, |
|
"eval_recall": 0.45483528161530284, |
|
"eval_runtime": 0.295, |
|
"eval_samples_per_second": 3189.291, |
|
"eval_steps_per_second": 16.946, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 6.557377049180328, |
|
"grad_norm": 5.400150299072266, |
|
"learning_rate": 9.296296296296296e-06, |
|
"loss": 1.4031, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.983606557377049, |
|
"eval_accuracy": 0.46865037194473963, |
|
"eval_f1": 0.44252499417149155, |
|
"eval_loss": 1.3631744384765625, |
|
"eval_precision": 0.49440104417651237, |
|
"eval_recall": 0.46865037194473963, |
|
"eval_runtime": 0.2976, |
|
"eval_samples_per_second": 3161.889, |
|
"eval_steps_per_second": 16.801, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 7.213114754098361, |
|
"grad_norm": 10.320267677307129, |
|
"learning_rate": 9.111111111111112e-06, |
|
"loss": 1.3894, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 7.868852459016393, |
|
"grad_norm": 8.646105766296387, |
|
"learning_rate": 8.925925925925927e-06, |
|
"loss": 1.3661, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.46971307120085015, |
|
"eval_f1": 0.4568042833868587, |
|
"eval_loss": 1.3476293087005615, |
|
"eval_precision": 0.47237771275946455, |
|
"eval_recall": 0.46971307120085015, |
|
"eval_runtime": 0.294, |
|
"eval_samples_per_second": 3200.359, |
|
"eval_steps_per_second": 17.005, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 8.524590163934427, |
|
"grad_norm": 4.57098388671875, |
|
"learning_rate": 8.740740740740741e-06, |
|
"loss": 1.3528, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 8.98360655737705, |
|
"eval_accuracy": 0.4707757704569607, |
|
"eval_f1": 0.4585472022799805, |
|
"eval_loss": 1.3285961151123047, |
|
"eval_precision": 0.47091141121713276, |
|
"eval_recall": 0.4707757704569607, |
|
"eval_runtime": 0.2823, |
|
"eval_samples_per_second": 3333.381, |
|
"eval_steps_per_second": 17.712, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 9.180327868852459, |
|
"grad_norm": 5.434332370758057, |
|
"learning_rate": 8.555555555555556e-06, |
|
"loss": 1.3438, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 9.836065573770492, |
|
"grad_norm": 6.4954938888549805, |
|
"learning_rate": 8.37037037037037e-06, |
|
"loss": 1.309, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.46865037194473963, |
|
"eval_f1": 0.4568412527812332, |
|
"eval_loss": 1.332553505897522, |
|
"eval_precision": 0.46992953260701664, |
|
"eval_recall": 0.46865037194473963, |
|
"eval_runtime": 0.2864, |
|
"eval_samples_per_second": 3285.515, |
|
"eval_steps_per_second": 17.458, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 10.491803278688524, |
|
"grad_norm": 6.318108081817627, |
|
"learning_rate": 8.185185185185187e-06, |
|
"loss": 1.3036, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 10.98360655737705, |
|
"eval_accuracy": 0.4622741764080765, |
|
"eval_f1": 0.45324041357026684, |
|
"eval_loss": 1.3212019205093384, |
|
"eval_precision": 0.47221675701090976, |
|
"eval_recall": 0.4622741764080765, |
|
"eval_runtime": 0.3006, |
|
"eval_samples_per_second": 3130.452, |
|
"eval_steps_per_second": 16.634, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 11.147540983606557, |
|
"grad_norm": 8.44897174835205, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.3046, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 11.80327868852459, |
|
"grad_norm": 6.639650821685791, |
|
"learning_rate": 7.814814814814816e-06, |
|
"loss": 1.2737, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5143464399574921, |
|
"eval_f1": 0.5059771351474103, |
|
"eval_loss": 1.300374984741211, |
|
"eval_precision": 0.5130157529201715, |
|
"eval_recall": 0.5143464399574921, |
|
"eval_runtime": 0.2984, |
|
"eval_samples_per_second": 3153.083, |
|
"eval_steps_per_second": 16.754, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 12.459016393442623, |
|
"grad_norm": 5.371426582336426, |
|
"learning_rate": 7.62962962962963e-06, |
|
"loss": 1.2642, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 12.98360655737705, |
|
"eval_accuracy": 0.48884165781083955, |
|
"eval_f1": 0.48178502839341597, |
|
"eval_loss": 1.315968632698059, |
|
"eval_precision": 0.4952118784240597, |
|
"eval_recall": 0.48884165781083955, |
|
"eval_runtime": 0.3, |
|
"eval_samples_per_second": 3136.49, |
|
"eval_steps_per_second": 16.666, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 13.114754098360656, |
|
"grad_norm": 6.083697319030762, |
|
"learning_rate": 7.444444444444445e-06, |
|
"loss": 1.2688, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 13.770491803278688, |
|
"grad_norm": 7.939155101776123, |
|
"learning_rate": 7.2592592592592605e-06, |
|
"loss": 1.2395, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5058448459086079, |
|
"eval_f1": 0.5012340867825481, |
|
"eval_loss": 1.3054472208023071, |
|
"eval_precision": 0.5059260195308296, |
|
"eval_recall": 0.5058448459086079, |
|
"eval_runtime": 0.2951, |
|
"eval_samples_per_second": 3189.098, |
|
"eval_steps_per_second": 16.945, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 14.426229508196721, |
|
"grad_norm": 5.19802188873291, |
|
"learning_rate": 7.074074074074074e-06, |
|
"loss": 1.2324, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 14.98360655737705, |
|
"eval_accuracy": 0.4909670563230606, |
|
"eval_f1": 0.48389179664553195, |
|
"eval_loss": 1.3174266815185547, |
|
"eval_precision": 0.4974190684341335, |
|
"eval_recall": 0.4909670563230606, |
|
"eval_runtime": 0.2911, |
|
"eval_samples_per_second": 3232.523, |
|
"eval_steps_per_second": 17.176, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 15.081967213114755, |
|
"grad_norm": 14.863390922546387, |
|
"learning_rate": 6.88888888888889e-06, |
|
"loss": 1.2294, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 15.737704918032787, |
|
"grad_norm": 6.973830699920654, |
|
"learning_rate": 6.703703703703704e-06, |
|
"loss": 1.2043, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5079702444208289, |
|
"eval_f1": 0.5009933439562343, |
|
"eval_loss": 1.301389217376709, |
|
"eval_precision": 0.5133376615246589, |
|
"eval_recall": 0.5079702444208289, |
|
"eval_runtime": 0.2897, |
|
"eval_samples_per_second": 3248.415, |
|
"eval_steps_per_second": 17.26, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 16.39344262295082, |
|
"grad_norm": 4.816354751586914, |
|
"learning_rate": 6.51851851851852e-06, |
|
"loss": 1.1878, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 16.983606557377048, |
|
"eval_accuracy": 0.5047821466524973, |
|
"eval_f1": 0.5026293126534545, |
|
"eval_loss": 1.3040825128555298, |
|
"eval_precision": 0.5084106817664226, |
|
"eval_recall": 0.5047821466524973, |
|
"eval_runtime": 0.2972, |
|
"eval_samples_per_second": 3166.554, |
|
"eval_steps_per_second": 16.825, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 17.049180327868854, |
|
"grad_norm": 9.227392196655273, |
|
"learning_rate": 6.333333333333333e-06, |
|
"loss": 1.185, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 17.704918032786885, |
|
"grad_norm": 8.637321472167969, |
|
"learning_rate": 6.148148148148149e-06, |
|
"loss": 1.1744, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.49415515409139216, |
|
"eval_f1": 0.4884746258823515, |
|
"eval_loss": 1.3026150465011597, |
|
"eval_precision": 0.4988838007681139, |
|
"eval_recall": 0.49415515409139216, |
|
"eval_runtime": 0.2868, |
|
"eval_samples_per_second": 3280.605, |
|
"eval_steps_per_second": 17.431, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 18.360655737704917, |
|
"grad_norm": 10.460613250732422, |
|
"learning_rate": 5.962962962962963e-06, |
|
"loss": 1.1621, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 18.983606557377048, |
|
"eval_accuracy": 0.5026567481402763, |
|
"eval_f1": 0.49433902872030766, |
|
"eval_loss": 1.3115041255950928, |
|
"eval_precision": 0.5064079316801127, |
|
"eval_recall": 0.5026567481402763, |
|
"eval_runtime": 0.2887, |
|
"eval_samples_per_second": 3259.021, |
|
"eval_steps_per_second": 17.317, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 19.016393442622952, |
|
"grad_norm": 9.569828033447266, |
|
"learning_rate": 5.777777777777778e-06, |
|
"loss": 1.1794, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 19.672131147540984, |
|
"grad_norm": 9.484976768493652, |
|
"learning_rate": 5.5925925925925926e-06, |
|
"loss": 1.1453, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.49946865037194477, |
|
"eval_f1": 0.4947569022574746, |
|
"eval_loss": 1.3135700225830078, |
|
"eval_precision": 0.5051709468240039, |
|
"eval_recall": 0.49946865037194477, |
|
"eval_runtime": 0.284, |
|
"eval_samples_per_second": 3313.863, |
|
"eval_steps_per_second": 17.608, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 20.327868852459016, |
|
"grad_norm": 6.509533405303955, |
|
"learning_rate": 5.407407407407408e-06, |
|
"loss": 1.1435, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 20.983606557377048, |
|
"grad_norm": 6.100685119628906, |
|
"learning_rate": 5.2222222222222226e-06, |
|
"loss": 1.1546, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 20.983606557377048, |
|
"eval_accuracy": 0.49309245483528164, |
|
"eval_f1": 0.4888614092606576, |
|
"eval_loss": 1.3327937126159668, |
|
"eval_precision": 0.5027325713159697, |
|
"eval_recall": 0.49309245483528164, |
|
"eval_runtime": 0.288, |
|
"eval_samples_per_second": 3267.438, |
|
"eval_steps_per_second": 17.362, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 21.639344262295083, |
|
"grad_norm": 10.228110313415527, |
|
"learning_rate": 5.037037037037037e-06, |
|
"loss": 1.1118, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5037194473963869, |
|
"eval_f1": 0.49935836644717385, |
|
"eval_loss": 1.3201266527175903, |
|
"eval_precision": 0.5068092491618498, |
|
"eval_recall": 0.5037194473963869, |
|
"eval_runtime": 0.2806, |
|
"eval_samples_per_second": 3353.599, |
|
"eval_steps_per_second": 17.819, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 22.295081967213115, |
|
"grad_norm": 4.691425800323486, |
|
"learning_rate": 4.851851851851852e-06, |
|
"loss": 1.121, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 22.950819672131146, |
|
"grad_norm": 5.896801471710205, |
|
"learning_rate": 4.666666666666667e-06, |
|
"loss": 1.1013, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 22.983606557377048, |
|
"eval_accuracy": 0.5079702444208289, |
|
"eval_f1": 0.5056149574862951, |
|
"eval_loss": 1.3185617923736572, |
|
"eval_precision": 0.5104193389071094, |
|
"eval_recall": 0.5079702444208289, |
|
"eval_runtime": 0.2841, |
|
"eval_samples_per_second": 3312.06, |
|
"eval_steps_per_second": 17.599, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 23.60655737704918, |
|
"grad_norm": 9.211535453796387, |
|
"learning_rate": 4.481481481481482e-06, |
|
"loss": 1.0909, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5047821466524973, |
|
"eval_f1": 0.5027837397043571, |
|
"eval_loss": 1.3096483945846558, |
|
"eval_precision": 0.5132526138930299, |
|
"eval_recall": 0.5047821466524973, |
|
"eval_runtime": 0.3019, |
|
"eval_samples_per_second": 3116.922, |
|
"eval_steps_per_second": 16.562, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 24.262295081967213, |
|
"grad_norm": 5.588741302490234, |
|
"learning_rate": 4.296296296296296e-06, |
|
"loss": 1.0904, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 24.918032786885245, |
|
"grad_norm": 7.369673728942871, |
|
"learning_rate": 4.111111111111111e-06, |
|
"loss": 1.0765, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 24.983606557377048, |
|
"eval_accuracy": 0.5079702444208289, |
|
"eval_f1": 0.504151686335666, |
|
"eval_loss": 1.3278100490570068, |
|
"eval_precision": 0.5111957998187558, |
|
"eval_recall": 0.5079702444208289, |
|
"eval_runtime": 0.2837, |
|
"eval_samples_per_second": 3316.497, |
|
"eval_steps_per_second": 17.622, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 25.57377049180328, |
|
"grad_norm": 9.449226379394531, |
|
"learning_rate": 3.925925925925926e-06, |
|
"loss": 1.0687, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.5037194473963869, |
|
"eval_f1": 0.501835797044231, |
|
"eval_loss": 1.3304780721664429, |
|
"eval_precision": 0.5109551773238672, |
|
"eval_recall": 0.5037194473963869, |
|
"eval_runtime": 0.2896, |
|
"eval_samples_per_second": 3249.747, |
|
"eval_steps_per_second": 17.268, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 26.229508196721312, |
|
"grad_norm": 5.422854423522949, |
|
"learning_rate": 3.740740740740741e-06, |
|
"loss": 1.0579, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 26.885245901639344, |
|
"grad_norm": 5.668990612030029, |
|
"learning_rate": 3.555555555555556e-06, |
|
"loss": 1.0544, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 26.983606557377048, |
|
"eval_accuracy": 0.5175345377258236, |
|
"eval_f1": 0.5164541262908391, |
|
"eval_loss": 1.318372130393982, |
|
"eval_precision": 0.5223443720891333, |
|
"eval_recall": 0.5175345377258236, |
|
"eval_runtime": 0.302, |
|
"eval_samples_per_second": 3115.635, |
|
"eval_steps_per_second": 16.555, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 27.540983606557376, |
|
"grad_norm": 10.203471183776855, |
|
"learning_rate": 3.3703703703703705e-06, |
|
"loss": 1.0577, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5069075451647184, |
|
"eval_f1": 0.5033442589921326, |
|
"eval_loss": 1.3318045139312744, |
|
"eval_precision": 0.5084800819821859, |
|
"eval_recall": 0.5069075451647184, |
|
"eval_runtime": 0.2944, |
|
"eval_samples_per_second": 3196.236, |
|
"eval_steps_per_second": 16.983, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 28.19672131147541, |
|
"grad_norm": 6.67368221282959, |
|
"learning_rate": 3.1851851851851855e-06, |
|
"loss": 1.0434, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 28.852459016393443, |
|
"grad_norm": 6.8327765464782715, |
|
"learning_rate": 3e-06, |
|
"loss": 1.0475, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 28.983606557377048, |
|
"eval_accuracy": 0.51009564293305, |
|
"eval_f1": 0.5074698366226925, |
|
"eval_loss": 1.3202146291732788, |
|
"eval_precision": 0.5157834747082791, |
|
"eval_recall": 0.51009564293305, |
|
"eval_runtime": 0.2822, |
|
"eval_samples_per_second": 3334.68, |
|
"eval_steps_per_second": 17.719, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 29.508196721311474, |
|
"grad_norm": 10.031432151794434, |
|
"learning_rate": 2.814814814814815e-06, |
|
"loss": 1.0312, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.5090329436769394, |
|
"eval_f1": 0.5059785444183024, |
|
"eval_loss": 1.343613862991333, |
|
"eval_precision": 0.5104547305202234, |
|
"eval_recall": 0.5090329436769394, |
|
"eval_runtime": 0.2901, |
|
"eval_samples_per_second": 3244.1, |
|
"eval_steps_per_second": 17.238, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 30.16393442622951, |
|
"grad_norm": 5.074967861175537, |
|
"learning_rate": 2.6296296296296297e-06, |
|
"loss": 1.0248, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 30.81967213114754, |
|
"grad_norm": 7.508426189422607, |
|
"learning_rate": 2.4444444444444447e-06, |
|
"loss": 1.0231, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 30.983606557377048, |
|
"eval_accuracy": 0.5143464399574921, |
|
"eval_f1": 0.5128246980549828, |
|
"eval_loss": 1.3461003303527832, |
|
"eval_precision": 0.5172606351162928, |
|
"eval_recall": 0.5143464399574921, |
|
"eval_runtime": 0.298, |
|
"eval_samples_per_second": 3157.909, |
|
"eval_steps_per_second": 16.78, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 31.475409836065573, |
|
"grad_norm": 12.130611419677734, |
|
"learning_rate": 2.2592592592592592e-06, |
|
"loss": 1.0185, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.5090329436769394, |
|
"eval_f1": 0.5062984065924716, |
|
"eval_loss": 1.3429207801818848, |
|
"eval_precision": 0.5110311107411546, |
|
"eval_recall": 0.5090329436769394, |
|
"eval_runtime": 0.2991, |
|
"eval_samples_per_second": 3145.81, |
|
"eval_steps_per_second": 16.715, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 32.131147540983605, |
|
"grad_norm": 7.62930154800415, |
|
"learning_rate": 2.0740740740740742e-06, |
|
"loss": 0.9978, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 32.78688524590164, |
|
"grad_norm": 6.595398902893066, |
|
"learning_rate": 1.888888888888889e-06, |
|
"loss": 1.0102, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 32.98360655737705, |
|
"eval_accuracy": 0.5143464399574921, |
|
"eval_f1": 0.5128260191462034, |
|
"eval_loss": 1.3501225709915161, |
|
"eval_precision": 0.5164523478379722, |
|
"eval_recall": 0.5143464399574921, |
|
"eval_runtime": 0.2862, |
|
"eval_samples_per_second": 3287.538, |
|
"eval_steps_per_second": 17.468, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 33.442622950819676, |
|
"grad_norm": 14.922042846679688, |
|
"learning_rate": 1.7037037037037038e-06, |
|
"loss": 1.0024, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.5132837407013815, |
|
"eval_f1": 0.5115794743472372, |
|
"eval_loss": 1.3449465036392212, |
|
"eval_precision": 0.518258292844791, |
|
"eval_recall": 0.5132837407013815, |
|
"eval_runtime": 0.2827, |
|
"eval_samples_per_second": 3328.625, |
|
"eval_steps_per_second": 17.687, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 34.09836065573771, |
|
"grad_norm": 8.07007122039795, |
|
"learning_rate": 1.5185185185185186e-06, |
|
"loss": 0.997, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 34.75409836065574, |
|
"grad_norm": 6.781075477600098, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 0.9991, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 34.98360655737705, |
|
"eval_accuracy": 0.5143464399574921, |
|
"eval_f1": 0.5114723136353695, |
|
"eval_loss": 1.3471170663833618, |
|
"eval_precision": 0.5158292841288152, |
|
"eval_recall": 0.5143464399574921, |
|
"eval_runtime": 0.288, |
|
"eval_samples_per_second": 3267.384, |
|
"eval_steps_per_second": 17.361, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 35.40983606557377, |
|
"grad_norm": 11.607426643371582, |
|
"learning_rate": 1.1481481481481482e-06, |
|
"loss": 0.983, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.5079702444208289, |
|
"eval_f1": 0.504843217181714, |
|
"eval_loss": 1.3585803508758545, |
|
"eval_precision": 0.509438648352314, |
|
"eval_recall": 0.5079702444208289, |
|
"eval_runtime": 0.2932, |
|
"eval_samples_per_second": 3209.163, |
|
"eval_steps_per_second": 17.052, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 36.0655737704918, |
|
"grad_norm": 7.522489547729492, |
|
"learning_rate": 9.62962962962963e-07, |
|
"loss": 0.9771, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 36.721311475409834, |
|
"grad_norm": 6.985760688781738, |
|
"learning_rate": 7.777777777777779e-07, |
|
"loss": 0.9827, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 36.98360655737705, |
|
"eval_accuracy": 0.5058448459086079, |
|
"eval_f1": 0.5029473061894668, |
|
"eval_loss": 1.3584290742874146, |
|
"eval_precision": 0.5075082082551082, |
|
"eval_recall": 0.5058448459086079, |
|
"eval_runtime": 0.286, |
|
"eval_samples_per_second": 3289.897, |
|
"eval_steps_per_second": 17.481, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 37.377049180327866, |
|
"grad_norm": 15.159732818603516, |
|
"learning_rate": 5.925925925925927e-07, |
|
"loss": 0.9807, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.5154091392136025, |
|
"eval_f1": 0.5134611936291602, |
|
"eval_loss": 1.3536242246627808, |
|
"eval_precision": 0.5189235103268454, |
|
"eval_recall": 0.5154091392136025, |
|
"eval_runtime": 0.2878, |
|
"eval_samples_per_second": 3269.755, |
|
"eval_steps_per_second": 17.374, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 38.032786885245905, |
|
"grad_norm": 8.501507759094238, |
|
"learning_rate": 4.074074074074075e-07, |
|
"loss": 0.9819, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 38.68852459016394, |
|
"grad_norm": 7.350555419921875, |
|
"learning_rate": 2.2222222222222224e-07, |
|
"loss": 0.9698, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 38.98360655737705, |
|
"eval_accuracy": 0.5143464399574921, |
|
"eval_f1": 0.5126916822770045, |
|
"eval_loss": 1.3546310663223267, |
|
"eval_precision": 0.5184421351153233, |
|
"eval_recall": 0.5143464399574921, |
|
"eval_runtime": 0.2918, |
|
"eval_samples_per_second": 3225.014, |
|
"eval_steps_per_second": 17.136, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 39.34426229508197, |
|
"grad_norm": 13.357161521911621, |
|
"learning_rate": 3.703703703703704e-08, |
|
"loss": 0.9792, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 39.34426229508197, |
|
"eval_accuracy": 0.5185972369819342, |
|
"eval_f1": 0.516675376095624, |
|
"eval_loss": 1.353948950767517, |
|
"eval_precision": 0.5216418875507168, |
|
"eval_recall": 0.5185972369819342, |
|
"eval_runtime": 0.2709, |
|
"eval_samples_per_second": 3473.833, |
|
"eval_steps_per_second": 18.458, |
|
"step": 1200 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 1200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.515574105997312e+16, |
|
"train_batch_size": 24, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|