BERT_ALL_README / trainer_state.json
pavlyhalim's picture
Upload 15 files
bd574ed verified
{
"best_metric": 0.5185972369819342,
"best_model_checkpoint": "./results_bert-base-uncased_combined_lr1e-05_seed45/checkpoint-1200",
"epoch": 39.34426229508197,
"eval_steps": 500,
"global_step": 1200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.6557377049180327,
"grad_norm": 7.196445941925049,
"learning_rate": 1.5833333333333333e-06,
"loss": 1.8973,
"step": 20
},
{
"epoch": 0.9836065573770492,
"eval_accuracy": 0.12964930924548354,
"eval_f1": 0.06598924045051528,
"eval_loss": 1.80304753780365,
"eval_precision": 0.06377482620857301,
"eval_recall": 0.12964930924548354,
"eval_runtime": 0.2953,
"eval_samples_per_second": 3186.127,
"eval_steps_per_second": 16.929,
"step": 30
},
{
"epoch": 1.3114754098360657,
"grad_norm": 7.820695400238037,
"learning_rate": 3.2500000000000002e-06,
"loss": 1.8091,
"step": 40
},
{
"epoch": 1.9672131147540983,
"grad_norm": 5.692554950714111,
"learning_rate": 4.9166666666666665e-06,
"loss": 1.7275,
"step": 60
},
{
"epoch": 2.0,
"eval_accuracy": 0.2731137088204038,
"eval_f1": 0.22275890306102614,
"eval_loss": 1.6800185441970825,
"eval_precision": 0.21610225983181644,
"eval_recall": 0.2731137088204038,
"eval_runtime": 0.2824,
"eval_samples_per_second": 3332.104,
"eval_steps_per_second": 17.705,
"step": 61
},
{
"epoch": 2.6229508196721314,
"grad_norm": 5.084521770477295,
"learning_rate": 6.5000000000000004e-06,
"loss": 1.6714,
"step": 80
},
{
"epoch": 2.9836065573770494,
"eval_accuracy": 0.38257173219978746,
"eval_f1": 0.33377079740961235,
"eval_loss": 1.5590412616729736,
"eval_precision": 0.405931226928208,
"eval_recall": 0.38257173219978746,
"eval_runtime": 0.2862,
"eval_samples_per_second": 3287.384,
"eval_steps_per_second": 17.468,
"step": 91
},
{
"epoch": 3.278688524590164,
"grad_norm": 7.280013084411621,
"learning_rate": 8.166666666666668e-06,
"loss": 1.5947,
"step": 100
},
{
"epoch": 3.9344262295081966,
"grad_norm": 7.112355709075928,
"learning_rate": 9.833333333333333e-06,
"loss": 1.5347,
"step": 120
},
{
"epoch": 4.0,
"eval_accuracy": 0.4059511158342189,
"eval_f1": 0.36712885756401803,
"eval_loss": 1.4780991077423096,
"eval_precision": 0.4071133651031437,
"eval_recall": 0.4059511158342189,
"eval_runtime": 0.2834,
"eval_samples_per_second": 3320.842,
"eval_steps_per_second": 17.645,
"step": 122
},
{
"epoch": 4.590163934426229,
"grad_norm": 4.6436381340026855,
"learning_rate": 9.833333333333333e-06,
"loss": 1.4907,
"step": 140
},
{
"epoch": 4.983606557377049,
"eval_accuracy": 0.4261424017003188,
"eval_f1": 0.3946275379809259,
"eval_loss": 1.4262386560440063,
"eval_precision": 0.39385114675841176,
"eval_recall": 0.4261424017003188,
"eval_runtime": 0.3059,
"eval_samples_per_second": 3075.844,
"eval_steps_per_second": 16.343,
"step": 152
},
{
"epoch": 5.245901639344262,
"grad_norm": 6.875060081481934,
"learning_rate": 9.64814814814815e-06,
"loss": 1.4561,
"step": 160
},
{
"epoch": 5.901639344262295,
"grad_norm": 4.790427207946777,
"learning_rate": 9.472222222222223e-06,
"loss": 1.4254,
"step": 180
},
{
"epoch": 6.0,
"eval_accuracy": 0.45483528161530284,
"eval_f1": 0.4291814703075595,
"eval_loss": 1.3784066438674927,
"eval_precision": 0.4185165606426249,
"eval_recall": 0.45483528161530284,
"eval_runtime": 0.295,
"eval_samples_per_second": 3189.291,
"eval_steps_per_second": 16.946,
"step": 183
},
{
"epoch": 6.557377049180328,
"grad_norm": 5.400150299072266,
"learning_rate": 9.296296296296296e-06,
"loss": 1.4031,
"step": 200
},
{
"epoch": 6.983606557377049,
"eval_accuracy": 0.46865037194473963,
"eval_f1": 0.44252499417149155,
"eval_loss": 1.3631744384765625,
"eval_precision": 0.49440104417651237,
"eval_recall": 0.46865037194473963,
"eval_runtime": 0.2976,
"eval_samples_per_second": 3161.889,
"eval_steps_per_second": 16.801,
"step": 213
},
{
"epoch": 7.213114754098361,
"grad_norm": 10.320267677307129,
"learning_rate": 9.111111111111112e-06,
"loss": 1.3894,
"step": 220
},
{
"epoch": 7.868852459016393,
"grad_norm": 8.646105766296387,
"learning_rate": 8.925925925925927e-06,
"loss": 1.3661,
"step": 240
},
{
"epoch": 8.0,
"eval_accuracy": 0.46971307120085015,
"eval_f1": 0.4568042833868587,
"eval_loss": 1.3476293087005615,
"eval_precision": 0.47237771275946455,
"eval_recall": 0.46971307120085015,
"eval_runtime": 0.294,
"eval_samples_per_second": 3200.359,
"eval_steps_per_second": 17.005,
"step": 244
},
{
"epoch": 8.524590163934427,
"grad_norm": 4.57098388671875,
"learning_rate": 8.740740740740741e-06,
"loss": 1.3528,
"step": 260
},
{
"epoch": 8.98360655737705,
"eval_accuracy": 0.4707757704569607,
"eval_f1": 0.4585472022799805,
"eval_loss": 1.3285961151123047,
"eval_precision": 0.47091141121713276,
"eval_recall": 0.4707757704569607,
"eval_runtime": 0.2823,
"eval_samples_per_second": 3333.381,
"eval_steps_per_second": 17.712,
"step": 274
},
{
"epoch": 9.180327868852459,
"grad_norm": 5.434332370758057,
"learning_rate": 8.555555555555556e-06,
"loss": 1.3438,
"step": 280
},
{
"epoch": 9.836065573770492,
"grad_norm": 6.4954938888549805,
"learning_rate": 8.37037037037037e-06,
"loss": 1.309,
"step": 300
},
{
"epoch": 10.0,
"eval_accuracy": 0.46865037194473963,
"eval_f1": 0.4568412527812332,
"eval_loss": 1.332553505897522,
"eval_precision": 0.46992953260701664,
"eval_recall": 0.46865037194473963,
"eval_runtime": 0.2864,
"eval_samples_per_second": 3285.515,
"eval_steps_per_second": 17.458,
"step": 305
},
{
"epoch": 10.491803278688524,
"grad_norm": 6.318108081817627,
"learning_rate": 8.185185185185187e-06,
"loss": 1.3036,
"step": 320
},
{
"epoch": 10.98360655737705,
"eval_accuracy": 0.4622741764080765,
"eval_f1": 0.45324041357026684,
"eval_loss": 1.3212019205093384,
"eval_precision": 0.47221675701090976,
"eval_recall": 0.4622741764080765,
"eval_runtime": 0.3006,
"eval_samples_per_second": 3130.452,
"eval_steps_per_second": 16.634,
"step": 335
},
{
"epoch": 11.147540983606557,
"grad_norm": 8.44897174835205,
"learning_rate": 8.000000000000001e-06,
"loss": 1.3046,
"step": 340
},
{
"epoch": 11.80327868852459,
"grad_norm": 6.639650821685791,
"learning_rate": 7.814814814814816e-06,
"loss": 1.2737,
"step": 360
},
{
"epoch": 12.0,
"eval_accuracy": 0.5143464399574921,
"eval_f1": 0.5059771351474103,
"eval_loss": 1.300374984741211,
"eval_precision": 0.5130157529201715,
"eval_recall": 0.5143464399574921,
"eval_runtime": 0.2984,
"eval_samples_per_second": 3153.083,
"eval_steps_per_second": 16.754,
"step": 366
},
{
"epoch": 12.459016393442623,
"grad_norm": 5.371426582336426,
"learning_rate": 7.62962962962963e-06,
"loss": 1.2642,
"step": 380
},
{
"epoch": 12.98360655737705,
"eval_accuracy": 0.48884165781083955,
"eval_f1": 0.48178502839341597,
"eval_loss": 1.315968632698059,
"eval_precision": 0.4952118784240597,
"eval_recall": 0.48884165781083955,
"eval_runtime": 0.3,
"eval_samples_per_second": 3136.49,
"eval_steps_per_second": 16.666,
"step": 396
},
{
"epoch": 13.114754098360656,
"grad_norm": 6.083697319030762,
"learning_rate": 7.444444444444445e-06,
"loss": 1.2688,
"step": 400
},
{
"epoch": 13.770491803278688,
"grad_norm": 7.939155101776123,
"learning_rate": 7.2592592592592605e-06,
"loss": 1.2395,
"step": 420
},
{
"epoch": 14.0,
"eval_accuracy": 0.5058448459086079,
"eval_f1": 0.5012340867825481,
"eval_loss": 1.3054472208023071,
"eval_precision": 0.5059260195308296,
"eval_recall": 0.5058448459086079,
"eval_runtime": 0.2951,
"eval_samples_per_second": 3189.098,
"eval_steps_per_second": 16.945,
"step": 427
},
{
"epoch": 14.426229508196721,
"grad_norm": 5.19802188873291,
"learning_rate": 7.074074074074074e-06,
"loss": 1.2324,
"step": 440
},
{
"epoch": 14.98360655737705,
"eval_accuracy": 0.4909670563230606,
"eval_f1": 0.48389179664553195,
"eval_loss": 1.3174266815185547,
"eval_precision": 0.4974190684341335,
"eval_recall": 0.4909670563230606,
"eval_runtime": 0.2911,
"eval_samples_per_second": 3232.523,
"eval_steps_per_second": 17.176,
"step": 457
},
{
"epoch": 15.081967213114755,
"grad_norm": 14.863390922546387,
"learning_rate": 6.88888888888889e-06,
"loss": 1.2294,
"step": 460
},
{
"epoch": 15.737704918032787,
"grad_norm": 6.973830699920654,
"learning_rate": 6.703703703703704e-06,
"loss": 1.2043,
"step": 480
},
{
"epoch": 16.0,
"eval_accuracy": 0.5079702444208289,
"eval_f1": 0.5009933439562343,
"eval_loss": 1.301389217376709,
"eval_precision": 0.5133376615246589,
"eval_recall": 0.5079702444208289,
"eval_runtime": 0.2897,
"eval_samples_per_second": 3248.415,
"eval_steps_per_second": 17.26,
"step": 488
},
{
"epoch": 16.39344262295082,
"grad_norm": 4.816354751586914,
"learning_rate": 6.51851851851852e-06,
"loss": 1.1878,
"step": 500
},
{
"epoch": 16.983606557377048,
"eval_accuracy": 0.5047821466524973,
"eval_f1": 0.5026293126534545,
"eval_loss": 1.3040825128555298,
"eval_precision": 0.5084106817664226,
"eval_recall": 0.5047821466524973,
"eval_runtime": 0.2972,
"eval_samples_per_second": 3166.554,
"eval_steps_per_second": 16.825,
"step": 518
},
{
"epoch": 17.049180327868854,
"grad_norm": 9.227392196655273,
"learning_rate": 6.333333333333333e-06,
"loss": 1.185,
"step": 520
},
{
"epoch": 17.704918032786885,
"grad_norm": 8.637321472167969,
"learning_rate": 6.148148148148149e-06,
"loss": 1.1744,
"step": 540
},
{
"epoch": 18.0,
"eval_accuracy": 0.49415515409139216,
"eval_f1": 0.4884746258823515,
"eval_loss": 1.3026150465011597,
"eval_precision": 0.4988838007681139,
"eval_recall": 0.49415515409139216,
"eval_runtime": 0.2868,
"eval_samples_per_second": 3280.605,
"eval_steps_per_second": 17.431,
"step": 549
},
{
"epoch": 18.360655737704917,
"grad_norm": 10.460613250732422,
"learning_rate": 5.962962962962963e-06,
"loss": 1.1621,
"step": 560
},
{
"epoch": 18.983606557377048,
"eval_accuracy": 0.5026567481402763,
"eval_f1": 0.49433902872030766,
"eval_loss": 1.3115041255950928,
"eval_precision": 0.5064079316801127,
"eval_recall": 0.5026567481402763,
"eval_runtime": 0.2887,
"eval_samples_per_second": 3259.021,
"eval_steps_per_second": 17.317,
"step": 579
},
{
"epoch": 19.016393442622952,
"grad_norm": 9.569828033447266,
"learning_rate": 5.777777777777778e-06,
"loss": 1.1794,
"step": 580
},
{
"epoch": 19.672131147540984,
"grad_norm": 9.484976768493652,
"learning_rate": 5.5925925925925926e-06,
"loss": 1.1453,
"step": 600
},
{
"epoch": 20.0,
"eval_accuracy": 0.49946865037194477,
"eval_f1": 0.4947569022574746,
"eval_loss": 1.3135700225830078,
"eval_precision": 0.5051709468240039,
"eval_recall": 0.49946865037194477,
"eval_runtime": 0.284,
"eval_samples_per_second": 3313.863,
"eval_steps_per_second": 17.608,
"step": 610
},
{
"epoch": 20.327868852459016,
"grad_norm": 6.509533405303955,
"learning_rate": 5.407407407407408e-06,
"loss": 1.1435,
"step": 620
},
{
"epoch": 20.983606557377048,
"grad_norm": 6.100685119628906,
"learning_rate": 5.2222222222222226e-06,
"loss": 1.1546,
"step": 640
},
{
"epoch": 20.983606557377048,
"eval_accuracy": 0.49309245483528164,
"eval_f1": 0.4888614092606576,
"eval_loss": 1.3327937126159668,
"eval_precision": 0.5027325713159697,
"eval_recall": 0.49309245483528164,
"eval_runtime": 0.288,
"eval_samples_per_second": 3267.438,
"eval_steps_per_second": 17.362,
"step": 640
},
{
"epoch": 21.639344262295083,
"grad_norm": 10.228110313415527,
"learning_rate": 5.037037037037037e-06,
"loss": 1.1118,
"step": 660
},
{
"epoch": 22.0,
"eval_accuracy": 0.5037194473963869,
"eval_f1": 0.49935836644717385,
"eval_loss": 1.3201266527175903,
"eval_precision": 0.5068092491618498,
"eval_recall": 0.5037194473963869,
"eval_runtime": 0.2806,
"eval_samples_per_second": 3353.599,
"eval_steps_per_second": 17.819,
"step": 671
},
{
"epoch": 22.295081967213115,
"grad_norm": 4.691425800323486,
"learning_rate": 4.851851851851852e-06,
"loss": 1.121,
"step": 680
},
{
"epoch": 22.950819672131146,
"grad_norm": 5.896801471710205,
"learning_rate": 4.666666666666667e-06,
"loss": 1.1013,
"step": 700
},
{
"epoch": 22.983606557377048,
"eval_accuracy": 0.5079702444208289,
"eval_f1": 0.5056149574862951,
"eval_loss": 1.3185617923736572,
"eval_precision": 0.5104193389071094,
"eval_recall": 0.5079702444208289,
"eval_runtime": 0.2841,
"eval_samples_per_second": 3312.06,
"eval_steps_per_second": 17.599,
"step": 701
},
{
"epoch": 23.60655737704918,
"grad_norm": 9.211535453796387,
"learning_rate": 4.481481481481482e-06,
"loss": 1.0909,
"step": 720
},
{
"epoch": 24.0,
"eval_accuracy": 0.5047821466524973,
"eval_f1": 0.5027837397043571,
"eval_loss": 1.3096483945846558,
"eval_precision": 0.5132526138930299,
"eval_recall": 0.5047821466524973,
"eval_runtime": 0.3019,
"eval_samples_per_second": 3116.922,
"eval_steps_per_second": 16.562,
"step": 732
},
{
"epoch": 24.262295081967213,
"grad_norm": 5.588741302490234,
"learning_rate": 4.296296296296296e-06,
"loss": 1.0904,
"step": 740
},
{
"epoch": 24.918032786885245,
"grad_norm": 7.369673728942871,
"learning_rate": 4.111111111111111e-06,
"loss": 1.0765,
"step": 760
},
{
"epoch": 24.983606557377048,
"eval_accuracy": 0.5079702444208289,
"eval_f1": 0.504151686335666,
"eval_loss": 1.3278100490570068,
"eval_precision": 0.5111957998187558,
"eval_recall": 0.5079702444208289,
"eval_runtime": 0.2837,
"eval_samples_per_second": 3316.497,
"eval_steps_per_second": 17.622,
"step": 762
},
{
"epoch": 25.57377049180328,
"grad_norm": 9.449226379394531,
"learning_rate": 3.925925925925926e-06,
"loss": 1.0687,
"step": 780
},
{
"epoch": 26.0,
"eval_accuracy": 0.5037194473963869,
"eval_f1": 0.501835797044231,
"eval_loss": 1.3304780721664429,
"eval_precision": 0.5109551773238672,
"eval_recall": 0.5037194473963869,
"eval_runtime": 0.2896,
"eval_samples_per_second": 3249.747,
"eval_steps_per_second": 17.268,
"step": 793
},
{
"epoch": 26.229508196721312,
"grad_norm": 5.422854423522949,
"learning_rate": 3.740740740740741e-06,
"loss": 1.0579,
"step": 800
},
{
"epoch": 26.885245901639344,
"grad_norm": 5.668990612030029,
"learning_rate": 3.555555555555556e-06,
"loss": 1.0544,
"step": 820
},
{
"epoch": 26.983606557377048,
"eval_accuracy": 0.5175345377258236,
"eval_f1": 0.5164541262908391,
"eval_loss": 1.318372130393982,
"eval_precision": 0.5223443720891333,
"eval_recall": 0.5175345377258236,
"eval_runtime": 0.302,
"eval_samples_per_second": 3115.635,
"eval_steps_per_second": 16.555,
"step": 823
},
{
"epoch": 27.540983606557376,
"grad_norm": 10.203471183776855,
"learning_rate": 3.3703703703703705e-06,
"loss": 1.0577,
"step": 840
},
{
"epoch": 28.0,
"eval_accuracy": 0.5069075451647184,
"eval_f1": 0.5033442589921326,
"eval_loss": 1.3318045139312744,
"eval_precision": 0.5084800819821859,
"eval_recall": 0.5069075451647184,
"eval_runtime": 0.2944,
"eval_samples_per_second": 3196.236,
"eval_steps_per_second": 16.983,
"step": 854
},
{
"epoch": 28.19672131147541,
"grad_norm": 6.67368221282959,
"learning_rate": 3.1851851851851855e-06,
"loss": 1.0434,
"step": 860
},
{
"epoch": 28.852459016393443,
"grad_norm": 6.8327765464782715,
"learning_rate": 3e-06,
"loss": 1.0475,
"step": 880
},
{
"epoch": 28.983606557377048,
"eval_accuracy": 0.51009564293305,
"eval_f1": 0.5074698366226925,
"eval_loss": 1.3202146291732788,
"eval_precision": 0.5157834747082791,
"eval_recall": 0.51009564293305,
"eval_runtime": 0.2822,
"eval_samples_per_second": 3334.68,
"eval_steps_per_second": 17.719,
"step": 884
},
{
"epoch": 29.508196721311474,
"grad_norm": 10.031432151794434,
"learning_rate": 2.814814814814815e-06,
"loss": 1.0312,
"step": 900
},
{
"epoch": 30.0,
"eval_accuracy": 0.5090329436769394,
"eval_f1": 0.5059785444183024,
"eval_loss": 1.343613862991333,
"eval_precision": 0.5104547305202234,
"eval_recall": 0.5090329436769394,
"eval_runtime": 0.2901,
"eval_samples_per_second": 3244.1,
"eval_steps_per_second": 17.238,
"step": 915
},
{
"epoch": 30.16393442622951,
"grad_norm": 5.074967861175537,
"learning_rate": 2.6296296296296297e-06,
"loss": 1.0248,
"step": 920
},
{
"epoch": 30.81967213114754,
"grad_norm": 7.508426189422607,
"learning_rate": 2.4444444444444447e-06,
"loss": 1.0231,
"step": 940
},
{
"epoch": 30.983606557377048,
"eval_accuracy": 0.5143464399574921,
"eval_f1": 0.5128246980549828,
"eval_loss": 1.3461003303527832,
"eval_precision": 0.5172606351162928,
"eval_recall": 0.5143464399574921,
"eval_runtime": 0.298,
"eval_samples_per_second": 3157.909,
"eval_steps_per_second": 16.78,
"step": 945
},
{
"epoch": 31.475409836065573,
"grad_norm": 12.130611419677734,
"learning_rate": 2.2592592592592592e-06,
"loss": 1.0185,
"step": 960
},
{
"epoch": 32.0,
"eval_accuracy": 0.5090329436769394,
"eval_f1": 0.5062984065924716,
"eval_loss": 1.3429207801818848,
"eval_precision": 0.5110311107411546,
"eval_recall": 0.5090329436769394,
"eval_runtime": 0.2991,
"eval_samples_per_second": 3145.81,
"eval_steps_per_second": 16.715,
"step": 976
},
{
"epoch": 32.131147540983605,
"grad_norm": 7.62930154800415,
"learning_rate": 2.0740740740740742e-06,
"loss": 0.9978,
"step": 980
},
{
"epoch": 32.78688524590164,
"grad_norm": 6.595398902893066,
"learning_rate": 1.888888888888889e-06,
"loss": 1.0102,
"step": 1000
},
{
"epoch": 32.98360655737705,
"eval_accuracy": 0.5143464399574921,
"eval_f1": 0.5128260191462034,
"eval_loss": 1.3501225709915161,
"eval_precision": 0.5164523478379722,
"eval_recall": 0.5143464399574921,
"eval_runtime": 0.2862,
"eval_samples_per_second": 3287.538,
"eval_steps_per_second": 17.468,
"step": 1006
},
{
"epoch": 33.442622950819676,
"grad_norm": 14.922042846679688,
"learning_rate": 1.7037037037037038e-06,
"loss": 1.0024,
"step": 1020
},
{
"epoch": 34.0,
"eval_accuracy": 0.5132837407013815,
"eval_f1": 0.5115794743472372,
"eval_loss": 1.3449465036392212,
"eval_precision": 0.518258292844791,
"eval_recall": 0.5132837407013815,
"eval_runtime": 0.2827,
"eval_samples_per_second": 3328.625,
"eval_steps_per_second": 17.687,
"step": 1037
},
{
"epoch": 34.09836065573771,
"grad_norm": 8.07007122039795,
"learning_rate": 1.5185185185185186e-06,
"loss": 0.997,
"step": 1040
},
{
"epoch": 34.75409836065574,
"grad_norm": 6.781075477600098,
"learning_rate": 1.3333333333333334e-06,
"loss": 0.9991,
"step": 1060
},
{
"epoch": 34.98360655737705,
"eval_accuracy": 0.5143464399574921,
"eval_f1": 0.5114723136353695,
"eval_loss": 1.3471170663833618,
"eval_precision": 0.5158292841288152,
"eval_recall": 0.5143464399574921,
"eval_runtime": 0.288,
"eval_samples_per_second": 3267.384,
"eval_steps_per_second": 17.361,
"step": 1067
},
{
"epoch": 35.40983606557377,
"grad_norm": 11.607426643371582,
"learning_rate": 1.1481481481481482e-06,
"loss": 0.983,
"step": 1080
},
{
"epoch": 36.0,
"eval_accuracy": 0.5079702444208289,
"eval_f1": 0.504843217181714,
"eval_loss": 1.3585803508758545,
"eval_precision": 0.509438648352314,
"eval_recall": 0.5079702444208289,
"eval_runtime": 0.2932,
"eval_samples_per_second": 3209.163,
"eval_steps_per_second": 17.052,
"step": 1098
},
{
"epoch": 36.0655737704918,
"grad_norm": 7.522489547729492,
"learning_rate": 9.62962962962963e-07,
"loss": 0.9771,
"step": 1100
},
{
"epoch": 36.721311475409834,
"grad_norm": 6.985760688781738,
"learning_rate": 7.777777777777779e-07,
"loss": 0.9827,
"step": 1120
},
{
"epoch": 36.98360655737705,
"eval_accuracy": 0.5058448459086079,
"eval_f1": 0.5029473061894668,
"eval_loss": 1.3584290742874146,
"eval_precision": 0.5075082082551082,
"eval_recall": 0.5058448459086079,
"eval_runtime": 0.286,
"eval_samples_per_second": 3289.897,
"eval_steps_per_second": 17.481,
"step": 1128
},
{
"epoch": 37.377049180327866,
"grad_norm": 15.159732818603516,
"learning_rate": 5.925925925925927e-07,
"loss": 0.9807,
"step": 1140
},
{
"epoch": 38.0,
"eval_accuracy": 0.5154091392136025,
"eval_f1": 0.5134611936291602,
"eval_loss": 1.3536242246627808,
"eval_precision": 0.5189235103268454,
"eval_recall": 0.5154091392136025,
"eval_runtime": 0.2878,
"eval_samples_per_second": 3269.755,
"eval_steps_per_second": 17.374,
"step": 1159
},
{
"epoch": 38.032786885245905,
"grad_norm": 8.501507759094238,
"learning_rate": 4.074074074074075e-07,
"loss": 0.9819,
"step": 1160
},
{
"epoch": 38.68852459016394,
"grad_norm": 7.350555419921875,
"learning_rate": 2.2222222222222224e-07,
"loss": 0.9698,
"step": 1180
},
{
"epoch": 38.98360655737705,
"eval_accuracy": 0.5143464399574921,
"eval_f1": 0.5126916822770045,
"eval_loss": 1.3546310663223267,
"eval_precision": 0.5184421351153233,
"eval_recall": 0.5143464399574921,
"eval_runtime": 0.2918,
"eval_samples_per_second": 3225.014,
"eval_steps_per_second": 17.136,
"step": 1189
},
{
"epoch": 39.34426229508197,
"grad_norm": 13.357161521911621,
"learning_rate": 3.703703703703704e-08,
"loss": 0.9792,
"step": 1200
},
{
"epoch": 39.34426229508197,
"eval_accuracy": 0.5185972369819342,
"eval_f1": 0.516675376095624,
"eval_loss": 1.353948950767517,
"eval_precision": 0.5216418875507168,
"eval_recall": 0.5185972369819342,
"eval_runtime": 0.2709,
"eval_samples_per_second": 3473.833,
"eval_steps_per_second": 18.458,
"step": 1200
}
],
"logging_steps": 20,
"max_steps": 1200,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.515574105997312e+16,
"train_batch_size": 24,
"trial_name": null,
"trial_params": null
}