NER-bert-base-multilingual-cased / trainer_state.json
hts98's picture
End of training
2f33ce3
{
"best_metric": 0.624123043712898,
"best_model_checkpoint": "/tmp/test-ner1_/checkpoint-52109",
"epoch": 120.0,
"global_step": 58440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.741183488427583,
"eval_f1": 0.46964064436183395,
"eval_loss": 0.8133957386016846,
"eval_precision": 0.4221430162619737,
"eval_recall": 0.5291817927953085,
"eval_runtime": 2.7713,
"eval_samples_per_second": 401.25,
"eval_steps_per_second": 25.259,
"step": 487
},
{
"epoch": 1.03,
"learning_rate": 2.9743326488706367e-05,
"loss": 1.0691,
"step": 500
},
{
"epoch": 2.0,
"eval_accuracy": 0.7599856836077309,
"eval_f1": 0.5087176950661556,
"eval_loss": 0.7439278960227966,
"eval_precision": 0.4565024411895251,
"eval_recall": 0.5744205529181793,
"eval_runtime": 2.7397,
"eval_samples_per_second": 405.878,
"eval_steps_per_second": 25.55,
"step": 974
},
{
"epoch": 2.05,
"learning_rate": 2.9486652977412733e-05,
"loss": 0.6796,
"step": 1000
},
{
"epoch": 3.0,
"eval_accuracy": 0.7499642090193271,
"eval_f1": 0.5268402042595591,
"eval_loss": 0.8020838499069214,
"eval_precision": 0.4754946043165468,
"eval_recall": 0.5906171460485897,
"eval_runtime": 2.7638,
"eval_samples_per_second": 402.346,
"eval_steps_per_second": 25.328,
"step": 1461
},
{
"epoch": 3.08,
"learning_rate": 2.9229979466119096e-05,
"loss": 0.5266,
"step": 1500
},
{
"epoch": 4.0,
"eval_accuracy": 0.7569315199236459,
"eval_f1": 0.5452078450721599,
"eval_loss": 0.826590895652771,
"eval_precision": 0.4882898806893504,
"eval_recall": 0.6171460485897794,
"eval_runtime": 2.7581,
"eval_samples_per_second": 403.178,
"eval_steps_per_second": 25.38,
"step": 1948
},
{
"epoch": 4.11,
"learning_rate": 2.8973305954825462e-05,
"loss": 0.4087,
"step": 2000
},
{
"epoch": 5.0,
"eval_accuracy": 0.7584347411119065,
"eval_f1": 0.5577331169641744,
"eval_loss": 0.8819655776023865,
"eval_precision": 0.5042889390519187,
"eval_recall": 0.623848087126501,
"eval_runtime": 2.7608,
"eval_samples_per_second": 402.775,
"eval_steps_per_second": 25.355,
"step": 2435
},
{
"epoch": 5.13,
"learning_rate": 2.8716632443531828e-05,
"loss": 0.314,
"step": 2500
},
{
"epoch": 6.0,
"eval_accuracy": 0.7587687902648532,
"eval_f1": 0.5619107479572597,
"eval_loss": 0.8884239196777344,
"eval_precision": 0.5109739368998628,
"eval_recall": 0.6241273387321977,
"eval_runtime": 2.7416,
"eval_samples_per_second": 405.6,
"eval_steps_per_second": 25.532,
"step": 2922
},
{
"epoch": 6.16,
"learning_rate": 2.8459958932238194e-05,
"loss": 0.254,
"step": 3000
},
{
"epoch": 7.0,
"eval_accuracy": 0.7592937246480553,
"eval_f1": 0.5628216392619556,
"eval_loss": 0.9709771275520325,
"eval_precision": 0.511171910624715,
"eval_recall": 0.6260820999720749,
"eval_runtime": 2.7411,
"eval_samples_per_second": 405.673,
"eval_steps_per_second": 25.537,
"step": 3409
},
{
"epoch": 7.19,
"learning_rate": 2.8203285420944557e-05,
"loss": 0.2096,
"step": 3500
},
{
"epoch": 8.0,
"eval_accuracy": 0.7622285850632308,
"eval_f1": 0.5648183075568968,
"eval_loss": 1.0743454694747925,
"eval_precision": 0.5137236962488564,
"eval_recall": 0.6271991063948618,
"eval_runtime": 2.7561,
"eval_samples_per_second": 403.467,
"eval_steps_per_second": 25.398,
"step": 3896
},
{
"epoch": 8.21,
"learning_rate": 2.7946611909650923e-05,
"loss": 0.1786,
"step": 4000
},
{
"epoch": 9.0,
"eval_accuracy": 0.7570985445001193,
"eval_f1": 0.5668016194331984,
"eval_loss": 1.1285585165023804,
"eval_precision": 0.5181586860976174,
"eval_recall": 0.6255235967606814,
"eval_runtime": 2.7449,
"eval_samples_per_second": 405.112,
"eval_steps_per_second": 25.502,
"step": 4383
},
{
"epoch": 9.24,
"learning_rate": 2.7689938398357292e-05,
"loss": 0.1486,
"step": 4500
},
{
"epoch": 10.0,
"eval_accuracy": 0.7544738725841088,
"eval_f1": 0.5723700887198986,
"eval_loss": 1.1630432605743408,
"eval_precision": 0.5240194940821536,
"eval_recall": 0.6305501256632225,
"eval_runtime": 2.8838,
"eval_samples_per_second": 385.605,
"eval_steps_per_second": 24.274,
"step": 4870
},
{
"epoch": 10.27,
"learning_rate": 2.743326488706366e-05,
"loss": 0.132,
"step": 5000
},
{
"epoch": 11.0,
"eval_accuracy": 0.7605821999522787,
"eval_f1": 0.5760409993593851,
"eval_loss": 1.1933799982070923,
"eval_precision": 0.5321969696969697,
"eval_recall": 0.6277576096062553,
"eval_runtime": 2.7538,
"eval_samples_per_second": 403.799,
"eval_steps_per_second": 25.419,
"step": 5357
},
{
"epoch": 11.29,
"learning_rate": 2.717659137577002e-05,
"loss": 0.1098,
"step": 5500
},
{
"epoch": 12.0,
"eval_accuracy": 0.760200429491768,
"eval_f1": 0.5755844155844156,
"eval_loss": 1.1861658096313477,
"eval_precision": 0.5379946588977907,
"eval_recall": 0.6188215582239598,
"eval_runtime": 2.7422,
"eval_samples_per_second": 405.52,
"eval_steps_per_second": 25.527,
"step": 5844
},
{
"epoch": 12.32,
"learning_rate": 2.6919917864476387e-05,
"loss": 0.094,
"step": 6000
},
{
"epoch": 13.0,
"eval_accuracy": 0.7506084466714388,
"eval_f1": 0.5764092123679858,
"eval_loss": 1.3724055290222168,
"eval_precision": 0.52945301542777,
"eval_recall": 0.6325048869030997,
"eval_runtime": 2.7524,
"eval_samples_per_second": 404.018,
"eval_steps_per_second": 25.433,
"step": 6331
},
{
"epoch": 13.35,
"learning_rate": 2.6663244353182754e-05,
"loss": 0.084,
"step": 6500
},
{
"epoch": 14.0,
"eval_accuracy": 0.7531615366261035,
"eval_f1": 0.574173712528824,
"eval_loss": 1.374582290649414,
"eval_precision": 0.530414201183432,
"eval_recall": 0.6258028483663781,
"eval_runtime": 2.7617,
"eval_samples_per_second": 402.648,
"eval_steps_per_second": 25.347,
"step": 6818
},
{
"epoch": 14.37,
"learning_rate": 2.640657084188912e-05,
"loss": 0.0758,
"step": 7000
},
{
"epoch": 15.0,
"eval_accuracy": 0.7581245526127416,
"eval_f1": 0.5684922922672014,
"eval_loss": 1.3000197410583496,
"eval_precision": 0.5156889495225102,
"eval_recall": 0.6333426417201898,
"eval_runtime": 2.7658,
"eval_samples_per_second": 402.06,
"eval_steps_per_second": 25.31,
"step": 7305
},
{
"epoch": 15.4,
"learning_rate": 2.6149897330595482e-05,
"loss": 0.0694,
"step": 7500
},
{
"epoch": 16.0,
"eval_accuracy": 0.7593175853018372,
"eval_f1": 0.5867220995192932,
"eval_loss": 1.4194592237472534,
"eval_precision": 0.5485908649173955,
"eval_recall": 0.6305501256632225,
"eval_runtime": 2.7493,
"eval_samples_per_second": 404.466,
"eval_steps_per_second": 25.461,
"step": 7792
},
{
"epoch": 16.43,
"learning_rate": 2.589322381930185e-05,
"loss": 0.062,
"step": 8000
},
{
"epoch": 17.0,
"eval_accuracy": 0.7465521355285135,
"eval_f1": 0.5717906475731847,
"eval_loss": 1.4974385499954224,
"eval_precision": 0.5234338747099768,
"eval_recall": 0.629991622451829,
"eval_runtime": 2.7551,
"eval_samples_per_second": 403.608,
"eval_steps_per_second": 25.407,
"step": 8279
},
{
"epoch": 17.45,
"learning_rate": 2.5636550308008215e-05,
"loss": 0.0543,
"step": 8500
},
{
"epoch": 18.0,
"eval_accuracy": 0.7567644953471725,
"eval_f1": 0.5741626794258373,
"eval_loss": 1.5013597011566162,
"eval_precision": 0.5346820809248555,
"eval_recall": 0.6199385646467467,
"eval_runtime": 2.7536,
"eval_samples_per_second": 403.832,
"eval_steps_per_second": 25.421,
"step": 8766
},
{
"epoch": 18.48,
"learning_rate": 2.537987679671458e-05,
"loss": 0.0471,
"step": 9000
},
{
"epoch": 19.0,
"eval_accuracy": 0.7546408971605822,
"eval_f1": 0.5768982020437201,
"eval_loss": 1.5164920091629028,
"eval_precision": 0.5373493975903615,
"eval_recall": 0.6227310807037141,
"eval_runtime": 2.7556,
"eval_samples_per_second": 403.547,
"eval_steps_per_second": 25.403,
"step": 9253
},
{
"epoch": 19.51,
"learning_rate": 2.5123203285420943e-05,
"loss": 0.0449,
"step": 9500
},
{
"epoch": 20.0,
"eval_accuracy": 0.7568122166547363,
"eval_f1": 0.5733962504782553,
"eval_loss": 1.5718824863433838,
"eval_precision": 0.5276995305164319,
"eval_recall": 0.6277576096062553,
"eval_runtime": 2.7667,
"eval_samples_per_second": 401.921,
"eval_steps_per_second": 25.301,
"step": 9740
},
{
"epoch": 20.53,
"learning_rate": 2.486652977412731e-05,
"loss": 0.0451,
"step": 10000
},
{
"epoch": 21.0,
"eval_accuracy": 0.7607253638749701,
"eval_f1": 0.5917858548746885,
"eval_loss": 1.5306912660598755,
"eval_precision": 0.5581683168316832,
"eval_recall": 0.6297123708461324,
"eval_runtime": 2.7618,
"eval_samples_per_second": 402.633,
"eval_steps_per_second": 25.346,
"step": 10227
},
{
"epoch": 21.56,
"learning_rate": 2.4609856262833676e-05,
"loss": 0.039,
"step": 10500
},
{
"epoch": 22.0,
"eval_accuracy": 0.7572417084228108,
"eval_f1": 0.5844206174912802,
"eval_loss": 1.5783377885818481,
"eval_precision": 0.54375,
"eval_recall": 0.6316671320860094,
"eval_runtime": 2.7632,
"eval_samples_per_second": 402.427,
"eval_steps_per_second": 25.333,
"step": 10714
},
{
"epoch": 22.59,
"learning_rate": 2.4353182751540042e-05,
"loss": 0.0363,
"step": 11000
},
{
"epoch": 23.0,
"eval_accuracy": 0.7541875447387258,
"eval_f1": 0.5802802416763081,
"eval_loss": 1.6342318058013916,
"eval_precision": 0.5376369699857074,
"eval_recall": 0.6302708740575258,
"eval_runtime": 2.7447,
"eval_samples_per_second": 405.151,
"eval_steps_per_second": 25.504,
"step": 11201
},
{
"epoch": 23.61,
"learning_rate": 2.4096509240246405e-05,
"loss": 0.0326,
"step": 11500
},
{
"epoch": 24.0,
"eval_accuracy": 0.7596993557623479,
"eval_f1": 0.5911304118963022,
"eval_loss": 1.641722321510315,
"eval_precision": 0.5589845694375312,
"eval_recall": 0.6271991063948618,
"eval_runtime": 2.7546,
"eval_samples_per_second": 403.689,
"eval_steps_per_second": 25.412,
"step": 11688
},
{
"epoch": 24.64,
"learning_rate": 2.383983572895277e-05,
"loss": 0.0296,
"step": 12000
},
{
"epoch": 25.0,
"eval_accuracy": 0.7586733476497256,
"eval_f1": 0.5861406430126809,
"eval_loss": 1.6684845685958862,
"eval_precision": 0.5414103170847137,
"eval_recall": 0.6389276738341245,
"eval_runtime": 2.8726,
"eval_samples_per_second": 387.108,
"eval_steps_per_second": 24.368,
"step": 12175
},
{
"epoch": 25.67,
"learning_rate": 2.3583162217659137e-05,
"loss": 0.0283,
"step": 12500
},
{
"epoch": 26.0,
"eval_accuracy": 0.760200429491768,
"eval_f1": 0.5926797385620914,
"eval_loss": 1.7346807718276978,
"eval_precision": 0.5571393462767265,
"eval_recall": 0.6330633901144932,
"eval_runtime": 2.7525,
"eval_samples_per_second": 404.0,
"eval_steps_per_second": 25.432,
"step": 12662
},
{
"epoch": 26.69,
"learning_rate": 2.3326488706365506e-05,
"loss": 0.0277,
"step": 13000
},
{
"epoch": 27.0,
"eval_accuracy": 0.7632068718682892,
"eval_f1": 0.6025674613570867,
"eval_loss": 1.6559849977493286,
"eval_precision": 0.5674808783617074,
"eval_recall": 0.6422786931024853,
"eval_runtime": 2.7331,
"eval_samples_per_second": 406.857,
"eval_steps_per_second": 25.612,
"step": 13149
},
{
"epoch": 27.72,
"learning_rate": 2.306981519507187e-05,
"loss": 0.025,
"step": 13500
},
{
"epoch": 28.0,
"eval_accuracy": 0.7613934621808638,
"eval_f1": 0.6024861147844485,
"eval_loss": 1.7496830224990845,
"eval_precision": 0.5722180356694297,
"eval_recall": 0.6361351577771572,
"eval_runtime": 2.7227,
"eval_samples_per_second": 408.425,
"eval_steps_per_second": 25.71,
"step": 13636
},
{
"epoch": 28.75,
"learning_rate": 2.2813141683778235e-05,
"loss": 0.0241,
"step": 14000
},
{
"epoch": 29.0,
"eval_accuracy": 0.7637795275590551,
"eval_f1": 0.5988181221273802,
"eval_loss": 1.710971474647522,
"eval_precision": 0.5651958353991076,
"eval_recall": 0.6366936609885507,
"eval_runtime": 2.7599,
"eval_samples_per_second": 402.917,
"eval_steps_per_second": 25.363,
"step": 14123
},
{
"epoch": 29.77,
"learning_rate": 2.25564681724846e-05,
"loss": 0.0242,
"step": 14500
},
{
"epoch": 30.0,
"eval_accuracy": 0.7647339537103317,
"eval_f1": 0.5951438374241224,
"eval_loss": 1.794677734375,
"eval_precision": 0.5641731298473855,
"eval_recall": 0.6297123708461324,
"eval_runtime": 2.7628,
"eval_samples_per_second": 402.492,
"eval_steps_per_second": 25.337,
"step": 14610
},
{
"epoch": 30.8,
"learning_rate": 2.2299794661190967e-05,
"loss": 0.0219,
"step": 15000
},
{
"epoch": 31.0,
"eval_accuracy": 0.7565258888093533,
"eval_f1": 0.592573084013695,
"eval_loss": 1.8282643556594849,
"eval_precision": 0.5606777971592325,
"eval_recall": 0.6283161128176487,
"eval_runtime": 2.7362,
"eval_samples_per_second": 406.407,
"eval_steps_per_second": 25.583,
"step": 15097
},
{
"epoch": 31.83,
"learning_rate": 2.204312114989733e-05,
"loss": 0.0193,
"step": 15500
},
{
"epoch": 32.0,
"eval_accuracy": 0.7648293963254593,
"eval_f1": 0.5969198088157196,
"eval_loss": 1.8161470890045166,
"eval_precision": 0.568969881042774,
"eval_recall": 0.6277576096062553,
"eval_runtime": 2.7545,
"eval_samples_per_second": 403.709,
"eval_steps_per_second": 25.413,
"step": 15584
},
{
"epoch": 32.85,
"learning_rate": 2.1786447638603696e-05,
"loss": 0.0185,
"step": 16000
},
{
"epoch": 33.0,
"eval_accuracy": 0.7608685277976617,
"eval_f1": 0.5930080876597965,
"eval_loss": 1.846176028251648,
"eval_precision": 0.5564259485924112,
"eval_recall": 0.6347388997486736,
"eval_runtime": 2.7276,
"eval_samples_per_second": 407.69,
"eval_steps_per_second": 25.664,
"step": 16071
},
{
"epoch": 33.88,
"learning_rate": 2.1529774127310062e-05,
"loss": 0.0195,
"step": 16500
},
{
"epoch": 34.0,
"eval_accuracy": 0.755810069195896,
"eval_f1": 0.5868997912317329,
"eval_loss": 1.9018374681472778,
"eval_precision": 0.5508204751408278,
"eval_recall": 0.628036861211952,
"eval_runtime": 2.7537,
"eval_samples_per_second": 403.819,
"eval_steps_per_second": 25.42,
"step": 16558
},
{
"epoch": 34.91,
"learning_rate": 2.127310061601643e-05,
"loss": 0.0181,
"step": 17000
},
{
"epoch": 35.0,
"eval_accuracy": 0.7597470770699117,
"eval_f1": 0.5975321606720924,
"eval_loss": 1.8523436784744263,
"eval_precision": 0.5637849888531088,
"eval_recall": 0.6355766545657637,
"eval_runtime": 2.7278,
"eval_samples_per_second": 407.651,
"eval_steps_per_second": 25.661,
"step": 17045
},
{
"epoch": 35.93,
"learning_rate": 2.1016427104722795e-05,
"loss": 0.0182,
"step": 17500
},
{
"epoch": 36.0,
"eval_accuracy": 0.7611309949892627,
"eval_f1": 0.6036228023441662,
"eval_loss": 1.8343547582626343,
"eval_precision": 0.5770308123249299,
"eval_recall": 0.6327841385087964,
"eval_runtime": 2.7612,
"eval_samples_per_second": 402.717,
"eval_steps_per_second": 25.351,
"step": 17532
},
{
"epoch": 36.96,
"learning_rate": 2.0759753593429157e-05,
"loss": 0.0153,
"step": 18000
},
{
"epoch": 37.0,
"eval_accuracy": 0.7668814125507039,
"eval_f1": 0.603166156711454,
"eval_loss": 1.8464767932891846,
"eval_precision": 0.5759654471544715,
"eval_recall": 0.6330633901144932,
"eval_runtime": 2.7392,
"eval_samples_per_second": 405.958,
"eval_steps_per_second": 25.555,
"step": 18019
},
{
"epoch": 37.99,
"learning_rate": 2.0503080082135523e-05,
"loss": 0.0142,
"step": 18500
},
{
"epoch": 38.0,
"eval_accuracy": 0.7632068718682892,
"eval_f1": 0.5945442448436461,
"eval_loss": 1.8911150693893433,
"eval_precision": 0.5678698525673614,
"eval_recall": 0.623848087126501,
"eval_runtime": 2.7453,
"eval_samples_per_second": 405.06,
"eval_steps_per_second": 25.498,
"step": 18506
},
{
"epoch": 39.0,
"eval_accuracy": 0.7622763063707946,
"eval_f1": 0.6007257089100927,
"eval_loss": 1.8849174976348877,
"eval_precision": 0.5790155440414507,
"eval_recall": 0.6241273387321977,
"eval_runtime": 2.7696,
"eval_samples_per_second": 401.495,
"eval_steps_per_second": 25.274,
"step": 18993
},
{
"epoch": 39.01,
"learning_rate": 2.024640657084189e-05,
"loss": 0.0151,
"step": 19000
},
{
"epoch": 40.0,
"eval_accuracy": 0.7665235027439752,
"eval_f1": 0.5976520811099254,
"eval_loss": 1.8399417400360107,
"eval_precision": 0.5721583652618135,
"eval_recall": 0.6255235967606814,
"eval_runtime": 2.8345,
"eval_samples_per_second": 392.315,
"eval_steps_per_second": 24.696,
"step": 19480
},
{
"epoch": 40.04,
"learning_rate": 1.9989733059548256e-05,
"loss": 0.0148,
"step": 19500
},
{
"epoch": 41.0,
"eval_accuracy": 0.7649248389405869,
"eval_f1": 0.596647742633144,
"eval_loss": 1.843032956123352,
"eval_precision": 0.5782027770500393,
"eval_recall": 0.6163082937726891,
"eval_runtime": 2.7539,
"eval_samples_per_second": 403.796,
"eval_steps_per_second": 25.419,
"step": 19967
},
{
"epoch": 41.07,
"learning_rate": 1.973305954825462e-05,
"loss": 0.0138,
"step": 20000
},
{
"epoch": 42.0,
"eval_accuracy": 0.7691481746599856,
"eval_f1": 0.5887899423782085,
"eval_loss": 1.8764090538024902,
"eval_precision": 0.5543773119605425,
"eval_recall": 0.6277576096062553,
"eval_runtime": 2.7704,
"eval_samples_per_second": 401.388,
"eval_steps_per_second": 25.267,
"step": 20454
},
{
"epoch": 42.09,
"learning_rate": 1.9476386036960984e-05,
"loss": 0.0147,
"step": 20500
},
{
"epoch": 43.0,
"eval_accuracy": 0.7666428060128847,
"eval_f1": 0.6014559894109861,
"eval_loss": 1.9270243644714355,
"eval_precision": 0.571716155007549,
"eval_recall": 0.6344596481429768,
"eval_runtime": 2.7281,
"eval_samples_per_second": 407.612,
"eval_steps_per_second": 25.659,
"step": 20941
},
{
"epoch": 43.12,
"learning_rate": 1.921971252566735e-05,
"loss": 0.0148,
"step": 21000
},
{
"epoch": 44.0,
"eval_accuracy": 0.7710570269625387,
"eval_f1": 0.5908850026497084,
"eval_loss": 1.8888484239578247,
"eval_precision": 0.5621376354928157,
"eval_recall": 0.6227310807037141,
"eval_runtime": 2.775,
"eval_samples_per_second": 400.718,
"eval_steps_per_second": 25.225,
"step": 21428
},
{
"epoch": 44.15,
"learning_rate": 1.896303901437372e-05,
"loss": 0.0123,
"step": 21500
},
{
"epoch": 45.0,
"eval_accuracy": 0.7653066094010976,
"eval_f1": 0.5868878357030016,
"eval_loss": 1.8992524147033691,
"eval_precision": 0.5551681195516812,
"eval_recall": 0.6224518290980173,
"eval_runtime": 2.7417,
"eval_samples_per_second": 405.586,
"eval_steps_per_second": 25.531,
"step": 21915
},
{
"epoch": 45.17,
"learning_rate": 1.8706365503080083e-05,
"loss": 0.0115,
"step": 22000
},
{
"epoch": 46.0,
"eval_accuracy": 0.7644714865187306,
"eval_f1": 0.597897503285151,
"eval_loss": 1.947526216506958,
"eval_precision": 0.5646562422437329,
"eval_recall": 0.635297402960067,
"eval_runtime": 2.7396,
"eval_samples_per_second": 405.892,
"eval_steps_per_second": 25.551,
"step": 22402
},
{
"epoch": 46.2,
"learning_rate": 1.844969199178645e-05,
"loss": 0.0107,
"step": 22500
},
{
"epoch": 47.0,
"eval_accuracy": 0.7674302075876879,
"eval_f1": 0.605424089337942,
"eval_loss": 1.994935393333435,
"eval_precision": 0.5777721390510023,
"eval_recall": 0.6358559061714605,
"eval_runtime": 2.7286,
"eval_samples_per_second": 407.535,
"eval_steps_per_second": 25.654,
"step": 22889
},
{
"epoch": 47.23,
"learning_rate": 1.8193018480492815e-05,
"loss": 0.0098,
"step": 23000
},
{
"epoch": 48.0,
"eval_accuracy": 0.7680744452397996,
"eval_f1": 0.5976063829787234,
"eval_loss": 1.9606735706329346,
"eval_precision": 0.5704493526275705,
"eval_recall": 0.6274783580005585,
"eval_runtime": 2.772,
"eval_samples_per_second": 401.157,
"eval_steps_per_second": 25.253,
"step": 23376
},
{
"epoch": 48.25,
"learning_rate": 1.793634496919918e-05,
"loss": 0.012,
"step": 23500
},
{
"epoch": 49.0,
"eval_accuracy": 0.7675733715103794,
"eval_f1": 0.6134034165571617,
"eval_loss": 1.918538212776184,
"eval_precision": 0.5793000744601638,
"eval_recall": 0.6517732476961743,
"eval_runtime": 2.7319,
"eval_samples_per_second": 407.049,
"eval_steps_per_second": 25.624,
"step": 23863
},
{
"epoch": 49.28,
"learning_rate": 1.7679671457905544e-05,
"loss": 0.0117,
"step": 24000
},
{
"epoch": 50.0,
"eval_accuracy": 0.7698401336196612,
"eval_f1": 0.6049822064056939,
"eval_loss": 1.9814343452453613,
"eval_precision": 0.572890664003994,
"eval_recall": 0.6408824350740017,
"eval_runtime": 2.761,
"eval_samples_per_second": 402.752,
"eval_steps_per_second": 25.353,
"step": 24350
},
{
"epoch": 50.31,
"learning_rate": 1.742299794661191e-05,
"loss": 0.0093,
"step": 24500
},
{
"epoch": 51.0,
"eval_accuracy": 0.7662371748985922,
"eval_f1": 0.6067415730337078,
"eval_loss": 2.035374164581299,
"eval_precision": 0.5760542168674698,
"eval_recall": 0.6408824350740017,
"eval_runtime": 2.7628,
"eval_samples_per_second": 402.494,
"eval_steps_per_second": 25.337,
"step": 24837
},
{
"epoch": 51.33,
"learning_rate": 1.7166324435318276e-05,
"loss": 0.0082,
"step": 25000
},
{
"epoch": 52.0,
"eval_accuracy": 0.7683130517776187,
"eval_f1": 0.6179188429087987,
"eval_loss": 1.9876421689987183,
"eval_precision": 0.5936695831188883,
"eval_recall": 0.6442334543423625,
"eval_runtime": 2.751,
"eval_samples_per_second": 404.224,
"eval_steps_per_second": 25.446,
"step": 25324
},
{
"epoch": 52.36,
"learning_rate": 1.6909650924024642e-05,
"loss": 0.0077,
"step": 25500
},
{
"epoch": 53.0,
"eval_accuracy": 0.7691481746599856,
"eval_f1": 0.6208498428747096,
"eval_loss": 2.0615577697753906,
"eval_precision": 0.6078116639914393,
"eval_recall": 0.6344596481429768,
"eval_runtime": 2.7544,
"eval_samples_per_second": 403.721,
"eval_steps_per_second": 25.414,
"step": 25811
},
{
"epoch": 53.39,
"learning_rate": 1.6652977412731005e-05,
"loss": 0.0087,
"step": 26000
},
{
"epoch": 54.0,
"eval_accuracy": 0.7652588880935338,
"eval_f1": 0.5977975878342947,
"eval_loss": 1.9790315628051758,
"eval_precision": 0.5633802816901409,
"eval_recall": 0.6366936609885507,
"eval_runtime": 2.7387,
"eval_samples_per_second": 406.036,
"eval_steps_per_second": 25.56,
"step": 26298
},
{
"epoch": 54.41,
"learning_rate": 1.639630390143737e-05,
"loss": 0.0102,
"step": 26500
},
{
"epoch": 55.0,
"eval_accuracy": 0.7677642567406346,
"eval_f1": 0.6056356660933986,
"eval_loss": 2.068793773651123,
"eval_precision": 0.5754147812971342,
"eval_recall": 0.6392069254398213,
"eval_runtime": 2.8605,
"eval_samples_per_second": 388.746,
"eval_steps_per_second": 24.471,
"step": 26785
},
{
"epoch": 55.44,
"learning_rate": 1.6139630390143737e-05,
"loss": 0.0073,
"step": 27000
},
{
"epoch": 56.0,
"eval_accuracy": 0.7678596993557624,
"eval_f1": 0.6073495759860008,
"eval_loss": 1.960081696510315,
"eval_precision": 0.5862785862785863,
"eval_recall": 0.629991622451829,
"eval_runtime": 2.7429,
"eval_samples_per_second": 405.41,
"eval_steps_per_second": 25.52,
"step": 27272
},
{
"epoch": 56.47,
"learning_rate": 1.5882956878850103e-05,
"loss": 0.0087,
"step": 27500
},
{
"epoch": 57.0,
"eval_accuracy": 0.7683369124314006,
"eval_f1": 0.6085343228200372,
"eval_loss": 2.0414817333221436,
"eval_precision": 0.5790668348045397,
"eval_recall": 0.6411616866796984,
"eval_runtime": 2.7325,
"eval_samples_per_second": 406.95,
"eval_steps_per_second": 25.617,
"step": 27759
},
{
"epoch": 57.49,
"learning_rate": 1.5626283367556466e-05,
"loss": 0.0082,
"step": 28000
},
{
"epoch": 58.0,
"eval_accuracy": 0.766571224051539,
"eval_f1": 0.6019978969505784,
"eval_loss": 2.077375888824463,
"eval_precision": 0.5686615346411721,
"eval_recall": 0.639486177045518,
"eval_runtime": 2.7299,
"eval_samples_per_second": 407.338,
"eval_steps_per_second": 25.642,
"step": 28246
},
{
"epoch": 58.52,
"learning_rate": 1.5369609856262832e-05,
"loss": 0.0056,
"step": 28500
},
{
"epoch": 59.0,
"eval_accuracy": 0.7637318062514913,
"eval_f1": 0.6061579651941098,
"eval_loss": 2.077326536178589,
"eval_precision": 0.5821547955772692,
"eval_recall": 0.6322256352974029,
"eval_runtime": 2.7596,
"eval_samples_per_second": 402.96,
"eval_steps_per_second": 25.366,
"step": 28733
},
{
"epoch": 59.55,
"learning_rate": 1.5112936344969198e-05,
"loss": 0.0076,
"step": 29000
},
{
"epoch": 60.0,
"eval_accuracy": 0.7695299451204963,
"eval_f1": 0.6170241467691892,
"eval_loss": 2.104527235031128,
"eval_precision": 0.5968162839248434,
"eval_recall": 0.6386484222284278,
"eval_runtime": 2.73,
"eval_samples_per_second": 407.333,
"eval_steps_per_second": 25.641,
"step": 29220
},
{
"epoch": 60.57,
"learning_rate": 1.4856262833675564e-05,
"loss": 0.0071,
"step": 29500
},
{
"epoch": 61.0,
"eval_accuracy": 0.768241469816273,
"eval_f1": 0.6094618408567168,
"eval_loss": 2.0994060039520264,
"eval_precision": 0.5922023182297155,
"eval_recall": 0.6277576096062553,
"eval_runtime": 2.7589,
"eval_samples_per_second": 403.058,
"eval_steps_per_second": 25.372,
"step": 29707
},
{
"epoch": 61.6,
"learning_rate": 1.459958932238193e-05,
"loss": 0.0076,
"step": 30000
},
{
"epoch": 62.0,
"eval_accuracy": 0.7649725602481507,
"eval_f1": 0.609375,
"eval_loss": 2.0936837196350098,
"eval_precision": 0.5794510198942332,
"eval_recall": 0.6425579447081821,
"eval_runtime": 2.7348,
"eval_samples_per_second": 406.615,
"eval_steps_per_second": 25.596,
"step": 30194
},
{
"epoch": 62.63,
"learning_rate": 1.4342915811088295e-05,
"loss": 0.0082,
"step": 30500
},
{
"epoch": 63.0,
"eval_accuracy": 0.7682653304700549,
"eval_f1": 0.6062616078535421,
"eval_loss": 2.0307247638702393,
"eval_precision": 0.5774576699519838,
"eval_recall": 0.6380899190170344,
"eval_runtime": 2.7517,
"eval_samples_per_second": 404.118,
"eval_steps_per_second": 25.439,
"step": 30681
},
{
"epoch": 63.66,
"learning_rate": 1.4086242299794661e-05,
"loss": 0.0068,
"step": 31000
},
{
"epoch": 64.0,
"eval_accuracy": 0.7597232164161298,
"eval_f1": 0.6074766355140186,
"eval_loss": 2.1657230854034424,
"eval_precision": 0.581990278843694,
"eval_recall": 0.635297402960067,
"eval_runtime": 2.7205,
"eval_samples_per_second": 408.743,
"eval_steps_per_second": 25.73,
"step": 31168
},
{
"epoch": 64.68,
"learning_rate": 1.3829568788501027e-05,
"loss": 0.0065,
"step": 31500
},
{
"epoch": 65.0,
"eval_accuracy": 0.7691720353137675,
"eval_f1": 0.6134431455897981,
"eval_loss": 2.0141701698303223,
"eval_precision": 0.5850012667848999,
"eval_recall": 0.644791957553756,
"eval_runtime": 2.7583,
"eval_samples_per_second": 403.142,
"eval_steps_per_second": 25.378,
"step": 31655
},
{
"epoch": 65.71,
"learning_rate": 1.3572895277207393e-05,
"loss": 0.0062,
"step": 32000
},
{
"epoch": 66.0,
"eval_accuracy": 0.76022429014555,
"eval_f1": 0.6064225053078556,
"eval_loss": 2.1378581523895264,
"eval_precision": 0.5777496839443742,
"eval_recall": 0.6380899190170344,
"eval_runtime": 2.7502,
"eval_samples_per_second": 404.334,
"eval_steps_per_second": 25.453,
"step": 32142
},
{
"epoch": 66.74,
"learning_rate": 1.331622176591376e-05,
"loss": 0.0059,
"step": 32500
},
{
"epoch": 67.0,
"eval_accuracy": 0.7631352899069435,
"eval_f1": 0.6117240462581417,
"eval_loss": 2.1318540573120117,
"eval_precision": 0.5837138508371386,
"eval_recall": 0.6425579447081821,
"eval_runtime": 2.7357,
"eval_samples_per_second": 406.471,
"eval_steps_per_second": 25.587,
"step": 32629
},
{
"epoch": 67.76,
"learning_rate": 1.3059548254620124e-05,
"loss": 0.0053,
"step": 33000
},
{
"epoch": 68.0,
"eval_accuracy": 0.768217609162491,
"eval_f1": 0.6046449900464498,
"eval_loss": 2.1246144771575928,
"eval_precision": 0.5761254425897825,
"eval_recall": 0.6361351577771572,
"eval_runtime": 2.7525,
"eval_samples_per_second": 404.001,
"eval_steps_per_second": 25.432,
"step": 33116
},
{
"epoch": 68.79,
"learning_rate": 1.280287474332649e-05,
"loss": 0.0049,
"step": 33500
},
{
"epoch": 69.0,
"eval_accuracy": 0.765736101169172,
"eval_f1": 0.60803618946248,
"eval_loss": 2.151386022567749,
"eval_precision": 0.5806861499364676,
"eval_recall": 0.6380899190170344,
"eval_runtime": 2.7251,
"eval_samples_per_second": 408.061,
"eval_steps_per_second": 25.687,
"step": 33603
},
{
"epoch": 69.82,
"learning_rate": 1.2546201232032854e-05,
"loss": 0.0037,
"step": 34000
},
{
"epoch": 70.0,
"eval_accuracy": 0.7679790026246719,
"eval_f1": 0.6107114308553158,
"eval_loss": 2.163627862930298,
"eval_precision": 0.5839490445859873,
"eval_recall": 0.6400446802569115,
"eval_runtime": 2.8617,
"eval_samples_per_second": 388.576,
"eval_steps_per_second": 24.461,
"step": 34090
},
{
"epoch": 70.84,
"learning_rate": 1.228952772073922e-05,
"loss": 0.0053,
"step": 34500
},
{
"epoch": 71.0,
"eval_accuracy": 0.7639465521355285,
"eval_f1": 0.6052596089008766,
"eval_loss": 2.1477560997009277,
"eval_precision": 0.5852895148669797,
"eval_recall": 0.6266406031834683,
"eval_runtime": 2.752,
"eval_samples_per_second": 404.075,
"eval_steps_per_second": 25.436,
"step": 34577
},
{
"epoch": 71.87,
"learning_rate": 1.2032854209445585e-05,
"loss": 0.0051,
"step": 35000
},
{
"epoch": 72.0,
"eval_accuracy": 0.7688141255070389,
"eval_f1": 0.6074976818121606,
"eval_loss": 2.1522202491760254,
"eval_precision": 0.5778729838709677,
"eval_recall": 0.6403239318626082,
"eval_runtime": 2.7541,
"eval_samples_per_second": 403.762,
"eval_steps_per_second": 25.417,
"step": 35064
},
{
"epoch": 72.9,
"learning_rate": 1.1776180698151951e-05,
"loss": 0.0047,
"step": 35500
},
{
"epoch": 73.0,
"eval_accuracy": 0.767120019088523,
"eval_f1": 0.6093333333333334,
"eval_loss": 2.1609299182891846,
"eval_precision": 0.5830569022709875,
"eval_recall": 0.6380899190170344,
"eval_runtime": 2.741,
"eval_samples_per_second": 405.695,
"eval_steps_per_second": 25.538,
"step": 35551
},
{
"epoch": 73.92,
"learning_rate": 1.1519507186858315e-05,
"loss": 0.0036,
"step": 36000
},
{
"epoch": 74.0,
"eval_accuracy": 0.7705559532331185,
"eval_f1": 0.6200566878121204,
"eval_loss": 2.175739288330078,
"eval_precision": 0.6000522466039707,
"eval_recall": 0.6414409382853952,
"eval_runtime": 2.7261,
"eval_samples_per_second": 407.915,
"eval_steps_per_second": 25.678,
"step": 36038
},
{
"epoch": 74.95,
"learning_rate": 1.1262833675564683e-05,
"loss": 0.004,
"step": 36500
},
{
"epoch": 75.0,
"eval_accuracy": 0.7661894535910284,
"eval_f1": 0.6165353279016962,
"eval_loss": 2.2280185222625732,
"eval_precision": 0.5908858166922683,
"eval_recall": 0.6445127059480592,
"eval_runtime": 2.75,
"eval_samples_per_second": 404.367,
"eval_steps_per_second": 25.455,
"step": 36525
},
{
"epoch": 75.98,
"learning_rate": 1.1006160164271048e-05,
"loss": 0.0036,
"step": 37000
},
{
"epoch": 76.0,
"eval_accuracy": 0.770985445001193,
"eval_f1": 0.6190347071583514,
"eval_loss": 2.219874382019043,
"eval_precision": 0.6015810276679842,
"eval_recall": 0.6375314158056409,
"eval_runtime": 2.7612,
"eval_samples_per_second": 402.72,
"eval_steps_per_second": 25.351,
"step": 37012
},
{
"epoch": 77.0,
"eval_accuracy": 0.7684800763540921,
"eval_f1": 0.6117552978808477,
"eval_loss": 2.1809566020965576,
"eval_precision": 0.5851606323304437,
"eval_recall": 0.6408824350740017,
"eval_runtime": 2.7598,
"eval_samples_per_second": 402.922,
"eval_steps_per_second": 25.364,
"step": 37499
},
{
"epoch": 77.0,
"learning_rate": 1.0749486652977414e-05,
"loss": 0.0043,
"step": 37500
},
{
"epoch": 78.0,
"eval_accuracy": 0.7688857074683846,
"eval_f1": 0.6095212623696176,
"eval_loss": 2.2160749435424805,
"eval_precision": 0.5848088273030536,
"eval_recall": 0.636414409382854,
"eval_runtime": 2.7615,
"eval_samples_per_second": 402.681,
"eval_steps_per_second": 25.349,
"step": 37986
},
{
"epoch": 78.03,
"learning_rate": 1.0492813141683778e-05,
"loss": 0.0039,
"step": 38000
},
{
"epoch": 79.0,
"eval_accuracy": 0.7693867811978048,
"eval_f1": 0.6086727989487517,
"eval_loss": 2.187803030014038,
"eval_precision": 0.5748324646314222,
"eval_recall": 0.6467467187936331,
"eval_runtime": 2.7479,
"eval_samples_per_second": 404.677,
"eval_steps_per_second": 25.474,
"step": 38473
},
{
"epoch": 79.06,
"learning_rate": 1.0236139630390144e-05,
"loss": 0.0052,
"step": 38500
},
{
"epoch": 80.0,
"eval_accuracy": 0.7653066094010976,
"eval_f1": 0.6083209909788608,
"eval_loss": 2.271164894104004,
"eval_precision": 0.5873634945397815,
"eval_recall": 0.6308293772689193,
"eval_runtime": 2.7444,
"eval_samples_per_second": 405.19,
"eval_steps_per_second": 25.507,
"step": 38960
},
{
"epoch": 80.08,
"learning_rate": 9.979466119096509e-06,
"loss": 0.0034,
"step": 39000
},
{
"epoch": 81.0,
"eval_accuracy": 0.7658076831305177,
"eval_f1": 0.6129723934601984,
"eval_loss": 2.264491319656372,
"eval_precision": 0.5892811131151765,
"eval_recall": 0.6386484222284278,
"eval_runtime": 2.7444,
"eval_samples_per_second": 405.192,
"eval_steps_per_second": 25.507,
"step": 39447
},
{
"epoch": 81.11,
"learning_rate": 9.722792607802875e-06,
"loss": 0.0027,
"step": 39500
},
{
"epoch": 82.0,
"eval_accuracy": 0.7651157241708423,
"eval_f1": 0.6160738528373609,
"eval_loss": 2.2353475093841553,
"eval_precision": 0.5994715984147952,
"eval_recall": 0.6336218933258866,
"eval_runtime": 2.7335,
"eval_samples_per_second": 406.808,
"eval_steps_per_second": 25.608,
"step": 39934
},
{
"epoch": 82.14,
"learning_rate": 9.46611909650924e-06,
"loss": 0.0026,
"step": 40000
},
{
"epoch": 83.0,
"eval_accuracy": 0.7630398472918158,
"eval_f1": 0.6092892517735242,
"eval_loss": 2.3130922317504883,
"eval_precision": 0.5850899742930591,
"eval_recall": 0.6355766545657637,
"eval_runtime": 2.7731,
"eval_samples_per_second": 400.996,
"eval_steps_per_second": 25.243,
"step": 40421
},
{
"epoch": 83.16,
"learning_rate": 9.209445585215607e-06,
"loss": 0.0017,
"step": 40500
},
{
"epoch": 84.0,
"eval_accuracy": 0.766022429014555,
"eval_f1": 0.6101919258769026,
"eval_loss": 2.279829978942871,
"eval_precision": 0.5800201308505284,
"eval_recall": 0.643674951130969,
"eval_runtime": 2.8746,
"eval_samples_per_second": 386.835,
"eval_steps_per_second": 24.351,
"step": 40908
},
{
"epoch": 84.19,
"learning_rate": 8.952772073921972e-06,
"loss": 0.0022,
"step": 41000
},
{
"epoch": 85.0,
"eval_accuracy": 0.7636840849439275,
"eval_f1": 0.6126270733012306,
"eval_loss": 2.3181393146514893,
"eval_precision": 0.5879332477535302,
"eval_recall": 0.639486177045518,
"eval_runtime": 2.7182,
"eval_samples_per_second": 409.097,
"eval_steps_per_second": 25.752,
"step": 41395
},
{
"epoch": 85.22,
"learning_rate": 8.696098562628338e-06,
"loss": 0.0032,
"step": 41500
},
{
"epoch": 86.0,
"eval_accuracy": 0.769625387735624,
"eval_f1": 0.6169463995668651,
"eval_loss": 2.296386480331421,
"eval_precision": 0.5986340950879958,
"eval_recall": 0.636414409382854,
"eval_runtime": 2.7321,
"eval_samples_per_second": 407.016,
"eval_steps_per_second": 25.622,
"step": 41882
},
{
"epoch": 86.24,
"learning_rate": 8.439425051334702e-06,
"loss": 0.003,
"step": 42000
},
{
"epoch": 87.0,
"eval_accuracy": 0.7664996420901933,
"eval_f1": 0.6199271942834029,
"eval_loss": 2.250883102416992,
"eval_precision": 0.5993222106360793,
"eval_recall": 0.6419994414967886,
"eval_runtime": 2.7205,
"eval_samples_per_second": 408.748,
"eval_steps_per_second": 25.731,
"step": 42369
},
{
"epoch": 87.27,
"learning_rate": 8.182751540041068e-06,
"loss": 0.003,
"step": 42500
},
{
"epoch": 88.0,
"eval_accuracy": 0.7704843712717728,
"eval_f1": 0.6209611729568286,
"eval_loss": 2.2512495517730713,
"eval_precision": 0.604227212681638,
"eval_recall": 0.6386484222284278,
"eval_runtime": 2.7574,
"eval_samples_per_second": 403.273,
"eval_steps_per_second": 25.386,
"step": 42856
},
{
"epoch": 88.3,
"learning_rate": 7.926078028747433e-06,
"loss": 0.0027,
"step": 43000
},
{
"epoch": 89.0,
"eval_accuracy": 0.7695299451204963,
"eval_f1": 0.612212529738303,
"eval_loss": 2.2786777019500732,
"eval_precision": 0.5811794228356336,
"eval_recall": 0.6467467187936331,
"eval_runtime": 2.7267,
"eval_samples_per_second": 407.814,
"eval_steps_per_second": 25.672,
"step": 43343
},
{
"epoch": 89.32,
"learning_rate": 7.669404517453799e-06,
"loss": 0.0016,
"step": 43500
},
{
"epoch": 90.0,
"eval_accuracy": 0.7652588880935338,
"eval_f1": 0.6130278406820301,
"eval_loss": 2.2572543621063232,
"eval_precision": 0.5860927152317881,
"eval_recall": 0.6425579447081821,
"eval_runtime": 2.7593,
"eval_samples_per_second": 403.005,
"eval_steps_per_second": 25.369,
"step": 43830
},
{
"epoch": 90.35,
"learning_rate": 7.412731006160165e-06,
"loss": 0.0028,
"step": 44000
},
{
"epoch": 91.0,
"eval_accuracy": 0.7694106418515867,
"eval_f1": 0.6204956463496316,
"eval_loss": 2.247675895690918,
"eval_precision": 0.596292481977343,
"eval_recall": 0.6467467187936331,
"eval_runtime": 2.7505,
"eval_samples_per_second": 404.29,
"eval_steps_per_second": 25.45,
"step": 44317
},
{
"epoch": 91.38,
"learning_rate": 7.15605749486653e-06,
"loss": 0.0022,
"step": 44500
},
{
"epoch": 92.0,
"eval_accuracy": 0.765187306132188,
"eval_f1": 0.6163021868787275,
"eval_loss": 2.2445809841156006,
"eval_precision": 0.5865287588294652,
"eval_recall": 0.6492599832449036,
"eval_runtime": 2.7626,
"eval_samples_per_second": 402.524,
"eval_steps_per_second": 25.339,
"step": 44804
},
{
"epoch": 92.4,
"learning_rate": 6.899383983572895e-06,
"loss": 0.0017,
"step": 45000
},
{
"epoch": 93.0,
"eval_accuracy": 0.7661417322834646,
"eval_f1": 0.6177255739455418,
"eval_loss": 2.25286602973938,
"eval_precision": 0.591664535924316,
"eval_recall": 0.6461882155822396,
"eval_runtime": 2.7411,
"eval_samples_per_second": 405.68,
"eval_steps_per_second": 25.537,
"step": 45291
},
{
"epoch": 93.43,
"learning_rate": 6.642710472279261e-06,
"loss": 0.0017,
"step": 45500
},
{
"epoch": 94.0,
"eval_accuracy": 0.7649725602481507,
"eval_f1": 0.6157979580870501,
"eval_loss": 2.2623953819274902,
"eval_precision": 0.5933212529122444,
"eval_recall": 0.6400446802569115,
"eval_runtime": 2.7635,
"eval_samples_per_second": 402.391,
"eval_steps_per_second": 25.33,
"step": 45778
},
{
"epoch": 94.46,
"learning_rate": 6.386036960985627e-06,
"loss": 0.0015,
"step": 46000
},
{
"epoch": 95.0,
"eval_accuracy": 0.7649964209019328,
"eval_f1": 0.6160291931342073,
"eval_loss": 2.2783970832824707,
"eval_precision": 0.5969093766369827,
"eval_recall": 0.636414409382854,
"eval_runtime": 2.7635,
"eval_samples_per_second": 402.387,
"eval_steps_per_second": 25.33,
"step": 46265
},
{
"epoch": 95.48,
"learning_rate": 6.129363449691992e-06,
"loss": 0.0012,
"step": 46500
},
{
"epoch": 96.0,
"eval_accuracy": 0.7629444046766881,
"eval_f1": 0.6143217749435366,
"eval_loss": 2.303816318511963,
"eval_precision": 0.58590978205778,
"eval_recall": 0.6456297123708461,
"eval_runtime": 2.7486,
"eval_samples_per_second": 404.571,
"eval_steps_per_second": 25.468,
"step": 46752
},
{
"epoch": 96.51,
"learning_rate": 5.872689938398357e-06,
"loss": 0.0019,
"step": 47000
},
{
"epoch": 97.0,
"eval_accuracy": 0.764900978286805,
"eval_f1": 0.6164437971666887,
"eval_loss": 2.3128607273101807,
"eval_precision": 0.5861027190332326,
"eval_recall": 0.6500977380619939,
"eval_runtime": 2.7259,
"eval_samples_per_second": 407.943,
"eval_steps_per_second": 25.68,
"step": 47239
},
{
"epoch": 97.54,
"learning_rate": 5.6160164271047226e-06,
"loss": 0.001,
"step": 47500
},
{
"epoch": 98.0,
"eval_accuracy": 0.7681698878549272,
"eval_f1": 0.615528781793842,
"eval_loss": 2.3077099323272705,
"eval_precision": 0.5911545384417588,
"eval_recall": 0.6419994414967886,
"eval_runtime": 2.7427,
"eval_samples_per_second": 405.435,
"eval_steps_per_second": 25.522,
"step": 47726
},
{
"epoch": 98.56,
"learning_rate": 5.359342915811089e-06,
"loss": 0.0009,
"step": 48000
},
{
"epoch": 99.0,
"eval_accuracy": 0.7633023144834169,
"eval_f1": 0.6161656646626587,
"eval_loss": 2.3493497371673584,
"eval_precision": 0.5906762295081968,
"eval_recall": 0.6439542027366657,
"eval_runtime": 2.7482,
"eval_samples_per_second": 404.633,
"eval_steps_per_second": 25.471,
"step": 48213
},
{
"epoch": 99.59,
"learning_rate": 5.102669404517454e-06,
"loss": 0.0015,
"step": 48500
},
{
"epoch": 100.0,
"eval_accuracy": 0.7701264614650442,
"eval_f1": 0.62121007950411,
"eval_loss": 2.319547414779663,
"eval_precision": 0.6002604166666666,
"eval_recall": 0.643674951130969,
"eval_runtime": 2.8329,
"eval_samples_per_second": 392.534,
"eval_steps_per_second": 24.71,
"step": 48700
},
{
"epoch": 100.62,
"learning_rate": 4.845995893223819e-06,
"loss": 0.001,
"step": 49000
},
{
"epoch": 101.0,
"eval_accuracy": 0.7711286089238845,
"eval_f1": 0.6214266631044617,
"eval_loss": 2.3443996906280518,
"eval_precision": 0.5956466069142126,
"eval_recall": 0.6495392348506004,
"eval_runtime": 2.7763,
"eval_samples_per_second": 400.528,
"eval_steps_per_second": 25.213,
"step": 49187
},
{
"epoch": 101.64,
"learning_rate": 4.5893223819301845e-06,
"loss": 0.0008,
"step": 49500
},
{
"epoch": 102.0,
"eval_accuracy": 0.7638749701741828,
"eval_f1": 0.6155906777390839,
"eval_loss": 2.404651403427124,
"eval_precision": 0.5915057915057915,
"eval_recall": 0.6417201898910919,
"eval_runtime": 2.7517,
"eval_samples_per_second": 404.111,
"eval_steps_per_second": 25.439,
"step": 49674
},
{
"epoch": 102.67,
"learning_rate": 4.332648870636551e-06,
"loss": 0.0011,
"step": 50000
},
{
"epoch": 103.0,
"eval_accuracy": 0.7672393223574326,
"eval_f1": 0.6098464796188459,
"eval_loss": 2.344151258468628,
"eval_precision": 0.579622641509434,
"eval_recall": 0.6433956995252723,
"eval_runtime": 2.7536,
"eval_samples_per_second": 403.83,
"eval_steps_per_second": 25.421,
"step": 50161
},
{
"epoch": 103.7,
"learning_rate": 4.075975359342916e-06,
"loss": 0.0009,
"step": 50500
},
{
"epoch": 104.0,
"eval_accuracy": 0.768217609162491,
"eval_f1": 0.6160439266104192,
"eval_loss": 2.3377583026885986,
"eval_precision": 0.5918682449819866,
"eval_recall": 0.6422786931024853,
"eval_runtime": 2.718,
"eval_samples_per_second": 409.123,
"eval_steps_per_second": 25.754,
"step": 50648
},
{
"epoch": 104.72,
"learning_rate": 3.819301848049281e-06,
"loss": 0.0011,
"step": 51000
},
{
"epoch": 105.0,
"eval_accuracy": 0.7702934860415175,
"eval_f1": 0.6217602591792657,
"eval_loss": 2.319119691848755,
"eval_precision": 0.6017768487065587,
"eval_recall": 0.6431164479195756,
"eval_runtime": 2.7509,
"eval_samples_per_second": 404.228,
"eval_steps_per_second": 25.446,
"step": 51135
},
{
"epoch": 105.75,
"learning_rate": 3.5626283367556473e-06,
"loss": 0.0007,
"step": 51500
},
{
"epoch": 106.0,
"eval_accuracy": 0.7682891911238368,
"eval_f1": 0.6160821442859047,
"eval_loss": 2.376638412475586,
"eval_precision": 0.5895865237366003,
"eval_recall": 0.6450712091594527,
"eval_runtime": 2.7518,
"eval_samples_per_second": 404.103,
"eval_steps_per_second": 25.438,
"step": 51622
},
{
"epoch": 106.78,
"learning_rate": 3.3059548254620125e-06,
"loss": 0.0004,
"step": 52000
},
{
"epoch": 107.0,
"eval_accuracy": 0.77568599379623,
"eval_f1": 0.624123043712898,
"eval_loss": 2.3492467403411865,
"eval_precision": 0.6037588097102584,
"eval_recall": 0.6459089639765428,
"eval_runtime": 2.7841,
"eval_samples_per_second": 399.413,
"eval_steps_per_second": 25.143,
"step": 52109
},
{
"epoch": 107.8,
"learning_rate": 3.049281314168378e-06,
"loss": 0.0008,
"step": 52500
},
{
"epoch": 108.0,
"eval_accuracy": 0.7681221665473634,
"eval_f1": 0.620874698148645,
"eval_loss": 2.3653135299682617,
"eval_precision": 0.5974696617609089,
"eval_recall": 0.6461882155822396,
"eval_runtime": 2.742,
"eval_samples_per_second": 405.547,
"eval_steps_per_second": 25.529,
"step": 52596
},
{
"epoch": 108.83,
"learning_rate": 2.7926078028747435e-06,
"loss": 0.0005,
"step": 53000
},
{
"epoch": 109.0,
"eval_accuracy": 0.7691720353137675,
"eval_f1": 0.6206246634356489,
"eval_loss": 2.3852195739746094,
"eval_precision": 0.5991681829997401,
"eval_recall": 0.643674951130969,
"eval_runtime": 2.756,
"eval_samples_per_second": 403.488,
"eval_steps_per_second": 25.399,
"step": 53083
},
{
"epoch": 109.86,
"learning_rate": 2.5359342915811088e-06,
"loss": 0.0005,
"step": 53500
},
{
"epoch": 110.0,
"eval_accuracy": 0.7685277976616559,
"eval_f1": 0.6224392891059558,
"eval_loss": 2.406304359436035,
"eval_precision": 0.6052770448548813,
"eval_recall": 0.6406031834683049,
"eval_runtime": 2.7427,
"eval_samples_per_second": 405.436,
"eval_steps_per_second": 25.522,
"step": 53570
},
{
"epoch": 110.88,
"learning_rate": 2.2792607802874745e-06,
"loss": 0.0008,
"step": 54000
},
{
"epoch": 111.0,
"eval_accuracy": 0.7683369124314006,
"eval_f1": 0.6195049371026647,
"eval_loss": 2.4256536960601807,
"eval_precision": 0.6007345225603358,
"eval_recall": 0.639486177045518,
"eval_runtime": 2.7562,
"eval_samples_per_second": 403.45,
"eval_steps_per_second": 25.397,
"step": 54057
},
{
"epoch": 111.91,
"learning_rate": 2.0225872689938397e-06,
"loss": 0.0009,
"step": 54500
},
{
"epoch": 112.0,
"eval_accuracy": 0.7700071581961345,
"eval_f1": 0.6207082267402719,
"eval_loss": 2.40315842628479,
"eval_precision": 0.5993239729589184,
"eval_recall": 0.643674951130969,
"eval_runtime": 2.7393,
"eval_samples_per_second": 405.937,
"eval_steps_per_second": 25.554,
"step": 54544
},
{
"epoch": 112.94,
"learning_rate": 1.7659137577002054e-06,
"loss": 0.0006,
"step": 55000
},
{
"epoch": 113.0,
"eval_accuracy": 0.770722977809592,
"eval_f1": 0.6195783536994763,
"eval_loss": 2.3877792358398438,
"eval_precision": 0.5967408173823073,
"eval_recall": 0.6442334543423625,
"eval_runtime": 2.7437,
"eval_samples_per_second": 405.288,
"eval_steps_per_second": 25.513,
"step": 55031
},
{
"epoch": 113.96,
"learning_rate": 1.509240246406571e-06,
"loss": 0.0003,
"step": 55500
},
{
"epoch": 114.0,
"eval_accuracy": 0.771271772846576,
"eval_f1": 0.6211180124223602,
"eval_loss": 2.3938703536987305,
"eval_precision": 0.6013071895424836,
"eval_recall": 0.6422786931024853,
"eval_runtime": 2.7378,
"eval_samples_per_second": 406.165,
"eval_steps_per_second": 25.568,
"step": 55518
},
{
"epoch": 114.99,
"learning_rate": 1.2525667351129362e-06,
"loss": 0.0003,
"step": 56000
},
{
"epoch": 115.0,
"eval_accuracy": 0.7702934860415175,
"eval_f1": 0.618289722147289,
"eval_loss": 2.412465810775757,
"eval_precision": 0.597965040438299,
"eval_recall": 0.6400446802569115,
"eval_runtime": 2.8471,
"eval_samples_per_second": 390.575,
"eval_steps_per_second": 24.587,
"step": 56005
},
{
"epoch": 116.0,
"eval_accuracy": 0.7705798138869006,
"eval_f1": 0.6196730099169123,
"eval_loss": 2.420255661010742,
"eval_precision": 0.5957227518680752,
"eval_recall": 0.6456297123708461,
"eval_runtime": 2.7618,
"eval_samples_per_second": 402.642,
"eval_steps_per_second": 25.346,
"step": 56492
},
{
"epoch": 116.02,
"learning_rate": 9.958932238193019e-07,
"loss": 0.0003,
"step": 56500
},
{
"epoch": 117.0,
"eval_accuracy": 0.770722977809592,
"eval_f1": 0.6205501618122977,
"eval_loss": 2.410393476486206,
"eval_precision": 0.6,
"eval_recall": 0.6425579447081821,
"eval_runtime": 2.7288,
"eval_samples_per_second": 407.51,
"eval_steps_per_second": 25.653,
"step": 56979
},
{
"epoch": 117.04,
"learning_rate": 7.392197125256674e-07,
"loss": 0.0004,
"step": 57000
},
{
"epoch": 118.0,
"eval_accuracy": 0.7695538057742782,
"eval_f1": 0.6216835016835018,
"eval_loss": 2.4210033416748047,
"eval_precision": 0.6004162330905307,
"eval_recall": 0.6445127059480592,
"eval_runtime": 2.7601,
"eval_samples_per_second": 402.887,
"eval_steps_per_second": 25.362,
"step": 57466
},
{
"epoch": 118.07,
"learning_rate": 4.82546201232033e-07,
"loss": 0.0004,
"step": 57500
},
{
"epoch": 119.0,
"eval_accuracy": 0.7691720353137675,
"eval_f1": 0.6201508620689655,
"eval_loss": 2.4213058948516846,
"eval_precision": 0.5990111891751236,
"eval_recall": 0.6428371963138788,
"eval_runtime": 2.776,
"eval_samples_per_second": 400.579,
"eval_steps_per_second": 25.216,
"step": 57953
},
{
"epoch": 119.1,
"learning_rate": 2.2587268993839835e-07,
"loss": 0.0004,
"step": 58000
},
{
"epoch": 120.0,
"eval_accuracy": 0.7694106418515867,
"eval_f1": 0.6200296535921282,
"eval_loss": 2.4216408729553223,
"eval_precision": 0.5992704533611256,
"eval_recall": 0.6422786931024853,
"eval_runtime": 2.7416,
"eval_samples_per_second": 405.598,
"eval_steps_per_second": 25.532,
"step": 58440
},
{
"epoch": 120.0,
"step": 58440,
"total_flos": 1.220726808511488e+17,
"train_loss": 0.04618847079620959,
"train_runtime": 7270.5358,
"train_samples_per_second": 128.491,
"train_steps_per_second": 8.038
}
],
"max_steps": 58440,
"num_train_epochs": 120,
"total_flos": 1.220726808511488e+17,
"trial_name": null,
"trial_params": null
}