albertina_gun / trainer_state.json
belisards's picture
Upload 5 files
009effb
raw
history blame
111 kB
{
"best_metric": 0.9738785407252287,
"best_model_checkpoint": "drive/MyDrive/albertina/checkpoint-9000",
"epoch": 0.8024966562639322,
"global_step": 9000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.0000000000000002e-07,
"loss": 0.7187,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 4.0000000000000003e-07,
"loss": 0.7199,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 6.000000000000001e-07,
"loss": 0.6146,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 8.000000000000001e-07,
"loss": 0.6195,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.6245,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 1.2000000000000002e-06,
"loss": 0.6241,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 1.4000000000000001e-06,
"loss": 0.5657,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 1.6000000000000001e-06,
"loss": 0.4948,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 1.8000000000000001e-06,
"loss": 0.5167,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.4677,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 2.2e-06,
"loss": 0.4529,
"step": 110
},
{
"epoch": 0.01,
"learning_rate": 2.4000000000000003e-06,
"loss": 0.4148,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 2.6e-06,
"loss": 0.3799,
"step": 130
},
{
"epoch": 0.01,
"learning_rate": 2.8000000000000003e-06,
"loss": 0.3091,
"step": 140
},
{
"epoch": 0.01,
"learning_rate": 3e-06,
"loss": 0.3131,
"step": 150
},
{
"epoch": 0.01,
"learning_rate": 3.2000000000000003e-06,
"loss": 0.2855,
"step": 160
},
{
"epoch": 0.02,
"learning_rate": 3.4000000000000005e-06,
"loss": 0.2601,
"step": 170
},
{
"epoch": 0.02,
"learning_rate": 3.6000000000000003e-06,
"loss": 0.193,
"step": 180
},
{
"epoch": 0.02,
"learning_rate": 3.8000000000000005e-06,
"loss": 0.2041,
"step": 190
},
{
"epoch": 0.02,
"learning_rate": 4.000000000000001e-06,
"loss": 0.326,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 4.2000000000000004e-06,
"loss": 0.1665,
"step": 210
},
{
"epoch": 0.02,
"learning_rate": 4.4e-06,
"loss": 0.2885,
"step": 220
},
{
"epoch": 0.02,
"learning_rate": 4.600000000000001e-06,
"loss": 0.2399,
"step": 230
},
{
"epoch": 0.02,
"learning_rate": 4.800000000000001e-06,
"loss": 0.2109,
"step": 240
},
{
"epoch": 0.02,
"learning_rate": 5e-06,
"loss": 0.2259,
"step": 250
},
{
"epoch": 0.02,
"learning_rate": 5.2e-06,
"loss": 0.2323,
"step": 260
},
{
"epoch": 0.02,
"learning_rate": 5.400000000000001e-06,
"loss": 0.1955,
"step": 270
},
{
"epoch": 0.02,
"learning_rate": 5.600000000000001e-06,
"loss": 0.1765,
"step": 280
},
{
"epoch": 0.03,
"learning_rate": 5.8e-06,
"loss": 0.1722,
"step": 290
},
{
"epoch": 0.03,
"learning_rate": 6e-06,
"loss": 0.1648,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 6.200000000000001e-06,
"loss": 0.1617,
"step": 310
},
{
"epoch": 0.03,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.1687,
"step": 320
},
{
"epoch": 0.03,
"learning_rate": 6.600000000000001e-06,
"loss": 0.1727,
"step": 330
},
{
"epoch": 0.03,
"learning_rate": 6.800000000000001e-06,
"loss": 0.2064,
"step": 340
},
{
"epoch": 0.03,
"learning_rate": 7e-06,
"loss": 0.1663,
"step": 350
},
{
"epoch": 0.03,
"learning_rate": 7.2000000000000005e-06,
"loss": 0.1032,
"step": 360
},
{
"epoch": 0.03,
"learning_rate": 7.4e-06,
"loss": 0.1592,
"step": 370
},
{
"epoch": 0.03,
"learning_rate": 7.600000000000001e-06,
"loss": 0.2606,
"step": 380
},
{
"epoch": 0.03,
"learning_rate": 7.800000000000002e-06,
"loss": 0.0709,
"step": 390
},
{
"epoch": 0.04,
"learning_rate": 8.000000000000001e-06,
"loss": 0.117,
"step": 400
},
{
"epoch": 0.04,
"learning_rate": 8.2e-06,
"loss": 0.1836,
"step": 410
},
{
"epoch": 0.04,
"learning_rate": 8.400000000000001e-06,
"loss": 0.0998,
"step": 420
},
{
"epoch": 0.04,
"learning_rate": 8.6e-06,
"loss": 0.1288,
"step": 430
},
{
"epoch": 0.04,
"learning_rate": 8.8e-06,
"loss": 0.1877,
"step": 440
},
{
"epoch": 0.04,
"learning_rate": 9e-06,
"loss": 0.1695,
"step": 450
},
{
"epoch": 0.04,
"learning_rate": 9.200000000000002e-06,
"loss": 0.1736,
"step": 460
},
{
"epoch": 0.04,
"learning_rate": 9.4e-06,
"loss": 0.195,
"step": 470
},
{
"epoch": 0.04,
"learning_rate": 9.600000000000001e-06,
"loss": 0.1599,
"step": 480
},
{
"epoch": 0.04,
"learning_rate": 9.800000000000001e-06,
"loss": 0.0596,
"step": 490
},
{
"epoch": 0.04,
"learning_rate": 1e-05,
"loss": 0.0599,
"step": 500
},
{
"epoch": 0.05,
"learning_rate": 9.998200629779578e-06,
"loss": 0.2157,
"step": 510
},
{
"epoch": 0.05,
"learning_rate": 9.996401259559155e-06,
"loss": 0.1006,
"step": 520
},
{
"epoch": 0.05,
"learning_rate": 9.994601889338731e-06,
"loss": 0.2082,
"step": 530
},
{
"epoch": 0.05,
"learning_rate": 9.99280251911831e-06,
"loss": 0.2305,
"step": 540
},
{
"epoch": 0.05,
"learning_rate": 9.991003148897887e-06,
"loss": 0.1995,
"step": 550
},
{
"epoch": 0.05,
"learning_rate": 9.989203778677464e-06,
"loss": 0.1551,
"step": 560
},
{
"epoch": 0.05,
"learning_rate": 9.98740440845704e-06,
"loss": 0.0971,
"step": 570
},
{
"epoch": 0.05,
"learning_rate": 9.985605038236617e-06,
"loss": 0.2007,
"step": 580
},
{
"epoch": 0.05,
"learning_rate": 9.983805668016196e-06,
"loss": 0.1312,
"step": 590
},
{
"epoch": 0.05,
"learning_rate": 9.982006297795773e-06,
"loss": 0.1931,
"step": 600
},
{
"epoch": 0.05,
"learning_rate": 9.98020692757535e-06,
"loss": 0.223,
"step": 610
},
{
"epoch": 0.06,
"learning_rate": 9.978407557354927e-06,
"loss": 0.3252,
"step": 620
},
{
"epoch": 0.06,
"learning_rate": 9.976608187134503e-06,
"loss": 0.0981,
"step": 630
},
{
"epoch": 0.06,
"learning_rate": 9.97480881691408e-06,
"loss": 0.1815,
"step": 640
},
{
"epoch": 0.06,
"learning_rate": 9.973009446693657e-06,
"loss": 0.1782,
"step": 650
},
{
"epoch": 0.06,
"learning_rate": 9.971210076473236e-06,
"loss": 0.1804,
"step": 660
},
{
"epoch": 0.06,
"learning_rate": 9.969410706252813e-06,
"loss": 0.1271,
"step": 670
},
{
"epoch": 0.06,
"learning_rate": 9.96761133603239e-06,
"loss": 0.1316,
"step": 680
},
{
"epoch": 0.06,
"learning_rate": 9.965811965811966e-06,
"loss": 0.2092,
"step": 690
},
{
"epoch": 0.06,
"learning_rate": 9.964012595591543e-06,
"loss": 0.0782,
"step": 700
},
{
"epoch": 0.06,
"learning_rate": 9.962213225371122e-06,
"loss": 0.2433,
"step": 710
},
{
"epoch": 0.06,
"learning_rate": 9.960413855150699e-06,
"loss": 0.1883,
"step": 720
},
{
"epoch": 0.07,
"learning_rate": 9.958614484930275e-06,
"loss": 0.032,
"step": 730
},
{
"epoch": 0.07,
"learning_rate": 9.956815114709852e-06,
"loss": 0.2181,
"step": 740
},
{
"epoch": 0.07,
"learning_rate": 9.955015744489429e-06,
"loss": 0.1828,
"step": 750
},
{
"epoch": 0.07,
"learning_rate": 9.953216374269008e-06,
"loss": 0.1206,
"step": 760
},
{
"epoch": 0.07,
"learning_rate": 9.951417004048583e-06,
"loss": 0.1253,
"step": 770
},
{
"epoch": 0.07,
"learning_rate": 9.949617633828161e-06,
"loss": 0.1047,
"step": 780
},
{
"epoch": 0.07,
"learning_rate": 9.947818263607738e-06,
"loss": 0.1174,
"step": 790
},
{
"epoch": 0.07,
"learning_rate": 9.946018893387315e-06,
"loss": 0.3025,
"step": 800
},
{
"epoch": 0.07,
"learning_rate": 9.944219523166892e-06,
"loss": 0.0581,
"step": 810
},
{
"epoch": 0.07,
"learning_rate": 9.942420152946469e-06,
"loss": 0.2664,
"step": 820
},
{
"epoch": 0.07,
"learning_rate": 9.940620782726047e-06,
"loss": 0.1218,
"step": 830
},
{
"epoch": 0.07,
"learning_rate": 9.938821412505624e-06,
"loss": 0.1708,
"step": 840
},
{
"epoch": 0.08,
"learning_rate": 9.937022042285201e-06,
"loss": 0.1626,
"step": 850
},
{
"epoch": 0.08,
"learning_rate": 9.935222672064778e-06,
"loss": 0.0553,
"step": 860
},
{
"epoch": 0.08,
"learning_rate": 9.933423301844355e-06,
"loss": 0.1209,
"step": 870
},
{
"epoch": 0.08,
"learning_rate": 9.931623931623933e-06,
"loss": 0.11,
"step": 880
},
{
"epoch": 0.08,
"learning_rate": 9.929824561403509e-06,
"loss": 0.0945,
"step": 890
},
{
"epoch": 0.08,
"learning_rate": 9.928025191183087e-06,
"loss": 0.2105,
"step": 900
},
{
"epoch": 0.08,
"learning_rate": 9.926225820962664e-06,
"loss": 0.1548,
"step": 910
},
{
"epoch": 0.08,
"learning_rate": 9.92442645074224e-06,
"loss": 0.1819,
"step": 920
},
{
"epoch": 0.08,
"learning_rate": 9.922627080521818e-06,
"loss": 0.1461,
"step": 930
},
{
"epoch": 0.08,
"learning_rate": 9.920827710301395e-06,
"loss": 0.1917,
"step": 940
},
{
"epoch": 0.08,
"learning_rate": 9.919028340080973e-06,
"loss": 0.0796,
"step": 950
},
{
"epoch": 0.09,
"learning_rate": 9.91722896986055e-06,
"loss": 0.1768,
"step": 960
},
{
"epoch": 0.09,
"learning_rate": 9.915429599640127e-06,
"loss": 0.1726,
"step": 970
},
{
"epoch": 0.09,
"learning_rate": 9.913630229419704e-06,
"loss": 0.1244,
"step": 980
},
{
"epoch": 0.09,
"learning_rate": 9.91183085919928e-06,
"loss": 0.0765,
"step": 990
},
{
"epoch": 0.09,
"learning_rate": 9.910031488978859e-06,
"loss": 0.1842,
"step": 1000
},
{
"epoch": 0.09,
"learning_rate": 9.908232118758436e-06,
"loss": 0.1192,
"step": 1010
},
{
"epoch": 0.09,
"learning_rate": 9.906432748538013e-06,
"loss": 0.132,
"step": 1020
},
{
"epoch": 0.09,
"learning_rate": 9.90463337831759e-06,
"loss": 0.1347,
"step": 1030
},
{
"epoch": 0.09,
"learning_rate": 9.902834008097167e-06,
"loss": 0.0591,
"step": 1040
},
{
"epoch": 0.09,
"learning_rate": 9.901034637876743e-06,
"loss": 0.1588,
"step": 1050
},
{
"epoch": 0.09,
"learning_rate": 9.89923526765632e-06,
"loss": 0.1273,
"step": 1060
},
{
"epoch": 0.1,
"learning_rate": 9.897435897435899e-06,
"loss": 0.0527,
"step": 1070
},
{
"epoch": 0.1,
"learning_rate": 9.895636527215476e-06,
"loss": 0.1438,
"step": 1080
},
{
"epoch": 0.1,
"learning_rate": 9.893837156995053e-06,
"loss": 0.1085,
"step": 1090
},
{
"epoch": 0.1,
"learning_rate": 9.89203778677463e-06,
"loss": 0.0793,
"step": 1100
},
{
"epoch": 0.1,
"learning_rate": 9.890238416554206e-06,
"loss": 0.1617,
"step": 1110
},
{
"epoch": 0.1,
"learning_rate": 9.888439046333785e-06,
"loss": 0.0164,
"step": 1120
},
{
"epoch": 0.1,
"learning_rate": 9.886639676113362e-06,
"loss": 0.2053,
"step": 1130
},
{
"epoch": 0.1,
"learning_rate": 9.884840305892939e-06,
"loss": 0.1774,
"step": 1140
},
{
"epoch": 0.1,
"learning_rate": 9.883040935672515e-06,
"loss": 0.1195,
"step": 1150
},
{
"epoch": 0.1,
"learning_rate": 9.881241565452092e-06,
"loss": 0.0487,
"step": 1160
},
{
"epoch": 0.1,
"learning_rate": 9.879442195231669e-06,
"loss": 0.1629,
"step": 1170
},
{
"epoch": 0.11,
"learning_rate": 9.877642825011246e-06,
"loss": 0.0964,
"step": 1180
},
{
"epoch": 0.11,
"learning_rate": 9.875843454790825e-06,
"loss": 0.1443,
"step": 1190
},
{
"epoch": 0.11,
"learning_rate": 9.874044084570401e-06,
"loss": 0.1089,
"step": 1200
},
{
"epoch": 0.11,
"learning_rate": 9.872244714349978e-06,
"loss": 0.0233,
"step": 1210
},
{
"epoch": 0.11,
"learning_rate": 9.870445344129555e-06,
"loss": 0.0712,
"step": 1220
},
{
"epoch": 0.11,
"learning_rate": 9.868645973909132e-06,
"loss": 0.2537,
"step": 1230
},
{
"epoch": 0.11,
"learning_rate": 9.86684660368871e-06,
"loss": 0.2621,
"step": 1240
},
{
"epoch": 0.11,
"learning_rate": 9.865047233468287e-06,
"loss": 0.1302,
"step": 1250
},
{
"epoch": 0.11,
"learning_rate": 9.863247863247864e-06,
"loss": 0.136,
"step": 1260
},
{
"epoch": 0.11,
"learning_rate": 9.861448493027441e-06,
"loss": 0.146,
"step": 1270
},
{
"epoch": 0.11,
"learning_rate": 9.859649122807018e-06,
"loss": 0.0938,
"step": 1280
},
{
"epoch": 0.12,
"learning_rate": 9.857849752586597e-06,
"loss": 0.0718,
"step": 1290
},
{
"epoch": 0.12,
"learning_rate": 9.856050382366172e-06,
"loss": 0.1728,
"step": 1300
},
{
"epoch": 0.12,
"learning_rate": 9.85425101214575e-06,
"loss": 0.2543,
"step": 1310
},
{
"epoch": 0.12,
"learning_rate": 9.852451641925327e-06,
"loss": 0.1327,
"step": 1320
},
{
"epoch": 0.12,
"learning_rate": 9.850652271704904e-06,
"loss": 0.1058,
"step": 1330
},
{
"epoch": 0.12,
"learning_rate": 9.84885290148448e-06,
"loss": 0.13,
"step": 1340
},
{
"epoch": 0.12,
"learning_rate": 9.847053531264058e-06,
"loss": 0.1322,
"step": 1350
},
{
"epoch": 0.12,
"learning_rate": 9.845254161043636e-06,
"loss": 0.1909,
"step": 1360
},
{
"epoch": 0.12,
"learning_rate": 9.843454790823213e-06,
"loss": 0.078,
"step": 1370
},
{
"epoch": 0.12,
"learning_rate": 9.84165542060279e-06,
"loss": 0.1836,
"step": 1380
},
{
"epoch": 0.12,
"learning_rate": 9.839856050382367e-06,
"loss": 0.1491,
"step": 1390
},
{
"epoch": 0.12,
"learning_rate": 9.838056680161944e-06,
"loss": 0.139,
"step": 1400
},
{
"epoch": 0.13,
"learning_rate": 9.836257309941522e-06,
"loss": 0.0737,
"step": 1410
},
{
"epoch": 0.13,
"learning_rate": 9.834457939721097e-06,
"loss": 0.1696,
"step": 1420
},
{
"epoch": 0.13,
"learning_rate": 9.832658569500676e-06,
"loss": 0.2328,
"step": 1430
},
{
"epoch": 0.13,
"learning_rate": 9.830859199280253e-06,
"loss": 0.1342,
"step": 1440
},
{
"epoch": 0.13,
"learning_rate": 9.82905982905983e-06,
"loss": 0.1243,
"step": 1450
},
{
"epoch": 0.13,
"learning_rate": 9.827260458839407e-06,
"loss": 0.1969,
"step": 1460
},
{
"epoch": 0.13,
"learning_rate": 9.825461088618983e-06,
"loss": 0.2298,
"step": 1470
},
{
"epoch": 0.13,
"learning_rate": 9.823661718398562e-06,
"loss": 0.1167,
"step": 1480
},
{
"epoch": 0.13,
"learning_rate": 9.821862348178139e-06,
"loss": 0.123,
"step": 1490
},
{
"epoch": 0.13,
"learning_rate": 9.820062977957716e-06,
"loss": 0.1128,
"step": 1500
},
{
"epoch": 0.13,
"eval_accuracy": 0.9694931312174325,
"eval_f1": 0.94606967396159,
"eval_loss": 0.09655023366212845,
"eval_precision": 0.9585926009729607,
"eval_recall": 0.9338697233550094,
"eval_runtime": 436.9466,
"eval_samples_per_second": 72.469,
"eval_steps_per_second": 4.531,
"step": 1500
},
{
"epoch": 0.13,
"learning_rate": 9.818263607737293e-06,
"loss": 0.0397,
"step": 1510
},
{
"epoch": 0.14,
"learning_rate": 9.81646423751687e-06,
"loss": 0.1572,
"step": 1520
},
{
"epoch": 0.14,
"learning_rate": 9.814664867296448e-06,
"loss": 0.1214,
"step": 1530
},
{
"epoch": 0.14,
"learning_rate": 9.812865497076025e-06,
"loss": 0.135,
"step": 1540
},
{
"epoch": 0.14,
"learning_rate": 9.811066126855602e-06,
"loss": 0.3403,
"step": 1550
},
{
"epoch": 0.14,
"learning_rate": 9.809266756635179e-06,
"loss": 0.0645,
"step": 1560
},
{
"epoch": 0.14,
"learning_rate": 9.807467386414755e-06,
"loss": 0.1162,
"step": 1570
},
{
"epoch": 0.14,
"learning_rate": 9.805668016194332e-06,
"loss": 0.065,
"step": 1580
},
{
"epoch": 0.14,
"learning_rate": 9.803868645973909e-06,
"loss": 0.0923,
"step": 1590
},
{
"epoch": 0.14,
"learning_rate": 9.802069275753488e-06,
"loss": 0.2101,
"step": 1600
},
{
"epoch": 0.14,
"learning_rate": 9.800269905533065e-06,
"loss": 0.1123,
"step": 1610
},
{
"epoch": 0.14,
"learning_rate": 9.798470535312641e-06,
"loss": 0.2323,
"step": 1620
},
{
"epoch": 0.15,
"learning_rate": 9.796671165092218e-06,
"loss": 0.0653,
"step": 1630
},
{
"epoch": 0.15,
"learning_rate": 9.794871794871795e-06,
"loss": 0.1639,
"step": 1640
},
{
"epoch": 0.15,
"learning_rate": 9.793072424651374e-06,
"loss": 0.0505,
"step": 1650
},
{
"epoch": 0.15,
"learning_rate": 9.79127305443095e-06,
"loss": 0.1409,
"step": 1660
},
{
"epoch": 0.15,
"learning_rate": 9.789473684210527e-06,
"loss": 0.1419,
"step": 1670
},
{
"epoch": 0.15,
"learning_rate": 9.787674313990104e-06,
"loss": 0.1625,
"step": 1680
},
{
"epoch": 0.15,
"learning_rate": 9.785874943769681e-06,
"loss": 0.0771,
"step": 1690
},
{
"epoch": 0.15,
"learning_rate": 9.784075573549258e-06,
"loss": 0.0929,
"step": 1700
},
{
"epoch": 0.15,
"learning_rate": 9.782276203328835e-06,
"loss": 0.095,
"step": 1710
},
{
"epoch": 0.15,
"learning_rate": 9.780476833108413e-06,
"loss": 0.1673,
"step": 1720
},
{
"epoch": 0.15,
"learning_rate": 9.77867746288799e-06,
"loss": 0.1376,
"step": 1730
},
{
"epoch": 0.16,
"learning_rate": 9.776878092667567e-06,
"loss": 0.0783,
"step": 1740
},
{
"epoch": 0.16,
"learning_rate": 9.775078722447144e-06,
"loss": 0.1428,
"step": 1750
},
{
"epoch": 0.16,
"learning_rate": 9.77327935222672e-06,
"loss": 0.0576,
"step": 1760
},
{
"epoch": 0.16,
"learning_rate": 9.7714799820063e-06,
"loss": 0.0739,
"step": 1770
},
{
"epoch": 0.16,
"learning_rate": 9.769680611785876e-06,
"loss": 0.09,
"step": 1780
},
{
"epoch": 0.16,
"learning_rate": 9.767881241565453e-06,
"loss": 0.0987,
"step": 1790
},
{
"epoch": 0.16,
"learning_rate": 9.76608187134503e-06,
"loss": 0.1613,
"step": 1800
},
{
"epoch": 0.16,
"learning_rate": 9.764282501124607e-06,
"loss": 0.058,
"step": 1810
},
{
"epoch": 0.16,
"learning_rate": 9.762483130904185e-06,
"loss": 0.218,
"step": 1820
},
{
"epoch": 0.16,
"learning_rate": 9.76068376068376e-06,
"loss": 0.1083,
"step": 1830
},
{
"epoch": 0.16,
"learning_rate": 9.758884390463339e-06,
"loss": 0.1274,
"step": 1840
},
{
"epoch": 0.16,
"learning_rate": 9.757085020242916e-06,
"loss": 0.1016,
"step": 1850
},
{
"epoch": 0.17,
"learning_rate": 9.755285650022493e-06,
"loss": 0.1442,
"step": 1860
},
{
"epoch": 0.17,
"learning_rate": 9.75348627980207e-06,
"loss": 0.0771,
"step": 1870
},
{
"epoch": 0.17,
"learning_rate": 9.751686909581647e-06,
"loss": 0.0832,
"step": 1880
},
{
"epoch": 0.17,
"learning_rate": 9.749887539361225e-06,
"loss": 0.1264,
"step": 1890
},
{
"epoch": 0.17,
"learning_rate": 9.748088169140802e-06,
"loss": 0.1182,
"step": 1900
},
{
"epoch": 0.17,
"learning_rate": 9.746288798920379e-06,
"loss": 0.1582,
"step": 1910
},
{
"epoch": 0.17,
"learning_rate": 9.744489428699956e-06,
"loss": 0.1256,
"step": 1920
},
{
"epoch": 0.17,
"learning_rate": 9.742690058479533e-06,
"loss": 0.1081,
"step": 1930
},
{
"epoch": 0.17,
"learning_rate": 9.740890688259111e-06,
"loss": 0.0922,
"step": 1940
},
{
"epoch": 0.17,
"learning_rate": 9.739091318038686e-06,
"loss": 0.1155,
"step": 1950
},
{
"epoch": 0.17,
"learning_rate": 9.737291947818265e-06,
"loss": 0.0559,
"step": 1960
},
{
"epoch": 0.18,
"learning_rate": 9.735492577597842e-06,
"loss": 0.1008,
"step": 1970
},
{
"epoch": 0.18,
"learning_rate": 9.733693207377419e-06,
"loss": 0.1123,
"step": 1980
},
{
"epoch": 0.18,
"learning_rate": 9.731893837156995e-06,
"loss": 0.1095,
"step": 1990
},
{
"epoch": 0.18,
"learning_rate": 9.730094466936572e-06,
"loss": 0.1269,
"step": 2000
},
{
"epoch": 0.18,
"learning_rate": 9.72829509671615e-06,
"loss": 0.1702,
"step": 2010
},
{
"epoch": 0.18,
"learning_rate": 9.726495726495728e-06,
"loss": 0.1762,
"step": 2020
},
{
"epoch": 0.18,
"learning_rate": 9.724696356275305e-06,
"loss": 0.0528,
"step": 2030
},
{
"epoch": 0.18,
"learning_rate": 9.722896986054881e-06,
"loss": 0.0912,
"step": 2040
},
{
"epoch": 0.18,
"learning_rate": 9.721097615834458e-06,
"loss": 0.0528,
"step": 2050
},
{
"epoch": 0.18,
"learning_rate": 9.719298245614037e-06,
"loss": 0.1277,
"step": 2060
},
{
"epoch": 0.18,
"learning_rate": 9.717498875393614e-06,
"loss": 0.1685,
"step": 2070
},
{
"epoch": 0.19,
"learning_rate": 9.71569950517319e-06,
"loss": 0.0825,
"step": 2080
},
{
"epoch": 0.19,
"learning_rate": 9.713900134952767e-06,
"loss": 0.1238,
"step": 2090
},
{
"epoch": 0.19,
"learning_rate": 9.712100764732344e-06,
"loss": 0.0815,
"step": 2100
},
{
"epoch": 0.19,
"learning_rate": 9.710301394511921e-06,
"loss": 0.0942,
"step": 2110
},
{
"epoch": 0.19,
"learning_rate": 9.708502024291498e-06,
"loss": 0.0789,
"step": 2120
},
{
"epoch": 0.19,
"learning_rate": 9.706702654071076e-06,
"loss": 0.1439,
"step": 2130
},
{
"epoch": 0.19,
"learning_rate": 9.704903283850653e-06,
"loss": 0.1,
"step": 2140
},
{
"epoch": 0.19,
"learning_rate": 9.70310391363023e-06,
"loss": 0.0548,
"step": 2150
},
{
"epoch": 0.19,
"learning_rate": 9.701304543409807e-06,
"loss": 0.1491,
"step": 2160
},
{
"epoch": 0.19,
"learning_rate": 9.699505173189384e-06,
"loss": 0.063,
"step": 2170
},
{
"epoch": 0.19,
"learning_rate": 9.697705802968962e-06,
"loss": 0.2628,
"step": 2180
},
{
"epoch": 0.2,
"learning_rate": 9.69590643274854e-06,
"loss": 0.2376,
"step": 2190
},
{
"epoch": 0.2,
"learning_rate": 9.694107062528116e-06,
"loss": 0.1094,
"step": 2200
},
{
"epoch": 0.2,
"learning_rate": 9.692307692307693e-06,
"loss": 0.143,
"step": 2210
},
{
"epoch": 0.2,
"learning_rate": 9.69050832208727e-06,
"loss": 0.1503,
"step": 2220
},
{
"epoch": 0.2,
"learning_rate": 9.688708951866847e-06,
"loss": 0.1998,
"step": 2230
},
{
"epoch": 0.2,
"learning_rate": 9.686909581646424e-06,
"loss": 0.0649,
"step": 2240
},
{
"epoch": 0.2,
"learning_rate": 9.685110211426002e-06,
"loss": 0.024,
"step": 2250
},
{
"epoch": 0.2,
"learning_rate": 9.683310841205579e-06,
"loss": 0.119,
"step": 2260
},
{
"epoch": 0.2,
"learning_rate": 9.681511470985156e-06,
"loss": 0.228,
"step": 2270
},
{
"epoch": 0.2,
"learning_rate": 9.679712100764733e-06,
"loss": 0.1202,
"step": 2280
},
{
"epoch": 0.2,
"learning_rate": 9.67791273054431e-06,
"loss": 0.053,
"step": 2290
},
{
"epoch": 0.21,
"learning_rate": 9.676113360323888e-06,
"loss": 0.1156,
"step": 2300
},
{
"epoch": 0.21,
"learning_rate": 9.674313990103465e-06,
"loss": 0.1197,
"step": 2310
},
{
"epoch": 0.21,
"learning_rate": 9.672514619883042e-06,
"loss": 0.0872,
"step": 2320
},
{
"epoch": 0.21,
"learning_rate": 9.670715249662619e-06,
"loss": 0.1937,
"step": 2330
},
{
"epoch": 0.21,
"learning_rate": 9.668915879442196e-06,
"loss": 0.172,
"step": 2340
},
{
"epoch": 0.21,
"learning_rate": 9.667116509221774e-06,
"loss": 0.1065,
"step": 2350
},
{
"epoch": 0.21,
"learning_rate": 9.66531713900135e-06,
"loss": 0.12,
"step": 2360
},
{
"epoch": 0.21,
"learning_rate": 9.663517768780928e-06,
"loss": 0.094,
"step": 2370
},
{
"epoch": 0.21,
"learning_rate": 9.661718398560505e-06,
"loss": 0.0623,
"step": 2380
},
{
"epoch": 0.21,
"learning_rate": 9.659919028340082e-06,
"loss": 0.1098,
"step": 2390
},
{
"epoch": 0.21,
"learning_rate": 9.658119658119659e-06,
"loss": 0.1679,
"step": 2400
},
{
"epoch": 0.21,
"learning_rate": 9.656320287899235e-06,
"loss": 0.1222,
"step": 2410
},
{
"epoch": 0.22,
"learning_rate": 9.654520917678814e-06,
"loss": 0.0483,
"step": 2420
},
{
"epoch": 0.22,
"learning_rate": 9.65272154745839e-06,
"loss": 0.1289,
"step": 2430
},
{
"epoch": 0.22,
"learning_rate": 9.650922177237968e-06,
"loss": 0.0802,
"step": 2440
},
{
"epoch": 0.22,
"learning_rate": 9.649122807017545e-06,
"loss": 0.0691,
"step": 2450
},
{
"epoch": 0.22,
"learning_rate": 9.647323436797121e-06,
"loss": 0.2148,
"step": 2460
},
{
"epoch": 0.22,
"learning_rate": 9.6455240665767e-06,
"loss": 0.1005,
"step": 2470
},
{
"epoch": 0.22,
"learning_rate": 9.643724696356275e-06,
"loss": 0.1019,
"step": 2480
},
{
"epoch": 0.22,
"learning_rate": 9.641925326135854e-06,
"loss": 0.1362,
"step": 2490
},
{
"epoch": 0.22,
"learning_rate": 9.64012595591543e-06,
"loss": 0.0445,
"step": 2500
},
{
"epoch": 0.22,
"learning_rate": 9.638326585695007e-06,
"loss": 0.1021,
"step": 2510
},
{
"epoch": 0.22,
"learning_rate": 9.636527215474584e-06,
"loss": 0.2398,
"step": 2520
},
{
"epoch": 0.23,
"learning_rate": 9.634727845254161e-06,
"loss": 0.1228,
"step": 2530
},
{
"epoch": 0.23,
"learning_rate": 9.63292847503374e-06,
"loss": 0.1529,
"step": 2540
},
{
"epoch": 0.23,
"learning_rate": 9.631129104813316e-06,
"loss": 0.0813,
"step": 2550
},
{
"epoch": 0.23,
"learning_rate": 9.629329734592893e-06,
"loss": 0.0542,
"step": 2560
},
{
"epoch": 0.23,
"learning_rate": 9.62753036437247e-06,
"loss": 0.1951,
"step": 2570
},
{
"epoch": 0.23,
"learning_rate": 9.625730994152047e-06,
"loss": 0.0304,
"step": 2580
},
{
"epoch": 0.23,
"learning_rate": 9.623931623931626e-06,
"loss": 0.1102,
"step": 2590
},
{
"epoch": 0.23,
"learning_rate": 9.6221322537112e-06,
"loss": 0.0727,
"step": 2600
},
{
"epoch": 0.23,
"learning_rate": 9.62033288349078e-06,
"loss": 0.136,
"step": 2610
},
{
"epoch": 0.23,
"learning_rate": 9.618533513270356e-06,
"loss": 0.1733,
"step": 2620
},
{
"epoch": 0.23,
"learning_rate": 9.616734143049933e-06,
"loss": 0.043,
"step": 2630
},
{
"epoch": 0.24,
"learning_rate": 9.61493477282951e-06,
"loss": 0.1242,
"step": 2640
},
{
"epoch": 0.24,
"learning_rate": 9.613135402609087e-06,
"loss": 0.0807,
"step": 2650
},
{
"epoch": 0.24,
"learning_rate": 9.611336032388665e-06,
"loss": 0.084,
"step": 2660
},
{
"epoch": 0.24,
"learning_rate": 9.609536662168242e-06,
"loss": 0.0175,
"step": 2670
},
{
"epoch": 0.24,
"learning_rate": 9.607737291947819e-06,
"loss": 0.0686,
"step": 2680
},
{
"epoch": 0.24,
"learning_rate": 9.605937921727396e-06,
"loss": 0.0984,
"step": 2690
},
{
"epoch": 0.24,
"learning_rate": 9.604138551506973e-06,
"loss": 0.0717,
"step": 2700
},
{
"epoch": 0.24,
"learning_rate": 9.602339181286551e-06,
"loss": 0.0323,
"step": 2710
},
{
"epoch": 0.24,
"learning_rate": 9.600539811066128e-06,
"loss": 0.1174,
"step": 2720
},
{
"epoch": 0.24,
"learning_rate": 9.598740440845705e-06,
"loss": 0.2252,
"step": 2730
},
{
"epoch": 0.24,
"learning_rate": 9.596941070625282e-06,
"loss": 0.1463,
"step": 2740
},
{
"epoch": 0.25,
"learning_rate": 9.595141700404859e-06,
"loss": 0.1296,
"step": 2750
},
{
"epoch": 0.25,
"learning_rate": 9.593342330184436e-06,
"loss": 0.0498,
"step": 2760
},
{
"epoch": 0.25,
"learning_rate": 9.591542959964013e-06,
"loss": 0.0488,
"step": 2770
},
{
"epoch": 0.25,
"learning_rate": 9.589743589743591e-06,
"loss": 0.1701,
"step": 2780
},
{
"epoch": 0.25,
"learning_rate": 9.587944219523168e-06,
"loss": 0.0986,
"step": 2790
},
{
"epoch": 0.25,
"learning_rate": 9.586144849302745e-06,
"loss": 0.0605,
"step": 2800
},
{
"epoch": 0.25,
"learning_rate": 9.584345479082322e-06,
"loss": 0.0779,
"step": 2810
},
{
"epoch": 0.25,
"learning_rate": 9.582546108861898e-06,
"loss": 0.1724,
"step": 2820
},
{
"epoch": 0.25,
"learning_rate": 9.580746738641477e-06,
"loss": 0.0229,
"step": 2830
},
{
"epoch": 0.25,
"learning_rate": 9.578947368421054e-06,
"loss": 0.1895,
"step": 2840
},
{
"epoch": 0.25,
"learning_rate": 9.57714799820063e-06,
"loss": 0.1627,
"step": 2850
},
{
"epoch": 0.26,
"learning_rate": 9.575348627980208e-06,
"loss": 0.061,
"step": 2860
},
{
"epoch": 0.26,
"learning_rate": 9.573549257759784e-06,
"loss": 0.0394,
"step": 2870
},
{
"epoch": 0.26,
"learning_rate": 9.571749887539361e-06,
"loss": 0.0101,
"step": 2880
},
{
"epoch": 0.26,
"learning_rate": 9.569950517318938e-06,
"loss": 0.1223,
"step": 2890
},
{
"epoch": 0.26,
"learning_rate": 9.568151147098517e-06,
"loss": 0.0859,
"step": 2900
},
{
"epoch": 0.26,
"learning_rate": 9.566351776878094e-06,
"loss": 0.0245,
"step": 2910
},
{
"epoch": 0.26,
"learning_rate": 9.56455240665767e-06,
"loss": 0.077,
"step": 2920
},
{
"epoch": 0.26,
"learning_rate": 9.562753036437247e-06,
"loss": 0.1988,
"step": 2930
},
{
"epoch": 0.26,
"learning_rate": 9.560953666216824e-06,
"loss": 0.1803,
"step": 2940
},
{
"epoch": 0.26,
"learning_rate": 9.559154295996403e-06,
"loss": 0.0734,
"step": 2950
},
{
"epoch": 0.26,
"learning_rate": 9.55735492577598e-06,
"loss": 0.0309,
"step": 2960
},
{
"epoch": 0.26,
"learning_rate": 9.555555555555556e-06,
"loss": 0.0958,
"step": 2970
},
{
"epoch": 0.27,
"learning_rate": 9.553756185335133e-06,
"loss": 0.1951,
"step": 2980
},
{
"epoch": 0.27,
"learning_rate": 9.55195681511471e-06,
"loss": 0.0235,
"step": 2990
},
{
"epoch": 0.27,
"learning_rate": 9.550157444894289e-06,
"loss": 0.0854,
"step": 3000
},
{
"epoch": 0.27,
"eval_accuracy": 0.9742618032528028,
"eval_f1": 0.9549798375959787,
"eval_loss": 0.1135290339589119,
"eval_precision": 0.9572535991140643,
"eval_recall": 0.9527168521988317,
"eval_runtime": 437.2717,
"eval_samples_per_second": 72.415,
"eval_steps_per_second": 4.528,
"step": 3000
},
{
"epoch": 0.27,
"learning_rate": 9.548358074673864e-06,
"loss": 0.0418,
"step": 3010
},
{
"epoch": 0.27,
"learning_rate": 9.546558704453442e-06,
"loss": 0.0694,
"step": 3020
},
{
"epoch": 0.27,
"learning_rate": 9.54475933423302e-06,
"loss": 0.048,
"step": 3030
},
{
"epoch": 0.27,
"learning_rate": 9.542959964012596e-06,
"loss": 0.1657,
"step": 3040
},
{
"epoch": 0.27,
"learning_rate": 9.541160593792173e-06,
"loss": 0.0778,
"step": 3050
},
{
"epoch": 0.27,
"learning_rate": 9.53936122357175e-06,
"loss": 0.1125,
"step": 3060
},
{
"epoch": 0.27,
"learning_rate": 9.537561853351328e-06,
"loss": 0.0662,
"step": 3070
},
{
"epoch": 0.27,
"learning_rate": 9.535762483130905e-06,
"loss": 0.1191,
"step": 3080
},
{
"epoch": 0.28,
"learning_rate": 9.533963112910482e-06,
"loss": 0.171,
"step": 3090
},
{
"epoch": 0.28,
"learning_rate": 9.532163742690059e-06,
"loss": 0.0703,
"step": 3100
},
{
"epoch": 0.28,
"learning_rate": 9.530364372469636e-06,
"loss": 0.138,
"step": 3110
},
{
"epoch": 0.28,
"learning_rate": 9.528565002249214e-06,
"loss": 0.1945,
"step": 3120
},
{
"epoch": 0.28,
"learning_rate": 9.52676563202879e-06,
"loss": 0.1241,
"step": 3130
},
{
"epoch": 0.28,
"learning_rate": 9.524966261808368e-06,
"loss": 0.1185,
"step": 3140
},
{
"epoch": 0.28,
"learning_rate": 9.523166891587945e-06,
"loss": 0.0739,
"step": 3150
},
{
"epoch": 0.28,
"learning_rate": 9.521367521367522e-06,
"loss": 0.0361,
"step": 3160
},
{
"epoch": 0.28,
"learning_rate": 9.519568151147099e-06,
"loss": 0.1103,
"step": 3170
},
{
"epoch": 0.28,
"learning_rate": 9.517768780926676e-06,
"loss": 0.1141,
"step": 3180
},
{
"epoch": 0.28,
"learning_rate": 9.515969410706254e-06,
"loss": 0.0741,
"step": 3190
},
{
"epoch": 0.29,
"learning_rate": 9.514170040485831e-06,
"loss": 0.0637,
"step": 3200
},
{
"epoch": 0.29,
"learning_rate": 9.512370670265408e-06,
"loss": 0.2084,
"step": 3210
},
{
"epoch": 0.29,
"learning_rate": 9.510571300044985e-06,
"loss": 0.1303,
"step": 3220
},
{
"epoch": 0.29,
"learning_rate": 9.508771929824562e-06,
"loss": 0.1449,
"step": 3230
},
{
"epoch": 0.29,
"learning_rate": 9.50697255960414e-06,
"loss": 0.0977,
"step": 3240
},
{
"epoch": 0.29,
"learning_rate": 9.505173189383717e-06,
"loss": 0.0754,
"step": 3250
},
{
"epoch": 0.29,
"learning_rate": 9.503373819163294e-06,
"loss": 0.0237,
"step": 3260
},
{
"epoch": 0.29,
"learning_rate": 9.50157444894287e-06,
"loss": 0.1629,
"step": 3270
},
{
"epoch": 0.29,
"learning_rate": 9.499775078722448e-06,
"loss": 0.1183,
"step": 3280
},
{
"epoch": 0.29,
"learning_rate": 9.497975708502024e-06,
"loss": 0.0365,
"step": 3290
},
{
"epoch": 0.29,
"learning_rate": 9.496176338281601e-06,
"loss": 0.0068,
"step": 3300
},
{
"epoch": 0.3,
"learning_rate": 9.49437696806118e-06,
"loss": 0.2428,
"step": 3310
},
{
"epoch": 0.3,
"learning_rate": 9.492577597840757e-06,
"loss": 0.0858,
"step": 3320
},
{
"epoch": 0.3,
"learning_rate": 9.490778227620334e-06,
"loss": 0.0579,
"step": 3330
},
{
"epoch": 0.3,
"learning_rate": 9.48897885739991e-06,
"loss": 0.0911,
"step": 3340
},
{
"epoch": 0.3,
"learning_rate": 9.487179487179487e-06,
"loss": 0.0851,
"step": 3350
},
{
"epoch": 0.3,
"learning_rate": 9.485380116959066e-06,
"loss": 0.1322,
"step": 3360
},
{
"epoch": 0.3,
"learning_rate": 9.483580746738643e-06,
"loss": 0.0657,
"step": 3370
},
{
"epoch": 0.3,
"learning_rate": 9.48178137651822e-06,
"loss": 0.0299,
"step": 3380
},
{
"epoch": 0.3,
"learning_rate": 9.479982006297796e-06,
"loss": 0.2368,
"step": 3390
},
{
"epoch": 0.3,
"learning_rate": 9.478182636077373e-06,
"loss": 0.064,
"step": 3400
},
{
"epoch": 0.3,
"learning_rate": 9.47638326585695e-06,
"loss": 0.1288,
"step": 3410
},
{
"epoch": 0.3,
"learning_rate": 9.474583895636527e-06,
"loss": 0.0765,
"step": 3420
},
{
"epoch": 0.31,
"learning_rate": 9.472784525416106e-06,
"loss": 0.1403,
"step": 3430
},
{
"epoch": 0.31,
"learning_rate": 9.470985155195682e-06,
"loss": 0.0662,
"step": 3440
},
{
"epoch": 0.31,
"learning_rate": 9.46918578497526e-06,
"loss": 0.1429,
"step": 3450
},
{
"epoch": 0.31,
"learning_rate": 9.467386414754836e-06,
"loss": 0.1111,
"step": 3460
},
{
"epoch": 0.31,
"learning_rate": 9.465587044534413e-06,
"loss": 0.1807,
"step": 3470
},
{
"epoch": 0.31,
"learning_rate": 9.463787674313992e-06,
"loss": 0.1044,
"step": 3480
},
{
"epoch": 0.31,
"learning_rate": 9.461988304093568e-06,
"loss": 0.0654,
"step": 3490
},
{
"epoch": 0.31,
"learning_rate": 9.460188933873145e-06,
"loss": 0.0255,
"step": 3500
},
{
"epoch": 0.31,
"learning_rate": 9.458389563652722e-06,
"loss": 0.1917,
"step": 3510
},
{
"epoch": 0.31,
"learning_rate": 9.456590193432299e-06,
"loss": 0.02,
"step": 3520
},
{
"epoch": 0.31,
"learning_rate": 9.454790823211878e-06,
"loss": 0.0694,
"step": 3530
},
{
"epoch": 0.32,
"learning_rate": 9.452991452991453e-06,
"loss": 0.1769,
"step": 3540
},
{
"epoch": 0.32,
"learning_rate": 9.451192082771031e-06,
"loss": 0.0723,
"step": 3550
},
{
"epoch": 0.32,
"learning_rate": 9.449392712550608e-06,
"loss": 0.1041,
"step": 3560
},
{
"epoch": 0.32,
"learning_rate": 9.447593342330185e-06,
"loss": 0.0711,
"step": 3570
},
{
"epoch": 0.32,
"learning_rate": 9.445793972109762e-06,
"loss": 0.0932,
"step": 3580
},
{
"epoch": 0.32,
"learning_rate": 9.443994601889339e-06,
"loss": 0.1224,
"step": 3590
},
{
"epoch": 0.32,
"learning_rate": 9.442195231668917e-06,
"loss": 0.0452,
"step": 3600
},
{
"epoch": 0.32,
"learning_rate": 9.440395861448494e-06,
"loss": 0.0763,
"step": 3610
},
{
"epoch": 0.32,
"learning_rate": 9.438596491228071e-06,
"loss": 0.0422,
"step": 3620
},
{
"epoch": 0.32,
"learning_rate": 9.436797121007648e-06,
"loss": 0.0184,
"step": 3630
},
{
"epoch": 0.32,
"learning_rate": 9.434997750787225e-06,
"loss": 0.0059,
"step": 3640
},
{
"epoch": 0.33,
"learning_rate": 9.433198380566803e-06,
"loss": 0.1277,
"step": 3650
},
{
"epoch": 0.33,
"learning_rate": 9.431399010346378e-06,
"loss": 0.0845,
"step": 3660
},
{
"epoch": 0.33,
"learning_rate": 9.429599640125957e-06,
"loss": 0.1707,
"step": 3670
},
{
"epoch": 0.33,
"learning_rate": 9.427800269905534e-06,
"loss": 0.1214,
"step": 3680
},
{
"epoch": 0.33,
"learning_rate": 9.42600089968511e-06,
"loss": 0.0493,
"step": 3690
},
{
"epoch": 0.33,
"learning_rate": 9.424201529464688e-06,
"loss": 0.1305,
"step": 3700
},
{
"epoch": 0.33,
"learning_rate": 9.422402159244264e-06,
"loss": 0.0809,
"step": 3710
},
{
"epoch": 0.33,
"learning_rate": 9.420602789023843e-06,
"loss": 0.0707,
"step": 3720
},
{
"epoch": 0.33,
"learning_rate": 9.41880341880342e-06,
"loss": 0.1068,
"step": 3730
},
{
"epoch": 0.33,
"learning_rate": 9.417004048582997e-06,
"loss": 0.0152,
"step": 3740
},
{
"epoch": 0.33,
"learning_rate": 9.415204678362574e-06,
"loss": 0.0773,
"step": 3750
},
{
"epoch": 0.34,
"learning_rate": 9.41340530814215e-06,
"loss": 0.072,
"step": 3760
},
{
"epoch": 0.34,
"learning_rate": 9.411605937921729e-06,
"loss": 0.1304,
"step": 3770
},
{
"epoch": 0.34,
"learning_rate": 9.409806567701306e-06,
"loss": 0.0825,
"step": 3780
},
{
"epoch": 0.34,
"learning_rate": 9.408007197480883e-06,
"loss": 0.1158,
"step": 3790
},
{
"epoch": 0.34,
"learning_rate": 9.40620782726046e-06,
"loss": 0.2465,
"step": 3800
},
{
"epoch": 0.34,
"learning_rate": 9.404408457040036e-06,
"loss": 0.2057,
"step": 3810
},
{
"epoch": 0.34,
"learning_rate": 9.402609086819613e-06,
"loss": 0.1457,
"step": 3820
},
{
"epoch": 0.34,
"learning_rate": 9.40080971659919e-06,
"loss": 0.0986,
"step": 3830
},
{
"epoch": 0.34,
"learning_rate": 9.399010346378769e-06,
"loss": 0.1613,
"step": 3840
},
{
"epoch": 0.34,
"learning_rate": 9.397210976158346e-06,
"loss": 0.0951,
"step": 3850
},
{
"epoch": 0.34,
"learning_rate": 9.395411605937922e-06,
"loss": 0.094,
"step": 3860
},
{
"epoch": 0.35,
"learning_rate": 9.3936122357175e-06,
"loss": 0.0467,
"step": 3870
},
{
"epoch": 0.35,
"learning_rate": 9.391812865497076e-06,
"loss": 0.1046,
"step": 3880
},
{
"epoch": 0.35,
"learning_rate": 9.390013495276655e-06,
"loss": 0.0671,
"step": 3890
},
{
"epoch": 0.35,
"learning_rate": 9.388214125056232e-06,
"loss": 0.0632,
"step": 3900
},
{
"epoch": 0.35,
"learning_rate": 9.386414754835808e-06,
"loss": 0.1289,
"step": 3910
},
{
"epoch": 0.35,
"learning_rate": 9.384615384615385e-06,
"loss": 0.0962,
"step": 3920
},
{
"epoch": 0.35,
"learning_rate": 9.382816014394962e-06,
"loss": 0.1108,
"step": 3930
},
{
"epoch": 0.35,
"learning_rate": 9.381016644174539e-06,
"loss": 0.1097,
"step": 3940
},
{
"epoch": 0.35,
"learning_rate": 9.379217273954116e-06,
"loss": 0.0337,
"step": 3950
},
{
"epoch": 0.35,
"learning_rate": 9.377417903733694e-06,
"loss": 0.0564,
"step": 3960
},
{
"epoch": 0.35,
"learning_rate": 9.375618533513271e-06,
"loss": 0.086,
"step": 3970
},
{
"epoch": 0.35,
"learning_rate": 9.373819163292848e-06,
"loss": 0.0716,
"step": 3980
},
{
"epoch": 0.36,
"learning_rate": 9.372019793072425e-06,
"loss": 0.0274,
"step": 3990
},
{
"epoch": 0.36,
"learning_rate": 9.370220422852002e-06,
"loss": 0.0945,
"step": 4000
},
{
"epoch": 0.36,
"learning_rate": 9.36842105263158e-06,
"loss": 0.2289,
"step": 4010
},
{
"epoch": 0.36,
"learning_rate": 9.366621682411157e-06,
"loss": 0.1001,
"step": 4020
},
{
"epoch": 0.36,
"learning_rate": 9.364822312190734e-06,
"loss": 0.127,
"step": 4030
},
{
"epoch": 0.36,
"learning_rate": 9.363022941970311e-06,
"loss": 0.0575,
"step": 4040
},
{
"epoch": 0.36,
"learning_rate": 9.361223571749888e-06,
"loss": 0.14,
"step": 4050
},
{
"epoch": 0.36,
"learning_rate": 9.359424201529466e-06,
"loss": 0.1394,
"step": 4060
},
{
"epoch": 0.36,
"learning_rate": 9.357624831309042e-06,
"loss": 0.0982,
"step": 4070
},
{
"epoch": 0.36,
"learning_rate": 9.35582546108862e-06,
"loss": 0.137,
"step": 4080
},
{
"epoch": 0.36,
"learning_rate": 9.354026090868197e-06,
"loss": 0.0678,
"step": 4090
},
{
"epoch": 0.37,
"learning_rate": 9.352226720647774e-06,
"loss": 0.1493,
"step": 4100
},
{
"epoch": 0.37,
"learning_rate": 9.35042735042735e-06,
"loss": 0.0452,
"step": 4110
},
{
"epoch": 0.37,
"learning_rate": 9.348627980206928e-06,
"loss": 0.0618,
"step": 4120
},
{
"epoch": 0.37,
"learning_rate": 9.346828609986506e-06,
"loss": 0.0688,
"step": 4130
},
{
"epoch": 0.37,
"learning_rate": 9.345029239766083e-06,
"loss": 0.0446,
"step": 4140
},
{
"epoch": 0.37,
"learning_rate": 9.34322986954566e-06,
"loss": 0.1038,
"step": 4150
},
{
"epoch": 0.37,
"learning_rate": 9.341430499325237e-06,
"loss": 0.0517,
"step": 4160
},
{
"epoch": 0.37,
"learning_rate": 9.339631129104814e-06,
"loss": 0.1752,
"step": 4170
},
{
"epoch": 0.37,
"learning_rate": 9.337831758884392e-06,
"loss": 0.0554,
"step": 4180
},
{
"epoch": 0.37,
"learning_rate": 9.336032388663967e-06,
"loss": 0.0795,
"step": 4190
},
{
"epoch": 0.37,
"learning_rate": 9.334233018443546e-06,
"loss": 0.0729,
"step": 4200
},
{
"epoch": 0.38,
"learning_rate": 9.332433648223123e-06,
"loss": 0.1994,
"step": 4210
},
{
"epoch": 0.38,
"learning_rate": 9.3306342780027e-06,
"loss": 0.1256,
"step": 4220
},
{
"epoch": 0.38,
"learning_rate": 9.328834907782276e-06,
"loss": 0.0416,
"step": 4230
},
{
"epoch": 0.38,
"learning_rate": 9.327035537561853e-06,
"loss": 0.0455,
"step": 4240
},
{
"epoch": 0.38,
"learning_rate": 9.325236167341432e-06,
"loss": 0.1276,
"step": 4250
},
{
"epoch": 0.38,
"learning_rate": 9.323436797121009e-06,
"loss": 0.1538,
"step": 4260
},
{
"epoch": 0.38,
"learning_rate": 9.321637426900586e-06,
"loss": 0.072,
"step": 4270
},
{
"epoch": 0.38,
"learning_rate": 9.319838056680162e-06,
"loss": 0.1352,
"step": 4280
},
{
"epoch": 0.38,
"learning_rate": 9.31803868645974e-06,
"loss": 0.0613,
"step": 4290
},
{
"epoch": 0.38,
"learning_rate": 9.316239316239318e-06,
"loss": 0.1611,
"step": 4300
},
{
"epoch": 0.38,
"learning_rate": 9.314439946018893e-06,
"loss": 0.0851,
"step": 4310
},
{
"epoch": 0.39,
"learning_rate": 9.312640575798472e-06,
"loss": 0.1095,
"step": 4320
},
{
"epoch": 0.39,
"learning_rate": 9.310841205578048e-06,
"loss": 0.1378,
"step": 4330
},
{
"epoch": 0.39,
"learning_rate": 9.309041835357625e-06,
"loss": 0.0981,
"step": 4340
},
{
"epoch": 0.39,
"learning_rate": 9.307242465137202e-06,
"loss": 0.0396,
"step": 4350
},
{
"epoch": 0.39,
"learning_rate": 9.305443094916779e-06,
"loss": 0.1012,
"step": 4360
},
{
"epoch": 0.39,
"learning_rate": 9.303643724696358e-06,
"loss": 0.1195,
"step": 4370
},
{
"epoch": 0.39,
"learning_rate": 9.301844354475934e-06,
"loss": 0.0449,
"step": 4380
},
{
"epoch": 0.39,
"learning_rate": 9.300044984255511e-06,
"loss": 0.0778,
"step": 4390
},
{
"epoch": 0.39,
"learning_rate": 9.298245614035088e-06,
"loss": 0.0422,
"step": 4400
},
{
"epoch": 0.39,
"learning_rate": 9.296446243814665e-06,
"loss": 0.1016,
"step": 4410
},
{
"epoch": 0.39,
"learning_rate": 9.294646873594244e-06,
"loss": 0.1021,
"step": 4420
},
{
"epoch": 0.4,
"learning_rate": 9.29284750337382e-06,
"loss": 0.1569,
"step": 4430
},
{
"epoch": 0.4,
"learning_rate": 9.291048133153397e-06,
"loss": 0.1313,
"step": 4440
},
{
"epoch": 0.4,
"learning_rate": 9.289248762932974e-06,
"loss": 0.0682,
"step": 4450
},
{
"epoch": 0.4,
"learning_rate": 9.287449392712551e-06,
"loss": 0.1345,
"step": 4460
},
{
"epoch": 0.4,
"learning_rate": 9.285650022492128e-06,
"loss": 0.0751,
"step": 4470
},
{
"epoch": 0.4,
"learning_rate": 9.283850652271705e-06,
"loss": 0.1235,
"step": 4480
},
{
"epoch": 0.4,
"learning_rate": 9.282051282051283e-06,
"loss": 0.1123,
"step": 4490
},
{
"epoch": 0.4,
"learning_rate": 9.28025191183086e-06,
"loss": 0.0496,
"step": 4500
},
{
"epoch": 0.4,
"eval_accuracy": 0.9711984841307437,
"eval_f1": 0.9508832399827659,
"eval_loss": 0.08512861281633377,
"eval_precision": 0.9297525013164823,
"eval_recall": 0.9729968037032954,
"eval_runtime": 436.7164,
"eval_samples_per_second": 72.507,
"eval_steps_per_second": 4.534,
"step": 4500
},
{
"epoch": 0.4,
"learning_rate": 9.278452541610437e-06,
"loss": 0.1278,
"step": 4510
},
{
"epoch": 0.4,
"learning_rate": 9.276653171390014e-06,
"loss": 0.0659,
"step": 4520
},
{
"epoch": 0.4,
"learning_rate": 9.27485380116959e-06,
"loss": 0.1262,
"step": 4530
},
{
"epoch": 0.4,
"learning_rate": 9.27305443094917e-06,
"loss": 0.0633,
"step": 4540
},
{
"epoch": 0.41,
"learning_rate": 9.271255060728746e-06,
"loss": 0.1094,
"step": 4550
},
{
"epoch": 0.41,
"learning_rate": 9.269455690508323e-06,
"loss": 0.1054,
"step": 4560
},
{
"epoch": 0.41,
"learning_rate": 9.2676563202879e-06,
"loss": 0.0375,
"step": 4570
},
{
"epoch": 0.41,
"learning_rate": 9.265856950067477e-06,
"loss": 0.0434,
"step": 4580
},
{
"epoch": 0.41,
"learning_rate": 9.264057579847054e-06,
"loss": 0.0875,
"step": 4590
},
{
"epoch": 0.41,
"learning_rate": 9.26225820962663e-06,
"loss": 0.0963,
"step": 4600
},
{
"epoch": 0.41,
"learning_rate": 9.260458839406209e-06,
"loss": 0.1243,
"step": 4610
},
{
"epoch": 0.41,
"learning_rate": 9.258659469185786e-06,
"loss": 0.1053,
"step": 4620
},
{
"epoch": 0.41,
"learning_rate": 9.256860098965363e-06,
"loss": 0.0566,
"step": 4630
},
{
"epoch": 0.41,
"learning_rate": 9.25506072874494e-06,
"loss": 0.0947,
"step": 4640
},
{
"epoch": 0.41,
"learning_rate": 9.253261358524516e-06,
"loss": 0.1326,
"step": 4650
},
{
"epoch": 0.42,
"learning_rate": 9.251461988304095e-06,
"loss": 0.1622,
"step": 4660
},
{
"epoch": 0.42,
"learning_rate": 9.249662618083672e-06,
"loss": 0.0438,
"step": 4670
},
{
"epoch": 0.42,
"learning_rate": 9.247863247863249e-06,
"loss": 0.0346,
"step": 4680
},
{
"epoch": 0.42,
"learning_rate": 9.246063877642826e-06,
"loss": 0.0647,
"step": 4690
},
{
"epoch": 0.42,
"learning_rate": 9.244264507422402e-06,
"loss": 0.2105,
"step": 4700
},
{
"epoch": 0.42,
"learning_rate": 9.242465137201981e-06,
"loss": 0.0517,
"step": 4710
},
{
"epoch": 0.42,
"learning_rate": 9.240665766981556e-06,
"loss": 0.0369,
"step": 4720
},
{
"epoch": 0.42,
"learning_rate": 9.238866396761135e-06,
"loss": 0.0792,
"step": 4730
},
{
"epoch": 0.42,
"learning_rate": 9.237067026540712e-06,
"loss": 0.1478,
"step": 4740
},
{
"epoch": 0.42,
"learning_rate": 9.235267656320288e-06,
"loss": 0.0625,
"step": 4750
},
{
"epoch": 0.42,
"learning_rate": 9.233468286099865e-06,
"loss": 0.1502,
"step": 4760
},
{
"epoch": 0.43,
"learning_rate": 9.231668915879442e-06,
"loss": 0.1159,
"step": 4770
},
{
"epoch": 0.43,
"learning_rate": 9.22986954565902e-06,
"loss": 0.0383,
"step": 4780
},
{
"epoch": 0.43,
"learning_rate": 9.228070175438598e-06,
"loss": 0.0419,
"step": 4790
},
{
"epoch": 0.43,
"learning_rate": 9.226270805218174e-06,
"loss": 0.0887,
"step": 4800
},
{
"epoch": 0.43,
"learning_rate": 9.224471434997751e-06,
"loss": 0.0085,
"step": 4810
},
{
"epoch": 0.43,
"learning_rate": 9.222672064777328e-06,
"loss": 0.1119,
"step": 4820
},
{
"epoch": 0.43,
"learning_rate": 9.220872694556907e-06,
"loss": 0.0901,
"step": 4830
},
{
"epoch": 0.43,
"learning_rate": 9.219073324336482e-06,
"loss": 0.1066,
"step": 4840
},
{
"epoch": 0.43,
"learning_rate": 9.21727395411606e-06,
"loss": 0.096,
"step": 4850
},
{
"epoch": 0.43,
"learning_rate": 9.215474583895637e-06,
"loss": 0.1194,
"step": 4860
},
{
"epoch": 0.43,
"learning_rate": 9.213675213675214e-06,
"loss": 0.0887,
"step": 4870
},
{
"epoch": 0.44,
"learning_rate": 9.211875843454791e-06,
"loss": 0.0658,
"step": 4880
},
{
"epoch": 0.44,
"learning_rate": 9.210076473234368e-06,
"loss": 0.1369,
"step": 4890
},
{
"epoch": 0.44,
"learning_rate": 9.208277103013946e-06,
"loss": 0.1153,
"step": 4900
},
{
"epoch": 0.44,
"learning_rate": 9.206477732793523e-06,
"loss": 0.0603,
"step": 4910
},
{
"epoch": 0.44,
"learning_rate": 9.2046783625731e-06,
"loss": 0.1061,
"step": 4920
},
{
"epoch": 0.44,
"learning_rate": 9.202878992352677e-06,
"loss": 0.0689,
"step": 4930
},
{
"epoch": 0.44,
"learning_rate": 9.201079622132254e-06,
"loss": 0.1351,
"step": 4940
},
{
"epoch": 0.44,
"learning_rate": 9.199280251911832e-06,
"loss": 0.1209,
"step": 4950
},
{
"epoch": 0.44,
"learning_rate": 9.19748088169141e-06,
"loss": 0.0371,
"step": 4960
},
{
"epoch": 0.44,
"learning_rate": 9.195681511470986e-06,
"loss": 0.0833,
"step": 4970
},
{
"epoch": 0.44,
"learning_rate": 9.193882141250563e-06,
"loss": 0.1354,
"step": 4980
},
{
"epoch": 0.44,
"learning_rate": 9.19208277103014e-06,
"loss": 0.2174,
"step": 4990
},
{
"epoch": 0.45,
"learning_rate": 9.190283400809717e-06,
"loss": 0.1419,
"step": 5000
},
{
"epoch": 0.45,
"learning_rate": 9.188484030589294e-06,
"loss": 0.1198,
"step": 5010
},
{
"epoch": 0.45,
"learning_rate": 9.186684660368872e-06,
"loss": 0.1137,
"step": 5020
},
{
"epoch": 0.45,
"learning_rate": 9.184885290148449e-06,
"loss": 0.094,
"step": 5030
},
{
"epoch": 0.45,
"learning_rate": 9.183085919928026e-06,
"loss": 0.0805,
"step": 5040
},
{
"epoch": 0.45,
"learning_rate": 9.181286549707603e-06,
"loss": 0.0516,
"step": 5050
},
{
"epoch": 0.45,
"learning_rate": 9.17948717948718e-06,
"loss": 0.0696,
"step": 5060
},
{
"epoch": 0.45,
"learning_rate": 9.177687809266758e-06,
"loss": 0.0769,
"step": 5070
},
{
"epoch": 0.45,
"learning_rate": 9.175888439046335e-06,
"loss": 0.0959,
"step": 5080
},
{
"epoch": 0.45,
"learning_rate": 9.174089068825912e-06,
"loss": 0.0542,
"step": 5090
},
{
"epoch": 0.45,
"learning_rate": 9.172289698605489e-06,
"loss": 0.0712,
"step": 5100
},
{
"epoch": 0.46,
"learning_rate": 9.170490328385066e-06,
"loss": 0.0537,
"step": 5110
},
{
"epoch": 0.46,
"learning_rate": 9.168690958164642e-06,
"loss": 0.1163,
"step": 5120
},
{
"epoch": 0.46,
"learning_rate": 9.16689158794422e-06,
"loss": 0.0765,
"step": 5130
},
{
"epoch": 0.46,
"learning_rate": 9.165092217723798e-06,
"loss": 0.2003,
"step": 5140
},
{
"epoch": 0.46,
"learning_rate": 9.163292847503375e-06,
"loss": 0.0108,
"step": 5150
},
{
"epoch": 0.46,
"learning_rate": 9.161493477282952e-06,
"loss": 0.1257,
"step": 5160
},
{
"epoch": 0.46,
"learning_rate": 9.159694107062528e-06,
"loss": 0.1449,
"step": 5170
},
{
"epoch": 0.46,
"learning_rate": 9.157894736842105e-06,
"loss": 0.053,
"step": 5180
},
{
"epoch": 0.46,
"learning_rate": 9.156095366621684e-06,
"loss": 0.0475,
"step": 5190
},
{
"epoch": 0.46,
"learning_rate": 9.15429599640126e-06,
"loss": 0.029,
"step": 5200
},
{
"epoch": 0.46,
"learning_rate": 9.152496626180838e-06,
"loss": 0.0036,
"step": 5210
},
{
"epoch": 0.47,
"learning_rate": 9.150697255960414e-06,
"loss": 0.0076,
"step": 5220
},
{
"epoch": 0.47,
"learning_rate": 9.148897885739991e-06,
"loss": 0.0655,
"step": 5230
},
{
"epoch": 0.47,
"learning_rate": 9.14709851551957e-06,
"loss": 0.0534,
"step": 5240
},
{
"epoch": 0.47,
"learning_rate": 9.145299145299145e-06,
"loss": 0.0378,
"step": 5250
},
{
"epoch": 0.47,
"learning_rate": 9.143499775078724e-06,
"loss": 0.1112,
"step": 5260
},
{
"epoch": 0.47,
"learning_rate": 9.1417004048583e-06,
"loss": 0.1258,
"step": 5270
},
{
"epoch": 0.47,
"learning_rate": 9.139901034637877e-06,
"loss": 0.1018,
"step": 5280
},
{
"epoch": 0.47,
"learning_rate": 9.138101664417454e-06,
"loss": 0.1015,
"step": 5290
},
{
"epoch": 0.47,
"learning_rate": 9.136302294197031e-06,
"loss": 0.2089,
"step": 5300
},
{
"epoch": 0.47,
"learning_rate": 9.13450292397661e-06,
"loss": 0.0352,
"step": 5310
},
{
"epoch": 0.47,
"learning_rate": 9.132703553756186e-06,
"loss": 0.1233,
"step": 5320
},
{
"epoch": 0.48,
"learning_rate": 9.130904183535763e-06,
"loss": 0.1092,
"step": 5330
},
{
"epoch": 0.48,
"learning_rate": 9.12910481331534e-06,
"loss": 0.0103,
"step": 5340
},
{
"epoch": 0.48,
"learning_rate": 9.127305443094917e-06,
"loss": 0.0304,
"step": 5350
},
{
"epoch": 0.48,
"learning_rate": 9.125506072874496e-06,
"loss": 0.1365,
"step": 5360
},
{
"epoch": 0.48,
"learning_rate": 9.12370670265407e-06,
"loss": 0.1332,
"step": 5370
},
{
"epoch": 0.48,
"learning_rate": 9.12190733243365e-06,
"loss": 0.0738,
"step": 5380
},
{
"epoch": 0.48,
"learning_rate": 9.120107962213226e-06,
"loss": 0.1559,
"step": 5390
},
{
"epoch": 0.48,
"learning_rate": 9.118308591992803e-06,
"loss": 0.139,
"step": 5400
},
{
"epoch": 0.48,
"learning_rate": 9.11650922177238e-06,
"loss": 0.1414,
"step": 5410
},
{
"epoch": 0.48,
"learning_rate": 9.114709851551957e-06,
"loss": 0.1316,
"step": 5420
},
{
"epoch": 0.48,
"learning_rate": 9.112910481331535e-06,
"loss": 0.1166,
"step": 5430
},
{
"epoch": 0.49,
"learning_rate": 9.111111111111112e-06,
"loss": 0.0909,
"step": 5440
},
{
"epoch": 0.49,
"learning_rate": 9.109311740890689e-06,
"loss": 0.1103,
"step": 5450
},
{
"epoch": 0.49,
"learning_rate": 9.107512370670266e-06,
"loss": 0.1618,
"step": 5460
},
{
"epoch": 0.49,
"learning_rate": 9.105713000449843e-06,
"loss": 0.0721,
"step": 5470
},
{
"epoch": 0.49,
"learning_rate": 9.103913630229421e-06,
"loss": 0.1584,
"step": 5480
},
{
"epoch": 0.49,
"learning_rate": 9.102114260008998e-06,
"loss": 0.1035,
"step": 5490
},
{
"epoch": 0.49,
"learning_rate": 9.100314889788575e-06,
"loss": 0.028,
"step": 5500
},
{
"epoch": 0.49,
"learning_rate": 9.098515519568152e-06,
"loss": 0.1161,
"step": 5510
},
{
"epoch": 0.49,
"learning_rate": 9.096716149347729e-06,
"loss": 0.1424,
"step": 5520
},
{
"epoch": 0.49,
"learning_rate": 9.094916779127306e-06,
"loss": 0.1636,
"step": 5530
},
{
"epoch": 0.49,
"learning_rate": 9.093117408906882e-06,
"loss": 0.1167,
"step": 5540
},
{
"epoch": 0.49,
"learning_rate": 9.091318038686461e-06,
"loss": 0.0831,
"step": 5550
},
{
"epoch": 0.5,
"learning_rate": 9.089518668466038e-06,
"loss": 0.1418,
"step": 5560
},
{
"epoch": 0.5,
"learning_rate": 9.087719298245615e-06,
"loss": 0.1449,
"step": 5570
},
{
"epoch": 0.5,
"learning_rate": 9.085919928025192e-06,
"loss": 0.1729,
"step": 5580
},
{
"epoch": 0.5,
"learning_rate": 9.084120557804768e-06,
"loss": 0.125,
"step": 5590
},
{
"epoch": 0.5,
"learning_rate": 9.082321187584347e-06,
"loss": 0.0741,
"step": 5600
},
{
"epoch": 0.5,
"learning_rate": 9.080521817363924e-06,
"loss": 0.0811,
"step": 5610
},
{
"epoch": 0.5,
"learning_rate": 9.0787224471435e-06,
"loss": 0.1593,
"step": 5620
},
{
"epoch": 0.5,
"learning_rate": 9.076923076923078e-06,
"loss": 0.1943,
"step": 5630
},
{
"epoch": 0.5,
"learning_rate": 9.075123706702654e-06,
"loss": 0.0983,
"step": 5640
},
{
"epoch": 0.5,
"learning_rate": 9.073324336482231e-06,
"loss": 0.1401,
"step": 5650
},
{
"epoch": 0.5,
"learning_rate": 9.071524966261808e-06,
"loss": 0.1739,
"step": 5660
},
{
"epoch": 0.51,
"learning_rate": 9.069725596041387e-06,
"loss": 0.1541,
"step": 5670
},
{
"epoch": 0.51,
"learning_rate": 9.067926225820964e-06,
"loss": 0.1699,
"step": 5680
},
{
"epoch": 0.51,
"learning_rate": 9.06612685560054e-06,
"loss": 0.0351,
"step": 5690
},
{
"epoch": 0.51,
"learning_rate": 9.064327485380117e-06,
"loss": 0.0941,
"step": 5700
},
{
"epoch": 0.51,
"learning_rate": 9.062528115159694e-06,
"loss": 0.1932,
"step": 5710
},
{
"epoch": 0.51,
"learning_rate": 9.060728744939273e-06,
"loss": 0.0819,
"step": 5720
},
{
"epoch": 0.51,
"learning_rate": 9.05892937471885e-06,
"loss": 0.2269,
"step": 5730
},
{
"epoch": 0.51,
"learning_rate": 9.057130004498426e-06,
"loss": 0.0967,
"step": 5740
},
{
"epoch": 0.51,
"learning_rate": 9.055330634278003e-06,
"loss": 0.0594,
"step": 5750
},
{
"epoch": 0.51,
"learning_rate": 9.05353126405758e-06,
"loss": 0.2919,
"step": 5760
},
{
"epoch": 0.51,
"learning_rate": 9.051731893837159e-06,
"loss": 0.0618,
"step": 5770
},
{
"epoch": 0.52,
"learning_rate": 9.049932523616734e-06,
"loss": 0.1166,
"step": 5780
},
{
"epoch": 0.52,
"learning_rate": 9.048133153396312e-06,
"loss": 0.124,
"step": 5790
},
{
"epoch": 0.52,
"learning_rate": 9.04633378317589e-06,
"loss": 0.1845,
"step": 5800
},
{
"epoch": 0.52,
"learning_rate": 9.044534412955466e-06,
"loss": 0.1357,
"step": 5810
},
{
"epoch": 0.52,
"learning_rate": 9.042735042735043e-06,
"loss": 0.1428,
"step": 5820
},
{
"epoch": 0.52,
"learning_rate": 9.04093567251462e-06,
"loss": 0.1932,
"step": 5830
},
{
"epoch": 0.52,
"learning_rate": 9.039136302294198e-06,
"loss": 0.0585,
"step": 5840
},
{
"epoch": 0.52,
"learning_rate": 9.037336932073775e-06,
"loss": 0.0983,
"step": 5850
},
{
"epoch": 0.52,
"learning_rate": 9.035537561853352e-06,
"loss": 0.0764,
"step": 5860
},
{
"epoch": 0.52,
"learning_rate": 9.033738191632929e-06,
"loss": 0.1484,
"step": 5870
},
{
"epoch": 0.52,
"learning_rate": 9.031938821412506e-06,
"loss": 0.1209,
"step": 5880
},
{
"epoch": 0.53,
"learning_rate": 9.030139451192084e-06,
"loss": 0.0421,
"step": 5890
},
{
"epoch": 0.53,
"learning_rate": 9.02834008097166e-06,
"loss": 0.0788,
"step": 5900
},
{
"epoch": 0.53,
"learning_rate": 9.026540710751238e-06,
"loss": 0.0637,
"step": 5910
},
{
"epoch": 0.53,
"learning_rate": 9.024741340530815e-06,
"loss": 0.0962,
"step": 5920
},
{
"epoch": 0.53,
"learning_rate": 9.022941970310392e-06,
"loss": 0.0681,
"step": 5930
},
{
"epoch": 0.53,
"learning_rate": 9.021142600089969e-06,
"loss": 0.1625,
"step": 5940
},
{
"epoch": 0.53,
"learning_rate": 9.019343229869546e-06,
"loss": 0.1218,
"step": 5950
},
{
"epoch": 0.53,
"learning_rate": 9.017543859649124e-06,
"loss": 0.0072,
"step": 5960
},
{
"epoch": 0.53,
"learning_rate": 9.015744489428701e-06,
"loss": 0.1611,
"step": 5970
},
{
"epoch": 0.53,
"learning_rate": 9.013945119208278e-06,
"loss": 0.1108,
"step": 5980
},
{
"epoch": 0.53,
"learning_rate": 9.012145748987855e-06,
"loss": 0.1706,
"step": 5990
},
{
"epoch": 0.53,
"learning_rate": 9.010346378767432e-06,
"loss": 0.0967,
"step": 6000
},
{
"epoch": 0.53,
"eval_accuracy": 0.9735354492341702,
"eval_f1": 0.954082191780822,
"eval_loss": 0.09180905669927597,
"eval_precision": 0.9486760379208892,
"eval_recall": 0.9595503141188141,
"eval_runtime": 436.4287,
"eval_samples_per_second": 72.555,
"eval_steps_per_second": 4.537,
"step": 6000
},
{
"epoch": 0.54,
"learning_rate": 9.00854700854701e-06,
"loss": 0.0714,
"step": 6010
},
{
"epoch": 0.54,
"learning_rate": 9.006747638326587e-06,
"loss": 0.1627,
"step": 6020
},
{
"epoch": 0.54,
"learning_rate": 9.004948268106164e-06,
"loss": 0.0399,
"step": 6030
},
{
"epoch": 0.54,
"learning_rate": 9.00314889788574e-06,
"loss": 0.0568,
"step": 6040
},
{
"epoch": 0.54,
"learning_rate": 9.001349527665318e-06,
"loss": 0.1471,
"step": 6050
},
{
"epoch": 0.54,
"learning_rate": 8.999550157444894e-06,
"loss": 0.0979,
"step": 6060
},
{
"epoch": 0.54,
"learning_rate": 8.997750787224471e-06,
"loss": 0.0177,
"step": 6070
},
{
"epoch": 0.54,
"learning_rate": 8.99595141700405e-06,
"loss": 0.1574,
"step": 6080
},
{
"epoch": 0.54,
"learning_rate": 8.994152046783627e-06,
"loss": 0.1683,
"step": 6090
},
{
"epoch": 0.54,
"learning_rate": 8.992352676563204e-06,
"loss": 0.1035,
"step": 6100
},
{
"epoch": 0.54,
"learning_rate": 8.99055330634278e-06,
"loss": 0.1381,
"step": 6110
},
{
"epoch": 0.55,
"learning_rate": 8.988753936122357e-06,
"loss": 0.1026,
"step": 6120
},
{
"epoch": 0.55,
"learning_rate": 8.986954565901936e-06,
"loss": 0.0454,
"step": 6130
},
{
"epoch": 0.55,
"learning_rate": 8.985155195681513e-06,
"loss": 0.0653,
"step": 6140
},
{
"epoch": 0.55,
"learning_rate": 8.98335582546109e-06,
"loss": 0.0681,
"step": 6150
},
{
"epoch": 0.55,
"learning_rate": 8.981556455240666e-06,
"loss": 0.1286,
"step": 6160
},
{
"epoch": 0.55,
"learning_rate": 8.979757085020243e-06,
"loss": 0.0823,
"step": 6170
},
{
"epoch": 0.55,
"learning_rate": 8.97795771479982e-06,
"loss": 0.0489,
"step": 6180
},
{
"epoch": 0.55,
"learning_rate": 8.976158344579397e-06,
"loss": 0.0605,
"step": 6190
},
{
"epoch": 0.55,
"learning_rate": 8.974358974358976e-06,
"loss": 0.054,
"step": 6200
},
{
"epoch": 0.55,
"learning_rate": 8.972559604138552e-06,
"loss": 0.1616,
"step": 6210
},
{
"epoch": 0.55,
"learning_rate": 8.97076023391813e-06,
"loss": 0.0963,
"step": 6220
},
{
"epoch": 0.56,
"learning_rate": 8.968960863697706e-06,
"loss": 0.1245,
"step": 6230
},
{
"epoch": 0.56,
"learning_rate": 8.967161493477283e-06,
"loss": 0.092,
"step": 6240
},
{
"epoch": 0.56,
"learning_rate": 8.965362123256862e-06,
"loss": 0.0728,
"step": 6250
},
{
"epoch": 0.56,
"learning_rate": 8.963562753036438e-06,
"loss": 0.1138,
"step": 6260
},
{
"epoch": 0.56,
"learning_rate": 8.961763382816015e-06,
"loss": 0.1143,
"step": 6270
},
{
"epoch": 0.56,
"learning_rate": 8.959964012595592e-06,
"loss": 0.0954,
"step": 6280
},
{
"epoch": 0.56,
"learning_rate": 8.958164642375169e-06,
"loss": 0.0606,
"step": 6290
},
{
"epoch": 0.56,
"learning_rate": 8.956365272154746e-06,
"loss": 0.0667,
"step": 6300
},
{
"epoch": 0.56,
"learning_rate": 8.954565901934323e-06,
"loss": 0.0505,
"step": 6310
},
{
"epoch": 0.56,
"learning_rate": 8.952766531713901e-06,
"loss": 0.1699,
"step": 6320
},
{
"epoch": 0.56,
"learning_rate": 8.950967161493478e-06,
"loss": 0.0568,
"step": 6330
},
{
"epoch": 0.57,
"learning_rate": 8.949167791273055e-06,
"loss": 0.1413,
"step": 6340
},
{
"epoch": 0.57,
"learning_rate": 8.947368421052632e-06,
"loss": 0.0903,
"step": 6350
},
{
"epoch": 0.57,
"learning_rate": 8.945569050832209e-06,
"loss": 0.1166,
"step": 6360
},
{
"epoch": 0.57,
"learning_rate": 8.943769680611787e-06,
"loss": 0.0781,
"step": 6370
},
{
"epoch": 0.57,
"learning_rate": 8.941970310391364e-06,
"loss": 0.0385,
"step": 6380
},
{
"epoch": 0.57,
"learning_rate": 8.940170940170941e-06,
"loss": 0.0508,
"step": 6390
},
{
"epoch": 0.57,
"learning_rate": 8.938371569950518e-06,
"loss": 0.0893,
"step": 6400
},
{
"epoch": 0.57,
"learning_rate": 8.936572199730095e-06,
"loss": 0.1399,
"step": 6410
},
{
"epoch": 0.57,
"learning_rate": 8.934772829509673e-06,
"loss": 0.0252,
"step": 6420
},
{
"epoch": 0.57,
"learning_rate": 8.932973459289248e-06,
"loss": 0.0843,
"step": 6430
},
{
"epoch": 0.57,
"learning_rate": 8.931174089068827e-06,
"loss": 0.0468,
"step": 6440
},
{
"epoch": 0.58,
"learning_rate": 8.929374718848404e-06,
"loss": 0.0843,
"step": 6450
},
{
"epoch": 0.58,
"learning_rate": 8.92757534862798e-06,
"loss": 0.0478,
"step": 6460
},
{
"epoch": 0.58,
"learning_rate": 8.925775978407558e-06,
"loss": 0.1855,
"step": 6470
},
{
"epoch": 0.58,
"learning_rate": 8.923976608187134e-06,
"loss": 0.033,
"step": 6480
},
{
"epoch": 0.58,
"learning_rate": 8.922177237966713e-06,
"loss": 0.1028,
"step": 6490
},
{
"epoch": 0.58,
"learning_rate": 8.92037786774629e-06,
"loss": 0.1134,
"step": 6500
},
{
"epoch": 0.58,
"learning_rate": 8.918578497525867e-06,
"loss": 0.0714,
"step": 6510
},
{
"epoch": 0.58,
"learning_rate": 8.916779127305444e-06,
"loss": 0.1058,
"step": 6520
},
{
"epoch": 0.58,
"learning_rate": 8.91497975708502e-06,
"loss": 0.0672,
"step": 6530
},
{
"epoch": 0.58,
"learning_rate": 8.913180386864599e-06,
"loss": 0.1081,
"step": 6540
},
{
"epoch": 0.58,
"learning_rate": 8.911381016644174e-06,
"loss": 0.0558,
"step": 6550
},
{
"epoch": 0.58,
"learning_rate": 8.909581646423753e-06,
"loss": 0.1227,
"step": 6560
},
{
"epoch": 0.59,
"learning_rate": 8.90778227620333e-06,
"loss": 0.1211,
"step": 6570
},
{
"epoch": 0.59,
"learning_rate": 8.905982905982906e-06,
"loss": 0.0534,
"step": 6580
},
{
"epoch": 0.59,
"learning_rate": 8.904183535762483e-06,
"loss": 0.1179,
"step": 6590
},
{
"epoch": 0.59,
"learning_rate": 8.90238416554206e-06,
"loss": 0.1224,
"step": 6600
},
{
"epoch": 0.59,
"learning_rate": 8.900584795321639e-06,
"loss": 0.0878,
"step": 6610
},
{
"epoch": 0.59,
"learning_rate": 8.898785425101216e-06,
"loss": 0.12,
"step": 6620
},
{
"epoch": 0.59,
"learning_rate": 8.896986054880792e-06,
"loss": 0.1773,
"step": 6630
},
{
"epoch": 0.59,
"learning_rate": 8.89518668466037e-06,
"loss": 0.0991,
"step": 6640
},
{
"epoch": 0.59,
"learning_rate": 8.893387314439946e-06,
"loss": 0.1262,
"step": 6650
},
{
"epoch": 0.59,
"learning_rate": 8.891587944219525e-06,
"loss": 0.0043,
"step": 6660
},
{
"epoch": 0.59,
"learning_rate": 8.889788573999102e-06,
"loss": 0.0821,
"step": 6670
},
{
"epoch": 0.6,
"learning_rate": 8.887989203778678e-06,
"loss": 0.212,
"step": 6680
},
{
"epoch": 0.6,
"learning_rate": 8.886189833558255e-06,
"loss": 0.0983,
"step": 6690
},
{
"epoch": 0.6,
"learning_rate": 8.884390463337832e-06,
"loss": 0.1402,
"step": 6700
},
{
"epoch": 0.6,
"learning_rate": 8.882591093117409e-06,
"loss": 0.159,
"step": 6710
},
{
"epoch": 0.6,
"learning_rate": 8.880791722896986e-06,
"loss": 0.1258,
"step": 6720
},
{
"epoch": 0.6,
"learning_rate": 8.878992352676564e-06,
"loss": 0.0954,
"step": 6730
},
{
"epoch": 0.6,
"learning_rate": 8.877192982456141e-06,
"loss": 0.0412,
"step": 6740
},
{
"epoch": 0.6,
"learning_rate": 8.875393612235718e-06,
"loss": 0.078,
"step": 6750
},
{
"epoch": 0.6,
"learning_rate": 8.873594242015295e-06,
"loss": 0.1009,
"step": 6760
},
{
"epoch": 0.6,
"learning_rate": 8.871794871794872e-06,
"loss": 0.099,
"step": 6770
},
{
"epoch": 0.6,
"learning_rate": 8.86999550157445e-06,
"loss": 0.0313,
"step": 6780
},
{
"epoch": 0.61,
"learning_rate": 8.868196131354027e-06,
"loss": 0.0375,
"step": 6790
},
{
"epoch": 0.61,
"learning_rate": 8.866396761133604e-06,
"loss": 0.0885,
"step": 6800
},
{
"epoch": 0.61,
"learning_rate": 8.864597390913181e-06,
"loss": 0.0761,
"step": 6810
},
{
"epoch": 0.61,
"learning_rate": 8.862798020692758e-06,
"loss": 0.0892,
"step": 6820
},
{
"epoch": 0.61,
"learning_rate": 8.860998650472335e-06,
"loss": 0.1141,
"step": 6830
},
{
"epoch": 0.61,
"learning_rate": 8.859199280251912e-06,
"loss": 0.0784,
"step": 6840
},
{
"epoch": 0.61,
"learning_rate": 8.85739991003149e-06,
"loss": 0.1206,
"step": 6850
},
{
"epoch": 0.61,
"learning_rate": 8.855600539811067e-06,
"loss": 0.054,
"step": 6860
},
{
"epoch": 0.61,
"learning_rate": 8.853801169590644e-06,
"loss": 0.2208,
"step": 6870
},
{
"epoch": 0.61,
"learning_rate": 8.85200179937022e-06,
"loss": 0.0689,
"step": 6880
},
{
"epoch": 0.61,
"learning_rate": 8.850202429149798e-06,
"loss": 0.0898,
"step": 6890
},
{
"epoch": 0.62,
"learning_rate": 8.848403058929376e-06,
"loss": 0.1601,
"step": 6900
},
{
"epoch": 0.62,
"learning_rate": 8.846603688708953e-06,
"loss": 0.0574,
"step": 6910
},
{
"epoch": 0.62,
"learning_rate": 8.84480431848853e-06,
"loss": 0.0564,
"step": 6920
},
{
"epoch": 0.62,
"learning_rate": 8.843004948268107e-06,
"loss": 0.0072,
"step": 6930
},
{
"epoch": 0.62,
"learning_rate": 8.841205578047684e-06,
"loss": 0.0368,
"step": 6940
},
{
"epoch": 0.62,
"learning_rate": 8.839406207827262e-06,
"loss": 0.1152,
"step": 6950
},
{
"epoch": 0.62,
"learning_rate": 8.837606837606837e-06,
"loss": 0.0871,
"step": 6960
},
{
"epoch": 0.62,
"learning_rate": 8.835807467386416e-06,
"loss": 0.0936,
"step": 6970
},
{
"epoch": 0.62,
"learning_rate": 8.834008097165993e-06,
"loss": 0.2966,
"step": 6980
},
{
"epoch": 0.62,
"learning_rate": 8.83220872694557e-06,
"loss": 0.1432,
"step": 6990
},
{
"epoch": 0.62,
"learning_rate": 8.830409356725146e-06,
"loss": 0.135,
"step": 7000
},
{
"epoch": 0.63,
"learning_rate": 8.828609986504723e-06,
"loss": 0.0894,
"step": 7010
},
{
"epoch": 0.63,
"learning_rate": 8.826810616284302e-06,
"loss": 0.1052,
"step": 7020
},
{
"epoch": 0.63,
"learning_rate": 8.825011246063879e-06,
"loss": 0.1165,
"step": 7030
},
{
"epoch": 0.63,
"learning_rate": 8.823211875843456e-06,
"loss": 0.0955,
"step": 7040
},
{
"epoch": 0.63,
"learning_rate": 8.821412505623032e-06,
"loss": 0.0356,
"step": 7050
},
{
"epoch": 0.63,
"learning_rate": 8.81961313540261e-06,
"loss": 0.143,
"step": 7060
},
{
"epoch": 0.63,
"learning_rate": 8.817813765182188e-06,
"loss": 0.0743,
"step": 7070
},
{
"epoch": 0.63,
"learning_rate": 8.816014394961763e-06,
"loss": 0.0865,
"step": 7080
},
{
"epoch": 0.63,
"learning_rate": 8.814215024741342e-06,
"loss": 0.1262,
"step": 7090
},
{
"epoch": 0.63,
"learning_rate": 8.812415654520918e-06,
"loss": 0.0891,
"step": 7100
},
{
"epoch": 0.63,
"learning_rate": 8.810616284300495e-06,
"loss": 0.1062,
"step": 7110
},
{
"epoch": 0.63,
"learning_rate": 8.808816914080072e-06,
"loss": 0.0651,
"step": 7120
},
{
"epoch": 0.64,
"learning_rate": 8.807017543859649e-06,
"loss": 0.1016,
"step": 7130
},
{
"epoch": 0.64,
"learning_rate": 8.805218173639228e-06,
"loss": 0.1007,
"step": 7140
},
{
"epoch": 0.64,
"learning_rate": 8.803418803418804e-06,
"loss": 0.0866,
"step": 7150
},
{
"epoch": 0.64,
"learning_rate": 8.801619433198381e-06,
"loss": 0.0866,
"step": 7160
},
{
"epoch": 0.64,
"learning_rate": 8.799820062977958e-06,
"loss": 0.0524,
"step": 7170
},
{
"epoch": 0.64,
"learning_rate": 8.798020692757535e-06,
"loss": 0.0092,
"step": 7180
},
{
"epoch": 0.64,
"learning_rate": 8.796221322537114e-06,
"loss": 0.0843,
"step": 7190
},
{
"epoch": 0.64,
"learning_rate": 8.79442195231669e-06,
"loss": 0.0962,
"step": 7200
},
{
"epoch": 0.64,
"learning_rate": 8.792622582096267e-06,
"loss": 0.0755,
"step": 7210
},
{
"epoch": 0.64,
"learning_rate": 8.790823211875844e-06,
"loss": 0.1832,
"step": 7220
},
{
"epoch": 0.64,
"learning_rate": 8.789023841655421e-06,
"loss": 0.1657,
"step": 7230
},
{
"epoch": 0.65,
"learning_rate": 8.787224471434998e-06,
"loss": 0.0969,
"step": 7240
},
{
"epoch": 0.65,
"learning_rate": 8.785425101214575e-06,
"loss": 0.1005,
"step": 7250
},
{
"epoch": 0.65,
"learning_rate": 8.783625730994153e-06,
"loss": 0.0896,
"step": 7260
},
{
"epoch": 0.65,
"learning_rate": 8.78182636077373e-06,
"loss": 0.0941,
"step": 7270
},
{
"epoch": 0.65,
"learning_rate": 8.780026990553307e-06,
"loss": 0.1214,
"step": 7280
},
{
"epoch": 0.65,
"learning_rate": 8.778227620332884e-06,
"loss": 0.1319,
"step": 7290
},
{
"epoch": 0.65,
"learning_rate": 8.77642825011246e-06,
"loss": 0.0806,
"step": 7300
},
{
"epoch": 0.65,
"learning_rate": 8.77462887989204e-06,
"loss": 0.1304,
"step": 7310
},
{
"epoch": 0.65,
"learning_rate": 8.772829509671616e-06,
"loss": 0.0723,
"step": 7320
},
{
"epoch": 0.65,
"learning_rate": 8.771030139451193e-06,
"loss": 0.176,
"step": 7330
},
{
"epoch": 0.65,
"learning_rate": 8.76923076923077e-06,
"loss": 0.0992,
"step": 7340
},
{
"epoch": 0.66,
"learning_rate": 8.767431399010347e-06,
"loss": 0.128,
"step": 7350
},
{
"epoch": 0.66,
"learning_rate": 8.765632028789924e-06,
"loss": 0.0585,
"step": 7360
},
{
"epoch": 0.66,
"learning_rate": 8.7638326585695e-06,
"loss": 0.2104,
"step": 7370
},
{
"epoch": 0.66,
"learning_rate": 8.762033288349079e-06,
"loss": 0.0491,
"step": 7380
},
{
"epoch": 0.66,
"learning_rate": 8.760233918128656e-06,
"loss": 0.1178,
"step": 7390
},
{
"epoch": 0.66,
"learning_rate": 8.758434547908233e-06,
"loss": 0.181,
"step": 7400
},
{
"epoch": 0.66,
"learning_rate": 8.75663517768781e-06,
"loss": 0.1329,
"step": 7410
},
{
"epoch": 0.66,
"learning_rate": 8.754835807467386e-06,
"loss": 0.1067,
"step": 7420
},
{
"epoch": 0.66,
"learning_rate": 8.753036437246965e-06,
"loss": 0.1344,
"step": 7430
},
{
"epoch": 0.66,
"learning_rate": 8.751237067026542e-06,
"loss": 0.0359,
"step": 7440
},
{
"epoch": 0.66,
"learning_rate": 8.749437696806119e-06,
"loss": 0.0587,
"step": 7450
},
{
"epoch": 0.67,
"learning_rate": 8.747638326585696e-06,
"loss": 0.0763,
"step": 7460
},
{
"epoch": 0.67,
"learning_rate": 8.745838956365272e-06,
"loss": 0.1267,
"step": 7470
},
{
"epoch": 0.67,
"learning_rate": 8.744039586144851e-06,
"loss": 0.0407,
"step": 7480
},
{
"epoch": 0.67,
"learning_rate": 8.742240215924426e-06,
"loss": 0.0741,
"step": 7490
},
{
"epoch": 0.67,
"learning_rate": 8.740440845704005e-06,
"loss": 0.0914,
"step": 7500
},
{
"epoch": 0.67,
"eval_accuracy": 0.971293225959261,
"eval_f1": 0.95092057664273,
"eval_loss": 0.12410593777894974,
"eval_precision": 0.9320491109229466,
"eval_recall": 0.9705720268929792,
"eval_runtime": 436.6013,
"eval_samples_per_second": 72.526,
"eval_steps_per_second": 4.535,
"step": 7500
},
{
"epoch": 0.67,
"learning_rate": 8.738641475483582e-06,
"loss": 0.1633,
"step": 7510
},
{
"epoch": 0.67,
"learning_rate": 8.736842105263158e-06,
"loss": 0.1843,
"step": 7520
},
{
"epoch": 0.67,
"learning_rate": 8.735042735042735e-06,
"loss": 0.1008,
"step": 7530
},
{
"epoch": 0.67,
"learning_rate": 8.733243364822312e-06,
"loss": 0.0962,
"step": 7540
},
{
"epoch": 0.67,
"learning_rate": 8.73144399460189e-06,
"loss": 0.0271,
"step": 7550
},
{
"epoch": 0.67,
"learning_rate": 8.729644624381468e-06,
"loss": 0.1435,
"step": 7560
},
{
"epoch": 0.67,
"learning_rate": 8.727845254161044e-06,
"loss": 0.0794,
"step": 7570
},
{
"epoch": 0.68,
"learning_rate": 8.726045883940621e-06,
"loss": 0.1544,
"step": 7580
},
{
"epoch": 0.68,
"learning_rate": 8.724246513720198e-06,
"loss": 0.0104,
"step": 7590
},
{
"epoch": 0.68,
"learning_rate": 8.722447143499777e-06,
"loss": 0.0687,
"step": 7600
},
{
"epoch": 0.68,
"learning_rate": 8.720647773279352e-06,
"loss": 0.1329,
"step": 7610
},
{
"epoch": 0.68,
"learning_rate": 8.71884840305893e-06,
"loss": 0.0721,
"step": 7620
},
{
"epoch": 0.68,
"learning_rate": 8.717049032838507e-06,
"loss": 0.1454,
"step": 7630
},
{
"epoch": 0.68,
"learning_rate": 8.715249662618084e-06,
"loss": 0.0498,
"step": 7640
},
{
"epoch": 0.68,
"learning_rate": 8.713450292397661e-06,
"loss": 0.1081,
"step": 7650
},
{
"epoch": 0.68,
"learning_rate": 8.711650922177238e-06,
"loss": 0.072,
"step": 7660
},
{
"epoch": 0.68,
"learning_rate": 8.709851551956816e-06,
"loss": 0.0765,
"step": 7670
},
{
"epoch": 0.68,
"learning_rate": 8.708052181736393e-06,
"loss": 0.1375,
"step": 7680
},
{
"epoch": 0.69,
"learning_rate": 8.70625281151597e-06,
"loss": 0.1537,
"step": 7690
},
{
"epoch": 0.69,
"learning_rate": 8.704453441295547e-06,
"loss": 0.097,
"step": 7700
},
{
"epoch": 0.69,
"learning_rate": 8.702654071075124e-06,
"loss": 0.1346,
"step": 7710
},
{
"epoch": 0.69,
"learning_rate": 8.700854700854702e-06,
"loss": 0.1619,
"step": 7720
},
{
"epoch": 0.69,
"learning_rate": 8.69905533063428e-06,
"loss": 0.1027,
"step": 7730
},
{
"epoch": 0.69,
"learning_rate": 8.697255960413856e-06,
"loss": 0.1377,
"step": 7740
},
{
"epoch": 0.69,
"learning_rate": 8.695456590193433e-06,
"loss": 0.0791,
"step": 7750
},
{
"epoch": 0.69,
"learning_rate": 8.693657219973012e-06,
"loss": 0.0814,
"step": 7760
},
{
"epoch": 0.69,
"learning_rate": 8.691857849752587e-06,
"loss": 0.078,
"step": 7770
},
{
"epoch": 0.69,
"learning_rate": 8.690058479532164e-06,
"loss": 0.0902,
"step": 7780
},
{
"epoch": 0.69,
"learning_rate": 8.688259109311742e-06,
"loss": 0.144,
"step": 7790
},
{
"epoch": 0.7,
"learning_rate": 8.686459739091319e-06,
"loss": 0.1267,
"step": 7800
},
{
"epoch": 0.7,
"learning_rate": 8.684660368870896e-06,
"loss": 0.0564,
"step": 7810
},
{
"epoch": 0.7,
"learning_rate": 8.682860998650473e-06,
"loss": 0.0782,
"step": 7820
},
{
"epoch": 0.7,
"learning_rate": 8.68106162843005e-06,
"loss": 0.1353,
"step": 7830
},
{
"epoch": 0.7,
"learning_rate": 8.679262258209628e-06,
"loss": 0.0428,
"step": 7840
},
{
"epoch": 0.7,
"learning_rate": 8.677462887989205e-06,
"loss": 0.0864,
"step": 7850
},
{
"epoch": 0.7,
"learning_rate": 8.675663517768782e-06,
"loss": 0.0717,
"step": 7860
},
{
"epoch": 0.7,
"learning_rate": 8.673864147548359e-06,
"loss": 0.1192,
"step": 7870
},
{
"epoch": 0.7,
"learning_rate": 8.672064777327936e-06,
"loss": 0.0728,
"step": 7880
},
{
"epoch": 0.7,
"learning_rate": 8.670265407107512e-06,
"loss": 0.0615,
"step": 7890
},
{
"epoch": 0.7,
"learning_rate": 8.66846603688709e-06,
"loss": 0.0884,
"step": 7900
},
{
"epoch": 0.71,
"learning_rate": 8.666666666666668e-06,
"loss": 0.1488,
"step": 7910
},
{
"epoch": 0.71,
"learning_rate": 8.664867296446245e-06,
"loss": 0.1203,
"step": 7920
},
{
"epoch": 0.71,
"learning_rate": 8.663067926225822e-06,
"loss": 0.0469,
"step": 7930
},
{
"epoch": 0.71,
"learning_rate": 8.661268556005398e-06,
"loss": 0.0475,
"step": 7940
},
{
"epoch": 0.71,
"learning_rate": 8.659469185784975e-06,
"loss": 0.1008,
"step": 7950
},
{
"epoch": 0.71,
"learning_rate": 8.657669815564554e-06,
"loss": 0.0544,
"step": 7960
},
{
"epoch": 0.71,
"learning_rate": 8.65587044534413e-06,
"loss": 0.1543,
"step": 7970
},
{
"epoch": 0.71,
"learning_rate": 8.654071075123708e-06,
"loss": 0.1327,
"step": 7980
},
{
"epoch": 0.71,
"learning_rate": 8.652271704903284e-06,
"loss": 0.3129,
"step": 7990
},
{
"epoch": 0.71,
"learning_rate": 8.650472334682861e-06,
"loss": 0.0302,
"step": 8000
},
{
"epoch": 0.71,
"learning_rate": 8.64867296446244e-06,
"loss": 0.1717,
"step": 8010
},
{
"epoch": 0.72,
"learning_rate": 8.646873594242015e-06,
"loss": 0.0878,
"step": 8020
},
{
"epoch": 0.72,
"learning_rate": 8.645074224021594e-06,
"loss": 0.0869,
"step": 8030
},
{
"epoch": 0.72,
"learning_rate": 8.64327485380117e-06,
"loss": 0.0919,
"step": 8040
},
{
"epoch": 0.72,
"learning_rate": 8.641475483580747e-06,
"loss": 0.058,
"step": 8050
},
{
"epoch": 0.72,
"learning_rate": 8.639676113360324e-06,
"loss": 0.1208,
"step": 8060
},
{
"epoch": 0.72,
"learning_rate": 8.637876743139901e-06,
"loss": 0.0707,
"step": 8070
},
{
"epoch": 0.72,
"learning_rate": 8.63607737291948e-06,
"loss": 0.0532,
"step": 8080
},
{
"epoch": 0.72,
"learning_rate": 8.634278002699056e-06,
"loss": 0.0889,
"step": 8090
},
{
"epoch": 0.72,
"learning_rate": 8.632478632478633e-06,
"loss": 0.0517,
"step": 8100
},
{
"epoch": 0.72,
"learning_rate": 8.63067926225821e-06,
"loss": 0.1343,
"step": 8110
},
{
"epoch": 0.72,
"learning_rate": 8.628879892037787e-06,
"loss": 0.0304,
"step": 8120
},
{
"epoch": 0.72,
"learning_rate": 8.627080521817366e-06,
"loss": 0.1531,
"step": 8130
},
{
"epoch": 0.73,
"learning_rate": 8.62528115159694e-06,
"loss": 0.1384,
"step": 8140
},
{
"epoch": 0.73,
"learning_rate": 8.62348178137652e-06,
"loss": 0.0572,
"step": 8150
},
{
"epoch": 0.73,
"learning_rate": 8.621682411156096e-06,
"loss": 0.043,
"step": 8160
},
{
"epoch": 0.73,
"learning_rate": 8.619883040935673e-06,
"loss": 0.0598,
"step": 8170
},
{
"epoch": 0.73,
"learning_rate": 8.61808367071525e-06,
"loss": 0.0271,
"step": 8180
},
{
"epoch": 0.73,
"learning_rate": 8.616284300494827e-06,
"loss": 0.0278,
"step": 8190
},
{
"epoch": 0.73,
"learning_rate": 8.614484930274405e-06,
"loss": 0.0878,
"step": 8200
},
{
"epoch": 0.73,
"learning_rate": 8.612685560053982e-06,
"loss": 0.1291,
"step": 8210
},
{
"epoch": 0.73,
"learning_rate": 8.610886189833559e-06,
"loss": 0.091,
"step": 8220
},
{
"epoch": 0.73,
"learning_rate": 8.609086819613136e-06,
"loss": 0.053,
"step": 8230
},
{
"epoch": 0.73,
"learning_rate": 8.607287449392713e-06,
"loss": 0.0576,
"step": 8240
},
{
"epoch": 0.74,
"learning_rate": 8.605488079172291e-06,
"loss": 0.1051,
"step": 8250
},
{
"epoch": 0.74,
"learning_rate": 8.603688708951866e-06,
"loss": 0.0692,
"step": 8260
},
{
"epoch": 0.74,
"learning_rate": 8.601889338731445e-06,
"loss": 0.1337,
"step": 8270
},
{
"epoch": 0.74,
"learning_rate": 8.600089968511022e-06,
"loss": 0.0394,
"step": 8280
},
{
"epoch": 0.74,
"learning_rate": 8.598290598290599e-06,
"loss": 0.066,
"step": 8290
},
{
"epoch": 0.74,
"learning_rate": 8.596491228070176e-06,
"loss": 0.0414,
"step": 8300
},
{
"epoch": 0.74,
"learning_rate": 8.594691857849752e-06,
"loss": 0.1076,
"step": 8310
},
{
"epoch": 0.74,
"learning_rate": 8.592892487629331e-06,
"loss": 0.1802,
"step": 8320
},
{
"epoch": 0.74,
"learning_rate": 8.591093117408908e-06,
"loss": 0.0185,
"step": 8330
},
{
"epoch": 0.74,
"learning_rate": 8.589293747188485e-06,
"loss": 0.0604,
"step": 8340
},
{
"epoch": 0.74,
"learning_rate": 8.587494376968062e-06,
"loss": 0.1542,
"step": 8350
},
{
"epoch": 0.75,
"learning_rate": 8.585695006747638e-06,
"loss": 0.1218,
"step": 8360
},
{
"epoch": 0.75,
"learning_rate": 8.583895636527217e-06,
"loss": 0.1441,
"step": 8370
},
{
"epoch": 0.75,
"learning_rate": 8.582096266306794e-06,
"loss": 0.0921,
"step": 8380
},
{
"epoch": 0.75,
"learning_rate": 8.58029689608637e-06,
"loss": 0.0408,
"step": 8390
},
{
"epoch": 0.75,
"learning_rate": 8.578497525865948e-06,
"loss": 0.0247,
"step": 8400
},
{
"epoch": 0.75,
"learning_rate": 8.576698155645524e-06,
"loss": 0.0663,
"step": 8410
},
{
"epoch": 0.75,
"learning_rate": 8.574898785425101e-06,
"loss": 0.148,
"step": 8420
},
{
"epoch": 0.75,
"learning_rate": 8.573099415204678e-06,
"loss": 0.0787,
"step": 8430
},
{
"epoch": 0.75,
"learning_rate": 8.571300044984257e-06,
"loss": 0.0468,
"step": 8440
},
{
"epoch": 0.75,
"learning_rate": 8.569500674763834e-06,
"loss": 0.1468,
"step": 8450
},
{
"epoch": 0.75,
"learning_rate": 8.56770130454341e-06,
"loss": 0.0841,
"step": 8460
},
{
"epoch": 0.76,
"learning_rate": 8.565901934322987e-06,
"loss": 0.0974,
"step": 8470
},
{
"epoch": 0.76,
"learning_rate": 8.564102564102564e-06,
"loss": 0.0644,
"step": 8480
},
{
"epoch": 0.76,
"learning_rate": 8.562303193882143e-06,
"loss": 0.055,
"step": 8490
},
{
"epoch": 0.76,
"learning_rate": 8.56050382366172e-06,
"loss": 0.0476,
"step": 8500
},
{
"epoch": 0.76,
"learning_rate": 8.558704453441296e-06,
"loss": 0.2432,
"step": 8510
},
{
"epoch": 0.76,
"learning_rate": 8.556905083220873e-06,
"loss": 0.1153,
"step": 8520
},
{
"epoch": 0.76,
"learning_rate": 8.55510571300045e-06,
"loss": 0.0691,
"step": 8530
},
{
"epoch": 0.76,
"learning_rate": 8.553306342780027e-06,
"loss": 0.0634,
"step": 8540
},
{
"epoch": 0.76,
"learning_rate": 8.551506972559604e-06,
"loss": 0.1217,
"step": 8550
},
{
"epoch": 0.76,
"learning_rate": 8.549707602339182e-06,
"loss": 0.15,
"step": 8560
},
{
"epoch": 0.76,
"learning_rate": 8.54790823211876e-06,
"loss": 0.1146,
"step": 8570
},
{
"epoch": 0.77,
"learning_rate": 8.546108861898336e-06,
"loss": 0.0232,
"step": 8580
},
{
"epoch": 0.77,
"learning_rate": 8.544309491677913e-06,
"loss": 0.1652,
"step": 8590
},
{
"epoch": 0.77,
"learning_rate": 8.54251012145749e-06,
"loss": 0.1147,
"step": 8600
},
{
"epoch": 0.77,
"learning_rate": 8.540710751237068e-06,
"loss": 0.1156,
"step": 8610
},
{
"epoch": 0.77,
"learning_rate": 8.538911381016645e-06,
"loss": 0.0725,
"step": 8620
},
{
"epoch": 0.77,
"learning_rate": 8.537112010796222e-06,
"loss": 0.0058,
"step": 8630
},
{
"epoch": 0.77,
"learning_rate": 8.535312640575799e-06,
"loss": 0.1656,
"step": 8640
},
{
"epoch": 0.77,
"learning_rate": 8.533513270355376e-06,
"loss": 0.0576,
"step": 8650
},
{
"epoch": 0.77,
"learning_rate": 8.531713900134954e-06,
"loss": 0.1474,
"step": 8660
},
{
"epoch": 0.77,
"learning_rate": 8.52991452991453e-06,
"loss": 0.1511,
"step": 8670
},
{
"epoch": 0.77,
"learning_rate": 8.528115159694108e-06,
"loss": 0.1008,
"step": 8680
},
{
"epoch": 0.77,
"learning_rate": 8.526315789473685e-06,
"loss": 0.0811,
"step": 8690
},
{
"epoch": 0.78,
"learning_rate": 8.524516419253262e-06,
"loss": 0.0772,
"step": 8700
},
{
"epoch": 0.78,
"learning_rate": 8.522717049032839e-06,
"loss": 0.0955,
"step": 8710
},
{
"epoch": 0.78,
"learning_rate": 8.520917678812416e-06,
"loss": 0.0719,
"step": 8720
},
{
"epoch": 0.78,
"learning_rate": 8.519118308591994e-06,
"loss": 0.1383,
"step": 8730
},
{
"epoch": 0.78,
"learning_rate": 8.517318938371571e-06,
"loss": 0.0035,
"step": 8740
},
{
"epoch": 0.78,
"learning_rate": 8.515519568151148e-06,
"loss": 0.1452,
"step": 8750
},
{
"epoch": 0.78,
"learning_rate": 8.513720197930725e-06,
"loss": 0.1435,
"step": 8760
},
{
"epoch": 0.78,
"learning_rate": 8.511920827710302e-06,
"loss": 0.1142,
"step": 8770
},
{
"epoch": 0.78,
"learning_rate": 8.51012145748988e-06,
"loss": 0.1441,
"step": 8780
},
{
"epoch": 0.78,
"learning_rate": 8.508322087269455e-06,
"loss": 0.179,
"step": 8790
},
{
"epoch": 0.78,
"learning_rate": 8.506522717049034e-06,
"loss": 0.0934,
"step": 8800
},
{
"epoch": 0.79,
"learning_rate": 8.50472334682861e-06,
"loss": 0.0647,
"step": 8810
},
{
"epoch": 0.79,
"learning_rate": 8.502923976608188e-06,
"loss": 0.0749,
"step": 8820
},
{
"epoch": 0.79,
"learning_rate": 8.501124606387764e-06,
"loss": 0.1038,
"step": 8830
},
{
"epoch": 0.79,
"learning_rate": 8.499325236167341e-06,
"loss": 0.0616,
"step": 8840
},
{
"epoch": 0.79,
"learning_rate": 8.49752586594692e-06,
"loss": 0.1128,
"step": 8850
},
{
"epoch": 0.79,
"learning_rate": 8.495726495726497e-06,
"loss": 0.1401,
"step": 8860
},
{
"epoch": 0.79,
"learning_rate": 8.493927125506074e-06,
"loss": 0.2143,
"step": 8870
},
{
"epoch": 0.79,
"learning_rate": 8.49212775528565e-06,
"loss": 0.0586,
"step": 8880
},
{
"epoch": 0.79,
"learning_rate": 8.490328385065227e-06,
"loss": 0.0917,
"step": 8890
},
{
"epoch": 0.79,
"learning_rate": 8.488529014844806e-06,
"loss": 0.1244,
"step": 8900
},
{
"epoch": 0.79,
"learning_rate": 8.486729644624383e-06,
"loss": 0.0877,
"step": 8910
},
{
"epoch": 0.8,
"learning_rate": 8.48493027440396e-06,
"loss": 0.0792,
"step": 8920
},
{
"epoch": 0.8,
"learning_rate": 8.483130904183536e-06,
"loss": 0.1058,
"step": 8930
},
{
"epoch": 0.8,
"learning_rate": 8.481331533963115e-06,
"loss": 0.1133,
"step": 8940
},
{
"epoch": 0.8,
"learning_rate": 8.47953216374269e-06,
"loss": 0.0392,
"step": 8950
},
{
"epoch": 0.8,
"learning_rate": 8.477732793522267e-06,
"loss": 0.1745,
"step": 8960
},
{
"epoch": 0.8,
"learning_rate": 8.475933423301846e-06,
"loss": 0.1019,
"step": 8970
},
{
"epoch": 0.8,
"learning_rate": 8.474134053081422e-06,
"loss": 0.166,
"step": 8980
},
{
"epoch": 0.8,
"learning_rate": 8.472334682861e-06,
"loss": 0.1927,
"step": 8990
},
{
"epoch": 0.8,
"learning_rate": 8.470535312640576e-06,
"loss": 0.1251,
"step": 9000
},
{
"epoch": 0.8,
"eval_accuracy": 0.9701563240170535,
"eval_f1": 0.9492399419885051,
"eval_loss": 0.09671162813901901,
"eval_precision": 0.9258172673931265,
"eval_recall": 0.9738785407252287,
"eval_runtime": 437.2054,
"eval_samples_per_second": 72.426,
"eval_steps_per_second": 4.529,
"step": 9000
}
],
"max_steps": 56075,
"num_train_epochs": 5,
"total_flos": 7.6243123003392e+16,
"trial_name": null,
"trial_params": null
}