BacteriaTIS-DNABERT-K6-89M / trainer_state.json
Genereux-akotenou's picture
add model files
ae706e7 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0017030357752335,
"global_step": 99076,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.9999979372962823e-05,
"loss": 0.1516,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 2.9999938118888465e-05,
"loss": 0.1292,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 2.9999896864814113e-05,
"loss": 0.1529,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 2.9999855610739755e-05,
"loss": 0.128,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 2.99998143566654e-05,
"loss": 0.1422,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 2.9999773102591045e-05,
"loss": 0.1399,
"step": 600
},
{
"epoch": 0.0,
"learning_rate": 2.999973184851669e-05,
"loss": 0.1531,
"step": 700
},
{
"epoch": 0.0,
"learning_rate": 2.999969059444233e-05,
"loss": 0.1411,
"step": 800
},
{
"epoch": 0.0,
"learning_rate": 2.999964934036798e-05,
"loss": 0.1376,
"step": 900
},
{
"epoch": 0.0,
"learning_rate": 2.999960808629362e-05,
"loss": 0.1655,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 2.9999566832219266e-05,
"loss": 0.1721,
"step": 1100
},
{
"epoch": 0.0,
"learning_rate": 2.9999525578144915e-05,
"loss": 0.1759,
"step": 1200
},
{
"epoch": 0.0,
"learning_rate": 2.9999484324070556e-05,
"loss": 0.1762,
"step": 1300
},
{
"epoch": 0.0,
"learning_rate": 2.99994430699962e-05,
"loss": 0.1584,
"step": 1400
},
{
"epoch": 0.0,
"learning_rate": 2.9999401815921846e-05,
"loss": 0.1661,
"step": 1500
},
{
"epoch": 0.0,
"learning_rate": 2.999936056184749e-05,
"loss": 0.1591,
"step": 1600
},
{
"epoch": 0.0,
"learning_rate": 2.9999319307773133e-05,
"loss": 0.1646,
"step": 1700
},
{
"epoch": 0.0,
"learning_rate": 2.999927805369878e-05,
"loss": 0.1636,
"step": 1800
},
{
"epoch": 0.0,
"learning_rate": 2.9999236799624423e-05,
"loss": 0.1518,
"step": 1900
},
{
"epoch": 0.0,
"learning_rate": 2.9999195545550068e-05,
"loss": 0.1463,
"step": 2000
},
{
"epoch": 0.0,
"learning_rate": 2.9999154291475713e-05,
"loss": 0.1507,
"step": 2100
},
{
"epoch": 0.0,
"learning_rate": 2.9999113037401357e-05,
"loss": 0.1528,
"step": 2200
},
{
"epoch": 0.0,
"learning_rate": 2.9999071783327e-05,
"loss": 0.1542,
"step": 2300
},
{
"epoch": 0.0,
"learning_rate": 2.9999030529252647e-05,
"loss": 0.1494,
"step": 2400
},
{
"epoch": 0.0,
"learning_rate": 2.999898927517829e-05,
"loss": 0.154,
"step": 2500
},
{
"epoch": 0.0,
"learning_rate": 2.9998948021103934e-05,
"loss": 0.1474,
"step": 2600
},
{
"epoch": 0.0,
"learning_rate": 2.999890676702958e-05,
"loss": 0.1562,
"step": 2700
},
{
"epoch": 0.0,
"learning_rate": 2.9998865512955224e-05,
"loss": 0.1482,
"step": 2800
},
{
"epoch": 0.0,
"learning_rate": 2.9998824258880865e-05,
"loss": 0.166,
"step": 2900
},
{
"epoch": 0.0,
"learning_rate": 2.9998783004806514e-05,
"loss": 0.1187,
"step": 3000
},
{
"epoch": 0.0,
"learning_rate": 2.9998741750732155e-05,
"loss": 0.1322,
"step": 3100
},
{
"epoch": 0.0,
"learning_rate": 2.99987004966578e-05,
"loss": 0.1612,
"step": 3200
},
{
"epoch": 0.0,
"learning_rate": 2.999865924258345e-05,
"loss": 0.152,
"step": 3300
},
{
"epoch": 0.0,
"learning_rate": 2.999861798850909e-05,
"loss": 0.1375,
"step": 3400
},
{
"epoch": 0.0,
"learning_rate": 2.9998576734434735e-05,
"loss": 0.1252,
"step": 3500
},
{
"epoch": 0.0,
"learning_rate": 2.999853548036038e-05,
"loss": 0.1582,
"step": 3600
},
{
"epoch": 0.0,
"learning_rate": 2.9998494226286025e-05,
"loss": 0.1371,
"step": 3700
},
{
"epoch": 0.0,
"learning_rate": 2.9998452972211667e-05,
"loss": 0.1264,
"step": 3800
},
{
"epoch": 0.0,
"learning_rate": 2.9998411718137315e-05,
"loss": 0.1409,
"step": 3900
},
{
"epoch": 0.0,
"learning_rate": 2.9998370876603703e-05,
"loss": 0.1247,
"step": 4000
},
{
"epoch": 0.0,
"learning_rate": 2.9998329622529344e-05,
"loss": 0.1342,
"step": 4100
},
{
"epoch": 0.0,
"learning_rate": 2.999828836845499e-05,
"loss": 0.1528,
"step": 4200
},
{
"epoch": 0.0,
"learning_rate": 2.9998247114380634e-05,
"loss": 0.1532,
"step": 4300
},
{
"epoch": 0.0,
"learning_rate": 2.999820586030628e-05,
"loss": 0.1239,
"step": 4400
},
{
"epoch": 0.0,
"learning_rate": 2.999816460623192e-05,
"loss": 0.1489,
"step": 4500
},
{
"epoch": 0.0,
"learning_rate": 2.999812335215757e-05,
"loss": 0.1367,
"step": 4600
},
{
"epoch": 0.0,
"learning_rate": 2.9998082098083214e-05,
"loss": 0.1593,
"step": 4700
},
{
"epoch": 0.0,
"learning_rate": 2.9998040844008856e-05,
"loss": 0.1559,
"step": 4800
},
{
"epoch": 0.0,
"learning_rate": 2.9997999589934504e-05,
"loss": 0.1366,
"step": 4900
},
{
"epoch": 0.0,
"learning_rate": 2.9997958335860146e-05,
"loss": 0.1535,
"step": 5000
},
{
"epoch": 0.0,
"learning_rate": 2.999791708178579e-05,
"loss": 0.1515,
"step": 5100
},
{
"epoch": 0.0,
"learning_rate": 2.9997875827711436e-05,
"loss": 0.1453,
"step": 5200
},
{
"epoch": 0.0,
"learning_rate": 2.999783457363708e-05,
"loss": 0.1378,
"step": 5300
},
{
"epoch": 0.0,
"learning_rate": 2.9997793319562722e-05,
"loss": 0.1202,
"step": 5400
},
{
"epoch": 0.0,
"learning_rate": 2.999775206548837e-05,
"loss": 0.1503,
"step": 5500
},
{
"epoch": 0.0,
"learning_rate": 2.9997710811414012e-05,
"loss": 0.1573,
"step": 5600
},
{
"epoch": 0.0,
"learning_rate": 2.9997669557339657e-05,
"loss": 0.1508,
"step": 5700
},
{
"epoch": 0.0,
"learning_rate": 2.9997628303265302e-05,
"loss": 0.137,
"step": 5800
},
{
"epoch": 0.0,
"learning_rate": 2.9997587049190947e-05,
"loss": 0.1373,
"step": 5900
},
{
"epoch": 0.0,
"learning_rate": 2.999754579511659e-05,
"loss": 0.152,
"step": 6000
},
{
"epoch": 0.0,
"learning_rate": 2.9997504541042237e-05,
"loss": 0.1575,
"step": 6100
},
{
"epoch": 0.0,
"learning_rate": 2.999746328696788e-05,
"loss": 0.1468,
"step": 6200
},
{
"epoch": 0.0,
"learning_rate": 2.999742244543427e-05,
"loss": 0.1289,
"step": 6300
},
{
"epoch": 0.0,
"learning_rate": 2.999738119135991e-05,
"loss": 0.1549,
"step": 6400
},
{
"epoch": 0.0,
"learning_rate": 2.999733993728556e-05,
"loss": 0.1638,
"step": 6500
},
{
"epoch": 0.0,
"learning_rate": 2.99972986832112e-05,
"loss": 0.1305,
"step": 6600
},
{
"epoch": 0.0,
"learning_rate": 2.9997257429136846e-05,
"loss": 0.1522,
"step": 6700
},
{
"epoch": 0.0,
"learning_rate": 2.999721617506249e-05,
"loss": 0.1496,
"step": 6800
},
{
"epoch": 0.0,
"learning_rate": 2.9997174920988136e-05,
"loss": 0.1535,
"step": 6900
},
{
"epoch": 0.0,
"learning_rate": 2.9997133666913778e-05,
"loss": 0.1444,
"step": 7000
},
{
"epoch": 0.0,
"learning_rate": 2.9997092412839426e-05,
"loss": 0.1505,
"step": 7100
},
{
"epoch": 0.0,
"learning_rate": 2.9997051158765068e-05,
"loss": 0.1233,
"step": 7200
},
{
"epoch": 0.0,
"learning_rate": 2.9997009904690713e-05,
"loss": 0.1575,
"step": 7300
},
{
"epoch": 0.0,
"learning_rate": 2.9996968650616358e-05,
"loss": 0.1305,
"step": 7400
},
{
"epoch": 0.0,
"learning_rate": 2.9996927396542003e-05,
"loss": 0.1433,
"step": 7500
},
{
"epoch": 0.0,
"learning_rate": 2.9996886142467644e-05,
"loss": 0.1621,
"step": 7600
},
{
"epoch": 0.0,
"learning_rate": 2.9996844888393293e-05,
"loss": 0.1501,
"step": 7700
},
{
"epoch": 0.0,
"learning_rate": 2.9996803634318934e-05,
"loss": 0.1601,
"step": 7800
},
{
"epoch": 0.0,
"learning_rate": 2.999676238024458e-05,
"loss": 0.1459,
"step": 7900
},
{
"epoch": 0.0,
"learning_rate": 2.9996721126170224e-05,
"loss": 0.1671,
"step": 8000
},
{
"epoch": 0.0,
"learning_rate": 2.999667987209587e-05,
"loss": 0.1321,
"step": 8100
},
{
"epoch": 0.0,
"learning_rate": 2.9996638618021514e-05,
"loss": 0.1512,
"step": 8200
},
{
"epoch": 0.0,
"learning_rate": 2.999659736394716e-05,
"loss": 0.1654,
"step": 8300
},
{
"epoch": 0.0,
"learning_rate": 2.9996556109872804e-05,
"loss": 0.1468,
"step": 8400
},
{
"epoch": 0.0,
"learning_rate": 2.9996514855798446e-05,
"loss": 0.154,
"step": 8500
},
{
"epoch": 0.0,
"learning_rate": 2.9996473601724094e-05,
"loss": 0.1684,
"step": 8600
},
{
"epoch": 0.0,
"learning_rate": 2.9996432347649735e-05,
"loss": 0.1538,
"step": 8700
},
{
"epoch": 0.0,
"learning_rate": 2.9996391506116123e-05,
"loss": 0.1543,
"step": 8800
},
{
"epoch": 0.0,
"learning_rate": 2.9996350252041768e-05,
"loss": 0.1543,
"step": 8900
},
{
"epoch": 0.0,
"learning_rate": 2.999630899796741e-05,
"loss": 0.1595,
"step": 9000
},
{
"epoch": 0.0,
"learning_rate": 2.9996267743893058e-05,
"loss": 0.1749,
"step": 9100
},
{
"epoch": 0.0,
"learning_rate": 2.99962264898187e-05,
"loss": 0.1828,
"step": 9200
},
{
"epoch": 0.0,
"learning_rate": 2.9996185235744345e-05,
"loss": 0.1355,
"step": 9300
},
{
"epoch": 0.0,
"learning_rate": 2.999614398166999e-05,
"loss": 0.1533,
"step": 9400
},
{
"epoch": 0.0,
"learning_rate": 2.9996102727595635e-05,
"loss": 0.1432,
"step": 9500
},
{
"epoch": 0.0,
"learning_rate": 2.999606147352128e-05,
"loss": 0.1227,
"step": 9600
},
{
"epoch": 0.0,
"learning_rate": 2.9996020219446924e-05,
"loss": 0.1507,
"step": 9700
},
{
"epoch": 0.0,
"learning_rate": 2.999597896537257e-05,
"loss": 0.1324,
"step": 9800
},
{
"epoch": 0.0,
"learning_rate": 2.999593771129821e-05,
"loss": 0.1549,
"step": 9900
},
{
"epoch": 0.0,
"learning_rate": 2.999589645722386e-05,
"loss": 0.1554,
"step": 10000
},
{
"epoch": 0.0,
"learning_rate": 2.99958552031495e-05,
"loss": 0.1591,
"step": 10100
},
{
"epoch": 0.0,
"learning_rate": 2.9995813949075146e-05,
"loss": 0.1546,
"step": 10200
},
{
"epoch": 0.0,
"learning_rate": 2.999577269500079e-05,
"loss": 0.162,
"step": 10300
},
{
"epoch": 0.0,
"learning_rate": 2.9995731440926436e-05,
"loss": 0.1678,
"step": 10400
},
{
"epoch": 0.0,
"learning_rate": 2.9995690186852077e-05,
"loss": 0.1584,
"step": 10500
},
{
"epoch": 0.0,
"learning_rate": 2.9995648932777726e-05,
"loss": 0.164,
"step": 10600
},
{
"epoch": 0.0,
"learning_rate": 2.9995607678703367e-05,
"loss": 0.1569,
"step": 10700
},
{
"epoch": 0.0,
"learning_rate": 2.9995566424629012e-05,
"loss": 0.1531,
"step": 10800
},
{
"epoch": 0.0,
"learning_rate": 2.9995525170554657e-05,
"loss": 0.1566,
"step": 10900
},
{
"epoch": 0.0,
"learning_rate": 2.9995483916480302e-05,
"loss": 0.1423,
"step": 11000
},
{
"epoch": 0.0,
"learning_rate": 2.9995442662405944e-05,
"loss": 0.1405,
"step": 11100
},
{
"epoch": 0.0,
"learning_rate": 2.9995401408331592e-05,
"loss": 0.1471,
"step": 11200
},
{
"epoch": 0.0,
"learning_rate": 2.9995360154257234e-05,
"loss": 0.1347,
"step": 11300
},
{
"epoch": 0.0,
"learning_rate": 2.999531890018288e-05,
"loss": 0.1674,
"step": 11400
},
{
"epoch": 0.0,
"learning_rate": 2.9995277646108524e-05,
"loss": 0.1459,
"step": 11500
},
{
"epoch": 0.0,
"learning_rate": 2.999523639203417e-05,
"loss": 0.1151,
"step": 11600
},
{
"epoch": 0.0,
"learning_rate": 2.9995195137959814e-05,
"loss": 0.1601,
"step": 11700
},
{
"epoch": 0.0,
"learning_rate": 2.999515388388546e-05,
"loss": 0.1225,
"step": 11800
},
{
"epoch": 0.0,
"learning_rate": 2.9995112629811104e-05,
"loss": 0.1402,
"step": 11900
},
{
"epoch": 0.0,
"learning_rate": 2.9995071375736745e-05,
"loss": 0.1446,
"step": 12000
},
{
"epoch": 0.0,
"learning_rate": 2.9995030121662394e-05,
"loss": 0.1492,
"step": 12100
},
{
"epoch": 0.0,
"learning_rate": 2.9994988867588035e-05,
"loss": 0.1349,
"step": 12200
},
{
"epoch": 0.0,
"learning_rate": 2.999494761351368e-05,
"loss": 0.1444,
"step": 12300
},
{
"epoch": 0.0,
"learning_rate": 2.9994906359439325e-05,
"loss": 0.1433,
"step": 12400
},
{
"epoch": 0.0,
"learning_rate": 2.999486510536497e-05,
"loss": 0.1442,
"step": 12500
},
{
"epoch": 0.0,
"learning_rate": 2.999482385129061e-05,
"loss": 0.1139,
"step": 12600
},
{
"epoch": 0.0,
"learning_rate": 2.999478259721626e-05,
"loss": 0.1315,
"step": 12700
},
{
"epoch": 0.0,
"learning_rate": 2.99947413431419e-05,
"loss": 0.1569,
"step": 12800
},
{
"epoch": 0.0,
"learning_rate": 2.9994700089067547e-05,
"loss": 0.1222,
"step": 12900
},
{
"epoch": 0.0,
"learning_rate": 2.999465883499319e-05,
"loss": 0.1266,
"step": 13000
},
{
"epoch": 0.0,
"learning_rate": 2.9994617580918837e-05,
"loss": 0.1427,
"step": 13100
},
{
"epoch": 0.0,
"learning_rate": 2.9994576326844478e-05,
"loss": 0.1287,
"step": 13200
},
{
"epoch": 0.0,
"learning_rate": 2.9994535072770126e-05,
"loss": 0.1353,
"step": 13300
},
{
"epoch": 0.0,
"learning_rate": 2.9994493818695768e-05,
"loss": 0.1429,
"step": 13400
},
{
"epoch": 0.0,
"learning_rate": 2.9994452564621413e-05,
"loss": 0.1268,
"step": 13500
},
{
"epoch": 0.0,
"learning_rate": 2.9994411310547058e-05,
"loss": 0.1187,
"step": 13600
},
{
"epoch": 0.0,
"learning_rate": 2.9994370056472703e-05,
"loss": 0.1497,
"step": 13700
},
{
"epoch": 0.0,
"learning_rate": 2.9994328802398348e-05,
"loss": 0.1325,
"step": 13800
},
{
"epoch": 0.0,
"learning_rate": 2.9994288373405478e-05,
"loss": 0.1375,
"step": 13900
},
{
"epoch": 0.0,
"learning_rate": 2.9994247119331123e-05,
"loss": 0.1542,
"step": 14000
},
{
"epoch": 0.0,
"learning_rate": 2.9994205865256768e-05,
"loss": 0.147,
"step": 14100
},
{
"epoch": 0.0,
"learning_rate": 2.9994164611182413e-05,
"loss": 0.1269,
"step": 14200
},
{
"epoch": 0.0,
"learning_rate": 2.9994123357108055e-05,
"loss": 0.1141,
"step": 14300
},
{
"epoch": 0.0,
"learning_rate": 2.9994082103033703e-05,
"loss": 0.1304,
"step": 14400
},
{
"epoch": 0.0,
"learning_rate": 2.9994040848959348e-05,
"loss": 0.1029,
"step": 14500
},
{
"epoch": 0.0,
"learning_rate": 2.999399959488499e-05,
"loss": 0.0914,
"step": 14600
},
{
"epoch": 0.0,
"learning_rate": 2.9993958340810638e-05,
"loss": 0.1389,
"step": 14700
},
{
"epoch": 0.0,
"learning_rate": 2.999391708673628e-05,
"loss": 0.1376,
"step": 14800
},
{
"epoch": 0.0,
"learning_rate": 2.9993876245202667e-05,
"loss": 0.1233,
"step": 14900
},
{
"epoch": 0.0,
"learning_rate": 2.9993834991128312e-05,
"loss": 0.1309,
"step": 15000
},
{
"epoch": 0.0,
"learning_rate": 2.9993793737053954e-05,
"loss": 0.133,
"step": 15100
},
{
"epoch": 0.0,
"learning_rate": 2.9993752482979602e-05,
"loss": 0.1391,
"step": 15200
},
{
"epoch": 0.0,
"learning_rate": 2.9993711228905244e-05,
"loss": 0.1182,
"step": 15300
},
{
"epoch": 0.0,
"learning_rate": 2.999366997483089e-05,
"loss": 0.1414,
"step": 15400
},
{
"epoch": 0.0,
"learning_rate": 2.9993628720756534e-05,
"loss": 0.1214,
"step": 15500
},
{
"epoch": 0.0,
"learning_rate": 2.999358746668218e-05,
"loss": 0.0809,
"step": 15600
},
{
"epoch": 0.0,
"learning_rate": 2.999354621260782e-05,
"loss": 0.1307,
"step": 15700
},
{
"epoch": 0.0,
"learning_rate": 2.999350495853347e-05,
"loss": 0.0915,
"step": 15800
},
{
"epoch": 0.0,
"learning_rate": 2.9993463704459114e-05,
"loss": 0.1278,
"step": 15900
},
{
"epoch": 0.0,
"learning_rate": 2.9993422450384755e-05,
"loss": 0.1266,
"step": 16000
},
{
"epoch": 0.0,
"learning_rate": 2.9993381196310404e-05,
"loss": 0.1135,
"step": 16100
},
{
"epoch": 0.0,
"learning_rate": 2.9993339942236045e-05,
"loss": 0.1251,
"step": 16200
},
{
"epoch": 0.0,
"learning_rate": 2.999329868816169e-05,
"loss": 0.1367,
"step": 16300
},
{
"epoch": 0.0,
"learning_rate": 2.9993257434087335e-05,
"loss": 0.1205,
"step": 16400
},
{
"epoch": 0.0,
"learning_rate": 2.999321618001298e-05,
"loss": 0.1462,
"step": 16500
},
{
"epoch": 0.0,
"learning_rate": 2.999317492593862e-05,
"loss": 0.1299,
"step": 16600
},
{
"epoch": 0.0,
"learning_rate": 2.999313367186427e-05,
"loss": 0.1049,
"step": 16700
},
{
"epoch": 0.0,
"learning_rate": 2.999309241778991e-05,
"loss": 0.1164,
"step": 16800
},
{
"epoch": 0.0,
"learning_rate": 2.9993051163715557e-05,
"loss": 0.1475,
"step": 16900
},
{
"epoch": 0.0,
"learning_rate": 2.99930099096412e-05,
"loss": 0.1131,
"step": 17000
},
{
"epoch": 0.0,
"learning_rate": 2.9992968655566846e-05,
"loss": 0.1297,
"step": 17100
},
{
"epoch": 0.0,
"learning_rate": 2.9992927401492488e-05,
"loss": 0.1154,
"step": 17200
},
{
"epoch": 0.0,
"learning_rate": 2.9992886147418136e-05,
"loss": 0.1271,
"step": 17300
},
{
"epoch": 0.0,
"learning_rate": 2.9992844893343778e-05,
"loss": 0.1306,
"step": 17400
},
{
"epoch": 0.0,
"learning_rate": 2.9992803639269423e-05,
"loss": 0.1157,
"step": 17500
},
{
"epoch": 0.0,
"learning_rate": 2.9992762385195068e-05,
"loss": 0.1525,
"step": 17600
},
{
"epoch": 0.0,
"learning_rate": 2.9992721131120713e-05,
"loss": 0.1188,
"step": 17700
},
{
"epoch": 0.0,
"learning_rate": 2.9992679877046358e-05,
"loss": 0.1322,
"step": 17800
},
{
"epoch": 0.0,
"learning_rate": 2.9992638622972003e-05,
"loss": 0.1322,
"step": 17900
},
{
"epoch": 0.0,
"learning_rate": 2.9992597368897648e-05,
"loss": 0.1288,
"step": 18000
},
{
"epoch": 0.0,
"learning_rate": 2.999255611482329e-05,
"loss": 0.123,
"step": 18100
},
{
"epoch": 0.0,
"learning_rate": 2.9992514860748938e-05,
"loss": 0.1447,
"step": 18200
},
{
"epoch": 0.0,
"learning_rate": 2.999247360667458e-05,
"loss": 0.1312,
"step": 18300
},
{
"epoch": 0.0,
"learning_rate": 2.9992432352600224e-05,
"loss": 0.1187,
"step": 18400
},
{
"epoch": 0.0,
"learning_rate": 2.999239109852587e-05,
"loss": 0.1371,
"step": 18500
},
{
"epoch": 0.0,
"learning_rate": 2.9992349844451514e-05,
"loss": 0.1273,
"step": 18600
},
{
"epoch": 0.0,
"learning_rate": 2.9992308590377156e-05,
"loss": 0.1524,
"step": 18700
},
{
"epoch": 0.0,
"learning_rate": 2.9992267336302804e-05,
"loss": 0.1354,
"step": 18800
},
{
"epoch": 0.0,
"learning_rate": 2.9992226082228446e-05,
"loss": 0.1204,
"step": 18900
},
{
"epoch": 0.0,
"learning_rate": 2.999218482815409e-05,
"loss": 0.1483,
"step": 19000
},
{
"epoch": 0.0,
"learning_rate": 2.9992143574079736e-05,
"loss": 0.1246,
"step": 19100
},
{
"epoch": 0.0,
"learning_rate": 2.999210232000538e-05,
"loss": 0.094,
"step": 19200
},
{
"epoch": 0.0,
"learning_rate": 2.9992061065931022e-05,
"loss": 0.1177,
"step": 19300
},
{
"epoch": 0.0,
"learning_rate": 2.999201981185667e-05,
"loss": 0.1363,
"step": 19400
},
{
"epoch": 0.0,
"learning_rate": 2.9991978557782312e-05,
"loss": 0.1276,
"step": 19500
},
{
"epoch": 0.0,
"learning_rate": 2.9991937303707957e-05,
"loss": 0.1299,
"step": 19600
},
{
"epoch": 0.0,
"learning_rate": 2.9991896049633602e-05,
"loss": 0.1389,
"step": 19700
},
{
"epoch": 0.0,
"learning_rate": 2.9991854795559247e-05,
"loss": 0.1224,
"step": 19800
},
{
"epoch": 0.0,
"learning_rate": 2.9991813541484892e-05,
"loss": 0.1317,
"step": 19900
},
{
"epoch": 0.0,
"learning_rate": 2.9991772287410537e-05,
"loss": 0.1223,
"step": 20000
},
{
"epoch": 0.0,
"learning_rate": 2.9991731033336182e-05,
"loss": 0.1234,
"step": 20100
},
{
"epoch": 0.0,
"learning_rate": 2.9991689779261824e-05,
"loss": 0.1001,
"step": 20200
},
{
"epoch": 0.0,
"learning_rate": 2.9991648525187472e-05,
"loss": 0.1282,
"step": 20300
},
{
"epoch": 0.0,
"learning_rate": 2.9991607271113114e-05,
"loss": 0.0976,
"step": 20400
},
{
"epoch": 0.0,
"learning_rate": 2.999156601703876e-05,
"loss": 0.1214,
"step": 20500
},
{
"epoch": 0.0,
"learning_rate": 2.9991524762964403e-05,
"loss": 0.1281,
"step": 20600
},
{
"epoch": 0.0,
"learning_rate": 2.999148350889005e-05,
"loss": 0.1027,
"step": 20700
},
{
"epoch": 0.0,
"learning_rate": 2.999144225481569e-05,
"loss": 0.0946,
"step": 20800
},
{
"epoch": 0.0,
"learning_rate": 2.999140100074134e-05,
"loss": 0.1501,
"step": 20900
},
{
"epoch": 0.0,
"learning_rate": 2.9991360159207726e-05,
"loss": 0.1294,
"step": 21000
},
{
"epoch": 0.0,
"learning_rate": 2.9991318905133368e-05,
"loss": 0.1237,
"step": 21100
},
{
"epoch": 0.0,
"learning_rate": 2.9991277651059013e-05,
"loss": 0.1613,
"step": 21200
},
{
"epoch": 0.0,
"learning_rate": 2.9991236396984658e-05,
"loss": 0.134,
"step": 21300
},
{
"epoch": 0.0,
"learning_rate": 2.9991195142910303e-05,
"loss": 0.1156,
"step": 21400
},
{
"epoch": 0.0,
"learning_rate": 2.9991153888835948e-05,
"loss": 0.111,
"step": 21500
},
{
"epoch": 0.0,
"learning_rate": 2.9991112634761592e-05,
"loss": 0.1627,
"step": 21600
},
{
"epoch": 0.0,
"learning_rate": 2.9991071380687237e-05,
"loss": 0.1362,
"step": 21700
},
{
"epoch": 0.0,
"learning_rate": 2.999103012661288e-05,
"loss": 0.1193,
"step": 21800
},
{
"epoch": 0.0,
"learning_rate": 2.9990988872538527e-05,
"loss": 0.133,
"step": 21900
},
{
"epoch": 0.0,
"learning_rate": 2.999094761846417e-05,
"loss": 0.1091,
"step": 22000
},
{
"epoch": 0.0,
"learning_rate": 2.9990906364389814e-05,
"loss": 0.1259,
"step": 22100
},
{
"epoch": 0.0,
"learning_rate": 2.999086511031546e-05,
"loss": 0.1234,
"step": 22200
},
{
"epoch": 0.0,
"learning_rate": 2.9990823856241104e-05,
"loss": 0.115,
"step": 22300
},
{
"epoch": 0.0,
"learning_rate": 2.9990782602166745e-05,
"loss": 0.1402,
"step": 22400
},
{
"epoch": 0.0,
"learning_rate": 2.9990741348092394e-05,
"loss": 0.1286,
"step": 22500
},
{
"epoch": 0.0,
"learning_rate": 2.9990700094018035e-05,
"loss": 0.1092,
"step": 22600
},
{
"epoch": 0.0,
"learning_rate": 2.9990659252484423e-05,
"loss": 0.1191,
"step": 22700
},
{
"epoch": 0.0,
"learning_rate": 2.9990617998410068e-05,
"loss": 0.1197,
"step": 22800
},
{
"epoch": 0.0,
"learning_rate": 2.9990576744335716e-05,
"loss": 0.1361,
"step": 22900
},
{
"epoch": 0.0,
"learning_rate": 2.9990535490261358e-05,
"loss": 0.1234,
"step": 23000
},
{
"epoch": 0.0,
"learning_rate": 2.9990494236187003e-05,
"loss": 0.1068,
"step": 23100
},
{
"epoch": 0.0,
"learning_rate": 2.9990452982112648e-05,
"loss": 0.1248,
"step": 23200
},
{
"epoch": 0.0,
"learning_rate": 2.9990411728038293e-05,
"loss": 0.1148,
"step": 23300
},
{
"epoch": 0.0,
"learning_rate": 2.9990370473963934e-05,
"loss": 0.1355,
"step": 23400
},
{
"epoch": 0.0,
"learning_rate": 2.9990329219889583e-05,
"loss": 0.1161,
"step": 23500
},
{
"epoch": 0.0,
"learning_rate": 2.9990287965815224e-05,
"loss": 0.12,
"step": 23600
},
{
"epoch": 0.0,
"learning_rate": 2.999024671174087e-05,
"loss": 0.1173,
"step": 23700
},
{
"epoch": 0.0,
"learning_rate": 2.999020545766651e-05,
"loss": 0.1193,
"step": 23800
},
{
"epoch": 0.0,
"learning_rate": 2.999016420359216e-05,
"loss": 0.1455,
"step": 23900
},
{
"epoch": 0.0,
"learning_rate": 2.99901229495178e-05,
"loss": 0.1296,
"step": 24000
},
{
"epoch": 0.0,
"learning_rate": 2.9990081695443446e-05,
"loss": 0.1402,
"step": 24100
},
{
"epoch": 0.0,
"learning_rate": 2.999004044136909e-05,
"loss": 0.1232,
"step": 24200
},
{
"epoch": 0.0,
"learning_rate": 2.9989999187294736e-05,
"loss": 0.1263,
"step": 24300
},
{
"epoch": 0.0,
"learning_rate": 2.9989957933220377e-05,
"loss": 0.1339,
"step": 24400
},
{
"epoch": 0.0,
"learning_rate": 2.9989916679146026e-05,
"loss": 0.1242,
"step": 24500
},
{
"epoch": 0.0,
"learning_rate": 2.9989875425071667e-05,
"loss": 0.1557,
"step": 24600
},
{
"epoch": 0.0,
"learning_rate": 2.9989834170997312e-05,
"loss": 0.128,
"step": 24700
},
{
"epoch": 0.0,
"eval_accuracy": 0.9412505310427588,
"eval_f1": 0.9412461155109177,
"eval_loss": 0.15371489524841309,
"eval_matthews_correlation": 0.8826317470341026,
"eval_precision": 0.9413817526260211,
"eval_recall": 0.9412500042409688,
"eval_runtime": 1573.539,
"eval_samples_per_second": 2628.686,
"eval_steps_per_second": 2628.686,
"step": 24769
},
{
"epoch": 1.0,
"learning_rate": 2.9989792916922957e-05,
"loss": 0.1622,
"step": 24800
},
{
"epoch": 1.0,
"learning_rate": 2.9989751662848602e-05,
"loss": 0.1287,
"step": 24900
},
{
"epoch": 1.0,
"learning_rate": 2.9989710408774247e-05,
"loss": 0.1407,
"step": 25000
},
{
"epoch": 1.0,
"learning_rate": 2.9989669154699892e-05,
"loss": 0.1415,
"step": 25100
},
{
"epoch": 1.0,
"learning_rate": 2.9989627900625537e-05,
"loss": 0.1269,
"step": 25200
},
{
"epoch": 1.0,
"learning_rate": 2.998958664655118e-05,
"loss": 0.1266,
"step": 25300
},
{
"epoch": 1.0,
"learning_rate": 2.9989545392476827e-05,
"loss": 0.1323,
"step": 25400
},
{
"epoch": 1.0,
"learning_rate": 2.998950413840247e-05,
"loss": 0.1448,
"step": 25500
},
{
"epoch": 1.0,
"learning_rate": 2.9989462884328114e-05,
"loss": 0.1476,
"step": 25600
},
{
"epoch": 1.0,
"learning_rate": 2.998942163025376e-05,
"loss": 0.1401,
"step": 25700
},
{
"epoch": 1.0,
"learning_rate": 2.9989380376179404e-05,
"loss": 0.1688,
"step": 25800
},
{
"epoch": 1.0,
"learning_rate": 2.9989339122105045e-05,
"loss": 0.1509,
"step": 25900
},
{
"epoch": 1.0,
"learning_rate": 2.9989297868030694e-05,
"loss": 0.1828,
"step": 26000
},
{
"epoch": 1.0,
"learning_rate": 2.9989256613956335e-05,
"loss": 0.1663,
"step": 26100
},
{
"epoch": 1.0,
"learning_rate": 2.998921535988198e-05,
"loss": 0.1476,
"step": 26200
},
{
"epoch": 1.0,
"learning_rate": 2.9989174105807625e-05,
"loss": 0.1601,
"step": 26300
},
{
"epoch": 1.0,
"learning_rate": 2.998913285173327e-05,
"loss": 0.1552,
"step": 26400
},
{
"epoch": 1.0,
"learning_rate": 2.998909159765891e-05,
"loss": 0.1573,
"step": 26500
},
{
"epoch": 1.0,
"learning_rate": 2.998905034358456e-05,
"loss": 0.1707,
"step": 26600
},
{
"epoch": 1.0,
"learning_rate": 2.99890090895102e-05,
"loss": 0.1389,
"step": 26700
},
{
"epoch": 1.0,
"learning_rate": 2.9988967835435847e-05,
"loss": 0.1472,
"step": 26800
},
{
"epoch": 1.0,
"learning_rate": 2.9988926581361495e-05,
"loss": 0.1456,
"step": 26900
},
{
"epoch": 1.0,
"learning_rate": 2.9988885327287137e-05,
"loss": 0.1366,
"step": 27000
},
{
"epoch": 1.0,
"learning_rate": 2.998884407321278e-05,
"loss": 0.1623,
"step": 27100
},
{
"epoch": 1.0,
"learning_rate": 2.9988802819138426e-05,
"loss": 0.1525,
"step": 27200
},
{
"epoch": 1.0,
"learning_rate": 2.998876156506407e-05,
"loss": 0.1426,
"step": 27300
},
{
"epoch": 1.0,
"learning_rate": 2.9988720310989713e-05,
"loss": 0.1444,
"step": 27400
},
{
"epoch": 1.0,
"learning_rate": 2.998867905691536e-05,
"loss": 0.152,
"step": 27500
},
{
"epoch": 1.0,
"learning_rate": 2.9988637802841003e-05,
"loss": 0.1441,
"step": 27600
},
{
"epoch": 1.0,
"learning_rate": 2.9988596548766648e-05,
"loss": 0.1566,
"step": 27700
},
{
"epoch": 1.0,
"learning_rate": 2.9988555294692293e-05,
"loss": 0.1052,
"step": 27800
},
{
"epoch": 1.0,
"learning_rate": 2.9988514040617938e-05,
"loss": 0.1509,
"step": 27900
},
{
"epoch": 1.0,
"learning_rate": 2.998847278654358e-05,
"loss": 0.1519,
"step": 28000
},
{
"epoch": 1.0,
"learning_rate": 2.9988431532469228e-05,
"loss": 0.1331,
"step": 28100
},
{
"epoch": 1.0,
"learning_rate": 2.998839027839487e-05,
"loss": 0.1391,
"step": 28200
},
{
"epoch": 1.0,
"learning_rate": 2.9988349024320514e-05,
"loss": 0.1295,
"step": 28300
},
{
"epoch": 1.0,
"learning_rate": 2.998830777024616e-05,
"loss": 0.151,
"step": 28400
},
{
"epoch": 1.0,
"learning_rate": 2.9988266516171804e-05,
"loss": 0.1396,
"step": 28500
},
{
"epoch": 1.0,
"learning_rate": 2.9988225262097446e-05,
"loss": 0.1138,
"step": 28600
},
{
"epoch": 1.0,
"learning_rate": 2.9988184008023094e-05,
"loss": 0.138,
"step": 28700
},
{
"epoch": 1.0,
"learning_rate": 2.9988143166489482e-05,
"loss": 0.118,
"step": 28800
},
{
"epoch": 1.0,
"learning_rate": 2.9988101912415127e-05,
"loss": 0.1409,
"step": 28900
},
{
"epoch": 1.0,
"learning_rate": 2.998806065834077e-05,
"loss": 0.1454,
"step": 29000
},
{
"epoch": 1.0,
"learning_rate": 2.9988019404266417e-05,
"loss": 0.1435,
"step": 29100
},
{
"epoch": 1.0,
"learning_rate": 2.9987978562732804e-05,
"loss": 0.1201,
"step": 29200
},
{
"epoch": 1.0,
"learning_rate": 2.9987937308658446e-05,
"loss": 0.1533,
"step": 29300
},
{
"epoch": 1.0,
"learning_rate": 2.9987896467124837e-05,
"loss": 0.1382,
"step": 29400
},
{
"epoch": 1.0,
"learning_rate": 2.998785521305048e-05,
"loss": 0.1454,
"step": 29500
},
{
"epoch": 1.0,
"learning_rate": 2.9987813958976127e-05,
"loss": 0.1453,
"step": 29600
},
{
"epoch": 1.0,
"learning_rate": 2.998777270490177e-05,
"loss": 0.1374,
"step": 29700
},
{
"epoch": 1.0,
"learning_rate": 2.9987731450827414e-05,
"loss": 0.1434,
"step": 29800
},
{
"epoch": 1.0,
"learning_rate": 2.998769019675306e-05,
"loss": 0.1465,
"step": 29900
},
{
"epoch": 1.0,
"learning_rate": 2.9987648942678703e-05,
"loss": 0.1518,
"step": 30000
},
{
"epoch": 1.0,
"learning_rate": 2.9987607688604345e-05,
"loss": 0.1218,
"step": 30100
},
{
"epoch": 1.0,
"learning_rate": 2.9987566434529993e-05,
"loss": 0.1333,
"step": 30200
},
{
"epoch": 1.0,
"learning_rate": 2.9987525180455635e-05,
"loss": 0.1646,
"step": 30300
},
{
"epoch": 1.0,
"learning_rate": 2.998748392638128e-05,
"loss": 0.1376,
"step": 30400
},
{
"epoch": 1.0,
"learning_rate": 2.998744267230692e-05,
"loss": 0.1376,
"step": 30500
},
{
"epoch": 1.0,
"learning_rate": 2.998740141823257e-05,
"loss": 0.1368,
"step": 30600
},
{
"epoch": 1.0,
"learning_rate": 2.998736016415821e-05,
"loss": 0.1459,
"step": 30700
},
{
"epoch": 1.0,
"learning_rate": 2.9987318910083856e-05,
"loss": 0.1297,
"step": 30800
},
{
"epoch": 1.0,
"learning_rate": 2.99872776560095e-05,
"loss": 0.1576,
"step": 30900
},
{
"epoch": 1.0,
"learning_rate": 2.9987236401935146e-05,
"loss": 0.1458,
"step": 31000
},
{
"epoch": 1.0,
"learning_rate": 2.998719514786079e-05,
"loss": 0.1278,
"step": 31100
},
{
"epoch": 1.0,
"learning_rate": 2.9987153893786436e-05,
"loss": 0.1475,
"step": 31200
},
{
"epoch": 1.0,
"learning_rate": 2.998711263971208e-05,
"loss": 0.1608,
"step": 31300
},
{
"epoch": 1.0,
"learning_rate": 2.9987071385637723e-05,
"loss": 0.1405,
"step": 31400
},
{
"epoch": 1.0,
"learning_rate": 2.998703013156337e-05,
"loss": 0.1395,
"step": 31500
},
{
"epoch": 1.0,
"learning_rate": 2.9986988877489013e-05,
"loss": 0.1495,
"step": 31600
},
{
"epoch": 1.0,
"learning_rate": 2.99869480359554e-05,
"loss": 0.1523,
"step": 31700
},
{
"epoch": 1.0,
"learning_rate": 2.9986906781881045e-05,
"loss": 0.1298,
"step": 31800
},
{
"epoch": 1.0,
"learning_rate": 2.998686552780669e-05,
"loss": 0.1558,
"step": 31900
},
{
"epoch": 1.0,
"learning_rate": 2.9986824273732335e-05,
"loss": 0.1224,
"step": 32000
},
{
"epoch": 1.0,
"learning_rate": 2.9986783019657977e-05,
"loss": 0.1515,
"step": 32100
},
{
"epoch": 1.0,
"learning_rate": 2.9986741765583625e-05,
"loss": 0.1261,
"step": 32200
},
{
"epoch": 1.0,
"learning_rate": 2.9986700511509267e-05,
"loss": 0.1513,
"step": 32300
},
{
"epoch": 1.0,
"learning_rate": 2.9986659257434912e-05,
"loss": 0.1606,
"step": 32400
},
{
"epoch": 1.0,
"learning_rate": 2.998661800336056e-05,
"loss": 0.142,
"step": 32500
},
{
"epoch": 1.0,
"learning_rate": 2.9986576749286202e-05,
"loss": 0.1448,
"step": 32600
},
{
"epoch": 1.0,
"learning_rate": 2.9986535495211847e-05,
"loss": 0.1492,
"step": 32700
},
{
"epoch": 1.0,
"learning_rate": 2.9986494241137492e-05,
"loss": 0.1591,
"step": 32800
},
{
"epoch": 1.0,
"learning_rate": 2.9986452987063137e-05,
"loss": 0.1365,
"step": 32900
},
{
"epoch": 1.0,
"learning_rate": 2.998641173298878e-05,
"loss": 0.1585,
"step": 33000
},
{
"epoch": 1.0,
"learning_rate": 2.9986370478914427e-05,
"loss": 0.1526,
"step": 33100
},
{
"epoch": 1.0,
"learning_rate": 2.9986329224840068e-05,
"loss": 0.1301,
"step": 33200
},
{
"epoch": 1.0,
"learning_rate": 2.9986287970765713e-05,
"loss": 0.1698,
"step": 33300
},
{
"epoch": 1.0,
"learning_rate": 2.9986246716691358e-05,
"loss": 0.1513,
"step": 33400
},
{
"epoch": 1.0,
"learning_rate": 2.9986205462617003e-05,
"loss": 0.1554,
"step": 33500
},
{
"epoch": 1.0,
"learning_rate": 2.9986164208542645e-05,
"loss": 0.1619,
"step": 33600
},
{
"epoch": 1.0,
"learning_rate": 2.9986122954468293e-05,
"loss": 0.1456,
"step": 33700
},
{
"epoch": 1.0,
"learning_rate": 2.9986081700393935e-05,
"loss": 0.1724,
"step": 33800
},
{
"epoch": 1.0,
"learning_rate": 2.998604044631958e-05,
"loss": 0.1806,
"step": 33900
},
{
"epoch": 1.0,
"learning_rate": 2.9985999192245225e-05,
"loss": 0.1503,
"step": 34000
},
{
"epoch": 1.0,
"learning_rate": 2.998595793817087e-05,
"loss": 0.1341,
"step": 34100
},
{
"epoch": 1.0,
"learning_rate": 2.998591668409651e-05,
"loss": 0.1599,
"step": 34200
},
{
"epoch": 1.0,
"learning_rate": 2.998587543002216e-05,
"loss": 0.1327,
"step": 34300
},
{
"epoch": 1.0,
"learning_rate": 2.99858341759478e-05,
"loss": 0.1172,
"step": 34400
},
{
"epoch": 1.0,
"learning_rate": 2.9985792921873446e-05,
"loss": 0.1544,
"step": 34500
},
{
"epoch": 1.0,
"learning_rate": 2.9985751667799095e-05,
"loss": 0.146,
"step": 34600
},
{
"epoch": 1.0,
"learning_rate": 2.9985710413724736e-05,
"loss": 0.1323,
"step": 34700
},
{
"epoch": 1.0,
"learning_rate": 2.998566915965038e-05,
"loss": 0.1682,
"step": 34800
},
{
"epoch": 1.0,
"learning_rate": 2.9985627905576026e-05,
"loss": 0.1505,
"step": 34900
},
{
"epoch": 1.0,
"learning_rate": 2.998558665150167e-05,
"loss": 0.1501,
"step": 35000
},
{
"epoch": 1.0,
"learning_rate": 2.9985545397427313e-05,
"loss": 0.1841,
"step": 35100
},
{
"epoch": 1.0,
"learning_rate": 2.998550414335296e-05,
"loss": 0.1416,
"step": 35200
},
{
"epoch": 1.0,
"learning_rate": 2.9985462889278603e-05,
"loss": 0.1621,
"step": 35300
},
{
"epoch": 1.0,
"learning_rate": 2.9985421635204247e-05,
"loss": 0.1578,
"step": 35400
},
{
"epoch": 1.0,
"learning_rate": 2.9985380381129892e-05,
"loss": 0.1456,
"step": 35500
},
{
"epoch": 1.0,
"learning_rate": 2.9985339127055537e-05,
"loss": 0.1641,
"step": 35600
},
{
"epoch": 1.0,
"learning_rate": 2.998529787298118e-05,
"loss": 0.146,
"step": 35700
},
{
"epoch": 1.0,
"learning_rate": 2.9985256618906827e-05,
"loss": 0.1264,
"step": 35800
},
{
"epoch": 1.0,
"learning_rate": 2.998521536483247e-05,
"loss": 0.149,
"step": 35900
},
{
"epoch": 1.0,
"learning_rate": 2.9985174110758114e-05,
"loss": 0.1421,
"step": 36000
},
{
"epoch": 1.0,
"learning_rate": 2.998513285668376e-05,
"loss": 0.1442,
"step": 36100
},
{
"epoch": 1.0,
"learning_rate": 2.9985091602609404e-05,
"loss": 0.1563,
"step": 36200
},
{
"epoch": 1.0,
"learning_rate": 2.9985050348535045e-05,
"loss": 0.1368,
"step": 36300
},
{
"epoch": 1.0,
"learning_rate": 2.9985009094460694e-05,
"loss": 0.1202,
"step": 36400
},
{
"epoch": 1.0,
"learning_rate": 2.9984967840386335e-05,
"loss": 0.156,
"step": 36500
},
{
"epoch": 1.0,
"learning_rate": 2.998492658631198e-05,
"loss": 0.1316,
"step": 36600
},
{
"epoch": 1.0,
"learning_rate": 2.998488533223763e-05,
"loss": 0.1266,
"step": 36700
},
{
"epoch": 1.0,
"learning_rate": 2.998484407816327e-05,
"loss": 0.1362,
"step": 36800
},
{
"epoch": 1.0,
"learning_rate": 2.9984802824088915e-05,
"loss": 0.1532,
"step": 36900
},
{
"epoch": 1.0,
"learning_rate": 2.998476157001456e-05,
"loss": 0.1364,
"step": 37000
},
{
"epoch": 1.0,
"learning_rate": 2.9984720315940205e-05,
"loss": 0.1402,
"step": 37100
},
{
"epoch": 1.0,
"learning_rate": 2.9984679061865847e-05,
"loss": 0.1469,
"step": 37200
},
{
"epoch": 1.0,
"learning_rate": 2.9984637807791495e-05,
"loss": 0.1281,
"step": 37300
},
{
"epoch": 1.0,
"learning_rate": 2.9984596966257883e-05,
"loss": 0.1166,
"step": 37400
},
{
"epoch": 1.0,
"learning_rate": 2.9984555712183524e-05,
"loss": 0.14,
"step": 37500
},
{
"epoch": 1.0,
"learning_rate": 2.998451445810917e-05,
"loss": 0.1445,
"step": 37600
},
{
"epoch": 1.0,
"learning_rate": 2.9984473204034814e-05,
"loss": 0.1244,
"step": 37700
},
{
"epoch": 1.0,
"learning_rate": 2.998443194996046e-05,
"loss": 0.1236,
"step": 37800
},
{
"epoch": 1.0,
"learning_rate": 2.99843906958861e-05,
"loss": 0.1377,
"step": 37900
},
{
"epoch": 1.0,
"learning_rate": 2.998434944181175e-05,
"loss": 0.1247,
"step": 38000
},
{
"epoch": 1.0,
"learning_rate": 2.9984308187737394e-05,
"loss": 0.1328,
"step": 38100
},
{
"epoch": 1.0,
"learning_rate": 2.9984266933663036e-05,
"loss": 0.1409,
"step": 38200
},
{
"epoch": 1.0,
"learning_rate": 2.9984225679588684e-05,
"loss": 0.1145,
"step": 38300
},
{
"epoch": 1.0,
"learning_rate": 2.9984184425514326e-05,
"loss": 0.1356,
"step": 38400
},
{
"epoch": 1.0,
"learning_rate": 2.998414317143997e-05,
"loss": 0.1317,
"step": 38500
},
{
"epoch": 1.0,
"learning_rate": 2.9984101917365616e-05,
"loss": 0.1297,
"step": 38600
},
{
"epoch": 1.0,
"learning_rate": 2.9984061075832e-05,
"loss": 0.1321,
"step": 38700
},
{
"epoch": 1.0,
"learning_rate": 2.9984019821757648e-05,
"loss": 0.1552,
"step": 38800
},
{
"epoch": 1.0,
"learning_rate": 2.998397856768329e-05,
"loss": 0.1352,
"step": 38900
},
{
"epoch": 1.0,
"learning_rate": 2.9983937313608935e-05,
"loss": 0.1217,
"step": 39000
},
{
"epoch": 1.0,
"learning_rate": 2.998389605953458e-05,
"loss": 0.1159,
"step": 39100
},
{
"epoch": 1.0,
"learning_rate": 2.9983854805460225e-05,
"loss": 0.1141,
"step": 39200
},
{
"epoch": 1.0,
"learning_rate": 2.9983813551385866e-05,
"loss": 0.0878,
"step": 39300
},
{
"epoch": 1.0,
"learning_rate": 2.9983772297311515e-05,
"loss": 0.1115,
"step": 39400
},
{
"epoch": 1.0,
"learning_rate": 2.9983731455777902e-05,
"loss": 0.1444,
"step": 39500
},
{
"epoch": 1.0,
"learning_rate": 2.9983690201703547e-05,
"loss": 0.1187,
"step": 39600
},
{
"epoch": 1.0,
"learning_rate": 2.998364894762919e-05,
"loss": 0.1168,
"step": 39700
},
{
"epoch": 1.0,
"learning_rate": 2.9983607693554837e-05,
"loss": 0.1417,
"step": 39800
},
{
"epoch": 1.0,
"learning_rate": 2.998356643948048e-05,
"loss": 0.1224,
"step": 39900
},
{
"epoch": 1.0,
"learning_rate": 2.9983525185406124e-05,
"loss": 0.1378,
"step": 40000
},
{
"epoch": 1.0,
"learning_rate": 2.998348393133177e-05,
"loss": 0.1148,
"step": 40100
},
{
"epoch": 1.0,
"learning_rate": 2.9983442677257414e-05,
"loss": 0.1312,
"step": 40200
},
{
"epoch": 1.0,
"learning_rate": 2.9983401423183055e-05,
"loss": 0.1075,
"step": 40300
},
{
"epoch": 1.0,
"learning_rate": 2.9983360169108704e-05,
"loss": 0.0949,
"step": 40400
},
{
"epoch": 1.0,
"learning_rate": 2.9983318915034345e-05,
"loss": 0.1106,
"step": 40500
},
{
"epoch": 1.0,
"learning_rate": 2.998327766095999e-05,
"loss": 0.0915,
"step": 40600
},
{
"epoch": 1.0,
"learning_rate": 2.9983236406885635e-05,
"loss": 0.1321,
"step": 40700
},
{
"epoch": 1.0,
"learning_rate": 2.998319515281128e-05,
"loss": 0.1145,
"step": 40800
},
{
"epoch": 1.0,
"learning_rate": 2.9983153898736925e-05,
"loss": 0.111,
"step": 40900
},
{
"epoch": 1.0,
"learning_rate": 2.998311264466257e-05,
"loss": 0.1445,
"step": 41000
},
{
"epoch": 1.0,
"learning_rate": 2.9983071390588215e-05,
"loss": 0.1068,
"step": 41100
},
{
"epoch": 1.0,
"learning_rate": 2.9983030136513857e-05,
"loss": 0.1167,
"step": 41200
},
{
"epoch": 1.0,
"learning_rate": 2.9982988882439505e-05,
"loss": 0.1445,
"step": 41300
},
{
"epoch": 1.0,
"learning_rate": 2.9982947628365147e-05,
"loss": 0.1114,
"step": 41400
},
{
"epoch": 1.0,
"learning_rate": 2.998290637429079e-05,
"loss": 0.104,
"step": 41500
},
{
"epoch": 1.0,
"learning_rate": 2.9982865120216437e-05,
"loss": 0.1261,
"step": 41600
},
{
"epoch": 1.0,
"learning_rate": 2.998282386614208e-05,
"loss": 0.1371,
"step": 41700
},
{
"epoch": 1.0,
"learning_rate": 2.9982782612067723e-05,
"loss": 0.1097,
"step": 41800
},
{
"epoch": 1.0,
"learning_rate": 2.998274135799337e-05,
"loss": 0.1258,
"step": 41900
},
{
"epoch": 1.0,
"learning_rate": 2.9982700103919013e-05,
"loss": 0.1119,
"step": 42000
},
{
"epoch": 1.0,
"learning_rate": 2.9982658849844658e-05,
"loss": 0.1315,
"step": 42100
},
{
"epoch": 1.0,
"learning_rate": 2.9982617595770303e-05,
"loss": 0.1242,
"step": 42200
},
{
"epoch": 1.0,
"learning_rate": 2.9982576341695948e-05,
"loss": 0.1184,
"step": 42300
},
{
"epoch": 1.0,
"learning_rate": 2.998253508762159e-05,
"loss": 0.1344,
"step": 42400
},
{
"epoch": 1.0,
"learning_rate": 2.9982493833547238e-05,
"loss": 0.1254,
"step": 42500
},
{
"epoch": 1.0,
"learning_rate": 2.998245257947288e-05,
"loss": 0.121,
"step": 42600
},
{
"epoch": 1.0,
"learning_rate": 2.9982411325398525e-05,
"loss": 0.1254,
"step": 42700
},
{
"epoch": 1.0,
"learning_rate": 2.998237007132417e-05,
"loss": 0.1308,
"step": 42800
},
{
"epoch": 1.0,
"learning_rate": 2.9982328817249814e-05,
"loss": 0.1099,
"step": 42900
},
{
"epoch": 1.0,
"learning_rate": 2.998228756317546e-05,
"loss": 0.1433,
"step": 43000
},
{
"epoch": 1.0,
"learning_rate": 2.9982246309101104e-05,
"loss": 0.1263,
"step": 43100
},
{
"epoch": 1.0,
"learning_rate": 2.998220505502675e-05,
"loss": 0.1217,
"step": 43200
},
{
"epoch": 1.0,
"learning_rate": 2.998216380095239e-05,
"loss": 0.1298,
"step": 43300
},
{
"epoch": 1.0,
"learning_rate": 2.998212254687804e-05,
"loss": 0.1359,
"step": 43400
},
{
"epoch": 1.0,
"learning_rate": 2.998208129280368e-05,
"loss": 0.1514,
"step": 43500
},
{
"epoch": 1.0,
"learning_rate": 2.9982040038729326e-05,
"loss": 0.1165,
"step": 43600
},
{
"epoch": 1.0,
"learning_rate": 2.998199878465497e-05,
"loss": 0.1191,
"step": 43700
},
{
"epoch": 1.0,
"learning_rate": 2.9981957530580616e-05,
"loss": 0.1484,
"step": 43800
},
{
"epoch": 1.0,
"learning_rate": 2.9981916276506257e-05,
"loss": 0.1221,
"step": 43900
},
{
"epoch": 1.0,
"learning_rate": 2.9981875022431906e-05,
"loss": 0.0714,
"step": 44000
},
{
"epoch": 1.0,
"learning_rate": 2.9981833768357547e-05,
"loss": 0.1398,
"step": 44100
},
{
"epoch": 1.0,
"learning_rate": 2.9981792514283192e-05,
"loss": 0.13,
"step": 44200
},
{
"epoch": 1.0,
"learning_rate": 2.9981751260208837e-05,
"loss": 0.1309,
"step": 44300
},
{
"epoch": 1.0,
"learning_rate": 2.9981710006134482e-05,
"loss": 0.1177,
"step": 44400
},
{
"epoch": 1.0,
"learning_rate": 2.9981668752060124e-05,
"loss": 0.1376,
"step": 44500
},
{
"epoch": 1.0,
"learning_rate": 2.9981627497985772e-05,
"loss": 0.1107,
"step": 44600
},
{
"epoch": 1.0,
"learning_rate": 2.9981586243911414e-05,
"loss": 0.1182,
"step": 44700
},
{
"epoch": 1.0,
"learning_rate": 2.998154498983706e-05,
"loss": 0.1288,
"step": 44800
},
{
"epoch": 1.0,
"learning_rate": 2.9981503735762704e-05,
"loss": 0.1198,
"step": 44900
},
{
"epoch": 1.0,
"learning_rate": 2.998146248168835e-05,
"loss": 0.0918,
"step": 45000
},
{
"epoch": 1.0,
"learning_rate": 2.9981421227613994e-05,
"loss": 0.1233,
"step": 45100
},
{
"epoch": 1.0,
"learning_rate": 2.998137997353964e-05,
"loss": 0.0905,
"step": 45200
},
{
"epoch": 1.0,
"learning_rate": 2.9981338719465284e-05,
"loss": 0.1315,
"step": 45300
},
{
"epoch": 1.0,
"learning_rate": 2.9981297465390925e-05,
"loss": 0.1153,
"step": 45400
},
{
"epoch": 1.0,
"learning_rate": 2.9981256211316574e-05,
"loss": 0.0926,
"step": 45500
},
{
"epoch": 1.0,
"learning_rate": 2.9981214957242215e-05,
"loss": 0.1079,
"step": 45600
},
{
"epoch": 1.0,
"learning_rate": 2.998117370316786e-05,
"loss": 0.1597,
"step": 45700
},
{
"epoch": 1.0,
"learning_rate": 2.9981132449093505e-05,
"loss": 0.1067,
"step": 45800
},
{
"epoch": 1.0,
"learning_rate": 2.9981091607559893e-05,
"loss": 0.1353,
"step": 45900
},
{
"epoch": 1.0,
"learning_rate": 2.9981050353485538e-05,
"loss": 0.1461,
"step": 46000
},
{
"epoch": 1.0,
"learning_rate": 2.998100909941118e-05,
"loss": 0.123,
"step": 46100
},
{
"epoch": 1.0,
"learning_rate": 2.9980967845336828e-05,
"loss": 0.1224,
"step": 46200
},
{
"epoch": 1.0,
"learning_rate": 2.998092659126247e-05,
"loss": 0.124,
"step": 46300
},
{
"epoch": 1.0,
"learning_rate": 2.9980885337188114e-05,
"loss": 0.1518,
"step": 46400
},
{
"epoch": 1.0,
"learning_rate": 2.9980844083113763e-05,
"loss": 0.1229,
"step": 46500
},
{
"epoch": 1.0,
"learning_rate": 2.9980802829039404e-05,
"loss": 0.1259,
"step": 46600
},
{
"epoch": 1.0,
"learning_rate": 2.998076157496505e-05,
"loss": 0.112,
"step": 46700
},
{
"epoch": 1.0,
"learning_rate": 2.9980720320890694e-05,
"loss": 0.1137,
"step": 46800
},
{
"epoch": 1.0,
"learning_rate": 2.998067906681634e-05,
"loss": 0.1286,
"step": 46900
},
{
"epoch": 1.0,
"learning_rate": 2.998063781274198e-05,
"loss": 0.1205,
"step": 47000
},
{
"epoch": 1.0,
"learning_rate": 2.998059655866763e-05,
"loss": 0.1052,
"step": 47100
},
{
"epoch": 1.0,
"learning_rate": 2.998055530459327e-05,
"loss": 0.1469,
"step": 47200
},
{
"epoch": 1.0,
"learning_rate": 2.9980514050518916e-05,
"loss": 0.1166,
"step": 47300
},
{
"epoch": 1.0,
"learning_rate": 2.9980473208985303e-05,
"loss": 0.1041,
"step": 47400
},
{
"epoch": 1.0,
"learning_rate": 2.9980431954910945e-05,
"loss": 0.1268,
"step": 47500
},
{
"epoch": 1.0,
"learning_rate": 2.9980390700836593e-05,
"loss": 0.1193,
"step": 47600
},
{
"epoch": 1.0,
"learning_rate": 2.9980349446762235e-05,
"loss": 0.1408,
"step": 47700
},
{
"epoch": 1.0,
"learning_rate": 2.998030819268788e-05,
"loss": 0.1067,
"step": 47800
},
{
"epoch": 1.0,
"learning_rate": 2.9980266938613528e-05,
"loss": 0.1193,
"step": 47900
},
{
"epoch": 1.0,
"learning_rate": 2.998022568453917e-05,
"loss": 0.1211,
"step": 48000
},
{
"epoch": 1.0,
"learning_rate": 2.9980184430464815e-05,
"loss": 0.1295,
"step": 48100
},
{
"epoch": 1.0,
"learning_rate": 2.998014317639046e-05,
"loss": 0.13,
"step": 48200
},
{
"epoch": 1.0,
"learning_rate": 2.9980101922316105e-05,
"loss": 0.1217,
"step": 48300
},
{
"epoch": 1.0,
"learning_rate": 2.9980060668241746e-05,
"loss": 0.1054,
"step": 48400
},
{
"epoch": 1.0,
"learning_rate": 2.9980019414167394e-05,
"loss": 0.1127,
"step": 48500
},
{
"epoch": 1.0,
"learning_rate": 2.9979978160093036e-05,
"loss": 0.1308,
"step": 48600
},
{
"epoch": 1.0,
"learning_rate": 2.997993690601868e-05,
"loss": 0.1329,
"step": 48700
},
{
"epoch": 1.0,
"learning_rate": 2.9979895651944326e-05,
"loss": 0.14,
"step": 48800
},
{
"epoch": 1.0,
"learning_rate": 2.997985439786997e-05,
"loss": 0.1262,
"step": 48900
},
{
"epoch": 1.0,
"learning_rate": 2.9979813143795613e-05,
"loss": 0.1331,
"step": 49000
},
{
"epoch": 1.0,
"learning_rate": 2.997977188972126e-05,
"loss": 0.1177,
"step": 49100
},
{
"epoch": 1.0,
"learning_rate": 2.9979730635646902e-05,
"loss": 0.134,
"step": 49200
},
{
"epoch": 1.0,
"learning_rate": 2.9979689381572547e-05,
"loss": 0.1338,
"step": 49300
},
{
"epoch": 1.0,
"learning_rate": 2.9979648127498192e-05,
"loss": 0.1415,
"step": 49400
},
{
"epoch": 1.0,
"learning_rate": 2.9979606873423837e-05,
"loss": 0.1381,
"step": 49500
},
{
"epoch": 1.0,
"eval_accuracy": 0.942031046023061,
"eval_f1": 0.9420274047220744,
"eval_loss": 0.15210944414138794,
"eval_matthews_correlation": 0.8841713498744154,
"eval_precision": 0.9421407920220504,
"eval_recall": 0.9420305647232377,
"eval_runtime": 1387.9505,
"eval_samples_per_second": 2980.178,
"eval_steps_per_second": 2980.178,
"step": 49538
},
{
"epoch": 2.0,
"learning_rate": 2.997956561934948e-05,
"loss": 0.1507,
"step": 49600
},
{
"epoch": 2.0,
"learning_rate": 2.9979524365275127e-05,
"loss": 0.132,
"step": 49700
},
{
"epoch": 2.0,
"learning_rate": 2.997948311120077e-05,
"loss": 0.1351,
"step": 49800
},
{
"epoch": 2.0,
"learning_rate": 2.9979441857126414e-05,
"loss": 0.1282,
"step": 49900
},
{
"epoch": 2.0,
"learning_rate": 2.9979400603052062e-05,
"loss": 0.1244,
"step": 50000
},
{
"epoch": 2.0,
"learning_rate": 2.9979359348977704e-05,
"loss": 0.1386,
"step": 50100
},
{
"epoch": 2.0,
"learning_rate": 2.997931809490335e-05,
"loss": 0.1273,
"step": 50200
},
{
"epoch": 2.0,
"learning_rate": 2.9979276840828994e-05,
"loss": 0.1344,
"step": 50300
},
{
"epoch": 2.0,
"learning_rate": 2.997923558675464e-05,
"loss": 0.1375,
"step": 50400
},
{
"epoch": 2.0,
"learning_rate": 2.997919433268028e-05,
"loss": 0.148,
"step": 50500
},
{
"epoch": 2.0,
"learning_rate": 2.997915307860593e-05,
"loss": 0.1519,
"step": 50600
},
{
"epoch": 2.0,
"learning_rate": 2.997911182453157e-05,
"loss": 0.1551,
"step": 50700
},
{
"epoch": 2.0,
"learning_rate": 2.9979070570457215e-05,
"loss": 0.1844,
"step": 50800
},
{
"epoch": 2.0,
"learning_rate": 2.997902931638286e-05,
"loss": 0.1535,
"step": 50900
},
{
"epoch": 2.0,
"learning_rate": 2.9978988062308505e-05,
"loss": 0.1542,
"step": 51000
},
{
"epoch": 2.0,
"learning_rate": 2.9978946808234147e-05,
"loss": 0.1406,
"step": 51100
},
{
"epoch": 2.0,
"learning_rate": 2.9978905554159795e-05,
"loss": 0.1647,
"step": 51200
},
{
"epoch": 2.0,
"learning_rate": 2.9978864300085437e-05,
"loss": 0.1429,
"step": 51300
},
{
"epoch": 2.0,
"learning_rate": 2.9978823046011082e-05,
"loss": 0.161,
"step": 51400
},
{
"epoch": 2.0,
"learning_rate": 2.9978781791936727e-05,
"loss": 0.1395,
"step": 51500
},
{
"epoch": 2.0,
"learning_rate": 2.997874053786237e-05,
"loss": 0.1425,
"step": 51600
},
{
"epoch": 2.0,
"learning_rate": 2.9978699283788013e-05,
"loss": 0.1512,
"step": 51700
},
{
"epoch": 2.0,
"learning_rate": 2.997865802971366e-05,
"loss": 0.1334,
"step": 51800
},
{
"epoch": 2.0,
"learning_rate": 2.9978616775639303e-05,
"loss": 0.1536,
"step": 51900
},
{
"epoch": 2.0,
"learning_rate": 2.9978575521564948e-05,
"loss": 0.1398,
"step": 52000
},
{
"epoch": 2.0,
"learning_rate": 2.9978534267490596e-05,
"loss": 0.1366,
"step": 52100
},
{
"epoch": 2.0,
"learning_rate": 2.9978493013416238e-05,
"loss": 0.1508,
"step": 52200
},
{
"epoch": 2.0,
"learning_rate": 2.9978451759341883e-05,
"loss": 0.1413,
"step": 52300
},
{
"epoch": 2.0,
"learning_rate": 2.9978410505267528e-05,
"loss": 0.1459,
"step": 52400
},
{
"epoch": 2.0,
"learning_rate": 2.9978369251193173e-05,
"loss": 0.1295,
"step": 52500
},
{
"epoch": 2.0,
"learning_rate": 2.9978327997118815e-05,
"loss": 0.1225,
"step": 52600
},
{
"epoch": 2.0,
"learning_rate": 2.9978286743044463e-05,
"loss": 0.1369,
"step": 52700
},
{
"epoch": 2.0,
"learning_rate": 2.9978245488970105e-05,
"loss": 0.1569,
"step": 52800
},
{
"epoch": 2.0,
"learning_rate": 2.997820423489575e-05,
"loss": 0.1257,
"step": 52900
},
{
"epoch": 2.0,
"learning_rate": 2.9978162980821394e-05,
"loss": 0.1295,
"step": 53000
},
{
"epoch": 2.0,
"learning_rate": 2.997812172674704e-05,
"loss": 0.1445,
"step": 53100
},
{
"epoch": 2.0,
"learning_rate": 2.997808047267268e-05,
"loss": 0.1323,
"step": 53200
},
{
"epoch": 2.0,
"learning_rate": 2.997803963113907e-05,
"loss": 0.1238,
"step": 53300
},
{
"epoch": 2.0,
"learning_rate": 2.9977998377064717e-05,
"loss": 0.1092,
"step": 53400
},
{
"epoch": 2.0,
"learning_rate": 2.9977957535531105e-05,
"loss": 0.1483,
"step": 53500
},
{
"epoch": 2.0,
"learning_rate": 2.997791628145675e-05,
"loss": 0.1059,
"step": 53600
},
{
"epoch": 2.0,
"learning_rate": 2.997787502738239e-05,
"loss": 0.1509,
"step": 53700
},
{
"epoch": 2.0,
"learning_rate": 2.9977833773308036e-05,
"loss": 0.1375,
"step": 53800
},
{
"epoch": 2.0,
"learning_rate": 2.997779251923368e-05,
"loss": 0.1362,
"step": 53900
},
{
"epoch": 2.0,
"learning_rate": 2.9977751265159326e-05,
"loss": 0.1153,
"step": 54000
},
{
"epoch": 2.0,
"learning_rate": 2.9977710011084968e-05,
"loss": 0.1498,
"step": 54100
},
{
"epoch": 2.0,
"learning_rate": 2.9977668757010616e-05,
"loss": 0.1421,
"step": 54200
},
{
"epoch": 2.0,
"learning_rate": 2.9977627502936258e-05,
"loss": 0.1391,
"step": 54300
},
{
"epoch": 2.0,
"learning_rate": 2.9977586248861903e-05,
"loss": 0.1347,
"step": 54400
},
{
"epoch": 2.0,
"learning_rate": 2.9977544994787548e-05,
"loss": 0.1515,
"step": 54500
},
{
"epoch": 2.0,
"learning_rate": 2.9977503740713193e-05,
"loss": 0.1356,
"step": 54600
},
{
"epoch": 2.0,
"learning_rate": 2.9977462486638834e-05,
"loss": 0.1271,
"step": 54700
},
{
"epoch": 2.0,
"learning_rate": 2.9977421232564483e-05,
"loss": 0.162,
"step": 54800
},
{
"epoch": 2.0,
"learning_rate": 2.9977379978490127e-05,
"loss": 0.1029,
"step": 54900
},
{
"epoch": 2.0,
"learning_rate": 2.997733872441577e-05,
"loss": 0.1429,
"step": 55000
},
{
"epoch": 2.0,
"learning_rate": 2.9977297470341417e-05,
"loss": 0.1495,
"step": 55100
},
{
"epoch": 2.0,
"learning_rate": 2.997725621626706e-05,
"loss": 0.1389,
"step": 55200
},
{
"epoch": 2.0,
"learning_rate": 2.9977214962192704e-05,
"loss": 0.1377,
"step": 55300
},
{
"epoch": 2.0,
"learning_rate": 2.997717370811835e-05,
"loss": 0.1273,
"step": 55400
},
{
"epoch": 2.0,
"learning_rate": 2.9977132454043994e-05,
"loss": 0.14,
"step": 55500
},
{
"epoch": 2.0,
"learning_rate": 2.9977091199969636e-05,
"loss": 0.1491,
"step": 55600
},
{
"epoch": 2.0,
"learning_rate": 2.9977049945895284e-05,
"loss": 0.1362,
"step": 55700
},
{
"epoch": 2.0,
"learning_rate": 2.997700910436167e-05,
"loss": 0.1254,
"step": 55800
},
{
"epoch": 2.0,
"learning_rate": 2.9976967850287313e-05,
"loss": 0.1389,
"step": 55900
},
{
"epoch": 2.0,
"learning_rate": 2.9976926596212958e-05,
"loss": 0.154,
"step": 56000
},
{
"epoch": 2.0,
"learning_rate": 2.9976885342138603e-05,
"loss": 0.138,
"step": 56100
},
{
"epoch": 2.0,
"learning_rate": 2.9976844088064248e-05,
"loss": 0.139,
"step": 56200
},
{
"epoch": 2.0,
"learning_rate": 2.9976802833989893e-05,
"loss": 0.1337,
"step": 56300
},
{
"epoch": 2.0,
"learning_rate": 2.9976761579915538e-05,
"loss": 0.1509,
"step": 56400
},
{
"epoch": 2.0,
"learning_rate": 2.9976720325841183e-05,
"loss": 0.1452,
"step": 56500
},
{
"epoch": 2.0,
"learning_rate": 2.9976679071766824e-05,
"loss": 0.1306,
"step": 56600
},
{
"epoch": 2.0,
"learning_rate": 2.9976637817692473e-05,
"loss": 0.1358,
"step": 56700
},
{
"epoch": 2.0,
"learning_rate": 2.9976596563618114e-05,
"loss": 0.1243,
"step": 56800
},
{
"epoch": 2.0,
"learning_rate": 2.997655530954376e-05,
"loss": 0.1451,
"step": 56900
},
{
"epoch": 2.0,
"learning_rate": 2.9976514055469404e-05,
"loss": 0.1322,
"step": 57000
},
{
"epoch": 2.0,
"learning_rate": 2.997647280139505e-05,
"loss": 0.1396,
"step": 57100
},
{
"epoch": 2.0,
"learning_rate": 2.997643154732069e-05,
"loss": 0.1636,
"step": 57200
},
{
"epoch": 2.0,
"learning_rate": 2.997639029324634e-05,
"loss": 0.1388,
"step": 57300
},
{
"epoch": 2.0,
"learning_rate": 2.997634903917198e-05,
"loss": 0.14,
"step": 57400
},
{
"epoch": 2.0,
"learning_rate": 2.9976307785097626e-05,
"loss": 0.1556,
"step": 57500
},
{
"epoch": 2.0,
"learning_rate": 2.997626653102327e-05,
"loss": 0.1406,
"step": 57600
},
{
"epoch": 2.0,
"learning_rate": 2.9976225276948916e-05,
"loss": 0.1241,
"step": 57700
},
{
"epoch": 2.0,
"learning_rate": 2.9976184022874557e-05,
"loss": 0.1656,
"step": 57800
},
{
"epoch": 2.0,
"learning_rate": 2.9976142768800206e-05,
"loss": 0.1459,
"step": 57900
},
{
"epoch": 2.0,
"learning_rate": 2.9976101514725847e-05,
"loss": 0.1462,
"step": 58000
},
{
"epoch": 2.0,
"learning_rate": 2.9976060260651492e-05,
"loss": 0.1437,
"step": 58100
},
{
"epoch": 2.0,
"learning_rate": 2.9976019006577137e-05,
"loss": 0.1607,
"step": 58200
},
{
"epoch": 2.0,
"learning_rate": 2.9975978165043528e-05,
"loss": 0.146,
"step": 58300
},
{
"epoch": 2.0,
"learning_rate": 2.997593691096917e-05,
"loss": 0.1542,
"step": 58400
},
{
"epoch": 2.0,
"learning_rate": 2.9975895656894815e-05,
"loss": 0.1431,
"step": 58500
},
{
"epoch": 2.0,
"learning_rate": 2.997585440282046e-05,
"loss": 0.1782,
"step": 58600
},
{
"epoch": 2.0,
"learning_rate": 2.9975813148746105e-05,
"loss": 0.177,
"step": 58700
},
{
"epoch": 2.0,
"learning_rate": 2.9975771894671746e-05,
"loss": 0.1327,
"step": 58800
},
{
"epoch": 2.0,
"learning_rate": 2.9975730640597395e-05,
"loss": 0.1325,
"step": 58900
},
{
"epoch": 2.0,
"learning_rate": 2.9975689386523036e-05,
"loss": 0.1535,
"step": 59000
},
{
"epoch": 2.0,
"learning_rate": 2.997564813244868e-05,
"loss": 0.122,
"step": 59100
},
{
"epoch": 2.0,
"learning_rate": 2.9975606878374326e-05,
"loss": 0.1304,
"step": 59200
},
{
"epoch": 2.0,
"learning_rate": 2.997556562429997e-05,
"loss": 0.1326,
"step": 59300
},
{
"epoch": 2.0,
"learning_rate": 2.9975524370225613e-05,
"loss": 0.1542,
"step": 59400
},
{
"epoch": 2.0,
"learning_rate": 2.997548311615126e-05,
"loss": 0.1349,
"step": 59500
},
{
"epoch": 2.0,
"learning_rate": 2.9975441862076903e-05,
"loss": 0.1556,
"step": 59600
},
{
"epoch": 2.0,
"learning_rate": 2.9975400608002548e-05,
"loss": 0.1555,
"step": 59700
},
{
"epoch": 2.0,
"learning_rate": 2.9975359353928196e-05,
"loss": 0.1384,
"step": 59800
},
{
"epoch": 2.0,
"learning_rate": 2.9975318099853838e-05,
"loss": 0.1839,
"step": 59900
},
{
"epoch": 2.0,
"learning_rate": 2.9975276845779483e-05,
"loss": 0.1432,
"step": 60000
},
{
"epoch": 2.0,
"learning_rate": 2.9975235591705128e-05,
"loss": 0.1479,
"step": 60100
},
{
"epoch": 2.0,
"learning_rate": 2.9975194337630773e-05,
"loss": 0.1689,
"step": 60200
},
{
"epoch": 2.0,
"learning_rate": 2.9975153083556414e-05,
"loss": 0.1339,
"step": 60300
},
{
"epoch": 2.0,
"learning_rate": 2.9975111829482063e-05,
"loss": 0.1528,
"step": 60400
},
{
"epoch": 2.0,
"learning_rate": 2.9975070575407704e-05,
"loss": 0.1409,
"step": 60500
},
{
"epoch": 2.0,
"learning_rate": 2.997502932133335e-05,
"loss": 0.1224,
"step": 60600
},
{
"epoch": 2.0,
"learning_rate": 2.9974988067258994e-05,
"loss": 0.1473,
"step": 60700
},
{
"epoch": 2.0,
"learning_rate": 2.997494681318464e-05,
"loss": 0.1305,
"step": 60800
},
{
"epoch": 2.0,
"learning_rate": 2.997490555911028e-05,
"loss": 0.1506,
"step": 60900
},
{
"epoch": 2.0,
"learning_rate": 2.997486430503593e-05,
"loss": 0.1501,
"step": 61000
},
{
"epoch": 2.0,
"learning_rate": 2.997482305096157e-05,
"loss": 0.1168,
"step": 61100
},
{
"epoch": 2.0,
"learning_rate": 2.9974781796887216e-05,
"loss": 0.1375,
"step": 61200
},
{
"epoch": 2.0,
"learning_rate": 2.997474054281286e-05,
"loss": 0.1334,
"step": 61300
},
{
"epoch": 2.0,
"learning_rate": 2.9974699288738505e-05,
"loss": 0.1325,
"step": 61400
},
{
"epoch": 2.0,
"learning_rate": 2.9974658034664147e-05,
"loss": 0.125,
"step": 61500
},
{
"epoch": 2.0,
"learning_rate": 2.9974616780589795e-05,
"loss": 0.1437,
"step": 61600
},
{
"epoch": 2.0,
"learning_rate": 2.9974575526515437e-05,
"loss": 0.1316,
"step": 61700
},
{
"epoch": 2.0,
"learning_rate": 2.9974534272441082e-05,
"loss": 0.143,
"step": 61800
},
{
"epoch": 2.0,
"learning_rate": 2.997449301836673e-05,
"loss": 0.1341,
"step": 61900
},
{
"epoch": 2.0,
"learning_rate": 2.9974451764292372e-05,
"loss": 0.1405,
"step": 62000
},
{
"epoch": 2.0,
"learning_rate": 2.9974410510218017e-05,
"loss": 0.1164,
"step": 62100
},
{
"epoch": 2.0,
"learning_rate": 2.9974369256143662e-05,
"loss": 0.1148,
"step": 62200
},
{
"epoch": 2.0,
"learning_rate": 2.9974328002069307e-05,
"loss": 0.1521,
"step": 62300
},
{
"epoch": 2.0,
"learning_rate": 2.997428674799495e-05,
"loss": 0.1292,
"step": 62400
},
{
"epoch": 2.0,
"learning_rate": 2.9974245493920597e-05,
"loss": 0.12,
"step": 62500
},
{
"epoch": 2.0,
"learning_rate": 2.997420423984624e-05,
"loss": 0.1294,
"step": 62600
},
{
"epoch": 2.0,
"learning_rate": 2.9974162985771883e-05,
"loss": 0.1173,
"step": 62700
},
{
"epoch": 2.0,
"learning_rate": 2.9974121731697528e-05,
"loss": 0.1189,
"step": 62800
},
{
"epoch": 2.0,
"learning_rate": 2.9974080477623173e-05,
"loss": 0.1302,
"step": 62900
},
{
"epoch": 2.0,
"learning_rate": 2.9974039223548815e-05,
"loss": 0.1299,
"step": 63000
},
{
"epoch": 2.0,
"learning_rate": 2.9973997969474463e-05,
"loss": 0.1051,
"step": 63100
},
{
"epoch": 2.0,
"learning_rate": 2.9973956715400105e-05,
"loss": 0.1283,
"step": 63200
},
{
"epoch": 2.0,
"learning_rate": 2.997391546132575e-05,
"loss": 0.1244,
"step": 63300
},
{
"epoch": 2.0,
"learning_rate": 2.9973874207251395e-05,
"loss": 0.1226,
"step": 63400
},
{
"epoch": 2.0,
"learning_rate": 2.9973833365717782e-05,
"loss": 0.1194,
"step": 63500
},
{
"epoch": 2.0,
"learning_rate": 2.9973792111643427e-05,
"loss": 0.1556,
"step": 63600
},
{
"epoch": 2.0,
"learning_rate": 2.9973750857569072e-05,
"loss": 0.1274,
"step": 63700
},
{
"epoch": 2.0,
"learning_rate": 2.9973709603494714e-05,
"loss": 0.1127,
"step": 63800
},
{
"epoch": 2.0,
"learning_rate": 2.9973668349420362e-05,
"loss": 0.1167,
"step": 63900
},
{
"epoch": 2.0,
"learning_rate": 2.9973627095346004e-05,
"loss": 0.1089,
"step": 64000
},
{
"epoch": 2.0,
"learning_rate": 2.997358584127165e-05,
"loss": 0.063,
"step": 64100
},
{
"epoch": 2.0,
"learning_rate": 2.9973544587197294e-05,
"loss": 0.1274,
"step": 64200
},
{
"epoch": 2.0,
"learning_rate": 2.997350333312294e-05,
"loss": 0.1307,
"step": 64300
},
{
"epoch": 2.0,
"learning_rate": 2.997346207904858e-05,
"loss": 0.1188,
"step": 64400
},
{
"epoch": 2.0,
"learning_rate": 2.997342082497423e-05,
"loss": 0.1141,
"step": 64500
},
{
"epoch": 2.0,
"learning_rate": 2.997337957089987e-05,
"loss": 0.1371,
"step": 64600
},
{
"epoch": 2.0,
"learning_rate": 2.9973338316825515e-05,
"loss": 0.1143,
"step": 64700
},
{
"epoch": 2.0,
"learning_rate": 2.997329706275116e-05,
"loss": 0.1226,
"step": 64800
},
{
"epoch": 2.0,
"learning_rate": 2.9973255808676805e-05,
"loss": 0.1176,
"step": 64900
},
{
"epoch": 2.0,
"learning_rate": 2.9973214554602447e-05,
"loss": 0.1231,
"step": 65000
},
{
"epoch": 2.0,
"learning_rate": 2.9973173300528095e-05,
"loss": 0.0797,
"step": 65100
},
{
"epoch": 2.0,
"learning_rate": 2.9973132046453737e-05,
"loss": 0.1038,
"step": 65200
},
{
"epoch": 2.0,
"learning_rate": 2.997309079237938e-05,
"loss": 0.0957,
"step": 65300
},
{
"epoch": 2.0,
"learning_rate": 2.997304953830503e-05,
"loss": 0.098,
"step": 65400
},
{
"epoch": 2.0,
"learning_rate": 2.997300828423067e-05,
"loss": 0.1269,
"step": 65500
},
{
"epoch": 2.0,
"learning_rate": 2.997296744269706e-05,
"loss": 0.0959,
"step": 65600
},
{
"epoch": 2.0,
"learning_rate": 2.9972926188622704e-05,
"loss": 0.1248,
"step": 65700
},
{
"epoch": 2.0,
"learning_rate": 2.9972885347089092e-05,
"loss": 0.1319,
"step": 65800
},
{
"epoch": 2.0,
"learning_rate": 2.9972844093014737e-05,
"loss": 0.1025,
"step": 65900
},
{
"epoch": 2.0,
"learning_rate": 2.9972802838940382e-05,
"loss": 0.1175,
"step": 66000
},
{
"epoch": 2.0,
"learning_rate": 2.9972761584866027e-05,
"loss": 0.1335,
"step": 66100
},
{
"epoch": 2.0,
"learning_rate": 2.9972720330791672e-05,
"loss": 0.0931,
"step": 66200
},
{
"epoch": 2.0,
"learning_rate": 2.9972679076717317e-05,
"loss": 0.1022,
"step": 66300
},
{
"epoch": 2.0,
"learning_rate": 2.997263782264296e-05,
"loss": 0.1277,
"step": 66400
},
{
"epoch": 2.0,
"learning_rate": 2.9972596568568607e-05,
"loss": 0.1197,
"step": 66500
},
{
"epoch": 2.0,
"learning_rate": 2.9972555314494248e-05,
"loss": 0.112,
"step": 66600
},
{
"epoch": 2.0,
"learning_rate": 2.9972514060419893e-05,
"loss": 0.1176,
"step": 66700
},
{
"epoch": 2.0,
"learning_rate": 2.9972472806345538e-05,
"loss": 0.1074,
"step": 66800
},
{
"epoch": 2.0,
"learning_rate": 2.9972431552271183e-05,
"loss": 0.1321,
"step": 66900
},
{
"epoch": 2.0,
"learning_rate": 2.9972390298196825e-05,
"loss": 0.1112,
"step": 67000
},
{
"epoch": 2.0,
"learning_rate": 2.9972349044122473e-05,
"loss": 0.1173,
"step": 67100
},
{
"epoch": 2.0,
"learning_rate": 2.9972307790048115e-05,
"loss": 0.1236,
"step": 67200
},
{
"epoch": 2.0,
"learning_rate": 2.997226653597376e-05,
"loss": 0.1198,
"step": 67300
},
{
"epoch": 2.0,
"learning_rate": 2.9972225281899405e-05,
"loss": 0.1195,
"step": 67400
},
{
"epoch": 2.0,
"learning_rate": 2.997218402782505e-05,
"loss": 0.1228,
"step": 67500
},
{
"epoch": 2.0,
"learning_rate": 2.997214277375069e-05,
"loss": 0.1159,
"step": 67600
},
{
"epoch": 2.0,
"learning_rate": 2.997210151967634e-05,
"loss": 0.1124,
"step": 67700
},
{
"epoch": 2.0,
"learning_rate": 2.997206026560198e-05,
"loss": 0.1325,
"step": 67800
},
{
"epoch": 2.0,
"learning_rate": 2.9972019011527626e-05,
"loss": 0.1154,
"step": 67900
},
{
"epoch": 2.0,
"learning_rate": 2.997197775745327e-05,
"loss": 0.1182,
"step": 68000
},
{
"epoch": 2.0,
"learning_rate": 2.997193691591966e-05,
"loss": 0.1123,
"step": 68100
},
{
"epoch": 2.0,
"learning_rate": 2.9971895661845304e-05,
"loss": 0.1388,
"step": 68200
},
{
"epoch": 2.0,
"learning_rate": 2.997185440777095e-05,
"loss": 0.1451,
"step": 68300
},
{
"epoch": 2.0,
"learning_rate": 2.997181315369659e-05,
"loss": 0.109,
"step": 68400
},
{
"epoch": 2.0,
"learning_rate": 2.997177189962224e-05,
"loss": 0.1114,
"step": 68500
},
{
"epoch": 2.0,
"learning_rate": 2.997173064554788e-05,
"loss": 0.1373,
"step": 68600
},
{
"epoch": 2.0,
"learning_rate": 2.9971689391473525e-05,
"loss": 0.114,
"step": 68700
},
{
"epoch": 2.0,
"learning_rate": 2.997164813739917e-05,
"loss": 0.0818,
"step": 68800
},
{
"epoch": 2.0,
"learning_rate": 2.9971606883324815e-05,
"loss": 0.1278,
"step": 68900
},
{
"epoch": 2.0,
"learning_rate": 2.9971565629250457e-05,
"loss": 0.1206,
"step": 69000
},
{
"epoch": 2.0,
"learning_rate": 2.9971524375176105e-05,
"loss": 0.1209,
"step": 69100
},
{
"epoch": 2.0,
"learning_rate": 2.9971483121101747e-05,
"loss": 0.1178,
"step": 69200
},
{
"epoch": 2.0,
"learning_rate": 2.997144186702739e-05,
"loss": 0.1235,
"step": 69300
},
{
"epoch": 2.0,
"learning_rate": 2.9971400612953037e-05,
"loss": 0.1157,
"step": 69400
},
{
"epoch": 2.0,
"learning_rate": 2.997135935887868e-05,
"loss": 0.1075,
"step": 69500
},
{
"epoch": 2.0,
"learning_rate": 2.9971318104804327e-05,
"loss": 0.116,
"step": 69600
},
{
"epoch": 2.0,
"learning_rate": 2.997127685072997e-05,
"loss": 0.1181,
"step": 69700
},
{
"epoch": 2.0,
"learning_rate": 2.9971235596655616e-05,
"loss": 0.0814,
"step": 69800
},
{
"epoch": 2.0,
"learning_rate": 2.9971194342581258e-05,
"loss": 0.1161,
"step": 69900
},
{
"epoch": 2.0,
"learning_rate": 2.9971153088506906e-05,
"loss": 0.0829,
"step": 70000
},
{
"epoch": 2.0,
"learning_rate": 2.9971111834432548e-05,
"loss": 0.1345,
"step": 70100
},
{
"epoch": 2.0,
"learning_rate": 2.9971070580358193e-05,
"loss": 0.0964,
"step": 70200
},
{
"epoch": 2.0,
"learning_rate": 2.9971029326283838e-05,
"loss": 0.0862,
"step": 70300
},
{
"epoch": 2.0,
"learning_rate": 2.9970988072209483e-05,
"loss": 0.1249,
"step": 70400
},
{
"epoch": 2.0,
"learning_rate": 2.9970946818135124e-05,
"loss": 0.1382,
"step": 70500
},
{
"epoch": 2.0,
"learning_rate": 2.9970905564060773e-05,
"loss": 0.098,
"step": 70600
},
{
"epoch": 2.0,
"learning_rate": 2.9970864309986414e-05,
"loss": 0.156,
"step": 70700
},
{
"epoch": 2.0,
"learning_rate": 2.997082305591206e-05,
"loss": 0.1253,
"step": 70800
},
{
"epoch": 2.0,
"learning_rate": 2.9970781801837704e-05,
"loss": 0.1206,
"step": 70900
},
{
"epoch": 2.0,
"learning_rate": 2.997074054776335e-05,
"loss": 0.0953,
"step": 71000
},
{
"epoch": 2.0,
"learning_rate": 2.997069929368899e-05,
"loss": 0.1493,
"step": 71100
},
{
"epoch": 2.0,
"learning_rate": 2.997065803961464e-05,
"loss": 0.1247,
"step": 71200
},
{
"epoch": 2.0,
"learning_rate": 2.997061678554028e-05,
"loss": 0.1099,
"step": 71300
},
{
"epoch": 2.0,
"learning_rate": 2.9970575531465926e-05,
"loss": 0.1281,
"step": 71400
},
{
"epoch": 2.0,
"learning_rate": 2.997053427739157e-05,
"loss": 0.1005,
"step": 71500
},
{
"epoch": 2.0,
"learning_rate": 2.9970493023317216e-05,
"loss": 0.1172,
"step": 71600
},
{
"epoch": 2.0,
"learning_rate": 2.997045176924286e-05,
"loss": 0.1112,
"step": 71700
},
{
"epoch": 2.0,
"learning_rate": 2.9970410515168506e-05,
"loss": 0.1126,
"step": 71800
},
{
"epoch": 2.0,
"learning_rate": 2.997036926109415e-05,
"loss": 0.1204,
"step": 71900
},
{
"epoch": 2.0,
"learning_rate": 2.9970328007019792e-05,
"loss": 0.1262,
"step": 72000
},
{
"epoch": 2.0,
"learning_rate": 2.997028675294544e-05,
"loss": 0.1157,
"step": 72100
},
{
"epoch": 2.0,
"learning_rate": 2.9970245911411828e-05,
"loss": 0.1122,
"step": 72200
},
{
"epoch": 2.0,
"learning_rate": 2.997020465733747e-05,
"loss": 0.1112,
"step": 72300
},
{
"epoch": 2.0,
"learning_rate": 2.9970163403263115e-05,
"loss": 0.1196,
"step": 72400
},
{
"epoch": 2.0,
"learning_rate": 2.997012214918876e-05,
"loss": 0.1379,
"step": 72500
},
{
"epoch": 2.0,
"learning_rate": 2.9970080895114405e-05,
"loss": 0.1008,
"step": 72600
},
{
"epoch": 2.0,
"learning_rate": 2.9970039641040046e-05,
"loss": 0.1172,
"step": 72700
},
{
"epoch": 2.0,
"learning_rate": 2.9969998386965695e-05,
"loss": 0.1098,
"step": 72800
},
{
"epoch": 2.0,
"learning_rate": 2.996995713289134e-05,
"loss": 0.1368,
"step": 72900
},
{
"epoch": 2.0,
"learning_rate": 2.996991587881698e-05,
"loss": 0.1233,
"step": 73000
},
{
"epoch": 2.0,
"learning_rate": 2.996987462474263e-05,
"loss": 0.1142,
"step": 73100
},
{
"epoch": 2.0,
"learning_rate": 2.996983337066827e-05,
"loss": 0.1051,
"step": 73200
},
{
"epoch": 2.0,
"learning_rate": 2.9969792116593916e-05,
"loss": 0.1079,
"step": 73300
},
{
"epoch": 2.0,
"learning_rate": 2.996975086251956e-05,
"loss": 0.1337,
"step": 73400
},
{
"epoch": 2.0,
"learning_rate": 2.996971002098595e-05,
"loss": 0.1179,
"step": 73500
},
{
"epoch": 2.0,
"learning_rate": 2.9969668766911594e-05,
"loss": 0.1478,
"step": 73600
},
{
"epoch": 2.0,
"learning_rate": 2.9969627512837235e-05,
"loss": 0.1154,
"step": 73700
},
{
"epoch": 2.0,
"learning_rate": 2.9969586258762884e-05,
"loss": 0.1226,
"step": 73800
},
{
"epoch": 2.0,
"learning_rate": 2.9969545004688525e-05,
"loss": 0.1274,
"step": 73900
},
{
"epoch": 2.0,
"learning_rate": 2.996950375061417e-05,
"loss": 0.1253,
"step": 74000
},
{
"epoch": 2.0,
"learning_rate": 2.9969462496539815e-05,
"loss": 0.1382,
"step": 74100
},
{
"epoch": 2.0,
"learning_rate": 2.996942124246546e-05,
"loss": 0.1317,
"step": 74200
},
{
"epoch": 2.0,
"learning_rate": 2.9969379988391105e-05,
"loss": 0.1321,
"step": 74300
},
{
"epoch": 2.0,
"eval_accuracy": 0.9419618925892423,
"eval_f1": 0.9419576710015996,
"eval_loss": 0.1524653136730194,
"eval_matthews_correlation": 0.8840504346996007,
"eval_precision": 0.9420890695231088,
"eval_recall": 0.9419613743988416,
"eval_runtime": 1386.958,
"eval_samples_per_second": 2982.311,
"eval_steps_per_second": 2982.311,
"step": 74307
},
{
"epoch": 3.0,
"learning_rate": 2.996933873431675e-05,
"loss": 0.146,
"step": 74400
},
{
"epoch": 3.0,
"learning_rate": 2.9969297480242395e-05,
"loss": 0.1157,
"step": 74500
},
{
"epoch": 3.0,
"learning_rate": 2.9969256226168037e-05,
"loss": 0.1385,
"step": 74600
},
{
"epoch": 3.0,
"learning_rate": 2.9969214972093685e-05,
"loss": 0.1157,
"step": 74700
},
{
"epoch": 3.0,
"learning_rate": 2.9969173718019327e-05,
"loss": 0.1254,
"step": 74800
},
{
"epoch": 3.0,
"learning_rate": 2.996913246394497e-05,
"loss": 0.1329,
"step": 74900
},
{
"epoch": 3.0,
"learning_rate": 2.9969091209870617e-05,
"loss": 0.1386,
"step": 75000
},
{
"epoch": 3.0,
"learning_rate": 2.996904995579626e-05,
"loss": 0.1264,
"step": 75100
},
{
"epoch": 3.0,
"learning_rate": 2.9969008701721903e-05,
"loss": 0.1288,
"step": 75200
},
{
"epoch": 3.0,
"learning_rate": 2.996896744764755e-05,
"loss": 0.147,
"step": 75300
},
{
"epoch": 3.0,
"learning_rate": 2.9968926193573193e-05,
"loss": 0.1562,
"step": 75400
},
{
"epoch": 3.0,
"learning_rate": 2.9968884939498838e-05,
"loss": 0.1632,
"step": 75500
},
{
"epoch": 3.0,
"learning_rate": 2.9968843685424483e-05,
"loss": 0.1574,
"step": 75600
},
{
"epoch": 3.0,
"learning_rate": 2.9968802431350128e-05,
"loss": 0.1518,
"step": 75700
},
{
"epoch": 3.0,
"learning_rate": 2.996876117727577e-05,
"loss": 0.152,
"step": 75800
},
{
"epoch": 3.0,
"learning_rate": 2.9968719923201418e-05,
"loss": 0.1457,
"step": 75900
},
{
"epoch": 3.0,
"learning_rate": 2.996867866912706e-05,
"loss": 0.1518,
"step": 76000
},
{
"epoch": 3.0,
"learning_rate": 2.9968637415052704e-05,
"loss": 0.148,
"step": 76100
},
{
"epoch": 3.0,
"learning_rate": 2.996859616097835e-05,
"loss": 0.1408,
"step": 76200
},
{
"epoch": 3.0,
"learning_rate": 2.9968554906903994e-05,
"loss": 0.1371,
"step": 76300
},
{
"epoch": 3.0,
"learning_rate": 2.996851365282964e-05,
"loss": 0.1374,
"step": 76400
},
{
"epoch": 3.0,
"learning_rate": 2.9968472398755284e-05,
"loss": 0.1405,
"step": 76500
},
{
"epoch": 3.0,
"learning_rate": 2.996843114468093e-05,
"loss": 0.1403,
"step": 76600
},
{
"epoch": 3.0,
"learning_rate": 2.996838989060657e-05,
"loss": 0.1392,
"step": 76700
},
{
"epoch": 3.0,
"learning_rate": 2.996834863653222e-05,
"loss": 0.1413,
"step": 76800
},
{
"epoch": 3.0,
"learning_rate": 2.996830738245786e-05,
"loss": 0.1371,
"step": 76900
},
{
"epoch": 3.0,
"learning_rate": 2.9968266128383506e-05,
"loss": 0.1412,
"step": 77000
},
{
"epoch": 3.0,
"learning_rate": 2.996822487430915e-05,
"loss": 0.1406,
"step": 77100
},
{
"epoch": 3.0,
"learning_rate": 2.9968183620234796e-05,
"loss": 0.1447,
"step": 77200
},
{
"epoch": 3.0,
"learning_rate": 2.9968142366160437e-05,
"loss": 0.1151,
"step": 77300
},
{
"epoch": 3.0,
"learning_rate": 2.9968101112086082e-05,
"loss": 0.1255,
"step": 77400
},
{
"epoch": 3.0,
"learning_rate": 2.9968059858011727e-05,
"loss": 0.1464,
"step": 77500
},
{
"epoch": 3.0,
"learning_rate": 2.9968018603937372e-05,
"loss": 0.1391,
"step": 77600
},
{
"epoch": 3.0,
"learning_rate": 2.9967977349863014e-05,
"loss": 0.124,
"step": 77700
},
{
"epoch": 3.0,
"learning_rate": 2.9967936095788662e-05,
"loss": 0.118,
"step": 77800
},
{
"epoch": 3.0,
"learning_rate": 2.9967894841714304e-05,
"loss": 0.1472,
"step": 77900
},
{
"epoch": 3.0,
"learning_rate": 2.996785358763995e-05,
"loss": 0.1268,
"step": 78000
},
{
"epoch": 3.0,
"learning_rate": 2.9967812333565594e-05,
"loss": 0.1126,
"step": 78100
},
{
"epoch": 3.0,
"learning_rate": 2.996777107949124e-05,
"loss": 0.1199,
"step": 78200
},
{
"epoch": 3.0,
"learning_rate": 2.9967730237957626e-05,
"loss": 0.1234,
"step": 78300
},
{
"epoch": 3.0,
"learning_rate": 2.996768898388327e-05,
"loss": 0.1256,
"step": 78400
},
{
"epoch": 3.0,
"learning_rate": 2.9967647729808916e-05,
"loss": 0.1378,
"step": 78500
},
{
"epoch": 3.0,
"learning_rate": 2.996760647573456e-05,
"loss": 0.1361,
"step": 78600
},
{
"epoch": 3.0,
"learning_rate": 2.996756563420095e-05,
"loss": 0.1173,
"step": 78700
},
{
"epoch": 3.0,
"learning_rate": 2.996752438012659e-05,
"loss": 0.1356,
"step": 78800
},
{
"epoch": 3.0,
"learning_rate": 2.996748312605224e-05,
"loss": 0.1194,
"step": 78900
},
{
"epoch": 3.0,
"learning_rate": 2.996744187197788e-05,
"loss": 0.1528,
"step": 79000
},
{
"epoch": 3.0,
"learning_rate": 2.9967400617903525e-05,
"loss": 0.1384,
"step": 79100
},
{
"epoch": 3.0,
"learning_rate": 2.9967359363829174e-05,
"loss": 0.1279,
"step": 79200
},
{
"epoch": 3.0,
"learning_rate": 2.9967318109754815e-05,
"loss": 0.1436,
"step": 79300
},
{
"epoch": 3.0,
"learning_rate": 2.996727685568046e-05,
"loss": 0.1289,
"step": 79400
},
{
"epoch": 3.0,
"learning_rate": 2.9967235601606105e-05,
"loss": 0.1422,
"step": 79500
},
{
"epoch": 3.0,
"learning_rate": 2.996719434753175e-05,
"loss": 0.1297,
"step": 79600
},
{
"epoch": 3.0,
"learning_rate": 2.9967153093457392e-05,
"loss": 0.1057,
"step": 79700
},
{
"epoch": 3.0,
"learning_rate": 2.996711183938304e-05,
"loss": 0.1373,
"step": 79800
},
{
"epoch": 3.0,
"learning_rate": 2.9967070585308682e-05,
"loss": 0.1465,
"step": 79900
},
{
"epoch": 3.0,
"learning_rate": 2.9967029331234327e-05,
"loss": 0.1409,
"step": 80000
},
{
"epoch": 3.0,
"learning_rate": 2.9966988077159972e-05,
"loss": 0.1224,
"step": 80100
},
{
"epoch": 3.0,
"learning_rate": 2.9966946823085617e-05,
"loss": 0.1252,
"step": 80200
},
{
"epoch": 3.0,
"learning_rate": 2.9966905569011258e-05,
"loss": 0.1413,
"step": 80300
},
{
"epoch": 3.0,
"learning_rate": 2.9966864314936907e-05,
"loss": 0.1481,
"step": 80400
},
{
"epoch": 3.0,
"learning_rate": 2.9966823060862548e-05,
"loss": 0.1335,
"step": 80500
},
{
"epoch": 3.0,
"learning_rate": 2.9966781806788193e-05,
"loss": 0.1177,
"step": 80600
},
{
"epoch": 3.0,
"learning_rate": 2.9966740552713838e-05,
"loss": 0.1428,
"step": 80700
},
{
"epoch": 3.0,
"learning_rate": 2.9966699298639483e-05,
"loss": 0.1483,
"step": 80800
},
{
"epoch": 3.0,
"learning_rate": 2.9966658044565125e-05,
"loss": 0.1266,
"step": 80900
},
{
"epoch": 3.0,
"learning_rate": 2.9966616790490773e-05,
"loss": 0.136,
"step": 81000
},
{
"epoch": 3.0,
"learning_rate": 2.9966575536416415e-05,
"loss": 0.1336,
"step": 81100
},
{
"epoch": 3.0,
"learning_rate": 2.996653428234206e-05,
"loss": 0.1477,
"step": 81200
},
{
"epoch": 3.0,
"learning_rate": 2.9966493440808447e-05,
"loss": 0.1408,
"step": 81300
},
{
"epoch": 3.0,
"learning_rate": 2.9966452186734096e-05,
"loss": 0.1346,
"step": 81400
},
{
"epoch": 3.0,
"learning_rate": 2.9966410932659737e-05,
"loss": 0.1142,
"step": 81500
},
{
"epoch": 3.0,
"learning_rate": 2.9966369678585382e-05,
"loss": 0.1426,
"step": 81600
},
{
"epoch": 3.0,
"learning_rate": 2.9966328424511027e-05,
"loss": 0.125,
"step": 81700
},
{
"epoch": 3.0,
"learning_rate": 2.9966287170436672e-05,
"loss": 0.1311,
"step": 81800
},
{
"epoch": 3.0,
"learning_rate": 2.9966245916362314e-05,
"loss": 0.1454,
"step": 81900
},
{
"epoch": 3.0,
"learning_rate": 2.9966204662287962e-05,
"loss": 0.1472,
"step": 82000
},
{
"epoch": 3.0,
"learning_rate": 2.9966163408213604e-05,
"loss": 0.1498,
"step": 82100
},
{
"epoch": 3.0,
"learning_rate": 2.996612215413925e-05,
"loss": 0.1362,
"step": 82200
},
{
"epoch": 3.0,
"learning_rate": 2.9966080900064894e-05,
"loss": 0.1541,
"step": 82300
},
{
"epoch": 3.0,
"learning_rate": 2.996603964599054e-05,
"loss": 0.118,
"step": 82400
},
{
"epoch": 3.0,
"learning_rate": 2.996599839191618e-05,
"loss": 0.1398,
"step": 82500
},
{
"epoch": 3.0,
"learning_rate": 2.996595713784183e-05,
"loss": 0.1556,
"step": 82600
},
{
"epoch": 3.0,
"learning_rate": 2.9965915883767473e-05,
"loss": 0.1388,
"step": 82700
},
{
"epoch": 3.0,
"learning_rate": 2.9965874629693115e-05,
"loss": 0.1466,
"step": 82800
},
{
"epoch": 3.0,
"learning_rate": 2.9965833375618763e-05,
"loss": 0.1534,
"step": 82900
},
{
"epoch": 3.0,
"learning_rate": 2.9965792121544405e-05,
"loss": 0.1421,
"step": 83000
},
{
"epoch": 3.0,
"learning_rate": 2.996575086747005e-05,
"loss": 0.1453,
"step": 83100
},
{
"epoch": 3.0,
"learning_rate": 2.9965709613395695e-05,
"loss": 0.1439,
"step": 83200
},
{
"epoch": 3.0,
"learning_rate": 2.996566835932134e-05,
"loss": 0.1482,
"step": 83300
},
{
"epoch": 3.0,
"learning_rate": 2.996562710524698e-05,
"loss": 0.1651,
"step": 83400
},
{
"epoch": 3.0,
"learning_rate": 2.996558585117263e-05,
"loss": 0.174,
"step": 83500
},
{
"epoch": 3.0,
"learning_rate": 2.996554459709827e-05,
"loss": 0.1261,
"step": 83600
},
{
"epoch": 3.0,
"learning_rate": 2.9965503343023916e-05,
"loss": 0.1385,
"step": 83700
},
{
"epoch": 3.0,
"learning_rate": 2.996546208894956e-05,
"loss": 0.1411,
"step": 83800
},
{
"epoch": 3.0,
"learning_rate": 2.9965420834875206e-05,
"loss": 0.1117,
"step": 83900
},
{
"epoch": 3.0,
"learning_rate": 2.9965379580800848e-05,
"loss": 0.1382,
"step": 84000
},
{
"epoch": 3.0,
"learning_rate": 2.9965338326726493e-05,
"loss": 0.1223,
"step": 84100
},
{
"epoch": 3.0,
"learning_rate": 2.9965297072652138e-05,
"loss": 0.148,
"step": 84200
},
{
"epoch": 3.0,
"learning_rate": 2.9965255818577783e-05,
"loss": 0.1443,
"step": 84300
},
{
"epoch": 3.0,
"learning_rate": 2.9965214564503424e-05,
"loss": 0.153,
"step": 84400
},
{
"epoch": 3.0,
"learning_rate": 2.9965173310429073e-05,
"loss": 0.1436,
"step": 84500
},
{
"epoch": 3.0,
"learning_rate": 2.9965132056354714e-05,
"loss": 0.1508,
"step": 84600
},
{
"epoch": 3.0,
"learning_rate": 2.996509080228036e-05,
"loss": 0.1631,
"step": 84700
},
{
"epoch": 3.0,
"learning_rate": 2.9965049548206008e-05,
"loss": 0.1436,
"step": 84800
},
{
"epoch": 3.0,
"learning_rate": 2.996500829413165e-05,
"loss": 0.1492,
"step": 84900
},
{
"epoch": 3.0,
"learning_rate": 2.9964967040057294e-05,
"loss": 0.1493,
"step": 85000
},
{
"epoch": 3.0,
"learning_rate": 2.996492578598294e-05,
"loss": 0.1386,
"step": 85100
},
{
"epoch": 3.0,
"learning_rate": 2.9964884531908584e-05,
"loss": 0.1495,
"step": 85200
},
{
"epoch": 3.0,
"learning_rate": 2.9964843277834226e-05,
"loss": 0.1346,
"step": 85300
},
{
"epoch": 3.0,
"learning_rate": 2.9964802023759874e-05,
"loss": 0.1273,
"step": 85400
},
{
"epoch": 3.0,
"learning_rate": 2.9964760769685516e-05,
"loss": 0.1391,
"step": 85500
},
{
"epoch": 3.0,
"learning_rate": 2.996471951561116e-05,
"loss": 0.1252,
"step": 85600
},
{
"epoch": 3.0,
"learning_rate": 2.9964678261536806e-05,
"loss": 0.1555,
"step": 85700
},
{
"epoch": 3.0,
"learning_rate": 2.996463700746245e-05,
"loss": 0.1366,
"step": 85800
},
{
"epoch": 3.0,
"learning_rate": 2.9964595753388092e-05,
"loss": 0.1052,
"step": 85900
},
{
"epoch": 3.0,
"learning_rate": 2.996455449931374e-05,
"loss": 0.1499,
"step": 86000
},
{
"epoch": 3.0,
"learning_rate": 2.9964513245239382e-05,
"loss": 0.1135,
"step": 86100
},
{
"epoch": 3.0,
"learning_rate": 2.9964471991165027e-05,
"loss": 0.1344,
"step": 86200
},
{
"epoch": 3.0,
"learning_rate": 2.9964430737090672e-05,
"loss": 0.13,
"step": 86300
},
{
"epoch": 3.0,
"learning_rate": 2.9964389483016317e-05,
"loss": 0.1346,
"step": 86400
},
{
"epoch": 3.0,
"learning_rate": 2.996434822894196e-05,
"loss": 0.1274,
"step": 86500
},
{
"epoch": 3.0,
"learning_rate": 2.9964306974867607e-05,
"loss": 0.1374,
"step": 86600
},
{
"epoch": 3.0,
"learning_rate": 2.996426572079325e-05,
"loss": 0.1353,
"step": 86700
},
{
"epoch": 3.0,
"learning_rate": 2.9964224466718894e-05,
"loss": 0.1307,
"step": 86800
},
{
"epoch": 3.0,
"learning_rate": 2.996418362518528e-05,
"loss": 0.1063,
"step": 86900
},
{
"epoch": 3.0,
"learning_rate": 2.996414237111093e-05,
"loss": 0.1176,
"step": 87000
},
{
"epoch": 3.0,
"learning_rate": 2.996410111703657e-05,
"loss": 0.1491,
"step": 87100
},
{
"epoch": 3.0,
"learning_rate": 2.9964059862962216e-05,
"loss": 0.1189,
"step": 87200
},
{
"epoch": 3.0,
"learning_rate": 2.996401860888786e-05,
"loss": 0.1142,
"step": 87300
},
{
"epoch": 3.0,
"learning_rate": 2.9963977354813506e-05,
"loss": 0.1296,
"step": 87400
},
{
"epoch": 3.0,
"learning_rate": 2.9963936100739148e-05,
"loss": 0.1116,
"step": 87500
},
{
"epoch": 3.0,
"learning_rate": 2.9963894846664796e-05,
"loss": 0.1184,
"step": 87600
},
{
"epoch": 3.0,
"learning_rate": 2.9963853592590438e-05,
"loss": 0.1217,
"step": 87700
},
{
"epoch": 3.0,
"learning_rate": 2.9963812338516083e-05,
"loss": 0.1114,
"step": 87800
},
{
"epoch": 3.0,
"learning_rate": 2.9963771084441728e-05,
"loss": 0.1036,
"step": 87900
},
{
"epoch": 3.0,
"learning_rate": 2.9963729830367373e-05,
"loss": 0.1306,
"step": 88000
},
{
"epoch": 3.0,
"learning_rate": 2.9963688576293014e-05,
"loss": 0.1135,
"step": 88100
},
{
"epoch": 3.0,
"learning_rate": 2.9963647734759405e-05,
"loss": 0.113,
"step": 88200
},
{
"epoch": 3.0,
"learning_rate": 2.996360648068505e-05,
"loss": 0.1347,
"step": 88300
},
{
"epoch": 3.0,
"learning_rate": 2.9963565226610695e-05,
"loss": 0.1365,
"step": 88400
},
{
"epoch": 3.0,
"learning_rate": 2.9963523972536337e-05,
"loss": 0.1104,
"step": 88500
},
{
"epoch": 3.0,
"learning_rate": 2.9963482718461985e-05,
"loss": 0.1048,
"step": 88600
},
{
"epoch": 3.0,
"learning_rate": 2.9963441464387627e-05,
"loss": 0.1142,
"step": 88700
},
{
"epoch": 3.0,
"learning_rate": 2.996340021031327e-05,
"loss": 0.0926,
"step": 88800
},
{
"epoch": 3.0,
"learning_rate": 2.9963358956238917e-05,
"loss": 0.0777,
"step": 88900
},
{
"epoch": 3.0,
"learning_rate": 2.996331770216456e-05,
"loss": 0.1193,
"step": 89000
},
{
"epoch": 3.0,
"learning_rate": 2.9963276448090203e-05,
"loss": 0.1257,
"step": 89100
},
{
"epoch": 3.0,
"learning_rate": 2.996323519401585e-05,
"loss": 0.1059,
"step": 89200
},
{
"epoch": 3.0,
"learning_rate": 2.9963193939941493e-05,
"loss": 0.114,
"step": 89300
},
{
"epoch": 3.0,
"learning_rate": 2.9963152685867138e-05,
"loss": 0.116,
"step": 89400
},
{
"epoch": 3.0,
"learning_rate": 2.9963111431792783e-05,
"loss": 0.1223,
"step": 89500
},
{
"epoch": 3.0,
"learning_rate": 2.9963070177718428e-05,
"loss": 0.1025,
"step": 89600
},
{
"epoch": 3.0,
"learning_rate": 2.9963028923644073e-05,
"loss": 0.1222,
"step": 89700
},
{
"epoch": 3.0,
"learning_rate": 2.9962987669569718e-05,
"loss": 0.1044,
"step": 89800
},
{
"epoch": 3.0,
"learning_rate": 2.9962946415495363e-05,
"loss": 0.0695,
"step": 89900
},
{
"epoch": 3.0,
"learning_rate": 2.9962905161421004e-05,
"loss": 0.1164,
"step": 90000
},
{
"epoch": 3.0,
"learning_rate": 2.9962863907346653e-05,
"loss": 0.0778,
"step": 90100
},
{
"epoch": 3.0,
"learning_rate": 2.9962822653272294e-05,
"loss": 0.1077,
"step": 90200
},
{
"epoch": 3.0,
"learning_rate": 2.996278139919794e-05,
"loss": 0.1145,
"step": 90300
},
{
"epoch": 3.0,
"learning_rate": 2.9962740145123584e-05,
"loss": 0.0945,
"step": 90400
},
{
"epoch": 3.0,
"learning_rate": 2.996269889104923e-05,
"loss": 0.1119,
"step": 90500
},
{
"epoch": 3.0,
"learning_rate": 2.996265763697487e-05,
"loss": 0.1222,
"step": 90600
},
{
"epoch": 3.0,
"learning_rate": 2.996261638290052e-05,
"loss": 0.1044,
"step": 90700
},
{
"epoch": 3.0,
"learning_rate": 2.996257512882616e-05,
"loss": 0.1253,
"step": 90800
},
{
"epoch": 3.0,
"learning_rate": 2.9962533874751806e-05,
"loss": 0.1137,
"step": 90900
},
{
"epoch": 3.0,
"learning_rate": 2.996249262067745e-05,
"loss": 0.0822,
"step": 91000
},
{
"epoch": 3.0,
"learning_rate": 2.9962451366603096e-05,
"loss": 0.099,
"step": 91100
},
{
"epoch": 3.0,
"learning_rate": 2.9962410112528737e-05,
"loss": 0.1331,
"step": 91200
},
{
"epoch": 3.0,
"learning_rate": 2.9962368858454386e-05,
"loss": 0.097,
"step": 91300
},
{
"epoch": 3.0,
"learning_rate": 2.9962327604380027e-05,
"loss": 0.1154,
"step": 91400
},
{
"epoch": 3.0,
"learning_rate": 2.9962286350305672e-05,
"loss": 0.1002,
"step": 91500
},
{
"epoch": 3.0,
"learning_rate": 2.9962245096231317e-05,
"loss": 0.1111,
"step": 91600
},
{
"epoch": 3.0,
"learning_rate": 2.9962203842156962e-05,
"loss": 0.1211,
"step": 91700
},
{
"epoch": 3.0,
"learning_rate": 2.9962162588082607e-05,
"loss": 0.1011,
"step": 91800
},
{
"epoch": 3.0,
"learning_rate": 2.9962121334008252e-05,
"loss": 0.1321,
"step": 91900
},
{
"epoch": 3.0,
"learning_rate": 2.9962080079933897e-05,
"loss": 0.103,
"step": 92000
},
{
"epoch": 3.0,
"learning_rate": 2.996203882585954e-05,
"loss": 0.1161,
"step": 92100
},
{
"epoch": 3.0,
"learning_rate": 2.9961997571785187e-05,
"loss": 0.1111,
"step": 92200
},
{
"epoch": 3.0,
"learning_rate": 2.996195631771083e-05,
"loss": 0.1105,
"step": 92300
},
{
"epoch": 3.0,
"learning_rate": 2.9961915063636474e-05,
"loss": 0.1121,
"step": 92400
},
{
"epoch": 3.0,
"learning_rate": 2.996187380956212e-05,
"loss": 0.1265,
"step": 92500
},
{
"epoch": 3.0,
"learning_rate": 2.9961832968028503e-05,
"loss": 0.1109,
"step": 92600
},
{
"epoch": 3.0,
"learning_rate": 2.996179171395415e-05,
"loss": 0.1032,
"step": 92700
},
{
"epoch": 3.0,
"learning_rate": 2.9961750459879793e-05,
"loss": 0.1122,
"step": 92800
},
{
"epoch": 3.0,
"learning_rate": 2.9961709618346184e-05,
"loss": 0.1165,
"step": 92900
},
{
"epoch": 3.0,
"learning_rate": 2.9961668364271825e-05,
"loss": 0.1329,
"step": 93000
},
{
"epoch": 3.0,
"learning_rate": 2.9961627110197474e-05,
"loss": 0.1272,
"step": 93100
},
{
"epoch": 3.0,
"learning_rate": 2.9961585856123115e-05,
"loss": 0.1092,
"step": 93200
},
{
"epoch": 3.0,
"learning_rate": 2.996154460204876e-05,
"loss": 0.1252,
"step": 93300
},
{
"epoch": 3.0,
"learning_rate": 2.9961503347974405e-05,
"loss": 0.1195,
"step": 93400
},
{
"epoch": 3.0,
"learning_rate": 2.996146209390005e-05,
"loss": 0.0835,
"step": 93500
},
{
"epoch": 3.0,
"learning_rate": 2.9961420839825692e-05,
"loss": 0.1005,
"step": 93600
},
{
"epoch": 3.0,
"learning_rate": 2.996137958575134e-05,
"loss": 0.1201,
"step": 93700
},
{
"epoch": 3.0,
"learning_rate": 2.9961338331676982e-05,
"loss": 0.1167,
"step": 93800
},
{
"epoch": 3.0,
"learning_rate": 2.9961297077602627e-05,
"loss": 0.1121,
"step": 93900
},
{
"epoch": 3.0,
"learning_rate": 2.9961255823528272e-05,
"loss": 0.1146,
"step": 94000
},
{
"epoch": 3.0,
"learning_rate": 2.9961214569453917e-05,
"loss": 0.1087,
"step": 94100
},
{
"epoch": 3.0,
"learning_rate": 2.9961173315379558e-05,
"loss": 0.1156,
"step": 94200
},
{
"epoch": 3.0,
"learning_rate": 2.9961132061305207e-05,
"loss": 0.1016,
"step": 94300
},
{
"epoch": 3.0,
"learning_rate": 2.9961090807230848e-05,
"loss": 0.1064,
"step": 94400
},
{
"epoch": 3.0,
"learning_rate": 2.9961049553156493e-05,
"loss": 0.0906,
"step": 94500
},
{
"epoch": 3.0,
"learning_rate": 2.996100829908214e-05,
"loss": 0.0982,
"step": 94600
},
{
"epoch": 3.0,
"learning_rate": 2.9960967045007783e-05,
"loss": 0.0944,
"step": 94700
},
{
"epoch": 3.0,
"learning_rate": 2.9960925790933428e-05,
"loss": 0.0984,
"step": 94800
},
{
"epoch": 3.0,
"learning_rate": 2.9960884536859073e-05,
"loss": 0.1191,
"step": 94900
},
{
"epoch": 3.0,
"learning_rate": 2.9960843282784718e-05,
"loss": 0.0901,
"step": 95000
},
{
"epoch": 3.0,
"learning_rate": 2.996080202871036e-05,
"loss": 0.0836,
"step": 95100
},
{
"epoch": 3.0,
"learning_rate": 2.9960760774636008e-05,
"loss": 0.1309,
"step": 95200
},
{
"epoch": 3.0,
"learning_rate": 2.996071952056165e-05,
"loss": 0.113,
"step": 95300
},
{
"epoch": 3.0,
"learning_rate": 2.9960678266487295e-05,
"loss": 0.1097,
"step": 95400
},
{
"epoch": 3.0,
"learning_rate": 2.996063701241294e-05,
"loss": 0.1448,
"step": 95500
},
{
"epoch": 3.0,
"learning_rate": 2.9960595758338584e-05,
"loss": 0.1221,
"step": 95600
},
{
"epoch": 3.0,
"learning_rate": 2.9960554504264226e-05,
"loss": 0.0998,
"step": 95700
},
{
"epoch": 3.0,
"learning_rate": 2.9960513250189874e-05,
"loss": 0.0946,
"step": 95800
},
{
"epoch": 3.0,
"learning_rate": 2.9960471996115516e-05,
"loss": 0.1486,
"step": 95900
},
{
"epoch": 3.0,
"learning_rate": 2.996043074204116e-05,
"loss": 0.1168,
"step": 96000
},
{
"epoch": 3.0,
"learning_rate": 2.9960389487966806e-05,
"loss": 0.1024,
"step": 96100
},
{
"epoch": 3.0,
"learning_rate": 2.996034823389245e-05,
"loss": 0.1186,
"step": 96200
},
{
"epoch": 3.0,
"learning_rate": 2.9960306979818092e-05,
"loss": 0.0986,
"step": 96300
},
{
"epoch": 3.0,
"learning_rate": 2.996026572574374e-05,
"loss": 0.1118,
"step": 96400
},
{
"epoch": 3.0,
"learning_rate": 2.9960224471669382e-05,
"loss": 0.1072,
"step": 96500
},
{
"epoch": 3.0,
"learning_rate": 2.9960183217595027e-05,
"loss": 0.1039,
"step": 96600
},
{
"epoch": 3.0,
"learning_rate": 2.9960141963520676e-05,
"loss": 0.1214,
"step": 96700
},
{
"epoch": 3.0,
"learning_rate": 2.9960100709446317e-05,
"loss": 0.1225,
"step": 96800
},
{
"epoch": 3.0,
"learning_rate": 2.9960059455371962e-05,
"loss": 0.1082,
"step": 96900
},
{
"epoch": 3.0,
"learning_rate": 2.996001861383835e-05,
"loss": 0.1089,
"step": 97000
},
{
"epoch": 3.0,
"learning_rate": 2.9959977359763995e-05,
"loss": 0.1109,
"step": 97100
},
{
"epoch": 3.0,
"learning_rate": 2.995993610568964e-05,
"loss": 0.1302,
"step": 97200
},
{
"epoch": 3.0,
"learning_rate": 2.995989485161528e-05,
"loss": 0.1174,
"step": 97300
},
{
"epoch": 3.0,
"learning_rate": 2.995985359754093e-05,
"loss": 0.1003,
"step": 97400
},
{
"epoch": 3.0,
"learning_rate": 2.995981234346657e-05,
"loss": 0.1141,
"step": 97500
},
{
"epoch": 3.0,
"learning_rate": 2.9959771089392216e-05,
"loss": 0.1078,
"step": 97600
},
{
"epoch": 3.0,
"learning_rate": 2.995972983531786e-05,
"loss": 0.1351,
"step": 97700
},
{
"epoch": 3.0,
"learning_rate": 2.9959688581243506e-05,
"loss": 0.1077,
"step": 97800
},
{
"epoch": 3.0,
"learning_rate": 2.9959647327169148e-05,
"loss": 0.1125,
"step": 97900
},
{
"epoch": 3.0,
"learning_rate": 2.9959606073094796e-05,
"loss": 0.1085,
"step": 98000
},
{
"epoch": 3.0,
"learning_rate": 2.995956481902044e-05,
"loss": 0.1107,
"step": 98100
},
{
"epoch": 3.0,
"learning_rate": 2.995952397748683e-05,
"loss": 0.1362,
"step": 98200
},
{
"epoch": 3.0,
"learning_rate": 2.995948272341247e-05,
"loss": 0.11,
"step": 98300
},
{
"epoch": 3.0,
"learning_rate": 2.9959441469338115e-05,
"loss": 0.1443,
"step": 98400
},
{
"epoch": 3.0,
"learning_rate": 2.995940021526376e-05,
"loss": 0.1067,
"step": 98500
},
{
"epoch": 3.0,
"learning_rate": 2.9959358961189405e-05,
"loss": 0.1241,
"step": 98600
},
{
"epoch": 3.0,
"learning_rate": 2.9959317707115047e-05,
"loss": 0.1221,
"step": 98700
},
{
"epoch": 3.0,
"learning_rate": 2.9959276453040695e-05,
"loss": 0.1182,
"step": 98800
},
{
"epoch": 3.0,
"learning_rate": 2.9959235198966337e-05,
"loss": 0.1437,
"step": 98900
},
{
"epoch": 3.0,
"learning_rate": 2.9959193944891982e-05,
"loss": 0.1188,
"step": 99000
},
{
"epoch": 3.0,
"eval_accuracy": 0.9421570213343179,
"eval_f1": 0.9421534578908957,
"eval_loss": 0.15225644409656525,
"eval_matthews_correlation": 0.8844212104135638,
"eval_precision": 0.942264672299383,
"eval_recall": 0.9421565447239119,
"eval_runtime": 1388.3348,
"eval_samples_per_second": 2979.353,
"eval_steps_per_second": 2979.353,
"step": 99076
}
],
"max_steps": 72720140,
"num_train_epochs": 5,
"total_flos": 1.7037705236761805e+18,
"trial_name": null,
"trial_params": null
}