{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0017030357752335, "global_step": 99076, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.9999979372962823e-05, "loss": 0.1516, "step": 100 }, { "epoch": 0.0, "learning_rate": 2.9999938118888465e-05, "loss": 0.1292, "step": 200 }, { "epoch": 0.0, "learning_rate": 2.9999896864814113e-05, "loss": 0.1529, "step": 300 }, { "epoch": 0.0, "learning_rate": 2.9999855610739755e-05, "loss": 0.128, "step": 400 }, { "epoch": 0.0, "learning_rate": 2.99998143566654e-05, "loss": 0.1422, "step": 500 }, { "epoch": 0.0, "learning_rate": 2.9999773102591045e-05, "loss": 0.1399, "step": 600 }, { "epoch": 0.0, "learning_rate": 2.999973184851669e-05, "loss": 0.1531, "step": 700 }, { "epoch": 0.0, "learning_rate": 2.999969059444233e-05, "loss": 0.1411, "step": 800 }, { "epoch": 0.0, "learning_rate": 2.999964934036798e-05, "loss": 0.1376, "step": 900 }, { "epoch": 0.0, "learning_rate": 2.999960808629362e-05, "loss": 0.1655, "step": 1000 }, { "epoch": 0.0, "learning_rate": 2.9999566832219266e-05, "loss": 0.1721, "step": 1100 }, { "epoch": 0.0, "learning_rate": 2.9999525578144915e-05, "loss": 0.1759, "step": 1200 }, { "epoch": 0.0, "learning_rate": 2.9999484324070556e-05, "loss": 0.1762, "step": 1300 }, { "epoch": 0.0, "learning_rate": 2.99994430699962e-05, "loss": 0.1584, "step": 1400 }, { "epoch": 0.0, "learning_rate": 2.9999401815921846e-05, "loss": 0.1661, "step": 1500 }, { "epoch": 0.0, "learning_rate": 2.999936056184749e-05, "loss": 0.1591, "step": 1600 }, { "epoch": 0.0, "learning_rate": 2.9999319307773133e-05, "loss": 0.1646, "step": 1700 }, { "epoch": 0.0, "learning_rate": 2.999927805369878e-05, "loss": 0.1636, "step": 1800 }, { "epoch": 0.0, "learning_rate": 2.9999236799624423e-05, "loss": 0.1518, "step": 1900 }, { "epoch": 0.0, "learning_rate": 2.9999195545550068e-05, "loss": 0.1463, "step": 2000 }, { "epoch": 0.0, "learning_rate": 2.9999154291475713e-05, "loss": 0.1507, "step": 2100 }, { "epoch": 0.0, "learning_rate": 2.9999113037401357e-05, "loss": 0.1528, "step": 2200 }, { "epoch": 0.0, "learning_rate": 2.9999071783327e-05, "loss": 0.1542, "step": 2300 }, { "epoch": 0.0, "learning_rate": 2.9999030529252647e-05, "loss": 0.1494, "step": 2400 }, { "epoch": 0.0, "learning_rate": 2.999898927517829e-05, "loss": 0.154, "step": 2500 }, { "epoch": 0.0, "learning_rate": 2.9998948021103934e-05, "loss": 0.1474, "step": 2600 }, { "epoch": 0.0, "learning_rate": 2.999890676702958e-05, "loss": 0.1562, "step": 2700 }, { "epoch": 0.0, "learning_rate": 2.9998865512955224e-05, "loss": 0.1482, "step": 2800 }, { "epoch": 0.0, "learning_rate": 2.9998824258880865e-05, "loss": 0.166, "step": 2900 }, { "epoch": 0.0, "learning_rate": 2.9998783004806514e-05, "loss": 0.1187, "step": 3000 }, { "epoch": 0.0, "learning_rate": 2.9998741750732155e-05, "loss": 0.1322, "step": 3100 }, { "epoch": 0.0, "learning_rate": 2.99987004966578e-05, "loss": 0.1612, "step": 3200 }, { "epoch": 0.0, "learning_rate": 2.999865924258345e-05, "loss": 0.152, "step": 3300 }, { "epoch": 0.0, "learning_rate": 2.999861798850909e-05, "loss": 0.1375, "step": 3400 }, { "epoch": 0.0, "learning_rate": 2.9998576734434735e-05, "loss": 0.1252, "step": 3500 }, { "epoch": 0.0, "learning_rate": 2.999853548036038e-05, "loss": 0.1582, "step": 3600 }, { "epoch": 0.0, "learning_rate": 2.9998494226286025e-05, "loss": 0.1371, "step": 3700 }, { "epoch": 0.0, "learning_rate": 2.9998452972211667e-05, "loss": 0.1264, "step": 3800 }, { "epoch": 0.0, "learning_rate": 2.9998411718137315e-05, "loss": 0.1409, "step": 3900 }, { "epoch": 0.0, "learning_rate": 2.9998370876603703e-05, "loss": 0.1247, "step": 4000 }, { "epoch": 0.0, "learning_rate": 2.9998329622529344e-05, "loss": 0.1342, "step": 4100 }, { "epoch": 0.0, "learning_rate": 2.999828836845499e-05, "loss": 0.1528, "step": 4200 }, { "epoch": 0.0, "learning_rate": 2.9998247114380634e-05, "loss": 0.1532, "step": 4300 }, { "epoch": 0.0, "learning_rate": 2.999820586030628e-05, "loss": 0.1239, "step": 4400 }, { "epoch": 0.0, "learning_rate": 2.999816460623192e-05, "loss": 0.1489, "step": 4500 }, { "epoch": 0.0, "learning_rate": 2.999812335215757e-05, "loss": 0.1367, "step": 4600 }, { "epoch": 0.0, "learning_rate": 2.9998082098083214e-05, "loss": 0.1593, "step": 4700 }, { "epoch": 0.0, "learning_rate": 2.9998040844008856e-05, "loss": 0.1559, "step": 4800 }, { "epoch": 0.0, "learning_rate": 2.9997999589934504e-05, "loss": 0.1366, "step": 4900 }, { "epoch": 0.0, "learning_rate": 2.9997958335860146e-05, "loss": 0.1535, "step": 5000 }, { "epoch": 0.0, "learning_rate": 2.999791708178579e-05, "loss": 0.1515, "step": 5100 }, { "epoch": 0.0, "learning_rate": 2.9997875827711436e-05, "loss": 0.1453, "step": 5200 }, { "epoch": 0.0, "learning_rate": 2.999783457363708e-05, "loss": 0.1378, "step": 5300 }, { "epoch": 0.0, "learning_rate": 2.9997793319562722e-05, "loss": 0.1202, "step": 5400 }, { "epoch": 0.0, "learning_rate": 2.999775206548837e-05, "loss": 0.1503, "step": 5500 }, { "epoch": 0.0, "learning_rate": 2.9997710811414012e-05, "loss": 0.1573, "step": 5600 }, { "epoch": 0.0, "learning_rate": 2.9997669557339657e-05, "loss": 0.1508, "step": 5700 }, { "epoch": 0.0, "learning_rate": 2.9997628303265302e-05, "loss": 0.137, "step": 5800 }, { "epoch": 0.0, "learning_rate": 2.9997587049190947e-05, "loss": 0.1373, "step": 5900 }, { "epoch": 0.0, "learning_rate": 2.999754579511659e-05, "loss": 0.152, "step": 6000 }, { "epoch": 0.0, "learning_rate": 2.9997504541042237e-05, "loss": 0.1575, "step": 6100 }, { "epoch": 0.0, "learning_rate": 2.999746328696788e-05, "loss": 0.1468, "step": 6200 }, { "epoch": 0.0, "learning_rate": 2.999742244543427e-05, "loss": 0.1289, "step": 6300 }, { "epoch": 0.0, "learning_rate": 2.999738119135991e-05, "loss": 0.1549, "step": 6400 }, { "epoch": 0.0, "learning_rate": 2.999733993728556e-05, "loss": 0.1638, "step": 6500 }, { "epoch": 0.0, "learning_rate": 2.99972986832112e-05, "loss": 0.1305, "step": 6600 }, { "epoch": 0.0, "learning_rate": 2.9997257429136846e-05, "loss": 0.1522, "step": 6700 }, { "epoch": 0.0, "learning_rate": 2.999721617506249e-05, "loss": 0.1496, "step": 6800 }, { "epoch": 0.0, "learning_rate": 2.9997174920988136e-05, "loss": 0.1535, "step": 6900 }, { "epoch": 0.0, "learning_rate": 2.9997133666913778e-05, "loss": 0.1444, "step": 7000 }, { "epoch": 0.0, "learning_rate": 2.9997092412839426e-05, "loss": 0.1505, "step": 7100 }, { "epoch": 0.0, "learning_rate": 2.9997051158765068e-05, "loss": 0.1233, "step": 7200 }, { "epoch": 0.0, "learning_rate": 2.9997009904690713e-05, "loss": 0.1575, "step": 7300 }, { "epoch": 0.0, "learning_rate": 2.9996968650616358e-05, "loss": 0.1305, "step": 7400 }, { "epoch": 0.0, "learning_rate": 2.9996927396542003e-05, "loss": 0.1433, "step": 7500 }, { "epoch": 0.0, "learning_rate": 2.9996886142467644e-05, "loss": 0.1621, "step": 7600 }, { "epoch": 0.0, "learning_rate": 2.9996844888393293e-05, "loss": 0.1501, "step": 7700 }, { "epoch": 0.0, "learning_rate": 2.9996803634318934e-05, "loss": 0.1601, "step": 7800 }, { "epoch": 0.0, "learning_rate": 2.999676238024458e-05, "loss": 0.1459, "step": 7900 }, { "epoch": 0.0, "learning_rate": 2.9996721126170224e-05, "loss": 0.1671, "step": 8000 }, { "epoch": 0.0, "learning_rate": 2.999667987209587e-05, "loss": 0.1321, "step": 8100 }, { "epoch": 0.0, "learning_rate": 2.9996638618021514e-05, "loss": 0.1512, "step": 8200 }, { "epoch": 0.0, "learning_rate": 2.999659736394716e-05, "loss": 0.1654, "step": 8300 }, { "epoch": 0.0, "learning_rate": 2.9996556109872804e-05, "loss": 0.1468, "step": 8400 }, { "epoch": 0.0, "learning_rate": 2.9996514855798446e-05, "loss": 0.154, "step": 8500 }, { "epoch": 0.0, "learning_rate": 2.9996473601724094e-05, "loss": 0.1684, "step": 8600 }, { "epoch": 0.0, "learning_rate": 2.9996432347649735e-05, "loss": 0.1538, "step": 8700 }, { "epoch": 0.0, "learning_rate": 2.9996391506116123e-05, "loss": 0.1543, "step": 8800 }, { "epoch": 0.0, "learning_rate": 2.9996350252041768e-05, "loss": 0.1543, "step": 8900 }, { "epoch": 0.0, "learning_rate": 2.999630899796741e-05, "loss": 0.1595, "step": 9000 }, { "epoch": 0.0, "learning_rate": 2.9996267743893058e-05, "loss": 0.1749, "step": 9100 }, { "epoch": 0.0, "learning_rate": 2.99962264898187e-05, "loss": 0.1828, "step": 9200 }, { "epoch": 0.0, "learning_rate": 2.9996185235744345e-05, "loss": 0.1355, "step": 9300 }, { "epoch": 0.0, "learning_rate": 2.999614398166999e-05, "loss": 0.1533, "step": 9400 }, { "epoch": 0.0, "learning_rate": 2.9996102727595635e-05, "loss": 0.1432, "step": 9500 }, { "epoch": 0.0, "learning_rate": 2.999606147352128e-05, "loss": 0.1227, "step": 9600 }, { "epoch": 0.0, "learning_rate": 2.9996020219446924e-05, "loss": 0.1507, "step": 9700 }, { "epoch": 0.0, "learning_rate": 2.999597896537257e-05, "loss": 0.1324, "step": 9800 }, { "epoch": 0.0, "learning_rate": 2.999593771129821e-05, "loss": 0.1549, "step": 9900 }, { "epoch": 0.0, "learning_rate": 2.999589645722386e-05, "loss": 0.1554, "step": 10000 }, { "epoch": 0.0, "learning_rate": 2.99958552031495e-05, "loss": 0.1591, "step": 10100 }, { "epoch": 0.0, "learning_rate": 2.9995813949075146e-05, "loss": 0.1546, "step": 10200 }, { "epoch": 0.0, "learning_rate": 2.999577269500079e-05, "loss": 0.162, "step": 10300 }, { "epoch": 0.0, "learning_rate": 2.9995731440926436e-05, "loss": 0.1678, "step": 10400 }, { "epoch": 0.0, "learning_rate": 2.9995690186852077e-05, "loss": 0.1584, "step": 10500 }, { "epoch": 0.0, "learning_rate": 2.9995648932777726e-05, "loss": 0.164, "step": 10600 }, { "epoch": 0.0, "learning_rate": 2.9995607678703367e-05, "loss": 0.1569, "step": 10700 }, { "epoch": 0.0, "learning_rate": 2.9995566424629012e-05, "loss": 0.1531, "step": 10800 }, { "epoch": 0.0, "learning_rate": 2.9995525170554657e-05, "loss": 0.1566, "step": 10900 }, { "epoch": 0.0, "learning_rate": 2.9995483916480302e-05, "loss": 0.1423, "step": 11000 }, { "epoch": 0.0, "learning_rate": 2.9995442662405944e-05, "loss": 0.1405, "step": 11100 }, { "epoch": 0.0, "learning_rate": 2.9995401408331592e-05, "loss": 0.1471, "step": 11200 }, { "epoch": 0.0, "learning_rate": 2.9995360154257234e-05, "loss": 0.1347, "step": 11300 }, { "epoch": 0.0, "learning_rate": 2.999531890018288e-05, "loss": 0.1674, "step": 11400 }, { "epoch": 0.0, "learning_rate": 2.9995277646108524e-05, "loss": 0.1459, "step": 11500 }, { "epoch": 0.0, "learning_rate": 2.999523639203417e-05, "loss": 0.1151, "step": 11600 }, { "epoch": 0.0, "learning_rate": 2.9995195137959814e-05, "loss": 0.1601, "step": 11700 }, { "epoch": 0.0, "learning_rate": 2.999515388388546e-05, "loss": 0.1225, "step": 11800 }, { "epoch": 0.0, "learning_rate": 2.9995112629811104e-05, "loss": 0.1402, "step": 11900 }, { "epoch": 0.0, "learning_rate": 2.9995071375736745e-05, "loss": 0.1446, "step": 12000 }, { "epoch": 0.0, "learning_rate": 2.9995030121662394e-05, "loss": 0.1492, "step": 12100 }, { "epoch": 0.0, "learning_rate": 2.9994988867588035e-05, "loss": 0.1349, "step": 12200 }, { "epoch": 0.0, "learning_rate": 2.999494761351368e-05, "loss": 0.1444, "step": 12300 }, { "epoch": 0.0, "learning_rate": 2.9994906359439325e-05, "loss": 0.1433, "step": 12400 }, { "epoch": 0.0, "learning_rate": 2.999486510536497e-05, "loss": 0.1442, "step": 12500 }, { "epoch": 0.0, "learning_rate": 2.999482385129061e-05, "loss": 0.1139, "step": 12600 }, { "epoch": 0.0, "learning_rate": 2.999478259721626e-05, "loss": 0.1315, "step": 12700 }, { "epoch": 0.0, "learning_rate": 2.99947413431419e-05, "loss": 0.1569, "step": 12800 }, { "epoch": 0.0, "learning_rate": 2.9994700089067547e-05, "loss": 0.1222, "step": 12900 }, { "epoch": 0.0, "learning_rate": 2.999465883499319e-05, "loss": 0.1266, "step": 13000 }, { "epoch": 0.0, "learning_rate": 2.9994617580918837e-05, "loss": 0.1427, "step": 13100 }, { "epoch": 0.0, "learning_rate": 2.9994576326844478e-05, "loss": 0.1287, "step": 13200 }, { "epoch": 0.0, "learning_rate": 2.9994535072770126e-05, "loss": 0.1353, "step": 13300 }, { "epoch": 0.0, "learning_rate": 2.9994493818695768e-05, "loss": 0.1429, "step": 13400 }, { "epoch": 0.0, "learning_rate": 2.9994452564621413e-05, "loss": 0.1268, "step": 13500 }, { "epoch": 0.0, "learning_rate": 2.9994411310547058e-05, "loss": 0.1187, "step": 13600 }, { "epoch": 0.0, "learning_rate": 2.9994370056472703e-05, "loss": 0.1497, "step": 13700 }, { "epoch": 0.0, "learning_rate": 2.9994328802398348e-05, "loss": 0.1325, "step": 13800 }, { "epoch": 0.0, "learning_rate": 2.9994288373405478e-05, "loss": 0.1375, "step": 13900 }, { "epoch": 0.0, "learning_rate": 2.9994247119331123e-05, "loss": 0.1542, "step": 14000 }, { "epoch": 0.0, "learning_rate": 2.9994205865256768e-05, "loss": 0.147, "step": 14100 }, { "epoch": 0.0, "learning_rate": 2.9994164611182413e-05, "loss": 0.1269, "step": 14200 }, { "epoch": 0.0, "learning_rate": 2.9994123357108055e-05, "loss": 0.1141, "step": 14300 }, { "epoch": 0.0, "learning_rate": 2.9994082103033703e-05, "loss": 0.1304, "step": 14400 }, { "epoch": 0.0, "learning_rate": 2.9994040848959348e-05, "loss": 0.1029, "step": 14500 }, { "epoch": 0.0, "learning_rate": 2.999399959488499e-05, "loss": 0.0914, "step": 14600 }, { "epoch": 0.0, "learning_rate": 2.9993958340810638e-05, "loss": 0.1389, "step": 14700 }, { "epoch": 0.0, "learning_rate": 2.999391708673628e-05, "loss": 0.1376, "step": 14800 }, { "epoch": 0.0, "learning_rate": 2.9993876245202667e-05, "loss": 0.1233, "step": 14900 }, { "epoch": 0.0, "learning_rate": 2.9993834991128312e-05, "loss": 0.1309, "step": 15000 }, { "epoch": 0.0, "learning_rate": 2.9993793737053954e-05, "loss": 0.133, "step": 15100 }, { "epoch": 0.0, "learning_rate": 2.9993752482979602e-05, "loss": 0.1391, "step": 15200 }, { "epoch": 0.0, "learning_rate": 2.9993711228905244e-05, "loss": 0.1182, "step": 15300 }, { "epoch": 0.0, "learning_rate": 2.999366997483089e-05, "loss": 0.1414, "step": 15400 }, { "epoch": 0.0, "learning_rate": 2.9993628720756534e-05, "loss": 0.1214, "step": 15500 }, { "epoch": 0.0, "learning_rate": 2.999358746668218e-05, "loss": 0.0809, "step": 15600 }, { "epoch": 0.0, "learning_rate": 2.999354621260782e-05, "loss": 0.1307, "step": 15700 }, { "epoch": 0.0, "learning_rate": 2.999350495853347e-05, "loss": 0.0915, "step": 15800 }, { "epoch": 0.0, "learning_rate": 2.9993463704459114e-05, "loss": 0.1278, "step": 15900 }, { "epoch": 0.0, "learning_rate": 2.9993422450384755e-05, "loss": 0.1266, "step": 16000 }, { "epoch": 0.0, "learning_rate": 2.9993381196310404e-05, "loss": 0.1135, "step": 16100 }, { "epoch": 0.0, "learning_rate": 2.9993339942236045e-05, "loss": 0.1251, "step": 16200 }, { "epoch": 0.0, "learning_rate": 2.999329868816169e-05, "loss": 0.1367, "step": 16300 }, { "epoch": 0.0, "learning_rate": 2.9993257434087335e-05, "loss": 0.1205, "step": 16400 }, { "epoch": 0.0, "learning_rate": 2.999321618001298e-05, "loss": 0.1462, "step": 16500 }, { "epoch": 0.0, "learning_rate": 2.999317492593862e-05, "loss": 0.1299, "step": 16600 }, { "epoch": 0.0, "learning_rate": 2.999313367186427e-05, "loss": 0.1049, "step": 16700 }, { "epoch": 0.0, "learning_rate": 2.999309241778991e-05, "loss": 0.1164, "step": 16800 }, { "epoch": 0.0, "learning_rate": 2.9993051163715557e-05, "loss": 0.1475, "step": 16900 }, { "epoch": 0.0, "learning_rate": 2.99930099096412e-05, "loss": 0.1131, "step": 17000 }, { "epoch": 0.0, "learning_rate": 2.9992968655566846e-05, "loss": 0.1297, "step": 17100 }, { "epoch": 0.0, "learning_rate": 2.9992927401492488e-05, "loss": 0.1154, "step": 17200 }, { "epoch": 0.0, "learning_rate": 2.9992886147418136e-05, "loss": 0.1271, "step": 17300 }, { "epoch": 0.0, "learning_rate": 2.9992844893343778e-05, "loss": 0.1306, "step": 17400 }, { "epoch": 0.0, "learning_rate": 2.9992803639269423e-05, "loss": 0.1157, "step": 17500 }, { "epoch": 0.0, "learning_rate": 2.9992762385195068e-05, "loss": 0.1525, "step": 17600 }, { "epoch": 0.0, "learning_rate": 2.9992721131120713e-05, "loss": 0.1188, "step": 17700 }, { "epoch": 0.0, "learning_rate": 2.9992679877046358e-05, "loss": 0.1322, "step": 17800 }, { "epoch": 0.0, "learning_rate": 2.9992638622972003e-05, "loss": 0.1322, "step": 17900 }, { "epoch": 0.0, "learning_rate": 2.9992597368897648e-05, "loss": 0.1288, "step": 18000 }, { "epoch": 0.0, "learning_rate": 2.999255611482329e-05, "loss": 0.123, "step": 18100 }, { "epoch": 0.0, "learning_rate": 2.9992514860748938e-05, "loss": 0.1447, "step": 18200 }, { "epoch": 0.0, "learning_rate": 2.999247360667458e-05, "loss": 0.1312, "step": 18300 }, { "epoch": 0.0, "learning_rate": 2.9992432352600224e-05, "loss": 0.1187, "step": 18400 }, { "epoch": 0.0, "learning_rate": 2.999239109852587e-05, "loss": 0.1371, "step": 18500 }, { "epoch": 0.0, "learning_rate": 2.9992349844451514e-05, "loss": 0.1273, "step": 18600 }, { "epoch": 0.0, "learning_rate": 2.9992308590377156e-05, "loss": 0.1524, "step": 18700 }, { "epoch": 0.0, "learning_rate": 2.9992267336302804e-05, "loss": 0.1354, "step": 18800 }, { "epoch": 0.0, "learning_rate": 2.9992226082228446e-05, "loss": 0.1204, "step": 18900 }, { "epoch": 0.0, "learning_rate": 2.999218482815409e-05, "loss": 0.1483, "step": 19000 }, { "epoch": 0.0, "learning_rate": 2.9992143574079736e-05, "loss": 0.1246, "step": 19100 }, { "epoch": 0.0, "learning_rate": 2.999210232000538e-05, "loss": 0.094, "step": 19200 }, { "epoch": 0.0, "learning_rate": 2.9992061065931022e-05, "loss": 0.1177, "step": 19300 }, { "epoch": 0.0, "learning_rate": 2.999201981185667e-05, "loss": 0.1363, "step": 19400 }, { "epoch": 0.0, "learning_rate": 2.9991978557782312e-05, "loss": 0.1276, "step": 19500 }, { "epoch": 0.0, "learning_rate": 2.9991937303707957e-05, "loss": 0.1299, "step": 19600 }, { "epoch": 0.0, "learning_rate": 2.9991896049633602e-05, "loss": 0.1389, "step": 19700 }, { "epoch": 0.0, "learning_rate": 2.9991854795559247e-05, "loss": 0.1224, "step": 19800 }, { "epoch": 0.0, "learning_rate": 2.9991813541484892e-05, "loss": 0.1317, "step": 19900 }, { "epoch": 0.0, "learning_rate": 2.9991772287410537e-05, "loss": 0.1223, "step": 20000 }, { "epoch": 0.0, "learning_rate": 2.9991731033336182e-05, "loss": 0.1234, "step": 20100 }, { "epoch": 0.0, "learning_rate": 2.9991689779261824e-05, "loss": 0.1001, "step": 20200 }, { "epoch": 0.0, "learning_rate": 2.9991648525187472e-05, "loss": 0.1282, "step": 20300 }, { "epoch": 0.0, "learning_rate": 2.9991607271113114e-05, "loss": 0.0976, "step": 20400 }, { "epoch": 0.0, "learning_rate": 2.999156601703876e-05, "loss": 0.1214, "step": 20500 }, { "epoch": 0.0, "learning_rate": 2.9991524762964403e-05, "loss": 0.1281, "step": 20600 }, { "epoch": 0.0, "learning_rate": 2.999148350889005e-05, "loss": 0.1027, "step": 20700 }, { "epoch": 0.0, "learning_rate": 2.999144225481569e-05, "loss": 0.0946, "step": 20800 }, { "epoch": 0.0, "learning_rate": 2.999140100074134e-05, "loss": 0.1501, "step": 20900 }, { "epoch": 0.0, "learning_rate": 2.9991360159207726e-05, "loss": 0.1294, "step": 21000 }, { "epoch": 0.0, "learning_rate": 2.9991318905133368e-05, "loss": 0.1237, "step": 21100 }, { "epoch": 0.0, "learning_rate": 2.9991277651059013e-05, "loss": 0.1613, "step": 21200 }, { "epoch": 0.0, "learning_rate": 2.9991236396984658e-05, "loss": 0.134, "step": 21300 }, { "epoch": 0.0, "learning_rate": 2.9991195142910303e-05, "loss": 0.1156, "step": 21400 }, { "epoch": 0.0, "learning_rate": 2.9991153888835948e-05, "loss": 0.111, "step": 21500 }, { "epoch": 0.0, "learning_rate": 2.9991112634761592e-05, "loss": 0.1627, "step": 21600 }, { "epoch": 0.0, "learning_rate": 2.9991071380687237e-05, "loss": 0.1362, "step": 21700 }, { "epoch": 0.0, "learning_rate": 2.999103012661288e-05, "loss": 0.1193, "step": 21800 }, { "epoch": 0.0, "learning_rate": 2.9990988872538527e-05, "loss": 0.133, "step": 21900 }, { "epoch": 0.0, "learning_rate": 2.999094761846417e-05, "loss": 0.1091, "step": 22000 }, { "epoch": 0.0, "learning_rate": 2.9990906364389814e-05, "loss": 0.1259, "step": 22100 }, { "epoch": 0.0, "learning_rate": 2.999086511031546e-05, "loss": 0.1234, "step": 22200 }, { "epoch": 0.0, "learning_rate": 2.9990823856241104e-05, "loss": 0.115, "step": 22300 }, { "epoch": 0.0, "learning_rate": 2.9990782602166745e-05, "loss": 0.1402, "step": 22400 }, { "epoch": 0.0, "learning_rate": 2.9990741348092394e-05, "loss": 0.1286, "step": 22500 }, { "epoch": 0.0, "learning_rate": 2.9990700094018035e-05, "loss": 0.1092, "step": 22600 }, { "epoch": 0.0, "learning_rate": 2.9990659252484423e-05, "loss": 0.1191, "step": 22700 }, { "epoch": 0.0, "learning_rate": 2.9990617998410068e-05, "loss": 0.1197, "step": 22800 }, { "epoch": 0.0, "learning_rate": 2.9990576744335716e-05, "loss": 0.1361, "step": 22900 }, { "epoch": 0.0, "learning_rate": 2.9990535490261358e-05, "loss": 0.1234, "step": 23000 }, { "epoch": 0.0, "learning_rate": 2.9990494236187003e-05, "loss": 0.1068, "step": 23100 }, { "epoch": 0.0, "learning_rate": 2.9990452982112648e-05, "loss": 0.1248, "step": 23200 }, { "epoch": 0.0, "learning_rate": 2.9990411728038293e-05, "loss": 0.1148, "step": 23300 }, { "epoch": 0.0, "learning_rate": 2.9990370473963934e-05, "loss": 0.1355, "step": 23400 }, { "epoch": 0.0, "learning_rate": 2.9990329219889583e-05, "loss": 0.1161, "step": 23500 }, { "epoch": 0.0, "learning_rate": 2.9990287965815224e-05, "loss": 0.12, "step": 23600 }, { "epoch": 0.0, "learning_rate": 2.999024671174087e-05, "loss": 0.1173, "step": 23700 }, { "epoch": 0.0, "learning_rate": 2.999020545766651e-05, "loss": 0.1193, "step": 23800 }, { "epoch": 0.0, "learning_rate": 2.999016420359216e-05, "loss": 0.1455, "step": 23900 }, { "epoch": 0.0, "learning_rate": 2.99901229495178e-05, "loss": 0.1296, "step": 24000 }, { "epoch": 0.0, "learning_rate": 2.9990081695443446e-05, "loss": 0.1402, "step": 24100 }, { "epoch": 0.0, "learning_rate": 2.999004044136909e-05, "loss": 0.1232, "step": 24200 }, { "epoch": 0.0, "learning_rate": 2.9989999187294736e-05, "loss": 0.1263, "step": 24300 }, { "epoch": 0.0, "learning_rate": 2.9989957933220377e-05, "loss": 0.1339, "step": 24400 }, { "epoch": 0.0, "learning_rate": 2.9989916679146026e-05, "loss": 0.1242, "step": 24500 }, { "epoch": 0.0, "learning_rate": 2.9989875425071667e-05, "loss": 0.1557, "step": 24600 }, { "epoch": 0.0, "learning_rate": 2.9989834170997312e-05, "loss": 0.128, "step": 24700 }, { "epoch": 0.0, "eval_accuracy": 0.9412505310427588, "eval_f1": 0.9412461155109177, "eval_loss": 0.15371489524841309, "eval_matthews_correlation": 0.8826317470341026, "eval_precision": 0.9413817526260211, "eval_recall": 0.9412500042409688, "eval_runtime": 1573.539, "eval_samples_per_second": 2628.686, "eval_steps_per_second": 2628.686, "step": 24769 }, { "epoch": 1.0, "learning_rate": 2.9989792916922957e-05, "loss": 0.1622, "step": 24800 }, { "epoch": 1.0, "learning_rate": 2.9989751662848602e-05, "loss": 0.1287, "step": 24900 }, { "epoch": 1.0, "learning_rate": 2.9989710408774247e-05, "loss": 0.1407, "step": 25000 }, { "epoch": 1.0, "learning_rate": 2.9989669154699892e-05, "loss": 0.1415, "step": 25100 }, { "epoch": 1.0, "learning_rate": 2.9989627900625537e-05, "loss": 0.1269, "step": 25200 }, { "epoch": 1.0, "learning_rate": 2.998958664655118e-05, "loss": 0.1266, "step": 25300 }, { "epoch": 1.0, "learning_rate": 2.9989545392476827e-05, "loss": 0.1323, "step": 25400 }, { "epoch": 1.0, "learning_rate": 2.998950413840247e-05, "loss": 0.1448, "step": 25500 }, { "epoch": 1.0, "learning_rate": 2.9989462884328114e-05, "loss": 0.1476, "step": 25600 }, { "epoch": 1.0, "learning_rate": 2.998942163025376e-05, "loss": 0.1401, "step": 25700 }, { "epoch": 1.0, "learning_rate": 2.9989380376179404e-05, "loss": 0.1688, "step": 25800 }, { "epoch": 1.0, "learning_rate": 2.9989339122105045e-05, "loss": 0.1509, "step": 25900 }, { "epoch": 1.0, "learning_rate": 2.9989297868030694e-05, "loss": 0.1828, "step": 26000 }, { "epoch": 1.0, "learning_rate": 2.9989256613956335e-05, "loss": 0.1663, "step": 26100 }, { "epoch": 1.0, "learning_rate": 2.998921535988198e-05, "loss": 0.1476, "step": 26200 }, { "epoch": 1.0, "learning_rate": 2.9989174105807625e-05, "loss": 0.1601, "step": 26300 }, { "epoch": 1.0, "learning_rate": 2.998913285173327e-05, "loss": 0.1552, "step": 26400 }, { "epoch": 1.0, "learning_rate": 2.998909159765891e-05, "loss": 0.1573, "step": 26500 }, { "epoch": 1.0, "learning_rate": 2.998905034358456e-05, "loss": 0.1707, "step": 26600 }, { "epoch": 1.0, "learning_rate": 2.99890090895102e-05, "loss": 0.1389, "step": 26700 }, { "epoch": 1.0, "learning_rate": 2.9988967835435847e-05, "loss": 0.1472, "step": 26800 }, { "epoch": 1.0, "learning_rate": 2.9988926581361495e-05, "loss": 0.1456, "step": 26900 }, { "epoch": 1.0, "learning_rate": 2.9988885327287137e-05, "loss": 0.1366, "step": 27000 }, { "epoch": 1.0, "learning_rate": 2.998884407321278e-05, "loss": 0.1623, "step": 27100 }, { "epoch": 1.0, "learning_rate": 2.9988802819138426e-05, "loss": 0.1525, "step": 27200 }, { "epoch": 1.0, "learning_rate": 2.998876156506407e-05, "loss": 0.1426, "step": 27300 }, { "epoch": 1.0, "learning_rate": 2.9988720310989713e-05, "loss": 0.1444, "step": 27400 }, { "epoch": 1.0, "learning_rate": 2.998867905691536e-05, "loss": 0.152, "step": 27500 }, { "epoch": 1.0, "learning_rate": 2.9988637802841003e-05, "loss": 0.1441, "step": 27600 }, { "epoch": 1.0, "learning_rate": 2.9988596548766648e-05, "loss": 0.1566, "step": 27700 }, { "epoch": 1.0, "learning_rate": 2.9988555294692293e-05, "loss": 0.1052, "step": 27800 }, { "epoch": 1.0, "learning_rate": 2.9988514040617938e-05, "loss": 0.1509, "step": 27900 }, { "epoch": 1.0, "learning_rate": 2.998847278654358e-05, "loss": 0.1519, "step": 28000 }, { "epoch": 1.0, "learning_rate": 2.9988431532469228e-05, "loss": 0.1331, "step": 28100 }, { "epoch": 1.0, "learning_rate": 2.998839027839487e-05, "loss": 0.1391, "step": 28200 }, { "epoch": 1.0, "learning_rate": 2.9988349024320514e-05, "loss": 0.1295, "step": 28300 }, { "epoch": 1.0, "learning_rate": 2.998830777024616e-05, "loss": 0.151, "step": 28400 }, { "epoch": 1.0, "learning_rate": 2.9988266516171804e-05, "loss": 0.1396, "step": 28500 }, { "epoch": 1.0, "learning_rate": 2.9988225262097446e-05, "loss": 0.1138, "step": 28600 }, { "epoch": 1.0, "learning_rate": 2.9988184008023094e-05, "loss": 0.138, "step": 28700 }, { "epoch": 1.0, "learning_rate": 2.9988143166489482e-05, "loss": 0.118, "step": 28800 }, { "epoch": 1.0, "learning_rate": 2.9988101912415127e-05, "loss": 0.1409, "step": 28900 }, { "epoch": 1.0, "learning_rate": 2.998806065834077e-05, "loss": 0.1454, "step": 29000 }, { "epoch": 1.0, "learning_rate": 2.9988019404266417e-05, "loss": 0.1435, "step": 29100 }, { "epoch": 1.0, "learning_rate": 2.9987978562732804e-05, "loss": 0.1201, "step": 29200 }, { "epoch": 1.0, "learning_rate": 2.9987937308658446e-05, "loss": 0.1533, "step": 29300 }, { "epoch": 1.0, "learning_rate": 2.9987896467124837e-05, "loss": 0.1382, "step": 29400 }, { "epoch": 1.0, "learning_rate": 2.998785521305048e-05, "loss": 0.1454, "step": 29500 }, { "epoch": 1.0, "learning_rate": 2.9987813958976127e-05, "loss": 0.1453, "step": 29600 }, { "epoch": 1.0, "learning_rate": 2.998777270490177e-05, "loss": 0.1374, "step": 29700 }, { "epoch": 1.0, "learning_rate": 2.9987731450827414e-05, "loss": 0.1434, "step": 29800 }, { "epoch": 1.0, "learning_rate": 2.998769019675306e-05, "loss": 0.1465, "step": 29900 }, { "epoch": 1.0, "learning_rate": 2.9987648942678703e-05, "loss": 0.1518, "step": 30000 }, { "epoch": 1.0, "learning_rate": 2.9987607688604345e-05, "loss": 0.1218, "step": 30100 }, { "epoch": 1.0, "learning_rate": 2.9987566434529993e-05, "loss": 0.1333, "step": 30200 }, { "epoch": 1.0, "learning_rate": 2.9987525180455635e-05, "loss": 0.1646, "step": 30300 }, { "epoch": 1.0, "learning_rate": 2.998748392638128e-05, "loss": 0.1376, "step": 30400 }, { "epoch": 1.0, "learning_rate": 2.998744267230692e-05, "loss": 0.1376, "step": 30500 }, { "epoch": 1.0, "learning_rate": 2.998740141823257e-05, "loss": 0.1368, "step": 30600 }, { "epoch": 1.0, "learning_rate": 2.998736016415821e-05, "loss": 0.1459, "step": 30700 }, { "epoch": 1.0, "learning_rate": 2.9987318910083856e-05, "loss": 0.1297, "step": 30800 }, { "epoch": 1.0, "learning_rate": 2.99872776560095e-05, "loss": 0.1576, "step": 30900 }, { "epoch": 1.0, "learning_rate": 2.9987236401935146e-05, "loss": 0.1458, "step": 31000 }, { "epoch": 1.0, "learning_rate": 2.998719514786079e-05, "loss": 0.1278, "step": 31100 }, { "epoch": 1.0, "learning_rate": 2.9987153893786436e-05, "loss": 0.1475, "step": 31200 }, { "epoch": 1.0, "learning_rate": 2.998711263971208e-05, "loss": 0.1608, "step": 31300 }, { "epoch": 1.0, "learning_rate": 2.9987071385637723e-05, "loss": 0.1405, "step": 31400 }, { "epoch": 1.0, "learning_rate": 2.998703013156337e-05, "loss": 0.1395, "step": 31500 }, { "epoch": 1.0, "learning_rate": 2.9986988877489013e-05, "loss": 0.1495, "step": 31600 }, { "epoch": 1.0, "learning_rate": 2.99869480359554e-05, "loss": 0.1523, "step": 31700 }, { "epoch": 1.0, "learning_rate": 2.9986906781881045e-05, "loss": 0.1298, "step": 31800 }, { "epoch": 1.0, "learning_rate": 2.998686552780669e-05, "loss": 0.1558, "step": 31900 }, { "epoch": 1.0, "learning_rate": 2.9986824273732335e-05, "loss": 0.1224, "step": 32000 }, { "epoch": 1.0, "learning_rate": 2.9986783019657977e-05, "loss": 0.1515, "step": 32100 }, { "epoch": 1.0, "learning_rate": 2.9986741765583625e-05, "loss": 0.1261, "step": 32200 }, { "epoch": 1.0, "learning_rate": 2.9986700511509267e-05, "loss": 0.1513, "step": 32300 }, { "epoch": 1.0, "learning_rate": 2.9986659257434912e-05, "loss": 0.1606, "step": 32400 }, { "epoch": 1.0, "learning_rate": 2.998661800336056e-05, "loss": 0.142, "step": 32500 }, { "epoch": 1.0, "learning_rate": 2.9986576749286202e-05, "loss": 0.1448, "step": 32600 }, { "epoch": 1.0, "learning_rate": 2.9986535495211847e-05, "loss": 0.1492, "step": 32700 }, { "epoch": 1.0, "learning_rate": 2.9986494241137492e-05, "loss": 0.1591, "step": 32800 }, { "epoch": 1.0, "learning_rate": 2.9986452987063137e-05, "loss": 0.1365, "step": 32900 }, { "epoch": 1.0, "learning_rate": 2.998641173298878e-05, "loss": 0.1585, "step": 33000 }, { "epoch": 1.0, "learning_rate": 2.9986370478914427e-05, "loss": 0.1526, "step": 33100 }, { "epoch": 1.0, "learning_rate": 2.9986329224840068e-05, "loss": 0.1301, "step": 33200 }, { "epoch": 1.0, "learning_rate": 2.9986287970765713e-05, "loss": 0.1698, "step": 33300 }, { "epoch": 1.0, "learning_rate": 2.9986246716691358e-05, "loss": 0.1513, "step": 33400 }, { "epoch": 1.0, "learning_rate": 2.9986205462617003e-05, "loss": 0.1554, "step": 33500 }, { "epoch": 1.0, "learning_rate": 2.9986164208542645e-05, "loss": 0.1619, "step": 33600 }, { "epoch": 1.0, "learning_rate": 2.9986122954468293e-05, "loss": 0.1456, "step": 33700 }, { "epoch": 1.0, "learning_rate": 2.9986081700393935e-05, "loss": 0.1724, "step": 33800 }, { "epoch": 1.0, "learning_rate": 2.998604044631958e-05, "loss": 0.1806, "step": 33900 }, { "epoch": 1.0, "learning_rate": 2.9985999192245225e-05, "loss": 0.1503, "step": 34000 }, { "epoch": 1.0, "learning_rate": 2.998595793817087e-05, "loss": 0.1341, "step": 34100 }, { "epoch": 1.0, "learning_rate": 2.998591668409651e-05, "loss": 0.1599, "step": 34200 }, { "epoch": 1.0, "learning_rate": 2.998587543002216e-05, "loss": 0.1327, "step": 34300 }, { "epoch": 1.0, "learning_rate": 2.99858341759478e-05, "loss": 0.1172, "step": 34400 }, { "epoch": 1.0, "learning_rate": 2.9985792921873446e-05, "loss": 0.1544, "step": 34500 }, { "epoch": 1.0, "learning_rate": 2.9985751667799095e-05, "loss": 0.146, "step": 34600 }, { "epoch": 1.0, "learning_rate": 2.9985710413724736e-05, "loss": 0.1323, "step": 34700 }, { "epoch": 1.0, "learning_rate": 2.998566915965038e-05, "loss": 0.1682, "step": 34800 }, { "epoch": 1.0, "learning_rate": 2.9985627905576026e-05, "loss": 0.1505, "step": 34900 }, { "epoch": 1.0, "learning_rate": 2.998558665150167e-05, "loss": 0.1501, "step": 35000 }, { "epoch": 1.0, "learning_rate": 2.9985545397427313e-05, "loss": 0.1841, "step": 35100 }, { "epoch": 1.0, "learning_rate": 2.998550414335296e-05, "loss": 0.1416, "step": 35200 }, { "epoch": 1.0, "learning_rate": 2.9985462889278603e-05, "loss": 0.1621, "step": 35300 }, { "epoch": 1.0, "learning_rate": 2.9985421635204247e-05, "loss": 0.1578, "step": 35400 }, { "epoch": 1.0, "learning_rate": 2.9985380381129892e-05, "loss": 0.1456, "step": 35500 }, { "epoch": 1.0, "learning_rate": 2.9985339127055537e-05, "loss": 0.1641, "step": 35600 }, { "epoch": 1.0, "learning_rate": 2.998529787298118e-05, "loss": 0.146, "step": 35700 }, { "epoch": 1.0, "learning_rate": 2.9985256618906827e-05, "loss": 0.1264, "step": 35800 }, { "epoch": 1.0, "learning_rate": 2.998521536483247e-05, "loss": 0.149, "step": 35900 }, { "epoch": 1.0, "learning_rate": 2.9985174110758114e-05, "loss": 0.1421, "step": 36000 }, { "epoch": 1.0, "learning_rate": 2.998513285668376e-05, "loss": 0.1442, "step": 36100 }, { "epoch": 1.0, "learning_rate": 2.9985091602609404e-05, "loss": 0.1563, "step": 36200 }, { "epoch": 1.0, "learning_rate": 2.9985050348535045e-05, "loss": 0.1368, "step": 36300 }, { "epoch": 1.0, "learning_rate": 2.9985009094460694e-05, "loss": 0.1202, "step": 36400 }, { "epoch": 1.0, "learning_rate": 2.9984967840386335e-05, "loss": 0.156, "step": 36500 }, { "epoch": 1.0, "learning_rate": 2.998492658631198e-05, "loss": 0.1316, "step": 36600 }, { "epoch": 1.0, "learning_rate": 2.998488533223763e-05, "loss": 0.1266, "step": 36700 }, { "epoch": 1.0, "learning_rate": 2.998484407816327e-05, "loss": 0.1362, "step": 36800 }, { "epoch": 1.0, "learning_rate": 2.9984802824088915e-05, "loss": 0.1532, "step": 36900 }, { "epoch": 1.0, "learning_rate": 2.998476157001456e-05, "loss": 0.1364, "step": 37000 }, { "epoch": 1.0, "learning_rate": 2.9984720315940205e-05, "loss": 0.1402, "step": 37100 }, { "epoch": 1.0, "learning_rate": 2.9984679061865847e-05, "loss": 0.1469, "step": 37200 }, { "epoch": 1.0, "learning_rate": 2.9984637807791495e-05, "loss": 0.1281, "step": 37300 }, { "epoch": 1.0, "learning_rate": 2.9984596966257883e-05, "loss": 0.1166, "step": 37400 }, { "epoch": 1.0, "learning_rate": 2.9984555712183524e-05, "loss": 0.14, "step": 37500 }, { "epoch": 1.0, "learning_rate": 2.998451445810917e-05, "loss": 0.1445, "step": 37600 }, { "epoch": 1.0, "learning_rate": 2.9984473204034814e-05, "loss": 0.1244, "step": 37700 }, { "epoch": 1.0, "learning_rate": 2.998443194996046e-05, "loss": 0.1236, "step": 37800 }, { "epoch": 1.0, "learning_rate": 2.99843906958861e-05, "loss": 0.1377, "step": 37900 }, { "epoch": 1.0, "learning_rate": 2.998434944181175e-05, "loss": 0.1247, "step": 38000 }, { "epoch": 1.0, "learning_rate": 2.9984308187737394e-05, "loss": 0.1328, "step": 38100 }, { "epoch": 1.0, "learning_rate": 2.9984266933663036e-05, "loss": 0.1409, "step": 38200 }, { "epoch": 1.0, "learning_rate": 2.9984225679588684e-05, "loss": 0.1145, "step": 38300 }, { "epoch": 1.0, "learning_rate": 2.9984184425514326e-05, "loss": 0.1356, "step": 38400 }, { "epoch": 1.0, "learning_rate": 2.998414317143997e-05, "loss": 0.1317, "step": 38500 }, { "epoch": 1.0, "learning_rate": 2.9984101917365616e-05, "loss": 0.1297, "step": 38600 }, { "epoch": 1.0, "learning_rate": 2.9984061075832e-05, "loss": 0.1321, "step": 38700 }, { "epoch": 1.0, "learning_rate": 2.9984019821757648e-05, "loss": 0.1552, "step": 38800 }, { "epoch": 1.0, "learning_rate": 2.998397856768329e-05, "loss": 0.1352, "step": 38900 }, { "epoch": 1.0, "learning_rate": 2.9983937313608935e-05, "loss": 0.1217, "step": 39000 }, { "epoch": 1.0, "learning_rate": 2.998389605953458e-05, "loss": 0.1159, "step": 39100 }, { "epoch": 1.0, "learning_rate": 2.9983854805460225e-05, "loss": 0.1141, "step": 39200 }, { "epoch": 1.0, "learning_rate": 2.9983813551385866e-05, "loss": 0.0878, "step": 39300 }, { "epoch": 1.0, "learning_rate": 2.9983772297311515e-05, "loss": 0.1115, "step": 39400 }, { "epoch": 1.0, "learning_rate": 2.9983731455777902e-05, "loss": 0.1444, "step": 39500 }, { "epoch": 1.0, "learning_rate": 2.9983690201703547e-05, "loss": 0.1187, "step": 39600 }, { "epoch": 1.0, "learning_rate": 2.998364894762919e-05, "loss": 0.1168, "step": 39700 }, { "epoch": 1.0, "learning_rate": 2.9983607693554837e-05, "loss": 0.1417, "step": 39800 }, { "epoch": 1.0, "learning_rate": 2.998356643948048e-05, "loss": 0.1224, "step": 39900 }, { "epoch": 1.0, "learning_rate": 2.9983525185406124e-05, "loss": 0.1378, "step": 40000 }, { "epoch": 1.0, "learning_rate": 2.998348393133177e-05, "loss": 0.1148, "step": 40100 }, { "epoch": 1.0, "learning_rate": 2.9983442677257414e-05, "loss": 0.1312, "step": 40200 }, { "epoch": 1.0, "learning_rate": 2.9983401423183055e-05, "loss": 0.1075, "step": 40300 }, { "epoch": 1.0, "learning_rate": 2.9983360169108704e-05, "loss": 0.0949, "step": 40400 }, { "epoch": 1.0, "learning_rate": 2.9983318915034345e-05, "loss": 0.1106, "step": 40500 }, { "epoch": 1.0, "learning_rate": 2.998327766095999e-05, "loss": 0.0915, "step": 40600 }, { "epoch": 1.0, "learning_rate": 2.9983236406885635e-05, "loss": 0.1321, "step": 40700 }, { "epoch": 1.0, "learning_rate": 2.998319515281128e-05, "loss": 0.1145, "step": 40800 }, { "epoch": 1.0, "learning_rate": 2.9983153898736925e-05, "loss": 0.111, "step": 40900 }, { "epoch": 1.0, "learning_rate": 2.998311264466257e-05, "loss": 0.1445, "step": 41000 }, { "epoch": 1.0, "learning_rate": 2.9983071390588215e-05, "loss": 0.1068, "step": 41100 }, { "epoch": 1.0, "learning_rate": 2.9983030136513857e-05, "loss": 0.1167, "step": 41200 }, { "epoch": 1.0, "learning_rate": 2.9982988882439505e-05, "loss": 0.1445, "step": 41300 }, { "epoch": 1.0, "learning_rate": 2.9982947628365147e-05, "loss": 0.1114, "step": 41400 }, { "epoch": 1.0, "learning_rate": 2.998290637429079e-05, "loss": 0.104, "step": 41500 }, { "epoch": 1.0, "learning_rate": 2.9982865120216437e-05, "loss": 0.1261, "step": 41600 }, { "epoch": 1.0, "learning_rate": 2.998282386614208e-05, "loss": 0.1371, "step": 41700 }, { "epoch": 1.0, "learning_rate": 2.9982782612067723e-05, "loss": 0.1097, "step": 41800 }, { "epoch": 1.0, "learning_rate": 2.998274135799337e-05, "loss": 0.1258, "step": 41900 }, { "epoch": 1.0, "learning_rate": 2.9982700103919013e-05, "loss": 0.1119, "step": 42000 }, { "epoch": 1.0, "learning_rate": 2.9982658849844658e-05, "loss": 0.1315, "step": 42100 }, { "epoch": 1.0, "learning_rate": 2.9982617595770303e-05, "loss": 0.1242, "step": 42200 }, { "epoch": 1.0, "learning_rate": 2.9982576341695948e-05, "loss": 0.1184, "step": 42300 }, { "epoch": 1.0, "learning_rate": 2.998253508762159e-05, "loss": 0.1344, "step": 42400 }, { "epoch": 1.0, "learning_rate": 2.9982493833547238e-05, "loss": 0.1254, "step": 42500 }, { "epoch": 1.0, "learning_rate": 2.998245257947288e-05, "loss": 0.121, "step": 42600 }, { "epoch": 1.0, "learning_rate": 2.9982411325398525e-05, "loss": 0.1254, "step": 42700 }, { "epoch": 1.0, "learning_rate": 2.998237007132417e-05, "loss": 0.1308, "step": 42800 }, { "epoch": 1.0, "learning_rate": 2.9982328817249814e-05, "loss": 0.1099, "step": 42900 }, { "epoch": 1.0, "learning_rate": 2.998228756317546e-05, "loss": 0.1433, "step": 43000 }, { "epoch": 1.0, "learning_rate": 2.9982246309101104e-05, "loss": 0.1263, "step": 43100 }, { "epoch": 1.0, "learning_rate": 2.998220505502675e-05, "loss": 0.1217, "step": 43200 }, { "epoch": 1.0, "learning_rate": 2.998216380095239e-05, "loss": 0.1298, "step": 43300 }, { "epoch": 1.0, "learning_rate": 2.998212254687804e-05, "loss": 0.1359, "step": 43400 }, { "epoch": 1.0, "learning_rate": 2.998208129280368e-05, "loss": 0.1514, "step": 43500 }, { "epoch": 1.0, "learning_rate": 2.9982040038729326e-05, "loss": 0.1165, "step": 43600 }, { "epoch": 1.0, "learning_rate": 2.998199878465497e-05, "loss": 0.1191, "step": 43700 }, { "epoch": 1.0, "learning_rate": 2.9981957530580616e-05, "loss": 0.1484, "step": 43800 }, { "epoch": 1.0, "learning_rate": 2.9981916276506257e-05, "loss": 0.1221, "step": 43900 }, { "epoch": 1.0, "learning_rate": 2.9981875022431906e-05, "loss": 0.0714, "step": 44000 }, { "epoch": 1.0, "learning_rate": 2.9981833768357547e-05, "loss": 0.1398, "step": 44100 }, { "epoch": 1.0, "learning_rate": 2.9981792514283192e-05, "loss": 0.13, "step": 44200 }, { "epoch": 1.0, "learning_rate": 2.9981751260208837e-05, "loss": 0.1309, "step": 44300 }, { "epoch": 1.0, "learning_rate": 2.9981710006134482e-05, "loss": 0.1177, "step": 44400 }, { "epoch": 1.0, "learning_rate": 2.9981668752060124e-05, "loss": 0.1376, "step": 44500 }, { "epoch": 1.0, "learning_rate": 2.9981627497985772e-05, "loss": 0.1107, "step": 44600 }, { "epoch": 1.0, "learning_rate": 2.9981586243911414e-05, "loss": 0.1182, "step": 44700 }, { "epoch": 1.0, "learning_rate": 2.998154498983706e-05, "loss": 0.1288, "step": 44800 }, { "epoch": 1.0, "learning_rate": 2.9981503735762704e-05, "loss": 0.1198, "step": 44900 }, { "epoch": 1.0, "learning_rate": 2.998146248168835e-05, "loss": 0.0918, "step": 45000 }, { "epoch": 1.0, "learning_rate": 2.9981421227613994e-05, "loss": 0.1233, "step": 45100 }, { "epoch": 1.0, "learning_rate": 2.998137997353964e-05, "loss": 0.0905, "step": 45200 }, { "epoch": 1.0, "learning_rate": 2.9981338719465284e-05, "loss": 0.1315, "step": 45300 }, { "epoch": 1.0, "learning_rate": 2.9981297465390925e-05, "loss": 0.1153, "step": 45400 }, { "epoch": 1.0, "learning_rate": 2.9981256211316574e-05, "loss": 0.0926, "step": 45500 }, { "epoch": 1.0, "learning_rate": 2.9981214957242215e-05, "loss": 0.1079, "step": 45600 }, { "epoch": 1.0, "learning_rate": 2.998117370316786e-05, "loss": 0.1597, "step": 45700 }, { "epoch": 1.0, "learning_rate": 2.9981132449093505e-05, "loss": 0.1067, "step": 45800 }, { "epoch": 1.0, "learning_rate": 2.9981091607559893e-05, "loss": 0.1353, "step": 45900 }, { "epoch": 1.0, "learning_rate": 2.9981050353485538e-05, "loss": 0.1461, "step": 46000 }, { "epoch": 1.0, "learning_rate": 2.998100909941118e-05, "loss": 0.123, "step": 46100 }, { "epoch": 1.0, "learning_rate": 2.9980967845336828e-05, "loss": 0.1224, "step": 46200 }, { "epoch": 1.0, "learning_rate": 2.998092659126247e-05, "loss": 0.124, "step": 46300 }, { "epoch": 1.0, "learning_rate": 2.9980885337188114e-05, "loss": 0.1518, "step": 46400 }, { "epoch": 1.0, "learning_rate": 2.9980844083113763e-05, "loss": 0.1229, "step": 46500 }, { "epoch": 1.0, "learning_rate": 2.9980802829039404e-05, "loss": 0.1259, "step": 46600 }, { "epoch": 1.0, "learning_rate": 2.998076157496505e-05, "loss": 0.112, "step": 46700 }, { "epoch": 1.0, "learning_rate": 2.9980720320890694e-05, "loss": 0.1137, "step": 46800 }, { "epoch": 1.0, "learning_rate": 2.998067906681634e-05, "loss": 0.1286, "step": 46900 }, { "epoch": 1.0, "learning_rate": 2.998063781274198e-05, "loss": 0.1205, "step": 47000 }, { "epoch": 1.0, "learning_rate": 2.998059655866763e-05, "loss": 0.1052, "step": 47100 }, { "epoch": 1.0, "learning_rate": 2.998055530459327e-05, "loss": 0.1469, "step": 47200 }, { "epoch": 1.0, "learning_rate": 2.9980514050518916e-05, "loss": 0.1166, "step": 47300 }, { "epoch": 1.0, "learning_rate": 2.9980473208985303e-05, "loss": 0.1041, "step": 47400 }, { "epoch": 1.0, "learning_rate": 2.9980431954910945e-05, "loss": 0.1268, "step": 47500 }, { "epoch": 1.0, "learning_rate": 2.9980390700836593e-05, "loss": 0.1193, "step": 47600 }, { "epoch": 1.0, "learning_rate": 2.9980349446762235e-05, "loss": 0.1408, "step": 47700 }, { "epoch": 1.0, "learning_rate": 2.998030819268788e-05, "loss": 0.1067, "step": 47800 }, { "epoch": 1.0, "learning_rate": 2.9980266938613528e-05, "loss": 0.1193, "step": 47900 }, { "epoch": 1.0, "learning_rate": 2.998022568453917e-05, "loss": 0.1211, "step": 48000 }, { "epoch": 1.0, "learning_rate": 2.9980184430464815e-05, "loss": 0.1295, "step": 48100 }, { "epoch": 1.0, "learning_rate": 2.998014317639046e-05, "loss": 0.13, "step": 48200 }, { "epoch": 1.0, "learning_rate": 2.9980101922316105e-05, "loss": 0.1217, "step": 48300 }, { "epoch": 1.0, "learning_rate": 2.9980060668241746e-05, "loss": 0.1054, "step": 48400 }, { "epoch": 1.0, "learning_rate": 2.9980019414167394e-05, "loss": 0.1127, "step": 48500 }, { "epoch": 1.0, "learning_rate": 2.9979978160093036e-05, "loss": 0.1308, "step": 48600 }, { "epoch": 1.0, "learning_rate": 2.997993690601868e-05, "loss": 0.1329, "step": 48700 }, { "epoch": 1.0, "learning_rate": 2.9979895651944326e-05, "loss": 0.14, "step": 48800 }, { "epoch": 1.0, "learning_rate": 2.997985439786997e-05, "loss": 0.1262, "step": 48900 }, { "epoch": 1.0, "learning_rate": 2.9979813143795613e-05, "loss": 0.1331, "step": 49000 }, { "epoch": 1.0, "learning_rate": 2.997977188972126e-05, "loss": 0.1177, "step": 49100 }, { "epoch": 1.0, "learning_rate": 2.9979730635646902e-05, "loss": 0.134, "step": 49200 }, { "epoch": 1.0, "learning_rate": 2.9979689381572547e-05, "loss": 0.1338, "step": 49300 }, { "epoch": 1.0, "learning_rate": 2.9979648127498192e-05, "loss": 0.1415, "step": 49400 }, { "epoch": 1.0, "learning_rate": 2.9979606873423837e-05, "loss": 0.1381, "step": 49500 }, { "epoch": 1.0, "eval_accuracy": 0.942031046023061, "eval_f1": 0.9420274047220744, "eval_loss": 0.15210944414138794, "eval_matthews_correlation": 0.8841713498744154, "eval_precision": 0.9421407920220504, "eval_recall": 0.9420305647232377, "eval_runtime": 1387.9505, "eval_samples_per_second": 2980.178, "eval_steps_per_second": 2980.178, "step": 49538 }, { "epoch": 2.0, "learning_rate": 2.997956561934948e-05, "loss": 0.1507, "step": 49600 }, { "epoch": 2.0, "learning_rate": 2.9979524365275127e-05, "loss": 0.132, "step": 49700 }, { "epoch": 2.0, "learning_rate": 2.997948311120077e-05, "loss": 0.1351, "step": 49800 }, { "epoch": 2.0, "learning_rate": 2.9979441857126414e-05, "loss": 0.1282, "step": 49900 }, { "epoch": 2.0, "learning_rate": 2.9979400603052062e-05, "loss": 0.1244, "step": 50000 }, { "epoch": 2.0, "learning_rate": 2.9979359348977704e-05, "loss": 0.1386, "step": 50100 }, { "epoch": 2.0, "learning_rate": 2.997931809490335e-05, "loss": 0.1273, "step": 50200 }, { "epoch": 2.0, "learning_rate": 2.9979276840828994e-05, "loss": 0.1344, "step": 50300 }, { "epoch": 2.0, "learning_rate": 2.997923558675464e-05, "loss": 0.1375, "step": 50400 }, { "epoch": 2.0, "learning_rate": 2.997919433268028e-05, "loss": 0.148, "step": 50500 }, { "epoch": 2.0, "learning_rate": 2.997915307860593e-05, "loss": 0.1519, "step": 50600 }, { "epoch": 2.0, "learning_rate": 2.997911182453157e-05, "loss": 0.1551, "step": 50700 }, { "epoch": 2.0, "learning_rate": 2.9979070570457215e-05, "loss": 0.1844, "step": 50800 }, { "epoch": 2.0, "learning_rate": 2.997902931638286e-05, "loss": 0.1535, "step": 50900 }, { "epoch": 2.0, "learning_rate": 2.9978988062308505e-05, "loss": 0.1542, "step": 51000 }, { "epoch": 2.0, "learning_rate": 2.9978946808234147e-05, "loss": 0.1406, "step": 51100 }, { "epoch": 2.0, "learning_rate": 2.9978905554159795e-05, "loss": 0.1647, "step": 51200 }, { "epoch": 2.0, "learning_rate": 2.9978864300085437e-05, "loss": 0.1429, "step": 51300 }, { "epoch": 2.0, "learning_rate": 2.9978823046011082e-05, "loss": 0.161, "step": 51400 }, { "epoch": 2.0, "learning_rate": 2.9978781791936727e-05, "loss": 0.1395, "step": 51500 }, { "epoch": 2.0, "learning_rate": 2.997874053786237e-05, "loss": 0.1425, "step": 51600 }, { "epoch": 2.0, "learning_rate": 2.9978699283788013e-05, "loss": 0.1512, "step": 51700 }, { "epoch": 2.0, "learning_rate": 2.997865802971366e-05, "loss": 0.1334, "step": 51800 }, { "epoch": 2.0, "learning_rate": 2.9978616775639303e-05, "loss": 0.1536, "step": 51900 }, { "epoch": 2.0, "learning_rate": 2.9978575521564948e-05, "loss": 0.1398, "step": 52000 }, { "epoch": 2.0, "learning_rate": 2.9978534267490596e-05, "loss": 0.1366, "step": 52100 }, { "epoch": 2.0, "learning_rate": 2.9978493013416238e-05, "loss": 0.1508, "step": 52200 }, { "epoch": 2.0, "learning_rate": 2.9978451759341883e-05, "loss": 0.1413, "step": 52300 }, { "epoch": 2.0, "learning_rate": 2.9978410505267528e-05, "loss": 0.1459, "step": 52400 }, { "epoch": 2.0, "learning_rate": 2.9978369251193173e-05, "loss": 0.1295, "step": 52500 }, { "epoch": 2.0, "learning_rate": 2.9978327997118815e-05, "loss": 0.1225, "step": 52600 }, { "epoch": 2.0, "learning_rate": 2.9978286743044463e-05, "loss": 0.1369, "step": 52700 }, { "epoch": 2.0, "learning_rate": 2.9978245488970105e-05, "loss": 0.1569, "step": 52800 }, { "epoch": 2.0, "learning_rate": 2.997820423489575e-05, "loss": 0.1257, "step": 52900 }, { "epoch": 2.0, "learning_rate": 2.9978162980821394e-05, "loss": 0.1295, "step": 53000 }, { "epoch": 2.0, "learning_rate": 2.997812172674704e-05, "loss": 0.1445, "step": 53100 }, { "epoch": 2.0, "learning_rate": 2.997808047267268e-05, "loss": 0.1323, "step": 53200 }, { "epoch": 2.0, "learning_rate": 2.997803963113907e-05, "loss": 0.1238, "step": 53300 }, { "epoch": 2.0, "learning_rate": 2.9977998377064717e-05, "loss": 0.1092, "step": 53400 }, { "epoch": 2.0, "learning_rate": 2.9977957535531105e-05, "loss": 0.1483, "step": 53500 }, { "epoch": 2.0, "learning_rate": 2.997791628145675e-05, "loss": 0.1059, "step": 53600 }, { "epoch": 2.0, "learning_rate": 2.997787502738239e-05, "loss": 0.1509, "step": 53700 }, { "epoch": 2.0, "learning_rate": 2.9977833773308036e-05, "loss": 0.1375, "step": 53800 }, { "epoch": 2.0, "learning_rate": 2.997779251923368e-05, "loss": 0.1362, "step": 53900 }, { "epoch": 2.0, "learning_rate": 2.9977751265159326e-05, "loss": 0.1153, "step": 54000 }, { "epoch": 2.0, "learning_rate": 2.9977710011084968e-05, "loss": 0.1498, "step": 54100 }, { "epoch": 2.0, "learning_rate": 2.9977668757010616e-05, "loss": 0.1421, "step": 54200 }, { "epoch": 2.0, "learning_rate": 2.9977627502936258e-05, "loss": 0.1391, "step": 54300 }, { "epoch": 2.0, "learning_rate": 2.9977586248861903e-05, "loss": 0.1347, "step": 54400 }, { "epoch": 2.0, "learning_rate": 2.9977544994787548e-05, "loss": 0.1515, "step": 54500 }, { "epoch": 2.0, "learning_rate": 2.9977503740713193e-05, "loss": 0.1356, "step": 54600 }, { "epoch": 2.0, "learning_rate": 2.9977462486638834e-05, "loss": 0.1271, "step": 54700 }, { "epoch": 2.0, "learning_rate": 2.9977421232564483e-05, "loss": 0.162, "step": 54800 }, { "epoch": 2.0, "learning_rate": 2.9977379978490127e-05, "loss": 0.1029, "step": 54900 }, { "epoch": 2.0, "learning_rate": 2.997733872441577e-05, "loss": 0.1429, "step": 55000 }, { "epoch": 2.0, "learning_rate": 2.9977297470341417e-05, "loss": 0.1495, "step": 55100 }, { "epoch": 2.0, "learning_rate": 2.997725621626706e-05, "loss": 0.1389, "step": 55200 }, { "epoch": 2.0, "learning_rate": 2.9977214962192704e-05, "loss": 0.1377, "step": 55300 }, { "epoch": 2.0, "learning_rate": 2.997717370811835e-05, "loss": 0.1273, "step": 55400 }, { "epoch": 2.0, "learning_rate": 2.9977132454043994e-05, "loss": 0.14, "step": 55500 }, { "epoch": 2.0, "learning_rate": 2.9977091199969636e-05, "loss": 0.1491, "step": 55600 }, { "epoch": 2.0, "learning_rate": 2.9977049945895284e-05, "loss": 0.1362, "step": 55700 }, { "epoch": 2.0, "learning_rate": 2.997700910436167e-05, "loss": 0.1254, "step": 55800 }, { "epoch": 2.0, "learning_rate": 2.9976967850287313e-05, "loss": 0.1389, "step": 55900 }, { "epoch": 2.0, "learning_rate": 2.9976926596212958e-05, "loss": 0.154, "step": 56000 }, { "epoch": 2.0, "learning_rate": 2.9976885342138603e-05, "loss": 0.138, "step": 56100 }, { "epoch": 2.0, "learning_rate": 2.9976844088064248e-05, "loss": 0.139, "step": 56200 }, { "epoch": 2.0, "learning_rate": 2.9976802833989893e-05, "loss": 0.1337, "step": 56300 }, { "epoch": 2.0, "learning_rate": 2.9976761579915538e-05, "loss": 0.1509, "step": 56400 }, { "epoch": 2.0, "learning_rate": 2.9976720325841183e-05, "loss": 0.1452, "step": 56500 }, { "epoch": 2.0, "learning_rate": 2.9976679071766824e-05, "loss": 0.1306, "step": 56600 }, { "epoch": 2.0, "learning_rate": 2.9976637817692473e-05, "loss": 0.1358, "step": 56700 }, { "epoch": 2.0, "learning_rate": 2.9976596563618114e-05, "loss": 0.1243, "step": 56800 }, { "epoch": 2.0, "learning_rate": 2.997655530954376e-05, "loss": 0.1451, "step": 56900 }, { "epoch": 2.0, "learning_rate": 2.9976514055469404e-05, "loss": 0.1322, "step": 57000 }, { "epoch": 2.0, "learning_rate": 2.997647280139505e-05, "loss": 0.1396, "step": 57100 }, { "epoch": 2.0, "learning_rate": 2.997643154732069e-05, "loss": 0.1636, "step": 57200 }, { "epoch": 2.0, "learning_rate": 2.997639029324634e-05, "loss": 0.1388, "step": 57300 }, { "epoch": 2.0, "learning_rate": 2.997634903917198e-05, "loss": 0.14, "step": 57400 }, { "epoch": 2.0, "learning_rate": 2.9976307785097626e-05, "loss": 0.1556, "step": 57500 }, { "epoch": 2.0, "learning_rate": 2.997626653102327e-05, "loss": 0.1406, "step": 57600 }, { "epoch": 2.0, "learning_rate": 2.9976225276948916e-05, "loss": 0.1241, "step": 57700 }, { "epoch": 2.0, "learning_rate": 2.9976184022874557e-05, "loss": 0.1656, "step": 57800 }, { "epoch": 2.0, "learning_rate": 2.9976142768800206e-05, "loss": 0.1459, "step": 57900 }, { "epoch": 2.0, "learning_rate": 2.9976101514725847e-05, "loss": 0.1462, "step": 58000 }, { "epoch": 2.0, "learning_rate": 2.9976060260651492e-05, "loss": 0.1437, "step": 58100 }, { "epoch": 2.0, "learning_rate": 2.9976019006577137e-05, "loss": 0.1607, "step": 58200 }, { "epoch": 2.0, "learning_rate": 2.9975978165043528e-05, "loss": 0.146, "step": 58300 }, { "epoch": 2.0, "learning_rate": 2.997593691096917e-05, "loss": 0.1542, "step": 58400 }, { "epoch": 2.0, "learning_rate": 2.9975895656894815e-05, "loss": 0.1431, "step": 58500 }, { "epoch": 2.0, "learning_rate": 2.997585440282046e-05, "loss": 0.1782, "step": 58600 }, { "epoch": 2.0, "learning_rate": 2.9975813148746105e-05, "loss": 0.177, "step": 58700 }, { "epoch": 2.0, "learning_rate": 2.9975771894671746e-05, "loss": 0.1327, "step": 58800 }, { "epoch": 2.0, "learning_rate": 2.9975730640597395e-05, "loss": 0.1325, "step": 58900 }, { "epoch": 2.0, "learning_rate": 2.9975689386523036e-05, "loss": 0.1535, "step": 59000 }, { "epoch": 2.0, "learning_rate": 2.997564813244868e-05, "loss": 0.122, "step": 59100 }, { "epoch": 2.0, "learning_rate": 2.9975606878374326e-05, "loss": 0.1304, "step": 59200 }, { "epoch": 2.0, "learning_rate": 2.997556562429997e-05, "loss": 0.1326, "step": 59300 }, { "epoch": 2.0, "learning_rate": 2.9975524370225613e-05, "loss": 0.1542, "step": 59400 }, { "epoch": 2.0, "learning_rate": 2.997548311615126e-05, "loss": 0.1349, "step": 59500 }, { "epoch": 2.0, "learning_rate": 2.9975441862076903e-05, "loss": 0.1556, "step": 59600 }, { "epoch": 2.0, "learning_rate": 2.9975400608002548e-05, "loss": 0.1555, "step": 59700 }, { "epoch": 2.0, "learning_rate": 2.9975359353928196e-05, "loss": 0.1384, "step": 59800 }, { "epoch": 2.0, "learning_rate": 2.9975318099853838e-05, "loss": 0.1839, "step": 59900 }, { "epoch": 2.0, "learning_rate": 2.9975276845779483e-05, "loss": 0.1432, "step": 60000 }, { "epoch": 2.0, "learning_rate": 2.9975235591705128e-05, "loss": 0.1479, "step": 60100 }, { "epoch": 2.0, "learning_rate": 2.9975194337630773e-05, "loss": 0.1689, "step": 60200 }, { "epoch": 2.0, "learning_rate": 2.9975153083556414e-05, "loss": 0.1339, "step": 60300 }, { "epoch": 2.0, "learning_rate": 2.9975111829482063e-05, "loss": 0.1528, "step": 60400 }, { "epoch": 2.0, "learning_rate": 2.9975070575407704e-05, "loss": 0.1409, "step": 60500 }, { "epoch": 2.0, "learning_rate": 2.997502932133335e-05, "loss": 0.1224, "step": 60600 }, { "epoch": 2.0, "learning_rate": 2.9974988067258994e-05, "loss": 0.1473, "step": 60700 }, { "epoch": 2.0, "learning_rate": 2.997494681318464e-05, "loss": 0.1305, "step": 60800 }, { "epoch": 2.0, "learning_rate": 2.997490555911028e-05, "loss": 0.1506, "step": 60900 }, { "epoch": 2.0, "learning_rate": 2.997486430503593e-05, "loss": 0.1501, "step": 61000 }, { "epoch": 2.0, "learning_rate": 2.997482305096157e-05, "loss": 0.1168, "step": 61100 }, { "epoch": 2.0, "learning_rate": 2.9974781796887216e-05, "loss": 0.1375, "step": 61200 }, { "epoch": 2.0, "learning_rate": 2.997474054281286e-05, "loss": 0.1334, "step": 61300 }, { "epoch": 2.0, "learning_rate": 2.9974699288738505e-05, "loss": 0.1325, "step": 61400 }, { "epoch": 2.0, "learning_rate": 2.9974658034664147e-05, "loss": 0.125, "step": 61500 }, { "epoch": 2.0, "learning_rate": 2.9974616780589795e-05, "loss": 0.1437, "step": 61600 }, { "epoch": 2.0, "learning_rate": 2.9974575526515437e-05, "loss": 0.1316, "step": 61700 }, { "epoch": 2.0, "learning_rate": 2.9974534272441082e-05, "loss": 0.143, "step": 61800 }, { "epoch": 2.0, "learning_rate": 2.997449301836673e-05, "loss": 0.1341, "step": 61900 }, { "epoch": 2.0, "learning_rate": 2.9974451764292372e-05, "loss": 0.1405, "step": 62000 }, { "epoch": 2.0, "learning_rate": 2.9974410510218017e-05, "loss": 0.1164, "step": 62100 }, { "epoch": 2.0, "learning_rate": 2.9974369256143662e-05, "loss": 0.1148, "step": 62200 }, { "epoch": 2.0, "learning_rate": 2.9974328002069307e-05, "loss": 0.1521, "step": 62300 }, { "epoch": 2.0, "learning_rate": 2.997428674799495e-05, "loss": 0.1292, "step": 62400 }, { "epoch": 2.0, "learning_rate": 2.9974245493920597e-05, "loss": 0.12, "step": 62500 }, { "epoch": 2.0, "learning_rate": 2.997420423984624e-05, "loss": 0.1294, "step": 62600 }, { "epoch": 2.0, "learning_rate": 2.9974162985771883e-05, "loss": 0.1173, "step": 62700 }, { "epoch": 2.0, "learning_rate": 2.9974121731697528e-05, "loss": 0.1189, "step": 62800 }, { "epoch": 2.0, "learning_rate": 2.9974080477623173e-05, "loss": 0.1302, "step": 62900 }, { "epoch": 2.0, "learning_rate": 2.9974039223548815e-05, "loss": 0.1299, "step": 63000 }, { "epoch": 2.0, "learning_rate": 2.9973997969474463e-05, "loss": 0.1051, "step": 63100 }, { "epoch": 2.0, "learning_rate": 2.9973956715400105e-05, "loss": 0.1283, "step": 63200 }, { "epoch": 2.0, "learning_rate": 2.997391546132575e-05, "loss": 0.1244, "step": 63300 }, { "epoch": 2.0, "learning_rate": 2.9973874207251395e-05, "loss": 0.1226, "step": 63400 }, { "epoch": 2.0, "learning_rate": 2.9973833365717782e-05, "loss": 0.1194, "step": 63500 }, { "epoch": 2.0, "learning_rate": 2.9973792111643427e-05, "loss": 0.1556, "step": 63600 }, { "epoch": 2.0, "learning_rate": 2.9973750857569072e-05, "loss": 0.1274, "step": 63700 }, { "epoch": 2.0, "learning_rate": 2.9973709603494714e-05, "loss": 0.1127, "step": 63800 }, { "epoch": 2.0, "learning_rate": 2.9973668349420362e-05, "loss": 0.1167, "step": 63900 }, { "epoch": 2.0, "learning_rate": 2.9973627095346004e-05, "loss": 0.1089, "step": 64000 }, { "epoch": 2.0, "learning_rate": 2.997358584127165e-05, "loss": 0.063, "step": 64100 }, { "epoch": 2.0, "learning_rate": 2.9973544587197294e-05, "loss": 0.1274, "step": 64200 }, { "epoch": 2.0, "learning_rate": 2.997350333312294e-05, "loss": 0.1307, "step": 64300 }, { "epoch": 2.0, "learning_rate": 2.997346207904858e-05, "loss": 0.1188, "step": 64400 }, { "epoch": 2.0, "learning_rate": 2.997342082497423e-05, "loss": 0.1141, "step": 64500 }, { "epoch": 2.0, "learning_rate": 2.997337957089987e-05, "loss": 0.1371, "step": 64600 }, { "epoch": 2.0, "learning_rate": 2.9973338316825515e-05, "loss": 0.1143, "step": 64700 }, { "epoch": 2.0, "learning_rate": 2.997329706275116e-05, "loss": 0.1226, "step": 64800 }, { "epoch": 2.0, "learning_rate": 2.9973255808676805e-05, "loss": 0.1176, "step": 64900 }, { "epoch": 2.0, "learning_rate": 2.9973214554602447e-05, "loss": 0.1231, "step": 65000 }, { "epoch": 2.0, "learning_rate": 2.9973173300528095e-05, "loss": 0.0797, "step": 65100 }, { "epoch": 2.0, "learning_rate": 2.9973132046453737e-05, "loss": 0.1038, "step": 65200 }, { "epoch": 2.0, "learning_rate": 2.997309079237938e-05, "loss": 0.0957, "step": 65300 }, { "epoch": 2.0, "learning_rate": 2.997304953830503e-05, "loss": 0.098, "step": 65400 }, { "epoch": 2.0, "learning_rate": 2.997300828423067e-05, "loss": 0.1269, "step": 65500 }, { "epoch": 2.0, "learning_rate": 2.997296744269706e-05, "loss": 0.0959, "step": 65600 }, { "epoch": 2.0, "learning_rate": 2.9972926188622704e-05, "loss": 0.1248, "step": 65700 }, { "epoch": 2.0, "learning_rate": 2.9972885347089092e-05, "loss": 0.1319, "step": 65800 }, { "epoch": 2.0, "learning_rate": 2.9972844093014737e-05, "loss": 0.1025, "step": 65900 }, { "epoch": 2.0, "learning_rate": 2.9972802838940382e-05, "loss": 0.1175, "step": 66000 }, { "epoch": 2.0, "learning_rate": 2.9972761584866027e-05, "loss": 0.1335, "step": 66100 }, { "epoch": 2.0, "learning_rate": 2.9972720330791672e-05, "loss": 0.0931, "step": 66200 }, { "epoch": 2.0, "learning_rate": 2.9972679076717317e-05, "loss": 0.1022, "step": 66300 }, { "epoch": 2.0, "learning_rate": 2.997263782264296e-05, "loss": 0.1277, "step": 66400 }, { "epoch": 2.0, "learning_rate": 2.9972596568568607e-05, "loss": 0.1197, "step": 66500 }, { "epoch": 2.0, "learning_rate": 2.9972555314494248e-05, "loss": 0.112, "step": 66600 }, { "epoch": 2.0, "learning_rate": 2.9972514060419893e-05, "loss": 0.1176, "step": 66700 }, { "epoch": 2.0, "learning_rate": 2.9972472806345538e-05, "loss": 0.1074, "step": 66800 }, { "epoch": 2.0, "learning_rate": 2.9972431552271183e-05, "loss": 0.1321, "step": 66900 }, { "epoch": 2.0, "learning_rate": 2.9972390298196825e-05, "loss": 0.1112, "step": 67000 }, { "epoch": 2.0, "learning_rate": 2.9972349044122473e-05, "loss": 0.1173, "step": 67100 }, { "epoch": 2.0, "learning_rate": 2.9972307790048115e-05, "loss": 0.1236, "step": 67200 }, { "epoch": 2.0, "learning_rate": 2.997226653597376e-05, "loss": 0.1198, "step": 67300 }, { "epoch": 2.0, "learning_rate": 2.9972225281899405e-05, "loss": 0.1195, "step": 67400 }, { "epoch": 2.0, "learning_rate": 2.997218402782505e-05, "loss": 0.1228, "step": 67500 }, { "epoch": 2.0, "learning_rate": 2.997214277375069e-05, "loss": 0.1159, "step": 67600 }, { "epoch": 2.0, "learning_rate": 2.997210151967634e-05, "loss": 0.1124, "step": 67700 }, { "epoch": 2.0, "learning_rate": 2.997206026560198e-05, "loss": 0.1325, "step": 67800 }, { "epoch": 2.0, "learning_rate": 2.9972019011527626e-05, "loss": 0.1154, "step": 67900 }, { "epoch": 2.0, "learning_rate": 2.997197775745327e-05, "loss": 0.1182, "step": 68000 }, { "epoch": 2.0, "learning_rate": 2.997193691591966e-05, "loss": 0.1123, "step": 68100 }, { "epoch": 2.0, "learning_rate": 2.9971895661845304e-05, "loss": 0.1388, "step": 68200 }, { "epoch": 2.0, "learning_rate": 2.997185440777095e-05, "loss": 0.1451, "step": 68300 }, { "epoch": 2.0, "learning_rate": 2.997181315369659e-05, "loss": 0.109, "step": 68400 }, { "epoch": 2.0, "learning_rate": 2.997177189962224e-05, "loss": 0.1114, "step": 68500 }, { "epoch": 2.0, "learning_rate": 2.997173064554788e-05, "loss": 0.1373, "step": 68600 }, { "epoch": 2.0, "learning_rate": 2.9971689391473525e-05, "loss": 0.114, "step": 68700 }, { "epoch": 2.0, "learning_rate": 2.997164813739917e-05, "loss": 0.0818, "step": 68800 }, { "epoch": 2.0, "learning_rate": 2.9971606883324815e-05, "loss": 0.1278, "step": 68900 }, { "epoch": 2.0, "learning_rate": 2.9971565629250457e-05, "loss": 0.1206, "step": 69000 }, { "epoch": 2.0, "learning_rate": 2.9971524375176105e-05, "loss": 0.1209, "step": 69100 }, { "epoch": 2.0, "learning_rate": 2.9971483121101747e-05, "loss": 0.1178, "step": 69200 }, { "epoch": 2.0, "learning_rate": 2.997144186702739e-05, "loss": 0.1235, "step": 69300 }, { "epoch": 2.0, "learning_rate": 2.9971400612953037e-05, "loss": 0.1157, "step": 69400 }, { "epoch": 2.0, "learning_rate": 2.997135935887868e-05, "loss": 0.1075, "step": 69500 }, { "epoch": 2.0, "learning_rate": 2.9971318104804327e-05, "loss": 0.116, "step": 69600 }, { "epoch": 2.0, "learning_rate": 2.997127685072997e-05, "loss": 0.1181, "step": 69700 }, { "epoch": 2.0, "learning_rate": 2.9971235596655616e-05, "loss": 0.0814, "step": 69800 }, { "epoch": 2.0, "learning_rate": 2.9971194342581258e-05, "loss": 0.1161, "step": 69900 }, { "epoch": 2.0, "learning_rate": 2.9971153088506906e-05, "loss": 0.0829, "step": 70000 }, { "epoch": 2.0, "learning_rate": 2.9971111834432548e-05, "loss": 0.1345, "step": 70100 }, { "epoch": 2.0, "learning_rate": 2.9971070580358193e-05, "loss": 0.0964, "step": 70200 }, { "epoch": 2.0, "learning_rate": 2.9971029326283838e-05, "loss": 0.0862, "step": 70300 }, { "epoch": 2.0, "learning_rate": 2.9970988072209483e-05, "loss": 0.1249, "step": 70400 }, { "epoch": 2.0, "learning_rate": 2.9970946818135124e-05, "loss": 0.1382, "step": 70500 }, { "epoch": 2.0, "learning_rate": 2.9970905564060773e-05, "loss": 0.098, "step": 70600 }, { "epoch": 2.0, "learning_rate": 2.9970864309986414e-05, "loss": 0.156, "step": 70700 }, { "epoch": 2.0, "learning_rate": 2.997082305591206e-05, "loss": 0.1253, "step": 70800 }, { "epoch": 2.0, "learning_rate": 2.9970781801837704e-05, "loss": 0.1206, "step": 70900 }, { "epoch": 2.0, "learning_rate": 2.997074054776335e-05, "loss": 0.0953, "step": 71000 }, { "epoch": 2.0, "learning_rate": 2.997069929368899e-05, "loss": 0.1493, "step": 71100 }, { "epoch": 2.0, "learning_rate": 2.997065803961464e-05, "loss": 0.1247, "step": 71200 }, { "epoch": 2.0, "learning_rate": 2.997061678554028e-05, "loss": 0.1099, "step": 71300 }, { "epoch": 2.0, "learning_rate": 2.9970575531465926e-05, "loss": 0.1281, "step": 71400 }, { "epoch": 2.0, "learning_rate": 2.997053427739157e-05, "loss": 0.1005, "step": 71500 }, { "epoch": 2.0, "learning_rate": 2.9970493023317216e-05, "loss": 0.1172, "step": 71600 }, { "epoch": 2.0, "learning_rate": 2.997045176924286e-05, "loss": 0.1112, "step": 71700 }, { "epoch": 2.0, "learning_rate": 2.9970410515168506e-05, "loss": 0.1126, "step": 71800 }, { "epoch": 2.0, "learning_rate": 2.997036926109415e-05, "loss": 0.1204, "step": 71900 }, { "epoch": 2.0, "learning_rate": 2.9970328007019792e-05, "loss": 0.1262, "step": 72000 }, { "epoch": 2.0, "learning_rate": 2.997028675294544e-05, "loss": 0.1157, "step": 72100 }, { "epoch": 2.0, "learning_rate": 2.9970245911411828e-05, "loss": 0.1122, "step": 72200 }, { "epoch": 2.0, "learning_rate": 2.997020465733747e-05, "loss": 0.1112, "step": 72300 }, { "epoch": 2.0, "learning_rate": 2.9970163403263115e-05, "loss": 0.1196, "step": 72400 }, { "epoch": 2.0, "learning_rate": 2.997012214918876e-05, "loss": 0.1379, "step": 72500 }, { "epoch": 2.0, "learning_rate": 2.9970080895114405e-05, "loss": 0.1008, "step": 72600 }, { "epoch": 2.0, "learning_rate": 2.9970039641040046e-05, "loss": 0.1172, "step": 72700 }, { "epoch": 2.0, "learning_rate": 2.9969998386965695e-05, "loss": 0.1098, "step": 72800 }, { "epoch": 2.0, "learning_rate": 2.996995713289134e-05, "loss": 0.1368, "step": 72900 }, { "epoch": 2.0, "learning_rate": 2.996991587881698e-05, "loss": 0.1233, "step": 73000 }, { "epoch": 2.0, "learning_rate": 2.996987462474263e-05, "loss": 0.1142, "step": 73100 }, { "epoch": 2.0, "learning_rate": 2.996983337066827e-05, "loss": 0.1051, "step": 73200 }, { "epoch": 2.0, "learning_rate": 2.9969792116593916e-05, "loss": 0.1079, "step": 73300 }, { "epoch": 2.0, "learning_rate": 2.996975086251956e-05, "loss": 0.1337, "step": 73400 }, { "epoch": 2.0, "learning_rate": 2.996971002098595e-05, "loss": 0.1179, "step": 73500 }, { "epoch": 2.0, "learning_rate": 2.9969668766911594e-05, "loss": 0.1478, "step": 73600 }, { "epoch": 2.0, "learning_rate": 2.9969627512837235e-05, "loss": 0.1154, "step": 73700 }, { "epoch": 2.0, "learning_rate": 2.9969586258762884e-05, "loss": 0.1226, "step": 73800 }, { "epoch": 2.0, "learning_rate": 2.9969545004688525e-05, "loss": 0.1274, "step": 73900 }, { "epoch": 2.0, "learning_rate": 2.996950375061417e-05, "loss": 0.1253, "step": 74000 }, { "epoch": 2.0, "learning_rate": 2.9969462496539815e-05, "loss": 0.1382, "step": 74100 }, { "epoch": 2.0, "learning_rate": 2.996942124246546e-05, "loss": 0.1317, "step": 74200 }, { "epoch": 2.0, "learning_rate": 2.9969379988391105e-05, "loss": 0.1321, "step": 74300 }, { "epoch": 2.0, "eval_accuracy": 0.9419618925892423, "eval_f1": 0.9419576710015996, "eval_loss": 0.1524653136730194, "eval_matthews_correlation": 0.8840504346996007, "eval_precision": 0.9420890695231088, "eval_recall": 0.9419613743988416, "eval_runtime": 1386.958, "eval_samples_per_second": 2982.311, "eval_steps_per_second": 2982.311, "step": 74307 }, { "epoch": 3.0, "learning_rate": 2.996933873431675e-05, "loss": 0.146, "step": 74400 }, { "epoch": 3.0, "learning_rate": 2.9969297480242395e-05, "loss": 0.1157, "step": 74500 }, { "epoch": 3.0, "learning_rate": 2.9969256226168037e-05, "loss": 0.1385, "step": 74600 }, { "epoch": 3.0, "learning_rate": 2.9969214972093685e-05, "loss": 0.1157, "step": 74700 }, { "epoch": 3.0, "learning_rate": 2.9969173718019327e-05, "loss": 0.1254, "step": 74800 }, { "epoch": 3.0, "learning_rate": 2.996913246394497e-05, "loss": 0.1329, "step": 74900 }, { "epoch": 3.0, "learning_rate": 2.9969091209870617e-05, "loss": 0.1386, "step": 75000 }, { "epoch": 3.0, "learning_rate": 2.996904995579626e-05, "loss": 0.1264, "step": 75100 }, { "epoch": 3.0, "learning_rate": 2.9969008701721903e-05, "loss": 0.1288, "step": 75200 }, { "epoch": 3.0, "learning_rate": 2.996896744764755e-05, "loss": 0.147, "step": 75300 }, { "epoch": 3.0, "learning_rate": 2.9968926193573193e-05, "loss": 0.1562, "step": 75400 }, { "epoch": 3.0, "learning_rate": 2.9968884939498838e-05, "loss": 0.1632, "step": 75500 }, { "epoch": 3.0, "learning_rate": 2.9968843685424483e-05, "loss": 0.1574, "step": 75600 }, { "epoch": 3.0, "learning_rate": 2.9968802431350128e-05, "loss": 0.1518, "step": 75700 }, { "epoch": 3.0, "learning_rate": 2.996876117727577e-05, "loss": 0.152, "step": 75800 }, { "epoch": 3.0, "learning_rate": 2.9968719923201418e-05, "loss": 0.1457, "step": 75900 }, { "epoch": 3.0, "learning_rate": 2.996867866912706e-05, "loss": 0.1518, "step": 76000 }, { "epoch": 3.0, "learning_rate": 2.9968637415052704e-05, "loss": 0.148, "step": 76100 }, { "epoch": 3.0, "learning_rate": 2.996859616097835e-05, "loss": 0.1408, "step": 76200 }, { "epoch": 3.0, "learning_rate": 2.9968554906903994e-05, "loss": 0.1371, "step": 76300 }, { "epoch": 3.0, "learning_rate": 2.996851365282964e-05, "loss": 0.1374, "step": 76400 }, { "epoch": 3.0, "learning_rate": 2.9968472398755284e-05, "loss": 0.1405, "step": 76500 }, { "epoch": 3.0, "learning_rate": 2.996843114468093e-05, "loss": 0.1403, "step": 76600 }, { "epoch": 3.0, "learning_rate": 2.996838989060657e-05, "loss": 0.1392, "step": 76700 }, { "epoch": 3.0, "learning_rate": 2.996834863653222e-05, "loss": 0.1413, "step": 76800 }, { "epoch": 3.0, "learning_rate": 2.996830738245786e-05, "loss": 0.1371, "step": 76900 }, { "epoch": 3.0, "learning_rate": 2.9968266128383506e-05, "loss": 0.1412, "step": 77000 }, { "epoch": 3.0, "learning_rate": 2.996822487430915e-05, "loss": 0.1406, "step": 77100 }, { "epoch": 3.0, "learning_rate": 2.9968183620234796e-05, "loss": 0.1447, "step": 77200 }, { "epoch": 3.0, "learning_rate": 2.9968142366160437e-05, "loss": 0.1151, "step": 77300 }, { "epoch": 3.0, "learning_rate": 2.9968101112086082e-05, "loss": 0.1255, "step": 77400 }, { "epoch": 3.0, "learning_rate": 2.9968059858011727e-05, "loss": 0.1464, "step": 77500 }, { "epoch": 3.0, "learning_rate": 2.9968018603937372e-05, "loss": 0.1391, "step": 77600 }, { "epoch": 3.0, "learning_rate": 2.9967977349863014e-05, "loss": 0.124, "step": 77700 }, { "epoch": 3.0, "learning_rate": 2.9967936095788662e-05, "loss": 0.118, "step": 77800 }, { "epoch": 3.0, "learning_rate": 2.9967894841714304e-05, "loss": 0.1472, "step": 77900 }, { "epoch": 3.0, "learning_rate": 2.996785358763995e-05, "loss": 0.1268, "step": 78000 }, { "epoch": 3.0, "learning_rate": 2.9967812333565594e-05, "loss": 0.1126, "step": 78100 }, { "epoch": 3.0, "learning_rate": 2.996777107949124e-05, "loss": 0.1199, "step": 78200 }, { "epoch": 3.0, "learning_rate": 2.9967730237957626e-05, "loss": 0.1234, "step": 78300 }, { "epoch": 3.0, "learning_rate": 2.996768898388327e-05, "loss": 0.1256, "step": 78400 }, { "epoch": 3.0, "learning_rate": 2.9967647729808916e-05, "loss": 0.1378, "step": 78500 }, { "epoch": 3.0, "learning_rate": 2.996760647573456e-05, "loss": 0.1361, "step": 78600 }, { "epoch": 3.0, "learning_rate": 2.996756563420095e-05, "loss": 0.1173, "step": 78700 }, { "epoch": 3.0, "learning_rate": 2.996752438012659e-05, "loss": 0.1356, "step": 78800 }, { "epoch": 3.0, "learning_rate": 2.996748312605224e-05, "loss": 0.1194, "step": 78900 }, { "epoch": 3.0, "learning_rate": 2.996744187197788e-05, "loss": 0.1528, "step": 79000 }, { "epoch": 3.0, "learning_rate": 2.9967400617903525e-05, "loss": 0.1384, "step": 79100 }, { "epoch": 3.0, "learning_rate": 2.9967359363829174e-05, "loss": 0.1279, "step": 79200 }, { "epoch": 3.0, "learning_rate": 2.9967318109754815e-05, "loss": 0.1436, "step": 79300 }, { "epoch": 3.0, "learning_rate": 2.996727685568046e-05, "loss": 0.1289, "step": 79400 }, { "epoch": 3.0, "learning_rate": 2.9967235601606105e-05, "loss": 0.1422, "step": 79500 }, { "epoch": 3.0, "learning_rate": 2.996719434753175e-05, "loss": 0.1297, "step": 79600 }, { "epoch": 3.0, "learning_rate": 2.9967153093457392e-05, "loss": 0.1057, "step": 79700 }, { "epoch": 3.0, "learning_rate": 2.996711183938304e-05, "loss": 0.1373, "step": 79800 }, { "epoch": 3.0, "learning_rate": 2.9967070585308682e-05, "loss": 0.1465, "step": 79900 }, { "epoch": 3.0, "learning_rate": 2.9967029331234327e-05, "loss": 0.1409, "step": 80000 }, { "epoch": 3.0, "learning_rate": 2.9966988077159972e-05, "loss": 0.1224, "step": 80100 }, { "epoch": 3.0, "learning_rate": 2.9966946823085617e-05, "loss": 0.1252, "step": 80200 }, { "epoch": 3.0, "learning_rate": 2.9966905569011258e-05, "loss": 0.1413, "step": 80300 }, { "epoch": 3.0, "learning_rate": 2.9966864314936907e-05, "loss": 0.1481, "step": 80400 }, { "epoch": 3.0, "learning_rate": 2.9966823060862548e-05, "loss": 0.1335, "step": 80500 }, { "epoch": 3.0, "learning_rate": 2.9966781806788193e-05, "loss": 0.1177, "step": 80600 }, { "epoch": 3.0, "learning_rate": 2.9966740552713838e-05, "loss": 0.1428, "step": 80700 }, { "epoch": 3.0, "learning_rate": 2.9966699298639483e-05, "loss": 0.1483, "step": 80800 }, { "epoch": 3.0, "learning_rate": 2.9966658044565125e-05, "loss": 0.1266, "step": 80900 }, { "epoch": 3.0, "learning_rate": 2.9966616790490773e-05, "loss": 0.136, "step": 81000 }, { "epoch": 3.0, "learning_rate": 2.9966575536416415e-05, "loss": 0.1336, "step": 81100 }, { "epoch": 3.0, "learning_rate": 2.996653428234206e-05, "loss": 0.1477, "step": 81200 }, { "epoch": 3.0, "learning_rate": 2.9966493440808447e-05, "loss": 0.1408, "step": 81300 }, { "epoch": 3.0, "learning_rate": 2.9966452186734096e-05, "loss": 0.1346, "step": 81400 }, { "epoch": 3.0, "learning_rate": 2.9966410932659737e-05, "loss": 0.1142, "step": 81500 }, { "epoch": 3.0, "learning_rate": 2.9966369678585382e-05, "loss": 0.1426, "step": 81600 }, { "epoch": 3.0, "learning_rate": 2.9966328424511027e-05, "loss": 0.125, "step": 81700 }, { "epoch": 3.0, "learning_rate": 2.9966287170436672e-05, "loss": 0.1311, "step": 81800 }, { "epoch": 3.0, "learning_rate": 2.9966245916362314e-05, "loss": 0.1454, "step": 81900 }, { "epoch": 3.0, "learning_rate": 2.9966204662287962e-05, "loss": 0.1472, "step": 82000 }, { "epoch": 3.0, "learning_rate": 2.9966163408213604e-05, "loss": 0.1498, "step": 82100 }, { "epoch": 3.0, "learning_rate": 2.996612215413925e-05, "loss": 0.1362, "step": 82200 }, { "epoch": 3.0, "learning_rate": 2.9966080900064894e-05, "loss": 0.1541, "step": 82300 }, { "epoch": 3.0, "learning_rate": 2.996603964599054e-05, "loss": 0.118, "step": 82400 }, { "epoch": 3.0, "learning_rate": 2.996599839191618e-05, "loss": 0.1398, "step": 82500 }, { "epoch": 3.0, "learning_rate": 2.996595713784183e-05, "loss": 0.1556, "step": 82600 }, { "epoch": 3.0, "learning_rate": 2.9965915883767473e-05, "loss": 0.1388, "step": 82700 }, { "epoch": 3.0, "learning_rate": 2.9965874629693115e-05, "loss": 0.1466, "step": 82800 }, { "epoch": 3.0, "learning_rate": 2.9965833375618763e-05, "loss": 0.1534, "step": 82900 }, { "epoch": 3.0, "learning_rate": 2.9965792121544405e-05, "loss": 0.1421, "step": 83000 }, { "epoch": 3.0, "learning_rate": 2.996575086747005e-05, "loss": 0.1453, "step": 83100 }, { "epoch": 3.0, "learning_rate": 2.9965709613395695e-05, "loss": 0.1439, "step": 83200 }, { "epoch": 3.0, "learning_rate": 2.996566835932134e-05, "loss": 0.1482, "step": 83300 }, { "epoch": 3.0, "learning_rate": 2.996562710524698e-05, "loss": 0.1651, "step": 83400 }, { "epoch": 3.0, "learning_rate": 2.996558585117263e-05, "loss": 0.174, "step": 83500 }, { "epoch": 3.0, "learning_rate": 2.996554459709827e-05, "loss": 0.1261, "step": 83600 }, { "epoch": 3.0, "learning_rate": 2.9965503343023916e-05, "loss": 0.1385, "step": 83700 }, { "epoch": 3.0, "learning_rate": 2.996546208894956e-05, "loss": 0.1411, "step": 83800 }, { "epoch": 3.0, "learning_rate": 2.9965420834875206e-05, "loss": 0.1117, "step": 83900 }, { "epoch": 3.0, "learning_rate": 2.9965379580800848e-05, "loss": 0.1382, "step": 84000 }, { "epoch": 3.0, "learning_rate": 2.9965338326726493e-05, "loss": 0.1223, "step": 84100 }, { "epoch": 3.0, "learning_rate": 2.9965297072652138e-05, "loss": 0.148, "step": 84200 }, { "epoch": 3.0, "learning_rate": 2.9965255818577783e-05, "loss": 0.1443, "step": 84300 }, { "epoch": 3.0, "learning_rate": 2.9965214564503424e-05, "loss": 0.153, "step": 84400 }, { "epoch": 3.0, "learning_rate": 2.9965173310429073e-05, "loss": 0.1436, "step": 84500 }, { "epoch": 3.0, "learning_rate": 2.9965132056354714e-05, "loss": 0.1508, "step": 84600 }, { "epoch": 3.0, "learning_rate": 2.996509080228036e-05, "loss": 0.1631, "step": 84700 }, { "epoch": 3.0, "learning_rate": 2.9965049548206008e-05, "loss": 0.1436, "step": 84800 }, { "epoch": 3.0, "learning_rate": 2.996500829413165e-05, "loss": 0.1492, "step": 84900 }, { "epoch": 3.0, "learning_rate": 2.9964967040057294e-05, "loss": 0.1493, "step": 85000 }, { "epoch": 3.0, "learning_rate": 2.996492578598294e-05, "loss": 0.1386, "step": 85100 }, { "epoch": 3.0, "learning_rate": 2.9964884531908584e-05, "loss": 0.1495, "step": 85200 }, { "epoch": 3.0, "learning_rate": 2.9964843277834226e-05, "loss": 0.1346, "step": 85300 }, { "epoch": 3.0, "learning_rate": 2.9964802023759874e-05, "loss": 0.1273, "step": 85400 }, { "epoch": 3.0, "learning_rate": 2.9964760769685516e-05, "loss": 0.1391, "step": 85500 }, { "epoch": 3.0, "learning_rate": 2.996471951561116e-05, "loss": 0.1252, "step": 85600 }, { "epoch": 3.0, "learning_rate": 2.9964678261536806e-05, "loss": 0.1555, "step": 85700 }, { "epoch": 3.0, "learning_rate": 2.996463700746245e-05, "loss": 0.1366, "step": 85800 }, { "epoch": 3.0, "learning_rate": 2.9964595753388092e-05, "loss": 0.1052, "step": 85900 }, { "epoch": 3.0, "learning_rate": 2.996455449931374e-05, "loss": 0.1499, "step": 86000 }, { "epoch": 3.0, "learning_rate": 2.9964513245239382e-05, "loss": 0.1135, "step": 86100 }, { "epoch": 3.0, "learning_rate": 2.9964471991165027e-05, "loss": 0.1344, "step": 86200 }, { "epoch": 3.0, "learning_rate": 2.9964430737090672e-05, "loss": 0.13, "step": 86300 }, { "epoch": 3.0, "learning_rate": 2.9964389483016317e-05, "loss": 0.1346, "step": 86400 }, { "epoch": 3.0, "learning_rate": 2.996434822894196e-05, "loss": 0.1274, "step": 86500 }, { "epoch": 3.0, "learning_rate": 2.9964306974867607e-05, "loss": 0.1374, "step": 86600 }, { "epoch": 3.0, "learning_rate": 2.996426572079325e-05, "loss": 0.1353, "step": 86700 }, { "epoch": 3.0, "learning_rate": 2.9964224466718894e-05, "loss": 0.1307, "step": 86800 }, { "epoch": 3.0, "learning_rate": 2.996418362518528e-05, "loss": 0.1063, "step": 86900 }, { "epoch": 3.0, "learning_rate": 2.996414237111093e-05, "loss": 0.1176, "step": 87000 }, { "epoch": 3.0, "learning_rate": 2.996410111703657e-05, "loss": 0.1491, "step": 87100 }, { "epoch": 3.0, "learning_rate": 2.9964059862962216e-05, "loss": 0.1189, "step": 87200 }, { "epoch": 3.0, "learning_rate": 2.996401860888786e-05, "loss": 0.1142, "step": 87300 }, { "epoch": 3.0, "learning_rate": 2.9963977354813506e-05, "loss": 0.1296, "step": 87400 }, { "epoch": 3.0, "learning_rate": 2.9963936100739148e-05, "loss": 0.1116, "step": 87500 }, { "epoch": 3.0, "learning_rate": 2.9963894846664796e-05, "loss": 0.1184, "step": 87600 }, { "epoch": 3.0, "learning_rate": 2.9963853592590438e-05, "loss": 0.1217, "step": 87700 }, { "epoch": 3.0, "learning_rate": 2.9963812338516083e-05, "loss": 0.1114, "step": 87800 }, { "epoch": 3.0, "learning_rate": 2.9963771084441728e-05, "loss": 0.1036, "step": 87900 }, { "epoch": 3.0, "learning_rate": 2.9963729830367373e-05, "loss": 0.1306, "step": 88000 }, { "epoch": 3.0, "learning_rate": 2.9963688576293014e-05, "loss": 0.1135, "step": 88100 }, { "epoch": 3.0, "learning_rate": 2.9963647734759405e-05, "loss": 0.113, "step": 88200 }, { "epoch": 3.0, "learning_rate": 2.996360648068505e-05, "loss": 0.1347, "step": 88300 }, { "epoch": 3.0, "learning_rate": 2.9963565226610695e-05, "loss": 0.1365, "step": 88400 }, { "epoch": 3.0, "learning_rate": 2.9963523972536337e-05, "loss": 0.1104, "step": 88500 }, { "epoch": 3.0, "learning_rate": 2.9963482718461985e-05, "loss": 0.1048, "step": 88600 }, { "epoch": 3.0, "learning_rate": 2.9963441464387627e-05, "loss": 0.1142, "step": 88700 }, { "epoch": 3.0, "learning_rate": 2.996340021031327e-05, "loss": 0.0926, "step": 88800 }, { "epoch": 3.0, "learning_rate": 2.9963358956238917e-05, "loss": 0.0777, "step": 88900 }, { "epoch": 3.0, "learning_rate": 2.996331770216456e-05, "loss": 0.1193, "step": 89000 }, { "epoch": 3.0, "learning_rate": 2.9963276448090203e-05, "loss": 0.1257, "step": 89100 }, { "epoch": 3.0, "learning_rate": 2.996323519401585e-05, "loss": 0.1059, "step": 89200 }, { "epoch": 3.0, "learning_rate": 2.9963193939941493e-05, "loss": 0.114, "step": 89300 }, { "epoch": 3.0, "learning_rate": 2.9963152685867138e-05, "loss": 0.116, "step": 89400 }, { "epoch": 3.0, "learning_rate": 2.9963111431792783e-05, "loss": 0.1223, "step": 89500 }, { "epoch": 3.0, "learning_rate": 2.9963070177718428e-05, "loss": 0.1025, "step": 89600 }, { "epoch": 3.0, "learning_rate": 2.9963028923644073e-05, "loss": 0.1222, "step": 89700 }, { "epoch": 3.0, "learning_rate": 2.9962987669569718e-05, "loss": 0.1044, "step": 89800 }, { "epoch": 3.0, "learning_rate": 2.9962946415495363e-05, "loss": 0.0695, "step": 89900 }, { "epoch": 3.0, "learning_rate": 2.9962905161421004e-05, "loss": 0.1164, "step": 90000 }, { "epoch": 3.0, "learning_rate": 2.9962863907346653e-05, "loss": 0.0778, "step": 90100 }, { "epoch": 3.0, "learning_rate": 2.9962822653272294e-05, "loss": 0.1077, "step": 90200 }, { "epoch": 3.0, "learning_rate": 2.996278139919794e-05, "loss": 0.1145, "step": 90300 }, { "epoch": 3.0, "learning_rate": 2.9962740145123584e-05, "loss": 0.0945, "step": 90400 }, { "epoch": 3.0, "learning_rate": 2.996269889104923e-05, "loss": 0.1119, "step": 90500 }, { "epoch": 3.0, "learning_rate": 2.996265763697487e-05, "loss": 0.1222, "step": 90600 }, { "epoch": 3.0, "learning_rate": 2.996261638290052e-05, "loss": 0.1044, "step": 90700 }, { "epoch": 3.0, "learning_rate": 2.996257512882616e-05, "loss": 0.1253, "step": 90800 }, { "epoch": 3.0, "learning_rate": 2.9962533874751806e-05, "loss": 0.1137, "step": 90900 }, { "epoch": 3.0, "learning_rate": 2.996249262067745e-05, "loss": 0.0822, "step": 91000 }, { "epoch": 3.0, "learning_rate": 2.9962451366603096e-05, "loss": 0.099, "step": 91100 }, { "epoch": 3.0, "learning_rate": 2.9962410112528737e-05, "loss": 0.1331, "step": 91200 }, { "epoch": 3.0, "learning_rate": 2.9962368858454386e-05, "loss": 0.097, "step": 91300 }, { "epoch": 3.0, "learning_rate": 2.9962327604380027e-05, "loss": 0.1154, "step": 91400 }, { "epoch": 3.0, "learning_rate": 2.9962286350305672e-05, "loss": 0.1002, "step": 91500 }, { "epoch": 3.0, "learning_rate": 2.9962245096231317e-05, "loss": 0.1111, "step": 91600 }, { "epoch": 3.0, "learning_rate": 2.9962203842156962e-05, "loss": 0.1211, "step": 91700 }, { "epoch": 3.0, "learning_rate": 2.9962162588082607e-05, "loss": 0.1011, "step": 91800 }, { "epoch": 3.0, "learning_rate": 2.9962121334008252e-05, "loss": 0.1321, "step": 91900 }, { "epoch": 3.0, "learning_rate": 2.9962080079933897e-05, "loss": 0.103, "step": 92000 }, { "epoch": 3.0, "learning_rate": 2.996203882585954e-05, "loss": 0.1161, "step": 92100 }, { "epoch": 3.0, "learning_rate": 2.9961997571785187e-05, "loss": 0.1111, "step": 92200 }, { "epoch": 3.0, "learning_rate": 2.996195631771083e-05, "loss": 0.1105, "step": 92300 }, { "epoch": 3.0, "learning_rate": 2.9961915063636474e-05, "loss": 0.1121, "step": 92400 }, { "epoch": 3.0, "learning_rate": 2.996187380956212e-05, "loss": 0.1265, "step": 92500 }, { "epoch": 3.0, "learning_rate": 2.9961832968028503e-05, "loss": 0.1109, "step": 92600 }, { "epoch": 3.0, "learning_rate": 2.996179171395415e-05, "loss": 0.1032, "step": 92700 }, { "epoch": 3.0, "learning_rate": 2.9961750459879793e-05, "loss": 0.1122, "step": 92800 }, { "epoch": 3.0, "learning_rate": 2.9961709618346184e-05, "loss": 0.1165, "step": 92900 }, { "epoch": 3.0, "learning_rate": 2.9961668364271825e-05, "loss": 0.1329, "step": 93000 }, { "epoch": 3.0, "learning_rate": 2.9961627110197474e-05, "loss": 0.1272, "step": 93100 }, { "epoch": 3.0, "learning_rate": 2.9961585856123115e-05, "loss": 0.1092, "step": 93200 }, { "epoch": 3.0, "learning_rate": 2.996154460204876e-05, "loss": 0.1252, "step": 93300 }, { "epoch": 3.0, "learning_rate": 2.9961503347974405e-05, "loss": 0.1195, "step": 93400 }, { "epoch": 3.0, "learning_rate": 2.996146209390005e-05, "loss": 0.0835, "step": 93500 }, { "epoch": 3.0, "learning_rate": 2.9961420839825692e-05, "loss": 0.1005, "step": 93600 }, { "epoch": 3.0, "learning_rate": 2.996137958575134e-05, "loss": 0.1201, "step": 93700 }, { "epoch": 3.0, "learning_rate": 2.9961338331676982e-05, "loss": 0.1167, "step": 93800 }, { "epoch": 3.0, "learning_rate": 2.9961297077602627e-05, "loss": 0.1121, "step": 93900 }, { "epoch": 3.0, "learning_rate": 2.9961255823528272e-05, "loss": 0.1146, "step": 94000 }, { "epoch": 3.0, "learning_rate": 2.9961214569453917e-05, "loss": 0.1087, "step": 94100 }, { "epoch": 3.0, "learning_rate": 2.9961173315379558e-05, "loss": 0.1156, "step": 94200 }, { "epoch": 3.0, "learning_rate": 2.9961132061305207e-05, "loss": 0.1016, "step": 94300 }, { "epoch": 3.0, "learning_rate": 2.9961090807230848e-05, "loss": 0.1064, "step": 94400 }, { "epoch": 3.0, "learning_rate": 2.9961049553156493e-05, "loss": 0.0906, "step": 94500 }, { "epoch": 3.0, "learning_rate": 2.996100829908214e-05, "loss": 0.0982, "step": 94600 }, { "epoch": 3.0, "learning_rate": 2.9960967045007783e-05, "loss": 0.0944, "step": 94700 }, { "epoch": 3.0, "learning_rate": 2.9960925790933428e-05, "loss": 0.0984, "step": 94800 }, { "epoch": 3.0, "learning_rate": 2.9960884536859073e-05, "loss": 0.1191, "step": 94900 }, { "epoch": 3.0, "learning_rate": 2.9960843282784718e-05, "loss": 0.0901, "step": 95000 }, { "epoch": 3.0, "learning_rate": 2.996080202871036e-05, "loss": 0.0836, "step": 95100 }, { "epoch": 3.0, "learning_rate": 2.9960760774636008e-05, "loss": 0.1309, "step": 95200 }, { "epoch": 3.0, "learning_rate": 2.996071952056165e-05, "loss": 0.113, "step": 95300 }, { "epoch": 3.0, "learning_rate": 2.9960678266487295e-05, "loss": 0.1097, "step": 95400 }, { "epoch": 3.0, "learning_rate": 2.996063701241294e-05, "loss": 0.1448, "step": 95500 }, { "epoch": 3.0, "learning_rate": 2.9960595758338584e-05, "loss": 0.1221, "step": 95600 }, { "epoch": 3.0, "learning_rate": 2.9960554504264226e-05, "loss": 0.0998, "step": 95700 }, { "epoch": 3.0, "learning_rate": 2.9960513250189874e-05, "loss": 0.0946, "step": 95800 }, { "epoch": 3.0, "learning_rate": 2.9960471996115516e-05, "loss": 0.1486, "step": 95900 }, { "epoch": 3.0, "learning_rate": 2.996043074204116e-05, "loss": 0.1168, "step": 96000 }, { "epoch": 3.0, "learning_rate": 2.9960389487966806e-05, "loss": 0.1024, "step": 96100 }, { "epoch": 3.0, "learning_rate": 2.996034823389245e-05, "loss": 0.1186, "step": 96200 }, { "epoch": 3.0, "learning_rate": 2.9960306979818092e-05, "loss": 0.0986, "step": 96300 }, { "epoch": 3.0, "learning_rate": 2.996026572574374e-05, "loss": 0.1118, "step": 96400 }, { "epoch": 3.0, "learning_rate": 2.9960224471669382e-05, "loss": 0.1072, "step": 96500 }, { "epoch": 3.0, "learning_rate": 2.9960183217595027e-05, "loss": 0.1039, "step": 96600 }, { "epoch": 3.0, "learning_rate": 2.9960141963520676e-05, "loss": 0.1214, "step": 96700 }, { "epoch": 3.0, "learning_rate": 2.9960100709446317e-05, "loss": 0.1225, "step": 96800 }, { "epoch": 3.0, "learning_rate": 2.9960059455371962e-05, "loss": 0.1082, "step": 96900 }, { "epoch": 3.0, "learning_rate": 2.996001861383835e-05, "loss": 0.1089, "step": 97000 }, { "epoch": 3.0, "learning_rate": 2.9959977359763995e-05, "loss": 0.1109, "step": 97100 }, { "epoch": 3.0, "learning_rate": 2.995993610568964e-05, "loss": 0.1302, "step": 97200 }, { "epoch": 3.0, "learning_rate": 2.995989485161528e-05, "loss": 0.1174, "step": 97300 }, { "epoch": 3.0, "learning_rate": 2.995985359754093e-05, "loss": 0.1003, "step": 97400 }, { "epoch": 3.0, "learning_rate": 2.995981234346657e-05, "loss": 0.1141, "step": 97500 }, { "epoch": 3.0, "learning_rate": 2.9959771089392216e-05, "loss": 0.1078, "step": 97600 }, { "epoch": 3.0, "learning_rate": 2.995972983531786e-05, "loss": 0.1351, "step": 97700 }, { "epoch": 3.0, "learning_rate": 2.9959688581243506e-05, "loss": 0.1077, "step": 97800 }, { "epoch": 3.0, "learning_rate": 2.9959647327169148e-05, "loss": 0.1125, "step": 97900 }, { "epoch": 3.0, "learning_rate": 2.9959606073094796e-05, "loss": 0.1085, "step": 98000 }, { "epoch": 3.0, "learning_rate": 2.995956481902044e-05, "loss": 0.1107, "step": 98100 }, { "epoch": 3.0, "learning_rate": 2.995952397748683e-05, "loss": 0.1362, "step": 98200 }, { "epoch": 3.0, "learning_rate": 2.995948272341247e-05, "loss": 0.11, "step": 98300 }, { "epoch": 3.0, "learning_rate": 2.9959441469338115e-05, "loss": 0.1443, "step": 98400 }, { "epoch": 3.0, "learning_rate": 2.995940021526376e-05, "loss": 0.1067, "step": 98500 }, { "epoch": 3.0, "learning_rate": 2.9959358961189405e-05, "loss": 0.1241, "step": 98600 }, { "epoch": 3.0, "learning_rate": 2.9959317707115047e-05, "loss": 0.1221, "step": 98700 }, { "epoch": 3.0, "learning_rate": 2.9959276453040695e-05, "loss": 0.1182, "step": 98800 }, { "epoch": 3.0, "learning_rate": 2.9959235198966337e-05, "loss": 0.1437, "step": 98900 }, { "epoch": 3.0, "learning_rate": 2.9959193944891982e-05, "loss": 0.1188, "step": 99000 }, { "epoch": 3.0, "eval_accuracy": 0.9421570213343179, "eval_f1": 0.9421534578908957, "eval_loss": 0.15225644409656525, "eval_matthews_correlation": 0.8844212104135638, "eval_precision": 0.942264672299383, "eval_recall": 0.9421565447239119, "eval_runtime": 1388.3348, "eval_samples_per_second": 2979.353, "eval_steps_per_second": 2979.353, "step": 99076 } ], "max_steps": 72720140, "num_train_epochs": 5, "total_flos": 1.7037705236761805e+18, "trial_name": null, "trial_params": null }