{ "best_metric": 0.0033987753558903933, "best_model_checkpoint": "./GTSRB_outputs/checkpoint-37494", "epoch": 10.0, "global_step": 41660, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9995199231877104e-05, "loss": 3.7783, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.99903984637542e-05, "loss": 3.7505, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.9985597695631302e-05, "loss": 3.7487, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.9980796927508405e-05, "loss": 3.7393, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.99759961593855e-05, "loss": 3.6324, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.9971195391262603e-05, "loss": 3.6406, "step": 60 }, { "epoch": 0.02, "learning_rate": 1.9966394623139706e-05, "loss": 3.5461, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.9961593855016805e-05, "loss": 3.558, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.9956793086893904e-05, "loss": 3.4714, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.9951992318771007e-05, "loss": 3.4558, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.9947191550648106e-05, "loss": 3.4103, "step": 110 }, { "epoch": 0.03, "learning_rate": 1.9942390782525205e-05, "loss": 3.3774, "step": 120 }, { "epoch": 0.03, "learning_rate": 1.9937590014402308e-05, "loss": 3.2727, "step": 130 }, { "epoch": 0.03, "learning_rate": 1.9932789246279407e-05, "loss": 3.2236, "step": 140 }, { "epoch": 0.04, "learning_rate": 1.9927988478156506e-05, "loss": 3.2661, "step": 150 }, { "epoch": 0.04, "learning_rate": 1.992318771003361e-05, "loss": 3.203, "step": 160 }, { "epoch": 0.04, "learning_rate": 1.9918386941910707e-05, "loss": 3.1994, "step": 170 }, { "epoch": 0.04, "learning_rate": 1.9913586173787807e-05, "loss": 3.1309, "step": 180 }, { "epoch": 0.05, "learning_rate": 1.990878540566491e-05, "loss": 3.0043, "step": 190 }, { "epoch": 0.05, "learning_rate": 1.990398463754201e-05, "loss": 3.079, "step": 200 }, { "epoch": 0.05, "learning_rate": 1.989918386941911e-05, "loss": 2.9737, "step": 210 }, { "epoch": 0.05, "learning_rate": 1.989438310129621e-05, "loss": 2.9686, "step": 220 }, { "epoch": 0.06, "learning_rate": 1.988958233317331e-05, "loss": 2.955, "step": 230 }, { "epoch": 0.06, "learning_rate": 1.9884781565050412e-05, "loss": 2.874, "step": 240 }, { "epoch": 0.06, "learning_rate": 1.987998079692751e-05, "loss": 2.9016, "step": 250 }, { "epoch": 0.06, "learning_rate": 1.987518002880461e-05, "loss": 2.8635, "step": 260 }, { "epoch": 0.06, "learning_rate": 1.9870379260681713e-05, "loss": 2.8191, "step": 270 }, { "epoch": 0.07, "learning_rate": 1.986557849255881e-05, "loss": 2.8164, "step": 280 }, { "epoch": 0.07, "learning_rate": 1.986077772443591e-05, "loss": 2.6601, "step": 290 }, { "epoch": 0.07, "learning_rate": 1.9855976956313013e-05, "loss": 2.6189, "step": 300 }, { "epoch": 0.07, "learning_rate": 1.9851176188190113e-05, "loss": 2.6481, "step": 310 }, { "epoch": 0.08, "learning_rate": 1.984637542006721e-05, "loss": 2.7157, "step": 320 }, { "epoch": 0.08, "learning_rate": 1.9841574651944314e-05, "loss": 2.7044, "step": 330 }, { "epoch": 0.08, "learning_rate": 1.9836773883821413e-05, "loss": 2.58, "step": 340 }, { "epoch": 0.08, "learning_rate": 1.9831973115698513e-05, "loss": 2.6004, "step": 350 }, { "epoch": 0.09, "learning_rate": 1.9827172347575615e-05, "loss": 2.573, "step": 360 }, { "epoch": 0.09, "learning_rate": 1.9822371579452714e-05, "loss": 2.5859, "step": 370 }, { "epoch": 0.09, "learning_rate": 1.9817570811329813e-05, "loss": 2.5185, "step": 380 }, { "epoch": 0.09, "learning_rate": 1.9812770043206916e-05, "loss": 2.3877, "step": 390 }, { "epoch": 0.1, "learning_rate": 1.9807969275084015e-05, "loss": 2.4955, "step": 400 }, { "epoch": 0.1, "learning_rate": 1.9803168506961114e-05, "loss": 2.4998, "step": 410 }, { "epoch": 0.1, "learning_rate": 1.9798367738838217e-05, "loss": 2.4137, "step": 420 }, { "epoch": 0.1, "learning_rate": 1.9793566970715316e-05, "loss": 2.3628, "step": 430 }, { "epoch": 0.11, "learning_rate": 1.9788766202592415e-05, "loss": 2.3601, "step": 440 }, { "epoch": 0.11, "learning_rate": 1.9783965434469518e-05, "loss": 2.2108, "step": 450 }, { "epoch": 0.11, "learning_rate": 1.9779164666346617e-05, "loss": 2.2191, "step": 460 }, { "epoch": 0.11, "learning_rate": 1.977436389822372e-05, "loss": 2.2868, "step": 470 }, { "epoch": 0.12, "learning_rate": 1.976956313010082e-05, "loss": 2.2979, "step": 480 }, { "epoch": 0.12, "learning_rate": 1.9764762361977918e-05, "loss": 2.0404, "step": 490 }, { "epoch": 0.12, "learning_rate": 1.975996159385502e-05, "loss": 2.2322, "step": 500 }, { "epoch": 0.12, "learning_rate": 1.975516082573212e-05, "loss": 2.2469, "step": 510 }, { "epoch": 0.12, "learning_rate": 1.975036005760922e-05, "loss": 2.0574, "step": 520 }, { "epoch": 0.13, "learning_rate": 1.974555928948632e-05, "loss": 2.2088, "step": 530 }, { "epoch": 0.13, "learning_rate": 1.974075852136342e-05, "loss": 2.157, "step": 540 }, { "epoch": 0.13, "learning_rate": 1.973595775324052e-05, "loss": 2.1264, "step": 550 }, { "epoch": 0.13, "learning_rate": 1.9731156985117622e-05, "loss": 2.1088, "step": 560 }, { "epoch": 0.14, "learning_rate": 1.972635621699472e-05, "loss": 2.027, "step": 570 }, { "epoch": 0.14, "learning_rate": 1.972155544887182e-05, "loss": 2.0213, "step": 580 }, { "epoch": 0.14, "learning_rate": 1.9716754680748923e-05, "loss": 2.0719, "step": 590 }, { "epoch": 0.14, "learning_rate": 1.9711953912626022e-05, "loss": 2.0945, "step": 600 }, { "epoch": 0.15, "learning_rate": 1.970715314450312e-05, "loss": 1.968, "step": 610 }, { "epoch": 0.15, "learning_rate": 1.9702352376380223e-05, "loss": 1.869, "step": 620 }, { "epoch": 0.15, "learning_rate": 1.9697551608257323e-05, "loss": 1.8604, "step": 630 }, { "epoch": 0.15, "learning_rate": 1.9692750840134422e-05, "loss": 1.8648, "step": 640 }, { "epoch": 0.16, "learning_rate": 1.9687950072011524e-05, "loss": 1.9939, "step": 650 }, { "epoch": 0.16, "learning_rate": 1.9683149303888623e-05, "loss": 1.8737, "step": 660 }, { "epoch": 0.16, "learning_rate": 1.9678348535765723e-05, "loss": 1.7044, "step": 670 }, { "epoch": 0.16, "learning_rate": 1.9673547767642825e-05, "loss": 1.8427, "step": 680 }, { "epoch": 0.17, "learning_rate": 1.9668746999519924e-05, "loss": 1.7928, "step": 690 }, { "epoch": 0.17, "learning_rate": 1.9663946231397023e-05, "loss": 1.9989, "step": 700 }, { "epoch": 0.17, "learning_rate": 1.9659145463274126e-05, "loss": 1.9756, "step": 710 }, { "epoch": 0.17, "learning_rate": 1.9654344695151225e-05, "loss": 1.8586, "step": 720 }, { "epoch": 0.18, "learning_rate": 1.9649543927028328e-05, "loss": 1.6752, "step": 730 }, { "epoch": 0.18, "learning_rate": 1.9644743158905427e-05, "loss": 1.8349, "step": 740 }, { "epoch": 0.18, "learning_rate": 1.9639942390782526e-05, "loss": 1.6647, "step": 750 }, { "epoch": 0.18, "learning_rate": 1.963514162265963e-05, "loss": 1.5412, "step": 760 }, { "epoch": 0.18, "learning_rate": 1.9630340854536728e-05, "loss": 1.6946, "step": 770 }, { "epoch": 0.19, "learning_rate": 1.9625540086413827e-05, "loss": 1.6583, "step": 780 }, { "epoch": 0.19, "learning_rate": 1.962073931829093e-05, "loss": 1.6469, "step": 790 }, { "epoch": 0.19, "learning_rate": 1.961593855016803e-05, "loss": 1.6358, "step": 800 }, { "epoch": 0.19, "learning_rate": 1.9611137782045128e-05, "loss": 1.6302, "step": 810 }, { "epoch": 0.2, "learning_rate": 1.960633701392223e-05, "loss": 1.7555, "step": 820 }, { "epoch": 0.2, "learning_rate": 1.960153624579933e-05, "loss": 1.8043, "step": 830 }, { "epoch": 0.2, "learning_rate": 1.959673547767643e-05, "loss": 1.7428, "step": 840 }, { "epoch": 0.2, "learning_rate": 1.959193470955353e-05, "loss": 1.4335, "step": 850 }, { "epoch": 0.21, "learning_rate": 1.958713394143063e-05, "loss": 1.6664, "step": 860 }, { "epoch": 0.21, "learning_rate": 1.958233317330773e-05, "loss": 1.5474, "step": 870 }, { "epoch": 0.21, "learning_rate": 1.9577532405184832e-05, "loss": 1.421, "step": 880 }, { "epoch": 0.21, "learning_rate": 1.957273163706193e-05, "loss": 1.6877, "step": 890 }, { "epoch": 0.22, "learning_rate": 1.956793086893903e-05, "loss": 1.3315, "step": 900 }, { "epoch": 0.22, "learning_rate": 1.9563130100816133e-05, "loss": 1.5427, "step": 910 }, { "epoch": 0.22, "learning_rate": 1.9558329332693232e-05, "loss": 1.455, "step": 920 }, { "epoch": 0.22, "learning_rate": 1.955352856457033e-05, "loss": 1.4563, "step": 930 }, { "epoch": 0.23, "learning_rate": 1.9548727796447433e-05, "loss": 1.3555, "step": 940 }, { "epoch": 0.23, "learning_rate": 1.9543927028324533e-05, "loss": 1.4399, "step": 950 }, { "epoch": 0.23, "learning_rate": 1.9539126260201632e-05, "loss": 1.3208, "step": 960 }, { "epoch": 0.23, "learning_rate": 1.9534325492078734e-05, "loss": 1.4741, "step": 970 }, { "epoch": 0.24, "learning_rate": 1.9529524723955833e-05, "loss": 1.4694, "step": 980 }, { "epoch": 0.24, "learning_rate": 1.9524723955832933e-05, "loss": 1.4141, "step": 990 }, { "epoch": 0.24, "learning_rate": 1.9519923187710035e-05, "loss": 1.3043, "step": 1000 }, { "epoch": 0.24, "learning_rate": 1.9515122419587134e-05, "loss": 1.3022, "step": 1010 }, { "epoch": 0.24, "learning_rate": 1.9510321651464237e-05, "loss": 1.3918, "step": 1020 }, { "epoch": 0.25, "learning_rate": 1.9505520883341336e-05, "loss": 1.3439, "step": 1030 }, { "epoch": 0.25, "learning_rate": 1.9500720115218435e-05, "loss": 1.3018, "step": 1040 }, { "epoch": 0.25, "learning_rate": 1.9495919347095538e-05, "loss": 1.5106, "step": 1050 }, { "epoch": 0.25, "learning_rate": 1.9491118578972637e-05, "loss": 1.2436, "step": 1060 }, { "epoch": 0.26, "learning_rate": 1.9486317810849736e-05, "loss": 1.3641, "step": 1070 }, { "epoch": 0.26, "learning_rate": 1.948151704272684e-05, "loss": 1.2213, "step": 1080 }, { "epoch": 0.26, "learning_rate": 1.9476716274603938e-05, "loss": 1.4179, "step": 1090 }, { "epoch": 0.26, "learning_rate": 1.9471915506481037e-05, "loss": 1.125, "step": 1100 }, { "epoch": 0.27, "learning_rate": 1.946711473835814e-05, "loss": 1.3662, "step": 1110 }, { "epoch": 0.27, "learning_rate": 1.946231397023524e-05, "loss": 1.2618, "step": 1120 }, { "epoch": 0.27, "learning_rate": 1.9457513202112338e-05, "loss": 1.241, "step": 1130 }, { "epoch": 0.27, "learning_rate": 1.945271243398944e-05, "loss": 1.2252, "step": 1140 }, { "epoch": 0.28, "learning_rate": 1.9447911665866543e-05, "loss": 1.1152, "step": 1150 }, { "epoch": 0.28, "learning_rate": 1.944311089774364e-05, "loss": 1.2045, "step": 1160 }, { "epoch": 0.28, "learning_rate": 1.943831012962074e-05, "loss": 1.0854, "step": 1170 }, { "epoch": 0.28, "learning_rate": 1.9433509361497844e-05, "loss": 1.1037, "step": 1180 }, { "epoch": 0.29, "learning_rate": 1.942870859337494e-05, "loss": 1.4213, "step": 1190 }, { "epoch": 0.29, "learning_rate": 1.9423907825252042e-05, "loss": 1.1631, "step": 1200 }, { "epoch": 0.29, "learning_rate": 1.9419107057129144e-05, "loss": 1.0022, "step": 1210 }, { "epoch": 0.29, "learning_rate": 1.941430628900624e-05, "loss": 1.1225, "step": 1220 }, { "epoch": 0.3, "learning_rate": 1.9409505520883343e-05, "loss": 1.1233, "step": 1230 }, { "epoch": 0.3, "learning_rate": 1.9404704752760445e-05, "loss": 1.2195, "step": 1240 }, { "epoch": 0.3, "learning_rate": 1.939990398463754e-05, "loss": 1.2476, "step": 1250 }, { "epoch": 0.3, "learning_rate": 1.9395103216514643e-05, "loss": 1.1084, "step": 1260 }, { "epoch": 0.3, "learning_rate": 1.9390302448391746e-05, "loss": 0.93, "step": 1270 }, { "epoch": 0.31, "learning_rate": 1.9385501680268845e-05, "loss": 0.9731, "step": 1280 }, { "epoch": 0.31, "learning_rate": 1.9380700912145944e-05, "loss": 1.1567, "step": 1290 }, { "epoch": 0.31, "learning_rate": 1.9375900144023047e-05, "loss": 0.9934, "step": 1300 }, { "epoch": 0.31, "learning_rate": 1.9371099375900146e-05, "loss": 1.1649, "step": 1310 }, { "epoch": 0.32, "learning_rate": 1.9366298607777245e-05, "loss": 1.0041, "step": 1320 }, { "epoch": 0.32, "learning_rate": 1.9361497839654348e-05, "loss": 1.003, "step": 1330 }, { "epoch": 0.32, "learning_rate": 1.9356697071531447e-05, "loss": 1.1013, "step": 1340 }, { "epoch": 0.32, "learning_rate": 1.9351896303408546e-05, "loss": 1.2216, "step": 1350 }, { "epoch": 0.33, "learning_rate": 1.934709553528565e-05, "loss": 1.1426, "step": 1360 }, { "epoch": 0.33, "learning_rate": 1.9342294767162748e-05, "loss": 1.1875, "step": 1370 }, { "epoch": 0.33, "learning_rate": 1.9337493999039847e-05, "loss": 1.1213, "step": 1380 }, { "epoch": 0.33, "learning_rate": 1.933269323091695e-05, "loss": 1.0672, "step": 1390 }, { "epoch": 0.34, "learning_rate": 1.932789246279405e-05, "loss": 1.0088, "step": 1400 }, { "epoch": 0.34, "learning_rate": 1.932309169467115e-05, "loss": 1.1139, "step": 1410 }, { "epoch": 0.34, "learning_rate": 1.931829092654825e-05, "loss": 1.0133, "step": 1420 }, { "epoch": 0.34, "learning_rate": 1.931349015842535e-05, "loss": 0.9739, "step": 1430 }, { "epoch": 0.35, "learning_rate": 1.9308689390302452e-05, "loss": 0.9978, "step": 1440 }, { "epoch": 0.35, "learning_rate": 1.930388862217955e-05, "loss": 1.0893, "step": 1450 }, { "epoch": 0.35, "learning_rate": 1.929908785405665e-05, "loss": 1.0321, "step": 1460 }, { "epoch": 0.35, "learning_rate": 1.9294287085933753e-05, "loss": 0.9259, "step": 1470 }, { "epoch": 0.36, "learning_rate": 1.9289486317810852e-05, "loss": 1.0544, "step": 1480 }, { "epoch": 0.36, "learning_rate": 1.928468554968795e-05, "loss": 0.9128, "step": 1490 }, { "epoch": 0.36, "learning_rate": 1.9279884781565054e-05, "loss": 0.9296, "step": 1500 }, { "epoch": 0.36, "learning_rate": 1.9275084013442153e-05, "loss": 0.9006, "step": 1510 }, { "epoch": 0.36, "learning_rate": 1.9270283245319252e-05, "loss": 0.9222, "step": 1520 }, { "epoch": 0.37, "learning_rate": 1.9265482477196354e-05, "loss": 0.7412, "step": 1530 }, { "epoch": 0.37, "learning_rate": 1.9260681709073454e-05, "loss": 0.947, "step": 1540 }, { "epoch": 0.37, "learning_rate": 1.9255880940950553e-05, "loss": 0.8165, "step": 1550 }, { "epoch": 0.37, "learning_rate": 1.9251080172827655e-05, "loss": 0.9555, "step": 1560 }, { "epoch": 0.38, "learning_rate": 1.9246279404704754e-05, "loss": 0.968, "step": 1570 }, { "epoch": 0.38, "learning_rate": 1.9241478636581854e-05, "loss": 0.9129, "step": 1580 }, { "epoch": 0.38, "learning_rate": 1.9236677868458956e-05, "loss": 0.8208, "step": 1590 }, { "epoch": 0.38, "learning_rate": 1.9231877100336055e-05, "loss": 1.0102, "step": 1600 }, { "epoch": 0.39, "learning_rate": 1.9227076332213154e-05, "loss": 0.8468, "step": 1610 }, { "epoch": 0.39, "learning_rate": 1.9222275564090257e-05, "loss": 0.8872, "step": 1620 }, { "epoch": 0.39, "learning_rate": 1.9217474795967356e-05, "loss": 0.8988, "step": 1630 }, { "epoch": 0.39, "learning_rate": 1.9212674027844455e-05, "loss": 0.8378, "step": 1640 }, { "epoch": 0.4, "learning_rate": 1.9207873259721558e-05, "loss": 0.6568, "step": 1650 }, { "epoch": 0.4, "learning_rate": 1.9203072491598657e-05, "loss": 0.9033, "step": 1660 }, { "epoch": 0.4, "learning_rate": 1.919827172347576e-05, "loss": 0.8359, "step": 1670 }, { "epoch": 0.4, "learning_rate": 1.919347095535286e-05, "loss": 0.7811, "step": 1680 }, { "epoch": 0.41, "learning_rate": 1.9188670187229958e-05, "loss": 0.8456, "step": 1690 }, { "epoch": 0.41, "learning_rate": 1.918386941910706e-05, "loss": 0.7367, "step": 1700 }, { "epoch": 0.41, "learning_rate": 1.917906865098416e-05, "loss": 0.831, "step": 1710 }, { "epoch": 0.41, "learning_rate": 1.917426788286126e-05, "loss": 0.9071, "step": 1720 }, { "epoch": 0.42, "learning_rate": 1.916946711473836e-05, "loss": 1.0782, "step": 1730 }, { "epoch": 0.42, "learning_rate": 1.916466634661546e-05, "loss": 0.7672, "step": 1740 }, { "epoch": 0.42, "learning_rate": 1.915986557849256e-05, "loss": 0.9447, "step": 1750 }, { "epoch": 0.42, "learning_rate": 1.9155064810369662e-05, "loss": 0.7413, "step": 1760 }, { "epoch": 0.42, "learning_rate": 1.915026404224676e-05, "loss": 0.81, "step": 1770 }, { "epoch": 0.43, "learning_rate": 1.914546327412386e-05, "loss": 0.7186, "step": 1780 }, { "epoch": 0.43, "learning_rate": 1.9140662506000963e-05, "loss": 0.8462, "step": 1790 }, { "epoch": 0.43, "learning_rate": 1.9135861737878062e-05, "loss": 0.8137, "step": 1800 }, { "epoch": 0.43, "learning_rate": 1.913106096975516e-05, "loss": 0.6924, "step": 1810 }, { "epoch": 0.44, "learning_rate": 1.9126260201632264e-05, "loss": 0.8786, "step": 1820 }, { "epoch": 0.44, "learning_rate": 1.9121459433509363e-05, "loss": 0.893, "step": 1830 }, { "epoch": 0.44, "learning_rate": 1.9116658665386462e-05, "loss": 0.955, "step": 1840 }, { "epoch": 0.44, "learning_rate": 1.9111857897263564e-05, "loss": 0.7753, "step": 1850 }, { "epoch": 0.45, "learning_rate": 1.9107057129140664e-05, "loss": 0.8304, "step": 1860 }, { "epoch": 0.45, "learning_rate": 1.9102256361017763e-05, "loss": 0.7471, "step": 1870 }, { "epoch": 0.45, "learning_rate": 1.9097455592894865e-05, "loss": 0.7336, "step": 1880 }, { "epoch": 0.45, "learning_rate": 1.9092654824771964e-05, "loss": 0.7616, "step": 1890 }, { "epoch": 0.46, "learning_rate": 1.9087854056649064e-05, "loss": 0.8023, "step": 1900 }, { "epoch": 0.46, "learning_rate": 1.9083053288526166e-05, "loss": 0.7867, "step": 1910 }, { "epoch": 0.46, "learning_rate": 1.9078252520403265e-05, "loss": 0.6036, "step": 1920 }, { "epoch": 0.46, "learning_rate": 1.9073451752280368e-05, "loss": 0.8622, "step": 1930 }, { "epoch": 0.47, "learning_rate": 1.9068650984157467e-05, "loss": 0.8327, "step": 1940 }, { "epoch": 0.47, "learning_rate": 1.9063850216034566e-05, "loss": 0.6514, "step": 1950 }, { "epoch": 0.47, "learning_rate": 1.905904944791167e-05, "loss": 0.8685, "step": 1960 }, { "epoch": 0.47, "learning_rate": 1.9054248679788768e-05, "loss": 0.7693, "step": 1970 }, { "epoch": 0.48, "learning_rate": 1.9049447911665867e-05, "loss": 0.803, "step": 1980 }, { "epoch": 0.48, "learning_rate": 1.904464714354297e-05, "loss": 0.5931, "step": 1990 }, { "epoch": 0.48, "learning_rate": 1.903984637542007e-05, "loss": 0.9011, "step": 2000 }, { "epoch": 0.48, "learning_rate": 1.9035045607297168e-05, "loss": 0.7069, "step": 2010 }, { "epoch": 0.48, "learning_rate": 1.903024483917427e-05, "loss": 0.7496, "step": 2020 }, { "epoch": 0.49, "learning_rate": 1.902544407105137e-05, "loss": 0.6518, "step": 2030 }, { "epoch": 0.49, "learning_rate": 1.902064330292847e-05, "loss": 0.8198, "step": 2040 }, { "epoch": 0.49, "learning_rate": 1.901584253480557e-05, "loss": 0.7376, "step": 2050 }, { "epoch": 0.49, "learning_rate": 1.901104176668267e-05, "loss": 0.7526, "step": 2060 }, { "epoch": 0.5, "learning_rate": 1.900624099855977e-05, "loss": 0.7426, "step": 2070 }, { "epoch": 0.5, "learning_rate": 1.9001440230436872e-05, "loss": 0.6284, "step": 2080 }, { "epoch": 0.5, "learning_rate": 1.899663946231397e-05, "loss": 0.6465, "step": 2090 }, { "epoch": 0.5, "learning_rate": 1.899183869419107e-05, "loss": 0.7576, "step": 2100 }, { "epoch": 0.51, "learning_rate": 1.8987037926068173e-05, "loss": 0.6778, "step": 2110 }, { "epoch": 0.51, "learning_rate": 1.8982237157945272e-05, "loss": 0.681, "step": 2120 }, { "epoch": 0.51, "learning_rate": 1.897743638982237e-05, "loss": 0.636, "step": 2130 }, { "epoch": 0.51, "learning_rate": 1.8972635621699474e-05, "loss": 0.6679, "step": 2140 }, { "epoch": 0.52, "learning_rate": 1.8967834853576573e-05, "loss": 0.7074, "step": 2150 }, { "epoch": 0.52, "learning_rate": 1.8963034085453672e-05, "loss": 0.5379, "step": 2160 }, { "epoch": 0.52, "learning_rate": 1.8958233317330774e-05, "loss": 0.6772, "step": 2170 }, { "epoch": 0.52, "learning_rate": 1.8953432549207874e-05, "loss": 0.5271, "step": 2180 }, { "epoch": 0.53, "learning_rate": 1.8948631781084976e-05, "loss": 0.3873, "step": 2190 }, { "epoch": 0.53, "learning_rate": 1.8943831012962075e-05, "loss": 0.7285, "step": 2200 }, { "epoch": 0.53, "learning_rate": 1.8939030244839174e-05, "loss": 0.7146, "step": 2210 }, { "epoch": 0.53, "learning_rate": 1.8934229476716277e-05, "loss": 0.6375, "step": 2220 }, { "epoch": 0.54, "learning_rate": 1.8929428708593376e-05, "loss": 0.6244, "step": 2230 }, { "epoch": 0.54, "learning_rate": 1.8924627940470475e-05, "loss": 0.8262, "step": 2240 }, { "epoch": 0.54, "learning_rate": 1.8919827172347578e-05, "loss": 0.5245, "step": 2250 }, { "epoch": 0.54, "learning_rate": 1.8915026404224677e-05, "loss": 0.653, "step": 2260 }, { "epoch": 0.54, "learning_rate": 1.8910225636101776e-05, "loss": 0.6866, "step": 2270 }, { "epoch": 0.55, "learning_rate": 1.890542486797888e-05, "loss": 0.5555, "step": 2280 }, { "epoch": 0.55, "learning_rate": 1.8900624099855978e-05, "loss": 0.4962, "step": 2290 }, { "epoch": 0.55, "learning_rate": 1.8895823331733077e-05, "loss": 0.6223, "step": 2300 }, { "epoch": 0.55, "learning_rate": 1.889102256361018e-05, "loss": 0.586, "step": 2310 }, { "epoch": 0.56, "learning_rate": 1.888622179548728e-05, "loss": 0.4495, "step": 2320 }, { "epoch": 0.56, "learning_rate": 1.8881421027364378e-05, "loss": 0.5812, "step": 2330 }, { "epoch": 0.56, "learning_rate": 1.887662025924148e-05, "loss": 0.4158, "step": 2340 }, { "epoch": 0.56, "learning_rate": 1.8871819491118583e-05, "loss": 0.6827, "step": 2350 }, { "epoch": 0.57, "learning_rate": 1.886701872299568e-05, "loss": 0.4711, "step": 2360 }, { "epoch": 0.57, "learning_rate": 1.886221795487278e-05, "loss": 0.7131, "step": 2370 }, { "epoch": 0.57, "learning_rate": 1.8857417186749884e-05, "loss": 0.7031, "step": 2380 }, { "epoch": 0.57, "learning_rate": 1.885261641862698e-05, "loss": 0.6428, "step": 2390 }, { "epoch": 0.58, "learning_rate": 1.8847815650504082e-05, "loss": 0.5477, "step": 2400 }, { "epoch": 0.58, "learning_rate": 1.8843014882381185e-05, "loss": 0.6406, "step": 2410 }, { "epoch": 0.58, "learning_rate": 1.883821411425828e-05, "loss": 0.511, "step": 2420 }, { "epoch": 0.58, "learning_rate": 1.8833413346135383e-05, "loss": 0.6828, "step": 2430 }, { "epoch": 0.59, "learning_rate": 1.8828612578012485e-05, "loss": 0.6954, "step": 2440 }, { "epoch": 0.59, "learning_rate": 1.882381180988958e-05, "loss": 0.6661, "step": 2450 }, { "epoch": 0.59, "learning_rate": 1.8819011041766684e-05, "loss": 0.629, "step": 2460 }, { "epoch": 0.59, "learning_rate": 1.8814210273643786e-05, "loss": 0.449, "step": 2470 }, { "epoch": 0.6, "learning_rate": 1.8809409505520885e-05, "loss": 0.5062, "step": 2480 }, { "epoch": 0.6, "learning_rate": 1.8804608737397985e-05, "loss": 0.4024, "step": 2490 }, { "epoch": 0.6, "learning_rate": 1.8799807969275087e-05, "loss": 0.6008, "step": 2500 }, { "epoch": 0.6, "learning_rate": 1.8795007201152186e-05, "loss": 0.5213, "step": 2510 }, { "epoch": 0.6, "learning_rate": 1.8790206433029285e-05, "loss": 0.4123, "step": 2520 }, { "epoch": 0.61, "learning_rate": 1.8785405664906388e-05, "loss": 0.5173, "step": 2530 }, { "epoch": 0.61, "learning_rate": 1.8780604896783487e-05, "loss": 0.5035, "step": 2540 }, { "epoch": 0.61, "learning_rate": 1.8775804128660586e-05, "loss": 0.3853, "step": 2550 }, { "epoch": 0.61, "learning_rate": 1.877100336053769e-05, "loss": 0.5554, "step": 2560 }, { "epoch": 0.62, "learning_rate": 1.8766202592414788e-05, "loss": 0.3964, "step": 2570 }, { "epoch": 0.62, "learning_rate": 1.8761401824291887e-05, "loss": 0.7588, "step": 2580 }, { "epoch": 0.62, "learning_rate": 1.875660105616899e-05, "loss": 0.6074, "step": 2590 }, { "epoch": 0.62, "learning_rate": 1.875180028804609e-05, "loss": 0.433, "step": 2600 }, { "epoch": 0.63, "learning_rate": 1.874699951992319e-05, "loss": 0.49, "step": 2610 }, { "epoch": 0.63, "learning_rate": 1.874219875180029e-05, "loss": 0.503, "step": 2620 }, { "epoch": 0.63, "learning_rate": 1.873739798367739e-05, "loss": 0.4721, "step": 2630 }, { "epoch": 0.63, "learning_rate": 1.8732597215554492e-05, "loss": 0.6306, "step": 2640 }, { "epoch": 0.64, "learning_rate": 1.872779644743159e-05, "loss": 0.4342, "step": 2650 }, { "epoch": 0.64, "learning_rate": 1.872299567930869e-05, "loss": 0.4246, "step": 2660 }, { "epoch": 0.64, "learning_rate": 1.8718194911185793e-05, "loss": 0.6997, "step": 2670 }, { "epoch": 0.64, "learning_rate": 1.8713394143062892e-05, "loss": 0.4625, "step": 2680 }, { "epoch": 0.65, "learning_rate": 1.870859337493999e-05, "loss": 0.574, "step": 2690 }, { "epoch": 0.65, "learning_rate": 1.8703792606817094e-05, "loss": 0.4818, "step": 2700 }, { "epoch": 0.65, "learning_rate": 1.8698991838694193e-05, "loss": 0.4565, "step": 2710 }, { "epoch": 0.65, "learning_rate": 1.8694191070571292e-05, "loss": 0.4671, "step": 2720 }, { "epoch": 0.66, "learning_rate": 1.8689390302448395e-05, "loss": 0.4953, "step": 2730 }, { "epoch": 0.66, "learning_rate": 1.8684589534325494e-05, "loss": 0.3733, "step": 2740 }, { "epoch": 0.66, "learning_rate": 1.8679788766202593e-05, "loss": 0.6231, "step": 2750 }, { "epoch": 0.66, "learning_rate": 1.8674987998079695e-05, "loss": 0.4676, "step": 2760 }, { "epoch": 0.66, "learning_rate": 1.8670187229956795e-05, "loss": 0.3567, "step": 2770 }, { "epoch": 0.67, "learning_rate": 1.8665386461833894e-05, "loss": 0.4384, "step": 2780 }, { "epoch": 0.67, "learning_rate": 1.8660585693710996e-05, "loss": 0.5046, "step": 2790 }, { "epoch": 0.67, "learning_rate": 1.8655784925588095e-05, "loss": 0.453, "step": 2800 }, { "epoch": 0.67, "learning_rate": 1.8650984157465195e-05, "loss": 0.4219, "step": 2810 }, { "epoch": 0.68, "learning_rate": 1.8646183389342297e-05, "loss": 0.3992, "step": 2820 }, { "epoch": 0.68, "learning_rate": 1.8641382621219396e-05, "loss": 0.3865, "step": 2830 }, { "epoch": 0.68, "learning_rate": 1.8636581853096495e-05, "loss": 0.597, "step": 2840 }, { "epoch": 0.68, "learning_rate": 1.8631781084973598e-05, "loss": 0.5623, "step": 2850 }, { "epoch": 0.69, "learning_rate": 1.8626980316850697e-05, "loss": 0.4672, "step": 2860 }, { "epoch": 0.69, "learning_rate": 1.86221795487278e-05, "loss": 0.3366, "step": 2870 }, { "epoch": 0.69, "learning_rate": 1.86173787806049e-05, "loss": 0.4285, "step": 2880 }, { "epoch": 0.69, "learning_rate": 1.8612578012481998e-05, "loss": 0.5842, "step": 2890 }, { "epoch": 0.7, "learning_rate": 1.86077772443591e-05, "loss": 0.4076, "step": 2900 }, { "epoch": 0.7, "learning_rate": 1.86029764762362e-05, "loss": 0.4258, "step": 2910 }, { "epoch": 0.7, "learning_rate": 1.85981757081133e-05, "loss": 0.5203, "step": 2920 }, { "epoch": 0.7, "learning_rate": 1.85933749399904e-05, "loss": 0.4427, "step": 2930 }, { "epoch": 0.71, "learning_rate": 1.85885741718675e-05, "loss": 0.5219, "step": 2940 }, { "epoch": 0.71, "learning_rate": 1.85837734037446e-05, "loss": 0.526, "step": 2950 }, { "epoch": 0.71, "learning_rate": 1.8578972635621702e-05, "loss": 0.5013, "step": 2960 }, { "epoch": 0.71, "learning_rate": 1.85741718674988e-05, "loss": 0.5992, "step": 2970 }, { "epoch": 0.72, "learning_rate": 1.85693710993759e-05, "loss": 0.5753, "step": 2980 }, { "epoch": 0.72, "learning_rate": 1.8564570331253003e-05, "loss": 0.5459, "step": 2990 }, { "epoch": 0.72, "learning_rate": 1.8559769563130102e-05, "loss": 0.6427, "step": 3000 }, { "epoch": 0.72, "learning_rate": 1.85549687950072e-05, "loss": 0.3971, "step": 3010 }, { "epoch": 0.72, "learning_rate": 1.8550168026884304e-05, "loss": 0.3084, "step": 3020 }, { "epoch": 0.73, "learning_rate": 1.8545367258761403e-05, "loss": 0.4304, "step": 3030 }, { "epoch": 0.73, "learning_rate": 1.8540566490638502e-05, "loss": 0.4354, "step": 3040 }, { "epoch": 0.73, "learning_rate": 1.8535765722515605e-05, "loss": 0.4845, "step": 3050 }, { "epoch": 0.73, "learning_rate": 1.8530964954392704e-05, "loss": 0.4229, "step": 3060 }, { "epoch": 0.74, "learning_rate": 1.8526164186269803e-05, "loss": 0.4911, "step": 3070 }, { "epoch": 0.74, "learning_rate": 1.8521363418146905e-05, "loss": 0.5619, "step": 3080 }, { "epoch": 0.74, "learning_rate": 1.8516562650024005e-05, "loss": 0.4871, "step": 3090 }, { "epoch": 0.74, "learning_rate": 1.8511761881901104e-05, "loss": 0.3633, "step": 3100 }, { "epoch": 0.75, "learning_rate": 1.8506961113778206e-05, "loss": 0.3094, "step": 3110 }, { "epoch": 0.75, "learning_rate": 1.8502160345655305e-05, "loss": 0.5367, "step": 3120 }, { "epoch": 0.75, "learning_rate": 1.8497359577532408e-05, "loss": 0.4657, "step": 3130 }, { "epoch": 0.75, "learning_rate": 1.8492558809409507e-05, "loss": 0.3797, "step": 3140 }, { "epoch": 0.76, "learning_rate": 1.8487758041286606e-05, "loss": 0.5531, "step": 3150 }, { "epoch": 0.76, "learning_rate": 1.848295727316371e-05, "loss": 0.5096, "step": 3160 }, { "epoch": 0.76, "learning_rate": 1.8478156505040808e-05, "loss": 0.2796, "step": 3170 }, { "epoch": 0.76, "learning_rate": 1.8473355736917907e-05, "loss": 0.4184, "step": 3180 }, { "epoch": 0.77, "learning_rate": 1.846855496879501e-05, "loss": 0.4097, "step": 3190 }, { "epoch": 0.77, "learning_rate": 1.846375420067211e-05, "loss": 0.4248, "step": 3200 }, { "epoch": 0.77, "learning_rate": 1.8458953432549208e-05, "loss": 0.5682, "step": 3210 }, { "epoch": 0.77, "learning_rate": 1.845415266442631e-05, "loss": 0.5558, "step": 3220 }, { "epoch": 0.78, "learning_rate": 1.844935189630341e-05, "loss": 0.5888, "step": 3230 }, { "epoch": 0.78, "learning_rate": 1.844455112818051e-05, "loss": 0.4789, "step": 3240 }, { "epoch": 0.78, "learning_rate": 1.843975036005761e-05, "loss": 0.3556, "step": 3250 }, { "epoch": 0.78, "learning_rate": 1.843494959193471e-05, "loss": 0.4935, "step": 3260 }, { "epoch": 0.78, "learning_rate": 1.843014882381181e-05, "loss": 0.349, "step": 3270 }, { "epoch": 0.79, "learning_rate": 1.8425348055688912e-05, "loss": 0.4277, "step": 3280 }, { "epoch": 0.79, "learning_rate": 1.842054728756601e-05, "loss": 0.3369, "step": 3290 }, { "epoch": 0.79, "learning_rate": 1.841574651944311e-05, "loss": 0.4325, "step": 3300 }, { "epoch": 0.79, "learning_rate": 1.8410945751320213e-05, "loss": 0.4881, "step": 3310 }, { "epoch": 0.8, "learning_rate": 1.8406144983197312e-05, "loss": 0.4303, "step": 3320 }, { "epoch": 0.8, "learning_rate": 1.840134421507441e-05, "loss": 0.4241, "step": 3330 }, { "epoch": 0.8, "learning_rate": 1.8396543446951514e-05, "loss": 0.3176, "step": 3340 }, { "epoch": 0.8, "learning_rate": 1.8391742678828613e-05, "loss": 0.3702, "step": 3350 }, { "epoch": 0.81, "learning_rate": 1.8386941910705712e-05, "loss": 0.4432, "step": 3360 }, { "epoch": 0.81, "learning_rate": 1.8382141142582815e-05, "loss": 0.4104, "step": 3370 }, { "epoch": 0.81, "learning_rate": 1.8377340374459914e-05, "loss": 0.5021, "step": 3380 }, { "epoch": 0.81, "learning_rate": 1.8372539606337016e-05, "loss": 0.4312, "step": 3390 }, { "epoch": 0.82, "learning_rate": 1.8367738838214116e-05, "loss": 0.374, "step": 3400 }, { "epoch": 0.82, "learning_rate": 1.8362938070091215e-05, "loss": 0.3657, "step": 3410 }, { "epoch": 0.82, "learning_rate": 1.8358137301968317e-05, "loss": 0.317, "step": 3420 }, { "epoch": 0.82, "learning_rate": 1.8353336533845416e-05, "loss": 0.4727, "step": 3430 }, { "epoch": 0.83, "learning_rate": 1.8348535765722515e-05, "loss": 0.418, "step": 3440 }, { "epoch": 0.83, "learning_rate": 1.8343734997599618e-05, "loss": 0.3723, "step": 3450 }, { "epoch": 0.83, "learning_rate": 1.8338934229476717e-05, "loss": 0.4062, "step": 3460 }, { "epoch": 0.83, "learning_rate": 1.8334133461353816e-05, "loss": 0.299, "step": 3470 }, { "epoch": 0.84, "learning_rate": 1.832933269323092e-05, "loss": 0.3177, "step": 3480 }, { "epoch": 0.84, "learning_rate": 1.8324531925108018e-05, "loss": 0.2584, "step": 3490 }, { "epoch": 0.84, "learning_rate": 1.8319731156985117e-05, "loss": 0.4107, "step": 3500 }, { "epoch": 0.84, "learning_rate": 1.831493038886222e-05, "loss": 0.409, "step": 3510 }, { "epoch": 0.84, "learning_rate": 1.8310129620739322e-05, "loss": 0.2987, "step": 3520 }, { "epoch": 0.85, "learning_rate": 1.8305328852616418e-05, "loss": 0.4629, "step": 3530 }, { "epoch": 0.85, "learning_rate": 1.830052808449352e-05, "loss": 0.3649, "step": 3540 }, { "epoch": 0.85, "learning_rate": 1.8295727316370623e-05, "loss": 0.2186, "step": 3550 }, { "epoch": 0.85, "learning_rate": 1.829092654824772e-05, "loss": 0.5434, "step": 3560 }, { "epoch": 0.86, "learning_rate": 1.828612578012482e-05, "loss": 0.3069, "step": 3570 }, { "epoch": 0.86, "learning_rate": 1.8281325012001924e-05, "loss": 0.3797, "step": 3580 }, { "epoch": 0.86, "learning_rate": 1.827652424387902e-05, "loss": 0.3937, "step": 3590 }, { "epoch": 0.86, "learning_rate": 1.8271723475756122e-05, "loss": 0.4565, "step": 3600 }, { "epoch": 0.87, "learning_rate": 1.8266922707633225e-05, "loss": 0.3195, "step": 3610 }, { "epoch": 0.87, "learning_rate": 1.826212193951032e-05, "loss": 0.2143, "step": 3620 }, { "epoch": 0.87, "learning_rate": 1.8257321171387423e-05, "loss": 0.584, "step": 3630 }, { "epoch": 0.87, "learning_rate": 1.8252520403264526e-05, "loss": 0.2804, "step": 3640 }, { "epoch": 0.88, "learning_rate": 1.8247719635141625e-05, "loss": 0.2962, "step": 3650 }, { "epoch": 0.88, "learning_rate": 1.8242918867018724e-05, "loss": 0.2373, "step": 3660 }, { "epoch": 0.88, "learning_rate": 1.8238118098895826e-05, "loss": 0.2916, "step": 3670 }, { "epoch": 0.88, "learning_rate": 1.8233317330772926e-05, "loss": 0.4911, "step": 3680 }, { "epoch": 0.89, "learning_rate": 1.8228516562650025e-05, "loss": 0.5212, "step": 3690 }, { "epoch": 0.89, "learning_rate": 1.8223715794527127e-05, "loss": 0.3899, "step": 3700 }, { "epoch": 0.89, "learning_rate": 1.8218915026404226e-05, "loss": 0.3755, "step": 3710 }, { "epoch": 0.89, "learning_rate": 1.8214114258281326e-05, "loss": 0.4213, "step": 3720 }, { "epoch": 0.9, "learning_rate": 1.8209313490158428e-05, "loss": 0.3548, "step": 3730 }, { "epoch": 0.9, "learning_rate": 1.8204512722035527e-05, "loss": 0.2486, "step": 3740 }, { "epoch": 0.9, "learning_rate": 1.8199711953912626e-05, "loss": 0.4735, "step": 3750 }, { "epoch": 0.9, "learning_rate": 1.819491118578973e-05, "loss": 0.2879, "step": 3760 }, { "epoch": 0.9, "learning_rate": 1.8190110417666828e-05, "loss": 0.2308, "step": 3770 }, { "epoch": 0.91, "learning_rate": 1.8185309649543927e-05, "loss": 0.3713, "step": 3780 }, { "epoch": 0.91, "learning_rate": 1.818050888142103e-05, "loss": 0.4296, "step": 3790 }, { "epoch": 0.91, "learning_rate": 1.817570811329813e-05, "loss": 0.3481, "step": 3800 }, { "epoch": 0.91, "learning_rate": 1.817090734517523e-05, "loss": 0.3113, "step": 3810 }, { "epoch": 0.92, "learning_rate": 1.816610657705233e-05, "loss": 0.304, "step": 3820 }, { "epoch": 0.92, "learning_rate": 1.816130580892943e-05, "loss": 0.4197, "step": 3830 }, { "epoch": 0.92, "learning_rate": 1.8156505040806532e-05, "loss": 0.3564, "step": 3840 }, { "epoch": 0.92, "learning_rate": 1.815170427268363e-05, "loss": 0.3632, "step": 3850 }, { "epoch": 0.93, "learning_rate": 1.814690350456073e-05, "loss": 0.3188, "step": 3860 }, { "epoch": 0.93, "learning_rate": 1.8142102736437833e-05, "loss": 0.4399, "step": 3870 }, { "epoch": 0.93, "learning_rate": 1.8137301968314932e-05, "loss": 0.3436, "step": 3880 }, { "epoch": 0.93, "learning_rate": 1.813250120019203e-05, "loss": 0.3636, "step": 3890 }, { "epoch": 0.94, "learning_rate": 1.8127700432069134e-05, "loss": 0.381, "step": 3900 }, { "epoch": 0.94, "learning_rate": 1.8122899663946233e-05, "loss": 0.5667, "step": 3910 }, { "epoch": 0.94, "learning_rate": 1.8118098895823332e-05, "loss": 0.1597, "step": 3920 }, { "epoch": 0.94, "learning_rate": 1.8113298127700435e-05, "loss": 0.3125, "step": 3930 }, { "epoch": 0.95, "learning_rate": 1.8108497359577534e-05, "loss": 0.3009, "step": 3940 }, { "epoch": 0.95, "learning_rate": 1.8103696591454633e-05, "loss": 0.2959, "step": 3950 }, { "epoch": 0.95, "learning_rate": 1.8098895823331736e-05, "loss": 0.285, "step": 3960 }, { "epoch": 0.95, "learning_rate": 1.8094095055208835e-05, "loss": 0.4986, "step": 3970 }, { "epoch": 0.96, "learning_rate": 1.8089294287085934e-05, "loss": 0.4226, "step": 3980 }, { "epoch": 0.96, "learning_rate": 1.8084493518963036e-05, "loss": 0.357, "step": 3990 }, { "epoch": 0.96, "learning_rate": 1.8079692750840136e-05, "loss": 0.2581, "step": 4000 }, { "epoch": 0.96, "learning_rate": 1.8074891982717235e-05, "loss": 0.3922, "step": 4010 }, { "epoch": 0.96, "learning_rate": 1.8070091214594337e-05, "loss": 0.2216, "step": 4020 }, { "epoch": 0.97, "learning_rate": 1.8065290446471436e-05, "loss": 0.6425, "step": 4030 }, { "epoch": 0.97, "learning_rate": 1.8060489678348536e-05, "loss": 0.396, "step": 4040 }, { "epoch": 0.97, "learning_rate": 1.8055688910225638e-05, "loss": 0.3119, "step": 4050 }, { "epoch": 0.97, "learning_rate": 1.8050888142102737e-05, "loss": 0.377, "step": 4060 }, { "epoch": 0.98, "learning_rate": 1.804608737397984e-05, "loss": 0.3404, "step": 4070 }, { "epoch": 0.98, "learning_rate": 1.804128660585694e-05, "loss": 0.2097, "step": 4080 }, { "epoch": 0.98, "learning_rate": 1.8036485837734038e-05, "loss": 0.4086, "step": 4090 }, { "epoch": 0.98, "learning_rate": 1.803168506961114e-05, "loss": 0.4156, "step": 4100 }, { "epoch": 0.99, "learning_rate": 1.802688430148824e-05, "loss": 0.4628, "step": 4110 }, { "epoch": 0.99, "learning_rate": 1.802208353336534e-05, "loss": 0.2637, "step": 4120 }, { "epoch": 0.99, "learning_rate": 1.801728276524244e-05, "loss": 0.3179, "step": 4130 }, { "epoch": 0.99, "learning_rate": 1.801248199711954e-05, "loss": 0.2576, "step": 4140 }, { "epoch": 1.0, "learning_rate": 1.800768122899664e-05, "loss": 0.2956, "step": 4150 }, { "epoch": 1.0, "learning_rate": 1.8002880460873742e-05, "loss": 0.2593, "step": 4160 }, { "epoch": 1.0, "eval_accuracy": 0.9697381842910575, "eval_loss": 0.15848955512046814, "eval_runtime": 518.0494, "eval_samples_per_second": 11.354, "eval_steps_per_second": 1.421, "step": 4166 }, { "epoch": 1.0, "learning_rate": 1.799807969275084e-05, "loss": 0.2808, "step": 4170 }, { "epoch": 1.0, "learning_rate": 1.799327892462794e-05, "loss": 0.3358, "step": 4180 }, { "epoch": 1.01, "learning_rate": 1.7988478156505043e-05, "loss": 0.3963, "step": 4190 }, { "epoch": 1.01, "learning_rate": 1.7983677388382142e-05, "loss": 0.3169, "step": 4200 }, { "epoch": 1.01, "learning_rate": 1.797887662025924e-05, "loss": 0.3115, "step": 4210 }, { "epoch": 1.01, "learning_rate": 1.7974075852136344e-05, "loss": 0.4787, "step": 4220 }, { "epoch": 1.02, "learning_rate": 1.7969275084013443e-05, "loss": 0.3459, "step": 4230 }, { "epoch": 1.02, "learning_rate": 1.7964474315890542e-05, "loss": 0.4496, "step": 4240 }, { "epoch": 1.02, "learning_rate": 1.7959673547767645e-05, "loss": 0.3848, "step": 4250 }, { "epoch": 1.02, "learning_rate": 1.7954872779644744e-05, "loss": 0.301, "step": 4260 }, { "epoch": 1.02, "learning_rate": 1.7950072011521843e-05, "loss": 0.2783, "step": 4270 }, { "epoch": 1.03, "learning_rate": 1.7945271243398946e-05, "loss": 0.4538, "step": 4280 }, { "epoch": 1.03, "learning_rate": 1.7940470475276045e-05, "loss": 0.4946, "step": 4290 }, { "epoch": 1.03, "learning_rate": 1.7935669707153144e-05, "loss": 0.4657, "step": 4300 }, { "epoch": 1.03, "learning_rate": 1.7930868939030246e-05, "loss": 0.4639, "step": 4310 }, { "epoch": 1.04, "learning_rate": 1.7926068170907346e-05, "loss": 0.4415, "step": 4320 }, { "epoch": 1.04, "learning_rate": 1.7921267402784448e-05, "loss": 0.4027, "step": 4330 }, { "epoch": 1.04, "learning_rate": 1.7916466634661547e-05, "loss": 0.3872, "step": 4340 }, { "epoch": 1.04, "learning_rate": 1.7911665866538646e-05, "loss": 0.3735, "step": 4350 }, { "epoch": 1.05, "learning_rate": 1.790686509841575e-05, "loss": 0.3642, "step": 4360 }, { "epoch": 1.05, "learning_rate": 1.7902064330292848e-05, "loss": 0.4073, "step": 4370 }, { "epoch": 1.05, "learning_rate": 1.7897263562169947e-05, "loss": 0.2874, "step": 4380 }, { "epoch": 1.05, "learning_rate": 1.789246279404705e-05, "loss": 0.3783, "step": 4390 }, { "epoch": 1.06, "learning_rate": 1.788766202592415e-05, "loss": 0.2895, "step": 4400 }, { "epoch": 1.06, "learning_rate": 1.7882861257801248e-05, "loss": 0.1587, "step": 4410 }, { "epoch": 1.06, "learning_rate": 1.787806048967835e-05, "loss": 0.2973, "step": 4420 }, { "epoch": 1.06, "learning_rate": 1.787325972155545e-05, "loss": 0.2541, "step": 4430 }, { "epoch": 1.07, "learning_rate": 1.786845895343255e-05, "loss": 0.2957, "step": 4440 }, { "epoch": 1.07, "learning_rate": 1.786365818530965e-05, "loss": 0.4392, "step": 4450 }, { "epoch": 1.07, "learning_rate": 1.785885741718675e-05, "loss": 0.3449, "step": 4460 }, { "epoch": 1.07, "learning_rate": 1.785405664906385e-05, "loss": 0.3259, "step": 4470 }, { "epoch": 1.08, "learning_rate": 1.7849255880940952e-05, "loss": 0.4596, "step": 4480 }, { "epoch": 1.08, "learning_rate": 1.784445511281805e-05, "loss": 0.2926, "step": 4490 }, { "epoch": 1.08, "learning_rate": 1.783965434469515e-05, "loss": 0.2853, "step": 4500 }, { "epoch": 1.08, "learning_rate": 1.7834853576572253e-05, "loss": 0.3184, "step": 4510 }, { "epoch": 1.08, "learning_rate": 1.7830052808449356e-05, "loss": 0.3771, "step": 4520 }, { "epoch": 1.09, "learning_rate": 1.782525204032645e-05, "loss": 0.1982, "step": 4530 }, { "epoch": 1.09, "learning_rate": 1.7820451272203554e-05, "loss": 0.4207, "step": 4540 }, { "epoch": 1.09, "learning_rate": 1.7815650504080657e-05, "loss": 0.3651, "step": 4550 }, { "epoch": 1.09, "learning_rate": 1.7810849735957752e-05, "loss": 0.3715, "step": 4560 }, { "epoch": 1.1, "learning_rate": 1.7806048967834855e-05, "loss": 0.358, "step": 4570 }, { "epoch": 1.1, "learning_rate": 1.7801248199711957e-05, "loss": 0.3517, "step": 4580 }, { "epoch": 1.1, "learning_rate": 1.7796447431589057e-05, "loss": 0.438, "step": 4590 }, { "epoch": 1.1, "learning_rate": 1.7791646663466156e-05, "loss": 0.2194, "step": 4600 }, { "epoch": 1.11, "learning_rate": 1.7786845895343258e-05, "loss": 0.4025, "step": 4610 }, { "epoch": 1.11, "learning_rate": 1.7782045127220357e-05, "loss": 0.3115, "step": 4620 }, { "epoch": 1.11, "learning_rate": 1.7777244359097457e-05, "loss": 0.3962, "step": 4630 }, { "epoch": 1.11, "learning_rate": 1.777244359097456e-05, "loss": 0.2103, "step": 4640 }, { "epoch": 1.12, "learning_rate": 1.7767642822851658e-05, "loss": 0.2085, "step": 4650 }, { "epoch": 1.12, "learning_rate": 1.7762842054728757e-05, "loss": 0.3312, "step": 4660 }, { "epoch": 1.12, "learning_rate": 1.775804128660586e-05, "loss": 0.4754, "step": 4670 }, { "epoch": 1.12, "learning_rate": 1.775324051848296e-05, "loss": 0.4154, "step": 4680 }, { "epoch": 1.13, "learning_rate": 1.7748439750360058e-05, "loss": 0.2771, "step": 4690 }, { "epoch": 1.13, "learning_rate": 1.774363898223716e-05, "loss": 0.4255, "step": 4700 }, { "epoch": 1.13, "learning_rate": 1.773883821411426e-05, "loss": 0.3558, "step": 4710 }, { "epoch": 1.13, "learning_rate": 1.7734037445991362e-05, "loss": 0.4037, "step": 4720 }, { "epoch": 1.14, "learning_rate": 1.772923667786846e-05, "loss": 0.3761, "step": 4730 }, { "epoch": 1.14, "learning_rate": 1.772443590974556e-05, "loss": 0.2908, "step": 4740 }, { "epoch": 1.14, "learning_rate": 1.7719635141622663e-05, "loss": 0.4659, "step": 4750 }, { "epoch": 1.14, "learning_rate": 1.7714834373499762e-05, "loss": 0.3018, "step": 4760 }, { "epoch": 1.14, "learning_rate": 1.771003360537686e-05, "loss": 0.2194, "step": 4770 }, { "epoch": 1.15, "learning_rate": 1.7705232837253964e-05, "loss": 0.4303, "step": 4780 }, { "epoch": 1.15, "learning_rate": 1.7700432069131063e-05, "loss": 0.2039, "step": 4790 }, { "epoch": 1.15, "learning_rate": 1.7695631301008162e-05, "loss": 0.324, "step": 4800 }, { "epoch": 1.15, "learning_rate": 1.7690830532885265e-05, "loss": 0.5169, "step": 4810 }, { "epoch": 1.16, "learning_rate": 1.7686029764762364e-05, "loss": 0.2463, "step": 4820 }, { "epoch": 1.16, "learning_rate": 1.7681228996639463e-05, "loss": 0.3105, "step": 4830 }, { "epoch": 1.16, "learning_rate": 1.7676428228516566e-05, "loss": 0.1741, "step": 4840 }, { "epoch": 1.16, "learning_rate": 1.7671627460393665e-05, "loss": 0.1917, "step": 4850 }, { "epoch": 1.17, "learning_rate": 1.7666826692270764e-05, "loss": 0.3279, "step": 4860 }, { "epoch": 1.17, "learning_rate": 1.7662025924147867e-05, "loss": 0.6172, "step": 4870 }, { "epoch": 1.17, "learning_rate": 1.7657225156024966e-05, "loss": 0.4579, "step": 4880 }, { "epoch": 1.17, "learning_rate": 1.7652424387902065e-05, "loss": 0.4042, "step": 4890 }, { "epoch": 1.18, "learning_rate": 1.7647623619779167e-05, "loss": 0.189, "step": 4900 }, { "epoch": 1.18, "learning_rate": 1.7642822851656267e-05, "loss": 0.4336, "step": 4910 }, { "epoch": 1.18, "learning_rate": 1.7638022083533366e-05, "loss": 0.1077, "step": 4920 }, { "epoch": 1.18, "learning_rate": 1.7633221315410468e-05, "loss": 0.4885, "step": 4930 }, { "epoch": 1.19, "learning_rate": 1.7628420547287567e-05, "loss": 0.3344, "step": 4940 }, { "epoch": 1.19, "learning_rate": 1.7623619779164667e-05, "loss": 0.236, "step": 4950 }, { "epoch": 1.19, "learning_rate": 1.761881901104177e-05, "loss": 0.2562, "step": 4960 }, { "epoch": 1.19, "learning_rate": 1.7614018242918868e-05, "loss": 0.4231, "step": 4970 }, { "epoch": 1.2, "learning_rate": 1.760921747479597e-05, "loss": 0.1379, "step": 4980 }, { "epoch": 1.2, "learning_rate": 1.760441670667307e-05, "loss": 0.135, "step": 4990 }, { "epoch": 1.2, "learning_rate": 1.759961593855017e-05, "loss": 0.1465, "step": 5000 }, { "epoch": 1.2, "learning_rate": 1.759481517042727e-05, "loss": 0.208, "step": 5010 }, { "epoch": 1.2, "learning_rate": 1.759001440230437e-05, "loss": 0.3199, "step": 5020 }, { "epoch": 1.21, "learning_rate": 1.758521363418147e-05, "loss": 0.3387, "step": 5030 }, { "epoch": 1.21, "learning_rate": 1.7580412866058572e-05, "loss": 0.2913, "step": 5040 }, { "epoch": 1.21, "learning_rate": 1.757561209793567e-05, "loss": 0.208, "step": 5050 }, { "epoch": 1.21, "learning_rate": 1.757081132981277e-05, "loss": 0.2419, "step": 5060 }, { "epoch": 1.22, "learning_rate": 1.7566010561689873e-05, "loss": 0.2377, "step": 5070 }, { "epoch": 1.22, "learning_rate": 1.7561209793566972e-05, "loss": 0.386, "step": 5080 }, { "epoch": 1.22, "learning_rate": 1.755640902544407e-05, "loss": 0.3919, "step": 5090 }, { "epoch": 1.22, "learning_rate": 1.7551608257321174e-05, "loss": 0.2799, "step": 5100 }, { "epoch": 1.23, "learning_rate": 1.7546807489198273e-05, "loss": 0.2507, "step": 5110 }, { "epoch": 1.23, "learning_rate": 1.7542006721075372e-05, "loss": 0.1796, "step": 5120 }, { "epoch": 1.23, "learning_rate": 1.7537205952952475e-05, "loss": 0.2141, "step": 5130 }, { "epoch": 1.23, "learning_rate": 1.7532405184829574e-05, "loss": 0.275, "step": 5140 }, { "epoch": 1.24, "learning_rate": 1.7527604416706673e-05, "loss": 0.2695, "step": 5150 }, { "epoch": 1.24, "learning_rate": 1.7522803648583776e-05, "loss": 0.232, "step": 5160 }, { "epoch": 1.24, "learning_rate": 1.7518002880460875e-05, "loss": 0.2083, "step": 5170 }, { "epoch": 1.24, "learning_rate": 1.7513202112337974e-05, "loss": 0.4113, "step": 5180 }, { "epoch": 1.25, "learning_rate": 1.7508401344215077e-05, "loss": 0.4923, "step": 5190 }, { "epoch": 1.25, "learning_rate": 1.7503600576092176e-05, "loss": 0.3211, "step": 5200 }, { "epoch": 1.25, "learning_rate": 1.7498799807969275e-05, "loss": 0.2507, "step": 5210 }, { "epoch": 1.25, "learning_rate": 1.7493999039846377e-05, "loss": 0.3018, "step": 5220 }, { "epoch": 1.26, "learning_rate": 1.7489198271723477e-05, "loss": 0.1886, "step": 5230 }, { "epoch": 1.26, "learning_rate": 1.7484397503600576e-05, "loss": 0.3006, "step": 5240 }, { "epoch": 1.26, "learning_rate": 1.747959673547768e-05, "loss": 0.3268, "step": 5250 }, { "epoch": 1.26, "learning_rate": 1.7474795967354777e-05, "loss": 0.1477, "step": 5260 }, { "epoch": 1.27, "learning_rate": 1.746999519923188e-05, "loss": 0.213, "step": 5270 }, { "epoch": 1.27, "learning_rate": 1.746519443110898e-05, "loss": 0.3322, "step": 5280 }, { "epoch": 1.27, "learning_rate": 1.7460393662986078e-05, "loss": 0.2913, "step": 5290 }, { "epoch": 1.27, "learning_rate": 1.745559289486318e-05, "loss": 0.318, "step": 5300 }, { "epoch": 1.27, "learning_rate": 1.745079212674028e-05, "loss": 0.2929, "step": 5310 }, { "epoch": 1.28, "learning_rate": 1.744599135861738e-05, "loss": 0.2828, "step": 5320 }, { "epoch": 1.28, "learning_rate": 1.744119059049448e-05, "loss": 0.3368, "step": 5330 }, { "epoch": 1.28, "learning_rate": 1.743638982237158e-05, "loss": 0.2549, "step": 5340 }, { "epoch": 1.28, "learning_rate": 1.743158905424868e-05, "loss": 0.1731, "step": 5350 }, { "epoch": 1.29, "learning_rate": 1.7426788286125783e-05, "loss": 0.4072, "step": 5360 }, { "epoch": 1.29, "learning_rate": 1.742198751800288e-05, "loss": 0.2125, "step": 5370 }, { "epoch": 1.29, "learning_rate": 1.741718674987998e-05, "loss": 0.2279, "step": 5380 }, { "epoch": 1.29, "learning_rate": 1.7412385981757083e-05, "loss": 0.3121, "step": 5390 }, { "epoch": 1.3, "learning_rate": 1.7407585213634182e-05, "loss": 0.3211, "step": 5400 }, { "epoch": 1.3, "learning_rate": 1.740278444551128e-05, "loss": 0.2252, "step": 5410 }, { "epoch": 1.3, "learning_rate": 1.7397983677388384e-05, "loss": 0.3518, "step": 5420 }, { "epoch": 1.3, "learning_rate": 1.7393182909265483e-05, "loss": 0.3043, "step": 5430 }, { "epoch": 1.31, "learning_rate": 1.7388382141142582e-05, "loss": 0.2471, "step": 5440 }, { "epoch": 1.31, "learning_rate": 1.7383581373019685e-05, "loss": 0.258, "step": 5450 }, { "epoch": 1.31, "learning_rate": 1.7378780604896784e-05, "loss": 0.2289, "step": 5460 }, { "epoch": 1.31, "learning_rate": 1.7373979836773883e-05, "loss": 0.172, "step": 5470 }, { "epoch": 1.32, "learning_rate": 1.7369179068650986e-05, "loss": 0.1301, "step": 5480 }, { "epoch": 1.32, "learning_rate": 1.7364378300528085e-05, "loss": 0.3406, "step": 5490 }, { "epoch": 1.32, "learning_rate": 1.7359577532405184e-05, "loss": 0.4144, "step": 5500 }, { "epoch": 1.32, "learning_rate": 1.7354776764282287e-05, "loss": 0.2505, "step": 5510 }, { "epoch": 1.33, "learning_rate": 1.7349975996159386e-05, "loss": 0.298, "step": 5520 }, { "epoch": 1.33, "learning_rate": 1.734517522803649e-05, "loss": 0.191, "step": 5530 }, { "epoch": 1.33, "learning_rate": 1.7340374459913588e-05, "loss": 0.4512, "step": 5540 }, { "epoch": 1.33, "learning_rate": 1.7335573691790687e-05, "loss": 0.2741, "step": 5550 }, { "epoch": 1.33, "learning_rate": 1.733077292366779e-05, "loss": 0.1714, "step": 5560 }, { "epoch": 1.34, "learning_rate": 1.732597215554489e-05, "loss": 0.2258, "step": 5570 }, { "epoch": 1.34, "learning_rate": 1.7321171387421988e-05, "loss": 0.3457, "step": 5580 }, { "epoch": 1.34, "learning_rate": 1.731637061929909e-05, "loss": 0.2781, "step": 5590 }, { "epoch": 1.34, "learning_rate": 1.731156985117619e-05, "loss": 0.1566, "step": 5600 }, { "epoch": 1.35, "learning_rate": 1.730676908305329e-05, "loss": 0.2438, "step": 5610 }, { "epoch": 1.35, "learning_rate": 1.730196831493039e-05, "loss": 0.304, "step": 5620 }, { "epoch": 1.35, "learning_rate": 1.729716754680749e-05, "loss": 0.2895, "step": 5630 }, { "epoch": 1.35, "learning_rate": 1.729236677868459e-05, "loss": 0.1523, "step": 5640 }, { "epoch": 1.36, "learning_rate": 1.7287566010561692e-05, "loss": 0.5485, "step": 5650 }, { "epoch": 1.36, "learning_rate": 1.7282765242438794e-05, "loss": 0.2285, "step": 5660 }, { "epoch": 1.36, "learning_rate": 1.727796447431589e-05, "loss": 0.3143, "step": 5670 }, { "epoch": 1.36, "learning_rate": 1.7273163706192993e-05, "loss": 0.4023, "step": 5680 }, { "epoch": 1.37, "learning_rate": 1.7268362938070095e-05, "loss": 0.3152, "step": 5690 }, { "epoch": 1.37, "learning_rate": 1.726356216994719e-05, "loss": 0.3419, "step": 5700 }, { "epoch": 1.37, "learning_rate": 1.7258761401824293e-05, "loss": 0.2251, "step": 5710 }, { "epoch": 1.37, "learning_rate": 1.7253960633701396e-05, "loss": 0.4259, "step": 5720 }, { "epoch": 1.38, "learning_rate": 1.724915986557849e-05, "loss": 0.3391, "step": 5730 }, { "epoch": 1.38, "learning_rate": 1.7244359097455594e-05, "loss": 0.2633, "step": 5740 }, { "epoch": 1.38, "learning_rate": 1.7239558329332697e-05, "loss": 0.285, "step": 5750 }, { "epoch": 1.38, "learning_rate": 1.7234757561209793e-05, "loss": 0.3351, "step": 5760 }, { "epoch": 1.39, "learning_rate": 1.7229956793086895e-05, "loss": 0.3749, "step": 5770 }, { "epoch": 1.39, "learning_rate": 1.7225156024963998e-05, "loss": 0.1099, "step": 5780 }, { "epoch": 1.39, "learning_rate": 1.7220355256841097e-05, "loss": 0.2469, "step": 5790 }, { "epoch": 1.39, "learning_rate": 1.7215554488718196e-05, "loss": 0.1826, "step": 5800 }, { "epoch": 1.39, "learning_rate": 1.72107537205953e-05, "loss": 0.2992, "step": 5810 }, { "epoch": 1.4, "learning_rate": 1.7205952952472398e-05, "loss": 0.4089, "step": 5820 }, { "epoch": 1.4, "learning_rate": 1.7201152184349497e-05, "loss": 0.2354, "step": 5830 }, { "epoch": 1.4, "learning_rate": 1.71963514162266e-05, "loss": 0.4552, "step": 5840 }, { "epoch": 1.4, "learning_rate": 1.71915506481037e-05, "loss": 0.1552, "step": 5850 }, { "epoch": 1.41, "learning_rate": 1.7186749879980798e-05, "loss": 0.2951, "step": 5860 }, { "epoch": 1.41, "learning_rate": 1.71819491118579e-05, "loss": 0.167, "step": 5870 }, { "epoch": 1.41, "learning_rate": 1.7177148343735e-05, "loss": 0.3009, "step": 5880 }, { "epoch": 1.41, "learning_rate": 1.71723475756121e-05, "loss": 0.3901, "step": 5890 }, { "epoch": 1.42, "learning_rate": 1.71675468074892e-05, "loss": 0.1968, "step": 5900 }, { "epoch": 1.42, "learning_rate": 1.71627460393663e-05, "loss": 0.3833, "step": 5910 }, { "epoch": 1.42, "learning_rate": 1.7157945271243403e-05, "loss": 0.1489, "step": 5920 }, { "epoch": 1.42, "learning_rate": 1.7153144503120502e-05, "loss": 0.5201, "step": 5930 }, { "epoch": 1.43, "learning_rate": 1.71483437349976e-05, "loss": 0.2792, "step": 5940 }, { "epoch": 1.43, "learning_rate": 1.7143542966874703e-05, "loss": 0.1983, "step": 5950 }, { "epoch": 1.43, "learning_rate": 1.7138742198751803e-05, "loss": 0.3335, "step": 5960 }, { "epoch": 1.43, "learning_rate": 1.7133941430628902e-05, "loss": 0.2847, "step": 5970 }, { "epoch": 1.44, "learning_rate": 1.7129140662506004e-05, "loss": 0.3299, "step": 5980 }, { "epoch": 1.44, "learning_rate": 1.7124339894383103e-05, "loss": 0.3726, "step": 5990 }, { "epoch": 1.44, "learning_rate": 1.7119539126260203e-05, "loss": 0.3465, "step": 6000 }, { "epoch": 1.44, "learning_rate": 1.7114738358137305e-05, "loss": 0.3033, "step": 6010 }, { "epoch": 1.45, "learning_rate": 1.7109937590014404e-05, "loss": 0.1776, "step": 6020 }, { "epoch": 1.45, "learning_rate": 1.7105136821891503e-05, "loss": 0.2759, "step": 6030 }, { "epoch": 1.45, "learning_rate": 1.7100336053768606e-05, "loss": 0.2215, "step": 6040 }, { "epoch": 1.45, "learning_rate": 1.7095535285645705e-05, "loss": 0.2725, "step": 6050 }, { "epoch": 1.45, "learning_rate": 1.7090734517522804e-05, "loss": 0.2655, "step": 6060 }, { "epoch": 1.46, "learning_rate": 1.7085933749399907e-05, "loss": 0.2265, "step": 6070 }, { "epoch": 1.46, "learning_rate": 1.7081132981277006e-05, "loss": 0.2479, "step": 6080 }, { "epoch": 1.46, "learning_rate": 1.7076332213154105e-05, "loss": 0.3463, "step": 6090 }, { "epoch": 1.46, "learning_rate": 1.7071531445031208e-05, "loss": 0.2398, "step": 6100 }, { "epoch": 1.47, "learning_rate": 1.7066730676908307e-05, "loss": 0.244, "step": 6110 }, { "epoch": 1.47, "learning_rate": 1.7061929908785406e-05, "loss": 0.2266, "step": 6120 }, { "epoch": 1.47, "learning_rate": 1.705712914066251e-05, "loss": 0.3041, "step": 6130 }, { "epoch": 1.47, "learning_rate": 1.7052328372539608e-05, "loss": 0.3264, "step": 6140 }, { "epoch": 1.48, "learning_rate": 1.7047527604416707e-05, "loss": 0.1808, "step": 6150 }, { "epoch": 1.48, "learning_rate": 1.704272683629381e-05, "loss": 0.345, "step": 6160 }, { "epoch": 1.48, "learning_rate": 1.703792606817091e-05, "loss": 0.3436, "step": 6170 }, { "epoch": 1.48, "learning_rate": 1.703312530004801e-05, "loss": 0.3214, "step": 6180 }, { "epoch": 1.49, "learning_rate": 1.702832453192511e-05, "loss": 0.1239, "step": 6190 }, { "epoch": 1.49, "learning_rate": 1.702352376380221e-05, "loss": 0.2669, "step": 6200 }, { "epoch": 1.49, "learning_rate": 1.7018722995679312e-05, "loss": 0.2145, "step": 6210 }, { "epoch": 1.49, "learning_rate": 1.701392222755641e-05, "loss": 0.3709, "step": 6220 }, { "epoch": 1.5, "learning_rate": 1.700912145943351e-05, "loss": 0.4625, "step": 6230 }, { "epoch": 1.5, "learning_rate": 1.7004320691310613e-05, "loss": 0.2129, "step": 6240 }, { "epoch": 1.5, "learning_rate": 1.6999519923187712e-05, "loss": 0.3548, "step": 6250 }, { "epoch": 1.5, "learning_rate": 1.699471915506481e-05, "loss": 0.3471, "step": 6260 }, { "epoch": 1.51, "learning_rate": 1.6989918386941914e-05, "loss": 0.3064, "step": 6270 }, { "epoch": 1.51, "learning_rate": 1.6985117618819013e-05, "loss": 0.3154, "step": 6280 }, { "epoch": 1.51, "learning_rate": 1.6980316850696112e-05, "loss": 0.2231, "step": 6290 }, { "epoch": 1.51, "learning_rate": 1.6975516082573214e-05, "loss": 0.2599, "step": 6300 }, { "epoch": 1.51, "learning_rate": 1.6970715314450313e-05, "loss": 0.1981, "step": 6310 }, { "epoch": 1.52, "learning_rate": 1.6965914546327413e-05, "loss": 0.2433, "step": 6320 }, { "epoch": 1.52, "learning_rate": 1.6961113778204515e-05, "loss": 0.32, "step": 6330 }, { "epoch": 1.52, "learning_rate": 1.6956313010081614e-05, "loss": 0.2642, "step": 6340 }, { "epoch": 1.52, "learning_rate": 1.6951512241958713e-05, "loss": 0.362, "step": 6350 }, { "epoch": 1.53, "learning_rate": 1.6946711473835816e-05, "loss": 0.2901, "step": 6360 }, { "epoch": 1.53, "learning_rate": 1.6941910705712915e-05, "loss": 0.2511, "step": 6370 }, { "epoch": 1.53, "learning_rate": 1.6937109937590014e-05, "loss": 0.3144, "step": 6380 }, { "epoch": 1.53, "learning_rate": 1.6932309169467117e-05, "loss": 0.2797, "step": 6390 }, { "epoch": 1.54, "learning_rate": 1.6927508401344216e-05, "loss": 0.5936, "step": 6400 }, { "epoch": 1.54, "learning_rate": 1.6922707633221315e-05, "loss": 0.2537, "step": 6410 }, { "epoch": 1.54, "learning_rate": 1.6917906865098418e-05, "loss": 0.2856, "step": 6420 }, { "epoch": 1.54, "learning_rate": 1.6913106096975517e-05, "loss": 0.3406, "step": 6430 }, { "epoch": 1.55, "learning_rate": 1.690830532885262e-05, "loss": 0.219, "step": 6440 }, { "epoch": 1.55, "learning_rate": 1.690350456072972e-05, "loss": 0.3206, "step": 6450 }, { "epoch": 1.55, "learning_rate": 1.6898703792606818e-05, "loss": 0.3433, "step": 6460 }, { "epoch": 1.55, "learning_rate": 1.689390302448392e-05, "loss": 0.356, "step": 6470 }, { "epoch": 1.56, "learning_rate": 1.688910225636102e-05, "loss": 0.2436, "step": 6480 }, { "epoch": 1.56, "learning_rate": 1.688430148823812e-05, "loss": 0.2651, "step": 6490 }, { "epoch": 1.56, "learning_rate": 1.687950072011522e-05, "loss": 0.2662, "step": 6500 }, { "epoch": 1.56, "learning_rate": 1.687469995199232e-05, "loss": 0.3791, "step": 6510 }, { "epoch": 1.57, "learning_rate": 1.686989918386942e-05, "loss": 0.2771, "step": 6520 }, { "epoch": 1.57, "learning_rate": 1.6865098415746522e-05, "loss": 0.2927, "step": 6530 }, { "epoch": 1.57, "learning_rate": 1.686029764762362e-05, "loss": 0.2066, "step": 6540 }, { "epoch": 1.57, "learning_rate": 1.685549687950072e-05, "loss": 0.1953, "step": 6550 }, { "epoch": 1.57, "learning_rate": 1.6850696111377823e-05, "loss": 0.1433, "step": 6560 }, { "epoch": 1.58, "learning_rate": 1.6845895343254922e-05, "loss": 0.3811, "step": 6570 }, { "epoch": 1.58, "learning_rate": 1.684109457513202e-05, "loss": 0.2226, "step": 6580 }, { "epoch": 1.58, "learning_rate": 1.6836293807009124e-05, "loss": 0.2424, "step": 6590 }, { "epoch": 1.58, "learning_rate": 1.6831493038886223e-05, "loss": 0.3268, "step": 6600 }, { "epoch": 1.59, "learning_rate": 1.6826692270763322e-05, "loss": 0.2614, "step": 6610 }, { "epoch": 1.59, "learning_rate": 1.6821891502640424e-05, "loss": 0.3407, "step": 6620 }, { "epoch": 1.59, "learning_rate": 1.6817090734517524e-05, "loss": 0.1865, "step": 6630 }, { "epoch": 1.59, "learning_rate": 1.6812289966394623e-05, "loss": 0.3205, "step": 6640 }, { "epoch": 1.6, "learning_rate": 1.6807489198271725e-05, "loss": 0.2809, "step": 6650 }, { "epoch": 1.6, "learning_rate": 1.6802688430148824e-05, "loss": 0.2641, "step": 6660 }, { "epoch": 1.6, "learning_rate": 1.6797887662025924e-05, "loss": 0.2858, "step": 6670 }, { "epoch": 1.6, "learning_rate": 1.6793086893903026e-05, "loss": 0.119, "step": 6680 }, { "epoch": 1.61, "learning_rate": 1.6788286125780125e-05, "loss": 0.2279, "step": 6690 }, { "epoch": 1.61, "learning_rate": 1.6783485357657224e-05, "loss": 0.3126, "step": 6700 }, { "epoch": 1.61, "learning_rate": 1.6778684589534327e-05, "loss": 0.2901, "step": 6710 }, { "epoch": 1.61, "learning_rate": 1.6773883821411426e-05, "loss": 0.288, "step": 6720 }, { "epoch": 1.62, "learning_rate": 1.676908305328853e-05, "loss": 0.208, "step": 6730 }, { "epoch": 1.62, "learning_rate": 1.6764282285165628e-05, "loss": 0.1229, "step": 6740 }, { "epoch": 1.62, "learning_rate": 1.6759481517042727e-05, "loss": 0.5133, "step": 6750 }, { "epoch": 1.62, "learning_rate": 1.675468074891983e-05, "loss": 0.3316, "step": 6760 }, { "epoch": 1.63, "learning_rate": 1.674987998079693e-05, "loss": 0.295, "step": 6770 }, { "epoch": 1.63, "learning_rate": 1.6745079212674028e-05, "loss": 0.2996, "step": 6780 }, { "epoch": 1.63, "learning_rate": 1.674027844455113e-05, "loss": 0.1774, "step": 6790 }, { "epoch": 1.63, "learning_rate": 1.673547767642823e-05, "loss": 0.3722, "step": 6800 }, { "epoch": 1.63, "learning_rate": 1.673067690830533e-05, "loss": 0.0939, "step": 6810 }, { "epoch": 1.64, "learning_rate": 1.672587614018243e-05, "loss": 0.4576, "step": 6820 }, { "epoch": 1.64, "learning_rate": 1.672107537205953e-05, "loss": 0.2684, "step": 6830 }, { "epoch": 1.64, "learning_rate": 1.671627460393663e-05, "loss": 0.2958, "step": 6840 }, { "epoch": 1.64, "learning_rate": 1.6711473835813732e-05, "loss": 0.2087, "step": 6850 }, { "epoch": 1.65, "learning_rate": 1.6706673067690834e-05, "loss": 0.3335, "step": 6860 }, { "epoch": 1.65, "learning_rate": 1.670187229956793e-05, "loss": 0.1949, "step": 6870 }, { "epoch": 1.65, "learning_rate": 1.6697071531445033e-05, "loss": 0.2866, "step": 6880 }, { "epoch": 1.65, "learning_rate": 1.6692270763322135e-05, "loss": 0.2533, "step": 6890 }, { "epoch": 1.66, "learning_rate": 1.668746999519923e-05, "loss": 0.3213, "step": 6900 }, { "epoch": 1.66, "learning_rate": 1.6682669227076334e-05, "loss": 0.2166, "step": 6910 }, { "epoch": 1.66, "learning_rate": 1.6677868458953436e-05, "loss": 0.357, "step": 6920 }, { "epoch": 1.66, "learning_rate": 1.6673067690830532e-05, "loss": 0.1333, "step": 6930 }, { "epoch": 1.67, "learning_rate": 1.6668266922707634e-05, "loss": 0.3074, "step": 6940 }, { "epoch": 1.67, "learning_rate": 1.6663466154584737e-05, "loss": 0.1646, "step": 6950 }, { "epoch": 1.67, "learning_rate": 1.6658665386461833e-05, "loss": 0.121, "step": 6960 }, { "epoch": 1.67, "learning_rate": 1.6653864618338935e-05, "loss": 0.3188, "step": 6970 }, { "epoch": 1.68, "learning_rate": 1.6649063850216038e-05, "loss": 0.2299, "step": 6980 }, { "epoch": 1.68, "learning_rate": 1.6644263082093137e-05, "loss": 0.3709, "step": 6990 }, { "epoch": 1.68, "learning_rate": 1.6639462313970236e-05, "loss": 0.3846, "step": 7000 }, { "epoch": 1.68, "learning_rate": 1.663466154584734e-05, "loss": 0.226, "step": 7010 }, { "epoch": 1.69, "learning_rate": 1.6629860777724438e-05, "loss": 0.1858, "step": 7020 }, { "epoch": 1.69, "learning_rate": 1.6625060009601537e-05, "loss": 0.3426, "step": 7030 }, { "epoch": 1.69, "learning_rate": 1.662025924147864e-05, "loss": 0.2819, "step": 7040 }, { "epoch": 1.69, "learning_rate": 1.661545847335574e-05, "loss": 0.2335, "step": 7050 }, { "epoch": 1.69, "learning_rate": 1.6610657705232838e-05, "loss": 0.3592, "step": 7060 }, { "epoch": 1.7, "learning_rate": 1.660585693710994e-05, "loss": 0.2479, "step": 7070 }, { "epoch": 1.7, "learning_rate": 1.660105616898704e-05, "loss": 0.2644, "step": 7080 }, { "epoch": 1.7, "learning_rate": 1.659625540086414e-05, "loss": 0.1601, "step": 7090 }, { "epoch": 1.7, "learning_rate": 1.659145463274124e-05, "loss": 0.2437, "step": 7100 }, { "epoch": 1.71, "learning_rate": 1.658665386461834e-05, "loss": 0.2847, "step": 7110 }, { "epoch": 1.71, "learning_rate": 1.6581853096495443e-05, "loss": 0.2417, "step": 7120 }, { "epoch": 1.71, "learning_rate": 1.6577052328372542e-05, "loss": 0.2989, "step": 7130 }, { "epoch": 1.71, "learning_rate": 1.657225156024964e-05, "loss": 0.1034, "step": 7140 }, { "epoch": 1.72, "learning_rate": 1.6567450792126744e-05, "loss": 0.1075, "step": 7150 }, { "epoch": 1.72, "learning_rate": 1.6562650024003843e-05, "loss": 0.1714, "step": 7160 }, { "epoch": 1.72, "learning_rate": 1.6557849255880942e-05, "loss": 0.3407, "step": 7170 }, { "epoch": 1.72, "learning_rate": 1.6553048487758044e-05, "loss": 0.1231, "step": 7180 }, { "epoch": 1.73, "learning_rate": 1.6548247719635144e-05, "loss": 0.3359, "step": 7190 }, { "epoch": 1.73, "learning_rate": 1.6543446951512243e-05, "loss": 0.1883, "step": 7200 }, { "epoch": 1.73, "learning_rate": 1.6538646183389345e-05, "loss": 0.4074, "step": 7210 }, { "epoch": 1.73, "learning_rate": 1.6533845415266444e-05, "loss": 0.1621, "step": 7220 }, { "epoch": 1.74, "learning_rate": 1.6529044647143544e-05, "loss": 0.2488, "step": 7230 }, { "epoch": 1.74, "learning_rate": 1.6524243879020646e-05, "loss": 0.29, "step": 7240 }, { "epoch": 1.74, "learning_rate": 1.6519443110897745e-05, "loss": 0.4122, "step": 7250 }, { "epoch": 1.74, "learning_rate": 1.6514642342774844e-05, "loss": 0.3558, "step": 7260 }, { "epoch": 1.75, "learning_rate": 1.6509841574651947e-05, "loss": 0.2255, "step": 7270 }, { "epoch": 1.75, "learning_rate": 1.6505040806529046e-05, "loss": 0.1434, "step": 7280 }, { "epoch": 1.75, "learning_rate": 1.6500240038406145e-05, "loss": 0.2075, "step": 7290 }, { "epoch": 1.75, "learning_rate": 1.6495439270283248e-05, "loss": 0.1391, "step": 7300 }, { "epoch": 1.75, "learning_rate": 1.6490638502160347e-05, "loss": 0.2247, "step": 7310 }, { "epoch": 1.76, "learning_rate": 1.6485837734037446e-05, "loss": 0.1813, "step": 7320 }, { "epoch": 1.76, "learning_rate": 1.648103696591455e-05, "loss": 0.284, "step": 7330 }, { "epoch": 1.76, "learning_rate": 1.6476236197791648e-05, "loss": 0.1173, "step": 7340 }, { "epoch": 1.76, "learning_rate": 1.6471435429668747e-05, "loss": 0.1648, "step": 7350 }, { "epoch": 1.77, "learning_rate": 1.646663466154585e-05, "loss": 0.1874, "step": 7360 }, { "epoch": 1.77, "learning_rate": 1.646183389342295e-05, "loss": 0.4147, "step": 7370 }, { "epoch": 1.77, "learning_rate": 1.645703312530005e-05, "loss": 0.1736, "step": 7380 }, { "epoch": 1.77, "learning_rate": 1.645223235717715e-05, "loss": 0.1742, "step": 7390 }, { "epoch": 1.78, "learning_rate": 1.644743158905425e-05, "loss": 0.2497, "step": 7400 }, { "epoch": 1.78, "learning_rate": 1.6442630820931352e-05, "loss": 0.3118, "step": 7410 }, { "epoch": 1.78, "learning_rate": 1.643783005280845e-05, "loss": 0.2658, "step": 7420 }, { "epoch": 1.78, "learning_rate": 1.643302928468555e-05, "loss": 0.2131, "step": 7430 }, { "epoch": 1.79, "learning_rate": 1.6428228516562653e-05, "loss": 0.4354, "step": 7440 }, { "epoch": 1.79, "learning_rate": 1.6423427748439752e-05, "loss": 0.1038, "step": 7450 }, { "epoch": 1.79, "learning_rate": 1.641862698031685e-05, "loss": 0.3985, "step": 7460 }, { "epoch": 1.79, "learning_rate": 1.6413826212193954e-05, "loss": 0.2182, "step": 7470 }, { "epoch": 1.8, "learning_rate": 1.6409025444071053e-05, "loss": 0.1926, "step": 7480 }, { "epoch": 1.8, "learning_rate": 1.6404224675948152e-05, "loss": 0.293, "step": 7490 }, { "epoch": 1.8, "learning_rate": 1.6399423907825255e-05, "loss": 0.222, "step": 7500 }, { "epoch": 1.8, "learning_rate": 1.6394623139702354e-05, "loss": 0.333, "step": 7510 }, { "epoch": 1.81, "learning_rate": 1.6389822371579453e-05, "loss": 0.3317, "step": 7520 }, { "epoch": 1.81, "learning_rate": 1.6385021603456555e-05, "loss": 0.1311, "step": 7530 }, { "epoch": 1.81, "learning_rate": 1.6380220835333655e-05, "loss": 0.2559, "step": 7540 }, { "epoch": 1.81, "learning_rate": 1.6375420067210754e-05, "loss": 0.1644, "step": 7550 }, { "epoch": 1.81, "learning_rate": 1.6370619299087856e-05, "loss": 0.356, "step": 7560 }, { "epoch": 1.82, "learning_rate": 1.6365818530964955e-05, "loss": 0.2955, "step": 7570 }, { "epoch": 1.82, "learning_rate": 1.6361017762842054e-05, "loss": 0.2048, "step": 7580 }, { "epoch": 1.82, "learning_rate": 1.6356216994719157e-05, "loss": 0.1993, "step": 7590 }, { "epoch": 1.82, "learning_rate": 1.6351416226596256e-05, "loss": 0.2183, "step": 7600 }, { "epoch": 1.83, "learning_rate": 1.6346615458473355e-05, "loss": 0.27, "step": 7610 }, { "epoch": 1.83, "learning_rate": 1.6341814690350458e-05, "loss": 0.2941, "step": 7620 }, { "epoch": 1.83, "learning_rate": 1.6337013922227557e-05, "loss": 0.314, "step": 7630 }, { "epoch": 1.83, "learning_rate": 1.633221315410466e-05, "loss": 0.1382, "step": 7640 }, { "epoch": 1.84, "learning_rate": 1.632741238598176e-05, "loss": 0.2129, "step": 7650 }, { "epoch": 1.84, "learning_rate": 1.6322611617858858e-05, "loss": 0.3042, "step": 7660 }, { "epoch": 1.84, "learning_rate": 1.631781084973596e-05, "loss": 0.1912, "step": 7670 }, { "epoch": 1.84, "learning_rate": 1.631301008161306e-05, "loss": 0.1291, "step": 7680 }, { "epoch": 1.85, "learning_rate": 1.630820931349016e-05, "loss": 0.1602, "step": 7690 }, { "epoch": 1.85, "learning_rate": 1.630340854536726e-05, "loss": 0.1936, "step": 7700 }, { "epoch": 1.85, "learning_rate": 1.629860777724436e-05, "loss": 0.2681, "step": 7710 }, { "epoch": 1.85, "learning_rate": 1.629380700912146e-05, "loss": 0.2612, "step": 7720 }, { "epoch": 1.86, "learning_rate": 1.6289006240998562e-05, "loss": 0.149, "step": 7730 }, { "epoch": 1.86, "learning_rate": 1.628420547287566e-05, "loss": 0.3117, "step": 7740 }, { "epoch": 1.86, "learning_rate": 1.627940470475276e-05, "loss": 0.2836, "step": 7750 }, { "epoch": 1.86, "learning_rate": 1.6274603936629863e-05, "loss": 0.2682, "step": 7760 }, { "epoch": 1.87, "learning_rate": 1.6269803168506962e-05, "loss": 0.2707, "step": 7770 }, { "epoch": 1.87, "learning_rate": 1.626500240038406e-05, "loss": 0.3505, "step": 7780 }, { "epoch": 1.87, "learning_rate": 1.6260201632261164e-05, "loss": 0.2875, "step": 7790 }, { "epoch": 1.87, "learning_rate": 1.6255400864138263e-05, "loss": 0.2281, "step": 7800 }, { "epoch": 1.87, "learning_rate": 1.6250600096015362e-05, "loss": 0.1715, "step": 7810 }, { "epoch": 1.88, "learning_rate": 1.6245799327892465e-05, "loss": 0.2805, "step": 7820 }, { "epoch": 1.88, "learning_rate": 1.6240998559769564e-05, "loss": 0.2698, "step": 7830 }, { "epoch": 1.88, "learning_rate": 1.6236197791646663e-05, "loss": 0.2783, "step": 7840 }, { "epoch": 1.88, "learning_rate": 1.6231397023523765e-05, "loss": 0.188, "step": 7850 }, { "epoch": 1.89, "learning_rate": 1.6226596255400865e-05, "loss": 0.3296, "step": 7860 }, { "epoch": 1.89, "learning_rate": 1.6221795487277964e-05, "loss": 0.1835, "step": 7870 }, { "epoch": 1.89, "learning_rate": 1.6216994719155066e-05, "loss": 0.1733, "step": 7880 }, { "epoch": 1.89, "learning_rate": 1.6212193951032165e-05, "loss": 0.1842, "step": 7890 }, { "epoch": 1.9, "learning_rate": 1.6207393182909268e-05, "loss": 0.2846, "step": 7900 }, { "epoch": 1.9, "learning_rate": 1.6202592414786367e-05, "loss": 0.1461, "step": 7910 }, { "epoch": 1.9, "learning_rate": 1.6197791646663466e-05, "loss": 0.266, "step": 7920 }, { "epoch": 1.9, "learning_rate": 1.619299087854057e-05, "loss": 0.2243, "step": 7930 }, { "epoch": 1.91, "learning_rate": 1.6188190110417668e-05, "loss": 0.1376, "step": 7940 }, { "epoch": 1.91, "learning_rate": 1.6183389342294767e-05, "loss": 0.115, "step": 7950 }, { "epoch": 1.91, "learning_rate": 1.617858857417187e-05, "loss": 0.0298, "step": 7960 }, { "epoch": 1.91, "learning_rate": 1.617378780604897e-05, "loss": 0.0588, "step": 7970 }, { "epoch": 1.92, "learning_rate": 1.6168987037926068e-05, "loss": 0.1534, "step": 7980 }, { "epoch": 1.92, "learning_rate": 1.616418626980317e-05, "loss": 0.3296, "step": 7990 }, { "epoch": 1.92, "learning_rate": 1.615938550168027e-05, "loss": 0.0936, "step": 8000 }, { "epoch": 1.92, "learning_rate": 1.615458473355737e-05, "loss": 0.1979, "step": 8010 }, { "epoch": 1.93, "learning_rate": 1.614978396543447e-05, "loss": 0.3102, "step": 8020 }, { "epoch": 1.93, "learning_rate": 1.6144983197311574e-05, "loss": 0.1653, "step": 8030 }, { "epoch": 1.93, "learning_rate": 1.614018242918867e-05, "loss": 0.285, "step": 8040 }, { "epoch": 1.93, "learning_rate": 1.6135381661065772e-05, "loss": 0.1726, "step": 8050 }, { "epoch": 1.93, "learning_rate": 1.6130580892942875e-05, "loss": 0.1975, "step": 8060 }, { "epoch": 1.94, "learning_rate": 1.612578012481997e-05, "loss": 0.2318, "step": 8070 }, { "epoch": 1.94, "learning_rate": 1.6120979356697073e-05, "loss": 0.3342, "step": 8080 }, { "epoch": 1.94, "learning_rate": 1.6116178588574175e-05, "loss": 0.2647, "step": 8090 }, { "epoch": 1.94, "learning_rate": 1.611137782045127e-05, "loss": 0.1491, "step": 8100 }, { "epoch": 1.95, "learning_rate": 1.6106577052328374e-05, "loss": 0.1737, "step": 8110 }, { "epoch": 1.95, "learning_rate": 1.6101776284205476e-05, "loss": 0.1692, "step": 8120 }, { "epoch": 1.95, "learning_rate": 1.6096975516082572e-05, "loss": 0.2975, "step": 8130 }, { "epoch": 1.95, "learning_rate": 1.6092174747959675e-05, "loss": 0.338, "step": 8140 }, { "epoch": 1.96, "learning_rate": 1.6087373979836777e-05, "loss": 0.2175, "step": 8150 }, { "epoch": 1.96, "learning_rate": 1.6082573211713876e-05, "loss": 0.2292, "step": 8160 }, { "epoch": 1.96, "learning_rate": 1.6077772443590975e-05, "loss": 0.2528, "step": 8170 }, { "epoch": 1.96, "learning_rate": 1.6072971675468078e-05, "loss": 0.4907, "step": 8180 }, { "epoch": 1.97, "learning_rate": 1.6068170907345177e-05, "loss": 0.2025, "step": 8190 }, { "epoch": 1.97, "learning_rate": 1.6063370139222276e-05, "loss": 0.2904, "step": 8200 }, { "epoch": 1.97, "learning_rate": 1.605856937109938e-05, "loss": 0.2836, "step": 8210 }, { "epoch": 1.97, "learning_rate": 1.6053768602976478e-05, "loss": 0.092, "step": 8220 }, { "epoch": 1.98, "learning_rate": 1.6048967834853577e-05, "loss": 0.3169, "step": 8230 }, { "epoch": 1.98, "learning_rate": 1.604416706673068e-05, "loss": 0.2997, "step": 8240 }, { "epoch": 1.98, "learning_rate": 1.603936629860778e-05, "loss": 0.1031, "step": 8250 }, { "epoch": 1.98, "learning_rate": 1.6034565530484878e-05, "loss": 0.1372, "step": 8260 }, { "epoch": 1.99, "learning_rate": 1.602976476236198e-05, "loss": 0.2871, "step": 8270 }, { "epoch": 1.99, "learning_rate": 1.602496399423908e-05, "loss": 0.3045, "step": 8280 }, { "epoch": 1.99, "learning_rate": 1.602016322611618e-05, "loss": 0.2663, "step": 8290 }, { "epoch": 1.99, "learning_rate": 1.601536245799328e-05, "loss": 0.0791, "step": 8300 }, { "epoch": 1.99, "learning_rate": 1.601056168987038e-05, "loss": 0.2043, "step": 8310 }, { "epoch": 2.0, "learning_rate": 1.6005760921747483e-05, "loss": 0.2235, "step": 8320 }, { "epoch": 2.0, "learning_rate": 1.6000960153624582e-05, "loss": 0.2659, "step": 8330 }, { "epoch": 2.0, "eval_accuracy": 0.9899693981638898, "eval_loss": 0.04719490185379982, "eval_runtime": 513.7691, "eval_samples_per_second": 11.449, "eval_steps_per_second": 1.433, "step": 8332 }, { "epoch": 2.0, "learning_rate": 1.599615938550168e-05, "loss": 0.2035, "step": 8340 }, { "epoch": 2.0, "learning_rate": 1.5991358617378784e-05, "loss": 0.3643, "step": 8350 }, { "epoch": 2.01, "learning_rate": 1.5986557849255883e-05, "loss": 0.2984, "step": 8360 }, { "epoch": 2.01, "learning_rate": 1.5981757081132982e-05, "loss": 0.4262, "step": 8370 }, { "epoch": 2.01, "learning_rate": 1.5976956313010085e-05, "loss": 0.2335, "step": 8380 }, { "epoch": 2.01, "learning_rate": 1.5972155544887184e-05, "loss": 0.2319, "step": 8390 }, { "epoch": 2.02, "learning_rate": 1.5967354776764283e-05, "loss": 0.1588, "step": 8400 }, { "epoch": 2.02, "learning_rate": 1.5962554008641386e-05, "loss": 0.2065, "step": 8410 }, { "epoch": 2.02, "learning_rate": 1.5957753240518485e-05, "loss": 0.1686, "step": 8420 }, { "epoch": 2.02, "learning_rate": 1.5952952472395584e-05, "loss": 0.3103, "step": 8430 }, { "epoch": 2.03, "learning_rate": 1.5948151704272686e-05, "loss": 0.2384, "step": 8440 }, { "epoch": 2.03, "learning_rate": 1.5943350936149785e-05, "loss": 0.166, "step": 8450 }, { "epoch": 2.03, "learning_rate": 1.5938550168026885e-05, "loss": 0.1075, "step": 8460 }, { "epoch": 2.03, "learning_rate": 1.5933749399903987e-05, "loss": 0.083, "step": 8470 }, { "epoch": 2.04, "learning_rate": 1.5928948631781086e-05, "loss": 0.2055, "step": 8480 }, { "epoch": 2.04, "learning_rate": 1.5924147863658185e-05, "loss": 0.3414, "step": 8490 }, { "epoch": 2.04, "learning_rate": 1.5919347095535288e-05, "loss": 0.1712, "step": 8500 }, { "epoch": 2.04, "learning_rate": 1.5914546327412387e-05, "loss": 0.3873, "step": 8510 }, { "epoch": 2.05, "learning_rate": 1.5909745559289486e-05, "loss": 0.1031, "step": 8520 }, { "epoch": 2.05, "learning_rate": 1.590494479116659e-05, "loss": 0.378, "step": 8530 }, { "epoch": 2.05, "learning_rate": 1.5900144023043688e-05, "loss": 0.2487, "step": 8540 }, { "epoch": 2.05, "learning_rate": 1.5895343254920787e-05, "loss": 0.0604, "step": 8550 }, { "epoch": 2.05, "learning_rate": 1.589054248679789e-05, "loss": 0.2216, "step": 8560 }, { "epoch": 2.06, "learning_rate": 1.588574171867499e-05, "loss": 0.2217, "step": 8570 }, { "epoch": 2.06, "learning_rate": 1.588094095055209e-05, "loss": 0.3557, "step": 8580 }, { "epoch": 2.06, "learning_rate": 1.587614018242919e-05, "loss": 0.3425, "step": 8590 }, { "epoch": 2.06, "learning_rate": 1.587133941430629e-05, "loss": 0.2533, "step": 8600 }, { "epoch": 2.07, "learning_rate": 1.5866538646183392e-05, "loss": 0.3007, "step": 8610 }, { "epoch": 2.07, "learning_rate": 1.586173787806049e-05, "loss": 0.32, "step": 8620 }, { "epoch": 2.07, "learning_rate": 1.585693710993759e-05, "loss": 0.2033, "step": 8630 }, { "epoch": 2.07, "learning_rate": 1.5852136341814693e-05, "loss": 0.4156, "step": 8640 }, { "epoch": 2.08, "learning_rate": 1.5847335573691792e-05, "loss": 0.1899, "step": 8650 }, { "epoch": 2.08, "learning_rate": 1.584253480556889e-05, "loss": 0.342, "step": 8660 }, { "epoch": 2.08, "learning_rate": 1.5837734037445994e-05, "loss": 0.2475, "step": 8670 }, { "epoch": 2.08, "learning_rate": 1.5832933269323093e-05, "loss": 0.3728, "step": 8680 }, { "epoch": 2.09, "learning_rate": 1.5828132501200192e-05, "loss": 0.2888, "step": 8690 }, { "epoch": 2.09, "learning_rate": 1.5823331733077295e-05, "loss": 0.2512, "step": 8700 }, { "epoch": 2.09, "learning_rate": 1.5818530964954394e-05, "loss": 0.1717, "step": 8710 }, { "epoch": 2.09, "learning_rate": 1.5813730196831493e-05, "loss": 0.1767, "step": 8720 }, { "epoch": 2.1, "learning_rate": 1.5808929428708596e-05, "loss": 0.224, "step": 8730 }, { "epoch": 2.1, "learning_rate": 1.5804128660585695e-05, "loss": 0.1161, "step": 8740 }, { "epoch": 2.1, "learning_rate": 1.5799327892462794e-05, "loss": 0.3446, "step": 8750 }, { "epoch": 2.1, "learning_rate": 1.5794527124339896e-05, "loss": 0.2593, "step": 8760 }, { "epoch": 2.11, "learning_rate": 1.5789726356216996e-05, "loss": 0.3028, "step": 8770 }, { "epoch": 2.11, "learning_rate": 1.5784925588094095e-05, "loss": 0.2327, "step": 8780 }, { "epoch": 2.11, "learning_rate": 1.5780124819971197e-05, "loss": 0.216, "step": 8790 }, { "epoch": 2.11, "learning_rate": 1.5775324051848296e-05, "loss": 0.3009, "step": 8800 }, { "epoch": 2.11, "learning_rate": 1.5770523283725396e-05, "loss": 0.2319, "step": 8810 }, { "epoch": 2.12, "learning_rate": 1.5765722515602498e-05, "loss": 0.2915, "step": 8820 }, { "epoch": 2.12, "learning_rate": 1.5760921747479597e-05, "loss": 0.168, "step": 8830 }, { "epoch": 2.12, "learning_rate": 1.57561209793567e-05, "loss": 0.083, "step": 8840 }, { "epoch": 2.12, "learning_rate": 1.57513202112338e-05, "loss": 0.222, "step": 8850 }, { "epoch": 2.13, "learning_rate": 1.5746519443110898e-05, "loss": 0.1219, "step": 8860 }, { "epoch": 2.13, "learning_rate": 1.5741718674988e-05, "loss": 0.1796, "step": 8870 }, { "epoch": 2.13, "learning_rate": 1.57369179068651e-05, "loss": 0.1524, "step": 8880 }, { "epoch": 2.13, "learning_rate": 1.57321171387422e-05, "loss": 0.1782, "step": 8890 }, { "epoch": 2.14, "learning_rate": 1.57273163706193e-05, "loss": 0.1206, "step": 8900 }, { "epoch": 2.14, "learning_rate": 1.57225156024964e-05, "loss": 0.096, "step": 8910 }, { "epoch": 2.14, "learning_rate": 1.57177148343735e-05, "loss": 0.3113, "step": 8920 }, { "epoch": 2.14, "learning_rate": 1.5712914066250602e-05, "loss": 0.3569, "step": 8930 }, { "epoch": 2.15, "learning_rate": 1.57081132981277e-05, "loss": 0.1468, "step": 8940 }, { "epoch": 2.15, "learning_rate": 1.57033125300048e-05, "loss": 0.3133, "step": 8950 }, { "epoch": 2.15, "learning_rate": 1.5698511761881903e-05, "loss": 0.2364, "step": 8960 }, { "epoch": 2.15, "learning_rate": 1.5693710993759002e-05, "loss": 0.3481, "step": 8970 }, { "epoch": 2.16, "learning_rate": 1.56889102256361e-05, "loss": 0.2658, "step": 8980 }, { "epoch": 2.16, "learning_rate": 1.5684109457513204e-05, "loss": 0.2087, "step": 8990 }, { "epoch": 2.16, "learning_rate": 1.5679308689390303e-05, "loss": 0.1101, "step": 9000 }, { "epoch": 2.16, "learning_rate": 1.5674507921267402e-05, "loss": 0.1522, "step": 9010 }, { "epoch": 2.17, "learning_rate": 1.5669707153144505e-05, "loss": 0.2566, "step": 9020 }, { "epoch": 2.17, "learning_rate": 1.5664906385021604e-05, "loss": 0.1457, "step": 9030 }, { "epoch": 2.17, "learning_rate": 1.5660105616898703e-05, "loss": 0.1996, "step": 9040 }, { "epoch": 2.17, "learning_rate": 1.5655304848775806e-05, "loss": 0.198, "step": 9050 }, { "epoch": 2.17, "learning_rate": 1.5650504080652905e-05, "loss": 0.1476, "step": 9060 }, { "epoch": 2.18, "learning_rate": 1.5645703312530004e-05, "loss": 0.2568, "step": 9070 }, { "epoch": 2.18, "learning_rate": 1.5640902544407106e-05, "loss": 0.186, "step": 9080 }, { "epoch": 2.18, "learning_rate": 1.5636101776284206e-05, "loss": 0.141, "step": 9090 }, { "epoch": 2.18, "learning_rate": 1.5631301008161308e-05, "loss": 0.2253, "step": 9100 }, { "epoch": 2.19, "learning_rate": 1.5626500240038407e-05, "loss": 0.0886, "step": 9110 }, { "epoch": 2.19, "learning_rate": 1.5621699471915506e-05, "loss": 0.1932, "step": 9120 }, { "epoch": 2.19, "learning_rate": 1.561689870379261e-05, "loss": 0.3795, "step": 9130 }, { "epoch": 2.19, "learning_rate": 1.5612097935669708e-05, "loss": 0.203, "step": 9140 }, { "epoch": 2.2, "learning_rate": 1.5607297167546807e-05, "loss": 0.2419, "step": 9150 }, { "epoch": 2.2, "learning_rate": 1.560249639942391e-05, "loss": 0.2311, "step": 9160 }, { "epoch": 2.2, "learning_rate": 1.559769563130101e-05, "loss": 0.2893, "step": 9170 }, { "epoch": 2.2, "learning_rate": 1.5592894863178108e-05, "loss": 0.1757, "step": 9180 }, { "epoch": 2.21, "learning_rate": 1.558809409505521e-05, "loss": 0.1514, "step": 9190 }, { "epoch": 2.21, "learning_rate": 1.558329332693231e-05, "loss": 0.2027, "step": 9200 }, { "epoch": 2.21, "learning_rate": 1.557849255880941e-05, "loss": 0.2129, "step": 9210 }, { "epoch": 2.21, "learning_rate": 1.557369179068651e-05, "loss": 0.1661, "step": 9220 }, { "epoch": 2.22, "learning_rate": 1.5568891022563614e-05, "loss": 0.2077, "step": 9230 }, { "epoch": 2.22, "learning_rate": 1.556409025444071e-05, "loss": 0.1438, "step": 9240 }, { "epoch": 2.22, "learning_rate": 1.5559289486317812e-05, "loss": 0.2247, "step": 9250 }, { "epoch": 2.22, "learning_rate": 1.5554488718194915e-05, "loss": 0.0931, "step": 9260 }, { "epoch": 2.23, "learning_rate": 1.554968795007201e-05, "loss": 0.2027, "step": 9270 }, { "epoch": 2.23, "learning_rate": 1.5544887181949113e-05, "loss": 0.2568, "step": 9280 }, { "epoch": 2.23, "learning_rate": 1.5540086413826216e-05, "loss": 0.1005, "step": 9290 }, { "epoch": 2.23, "learning_rate": 1.553528564570331e-05, "loss": 0.149, "step": 9300 }, { "epoch": 2.23, "learning_rate": 1.5530484877580414e-05, "loss": 0.2124, "step": 9310 }, { "epoch": 2.24, "learning_rate": 1.5525684109457517e-05, "loss": 0.3563, "step": 9320 }, { "epoch": 2.24, "learning_rate": 1.5520883341334612e-05, "loss": 0.3381, "step": 9330 }, { "epoch": 2.24, "learning_rate": 1.5516082573211715e-05, "loss": 0.1943, "step": 9340 }, { "epoch": 2.24, "learning_rate": 1.5511281805088817e-05, "loss": 0.0356, "step": 9350 }, { "epoch": 2.25, "learning_rate": 1.5506481036965916e-05, "loss": 0.1956, "step": 9360 }, { "epoch": 2.25, "learning_rate": 1.5501680268843016e-05, "loss": 0.3598, "step": 9370 }, { "epoch": 2.25, "learning_rate": 1.5496879500720118e-05, "loss": 0.2397, "step": 9380 }, { "epoch": 2.25, "learning_rate": 1.5492078732597217e-05, "loss": 0.3346, "step": 9390 }, { "epoch": 2.26, "learning_rate": 1.5487277964474316e-05, "loss": 0.0372, "step": 9400 }, { "epoch": 2.26, "learning_rate": 1.548247719635142e-05, "loss": 0.2807, "step": 9410 }, { "epoch": 2.26, "learning_rate": 1.5477676428228518e-05, "loss": 0.1641, "step": 9420 }, { "epoch": 2.26, "learning_rate": 1.5472875660105617e-05, "loss": 0.2699, "step": 9430 }, { "epoch": 2.27, "learning_rate": 1.546807489198272e-05, "loss": 0.1725, "step": 9440 }, { "epoch": 2.27, "learning_rate": 1.546327412385982e-05, "loss": 0.1684, "step": 9450 }, { "epoch": 2.27, "learning_rate": 1.5458473355736918e-05, "loss": 0.2307, "step": 9460 }, { "epoch": 2.27, "learning_rate": 1.545367258761402e-05, "loss": 0.0479, "step": 9470 }, { "epoch": 2.28, "learning_rate": 1.544887181949112e-05, "loss": 0.1937, "step": 9480 }, { "epoch": 2.28, "learning_rate": 1.5444071051368222e-05, "loss": 0.076, "step": 9490 }, { "epoch": 2.28, "learning_rate": 1.543927028324532e-05, "loss": 0.2921, "step": 9500 }, { "epoch": 2.28, "learning_rate": 1.543446951512242e-05, "loss": 0.1695, "step": 9510 }, { "epoch": 2.29, "learning_rate": 1.5429668746999523e-05, "loss": 0.4266, "step": 9520 }, { "epoch": 2.29, "learning_rate": 1.5424867978876622e-05, "loss": 0.3545, "step": 9530 }, { "epoch": 2.29, "learning_rate": 1.542006721075372e-05, "loss": 0.0763, "step": 9540 }, { "epoch": 2.29, "learning_rate": 1.5415266442630824e-05, "loss": 0.1396, "step": 9550 }, { "epoch": 2.29, "learning_rate": 1.5410465674507923e-05, "loss": 0.345, "step": 9560 }, { "epoch": 2.3, "learning_rate": 1.5405664906385022e-05, "loss": 0.1957, "step": 9570 }, { "epoch": 2.3, "learning_rate": 1.5400864138262125e-05, "loss": 0.2812, "step": 9580 }, { "epoch": 2.3, "learning_rate": 1.5396063370139224e-05, "loss": 0.2335, "step": 9590 }, { "epoch": 2.3, "learning_rate": 1.5391262602016323e-05, "loss": 0.3567, "step": 9600 }, { "epoch": 2.31, "learning_rate": 1.5386461833893426e-05, "loss": 0.1948, "step": 9610 }, { "epoch": 2.31, "learning_rate": 1.5381661065770525e-05, "loss": 0.146, "step": 9620 }, { "epoch": 2.31, "learning_rate": 1.5376860297647624e-05, "loss": 0.293, "step": 9630 }, { "epoch": 2.31, "learning_rate": 1.5372059529524727e-05, "loss": 0.1375, "step": 9640 }, { "epoch": 2.32, "learning_rate": 1.5367258761401826e-05, "loss": 0.1077, "step": 9650 }, { "epoch": 2.32, "learning_rate": 1.5362457993278925e-05, "loss": 0.1453, "step": 9660 }, { "epoch": 2.32, "learning_rate": 1.5357657225156027e-05, "loss": 0.2481, "step": 9670 }, { "epoch": 2.32, "learning_rate": 1.5352856457033127e-05, "loss": 0.2042, "step": 9680 }, { "epoch": 2.33, "learning_rate": 1.5348055688910226e-05, "loss": 0.2722, "step": 9690 }, { "epoch": 2.33, "learning_rate": 1.5343254920787328e-05, "loss": 0.2392, "step": 9700 }, { "epoch": 2.33, "learning_rate": 1.5338454152664427e-05, "loss": 0.1365, "step": 9710 }, { "epoch": 2.33, "learning_rate": 1.5333653384541527e-05, "loss": 0.2481, "step": 9720 }, { "epoch": 2.34, "learning_rate": 1.532885261641863e-05, "loss": 0.1214, "step": 9730 }, { "epoch": 2.34, "learning_rate": 1.5324051848295728e-05, "loss": 0.1552, "step": 9740 }, { "epoch": 2.34, "learning_rate": 1.5319251080172827e-05, "loss": 0.2478, "step": 9750 }, { "epoch": 2.34, "learning_rate": 1.531445031204993e-05, "loss": 0.2395, "step": 9760 }, { "epoch": 2.35, "learning_rate": 1.530964954392703e-05, "loss": 0.2827, "step": 9770 }, { "epoch": 2.35, "learning_rate": 1.530484877580413e-05, "loss": 0.0382, "step": 9780 }, { "epoch": 2.35, "learning_rate": 1.530004800768123e-05, "loss": 0.1288, "step": 9790 }, { "epoch": 2.35, "learning_rate": 1.529524723955833e-05, "loss": 0.0809, "step": 9800 }, { "epoch": 2.35, "learning_rate": 1.5290446471435432e-05, "loss": 0.3355, "step": 9810 }, { "epoch": 2.36, "learning_rate": 1.528564570331253e-05, "loss": 0.2088, "step": 9820 }, { "epoch": 2.36, "learning_rate": 1.528084493518963e-05, "loss": 0.0543, "step": 9830 }, { "epoch": 2.36, "learning_rate": 1.5276044167066733e-05, "loss": 0.1086, "step": 9840 }, { "epoch": 2.36, "learning_rate": 1.5271243398943832e-05, "loss": 0.1396, "step": 9850 }, { "epoch": 2.37, "learning_rate": 1.526644263082093e-05, "loss": 0.1955, "step": 9860 }, { "epoch": 2.37, "learning_rate": 1.5261641862698034e-05, "loss": 0.2258, "step": 9870 }, { "epoch": 2.37, "learning_rate": 1.5256841094575132e-05, "loss": 0.1816, "step": 9880 }, { "epoch": 2.37, "learning_rate": 1.5252040326452232e-05, "loss": 0.2099, "step": 9890 }, { "epoch": 2.38, "learning_rate": 1.5247239558329335e-05, "loss": 0.0644, "step": 9900 }, { "epoch": 2.38, "learning_rate": 1.5242438790206436e-05, "loss": 0.1803, "step": 9910 }, { "epoch": 2.38, "learning_rate": 1.5237638022083533e-05, "loss": 0.3613, "step": 9920 }, { "epoch": 2.38, "learning_rate": 1.5232837253960636e-05, "loss": 0.3219, "step": 9930 }, { "epoch": 2.39, "learning_rate": 1.5228036485837737e-05, "loss": 0.1716, "step": 9940 }, { "epoch": 2.39, "learning_rate": 1.5223235717714834e-05, "loss": 0.0709, "step": 9950 }, { "epoch": 2.39, "learning_rate": 1.5218434949591937e-05, "loss": 0.1417, "step": 9960 }, { "epoch": 2.39, "learning_rate": 1.5213634181469037e-05, "loss": 0.1804, "step": 9970 }, { "epoch": 2.4, "learning_rate": 1.5208833413346135e-05, "loss": 0.1785, "step": 9980 }, { "epoch": 2.4, "learning_rate": 1.5204032645223237e-05, "loss": 0.2638, "step": 9990 }, { "epoch": 2.4, "learning_rate": 1.5199231877100338e-05, "loss": 0.2374, "step": 10000 }, { "epoch": 2.4, "learning_rate": 1.5194431108977437e-05, "loss": 0.1964, "step": 10010 }, { "epoch": 2.41, "learning_rate": 1.5189630340854538e-05, "loss": 0.1756, "step": 10020 }, { "epoch": 2.41, "learning_rate": 1.5184829572731639e-05, "loss": 0.3015, "step": 10030 }, { "epoch": 2.41, "learning_rate": 1.518002880460874e-05, "loss": 0.2633, "step": 10040 }, { "epoch": 2.41, "learning_rate": 1.5175228036485839e-05, "loss": 0.2612, "step": 10050 }, { "epoch": 2.41, "learning_rate": 1.517042726836294e-05, "loss": 0.3114, "step": 10060 }, { "epoch": 2.42, "learning_rate": 1.516562650024004e-05, "loss": 0.1669, "step": 10070 }, { "epoch": 2.42, "learning_rate": 1.516082573211714e-05, "loss": 0.2757, "step": 10080 }, { "epoch": 2.42, "learning_rate": 1.515602496399424e-05, "loss": 0.4158, "step": 10090 }, { "epoch": 2.42, "learning_rate": 1.5151224195871342e-05, "loss": 0.2609, "step": 10100 }, { "epoch": 2.43, "learning_rate": 1.514642342774844e-05, "loss": 0.1338, "step": 10110 }, { "epoch": 2.43, "learning_rate": 1.5141622659625542e-05, "loss": 0.2169, "step": 10120 }, { "epoch": 2.43, "learning_rate": 1.5136821891502642e-05, "loss": 0.1438, "step": 10130 }, { "epoch": 2.43, "learning_rate": 1.5132021123379742e-05, "loss": 0.0989, "step": 10140 }, { "epoch": 2.44, "learning_rate": 1.5127220355256842e-05, "loss": 0.2776, "step": 10150 }, { "epoch": 2.44, "learning_rate": 1.5122419587133943e-05, "loss": 0.2292, "step": 10160 }, { "epoch": 2.44, "learning_rate": 1.5117618819011044e-05, "loss": 0.1841, "step": 10170 }, { "epoch": 2.44, "learning_rate": 1.5112818050888143e-05, "loss": 0.1699, "step": 10180 }, { "epoch": 2.45, "learning_rate": 1.5108017282765244e-05, "loss": 0.1345, "step": 10190 }, { "epoch": 2.45, "learning_rate": 1.5103216514642345e-05, "loss": 0.289, "step": 10200 }, { "epoch": 2.45, "learning_rate": 1.5098415746519444e-05, "loss": 0.1946, "step": 10210 }, { "epoch": 2.45, "learning_rate": 1.5093614978396545e-05, "loss": 0.1135, "step": 10220 }, { "epoch": 2.46, "learning_rate": 1.5088814210273646e-05, "loss": 0.283, "step": 10230 }, { "epoch": 2.46, "learning_rate": 1.5084013442150745e-05, "loss": 0.0817, "step": 10240 }, { "epoch": 2.46, "learning_rate": 1.5079212674027846e-05, "loss": 0.2072, "step": 10250 }, { "epoch": 2.46, "learning_rate": 1.5074411905904947e-05, "loss": 0.1907, "step": 10260 }, { "epoch": 2.47, "learning_rate": 1.5069611137782046e-05, "loss": 0.1626, "step": 10270 }, { "epoch": 2.47, "learning_rate": 1.5064810369659147e-05, "loss": 0.2369, "step": 10280 }, { "epoch": 2.47, "learning_rate": 1.5060009601536247e-05, "loss": 0.215, "step": 10290 }, { "epoch": 2.47, "learning_rate": 1.5055208833413348e-05, "loss": 0.1224, "step": 10300 }, { "epoch": 2.47, "learning_rate": 1.5050408065290447e-05, "loss": 0.2837, "step": 10310 }, { "epoch": 2.48, "learning_rate": 1.5045607297167548e-05, "loss": 0.2383, "step": 10320 }, { "epoch": 2.48, "learning_rate": 1.5040806529044649e-05, "loss": 0.129, "step": 10330 }, { "epoch": 2.48, "learning_rate": 1.5036005760921748e-05, "loss": 0.1004, "step": 10340 }, { "epoch": 2.48, "learning_rate": 1.5031204992798849e-05, "loss": 0.1682, "step": 10350 }, { "epoch": 2.49, "learning_rate": 1.502640422467595e-05, "loss": 0.1598, "step": 10360 }, { "epoch": 2.49, "learning_rate": 1.5021603456553049e-05, "loss": 0.1692, "step": 10370 }, { "epoch": 2.49, "learning_rate": 1.501680268843015e-05, "loss": 0.2667, "step": 10380 }, { "epoch": 2.49, "learning_rate": 1.501200192030725e-05, "loss": 0.152, "step": 10390 }, { "epoch": 2.5, "learning_rate": 1.500720115218435e-05, "loss": 0.2442, "step": 10400 }, { "epoch": 2.5, "learning_rate": 1.500240038406145e-05, "loss": 0.1468, "step": 10410 }, { "epoch": 2.5, "learning_rate": 1.4997599615938552e-05, "loss": 0.3102, "step": 10420 }, { "epoch": 2.5, "learning_rate": 1.4992798847815652e-05, "loss": 0.0355, "step": 10430 }, { "epoch": 2.51, "learning_rate": 1.4987998079692752e-05, "loss": 0.1872, "step": 10440 }, { "epoch": 2.51, "learning_rate": 1.4983197311569852e-05, "loss": 0.241, "step": 10450 }, { "epoch": 2.51, "learning_rate": 1.4978396543446953e-05, "loss": 0.1428, "step": 10460 }, { "epoch": 2.51, "learning_rate": 1.4973595775324052e-05, "loss": 0.1533, "step": 10470 }, { "epoch": 2.52, "learning_rate": 1.4968795007201153e-05, "loss": 0.1158, "step": 10480 }, { "epoch": 2.52, "learning_rate": 1.4963994239078254e-05, "loss": 0.318, "step": 10490 }, { "epoch": 2.52, "learning_rate": 1.4959193470955353e-05, "loss": 0.1924, "step": 10500 }, { "epoch": 2.52, "learning_rate": 1.4954392702832454e-05, "loss": 0.1385, "step": 10510 }, { "epoch": 2.53, "learning_rate": 1.4949591934709555e-05, "loss": 0.2009, "step": 10520 }, { "epoch": 2.53, "learning_rate": 1.4944791166586654e-05, "loss": 0.2928, "step": 10530 }, { "epoch": 2.53, "learning_rate": 1.4939990398463755e-05, "loss": 0.1048, "step": 10540 }, { "epoch": 2.53, "learning_rate": 1.4935189630340856e-05, "loss": 0.2862, "step": 10550 }, { "epoch": 2.53, "learning_rate": 1.4930388862217957e-05, "loss": 0.3168, "step": 10560 }, { "epoch": 2.54, "learning_rate": 1.4925588094095056e-05, "loss": 0.2204, "step": 10570 }, { "epoch": 2.54, "learning_rate": 1.4920787325972157e-05, "loss": 0.358, "step": 10580 }, { "epoch": 2.54, "learning_rate": 1.4915986557849258e-05, "loss": 0.1276, "step": 10590 }, { "epoch": 2.54, "learning_rate": 1.4911185789726357e-05, "loss": 0.2204, "step": 10600 }, { "epoch": 2.55, "learning_rate": 1.4906385021603458e-05, "loss": 0.1954, "step": 10610 }, { "epoch": 2.55, "learning_rate": 1.4901584253480558e-05, "loss": 0.1739, "step": 10620 }, { "epoch": 2.55, "learning_rate": 1.4896783485357657e-05, "loss": 0.2356, "step": 10630 }, { "epoch": 2.55, "learning_rate": 1.4891982717234758e-05, "loss": 0.1433, "step": 10640 }, { "epoch": 2.56, "learning_rate": 1.488718194911186e-05, "loss": 0.3961, "step": 10650 }, { "epoch": 2.56, "learning_rate": 1.4882381180988958e-05, "loss": 0.1719, "step": 10660 }, { "epoch": 2.56, "learning_rate": 1.487758041286606e-05, "loss": 0.2837, "step": 10670 }, { "epoch": 2.56, "learning_rate": 1.487277964474316e-05, "loss": 0.1502, "step": 10680 }, { "epoch": 2.57, "learning_rate": 1.4867978876620261e-05, "loss": 0.2964, "step": 10690 }, { "epoch": 2.57, "learning_rate": 1.486317810849736e-05, "loss": 0.085, "step": 10700 }, { "epoch": 2.57, "learning_rate": 1.4858377340374461e-05, "loss": 0.1052, "step": 10710 }, { "epoch": 2.57, "learning_rate": 1.4853576572251562e-05, "loss": 0.1558, "step": 10720 }, { "epoch": 2.58, "learning_rate": 1.484877580412866e-05, "loss": 0.3192, "step": 10730 }, { "epoch": 2.58, "learning_rate": 1.4843975036005762e-05, "loss": 0.1599, "step": 10740 }, { "epoch": 2.58, "learning_rate": 1.4839174267882863e-05, "loss": 0.2426, "step": 10750 }, { "epoch": 2.58, "learning_rate": 1.4834373499759962e-05, "loss": 0.1456, "step": 10760 }, { "epoch": 2.59, "learning_rate": 1.4829572731637063e-05, "loss": 0.0722, "step": 10770 }, { "epoch": 2.59, "learning_rate": 1.4824771963514163e-05, "loss": 0.2097, "step": 10780 }, { "epoch": 2.59, "learning_rate": 1.4819971195391263e-05, "loss": 0.2274, "step": 10790 }, { "epoch": 2.59, "learning_rate": 1.4815170427268363e-05, "loss": 0.202, "step": 10800 }, { "epoch": 2.59, "learning_rate": 1.4810369659145464e-05, "loss": 0.1246, "step": 10810 }, { "epoch": 2.6, "learning_rate": 1.4805568891022565e-05, "loss": 0.1036, "step": 10820 }, { "epoch": 2.6, "learning_rate": 1.4800768122899664e-05, "loss": 0.4299, "step": 10830 }, { "epoch": 2.6, "learning_rate": 1.4795967354776765e-05, "loss": 0.0968, "step": 10840 }, { "epoch": 2.6, "learning_rate": 1.4791166586653868e-05, "loss": 0.2303, "step": 10850 }, { "epoch": 2.61, "learning_rate": 1.4786365818530965e-05, "loss": 0.0818, "step": 10860 }, { "epoch": 2.61, "learning_rate": 1.4781565050408066e-05, "loss": 0.3172, "step": 10870 }, { "epoch": 2.61, "learning_rate": 1.4776764282285168e-05, "loss": 0.1984, "step": 10880 }, { "epoch": 2.61, "learning_rate": 1.4771963514162266e-05, "loss": 0.2799, "step": 10890 }, { "epoch": 2.62, "learning_rate": 1.4767162746039367e-05, "loss": 0.2263, "step": 10900 }, { "epoch": 2.62, "learning_rate": 1.476236197791647e-05, "loss": 0.143, "step": 10910 }, { "epoch": 2.62, "learning_rate": 1.4757561209793567e-05, "loss": 0.1558, "step": 10920 }, { "epoch": 2.62, "learning_rate": 1.4752760441670668e-05, "loss": 0.1062, "step": 10930 }, { "epoch": 2.63, "learning_rate": 1.474795967354777e-05, "loss": 0.0642, "step": 10940 }, { "epoch": 2.63, "learning_rate": 1.4743158905424871e-05, "loss": 0.1034, "step": 10950 }, { "epoch": 2.63, "learning_rate": 1.4738358137301968e-05, "loss": 0.3258, "step": 10960 }, { "epoch": 2.63, "learning_rate": 1.4733557369179071e-05, "loss": 0.1321, "step": 10970 }, { "epoch": 2.64, "learning_rate": 1.4728756601056172e-05, "loss": 0.0591, "step": 10980 }, { "epoch": 2.64, "learning_rate": 1.472395583293327e-05, "loss": 0.1292, "step": 10990 }, { "epoch": 2.64, "learning_rate": 1.4719155064810372e-05, "loss": 0.1714, "step": 11000 }, { "epoch": 2.64, "learning_rate": 1.4714354296687473e-05, "loss": 0.2207, "step": 11010 }, { "epoch": 2.65, "learning_rate": 1.470955352856457e-05, "loss": 0.137, "step": 11020 }, { "epoch": 2.65, "learning_rate": 1.4704752760441673e-05, "loss": 0.1735, "step": 11030 }, { "epoch": 2.65, "learning_rate": 1.4699951992318773e-05, "loss": 0.2947, "step": 11040 }, { "epoch": 2.65, "learning_rate": 1.4695151224195871e-05, "loss": 0.2376, "step": 11050 }, { "epoch": 2.65, "learning_rate": 1.4690350456072973e-05, "loss": 0.2711, "step": 11060 }, { "epoch": 2.66, "learning_rate": 1.4685549687950074e-05, "loss": 0.3468, "step": 11070 }, { "epoch": 2.66, "learning_rate": 1.4680748919827175e-05, "loss": 0.1008, "step": 11080 }, { "epoch": 2.66, "learning_rate": 1.4675948151704274e-05, "loss": 0.2649, "step": 11090 }, { "epoch": 2.66, "learning_rate": 1.4671147383581375e-05, "loss": 0.0722, "step": 11100 }, { "epoch": 2.67, "learning_rate": 1.4666346615458476e-05, "loss": 0.1807, "step": 11110 }, { "epoch": 2.67, "learning_rate": 1.4661545847335575e-05, "loss": 0.2237, "step": 11120 }, { "epoch": 2.67, "learning_rate": 1.4656745079212676e-05, "loss": 0.0997, "step": 11130 }, { "epoch": 2.67, "learning_rate": 1.4651944311089777e-05, "loss": 0.09, "step": 11140 }, { "epoch": 2.68, "learning_rate": 1.4647143542966876e-05, "loss": 0.2057, "step": 11150 }, { "epoch": 2.68, "learning_rate": 1.4642342774843977e-05, "loss": 0.2111, "step": 11160 }, { "epoch": 2.68, "learning_rate": 1.4637542006721078e-05, "loss": 0.0847, "step": 11170 }, { "epoch": 2.68, "learning_rate": 1.4632741238598177e-05, "loss": 0.2067, "step": 11180 }, { "epoch": 2.69, "learning_rate": 1.4627940470475278e-05, "loss": 0.1233, "step": 11190 }, { "epoch": 2.69, "learning_rate": 1.4623139702352378e-05, "loss": 0.1625, "step": 11200 }, { "epoch": 2.69, "learning_rate": 1.4618338934229478e-05, "loss": 0.4326, "step": 11210 }, { "epoch": 2.69, "learning_rate": 1.4613538166106578e-05, "loss": 0.2955, "step": 11220 }, { "epoch": 2.7, "learning_rate": 1.460873739798368e-05, "loss": 0.2861, "step": 11230 }, { "epoch": 2.7, "learning_rate": 1.460393662986078e-05, "loss": 0.2929, "step": 11240 }, { "epoch": 2.7, "learning_rate": 1.459913586173788e-05, "loss": 0.264, "step": 11250 }, { "epoch": 2.7, "learning_rate": 1.459433509361498e-05, "loss": 0.2689, "step": 11260 }, { "epoch": 2.71, "learning_rate": 1.4589534325492081e-05, "loss": 0.1043, "step": 11270 }, { "epoch": 2.71, "learning_rate": 1.458473355736918e-05, "loss": 0.0879, "step": 11280 }, { "epoch": 2.71, "learning_rate": 1.4579932789246281e-05, "loss": 0.1821, "step": 11290 }, { "epoch": 2.71, "learning_rate": 1.4575132021123382e-05, "loss": 0.1557, "step": 11300 }, { "epoch": 2.71, "learning_rate": 1.4570331253000481e-05, "loss": 0.148, "step": 11310 }, { "epoch": 2.72, "learning_rate": 1.4565530484877582e-05, "loss": 0.1579, "step": 11320 }, { "epoch": 2.72, "learning_rate": 1.4560729716754683e-05, "loss": 0.3617, "step": 11330 }, { "epoch": 2.72, "learning_rate": 1.4555928948631782e-05, "loss": 0.1216, "step": 11340 }, { "epoch": 2.72, "learning_rate": 1.4551128180508883e-05, "loss": 0.117, "step": 11350 }, { "epoch": 2.73, "learning_rate": 1.4546327412385983e-05, "loss": 0.2943, "step": 11360 }, { "epoch": 2.73, "learning_rate": 1.4541526644263084e-05, "loss": 0.3734, "step": 11370 }, { "epoch": 2.73, "learning_rate": 1.4536725876140183e-05, "loss": 0.2759, "step": 11380 }, { "epoch": 2.73, "learning_rate": 1.4531925108017284e-05, "loss": 0.2898, "step": 11390 }, { "epoch": 2.74, "learning_rate": 1.4527124339894385e-05, "loss": 0.294, "step": 11400 }, { "epoch": 2.74, "learning_rate": 1.4522323571771484e-05, "loss": 0.4532, "step": 11410 }, { "epoch": 2.74, "learning_rate": 1.4517522803648585e-05, "loss": 0.1752, "step": 11420 }, { "epoch": 2.74, "learning_rate": 1.4512722035525686e-05, "loss": 0.2728, "step": 11430 }, { "epoch": 2.75, "learning_rate": 1.4507921267402785e-05, "loss": 0.2024, "step": 11440 }, { "epoch": 2.75, "learning_rate": 1.4503120499279886e-05, "loss": 0.2437, "step": 11450 }, { "epoch": 2.75, "learning_rate": 1.4498319731156987e-05, "loss": 0.0856, "step": 11460 }, { "epoch": 2.75, "learning_rate": 1.4493518963034086e-05, "loss": 0.1678, "step": 11470 }, { "epoch": 2.76, "learning_rate": 1.4488718194911187e-05, "loss": 0.117, "step": 11480 }, { "epoch": 2.76, "learning_rate": 1.4483917426788288e-05, "loss": 0.2189, "step": 11490 }, { "epoch": 2.76, "learning_rate": 1.4479116658665388e-05, "loss": 0.1801, "step": 11500 }, { "epoch": 2.76, "learning_rate": 1.4474315890542488e-05, "loss": 0.1597, "step": 11510 }, { "epoch": 2.77, "learning_rate": 1.4469515122419588e-05, "loss": 0.1212, "step": 11520 }, { "epoch": 2.77, "learning_rate": 1.446471435429669e-05, "loss": 0.3085, "step": 11530 }, { "epoch": 2.77, "learning_rate": 1.4459913586173788e-05, "loss": 0.2141, "step": 11540 }, { "epoch": 2.77, "learning_rate": 1.445511281805089e-05, "loss": 0.1987, "step": 11550 }, { "epoch": 2.77, "learning_rate": 1.445031204992799e-05, "loss": 0.1391, "step": 11560 }, { "epoch": 2.78, "learning_rate": 1.444551128180509e-05, "loss": 0.2763, "step": 11570 }, { "epoch": 2.78, "learning_rate": 1.444071051368219e-05, "loss": 0.181, "step": 11580 }, { "epoch": 2.78, "learning_rate": 1.4435909745559291e-05, "loss": 0.3497, "step": 11590 }, { "epoch": 2.78, "learning_rate": 1.443110897743639e-05, "loss": 0.1695, "step": 11600 }, { "epoch": 2.79, "learning_rate": 1.4426308209313491e-05, "loss": 0.1573, "step": 11610 }, { "epoch": 2.79, "learning_rate": 1.4421507441190592e-05, "loss": 0.1493, "step": 11620 }, { "epoch": 2.79, "learning_rate": 1.4416706673067693e-05, "loss": 0.3066, "step": 11630 }, { "epoch": 2.79, "learning_rate": 1.4411905904944792e-05, "loss": 0.3554, "step": 11640 }, { "epoch": 2.8, "learning_rate": 1.4407105136821893e-05, "loss": 0.2408, "step": 11650 }, { "epoch": 2.8, "learning_rate": 1.4402304368698994e-05, "loss": 0.1179, "step": 11660 }, { "epoch": 2.8, "learning_rate": 1.4397503600576093e-05, "loss": 0.1495, "step": 11670 }, { "epoch": 2.8, "learning_rate": 1.4392702832453194e-05, "loss": 0.2195, "step": 11680 }, { "epoch": 2.81, "learning_rate": 1.4387902064330294e-05, "loss": 0.2917, "step": 11690 }, { "epoch": 2.81, "learning_rate": 1.4383101296207394e-05, "loss": 0.1422, "step": 11700 }, { "epoch": 2.81, "learning_rate": 1.4378300528084494e-05, "loss": 0.1392, "step": 11710 }, { "epoch": 2.81, "learning_rate": 1.4373499759961595e-05, "loss": 0.2353, "step": 11720 }, { "epoch": 2.82, "learning_rate": 1.4368698991838694e-05, "loss": 0.2834, "step": 11730 }, { "epoch": 2.82, "learning_rate": 1.4363898223715795e-05, "loss": 0.2711, "step": 11740 }, { "epoch": 2.82, "learning_rate": 1.4359097455592896e-05, "loss": 0.2033, "step": 11750 }, { "epoch": 2.82, "learning_rate": 1.4354296687469997e-05, "loss": 0.1748, "step": 11760 }, { "epoch": 2.83, "learning_rate": 1.4349495919347096e-05, "loss": 0.164, "step": 11770 }, { "epoch": 2.83, "learning_rate": 1.4344695151224197e-05, "loss": 0.1899, "step": 11780 }, { "epoch": 2.83, "learning_rate": 1.4339894383101298e-05, "loss": 0.1409, "step": 11790 }, { "epoch": 2.83, "learning_rate": 1.4335093614978397e-05, "loss": 0.1571, "step": 11800 }, { "epoch": 2.83, "learning_rate": 1.4330292846855498e-05, "loss": 0.2338, "step": 11810 }, { "epoch": 2.84, "learning_rate": 1.4325492078732599e-05, "loss": 0.3575, "step": 11820 }, { "epoch": 2.84, "learning_rate": 1.4320691310609698e-05, "loss": 0.3519, "step": 11830 }, { "epoch": 2.84, "learning_rate": 1.4315890542486799e-05, "loss": 0.1915, "step": 11840 }, { "epoch": 2.84, "learning_rate": 1.43110897743639e-05, "loss": 0.1729, "step": 11850 }, { "epoch": 2.85, "learning_rate": 1.4306289006240999e-05, "loss": 0.1198, "step": 11860 }, { "epoch": 2.85, "learning_rate": 1.43014882381181e-05, "loss": 0.1349, "step": 11870 }, { "epoch": 2.85, "learning_rate": 1.42966874699952e-05, "loss": 0.2092, "step": 11880 }, { "epoch": 2.85, "learning_rate": 1.4291886701872301e-05, "loss": 0.0536, "step": 11890 }, { "epoch": 2.86, "learning_rate": 1.42870859337494e-05, "loss": 0.1428, "step": 11900 }, { "epoch": 2.86, "learning_rate": 1.4282285165626501e-05, "loss": 0.1473, "step": 11910 }, { "epoch": 2.86, "learning_rate": 1.4277484397503602e-05, "loss": 0.2543, "step": 11920 }, { "epoch": 2.86, "learning_rate": 1.4272683629380701e-05, "loss": 0.1749, "step": 11930 }, { "epoch": 2.87, "learning_rate": 1.4267882861257802e-05, "loss": 0.183, "step": 11940 }, { "epoch": 2.87, "learning_rate": 1.4263082093134903e-05, "loss": 0.3432, "step": 11950 }, { "epoch": 2.87, "learning_rate": 1.4258281325012002e-05, "loss": 0.2002, "step": 11960 }, { "epoch": 2.87, "learning_rate": 1.4253480556889103e-05, "loss": 0.2997, "step": 11970 }, { "epoch": 2.88, "learning_rate": 1.4248679788766204e-05, "loss": 0.2017, "step": 11980 }, { "epoch": 2.88, "learning_rate": 1.4243879020643303e-05, "loss": 0.0466, "step": 11990 }, { "epoch": 2.88, "learning_rate": 1.4239078252520404e-05, "loss": 0.3021, "step": 12000 }, { "epoch": 2.88, "learning_rate": 1.4234277484397504e-05, "loss": 0.2667, "step": 12010 }, { "epoch": 2.89, "learning_rate": 1.4229476716274607e-05, "loss": 0.2184, "step": 12020 }, { "epoch": 2.89, "learning_rate": 1.4224675948151704e-05, "loss": 0.1655, "step": 12030 }, { "epoch": 2.89, "learning_rate": 1.4219875180028805e-05, "loss": 0.0865, "step": 12040 }, { "epoch": 2.89, "learning_rate": 1.4215074411905908e-05, "loss": 0.1376, "step": 12050 }, { "epoch": 2.89, "learning_rate": 1.4210273643783005e-05, "loss": 0.1216, "step": 12060 }, { "epoch": 2.9, "learning_rate": 1.4205472875660106e-05, "loss": 0.1288, "step": 12070 }, { "epoch": 2.9, "learning_rate": 1.4200672107537209e-05, "loss": 0.0865, "step": 12080 }, { "epoch": 2.9, "learning_rate": 1.4195871339414306e-05, "loss": 0.2608, "step": 12090 }, { "epoch": 2.9, "learning_rate": 1.4191070571291407e-05, "loss": 0.0557, "step": 12100 }, { "epoch": 2.91, "learning_rate": 1.418626980316851e-05, "loss": 0.2073, "step": 12110 }, { "epoch": 2.91, "learning_rate": 1.4181469035045607e-05, "loss": 0.2566, "step": 12120 }, { "epoch": 2.91, "learning_rate": 1.4176668266922708e-05, "loss": 0.1452, "step": 12130 }, { "epoch": 2.91, "learning_rate": 1.417186749879981e-05, "loss": 0.2432, "step": 12140 }, { "epoch": 2.92, "learning_rate": 1.4167066730676911e-05, "loss": 0.2239, "step": 12150 }, { "epoch": 2.92, "learning_rate": 1.4162265962554009e-05, "loss": 0.2452, "step": 12160 }, { "epoch": 2.92, "learning_rate": 1.4157465194431111e-05, "loss": 0.2074, "step": 12170 }, { "epoch": 2.92, "learning_rate": 1.4152664426308212e-05, "loss": 0.4822, "step": 12180 }, { "epoch": 2.93, "learning_rate": 1.414786365818531e-05, "loss": 0.2003, "step": 12190 }, { "epoch": 2.93, "learning_rate": 1.4143062890062412e-05, "loss": 0.2836, "step": 12200 }, { "epoch": 2.93, "learning_rate": 1.4138262121939513e-05, "loss": 0.2733, "step": 12210 }, { "epoch": 2.93, "learning_rate": 1.413346135381661e-05, "loss": 0.2449, "step": 12220 }, { "epoch": 2.94, "learning_rate": 1.4128660585693713e-05, "loss": 0.4129, "step": 12230 }, { "epoch": 2.94, "learning_rate": 1.4123859817570814e-05, "loss": 0.2572, "step": 12240 }, { "epoch": 2.94, "learning_rate": 1.4119059049447911e-05, "loss": 0.2052, "step": 12250 }, { "epoch": 2.94, "learning_rate": 1.4114258281325014e-05, "loss": 0.0483, "step": 12260 }, { "epoch": 2.95, "learning_rate": 1.4109457513202114e-05, "loss": 0.2035, "step": 12270 }, { "epoch": 2.95, "learning_rate": 1.4104656745079215e-05, "loss": 0.1016, "step": 12280 }, { "epoch": 2.95, "learning_rate": 1.4099855976956314e-05, "loss": 0.1477, "step": 12290 }, { "epoch": 2.95, "learning_rate": 1.4095055208833415e-05, "loss": 0.1118, "step": 12300 }, { "epoch": 2.95, "learning_rate": 1.4090254440710516e-05, "loss": 0.2902, "step": 12310 }, { "epoch": 2.96, "learning_rate": 1.4085453672587615e-05, "loss": 0.1305, "step": 12320 }, { "epoch": 2.96, "learning_rate": 1.4080652904464716e-05, "loss": 0.2209, "step": 12330 }, { "epoch": 2.96, "learning_rate": 1.4075852136341817e-05, "loss": 0.1331, "step": 12340 }, { "epoch": 2.96, "learning_rate": 1.4071051368218916e-05, "loss": 0.1694, "step": 12350 }, { "epoch": 2.97, "learning_rate": 1.4066250600096017e-05, "loss": 0.2619, "step": 12360 }, { "epoch": 2.97, "learning_rate": 1.4061449831973118e-05, "loss": 0.3794, "step": 12370 }, { "epoch": 2.97, "learning_rate": 1.4056649063850217e-05, "loss": 0.2388, "step": 12380 }, { "epoch": 2.97, "learning_rate": 1.4051848295727318e-05, "loss": 0.3372, "step": 12390 }, { "epoch": 2.98, "learning_rate": 1.4047047527604419e-05, "loss": 0.1956, "step": 12400 }, { "epoch": 2.98, "learning_rate": 1.404224675948152e-05, "loss": 0.1022, "step": 12410 }, { "epoch": 2.98, "learning_rate": 1.4037445991358619e-05, "loss": 0.1557, "step": 12420 }, { "epoch": 2.98, "learning_rate": 1.403264522323572e-05, "loss": 0.031, "step": 12430 }, { "epoch": 2.99, "learning_rate": 1.402784445511282e-05, "loss": 0.1382, "step": 12440 }, { "epoch": 2.99, "learning_rate": 1.402304368698992e-05, "loss": 0.1419, "step": 12450 }, { "epoch": 2.99, "learning_rate": 1.401824291886702e-05, "loss": 0.3698, "step": 12460 }, { "epoch": 2.99, "learning_rate": 1.4013442150744121e-05, "loss": 0.0967, "step": 12470 }, { "epoch": 3.0, "learning_rate": 1.400864138262122e-05, "loss": 0.1026, "step": 12480 }, { "epoch": 3.0, "learning_rate": 1.4003840614498321e-05, "loss": 0.2825, "step": 12490 }, { "epoch": 3.0, "eval_accuracy": 0.9971098265895953, "eval_loss": 0.015470546670258045, "eval_runtime": 514.2587, "eval_samples_per_second": 11.438, "eval_steps_per_second": 1.431, "step": 12498 }, { "epoch": 3.0, "learning_rate": 1.3999039846375422e-05, "loss": 0.1956, "step": 12500 }, { "epoch": 3.0, "learning_rate": 1.3994239078252521e-05, "loss": 0.0667, "step": 12510 }, { "epoch": 3.01, "learning_rate": 1.3989438310129622e-05, "loss": 0.3162, "step": 12520 }, { "epoch": 3.01, "learning_rate": 1.3984637542006723e-05, "loss": 0.1426, "step": 12530 }, { "epoch": 3.01, "learning_rate": 1.3979836773883824e-05, "loss": 0.1479, "step": 12540 }, { "epoch": 3.01, "learning_rate": 1.3975036005760923e-05, "loss": 0.1379, "step": 12550 }, { "epoch": 3.01, "learning_rate": 1.3970235237638024e-05, "loss": 0.1264, "step": 12560 }, { "epoch": 3.02, "learning_rate": 1.3965434469515125e-05, "loss": 0.0989, "step": 12570 }, { "epoch": 3.02, "learning_rate": 1.3960633701392224e-05, "loss": 0.1482, "step": 12580 }, { "epoch": 3.02, "learning_rate": 1.3955832933269324e-05, "loss": 0.1873, "step": 12590 }, { "epoch": 3.02, "learning_rate": 1.3951032165146425e-05, "loss": 0.3184, "step": 12600 }, { "epoch": 3.03, "learning_rate": 1.3946231397023524e-05, "loss": 0.1699, "step": 12610 }, { "epoch": 3.03, "learning_rate": 1.3941430628900625e-05, "loss": 0.1943, "step": 12620 }, { "epoch": 3.03, "learning_rate": 1.3936629860777726e-05, "loss": 0.1227, "step": 12630 }, { "epoch": 3.03, "learning_rate": 1.3931829092654825e-05, "loss": 0.1615, "step": 12640 }, { "epoch": 3.04, "learning_rate": 1.3927028324531926e-05, "loss": 0.2606, "step": 12650 }, { "epoch": 3.04, "learning_rate": 1.3922227556409027e-05, "loss": 0.2146, "step": 12660 }, { "epoch": 3.04, "learning_rate": 1.3917426788286126e-05, "loss": 0.18, "step": 12670 }, { "epoch": 3.04, "learning_rate": 1.3912626020163227e-05, "loss": 0.1854, "step": 12680 }, { "epoch": 3.05, "learning_rate": 1.3907825252040328e-05, "loss": 0.2845, "step": 12690 }, { "epoch": 3.05, "learning_rate": 1.3903024483917429e-05, "loss": 0.1362, "step": 12700 }, { "epoch": 3.05, "learning_rate": 1.3898223715794528e-05, "loss": 0.4133, "step": 12710 }, { "epoch": 3.05, "learning_rate": 1.3893422947671629e-05, "loss": 0.3149, "step": 12720 }, { "epoch": 3.06, "learning_rate": 1.388862217954873e-05, "loss": 0.3335, "step": 12730 }, { "epoch": 3.06, "learning_rate": 1.3883821411425829e-05, "loss": 0.189, "step": 12740 }, { "epoch": 3.06, "learning_rate": 1.387902064330293e-05, "loss": 0.2713, "step": 12750 }, { "epoch": 3.06, "learning_rate": 1.387421987518003e-05, "loss": 0.2291, "step": 12760 }, { "epoch": 3.07, "learning_rate": 1.386941910705713e-05, "loss": 0.14, "step": 12770 }, { "epoch": 3.07, "learning_rate": 1.386461833893423e-05, "loss": 0.2882, "step": 12780 }, { "epoch": 3.07, "learning_rate": 1.3859817570811331e-05, "loss": 0.1449, "step": 12790 }, { "epoch": 3.07, "learning_rate": 1.385501680268843e-05, "loss": 0.1463, "step": 12800 }, { "epoch": 3.07, "learning_rate": 1.3850216034565531e-05, "loss": 0.1283, "step": 12810 }, { "epoch": 3.08, "learning_rate": 1.3845415266442632e-05, "loss": 0.2423, "step": 12820 }, { "epoch": 3.08, "learning_rate": 1.3840614498319733e-05, "loss": 0.1881, "step": 12830 }, { "epoch": 3.08, "learning_rate": 1.3835813730196832e-05, "loss": 0.1671, "step": 12840 }, { "epoch": 3.08, "learning_rate": 1.3831012962073933e-05, "loss": 0.1666, "step": 12850 }, { "epoch": 3.09, "learning_rate": 1.3826212193951034e-05, "loss": 0.2601, "step": 12860 }, { "epoch": 3.09, "learning_rate": 1.3821411425828133e-05, "loss": 0.0795, "step": 12870 }, { "epoch": 3.09, "learning_rate": 1.3816610657705234e-05, "loss": 0.224, "step": 12880 }, { "epoch": 3.09, "learning_rate": 1.3811809889582335e-05, "loss": 0.1724, "step": 12890 }, { "epoch": 3.1, "learning_rate": 1.3807009121459434e-05, "loss": 0.2105, "step": 12900 }, { "epoch": 3.1, "learning_rate": 1.3802208353336535e-05, "loss": 0.1632, "step": 12910 }, { "epoch": 3.1, "learning_rate": 1.3797407585213635e-05, "loss": 0.1207, "step": 12920 }, { "epoch": 3.1, "learning_rate": 1.3792606817090735e-05, "loss": 0.1772, "step": 12930 }, { "epoch": 3.11, "learning_rate": 1.3787806048967835e-05, "loss": 0.1582, "step": 12940 }, { "epoch": 3.11, "learning_rate": 1.3783005280844936e-05, "loss": 0.2478, "step": 12950 }, { "epoch": 3.11, "learning_rate": 1.3778204512722037e-05, "loss": 0.2557, "step": 12960 }, { "epoch": 3.11, "learning_rate": 1.3773403744599136e-05, "loss": 0.1532, "step": 12970 }, { "epoch": 3.12, "learning_rate": 1.3768602976476237e-05, "loss": 0.0719, "step": 12980 }, { "epoch": 3.12, "learning_rate": 1.3763802208353338e-05, "loss": 0.1269, "step": 12990 }, { "epoch": 3.12, "learning_rate": 1.3759001440230437e-05, "loss": 0.2606, "step": 13000 }, { "epoch": 3.12, "learning_rate": 1.3754200672107538e-05, "loss": 0.0655, "step": 13010 }, { "epoch": 3.13, "learning_rate": 1.3749399903984639e-05, "loss": 0.3242, "step": 13020 }, { "epoch": 3.13, "learning_rate": 1.3744599135861738e-05, "loss": 0.1404, "step": 13030 }, { "epoch": 3.13, "learning_rate": 1.3739798367738839e-05, "loss": 0.1755, "step": 13040 }, { "epoch": 3.13, "learning_rate": 1.373499759961594e-05, "loss": 0.1227, "step": 13050 }, { "epoch": 3.13, "learning_rate": 1.3730196831493039e-05, "loss": 0.1876, "step": 13060 }, { "epoch": 3.14, "learning_rate": 1.372539606337014e-05, "loss": 0.2574, "step": 13070 }, { "epoch": 3.14, "learning_rate": 1.372059529524724e-05, "loss": 0.231, "step": 13080 }, { "epoch": 3.14, "learning_rate": 1.3715794527124341e-05, "loss": 0.1954, "step": 13090 }, { "epoch": 3.14, "learning_rate": 1.371099375900144e-05, "loss": 0.0904, "step": 13100 }, { "epoch": 3.15, "learning_rate": 1.3706192990878541e-05, "loss": 0.2386, "step": 13110 }, { "epoch": 3.15, "learning_rate": 1.3701392222755642e-05, "loss": 0.1465, "step": 13120 }, { "epoch": 3.15, "learning_rate": 1.3696591454632741e-05, "loss": 0.2315, "step": 13130 }, { "epoch": 3.15, "learning_rate": 1.3691790686509842e-05, "loss": 0.1774, "step": 13140 }, { "epoch": 3.16, "learning_rate": 1.3686989918386943e-05, "loss": 0.1009, "step": 13150 }, { "epoch": 3.16, "learning_rate": 1.3682189150264042e-05, "loss": 0.2987, "step": 13160 }, { "epoch": 3.16, "learning_rate": 1.3677388382141143e-05, "loss": 0.1496, "step": 13170 }, { "epoch": 3.16, "learning_rate": 1.3672587614018244e-05, "loss": 0.0935, "step": 13180 }, { "epoch": 3.17, "learning_rate": 1.3667786845895343e-05, "loss": 0.1828, "step": 13190 }, { "epoch": 3.17, "learning_rate": 1.3662986077772444e-05, "loss": 0.1867, "step": 13200 }, { "epoch": 3.17, "learning_rate": 1.3658185309649545e-05, "loss": 0.1345, "step": 13210 }, { "epoch": 3.17, "learning_rate": 1.3653384541526647e-05, "loss": 0.0513, "step": 13220 }, { "epoch": 3.18, "learning_rate": 1.3648583773403745e-05, "loss": 0.3241, "step": 13230 }, { "epoch": 3.18, "learning_rate": 1.3643783005280845e-05, "loss": 0.0991, "step": 13240 }, { "epoch": 3.18, "learning_rate": 1.3638982237157948e-05, "loss": 0.1115, "step": 13250 }, { "epoch": 3.18, "learning_rate": 1.3634181469035045e-05, "loss": 0.224, "step": 13260 }, { "epoch": 3.19, "learning_rate": 1.3629380700912146e-05, "loss": 0.2751, "step": 13270 }, { "epoch": 3.19, "learning_rate": 1.3624579932789249e-05, "loss": 0.0207, "step": 13280 }, { "epoch": 3.19, "learning_rate": 1.3619779164666346e-05, "loss": 0.0919, "step": 13290 }, { "epoch": 3.19, "learning_rate": 1.3614978396543447e-05, "loss": 0.3005, "step": 13300 }, { "epoch": 3.19, "learning_rate": 1.361017762842055e-05, "loss": 0.3086, "step": 13310 }, { "epoch": 3.2, "learning_rate": 1.3605376860297647e-05, "loss": 0.2728, "step": 13320 }, { "epoch": 3.2, "learning_rate": 1.3600576092174748e-05, "loss": 0.0788, "step": 13330 }, { "epoch": 3.2, "learning_rate": 1.359577532405185e-05, "loss": 0.1995, "step": 13340 }, { "epoch": 3.2, "learning_rate": 1.3590974555928951e-05, "loss": 0.2567, "step": 13350 }, { "epoch": 3.21, "learning_rate": 1.3586173787806049e-05, "loss": 0.1863, "step": 13360 }, { "epoch": 3.21, "learning_rate": 1.3581373019683151e-05, "loss": 0.2475, "step": 13370 }, { "epoch": 3.21, "learning_rate": 1.3576572251560252e-05, "loss": 0.1014, "step": 13380 }, { "epoch": 3.21, "learning_rate": 1.357177148343735e-05, "loss": 0.2758, "step": 13390 }, { "epoch": 3.22, "learning_rate": 1.3566970715314452e-05, "loss": 0.221, "step": 13400 }, { "epoch": 3.22, "learning_rate": 1.3562169947191553e-05, "loss": 0.1227, "step": 13410 }, { "epoch": 3.22, "learning_rate": 1.355736917906865e-05, "loss": 0.1679, "step": 13420 }, { "epoch": 3.22, "learning_rate": 1.3552568410945753e-05, "loss": 0.231, "step": 13430 }, { "epoch": 3.23, "learning_rate": 1.3547767642822854e-05, "loss": 0.3235, "step": 13440 }, { "epoch": 3.23, "learning_rate": 1.3542966874699951e-05, "loss": 0.2003, "step": 13450 }, { "epoch": 3.23, "learning_rate": 1.3538166106577054e-05, "loss": 0.2498, "step": 13460 }, { "epoch": 3.23, "learning_rate": 1.3533365338454155e-05, "loss": 0.212, "step": 13470 }, { "epoch": 3.24, "learning_rate": 1.3528564570331255e-05, "loss": 0.2615, "step": 13480 }, { "epoch": 3.24, "learning_rate": 1.3523763802208355e-05, "loss": 0.2424, "step": 13490 }, { "epoch": 3.24, "learning_rate": 1.3518963034085455e-05, "loss": 0.1213, "step": 13500 }, { "epoch": 3.24, "learning_rate": 1.3514162265962556e-05, "loss": 0.0742, "step": 13510 }, { "epoch": 3.25, "learning_rate": 1.3509361497839655e-05, "loss": 0.0757, "step": 13520 }, { "epoch": 3.25, "learning_rate": 1.3504560729716756e-05, "loss": 0.2055, "step": 13530 }, { "epoch": 3.25, "learning_rate": 1.3499759961593857e-05, "loss": 0.0865, "step": 13540 }, { "epoch": 3.25, "learning_rate": 1.3494959193470956e-05, "loss": 0.1126, "step": 13550 }, { "epoch": 3.25, "learning_rate": 1.3490158425348057e-05, "loss": 0.3053, "step": 13560 }, { "epoch": 3.26, "learning_rate": 1.3485357657225158e-05, "loss": 0.1582, "step": 13570 }, { "epoch": 3.26, "learning_rate": 1.3480556889102257e-05, "loss": 0.1463, "step": 13580 }, { "epoch": 3.26, "learning_rate": 1.3475756120979358e-05, "loss": 0.3074, "step": 13590 }, { "epoch": 3.26, "learning_rate": 1.3470955352856459e-05, "loss": 0.1445, "step": 13600 }, { "epoch": 3.27, "learning_rate": 1.346615458473356e-05, "loss": 0.1317, "step": 13610 }, { "epoch": 3.27, "learning_rate": 1.3461353816610659e-05, "loss": 0.2067, "step": 13620 }, { "epoch": 3.27, "learning_rate": 1.345655304848776e-05, "loss": 0.3204, "step": 13630 }, { "epoch": 3.27, "learning_rate": 1.345175228036486e-05, "loss": 0.1537, "step": 13640 }, { "epoch": 3.28, "learning_rate": 1.344695151224196e-05, "loss": 0.2585, "step": 13650 }, { "epoch": 3.28, "learning_rate": 1.344215074411906e-05, "loss": 0.2489, "step": 13660 }, { "epoch": 3.28, "learning_rate": 1.3437349975996161e-05, "loss": 0.1383, "step": 13670 }, { "epoch": 3.28, "learning_rate": 1.343254920787326e-05, "loss": 0.0854, "step": 13680 }, { "epoch": 3.29, "learning_rate": 1.3427748439750361e-05, "loss": 0.254, "step": 13690 }, { "epoch": 3.29, "learning_rate": 1.3422947671627462e-05, "loss": 0.379, "step": 13700 }, { "epoch": 3.29, "learning_rate": 1.3418146903504561e-05, "loss": 0.2196, "step": 13710 }, { "epoch": 3.29, "learning_rate": 1.3413346135381662e-05, "loss": 0.3524, "step": 13720 }, { "epoch": 3.3, "learning_rate": 1.3408545367258763e-05, "loss": 0.0453, "step": 13730 }, { "epoch": 3.3, "learning_rate": 1.3403744599135864e-05, "loss": 0.1446, "step": 13740 }, { "epoch": 3.3, "learning_rate": 1.3398943831012963e-05, "loss": 0.2736, "step": 13750 }, { "epoch": 3.3, "learning_rate": 1.3394143062890064e-05, "loss": 0.1868, "step": 13760 }, { "epoch": 3.31, "learning_rate": 1.3389342294767165e-05, "loss": 0.1, "step": 13770 }, { "epoch": 3.31, "learning_rate": 1.3384541526644264e-05, "loss": 0.199, "step": 13780 }, { "epoch": 3.31, "learning_rate": 1.3379740758521365e-05, "loss": 0.2656, "step": 13790 }, { "epoch": 3.31, "learning_rate": 1.3374939990398466e-05, "loss": 0.1734, "step": 13800 }, { "epoch": 3.31, "learning_rate": 1.3370139222275565e-05, "loss": 0.21, "step": 13810 }, { "epoch": 3.32, "learning_rate": 1.3365338454152666e-05, "loss": 0.285, "step": 13820 }, { "epoch": 3.32, "learning_rate": 1.3360537686029766e-05, "loss": 0.2771, "step": 13830 }, { "epoch": 3.32, "learning_rate": 1.3355736917906866e-05, "loss": 0.2536, "step": 13840 }, { "epoch": 3.32, "learning_rate": 1.3350936149783966e-05, "loss": 0.1195, "step": 13850 }, { "epoch": 3.33, "learning_rate": 1.3346135381661067e-05, "loss": 0.1885, "step": 13860 }, { "epoch": 3.33, "learning_rate": 1.3341334613538168e-05, "loss": 0.1271, "step": 13870 }, { "epoch": 3.33, "learning_rate": 1.3336533845415267e-05, "loss": 0.0776, "step": 13880 }, { "epoch": 3.33, "learning_rate": 1.3331733077292368e-05, "loss": 0.0926, "step": 13890 }, { "epoch": 3.34, "learning_rate": 1.3326932309169469e-05, "loss": 0.1214, "step": 13900 }, { "epoch": 3.34, "learning_rate": 1.3322131541046568e-05, "loss": 0.3027, "step": 13910 }, { "epoch": 3.34, "learning_rate": 1.3317330772923669e-05, "loss": 0.2863, "step": 13920 }, { "epoch": 3.34, "learning_rate": 1.331253000480077e-05, "loss": 0.2322, "step": 13930 }, { "epoch": 3.35, "learning_rate": 1.3307729236677869e-05, "loss": 0.21, "step": 13940 }, { "epoch": 3.35, "learning_rate": 1.330292846855497e-05, "loss": 0.0087, "step": 13950 }, { "epoch": 3.35, "learning_rate": 1.329812770043207e-05, "loss": 0.1244, "step": 13960 }, { "epoch": 3.35, "learning_rate": 1.329332693230917e-05, "loss": 0.098, "step": 13970 }, { "epoch": 3.36, "learning_rate": 1.328852616418627e-05, "loss": 0.0396, "step": 13980 }, { "epoch": 3.36, "learning_rate": 1.3283725396063371e-05, "loss": 0.2156, "step": 13990 }, { "epoch": 3.36, "learning_rate": 1.3278924627940472e-05, "loss": 0.2285, "step": 14000 }, { "epoch": 3.36, "learning_rate": 1.3274123859817571e-05, "loss": 0.093, "step": 14010 }, { "epoch": 3.37, "learning_rate": 1.3269323091694672e-05, "loss": 0.0888, "step": 14020 }, { "epoch": 3.37, "learning_rate": 1.3264522323571773e-05, "loss": 0.2583, "step": 14030 }, { "epoch": 3.37, "learning_rate": 1.3259721555448872e-05, "loss": 0.0801, "step": 14040 }, { "epoch": 3.37, "learning_rate": 1.3254920787325973e-05, "loss": 0.2272, "step": 14050 }, { "epoch": 3.37, "learning_rate": 1.3250120019203074e-05, "loss": 0.1998, "step": 14060 }, { "epoch": 3.38, "learning_rate": 1.3245319251080173e-05, "loss": 0.0903, "step": 14070 }, { "epoch": 3.38, "learning_rate": 1.3240518482957274e-05, "loss": 0.1902, "step": 14080 }, { "epoch": 3.38, "learning_rate": 1.3235717714834375e-05, "loss": 0.114, "step": 14090 }, { "epoch": 3.38, "learning_rate": 1.3230916946711474e-05, "loss": 0.1958, "step": 14100 }, { "epoch": 3.39, "learning_rate": 1.3226116178588575e-05, "loss": 0.2417, "step": 14110 }, { "epoch": 3.39, "learning_rate": 1.3221315410465676e-05, "loss": 0.1614, "step": 14120 }, { "epoch": 3.39, "learning_rate": 1.3216514642342776e-05, "loss": 0.0816, "step": 14130 }, { "epoch": 3.39, "learning_rate": 1.3211713874219876e-05, "loss": 0.1514, "step": 14140 }, { "epoch": 3.4, "learning_rate": 1.3206913106096976e-05, "loss": 0.0999, "step": 14150 }, { "epoch": 3.4, "learning_rate": 1.3202112337974077e-05, "loss": 0.2251, "step": 14160 }, { "epoch": 3.4, "learning_rate": 1.3197311569851176e-05, "loss": 0.2379, "step": 14170 }, { "epoch": 3.4, "learning_rate": 1.3192510801728277e-05, "loss": 0.1113, "step": 14180 }, { "epoch": 3.41, "learning_rate": 1.3187710033605378e-05, "loss": 0.1042, "step": 14190 }, { "epoch": 3.41, "learning_rate": 1.3182909265482477e-05, "loss": 0.2047, "step": 14200 }, { "epoch": 3.41, "learning_rate": 1.3178108497359578e-05, "loss": 0.2152, "step": 14210 }, { "epoch": 3.41, "learning_rate": 1.3173307729236679e-05, "loss": 0.1043, "step": 14220 }, { "epoch": 3.42, "learning_rate": 1.3168506961113778e-05, "loss": 0.2235, "step": 14230 }, { "epoch": 3.42, "learning_rate": 1.3163706192990879e-05, "loss": 0.0452, "step": 14240 }, { "epoch": 3.42, "learning_rate": 1.315890542486798e-05, "loss": 0.2039, "step": 14250 }, { "epoch": 3.42, "learning_rate": 1.3154104656745079e-05, "loss": 0.1909, "step": 14260 }, { "epoch": 3.43, "learning_rate": 1.314930388862218e-05, "loss": 0.0699, "step": 14270 }, { "epoch": 3.43, "learning_rate": 1.314450312049928e-05, "loss": 0.2831, "step": 14280 }, { "epoch": 3.43, "learning_rate": 1.3139702352376381e-05, "loss": 0.2112, "step": 14290 }, { "epoch": 3.43, "learning_rate": 1.313490158425348e-05, "loss": 0.1573, "step": 14300 }, { "epoch": 3.43, "learning_rate": 1.3130100816130581e-05, "loss": 0.2213, "step": 14310 }, { "epoch": 3.44, "learning_rate": 1.3125300048007682e-05, "loss": 0.2092, "step": 14320 }, { "epoch": 3.44, "learning_rate": 1.3120499279884781e-05, "loss": 0.2019, "step": 14330 }, { "epoch": 3.44, "learning_rate": 1.3115698511761882e-05, "loss": 0.0454, "step": 14340 }, { "epoch": 3.44, "learning_rate": 1.3110897743638983e-05, "loss": 0.3562, "step": 14350 }, { "epoch": 3.45, "learning_rate": 1.3106096975516082e-05, "loss": 0.1206, "step": 14360 }, { "epoch": 3.45, "learning_rate": 1.3101296207393183e-05, "loss": 0.2038, "step": 14370 }, { "epoch": 3.45, "learning_rate": 1.3096495439270284e-05, "loss": 0.181, "step": 14380 }, { "epoch": 3.45, "learning_rate": 1.3091694671147383e-05, "loss": 0.0628, "step": 14390 }, { "epoch": 3.46, "learning_rate": 1.3086893903024484e-05, "loss": 0.2226, "step": 14400 }, { "epoch": 3.46, "learning_rate": 1.3082093134901585e-05, "loss": 0.0373, "step": 14410 }, { "epoch": 3.46, "learning_rate": 1.3077292366778687e-05, "loss": 0.1289, "step": 14420 }, { "epoch": 3.46, "learning_rate": 1.3072491598655785e-05, "loss": 0.0771, "step": 14430 }, { "epoch": 3.47, "learning_rate": 1.3067690830532886e-05, "loss": 0.2937, "step": 14440 }, { "epoch": 3.47, "learning_rate": 1.3062890062409988e-05, "loss": 0.2052, "step": 14450 }, { "epoch": 3.47, "learning_rate": 1.3058089294287086e-05, "loss": 0.185, "step": 14460 }, { "epoch": 3.47, "learning_rate": 1.3053288526164186e-05, "loss": 0.1627, "step": 14470 }, { "epoch": 3.48, "learning_rate": 1.3048487758041289e-05, "loss": 0.0552, "step": 14480 }, { "epoch": 3.48, "learning_rate": 1.3043686989918386e-05, "loss": 0.0897, "step": 14490 }, { "epoch": 3.48, "learning_rate": 1.3038886221795489e-05, "loss": 0.2872, "step": 14500 }, { "epoch": 3.48, "learning_rate": 1.303408545367259e-05, "loss": 0.1088, "step": 14510 }, { "epoch": 3.49, "learning_rate": 1.3029284685549687e-05, "loss": 0.2288, "step": 14520 }, { "epoch": 3.49, "learning_rate": 1.302448391742679e-05, "loss": 0.0859, "step": 14530 }, { "epoch": 3.49, "learning_rate": 1.301968314930389e-05, "loss": 0.2351, "step": 14540 }, { "epoch": 3.49, "learning_rate": 1.3014882381180991e-05, "loss": 0.2432, "step": 14550 }, { "epoch": 3.49, "learning_rate": 1.301008161305809e-05, "loss": 0.193, "step": 14560 }, { "epoch": 3.5, "learning_rate": 1.3005280844935191e-05, "loss": 0.1116, "step": 14570 }, { "epoch": 3.5, "learning_rate": 1.3000480076812292e-05, "loss": 0.1057, "step": 14580 }, { "epoch": 3.5, "learning_rate": 1.2995679308689391e-05, "loss": 0.209, "step": 14590 }, { "epoch": 3.5, "learning_rate": 1.2990878540566492e-05, "loss": 0.1492, "step": 14600 }, { "epoch": 3.51, "learning_rate": 1.2986077772443593e-05, "loss": 0.1865, "step": 14610 }, { "epoch": 3.51, "learning_rate": 1.2981277004320692e-05, "loss": 0.0575, "step": 14620 }, { "epoch": 3.51, "learning_rate": 1.2976476236197793e-05, "loss": 0.226, "step": 14630 }, { "epoch": 3.51, "learning_rate": 1.2971675468074894e-05, "loss": 0.0842, "step": 14640 }, { "epoch": 3.52, "learning_rate": 1.2966874699951993e-05, "loss": 0.2408, "step": 14650 }, { "epoch": 3.52, "learning_rate": 1.2962073931829094e-05, "loss": 0.2219, "step": 14660 }, { "epoch": 3.52, "learning_rate": 1.2957273163706195e-05, "loss": 0.1683, "step": 14670 }, { "epoch": 3.52, "learning_rate": 1.2952472395583296e-05, "loss": 0.1358, "step": 14680 }, { "epoch": 3.53, "learning_rate": 1.2947671627460395e-05, "loss": 0.3367, "step": 14690 }, { "epoch": 3.53, "learning_rate": 1.2942870859337496e-05, "loss": 0.2099, "step": 14700 }, { "epoch": 3.53, "learning_rate": 1.2938070091214597e-05, "loss": 0.2868, "step": 14710 }, { "epoch": 3.53, "learning_rate": 1.2933269323091696e-05, "loss": 0.1333, "step": 14720 }, { "epoch": 3.54, "learning_rate": 1.2928468554968797e-05, "loss": 0.1113, "step": 14730 }, { "epoch": 3.54, "learning_rate": 1.2923667786845897e-05, "loss": 0.2277, "step": 14740 }, { "epoch": 3.54, "learning_rate": 1.2918867018722997e-05, "loss": 0.1579, "step": 14750 }, { "epoch": 3.54, "learning_rate": 1.2914066250600097e-05, "loss": 0.1595, "step": 14760 }, { "epoch": 3.55, "learning_rate": 1.2909265482477198e-05, "loss": 0.3039, "step": 14770 }, { "epoch": 3.55, "learning_rate": 1.2904464714354297e-05, "loss": 0.1915, "step": 14780 }, { "epoch": 3.55, "learning_rate": 1.2899663946231398e-05, "loss": 0.0972, "step": 14790 }, { "epoch": 3.55, "learning_rate": 1.2894863178108499e-05, "loss": 0.2483, "step": 14800 }, { "epoch": 3.55, "learning_rate": 1.28900624099856e-05, "loss": 0.2983, "step": 14810 }, { "epoch": 3.56, "learning_rate": 1.2885261641862699e-05, "loss": 0.1354, "step": 14820 }, { "epoch": 3.56, "learning_rate": 1.28804608737398e-05, "loss": 0.2247, "step": 14830 }, { "epoch": 3.56, "learning_rate": 1.28756601056169e-05, "loss": 0.0247, "step": 14840 }, { "epoch": 3.56, "learning_rate": 1.2870859337494e-05, "loss": 0.1584, "step": 14850 }, { "epoch": 3.57, "learning_rate": 1.28660585693711e-05, "loss": 0.1437, "step": 14860 }, { "epoch": 3.57, "learning_rate": 1.2861257801248202e-05, "loss": 0.1723, "step": 14870 }, { "epoch": 3.57, "learning_rate": 1.28564570331253e-05, "loss": 0.0568, "step": 14880 }, { "epoch": 3.57, "learning_rate": 1.2851656265002402e-05, "loss": 0.1586, "step": 14890 }, { "epoch": 3.58, "learning_rate": 1.2846855496879502e-05, "loss": 0.0733, "step": 14900 }, { "epoch": 3.58, "learning_rate": 1.2842054728756602e-05, "loss": 0.0914, "step": 14910 }, { "epoch": 3.58, "learning_rate": 1.2837253960633702e-05, "loss": 0.2732, "step": 14920 }, { "epoch": 3.58, "learning_rate": 1.2832453192510803e-05, "loss": 0.1728, "step": 14930 }, { "epoch": 3.59, "learning_rate": 1.2827652424387904e-05, "loss": 0.1559, "step": 14940 }, { "epoch": 3.59, "learning_rate": 1.2822851656265003e-05, "loss": 0.3054, "step": 14950 }, { "epoch": 3.59, "learning_rate": 1.2818050888142104e-05, "loss": 0.2016, "step": 14960 }, { "epoch": 3.59, "learning_rate": 1.2813250120019205e-05, "loss": 0.096, "step": 14970 }, { "epoch": 3.6, "learning_rate": 1.2808449351896304e-05, "loss": 0.3411, "step": 14980 }, { "epoch": 3.6, "learning_rate": 1.2803648583773405e-05, "loss": 0.1373, "step": 14990 }, { "epoch": 3.6, "learning_rate": 1.2798847815650506e-05, "loss": 0.1088, "step": 15000 }, { "epoch": 3.6, "learning_rate": 1.2794047047527605e-05, "loss": 0.0894, "step": 15010 }, { "epoch": 3.61, "learning_rate": 1.2789246279404706e-05, "loss": 0.0775, "step": 15020 }, { "epoch": 3.61, "learning_rate": 1.2784445511281807e-05, "loss": 0.2274, "step": 15030 }, { "epoch": 3.61, "learning_rate": 1.2779644743158906e-05, "loss": 0.1833, "step": 15040 }, { "epoch": 3.61, "learning_rate": 1.2774843975036007e-05, "loss": 0.0732, "step": 15050 }, { "epoch": 3.61, "learning_rate": 1.2770043206913107e-05, "loss": 0.078, "step": 15060 }, { "epoch": 3.62, "learning_rate": 1.2765242438790208e-05, "loss": 0.1856, "step": 15070 }, { "epoch": 3.62, "learning_rate": 1.2760441670667307e-05, "loss": 0.1172, "step": 15080 }, { "epoch": 3.62, "learning_rate": 1.2755640902544408e-05, "loss": 0.1687, "step": 15090 }, { "epoch": 3.62, "learning_rate": 1.2750840134421509e-05, "loss": 0.3823, "step": 15100 }, { "epoch": 3.63, "learning_rate": 1.2746039366298608e-05, "loss": 0.2649, "step": 15110 }, { "epoch": 3.63, "learning_rate": 1.2741238598175709e-05, "loss": 0.1884, "step": 15120 }, { "epoch": 3.63, "learning_rate": 1.273643783005281e-05, "loss": 0.096, "step": 15130 }, { "epoch": 3.63, "learning_rate": 1.2731637061929909e-05, "loss": 0.0961, "step": 15140 }, { "epoch": 3.64, "learning_rate": 1.272683629380701e-05, "loss": 0.0619, "step": 15150 }, { "epoch": 3.64, "learning_rate": 1.272203552568411e-05, "loss": 0.2301, "step": 15160 }, { "epoch": 3.64, "learning_rate": 1.271723475756121e-05, "loss": 0.286, "step": 15170 }, { "epoch": 3.64, "learning_rate": 1.271243398943831e-05, "loss": 0.1029, "step": 15180 }, { "epoch": 3.65, "learning_rate": 1.2707633221315412e-05, "loss": 0.1848, "step": 15190 }, { "epoch": 3.65, "learning_rate": 1.2702832453192512e-05, "loss": 0.0732, "step": 15200 }, { "epoch": 3.65, "learning_rate": 1.2698031685069612e-05, "loss": 0.1323, "step": 15210 }, { "epoch": 3.65, "learning_rate": 1.2693230916946712e-05, "loss": 0.1789, "step": 15220 }, { "epoch": 3.66, "learning_rate": 1.2688430148823813e-05, "loss": 0.1113, "step": 15230 }, { "epoch": 3.66, "learning_rate": 1.2683629380700912e-05, "loss": 0.1314, "step": 15240 }, { "epoch": 3.66, "learning_rate": 1.2678828612578013e-05, "loss": 0.0374, "step": 15250 }, { "epoch": 3.66, "learning_rate": 1.2674027844455114e-05, "loss": 0.2404, "step": 15260 }, { "epoch": 3.67, "learning_rate": 1.2669227076332213e-05, "loss": 0.2263, "step": 15270 }, { "epoch": 3.67, "learning_rate": 1.2664426308209314e-05, "loss": 0.1551, "step": 15280 }, { "epoch": 3.67, "learning_rate": 1.2659625540086415e-05, "loss": 0.104, "step": 15290 }, { "epoch": 3.67, "learning_rate": 1.2654824771963514e-05, "loss": 0.0599, "step": 15300 }, { "epoch": 3.67, "learning_rate": 1.2650024003840615e-05, "loss": 0.2928, "step": 15310 }, { "epoch": 3.68, "learning_rate": 1.2645223235717716e-05, "loss": 0.2366, "step": 15320 }, { "epoch": 3.68, "learning_rate": 1.2640422467594817e-05, "loss": 0.1708, "step": 15330 }, { "epoch": 3.68, "learning_rate": 1.2635621699471916e-05, "loss": 0.2391, "step": 15340 }, { "epoch": 3.68, "learning_rate": 1.2630820931349017e-05, "loss": 0.0755, "step": 15350 }, { "epoch": 3.69, "learning_rate": 1.2626020163226117e-05, "loss": 0.1634, "step": 15360 }, { "epoch": 3.69, "learning_rate": 1.2621219395103217e-05, "loss": 0.2716, "step": 15370 }, { "epoch": 3.69, "learning_rate": 1.2616418626980317e-05, "loss": 0.0929, "step": 15380 }, { "epoch": 3.69, "learning_rate": 1.2611617858857418e-05, "loss": 0.0948, "step": 15390 }, { "epoch": 3.7, "learning_rate": 1.2606817090734517e-05, "loss": 0.1283, "step": 15400 }, { "epoch": 3.7, "learning_rate": 1.2602016322611618e-05, "loss": 0.2531, "step": 15410 }, { "epoch": 3.7, "learning_rate": 1.2597215554488719e-05, "loss": 0.1154, "step": 15420 }, { "epoch": 3.7, "learning_rate": 1.2592414786365818e-05, "loss": 0.1397, "step": 15430 }, { "epoch": 3.71, "learning_rate": 1.2587614018242919e-05, "loss": 0.3529, "step": 15440 }, { "epoch": 3.71, "learning_rate": 1.258281325012002e-05, "loss": 0.2267, "step": 15450 }, { "epoch": 3.71, "learning_rate": 1.2578012481997122e-05, "loss": 0.1405, "step": 15460 }, { "epoch": 3.71, "learning_rate": 1.257321171387422e-05, "loss": 0.0208, "step": 15470 }, { "epoch": 3.72, "learning_rate": 1.256841094575132e-05, "loss": 0.1602, "step": 15480 }, { "epoch": 3.72, "learning_rate": 1.2563610177628423e-05, "loss": 0.2042, "step": 15490 }, { "epoch": 3.72, "learning_rate": 1.255880940950552e-05, "loss": 0.0106, "step": 15500 }, { "epoch": 3.72, "learning_rate": 1.2554008641382622e-05, "loss": 0.212, "step": 15510 }, { "epoch": 3.73, "learning_rate": 1.2549207873259724e-05, "loss": 0.1279, "step": 15520 }, { "epoch": 3.73, "learning_rate": 1.2544407105136822e-05, "loss": 0.2245, "step": 15530 }, { "epoch": 3.73, "learning_rate": 1.2539606337013922e-05, "loss": 0.1547, "step": 15540 }, { "epoch": 3.73, "learning_rate": 1.2534805568891025e-05, "loss": 0.1279, "step": 15550 }, { "epoch": 3.73, "learning_rate": 1.2530004800768122e-05, "loss": 0.1042, "step": 15560 }, { "epoch": 3.74, "learning_rate": 1.2525204032645223e-05, "loss": 0.2089, "step": 15570 }, { "epoch": 3.74, "learning_rate": 1.2520403264522326e-05, "loss": 0.2795, "step": 15580 }, { "epoch": 3.74, "learning_rate": 1.2515602496399427e-05, "loss": 0.2023, "step": 15590 }, { "epoch": 3.74, "learning_rate": 1.2510801728276524e-05, "loss": 0.0131, "step": 15600 }, { "epoch": 3.75, "learning_rate": 1.2506000960153627e-05, "loss": 0.2091, "step": 15610 }, { "epoch": 3.75, "learning_rate": 1.2501200192030728e-05, "loss": 0.1836, "step": 15620 }, { "epoch": 3.75, "learning_rate": 1.2496399423907825e-05, "loss": 0.1042, "step": 15630 }, { "epoch": 3.75, "learning_rate": 1.2491598655784927e-05, "loss": 0.1711, "step": 15640 }, { "epoch": 3.76, "learning_rate": 1.2486797887662028e-05, "loss": 0.2689, "step": 15650 }, { "epoch": 3.76, "learning_rate": 1.2481997119539126e-05, "loss": 0.0546, "step": 15660 }, { "epoch": 3.76, "learning_rate": 1.2477196351416228e-05, "loss": 0.0376, "step": 15670 }, { "epoch": 3.76, "learning_rate": 1.247239558329333e-05, "loss": 0.2063, "step": 15680 }, { "epoch": 3.77, "learning_rate": 1.2467594815170427e-05, "loss": 0.2549, "step": 15690 }, { "epoch": 3.77, "learning_rate": 1.246279404704753e-05, "loss": 0.1667, "step": 15700 }, { "epoch": 3.77, "learning_rate": 1.245799327892463e-05, "loss": 0.2155, "step": 15710 }, { "epoch": 3.77, "learning_rate": 1.2453192510801727e-05, "loss": 0.1918, "step": 15720 }, { "epoch": 3.78, "learning_rate": 1.244839174267883e-05, "loss": 0.2764, "step": 15730 }, { "epoch": 3.78, "learning_rate": 1.2443590974555931e-05, "loss": 0.2481, "step": 15740 }, { "epoch": 3.78, "learning_rate": 1.2438790206433032e-05, "loss": 0.1298, "step": 15750 }, { "epoch": 3.78, "learning_rate": 1.243398943831013e-05, "loss": 0.0848, "step": 15760 }, { "epoch": 3.79, "learning_rate": 1.2429188670187232e-05, "loss": 0.2643, "step": 15770 }, { "epoch": 3.79, "learning_rate": 1.2424387902064333e-05, "loss": 0.2249, "step": 15780 }, { "epoch": 3.79, "learning_rate": 1.2419587133941432e-05, "loss": 0.1375, "step": 15790 }, { "epoch": 3.79, "learning_rate": 1.2414786365818533e-05, "loss": 0.2448, "step": 15800 }, { "epoch": 3.8, "learning_rate": 1.2409985597695633e-05, "loss": 0.1616, "step": 15810 }, { "epoch": 3.8, "learning_rate": 1.2405184829572733e-05, "loss": 0.1173, "step": 15820 }, { "epoch": 3.8, "learning_rate": 1.2400384061449833e-05, "loss": 0.0892, "step": 15830 }, { "epoch": 3.8, "learning_rate": 1.2395583293326934e-05, "loss": 0.1786, "step": 15840 }, { "epoch": 3.8, "learning_rate": 1.2390782525204033e-05, "loss": 0.301, "step": 15850 }, { "epoch": 3.81, "learning_rate": 1.2385981757081134e-05, "loss": 0.155, "step": 15860 }, { "epoch": 3.81, "learning_rate": 1.2381180988958235e-05, "loss": 0.1148, "step": 15870 }, { "epoch": 3.81, "learning_rate": 1.2376380220835336e-05, "loss": 0.1996, "step": 15880 }, { "epoch": 3.81, "learning_rate": 1.2371579452712435e-05, "loss": 0.2966, "step": 15890 }, { "epoch": 3.82, "learning_rate": 1.2366778684589536e-05, "loss": 0.1079, "step": 15900 }, { "epoch": 3.82, "learning_rate": 1.2361977916466637e-05, "loss": 0.2542, "step": 15910 }, { "epoch": 3.82, "learning_rate": 1.2357177148343736e-05, "loss": 0.1314, "step": 15920 }, { "epoch": 3.82, "learning_rate": 1.2352376380220837e-05, "loss": 0.223, "step": 15930 }, { "epoch": 3.83, "learning_rate": 1.2347575612097938e-05, "loss": 0.1448, "step": 15940 }, { "epoch": 3.83, "learning_rate": 1.2342774843975037e-05, "loss": 0.0932, "step": 15950 }, { "epoch": 3.83, "learning_rate": 1.2337974075852138e-05, "loss": 0.092, "step": 15960 }, { "epoch": 3.83, "learning_rate": 1.2333173307729238e-05, "loss": 0.0868, "step": 15970 }, { "epoch": 3.84, "learning_rate": 1.2328372539606338e-05, "loss": 0.2071, "step": 15980 }, { "epoch": 3.84, "learning_rate": 1.2323571771483438e-05, "loss": 0.2415, "step": 15990 }, { "epoch": 3.84, "learning_rate": 1.231877100336054e-05, "loss": 0.0858, "step": 16000 }, { "epoch": 3.84, "learning_rate": 1.231397023523764e-05, "loss": 0.0158, "step": 16010 }, { "epoch": 3.85, "learning_rate": 1.230916946711474e-05, "loss": 0.2116, "step": 16020 }, { "epoch": 3.85, "learning_rate": 1.230436869899184e-05, "loss": 0.0287, "step": 16030 }, { "epoch": 3.85, "learning_rate": 1.2299567930868941e-05, "loss": 0.1323, "step": 16040 }, { "epoch": 3.85, "learning_rate": 1.229476716274604e-05, "loss": 0.0941, "step": 16050 }, { "epoch": 3.86, "learning_rate": 1.2289966394623141e-05, "loss": 0.3061, "step": 16060 }, { "epoch": 3.86, "learning_rate": 1.2285165626500242e-05, "loss": 0.0455, "step": 16070 }, { "epoch": 3.86, "learning_rate": 1.2280364858377341e-05, "loss": 0.1998, "step": 16080 }, { "epoch": 3.86, "learning_rate": 1.2275564090254442e-05, "loss": 0.4682, "step": 16090 }, { "epoch": 3.86, "learning_rate": 1.2270763322131543e-05, "loss": 0.0575, "step": 16100 }, { "epoch": 3.87, "learning_rate": 1.2265962554008642e-05, "loss": 0.0866, "step": 16110 }, { "epoch": 3.87, "learning_rate": 1.2261161785885743e-05, "loss": 0.1079, "step": 16120 }, { "epoch": 3.87, "learning_rate": 1.2256361017762843e-05, "loss": 0.1024, "step": 16130 }, { "epoch": 3.87, "learning_rate": 1.2251560249639944e-05, "loss": 0.2423, "step": 16140 }, { "epoch": 3.88, "learning_rate": 1.2246759481517043e-05, "loss": 0.1786, "step": 16150 }, { "epoch": 3.88, "learning_rate": 1.2241958713394144e-05, "loss": 0.1133, "step": 16160 }, { "epoch": 3.88, "learning_rate": 1.2237157945271245e-05, "loss": 0.1581, "step": 16170 }, { "epoch": 3.88, "learning_rate": 1.2232357177148344e-05, "loss": 0.1095, "step": 16180 }, { "epoch": 3.89, "learning_rate": 1.2227556409025445e-05, "loss": 0.3906, "step": 16190 }, { "epoch": 3.89, "learning_rate": 1.2222755640902546e-05, "loss": 0.415, "step": 16200 }, { "epoch": 3.89, "learning_rate": 1.2217954872779645e-05, "loss": 0.053, "step": 16210 }, { "epoch": 3.89, "learning_rate": 1.2213154104656746e-05, "loss": 0.2106, "step": 16220 }, { "epoch": 3.9, "learning_rate": 1.2208353336533847e-05, "loss": 0.1024, "step": 16230 }, { "epoch": 3.9, "learning_rate": 1.2203552568410946e-05, "loss": 0.1113, "step": 16240 }, { "epoch": 3.9, "learning_rate": 1.2198751800288047e-05, "loss": 0.2038, "step": 16250 }, { "epoch": 3.9, "learning_rate": 1.2193951032165148e-05, "loss": 0.1252, "step": 16260 }, { "epoch": 3.91, "learning_rate": 1.2189150264042248e-05, "loss": 0.1538, "step": 16270 }, { "epoch": 3.91, "learning_rate": 1.2184349495919348e-05, "loss": 0.1372, "step": 16280 }, { "epoch": 3.91, "learning_rate": 1.2179548727796448e-05, "loss": 0.1523, "step": 16290 }, { "epoch": 3.91, "learning_rate": 1.217474795967355e-05, "loss": 0.1139, "step": 16300 }, { "epoch": 3.92, "learning_rate": 1.2169947191550648e-05, "loss": 0.1087, "step": 16310 }, { "epoch": 3.92, "learning_rate": 1.216514642342775e-05, "loss": 0.1898, "step": 16320 }, { "epoch": 3.92, "learning_rate": 1.216034565530485e-05, "loss": 0.1214, "step": 16330 }, { "epoch": 3.92, "learning_rate": 1.215554488718195e-05, "loss": 0.2339, "step": 16340 }, { "epoch": 3.92, "learning_rate": 1.215074411905905e-05, "loss": 0.203, "step": 16350 }, { "epoch": 3.93, "learning_rate": 1.2145943350936151e-05, "loss": 0.2726, "step": 16360 }, { "epoch": 3.93, "learning_rate": 1.214114258281325e-05, "loss": 0.1904, "step": 16370 }, { "epoch": 3.93, "learning_rate": 1.2136341814690351e-05, "loss": 0.1762, "step": 16380 }, { "epoch": 3.93, "learning_rate": 1.2131541046567452e-05, "loss": 0.2288, "step": 16390 }, { "epoch": 3.94, "learning_rate": 1.2126740278444553e-05, "loss": 0.0262, "step": 16400 }, { "epoch": 3.94, "learning_rate": 1.2121939510321652e-05, "loss": 0.1427, "step": 16410 }, { "epoch": 3.94, "learning_rate": 1.2117138742198753e-05, "loss": 0.124, "step": 16420 }, { "epoch": 3.94, "learning_rate": 1.2112337974075853e-05, "loss": 0.129, "step": 16430 }, { "epoch": 3.95, "learning_rate": 1.2107537205952953e-05, "loss": 0.1269, "step": 16440 }, { "epoch": 3.95, "learning_rate": 1.2102736437830053e-05, "loss": 0.12, "step": 16450 }, { "epoch": 3.95, "learning_rate": 1.2097935669707154e-05, "loss": 0.1905, "step": 16460 }, { "epoch": 3.95, "learning_rate": 1.2093134901584253e-05, "loss": 0.1511, "step": 16470 }, { "epoch": 3.96, "learning_rate": 1.2088334133461354e-05, "loss": 0.112, "step": 16480 }, { "epoch": 3.96, "learning_rate": 1.2083533365338455e-05, "loss": 0.1972, "step": 16490 }, { "epoch": 3.96, "learning_rate": 1.2078732597215554e-05, "loss": 0.1213, "step": 16500 }, { "epoch": 3.96, "learning_rate": 1.2073931829092655e-05, "loss": 0.2227, "step": 16510 }, { "epoch": 3.97, "learning_rate": 1.2069131060969756e-05, "loss": 0.1679, "step": 16520 }, { "epoch": 3.97, "learning_rate": 1.2064330292846857e-05, "loss": 0.2295, "step": 16530 }, { "epoch": 3.97, "learning_rate": 1.2059529524723956e-05, "loss": 0.2329, "step": 16540 }, { "epoch": 3.97, "learning_rate": 1.2054728756601057e-05, "loss": 0.0916, "step": 16550 }, { "epoch": 3.98, "learning_rate": 1.2049927988478158e-05, "loss": 0.0707, "step": 16560 }, { "epoch": 3.98, "learning_rate": 1.2045127220355257e-05, "loss": 0.1396, "step": 16570 }, { "epoch": 3.98, "learning_rate": 1.2040326452232358e-05, "loss": 0.2439, "step": 16580 }, { "epoch": 3.98, "learning_rate": 1.2035525684109458e-05, "loss": 0.0556, "step": 16590 }, { "epoch": 3.98, "learning_rate": 1.2030724915986558e-05, "loss": 0.1437, "step": 16600 }, { "epoch": 3.99, "learning_rate": 1.2025924147863658e-05, "loss": 0.1124, "step": 16610 }, { "epoch": 3.99, "learning_rate": 1.202112337974076e-05, "loss": 0.1308, "step": 16620 }, { "epoch": 3.99, "learning_rate": 1.2016322611617858e-05, "loss": 0.3201, "step": 16630 }, { "epoch": 3.99, "learning_rate": 1.201152184349496e-05, "loss": 0.1035, "step": 16640 }, { "epoch": 4.0, "learning_rate": 1.200672107537206e-05, "loss": 0.3391, "step": 16650 }, { "epoch": 4.0, "learning_rate": 1.2001920307249163e-05, "loss": 0.0953, "step": 16660 }, { "epoch": 4.0, "eval_accuracy": 0.9982998979938796, "eval_loss": 0.01134026050567627, "eval_runtime": 514.5133, "eval_samples_per_second": 11.432, "eval_steps_per_second": 1.43, "step": 16664 }, { "epoch": 4.0, "learning_rate": 1.199711953912626e-05, "loss": 0.1111, "step": 16670 }, { "epoch": 4.0, "learning_rate": 1.1992318771003361e-05, "loss": 0.0915, "step": 16680 }, { "epoch": 4.01, "learning_rate": 1.1987518002880464e-05, "loss": 0.1586, "step": 16690 }, { "epoch": 4.01, "learning_rate": 1.1982717234757561e-05, "loss": 0.3341, "step": 16700 }, { "epoch": 4.01, "learning_rate": 1.1977916466634662e-05, "loss": 0.103, "step": 16710 }, { "epoch": 4.01, "learning_rate": 1.1973115698511764e-05, "loss": 0.0366, "step": 16720 }, { "epoch": 4.02, "learning_rate": 1.1968314930388862e-05, "loss": 0.1628, "step": 16730 }, { "epoch": 4.02, "learning_rate": 1.1963514162265963e-05, "loss": 0.1286, "step": 16740 }, { "epoch": 4.02, "learning_rate": 1.1958713394143065e-05, "loss": 0.2569, "step": 16750 }, { "epoch": 4.02, "learning_rate": 1.1953912626020163e-05, "loss": 0.1188, "step": 16760 }, { "epoch": 4.03, "learning_rate": 1.1949111857897263e-05, "loss": 0.2489, "step": 16770 }, { "epoch": 4.03, "learning_rate": 1.1944311089774366e-05, "loss": 0.1957, "step": 16780 }, { "epoch": 4.03, "learning_rate": 1.1939510321651467e-05, "loss": 0.1348, "step": 16790 }, { "epoch": 4.03, "learning_rate": 1.1934709553528564e-05, "loss": 0.3714, "step": 16800 }, { "epoch": 4.04, "learning_rate": 1.1929908785405667e-05, "loss": 0.1264, "step": 16810 }, { "epoch": 4.04, "learning_rate": 1.1925108017282768e-05, "loss": 0.0909, "step": 16820 }, { "epoch": 4.04, "learning_rate": 1.1920307249159865e-05, "loss": 0.1203, "step": 16830 }, { "epoch": 4.04, "learning_rate": 1.1915506481036968e-05, "loss": 0.1898, "step": 16840 }, { "epoch": 4.04, "learning_rate": 1.1910705712914069e-05, "loss": 0.0655, "step": 16850 }, { "epoch": 4.05, "learning_rate": 1.1905904944791166e-05, "loss": 0.0952, "step": 16860 }, { "epoch": 4.05, "learning_rate": 1.1901104176668269e-05, "loss": 0.1401, "step": 16870 }, { "epoch": 4.05, "learning_rate": 1.189630340854537e-05, "loss": 0.1629, "step": 16880 }, { "epoch": 4.05, "learning_rate": 1.1891502640422467e-05, "loss": 0.1457, "step": 16890 }, { "epoch": 4.06, "learning_rate": 1.188670187229957e-05, "loss": 0.0459, "step": 16900 }, { "epoch": 4.06, "learning_rate": 1.188190110417667e-05, "loss": 0.1654, "step": 16910 }, { "epoch": 4.06, "learning_rate": 1.1877100336053771e-05, "loss": 0.0484, "step": 16920 }, { "epoch": 4.06, "learning_rate": 1.187229956793087e-05, "loss": 0.1428, "step": 16930 }, { "epoch": 4.07, "learning_rate": 1.1867498799807971e-05, "loss": 0.1877, "step": 16940 }, { "epoch": 4.07, "learning_rate": 1.1862698031685072e-05, "loss": 0.1808, "step": 16950 }, { "epoch": 4.07, "learning_rate": 1.1857897263562171e-05, "loss": 0.1693, "step": 16960 }, { "epoch": 4.07, "learning_rate": 1.1853096495439272e-05, "loss": 0.0924, "step": 16970 }, { "epoch": 4.08, "learning_rate": 1.1848295727316373e-05, "loss": 0.3183, "step": 16980 }, { "epoch": 4.08, "learning_rate": 1.1843494959193472e-05, "loss": 0.1466, "step": 16990 }, { "epoch": 4.08, "learning_rate": 1.1838694191070573e-05, "loss": 0.0974, "step": 17000 }, { "epoch": 4.08, "learning_rate": 1.1833893422947674e-05, "loss": 0.1933, "step": 17010 }, { "epoch": 4.09, "learning_rate": 1.1829092654824773e-05, "loss": 0.1988, "step": 17020 }, { "epoch": 4.09, "learning_rate": 1.1824291886701874e-05, "loss": 0.3429, "step": 17030 }, { "epoch": 4.09, "learning_rate": 1.1819491118578974e-05, "loss": 0.1169, "step": 17040 }, { "epoch": 4.09, "learning_rate": 1.1814690350456075e-05, "loss": 0.1339, "step": 17050 }, { "epoch": 4.1, "learning_rate": 1.1809889582333174e-05, "loss": 0.1416, "step": 17060 }, { "epoch": 4.1, "learning_rate": 1.1805088814210275e-05, "loss": 0.1262, "step": 17070 }, { "epoch": 4.1, "learning_rate": 1.1800288046087376e-05, "loss": 0.1731, "step": 17080 }, { "epoch": 4.1, "learning_rate": 1.1795487277964475e-05, "loss": 0.057, "step": 17090 }, { "epoch": 4.1, "learning_rate": 1.1790686509841576e-05, "loss": 0.1439, "step": 17100 }, { "epoch": 4.11, "learning_rate": 1.1785885741718677e-05, "loss": 0.1839, "step": 17110 }, { "epoch": 4.11, "learning_rate": 1.1781084973595776e-05, "loss": 0.1761, "step": 17120 }, { "epoch": 4.11, "learning_rate": 1.1776284205472877e-05, "loss": 0.0387, "step": 17130 }, { "epoch": 4.11, "learning_rate": 1.1771483437349978e-05, "loss": 0.0616, "step": 17140 }, { "epoch": 4.12, "learning_rate": 1.1766682669227077e-05, "loss": 0.3693, "step": 17150 }, { "epoch": 4.12, "learning_rate": 1.1761881901104178e-05, "loss": 0.2378, "step": 17160 }, { "epoch": 4.12, "learning_rate": 1.1757081132981279e-05, "loss": 0.2855, "step": 17170 }, { "epoch": 4.12, "learning_rate": 1.1752280364858378e-05, "loss": 0.1147, "step": 17180 }, { "epoch": 4.13, "learning_rate": 1.1747479596735479e-05, "loss": 0.0328, "step": 17190 }, { "epoch": 4.13, "learning_rate": 1.174267882861258e-05, "loss": 0.1441, "step": 17200 }, { "epoch": 4.13, "learning_rate": 1.173787806048968e-05, "loss": 0.0887, "step": 17210 }, { "epoch": 4.13, "learning_rate": 1.173307729236678e-05, "loss": 0.1302, "step": 17220 }, { "epoch": 4.14, "learning_rate": 1.172827652424388e-05, "loss": 0.2209, "step": 17230 }, { "epoch": 4.14, "learning_rate": 1.1723475756120981e-05, "loss": 0.1899, "step": 17240 }, { "epoch": 4.14, "learning_rate": 1.171867498799808e-05, "loss": 0.1993, "step": 17250 }, { "epoch": 4.14, "learning_rate": 1.1713874219875181e-05, "loss": 0.2106, "step": 17260 }, { "epoch": 4.15, "learning_rate": 1.1709073451752282e-05, "loss": 0.2002, "step": 17270 }, { "epoch": 4.15, "learning_rate": 1.1704272683629381e-05, "loss": 0.1638, "step": 17280 }, { "epoch": 4.15, "learning_rate": 1.1699471915506482e-05, "loss": 0.2725, "step": 17290 }, { "epoch": 4.15, "learning_rate": 1.1694671147383583e-05, "loss": 0.2701, "step": 17300 }, { "epoch": 4.16, "learning_rate": 1.1689870379260682e-05, "loss": 0.2262, "step": 17310 }, { "epoch": 4.16, "learning_rate": 1.1685069611137783e-05, "loss": 0.1649, "step": 17320 }, { "epoch": 4.16, "learning_rate": 1.1680268843014884e-05, "loss": 0.092, "step": 17330 }, { "epoch": 4.16, "learning_rate": 1.1675468074891984e-05, "loss": 0.1584, "step": 17340 }, { "epoch": 4.16, "learning_rate": 1.1670667306769084e-05, "loss": 0.1937, "step": 17350 }, { "epoch": 4.17, "learning_rate": 1.1665866538646184e-05, "loss": 0.1445, "step": 17360 }, { "epoch": 4.17, "learning_rate": 1.1661065770523285e-05, "loss": 0.1609, "step": 17370 }, { "epoch": 4.17, "learning_rate": 1.1656265002400384e-05, "loss": 0.1111, "step": 17380 }, { "epoch": 4.17, "learning_rate": 1.1651464234277485e-05, "loss": 0.0643, "step": 17390 }, { "epoch": 4.18, "learning_rate": 1.1646663466154586e-05, "loss": 0.1454, "step": 17400 }, { "epoch": 4.18, "learning_rate": 1.1641862698031685e-05, "loss": 0.1178, "step": 17410 }, { "epoch": 4.18, "learning_rate": 1.1637061929908786e-05, "loss": 0.0629, "step": 17420 }, { "epoch": 4.18, "learning_rate": 1.1632261161785887e-05, "loss": 0.0369, "step": 17430 }, { "epoch": 4.19, "learning_rate": 1.1627460393662986e-05, "loss": 0.0455, "step": 17440 }, { "epoch": 4.19, "learning_rate": 1.1622659625540087e-05, "loss": 0.107, "step": 17450 }, { "epoch": 4.19, "learning_rate": 1.1617858857417188e-05, "loss": 0.148, "step": 17460 }, { "epoch": 4.19, "learning_rate": 1.1613058089294289e-05, "loss": 0.1942, "step": 17470 }, { "epoch": 4.2, "learning_rate": 1.1608257321171388e-05, "loss": 0.1286, "step": 17480 }, { "epoch": 4.2, "learning_rate": 1.1603456553048489e-05, "loss": 0.13, "step": 17490 }, { "epoch": 4.2, "learning_rate": 1.159865578492559e-05, "loss": 0.1229, "step": 17500 }, { "epoch": 4.2, "learning_rate": 1.1593855016802689e-05, "loss": 0.1719, "step": 17510 }, { "epoch": 4.21, "learning_rate": 1.158905424867979e-05, "loss": 0.1498, "step": 17520 }, { "epoch": 4.21, "learning_rate": 1.158425348055689e-05, "loss": 0.1402, "step": 17530 }, { "epoch": 4.21, "learning_rate": 1.157945271243399e-05, "loss": 0.3317, "step": 17540 }, { "epoch": 4.21, "learning_rate": 1.157465194431109e-05, "loss": 0.1315, "step": 17550 }, { "epoch": 4.22, "learning_rate": 1.1569851176188191e-05, "loss": 0.1067, "step": 17560 }, { "epoch": 4.22, "learning_rate": 1.156505040806529e-05, "loss": 0.1837, "step": 17570 }, { "epoch": 4.22, "learning_rate": 1.1560249639942391e-05, "loss": 0.1995, "step": 17580 }, { "epoch": 4.22, "learning_rate": 1.1555448871819492e-05, "loss": 0.2198, "step": 17590 }, { "epoch": 4.22, "learning_rate": 1.1550648103696593e-05, "loss": 0.1542, "step": 17600 }, { "epoch": 4.23, "learning_rate": 1.1545847335573692e-05, "loss": 0.0782, "step": 17610 }, { "epoch": 4.23, "learning_rate": 1.1541046567450793e-05, "loss": 0.2669, "step": 17620 }, { "epoch": 4.23, "learning_rate": 1.1536245799327894e-05, "loss": 0.0132, "step": 17630 }, { "epoch": 4.23, "learning_rate": 1.1531445031204993e-05, "loss": 0.2486, "step": 17640 }, { "epoch": 4.24, "learning_rate": 1.1526644263082094e-05, "loss": 0.1794, "step": 17650 }, { "epoch": 4.24, "learning_rate": 1.1521843494959194e-05, "loss": 0.0648, "step": 17660 }, { "epoch": 4.24, "learning_rate": 1.1517042726836294e-05, "loss": 0.2922, "step": 17670 }, { "epoch": 4.24, "learning_rate": 1.1512241958713394e-05, "loss": 0.0661, "step": 17680 }, { "epoch": 4.25, "learning_rate": 1.1507441190590495e-05, "loss": 0.1633, "step": 17690 }, { "epoch": 4.25, "learning_rate": 1.1502640422467594e-05, "loss": 0.061, "step": 17700 }, { "epoch": 4.25, "learning_rate": 1.1497839654344695e-05, "loss": 0.1788, "step": 17710 }, { "epoch": 4.25, "learning_rate": 1.1493038886221796e-05, "loss": 0.1882, "step": 17720 }, { "epoch": 4.26, "learning_rate": 1.1488238118098897e-05, "loss": 0.3, "step": 17730 }, { "epoch": 4.26, "learning_rate": 1.1483437349975996e-05, "loss": 0.3165, "step": 17740 }, { "epoch": 4.26, "learning_rate": 1.1478636581853097e-05, "loss": 0.0919, "step": 17750 }, { "epoch": 4.26, "learning_rate": 1.1473835813730198e-05, "loss": 0.1332, "step": 17760 }, { "epoch": 4.27, "learning_rate": 1.1469035045607297e-05, "loss": 0.1854, "step": 17770 }, { "epoch": 4.27, "learning_rate": 1.1464234277484398e-05, "loss": 0.053, "step": 17780 }, { "epoch": 4.27, "learning_rate": 1.1459433509361499e-05, "loss": 0.2584, "step": 17790 }, { "epoch": 4.27, "learning_rate": 1.1454632741238598e-05, "loss": 0.1938, "step": 17800 }, { "epoch": 4.28, "learning_rate": 1.1449831973115699e-05, "loss": 0.1523, "step": 17810 }, { "epoch": 4.28, "learning_rate": 1.14450312049928e-05, "loss": 0.0308, "step": 17820 }, { "epoch": 4.28, "learning_rate": 1.1440230436869899e-05, "loss": 0.2052, "step": 17830 }, { "epoch": 4.28, "learning_rate": 1.1435429668747e-05, "loss": 0.2572, "step": 17840 }, { "epoch": 4.28, "learning_rate": 1.14306289006241e-05, "loss": 0.0975, "step": 17850 }, { "epoch": 4.29, "learning_rate": 1.1425828132501203e-05, "loss": 0.2768, "step": 17860 }, { "epoch": 4.29, "learning_rate": 1.14210273643783e-05, "loss": 0.2362, "step": 17870 }, { "epoch": 4.29, "learning_rate": 1.1416226596255401e-05, "loss": 0.2411, "step": 17880 }, { "epoch": 4.29, "learning_rate": 1.1411425828132504e-05, "loss": 0.1508, "step": 17890 }, { "epoch": 4.3, "learning_rate": 1.1406625060009601e-05, "loss": 0.1502, "step": 17900 }, { "epoch": 4.3, "learning_rate": 1.1401824291886702e-05, "loss": 0.0543, "step": 17910 }, { "epoch": 4.3, "learning_rate": 1.1397023523763805e-05, "loss": 0.0654, "step": 17920 }, { "epoch": 4.3, "learning_rate": 1.1392222755640902e-05, "loss": 0.1301, "step": 17930 }, { "epoch": 4.31, "learning_rate": 1.1387421987518003e-05, "loss": 0.2797, "step": 17940 }, { "epoch": 4.31, "learning_rate": 1.1382621219395105e-05, "loss": 0.2444, "step": 17950 }, { "epoch": 4.31, "learning_rate": 1.1377820451272203e-05, "loss": 0.0555, "step": 17960 }, { "epoch": 4.31, "learning_rate": 1.1373019683149304e-05, "loss": 0.39, "step": 17970 }, { "epoch": 4.32, "learning_rate": 1.1368218915026406e-05, "loss": 0.0872, "step": 17980 }, { "epoch": 4.32, "learning_rate": 1.1363418146903507e-05, "loss": 0.1286, "step": 17990 }, { "epoch": 4.32, "learning_rate": 1.1358617378780605e-05, "loss": 0.2268, "step": 18000 }, { "epoch": 4.32, "learning_rate": 1.1353816610657707e-05, "loss": 0.1159, "step": 18010 }, { "epoch": 4.33, "learning_rate": 1.1349015842534808e-05, "loss": 0.2392, "step": 18020 }, { "epoch": 4.33, "learning_rate": 1.1344215074411905e-05, "loss": 0.1014, "step": 18030 }, { "epoch": 4.33, "learning_rate": 1.1339414306289008e-05, "loss": 0.0968, "step": 18040 }, { "epoch": 4.33, "learning_rate": 1.1334613538166109e-05, "loss": 0.156, "step": 18050 }, { "epoch": 4.34, "learning_rate": 1.1329812770043206e-05, "loss": 0.083, "step": 18060 }, { "epoch": 4.34, "learning_rate": 1.1325012001920309e-05, "loss": 0.1678, "step": 18070 }, { "epoch": 4.34, "learning_rate": 1.132021123379741e-05, "loss": 0.1471, "step": 18080 }, { "epoch": 4.34, "learning_rate": 1.1315410465674507e-05, "loss": 0.2576, "step": 18090 }, { "epoch": 4.34, "learning_rate": 1.131060969755161e-05, "loss": 0.1527, "step": 18100 }, { "epoch": 4.35, "learning_rate": 1.130580892942871e-05, "loss": 0.0976, "step": 18110 }, { "epoch": 4.35, "learning_rate": 1.1301008161305811e-05, "loss": 0.1236, "step": 18120 }, { "epoch": 4.35, "learning_rate": 1.129620739318291e-05, "loss": 0.1652, "step": 18130 }, { "epoch": 4.35, "learning_rate": 1.1291406625060011e-05, "loss": 0.2625, "step": 18140 }, { "epoch": 4.36, "learning_rate": 1.1286605856937112e-05, "loss": 0.1496, "step": 18150 }, { "epoch": 4.36, "learning_rate": 1.1281805088814211e-05, "loss": 0.2683, "step": 18160 }, { "epoch": 4.36, "learning_rate": 1.1277004320691312e-05, "loss": 0.0809, "step": 18170 }, { "epoch": 4.36, "learning_rate": 1.1272203552568413e-05, "loss": 0.1193, "step": 18180 }, { "epoch": 4.37, "learning_rate": 1.1267402784445512e-05, "loss": 0.0153, "step": 18190 }, { "epoch": 4.37, "learning_rate": 1.1262602016322613e-05, "loss": 0.3455, "step": 18200 }, { "epoch": 4.37, "learning_rate": 1.1257801248199714e-05, "loss": 0.1779, "step": 18210 }, { "epoch": 4.37, "learning_rate": 1.1253000480076813e-05, "loss": 0.0654, "step": 18220 }, { "epoch": 4.38, "learning_rate": 1.1248199711953914e-05, "loss": 0.1511, "step": 18230 }, { "epoch": 4.38, "learning_rate": 1.1243398943831015e-05, "loss": 0.0947, "step": 18240 }, { "epoch": 4.38, "learning_rate": 1.1238598175708115e-05, "loss": 0.0489, "step": 18250 }, { "epoch": 4.38, "learning_rate": 1.1233797407585215e-05, "loss": 0.2878, "step": 18260 }, { "epoch": 4.39, "learning_rate": 1.1228996639462315e-05, "loss": 0.1612, "step": 18270 }, { "epoch": 4.39, "learning_rate": 1.1224195871339416e-05, "loss": 0.1771, "step": 18280 }, { "epoch": 4.39, "learning_rate": 1.1219395103216515e-05, "loss": 0.0957, "step": 18290 }, { "epoch": 4.39, "learning_rate": 1.1214594335093616e-05, "loss": 0.0988, "step": 18300 }, { "epoch": 4.4, "learning_rate": 1.1209793566970717e-05, "loss": 0.0742, "step": 18310 }, { "epoch": 4.4, "learning_rate": 1.1204992798847816e-05, "loss": 0.1248, "step": 18320 }, { "epoch": 4.4, "learning_rate": 1.1200192030724917e-05, "loss": 0.2212, "step": 18330 }, { "epoch": 4.4, "learning_rate": 1.1195391262602018e-05, "loss": 0.1609, "step": 18340 }, { "epoch": 4.4, "learning_rate": 1.1190590494479117e-05, "loss": 0.034, "step": 18350 }, { "epoch": 4.41, "learning_rate": 1.1185789726356218e-05, "loss": 0.2764, "step": 18360 }, { "epoch": 4.41, "learning_rate": 1.1180988958233319e-05, "loss": 0.0646, "step": 18370 }, { "epoch": 4.41, "learning_rate": 1.117618819011042e-05, "loss": 0.3224, "step": 18380 }, { "epoch": 4.41, "learning_rate": 1.1171387421987519e-05, "loss": 0.3037, "step": 18390 }, { "epoch": 4.42, "learning_rate": 1.116658665386462e-05, "loss": 0.1136, "step": 18400 }, { "epoch": 4.42, "learning_rate": 1.116178588574172e-05, "loss": 0.1244, "step": 18410 }, { "epoch": 4.42, "learning_rate": 1.115698511761882e-05, "loss": 0.1034, "step": 18420 }, { "epoch": 4.42, "learning_rate": 1.115218434949592e-05, "loss": 0.1198, "step": 18430 }, { "epoch": 4.43, "learning_rate": 1.1147383581373021e-05, "loss": 0.2697, "step": 18440 }, { "epoch": 4.43, "learning_rate": 1.114258281325012e-05, "loss": 0.2753, "step": 18450 }, { "epoch": 4.43, "learning_rate": 1.1137782045127221e-05, "loss": 0.0837, "step": 18460 }, { "epoch": 4.43, "learning_rate": 1.1132981277004322e-05, "loss": 0.2019, "step": 18470 }, { "epoch": 4.44, "learning_rate": 1.1128180508881421e-05, "loss": 0.2953, "step": 18480 }, { "epoch": 4.44, "learning_rate": 1.1123379740758522e-05, "loss": 0.157, "step": 18490 }, { "epoch": 4.44, "learning_rate": 1.1118578972635623e-05, "loss": 0.0641, "step": 18500 }, { "epoch": 4.44, "learning_rate": 1.1113778204512724e-05, "loss": 0.1297, "step": 18510 }, { "epoch": 4.45, "learning_rate": 1.1108977436389823e-05, "loss": 0.1481, "step": 18520 }, { "epoch": 4.45, "learning_rate": 1.1104176668266924e-05, "loss": 0.0494, "step": 18530 }, { "epoch": 4.45, "learning_rate": 1.1099375900144025e-05, "loss": 0.086, "step": 18540 }, { "epoch": 4.45, "learning_rate": 1.1094575132021124e-05, "loss": 0.2122, "step": 18550 }, { "epoch": 4.46, "learning_rate": 1.1089774363898225e-05, "loss": 0.091, "step": 18560 }, { "epoch": 4.46, "learning_rate": 1.1084973595775325e-05, "loss": 0.1277, "step": 18570 }, { "epoch": 4.46, "learning_rate": 1.1080172827652425e-05, "loss": 0.0822, "step": 18580 }, { "epoch": 4.46, "learning_rate": 1.1075372059529525e-05, "loss": 0.1275, "step": 18590 }, { "epoch": 4.46, "learning_rate": 1.1070571291406626e-05, "loss": 0.1348, "step": 18600 }, { "epoch": 4.47, "learning_rate": 1.1065770523283725e-05, "loss": 0.1216, "step": 18610 }, { "epoch": 4.47, "learning_rate": 1.1060969755160826e-05, "loss": 0.2042, "step": 18620 }, { "epoch": 4.47, "learning_rate": 1.1056168987037927e-05, "loss": 0.3463, "step": 18630 }, { "epoch": 4.47, "learning_rate": 1.1051368218915026e-05, "loss": 0.065, "step": 18640 }, { "epoch": 4.48, "learning_rate": 1.1046567450792127e-05, "loss": 0.2147, "step": 18650 }, { "epoch": 4.48, "learning_rate": 1.1041766682669228e-05, "loss": 0.0212, "step": 18660 }, { "epoch": 4.48, "learning_rate": 1.1036965914546329e-05, "loss": 0.1466, "step": 18670 }, { "epoch": 4.48, "learning_rate": 1.1032165146423428e-05, "loss": 0.4545, "step": 18680 }, { "epoch": 4.49, "learning_rate": 1.1027364378300529e-05, "loss": 0.2321, "step": 18690 }, { "epoch": 4.49, "learning_rate": 1.102256361017763e-05, "loss": 0.1884, "step": 18700 }, { "epoch": 4.49, "learning_rate": 1.1017762842054729e-05, "loss": 0.2749, "step": 18710 }, { "epoch": 4.49, "learning_rate": 1.101296207393183e-05, "loss": 0.1542, "step": 18720 }, { "epoch": 4.5, "learning_rate": 1.100816130580893e-05, "loss": 0.1061, "step": 18730 }, { "epoch": 4.5, "learning_rate": 1.100336053768603e-05, "loss": 0.0758, "step": 18740 }, { "epoch": 4.5, "learning_rate": 1.099855976956313e-05, "loss": 0.1873, "step": 18750 }, { "epoch": 4.5, "learning_rate": 1.0993759001440231e-05, "loss": 0.0921, "step": 18760 }, { "epoch": 4.51, "learning_rate": 1.098895823331733e-05, "loss": 0.1542, "step": 18770 }, { "epoch": 4.51, "learning_rate": 1.0984157465194431e-05, "loss": 0.1391, "step": 18780 }, { "epoch": 4.51, "learning_rate": 1.0979356697071532e-05, "loss": 0.1835, "step": 18790 }, { "epoch": 4.51, "learning_rate": 1.0974555928948633e-05, "loss": 0.1821, "step": 18800 }, { "epoch": 4.52, "learning_rate": 1.0969755160825732e-05, "loss": 0.1109, "step": 18810 }, { "epoch": 4.52, "learning_rate": 1.0964954392702833e-05, "loss": 0.0555, "step": 18820 }, { "epoch": 4.52, "learning_rate": 1.0960153624579934e-05, "loss": 0.2581, "step": 18830 }, { "epoch": 4.52, "learning_rate": 1.0955352856457033e-05, "loss": 0.2598, "step": 18840 }, { "epoch": 4.52, "learning_rate": 1.0950552088334134e-05, "loss": 0.203, "step": 18850 }, { "epoch": 4.53, "learning_rate": 1.0945751320211235e-05, "loss": 0.1052, "step": 18860 }, { "epoch": 4.53, "learning_rate": 1.0940950552088334e-05, "loss": 0.0144, "step": 18870 }, { "epoch": 4.53, "learning_rate": 1.0936149783965435e-05, "loss": 0.1813, "step": 18880 }, { "epoch": 4.53, "learning_rate": 1.0931349015842536e-05, "loss": 0.1861, "step": 18890 }, { "epoch": 4.54, "learning_rate": 1.0926548247719635e-05, "loss": 0.1467, "step": 18900 }, { "epoch": 4.54, "learning_rate": 1.0921747479596735e-05, "loss": 0.2253, "step": 18910 }, { "epoch": 4.54, "learning_rate": 1.0916946711473836e-05, "loss": 0.1522, "step": 18920 }, { "epoch": 4.54, "learning_rate": 1.0912145943350937e-05, "loss": 0.1577, "step": 18930 }, { "epoch": 4.55, "learning_rate": 1.0907345175228036e-05, "loss": 0.1278, "step": 18940 }, { "epoch": 4.55, "learning_rate": 1.0902544407105137e-05, "loss": 0.1783, "step": 18950 }, { "epoch": 4.55, "learning_rate": 1.0897743638982238e-05, "loss": 0.0888, "step": 18960 }, { "epoch": 4.55, "learning_rate": 1.0892942870859337e-05, "loss": 0.19, "step": 18970 }, { "epoch": 4.56, "learning_rate": 1.0888142102736438e-05, "loss": 0.166, "step": 18980 }, { "epoch": 4.56, "learning_rate": 1.0883341334613539e-05, "loss": 0.3331, "step": 18990 }, { "epoch": 4.56, "learning_rate": 1.0878540566490638e-05, "loss": 0.0942, "step": 19000 }, { "epoch": 4.56, "learning_rate": 1.0873739798367739e-05, "loss": 0.2903, "step": 19010 }, { "epoch": 4.57, "learning_rate": 1.0868939030244841e-05, "loss": 0.1409, "step": 19020 }, { "epoch": 4.57, "learning_rate": 1.0864138262121939e-05, "loss": 0.0761, "step": 19030 }, { "epoch": 4.57, "learning_rate": 1.085933749399904e-05, "loss": 0.1623, "step": 19040 }, { "epoch": 4.57, "learning_rate": 1.0854536725876142e-05, "loss": 0.1639, "step": 19050 }, { "epoch": 4.58, "learning_rate": 1.0849735957753243e-05, "loss": 0.1958, "step": 19060 }, { "epoch": 4.58, "learning_rate": 1.084493518963034e-05, "loss": 0.0872, "step": 19070 }, { "epoch": 4.58, "learning_rate": 1.0840134421507443e-05, "loss": 0.2083, "step": 19080 }, { "epoch": 4.58, "learning_rate": 1.0835333653384544e-05, "loss": 0.1247, "step": 19090 }, { "epoch": 4.58, "learning_rate": 1.0830532885261641e-05, "loss": 0.0674, "step": 19100 }, { "epoch": 4.59, "learning_rate": 1.0825732117138744e-05, "loss": 0.0053, "step": 19110 }, { "epoch": 4.59, "learning_rate": 1.0820931349015845e-05, "loss": 0.1899, "step": 19120 }, { "epoch": 4.59, "learning_rate": 1.0816130580892942e-05, "loss": 0.1545, "step": 19130 }, { "epoch": 4.59, "learning_rate": 1.0811329812770045e-05, "loss": 0.1403, "step": 19140 }, { "epoch": 4.6, "learning_rate": 1.0806529044647146e-05, "loss": 0.2222, "step": 19150 }, { "epoch": 4.6, "learning_rate": 1.0801728276524243e-05, "loss": 0.3289, "step": 19160 }, { "epoch": 4.6, "learning_rate": 1.0796927508401346e-05, "loss": 0.1257, "step": 19170 }, { "epoch": 4.6, "learning_rate": 1.0792126740278446e-05, "loss": 0.0612, "step": 19180 }, { "epoch": 4.61, "learning_rate": 1.0787325972155547e-05, "loss": 0.2134, "step": 19190 }, { "epoch": 4.61, "learning_rate": 1.0782525204032646e-05, "loss": 0.1621, "step": 19200 }, { "epoch": 4.61, "learning_rate": 1.0777724435909747e-05, "loss": 0.2597, "step": 19210 }, { "epoch": 4.61, "learning_rate": 1.0772923667786848e-05, "loss": 0.1157, "step": 19220 }, { "epoch": 4.62, "learning_rate": 1.0768122899663947e-05, "loss": 0.137, "step": 19230 }, { "epoch": 4.62, "learning_rate": 1.0763322131541048e-05, "loss": 0.0738, "step": 19240 }, { "epoch": 4.62, "learning_rate": 1.0758521363418149e-05, "loss": 0.2909, "step": 19250 }, { "epoch": 4.62, "learning_rate": 1.0753720595295248e-05, "loss": 0.0436, "step": 19260 }, { "epoch": 4.63, "learning_rate": 1.0748919827172349e-05, "loss": 0.1493, "step": 19270 }, { "epoch": 4.63, "learning_rate": 1.074411905904945e-05, "loss": 0.279, "step": 19280 }, { "epoch": 4.63, "learning_rate": 1.0739318290926549e-05, "loss": 0.1393, "step": 19290 }, { "epoch": 4.63, "learning_rate": 1.073451752280365e-05, "loss": 0.2138, "step": 19300 }, { "epoch": 4.64, "learning_rate": 1.072971675468075e-05, "loss": 0.0686, "step": 19310 }, { "epoch": 4.64, "learning_rate": 1.0724915986557851e-05, "loss": 0.216, "step": 19320 }, { "epoch": 4.64, "learning_rate": 1.072011521843495e-05, "loss": 0.11, "step": 19330 }, { "epoch": 4.64, "learning_rate": 1.0715314450312051e-05, "loss": 0.2092, "step": 19340 }, { "epoch": 4.64, "learning_rate": 1.0710513682189152e-05, "loss": 0.116, "step": 19350 }, { "epoch": 4.65, "learning_rate": 1.0705712914066251e-05, "loss": 0.2469, "step": 19360 }, { "epoch": 4.65, "learning_rate": 1.0700912145943352e-05, "loss": 0.0953, "step": 19370 }, { "epoch": 4.65, "learning_rate": 1.0696111377820453e-05, "loss": 0.0989, "step": 19380 }, { "epoch": 4.65, "learning_rate": 1.0691310609697552e-05, "loss": 0.122, "step": 19390 }, { "epoch": 4.66, "learning_rate": 1.0686509841574653e-05, "loss": 0.2108, "step": 19400 }, { "epoch": 4.66, "learning_rate": 1.0681709073451754e-05, "loss": 0.161, "step": 19410 }, { "epoch": 4.66, "learning_rate": 1.0676908305328853e-05, "loss": 0.1641, "step": 19420 }, { "epoch": 4.66, "learning_rate": 1.0672107537205954e-05, "loss": 0.263, "step": 19430 }, { "epoch": 4.67, "learning_rate": 1.0667306769083055e-05, "loss": 0.0792, "step": 19440 }, { "epoch": 4.67, "learning_rate": 1.0662506000960156e-05, "loss": 0.0966, "step": 19450 }, { "epoch": 4.67, "learning_rate": 1.0657705232837255e-05, "loss": 0.1117, "step": 19460 }, { "epoch": 4.67, "learning_rate": 1.0652904464714356e-05, "loss": 0.1938, "step": 19470 }, { "epoch": 4.68, "learning_rate": 1.0648103696591456e-05, "loss": 0.2601, "step": 19480 }, { "epoch": 4.68, "learning_rate": 1.0643302928468556e-05, "loss": 0.108, "step": 19490 }, { "epoch": 4.68, "learning_rate": 1.0638502160345656e-05, "loss": 0.2481, "step": 19500 }, { "epoch": 4.68, "learning_rate": 1.0633701392222757e-05, "loss": 0.2096, "step": 19510 }, { "epoch": 4.69, "learning_rate": 1.0628900624099856e-05, "loss": 0.2282, "step": 19520 }, { "epoch": 4.69, "learning_rate": 1.0624099855976957e-05, "loss": 0.1648, "step": 19530 }, { "epoch": 4.69, "learning_rate": 1.0619299087854058e-05, "loss": 0.1631, "step": 19540 }, { "epoch": 4.69, "learning_rate": 1.0614498319731157e-05, "loss": 0.21, "step": 19550 }, { "epoch": 4.7, "learning_rate": 1.0609697551608258e-05, "loss": 0.1879, "step": 19560 }, { "epoch": 4.7, "learning_rate": 1.0604896783485359e-05, "loss": 0.1041, "step": 19570 }, { "epoch": 4.7, "learning_rate": 1.060009601536246e-05, "loss": 0.2326, "step": 19580 }, { "epoch": 4.7, "learning_rate": 1.0595295247239559e-05, "loss": 0.1513, "step": 19590 }, { "epoch": 4.7, "learning_rate": 1.059049447911666e-05, "loss": 0.1195, "step": 19600 }, { "epoch": 4.71, "learning_rate": 1.058569371099376e-05, "loss": 0.096, "step": 19610 }, { "epoch": 4.71, "learning_rate": 1.058089294287086e-05, "loss": 0.2725, "step": 19620 }, { "epoch": 4.71, "learning_rate": 1.057609217474796e-05, "loss": 0.0975, "step": 19630 }, { "epoch": 4.71, "learning_rate": 1.0571291406625061e-05, "loss": 0.113, "step": 19640 }, { "epoch": 4.72, "learning_rate": 1.056649063850216e-05, "loss": 0.062, "step": 19650 }, { "epoch": 4.72, "learning_rate": 1.0561689870379261e-05, "loss": 0.2259, "step": 19660 }, { "epoch": 4.72, "learning_rate": 1.0556889102256362e-05, "loss": 0.0499, "step": 19670 }, { "epoch": 4.72, "learning_rate": 1.0552088334133461e-05, "loss": 0.076, "step": 19680 }, { "epoch": 4.73, "learning_rate": 1.0547287566010562e-05, "loss": 0.2014, "step": 19690 }, { "epoch": 4.73, "learning_rate": 1.0542486797887663e-05, "loss": 0.2395, "step": 19700 }, { "epoch": 4.73, "learning_rate": 1.0537686029764764e-05, "loss": 0.0829, "step": 19710 }, { "epoch": 4.73, "learning_rate": 1.0532885261641863e-05, "loss": 0.1266, "step": 19720 }, { "epoch": 4.74, "learning_rate": 1.0528084493518964e-05, "loss": 0.1522, "step": 19730 }, { "epoch": 4.74, "learning_rate": 1.0523283725396065e-05, "loss": 0.0219, "step": 19740 }, { "epoch": 4.74, "learning_rate": 1.0518482957273164e-05, "loss": 0.1634, "step": 19750 }, { "epoch": 4.74, "learning_rate": 1.0513682189150265e-05, "loss": 0.0393, "step": 19760 }, { "epoch": 4.75, "learning_rate": 1.0508881421027366e-05, "loss": 0.175, "step": 19770 }, { "epoch": 4.75, "learning_rate": 1.0504080652904465e-05, "loss": 0.0398, "step": 19780 }, { "epoch": 4.75, "learning_rate": 1.0499279884781566e-05, "loss": 0.2301, "step": 19790 }, { "epoch": 4.75, "learning_rate": 1.0494479116658666e-05, "loss": 0.2285, "step": 19800 }, { "epoch": 4.76, "learning_rate": 1.0489678348535766e-05, "loss": 0.1778, "step": 19810 }, { "epoch": 4.76, "learning_rate": 1.0484877580412866e-05, "loss": 0.0901, "step": 19820 }, { "epoch": 4.76, "learning_rate": 1.0480076812289967e-05, "loss": 0.2282, "step": 19830 }, { "epoch": 4.76, "learning_rate": 1.0475276044167068e-05, "loss": 0.2151, "step": 19840 }, { "epoch": 4.76, "learning_rate": 1.0470475276044167e-05, "loss": 0.1444, "step": 19850 }, { "epoch": 4.77, "learning_rate": 1.0465674507921268e-05, "loss": 0.1353, "step": 19860 }, { "epoch": 4.77, "learning_rate": 1.0460873739798369e-05, "loss": 0.0451, "step": 19870 }, { "epoch": 4.77, "learning_rate": 1.0456072971675468e-05, "loss": 0.0515, "step": 19880 }, { "epoch": 4.77, "learning_rate": 1.0451272203552569e-05, "loss": 0.1176, "step": 19890 }, { "epoch": 4.78, "learning_rate": 1.044647143542967e-05, "loss": 0.0929, "step": 19900 }, { "epoch": 4.78, "learning_rate": 1.0441670667306769e-05, "loss": 0.3223, "step": 19910 }, { "epoch": 4.78, "learning_rate": 1.043686989918387e-05, "loss": 0.1727, "step": 19920 }, { "epoch": 4.78, "learning_rate": 1.043206913106097e-05, "loss": 0.0451, "step": 19930 }, { "epoch": 4.79, "learning_rate": 1.042726836293807e-05, "loss": 0.2133, "step": 19940 }, { "epoch": 4.79, "learning_rate": 1.042246759481517e-05, "loss": 0.1312, "step": 19950 }, { "epoch": 4.79, "learning_rate": 1.0417666826692272e-05, "loss": 0.1612, "step": 19960 }, { "epoch": 4.79, "learning_rate": 1.0412866058569372e-05, "loss": 0.1478, "step": 19970 }, { "epoch": 4.8, "learning_rate": 1.0408065290446472e-05, "loss": 0.0216, "step": 19980 }, { "epoch": 4.8, "learning_rate": 1.0403264522323572e-05, "loss": 0.0332, "step": 19990 }, { "epoch": 4.8, "learning_rate": 1.0398463754200673e-05, "loss": 0.0915, "step": 20000 }, { "epoch": 4.8, "learning_rate": 1.0393662986077772e-05, "loss": 0.1817, "step": 20010 }, { "epoch": 4.81, "learning_rate": 1.0388862217954873e-05, "loss": 0.124, "step": 20020 }, { "epoch": 4.81, "learning_rate": 1.0384061449831974e-05, "loss": 0.1593, "step": 20030 }, { "epoch": 4.81, "learning_rate": 1.0379260681709073e-05, "loss": 0.2087, "step": 20040 }, { "epoch": 4.81, "learning_rate": 1.0374459913586174e-05, "loss": 0.1367, "step": 20050 }, { "epoch": 4.82, "learning_rate": 1.0369659145463275e-05, "loss": 0.0998, "step": 20060 }, { "epoch": 4.82, "learning_rate": 1.0364858377340374e-05, "loss": 0.0786, "step": 20070 }, { "epoch": 4.82, "learning_rate": 1.0360057609217475e-05, "loss": 0.3306, "step": 20080 }, { "epoch": 4.82, "learning_rate": 1.0355256841094576e-05, "loss": 0.0529, "step": 20090 }, { "epoch": 4.82, "learning_rate": 1.0350456072971675e-05, "loss": 0.2779, "step": 20100 }, { "epoch": 4.83, "learning_rate": 1.0345655304848776e-05, "loss": 0.2016, "step": 20110 }, { "epoch": 4.83, "learning_rate": 1.0340854536725877e-05, "loss": 0.1131, "step": 20120 }, { "epoch": 4.83, "learning_rate": 1.0336053768602979e-05, "loss": 0.0348, "step": 20130 }, { "epoch": 4.83, "learning_rate": 1.0331253000480077e-05, "loss": 0.1864, "step": 20140 }, { "epoch": 4.84, "learning_rate": 1.0326452232357177e-05, "loss": 0.0716, "step": 20150 }, { "epoch": 4.84, "learning_rate": 1.032165146423428e-05, "loss": 0.1235, "step": 20160 }, { "epoch": 4.84, "learning_rate": 1.0316850696111377e-05, "loss": 0.225, "step": 20170 }, { "epoch": 4.84, "learning_rate": 1.0312049927988478e-05, "loss": 0.0424, "step": 20180 }, { "epoch": 4.85, "learning_rate": 1.030724915986558e-05, "loss": 0.1848, "step": 20190 }, { "epoch": 4.85, "learning_rate": 1.0302448391742678e-05, "loss": 0.2428, "step": 20200 }, { "epoch": 4.85, "learning_rate": 1.0297647623619779e-05, "loss": 0.2352, "step": 20210 }, { "epoch": 4.85, "learning_rate": 1.0292846855496882e-05, "loss": 0.1211, "step": 20220 }, { "epoch": 4.86, "learning_rate": 1.0288046087373979e-05, "loss": 0.1854, "step": 20230 }, { "epoch": 4.86, "learning_rate": 1.028324531925108e-05, "loss": 0.2107, "step": 20240 }, { "epoch": 4.86, "learning_rate": 1.0278444551128182e-05, "loss": 0.0815, "step": 20250 }, { "epoch": 4.86, "learning_rate": 1.0273643783005283e-05, "loss": 0.1032, "step": 20260 }, { "epoch": 4.87, "learning_rate": 1.026884301488238e-05, "loss": 0.0906, "step": 20270 }, { "epoch": 4.87, "learning_rate": 1.0264042246759483e-05, "loss": 0.1465, "step": 20280 }, { "epoch": 4.87, "learning_rate": 1.0259241478636584e-05, "loss": 0.139, "step": 20290 }, { "epoch": 4.87, "learning_rate": 1.0254440710513682e-05, "loss": 0.1765, "step": 20300 }, { "epoch": 4.88, "learning_rate": 1.0249639942390784e-05, "loss": 0.2019, "step": 20310 }, { "epoch": 4.88, "learning_rate": 1.0244839174267885e-05, "loss": 0.1373, "step": 20320 }, { "epoch": 4.88, "learning_rate": 1.0240038406144982e-05, "loss": 0.1912, "step": 20330 }, { "epoch": 4.88, "learning_rate": 1.0235237638022085e-05, "loss": 0.2043, "step": 20340 }, { "epoch": 4.88, "learning_rate": 1.0230436869899186e-05, "loss": 0.101, "step": 20350 }, { "epoch": 4.89, "learning_rate": 1.0225636101776283e-05, "loss": 0.1034, "step": 20360 }, { "epoch": 4.89, "learning_rate": 1.0220835333653386e-05, "loss": 0.0576, "step": 20370 }, { "epoch": 4.89, "learning_rate": 1.0216034565530487e-05, "loss": 0.08, "step": 20380 }, { "epoch": 4.89, "learning_rate": 1.0211233797407587e-05, "loss": 0.1144, "step": 20390 }, { "epoch": 4.9, "learning_rate": 1.0206433029284687e-05, "loss": 0.1167, "step": 20400 }, { "epoch": 4.9, "learning_rate": 1.0201632261161787e-05, "loss": 0.1824, "step": 20410 }, { "epoch": 4.9, "learning_rate": 1.0196831493038888e-05, "loss": 0.0094, "step": 20420 }, { "epoch": 4.9, "learning_rate": 1.0192030724915987e-05, "loss": 0.2312, "step": 20430 }, { "epoch": 4.91, "learning_rate": 1.0187229956793088e-05, "loss": 0.1083, "step": 20440 }, { "epoch": 4.91, "learning_rate": 1.0182429188670189e-05, "loss": 0.0617, "step": 20450 }, { "epoch": 4.91, "learning_rate": 1.0177628420547288e-05, "loss": 0.2505, "step": 20460 }, { "epoch": 4.91, "learning_rate": 1.0172827652424389e-05, "loss": 0.2413, "step": 20470 }, { "epoch": 4.92, "learning_rate": 1.016802688430149e-05, "loss": 0.0901, "step": 20480 }, { "epoch": 4.92, "learning_rate": 1.0163226116178589e-05, "loss": 0.079, "step": 20490 }, { "epoch": 4.92, "learning_rate": 1.015842534805569e-05, "loss": 0.1842, "step": 20500 }, { "epoch": 4.92, "learning_rate": 1.015362457993279e-05, "loss": 0.1913, "step": 20510 }, { "epoch": 4.93, "learning_rate": 1.0148823811809892e-05, "loss": 0.1038, "step": 20520 }, { "epoch": 4.93, "learning_rate": 1.014402304368699e-05, "loss": 0.24, "step": 20530 }, { "epoch": 4.93, "learning_rate": 1.0139222275564092e-05, "loss": 0.0364, "step": 20540 }, { "epoch": 4.93, "learning_rate": 1.0134421507441192e-05, "loss": 0.0442, "step": 20550 }, { "epoch": 4.94, "learning_rate": 1.0129620739318292e-05, "loss": 0.0907, "step": 20560 }, { "epoch": 4.94, "learning_rate": 1.0124819971195392e-05, "loss": 0.1739, "step": 20570 }, { "epoch": 4.94, "learning_rate": 1.0120019203072493e-05, "loss": 0.0714, "step": 20580 }, { "epoch": 4.94, "learning_rate": 1.0115218434949592e-05, "loss": 0.1442, "step": 20590 }, { "epoch": 4.94, "learning_rate": 1.0110417666826693e-05, "loss": 0.1367, "step": 20600 }, { "epoch": 4.95, "learning_rate": 1.0105616898703794e-05, "loss": 0.0988, "step": 20610 }, { "epoch": 4.95, "learning_rate": 1.0100816130580893e-05, "loss": 0.1826, "step": 20620 }, { "epoch": 4.95, "learning_rate": 1.0096015362457994e-05, "loss": 0.2755, "step": 20630 }, { "epoch": 4.95, "learning_rate": 1.0091214594335095e-05, "loss": 0.1459, "step": 20640 }, { "epoch": 4.96, "learning_rate": 1.0086413826212196e-05, "loss": 0.042, "step": 20650 }, { "epoch": 4.96, "learning_rate": 1.0081613058089295e-05, "loss": 0.1153, "step": 20660 }, { "epoch": 4.96, "learning_rate": 1.0076812289966396e-05, "loss": 0.0608, "step": 20670 }, { "epoch": 4.96, "learning_rate": 1.0072011521843497e-05, "loss": 0.0992, "step": 20680 }, { "epoch": 4.97, "learning_rate": 1.0067210753720596e-05, "loss": 0.1285, "step": 20690 }, { "epoch": 4.97, "learning_rate": 1.0062409985597697e-05, "loss": 0.1267, "step": 20700 }, { "epoch": 4.97, "learning_rate": 1.0057609217474797e-05, "loss": 0.1148, "step": 20710 }, { "epoch": 4.97, "learning_rate": 1.0052808449351897e-05, "loss": 0.214, "step": 20720 }, { "epoch": 4.98, "learning_rate": 1.0048007681228997e-05, "loss": 0.1003, "step": 20730 }, { "epoch": 4.98, "learning_rate": 1.0043206913106098e-05, "loss": 0.2603, "step": 20740 }, { "epoch": 4.98, "learning_rate": 1.0038406144983197e-05, "loss": 0.0703, "step": 20750 }, { "epoch": 4.98, "learning_rate": 1.0033605376860298e-05, "loss": 0.19, "step": 20760 }, { "epoch": 4.99, "learning_rate": 1.0028804608737399e-05, "loss": 0.1491, "step": 20770 }, { "epoch": 4.99, "learning_rate": 1.00240038406145e-05, "loss": 0.1619, "step": 20780 }, { "epoch": 4.99, "learning_rate": 1.0019203072491599e-05, "loss": 0.1406, "step": 20790 }, { "epoch": 4.99, "learning_rate": 1.00144023043687e-05, "loss": 0.1669, "step": 20800 }, { "epoch": 5.0, "learning_rate": 1.00096015362458e-05, "loss": 0.2989, "step": 20810 }, { "epoch": 5.0, "learning_rate": 1.00048007681229e-05, "loss": 0.167, "step": 20820 }, { "epoch": 5.0, "learning_rate": 1e-05, "loss": 0.1277, "step": 20830 }, { "epoch": 5.0, "eval_accuracy": 0.9984699081944917, "eval_loss": 0.007643561344593763, "eval_runtime": 516.7779, "eval_samples_per_second": 11.382, "eval_steps_per_second": 1.424, "step": 20830 }, { "epoch": 5.0, "learning_rate": 9.9951992318771e-06, "loss": 0.2282, "step": 20840 }, { "epoch": 5.0, "learning_rate": 9.990398463754203e-06, "loss": 0.1466, "step": 20850 }, { "epoch": 5.01, "learning_rate": 9.985597695631302e-06, "loss": 0.2386, "step": 20860 }, { "epoch": 5.01, "learning_rate": 9.980796927508402e-06, "loss": 0.1551, "step": 20870 }, { "epoch": 5.01, "learning_rate": 9.975996159385503e-06, "loss": 0.2354, "step": 20880 }, { "epoch": 5.01, "learning_rate": 9.971195391262602e-06, "loss": 0.059, "step": 20890 }, { "epoch": 5.02, "learning_rate": 9.966394623139703e-06, "loss": 0.1755, "step": 20900 }, { "epoch": 5.02, "learning_rate": 9.961593855016804e-06, "loss": 0.258, "step": 20910 }, { "epoch": 5.02, "learning_rate": 9.956793086893903e-06, "loss": 0.1466, "step": 20920 }, { "epoch": 5.02, "learning_rate": 9.951992318771004e-06, "loss": 0.1936, "step": 20930 }, { "epoch": 5.03, "learning_rate": 9.947191550648105e-06, "loss": 0.0295, "step": 20940 }, { "epoch": 5.03, "learning_rate": 9.942390782525206e-06, "loss": 0.0796, "step": 20950 }, { "epoch": 5.03, "learning_rate": 9.937590014402305e-06, "loss": 0.1534, "step": 20960 }, { "epoch": 5.03, "learning_rate": 9.932789246279406e-06, "loss": 0.4549, "step": 20970 }, { "epoch": 5.04, "learning_rate": 9.927988478156507e-06, "loss": 0.081, "step": 20980 }, { "epoch": 5.04, "learning_rate": 9.923187710033606e-06, "loss": 0.3626, "step": 20990 }, { "epoch": 5.04, "learning_rate": 9.918386941910707e-06, "loss": 0.2685, "step": 21000 }, { "epoch": 5.04, "learning_rate": 9.913586173787808e-06, "loss": 0.1561, "step": 21010 }, { "epoch": 5.05, "learning_rate": 9.908785405664907e-06, "loss": 0.2837, "step": 21020 }, { "epoch": 5.05, "learning_rate": 9.903984637542008e-06, "loss": 0.1386, "step": 21030 }, { "epoch": 5.05, "learning_rate": 9.899183869419108e-06, "loss": 0.1665, "step": 21040 }, { "epoch": 5.05, "learning_rate": 9.894383101296208e-06, "loss": 0.1262, "step": 21050 }, { "epoch": 5.06, "learning_rate": 9.889582333173308e-06, "loss": 0.0925, "step": 21060 }, { "epoch": 5.06, "learning_rate": 9.88478156505041e-06, "loss": 0.0526, "step": 21070 }, { "epoch": 5.06, "learning_rate": 9.87998079692751e-06, "loss": 0.1895, "step": 21080 }, { "epoch": 5.06, "learning_rate": 9.87518002880461e-06, "loss": 0.1075, "step": 21090 }, { "epoch": 5.06, "learning_rate": 9.87037926068171e-06, "loss": 0.2021, "step": 21100 }, { "epoch": 5.07, "learning_rate": 9.865578492558811e-06, "loss": 0.2489, "step": 21110 }, { "epoch": 5.07, "learning_rate": 9.86077772443591e-06, "loss": 0.1887, "step": 21120 }, { "epoch": 5.07, "learning_rate": 9.855976956313011e-06, "loss": 0.1019, "step": 21130 }, { "epoch": 5.07, "learning_rate": 9.851176188190112e-06, "loss": 0.1957, "step": 21140 }, { "epoch": 5.08, "learning_rate": 9.846375420067211e-06, "loss": 0.1449, "step": 21150 }, { "epoch": 5.08, "learning_rate": 9.841574651944312e-06, "loss": 0.1341, "step": 21160 }, { "epoch": 5.08, "learning_rate": 9.836773883821413e-06, "loss": 0.1704, "step": 21170 }, { "epoch": 5.08, "learning_rate": 9.831973115698512e-06, "loss": 0.1499, "step": 21180 }, { "epoch": 5.09, "learning_rate": 9.827172347575613e-06, "loss": 0.1429, "step": 21190 }, { "epoch": 5.09, "learning_rate": 9.822371579452713e-06, "loss": 0.032, "step": 21200 }, { "epoch": 5.09, "learning_rate": 9.817570811329814e-06, "loss": 0.1481, "step": 21210 }, { "epoch": 5.09, "learning_rate": 9.812770043206913e-06, "loss": 0.0606, "step": 21220 }, { "epoch": 5.1, "learning_rate": 9.807969275084014e-06, "loss": 0.045, "step": 21230 }, { "epoch": 5.1, "learning_rate": 9.803168506961115e-06, "loss": 0.2612, "step": 21240 }, { "epoch": 5.1, "learning_rate": 9.798367738838214e-06, "loss": 0.2214, "step": 21250 }, { "epoch": 5.1, "learning_rate": 9.793566970715315e-06, "loss": 0.1381, "step": 21260 }, { "epoch": 5.11, "learning_rate": 9.788766202592416e-06, "loss": 0.1585, "step": 21270 }, { "epoch": 5.11, "learning_rate": 9.783965434469515e-06, "loss": 0.0588, "step": 21280 }, { "epoch": 5.11, "learning_rate": 9.779164666346616e-06, "loss": 0.231, "step": 21290 }, { "epoch": 5.11, "learning_rate": 9.774363898223717e-06, "loss": 0.196, "step": 21300 }, { "epoch": 5.12, "learning_rate": 9.769563130100816e-06, "loss": 0.1165, "step": 21310 }, { "epoch": 5.12, "learning_rate": 9.764762361977917e-06, "loss": 0.0786, "step": 21320 }, { "epoch": 5.12, "learning_rate": 9.759961593855018e-06, "loss": 0.1502, "step": 21330 }, { "epoch": 5.12, "learning_rate": 9.755160825732118e-06, "loss": 0.305, "step": 21340 }, { "epoch": 5.12, "learning_rate": 9.750360057609218e-06, "loss": 0.0459, "step": 21350 }, { "epoch": 5.13, "learning_rate": 9.745559289486318e-06, "loss": 0.124, "step": 21360 }, { "epoch": 5.13, "learning_rate": 9.74075852136342e-06, "loss": 0.086, "step": 21370 }, { "epoch": 5.13, "learning_rate": 9.735957753240518e-06, "loss": 0.0953, "step": 21380 }, { "epoch": 5.13, "learning_rate": 9.73115698511762e-06, "loss": 0.1654, "step": 21390 }, { "epoch": 5.14, "learning_rate": 9.72635621699472e-06, "loss": 0.1364, "step": 21400 }, { "epoch": 5.14, "learning_rate": 9.72155544887182e-06, "loss": 0.1196, "step": 21410 }, { "epoch": 5.14, "learning_rate": 9.716754680748922e-06, "loss": 0.1727, "step": 21420 }, { "epoch": 5.14, "learning_rate": 9.711953912626021e-06, "loss": 0.2441, "step": 21430 }, { "epoch": 5.15, "learning_rate": 9.70715314450312e-06, "loss": 0.0361, "step": 21440 }, { "epoch": 5.15, "learning_rate": 9.702352376380223e-06, "loss": 0.1526, "step": 21450 }, { "epoch": 5.15, "learning_rate": 9.697551608257322e-06, "loss": 0.1651, "step": 21460 }, { "epoch": 5.15, "learning_rate": 9.692750840134423e-06, "loss": 0.0999, "step": 21470 }, { "epoch": 5.16, "learning_rate": 9.687950072011523e-06, "loss": 0.0559, "step": 21480 }, { "epoch": 5.16, "learning_rate": 9.683149303888623e-06, "loss": 0.124, "step": 21490 }, { "epoch": 5.16, "learning_rate": 9.678348535765723e-06, "loss": 0.4906, "step": 21500 }, { "epoch": 5.16, "learning_rate": 9.673547767642824e-06, "loss": 0.108, "step": 21510 }, { "epoch": 5.17, "learning_rate": 9.668746999519923e-06, "loss": 0.3218, "step": 21520 }, { "epoch": 5.17, "learning_rate": 9.663946231397024e-06, "loss": 0.137, "step": 21530 }, { "epoch": 5.17, "learning_rate": 9.659145463274125e-06, "loss": 0.104, "step": 21540 }, { "epoch": 5.17, "learning_rate": 9.654344695151226e-06, "loss": 0.1426, "step": 21550 }, { "epoch": 5.18, "learning_rate": 9.649543927028325e-06, "loss": 0.1215, "step": 21560 }, { "epoch": 5.18, "learning_rate": 9.644743158905426e-06, "loss": 0.1023, "step": 21570 }, { "epoch": 5.18, "learning_rate": 9.639942390782527e-06, "loss": 0.1644, "step": 21580 }, { "epoch": 5.18, "learning_rate": 9.635141622659626e-06, "loss": 0.1716, "step": 21590 }, { "epoch": 5.18, "learning_rate": 9.630340854536727e-06, "loss": 0.1004, "step": 21600 }, { "epoch": 5.19, "learning_rate": 9.625540086413828e-06, "loss": 0.1565, "step": 21610 }, { "epoch": 5.19, "learning_rate": 9.620739318290927e-06, "loss": 0.0625, "step": 21620 }, { "epoch": 5.19, "learning_rate": 9.615938550168028e-06, "loss": 0.0435, "step": 21630 }, { "epoch": 5.19, "learning_rate": 9.611137782045128e-06, "loss": 0.1232, "step": 21640 }, { "epoch": 5.2, "learning_rate": 9.606337013922228e-06, "loss": 0.1154, "step": 21650 }, { "epoch": 5.2, "learning_rate": 9.601536245799328e-06, "loss": 0.1544, "step": 21660 }, { "epoch": 5.2, "learning_rate": 9.59673547767643e-06, "loss": 0.2692, "step": 21670 }, { "epoch": 5.2, "learning_rate": 9.59193470955353e-06, "loss": 0.2211, "step": 21680 }, { "epoch": 5.21, "learning_rate": 9.58713394143063e-06, "loss": 0.0279, "step": 21690 }, { "epoch": 5.21, "learning_rate": 9.58233317330773e-06, "loss": 0.2267, "step": 21700 }, { "epoch": 5.21, "learning_rate": 9.577532405184831e-06, "loss": 0.1144, "step": 21710 }, { "epoch": 5.21, "learning_rate": 9.57273163706193e-06, "loss": 0.1727, "step": 21720 }, { "epoch": 5.22, "learning_rate": 9.567930868939031e-06, "loss": 0.2691, "step": 21730 }, { "epoch": 5.22, "learning_rate": 9.563130100816132e-06, "loss": 0.0892, "step": 21740 }, { "epoch": 5.22, "learning_rate": 9.558329332693231e-06, "loss": 0.116, "step": 21750 }, { "epoch": 5.22, "learning_rate": 9.553528564570332e-06, "loss": 0.2147, "step": 21760 }, { "epoch": 5.23, "learning_rate": 9.548727796447433e-06, "loss": 0.0164, "step": 21770 }, { "epoch": 5.23, "learning_rate": 9.543927028324532e-06, "loss": 0.1004, "step": 21780 }, { "epoch": 5.23, "learning_rate": 9.539126260201633e-06, "loss": 0.0166, "step": 21790 }, { "epoch": 5.23, "learning_rate": 9.534325492078733e-06, "loss": 0.1319, "step": 21800 }, { "epoch": 5.24, "learning_rate": 9.529524723955834e-06, "loss": 0.1209, "step": 21810 }, { "epoch": 5.24, "learning_rate": 9.524723955832933e-06, "loss": 0.0864, "step": 21820 }, { "epoch": 5.24, "learning_rate": 9.519923187710034e-06, "loss": 0.1978, "step": 21830 }, { "epoch": 5.24, "learning_rate": 9.515122419587135e-06, "loss": 0.0985, "step": 21840 }, { "epoch": 5.24, "learning_rate": 9.510321651464234e-06, "loss": 0.0542, "step": 21850 }, { "epoch": 5.25, "learning_rate": 9.505520883341335e-06, "loss": 0.1809, "step": 21860 }, { "epoch": 5.25, "learning_rate": 9.500720115218436e-06, "loss": 0.1019, "step": 21870 }, { "epoch": 5.25, "learning_rate": 9.495919347095535e-06, "loss": 0.1618, "step": 21880 }, { "epoch": 5.25, "learning_rate": 9.491118578972636e-06, "loss": 0.0627, "step": 21890 }, { "epoch": 5.26, "learning_rate": 9.486317810849737e-06, "loss": 0.1535, "step": 21900 }, { "epoch": 5.26, "learning_rate": 9.481517042726836e-06, "loss": 0.2104, "step": 21910 }, { "epoch": 5.26, "learning_rate": 9.476716274603937e-06, "loss": 0.0626, "step": 21920 }, { "epoch": 5.26, "learning_rate": 9.471915506481038e-06, "loss": 0.0491, "step": 21930 }, { "epoch": 5.27, "learning_rate": 9.467114738358139e-06, "loss": 0.2386, "step": 21940 }, { "epoch": 5.27, "learning_rate": 9.462313970235238e-06, "loss": 0.1588, "step": 21950 }, { "epoch": 5.27, "learning_rate": 9.457513202112338e-06, "loss": 0.3844, "step": 21960 }, { "epoch": 5.27, "learning_rate": 9.45271243398944e-06, "loss": 0.1755, "step": 21970 }, { "epoch": 5.28, "learning_rate": 9.447911665866538e-06, "loss": 0.3013, "step": 21980 }, { "epoch": 5.28, "learning_rate": 9.44311089774364e-06, "loss": 0.2543, "step": 21990 }, { "epoch": 5.28, "learning_rate": 9.43831012962074e-06, "loss": 0.0207, "step": 22000 }, { "epoch": 5.28, "learning_rate": 9.43350936149784e-06, "loss": 0.1017, "step": 22010 }, { "epoch": 5.29, "learning_rate": 9.428708593374942e-06, "loss": 0.1307, "step": 22020 }, { "epoch": 5.29, "learning_rate": 9.423907825252041e-06, "loss": 0.2027, "step": 22030 }, { "epoch": 5.29, "learning_rate": 9.41910705712914e-06, "loss": 0.112, "step": 22040 }, { "epoch": 5.29, "learning_rate": 9.414306289006243e-06, "loss": 0.0592, "step": 22050 }, { "epoch": 5.3, "learning_rate": 9.409505520883342e-06, "loss": 0.0323, "step": 22060 }, { "epoch": 5.3, "learning_rate": 9.404704752760443e-06, "loss": 0.0991, "step": 22070 }, { "epoch": 5.3, "learning_rate": 9.399903984637544e-06, "loss": 0.0943, "step": 22080 }, { "epoch": 5.3, "learning_rate": 9.395103216514643e-06, "loss": 0.1787, "step": 22090 }, { "epoch": 5.3, "learning_rate": 9.390302448391744e-06, "loss": 0.2703, "step": 22100 }, { "epoch": 5.31, "learning_rate": 9.385501680268844e-06, "loss": 0.1061, "step": 22110 }, { "epoch": 5.31, "learning_rate": 9.380700912145944e-06, "loss": 0.1904, "step": 22120 }, { "epoch": 5.31, "learning_rate": 9.375900144023044e-06, "loss": 0.0296, "step": 22130 }, { "epoch": 5.31, "learning_rate": 9.371099375900145e-06, "loss": 0.0205, "step": 22140 }, { "epoch": 5.32, "learning_rate": 9.366298607777246e-06, "loss": 0.091, "step": 22150 }, { "epoch": 5.32, "learning_rate": 9.361497839654345e-06, "loss": 0.2114, "step": 22160 }, { "epoch": 5.32, "learning_rate": 9.356697071531446e-06, "loss": 0.1102, "step": 22170 }, { "epoch": 5.32, "learning_rate": 9.351896303408547e-06, "loss": 0.1026, "step": 22180 }, { "epoch": 5.33, "learning_rate": 9.347095535285646e-06, "loss": 0.2057, "step": 22190 }, { "epoch": 5.33, "learning_rate": 9.342294767162747e-06, "loss": 0.0954, "step": 22200 }, { "epoch": 5.33, "learning_rate": 9.337493999039848e-06, "loss": 0.0996, "step": 22210 }, { "epoch": 5.33, "learning_rate": 9.332693230916947e-06, "loss": 0.2091, "step": 22220 }, { "epoch": 5.34, "learning_rate": 9.327892462794048e-06, "loss": 0.1729, "step": 22230 }, { "epoch": 5.34, "learning_rate": 9.323091694671149e-06, "loss": 0.1483, "step": 22240 }, { "epoch": 5.34, "learning_rate": 9.318290926548248e-06, "loss": 0.088, "step": 22250 }, { "epoch": 5.34, "learning_rate": 9.313490158425349e-06, "loss": 0.1449, "step": 22260 }, { "epoch": 5.35, "learning_rate": 9.30868939030245e-06, "loss": 0.2895, "step": 22270 }, { "epoch": 5.35, "learning_rate": 9.30388862217955e-06, "loss": 0.0841, "step": 22280 }, { "epoch": 5.35, "learning_rate": 9.29908785405665e-06, "loss": 0.1966, "step": 22290 }, { "epoch": 5.35, "learning_rate": 9.29428708593375e-06, "loss": 0.0854, "step": 22300 }, { "epoch": 5.36, "learning_rate": 9.289486317810851e-06, "loss": 0.1064, "step": 22310 }, { "epoch": 5.36, "learning_rate": 9.28468554968795e-06, "loss": 0.248, "step": 22320 }, { "epoch": 5.36, "learning_rate": 9.279884781565051e-06, "loss": 0.241, "step": 22330 }, { "epoch": 5.36, "learning_rate": 9.275084013442152e-06, "loss": 0.0676, "step": 22340 }, { "epoch": 5.36, "learning_rate": 9.270283245319251e-06, "loss": 0.2349, "step": 22350 }, { "epoch": 5.37, "learning_rate": 9.265482477196352e-06, "loss": 0.1589, "step": 22360 }, { "epoch": 5.37, "learning_rate": 9.260681709073453e-06, "loss": 0.0945, "step": 22370 }, { "epoch": 5.37, "learning_rate": 9.255880940950552e-06, "loss": 0.1398, "step": 22380 }, { "epoch": 5.37, "learning_rate": 9.251080172827653e-06, "loss": 0.1089, "step": 22390 }, { "epoch": 5.38, "learning_rate": 9.246279404704754e-06, "loss": 0.0284, "step": 22400 }, { "epoch": 5.38, "learning_rate": 9.241478636581854e-06, "loss": 0.2299, "step": 22410 }, { "epoch": 5.38, "learning_rate": 9.236677868458954e-06, "loss": 0.0708, "step": 22420 }, { "epoch": 5.38, "learning_rate": 9.231877100336054e-06, "loss": 0.1511, "step": 22430 }, { "epoch": 5.39, "learning_rate": 9.227076332213155e-06, "loss": 0.1563, "step": 22440 }, { "epoch": 5.39, "learning_rate": 9.222275564090254e-06, "loss": 0.1064, "step": 22450 }, { "epoch": 5.39, "learning_rate": 9.217474795967355e-06, "loss": 0.0699, "step": 22460 }, { "epoch": 5.39, "learning_rate": 9.212674027844456e-06, "loss": 0.1026, "step": 22470 }, { "epoch": 5.4, "learning_rate": 9.207873259721555e-06, "loss": 0.1185, "step": 22480 }, { "epoch": 5.4, "learning_rate": 9.203072491598656e-06, "loss": 0.0494, "step": 22490 }, { "epoch": 5.4, "learning_rate": 9.198271723475757e-06, "loss": 0.1729, "step": 22500 }, { "epoch": 5.4, "learning_rate": 9.193470955352856e-06, "loss": 0.0716, "step": 22510 }, { "epoch": 5.41, "learning_rate": 9.188670187229957e-06, "loss": 0.3206, "step": 22520 }, { "epoch": 5.41, "learning_rate": 9.183869419107058e-06, "loss": 0.0669, "step": 22530 }, { "epoch": 5.41, "learning_rate": 9.179068650984159e-06, "loss": 0.1566, "step": 22540 }, { "epoch": 5.41, "learning_rate": 9.174267882861258e-06, "loss": 0.2688, "step": 22550 }, { "epoch": 5.42, "learning_rate": 9.169467114738359e-06, "loss": 0.102, "step": 22560 }, { "epoch": 5.42, "learning_rate": 9.16466634661546e-06, "loss": 0.2398, "step": 22570 }, { "epoch": 5.42, "learning_rate": 9.159865578492559e-06, "loss": 0.0957, "step": 22580 }, { "epoch": 5.42, "learning_rate": 9.155064810369661e-06, "loss": 0.1979, "step": 22590 }, { "epoch": 5.42, "learning_rate": 9.15026404224676e-06, "loss": 0.2477, "step": 22600 }, { "epoch": 5.43, "learning_rate": 9.14546327412386e-06, "loss": 0.0932, "step": 22610 }, { "epoch": 5.43, "learning_rate": 9.140662506000962e-06, "loss": 0.117, "step": 22620 }, { "epoch": 5.43, "learning_rate": 9.135861737878061e-06, "loss": 0.1844, "step": 22630 }, { "epoch": 5.43, "learning_rate": 9.13106096975516e-06, "loss": 0.1836, "step": 22640 }, { "epoch": 5.44, "learning_rate": 9.126260201632263e-06, "loss": 0.0727, "step": 22650 }, { "epoch": 5.44, "learning_rate": 9.121459433509362e-06, "loss": 0.0694, "step": 22660 }, { "epoch": 5.44, "learning_rate": 9.116658665386463e-06, "loss": 0.1239, "step": 22670 }, { "epoch": 5.44, "learning_rate": 9.111857897263564e-06, "loss": 0.0828, "step": 22680 }, { "epoch": 5.45, "learning_rate": 9.107057129140663e-06, "loss": 0.1898, "step": 22690 }, { "epoch": 5.45, "learning_rate": 9.102256361017764e-06, "loss": 0.1236, "step": 22700 }, { "epoch": 5.45, "learning_rate": 9.097455592894864e-06, "loss": 0.1967, "step": 22710 }, { "epoch": 5.45, "learning_rate": 9.092654824771964e-06, "loss": 0.1741, "step": 22720 }, { "epoch": 5.46, "learning_rate": 9.087854056649064e-06, "loss": 0.1815, "step": 22730 }, { "epoch": 5.46, "learning_rate": 9.083053288526165e-06, "loss": 0.0979, "step": 22740 }, { "epoch": 5.46, "learning_rate": 9.078252520403266e-06, "loss": 0.1507, "step": 22750 }, { "epoch": 5.46, "learning_rate": 9.073451752280365e-06, "loss": 0.1533, "step": 22760 }, { "epoch": 5.47, "learning_rate": 9.068650984157466e-06, "loss": 0.2152, "step": 22770 }, { "epoch": 5.47, "learning_rate": 9.063850216034567e-06, "loss": 0.1521, "step": 22780 }, { "epoch": 5.47, "learning_rate": 9.059049447911666e-06, "loss": 0.0693, "step": 22790 }, { "epoch": 5.47, "learning_rate": 9.054248679788767e-06, "loss": 0.0744, "step": 22800 }, { "epoch": 5.48, "learning_rate": 9.049447911665868e-06, "loss": 0.0892, "step": 22810 }, { "epoch": 5.48, "learning_rate": 9.044647143542967e-06, "loss": 0.1605, "step": 22820 }, { "epoch": 5.48, "learning_rate": 9.039846375420068e-06, "loss": 0.1546, "step": 22830 }, { "epoch": 5.48, "learning_rate": 9.035045607297169e-06, "loss": 0.015, "step": 22840 }, { "epoch": 5.48, "learning_rate": 9.030244839174268e-06, "loss": 0.0518, "step": 22850 }, { "epoch": 5.49, "learning_rate": 9.025444071051369e-06, "loss": 0.1183, "step": 22860 }, { "epoch": 5.49, "learning_rate": 9.02064330292847e-06, "loss": 0.2947, "step": 22870 }, { "epoch": 5.49, "learning_rate": 9.01584253480557e-06, "loss": 0.1924, "step": 22880 }, { "epoch": 5.49, "learning_rate": 9.01104176668267e-06, "loss": 0.2254, "step": 22890 }, { "epoch": 5.5, "learning_rate": 9.00624099855977e-06, "loss": 0.0984, "step": 22900 }, { "epoch": 5.5, "learning_rate": 9.001440230436871e-06, "loss": 0.286, "step": 22910 }, { "epoch": 5.5, "learning_rate": 8.99663946231397e-06, "loss": 0.2143, "step": 22920 }, { "epoch": 5.5, "learning_rate": 8.991838694191071e-06, "loss": 0.2313, "step": 22930 }, { "epoch": 5.51, "learning_rate": 8.987037926068172e-06, "loss": 0.2279, "step": 22940 }, { "epoch": 5.51, "learning_rate": 8.982237157945271e-06, "loss": 0.1045, "step": 22950 }, { "epoch": 5.51, "learning_rate": 8.977436389822372e-06, "loss": 0.279, "step": 22960 }, { "epoch": 5.51, "learning_rate": 8.972635621699473e-06, "loss": 0.0485, "step": 22970 }, { "epoch": 5.52, "learning_rate": 8.967834853576572e-06, "loss": 0.0757, "step": 22980 }, { "epoch": 5.52, "learning_rate": 8.963034085453673e-06, "loss": 0.1427, "step": 22990 }, { "epoch": 5.52, "learning_rate": 8.958233317330774e-06, "loss": 0.2291, "step": 23000 }, { "epoch": 5.52, "learning_rate": 8.953432549207875e-06, "loss": 0.0404, "step": 23010 }, { "epoch": 5.53, "learning_rate": 8.948631781084974e-06, "loss": 0.1381, "step": 23020 }, { "epoch": 5.53, "learning_rate": 8.943831012962075e-06, "loss": 0.1238, "step": 23030 }, { "epoch": 5.53, "learning_rate": 8.939030244839175e-06, "loss": 0.1505, "step": 23040 }, { "epoch": 5.53, "learning_rate": 8.934229476716274e-06, "loss": 0.0807, "step": 23050 }, { "epoch": 5.54, "learning_rate": 8.929428708593375e-06, "loss": 0.0767, "step": 23060 }, { "epoch": 5.54, "learning_rate": 8.924627940470476e-06, "loss": 0.186, "step": 23070 }, { "epoch": 5.54, "learning_rate": 8.919827172347575e-06, "loss": 0.1324, "step": 23080 }, { "epoch": 5.54, "learning_rate": 8.915026404224678e-06, "loss": 0.0861, "step": 23090 }, { "epoch": 5.54, "learning_rate": 8.910225636101777e-06, "loss": 0.1016, "step": 23100 }, { "epoch": 5.55, "learning_rate": 8.905424867978876e-06, "loss": 0.0707, "step": 23110 }, { "epoch": 5.55, "learning_rate": 8.900624099855979e-06, "loss": 0.1564, "step": 23120 }, { "epoch": 5.55, "learning_rate": 8.895823331733078e-06, "loss": 0.1158, "step": 23130 }, { "epoch": 5.55, "learning_rate": 8.891022563610179e-06, "loss": 0.3153, "step": 23140 }, { "epoch": 5.56, "learning_rate": 8.88622179548728e-06, "loss": 0.1727, "step": 23150 }, { "epoch": 5.56, "learning_rate": 8.881421027364379e-06, "loss": 0.1206, "step": 23160 }, { "epoch": 5.56, "learning_rate": 8.87662025924148e-06, "loss": 0.0377, "step": 23170 }, { "epoch": 5.56, "learning_rate": 8.87181949111858e-06, "loss": 0.2036, "step": 23180 }, { "epoch": 5.57, "learning_rate": 8.867018722995681e-06, "loss": 0.1361, "step": 23190 }, { "epoch": 5.57, "learning_rate": 8.86221795487278e-06, "loss": 0.1613, "step": 23200 }, { "epoch": 5.57, "learning_rate": 8.857417186749881e-06, "loss": 0.2225, "step": 23210 }, { "epoch": 5.57, "learning_rate": 8.852616418626982e-06, "loss": 0.2083, "step": 23220 }, { "epoch": 5.58, "learning_rate": 8.847815650504081e-06, "loss": 0.0807, "step": 23230 }, { "epoch": 5.58, "learning_rate": 8.843014882381182e-06, "loss": 0.1861, "step": 23240 }, { "epoch": 5.58, "learning_rate": 8.838214114258283e-06, "loss": 0.1768, "step": 23250 }, { "epoch": 5.58, "learning_rate": 8.833413346135382e-06, "loss": 0.1907, "step": 23260 }, { "epoch": 5.59, "learning_rate": 8.828612578012483e-06, "loss": 0.1334, "step": 23270 }, { "epoch": 5.59, "learning_rate": 8.823811809889584e-06, "loss": 0.0783, "step": 23280 }, { "epoch": 5.59, "learning_rate": 8.819011041766683e-06, "loss": 0.157, "step": 23290 }, { "epoch": 5.59, "learning_rate": 8.814210273643784e-06, "loss": 0.113, "step": 23300 }, { "epoch": 5.6, "learning_rate": 8.809409505520885e-06, "loss": 0.2966, "step": 23310 }, { "epoch": 5.6, "learning_rate": 8.804608737397985e-06, "loss": 0.1154, "step": 23320 }, { "epoch": 5.6, "learning_rate": 8.799807969275085e-06, "loss": 0.1907, "step": 23330 }, { "epoch": 5.6, "learning_rate": 8.795007201152185e-06, "loss": 0.0188, "step": 23340 }, { "epoch": 5.6, "learning_rate": 8.790206433029286e-06, "loss": 0.1731, "step": 23350 }, { "epoch": 5.61, "learning_rate": 8.785405664906385e-06, "loss": 0.1111, "step": 23360 }, { "epoch": 5.61, "learning_rate": 8.780604896783486e-06, "loss": 0.0115, "step": 23370 }, { "epoch": 5.61, "learning_rate": 8.775804128660587e-06, "loss": 0.1328, "step": 23380 }, { "epoch": 5.61, "learning_rate": 8.771003360537686e-06, "loss": 0.1696, "step": 23390 }, { "epoch": 5.62, "learning_rate": 8.766202592414787e-06, "loss": 0.2068, "step": 23400 }, { "epoch": 5.62, "learning_rate": 8.761401824291888e-06, "loss": 0.2997, "step": 23410 }, { "epoch": 5.62, "learning_rate": 8.756601056168987e-06, "loss": 0.1893, "step": 23420 }, { "epoch": 5.62, "learning_rate": 8.751800288046088e-06, "loss": 0.1389, "step": 23430 }, { "epoch": 5.63, "learning_rate": 8.746999519923189e-06, "loss": 0.1851, "step": 23440 }, { "epoch": 5.63, "learning_rate": 8.742198751800288e-06, "loss": 0.0748, "step": 23450 }, { "epoch": 5.63, "learning_rate": 8.737397983677389e-06, "loss": 0.0554, "step": 23460 }, { "epoch": 5.63, "learning_rate": 8.73259721555449e-06, "loss": 0.2345, "step": 23470 }, { "epoch": 5.64, "learning_rate": 8.72779644743159e-06, "loss": 0.2851, "step": 23480 }, { "epoch": 5.64, "learning_rate": 8.72299567930869e-06, "loss": 0.1438, "step": 23490 }, { "epoch": 5.64, "learning_rate": 8.71819491118579e-06, "loss": 0.2902, "step": 23500 }, { "epoch": 5.64, "learning_rate": 8.713394143062891e-06, "loss": 0.096, "step": 23510 }, { "epoch": 5.65, "learning_rate": 8.70859337493999e-06, "loss": 0.0564, "step": 23520 }, { "epoch": 5.65, "learning_rate": 8.703792606817091e-06, "loss": 0.1112, "step": 23530 }, { "epoch": 5.65, "learning_rate": 8.698991838694192e-06, "loss": 0.0016, "step": 23540 }, { "epoch": 5.65, "learning_rate": 8.694191070571291e-06, "loss": 0.0427, "step": 23550 }, { "epoch": 5.66, "learning_rate": 8.689390302448392e-06, "loss": 0.141, "step": 23560 }, { "epoch": 5.66, "learning_rate": 8.684589534325493e-06, "loss": 0.1203, "step": 23570 }, { "epoch": 5.66, "learning_rate": 8.679788766202592e-06, "loss": 0.0431, "step": 23580 }, { "epoch": 5.66, "learning_rate": 8.674987998079693e-06, "loss": 0.0898, "step": 23590 }, { "epoch": 5.66, "learning_rate": 8.670187229956794e-06, "loss": 0.1872, "step": 23600 }, { "epoch": 5.67, "learning_rate": 8.665386461833895e-06, "loss": 0.1567, "step": 23610 }, { "epoch": 5.67, "learning_rate": 8.660585693710994e-06, "loss": 0.0639, "step": 23620 }, { "epoch": 5.67, "learning_rate": 8.655784925588095e-06, "loss": 0.1339, "step": 23630 }, { "epoch": 5.67, "learning_rate": 8.650984157465195e-06, "loss": 0.1433, "step": 23640 }, { "epoch": 5.68, "learning_rate": 8.646183389342295e-06, "loss": 0.2104, "step": 23650 }, { "epoch": 5.68, "learning_rate": 8.641382621219397e-06, "loss": 0.0067, "step": 23660 }, { "epoch": 5.68, "learning_rate": 8.636581853096496e-06, "loss": 0.242, "step": 23670 }, { "epoch": 5.68, "learning_rate": 8.631781084973595e-06, "loss": 0.0935, "step": 23680 }, { "epoch": 5.69, "learning_rate": 8.626980316850698e-06, "loss": 0.305, "step": 23690 }, { "epoch": 5.69, "learning_rate": 8.622179548727797e-06, "loss": 0.1011, "step": 23700 }, { "epoch": 5.69, "learning_rate": 8.617378780604896e-06, "loss": 0.2383, "step": 23710 }, { "epoch": 5.69, "learning_rate": 8.612578012481999e-06, "loss": 0.1385, "step": 23720 }, { "epoch": 5.7, "learning_rate": 8.607777244359098e-06, "loss": 0.0412, "step": 23730 }, { "epoch": 5.7, "learning_rate": 8.602976476236199e-06, "loss": 0.193, "step": 23740 }, { "epoch": 5.7, "learning_rate": 8.5981757081133e-06, "loss": 0.1095, "step": 23750 }, { "epoch": 5.7, "learning_rate": 8.593374939990399e-06, "loss": 0.2309, "step": 23760 }, { "epoch": 5.71, "learning_rate": 8.5885741718675e-06, "loss": 0.1307, "step": 23770 }, { "epoch": 5.71, "learning_rate": 8.5837734037446e-06, "loss": 0.3265, "step": 23780 }, { "epoch": 5.71, "learning_rate": 8.578972635621701e-06, "loss": 0.1727, "step": 23790 }, { "epoch": 5.71, "learning_rate": 8.5741718674988e-06, "loss": 0.1567, "step": 23800 }, { "epoch": 5.72, "learning_rate": 8.569371099375901e-06, "loss": 0.1232, "step": 23810 }, { "epoch": 5.72, "learning_rate": 8.564570331253002e-06, "loss": 0.0749, "step": 23820 }, { "epoch": 5.72, "learning_rate": 8.559769563130101e-06, "loss": 0.1984, "step": 23830 }, { "epoch": 5.72, "learning_rate": 8.554968795007202e-06, "loss": 0.0778, "step": 23840 }, { "epoch": 5.72, "learning_rate": 8.550168026884303e-06, "loss": 0.1163, "step": 23850 }, { "epoch": 5.73, "learning_rate": 8.545367258761402e-06, "loss": 0.0196, "step": 23860 }, { "epoch": 5.73, "learning_rate": 8.540566490638503e-06, "loss": 0.2435, "step": 23870 }, { "epoch": 5.73, "learning_rate": 8.535765722515604e-06, "loss": 0.2268, "step": 23880 }, { "epoch": 5.73, "learning_rate": 8.530964954392703e-06, "loss": 0.1571, "step": 23890 }, { "epoch": 5.74, "learning_rate": 8.526164186269804e-06, "loss": 0.202, "step": 23900 }, { "epoch": 5.74, "learning_rate": 8.521363418146905e-06, "loss": 0.1178, "step": 23910 }, { "epoch": 5.74, "learning_rate": 8.516562650024005e-06, "loss": 0.2691, "step": 23920 }, { "epoch": 5.74, "learning_rate": 8.511761881901105e-06, "loss": 0.0993, "step": 23930 }, { "epoch": 5.75, "learning_rate": 8.506961113778205e-06, "loss": 0.1158, "step": 23940 }, { "epoch": 5.75, "learning_rate": 8.502160345655306e-06, "loss": 0.0977, "step": 23950 }, { "epoch": 5.75, "learning_rate": 8.497359577532405e-06, "loss": 0.1979, "step": 23960 }, { "epoch": 5.75, "learning_rate": 8.492558809409506e-06, "loss": 0.1082, "step": 23970 }, { "epoch": 5.76, "learning_rate": 8.487758041286607e-06, "loss": 0.1079, "step": 23980 }, { "epoch": 5.76, "learning_rate": 8.482957273163706e-06, "loss": 0.3092, "step": 23990 }, { "epoch": 5.76, "learning_rate": 8.478156505040807e-06, "loss": 0.2673, "step": 24000 }, { "epoch": 5.76, "learning_rate": 8.473355736917908e-06, "loss": 0.075, "step": 24010 }, { "epoch": 5.77, "learning_rate": 8.468554968795007e-06, "loss": 0.037, "step": 24020 }, { "epoch": 5.77, "learning_rate": 8.463754200672108e-06, "loss": 0.0184, "step": 24030 }, { "epoch": 5.77, "learning_rate": 8.458953432549209e-06, "loss": 0.0767, "step": 24040 }, { "epoch": 5.77, "learning_rate": 8.45415266442631e-06, "loss": 0.0071, "step": 24050 }, { "epoch": 5.78, "learning_rate": 8.449351896303409e-06, "loss": 0.165, "step": 24060 }, { "epoch": 5.78, "learning_rate": 8.44455112818051e-06, "loss": 0.0991, "step": 24070 }, { "epoch": 5.78, "learning_rate": 8.43975036005761e-06, "loss": 0.1252, "step": 24080 }, { "epoch": 5.78, "learning_rate": 8.43494959193471e-06, "loss": 0.0874, "step": 24090 }, { "epoch": 5.78, "learning_rate": 8.43014882381181e-06, "loss": 0.1977, "step": 24100 }, { "epoch": 5.79, "learning_rate": 8.425348055688911e-06, "loss": 0.0942, "step": 24110 }, { "epoch": 5.79, "learning_rate": 8.42054728756601e-06, "loss": 0.2025, "step": 24120 }, { "epoch": 5.79, "learning_rate": 8.415746519443111e-06, "loss": 0.1034, "step": 24130 }, { "epoch": 5.79, "learning_rate": 8.410945751320212e-06, "loss": 0.1661, "step": 24140 }, { "epoch": 5.8, "learning_rate": 8.406144983197311e-06, "loss": 0.1799, "step": 24150 }, { "epoch": 5.8, "learning_rate": 8.401344215074412e-06, "loss": 0.3321, "step": 24160 }, { "epoch": 5.8, "learning_rate": 8.396543446951513e-06, "loss": 0.0543, "step": 24170 }, { "epoch": 5.8, "learning_rate": 8.391742678828612e-06, "loss": 0.1071, "step": 24180 }, { "epoch": 5.81, "learning_rate": 8.386941910705713e-06, "loss": 0.038, "step": 24190 }, { "epoch": 5.81, "learning_rate": 8.382141142582814e-06, "loss": 0.3388, "step": 24200 }, { "epoch": 5.81, "learning_rate": 8.377340374459915e-06, "loss": 0.1123, "step": 24210 }, { "epoch": 5.81, "learning_rate": 8.372539606337014e-06, "loss": 0.0851, "step": 24220 }, { "epoch": 5.82, "learning_rate": 8.367738838214115e-06, "loss": 0.2466, "step": 24230 }, { "epoch": 5.82, "learning_rate": 8.362938070091216e-06, "loss": 0.2098, "step": 24240 }, { "epoch": 5.82, "learning_rate": 8.358137301968315e-06, "loss": 0.1229, "step": 24250 }, { "epoch": 5.82, "learning_rate": 8.353336533845417e-06, "loss": 0.0406, "step": 24260 }, { "epoch": 5.83, "learning_rate": 8.348535765722516e-06, "loss": 0.1041, "step": 24270 }, { "epoch": 5.83, "learning_rate": 8.343734997599616e-06, "loss": 0.1262, "step": 24280 }, { "epoch": 5.83, "learning_rate": 8.338934229476718e-06, "loss": 0.1693, "step": 24290 }, { "epoch": 5.83, "learning_rate": 8.334133461353817e-06, "loss": 0.1611, "step": 24300 }, { "epoch": 5.84, "learning_rate": 8.329332693230916e-06, "loss": 0.0699, "step": 24310 }, { "epoch": 5.84, "learning_rate": 8.324531925108019e-06, "loss": 0.209, "step": 24320 }, { "epoch": 5.84, "learning_rate": 8.319731156985118e-06, "loss": 0.206, "step": 24330 }, { "epoch": 5.84, "learning_rate": 8.314930388862219e-06, "loss": 0.0412, "step": 24340 }, { "epoch": 5.84, "learning_rate": 8.31012962073932e-06, "loss": 0.1087, "step": 24350 }, { "epoch": 5.85, "learning_rate": 8.305328852616419e-06, "loss": 0.2261, "step": 24360 }, { "epoch": 5.85, "learning_rate": 8.30052808449352e-06, "loss": 0.1058, "step": 24370 }, { "epoch": 5.85, "learning_rate": 8.29572731637062e-06, "loss": 0.1548, "step": 24380 }, { "epoch": 5.85, "learning_rate": 8.290926548247721e-06, "loss": 0.1344, "step": 24390 }, { "epoch": 5.86, "learning_rate": 8.28612578012482e-06, "loss": 0.0995, "step": 24400 }, { "epoch": 5.86, "learning_rate": 8.281325012001921e-06, "loss": 0.0742, "step": 24410 }, { "epoch": 5.86, "learning_rate": 8.276524243879022e-06, "loss": 0.2176, "step": 24420 }, { "epoch": 5.86, "learning_rate": 8.271723475756121e-06, "loss": 0.0195, "step": 24430 }, { "epoch": 5.87, "learning_rate": 8.266922707633222e-06, "loss": 0.2776, "step": 24440 }, { "epoch": 5.87, "learning_rate": 8.262121939510323e-06, "loss": 0.0599, "step": 24450 }, { "epoch": 5.87, "learning_rate": 8.257321171387422e-06, "loss": 0.2414, "step": 24460 }, { "epoch": 5.87, "learning_rate": 8.252520403264523e-06, "loss": 0.1183, "step": 24470 }, { "epoch": 5.88, "learning_rate": 8.247719635141624e-06, "loss": 0.2741, "step": 24480 }, { "epoch": 5.88, "learning_rate": 8.242918867018723e-06, "loss": 0.1529, "step": 24490 }, { "epoch": 5.88, "learning_rate": 8.238118098895824e-06, "loss": 0.3196, "step": 24500 }, { "epoch": 5.88, "learning_rate": 8.233317330772925e-06, "loss": 0.2431, "step": 24510 }, { "epoch": 5.89, "learning_rate": 8.228516562650026e-06, "loss": 0.3491, "step": 24520 }, { "epoch": 5.89, "learning_rate": 8.223715794527125e-06, "loss": 0.0612, "step": 24530 }, { "epoch": 5.89, "learning_rate": 8.218915026404226e-06, "loss": 0.171, "step": 24540 }, { "epoch": 5.89, "learning_rate": 8.214114258281326e-06, "loss": 0.4149, "step": 24550 }, { "epoch": 5.9, "learning_rate": 8.209313490158426e-06, "loss": 0.1195, "step": 24560 }, { "epoch": 5.9, "learning_rate": 8.204512722035526e-06, "loss": 0.0475, "step": 24570 }, { "epoch": 5.9, "learning_rate": 8.199711953912627e-06, "loss": 0.2125, "step": 24580 }, { "epoch": 5.9, "learning_rate": 8.194911185789726e-06, "loss": 0.1063, "step": 24590 }, { "epoch": 5.9, "learning_rate": 8.190110417666827e-06, "loss": 0.2058, "step": 24600 }, { "epoch": 5.91, "learning_rate": 8.185309649543928e-06, "loss": 0.1106, "step": 24610 }, { "epoch": 5.91, "learning_rate": 8.180508881421027e-06, "loss": 0.0819, "step": 24620 }, { "epoch": 5.91, "learning_rate": 8.175708113298128e-06, "loss": 0.1393, "step": 24630 }, { "epoch": 5.91, "learning_rate": 8.170907345175229e-06, "loss": 0.2128, "step": 24640 }, { "epoch": 5.92, "learning_rate": 8.16610657705233e-06, "loss": 0.1396, "step": 24650 }, { "epoch": 5.92, "learning_rate": 8.161305808929429e-06, "loss": 0.0379, "step": 24660 }, { "epoch": 5.92, "learning_rate": 8.15650504080653e-06, "loss": 0.3127, "step": 24670 }, { "epoch": 5.92, "learning_rate": 8.15170427268363e-06, "loss": 0.054, "step": 24680 }, { "epoch": 5.93, "learning_rate": 8.14690350456073e-06, "loss": 0.143, "step": 24690 }, { "epoch": 5.93, "learning_rate": 8.14210273643783e-06, "loss": 0.246, "step": 24700 }, { "epoch": 5.93, "learning_rate": 8.137301968314931e-06, "loss": 0.174, "step": 24710 }, { "epoch": 5.93, "learning_rate": 8.13250120019203e-06, "loss": 0.146, "step": 24720 }, { "epoch": 5.94, "learning_rate": 8.127700432069131e-06, "loss": 0.1204, "step": 24730 }, { "epoch": 5.94, "learning_rate": 8.122899663946232e-06, "loss": 0.085, "step": 24740 }, { "epoch": 5.94, "learning_rate": 8.118098895823331e-06, "loss": 0.0923, "step": 24750 }, { "epoch": 5.94, "learning_rate": 8.113298127700432e-06, "loss": 0.1449, "step": 24760 }, { "epoch": 5.95, "learning_rate": 8.108497359577533e-06, "loss": 0.0448, "step": 24770 }, { "epoch": 5.95, "learning_rate": 8.103696591454634e-06, "loss": 0.1176, "step": 24780 }, { "epoch": 5.95, "learning_rate": 8.098895823331733e-06, "loss": 0.15, "step": 24790 }, { "epoch": 5.95, "learning_rate": 8.094095055208834e-06, "loss": 0.0147, "step": 24800 }, { "epoch": 5.96, "learning_rate": 8.089294287085935e-06, "loss": 0.1931, "step": 24810 }, { "epoch": 5.96, "learning_rate": 8.084493518963034e-06, "loss": 0.2148, "step": 24820 }, { "epoch": 5.96, "learning_rate": 8.079692750840135e-06, "loss": 0.1358, "step": 24830 }, { "epoch": 5.96, "learning_rate": 8.074891982717236e-06, "loss": 0.3344, "step": 24840 }, { "epoch": 5.96, "learning_rate": 8.070091214594335e-06, "loss": 0.2951, "step": 24850 }, { "epoch": 5.97, "learning_rate": 8.065290446471437e-06, "loss": 0.0591, "step": 24860 }, { "epoch": 5.97, "learning_rate": 8.060489678348536e-06, "loss": 0.088, "step": 24870 }, { "epoch": 5.97, "learning_rate": 8.055688910225636e-06, "loss": 0.05, "step": 24880 }, { "epoch": 5.97, "learning_rate": 8.050888142102738e-06, "loss": 0.1937, "step": 24890 }, { "epoch": 5.98, "learning_rate": 8.046087373979837e-06, "loss": 0.2958, "step": 24900 }, { "epoch": 5.98, "learning_rate": 8.041286605856938e-06, "loss": 0.1362, "step": 24910 }, { "epoch": 5.98, "learning_rate": 8.036485837734039e-06, "loss": 0.1428, "step": 24920 }, { "epoch": 5.98, "learning_rate": 8.031685069611138e-06, "loss": 0.1029, "step": 24930 }, { "epoch": 5.99, "learning_rate": 8.026884301488239e-06, "loss": 0.1845, "step": 24940 }, { "epoch": 5.99, "learning_rate": 8.02208353336534e-06, "loss": 0.2016, "step": 24950 }, { "epoch": 5.99, "learning_rate": 8.017282765242439e-06, "loss": 0.3287, "step": 24960 }, { "epoch": 5.99, "learning_rate": 8.01248199711954e-06, "loss": 0.2603, "step": 24970 }, { "epoch": 6.0, "learning_rate": 8.00768122899664e-06, "loss": 0.1813, "step": 24980 }, { "epoch": 6.0, "learning_rate": 8.002880460873742e-06, "loss": 0.0816, "step": 24990 }, { "epoch": 6.0, "eval_accuracy": 0.9988099285957157, "eval_loss": 0.004691378679126501, "eval_runtime": 511.5577, "eval_samples_per_second": 11.498, "eval_steps_per_second": 1.439, "step": 24996 }, { "epoch": 6.0, "learning_rate": 7.99807969275084e-06, "loss": 0.1807, "step": 25000 }, { "epoch": 6.0, "learning_rate": 7.993278924627941e-06, "loss": 0.2785, "step": 25010 }, { "epoch": 6.01, "learning_rate": 7.988478156505042e-06, "loss": 0.0749, "step": 25020 }, { "epoch": 6.01, "learning_rate": 7.983677388382141e-06, "loss": 0.1342, "step": 25030 }, { "epoch": 6.01, "learning_rate": 7.978876620259242e-06, "loss": 0.0924, "step": 25040 }, { "epoch": 6.01, "learning_rate": 7.974075852136343e-06, "loss": 0.2424, "step": 25050 }, { "epoch": 6.02, "learning_rate": 7.969275084013442e-06, "loss": 0.0164, "step": 25060 }, { "epoch": 6.02, "learning_rate": 7.964474315890543e-06, "loss": 0.2166, "step": 25070 }, { "epoch": 6.02, "learning_rate": 7.959673547767644e-06, "loss": 0.2773, "step": 25080 }, { "epoch": 6.02, "learning_rate": 7.954872779644743e-06, "loss": 0.1265, "step": 25090 }, { "epoch": 6.02, "learning_rate": 7.950072011521844e-06, "loss": 0.1849, "step": 25100 }, { "epoch": 6.03, "learning_rate": 7.945271243398945e-06, "loss": 0.0658, "step": 25110 }, { "epoch": 6.03, "learning_rate": 7.940470475276046e-06, "loss": 0.0178, "step": 25120 }, { "epoch": 6.03, "learning_rate": 7.935669707153145e-06, "loss": 0.0527, "step": 25130 }, { "epoch": 6.03, "learning_rate": 7.930868939030246e-06, "loss": 0.112, "step": 25140 }, { "epoch": 6.04, "learning_rate": 7.926068170907347e-06, "loss": 0.1347, "step": 25150 }, { "epoch": 6.04, "learning_rate": 7.921267402784446e-06, "loss": 0.0655, "step": 25160 }, { "epoch": 6.04, "learning_rate": 7.916466634661547e-06, "loss": 0.2298, "step": 25170 }, { "epoch": 6.04, "learning_rate": 7.911665866538647e-06, "loss": 0.2421, "step": 25180 }, { "epoch": 6.05, "learning_rate": 7.906865098415747e-06, "loss": 0.0634, "step": 25190 }, { "epoch": 6.05, "learning_rate": 7.902064330292847e-06, "loss": 0.3296, "step": 25200 }, { "epoch": 6.05, "learning_rate": 7.897263562169948e-06, "loss": 0.0632, "step": 25210 }, { "epoch": 6.05, "learning_rate": 7.892462794047047e-06, "loss": 0.2612, "step": 25220 }, { "epoch": 6.06, "learning_rate": 7.887662025924148e-06, "loss": 0.2561, "step": 25230 }, { "epoch": 6.06, "learning_rate": 7.882861257801249e-06, "loss": 0.0797, "step": 25240 }, { "epoch": 6.06, "learning_rate": 7.87806048967835e-06, "loss": 0.0996, "step": 25250 }, { "epoch": 6.06, "learning_rate": 7.873259721555449e-06, "loss": 0.2585, "step": 25260 }, { "epoch": 6.07, "learning_rate": 7.86845895343255e-06, "loss": 0.0933, "step": 25270 }, { "epoch": 6.07, "learning_rate": 7.86365818530965e-06, "loss": 0.15, "step": 25280 }, { "epoch": 6.07, "learning_rate": 7.85885741718675e-06, "loss": 0.0967, "step": 25290 }, { "epoch": 6.07, "learning_rate": 7.85405664906385e-06, "loss": 0.1467, "step": 25300 }, { "epoch": 6.08, "learning_rate": 7.849255880940952e-06, "loss": 0.1004, "step": 25310 }, { "epoch": 6.08, "learning_rate": 7.84445511281805e-06, "loss": 0.0888, "step": 25320 }, { "epoch": 6.08, "learning_rate": 7.839654344695152e-06, "loss": 0.3506, "step": 25330 }, { "epoch": 6.08, "learning_rate": 7.834853576572252e-06, "loss": 0.0876, "step": 25340 }, { "epoch": 6.08, "learning_rate": 7.830052808449352e-06, "loss": 0.0225, "step": 25350 }, { "epoch": 6.09, "learning_rate": 7.825252040326452e-06, "loss": 0.3848, "step": 25360 }, { "epoch": 6.09, "learning_rate": 7.820451272203553e-06, "loss": 0.0576, "step": 25370 }, { "epoch": 6.09, "learning_rate": 7.815650504080654e-06, "loss": 0.1384, "step": 25380 }, { "epoch": 6.09, "learning_rate": 7.810849735957753e-06, "loss": 0.0694, "step": 25390 }, { "epoch": 6.1, "learning_rate": 7.806048967834854e-06, "loss": 0.1307, "step": 25400 }, { "epoch": 6.1, "learning_rate": 7.801248199711955e-06, "loss": 0.1658, "step": 25410 }, { "epoch": 6.1, "learning_rate": 7.796447431589054e-06, "loss": 0.1527, "step": 25420 }, { "epoch": 6.1, "learning_rate": 7.791646663466155e-06, "loss": 0.0733, "step": 25430 }, { "epoch": 6.11, "learning_rate": 7.786845895343256e-06, "loss": 0.1692, "step": 25440 }, { "epoch": 6.11, "learning_rate": 7.782045127220355e-06, "loss": 0.0852, "step": 25450 }, { "epoch": 6.11, "learning_rate": 7.777244359097457e-06, "loss": 0.2594, "step": 25460 }, { "epoch": 6.11, "learning_rate": 7.772443590974557e-06, "loss": 0.0741, "step": 25470 }, { "epoch": 6.12, "learning_rate": 7.767642822851656e-06, "loss": 0.0687, "step": 25480 }, { "epoch": 6.12, "learning_rate": 7.762842054728758e-06, "loss": 0.17, "step": 25490 }, { "epoch": 6.12, "learning_rate": 7.758041286605857e-06, "loss": 0.0603, "step": 25500 }, { "epoch": 6.12, "learning_rate": 7.753240518482958e-06, "loss": 0.0806, "step": 25510 }, { "epoch": 6.13, "learning_rate": 7.748439750360059e-06, "loss": 0.1253, "step": 25520 }, { "epoch": 6.13, "learning_rate": 7.743638982237158e-06, "loss": 0.0558, "step": 25530 }, { "epoch": 6.13, "learning_rate": 7.738838214114259e-06, "loss": 0.1631, "step": 25540 }, { "epoch": 6.13, "learning_rate": 7.73403744599136e-06, "loss": 0.0188, "step": 25550 }, { "epoch": 6.14, "learning_rate": 7.729236677868459e-06, "loss": 0.0871, "step": 25560 }, { "epoch": 6.14, "learning_rate": 7.72443590974556e-06, "loss": 0.1564, "step": 25570 }, { "epoch": 6.14, "learning_rate": 7.71963514162266e-06, "loss": 0.1638, "step": 25580 }, { "epoch": 6.14, "learning_rate": 7.714834373499762e-06, "loss": 0.1099, "step": 25590 }, { "epoch": 6.14, "learning_rate": 7.71003360537686e-06, "loss": 0.116, "step": 25600 }, { "epoch": 6.15, "learning_rate": 7.705232837253962e-06, "loss": 0.107, "step": 25610 }, { "epoch": 6.15, "learning_rate": 7.700432069131062e-06, "loss": 0.1544, "step": 25620 }, { "epoch": 6.15, "learning_rate": 7.695631301008162e-06, "loss": 0.1063, "step": 25630 }, { "epoch": 6.15, "learning_rate": 7.690830532885262e-06, "loss": 0.1224, "step": 25640 }, { "epoch": 6.16, "learning_rate": 7.686029764762363e-06, "loss": 0.0394, "step": 25650 }, { "epoch": 6.16, "learning_rate": 7.681228996639462e-06, "loss": 0.2104, "step": 25660 }, { "epoch": 6.16, "learning_rate": 7.676428228516563e-06, "loss": 0.1375, "step": 25670 }, { "epoch": 6.16, "learning_rate": 7.671627460393664e-06, "loss": 0.0978, "step": 25680 }, { "epoch": 6.17, "learning_rate": 7.666826692270763e-06, "loss": 0.1603, "step": 25690 }, { "epoch": 6.17, "learning_rate": 7.662025924147864e-06, "loss": 0.084, "step": 25700 }, { "epoch": 6.17, "learning_rate": 7.657225156024965e-06, "loss": 0.161, "step": 25710 }, { "epoch": 6.17, "learning_rate": 7.652424387902066e-06, "loss": 0.2226, "step": 25720 }, { "epoch": 6.18, "learning_rate": 7.647623619779165e-06, "loss": 0.1334, "step": 25730 }, { "epoch": 6.18, "learning_rate": 7.642822851656266e-06, "loss": 0.1843, "step": 25740 }, { "epoch": 6.18, "learning_rate": 7.638022083533367e-06, "loss": 0.2848, "step": 25750 }, { "epoch": 6.18, "learning_rate": 7.633221315410466e-06, "loss": 0.2861, "step": 25760 }, { "epoch": 6.19, "learning_rate": 7.628420547287566e-06, "loss": 0.1252, "step": 25770 }, { "epoch": 6.19, "learning_rate": 7.6236197791646675e-06, "loss": 0.0782, "step": 25780 }, { "epoch": 6.19, "learning_rate": 7.618819011041767e-06, "loss": 0.0714, "step": 25790 }, { "epoch": 6.19, "learning_rate": 7.614018242918868e-06, "loss": 0.2284, "step": 25800 }, { "epoch": 6.2, "learning_rate": 7.609217474795968e-06, "loss": 0.1255, "step": 25810 }, { "epoch": 6.2, "learning_rate": 7.6044167066730674e-06, "loss": 0.1299, "step": 25820 }, { "epoch": 6.2, "learning_rate": 7.599615938550169e-06, "loss": 0.2973, "step": 25830 }, { "epoch": 6.2, "learning_rate": 7.594815170427269e-06, "loss": 0.2841, "step": 25840 }, { "epoch": 6.2, "learning_rate": 7.59001440230437e-06, "loss": 0.0801, "step": 25850 }, { "epoch": 6.21, "learning_rate": 7.58521363418147e-06, "loss": 0.2105, "step": 25860 }, { "epoch": 6.21, "learning_rate": 7.58041286605857e-06, "loss": 0.2079, "step": 25870 }, { "epoch": 6.21, "learning_rate": 7.575612097935671e-06, "loss": 0.0622, "step": 25880 }, { "epoch": 6.21, "learning_rate": 7.570811329812771e-06, "loss": 0.036, "step": 25890 }, { "epoch": 6.22, "learning_rate": 7.566010561689871e-06, "loss": 0.2376, "step": 25900 }, { "epoch": 6.22, "learning_rate": 7.561209793566972e-06, "loss": 0.235, "step": 25910 }, { "epoch": 6.22, "learning_rate": 7.556409025444072e-06, "loss": 0.225, "step": 25920 }, { "epoch": 6.22, "learning_rate": 7.5516082573211725e-06, "loss": 0.0561, "step": 25930 }, { "epoch": 6.23, "learning_rate": 7.5468074891982725e-06, "loss": 0.0468, "step": 25940 }, { "epoch": 6.23, "learning_rate": 7.5420067210753725e-06, "loss": 0.1827, "step": 25950 }, { "epoch": 6.23, "learning_rate": 7.537205952952473e-06, "loss": 0.0561, "step": 25960 }, { "epoch": 6.23, "learning_rate": 7.532405184829573e-06, "loss": 0.2067, "step": 25970 }, { "epoch": 6.24, "learning_rate": 7.527604416706674e-06, "loss": 0.119, "step": 25980 }, { "epoch": 6.24, "learning_rate": 7.522803648583774e-06, "loss": 0.1031, "step": 25990 }, { "epoch": 6.24, "learning_rate": 7.518002880460874e-06, "loss": 0.0797, "step": 26000 }, { "epoch": 6.24, "learning_rate": 7.513202112337975e-06, "loss": 0.0839, "step": 26010 }, { "epoch": 6.25, "learning_rate": 7.508401344215075e-06, "loss": 0.2089, "step": 26020 }, { "epoch": 6.25, "learning_rate": 7.503600576092175e-06, "loss": 0.1383, "step": 26030 }, { "epoch": 6.25, "learning_rate": 7.498799807969276e-06, "loss": 0.1204, "step": 26040 }, { "epoch": 6.25, "learning_rate": 7.493999039846376e-06, "loss": 0.0502, "step": 26050 }, { "epoch": 6.26, "learning_rate": 7.489198271723477e-06, "loss": 0.0454, "step": 26060 }, { "epoch": 6.26, "learning_rate": 7.484397503600577e-06, "loss": 0.1206, "step": 26070 }, { "epoch": 6.26, "learning_rate": 7.479596735477677e-06, "loss": 0.0891, "step": 26080 }, { "epoch": 6.26, "learning_rate": 7.4747959673547775e-06, "loss": 0.1036, "step": 26090 }, { "epoch": 6.27, "learning_rate": 7.4699951992318775e-06, "loss": 0.1232, "step": 26100 }, { "epoch": 6.27, "learning_rate": 7.465194431108978e-06, "loss": 0.1374, "step": 26110 }, { "epoch": 6.27, "learning_rate": 7.460393662986078e-06, "loss": 0.1454, "step": 26120 }, { "epoch": 6.27, "learning_rate": 7.455592894863178e-06, "loss": 0.1978, "step": 26130 }, { "epoch": 6.27, "learning_rate": 7.450792126740279e-06, "loss": 0.0285, "step": 26140 }, { "epoch": 6.28, "learning_rate": 7.445991358617379e-06, "loss": 0.1201, "step": 26150 }, { "epoch": 6.28, "learning_rate": 7.441190590494479e-06, "loss": 0.1305, "step": 26160 }, { "epoch": 6.28, "learning_rate": 7.43638982237158e-06, "loss": 0.1754, "step": 26170 }, { "epoch": 6.28, "learning_rate": 7.43158905424868e-06, "loss": 0.1374, "step": 26180 }, { "epoch": 6.29, "learning_rate": 7.426788286125781e-06, "loss": 0.1109, "step": 26190 }, { "epoch": 6.29, "learning_rate": 7.421987518002881e-06, "loss": 0.1079, "step": 26200 }, { "epoch": 6.29, "learning_rate": 7.417186749879981e-06, "loss": 0.0572, "step": 26210 }, { "epoch": 6.29, "learning_rate": 7.412385981757082e-06, "loss": 0.0358, "step": 26220 }, { "epoch": 6.3, "learning_rate": 7.407585213634182e-06, "loss": 0.1026, "step": 26230 }, { "epoch": 6.3, "learning_rate": 7.4027844455112825e-06, "loss": 0.1215, "step": 26240 }, { "epoch": 6.3, "learning_rate": 7.3979836773883825e-06, "loss": 0.1265, "step": 26250 }, { "epoch": 6.3, "learning_rate": 7.3931829092654825e-06, "loss": 0.119, "step": 26260 }, { "epoch": 6.31, "learning_rate": 7.388382141142584e-06, "loss": 0.0374, "step": 26270 }, { "epoch": 6.31, "learning_rate": 7.383581373019683e-06, "loss": 0.2972, "step": 26280 }, { "epoch": 6.31, "learning_rate": 7.378780604896783e-06, "loss": 0.1746, "step": 26290 }, { "epoch": 6.31, "learning_rate": 7.373979836773885e-06, "loss": 0.1265, "step": 26300 }, { "epoch": 6.32, "learning_rate": 7.369179068650984e-06, "loss": 0.1768, "step": 26310 }, { "epoch": 6.32, "learning_rate": 7.364378300528086e-06, "loss": 0.2109, "step": 26320 }, { "epoch": 6.32, "learning_rate": 7.359577532405186e-06, "loss": 0.2025, "step": 26330 }, { "epoch": 6.32, "learning_rate": 7.354776764282285e-06, "loss": 0.0017, "step": 26340 }, { "epoch": 6.33, "learning_rate": 7.349975996159387e-06, "loss": 0.17, "step": 26350 }, { "epoch": 6.33, "learning_rate": 7.345175228036487e-06, "loss": 0.2705, "step": 26360 }, { "epoch": 6.33, "learning_rate": 7.3403744599135876e-06, "loss": 0.1737, "step": 26370 }, { "epoch": 6.33, "learning_rate": 7.3355736917906876e-06, "loss": 0.3457, "step": 26380 }, { "epoch": 6.33, "learning_rate": 7.3307729236677875e-06, "loss": 0.269, "step": 26390 }, { "epoch": 6.34, "learning_rate": 7.325972155544888e-06, "loss": 0.0642, "step": 26400 }, { "epoch": 6.34, "learning_rate": 7.321171387421988e-06, "loss": 0.2375, "step": 26410 }, { "epoch": 6.34, "learning_rate": 7.316370619299088e-06, "loss": 0.04, "step": 26420 }, { "epoch": 6.34, "learning_rate": 7.311569851176189e-06, "loss": 0.2352, "step": 26430 }, { "epoch": 6.35, "learning_rate": 7.306769083053289e-06, "loss": 0.1182, "step": 26440 }, { "epoch": 6.35, "learning_rate": 7.30196831493039e-06, "loss": 0.1019, "step": 26450 }, { "epoch": 6.35, "learning_rate": 7.29716754680749e-06, "loss": 0.1283, "step": 26460 }, { "epoch": 6.35, "learning_rate": 7.29236677868459e-06, "loss": 0.1895, "step": 26470 }, { "epoch": 6.36, "learning_rate": 7.287566010561691e-06, "loss": 0.0406, "step": 26480 }, { "epoch": 6.36, "learning_rate": 7.282765242438791e-06, "loss": 0.1872, "step": 26490 }, { "epoch": 6.36, "learning_rate": 7.277964474315891e-06, "loss": 0.1373, "step": 26500 }, { "epoch": 6.36, "learning_rate": 7.273163706192992e-06, "loss": 0.2961, "step": 26510 }, { "epoch": 6.37, "learning_rate": 7.268362938070092e-06, "loss": 0.0306, "step": 26520 }, { "epoch": 6.37, "learning_rate": 7.2635621699471926e-06, "loss": 0.1013, "step": 26530 }, { "epoch": 6.37, "learning_rate": 7.2587614018242926e-06, "loss": 0.0054, "step": 26540 }, { "epoch": 6.37, "learning_rate": 7.2539606337013926e-06, "loss": 0.2625, "step": 26550 }, { "epoch": 6.38, "learning_rate": 7.249159865578493e-06, "loss": 0.0668, "step": 26560 }, { "epoch": 6.38, "learning_rate": 7.244359097455593e-06, "loss": 0.2242, "step": 26570 }, { "epoch": 6.38, "learning_rate": 7.239558329332694e-06, "loss": 0.2363, "step": 26580 }, { "epoch": 6.38, "learning_rate": 7.234757561209794e-06, "loss": 0.0504, "step": 26590 }, { "epoch": 6.39, "learning_rate": 7.229956793086894e-06, "loss": 0.0536, "step": 26600 }, { "epoch": 6.39, "learning_rate": 7.225156024963995e-06, "loss": 0.1208, "step": 26610 }, { "epoch": 6.39, "learning_rate": 7.220355256841095e-06, "loss": 0.2168, "step": 26620 }, { "epoch": 6.39, "learning_rate": 7.215554488718195e-06, "loss": 0.1863, "step": 26630 }, { "epoch": 6.39, "learning_rate": 7.210753720595296e-06, "loss": 0.1843, "step": 26640 }, { "epoch": 6.4, "learning_rate": 7.205952952472396e-06, "loss": 0.1705, "step": 26650 }, { "epoch": 6.4, "learning_rate": 7.201152184349497e-06, "loss": 0.1628, "step": 26660 }, { "epoch": 6.4, "learning_rate": 7.196351416226597e-06, "loss": 0.0485, "step": 26670 }, { "epoch": 6.4, "learning_rate": 7.191550648103697e-06, "loss": 0.1267, "step": 26680 }, { "epoch": 6.41, "learning_rate": 7.186749879980798e-06, "loss": 0.1102, "step": 26690 }, { "epoch": 6.41, "learning_rate": 7.181949111857898e-06, "loss": 0.0499, "step": 26700 }, { "epoch": 6.41, "learning_rate": 7.1771483437349984e-06, "loss": 0.174, "step": 26710 }, { "epoch": 6.41, "learning_rate": 7.1723475756120984e-06, "loss": 0.0876, "step": 26720 }, { "epoch": 6.42, "learning_rate": 7.1675468074891984e-06, "loss": 0.1602, "step": 26730 }, { "epoch": 6.42, "learning_rate": 7.162746039366299e-06, "loss": 0.1286, "step": 26740 }, { "epoch": 6.42, "learning_rate": 7.157945271243399e-06, "loss": 0.265, "step": 26750 }, { "epoch": 6.42, "learning_rate": 7.153144503120499e-06, "loss": 0.1964, "step": 26760 }, { "epoch": 6.43, "learning_rate": 7.1483437349976e-06, "loss": 0.0691, "step": 26770 }, { "epoch": 6.43, "learning_rate": 7.1435429668747e-06, "loss": 0.1533, "step": 26780 }, { "epoch": 6.43, "learning_rate": 7.138742198751801e-06, "loss": 0.1206, "step": 26790 }, { "epoch": 6.43, "learning_rate": 7.133941430628901e-06, "loss": 0.139, "step": 26800 }, { "epoch": 6.44, "learning_rate": 7.129140662506001e-06, "loss": 0.1537, "step": 26810 }, { "epoch": 6.44, "learning_rate": 7.124339894383102e-06, "loss": 0.2052, "step": 26820 }, { "epoch": 6.44, "learning_rate": 7.119539126260202e-06, "loss": 0.0242, "step": 26830 }, { "epoch": 6.44, "learning_rate": 7.1147383581373035e-06, "loss": 0.1092, "step": 26840 }, { "epoch": 6.45, "learning_rate": 7.109937590014403e-06, "loss": 0.2086, "step": 26850 }, { "epoch": 6.45, "learning_rate": 7.105136821891503e-06, "loss": 0.1234, "step": 26860 }, { "epoch": 6.45, "learning_rate": 7.100336053768604e-06, "loss": 0.143, "step": 26870 }, { "epoch": 6.45, "learning_rate": 7.0955352856457035e-06, "loss": 0.1464, "step": 26880 }, { "epoch": 6.45, "learning_rate": 7.0907345175228034e-06, "loss": 0.0402, "step": 26890 }, { "epoch": 6.46, "learning_rate": 7.085933749399905e-06, "loss": 0.0983, "step": 26900 }, { "epoch": 6.46, "learning_rate": 7.081132981277004e-06, "loss": 0.0526, "step": 26910 }, { "epoch": 6.46, "learning_rate": 7.076332213154106e-06, "loss": 0.1907, "step": 26920 }, { "epoch": 6.46, "learning_rate": 7.071531445031206e-06, "loss": 0.2767, "step": 26930 }, { "epoch": 6.47, "learning_rate": 7.066730676908305e-06, "loss": 0.0913, "step": 26940 }, { "epoch": 6.47, "learning_rate": 7.061929908785407e-06, "loss": 0.0431, "step": 26950 }, { "epoch": 6.47, "learning_rate": 7.057129140662507e-06, "loss": 0.1337, "step": 26960 }, { "epoch": 6.47, "learning_rate": 7.052328372539608e-06, "loss": 0.1209, "step": 26970 }, { "epoch": 6.48, "learning_rate": 7.047527604416708e-06, "loss": 0.1396, "step": 26980 }, { "epoch": 6.48, "learning_rate": 7.042726836293808e-06, "loss": 0.084, "step": 26990 }, { "epoch": 6.48, "learning_rate": 7.0379260681709085e-06, "loss": 0.1096, "step": 27000 }, { "epoch": 6.48, "learning_rate": 7.0331253000480085e-06, "loss": 0.2033, "step": 27010 }, { "epoch": 6.49, "learning_rate": 7.0283245319251085e-06, "loss": 0.0877, "step": 27020 }, { "epoch": 6.49, "learning_rate": 7.023523763802209e-06, "loss": 0.0669, "step": 27030 }, { "epoch": 6.49, "learning_rate": 7.018722995679309e-06, "loss": 0.0395, "step": 27040 }, { "epoch": 6.49, "learning_rate": 7.01392222755641e-06, "loss": 0.185, "step": 27050 }, { "epoch": 6.5, "learning_rate": 7.00912145943351e-06, "loss": 0.0763, "step": 27060 }, { "epoch": 6.5, "learning_rate": 7.00432069131061e-06, "loss": 0.1283, "step": 27070 }, { "epoch": 6.5, "learning_rate": 6.999519923187711e-06, "loss": 0.0991, "step": 27080 }, { "epoch": 6.5, "learning_rate": 6.994719155064811e-06, "loss": 0.1041, "step": 27090 }, { "epoch": 6.51, "learning_rate": 6.989918386941912e-06, "loss": 0.1128, "step": 27100 }, { "epoch": 6.51, "learning_rate": 6.985117618819012e-06, "loss": 0.0976, "step": 27110 }, { "epoch": 6.51, "learning_rate": 6.980316850696112e-06, "loss": 0.2417, "step": 27120 }, { "epoch": 6.51, "learning_rate": 6.975516082573213e-06, "loss": 0.0686, "step": 27130 }, { "epoch": 6.51, "learning_rate": 6.970715314450313e-06, "loss": 0.0381, "step": 27140 }, { "epoch": 6.52, "learning_rate": 6.965914546327413e-06, "loss": 0.1548, "step": 27150 }, { "epoch": 6.52, "learning_rate": 6.9611137782045135e-06, "loss": 0.1737, "step": 27160 }, { "epoch": 6.52, "learning_rate": 6.9563130100816135e-06, "loss": 0.226, "step": 27170 }, { "epoch": 6.52, "learning_rate": 6.951512241958714e-06, "loss": 0.2837, "step": 27180 }, { "epoch": 6.53, "learning_rate": 6.946711473835814e-06, "loss": 0.2096, "step": 27190 }, { "epoch": 6.53, "learning_rate": 6.941910705712914e-06, "loss": 0.1456, "step": 27200 }, { "epoch": 6.53, "learning_rate": 6.937109937590015e-06, "loss": 0.1104, "step": 27210 }, { "epoch": 6.53, "learning_rate": 6.932309169467115e-06, "loss": 0.1061, "step": 27220 }, { "epoch": 6.54, "learning_rate": 6.927508401344215e-06, "loss": 0.1684, "step": 27230 }, { "epoch": 6.54, "learning_rate": 6.922707633221316e-06, "loss": 0.183, "step": 27240 }, { "epoch": 6.54, "learning_rate": 6.917906865098416e-06, "loss": 0.1638, "step": 27250 }, { "epoch": 6.54, "learning_rate": 6.913106096975517e-06, "loss": 0.2148, "step": 27260 }, { "epoch": 6.55, "learning_rate": 6.908305328852617e-06, "loss": 0.1398, "step": 27270 }, { "epoch": 6.55, "learning_rate": 6.903504560729717e-06, "loss": 0.2507, "step": 27280 }, { "epoch": 6.55, "learning_rate": 6.898703792606818e-06, "loss": 0.1065, "step": 27290 }, { "epoch": 6.55, "learning_rate": 6.893903024483918e-06, "loss": 0.0586, "step": 27300 }, { "epoch": 6.56, "learning_rate": 6.8891022563610185e-06, "loss": 0.1674, "step": 27310 }, { "epoch": 6.56, "learning_rate": 6.8843014882381185e-06, "loss": 0.1305, "step": 27320 }, { "epoch": 6.56, "learning_rate": 6.8795007201152185e-06, "loss": 0.1653, "step": 27330 }, { "epoch": 6.56, "learning_rate": 6.874699951992319e-06, "loss": 0.2459, "step": 27340 }, { "epoch": 6.57, "learning_rate": 6.869899183869419e-06, "loss": 0.1194, "step": 27350 }, { "epoch": 6.57, "learning_rate": 6.865098415746519e-06, "loss": 0.0173, "step": 27360 }, { "epoch": 6.57, "learning_rate": 6.86029764762362e-06, "loss": 0.0501, "step": 27370 }, { "epoch": 6.57, "learning_rate": 6.85549687950072e-06, "loss": 0.1727, "step": 27380 }, { "epoch": 6.57, "learning_rate": 6.850696111377821e-06, "loss": 0.0827, "step": 27390 }, { "epoch": 6.58, "learning_rate": 6.845895343254921e-06, "loss": 0.0344, "step": 27400 }, { "epoch": 6.58, "learning_rate": 6.841094575132021e-06, "loss": 0.1478, "step": 27410 }, { "epoch": 6.58, "learning_rate": 6.836293807009122e-06, "loss": 0.0105, "step": 27420 }, { "epoch": 6.58, "learning_rate": 6.831493038886222e-06, "loss": 0.1823, "step": 27430 }, { "epoch": 6.59, "learning_rate": 6.8266922707633236e-06, "loss": 0.0478, "step": 27440 }, { "epoch": 6.59, "learning_rate": 6.821891502640423e-06, "loss": 0.1934, "step": 27450 }, { "epoch": 6.59, "learning_rate": 6.817090734517523e-06, "loss": 0.0652, "step": 27460 }, { "epoch": 6.59, "learning_rate": 6.812289966394624e-06, "loss": 0.1508, "step": 27470 }, { "epoch": 6.6, "learning_rate": 6.8074891982717235e-06, "loss": 0.0405, "step": 27480 }, { "epoch": 6.6, "learning_rate": 6.8026884301488235e-06, "loss": 0.106, "step": 27490 }, { "epoch": 6.6, "learning_rate": 6.797887662025925e-06, "loss": 0.2549, "step": 27500 }, { "epoch": 6.6, "learning_rate": 6.793086893903024e-06, "loss": 0.2154, "step": 27510 }, { "epoch": 6.61, "learning_rate": 6.788286125780126e-06, "loss": 0.2765, "step": 27520 }, { "epoch": 6.61, "learning_rate": 6.783485357657226e-06, "loss": 0.0385, "step": 27530 }, { "epoch": 6.61, "learning_rate": 6.778684589534325e-06, "loss": 0.2861, "step": 27540 }, { "epoch": 6.61, "learning_rate": 6.773883821411427e-06, "loss": 0.112, "step": 27550 }, { "epoch": 6.62, "learning_rate": 6.769083053288527e-06, "loss": 0.0288, "step": 27560 }, { "epoch": 6.62, "learning_rate": 6.764282285165628e-06, "loss": 0.0997, "step": 27570 }, { "epoch": 6.62, "learning_rate": 6.759481517042728e-06, "loss": 0.1873, "step": 27580 }, { "epoch": 6.62, "learning_rate": 6.754680748919828e-06, "loss": 0.1055, "step": 27590 }, { "epoch": 6.63, "learning_rate": 6.749879980796929e-06, "loss": 0.3592, "step": 27600 }, { "epoch": 6.63, "learning_rate": 6.745079212674029e-06, "loss": 0.1721, "step": 27610 }, { "epoch": 6.63, "learning_rate": 6.7402784445511286e-06, "loss": 0.0783, "step": 27620 }, { "epoch": 6.63, "learning_rate": 6.735477676428229e-06, "loss": 0.0941, "step": 27630 }, { "epoch": 6.63, "learning_rate": 6.730676908305329e-06, "loss": 0.1333, "step": 27640 }, { "epoch": 6.64, "learning_rate": 6.72587614018243e-06, "loss": 0.1835, "step": 27650 }, { "epoch": 6.64, "learning_rate": 6.72107537205953e-06, "loss": 0.0821, "step": 27660 }, { "epoch": 6.64, "learning_rate": 6.71627460393663e-06, "loss": 0.1152, "step": 27670 }, { "epoch": 6.64, "learning_rate": 6.711473835813731e-06, "loss": 0.1185, "step": 27680 }, { "epoch": 6.65, "learning_rate": 6.706673067690831e-06, "loss": 0.122, "step": 27690 }, { "epoch": 6.65, "learning_rate": 6.701872299567932e-06, "loss": 0.0585, "step": 27700 }, { "epoch": 6.65, "learning_rate": 6.697071531445032e-06, "loss": 0.1351, "step": 27710 }, { "epoch": 6.65, "learning_rate": 6.692270763322132e-06, "loss": 0.1063, "step": 27720 }, { "epoch": 6.66, "learning_rate": 6.687469995199233e-06, "loss": 0.1178, "step": 27730 }, { "epoch": 6.66, "learning_rate": 6.682669227076333e-06, "loss": 0.1179, "step": 27740 }, { "epoch": 6.66, "learning_rate": 6.677868458953433e-06, "loss": 0.0544, "step": 27750 }, { "epoch": 6.66, "learning_rate": 6.673067690830534e-06, "loss": 0.0045, "step": 27760 }, { "epoch": 6.67, "learning_rate": 6.668266922707634e-06, "loss": 0.1876, "step": 27770 }, { "epoch": 6.67, "learning_rate": 6.6634661545847344e-06, "loss": 0.2106, "step": 27780 }, { "epoch": 6.67, "learning_rate": 6.6586653864618344e-06, "loss": 0.0983, "step": 27790 }, { "epoch": 6.67, "learning_rate": 6.6538646183389344e-06, "loss": 0.3233, "step": 27800 }, { "epoch": 6.68, "learning_rate": 6.649063850216035e-06, "loss": 0.1251, "step": 27810 }, { "epoch": 6.68, "learning_rate": 6.644263082093135e-06, "loss": 0.142, "step": 27820 }, { "epoch": 6.68, "learning_rate": 6.639462313970236e-06, "loss": 0.0565, "step": 27830 }, { "epoch": 6.68, "learning_rate": 6.634661545847336e-06, "loss": 0.2549, "step": 27840 }, { "epoch": 6.69, "learning_rate": 6.629860777724436e-06, "loss": 0.0303, "step": 27850 }, { "epoch": 6.69, "learning_rate": 6.625060009601537e-06, "loss": 0.1001, "step": 27860 }, { "epoch": 6.69, "learning_rate": 6.620259241478637e-06, "loss": 0.131, "step": 27870 }, { "epoch": 6.69, "learning_rate": 6.615458473355737e-06, "loss": 0.1185, "step": 27880 }, { "epoch": 6.69, "learning_rate": 6.610657705232838e-06, "loss": 0.1637, "step": 27890 }, { "epoch": 6.7, "learning_rate": 6.605856937109938e-06, "loss": 0.2684, "step": 27900 }, { "epoch": 6.7, "learning_rate": 6.601056168987039e-06, "loss": 0.0268, "step": 27910 }, { "epoch": 6.7, "learning_rate": 6.596255400864139e-06, "loss": 0.0646, "step": 27920 }, { "epoch": 6.7, "learning_rate": 6.591454632741239e-06, "loss": 0.1919, "step": 27930 }, { "epoch": 6.71, "learning_rate": 6.5866538646183395e-06, "loss": 0.1586, "step": 27940 }, { "epoch": 6.71, "learning_rate": 6.5818530964954395e-06, "loss": 0.2948, "step": 27950 }, { "epoch": 6.71, "learning_rate": 6.5770523283725395e-06, "loss": 0.2266, "step": 27960 }, { "epoch": 6.71, "learning_rate": 6.57225156024964e-06, "loss": 0.1866, "step": 27970 }, { "epoch": 6.72, "learning_rate": 6.56745079212674e-06, "loss": 0.0298, "step": 27980 }, { "epoch": 6.72, "learning_rate": 6.562650024003841e-06, "loss": 0.2418, "step": 27990 }, { "epoch": 6.72, "learning_rate": 6.557849255880941e-06, "loss": 0.2283, "step": 28000 }, { "epoch": 6.72, "learning_rate": 6.553048487758041e-06, "loss": 0.2184, "step": 28010 }, { "epoch": 6.73, "learning_rate": 6.548247719635142e-06, "loss": 0.1073, "step": 28020 }, { "epoch": 6.73, "learning_rate": 6.543446951512242e-06, "loss": 0.1495, "step": 28030 }, { "epoch": 6.73, "learning_rate": 6.538646183389344e-06, "loss": 0.17, "step": 28040 }, { "epoch": 6.73, "learning_rate": 6.533845415266443e-06, "loss": 0.1112, "step": 28050 }, { "epoch": 6.74, "learning_rate": 6.529044647143543e-06, "loss": 0.1676, "step": 28060 }, { "epoch": 6.74, "learning_rate": 6.5242438790206445e-06, "loss": 0.189, "step": 28070 }, { "epoch": 6.74, "learning_rate": 6.5194431108977445e-06, "loss": 0.0018, "step": 28080 }, { "epoch": 6.74, "learning_rate": 6.514642342774844e-06, "loss": 0.0371, "step": 28090 }, { "epoch": 6.75, "learning_rate": 6.509841574651945e-06, "loss": 0.037, "step": 28100 }, { "epoch": 6.75, "learning_rate": 6.505040806529045e-06, "loss": 0.0021, "step": 28110 }, { "epoch": 6.75, "learning_rate": 6.500240038406146e-06, "loss": 0.0804, "step": 28120 }, { "epoch": 6.75, "learning_rate": 6.495439270283246e-06, "loss": 0.1476, "step": 28130 }, { "epoch": 6.75, "learning_rate": 6.490638502160346e-06, "loss": 0.139, "step": 28140 }, { "epoch": 6.76, "learning_rate": 6.485837734037447e-06, "loss": 0.0574, "step": 28150 }, { "epoch": 6.76, "learning_rate": 6.481036965914547e-06, "loss": 0.1628, "step": 28160 }, { "epoch": 6.76, "learning_rate": 6.476236197791648e-06, "loss": 0.2185, "step": 28170 }, { "epoch": 6.76, "learning_rate": 6.471435429668748e-06, "loss": 0.2074, "step": 28180 }, { "epoch": 6.77, "learning_rate": 6.466634661545848e-06, "loss": 0.0685, "step": 28190 }, { "epoch": 6.77, "learning_rate": 6.461833893422949e-06, "loss": 0.1158, "step": 28200 }, { "epoch": 6.77, "learning_rate": 6.457033125300049e-06, "loss": 0.0665, "step": 28210 }, { "epoch": 6.77, "learning_rate": 6.452232357177149e-06, "loss": 0.0428, "step": 28220 }, { "epoch": 6.78, "learning_rate": 6.4474315890542495e-06, "loss": 0.0921, "step": 28230 }, { "epoch": 6.78, "learning_rate": 6.4426308209313495e-06, "loss": 0.1071, "step": 28240 }, { "epoch": 6.78, "learning_rate": 6.43783005280845e-06, "loss": 0.2545, "step": 28250 }, { "epoch": 6.78, "learning_rate": 6.43302928468555e-06, "loss": 0.2955, "step": 28260 }, { "epoch": 6.79, "learning_rate": 6.42822851656265e-06, "loss": 0.1481, "step": 28270 }, { "epoch": 6.79, "learning_rate": 6.423427748439751e-06, "loss": 0.1973, "step": 28280 }, { "epoch": 6.79, "learning_rate": 6.418626980316851e-06, "loss": 0.0672, "step": 28290 }, { "epoch": 6.79, "learning_rate": 6.413826212193952e-06, "loss": 0.0613, "step": 28300 }, { "epoch": 6.8, "learning_rate": 6.409025444071052e-06, "loss": 0.0977, "step": 28310 }, { "epoch": 6.8, "learning_rate": 6.404224675948152e-06, "loss": 0.2991, "step": 28320 }, { "epoch": 6.8, "learning_rate": 6.399423907825253e-06, "loss": 0.066, "step": 28330 }, { "epoch": 6.8, "learning_rate": 6.394623139702353e-06, "loss": 0.0094, "step": 28340 }, { "epoch": 6.81, "learning_rate": 6.389822371579453e-06, "loss": 0.1725, "step": 28350 }, { "epoch": 6.81, "learning_rate": 6.385021603456554e-06, "loss": 0.0944, "step": 28360 }, { "epoch": 6.81, "learning_rate": 6.380220835333654e-06, "loss": 0.1518, "step": 28370 }, { "epoch": 6.81, "learning_rate": 6.3754200672107545e-06, "loss": 0.25, "step": 28380 }, { "epoch": 6.81, "learning_rate": 6.3706192990878545e-06, "loss": 0.1794, "step": 28390 }, { "epoch": 6.82, "learning_rate": 6.3658185309649545e-06, "loss": 0.1061, "step": 28400 }, { "epoch": 6.82, "learning_rate": 6.361017762842055e-06, "loss": 0.115, "step": 28410 }, { "epoch": 6.82, "learning_rate": 6.356216994719155e-06, "loss": 0.148, "step": 28420 }, { "epoch": 6.82, "learning_rate": 6.351416226596256e-06, "loss": 0.0749, "step": 28430 }, { "epoch": 6.83, "learning_rate": 6.346615458473356e-06, "loss": 0.0959, "step": 28440 }, { "epoch": 6.83, "learning_rate": 6.341814690350456e-06, "loss": 0.1134, "step": 28450 }, { "epoch": 6.83, "learning_rate": 6.337013922227557e-06, "loss": 0.1223, "step": 28460 }, { "epoch": 6.83, "learning_rate": 6.332213154104657e-06, "loss": 0.0427, "step": 28470 }, { "epoch": 6.84, "learning_rate": 6.327412385981757e-06, "loss": 0.0784, "step": 28480 }, { "epoch": 6.84, "learning_rate": 6.322611617858858e-06, "loss": 0.1584, "step": 28490 }, { "epoch": 6.84, "learning_rate": 6.317810849735958e-06, "loss": 0.2344, "step": 28500 }, { "epoch": 6.84, "learning_rate": 6.313010081613059e-06, "loss": 0.1309, "step": 28510 }, { "epoch": 6.85, "learning_rate": 6.308209313490159e-06, "loss": 0.0823, "step": 28520 }, { "epoch": 6.85, "learning_rate": 6.303408545367259e-06, "loss": 0.057, "step": 28530 }, { "epoch": 6.85, "learning_rate": 6.2986077772443596e-06, "loss": 0.2559, "step": 28540 }, { "epoch": 6.85, "learning_rate": 6.2938070091214596e-06, "loss": 0.0904, "step": 28550 }, { "epoch": 6.86, "learning_rate": 6.289006240998561e-06, "loss": 0.0722, "step": 28560 }, { "epoch": 6.86, "learning_rate": 6.28420547287566e-06, "loss": 0.1817, "step": 28570 }, { "epoch": 6.86, "learning_rate": 6.27940470475276e-06, "loss": 0.1436, "step": 28580 }, { "epoch": 6.86, "learning_rate": 6.274603936629862e-06, "loss": 0.1605, "step": 28590 }, { "epoch": 6.87, "learning_rate": 6.269803168506961e-06, "loss": 0.0104, "step": 28600 }, { "epoch": 6.87, "learning_rate": 6.265002400384061e-06, "loss": 0.0916, "step": 28610 }, { "epoch": 6.87, "learning_rate": 6.260201632261163e-06, "loss": 0.0928, "step": 28620 }, { "epoch": 6.87, "learning_rate": 6.255400864138262e-06, "loss": 0.1712, "step": 28630 }, { "epoch": 6.87, "learning_rate": 6.250600096015364e-06, "loss": 0.2175, "step": 28640 }, { "epoch": 6.88, "learning_rate": 6.245799327892464e-06, "loss": 0.2536, "step": 28650 }, { "epoch": 6.88, "learning_rate": 6.240998559769563e-06, "loss": 0.0635, "step": 28660 }, { "epoch": 6.88, "learning_rate": 6.236197791646665e-06, "loss": 0.0912, "step": 28670 }, { "epoch": 6.88, "learning_rate": 6.231397023523765e-06, "loss": 0.0915, "step": 28680 }, { "epoch": 6.89, "learning_rate": 6.226596255400864e-06, "loss": 0.0813, "step": 28690 }, { "epoch": 6.89, "learning_rate": 6.2217954872779654e-06, "loss": 0.0746, "step": 28700 }, { "epoch": 6.89, "learning_rate": 6.216994719155065e-06, "loss": 0.099, "step": 28710 }, { "epoch": 6.89, "learning_rate": 6.212193951032166e-06, "loss": 0.1502, "step": 28720 }, { "epoch": 6.9, "learning_rate": 6.207393182909266e-06, "loss": 0.1392, "step": 28730 }, { "epoch": 6.9, "learning_rate": 6.202592414786366e-06, "loss": 0.2027, "step": 28740 }, { "epoch": 6.9, "learning_rate": 6.197791646663467e-06, "loss": 0.0262, "step": 28750 }, { "epoch": 6.9, "learning_rate": 6.192990878540567e-06, "loss": 0.0815, "step": 28760 }, { "epoch": 6.91, "learning_rate": 6.188190110417668e-06, "loss": 0.1663, "step": 28770 }, { "epoch": 6.91, "learning_rate": 6.183389342294768e-06, "loss": 0.121, "step": 28780 }, { "epoch": 6.91, "learning_rate": 6.178588574171868e-06, "loss": 0.0896, "step": 28790 }, { "epoch": 6.91, "learning_rate": 6.173787806048969e-06, "loss": 0.175, "step": 28800 }, { "epoch": 6.92, "learning_rate": 6.168987037926069e-06, "loss": 0.0343, "step": 28810 }, { "epoch": 6.92, "learning_rate": 6.164186269803169e-06, "loss": 0.0791, "step": 28820 }, { "epoch": 6.92, "learning_rate": 6.15938550168027e-06, "loss": 0.1533, "step": 28830 }, { "epoch": 6.92, "learning_rate": 6.15458473355737e-06, "loss": 0.0793, "step": 28840 }, { "epoch": 6.93, "learning_rate": 6.1497839654344704e-06, "loss": 0.2216, "step": 28850 }, { "epoch": 6.93, "learning_rate": 6.1449831973115704e-06, "loss": 0.0918, "step": 28860 }, { "epoch": 6.93, "learning_rate": 6.1401824291886704e-06, "loss": 0.1085, "step": 28870 }, { "epoch": 6.93, "learning_rate": 6.135381661065771e-06, "loss": 0.0291, "step": 28880 }, { "epoch": 6.93, "learning_rate": 6.130580892942871e-06, "loss": 0.0958, "step": 28890 }, { "epoch": 6.94, "learning_rate": 6.125780124819972e-06, "loss": 0.0622, "step": 28900 }, { "epoch": 6.94, "learning_rate": 6.120979356697072e-06, "loss": 0.0927, "step": 28910 }, { "epoch": 6.94, "learning_rate": 6.116178588574172e-06, "loss": 0.0762, "step": 28920 }, { "epoch": 6.94, "learning_rate": 6.111377820451273e-06, "loss": 0.0212, "step": 28930 }, { "epoch": 6.95, "learning_rate": 6.106577052328373e-06, "loss": 0.2022, "step": 28940 }, { "epoch": 6.95, "learning_rate": 6.101776284205473e-06, "loss": 0.2169, "step": 28950 }, { "epoch": 6.95, "learning_rate": 6.096975516082574e-06, "loss": 0.1167, "step": 28960 }, { "epoch": 6.95, "learning_rate": 6.092174747959674e-06, "loss": 0.1172, "step": 28970 }, { "epoch": 6.96, "learning_rate": 6.087373979836775e-06, "loss": 0.1195, "step": 28980 }, { "epoch": 6.96, "learning_rate": 6.082573211713875e-06, "loss": 0.0863, "step": 28990 }, { "epoch": 6.96, "learning_rate": 6.077772443590975e-06, "loss": 0.1059, "step": 29000 }, { "epoch": 6.96, "learning_rate": 6.0729716754680755e-06, "loss": 0.2612, "step": 29010 }, { "epoch": 6.97, "learning_rate": 6.0681709073451755e-06, "loss": 0.1776, "step": 29020 }, { "epoch": 6.97, "learning_rate": 6.063370139222276e-06, "loss": 0.1468, "step": 29030 }, { "epoch": 6.97, "learning_rate": 6.058569371099376e-06, "loss": 0.2081, "step": 29040 }, { "epoch": 6.97, "learning_rate": 6.053768602976476e-06, "loss": 0.0333, "step": 29050 }, { "epoch": 6.98, "learning_rate": 6.048967834853577e-06, "loss": 0.0971, "step": 29060 }, { "epoch": 6.98, "learning_rate": 6.044167066730677e-06, "loss": 0.0909, "step": 29070 }, { "epoch": 6.98, "learning_rate": 6.039366298607777e-06, "loss": 0.0325, "step": 29080 }, { "epoch": 6.98, "learning_rate": 6.034565530484878e-06, "loss": 0.1671, "step": 29090 }, { "epoch": 6.99, "learning_rate": 6.029764762361978e-06, "loss": 0.1051, "step": 29100 }, { "epoch": 6.99, "learning_rate": 6.024963994239079e-06, "loss": 0.1591, "step": 29110 }, { "epoch": 6.99, "learning_rate": 6.020163226116179e-06, "loss": 0.1524, "step": 29120 }, { "epoch": 6.99, "learning_rate": 6.015362457993279e-06, "loss": 0.131, "step": 29130 }, { "epoch": 6.99, "learning_rate": 6.01056168987038e-06, "loss": 0.1134, "step": 29140 }, { "epoch": 7.0, "learning_rate": 6.00576092174748e-06, "loss": 0.0578, "step": 29150 }, { "epoch": 7.0, "learning_rate": 6.000960153624581e-06, "loss": 0.0382, "step": 29160 }, { "epoch": 7.0, "eval_accuracy": 0.9989799387963277, "eval_loss": 0.0041446564719080925, "eval_runtime": 512.3759, "eval_samples_per_second": 11.48, "eval_steps_per_second": 1.436, "step": 29162 }, { "epoch": 7.0, "learning_rate": 5.9961593855016805e-06, "loss": 0.0427, "step": 29170 }, { "epoch": 7.0, "learning_rate": 5.9913586173787805e-06, "loss": 0.1351, "step": 29180 }, { "epoch": 7.01, "learning_rate": 5.986557849255882e-06, "loss": 0.0226, "step": 29190 }, { "epoch": 7.01, "learning_rate": 5.981757081132981e-06, "loss": 0.3388, "step": 29200 }, { "epoch": 7.01, "learning_rate": 5.976956313010081e-06, "loss": 0.1873, "step": 29210 }, { "epoch": 7.01, "learning_rate": 5.972155544887183e-06, "loss": 0.0675, "step": 29220 }, { "epoch": 7.02, "learning_rate": 5.967354776764282e-06, "loss": 0.1323, "step": 29230 }, { "epoch": 7.02, "learning_rate": 5.962554008641384e-06, "loss": 0.0186, "step": 29240 }, { "epoch": 7.02, "learning_rate": 5.957753240518484e-06, "loss": 0.1142, "step": 29250 }, { "epoch": 7.02, "learning_rate": 5.952952472395583e-06, "loss": 0.2242, "step": 29260 }, { "epoch": 7.03, "learning_rate": 5.948151704272685e-06, "loss": 0.1497, "step": 29270 }, { "epoch": 7.03, "learning_rate": 5.943350936149785e-06, "loss": 0.1228, "step": 29280 }, { "epoch": 7.03, "learning_rate": 5.9385501680268855e-06, "loss": 0.1011, "step": 29290 }, { "epoch": 7.03, "learning_rate": 5.9337493999039855e-06, "loss": 0.1164, "step": 29300 }, { "epoch": 7.04, "learning_rate": 5.9289486317810855e-06, "loss": 0.1214, "step": 29310 }, { "epoch": 7.04, "learning_rate": 5.924147863658186e-06, "loss": 0.1761, "step": 29320 }, { "epoch": 7.04, "learning_rate": 5.919347095535286e-06, "loss": 0.2364, "step": 29330 }, { "epoch": 7.04, "learning_rate": 5.914546327412386e-06, "loss": 0.2313, "step": 29340 }, { "epoch": 7.05, "learning_rate": 5.909745559289487e-06, "loss": 0.0919, "step": 29350 }, { "epoch": 7.05, "learning_rate": 5.904944791166587e-06, "loss": 0.1262, "step": 29360 }, { "epoch": 7.05, "learning_rate": 5.900144023043688e-06, "loss": 0.1081, "step": 29370 }, { "epoch": 7.05, "learning_rate": 5.895343254920788e-06, "loss": 0.0264, "step": 29380 }, { "epoch": 7.05, "learning_rate": 5.890542486797888e-06, "loss": 0.2043, "step": 29390 }, { "epoch": 7.06, "learning_rate": 5.885741718674989e-06, "loss": 0.1089, "step": 29400 }, { "epoch": 7.06, "learning_rate": 5.880940950552089e-06, "loss": 0.0956, "step": 29410 }, { "epoch": 7.06, "learning_rate": 5.876140182429189e-06, "loss": 0.1844, "step": 29420 }, { "epoch": 7.06, "learning_rate": 5.87133941430629e-06, "loss": 0.0381, "step": 29430 }, { "epoch": 7.07, "learning_rate": 5.86653864618339e-06, "loss": 0.0449, "step": 29440 }, { "epoch": 7.07, "learning_rate": 5.8617378780604905e-06, "loss": 0.1502, "step": 29450 }, { "epoch": 7.07, "learning_rate": 5.8569371099375905e-06, "loss": 0.0263, "step": 29460 }, { "epoch": 7.07, "learning_rate": 5.8521363418146905e-06, "loss": 0.1507, "step": 29470 }, { "epoch": 7.08, "learning_rate": 5.847335573691791e-06, "loss": 0.1064, "step": 29480 }, { "epoch": 7.08, "learning_rate": 5.842534805568891e-06, "loss": 0.0999, "step": 29490 }, { "epoch": 7.08, "learning_rate": 5.837734037445992e-06, "loss": 0.0132, "step": 29500 }, { "epoch": 7.08, "learning_rate": 5.832933269323092e-06, "loss": 0.2197, "step": 29510 }, { "epoch": 7.09, "learning_rate": 5.828132501200192e-06, "loss": 0.0369, "step": 29520 }, { "epoch": 7.09, "learning_rate": 5.823331733077293e-06, "loss": 0.1048, "step": 29530 }, { "epoch": 7.09, "learning_rate": 5.818530964954393e-06, "loss": 0.0706, "step": 29540 }, { "epoch": 7.09, "learning_rate": 5.813730196831493e-06, "loss": 0.1868, "step": 29550 }, { "epoch": 7.1, "learning_rate": 5.808929428708594e-06, "loss": 0.087, "step": 29560 }, { "epoch": 7.1, "learning_rate": 5.804128660585694e-06, "loss": 0.0398, "step": 29570 }, { "epoch": 7.1, "learning_rate": 5.799327892462795e-06, "loss": 0.1526, "step": 29580 }, { "epoch": 7.1, "learning_rate": 5.794527124339895e-06, "loss": 0.1448, "step": 29590 }, { "epoch": 7.11, "learning_rate": 5.789726356216995e-06, "loss": 0.1866, "step": 29600 }, { "epoch": 7.11, "learning_rate": 5.7849255880940956e-06, "loss": 0.0975, "step": 29610 }, { "epoch": 7.11, "learning_rate": 5.7801248199711956e-06, "loss": 0.0988, "step": 29620 }, { "epoch": 7.11, "learning_rate": 5.775324051848296e-06, "loss": 0.1986, "step": 29630 }, { "epoch": 7.11, "learning_rate": 5.770523283725396e-06, "loss": 0.1569, "step": 29640 }, { "epoch": 7.12, "learning_rate": 5.765722515602496e-06, "loss": 0.1155, "step": 29650 }, { "epoch": 7.12, "learning_rate": 5.760921747479597e-06, "loss": 0.1231, "step": 29660 }, { "epoch": 7.12, "learning_rate": 5.756120979356697e-06, "loss": 0.1436, "step": 29670 }, { "epoch": 7.12, "learning_rate": 5.751320211233797e-06, "loss": 0.1439, "step": 29680 }, { "epoch": 7.13, "learning_rate": 5.746519443110898e-06, "loss": 0.2206, "step": 29690 }, { "epoch": 7.13, "learning_rate": 5.741718674987998e-06, "loss": 0.1349, "step": 29700 }, { "epoch": 7.13, "learning_rate": 5.736917906865099e-06, "loss": 0.1502, "step": 29710 }, { "epoch": 7.13, "learning_rate": 5.732117138742199e-06, "loss": 0.2065, "step": 29720 }, { "epoch": 7.14, "learning_rate": 5.727316370619299e-06, "loss": 0.0464, "step": 29730 }, { "epoch": 7.14, "learning_rate": 5.7225156024964e-06, "loss": 0.0204, "step": 29740 }, { "epoch": 7.14, "learning_rate": 5.7177148343735e-06, "loss": 0.1609, "step": 29750 }, { "epoch": 7.14, "learning_rate": 5.7129140662506014e-06, "loss": 0.1828, "step": 29760 }, { "epoch": 7.15, "learning_rate": 5.708113298127701e-06, "loss": 0.1521, "step": 29770 }, { "epoch": 7.15, "learning_rate": 5.703312530004801e-06, "loss": 0.1619, "step": 29780 }, { "epoch": 7.15, "learning_rate": 5.698511761881902e-06, "loss": 0.0323, "step": 29790 }, { "epoch": 7.15, "learning_rate": 5.693710993759001e-06, "loss": 0.0438, "step": 29800 }, { "epoch": 7.16, "learning_rate": 5.688910225636101e-06, "loss": 0.1842, "step": 29810 }, { "epoch": 7.16, "learning_rate": 5.684109457513203e-06, "loss": 0.1751, "step": 29820 }, { "epoch": 7.16, "learning_rate": 5.679308689390302e-06, "loss": 0.1748, "step": 29830 }, { "epoch": 7.16, "learning_rate": 5.674507921267404e-06, "loss": 0.1454, "step": 29840 }, { "epoch": 7.17, "learning_rate": 5.669707153144504e-06, "loss": 0.1514, "step": 29850 }, { "epoch": 7.17, "learning_rate": 5.664906385021603e-06, "loss": 0.0321, "step": 29860 }, { "epoch": 7.17, "learning_rate": 5.660105616898705e-06, "loss": 0.0465, "step": 29870 }, { "epoch": 7.17, "learning_rate": 5.655304848775805e-06, "loss": 0.1633, "step": 29880 }, { "epoch": 7.17, "learning_rate": 5.650504080652906e-06, "loss": 0.2442, "step": 29890 }, { "epoch": 7.18, "learning_rate": 5.645703312530006e-06, "loss": 0.1108, "step": 29900 }, { "epoch": 7.18, "learning_rate": 5.640902544407106e-06, "loss": 0.1613, "step": 29910 }, { "epoch": 7.18, "learning_rate": 5.6361017762842065e-06, "loss": 0.0198, "step": 29920 }, { "epoch": 7.18, "learning_rate": 5.6313010081613065e-06, "loss": 0.0772, "step": 29930 }, { "epoch": 7.19, "learning_rate": 5.6265002400384064e-06, "loss": 0.0637, "step": 29940 }, { "epoch": 7.19, "learning_rate": 5.621699471915507e-06, "loss": 0.0334, "step": 29950 }, { "epoch": 7.19, "learning_rate": 5.616898703792607e-06, "loss": 0.1742, "step": 29960 }, { "epoch": 7.19, "learning_rate": 5.612097935669708e-06, "loss": 0.2029, "step": 29970 }, { "epoch": 7.2, "learning_rate": 5.607297167546808e-06, "loss": 0.0441, "step": 29980 }, { "epoch": 7.2, "learning_rate": 5.602496399423908e-06, "loss": 0.0548, "step": 29990 }, { "epoch": 7.2, "learning_rate": 5.597695631301009e-06, "loss": 0.1157, "step": 30000 }, { "epoch": 7.2, "learning_rate": 5.592894863178109e-06, "loss": 0.1097, "step": 30010 }, { "epoch": 7.21, "learning_rate": 5.58809409505521e-06, "loss": 0.2888, "step": 30020 }, { "epoch": 7.21, "learning_rate": 5.58329332693231e-06, "loss": 0.1438, "step": 30030 }, { "epoch": 7.21, "learning_rate": 5.57849255880941e-06, "loss": 0.0536, "step": 30040 }, { "epoch": 7.21, "learning_rate": 5.573691790686511e-06, "loss": 0.1964, "step": 30050 }, { "epoch": 7.22, "learning_rate": 5.568891022563611e-06, "loss": 0.1537, "step": 30060 }, { "epoch": 7.22, "learning_rate": 5.564090254440711e-06, "loss": 0.182, "step": 30070 }, { "epoch": 7.22, "learning_rate": 5.5592894863178115e-06, "loss": 0.1942, "step": 30080 }, { "epoch": 7.22, "learning_rate": 5.5544887181949115e-06, "loss": 0.1047, "step": 30090 }, { "epoch": 7.23, "learning_rate": 5.549687950072012e-06, "loss": 0.0543, "step": 30100 }, { "epoch": 7.23, "learning_rate": 5.544887181949112e-06, "loss": 0.1204, "step": 30110 }, { "epoch": 7.23, "learning_rate": 5.540086413826212e-06, "loss": 0.0519, "step": 30120 }, { "epoch": 7.23, "learning_rate": 5.535285645703313e-06, "loss": 0.2937, "step": 30130 }, { "epoch": 7.23, "learning_rate": 5.530484877580413e-06, "loss": 0.1944, "step": 30140 }, { "epoch": 7.24, "learning_rate": 5.525684109457513e-06, "loss": 0.1251, "step": 30150 }, { "epoch": 7.24, "learning_rate": 5.520883341334614e-06, "loss": 0.1634, "step": 30160 }, { "epoch": 7.24, "learning_rate": 5.516082573211714e-06, "loss": 0.1002, "step": 30170 }, { "epoch": 7.24, "learning_rate": 5.511281805088815e-06, "loss": 0.1376, "step": 30180 }, { "epoch": 7.25, "learning_rate": 5.506481036965915e-06, "loss": 0.1307, "step": 30190 }, { "epoch": 7.25, "learning_rate": 5.501680268843015e-06, "loss": 0.241, "step": 30200 }, { "epoch": 7.25, "learning_rate": 5.496879500720116e-06, "loss": 0.1038, "step": 30210 }, { "epoch": 7.25, "learning_rate": 5.492078732597216e-06, "loss": 0.1768, "step": 30220 }, { "epoch": 7.26, "learning_rate": 5.4872779644743165e-06, "loss": 0.1427, "step": 30230 }, { "epoch": 7.26, "learning_rate": 5.4824771963514165e-06, "loss": 0.1265, "step": 30240 }, { "epoch": 7.26, "learning_rate": 5.4776764282285165e-06, "loss": 0.0629, "step": 30250 }, { "epoch": 7.26, "learning_rate": 5.472875660105617e-06, "loss": 0.1155, "step": 30260 }, { "epoch": 7.27, "learning_rate": 5.468074891982717e-06, "loss": 0.0592, "step": 30270 }, { "epoch": 7.27, "learning_rate": 5.463274123859817e-06, "loss": 0.2225, "step": 30280 }, { "epoch": 7.27, "learning_rate": 5.458473355736918e-06, "loss": 0.1028, "step": 30290 }, { "epoch": 7.27, "learning_rate": 5.453672587614018e-06, "loss": 0.0832, "step": 30300 }, { "epoch": 7.28, "learning_rate": 5.448871819491119e-06, "loss": 0.0684, "step": 30310 }, { "epoch": 7.28, "learning_rate": 5.444071051368219e-06, "loss": 0.1697, "step": 30320 }, { "epoch": 7.28, "learning_rate": 5.439270283245319e-06, "loss": 0.1072, "step": 30330 }, { "epoch": 7.28, "learning_rate": 5.434469515122421e-06, "loss": 0.2466, "step": 30340 }, { "epoch": 7.29, "learning_rate": 5.42966874699952e-06, "loss": 0.1203, "step": 30350 }, { "epoch": 7.29, "learning_rate": 5.4248679788766215e-06, "loss": 0.0798, "step": 30360 }, { "epoch": 7.29, "learning_rate": 5.4200672107537215e-06, "loss": 0.0639, "step": 30370 }, { "epoch": 7.29, "learning_rate": 5.415266442630821e-06, "loss": 0.0231, "step": 30380 }, { "epoch": 7.29, "learning_rate": 5.410465674507922e-06, "loss": 0.1542, "step": 30390 }, { "epoch": 7.3, "learning_rate": 5.405664906385022e-06, "loss": 0.1338, "step": 30400 }, { "epoch": 7.3, "learning_rate": 5.4008641382621215e-06, "loss": 0.0452, "step": 30410 }, { "epoch": 7.3, "learning_rate": 5.396063370139223e-06, "loss": 0.1355, "step": 30420 }, { "epoch": 7.3, "learning_rate": 5.391262602016323e-06, "loss": 0.0517, "step": 30430 }, { "epoch": 7.31, "learning_rate": 5.386461833893424e-06, "loss": 0.0767, "step": 30440 }, { "epoch": 7.31, "learning_rate": 5.381661065770524e-06, "loss": 0.124, "step": 30450 }, { "epoch": 7.31, "learning_rate": 5.376860297647624e-06, "loss": 0.0329, "step": 30460 }, { "epoch": 7.31, "learning_rate": 5.372059529524725e-06, "loss": 0.1021, "step": 30470 }, { "epoch": 7.32, "learning_rate": 5.367258761401825e-06, "loss": 0.2603, "step": 30480 }, { "epoch": 7.32, "learning_rate": 5.362457993278926e-06, "loss": 0.057, "step": 30490 }, { "epoch": 7.32, "learning_rate": 5.357657225156026e-06, "loss": 0.0372, "step": 30500 }, { "epoch": 7.32, "learning_rate": 5.352856457033126e-06, "loss": 0.043, "step": 30510 }, { "epoch": 7.33, "learning_rate": 5.3480556889102266e-06, "loss": 0.0619, "step": 30520 }, { "epoch": 7.33, "learning_rate": 5.3432549207873265e-06, "loss": 0.1371, "step": 30530 }, { "epoch": 7.33, "learning_rate": 5.3384541526644265e-06, "loss": 0.0139, "step": 30540 }, { "epoch": 7.33, "learning_rate": 5.333653384541527e-06, "loss": 0.2777, "step": 30550 }, { "epoch": 7.34, "learning_rate": 5.328852616418627e-06, "loss": 0.0778, "step": 30560 }, { "epoch": 7.34, "learning_rate": 5.324051848295728e-06, "loss": 0.2275, "step": 30570 }, { "epoch": 7.34, "learning_rate": 5.319251080172828e-06, "loss": 0.0845, "step": 30580 }, { "epoch": 7.34, "learning_rate": 5.314450312049928e-06, "loss": 0.0851, "step": 30590 }, { "epoch": 7.35, "learning_rate": 5.309649543927029e-06, "loss": 0.0897, "step": 30600 }, { "epoch": 7.35, "learning_rate": 5.304848775804129e-06, "loss": 0.1516, "step": 30610 }, { "epoch": 7.35, "learning_rate": 5.30004800768123e-06, "loss": 0.1013, "step": 30620 }, { "epoch": 7.35, "learning_rate": 5.29524723955833e-06, "loss": 0.1872, "step": 30630 }, { "epoch": 7.35, "learning_rate": 5.29044647143543e-06, "loss": 0.1838, "step": 30640 }, { "epoch": 7.36, "learning_rate": 5.285645703312531e-06, "loss": 0.3258, "step": 30650 }, { "epoch": 7.36, "learning_rate": 5.280844935189631e-06, "loss": 0.1336, "step": 30660 }, { "epoch": 7.36, "learning_rate": 5.276044167066731e-06, "loss": 0.0836, "step": 30670 }, { "epoch": 7.36, "learning_rate": 5.2712433989438316e-06, "loss": 0.0752, "step": 30680 }, { "epoch": 7.37, "learning_rate": 5.2664426308209316e-06, "loss": 0.1298, "step": 30690 }, { "epoch": 7.37, "learning_rate": 5.261641862698032e-06, "loss": 0.1682, "step": 30700 }, { "epoch": 7.37, "learning_rate": 5.256841094575132e-06, "loss": 0.1267, "step": 30710 }, { "epoch": 7.37, "learning_rate": 5.252040326452232e-06, "loss": 0.3594, "step": 30720 }, { "epoch": 7.38, "learning_rate": 5.247239558329333e-06, "loss": 0.1903, "step": 30730 }, { "epoch": 7.38, "learning_rate": 5.242438790206433e-06, "loss": 0.1051, "step": 30740 }, { "epoch": 7.38, "learning_rate": 5.237638022083534e-06, "loss": 0.1873, "step": 30750 }, { "epoch": 7.38, "learning_rate": 5.232837253960634e-06, "loss": 0.1266, "step": 30760 }, { "epoch": 7.39, "learning_rate": 5.228036485837734e-06, "loss": 0.0347, "step": 30770 }, { "epoch": 7.39, "learning_rate": 5.223235717714835e-06, "loss": 0.1071, "step": 30780 }, { "epoch": 7.39, "learning_rate": 5.218434949591935e-06, "loss": 0.1605, "step": 30790 }, { "epoch": 7.39, "learning_rate": 5.213634181469035e-06, "loss": 0.1368, "step": 30800 }, { "epoch": 7.4, "learning_rate": 5.208833413346136e-06, "loss": 0.146, "step": 30810 }, { "epoch": 7.4, "learning_rate": 5.204032645223236e-06, "loss": 0.1211, "step": 30820 }, { "epoch": 7.4, "learning_rate": 5.199231877100337e-06, "loss": 0.1252, "step": 30830 }, { "epoch": 7.4, "learning_rate": 5.194431108977437e-06, "loss": 0.1082, "step": 30840 }, { "epoch": 7.41, "learning_rate": 5.189630340854537e-06, "loss": 0.0754, "step": 30850 }, { "epoch": 7.41, "learning_rate": 5.1848295727316374e-06, "loss": 0.1623, "step": 30860 }, { "epoch": 7.41, "learning_rate": 5.1800288046087374e-06, "loss": 0.1211, "step": 30870 }, { "epoch": 7.41, "learning_rate": 5.175228036485837e-06, "loss": 0.2528, "step": 30880 }, { "epoch": 7.41, "learning_rate": 5.170427268362938e-06, "loss": 0.1014, "step": 30890 }, { "epoch": 7.42, "learning_rate": 5.165626500240038e-06, "loss": 0.1172, "step": 30900 }, { "epoch": 7.42, "learning_rate": 5.16082573211714e-06, "loss": 0.0743, "step": 30910 }, { "epoch": 7.42, "learning_rate": 5.156024963994239e-06, "loss": 0.1039, "step": 30920 }, { "epoch": 7.42, "learning_rate": 5.151224195871339e-06, "loss": 0.0924, "step": 30930 }, { "epoch": 7.43, "learning_rate": 5.146423427748441e-06, "loss": 0.0778, "step": 30940 }, { "epoch": 7.43, "learning_rate": 5.14162265962554e-06, "loss": 0.1223, "step": 30950 }, { "epoch": 7.43, "learning_rate": 5.136821891502642e-06, "loss": 0.1135, "step": 30960 }, { "epoch": 7.43, "learning_rate": 5.132021123379742e-06, "loss": 0.0646, "step": 30970 }, { "epoch": 7.44, "learning_rate": 5.127220355256841e-06, "loss": 0.1503, "step": 30980 }, { "epoch": 7.44, "learning_rate": 5.1224195871339425e-06, "loss": 0.1705, "step": 30990 }, { "epoch": 7.44, "learning_rate": 5.1176188190110425e-06, "loss": 0.0655, "step": 31000 }, { "epoch": 7.44, "learning_rate": 5.112818050888142e-06, "loss": 0.2437, "step": 31010 }, { "epoch": 7.45, "learning_rate": 5.108017282765243e-06, "loss": 0.1258, "step": 31020 }, { "epoch": 7.45, "learning_rate": 5.103216514642343e-06, "loss": 0.0092, "step": 31030 }, { "epoch": 7.45, "learning_rate": 5.098415746519444e-06, "loss": 0.1024, "step": 31040 }, { "epoch": 7.45, "learning_rate": 5.093614978396544e-06, "loss": 0.1543, "step": 31050 }, { "epoch": 7.46, "learning_rate": 5.088814210273644e-06, "loss": 0.2027, "step": 31060 }, { "epoch": 7.46, "learning_rate": 5.084013442150745e-06, "loss": 0.078, "step": 31070 }, { "epoch": 7.46, "learning_rate": 5.079212674027845e-06, "loss": 0.1359, "step": 31080 }, { "epoch": 7.46, "learning_rate": 5.074411905904946e-06, "loss": 0.1248, "step": 31090 }, { "epoch": 7.47, "learning_rate": 5.069611137782046e-06, "loss": 0.045, "step": 31100 }, { "epoch": 7.47, "learning_rate": 5.064810369659146e-06, "loss": 0.2843, "step": 31110 }, { "epoch": 7.47, "learning_rate": 5.060009601536247e-06, "loss": 0.1168, "step": 31120 }, { "epoch": 7.47, "learning_rate": 5.055208833413347e-06, "loss": 0.203, "step": 31130 }, { "epoch": 7.47, "learning_rate": 5.050408065290447e-06, "loss": 0.092, "step": 31140 }, { "epoch": 7.48, "learning_rate": 5.0456072971675475e-06, "loss": 0.1803, "step": 31150 }, { "epoch": 7.48, "learning_rate": 5.0408065290446475e-06, "loss": 0.152, "step": 31160 }, { "epoch": 7.48, "learning_rate": 5.036005760921748e-06, "loss": 0.0967, "step": 31170 }, { "epoch": 7.48, "learning_rate": 5.031204992798848e-06, "loss": 0.035, "step": 31180 }, { "epoch": 7.49, "learning_rate": 5.026404224675948e-06, "loss": 0.0353, "step": 31190 }, { "epoch": 7.49, "learning_rate": 5.021603456553049e-06, "loss": 0.0645, "step": 31200 }, { "epoch": 7.49, "learning_rate": 5.016802688430149e-06, "loss": 0.1822, "step": 31210 }, { "epoch": 7.49, "learning_rate": 5.01200192030725e-06, "loss": 0.0498, "step": 31220 }, { "epoch": 7.5, "learning_rate": 5.00720115218435e-06, "loss": 0.1, "step": 31230 }, { "epoch": 7.5, "learning_rate": 5.00240038406145e-06, "loss": 0.1453, "step": 31240 }, { "epoch": 7.5, "learning_rate": 4.99759961593855e-06, "loss": 0.1099, "step": 31250 }, { "epoch": 7.5, "learning_rate": 4.992798847815651e-06, "loss": 0.0734, "step": 31260 }, { "epoch": 7.51, "learning_rate": 4.987998079692752e-06, "loss": 0.0992, "step": 31270 }, { "epoch": 7.51, "learning_rate": 4.983197311569852e-06, "loss": 0.1218, "step": 31280 }, { "epoch": 7.51, "learning_rate": 4.978396543446952e-06, "loss": 0.118, "step": 31290 }, { "epoch": 7.51, "learning_rate": 4.9735957753240525e-06, "loss": 0.0957, "step": 31300 }, { "epoch": 7.52, "learning_rate": 4.9687950072011525e-06, "loss": 0.0668, "step": 31310 }, { "epoch": 7.52, "learning_rate": 4.963994239078253e-06, "loss": 0.0022, "step": 31320 }, { "epoch": 7.52, "learning_rate": 4.959193470955353e-06, "loss": 0.103, "step": 31330 }, { "epoch": 7.52, "learning_rate": 4.954392702832453e-06, "loss": 0.1837, "step": 31340 }, { "epoch": 7.53, "learning_rate": 4.949591934709554e-06, "loss": 0.1301, "step": 31350 }, { "epoch": 7.53, "learning_rate": 4.944791166586654e-06, "loss": 0.1617, "step": 31360 }, { "epoch": 7.53, "learning_rate": 4.939990398463755e-06, "loss": 0.1388, "step": 31370 }, { "epoch": 7.53, "learning_rate": 4.935189630340855e-06, "loss": 0.0231, "step": 31380 }, { "epoch": 7.53, "learning_rate": 4.930388862217955e-06, "loss": 0.1248, "step": 31390 }, { "epoch": 7.54, "learning_rate": 4.925588094095056e-06, "loss": 0.3422, "step": 31400 }, { "epoch": 7.54, "learning_rate": 4.920787325972156e-06, "loss": 0.095, "step": 31410 }, { "epoch": 7.54, "learning_rate": 4.915986557849256e-06, "loss": 0.1259, "step": 31420 }, { "epoch": 7.54, "learning_rate": 4.911185789726357e-06, "loss": 0.1291, "step": 31430 }, { "epoch": 7.55, "learning_rate": 4.906385021603457e-06, "loss": 0.1223, "step": 31440 }, { "epoch": 7.55, "learning_rate": 4.9015842534805575e-06, "loss": 0.1171, "step": 31450 }, { "epoch": 7.55, "learning_rate": 4.8967834853576575e-06, "loss": 0.0748, "step": 31460 }, { "epoch": 7.55, "learning_rate": 4.8919827172347575e-06, "loss": 0.3344, "step": 31470 }, { "epoch": 7.56, "learning_rate": 4.887181949111858e-06, "loss": 0.3566, "step": 31480 }, { "epoch": 7.56, "learning_rate": 4.882381180988958e-06, "loss": 0.1322, "step": 31490 }, { "epoch": 7.56, "learning_rate": 4.877580412866059e-06, "loss": 0.0181, "step": 31500 }, { "epoch": 7.56, "learning_rate": 4.872779644743159e-06, "loss": 0.0393, "step": 31510 }, { "epoch": 7.57, "learning_rate": 4.867978876620259e-06, "loss": 0.1927, "step": 31520 }, { "epoch": 7.57, "learning_rate": 4.86317810849736e-06, "loss": 0.1775, "step": 31530 }, { "epoch": 7.57, "learning_rate": 4.858377340374461e-06, "loss": 0.2065, "step": 31540 }, { "epoch": 7.57, "learning_rate": 4.85357657225156e-06, "loss": 0.1311, "step": 31550 }, { "epoch": 7.58, "learning_rate": 4.848775804128661e-06, "loss": 0.1337, "step": 31560 }, { "epoch": 7.58, "learning_rate": 4.843975036005762e-06, "loss": 0.108, "step": 31570 }, { "epoch": 7.58, "learning_rate": 4.839174267882862e-06, "loss": 0.2268, "step": 31580 }, { "epoch": 7.58, "learning_rate": 4.834373499759962e-06, "loss": 0.263, "step": 31590 }, { "epoch": 7.59, "learning_rate": 4.8295727316370626e-06, "loss": 0.2218, "step": 31600 }, { "epoch": 7.59, "learning_rate": 4.8247719635141625e-06, "loss": 0.1218, "step": 31610 }, { "epoch": 7.59, "learning_rate": 4.819971195391263e-06, "loss": 0.0787, "step": 31620 }, { "epoch": 7.59, "learning_rate": 4.815170427268363e-06, "loss": 0.0269, "step": 31630 }, { "epoch": 7.59, "learning_rate": 4.810369659145463e-06, "loss": 0.0866, "step": 31640 }, { "epoch": 7.6, "learning_rate": 4.805568891022564e-06, "loss": 0.1543, "step": 31650 }, { "epoch": 7.6, "learning_rate": 4.800768122899664e-06, "loss": 0.0988, "step": 31660 }, { "epoch": 7.6, "learning_rate": 4.795967354776765e-06, "loss": 0.1467, "step": 31670 }, { "epoch": 7.6, "learning_rate": 4.791166586653865e-06, "loss": 0.123, "step": 31680 }, { "epoch": 7.61, "learning_rate": 4.786365818530965e-06, "loss": 0.0073, "step": 31690 }, { "epoch": 7.61, "learning_rate": 4.781565050408066e-06, "loss": 0.0458, "step": 31700 }, { "epoch": 7.61, "learning_rate": 4.776764282285166e-06, "loss": 0.0539, "step": 31710 }, { "epoch": 7.61, "learning_rate": 4.771963514162266e-06, "loss": 0.2896, "step": 31720 }, { "epoch": 7.62, "learning_rate": 4.767162746039367e-06, "loss": 0.1829, "step": 31730 }, { "epoch": 7.62, "learning_rate": 4.762361977916467e-06, "loss": 0.0767, "step": 31740 }, { "epoch": 7.62, "learning_rate": 4.757561209793568e-06, "loss": 0.2413, "step": 31750 }, { "epoch": 7.62, "learning_rate": 4.7527604416706676e-06, "loss": 0.0629, "step": 31760 }, { "epoch": 7.63, "learning_rate": 4.7479596735477676e-06, "loss": 0.0834, "step": 31770 }, { "epoch": 7.63, "learning_rate": 4.743158905424868e-06, "loss": 0.1614, "step": 31780 }, { "epoch": 7.63, "learning_rate": 4.738358137301968e-06, "loss": 0.1813, "step": 31790 }, { "epoch": 7.63, "learning_rate": 4.733557369179069e-06, "loss": 0.1669, "step": 31800 }, { "epoch": 7.64, "learning_rate": 4.728756601056169e-06, "loss": 0.1585, "step": 31810 }, { "epoch": 7.64, "learning_rate": 4.723955832933269e-06, "loss": 0.0119, "step": 31820 }, { "epoch": 7.64, "learning_rate": 4.71915506481037e-06, "loss": 0.0517, "step": 31830 }, { "epoch": 7.64, "learning_rate": 4.714354296687471e-06, "loss": 0.1613, "step": 31840 }, { "epoch": 7.65, "learning_rate": 4.70955352856457e-06, "loss": 0.1125, "step": 31850 }, { "epoch": 7.65, "learning_rate": 4.704752760441671e-06, "loss": 0.1379, "step": 31860 }, { "epoch": 7.65, "learning_rate": 4.699951992318772e-06, "loss": 0.1726, "step": 31870 }, { "epoch": 7.65, "learning_rate": 4.695151224195872e-06, "loss": 0.1147, "step": 31880 }, { "epoch": 7.65, "learning_rate": 4.690350456072972e-06, "loss": 0.2184, "step": 31890 }, { "epoch": 7.66, "learning_rate": 4.685549687950073e-06, "loss": 0.1497, "step": 31900 }, { "epoch": 7.66, "learning_rate": 4.680748919827173e-06, "loss": 0.1905, "step": 31910 }, { "epoch": 7.66, "learning_rate": 4.6759481517042734e-06, "loss": 0.203, "step": 31920 }, { "epoch": 7.66, "learning_rate": 4.6711473835813734e-06, "loss": 0.2244, "step": 31930 }, { "epoch": 7.67, "learning_rate": 4.6663466154584734e-06, "loss": 0.0683, "step": 31940 }, { "epoch": 7.67, "learning_rate": 4.661545847335574e-06, "loss": 0.0788, "step": 31950 }, { "epoch": 7.67, "learning_rate": 4.656745079212674e-06, "loss": 0.0052, "step": 31960 }, { "epoch": 7.67, "learning_rate": 4.651944311089775e-06, "loss": 0.2374, "step": 31970 }, { "epoch": 7.68, "learning_rate": 4.647143542966875e-06, "loss": 0.1665, "step": 31980 }, { "epoch": 7.68, "learning_rate": 4.642342774843975e-06, "loss": 0.1838, "step": 31990 }, { "epoch": 7.68, "learning_rate": 4.637542006721076e-06, "loss": 0.114, "step": 32000 }, { "epoch": 7.68, "learning_rate": 4.632741238598176e-06, "loss": 0.0497, "step": 32010 }, { "epoch": 7.69, "learning_rate": 4.627940470475276e-06, "loss": 0.1016, "step": 32020 }, { "epoch": 7.69, "learning_rate": 4.623139702352377e-06, "loss": 0.0612, "step": 32030 }, { "epoch": 7.69, "learning_rate": 4.618338934229477e-06, "loss": 0.0497, "step": 32040 }, { "epoch": 7.69, "learning_rate": 4.613538166106578e-06, "loss": 0.2271, "step": 32050 }, { "epoch": 7.7, "learning_rate": 4.608737397983678e-06, "loss": 0.1026, "step": 32060 }, { "epoch": 7.7, "learning_rate": 4.603936629860778e-06, "loss": 0.2153, "step": 32070 }, { "epoch": 7.7, "learning_rate": 4.5991358617378785e-06, "loss": 0.1663, "step": 32080 }, { "epoch": 7.7, "learning_rate": 4.5943350936149785e-06, "loss": 0.0797, "step": 32090 }, { "epoch": 7.71, "learning_rate": 4.589534325492079e-06, "loss": 0.1696, "step": 32100 }, { "epoch": 7.71, "learning_rate": 4.584733557369179e-06, "loss": 0.2904, "step": 32110 }, { "epoch": 7.71, "learning_rate": 4.579932789246279e-06, "loss": 0.0849, "step": 32120 }, { "epoch": 7.71, "learning_rate": 4.57513202112338e-06, "loss": 0.169, "step": 32130 }, { "epoch": 7.71, "learning_rate": 4.570331253000481e-06, "loss": 0.1106, "step": 32140 }, { "epoch": 7.72, "learning_rate": 4.56553048487758e-06, "loss": 0.0145, "step": 32150 }, { "epoch": 7.72, "learning_rate": 4.560729716754681e-06, "loss": 0.1161, "step": 32160 }, { "epoch": 7.72, "learning_rate": 4.555928948631782e-06, "loss": 0.1767, "step": 32170 }, { "epoch": 7.72, "learning_rate": 4.551128180508882e-06, "loss": 0.1485, "step": 32180 }, { "epoch": 7.73, "learning_rate": 4.546327412385982e-06, "loss": 0.0946, "step": 32190 }, { "epoch": 7.73, "learning_rate": 4.541526644263083e-06, "loss": 0.089, "step": 32200 }, { "epoch": 7.73, "learning_rate": 4.536725876140183e-06, "loss": 0.1676, "step": 32210 }, { "epoch": 7.73, "learning_rate": 4.5319251080172835e-06, "loss": 0.0497, "step": 32220 }, { "epoch": 7.74, "learning_rate": 4.5271243398943835e-06, "loss": 0.0922, "step": 32230 }, { "epoch": 7.74, "learning_rate": 4.5223235717714835e-06, "loss": 0.1706, "step": 32240 }, { "epoch": 7.74, "learning_rate": 4.517522803648584e-06, "loss": 0.0641, "step": 32250 }, { "epoch": 7.74, "learning_rate": 4.512722035525684e-06, "loss": 0.2167, "step": 32260 }, { "epoch": 7.75, "learning_rate": 4.507921267402785e-06, "loss": 0.2173, "step": 32270 }, { "epoch": 7.75, "learning_rate": 4.503120499279885e-06, "loss": 0.2801, "step": 32280 }, { "epoch": 7.75, "learning_rate": 4.498319731156985e-06, "loss": 0.0967, "step": 32290 }, { "epoch": 7.75, "learning_rate": 4.493518963034086e-06, "loss": 0.0357, "step": 32300 }, { "epoch": 7.76, "learning_rate": 4.488718194911186e-06, "loss": 0.0833, "step": 32310 }, { "epoch": 7.76, "learning_rate": 4.483917426788286e-06, "loss": 0.1514, "step": 32320 }, { "epoch": 7.76, "learning_rate": 4.479116658665387e-06, "loss": 0.1501, "step": 32330 }, { "epoch": 7.76, "learning_rate": 4.474315890542487e-06, "loss": 0.0309, "step": 32340 }, { "epoch": 7.77, "learning_rate": 4.469515122419588e-06, "loss": 0.2215, "step": 32350 }, { "epoch": 7.77, "learning_rate": 4.464714354296688e-06, "loss": 0.0584, "step": 32360 }, { "epoch": 7.77, "learning_rate": 4.459913586173788e-06, "loss": 0.1679, "step": 32370 }, { "epoch": 7.77, "learning_rate": 4.4551128180508885e-06, "loss": 0.0158, "step": 32380 }, { "epoch": 7.77, "learning_rate": 4.450312049927989e-06, "loss": 0.1857, "step": 32390 }, { "epoch": 7.78, "learning_rate": 4.445511281805089e-06, "loss": 0.1, "step": 32400 }, { "epoch": 7.78, "learning_rate": 4.440710513682189e-06, "loss": 0.0909, "step": 32410 }, { "epoch": 7.78, "learning_rate": 4.43590974555929e-06, "loss": 0.1199, "step": 32420 }, { "epoch": 7.78, "learning_rate": 4.43110897743639e-06, "loss": 0.0755, "step": 32430 }, { "epoch": 7.79, "learning_rate": 4.426308209313491e-06, "loss": 0.068, "step": 32440 }, { "epoch": 7.79, "learning_rate": 4.421507441190591e-06, "loss": 0.0868, "step": 32450 }, { "epoch": 7.79, "learning_rate": 4.416706673067691e-06, "loss": 0.1958, "step": 32460 }, { "epoch": 7.79, "learning_rate": 4.411905904944792e-06, "loss": 0.169, "step": 32470 }, { "epoch": 7.8, "learning_rate": 4.407105136821892e-06, "loss": 0.1028, "step": 32480 }, { "epoch": 7.8, "learning_rate": 4.402304368698993e-06, "loss": 0.0437, "step": 32490 }, { "epoch": 7.8, "learning_rate": 4.397503600576093e-06, "loss": 0.1739, "step": 32500 }, { "epoch": 7.8, "learning_rate": 4.392702832453193e-06, "loss": 0.0676, "step": 32510 }, { "epoch": 7.81, "learning_rate": 4.3879020643302935e-06, "loss": 0.0551, "step": 32520 }, { "epoch": 7.81, "learning_rate": 4.3831012962073935e-06, "loss": 0.1959, "step": 32530 }, { "epoch": 7.81, "learning_rate": 4.3783005280844935e-06, "loss": 0.02, "step": 32540 }, { "epoch": 7.81, "learning_rate": 4.373499759961594e-06, "loss": 0.1355, "step": 32550 }, { "epoch": 7.82, "learning_rate": 4.368698991838694e-06, "loss": 0.2029, "step": 32560 }, { "epoch": 7.82, "learning_rate": 4.363898223715795e-06, "loss": 0.052, "step": 32570 }, { "epoch": 7.82, "learning_rate": 4.359097455592895e-06, "loss": 0.1296, "step": 32580 }, { "epoch": 7.82, "learning_rate": 4.354296687469995e-06, "loss": 0.0444, "step": 32590 }, { "epoch": 7.83, "learning_rate": 4.349495919347096e-06, "loss": 0.0891, "step": 32600 }, { "epoch": 7.83, "learning_rate": 4.344695151224196e-06, "loss": 0.0459, "step": 32610 }, { "epoch": 7.83, "learning_rate": 4.339894383101296e-06, "loss": 0.1198, "step": 32620 }, { "epoch": 7.83, "learning_rate": 4.335093614978397e-06, "loss": 0.1227, "step": 32630 }, { "epoch": 7.83, "learning_rate": 4.330292846855497e-06, "loss": 0.0715, "step": 32640 }, { "epoch": 7.84, "learning_rate": 4.325492078732598e-06, "loss": 0.1631, "step": 32650 }, { "epoch": 7.84, "learning_rate": 4.3206913106096986e-06, "loss": 0.0663, "step": 32660 }, { "epoch": 7.84, "learning_rate": 4.315890542486798e-06, "loss": 0.0343, "step": 32670 }, { "epoch": 7.84, "learning_rate": 4.3110897743638986e-06, "loss": 0.1073, "step": 32680 }, { "epoch": 7.85, "learning_rate": 4.306289006240999e-06, "loss": 0.0775, "step": 32690 }, { "epoch": 7.85, "learning_rate": 4.301488238118099e-06, "loss": 0.1082, "step": 32700 }, { "epoch": 7.85, "learning_rate": 4.296687469995199e-06, "loss": 0.0996, "step": 32710 }, { "epoch": 7.85, "learning_rate": 4.2918867018723e-06, "loss": 0.2619, "step": 32720 }, { "epoch": 7.86, "learning_rate": 4.2870859337494e-06, "loss": 0.1357, "step": 32730 }, { "epoch": 7.86, "learning_rate": 4.282285165626501e-06, "loss": 0.1922, "step": 32740 }, { "epoch": 7.86, "learning_rate": 4.277484397503601e-06, "loss": 0.01, "step": 32750 }, { "epoch": 7.86, "learning_rate": 4.272683629380701e-06, "loss": 0.1439, "step": 32760 }, { "epoch": 7.87, "learning_rate": 4.267882861257802e-06, "loss": 0.1285, "step": 32770 }, { "epoch": 7.87, "learning_rate": 4.263082093134902e-06, "loss": 0.0881, "step": 32780 }, { "epoch": 7.87, "learning_rate": 4.258281325012003e-06, "loss": 0.034, "step": 32790 }, { "epoch": 7.87, "learning_rate": 4.253480556889103e-06, "loss": 0.1083, "step": 32800 }, { "epoch": 7.88, "learning_rate": 4.248679788766203e-06, "loss": 0.2431, "step": 32810 }, { "epoch": 7.88, "learning_rate": 4.243879020643304e-06, "loss": 0.1309, "step": 32820 }, { "epoch": 7.88, "learning_rate": 4.239078252520404e-06, "loss": 0.0219, "step": 32830 }, { "epoch": 7.88, "learning_rate": 4.2342774843975036e-06, "loss": 0.0618, "step": 32840 }, { "epoch": 7.89, "learning_rate": 4.229476716274604e-06, "loss": 0.311, "step": 32850 }, { "epoch": 7.89, "learning_rate": 4.224675948151704e-06, "loss": 0.1233, "step": 32860 }, { "epoch": 7.89, "learning_rate": 4.219875180028805e-06, "loss": 0.0769, "step": 32870 }, { "epoch": 7.89, "learning_rate": 4.215074411905905e-06, "loss": 0.0651, "step": 32880 }, { "epoch": 7.89, "learning_rate": 4.210273643783005e-06, "loss": 0.109, "step": 32890 }, { "epoch": 7.9, "learning_rate": 4.205472875660106e-06, "loss": 0.0559, "step": 32900 }, { "epoch": 7.9, "learning_rate": 4.200672107537206e-06, "loss": 0.263, "step": 32910 }, { "epoch": 7.9, "learning_rate": 4.195871339414306e-06, "loss": 0.1216, "step": 32920 }, { "epoch": 7.9, "learning_rate": 4.191070571291407e-06, "loss": 0.1233, "step": 32930 }, { "epoch": 7.91, "learning_rate": 4.186269803168507e-06, "loss": 0.1657, "step": 32940 }, { "epoch": 7.91, "learning_rate": 4.181469035045608e-06, "loss": 0.1153, "step": 32950 }, { "epoch": 7.91, "learning_rate": 4.176668266922709e-06, "loss": 0.0072, "step": 32960 }, { "epoch": 7.91, "learning_rate": 4.171867498799808e-06, "loss": 0.1707, "step": 32970 }, { "epoch": 7.92, "learning_rate": 4.167066730676909e-06, "loss": 0.064, "step": 32980 }, { "epoch": 7.92, "learning_rate": 4.1622659625540094e-06, "loss": 0.2296, "step": 32990 }, { "epoch": 7.92, "learning_rate": 4.1574651944311094e-06, "loss": 0.0409, "step": 33000 }, { "epoch": 7.92, "learning_rate": 4.1526644263082094e-06, "loss": 0.0928, "step": 33010 }, { "epoch": 7.93, "learning_rate": 4.14786365818531e-06, "loss": 0.2015, "step": 33020 }, { "epoch": 7.93, "learning_rate": 4.14306289006241e-06, "loss": 0.1539, "step": 33030 }, { "epoch": 7.93, "learning_rate": 4.138262121939511e-06, "loss": 0.033, "step": 33040 }, { "epoch": 7.93, "learning_rate": 4.133461353816611e-06, "loss": 0.1806, "step": 33050 }, { "epoch": 7.94, "learning_rate": 4.128660585693711e-06, "loss": 0.0404, "step": 33060 }, { "epoch": 7.94, "learning_rate": 4.123859817570812e-06, "loss": 0.0788, "step": 33070 }, { "epoch": 7.94, "learning_rate": 4.119059049447912e-06, "loss": 0.0049, "step": 33080 }, { "epoch": 7.94, "learning_rate": 4.114258281325013e-06, "loss": 0.1674, "step": 33090 }, { "epoch": 7.95, "learning_rate": 4.109457513202113e-06, "loss": 0.1119, "step": 33100 }, { "epoch": 7.95, "learning_rate": 4.104656745079213e-06, "loss": 0.1309, "step": 33110 }, { "epoch": 7.95, "learning_rate": 4.099855976956314e-06, "loss": 0.153, "step": 33120 }, { "epoch": 7.95, "learning_rate": 4.095055208833414e-06, "loss": 0.0916, "step": 33130 }, { "epoch": 7.95, "learning_rate": 4.090254440710514e-06, "loss": 0.0981, "step": 33140 }, { "epoch": 7.96, "learning_rate": 4.0854536725876145e-06, "loss": 0.1299, "step": 33150 }, { "epoch": 7.96, "learning_rate": 4.0806529044647145e-06, "loss": 0.078, "step": 33160 }, { "epoch": 7.96, "learning_rate": 4.075852136341815e-06, "loss": 0.179, "step": 33170 }, { "epoch": 7.96, "learning_rate": 4.071051368218915e-06, "loss": 0.079, "step": 33180 }, { "epoch": 7.97, "learning_rate": 4.066250600096015e-06, "loss": 0.0038, "step": 33190 }, { "epoch": 7.97, "learning_rate": 4.061449831973116e-06, "loss": 0.1786, "step": 33200 }, { "epoch": 7.97, "learning_rate": 4.056649063850216e-06, "loss": 0.1366, "step": 33210 }, { "epoch": 7.97, "learning_rate": 4.051848295727317e-06, "loss": 0.0247, "step": 33220 }, { "epoch": 7.98, "learning_rate": 4.047047527604417e-06, "loss": 0.2032, "step": 33230 }, { "epoch": 7.98, "learning_rate": 4.042246759481517e-06, "loss": 0.0918, "step": 33240 }, { "epoch": 7.98, "learning_rate": 4.037445991358618e-06, "loss": 0.0276, "step": 33250 }, { "epoch": 7.98, "learning_rate": 4.032645223235719e-06, "loss": 0.11, "step": 33260 }, { "epoch": 7.99, "learning_rate": 4.027844455112818e-06, "loss": 0.0286, "step": 33270 }, { "epoch": 7.99, "learning_rate": 4.023043686989919e-06, "loss": 0.0939, "step": 33280 }, { "epoch": 7.99, "learning_rate": 4.0182429188670195e-06, "loss": 0.2662, "step": 33290 }, { "epoch": 7.99, "learning_rate": 4.0134421507441195e-06, "loss": 0.2048, "step": 33300 }, { "epoch": 8.0, "learning_rate": 4.0086413826212195e-06, "loss": 0.2249, "step": 33310 }, { "epoch": 8.0, "learning_rate": 4.00384061449832e-06, "loss": 0.0983, "step": 33320 }, { "epoch": 8.0, "eval_accuracy": 0.9989799387963277, "eval_loss": 0.005884047131985426, "eval_runtime": 517.9675, "eval_samples_per_second": 11.356, "eval_steps_per_second": 1.421, "step": 33328 }, { "epoch": 8.0, "learning_rate": 3.99903984637542e-06, "loss": 0.1972, "step": 33330 }, { "epoch": 8.0, "learning_rate": 3.994239078252521e-06, "loss": 0.0332, "step": 33340 }, { "epoch": 8.01, "learning_rate": 3.989438310129621e-06, "loss": 0.1043, "step": 33350 }, { "epoch": 8.01, "learning_rate": 3.984637542006721e-06, "loss": 0.2772, "step": 33360 }, { "epoch": 8.01, "learning_rate": 3.979836773883822e-06, "loss": 0.1198, "step": 33370 }, { "epoch": 8.01, "learning_rate": 3.975036005760922e-06, "loss": 0.0784, "step": 33380 }, { "epoch": 8.01, "learning_rate": 3.970235237638023e-06, "loss": 0.064, "step": 33390 }, { "epoch": 8.02, "learning_rate": 3.965434469515123e-06, "loss": 0.1932, "step": 33400 }, { "epoch": 8.02, "learning_rate": 3.960633701392223e-06, "loss": 0.0251, "step": 33410 }, { "epoch": 8.02, "learning_rate": 3.955832933269324e-06, "loss": 0.1158, "step": 33420 }, { "epoch": 8.02, "learning_rate": 3.951032165146424e-06, "loss": 0.1333, "step": 33430 }, { "epoch": 8.03, "learning_rate": 3.946231397023524e-06, "loss": 0.1075, "step": 33440 }, { "epoch": 8.03, "learning_rate": 3.9414306289006245e-06, "loss": 0.1348, "step": 33450 }, { "epoch": 8.03, "learning_rate": 3.9366298607777245e-06, "loss": 0.2771, "step": 33460 }, { "epoch": 8.03, "learning_rate": 3.931829092654825e-06, "loss": 0.0497, "step": 33470 }, { "epoch": 8.04, "learning_rate": 3.927028324531925e-06, "loss": 0.125, "step": 33480 }, { "epoch": 8.04, "learning_rate": 3.922227556409025e-06, "loss": 0.1646, "step": 33490 }, { "epoch": 8.04, "learning_rate": 3.917426788286126e-06, "loss": 0.0214, "step": 33500 }, { "epoch": 8.04, "learning_rate": 3.912626020163226e-06, "loss": 0.221, "step": 33510 }, { "epoch": 8.05, "learning_rate": 3.907825252040327e-06, "loss": 0.1701, "step": 33520 }, { "epoch": 8.05, "learning_rate": 3.903024483917427e-06, "loss": 0.1275, "step": 33530 }, { "epoch": 8.05, "learning_rate": 3.898223715794527e-06, "loss": 0.1893, "step": 33540 }, { "epoch": 8.05, "learning_rate": 3.893422947671628e-06, "loss": 0.0999, "step": 33550 }, { "epoch": 8.06, "learning_rate": 3.888622179548729e-06, "loss": 0.0996, "step": 33560 }, { "epoch": 8.06, "learning_rate": 3.883821411425828e-06, "loss": 0.0339, "step": 33570 }, { "epoch": 8.06, "learning_rate": 3.879020643302929e-06, "loss": 0.0447, "step": 33580 }, { "epoch": 8.06, "learning_rate": 3.8742198751800295e-06, "loss": 0.2243, "step": 33590 }, { "epoch": 8.07, "learning_rate": 3.8694191070571295e-06, "loss": 0.2021, "step": 33600 }, { "epoch": 8.07, "learning_rate": 3.8646183389342295e-06, "loss": 0.0495, "step": 33610 }, { "epoch": 8.07, "learning_rate": 3.85981757081133e-06, "loss": 0.1671, "step": 33620 }, { "epoch": 8.07, "learning_rate": 3.85501680268843e-06, "loss": 0.3138, "step": 33630 }, { "epoch": 8.07, "learning_rate": 3.850216034565531e-06, "loss": 0.1293, "step": 33640 }, { "epoch": 8.08, "learning_rate": 3.845415266442631e-06, "loss": 0.0642, "step": 33650 }, { "epoch": 8.08, "learning_rate": 3.840614498319731e-06, "loss": 0.0912, "step": 33660 }, { "epoch": 8.08, "learning_rate": 3.835813730196832e-06, "loss": 0.048, "step": 33670 }, { "epoch": 8.08, "learning_rate": 3.831012962073932e-06, "loss": 0.0946, "step": 33680 }, { "epoch": 8.09, "learning_rate": 3.826212193951033e-06, "loss": 0.0444, "step": 33690 }, { "epoch": 8.09, "learning_rate": 3.821411425828133e-06, "loss": 0.0184, "step": 33700 }, { "epoch": 8.09, "learning_rate": 3.816610657705233e-06, "loss": 0.3485, "step": 33710 }, { "epoch": 8.09, "learning_rate": 3.8118098895823337e-06, "loss": 0.0928, "step": 33720 }, { "epoch": 8.1, "learning_rate": 3.807009121459434e-06, "loss": 0.2039, "step": 33730 }, { "epoch": 8.1, "learning_rate": 3.8022083533365337e-06, "loss": 0.0653, "step": 33740 }, { "epoch": 8.1, "learning_rate": 3.7974075852136346e-06, "loss": 0.0541, "step": 33750 }, { "epoch": 8.1, "learning_rate": 3.792606817090735e-06, "loss": 0.0809, "step": 33760 }, { "epoch": 8.11, "learning_rate": 3.7878060489678354e-06, "loss": 0.0341, "step": 33770 }, { "epoch": 8.11, "learning_rate": 3.7830052808449354e-06, "loss": 0.1706, "step": 33780 }, { "epoch": 8.11, "learning_rate": 3.778204512722036e-06, "loss": 0.2115, "step": 33790 }, { "epoch": 8.11, "learning_rate": 3.7734037445991362e-06, "loss": 0.3165, "step": 33800 }, { "epoch": 8.12, "learning_rate": 3.7686029764762367e-06, "loss": 0.0026, "step": 33810 }, { "epoch": 8.12, "learning_rate": 3.763802208353337e-06, "loss": 0.1435, "step": 33820 }, { "epoch": 8.12, "learning_rate": 3.759001440230437e-06, "loss": 0.1226, "step": 33830 }, { "epoch": 8.12, "learning_rate": 3.7542006721075375e-06, "loss": 0.1266, "step": 33840 }, { "epoch": 8.13, "learning_rate": 3.749399903984638e-06, "loss": 0.1283, "step": 33850 }, { "epoch": 8.13, "learning_rate": 3.7445991358617383e-06, "loss": 0.1462, "step": 33860 }, { "epoch": 8.13, "learning_rate": 3.7397983677388383e-06, "loss": 0.2587, "step": 33870 }, { "epoch": 8.13, "learning_rate": 3.7349975996159387e-06, "loss": 0.0669, "step": 33880 }, { "epoch": 8.13, "learning_rate": 3.730196831493039e-06, "loss": 0.0769, "step": 33890 }, { "epoch": 8.14, "learning_rate": 3.7253960633701396e-06, "loss": 0.0891, "step": 33900 }, { "epoch": 8.14, "learning_rate": 3.7205952952472396e-06, "loss": 0.2469, "step": 33910 }, { "epoch": 8.14, "learning_rate": 3.71579452712434e-06, "loss": 0.0637, "step": 33920 }, { "epoch": 8.14, "learning_rate": 3.7109937590014404e-06, "loss": 0.1237, "step": 33930 }, { "epoch": 8.15, "learning_rate": 3.706192990878541e-06, "loss": 0.0574, "step": 33940 }, { "epoch": 8.15, "learning_rate": 3.7013922227556413e-06, "loss": 0.125, "step": 33950 }, { "epoch": 8.15, "learning_rate": 3.6965914546327413e-06, "loss": 0.2603, "step": 33960 }, { "epoch": 8.15, "learning_rate": 3.6917906865098417e-06, "loss": 0.0585, "step": 33970 }, { "epoch": 8.16, "learning_rate": 3.6869899183869425e-06, "loss": 0.0952, "step": 33980 }, { "epoch": 8.16, "learning_rate": 3.682189150264043e-06, "loss": 0.1033, "step": 33990 }, { "epoch": 8.16, "learning_rate": 3.6773883821411425e-06, "loss": 0.0956, "step": 34000 }, { "epoch": 8.16, "learning_rate": 3.6725876140182434e-06, "loss": 0.1679, "step": 34010 }, { "epoch": 8.17, "learning_rate": 3.6677868458953438e-06, "loss": 0.0395, "step": 34020 }, { "epoch": 8.17, "learning_rate": 3.662986077772444e-06, "loss": 0.063, "step": 34030 }, { "epoch": 8.17, "learning_rate": 3.658185309649544e-06, "loss": 0.0922, "step": 34040 }, { "epoch": 8.17, "learning_rate": 3.6533845415266446e-06, "loss": 0.0746, "step": 34050 }, { "epoch": 8.18, "learning_rate": 3.648583773403745e-06, "loss": 0.1837, "step": 34060 }, { "epoch": 8.18, "learning_rate": 3.6437830052808455e-06, "loss": 0.1759, "step": 34070 }, { "epoch": 8.18, "learning_rate": 3.6389822371579454e-06, "loss": 0.0915, "step": 34080 }, { "epoch": 8.18, "learning_rate": 3.634181469035046e-06, "loss": 0.1557, "step": 34090 }, { "epoch": 8.19, "learning_rate": 3.6293807009121463e-06, "loss": 0.1361, "step": 34100 }, { "epoch": 8.19, "learning_rate": 3.6245799327892467e-06, "loss": 0.0635, "step": 34110 }, { "epoch": 8.19, "learning_rate": 3.619779164666347e-06, "loss": 0.0581, "step": 34120 }, { "epoch": 8.19, "learning_rate": 3.614978396543447e-06, "loss": 0.0995, "step": 34130 }, { "epoch": 8.19, "learning_rate": 3.6101776284205475e-06, "loss": 0.0327, "step": 34140 }, { "epoch": 8.2, "learning_rate": 3.605376860297648e-06, "loss": 0.1152, "step": 34150 }, { "epoch": 8.2, "learning_rate": 3.6005760921747484e-06, "loss": 0.3173, "step": 34160 }, { "epoch": 8.2, "learning_rate": 3.5957753240518484e-06, "loss": 0.0716, "step": 34170 }, { "epoch": 8.2, "learning_rate": 3.590974555928949e-06, "loss": 0.0701, "step": 34180 }, { "epoch": 8.21, "learning_rate": 3.5861737878060492e-06, "loss": 0.2868, "step": 34190 }, { "epoch": 8.21, "learning_rate": 3.5813730196831496e-06, "loss": 0.203, "step": 34200 }, { "epoch": 8.21, "learning_rate": 3.5765722515602496e-06, "loss": 0.0392, "step": 34210 }, { "epoch": 8.21, "learning_rate": 3.57177148343735e-06, "loss": 0.0982, "step": 34220 }, { "epoch": 8.22, "learning_rate": 3.5669707153144505e-06, "loss": 0.0443, "step": 34230 }, { "epoch": 8.22, "learning_rate": 3.562169947191551e-06, "loss": 0.11, "step": 34240 }, { "epoch": 8.22, "learning_rate": 3.5573691790686517e-06, "loss": 0.1083, "step": 34250 }, { "epoch": 8.22, "learning_rate": 3.5525684109457513e-06, "loss": 0.067, "step": 34260 }, { "epoch": 8.23, "learning_rate": 3.5477676428228517e-06, "loss": 0.0604, "step": 34270 }, { "epoch": 8.23, "learning_rate": 3.5429668746999526e-06, "loss": 0.1665, "step": 34280 }, { "epoch": 8.23, "learning_rate": 3.538166106577053e-06, "loss": 0.065, "step": 34290 }, { "epoch": 8.23, "learning_rate": 3.5333653384541526e-06, "loss": 0.1961, "step": 34300 }, { "epoch": 8.24, "learning_rate": 3.5285645703312534e-06, "loss": 0.1178, "step": 34310 }, { "epoch": 8.24, "learning_rate": 3.523763802208354e-06, "loss": 0.1627, "step": 34320 }, { "epoch": 8.24, "learning_rate": 3.5189630340854542e-06, "loss": 0.1551, "step": 34330 }, { "epoch": 8.24, "learning_rate": 3.5141622659625542e-06, "loss": 0.0533, "step": 34340 }, { "epoch": 8.25, "learning_rate": 3.5093614978396547e-06, "loss": 0.2665, "step": 34350 }, { "epoch": 8.25, "learning_rate": 3.504560729716755e-06, "loss": 0.0695, "step": 34360 }, { "epoch": 8.25, "learning_rate": 3.4997599615938555e-06, "loss": 0.2935, "step": 34370 }, { "epoch": 8.25, "learning_rate": 3.494959193470956e-06, "loss": 0.0365, "step": 34380 }, { "epoch": 8.25, "learning_rate": 3.490158425348056e-06, "loss": 0.0689, "step": 34390 }, { "epoch": 8.26, "learning_rate": 3.4853576572251563e-06, "loss": 0.1108, "step": 34400 }, { "epoch": 8.26, "learning_rate": 3.4805568891022568e-06, "loss": 0.0311, "step": 34410 }, { "epoch": 8.26, "learning_rate": 3.475756120979357e-06, "loss": 0.1035, "step": 34420 }, { "epoch": 8.26, "learning_rate": 3.470955352856457e-06, "loss": 0.1511, "step": 34430 }, { "epoch": 8.27, "learning_rate": 3.4661545847335576e-06, "loss": 0.2384, "step": 34440 }, { "epoch": 8.27, "learning_rate": 3.461353816610658e-06, "loss": 0.0966, "step": 34450 }, { "epoch": 8.27, "learning_rate": 3.4565530484877584e-06, "loss": 0.165, "step": 34460 }, { "epoch": 8.27, "learning_rate": 3.4517522803648584e-06, "loss": 0.139, "step": 34470 }, { "epoch": 8.28, "learning_rate": 3.446951512241959e-06, "loss": 0.041, "step": 34480 }, { "epoch": 8.28, "learning_rate": 3.4421507441190593e-06, "loss": 0.0959, "step": 34490 }, { "epoch": 8.28, "learning_rate": 3.4373499759961597e-06, "loss": 0.1206, "step": 34500 }, { "epoch": 8.28, "learning_rate": 3.4325492078732597e-06, "loss": 0.2708, "step": 34510 }, { "epoch": 8.29, "learning_rate": 3.42774843975036e-06, "loss": 0.1659, "step": 34520 }, { "epoch": 8.29, "learning_rate": 3.4229476716274605e-06, "loss": 0.1849, "step": 34530 }, { "epoch": 8.29, "learning_rate": 3.418146903504561e-06, "loss": 0.0601, "step": 34540 }, { "epoch": 8.29, "learning_rate": 3.4133461353816618e-06, "loss": 0.1162, "step": 34550 }, { "epoch": 8.3, "learning_rate": 3.4085453672587614e-06, "loss": 0.1731, "step": 34560 }, { "epoch": 8.3, "learning_rate": 3.4037445991358618e-06, "loss": 0.1586, "step": 34570 }, { "epoch": 8.3, "learning_rate": 3.3989438310129626e-06, "loss": 0.0555, "step": 34580 }, { "epoch": 8.3, "learning_rate": 3.394143062890063e-06, "loss": 0.1953, "step": 34590 }, { "epoch": 8.31, "learning_rate": 3.3893422947671626e-06, "loss": 0.0576, "step": 34600 }, { "epoch": 8.31, "learning_rate": 3.3845415266442635e-06, "loss": 0.1094, "step": 34610 }, { "epoch": 8.31, "learning_rate": 3.379740758521364e-06, "loss": 0.0029, "step": 34620 }, { "epoch": 8.31, "learning_rate": 3.3749399903984643e-06, "loss": 0.1093, "step": 34630 }, { "epoch": 8.31, "learning_rate": 3.3701392222755643e-06, "loss": 0.0386, "step": 34640 }, { "epoch": 8.32, "learning_rate": 3.3653384541526647e-06, "loss": 0.2095, "step": 34650 }, { "epoch": 8.32, "learning_rate": 3.360537686029765e-06, "loss": 0.1846, "step": 34660 }, { "epoch": 8.32, "learning_rate": 3.3557369179068655e-06, "loss": 0.1879, "step": 34670 }, { "epoch": 8.32, "learning_rate": 3.350936149783966e-06, "loss": 0.1359, "step": 34680 }, { "epoch": 8.33, "learning_rate": 3.346135381661066e-06, "loss": 0.1777, "step": 34690 }, { "epoch": 8.33, "learning_rate": 3.3413346135381664e-06, "loss": 0.0897, "step": 34700 }, { "epoch": 8.33, "learning_rate": 3.336533845415267e-06, "loss": 0.2725, "step": 34710 }, { "epoch": 8.33, "learning_rate": 3.3317330772923672e-06, "loss": 0.1332, "step": 34720 }, { "epoch": 8.34, "learning_rate": 3.3269323091694672e-06, "loss": 0.114, "step": 34730 }, { "epoch": 8.34, "learning_rate": 3.3221315410465676e-06, "loss": 0.2266, "step": 34740 }, { "epoch": 8.34, "learning_rate": 3.317330772923668e-06, "loss": 0.0507, "step": 34750 }, { "epoch": 8.34, "learning_rate": 3.3125300048007685e-06, "loss": 0.127, "step": 34760 }, { "epoch": 8.35, "learning_rate": 3.3077292366778685e-06, "loss": 0.1693, "step": 34770 }, { "epoch": 8.35, "learning_rate": 3.302928468554969e-06, "loss": 0.2422, "step": 34780 }, { "epoch": 8.35, "learning_rate": 3.2981277004320693e-06, "loss": 0.2181, "step": 34790 }, { "epoch": 8.35, "learning_rate": 3.2933269323091697e-06, "loss": 0.0957, "step": 34800 }, { "epoch": 8.36, "learning_rate": 3.2885261641862697e-06, "loss": 0.0768, "step": 34810 }, { "epoch": 8.36, "learning_rate": 3.28372539606337e-06, "loss": 0.0727, "step": 34820 }, { "epoch": 8.36, "learning_rate": 3.2789246279404706e-06, "loss": 0.107, "step": 34830 }, { "epoch": 8.36, "learning_rate": 3.274123859817571e-06, "loss": 0.0183, "step": 34840 }, { "epoch": 8.37, "learning_rate": 3.269323091694672e-06, "loss": 0.1119, "step": 34850 }, { "epoch": 8.37, "learning_rate": 3.2645223235717714e-06, "loss": 0.0896, "step": 34860 }, { "epoch": 8.37, "learning_rate": 3.2597215554488722e-06, "loss": 0.013, "step": 34870 }, { "epoch": 8.37, "learning_rate": 3.2549207873259727e-06, "loss": 0.1023, "step": 34880 }, { "epoch": 8.37, "learning_rate": 3.250120019203073e-06, "loss": 0.0438, "step": 34890 }, { "epoch": 8.38, "learning_rate": 3.245319251080173e-06, "loss": 0.1528, "step": 34900 }, { "epoch": 8.38, "learning_rate": 3.2405184829572735e-06, "loss": 0.0306, "step": 34910 }, { "epoch": 8.38, "learning_rate": 3.235717714834374e-06, "loss": 0.1335, "step": 34920 }, { "epoch": 8.38, "learning_rate": 3.2309169467114743e-06, "loss": 0.1406, "step": 34930 }, { "epoch": 8.39, "learning_rate": 3.2261161785885743e-06, "loss": 0.09, "step": 34940 }, { "epoch": 8.39, "learning_rate": 3.2213154104656748e-06, "loss": 0.2012, "step": 34950 }, { "epoch": 8.39, "learning_rate": 3.216514642342775e-06, "loss": 0.1888, "step": 34960 }, { "epoch": 8.39, "learning_rate": 3.2117138742198756e-06, "loss": 0.126, "step": 34970 }, { "epoch": 8.4, "learning_rate": 3.206913106096976e-06, "loss": 0.1207, "step": 34980 }, { "epoch": 8.4, "learning_rate": 3.202112337974076e-06, "loss": 0.1189, "step": 34990 }, { "epoch": 8.4, "learning_rate": 3.1973115698511764e-06, "loss": 0.1249, "step": 35000 }, { "epoch": 8.4, "learning_rate": 3.192510801728277e-06, "loss": 0.0334, "step": 35010 }, { "epoch": 8.41, "learning_rate": 3.1877100336053773e-06, "loss": 0.0516, "step": 35020 }, { "epoch": 8.41, "learning_rate": 3.1829092654824773e-06, "loss": 0.0101, "step": 35030 }, { "epoch": 8.41, "learning_rate": 3.1781084973595777e-06, "loss": 0.1285, "step": 35040 }, { "epoch": 8.41, "learning_rate": 3.173307729236678e-06, "loss": 0.0358, "step": 35050 }, { "epoch": 8.42, "learning_rate": 3.1685069611137785e-06, "loss": 0.1, "step": 35060 }, { "epoch": 8.42, "learning_rate": 3.1637061929908785e-06, "loss": 0.0185, "step": 35070 }, { "epoch": 8.42, "learning_rate": 3.158905424867979e-06, "loss": 0.0182, "step": 35080 }, { "epoch": 8.42, "learning_rate": 3.1541046567450794e-06, "loss": 0.0885, "step": 35090 }, { "epoch": 8.43, "learning_rate": 3.1493038886221798e-06, "loss": 0.2235, "step": 35100 }, { "epoch": 8.43, "learning_rate": 3.1445031204992806e-06, "loss": 0.0725, "step": 35110 }, { "epoch": 8.43, "learning_rate": 3.13970235237638e-06, "loss": 0.0892, "step": 35120 }, { "epoch": 8.43, "learning_rate": 3.1349015842534806e-06, "loss": 0.272, "step": 35130 }, { "epoch": 8.43, "learning_rate": 3.1301008161305815e-06, "loss": 0.075, "step": 35140 }, { "epoch": 8.44, "learning_rate": 3.125300048007682e-06, "loss": 0.0485, "step": 35150 }, { "epoch": 8.44, "learning_rate": 3.1204992798847814e-06, "loss": 0.1188, "step": 35160 }, { "epoch": 8.44, "learning_rate": 3.1156985117618823e-06, "loss": 0.0818, "step": 35170 }, { "epoch": 8.44, "learning_rate": 3.1108977436389827e-06, "loss": 0.0898, "step": 35180 }, { "epoch": 8.45, "learning_rate": 3.106096975516083e-06, "loss": 0.1345, "step": 35190 }, { "epoch": 8.45, "learning_rate": 3.101296207393183e-06, "loss": 0.0588, "step": 35200 }, { "epoch": 8.45, "learning_rate": 3.0964954392702835e-06, "loss": 0.0573, "step": 35210 }, { "epoch": 8.45, "learning_rate": 3.091694671147384e-06, "loss": 0.1107, "step": 35220 }, { "epoch": 8.46, "learning_rate": 3.0868939030244844e-06, "loss": 0.1779, "step": 35230 }, { "epoch": 8.46, "learning_rate": 3.0820931349015844e-06, "loss": 0.2117, "step": 35240 }, { "epoch": 8.46, "learning_rate": 3.077292366778685e-06, "loss": 0.1218, "step": 35250 }, { "epoch": 8.46, "learning_rate": 3.0724915986557852e-06, "loss": 0.1629, "step": 35260 }, { "epoch": 8.47, "learning_rate": 3.0676908305328856e-06, "loss": 0.2059, "step": 35270 }, { "epoch": 8.47, "learning_rate": 3.062890062409986e-06, "loss": 0.1014, "step": 35280 }, { "epoch": 8.47, "learning_rate": 3.058089294287086e-06, "loss": 0.2194, "step": 35290 }, { "epoch": 8.47, "learning_rate": 3.0532885261641865e-06, "loss": 0.0733, "step": 35300 }, { "epoch": 8.48, "learning_rate": 3.048487758041287e-06, "loss": 0.2837, "step": 35310 }, { "epoch": 8.48, "learning_rate": 3.0436869899183873e-06, "loss": 0.1377, "step": 35320 }, { "epoch": 8.48, "learning_rate": 3.0388862217954873e-06, "loss": 0.2215, "step": 35330 }, { "epoch": 8.48, "learning_rate": 3.0340854536725877e-06, "loss": 0.0281, "step": 35340 }, { "epoch": 8.49, "learning_rate": 3.029284685549688e-06, "loss": 0.0766, "step": 35350 }, { "epoch": 8.49, "learning_rate": 3.0244839174267886e-06, "loss": 0.1596, "step": 35360 }, { "epoch": 8.49, "learning_rate": 3.0196831493038886e-06, "loss": 0.1112, "step": 35370 }, { "epoch": 8.49, "learning_rate": 3.014882381180989e-06, "loss": 0.1629, "step": 35380 }, { "epoch": 8.49, "learning_rate": 3.0100816130580894e-06, "loss": 0.1476, "step": 35390 }, { "epoch": 8.5, "learning_rate": 3.00528084493519e-06, "loss": 0.1226, "step": 35400 }, { "epoch": 8.5, "learning_rate": 3.0004800768122907e-06, "loss": 0.2149, "step": 35410 }, { "epoch": 8.5, "learning_rate": 2.9956793086893902e-06, "loss": 0.1062, "step": 35420 }, { "epoch": 8.5, "learning_rate": 2.9908785405664907e-06, "loss": 0.2502, "step": 35430 }, { "epoch": 8.51, "learning_rate": 2.9860777724435915e-06, "loss": 0.0245, "step": 35440 }, { "epoch": 8.51, "learning_rate": 2.981277004320692e-06, "loss": 0.1055, "step": 35450 }, { "epoch": 8.51, "learning_rate": 2.9764762361977915e-06, "loss": 0.0777, "step": 35460 }, { "epoch": 8.51, "learning_rate": 2.9716754680748923e-06, "loss": 0.1057, "step": 35470 }, { "epoch": 8.52, "learning_rate": 2.9668746999519928e-06, "loss": 0.0549, "step": 35480 }, { "epoch": 8.52, "learning_rate": 2.962073931829093e-06, "loss": 0.0987, "step": 35490 }, { "epoch": 8.52, "learning_rate": 2.957273163706193e-06, "loss": 0.1285, "step": 35500 }, { "epoch": 8.52, "learning_rate": 2.9524723955832936e-06, "loss": 0.0943, "step": 35510 }, { "epoch": 8.53, "learning_rate": 2.947671627460394e-06, "loss": 0.121, "step": 35520 }, { "epoch": 8.53, "learning_rate": 2.9428708593374944e-06, "loss": 0.1275, "step": 35530 }, { "epoch": 8.53, "learning_rate": 2.9380700912145944e-06, "loss": 0.0021, "step": 35540 }, { "epoch": 8.53, "learning_rate": 2.933269323091695e-06, "loss": 0.0995, "step": 35550 }, { "epoch": 8.54, "learning_rate": 2.9284685549687953e-06, "loss": 0.0811, "step": 35560 }, { "epoch": 8.54, "learning_rate": 2.9236677868458957e-06, "loss": 0.049, "step": 35570 }, { "epoch": 8.54, "learning_rate": 2.918867018722996e-06, "loss": 0.0044, "step": 35580 }, { "epoch": 8.54, "learning_rate": 2.914066250600096e-06, "loss": 0.0953, "step": 35590 }, { "epoch": 8.55, "learning_rate": 2.9092654824771965e-06, "loss": 0.1121, "step": 35600 }, { "epoch": 8.55, "learning_rate": 2.904464714354297e-06, "loss": 0.1563, "step": 35610 }, { "epoch": 8.55, "learning_rate": 2.8996639462313974e-06, "loss": 0.0416, "step": 35620 }, { "epoch": 8.55, "learning_rate": 2.8948631781084974e-06, "loss": 0.2389, "step": 35630 }, { "epoch": 8.55, "learning_rate": 2.8900624099855978e-06, "loss": 0.1079, "step": 35640 }, { "epoch": 8.56, "learning_rate": 2.885261641862698e-06, "loss": 0.172, "step": 35650 }, { "epoch": 8.56, "learning_rate": 2.8804608737397986e-06, "loss": 0.0645, "step": 35660 }, { "epoch": 8.56, "learning_rate": 2.8756601056168986e-06, "loss": 0.0511, "step": 35670 }, { "epoch": 8.56, "learning_rate": 2.870859337493999e-06, "loss": 0.0898, "step": 35680 }, { "epoch": 8.57, "learning_rate": 2.8660585693710995e-06, "loss": 0.0793, "step": 35690 }, { "epoch": 8.57, "learning_rate": 2.8612578012482e-06, "loss": 0.1187, "step": 35700 }, { "epoch": 8.57, "learning_rate": 2.8564570331253007e-06, "loss": 0.0558, "step": 35710 }, { "epoch": 8.57, "learning_rate": 2.8516562650024003e-06, "loss": 0.0742, "step": 35720 }, { "epoch": 8.58, "learning_rate": 2.8468554968795007e-06, "loss": 0.1792, "step": 35730 }, { "epoch": 8.58, "learning_rate": 2.8420547287566016e-06, "loss": 0.1642, "step": 35740 }, { "epoch": 8.58, "learning_rate": 2.837253960633702e-06, "loss": 0.2237, "step": 35750 }, { "epoch": 8.58, "learning_rate": 2.8324531925108015e-06, "loss": 0.1767, "step": 35760 }, { "epoch": 8.59, "learning_rate": 2.8276524243879024e-06, "loss": 0.0623, "step": 35770 }, { "epoch": 8.59, "learning_rate": 2.822851656265003e-06, "loss": 0.0554, "step": 35780 }, { "epoch": 8.59, "learning_rate": 2.8180508881421032e-06, "loss": 0.0084, "step": 35790 }, { "epoch": 8.59, "learning_rate": 2.8132501200192032e-06, "loss": 0.2372, "step": 35800 }, { "epoch": 8.6, "learning_rate": 2.8084493518963036e-06, "loss": 0.0402, "step": 35810 }, { "epoch": 8.6, "learning_rate": 2.803648583773404e-06, "loss": 0.1268, "step": 35820 }, { "epoch": 8.6, "learning_rate": 2.7988478156505045e-06, "loss": 0.047, "step": 35830 }, { "epoch": 8.6, "learning_rate": 2.794047047527605e-06, "loss": 0.0758, "step": 35840 }, { "epoch": 8.61, "learning_rate": 2.789246279404705e-06, "loss": 0.0898, "step": 35850 }, { "epoch": 8.61, "learning_rate": 2.7844455112818053e-06, "loss": 0.0381, "step": 35860 }, { "epoch": 8.61, "learning_rate": 2.7796447431589057e-06, "loss": 0.0702, "step": 35870 }, { "epoch": 8.61, "learning_rate": 2.774843975036006e-06, "loss": 0.0517, "step": 35880 }, { "epoch": 8.61, "learning_rate": 2.770043206913106e-06, "loss": 0.1712, "step": 35890 }, { "epoch": 8.62, "learning_rate": 2.7652424387902066e-06, "loss": 0.109, "step": 35900 }, { "epoch": 8.62, "learning_rate": 2.760441670667307e-06, "loss": 0.0274, "step": 35910 }, { "epoch": 8.62, "learning_rate": 2.7556409025444074e-06, "loss": 0.088, "step": 35920 }, { "epoch": 8.62, "learning_rate": 2.7508401344215074e-06, "loss": 0.0917, "step": 35930 }, { "epoch": 8.63, "learning_rate": 2.746039366298608e-06, "loss": 0.1361, "step": 35940 }, { "epoch": 8.63, "learning_rate": 2.7412385981757082e-06, "loss": 0.1388, "step": 35950 }, { "epoch": 8.63, "learning_rate": 2.7364378300528087e-06, "loss": 0.0728, "step": 35960 }, { "epoch": 8.63, "learning_rate": 2.7316370619299087e-06, "loss": 0.0112, "step": 35970 }, { "epoch": 8.64, "learning_rate": 2.726836293807009e-06, "loss": 0.0377, "step": 35980 }, { "epoch": 8.64, "learning_rate": 2.7220355256841095e-06, "loss": 0.0404, "step": 35990 }, { "epoch": 8.64, "learning_rate": 2.7172347575612103e-06, "loss": 0.0364, "step": 36000 }, { "epoch": 8.64, "learning_rate": 2.7124339894383108e-06, "loss": 0.0636, "step": 36010 }, { "epoch": 8.65, "learning_rate": 2.7076332213154103e-06, "loss": 0.1615, "step": 36020 }, { "epoch": 8.65, "learning_rate": 2.702832453192511e-06, "loss": 0.1372, "step": 36030 }, { "epoch": 8.65, "learning_rate": 2.6980316850696116e-06, "loss": 0.0586, "step": 36040 }, { "epoch": 8.65, "learning_rate": 2.693230916946712e-06, "loss": 0.0477, "step": 36050 }, { "epoch": 8.66, "learning_rate": 2.688430148823812e-06, "loss": 0.0621, "step": 36060 }, { "epoch": 8.66, "learning_rate": 2.6836293807009124e-06, "loss": 0.2088, "step": 36070 }, { "epoch": 8.66, "learning_rate": 2.678828612578013e-06, "loss": 0.0828, "step": 36080 }, { "epoch": 8.66, "learning_rate": 2.6740278444551133e-06, "loss": 0.1979, "step": 36090 }, { "epoch": 8.67, "learning_rate": 2.6692270763322133e-06, "loss": 0.1058, "step": 36100 }, { "epoch": 8.67, "learning_rate": 2.6644263082093137e-06, "loss": 0.0778, "step": 36110 }, { "epoch": 8.67, "learning_rate": 2.659625540086414e-06, "loss": 0.04, "step": 36120 }, { "epoch": 8.67, "learning_rate": 2.6548247719635145e-06, "loss": 0.1341, "step": 36130 }, { "epoch": 8.67, "learning_rate": 2.650024003840615e-06, "loss": 0.0925, "step": 36140 }, { "epoch": 8.68, "learning_rate": 2.645223235717715e-06, "loss": 0.0177, "step": 36150 }, { "epoch": 8.68, "learning_rate": 2.6404224675948154e-06, "loss": 0.0232, "step": 36160 }, { "epoch": 8.68, "learning_rate": 2.6356216994719158e-06, "loss": 0.0384, "step": 36170 }, { "epoch": 8.68, "learning_rate": 2.630820931349016e-06, "loss": 0.2557, "step": 36180 }, { "epoch": 8.69, "learning_rate": 2.626020163226116e-06, "loss": 0.1308, "step": 36190 }, { "epoch": 8.69, "learning_rate": 2.6212193951032166e-06, "loss": 0.1193, "step": 36200 }, { "epoch": 8.69, "learning_rate": 2.616418626980317e-06, "loss": 0.1673, "step": 36210 }, { "epoch": 8.69, "learning_rate": 2.6116178588574175e-06, "loss": 0.0019, "step": 36220 }, { "epoch": 8.7, "learning_rate": 2.6068170907345175e-06, "loss": 0.1037, "step": 36230 }, { "epoch": 8.7, "learning_rate": 2.602016322611618e-06, "loss": 0.0414, "step": 36240 }, { "epoch": 8.7, "learning_rate": 2.5972155544887183e-06, "loss": 0.1626, "step": 36250 }, { "epoch": 8.7, "learning_rate": 2.5924147863658187e-06, "loss": 0.0854, "step": 36260 }, { "epoch": 8.71, "learning_rate": 2.5876140182429187e-06, "loss": 0.0936, "step": 36270 }, { "epoch": 8.71, "learning_rate": 2.582813250120019e-06, "loss": 0.1719, "step": 36280 }, { "epoch": 8.71, "learning_rate": 2.5780124819971196e-06, "loss": 0.1474, "step": 36290 }, { "epoch": 8.71, "learning_rate": 2.5732117138742204e-06, "loss": 0.1208, "step": 36300 }, { "epoch": 8.72, "learning_rate": 2.568410945751321e-06, "loss": 0.0029, "step": 36310 }, { "epoch": 8.72, "learning_rate": 2.5636101776284204e-06, "loss": 0.2022, "step": 36320 }, { "epoch": 8.72, "learning_rate": 2.5588094095055212e-06, "loss": 0.0562, "step": 36330 }, { "epoch": 8.72, "learning_rate": 2.5540086413826216e-06, "loss": 0.0899, "step": 36340 }, { "epoch": 8.73, "learning_rate": 2.549207873259722e-06, "loss": 0.2181, "step": 36350 }, { "epoch": 8.73, "learning_rate": 2.544407105136822e-06, "loss": 0.1464, "step": 36360 }, { "epoch": 8.73, "learning_rate": 2.5396063370139225e-06, "loss": 0.1775, "step": 36370 }, { "epoch": 8.73, "learning_rate": 2.534805568891023e-06, "loss": 0.1081, "step": 36380 }, { "epoch": 8.73, "learning_rate": 2.5300048007681233e-06, "loss": 0.2208, "step": 36390 }, { "epoch": 8.74, "learning_rate": 2.5252040326452233e-06, "loss": 0.1515, "step": 36400 }, { "epoch": 8.74, "learning_rate": 2.5204032645223237e-06, "loss": 0.1562, "step": 36410 }, { "epoch": 8.74, "learning_rate": 2.515602496399424e-06, "loss": 0.1043, "step": 36420 }, { "epoch": 8.74, "learning_rate": 2.5108017282765246e-06, "loss": 0.1249, "step": 36430 }, { "epoch": 8.75, "learning_rate": 2.506000960153625e-06, "loss": 0.135, "step": 36440 }, { "epoch": 8.75, "learning_rate": 2.501200192030725e-06, "loss": 0.0607, "step": 36450 }, { "epoch": 8.75, "learning_rate": 2.4963994239078254e-06, "loss": 0.0664, "step": 36460 }, { "epoch": 8.75, "learning_rate": 2.491598655784926e-06, "loss": 0.0984, "step": 36470 }, { "epoch": 8.76, "learning_rate": 2.4867978876620263e-06, "loss": 0.0656, "step": 36480 }, { "epoch": 8.76, "learning_rate": 2.4819971195391267e-06, "loss": 0.0594, "step": 36490 }, { "epoch": 8.76, "learning_rate": 2.4771963514162267e-06, "loss": 0.0654, "step": 36500 }, { "epoch": 8.76, "learning_rate": 2.472395583293327e-06, "loss": 0.0641, "step": 36510 }, { "epoch": 8.77, "learning_rate": 2.4675948151704275e-06, "loss": 0.0239, "step": 36520 }, { "epoch": 8.77, "learning_rate": 2.462794047047528e-06, "loss": 0.1542, "step": 36530 }, { "epoch": 8.77, "learning_rate": 2.457993278924628e-06, "loss": 0.0402, "step": 36540 }, { "epoch": 8.77, "learning_rate": 2.4531925108017283e-06, "loss": 0.214, "step": 36550 }, { "epoch": 8.78, "learning_rate": 2.4483917426788288e-06, "loss": 0.1662, "step": 36560 }, { "epoch": 8.78, "learning_rate": 2.443590974555929e-06, "loss": 0.0757, "step": 36570 }, { "epoch": 8.78, "learning_rate": 2.4387902064330296e-06, "loss": 0.0178, "step": 36580 }, { "epoch": 8.78, "learning_rate": 2.4339894383101296e-06, "loss": 0.1082, "step": 36590 }, { "epoch": 8.79, "learning_rate": 2.4291886701872304e-06, "loss": 0.0402, "step": 36600 }, { "epoch": 8.79, "learning_rate": 2.4243879020643304e-06, "loss": 0.1778, "step": 36610 }, { "epoch": 8.79, "learning_rate": 2.419587133941431e-06, "loss": 0.1125, "step": 36620 }, { "epoch": 8.79, "learning_rate": 2.4147863658185313e-06, "loss": 0.1207, "step": 36630 }, { "epoch": 8.8, "learning_rate": 2.4099855976956317e-06, "loss": 0.0123, "step": 36640 }, { "epoch": 8.8, "learning_rate": 2.4051848295727317e-06, "loss": 0.1551, "step": 36650 }, { "epoch": 8.8, "learning_rate": 2.400384061449832e-06, "loss": 0.0674, "step": 36660 }, { "epoch": 8.8, "learning_rate": 2.3955832933269325e-06, "loss": 0.1039, "step": 36670 }, { "epoch": 8.8, "learning_rate": 2.390782525204033e-06, "loss": 0.026, "step": 36680 }, { "epoch": 8.81, "learning_rate": 2.385981757081133e-06, "loss": 0.0884, "step": 36690 }, { "epoch": 8.81, "learning_rate": 2.3811809889582334e-06, "loss": 0.0832, "step": 36700 }, { "epoch": 8.81, "learning_rate": 2.3763802208353338e-06, "loss": 0.0427, "step": 36710 }, { "epoch": 8.81, "learning_rate": 2.371579452712434e-06, "loss": 0.0382, "step": 36720 }, { "epoch": 8.82, "learning_rate": 2.3667786845895346e-06, "loss": 0.0207, "step": 36730 }, { "epoch": 8.82, "learning_rate": 2.3619779164666346e-06, "loss": 0.2051, "step": 36740 }, { "epoch": 8.82, "learning_rate": 2.3571771483437355e-06, "loss": 0.1208, "step": 36750 }, { "epoch": 8.82, "learning_rate": 2.3523763802208355e-06, "loss": 0.0285, "step": 36760 }, { "epoch": 8.83, "learning_rate": 2.347575612097936e-06, "loss": 0.006, "step": 36770 }, { "epoch": 8.83, "learning_rate": 2.3427748439750363e-06, "loss": 0.202, "step": 36780 }, { "epoch": 8.83, "learning_rate": 2.3379740758521367e-06, "loss": 0.2012, "step": 36790 }, { "epoch": 8.83, "learning_rate": 2.3331733077292367e-06, "loss": 0.1408, "step": 36800 }, { "epoch": 8.84, "learning_rate": 2.328372539606337e-06, "loss": 0.0723, "step": 36810 }, { "epoch": 8.84, "learning_rate": 2.3235717714834376e-06, "loss": 0.123, "step": 36820 }, { "epoch": 8.84, "learning_rate": 2.318771003360538e-06, "loss": 0.1602, "step": 36830 }, { "epoch": 8.84, "learning_rate": 2.313970235237638e-06, "loss": 0.1734, "step": 36840 }, { "epoch": 8.85, "learning_rate": 2.3091694671147384e-06, "loss": 0.1012, "step": 36850 }, { "epoch": 8.85, "learning_rate": 2.304368698991839e-06, "loss": 0.1971, "step": 36860 }, { "epoch": 8.85, "learning_rate": 2.2995679308689392e-06, "loss": 0.1942, "step": 36870 }, { "epoch": 8.85, "learning_rate": 2.2947671627460396e-06, "loss": 0.0578, "step": 36880 }, { "epoch": 8.86, "learning_rate": 2.2899663946231396e-06, "loss": 0.176, "step": 36890 }, { "epoch": 8.86, "learning_rate": 2.2851656265002405e-06, "loss": 0.1863, "step": 36900 }, { "epoch": 8.86, "learning_rate": 2.2803648583773405e-06, "loss": 0.0396, "step": 36910 }, { "epoch": 8.86, "learning_rate": 2.275564090254441e-06, "loss": 0.2159, "step": 36920 }, { "epoch": 8.86, "learning_rate": 2.2707633221315413e-06, "loss": 0.0832, "step": 36930 }, { "epoch": 8.87, "learning_rate": 2.2659625540086417e-06, "loss": 0.2012, "step": 36940 }, { "epoch": 8.87, "learning_rate": 2.2611617858857417e-06, "loss": 0.2133, "step": 36950 }, { "epoch": 8.87, "learning_rate": 2.256361017762842e-06, "loss": 0.1355, "step": 36960 }, { "epoch": 8.87, "learning_rate": 2.2515602496399426e-06, "loss": 0.0235, "step": 36970 }, { "epoch": 8.88, "learning_rate": 2.246759481517043e-06, "loss": 0.0979, "step": 36980 }, { "epoch": 8.88, "learning_rate": 2.241958713394143e-06, "loss": 0.0873, "step": 36990 }, { "epoch": 8.88, "learning_rate": 2.2371579452712434e-06, "loss": 0.0015, "step": 37000 }, { "epoch": 8.88, "learning_rate": 2.232357177148344e-06, "loss": 0.2196, "step": 37010 }, { "epoch": 8.89, "learning_rate": 2.2275564090254443e-06, "loss": 0.1781, "step": 37020 }, { "epoch": 8.89, "learning_rate": 2.2227556409025447e-06, "loss": 0.1108, "step": 37030 }, { "epoch": 8.89, "learning_rate": 2.217954872779645e-06, "loss": 0.0359, "step": 37040 }, { "epoch": 8.89, "learning_rate": 2.2131541046567455e-06, "loss": 0.0443, "step": 37050 }, { "epoch": 8.9, "learning_rate": 2.2083533365338455e-06, "loss": 0.0613, "step": 37060 }, { "epoch": 8.9, "learning_rate": 2.203552568410946e-06, "loss": 0.057, "step": 37070 }, { "epoch": 8.9, "learning_rate": 2.1987518002880463e-06, "loss": 0.0378, "step": 37080 }, { "epoch": 8.9, "learning_rate": 2.1939510321651468e-06, "loss": 0.1198, "step": 37090 }, { "epoch": 8.91, "learning_rate": 2.1891502640422468e-06, "loss": 0.232, "step": 37100 }, { "epoch": 8.91, "learning_rate": 2.184349495919347e-06, "loss": 0.1237, "step": 37110 }, { "epoch": 8.91, "learning_rate": 2.1795487277964476e-06, "loss": 0.1076, "step": 37120 }, { "epoch": 8.91, "learning_rate": 2.174747959673548e-06, "loss": 0.101, "step": 37130 }, { "epoch": 8.92, "learning_rate": 2.169947191550648e-06, "loss": 0.1641, "step": 37140 }, { "epoch": 8.92, "learning_rate": 2.1651464234277484e-06, "loss": 0.2298, "step": 37150 }, { "epoch": 8.92, "learning_rate": 2.1603456553048493e-06, "loss": 0.2283, "step": 37160 }, { "epoch": 8.92, "learning_rate": 2.1555448871819493e-06, "loss": 0.002, "step": 37170 }, { "epoch": 8.92, "learning_rate": 2.1507441190590497e-06, "loss": 0.2284, "step": 37180 }, { "epoch": 8.93, "learning_rate": 2.14594335093615e-06, "loss": 0.0436, "step": 37190 }, { "epoch": 8.93, "learning_rate": 2.1411425828132505e-06, "loss": 0.028, "step": 37200 }, { "epoch": 8.93, "learning_rate": 2.1363418146903505e-06, "loss": 0.015, "step": 37210 }, { "epoch": 8.93, "learning_rate": 2.131541046567451e-06, "loss": 0.1213, "step": 37220 }, { "epoch": 8.94, "learning_rate": 2.1267402784445514e-06, "loss": 0.2661, "step": 37230 }, { "epoch": 8.94, "learning_rate": 2.121939510321652e-06, "loss": 0.1677, "step": 37240 }, { "epoch": 8.94, "learning_rate": 2.1171387421987518e-06, "loss": 0.0819, "step": 37250 }, { "epoch": 8.94, "learning_rate": 2.112337974075852e-06, "loss": 0.08, "step": 37260 }, { "epoch": 8.95, "learning_rate": 2.1075372059529526e-06, "loss": 0.1198, "step": 37270 }, { "epoch": 8.95, "learning_rate": 2.102736437830053e-06, "loss": 0.0467, "step": 37280 }, { "epoch": 8.95, "learning_rate": 2.097935669707153e-06, "loss": 0.0958, "step": 37290 }, { "epoch": 8.95, "learning_rate": 2.0931349015842535e-06, "loss": 0.1183, "step": 37300 }, { "epoch": 8.96, "learning_rate": 2.0883341334613543e-06, "loss": 0.1418, "step": 37310 }, { "epoch": 8.96, "learning_rate": 2.0835333653384543e-06, "loss": 0.1526, "step": 37320 }, { "epoch": 8.96, "learning_rate": 2.0787325972155547e-06, "loss": 0.143, "step": 37330 }, { "epoch": 8.96, "learning_rate": 2.073931829092655e-06, "loss": 0.0534, "step": 37340 }, { "epoch": 8.97, "learning_rate": 2.0691310609697556e-06, "loss": 0.1059, "step": 37350 }, { "epoch": 8.97, "learning_rate": 2.0643302928468556e-06, "loss": 0.0761, "step": 37360 }, { "epoch": 8.97, "learning_rate": 2.059529524723956e-06, "loss": 0.0224, "step": 37370 }, { "epoch": 8.97, "learning_rate": 2.0547287566010564e-06, "loss": 0.0215, "step": 37380 }, { "epoch": 8.98, "learning_rate": 2.049927988478157e-06, "loss": 0.2208, "step": 37390 }, { "epoch": 8.98, "learning_rate": 2.045127220355257e-06, "loss": 0.1801, "step": 37400 }, { "epoch": 8.98, "learning_rate": 2.0403264522323572e-06, "loss": 0.1677, "step": 37410 }, { "epoch": 8.98, "learning_rate": 2.0355256841094577e-06, "loss": 0.1413, "step": 37420 }, { "epoch": 8.98, "learning_rate": 2.030724915986558e-06, "loss": 0.1408, "step": 37430 }, { "epoch": 8.99, "learning_rate": 2.0259241478636585e-06, "loss": 0.0303, "step": 37440 }, { "epoch": 8.99, "learning_rate": 2.0211233797407585e-06, "loss": 0.1131, "step": 37450 }, { "epoch": 8.99, "learning_rate": 2.0163226116178593e-06, "loss": 0.1378, "step": 37460 }, { "epoch": 8.99, "learning_rate": 2.0115218434949593e-06, "loss": 0.0746, "step": 37470 }, { "epoch": 9.0, "learning_rate": 2.0067210753720597e-06, "loss": 0.1386, "step": 37480 }, { "epoch": 9.0, "learning_rate": 2.00192030724916e-06, "loss": 0.1746, "step": 37490 }, { "epoch": 9.0, "eval_accuracy": 0.9993199591975519, "eval_loss": 0.0033987753558903933, "eval_runtime": 513.2336, "eval_samples_per_second": 11.461, "eval_steps_per_second": 1.434, "step": 37494 }, { "epoch": 9.0, "learning_rate": 1.9971195391262606e-06, "loss": 0.1878, "step": 37500 }, { "epoch": 9.0, "learning_rate": 1.9923187710033606e-06, "loss": 0.1701, "step": 37510 }, { "epoch": 9.01, "learning_rate": 1.987518002880461e-06, "loss": 0.206, "step": 37520 }, { "epoch": 9.01, "learning_rate": 1.9827172347575614e-06, "loss": 0.0521, "step": 37530 }, { "epoch": 9.01, "learning_rate": 1.977916466634662e-06, "loss": 0.0378, "step": 37540 }, { "epoch": 9.01, "learning_rate": 1.973115698511762e-06, "loss": 0.1435, "step": 37550 }, { "epoch": 9.02, "learning_rate": 1.9683149303888623e-06, "loss": 0.0828, "step": 37560 }, { "epoch": 9.02, "learning_rate": 1.9635141622659627e-06, "loss": 0.1406, "step": 37570 }, { "epoch": 9.02, "learning_rate": 1.958713394143063e-06, "loss": 0.1259, "step": 37580 }, { "epoch": 9.02, "learning_rate": 1.9539126260201635e-06, "loss": 0.1752, "step": 37590 }, { "epoch": 9.03, "learning_rate": 1.9491118578972635e-06, "loss": 0.1011, "step": 37600 }, { "epoch": 9.03, "learning_rate": 1.9443110897743644e-06, "loss": 0.0785, "step": 37610 }, { "epoch": 9.03, "learning_rate": 1.9395103216514643e-06, "loss": 0.1497, "step": 37620 }, { "epoch": 9.03, "learning_rate": 1.9347095535285648e-06, "loss": 0.2559, "step": 37630 }, { "epoch": 9.04, "learning_rate": 1.929908785405665e-06, "loss": 0.0948, "step": 37640 }, { "epoch": 9.04, "learning_rate": 1.9251080172827656e-06, "loss": 0.2756, "step": 37650 }, { "epoch": 9.04, "learning_rate": 1.9203072491598656e-06, "loss": 0.2295, "step": 37660 }, { "epoch": 9.04, "learning_rate": 1.915506481036966e-06, "loss": 0.1007, "step": 37670 }, { "epoch": 9.04, "learning_rate": 1.9107057129140664e-06, "loss": 0.1474, "step": 37680 }, { "epoch": 9.05, "learning_rate": 1.9059049447911669e-06, "loss": 0.2653, "step": 37690 }, { "epoch": 9.05, "learning_rate": 1.9011041766682669e-06, "loss": 0.1391, "step": 37700 }, { "epoch": 9.05, "learning_rate": 1.8963034085453675e-06, "loss": 0.0741, "step": 37710 }, { "epoch": 9.05, "learning_rate": 1.8915026404224677e-06, "loss": 0.0096, "step": 37720 }, { "epoch": 9.06, "learning_rate": 1.8867018722995681e-06, "loss": 0.035, "step": 37730 }, { "epoch": 9.06, "learning_rate": 1.8819011041766685e-06, "loss": 0.055, "step": 37740 }, { "epoch": 9.06, "learning_rate": 1.8771003360537687e-06, "loss": 0.2031, "step": 37750 }, { "epoch": 9.06, "learning_rate": 1.8722995679308692e-06, "loss": 0.0956, "step": 37760 }, { "epoch": 9.07, "learning_rate": 1.8674987998079694e-06, "loss": 0.1047, "step": 37770 }, { "epoch": 9.07, "learning_rate": 1.8626980316850698e-06, "loss": 0.02, "step": 37780 }, { "epoch": 9.07, "learning_rate": 1.85789726356217e-06, "loss": 0.0987, "step": 37790 }, { "epoch": 9.07, "learning_rate": 1.8530964954392704e-06, "loss": 0.0381, "step": 37800 }, { "epoch": 9.08, "learning_rate": 1.8482957273163706e-06, "loss": 0.125, "step": 37810 }, { "epoch": 9.08, "learning_rate": 1.8434949591934713e-06, "loss": 0.1215, "step": 37820 }, { "epoch": 9.08, "learning_rate": 1.8386941910705713e-06, "loss": 0.1177, "step": 37830 }, { "epoch": 9.08, "learning_rate": 1.8338934229476719e-06, "loss": 0.1425, "step": 37840 }, { "epoch": 9.09, "learning_rate": 1.829092654824772e-06, "loss": 0.1777, "step": 37850 }, { "epoch": 9.09, "learning_rate": 1.8242918867018725e-06, "loss": 0.0397, "step": 37860 }, { "epoch": 9.09, "learning_rate": 1.8194911185789727e-06, "loss": 0.0324, "step": 37870 }, { "epoch": 9.09, "learning_rate": 1.8146903504560731e-06, "loss": 0.1581, "step": 37880 }, { "epoch": 9.1, "learning_rate": 1.8098895823331736e-06, "loss": 0.1535, "step": 37890 }, { "epoch": 9.1, "learning_rate": 1.8050888142102738e-06, "loss": 0.182, "step": 37900 }, { "epoch": 9.1, "learning_rate": 1.8002880460873742e-06, "loss": 0.1094, "step": 37910 }, { "epoch": 9.1, "learning_rate": 1.7954872779644744e-06, "loss": 0.092, "step": 37920 }, { "epoch": 9.1, "learning_rate": 1.7906865098415748e-06, "loss": 0.195, "step": 37930 }, { "epoch": 9.11, "learning_rate": 1.785885741718675e-06, "loss": 0.0445, "step": 37940 }, { "epoch": 9.11, "learning_rate": 1.7810849735957754e-06, "loss": 0.0243, "step": 37950 }, { "epoch": 9.11, "learning_rate": 1.7762842054728757e-06, "loss": 0.1486, "step": 37960 }, { "epoch": 9.11, "learning_rate": 1.7714834373499763e-06, "loss": 0.0886, "step": 37970 }, { "epoch": 9.12, "learning_rate": 1.7666826692270763e-06, "loss": 0.1356, "step": 37980 }, { "epoch": 9.12, "learning_rate": 1.761881901104177e-06, "loss": 0.0991, "step": 37990 }, { "epoch": 9.12, "learning_rate": 1.7570811329812771e-06, "loss": 0.2388, "step": 38000 }, { "epoch": 9.12, "learning_rate": 1.7522803648583775e-06, "loss": 0.0346, "step": 38010 }, { "epoch": 9.13, "learning_rate": 1.747479596735478e-06, "loss": 0.231, "step": 38020 }, { "epoch": 9.13, "learning_rate": 1.7426788286125782e-06, "loss": 0.0353, "step": 38030 }, { "epoch": 9.13, "learning_rate": 1.7378780604896786e-06, "loss": 0.1968, "step": 38040 }, { "epoch": 9.13, "learning_rate": 1.7330772923667788e-06, "loss": 0.0631, "step": 38050 }, { "epoch": 9.14, "learning_rate": 1.7282765242438792e-06, "loss": 0.0521, "step": 38060 }, { "epoch": 9.14, "learning_rate": 1.7234757561209794e-06, "loss": 0.145, "step": 38070 }, { "epoch": 9.14, "learning_rate": 1.7186749879980798e-06, "loss": 0.1857, "step": 38080 }, { "epoch": 9.14, "learning_rate": 1.71387421987518e-06, "loss": 0.0272, "step": 38090 }, { "epoch": 9.15, "learning_rate": 1.7090734517522805e-06, "loss": 0.1056, "step": 38100 }, { "epoch": 9.15, "learning_rate": 1.7042726836293807e-06, "loss": 0.0739, "step": 38110 }, { "epoch": 9.15, "learning_rate": 1.6994719155064813e-06, "loss": 0.1712, "step": 38120 }, { "epoch": 9.15, "learning_rate": 1.6946711473835813e-06, "loss": 0.1357, "step": 38130 }, { "epoch": 9.16, "learning_rate": 1.689870379260682e-06, "loss": 0.2179, "step": 38140 }, { "epoch": 9.16, "learning_rate": 1.6850696111377821e-06, "loss": 0.1611, "step": 38150 }, { "epoch": 9.16, "learning_rate": 1.6802688430148826e-06, "loss": 0.1862, "step": 38160 }, { "epoch": 9.16, "learning_rate": 1.675468074891983e-06, "loss": 0.0997, "step": 38170 }, { "epoch": 9.16, "learning_rate": 1.6706673067690832e-06, "loss": 0.1874, "step": 38180 }, { "epoch": 9.17, "learning_rate": 1.6658665386461836e-06, "loss": 0.0434, "step": 38190 }, { "epoch": 9.17, "learning_rate": 1.6610657705232838e-06, "loss": 0.0266, "step": 38200 }, { "epoch": 9.17, "learning_rate": 1.6562650024003842e-06, "loss": 0.239, "step": 38210 }, { "epoch": 9.17, "learning_rate": 1.6514642342774844e-06, "loss": 0.1163, "step": 38220 }, { "epoch": 9.18, "learning_rate": 1.6466634661545849e-06, "loss": 0.3316, "step": 38230 }, { "epoch": 9.18, "learning_rate": 1.641862698031685e-06, "loss": 0.0933, "step": 38240 }, { "epoch": 9.18, "learning_rate": 1.6370619299087855e-06, "loss": 0.2108, "step": 38250 }, { "epoch": 9.18, "learning_rate": 1.6322611617858857e-06, "loss": 0.0327, "step": 38260 }, { "epoch": 9.19, "learning_rate": 1.6274603936629863e-06, "loss": 0.0451, "step": 38270 }, { "epoch": 9.19, "learning_rate": 1.6226596255400865e-06, "loss": 0.0216, "step": 38280 }, { "epoch": 9.19, "learning_rate": 1.617858857417187e-06, "loss": 0.001, "step": 38290 }, { "epoch": 9.19, "learning_rate": 1.6130580892942872e-06, "loss": 0.0998, "step": 38300 }, { "epoch": 9.2, "learning_rate": 1.6082573211713876e-06, "loss": 0.1583, "step": 38310 }, { "epoch": 9.2, "learning_rate": 1.603456553048488e-06, "loss": 0.0751, "step": 38320 }, { "epoch": 9.2, "learning_rate": 1.5986557849255882e-06, "loss": 0.017, "step": 38330 }, { "epoch": 9.2, "learning_rate": 1.5938550168026886e-06, "loss": 0.0585, "step": 38340 }, { "epoch": 9.21, "learning_rate": 1.5890542486797888e-06, "loss": 0.009, "step": 38350 }, { "epoch": 9.21, "learning_rate": 1.5842534805568893e-06, "loss": 0.0885, "step": 38360 }, { "epoch": 9.21, "learning_rate": 1.5794527124339895e-06, "loss": 0.0495, "step": 38370 }, { "epoch": 9.21, "learning_rate": 1.5746519443110899e-06, "loss": 0.0419, "step": 38380 }, { "epoch": 9.22, "learning_rate": 1.56985117618819e-06, "loss": 0.0551, "step": 38390 }, { "epoch": 9.22, "learning_rate": 1.5650504080652907e-06, "loss": 0.0302, "step": 38400 }, { "epoch": 9.22, "learning_rate": 1.5602496399423907e-06, "loss": 0.1685, "step": 38410 }, { "epoch": 9.22, "learning_rate": 1.5554488718194914e-06, "loss": 0.0711, "step": 38420 }, { "epoch": 9.22, "learning_rate": 1.5506481036965916e-06, "loss": 0.0825, "step": 38430 }, { "epoch": 9.23, "learning_rate": 1.545847335573692e-06, "loss": 0.1307, "step": 38440 }, { "epoch": 9.23, "learning_rate": 1.5410465674507922e-06, "loss": 0.0563, "step": 38450 }, { "epoch": 9.23, "learning_rate": 1.5362457993278926e-06, "loss": 0.1944, "step": 38460 }, { "epoch": 9.23, "learning_rate": 1.531445031204993e-06, "loss": 0.112, "step": 38470 }, { "epoch": 9.24, "learning_rate": 1.5266442630820932e-06, "loss": 0.1698, "step": 38480 }, { "epoch": 9.24, "learning_rate": 1.5218434949591937e-06, "loss": 0.1531, "step": 38490 }, { "epoch": 9.24, "learning_rate": 1.5170427268362939e-06, "loss": 0.0607, "step": 38500 }, { "epoch": 9.24, "learning_rate": 1.5122419587133943e-06, "loss": 0.1413, "step": 38510 }, { "epoch": 9.25, "learning_rate": 1.5074411905904945e-06, "loss": 0.0615, "step": 38520 }, { "epoch": 9.25, "learning_rate": 1.502640422467595e-06, "loss": 0.0759, "step": 38530 }, { "epoch": 9.25, "learning_rate": 1.4978396543446951e-06, "loss": 0.2142, "step": 38540 }, { "epoch": 9.25, "learning_rate": 1.4930388862217958e-06, "loss": 0.0335, "step": 38550 }, { "epoch": 9.26, "learning_rate": 1.4882381180988957e-06, "loss": 0.1903, "step": 38560 }, { "epoch": 9.26, "learning_rate": 1.4834373499759964e-06, "loss": 0.0648, "step": 38570 }, { "epoch": 9.26, "learning_rate": 1.4786365818530966e-06, "loss": 0.1101, "step": 38580 }, { "epoch": 9.26, "learning_rate": 1.473835813730197e-06, "loss": 0.1405, "step": 38590 }, { "epoch": 9.27, "learning_rate": 1.4690350456072972e-06, "loss": 0.1506, "step": 38600 }, { "epoch": 9.27, "learning_rate": 1.4642342774843976e-06, "loss": 0.2052, "step": 38610 }, { "epoch": 9.27, "learning_rate": 1.459433509361498e-06, "loss": 0.104, "step": 38620 }, { "epoch": 9.27, "learning_rate": 1.4546327412385983e-06, "loss": 0.1255, "step": 38630 }, { "epoch": 9.28, "learning_rate": 1.4498319731156987e-06, "loss": 0.0829, "step": 38640 }, { "epoch": 9.28, "learning_rate": 1.4450312049927989e-06, "loss": 0.1473, "step": 38650 }, { "epoch": 9.28, "learning_rate": 1.4402304368698993e-06, "loss": 0.0626, "step": 38660 }, { "epoch": 9.28, "learning_rate": 1.4354296687469995e-06, "loss": 0.0857, "step": 38670 }, { "epoch": 9.28, "learning_rate": 1.4306289006241e-06, "loss": 0.0173, "step": 38680 }, { "epoch": 9.29, "learning_rate": 1.4258281325012001e-06, "loss": 0.153, "step": 38690 }, { "epoch": 9.29, "learning_rate": 1.4210273643783008e-06, "loss": 0.1947, "step": 38700 }, { "epoch": 9.29, "learning_rate": 1.4162265962554008e-06, "loss": 0.1525, "step": 38710 }, { "epoch": 9.29, "learning_rate": 1.4114258281325014e-06, "loss": 0.2249, "step": 38720 }, { "epoch": 9.3, "learning_rate": 1.4066250600096016e-06, "loss": 0.0994, "step": 38730 }, { "epoch": 9.3, "learning_rate": 1.401824291886702e-06, "loss": 0.1172, "step": 38740 }, { "epoch": 9.3, "learning_rate": 1.3970235237638025e-06, "loss": 0.0375, "step": 38750 }, { "epoch": 9.3, "learning_rate": 1.3922227556409027e-06, "loss": 0.3177, "step": 38760 }, { "epoch": 9.31, "learning_rate": 1.387421987518003e-06, "loss": 0.1305, "step": 38770 }, { "epoch": 9.31, "learning_rate": 1.3826212193951033e-06, "loss": 0.077, "step": 38780 }, { "epoch": 9.31, "learning_rate": 1.3778204512722037e-06, "loss": 0.1399, "step": 38790 }, { "epoch": 9.31, "learning_rate": 1.373019683149304e-06, "loss": 0.0777, "step": 38800 }, { "epoch": 9.32, "learning_rate": 1.3682189150264043e-06, "loss": 0.2221, "step": 38810 }, { "epoch": 9.32, "learning_rate": 1.3634181469035045e-06, "loss": 0.1954, "step": 38820 }, { "epoch": 9.32, "learning_rate": 1.3586173787806052e-06, "loss": 0.0751, "step": 38830 }, { "epoch": 9.32, "learning_rate": 1.3538166106577052e-06, "loss": 0.2409, "step": 38840 }, { "epoch": 9.33, "learning_rate": 1.3490158425348058e-06, "loss": 0.1612, "step": 38850 }, { "epoch": 9.33, "learning_rate": 1.344215074411906e-06, "loss": 0.0215, "step": 38860 }, { "epoch": 9.33, "learning_rate": 1.3394143062890064e-06, "loss": 0.1091, "step": 38870 }, { "epoch": 9.33, "learning_rate": 1.3346135381661066e-06, "loss": 0.047, "step": 38880 }, { "epoch": 9.34, "learning_rate": 1.329812770043207e-06, "loss": 0.083, "step": 38890 }, { "epoch": 9.34, "learning_rate": 1.3250120019203075e-06, "loss": 0.1649, "step": 38900 }, { "epoch": 9.34, "learning_rate": 1.3202112337974077e-06, "loss": 0.0897, "step": 38910 }, { "epoch": 9.34, "learning_rate": 1.315410465674508e-06, "loss": 0.1613, "step": 38920 }, { "epoch": 9.34, "learning_rate": 1.3106096975516083e-06, "loss": 0.1087, "step": 38930 }, { "epoch": 9.35, "learning_rate": 1.3058089294287087e-06, "loss": 0.1112, "step": 38940 }, { "epoch": 9.35, "learning_rate": 1.301008161305809e-06, "loss": 0.1944, "step": 38950 }, { "epoch": 9.35, "learning_rate": 1.2962073931829094e-06, "loss": 0.1083, "step": 38960 }, { "epoch": 9.35, "learning_rate": 1.2914066250600096e-06, "loss": 0.1031, "step": 38970 }, { "epoch": 9.36, "learning_rate": 1.2866058569371102e-06, "loss": 0.2249, "step": 38980 }, { "epoch": 9.36, "learning_rate": 1.2818050888142102e-06, "loss": 0.0762, "step": 38990 }, { "epoch": 9.36, "learning_rate": 1.2770043206913108e-06, "loss": 0.0738, "step": 39000 }, { "epoch": 9.36, "learning_rate": 1.272203552568411e-06, "loss": 0.1203, "step": 39010 }, { "epoch": 9.37, "learning_rate": 1.2674027844455115e-06, "loss": 0.0748, "step": 39020 }, { "epoch": 9.37, "learning_rate": 1.2626020163226117e-06, "loss": 0.2156, "step": 39030 }, { "epoch": 9.37, "learning_rate": 1.257801248199712e-06, "loss": 0.0364, "step": 39040 }, { "epoch": 9.37, "learning_rate": 1.2530004800768125e-06, "loss": 0.0599, "step": 39050 }, { "epoch": 9.38, "learning_rate": 1.2481997119539127e-06, "loss": 0.1296, "step": 39060 }, { "epoch": 9.38, "learning_rate": 1.2433989438310131e-06, "loss": 0.1681, "step": 39070 }, { "epoch": 9.38, "learning_rate": 1.2385981757081133e-06, "loss": 0.0017, "step": 39080 }, { "epoch": 9.38, "learning_rate": 1.2337974075852138e-06, "loss": 0.0728, "step": 39090 }, { "epoch": 9.39, "learning_rate": 1.228996639462314e-06, "loss": 0.107, "step": 39100 }, { "epoch": 9.39, "learning_rate": 1.2241958713394144e-06, "loss": 0.0685, "step": 39110 }, { "epoch": 9.39, "learning_rate": 1.2193951032165148e-06, "loss": 0.0478, "step": 39120 }, { "epoch": 9.39, "learning_rate": 1.2145943350936152e-06, "loss": 0.027, "step": 39130 }, { "epoch": 9.4, "learning_rate": 1.2097935669707154e-06, "loss": 0.1607, "step": 39140 }, { "epoch": 9.4, "learning_rate": 1.2049927988478158e-06, "loss": 0.0729, "step": 39150 }, { "epoch": 9.4, "learning_rate": 1.200192030724916e-06, "loss": 0.1213, "step": 39160 }, { "epoch": 9.4, "learning_rate": 1.1953912626020165e-06, "loss": 0.0903, "step": 39170 }, { "epoch": 9.4, "learning_rate": 1.1905904944791167e-06, "loss": 0.1131, "step": 39180 }, { "epoch": 9.41, "learning_rate": 1.185789726356217e-06, "loss": 0.072, "step": 39190 }, { "epoch": 9.41, "learning_rate": 1.1809889582333173e-06, "loss": 0.1336, "step": 39200 }, { "epoch": 9.41, "learning_rate": 1.1761881901104177e-06, "loss": 0.1518, "step": 39210 }, { "epoch": 9.41, "learning_rate": 1.1713874219875182e-06, "loss": 0.183, "step": 39220 }, { "epoch": 9.42, "learning_rate": 1.1665866538646184e-06, "loss": 0.0555, "step": 39230 }, { "epoch": 9.42, "learning_rate": 1.1617858857417188e-06, "loss": 0.1397, "step": 39240 }, { "epoch": 9.42, "learning_rate": 1.156985117618819e-06, "loss": 0.1307, "step": 39250 }, { "epoch": 9.42, "learning_rate": 1.1521843494959194e-06, "loss": 0.1193, "step": 39260 }, { "epoch": 9.43, "learning_rate": 1.1473835813730198e-06, "loss": 0.096, "step": 39270 }, { "epoch": 9.43, "learning_rate": 1.1425828132501202e-06, "loss": 0.0553, "step": 39280 }, { "epoch": 9.43, "learning_rate": 1.1377820451272205e-06, "loss": 0.0361, "step": 39290 }, { "epoch": 9.43, "learning_rate": 1.1329812770043209e-06, "loss": 0.175, "step": 39300 }, { "epoch": 9.44, "learning_rate": 1.128180508881421e-06, "loss": 0.114, "step": 39310 }, { "epoch": 9.44, "learning_rate": 1.1233797407585215e-06, "loss": 0.0598, "step": 39320 }, { "epoch": 9.44, "learning_rate": 1.1185789726356217e-06, "loss": 0.1577, "step": 39330 }, { "epoch": 9.44, "learning_rate": 1.1137782045127221e-06, "loss": 0.0409, "step": 39340 }, { "epoch": 9.45, "learning_rate": 1.1089774363898225e-06, "loss": 0.1862, "step": 39350 }, { "epoch": 9.45, "learning_rate": 1.1041766682669228e-06, "loss": 0.0979, "step": 39360 }, { "epoch": 9.45, "learning_rate": 1.0993759001440232e-06, "loss": 0.074, "step": 39370 }, { "epoch": 9.45, "learning_rate": 1.0945751320211234e-06, "loss": 0.1331, "step": 39380 }, { "epoch": 9.46, "learning_rate": 1.0897743638982238e-06, "loss": 0.1577, "step": 39390 }, { "epoch": 9.46, "learning_rate": 1.084973595775324e-06, "loss": 0.2741, "step": 39400 }, { "epoch": 9.46, "learning_rate": 1.0801728276524246e-06, "loss": 0.0864, "step": 39410 }, { "epoch": 9.46, "learning_rate": 1.0753720595295248e-06, "loss": 0.077, "step": 39420 }, { "epoch": 9.46, "learning_rate": 1.0705712914066253e-06, "loss": 0.0169, "step": 39430 }, { "epoch": 9.47, "learning_rate": 1.0657705232837255e-06, "loss": 0.2377, "step": 39440 }, { "epoch": 9.47, "learning_rate": 1.060969755160826e-06, "loss": 0.1072, "step": 39450 }, { "epoch": 9.47, "learning_rate": 1.056168987037926e-06, "loss": 0.1515, "step": 39460 }, { "epoch": 9.47, "learning_rate": 1.0513682189150265e-06, "loss": 0.2445, "step": 39470 }, { "epoch": 9.48, "learning_rate": 1.0465674507921267e-06, "loss": 0.2286, "step": 39480 }, { "epoch": 9.48, "learning_rate": 1.0417666826692272e-06, "loss": 0.0924, "step": 39490 }, { "epoch": 9.48, "learning_rate": 1.0369659145463276e-06, "loss": 0.2416, "step": 39500 }, { "epoch": 9.48, "learning_rate": 1.0321651464234278e-06, "loss": 0.0229, "step": 39510 }, { "epoch": 9.49, "learning_rate": 1.0273643783005282e-06, "loss": 0.1693, "step": 39520 }, { "epoch": 9.49, "learning_rate": 1.0225636101776284e-06, "loss": 0.1761, "step": 39530 }, { "epoch": 9.49, "learning_rate": 1.0177628420547288e-06, "loss": 0.0647, "step": 39540 }, { "epoch": 9.49, "learning_rate": 1.0129620739318292e-06, "loss": 0.113, "step": 39550 }, { "epoch": 9.5, "learning_rate": 1.0081613058089297e-06, "loss": 0.1588, "step": 39560 }, { "epoch": 9.5, "learning_rate": 1.0033605376860299e-06, "loss": 0.1326, "step": 39570 }, { "epoch": 9.5, "learning_rate": 9.985597695631303e-07, "loss": 0.137, "step": 39580 }, { "epoch": 9.5, "learning_rate": 9.937590014402305e-07, "loss": 0.0122, "step": 39590 }, { "epoch": 9.51, "learning_rate": 9.88958233317331e-07, "loss": 0.1263, "step": 39600 }, { "epoch": 9.51, "learning_rate": 9.841574651944311e-07, "loss": 0.0744, "step": 39610 }, { "epoch": 9.51, "learning_rate": 9.793566970715315e-07, "loss": 0.0664, "step": 39620 }, { "epoch": 9.51, "learning_rate": 9.745559289486318e-07, "loss": 0.1275, "step": 39630 }, { "epoch": 9.52, "learning_rate": 9.697551608257322e-07, "loss": 0.1238, "step": 39640 }, { "epoch": 9.52, "learning_rate": 9.649543927028326e-07, "loss": 0.0931, "step": 39650 }, { "epoch": 9.52, "learning_rate": 9.601536245799328e-07, "loss": 0.2066, "step": 39660 }, { "epoch": 9.52, "learning_rate": 9.553528564570332e-07, "loss": 0.1512, "step": 39670 }, { "epoch": 9.52, "learning_rate": 9.505520883341334e-07, "loss": 0.1915, "step": 39680 }, { "epoch": 9.53, "learning_rate": 9.457513202112338e-07, "loss": 0.1372, "step": 39690 }, { "epoch": 9.53, "learning_rate": 9.409505520883343e-07, "loss": 0.1282, "step": 39700 }, { "epoch": 9.53, "learning_rate": 9.361497839654346e-07, "loss": 0.12, "step": 39710 }, { "epoch": 9.53, "learning_rate": 9.313490158425349e-07, "loss": 0.0362, "step": 39720 }, { "epoch": 9.54, "learning_rate": 9.265482477196352e-07, "loss": 0.1238, "step": 39730 }, { "epoch": 9.54, "learning_rate": 9.217474795967356e-07, "loss": 0.2101, "step": 39740 }, { "epoch": 9.54, "learning_rate": 9.169467114738359e-07, "loss": 0.1995, "step": 39750 }, { "epoch": 9.54, "learning_rate": 9.121459433509363e-07, "loss": 0.1221, "step": 39760 }, { "epoch": 9.55, "learning_rate": 9.073451752280366e-07, "loss": 0.1389, "step": 39770 }, { "epoch": 9.55, "learning_rate": 9.025444071051369e-07, "loss": 0.1688, "step": 39780 }, { "epoch": 9.55, "learning_rate": 8.977436389822372e-07, "loss": 0.09, "step": 39790 }, { "epoch": 9.55, "learning_rate": 8.929428708593375e-07, "loss": 0.1276, "step": 39800 }, { "epoch": 9.56, "learning_rate": 8.881421027364378e-07, "loss": 0.1869, "step": 39810 }, { "epoch": 9.56, "learning_rate": 8.833413346135381e-07, "loss": 0.2138, "step": 39820 }, { "epoch": 9.56, "learning_rate": 8.785405664906386e-07, "loss": 0.1163, "step": 39830 }, { "epoch": 9.56, "learning_rate": 8.73739798367739e-07, "loss": 0.0455, "step": 39840 }, { "epoch": 9.57, "learning_rate": 8.689390302448393e-07, "loss": 0.0664, "step": 39850 }, { "epoch": 9.57, "learning_rate": 8.641382621219396e-07, "loss": 0.0044, "step": 39860 }, { "epoch": 9.57, "learning_rate": 8.593374939990399e-07, "loss": 0.0218, "step": 39870 }, { "epoch": 9.57, "learning_rate": 8.545367258761402e-07, "loss": 0.0642, "step": 39880 }, { "epoch": 9.58, "learning_rate": 8.497359577532407e-07, "loss": 0.2006, "step": 39890 }, { "epoch": 9.58, "learning_rate": 8.44935189630341e-07, "loss": 0.0605, "step": 39900 }, { "epoch": 9.58, "learning_rate": 8.401344215074413e-07, "loss": 0.0787, "step": 39910 }, { "epoch": 9.58, "learning_rate": 8.353336533845416e-07, "loss": 0.0373, "step": 39920 }, { "epoch": 9.58, "learning_rate": 8.305328852616419e-07, "loss": 0.1479, "step": 39930 }, { "epoch": 9.59, "learning_rate": 8.257321171387422e-07, "loss": 0.1385, "step": 39940 }, { "epoch": 9.59, "learning_rate": 8.209313490158425e-07, "loss": 0.0012, "step": 39950 }, { "epoch": 9.59, "learning_rate": 8.161305808929429e-07, "loss": 0.1451, "step": 39960 }, { "epoch": 9.59, "learning_rate": 8.113298127700433e-07, "loss": 0.1556, "step": 39970 }, { "epoch": 9.6, "learning_rate": 8.065290446471436e-07, "loss": 0.1122, "step": 39980 }, { "epoch": 9.6, "learning_rate": 8.01728276524244e-07, "loss": 0.0253, "step": 39990 }, { "epoch": 9.6, "learning_rate": 7.969275084013443e-07, "loss": 0.1737, "step": 40000 }, { "epoch": 9.6, "learning_rate": 7.921267402784446e-07, "loss": 0.1586, "step": 40010 }, { "epoch": 9.61, "learning_rate": 7.873259721555449e-07, "loss": 0.0304, "step": 40020 }, { "epoch": 9.61, "learning_rate": 7.825252040326454e-07, "loss": 0.1445, "step": 40030 }, { "epoch": 9.61, "learning_rate": 7.777244359097457e-07, "loss": 0.002, "step": 40040 }, { "epoch": 9.61, "learning_rate": 7.72923667786846e-07, "loss": 0.1163, "step": 40050 }, { "epoch": 9.62, "learning_rate": 7.681228996639463e-07, "loss": 0.073, "step": 40060 }, { "epoch": 9.62, "learning_rate": 7.633221315410466e-07, "loss": 0.1001, "step": 40070 }, { "epoch": 9.62, "learning_rate": 7.585213634181469e-07, "loss": 0.0139, "step": 40080 }, { "epoch": 9.62, "learning_rate": 7.537205952952472e-07, "loss": 0.0942, "step": 40090 }, { "epoch": 9.63, "learning_rate": 7.489198271723476e-07, "loss": 0.1576, "step": 40100 }, { "epoch": 9.63, "learning_rate": 7.441190590494479e-07, "loss": 0.1171, "step": 40110 }, { "epoch": 9.63, "learning_rate": 7.393182909265483e-07, "loss": 0.0171, "step": 40120 }, { "epoch": 9.63, "learning_rate": 7.345175228036486e-07, "loss": 0.1325, "step": 40130 }, { "epoch": 9.64, "learning_rate": 7.29716754680749e-07, "loss": 0.0591, "step": 40140 }, { "epoch": 9.64, "learning_rate": 7.249159865578493e-07, "loss": 0.1072, "step": 40150 }, { "epoch": 9.64, "learning_rate": 7.201152184349497e-07, "loss": 0.1103, "step": 40160 }, { "epoch": 9.64, "learning_rate": 7.1531445031205e-07, "loss": 0.0459, "step": 40170 }, { "epoch": 9.64, "learning_rate": 7.105136821891504e-07, "loss": 0.1257, "step": 40180 }, { "epoch": 9.65, "learning_rate": 7.057129140662507e-07, "loss": 0.2633, "step": 40190 }, { "epoch": 9.65, "learning_rate": 7.00912145943351e-07, "loss": 0.0624, "step": 40200 }, { "epoch": 9.65, "learning_rate": 6.961113778204513e-07, "loss": 0.0512, "step": 40210 }, { "epoch": 9.65, "learning_rate": 6.913106096975516e-07, "loss": 0.0438, "step": 40220 }, { "epoch": 9.66, "learning_rate": 6.86509841574652e-07, "loss": 0.0351, "step": 40230 }, { "epoch": 9.66, "learning_rate": 6.817090734517523e-07, "loss": 0.1526, "step": 40240 }, { "epoch": 9.66, "learning_rate": 6.769083053288526e-07, "loss": 0.1071, "step": 40250 }, { "epoch": 9.66, "learning_rate": 6.72107537205953e-07, "loss": 0.1395, "step": 40260 }, { "epoch": 9.67, "learning_rate": 6.673067690830533e-07, "loss": 0.0983, "step": 40270 }, { "epoch": 9.67, "learning_rate": 6.625060009601537e-07, "loss": 0.1229, "step": 40280 }, { "epoch": 9.67, "learning_rate": 6.57705232837254e-07, "loss": 0.1386, "step": 40290 }, { "epoch": 9.67, "learning_rate": 6.529044647143544e-07, "loss": 0.3393, "step": 40300 }, { "epoch": 9.68, "learning_rate": 6.481036965914547e-07, "loss": 0.0607, "step": 40310 }, { "epoch": 9.68, "learning_rate": 6.433029284685551e-07, "loss": 0.0755, "step": 40320 }, { "epoch": 9.68, "learning_rate": 6.385021603456554e-07, "loss": 0.0597, "step": 40330 }, { "epoch": 9.68, "learning_rate": 6.337013922227557e-07, "loss": 0.0224, "step": 40340 }, { "epoch": 9.69, "learning_rate": 6.28900624099856e-07, "loss": 0.0668, "step": 40350 }, { "epoch": 9.69, "learning_rate": 6.240998559769564e-07, "loss": 0.1638, "step": 40360 }, { "epoch": 9.69, "learning_rate": 6.192990878540567e-07, "loss": 0.1092, "step": 40370 }, { "epoch": 9.69, "learning_rate": 6.14498319731157e-07, "loss": 0.0599, "step": 40380 }, { "epoch": 9.7, "learning_rate": 6.096975516082574e-07, "loss": 0.1491, "step": 40390 }, { "epoch": 9.7, "learning_rate": 6.048967834853577e-07, "loss": 0.0223, "step": 40400 }, { "epoch": 9.7, "learning_rate": 6.00096015362458e-07, "loss": 0.0953, "step": 40410 }, { "epoch": 9.7, "learning_rate": 5.952952472395583e-07, "loss": 0.0171, "step": 40420 }, { "epoch": 9.7, "learning_rate": 5.904944791166587e-07, "loss": 0.0875, "step": 40430 }, { "epoch": 9.71, "learning_rate": 5.856937109937591e-07, "loss": 0.0847, "step": 40440 }, { "epoch": 9.71, "learning_rate": 5.808929428708594e-07, "loss": 0.0336, "step": 40450 }, { "epoch": 9.71, "learning_rate": 5.760921747479597e-07, "loss": 0.0778, "step": 40460 }, { "epoch": 9.71, "learning_rate": 5.712914066250601e-07, "loss": 0.1324, "step": 40470 }, { "epoch": 9.72, "learning_rate": 5.664906385021604e-07, "loss": 0.1527, "step": 40480 }, { "epoch": 9.72, "learning_rate": 5.616898703792607e-07, "loss": 0.1002, "step": 40490 }, { "epoch": 9.72, "learning_rate": 5.568891022563611e-07, "loss": 0.0011, "step": 40500 }, { "epoch": 9.72, "learning_rate": 5.520883341334614e-07, "loss": 0.042, "step": 40510 }, { "epoch": 9.73, "learning_rate": 5.472875660105617e-07, "loss": 0.1818, "step": 40520 }, { "epoch": 9.73, "learning_rate": 5.42486797887662e-07, "loss": 0.073, "step": 40530 }, { "epoch": 9.73, "learning_rate": 5.376860297647624e-07, "loss": 0.0294, "step": 40540 }, { "epoch": 9.73, "learning_rate": 5.328852616418627e-07, "loss": 0.1023, "step": 40550 }, { "epoch": 9.74, "learning_rate": 5.28084493518963e-07, "loss": 0.1877, "step": 40560 }, { "epoch": 9.74, "learning_rate": 5.232837253960634e-07, "loss": 0.0826, "step": 40570 }, { "epoch": 9.74, "learning_rate": 5.184829572731638e-07, "loss": 0.0465, "step": 40580 }, { "epoch": 9.74, "learning_rate": 5.136821891502641e-07, "loss": 0.0032, "step": 40590 }, { "epoch": 9.75, "learning_rate": 5.088814210273644e-07, "loss": 0.1634, "step": 40600 }, { "epoch": 9.75, "learning_rate": 5.040806529044648e-07, "loss": 0.0433, "step": 40610 }, { "epoch": 9.75, "learning_rate": 4.992798847815651e-07, "loss": 0.0801, "step": 40620 }, { "epoch": 9.75, "learning_rate": 4.944791166586655e-07, "loss": 0.1849, "step": 40630 }, { "epoch": 9.76, "learning_rate": 4.896783485357658e-07, "loss": 0.0679, "step": 40640 }, { "epoch": 9.76, "learning_rate": 4.848775804128661e-07, "loss": 0.0169, "step": 40650 }, { "epoch": 9.76, "learning_rate": 4.800768122899664e-07, "loss": 0.0615, "step": 40660 }, { "epoch": 9.76, "learning_rate": 4.752760441670667e-07, "loss": 0.2065, "step": 40670 }, { "epoch": 9.76, "learning_rate": 4.7047527604416713e-07, "loss": 0.045, "step": 40680 }, { "epoch": 9.77, "learning_rate": 4.6567450792126745e-07, "loss": 0.0923, "step": 40690 }, { "epoch": 9.77, "learning_rate": 4.608737397983678e-07, "loss": 0.1579, "step": 40700 }, { "epoch": 9.77, "learning_rate": 4.5607297167546813e-07, "loss": 0.1446, "step": 40710 }, { "epoch": 9.77, "learning_rate": 4.5127220355256844e-07, "loss": 0.1923, "step": 40720 }, { "epoch": 9.78, "learning_rate": 4.4647143542966876e-07, "loss": 0.0255, "step": 40730 }, { "epoch": 9.78, "learning_rate": 4.4167066730676907e-07, "loss": 0.0468, "step": 40740 }, { "epoch": 9.78, "learning_rate": 4.368698991838695e-07, "loss": 0.0678, "step": 40750 }, { "epoch": 9.78, "learning_rate": 4.320691310609698e-07, "loss": 0.0587, "step": 40760 }, { "epoch": 9.79, "learning_rate": 4.272683629380701e-07, "loss": 0.0953, "step": 40770 }, { "epoch": 9.79, "learning_rate": 4.224675948151705e-07, "loss": 0.0455, "step": 40780 }, { "epoch": 9.79, "learning_rate": 4.176668266922708e-07, "loss": 0.0845, "step": 40790 }, { "epoch": 9.79, "learning_rate": 4.128660585693711e-07, "loss": 0.1594, "step": 40800 }, { "epoch": 9.8, "learning_rate": 4.080652904464714e-07, "loss": 0.1171, "step": 40810 }, { "epoch": 9.8, "learning_rate": 4.032645223235718e-07, "loss": 0.1101, "step": 40820 }, { "epoch": 9.8, "learning_rate": 3.9846375420067216e-07, "loss": 0.1529, "step": 40830 }, { "epoch": 9.8, "learning_rate": 3.9366298607777247e-07, "loss": 0.2728, "step": 40840 }, { "epoch": 9.81, "learning_rate": 3.8886221795487284e-07, "loss": 0.131, "step": 40850 }, { "epoch": 9.81, "learning_rate": 3.8406144983197315e-07, "loss": 0.0351, "step": 40860 }, { "epoch": 9.81, "learning_rate": 3.7926068170907347e-07, "loss": 0.1684, "step": 40870 }, { "epoch": 9.81, "learning_rate": 3.744599135861738e-07, "loss": 0.1191, "step": 40880 }, { "epoch": 9.82, "learning_rate": 3.6965914546327415e-07, "loss": 0.1576, "step": 40890 }, { "epoch": 9.82, "learning_rate": 3.648583773403745e-07, "loss": 0.1688, "step": 40900 }, { "epoch": 9.82, "learning_rate": 3.6005760921747483e-07, "loss": 0.0885, "step": 40910 }, { "epoch": 9.82, "learning_rate": 3.552568410945752e-07, "loss": 0.1408, "step": 40920 }, { "epoch": 9.82, "learning_rate": 3.504560729716755e-07, "loss": 0.0328, "step": 40930 }, { "epoch": 9.83, "learning_rate": 3.456553048487758e-07, "loss": 0.1646, "step": 40940 }, { "epoch": 9.83, "learning_rate": 3.4085453672587614e-07, "loss": 0.1002, "step": 40950 }, { "epoch": 9.83, "learning_rate": 3.360537686029765e-07, "loss": 0.0981, "step": 40960 }, { "epoch": 9.83, "learning_rate": 3.3125300048007687e-07, "loss": 0.0534, "step": 40970 }, { "epoch": 9.84, "learning_rate": 3.264522323571772e-07, "loss": 0.2004, "step": 40980 }, { "epoch": 9.84, "learning_rate": 3.2165146423427755e-07, "loss": 0.071, "step": 40990 }, { "epoch": 9.84, "learning_rate": 3.1685069611137786e-07, "loss": 0.0341, "step": 41000 }, { "epoch": 9.84, "learning_rate": 3.120499279884782e-07, "loss": 0.1309, "step": 41010 }, { "epoch": 9.85, "learning_rate": 3.072491598655785e-07, "loss": 0.0926, "step": 41020 }, { "epoch": 9.85, "learning_rate": 3.0244839174267886e-07, "loss": 0.1122, "step": 41030 }, { "epoch": 9.85, "learning_rate": 2.9764762361977917e-07, "loss": 0.0592, "step": 41040 }, { "epoch": 9.85, "learning_rate": 2.9284685549687954e-07, "loss": 0.1313, "step": 41050 }, { "epoch": 9.86, "learning_rate": 2.8804608737397985e-07, "loss": 0.14, "step": 41060 }, { "epoch": 9.86, "learning_rate": 2.832453192510802e-07, "loss": 0.0849, "step": 41070 }, { "epoch": 9.86, "learning_rate": 2.7844455112818053e-07, "loss": 0.1438, "step": 41080 }, { "epoch": 9.86, "learning_rate": 2.7364378300528085e-07, "loss": 0.0411, "step": 41090 }, { "epoch": 9.87, "learning_rate": 2.688430148823812e-07, "loss": 0.1346, "step": 41100 }, { "epoch": 9.87, "learning_rate": 2.640422467594815e-07, "loss": 0.1163, "step": 41110 }, { "epoch": 9.87, "learning_rate": 2.592414786365819e-07, "loss": 0.1592, "step": 41120 }, { "epoch": 9.87, "learning_rate": 2.544407105136822e-07, "loss": 0.0797, "step": 41130 }, { "epoch": 9.88, "learning_rate": 2.4963994239078257e-07, "loss": 0.0269, "step": 41140 }, { "epoch": 9.88, "learning_rate": 2.448391742678829e-07, "loss": 0.2395, "step": 41150 }, { "epoch": 9.88, "learning_rate": 2.400384061449832e-07, "loss": 0.1327, "step": 41160 }, { "epoch": 9.88, "learning_rate": 2.3523763802208357e-07, "loss": 0.0493, "step": 41170 }, { "epoch": 9.88, "learning_rate": 2.304368698991839e-07, "loss": 0.1222, "step": 41180 }, { "epoch": 9.89, "learning_rate": 2.2563610177628422e-07, "loss": 0.1053, "step": 41190 }, { "epoch": 9.89, "learning_rate": 2.2083533365338454e-07, "loss": 0.2707, "step": 41200 }, { "epoch": 9.89, "learning_rate": 2.160345655304849e-07, "loss": 0.0977, "step": 41210 }, { "epoch": 9.89, "learning_rate": 2.1123379740758524e-07, "loss": 0.1164, "step": 41220 }, { "epoch": 9.9, "learning_rate": 2.0643302928468556e-07, "loss": 0.0351, "step": 41230 }, { "epoch": 9.9, "learning_rate": 2.016322611617859e-07, "loss": 0.138, "step": 41240 }, { "epoch": 9.9, "learning_rate": 1.9683149303888624e-07, "loss": 0.0721, "step": 41250 }, { "epoch": 9.9, "learning_rate": 1.9203072491598658e-07, "loss": 0.0985, "step": 41260 }, { "epoch": 9.91, "learning_rate": 1.872299567930869e-07, "loss": 0.0556, "step": 41270 }, { "epoch": 9.91, "learning_rate": 1.8242918867018726e-07, "loss": 0.0595, "step": 41280 }, { "epoch": 9.91, "learning_rate": 1.776284205472876e-07, "loss": 0.0777, "step": 41290 }, { "epoch": 9.91, "learning_rate": 1.728276524243879e-07, "loss": 0.0311, "step": 41300 }, { "epoch": 9.92, "learning_rate": 1.6802688430148825e-07, "loss": 0.2685, "step": 41310 }, { "epoch": 9.92, "learning_rate": 1.632261161785886e-07, "loss": 0.1761, "step": 41320 }, { "epoch": 9.92, "learning_rate": 1.5842534805568893e-07, "loss": 0.0728, "step": 41330 }, { "epoch": 9.92, "learning_rate": 1.5362457993278925e-07, "loss": 0.0459, "step": 41340 }, { "epoch": 9.93, "learning_rate": 1.4882381180988959e-07, "loss": 0.0592, "step": 41350 }, { "epoch": 9.93, "learning_rate": 1.4402304368698993e-07, "loss": 0.1233, "step": 41360 }, { "epoch": 9.93, "learning_rate": 1.3922227556409027e-07, "loss": 0.222, "step": 41370 }, { "epoch": 9.93, "learning_rate": 1.344215074411906e-07, "loss": 0.092, "step": 41380 }, { "epoch": 9.94, "learning_rate": 1.2962073931829095e-07, "loss": 0.3358, "step": 41390 }, { "epoch": 9.94, "learning_rate": 1.2481997119539129e-07, "loss": 0.0952, "step": 41400 }, { "epoch": 9.94, "learning_rate": 1.200192030724916e-07, "loss": 0.1867, "step": 41410 }, { "epoch": 9.94, "learning_rate": 1.1521843494959195e-07, "loss": 0.0464, "step": 41420 }, { "epoch": 9.94, "learning_rate": 1.1041766682669227e-07, "loss": 0.15, "step": 41430 }, { "epoch": 9.95, "learning_rate": 1.0561689870379262e-07, "loss": 0.0712, "step": 41440 }, { "epoch": 9.95, "learning_rate": 1.0081613058089295e-07, "loss": 0.159, "step": 41450 }, { "epoch": 9.95, "learning_rate": 9.601536245799329e-08, "loss": 0.1062, "step": 41460 }, { "epoch": 9.95, "learning_rate": 9.121459433509363e-08, "loss": 0.1292, "step": 41470 }, { "epoch": 9.96, "learning_rate": 8.641382621219396e-08, "loss": 0.1752, "step": 41480 }, { "epoch": 9.96, "learning_rate": 8.16130580892943e-08, "loss": 0.1284, "step": 41490 }, { "epoch": 9.96, "learning_rate": 7.681228996639462e-08, "loss": 0.1465, "step": 41500 }, { "epoch": 9.96, "learning_rate": 7.201152184349496e-08, "loss": 0.0496, "step": 41510 }, { "epoch": 9.97, "learning_rate": 6.72107537205953e-08, "loss": 0.011, "step": 41520 }, { "epoch": 9.97, "learning_rate": 6.240998559769564e-08, "loss": 0.0567, "step": 41530 }, { "epoch": 9.97, "learning_rate": 5.760921747479598e-08, "loss": 0.0069, "step": 41540 }, { "epoch": 9.97, "learning_rate": 5.280844935189631e-08, "loss": 0.0323, "step": 41550 }, { "epoch": 9.98, "learning_rate": 4.8007681228996644e-08, "loss": 0.1293, "step": 41560 }, { "epoch": 9.98, "learning_rate": 4.320691310609698e-08, "loss": 0.0513, "step": 41570 }, { "epoch": 9.98, "learning_rate": 3.840614498319731e-08, "loss": 0.0365, "step": 41580 }, { "epoch": 9.98, "learning_rate": 3.360537686029765e-08, "loss": 0.0485, "step": 41590 }, { "epoch": 9.99, "learning_rate": 2.880460873739799e-08, "loss": 0.3029, "step": 41600 }, { "epoch": 9.99, "learning_rate": 2.4003840614498322e-08, "loss": 0.0429, "step": 41610 }, { "epoch": 9.99, "learning_rate": 1.9203072491598656e-08, "loss": 0.0587, "step": 41620 }, { "epoch": 9.99, "learning_rate": 1.4402304368698994e-08, "loss": 0.0627, "step": 41630 }, { "epoch": 10.0, "learning_rate": 9.601536245799328e-09, "loss": 0.1289, "step": 41640 }, { "epoch": 10.0, "learning_rate": 4.800768122899664e-09, "loss": 0.0901, "step": 41650 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 0.1153, "step": 41660 }, { "epoch": 10.0, "eval_accuracy": 0.9989799387963277, "eval_loss": 0.003796164644882083, "eval_runtime": 514.469, "eval_samples_per_second": 11.433, "eval_steps_per_second": 1.431, "step": 41660 }, { "epoch": 10.0, "step": 41660, "total_flos": 2.5835477038076805e+19, "train_loss": 0.24897499611371868, "train_runtime": 81597.3302, "train_samples_per_second": 4.084, "train_steps_per_second": 0.511 } ], "max_steps": 41660, "num_train_epochs": 10, "total_flos": 2.5835477038076805e+19, "trial_name": null, "trial_params": null }