{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9941646855413875, "eval_steps": 500, "global_step": 4600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002161227577263886, "grad_norm": 0.7498325109481812, "learning_rate": 2.877697841726619e-07, "loss": 1.6207, "step": 1 }, { "epoch": 0.0004322455154527772, "grad_norm": 0.6986605525016785, "learning_rate": 5.755395683453238e-07, "loss": 1.6011, "step": 2 }, { "epoch": 0.0006483682731791658, "grad_norm": 0.7146493792533875, "learning_rate": 8.633093525179857e-07, "loss": 1.5284, "step": 3 }, { "epoch": 0.0008644910309055544, "grad_norm": 0.8592181205749512, "learning_rate": 1.1510791366906476e-06, "loss": 1.5683, "step": 4 }, { "epoch": 0.001080613788631943, "grad_norm": 0.6243698000907898, "learning_rate": 1.4388489208633094e-06, "loss": 1.4792, "step": 5 }, { "epoch": 0.0012967365463583316, "grad_norm": 0.7416863441467285, "learning_rate": 1.7266187050359715e-06, "loss": 1.5804, "step": 6 }, { "epoch": 0.0015128593040847202, "grad_norm": 0.510182797908783, "learning_rate": 2.0143884892086333e-06, "loss": 1.6262, "step": 7 }, { "epoch": 0.0017289820618111088, "grad_norm": 0.7753154635429382, "learning_rate": 2.302158273381295e-06, "loss": 1.5323, "step": 8 }, { "epoch": 0.0019451048195374973, "grad_norm": 0.5771347284317017, "learning_rate": 2.589928057553957e-06, "loss": 1.4445, "step": 9 }, { "epoch": 0.002161227577263886, "grad_norm": 0.7808551788330078, "learning_rate": 2.877697841726619e-06, "loss": 1.6863, "step": 10 }, { "epoch": 0.0023773503349902745, "grad_norm": 0.7155261039733887, "learning_rate": 3.1654676258992807e-06, "loss": 1.505, "step": 11 }, { "epoch": 0.002593473092716663, "grad_norm": 0.7685397863388062, "learning_rate": 3.453237410071943e-06, "loss": 1.683, "step": 12 }, { "epoch": 0.0028095958504430517, "grad_norm": 0.6781991720199585, "learning_rate": 3.741007194244605e-06, "loss": 1.5397, "step": 13 }, { "epoch": 0.0030257186081694403, "grad_norm": 0.6643094420433044, "learning_rate": 4.028776978417267e-06, "loss": 1.3852, "step": 14 }, { "epoch": 0.003241841365895829, "grad_norm": 0.5720023512840271, "learning_rate": 4.316546762589928e-06, "loss": 1.3075, "step": 15 }, { "epoch": 0.0034579641236222175, "grad_norm": 0.6695529222488403, "learning_rate": 4.60431654676259e-06, "loss": 1.4687, "step": 16 }, { "epoch": 0.003674086881348606, "grad_norm": 0.8202911615371704, "learning_rate": 4.892086330935253e-06, "loss": 1.6027, "step": 17 }, { "epoch": 0.0038902096390749947, "grad_norm": 0.7854058146476746, "learning_rate": 5.179856115107914e-06, "loss": 1.4183, "step": 18 }, { "epoch": 0.004106332396801383, "grad_norm": 0.7073310613632202, "learning_rate": 5.467625899280576e-06, "loss": 1.545, "step": 19 }, { "epoch": 0.004322455154527772, "grad_norm": 0.7040787935256958, "learning_rate": 5.755395683453238e-06, "loss": 1.6758, "step": 20 }, { "epoch": 0.0045385779122541605, "grad_norm": 0.6893643140792847, "learning_rate": 6.0431654676259e-06, "loss": 1.4593, "step": 21 }, { "epoch": 0.004754700669980549, "grad_norm": 0.751395583152771, "learning_rate": 6.330935251798561e-06, "loss": 1.5057, "step": 22 }, { "epoch": 0.004970823427706938, "grad_norm": 0.8289034962654114, "learning_rate": 6.618705035971224e-06, "loss": 1.5668, "step": 23 }, { "epoch": 0.005186946185433326, "grad_norm": 0.6555783748626709, "learning_rate": 6.906474820143886e-06, "loss": 1.5488, "step": 24 }, { "epoch": 0.005403068943159715, "grad_norm": 0.789090096950531, "learning_rate": 7.194244604316547e-06, "loss": 1.4694, "step": 25 }, { "epoch": 0.0056191917008861034, "grad_norm": 0.7591240406036377, "learning_rate": 7.48201438848921e-06, "loss": 1.6212, "step": 26 }, { "epoch": 0.005835314458612492, "grad_norm": 0.6710547208786011, "learning_rate": 7.769784172661872e-06, "loss": 1.39, "step": 27 }, { "epoch": 0.006051437216338881, "grad_norm": 0.6780836582183838, "learning_rate": 8.057553956834533e-06, "loss": 1.3066, "step": 28 }, { "epoch": 0.006267559974065269, "grad_norm": 0.75581955909729, "learning_rate": 8.345323741007195e-06, "loss": 1.6376, "step": 29 }, { "epoch": 0.006483682731791658, "grad_norm": 0.7257469892501831, "learning_rate": 8.633093525179856e-06, "loss": 1.6985, "step": 30 }, { "epoch": 0.006699805489518046, "grad_norm": 0.8537693023681641, "learning_rate": 8.92086330935252e-06, "loss": 1.4547, "step": 31 }, { "epoch": 0.006915928247244435, "grad_norm": 0.7649074196815491, "learning_rate": 9.20863309352518e-06, "loss": 1.7011, "step": 32 }, { "epoch": 0.007132051004970824, "grad_norm": 0.6618505716323853, "learning_rate": 9.496402877697842e-06, "loss": 1.4064, "step": 33 }, { "epoch": 0.007348173762697212, "grad_norm": 0.8622448444366455, "learning_rate": 9.784172661870505e-06, "loss": 1.3952, "step": 34 }, { "epoch": 0.007564296520423601, "grad_norm": 0.8482533097267151, "learning_rate": 1.0071942446043167e-05, "loss": 1.3496, "step": 35 }, { "epoch": 0.007780419278149989, "grad_norm": 0.7652527093887329, "learning_rate": 1.0359712230215828e-05, "loss": 1.4609, "step": 36 }, { "epoch": 0.007996542035876377, "grad_norm": 0.7646381855010986, "learning_rate": 1.0647482014388491e-05, "loss": 1.3516, "step": 37 }, { "epoch": 0.008212664793602767, "grad_norm": 0.8664438724517822, "learning_rate": 1.0935251798561153e-05, "loss": 1.2758, "step": 38 }, { "epoch": 0.008428787551329154, "grad_norm": 0.6816773414611816, "learning_rate": 1.1223021582733812e-05, "loss": 1.2794, "step": 39 }, { "epoch": 0.008644910309055544, "grad_norm": 0.7363021373748779, "learning_rate": 1.1510791366906475e-05, "loss": 1.3211, "step": 40 }, { "epoch": 0.008861033066781931, "grad_norm": 0.6895145177841187, "learning_rate": 1.1798561151079137e-05, "loss": 1.3894, "step": 41 }, { "epoch": 0.009077155824508321, "grad_norm": 0.6489687561988831, "learning_rate": 1.20863309352518e-05, "loss": 1.3483, "step": 42 }, { "epoch": 0.009293278582234709, "grad_norm": 0.9342665076255798, "learning_rate": 1.2374100719424463e-05, "loss": 1.3172, "step": 43 }, { "epoch": 0.009509401339961098, "grad_norm": 0.9434204697608948, "learning_rate": 1.2661870503597123e-05, "loss": 1.3531, "step": 44 }, { "epoch": 0.009725524097687486, "grad_norm": 0.7507467865943909, "learning_rate": 1.2949640287769784e-05, "loss": 1.4414, "step": 45 }, { "epoch": 0.009941646855413875, "grad_norm": 0.8019313812255859, "learning_rate": 1.3237410071942447e-05, "loss": 1.449, "step": 46 }, { "epoch": 0.010157769613140263, "grad_norm": 0.725066065788269, "learning_rate": 1.3525179856115109e-05, "loss": 1.2374, "step": 47 }, { "epoch": 0.010373892370866653, "grad_norm": 0.8231765627861023, "learning_rate": 1.3812949640287772e-05, "loss": 1.4651, "step": 48 }, { "epoch": 0.01059001512859304, "grad_norm": 0.7059705853462219, "learning_rate": 1.4100719424460432e-05, "loss": 1.3966, "step": 49 }, { "epoch": 0.01080613788631943, "grad_norm": 0.8703414797782898, "learning_rate": 1.4388489208633095e-05, "loss": 1.0098, "step": 50 }, { "epoch": 0.011022260644045817, "grad_norm": 0.9461300373077393, "learning_rate": 1.4676258992805756e-05, "loss": 1.3343, "step": 51 }, { "epoch": 0.011238383401772207, "grad_norm": 0.8059316277503967, "learning_rate": 1.496402877697842e-05, "loss": 1.4206, "step": 52 }, { "epoch": 0.011454506159498595, "grad_norm": 0.8721457123756409, "learning_rate": 1.525179856115108e-05, "loss": 1.3473, "step": 53 }, { "epoch": 0.011670628917224984, "grad_norm": 0.7327573299407959, "learning_rate": 1.5539568345323744e-05, "loss": 1.38, "step": 54 }, { "epoch": 0.011886751674951372, "grad_norm": 0.792658805847168, "learning_rate": 1.5827338129496403e-05, "loss": 1.2197, "step": 55 }, { "epoch": 0.012102874432677761, "grad_norm": 0.746330976486206, "learning_rate": 1.6115107913669067e-05, "loss": 1.1263, "step": 56 }, { "epoch": 0.012318997190404149, "grad_norm": 0.6460121870040894, "learning_rate": 1.640287769784173e-05, "loss": 1.2812, "step": 57 }, { "epoch": 0.012535119948130538, "grad_norm": 0.8302263617515564, "learning_rate": 1.669064748201439e-05, "loss": 1.1794, "step": 58 }, { "epoch": 0.012751242705856926, "grad_norm": 0.8536649346351624, "learning_rate": 1.6978417266187053e-05, "loss": 1.237, "step": 59 }, { "epoch": 0.012967365463583316, "grad_norm": 0.7866637706756592, "learning_rate": 1.7266187050359712e-05, "loss": 1.3387, "step": 60 }, { "epoch": 0.013183488221309703, "grad_norm": 0.7334522008895874, "learning_rate": 1.7553956834532375e-05, "loss": 1.1289, "step": 61 }, { "epoch": 0.013399610979036093, "grad_norm": 0.8383240699768066, "learning_rate": 1.784172661870504e-05, "loss": 1.3147, "step": 62 }, { "epoch": 0.01361573373676248, "grad_norm": 0.8260761499404907, "learning_rate": 1.8129496402877698e-05, "loss": 1.0473, "step": 63 }, { "epoch": 0.01383185649448887, "grad_norm": 0.8201937675476074, "learning_rate": 1.841726618705036e-05, "loss": 1.3302, "step": 64 }, { "epoch": 0.014047979252215258, "grad_norm": 0.7809607982635498, "learning_rate": 1.870503597122302e-05, "loss": 0.9791, "step": 65 }, { "epoch": 0.014264102009941647, "grad_norm": 0.9497784972190857, "learning_rate": 1.8992805755395684e-05, "loss": 1.1998, "step": 66 }, { "epoch": 0.014480224767668035, "grad_norm": 0.8034746050834656, "learning_rate": 1.9280575539568347e-05, "loss": 1.1849, "step": 67 }, { "epoch": 0.014696347525394424, "grad_norm": 0.8192304372787476, "learning_rate": 1.956834532374101e-05, "loss": 1.1532, "step": 68 }, { "epoch": 0.014912470283120812, "grad_norm": 0.8304838538169861, "learning_rate": 1.985611510791367e-05, "loss": 1.2936, "step": 69 }, { "epoch": 0.015128593040847202, "grad_norm": 0.932195246219635, "learning_rate": 2.0143884892086333e-05, "loss": 1.165, "step": 70 }, { "epoch": 0.01534471579857359, "grad_norm": 0.9050421118736267, "learning_rate": 2.0431654676258996e-05, "loss": 1.1442, "step": 71 }, { "epoch": 0.015560838556299979, "grad_norm": 0.7831349968910217, "learning_rate": 2.0719424460431656e-05, "loss": 0.9148, "step": 72 }, { "epoch": 0.015776961314026366, "grad_norm": 0.8111123442649841, "learning_rate": 2.1007194244604316e-05, "loss": 1.2934, "step": 73 }, { "epoch": 0.015993084071752754, "grad_norm": 1.0379811525344849, "learning_rate": 2.1294964028776982e-05, "loss": 1.3094, "step": 74 }, { "epoch": 0.016209206829479145, "grad_norm": 0.9090933799743652, "learning_rate": 2.1582733812949642e-05, "loss": 1.1518, "step": 75 }, { "epoch": 0.016425329587205533, "grad_norm": 0.8984608054161072, "learning_rate": 2.1870503597122305e-05, "loss": 1.2224, "step": 76 }, { "epoch": 0.01664145234493192, "grad_norm": 0.9421352744102478, "learning_rate": 2.2158273381294965e-05, "loss": 1.0569, "step": 77 }, { "epoch": 0.01685757510265831, "grad_norm": 1.1250900030136108, "learning_rate": 2.2446043165467625e-05, "loss": 1.1747, "step": 78 }, { "epoch": 0.0170736978603847, "grad_norm": 0.9288277626037598, "learning_rate": 2.273381294964029e-05, "loss": 1.0783, "step": 79 }, { "epoch": 0.017289820618111088, "grad_norm": 0.8962733149528503, "learning_rate": 2.302158273381295e-05, "loss": 1.0767, "step": 80 }, { "epoch": 0.017505943375837475, "grad_norm": 0.8960233926773071, "learning_rate": 2.3309352517985614e-05, "loss": 1.1613, "step": 81 }, { "epoch": 0.017722066133563863, "grad_norm": 0.9017160534858704, "learning_rate": 2.3597122302158274e-05, "loss": 1.2361, "step": 82 }, { "epoch": 0.017938188891290254, "grad_norm": 0.7587832808494568, "learning_rate": 2.3884892086330937e-05, "loss": 1.1694, "step": 83 }, { "epoch": 0.018154311649016642, "grad_norm": 0.8145380616188049, "learning_rate": 2.41726618705036e-05, "loss": 1.063, "step": 84 }, { "epoch": 0.01837043440674303, "grad_norm": 0.863741397857666, "learning_rate": 2.446043165467626e-05, "loss": 0.9647, "step": 85 }, { "epoch": 0.018586557164469417, "grad_norm": 0.8460249304771423, "learning_rate": 2.4748201438848926e-05, "loss": 1.1474, "step": 86 }, { "epoch": 0.01880267992219581, "grad_norm": 0.8994437456130981, "learning_rate": 2.5035971223021586e-05, "loss": 1.0045, "step": 87 }, { "epoch": 0.019018802679922196, "grad_norm": 0.8365037441253662, "learning_rate": 2.5323741007194246e-05, "loss": 1.0582, "step": 88 }, { "epoch": 0.019234925437648584, "grad_norm": 1.2148154973983765, "learning_rate": 2.561151079136691e-05, "loss": 0.9929, "step": 89 }, { "epoch": 0.01945104819537497, "grad_norm": 1.068932056427002, "learning_rate": 2.589928057553957e-05, "loss": 1.0284, "step": 90 }, { "epoch": 0.019667170953101363, "grad_norm": 0.9524146318435669, "learning_rate": 2.6187050359712235e-05, "loss": 0.9298, "step": 91 }, { "epoch": 0.01988329371082775, "grad_norm": 0.9831689596176147, "learning_rate": 2.6474820143884895e-05, "loss": 0.891, "step": 92 }, { "epoch": 0.02009941646855414, "grad_norm": 0.8392059803009033, "learning_rate": 2.6762589928057554e-05, "loss": 0.7688, "step": 93 }, { "epoch": 0.020315539226280526, "grad_norm": 0.9849358797073364, "learning_rate": 2.7050359712230217e-05, "loss": 1.166, "step": 94 }, { "epoch": 0.020531661984006917, "grad_norm": 0.8717548847198486, "learning_rate": 2.7338129496402877e-05, "loss": 1.0336, "step": 95 }, { "epoch": 0.020747784741733305, "grad_norm": 0.9101350903511047, "learning_rate": 2.7625899280575544e-05, "loss": 1.0679, "step": 96 }, { "epoch": 0.020963907499459693, "grad_norm": 0.8745354413986206, "learning_rate": 2.7913669064748203e-05, "loss": 1.1726, "step": 97 }, { "epoch": 0.02118003025718608, "grad_norm": 0.8927585482597351, "learning_rate": 2.8201438848920863e-05, "loss": 1.15, "step": 98 }, { "epoch": 0.02139615301491247, "grad_norm": 0.888699471950531, "learning_rate": 2.848920863309353e-05, "loss": 0.9032, "step": 99 }, { "epoch": 0.02161227577263886, "grad_norm": 0.9794796705245972, "learning_rate": 2.877697841726619e-05, "loss": 1.1491, "step": 100 }, { "epoch": 0.021828398530365247, "grad_norm": 0.8439841270446777, "learning_rate": 2.9064748201438852e-05, "loss": 1.1291, "step": 101 }, { "epoch": 0.022044521288091635, "grad_norm": 0.9384711980819702, "learning_rate": 2.9352517985611512e-05, "loss": 0.9793, "step": 102 }, { "epoch": 0.022260644045818026, "grad_norm": 0.8711330890655518, "learning_rate": 2.9640287769784172e-05, "loss": 1.0932, "step": 103 }, { "epoch": 0.022476766803544414, "grad_norm": 1.1253970861434937, "learning_rate": 2.992805755395684e-05, "loss": 1.118, "step": 104 }, { "epoch": 0.0226928895612708, "grad_norm": 0.8921772837638855, "learning_rate": 3.0215827338129498e-05, "loss": 0.8669, "step": 105 }, { "epoch": 0.02290901231899719, "grad_norm": 0.9359428882598877, "learning_rate": 3.050359712230216e-05, "loss": 1.0879, "step": 106 }, { "epoch": 0.02312513507672358, "grad_norm": 0.9697002172470093, "learning_rate": 3.0791366906474824e-05, "loss": 1.064, "step": 107 }, { "epoch": 0.023341257834449968, "grad_norm": 0.9988358616828918, "learning_rate": 3.107913669064749e-05, "loss": 1.0661, "step": 108 }, { "epoch": 0.023557380592176356, "grad_norm": 1.1203874349594116, "learning_rate": 3.1366906474820144e-05, "loss": 1.1429, "step": 109 }, { "epoch": 0.023773503349902744, "grad_norm": 0.9657348990440369, "learning_rate": 3.165467625899281e-05, "loss": 1.2262, "step": 110 }, { "epoch": 0.023989626107629135, "grad_norm": 0.7940213084220886, "learning_rate": 3.194244604316547e-05, "loss": 0.9388, "step": 111 }, { "epoch": 0.024205748865355523, "grad_norm": 0.9472448229789734, "learning_rate": 3.223021582733813e-05, "loss": 1.1237, "step": 112 }, { "epoch": 0.02442187162308191, "grad_norm": 0.9700066447257996, "learning_rate": 3.2517985611510796e-05, "loss": 1.2015, "step": 113 }, { "epoch": 0.024637994380808298, "grad_norm": 1.0106992721557617, "learning_rate": 3.280575539568346e-05, "loss": 1.1162, "step": 114 }, { "epoch": 0.02485411713853469, "grad_norm": 1.0185657739639282, "learning_rate": 3.3093525179856116e-05, "loss": 1.0547, "step": 115 }, { "epoch": 0.025070239896261077, "grad_norm": 1.0063570737838745, "learning_rate": 3.338129496402878e-05, "loss": 1.0538, "step": 116 }, { "epoch": 0.025286362653987465, "grad_norm": 0.9898773431777954, "learning_rate": 3.366906474820144e-05, "loss": 1.1855, "step": 117 }, { "epoch": 0.025502485411713852, "grad_norm": 0.8569101095199585, "learning_rate": 3.3956834532374105e-05, "loss": 0.9859, "step": 118 }, { "epoch": 0.025718608169440244, "grad_norm": 0.8109889030456543, "learning_rate": 3.424460431654677e-05, "loss": 0.9956, "step": 119 }, { "epoch": 0.02593473092716663, "grad_norm": 0.9044827818870544, "learning_rate": 3.4532374100719424e-05, "loss": 1.1331, "step": 120 }, { "epoch": 0.02615085368489302, "grad_norm": 1.191148042678833, "learning_rate": 3.482014388489209e-05, "loss": 1.0748, "step": 121 }, { "epoch": 0.026366976442619407, "grad_norm": 1.043940544128418, "learning_rate": 3.510791366906475e-05, "loss": 1.1331, "step": 122 }, { "epoch": 0.026583099200345798, "grad_norm": 0.8819486498832703, "learning_rate": 3.5395683453237414e-05, "loss": 1.0659, "step": 123 }, { "epoch": 0.026799221958072186, "grad_norm": 0.9887312650680542, "learning_rate": 3.568345323741008e-05, "loss": 1.1115, "step": 124 }, { "epoch": 0.027015344715798573, "grad_norm": 0.9711753726005554, "learning_rate": 3.597122302158273e-05, "loss": 1.1175, "step": 125 }, { "epoch": 0.02723146747352496, "grad_norm": 1.0508410930633545, "learning_rate": 3.6258992805755396e-05, "loss": 1.1413, "step": 126 }, { "epoch": 0.027447590231251352, "grad_norm": 0.9886671304702759, "learning_rate": 3.654676258992806e-05, "loss": 0.9326, "step": 127 }, { "epoch": 0.02766371298897774, "grad_norm": 0.9078131318092346, "learning_rate": 3.683453237410072e-05, "loss": 1.1397, "step": 128 }, { "epoch": 0.027879835746704128, "grad_norm": 1.1544677019119263, "learning_rate": 3.7122302158273386e-05, "loss": 0.9968, "step": 129 }, { "epoch": 0.028095958504430515, "grad_norm": 0.9644024968147278, "learning_rate": 3.741007194244604e-05, "loss": 1.1007, "step": 130 }, { "epoch": 0.028312081262156907, "grad_norm": 0.7962532043457031, "learning_rate": 3.769784172661871e-05, "loss": 0.9139, "step": 131 }, { "epoch": 0.028528204019883294, "grad_norm": 0.9570571780204773, "learning_rate": 3.798561151079137e-05, "loss": 1.0106, "step": 132 }, { "epoch": 0.028744326777609682, "grad_norm": 0.9263657331466675, "learning_rate": 3.827338129496403e-05, "loss": 0.7735, "step": 133 }, { "epoch": 0.02896044953533607, "grad_norm": 0.8990103602409363, "learning_rate": 3.8561151079136694e-05, "loss": 0.9535, "step": 134 }, { "epoch": 0.02917657229306246, "grad_norm": 0.9810959696769714, "learning_rate": 3.884892086330935e-05, "loss": 1.0279, "step": 135 }, { "epoch": 0.02939269505078885, "grad_norm": 1.0959899425506592, "learning_rate": 3.913669064748202e-05, "loss": 0.8303, "step": 136 }, { "epoch": 0.029608817808515236, "grad_norm": 0.9222844839096069, "learning_rate": 3.942446043165468e-05, "loss": 1.05, "step": 137 }, { "epoch": 0.029824940566241624, "grad_norm": 0.9906018972396851, "learning_rate": 3.971223021582734e-05, "loss": 1.043, "step": 138 }, { "epoch": 0.030041063323968015, "grad_norm": 1.038453459739685, "learning_rate": 4e-05, "loss": 1.0572, "step": 139 }, { "epoch": 0.030257186081694403, "grad_norm": 0.9111128449440002, "learning_rate": 3.9999995100023115e-05, "loss": 0.987, "step": 140 }, { "epoch": 0.03047330883942079, "grad_norm": 1.0118787288665771, "learning_rate": 3.9999980400094876e-05, "loss": 1.2349, "step": 141 }, { "epoch": 0.03068943159714718, "grad_norm": 0.8447703719139099, "learning_rate": 3.9999955900222456e-05, "loss": 0.8415, "step": 142 }, { "epoch": 0.03090555435487357, "grad_norm": 1.0229984521865845, "learning_rate": 3.999992160041789e-05, "loss": 0.8446, "step": 143 }, { "epoch": 0.031121677112599958, "grad_norm": 1.0735254287719727, "learning_rate": 3.999987750069797e-05, "loss": 1.3394, "step": 144 }, { "epoch": 0.031337799870326345, "grad_norm": 1.1151766777038574, "learning_rate": 3.9999823601084306e-05, "loss": 1.1003, "step": 145 }, { "epoch": 0.03155392262805273, "grad_norm": 0.959318995475769, "learning_rate": 3.999975990160331e-05, "loss": 1.1259, "step": 146 }, { "epoch": 0.03177004538577912, "grad_norm": 0.9324452877044678, "learning_rate": 3.99996864022862e-05, "loss": 0.9367, "step": 147 }, { "epoch": 0.03198616814350551, "grad_norm": 1.0011591911315918, "learning_rate": 3.999960310316898e-05, "loss": 1.0493, "step": 148 }, { "epoch": 0.0322022909012319, "grad_norm": 0.979171633720398, "learning_rate": 3.9999510004292474e-05, "loss": 0.9986, "step": 149 }, { "epoch": 0.03241841365895829, "grad_norm": 1.1019887924194336, "learning_rate": 3.99994071057023e-05, "loss": 1.141, "step": 150 }, { "epoch": 0.03263453641668468, "grad_norm": 1.1715887784957886, "learning_rate": 3.999929440744887e-05, "loss": 0.9725, "step": 151 }, { "epoch": 0.032850659174411066, "grad_norm": 0.9511465430259705, "learning_rate": 3.999917190958742e-05, "loss": 1.0759, "step": 152 }, { "epoch": 0.033066781932137454, "grad_norm": 1.1758034229278564, "learning_rate": 3.999903961217796e-05, "loss": 0.9378, "step": 153 }, { "epoch": 0.03328290468986384, "grad_norm": 0.9852985739707947, "learning_rate": 3.9998897515285323e-05, "loss": 0.9593, "step": 154 }, { "epoch": 0.03349902744759023, "grad_norm": 0.929764449596405, "learning_rate": 3.999874561897913e-05, "loss": 1.0765, "step": 155 }, { "epoch": 0.03371515020531662, "grad_norm": 1.136412262916565, "learning_rate": 3.999858392333382e-05, "loss": 1.0034, "step": 156 }, { "epoch": 0.03393127296304301, "grad_norm": 0.9158518314361572, "learning_rate": 3.9998412428428613e-05, "loss": 1.1276, "step": 157 }, { "epoch": 0.0341473957207694, "grad_norm": 1.185773253440857, "learning_rate": 3.9998231134347554e-05, "loss": 1.1234, "step": 158 }, { "epoch": 0.03436351847849579, "grad_norm": 1.0396409034729004, "learning_rate": 3.999804004117946e-05, "loss": 0.9214, "step": 159 }, { "epoch": 0.034579641236222175, "grad_norm": 1.094385027885437, "learning_rate": 3.999783914901798e-05, "loss": 1.0425, "step": 160 }, { "epoch": 0.03479576399394856, "grad_norm": 1.138527512550354, "learning_rate": 3.999762845796154e-05, "loss": 1.1496, "step": 161 }, { "epoch": 0.03501188675167495, "grad_norm": 1.072544813156128, "learning_rate": 3.999740796811339e-05, "loss": 1.2448, "step": 162 }, { "epoch": 0.03522800950940134, "grad_norm": 1.015540599822998, "learning_rate": 3.9997177679581555e-05, "loss": 0.9786, "step": 163 }, { "epoch": 0.035444132267127726, "grad_norm": 0.9477831721305847, "learning_rate": 3.999693759247889e-05, "loss": 1.1633, "step": 164 }, { "epoch": 0.03566025502485412, "grad_norm": 1.0140697956085205, "learning_rate": 3.999668770692303e-05, "loss": 1.1194, "step": 165 }, { "epoch": 0.03587637778258051, "grad_norm": 1.0042285919189453, "learning_rate": 3.9996428023036415e-05, "loss": 1.0049, "step": 166 }, { "epoch": 0.036092500540306896, "grad_norm": 0.965667724609375, "learning_rate": 3.99961585409463e-05, "loss": 0.9693, "step": 167 }, { "epoch": 0.036308623298033284, "grad_norm": 1.002320647239685, "learning_rate": 3.999587926078472e-05, "loss": 0.8459, "step": 168 }, { "epoch": 0.03652474605575967, "grad_norm": 0.9055720567703247, "learning_rate": 3.999559018268853e-05, "loss": 0.9717, "step": 169 }, { "epoch": 0.03674086881348606, "grad_norm": 1.0310685634613037, "learning_rate": 3.9995291306799374e-05, "loss": 1.0326, "step": 170 }, { "epoch": 0.03695699157121245, "grad_norm": 0.7893375158309937, "learning_rate": 3.9994982633263695e-05, "loss": 0.8942, "step": 171 }, { "epoch": 0.037173114328938835, "grad_norm": 1.0069329738616943, "learning_rate": 3.999466416223275e-05, "loss": 1.1418, "step": 172 }, { "epoch": 0.03738923708666522, "grad_norm": 0.7812685966491699, "learning_rate": 3.999433589386259e-05, "loss": 0.8637, "step": 173 }, { "epoch": 0.03760535984439162, "grad_norm": 1.1129683256149292, "learning_rate": 3.999399782831405e-05, "loss": 0.9179, "step": 174 }, { "epoch": 0.037821482602118005, "grad_norm": 0.9261072874069214, "learning_rate": 3.9993649965752804e-05, "loss": 1.0623, "step": 175 }, { "epoch": 0.03803760535984439, "grad_norm": 0.9303197860717773, "learning_rate": 3.999329230634929e-05, "loss": 0.9668, "step": 176 }, { "epoch": 0.03825372811757078, "grad_norm": 1.0003294944763184, "learning_rate": 3.9992924850278764e-05, "loss": 0.9787, "step": 177 }, { "epoch": 0.03846985087529717, "grad_norm": 0.9643764495849609, "learning_rate": 3.9992547597721283e-05, "loss": 1.2231, "step": 178 }, { "epoch": 0.038685973633023556, "grad_norm": 0.8814199566841125, "learning_rate": 3.9992160548861694e-05, "loss": 0.8795, "step": 179 }, { "epoch": 0.03890209639074994, "grad_norm": 1.132537603378296, "learning_rate": 3.999176370388965e-05, "loss": 0.9443, "step": 180 }, { "epoch": 0.03911821914847633, "grad_norm": 1.0179696083068848, "learning_rate": 3.99913570629996e-05, "loss": 1.0842, "step": 181 }, { "epoch": 0.039334341906202726, "grad_norm": 0.9068275690078735, "learning_rate": 3.999094062639081e-05, "loss": 0.9677, "step": 182 }, { "epoch": 0.039550464663929114, "grad_norm": 0.9726153612136841, "learning_rate": 3.999051439426732e-05, "loss": 1.1278, "step": 183 }, { "epoch": 0.0397665874216555, "grad_norm": 0.9714052677154541, "learning_rate": 3.999007836683799e-05, "loss": 1.1799, "step": 184 }, { "epoch": 0.03998271017938189, "grad_norm": 0.8894519209861755, "learning_rate": 3.998963254431647e-05, "loss": 1.1082, "step": 185 }, { "epoch": 0.04019883293710828, "grad_norm": 0.993949830532074, "learning_rate": 3.998917692692121e-05, "loss": 1.0954, "step": 186 }, { "epoch": 0.040414955694834664, "grad_norm": 0.993137538433075, "learning_rate": 3.998871151487548e-05, "loss": 1.1753, "step": 187 }, { "epoch": 0.04063107845256105, "grad_norm": 1.0292941331863403, "learning_rate": 3.99882363084073e-05, "loss": 1.0214, "step": 188 }, { "epoch": 0.04084720121028744, "grad_norm": 0.9065425395965576, "learning_rate": 3.9987751307749536e-05, "loss": 1.127, "step": 189 }, { "epoch": 0.041063323968013835, "grad_norm": 1.0782829523086548, "learning_rate": 3.998725651313984e-05, "loss": 0.9114, "step": 190 }, { "epoch": 0.04127944672574022, "grad_norm": 0.9183439016342163, "learning_rate": 3.998675192482065e-05, "loss": 1.0319, "step": 191 }, { "epoch": 0.04149556948346661, "grad_norm": 0.8721674084663391, "learning_rate": 3.998623754303923e-05, "loss": 0.8357, "step": 192 }, { "epoch": 0.041711692241193, "grad_norm": 0.9285215735435486, "learning_rate": 3.998571336804761e-05, "loss": 1.0677, "step": 193 }, { "epoch": 0.041927814998919385, "grad_norm": 0.972098708152771, "learning_rate": 3.9985179400102634e-05, "loss": 0.9023, "step": 194 }, { "epoch": 0.04214393775664577, "grad_norm": 0.9518387913703918, "learning_rate": 3.998463563946596e-05, "loss": 1.082, "step": 195 }, { "epoch": 0.04236006051437216, "grad_norm": 0.9610128998756409, "learning_rate": 3.9984082086404015e-05, "loss": 0.8819, "step": 196 }, { "epoch": 0.04257618327209855, "grad_norm": 0.9028835892677307, "learning_rate": 3.9983518741188046e-05, "loss": 1.0048, "step": 197 }, { "epoch": 0.04279230602982494, "grad_norm": 1.0402621030807495, "learning_rate": 3.998294560409409e-05, "loss": 1.1093, "step": 198 }, { "epoch": 0.04300842878755133, "grad_norm": 0.8937285542488098, "learning_rate": 3.998236267540298e-05, "loss": 0.8782, "step": 199 }, { "epoch": 0.04322455154527772, "grad_norm": 0.9310992360115051, "learning_rate": 3.998176995540035e-05, "loss": 0.8388, "step": 200 }, { "epoch": 0.043440674303004106, "grad_norm": 1.0354390144348145, "learning_rate": 3.998116744437664e-05, "loss": 1.0339, "step": 201 }, { "epoch": 0.043656797060730494, "grad_norm": 0.9959929585456848, "learning_rate": 3.9980555142627065e-05, "loss": 1.0143, "step": 202 }, { "epoch": 0.04387291981845688, "grad_norm": 1.0468930006027222, "learning_rate": 3.9979933050451664e-05, "loss": 1.1142, "step": 203 }, { "epoch": 0.04408904257618327, "grad_norm": 1.0852928161621094, "learning_rate": 3.997930116815525e-05, "loss": 1.0066, "step": 204 }, { "epoch": 0.04430516533390966, "grad_norm": 0.9408144354820251, "learning_rate": 3.9978659496047456e-05, "loss": 1.152, "step": 205 }, { "epoch": 0.04452128809163605, "grad_norm": 0.8921926617622375, "learning_rate": 3.997800803444269e-05, "loss": 1.005, "step": 206 }, { "epoch": 0.04473741084936244, "grad_norm": 0.9611052870750427, "learning_rate": 3.9977346783660165e-05, "loss": 1.1262, "step": 207 }, { "epoch": 0.04495353360708883, "grad_norm": 0.8504266142845154, "learning_rate": 3.99766757440239e-05, "loss": 1.05, "step": 208 }, { "epoch": 0.045169656364815215, "grad_norm": 0.8924556374549866, "learning_rate": 3.99759949158627e-05, "loss": 1.1095, "step": 209 }, { "epoch": 0.0453857791225416, "grad_norm": 1.0046991109848022, "learning_rate": 3.997530429951017e-05, "loss": 0.9707, "step": 210 }, { "epoch": 0.04560190188026799, "grad_norm": 0.9472872614860535, "learning_rate": 3.997460389530471e-05, "loss": 1.1529, "step": 211 }, { "epoch": 0.04581802463799438, "grad_norm": 0.958730161190033, "learning_rate": 3.997389370358951e-05, "loss": 1.0757, "step": 212 }, { "epoch": 0.046034147395720766, "grad_norm": 0.8812927007675171, "learning_rate": 3.997317372471257e-05, "loss": 1.0235, "step": 213 }, { "epoch": 0.04625027015344716, "grad_norm": 1.042219877243042, "learning_rate": 3.997244395902668e-05, "loss": 1.1085, "step": 214 }, { "epoch": 0.04646639291117355, "grad_norm": 0.850281834602356, "learning_rate": 3.997170440688942e-05, "loss": 0.8835, "step": 215 }, { "epoch": 0.046682515668899936, "grad_norm": 0.9123249053955078, "learning_rate": 3.9970955068663165e-05, "loss": 0.8998, "step": 216 }, { "epoch": 0.046898638426626324, "grad_norm": 1.029981017112732, "learning_rate": 3.9970195944715096e-05, "loss": 1.2, "step": 217 }, { "epoch": 0.04711476118435271, "grad_norm": 0.9120750427246094, "learning_rate": 3.9969427035417176e-05, "loss": 0.8322, "step": 218 }, { "epoch": 0.0473308839420791, "grad_norm": 0.9278477430343628, "learning_rate": 3.996864834114617e-05, "loss": 0.9291, "step": 219 }, { "epoch": 0.04754700669980549, "grad_norm": 0.9059831500053406, "learning_rate": 3.996785986228364e-05, "loss": 1.0506, "step": 220 }, { "epoch": 0.047763129457531875, "grad_norm": 0.9443290829658508, "learning_rate": 3.996706159921594e-05, "loss": 0.9087, "step": 221 }, { "epoch": 0.04797925221525827, "grad_norm": 0.9705933332443237, "learning_rate": 3.996625355233421e-05, "loss": 1.074, "step": 222 }, { "epoch": 0.04819537497298466, "grad_norm": 1.1599591970443726, "learning_rate": 3.9965435722034395e-05, "loss": 0.7866, "step": 223 }, { "epoch": 0.048411497730711045, "grad_norm": 1.036673903465271, "learning_rate": 3.996460810871723e-05, "loss": 1.0867, "step": 224 }, { "epoch": 0.04862762048843743, "grad_norm": 1.0065069198608398, "learning_rate": 3.9963770712788244e-05, "loss": 0.8558, "step": 225 }, { "epoch": 0.04884374324616382, "grad_norm": 0.9539085626602173, "learning_rate": 3.996292353465775e-05, "loss": 0.9596, "step": 226 }, { "epoch": 0.04905986600389021, "grad_norm": 1.0341073274612427, "learning_rate": 3.9962066574740886e-05, "loss": 0.9632, "step": 227 }, { "epoch": 0.049275988761616596, "grad_norm": 1.0347282886505127, "learning_rate": 3.996119983345754e-05, "loss": 0.9397, "step": 228 }, { "epoch": 0.049492111519342984, "grad_norm": 1.1618353128433228, "learning_rate": 3.996032331123242e-05, "loss": 0.967, "step": 229 }, { "epoch": 0.04970823427706938, "grad_norm": 1.0670056343078613, "learning_rate": 3.995943700849501e-05, "loss": 1.0497, "step": 230 }, { "epoch": 0.049924357034795766, "grad_norm": 0.999468207359314, "learning_rate": 3.9958540925679614e-05, "loss": 0.9668, "step": 231 }, { "epoch": 0.050140479792522154, "grad_norm": 0.9835057258605957, "learning_rate": 3.9957635063225305e-05, "loss": 0.9974, "step": 232 }, { "epoch": 0.05035660255024854, "grad_norm": 0.9463314414024353, "learning_rate": 3.995671942157594e-05, "loss": 1.0148, "step": 233 }, { "epoch": 0.05057272530797493, "grad_norm": 1.0669755935668945, "learning_rate": 3.99557940011802e-05, "loss": 1.0068, "step": 234 }, { "epoch": 0.05078884806570132, "grad_norm": 0.9666453003883362, "learning_rate": 3.995485880249153e-05, "loss": 0.9449, "step": 235 }, { "epoch": 0.051004970823427705, "grad_norm": 0.899185299873352, "learning_rate": 3.995391382596817e-05, "loss": 1.007, "step": 236 }, { "epoch": 0.05122109358115409, "grad_norm": 0.8701006770133972, "learning_rate": 3.995295907207317e-05, "loss": 0.9585, "step": 237 }, { "epoch": 0.05143721633888049, "grad_norm": 0.9041889309883118, "learning_rate": 3.9951994541274345e-05, "loss": 0.8978, "step": 238 }, { "epoch": 0.051653339096606875, "grad_norm": 1.0119632482528687, "learning_rate": 3.9951020234044316e-05, "loss": 1.0081, "step": 239 }, { "epoch": 0.05186946185433326, "grad_norm": 1.0799907445907593, "learning_rate": 3.995003615086049e-05, "loss": 0.9312, "step": 240 }, { "epoch": 0.05208558461205965, "grad_norm": 1.000360131263733, "learning_rate": 3.994904229220507e-05, "loss": 1.078, "step": 241 }, { "epoch": 0.05230170736978604, "grad_norm": 0.8915219902992249, "learning_rate": 3.994803865856505e-05, "loss": 1.031, "step": 242 }, { "epoch": 0.052517830127512426, "grad_norm": 0.9879536628723145, "learning_rate": 3.994702525043219e-05, "loss": 1.1415, "step": 243 }, { "epoch": 0.05273395288523881, "grad_norm": 0.8250242471694946, "learning_rate": 3.9946002068303076e-05, "loss": 0.983, "step": 244 }, { "epoch": 0.0529500756429652, "grad_norm": 0.9632031917572021, "learning_rate": 3.994496911267905e-05, "loss": 1.0133, "step": 245 }, { "epoch": 0.053166198400691596, "grad_norm": 0.9913688898086548, "learning_rate": 3.9943926384066266e-05, "loss": 0.8823, "step": 246 }, { "epoch": 0.053382321158417984, "grad_norm": 1.0919219255447388, "learning_rate": 3.9942873882975665e-05, "loss": 1.1, "step": 247 }, { "epoch": 0.05359844391614437, "grad_norm": 0.9994152784347534, "learning_rate": 3.9941811609922954e-05, "loss": 1.1517, "step": 248 }, { "epoch": 0.05381456667387076, "grad_norm": 1.039565086364746, "learning_rate": 3.994073956542866e-05, "loss": 1.0937, "step": 249 }, { "epoch": 0.05403068943159715, "grad_norm": 0.9709450006484985, "learning_rate": 3.993965775001807e-05, "loss": 1.016, "step": 250 }, { "epoch": 0.054246812189323534, "grad_norm": 1.1056290864944458, "learning_rate": 3.993856616422128e-05, "loss": 1.0423, "step": 251 }, { "epoch": 0.05446293494704992, "grad_norm": 0.8803907036781311, "learning_rate": 3.9937464808573155e-05, "loss": 1.0442, "step": 252 }, { "epoch": 0.05467905770477631, "grad_norm": 0.8767016530036926, "learning_rate": 3.9936353683613374e-05, "loss": 0.9632, "step": 253 }, { "epoch": 0.054895180462502705, "grad_norm": 0.771586537361145, "learning_rate": 3.993523278988637e-05, "loss": 0.868, "step": 254 }, { "epoch": 0.05511130322022909, "grad_norm": 0.9147107005119324, "learning_rate": 3.993410212794137e-05, "loss": 0.9849, "step": 255 }, { "epoch": 0.05532742597795548, "grad_norm": 0.8793245553970337, "learning_rate": 3.993296169833242e-05, "loss": 1.1097, "step": 256 }, { "epoch": 0.05554354873568187, "grad_norm": 0.9457629323005676, "learning_rate": 3.993181150161832e-05, "loss": 0.9583, "step": 257 }, { "epoch": 0.055759671493408255, "grad_norm": 0.946765661239624, "learning_rate": 3.993065153836265e-05, "loss": 1.0441, "step": 258 }, { "epoch": 0.05597579425113464, "grad_norm": 1.1300455331802368, "learning_rate": 3.9929481809133806e-05, "loss": 1.2199, "step": 259 }, { "epoch": 0.05619191700886103, "grad_norm": 0.9175975322723389, "learning_rate": 3.992830231450494e-05, "loss": 0.9304, "step": 260 }, { "epoch": 0.05640803976658742, "grad_norm": 1.046789526939392, "learning_rate": 3.9927113055054006e-05, "loss": 0.9935, "step": 261 }, { "epoch": 0.05662416252431381, "grad_norm": 0.8311507105827332, "learning_rate": 3.992591403136375e-05, "loss": 1.0011, "step": 262 }, { "epoch": 0.0568402852820402, "grad_norm": 0.9540581703186035, "learning_rate": 3.9924705244021675e-05, "loss": 1.0372, "step": 263 }, { "epoch": 0.05705640803976659, "grad_norm": 0.8871579766273499, "learning_rate": 3.992348669362009e-05, "loss": 0.8266, "step": 264 }, { "epoch": 0.057272530797492976, "grad_norm": 0.9213599562644958, "learning_rate": 3.9922258380756076e-05, "loss": 0.9786, "step": 265 }, { "epoch": 0.057488653555219364, "grad_norm": 0.8515825271606445, "learning_rate": 3.9921020306031514e-05, "loss": 0.9597, "step": 266 }, { "epoch": 0.05770477631294575, "grad_norm": 1.0192275047302246, "learning_rate": 3.991977247005306e-05, "loss": 1.0194, "step": 267 }, { "epoch": 0.05792089907067214, "grad_norm": 0.8933265805244446, "learning_rate": 3.991851487343213e-05, "loss": 0.9933, "step": 268 }, { "epoch": 0.05813702182839853, "grad_norm": 0.8800820112228394, "learning_rate": 3.9917247516784965e-05, "loss": 1.0745, "step": 269 }, { "epoch": 0.05835314458612492, "grad_norm": 1.1516456604003906, "learning_rate": 3.9915970400732554e-05, "loss": 1.2227, "step": 270 }, { "epoch": 0.05856926734385131, "grad_norm": 0.8964235782623291, "learning_rate": 3.991468352590069e-05, "loss": 1.1094, "step": 271 }, { "epoch": 0.0587853901015777, "grad_norm": 0.9254226684570312, "learning_rate": 3.991338689291993e-05, "loss": 1.0838, "step": 272 }, { "epoch": 0.059001512859304085, "grad_norm": 1.0113575458526611, "learning_rate": 3.9912080502425635e-05, "loss": 0.9458, "step": 273 }, { "epoch": 0.05921763561703047, "grad_norm": 0.8866898417472839, "learning_rate": 3.991076435505792e-05, "loss": 1.0424, "step": 274 }, { "epoch": 0.05943375837475686, "grad_norm": 0.8620572090148926, "learning_rate": 3.9909438451461695e-05, "loss": 1.1379, "step": 275 }, { "epoch": 0.05964988113248325, "grad_norm": 0.9906118512153625, "learning_rate": 3.990810279228665e-05, "loss": 1.0947, "step": 276 }, { "epoch": 0.059866003890209636, "grad_norm": 0.8600062727928162, "learning_rate": 3.9906757378187266e-05, "loss": 1.0202, "step": 277 }, { "epoch": 0.06008212664793603, "grad_norm": 0.977017343044281, "learning_rate": 3.990540220982278e-05, "loss": 0.9175, "step": 278 }, { "epoch": 0.06029824940566242, "grad_norm": 0.8460851907730103, "learning_rate": 3.9904037287857226e-05, "loss": 1.0349, "step": 279 }, { "epoch": 0.060514372163388806, "grad_norm": 1.07172429561615, "learning_rate": 3.9902662612959407e-05, "loss": 1.0305, "step": 280 }, { "epoch": 0.060730494921115194, "grad_norm": 0.9060229063034058, "learning_rate": 3.990127818580293e-05, "loss": 0.8089, "step": 281 }, { "epoch": 0.06094661767884158, "grad_norm": 1.1195108890533447, "learning_rate": 3.9899884007066135e-05, "loss": 1.1218, "step": 282 }, { "epoch": 0.06116274043656797, "grad_norm": 0.7674738764762878, "learning_rate": 3.9898480077432184e-05, "loss": 0.7514, "step": 283 }, { "epoch": 0.06137886319429436, "grad_norm": 0.8991888761520386, "learning_rate": 3.9897066397588986e-05, "loss": 0.8423, "step": 284 }, { "epoch": 0.061594985952020745, "grad_norm": 0.8716213703155518, "learning_rate": 3.989564296822925e-05, "loss": 0.7751, "step": 285 }, { "epoch": 0.06181110870974714, "grad_norm": 1.0875698328018188, "learning_rate": 3.9894209790050466e-05, "loss": 1.0288, "step": 286 }, { "epoch": 0.06202723146747353, "grad_norm": 0.9232035875320435, "learning_rate": 3.989276686375486e-05, "loss": 0.9869, "step": 287 }, { "epoch": 0.062243354225199915, "grad_norm": 0.8879022598266602, "learning_rate": 3.989131419004948e-05, "loss": 1.081, "step": 288 }, { "epoch": 0.0624594769829263, "grad_norm": 0.97151118516922, "learning_rate": 3.9889851769646125e-05, "loss": 0.9847, "step": 289 }, { "epoch": 0.06267559974065269, "grad_norm": 1.0698877573013306, "learning_rate": 3.988837960326139e-05, "loss": 1.2144, "step": 290 }, { "epoch": 0.06289172249837909, "grad_norm": 0.9475870132446289, "learning_rate": 3.9886897691616616e-05, "loss": 1.0191, "step": 291 }, { "epoch": 0.06310784525610547, "grad_norm": 0.9550348520278931, "learning_rate": 3.9885406035437953e-05, "loss": 0.9717, "step": 292 }, { "epoch": 0.06332396801383186, "grad_norm": 0.9410080313682556, "learning_rate": 3.98839046354563e-05, "loss": 1.0433, "step": 293 }, { "epoch": 0.06354009077155824, "grad_norm": 0.9392552375793457, "learning_rate": 3.988239349240733e-05, "loss": 1.0808, "step": 294 }, { "epoch": 0.06375621352928464, "grad_norm": 1.015593409538269, "learning_rate": 3.988087260703152e-05, "loss": 1.0099, "step": 295 }, { "epoch": 0.06397233628701102, "grad_norm": 1.0544767379760742, "learning_rate": 3.987934198007409e-05, "loss": 1.0538, "step": 296 }, { "epoch": 0.06418845904473741, "grad_norm": 1.0634684562683105, "learning_rate": 3.987780161228504e-05, "loss": 0.9461, "step": 297 }, { "epoch": 0.0644045818024638, "grad_norm": 0.8911001682281494, "learning_rate": 3.9876251504419154e-05, "loss": 1.0814, "step": 298 }, { "epoch": 0.06462070456019019, "grad_norm": 0.9938652515411377, "learning_rate": 3.987469165723598e-05, "loss": 1.0856, "step": 299 }, { "epoch": 0.06483682731791658, "grad_norm": 1.0032827854156494, "learning_rate": 3.987312207149983e-05, "loss": 1.1615, "step": 300 }, { "epoch": 0.06505295007564296, "grad_norm": 0.9022207856178284, "learning_rate": 3.987154274797981e-05, "loss": 0.9091, "step": 301 }, { "epoch": 0.06526907283336936, "grad_norm": 1.1873313188552856, "learning_rate": 3.986995368744978e-05, "loss": 1.0765, "step": 302 }, { "epoch": 0.06548519559109574, "grad_norm": 0.9208522439002991, "learning_rate": 3.9868354890688375e-05, "loss": 1.1512, "step": 303 }, { "epoch": 0.06570131834882213, "grad_norm": 1.0207360982894897, "learning_rate": 3.9866746358479e-05, "loss": 0.8993, "step": 304 }, { "epoch": 0.06591744110654851, "grad_norm": 0.9842541217803955, "learning_rate": 3.986512809160984e-05, "loss": 1.0306, "step": 305 }, { "epoch": 0.06613356386427491, "grad_norm": 1.0802537202835083, "learning_rate": 3.986350009087384e-05, "loss": 1.1707, "step": 306 }, { "epoch": 0.0663496866220013, "grad_norm": 0.945550799369812, "learning_rate": 3.9861862357068705e-05, "loss": 0.9111, "step": 307 }, { "epoch": 0.06656580937972768, "grad_norm": 0.9740046858787537, "learning_rate": 3.9860214890996925e-05, "loss": 0.9361, "step": 308 }, { "epoch": 0.06678193213745408, "grad_norm": 1.0904077291488647, "learning_rate": 3.9858557693465766e-05, "loss": 0.9004, "step": 309 }, { "epoch": 0.06699805489518046, "grad_norm": 0.935242772102356, "learning_rate": 3.985689076528725e-05, "loss": 1.1445, "step": 310 }, { "epoch": 0.06721417765290685, "grad_norm": 0.8713205456733704, "learning_rate": 3.985521410727815e-05, "loss": 0.794, "step": 311 }, { "epoch": 0.06743030041063323, "grad_norm": 0.9313145279884338, "learning_rate": 3.985352772026004e-05, "loss": 0.9409, "step": 312 }, { "epoch": 0.06764642316835963, "grad_norm": 0.9327392578125, "learning_rate": 3.9851831605059244e-05, "loss": 0.8954, "step": 313 }, { "epoch": 0.06786254592608602, "grad_norm": 1.0644276142120361, "learning_rate": 3.9850125762506853e-05, "loss": 0.9337, "step": 314 }, { "epoch": 0.0680786686838124, "grad_norm": 0.9526025652885437, "learning_rate": 3.984841019343872e-05, "loss": 0.9243, "step": 315 }, { "epoch": 0.0682947914415388, "grad_norm": 1.130817174911499, "learning_rate": 3.9846684898695474e-05, "loss": 0.9321, "step": 316 }, { "epoch": 0.06851091419926518, "grad_norm": 0.9395380616188049, "learning_rate": 3.9844949879122515e-05, "loss": 0.7396, "step": 317 }, { "epoch": 0.06872703695699157, "grad_norm": 0.9533188343048096, "learning_rate": 3.984320513556999e-05, "loss": 1.0038, "step": 318 }, { "epoch": 0.06894315971471796, "grad_norm": 0.9289395809173584, "learning_rate": 3.984145066889281e-05, "loss": 0.9629, "step": 319 }, { "epoch": 0.06915928247244435, "grad_norm": 1.1060194969177246, "learning_rate": 3.983968647995067e-05, "loss": 1.0178, "step": 320 }, { "epoch": 0.06937540523017073, "grad_norm": 0.9341673851013184, "learning_rate": 3.983791256960803e-05, "loss": 0.9548, "step": 321 }, { "epoch": 0.06959152798789713, "grad_norm": 0.9757445454597473, "learning_rate": 3.9836128938734075e-05, "loss": 0.9576, "step": 322 }, { "epoch": 0.06980765074562352, "grad_norm": 0.9519818425178528, "learning_rate": 3.98343355882028e-05, "loss": 1.0839, "step": 323 }, { "epoch": 0.0700237735033499, "grad_norm": 0.8457005620002747, "learning_rate": 3.983253251889294e-05, "loss": 0.9882, "step": 324 }, { "epoch": 0.0702398962610763, "grad_norm": 0.9045811891555786, "learning_rate": 3.983071973168799e-05, "loss": 1.1539, "step": 325 }, { "epoch": 0.07045601901880268, "grad_norm": 0.8618796467781067, "learning_rate": 3.982889722747621e-05, "loss": 0.8228, "step": 326 }, { "epoch": 0.07067214177652907, "grad_norm": 0.912308394908905, "learning_rate": 3.9827065007150626e-05, "loss": 0.9616, "step": 327 }, { "epoch": 0.07088826453425545, "grad_norm": 0.8848555684089661, "learning_rate": 3.982522307160903e-05, "loss": 0.948, "step": 328 }, { "epoch": 0.07110438729198185, "grad_norm": 0.839093029499054, "learning_rate": 3.982337142175396e-05, "loss": 0.9289, "step": 329 }, { "epoch": 0.07132051004970824, "grad_norm": 0.908827543258667, "learning_rate": 3.9821510058492706e-05, "loss": 1.0501, "step": 330 }, { "epoch": 0.07153663280743462, "grad_norm": 0.8893517255783081, "learning_rate": 3.9819638982737354e-05, "loss": 1.0266, "step": 331 }, { "epoch": 0.07175275556516102, "grad_norm": 0.8727594614028931, "learning_rate": 3.981775819540471e-05, "loss": 0.812, "step": 332 }, { "epoch": 0.0719688783228874, "grad_norm": 0.833471417427063, "learning_rate": 3.9815867697416364e-05, "loss": 1.0062, "step": 333 }, { "epoch": 0.07218500108061379, "grad_norm": 0.903219997882843, "learning_rate": 3.981396748969866e-05, "loss": 1.1231, "step": 334 }, { "epoch": 0.07240112383834017, "grad_norm": 0.9188300967216492, "learning_rate": 3.981205757318268e-05, "loss": 0.9167, "step": 335 }, { "epoch": 0.07261724659606657, "grad_norm": 0.9991043210029602, "learning_rate": 3.981013794880429e-05, "loss": 0.8541, "step": 336 }, { "epoch": 0.07283336935379295, "grad_norm": 0.8712428212165833, "learning_rate": 3.9808208617504106e-05, "loss": 0.8442, "step": 337 }, { "epoch": 0.07304949211151934, "grad_norm": 1.0942577123641968, "learning_rate": 3.980626958022748e-05, "loss": 1.0906, "step": 338 }, { "epoch": 0.07326561486924574, "grad_norm": 0.9288346171379089, "learning_rate": 3.9804320837924554e-05, "loss": 1.0523, "step": 339 }, { "epoch": 0.07348173762697212, "grad_norm": 1.0512988567352295, "learning_rate": 3.9802362391550195e-05, "loss": 1.0331, "step": 340 }, { "epoch": 0.07369786038469851, "grad_norm": 0.9598914384841919, "learning_rate": 3.980039424206404e-05, "loss": 1.0145, "step": 341 }, { "epoch": 0.0739139831424249, "grad_norm": 0.929222822189331, "learning_rate": 3.9798416390430485e-05, "loss": 0.9458, "step": 342 }, { "epoch": 0.07413010590015129, "grad_norm": 0.9733883738517761, "learning_rate": 3.979642883761866e-05, "loss": 1.0217, "step": 343 }, { "epoch": 0.07434622865787767, "grad_norm": 0.9186878204345703, "learning_rate": 3.9794431584602466e-05, "loss": 1.1528, "step": 344 }, { "epoch": 0.07456235141560406, "grad_norm": 0.8842979073524475, "learning_rate": 3.979242463236055e-05, "loss": 0.9351, "step": 345 }, { "epoch": 0.07477847417333044, "grad_norm": 0.9121730327606201, "learning_rate": 3.979040798187633e-05, "loss": 1.0886, "step": 346 }, { "epoch": 0.07499459693105684, "grad_norm": 0.8223870992660522, "learning_rate": 3.978838163413793e-05, "loss": 0.8619, "step": 347 }, { "epoch": 0.07521071968878323, "grad_norm": 1.1758759021759033, "learning_rate": 3.978634559013829e-05, "loss": 0.8168, "step": 348 }, { "epoch": 0.07542684244650961, "grad_norm": 0.948322057723999, "learning_rate": 3.978429985087504e-05, "loss": 0.8581, "step": 349 }, { "epoch": 0.07564296520423601, "grad_norm": 0.8527007102966309, "learning_rate": 3.97822444173506e-05, "loss": 1.0509, "step": 350 }, { "epoch": 0.07585908796196239, "grad_norm": 0.8772498965263367, "learning_rate": 3.978017929057213e-05, "loss": 0.8964, "step": 351 }, { "epoch": 0.07607521071968879, "grad_norm": 1.080377459526062, "learning_rate": 3.977810447155154e-05, "loss": 1.118, "step": 352 }, { "epoch": 0.07629133347741517, "grad_norm": 0.9243336319923401, "learning_rate": 3.977601996130546e-05, "loss": 1.0803, "step": 353 }, { "epoch": 0.07650745623514156, "grad_norm": 0.8424449563026428, "learning_rate": 3.9773925760855324e-05, "loss": 1.0326, "step": 354 }, { "epoch": 0.07672357899286796, "grad_norm": 0.9570095539093018, "learning_rate": 3.977182187122728e-05, "loss": 0.8873, "step": 355 }, { "epoch": 0.07693970175059434, "grad_norm": 0.7843940854072571, "learning_rate": 3.976970829345222e-05, "loss": 0.9296, "step": 356 }, { "epoch": 0.07715582450832073, "grad_norm": 0.8542441725730896, "learning_rate": 3.97675850285658e-05, "loss": 0.861, "step": 357 }, { "epoch": 0.07737194726604711, "grad_norm": 1.0213744640350342, "learning_rate": 3.9765452077608404e-05, "loss": 0.9373, "step": 358 }, { "epoch": 0.0775880700237735, "grad_norm": 0.9188033938407898, "learning_rate": 3.976330944162519e-05, "loss": 1.0499, "step": 359 }, { "epoch": 0.07780419278149989, "grad_norm": 0.911727249622345, "learning_rate": 3.9761157121666034e-05, "loss": 0.995, "step": 360 }, { "epoch": 0.07802031553922628, "grad_norm": 0.8088400363922119, "learning_rate": 3.9758995118785566e-05, "loss": 1.1295, "step": 361 }, { "epoch": 0.07823643829695266, "grad_norm": 0.8860215544700623, "learning_rate": 3.975682343404317e-05, "loss": 1.0503, "step": 362 }, { "epoch": 0.07845256105467906, "grad_norm": 0.8325342535972595, "learning_rate": 3.975464206850296e-05, "loss": 0.8244, "step": 363 }, { "epoch": 0.07866868381240545, "grad_norm": 0.9521036744117737, "learning_rate": 3.9752451023233804e-05, "loss": 1.0406, "step": 364 }, { "epoch": 0.07888480657013183, "grad_norm": 0.9645699858665466, "learning_rate": 3.975025029930931e-05, "loss": 0.9931, "step": 365 }, { "epoch": 0.07910092932785823, "grad_norm": 1.0489739179611206, "learning_rate": 3.974803989780782e-05, "loss": 0.8751, "step": 366 }, { "epoch": 0.07931705208558461, "grad_norm": 1.0184046030044556, "learning_rate": 3.974581981981243e-05, "loss": 1.1351, "step": 367 }, { "epoch": 0.079533174843311, "grad_norm": 0.84317547082901, "learning_rate": 3.9743590066410984e-05, "loss": 0.8186, "step": 368 }, { "epoch": 0.07974929760103738, "grad_norm": 0.9043307304382324, "learning_rate": 3.9741350638696034e-05, "loss": 0.903, "step": 369 }, { "epoch": 0.07996542035876378, "grad_norm": 0.843694806098938, "learning_rate": 3.973910153776492e-05, "loss": 1.0774, "step": 370 }, { "epoch": 0.08018154311649017, "grad_norm": 0.8882518410682678, "learning_rate": 3.973684276471967e-05, "loss": 0.9999, "step": 371 }, { "epoch": 0.08039766587421655, "grad_norm": 0.997040331363678, "learning_rate": 3.97345743206671e-05, "loss": 1.201, "step": 372 }, { "epoch": 0.08061378863194295, "grad_norm": 0.7990419268608093, "learning_rate": 3.9732296206718724e-05, "loss": 0.9738, "step": 373 }, { "epoch": 0.08082991138966933, "grad_norm": 0.9458608627319336, "learning_rate": 3.973000842399082e-05, "loss": 0.9205, "step": 374 }, { "epoch": 0.08104603414739572, "grad_norm": 0.9523917436599731, "learning_rate": 3.9727710973604406e-05, "loss": 0.9653, "step": 375 }, { "epoch": 0.0812621569051221, "grad_norm": 1.1069090366363525, "learning_rate": 3.972540385668522e-05, "loss": 0.907, "step": 376 }, { "epoch": 0.0814782796628485, "grad_norm": 1.0182411670684814, "learning_rate": 3.972308707436374e-05, "loss": 1.0174, "step": 377 }, { "epoch": 0.08169440242057488, "grad_norm": 0.9523218870162964, "learning_rate": 3.972076062777518e-05, "loss": 0.9735, "step": 378 }, { "epoch": 0.08191052517830127, "grad_norm": 0.9677371382713318, "learning_rate": 3.97184245180595e-05, "loss": 0.9037, "step": 379 }, { "epoch": 0.08212664793602767, "grad_norm": 0.9076384902000427, "learning_rate": 3.97160787463614e-05, "loss": 1.0213, "step": 380 }, { "epoch": 0.08234277069375405, "grad_norm": 0.8701322674751282, "learning_rate": 3.971372331383028e-05, "loss": 0.9872, "step": 381 }, { "epoch": 0.08255889345148044, "grad_norm": 0.841193437576294, "learning_rate": 3.9711358221620315e-05, "loss": 0.7858, "step": 382 }, { "epoch": 0.08277501620920683, "grad_norm": 0.9040749669075012, "learning_rate": 3.9708983470890385e-05, "loss": 0.9726, "step": 383 }, { "epoch": 0.08299113896693322, "grad_norm": 0.8368477821350098, "learning_rate": 3.970659906280411e-05, "loss": 1.0165, "step": 384 }, { "epoch": 0.0832072617246596, "grad_norm": 0.9723634123802185, "learning_rate": 3.970420499852986e-05, "loss": 1.004, "step": 385 }, { "epoch": 0.083423384482386, "grad_norm": 0.8730185031890869, "learning_rate": 3.9701801279240704e-05, "loss": 0.9924, "step": 386 }, { "epoch": 0.08363950724011239, "grad_norm": 0.869093120098114, "learning_rate": 3.969938790611447e-05, "loss": 0.9814, "step": 387 }, { "epoch": 0.08385562999783877, "grad_norm": 0.9956414103507996, "learning_rate": 3.969696488033369e-05, "loss": 1.0197, "step": 388 }, { "epoch": 0.08407175275556517, "grad_norm": 1.1042028665542603, "learning_rate": 3.969453220308566e-05, "loss": 1.0194, "step": 389 }, { "epoch": 0.08428787551329155, "grad_norm": 0.9331915378570557, "learning_rate": 3.9692089875562374e-05, "loss": 0.8877, "step": 390 }, { "epoch": 0.08450399827101794, "grad_norm": 0.9793432950973511, "learning_rate": 3.968963789896057e-05, "loss": 1.0923, "step": 391 }, { "epoch": 0.08472012102874432, "grad_norm": 0.9753433465957642, "learning_rate": 3.968717627448172e-05, "loss": 1.1249, "step": 392 }, { "epoch": 0.08493624378647072, "grad_norm": 1.0509642362594604, "learning_rate": 3.9684705003331994e-05, "loss": 0.9646, "step": 393 }, { "epoch": 0.0851523665441971, "grad_norm": 0.9101366400718689, "learning_rate": 3.968222408672232e-05, "loss": 1.0473, "step": 394 }, { "epoch": 0.08536848930192349, "grad_norm": 0.9615835547447205, "learning_rate": 3.967973352586835e-05, "loss": 0.9445, "step": 395 }, { "epoch": 0.08558461205964989, "grad_norm": 0.885998010635376, "learning_rate": 3.967723332199045e-05, "loss": 0.7974, "step": 396 }, { "epoch": 0.08580073481737627, "grad_norm": 0.9641144871711731, "learning_rate": 3.96747234763137e-05, "loss": 1.0299, "step": 397 }, { "epoch": 0.08601685757510266, "grad_norm": 1.0174018144607544, "learning_rate": 3.967220399006793e-05, "loss": 0.9633, "step": 398 }, { "epoch": 0.08623298033282904, "grad_norm": 0.8235479593276978, "learning_rate": 3.966967486448768e-05, "loss": 0.8539, "step": 399 }, { "epoch": 0.08644910309055544, "grad_norm": 0.9421714544296265, "learning_rate": 3.966713610081222e-05, "loss": 1.2542, "step": 400 }, { "epoch": 0.08666522584828182, "grad_norm": 0.9316366314888, "learning_rate": 3.966458770028552e-05, "loss": 1.106, "step": 401 }, { "epoch": 0.08688134860600821, "grad_norm": 0.9297628402709961, "learning_rate": 3.9662029664156325e-05, "loss": 1.0657, "step": 402 }, { "epoch": 0.08709747136373461, "grad_norm": 1.0702085494995117, "learning_rate": 3.965946199367804e-05, "loss": 0.9823, "step": 403 }, { "epoch": 0.08731359412146099, "grad_norm": 0.9327825903892517, "learning_rate": 3.9656884690108824e-05, "loss": 0.9744, "step": 404 }, { "epoch": 0.08752971687918738, "grad_norm": 0.9128959774971008, "learning_rate": 3.965429775471155e-05, "loss": 0.8884, "step": 405 }, { "epoch": 0.08774583963691376, "grad_norm": 0.8637864589691162, "learning_rate": 3.9651701188753806e-05, "loss": 0.9057, "step": 406 }, { "epoch": 0.08796196239464016, "grad_norm": 0.9656795263290405, "learning_rate": 3.9649094993507905e-05, "loss": 1.0883, "step": 407 }, { "epoch": 0.08817808515236654, "grad_norm": 0.8900482058525085, "learning_rate": 3.964647917025089e-05, "loss": 0.8608, "step": 408 }, { "epoch": 0.08839420791009293, "grad_norm": 0.907908022403717, "learning_rate": 3.964385372026449e-05, "loss": 0.8995, "step": 409 }, { "epoch": 0.08861033066781931, "grad_norm": 0.9215573072433472, "learning_rate": 3.964121864483518e-05, "loss": 0.9278, "step": 410 }, { "epoch": 0.08882645342554571, "grad_norm": 0.8984443545341492, "learning_rate": 3.963857394525413e-05, "loss": 0.9938, "step": 411 }, { "epoch": 0.0890425761832721, "grad_norm": 0.9193916320800781, "learning_rate": 3.963591962281726e-05, "loss": 1.0218, "step": 412 }, { "epoch": 0.08925869894099848, "grad_norm": 0.8618556261062622, "learning_rate": 3.9633255678825156e-05, "loss": 1.0636, "step": 413 }, { "epoch": 0.08947482169872488, "grad_norm": 1.1572033166885376, "learning_rate": 3.9630582114583165e-05, "loss": 0.9855, "step": 414 }, { "epoch": 0.08969094445645126, "grad_norm": 0.8636722564697266, "learning_rate": 3.9627898931401307e-05, "loss": 1.1296, "step": 415 }, { "epoch": 0.08990706721417766, "grad_norm": 0.9317108988761902, "learning_rate": 3.9625206130594345e-05, "loss": 1.076, "step": 416 }, { "epoch": 0.09012318997190404, "grad_norm": 0.9154568910598755, "learning_rate": 3.962250371348175e-05, "loss": 1.0057, "step": 417 }, { "epoch": 0.09033931272963043, "grad_norm": 1.0209555625915527, "learning_rate": 3.961979168138769e-05, "loss": 1.0806, "step": 418 }, { "epoch": 0.09055543548735683, "grad_norm": 0.9097704291343689, "learning_rate": 3.9617070035641075e-05, "loss": 0.8494, "step": 419 }, { "epoch": 0.0907715582450832, "grad_norm": 0.8296076655387878, "learning_rate": 3.961433877757548e-05, "loss": 1.0176, "step": 420 }, { "epoch": 0.0909876810028096, "grad_norm": 0.8262540698051453, "learning_rate": 3.9611597908529224e-05, "loss": 0.863, "step": 421 }, { "epoch": 0.09120380376053598, "grad_norm": 1.1728832721710205, "learning_rate": 3.9608847429845336e-05, "loss": 0.968, "step": 422 }, { "epoch": 0.09141992651826238, "grad_norm": 0.7996010780334473, "learning_rate": 3.960608734287153e-05, "loss": 0.8249, "step": 423 }, { "epoch": 0.09163604927598876, "grad_norm": 0.929660975933075, "learning_rate": 3.960331764896025e-05, "loss": 0.8146, "step": 424 }, { "epoch": 0.09185217203371515, "grad_norm": 0.8653681874275208, "learning_rate": 3.960053834946864e-05, "loss": 0.8004, "step": 425 }, { "epoch": 0.09206829479144153, "grad_norm": 0.8965323567390442, "learning_rate": 3.9597749445758545e-05, "loss": 0.9186, "step": 426 }, { "epoch": 0.09228441754916793, "grad_norm": 1.086283564567566, "learning_rate": 3.9594950939196535e-05, "loss": 0.947, "step": 427 }, { "epoch": 0.09250054030689432, "grad_norm": 0.8903186321258545, "learning_rate": 3.959214283115385e-05, "loss": 0.7717, "step": 428 }, { "epoch": 0.0927166630646207, "grad_norm": 0.9546589255332947, "learning_rate": 3.9589325123006476e-05, "loss": 1.2013, "step": 429 }, { "epoch": 0.0929327858223471, "grad_norm": 0.9602444171905518, "learning_rate": 3.958649781613507e-05, "loss": 1.0055, "step": 430 }, { "epoch": 0.09314890858007348, "grad_norm": 0.8137685060501099, "learning_rate": 3.958366091192502e-05, "loss": 0.8361, "step": 431 }, { "epoch": 0.09336503133779987, "grad_norm": 1.2224217653274536, "learning_rate": 3.958081441176639e-05, "loss": 1.0727, "step": 432 }, { "epoch": 0.09358115409552625, "grad_norm": 0.9968867301940918, "learning_rate": 3.957795831705396e-05, "loss": 0.947, "step": 433 }, { "epoch": 0.09379727685325265, "grad_norm": 0.954623281955719, "learning_rate": 3.957509262918721e-05, "loss": 0.9429, "step": 434 }, { "epoch": 0.09401339961097904, "grad_norm": 0.9548289179801941, "learning_rate": 3.957221734957032e-05, "loss": 1.0949, "step": 435 }, { "epoch": 0.09422952236870542, "grad_norm": 0.8827535510063171, "learning_rate": 3.956933247961218e-05, "loss": 0.9591, "step": 436 }, { "epoch": 0.09444564512643182, "grad_norm": 0.9374966025352478, "learning_rate": 3.9566438020726366e-05, "loss": 1.1719, "step": 437 }, { "epoch": 0.0946617678841582, "grad_norm": 1.0943764448165894, "learning_rate": 3.956353397433115e-05, "loss": 0.8931, "step": 438 }, { "epoch": 0.0948778906418846, "grad_norm": 0.9052419066429138, "learning_rate": 3.9560620341849504e-05, "loss": 0.9404, "step": 439 }, { "epoch": 0.09509401339961097, "grad_norm": 0.8661943674087524, "learning_rate": 3.955769712470912e-05, "loss": 0.8814, "step": 440 }, { "epoch": 0.09531013615733737, "grad_norm": 1.0524399280548096, "learning_rate": 3.9554764324342344e-05, "loss": 1.124, "step": 441 }, { "epoch": 0.09552625891506375, "grad_norm": 0.8547698259353638, "learning_rate": 3.9551821942186264e-05, "loss": 1.0171, "step": 442 }, { "epoch": 0.09574238167279014, "grad_norm": 0.7743398547172546, "learning_rate": 3.954886997968262e-05, "loss": 0.9985, "step": 443 }, { "epoch": 0.09595850443051654, "grad_norm": 0.8728423118591309, "learning_rate": 3.9545908438277885e-05, "loss": 0.9533, "step": 444 }, { "epoch": 0.09617462718824292, "grad_norm": 0.8962077498435974, "learning_rate": 3.954293731942319e-05, "loss": 0.936, "step": 445 }, { "epoch": 0.09639074994596931, "grad_norm": 0.7698949575424194, "learning_rate": 3.953995662457439e-05, "loss": 0.8866, "step": 446 }, { "epoch": 0.0966068727036957, "grad_norm": 0.9615756273269653, "learning_rate": 3.9536966355192016e-05, "loss": 0.8821, "step": 447 }, { "epoch": 0.09682299546142209, "grad_norm": 1.023740291595459, "learning_rate": 3.953396651274129e-05, "loss": 1.214, "step": 448 }, { "epoch": 0.09703911821914847, "grad_norm": 0.912865161895752, "learning_rate": 3.9530957098692126e-05, "loss": 1.0308, "step": 449 }, { "epoch": 0.09725524097687487, "grad_norm": 0.8043718338012695, "learning_rate": 3.9527938114519134e-05, "loss": 1.1519, "step": 450 }, { "epoch": 0.09747136373460126, "grad_norm": 0.7729007601737976, "learning_rate": 3.9524909561701615e-05, "loss": 0.9035, "step": 451 }, { "epoch": 0.09768748649232764, "grad_norm": 0.9104475378990173, "learning_rate": 3.9521871441723534e-05, "loss": 0.9637, "step": 452 }, { "epoch": 0.09790360925005404, "grad_norm": 0.9340957403182983, "learning_rate": 3.951882375607358e-05, "loss": 1.0346, "step": 453 }, { "epoch": 0.09811973200778042, "grad_norm": 0.8758137822151184, "learning_rate": 3.95157665062451e-05, "loss": 0.9313, "step": 454 }, { "epoch": 0.09833585476550681, "grad_norm": 1.1035685539245605, "learning_rate": 3.951269969373616e-05, "loss": 1.0212, "step": 455 }, { "epoch": 0.09855197752323319, "grad_norm": 0.9677191376686096, "learning_rate": 3.950962332004947e-05, "loss": 0.972, "step": 456 }, { "epoch": 0.09876810028095959, "grad_norm": 0.9146068096160889, "learning_rate": 3.9506537386692455e-05, "loss": 0.8206, "step": 457 }, { "epoch": 0.09898422303868597, "grad_norm": 1.0270830392837524, "learning_rate": 3.9503441895177206e-05, "loss": 0.9351, "step": 458 }, { "epoch": 0.09920034579641236, "grad_norm": 0.8653499484062195, "learning_rate": 3.950033684702051e-05, "loss": 0.864, "step": 459 }, { "epoch": 0.09941646855413876, "grad_norm": 0.8095022439956665, "learning_rate": 3.949722224374385e-05, "loss": 0.8414, "step": 460 }, { "epoch": 0.09963259131186514, "grad_norm": 1.0095607042312622, "learning_rate": 3.949409808687336e-05, "loss": 0.903, "step": 461 }, { "epoch": 0.09984871406959153, "grad_norm": 0.8256875872612, "learning_rate": 3.949096437793986e-05, "loss": 1.0661, "step": 462 }, { "epoch": 0.10006483682731791, "grad_norm": 0.8819296360015869, "learning_rate": 3.9487821118478885e-05, "loss": 1.1813, "step": 463 }, { "epoch": 0.10028095958504431, "grad_norm": 0.8339661955833435, "learning_rate": 3.9484668310030604e-05, "loss": 1.028, "step": 464 }, { "epoch": 0.10049708234277069, "grad_norm": 1.0233263969421387, "learning_rate": 3.9481505954139896e-05, "loss": 0.9629, "step": 465 }, { "epoch": 0.10071320510049708, "grad_norm": 1.049281358718872, "learning_rate": 3.94783340523563e-05, "loss": 0.9291, "step": 466 }, { "epoch": 0.10092932785822348, "grad_norm": 0.8857412934303284, "learning_rate": 3.947515260623405e-05, "loss": 0.9623, "step": 467 }, { "epoch": 0.10114545061594986, "grad_norm": 0.9321138858795166, "learning_rate": 3.947196161733205e-05, "loss": 1.0166, "step": 468 }, { "epoch": 0.10136157337367625, "grad_norm": 0.9237700700759888, "learning_rate": 3.9468761087213864e-05, "loss": 1.0322, "step": 469 }, { "epoch": 0.10157769613140263, "grad_norm": 1.0547393560409546, "learning_rate": 3.946555101744775e-05, "loss": 0.8755, "step": 470 }, { "epoch": 0.10179381888912903, "grad_norm": 0.9208168983459473, "learning_rate": 3.9462331409606636e-05, "loss": 0.9245, "step": 471 }, { "epoch": 0.10200994164685541, "grad_norm": 0.9916532039642334, "learning_rate": 3.9459102265268115e-05, "loss": 1.1441, "step": 472 }, { "epoch": 0.1022260644045818, "grad_norm": 0.9643540382385254, "learning_rate": 3.9455863586014474e-05, "loss": 1.1193, "step": 473 }, { "epoch": 0.10244218716230818, "grad_norm": 1.0929360389709473, "learning_rate": 3.9452615373432645e-05, "loss": 1.0725, "step": 474 }, { "epoch": 0.10265830992003458, "grad_norm": 0.8778156042098999, "learning_rate": 3.944935762911426e-05, "loss": 0.8897, "step": 475 }, { "epoch": 0.10287443267776097, "grad_norm": 0.9703811407089233, "learning_rate": 3.9446090354655596e-05, "loss": 0.9387, "step": 476 }, { "epoch": 0.10309055543548735, "grad_norm": 0.9691447615623474, "learning_rate": 3.944281355165761e-05, "loss": 0.9189, "step": 477 }, { "epoch": 0.10330667819321375, "grad_norm": 0.7944625020027161, "learning_rate": 3.943952722172592e-05, "loss": 0.9113, "step": 478 }, { "epoch": 0.10352280095094013, "grad_norm": 0.9441581964492798, "learning_rate": 3.9436231366470836e-05, "loss": 0.9868, "step": 479 }, { "epoch": 0.10373892370866653, "grad_norm": 0.9912706613540649, "learning_rate": 3.943292598750732e-05, "loss": 1.0999, "step": 480 }, { "epoch": 0.1039550464663929, "grad_norm": 0.9922301173210144, "learning_rate": 3.9429611086454975e-05, "loss": 0.882, "step": 481 }, { "epoch": 0.1041711692241193, "grad_norm": 0.9965765476226807, "learning_rate": 3.9426286664938123e-05, "loss": 1.0958, "step": 482 }, { "epoch": 0.1043872919818457, "grad_norm": 1.0244691371917725, "learning_rate": 3.942295272458571e-05, "loss": 1.0144, "step": 483 }, { "epoch": 0.10460341473957208, "grad_norm": 1.0300536155700684, "learning_rate": 3.941960926703137e-05, "loss": 0.912, "step": 484 }, { "epoch": 0.10481953749729847, "grad_norm": 1.0701106786727905, "learning_rate": 3.9416256293913376e-05, "loss": 0.838, "step": 485 }, { "epoch": 0.10503566025502485, "grad_norm": 0.9118524193763733, "learning_rate": 3.941289380687468e-05, "loss": 0.9516, "step": 486 }, { "epoch": 0.10525178301275125, "grad_norm": 0.8736467957496643, "learning_rate": 3.940952180756289e-05, "loss": 0.9668, "step": 487 }, { "epoch": 0.10546790577047763, "grad_norm": 0.9006360173225403, "learning_rate": 3.9406140297630286e-05, "loss": 0.7392, "step": 488 }, { "epoch": 0.10568402852820402, "grad_norm": 0.8101032376289368, "learning_rate": 3.94027492787338e-05, "loss": 0.881, "step": 489 }, { "epoch": 0.1059001512859304, "grad_norm": 0.9181492924690247, "learning_rate": 3.939934875253502e-05, "loss": 0.8168, "step": 490 }, { "epoch": 0.1061162740436568, "grad_norm": 0.9278901219367981, "learning_rate": 3.9395938720700196e-05, "loss": 0.8411, "step": 491 }, { "epoch": 0.10633239680138319, "grad_norm": 0.958052396774292, "learning_rate": 3.939251918490023e-05, "loss": 0.8052, "step": 492 }, { "epoch": 0.10654851955910957, "grad_norm": 0.934644341468811, "learning_rate": 3.9389090146810696e-05, "loss": 0.9906, "step": 493 }, { "epoch": 0.10676464231683597, "grad_norm": 0.888184130191803, "learning_rate": 3.938565160811181e-05, "loss": 1.0343, "step": 494 }, { "epoch": 0.10698076507456235, "grad_norm": 1.039035677909851, "learning_rate": 3.938220357048845e-05, "loss": 1.0156, "step": 495 }, { "epoch": 0.10719688783228874, "grad_norm": 0.9711042642593384, "learning_rate": 3.937874603563015e-05, "loss": 1.0355, "step": 496 }, { "epoch": 0.10741301059001512, "grad_norm": 0.8338973522186279, "learning_rate": 3.9375279005231084e-05, "loss": 0.8755, "step": 497 }, { "epoch": 0.10762913334774152, "grad_norm": 0.9922994375228882, "learning_rate": 3.93718024809901e-05, "loss": 1.0939, "step": 498 }, { "epoch": 0.10784525610546791, "grad_norm": 1.0171689987182617, "learning_rate": 3.936831646461068e-05, "loss": 1.0244, "step": 499 }, { "epoch": 0.1080613788631943, "grad_norm": 0.8213376402854919, "learning_rate": 3.936482095780096e-05, "loss": 1.12, "step": 500 }, { "epoch": 0.10827750162092069, "grad_norm": 1.0276782512664795, "learning_rate": 3.936131596227374e-05, "loss": 1.0105, "step": 501 }, { "epoch": 0.10849362437864707, "grad_norm": 0.9741190671920776, "learning_rate": 3.935780147974646e-05, "loss": 0.8942, "step": 502 }, { "epoch": 0.10870974713637346, "grad_norm": 0.9998877644538879, "learning_rate": 3.935427751194119e-05, "loss": 1.0869, "step": 503 }, { "epoch": 0.10892586989409984, "grad_norm": 0.8405811190605164, "learning_rate": 3.935074406058469e-05, "loss": 0.9447, "step": 504 }, { "epoch": 0.10914199265182624, "grad_norm": 0.9247153401374817, "learning_rate": 3.9347201127408335e-05, "loss": 0.9206, "step": 505 }, { "epoch": 0.10935811540955262, "grad_norm": 0.8571478128433228, "learning_rate": 3.934364871414815e-05, "loss": 0.9402, "step": 506 }, { "epoch": 0.10957423816727901, "grad_norm": 0.9175286293029785, "learning_rate": 3.9340086822544806e-05, "loss": 0.854, "step": 507 }, { "epoch": 0.10979036092500541, "grad_norm": 1.0318107604980469, "learning_rate": 3.9336515454343634e-05, "loss": 1.066, "step": 508 }, { "epoch": 0.11000648368273179, "grad_norm": 0.9785745143890381, "learning_rate": 3.9332934611294575e-05, "loss": 0.9719, "step": 509 }, { "epoch": 0.11022260644045818, "grad_norm": 0.9880504012107849, "learning_rate": 3.932934429515226e-05, "loss": 1.005, "step": 510 }, { "epoch": 0.11043872919818457, "grad_norm": 0.8954070806503296, "learning_rate": 3.932574450767592e-05, "loss": 0.9407, "step": 511 }, { "epoch": 0.11065485195591096, "grad_norm": 0.9446602463722229, "learning_rate": 3.932213525062945e-05, "loss": 1.1116, "step": 512 }, { "epoch": 0.11087097471363734, "grad_norm": 0.9656209349632263, "learning_rate": 3.931851652578137e-05, "loss": 0.9174, "step": 513 }, { "epoch": 0.11108709747136374, "grad_norm": 0.9980925917625427, "learning_rate": 3.9314888334904846e-05, "loss": 0.975, "step": 514 }, { "epoch": 0.11130322022909012, "grad_norm": 0.9334444403648376, "learning_rate": 3.9311250679777694e-05, "loss": 1.0594, "step": 515 }, { "epoch": 0.11151934298681651, "grad_norm": 1.04780912399292, "learning_rate": 3.930760356218235e-05, "loss": 0.9406, "step": 516 }, { "epoch": 0.1117354657445429, "grad_norm": 0.9021848440170288, "learning_rate": 3.930394698390589e-05, "loss": 0.9542, "step": 517 }, { "epoch": 0.11195158850226929, "grad_norm": 0.9023771286010742, "learning_rate": 3.9300280946740034e-05, "loss": 1.1127, "step": 518 }, { "epoch": 0.11216771125999568, "grad_norm": 0.8352866768836975, "learning_rate": 3.929660545248113e-05, "loss": 0.7962, "step": 519 }, { "epoch": 0.11238383401772206, "grad_norm": 0.8192297220230103, "learning_rate": 3.929292050293016e-05, "loss": 0.7481, "step": 520 }, { "epoch": 0.11259995677544846, "grad_norm": 0.8999636769294739, "learning_rate": 3.9289226099892745e-05, "loss": 0.975, "step": 521 }, { "epoch": 0.11281607953317484, "grad_norm": 1.027186393737793, "learning_rate": 3.928552224517913e-05, "loss": 0.9635, "step": 522 }, { "epoch": 0.11303220229090123, "grad_norm": 0.9493417739868164, "learning_rate": 3.92818089406042e-05, "loss": 0.8414, "step": 523 }, { "epoch": 0.11324832504862763, "grad_norm": 1.0801202058792114, "learning_rate": 3.927808618798746e-05, "loss": 0.8554, "step": 524 }, { "epoch": 0.11346444780635401, "grad_norm": 0.9390546679496765, "learning_rate": 3.9274353989153044e-05, "loss": 0.9667, "step": 525 }, { "epoch": 0.1136805705640804, "grad_norm": 0.9530695080757141, "learning_rate": 3.927061234592974e-05, "loss": 0.8776, "step": 526 }, { "epoch": 0.11389669332180678, "grad_norm": 0.860176682472229, "learning_rate": 3.926686126015093e-05, "loss": 1.0035, "step": 527 }, { "epoch": 0.11411281607953318, "grad_norm": 0.9042350649833679, "learning_rate": 3.926310073365464e-05, "loss": 1.0282, "step": 528 }, { "epoch": 0.11432893883725956, "grad_norm": 0.8940215110778809, "learning_rate": 3.9259330768283516e-05, "loss": 0.9375, "step": 529 }, { "epoch": 0.11454506159498595, "grad_norm": 0.9392261505126953, "learning_rate": 3.925555136588484e-05, "loss": 0.9435, "step": 530 }, { "epoch": 0.11476118435271233, "grad_norm": 0.8384535312652588, "learning_rate": 3.9251762528310514e-05, "loss": 0.709, "step": 531 }, { "epoch": 0.11497730711043873, "grad_norm": 0.9484767913818359, "learning_rate": 3.924796425741705e-05, "loss": 0.9147, "step": 532 }, { "epoch": 0.11519342986816512, "grad_norm": 0.9432885646820068, "learning_rate": 3.9244156555065595e-05, "loss": 1.1317, "step": 533 }, { "epoch": 0.1154095526258915, "grad_norm": 1.028935194015503, "learning_rate": 3.924033942312191e-05, "loss": 1.0019, "step": 534 }, { "epoch": 0.1156256753836179, "grad_norm": 0.8583811521530151, "learning_rate": 3.923651286345638e-05, "loss": 0.9307, "step": 535 }, { "epoch": 0.11584179814134428, "grad_norm": 1.0542842149734497, "learning_rate": 3.923267687794403e-05, "loss": 1.171, "step": 536 }, { "epoch": 0.11605792089907067, "grad_norm": 0.9055013656616211, "learning_rate": 3.9228831468464464e-05, "loss": 0.8571, "step": 537 }, { "epoch": 0.11627404365679705, "grad_norm": 1.1255946159362793, "learning_rate": 3.922497663690192e-05, "loss": 1.0053, "step": 538 }, { "epoch": 0.11649016641452345, "grad_norm": 0.9034440517425537, "learning_rate": 3.9221112385145274e-05, "loss": 0.9113, "step": 539 }, { "epoch": 0.11670628917224984, "grad_norm": 0.96932053565979, "learning_rate": 3.921723871508799e-05, "loss": 0.9796, "step": 540 }, { "epoch": 0.11692241192997622, "grad_norm": 0.88820481300354, "learning_rate": 3.921335562862816e-05, "loss": 0.9076, "step": 541 }, { "epoch": 0.11713853468770262, "grad_norm": 1.00152587890625, "learning_rate": 3.920946312766848e-05, "loss": 1.1389, "step": 542 }, { "epoch": 0.117354657445429, "grad_norm": 1.0847313404083252, "learning_rate": 3.920556121411628e-05, "loss": 1.1365, "step": 543 }, { "epoch": 0.1175707802031554, "grad_norm": 1.0765568017959595, "learning_rate": 3.9201649889883485e-05, "loss": 1.0681, "step": 544 }, { "epoch": 0.11778690296088178, "grad_norm": 1.0710378885269165, "learning_rate": 3.919772915688663e-05, "loss": 1.3251, "step": 545 }, { "epoch": 0.11800302571860817, "grad_norm": 0.9251645803451538, "learning_rate": 3.9193799017046865e-05, "loss": 0.9097, "step": 546 }, { "epoch": 0.11821914847633455, "grad_norm": 1.001984715461731, "learning_rate": 3.9189859472289956e-05, "loss": 0.8641, "step": 547 }, { "epoch": 0.11843527123406095, "grad_norm": 0.9508165121078491, "learning_rate": 3.918591052454626e-05, "loss": 0.9421, "step": 548 }, { "epoch": 0.11865139399178734, "grad_norm": 0.9724790453910828, "learning_rate": 3.918195217575075e-05, "loss": 1.0963, "step": 549 }, { "epoch": 0.11886751674951372, "grad_norm": 0.9443535208702087, "learning_rate": 3.917798442784303e-05, "loss": 0.9419, "step": 550 }, { "epoch": 0.11908363950724012, "grad_norm": 0.8758763074874878, "learning_rate": 3.917400728276727e-05, "loss": 0.8275, "step": 551 }, { "epoch": 0.1192997622649665, "grad_norm": 0.8813546895980835, "learning_rate": 3.917002074247227e-05, "loss": 0.8677, "step": 552 }, { "epoch": 0.11951588502269289, "grad_norm": 0.8212718367576599, "learning_rate": 3.916602480891141e-05, "loss": 0.8355, "step": 553 }, { "epoch": 0.11973200778041927, "grad_norm": 0.9864819645881653, "learning_rate": 3.916201948404271e-05, "loss": 1.0547, "step": 554 }, { "epoch": 0.11994813053814567, "grad_norm": 0.8727284073829651, "learning_rate": 3.915800476982875e-05, "loss": 0.8634, "step": 555 }, { "epoch": 0.12016425329587206, "grad_norm": 0.9952285885810852, "learning_rate": 3.9153980668236744e-05, "loss": 0.9807, "step": 556 }, { "epoch": 0.12038037605359844, "grad_norm": 0.824222207069397, "learning_rate": 3.9149947181238486e-05, "loss": 0.8683, "step": 557 }, { "epoch": 0.12059649881132484, "grad_norm": 0.9063690304756165, "learning_rate": 3.914590431081038e-05, "loss": 0.9414, "step": 558 }, { "epoch": 0.12081262156905122, "grad_norm": 0.9026046395301819, "learning_rate": 3.914185205893342e-05, "loss": 0.9657, "step": 559 }, { "epoch": 0.12102874432677761, "grad_norm": 0.8094519376754761, "learning_rate": 3.9137790427593205e-05, "loss": 0.9151, "step": 560 }, { "epoch": 0.121244867084504, "grad_norm": 0.8167213201522827, "learning_rate": 3.913371941877992e-05, "loss": 0.9311, "step": 561 }, { "epoch": 0.12146098984223039, "grad_norm": 0.9244396090507507, "learning_rate": 3.912963903448834e-05, "loss": 1.005, "step": 562 }, { "epoch": 0.12167711259995677, "grad_norm": 0.8598445653915405, "learning_rate": 3.912554927671786e-05, "loss": 0.9037, "step": 563 }, { "epoch": 0.12189323535768316, "grad_norm": 1.0223029851913452, "learning_rate": 3.912145014747245e-05, "loss": 1.044, "step": 564 }, { "epoch": 0.12210935811540956, "grad_norm": 1.0009716749191284, "learning_rate": 3.911734164876067e-05, "loss": 1.0534, "step": 565 }, { "epoch": 0.12232548087313594, "grad_norm": 1.0638169050216675, "learning_rate": 3.9113223782595674e-05, "loss": 1.0947, "step": 566 }, { "epoch": 0.12254160363086233, "grad_norm": 0.9844657778739929, "learning_rate": 3.910909655099521e-05, "loss": 1.0553, "step": 567 }, { "epoch": 0.12275772638858871, "grad_norm": 0.9277519583702087, "learning_rate": 3.9104959955981605e-05, "loss": 0.9108, "step": 568 }, { "epoch": 0.12297384914631511, "grad_norm": 0.8299845457077026, "learning_rate": 3.910081399958179e-05, "loss": 0.7876, "step": 569 }, { "epoch": 0.12318997190404149, "grad_norm": 0.8751687407493591, "learning_rate": 3.909665868382726e-05, "loss": 1.0615, "step": 570 }, { "epoch": 0.12340609466176788, "grad_norm": 0.8630756139755249, "learning_rate": 3.909249401075413e-05, "loss": 0.9572, "step": 571 }, { "epoch": 0.12362221741949428, "grad_norm": 0.839674711227417, "learning_rate": 3.908831998240307e-05, "loss": 1.0258, "step": 572 }, { "epoch": 0.12383834017722066, "grad_norm": 0.9267817139625549, "learning_rate": 3.908413660081934e-05, "loss": 0.7814, "step": 573 }, { "epoch": 0.12405446293494705, "grad_norm": 0.8045945167541504, "learning_rate": 3.907994386805279e-05, "loss": 0.8182, "step": 574 }, { "epoch": 0.12427058569267344, "grad_norm": 0.9595804214477539, "learning_rate": 3.9075741786157856e-05, "loss": 1.0572, "step": 575 }, { "epoch": 0.12448670845039983, "grad_norm": 0.9574965834617615, "learning_rate": 3.907153035719355e-05, "loss": 0.8036, "step": 576 }, { "epoch": 0.12470283120812621, "grad_norm": 0.9275492429733276, "learning_rate": 3.906730958322345e-05, "loss": 0.9117, "step": 577 }, { "epoch": 0.1249189539658526, "grad_norm": 0.9035372138023376, "learning_rate": 3.9063079466315725e-05, "loss": 1.1805, "step": 578 }, { "epoch": 0.125135076723579, "grad_norm": 0.951949954032898, "learning_rate": 3.9058840008543136e-05, "loss": 0.8268, "step": 579 }, { "epoch": 0.12535119948130538, "grad_norm": 0.8895303606987, "learning_rate": 3.9054591211983e-05, "loss": 0.859, "step": 580 }, { "epoch": 0.12556732223903178, "grad_norm": 0.8728366494178772, "learning_rate": 3.9050333078717216e-05, "loss": 0.7964, "step": 581 }, { "epoch": 0.12578344499675817, "grad_norm": 1.0275249481201172, "learning_rate": 3.9046065610832256e-05, "loss": 0.8628, "step": 582 }, { "epoch": 0.12599956775448454, "grad_norm": 0.8042998909950256, "learning_rate": 3.9041788810419186e-05, "loss": 0.8814, "step": 583 }, { "epoch": 0.12621569051221093, "grad_norm": 0.8455626964569092, "learning_rate": 3.903750267957361e-05, "loss": 0.8937, "step": 584 }, { "epoch": 0.12643181326993733, "grad_norm": 0.995812177658081, "learning_rate": 3.9033207220395733e-05, "loss": 0.7724, "step": 585 }, { "epoch": 0.12664793602766372, "grad_norm": 0.9180460572242737, "learning_rate": 3.9028902434990315e-05, "loss": 0.9788, "step": 586 }, { "epoch": 0.1268640587853901, "grad_norm": 1.0631048679351807, "learning_rate": 3.902458832546669e-05, "loss": 1.0315, "step": 587 }, { "epoch": 0.12708018154311648, "grad_norm": 0.7558464407920837, "learning_rate": 3.9020264893938763e-05, "loss": 0.9477, "step": 588 }, { "epoch": 0.12729630430084288, "grad_norm": 1.027151346206665, "learning_rate": 3.901593214252502e-05, "loss": 0.9978, "step": 589 }, { "epoch": 0.12751242705856927, "grad_norm": 1.023328185081482, "learning_rate": 3.901159007334847e-05, "loss": 1.08, "step": 590 }, { "epoch": 0.12772854981629567, "grad_norm": 1.150592565536499, "learning_rate": 3.900723868853674e-05, "loss": 0.9376, "step": 591 }, { "epoch": 0.12794467257402203, "grad_norm": 0.8630537390708923, "learning_rate": 3.9002877990221986e-05, "loss": 1.0298, "step": 592 }, { "epoch": 0.12816079533174843, "grad_norm": 0.7960755825042725, "learning_rate": 3.899850798054095e-05, "loss": 0.8426, "step": 593 }, { "epoch": 0.12837691808947482, "grad_norm": 0.7773906588554382, "learning_rate": 3.8994128661634914e-05, "loss": 0.8398, "step": 594 }, { "epoch": 0.12859304084720122, "grad_norm": 0.844587504863739, "learning_rate": 3.898974003564975e-05, "loss": 0.8978, "step": 595 }, { "epoch": 0.1288091636049276, "grad_norm": 0.8783379197120667, "learning_rate": 3.8985342104735864e-05, "loss": 0.9715, "step": 596 }, { "epoch": 0.12902528636265398, "grad_norm": 0.8174152374267578, "learning_rate": 3.898093487104824e-05, "loss": 0.9263, "step": 597 }, { "epoch": 0.12924140912038037, "grad_norm": 0.9347906708717346, "learning_rate": 3.8976518336746396e-05, "loss": 0.9257, "step": 598 }, { "epoch": 0.12945753187810677, "grad_norm": 0.9588862061500549, "learning_rate": 3.897209250399444e-05, "loss": 1.107, "step": 599 }, { "epoch": 0.12967365463583316, "grad_norm": 0.8485615253448486, "learning_rate": 3.896765737496101e-05, "loss": 0.8536, "step": 600 }, { "epoch": 0.12988977739355953, "grad_norm": 0.8176436424255371, "learning_rate": 3.896321295181932e-05, "loss": 0.7917, "step": 601 }, { "epoch": 0.13010590015128592, "grad_norm": 0.8480759859085083, "learning_rate": 3.8958759236747116e-05, "loss": 0.8708, "step": 602 }, { "epoch": 0.13032202290901232, "grad_norm": 1.072355031967163, "learning_rate": 3.895429623192672e-05, "loss": 1.0741, "step": 603 }, { "epoch": 0.13053814566673871, "grad_norm": 0.7905756831169128, "learning_rate": 3.894982393954498e-05, "loss": 0.8311, "step": 604 }, { "epoch": 0.1307542684244651, "grad_norm": 1.0046707391738892, "learning_rate": 3.894534236179331e-05, "loss": 0.8966, "step": 605 }, { "epoch": 0.13097039118219148, "grad_norm": 0.8906007409095764, "learning_rate": 3.894085150086769e-05, "loss": 0.9787, "step": 606 }, { "epoch": 0.13118651393991787, "grad_norm": 1.1165415048599243, "learning_rate": 3.893635135896861e-05, "loss": 0.9902, "step": 607 }, { "epoch": 0.13140263669764427, "grad_norm": 0.9424011707305908, "learning_rate": 3.893184193830114e-05, "loss": 0.8775, "step": 608 }, { "epoch": 0.13161875945537066, "grad_norm": 0.9391512274742126, "learning_rate": 3.8927323241074886e-05, "loss": 1.0867, "step": 609 }, { "epoch": 0.13183488221309703, "grad_norm": 1.1020722389221191, "learning_rate": 3.8922795269503996e-05, "loss": 1.1466, "step": 610 }, { "epoch": 0.13205100497082342, "grad_norm": 0.955644428730011, "learning_rate": 3.8918258025807164e-05, "loss": 1.0072, "step": 611 }, { "epoch": 0.13226712772854982, "grad_norm": 1.0140981674194336, "learning_rate": 3.891371151220764e-05, "loss": 1.0182, "step": 612 }, { "epoch": 0.1324832504862762, "grad_norm": 0.8920745849609375, "learning_rate": 3.89091557309332e-05, "loss": 0.6105, "step": 613 }, { "epoch": 0.1326993732440026, "grad_norm": 0.891534686088562, "learning_rate": 3.890459068421615e-05, "loss": 0.8945, "step": 614 }, { "epoch": 0.13291549600172897, "grad_norm": 0.9055238962173462, "learning_rate": 3.890001637429337e-05, "loss": 0.9382, "step": 615 }, { "epoch": 0.13313161875945537, "grad_norm": 0.9690259695053101, "learning_rate": 3.8895432803406266e-05, "loss": 0.9672, "step": 616 }, { "epoch": 0.13334774151718176, "grad_norm": 0.8873419761657715, "learning_rate": 3.889083997380076e-05, "loss": 0.8831, "step": 617 }, { "epoch": 0.13356386427490816, "grad_norm": 0.9011080861091614, "learning_rate": 3.888623788772734e-05, "loss": 0.9901, "step": 618 }, { "epoch": 0.13377998703263452, "grad_norm": 0.7810534238815308, "learning_rate": 3.888162654744101e-05, "loss": 0.7921, "step": 619 }, { "epoch": 0.13399610979036092, "grad_norm": 0.9624156355857849, "learning_rate": 3.887700595520132e-05, "loss": 0.9847, "step": 620 }, { "epoch": 0.1342122325480873, "grad_norm": 1.021198034286499, "learning_rate": 3.887237611327235e-05, "loss": 1.0239, "step": 621 }, { "epoch": 0.1344283553058137, "grad_norm": 0.8436684012413025, "learning_rate": 3.886773702392271e-05, "loss": 0.7515, "step": 622 }, { "epoch": 0.1346444780635401, "grad_norm": 1.0054851770401, "learning_rate": 3.886308868942555e-05, "loss": 0.7851, "step": 623 }, { "epoch": 0.13486060082126647, "grad_norm": 0.8490008115768433, "learning_rate": 3.8858431112058534e-05, "loss": 0.7387, "step": 624 }, { "epoch": 0.13507672357899286, "grad_norm": 0.9846950173377991, "learning_rate": 3.885376429410387e-05, "loss": 0.9461, "step": 625 }, { "epoch": 0.13529284633671926, "grad_norm": 0.9749772548675537, "learning_rate": 3.884908823784828e-05, "loss": 0.9217, "step": 626 }, { "epoch": 0.13550896909444565, "grad_norm": 0.8481361269950867, "learning_rate": 3.884440294558303e-05, "loss": 0.8687, "step": 627 }, { "epoch": 0.13572509185217205, "grad_norm": 0.9036352634429932, "learning_rate": 3.88397084196039e-05, "loss": 0.7601, "step": 628 }, { "epoch": 0.13594121460989841, "grad_norm": 0.9833604693412781, "learning_rate": 3.883500466221119e-05, "loss": 0.8995, "step": 629 }, { "epoch": 0.1361573373676248, "grad_norm": 0.8426030874252319, "learning_rate": 3.883029167570974e-05, "loss": 1.1427, "step": 630 }, { "epoch": 0.1363734601253512, "grad_norm": 0.961252748966217, "learning_rate": 3.88255694624089e-05, "loss": 0.9613, "step": 631 }, { "epoch": 0.1365895828830776, "grad_norm": 0.8939110040664673, "learning_rate": 3.882083802462254e-05, "loss": 1.1321, "step": 632 }, { "epoch": 0.13680570564080397, "grad_norm": 1.0109326839447021, "learning_rate": 3.881609736466906e-05, "loss": 0.8538, "step": 633 }, { "epoch": 0.13702182839853036, "grad_norm": 0.9047037363052368, "learning_rate": 3.8811347484871353e-05, "loss": 1.0148, "step": 634 }, { "epoch": 0.13723795115625675, "grad_norm": 0.9888780117034912, "learning_rate": 3.880658838755688e-05, "loss": 1.035, "step": 635 }, { "epoch": 0.13745407391398315, "grad_norm": 0.9979327321052551, "learning_rate": 3.880182007505756e-05, "loss": 1.0421, "step": 636 }, { "epoch": 0.13767019667170954, "grad_norm": 0.8484611511230469, "learning_rate": 3.879704254970987e-05, "loss": 0.7986, "step": 637 }, { "epoch": 0.1378863194294359, "grad_norm": 0.8858148455619812, "learning_rate": 3.8792255813854783e-05, "loss": 0.9573, "step": 638 }, { "epoch": 0.1381024421871623, "grad_norm": 0.9977371692657471, "learning_rate": 3.87874598698378e-05, "loss": 1.0117, "step": 639 }, { "epoch": 0.1383185649448887, "grad_norm": 0.8719163537025452, "learning_rate": 3.87826547200089e-05, "loss": 0.9715, "step": 640 }, { "epoch": 0.1385346877026151, "grad_norm": 0.9166508316993713, "learning_rate": 3.8777840366722606e-05, "loss": 1.0143, "step": 641 }, { "epoch": 0.13875081046034146, "grad_norm": 0.9907276034355164, "learning_rate": 3.877301681233794e-05, "loss": 1.0263, "step": 642 }, { "epoch": 0.13896693321806786, "grad_norm": 0.8705201148986816, "learning_rate": 3.876818405921844e-05, "loss": 0.7806, "step": 643 }, { "epoch": 0.13918305597579425, "grad_norm": 0.8763052225112915, "learning_rate": 3.876334210973213e-05, "loss": 1.0054, "step": 644 }, { "epoch": 0.13939917873352065, "grad_norm": 0.8491306900978088, "learning_rate": 3.8758490966251565e-05, "loss": 0.9252, "step": 645 }, { "epoch": 0.13961530149124704, "grad_norm": 0.9202139377593994, "learning_rate": 3.875363063115379e-05, "loss": 0.9219, "step": 646 }, { "epoch": 0.1398314242489734, "grad_norm": 0.9065908789634705, "learning_rate": 3.874876110682035e-05, "loss": 0.9117, "step": 647 }, { "epoch": 0.1400475470066998, "grad_norm": 0.8544798493385315, "learning_rate": 3.874388239563732e-05, "loss": 0.9109, "step": 648 }, { "epoch": 0.1402636697644262, "grad_norm": 0.8583292961120605, "learning_rate": 3.873899449999524e-05, "loss": 0.9353, "step": 649 }, { "epoch": 0.1404797925221526, "grad_norm": 1.047090768814087, "learning_rate": 3.8734097422289175e-05, "loss": 0.9254, "step": 650 }, { "epoch": 0.14069591527987896, "grad_norm": 0.8735350966453552, "learning_rate": 3.8729191164918674e-05, "loss": 0.9183, "step": 651 }, { "epoch": 0.14091203803760535, "grad_norm": 0.8952236175537109, "learning_rate": 3.87242757302878e-05, "loss": 0.8292, "step": 652 }, { "epoch": 0.14112816079533175, "grad_norm": 0.8877466320991516, "learning_rate": 3.871935112080511e-05, "loss": 0.8552, "step": 653 }, { "epoch": 0.14134428355305814, "grad_norm": 0.9474995136260986, "learning_rate": 3.8714417338883635e-05, "loss": 1.0136, "step": 654 }, { "epoch": 0.14156040631078454, "grad_norm": 0.8943508863449097, "learning_rate": 3.870947438694093e-05, "loss": 0.9082, "step": 655 }, { "epoch": 0.1417765290685109, "grad_norm": 0.9678506255149841, "learning_rate": 3.870452226739903e-05, "loss": 0.8696, "step": 656 }, { "epoch": 0.1419926518262373, "grad_norm": 1.0479648113250732, "learning_rate": 3.869956098268444e-05, "loss": 0.9327, "step": 657 }, { "epoch": 0.1422087745839637, "grad_norm": 0.8884786367416382, "learning_rate": 3.869459053522821e-05, "loss": 1.0548, "step": 658 }, { "epoch": 0.1424248973416901, "grad_norm": 0.9204626679420471, "learning_rate": 3.868961092746584e-05, "loss": 0.9182, "step": 659 }, { "epoch": 0.14264102009941648, "grad_norm": 0.7968615889549255, "learning_rate": 3.8684622161837306e-05, "loss": 0.924, "step": 660 }, { "epoch": 0.14285714285714285, "grad_norm": 1.0329526662826538, "learning_rate": 3.8679624240787113e-05, "loss": 0.9867, "step": 661 }, { "epoch": 0.14307326561486924, "grad_norm": 0.9560927748680115, "learning_rate": 3.8674617166764216e-05, "loss": 1.07, "step": 662 }, { "epoch": 0.14328938837259564, "grad_norm": 0.8470692038536072, "learning_rate": 3.866960094222208e-05, "loss": 0.9925, "step": 663 }, { "epoch": 0.14350551113032203, "grad_norm": 0.8122526407241821, "learning_rate": 3.866457556961864e-05, "loss": 0.7895, "step": 664 }, { "epoch": 0.1437216338880484, "grad_norm": 0.9864859580993652, "learning_rate": 3.865954105141632e-05, "loss": 0.9597, "step": 665 }, { "epoch": 0.1439377566457748, "grad_norm": 0.8763389587402344, "learning_rate": 3.865449739008202e-05, "loss": 0.8651, "step": 666 }, { "epoch": 0.1441538794035012, "grad_norm": 0.8942314386367798, "learning_rate": 3.864944458808712e-05, "loss": 0.8472, "step": 667 }, { "epoch": 0.14437000216122758, "grad_norm": 0.8911261558532715, "learning_rate": 3.864438264790748e-05, "loss": 0.9874, "step": 668 }, { "epoch": 0.14458612491895398, "grad_norm": 0.9165135622024536, "learning_rate": 3.8639311572023445e-05, "loss": 0.8398, "step": 669 }, { "epoch": 0.14480224767668035, "grad_norm": 0.8999388813972473, "learning_rate": 3.8634231362919826e-05, "loss": 0.8984, "step": 670 }, { "epoch": 0.14501837043440674, "grad_norm": 0.9407719969749451, "learning_rate": 3.862914202308592e-05, "loss": 1.0439, "step": 671 }, { "epoch": 0.14523449319213314, "grad_norm": 1.0014902353286743, "learning_rate": 3.8624043555015485e-05, "loss": 0.8871, "step": 672 }, { "epoch": 0.14545061594985953, "grad_norm": 0.9063896536827087, "learning_rate": 3.861893596120676e-05, "loss": 1.0437, "step": 673 }, { "epoch": 0.1456667387075859, "grad_norm": 0.9328497052192688, "learning_rate": 3.861381924416245e-05, "loss": 0.8635, "step": 674 }, { "epoch": 0.1458828614653123, "grad_norm": 0.9634287357330322, "learning_rate": 3.860869340638974e-05, "loss": 0.88, "step": 675 }, { "epoch": 0.14609898422303869, "grad_norm": 0.8020275235176086, "learning_rate": 3.8603558450400286e-05, "loss": 0.8758, "step": 676 }, { "epoch": 0.14631510698076508, "grad_norm": 0.8185033798217773, "learning_rate": 3.859841437871019e-05, "loss": 0.8325, "step": 677 }, { "epoch": 0.14653122973849148, "grad_norm": 0.9865819215774536, "learning_rate": 3.859326119384004e-05, "loss": 0.9134, "step": 678 }, { "epoch": 0.14674735249621784, "grad_norm": 0.7986255884170532, "learning_rate": 3.8588098898314895e-05, "loss": 0.9649, "step": 679 }, { "epoch": 0.14696347525394424, "grad_norm": 1.0155187845230103, "learning_rate": 3.858292749466426e-05, "loss": 1.039, "step": 680 }, { "epoch": 0.14717959801167063, "grad_norm": 0.8765354156494141, "learning_rate": 3.85777469854221e-05, "loss": 1.0504, "step": 681 }, { "epoch": 0.14739572076939703, "grad_norm": 0.9732946753501892, "learning_rate": 3.857255737312687e-05, "loss": 1.0145, "step": 682 }, { "epoch": 0.1476118435271234, "grad_norm": 1.0179862976074219, "learning_rate": 3.856735866032145e-05, "loss": 0.8695, "step": 683 }, { "epoch": 0.1478279662848498, "grad_norm": 0.9551127552986145, "learning_rate": 3.856215084955322e-05, "loss": 1.0414, "step": 684 }, { "epoch": 0.14804408904257618, "grad_norm": 0.8766571283340454, "learning_rate": 3.855693394337398e-05, "loss": 0.978, "step": 685 }, { "epoch": 0.14826021180030258, "grad_norm": 0.9660585522651672, "learning_rate": 3.855170794434e-05, "loss": 0.9748, "step": 686 }, { "epoch": 0.14847633455802897, "grad_norm": 0.8801333904266357, "learning_rate": 3.854647285501202e-05, "loss": 0.9898, "step": 687 }, { "epoch": 0.14869245731575534, "grad_norm": 0.8716098666191101, "learning_rate": 3.8541228677955216e-05, "loss": 0.8926, "step": 688 }, { "epoch": 0.14890858007348173, "grad_norm": 0.9434691667556763, "learning_rate": 3.853597541573921e-05, "loss": 1.1135, "step": 689 }, { "epoch": 0.14912470283120813, "grad_norm": 1.0055344104766846, "learning_rate": 3.853071307093811e-05, "loss": 1.0053, "step": 690 }, { "epoch": 0.14934082558893452, "grad_norm": 1.0277626514434814, "learning_rate": 3.8525441646130435e-05, "loss": 0.9823, "step": 691 }, { "epoch": 0.1495569483466609, "grad_norm": 0.8405638933181763, "learning_rate": 3.852016114389918e-05, "loss": 0.9192, "step": 692 }, { "epoch": 0.14977307110438728, "grad_norm": 0.7715931534767151, "learning_rate": 3.851487156683178e-05, "loss": 0.9255, "step": 693 }, { "epoch": 0.14998919386211368, "grad_norm": 1.054321050643921, "learning_rate": 3.8509572917520113e-05, "loss": 1.1794, "step": 694 }, { "epoch": 0.15020531661984007, "grad_norm": 0.8760213255882263, "learning_rate": 3.8504265198560495e-05, "loss": 0.9638, "step": 695 }, { "epoch": 0.15042143937756647, "grad_norm": 0.8417812585830688, "learning_rate": 3.8498948412553715e-05, "loss": 0.8056, "step": 696 }, { "epoch": 0.15063756213529284, "grad_norm": 0.8356149196624756, "learning_rate": 3.8493622562104976e-05, "loss": 0.9349, "step": 697 }, { "epoch": 0.15085368489301923, "grad_norm": 0.8915977478027344, "learning_rate": 3.8488287649823924e-05, "loss": 0.9902, "step": 698 }, { "epoch": 0.15106980765074562, "grad_norm": 0.810424268245697, "learning_rate": 3.8482943678324674e-05, "loss": 0.8976, "step": 699 }, { "epoch": 0.15128593040847202, "grad_norm": 0.9278942346572876, "learning_rate": 3.8477590650225735e-05, "loss": 0.956, "step": 700 }, { "epoch": 0.1515020531661984, "grad_norm": 0.8655145168304443, "learning_rate": 3.8472228568150105e-05, "loss": 0.7879, "step": 701 }, { "epoch": 0.15171817592392478, "grad_norm": 1.128374695777893, "learning_rate": 3.8466857434725164e-05, "loss": 1.0564, "step": 702 }, { "epoch": 0.15193429868165118, "grad_norm": 0.874324381351471, "learning_rate": 3.846147725258278e-05, "loss": 0.9016, "step": 703 }, { "epoch": 0.15215042143937757, "grad_norm": 0.9844251275062561, "learning_rate": 3.845608802435922e-05, "loss": 0.7689, "step": 704 }, { "epoch": 0.15236654419710396, "grad_norm": 0.9193190336227417, "learning_rate": 3.8450689752695186e-05, "loss": 0.9483, "step": 705 }, { "epoch": 0.15258266695483033, "grad_norm": 0.9375171661376953, "learning_rate": 3.844528244023583e-05, "loss": 0.8956, "step": 706 }, { "epoch": 0.15279878971255673, "grad_norm": 0.8980957269668579, "learning_rate": 3.8439866089630714e-05, "loss": 1.0145, "step": 707 }, { "epoch": 0.15301491247028312, "grad_norm": 0.9306039214134216, "learning_rate": 3.843444070353384e-05, "loss": 0.8307, "step": 708 }, { "epoch": 0.15323103522800952, "grad_norm": 0.8327880501747131, "learning_rate": 3.842900628460364e-05, "loss": 0.853, "step": 709 }, { "epoch": 0.1534471579857359, "grad_norm": 0.9295020699501038, "learning_rate": 3.842356283550296e-05, "loss": 1.0683, "step": 710 }, { "epoch": 0.15366328074346228, "grad_norm": 0.9068468809127808, "learning_rate": 3.841811035889908e-05, "loss": 1.1291, "step": 711 }, { "epoch": 0.15387940350118867, "grad_norm": 0.8090900778770447, "learning_rate": 3.8412648857463694e-05, "loss": 0.9488, "step": 712 }, { "epoch": 0.15409552625891507, "grad_norm": 0.9061992168426514, "learning_rate": 3.840717833387294e-05, "loss": 0.9388, "step": 713 }, { "epoch": 0.15431164901664146, "grad_norm": 0.8311301469802856, "learning_rate": 3.840169879080735e-05, "loss": 1.0176, "step": 714 }, { "epoch": 0.15452777177436783, "grad_norm": 0.8473559021949768, "learning_rate": 3.839621023095189e-05, "loss": 0.9885, "step": 715 }, { "epoch": 0.15474389453209422, "grad_norm": 0.9512867331504822, "learning_rate": 3.8390712656995946e-05, "loss": 0.8733, "step": 716 }, { "epoch": 0.15496001728982062, "grad_norm": 1.2607604265213013, "learning_rate": 3.8385206071633315e-05, "loss": 0.8414, "step": 717 }, { "epoch": 0.155176140047547, "grad_norm": 0.8926445841789246, "learning_rate": 3.837969047756221e-05, "loss": 0.8794, "step": 718 }, { "epoch": 0.1553922628052734, "grad_norm": 0.841687798500061, "learning_rate": 3.837416587748525e-05, "loss": 0.8882, "step": 719 }, { "epoch": 0.15560838556299977, "grad_norm": 0.9890703558921814, "learning_rate": 3.836863227410949e-05, "loss": 1.0339, "step": 720 }, { "epoch": 0.15582450832072617, "grad_norm": 1.026080846786499, "learning_rate": 3.836308967014638e-05, "loss": 1.102, "step": 721 }, { "epoch": 0.15604063107845256, "grad_norm": 0.9443207383155823, "learning_rate": 3.835753806831178e-05, "loss": 0.9666, "step": 722 }, { "epoch": 0.15625675383617896, "grad_norm": 0.8445281982421875, "learning_rate": 3.835197747132596e-05, "loss": 0.9106, "step": 723 }, { "epoch": 0.15647287659390532, "grad_norm": 0.983044445514679, "learning_rate": 3.834640788191361e-05, "loss": 0.8976, "step": 724 }, { "epoch": 0.15668899935163172, "grad_norm": 0.9355096817016602, "learning_rate": 3.83408293028038e-05, "loss": 1.0032, "step": 725 }, { "epoch": 0.15690512210935811, "grad_norm": 0.8715406060218811, "learning_rate": 3.833524173673004e-05, "loss": 0.8908, "step": 726 }, { "epoch": 0.1571212448670845, "grad_norm": 0.9600350260734558, "learning_rate": 3.83296451864302e-05, "loss": 0.9133, "step": 727 }, { "epoch": 0.1573373676248109, "grad_norm": 0.8724390268325806, "learning_rate": 3.83240396546466e-05, "loss": 1.0321, "step": 728 }, { "epoch": 0.15755349038253727, "grad_norm": 0.9737085700035095, "learning_rate": 3.8318425144125926e-05, "loss": 0.9593, "step": 729 }, { "epoch": 0.15776961314026366, "grad_norm": 0.8641157150268555, "learning_rate": 3.8312801657619277e-05, "loss": 0.8612, "step": 730 }, { "epoch": 0.15798573589799006, "grad_norm": 0.9206441640853882, "learning_rate": 3.830716919788215e-05, "loss": 0.9566, "step": 731 }, { "epoch": 0.15820185865571645, "grad_norm": 1.0136420726776123, "learning_rate": 3.830152776767444e-05, "loss": 0.9764, "step": 732 }, { "epoch": 0.15841798141344285, "grad_norm": 0.916991651058197, "learning_rate": 3.8295877369760426e-05, "loss": 1.0438, "step": 733 }, { "epoch": 0.15863410417116922, "grad_norm": 0.8354426622390747, "learning_rate": 3.829021800690879e-05, "loss": 0.8525, "step": 734 }, { "epoch": 0.1588502269288956, "grad_norm": 0.9984889030456543, "learning_rate": 3.8284549681892615e-05, "loss": 1.1238, "step": 735 }, { "epoch": 0.159066349686622, "grad_norm": 0.9616902470588684, "learning_rate": 3.827887239748937e-05, "loss": 0.8615, "step": 736 }, { "epoch": 0.1592824724443484, "grad_norm": 0.9104542136192322, "learning_rate": 3.82731861564809e-05, "loss": 1.0627, "step": 737 }, { "epoch": 0.15949859520207477, "grad_norm": 0.9917435646057129, "learning_rate": 3.826749096165346e-05, "loss": 0.9407, "step": 738 }, { "epoch": 0.15971471795980116, "grad_norm": 1.0158805847167969, "learning_rate": 3.826178681579767e-05, "loss": 0.8319, "step": 739 }, { "epoch": 0.15993084071752756, "grad_norm": 0.8807531595230103, "learning_rate": 3.825607372170855e-05, "loss": 1.025, "step": 740 }, { "epoch": 0.16014696347525395, "grad_norm": 0.8851889967918396, "learning_rate": 3.825035168218552e-05, "loss": 1.0129, "step": 741 }, { "epoch": 0.16036308623298035, "grad_norm": 0.9945665001869202, "learning_rate": 3.824462070003235e-05, "loss": 1.0437, "step": 742 }, { "epoch": 0.1605792089907067, "grad_norm": 0.949619710445404, "learning_rate": 3.8238880778057214e-05, "loss": 0.9602, "step": 743 }, { "epoch": 0.1607953317484331, "grad_norm": 0.9423522353172302, "learning_rate": 3.8233131919072655e-05, "loss": 0.8515, "step": 744 }, { "epoch": 0.1610114545061595, "grad_norm": 0.9392579197883606, "learning_rate": 3.822737412589561e-05, "loss": 1.1029, "step": 745 }, { "epoch": 0.1612275772638859, "grad_norm": 1.0344432592391968, "learning_rate": 3.822160740134738e-05, "loss": 0.9482, "step": 746 }, { "epoch": 0.16144370002161226, "grad_norm": 0.9603621959686279, "learning_rate": 3.821583174825364e-05, "loss": 1.0275, "step": 747 }, { "epoch": 0.16165982277933866, "grad_norm": 1.0137100219726562, "learning_rate": 3.821004716944445e-05, "loss": 0.9796, "step": 748 }, { "epoch": 0.16187594553706505, "grad_norm": 0.9770598411560059, "learning_rate": 3.820425366775425e-05, "loss": 0.9573, "step": 749 }, { "epoch": 0.16209206829479145, "grad_norm": 0.8415801525115967, "learning_rate": 3.8198451246021834e-05, "loss": 0.7984, "step": 750 }, { "epoch": 0.16230819105251784, "grad_norm": 0.912669837474823, "learning_rate": 3.819263990709037e-05, "loss": 0.9408, "step": 751 }, { "epoch": 0.1625243138102442, "grad_norm": 1.046235203742981, "learning_rate": 3.818681965380741e-05, "loss": 1.0403, "step": 752 }, { "epoch": 0.1627404365679706, "grad_norm": 0.7493875622749329, "learning_rate": 3.818099048902486e-05, "loss": 0.9189, "step": 753 }, { "epoch": 0.162956559325697, "grad_norm": 0.9934805035591125, "learning_rate": 3.817515241559901e-05, "loss": 0.8216, "step": 754 }, { "epoch": 0.1631726820834234, "grad_norm": 0.9069898724555969, "learning_rate": 3.8169305436390474e-05, "loss": 0.9603, "step": 755 }, { "epoch": 0.16338880484114976, "grad_norm": 0.9589218497276306, "learning_rate": 3.816344955426429e-05, "loss": 0.9931, "step": 756 }, { "epoch": 0.16360492759887615, "grad_norm": 0.8494526743888855, "learning_rate": 3.81575847720898e-05, "loss": 1.0661, "step": 757 }, { "epoch": 0.16382105035660255, "grad_norm": 0.9042608141899109, "learning_rate": 3.8151711092740756e-05, "loss": 0.8215, "step": 758 }, { "epoch": 0.16403717311432894, "grad_norm": 0.8558818101882935, "learning_rate": 3.814582851909523e-05, "loss": 0.9715, "step": 759 }, { "epoch": 0.16425329587205534, "grad_norm": 0.9446301460266113, "learning_rate": 3.8139937054035686e-05, "loss": 0.8714, "step": 760 }, { "epoch": 0.1644694186297817, "grad_norm": 0.9493723511695862, "learning_rate": 3.813403670044891e-05, "loss": 1.0183, "step": 761 }, { "epoch": 0.1646855413875081, "grad_norm": 0.8754888772964478, "learning_rate": 3.8128127461226074e-05, "loss": 1.0799, "step": 762 }, { "epoch": 0.1649016641452345, "grad_norm": 0.9679006338119507, "learning_rate": 3.8122209339262686e-05, "loss": 1.0678, "step": 763 }, { "epoch": 0.1651177869029609, "grad_norm": 1.0180104970932007, "learning_rate": 3.811628233745862e-05, "loss": 0.9036, "step": 764 }, { "epoch": 0.16533390966068728, "grad_norm": 0.7865733504295349, "learning_rate": 3.8110346458718085e-05, "loss": 0.8595, "step": 765 }, { "epoch": 0.16555003241841365, "grad_norm": 0.877221405506134, "learning_rate": 3.810440170594964e-05, "loss": 0.9345, "step": 766 }, { "epoch": 0.16576615517614005, "grad_norm": 1.0369511842727661, "learning_rate": 3.809844808206622e-05, "loss": 1.0009, "step": 767 }, { "epoch": 0.16598227793386644, "grad_norm": 0.9274987578392029, "learning_rate": 3.809248558998508e-05, "loss": 0.9221, "step": 768 }, { "epoch": 0.16619840069159283, "grad_norm": 0.9995907545089722, "learning_rate": 3.808651423262782e-05, "loss": 0.9444, "step": 769 }, { "epoch": 0.1664145234493192, "grad_norm": 0.8567201495170593, "learning_rate": 3.80805340129204e-05, "loss": 1.0448, "step": 770 }, { "epoch": 0.1666306462070456, "grad_norm": 0.8255602717399597, "learning_rate": 3.8074544933793105e-05, "loss": 0.8142, "step": 771 }, { "epoch": 0.166846768964772, "grad_norm": 0.8213574290275574, "learning_rate": 3.806854699818058e-05, "loss": 1.0253, "step": 772 }, { "epoch": 0.16706289172249839, "grad_norm": 0.8631877303123474, "learning_rate": 3.806254020902179e-05, "loss": 1.0511, "step": 773 }, { "epoch": 0.16727901448022478, "grad_norm": 0.8674742579460144, "learning_rate": 3.805652456926005e-05, "loss": 0.8251, "step": 774 }, { "epoch": 0.16749513723795115, "grad_norm": 1.031251072883606, "learning_rate": 3.805050008184302e-05, "loss": 0.8754, "step": 775 }, { "epoch": 0.16771125999567754, "grad_norm": 0.9958481788635254, "learning_rate": 3.804446674972267e-05, "loss": 0.9438, "step": 776 }, { "epoch": 0.16792738275340394, "grad_norm": 0.7598473429679871, "learning_rate": 3.8038424575855326e-05, "loss": 0.6512, "step": 777 }, { "epoch": 0.16814350551113033, "grad_norm": 0.8823094367980957, "learning_rate": 3.8032373563201644e-05, "loss": 0.9703, "step": 778 }, { "epoch": 0.1683596282688567, "grad_norm": 0.94339519739151, "learning_rate": 3.802631371472659e-05, "loss": 0.9723, "step": 779 }, { "epoch": 0.1685757510265831, "grad_norm": 0.9543091058731079, "learning_rate": 3.80202450333995e-05, "loss": 0.8788, "step": 780 }, { "epoch": 0.1687918737843095, "grad_norm": 0.9106163382530212, "learning_rate": 3.8014167522193985e-05, "loss": 0.9754, "step": 781 }, { "epoch": 0.16900799654203588, "grad_norm": 0.9123164415359497, "learning_rate": 3.8008081184088036e-05, "loss": 0.8364, "step": 782 }, { "epoch": 0.16922411929976228, "grad_norm": 0.9945230484008789, "learning_rate": 3.800198602206394e-05, "loss": 1.0841, "step": 783 }, { "epoch": 0.16944024205748864, "grad_norm": 0.9595274329185486, "learning_rate": 3.79958820391083e-05, "loss": 0.9674, "step": 784 }, { "epoch": 0.16965636481521504, "grad_norm": 0.9454584121704102, "learning_rate": 3.798976923821207e-05, "loss": 0.8179, "step": 785 }, { "epoch": 0.16987248757294143, "grad_norm": 0.9144061207771301, "learning_rate": 3.798364762237049e-05, "loss": 0.8421, "step": 786 }, { "epoch": 0.17008861033066783, "grad_norm": 0.8717312216758728, "learning_rate": 3.797751719458315e-05, "loss": 0.884, "step": 787 }, { "epoch": 0.1703047330883942, "grad_norm": 0.8118639588356018, "learning_rate": 3.7971377957853945e-05, "loss": 0.9057, "step": 788 }, { "epoch": 0.1705208558461206, "grad_norm": 1.0112063884735107, "learning_rate": 3.7965229915191086e-05, "loss": 0.8148, "step": 789 }, { "epoch": 0.17073697860384698, "grad_norm": 0.9880518913269043, "learning_rate": 3.7959073069607094e-05, "loss": 0.9605, "step": 790 }, { "epoch": 0.17095310136157338, "grad_norm": 1.0393980741500854, "learning_rate": 3.7952907424118815e-05, "loss": 1.0229, "step": 791 }, { "epoch": 0.17116922411929977, "grad_norm": 0.8649792671203613, "learning_rate": 3.79467329817474e-05, "loss": 1.0278, "step": 792 }, { "epoch": 0.17138534687702614, "grad_norm": 0.84566730260849, "learning_rate": 3.7940549745518306e-05, "loss": 1.0526, "step": 793 }, { "epoch": 0.17160146963475253, "grad_norm": 0.9151575565338135, "learning_rate": 3.793435771846131e-05, "loss": 0.9885, "step": 794 }, { "epoch": 0.17181759239247893, "grad_norm": 1.1504076719284058, "learning_rate": 3.792815690361049e-05, "loss": 1.0227, "step": 795 }, { "epoch": 0.17203371515020532, "grad_norm": 0.854996383190155, "learning_rate": 3.792194730400424e-05, "loss": 0.9017, "step": 796 }, { "epoch": 0.17224983790793172, "grad_norm": 0.9883208274841309, "learning_rate": 3.791572892268524e-05, "loss": 0.943, "step": 797 }, { "epoch": 0.17246596066565809, "grad_norm": 0.941680908203125, "learning_rate": 3.790950176270047e-05, "loss": 1.1355, "step": 798 }, { "epoch": 0.17268208342338448, "grad_norm": 1.0921597480773926, "learning_rate": 3.790326582710125e-05, "loss": 1.0459, "step": 799 }, { "epoch": 0.17289820618111088, "grad_norm": 0.9764776229858398, "learning_rate": 3.7897021118943156e-05, "loss": 1.1244, "step": 800 }, { "epoch": 0.17311432893883727, "grad_norm": 0.8465645909309387, "learning_rate": 3.7890767641286086e-05, "loss": 0.8692, "step": 801 }, { "epoch": 0.17333045169656364, "grad_norm": 0.8207517862319946, "learning_rate": 3.788450539719423e-05, "loss": 0.7815, "step": 802 }, { "epoch": 0.17354657445429003, "grad_norm": 1.0459989309310913, "learning_rate": 3.7878234389736074e-05, "loss": 0.9891, "step": 803 }, { "epoch": 0.17376269721201643, "grad_norm": 0.9639811515808105, "learning_rate": 3.787195462198439e-05, "loss": 0.861, "step": 804 }, { "epoch": 0.17397881996974282, "grad_norm": 0.7766333222389221, "learning_rate": 3.786566609701626e-05, "loss": 0.731, "step": 805 }, { "epoch": 0.17419494272746922, "grad_norm": 0.7881793975830078, "learning_rate": 3.7859368817913037e-05, "loss": 0.9065, "step": 806 }, { "epoch": 0.17441106548519558, "grad_norm": 0.8844035267829895, "learning_rate": 3.785306278776038e-05, "loss": 0.9453, "step": 807 }, { "epoch": 0.17462718824292198, "grad_norm": 0.9553999304771423, "learning_rate": 3.784674800964823e-05, "loss": 0.7729, "step": 808 }, { "epoch": 0.17484331100064837, "grad_norm": 0.8754869699478149, "learning_rate": 3.784042448667081e-05, "loss": 1.0213, "step": 809 }, { "epoch": 0.17505943375837477, "grad_norm": 0.9966672658920288, "learning_rate": 3.783409222192663e-05, "loss": 0.8817, "step": 810 }, { "epoch": 0.17527555651610113, "grad_norm": 0.953411877155304, "learning_rate": 3.782775121851849e-05, "loss": 0.8634, "step": 811 }, { "epoch": 0.17549167927382753, "grad_norm": 1.004577398300171, "learning_rate": 3.782140147955347e-05, "loss": 0.8899, "step": 812 }, { "epoch": 0.17570780203155392, "grad_norm": 0.8689178228378296, "learning_rate": 3.7815043008142915e-05, "loss": 0.9426, "step": 813 }, { "epoch": 0.17592392478928032, "grad_norm": 0.8731287717819214, "learning_rate": 3.780867580740247e-05, "loss": 1.0252, "step": 814 }, { "epoch": 0.1761400475470067, "grad_norm": 0.9560363292694092, "learning_rate": 3.780229988045204e-05, "loss": 0.8715, "step": 815 }, { "epoch": 0.17635617030473308, "grad_norm": 0.9369506239891052, "learning_rate": 3.7795915230415834e-05, "loss": 0.8979, "step": 816 }, { "epoch": 0.17657229306245947, "grad_norm": 0.9733330011367798, "learning_rate": 3.77895218604223e-05, "loss": 0.8355, "step": 817 }, { "epoch": 0.17678841582018587, "grad_norm": 0.7921898365020752, "learning_rate": 3.7783119773604176e-05, "loss": 0.9214, "step": 818 }, { "epoch": 0.17700453857791226, "grad_norm": 0.8752906918525696, "learning_rate": 3.7776708973098476e-05, "loss": 1.0074, "step": 819 }, { "epoch": 0.17722066133563863, "grad_norm": 1.0164240598678589, "learning_rate": 3.777028946204647e-05, "loss": 1.0186, "step": 820 }, { "epoch": 0.17743678409336502, "grad_norm": 0.8949374556541443, "learning_rate": 3.77638612435937e-05, "loss": 0.9594, "step": 821 }, { "epoch": 0.17765290685109142, "grad_norm": 1.0692867040634155, "learning_rate": 3.7757424320889987e-05, "loss": 0.9626, "step": 822 }, { "epoch": 0.1778690296088178, "grad_norm": 0.8554781675338745, "learning_rate": 3.775097869708941e-05, "loss": 0.8944, "step": 823 }, { "epoch": 0.1780851523665442, "grad_norm": 0.979682207107544, "learning_rate": 3.774452437535031e-05, "loss": 0.9192, "step": 824 }, { "epoch": 0.17830127512427058, "grad_norm": 0.9625377655029297, "learning_rate": 3.773806135883528e-05, "loss": 0.8022, "step": 825 }, { "epoch": 0.17851739788199697, "grad_norm": 0.9088266491889954, "learning_rate": 3.773158965071119e-05, "loss": 0.9798, "step": 826 }, { "epoch": 0.17873352063972336, "grad_norm": 0.9747220277786255, "learning_rate": 3.772510925414916e-05, "loss": 0.879, "step": 827 }, { "epoch": 0.17894964339744976, "grad_norm": 0.9348064661026001, "learning_rate": 3.771862017232456e-05, "loss": 1.0974, "step": 828 }, { "epoch": 0.17916576615517615, "grad_norm": 0.8869150876998901, "learning_rate": 3.7712122408417055e-05, "loss": 0.8494, "step": 829 }, { "epoch": 0.17938188891290252, "grad_norm": 0.9823442101478577, "learning_rate": 3.77056159656105e-05, "loss": 1.0264, "step": 830 }, { "epoch": 0.17959801167062892, "grad_norm": 0.8608393669128418, "learning_rate": 3.769910084709305e-05, "loss": 0.8466, "step": 831 }, { "epoch": 0.1798141344283553, "grad_norm": 0.7502967119216919, "learning_rate": 3.769257705605711e-05, "loss": 0.9066, "step": 832 }, { "epoch": 0.1800302571860817, "grad_norm": 1.046044945716858, "learning_rate": 3.768604459569931e-05, "loss": 1.1937, "step": 833 }, { "epoch": 0.18024637994380807, "grad_norm": 0.8878052830696106, "learning_rate": 3.767950346922054e-05, "loss": 0.8745, "step": 834 }, { "epoch": 0.18046250270153447, "grad_norm": 0.803325891494751, "learning_rate": 3.7672953679825934e-05, "loss": 0.8249, "step": 835 }, { "epoch": 0.18067862545926086, "grad_norm": 0.9014940857887268, "learning_rate": 3.7666395230724885e-05, "loss": 0.8478, "step": 836 }, { "epoch": 0.18089474821698726, "grad_norm": 0.8608657717704773, "learning_rate": 3.765982812513101e-05, "loss": 0.9171, "step": 837 }, { "epoch": 0.18111087097471365, "grad_norm": 0.8538623452186584, "learning_rate": 3.765325236626217e-05, "loss": 0.7362, "step": 838 }, { "epoch": 0.18132699373244002, "grad_norm": 0.9900898337364197, "learning_rate": 3.764666795734049e-05, "loss": 1.0384, "step": 839 }, { "epoch": 0.1815431164901664, "grad_norm": 0.9848501682281494, "learning_rate": 3.7640074901592306e-05, "loss": 0.8081, "step": 840 }, { "epoch": 0.1817592392478928, "grad_norm": 0.8468287587165833, "learning_rate": 3.763347320224819e-05, "loss": 0.8982, "step": 841 }, { "epoch": 0.1819753620056192, "grad_norm": 1.0634558200836182, "learning_rate": 3.762686286254297e-05, "loss": 1.0065, "step": 842 }, { "epoch": 0.18219148476334557, "grad_norm": 0.8285744190216064, "learning_rate": 3.7620243885715695e-05, "loss": 0.8926, "step": 843 }, { "epoch": 0.18240760752107196, "grad_norm": 0.7932522892951965, "learning_rate": 3.761361627500964e-05, "loss": 0.9055, "step": 844 }, { "epoch": 0.18262373027879836, "grad_norm": 1.0279165506362915, "learning_rate": 3.7606980033672344e-05, "loss": 0.9415, "step": 845 }, { "epoch": 0.18283985303652475, "grad_norm": 0.8211097121238708, "learning_rate": 3.760033516495552e-05, "loss": 0.7362, "step": 846 }, { "epoch": 0.18305597579425115, "grad_norm": 1.0028184652328491, "learning_rate": 3.7593681672115155e-05, "loss": 1.0083, "step": 847 }, { "epoch": 0.1832720985519775, "grad_norm": 1.0128577947616577, "learning_rate": 3.758701955841144e-05, "loss": 0.8788, "step": 848 }, { "epoch": 0.1834882213097039, "grad_norm": 0.9472517967224121, "learning_rate": 3.75803488271088e-05, "loss": 0.9193, "step": 849 }, { "epoch": 0.1837043440674303, "grad_norm": 0.9328030943870544, "learning_rate": 3.757366948147587e-05, "loss": 0.8707, "step": 850 }, { "epoch": 0.1839204668251567, "grad_norm": 0.9275659322738647, "learning_rate": 3.7566981524785526e-05, "loss": 0.8106, "step": 851 }, { "epoch": 0.18413658958288306, "grad_norm": 0.8463131189346313, "learning_rate": 3.756028496031484e-05, "loss": 0.8724, "step": 852 }, { "epoch": 0.18435271234060946, "grad_norm": 0.9580005407333374, "learning_rate": 3.755357979134511e-05, "loss": 0.9289, "step": 853 }, { "epoch": 0.18456883509833585, "grad_norm": 1.036739706993103, "learning_rate": 3.754686602116187e-05, "loss": 0.9357, "step": 854 }, { "epoch": 0.18478495785606225, "grad_norm": 0.8900768756866455, "learning_rate": 3.754014365305484e-05, "loss": 0.9699, "step": 855 }, { "epoch": 0.18500108061378864, "grad_norm": 0.8311254978179932, "learning_rate": 3.753341269031797e-05, "loss": 0.902, "step": 856 }, { "epoch": 0.185217203371515, "grad_norm": 0.8818534016609192, "learning_rate": 3.7526673136249404e-05, "loss": 0.8418, "step": 857 }, { "epoch": 0.1854333261292414, "grad_norm": 0.8500455021858215, "learning_rate": 3.7519924994151524e-05, "loss": 0.9488, "step": 858 }, { "epoch": 0.1856494488869678, "grad_norm": 0.984480619430542, "learning_rate": 3.7513168267330894e-05, "loss": 1.0126, "step": 859 }, { "epoch": 0.1858655716446942, "grad_norm": 0.9773917198181152, "learning_rate": 3.75064029590983e-05, "loss": 0.9344, "step": 860 }, { "epoch": 0.18608169440242056, "grad_norm": 0.8242107033729553, "learning_rate": 3.7499629072768724e-05, "loss": 0.927, "step": 861 }, { "epoch": 0.18629781716014696, "grad_norm": 0.830271303653717, "learning_rate": 3.749284661166135e-05, "loss": 0.9614, "step": 862 }, { "epoch": 0.18651393991787335, "grad_norm": 0.8726242184638977, "learning_rate": 3.748605557909958e-05, "loss": 0.9402, "step": 863 }, { "epoch": 0.18673006267559975, "grad_norm": 0.9918140769004822, "learning_rate": 3.7479255978411e-05, "loss": 1.0401, "step": 864 }, { "epoch": 0.18694618543332614, "grad_norm": 0.8276970386505127, "learning_rate": 3.7472447812927395e-05, "loss": 0.8476, "step": 865 }, { "epoch": 0.1871623081910525, "grad_norm": 0.9273027777671814, "learning_rate": 3.746563108598475e-05, "loss": 0.971, "step": 866 }, { "epoch": 0.1873784309487789, "grad_norm": 0.926529049873352, "learning_rate": 3.7458805800923253e-05, "loss": 0.8091, "step": 867 }, { "epoch": 0.1875945537065053, "grad_norm": 0.9758947491645813, "learning_rate": 3.745197196108726e-05, "loss": 0.9299, "step": 868 }, { "epoch": 0.1878106764642317, "grad_norm": 0.9522454738616943, "learning_rate": 3.744512956982537e-05, "loss": 0.9481, "step": 869 }, { "epoch": 0.18802679922195809, "grad_norm": 0.952519953250885, "learning_rate": 3.743827863049029e-05, "loss": 0.973, "step": 870 }, { "epoch": 0.18824292197968445, "grad_norm": 0.8867212533950806, "learning_rate": 3.7431419146439014e-05, "loss": 0.9154, "step": 871 }, { "epoch": 0.18845904473741085, "grad_norm": 0.9702788591384888, "learning_rate": 3.7424551121032646e-05, "loss": 0.9125, "step": 872 }, { "epoch": 0.18867516749513724, "grad_norm": 0.8474773168563843, "learning_rate": 3.741767455763651e-05, "loss": 0.9884, "step": 873 }, { "epoch": 0.18889129025286364, "grad_norm": 0.7889257073402405, "learning_rate": 3.74107894596201e-05, "loss": 0.6314, "step": 874 }, { "epoch": 0.18910741301059, "grad_norm": 1.02834153175354, "learning_rate": 3.7403895830357114e-05, "loss": 1.0108, "step": 875 }, { "epoch": 0.1893235357683164, "grad_norm": 0.942200243473053, "learning_rate": 3.739699367322539e-05, "loss": 0.8859, "step": 876 }, { "epoch": 0.1895396585260428, "grad_norm": 1.0845520496368408, "learning_rate": 3.739008299160698e-05, "loss": 0.982, "step": 877 }, { "epoch": 0.1897557812837692, "grad_norm": 1.137697696685791, "learning_rate": 3.738316378888812e-05, "loss": 0.933, "step": 878 }, { "epoch": 0.18997190404149558, "grad_norm": 1.0095382928848267, "learning_rate": 3.737623606845918e-05, "loss": 1.1746, "step": 879 }, { "epoch": 0.19018802679922195, "grad_norm": 0.9245660901069641, "learning_rate": 3.736929983371473e-05, "loss": 1.0572, "step": 880 }, { "epoch": 0.19040414955694834, "grad_norm": 0.9055622816085815, "learning_rate": 3.736235508805352e-05, "loss": 1.0532, "step": 881 }, { "epoch": 0.19062027231467474, "grad_norm": 1.05194091796875, "learning_rate": 3.7355401834878446e-05, "loss": 1.0278, "step": 882 }, { "epoch": 0.19083639507240113, "grad_norm": 0.7964370846748352, "learning_rate": 3.73484400775966e-05, "loss": 0.8587, "step": 883 }, { "epoch": 0.1910525178301275, "grad_norm": 0.8697637319564819, "learning_rate": 3.734146981961921e-05, "loss": 0.9233, "step": 884 }, { "epoch": 0.1912686405878539, "grad_norm": 0.9340189695358276, "learning_rate": 3.7334491064361705e-05, "loss": 0.7572, "step": 885 }, { "epoch": 0.1914847633455803, "grad_norm": 0.909929633140564, "learning_rate": 3.7327503815243644e-05, "loss": 0.955, "step": 886 }, { "epoch": 0.19170088610330668, "grad_norm": 0.8539615869522095, "learning_rate": 3.732050807568878e-05, "loss": 0.836, "step": 887 }, { "epoch": 0.19191700886103308, "grad_norm": 0.9616323709487915, "learning_rate": 3.7313503849124986e-05, "loss": 1.0609, "step": 888 }, { "epoch": 0.19213313161875945, "grad_norm": 0.9650198221206665, "learning_rate": 3.730649113898433e-05, "loss": 0.8262, "step": 889 }, { "epoch": 0.19234925437648584, "grad_norm": 0.8615740537643433, "learning_rate": 3.729946994870303e-05, "loss": 0.7607, "step": 890 }, { "epoch": 0.19256537713421223, "grad_norm": 0.8472082614898682, "learning_rate": 3.729244028172144e-05, "loss": 0.9098, "step": 891 }, { "epoch": 0.19278149989193863, "grad_norm": 0.9803862571716309, "learning_rate": 3.7285402141484093e-05, "loss": 0.8824, "step": 892 }, { "epoch": 0.192997622649665, "grad_norm": 1.0066810846328735, "learning_rate": 3.727835553143965e-05, "loss": 1.0202, "step": 893 }, { "epoch": 0.1932137454073914, "grad_norm": 1.0869866609573364, "learning_rate": 3.7271300455040936e-05, "loss": 0.8883, "step": 894 }, { "epoch": 0.19342986816511779, "grad_norm": 0.9176041483879089, "learning_rate": 3.7264236915744924e-05, "loss": 0.857, "step": 895 }, { "epoch": 0.19364599092284418, "grad_norm": 0.8855222463607788, "learning_rate": 3.725716491701274e-05, "loss": 1.0702, "step": 896 }, { "epoch": 0.19386211368057057, "grad_norm": 1.1113038063049316, "learning_rate": 3.7250084462309635e-05, "loss": 0.9795, "step": 897 }, { "epoch": 0.19407823643829694, "grad_norm": 1.0217797756195068, "learning_rate": 3.7242995555105016e-05, "loss": 0.971, "step": 898 }, { "epoch": 0.19429435919602334, "grad_norm": 1.0123941898345947, "learning_rate": 3.723589819887244e-05, "loss": 1.0251, "step": 899 }, { "epoch": 0.19451048195374973, "grad_norm": 0.7982286810874939, "learning_rate": 3.7228792397089584e-05, "loss": 0.8106, "step": 900 }, { "epoch": 0.19472660471147613, "grad_norm": 0.9076669812202454, "learning_rate": 3.722167815323829e-05, "loss": 0.9463, "step": 901 }, { "epoch": 0.19494272746920252, "grad_norm": 0.9631551504135132, "learning_rate": 3.7214555470804505e-05, "loss": 0.9932, "step": 902 }, { "epoch": 0.1951588502269289, "grad_norm": 0.9473233819007874, "learning_rate": 3.720742435327834e-05, "loss": 0.8753, "step": 903 }, { "epoch": 0.19537497298465528, "grad_norm": 0.905228853225708, "learning_rate": 3.720028480415401e-05, "loss": 0.8958, "step": 904 }, { "epoch": 0.19559109574238168, "grad_norm": 0.8304605484008789, "learning_rate": 3.7193136826929894e-05, "loss": 0.8962, "step": 905 }, { "epoch": 0.19580721850010807, "grad_norm": 0.8458021879196167, "learning_rate": 3.7185980425108473e-05, "loss": 1.0285, "step": 906 }, { "epoch": 0.19602334125783444, "grad_norm": 0.8611153960227966, "learning_rate": 3.7178815602196385e-05, "loss": 0.8238, "step": 907 }, { "epoch": 0.19623946401556083, "grad_norm": 1.0125149488449097, "learning_rate": 3.717164236170435e-05, "loss": 0.9488, "step": 908 }, { "epoch": 0.19645558677328723, "grad_norm": 0.872134268283844, "learning_rate": 3.7164460707147255e-05, "loss": 0.8408, "step": 909 }, { "epoch": 0.19667170953101362, "grad_norm": 0.9392416477203369, "learning_rate": 3.715727064204409e-05, "loss": 0.992, "step": 910 }, { "epoch": 0.19688783228874002, "grad_norm": 1.0227292776107788, "learning_rate": 3.715007216991798e-05, "loss": 0.9461, "step": 911 }, { "epoch": 0.19710395504646638, "grad_norm": 1.30838942527771, "learning_rate": 3.7142865294296153e-05, "loss": 0.9096, "step": 912 }, { "epoch": 0.19732007780419278, "grad_norm": 0.854081392288208, "learning_rate": 3.713565001870995e-05, "loss": 0.9528, "step": 913 }, { "epoch": 0.19753620056191917, "grad_norm": 0.9335965514183044, "learning_rate": 3.712842634669486e-05, "loss": 0.9381, "step": 914 }, { "epoch": 0.19775232331964557, "grad_norm": 0.8086388111114502, "learning_rate": 3.712119428179045e-05, "loss": 0.9615, "step": 915 }, { "epoch": 0.19796844607737193, "grad_norm": 0.9744918346405029, "learning_rate": 3.7113953827540424e-05, "loss": 1.0383, "step": 916 }, { "epoch": 0.19818456883509833, "grad_norm": 0.9365674257278442, "learning_rate": 3.710670498749258e-05, "loss": 0.9333, "step": 917 }, { "epoch": 0.19840069159282472, "grad_norm": 0.9845136404037476, "learning_rate": 3.709944776519883e-05, "loss": 1.1053, "step": 918 }, { "epoch": 0.19861681435055112, "grad_norm": 0.9566503763198853, "learning_rate": 3.709218216421522e-05, "loss": 0.7985, "step": 919 }, { "epoch": 0.1988329371082775, "grad_norm": 0.9244258403778076, "learning_rate": 3.708490818810185e-05, "loss": 0.8061, "step": 920 }, { "epoch": 0.19904905986600388, "grad_norm": 0.8147101998329163, "learning_rate": 3.707762584042297e-05, "loss": 0.8016, "step": 921 }, { "epoch": 0.19926518262373027, "grad_norm": 0.8522989153862, "learning_rate": 3.70703351247469e-05, "loss": 0.8835, "step": 922 }, { "epoch": 0.19948130538145667, "grad_norm": 1.0560715198516846, "learning_rate": 3.7063036044646076e-05, "loss": 1.2435, "step": 923 }, { "epoch": 0.19969742813918306, "grad_norm": 0.8863456845283508, "learning_rate": 3.705572860369704e-05, "loss": 0.9555, "step": 924 }, { "epoch": 0.19991355089690943, "grad_norm": 0.7883124351501465, "learning_rate": 3.704841280548041e-05, "loss": 0.728, "step": 925 }, { "epoch": 0.20012967365463583, "grad_norm": 0.8467419147491455, "learning_rate": 3.704108865358093e-05, "loss": 0.7594, "step": 926 }, { "epoch": 0.20034579641236222, "grad_norm": 1.0494290590286255, "learning_rate": 3.7033756151587385e-05, "loss": 0.9632, "step": 927 }, { "epoch": 0.20056191917008862, "grad_norm": 0.8138583302497864, "learning_rate": 3.702641530309271e-05, "loss": 0.8598, "step": 928 }, { "epoch": 0.200778041927815, "grad_norm": 0.8637174367904663, "learning_rate": 3.70190661116939e-05, "loss": 0.9428, "step": 929 }, { "epoch": 0.20099416468554138, "grad_norm": 0.8784462213516235, "learning_rate": 3.701170858099203e-05, "loss": 0.8025, "step": 930 }, { "epoch": 0.20121028744326777, "grad_norm": 0.9863048195838928, "learning_rate": 3.700434271459229e-05, "loss": 1.1744, "step": 931 }, { "epoch": 0.20142641020099417, "grad_norm": 0.960402250289917, "learning_rate": 3.6996968516103915e-05, "loss": 1.1154, "step": 932 }, { "epoch": 0.20164253295872056, "grad_norm": 0.9031201601028442, "learning_rate": 3.6989585989140276e-05, "loss": 1.0328, "step": 933 }, { "epoch": 0.20185865571644696, "grad_norm": 0.8621350526809692, "learning_rate": 3.698219513731876e-05, "loss": 0.9169, "step": 934 }, { "epoch": 0.20207477847417332, "grad_norm": 0.8853005766868591, "learning_rate": 3.697479596426089e-05, "loss": 1.019, "step": 935 }, { "epoch": 0.20229090123189972, "grad_norm": 0.8601515293121338, "learning_rate": 3.6967388473592236e-05, "loss": 0.8714, "step": 936 }, { "epoch": 0.2025070239896261, "grad_norm": 0.9690818786621094, "learning_rate": 3.6959972668942456e-05, "loss": 1.1918, "step": 937 }, { "epoch": 0.2027231467473525, "grad_norm": 0.8859080672264099, "learning_rate": 3.695254855394527e-05, "loss": 0.6843, "step": 938 }, { "epoch": 0.20293926950507887, "grad_norm": 0.819726288318634, "learning_rate": 3.694511613223849e-05, "loss": 0.8831, "step": 939 }, { "epoch": 0.20315539226280527, "grad_norm": 0.8103852868080139, "learning_rate": 3.693767540746397e-05, "loss": 0.7011, "step": 940 }, { "epoch": 0.20337151502053166, "grad_norm": 0.8728498220443726, "learning_rate": 3.6930226383267655e-05, "loss": 0.8609, "step": 941 }, { "epoch": 0.20358763777825806, "grad_norm": 0.8289908170700073, "learning_rate": 3.692276906329955e-05, "loss": 0.9882, "step": 942 }, { "epoch": 0.20380376053598445, "grad_norm": 0.9996088743209839, "learning_rate": 3.691530345121372e-05, "loss": 0.9028, "step": 943 }, { "epoch": 0.20401988329371082, "grad_norm": 0.871531069278717, "learning_rate": 3.690782955066831e-05, "loss": 0.8812, "step": 944 }, { "epoch": 0.2042360060514372, "grad_norm": 0.9398096799850464, "learning_rate": 3.69003473653255e-05, "loss": 1.0003, "step": 945 }, { "epoch": 0.2044521288091636, "grad_norm": 0.909731924533844, "learning_rate": 3.689285689885154e-05, "loss": 0.9512, "step": 946 }, { "epoch": 0.20466825156689, "grad_norm": 0.8529198169708252, "learning_rate": 3.6885358154916765e-05, "loss": 0.7834, "step": 947 }, { "epoch": 0.20488437432461637, "grad_norm": 0.8345663547515869, "learning_rate": 3.687785113719552e-05, "loss": 0.9069, "step": 948 }, { "epoch": 0.20510049708234276, "grad_norm": 0.9323077201843262, "learning_rate": 3.687033584936624e-05, "loss": 0.8803, "step": 949 }, { "epoch": 0.20531661984006916, "grad_norm": 0.8528758883476257, "learning_rate": 3.686281229511138e-05, "loss": 0.9107, "step": 950 }, { "epoch": 0.20553274259779555, "grad_norm": 0.8408779501914978, "learning_rate": 3.685528047811749e-05, "loss": 0.8124, "step": 951 }, { "epoch": 0.20574886535552195, "grad_norm": 1.095395565032959, "learning_rate": 3.684774040207512e-05, "loss": 0.8935, "step": 952 }, { "epoch": 0.20596498811324832, "grad_norm": 1.0189132690429688, "learning_rate": 3.684019207067891e-05, "loss": 1.014, "step": 953 }, { "epoch": 0.2061811108709747, "grad_norm": 0.9494995474815369, "learning_rate": 3.68326354876275e-05, "loss": 0.8565, "step": 954 }, { "epoch": 0.2063972336287011, "grad_norm": 0.9605530500411987, "learning_rate": 3.6825070656623626e-05, "loss": 1.0225, "step": 955 }, { "epoch": 0.2066133563864275, "grad_norm": 0.8805063962936401, "learning_rate": 3.681749758137402e-05, "loss": 0.9296, "step": 956 }, { "epoch": 0.20682947914415387, "grad_norm": 1.0285567045211792, "learning_rate": 3.6809916265589476e-05, "loss": 1.1333, "step": 957 }, { "epoch": 0.20704560190188026, "grad_norm": 0.9766727089881897, "learning_rate": 3.6802326712984816e-05, "loss": 0.9373, "step": 958 }, { "epoch": 0.20726172465960666, "grad_norm": 1.1031501293182373, "learning_rate": 3.679472892727891e-05, "loss": 1.0861, "step": 959 }, { "epoch": 0.20747784741733305, "grad_norm": 0.7932749390602112, "learning_rate": 3.6787122912194656e-05, "loss": 0.8303, "step": 960 }, { "epoch": 0.20769397017505944, "grad_norm": 1.0850369930267334, "learning_rate": 3.677950867145898e-05, "loss": 0.831, "step": 961 }, { "epoch": 0.2079100929327858, "grad_norm": 0.943305253982544, "learning_rate": 3.677188620880284e-05, "loss": 0.9211, "step": 962 }, { "epoch": 0.2081262156905122, "grad_norm": 0.9907835125923157, "learning_rate": 3.676425552796123e-05, "loss": 0.9135, "step": 963 }, { "epoch": 0.2083423384482386, "grad_norm": 0.8394286632537842, "learning_rate": 3.675661663267317e-05, "loss": 0.8862, "step": 964 }, { "epoch": 0.208558461205965, "grad_norm": 1.1331239938735962, "learning_rate": 3.674896952668169e-05, "loss": 1.1158, "step": 965 }, { "epoch": 0.2087745839636914, "grad_norm": 0.8609957098960876, "learning_rate": 3.674131421373386e-05, "loss": 0.8496, "step": 966 }, { "epoch": 0.20899070672141776, "grad_norm": 0.8272537589073181, "learning_rate": 3.6733650697580774e-05, "loss": 0.8221, "step": 967 }, { "epoch": 0.20920682947914415, "grad_norm": 0.9432743787765503, "learning_rate": 3.672597898197752e-05, "loss": 1.0988, "step": 968 }, { "epoch": 0.20942295223687055, "grad_norm": 0.9202385544776917, "learning_rate": 3.6718299070683234e-05, "loss": 0.8537, "step": 969 }, { "epoch": 0.20963907499459694, "grad_norm": 0.8289174437522888, "learning_rate": 3.6710610967461044e-05, "loss": 0.9086, "step": 970 }, { "epoch": 0.2098551977523233, "grad_norm": 0.9070008397102356, "learning_rate": 3.670291467607812e-05, "loss": 0.809, "step": 971 }, { "epoch": 0.2100713205100497, "grad_norm": 1.0298635959625244, "learning_rate": 3.669521020030561e-05, "loss": 0.9918, "step": 972 }, { "epoch": 0.2102874432677761, "grad_norm": 0.9390925168991089, "learning_rate": 3.668749754391869e-05, "loss": 0.7554, "step": 973 }, { "epoch": 0.2105035660255025, "grad_norm": 0.8981751203536987, "learning_rate": 3.667977671069656e-05, "loss": 0.9524, "step": 974 }, { "epoch": 0.2107196887832289, "grad_norm": 0.9763649702072144, "learning_rate": 3.667204770442239e-05, "loss": 0.8765, "step": 975 }, { "epoch": 0.21093581154095525, "grad_norm": 0.8939294815063477, "learning_rate": 3.6664310528883396e-05, "loss": 1.0306, "step": 976 }, { "epoch": 0.21115193429868165, "grad_norm": 0.9194285273551941, "learning_rate": 3.665656518787076e-05, "loss": 0.8041, "step": 977 }, { "epoch": 0.21136805705640804, "grad_norm": 0.8309286832809448, "learning_rate": 3.664881168517968e-05, "loss": 1.0146, "step": 978 }, { "epoch": 0.21158417981413444, "grad_norm": 0.9171546101570129, "learning_rate": 3.664105002460937e-05, "loss": 1.1928, "step": 979 }, { "epoch": 0.2118003025718608, "grad_norm": 0.8434054255485535, "learning_rate": 3.663328020996301e-05, "loss": 0.9317, "step": 980 }, { "epoch": 0.2120164253295872, "grad_norm": 0.9480212330818176, "learning_rate": 3.66255022450478e-05, "loss": 0.9464, "step": 981 }, { "epoch": 0.2122325480873136, "grad_norm": 0.8842633366584778, "learning_rate": 3.661771613367493e-05, "loss": 0.8515, "step": 982 }, { "epoch": 0.21244867084504, "grad_norm": 0.8174552321434021, "learning_rate": 3.660992187965956e-05, "loss": 0.9086, "step": 983 }, { "epoch": 0.21266479360276638, "grad_norm": 0.9173724055290222, "learning_rate": 3.660211948682086e-05, "loss": 0.9264, "step": 984 }, { "epoch": 0.21288091636049275, "grad_norm": 0.9887210130691528, "learning_rate": 3.6594308958982004e-05, "loss": 1.0338, "step": 985 }, { "epoch": 0.21309703911821914, "grad_norm": 0.9939167499542236, "learning_rate": 3.658649029997011e-05, "loss": 0.9151, "step": 986 }, { "epoch": 0.21331316187594554, "grad_norm": 0.9670457243919373, "learning_rate": 3.657866351361632e-05, "loss": 0.9914, "step": 987 }, { "epoch": 0.21352928463367193, "grad_norm": 0.8448925614356995, "learning_rate": 3.657082860375572e-05, "loss": 0.9263, "step": 988 }, { "epoch": 0.2137454073913983, "grad_norm": 0.9376818537712097, "learning_rate": 3.6562985574227414e-05, "loss": 1.036, "step": 989 }, { "epoch": 0.2139615301491247, "grad_norm": 0.9312554001808167, "learning_rate": 3.655513442887447e-05, "loss": 0.9952, "step": 990 }, { "epoch": 0.2141776529068511, "grad_norm": 0.9356735944747925, "learning_rate": 3.6547275171543924e-05, "loss": 1.3891, "step": 991 }, { "epoch": 0.21439377566457749, "grad_norm": 0.9153226613998413, "learning_rate": 3.6539407806086796e-05, "loss": 0.9982, "step": 992 }, { "epoch": 0.21460989842230388, "grad_norm": 1.0015946626663208, "learning_rate": 3.653153233635808e-05, "loss": 0.9758, "step": 993 }, { "epoch": 0.21482602118003025, "grad_norm": 0.928017795085907, "learning_rate": 3.652364876621673e-05, "loss": 0.9092, "step": 994 }, { "epoch": 0.21504214393775664, "grad_norm": 0.8220463991165161, "learning_rate": 3.651575709952568e-05, "loss": 0.9433, "step": 995 }, { "epoch": 0.21525826669548304, "grad_norm": 0.9616919755935669, "learning_rate": 3.650785734015183e-05, "loss": 0.8842, "step": 996 }, { "epoch": 0.21547438945320943, "grad_norm": 0.9724686741828918, "learning_rate": 3.6499949491966046e-05, "loss": 0.9112, "step": 997 }, { "epoch": 0.21569051221093583, "grad_norm": 0.8349328637123108, "learning_rate": 3.649203355884316e-05, "loss": 0.9399, "step": 998 }, { "epoch": 0.2159066349686622, "grad_norm": 0.8705836534500122, "learning_rate": 3.648410954466195e-05, "loss": 0.9218, "step": 999 }, { "epoch": 0.2161227577263886, "grad_norm": 0.8451696634292603, "learning_rate": 3.6476177453305164e-05, "loss": 0.9779, "step": 1000 }, { "epoch": 0.21633888048411498, "grad_norm": 0.9489712715148926, "learning_rate": 3.646823728865952e-05, "loss": 1.0642, "step": 1001 }, { "epoch": 0.21655500324184138, "grad_norm": 0.8946622610092163, "learning_rate": 3.6460289054615665e-05, "loss": 0.9093, "step": 1002 }, { "epoch": 0.21677112599956774, "grad_norm": 0.9775545001029968, "learning_rate": 3.6452332755068226e-05, "loss": 0.874, "step": 1003 }, { "epoch": 0.21698724875729414, "grad_norm": 0.7812718152999878, "learning_rate": 3.6444368393915774e-05, "loss": 0.7704, "step": 1004 }, { "epoch": 0.21720337151502053, "grad_norm": 0.995879590511322, "learning_rate": 3.643639597506082e-05, "loss": 1.0089, "step": 1005 }, { "epoch": 0.21741949427274693, "grad_norm": 1.089314341545105, "learning_rate": 3.642841550240983e-05, "loss": 0.9043, "step": 1006 }, { "epoch": 0.21763561703047332, "grad_norm": 0.9540629982948303, "learning_rate": 3.642042697987323e-05, "loss": 0.9521, "step": 1007 }, { "epoch": 0.2178517397881997, "grad_norm": 0.908274233341217, "learning_rate": 3.6412430411365365e-05, "loss": 0.9355, "step": 1008 }, { "epoch": 0.21806786254592608, "grad_norm": 0.8804648518562317, "learning_rate": 3.6404425800804533e-05, "loss": 1.1751, "step": 1009 }, { "epoch": 0.21828398530365248, "grad_norm": 0.9607532024383545, "learning_rate": 3.639641315211299e-05, "loss": 0.8435, "step": 1010 }, { "epoch": 0.21850010806137887, "grad_norm": 0.9816673994064331, "learning_rate": 3.638839246921689e-05, "loss": 1.0648, "step": 1011 }, { "epoch": 0.21871623081910524, "grad_norm": 0.8674247860908508, "learning_rate": 3.638036375604638e-05, "loss": 1.0623, "step": 1012 }, { "epoch": 0.21893235357683163, "grad_norm": 1.0201153755187988, "learning_rate": 3.637232701653549e-05, "loss": 1.1882, "step": 1013 }, { "epoch": 0.21914847633455803, "grad_norm": 0.870021641254425, "learning_rate": 3.6364282254622215e-05, "loss": 0.8389, "step": 1014 }, { "epoch": 0.21936459909228442, "grad_norm": 0.9027089476585388, "learning_rate": 3.635622947424845e-05, "loss": 1.0713, "step": 1015 }, { "epoch": 0.21958072185001082, "grad_norm": 1.028327465057373, "learning_rate": 3.634816867936007e-05, "loss": 1.1063, "step": 1016 }, { "epoch": 0.21979684460773719, "grad_norm": 0.9232480525970459, "learning_rate": 3.6340099873906814e-05, "loss": 0.9094, "step": 1017 }, { "epoch": 0.22001296736546358, "grad_norm": 0.879548192024231, "learning_rate": 3.63320230618424e-05, "loss": 0.9462, "step": 1018 }, { "epoch": 0.22022909012318997, "grad_norm": 0.9714813232421875, "learning_rate": 3.632393824712444e-05, "loss": 0.95, "step": 1019 }, { "epoch": 0.22044521288091637, "grad_norm": 0.9346253871917725, "learning_rate": 3.631584543371447e-05, "loss": 0.9187, "step": 1020 }, { "epoch": 0.22066133563864274, "grad_norm": 0.975001335144043, "learning_rate": 3.630774462557796e-05, "loss": 0.9681, "step": 1021 }, { "epoch": 0.22087745839636913, "grad_norm": 0.905458390712738, "learning_rate": 3.629963582668428e-05, "loss": 1.0308, "step": 1022 }, { "epoch": 0.22109358115409553, "grad_norm": 0.9808592796325684, "learning_rate": 3.629151904100672e-05, "loss": 0.7074, "step": 1023 }, { "epoch": 0.22130970391182192, "grad_norm": 0.9215885400772095, "learning_rate": 3.628339427252249e-05, "loss": 0.9571, "step": 1024 }, { "epoch": 0.22152582666954831, "grad_norm": 0.9385959506034851, "learning_rate": 3.627526152521271e-05, "loss": 0.8931, "step": 1025 }, { "epoch": 0.22174194942727468, "grad_norm": 0.8258581757545471, "learning_rate": 3.626712080306241e-05, "loss": 0.8248, "step": 1026 }, { "epoch": 0.22195807218500108, "grad_norm": 0.9462264776229858, "learning_rate": 3.625897211006051e-05, "loss": 0.9672, "step": 1027 }, { "epoch": 0.22217419494272747, "grad_norm": 0.9047693610191345, "learning_rate": 3.625081545019987e-05, "loss": 0.9575, "step": 1028 }, { "epoch": 0.22239031770045387, "grad_norm": 0.7842395901679993, "learning_rate": 3.624265082747723e-05, "loss": 0.8527, "step": 1029 }, { "epoch": 0.22260644045818023, "grad_norm": 0.8913676142692566, "learning_rate": 3.623447824589323e-05, "loss": 0.9988, "step": 1030 }, { "epoch": 0.22282256321590663, "grad_norm": 0.9162867665290833, "learning_rate": 3.622629770945241e-05, "loss": 0.817, "step": 1031 }, { "epoch": 0.22303868597363302, "grad_norm": 0.9730992317199707, "learning_rate": 3.621810922216323e-05, "loss": 0.8078, "step": 1032 }, { "epoch": 0.22325480873135942, "grad_norm": 0.951578676700592, "learning_rate": 3.620991278803802e-05, "loss": 0.9413, "step": 1033 }, { "epoch": 0.2234709314890858, "grad_norm": 0.9462690949440002, "learning_rate": 3.620170841109301e-05, "loss": 1.0054, "step": 1034 }, { "epoch": 0.22368705424681218, "grad_norm": 0.9456660747528076, "learning_rate": 3.6193496095348345e-05, "loss": 0.879, "step": 1035 }, { "epoch": 0.22390317700453857, "grad_norm": 0.9455549120903015, "learning_rate": 3.618527584482801e-05, "loss": 1.0426, "step": 1036 }, { "epoch": 0.22411929976226497, "grad_norm": 0.8765550255775452, "learning_rate": 3.617704766355994e-05, "loss": 0.9799, "step": 1037 }, { "epoch": 0.22433542251999136, "grad_norm": 0.9239512085914612, "learning_rate": 3.6168811555575905e-05, "loss": 0.9659, "step": 1038 }, { "epoch": 0.22455154527771776, "grad_norm": 0.8369002342224121, "learning_rate": 3.6160567524911586e-05, "loss": 0.8801, "step": 1039 }, { "epoch": 0.22476766803544412, "grad_norm": 0.9263517260551453, "learning_rate": 3.6152315575606535e-05, "loss": 0.9159, "step": 1040 }, { "epoch": 0.22498379079317052, "grad_norm": 0.8030198216438293, "learning_rate": 3.6144055711704196e-05, "loss": 0.7952, "step": 1041 }, { "epoch": 0.2251999135508969, "grad_norm": 0.9598090052604675, "learning_rate": 3.6135787937251875e-05, "loss": 0.9139, "step": 1042 }, { "epoch": 0.2254160363086233, "grad_norm": 0.8471052050590515, "learning_rate": 3.612751225630076e-05, "loss": 0.9855, "step": 1043 }, { "epoch": 0.22563215906634967, "grad_norm": 0.8563123941421509, "learning_rate": 3.6119228672905926e-05, "loss": 0.9401, "step": 1044 }, { "epoch": 0.22584828182407607, "grad_norm": 0.9090791344642639, "learning_rate": 3.6110937191126305e-05, "loss": 0.9147, "step": 1045 }, { "epoch": 0.22606440458180246, "grad_norm": 0.8402764201164246, "learning_rate": 3.61026378150247e-05, "loss": 0.9035, "step": 1046 }, { "epoch": 0.22628052733952886, "grad_norm": 0.8264151215553284, "learning_rate": 3.6094330548667794e-05, "loss": 0.8666, "step": 1047 }, { "epoch": 0.22649665009725525, "grad_norm": 0.9080968499183655, "learning_rate": 3.6086015396126126e-05, "loss": 0.8237, "step": 1048 }, { "epoch": 0.22671277285498162, "grad_norm": 0.8279055953025818, "learning_rate": 3.6077692361474095e-05, "loss": 0.8232, "step": 1049 }, { "epoch": 0.22692889561270801, "grad_norm": 1.0176692008972168, "learning_rate": 3.6069361448789976e-05, "loss": 0.7964, "step": 1050 }, { "epoch": 0.2271450183704344, "grad_norm": 0.803254246711731, "learning_rate": 3.606102266215589e-05, "loss": 0.7906, "step": 1051 }, { "epoch": 0.2273611411281608, "grad_norm": 0.908789873123169, "learning_rate": 3.605267600565783e-05, "loss": 0.9803, "step": 1052 }, { "epoch": 0.22757726388588717, "grad_norm": 0.9088970422744751, "learning_rate": 3.604432148338563e-05, "loss": 0.954, "step": 1053 }, { "epoch": 0.22779338664361357, "grad_norm": 0.9667347073554993, "learning_rate": 3.6035959099433e-05, "loss": 1.0223, "step": 1054 }, { "epoch": 0.22800950940133996, "grad_norm": 0.8741658926010132, "learning_rate": 3.602758885789748e-05, "loss": 0.99, "step": 1055 }, { "epoch": 0.22822563215906636, "grad_norm": 0.8842224478721619, "learning_rate": 3.6019210762880464e-05, "loss": 0.8577, "step": 1056 }, { "epoch": 0.22844175491679275, "grad_norm": 0.8977473378181458, "learning_rate": 3.601082481848721e-05, "loss": 1.0656, "step": 1057 }, { "epoch": 0.22865787767451912, "grad_norm": 1.0967479944229126, "learning_rate": 3.6002431028826806e-05, "loss": 0.8729, "step": 1058 }, { "epoch": 0.2288740004322455, "grad_norm": 0.8644981384277344, "learning_rate": 3.599402939801219e-05, "loss": 0.9116, "step": 1059 }, { "epoch": 0.2290901231899719, "grad_norm": 1.03041672706604, "learning_rate": 3.5985619930160146e-05, "loss": 0.9206, "step": 1060 }, { "epoch": 0.2293062459476983, "grad_norm": 1.0125470161437988, "learning_rate": 3.5977202629391284e-05, "loss": 0.9103, "step": 1061 }, { "epoch": 0.22952236870542467, "grad_norm": 0.8819757103919983, "learning_rate": 3.596877749983007e-05, "loss": 0.9736, "step": 1062 }, { "epoch": 0.22973849146315106, "grad_norm": 0.8741188645362854, "learning_rate": 3.5960344545604796e-05, "loss": 0.977, "step": 1063 }, { "epoch": 0.22995461422087746, "grad_norm": 0.8714580535888672, "learning_rate": 3.5951903770847584e-05, "loss": 0.977, "step": 1064 }, { "epoch": 0.23017073697860385, "grad_norm": 0.8280560374259949, "learning_rate": 3.5943455179694404e-05, "loss": 0.9297, "step": 1065 }, { "epoch": 0.23038685973633025, "grad_norm": 0.8424382209777832, "learning_rate": 3.5934998776285044e-05, "loss": 1.046, "step": 1066 }, { "epoch": 0.2306029824940566, "grad_norm": 0.9405550956726074, "learning_rate": 3.5926534564763116e-05, "loss": 1.0598, "step": 1067 }, { "epoch": 0.230819105251783, "grad_norm": 1.014173984527588, "learning_rate": 3.591806254927607e-05, "loss": 0.8757, "step": 1068 }, { "epoch": 0.2310352280095094, "grad_norm": 1.0071674585342407, "learning_rate": 3.5909582733975174e-05, "loss": 0.9478, "step": 1069 }, { "epoch": 0.2312513507672358, "grad_norm": 1.0263164043426514, "learning_rate": 3.590109512301552e-05, "loss": 0.9838, "step": 1070 }, { "epoch": 0.2314674735249622, "grad_norm": 0.873468816280365, "learning_rate": 3.5892599720556e-05, "loss": 0.8863, "step": 1071 }, { "epoch": 0.23168359628268856, "grad_norm": 0.8657466769218445, "learning_rate": 3.588409653075937e-05, "loss": 0.9229, "step": 1072 }, { "epoch": 0.23189971904041495, "grad_norm": 0.993262767791748, "learning_rate": 3.587558555779215e-05, "loss": 0.7674, "step": 1073 }, { "epoch": 0.23211584179814135, "grad_norm": 0.7991393804550171, "learning_rate": 3.586706680582471e-05, "loss": 0.7787, "step": 1074 }, { "epoch": 0.23233196455586774, "grad_norm": 0.8678078651428223, "learning_rate": 3.585854027903121e-05, "loss": 0.774, "step": 1075 }, { "epoch": 0.2325480873135941, "grad_norm": 0.9204895496368408, "learning_rate": 3.585000598158964e-05, "loss": 0.9951, "step": 1076 }, { "epoch": 0.2327642100713205, "grad_norm": 0.9125127196311951, "learning_rate": 3.584146391768177e-05, "loss": 0.9557, "step": 1077 }, { "epoch": 0.2329803328290469, "grad_norm": 0.856071949005127, "learning_rate": 3.58329140914932e-05, "loss": 1.0097, "step": 1078 }, { "epoch": 0.2331964555867733, "grad_norm": 0.9988387227058411, "learning_rate": 3.582435650721333e-05, "loss": 1.1671, "step": 1079 }, { "epoch": 0.2334125783444997, "grad_norm": 0.9570616483688354, "learning_rate": 3.581579116903536e-05, "loss": 0.9732, "step": 1080 }, { "epoch": 0.23362870110222606, "grad_norm": 0.926640510559082, "learning_rate": 3.580721808115627e-05, "loss": 0.7473, "step": 1081 }, { "epoch": 0.23384482385995245, "grad_norm": 0.8650385737419128, "learning_rate": 3.579863724777686e-05, "loss": 0.8781, "step": 1082 }, { "epoch": 0.23406094661767884, "grad_norm": 0.8797435760498047, "learning_rate": 3.579004867310172e-05, "loss": 0.9566, "step": 1083 }, { "epoch": 0.23427706937540524, "grad_norm": 0.8567848205566406, "learning_rate": 3.578145236133923e-05, "loss": 0.8627, "step": 1084 }, { "epoch": 0.2344931921331316, "grad_norm": 0.9849474430084229, "learning_rate": 3.577284831670157e-05, "loss": 1.0674, "step": 1085 }, { "epoch": 0.234709314890858, "grad_norm": 0.8508183360099792, "learning_rate": 3.5764236543404694e-05, "loss": 0.8411, "step": 1086 }, { "epoch": 0.2349254376485844, "grad_norm": 0.8463327884674072, "learning_rate": 3.575561704566835e-05, "loss": 0.9291, "step": 1087 }, { "epoch": 0.2351415604063108, "grad_norm": 0.8836262822151184, "learning_rate": 3.574698982771608e-05, "loss": 0.8609, "step": 1088 }, { "epoch": 0.23535768316403718, "grad_norm": 1.0963822603225708, "learning_rate": 3.57383548937752e-05, "loss": 0.984, "step": 1089 }, { "epoch": 0.23557380592176355, "grad_norm": 0.9397730231285095, "learning_rate": 3.572971224807679e-05, "loss": 0.9046, "step": 1090 }, { "epoch": 0.23578992867948995, "grad_norm": 0.9169086217880249, "learning_rate": 3.5721061894855756e-05, "loss": 1.0361, "step": 1091 }, { "epoch": 0.23600605143721634, "grad_norm": 1.0400639772415161, "learning_rate": 3.5712403838350726e-05, "loss": 1.0632, "step": 1092 }, { "epoch": 0.23622217419494274, "grad_norm": 0.8737825155258179, "learning_rate": 3.570373808280414e-05, "loss": 0.9283, "step": 1093 }, { "epoch": 0.2364382969526691, "grad_norm": 0.9475460648536682, "learning_rate": 3.569506463246219e-05, "loss": 0.9713, "step": 1094 }, { "epoch": 0.2366544197103955, "grad_norm": 0.8910499215126038, "learning_rate": 3.568638349157486e-05, "loss": 1.0578, "step": 1095 }, { "epoch": 0.2368705424681219, "grad_norm": 0.933046281337738, "learning_rate": 3.567769466439588e-05, "loss": 0.7465, "step": 1096 }, { "epoch": 0.2370866652258483, "grad_norm": 0.8022202253341675, "learning_rate": 3.5668998155182746e-05, "loss": 0.7768, "step": 1097 }, { "epoch": 0.23730278798357468, "grad_norm": 0.9533029198646545, "learning_rate": 3.5660293968196744e-05, "loss": 0.9189, "step": 1098 }, { "epoch": 0.23751891074130105, "grad_norm": 0.9283272624015808, "learning_rate": 3.56515821077029e-05, "loss": 0.975, "step": 1099 }, { "epoch": 0.23773503349902744, "grad_norm": 0.933311402797699, "learning_rate": 3.564286257797001e-05, "loss": 0.9122, "step": 1100 }, { "epoch": 0.23795115625675384, "grad_norm": 1.4455485343933105, "learning_rate": 3.563413538327061e-05, "loss": 1.0093, "step": 1101 }, { "epoch": 0.23816727901448023, "grad_norm": 0.8379711508750916, "learning_rate": 3.5625400527881015e-05, "loss": 0.9233, "step": 1102 }, { "epoch": 0.23838340177220663, "grad_norm": 0.7099660038948059, "learning_rate": 3.5616658016081286e-05, "loss": 0.825, "step": 1103 }, { "epoch": 0.238599524529933, "grad_norm": 0.9785086512565613, "learning_rate": 3.560790785215522e-05, "loss": 0.9785, "step": 1104 }, { "epoch": 0.2388156472876594, "grad_norm": 0.9324660301208496, "learning_rate": 3.559915004039039e-05, "loss": 1.1149, "step": 1105 }, { "epoch": 0.23903177004538578, "grad_norm": 0.9618993401527405, "learning_rate": 3.559038458507811e-05, "loss": 0.972, "step": 1106 }, { "epoch": 0.23924789280311218, "grad_norm": 0.8113493323326111, "learning_rate": 3.558161149051341e-05, "loss": 0.8161, "step": 1107 }, { "epoch": 0.23946401556083854, "grad_norm": 0.8987981081008911, "learning_rate": 3.55728307609951e-05, "loss": 0.8535, "step": 1108 }, { "epoch": 0.23968013831856494, "grad_norm": 0.7710052728652954, "learning_rate": 3.556404240082573e-05, "loss": 0.8278, "step": 1109 }, { "epoch": 0.23989626107629133, "grad_norm": 0.8242369890213013, "learning_rate": 3.555524641431155e-05, "loss": 1.0152, "step": 1110 }, { "epoch": 0.24011238383401773, "grad_norm": 0.8900905251502991, "learning_rate": 3.554644280576259e-05, "loss": 0.8891, "step": 1111 }, { "epoch": 0.24032850659174412, "grad_norm": 0.8917087316513062, "learning_rate": 3.553763157949259e-05, "loss": 1.0774, "step": 1112 }, { "epoch": 0.2405446293494705, "grad_norm": 0.8916319012641907, "learning_rate": 3.5528812739819044e-05, "loss": 0.9739, "step": 1113 }, { "epoch": 0.24076075210719688, "grad_norm": 0.9248983860015869, "learning_rate": 3.551998629106314e-05, "loss": 0.9, "step": 1114 }, { "epoch": 0.24097687486492328, "grad_norm": 0.9821251034736633, "learning_rate": 3.551115223754984e-05, "loss": 1.0232, "step": 1115 }, { "epoch": 0.24119299762264967, "grad_norm": 0.868484377861023, "learning_rate": 3.5502310583607796e-05, "loss": 0.8949, "step": 1116 }, { "epoch": 0.24140912038037604, "grad_norm": 0.9557080864906311, "learning_rate": 3.5493461333569396e-05, "loss": 0.897, "step": 1117 }, { "epoch": 0.24162524313810244, "grad_norm": 0.9541976451873779, "learning_rate": 3.548460449177077e-05, "loss": 1.0214, "step": 1118 }, { "epoch": 0.24184136589582883, "grad_norm": 0.8650433421134949, "learning_rate": 3.547574006255173e-05, "loss": 1.0235, "step": 1119 }, { "epoch": 0.24205748865355523, "grad_norm": 0.9833268523216248, "learning_rate": 3.546686805025584e-05, "loss": 0.913, "step": 1120 }, { "epoch": 0.24227361141128162, "grad_norm": 0.940438985824585, "learning_rate": 3.5457988459230356e-05, "loss": 0.9043, "step": 1121 }, { "epoch": 0.242489734169008, "grad_norm": 0.774019718170166, "learning_rate": 3.544910129382626e-05, "loss": 0.8052, "step": 1122 }, { "epoch": 0.24270585692673438, "grad_norm": 0.7606857419013977, "learning_rate": 3.544020655839824e-05, "loss": 0.8237, "step": 1123 }, { "epoch": 0.24292197968446078, "grad_norm": 0.9330726861953735, "learning_rate": 3.54313042573047e-05, "loss": 0.8704, "step": 1124 }, { "epoch": 0.24313810244218717, "grad_norm": 0.8465023636817932, "learning_rate": 3.542239439490776e-05, "loss": 0.9687, "step": 1125 }, { "epoch": 0.24335422519991354, "grad_norm": 0.9619770646095276, "learning_rate": 3.54134769755732e-05, "loss": 0.9585, "step": 1126 }, { "epoch": 0.24357034795763993, "grad_norm": 0.7957881689071655, "learning_rate": 3.5404552003670565e-05, "loss": 0.783, "step": 1127 }, { "epoch": 0.24378647071536633, "grad_norm": 0.8766531944274902, "learning_rate": 3.539561948357305e-05, "loss": 0.9029, "step": 1128 }, { "epoch": 0.24400259347309272, "grad_norm": 0.9924162030220032, "learning_rate": 3.538667941965758e-05, "loss": 0.8902, "step": 1129 }, { "epoch": 0.24421871623081912, "grad_norm": 0.7866281270980835, "learning_rate": 3.537773181630477e-05, "loss": 0.7439, "step": 1130 }, { "epoch": 0.24443483898854548, "grad_norm": 0.8730312585830688, "learning_rate": 3.5368776677898906e-05, "loss": 0.9673, "step": 1131 }, { "epoch": 0.24465096174627188, "grad_norm": 0.9427263140678406, "learning_rate": 3.5359814008828006e-05, "loss": 1.077, "step": 1132 }, { "epoch": 0.24486708450399827, "grad_norm": 0.9608944058418274, "learning_rate": 3.5350843813483746e-05, "loss": 1.0296, "step": 1133 }, { "epoch": 0.24508320726172467, "grad_norm": 0.7508792281150818, "learning_rate": 3.53418660962615e-05, "loss": 0.937, "step": 1134 }, { "epoch": 0.24529933001945106, "grad_norm": 0.8189660906791687, "learning_rate": 3.533288086156034e-05, "loss": 0.8369, "step": 1135 }, { "epoch": 0.24551545277717743, "grad_norm": 0.8507649302482605, "learning_rate": 3.5323888113782996e-05, "loss": 0.847, "step": 1136 }, { "epoch": 0.24573157553490382, "grad_norm": 1.0197499990463257, "learning_rate": 3.53148878573359e-05, "loss": 0.8865, "step": 1137 }, { "epoch": 0.24594769829263022, "grad_norm": 0.8537721633911133, "learning_rate": 3.530588009662916e-05, "loss": 1.0585, "step": 1138 }, { "epoch": 0.2461638210503566, "grad_norm": 0.8288100361824036, "learning_rate": 3.5296864836076555e-05, "loss": 0.9157, "step": 1139 }, { "epoch": 0.24637994380808298, "grad_norm": 1.0532606840133667, "learning_rate": 3.5287842080095537e-05, "loss": 1.039, "step": 1140 }, { "epoch": 0.24659606656580937, "grad_norm": 0.9588279128074646, "learning_rate": 3.5278811833107246e-05, "loss": 0.9791, "step": 1141 }, { "epoch": 0.24681218932353577, "grad_norm": 0.9191341996192932, "learning_rate": 3.5269774099536476e-05, "loss": 1.0564, "step": 1142 }, { "epoch": 0.24702831208126216, "grad_norm": 0.7049510478973389, "learning_rate": 3.526072888381169e-05, "loss": 0.6928, "step": 1143 }, { "epoch": 0.24724443483898856, "grad_norm": 1.0857255458831787, "learning_rate": 3.525167619036503e-05, "loss": 1.0761, "step": 1144 }, { "epoch": 0.24746055759671493, "grad_norm": 0.8708821535110474, "learning_rate": 3.52426160236323e-05, "loss": 0.7623, "step": 1145 }, { "epoch": 0.24767668035444132, "grad_norm": 0.7799321413040161, "learning_rate": 3.523354838805295e-05, "loss": 0.8589, "step": 1146 }, { "epoch": 0.24789280311216771, "grad_norm": 0.9419688582420349, "learning_rate": 3.52244732880701e-05, "loss": 1.1236, "step": 1147 }, { "epoch": 0.2481089258698941, "grad_norm": 0.9029631018638611, "learning_rate": 3.521539072813054e-05, "loss": 1.0226, "step": 1148 }, { "epoch": 0.24832504862762048, "grad_norm": 1.017147421836853, "learning_rate": 3.5206300712684694e-05, "loss": 0.9981, "step": 1149 }, { "epoch": 0.24854117138534687, "grad_norm": 0.8713418841362, "learning_rate": 3.5197203246186654e-05, "loss": 0.8987, "step": 1150 }, { "epoch": 0.24875729414307327, "grad_norm": 0.8041524291038513, "learning_rate": 3.5188098333094145e-05, "loss": 0.8464, "step": 1151 }, { "epoch": 0.24897341690079966, "grad_norm": 0.9536299705505371, "learning_rate": 3.517898597786857e-05, "loss": 0.9542, "step": 1152 }, { "epoch": 0.24918953965852605, "grad_norm": 0.899425745010376, "learning_rate": 3.516986618497496e-05, "loss": 1.0598, "step": 1153 }, { "epoch": 0.24940566241625242, "grad_norm": 0.8632761240005493, "learning_rate": 3.5160738958881975e-05, "loss": 0.924, "step": 1154 }, { "epoch": 0.24962178517397882, "grad_norm": 0.85684734582901, "learning_rate": 3.5151604304061946e-05, "loss": 0.9598, "step": 1155 }, { "epoch": 0.2498379079317052, "grad_norm": 0.9506204128265381, "learning_rate": 3.514246222499084e-05, "loss": 0.9236, "step": 1156 }, { "epoch": 0.2500540306894316, "grad_norm": 0.8864320516586304, "learning_rate": 3.5133312726148244e-05, "loss": 1.0351, "step": 1157 }, { "epoch": 0.250270153447158, "grad_norm": 1.1389678716659546, "learning_rate": 3.51241558120174e-05, "loss": 1.0681, "step": 1158 }, { "epoch": 0.2504862762048844, "grad_norm": 0.9330379366874695, "learning_rate": 3.511499148708517e-05, "loss": 1.0575, "step": 1159 }, { "epoch": 0.25070239896261076, "grad_norm": 0.7407273054122925, "learning_rate": 3.510581975584205e-05, "loss": 0.7253, "step": 1160 }, { "epoch": 0.25091852172033713, "grad_norm": 0.8479929566383362, "learning_rate": 3.509664062278217e-05, "loss": 0.8569, "step": 1161 }, { "epoch": 0.25113464447806355, "grad_norm": 0.9289430975914001, "learning_rate": 3.5087454092403285e-05, "loss": 0.9247, "step": 1162 }, { "epoch": 0.2513507672357899, "grad_norm": 0.9285940527915955, "learning_rate": 3.507826016920677e-05, "loss": 0.9408, "step": 1163 }, { "epoch": 0.25156688999351634, "grad_norm": 0.8576862812042236, "learning_rate": 3.5069058857697625e-05, "loss": 0.9638, "step": 1164 }, { "epoch": 0.2517830127512427, "grad_norm": 1.0147756338119507, "learning_rate": 3.505985016238448e-05, "loss": 0.9354, "step": 1165 }, { "epoch": 0.2519991355089691, "grad_norm": 0.9034207463264465, "learning_rate": 3.505063408777956e-05, "loss": 1.0437, "step": 1166 }, { "epoch": 0.2522152582666955, "grad_norm": 0.9311251640319824, "learning_rate": 3.504141063839874e-05, "loss": 0.8789, "step": 1167 }, { "epoch": 0.25243138102442186, "grad_norm": 0.9327875971794128, "learning_rate": 3.503217981876147e-05, "loss": 1.0022, "step": 1168 }, { "epoch": 0.2526475037821483, "grad_norm": 0.9139388799667358, "learning_rate": 3.502294163339084e-05, "loss": 0.845, "step": 1169 }, { "epoch": 0.25286362653987465, "grad_norm": 0.9231714010238647, "learning_rate": 3.501369608681354e-05, "loss": 0.887, "step": 1170 }, { "epoch": 0.253079749297601, "grad_norm": 0.9506759643554688, "learning_rate": 3.5004443183559864e-05, "loss": 0.7532, "step": 1171 }, { "epoch": 0.25329587205532744, "grad_norm": 0.9717447757720947, "learning_rate": 3.499518292816371e-05, "loss": 0.9421, "step": 1172 }, { "epoch": 0.2535119948130538, "grad_norm": 1.1096994876861572, "learning_rate": 3.498591532516259e-05, "loss": 1.0407, "step": 1173 }, { "epoch": 0.2537281175707802, "grad_norm": 0.8934118151664734, "learning_rate": 3.4976640379097594e-05, "loss": 0.9223, "step": 1174 }, { "epoch": 0.2539442403285066, "grad_norm": 0.96073317527771, "learning_rate": 3.4967358094513446e-05, "loss": 0.9524, "step": 1175 }, { "epoch": 0.25416036308623297, "grad_norm": 0.8692421317100525, "learning_rate": 3.4958068475958424e-05, "loss": 0.7931, "step": 1176 }, { "epoch": 0.2543764858439594, "grad_norm": 1.026824712753296, "learning_rate": 3.494877152798442e-05, "loss": 0.9667, "step": 1177 }, { "epoch": 0.25459260860168575, "grad_norm": 0.9396779537200928, "learning_rate": 3.493946725514694e-05, "loss": 0.8441, "step": 1178 }, { "epoch": 0.2548087313594121, "grad_norm": 0.9980692267417908, "learning_rate": 3.493015566200503e-05, "loss": 0.9181, "step": 1179 }, { "epoch": 0.25502485411713854, "grad_norm": 0.8568054437637329, "learning_rate": 3.4920836753121366e-05, "loss": 0.911, "step": 1180 }, { "epoch": 0.2552409768748649, "grad_norm": 0.8480731844902039, "learning_rate": 3.4911510533062174e-05, "loss": 1.0297, "step": 1181 }, { "epoch": 0.25545709963259133, "grad_norm": 0.9319406151771545, "learning_rate": 3.49021770063973e-05, "loss": 0.8339, "step": 1182 }, { "epoch": 0.2556732223903177, "grad_norm": 0.8354699611663818, "learning_rate": 3.489283617770014e-05, "loss": 0.891, "step": 1183 }, { "epoch": 0.25588934514804407, "grad_norm": 0.9215112924575806, "learning_rate": 3.4883488051547674e-05, "loss": 0.8315, "step": 1184 }, { "epoch": 0.2561054679057705, "grad_norm": 0.9236769676208496, "learning_rate": 3.487413263252047e-05, "loss": 0.9985, "step": 1185 }, { "epoch": 0.25632159066349686, "grad_norm": 0.8746913075447083, "learning_rate": 3.486476992520267e-05, "loss": 0.7641, "step": 1186 }, { "epoch": 0.2565377134212233, "grad_norm": 0.9822189211845398, "learning_rate": 3.485539993418196e-05, "loss": 1.0482, "step": 1187 }, { "epoch": 0.25675383617894965, "grad_norm": 0.9604916572570801, "learning_rate": 3.4846022664049624e-05, "loss": 1.032, "step": 1188 }, { "epoch": 0.256969958936676, "grad_norm": 0.9414983987808228, "learning_rate": 3.4836638119400504e-05, "loss": 1.0286, "step": 1189 }, { "epoch": 0.25718608169440244, "grad_norm": 0.831779420375824, "learning_rate": 3.4827246304833e-05, "loss": 0.9321, "step": 1190 }, { "epoch": 0.2574022044521288, "grad_norm": 0.8739473223686218, "learning_rate": 3.481784722494909e-05, "loss": 0.8186, "step": 1191 }, { "epoch": 0.2576183272098552, "grad_norm": 0.8328155279159546, "learning_rate": 3.480844088435428e-05, "loss": 1.0132, "step": 1192 }, { "epoch": 0.2578344499675816, "grad_norm": 0.8653275966644287, "learning_rate": 3.479902728765768e-05, "loss": 0.9433, "step": 1193 }, { "epoch": 0.25805057272530796, "grad_norm": 0.7569735646247864, "learning_rate": 3.478960643947192e-05, "loss": 0.7814, "step": 1194 }, { "epoch": 0.2582666954830344, "grad_norm": 0.751507043838501, "learning_rate": 3.478017834441319e-05, "loss": 0.8006, "step": 1195 }, { "epoch": 0.25848281824076075, "grad_norm": 0.9412997961044312, "learning_rate": 3.477074300710123e-05, "loss": 0.9028, "step": 1196 }, { "epoch": 0.2586989409984871, "grad_norm": 0.8218026757240295, "learning_rate": 3.4761300432159356e-05, "loss": 1.0349, "step": 1197 }, { "epoch": 0.25891506375621354, "grad_norm": 0.8984594345092773, "learning_rate": 3.475185062421438e-05, "loss": 0.8372, "step": 1198 }, { "epoch": 0.2591311865139399, "grad_norm": 0.9882254600524902, "learning_rate": 3.474239358789671e-05, "loss": 1.1724, "step": 1199 }, { "epoch": 0.2593473092716663, "grad_norm": 0.9558103084564209, "learning_rate": 3.473292932784026e-05, "loss": 0.9732, "step": 1200 }, { "epoch": 0.2595634320293927, "grad_norm": 0.9504913091659546, "learning_rate": 3.472345784868249e-05, "loss": 0.8753, "step": 1201 }, { "epoch": 0.25977955478711906, "grad_norm": 0.9989475607872009, "learning_rate": 3.471397915506441e-05, "loss": 0.8223, "step": 1202 }, { "epoch": 0.2599956775448455, "grad_norm": 0.9614273309707642, "learning_rate": 3.4704493251630565e-05, "loss": 1.0081, "step": 1203 }, { "epoch": 0.26021180030257185, "grad_norm": 1.006683349609375, "learning_rate": 3.4695000143029013e-05, "loss": 0.8342, "step": 1204 }, { "epoch": 0.26042792306029827, "grad_norm": 1.0121324062347412, "learning_rate": 3.4685499833911366e-05, "loss": 0.9228, "step": 1205 }, { "epoch": 0.26064404581802464, "grad_norm": 0.9092336297035217, "learning_rate": 3.4675992328932746e-05, "loss": 0.8474, "step": 1206 }, { "epoch": 0.260860168575751, "grad_norm": 0.9401735067367554, "learning_rate": 3.4666477632751814e-05, "loss": 0.8988, "step": 1207 }, { "epoch": 0.26107629133347743, "grad_norm": 0.7813588976860046, "learning_rate": 3.465695575003074e-05, "loss": 0.8302, "step": 1208 }, { "epoch": 0.2612924140912038, "grad_norm": 0.7777760028839111, "learning_rate": 3.464742668543523e-05, "loss": 0.773, "step": 1209 }, { "epoch": 0.2615085368489302, "grad_norm": 1.030483365058899, "learning_rate": 3.463789044363451e-05, "loss": 0.9962, "step": 1210 }, { "epoch": 0.2617246596066566, "grad_norm": 0.7267763018608093, "learning_rate": 3.462834702930131e-05, "loss": 0.7075, "step": 1211 }, { "epoch": 0.26194078236438295, "grad_norm": 0.9555537700653076, "learning_rate": 3.461879644711188e-05, "loss": 1.1664, "step": 1212 }, { "epoch": 0.2621569051221094, "grad_norm": 0.9532811045646667, "learning_rate": 3.4609238701745985e-05, "loss": 0.9267, "step": 1213 }, { "epoch": 0.26237302787983574, "grad_norm": 0.9747017621994019, "learning_rate": 3.4599673797886896e-05, "loss": 0.972, "step": 1214 }, { "epoch": 0.2625891506375621, "grad_norm": 0.838711678981781, "learning_rate": 3.45901017402214e-05, "loss": 0.9826, "step": 1215 }, { "epoch": 0.26280527339528853, "grad_norm": 1.041108250617981, "learning_rate": 3.4580522533439773e-05, "loss": 0.9327, "step": 1216 }, { "epoch": 0.2630213961530149, "grad_norm": 0.7605981826782227, "learning_rate": 3.457093618223581e-05, "loss": 0.9068, "step": 1217 }, { "epoch": 0.2632375189107413, "grad_norm": 0.8309284448623657, "learning_rate": 3.4561342691306806e-05, "loss": 0.9885, "step": 1218 }, { "epoch": 0.2634536416684677, "grad_norm": 0.9520570039749146, "learning_rate": 3.455174206535354e-05, "loss": 0.7893, "step": 1219 }, { "epoch": 0.26366976442619405, "grad_norm": 0.7623745799064636, "learning_rate": 3.45421343090803e-05, "loss": 0.7545, "step": 1220 }, { "epoch": 0.2638858871839205, "grad_norm": 0.9425437450408936, "learning_rate": 3.453251942719487e-05, "loss": 0.8535, "step": 1221 }, { "epoch": 0.26410200994164684, "grad_norm": 0.8759909868240356, "learning_rate": 3.452289742440851e-05, "loss": 0.9291, "step": 1222 }, { "epoch": 0.26431813269937326, "grad_norm": 0.8797547817230225, "learning_rate": 3.451326830543599e-05, "loss": 0.9388, "step": 1223 }, { "epoch": 0.26453425545709963, "grad_norm": 0.8488011956214905, "learning_rate": 3.450363207499554e-05, "loss": 0.9036, "step": 1224 }, { "epoch": 0.264750378214826, "grad_norm": 0.9757619500160217, "learning_rate": 3.449398873780891e-05, "loss": 0.9631, "step": 1225 }, { "epoch": 0.2649665009725524, "grad_norm": 1.0438483953475952, "learning_rate": 3.44843382986013e-05, "loss": 1.0235, "step": 1226 }, { "epoch": 0.2651826237302788, "grad_norm": 0.9465405941009521, "learning_rate": 3.44746807621014e-05, "loss": 0.8919, "step": 1227 }, { "epoch": 0.2653987464880052, "grad_norm": 0.9920943379402161, "learning_rate": 3.4465016133041405e-05, "loss": 0.9785, "step": 1228 }, { "epoch": 0.2656148692457316, "grad_norm": 0.9541953206062317, "learning_rate": 3.445534441615693e-05, "loss": 0.9799, "step": 1229 }, { "epoch": 0.26583099200345794, "grad_norm": 1.056687831878662, "learning_rate": 3.4445665616187106e-05, "loss": 0.9835, "step": 1230 }, { "epoch": 0.26604711476118437, "grad_norm": 0.8478302955627441, "learning_rate": 3.4435979737874534e-05, "loss": 0.8815, "step": 1231 }, { "epoch": 0.26626323751891073, "grad_norm": 1.0264782905578613, "learning_rate": 3.442628678596525e-05, "loss": 0.9199, "step": 1232 }, { "epoch": 0.26647936027663716, "grad_norm": 0.8958058953285217, "learning_rate": 3.441658676520879e-05, "loss": 1.0462, "step": 1233 }, { "epoch": 0.2666954830343635, "grad_norm": 0.9210952520370483, "learning_rate": 3.440687968035815e-05, "loss": 0.9968, "step": 1234 }, { "epoch": 0.2669116057920899, "grad_norm": 0.8968856930732727, "learning_rate": 3.439716553616977e-05, "loss": 0.8992, "step": 1235 }, { "epoch": 0.2671277285498163, "grad_norm": 0.8895953297615051, "learning_rate": 3.4387444337403556e-05, "loss": 0.9828, "step": 1236 }, { "epoch": 0.2673438513075427, "grad_norm": 0.9425379633903503, "learning_rate": 3.4377716088822875e-05, "loss": 0.862, "step": 1237 }, { "epoch": 0.26755997406526905, "grad_norm": 0.8021935820579529, "learning_rate": 3.436798079519455e-05, "loss": 0.799, "step": 1238 }, { "epoch": 0.26777609682299547, "grad_norm": 0.9322415590286255, "learning_rate": 3.435823846128884e-05, "loss": 1.0036, "step": 1239 }, { "epoch": 0.26799221958072184, "grad_norm": 0.8307620286941528, "learning_rate": 3.434848909187948e-05, "loss": 0.77, "step": 1240 }, { "epoch": 0.26820834233844826, "grad_norm": 1.0585285425186157, "learning_rate": 3.4338732691743644e-05, "loss": 1.1941, "step": 1241 }, { "epoch": 0.2684244650961746, "grad_norm": 1.032701849937439, "learning_rate": 3.432896926566193e-05, "loss": 0.9371, "step": 1242 }, { "epoch": 0.268640587853901, "grad_norm": 0.8700973391532898, "learning_rate": 3.431919881841839e-05, "loss": 0.7914, "step": 1243 }, { "epoch": 0.2688567106116274, "grad_norm": 0.8723090291023254, "learning_rate": 3.430942135480053e-05, "loss": 0.914, "step": 1244 }, { "epoch": 0.2690728333693538, "grad_norm": 0.9654848575592041, "learning_rate": 3.4299636879599295e-05, "loss": 0.8412, "step": 1245 }, { "epoch": 0.2692889561270802, "grad_norm": 0.8331428170204163, "learning_rate": 3.4289845397609044e-05, "loss": 0.8932, "step": 1246 }, { "epoch": 0.26950507888480657, "grad_norm": 0.9070435166358948, "learning_rate": 3.428004691362758e-05, "loss": 0.8609, "step": 1247 }, { "epoch": 0.26972120164253294, "grad_norm": 0.8193120956420898, "learning_rate": 3.4270241432456135e-05, "loss": 0.7546, "step": 1248 }, { "epoch": 0.26993732440025936, "grad_norm": 0.9638481736183167, "learning_rate": 3.426042895889938e-05, "loss": 1.0327, "step": 1249 }, { "epoch": 0.2701534471579857, "grad_norm": 1.1214122772216797, "learning_rate": 3.42506094977654e-05, "loss": 1.1299, "step": 1250 }, { "epoch": 0.27036956991571215, "grad_norm": 0.8765868544578552, "learning_rate": 3.424078305386571e-05, "loss": 0.8192, "step": 1251 }, { "epoch": 0.2705856926734385, "grad_norm": 0.8048397302627563, "learning_rate": 3.423094963201524e-05, "loss": 0.7947, "step": 1252 }, { "epoch": 0.2708018154311649, "grad_norm": 0.8385607004165649, "learning_rate": 3.422110923703235e-05, "loss": 0.7999, "step": 1253 }, { "epoch": 0.2710179381888913, "grad_norm": 0.7560341954231262, "learning_rate": 3.421126187373881e-05, "loss": 0.7791, "step": 1254 }, { "epoch": 0.27123406094661767, "grad_norm": 1.1296929121017456, "learning_rate": 3.4201407546959796e-05, "loss": 1.0208, "step": 1255 }, { "epoch": 0.2714501837043441, "grad_norm": 0.9315122365951538, "learning_rate": 3.419154626152392e-05, "loss": 0.7966, "step": 1256 }, { "epoch": 0.27166630646207046, "grad_norm": 0.9457100033760071, "learning_rate": 3.418167802226318e-05, "loss": 1.1376, "step": 1257 }, { "epoch": 0.27188242921979683, "grad_norm": 0.8627362251281738, "learning_rate": 3.4171802834012996e-05, "loss": 0.9037, "step": 1258 }, { "epoch": 0.27209855197752325, "grad_norm": 0.8868746161460876, "learning_rate": 3.416192070161218e-05, "loss": 0.8328, "step": 1259 }, { "epoch": 0.2723146747352496, "grad_norm": 0.8933835029602051, "learning_rate": 3.415203162990296e-05, "loss": 0.9073, "step": 1260 }, { "epoch": 0.272530797492976, "grad_norm": 0.8237617015838623, "learning_rate": 3.4142135623730954e-05, "loss": 0.987, "step": 1261 }, { "epoch": 0.2727469202507024, "grad_norm": 0.8819873929023743, "learning_rate": 3.413223268794518e-05, "loss": 0.9322, "step": 1262 }, { "epoch": 0.2729630430084288, "grad_norm": 0.8413676619529724, "learning_rate": 3.412232282739807e-05, "loss": 0.801, "step": 1263 }, { "epoch": 0.2731791657661552, "grad_norm": 0.9011121392250061, "learning_rate": 3.411240604694541e-05, "loss": 1.0719, "step": 1264 }, { "epoch": 0.27339528852388156, "grad_norm": 1.065183162689209, "learning_rate": 3.410248235144641e-05, "loss": 1.1748, "step": 1265 }, { "epoch": 0.27361141128160793, "grad_norm": 0.8870022892951965, "learning_rate": 3.409255174576367e-05, "loss": 0.9793, "step": 1266 }, { "epoch": 0.27382753403933435, "grad_norm": 0.9056565165519714, "learning_rate": 3.408261423476314e-05, "loss": 0.9582, "step": 1267 }, { "epoch": 0.2740436567970607, "grad_norm": 0.9530169367790222, "learning_rate": 3.4072669823314194e-05, "loss": 1.0038, "step": 1268 }, { "epoch": 0.27425977955478714, "grad_norm": 0.961306095123291, "learning_rate": 3.4062718516289564e-05, "loss": 0.8197, "step": 1269 }, { "epoch": 0.2744759023125135, "grad_norm": 0.8169891834259033, "learning_rate": 3.405276031856537e-05, "loss": 0.8325, "step": 1270 }, { "epoch": 0.2746920250702399, "grad_norm": 0.8405026793479919, "learning_rate": 3.40427952350211e-05, "loss": 0.8495, "step": 1271 }, { "epoch": 0.2749081478279663, "grad_norm": 0.7815272212028503, "learning_rate": 3.403282327053963e-05, "loss": 0.9514, "step": 1272 }, { "epoch": 0.27512427058569267, "grad_norm": 0.9990447759628296, "learning_rate": 3.402284443000721e-05, "loss": 0.8438, "step": 1273 }, { "epoch": 0.2753403933434191, "grad_norm": 0.8385444283485413, "learning_rate": 3.401285871831342e-05, "loss": 0.8427, "step": 1274 }, { "epoch": 0.27555651610114545, "grad_norm": 0.9013781547546387, "learning_rate": 3.400286614035126e-05, "loss": 0.9588, "step": 1275 }, { "epoch": 0.2757726388588718, "grad_norm": 0.8931231498718262, "learning_rate": 3.399286670101705e-05, "loss": 1.0243, "step": 1276 }, { "epoch": 0.27598876161659824, "grad_norm": 0.9418546557426453, "learning_rate": 3.398286040521051e-05, "loss": 0.8088, "step": 1277 }, { "epoch": 0.2762048843743246, "grad_norm": 0.8482999801635742, "learning_rate": 3.39728472578347e-05, "loss": 0.955, "step": 1278 }, { "epoch": 0.276421007132051, "grad_norm": 0.9833201169967651, "learning_rate": 3.3962827263796024e-05, "loss": 1.0117, "step": 1279 }, { "epoch": 0.2766371298897774, "grad_norm": 0.803976833820343, "learning_rate": 3.395280042800427e-05, "loss": 0.9786, "step": 1280 }, { "epoch": 0.27685325264750377, "grad_norm": 0.8341302275657654, "learning_rate": 3.394276675537256e-05, "loss": 0.884, "step": 1281 }, { "epoch": 0.2770693754052302, "grad_norm": 0.8698337078094482, "learning_rate": 3.393272625081737e-05, "loss": 0.9135, "step": 1282 }, { "epoch": 0.27728549816295656, "grad_norm": 1.0237162113189697, "learning_rate": 3.392267891925854e-05, "loss": 0.7524, "step": 1283 }, { "epoch": 0.2775016209206829, "grad_norm": 0.9259644746780396, "learning_rate": 3.391262476561921e-05, "loss": 1.0063, "step": 1284 }, { "epoch": 0.27771774367840935, "grad_norm": 0.9323410987854004, "learning_rate": 3.3902563794825904e-05, "loss": 1.021, "step": 1285 }, { "epoch": 0.2779338664361357, "grad_norm": 0.841606616973877, "learning_rate": 3.389249601180848e-05, "loss": 0.9984, "step": 1286 }, { "epoch": 0.27814998919386213, "grad_norm": 0.844332754611969, "learning_rate": 3.388242142150013e-05, "loss": 0.8519, "step": 1287 }, { "epoch": 0.2783661119515885, "grad_norm": 0.9326592087745667, "learning_rate": 3.3872340028837366e-05, "loss": 0.9249, "step": 1288 }, { "epoch": 0.27858223470931487, "grad_norm": 0.9754464626312256, "learning_rate": 3.3862251838760067e-05, "loss": 1.016, "step": 1289 }, { "epoch": 0.2787983574670413, "grad_norm": 0.8150403499603271, "learning_rate": 3.3852156856211404e-05, "loss": 0.9499, "step": 1290 }, { "epoch": 0.27901448022476766, "grad_norm": 0.8909110426902771, "learning_rate": 3.38420550861379e-05, "loss": 0.9758, "step": 1291 }, { "epoch": 0.2792306029824941, "grad_norm": 0.9303125739097595, "learning_rate": 3.3831946533489414e-05, "loss": 0.9312, "step": 1292 }, { "epoch": 0.27944672574022045, "grad_norm": 0.9276483058929443, "learning_rate": 3.382183120321909e-05, "loss": 0.8911, "step": 1293 }, { "epoch": 0.2796628484979468, "grad_norm": 0.892886757850647, "learning_rate": 3.3811709100283434e-05, "loss": 1.0487, "step": 1294 }, { "epoch": 0.27987897125567324, "grad_norm": 0.8534461855888367, "learning_rate": 3.3801580229642243e-05, "loss": 0.8842, "step": 1295 }, { "epoch": 0.2800950940133996, "grad_norm": 0.7508280277252197, "learning_rate": 3.379144459625865e-05, "loss": 0.7597, "step": 1296 }, { "epoch": 0.280311216771126, "grad_norm": 0.8750606179237366, "learning_rate": 3.378130220509908e-05, "loss": 0.8795, "step": 1297 }, { "epoch": 0.2805273395288524, "grad_norm": 0.88175368309021, "learning_rate": 3.3771153061133286e-05, "loss": 1.1336, "step": 1298 }, { "epoch": 0.28074346228657876, "grad_norm": 0.9986168742179871, "learning_rate": 3.376099716933433e-05, "loss": 1.225, "step": 1299 }, { "epoch": 0.2809595850443052, "grad_norm": 0.7794172763824463, "learning_rate": 3.375083453467857e-05, "loss": 0.941, "step": 1300 }, { "epoch": 0.28117570780203155, "grad_norm": 0.9246460795402527, "learning_rate": 3.3740665162145685e-05, "loss": 0.9086, "step": 1301 }, { "epoch": 0.2813918305597579, "grad_norm": 0.8370898962020874, "learning_rate": 3.373048905671862e-05, "loss": 0.8536, "step": 1302 }, { "epoch": 0.28160795331748434, "grad_norm": 0.8827524185180664, "learning_rate": 3.372030622338367e-05, "loss": 1.0139, "step": 1303 }, { "epoch": 0.2818240760752107, "grad_norm": 0.8502309918403625, "learning_rate": 3.371011666713038e-05, "loss": 0.9512, "step": 1304 }, { "epoch": 0.28204019883293713, "grad_norm": 0.777923583984375, "learning_rate": 3.3699920392951615e-05, "loss": 0.9032, "step": 1305 }, { "epoch": 0.2822563215906635, "grad_norm": 1.035717487335205, "learning_rate": 3.368971740584353e-05, "loss": 1.1064, "step": 1306 }, { "epoch": 0.28247244434838986, "grad_norm": 0.8214704990386963, "learning_rate": 3.367950771080556e-05, "loss": 1.0441, "step": 1307 }, { "epoch": 0.2826885671061163, "grad_norm": 0.9529228210449219, "learning_rate": 3.366929131284043e-05, "loss": 0.9373, "step": 1308 }, { "epoch": 0.28290468986384265, "grad_norm": 0.8761690258979797, "learning_rate": 3.3659068216954164e-05, "loss": 0.8627, "step": 1309 }, { "epoch": 0.2831208126215691, "grad_norm": 0.8952942490577698, "learning_rate": 3.364883842815605e-05, "loss": 0.9455, "step": 1310 }, { "epoch": 0.28333693537929544, "grad_norm": 0.864251971244812, "learning_rate": 3.363860195145865e-05, "loss": 0.8178, "step": 1311 }, { "epoch": 0.2835530581370218, "grad_norm": 0.9414435029029846, "learning_rate": 3.362835879187783e-05, "loss": 1.1242, "step": 1312 }, { "epoch": 0.28376918089474823, "grad_norm": 0.8933331370353699, "learning_rate": 3.361810895443269e-05, "loss": 0.7215, "step": 1313 }, { "epoch": 0.2839853036524746, "grad_norm": 0.9027130603790283, "learning_rate": 3.360785244414566e-05, "loss": 1.0014, "step": 1314 }, { "epoch": 0.284201426410201, "grad_norm": 0.9086551666259766, "learning_rate": 3.3597589266042384e-05, "loss": 0.769, "step": 1315 }, { "epoch": 0.2844175491679274, "grad_norm": 0.9505411982536316, "learning_rate": 3.35873194251518e-05, "loss": 0.8302, "step": 1316 }, { "epoch": 0.28463367192565375, "grad_norm": 0.7406628131866455, "learning_rate": 3.35770429265061e-05, "loss": 0.8559, "step": 1317 }, { "epoch": 0.2848497946833802, "grad_norm": 0.9455854892730713, "learning_rate": 3.356675977514076e-05, "loss": 0.9963, "step": 1318 }, { "epoch": 0.28506591744110654, "grad_norm": 0.915973961353302, "learning_rate": 3.355646997609449e-05, "loss": 1.0081, "step": 1319 }, { "epoch": 0.28528204019883296, "grad_norm": 0.8263589143753052, "learning_rate": 3.354617353440927e-05, "loss": 0.9118, "step": 1320 }, { "epoch": 0.28549816295655933, "grad_norm": 0.8953337669372559, "learning_rate": 3.353587045513033e-05, "loss": 1.052, "step": 1321 }, { "epoch": 0.2857142857142857, "grad_norm": 0.9771729111671448, "learning_rate": 3.352556074330615e-05, "loss": 0.9796, "step": 1322 }, { "epoch": 0.2859304084720121, "grad_norm": 1.0700210332870483, "learning_rate": 3.351524440398849e-05, "loss": 1.0475, "step": 1323 }, { "epoch": 0.2861465312297385, "grad_norm": 0.8895292282104492, "learning_rate": 3.3504921442232306e-05, "loss": 1.0354, "step": 1324 }, { "epoch": 0.28636265398746485, "grad_norm": 0.8481094837188721, "learning_rate": 3.3494591863095834e-05, "loss": 0.988, "step": 1325 }, { "epoch": 0.2865787767451913, "grad_norm": 1.0255558490753174, "learning_rate": 3.348425567164054e-05, "loss": 0.8495, "step": 1326 }, { "epoch": 0.28679489950291764, "grad_norm": 0.8727287650108337, "learning_rate": 3.347391287293115e-05, "loss": 0.8897, "step": 1327 }, { "epoch": 0.28701102226064407, "grad_norm": 0.8275237679481506, "learning_rate": 3.3463563472035586e-05, "loss": 0.8135, "step": 1328 }, { "epoch": 0.28722714501837043, "grad_norm": 0.8719654679298401, "learning_rate": 3.3453207474025054e-05, "loss": 0.9233, "step": 1329 }, { "epoch": 0.2874432677760968, "grad_norm": 0.9389613270759583, "learning_rate": 3.344284488397395e-05, "loss": 0.9438, "step": 1330 }, { "epoch": 0.2876593905338232, "grad_norm": 0.8721885085105896, "learning_rate": 3.3432475706959936e-05, "loss": 0.8661, "step": 1331 }, { "epoch": 0.2878755132915496, "grad_norm": 0.9125036001205444, "learning_rate": 3.3422099948063876e-05, "loss": 0.9638, "step": 1332 }, { "epoch": 0.288091636049276, "grad_norm": 0.9462102651596069, "learning_rate": 3.3411717612369866e-05, "loss": 0.8471, "step": 1333 }, { "epoch": 0.2883077588070024, "grad_norm": 0.9385507702827454, "learning_rate": 3.340132870496523e-05, "loss": 0.9694, "step": 1334 }, { "epoch": 0.28852388156472875, "grad_norm": 0.9072837233543396, "learning_rate": 3.339093323094051e-05, "loss": 0.8278, "step": 1335 }, { "epoch": 0.28874000432245517, "grad_norm": 0.7959422469139099, "learning_rate": 3.338053119538946e-05, "loss": 0.9622, "step": 1336 }, { "epoch": 0.28895612708018154, "grad_norm": 0.8302894234657288, "learning_rate": 3.337012260340906e-05, "loss": 0.9179, "step": 1337 }, { "epoch": 0.28917224983790796, "grad_norm": 0.8141044974327087, "learning_rate": 3.3359707460099485e-05, "loss": 0.872, "step": 1338 }, { "epoch": 0.2893883725956343, "grad_norm": 1.1808451414108276, "learning_rate": 3.334928577056414e-05, "loss": 1.1128, "step": 1339 }, { "epoch": 0.2896044953533607, "grad_norm": 0.8286523818969727, "learning_rate": 3.3338857539909625e-05, "loss": 1.0352, "step": 1340 }, { "epoch": 0.2898206181110871, "grad_norm": 0.9007647037506104, "learning_rate": 3.332842277324576e-05, "loss": 0.8617, "step": 1341 }, { "epoch": 0.2900367408688135, "grad_norm": 0.9239495992660522, "learning_rate": 3.3317981475685534e-05, "loss": 1.0, "step": 1342 }, { "epoch": 0.29025286362653985, "grad_norm": 0.986542284488678, "learning_rate": 3.330753365234518e-05, "loss": 0.9027, "step": 1343 }, { "epoch": 0.29046898638426627, "grad_norm": 0.96485835313797, "learning_rate": 3.329707930834409e-05, "loss": 0.9289, "step": 1344 }, { "epoch": 0.29068510914199264, "grad_norm": 0.8924793004989624, "learning_rate": 3.328661844880489e-05, "loss": 1.0875, "step": 1345 }, { "epoch": 0.29090123189971906, "grad_norm": 0.9833986163139343, "learning_rate": 3.327615107885335e-05, "loss": 0.9299, "step": 1346 }, { "epoch": 0.2911173546574454, "grad_norm": 0.8929439187049866, "learning_rate": 3.326567720361849e-05, "loss": 0.9629, "step": 1347 }, { "epoch": 0.2913334774151718, "grad_norm": 0.8560901880264282, "learning_rate": 3.325519682823244e-05, "loss": 1.1753, "step": 1348 }, { "epoch": 0.2915496001728982, "grad_norm": 0.9288893938064575, "learning_rate": 3.324470995783061e-05, "loss": 0.994, "step": 1349 }, { "epoch": 0.2917657229306246, "grad_norm": 1.0577545166015625, "learning_rate": 3.323421659755151e-05, "loss": 0.9899, "step": 1350 }, { "epoch": 0.291981845688351, "grad_norm": 0.8964827060699463, "learning_rate": 3.322371675253686e-05, "loss": 0.898, "step": 1351 }, { "epoch": 0.29219796844607737, "grad_norm": 0.856554388999939, "learning_rate": 3.3213210427931585e-05, "loss": 0.8849, "step": 1352 }, { "epoch": 0.29241409120380374, "grad_norm": 1.0463320016860962, "learning_rate": 3.320269762888374e-05, "loss": 0.98, "step": 1353 }, { "epoch": 0.29263021396153016, "grad_norm": 0.9561208486557007, "learning_rate": 3.319217836054457e-05, "loss": 0.8383, "step": 1354 }, { "epoch": 0.29284633671925653, "grad_norm": 0.8747385740280151, "learning_rate": 3.318165262806851e-05, "loss": 0.9091, "step": 1355 }, { "epoch": 0.29306245947698295, "grad_norm": 0.8965030908584595, "learning_rate": 3.317112043661313e-05, "loss": 0.999, "step": 1356 }, { "epoch": 0.2932785822347093, "grad_norm": 0.9199790954589844, "learning_rate": 3.3160581791339186e-05, "loss": 0.8623, "step": 1357 }, { "epoch": 0.2934947049924357, "grad_norm": 0.7929543256759644, "learning_rate": 3.3150036697410586e-05, "loss": 0.9401, "step": 1358 }, { "epoch": 0.2937108277501621, "grad_norm": 0.8746668100357056, "learning_rate": 3.3139485159994396e-05, "loss": 0.9932, "step": 1359 }, { "epoch": 0.2939269505078885, "grad_norm": 0.8712968826293945, "learning_rate": 3.312892718426086e-05, "loss": 0.9287, "step": 1360 }, { "epoch": 0.2941430732656149, "grad_norm": 0.8132144212722778, "learning_rate": 3.3118362775383346e-05, "loss": 0.9417, "step": 1361 }, { "epoch": 0.29435919602334126, "grad_norm": 0.9515878558158875, "learning_rate": 3.31077919385384e-05, "loss": 0.8063, "step": 1362 }, { "epoch": 0.29457531878106763, "grad_norm": 0.8533822298049927, "learning_rate": 3.309721467890571e-05, "loss": 0.9044, "step": 1363 }, { "epoch": 0.29479144153879405, "grad_norm": 0.8742635846138, "learning_rate": 3.308663100166809e-05, "loss": 0.9775, "step": 1364 }, { "epoch": 0.2950075642965204, "grad_norm": 0.8867061734199524, "learning_rate": 3.3076040912011544e-05, "loss": 0.8029, "step": 1365 }, { "epoch": 0.2952236870542468, "grad_norm": 0.9431737661361694, "learning_rate": 3.306544441512518e-05, "loss": 1.0518, "step": 1366 }, { "epoch": 0.2954398098119732, "grad_norm": 0.8994881510734558, "learning_rate": 3.3054841516201244e-05, "loss": 0.9312, "step": 1367 }, { "epoch": 0.2956559325696996, "grad_norm": 0.8521738648414612, "learning_rate": 3.304423222043515e-05, "loss": 1.0674, "step": 1368 }, { "epoch": 0.295872055327426, "grad_norm": 0.8029457330703735, "learning_rate": 3.3033616533025425e-05, "loss": 0.9699, "step": 1369 }, { "epoch": 0.29608817808515236, "grad_norm": 0.8967093229293823, "learning_rate": 3.3022994459173724e-05, "loss": 0.8124, "step": 1370 }, { "epoch": 0.29630430084287873, "grad_norm": 1.0194793939590454, "learning_rate": 3.301236600408484e-05, "loss": 0.8773, "step": 1371 }, { "epoch": 0.29652042360060515, "grad_norm": 0.8610829710960388, "learning_rate": 3.300173117296671e-05, "loss": 0.9437, "step": 1372 }, { "epoch": 0.2967365463583315, "grad_norm": 0.9379924535751343, "learning_rate": 3.2991089971030344e-05, "loss": 1.0667, "step": 1373 }, { "epoch": 0.29695266911605794, "grad_norm": 1.0194292068481445, "learning_rate": 3.2980442403489926e-05, "loss": 1.1446, "step": 1374 }, { "epoch": 0.2971687918737843, "grad_norm": 0.8629703521728516, "learning_rate": 3.296978847556274e-05, "loss": 0.8923, "step": 1375 }, { "epoch": 0.2973849146315107, "grad_norm": 0.817665159702301, "learning_rate": 3.295912819246918e-05, "loss": 0.7325, "step": 1376 }, { "epoch": 0.2976010373892371, "grad_norm": 0.9250444173812866, "learning_rate": 3.294846155943277e-05, "loss": 1.0255, "step": 1377 }, { "epoch": 0.29781716014696347, "grad_norm": 0.9583104848861694, "learning_rate": 3.293778858168012e-05, "loss": 1.0777, "step": 1378 }, { "epoch": 0.2980332829046899, "grad_norm": 0.8659541606903076, "learning_rate": 3.292710926444098e-05, "loss": 0.9546, "step": 1379 }, { "epoch": 0.29824940566241626, "grad_norm": 0.819677472114563, "learning_rate": 3.291642361294818e-05, "loss": 0.8741, "step": 1380 }, { "epoch": 0.2984655284201426, "grad_norm": 0.9010489583015442, "learning_rate": 3.290573163243766e-05, "loss": 0.9506, "step": 1381 }, { "epoch": 0.29868165117786905, "grad_norm": 0.8347435593605042, "learning_rate": 3.2895033328148485e-05, "loss": 0.7022, "step": 1382 }, { "epoch": 0.2988977739355954, "grad_norm": 1.0209494829177856, "learning_rate": 3.2884328705322786e-05, "loss": 1.0069, "step": 1383 }, { "epoch": 0.2991138966933218, "grad_norm": 0.7825828790664673, "learning_rate": 3.28736177692058e-05, "loss": 0.7867, "step": 1384 }, { "epoch": 0.2993300194510482, "grad_norm": 0.9509403109550476, "learning_rate": 3.2862900525045875e-05, "loss": 1.0487, "step": 1385 }, { "epoch": 0.29954614220877457, "grad_norm": 0.8550942540168762, "learning_rate": 3.285217697809443e-05, "loss": 0.8047, "step": 1386 }, { "epoch": 0.299762264966501, "grad_norm": 0.7809147834777832, "learning_rate": 3.2841447133605965e-05, "loss": 0.7594, "step": 1387 }, { "epoch": 0.29997838772422736, "grad_norm": 1.0176464319229126, "learning_rate": 3.2830710996838094e-05, "loss": 1.0076, "step": 1388 }, { "epoch": 0.3001945104819537, "grad_norm": 0.7915776371955872, "learning_rate": 3.2819968573051494e-05, "loss": 0.9234, "step": 1389 }, { "epoch": 0.30041063323968015, "grad_norm": 0.9604405760765076, "learning_rate": 3.280921986750993e-05, "loss": 1.0885, "step": 1390 }, { "epoch": 0.3006267559974065, "grad_norm": 0.8955967426300049, "learning_rate": 3.279846488548024e-05, "loss": 0.8431, "step": 1391 }, { "epoch": 0.30084287875513294, "grad_norm": 0.936397910118103, "learning_rate": 3.2787703632232344e-05, "loss": 0.8925, "step": 1392 }, { "epoch": 0.3010590015128593, "grad_norm": 0.7613109946250916, "learning_rate": 3.277693611303922e-05, "loss": 0.8787, "step": 1393 }, { "epoch": 0.30127512427058567, "grad_norm": 0.9893035292625427, "learning_rate": 3.2766162333176955e-05, "loss": 0.9203, "step": 1394 }, { "epoch": 0.3014912470283121, "grad_norm": 0.800090491771698, "learning_rate": 3.275538229792465e-05, "loss": 0.8575, "step": 1395 }, { "epoch": 0.30170736978603846, "grad_norm": 0.9871878623962402, "learning_rate": 3.27445960125645e-05, "loss": 0.8238, "step": 1396 }, { "epoch": 0.3019234925437649, "grad_norm": 0.9994919300079346, "learning_rate": 3.273380348238177e-05, "loss": 0.927, "step": 1397 }, { "epoch": 0.30213961530149125, "grad_norm": 0.9238467216491699, "learning_rate": 3.2723004712664766e-05, "loss": 0.8515, "step": 1398 }, { "epoch": 0.3023557380592176, "grad_norm": 0.9151886105537415, "learning_rate": 3.271219970870487e-05, "loss": 0.9554, "step": 1399 }, { "epoch": 0.30257186081694404, "grad_norm": 0.9553155899047852, "learning_rate": 3.27013884757965e-05, "loss": 1.0217, "step": 1400 }, { "epoch": 0.3027879835746704, "grad_norm": 0.8802904486656189, "learning_rate": 3.269057101923714e-05, "loss": 0.9886, "step": 1401 }, { "epoch": 0.3030041063323968, "grad_norm": 1.0054231882095337, "learning_rate": 3.267974734432731e-05, "loss": 1.1334, "step": 1402 }, { "epoch": 0.3032202290901232, "grad_norm": 0.8831580281257629, "learning_rate": 3.266891745637059e-05, "loss": 1.0838, "step": 1403 }, { "epoch": 0.30343635184784956, "grad_norm": 0.8652462959289551, "learning_rate": 3.2658081360673615e-05, "loss": 0.9097, "step": 1404 }, { "epoch": 0.303652474605576, "grad_norm": 0.8898692727088928, "learning_rate": 3.2647239062546026e-05, "loss": 0.9986, "step": 1405 }, { "epoch": 0.30386859736330235, "grad_norm": 0.8116910457611084, "learning_rate": 3.2636390567300544e-05, "loss": 0.883, "step": 1406 }, { "epoch": 0.3040847201210287, "grad_norm": 0.8537965416908264, "learning_rate": 3.262553588025288e-05, "loss": 1.0509, "step": 1407 }, { "epoch": 0.30430084287875514, "grad_norm": 0.9292021989822388, "learning_rate": 3.2614675006721826e-05, "loss": 0.96, "step": 1408 }, { "epoch": 0.3045169656364815, "grad_norm": 0.8599814176559448, "learning_rate": 3.2603807952029187e-05, "loss": 0.9549, "step": 1409 }, { "epoch": 0.30473308839420793, "grad_norm": 0.7698879837989807, "learning_rate": 3.2592934721499775e-05, "loss": 0.8736, "step": 1410 }, { "epoch": 0.3049492111519343, "grad_norm": 0.9960350394248962, "learning_rate": 3.258205532046147e-05, "loss": 0.8644, "step": 1411 }, { "epoch": 0.30516533390966066, "grad_norm": 0.8982427716255188, "learning_rate": 3.2571169754245136e-05, "loss": 1.0009, "step": 1412 }, { "epoch": 0.3053814566673871, "grad_norm": 0.9564204812049866, "learning_rate": 3.256027802818469e-05, "loss": 0.8045, "step": 1413 }, { "epoch": 0.30559757942511345, "grad_norm": 0.8544869422912598, "learning_rate": 3.254938014761704e-05, "loss": 0.8334, "step": 1414 }, { "epoch": 0.3058137021828399, "grad_norm": 1.0413247346878052, "learning_rate": 3.253847611788214e-05, "loss": 1.0471, "step": 1415 }, { "epoch": 0.30602982494056624, "grad_norm": 0.9799613356590271, "learning_rate": 3.252756594432291e-05, "loss": 1.1929, "step": 1416 }, { "epoch": 0.3062459476982926, "grad_norm": 0.8312803506851196, "learning_rate": 3.2516649632285335e-05, "loss": 0.9199, "step": 1417 }, { "epoch": 0.30646207045601903, "grad_norm": 1.087661862373352, "learning_rate": 3.250572718711837e-05, "loss": 1.019, "step": 1418 }, { "epoch": 0.3066781932137454, "grad_norm": 0.8894320726394653, "learning_rate": 3.2494798614174e-05, "loss": 0.8547, "step": 1419 }, { "epoch": 0.3068943159714718, "grad_norm": 1.049173355102539, "learning_rate": 3.2483863918807187e-05, "loss": 0.8446, "step": 1420 }, { "epoch": 0.3071104387291982, "grad_norm": 0.8932068347930908, "learning_rate": 3.2472923106375915e-05, "loss": 0.7526, "step": 1421 }, { "epoch": 0.30732656148692455, "grad_norm": 0.9055934548377991, "learning_rate": 3.246197618224115e-05, "loss": 1.0318, "step": 1422 }, { "epoch": 0.307542684244651, "grad_norm": 0.9189789295196533, "learning_rate": 3.245102315176687e-05, "loss": 0.9418, "step": 1423 }, { "epoch": 0.30775880700237734, "grad_norm": 0.8460312485694885, "learning_rate": 3.2440064020320025e-05, "loss": 0.8354, "step": 1424 }, { "epoch": 0.30797492976010377, "grad_norm": 0.9116464257240295, "learning_rate": 3.2429098793270565e-05, "loss": 0.8884, "step": 1425 }, { "epoch": 0.30819105251783013, "grad_norm": 0.9721053242683411, "learning_rate": 3.241812747599143e-05, "loss": 1.0997, "step": 1426 }, { "epoch": 0.3084071752755565, "grad_norm": 0.8986929655075073, "learning_rate": 3.2407150073858536e-05, "loss": 0.6901, "step": 1427 }, { "epoch": 0.3086232980332829, "grad_norm": 0.8689582347869873, "learning_rate": 3.239616659225079e-05, "loss": 0.9803, "step": 1428 }, { "epoch": 0.3088394207910093, "grad_norm": 0.8031696677207947, "learning_rate": 3.2385177036550075e-05, "loss": 1.196, "step": 1429 }, { "epoch": 0.30905554354873566, "grad_norm": 0.8193029761314392, "learning_rate": 3.2374181412141235e-05, "loss": 0.9293, "step": 1430 }, { "epoch": 0.3092716663064621, "grad_norm": 0.9366302490234375, "learning_rate": 3.2363179724412105e-05, "loss": 1.0836, "step": 1431 }, { "epoch": 0.30948778906418845, "grad_norm": 1.066731572151184, "learning_rate": 3.2352171978753495e-05, "loss": 0.9343, "step": 1432 }, { "epoch": 0.30970391182191487, "grad_norm": 0.9972415566444397, "learning_rate": 3.2341158180559174e-05, "loss": 1.0282, "step": 1433 }, { "epoch": 0.30992003457964123, "grad_norm": 1.0614042282104492, "learning_rate": 3.233013833522587e-05, "loss": 1.0944, "step": 1434 }, { "epoch": 0.3101361573373676, "grad_norm": 0.9234687089920044, "learning_rate": 3.2319112448153274e-05, "loss": 0.953, "step": 1435 }, { "epoch": 0.310352280095094, "grad_norm": 0.8281671404838562, "learning_rate": 3.230808052474407e-05, "loss": 0.9637, "step": 1436 }, { "epoch": 0.3105684028528204, "grad_norm": 1.0255532264709473, "learning_rate": 3.229704257040385e-05, "loss": 0.9303, "step": 1437 }, { "epoch": 0.3107845256105468, "grad_norm": 0.8764996528625488, "learning_rate": 3.228599859054121e-05, "loss": 1.0025, "step": 1438 }, { "epoch": 0.3110006483682732, "grad_norm": 1.0559879541397095, "learning_rate": 3.227494859056765e-05, "loss": 1.0939, "step": 1439 }, { "epoch": 0.31121677112599955, "grad_norm": 0.8221381306648254, "learning_rate": 3.2263892575897666e-05, "loss": 0.9899, "step": 1440 }, { "epoch": 0.31143289388372597, "grad_norm": 0.782566249370575, "learning_rate": 3.2252830551948663e-05, "loss": 0.9341, "step": 1441 }, { "epoch": 0.31164901664145234, "grad_norm": 0.9282163381576538, "learning_rate": 3.2241762524141016e-05, "loss": 0.923, "step": 1442 }, { "epoch": 0.31186513939917876, "grad_norm": 0.9249364733695984, "learning_rate": 3.223068849789803e-05, "loss": 0.8954, "step": 1443 }, { "epoch": 0.3120812621569051, "grad_norm": 0.9271489381790161, "learning_rate": 3.221960847864596e-05, "loss": 0.8951, "step": 1444 }, { "epoch": 0.3122973849146315, "grad_norm": 0.7943389415740967, "learning_rate": 3.220852247181397e-05, "loss": 0.8719, "step": 1445 }, { "epoch": 0.3125135076723579, "grad_norm": 0.8858696222305298, "learning_rate": 3.2197430482834204e-05, "loss": 0.9753, "step": 1446 }, { "epoch": 0.3127296304300843, "grad_norm": 0.9106383919715881, "learning_rate": 3.218633251714169e-05, "loss": 0.9076, "step": 1447 }, { "epoch": 0.31294575318781065, "grad_norm": 1.0993046760559082, "learning_rate": 3.217522858017442e-05, "loss": 0.8854, "step": 1448 }, { "epoch": 0.31316187594553707, "grad_norm": 0.9718704223632812, "learning_rate": 3.2164118677373287e-05, "loss": 1.0509, "step": 1449 }, { "epoch": 0.31337799870326344, "grad_norm": 0.9736214876174927, "learning_rate": 3.215300281418212e-05, "loss": 0.8624, "step": 1450 }, { "epoch": 0.31359412146098986, "grad_norm": 0.8958658576011658, "learning_rate": 3.214188099604766e-05, "loss": 0.9222, "step": 1451 }, { "epoch": 0.31381024421871623, "grad_norm": 0.9576481580734253, "learning_rate": 3.21307532284196e-05, "loss": 0.8332, "step": 1452 }, { "epoch": 0.3140263669764426, "grad_norm": 0.8226738572120667, "learning_rate": 3.211961951675048e-05, "loss": 0.7807, "step": 1453 }, { "epoch": 0.314242489734169, "grad_norm": 0.877682089805603, "learning_rate": 3.210847986649582e-05, "loss": 0.7873, "step": 1454 }, { "epoch": 0.3144586124918954, "grad_norm": 0.8779473304748535, "learning_rate": 3.209733428311403e-05, "loss": 0.8722, "step": 1455 }, { "epoch": 0.3146747352496218, "grad_norm": 0.9517311453819275, "learning_rate": 3.2086182772066386e-05, "loss": 0.9974, "step": 1456 }, { "epoch": 0.3148908580073482, "grad_norm": 0.8762261867523193, "learning_rate": 3.207502533881713e-05, "loss": 0.9426, "step": 1457 }, { "epoch": 0.31510698076507454, "grad_norm": 0.9212803244590759, "learning_rate": 3.206386198883338e-05, "loss": 0.9103, "step": 1458 }, { "epoch": 0.31532310352280096, "grad_norm": 0.85163813829422, "learning_rate": 3.205269272758513e-05, "loss": 0.9321, "step": 1459 }, { "epoch": 0.31553922628052733, "grad_norm": 0.9100742340087891, "learning_rate": 3.204151756054532e-05, "loss": 0.9176, "step": 1460 }, { "epoch": 0.31575534903825375, "grad_norm": 0.9264156818389893, "learning_rate": 3.203033649318973e-05, "loss": 0.9382, "step": 1461 }, { "epoch": 0.3159714717959801, "grad_norm": 0.8556106090545654, "learning_rate": 3.201914953099707e-05, "loss": 0.9871, "step": 1462 }, { "epoch": 0.3161875945537065, "grad_norm": 0.9584073424339294, "learning_rate": 3.200795667944892e-05, "loss": 1.0367, "step": 1463 }, { "epoch": 0.3164037173114329, "grad_norm": 0.78240966796875, "learning_rate": 3.199675794402976e-05, "loss": 0.7538, "step": 1464 }, { "epoch": 0.3166198400691593, "grad_norm": 0.881813645362854, "learning_rate": 3.198555333022694e-05, "loss": 1.0155, "step": 1465 }, { "epoch": 0.3168359628268857, "grad_norm": 1.0268313884735107, "learning_rate": 3.1974342843530694e-05, "loss": 1.0565, "step": 1466 }, { "epoch": 0.31705208558461206, "grad_norm": 0.8890941143035889, "learning_rate": 3.196312648943414e-05, "loss": 1.0004, "step": 1467 }, { "epoch": 0.31726820834233843, "grad_norm": 0.8016995191574097, "learning_rate": 3.195190427343326e-05, "loss": 0.7452, "step": 1468 }, { "epoch": 0.31748433110006485, "grad_norm": 0.8824421167373657, "learning_rate": 3.194067620102691e-05, "loss": 0.9135, "step": 1469 }, { "epoch": 0.3177004538577912, "grad_norm": 1.0067412853240967, "learning_rate": 3.192944227771682e-05, "loss": 0.9748, "step": 1470 }, { "epoch": 0.3179165766155176, "grad_norm": 0.9073596596717834, "learning_rate": 3.1918202509007605e-05, "loss": 1.0502, "step": 1471 }, { "epoch": 0.318132699373244, "grad_norm": 1.0185718536376953, "learning_rate": 3.19069569004067e-05, "loss": 0.8564, "step": 1472 }, { "epoch": 0.3183488221309704, "grad_norm": 0.9078257083892822, "learning_rate": 3.189570545742444e-05, "loss": 0.7839, "step": 1473 }, { "epoch": 0.3185649448886968, "grad_norm": 0.8796834945678711, "learning_rate": 3.1884448185574016e-05, "loss": 0.7607, "step": 1474 }, { "epoch": 0.31878106764642317, "grad_norm": 0.9579322934150696, "learning_rate": 3.187318509037145e-05, "loss": 1.1325, "step": 1475 }, { "epoch": 0.31899719040414953, "grad_norm": 0.834815502166748, "learning_rate": 3.186191617733562e-05, "loss": 0.8135, "step": 1476 }, { "epoch": 0.31921331316187596, "grad_norm": 0.9586957693099976, "learning_rate": 3.1850641451988295e-05, "loss": 0.9507, "step": 1477 }, { "epoch": 0.3194294359196023, "grad_norm": 0.824196994304657, "learning_rate": 3.1839360919854067e-05, "loss": 0.9114, "step": 1478 }, { "epoch": 0.31964555867732874, "grad_norm": 0.7384624481201172, "learning_rate": 3.182807458646034e-05, "loss": 0.6911, "step": 1479 }, { "epoch": 0.3198616814350551, "grad_norm": 0.9443362951278687, "learning_rate": 3.181678245733741e-05, "loss": 0.9006, "step": 1480 }, { "epoch": 0.3200778041927815, "grad_norm": 1.0642915964126587, "learning_rate": 3.1805484538018396e-05, "loss": 0.9546, "step": 1481 }, { "epoch": 0.3202939269505079, "grad_norm": 0.9522401690483093, "learning_rate": 3.1794180834039245e-05, "loss": 0.9805, "step": 1482 }, { "epoch": 0.32051004970823427, "grad_norm": 0.9213443398475647, "learning_rate": 3.178287135093875e-05, "loss": 1.0474, "step": 1483 }, { "epoch": 0.3207261724659607, "grad_norm": 0.8785810470581055, "learning_rate": 3.177155609425854e-05, "loss": 0.8559, "step": 1484 }, { "epoch": 0.32094229522368706, "grad_norm": 0.8620195984840393, "learning_rate": 3.176023506954304e-05, "loss": 0.8553, "step": 1485 }, { "epoch": 0.3211584179814134, "grad_norm": 0.9921575784683228, "learning_rate": 3.174890828233956e-05, "loss": 0.9996, "step": 1486 }, { "epoch": 0.32137454073913985, "grad_norm": 0.8634210228919983, "learning_rate": 3.173757573819817e-05, "loss": 0.8852, "step": 1487 }, { "epoch": 0.3215906634968662, "grad_norm": 1.0673092603683472, "learning_rate": 3.17262374426718e-05, "loss": 0.9562, "step": 1488 }, { "epoch": 0.32180678625459264, "grad_norm": 0.9692311882972717, "learning_rate": 3.171489340131619e-05, "loss": 0.8759, "step": 1489 }, { "epoch": 0.322022909012319, "grad_norm": 0.9291105270385742, "learning_rate": 3.17035436196899e-05, "loss": 0.8828, "step": 1490 }, { "epoch": 0.32223903177004537, "grad_norm": 0.9369811415672302, "learning_rate": 3.169218810335429e-05, "loss": 0.8377, "step": 1491 }, { "epoch": 0.3224551545277718, "grad_norm": 1.0407109260559082, "learning_rate": 3.1680826857873534e-05, "loss": 0.7897, "step": 1492 }, { "epoch": 0.32267127728549816, "grad_norm": 0.8820456266403198, "learning_rate": 3.166945988881462e-05, "loss": 0.9196, "step": 1493 }, { "epoch": 0.3228874000432245, "grad_norm": 0.9677581191062927, "learning_rate": 3.165808720174734e-05, "loss": 0.9512, "step": 1494 }, { "epoch": 0.32310352280095095, "grad_norm": 1.0615472793579102, "learning_rate": 3.164670880224428e-05, "loss": 0.9821, "step": 1495 }, { "epoch": 0.3233196455586773, "grad_norm": 0.908225953578949, "learning_rate": 3.163532469588084e-05, "loss": 0.9871, "step": 1496 }, { "epoch": 0.32353576831640374, "grad_norm": 0.9291085004806519, "learning_rate": 3.162393488823518e-05, "loss": 0.8227, "step": 1497 }, { "epoch": 0.3237518910741301, "grad_norm": 1.0241197347640991, "learning_rate": 3.1612539384888304e-05, "loss": 0.9324, "step": 1498 }, { "epoch": 0.32396801383185647, "grad_norm": 0.9944846630096436, "learning_rate": 3.1601138191423966e-05, "loss": 1.0057, "step": 1499 }, { "epoch": 0.3241841365895829, "grad_norm": 0.872500479221344, "learning_rate": 3.1589731313428745e-05, "loss": 1.0265, "step": 1500 }, { "epoch": 0.32440025934730926, "grad_norm": 0.9759976267814636, "learning_rate": 3.157831875649196e-05, "loss": 0.9748, "step": 1501 }, { "epoch": 0.3246163821050357, "grad_norm": 0.9262040257453918, "learning_rate": 3.156690052620575e-05, "loss": 0.8156, "step": 1502 }, { "epoch": 0.32483250486276205, "grad_norm": 0.9541052579879761, "learning_rate": 3.155547662816503e-05, "loss": 0.9731, "step": 1503 }, { "epoch": 0.3250486276204884, "grad_norm": 0.996863067150116, "learning_rate": 3.1544047067967465e-05, "loss": 0.9828, "step": 1504 }, { "epoch": 0.32526475037821484, "grad_norm": 0.9251271486282349, "learning_rate": 3.153261185121353e-05, "loss": 0.9075, "step": 1505 }, { "epoch": 0.3254808731359412, "grad_norm": 0.9439906477928162, "learning_rate": 3.152117098350644e-05, "loss": 0.9737, "step": 1506 }, { "epoch": 0.32569699589366763, "grad_norm": 0.9262637495994568, "learning_rate": 3.15097244704522e-05, "loss": 0.9972, "step": 1507 }, { "epoch": 0.325913118651394, "grad_norm": 0.8676000237464905, "learning_rate": 3.149827231765959e-05, "loss": 1.0146, "step": 1508 }, { "epoch": 0.32612924140912036, "grad_norm": 0.894512414932251, "learning_rate": 3.1486814530740114e-05, "loss": 1.0719, "step": 1509 }, { "epoch": 0.3263453641668468, "grad_norm": 0.9048900008201599, "learning_rate": 3.147535111530807e-05, "loss": 0.7671, "step": 1510 }, { "epoch": 0.32656148692457315, "grad_norm": 0.9422193169593811, "learning_rate": 3.1463882076980515e-05, "loss": 1.0002, "step": 1511 }, { "epoch": 0.3267776096822995, "grad_norm": 0.9564182162284851, "learning_rate": 3.145240742137724e-05, "loss": 0.9639, "step": 1512 }, { "epoch": 0.32699373244002594, "grad_norm": 0.9609580039978027, "learning_rate": 3.1440927154120795e-05, "loss": 0.9638, "step": 1513 }, { "epoch": 0.3272098551977523, "grad_norm": 0.9527109265327454, "learning_rate": 3.1429441280836504e-05, "loss": 0.9075, "step": 1514 }, { "epoch": 0.32742597795547873, "grad_norm": 0.9161962866783142, "learning_rate": 3.14179498071524e-05, "loss": 0.8975, "step": 1515 }, { "epoch": 0.3276421007132051, "grad_norm": 0.9064362645149231, "learning_rate": 3.1406452738699284e-05, "loss": 0.9487, "step": 1516 }, { "epoch": 0.32785822347093146, "grad_norm": 0.9479205012321472, "learning_rate": 3.139495008111069e-05, "loss": 0.9245, "step": 1517 }, { "epoch": 0.3280743462286579, "grad_norm": 0.9338969588279724, "learning_rate": 3.138344184002291e-05, "loss": 0.8734, "step": 1518 }, { "epoch": 0.32829046898638425, "grad_norm": 0.9291052222251892, "learning_rate": 3.137192802107493e-05, "loss": 0.9992, "step": 1519 }, { "epoch": 0.3285065917441107, "grad_norm": 0.9882834553718567, "learning_rate": 3.136040862990852e-05, "loss": 1.01, "step": 1520 }, { "epoch": 0.32872271450183704, "grad_norm": 0.8983349800109863, "learning_rate": 3.134888367216814e-05, "loss": 0.9383, "step": 1521 }, { "epoch": 0.3289388372595634, "grad_norm": 1.0109102725982666, "learning_rate": 3.133735315350099e-05, "loss": 0.9699, "step": 1522 }, { "epoch": 0.32915496001728983, "grad_norm": 1.0953506231307983, "learning_rate": 3.1325817079557004e-05, "loss": 1.0418, "step": 1523 }, { "epoch": 0.3293710827750162, "grad_norm": 0.9270989894866943, "learning_rate": 3.1314275455988835e-05, "loss": 0.9192, "step": 1524 }, { "epoch": 0.3295872055327426, "grad_norm": 0.8695876598358154, "learning_rate": 3.130272828845184e-05, "loss": 0.8122, "step": 1525 }, { "epoch": 0.329803328290469, "grad_norm": 0.8579394817352295, "learning_rate": 3.1291175582604116e-05, "loss": 0.9604, "step": 1526 }, { "epoch": 0.33001945104819536, "grad_norm": 0.8832108378410339, "learning_rate": 3.127961734410646e-05, "loss": 0.9691, "step": 1527 }, { "epoch": 0.3302355738059218, "grad_norm": 0.8641235828399658, "learning_rate": 3.126805357862237e-05, "loss": 0.9122, "step": 1528 }, { "epoch": 0.33045169656364815, "grad_norm": 0.8991526961326599, "learning_rate": 3.125648429181809e-05, "loss": 0.8178, "step": 1529 }, { "epoch": 0.33066781932137457, "grad_norm": 1.029059648513794, "learning_rate": 3.1244909489362526e-05, "loss": 0.97, "step": 1530 }, { "epoch": 0.33088394207910093, "grad_norm": 0.9144630432128906, "learning_rate": 3.12333291769273e-05, "loss": 0.9817, "step": 1531 }, { "epoch": 0.3311000648368273, "grad_norm": 1.0621875524520874, "learning_rate": 3.1221743360186745e-05, "loss": 0.9882, "step": 1532 }, { "epoch": 0.3313161875945537, "grad_norm": 0.8354048728942871, "learning_rate": 3.121015204481788e-05, "loss": 0.8407, "step": 1533 }, { "epoch": 0.3315323103522801, "grad_norm": 0.8807803988456726, "learning_rate": 3.1198555236500435e-05, "loss": 0.814, "step": 1534 }, { "epoch": 0.33174843311000646, "grad_norm": 1.0710721015930176, "learning_rate": 3.118695294091681e-05, "loss": 0.9749, "step": 1535 }, { "epoch": 0.3319645558677329, "grad_norm": 0.9333633780479431, "learning_rate": 3.1175345163752105e-05, "loss": 0.8591, "step": 1536 }, { "epoch": 0.33218067862545925, "grad_norm": 0.986922562122345, "learning_rate": 3.11637319106941e-05, "loss": 1.0004, "step": 1537 }, { "epoch": 0.33239680138318567, "grad_norm": 0.7981417179107666, "learning_rate": 3.115211318743327e-05, "loss": 0.8796, "step": 1538 }, { "epoch": 0.33261292414091204, "grad_norm": 0.914089024066925, "learning_rate": 3.114048899966275e-05, "loss": 0.9013, "step": 1539 }, { "epoch": 0.3328290468986384, "grad_norm": 0.942493736743927, "learning_rate": 3.112885935307839e-05, "loss": 0.9443, "step": 1540 }, { "epoch": 0.3330451696563648, "grad_norm": 0.8391870260238647, "learning_rate": 3.111722425337866e-05, "loss": 0.9616, "step": 1541 }, { "epoch": 0.3332612924140912, "grad_norm": 0.8648127317428589, "learning_rate": 3.110558370626475e-05, "loss": 0.9745, "step": 1542 }, { "epoch": 0.3334774151718176, "grad_norm": 1.1335663795471191, "learning_rate": 3.109393771744049e-05, "loss": 0.9485, "step": 1543 }, { "epoch": 0.333693537929544, "grad_norm": 0.8218050003051758, "learning_rate": 3.10822862926124e-05, "loss": 0.87, "step": 1544 }, { "epoch": 0.33390966068727035, "grad_norm": 0.8941485285758972, "learning_rate": 3.1070629437489644e-05, "loss": 1.0029, "step": 1545 }, { "epoch": 0.33412578344499677, "grad_norm": 0.8925780057907104, "learning_rate": 3.1058967157784056e-05, "loss": 0.8107, "step": 1546 }, { "epoch": 0.33434190620272314, "grad_norm": 0.780466616153717, "learning_rate": 3.104729945921012e-05, "loss": 0.8016, "step": 1547 }, { "epoch": 0.33455802896044956, "grad_norm": 1.005927562713623, "learning_rate": 3.103562634748498e-05, "loss": 0.9431, "step": 1548 }, { "epoch": 0.3347741517181759, "grad_norm": 0.9308692216873169, "learning_rate": 3.102394782832846e-05, "loss": 0.8973, "step": 1549 }, { "epoch": 0.3349902744759023, "grad_norm": 0.9113888740539551, "learning_rate": 3.101226390746298e-05, "loss": 0.8785, "step": 1550 }, { "epoch": 0.3352063972336287, "grad_norm": 0.9140170216560364, "learning_rate": 3.1000574590613636e-05, "loss": 0.8812, "step": 1551 }, { "epoch": 0.3354225199913551, "grad_norm": 0.9670307636260986, "learning_rate": 3.098887988350818e-05, "loss": 0.7821, "step": 1552 }, { "epoch": 0.33563864274908145, "grad_norm": 0.9144891500473022, "learning_rate": 3.097717979187698e-05, "loss": 0.9738, "step": 1553 }, { "epoch": 0.3358547655068079, "grad_norm": 1.0251222848892212, "learning_rate": 3.096547432145306e-05, "loss": 0.8484, "step": 1554 }, { "epoch": 0.33607088826453424, "grad_norm": 0.9502887725830078, "learning_rate": 3.095376347797207e-05, "loss": 0.8715, "step": 1555 }, { "epoch": 0.33628701102226066, "grad_norm": 0.8589299321174622, "learning_rate": 3.0942047267172296e-05, "loss": 0.8837, "step": 1556 }, { "epoch": 0.33650313377998703, "grad_norm": 0.9323639273643494, "learning_rate": 3.093032569479466e-05, "loss": 0.8995, "step": 1557 }, { "epoch": 0.3367192565377134, "grad_norm": 1.05650794506073, "learning_rate": 3.091859876658269e-05, "loss": 0.9877, "step": 1558 }, { "epoch": 0.3369353792954398, "grad_norm": 1.1186269521713257, "learning_rate": 3.090686648828257e-05, "loss": 1.0473, "step": 1559 }, { "epoch": 0.3371515020531662, "grad_norm": 1.087912917137146, "learning_rate": 3.0895128865643086e-05, "loss": 1.1434, "step": 1560 }, { "epoch": 0.3373676248108926, "grad_norm": 0.8387062549591064, "learning_rate": 3.0883385904415644e-05, "loss": 0.9497, "step": 1561 }, { "epoch": 0.337583747568619, "grad_norm": 0.9030735492706299, "learning_rate": 3.087163761035427e-05, "loss": 0.9586, "step": 1562 }, { "epoch": 0.33779987032634534, "grad_norm": 0.8736393451690674, "learning_rate": 3.085988398921559e-05, "loss": 0.9666, "step": 1563 }, { "epoch": 0.33801599308407176, "grad_norm": 0.9995821118354797, "learning_rate": 3.0848125046758863e-05, "loss": 0.9769, "step": 1564 }, { "epoch": 0.33823211584179813, "grad_norm": 1.046533465385437, "learning_rate": 3.0836360788745946e-05, "loss": 0.9898, "step": 1565 }, { "epoch": 0.33844823859952455, "grad_norm": 0.9657412767410278, "learning_rate": 3.082459122094129e-05, "loss": 1.049, "step": 1566 }, { "epoch": 0.3386643613572509, "grad_norm": 0.9767979383468628, "learning_rate": 3.0812816349111956e-05, "loss": 0.9881, "step": 1567 }, { "epoch": 0.3388804841149773, "grad_norm": 0.9699817895889282, "learning_rate": 3.080103617902761e-05, "loss": 1.1529, "step": 1568 }, { "epoch": 0.3390966068727037, "grad_norm": 0.9518383145332336, "learning_rate": 3.0789250716460504e-05, "loss": 1.0584, "step": 1569 }, { "epoch": 0.3393127296304301, "grad_norm": 0.9244017601013184, "learning_rate": 3.077745996718548e-05, "loss": 0.8928, "step": 1570 }, { "epoch": 0.3395288523881565, "grad_norm": 1.14718759059906, "learning_rate": 3.0765663936979994e-05, "loss": 0.8307, "step": 1571 }, { "epoch": 0.33974497514588287, "grad_norm": 1.0088956356048584, "learning_rate": 3.0753862631624066e-05, "loss": 1.0454, "step": 1572 }, { "epoch": 0.33996109790360923, "grad_norm": 0.8901629447937012, "learning_rate": 3.0742056056900304e-05, "loss": 0.9993, "step": 1573 }, { "epoch": 0.34017722066133566, "grad_norm": 0.8994284868240356, "learning_rate": 3.073024421859391e-05, "loss": 0.8864, "step": 1574 }, { "epoch": 0.340393343419062, "grad_norm": 0.9857457876205444, "learning_rate": 3.071842712249265e-05, "loss": 0.9714, "step": 1575 }, { "epoch": 0.3406094661767884, "grad_norm": 1.186560034751892, "learning_rate": 3.070660477438688e-05, "loss": 1.0122, "step": 1576 }, { "epoch": 0.3408255889345148, "grad_norm": 0.9748552441596985, "learning_rate": 3.0694777180069515e-05, "loss": 1.0159, "step": 1577 }, { "epoch": 0.3410417116922412, "grad_norm": 1.0009037256240845, "learning_rate": 3.068294434533606e-05, "loss": 1.112, "step": 1578 }, { "epoch": 0.3412578344499676, "grad_norm": 0.8519662618637085, "learning_rate": 3.067110627598457e-05, "loss": 0.7989, "step": 1579 }, { "epoch": 0.34147395720769397, "grad_norm": 0.9448208808898926, "learning_rate": 3.065926297781567e-05, "loss": 0.7649, "step": 1580 }, { "epoch": 0.34169007996542033, "grad_norm": 0.8865207433700562, "learning_rate": 3.0647414456632554e-05, "loss": 1.0816, "step": 1581 }, { "epoch": 0.34190620272314676, "grad_norm": 1.0456334352493286, "learning_rate": 3.063556071824097e-05, "loss": 0.6623, "step": 1582 }, { "epoch": 0.3421223254808731, "grad_norm": 1.0350103378295898, "learning_rate": 3.0623701768449214e-05, "loss": 1.1929, "step": 1583 }, { "epoch": 0.34233844823859955, "grad_norm": 0.8997272849082947, "learning_rate": 3.061183761306816e-05, "loss": 0.9787, "step": 1584 }, { "epoch": 0.3425545709963259, "grad_norm": 1.0938043594360352, "learning_rate": 3.05999682579112e-05, "loss": 1.0633, "step": 1585 }, { "epoch": 0.3427706937540523, "grad_norm": 0.8825398087501526, "learning_rate": 3.05880937087943e-05, "loss": 0.7721, "step": 1586 }, { "epoch": 0.3429868165117787, "grad_norm": 0.8789626955986023, "learning_rate": 3.057621397153596e-05, "loss": 0.9942, "step": 1587 }, { "epoch": 0.34320293926950507, "grad_norm": 0.8417037725448608, "learning_rate": 3.056432905195721e-05, "loss": 0.977, "step": 1588 }, { "epoch": 0.3434190620272315, "grad_norm": 0.8185150623321533, "learning_rate": 3.055243895588167e-05, "loss": 0.7804, "step": 1589 }, { "epoch": 0.34363518478495786, "grad_norm": 0.8963676691055298, "learning_rate": 3.054054368913541e-05, "loss": 0.9526, "step": 1590 }, { "epoch": 0.3438513075426842, "grad_norm": 0.8924521803855896, "learning_rate": 3.052864325754712e-05, "loss": 0.9355, "step": 1591 }, { "epoch": 0.34406743030041065, "grad_norm": 0.8117523193359375, "learning_rate": 3.051673766694797e-05, "loss": 0.8333, "step": 1592 }, { "epoch": 0.344283553058137, "grad_norm": 0.8220072984695435, "learning_rate": 3.050482692317168e-05, "loss": 0.83, "step": 1593 }, { "epoch": 0.34449967581586344, "grad_norm": 0.9793494343757629, "learning_rate": 3.0492911032054472e-05, "loss": 1.0176, "step": 1594 }, { "epoch": 0.3447157985735898, "grad_norm": 0.8228978514671326, "learning_rate": 3.048098999943512e-05, "loss": 0.8449, "step": 1595 }, { "epoch": 0.34493192133131617, "grad_norm": 0.9178285598754883, "learning_rate": 3.0469063831154896e-05, "loss": 0.9921, "step": 1596 }, { "epoch": 0.3451480440890426, "grad_norm": 0.8306567668914795, "learning_rate": 3.0457132533057598e-05, "loss": 0.957, "step": 1597 }, { "epoch": 0.34536416684676896, "grad_norm": 0.9351313710212708, "learning_rate": 3.0445196110989533e-05, "loss": 1.1073, "step": 1598 }, { "epoch": 0.3455802896044953, "grad_norm": 0.8027867078781128, "learning_rate": 3.0433254570799514e-05, "loss": 1.0925, "step": 1599 }, { "epoch": 0.34579641236222175, "grad_norm": 0.9075708985328674, "learning_rate": 3.0421307918338873e-05, "loss": 0.9232, "step": 1600 }, { "epoch": 0.3460125351199481, "grad_norm": 0.994594156742096, "learning_rate": 3.0409356159461447e-05, "loss": 1.0689, "step": 1601 }, { "epoch": 0.34622865787767454, "grad_norm": 0.9905773997306824, "learning_rate": 3.0397399300023568e-05, "loss": 0.9695, "step": 1602 }, { "epoch": 0.3464447806354009, "grad_norm": 0.9328803420066833, "learning_rate": 3.038543734588406e-05, "loss": 0.8649, "step": 1603 }, { "epoch": 0.3466609033931273, "grad_norm": 1.0163012742996216, "learning_rate": 3.037347030290427e-05, "loss": 0.9543, "step": 1604 }, { "epoch": 0.3468770261508537, "grad_norm": 0.9779214859008789, "learning_rate": 3.0361498176948e-05, "loss": 0.8091, "step": 1605 }, { "epoch": 0.34709314890858006, "grad_norm": 0.8842347264289856, "learning_rate": 3.0349520973881582e-05, "loss": 0.849, "step": 1606 }, { "epoch": 0.3473092716663065, "grad_norm": 0.9716510772705078, "learning_rate": 3.0337538699573814e-05, "loss": 1.102, "step": 1607 }, { "epoch": 0.34752539442403285, "grad_norm": 0.9485663175582886, "learning_rate": 3.032555135989597e-05, "loss": 0.8349, "step": 1608 }, { "epoch": 0.3477415171817592, "grad_norm": 1.0341752767562866, "learning_rate": 3.0313558960721844e-05, "loss": 0.8645, "step": 1609 }, { "epoch": 0.34795763993948564, "grad_norm": 0.9334589242935181, "learning_rate": 3.0301561507927655e-05, "loss": 1.0647, "step": 1610 }, { "epoch": 0.348173762697212, "grad_norm": 0.9249916672706604, "learning_rate": 3.0289559007392148e-05, "loss": 0.8871, "step": 1611 }, { "epoch": 0.34838988545493843, "grad_norm": 0.8663226366043091, "learning_rate": 3.027755146499651e-05, "loss": 0.8528, "step": 1612 }, { "epoch": 0.3486060082126648, "grad_norm": 0.8412871956825256, "learning_rate": 3.0265538886624413e-05, "loss": 0.8306, "step": 1613 }, { "epoch": 0.34882213097039116, "grad_norm": 0.9474676847457886, "learning_rate": 3.0253521278161996e-05, "loss": 0.8494, "step": 1614 }, { "epoch": 0.3490382537281176, "grad_norm": 1.1441682577133179, "learning_rate": 3.0241498645497852e-05, "loss": 1.1878, "step": 1615 }, { "epoch": 0.34925437648584395, "grad_norm": 0.8670212626457214, "learning_rate": 3.0229470994523048e-05, "loss": 0.8506, "step": 1616 }, { "epoch": 0.3494704992435703, "grad_norm": 0.801177978515625, "learning_rate": 3.0217438331131102e-05, "loss": 0.7948, "step": 1617 }, { "epoch": 0.34968662200129674, "grad_norm": 1.0146235227584839, "learning_rate": 3.0205400661218e-05, "loss": 0.8659, "step": 1618 }, { "epoch": 0.3499027447590231, "grad_norm": 1.0296528339385986, "learning_rate": 3.0193357990682153e-05, "loss": 1.0093, "step": 1619 }, { "epoch": 0.35011886751674953, "grad_norm": 1.0370322465896606, "learning_rate": 3.0181310325424464e-05, "loss": 0.9417, "step": 1620 }, { "epoch": 0.3503349902744759, "grad_norm": 0.912803053855896, "learning_rate": 3.016925767134825e-05, "loss": 1.0228, "step": 1621 }, { "epoch": 0.35055111303220227, "grad_norm": 0.9205127358436584, "learning_rate": 3.015720003435928e-05, "loss": 0.8741, "step": 1622 }, { "epoch": 0.3507672357899287, "grad_norm": 0.8457703590393066, "learning_rate": 3.0145137420365774e-05, "loss": 0.9972, "step": 1623 }, { "epoch": 0.35098335854765506, "grad_norm": 0.9067233204841614, "learning_rate": 3.013306983527839e-05, "loss": 0.8588, "step": 1624 }, { "epoch": 0.3511994813053815, "grad_norm": 1.0689157247543335, "learning_rate": 3.012099728501021e-05, "loss": 0.9996, "step": 1625 }, { "epoch": 0.35141560406310784, "grad_norm": 0.752790629863739, "learning_rate": 3.010891977547675e-05, "loss": 0.9512, "step": 1626 }, { "epoch": 0.3516317268208342, "grad_norm": 0.905785322189331, "learning_rate": 3.009683731259598e-05, "loss": 1.0775, "step": 1627 }, { "epoch": 0.35184784957856063, "grad_norm": 1.0364049673080444, "learning_rate": 3.008474990228825e-05, "loss": 0.9045, "step": 1628 }, { "epoch": 0.352063972336287, "grad_norm": 0.9609606266021729, "learning_rate": 3.0072657550476402e-05, "loss": 0.896, "step": 1629 }, { "epoch": 0.3522800950940134, "grad_norm": 0.916305422782898, "learning_rate": 3.006056026308562e-05, "loss": 1.0727, "step": 1630 }, { "epoch": 0.3524962178517398, "grad_norm": 0.8478757739067078, "learning_rate": 3.0048458046043573e-05, "loss": 0.9927, "step": 1631 }, { "epoch": 0.35271234060946616, "grad_norm": 0.8042452335357666, "learning_rate": 3.0036350905280316e-05, "loss": 0.7312, "step": 1632 }, { "epoch": 0.3529284633671926, "grad_norm": 0.9166516065597534, "learning_rate": 3.002423884672831e-05, "loss": 0.8677, "step": 1633 }, { "epoch": 0.35314458612491895, "grad_norm": 0.9375431537628174, "learning_rate": 3.0012121876322443e-05, "loss": 0.8155, "step": 1634 }, { "epoch": 0.35336070888264537, "grad_norm": 1.0676640272140503, "learning_rate": 3.0000000000000004e-05, "loss": 0.894, "step": 1635 }, { "epoch": 0.35357683164037174, "grad_norm": 0.9676708579063416, "learning_rate": 2.9987873223700677e-05, "loss": 0.9801, "step": 1636 }, { "epoch": 0.3537929543980981, "grad_norm": 0.9622818231582642, "learning_rate": 2.9975741553366564e-05, "loss": 0.8829, "step": 1637 }, { "epoch": 0.3540090771558245, "grad_norm": 0.7891084551811218, "learning_rate": 2.9963604994942152e-05, "loss": 0.8298, "step": 1638 }, { "epoch": 0.3542251999135509, "grad_norm": 0.8852497339248657, "learning_rate": 2.9951463554374317e-05, "loss": 0.9008, "step": 1639 }, { "epoch": 0.35444132267127726, "grad_norm": 0.9709888696670532, "learning_rate": 2.9939317237612352e-05, "loss": 1.0987, "step": 1640 }, { "epoch": 0.3546574454290037, "grad_norm": 0.9508516788482666, "learning_rate": 2.9927166050607912e-05, "loss": 0.8624, "step": 1641 }, { "epoch": 0.35487356818673005, "grad_norm": 0.9188629984855652, "learning_rate": 2.991500999931506e-05, "loss": 1.0215, "step": 1642 }, { "epoch": 0.35508969094445647, "grad_norm": 0.900020182132721, "learning_rate": 2.990284908969023e-05, "loss": 0.8522, "step": 1643 }, { "epoch": 0.35530581370218284, "grad_norm": 0.8772697448730469, "learning_rate": 2.989068332769223e-05, "loss": 0.9635, "step": 1644 }, { "epoch": 0.3555219364599092, "grad_norm": 0.8928239941596985, "learning_rate": 2.9878512719282267e-05, "loss": 0.9649, "step": 1645 }, { "epoch": 0.3557380592176356, "grad_norm": 0.9713953137397766, "learning_rate": 2.9866337270423912e-05, "loss": 0.9589, "step": 1646 }, { "epoch": 0.355954181975362, "grad_norm": 0.9290111660957336, "learning_rate": 2.98541569870831e-05, "loss": 0.9241, "step": 1647 }, { "epoch": 0.3561703047330884, "grad_norm": 0.8772423267364502, "learning_rate": 2.9841971875228134e-05, "loss": 1.0103, "step": 1648 }, { "epoch": 0.3563864274908148, "grad_norm": 1.002431035041809, "learning_rate": 2.9829781940829718e-05, "loss": 0.9588, "step": 1649 }, { "epoch": 0.35660255024854115, "grad_norm": 0.8712643384933472, "learning_rate": 2.981758718986086e-05, "loss": 0.8737, "step": 1650 }, { "epoch": 0.3568186730062676, "grad_norm": 0.9159672260284424, "learning_rate": 2.980538762829698e-05, "loss": 1.0155, "step": 1651 }, { "epoch": 0.35703479576399394, "grad_norm": 0.9155294895172119, "learning_rate": 2.9793183262115824e-05, "loss": 0.8415, "step": 1652 }, { "epoch": 0.35725091852172036, "grad_norm": 1.0039186477661133, "learning_rate": 2.9780974097297516e-05, "loss": 0.9856, "step": 1653 }, { "epoch": 0.35746704127944673, "grad_norm": 0.903708815574646, "learning_rate": 2.976876013982451e-05, "loss": 0.9116, "step": 1654 }, { "epoch": 0.3576831640371731, "grad_norm": 0.9548552632331848, "learning_rate": 2.9756541395681613e-05, "loss": 1.2098, "step": 1655 }, { "epoch": 0.3578992867948995, "grad_norm": 0.912732720375061, "learning_rate": 2.974431787085599e-05, "loss": 0.9992, "step": 1656 }, { "epoch": 0.3581154095526259, "grad_norm": 0.8119702339172363, "learning_rate": 2.9732089571337126e-05, "loss": 0.9309, "step": 1657 }, { "epoch": 0.3583315323103523, "grad_norm": 0.9073593616485596, "learning_rate": 2.9719856503116886e-05, "loss": 1.0523, "step": 1658 }, { "epoch": 0.3585476550680787, "grad_norm": 0.8632818460464478, "learning_rate": 2.970761867218941e-05, "loss": 1.0282, "step": 1659 }, { "epoch": 0.35876377782580504, "grad_norm": 0.9574206471443176, "learning_rate": 2.9695376084551237e-05, "loss": 1.0417, "step": 1660 }, { "epoch": 0.35897990058353146, "grad_norm": 0.8399937748908997, "learning_rate": 2.9683128746201194e-05, "loss": 0.8353, "step": 1661 }, { "epoch": 0.35919602334125783, "grad_norm": 0.852632999420166, "learning_rate": 2.9670876663140443e-05, "loss": 0.9927, "step": 1662 }, { "epoch": 0.3594121460989842, "grad_norm": 0.9457361698150635, "learning_rate": 2.965861984137249e-05, "loss": 1.0071, "step": 1663 }, { "epoch": 0.3596282688567106, "grad_norm": 0.6972500681877136, "learning_rate": 2.9646358286903144e-05, "loss": 0.7935, "step": 1664 }, { "epoch": 0.359844391614437, "grad_norm": 0.9250654578208923, "learning_rate": 2.963409200574053e-05, "loss": 0.8254, "step": 1665 }, { "epoch": 0.3600605143721634, "grad_norm": 0.8242780566215515, "learning_rate": 2.9621821003895107e-05, "loss": 0.8502, "step": 1666 }, { "epoch": 0.3602766371298898, "grad_norm": 0.8958160877227783, "learning_rate": 2.960954528737964e-05, "loss": 0.9306, "step": 1667 }, { "epoch": 0.36049275988761614, "grad_norm": 0.9546581506729126, "learning_rate": 2.9597264862209183e-05, "loss": 0.8418, "step": 1668 }, { "epoch": 0.36070888264534257, "grad_norm": 1.0000452995300293, "learning_rate": 2.958497973440114e-05, "loss": 0.8989, "step": 1669 }, { "epoch": 0.36092500540306893, "grad_norm": 0.8637808561325073, "learning_rate": 2.9572689909975182e-05, "loss": 0.9931, "step": 1670 }, { "epoch": 0.36114112816079535, "grad_norm": 0.9332252740859985, "learning_rate": 2.9560395394953295e-05, "loss": 0.891, "step": 1671 }, { "epoch": 0.3613572509185217, "grad_norm": 0.8956640362739563, "learning_rate": 2.9548096195359765e-05, "loss": 0.8972, "step": 1672 }, { "epoch": 0.3615733736762481, "grad_norm": 0.8492765426635742, "learning_rate": 2.9535792317221178e-05, "loss": 0.9525, "step": 1673 }, { "epoch": 0.3617894964339745, "grad_norm": 0.8705052733421326, "learning_rate": 2.9523483766566392e-05, "loss": 0.9671, "step": 1674 }, { "epoch": 0.3620056191917009, "grad_norm": 0.9933458566665649, "learning_rate": 2.9511170549426577e-05, "loss": 1.1338, "step": 1675 }, { "epoch": 0.3622217419494273, "grad_norm": 0.8857420086860657, "learning_rate": 2.949885267183518e-05, "loss": 0.9703, "step": 1676 }, { "epoch": 0.36243786470715367, "grad_norm": 0.926338791847229, "learning_rate": 2.9486530139827926e-05, "loss": 1.1379, "step": 1677 }, { "epoch": 0.36265398746488003, "grad_norm": 0.9619150161743164, "learning_rate": 2.9474202959442848e-05, "loss": 0.9449, "step": 1678 }, { "epoch": 0.36287011022260646, "grad_norm": 0.882526695728302, "learning_rate": 2.9461871136720205e-05, "loss": 0.9521, "step": 1679 }, { "epoch": 0.3630862329803328, "grad_norm": 0.8300138711929321, "learning_rate": 2.9449534677702584e-05, "loss": 1.04, "step": 1680 }, { "epoch": 0.3633023557380592, "grad_norm": 0.9559792280197144, "learning_rate": 2.9437193588434816e-05, "loss": 1.0917, "step": 1681 }, { "epoch": 0.3635184784957856, "grad_norm": 0.9795278310775757, "learning_rate": 2.9424847874964003e-05, "loss": 0.8666, "step": 1682 }, { "epoch": 0.363734601253512, "grad_norm": 0.8716777563095093, "learning_rate": 2.941249754333952e-05, "loss": 0.8962, "step": 1683 }, { "epoch": 0.3639507240112384, "grad_norm": 0.867805540561676, "learning_rate": 2.9400142599613005e-05, "loss": 0.7833, "step": 1684 }, { "epoch": 0.36416684676896477, "grad_norm": 0.8331664800643921, "learning_rate": 2.9387783049838338e-05, "loss": 1.0517, "step": 1685 }, { "epoch": 0.36438296952669114, "grad_norm": 0.9284753203392029, "learning_rate": 2.9375418900071676e-05, "loss": 0.9471, "step": 1686 }, { "epoch": 0.36459909228441756, "grad_norm": 0.8487952351570129, "learning_rate": 2.9363050156371433e-05, "loss": 0.9644, "step": 1687 }, { "epoch": 0.3648152150421439, "grad_norm": 0.8749293684959412, "learning_rate": 2.9350676824798256e-05, "loss": 1.0773, "step": 1688 }, { "epoch": 0.36503133779987035, "grad_norm": 0.8109781742095947, "learning_rate": 2.9338298911415052e-05, "loss": 0.7734, "step": 1689 }, { "epoch": 0.3652474605575967, "grad_norm": 0.8879095315933228, "learning_rate": 2.932591642228696e-05, "loss": 0.7161, "step": 1690 }, { "epoch": 0.3654635833153231, "grad_norm": 0.9725330471992493, "learning_rate": 2.9313529363481386e-05, "loss": 1.0409, "step": 1691 }, { "epoch": 0.3656797060730495, "grad_norm": 1.1101995706558228, "learning_rate": 2.9301137741067958e-05, "loss": 1.1728, "step": 1692 }, { "epoch": 0.36589582883077587, "grad_norm": 0.8863847255706787, "learning_rate": 2.928874156111853e-05, "loss": 0.8595, "step": 1693 }, { "epoch": 0.3661119515885023, "grad_norm": 1.0283147096633911, "learning_rate": 2.9276340829707216e-05, "loss": 0.8235, "step": 1694 }, { "epoch": 0.36632807434622866, "grad_norm": 0.8551615476608276, "learning_rate": 2.9263935552910333e-05, "loss": 0.7459, "step": 1695 }, { "epoch": 0.366544197103955, "grad_norm": 0.8963771462440491, "learning_rate": 2.9251525736806454e-05, "loss": 0.989, "step": 1696 }, { "epoch": 0.36676031986168145, "grad_norm": 0.8559364080429077, "learning_rate": 2.9239111387476336e-05, "loss": 0.9415, "step": 1697 }, { "epoch": 0.3669764426194078, "grad_norm": 1.028477668762207, "learning_rate": 2.9226692511003007e-05, "loss": 0.9121, "step": 1698 }, { "epoch": 0.36719256537713424, "grad_norm": 0.8393043279647827, "learning_rate": 2.9214269113471672e-05, "loss": 0.9051, "step": 1699 }, { "epoch": 0.3674086881348606, "grad_norm": 0.959419310092926, "learning_rate": 2.920184120096977e-05, "loss": 1.0464, "step": 1700 }, { "epoch": 0.367624810892587, "grad_norm": 0.9398619532585144, "learning_rate": 2.9189408779586953e-05, "loss": 0.9417, "step": 1701 }, { "epoch": 0.3678409336503134, "grad_norm": 0.9295390248298645, "learning_rate": 2.9176971855415083e-05, "loss": 1.106, "step": 1702 }, { "epoch": 0.36805705640803976, "grad_norm": 1.0452978610992432, "learning_rate": 2.916453043454821e-05, "loss": 1.1577, "step": 1703 }, { "epoch": 0.36827317916576613, "grad_norm": 0.9328250885009766, "learning_rate": 2.9152084523082616e-05, "loss": 0.843, "step": 1704 }, { "epoch": 0.36848930192349255, "grad_norm": 0.9288171529769897, "learning_rate": 2.9139634127116756e-05, "loss": 0.9252, "step": 1705 }, { "epoch": 0.3687054246812189, "grad_norm": 0.9420575499534607, "learning_rate": 2.9127179252751305e-05, "loss": 1.1016, "step": 1706 }, { "epoch": 0.36892154743894534, "grad_norm": 0.8303332924842834, "learning_rate": 2.9114719906089117e-05, "loss": 0.8496, "step": 1707 }, { "epoch": 0.3691376701966717, "grad_norm": 0.9922606945037842, "learning_rate": 2.9102256093235245e-05, "loss": 0.9668, "step": 1708 }, { "epoch": 0.3693537929543981, "grad_norm": 0.8178890347480774, "learning_rate": 2.908978782029693e-05, "loss": 0.8172, "step": 1709 }, { "epoch": 0.3695699157121245, "grad_norm": 0.8483377695083618, "learning_rate": 2.9077315093383595e-05, "loss": 0.8602, "step": 1710 }, { "epoch": 0.36978603846985086, "grad_norm": 0.7788403630256653, "learning_rate": 2.906483791860685e-05, "loss": 0.8256, "step": 1711 }, { "epoch": 0.3700021612275773, "grad_norm": 0.8779165744781494, "learning_rate": 2.905235630208048e-05, "loss": 0.8665, "step": 1712 }, { "epoch": 0.37021828398530365, "grad_norm": 0.8570807576179504, "learning_rate": 2.9039870249920447e-05, "loss": 1.023, "step": 1713 }, { "epoch": 0.37043440674303, "grad_norm": 0.8159751296043396, "learning_rate": 2.902737976824489e-05, "loss": 0.8657, "step": 1714 }, { "epoch": 0.37065052950075644, "grad_norm": 0.9290934801101685, "learning_rate": 2.901488486317411e-05, "loss": 0.8364, "step": 1715 }, { "epoch": 0.3708666522584828, "grad_norm": 0.9076923727989197, "learning_rate": 2.9002385540830585e-05, "loss": 0.9955, "step": 1716 }, { "epoch": 0.37108277501620923, "grad_norm": 0.8310691118240356, "learning_rate": 2.8989881807338964e-05, "loss": 0.7522, "step": 1717 }, { "epoch": 0.3712988977739356, "grad_norm": 1.0004169940948486, "learning_rate": 2.8977373668826035e-05, "loss": 1.0444, "step": 1718 }, { "epoch": 0.37151502053166197, "grad_norm": 0.8425519466400146, "learning_rate": 2.8964861131420758e-05, "loss": 0.8879, "step": 1719 }, { "epoch": 0.3717311432893884, "grad_norm": 0.9142181873321533, "learning_rate": 2.8952344201254253e-05, "loss": 0.9851, "step": 1720 }, { "epoch": 0.37194726604711476, "grad_norm": 0.9255647659301758, "learning_rate": 2.8939822884459786e-05, "loss": 0.8724, "step": 1721 }, { "epoch": 0.3721633888048411, "grad_norm": 0.8818854689598083, "learning_rate": 2.8927297187172772e-05, "loss": 0.9729, "step": 1722 }, { "epoch": 0.37237951156256754, "grad_norm": 0.768568217754364, "learning_rate": 2.891476711553077e-05, "loss": 0.8083, "step": 1723 }, { "epoch": 0.3725956343202939, "grad_norm": 0.8540796041488647, "learning_rate": 2.8902232675673486e-05, "loss": 0.9899, "step": 1724 }, { "epoch": 0.37281175707802033, "grad_norm": 0.9795287251472473, "learning_rate": 2.8889693873742783e-05, "loss": 0.9475, "step": 1725 }, { "epoch": 0.3730278798357467, "grad_norm": 0.7936578989028931, "learning_rate": 2.8877150715882613e-05, "loss": 0.766, "step": 1726 }, { "epoch": 0.37324400259347307, "grad_norm": 0.8888428211212158, "learning_rate": 2.886460320823913e-05, "loss": 1.121, "step": 1727 }, { "epoch": 0.3734601253511995, "grad_norm": 0.8631256222724915, "learning_rate": 2.8852051356960555e-05, "loss": 0.827, "step": 1728 }, { "epoch": 0.37367624810892586, "grad_norm": 1.0968459844589233, "learning_rate": 2.8839495168197288e-05, "loss": 0.8127, "step": 1729 }, { "epoch": 0.3738923708666523, "grad_norm": 0.9336002469062805, "learning_rate": 2.8826934648101815e-05, "loss": 0.8939, "step": 1730 }, { "epoch": 0.37410849362437865, "grad_norm": 0.8790167570114136, "learning_rate": 2.8814369802828773e-05, "loss": 0.8584, "step": 1731 }, { "epoch": 0.374324616382105, "grad_norm": 0.8758849501609802, "learning_rate": 2.8801800638534906e-05, "loss": 0.8605, "step": 1732 }, { "epoch": 0.37454073913983144, "grad_norm": 0.7395503520965576, "learning_rate": 2.8789227161379068e-05, "loss": 0.8336, "step": 1733 }, { "epoch": 0.3747568618975578, "grad_norm": 0.9526098966598511, "learning_rate": 2.8776649377522245e-05, "loss": 1.0668, "step": 1734 }, { "epoch": 0.3749729846552842, "grad_norm": 0.8846078515052795, "learning_rate": 2.8764067293127506e-05, "loss": 0.8983, "step": 1735 }, { "epoch": 0.3751891074130106, "grad_norm": 0.9965586066246033, "learning_rate": 2.875148091436006e-05, "loss": 0.9229, "step": 1736 }, { "epoch": 0.37540523017073696, "grad_norm": 0.8903135061264038, "learning_rate": 2.873889024738719e-05, "loss": 1.0622, "step": 1737 }, { "epoch": 0.3756213529284634, "grad_norm": 0.9423289895057678, "learning_rate": 2.872629529837831e-05, "loss": 0.9667, "step": 1738 }, { "epoch": 0.37583747568618975, "grad_norm": 0.8821099400520325, "learning_rate": 2.8713696073504897e-05, "loss": 0.8798, "step": 1739 }, { "epoch": 0.37605359844391617, "grad_norm": 0.8812253475189209, "learning_rate": 2.870109257894056e-05, "loss": 0.8199, "step": 1740 }, { "epoch": 0.37626972120164254, "grad_norm": 0.8192459344863892, "learning_rate": 2.8688484820860965e-05, "loss": 0.8676, "step": 1741 }, { "epoch": 0.3764858439593689, "grad_norm": 0.8847028613090515, "learning_rate": 2.8675872805443895e-05, "loss": 1.0504, "step": 1742 }, { "epoch": 0.3767019667170953, "grad_norm": 1.005643606185913, "learning_rate": 2.8663256538869212e-05, "loss": 1.2306, "step": 1743 }, { "epoch": 0.3769180894748217, "grad_norm": 0.779718279838562, "learning_rate": 2.8650636027318844e-05, "loss": 0.7796, "step": 1744 }, { "epoch": 0.37713421223254806, "grad_norm": 1.0670115947723389, "learning_rate": 2.8638011276976825e-05, "loss": 0.8797, "step": 1745 }, { "epoch": 0.3773503349902745, "grad_norm": 0.8027476668357849, "learning_rate": 2.8625382294029242e-05, "loss": 0.7303, "step": 1746 }, { "epoch": 0.37756645774800085, "grad_norm": 0.8964573740959167, "learning_rate": 2.8612749084664282e-05, "loss": 0.8716, "step": 1747 }, { "epoch": 0.3777825805057273, "grad_norm": 0.9428089261054993, "learning_rate": 2.8600111655072172e-05, "loss": 1.0786, "step": 1748 }, { "epoch": 0.37799870326345364, "grad_norm": 0.8974640965461731, "learning_rate": 2.858747001144524e-05, "loss": 0.9482, "step": 1749 }, { "epoch": 0.37821482602118, "grad_norm": 0.9904823303222656, "learning_rate": 2.857482415997785e-05, "loss": 0.9166, "step": 1750 }, { "epoch": 0.37843094877890643, "grad_norm": 0.9504700899124146, "learning_rate": 2.856217410686644e-05, "loss": 0.9677, "step": 1751 }, { "epoch": 0.3786470715366328, "grad_norm": 0.9497498273849487, "learning_rate": 2.8549519858309514e-05, "loss": 0.9889, "step": 1752 }, { "epoch": 0.3788631942943592, "grad_norm": 0.9732851982116699, "learning_rate": 2.853686142050762e-05, "loss": 0.9731, "step": 1753 }, { "epoch": 0.3790793170520856, "grad_norm": 0.7974346876144409, "learning_rate": 2.8524198799663367e-05, "loss": 0.7792, "step": 1754 }, { "epoch": 0.37929543980981195, "grad_norm": 0.7844465374946594, "learning_rate": 2.851153200198141e-05, "loss": 0.8316, "step": 1755 }, { "epoch": 0.3795115625675384, "grad_norm": 0.8197035789489746, "learning_rate": 2.8498861033668444e-05, "loss": 0.718, "step": 1756 }, { "epoch": 0.37972768532526474, "grad_norm": 1.013469934463501, "learning_rate": 2.8486185900933212e-05, "loss": 0.962, "step": 1757 }, { "epoch": 0.37994380808299116, "grad_norm": 0.7966868877410889, "learning_rate": 2.8473506609986515e-05, "loss": 0.8272, "step": 1758 }, { "epoch": 0.38015993084071753, "grad_norm": 0.9745547771453857, "learning_rate": 2.8460823167041162e-05, "loss": 1.0775, "step": 1759 }, { "epoch": 0.3803760535984439, "grad_norm": 1.1553611755371094, "learning_rate": 2.8448135578312018e-05, "loss": 0.9355, "step": 1760 }, { "epoch": 0.3805921763561703, "grad_norm": 0.9141063094139099, "learning_rate": 2.8435443850015967e-05, "loss": 0.8721, "step": 1761 }, { "epoch": 0.3808082991138967, "grad_norm": 0.8130009770393372, "learning_rate": 2.842274798837193e-05, "loss": 0.9071, "step": 1762 }, { "epoch": 0.3810244218716231, "grad_norm": 0.8336833119392395, "learning_rate": 2.8410047999600853e-05, "loss": 0.9596, "step": 1763 }, { "epoch": 0.3812405446293495, "grad_norm": 1.0067347288131714, "learning_rate": 2.8397343889925694e-05, "loss": 1.0109, "step": 1764 }, { "epoch": 0.38145666738707584, "grad_norm": 0.9170424342155457, "learning_rate": 2.8384635665571444e-05, "loss": 0.9603, "step": 1765 }, { "epoch": 0.38167279014480227, "grad_norm": 1.063937783241272, "learning_rate": 2.8371923332765097e-05, "loss": 1.0738, "step": 1766 }, { "epoch": 0.38188891290252863, "grad_norm": 0.9675772786140442, "learning_rate": 2.8359206897735673e-05, "loss": 0.9906, "step": 1767 }, { "epoch": 0.382105035660255, "grad_norm": 0.8671706318855286, "learning_rate": 2.8346486366714185e-05, "loss": 0.971, "step": 1768 }, { "epoch": 0.3823211584179814, "grad_norm": 0.8462857007980347, "learning_rate": 2.833376174593368e-05, "loss": 0.9277, "step": 1769 }, { "epoch": 0.3825372811757078, "grad_norm": 0.8690409660339355, "learning_rate": 2.832103304162918e-05, "loss": 0.868, "step": 1770 }, { "epoch": 0.3827534039334342, "grad_norm": 0.8862578868865967, "learning_rate": 2.8308300260037734e-05, "loss": 0.7762, "step": 1771 }, { "epoch": 0.3829695266911606, "grad_norm": 0.9211979508399963, "learning_rate": 2.8295563407398358e-05, "loss": 0.8859, "step": 1772 }, { "epoch": 0.38318564944888694, "grad_norm": 0.8833542466163635, "learning_rate": 2.8282822489952093e-05, "loss": 0.8993, "step": 1773 }, { "epoch": 0.38340177220661337, "grad_norm": 0.9462282657623291, "learning_rate": 2.8270077513941953e-05, "loss": 0.9645, "step": 1774 }, { "epoch": 0.38361789496433973, "grad_norm": 0.8705824017524719, "learning_rate": 2.8257328485612948e-05, "loss": 0.8253, "step": 1775 }, { "epoch": 0.38383401772206616, "grad_norm": 0.8962376713752747, "learning_rate": 2.8244575411212075e-05, "loss": 0.8689, "step": 1776 }, { "epoch": 0.3840501404797925, "grad_norm": 0.9097771644592285, "learning_rate": 2.8231818296988304e-05, "loss": 0.8516, "step": 1777 }, { "epoch": 0.3842662632375189, "grad_norm": 0.8236470222473145, "learning_rate": 2.82190571491926e-05, "loss": 0.8598, "step": 1778 }, { "epoch": 0.3844823859952453, "grad_norm": 0.9493615031242371, "learning_rate": 2.8206291974077894e-05, "loss": 0.976, "step": 1779 }, { "epoch": 0.3846985087529717, "grad_norm": 0.9372259378433228, "learning_rate": 2.819352277789909e-05, "loss": 0.9265, "step": 1780 }, { "epoch": 0.3849146315106981, "grad_norm": 0.9209122061729431, "learning_rate": 2.8180749566913067e-05, "loss": 0.8781, "step": 1781 }, { "epoch": 0.38513075426842447, "grad_norm": 1.0795836448669434, "learning_rate": 2.8167972347378666e-05, "loss": 1.0435, "step": 1782 }, { "epoch": 0.38534687702615084, "grad_norm": 0.9621537923812866, "learning_rate": 2.8155191125556696e-05, "loss": 0.9987, "step": 1783 }, { "epoch": 0.38556299978387726, "grad_norm": 0.9384992122650146, "learning_rate": 2.8142405907709926e-05, "loss": 0.9227, "step": 1784 }, { "epoch": 0.3857791225416036, "grad_norm": 0.93331378698349, "learning_rate": 2.8129616700103088e-05, "loss": 1.0711, "step": 1785 }, { "epoch": 0.38599524529933, "grad_norm": 0.9580069780349731, "learning_rate": 2.811682350900285e-05, "loss": 0.9268, "step": 1786 }, { "epoch": 0.3862113680570564, "grad_norm": 0.9399328827857971, "learning_rate": 2.810402634067787e-05, "loss": 1.1607, "step": 1787 }, { "epoch": 0.3864274908147828, "grad_norm": 1.0003597736358643, "learning_rate": 2.8091225201398703e-05, "loss": 1.0215, "step": 1788 }, { "epoch": 0.3866436135725092, "grad_norm": 1.009648084640503, "learning_rate": 2.80784200974379e-05, "loss": 1.0312, "step": 1789 }, { "epoch": 0.38685973633023557, "grad_norm": 0.9388926029205322, "learning_rate": 2.8065611035069923e-05, "loss": 0.9376, "step": 1790 }, { "epoch": 0.38707585908796194, "grad_norm": 0.7759811282157898, "learning_rate": 2.8052798020571182e-05, "loss": 0.914, "step": 1791 }, { "epoch": 0.38729198184568836, "grad_norm": 0.976290762424469, "learning_rate": 2.8039981060220028e-05, "loss": 0.759, "step": 1792 }, { "epoch": 0.3875081046034147, "grad_norm": 0.8384339213371277, "learning_rate": 2.802716016029674e-05, "loss": 0.8724, "step": 1793 }, { "epoch": 0.38772422736114115, "grad_norm": 0.9807355999946594, "learning_rate": 2.801433532708353e-05, "loss": 0.8927, "step": 1794 }, { "epoch": 0.3879403501188675, "grad_norm": 0.9871201515197754, "learning_rate": 2.8001506566864534e-05, "loss": 0.9795, "step": 1795 }, { "epoch": 0.3881564728765939, "grad_norm": 1.03062105178833, "learning_rate": 2.7988673885925818e-05, "loss": 0.7453, "step": 1796 }, { "epoch": 0.3883725956343203, "grad_norm": 0.9937511086463928, "learning_rate": 2.7975837290555366e-05, "loss": 0.8696, "step": 1797 }, { "epoch": 0.3885887183920467, "grad_norm": 0.8401237726211548, "learning_rate": 2.796299678704308e-05, "loss": 0.941, "step": 1798 }, { "epoch": 0.3888048411497731, "grad_norm": 0.9794442057609558, "learning_rate": 2.7950152381680772e-05, "loss": 0.8264, "step": 1799 }, { "epoch": 0.38902096390749946, "grad_norm": 0.8898727297782898, "learning_rate": 2.7937304080762174e-05, "loss": 0.7814, "step": 1800 }, { "epoch": 0.38923708666522583, "grad_norm": 0.9887492656707764, "learning_rate": 2.7924451890582936e-05, "loss": 0.6829, "step": 1801 }, { "epoch": 0.38945320942295225, "grad_norm": 0.8383352756500244, "learning_rate": 2.791159581744058e-05, "loss": 0.8101, "step": 1802 }, { "epoch": 0.3896693321806786, "grad_norm": 0.9079082608222961, "learning_rate": 2.7898735867634567e-05, "loss": 0.9837, "step": 1803 }, { "epoch": 0.38988545493840504, "grad_norm": 0.8341466188430786, "learning_rate": 2.7885872047466236e-05, "loss": 0.8203, "step": 1804 }, { "epoch": 0.3901015776961314, "grad_norm": 0.8649726510047913, "learning_rate": 2.787300436323883e-05, "loss": 0.7397, "step": 1805 }, { "epoch": 0.3903177004538578, "grad_norm": 0.9751591682434082, "learning_rate": 2.7860132821257483e-05, "loss": 0.8808, "step": 1806 }, { "epoch": 0.3905338232115842, "grad_norm": 0.8929107785224915, "learning_rate": 2.7847257427829233e-05, "loss": 0.9881, "step": 1807 }, { "epoch": 0.39074994596931056, "grad_norm": 1.0322059392929077, "learning_rate": 2.783437818926298e-05, "loss": 1.0004, "step": 1808 }, { "epoch": 0.39096606872703693, "grad_norm": 0.8034584522247314, "learning_rate": 2.782149511186952e-05, "loss": 0.6872, "step": 1809 }, { "epoch": 0.39118219148476335, "grad_norm": 0.9873882532119751, "learning_rate": 2.780860820196154e-05, "loss": 0.9788, "step": 1810 }, { "epoch": 0.3913983142424897, "grad_norm": 0.8640844225883484, "learning_rate": 2.7795717465853588e-05, "loss": 0.8948, "step": 1811 }, { "epoch": 0.39161443700021614, "grad_norm": 1.0133180618286133, "learning_rate": 2.7782822909862105e-05, "loss": 1.009, "step": 1812 }, { "epoch": 0.3918305597579425, "grad_norm": 0.8707285523414612, "learning_rate": 2.7769924540305385e-05, "loss": 0.7323, "step": 1813 }, { "epoch": 0.3920466825156689, "grad_norm": 1.0021897554397583, "learning_rate": 2.77570223635036e-05, "loss": 1.042, "step": 1814 }, { "epoch": 0.3922628052733953, "grad_norm": 0.926548182964325, "learning_rate": 2.774411638577879e-05, "loss": 0.9995, "step": 1815 }, { "epoch": 0.39247892803112167, "grad_norm": 0.9106104373931885, "learning_rate": 2.7731206613454853e-05, "loss": 0.8422, "step": 1816 }, { "epoch": 0.3926950507888481, "grad_norm": 0.9673689007759094, "learning_rate": 2.7718293052857545e-05, "loss": 0.919, "step": 1817 }, { "epoch": 0.39291117354657445, "grad_norm": 0.9710796475410461, "learning_rate": 2.7705375710314486e-05, "loss": 0.9097, "step": 1818 }, { "epoch": 0.3931272963043008, "grad_norm": 0.9857740998268127, "learning_rate": 2.7692454592155137e-05, "loss": 1.0333, "step": 1819 }, { "epoch": 0.39334341906202724, "grad_norm": 1.0093835592269897, "learning_rate": 2.7679529704710827e-05, "loss": 0.8549, "step": 1820 }, { "epoch": 0.3935595418197536, "grad_norm": 0.9259928464889526, "learning_rate": 2.7666601054314707e-05, "loss": 1.0608, "step": 1821 }, { "epoch": 0.39377566457748003, "grad_norm": 0.9289209842681885, "learning_rate": 2.7653668647301797e-05, "loss": 0.9653, "step": 1822 }, { "epoch": 0.3939917873352064, "grad_norm": 0.8605603575706482, "learning_rate": 2.7640732490008945e-05, "loss": 1.0109, "step": 1823 }, { "epoch": 0.39420791009293277, "grad_norm": 0.8922584056854248, "learning_rate": 2.7627792588774832e-05, "loss": 0.9341, "step": 1824 }, { "epoch": 0.3944240328506592, "grad_norm": 0.9215991497039795, "learning_rate": 2.7614848949939986e-05, "loss": 0.9281, "step": 1825 }, { "epoch": 0.39464015560838556, "grad_norm": 0.7918894290924072, "learning_rate": 2.760190157984675e-05, "loss": 0.9201, "step": 1826 }, { "epoch": 0.394856278366112, "grad_norm": 0.9827929139137268, "learning_rate": 2.7588950484839324e-05, "loss": 0.9999, "step": 1827 }, { "epoch": 0.39507240112383835, "grad_norm": 1.0141046047210693, "learning_rate": 2.7575995671263695e-05, "loss": 0.9462, "step": 1828 }, { "epoch": 0.3952885238815647, "grad_norm": 0.8736757040023804, "learning_rate": 2.7563037145467705e-05, "loss": 1.0194, "step": 1829 }, { "epoch": 0.39550464663929114, "grad_norm": 0.8712459206581116, "learning_rate": 2.7550074913800995e-05, "loss": 0.6799, "step": 1830 }, { "epoch": 0.3957207693970175, "grad_norm": 0.9101915955543518, "learning_rate": 2.753710898261503e-05, "loss": 0.9883, "step": 1831 }, { "epoch": 0.39593689215474387, "grad_norm": 0.9826453328132629, "learning_rate": 2.7524139358263088e-05, "loss": 1.0092, "step": 1832 }, { "epoch": 0.3961530149124703, "grad_norm": 0.8820707201957703, "learning_rate": 2.7511166047100255e-05, "loss": 1.0001, "step": 1833 }, { "epoch": 0.39636913767019666, "grad_norm": 0.8937494158744812, "learning_rate": 2.7498189055483423e-05, "loss": 0.8805, "step": 1834 }, { "epoch": 0.3965852604279231, "grad_norm": 0.9945870637893677, "learning_rate": 2.748520838977128e-05, "loss": 0.9133, "step": 1835 }, { "epoch": 0.39680138318564945, "grad_norm": 0.8278480768203735, "learning_rate": 2.747222405632434e-05, "loss": 1.0361, "step": 1836 }, { "epoch": 0.3970175059433758, "grad_norm": 0.8340871930122375, "learning_rate": 2.7459236061504873e-05, "loss": 0.8497, "step": 1837 }, { "epoch": 0.39723362870110224, "grad_norm": 0.8968183398246765, "learning_rate": 2.744624441167699e-05, "loss": 0.8952, "step": 1838 }, { "epoch": 0.3974497514588286, "grad_norm": 1.0628737211227417, "learning_rate": 2.743324911320655e-05, "loss": 0.8561, "step": 1839 }, { "epoch": 0.397665874216555, "grad_norm": 0.9170161485671997, "learning_rate": 2.7420250172461233e-05, "loss": 1.0833, "step": 1840 }, { "epoch": 0.3978819969742814, "grad_norm": 0.8254498839378357, "learning_rate": 2.7407247595810486e-05, "loss": 0.8913, "step": 1841 }, { "epoch": 0.39809811973200776, "grad_norm": 0.8792933821678162, "learning_rate": 2.739424138962554e-05, "loss": 0.8642, "step": 1842 }, { "epoch": 0.3983142424897342, "grad_norm": 0.8736568093299866, "learning_rate": 2.7381231560279405e-05, "loss": 0.8703, "step": 1843 }, { "epoch": 0.39853036524746055, "grad_norm": 1.0543543100357056, "learning_rate": 2.7368218114146867e-05, "loss": 1.0008, "step": 1844 }, { "epoch": 0.39874648800518697, "grad_norm": 0.9799116849899292, "learning_rate": 2.735520105760449e-05, "loss": 0.8504, "step": 1845 }, { "epoch": 0.39896261076291334, "grad_norm": 0.8448163866996765, "learning_rate": 2.7342180397030586e-05, "loss": 0.8936, "step": 1846 }, { "epoch": 0.3991787335206397, "grad_norm": 1.048321008682251, "learning_rate": 2.7329156138805273e-05, "loss": 1.1015, "step": 1847 }, { "epoch": 0.39939485627836613, "grad_norm": 0.8891046643257141, "learning_rate": 2.7316128289310386e-05, "loss": 0.8276, "step": 1848 }, { "epoch": 0.3996109790360925, "grad_norm": 0.8857054114341736, "learning_rate": 2.7303096854929553e-05, "loss": 0.9379, "step": 1849 }, { "epoch": 0.39982710179381886, "grad_norm": 1.0203007459640503, "learning_rate": 2.7290061842048143e-05, "loss": 0.9498, "step": 1850 }, { "epoch": 0.4000432245515453, "grad_norm": 0.865835964679718, "learning_rate": 2.7277023257053286e-05, "loss": 0.8314, "step": 1851 }, { "epoch": 0.40025934730927165, "grad_norm": 0.9435209631919861, "learning_rate": 2.726398110633385e-05, "loss": 0.9581, "step": 1852 }, { "epoch": 0.4004754700669981, "grad_norm": 0.9913296699523926, "learning_rate": 2.7250935396280467e-05, "loss": 0.9327, "step": 1853 }, { "epoch": 0.40069159282472444, "grad_norm": 0.885454535484314, "learning_rate": 2.7237886133285504e-05, "loss": 0.7667, "step": 1854 }, { "epoch": 0.4009077155824508, "grad_norm": 0.8463343977928162, "learning_rate": 2.7224833323743064e-05, "loss": 0.8656, "step": 1855 }, { "epoch": 0.40112383834017723, "grad_norm": 1.0554388761520386, "learning_rate": 2.7211776974048997e-05, "loss": 1.0108, "step": 1856 }, { "epoch": 0.4013399610979036, "grad_norm": 1.000020980834961, "learning_rate": 2.7198717090600878e-05, "loss": 0.9093, "step": 1857 }, { "epoch": 0.40155608385563, "grad_norm": 0.7798357009887695, "learning_rate": 2.7185653679798036e-05, "loss": 0.7673, "step": 1858 }, { "epoch": 0.4017722066133564, "grad_norm": 0.959668755531311, "learning_rate": 2.7172586748041494e-05, "loss": 1.021, "step": 1859 }, { "epoch": 0.40198832937108275, "grad_norm": 0.8943659663200378, "learning_rate": 2.715951630173403e-05, "loss": 0.86, "step": 1860 }, { "epoch": 0.4022044521288092, "grad_norm": 0.9070212841033936, "learning_rate": 2.714644234728013e-05, "loss": 0.8823, "step": 1861 }, { "epoch": 0.40242057488653554, "grad_norm": 0.99940425157547, "learning_rate": 2.7133364891085997e-05, "loss": 0.9913, "step": 1862 }, { "epoch": 0.40263669764426196, "grad_norm": 0.9460877776145935, "learning_rate": 2.712028393955956e-05, "loss": 0.9143, "step": 1863 }, { "epoch": 0.40285282040198833, "grad_norm": 0.9539337754249573, "learning_rate": 2.7107199499110448e-05, "loss": 0.9687, "step": 1864 }, { "epoch": 0.4030689431597147, "grad_norm": 0.8600987792015076, "learning_rate": 2.7094111576150014e-05, "loss": 0.8365, "step": 1865 }, { "epoch": 0.4032850659174411, "grad_norm": 1.0653421878814697, "learning_rate": 2.7081020177091304e-05, "loss": 1.046, "step": 1866 }, { "epoch": 0.4035011886751675, "grad_norm": 0.8216643929481506, "learning_rate": 2.7067925308349084e-05, "loss": 0.7576, "step": 1867 }, { "epoch": 0.4037173114328939, "grad_norm": 0.9096048474311829, "learning_rate": 2.7054826976339793e-05, "loss": 0.9487, "step": 1868 }, { "epoch": 0.4039334341906203, "grad_norm": 0.8275030851364136, "learning_rate": 2.7041725187481592e-05, "loss": 0.9392, "step": 1869 }, { "epoch": 0.40414955694834664, "grad_norm": 0.9473551511764526, "learning_rate": 2.7028619948194332e-05, "loss": 1.0163, "step": 1870 }, { "epoch": 0.40436567970607307, "grad_norm": 1.0237468481063843, "learning_rate": 2.7015511264899545e-05, "loss": 0.9627, "step": 1871 }, { "epoch": 0.40458180246379943, "grad_norm": 0.910361111164093, "learning_rate": 2.700239914402045e-05, "loss": 1.0283, "step": 1872 }, { "epoch": 0.4047979252215258, "grad_norm": 0.9893791675567627, "learning_rate": 2.698928359198197e-05, "loss": 0.8604, "step": 1873 }, { "epoch": 0.4050140479792522, "grad_norm": 0.91969233751297, "learning_rate": 2.697616461521068e-05, "loss": 0.9786, "step": 1874 }, { "epoch": 0.4052301707369786, "grad_norm": 0.8568896055221558, "learning_rate": 2.696304222013486e-05, "loss": 1.0031, "step": 1875 }, { "epoch": 0.405446293494705, "grad_norm": 0.8218428492546082, "learning_rate": 2.694991641318445e-05, "loss": 0.9537, "step": 1876 }, { "epoch": 0.4056624162524314, "grad_norm": 0.833819568157196, "learning_rate": 2.693678720079105e-05, "loss": 0.697, "step": 1877 }, { "epoch": 0.40587853901015775, "grad_norm": 0.9879897832870483, "learning_rate": 2.6923654589387976e-05, "loss": 0.9709, "step": 1878 }, { "epoch": 0.40609466176788417, "grad_norm": 1.0249334573745728, "learning_rate": 2.6910518585410144e-05, "loss": 0.7457, "step": 1879 }, { "epoch": 0.40631078452561054, "grad_norm": 0.8635609745979309, "learning_rate": 2.6897379195294187e-05, "loss": 0.8512, "step": 1880 }, { "epoch": 0.40652690728333696, "grad_norm": 1.0228142738342285, "learning_rate": 2.6884236425478366e-05, "loss": 0.9787, "step": 1881 }, { "epoch": 0.4067430300410633, "grad_norm": 1.288407325744629, "learning_rate": 2.6871090282402614e-05, "loss": 0.9159, "step": 1882 }, { "epoch": 0.4069591527987897, "grad_norm": 0.9210023283958435, "learning_rate": 2.6857940772508504e-05, "loss": 1.0713, "step": 1883 }, { "epoch": 0.4071752755565161, "grad_norm": 0.9451961517333984, "learning_rate": 2.684478790223927e-05, "loss": 1.0474, "step": 1884 }, { "epoch": 0.4073913983142425, "grad_norm": 0.9673775434494019, "learning_rate": 2.6831631678039785e-05, "loss": 0.8909, "step": 1885 }, { "epoch": 0.4076075210719689, "grad_norm": 1.0196783542633057, "learning_rate": 2.681847210635657e-05, "loss": 0.988, "step": 1886 }, { "epoch": 0.40782364382969527, "grad_norm": 0.864243745803833, "learning_rate": 2.6805309193637793e-05, "loss": 0.94, "step": 1887 }, { "epoch": 0.40803976658742164, "grad_norm": 0.9797789454460144, "learning_rate": 2.6792142946333227e-05, "loss": 0.8806, "step": 1888 }, { "epoch": 0.40825588934514806, "grad_norm": 0.9118221402168274, "learning_rate": 2.6778973370894327e-05, "loss": 0.8709, "step": 1889 }, { "epoch": 0.4084720121028744, "grad_norm": 0.928864598274231, "learning_rate": 2.676580047377415e-05, "loss": 0.8471, "step": 1890 }, { "epoch": 0.4086881348606008, "grad_norm": 0.9791908860206604, "learning_rate": 2.675262426142738e-05, "loss": 1.0364, "step": 1891 }, { "epoch": 0.4089042576183272, "grad_norm": 1.1222354173660278, "learning_rate": 2.673944474031033e-05, "loss": 1.0092, "step": 1892 }, { "epoch": 0.4091203803760536, "grad_norm": 0.8850167989730835, "learning_rate": 2.6726261916880933e-05, "loss": 0.8162, "step": 1893 }, { "epoch": 0.40933650313378, "grad_norm": 0.9502853751182556, "learning_rate": 2.671307579759875e-05, "loss": 0.9675, "step": 1894 }, { "epoch": 0.40955262589150637, "grad_norm": 0.8617500066757202, "learning_rate": 2.6699886388924942e-05, "loss": 0.7554, "step": 1895 }, { "epoch": 0.40976874864923274, "grad_norm": 0.9188858270645142, "learning_rate": 2.6686693697322294e-05, "loss": 0.7892, "step": 1896 }, { "epoch": 0.40998487140695916, "grad_norm": 1.0231481790542603, "learning_rate": 2.6673497729255188e-05, "loss": 0.9294, "step": 1897 }, { "epoch": 0.41020099416468553, "grad_norm": 0.9644643068313599, "learning_rate": 2.666029849118963e-05, "loss": 1.0355, "step": 1898 }, { "epoch": 0.41041711692241195, "grad_norm": 0.8928436040878296, "learning_rate": 2.6647095989593194e-05, "loss": 0.8279, "step": 1899 }, { "epoch": 0.4106332396801383, "grad_norm": 0.9390683770179749, "learning_rate": 2.66338902309351e-05, "loss": 0.8059, "step": 1900 }, { "epoch": 0.4108493624378647, "grad_norm": 0.8569971919059753, "learning_rate": 2.6620681221686126e-05, "loss": 0.9673, "step": 1901 }, { "epoch": 0.4110654851955911, "grad_norm": 0.8158060312271118, "learning_rate": 2.6607468968318655e-05, "loss": 0.9587, "step": 1902 }, { "epoch": 0.4112816079533175, "grad_norm": 0.9675447344779968, "learning_rate": 2.6594253477306663e-05, "loss": 0.7087, "step": 1903 }, { "epoch": 0.4114977307110439, "grad_norm": 0.87473464012146, "learning_rate": 2.6581034755125713e-05, "loss": 1.0256, "step": 1904 }, { "epoch": 0.41171385346877026, "grad_norm": 1.0762571096420288, "learning_rate": 2.656781280825295e-05, "loss": 0.9579, "step": 1905 }, { "epoch": 0.41192997622649663, "grad_norm": 1.1155767440795898, "learning_rate": 2.6554587643167088e-05, "loss": 0.8231, "step": 1906 }, { "epoch": 0.41214609898422305, "grad_norm": 0.9181614518165588, "learning_rate": 2.6541359266348437e-05, "loss": 0.9439, "step": 1907 }, { "epoch": 0.4123622217419494, "grad_norm": 1.0097953081130981, "learning_rate": 2.6528127684278858e-05, "loss": 1.0552, "step": 1908 }, { "epoch": 0.41257834449967584, "grad_norm": 0.8957809805870056, "learning_rate": 2.6514892903441815e-05, "loss": 0.963, "step": 1909 }, { "epoch": 0.4127944672574022, "grad_norm": 0.9842140674591064, "learning_rate": 2.650165493032231e-05, "loss": 0.9449, "step": 1910 }, { "epoch": 0.4130105900151286, "grad_norm": 0.9267460703849792, "learning_rate": 2.6488413771406913e-05, "loss": 0.9907, "step": 1911 }, { "epoch": 0.413226712772855, "grad_norm": 1.0374408960342407, "learning_rate": 2.6475169433183768e-05, "loss": 1.0192, "step": 1912 }, { "epoch": 0.41344283553058137, "grad_norm": 0.7625603079795837, "learning_rate": 2.6461921922142573e-05, "loss": 0.7252, "step": 1913 }, { "epoch": 0.41365895828830773, "grad_norm": 0.9079304337501526, "learning_rate": 2.6448671244774572e-05, "loss": 0.8406, "step": 1914 }, { "epoch": 0.41387508104603415, "grad_norm": 0.9198092222213745, "learning_rate": 2.643541740757256e-05, "loss": 0.852, "step": 1915 }, { "epoch": 0.4140912038037605, "grad_norm": 0.8380274176597595, "learning_rate": 2.6422160417030908e-05, "loss": 0.9572, "step": 1916 }, { "epoch": 0.41430732656148694, "grad_norm": 0.9923501014709473, "learning_rate": 2.640890027964549e-05, "loss": 1.0269, "step": 1917 }, { "epoch": 0.4145234493192133, "grad_norm": 0.9316266775131226, "learning_rate": 2.6395637001913752e-05, "loss": 0.9294, "step": 1918 }, { "epoch": 0.4147395720769397, "grad_norm": 0.8916770219802856, "learning_rate": 2.6382370590334664e-05, "loss": 0.876, "step": 1919 }, { "epoch": 0.4149556948346661, "grad_norm": 0.9091696739196777, "learning_rate": 2.6369101051408748e-05, "loss": 0.8264, "step": 1920 }, { "epoch": 0.41517181759239247, "grad_norm": 0.9155973792076111, "learning_rate": 2.6355828391638036e-05, "loss": 0.9626, "step": 1921 }, { "epoch": 0.4153879403501189, "grad_norm": 0.9353761672973633, "learning_rate": 2.6342552617526104e-05, "loss": 1.0452, "step": 1922 }, { "epoch": 0.41560406310784526, "grad_norm": 0.8498687148094177, "learning_rate": 2.6329273735578053e-05, "loss": 0.8687, "step": 1923 }, { "epoch": 0.4158201858655716, "grad_norm": 0.895742654800415, "learning_rate": 2.6315991752300503e-05, "loss": 0.9839, "step": 1924 }, { "epoch": 0.41603630862329805, "grad_norm": 0.9112388491630554, "learning_rate": 2.630270667420159e-05, "loss": 0.8423, "step": 1925 }, { "epoch": 0.4162524313810244, "grad_norm": 0.8951870203018188, "learning_rate": 2.628941850779098e-05, "loss": 0.8704, "step": 1926 }, { "epoch": 0.41646855413875083, "grad_norm": 0.9525790214538574, "learning_rate": 2.6276127259579848e-05, "loss": 0.9369, "step": 1927 }, { "epoch": 0.4166846768964772, "grad_norm": 0.8421275615692139, "learning_rate": 2.626283293608085e-05, "loss": 1.0124, "step": 1928 }, { "epoch": 0.41690079965420357, "grad_norm": 0.9734429717063904, "learning_rate": 2.6249535543808202e-05, "loss": 0.9653, "step": 1929 }, { "epoch": 0.41711692241193, "grad_norm": 0.9232828617095947, "learning_rate": 2.623623508927758e-05, "loss": 0.9472, "step": 1930 }, { "epoch": 0.41733304516965636, "grad_norm": 0.9370399117469788, "learning_rate": 2.622293157900619e-05, "loss": 0.885, "step": 1931 }, { "epoch": 0.4175491679273828, "grad_norm": 1.0074236392974854, "learning_rate": 2.62096250195127e-05, "loss": 1.1915, "step": 1932 }, { "epoch": 0.41776529068510915, "grad_norm": 0.884238600730896, "learning_rate": 2.6196315417317314e-05, "loss": 0.8891, "step": 1933 }, { "epoch": 0.4179814134428355, "grad_norm": 0.8914430737495422, "learning_rate": 2.6183002778941692e-05, "loss": 0.743, "step": 1934 }, { "epoch": 0.41819753620056194, "grad_norm": 0.7857561707496643, "learning_rate": 2.6169687110909002e-05, "loss": 0.8604, "step": 1935 }, { "epoch": 0.4184136589582883, "grad_norm": 1.0400208234786987, "learning_rate": 2.6156368419743892e-05, "loss": 0.7448, "step": 1936 }, { "epoch": 0.41862978171601467, "grad_norm": 0.8496856093406677, "learning_rate": 2.614304671197248e-05, "loss": 0.8913, "step": 1937 }, { "epoch": 0.4188459044737411, "grad_norm": 0.9101283550262451, "learning_rate": 2.612972199412239e-05, "loss": 0.7999, "step": 1938 }, { "epoch": 0.41906202723146746, "grad_norm": 0.974231481552124, "learning_rate": 2.6116394272722688e-05, "loss": 1.0992, "step": 1939 }, { "epoch": 0.4192781499891939, "grad_norm": 0.9200551509857178, "learning_rate": 2.6103063554303934e-05, "loss": 0.8596, "step": 1940 }, { "epoch": 0.41949427274692025, "grad_norm": 0.7142768502235413, "learning_rate": 2.6089729845398144e-05, "loss": 0.854, "step": 1941 }, { "epoch": 0.4197103955046466, "grad_norm": 1.0593105554580688, "learning_rate": 2.6076393152538807e-05, "loss": 1.0074, "step": 1942 }, { "epoch": 0.41992651826237304, "grad_norm": 0.9730693101882935, "learning_rate": 2.606305348226087e-05, "loss": 0.8169, "step": 1943 }, { "epoch": 0.4201426410200994, "grad_norm": 1.0784400701522827, "learning_rate": 2.604971084110075e-05, "loss": 0.9604, "step": 1944 }, { "epoch": 0.42035876377782583, "grad_norm": 0.7956601977348328, "learning_rate": 2.6036365235596296e-05, "loss": 0.8099, "step": 1945 }, { "epoch": 0.4205748865355522, "grad_norm": 0.8289860486984253, "learning_rate": 2.602301667228683e-05, "loss": 0.7475, "step": 1946 }, { "epoch": 0.42079100929327856, "grad_norm": 0.9040570259094238, "learning_rate": 2.6009665157713127e-05, "loss": 0.885, "step": 1947 }, { "epoch": 0.421007132051005, "grad_norm": 0.9660112857818604, "learning_rate": 2.5996310698417376e-05, "loss": 0.8363, "step": 1948 }, { "epoch": 0.42122325480873135, "grad_norm": 0.9220053553581238, "learning_rate": 2.5982953300943254e-05, "loss": 0.7915, "step": 1949 }, { "epoch": 0.4214393775664578, "grad_norm": 0.9023492336273193, "learning_rate": 2.5969592971835836e-05, "loss": 0.9589, "step": 1950 }, { "epoch": 0.42165550032418414, "grad_norm": 0.8304703831672668, "learning_rate": 2.595622971764167e-05, "loss": 0.9593, "step": 1951 }, { "epoch": 0.4218716230819105, "grad_norm": 0.9802231788635254, "learning_rate": 2.594286354490871e-05, "loss": 1.0522, "step": 1952 }, { "epoch": 0.42208774583963693, "grad_norm": 1.0685917139053345, "learning_rate": 2.592949446018635e-05, "loss": 0.7953, "step": 1953 }, { "epoch": 0.4223038685973633, "grad_norm": 0.9584881067276001, "learning_rate": 2.5916122470025414e-05, "loss": 1.0165, "step": 1954 }, { "epoch": 0.42251999135508966, "grad_norm": 0.8616102933883667, "learning_rate": 2.5902747580978137e-05, "loss": 0.7753, "step": 1955 }, { "epoch": 0.4227361141128161, "grad_norm": 0.885311484336853, "learning_rate": 2.5889369799598196e-05, "loss": 0.8329, "step": 1956 }, { "epoch": 0.42295223687054245, "grad_norm": 0.8231478929519653, "learning_rate": 2.5875989132440663e-05, "loss": 0.96, "step": 1957 }, { "epoch": 0.4231683596282689, "grad_norm": 1.279319405555725, "learning_rate": 2.5862605586062044e-05, "loss": 1.0165, "step": 1958 }, { "epoch": 0.42338448238599524, "grad_norm": 0.7875201106071472, "learning_rate": 2.5849219167020235e-05, "loss": 1.0049, "step": 1959 }, { "epoch": 0.4236006051437216, "grad_norm": 0.9663813710212708, "learning_rate": 2.583582988187456e-05, "loss": 0.8889, "step": 1960 }, { "epoch": 0.42381672790144803, "grad_norm": 0.9025231599807739, "learning_rate": 2.582243773718573e-05, "loss": 0.9526, "step": 1961 }, { "epoch": 0.4240328506591744, "grad_norm": 0.9170035719871521, "learning_rate": 2.5809042739515872e-05, "loss": 0.8904, "step": 1962 }, { "epoch": 0.4242489734169008, "grad_norm": 0.9558761715888977, "learning_rate": 2.5795644895428494e-05, "loss": 0.829, "step": 1963 }, { "epoch": 0.4244650961746272, "grad_norm": 1.0170693397521973, "learning_rate": 2.578224421148852e-05, "loss": 1.1339, "step": 1964 }, { "epoch": 0.42468121893235355, "grad_norm": 0.9244565963745117, "learning_rate": 2.5768840694262247e-05, "loss": 1.1144, "step": 1965 }, { "epoch": 0.42489734169008, "grad_norm": 0.8529707193374634, "learning_rate": 2.5755434350317367e-05, "loss": 0.9846, "step": 1966 }, { "epoch": 0.42511346444780634, "grad_norm": 0.8597925901412964, "learning_rate": 2.574202518622297e-05, "loss": 0.9885, "step": 1967 }, { "epoch": 0.42532958720553277, "grad_norm": 0.9960910081863403, "learning_rate": 2.5728613208549495e-05, "loss": 0.8757, "step": 1968 }, { "epoch": 0.42554570996325913, "grad_norm": 0.9559326767921448, "learning_rate": 2.5715198423868794e-05, "loss": 0.9808, "step": 1969 }, { "epoch": 0.4257618327209855, "grad_norm": 1.0519607067108154, "learning_rate": 2.5701780838754075e-05, "loss": 1.0984, "step": 1970 }, { "epoch": 0.4259779554787119, "grad_norm": 0.9505062103271484, "learning_rate": 2.568836045977993e-05, "loss": 0.9912, "step": 1971 }, { "epoch": 0.4261940782364383, "grad_norm": 0.9739280939102173, "learning_rate": 2.5674937293522305e-05, "loss": 1.039, "step": 1972 }, { "epoch": 0.4264102009941647, "grad_norm": 0.9407761096954346, "learning_rate": 2.566151134655853e-05, "loss": 0.8215, "step": 1973 }, { "epoch": 0.4266263237518911, "grad_norm": 0.9971686005592346, "learning_rate": 2.564808262546728e-05, "loss": 1.0354, "step": 1974 }, { "epoch": 0.42684244650961745, "grad_norm": 0.8059692978858948, "learning_rate": 2.5634651136828597e-05, "loss": 0.9137, "step": 1975 }, { "epoch": 0.42705856926734387, "grad_norm": 0.9364917278289795, "learning_rate": 2.5621216887223886e-05, "loss": 1.1067, "step": 1976 }, { "epoch": 0.42727469202507024, "grad_norm": 0.8833372592926025, "learning_rate": 2.560777988323589e-05, "loss": 1.0041, "step": 1977 }, { "epoch": 0.4274908147827966, "grad_norm": 0.9525211453437805, "learning_rate": 2.559434013144872e-05, "loss": 1.0978, "step": 1978 }, { "epoch": 0.427706937540523, "grad_norm": 1.0457853078842163, "learning_rate": 2.5580897638447814e-05, "loss": 0.881, "step": 1979 }, { "epoch": 0.4279230602982494, "grad_norm": 0.9005357027053833, "learning_rate": 2.5567452410819966e-05, "loss": 0.9099, "step": 1980 }, { "epoch": 0.4281391830559758, "grad_norm": 0.9874290227890015, "learning_rate": 2.555400445515331e-05, "loss": 0.9065, "step": 1981 }, { "epoch": 0.4283553058137022, "grad_norm": 0.8200092911720276, "learning_rate": 2.554055377803731e-05, "loss": 0.6178, "step": 1982 }, { "epoch": 0.42857142857142855, "grad_norm": 1.066659688949585, "learning_rate": 2.552710038606277e-05, "loss": 0.9835, "step": 1983 }, { "epoch": 0.42878755132915497, "grad_norm": 0.9795448780059814, "learning_rate": 2.551364428582181e-05, "loss": 0.9276, "step": 1984 }, { "epoch": 0.42900367408688134, "grad_norm": 0.9300527572631836, "learning_rate": 2.55001854839079e-05, "loss": 0.9971, "step": 1985 }, { "epoch": 0.42921979684460776, "grad_norm": 0.771075963973999, "learning_rate": 2.548672398691581e-05, "loss": 0.7037, "step": 1986 }, { "epoch": 0.4294359196023341, "grad_norm": 0.8233857750892639, "learning_rate": 2.5473259801441663e-05, "loss": 0.9341, "step": 1987 }, { "epoch": 0.4296520423600605, "grad_norm": 1.0577247142791748, "learning_rate": 2.5459792934082853e-05, "loss": 1.0113, "step": 1988 }, { "epoch": 0.4298681651177869, "grad_norm": 0.9196573495864868, "learning_rate": 2.5446323391438133e-05, "loss": 0.822, "step": 1989 }, { "epoch": 0.4300842878755133, "grad_norm": 0.8457186818122864, "learning_rate": 2.5432851180107544e-05, "loss": 0.8506, "step": 1990 }, { "epoch": 0.4303004106332397, "grad_norm": 0.8600859045982361, "learning_rate": 2.5419376306692433e-05, "loss": 0.9624, "step": 1991 }, { "epoch": 0.43051653339096607, "grad_norm": 0.9862504601478577, "learning_rate": 2.540589877779546e-05, "loss": 0.9211, "step": 1992 }, { "epoch": 0.43073265614869244, "grad_norm": 0.8851495385169983, "learning_rate": 2.539241860002058e-05, "loss": 0.9876, "step": 1993 }, { "epoch": 0.43094877890641886, "grad_norm": 0.9032397270202637, "learning_rate": 2.537893577997305e-05, "loss": 0.9057, "step": 1994 }, { "epoch": 0.43116490166414523, "grad_norm": 0.8996214270591736, "learning_rate": 2.5365450324259424e-05, "loss": 1.0677, "step": 1995 }, { "epoch": 0.43138102442187165, "grad_norm": 0.9084228277206421, "learning_rate": 2.5351962239487548e-05, "loss": 0.9358, "step": 1996 }, { "epoch": 0.431597147179598, "grad_norm": 1.0036503076553345, "learning_rate": 2.5338471532266534e-05, "loss": 0.9952, "step": 1997 }, { "epoch": 0.4318132699373244, "grad_norm": 0.893693208694458, "learning_rate": 2.532497820920682e-05, "loss": 0.9576, "step": 1998 }, { "epoch": 0.4320293926950508, "grad_norm": 0.9255844354629517, "learning_rate": 2.531148227692009e-05, "loss": 0.9368, "step": 1999 }, { "epoch": 0.4322455154527772, "grad_norm": 0.9848679304122925, "learning_rate": 2.5297983742019325e-05, "loss": 1.0371, "step": 2000 }, { "epoch": 0.43246163821050354, "grad_norm": 0.9268440008163452, "learning_rate": 2.5284482611118773e-05, "loss": 0.8615, "step": 2001 }, { "epoch": 0.43267776096822996, "grad_norm": 0.8308999538421631, "learning_rate": 2.5270978890833955e-05, "loss": 0.886, "step": 2002 }, { "epoch": 0.43289388372595633, "grad_norm": 0.9142823815345764, "learning_rate": 2.525747258778167e-05, "loss": 0.9298, "step": 2003 }, { "epoch": 0.43311000648368275, "grad_norm": 0.9102539420127869, "learning_rate": 2.5243963708579964e-05, "loss": 0.9284, "step": 2004 }, { "epoch": 0.4333261292414091, "grad_norm": 0.8535233736038208, "learning_rate": 2.5230452259848167e-05, "loss": 0.8829, "step": 2005 }, { "epoch": 0.4335422519991355, "grad_norm": 0.9500638246536255, "learning_rate": 2.5216938248206847e-05, "loss": 0.9293, "step": 2006 }, { "epoch": 0.4337583747568619, "grad_norm": 0.9432377815246582, "learning_rate": 2.520342168027786e-05, "loss": 0.8627, "step": 2007 }, { "epoch": 0.4339744975145883, "grad_norm": 0.8626837134361267, "learning_rate": 2.5189902562684268e-05, "loss": 0.9437, "step": 2008 }, { "epoch": 0.4341906202723147, "grad_norm": 0.9086456894874573, "learning_rate": 2.5176380902050418e-05, "loss": 0.9182, "step": 2009 }, { "epoch": 0.43440674303004106, "grad_norm": 0.9128880500793457, "learning_rate": 2.5162856705001892e-05, "loss": 0.8335, "step": 2010 }, { "epoch": 0.43462286578776743, "grad_norm": 0.933714747428894, "learning_rate": 2.514932997816552e-05, "loss": 1.0162, "step": 2011 }, { "epoch": 0.43483898854549385, "grad_norm": 0.9576535820960999, "learning_rate": 2.5135800728169357e-05, "loss": 1.0565, "step": 2012 }, { "epoch": 0.4350551113032202, "grad_norm": 1.1083024740219116, "learning_rate": 2.512226896164271e-05, "loss": 0.9044, "step": 2013 }, { "epoch": 0.43527123406094664, "grad_norm": 0.7745140790939331, "learning_rate": 2.5108734685216117e-05, "loss": 0.7354, "step": 2014 }, { "epoch": 0.435487356818673, "grad_norm": 1.1194214820861816, "learning_rate": 2.509519790552133e-05, "loss": 0.923, "step": 2015 }, { "epoch": 0.4357034795763994, "grad_norm": 0.9793070554733276, "learning_rate": 2.5081658629191353e-05, "loss": 1.0025, "step": 2016 }, { "epoch": 0.4359196023341258, "grad_norm": 1.0025380849838257, "learning_rate": 2.5068116862860397e-05, "loss": 1.009, "step": 2017 }, { "epoch": 0.43613572509185217, "grad_norm": 0.9246045351028442, "learning_rate": 2.505457261316389e-05, "loss": 1.0137, "step": 2018 }, { "epoch": 0.43635184784957853, "grad_norm": 0.8660938143730164, "learning_rate": 2.504102588673849e-05, "loss": 0.9983, "step": 2019 }, { "epoch": 0.43656797060730496, "grad_norm": 0.9384306073188782, "learning_rate": 2.5027476690222058e-05, "loss": 0.9404, "step": 2020 }, { "epoch": 0.4367840933650313, "grad_norm": 1.1287697553634644, "learning_rate": 2.501392503025367e-05, "loss": 0.9825, "step": 2021 }, { "epoch": 0.43700021612275775, "grad_norm": 0.9511003494262695, "learning_rate": 2.5000370913473605e-05, "loss": 0.839, "step": 2022 }, { "epoch": 0.4372163388804841, "grad_norm": 0.9103716015815735, "learning_rate": 2.498681434652335e-05, "loss": 0.9403, "step": 2023 }, { "epoch": 0.4374324616382105, "grad_norm": 0.9426201581954956, "learning_rate": 2.4973255336045597e-05, "loss": 1.0598, "step": 2024 }, { "epoch": 0.4376485843959369, "grad_norm": 0.8501827120780945, "learning_rate": 2.4959693888684226e-05, "loss": 0.9278, "step": 2025 }, { "epoch": 0.43786470715366327, "grad_norm": 0.9373735785484314, "learning_rate": 2.494613001108431e-05, "loss": 0.9727, "step": 2026 }, { "epoch": 0.4380808299113897, "grad_norm": 1.0211824178695679, "learning_rate": 2.493256370989213e-05, "loss": 1.0724, "step": 2027 }, { "epoch": 0.43829695266911606, "grad_norm": 0.9939093589782715, "learning_rate": 2.4918994991755126e-05, "loss": 0.9271, "step": 2028 }, { "epoch": 0.4385130754268424, "grad_norm": 0.9821503162384033, "learning_rate": 2.4905423863321953e-05, "loss": 0.8327, "step": 2029 }, { "epoch": 0.43872919818456885, "grad_norm": 0.7790869474411011, "learning_rate": 2.4891850331242427e-05, "loss": 0.8875, "step": 2030 }, { "epoch": 0.4389453209422952, "grad_norm": 0.971564531326294, "learning_rate": 2.4878274402167544e-05, "loss": 0.9534, "step": 2031 }, { "epoch": 0.43916144370002164, "grad_norm": 0.8810921311378479, "learning_rate": 2.4864696082749483e-05, "loss": 0.8962, "step": 2032 }, { "epoch": 0.439377566457748, "grad_norm": 1.0457826852798462, "learning_rate": 2.4851115379641588e-05, "loss": 1.0562, "step": 2033 }, { "epoch": 0.43959368921547437, "grad_norm": 0.8541572093963623, "learning_rate": 2.4837532299498366e-05, "loss": 0.7529, "step": 2034 }, { "epoch": 0.4398098119732008, "grad_norm": 1.006701111793518, "learning_rate": 2.4823946848975503e-05, "loss": 1.0355, "step": 2035 }, { "epoch": 0.44002593473092716, "grad_norm": 0.8878010511398315, "learning_rate": 2.481035903472984e-05, "loss": 0.8281, "step": 2036 }, { "epoch": 0.4402420574886536, "grad_norm": 1.10139000415802, "learning_rate": 2.4796768863419364e-05, "loss": 0.9031, "step": 2037 }, { "epoch": 0.44045818024637995, "grad_norm": 0.9112840294837952, "learning_rate": 2.4783176341703233e-05, "loss": 0.8575, "step": 2038 }, { "epoch": 0.4406743030041063, "grad_norm": 0.9776943922042847, "learning_rate": 2.4769581476241756e-05, "loss": 0.8534, "step": 2039 }, { "epoch": 0.44089042576183274, "grad_norm": 0.8622772693634033, "learning_rate": 2.4755984273696377e-05, "loss": 0.8593, "step": 2040 }, { "epoch": 0.4411065485195591, "grad_norm": 0.7838684320449829, "learning_rate": 2.4742384740729704e-05, "loss": 0.952, "step": 2041 }, { "epoch": 0.44132267127728547, "grad_norm": 0.982847273349762, "learning_rate": 2.4728782884005465e-05, "loss": 0.8801, "step": 2042 }, { "epoch": 0.4415387940350119, "grad_norm": 0.8614881038665771, "learning_rate": 2.471517871018855e-05, "loss": 0.8426, "step": 2043 }, { "epoch": 0.44175491679273826, "grad_norm": 0.9391106367111206, "learning_rate": 2.4701572225944962e-05, "loss": 0.9851, "step": 2044 }, { "epoch": 0.4419710395504647, "grad_norm": 1.0585438013076782, "learning_rate": 2.4687963437941855e-05, "loss": 1.0897, "step": 2045 }, { "epoch": 0.44218716230819105, "grad_norm": 0.9564892649650574, "learning_rate": 2.4674352352847492e-05, "loss": 0.9417, "step": 2046 }, { "epoch": 0.4424032850659174, "grad_norm": 0.9705290198326111, "learning_rate": 2.4660738977331297e-05, "loss": 0.9973, "step": 2047 }, { "epoch": 0.44261940782364384, "grad_norm": 0.9146240949630737, "learning_rate": 2.4647123318063758e-05, "loss": 0.8612, "step": 2048 }, { "epoch": 0.4428355305813702, "grad_norm": 0.8360450863838196, "learning_rate": 2.463350538171655e-05, "loss": 0.7957, "step": 2049 }, { "epoch": 0.44305165333909663, "grad_norm": 0.8925408720970154, "learning_rate": 2.4619885174962414e-05, "loss": 0.9586, "step": 2050 }, { "epoch": 0.443267776096823, "grad_norm": 1.1791043281555176, "learning_rate": 2.460626270447522e-05, "loss": 1.1034, "step": 2051 }, { "epoch": 0.44348389885454936, "grad_norm": 0.8828898668289185, "learning_rate": 2.4592637976929946e-05, "loss": 0.8665, "step": 2052 }, { "epoch": 0.4437000216122758, "grad_norm": 0.9726607799530029, "learning_rate": 2.4579010999002683e-05, "loss": 1.0096, "step": 2053 }, { "epoch": 0.44391614437000215, "grad_norm": 0.9190080761909485, "learning_rate": 2.4565381777370618e-05, "loss": 0.7618, "step": 2054 }, { "epoch": 0.4441322671277286, "grad_norm": 0.9725461006164551, "learning_rate": 2.4551750318712027e-05, "loss": 0.8907, "step": 2055 }, { "epoch": 0.44434838988545494, "grad_norm": 0.8990496397018433, "learning_rate": 2.4538116629706314e-05, "loss": 0.8768, "step": 2056 }, { "epoch": 0.4445645126431813, "grad_norm": 1.0470227003097534, "learning_rate": 2.4524480717033936e-05, "loss": 0.8767, "step": 2057 }, { "epoch": 0.44478063540090773, "grad_norm": 0.9046476483345032, "learning_rate": 2.4510842587376465e-05, "loss": 0.9632, "step": 2058 }, { "epoch": 0.4449967581586341, "grad_norm": 0.8195945620536804, "learning_rate": 2.4497202247416557e-05, "loss": 0.8968, "step": 2059 }, { "epoch": 0.44521288091636047, "grad_norm": 0.9423304796218872, "learning_rate": 2.4483559703837943e-05, "loss": 0.8321, "step": 2060 }, { "epoch": 0.4454290036740869, "grad_norm": 0.9971717000007629, "learning_rate": 2.4469914963325444e-05, "loss": 0.7434, "step": 2061 }, { "epoch": 0.44564512643181325, "grad_norm": 0.8343095183372498, "learning_rate": 2.4456268032564935e-05, "loss": 0.7801, "step": 2062 }, { "epoch": 0.4458612491895397, "grad_norm": 0.9570486545562744, "learning_rate": 2.4442618918243398e-05, "loss": 1.0419, "step": 2063 }, { "epoch": 0.44607737194726604, "grad_norm": 0.9690583944320679, "learning_rate": 2.4428967627048857e-05, "loss": 0.8199, "step": 2064 }, { "epoch": 0.4462934947049924, "grad_norm": 0.8820160031318665, "learning_rate": 2.4415314165670423e-05, "loss": 0.8887, "step": 2065 }, { "epoch": 0.44650961746271883, "grad_norm": 0.9160499572753906, "learning_rate": 2.4401658540798247e-05, "loss": 0.7878, "step": 2066 }, { "epoch": 0.4467257402204452, "grad_norm": 0.9459227919578552, "learning_rate": 2.4388000759123573e-05, "loss": 0.8063, "step": 2067 }, { "epoch": 0.4469418629781716, "grad_norm": 0.8146785497665405, "learning_rate": 2.4374340827338653e-05, "loss": 0.7571, "step": 2068 }, { "epoch": 0.447157985735898, "grad_norm": 0.9509387612342834, "learning_rate": 2.4360678752136853e-05, "loss": 1.1469, "step": 2069 }, { "epoch": 0.44737410849362436, "grad_norm": 0.8361994028091431, "learning_rate": 2.4347014540212546e-05, "loss": 0.8913, "step": 2070 }, { "epoch": 0.4475902312513508, "grad_norm": 0.8761122822761536, "learning_rate": 2.4333348198261154e-05, "loss": 0.9557, "step": 2071 }, { "epoch": 0.44780635400907715, "grad_norm": 0.8106732964515686, "learning_rate": 2.4319679732979164e-05, "loss": 0.8319, "step": 2072 }, { "epoch": 0.44802247676680357, "grad_norm": 0.9187236428260803, "learning_rate": 2.4306009151064096e-05, "loss": 0.7749, "step": 2073 }, { "epoch": 0.44823859952452993, "grad_norm": 0.8334870934486389, "learning_rate": 2.4292336459214497e-05, "loss": 0.8025, "step": 2074 }, { "epoch": 0.4484547222822563, "grad_norm": 0.8454905152320862, "learning_rate": 2.427866166412995e-05, "loss": 0.8934, "step": 2075 }, { "epoch": 0.4486708450399827, "grad_norm": 0.9709123969078064, "learning_rate": 2.4264984772511085e-05, "loss": 0.958, "step": 2076 }, { "epoch": 0.4488869677977091, "grad_norm": 0.89371258020401, "learning_rate": 2.4251305791059533e-05, "loss": 0.8512, "step": 2077 }, { "epoch": 0.4491030905554355, "grad_norm": 0.8887743949890137, "learning_rate": 2.4237624726477976e-05, "loss": 0.8881, "step": 2078 }, { "epoch": 0.4493192133131619, "grad_norm": 0.808680534362793, "learning_rate": 2.4223941585470104e-05, "loss": 0.9046, "step": 2079 }, { "epoch": 0.44953533607088825, "grad_norm": 0.913142204284668, "learning_rate": 2.421025637474061e-05, "loss": 1.0584, "step": 2080 }, { "epoch": 0.44975145882861467, "grad_norm": 0.9678011536598206, "learning_rate": 2.4196569100995228e-05, "loss": 0.9738, "step": 2081 }, { "epoch": 0.44996758158634104, "grad_norm": 0.8861469030380249, "learning_rate": 2.4182879770940692e-05, "loss": 0.8452, "step": 2082 }, { "epoch": 0.4501837043440674, "grad_norm": 1.0149720907211304, "learning_rate": 2.4169188391284735e-05, "loss": 0.758, "step": 2083 }, { "epoch": 0.4503998271017938, "grad_norm": 0.8799240589141846, "learning_rate": 2.4155494968736104e-05, "loss": 0.8982, "step": 2084 }, { "epoch": 0.4506159498595202, "grad_norm": 0.9708576202392578, "learning_rate": 2.4141799510004545e-05, "loss": 0.842, "step": 2085 }, { "epoch": 0.4508320726172466, "grad_norm": 0.8632113337516785, "learning_rate": 2.4128102021800794e-05, "loss": 0.8216, "step": 2086 }, { "epoch": 0.451048195374973, "grad_norm": 0.9555045366287231, "learning_rate": 2.4114402510836605e-05, "loss": 1.0627, "step": 2087 }, { "epoch": 0.45126431813269935, "grad_norm": 0.9048724174499512, "learning_rate": 2.4100700983824687e-05, "loss": 0.8379, "step": 2088 }, { "epoch": 0.45148044089042577, "grad_norm": 0.9210740923881531, "learning_rate": 2.408699744747877e-05, "loss": 0.9444, "step": 2089 }, { "epoch": 0.45169656364815214, "grad_norm": 0.9148637652397156, "learning_rate": 2.407329190851356e-05, "loss": 0.8682, "step": 2090 }, { "epoch": 0.45191268640587856, "grad_norm": 0.9044589996337891, "learning_rate": 2.4059584373644724e-05, "loss": 0.8847, "step": 2091 }, { "epoch": 0.45212880916360493, "grad_norm": 1.0271599292755127, "learning_rate": 2.4045874849588932e-05, "loss": 1.0158, "step": 2092 }, { "epoch": 0.4523449319213313, "grad_norm": 0.9533768892288208, "learning_rate": 2.403216334306381e-05, "loss": 0.8622, "step": 2093 }, { "epoch": 0.4525610546790577, "grad_norm": 0.9236588478088379, "learning_rate": 2.4018449860787977e-05, "loss": 0.9078, "step": 2094 }, { "epoch": 0.4527771774367841, "grad_norm": 0.992451012134552, "learning_rate": 2.4004734409480996e-05, "loss": 1.0081, "step": 2095 }, { "epoch": 0.4529933001945105, "grad_norm": 0.8581631779670715, "learning_rate": 2.3991016995863417e-05, "loss": 0.7346, "step": 2096 }, { "epoch": 0.4532094229522369, "grad_norm": 0.8542599081993103, "learning_rate": 2.3977297626656736e-05, "loss": 0.9192, "step": 2097 }, { "epoch": 0.45342554570996324, "grad_norm": 0.9894627332687378, "learning_rate": 2.396357630858341e-05, "loss": 1.1239, "step": 2098 }, { "epoch": 0.45364166846768966, "grad_norm": 0.9991193413734436, "learning_rate": 2.394985304836686e-05, "loss": 0.9764, "step": 2099 }, { "epoch": 0.45385779122541603, "grad_norm": 0.9391309022903442, "learning_rate": 2.3936127852731436e-05, "loss": 0.8762, "step": 2100 }, { "epoch": 0.45407391398314245, "grad_norm": 0.908093273639679, "learning_rate": 2.392240072840247e-05, "loss": 0.9232, "step": 2101 }, { "epoch": 0.4542900367408688, "grad_norm": 1.0131136178970337, "learning_rate": 2.3908671682106218e-05, "loss": 0.9918, "step": 2102 }, { "epoch": 0.4545061594985952, "grad_norm": 0.9139040112495422, "learning_rate": 2.389494072056987e-05, "loss": 0.7649, "step": 2103 }, { "epoch": 0.4547222822563216, "grad_norm": 0.9638808369636536, "learning_rate": 2.3881207850521578e-05, "loss": 1.1865, "step": 2104 }, { "epoch": 0.454938405014048, "grad_norm": 0.8026013970375061, "learning_rate": 2.3867473078690412e-05, "loss": 0.9392, "step": 2105 }, { "epoch": 0.45515452777177434, "grad_norm": 0.8221445083618164, "learning_rate": 2.3853736411806372e-05, "loss": 0.8729, "step": 2106 }, { "epoch": 0.45537065052950076, "grad_norm": 0.9557278752326965, "learning_rate": 2.3839997856600405e-05, "loss": 1.0695, "step": 2107 }, { "epoch": 0.45558677328722713, "grad_norm": 1.0303809642791748, "learning_rate": 2.3826257419804364e-05, "loss": 1.0914, "step": 2108 }, { "epoch": 0.45580289604495355, "grad_norm": 0.8700131773948669, "learning_rate": 2.3812515108151027e-05, "loss": 0.7936, "step": 2109 }, { "epoch": 0.4560190188026799, "grad_norm": 1.0393489599227905, "learning_rate": 2.379877092837411e-05, "loss": 0.9687, "step": 2110 }, { "epoch": 0.4562351415604063, "grad_norm": 1.025715947151184, "learning_rate": 2.3785024887208207e-05, "loss": 1.0204, "step": 2111 }, { "epoch": 0.4564512643181327, "grad_norm": 0.8350749611854553, "learning_rate": 2.3771276991388864e-05, "loss": 0.99, "step": 2112 }, { "epoch": 0.4566673870758591, "grad_norm": 0.9822539687156677, "learning_rate": 2.3757527247652513e-05, "loss": 1.0197, "step": 2113 }, { "epoch": 0.4568835098335855, "grad_norm": 0.986997663974762, "learning_rate": 2.3743775662736504e-05, "loss": 0.9264, "step": 2114 }, { "epoch": 0.45709963259131187, "grad_norm": 0.8423007130622864, "learning_rate": 2.3730022243379063e-05, "loss": 0.8811, "step": 2115 }, { "epoch": 0.45731575534903823, "grad_norm": 0.893768310546875, "learning_rate": 2.3716266996319356e-05, "loss": 0.9342, "step": 2116 }, { "epoch": 0.45753187810676466, "grad_norm": 0.8970029950141907, "learning_rate": 2.3702509928297407e-05, "loss": 0.9518, "step": 2117 }, { "epoch": 0.457748000864491, "grad_norm": 0.9548612833023071, "learning_rate": 2.368875104605415e-05, "loss": 0.9881, "step": 2118 }, { "epoch": 0.45796412362221744, "grad_norm": 0.9271209836006165, "learning_rate": 2.367499035633141e-05, "loss": 0.8797, "step": 2119 }, { "epoch": 0.4581802463799438, "grad_norm": 0.8605367541313171, "learning_rate": 2.366122786587189e-05, "loss": 0.8788, "step": 2120 }, { "epoch": 0.4583963691376702, "grad_norm": 0.9081845879554749, "learning_rate": 2.364746358141918e-05, "loss": 0.8302, "step": 2121 }, { "epoch": 0.4586124918953966, "grad_norm": 0.8858135342597961, "learning_rate": 2.3633697509717745e-05, "loss": 0.9653, "step": 2122 }, { "epoch": 0.45882861465312297, "grad_norm": 1.0024068355560303, "learning_rate": 2.3619929657512934e-05, "loss": 0.988, "step": 2123 }, { "epoch": 0.45904473741084934, "grad_norm": 0.8974485993385315, "learning_rate": 2.3606160031550952e-05, "loss": 0.8367, "step": 2124 }, { "epoch": 0.45926086016857576, "grad_norm": 0.9284573793411255, "learning_rate": 2.35923886385789e-05, "loss": 1.1474, "step": 2125 }, { "epoch": 0.4594769829263021, "grad_norm": 0.85592120885849, "learning_rate": 2.357861548534471e-05, "loss": 0.9362, "step": 2126 }, { "epoch": 0.45969310568402855, "grad_norm": 0.926304042339325, "learning_rate": 2.3564840578597213e-05, "loss": 0.9677, "step": 2127 }, { "epoch": 0.4599092284417549, "grad_norm": 0.8898852467536926, "learning_rate": 2.3551063925086072e-05, "loss": 0.9228, "step": 2128 }, { "epoch": 0.4601253511994813, "grad_norm": 0.9166799783706665, "learning_rate": 2.353728553156181e-05, "loss": 0.9467, "step": 2129 }, { "epoch": 0.4603414739572077, "grad_norm": 0.9641197323799133, "learning_rate": 2.3523505404775825e-05, "loss": 0.8508, "step": 2130 }, { "epoch": 0.46055759671493407, "grad_norm": 1.0209652185440063, "learning_rate": 2.3509723551480325e-05, "loss": 0.9728, "step": 2131 }, { "epoch": 0.4607737194726605, "grad_norm": 0.8495245575904846, "learning_rate": 2.34959399784284e-05, "loss": 0.9475, "step": 2132 }, { "epoch": 0.46098984223038686, "grad_norm": 0.7956224679946899, "learning_rate": 2.348215469237397e-05, "loss": 0.9316, "step": 2133 }, { "epoch": 0.4612059649881132, "grad_norm": 1.0309728384017944, "learning_rate": 2.3468367700071796e-05, "loss": 0.8616, "step": 2134 }, { "epoch": 0.46142208774583965, "grad_norm": 0.7615361213684082, "learning_rate": 2.3454579008277457e-05, "loss": 0.8523, "step": 2135 }, { "epoch": 0.461638210503566, "grad_norm": 1.004491925239563, "learning_rate": 2.34407886237474e-05, "loss": 0.7608, "step": 2136 }, { "epoch": 0.46185433326129244, "grad_norm": 1.046006441116333, "learning_rate": 2.3426996553238865e-05, "loss": 0.9058, "step": 2137 }, { "epoch": 0.4620704560190188, "grad_norm": 0.9255813360214233, "learning_rate": 2.3413202803509938e-05, "loss": 0.8566, "step": 2138 }, { "epoch": 0.46228657877674517, "grad_norm": 0.902448296546936, "learning_rate": 2.3399407381319536e-05, "loss": 1.1862, "step": 2139 }, { "epoch": 0.4625027015344716, "grad_norm": 1.046591877937317, "learning_rate": 2.3385610293427367e-05, "loss": 1.0268, "step": 2140 }, { "epoch": 0.46271882429219796, "grad_norm": 0.8518416881561279, "learning_rate": 2.3371811546593985e-05, "loss": 0.8228, "step": 2141 }, { "epoch": 0.4629349470499244, "grad_norm": 0.8757534623146057, "learning_rate": 2.3358011147580738e-05, "loss": 1.0293, "step": 2142 }, { "epoch": 0.46315106980765075, "grad_norm": 0.8403485417366028, "learning_rate": 2.3344209103149792e-05, "loss": 0.9059, "step": 2143 }, { "epoch": 0.4633671925653771, "grad_norm": 1.0074067115783691, "learning_rate": 2.3330405420064116e-05, "loss": 0.84, "step": 2144 }, { "epoch": 0.46358331532310354, "grad_norm": 1.0459541082382202, "learning_rate": 2.3316600105087484e-05, "loss": 0.9625, "step": 2145 }, { "epoch": 0.4637994380808299, "grad_norm": 0.9329015016555786, "learning_rate": 2.3302793164984472e-05, "loss": 0.828, "step": 2146 }, { "epoch": 0.4640155608385563, "grad_norm": 0.8942865133285522, "learning_rate": 2.3288984606520435e-05, "loss": 0.7879, "step": 2147 }, { "epoch": 0.4642316835962827, "grad_norm": 0.8742052316665649, "learning_rate": 2.327517443646155e-05, "loss": 1.0176, "step": 2148 }, { "epoch": 0.46444780635400906, "grad_norm": 0.9404723048210144, "learning_rate": 2.3261362661574758e-05, "loss": 1.0162, "step": 2149 }, { "epoch": 0.4646639291117355, "grad_norm": 0.8055806159973145, "learning_rate": 2.3247549288627806e-05, "loss": 0.7548, "step": 2150 }, { "epoch": 0.46488005186946185, "grad_norm": 0.7803989052772522, "learning_rate": 2.3233734324389204e-05, "loss": 0.9598, "step": 2151 }, { "epoch": 0.4650961746271882, "grad_norm": 0.8580408096313477, "learning_rate": 2.321991777562826e-05, "loss": 0.7791, "step": 2152 }, { "epoch": 0.46531229738491464, "grad_norm": 0.91729736328125, "learning_rate": 2.320609964911505e-05, "loss": 0.8086, "step": 2153 }, { "epoch": 0.465528420142641, "grad_norm": 0.9715539813041687, "learning_rate": 2.3192279951620426e-05, "loss": 0.8756, "step": 2154 }, { "epoch": 0.46574454290036743, "grad_norm": 0.8068549633026123, "learning_rate": 2.3178458689916e-05, "loss": 0.6827, "step": 2155 }, { "epoch": 0.4659606656580938, "grad_norm": 0.909864068031311, "learning_rate": 2.3164635870774167e-05, "loss": 1.0626, "step": 2156 }, { "epoch": 0.46617678841582016, "grad_norm": 0.8749873042106628, "learning_rate": 2.3150811500968077e-05, "loss": 0.8645, "step": 2157 }, { "epoch": 0.4663929111735466, "grad_norm": 0.9503135681152344, "learning_rate": 2.3136985587271627e-05, "loss": 0.9609, "step": 2158 }, { "epoch": 0.46660903393127295, "grad_norm": 1.0307978391647339, "learning_rate": 2.31231581364595e-05, "loss": 0.9689, "step": 2159 }, { "epoch": 0.4668251566889994, "grad_norm": 1.094694972038269, "learning_rate": 2.3109329155307094e-05, "loss": 1.0066, "step": 2160 }, { "epoch": 0.46704127944672574, "grad_norm": 0.9437207579612732, "learning_rate": 2.3095498650590598e-05, "loss": 0.9423, "step": 2161 }, { "epoch": 0.4672574022044521, "grad_norm": 0.907027006149292, "learning_rate": 2.3081666629086918e-05, "loss": 0.9202, "step": 2162 }, { "epoch": 0.46747352496217853, "grad_norm": 1.0126712322235107, "learning_rate": 2.3067833097573713e-05, "loss": 1.0868, "step": 2163 }, { "epoch": 0.4676896477199049, "grad_norm": 0.8758916258811951, "learning_rate": 2.3053998062829375e-05, "loss": 1.0776, "step": 2164 }, { "epoch": 0.4679057704776313, "grad_norm": 1.004266619682312, "learning_rate": 2.3040161531633057e-05, "loss": 1.0596, "step": 2165 }, { "epoch": 0.4681218932353577, "grad_norm": 0.9461581707000732, "learning_rate": 2.3026323510764614e-05, "loss": 0.8754, "step": 2166 }, { "epoch": 0.46833801599308406, "grad_norm": 0.9111448526382446, "learning_rate": 2.3012484007004644e-05, "loss": 0.8873, "step": 2167 }, { "epoch": 0.4685541387508105, "grad_norm": 1.1857701539993286, "learning_rate": 2.299864302713447e-05, "loss": 1.0155, "step": 2168 }, { "epoch": 0.46877026150853685, "grad_norm": 0.9034219980239868, "learning_rate": 2.298480057793615e-05, "loss": 1.0139, "step": 2169 }, { "epoch": 0.4689863842662632, "grad_norm": 0.9284543991088867, "learning_rate": 2.297095666619245e-05, "loss": 0.9375, "step": 2170 }, { "epoch": 0.46920250702398963, "grad_norm": 0.9861460328102112, "learning_rate": 2.2957111298686844e-05, "loss": 1.1214, "step": 2171 }, { "epoch": 0.469418629781716, "grad_norm": 0.997794508934021, "learning_rate": 2.2943264482203544e-05, "loss": 0.831, "step": 2172 }, { "epoch": 0.4696347525394424, "grad_norm": 0.9758365750312805, "learning_rate": 2.2929416223527446e-05, "loss": 0.9791, "step": 2173 }, { "epoch": 0.4698508752971688, "grad_norm": 0.8575986623764038, "learning_rate": 2.2915566529444173e-05, "loss": 0.8343, "step": 2174 }, { "epoch": 0.47006699805489516, "grad_norm": 0.8302111625671387, "learning_rate": 2.2901715406740043e-05, "loss": 0.7926, "step": 2175 }, { "epoch": 0.4702831208126216, "grad_norm": 0.9717157483100891, "learning_rate": 2.2887862862202068e-05, "loss": 0.7629, "step": 2176 }, { "epoch": 0.47049924357034795, "grad_norm": 0.8666814565658569, "learning_rate": 2.2874008902617967e-05, "loss": 0.8743, "step": 2177 }, { "epoch": 0.47071536632807437, "grad_norm": 0.8030889630317688, "learning_rate": 2.2860153534776145e-05, "loss": 0.8479, "step": 2178 }, { "epoch": 0.47093148908580074, "grad_norm": 0.9813153147697449, "learning_rate": 2.2846296765465708e-05, "loss": 0.8502, "step": 2179 }, { "epoch": 0.4711476118435271, "grad_norm": 0.9990540742874146, "learning_rate": 2.2832438601476428e-05, "loss": 0.9455, "step": 2180 }, { "epoch": 0.4713637346012535, "grad_norm": 1.0254837274551392, "learning_rate": 2.2818579049598788e-05, "loss": 1.0668, "step": 2181 }, { "epoch": 0.4715798573589799, "grad_norm": 0.9308380484580994, "learning_rate": 2.2804718116623926e-05, "loss": 0.9167, "step": 2182 }, { "epoch": 0.4717959801167063, "grad_norm": 0.8804174065589905, "learning_rate": 2.2790855809343673e-05, "loss": 0.8824, "step": 2183 }, { "epoch": 0.4720121028744327, "grad_norm": 0.847017228603363, "learning_rate": 2.2776992134550517e-05, "loss": 0.7257, "step": 2184 }, { "epoch": 0.47222822563215905, "grad_norm": 0.9161202907562256, "learning_rate": 2.2763127099037646e-05, "loss": 0.8147, "step": 2185 }, { "epoch": 0.47244434838988547, "grad_norm": 0.9066073298454285, "learning_rate": 2.274926070959888e-05, "loss": 0.8774, "step": 2186 }, { "epoch": 0.47266047114761184, "grad_norm": 0.9424638152122498, "learning_rate": 2.2735392973028723e-05, "loss": 0.9729, "step": 2187 }, { "epoch": 0.4728765939053382, "grad_norm": 0.8449385166168213, "learning_rate": 2.272152389612233e-05, "loss": 0.8649, "step": 2188 }, { "epoch": 0.4730927166630646, "grad_norm": 0.8816748261451721, "learning_rate": 2.2707653485675513e-05, "loss": 1.0513, "step": 2189 }, { "epoch": 0.473308839420791, "grad_norm": 1.061385154724121, "learning_rate": 2.269378174848476e-05, "loss": 0.8477, "step": 2190 }, { "epoch": 0.4735249621785174, "grad_norm": 0.9070470929145813, "learning_rate": 2.2679908691347165e-05, "loss": 0.7769, "step": 2191 }, { "epoch": 0.4737410849362438, "grad_norm": 0.8117925524711609, "learning_rate": 2.2666034321060516e-05, "loss": 0.8018, "step": 2192 }, { "epoch": 0.47395720769397015, "grad_norm": 0.9357739090919495, "learning_rate": 2.265215864442321e-05, "loss": 0.8859, "step": 2193 }, { "epoch": 0.4741733304516966, "grad_norm": 1.0191253423690796, "learning_rate": 2.2638281668234295e-05, "loss": 0.9105, "step": 2194 }, { "epoch": 0.47438945320942294, "grad_norm": 0.8923472166061401, "learning_rate": 2.2624403399293464e-05, "loss": 0.8753, "step": 2195 }, { "epoch": 0.47460557596714936, "grad_norm": 1.0272793769836426, "learning_rate": 2.261052384440104e-05, "loss": 0.885, "step": 2196 }, { "epoch": 0.47482169872487573, "grad_norm": 0.9659473299980164, "learning_rate": 2.259664301035796e-05, "loss": 0.9349, "step": 2197 }, { "epoch": 0.4750378214826021, "grad_norm": 0.9282272458076477, "learning_rate": 2.25827609039658e-05, "loss": 0.9397, "step": 2198 }, { "epoch": 0.4752539442403285, "grad_norm": 1.0531755685806274, "learning_rate": 2.2568877532026785e-05, "loss": 0.7518, "step": 2199 }, { "epoch": 0.4754700669980549, "grad_norm": 0.8328658938407898, "learning_rate": 2.2554992901343705e-05, "loss": 0.7891, "step": 2200 }, { "epoch": 0.4756861897557813, "grad_norm": 1.065454125404358, "learning_rate": 2.2541107018720013e-05, "loss": 0.9156, "step": 2201 }, { "epoch": 0.4759023125135077, "grad_norm": 0.9001337289810181, "learning_rate": 2.2527219890959756e-05, "loss": 0.8919, "step": 2202 }, { "epoch": 0.47611843527123404, "grad_norm": 1.0516306161880493, "learning_rate": 2.2513331524867596e-05, "loss": 1.1024, "step": 2203 }, { "epoch": 0.47633455802896046, "grad_norm": 0.952760636806488, "learning_rate": 2.24994419272488e-05, "loss": 0.8666, "step": 2204 }, { "epoch": 0.47655068078668683, "grad_norm": 1.0019255876541138, "learning_rate": 2.2485551104909235e-05, "loss": 1.0784, "step": 2205 }, { "epoch": 0.47676680354441325, "grad_norm": 1.068312168121338, "learning_rate": 2.2471659064655375e-05, "loss": 1.0797, "step": 2206 }, { "epoch": 0.4769829263021396, "grad_norm": 1.2210317850112915, "learning_rate": 2.2457765813294285e-05, "loss": 0.7697, "step": 2207 }, { "epoch": 0.477199049059866, "grad_norm": 1.0743895769119263, "learning_rate": 2.2443871357633632e-05, "loss": 1.1007, "step": 2208 }, { "epoch": 0.4774151718175924, "grad_norm": 0.9332402348518372, "learning_rate": 2.2429975704481658e-05, "loss": 0.9549, "step": 2209 }, { "epoch": 0.4776312945753188, "grad_norm": 0.833015501499176, "learning_rate": 2.2416078860647213e-05, "loss": 0.8848, "step": 2210 }, { "epoch": 0.47784741733304514, "grad_norm": 0.9843903183937073, "learning_rate": 2.2402180832939707e-05, "loss": 0.9991, "step": 2211 }, { "epoch": 0.47806354009077157, "grad_norm": 0.8275328278541565, "learning_rate": 2.2388281628169146e-05, "loss": 0.8761, "step": 2212 }, { "epoch": 0.47827966284849793, "grad_norm": 1.0255450010299683, "learning_rate": 2.2374381253146105e-05, "loss": 0.9425, "step": 2213 }, { "epoch": 0.47849578560622436, "grad_norm": 1.0531729459762573, "learning_rate": 2.2360479714681745e-05, "loss": 1.0212, "step": 2214 }, { "epoch": 0.4787119083639507, "grad_norm": 0.8640594482421875, "learning_rate": 2.234657701958778e-05, "loss": 0.8951, "step": 2215 }, { "epoch": 0.4789280311216771, "grad_norm": 1.0316178798675537, "learning_rate": 2.2332673174676496e-05, "loss": 0.9412, "step": 2216 }, { "epoch": 0.4791441538794035, "grad_norm": 0.8753515481948853, "learning_rate": 2.2318768186760753e-05, "loss": 0.9821, "step": 2217 }, { "epoch": 0.4793602766371299, "grad_norm": 0.998913049697876, "learning_rate": 2.2304862062653956e-05, "loss": 1.0409, "step": 2218 }, { "epoch": 0.4795763993948563, "grad_norm": 0.9236776232719421, "learning_rate": 2.229095480917008e-05, "loss": 0.9509, "step": 2219 }, { "epoch": 0.47979252215258267, "grad_norm": 0.9642339944839478, "learning_rate": 2.2277046433123636e-05, "loss": 1.0345, "step": 2220 }, { "epoch": 0.48000864491030903, "grad_norm": 0.9492659568786621, "learning_rate": 2.226313694132971e-05, "loss": 0.8834, "step": 2221 }, { "epoch": 0.48022476766803546, "grad_norm": 0.8292040228843689, "learning_rate": 2.2249226340603913e-05, "loss": 0.7468, "step": 2222 }, { "epoch": 0.4804408904257618, "grad_norm": 0.9183518886566162, "learning_rate": 2.223531463776241e-05, "loss": 0.914, "step": 2223 }, { "epoch": 0.48065701318348825, "grad_norm": 0.960827648639679, "learning_rate": 2.22214018396219e-05, "loss": 0.8415, "step": 2224 }, { "epoch": 0.4808731359412146, "grad_norm": 0.932241678237915, "learning_rate": 2.2207487952999623e-05, "loss": 0.9552, "step": 2225 }, { "epoch": 0.481089258698941, "grad_norm": 0.9515604376792908, "learning_rate": 2.2193572984713356e-05, "loss": 0.8896, "step": 2226 }, { "epoch": 0.4813053814566674, "grad_norm": 0.9919021129608154, "learning_rate": 2.2179656941581387e-05, "loss": 0.8612, "step": 2227 }, { "epoch": 0.48152150421439377, "grad_norm": 0.9585627913475037, "learning_rate": 2.216573983042257e-05, "loss": 0.9442, "step": 2228 }, { "epoch": 0.48173762697212014, "grad_norm": 0.9216515421867371, "learning_rate": 2.2151821658056232e-05, "loss": 0.9371, "step": 2229 }, { "epoch": 0.48195374972984656, "grad_norm": 0.9168282151222229, "learning_rate": 2.2137902431302264e-05, "loss": 0.9961, "step": 2230 }, { "epoch": 0.4821698724875729, "grad_norm": 0.9793004989624023, "learning_rate": 2.2123982156981034e-05, "loss": 1.1558, "step": 2231 }, { "epoch": 0.48238599524529935, "grad_norm": 0.8328722715377808, "learning_rate": 2.2110060841913467e-05, "loss": 0.7743, "step": 2232 }, { "epoch": 0.4826021180030257, "grad_norm": 1.0485424995422363, "learning_rate": 2.2096138492920965e-05, "loss": 0.8943, "step": 2233 }, { "epoch": 0.4828182407607521, "grad_norm": 1.0185878276824951, "learning_rate": 2.2082215116825445e-05, "loss": 0.9977, "step": 2234 }, { "epoch": 0.4830343635184785, "grad_norm": 1.0318353176116943, "learning_rate": 2.2068290720449328e-05, "loss": 0.9433, "step": 2235 }, { "epoch": 0.48325048627620487, "grad_norm": 0.9102119207382202, "learning_rate": 2.2054365310615548e-05, "loss": 1.0616, "step": 2236 }, { "epoch": 0.4834666090339313, "grad_norm": 0.7391582131385803, "learning_rate": 2.2040438894147505e-05, "loss": 0.8957, "step": 2237 }, { "epoch": 0.48368273179165766, "grad_norm": 1.055592656135559, "learning_rate": 2.202651147786912e-05, "loss": 0.8464, "step": 2238 }, { "epoch": 0.483898854549384, "grad_norm": 1.039684534072876, "learning_rate": 2.2012583068604803e-05, "loss": 1.0491, "step": 2239 }, { "epoch": 0.48411497730711045, "grad_norm": 1.0695092678070068, "learning_rate": 2.1998653673179433e-05, "loss": 0.8996, "step": 2240 }, { "epoch": 0.4843311000648368, "grad_norm": 0.9700279235839844, "learning_rate": 2.1984723298418384e-05, "loss": 0.7883, "step": 2241 }, { "epoch": 0.48454722282256324, "grad_norm": 0.8059743046760559, "learning_rate": 2.1970791951147505e-05, "loss": 0.7191, "step": 2242 }, { "epoch": 0.4847633455802896, "grad_norm": 0.8079337477684021, "learning_rate": 2.1956859638193126e-05, "loss": 0.929, "step": 2243 }, { "epoch": 0.484979468338016, "grad_norm": 1.0118950605392456, "learning_rate": 2.194292636638205e-05, "loss": 0.8756, "step": 2244 }, { "epoch": 0.4851955910957424, "grad_norm": 0.9371973872184753, "learning_rate": 2.1928992142541545e-05, "loss": 0.8836, "step": 2245 }, { "epoch": 0.48541171385346876, "grad_norm": 0.9567400217056274, "learning_rate": 2.1915056973499346e-05, "loss": 0.9433, "step": 2246 }, { "epoch": 0.4856278366111952, "grad_norm": 1.198793888092041, "learning_rate": 2.190112086608365e-05, "loss": 1.1154, "step": 2247 }, { "epoch": 0.48584395936892155, "grad_norm": 0.9048296809196472, "learning_rate": 2.1887183827123143e-05, "loss": 0.7672, "step": 2248 }, { "epoch": 0.4860600821266479, "grad_norm": 1.0196088552474976, "learning_rate": 2.187324586344691e-05, "loss": 0.9443, "step": 2249 }, { "epoch": 0.48627620488437434, "grad_norm": 1.0643374919891357, "learning_rate": 2.1859306981884542e-05, "loss": 0.8723, "step": 2250 }, { "epoch": 0.4864923276421007, "grad_norm": 0.9235920310020447, "learning_rate": 2.184536718926604e-05, "loss": 0.7271, "step": 2251 }, { "epoch": 0.4867084503998271, "grad_norm": 0.9010453820228577, "learning_rate": 2.1831426492421893e-05, "loss": 0.9279, "step": 2252 }, { "epoch": 0.4869245731575535, "grad_norm": 0.8894018530845642, "learning_rate": 2.1817484898182992e-05, "loss": 0.8781, "step": 2253 }, { "epoch": 0.48714069591527986, "grad_norm": 0.8632429838180542, "learning_rate": 2.180354241338069e-05, "loss": 0.8908, "step": 2254 }, { "epoch": 0.4873568186730063, "grad_norm": 1.0111632347106934, "learning_rate": 2.1789599044846782e-05, "loss": 1.0216, "step": 2255 }, { "epoch": 0.48757294143073265, "grad_norm": 0.933199942111969, "learning_rate": 2.1775654799413476e-05, "loss": 0.8884, "step": 2256 }, { "epoch": 0.487789064188459, "grad_norm": 0.8441728353500366, "learning_rate": 2.1761709683913423e-05, "loss": 0.7231, "step": 2257 }, { "epoch": 0.48800518694618544, "grad_norm": 1.016108751296997, "learning_rate": 2.174776370517969e-05, "loss": 0.7251, "step": 2258 }, { "epoch": 0.4882213097039118, "grad_norm": 0.8854086399078369, "learning_rate": 2.1733816870045798e-05, "loss": 0.9986, "step": 2259 }, { "epoch": 0.48843743246163823, "grad_norm": 0.9269996881484985, "learning_rate": 2.1719869185345632e-05, "loss": 0.9333, "step": 2260 }, { "epoch": 0.4886535552193646, "grad_norm": 0.8737806081771851, "learning_rate": 2.170592065791355e-05, "loss": 0.9868, "step": 2261 }, { "epoch": 0.48886967797709097, "grad_norm": 0.9864827394485474, "learning_rate": 2.1691971294584282e-05, "loss": 0.9379, "step": 2262 }, { "epoch": 0.4890858007348174, "grad_norm": 0.9524204134941101, "learning_rate": 2.1678021102192996e-05, "loss": 0.7459, "step": 2263 }, { "epoch": 0.48930192349254376, "grad_norm": 0.9791752099990845, "learning_rate": 2.166407008757525e-05, "loss": 0.9494, "step": 2264 }, { "epoch": 0.4895180462502702, "grad_norm": 1.0480341911315918, "learning_rate": 2.1650118257567002e-05, "loss": 0.9778, "step": 2265 }, { "epoch": 0.48973416900799654, "grad_norm": 0.9262263774871826, "learning_rate": 2.163616561900463e-05, "loss": 0.7492, "step": 2266 }, { "epoch": 0.4899502917657229, "grad_norm": 0.9226260185241699, "learning_rate": 2.162221217872488e-05, "loss": 0.9377, "step": 2267 }, { "epoch": 0.49016641452344933, "grad_norm": 1.1801583766937256, "learning_rate": 2.160825794356492e-05, "loss": 0.8532, "step": 2268 }, { "epoch": 0.4903825372811757, "grad_norm": 0.9868795871734619, "learning_rate": 2.1594302920362276e-05, "loss": 1.0342, "step": 2269 }, { "epoch": 0.4905986600389021, "grad_norm": 0.9075852632522583, "learning_rate": 2.1580347115954896e-05, "loss": 0.7872, "step": 2270 }, { "epoch": 0.4908147827966285, "grad_norm": 0.9382511973381042, "learning_rate": 2.1566390537181075e-05, "loss": 0.7565, "step": 2271 }, { "epoch": 0.49103090555435486, "grad_norm": 0.8777767419815063, "learning_rate": 2.1552433190879512e-05, "loss": 0.736, "step": 2272 }, { "epoch": 0.4912470283120813, "grad_norm": 0.7822272777557373, "learning_rate": 2.1538475083889278e-05, "loss": 0.6991, "step": 2273 }, { "epoch": 0.49146315106980765, "grad_norm": 0.8870785236358643, "learning_rate": 2.152451622304981e-05, "loss": 0.8687, "step": 2274 }, { "epoch": 0.491679273827534, "grad_norm": 1.043366551399231, "learning_rate": 2.1510556615200917e-05, "loss": 0.891, "step": 2275 }, { "epoch": 0.49189539658526044, "grad_norm": 0.7973032593727112, "learning_rate": 2.1496596267182773e-05, "loss": 0.8404, "step": 2276 }, { "epoch": 0.4921115193429868, "grad_norm": 0.9199157953262329, "learning_rate": 2.1482635185835917e-05, "loss": 1.0426, "step": 2277 }, { "epoch": 0.4923276421007132, "grad_norm": 1.0459051132202148, "learning_rate": 2.1468673378001242e-05, "loss": 0.8554, "step": 2278 }, { "epoch": 0.4925437648584396, "grad_norm": 0.9541643261909485, "learning_rate": 2.1454710850520016e-05, "loss": 0.9347, "step": 2279 }, { "epoch": 0.49275988761616596, "grad_norm": 0.9007477164268494, "learning_rate": 2.144074761023383e-05, "loss": 0.9255, "step": 2280 }, { "epoch": 0.4929760103738924, "grad_norm": 0.8977211117744446, "learning_rate": 2.1426783663984648e-05, "loss": 0.9632, "step": 2281 }, { "epoch": 0.49319213313161875, "grad_norm": 0.8943268656730652, "learning_rate": 2.141281901861477e-05, "loss": 0.8649, "step": 2282 }, { "epoch": 0.49340825588934517, "grad_norm": 0.9033158421516418, "learning_rate": 2.139885368096684e-05, "loss": 0.8529, "step": 2283 }, { "epoch": 0.49362437864707154, "grad_norm": 0.8825488686561584, "learning_rate": 2.1384887657883836e-05, "loss": 0.9252, "step": 2284 }, { "epoch": 0.4938405014047979, "grad_norm": 0.8469288349151611, "learning_rate": 2.137092095620908e-05, "loss": 0.8682, "step": 2285 }, { "epoch": 0.4940566241625243, "grad_norm": 0.9247711300849915, "learning_rate": 2.135695358278623e-05, "loss": 0.7958, "step": 2286 }, { "epoch": 0.4942727469202507, "grad_norm": 0.8183013796806335, "learning_rate": 2.1342985544459258e-05, "loss": 0.8834, "step": 2287 }, { "epoch": 0.4944888696779771, "grad_norm": 0.8992856740951538, "learning_rate": 2.132901684807248e-05, "loss": 1.0267, "step": 2288 }, { "epoch": 0.4947049924357035, "grad_norm": 0.9574567079544067, "learning_rate": 2.1315047500470505e-05, "loss": 0.9439, "step": 2289 }, { "epoch": 0.49492111519342985, "grad_norm": 0.9456168413162231, "learning_rate": 2.1301077508498305e-05, "loss": 0.9476, "step": 2290 }, { "epoch": 0.4951372379511563, "grad_norm": 0.8847987055778503, "learning_rate": 2.128710687900113e-05, "loss": 0.9578, "step": 2291 }, { "epoch": 0.49535336070888264, "grad_norm": 1.0706462860107422, "learning_rate": 2.1273135618824562e-05, "loss": 1.0467, "step": 2292 }, { "epoch": 0.495569483466609, "grad_norm": 0.9031651020050049, "learning_rate": 2.1259163734814482e-05, "loss": 0.7507, "step": 2293 }, { "epoch": 0.49578560622433543, "grad_norm": 1.0642560720443726, "learning_rate": 2.1245191233817085e-05, "loss": 0.9923, "step": 2294 }, { "epoch": 0.4960017289820618, "grad_norm": 1.0771523714065552, "learning_rate": 2.1231218122678862e-05, "loss": 1.0417, "step": 2295 }, { "epoch": 0.4962178517397882, "grad_norm": 0.9673963785171509, "learning_rate": 2.1217244408246605e-05, "loss": 0.8142, "step": 2296 }, { "epoch": 0.4964339744975146, "grad_norm": 0.8511964082717896, "learning_rate": 2.1203270097367398e-05, "loss": 0.9035, "step": 2297 }, { "epoch": 0.49665009725524095, "grad_norm": 1.0219144821166992, "learning_rate": 2.1189295196888624e-05, "loss": 0.9958, "step": 2298 }, { "epoch": 0.4968662200129674, "grad_norm": 0.8596683144569397, "learning_rate": 2.117531971365796e-05, "loss": 0.8375, "step": 2299 }, { "epoch": 0.49708234277069374, "grad_norm": 1.0385677814483643, "learning_rate": 2.1161343654523346e-05, "loss": 0.8623, "step": 2300 }, { "epoch": 0.49729846552842016, "grad_norm": 0.8668344020843506, "learning_rate": 2.114736702633303e-05, "loss": 0.905, "step": 2301 }, { "epoch": 0.49751458828614653, "grad_norm": 0.9759607315063477, "learning_rate": 2.113338983593552e-05, "loss": 0.8899, "step": 2302 }, { "epoch": 0.4977307110438729, "grad_norm": 0.9088861346244812, "learning_rate": 2.1119412090179616e-05, "loss": 1.0605, "step": 2303 }, { "epoch": 0.4979468338015993, "grad_norm": 0.8826245069503784, "learning_rate": 2.110543379591437e-05, "loss": 0.8327, "step": 2304 }, { "epoch": 0.4981629565593257, "grad_norm": 0.9717110395431519, "learning_rate": 2.109145495998912e-05, "loss": 0.9719, "step": 2305 }, { "epoch": 0.4983790793170521, "grad_norm": 0.9280117154121399, "learning_rate": 2.107747558925347e-05, "loss": 0.897, "step": 2306 }, { "epoch": 0.4985952020747785, "grad_norm": 0.9122903943061829, "learning_rate": 2.106349569055726e-05, "loss": 0.9383, "step": 2307 }, { "epoch": 0.49881132483250484, "grad_norm": 1.0483940839767456, "learning_rate": 2.1049515270750632e-05, "loss": 0.8508, "step": 2308 }, { "epoch": 0.49902744759023127, "grad_norm": 0.860085129737854, "learning_rate": 2.1035534336683936e-05, "loss": 0.9986, "step": 2309 }, { "epoch": 0.49924357034795763, "grad_norm": 0.9422728419303894, "learning_rate": 2.1021552895207815e-05, "loss": 0.9296, "step": 2310 }, { "epoch": 0.49945969310568405, "grad_norm": 0.8932642936706543, "learning_rate": 2.100757095317314e-05, "loss": 1.1715, "step": 2311 }, { "epoch": 0.4996758158634104, "grad_norm": 0.9482457637786865, "learning_rate": 2.0993588517431024e-05, "loss": 0.9304, "step": 2312 }, { "epoch": 0.4998919386211368, "grad_norm": 1.0164151191711426, "learning_rate": 2.097960559483283e-05, "loss": 1.05, "step": 2313 }, { "epoch": 0.5001080613788632, "grad_norm": 1.0282493829727173, "learning_rate": 2.0965622192230158e-05, "loss": 1.0176, "step": 2314 }, { "epoch": 0.5003241841365896, "grad_norm": 0.8830835223197937, "learning_rate": 2.095163831647485e-05, "loss": 0.773, "step": 2315 }, { "epoch": 0.500540306894316, "grad_norm": 0.9088915586471558, "learning_rate": 2.0937653974418963e-05, "loss": 0.9497, "step": 2316 }, { "epoch": 0.5007564296520424, "grad_norm": 0.9781036376953125, "learning_rate": 2.0923669172914796e-05, "loss": 1.0345, "step": 2317 }, { "epoch": 0.5009725524097688, "grad_norm": 0.8619513511657715, "learning_rate": 2.0909683918814867e-05, "loss": 0.8277, "step": 2318 }, { "epoch": 0.5011886751674951, "grad_norm": 0.9903712868690491, "learning_rate": 2.0895698218971927e-05, "loss": 0.9553, "step": 2319 }, { "epoch": 0.5014047979252215, "grad_norm": 0.8601084351539612, "learning_rate": 2.088171208023892e-05, "loss": 0.7103, "step": 2320 }, { "epoch": 0.501620920682948, "grad_norm": 1.0813382863998413, "learning_rate": 2.0867725509469042e-05, "loss": 0.8895, "step": 2321 }, { "epoch": 0.5018370434406743, "grad_norm": 0.9440907835960388, "learning_rate": 2.0853738513515663e-05, "loss": 0.8573, "step": 2322 }, { "epoch": 0.5020531661984007, "grad_norm": 1.0963239669799805, "learning_rate": 2.0839751099232392e-05, "loss": 0.9476, "step": 2323 }, { "epoch": 0.5022692889561271, "grad_norm": 1.0190849304199219, "learning_rate": 2.0825763273473022e-05, "loss": 0.9648, "step": 2324 }, { "epoch": 0.5024854117138534, "grad_norm": 0.9341250061988831, "learning_rate": 2.081177504309156e-05, "loss": 0.8598, "step": 2325 }, { "epoch": 0.5027015344715798, "grad_norm": 1.0032615661621094, "learning_rate": 2.0797786414942197e-05, "loss": 1.0656, "step": 2326 }, { "epoch": 0.5029176572293063, "grad_norm": 0.8984432816505432, "learning_rate": 2.078379739587933e-05, "loss": 0.7449, "step": 2327 }, { "epoch": 0.5031337799870327, "grad_norm": 0.8945673704147339, "learning_rate": 2.0769807992757568e-05, "loss": 0.834, "step": 2328 }, { "epoch": 0.503349902744759, "grad_norm": 0.8658612966537476, "learning_rate": 2.0755818212431653e-05, "loss": 0.8557, "step": 2329 }, { "epoch": 0.5035660255024854, "grad_norm": 0.9491671323776245, "learning_rate": 2.074182806175657e-05, "loss": 1.0323, "step": 2330 }, { "epoch": 0.5037821482602118, "grad_norm": 0.8965189456939697, "learning_rate": 2.0727837547587447e-05, "loss": 0.9245, "step": 2331 }, { "epoch": 0.5039982710179381, "grad_norm": 0.9609273076057434, "learning_rate": 2.0713846676779613e-05, "loss": 0.7758, "step": 2332 }, { "epoch": 0.5042143937756646, "grad_norm": 0.992252767086029, "learning_rate": 2.0699855456188555e-05, "loss": 1.0365, "step": 2333 }, { "epoch": 0.504430516533391, "grad_norm": 0.907981812953949, "learning_rate": 2.068586389266994e-05, "loss": 0.9384, "step": 2334 }, { "epoch": 0.5046466392911173, "grad_norm": 0.8060364723205566, "learning_rate": 2.0671871993079606e-05, "loss": 0.9055, "step": 2335 }, { "epoch": 0.5048627620488437, "grad_norm": 1.042852520942688, "learning_rate": 2.0657879764273546e-05, "loss": 1.0777, "step": 2336 }, { "epoch": 0.5050788848065701, "grad_norm": 0.8662554025650024, "learning_rate": 2.064388721310792e-05, "loss": 0.9275, "step": 2337 }, { "epoch": 0.5052950075642966, "grad_norm": 0.9479020833969116, "learning_rate": 2.062989434643905e-05, "loss": 0.9244, "step": 2338 }, { "epoch": 0.5055111303220229, "grad_norm": 0.978425920009613, "learning_rate": 2.061590117112341e-05, "loss": 0.8059, "step": 2339 }, { "epoch": 0.5057272530797493, "grad_norm": 0.919092059135437, "learning_rate": 2.0601907694017617e-05, "loss": 0.915, "step": 2340 }, { "epoch": 0.5059433758374757, "grad_norm": 1.0036684274673462, "learning_rate": 2.0587913921978445e-05, "loss": 0.8228, "step": 2341 }, { "epoch": 0.506159498595202, "grad_norm": 0.978500247001648, "learning_rate": 2.0573919861862812e-05, "loss": 0.9381, "step": 2342 }, { "epoch": 0.5063756213529285, "grad_norm": 0.9454809427261353, "learning_rate": 2.055992552052777e-05, "loss": 0.9983, "step": 2343 }, { "epoch": 0.5065917441106549, "grad_norm": 1.0295363664627075, "learning_rate": 2.054593090483052e-05, "loss": 1.0686, "step": 2344 }, { "epoch": 0.5068078668683812, "grad_norm": 0.8806283473968506, "learning_rate": 2.053193602162839e-05, "loss": 0.899, "step": 2345 }, { "epoch": 0.5070239896261076, "grad_norm": 0.9531514048576355, "learning_rate": 2.051794087777884e-05, "loss": 0.9833, "step": 2346 }, { "epoch": 0.507240112383834, "grad_norm": 0.9447773098945618, "learning_rate": 2.050394548013945e-05, "loss": 0.9393, "step": 2347 }, { "epoch": 0.5074562351415604, "grad_norm": 0.9605883955955505, "learning_rate": 2.048994983556795e-05, "loss": 1.005, "step": 2348 }, { "epoch": 0.5076723578992868, "grad_norm": 0.9409863948822021, "learning_rate": 2.0475953950922148e-05, "loss": 0.7372, "step": 2349 }, { "epoch": 0.5078884806570132, "grad_norm": 0.9915863871574402, "learning_rate": 2.0461957833060025e-05, "loss": 0.9475, "step": 2350 }, { "epoch": 0.5081046034147396, "grad_norm": 0.8699820041656494, "learning_rate": 2.0447961488839625e-05, "loss": 0.7303, "step": 2351 }, { "epoch": 0.5083207261724659, "grad_norm": 0.8779844641685486, "learning_rate": 2.0433964925119132e-05, "loss": 0.936, "step": 2352 }, { "epoch": 0.5085368489301924, "grad_norm": 1.072743535041809, "learning_rate": 2.041996814875683e-05, "loss": 1.0447, "step": 2353 }, { "epoch": 0.5087529716879188, "grad_norm": 1.0182307958602905, "learning_rate": 2.0405971166611108e-05, "loss": 0.8995, "step": 2354 }, { "epoch": 0.5089690944456451, "grad_norm": 0.9541364908218384, "learning_rate": 2.039197398554045e-05, "loss": 0.9084, "step": 2355 }, { "epoch": 0.5091852172033715, "grad_norm": 0.9599238038063049, "learning_rate": 2.0377976612403443e-05, "loss": 0.8955, "step": 2356 }, { "epoch": 0.5094013399610979, "grad_norm": 0.8445461988449097, "learning_rate": 2.0363979054058777e-05, "loss": 0.8298, "step": 2357 }, { "epoch": 0.5096174627188242, "grad_norm": 0.9038436412811279, "learning_rate": 2.0349981317365205e-05, "loss": 0.7916, "step": 2358 }, { "epoch": 0.5098335854765507, "grad_norm": 0.8970431089401245, "learning_rate": 2.0335983409181606e-05, "loss": 0.9355, "step": 2359 }, { "epoch": 0.5100497082342771, "grad_norm": 0.934702455997467, "learning_rate": 2.0321985336366906e-05, "loss": 0.8184, "step": 2360 }, { "epoch": 0.5102658309920035, "grad_norm": 0.8944525718688965, "learning_rate": 2.0307987105780138e-05, "loss": 0.9199, "step": 2361 }, { "epoch": 0.5104819537497298, "grad_norm": 0.9491285681724548, "learning_rate": 2.0293988724280404e-05, "loss": 0.9956, "step": 2362 }, { "epoch": 0.5106980765074562, "grad_norm": 0.8704890608787537, "learning_rate": 2.027999019872687e-05, "loss": 0.8784, "step": 2363 }, { "epoch": 0.5109141992651827, "grad_norm": 0.8883909583091736, "learning_rate": 2.026599153597879e-05, "loss": 0.8123, "step": 2364 }, { "epoch": 0.511130322022909, "grad_norm": 0.8475587964057922, "learning_rate": 2.025199274289547e-05, "loss": 0.89, "step": 2365 }, { "epoch": 0.5113464447806354, "grad_norm": 0.888871431350708, "learning_rate": 2.023799382633629e-05, "loss": 1.0221, "step": 2366 }, { "epoch": 0.5115625675383618, "grad_norm": 0.846250057220459, "learning_rate": 2.0223994793160678e-05, "loss": 0.8937, "step": 2367 }, { "epoch": 0.5117786902960881, "grad_norm": 0.9950271844863892, "learning_rate": 2.0209995650228146e-05, "loss": 0.8609, "step": 2368 }, { "epoch": 0.5119948130538146, "grad_norm": 0.9442114233970642, "learning_rate": 2.0195996404398222e-05, "loss": 0.9993, "step": 2369 }, { "epoch": 0.512210935811541, "grad_norm": 0.8403794169425964, "learning_rate": 2.0181997062530513e-05, "loss": 0.9092, "step": 2370 }, { "epoch": 0.5124270585692673, "grad_norm": 0.8744664192199707, "learning_rate": 2.016799763148467e-05, "loss": 0.9722, "step": 2371 }, { "epoch": 0.5126431813269937, "grad_norm": 0.9275670051574707, "learning_rate": 2.0153998118120376e-05, "loss": 0.9156, "step": 2372 }, { "epoch": 0.5128593040847201, "grad_norm": 0.8707650303840637, "learning_rate": 2.0139998529297355e-05, "loss": 0.921, "step": 2373 }, { "epoch": 0.5130754268424466, "grad_norm": 1.0175389051437378, "learning_rate": 2.0125998871875385e-05, "loss": 1.0841, "step": 2374 }, { "epoch": 0.5132915496001729, "grad_norm": 0.9202478528022766, "learning_rate": 2.0111999152714254e-05, "loss": 1.1347, "step": 2375 }, { "epoch": 0.5135076723578993, "grad_norm": 0.929257869720459, "learning_rate": 2.00979993786738e-05, "loss": 0.9207, "step": 2376 }, { "epoch": 0.5137237951156257, "grad_norm": 0.9317587614059448, "learning_rate": 2.0083999556613874e-05, "loss": 0.8966, "step": 2377 }, { "epoch": 0.513939917873352, "grad_norm": 1.0302447080612183, "learning_rate": 2.0069999693394354e-05, "loss": 0.808, "step": 2378 }, { "epoch": 0.5141560406310784, "grad_norm": 0.9321349859237671, "learning_rate": 2.005599979587516e-05, "loss": 0.8473, "step": 2379 }, { "epoch": 0.5143721633888049, "grad_norm": 1.0263248682022095, "learning_rate": 2.0041999870916186e-05, "loss": 1.0613, "step": 2380 }, { "epoch": 0.5145882861465312, "grad_norm": 0.9499816298484802, "learning_rate": 2.0027999925377375e-05, "loss": 0.9073, "step": 2381 }, { "epoch": 0.5148044089042576, "grad_norm": 1.3729811906814575, "learning_rate": 2.0013999966118664e-05, "loss": 0.8664, "step": 2382 }, { "epoch": 0.515020531661984, "grad_norm": 0.9456096887588501, "learning_rate": 2e-05, "loss": 0.9954, "step": 2383 }, { "epoch": 0.5152366544197104, "grad_norm": 1.0228707790374756, "learning_rate": 1.998600003388134e-05, "loss": 1.041, "step": 2384 }, { "epoch": 0.5154527771774368, "grad_norm": 0.8717309832572937, "learning_rate": 1.9972000074622628e-05, "loss": 0.9667, "step": 2385 }, { "epoch": 0.5156688999351632, "grad_norm": 0.8505531549453735, "learning_rate": 1.995800012908382e-05, "loss": 0.9104, "step": 2386 }, { "epoch": 0.5158850226928896, "grad_norm": 0.8359039425849915, "learning_rate": 1.9944000204124848e-05, "loss": 0.8077, "step": 2387 }, { "epoch": 0.5161011454506159, "grad_norm": 0.9694004058837891, "learning_rate": 1.993000030660565e-05, "loss": 1.0607, "step": 2388 }, { "epoch": 0.5163172682083423, "grad_norm": 0.8425354957580566, "learning_rate": 1.9916000443386132e-05, "loss": 0.7214, "step": 2389 }, { "epoch": 0.5165333909660688, "grad_norm": 1.0435991287231445, "learning_rate": 1.9902000621326206e-05, "loss": 0.9371, "step": 2390 }, { "epoch": 0.5167495137237951, "grad_norm": 1.007751703262329, "learning_rate": 1.9888000847285753e-05, "loss": 1.0005, "step": 2391 }, { "epoch": 0.5169656364815215, "grad_norm": 1.0336477756500244, "learning_rate": 1.987400112812463e-05, "loss": 0.9929, "step": 2392 }, { "epoch": 0.5171817592392479, "grad_norm": 0.9204444885253906, "learning_rate": 1.986000147070265e-05, "loss": 0.863, "step": 2393 }, { "epoch": 0.5173978819969742, "grad_norm": 1.0044440031051636, "learning_rate": 1.984600188187963e-05, "loss": 1.0805, "step": 2394 }, { "epoch": 0.5176140047547007, "grad_norm": 0.9487243890762329, "learning_rate": 1.9832002368515336e-05, "loss": 0.8167, "step": 2395 }, { "epoch": 0.5178301275124271, "grad_norm": 0.9148545265197754, "learning_rate": 1.9818002937469484e-05, "loss": 0.9394, "step": 2396 }, { "epoch": 0.5180462502701535, "grad_norm": 1.1300163269042969, "learning_rate": 1.9804003595601778e-05, "loss": 0.9321, "step": 2397 }, { "epoch": 0.5182623730278798, "grad_norm": 0.9817214012145996, "learning_rate": 1.9790004349771864e-05, "loss": 0.9786, "step": 2398 }, { "epoch": 0.5184784957856062, "grad_norm": 0.9644269943237305, "learning_rate": 1.977600520683933e-05, "loss": 1.0304, "step": 2399 }, { "epoch": 0.5186946185433327, "grad_norm": 0.9276800751686096, "learning_rate": 1.9762006173663717e-05, "loss": 1.1313, "step": 2400 }, { "epoch": 0.518910741301059, "grad_norm": 0.9133787155151367, "learning_rate": 1.974800725710454e-05, "loss": 0.7661, "step": 2401 }, { "epoch": 0.5191268640587854, "grad_norm": 0.8322534561157227, "learning_rate": 1.9734008464021216e-05, "loss": 0.8939, "step": 2402 }, { "epoch": 0.5193429868165118, "grad_norm": 0.8768430352210999, "learning_rate": 1.972000980127313e-05, "loss": 0.8087, "step": 2403 }, { "epoch": 0.5195591095742381, "grad_norm": 0.8987047076225281, "learning_rate": 1.9706011275719603e-05, "loss": 0.9341, "step": 2404 }, { "epoch": 0.5197752323319645, "grad_norm": 0.9694629311561584, "learning_rate": 1.969201289421987e-05, "loss": 0.9099, "step": 2405 }, { "epoch": 0.519991355089691, "grad_norm": 0.8979818224906921, "learning_rate": 1.96780146636331e-05, "loss": 0.8387, "step": 2406 }, { "epoch": 0.5202074778474174, "grad_norm": 1.0015891790390015, "learning_rate": 1.96640165908184e-05, "loss": 1.0356, "step": 2407 }, { "epoch": 0.5204236006051437, "grad_norm": 0.8676528930664062, "learning_rate": 1.96500186826348e-05, "loss": 0.9171, "step": 2408 }, { "epoch": 0.5206397233628701, "grad_norm": 1.0528929233551025, "learning_rate": 1.9636020945941236e-05, "loss": 1.0244, "step": 2409 }, { "epoch": 0.5208558461205965, "grad_norm": 0.8272160887718201, "learning_rate": 1.9622023387596563e-05, "loss": 0.7513, "step": 2410 }, { "epoch": 0.5210719688783229, "grad_norm": 1.0948705673217773, "learning_rate": 1.9608026014459554e-05, "loss": 0.9408, "step": 2411 }, { "epoch": 0.5212880916360493, "grad_norm": 0.9872722625732422, "learning_rate": 1.95940288333889e-05, "loss": 0.9502, "step": 2412 }, { "epoch": 0.5215042143937757, "grad_norm": 0.8445034623146057, "learning_rate": 1.9580031851243176e-05, "loss": 0.8021, "step": 2413 }, { "epoch": 0.521720337151502, "grad_norm": 0.8690013289451599, "learning_rate": 1.9566035074880868e-05, "loss": 0.7603, "step": 2414 }, { "epoch": 0.5219364599092284, "grad_norm": 1.050392746925354, "learning_rate": 1.955203851116038e-05, "loss": 0.9773, "step": 2415 }, { "epoch": 0.5221525826669549, "grad_norm": 0.9531746506690979, "learning_rate": 1.9538042166939982e-05, "loss": 0.9301, "step": 2416 }, { "epoch": 0.5223687054246812, "grad_norm": 0.8638320565223694, "learning_rate": 1.9524046049077855e-05, "loss": 0.8095, "step": 2417 }, { "epoch": 0.5225848281824076, "grad_norm": 1.070334553718567, "learning_rate": 1.9510050164432058e-05, "loss": 1.0291, "step": 2418 }, { "epoch": 0.522800950940134, "grad_norm": 0.9095038771629333, "learning_rate": 1.949605451986055e-05, "loss": 0.8391, "step": 2419 }, { "epoch": 0.5230170736978604, "grad_norm": 0.9874014258384705, "learning_rate": 1.9482059122221168e-05, "loss": 0.9386, "step": 2420 }, { "epoch": 0.5232331964555867, "grad_norm": 0.8531448841094971, "learning_rate": 1.946806397837162e-05, "loss": 0.819, "step": 2421 }, { "epoch": 0.5234493192133132, "grad_norm": 0.8765074610710144, "learning_rate": 1.9454069095169484e-05, "loss": 0.8008, "step": 2422 }, { "epoch": 0.5236654419710396, "grad_norm": 0.9033129811286926, "learning_rate": 1.9440074479472232e-05, "loss": 0.9036, "step": 2423 }, { "epoch": 0.5238815647287659, "grad_norm": 0.9528734087944031, "learning_rate": 1.9426080138137195e-05, "loss": 0.8323, "step": 2424 }, { "epoch": 0.5240976874864923, "grad_norm": 0.9053243398666382, "learning_rate": 1.941208607802156e-05, "loss": 0.9455, "step": 2425 }, { "epoch": 0.5243138102442187, "grad_norm": 0.8585258722305298, "learning_rate": 1.939809230598239e-05, "loss": 0.8961, "step": 2426 }, { "epoch": 0.5245299330019451, "grad_norm": 0.9704940319061279, "learning_rate": 1.9384098828876597e-05, "loss": 0.8393, "step": 2427 }, { "epoch": 0.5247460557596715, "grad_norm": 1.1031426191329956, "learning_rate": 1.9370105653560956e-05, "loss": 0.9268, "step": 2428 }, { "epoch": 0.5249621785173979, "grad_norm": 0.9932018518447876, "learning_rate": 1.9356112786892082e-05, "loss": 0.9444, "step": 2429 }, { "epoch": 0.5251783012751242, "grad_norm": 1.0802488327026367, "learning_rate": 1.934212023572646e-05, "loss": 0.9151, "step": 2430 }, { "epoch": 0.5253944240328506, "grad_norm": 0.8728967905044556, "learning_rate": 1.93281280069204e-05, "loss": 0.9823, "step": 2431 }, { "epoch": 0.5256105467905771, "grad_norm": 0.7961871027946472, "learning_rate": 1.931413610733007e-05, "loss": 0.934, "step": 2432 }, { "epoch": 0.5258266695483035, "grad_norm": 0.908875584602356, "learning_rate": 1.930014454381145e-05, "loss": 0.8754, "step": 2433 }, { "epoch": 0.5260427923060298, "grad_norm": 0.875497579574585, "learning_rate": 1.9286153323220393e-05, "loss": 0.6693, "step": 2434 }, { "epoch": 0.5262589150637562, "grad_norm": 0.9510184526443481, "learning_rate": 1.9272162452412556e-05, "loss": 0.8506, "step": 2435 }, { "epoch": 0.5264750378214826, "grad_norm": 1.0109052658081055, "learning_rate": 1.9258171938243432e-05, "loss": 0.8483, "step": 2436 }, { "epoch": 0.526691160579209, "grad_norm": 0.9058769941329956, "learning_rate": 1.9244181787568347e-05, "loss": 0.8904, "step": 2437 }, { "epoch": 0.5269072833369354, "grad_norm": 1.0631839036941528, "learning_rate": 1.9230192007242442e-05, "loss": 1.0426, "step": 2438 }, { "epoch": 0.5271234060946618, "grad_norm": 0.9550550580024719, "learning_rate": 1.9216202604120672e-05, "loss": 1.0229, "step": 2439 }, { "epoch": 0.5273395288523881, "grad_norm": 0.893438994884491, "learning_rate": 1.920221358505781e-05, "loss": 0.8342, "step": 2440 }, { "epoch": 0.5275556516101145, "grad_norm": 1.1339728832244873, "learning_rate": 1.9188224956908448e-05, "loss": 0.9101, "step": 2441 }, { "epoch": 0.527771774367841, "grad_norm": 0.9783705472946167, "learning_rate": 1.917423672652698e-05, "loss": 0.9414, "step": 2442 }, { "epoch": 0.5279878971255674, "grad_norm": 0.8473526239395142, "learning_rate": 1.9160248900767607e-05, "loss": 0.7932, "step": 2443 }, { "epoch": 0.5282040198832937, "grad_norm": 0.8786388635635376, "learning_rate": 1.914626148648434e-05, "loss": 0.8596, "step": 2444 }, { "epoch": 0.5284201426410201, "grad_norm": 0.9827716946601868, "learning_rate": 1.9132274490530964e-05, "loss": 1.0699, "step": 2445 }, { "epoch": 0.5286362653987465, "grad_norm": 1.1196885108947754, "learning_rate": 1.9118287919761084e-05, "loss": 1.0602, "step": 2446 }, { "epoch": 0.5288523881564728, "grad_norm": 0.8260840773582458, "learning_rate": 1.910430178102808e-05, "loss": 0.746, "step": 2447 }, { "epoch": 0.5290685109141993, "grad_norm": 0.8771698474884033, "learning_rate": 1.9090316081185136e-05, "loss": 0.7933, "step": 2448 }, { "epoch": 0.5292846336719257, "grad_norm": 0.9941705465316772, "learning_rate": 1.9076330827085214e-05, "loss": 0.95, "step": 2449 }, { "epoch": 0.529500756429652, "grad_norm": 0.9113670587539673, "learning_rate": 1.9062346025581047e-05, "loss": 0.9018, "step": 2450 }, { "epoch": 0.5297168791873784, "grad_norm": 1.045186161994934, "learning_rate": 1.9048361683525155e-05, "loss": 0.9235, "step": 2451 }, { "epoch": 0.5299330019451048, "grad_norm": 0.9754068851470947, "learning_rate": 1.9034377807769845e-05, "loss": 1.0684, "step": 2452 }, { "epoch": 0.5301491247028312, "grad_norm": 1.0298631191253662, "learning_rate": 1.9020394405167174e-05, "loss": 0.7132, "step": 2453 }, { "epoch": 0.5303652474605576, "grad_norm": 0.9308910965919495, "learning_rate": 1.900641148256898e-05, "loss": 0.9392, "step": 2454 }, { "epoch": 0.530581370218284, "grad_norm": 0.889383852481842, "learning_rate": 1.8992429046826865e-05, "loss": 0.765, "step": 2455 }, { "epoch": 0.5307974929760104, "grad_norm": 0.9352144598960876, "learning_rate": 1.897844710479219e-05, "loss": 0.9131, "step": 2456 }, { "epoch": 0.5310136157337367, "grad_norm": 1.1047238111495972, "learning_rate": 1.8964465663316067e-05, "loss": 0.9011, "step": 2457 }, { "epoch": 0.5312297384914632, "grad_norm": 0.8324387073516846, "learning_rate": 1.8950484729249374e-05, "loss": 0.9107, "step": 2458 }, { "epoch": 0.5314458612491896, "grad_norm": 0.9552721977233887, "learning_rate": 1.893650430944274e-05, "loss": 0.8853, "step": 2459 }, { "epoch": 0.5316619840069159, "grad_norm": 0.9641865491867065, "learning_rate": 1.892252441074654e-05, "loss": 0.8674, "step": 2460 }, { "epoch": 0.5318781067646423, "grad_norm": 0.9755154252052307, "learning_rate": 1.8908545040010885e-05, "loss": 0.9572, "step": 2461 }, { "epoch": 0.5320942295223687, "grad_norm": 0.8502157330513, "learning_rate": 1.8894566204085633e-05, "loss": 0.8698, "step": 2462 }, { "epoch": 0.532310352280095, "grad_norm": 0.8985865116119385, "learning_rate": 1.888058790982039e-05, "loss": 0.9292, "step": 2463 }, { "epoch": 0.5325264750378215, "grad_norm": 0.9788563847541809, "learning_rate": 1.8866610164064485e-05, "loss": 0.8234, "step": 2464 }, { "epoch": 0.5327425977955479, "grad_norm": 1.0623949766159058, "learning_rate": 1.8852632973666972e-05, "loss": 1.0194, "step": 2465 }, { "epoch": 0.5329587205532743, "grad_norm": 1.2494370937347412, "learning_rate": 1.8838656345476654e-05, "loss": 0.9452, "step": 2466 }, { "epoch": 0.5331748433110006, "grad_norm": 0.8787872791290283, "learning_rate": 1.882468028634205e-05, "loss": 0.8668, "step": 2467 }, { "epoch": 0.533390966068727, "grad_norm": 0.888615071773529, "learning_rate": 1.8810704803111382e-05, "loss": 0.9638, "step": 2468 }, { "epoch": 0.5336070888264535, "grad_norm": 1.047143578529358, "learning_rate": 1.8796729902632605e-05, "loss": 0.8589, "step": 2469 }, { "epoch": 0.5338232115841798, "grad_norm": 1.0401242971420288, "learning_rate": 1.8782755591753405e-05, "loss": 1.0722, "step": 2470 }, { "epoch": 0.5340393343419062, "grad_norm": 0.9091007709503174, "learning_rate": 1.8768781877321145e-05, "loss": 0.9763, "step": 2471 }, { "epoch": 0.5342554570996326, "grad_norm": 0.9124115109443665, "learning_rate": 1.8754808766182925e-05, "loss": 0.9341, "step": 2472 }, { "epoch": 0.5344715798573589, "grad_norm": 1.0849777460098267, "learning_rate": 1.874083626518552e-05, "loss": 0.9522, "step": 2473 }, { "epoch": 0.5346877026150854, "grad_norm": 0.9831454753875732, "learning_rate": 1.8726864381175445e-05, "loss": 0.7644, "step": 2474 }, { "epoch": 0.5349038253728118, "grad_norm": 0.8318514227867126, "learning_rate": 1.8712893120998873e-05, "loss": 0.7629, "step": 2475 }, { "epoch": 0.5351199481305381, "grad_norm": 0.8997523188591003, "learning_rate": 1.8698922491501698e-05, "loss": 0.8557, "step": 2476 }, { "epoch": 0.5353360708882645, "grad_norm": 0.8529967665672302, "learning_rate": 1.8684952499529495e-05, "loss": 0.8869, "step": 2477 }, { "epoch": 0.5355521936459909, "grad_norm": 0.938438892364502, "learning_rate": 1.8670983151927534e-05, "loss": 0.9609, "step": 2478 }, { "epoch": 0.5357683164037174, "grad_norm": 0.9284506440162659, "learning_rate": 1.865701445554075e-05, "loss": 1.0326, "step": 2479 }, { "epoch": 0.5359844391614437, "grad_norm": 0.9874833226203918, "learning_rate": 1.8643046417213776e-05, "loss": 0.7757, "step": 2480 }, { "epoch": 0.5362005619191701, "grad_norm": 0.9178028106689453, "learning_rate": 1.8629079043790922e-05, "loss": 0.982, "step": 2481 }, { "epoch": 0.5364166846768965, "grad_norm": 0.8410900831222534, "learning_rate": 1.861511234211617e-05, "loss": 0.8956, "step": 2482 }, { "epoch": 0.5366328074346228, "grad_norm": 0.9202355146408081, "learning_rate": 1.8601146319033164e-05, "loss": 0.8585, "step": 2483 }, { "epoch": 0.5368489301923492, "grad_norm": 0.9029659628868103, "learning_rate": 1.8587180981385237e-05, "loss": 1.0118, "step": 2484 }, { "epoch": 0.5370650529500757, "grad_norm": 0.8835629224777222, "learning_rate": 1.8573216336015355e-05, "loss": 0.8455, "step": 2485 }, { "epoch": 0.537281175707802, "grad_norm": 0.9127852320671082, "learning_rate": 1.8559252389766177e-05, "loss": 1.0092, "step": 2486 }, { "epoch": 0.5374972984655284, "grad_norm": 0.9556038975715637, "learning_rate": 1.8545289149479987e-05, "loss": 0.7549, "step": 2487 }, { "epoch": 0.5377134212232548, "grad_norm": 0.887857973575592, "learning_rate": 1.8531326621998758e-05, "loss": 0.9067, "step": 2488 }, { "epoch": 0.5379295439809813, "grad_norm": 0.8509790897369385, "learning_rate": 1.8517364814164093e-05, "loss": 0.7826, "step": 2489 }, { "epoch": 0.5381456667387076, "grad_norm": 0.956433892250061, "learning_rate": 1.8503403732817237e-05, "loss": 0.86, "step": 2490 }, { "epoch": 0.538361789496434, "grad_norm": 0.9578793048858643, "learning_rate": 1.848944338479909e-05, "loss": 0.9152, "step": 2491 }, { "epoch": 0.5385779122541604, "grad_norm": 1.0387738943099976, "learning_rate": 1.8475483776950196e-05, "loss": 0.8922, "step": 2492 }, { "epoch": 0.5387940350118867, "grad_norm": 1.0104392766952515, "learning_rate": 1.8461524916110725e-05, "loss": 0.7996, "step": 2493 }, { "epoch": 0.5390101577696131, "grad_norm": 0.9003995060920715, "learning_rate": 1.8447566809120487e-05, "loss": 0.7733, "step": 2494 }, { "epoch": 0.5392262805273396, "grad_norm": 1.0649616718292236, "learning_rate": 1.8433609462818935e-05, "loss": 1.0595, "step": 2495 }, { "epoch": 0.5394424032850659, "grad_norm": 0.906844437122345, "learning_rate": 1.8419652884045114e-05, "loss": 0.8297, "step": 2496 }, { "epoch": 0.5396585260427923, "grad_norm": 0.8947880268096924, "learning_rate": 1.840569707963773e-05, "loss": 1.0482, "step": 2497 }, { "epoch": 0.5398746488005187, "grad_norm": 0.9246777892112732, "learning_rate": 1.839174205643509e-05, "loss": 0.9083, "step": 2498 }, { "epoch": 0.540090771558245, "grad_norm": 0.8564475774765015, "learning_rate": 1.8377787821275122e-05, "loss": 0.8579, "step": 2499 }, { "epoch": 0.5403068943159715, "grad_norm": 0.9944950938224792, "learning_rate": 1.8363834380995377e-05, "loss": 1.0187, "step": 2500 }, { "epoch": 0.5405230170736979, "grad_norm": 1.0183297395706177, "learning_rate": 1.8349881742433004e-05, "loss": 1.0016, "step": 2501 }, { "epoch": 0.5407391398314243, "grad_norm": 0.8491703867912292, "learning_rate": 1.8335929912424756e-05, "loss": 0.7843, "step": 2502 }, { "epoch": 0.5409552625891506, "grad_norm": 1.0703781843185425, "learning_rate": 1.8321978897807007e-05, "loss": 1.1484, "step": 2503 }, { "epoch": 0.541171385346877, "grad_norm": 0.8054190874099731, "learning_rate": 1.8308028705415725e-05, "loss": 0.7427, "step": 2504 }, { "epoch": 0.5413875081046035, "grad_norm": 0.9089491367340088, "learning_rate": 1.8294079342086454e-05, "loss": 0.8596, "step": 2505 }, { "epoch": 0.5416036308623298, "grad_norm": 0.9109365344047546, "learning_rate": 1.828013081465437e-05, "loss": 0.9888, "step": 2506 }, { "epoch": 0.5418197536200562, "grad_norm": 1.0054105520248413, "learning_rate": 1.8266183129954215e-05, "loss": 1.2071, "step": 2507 }, { "epoch": 0.5420358763777826, "grad_norm": 0.8904284238815308, "learning_rate": 1.8252236294820313e-05, "loss": 0.9599, "step": 2508 }, { "epoch": 0.5422519991355089, "grad_norm": 0.9618232250213623, "learning_rate": 1.8238290316086584e-05, "loss": 0.9419, "step": 2509 }, { "epoch": 0.5424681218932353, "grad_norm": 1.0052188634872437, "learning_rate": 1.822434520058653e-05, "loss": 0.948, "step": 2510 }, { "epoch": 0.5426842446509618, "grad_norm": 0.9211570620536804, "learning_rate": 1.8210400955153224e-05, "loss": 1.1163, "step": 2511 }, { "epoch": 0.5429003674086882, "grad_norm": 1.0729602575302124, "learning_rate": 1.8196457586619315e-05, "loss": 0.9801, "step": 2512 }, { "epoch": 0.5431164901664145, "grad_norm": 0.934258759021759, "learning_rate": 1.8182515101817015e-05, "loss": 0.8857, "step": 2513 }, { "epoch": 0.5433326129241409, "grad_norm": 0.7439682483673096, "learning_rate": 1.8168573507578114e-05, "loss": 0.9079, "step": 2514 }, { "epoch": 0.5435487356818673, "grad_norm": 1.0175498723983765, "learning_rate": 1.815463281073396e-05, "loss": 0.8523, "step": 2515 }, { "epoch": 0.5437648584395937, "grad_norm": 0.838266134262085, "learning_rate": 1.8140693018115465e-05, "loss": 0.9479, "step": 2516 }, { "epoch": 0.5439809811973201, "grad_norm": 0.8469918966293335, "learning_rate": 1.8126754136553093e-05, "loss": 1.0116, "step": 2517 }, { "epoch": 0.5441971039550465, "grad_norm": 0.9128462076187134, "learning_rate": 1.8112816172876867e-05, "loss": 0.902, "step": 2518 }, { "epoch": 0.5444132267127728, "grad_norm": 0.8548224568367004, "learning_rate": 1.8098879133916352e-05, "loss": 0.9813, "step": 2519 }, { "epoch": 0.5446293494704992, "grad_norm": 0.8403018116950989, "learning_rate": 1.808494302650066e-05, "loss": 0.7716, "step": 2520 }, { "epoch": 0.5448454722282257, "grad_norm": 0.8741294741630554, "learning_rate": 1.8071007857458465e-05, "loss": 0.7792, "step": 2521 }, { "epoch": 0.545061594985952, "grad_norm": 1.0990644693374634, "learning_rate": 1.8057073633617958e-05, "loss": 0.8027, "step": 2522 }, { "epoch": 0.5452777177436784, "grad_norm": 0.840221643447876, "learning_rate": 1.8043140361806877e-05, "loss": 0.891, "step": 2523 }, { "epoch": 0.5454938405014048, "grad_norm": 0.8790461421012878, "learning_rate": 1.8029208048852505e-05, "loss": 0.8501, "step": 2524 }, { "epoch": 0.5457099632591312, "grad_norm": 0.8606840968132019, "learning_rate": 1.8015276701581623e-05, "loss": 0.8979, "step": 2525 }, { "epoch": 0.5459260860168575, "grad_norm": 0.9998458623886108, "learning_rate": 1.8001346326820574e-05, "loss": 0.9728, "step": 2526 }, { "epoch": 0.546142208774584, "grad_norm": 0.8386487364768982, "learning_rate": 1.79874169313952e-05, "loss": 0.7945, "step": 2527 }, { "epoch": 0.5463583315323104, "grad_norm": 0.9057452082633972, "learning_rate": 1.797348852213088e-05, "loss": 0.8812, "step": 2528 }, { "epoch": 0.5465744542900367, "grad_norm": 0.8506041765213013, "learning_rate": 1.7959561105852505e-05, "loss": 0.9357, "step": 2529 }, { "epoch": 0.5467905770477631, "grad_norm": 0.8173531293869019, "learning_rate": 1.7945634689384465e-05, "loss": 0.9853, "step": 2530 }, { "epoch": 0.5470066998054895, "grad_norm": 0.7886649966239929, "learning_rate": 1.7931709279550676e-05, "loss": 1.0335, "step": 2531 }, { "epoch": 0.5472228225632159, "grad_norm": 0.9975094199180603, "learning_rate": 1.7917784883174562e-05, "loss": 0.8146, "step": 2532 }, { "epoch": 0.5474389453209423, "grad_norm": 0.8904764652252197, "learning_rate": 1.7903861507079042e-05, "loss": 0.8679, "step": 2533 }, { "epoch": 0.5476550680786687, "grad_norm": 0.9284105896949768, "learning_rate": 1.7889939158086536e-05, "loss": 1.0873, "step": 2534 }, { "epoch": 0.5478711908363951, "grad_norm": 0.8726813793182373, "learning_rate": 1.7876017843018973e-05, "loss": 0.9619, "step": 2535 }, { "epoch": 0.5480873135941214, "grad_norm": 0.9748417735099792, "learning_rate": 1.786209756869775e-05, "loss": 0.9706, "step": 2536 }, { "epoch": 0.5483034363518479, "grad_norm": 0.959156334400177, "learning_rate": 1.7848178341943775e-05, "loss": 0.8527, "step": 2537 }, { "epoch": 0.5485195591095743, "grad_norm": 0.8951647281646729, "learning_rate": 1.7834260169577436e-05, "loss": 0.6911, "step": 2538 }, { "epoch": 0.5487356818673006, "grad_norm": 0.8986188173294067, "learning_rate": 1.7820343058418613e-05, "loss": 0.8569, "step": 2539 }, { "epoch": 0.548951804625027, "grad_norm": 0.9517485499382019, "learning_rate": 1.780642701528665e-05, "loss": 1.0089, "step": 2540 }, { "epoch": 0.5491679273827534, "grad_norm": 0.8486550450325012, "learning_rate": 1.7792512047000387e-05, "loss": 0.9291, "step": 2541 }, { "epoch": 0.5493840501404798, "grad_norm": 0.9620897173881531, "learning_rate": 1.7778598160378107e-05, "loss": 0.7867, "step": 2542 }, { "epoch": 0.5496001728982062, "grad_norm": 0.894357442855835, "learning_rate": 1.7764685362237596e-05, "loss": 0.7913, "step": 2543 }, { "epoch": 0.5498162956559326, "grad_norm": 0.897143542766571, "learning_rate": 1.7750773659396094e-05, "loss": 0.8315, "step": 2544 }, { "epoch": 0.5500324184136589, "grad_norm": 0.9111842513084412, "learning_rate": 1.773686305867029e-05, "loss": 0.8653, "step": 2545 }, { "epoch": 0.5502485411713853, "grad_norm": 1.007009744644165, "learning_rate": 1.7722953566876364e-05, "loss": 0.8701, "step": 2546 }, { "epoch": 0.5504646639291118, "grad_norm": 0.8903434872627258, "learning_rate": 1.770904519082993e-05, "loss": 0.7084, "step": 2547 }, { "epoch": 0.5506807866868382, "grad_norm": 1.0857263803482056, "learning_rate": 1.769513793734605e-05, "loss": 0.8966, "step": 2548 }, { "epoch": 0.5508969094445645, "grad_norm": 0.94488126039505, "learning_rate": 1.7681231813239254e-05, "loss": 1.0278, "step": 2549 }, { "epoch": 0.5511130322022909, "grad_norm": 1.0167680978775024, "learning_rate": 1.7667326825323507e-05, "loss": 0.8787, "step": 2550 }, { "epoch": 0.5513291549600173, "grad_norm": 0.9108545184135437, "learning_rate": 1.7653422980412227e-05, "loss": 0.8335, "step": 2551 }, { "epoch": 0.5515452777177436, "grad_norm": 0.9126055836677551, "learning_rate": 1.7639520285318265e-05, "loss": 0.7946, "step": 2552 }, { "epoch": 0.5517614004754701, "grad_norm": 0.9356114864349365, "learning_rate": 1.7625618746853902e-05, "loss": 1.0366, "step": 2553 }, { "epoch": 0.5519775232331965, "grad_norm": 0.9361844658851624, "learning_rate": 1.761171837183086e-05, "loss": 0.9991, "step": 2554 }, { "epoch": 0.5521936459909228, "grad_norm": 0.9151226282119751, "learning_rate": 1.7597819167060303e-05, "loss": 0.8677, "step": 2555 }, { "epoch": 0.5524097687486492, "grad_norm": 0.922074556350708, "learning_rate": 1.7583921139352793e-05, "loss": 0.9288, "step": 2556 }, { "epoch": 0.5526258915063756, "grad_norm": 0.8715337514877319, "learning_rate": 1.7570024295518345e-05, "loss": 0.7825, "step": 2557 }, { "epoch": 0.552842014264102, "grad_norm": 0.9341384172439575, "learning_rate": 1.7556128642366378e-05, "loss": 0.9295, "step": 2558 }, { "epoch": 0.5530581370218284, "grad_norm": 0.7741240859031677, "learning_rate": 1.7542234186705722e-05, "loss": 0.8583, "step": 2559 }, { "epoch": 0.5532742597795548, "grad_norm": 0.9513848423957825, "learning_rate": 1.752834093534463e-05, "loss": 1.0367, "step": 2560 }, { "epoch": 0.5534903825372812, "grad_norm": 0.9577759504318237, "learning_rate": 1.751444889509077e-05, "loss": 1.003, "step": 2561 }, { "epoch": 0.5537065052950075, "grad_norm": 0.8637582063674927, "learning_rate": 1.7500558072751207e-05, "loss": 0.8981, "step": 2562 }, { "epoch": 0.553922628052734, "grad_norm": 0.9732483625411987, "learning_rate": 1.7486668475132404e-05, "loss": 0.801, "step": 2563 }, { "epoch": 0.5541387508104604, "grad_norm": 1.0261329412460327, "learning_rate": 1.7472780109040254e-05, "loss": 1.0119, "step": 2564 }, { "epoch": 0.5543548735681867, "grad_norm": 1.0444742441177368, "learning_rate": 1.7458892981279993e-05, "loss": 0.926, "step": 2565 }, { "epoch": 0.5545709963259131, "grad_norm": 0.8797621130943298, "learning_rate": 1.74450070986563e-05, "loss": 0.9306, "step": 2566 }, { "epoch": 0.5547871190836395, "grad_norm": 1.0905117988586426, "learning_rate": 1.743112246797322e-05, "loss": 0.965, "step": 2567 }, { "epoch": 0.5550032418413658, "grad_norm": 0.7685717344284058, "learning_rate": 1.7417239096034197e-05, "loss": 0.7421, "step": 2568 }, { "epoch": 0.5552193645990923, "grad_norm": 0.8749861121177673, "learning_rate": 1.740335698964205e-05, "loss": 0.9124, "step": 2569 }, { "epoch": 0.5554354873568187, "grad_norm": 0.9034978151321411, "learning_rate": 1.7389476155598974e-05, "loss": 0.8177, "step": 2570 }, { "epoch": 0.5556516101145451, "grad_norm": 0.8407739996910095, "learning_rate": 1.737559660070654e-05, "loss": 0.9647, "step": 2571 }, { "epoch": 0.5558677328722714, "grad_norm": 1.0439980030059814, "learning_rate": 1.736171833176571e-05, "loss": 0.9058, "step": 2572 }, { "epoch": 0.5560838556299978, "grad_norm": 0.8621343374252319, "learning_rate": 1.7347841355576797e-05, "loss": 0.8257, "step": 2573 }, { "epoch": 0.5562999783877243, "grad_norm": 1.0059354305267334, "learning_rate": 1.7333965678939487e-05, "loss": 1.1693, "step": 2574 }, { "epoch": 0.5565161011454506, "grad_norm": 0.9958191514015198, "learning_rate": 1.7320091308652842e-05, "loss": 0.9896, "step": 2575 }, { "epoch": 0.556732223903177, "grad_norm": 0.9572399854660034, "learning_rate": 1.730621825151525e-05, "loss": 0.8653, "step": 2576 }, { "epoch": 0.5569483466609034, "grad_norm": 0.9230698943138123, "learning_rate": 1.729234651432449e-05, "loss": 0.6516, "step": 2577 }, { "epoch": 0.5571644694186297, "grad_norm": 0.8629150390625, "learning_rate": 1.7278476103877676e-05, "loss": 0.8205, "step": 2578 }, { "epoch": 0.5573805921763562, "grad_norm": 0.9017676711082458, "learning_rate": 1.7264607026971284e-05, "loss": 0.7964, "step": 2579 }, { "epoch": 0.5575967149340826, "grad_norm": 0.8994758725166321, "learning_rate": 1.7250739290401123e-05, "loss": 0.9763, "step": 2580 }, { "epoch": 0.5578128376918089, "grad_norm": 0.9548357129096985, "learning_rate": 1.7236872900962364e-05, "loss": 0.8762, "step": 2581 }, { "epoch": 0.5580289604495353, "grad_norm": 0.9338661432266235, "learning_rate": 1.7223007865449487e-05, "loss": 0.8987, "step": 2582 }, { "epoch": 0.5582450832072617, "grad_norm": 0.8923291563987732, "learning_rate": 1.7209144190656333e-05, "loss": 0.8751, "step": 2583 }, { "epoch": 0.5584612059649882, "grad_norm": 0.9313778877258301, "learning_rate": 1.7195281883376078e-05, "loss": 0.9678, "step": 2584 }, { "epoch": 0.5586773287227145, "grad_norm": 0.9116126298904419, "learning_rate": 1.7181420950401212e-05, "loss": 0.9974, "step": 2585 }, { "epoch": 0.5588934514804409, "grad_norm": 0.9923733472824097, "learning_rate": 1.7167561398523572e-05, "loss": 0.897, "step": 2586 }, { "epoch": 0.5591095742381673, "grad_norm": 0.843914270401001, "learning_rate": 1.7153703234534302e-05, "loss": 0.7292, "step": 2587 }, { "epoch": 0.5593256969958936, "grad_norm": 1.0005183219909668, "learning_rate": 1.713984646522386e-05, "loss": 0.8998, "step": 2588 }, { "epoch": 0.55954181975362, "grad_norm": 0.9981311559677124, "learning_rate": 1.712599109738204e-05, "loss": 1.0055, "step": 2589 }, { "epoch": 0.5597579425113465, "grad_norm": 0.8771159648895264, "learning_rate": 1.711213713779794e-05, "loss": 0.9573, "step": 2590 }, { "epoch": 0.5599740652690728, "grad_norm": 0.9199696779251099, "learning_rate": 1.7098284593259963e-05, "loss": 0.8015, "step": 2591 }, { "epoch": 0.5601901880267992, "grad_norm": 0.9282891750335693, "learning_rate": 1.7084433470555837e-05, "loss": 0.8832, "step": 2592 }, { "epoch": 0.5604063107845256, "grad_norm": 0.9497724175453186, "learning_rate": 1.7070583776472564e-05, "loss": 0.7778, "step": 2593 }, { "epoch": 0.560622433542252, "grad_norm": 1.0086259841918945, "learning_rate": 1.7056735517796463e-05, "loss": 0.9504, "step": 2594 }, { "epoch": 0.5608385562999784, "grad_norm": 0.954163134098053, "learning_rate": 1.704288870131316e-05, "loss": 0.9827, "step": 2595 }, { "epoch": 0.5610546790577048, "grad_norm": 0.9140467047691345, "learning_rate": 1.7029043333807556e-05, "loss": 0.8622, "step": 2596 }, { "epoch": 0.5612708018154312, "grad_norm": 0.8212366104125977, "learning_rate": 1.701519942206385e-05, "loss": 0.9051, "step": 2597 }, { "epoch": 0.5614869245731575, "grad_norm": 1.0883913040161133, "learning_rate": 1.7001356972865535e-05, "loss": 0.9649, "step": 2598 }, { "epoch": 0.5617030473308839, "grad_norm": 0.9906061887741089, "learning_rate": 1.6987515992995366e-05, "loss": 0.8934, "step": 2599 }, { "epoch": 0.5619191700886104, "grad_norm": 1.04061758518219, "learning_rate": 1.6973676489235393e-05, "loss": 0.9897, "step": 2600 }, { "epoch": 0.5621352928463367, "grad_norm": 0.956551194190979, "learning_rate": 1.6959838468366947e-05, "loss": 0.8, "step": 2601 }, { "epoch": 0.5623514156040631, "grad_norm": 1.0711612701416016, "learning_rate": 1.6946001937170625e-05, "loss": 0.9623, "step": 2602 }, { "epoch": 0.5625675383617895, "grad_norm": 0.9555321335792542, "learning_rate": 1.693216690242629e-05, "loss": 0.8434, "step": 2603 }, { "epoch": 0.5627836611195158, "grad_norm": 1.0556505918502808, "learning_rate": 1.6918333370913092e-05, "loss": 0.9153, "step": 2604 }, { "epoch": 0.5629997838772423, "grad_norm": 1.0087647438049316, "learning_rate": 1.690450134940941e-05, "loss": 0.9824, "step": 2605 }, { "epoch": 0.5632159066349687, "grad_norm": 0.9734079241752625, "learning_rate": 1.6890670844692912e-05, "loss": 0.9209, "step": 2606 }, { "epoch": 0.5634320293926951, "grad_norm": 1.0049638748168945, "learning_rate": 1.6876841863540508e-05, "loss": 0.822, "step": 2607 }, { "epoch": 0.5636481521504214, "grad_norm": 0.9042305946350098, "learning_rate": 1.6863014412728377e-05, "loss": 0.8646, "step": 2608 }, { "epoch": 0.5638642749081478, "grad_norm": 1.022499442100525, "learning_rate": 1.684918849903193e-05, "loss": 0.6836, "step": 2609 }, { "epoch": 0.5640803976658743, "grad_norm": 0.9768783450126648, "learning_rate": 1.683536412922584e-05, "loss": 0.9014, "step": 2610 }, { "epoch": 0.5642965204236006, "grad_norm": 0.957285463809967, "learning_rate": 1.6821541310084007e-05, "loss": 0.8889, "step": 2611 }, { "epoch": 0.564512643181327, "grad_norm": 1.1212570667266846, "learning_rate": 1.6807720048379577e-05, "loss": 0.9276, "step": 2612 }, { "epoch": 0.5647287659390534, "grad_norm": 0.9631536602973938, "learning_rate": 1.6793900350884956e-05, "loss": 0.9226, "step": 2613 }, { "epoch": 0.5649448886967797, "grad_norm": 1.0156223773956299, "learning_rate": 1.678008222437174e-05, "loss": 0.9919, "step": 2614 }, { "epoch": 0.5651610114545061, "grad_norm": 0.8217774629592896, "learning_rate": 1.6766265675610806e-05, "loss": 0.8019, "step": 2615 }, { "epoch": 0.5653771342122326, "grad_norm": 0.9502395391464233, "learning_rate": 1.6752450711372204e-05, "loss": 0.7396, "step": 2616 }, { "epoch": 0.565593256969959, "grad_norm": 0.866053581237793, "learning_rate": 1.673863733842525e-05, "loss": 0.9259, "step": 2617 }, { "epoch": 0.5658093797276853, "grad_norm": 0.9778143763542175, "learning_rate": 1.6724825563538455e-05, "loss": 0.926, "step": 2618 }, { "epoch": 0.5660255024854117, "grad_norm": 1.0290147066116333, "learning_rate": 1.6711015393479568e-05, "loss": 1.0948, "step": 2619 }, { "epoch": 0.5662416252431381, "grad_norm": 1.0352962017059326, "learning_rate": 1.6697206835015535e-05, "loss": 1.0491, "step": 2620 }, { "epoch": 0.5664577480008645, "grad_norm": 0.8152976036071777, "learning_rate": 1.6683399894912522e-05, "loss": 0.7761, "step": 2621 }, { "epoch": 0.5666738707585909, "grad_norm": 0.9332512617111206, "learning_rate": 1.666959457993589e-05, "loss": 0.9035, "step": 2622 }, { "epoch": 0.5668899935163173, "grad_norm": 0.8031812310218811, "learning_rate": 1.665579089685021e-05, "loss": 0.9277, "step": 2623 }, { "epoch": 0.5671061162740436, "grad_norm": 0.9118196964263916, "learning_rate": 1.6641988852419265e-05, "loss": 0.6753, "step": 2624 }, { "epoch": 0.56732223903177, "grad_norm": 0.883331835269928, "learning_rate": 1.6628188453406015e-05, "loss": 0.9446, "step": 2625 }, { "epoch": 0.5675383617894965, "grad_norm": 1.035176396369934, "learning_rate": 1.6614389706572633e-05, "loss": 0.9588, "step": 2626 }, { "epoch": 0.5677544845472228, "grad_norm": 0.9993638396263123, "learning_rate": 1.6600592618680474e-05, "loss": 0.911, "step": 2627 }, { "epoch": 0.5679706073049492, "grad_norm": 0.9066967368125916, "learning_rate": 1.658679719649007e-05, "loss": 1.0114, "step": 2628 }, { "epoch": 0.5681867300626756, "grad_norm": 1.0658564567565918, "learning_rate": 1.657300344676114e-05, "loss": 1.0594, "step": 2629 }, { "epoch": 0.568402852820402, "grad_norm": 1.0333069562911987, "learning_rate": 1.6559211376252607e-05, "loss": 0.8774, "step": 2630 }, { "epoch": 0.5686189755781283, "grad_norm": 0.9193286895751953, "learning_rate": 1.6545420991722543e-05, "loss": 0.8313, "step": 2631 }, { "epoch": 0.5688350983358548, "grad_norm": 1.1600608825683594, "learning_rate": 1.6531632299928207e-05, "loss": 0.8811, "step": 2632 }, { "epoch": 0.5690512210935812, "grad_norm": 0.9152405261993408, "learning_rate": 1.6517845307626035e-05, "loss": 0.9077, "step": 2633 }, { "epoch": 0.5692673438513075, "grad_norm": 0.9811593890190125, "learning_rate": 1.6504060021571602e-05, "loss": 1.1066, "step": 2634 }, { "epoch": 0.5694834666090339, "grad_norm": 0.9509673118591309, "learning_rate": 1.649027644851968e-05, "loss": 1.0162, "step": 2635 }, { "epoch": 0.5696995893667604, "grad_norm": 1.0188031196594238, "learning_rate": 1.6476494595224185e-05, "loss": 0.96, "step": 2636 }, { "epoch": 0.5699157121244867, "grad_norm": 0.9113590717315674, "learning_rate": 1.646271446843819e-05, "loss": 0.8967, "step": 2637 }, { "epoch": 0.5701318348822131, "grad_norm": 1.0003587007522583, "learning_rate": 1.6448936074913938e-05, "loss": 1.0857, "step": 2638 }, { "epoch": 0.5703479576399395, "grad_norm": 1.0504573583602905, "learning_rate": 1.6435159421402797e-05, "loss": 0.9088, "step": 2639 }, { "epoch": 0.5705640803976659, "grad_norm": 0.8997858166694641, "learning_rate": 1.6421384514655296e-05, "loss": 0.8765, "step": 2640 }, { "epoch": 0.5707802031553922, "grad_norm": 0.776848316192627, "learning_rate": 1.6407611361421107e-05, "loss": 0.8643, "step": 2641 }, { "epoch": 0.5709963259131187, "grad_norm": 0.9078662991523743, "learning_rate": 1.639383996844905e-05, "loss": 0.7983, "step": 2642 }, { "epoch": 0.5712124486708451, "grad_norm": 1.1863467693328857, "learning_rate": 1.638007034248707e-05, "loss": 0.9971, "step": 2643 }, { "epoch": 0.5714285714285714, "grad_norm": 0.9968357682228088, "learning_rate": 1.6366302490282265e-05, "loss": 0.9071, "step": 2644 }, { "epoch": 0.5716446941862978, "grad_norm": 1.0731886625289917, "learning_rate": 1.6352536418580828e-05, "loss": 0.9942, "step": 2645 }, { "epoch": 0.5718608169440242, "grad_norm": 0.9937326312065125, "learning_rate": 1.6338772134128115e-05, "loss": 0.8896, "step": 2646 }, { "epoch": 0.5720769397017506, "grad_norm": 1.042829155921936, "learning_rate": 1.6325009643668592e-05, "loss": 0.7685, "step": 2647 }, { "epoch": 0.572293062459477, "grad_norm": 0.8667680621147156, "learning_rate": 1.6311248953945854e-05, "loss": 0.9037, "step": 2648 }, { "epoch": 0.5725091852172034, "grad_norm": 0.873292088508606, "learning_rate": 1.62974900717026e-05, "loss": 0.8839, "step": 2649 }, { "epoch": 0.5727253079749297, "grad_norm": 0.9799197912216187, "learning_rate": 1.6283733003680655e-05, "loss": 0.9105, "step": 2650 }, { "epoch": 0.5729414307326561, "grad_norm": 0.9652045965194702, "learning_rate": 1.6269977756620944e-05, "loss": 0.8397, "step": 2651 }, { "epoch": 0.5731575534903826, "grad_norm": 0.9093625545501709, "learning_rate": 1.6256224337263503e-05, "loss": 0.7846, "step": 2652 }, { "epoch": 0.573373676248109, "grad_norm": 0.8416289687156677, "learning_rate": 1.624247275234749e-05, "loss": 0.8236, "step": 2653 }, { "epoch": 0.5735897990058353, "grad_norm": 0.8340027332305908, "learning_rate": 1.6228723008611136e-05, "loss": 0.7617, "step": 2654 }, { "epoch": 0.5738059217635617, "grad_norm": 0.9418075084686279, "learning_rate": 1.6214975112791803e-05, "loss": 1.0724, "step": 2655 }, { "epoch": 0.5740220445212881, "grad_norm": 0.9534636735916138, "learning_rate": 1.6201229071625905e-05, "loss": 0.8903, "step": 2656 }, { "epoch": 0.5742381672790144, "grad_norm": 0.9745770692825317, "learning_rate": 1.6187484891848983e-05, "loss": 0.8336, "step": 2657 }, { "epoch": 0.5744542900367409, "grad_norm": 0.9617936015129089, "learning_rate": 1.6173742580195643e-05, "loss": 0.982, "step": 2658 }, { "epoch": 0.5746704127944673, "grad_norm": 0.9466758966445923, "learning_rate": 1.61600021433996e-05, "loss": 0.8253, "step": 2659 }, { "epoch": 0.5748865355521936, "grad_norm": 0.9294735789299011, "learning_rate": 1.614626358819363e-05, "loss": 0.8607, "step": 2660 }, { "epoch": 0.57510265830992, "grad_norm": 0.8792514204978943, "learning_rate": 1.6132526921309598e-05, "loss": 0.9271, "step": 2661 }, { "epoch": 0.5753187810676464, "grad_norm": 0.9135647416114807, "learning_rate": 1.6118792149478432e-05, "loss": 0.8542, "step": 2662 }, { "epoch": 0.5755349038253728, "grad_norm": 0.7765149474143982, "learning_rate": 1.6105059279430132e-05, "loss": 0.902, "step": 2663 }, { "epoch": 0.5757510265830992, "grad_norm": 0.978321373462677, "learning_rate": 1.6091328317893792e-05, "loss": 1.0733, "step": 2664 }, { "epoch": 0.5759671493408256, "grad_norm": 0.9727914333343506, "learning_rate": 1.607759927159753e-05, "loss": 0.907, "step": 2665 }, { "epoch": 0.576183272098552, "grad_norm": 0.9169484376907349, "learning_rate": 1.6063872147268564e-05, "loss": 0.9775, "step": 2666 }, { "epoch": 0.5763993948562783, "grad_norm": 0.9502250552177429, "learning_rate": 1.605014695163315e-05, "loss": 0.8968, "step": 2667 }, { "epoch": 0.5766155176140048, "grad_norm": 1.0350382328033447, "learning_rate": 1.6036423691416597e-05, "loss": 0.9144, "step": 2668 }, { "epoch": 0.5768316403717312, "grad_norm": 0.9713992476463318, "learning_rate": 1.6022702373343274e-05, "loss": 0.9253, "step": 2669 }, { "epoch": 0.5770477631294575, "grad_norm": 0.9588881731033325, "learning_rate": 1.6008983004136586e-05, "loss": 0.8175, "step": 2670 }, { "epoch": 0.5772638858871839, "grad_norm": 0.878943145275116, "learning_rate": 1.5995265590519007e-05, "loss": 0.9627, "step": 2671 }, { "epoch": 0.5774800086449103, "grad_norm": 0.9545088410377502, "learning_rate": 1.5981550139212023e-05, "loss": 1.0463, "step": 2672 }, { "epoch": 0.5776961314026366, "grad_norm": 0.9589905738830566, "learning_rate": 1.5967836656936197e-05, "loss": 0.8573, "step": 2673 }, { "epoch": 0.5779122541603631, "grad_norm": 1.0416665077209473, "learning_rate": 1.5954125150411078e-05, "loss": 1.0135, "step": 2674 }, { "epoch": 0.5781283769180895, "grad_norm": 1.038084864616394, "learning_rate": 1.5940415626355282e-05, "loss": 0.9184, "step": 2675 }, { "epoch": 0.5783444996758159, "grad_norm": 0.9264064431190491, "learning_rate": 1.5926708091486443e-05, "loss": 0.9914, "step": 2676 }, { "epoch": 0.5785606224335422, "grad_norm": 0.9196184873580933, "learning_rate": 1.5913002552521225e-05, "loss": 0.7685, "step": 2677 }, { "epoch": 0.5787767451912686, "grad_norm": 0.8804293870925903, "learning_rate": 1.5899299016175317e-05, "loss": 0.8003, "step": 2678 }, { "epoch": 0.5789928679489951, "grad_norm": 0.8991097807884216, "learning_rate": 1.5885597489163405e-05, "loss": 0.923, "step": 2679 }, { "epoch": 0.5792089907067214, "grad_norm": 0.8430712223052979, "learning_rate": 1.5871897978199213e-05, "loss": 0.9133, "step": 2680 }, { "epoch": 0.5794251134644478, "grad_norm": 1.0993525981903076, "learning_rate": 1.585820048999546e-05, "loss": 0.9837, "step": 2681 }, { "epoch": 0.5796412362221742, "grad_norm": 0.8528681993484497, "learning_rate": 1.5844505031263902e-05, "loss": 0.8822, "step": 2682 }, { "epoch": 0.5798573589799005, "grad_norm": 0.8468205332756042, "learning_rate": 1.5830811608715265e-05, "loss": 0.7528, "step": 2683 }, { "epoch": 0.580073481737627, "grad_norm": 0.8934227824211121, "learning_rate": 1.5817120229059318e-05, "loss": 0.9948, "step": 2684 }, { "epoch": 0.5802896044953534, "grad_norm": 0.8171609044075012, "learning_rate": 1.5803430899004775e-05, "loss": 0.9341, "step": 2685 }, { "epoch": 0.5805057272530797, "grad_norm": 0.9663010239601135, "learning_rate": 1.5789743625259396e-05, "loss": 0.8579, "step": 2686 }, { "epoch": 0.5807218500108061, "grad_norm": 1.067415714263916, "learning_rate": 1.5776058414529903e-05, "loss": 0.9969, "step": 2687 }, { "epoch": 0.5809379727685325, "grad_norm": 1.0298633575439453, "learning_rate": 1.5762375273522024e-05, "loss": 1.0491, "step": 2688 }, { "epoch": 0.581154095526259, "grad_norm": 0.8125079274177551, "learning_rate": 1.5748694208940467e-05, "loss": 0.8395, "step": 2689 }, { "epoch": 0.5813702182839853, "grad_norm": 0.9669515490531921, "learning_rate": 1.5735015227488925e-05, "loss": 0.7882, "step": 2690 }, { "epoch": 0.5815863410417117, "grad_norm": 0.9027782082557678, "learning_rate": 1.5721338335870057e-05, "loss": 0.8252, "step": 2691 }, { "epoch": 0.5818024637994381, "grad_norm": 1.0070239305496216, "learning_rate": 1.570766354078551e-05, "loss": 0.9992, "step": 2692 }, { "epoch": 0.5820185865571644, "grad_norm": 0.8885738253593445, "learning_rate": 1.569399084893591e-05, "loss": 0.8881, "step": 2693 }, { "epoch": 0.5822347093148909, "grad_norm": 0.9686094522476196, "learning_rate": 1.5680320267020836e-05, "loss": 0.9656, "step": 2694 }, { "epoch": 0.5824508320726173, "grad_norm": 0.9403521418571472, "learning_rate": 1.5666651801738856e-05, "loss": 0.9953, "step": 2695 }, { "epoch": 0.5826669548303436, "grad_norm": 1.0152837038040161, "learning_rate": 1.5652985459787464e-05, "loss": 0.9059, "step": 2696 }, { "epoch": 0.58288307758807, "grad_norm": 0.8296104073524475, "learning_rate": 1.5639321247863154e-05, "loss": 0.8858, "step": 2697 }, { "epoch": 0.5830992003457964, "grad_norm": 1.0700321197509766, "learning_rate": 1.562565917266135e-05, "loss": 0.9993, "step": 2698 }, { "epoch": 0.5833153231035229, "grad_norm": 0.8812270164489746, "learning_rate": 1.5611999240876437e-05, "loss": 0.9261, "step": 2699 }, { "epoch": 0.5835314458612492, "grad_norm": 0.9802578687667847, "learning_rate": 1.5598341459201756e-05, "loss": 0.9699, "step": 2700 }, { "epoch": 0.5837475686189756, "grad_norm": 0.9958237409591675, "learning_rate": 1.558468583432959e-05, "loss": 1.0226, "step": 2701 }, { "epoch": 0.583963691376702, "grad_norm": 0.8533070087432861, "learning_rate": 1.5571032372951153e-05, "loss": 0.7393, "step": 2702 }, { "epoch": 0.5841798141344283, "grad_norm": 0.9260321855545044, "learning_rate": 1.555738108175661e-05, "loss": 0.7341, "step": 2703 }, { "epoch": 0.5843959368921547, "grad_norm": 1.0298815965652466, "learning_rate": 1.554373196743507e-05, "loss": 0.9243, "step": 2704 }, { "epoch": 0.5846120596498812, "grad_norm": 0.9426895976066589, "learning_rate": 1.5530085036674563e-05, "loss": 0.9307, "step": 2705 }, { "epoch": 0.5848281824076075, "grad_norm": 0.9806160926818848, "learning_rate": 1.551644029616206e-05, "loss": 1.0719, "step": 2706 }, { "epoch": 0.5850443051653339, "grad_norm": 1.058203101158142, "learning_rate": 1.550279775258345e-05, "loss": 0.9036, "step": 2707 }, { "epoch": 0.5852604279230603, "grad_norm": 0.9290843605995178, "learning_rate": 1.5489157412623538e-05, "loss": 0.8886, "step": 2708 }, { "epoch": 0.5854765506807866, "grad_norm": 1.087302803993225, "learning_rate": 1.547551928296607e-05, "loss": 1.0837, "step": 2709 }, { "epoch": 0.5856926734385131, "grad_norm": 1.1101616621017456, "learning_rate": 1.5461883370293692e-05, "loss": 1.0016, "step": 2710 }, { "epoch": 0.5859087961962395, "grad_norm": 0.8973838686943054, "learning_rate": 1.5448249681287972e-05, "loss": 1.0268, "step": 2711 }, { "epoch": 0.5861249189539659, "grad_norm": 0.9331724047660828, "learning_rate": 1.5434618222629382e-05, "loss": 0.8761, "step": 2712 }, { "epoch": 0.5863410417116922, "grad_norm": 1.032333254814148, "learning_rate": 1.5420989000997324e-05, "loss": 0.9178, "step": 2713 }, { "epoch": 0.5865571644694186, "grad_norm": 0.8464323282241821, "learning_rate": 1.5407362023070057e-05, "loss": 0.782, "step": 2714 }, { "epoch": 0.5867732872271451, "grad_norm": 0.9692414999008179, "learning_rate": 1.539373729552479e-05, "loss": 0.8567, "step": 2715 }, { "epoch": 0.5869894099848714, "grad_norm": 0.8548213243484497, "learning_rate": 1.538011482503759e-05, "loss": 0.8791, "step": 2716 }, { "epoch": 0.5872055327425978, "grad_norm": 1.0426594018936157, "learning_rate": 1.5366494618283453e-05, "loss": 0.8803, "step": 2717 }, { "epoch": 0.5874216555003242, "grad_norm": 0.8966501355171204, "learning_rate": 1.5352876681936245e-05, "loss": 0.8331, "step": 2718 }, { "epoch": 0.5876377782580505, "grad_norm": 0.9452791810035706, "learning_rate": 1.5339261022668717e-05, "loss": 0.7207, "step": 2719 }, { "epoch": 0.587853901015777, "grad_norm": 1.0118849277496338, "learning_rate": 1.5325647647152514e-05, "loss": 0.8975, "step": 2720 }, { "epoch": 0.5880700237735034, "grad_norm": 0.9325366616249084, "learning_rate": 1.5312036562058152e-05, "loss": 1.0635, "step": 2721 }, { "epoch": 0.5882861465312298, "grad_norm": 0.9577626585960388, "learning_rate": 1.5298427774055045e-05, "loss": 0.7438, "step": 2722 }, { "epoch": 0.5885022692889561, "grad_norm": 0.9435785412788391, "learning_rate": 1.5284821289811453e-05, "loss": 0.9906, "step": 2723 }, { "epoch": 0.5887183920466825, "grad_norm": 0.863259494304657, "learning_rate": 1.5271217115994542e-05, "loss": 0.8271, "step": 2724 }, { "epoch": 0.588934514804409, "grad_norm": 0.9923564791679382, "learning_rate": 1.5257615259270302e-05, "loss": 0.9293, "step": 2725 }, { "epoch": 0.5891506375621353, "grad_norm": 0.9756587743759155, "learning_rate": 1.5244015726303626e-05, "loss": 0.9125, "step": 2726 }, { "epoch": 0.5893667603198617, "grad_norm": 0.8018559813499451, "learning_rate": 1.523041852375825e-05, "loss": 0.8673, "step": 2727 }, { "epoch": 0.5895828830775881, "grad_norm": 1.043648600578308, "learning_rate": 1.5216823658296767e-05, "loss": 0.8729, "step": 2728 }, { "epoch": 0.5897990058353144, "grad_norm": 0.9324607253074646, "learning_rate": 1.520323113658064e-05, "loss": 0.8696, "step": 2729 }, { "epoch": 0.5900151285930408, "grad_norm": 0.9599418640136719, "learning_rate": 1.518964096527017e-05, "loss": 0.8883, "step": 2730 }, { "epoch": 0.5902312513507673, "grad_norm": 0.919252336025238, "learning_rate": 1.51760531510245e-05, "loss": 0.8387, "step": 2731 }, { "epoch": 0.5904473741084936, "grad_norm": 0.9479200839996338, "learning_rate": 1.5162467700501635e-05, "loss": 0.9632, "step": 2732 }, { "epoch": 0.59066349686622, "grad_norm": 1.010833501815796, "learning_rate": 1.5148884620358417e-05, "loss": 0.7722, "step": 2733 }, { "epoch": 0.5908796196239464, "grad_norm": 0.9122352004051208, "learning_rate": 1.5135303917250517e-05, "loss": 0.8343, "step": 2734 }, { "epoch": 0.5910957423816728, "grad_norm": 1.0183535814285278, "learning_rate": 1.5121725597832457e-05, "loss": 0.772, "step": 2735 }, { "epoch": 0.5913118651393992, "grad_norm": 1.1082161664962769, "learning_rate": 1.5108149668757578e-05, "loss": 1.0553, "step": 2736 }, { "epoch": 0.5915279878971256, "grad_norm": 0.9422979950904846, "learning_rate": 1.5094576136678052e-05, "loss": 0.986, "step": 2737 }, { "epoch": 0.591744110654852, "grad_norm": 0.9901473522186279, "learning_rate": 1.5081005008244879e-05, "loss": 1.1901, "step": 2738 }, { "epoch": 0.5919602334125783, "grad_norm": 1.0788806676864624, "learning_rate": 1.5067436290107876e-05, "loss": 0.9779, "step": 2739 }, { "epoch": 0.5921763561703047, "grad_norm": 0.9207198619842529, "learning_rate": 1.5053869988915691e-05, "loss": 0.8134, "step": 2740 }, { "epoch": 0.5923924789280312, "grad_norm": 1.2359508275985718, "learning_rate": 1.5040306111315783e-05, "loss": 1.0705, "step": 2741 }, { "epoch": 0.5926086016857575, "grad_norm": 0.9359343647956848, "learning_rate": 1.5026744663954411e-05, "loss": 0.8531, "step": 2742 }, { "epoch": 0.5928247244434839, "grad_norm": 1.0426597595214844, "learning_rate": 1.5013185653476653e-05, "loss": 0.8466, "step": 2743 }, { "epoch": 0.5930408472012103, "grad_norm": 0.9316443204879761, "learning_rate": 1.4999629086526403e-05, "loss": 0.9299, "step": 2744 }, { "epoch": 0.5932569699589367, "grad_norm": 0.8866399526596069, "learning_rate": 1.4986074969746333e-05, "loss": 0.8621, "step": 2745 }, { "epoch": 0.593473092716663, "grad_norm": 0.9012348651885986, "learning_rate": 1.4972523309777947e-05, "loss": 0.9353, "step": 2746 }, { "epoch": 0.5936892154743895, "grad_norm": 0.8727730512619019, "learning_rate": 1.4958974113261518e-05, "loss": 0.709, "step": 2747 }, { "epoch": 0.5939053382321159, "grad_norm": 0.9378992915153503, "learning_rate": 1.4945427386836118e-05, "loss": 0.8303, "step": 2748 }, { "epoch": 0.5941214609898422, "grad_norm": 0.9986936450004578, "learning_rate": 1.4931883137139612e-05, "loss": 0.9444, "step": 2749 }, { "epoch": 0.5943375837475686, "grad_norm": 1.0609228610992432, "learning_rate": 1.4918341370808649e-05, "loss": 1.0354, "step": 2750 }, { "epoch": 0.594553706505295, "grad_norm": 0.9177163243293762, "learning_rate": 1.4904802094478672e-05, "loss": 0.8406, "step": 2751 }, { "epoch": 0.5947698292630214, "grad_norm": 0.8395483493804932, "learning_rate": 1.4891265314783888e-05, "loss": 0.8217, "step": 2752 }, { "epoch": 0.5949859520207478, "grad_norm": 0.9909489154815674, "learning_rate": 1.4877731038357299e-05, "loss": 0.9599, "step": 2753 }, { "epoch": 0.5952020747784742, "grad_norm": 0.9902933239936829, "learning_rate": 1.4864199271830648e-05, "loss": 1.0263, "step": 2754 }, { "epoch": 0.5954181975362005, "grad_norm": 1.1740868091583252, "learning_rate": 1.4850670021834488e-05, "loss": 1.0437, "step": 2755 }, { "epoch": 0.5956343202939269, "grad_norm": 0.9129568934440613, "learning_rate": 1.4837143294998113e-05, "loss": 0.8702, "step": 2756 }, { "epoch": 0.5958504430516534, "grad_norm": 1.0828653573989868, "learning_rate": 1.4823619097949584e-05, "loss": 0.7669, "step": 2757 }, { "epoch": 0.5960665658093798, "grad_norm": 0.9232279062271118, "learning_rate": 1.481009743731574e-05, "loss": 0.7958, "step": 2758 }, { "epoch": 0.5962826885671061, "grad_norm": 0.910819947719574, "learning_rate": 1.479657831972215e-05, "loss": 0.9502, "step": 2759 }, { "epoch": 0.5964988113248325, "grad_norm": 0.8901724815368652, "learning_rate": 1.4783061751793155e-05, "loss": 0.8451, "step": 2760 }, { "epoch": 0.5967149340825589, "grad_norm": 0.8953905701637268, "learning_rate": 1.4769547740151838e-05, "loss": 0.9198, "step": 2761 }, { "epoch": 0.5969310568402852, "grad_norm": 0.9130937457084656, "learning_rate": 1.475603629142004e-05, "loss": 0.7836, "step": 2762 }, { "epoch": 0.5971471795980117, "grad_norm": 0.9950690865516663, "learning_rate": 1.4742527412218332e-05, "loss": 0.9808, "step": 2763 }, { "epoch": 0.5973633023557381, "grad_norm": 1.0196300745010376, "learning_rate": 1.4729021109166053e-05, "loss": 0.7786, "step": 2764 }, { "epoch": 0.5975794251134644, "grad_norm": 0.91399085521698, "learning_rate": 1.4715517388881234e-05, "loss": 0.8409, "step": 2765 }, { "epoch": 0.5977955478711908, "grad_norm": 1.1146055459976196, "learning_rate": 1.470201625798068e-05, "loss": 0.7907, "step": 2766 }, { "epoch": 0.5980116706289172, "grad_norm": 0.9721848368644714, "learning_rate": 1.4688517723079914e-05, "loss": 0.8861, "step": 2767 }, { "epoch": 0.5982277933866436, "grad_norm": 0.876146674156189, "learning_rate": 1.4675021790793182e-05, "loss": 0.9064, "step": 2768 }, { "epoch": 0.59844391614437, "grad_norm": 0.8468629121780396, "learning_rate": 1.4661528467733465e-05, "loss": 1.0736, "step": 2769 }, { "epoch": 0.5986600389020964, "grad_norm": 1.004036545753479, "learning_rate": 1.4648037760512464e-05, "loss": 0.8996, "step": 2770 }, { "epoch": 0.5988761616598228, "grad_norm": 1.0619404315948486, "learning_rate": 1.4634549675740584e-05, "loss": 1.1611, "step": 2771 }, { "epoch": 0.5990922844175491, "grad_norm": 0.9154078960418701, "learning_rate": 1.4621064220026955e-05, "loss": 0.8513, "step": 2772 }, { "epoch": 0.5993084071752756, "grad_norm": 0.9970297813415527, "learning_rate": 1.4607581399979427e-05, "loss": 0.8516, "step": 2773 }, { "epoch": 0.599524529933002, "grad_norm": 1.0168700218200684, "learning_rate": 1.4594101222204544e-05, "loss": 0.8145, "step": 2774 }, { "epoch": 0.5997406526907283, "grad_norm": 0.8243957161903381, "learning_rate": 1.4580623693307572e-05, "loss": 0.7947, "step": 2775 }, { "epoch": 0.5999567754484547, "grad_norm": 0.9932655096054077, "learning_rate": 1.4567148819892464e-05, "loss": 0.8704, "step": 2776 }, { "epoch": 0.6001728982061811, "grad_norm": 1.0828499794006348, "learning_rate": 1.4553676608561872e-05, "loss": 1.0422, "step": 2777 }, { "epoch": 0.6003890209639074, "grad_norm": 1.019808292388916, "learning_rate": 1.4540207065917152e-05, "loss": 0.9274, "step": 2778 }, { "epoch": 0.6006051437216339, "grad_norm": 0.9077752828598022, "learning_rate": 1.4526740198558345e-05, "loss": 0.9341, "step": 2779 }, { "epoch": 0.6008212664793603, "grad_norm": 0.9558945894241333, "learning_rate": 1.451327601308419e-05, "loss": 0.937, "step": 2780 }, { "epoch": 0.6010373892370867, "grad_norm": 1.0341992378234863, "learning_rate": 1.4499814516092113e-05, "loss": 0.8255, "step": 2781 }, { "epoch": 0.601253511994813, "grad_norm": 0.9370862245559692, "learning_rate": 1.44863557141782e-05, "loss": 1.0031, "step": 2782 }, { "epoch": 0.6014696347525395, "grad_norm": 0.9899297952651978, "learning_rate": 1.447289961393724e-05, "loss": 0.9807, "step": 2783 }, { "epoch": 0.6016857575102659, "grad_norm": 1.0725080966949463, "learning_rate": 1.4459446221962697e-05, "loss": 1.0988, "step": 2784 }, { "epoch": 0.6019018802679922, "grad_norm": 0.9283778071403503, "learning_rate": 1.4445995544846694e-05, "loss": 0.9264, "step": 2785 }, { "epoch": 0.6021180030257186, "grad_norm": 0.9730119109153748, "learning_rate": 1.4432547589180033e-05, "loss": 0.7145, "step": 2786 }, { "epoch": 0.602334125783445, "grad_norm": 0.9832450747489929, "learning_rate": 1.4419102361552191e-05, "loss": 0.8735, "step": 2787 }, { "epoch": 0.6025502485411713, "grad_norm": 0.8619163632392883, "learning_rate": 1.4405659868551287e-05, "loss": 0.7491, "step": 2788 }, { "epoch": 0.6027663712988978, "grad_norm": 0.9204447269439697, "learning_rate": 1.4392220116764116e-05, "loss": 0.8984, "step": 2789 }, { "epoch": 0.6029824940566242, "grad_norm": 1.0223321914672852, "learning_rate": 1.4378783112776119e-05, "loss": 0.9145, "step": 2790 }, { "epoch": 0.6031986168143505, "grad_norm": 0.9550166130065918, "learning_rate": 1.4365348863171406e-05, "loss": 0.8271, "step": 2791 }, { "epoch": 0.6034147395720769, "grad_norm": 0.9865041971206665, "learning_rate": 1.4351917374532725e-05, "loss": 0.9244, "step": 2792 }, { "epoch": 0.6036308623298033, "grad_norm": 0.9395430088043213, "learning_rate": 1.4338488653441482e-05, "loss": 0.8319, "step": 2793 }, { "epoch": 0.6038469850875298, "grad_norm": 0.9528705477714539, "learning_rate": 1.43250627064777e-05, "loss": 0.9107, "step": 2794 }, { "epoch": 0.6040631078452561, "grad_norm": 0.9629172682762146, "learning_rate": 1.4311639540220075e-05, "loss": 0.8418, "step": 2795 }, { "epoch": 0.6042792306029825, "grad_norm": 0.8943168520927429, "learning_rate": 1.4298219161245927e-05, "loss": 0.8928, "step": 2796 }, { "epoch": 0.6044953533607089, "grad_norm": 0.8976085186004639, "learning_rate": 1.428480157613121e-05, "loss": 1.0248, "step": 2797 }, { "epoch": 0.6047114761184352, "grad_norm": 0.9356058239936829, "learning_rate": 1.4271386791450508e-05, "loss": 1.0223, "step": 2798 }, { "epoch": 0.6049275988761617, "grad_norm": 0.9700066447257996, "learning_rate": 1.425797481377704e-05, "loss": 0.8128, "step": 2799 }, { "epoch": 0.6051437216338881, "grad_norm": 1.0174139738082886, "learning_rate": 1.4244565649682636e-05, "loss": 0.9016, "step": 2800 }, { "epoch": 0.6053598443916144, "grad_norm": 1.014001727104187, "learning_rate": 1.4231159305737757e-05, "loss": 0.9467, "step": 2801 }, { "epoch": 0.6055759671493408, "grad_norm": 0.8730211853981018, "learning_rate": 1.4217755788511485e-05, "loss": 0.8795, "step": 2802 }, { "epoch": 0.6057920899070672, "grad_norm": 0.8965271711349487, "learning_rate": 1.4204355104571506e-05, "loss": 0.8219, "step": 2803 }, { "epoch": 0.6060082126647937, "grad_norm": 0.9582975506782532, "learning_rate": 1.419095726048414e-05, "loss": 0.8053, "step": 2804 }, { "epoch": 0.60622433542252, "grad_norm": 1.272420883178711, "learning_rate": 1.4177562262814277e-05, "loss": 0.8486, "step": 2805 }, { "epoch": 0.6064404581802464, "grad_norm": 1.0263395309448242, "learning_rate": 1.4164170118125448e-05, "loss": 0.9882, "step": 2806 }, { "epoch": 0.6066565809379728, "grad_norm": 1.087254524230957, "learning_rate": 1.415078083297977e-05, "loss": 1.0561, "step": 2807 }, { "epoch": 0.6068727036956991, "grad_norm": 0.9583911299705505, "learning_rate": 1.4137394413937959e-05, "loss": 1.1197, "step": 2808 }, { "epoch": 0.6070888264534255, "grad_norm": 0.8234987854957581, "learning_rate": 1.4124010867559339e-05, "loss": 0.8728, "step": 2809 }, { "epoch": 0.607304949211152, "grad_norm": 0.9210377931594849, "learning_rate": 1.411063020040181e-05, "loss": 0.8823, "step": 2810 }, { "epoch": 0.6075210719688783, "grad_norm": 0.9682777523994446, "learning_rate": 1.4097252419021871e-05, "loss": 0.8733, "step": 2811 }, { "epoch": 0.6077371947266047, "grad_norm": 0.9390709400177002, "learning_rate": 1.4083877529974594e-05, "loss": 1.0549, "step": 2812 }, { "epoch": 0.6079533174843311, "grad_norm": 0.9637075662612915, "learning_rate": 1.4070505539813654e-05, "loss": 0.8418, "step": 2813 }, { "epoch": 0.6081694402420574, "grad_norm": 1.1636171340942383, "learning_rate": 1.4057136455091293e-05, "loss": 0.901, "step": 2814 }, { "epoch": 0.6083855629997839, "grad_norm": 0.9594619870185852, "learning_rate": 1.4043770282358332e-05, "loss": 1.0105, "step": 2815 }, { "epoch": 0.6086016857575103, "grad_norm": 0.9706513285636902, "learning_rate": 1.4030407028164165e-05, "loss": 0.9269, "step": 2816 }, { "epoch": 0.6088178085152367, "grad_norm": 1.107113242149353, "learning_rate": 1.4017046699056753e-05, "loss": 1.0875, "step": 2817 }, { "epoch": 0.609033931272963, "grad_norm": 0.9278103113174438, "learning_rate": 1.4003689301582628e-05, "loss": 0.9864, "step": 2818 }, { "epoch": 0.6092500540306894, "grad_norm": 0.8669817447662354, "learning_rate": 1.3990334842286881e-05, "loss": 0.9355, "step": 2819 }, { "epoch": 0.6094661767884159, "grad_norm": 1.051826000213623, "learning_rate": 1.3976983327713172e-05, "loss": 1.1648, "step": 2820 }, { "epoch": 0.6096822995461422, "grad_norm": 1.1233115196228027, "learning_rate": 1.3963634764403714e-05, "loss": 1.042, "step": 2821 }, { "epoch": 0.6098984223038686, "grad_norm": 0.9370507597923279, "learning_rate": 1.3950289158899262e-05, "loss": 0.7103, "step": 2822 }, { "epoch": 0.610114545061595, "grad_norm": 1.1144932508468628, "learning_rate": 1.3936946517739132e-05, "loss": 0.911, "step": 2823 }, { "epoch": 0.6103306678193213, "grad_norm": 1.068408727645874, "learning_rate": 1.39236068474612e-05, "loss": 0.9533, "step": 2824 }, { "epoch": 0.6105467905770477, "grad_norm": 1.0667728185653687, "learning_rate": 1.3910270154601864e-05, "loss": 0.9732, "step": 2825 }, { "epoch": 0.6107629133347742, "grad_norm": 1.15886390209198, "learning_rate": 1.389693644569607e-05, "loss": 1.1132, "step": 2826 }, { "epoch": 0.6109790360925006, "grad_norm": 0.9730468392372131, "learning_rate": 1.3883605727277319e-05, "loss": 0.893, "step": 2827 }, { "epoch": 0.6111951588502269, "grad_norm": 0.945916473865509, "learning_rate": 1.3870278005877617e-05, "loss": 0.8653, "step": 2828 }, { "epoch": 0.6114112816079533, "grad_norm": 1.081801414489746, "learning_rate": 1.3856953288027524e-05, "loss": 1.001, "step": 2829 }, { "epoch": 0.6116274043656797, "grad_norm": 0.8034911155700684, "learning_rate": 1.3843631580256114e-05, "loss": 0.7614, "step": 2830 }, { "epoch": 0.6118435271234061, "grad_norm": 0.9153169393539429, "learning_rate": 1.3830312889091003e-05, "loss": 0.908, "step": 2831 }, { "epoch": 0.6120596498811325, "grad_norm": 0.9824167490005493, "learning_rate": 1.381699722105831e-05, "loss": 0.9988, "step": 2832 }, { "epoch": 0.6122757726388589, "grad_norm": 1.022257685661316, "learning_rate": 1.3803684582682697e-05, "loss": 0.988, "step": 2833 }, { "epoch": 0.6124918953965852, "grad_norm": 0.7998851537704468, "learning_rate": 1.3790374980487303e-05, "loss": 0.7413, "step": 2834 }, { "epoch": 0.6127080181543116, "grad_norm": 0.877083420753479, "learning_rate": 1.3777068420993818e-05, "loss": 0.912, "step": 2835 }, { "epoch": 0.6129241409120381, "grad_norm": 0.9918305277824402, "learning_rate": 1.3763764910722422e-05, "loss": 0.9743, "step": 2836 }, { "epoch": 0.6131402636697644, "grad_norm": 0.837921142578125, "learning_rate": 1.37504644561918e-05, "loss": 0.8085, "step": 2837 }, { "epoch": 0.6133563864274908, "grad_norm": 0.8603920936584473, "learning_rate": 1.3737167063919148e-05, "loss": 1.0102, "step": 2838 }, { "epoch": 0.6135725091852172, "grad_norm": 1.0279122591018677, "learning_rate": 1.3723872740420165e-05, "loss": 1.1155, "step": 2839 }, { "epoch": 0.6137886319429436, "grad_norm": 0.9947484135627747, "learning_rate": 1.3710581492209025e-05, "loss": 0.8671, "step": 2840 }, { "epoch": 0.61400475470067, "grad_norm": 0.9215602874755859, "learning_rate": 1.369729332579841e-05, "loss": 0.9884, "step": 2841 }, { "epoch": 0.6142208774583964, "grad_norm": 0.9822049140930176, "learning_rate": 1.3684008247699505e-05, "loss": 0.9937, "step": 2842 }, { "epoch": 0.6144370002161228, "grad_norm": 0.9134843945503235, "learning_rate": 1.3670726264421954e-05, "loss": 1.0323, "step": 2843 }, { "epoch": 0.6146531229738491, "grad_norm": 1.0407973527908325, "learning_rate": 1.3657447382473905e-05, "loss": 0.9765, "step": 2844 }, { "epoch": 0.6148692457315755, "grad_norm": 0.8549898266792297, "learning_rate": 1.364417160836197e-05, "loss": 0.8894, "step": 2845 }, { "epoch": 0.615085368489302, "grad_norm": 1.0176432132720947, "learning_rate": 1.363089894859126e-05, "loss": 0.8621, "step": 2846 }, { "epoch": 0.6153014912470283, "grad_norm": 0.8429177403450012, "learning_rate": 1.3617629409665338e-05, "loss": 0.7707, "step": 2847 }, { "epoch": 0.6155176140047547, "grad_norm": 1.0289604663848877, "learning_rate": 1.3604362998086251e-05, "loss": 0.9811, "step": 2848 }, { "epoch": 0.6157337367624811, "grad_norm": 1.0498583316802979, "learning_rate": 1.3591099720354515e-05, "loss": 0.8922, "step": 2849 }, { "epoch": 0.6159498595202075, "grad_norm": 0.9106667637825012, "learning_rate": 1.3577839582969104e-05, "loss": 0.9103, "step": 2850 }, { "epoch": 0.6161659822779338, "grad_norm": 1.018824815750122, "learning_rate": 1.3564582592427445e-05, "loss": 0.8685, "step": 2851 }, { "epoch": 0.6163821050356603, "grad_norm": 0.886332094669342, "learning_rate": 1.3551328755225437e-05, "loss": 0.9994, "step": 2852 }, { "epoch": 0.6165982277933867, "grad_norm": 1.0133635997772217, "learning_rate": 1.3538078077857435e-05, "loss": 0.8734, "step": 2853 }, { "epoch": 0.616814350551113, "grad_norm": 0.9232343435287476, "learning_rate": 1.3524830566816239e-05, "loss": 0.8883, "step": 2854 }, { "epoch": 0.6170304733088394, "grad_norm": 0.888059675693512, "learning_rate": 1.3511586228593089e-05, "loss": 1.0347, "step": 2855 }, { "epoch": 0.6172465960665658, "grad_norm": 1.0011212825775146, "learning_rate": 1.3498345069677698e-05, "loss": 1.0151, "step": 2856 }, { "epoch": 0.6174627188242922, "grad_norm": 1.1768680810928345, "learning_rate": 1.348510709655819e-05, "loss": 0.9633, "step": 2857 }, { "epoch": 0.6176788415820186, "grad_norm": 0.9705842137336731, "learning_rate": 1.3471872315721144e-05, "loss": 0.8847, "step": 2858 }, { "epoch": 0.617894964339745, "grad_norm": 0.9357125163078308, "learning_rate": 1.345864073365157e-05, "loss": 0.8738, "step": 2859 }, { "epoch": 0.6181110870974713, "grad_norm": 0.8916847705841064, "learning_rate": 1.3445412356832917e-05, "loss": 1.0725, "step": 2860 }, { "epoch": 0.6183272098551977, "grad_norm": 1.191519856452942, "learning_rate": 1.3432187191747059e-05, "loss": 1.0197, "step": 2861 }, { "epoch": 0.6185433326129242, "grad_norm": 1.018497347831726, "learning_rate": 1.3418965244874293e-05, "loss": 0.8523, "step": 2862 }, { "epoch": 0.6187594553706506, "grad_norm": 0.9003714323043823, "learning_rate": 1.3405746522693339e-05, "loss": 0.8863, "step": 2863 }, { "epoch": 0.6189755781283769, "grad_norm": 0.9653944373130798, "learning_rate": 1.3392531031681352e-05, "loss": 0.8744, "step": 2864 }, { "epoch": 0.6191917008861033, "grad_norm": 1.1506441831588745, "learning_rate": 1.3379318778313883e-05, "loss": 0.9678, "step": 2865 }, { "epoch": 0.6194078236438297, "grad_norm": 0.8557504415512085, "learning_rate": 1.3366109769064903e-05, "loss": 0.7498, "step": 2866 }, { "epoch": 0.619623946401556, "grad_norm": 0.9204830527305603, "learning_rate": 1.3352904010406811e-05, "loss": 0.9042, "step": 2867 }, { "epoch": 0.6198400691592825, "grad_norm": 0.9908314943313599, "learning_rate": 1.3339701508810384e-05, "loss": 1.0972, "step": 2868 }, { "epoch": 0.6200561919170089, "grad_norm": 0.9415073394775391, "learning_rate": 1.3326502270744819e-05, "loss": 0.9935, "step": 2869 }, { "epoch": 0.6202723146747352, "grad_norm": 1.0607409477233887, "learning_rate": 1.3313306302677711e-05, "loss": 0.9559, "step": 2870 }, { "epoch": 0.6204884374324616, "grad_norm": 0.9476536512374878, "learning_rate": 1.3300113611075061e-05, "loss": 0.8337, "step": 2871 }, { "epoch": 0.620704560190188, "grad_norm": 0.937411367893219, "learning_rate": 1.3286924202401257e-05, "loss": 0.8395, "step": 2872 }, { "epoch": 0.6209206829479145, "grad_norm": 0.8920255899429321, "learning_rate": 1.3273738083119074e-05, "loss": 0.7921, "step": 2873 }, { "epoch": 0.6211368057056408, "grad_norm": 1.0879623889923096, "learning_rate": 1.3260555259689678e-05, "loss": 1.0492, "step": 2874 }, { "epoch": 0.6213529284633672, "grad_norm": 0.9902235269546509, "learning_rate": 1.3247375738572628e-05, "loss": 0.9859, "step": 2875 }, { "epoch": 0.6215690512210936, "grad_norm": 0.8703365921974182, "learning_rate": 1.3234199526225858e-05, "loss": 0.9271, "step": 2876 }, { "epoch": 0.6217851739788199, "grad_norm": 0.943744957447052, "learning_rate": 1.3221026629105672e-05, "loss": 0.9108, "step": 2877 }, { "epoch": 0.6220012967365464, "grad_norm": 1.1115494966506958, "learning_rate": 1.3207857053666773e-05, "loss": 0.9466, "step": 2878 }, { "epoch": 0.6222174194942728, "grad_norm": 0.9848674535751343, "learning_rate": 1.319469080636222e-05, "loss": 1.085, "step": 2879 }, { "epoch": 0.6224335422519991, "grad_norm": 1.033340334892273, "learning_rate": 1.3181527893643437e-05, "loss": 0.9287, "step": 2880 }, { "epoch": 0.6226496650097255, "grad_norm": 0.8988987803459167, "learning_rate": 1.3168368321960218e-05, "loss": 0.9669, "step": 2881 }, { "epoch": 0.6228657877674519, "grad_norm": 0.9278745055198669, "learning_rate": 1.3155212097760736e-05, "loss": 0.8758, "step": 2882 }, { "epoch": 0.6230819105251783, "grad_norm": 0.976412296295166, "learning_rate": 1.3142059227491501e-05, "loss": 0.9732, "step": 2883 }, { "epoch": 0.6232980332829047, "grad_norm": 1.006489872932434, "learning_rate": 1.3128909717597397e-05, "loss": 0.9468, "step": 2884 }, { "epoch": 0.6235141560406311, "grad_norm": 1.134055256843567, "learning_rate": 1.3115763574521641e-05, "loss": 0.9543, "step": 2885 }, { "epoch": 0.6237302787983575, "grad_norm": 0.9546724557876587, "learning_rate": 1.3102620804705818e-05, "loss": 0.7967, "step": 2886 }, { "epoch": 0.6239464015560838, "grad_norm": 1.0579935312271118, "learning_rate": 1.308948141458986e-05, "loss": 1.0239, "step": 2887 }, { "epoch": 0.6241625243138103, "grad_norm": 1.0004938840866089, "learning_rate": 1.307634541061203e-05, "loss": 0.831, "step": 2888 }, { "epoch": 0.6243786470715367, "grad_norm": 1.0822181701660156, "learning_rate": 1.3063212799208947e-05, "loss": 0.9237, "step": 2889 }, { "epoch": 0.624594769829263, "grad_norm": 0.9382985234260559, "learning_rate": 1.305008358681556e-05, "loss": 0.8923, "step": 2890 }, { "epoch": 0.6248108925869894, "grad_norm": 0.9579905271530151, "learning_rate": 1.3036957779865147e-05, "loss": 0.9521, "step": 2891 }, { "epoch": 0.6250270153447158, "grad_norm": 1.0470249652862549, "learning_rate": 1.302383538478932e-05, "loss": 0.9409, "step": 2892 }, { "epoch": 0.6252431381024421, "grad_norm": 1.0197440385818481, "learning_rate": 1.3010716408018037e-05, "loss": 0.946, "step": 2893 }, { "epoch": 0.6254592608601686, "grad_norm": 0.8235803246498108, "learning_rate": 1.299760085597955e-05, "loss": 0.8161, "step": 2894 }, { "epoch": 0.625675383617895, "grad_norm": 1.1165820360183716, "learning_rate": 1.2984488735100458e-05, "loss": 0.8393, "step": 2895 }, { "epoch": 0.6258915063756213, "grad_norm": 1.1988672018051147, "learning_rate": 1.2971380051805673e-05, "loss": 1.1965, "step": 2896 }, { "epoch": 0.6261076291333477, "grad_norm": 0.9154154658317566, "learning_rate": 1.2958274812518413e-05, "loss": 0.9277, "step": 2897 }, { "epoch": 0.6263237518910741, "grad_norm": 0.9022080302238464, "learning_rate": 1.2945173023660216e-05, "loss": 0.8837, "step": 2898 }, { "epoch": 0.6265398746488006, "grad_norm": 0.946685791015625, "learning_rate": 1.2932074691650925e-05, "loss": 0.8566, "step": 2899 }, { "epoch": 0.6267559974065269, "grad_norm": 1.0265495777130127, "learning_rate": 1.2918979822908697e-05, "loss": 0.8907, "step": 2900 }, { "epoch": 0.6269721201642533, "grad_norm": 0.9504611492156982, "learning_rate": 1.2905888423849991e-05, "loss": 0.8348, "step": 2901 }, { "epoch": 0.6271882429219797, "grad_norm": 0.9449243545532227, "learning_rate": 1.289280050088956e-05, "loss": 0.7915, "step": 2902 }, { "epoch": 0.627404365679706, "grad_norm": 1.0838656425476074, "learning_rate": 1.2879716060440446e-05, "loss": 0.9741, "step": 2903 }, { "epoch": 0.6276204884374325, "grad_norm": 0.9667279124259949, "learning_rate": 1.2866635108914007e-05, "loss": 0.8818, "step": 2904 }, { "epoch": 0.6278366111951589, "grad_norm": 0.9566159248352051, "learning_rate": 1.2853557652719877e-05, "loss": 0.876, "step": 2905 }, { "epoch": 0.6280527339528852, "grad_norm": 0.9707115888595581, "learning_rate": 1.2840483698265971e-05, "loss": 0.8774, "step": 2906 }, { "epoch": 0.6282688567106116, "grad_norm": 0.8697360157966614, "learning_rate": 1.282741325195851e-05, "loss": 0.9508, "step": 2907 }, { "epoch": 0.628484979468338, "grad_norm": 0.8760444521903992, "learning_rate": 1.281434632020197e-05, "loss": 0.8965, "step": 2908 }, { "epoch": 0.6287011022260645, "grad_norm": 0.8903259038925171, "learning_rate": 1.2801282909399126e-05, "loss": 0.9143, "step": 2909 }, { "epoch": 0.6289172249837908, "grad_norm": 1.049411654472351, "learning_rate": 1.278822302595101e-05, "loss": 0.9625, "step": 2910 }, { "epoch": 0.6291333477415172, "grad_norm": 0.9313656091690063, "learning_rate": 1.2775166676256942e-05, "loss": 0.8914, "step": 2911 }, { "epoch": 0.6293494704992436, "grad_norm": 0.9373122453689575, "learning_rate": 1.2762113866714503e-05, "loss": 0.9475, "step": 2912 }, { "epoch": 0.6295655932569699, "grad_norm": 1.0107008218765259, "learning_rate": 1.2749064603719541e-05, "loss": 0.9687, "step": 2913 }, { "epoch": 0.6297817160146963, "grad_norm": 1.0947474241256714, "learning_rate": 1.2736018893666154e-05, "loss": 1.0293, "step": 2914 }, { "epoch": 0.6299978387724228, "grad_norm": 1.0009697675704956, "learning_rate": 1.2722976742946719e-05, "loss": 0.8728, "step": 2915 }, { "epoch": 0.6302139615301491, "grad_norm": 0.8761113286018372, "learning_rate": 1.270993815795186e-05, "loss": 0.8043, "step": 2916 }, { "epoch": 0.6304300842878755, "grad_norm": 1.0249576568603516, "learning_rate": 1.2696903145070447e-05, "loss": 0.9708, "step": 2917 }, { "epoch": 0.6306462070456019, "grad_norm": 1.007078766822815, "learning_rate": 1.2683871710689614e-05, "loss": 1.0561, "step": 2918 }, { "epoch": 0.6308623298033282, "grad_norm": 0.9375736117362976, "learning_rate": 1.2670843861194737e-05, "loss": 0.8983, "step": 2919 }, { "epoch": 0.6310784525610547, "grad_norm": 0.883455753326416, "learning_rate": 1.265781960296942e-05, "loss": 0.8459, "step": 2920 }, { "epoch": 0.6312945753187811, "grad_norm": 1.0403642654418945, "learning_rate": 1.264479894239552e-05, "loss": 0.9753, "step": 2921 }, { "epoch": 0.6315106980765075, "grad_norm": 0.8808861970901489, "learning_rate": 1.2631781885853141e-05, "loss": 0.7907, "step": 2922 }, { "epoch": 0.6317268208342338, "grad_norm": 1.1256625652313232, "learning_rate": 1.2618768439720603e-05, "loss": 0.8048, "step": 2923 }, { "epoch": 0.6319429435919602, "grad_norm": 0.9758326411247253, "learning_rate": 1.260575861037447e-05, "loss": 1.0097, "step": 2924 }, { "epoch": 0.6321590663496867, "grad_norm": 1.0359525680541992, "learning_rate": 1.259275240418952e-05, "loss": 0.9651, "step": 2925 }, { "epoch": 0.632375189107413, "grad_norm": 0.8884275555610657, "learning_rate": 1.257974982753877e-05, "loss": 1.0232, "step": 2926 }, { "epoch": 0.6325913118651394, "grad_norm": 1.0615814924240112, "learning_rate": 1.2566750886793453e-05, "loss": 0.7354, "step": 2927 }, { "epoch": 0.6328074346228658, "grad_norm": 0.9971369504928589, "learning_rate": 1.2553755588323014e-05, "loss": 0.9903, "step": 2928 }, { "epoch": 0.6330235573805921, "grad_norm": 1.0728267431259155, "learning_rate": 1.2540763938495127e-05, "loss": 0.7252, "step": 2929 }, { "epoch": 0.6332396801383186, "grad_norm": 0.9396435618400574, "learning_rate": 1.2527775943675673e-05, "loss": 0.8694, "step": 2930 }, { "epoch": 0.633455802896045, "grad_norm": 0.9887653589248657, "learning_rate": 1.2514791610228727e-05, "loss": 0.93, "step": 2931 }, { "epoch": 0.6336719256537714, "grad_norm": 0.9077363014221191, "learning_rate": 1.2501810944516585e-05, "loss": 0.7585, "step": 2932 }, { "epoch": 0.6338880484114977, "grad_norm": 0.8319936394691467, "learning_rate": 1.248883395289975e-05, "loss": 0.9588, "step": 2933 }, { "epoch": 0.6341041711692241, "grad_norm": 0.9702330827713013, "learning_rate": 1.2475860641736917e-05, "loss": 0.8698, "step": 2934 }, { "epoch": 0.6343202939269506, "grad_norm": 1.0369808673858643, "learning_rate": 1.2462891017384971e-05, "loss": 0.9861, "step": 2935 }, { "epoch": 0.6345364166846769, "grad_norm": 1.0273332595825195, "learning_rate": 1.244992508619901e-05, "loss": 0.9162, "step": 2936 }, { "epoch": 0.6347525394424033, "grad_norm": 0.8920016884803772, "learning_rate": 1.2436962854532302e-05, "loss": 0.8125, "step": 2937 }, { "epoch": 0.6349686622001297, "grad_norm": 1.032292127609253, "learning_rate": 1.2424004328736312e-05, "loss": 0.9432, "step": 2938 }, { "epoch": 0.635184784957856, "grad_norm": 0.9539034962654114, "learning_rate": 1.2411049515160683e-05, "loss": 0.7774, "step": 2939 }, { "epoch": 0.6354009077155824, "grad_norm": 0.8693695664405823, "learning_rate": 1.239809842015325e-05, "loss": 0.8773, "step": 2940 }, { "epoch": 0.6356170304733089, "grad_norm": 0.8640608191490173, "learning_rate": 1.238515105006002e-05, "loss": 0.8453, "step": 2941 }, { "epoch": 0.6358331532310352, "grad_norm": 0.8216714262962341, "learning_rate": 1.237220741122518e-05, "loss": 0.6976, "step": 2942 }, { "epoch": 0.6360492759887616, "grad_norm": 0.9706547856330872, "learning_rate": 1.2359267509991062e-05, "loss": 0.8826, "step": 2943 }, { "epoch": 0.636265398746488, "grad_norm": 0.8941812515258789, "learning_rate": 1.2346331352698206e-05, "loss": 0.8344, "step": 2944 }, { "epoch": 0.6364815215042144, "grad_norm": 0.9289961457252502, "learning_rate": 1.2333398945685295e-05, "loss": 0.9049, "step": 2945 }, { "epoch": 0.6366976442619408, "grad_norm": 0.9785312414169312, "learning_rate": 1.2320470295289178e-05, "loss": 0.957, "step": 2946 }, { "epoch": 0.6369137670196672, "grad_norm": 0.9480656981468201, "learning_rate": 1.2307545407844868e-05, "loss": 0.7372, "step": 2947 }, { "epoch": 0.6371298897773936, "grad_norm": 1.0860769748687744, "learning_rate": 1.2294624289685522e-05, "loss": 1.123, "step": 2948 }, { "epoch": 0.6373460125351199, "grad_norm": 1.026252269744873, "learning_rate": 1.2281706947142463e-05, "loss": 0.7511, "step": 2949 }, { "epoch": 0.6375621352928463, "grad_norm": 0.9047048091888428, "learning_rate": 1.2268793386545152e-05, "loss": 0.9597, "step": 2950 }, { "epoch": 0.6377782580505728, "grad_norm": 0.9991381168365479, "learning_rate": 1.2255883614221216e-05, "loss": 1.0385, "step": 2951 }, { "epoch": 0.6379943808082991, "grad_norm": 1.060020089149475, "learning_rate": 1.2242977636496405e-05, "loss": 0.9657, "step": 2952 }, { "epoch": 0.6382105035660255, "grad_norm": 0.9928672313690186, "learning_rate": 1.2230075459694626e-05, "loss": 0.8669, "step": 2953 }, { "epoch": 0.6384266263237519, "grad_norm": 0.9651292562484741, "learning_rate": 1.2217177090137901e-05, "loss": 1.0384, "step": 2954 }, { "epoch": 0.6386427490814783, "grad_norm": 0.927743673324585, "learning_rate": 1.2204282534146414e-05, "loss": 0.9027, "step": 2955 }, { "epoch": 0.6388588718392046, "grad_norm": 0.9630751013755798, "learning_rate": 1.2191391798038468e-05, "loss": 0.7777, "step": 2956 }, { "epoch": 0.6390749945969311, "grad_norm": 0.9790912866592407, "learning_rate": 1.2178504888130482e-05, "loss": 0.9915, "step": 2957 }, { "epoch": 0.6392911173546575, "grad_norm": 1.0396443605422974, "learning_rate": 1.2165621810737025e-05, "loss": 0.8656, "step": 2958 }, { "epoch": 0.6395072401123838, "grad_norm": 0.8791309595108032, "learning_rate": 1.2152742572170774e-05, "loss": 0.8693, "step": 2959 }, { "epoch": 0.6397233628701102, "grad_norm": 0.9552299976348877, "learning_rate": 1.2139867178742519e-05, "loss": 0.736, "step": 2960 }, { "epoch": 0.6399394856278366, "grad_norm": 0.9659188985824585, "learning_rate": 1.2126995636761174e-05, "loss": 0.8197, "step": 2961 }, { "epoch": 0.640155608385563, "grad_norm": 1.0355355739593506, "learning_rate": 1.2114127952533773e-05, "loss": 0.9412, "step": 2962 }, { "epoch": 0.6403717311432894, "grad_norm": 0.8427955508232117, "learning_rate": 1.210126413236544e-05, "loss": 0.8794, "step": 2963 }, { "epoch": 0.6405878539010158, "grad_norm": 0.9869213104248047, "learning_rate": 1.208840418255942e-05, "loss": 0.8764, "step": 2964 }, { "epoch": 0.6408039766587421, "grad_norm": 0.8903014063835144, "learning_rate": 1.2075548109417073e-05, "loss": 0.9439, "step": 2965 }, { "epoch": 0.6410200994164685, "grad_norm": 1.016274333000183, "learning_rate": 1.2062695919237827e-05, "loss": 0.9497, "step": 2966 }, { "epoch": 0.641236222174195, "grad_norm": 1.1049562692642212, "learning_rate": 1.2049847618319235e-05, "loss": 1.0377, "step": 2967 }, { "epoch": 0.6414523449319214, "grad_norm": 0.9826555848121643, "learning_rate": 1.2037003212956924e-05, "loss": 0.9585, "step": 2968 }, { "epoch": 0.6416684676896477, "grad_norm": 1.0991896390914917, "learning_rate": 1.2024162709444637e-05, "loss": 0.9034, "step": 2969 }, { "epoch": 0.6418845904473741, "grad_norm": 0.9281312227249146, "learning_rate": 1.2011326114074188e-05, "loss": 0.9162, "step": 2970 }, { "epoch": 0.6421007132051005, "grad_norm": 1.021680474281311, "learning_rate": 1.1998493433135474e-05, "loss": 0.9187, "step": 2971 }, { "epoch": 0.6423168359628268, "grad_norm": 0.9557349681854248, "learning_rate": 1.1985664672916474e-05, "loss": 0.8563, "step": 2972 }, { "epoch": 0.6425329587205533, "grad_norm": 0.985569417476654, "learning_rate": 1.1972839839703263e-05, "loss": 0.7767, "step": 2973 }, { "epoch": 0.6427490814782797, "grad_norm": 1.2562131881713867, "learning_rate": 1.1960018939779977e-05, "loss": 0.9193, "step": 2974 }, { "epoch": 0.642965204236006, "grad_norm": 1.118052363395691, "learning_rate": 1.1947201979428817e-05, "loss": 0.9389, "step": 2975 }, { "epoch": 0.6431813269937324, "grad_norm": 1.0856068134307861, "learning_rate": 1.1934388964930082e-05, "loss": 0.9635, "step": 2976 }, { "epoch": 0.6433974497514588, "grad_norm": 1.1075384616851807, "learning_rate": 1.1921579902562103e-05, "loss": 0.9912, "step": 2977 }, { "epoch": 0.6436135725091853, "grad_norm": 0.9479909539222717, "learning_rate": 1.19087747986013e-05, "loss": 0.9073, "step": 2978 }, { "epoch": 0.6438296952669116, "grad_norm": 0.8583290576934814, "learning_rate": 1.1895973659322135e-05, "loss": 0.9195, "step": 2979 }, { "epoch": 0.644045818024638, "grad_norm": 1.0640686750411987, "learning_rate": 1.188317649099715e-05, "loss": 0.8857, "step": 2980 }, { "epoch": 0.6442619407823644, "grad_norm": 0.8737252354621887, "learning_rate": 1.1870383299896918e-05, "loss": 0.6835, "step": 2981 }, { "epoch": 0.6444780635400907, "grad_norm": 1.0016423463821411, "learning_rate": 1.185759409229008e-05, "loss": 0.8296, "step": 2982 }, { "epoch": 0.6446941862978172, "grad_norm": 0.8675923347473145, "learning_rate": 1.1844808874443307e-05, "loss": 0.911, "step": 2983 }, { "epoch": 0.6449103090555436, "grad_norm": 0.9906182885169983, "learning_rate": 1.1832027652621339e-05, "loss": 0.964, "step": 2984 }, { "epoch": 0.6451264318132699, "grad_norm": 0.9699721932411194, "learning_rate": 1.181925043308694e-05, "loss": 0.9571, "step": 2985 }, { "epoch": 0.6453425545709963, "grad_norm": 0.9955711364746094, "learning_rate": 1.1806477222100912e-05, "loss": 0.9385, "step": 2986 }, { "epoch": 0.6455586773287227, "grad_norm": 1.0205777883529663, "learning_rate": 1.1793708025922112e-05, "loss": 1.1295, "step": 2987 }, { "epoch": 0.645774800086449, "grad_norm": 0.8897711038589478, "learning_rate": 1.1780942850807407e-05, "loss": 0.8313, "step": 2988 }, { "epoch": 0.6459909228441755, "grad_norm": 0.9280810356140137, "learning_rate": 1.1768181703011703e-05, "loss": 0.8511, "step": 2989 }, { "epoch": 0.6462070456019019, "grad_norm": 1.0160632133483887, "learning_rate": 1.1755424588787933e-05, "loss": 0.972, "step": 2990 }, { "epoch": 0.6464231683596283, "grad_norm": 1.013733148574829, "learning_rate": 1.1742671514387059e-05, "loss": 0.9266, "step": 2991 }, { "epoch": 0.6466392911173546, "grad_norm": 0.9809374213218689, "learning_rate": 1.1729922486058057e-05, "loss": 0.9901, "step": 2992 }, { "epoch": 0.646855413875081, "grad_norm": 0.9819334745407104, "learning_rate": 1.1717177510047919e-05, "loss": 0.9503, "step": 2993 }, { "epoch": 0.6470715366328075, "grad_norm": 0.953656017780304, "learning_rate": 1.1704436592601649e-05, "loss": 0.7493, "step": 2994 }, { "epoch": 0.6472876593905338, "grad_norm": 0.9014970064163208, "learning_rate": 1.1691699739962275e-05, "loss": 0.8103, "step": 2995 }, { "epoch": 0.6475037821482602, "grad_norm": 0.9725258350372314, "learning_rate": 1.167896695837082e-05, "loss": 0.934, "step": 2996 }, { "epoch": 0.6477199049059866, "grad_norm": 1.0287680625915527, "learning_rate": 1.166623825406632e-05, "loss": 0.9125, "step": 2997 }, { "epoch": 0.6479360276637129, "grad_norm": 1.0294086933135986, "learning_rate": 1.1653513633285813e-05, "loss": 0.9198, "step": 2998 }, { "epoch": 0.6481521504214394, "grad_norm": 0.9742729067802429, "learning_rate": 1.164079310226434e-05, "loss": 0.8445, "step": 2999 }, { "epoch": 0.6483682731791658, "grad_norm": 0.9297633767127991, "learning_rate": 1.1628076667234906e-05, "loss": 0.9093, "step": 3000 }, { "epoch": 0.6485843959368921, "grad_norm": 0.9483668208122253, "learning_rate": 1.1615364334428562e-05, "loss": 0.9307, "step": 3001 }, { "epoch": 0.6488005186946185, "grad_norm": 1.0058062076568604, "learning_rate": 1.1602656110074308e-05, "loss": 0.8993, "step": 3002 }, { "epoch": 0.649016641452345, "grad_norm": 0.9775843024253845, "learning_rate": 1.1589952000399152e-05, "loss": 0.9641, "step": 3003 }, { "epoch": 0.6492327642100714, "grad_norm": 0.821497917175293, "learning_rate": 1.1577252011628072e-05, "loss": 0.8256, "step": 3004 }, { "epoch": 0.6494488869677977, "grad_norm": 0.9848234057426453, "learning_rate": 1.1564556149984038e-05, "loss": 0.9869, "step": 3005 }, { "epoch": 0.6496650097255241, "grad_norm": 1.0490641593933105, "learning_rate": 1.155186442168799e-05, "loss": 0.8785, "step": 3006 }, { "epoch": 0.6498811324832505, "grad_norm": 1.098029613494873, "learning_rate": 1.1539176832958845e-05, "loss": 1.0068, "step": 3007 }, { "epoch": 0.6500972552409768, "grad_norm": 1.0362261533737183, "learning_rate": 1.1526493390013493e-05, "loss": 1.0468, "step": 3008 }, { "epoch": 0.6503133779987033, "grad_norm": 0.9353405833244324, "learning_rate": 1.1513814099066786e-05, "loss": 0.8203, "step": 3009 }, { "epoch": 0.6505295007564297, "grad_norm": 0.9363531470298767, "learning_rate": 1.150113896633157e-05, "loss": 1.0759, "step": 3010 }, { "epoch": 0.650745623514156, "grad_norm": 0.9360188841819763, "learning_rate": 1.1488467998018601e-05, "loss": 0.9603, "step": 3011 }, { "epoch": 0.6509617462718824, "grad_norm": 0.9600111842155457, "learning_rate": 1.147580120033664e-05, "loss": 0.8148, "step": 3012 }, { "epoch": 0.6511778690296088, "grad_norm": 0.9954214096069336, "learning_rate": 1.1463138579492386e-05, "loss": 0.8941, "step": 3013 }, { "epoch": 0.6513939917873353, "grad_norm": 0.8630295991897583, "learning_rate": 1.1450480141690486e-05, "loss": 0.8974, "step": 3014 }, { "epoch": 0.6516101145450616, "grad_norm": 1.057152509689331, "learning_rate": 1.143782589313356e-05, "loss": 0.9268, "step": 3015 }, { "epoch": 0.651826237302788, "grad_norm": 0.884134829044342, "learning_rate": 1.1425175840022163e-05, "loss": 0.8361, "step": 3016 }, { "epoch": 0.6520423600605144, "grad_norm": 1.0940039157867432, "learning_rate": 1.1412529988554772e-05, "loss": 1.1047, "step": 3017 }, { "epoch": 0.6522584828182407, "grad_norm": 0.986821711063385, "learning_rate": 1.1399888344927828e-05, "loss": 0.8267, "step": 3018 }, { "epoch": 0.6524746055759671, "grad_norm": 0.9668971300125122, "learning_rate": 1.1387250915335724e-05, "loss": 0.8693, "step": 3019 }, { "epoch": 0.6526907283336936, "grad_norm": 0.9331496357917786, "learning_rate": 1.1374617705970761e-05, "loss": 0.9027, "step": 3020 }, { "epoch": 0.6529068510914199, "grad_norm": 0.9307202696800232, "learning_rate": 1.1361988723023183e-05, "loss": 1.0028, "step": 3021 }, { "epoch": 0.6531229738491463, "grad_norm": 0.880175769329071, "learning_rate": 1.134936397268116e-05, "loss": 0.8451, "step": 3022 }, { "epoch": 0.6533390966068727, "grad_norm": 0.9314516186714172, "learning_rate": 1.1336743461130797e-05, "loss": 0.7698, "step": 3023 }, { "epoch": 0.653555219364599, "grad_norm": 0.9444621801376343, "learning_rate": 1.1324127194556107e-05, "loss": 0.9248, "step": 3024 }, { "epoch": 0.6537713421223255, "grad_norm": 0.9031175971031189, "learning_rate": 1.1311515179139038e-05, "loss": 0.9376, "step": 3025 }, { "epoch": 0.6539874648800519, "grad_norm": 0.9749978184700012, "learning_rate": 1.1298907421059448e-05, "loss": 0.9033, "step": 3026 }, { "epoch": 0.6542035876377783, "grad_norm": 1.1861553192138672, "learning_rate": 1.1286303926495099e-05, "loss": 1.048, "step": 3027 }, { "epoch": 0.6544197103955046, "grad_norm": 1.0161523818969727, "learning_rate": 1.1273704701621698e-05, "loss": 0.9086, "step": 3028 }, { "epoch": 0.654635833153231, "grad_norm": 0.8376269936561584, "learning_rate": 1.1261109752612813e-05, "loss": 0.9415, "step": 3029 }, { "epoch": 0.6548519559109575, "grad_norm": 0.9243308305740356, "learning_rate": 1.1248519085639948e-05, "loss": 0.8454, "step": 3030 }, { "epoch": 0.6550680786686838, "grad_norm": 1.1032763719558716, "learning_rate": 1.12359327068725e-05, "loss": 0.8026, "step": 3031 }, { "epoch": 0.6552842014264102, "grad_norm": 0.9946315884590149, "learning_rate": 1.122335062247776e-05, "loss": 0.9173, "step": 3032 }, { "epoch": 0.6555003241841366, "grad_norm": 0.904829204082489, "learning_rate": 1.121077283862094e-05, "loss": 0.8143, "step": 3033 }, { "epoch": 0.6557164469418629, "grad_norm": 0.9684969782829285, "learning_rate": 1.1198199361465104e-05, "loss": 0.9296, "step": 3034 }, { "epoch": 0.6559325696995894, "grad_norm": 1.0400232076644897, "learning_rate": 1.1185630197171236e-05, "loss": 0.8541, "step": 3035 }, { "epoch": 0.6561486924573158, "grad_norm": 1.0041389465332031, "learning_rate": 1.1173065351898185e-05, "loss": 0.8815, "step": 3036 }, { "epoch": 0.6563648152150422, "grad_norm": 0.9918075799942017, "learning_rate": 1.1160504831802714e-05, "loss": 0.9075, "step": 3037 }, { "epoch": 0.6565809379727685, "grad_norm": 0.9889822006225586, "learning_rate": 1.1147948643039443e-05, "loss": 0.9699, "step": 3038 }, { "epoch": 0.6567970607304949, "grad_norm": 1.0850324630737305, "learning_rate": 1.1135396791760882e-05, "loss": 0.9191, "step": 3039 }, { "epoch": 0.6570131834882214, "grad_norm": 1.0572766065597534, "learning_rate": 1.1122849284117385e-05, "loss": 0.8661, "step": 3040 }, { "epoch": 0.6572293062459477, "grad_norm": 1.0197628736495972, "learning_rate": 1.1110306126257226e-05, "loss": 0.919, "step": 3041 }, { "epoch": 0.6574454290036741, "grad_norm": 0.9910076856613159, "learning_rate": 1.1097767324326515e-05, "loss": 0.7211, "step": 3042 }, { "epoch": 0.6576615517614005, "grad_norm": 0.9274858236312866, "learning_rate": 1.1085232884469236e-05, "loss": 0.9885, "step": 3043 }, { "epoch": 0.6578776745191268, "grad_norm": 1.080104112625122, "learning_rate": 1.1072702812827236e-05, "loss": 0.971, "step": 3044 }, { "epoch": 0.6580937972768532, "grad_norm": 1.3358137607574463, "learning_rate": 1.106017711554022e-05, "loss": 0.8378, "step": 3045 }, { "epoch": 0.6583099200345797, "grad_norm": 0.8777062296867371, "learning_rate": 1.1047655798745752e-05, "loss": 0.8373, "step": 3046 }, { "epoch": 0.658526042792306, "grad_norm": 0.9593110084533691, "learning_rate": 1.1035138868579247e-05, "loss": 0.9809, "step": 3047 }, { "epoch": 0.6587421655500324, "grad_norm": 1.1162147521972656, "learning_rate": 1.1022626331173973e-05, "loss": 0.9226, "step": 3048 }, { "epoch": 0.6589582883077588, "grad_norm": 0.9021515846252441, "learning_rate": 1.1010118192661036e-05, "loss": 0.9045, "step": 3049 }, { "epoch": 0.6591744110654852, "grad_norm": 1.0183483362197876, "learning_rate": 1.0997614459169421e-05, "loss": 0.763, "step": 3050 }, { "epoch": 0.6593905338232116, "grad_norm": 0.9093033671379089, "learning_rate": 1.0985115136825901e-05, "loss": 0.8944, "step": 3051 }, { "epoch": 0.659606656580938, "grad_norm": 1.030536413192749, "learning_rate": 1.0972620231755125e-05, "loss": 0.9605, "step": 3052 }, { "epoch": 0.6598227793386644, "grad_norm": 0.8911164402961731, "learning_rate": 1.0960129750079565e-05, "loss": 0.923, "step": 3053 }, { "epoch": 0.6600389020963907, "grad_norm": 0.9700839519500732, "learning_rate": 1.0947643697919523e-05, "loss": 0.9794, "step": 3054 }, { "epoch": 0.6602550248541171, "grad_norm": 0.9478440880775452, "learning_rate": 1.0935162081393154e-05, "loss": 0.8314, "step": 3055 }, { "epoch": 0.6604711476118436, "grad_norm": 0.8433210253715515, "learning_rate": 1.0922684906616414e-05, "loss": 0.7496, "step": 3056 }, { "epoch": 0.6606872703695699, "grad_norm": 0.9891439080238342, "learning_rate": 1.0910212179703082e-05, "loss": 0.7754, "step": 3057 }, { "epoch": 0.6609033931272963, "grad_norm": 1.016565203666687, "learning_rate": 1.0897743906764757e-05, "loss": 0.8037, "step": 3058 }, { "epoch": 0.6611195158850227, "grad_norm": 0.9328057765960693, "learning_rate": 1.0885280093910886e-05, "loss": 0.95, "step": 3059 }, { "epoch": 0.6613356386427491, "grad_norm": 0.9315574169158936, "learning_rate": 1.08728207472487e-05, "loss": 0.964, "step": 3060 }, { "epoch": 0.6615517614004754, "grad_norm": 0.9109113216400146, "learning_rate": 1.0860365872883247e-05, "loss": 0.9243, "step": 3061 }, { "epoch": 0.6617678841582019, "grad_norm": 1.1577420234680176, "learning_rate": 1.0847915476917392e-05, "loss": 0.8427, "step": 3062 }, { "epoch": 0.6619840069159283, "grad_norm": 0.9538229703903198, "learning_rate": 1.0835469565451792e-05, "loss": 0.9386, "step": 3063 }, { "epoch": 0.6622001296736546, "grad_norm": 1.0019724369049072, "learning_rate": 1.0823028144584924e-05, "loss": 0.732, "step": 3064 }, { "epoch": 0.662416252431381, "grad_norm": 0.9666872620582581, "learning_rate": 1.081059122041305e-05, "loss": 0.9524, "step": 3065 }, { "epoch": 0.6626323751891074, "grad_norm": 0.9982711672782898, "learning_rate": 1.0798158799030234e-05, "loss": 0.9872, "step": 3066 }, { "epoch": 0.6628484979468338, "grad_norm": 1.018813133239746, "learning_rate": 1.0785730886528328e-05, "loss": 0.7601, "step": 3067 }, { "epoch": 0.6630646207045602, "grad_norm": 1.0204263925552368, "learning_rate": 1.0773307488997001e-05, "loss": 1.0132, "step": 3068 }, { "epoch": 0.6632807434622866, "grad_norm": 0.9229834675788879, "learning_rate": 1.0760888612523667e-05, "loss": 0.8415, "step": 3069 }, { "epoch": 0.6634968662200129, "grad_norm": 1.0011403560638428, "learning_rate": 1.074847426319356e-05, "loss": 0.7339, "step": 3070 }, { "epoch": 0.6637129889777393, "grad_norm": 0.9895837903022766, "learning_rate": 1.0736064447089674e-05, "loss": 1.0072, "step": 3071 }, { "epoch": 0.6639291117354658, "grad_norm": 0.929438054561615, "learning_rate": 1.0723659170292786e-05, "loss": 0.6744, "step": 3072 }, { "epoch": 0.6641452344931922, "grad_norm": 0.9434496760368347, "learning_rate": 1.0711258438881477e-05, "loss": 0.9305, "step": 3073 }, { "epoch": 0.6643613572509185, "grad_norm": 1.071992039680481, "learning_rate": 1.0698862258932052e-05, "loss": 1.0707, "step": 3074 }, { "epoch": 0.6645774800086449, "grad_norm": 0.8919010162353516, "learning_rate": 1.068647063651862e-05, "loss": 0.7848, "step": 3075 }, { "epoch": 0.6647936027663713, "grad_norm": 1.0251803398132324, "learning_rate": 1.0674083577713037e-05, "loss": 0.972, "step": 3076 }, { "epoch": 0.6650097255240977, "grad_norm": 0.9181733131408691, "learning_rate": 1.0661701088584953e-05, "loss": 0.9924, "step": 3077 }, { "epoch": 0.6652258482818241, "grad_norm": 1.0462700128555298, "learning_rate": 1.0649323175201746e-05, "loss": 1.1303, "step": 3078 }, { "epoch": 0.6654419710395505, "grad_norm": 1.027639627456665, "learning_rate": 1.0636949843628578e-05, "loss": 0.9405, "step": 3079 }, { "epoch": 0.6656580937972768, "grad_norm": 1.0701429843902588, "learning_rate": 1.0624581099928324e-05, "loss": 0.8144, "step": 3080 }, { "epoch": 0.6658742165550032, "grad_norm": 0.9261655807495117, "learning_rate": 1.0612216950161667e-05, "loss": 0.86, "step": 3081 }, { "epoch": 0.6660903393127297, "grad_norm": 1.076094388961792, "learning_rate": 1.0599857400387003e-05, "loss": 0.8355, "step": 3082 }, { "epoch": 0.6663064620704561, "grad_norm": 1.1415523290634155, "learning_rate": 1.0587502456660484e-05, "loss": 1.2092, "step": 3083 }, { "epoch": 0.6665225848281824, "grad_norm": 0.7931217551231384, "learning_rate": 1.0575152125036e-05, "loss": 0.8419, "step": 3084 }, { "epoch": 0.6667387075859088, "grad_norm": 0.9748810529708862, "learning_rate": 1.0562806411565192e-05, "loss": 1.0033, "step": 3085 }, { "epoch": 0.6669548303436352, "grad_norm": 0.9649502635002136, "learning_rate": 1.0550465322297421e-05, "loss": 0.7608, "step": 3086 }, { "epoch": 0.6671709531013615, "grad_norm": 1.041642427444458, "learning_rate": 1.0538128863279801e-05, "loss": 0.8764, "step": 3087 }, { "epoch": 0.667387075859088, "grad_norm": 0.9173684120178223, "learning_rate": 1.0525797040557166e-05, "loss": 0.828, "step": 3088 }, { "epoch": 0.6676031986168144, "grad_norm": 0.9731480479240417, "learning_rate": 1.0513469860172079e-05, "loss": 1.0716, "step": 3089 }, { "epoch": 0.6678193213745407, "grad_norm": 0.8859195113182068, "learning_rate": 1.0501147328164832e-05, "loss": 0.8304, "step": 3090 }, { "epoch": 0.6680354441322671, "grad_norm": 0.9966490864753723, "learning_rate": 1.0488829450573435e-05, "loss": 1.1198, "step": 3091 }, { "epoch": 0.6682515668899935, "grad_norm": 0.9166406393051147, "learning_rate": 1.047651623343362e-05, "loss": 0.7968, "step": 3092 }, { "epoch": 0.6684676896477199, "grad_norm": 0.9286608695983887, "learning_rate": 1.0464207682778835e-05, "loss": 0.8948, "step": 3093 }, { "epoch": 0.6686838124054463, "grad_norm": 0.8471441864967346, "learning_rate": 1.0451903804640236e-05, "loss": 0.8495, "step": 3094 }, { "epoch": 0.6688999351631727, "grad_norm": 0.9391002058982849, "learning_rate": 1.0439604605046707e-05, "loss": 0.8925, "step": 3095 }, { "epoch": 0.6691160579208991, "grad_norm": 0.9239630103111267, "learning_rate": 1.042731009002483e-05, "loss": 0.8672, "step": 3096 }, { "epoch": 0.6693321806786254, "grad_norm": 1.1243126392364502, "learning_rate": 1.0415020265598872e-05, "loss": 1.0357, "step": 3097 }, { "epoch": 0.6695483034363519, "grad_norm": 0.9715553522109985, "learning_rate": 1.0402735137790816e-05, "loss": 0.9174, "step": 3098 }, { "epoch": 0.6697644261940783, "grad_norm": 0.9210078716278076, "learning_rate": 1.0390454712620368e-05, "loss": 1.0687, "step": 3099 }, { "epoch": 0.6699805489518046, "grad_norm": 1.0733075141906738, "learning_rate": 1.0378178996104898e-05, "loss": 0.9525, "step": 3100 }, { "epoch": 0.670196671709531, "grad_norm": 0.9286667108535767, "learning_rate": 1.0365907994259472e-05, "loss": 0.9281, "step": 3101 }, { "epoch": 0.6704127944672574, "grad_norm": 1.105976939201355, "learning_rate": 1.0353641713096863e-05, "loss": 1.0309, "step": 3102 }, { "epoch": 0.6706289172249837, "grad_norm": 1.0517795085906982, "learning_rate": 1.0341380158627512e-05, "loss": 0.9342, "step": 3103 }, { "epoch": 0.6708450399827102, "grad_norm": 1.0220019817352295, "learning_rate": 1.0329123336859559e-05, "loss": 0.8418, "step": 3104 }, { "epoch": 0.6710611627404366, "grad_norm": 0.9805824756622314, "learning_rate": 1.0316871253798813e-05, "loss": 0.9325, "step": 3105 }, { "epoch": 0.6712772854981629, "grad_norm": 0.8305776119232178, "learning_rate": 1.030462391544877e-05, "loss": 0.6997, "step": 3106 }, { "epoch": 0.6714934082558893, "grad_norm": 0.9174684286117554, "learning_rate": 1.0292381327810585e-05, "loss": 0.7971, "step": 3107 }, { "epoch": 0.6717095310136157, "grad_norm": 0.9358829855918884, "learning_rate": 1.0280143496883128e-05, "loss": 0.992, "step": 3108 }, { "epoch": 0.6719256537713422, "grad_norm": 0.946599543094635, "learning_rate": 1.0267910428662878e-05, "loss": 0.8471, "step": 3109 }, { "epoch": 0.6721417765290685, "grad_norm": 1.030730962753296, "learning_rate": 1.0255682129144022e-05, "loss": 0.994, "step": 3110 }, { "epoch": 0.6723578992867949, "grad_norm": 0.9013956189155579, "learning_rate": 1.0243458604318397e-05, "loss": 0.9702, "step": 3111 }, { "epoch": 0.6725740220445213, "grad_norm": 0.9366409778594971, "learning_rate": 1.0231239860175495e-05, "loss": 0.8885, "step": 3112 }, { "epoch": 0.6727901448022476, "grad_norm": 1.0941097736358643, "learning_rate": 1.0219025902702494e-05, "loss": 0.9037, "step": 3113 }, { "epoch": 0.6730062675599741, "grad_norm": 0.9904168248176575, "learning_rate": 1.0206816737884182e-05, "loss": 0.8922, "step": 3114 }, { "epoch": 0.6732223903177005, "grad_norm": 0.9461530447006226, "learning_rate": 1.019461237170303e-05, "loss": 1.0165, "step": 3115 }, { "epoch": 0.6734385130754268, "grad_norm": 0.9304680824279785, "learning_rate": 1.0182412810139142e-05, "loss": 0.7641, "step": 3116 }, { "epoch": 0.6736546358331532, "grad_norm": 0.9654682874679565, "learning_rate": 1.017021805917029e-05, "loss": 0.9698, "step": 3117 }, { "epoch": 0.6738707585908796, "grad_norm": 0.8547962307929993, "learning_rate": 1.0158028124771863e-05, "loss": 0.803, "step": 3118 }, { "epoch": 0.6740868813486061, "grad_norm": 0.9742785692214966, "learning_rate": 1.0145843012916913e-05, "loss": 0.8176, "step": 3119 }, { "epoch": 0.6743030041063324, "grad_norm": 0.9695746302604675, "learning_rate": 1.0133662729576095e-05, "loss": 0.8249, "step": 3120 }, { "epoch": 0.6745191268640588, "grad_norm": 0.9653365612030029, "learning_rate": 1.0121487280717734e-05, "loss": 0.9193, "step": 3121 }, { "epoch": 0.6747352496217852, "grad_norm": 1.0053045749664307, "learning_rate": 1.0109316672307774e-05, "loss": 0.8366, "step": 3122 }, { "epoch": 0.6749513723795115, "grad_norm": 0.9816423654556274, "learning_rate": 1.0097150910309778e-05, "loss": 0.7376, "step": 3123 }, { "epoch": 0.675167495137238, "grad_norm": 0.9633327126502991, "learning_rate": 1.0084990000684947e-05, "loss": 0.9669, "step": 3124 }, { "epoch": 0.6753836178949644, "grad_norm": 0.9444330930709839, "learning_rate": 1.0072833949392091e-05, "loss": 0.8994, "step": 3125 }, { "epoch": 0.6755997406526907, "grad_norm": 0.8973102569580078, "learning_rate": 1.0060682762387655e-05, "loss": 0.9545, "step": 3126 }, { "epoch": 0.6758158634104171, "grad_norm": 0.9260913729667664, "learning_rate": 1.0048536445625688e-05, "loss": 0.9796, "step": 3127 }, { "epoch": 0.6760319861681435, "grad_norm": 1.016711711883545, "learning_rate": 1.0036395005057858e-05, "loss": 0.968, "step": 3128 }, { "epoch": 0.6762481089258698, "grad_norm": 1.183542251586914, "learning_rate": 1.0024258446633444e-05, "loss": 1.1579, "step": 3129 }, { "epoch": 0.6764642316835963, "grad_norm": 0.9324146509170532, "learning_rate": 1.001212677629932e-05, "loss": 0.9397, "step": 3130 }, { "epoch": 0.6766803544413227, "grad_norm": 0.9586891531944275, "learning_rate": 1.0000000000000006e-05, "loss": 0.8797, "step": 3131 }, { "epoch": 0.6768964771990491, "grad_norm": 1.0296969413757324, "learning_rate": 9.987878123677565e-06, "loss": 0.8891, "step": 3132 }, { "epoch": 0.6771125999567754, "grad_norm": 1.126132845878601, "learning_rate": 9.9757611532717e-06, "loss": 0.8492, "step": 3133 }, { "epoch": 0.6773287227145018, "grad_norm": 0.8979195356369019, "learning_rate": 9.96364909471969e-06, "loss": 0.7549, "step": 3134 }, { "epoch": 0.6775448454722283, "grad_norm": 0.852216899394989, "learning_rate": 9.951541953956427e-06, "loss": 0.9574, "step": 3135 }, { "epoch": 0.6777609682299546, "grad_norm": 0.9262779355049133, "learning_rate": 9.939439736914388e-06, "loss": 0.8306, "step": 3136 }, { "epoch": 0.677977090987681, "grad_norm": 0.9397847652435303, "learning_rate": 9.927342449523616e-06, "loss": 0.9153, "step": 3137 }, { "epoch": 0.6781932137454074, "grad_norm": 0.9414774775505066, "learning_rate": 9.915250097711749e-06, "loss": 0.6423, "step": 3138 }, { "epoch": 0.6784093365031337, "grad_norm": 1.0208468437194824, "learning_rate": 9.903162687404028e-06, "loss": 0.9731, "step": 3139 }, { "epoch": 0.6786254592608602, "grad_norm": 0.9117647409439087, "learning_rate": 9.891080224523253e-06, "loss": 0.8163, "step": 3140 }, { "epoch": 0.6788415820185866, "grad_norm": 0.9898706674575806, "learning_rate": 9.879002714989796e-06, "loss": 0.955, "step": 3141 }, { "epoch": 0.679057704776313, "grad_norm": 0.9839960932731628, "learning_rate": 9.866930164721615e-06, "loss": 1.0999, "step": 3142 }, { "epoch": 0.6792738275340393, "grad_norm": 0.9270585179328918, "learning_rate": 9.854862579634228e-06, "loss": 1.1173, "step": 3143 }, { "epoch": 0.6794899502917657, "grad_norm": 1.0187488794326782, "learning_rate": 9.842799965640725e-06, "loss": 0.968, "step": 3144 }, { "epoch": 0.6797060730494922, "grad_norm": 1.1590285301208496, "learning_rate": 9.830742328651759e-06, "loss": 1.1053, "step": 3145 }, { "epoch": 0.6799221958072185, "grad_norm": 1.092870831489563, "learning_rate": 9.818689674575543e-06, "loss": 1.0063, "step": 3146 }, { "epoch": 0.6801383185649449, "grad_norm": 0.9513214230537415, "learning_rate": 9.80664200931785e-06, "loss": 0.9578, "step": 3147 }, { "epoch": 0.6803544413226713, "grad_norm": 0.8436422348022461, "learning_rate": 9.794599338782011e-06, "loss": 0.8834, "step": 3148 }, { "epoch": 0.6805705640803976, "grad_norm": 0.8941531181335449, "learning_rate": 9.782561668868905e-06, "loss": 0.8751, "step": 3149 }, { "epoch": 0.680786686838124, "grad_norm": 1.0576249361038208, "learning_rate": 9.770529005476959e-06, "loss": 1.0757, "step": 3150 }, { "epoch": 0.6810028095958505, "grad_norm": 0.9331166744232178, "learning_rate": 9.758501354502154e-06, "loss": 1.1021, "step": 3151 }, { "epoch": 0.6812189323535768, "grad_norm": 0.8988887667655945, "learning_rate": 9.746478721838004e-06, "loss": 0.7193, "step": 3152 }, { "epoch": 0.6814350551113032, "grad_norm": 1.0191924571990967, "learning_rate": 9.734461113375593e-06, "loss": 0.8743, "step": 3153 }, { "epoch": 0.6816511778690296, "grad_norm": 0.9970690608024597, "learning_rate": 9.722448535003497e-06, "loss": 0.989, "step": 3154 }, { "epoch": 0.681867300626756, "grad_norm": 0.9373263716697693, "learning_rate": 9.710440992607862e-06, "loss": 0.9117, "step": 3155 }, { "epoch": 0.6820834233844824, "grad_norm": 1.0130620002746582, "learning_rate": 9.698438492072346e-06, "loss": 1.0492, "step": 3156 }, { "epoch": 0.6822995461422088, "grad_norm": 0.9624810814857483, "learning_rate": 9.686441039278163e-06, "loss": 0.818, "step": 3157 }, { "epoch": 0.6825156688999352, "grad_norm": 1.0124694108963013, "learning_rate": 9.674448640104028e-06, "loss": 1.0394, "step": 3158 }, { "epoch": 0.6827317916576615, "grad_norm": 1.0002669095993042, "learning_rate": 9.6624613004262e-06, "loss": 1.0167, "step": 3159 }, { "epoch": 0.6829479144153879, "grad_norm": 0.9762430191040039, "learning_rate": 9.650479026118423e-06, "loss": 0.9059, "step": 3160 }, { "epoch": 0.6831640371731144, "grad_norm": 0.8910196423530579, "learning_rate": 9.638501823052002e-06, "loss": 0.8185, "step": 3161 }, { "epoch": 0.6833801599308407, "grad_norm": 1.0277290344238281, "learning_rate": 9.626529697095738e-06, "loss": 0.8963, "step": 3162 }, { "epoch": 0.6835962826885671, "grad_norm": 0.9845807552337646, "learning_rate": 9.614562654115944e-06, "loss": 0.8445, "step": 3163 }, { "epoch": 0.6838124054462935, "grad_norm": 0.9002982974052429, "learning_rate": 9.602600699976439e-06, "loss": 0.9235, "step": 3164 }, { "epoch": 0.6840285282040199, "grad_norm": 0.7742257118225098, "learning_rate": 9.590643840538558e-06, "loss": 0.7285, "step": 3165 }, { "epoch": 0.6842446509617462, "grad_norm": 0.8876941204071045, "learning_rate": 9.578692081661132e-06, "loss": 0.9619, "step": 3166 }, { "epoch": 0.6844607737194727, "grad_norm": 1.004382848739624, "learning_rate": 9.566745429200492e-06, "loss": 1.0215, "step": 3167 }, { "epoch": 0.6846768964771991, "grad_norm": 0.9555290937423706, "learning_rate": 9.554803889010477e-06, "loss": 0.721, "step": 3168 }, { "epoch": 0.6848930192349254, "grad_norm": 0.9498472213745117, "learning_rate": 9.542867466942409e-06, "loss": 1.0724, "step": 3169 }, { "epoch": 0.6851091419926518, "grad_norm": 0.9486294984817505, "learning_rate": 9.530936168845102e-06, "loss": 0.9296, "step": 3170 }, { "epoch": 0.6853252647503782, "grad_norm": 0.8646090030670166, "learning_rate": 9.519010000564888e-06, "loss": 0.7584, "step": 3171 }, { "epoch": 0.6855413875081046, "grad_norm": 0.9679620265960693, "learning_rate": 9.507088967945535e-06, "loss": 0.8904, "step": 3172 }, { "epoch": 0.685757510265831, "grad_norm": 0.877466082572937, "learning_rate": 9.495173076828332e-06, "loss": 0.861, "step": 3173 }, { "epoch": 0.6859736330235574, "grad_norm": 0.8935673832893372, "learning_rate": 9.48326233305203e-06, "loss": 0.8915, "step": 3174 }, { "epoch": 0.6861897557812837, "grad_norm": 0.8707239031791687, "learning_rate": 9.471356742452881e-06, "loss": 0.7875, "step": 3175 }, { "epoch": 0.6864058785390101, "grad_norm": 0.9756590723991394, "learning_rate": 9.4594563108646e-06, "loss": 0.8454, "step": 3176 }, { "epoch": 0.6866220012967366, "grad_norm": 0.7923886179924011, "learning_rate": 9.447561044118349e-06, "loss": 0.7555, "step": 3177 }, { "epoch": 0.686838124054463, "grad_norm": 0.9808708429336548, "learning_rate": 9.435670948042788e-06, "loss": 0.9774, "step": 3178 }, { "epoch": 0.6870542468121893, "grad_norm": 0.9539241194725037, "learning_rate": 9.423786028464049e-06, "loss": 0.8864, "step": 3179 }, { "epoch": 0.6872703695699157, "grad_norm": 1.0560336112976074, "learning_rate": 9.411906291205704e-06, "loss": 0.9401, "step": 3180 }, { "epoch": 0.6874864923276421, "grad_norm": 1.0322731733322144, "learning_rate": 9.400031742088802e-06, "loss": 0.7739, "step": 3181 }, { "epoch": 0.6877026150853685, "grad_norm": 1.0997140407562256, "learning_rate": 9.388162386931842e-06, "loss": 1.0653, "step": 3182 }, { "epoch": 0.6879187378430949, "grad_norm": 0.8863367438316345, "learning_rate": 9.376298231550784e-06, "loss": 0.7619, "step": 3183 }, { "epoch": 0.6881348606008213, "grad_norm": 1.0070819854736328, "learning_rate": 9.364439281759033e-06, "loss": 0.9854, "step": 3184 }, { "epoch": 0.6883509833585476, "grad_norm": 1.0507131814956665, "learning_rate": 9.352585543367448e-06, "loss": 0.9161, "step": 3185 }, { "epoch": 0.688567106116274, "grad_norm": 1.0086380243301392, "learning_rate": 9.340737022184331e-06, "loss": 0.9242, "step": 3186 }, { "epoch": 0.6887832288740005, "grad_norm": 1.051830530166626, "learning_rate": 9.328893724015436e-06, "loss": 1.0695, "step": 3187 }, { "epoch": 0.6889993516317269, "grad_norm": 0.9411728382110596, "learning_rate": 9.317055654663946e-06, "loss": 0.9433, "step": 3188 }, { "epoch": 0.6892154743894532, "grad_norm": 0.9798381328582764, "learning_rate": 9.30522281993049e-06, "loss": 1.023, "step": 3189 }, { "epoch": 0.6894315971471796, "grad_norm": 0.9365191459655762, "learning_rate": 9.29339522561313e-06, "loss": 0.8442, "step": 3190 }, { "epoch": 0.689647719904906, "grad_norm": 0.9904110431671143, "learning_rate": 9.281572877507359e-06, "loss": 0.9847, "step": 3191 }, { "epoch": 0.6898638426626323, "grad_norm": 0.9646345376968384, "learning_rate": 9.269755781406094e-06, "loss": 0.8717, "step": 3192 }, { "epoch": 0.6900799654203588, "grad_norm": 1.0621178150177002, "learning_rate": 9.257943943099698e-06, "loss": 0.6718, "step": 3193 }, { "epoch": 0.6902960881780852, "grad_norm": 0.9507277607917786, "learning_rate": 9.246137368375944e-06, "loss": 0.8953, "step": 3194 }, { "epoch": 0.6905122109358115, "grad_norm": 0.9311648607254028, "learning_rate": 9.234336063020014e-06, "loss": 0.8478, "step": 3195 }, { "epoch": 0.6907283336935379, "grad_norm": 1.0660985708236694, "learning_rate": 9.222540032814522e-06, "loss": 0.9248, "step": 3196 }, { "epoch": 0.6909444564512643, "grad_norm": 0.9388880133628845, "learning_rate": 9.210749283539504e-06, "loss": 0.753, "step": 3197 }, { "epoch": 0.6911605792089907, "grad_norm": 1.06510329246521, "learning_rate": 9.198963820972398e-06, "loss": 0.9045, "step": 3198 }, { "epoch": 0.6913767019667171, "grad_norm": 1.0905274152755737, "learning_rate": 9.187183650888056e-06, "loss": 0.8409, "step": 3199 }, { "epoch": 0.6915928247244435, "grad_norm": 0.9568233489990234, "learning_rate": 9.175408779058716e-06, "loss": 0.815, "step": 3200 }, { "epoch": 0.6918089474821699, "grad_norm": 1.0407795906066895, "learning_rate": 9.163639211254059e-06, "loss": 0.7762, "step": 3201 }, { "epoch": 0.6920250702398962, "grad_norm": 1.0074913501739502, "learning_rate": 9.151874953241138e-06, "loss": 0.9301, "step": 3202 }, { "epoch": 0.6922411929976227, "grad_norm": 0.9492015242576599, "learning_rate": 9.140116010784413e-06, "loss": 1.0045, "step": 3203 }, { "epoch": 0.6924573157553491, "grad_norm": 0.9555363059043884, "learning_rate": 9.128362389645737e-06, "loss": 0.7526, "step": 3204 }, { "epoch": 0.6926734385130754, "grad_norm": 1.1192456483840942, "learning_rate": 9.116614095584361e-06, "loss": 1.0528, "step": 3205 }, { "epoch": 0.6928895612708018, "grad_norm": 0.9081218242645264, "learning_rate": 9.104871134356919e-06, "loss": 0.9308, "step": 3206 }, { "epoch": 0.6931056840285282, "grad_norm": 1.0473381280899048, "learning_rate": 9.093133511717433e-06, "loss": 0.9199, "step": 3207 }, { "epoch": 0.6933218067862545, "grad_norm": 0.8416535258293152, "learning_rate": 9.081401233417315e-06, "loss": 0.7143, "step": 3208 }, { "epoch": 0.693537929543981, "grad_norm": 1.107761263847351, "learning_rate": 9.069674305205352e-06, "loss": 0.8804, "step": 3209 }, { "epoch": 0.6937540523017074, "grad_norm": 1.1008070707321167, "learning_rate": 9.057952732827704e-06, "loss": 0.9937, "step": 3210 }, { "epoch": 0.6939701750594338, "grad_norm": 1.0305111408233643, "learning_rate": 9.046236522027939e-06, "loss": 0.8992, "step": 3211 }, { "epoch": 0.6941862978171601, "grad_norm": 1.0368010997772217, "learning_rate": 9.034525678546948e-06, "loss": 0.938, "step": 3212 }, { "epoch": 0.6944024205748865, "grad_norm": 0.9238491058349609, "learning_rate": 9.022820208123026e-06, "loss": 1.0078, "step": 3213 }, { "epoch": 0.694618543332613, "grad_norm": 0.9420394897460938, "learning_rate": 9.01112011649182e-06, "loss": 0.9367, "step": 3214 }, { "epoch": 0.6948346660903393, "grad_norm": 0.8957251310348511, "learning_rate": 8.999425409386362e-06, "loss": 0.8335, "step": 3215 }, { "epoch": 0.6950507888480657, "grad_norm": 0.9629054069519043, "learning_rate": 8.987736092537029e-06, "loss": 0.9271, "step": 3216 }, { "epoch": 0.6952669116057921, "grad_norm": 1.0926717519760132, "learning_rate": 8.97605217167155e-06, "loss": 1.0634, "step": 3217 }, { "epoch": 0.6954830343635184, "grad_norm": 0.8469776511192322, "learning_rate": 8.964373652515012e-06, "loss": 0.8309, "step": 3218 }, { "epoch": 0.6956991571212449, "grad_norm": 1.1549241542816162, "learning_rate": 8.952700540789884e-06, "loss": 0.977, "step": 3219 }, { "epoch": 0.6959152798789713, "grad_norm": 0.9077586531639099, "learning_rate": 8.941032842215951e-06, "loss": 1.0237, "step": 3220 }, { "epoch": 0.6961314026366976, "grad_norm": 0.9283832311630249, "learning_rate": 8.929370562510363e-06, "loss": 0.9388, "step": 3221 }, { "epoch": 0.696347525394424, "grad_norm": 0.8420954346656799, "learning_rate": 8.917713707387606e-06, "loss": 0.8526, "step": 3222 }, { "epoch": 0.6965636481521504, "grad_norm": 1.092646598815918, "learning_rate": 8.906062282559516e-06, "loss": 0.956, "step": 3223 }, { "epoch": 0.6967797709098769, "grad_norm": 0.9578081965446472, "learning_rate": 8.894416293735259e-06, "loss": 0.8625, "step": 3224 }, { "epoch": 0.6969958936676032, "grad_norm": 0.9161734580993652, "learning_rate": 8.882775746621348e-06, "loss": 0.9652, "step": 3225 }, { "epoch": 0.6972120164253296, "grad_norm": 0.9222764372825623, "learning_rate": 8.871140646921622e-06, "loss": 0.9847, "step": 3226 }, { "epoch": 0.697428139183056, "grad_norm": 1.0095137357711792, "learning_rate": 8.85951100033725e-06, "loss": 0.9466, "step": 3227 }, { "epoch": 0.6976442619407823, "grad_norm": 0.8722392320632935, "learning_rate": 8.847886812566737e-06, "loss": 0.8155, "step": 3228 }, { "epoch": 0.6978603846985088, "grad_norm": 1.0187437534332275, "learning_rate": 8.836268089305904e-06, "loss": 0.9925, "step": 3229 }, { "epoch": 0.6980765074562352, "grad_norm": 0.9919723868370056, "learning_rate": 8.824654836247903e-06, "loss": 0.9173, "step": 3230 }, { "epoch": 0.6982926302139615, "grad_norm": 0.9587224721908569, "learning_rate": 8.813047059083198e-06, "loss": 0.8911, "step": 3231 }, { "epoch": 0.6985087529716879, "grad_norm": 0.9450093507766724, "learning_rate": 8.801444763499565e-06, "loss": 0.9868, "step": 3232 }, { "epoch": 0.6987248757294143, "grad_norm": 1.0732356309890747, "learning_rate": 8.789847955182118e-06, "loss": 0.8334, "step": 3233 }, { "epoch": 0.6989409984871406, "grad_norm": 0.9450024366378784, "learning_rate": 8.778256639813267e-06, "loss": 0.8401, "step": 3234 }, { "epoch": 0.6991571212448671, "grad_norm": 1.0916708707809448, "learning_rate": 8.766670823072714e-06, "loss": 0.851, "step": 3235 }, { "epoch": 0.6993732440025935, "grad_norm": 0.9243761301040649, "learning_rate": 8.755090510637483e-06, "loss": 0.8658, "step": 3236 }, { "epoch": 0.6995893667603199, "grad_norm": 1.0502249002456665, "learning_rate": 8.743515708181914e-06, "loss": 0.8741, "step": 3237 }, { "epoch": 0.6998054895180462, "grad_norm": 1.1587893962860107, "learning_rate": 8.731946421377627e-06, "loss": 0.9818, "step": 3238 }, { "epoch": 0.7000216122757726, "grad_norm": 0.8979175686836243, "learning_rate": 8.720382655893552e-06, "loss": 1.0243, "step": 3239 }, { "epoch": 0.7002377350334991, "grad_norm": 1.0077136754989624, "learning_rate": 8.708824417395887e-06, "loss": 1.0221, "step": 3240 }, { "epoch": 0.7004538577912254, "grad_norm": 0.9645074009895325, "learning_rate": 8.697271711548163e-06, "loss": 1.0476, "step": 3241 }, { "epoch": 0.7006699805489518, "grad_norm": 0.9618235230445862, "learning_rate": 8.685724544011174e-06, "loss": 0.9815, "step": 3242 }, { "epoch": 0.7008861033066782, "grad_norm": 1.1321656703948975, "learning_rate": 8.674182920443002e-06, "loss": 0.8899, "step": 3243 }, { "epoch": 0.7011022260644045, "grad_norm": 0.9900889992713928, "learning_rate": 8.662646846499017e-06, "loss": 0.8192, "step": 3244 }, { "epoch": 0.701318348822131, "grad_norm": 0.9528653621673584, "learning_rate": 8.65111632783187e-06, "loss": 0.6818, "step": 3245 }, { "epoch": 0.7015344715798574, "grad_norm": 0.956042468547821, "learning_rate": 8.639591370091486e-06, "loss": 0.6874, "step": 3246 }, { "epoch": 0.7017505943375838, "grad_norm": 1.0208436250686646, "learning_rate": 8.62807197892507e-06, "loss": 0.8192, "step": 3247 }, { "epoch": 0.7019667170953101, "grad_norm": 0.9951738119125366, "learning_rate": 8.616558159977097e-06, "loss": 0.953, "step": 3248 }, { "epoch": 0.7021828398530365, "grad_norm": 0.9474645853042603, "learning_rate": 8.60504991888931e-06, "loss": 0.9003, "step": 3249 }, { "epoch": 0.702398962610763, "grad_norm": 0.9384509921073914, "learning_rate": 8.593547261300716e-06, "loss": 0.9431, "step": 3250 }, { "epoch": 0.7026150853684893, "grad_norm": 0.9716159701347351, "learning_rate": 8.582050192847608e-06, "loss": 0.8827, "step": 3251 }, { "epoch": 0.7028312081262157, "grad_norm": 0.8509849309921265, "learning_rate": 8.570558719163506e-06, "loss": 0.7682, "step": 3252 }, { "epoch": 0.7030473308839421, "grad_norm": 1.0275623798370361, "learning_rate": 8.559072845879211e-06, "loss": 1.0044, "step": 3253 }, { "epoch": 0.7032634536416684, "grad_norm": 1.024664282798767, "learning_rate": 8.547592578622762e-06, "loss": 0.9481, "step": 3254 }, { "epoch": 0.7034795763993948, "grad_norm": 0.9387202262878418, "learning_rate": 8.536117923019486e-06, "loss": 0.983, "step": 3255 }, { "epoch": 0.7036956991571213, "grad_norm": 0.9498915672302246, "learning_rate": 8.524648884691935e-06, "loss": 0.9352, "step": 3256 }, { "epoch": 0.7039118219148476, "grad_norm": 0.988545835018158, "learning_rate": 8.513185469259894e-06, "loss": 0.7864, "step": 3257 }, { "epoch": 0.704127944672574, "grad_norm": 0.8617672920227051, "learning_rate": 8.501727682340415e-06, "loss": 0.7823, "step": 3258 }, { "epoch": 0.7043440674303004, "grad_norm": 0.9407501220703125, "learning_rate": 8.490275529547798e-06, "loss": 0.8565, "step": 3259 }, { "epoch": 0.7045601901880268, "grad_norm": 0.9361156821250916, "learning_rate": 8.478829016493565e-06, "loss": 0.9448, "step": 3260 }, { "epoch": 0.7047763129457532, "grad_norm": 0.9246730208396912, "learning_rate": 8.467388148786477e-06, "loss": 0.8766, "step": 3261 }, { "epoch": 0.7049924357034796, "grad_norm": 1.061888575553894, "learning_rate": 8.455952932032541e-06, "loss": 0.9644, "step": 3262 }, { "epoch": 0.705208558461206, "grad_norm": 1.057067632675171, "learning_rate": 8.444523371834978e-06, "loss": 0.9544, "step": 3263 }, { "epoch": 0.7054246812189323, "grad_norm": 0.9874102473258972, "learning_rate": 8.433099473794255e-06, "loss": 0.9106, "step": 3264 }, { "epoch": 0.7056408039766587, "grad_norm": 0.8923742771148682, "learning_rate": 8.421681243508048e-06, "loss": 0.764, "step": 3265 }, { "epoch": 0.7058569267343852, "grad_norm": 1.0538125038146973, "learning_rate": 8.410268686571269e-06, "loss": 1.0664, "step": 3266 }, { "epoch": 0.7060730494921115, "grad_norm": 0.9824836254119873, "learning_rate": 8.39886180857604e-06, "loss": 0.7895, "step": 3267 }, { "epoch": 0.7062891722498379, "grad_norm": 0.962027907371521, "learning_rate": 8.387460615111707e-06, "loss": 0.6859, "step": 3268 }, { "epoch": 0.7065052950075643, "grad_norm": 0.8959424495697021, "learning_rate": 8.376065111764829e-06, "loss": 0.9898, "step": 3269 }, { "epoch": 0.7067214177652907, "grad_norm": 0.9475058913230896, "learning_rate": 8.364675304119175e-06, "loss": 0.8121, "step": 3270 }, { "epoch": 0.706937540523017, "grad_norm": 1.3032982349395752, "learning_rate": 8.353291197755724e-06, "loss": 0.998, "step": 3271 }, { "epoch": 0.7071536632807435, "grad_norm": 1.0476473569869995, "learning_rate": 8.341912798252659e-06, "loss": 0.9759, "step": 3272 }, { "epoch": 0.7073697860384699, "grad_norm": 1.0302788019180298, "learning_rate": 8.330540111185377e-06, "loss": 0.9669, "step": 3273 }, { "epoch": 0.7075859087961962, "grad_norm": 1.0285735130310059, "learning_rate": 8.319173142126473e-06, "loss": 0.8233, "step": 3274 }, { "epoch": 0.7078020315539226, "grad_norm": 0.9094539284706116, "learning_rate": 8.307811896645719e-06, "loss": 0.8072, "step": 3275 }, { "epoch": 0.708018154311649, "grad_norm": 0.8531277179718018, "learning_rate": 8.296456380310101e-06, "loss": 0.8927, "step": 3276 }, { "epoch": 0.7082342770693754, "grad_norm": 1.093274712562561, "learning_rate": 8.28510659868381e-06, "loss": 0.8381, "step": 3277 }, { "epoch": 0.7084503998271018, "grad_norm": 1.050874948501587, "learning_rate": 8.273762557328204e-06, "loss": 0.919, "step": 3278 }, { "epoch": 0.7086665225848282, "grad_norm": 0.8907845616340637, "learning_rate": 8.262424261801844e-06, "loss": 0.8325, "step": 3279 }, { "epoch": 0.7088826453425545, "grad_norm": 1.0920202732086182, "learning_rate": 8.251091717660449e-06, "loss": 1.036, "step": 3280 }, { "epoch": 0.7090987681002809, "grad_norm": 1.0165354013442993, "learning_rate": 8.23976493045696e-06, "loss": 0.6763, "step": 3281 }, { "epoch": 0.7093148908580074, "grad_norm": 0.9226462244987488, "learning_rate": 8.22844390574147e-06, "loss": 0.7246, "step": 3282 }, { "epoch": 0.7095310136157338, "grad_norm": 1.0060843229293823, "learning_rate": 8.217128649061252e-06, "loss": 0.8275, "step": 3283 }, { "epoch": 0.7097471363734601, "grad_norm": 0.9231213927268982, "learning_rate": 8.20581916596076e-06, "loss": 1.0298, "step": 3284 }, { "epoch": 0.7099632591311865, "grad_norm": 0.8861151933670044, "learning_rate": 8.194515461981612e-06, "loss": 0.9043, "step": 3285 }, { "epoch": 0.7101793818889129, "grad_norm": 1.0948805809020996, "learning_rate": 8.183217542662596e-06, "loss": 1.028, "step": 3286 }, { "epoch": 0.7103955046466393, "grad_norm": 1.1120010614395142, "learning_rate": 8.17192541353967e-06, "loss": 0.9463, "step": 3287 }, { "epoch": 0.7106116274043657, "grad_norm": 0.9352031350135803, "learning_rate": 8.160639080145947e-06, "loss": 0.8321, "step": 3288 }, { "epoch": 0.7108277501620921, "grad_norm": 1.0009642839431763, "learning_rate": 8.149358548011706e-06, "loss": 0.876, "step": 3289 }, { "epoch": 0.7110438729198184, "grad_norm": 0.907788097858429, "learning_rate": 8.138083822664376e-06, "loss": 0.9031, "step": 3290 }, { "epoch": 0.7112599956775448, "grad_norm": 1.021148681640625, "learning_rate": 8.126814909628565e-06, "loss": 0.9595, "step": 3291 }, { "epoch": 0.7114761184352713, "grad_norm": 1.090847373008728, "learning_rate": 8.115551814425995e-06, "loss": 1.0886, "step": 3292 }, { "epoch": 0.7116922411929977, "grad_norm": 0.9658523201942444, "learning_rate": 8.104294542575562e-06, "loss": 0.9741, "step": 3293 }, { "epoch": 0.711908363950724, "grad_norm": 1.063903570175171, "learning_rate": 8.093043099593298e-06, "loss": 0.986, "step": 3294 }, { "epoch": 0.7121244867084504, "grad_norm": 0.9700491428375244, "learning_rate": 8.081797490992398e-06, "loss": 0.8083, "step": 3295 }, { "epoch": 0.7123406094661768, "grad_norm": 0.9563601613044739, "learning_rate": 8.070557722283176e-06, "loss": 1.1012, "step": 3296 }, { "epoch": 0.7125567322239031, "grad_norm": 1.0904157161712646, "learning_rate": 8.0593237989731e-06, "loss": 0.9117, "step": 3297 }, { "epoch": 0.7127728549816296, "grad_norm": 0.9020714163780212, "learning_rate": 8.048095726566746e-06, "loss": 0.7514, "step": 3298 }, { "epoch": 0.712988977739356, "grad_norm": 0.9047414660453796, "learning_rate": 8.036873510565864e-06, "loss": 0.8639, "step": 3299 }, { "epoch": 0.7132051004970823, "grad_norm": 1.0338889360427856, "learning_rate": 8.025657156469307e-06, "loss": 0.8126, "step": 3300 }, { "epoch": 0.7134212232548087, "grad_norm": 0.9189358949661255, "learning_rate": 8.014446669773061e-06, "loss": 0.7255, "step": 3301 }, { "epoch": 0.7136373460125351, "grad_norm": 0.9422216415405273, "learning_rate": 8.003242055970245e-06, "loss": 0.7908, "step": 3302 }, { "epoch": 0.7138534687702615, "grad_norm": 1.0897268056869507, "learning_rate": 7.992043320551084e-06, "loss": 0.9166, "step": 3303 }, { "epoch": 0.7140695915279879, "grad_norm": 0.9540897011756897, "learning_rate": 7.980850469002939e-06, "loss": 0.8191, "step": 3304 }, { "epoch": 0.7142857142857143, "grad_norm": 0.9342294931411743, "learning_rate": 7.969663506810282e-06, "loss": 0.7946, "step": 3305 }, { "epoch": 0.7145018370434407, "grad_norm": 1.0292030572891235, "learning_rate": 7.958482439454694e-06, "loss": 0.8582, "step": 3306 }, { "epoch": 0.714717959801167, "grad_norm": 0.899174153804779, "learning_rate": 7.947307272414874e-06, "loss": 1.0184, "step": 3307 }, { "epoch": 0.7149340825588935, "grad_norm": 1.0515456199645996, "learning_rate": 7.936138011166633e-06, "loss": 0.9218, "step": 3308 }, { "epoch": 0.7151502053166199, "grad_norm": 1.0137747526168823, "learning_rate": 7.924974661182873e-06, "loss": 1.0679, "step": 3309 }, { "epoch": 0.7153663280743462, "grad_norm": 1.2173030376434326, "learning_rate": 7.91381722793362e-06, "loss": 1.0549, "step": 3310 }, { "epoch": 0.7155824508320726, "grad_norm": 1.0457470417022705, "learning_rate": 7.902665716885985e-06, "loss": 0.7535, "step": 3311 }, { "epoch": 0.715798573589799, "grad_norm": 1.2232424020767212, "learning_rate": 7.891520133504175e-06, "loss": 1.2076, "step": 3312 }, { "epoch": 0.7160146963475253, "grad_norm": 1.0062086582183838, "learning_rate": 7.880380483249519e-06, "loss": 0.92, "step": 3313 }, { "epoch": 0.7162308191052518, "grad_norm": 1.0115803480148315, "learning_rate": 7.869246771580414e-06, "loss": 1.0616, "step": 3314 }, { "epoch": 0.7164469418629782, "grad_norm": 0.9694705009460449, "learning_rate": 7.858119003952344e-06, "loss": 0.9655, "step": 3315 }, { "epoch": 0.7166630646207046, "grad_norm": 0.9992493987083435, "learning_rate": 7.846997185817886e-06, "loss": 0.9105, "step": 3316 }, { "epoch": 0.7168791873784309, "grad_norm": 1.0341936349868774, "learning_rate": 7.83588132262672e-06, "loss": 0.9677, "step": 3317 }, { "epoch": 0.7170953101361573, "grad_norm": 1.082875370979309, "learning_rate": 7.824771419825588e-06, "loss": 1.0106, "step": 3318 }, { "epoch": 0.7173114328938838, "grad_norm": 0.9768566489219666, "learning_rate": 7.81366748285832e-06, "loss": 0.8919, "step": 3319 }, { "epoch": 0.7175275556516101, "grad_norm": 1.0182429552078247, "learning_rate": 7.8025695171658e-06, "loss": 0.9841, "step": 3320 }, { "epoch": 0.7177436784093365, "grad_norm": 1.0493568181991577, "learning_rate": 7.791477528186031e-06, "loss": 1.1323, "step": 3321 }, { "epoch": 0.7179598011670629, "grad_norm": 0.9830143451690674, "learning_rate": 7.780391521354047e-06, "loss": 0.791, "step": 3322 }, { "epoch": 0.7181759239247892, "grad_norm": 1.1327837705612183, "learning_rate": 7.769311502101973e-06, "loss": 0.8785, "step": 3323 }, { "epoch": 0.7183920466825157, "grad_norm": 0.8882994651794434, "learning_rate": 7.758237475858987e-06, "loss": 0.912, "step": 3324 }, { "epoch": 0.7186081694402421, "grad_norm": 1.1242026090621948, "learning_rate": 7.747169448051341e-06, "loss": 0.9498, "step": 3325 }, { "epoch": 0.7188242921979684, "grad_norm": 1.0362653732299805, "learning_rate": 7.736107424102342e-06, "loss": 0.8962, "step": 3326 }, { "epoch": 0.7190404149556948, "grad_norm": 1.0527663230895996, "learning_rate": 7.725051409432353e-06, "loss": 1.0429, "step": 3327 }, { "epoch": 0.7192565377134212, "grad_norm": 0.9332002401351929, "learning_rate": 7.714001409458798e-06, "loss": 0.8325, "step": 3328 }, { "epoch": 0.7194726604711477, "grad_norm": 1.1222304105758667, "learning_rate": 7.702957429596152e-06, "loss": 0.964, "step": 3329 }, { "epoch": 0.719688783228874, "grad_norm": 1.1005209684371948, "learning_rate": 7.691919475255931e-06, "loss": 1.0872, "step": 3330 }, { "epoch": 0.7199049059866004, "grad_norm": 0.9944490194320679, "learning_rate": 7.68088755184673e-06, "loss": 0.858, "step": 3331 }, { "epoch": 0.7201210287443268, "grad_norm": 0.990530788898468, "learning_rate": 7.669861664774143e-06, "loss": 0.7804, "step": 3332 }, { "epoch": 0.7203371515020531, "grad_norm": 1.1296415328979492, "learning_rate": 7.658841819440836e-06, "loss": 0.8683, "step": 3333 }, { "epoch": 0.7205532742597796, "grad_norm": 1.0111063718795776, "learning_rate": 7.647828021246503e-06, "loss": 0.7653, "step": 3334 }, { "epoch": 0.720769397017506, "grad_norm": 0.9293777942657471, "learning_rate": 7.636820275587894e-06, "loss": 1.0058, "step": 3335 }, { "epoch": 0.7209855197752323, "grad_norm": 1.007898211479187, "learning_rate": 7.625818587858769e-06, "loss": 0.9946, "step": 3336 }, { "epoch": 0.7212016425329587, "grad_norm": 1.0067023038864136, "learning_rate": 7.6148229634499396e-06, "loss": 0.9263, "step": 3337 }, { "epoch": 0.7214177652906851, "grad_norm": 0.9708625078201294, "learning_rate": 7.6038334077492105e-06, "loss": 0.8998, "step": 3338 }, { "epoch": 0.7216338880484114, "grad_norm": 0.9538854360580444, "learning_rate": 7.592849926141466e-06, "loss": 0.8437, "step": 3339 }, { "epoch": 0.7218500108061379, "grad_norm": 1.0082589387893677, "learning_rate": 7.581872524008574e-06, "loss": 1.076, "step": 3340 }, { "epoch": 0.7220661335638643, "grad_norm": 1.0913642644882202, "learning_rate": 7.5709012067294395e-06, "loss": 1.1092, "step": 3341 }, { "epoch": 0.7222822563215907, "grad_norm": 0.9131114482879639, "learning_rate": 7.559935979679988e-06, "loss": 0.926, "step": 3342 }, { "epoch": 0.722498379079317, "grad_norm": 1.0407235622406006, "learning_rate": 7.548976848233138e-06, "loss": 0.9838, "step": 3343 }, { "epoch": 0.7227145018370434, "grad_norm": 0.9489783644676208, "learning_rate": 7.538023817758855e-06, "loss": 0.7608, "step": 3344 }, { "epoch": 0.7229306245947699, "grad_norm": 0.939863383769989, "learning_rate": 7.5270768936240924e-06, "loss": 1.0709, "step": 3345 }, { "epoch": 0.7231467473524962, "grad_norm": 1.0594513416290283, "learning_rate": 7.516136081192819e-06, "loss": 1.008, "step": 3346 }, { "epoch": 0.7233628701102226, "grad_norm": 0.9714732766151428, "learning_rate": 7.505201385826009e-06, "loss": 0.8528, "step": 3347 }, { "epoch": 0.723578992867949, "grad_norm": 0.9454349875450134, "learning_rate": 7.4942728128816355e-06, "loss": 0.8362, "step": 3348 }, { "epoch": 0.7237951156256753, "grad_norm": 0.9525579214096069, "learning_rate": 7.4833503677146725e-06, "loss": 0.8455, "step": 3349 }, { "epoch": 0.7240112383834018, "grad_norm": 1.1196388006210327, "learning_rate": 7.472434055677098e-06, "loss": 1.0083, "step": 3350 }, { "epoch": 0.7242273611411282, "grad_norm": 0.9199864864349365, "learning_rate": 7.461523882117876e-06, "loss": 0.967, "step": 3351 }, { "epoch": 0.7244434838988546, "grad_norm": 0.9865084886550903, "learning_rate": 7.450619852382959e-06, "loss": 0.9323, "step": 3352 }, { "epoch": 0.7246596066565809, "grad_norm": 0.8805970549583435, "learning_rate": 7.43972197181531e-06, "loss": 0.8617, "step": 3353 }, { "epoch": 0.7248757294143073, "grad_norm": 0.9994069337844849, "learning_rate": 7.42883024575487e-06, "loss": 0.8431, "step": 3354 }, { "epoch": 0.7250918521720338, "grad_norm": 1.1997559070587158, "learning_rate": 7.41794467953854e-06, "loss": 0.8347, "step": 3355 }, { "epoch": 0.7253079749297601, "grad_norm": 0.9434436559677124, "learning_rate": 7.407065278500225e-06, "loss": 0.9283, "step": 3356 }, { "epoch": 0.7255240976874865, "grad_norm": 1.0402320623397827, "learning_rate": 7.39619204797082e-06, "loss": 1.0099, "step": 3357 }, { "epoch": 0.7257402204452129, "grad_norm": 1.0664135217666626, "learning_rate": 7.3853249932781755e-06, "loss": 0.8768, "step": 3358 }, { "epoch": 0.7259563432029392, "grad_norm": 0.9630206227302551, "learning_rate": 7.374464119747122e-06, "loss": 0.9166, "step": 3359 }, { "epoch": 0.7261724659606656, "grad_norm": 1.0093247890472412, "learning_rate": 7.363609432699466e-06, "loss": 0.9244, "step": 3360 }, { "epoch": 0.7263885887183921, "grad_norm": 0.9457176923751831, "learning_rate": 7.352760937453975e-06, "loss": 0.786, "step": 3361 }, { "epoch": 0.7266047114761184, "grad_norm": 0.9402831792831421, "learning_rate": 7.341918639326391e-06, "loss": 0.8649, "step": 3362 }, { "epoch": 0.7268208342338448, "grad_norm": 0.9374096393585205, "learning_rate": 7.331082543629411e-06, "loss": 0.8783, "step": 3363 }, { "epoch": 0.7270369569915712, "grad_norm": 1.0298699140548706, "learning_rate": 7.320252655672697e-06, "loss": 0.8972, "step": 3364 }, { "epoch": 0.7272530797492976, "grad_norm": 0.945776641368866, "learning_rate": 7.309428980762874e-06, "loss": 1.0332, "step": 3365 }, { "epoch": 0.727469202507024, "grad_norm": 1.0960521697998047, "learning_rate": 7.29861152420351e-06, "loss": 1.0236, "step": 3366 }, { "epoch": 0.7276853252647504, "grad_norm": 0.9372932314872742, "learning_rate": 7.2878002912951395e-06, "loss": 0.9436, "step": 3367 }, { "epoch": 0.7279014480224768, "grad_norm": 0.9429520964622498, "learning_rate": 7.27699528733524e-06, "loss": 0.9814, "step": 3368 }, { "epoch": 0.7281175707802031, "grad_norm": 1.0094513893127441, "learning_rate": 7.266196517618238e-06, "loss": 0.9912, "step": 3369 }, { "epoch": 0.7283336935379295, "grad_norm": 0.9140549898147583, "learning_rate": 7.2554039874355005e-06, "loss": 0.6879, "step": 3370 }, { "epoch": 0.728549816295656, "grad_norm": 1.0616697072982788, "learning_rate": 7.244617702075361e-06, "loss": 0.9242, "step": 3371 }, { "epoch": 0.7287659390533823, "grad_norm": 0.9983554482460022, "learning_rate": 7.233837666823054e-06, "loss": 0.9, "step": 3372 }, { "epoch": 0.7289820618111087, "grad_norm": 0.9800330400466919, "learning_rate": 7.223063886960779e-06, "loss": 0.8686, "step": 3373 }, { "epoch": 0.7291981845688351, "grad_norm": 0.8646646738052368, "learning_rate": 7.212296367767657e-06, "loss": 0.7352, "step": 3374 }, { "epoch": 0.7294143073265615, "grad_norm": 0.8267316818237305, "learning_rate": 7.2015351145197594e-06, "loss": 0.9062, "step": 3375 }, { "epoch": 0.7296304300842879, "grad_norm": 0.9159088730812073, "learning_rate": 7.190780132490071e-06, "loss": 0.9845, "step": 3376 }, { "epoch": 0.7298465528420143, "grad_norm": 1.0310955047607422, "learning_rate": 7.180031426948515e-06, "loss": 0.8794, "step": 3377 }, { "epoch": 0.7300626755997407, "grad_norm": 0.956666111946106, "learning_rate": 7.169289003161908e-06, "loss": 0.8247, "step": 3378 }, { "epoch": 0.730278798357467, "grad_norm": 1.0660452842712402, "learning_rate": 7.1585528663940375e-06, "loss": 0.9081, "step": 3379 }, { "epoch": 0.7304949211151934, "grad_norm": 1.1198481321334839, "learning_rate": 7.147823021905578e-06, "loss": 0.9272, "step": 3380 }, { "epoch": 0.7307110438729199, "grad_norm": 1.1018749475479126, "learning_rate": 7.137099474954125e-06, "loss": 1.1237, "step": 3381 }, { "epoch": 0.7309271666306462, "grad_norm": 1.0701727867126465, "learning_rate": 7.1263822307942045e-06, "loss": 0.9933, "step": 3382 }, { "epoch": 0.7311432893883726, "grad_norm": 0.8856919407844543, "learning_rate": 7.115671294677218e-06, "loss": 0.8479, "step": 3383 }, { "epoch": 0.731359412146099, "grad_norm": 1.0185657739639282, "learning_rate": 7.104966671851517e-06, "loss": 0.8627, "step": 3384 }, { "epoch": 0.7315755349038253, "grad_norm": 1.0855196714401245, "learning_rate": 7.09426836756234e-06, "loss": 0.8953, "step": 3385 }, { "epoch": 0.7317916576615517, "grad_norm": 0.9401381611824036, "learning_rate": 7.083576387051827e-06, "loss": 0.8269, "step": 3386 }, { "epoch": 0.7320077804192782, "grad_norm": 0.9077387452125549, "learning_rate": 7.072890735559028e-06, "loss": 0.7239, "step": 3387 }, { "epoch": 0.7322239031770046, "grad_norm": 1.0916656255722046, "learning_rate": 7.062211418319884e-06, "loss": 0.9714, "step": 3388 }, { "epoch": 0.7324400259347309, "grad_norm": 1.1207377910614014, "learning_rate": 7.051538440567238e-06, "loss": 1.0157, "step": 3389 }, { "epoch": 0.7326561486924573, "grad_norm": 1.0763168334960938, "learning_rate": 7.040871807530825e-06, "loss": 0.8366, "step": 3390 }, { "epoch": 0.7328722714501837, "grad_norm": 1.031316876411438, "learning_rate": 7.030211524437267e-06, "loss": 1.0263, "step": 3391 }, { "epoch": 0.73308839420791, "grad_norm": 1.0733709335327148, "learning_rate": 7.0195575965100735e-06, "loss": 0.6845, "step": 3392 }, { "epoch": 0.7333045169656365, "grad_norm": 1.0250283479690552, "learning_rate": 7.008910028969657e-06, "loss": 0.9607, "step": 3393 }, { "epoch": 0.7335206397233629, "grad_norm": 0.9315289258956909, "learning_rate": 6.998268827033303e-06, "loss": 0.8932, "step": 3394 }, { "epoch": 0.7337367624810892, "grad_norm": 1.0561474561691284, "learning_rate": 6.987633995915164e-06, "loss": 0.8489, "step": 3395 }, { "epoch": 0.7339528852388156, "grad_norm": 1.0175994634628296, "learning_rate": 6.977005540826276e-06, "loss": 1.1645, "step": 3396 }, { "epoch": 0.7341690079965421, "grad_norm": 1.0059309005737305, "learning_rate": 6.966383466974578e-06, "loss": 0.7149, "step": 3397 }, { "epoch": 0.7343851307542685, "grad_norm": 1.010170578956604, "learning_rate": 6.95576777956485e-06, "loss": 0.7357, "step": 3398 }, { "epoch": 0.7346012535119948, "grad_norm": 0.9982577562332153, "learning_rate": 6.9451584837987574e-06, "loss": 0.9607, "step": 3399 }, { "epoch": 0.7348173762697212, "grad_norm": 1.0086729526519775, "learning_rate": 6.934555584874834e-06, "loss": 0.9249, "step": 3400 }, { "epoch": 0.7350334990274476, "grad_norm": 0.9194556474685669, "learning_rate": 6.923959087988459e-06, "loss": 1.0919, "step": 3401 }, { "epoch": 0.735249621785174, "grad_norm": 0.9621269106864929, "learning_rate": 6.913368998331911e-06, "loss": 0.8922, "step": 3402 }, { "epoch": 0.7354657445429004, "grad_norm": 0.914582371711731, "learning_rate": 6.902785321094301e-06, "loss": 0.7621, "step": 3403 }, { "epoch": 0.7356818673006268, "grad_norm": 1.0676108598709106, "learning_rate": 6.892208061461607e-06, "loss": 1.0045, "step": 3404 }, { "epoch": 0.7358979900583531, "grad_norm": 1.0403320789337158, "learning_rate": 6.881637224616662e-06, "loss": 0.9555, "step": 3405 }, { "epoch": 0.7361141128160795, "grad_norm": 0.9418090581893921, "learning_rate": 6.87107281573915e-06, "loss": 0.8053, "step": 3406 }, { "epoch": 0.736330235573806, "grad_norm": 0.8581088185310364, "learning_rate": 6.860514840005612e-06, "loss": 0.7958, "step": 3407 }, { "epoch": 0.7365463583315323, "grad_norm": 0.9900988340377808, "learning_rate": 6.849963302589426e-06, "loss": 0.9443, "step": 3408 }, { "epoch": 0.7367624810892587, "grad_norm": 0.8412300944328308, "learning_rate": 6.839418208660824e-06, "loss": 0.7845, "step": 3409 }, { "epoch": 0.7369786038469851, "grad_norm": 0.9322758913040161, "learning_rate": 6.82887956338687e-06, "loss": 0.948, "step": 3410 }, { "epoch": 0.7371947266047115, "grad_norm": 0.9318099021911621, "learning_rate": 6.818347371931498e-06, "loss": 1.0697, "step": 3411 }, { "epoch": 0.7374108493624378, "grad_norm": 1.023543119430542, "learning_rate": 6.807821639455432e-06, "loss": 1.0122, "step": 3412 }, { "epoch": 0.7376269721201643, "grad_norm": 0.9158769249916077, "learning_rate": 6.7973023711162675e-06, "loss": 0.8321, "step": 3413 }, { "epoch": 0.7378430948778907, "grad_norm": 1.0362530946731567, "learning_rate": 6.786789572068417e-06, "loss": 0.8262, "step": 3414 }, { "epoch": 0.738059217635617, "grad_norm": 0.9433619379997253, "learning_rate": 6.776283247463135e-06, "loss": 0.8907, "step": 3415 }, { "epoch": 0.7382753403933434, "grad_norm": 0.8728352785110474, "learning_rate": 6.765783402448496e-06, "loss": 0.7827, "step": 3416 }, { "epoch": 0.7384914631510698, "grad_norm": 0.9512519836425781, "learning_rate": 6.755290042169402e-06, "loss": 1.0115, "step": 3417 }, { "epoch": 0.7387075859087961, "grad_norm": 0.9114593863487244, "learning_rate": 6.744803171767556e-06, "loss": 0.6862, "step": 3418 }, { "epoch": 0.7389237086665226, "grad_norm": 1.0527325868606567, "learning_rate": 6.734322796381521e-06, "loss": 0.9852, "step": 3419 }, { "epoch": 0.739139831424249, "grad_norm": 0.9113556742668152, "learning_rate": 6.723848921146649e-06, "loss": 0.9002, "step": 3420 }, { "epoch": 0.7393559541819754, "grad_norm": 0.9874826669692993, "learning_rate": 6.71338155119512e-06, "loss": 0.8813, "step": 3421 }, { "epoch": 0.7395720769397017, "grad_norm": 1.0439640283584595, "learning_rate": 6.702920691655919e-06, "loss": 1.0515, "step": 3422 }, { "epoch": 0.7397881996974282, "grad_norm": 0.8799842596054077, "learning_rate": 6.692466347654829e-06, "loss": 0.7147, "step": 3423 }, { "epoch": 0.7400043224551546, "grad_norm": 1.0796020030975342, "learning_rate": 6.682018524314471e-06, "loss": 1.081, "step": 3424 }, { "epoch": 0.7402204452128809, "grad_norm": 0.8543500900268555, "learning_rate": 6.6715772267542515e-06, "loss": 0.8152, "step": 3425 }, { "epoch": 0.7404365679706073, "grad_norm": 0.9580144882202148, "learning_rate": 6.661142460090379e-06, "loss": 0.8699, "step": 3426 }, { "epoch": 0.7406526907283337, "grad_norm": 0.9547476172447205, "learning_rate": 6.650714229435867e-06, "loss": 0.9847, "step": 3427 }, { "epoch": 0.74086881348606, "grad_norm": 0.995082437992096, "learning_rate": 6.640292539900521e-06, "loss": 0.9451, "step": 3428 }, { "epoch": 0.7410849362437865, "grad_norm": 0.9206424951553345, "learning_rate": 6.629877396590952e-06, "loss": 0.8789, "step": 3429 }, { "epoch": 0.7413010590015129, "grad_norm": 0.9735265374183655, "learning_rate": 6.619468804610547e-06, "loss": 0.9124, "step": 3430 }, { "epoch": 0.7415171817592392, "grad_norm": 0.9054409861564636, "learning_rate": 6.609066769059498e-06, "loss": 0.6867, "step": 3431 }, { "epoch": 0.7417333045169656, "grad_norm": 0.9464341402053833, "learning_rate": 6.5986712950347705e-06, "loss": 0.7371, "step": 3432 }, { "epoch": 0.741949427274692, "grad_norm": 1.1124294996261597, "learning_rate": 6.588282387630134e-06, "loss": 0.9704, "step": 3433 }, { "epoch": 0.7421655500324185, "grad_norm": 1.0384552478790283, "learning_rate": 6.577900051936133e-06, "loss": 0.8598, "step": 3434 }, { "epoch": 0.7423816727901448, "grad_norm": 0.8957676291465759, "learning_rate": 6.567524293040071e-06, "loss": 0.8189, "step": 3435 }, { "epoch": 0.7425977955478712, "grad_norm": 0.9393583536148071, "learning_rate": 6.557155116026048e-06, "loss": 0.8454, "step": 3436 }, { "epoch": 0.7428139183055976, "grad_norm": 1.0651594400405884, "learning_rate": 6.54679252597495e-06, "loss": 0.9852, "step": 3437 }, { "epoch": 0.7430300410633239, "grad_norm": 1.1220266819000244, "learning_rate": 6.536436527964414e-06, "loss": 1.0354, "step": 3438 }, { "epoch": 0.7432461638210504, "grad_norm": 0.9720316529273987, "learning_rate": 6.526087127068857e-06, "loss": 0.8795, "step": 3439 }, { "epoch": 0.7434622865787768, "grad_norm": 1.1413424015045166, "learning_rate": 6.5157443283594655e-06, "loss": 1.0138, "step": 3440 }, { "epoch": 0.7436784093365031, "grad_norm": 0.939396858215332, "learning_rate": 6.50540813690417e-06, "loss": 0.8591, "step": 3441 }, { "epoch": 0.7438945320942295, "grad_norm": 1.1626993417739868, "learning_rate": 6.495078557767698e-06, "loss": 0.883, "step": 3442 }, { "epoch": 0.7441106548519559, "grad_norm": 0.9208924174308777, "learning_rate": 6.484755596011514e-06, "loss": 0.7905, "step": 3443 }, { "epoch": 0.7443267776096822, "grad_norm": 0.9631216526031494, "learning_rate": 6.474439256693845e-06, "loss": 0.953, "step": 3444 }, { "epoch": 0.7445429003674087, "grad_norm": 0.9523822665214539, "learning_rate": 6.464129544869675e-06, "loss": 0.8924, "step": 3445 }, { "epoch": 0.7447590231251351, "grad_norm": 1.01498281955719, "learning_rate": 6.453826465590738e-06, "loss": 0.9344, "step": 3446 }, { "epoch": 0.7449751458828615, "grad_norm": 0.7808797359466553, "learning_rate": 6.443530023905518e-06, "loss": 0.7739, "step": 3447 }, { "epoch": 0.7451912686405878, "grad_norm": 1.0282634496688843, "learning_rate": 6.433240224859247e-06, "loss": 0.8307, "step": 3448 }, { "epoch": 0.7454073913983142, "grad_norm": 1.138948917388916, "learning_rate": 6.422957073493905e-06, "loss": 1.1069, "step": 3449 }, { "epoch": 0.7456235141560407, "grad_norm": 1.1635663509368896, "learning_rate": 6.412680574848205e-06, "loss": 0.9551, "step": 3450 }, { "epoch": 0.745839636913767, "grad_norm": 1.016524076461792, "learning_rate": 6.402410733957627e-06, "loss": 1.0311, "step": 3451 }, { "epoch": 0.7460557596714934, "grad_norm": 0.8808252215385437, "learning_rate": 6.392147555854349e-06, "loss": 0.8383, "step": 3452 }, { "epoch": 0.7462718824292198, "grad_norm": 1.1563820838928223, "learning_rate": 6.3818910455673125e-06, "loss": 0.8775, "step": 3453 }, { "epoch": 0.7464880051869461, "grad_norm": 1.0173801183700562, "learning_rate": 6.3716412081221766e-06, "loss": 0.8934, "step": 3454 }, { "epoch": 0.7467041279446726, "grad_norm": 0.996764063835144, "learning_rate": 6.361398048541349e-06, "loss": 0.9363, "step": 3455 }, { "epoch": 0.746920250702399, "grad_norm": 1.1051582098007202, "learning_rate": 6.351161571843953e-06, "loss": 1.063, "step": 3456 }, { "epoch": 0.7471363734601254, "grad_norm": 1.0325502157211304, "learning_rate": 6.340931783045841e-06, "loss": 0.9407, "step": 3457 }, { "epoch": 0.7473524962178517, "grad_norm": 1.139733910560608, "learning_rate": 6.330708687159573e-06, "loss": 1.0407, "step": 3458 }, { "epoch": 0.7475686189755781, "grad_norm": 1.0138351917266846, "learning_rate": 6.320492289194442e-06, "loss": 1.0362, "step": 3459 }, { "epoch": 0.7477847417333046, "grad_norm": 0.9171978235244751, "learning_rate": 6.310282594156474e-06, "loss": 0.7239, "step": 3460 }, { "epoch": 0.7480008644910309, "grad_norm": 1.101423740386963, "learning_rate": 6.300079607048388e-06, "loss": 1.0078, "step": 3461 }, { "epoch": 0.7482169872487573, "grad_norm": 1.0272020101547241, "learning_rate": 6.2898833328696265e-06, "loss": 0.9119, "step": 3462 }, { "epoch": 0.7484331100064837, "grad_norm": 1.0556352138519287, "learning_rate": 6.279693776616338e-06, "loss": 1.0261, "step": 3463 }, { "epoch": 0.74864923276421, "grad_norm": 0.9500029683113098, "learning_rate": 6.269510943281383e-06, "loss": 0.9667, "step": 3464 }, { "epoch": 0.7488653555219364, "grad_norm": 0.8981829881668091, "learning_rate": 6.2593348378543255e-06, "loss": 1.0014, "step": 3465 }, { "epoch": 0.7490814782796629, "grad_norm": 0.9949851036071777, "learning_rate": 6.249165465321432e-06, "loss": 0.8555, "step": 3466 }, { "epoch": 0.7492976010373892, "grad_norm": 0.9537209868431091, "learning_rate": 6.239002830665675e-06, "loss": 0.726, "step": 3467 }, { "epoch": 0.7495137237951156, "grad_norm": 0.8678178191184998, "learning_rate": 6.228846938866717e-06, "loss": 0.9033, "step": 3468 }, { "epoch": 0.749729846552842, "grad_norm": 0.9645637273788452, "learning_rate": 6.218697794900928e-06, "loss": 0.793, "step": 3469 }, { "epoch": 0.7499459693105684, "grad_norm": 0.9632251262664795, "learning_rate": 6.208555403741361e-06, "loss": 0.809, "step": 3470 }, { "epoch": 0.7501620920682948, "grad_norm": 1.032147765159607, "learning_rate": 6.198419770357764e-06, "loss": 0.9394, "step": 3471 }, { "epoch": 0.7503782148260212, "grad_norm": 1.0285770893096924, "learning_rate": 6.188290899716569e-06, "loss": 0.962, "step": 3472 }, { "epoch": 0.7505943375837476, "grad_norm": 0.8958332538604736, "learning_rate": 6.178168796780912e-06, "loss": 0.9542, "step": 3473 }, { "epoch": 0.7508104603414739, "grad_norm": 0.9863196611404419, "learning_rate": 6.168053466510597e-06, "loss": 0.8857, "step": 3474 }, { "epoch": 0.7510265830992003, "grad_norm": 1.0095715522766113, "learning_rate": 6.1579449138621065e-06, "loss": 0.8864, "step": 3475 }, { "epoch": 0.7512427058569268, "grad_norm": 1.0227198600769043, "learning_rate": 6.147843143788601e-06, "loss": 0.8899, "step": 3476 }, { "epoch": 0.7514588286146531, "grad_norm": 0.9959652423858643, "learning_rate": 6.137748161239938e-06, "loss": 0.8684, "step": 3477 }, { "epoch": 0.7516749513723795, "grad_norm": 1.0028258562088013, "learning_rate": 6.127659971162634e-06, "loss": 1.0545, "step": 3478 }, { "epoch": 0.7518910741301059, "grad_norm": 1.0746790170669556, "learning_rate": 6.1175785784998745e-06, "loss": 0.8086, "step": 3479 }, { "epoch": 0.7521071968878323, "grad_norm": 1.0223948955535889, "learning_rate": 6.107503988191528e-06, "loss": 0.7856, "step": 3480 }, { "epoch": 0.7523233196455587, "grad_norm": 0.9098303318023682, "learning_rate": 6.0974362051740985e-06, "loss": 0.7511, "step": 3481 }, { "epoch": 0.7525394424032851, "grad_norm": 1.2489588260650635, "learning_rate": 6.0873752343807965e-06, "loss": 0.9436, "step": 3482 }, { "epoch": 0.7527555651610115, "grad_norm": 0.947459876537323, "learning_rate": 6.077321080741469e-06, "loss": 0.823, "step": 3483 }, { "epoch": 0.7529716879187378, "grad_norm": 1.0971026420593262, "learning_rate": 6.067273749182627e-06, "loss": 0.9406, "step": 3484 }, { "epoch": 0.7531878106764642, "grad_norm": 0.9637070298194885, "learning_rate": 6.057233244627441e-06, "loss": 0.7159, "step": 3485 }, { "epoch": 0.7534039334341907, "grad_norm": 0.9341323971748352, "learning_rate": 6.047199571995732e-06, "loss": 0.9081, "step": 3486 }, { "epoch": 0.753620056191917, "grad_norm": 0.9842247366905212, "learning_rate": 6.03717273620398e-06, "loss": 0.9413, "step": 3487 }, { "epoch": 0.7538361789496434, "grad_norm": 0.920375406742096, "learning_rate": 6.02715274216531e-06, "loss": 0.9189, "step": 3488 }, { "epoch": 0.7540523017073698, "grad_norm": 1.0057052373886108, "learning_rate": 6.017139594789496e-06, "loss": 0.8734, "step": 3489 }, { "epoch": 0.7542684244650961, "grad_norm": 1.1185704469680786, "learning_rate": 6.00713329898295e-06, "loss": 0.9477, "step": 3490 }, { "epoch": 0.7544845472228225, "grad_norm": 0.9550908207893372, "learning_rate": 5.997133859648752e-06, "loss": 0.7867, "step": 3491 }, { "epoch": 0.754700669980549, "grad_norm": 0.9330325722694397, "learning_rate": 5.987141281686588e-06, "loss": 0.8126, "step": 3492 }, { "epoch": 0.7549167927382754, "grad_norm": 0.9264267683029175, "learning_rate": 5.977155569992803e-06, "loss": 0.8604, "step": 3493 }, { "epoch": 0.7551329154960017, "grad_norm": 1.3186193704605103, "learning_rate": 5.967176729460367e-06, "loss": 1.0712, "step": 3494 }, { "epoch": 0.7553490382537281, "grad_norm": 0.9311937689781189, "learning_rate": 5.957204764978899e-06, "loss": 0.736, "step": 3495 }, { "epoch": 0.7555651610114545, "grad_norm": 1.113035798072815, "learning_rate": 5.947239681434634e-06, "loss": 0.8888, "step": 3496 }, { "epoch": 0.7557812837691809, "grad_norm": 1.006145715713501, "learning_rate": 5.937281483710446e-06, "loss": 0.9843, "step": 3497 }, { "epoch": 0.7559974065269073, "grad_norm": 0.9831714034080505, "learning_rate": 5.927330176685817e-06, "loss": 0.9059, "step": 3498 }, { "epoch": 0.7562135292846337, "grad_norm": 0.9994760155677795, "learning_rate": 5.9173857652368645e-06, "loss": 0.909, "step": 3499 }, { "epoch": 0.75642965204236, "grad_norm": 1.0729668140411377, "learning_rate": 5.907448254236339e-06, "loss": 1.023, "step": 3500 }, { "epoch": 0.7566457748000864, "grad_norm": 0.9878201484680176, "learning_rate": 5.89751764855359e-06, "loss": 0.9754, "step": 3501 }, { "epoch": 0.7568618975578129, "grad_norm": 0.9825910329818726, "learning_rate": 5.8875939530545936e-06, "loss": 0.6884, "step": 3502 }, { "epoch": 0.7570780203155393, "grad_norm": 0.946611225605011, "learning_rate": 5.877677172601937e-06, "loss": 0.902, "step": 3503 }, { "epoch": 0.7572941430732656, "grad_norm": 1.0394933223724365, "learning_rate": 5.86776731205482e-06, "loss": 1.0897, "step": 3504 }, { "epoch": 0.757510265830992, "grad_norm": 0.9216761589050293, "learning_rate": 5.857864376269051e-06, "loss": 0.7417, "step": 3505 }, { "epoch": 0.7577263885887184, "grad_norm": 0.8604562878608704, "learning_rate": 5.847968370097045e-06, "loss": 0.9106, "step": 3506 }, { "epoch": 0.7579425113464447, "grad_norm": 1.0855454206466675, "learning_rate": 5.838079298387824e-06, "loss": 0.7717, "step": 3507 }, { "epoch": 0.7581586341041712, "grad_norm": 0.9160546064376831, "learning_rate": 5.82819716598701e-06, "loss": 0.7919, "step": 3508 }, { "epoch": 0.7583747568618976, "grad_norm": 1.1170223951339722, "learning_rate": 5.818321977736822e-06, "loss": 0.7261, "step": 3509 }, { "epoch": 0.7585908796196239, "grad_norm": 1.0788850784301758, "learning_rate": 5.808453738476083e-06, "loss": 1.0544, "step": 3510 }, { "epoch": 0.7588070023773503, "grad_norm": 1.0905154943466187, "learning_rate": 5.7985924530402064e-06, "loss": 0.9653, "step": 3511 }, { "epoch": 0.7590231251350767, "grad_norm": 1.0005922317504883, "learning_rate": 5.788738126261191e-06, "loss": 0.9405, "step": 3512 }, { "epoch": 0.7592392478928031, "grad_norm": 1.1160231828689575, "learning_rate": 5.7788907629676504e-06, "loss": 1.0449, "step": 3513 }, { "epoch": 0.7594553706505295, "grad_norm": 1.0808011293411255, "learning_rate": 5.769050367984765e-06, "loss": 1.06, "step": 3514 }, { "epoch": 0.7596714934082559, "grad_norm": 1.0940054655075073, "learning_rate": 5.759216946134298e-06, "loss": 0.7752, "step": 3515 }, { "epoch": 0.7598876161659823, "grad_norm": 0.9768939018249512, "learning_rate": 5.749390502234606e-06, "loss": 0.8032, "step": 3516 }, { "epoch": 0.7601037389237086, "grad_norm": 0.9841901659965515, "learning_rate": 5.739571041100622e-06, "loss": 0.8549, "step": 3517 }, { "epoch": 0.7603198616814351, "grad_norm": 1.0701329708099365, "learning_rate": 5.729758567543866e-06, "loss": 0.9749, "step": 3518 }, { "epoch": 0.7605359844391615, "grad_norm": 1.1490933895111084, "learning_rate": 5.719953086372425e-06, "loss": 0.9564, "step": 3519 }, { "epoch": 0.7607521071968878, "grad_norm": 0.9661714434623718, "learning_rate": 5.710154602390965e-06, "loss": 0.9353, "step": 3520 }, { "epoch": 0.7609682299546142, "grad_norm": 0.97609943151474, "learning_rate": 5.700363120400707e-06, "loss": 0.9127, "step": 3521 }, { "epoch": 0.7611843527123406, "grad_norm": 1.0656062364578247, "learning_rate": 5.690578645199469e-06, "loss": 0.8924, "step": 3522 }, { "epoch": 0.761400475470067, "grad_norm": 0.9709311723709106, "learning_rate": 5.680801181581617e-06, "loss": 0.8216, "step": 3523 }, { "epoch": 0.7616165982277934, "grad_norm": 1.0572892427444458, "learning_rate": 5.671030734338083e-06, "loss": 0.9399, "step": 3524 }, { "epoch": 0.7618327209855198, "grad_norm": 0.9256997108459473, "learning_rate": 5.661267308256366e-06, "loss": 0.8269, "step": 3525 }, { "epoch": 0.7620488437432462, "grad_norm": 1.0007261037826538, "learning_rate": 5.651510908120521e-06, "loss": 0.8812, "step": 3526 }, { "epoch": 0.7622649665009725, "grad_norm": 1.1655714511871338, "learning_rate": 5.641761538711164e-06, "loss": 0.7871, "step": 3527 }, { "epoch": 0.762481089258699, "grad_norm": 0.970575213432312, "learning_rate": 5.632019204805461e-06, "loss": 1.1446, "step": 3528 }, { "epoch": 0.7626972120164254, "grad_norm": 1.0391998291015625, "learning_rate": 5.622283911177133e-06, "loss": 0.9843, "step": 3529 }, { "epoch": 0.7629133347741517, "grad_norm": 0.9886474013328552, "learning_rate": 5.6125556625964465e-06, "loss": 0.9048, "step": 3530 }, { "epoch": 0.7631294575318781, "grad_norm": 0.9747617244720459, "learning_rate": 5.602834463830238e-06, "loss": 0.8056, "step": 3531 }, { "epoch": 0.7633455802896045, "grad_norm": 1.0145761966705322, "learning_rate": 5.593120319641854e-06, "loss": 0.8992, "step": 3532 }, { "epoch": 0.7635617030473308, "grad_norm": 0.8937737345695496, "learning_rate": 5.583413234791211e-06, "loss": 0.8653, "step": 3533 }, { "epoch": 0.7637778258050573, "grad_norm": 0.9845710396766663, "learning_rate": 5.5737132140347575e-06, "loss": 0.8091, "step": 3534 }, { "epoch": 0.7639939485627837, "grad_norm": 0.993019700050354, "learning_rate": 5.5640202621254714e-06, "loss": 0.8797, "step": 3535 }, { "epoch": 0.76421007132051, "grad_norm": 0.986335813999176, "learning_rate": 5.5543343838128935e-06, "loss": 1.0194, "step": 3536 }, { "epoch": 0.7644261940782364, "grad_norm": 1.0384854078292847, "learning_rate": 5.544655583843079e-06, "loss": 0.9606, "step": 3537 }, { "epoch": 0.7646423168359628, "grad_norm": 0.9218137860298157, "learning_rate": 5.534983866958608e-06, "loss": 0.8564, "step": 3538 }, { "epoch": 0.7648584395936893, "grad_norm": 1.080175518989563, "learning_rate": 5.5253192378985966e-06, "loss": 1.0654, "step": 3539 }, { "epoch": 0.7650745623514156, "grad_norm": 0.9595901966094971, "learning_rate": 5.515661701398705e-06, "loss": 0.8546, "step": 3540 }, { "epoch": 0.765290685109142, "grad_norm": 1.0158528089523315, "learning_rate": 5.506011262191096e-06, "loss": 0.9655, "step": 3541 }, { "epoch": 0.7655068078668684, "grad_norm": 1.0765477418899536, "learning_rate": 5.496367925004462e-06, "loss": 1.0154, "step": 3542 }, { "epoch": 0.7657229306245947, "grad_norm": 1.0074890851974487, "learning_rate": 5.48673169456402e-06, "loss": 0.8622, "step": 3543 }, { "epoch": 0.7659390533823212, "grad_norm": 1.063214898109436, "learning_rate": 5.477102575591495e-06, "loss": 0.984, "step": 3544 }, { "epoch": 0.7661551761400476, "grad_norm": 1.00334894657135, "learning_rate": 5.4674805728051395e-06, "loss": 0.9945, "step": 3545 }, { "epoch": 0.7663712988977739, "grad_norm": 0.9830182194709778, "learning_rate": 5.4578656909197055e-06, "loss": 0.9223, "step": 3546 }, { "epoch": 0.7665874216555003, "grad_norm": 1.033290982246399, "learning_rate": 5.448257934646468e-06, "loss": 0.8841, "step": 3547 }, { "epoch": 0.7668035444132267, "grad_norm": 1.0941208600997925, "learning_rate": 5.438657308693202e-06, "loss": 1.073, "step": 3548 }, { "epoch": 0.7670196671709532, "grad_norm": 0.8555561900138855, "learning_rate": 5.429063817764197e-06, "loss": 0.9096, "step": 3549 }, { "epoch": 0.7672357899286795, "grad_norm": 0.8595163226127625, "learning_rate": 5.419477466560237e-06, "loss": 0.7998, "step": 3550 }, { "epoch": 0.7674519126864059, "grad_norm": 0.8784067630767822, "learning_rate": 5.409898259778612e-06, "loss": 0.822, "step": 3551 }, { "epoch": 0.7676680354441323, "grad_norm": 1.0079318284988403, "learning_rate": 5.400326202113107e-06, "loss": 0.8605, "step": 3552 }, { "epoch": 0.7678841582018586, "grad_norm": 1.1107678413391113, "learning_rate": 5.390761298254019e-06, "loss": 0.9658, "step": 3553 }, { "epoch": 0.768100280959585, "grad_norm": 0.952477753162384, "learning_rate": 5.381203552888128e-06, "loss": 0.851, "step": 3554 }, { "epoch": 0.7683164037173115, "grad_norm": 1.0495015382766724, "learning_rate": 5.371652970698697e-06, "loss": 1.0494, "step": 3555 }, { "epoch": 0.7685325264750378, "grad_norm": 1.0040615797042847, "learning_rate": 5.362109556365496e-06, "loss": 0.951, "step": 3556 }, { "epoch": 0.7687486492327642, "grad_norm": 1.0023738145828247, "learning_rate": 5.352573314564768e-06, "loss": 0.8326, "step": 3557 }, { "epoch": 0.7689647719904906, "grad_norm": 0.9427552819252014, "learning_rate": 5.343044249969263e-06, "loss": 0.9219, "step": 3558 }, { "epoch": 0.7691808947482169, "grad_norm": 1.0693159103393555, "learning_rate": 5.333522367248189e-06, "loss": 0.8595, "step": 3559 }, { "epoch": 0.7693970175059434, "grad_norm": 0.8929608464241028, "learning_rate": 5.324007671067262e-06, "loss": 0.8378, "step": 3560 }, { "epoch": 0.7696131402636698, "grad_norm": 1.0509757995605469, "learning_rate": 5.3145001660886366e-06, "loss": 0.8896, "step": 3561 }, { "epoch": 0.7698292630213962, "grad_norm": 1.0412158966064453, "learning_rate": 5.304999856970987e-06, "loss": 0.8417, "step": 3562 }, { "epoch": 0.7700453857791225, "grad_norm": 1.0653715133666992, "learning_rate": 5.295506748369437e-06, "loss": 0.9056, "step": 3563 }, { "epoch": 0.7702615085368489, "grad_norm": 0.8758518695831299, "learning_rate": 5.286020844935591e-06, "loss": 0.9891, "step": 3564 }, { "epoch": 0.7704776312945754, "grad_norm": 1.0536704063415527, "learning_rate": 5.276542151317514e-06, "loss": 0.9429, "step": 3565 }, { "epoch": 0.7706937540523017, "grad_norm": 0.942868709564209, "learning_rate": 5.267070672159749e-06, "loss": 0.9326, "step": 3566 }, { "epoch": 0.7709098768100281, "grad_norm": 0.9325775504112244, "learning_rate": 5.257606412103298e-06, "loss": 0.8711, "step": 3567 }, { "epoch": 0.7711259995677545, "grad_norm": 0.8976432085037231, "learning_rate": 5.248149375785623e-06, "loss": 0.7371, "step": 3568 }, { "epoch": 0.7713421223254808, "grad_norm": 0.881286084651947, "learning_rate": 5.238699567840655e-06, "loss": 0.947, "step": 3569 }, { "epoch": 0.7715582450832073, "grad_norm": 0.9576743245124817, "learning_rate": 5.229256992898768e-06, "loss": 0.9322, "step": 3570 }, { "epoch": 0.7717743678409337, "grad_norm": 0.9686759114265442, "learning_rate": 5.219821655586821e-06, "loss": 0.9093, "step": 3571 }, { "epoch": 0.77199049059866, "grad_norm": 1.0285251140594482, "learning_rate": 5.210393560528091e-06, "loss": 0.9755, "step": 3572 }, { "epoch": 0.7722066133563864, "grad_norm": 0.9954254627227783, "learning_rate": 5.200972712342327e-06, "loss": 0.9602, "step": 3573 }, { "epoch": 0.7724227361141128, "grad_norm": 0.9787831902503967, "learning_rate": 5.191559115645723e-06, "loss": 0.8698, "step": 3574 }, { "epoch": 0.7726388588718393, "grad_norm": 0.911556601524353, "learning_rate": 5.182152775050917e-06, "loss": 0.8634, "step": 3575 }, { "epoch": 0.7728549816295656, "grad_norm": 0.9864009022712708, "learning_rate": 5.172753695167001e-06, "loss": 0.8141, "step": 3576 }, { "epoch": 0.773071104387292, "grad_norm": 0.9081274271011353, "learning_rate": 5.163361880599505e-06, "loss": 0.9335, "step": 3577 }, { "epoch": 0.7732872271450184, "grad_norm": 0.9300902485847473, "learning_rate": 5.153977335950384e-06, "loss": 0.989, "step": 3578 }, { "epoch": 0.7735033499027447, "grad_norm": 1.0700031518936157, "learning_rate": 5.144600065818044e-06, "loss": 1.0278, "step": 3579 }, { "epoch": 0.7737194726604711, "grad_norm": 1.109338402748108, "learning_rate": 5.1352300747973375e-06, "loss": 0.8681, "step": 3580 }, { "epoch": 0.7739355954181976, "grad_norm": 1.0578832626342773, "learning_rate": 5.125867367479531e-06, "loss": 1.0099, "step": 3581 }, { "epoch": 0.7741517181759239, "grad_norm": 0.8836191296577454, "learning_rate": 5.11651194845233e-06, "loss": 0.9081, "step": 3582 }, { "epoch": 0.7743678409336503, "grad_norm": 0.9299150109291077, "learning_rate": 5.10716382229987e-06, "loss": 0.9009, "step": 3583 }, { "epoch": 0.7745839636913767, "grad_norm": 1.2958821058273315, "learning_rate": 5.0978229936027076e-06, "loss": 1.021, "step": 3584 }, { "epoch": 0.7748000864491031, "grad_norm": 0.9406410455703735, "learning_rate": 5.088489466937832e-06, "loss": 0.8438, "step": 3585 }, { "epoch": 0.7750162092068295, "grad_norm": 1.2616820335388184, "learning_rate": 5.0791632468786445e-06, "loss": 0.9295, "step": 3586 }, { "epoch": 0.7752323319645559, "grad_norm": 0.8569998741149902, "learning_rate": 5.069844337994976e-06, "loss": 0.7257, "step": 3587 }, { "epoch": 0.7754484547222823, "grad_norm": 1.0046062469482422, "learning_rate": 5.0605327448530616e-06, "loss": 0.9712, "step": 3588 }, { "epoch": 0.7756645774800086, "grad_norm": 1.2030833959579468, "learning_rate": 5.0512284720155794e-06, "loss": 0.9328, "step": 3589 }, { "epoch": 0.775880700237735, "grad_norm": 0.9053451418876648, "learning_rate": 5.041931524041584e-06, "loss": 0.7339, "step": 3590 }, { "epoch": 0.7760968229954615, "grad_norm": 1.0405101776123047, "learning_rate": 5.032641905486562e-06, "loss": 0.9354, "step": 3591 }, { "epoch": 0.7763129457531878, "grad_norm": 1.0439729690551758, "learning_rate": 5.023359620902408e-06, "loss": 0.9671, "step": 3592 }, { "epoch": 0.7765290685109142, "grad_norm": 0.9951745867729187, "learning_rate": 5.014084674837414e-06, "loss": 0.7272, "step": 3593 }, { "epoch": 0.7767451912686406, "grad_norm": 0.9379323720932007, "learning_rate": 5.0048170718362965e-06, "loss": 0.8559, "step": 3594 }, { "epoch": 0.7769613140263669, "grad_norm": 0.9604951739311218, "learning_rate": 4.9955568164401456e-06, "loss": 0.7323, "step": 3595 }, { "epoch": 0.7771774367840933, "grad_norm": 0.9695637822151184, "learning_rate": 4.986303913186468e-06, "loss": 0.906, "step": 3596 }, { "epoch": 0.7773935595418198, "grad_norm": 1.0083304643630981, "learning_rate": 4.9770583666091625e-06, "loss": 0.967, "step": 3597 }, { "epoch": 0.7776096822995462, "grad_norm": 1.0379058122634888, "learning_rate": 4.967820181238532e-06, "loss": 0.9315, "step": 3598 }, { "epoch": 0.7778258050572725, "grad_norm": 1.0539214611053467, "learning_rate": 4.958589361601265e-06, "loss": 0.9961, "step": 3599 }, { "epoch": 0.7780419278149989, "grad_norm": 0.9680747985839844, "learning_rate": 4.9493659122204475e-06, "loss": 0.7478, "step": 3600 }, { "epoch": 0.7782580505727253, "grad_norm": 1.0678519010543823, "learning_rate": 4.940149837615527e-06, "loss": 0.9473, "step": 3601 }, { "epoch": 0.7784741733304517, "grad_norm": 1.0017139911651611, "learning_rate": 4.930941142302379e-06, "loss": 0.8501, "step": 3602 }, { "epoch": 0.7786902960881781, "grad_norm": 0.981833815574646, "learning_rate": 4.9217398307932376e-06, "loss": 0.7998, "step": 3603 }, { "epoch": 0.7789064188459045, "grad_norm": 1.018774151802063, "learning_rate": 4.912545907596722e-06, "loss": 0.7805, "step": 3604 }, { "epoch": 0.7791225416036308, "grad_norm": 1.044105887413025, "learning_rate": 4.9033593772178355e-06, "loss": 0.9452, "step": 3605 }, { "epoch": 0.7793386643613572, "grad_norm": 0.9681096076965332, "learning_rate": 4.894180244157956e-06, "loss": 0.7941, "step": 3606 }, { "epoch": 0.7795547871190837, "grad_norm": 1.0327109098434448, "learning_rate": 4.885008512914837e-06, "loss": 0.9058, "step": 3607 }, { "epoch": 0.7797709098768101, "grad_norm": 1.0944807529449463, "learning_rate": 4.875844187982606e-06, "loss": 1.0559, "step": 3608 }, { "epoch": 0.7799870326345364, "grad_norm": 1.0139321088790894, "learning_rate": 4.8666872738517605e-06, "loss": 0.9443, "step": 3609 }, { "epoch": 0.7802031553922628, "grad_norm": 0.9465548992156982, "learning_rate": 4.85753777500916e-06, "loss": 1.0223, "step": 3610 }, { "epoch": 0.7804192781499892, "grad_norm": 0.9195845127105713, "learning_rate": 4.8483956959380595e-06, "loss": 0.7036, "step": 3611 }, { "epoch": 0.7806354009077155, "grad_norm": 1.0407829284667969, "learning_rate": 4.839261041118035e-06, "loss": 0.8927, "step": 3612 }, { "epoch": 0.780851523665442, "grad_norm": 0.9261695146560669, "learning_rate": 4.830133815025055e-06, "loss": 0.7521, "step": 3613 }, { "epoch": 0.7810676464231684, "grad_norm": 0.9677841067314148, "learning_rate": 4.821014022131439e-06, "loss": 0.9074, "step": 3614 }, { "epoch": 0.7812837691808947, "grad_norm": 0.9663761854171753, "learning_rate": 4.811901666905856e-06, "loss": 0.9172, "step": 3615 }, { "epoch": 0.7814998919386211, "grad_norm": 0.9527883529663086, "learning_rate": 4.802796753813353e-06, "loss": 0.9255, "step": 3616 }, { "epoch": 0.7817160146963475, "grad_norm": 1.0191152095794678, "learning_rate": 4.793699287315314e-06, "loss": 0.8335, "step": 3617 }, { "epoch": 0.7819321374540739, "grad_norm": 1.1564364433288574, "learning_rate": 4.784609271869469e-06, "loss": 0.8629, "step": 3618 }, { "epoch": 0.7821482602118003, "grad_norm": 1.0203686952590942, "learning_rate": 4.775526711929901e-06, "loss": 0.8993, "step": 3619 }, { "epoch": 0.7823643829695267, "grad_norm": 1.1355894804000854, "learning_rate": 4.7664516119470565e-06, "loss": 0.8992, "step": 3620 }, { "epoch": 0.7825805057272531, "grad_norm": 0.9424318671226501, "learning_rate": 4.7573839763677045e-06, "loss": 0.976, "step": 3621 }, { "epoch": 0.7827966284849794, "grad_norm": 1.0141879320144653, "learning_rate": 4.748323809634972e-06, "loss": 1.0126, "step": 3622 }, { "epoch": 0.7830127512427059, "grad_norm": 0.9914786219596863, "learning_rate": 4.7392711161883136e-06, "loss": 0.968, "step": 3623 }, { "epoch": 0.7832288740004323, "grad_norm": 1.0773526430130005, "learning_rate": 4.73022590046353e-06, "loss": 1.0232, "step": 3624 }, { "epoch": 0.7834449967581586, "grad_norm": 1.0062161684036255, "learning_rate": 4.721188166892759e-06, "loss": 0.931, "step": 3625 }, { "epoch": 0.783661119515885, "grad_norm": 1.0620781183242798, "learning_rate": 4.712157919904465e-06, "loss": 0.8005, "step": 3626 }, { "epoch": 0.7838772422736114, "grad_norm": 1.038500189781189, "learning_rate": 4.703135163923451e-06, "loss": 0.825, "step": 3627 }, { "epoch": 0.7840933650313378, "grad_norm": 0.9993885159492493, "learning_rate": 4.694119903370837e-06, "loss": 0.803, "step": 3628 }, { "epoch": 0.7843094877890642, "grad_norm": 1.0093708038330078, "learning_rate": 4.685112142664103e-06, "loss": 1.0174, "step": 3629 }, { "epoch": 0.7845256105467906, "grad_norm": 0.9287478923797607, "learning_rate": 4.67611188621701e-06, "loss": 0.9034, "step": 3630 }, { "epoch": 0.784741733304517, "grad_norm": 0.9693188667297363, "learning_rate": 4.667119138439669e-06, "loss": 0.906, "step": 3631 }, { "epoch": 0.7849578560622433, "grad_norm": 0.9690222144126892, "learning_rate": 4.6581339037385045e-06, "loss": 0.7174, "step": 3632 }, { "epoch": 0.7851739788199698, "grad_norm": 1.038748025894165, "learning_rate": 4.649156186516255e-06, "loss": 0.8444, "step": 3633 }, { "epoch": 0.7853901015776962, "grad_norm": 1.1169486045837402, "learning_rate": 4.640185991172002e-06, "loss": 0.9354, "step": 3634 }, { "epoch": 0.7856062243354225, "grad_norm": 0.9531453251838684, "learning_rate": 4.6312233221011e-06, "loss": 0.8381, "step": 3635 }, { "epoch": 0.7858223470931489, "grad_norm": 0.9114736318588257, "learning_rate": 4.622268183695242e-06, "loss": 0.9621, "step": 3636 }, { "epoch": 0.7860384698508753, "grad_norm": 0.9040265083312988, "learning_rate": 4.613320580342422e-06, "loss": 0.8006, "step": 3637 }, { "epoch": 0.7862545926086016, "grad_norm": 0.9460227489471436, "learning_rate": 4.6043805164269516e-06, "loss": 0.8082, "step": 3638 }, { "epoch": 0.7864707153663281, "grad_norm": 1.059830665588379, "learning_rate": 4.595447996329441e-06, "loss": 0.8074, "step": 3639 }, { "epoch": 0.7866868381240545, "grad_norm": 0.9420005679130554, "learning_rate": 4.586523024426808e-06, "loss": 0.8067, "step": 3640 }, { "epoch": 0.7869029608817808, "grad_norm": 0.9644083976745605, "learning_rate": 4.577605605092248e-06, "loss": 0.8934, "step": 3641 }, { "epoch": 0.7871190836395072, "grad_norm": 1.048953652381897, "learning_rate": 4.568695742695297e-06, "loss": 1.0489, "step": 3642 }, { "epoch": 0.7873352063972336, "grad_norm": 1.144123911857605, "learning_rate": 4.559793441601761e-06, "loss": 1.1039, "step": 3643 }, { "epoch": 0.7875513291549601, "grad_norm": 0.9493755102157593, "learning_rate": 4.550898706173745e-06, "loss": 0.9291, "step": 3644 }, { "epoch": 0.7877674519126864, "grad_norm": 1.1302835941314697, "learning_rate": 4.54201154076965e-06, "loss": 0.8519, "step": 3645 }, { "epoch": 0.7879835746704128, "grad_norm": 1.1162554025650024, "learning_rate": 4.533131949744167e-06, "loss": 0.795, "step": 3646 }, { "epoch": 0.7881996974281392, "grad_norm": 0.9301594495773315, "learning_rate": 4.524259937448274e-06, "loss": 0.9605, "step": 3647 }, { "epoch": 0.7884158201858655, "grad_norm": 1.05987548828125, "learning_rate": 4.515395508229239e-06, "loss": 1.0865, "step": 3648 }, { "epoch": 0.788631942943592, "grad_norm": 0.9806153178215027, "learning_rate": 4.506538666430606e-06, "loss": 0.9356, "step": 3649 }, { "epoch": 0.7888480657013184, "grad_norm": 1.012948751449585, "learning_rate": 4.4976894163922126e-06, "loss": 0.9, "step": 3650 }, { "epoch": 0.7890641884590447, "grad_norm": 0.9189282059669495, "learning_rate": 4.4888477624501704e-06, "loss": 0.9075, "step": 3651 }, { "epoch": 0.7892803112167711, "grad_norm": 0.9508541822433472, "learning_rate": 4.4800137089368655e-06, "loss": 0.9728, "step": 3652 }, { "epoch": 0.7894964339744975, "grad_norm": 1.013681411743164, "learning_rate": 4.471187260180967e-06, "loss": 0.9852, "step": 3653 }, { "epoch": 0.789712556732224, "grad_norm": 1.008089303970337, "learning_rate": 4.462368420507414e-06, "loss": 0.8611, "step": 3654 }, { "epoch": 0.7899286794899503, "grad_norm": 0.8755045533180237, "learning_rate": 4.453557194237413e-06, "loss": 0.7341, "step": 3655 }, { "epoch": 0.7901448022476767, "grad_norm": 0.9966463446617126, "learning_rate": 4.4447535856884505e-06, "loss": 0.8665, "step": 3656 }, { "epoch": 0.7903609250054031, "grad_norm": 0.9538276791572571, "learning_rate": 4.435957599174281e-06, "loss": 0.8991, "step": 3657 }, { "epoch": 0.7905770477631294, "grad_norm": 1.0445584058761597, "learning_rate": 4.427169239004902e-06, "loss": 0.9756, "step": 3658 }, { "epoch": 0.7907931705208558, "grad_norm": 0.9147434830665588, "learning_rate": 4.41838850948659e-06, "loss": 0.9259, "step": 3659 }, { "epoch": 0.7910092932785823, "grad_norm": 1.0846123695373535, "learning_rate": 4.4096154149218974e-06, "loss": 0.9764, "step": 3660 }, { "epoch": 0.7912254160363086, "grad_norm": 1.015160083770752, "learning_rate": 4.4008499596096095e-06, "loss": 1.1893, "step": 3661 }, { "epoch": 0.791441538794035, "grad_norm": 0.9260875582695007, "learning_rate": 4.392092147844782e-06, "loss": 0.9678, "step": 3662 }, { "epoch": 0.7916576615517614, "grad_norm": 0.9385184049606323, "learning_rate": 4.383341983918723e-06, "loss": 0.8045, "step": 3663 }, { "epoch": 0.7918737843094877, "grad_norm": 0.9274015426635742, "learning_rate": 4.37459947211899e-06, "loss": 0.9444, "step": 3664 }, { "epoch": 0.7920899070672142, "grad_norm": 0.9285722374916077, "learning_rate": 4.365864616729396e-06, "loss": 0.892, "step": 3665 }, { "epoch": 0.7923060298249406, "grad_norm": 0.9584118723869324, "learning_rate": 4.3571374220299974e-06, "loss": 0.9126, "step": 3666 }, { "epoch": 0.792522152582667, "grad_norm": 0.9379822611808777, "learning_rate": 4.348417892297101e-06, "loss": 0.887, "step": 3667 }, { "epoch": 0.7927382753403933, "grad_norm": 1.0396922826766968, "learning_rate": 4.339706031803252e-06, "loss": 0.9026, "step": 3668 }, { "epoch": 0.7929543980981197, "grad_norm": 1.0380027294158936, "learning_rate": 4.331001844817257e-06, "loss": 0.9497, "step": 3669 }, { "epoch": 0.7931705208558462, "grad_norm": 1.0886414051055908, "learning_rate": 4.3223053356041315e-06, "loss": 0.918, "step": 3670 }, { "epoch": 0.7933866436135725, "grad_norm": 1.024379849433899, "learning_rate": 4.313616508425147e-06, "loss": 1.1016, "step": 3671 }, { "epoch": 0.7936027663712989, "grad_norm": 1.0421767234802246, "learning_rate": 4.304935367537814e-06, "loss": 1.086, "step": 3672 }, { "epoch": 0.7938188891290253, "grad_norm": 0.8832529783248901, "learning_rate": 4.296261917195863e-06, "loss": 0.7619, "step": 3673 }, { "epoch": 0.7940350118867516, "grad_norm": 0.9886100888252258, "learning_rate": 4.287596161649283e-06, "loss": 0.8841, "step": 3674 }, { "epoch": 0.794251134644478, "grad_norm": 1.0766829252243042, "learning_rate": 4.278938105144255e-06, "loss": 1.0098, "step": 3675 }, { "epoch": 0.7944672574022045, "grad_norm": 1.0706645250320435, "learning_rate": 4.270287751923215e-06, "loss": 0.9871, "step": 3676 }, { "epoch": 0.7946833801599308, "grad_norm": 0.9385272264480591, "learning_rate": 4.2616451062248075e-06, "loss": 0.8735, "step": 3677 }, { "epoch": 0.7948995029176572, "grad_norm": 1.014313817024231, "learning_rate": 4.253010172283923e-06, "loss": 1.1954, "step": 3678 }, { "epoch": 0.7951156256753836, "grad_norm": 0.9842823147773743, "learning_rate": 4.244382954331652e-06, "loss": 0.9118, "step": 3679 }, { "epoch": 0.79533174843311, "grad_norm": 1.0659067630767822, "learning_rate": 4.2357634565953165e-06, "loss": 0.821, "step": 3680 }, { "epoch": 0.7955478711908364, "grad_norm": 1.0056623220443726, "learning_rate": 4.2271516832984335e-06, "loss": 0.9094, "step": 3681 }, { "epoch": 0.7957639939485628, "grad_norm": 1.0319385528564453, "learning_rate": 4.218547638660773e-06, "loss": 0.9341, "step": 3682 }, { "epoch": 0.7959801167062892, "grad_norm": 0.9862573146820068, "learning_rate": 4.209951326898285e-06, "loss": 0.968, "step": 3683 }, { "epoch": 0.7961962394640155, "grad_norm": 0.9928536415100098, "learning_rate": 4.201362752223146e-06, "loss": 0.8657, "step": 3684 }, { "epoch": 0.7964123622217419, "grad_norm": 1.0648728609085083, "learning_rate": 4.192781918843738e-06, "loss": 0.8685, "step": 3685 }, { "epoch": 0.7966284849794684, "grad_norm": 1.1425668001174927, "learning_rate": 4.184208830964649e-06, "loss": 1.0023, "step": 3686 }, { "epoch": 0.7968446077371947, "grad_norm": 1.02164626121521, "learning_rate": 4.175643492786672e-06, "loss": 0.8831, "step": 3687 }, { "epoch": 0.7970607304949211, "grad_norm": 1.0058878660202026, "learning_rate": 4.167085908506803e-06, "loss": 0.982, "step": 3688 }, { "epoch": 0.7972768532526475, "grad_norm": 0.8938169479370117, "learning_rate": 4.1585360823182365e-06, "loss": 0.8402, "step": 3689 }, { "epoch": 0.7974929760103739, "grad_norm": 1.107102394104004, "learning_rate": 4.149994018410372e-06, "loss": 0.9722, "step": 3690 }, { "epoch": 0.7977090987681003, "grad_norm": 1.0572689771652222, "learning_rate": 4.141459720968793e-06, "loss": 1.1449, "step": 3691 }, { "epoch": 0.7979252215258267, "grad_norm": 0.9329217672348022, "learning_rate": 4.132933194175299e-06, "loss": 0.871, "step": 3692 }, { "epoch": 0.7981413442835531, "grad_norm": 1.038081169128418, "learning_rate": 4.124414442207858e-06, "loss": 0.8993, "step": 3693 }, { "epoch": 0.7983574670412794, "grad_norm": 1.0002672672271729, "learning_rate": 4.115903469240641e-06, "loss": 0.885, "step": 3694 }, { "epoch": 0.7985735897990058, "grad_norm": 1.032814860343933, "learning_rate": 4.107400279443998e-06, "loss": 0.8953, "step": 3695 }, { "epoch": 0.7987897125567323, "grad_norm": 0.975195050239563, "learning_rate": 4.098904876984486e-06, "loss": 0.7751, "step": 3696 }, { "epoch": 0.7990058353144586, "grad_norm": 1.104385256767273, "learning_rate": 4.090417266024833e-06, "loss": 0.9905, "step": 3697 }, { "epoch": 0.799221958072185, "grad_norm": 1.1246614456176758, "learning_rate": 4.081937450723936e-06, "loss": 0.9624, "step": 3698 }, { "epoch": 0.7994380808299114, "grad_norm": 1.0041158199310303, "learning_rate": 4.073465435236886e-06, "loss": 0.9519, "step": 3699 }, { "epoch": 0.7996542035876377, "grad_norm": 1.0940678119659424, "learning_rate": 4.065001223714959e-06, "loss": 0.8947, "step": 3700 }, { "epoch": 0.7998703263453641, "grad_norm": 1.1004393100738525, "learning_rate": 4.056544820305597e-06, "loss": 0.9583, "step": 3701 }, { "epoch": 0.8000864491030906, "grad_norm": 1.0078845024108887, "learning_rate": 4.0480962291524185e-06, "loss": 0.9829, "step": 3702 }, { "epoch": 0.800302571860817, "grad_norm": 1.1241837739944458, "learning_rate": 4.03965545439521e-06, "loss": 0.9297, "step": 3703 }, { "epoch": 0.8005186946185433, "grad_norm": 0.9753207564353943, "learning_rate": 4.0312225001699355e-06, "loss": 0.8199, "step": 3704 }, { "epoch": 0.8007348173762697, "grad_norm": 0.9606614112854004, "learning_rate": 4.022797370608722e-06, "loss": 0.8788, "step": 3705 }, { "epoch": 0.8009509401339961, "grad_norm": 1.0398199558258057, "learning_rate": 4.014380069839861e-06, "loss": 0.8983, "step": 3706 }, { "epoch": 0.8011670628917225, "grad_norm": 1.0209386348724365, "learning_rate": 4.005970601987814e-06, "loss": 0.7962, "step": 3707 }, { "epoch": 0.8013831856494489, "grad_norm": 0.9006760716438293, "learning_rate": 3.997568971173198e-06, "loss": 0.8473, "step": 3708 }, { "epoch": 0.8015993084071753, "grad_norm": 0.9175861477851868, "learning_rate": 3.989175181512794e-06, "loss": 0.9721, "step": 3709 }, { "epoch": 0.8018154311649016, "grad_norm": 0.9091984033584595, "learning_rate": 3.98078923711954e-06, "loss": 0.9827, "step": 3710 }, { "epoch": 0.802031553922628, "grad_norm": 1.1166619062423706, "learning_rate": 3.972411142102528e-06, "loss": 0.9531, "step": 3711 }, { "epoch": 0.8022476766803545, "grad_norm": 1.0179290771484375, "learning_rate": 3.9640409005670075e-06, "loss": 1.0223, "step": 3712 }, { "epoch": 0.8024637994380809, "grad_norm": 1.015807032585144, "learning_rate": 3.95567851661437e-06, "loss": 0.8241, "step": 3713 }, { "epoch": 0.8026799221958072, "grad_norm": 0.9998385906219482, "learning_rate": 3.947323994342178e-06, "loss": 0.6707, "step": 3714 }, { "epoch": 0.8028960449535336, "grad_norm": 1.1686913967132568, "learning_rate": 3.9389773378441185e-06, "loss": 1.0828, "step": 3715 }, { "epoch": 0.80311216771126, "grad_norm": 1.1071698665618896, "learning_rate": 3.930638551210035e-06, "loss": 0.7809, "step": 3716 }, { "epoch": 0.8033282904689864, "grad_norm": 0.9545592069625854, "learning_rate": 3.922307638525909e-06, "loss": 0.7958, "step": 3717 }, { "epoch": 0.8035444132267128, "grad_norm": 1.0194050073623657, "learning_rate": 3.913984603873877e-06, "loss": 0.9727, "step": 3718 }, { "epoch": 0.8037605359844392, "grad_norm": 0.9267217516899109, "learning_rate": 3.9056694513322054e-06, "loss": 1.0364, "step": 3719 }, { "epoch": 0.8039766587421655, "grad_norm": 0.9400733709335327, "learning_rate": 3.8973621849753044e-06, "loss": 0.7974, "step": 3720 }, { "epoch": 0.8041927814998919, "grad_norm": 1.0166336297988892, "learning_rate": 3.889062808873698e-06, "loss": 0.8838, "step": 3721 }, { "epoch": 0.8044089042576184, "grad_norm": 0.8093613982200623, "learning_rate": 3.880771327094075e-06, "loss": 0.9021, "step": 3722 }, { "epoch": 0.8046250270153447, "grad_norm": 0.9725896120071411, "learning_rate": 3.8724877436992425e-06, "loss": 1.0241, "step": 3723 }, { "epoch": 0.8048411497730711, "grad_norm": 0.9414122700691223, "learning_rate": 3.864212062748132e-06, "loss": 0.8416, "step": 3724 }, { "epoch": 0.8050572725307975, "grad_norm": 1.0157009363174438, "learning_rate": 3.85594428829581e-06, "loss": 0.9229, "step": 3725 }, { "epoch": 0.8052733952885239, "grad_norm": 1.0341070890426636, "learning_rate": 3.8476844243934695e-06, "loss": 0.8497, "step": 3726 }, { "epoch": 0.8054895180462502, "grad_norm": 0.9143247604370117, "learning_rate": 3.83943247508842e-06, "loss": 0.9056, "step": 3727 }, { "epoch": 0.8057056408039767, "grad_norm": 1.0246120691299438, "learning_rate": 3.831188444424101e-06, "loss": 0.7757, "step": 3728 }, { "epoch": 0.8059217635617031, "grad_norm": 0.9455540180206299, "learning_rate": 3.822952336440067e-06, "loss": 0.719, "step": 3729 }, { "epoch": 0.8061378863194294, "grad_norm": 1.1429113149642944, "learning_rate": 3.8147241551719915e-06, "loss": 0.9857, "step": 3730 }, { "epoch": 0.8063540090771558, "grad_norm": 1.106581687927246, "learning_rate": 3.8065039046516594e-06, "loss": 1.0205, "step": 3731 }, { "epoch": 0.8065701318348822, "grad_norm": 0.9925294518470764, "learning_rate": 3.798291588906993e-06, "loss": 0.9074, "step": 3732 }, { "epoch": 0.8067862545926086, "grad_norm": 1.0743608474731445, "learning_rate": 3.790087211961988e-06, "loss": 0.9284, "step": 3733 }, { "epoch": 0.807002377350335, "grad_norm": 1.1088292598724365, "learning_rate": 3.7818907778367763e-06, "loss": 0.798, "step": 3734 }, { "epoch": 0.8072185001080614, "grad_norm": 1.0600242614746094, "learning_rate": 3.7737022905475895e-06, "loss": 0.9915, "step": 3735 }, { "epoch": 0.8074346228657878, "grad_norm": 1.0275952816009521, "learning_rate": 3.765521754106776e-06, "loss": 0.9083, "step": 3736 }, { "epoch": 0.8076507456235141, "grad_norm": 1.030941367149353, "learning_rate": 3.7573491725227774e-06, "loss": 0.7311, "step": 3737 }, { "epoch": 0.8078668683812406, "grad_norm": 1.1321561336517334, "learning_rate": 3.7491845498001334e-06, "loss": 0.849, "step": 3738 }, { "epoch": 0.808082991138967, "grad_norm": 1.012608289718628, "learning_rate": 3.741027889939486e-06, "loss": 1.1026, "step": 3739 }, { "epoch": 0.8082991138966933, "grad_norm": 0.9792577028274536, "learning_rate": 3.7328791969375954e-06, "loss": 0.7981, "step": 3740 }, { "epoch": 0.8085152366544197, "grad_norm": 0.9109253883361816, "learning_rate": 3.7247384747872927e-06, "loss": 0.9318, "step": 3741 }, { "epoch": 0.8087313594121461, "grad_norm": 0.9116306900978088, "learning_rate": 3.7166057274775134e-06, "loss": 0.8796, "step": 3742 }, { "epoch": 0.8089474821698724, "grad_norm": 0.9492202401161194, "learning_rate": 3.708480958993286e-06, "loss": 0.7395, "step": 3743 }, { "epoch": 0.8091636049275989, "grad_norm": 1.0096417665481567, "learning_rate": 3.700364173315729e-06, "loss": 0.9314, "step": 3744 }, { "epoch": 0.8093797276853253, "grad_norm": 1.0050616264343262, "learning_rate": 3.692255374422049e-06, "loss": 1.0423, "step": 3745 }, { "epoch": 0.8095958504430516, "grad_norm": 1.1089311838150024, "learning_rate": 3.684154566285536e-06, "loss": 0.9018, "step": 3746 }, { "epoch": 0.809811973200778, "grad_norm": 1.2634425163269043, "learning_rate": 3.6760617528755682e-06, "loss": 0.9003, "step": 3747 }, { "epoch": 0.8100280959585044, "grad_norm": 0.9780515432357788, "learning_rate": 3.667976938157607e-06, "loss": 1.0411, "step": 3748 }, { "epoch": 0.8102442187162309, "grad_norm": 1.1175509691238403, "learning_rate": 3.65990012609319e-06, "loss": 1.0888, "step": 3749 }, { "epoch": 0.8104603414739572, "grad_norm": 0.9692811369895935, "learning_rate": 3.65183132063994e-06, "loss": 0.9398, "step": 3750 }, { "epoch": 0.8106764642316836, "grad_norm": 0.9459337592124939, "learning_rate": 3.643770525751551e-06, "loss": 0.8167, "step": 3751 }, { "epoch": 0.81089258698941, "grad_norm": 0.9116824865341187, "learning_rate": 3.635717745377796e-06, "loss": 0.7984, "step": 3752 }, { "epoch": 0.8111087097471363, "grad_norm": 0.8943130970001221, "learning_rate": 3.62767298346451e-06, "loss": 0.8053, "step": 3753 }, { "epoch": 0.8113248325048628, "grad_norm": 1.1531084775924683, "learning_rate": 3.6196362439536192e-06, "loss": 0.8493, "step": 3754 }, { "epoch": 0.8115409552625892, "grad_norm": 1.037761926651001, "learning_rate": 3.61160753078311e-06, "loss": 0.9401, "step": 3755 }, { "epoch": 0.8117570780203155, "grad_norm": 1.0017485618591309, "learning_rate": 3.6035868478870196e-06, "loss": 0.822, "step": 3756 }, { "epoch": 0.8119732007780419, "grad_norm": 1.1072742938995361, "learning_rate": 3.5955741991954664e-06, "loss": 0.9453, "step": 3757 }, { "epoch": 0.8121893235357683, "grad_norm": 0.9541094899177551, "learning_rate": 3.5875695886346386e-06, "loss": 1.0183, "step": 3758 }, { "epoch": 0.8124054462934948, "grad_norm": 0.9097086787223816, "learning_rate": 3.579573020126774e-06, "loss": 0.8697, "step": 3759 }, { "epoch": 0.8126215690512211, "grad_norm": 0.9351658225059509, "learning_rate": 3.5715844975901747e-06, "loss": 0.7667, "step": 3760 }, { "epoch": 0.8128376918089475, "grad_norm": 0.9204522967338562, "learning_rate": 3.5636040249391845e-06, "loss": 0.7639, "step": 3761 }, { "epoch": 0.8130538145666739, "grad_norm": 0.9401305913925171, "learning_rate": 3.555631606084231e-06, "loss": 0.9774, "step": 3762 }, { "epoch": 0.8132699373244002, "grad_norm": 1.004342794418335, "learning_rate": 3.547667244931776e-06, "loss": 0.8118, "step": 3763 }, { "epoch": 0.8134860600821266, "grad_norm": 0.8848515748977661, "learning_rate": 3.5397109453843403e-06, "loss": 0.8712, "step": 3764 }, { "epoch": 0.8137021828398531, "grad_norm": 1.2648967504501343, "learning_rate": 3.5317627113404917e-06, "loss": 0.9541, "step": 3765 }, { "epoch": 0.8139183055975794, "grad_norm": 1.0908479690551758, "learning_rate": 3.523822546694844e-06, "loss": 0.9072, "step": 3766 }, { "epoch": 0.8141344283553058, "grad_norm": 0.9648228287696838, "learning_rate": 3.51589045533806e-06, "loss": 0.8983, "step": 3767 }, { "epoch": 0.8143505511130322, "grad_norm": 1.0807435512542725, "learning_rate": 3.507966441156847e-06, "loss": 0.8812, "step": 3768 }, { "epoch": 0.8145666738707585, "grad_norm": 1.1944962739944458, "learning_rate": 3.5000505080339565e-06, "loss": 0.9897, "step": 3769 }, { "epoch": 0.814782796628485, "grad_norm": 0.981046199798584, "learning_rate": 3.492142659848172e-06, "loss": 0.9178, "step": 3770 }, { "epoch": 0.8149989193862114, "grad_norm": 1.0109437704086304, "learning_rate": 3.4842429004743196e-06, "loss": 0.9643, "step": 3771 }, { "epoch": 0.8152150421439378, "grad_norm": 1.007354497909546, "learning_rate": 3.476351233783277e-06, "loss": 0.96, "step": 3772 }, { "epoch": 0.8154311649016641, "grad_norm": 1.2974567413330078, "learning_rate": 3.4684676636419278e-06, "loss": 0.8178, "step": 3773 }, { "epoch": 0.8156472876593905, "grad_norm": 1.0700843334197998, "learning_rate": 3.460592193913208e-06, "loss": 1.0717, "step": 3774 }, { "epoch": 0.815863410417117, "grad_norm": 0.9423474073410034, "learning_rate": 3.4527248284560754e-06, "loss": 0.862, "step": 3775 }, { "epoch": 0.8160795331748433, "grad_norm": 1.1047825813293457, "learning_rate": 3.4448655711255286e-06, "loss": 0.8822, "step": 3776 }, { "epoch": 0.8162956559325697, "grad_norm": 0.9060317873954773, "learning_rate": 3.437014425772587e-06, "loss": 0.8137, "step": 3777 }, { "epoch": 0.8165117786902961, "grad_norm": 0.947235643863678, "learning_rate": 3.429171396244284e-06, "loss": 0.9042, "step": 3778 }, { "epoch": 0.8167279014480224, "grad_norm": 1.0349210500717163, "learning_rate": 3.421336486383686e-06, "loss": 0.9579, "step": 3779 }, { "epoch": 0.8169440242057489, "grad_norm": 1.0050138235092163, "learning_rate": 3.413509700029891e-06, "loss": 0.9266, "step": 3780 }, { "epoch": 0.8171601469634753, "grad_norm": 0.9082103371620178, "learning_rate": 3.405691041018e-06, "loss": 0.8102, "step": 3781 }, { "epoch": 0.8173762697212016, "grad_norm": 0.9560586810112, "learning_rate": 3.397880513179137e-06, "loss": 0.9151, "step": 3782 }, { "epoch": 0.817592392478928, "grad_norm": 1.0189166069030762, "learning_rate": 3.390078120340445e-06, "loss": 0.8613, "step": 3783 }, { "epoch": 0.8178085152366544, "grad_norm": 1.002669095993042, "learning_rate": 3.382283866325078e-06, "loss": 0.8865, "step": 3784 }, { "epoch": 0.8180246379943809, "grad_norm": 0.9827417731285095, "learning_rate": 3.374497754952202e-06, "loss": 1.0344, "step": 3785 }, { "epoch": 0.8182407607521072, "grad_norm": 1.049056887626648, "learning_rate": 3.366719790036994e-06, "loss": 0.9794, "step": 3786 }, { "epoch": 0.8184568835098336, "grad_norm": 1.1441189050674438, "learning_rate": 3.3589499753906375e-06, "loss": 1.0127, "step": 3787 }, { "epoch": 0.81867300626756, "grad_norm": 1.0008662939071655, "learning_rate": 3.351188314820324e-06, "loss": 0.932, "step": 3788 }, { "epoch": 0.8188891290252863, "grad_norm": 1.0514990091323853, "learning_rate": 3.3434348121292493e-06, "loss": 0.9164, "step": 3789 }, { "epoch": 0.8191052517830127, "grad_norm": 1.1653443574905396, "learning_rate": 3.335689471116612e-06, "loss": 1.0593, "step": 3790 }, { "epoch": 0.8193213745407392, "grad_norm": 0.9866675734519958, "learning_rate": 3.327952295577612e-06, "loss": 0.8743, "step": 3791 }, { "epoch": 0.8195374972984655, "grad_norm": 0.9063333868980408, "learning_rate": 3.320223289303448e-06, "loss": 0.7064, "step": 3792 }, { "epoch": 0.8197536200561919, "grad_norm": 0.9756568670272827, "learning_rate": 3.312502456081308e-06, "loss": 0.8558, "step": 3793 }, { "epoch": 0.8199697428139183, "grad_norm": 1.0017309188842773, "learning_rate": 3.3047897996943947e-06, "loss": 0.9177, "step": 3794 }, { "epoch": 0.8201858655716447, "grad_norm": 0.8668156862258911, "learning_rate": 3.2970853239218916e-06, "loss": 0.88, "step": 3795 }, { "epoch": 0.8204019883293711, "grad_norm": 0.9589985013008118, "learning_rate": 3.289389032538961e-06, "loss": 0.9202, "step": 3796 }, { "epoch": 0.8206181110870975, "grad_norm": 0.9581915140151978, "learning_rate": 3.281700929316771e-06, "loss": 0.8171, "step": 3797 }, { "epoch": 0.8208342338448239, "grad_norm": 1.1750407218933105, "learning_rate": 3.274021018022484e-06, "loss": 0.8521, "step": 3798 }, { "epoch": 0.8210503566025502, "grad_norm": 1.0545321702957153, "learning_rate": 3.2663493024192316e-06, "loss": 0.8311, "step": 3799 }, { "epoch": 0.8212664793602766, "grad_norm": 0.9771540760993958, "learning_rate": 3.2586857862661447e-06, "loss": 0.8776, "step": 3800 }, { "epoch": 0.8214826021180031, "grad_norm": 0.9694487452507019, "learning_rate": 3.251030473318313e-06, "loss": 0.8964, "step": 3801 }, { "epoch": 0.8216987248757294, "grad_norm": 0.9411128759384155, "learning_rate": 3.2433833673268358e-06, "loss": 0.9964, "step": 3802 }, { "epoch": 0.8219148476334558, "grad_norm": 1.0032674074172974, "learning_rate": 3.235744472038771e-06, "loss": 1.0625, "step": 3803 }, { "epoch": 0.8221309703911822, "grad_norm": 1.0997395515441895, "learning_rate": 3.228113791197163e-06, "loss": 0.9355, "step": 3804 }, { "epoch": 0.8223470931489085, "grad_norm": 0.9010671377182007, "learning_rate": 3.220491328541027e-06, "loss": 0.8208, "step": 3805 }, { "epoch": 0.822563215906635, "grad_norm": 1.134998083114624, "learning_rate": 3.2128770878053506e-06, "loss": 0.9348, "step": 3806 }, { "epoch": 0.8227793386643614, "grad_norm": 0.963811993598938, "learning_rate": 3.2052710727210945e-06, "loss": 0.9721, "step": 3807 }, { "epoch": 0.8229954614220878, "grad_norm": 1.1139354705810547, "learning_rate": 3.1976732870151903e-06, "loss": 0.8551, "step": 3808 }, { "epoch": 0.8232115841798141, "grad_norm": 0.9749976396560669, "learning_rate": 3.1900837344105317e-06, "loss": 0.8782, "step": 3809 }, { "epoch": 0.8234277069375405, "grad_norm": 1.0936123132705688, "learning_rate": 3.182502418625986e-06, "loss": 0.9987, "step": 3810 }, { "epoch": 0.823643829695267, "grad_norm": 0.9570392966270447, "learning_rate": 3.174929343376374e-06, "loss": 1.0546, "step": 3811 }, { "epoch": 0.8238599524529933, "grad_norm": 0.9953511953353882, "learning_rate": 3.1673645123724992e-06, "loss": 0.8854, "step": 3812 }, { "epoch": 0.8240760752107197, "grad_norm": 1.0623141527175903, "learning_rate": 3.159807929321097e-06, "loss": 0.8543, "step": 3813 }, { "epoch": 0.8242921979684461, "grad_norm": 0.9737940430641174, "learning_rate": 3.1522595979248805e-06, "loss": 0.8359, "step": 3814 }, { "epoch": 0.8245083207261724, "grad_norm": 0.9180198311805725, "learning_rate": 3.144719521882511e-06, "loss": 1.0532, "step": 3815 }, { "epoch": 0.8247244434838988, "grad_norm": 1.090312123298645, "learning_rate": 3.1371877048886156e-06, "loss": 0.9969, "step": 3816 }, { "epoch": 0.8249405662416253, "grad_norm": 0.9804825782775879, "learning_rate": 3.1296641506337687e-06, "loss": 0.9602, "step": 3817 }, { "epoch": 0.8251566889993517, "grad_norm": 0.9461047649383545, "learning_rate": 3.1221488628044837e-06, "loss": 0.7752, "step": 3818 }, { "epoch": 0.825372811757078, "grad_norm": 0.9010140895843506, "learning_rate": 3.1146418450832374e-06, "loss": 0.8004, "step": 3819 }, { "epoch": 0.8255889345148044, "grad_norm": 1.0007201433181763, "learning_rate": 3.1071431011484555e-06, "loss": 0.9878, "step": 3820 }, { "epoch": 0.8258050572725308, "grad_norm": 0.9827144742012024, "learning_rate": 3.099652634674506e-06, "loss": 1.0481, "step": 3821 }, { "epoch": 0.8260211800302572, "grad_norm": 1.028521180152893, "learning_rate": 3.0921704493316973e-06, "loss": 1.04, "step": 3822 }, { "epoch": 0.8262373027879836, "grad_norm": 0.9806821346282959, "learning_rate": 3.0846965487862813e-06, "loss": 0.9917, "step": 3823 }, { "epoch": 0.82645342554571, "grad_norm": 0.9888787865638733, "learning_rate": 3.077230936700455e-06, "loss": 0.8864, "step": 3824 }, { "epoch": 0.8266695483034363, "grad_norm": 0.9758038520812988, "learning_rate": 3.06977361673235e-06, "loss": 0.881, "step": 3825 }, { "epoch": 0.8268856710611627, "grad_norm": 0.9353499412536621, "learning_rate": 3.0623245925360344e-06, "loss": 0.8202, "step": 3826 }, { "epoch": 0.8271017938188892, "grad_norm": 0.9721300005912781, "learning_rate": 3.0548838677615154e-06, "loss": 1.1367, "step": 3827 }, { "epoch": 0.8273179165766155, "grad_norm": 1.3146761655807495, "learning_rate": 3.04745144605473e-06, "loss": 0.9314, "step": 3828 }, { "epoch": 0.8275340393343419, "grad_norm": 1.1623433828353882, "learning_rate": 3.040027331057547e-06, "loss": 1.0629, "step": 3829 }, { "epoch": 0.8277501620920683, "grad_norm": 0.8271481990814209, "learning_rate": 3.0326115264077672e-06, "loss": 0.768, "step": 3830 }, { "epoch": 0.8279662848497947, "grad_norm": 0.9230291247367859, "learning_rate": 3.0252040357391156e-06, "loss": 0.9952, "step": 3831 }, { "epoch": 0.828182407607521, "grad_norm": 0.9363948702812195, "learning_rate": 3.0178048626812464e-06, "loss": 0.8299, "step": 3832 }, { "epoch": 0.8283985303652475, "grad_norm": 0.939526379108429, "learning_rate": 3.0104140108597323e-06, "loss": 0.8855, "step": 3833 }, { "epoch": 0.8286146531229739, "grad_norm": 0.9929744601249695, "learning_rate": 3.003031483896084e-06, "loss": 0.8704, "step": 3834 }, { "epoch": 0.8288307758807002, "grad_norm": 1.0379951000213623, "learning_rate": 2.9956572854077205e-06, "loss": 0.9567, "step": 3835 }, { "epoch": 0.8290468986384266, "grad_norm": 0.9431433081626892, "learning_rate": 2.988291419007976e-06, "loss": 0.8092, "step": 3836 }, { "epoch": 0.829263021396153, "grad_norm": 0.9728435277938843, "learning_rate": 2.9809338883061035e-06, "loss": 1.0015, "step": 3837 }, { "epoch": 0.8294791441538794, "grad_norm": 1.0361050367355347, "learning_rate": 2.97358469690729e-06, "loss": 0.9186, "step": 3838 }, { "epoch": 0.8296952669116058, "grad_norm": 0.9760729074478149, "learning_rate": 2.9662438484126155e-06, "loss": 0.8529, "step": 3839 }, { "epoch": 0.8299113896693322, "grad_norm": 0.965825617313385, "learning_rate": 2.9589113464190844e-06, "loss": 0.8245, "step": 3840 }, { "epoch": 0.8301275124270586, "grad_norm": 1.0055328607559204, "learning_rate": 2.9515871945195897e-06, "loss": 0.8166, "step": 3841 }, { "epoch": 0.8303436351847849, "grad_norm": 1.0321468114852905, "learning_rate": 2.9442713963029645e-06, "loss": 0.9099, "step": 3842 }, { "epoch": 0.8305597579425114, "grad_norm": 1.145390510559082, "learning_rate": 2.9369639553539266e-06, "loss": 1.1025, "step": 3843 }, { "epoch": 0.8307758807002378, "grad_norm": 1.0296893119812012, "learning_rate": 2.9296648752531085e-06, "loss": 1.1091, "step": 3844 }, { "epoch": 0.8309920034579641, "grad_norm": 0.9882704615592957, "learning_rate": 2.9223741595770392e-06, "loss": 1.1372, "step": 3845 }, { "epoch": 0.8312081262156905, "grad_norm": 0.9929832816123962, "learning_rate": 2.9150918118981542e-06, "loss": 0.8435, "step": 3846 }, { "epoch": 0.8314242489734169, "grad_norm": 0.913496196269989, "learning_rate": 2.9078178357847874e-06, "loss": 0.7169, "step": 3847 }, { "epoch": 0.8316403717311432, "grad_norm": 1.0215977430343628, "learning_rate": 2.900552234801168e-06, "loss": 0.9791, "step": 3848 }, { "epoch": 0.8318564944888697, "grad_norm": 0.9647204279899597, "learning_rate": 2.8932950125074287e-06, "loss": 0.9165, "step": 3849 }, { "epoch": 0.8320726172465961, "grad_norm": 0.969436764717102, "learning_rate": 2.8860461724595846e-06, "loss": 0.8661, "step": 3850 }, { "epoch": 0.8322887400043224, "grad_norm": 0.8607776165008545, "learning_rate": 2.8788057182095518e-06, "loss": 0.7945, "step": 3851 }, { "epoch": 0.8325048627620488, "grad_norm": 0.977681577205658, "learning_rate": 2.8715736533051485e-06, "loss": 0.8304, "step": 3852 }, { "epoch": 0.8327209855197752, "grad_norm": 0.9332045912742615, "learning_rate": 2.864349981290053e-06, "loss": 0.8605, "step": 3853 }, { "epoch": 0.8329371082775017, "grad_norm": 0.9848089814186096, "learning_rate": 2.8571347057038566e-06, "loss": 0.9698, "step": 3854 }, { "epoch": 0.833153231035228, "grad_norm": 1.0630346536636353, "learning_rate": 2.84992783008202e-06, "loss": 0.9879, "step": 3855 }, { "epoch": 0.8333693537929544, "grad_norm": 1.0251802206039429, "learning_rate": 2.8427293579559067e-06, "loss": 0.8216, "step": 3856 }, { "epoch": 0.8335854765506808, "grad_norm": 0.9811074733734131, "learning_rate": 2.835539292852745e-06, "loss": 0.8215, "step": 3857 }, { "epoch": 0.8338015993084071, "grad_norm": 1.0241789817810059, "learning_rate": 2.828357638295658e-06, "loss": 1.0037, "step": 3858 }, { "epoch": 0.8340177220661336, "grad_norm": 1.0992343425750732, "learning_rate": 2.8211843978036223e-06, "loss": 0.8555, "step": 3859 }, { "epoch": 0.83423384482386, "grad_norm": 1.0655280351638794, "learning_rate": 2.8140195748915243e-06, "loss": 0.8525, "step": 3860 }, { "epoch": 0.8344499675815863, "grad_norm": 1.097280740737915, "learning_rate": 2.8068631730701067e-06, "loss": 0.8614, "step": 3861 }, { "epoch": 0.8346660903393127, "grad_norm": 1.0304309129714966, "learning_rate": 2.7997151958459888e-06, "loss": 0.9039, "step": 3862 }, { "epoch": 0.8348822130970391, "grad_norm": 0.9885969758033752, "learning_rate": 2.7925756467216646e-06, "loss": 0.9911, "step": 3863 }, { "epoch": 0.8350983358547656, "grad_norm": 0.8733798265457153, "learning_rate": 2.785444529195498e-06, "loss": 0.8581, "step": 3864 }, { "epoch": 0.8353144586124919, "grad_norm": 1.0050156116485596, "learning_rate": 2.7783218467617134e-06, "loss": 0.802, "step": 3865 }, { "epoch": 0.8355305813702183, "grad_norm": 1.1616407632827759, "learning_rate": 2.7712076029104152e-06, "loss": 0.9401, "step": 3866 }, { "epoch": 0.8357467041279447, "grad_norm": 0.8896188139915466, "learning_rate": 2.7641018011275657e-06, "loss": 0.9754, "step": 3867 }, { "epoch": 0.835962826885671, "grad_norm": 0.9471938610076904, "learning_rate": 2.7570044448949886e-06, "loss": 0.8151, "step": 3868 }, { "epoch": 0.8361789496433975, "grad_norm": 0.9115317463874817, "learning_rate": 2.749915537690373e-06, "loss": 0.9089, "step": 3869 }, { "epoch": 0.8363950724011239, "grad_norm": 0.8532413244247437, "learning_rate": 2.7428350829872675e-06, "loss": 0.815, "step": 3870 }, { "epoch": 0.8366111951588502, "grad_norm": 1.0686910152435303, "learning_rate": 2.7357630842550785e-06, "loss": 0.8564, "step": 3871 }, { "epoch": 0.8368273179165766, "grad_norm": 0.8713759183883667, "learning_rate": 2.7286995449590703e-06, "loss": 1.0198, "step": 3872 }, { "epoch": 0.837043440674303, "grad_norm": 1.0089508295059204, "learning_rate": 2.7216444685603537e-06, "loss": 0.9313, "step": 3873 }, { "epoch": 0.8372595634320293, "grad_norm": 0.9502614140510559, "learning_rate": 2.714597858515913e-06, "loss": 0.8456, "step": 3874 }, { "epoch": 0.8374756861897558, "grad_norm": 1.1013680696487427, "learning_rate": 2.7075597182785653e-06, "loss": 0.8482, "step": 3875 }, { "epoch": 0.8376918089474822, "grad_norm": 0.9405859112739563, "learning_rate": 2.7005300512969766e-06, "loss": 0.8086, "step": 3876 }, { "epoch": 0.8379079317052086, "grad_norm": 1.0673246383666992, "learning_rate": 2.693508861015668e-06, "loss": 0.8902, "step": 3877 }, { "epoch": 0.8381240544629349, "grad_norm": 0.8461023569107056, "learning_rate": 2.6864961508750154e-06, "loss": 0.9145, "step": 3878 }, { "epoch": 0.8383401772206613, "grad_norm": 1.0020347833633423, "learning_rate": 2.679491924311226e-06, "loss": 0.8721, "step": 3879 }, { "epoch": 0.8385562999783878, "grad_norm": 1.1831213235855103, "learning_rate": 2.672496184756359e-06, "loss": 0.8828, "step": 3880 }, { "epoch": 0.8387724227361141, "grad_norm": 0.860724925994873, "learning_rate": 2.665508935638297e-06, "loss": 0.8307, "step": 3881 }, { "epoch": 0.8389885454938405, "grad_norm": 1.030531883239746, "learning_rate": 2.6585301803807894e-06, "loss": 0.7885, "step": 3882 }, { "epoch": 0.8392046682515669, "grad_norm": 1.1311169862747192, "learning_rate": 2.651559922403406e-06, "loss": 0.8929, "step": 3883 }, { "epoch": 0.8394207910092932, "grad_norm": 1.1814595460891724, "learning_rate": 2.644598165121557e-06, "loss": 1.0145, "step": 3884 }, { "epoch": 0.8396369137670197, "grad_norm": 0.9401963949203491, "learning_rate": 2.6376449119464864e-06, "loss": 1.0197, "step": 3885 }, { "epoch": 0.8398530365247461, "grad_norm": 0.8778254389762878, "learning_rate": 2.630700166285274e-06, "loss": 0.8003, "step": 3886 }, { "epoch": 0.8400691592824725, "grad_norm": 0.935158908367157, "learning_rate": 2.623763931540828e-06, "loss": 1.071, "step": 3887 }, { "epoch": 0.8402852820401988, "grad_norm": 0.9393670558929443, "learning_rate": 2.616836211111886e-06, "loss": 0.8125, "step": 3888 }, { "epoch": 0.8405014047979252, "grad_norm": 0.9210618734359741, "learning_rate": 2.609917008393019e-06, "loss": 0.8869, "step": 3889 }, { "epoch": 0.8407175275556517, "grad_norm": 0.917171061038971, "learning_rate": 2.603006326774615e-06, "loss": 0.958, "step": 3890 }, { "epoch": 0.840933650313378, "grad_norm": 0.9822640419006348, "learning_rate": 2.5961041696428923e-06, "loss": 0.7881, "step": 3891 }, { "epoch": 0.8411497730711044, "grad_norm": 0.9905669093132019, "learning_rate": 2.589210540379903e-06, "loss": 0.8828, "step": 3892 }, { "epoch": 0.8413658958288308, "grad_norm": 1.043694019317627, "learning_rate": 2.5823254423634957e-06, "loss": 0.8347, "step": 3893 }, { "epoch": 0.8415820185865571, "grad_norm": 0.9720776677131653, "learning_rate": 2.5754488789673595e-06, "loss": 0.8232, "step": 3894 }, { "epoch": 0.8417981413442835, "grad_norm": 1.063287615776062, "learning_rate": 2.5685808535609867e-06, "loss": 0.9028, "step": 3895 }, { "epoch": 0.84201426410201, "grad_norm": 0.9968577027320862, "learning_rate": 2.5617213695097045e-06, "loss": 0.8873, "step": 3896 }, { "epoch": 0.8422303868597363, "grad_norm": 0.8752455711364746, "learning_rate": 2.554870430174641e-06, "loss": 0.8159, "step": 3897 }, { "epoch": 0.8424465096174627, "grad_norm": 1.0653284788131714, "learning_rate": 2.5480280389127422e-06, "loss": 0.9913, "step": 3898 }, { "epoch": 0.8426626323751891, "grad_norm": 0.8732882738113403, "learning_rate": 2.541194199076753e-06, "loss": 0.8493, "step": 3899 }, { "epoch": 0.8428787551329155, "grad_norm": 0.9469903707504272, "learning_rate": 2.534368914015253e-06, "loss": 0.8376, "step": 3900 }, { "epoch": 0.8430948778906419, "grad_norm": 0.9434764981269836, "learning_rate": 2.5275521870726107e-06, "loss": 0.7804, "step": 3901 }, { "epoch": 0.8433110006483683, "grad_norm": 1.004648208618164, "learning_rate": 2.520744021589003e-06, "loss": 0.9445, "step": 3902 }, { "epoch": 0.8435271234060947, "grad_norm": 0.8740761280059814, "learning_rate": 2.5139444209004248e-06, "loss": 0.9413, "step": 3903 }, { "epoch": 0.843743246163821, "grad_norm": 1.0177165269851685, "learning_rate": 2.50715338833865e-06, "loss": 0.9606, "step": 3904 }, { "epoch": 0.8439593689215474, "grad_norm": 0.9700515270233154, "learning_rate": 2.5003709272312814e-06, "loss": 0.7797, "step": 3905 }, { "epoch": 0.8441754916792739, "grad_norm": 1.0586341619491577, "learning_rate": 2.493597040901705e-06, "loss": 1.1064, "step": 3906 }, { "epoch": 0.8443916144370002, "grad_norm": 0.9335659742355347, "learning_rate": 2.4868317326691107e-06, "loss": 0.8268, "step": 3907 }, { "epoch": 0.8446077371947266, "grad_norm": 1.0961978435516357, "learning_rate": 2.4800750058484814e-06, "loss": 0.809, "step": 3908 }, { "epoch": 0.844823859952453, "grad_norm": 0.8870816826820374, "learning_rate": 2.4733268637506e-06, "loss": 0.8485, "step": 3909 }, { "epoch": 0.8450399827101793, "grad_norm": 0.8989795446395874, "learning_rate": 2.466587309682038e-06, "loss": 0.8478, "step": 3910 }, { "epoch": 0.8452561054679057, "grad_norm": 1.1302543878555298, "learning_rate": 2.459856346945164e-06, "loss": 0.9803, "step": 3911 }, { "epoch": 0.8454722282256322, "grad_norm": 0.9858680367469788, "learning_rate": 2.4531339788381337e-06, "loss": 0.8998, "step": 3912 }, { "epoch": 0.8456883509833586, "grad_norm": 0.9979439973831177, "learning_rate": 2.4464202086548874e-06, "loss": 0.9993, "step": 3913 }, { "epoch": 0.8459044737410849, "grad_norm": 0.9646304249763489, "learning_rate": 2.439715039685162e-06, "loss": 0.7718, "step": 3914 }, { "epoch": 0.8461205964988113, "grad_norm": 0.9694345593452454, "learning_rate": 2.4330184752144815e-06, "loss": 0.925, "step": 3915 }, { "epoch": 0.8463367192565378, "grad_norm": 0.9699779152870178, "learning_rate": 2.4263305185241316e-06, "loss": 0.7816, "step": 3916 }, { "epoch": 0.8465528420142641, "grad_norm": 1.027593731880188, "learning_rate": 2.419651172891202e-06, "loss": 0.9126, "step": 3917 }, { "epoch": 0.8467689647719905, "grad_norm": 0.9932441711425781, "learning_rate": 2.4129804415885593e-06, "loss": 0.9482, "step": 3918 }, { "epoch": 0.8469850875297169, "grad_norm": 1.0544880628585815, "learning_rate": 2.406318327884847e-06, "loss": 0.9531, "step": 3919 }, { "epoch": 0.8472012102874432, "grad_norm": 1.0541728734970093, "learning_rate": 2.3996648350444816e-06, "loss": 0.9116, "step": 3920 }, { "epoch": 0.8474173330451696, "grad_norm": 1.0878851413726807, "learning_rate": 2.3930199663276633e-06, "loss": 1.0167, "step": 3921 }, { "epoch": 0.8476334558028961, "grad_norm": 1.073660135269165, "learning_rate": 2.3863837249903577e-06, "loss": 0.8671, "step": 3922 }, { "epoch": 0.8478495785606225, "grad_norm": 0.906342089176178, "learning_rate": 2.3797561142843107e-06, "loss": 0.8903, "step": 3923 }, { "epoch": 0.8480657013183488, "grad_norm": 0.9477362632751465, "learning_rate": 2.3731371374570354e-06, "loss": 0.9786, "step": 3924 }, { "epoch": 0.8482818240760752, "grad_norm": 1.012372612953186, "learning_rate": 2.3665267977518157e-06, "loss": 1.0066, "step": 3925 }, { "epoch": 0.8484979468338016, "grad_norm": 0.9933478236198425, "learning_rate": 2.3599250984077026e-06, "loss": 0.8428, "step": 3926 }, { "epoch": 0.848714069591528, "grad_norm": 1.0077451467514038, "learning_rate": 2.353332042659513e-06, "loss": 0.9651, "step": 3927 }, { "epoch": 0.8489301923492544, "grad_norm": 0.9091978669166565, "learning_rate": 2.346747633737829e-06, "loss": 0.751, "step": 3928 }, { "epoch": 0.8491463151069808, "grad_norm": 0.9324750900268555, "learning_rate": 2.3401718748689972e-06, "loss": 1.1466, "step": 3929 }, { "epoch": 0.8493624378647071, "grad_norm": 1.0648280382156372, "learning_rate": 2.3336047692751216e-06, "loss": 0.9892, "step": 3930 }, { "epoch": 0.8495785606224335, "grad_norm": 0.967571496963501, "learning_rate": 2.3270463201740668e-06, "loss": 0.959, "step": 3931 }, { "epoch": 0.84979468338016, "grad_norm": 0.9903391599655151, "learning_rate": 2.32049653077947e-06, "loss": 0.7614, "step": 3932 }, { "epoch": 0.8500108061378863, "grad_norm": 0.9767742156982422, "learning_rate": 2.3139554043006986e-06, "loss": 0.8769, "step": 3933 }, { "epoch": 0.8502269288956127, "grad_norm": 0.9995903968811035, "learning_rate": 2.3074229439428964e-06, "loss": 0.8753, "step": 3934 }, { "epoch": 0.8504430516533391, "grad_norm": 1.0542783737182617, "learning_rate": 2.300899152906946e-06, "loss": 0.9123, "step": 3935 }, { "epoch": 0.8506591744110655, "grad_norm": 1.0569159984588623, "learning_rate": 2.294384034389503e-06, "loss": 0.7211, "step": 3936 }, { "epoch": 0.8508752971687918, "grad_norm": 0.9834873080253601, "learning_rate": 2.287877591582952e-06, "loss": 0.8814, "step": 3937 }, { "epoch": 0.8510914199265183, "grad_norm": 0.9262287616729736, "learning_rate": 2.2813798276754407e-06, "loss": 0.8303, "step": 3938 }, { "epoch": 0.8513075426842447, "grad_norm": 0.9619258642196655, "learning_rate": 2.274890745850846e-06, "loss": 0.9651, "step": 3939 }, { "epoch": 0.851523665441971, "grad_norm": 0.9438610076904297, "learning_rate": 2.2684103492888165e-06, "loss": 0.8412, "step": 3940 }, { "epoch": 0.8517397881996974, "grad_norm": 1.086382269859314, "learning_rate": 2.261938641164725e-06, "loss": 0.915, "step": 3941 }, { "epoch": 0.8519559109574238, "grad_norm": 1.0183038711547852, "learning_rate": 2.2554756246496966e-06, "loss": 0.9559, "step": 3942 }, { "epoch": 0.8521720337151502, "grad_norm": 1.060952067375183, "learning_rate": 2.2490213029105947e-06, "loss": 1.0643, "step": 3943 }, { "epoch": 0.8523881564728766, "grad_norm": 1.0353541374206543, "learning_rate": 2.242575679110013e-06, "loss": 0.9, "step": 3944 }, { "epoch": 0.852604279230603, "grad_norm": 1.009286642074585, "learning_rate": 2.236138756406303e-06, "loss": 0.8826, "step": 3945 }, { "epoch": 0.8528204019883294, "grad_norm": 0.941284716129303, "learning_rate": 2.22971053795354e-06, "loss": 0.9171, "step": 3946 }, { "epoch": 0.8530365247460557, "grad_norm": 1.0223666429519653, "learning_rate": 2.223291026901533e-06, "loss": 0.7593, "step": 3947 }, { "epoch": 0.8532526475037822, "grad_norm": 0.9023488759994507, "learning_rate": 2.2168802263958278e-06, "loss": 0.9817, "step": 3948 }, { "epoch": 0.8534687702615086, "grad_norm": 0.985598623752594, "learning_rate": 2.210478139577705e-06, "loss": 1.0036, "step": 3949 }, { "epoch": 0.8536848930192349, "grad_norm": 1.0244380235671997, "learning_rate": 2.2040847695841693e-06, "loss": 0.9144, "step": 3950 }, { "epoch": 0.8539010157769613, "grad_norm": 1.0165053606033325, "learning_rate": 2.1977001195479586e-06, "loss": 0.8182, "step": 3951 }, { "epoch": 0.8541171385346877, "grad_norm": 0.9381210803985596, "learning_rate": 2.191324192597535e-06, "loss": 0.9025, "step": 3952 }, { "epoch": 0.854333261292414, "grad_norm": 0.9398542642593384, "learning_rate": 2.184956991857088e-06, "loss": 0.7237, "step": 3953 }, { "epoch": 0.8545493840501405, "grad_norm": 1.001290202140808, "learning_rate": 2.1785985204465354e-06, "loss": 0.9462, "step": 3954 }, { "epoch": 0.8547655068078669, "grad_norm": 1.0010464191436768, "learning_rate": 2.172248781481514e-06, "loss": 0.8731, "step": 3955 }, { "epoch": 0.8549816295655932, "grad_norm": 0.9304476380348206, "learning_rate": 2.165907778073373e-06, "loss": 0.7721, "step": 3956 }, { "epoch": 0.8551977523233196, "grad_norm": 0.8804721832275391, "learning_rate": 2.159575513329193e-06, "loss": 0.8879, "step": 3957 }, { "epoch": 0.855413875081046, "grad_norm": 0.8830518126487732, "learning_rate": 2.1532519903517723e-06, "loss": 0.9191, "step": 3958 }, { "epoch": 0.8556299978387725, "grad_norm": 0.9027577638626099, "learning_rate": 2.1469372122396214e-06, "loss": 1.0066, "step": 3959 }, { "epoch": 0.8558461205964988, "grad_norm": 0.9849170446395874, "learning_rate": 2.140631182086965e-06, "loss": 0.8284, "step": 3960 }, { "epoch": 0.8560622433542252, "grad_norm": 0.9777927994728088, "learning_rate": 2.1343339029837496e-06, "loss": 0.8547, "step": 3961 }, { "epoch": 0.8562783661119516, "grad_norm": 1.0231190919876099, "learning_rate": 2.1280453780156153e-06, "loss": 0.9239, "step": 3962 }, { "epoch": 0.8564944888696779, "grad_norm": 1.0713601112365723, "learning_rate": 2.1217656102639326e-06, "loss": 0.8877, "step": 3963 }, { "epoch": 0.8567106116274044, "grad_norm": 1.1315653324127197, "learning_rate": 2.1154946028057744e-06, "loss": 0.7842, "step": 3964 }, { "epoch": 0.8569267343851308, "grad_norm": 0.9086594581604004, "learning_rate": 2.1092323587139174e-06, "loss": 0.8541, "step": 3965 }, { "epoch": 0.8571428571428571, "grad_norm": 1.12448251247406, "learning_rate": 2.1029788810568473e-06, "loss": 1.0357, "step": 3966 }, { "epoch": 0.8573589799005835, "grad_norm": 0.9281867742538452, "learning_rate": 2.0967341728987554e-06, "loss": 0.8266, "step": 3967 }, { "epoch": 0.8575751026583099, "grad_norm": 1.0272259712219238, "learning_rate": 2.09049823729953e-06, "loss": 0.9871, "step": 3968 }, { "epoch": 0.8577912254160364, "grad_norm": 0.963164210319519, "learning_rate": 2.0842710773147677e-06, "loss": 0.8631, "step": 3969 }, { "epoch": 0.8580073481737627, "grad_norm": 1.1444388628005981, "learning_rate": 2.0780526959957627e-06, "loss": 0.9162, "step": 3970 }, { "epoch": 0.8582234709314891, "grad_norm": 1.088512897491455, "learning_rate": 2.0718430963895054e-06, "loss": 0.9834, "step": 3971 }, { "epoch": 0.8584395936892155, "grad_norm": 0.9993893504142761, "learning_rate": 2.065642281538691e-06, "loss": 0.7611, "step": 3972 }, { "epoch": 0.8586557164469418, "grad_norm": 1.0248545408248901, "learning_rate": 2.0594502544816984e-06, "loss": 0.8907, "step": 3973 }, { "epoch": 0.8588718392046683, "grad_norm": 0.980186402797699, "learning_rate": 2.0532670182526093e-06, "loss": 0.8991, "step": 3974 }, { "epoch": 0.8590879619623947, "grad_norm": 1.2061337232589722, "learning_rate": 2.047092575881189e-06, "loss": 1.0024, "step": 3975 }, { "epoch": 0.859304084720121, "grad_norm": 1.0617130994796753, "learning_rate": 2.04092693039291e-06, "loss": 0.9011, "step": 3976 }, { "epoch": 0.8595202074778474, "grad_norm": 1.0404224395751953, "learning_rate": 2.0347700848089193e-06, "loss": 0.7154, "step": 3977 }, { "epoch": 0.8597363302355738, "grad_norm": 0.974334716796875, "learning_rate": 2.0286220421460624e-06, "loss": 0.8879, "step": 3978 }, { "epoch": 0.8599524529933001, "grad_norm": 1.0852996110916138, "learning_rate": 2.0224828054168523e-06, "loss": 0.9256, "step": 3979 }, { "epoch": 0.8601685757510266, "grad_norm": 1.0426374673843384, "learning_rate": 2.0163523776295134e-06, "loss": 0.7925, "step": 3980 }, { "epoch": 0.860384698508753, "grad_norm": 1.0513733625411987, "learning_rate": 2.0102307617879367e-06, "loss": 1.0972, "step": 3981 }, { "epoch": 0.8606008212664794, "grad_norm": 0.9410885572433472, "learning_rate": 2.0041179608917003e-06, "loss": 0.9346, "step": 3982 }, { "epoch": 0.8608169440242057, "grad_norm": 0.9574518799781799, "learning_rate": 1.9980139779360683e-06, "loss": 0.9286, "step": 3983 }, { "epoch": 0.8610330667819321, "grad_norm": 1.0688837766647339, "learning_rate": 1.991918815911964e-06, "loss": 0.8646, "step": 3984 }, { "epoch": 0.8612491895396586, "grad_norm": 0.8987706899642944, "learning_rate": 1.9858324778060133e-06, "loss": 0.7938, "step": 3985 }, { "epoch": 0.8614653122973849, "grad_norm": 0.9745295643806458, "learning_rate": 1.979754966600509e-06, "loss": 0.9665, "step": 3986 }, { "epoch": 0.8616814350551113, "grad_norm": 1.0833499431610107, "learning_rate": 1.9736862852734108e-06, "loss": 0.8568, "step": 3987 }, { "epoch": 0.8618975578128377, "grad_norm": 1.0643657445907593, "learning_rate": 1.9676264367983643e-06, "loss": 0.8574, "step": 3988 }, { "epoch": 0.862113680570564, "grad_norm": 0.8867236375808716, "learning_rate": 1.9615754241446794e-06, "loss": 0.7652, "step": 3989 }, { "epoch": 0.8623298033282905, "grad_norm": 1.0987955331802368, "learning_rate": 1.955533250277335e-06, "loss": 0.9481, "step": 3990 }, { "epoch": 0.8625459260860169, "grad_norm": 1.0885558128356934, "learning_rate": 1.9494999181569874e-06, "loss": 0.9157, "step": 3991 }, { "epoch": 0.8627620488437433, "grad_norm": 1.0694215297698975, "learning_rate": 1.9434754307399537e-06, "loss": 0.9242, "step": 3992 }, { "epoch": 0.8629781716014696, "grad_norm": 1.1088707447052002, "learning_rate": 1.9374597909782135e-06, "loss": 0.8787, "step": 3993 }, { "epoch": 0.863194294359196, "grad_norm": 1.2541041374206543, "learning_rate": 1.9314530018194253e-06, "loss": 1.0074, "step": 3994 }, { "epoch": 0.8634104171169225, "grad_norm": 1.0666176080703735, "learning_rate": 1.9254550662069004e-06, "loss": 0.9751, "step": 3995 }, { "epoch": 0.8636265398746488, "grad_norm": 1.0079668760299683, "learning_rate": 1.919465987079607e-06, "loss": 1.0891, "step": 3996 }, { "epoch": 0.8638426626323752, "grad_norm": 0.880274772644043, "learning_rate": 1.913485767372181e-06, "loss": 0.9269, "step": 3997 }, { "epoch": 0.8640587853901016, "grad_norm": 1.0225210189819336, "learning_rate": 1.9075144100149234e-06, "loss": 0.9084, "step": 3998 }, { "epoch": 0.8642749081478279, "grad_norm": 1.0274851322174072, "learning_rate": 1.9015519179337794e-06, "loss": 0.8577, "step": 3999 }, { "epoch": 0.8644910309055543, "grad_norm": 1.1693341732025146, "learning_rate": 1.895598294050358e-06, "loss": 1.0272, "step": 4000 }, { "epoch": 0.8647071536632808, "grad_norm": 0.8606007099151611, "learning_rate": 1.889653541281926e-06, "loss": 0.9262, "step": 4001 }, { "epoch": 0.8649232764210071, "grad_norm": 0.9811006784439087, "learning_rate": 1.8837176625413866e-06, "loss": 0.8264, "step": 4002 }, { "epoch": 0.8651393991787335, "grad_norm": 1.1836777925491333, "learning_rate": 1.8777906607373175e-06, "loss": 0.8992, "step": 4003 }, { "epoch": 0.8653555219364599, "grad_norm": 1.0306787490844727, "learning_rate": 1.8718725387739312e-06, "loss": 0.9253, "step": 4004 }, { "epoch": 0.8655716446941863, "grad_norm": 0.8847578763961792, "learning_rate": 1.8659632995510946e-06, "loss": 0.8711, "step": 4005 }, { "epoch": 0.8657877674519127, "grad_norm": 1.0953636169433594, "learning_rate": 1.8600629459643226e-06, "loss": 0.938, "step": 4006 }, { "epoch": 0.8660038902096391, "grad_norm": 0.9728406071662903, "learning_rate": 1.8541714809047716e-06, "loss": 0.7935, "step": 4007 }, { "epoch": 0.8662200129673655, "grad_norm": 1.0419201850891113, "learning_rate": 1.8482889072592502e-06, "loss": 1.0054, "step": 4008 }, { "epoch": 0.8664361357250918, "grad_norm": 0.9814909100532532, "learning_rate": 1.8424152279102015e-06, "loss": 1.0094, "step": 4009 }, { "epoch": 0.8666522584828182, "grad_norm": 0.9687902331352234, "learning_rate": 1.8365504457357187e-06, "loss": 0.9497, "step": 4010 }, { "epoch": 0.8668683812405447, "grad_norm": 1.1203945875167847, "learning_rate": 1.8306945636095253e-06, "loss": 0.725, "step": 4011 }, { "epoch": 0.867084503998271, "grad_norm": 1.0863019227981567, "learning_rate": 1.8248475844010016e-06, "loss": 0.885, "step": 4012 }, { "epoch": 0.8673006267559974, "grad_norm": 1.0180869102478027, "learning_rate": 1.8190095109751427e-06, "loss": 0.9969, "step": 4013 }, { "epoch": 0.8675167495137238, "grad_norm": 1.0216892957687378, "learning_rate": 1.8131803461925934e-06, "loss": 0.872, "step": 4014 }, { "epoch": 0.8677328722714501, "grad_norm": 1.0102025270462036, "learning_rate": 1.8073600929096314e-06, "loss": 0.9727, "step": 4015 }, { "epoch": 0.8679489950291766, "grad_norm": 0.9080718755722046, "learning_rate": 1.8015487539781705e-06, "loss": 0.8032, "step": 4016 }, { "epoch": 0.868165117786903, "grad_norm": 1.008196234703064, "learning_rate": 1.7957463322457536e-06, "loss": 0.7756, "step": 4017 }, { "epoch": 0.8683812405446294, "grad_norm": 1.1037870645523071, "learning_rate": 1.7899528305555547e-06, "loss": 0.9485, "step": 4018 }, { "epoch": 0.8685973633023557, "grad_norm": 1.0407118797302246, "learning_rate": 1.7841682517463677e-06, "loss": 0.9199, "step": 4019 }, { "epoch": 0.8688134860600821, "grad_norm": 0.9282059669494629, "learning_rate": 1.7783925986526273e-06, "loss": 0.92, "step": 4020 }, { "epoch": 0.8690296088178086, "grad_norm": 0.840758740901947, "learning_rate": 1.7726258741043945e-06, "loss": 0.95, "step": 4021 }, { "epoch": 0.8692457315755349, "grad_norm": 0.9742379784584045, "learning_rate": 1.7668680809273465e-06, "loss": 0.8779, "step": 4022 }, { "epoch": 0.8694618543332613, "grad_norm": 0.9899144768714905, "learning_rate": 1.7611192219427908e-06, "loss": 1.01, "step": 4023 }, { "epoch": 0.8696779770909877, "grad_norm": 1.0181303024291992, "learning_rate": 1.7553792999676523e-06, "loss": 0.831, "step": 4024 }, { "epoch": 0.869894099848714, "grad_norm": 1.0468906164169312, "learning_rate": 1.749648317814483e-06, "loss": 0.7739, "step": 4025 }, { "epoch": 0.8701102226064404, "grad_norm": 1.0993393659591675, "learning_rate": 1.74392627829145e-06, "loss": 1.0472, "step": 4026 }, { "epoch": 0.8703263453641669, "grad_norm": 1.4291259050369263, "learning_rate": 1.7382131842023374e-06, "loss": 0.9302, "step": 4027 }, { "epoch": 0.8705424681218933, "grad_norm": 1.1961036920547485, "learning_rate": 1.7325090383465503e-06, "loss": 0.8646, "step": 4028 }, { "epoch": 0.8707585908796196, "grad_norm": 1.1001901626586914, "learning_rate": 1.7268138435191061e-06, "loss": 0.9163, "step": 4029 }, { "epoch": 0.870974713637346, "grad_norm": 1.0502665042877197, "learning_rate": 1.721127602510635e-06, "loss": 0.9426, "step": 4030 }, { "epoch": 0.8711908363950724, "grad_norm": 0.9333060383796692, "learning_rate": 1.7154503181073857e-06, "loss": 0.768, "step": 4031 }, { "epoch": 0.8714069591527988, "grad_norm": 0.9372854828834534, "learning_rate": 1.7097819930912129e-06, "loss": 0.9407, "step": 4032 }, { "epoch": 0.8716230819105252, "grad_norm": 1.0821665525436401, "learning_rate": 1.7041226302395797e-06, "loss": 0.9718, "step": 4033 }, { "epoch": 0.8718392046682516, "grad_norm": 1.0281955003738403, "learning_rate": 1.6984722323255654e-06, "loss": 0.9438, "step": 4034 }, { "epoch": 0.8720553274259779, "grad_norm": 1.062590479850769, "learning_rate": 1.6928308021178552e-06, "loss": 0.9395, "step": 4035 }, { "epoch": 0.8722714501837043, "grad_norm": 0.8797858953475952, "learning_rate": 1.687198342380727e-06, "loss": 0.7857, "step": 4036 }, { "epoch": 0.8724875729414308, "grad_norm": 1.0413355827331543, "learning_rate": 1.6815748558740752e-06, "loss": 0.9426, "step": 4037 }, { "epoch": 0.8727036956991571, "grad_norm": 0.9694939851760864, "learning_rate": 1.6759603453534024e-06, "loss": 0.7913, "step": 4038 }, { "epoch": 0.8729198184568835, "grad_norm": 0.9185709357261658, "learning_rate": 1.6703548135698012e-06, "loss": 1.0559, "step": 4039 }, { "epoch": 0.8731359412146099, "grad_norm": 1.0040276050567627, "learning_rate": 1.6647582632699676e-06, "loss": 0.9708, "step": 4040 }, { "epoch": 0.8733520639723363, "grad_norm": 1.1234097480773926, "learning_rate": 1.659170697196204e-06, "loss": 0.9381, "step": 4041 }, { "epoch": 0.8735681867300626, "grad_norm": 0.9272008538246155, "learning_rate": 1.6535921180863956e-06, "loss": 0.9944, "step": 4042 }, { "epoch": 0.8737843094877891, "grad_norm": 1.124240517616272, "learning_rate": 1.6480225286740404e-06, "loss": 1.0021, "step": 4043 }, { "epoch": 0.8740004322455155, "grad_norm": 0.9661961793899536, "learning_rate": 1.642461931688224e-06, "loss": 0.7872, "step": 4044 }, { "epoch": 0.8742165550032418, "grad_norm": 0.999000608921051, "learning_rate": 1.6369103298536227e-06, "loss": 0.7445, "step": 4045 }, { "epoch": 0.8744326777609682, "grad_norm": 0.9599443674087524, "learning_rate": 1.6313677258905114e-06, "loss": 0.8118, "step": 4046 }, { "epoch": 0.8746488005186946, "grad_norm": 1.1033684015274048, "learning_rate": 1.625834122514751e-06, "loss": 1.0608, "step": 4047 }, { "epoch": 0.874864923276421, "grad_norm": 1.0001951456069946, "learning_rate": 1.6203095224377974e-06, "loss": 0.8486, "step": 4048 }, { "epoch": 0.8750810460341474, "grad_norm": 0.9805539846420288, "learning_rate": 1.6147939283666892e-06, "loss": 0.7877, "step": 4049 }, { "epoch": 0.8752971687918738, "grad_norm": 0.9951595664024353, "learning_rate": 1.6092873430040557e-06, "loss": 1.0561, "step": 4050 }, { "epoch": 0.8755132915496002, "grad_norm": 0.8902543187141418, "learning_rate": 1.6037897690481075e-06, "loss": 0.8228, "step": 4051 }, { "epoch": 0.8757294143073265, "grad_norm": 1.0002317428588867, "learning_rate": 1.598301209192654e-06, "loss": 0.9239, "step": 4052 }, { "epoch": 0.875945537065053, "grad_norm": 0.9699692130088806, "learning_rate": 1.5928216661270669e-06, "loss": 1.1387, "step": 4053 }, { "epoch": 0.8761616598227794, "grad_norm": 1.1492412090301514, "learning_rate": 1.5873511425363108e-06, "loss": 0.8409, "step": 4054 }, { "epoch": 0.8763777825805057, "grad_norm": 1.0058752298355103, "learning_rate": 1.5818896411009266e-06, "loss": 0.8294, "step": 4055 }, { "epoch": 0.8765939053382321, "grad_norm": 1.0559006929397583, "learning_rate": 1.5764371644970468e-06, "loss": 0.7307, "step": 4056 }, { "epoch": 0.8768100280959585, "grad_norm": 0.8250787854194641, "learning_rate": 1.570993715396365e-06, "loss": 0.8297, "step": 4057 }, { "epoch": 0.8770261508536848, "grad_norm": 1.0399080514907837, "learning_rate": 1.5655592964661659e-06, "loss": 0.8245, "step": 4058 }, { "epoch": 0.8772422736114113, "grad_norm": 1.0151152610778809, "learning_rate": 1.5601339103692948e-06, "loss": 0.9051, "step": 4059 }, { "epoch": 0.8774583963691377, "grad_norm": 1.1445362567901611, "learning_rate": 1.5547175597641762e-06, "loss": 0.9454, "step": 4060 }, { "epoch": 0.877674519126864, "grad_norm": 1.040764570236206, "learning_rate": 1.5493102473048183e-06, "loss": 0.8549, "step": 4061 }, { "epoch": 0.8778906418845904, "grad_norm": 0.9080301523208618, "learning_rate": 1.543911975640786e-06, "loss": 0.996, "step": 4062 }, { "epoch": 0.8781067646423169, "grad_norm": 1.0131959915161133, "learning_rate": 1.5385227474172215e-06, "loss": 0.9449, "step": 4063 }, { "epoch": 0.8783228874000433, "grad_norm": 1.0617555379867554, "learning_rate": 1.5331425652748344e-06, "loss": 1.1335, "step": 4064 }, { "epoch": 0.8785390101577696, "grad_norm": 0.9740786552429199, "learning_rate": 1.5277714318499025e-06, "loss": 0.9065, "step": 4065 }, { "epoch": 0.878755132915496, "grad_norm": 1.1823405027389526, "learning_rate": 1.5224093497742654e-06, "loss": 1.1069, "step": 4066 }, { "epoch": 0.8789712556732224, "grad_norm": 0.9506311416625977, "learning_rate": 1.5170563216753342e-06, "loss": 0.8977, "step": 4067 }, { "epoch": 0.8791873784309487, "grad_norm": 0.9873430132865906, "learning_rate": 1.5117123501760778e-06, "loss": 0.9684, "step": 4068 }, { "epoch": 0.8794035011886752, "grad_norm": 1.0437781810760498, "learning_rate": 1.5063774378950325e-06, "loss": 1.042, "step": 4069 }, { "epoch": 0.8796196239464016, "grad_norm": 1.0704689025878906, "learning_rate": 1.5010515874462893e-06, "loss": 0.9161, "step": 4070 }, { "epoch": 0.8798357467041279, "grad_norm": 0.8986729979515076, "learning_rate": 1.4957348014395079e-06, "loss": 0.8105, "step": 4071 }, { "epoch": 0.8800518694618543, "grad_norm": 1.09950590133667, "learning_rate": 1.490427082479895e-06, "loss": 0.8366, "step": 4072 }, { "epoch": 0.8802679922195807, "grad_norm": 0.9348450899124146, "learning_rate": 1.4851284331682215e-06, "loss": 0.9173, "step": 4073 }, { "epoch": 0.8804841149773072, "grad_norm": 0.9867619276046753, "learning_rate": 1.4798388561008193e-06, "loss": 0.8978, "step": 4074 }, { "epoch": 0.8807002377350335, "grad_norm": 1.164351463317871, "learning_rate": 1.4745583538695685e-06, "loss": 1.2014, "step": 4075 }, { "epoch": 0.8809163604927599, "grad_norm": 0.9201247692108154, "learning_rate": 1.4692869290618971e-06, "loss": 0.9247, "step": 4076 }, { "epoch": 0.8811324832504863, "grad_norm": 0.9837038516998291, "learning_rate": 1.4640245842607925e-06, "loss": 0.8844, "step": 4077 }, { "epoch": 0.8813486060082126, "grad_norm": 0.9408811926841736, "learning_rate": 1.4587713220447897e-06, "loss": 0.8393, "step": 4078 }, { "epoch": 0.881564728765939, "grad_norm": 0.9224588871002197, "learning_rate": 1.4535271449879806e-06, "loss": 0.8589, "step": 4079 }, { "epoch": 0.8817808515236655, "grad_norm": 0.8685857653617859, "learning_rate": 1.4482920556599988e-06, "loss": 0.8432, "step": 4080 }, { "epoch": 0.8819969742813918, "grad_norm": 0.8095546960830688, "learning_rate": 1.4430660566260256e-06, "loss": 0.7959, "step": 4081 }, { "epoch": 0.8822130970391182, "grad_norm": 0.887802243232727, "learning_rate": 1.4378491504467817e-06, "loss": 0.8161, "step": 4082 }, { "epoch": 0.8824292197968446, "grad_norm": 0.8776811957359314, "learning_rate": 1.4326413396785488e-06, "loss": 0.788, "step": 4083 }, { "epoch": 0.8826453425545709, "grad_norm": 1.1636507511138916, "learning_rate": 1.4274426268731367e-06, "loss": 0.9023, "step": 4084 }, { "epoch": 0.8828614653122974, "grad_norm": 0.9223209023475647, "learning_rate": 1.4222530145779034e-06, "loss": 0.8426, "step": 4085 }, { "epoch": 0.8830775880700238, "grad_norm": 1.0244272947311401, "learning_rate": 1.417072505335748e-06, "loss": 0.9004, "step": 4086 }, { "epoch": 0.8832937108277502, "grad_norm": 0.8953300714492798, "learning_rate": 1.411901101685107e-06, "loss": 0.8038, "step": 4087 }, { "epoch": 0.8835098335854765, "grad_norm": 0.9177679419517517, "learning_rate": 1.4067388061599575e-06, "loss": 0.8871, "step": 4088 }, { "epoch": 0.883725956343203, "grad_norm": 1.039570927619934, "learning_rate": 1.4015856212898116e-06, "loss": 0.7814, "step": 4089 }, { "epoch": 0.8839420791009294, "grad_norm": 1.0864183902740479, "learning_rate": 1.3964415495997185e-06, "loss": 0.8857, "step": 4090 }, { "epoch": 0.8841582018586557, "grad_norm": 1.0120965242385864, "learning_rate": 1.3913065936102555e-06, "loss": 0.9873, "step": 4091 }, { "epoch": 0.8843743246163821, "grad_norm": 1.0110678672790527, "learning_rate": 1.386180755837554e-06, "loss": 0.9451, "step": 4092 }, { "epoch": 0.8845904473741085, "grad_norm": 1.2350870370864868, "learning_rate": 1.3810640387932472e-06, "loss": 1.1773, "step": 4093 }, { "epoch": 0.8848065701318348, "grad_norm": 0.9564366936683655, "learning_rate": 1.3759564449845208e-06, "loss": 0.926, "step": 4094 }, { "epoch": 0.8850226928895613, "grad_norm": 1.0063345432281494, "learning_rate": 1.370857976914086e-06, "loss": 1.0404, "step": 4095 }, { "epoch": 0.8852388156472877, "grad_norm": 1.1416981220245361, "learning_rate": 1.3657686370801737e-06, "loss": 1.0394, "step": 4096 }, { "epoch": 0.8854549384050141, "grad_norm": 0.9854344725608826, "learning_rate": 1.3606884279765553e-06, "loss": 0.8007, "step": 4097 }, { "epoch": 0.8856710611627404, "grad_norm": 1.0236746072769165, "learning_rate": 1.3556173520925242e-06, "loss": 0.9153, "step": 4098 }, { "epoch": 0.8858871839204668, "grad_norm": 1.0137629508972168, "learning_rate": 1.3505554119128861e-06, "loss": 0.9402, "step": 4099 }, { "epoch": 0.8861033066781933, "grad_norm": 0.9242549538612366, "learning_rate": 1.3455026099179835e-06, "loss": 1.0568, "step": 4100 }, { "epoch": 0.8863194294359196, "grad_norm": 0.9783289432525635, "learning_rate": 1.3404589485836805e-06, "loss": 0.8188, "step": 4101 }, { "epoch": 0.886535552193646, "grad_norm": 0.9931995272636414, "learning_rate": 1.3354244303813601e-06, "loss": 0.7379, "step": 4102 }, { "epoch": 0.8867516749513724, "grad_norm": 1.0986474752426147, "learning_rate": 1.3303990577779202e-06, "loss": 0.8815, "step": 4103 }, { "epoch": 0.8869677977090987, "grad_norm": 0.9438388347625732, "learning_rate": 1.3253828332357866e-06, "loss": 0.9092, "step": 4104 }, { "epoch": 0.8871839204668251, "grad_norm": 1.1600340604782104, "learning_rate": 1.320375759212893e-06, "loss": 1.1459, "step": 4105 }, { "epoch": 0.8874000432245516, "grad_norm": 1.036247968673706, "learning_rate": 1.3153778381626968e-06, "loss": 0.9705, "step": 4106 }, { "epoch": 0.8876161659822779, "grad_norm": 1.0610734224319458, "learning_rate": 1.3103890725341683e-06, "loss": 0.9458, "step": 4107 }, { "epoch": 0.8878322887400043, "grad_norm": 0.9418146014213562, "learning_rate": 1.3054094647717896e-06, "loss": 0.8307, "step": 4108 }, { "epoch": 0.8880484114977307, "grad_norm": 0.9183703064918518, "learning_rate": 1.300439017315558e-06, "loss": 1.0513, "step": 4109 }, { "epoch": 0.8882645342554571, "grad_norm": 0.9850232601165771, "learning_rate": 1.2954777326009805e-06, "loss": 0.7928, "step": 4110 }, { "epoch": 0.8884806570131835, "grad_norm": 1.0256174802780151, "learning_rate": 1.2905256130590749e-06, "loss": 1.0977, "step": 4111 }, { "epoch": 0.8886967797709099, "grad_norm": 1.2523407936096191, "learning_rate": 1.2855826611163691e-06, "loss": 1.1814, "step": 4112 }, { "epoch": 0.8889129025286363, "grad_norm": 1.0399409532546997, "learning_rate": 1.2806488791948945e-06, "loss": 0.9109, "step": 4113 }, { "epoch": 0.8891290252863626, "grad_norm": 0.8949527740478516, "learning_rate": 1.2757242697121997e-06, "loss": 0.9874, "step": 4114 }, { "epoch": 0.889345148044089, "grad_norm": 0.9688258767127991, "learning_rate": 1.2708088350813297e-06, "loss": 0.873, "step": 4115 }, { "epoch": 0.8895612708018155, "grad_norm": 0.8582277894020081, "learning_rate": 1.2659025777108336e-06, "loss": 0.9761, "step": 4116 }, { "epoch": 0.8897773935595418, "grad_norm": 1.06842839717865, "learning_rate": 1.2610055000047683e-06, "loss": 0.8791, "step": 4117 }, { "epoch": 0.8899935163172682, "grad_norm": 0.9435558319091797, "learning_rate": 1.2561176043626856e-06, "loss": 0.7926, "step": 4118 }, { "epoch": 0.8902096390749946, "grad_norm": 1.0161453485488892, "learning_rate": 1.2512388931796493e-06, "loss": 0.9486, "step": 4119 }, { "epoch": 0.8904257618327209, "grad_norm": 1.0344659090042114, "learning_rate": 1.2463693688462163e-06, "loss": 0.9384, "step": 4120 }, { "epoch": 0.8906418845904474, "grad_norm": 0.998457670211792, "learning_rate": 1.2415090337484425e-06, "loss": 1.0499, "step": 4121 }, { "epoch": 0.8908580073481738, "grad_norm": 0.9651675224304199, "learning_rate": 1.2366578902678717e-06, "loss": 1.0828, "step": 4122 }, { "epoch": 0.8910741301059002, "grad_norm": 0.9161550998687744, "learning_rate": 1.2318159407815645e-06, "loss": 0.8627, "step": 4123 }, { "epoch": 0.8912902528636265, "grad_norm": 0.9125650525093079, "learning_rate": 1.2269831876620608e-06, "loss": 0.9055, "step": 4124 }, { "epoch": 0.8915063756213529, "grad_norm": 0.7748423218727112, "learning_rate": 1.2221596332773977e-06, "loss": 0.8033, "step": 4125 }, { "epoch": 0.8917224983790794, "grad_norm": 0.8713288903236389, "learning_rate": 1.2173452799911046e-06, "loss": 0.981, "step": 4126 }, { "epoch": 0.8919386211368057, "grad_norm": 1.0444568395614624, "learning_rate": 1.2125401301622076e-06, "loss": 0.868, "step": 4127 }, { "epoch": 0.8921547438945321, "grad_norm": 0.8919979929924011, "learning_rate": 1.2077441861452144e-06, "loss": 0.933, "step": 4128 }, { "epoch": 0.8923708666522585, "grad_norm": 0.9409472346305847, "learning_rate": 1.2029574502901297e-06, "loss": 0.8335, "step": 4129 }, { "epoch": 0.8925869894099848, "grad_norm": 0.9649437069892883, "learning_rate": 1.198179924942442e-06, "loss": 0.8632, "step": 4130 }, { "epoch": 0.8928031121677112, "grad_norm": 1.0272592306137085, "learning_rate": 1.193411612443125e-06, "loss": 0.8637, "step": 4131 }, { "epoch": 0.8930192349254377, "grad_norm": 0.9774860143661499, "learning_rate": 1.1886525151286477e-06, "loss": 0.8743, "step": 4132 }, { "epoch": 0.8932353576831641, "grad_norm": 1.0608245134353638, "learning_rate": 1.1839026353309514e-06, "loss": 0.7975, "step": 4133 }, { "epoch": 0.8934514804408904, "grad_norm": 1.0434435606002808, "learning_rate": 1.1791619753774653e-06, "loss": 0.9528, "step": 4134 }, { "epoch": 0.8936676031986168, "grad_norm": 1.160058617591858, "learning_rate": 1.1744305375911048e-06, "loss": 0.9413, "step": 4135 }, { "epoch": 0.8938837259563432, "grad_norm": 1.0943877696990967, "learning_rate": 1.1697083242902597e-06, "loss": 0.968, "step": 4136 }, { "epoch": 0.8940998487140696, "grad_norm": 0.9067952632904053, "learning_rate": 1.1649953377888102e-06, "loss": 1.0125, "step": 4137 }, { "epoch": 0.894315971471796, "grad_norm": 0.8918052315711975, "learning_rate": 1.1602915803961068e-06, "loss": 0.9804, "step": 4138 }, { "epoch": 0.8945320942295224, "grad_norm": 0.900425374507904, "learning_rate": 1.1555970544169747e-06, "loss": 0.832, "step": 4139 }, { "epoch": 0.8947482169872487, "grad_norm": 0.986312747001648, "learning_rate": 1.1509117621517207e-06, "loss": 0.8684, "step": 4140 }, { "epoch": 0.8949643397449751, "grad_norm": 1.1736079454421997, "learning_rate": 1.1462357058961327e-06, "loss": 0.7799, "step": 4141 }, { "epoch": 0.8951804625027016, "grad_norm": 1.3107961416244507, "learning_rate": 1.1415688879414666e-06, "loss": 1.0719, "step": 4142 }, { "epoch": 0.8953965852604279, "grad_norm": 0.8984982967376709, "learning_rate": 1.1369113105744512e-06, "loss": 0.7859, "step": 4143 }, { "epoch": 0.8956127080181543, "grad_norm": 1.043787956237793, "learning_rate": 1.1322629760772874e-06, "loss": 1.031, "step": 4144 }, { "epoch": 0.8958288307758807, "grad_norm": 0.9398433566093445, "learning_rate": 1.1276238867276511e-06, "loss": 0.8668, "step": 4145 }, { "epoch": 0.8960449535336071, "grad_norm": 0.9927624464035034, "learning_rate": 1.122994044798682e-06, "loss": 0.8491, "step": 4146 }, { "epoch": 0.8962610762913334, "grad_norm": 0.9938647747039795, "learning_rate": 1.1183734525589939e-06, "loss": 0.8402, "step": 4147 }, { "epoch": 0.8964771990490599, "grad_norm": 0.8894004821777344, "learning_rate": 1.1137621122726671e-06, "loss": 0.7474, "step": 4148 }, { "epoch": 0.8966933218067863, "grad_norm": 1.0167287588119507, "learning_rate": 1.109160026199243e-06, "loss": 0.9, "step": 4149 }, { "epoch": 0.8969094445645126, "grad_norm": 1.0129833221435547, "learning_rate": 1.1045671965937421e-06, "loss": 0.7466, "step": 4150 }, { "epoch": 0.897125567322239, "grad_norm": 0.9298940896987915, "learning_rate": 1.099983625706631e-06, "loss": 0.8004, "step": 4151 }, { "epoch": 0.8973416900799654, "grad_norm": 1.0372710227966309, "learning_rate": 1.0954093157838552e-06, "loss": 0.9976, "step": 4152 }, { "epoch": 0.8975578128376918, "grad_norm": 0.9522298574447632, "learning_rate": 1.090844269066813e-06, "loss": 0.7235, "step": 4153 }, { "epoch": 0.8977739355954182, "grad_norm": 0.9699406027793884, "learning_rate": 1.086288487792364e-06, "loss": 0.7414, "step": 4154 }, { "epoch": 0.8979900583531446, "grad_norm": 1.175062894821167, "learning_rate": 1.081741974192838e-06, "loss": 0.8125, "step": 4155 }, { "epoch": 0.898206181110871, "grad_norm": 1.0469586849212646, "learning_rate": 1.0772047304960109e-06, "loss": 0.878, "step": 4156 }, { "epoch": 0.8984223038685973, "grad_norm": 0.9139113426208496, "learning_rate": 1.0726767589251219e-06, "loss": 0.8979, "step": 4157 }, { "epoch": 0.8986384266263238, "grad_norm": 0.9332925081253052, "learning_rate": 1.068158061698865e-06, "loss": 0.8307, "step": 4158 }, { "epoch": 0.8988545493840502, "grad_norm": 0.9882146120071411, "learning_rate": 1.0636486410313961e-06, "loss": 0.8748, "step": 4159 }, { "epoch": 0.8990706721417765, "grad_norm": 0.920577883720398, "learning_rate": 1.0591484991323186e-06, "loss": 0.8209, "step": 4160 }, { "epoch": 0.8992867948995029, "grad_norm": 1.1150248050689697, "learning_rate": 1.0546576382066952e-06, "loss": 0.9629, "step": 4161 }, { "epoch": 0.8995029176572293, "grad_norm": 0.9610633850097656, "learning_rate": 1.0501760604550281e-06, "loss": 0.7465, "step": 4162 }, { "epoch": 0.8997190404149557, "grad_norm": 1.076218843460083, "learning_rate": 1.0457037680732873e-06, "loss": 1.0762, "step": 4163 }, { "epoch": 0.8999351631726821, "grad_norm": 1.0198490619659424, "learning_rate": 1.0412407632528864e-06, "loss": 0.8838, "step": 4164 }, { "epoch": 0.9001512859304085, "grad_norm": 1.087411880493164, "learning_rate": 1.036787048180683e-06, "loss": 0.7398, "step": 4165 }, { "epoch": 0.9003674086881348, "grad_norm": 1.007017970085144, "learning_rate": 1.0323426250389912e-06, "loss": 0.8635, "step": 4166 }, { "epoch": 0.9005835314458612, "grad_norm": 1.0128799676895142, "learning_rate": 1.0279074960055646e-06, "loss": 1.0326, "step": 4167 }, { "epoch": 0.9007996542035877, "grad_norm": 0.8858432769775391, "learning_rate": 1.0234816632536094e-06, "loss": 0.8224, "step": 4168 }, { "epoch": 0.9010157769613141, "grad_norm": 1.1053907871246338, "learning_rate": 1.0190651289517705e-06, "loss": 0.9375, "step": 4169 }, { "epoch": 0.9012318997190404, "grad_norm": 1.0449488162994385, "learning_rate": 1.0146578952641394e-06, "loss": 0.935, "step": 4170 }, { "epoch": 0.9014480224767668, "grad_norm": 1.0494916439056396, "learning_rate": 1.0102599643502508e-06, "loss": 0.9382, "step": 4171 }, { "epoch": 0.9016641452344932, "grad_norm": 1.1499552726745605, "learning_rate": 1.0058713383650875e-06, "loss": 0.844, "step": 4172 }, { "epoch": 0.9018802679922195, "grad_norm": 1.020134449005127, "learning_rate": 1.0014920194590582e-06, "loss": 0.7838, "step": 4173 }, { "epoch": 0.902096390749946, "grad_norm": 1.2185606956481934, "learning_rate": 9.9712200977802e-07, "loss": 0.9149, "step": 4174 }, { "epoch": 0.9023125135076724, "grad_norm": 1.0377721786499023, "learning_rate": 9.927613114632684e-07, "loss": 0.9493, "step": 4175 }, { "epoch": 0.9025286362653987, "grad_norm": 1.054484486579895, "learning_rate": 9.884099266515345e-07, "loss": 0.821, "step": 4176 }, { "epoch": 0.9027447590231251, "grad_norm": 1.0430999994277954, "learning_rate": 9.840678574749886e-07, "loss": 0.9175, "step": 4177 }, { "epoch": 0.9029608817808515, "grad_norm": 0.9499197602272034, "learning_rate": 9.797351060612392e-07, "loss": 0.9419, "step": 4178 }, { "epoch": 0.903177004538578, "grad_norm": 0.9823135733604431, "learning_rate": 9.754116745333152e-07, "loss": 0.8397, "step": 4179 }, { "epoch": 0.9033931272963043, "grad_norm": 0.8455004692077637, "learning_rate": 9.710975650096889e-07, "loss": 0.7876, "step": 4180 }, { "epoch": 0.9036092500540307, "grad_norm": 0.9631431698799133, "learning_rate": 9.66792779604271e-07, "loss": 0.8957, "step": 4181 }, { "epoch": 0.9038253728117571, "grad_norm": 1.0619665384292603, "learning_rate": 9.62497320426392e-07, "loss": 1.0105, "step": 4182 }, { "epoch": 0.9040414955694834, "grad_norm": 1.1879591941833496, "learning_rate": 9.582111895808198e-07, "loss": 0.9524, "step": 4183 }, { "epoch": 0.9042576183272099, "grad_norm": 1.102536678314209, "learning_rate": 9.539343891677432e-07, "loss": 0.8738, "step": 4184 }, { "epoch": 0.9044737410849363, "grad_norm": 0.9416372179985046, "learning_rate": 9.496669212827903e-07, "loss": 1.0261, "step": 4185 }, { "epoch": 0.9046898638426626, "grad_norm": 1.0179506540298462, "learning_rate": 9.45408788017006e-07, "loss": 0.9296, "step": 4186 }, { "epoch": 0.904905986600389, "grad_norm": 0.9441346526145935, "learning_rate": 9.411599914568703e-07, "loss": 0.7562, "step": 4187 }, { "epoch": 0.9051221093581154, "grad_norm": 1.112977385520935, "learning_rate": 9.369205336842779e-07, "loss": 0.9034, "step": 4188 }, { "epoch": 0.9053382321158417, "grad_norm": 1.003321886062622, "learning_rate": 9.32690416776556e-07, "loss": 0.9544, "step": 4189 }, { "epoch": 0.9055543548735682, "grad_norm": 1.2458070516586304, "learning_rate": 9.284696428064577e-07, "loss": 0.9224, "step": 4190 }, { "epoch": 0.9057704776312946, "grad_norm": 1.2427130937576294, "learning_rate": 9.242582138421441e-07, "loss": 0.8655, "step": 4191 }, { "epoch": 0.905986600389021, "grad_norm": 0.9118661880493164, "learning_rate": 9.20056131947209e-07, "loss": 0.9911, "step": 4192 }, { "epoch": 0.9062027231467473, "grad_norm": 1.0034087896347046, "learning_rate": 9.158633991806631e-07, "loss": 0.8121, "step": 4193 }, { "epoch": 0.9064188459044737, "grad_norm": 0.9934155941009521, "learning_rate": 9.116800175969342e-07, "loss": 0.957, "step": 4194 }, { "epoch": 0.9066349686622002, "grad_norm": 1.0172679424285889, "learning_rate": 9.075059892458738e-07, "loss": 0.9426, "step": 4195 }, { "epoch": 0.9068510914199265, "grad_norm": 1.0507500171661377, "learning_rate": 9.033413161727411e-07, "loss": 0.8351, "step": 4196 }, { "epoch": 0.9070672141776529, "grad_norm": 0.9022180438041687, "learning_rate": 8.991860004182196e-07, "loss": 0.7201, "step": 4197 }, { "epoch": 0.9072833369353793, "grad_norm": 0.9793972373008728, "learning_rate": 8.950400440184004e-07, "loss": 0.8626, "step": 4198 }, { "epoch": 0.9074994596931056, "grad_norm": 0.952980637550354, "learning_rate": 8.909034490047964e-07, "loss": 0.7976, "step": 4199 }, { "epoch": 0.9077155824508321, "grad_norm": 0.993766188621521, "learning_rate": 8.867762174043304e-07, "loss": 0.9592, "step": 4200 }, { "epoch": 0.9079317052085585, "grad_norm": 0.9308351278305054, "learning_rate": 8.826583512393361e-07, "loss": 0.8082, "step": 4201 }, { "epoch": 0.9081478279662849, "grad_norm": 1.0772156715393066, "learning_rate": 8.785498525275505e-07, "loss": 0.9905, "step": 4202 }, { "epoch": 0.9083639507240112, "grad_norm": 1.0217748880386353, "learning_rate": 8.744507232821387e-07, "loss": 0.8301, "step": 4203 }, { "epoch": 0.9085800734817376, "grad_norm": 0.9618154764175415, "learning_rate": 8.703609655116608e-07, "loss": 0.8667, "step": 4204 }, { "epoch": 0.9087961962394641, "grad_norm": 0.9852338433265686, "learning_rate": 8.662805812200869e-07, "loss": 0.7885, "step": 4205 }, { "epoch": 0.9090123189971904, "grad_norm": 0.9149219989776611, "learning_rate": 8.62209572406798e-07, "loss": 0.7684, "step": 4206 }, { "epoch": 0.9092284417549168, "grad_norm": 1.0153368711471558, "learning_rate": 8.581479410665805e-07, "loss": 1.0269, "step": 4207 }, { "epoch": 0.9094445645126432, "grad_norm": 0.863723874092102, "learning_rate": 8.540956891896201e-07, "loss": 0.7367, "step": 4208 }, { "epoch": 0.9096606872703695, "grad_norm": 0.9872366189956665, "learning_rate": 8.500528187615131e-07, "loss": 0.7964, "step": 4209 }, { "epoch": 0.909876810028096, "grad_norm": 0.9526989459991455, "learning_rate": 8.46019331763257e-07, "loss": 0.9288, "step": 4210 }, { "epoch": 0.9100929327858224, "grad_norm": 0.9501885175704956, "learning_rate": 8.419952301712508e-07, "loss": 0.9276, "step": 4211 }, { "epoch": 0.9103090555435487, "grad_norm": 1.0456392765045166, "learning_rate": 8.379805159572951e-07, "loss": 0.9932, "step": 4212 }, { "epoch": 0.9105251783012751, "grad_norm": 1.0203437805175781, "learning_rate": 8.339751910885918e-07, "loss": 0.872, "step": 4213 }, { "epoch": 0.9107413010590015, "grad_norm": 0.8790463209152222, "learning_rate": 8.299792575277377e-07, "loss": 0.7509, "step": 4214 }, { "epoch": 0.910957423816728, "grad_norm": 0.8575922846794128, "learning_rate": 8.259927172327331e-07, "loss": 0.9013, "step": 4215 }, { "epoch": 0.9111735465744543, "grad_norm": 0.9011969566345215, "learning_rate": 8.220155721569689e-07, "loss": 0.8245, "step": 4216 }, { "epoch": 0.9113896693321807, "grad_norm": 1.3565723896026611, "learning_rate": 8.180478242492462e-07, "loss": 0.7886, "step": 4217 }, { "epoch": 0.9116057920899071, "grad_norm": 1.2235686779022217, "learning_rate": 8.140894754537476e-07, "loss": 0.9486, "step": 4218 }, { "epoch": 0.9118219148476334, "grad_norm": 1.0132081508636475, "learning_rate": 8.101405277100549e-07, "loss": 0.8002, "step": 4219 }, { "epoch": 0.9120380376053598, "grad_norm": 0.9781072735786438, "learning_rate": 8.062009829531381e-07, "loss": 0.9461, "step": 4220 }, { "epoch": 0.9122541603630863, "grad_norm": 1.0441522598266602, "learning_rate": 8.022708431133752e-07, "loss": 0.8362, "step": 4221 }, { "epoch": 0.9124702831208126, "grad_norm": 0.8778496980667114, "learning_rate": 7.983501101165169e-07, "loss": 0.9303, "step": 4222 }, { "epoch": 0.912686405878539, "grad_norm": 1.0359258651733398, "learning_rate": 7.944387858837199e-07, "loss": 1.0722, "step": 4223 }, { "epoch": 0.9129025286362654, "grad_norm": 0.9095891118049622, "learning_rate": 7.905368723315199e-07, "loss": 0.8379, "step": 4224 }, { "epoch": 0.9131186513939918, "grad_norm": 1.0899566411972046, "learning_rate": 7.866443713718453e-07, "loss": 0.977, "step": 4225 }, { "epoch": 0.9133347741517182, "grad_norm": 1.255446434020996, "learning_rate": 7.827612849120148e-07, "loss": 0.8697, "step": 4226 }, { "epoch": 0.9135508969094446, "grad_norm": 1.0579952001571655, "learning_rate": 7.788876148547308e-07, "loss": 0.9245, "step": 4227 }, { "epoch": 0.913767019667171, "grad_norm": 1.039961576461792, "learning_rate": 7.750233630980841e-07, "loss": 0.716, "step": 4228 }, { "epoch": 0.9139831424248973, "grad_norm": 0.9159168601036072, "learning_rate": 7.711685315355444e-07, "loss": 0.8762, "step": 4229 }, { "epoch": 0.9141992651826237, "grad_norm": 1.0014218091964722, "learning_rate": 7.673231220559785e-07, "loss": 0.9218, "step": 4230 }, { "epoch": 0.9144153879403502, "grad_norm": 1.0748810768127441, "learning_rate": 7.634871365436192e-07, "loss": 0.9407, "step": 4231 }, { "epoch": 0.9146315106980765, "grad_norm": 1.0846792459487915, "learning_rate": 7.596605768780962e-07, "loss": 0.8723, "step": 4232 }, { "epoch": 0.9148476334558029, "grad_norm": 0.8718065023422241, "learning_rate": 7.558434449344143e-07, "loss": 0.753, "step": 4233 }, { "epoch": 0.9150637562135293, "grad_norm": 0.9065682291984558, "learning_rate": 7.520357425829528e-07, "loss": 0.8227, "step": 4234 }, { "epoch": 0.9152798789712556, "grad_norm": 1.1097488403320312, "learning_rate": 7.482374716894902e-07, "loss": 0.9782, "step": 4235 }, { "epoch": 0.915496001728982, "grad_norm": 1.0259422063827515, "learning_rate": 7.444486341151602e-07, "loss": 1.0459, "step": 4236 }, { "epoch": 0.9157121244867085, "grad_norm": 0.9476332664489746, "learning_rate": 7.406692317164865e-07, "loss": 0.9825, "step": 4237 }, { "epoch": 0.9159282472444349, "grad_norm": 0.9656356573104858, "learning_rate": 7.368992663453656e-07, "loss": 0.9509, "step": 4238 }, { "epoch": 0.9161443700021612, "grad_norm": 0.938168466091156, "learning_rate": 7.331387398490753e-07, "loss": 0.9848, "step": 4239 }, { "epoch": 0.9163604927598876, "grad_norm": 1.0702391862869263, "learning_rate": 7.29387654070266e-07, "loss": 0.8139, "step": 4240 }, { "epoch": 0.916576615517614, "grad_norm": 0.9183998703956604, "learning_rate": 7.25646010846961e-07, "loss": 0.885, "step": 4241 }, { "epoch": 0.9167927382753404, "grad_norm": 0.9607285261154175, "learning_rate": 7.21913812012549e-07, "loss": 0.9994, "step": 4242 }, { "epoch": 0.9170088610330668, "grad_norm": 0.8861908316612244, "learning_rate": 7.181910593958075e-07, "loss": 0.8015, "step": 4243 }, { "epoch": 0.9172249837907932, "grad_norm": 0.9602293372154236, "learning_rate": 7.144777548208748e-07, "loss": 0.943, "step": 4244 }, { "epoch": 0.9174411065485195, "grad_norm": 1.086655616760254, "learning_rate": 7.107739001072578e-07, "loss": 0.8883, "step": 4245 }, { "epoch": 0.9176572293062459, "grad_norm": 1.0744946002960205, "learning_rate": 7.070794970698425e-07, "loss": 1.0598, "step": 4246 }, { "epoch": 0.9178733520639724, "grad_norm": 0.9552499055862427, "learning_rate": 7.033945475188741e-07, "loss": 0.9892, "step": 4247 }, { "epoch": 0.9180894748216987, "grad_norm": 1.0425517559051514, "learning_rate": 6.997190532599685e-07, "loss": 0.8853, "step": 4248 }, { "epoch": 0.9183055975794251, "grad_norm": 1.0558618307113647, "learning_rate": 6.960530160941136e-07, "loss": 0.9576, "step": 4249 }, { "epoch": 0.9185217203371515, "grad_norm": 0.9843370318412781, "learning_rate": 6.923964378176551e-07, "loss": 1.045, "step": 4250 }, { "epoch": 0.9187378430948779, "grad_norm": 1.0195176601409912, "learning_rate": 6.887493202223083e-07, "loss": 0.8838, "step": 4251 }, { "epoch": 0.9189539658526042, "grad_norm": 1.0274927616119385, "learning_rate": 6.851116650951528e-07, "loss": 0.858, "step": 4252 }, { "epoch": 0.9191700886103307, "grad_norm": 0.9396346211433411, "learning_rate": 6.814834742186361e-07, "loss": 0.8896, "step": 4253 }, { "epoch": 0.9193862113680571, "grad_norm": 0.944214403629303, "learning_rate": 6.778647493705559e-07, "loss": 0.7939, "step": 4254 }, { "epoch": 0.9196023341257834, "grad_norm": 1.110487461090088, "learning_rate": 6.742554923240829e-07, "loss": 1.0676, "step": 4255 }, { "epoch": 0.9198184568835098, "grad_norm": 1.0984482765197754, "learning_rate": 6.706557048477425e-07, "loss": 0.9112, "step": 4256 }, { "epoch": 0.9200345796412362, "grad_norm": 0.9708102941513062, "learning_rate": 6.670653887054235e-07, "loss": 0.8405, "step": 4257 }, { "epoch": 0.9202507023989626, "grad_norm": 1.0560801029205322, "learning_rate": 6.634845456563766e-07, "loss": 1.0735, "step": 4258 }, { "epoch": 0.920466825156689, "grad_norm": 1.0283024311065674, "learning_rate": 6.599131774552003e-07, "loss": 0.7695, "step": 4259 }, { "epoch": 0.9206829479144154, "grad_norm": 1.0239213705062866, "learning_rate": 6.563512858518573e-07, "loss": 0.9256, "step": 4260 }, { "epoch": 0.9208990706721418, "grad_norm": 1.0854101181030273, "learning_rate": 6.527988725916712e-07, "loss": 0.9957, "step": 4261 }, { "epoch": 0.9211151934298681, "grad_norm": 0.8721448183059692, "learning_rate": 6.492559394153119e-07, "loss": 0.6199, "step": 4262 }, { "epoch": 0.9213313161875946, "grad_norm": 0.9405100345611572, "learning_rate": 6.457224880588108e-07, "loss": 0.7663, "step": 4263 }, { "epoch": 0.921547438945321, "grad_norm": 1.0795950889587402, "learning_rate": 6.421985202535497e-07, "loss": 1.0405, "step": 4264 }, { "epoch": 0.9217635617030473, "grad_norm": 0.8960533142089844, "learning_rate": 6.386840377262626e-07, "loss": 0.8689, "step": 4265 }, { "epoch": 0.9219796844607737, "grad_norm": 1.0131511688232422, "learning_rate": 6.351790421990434e-07, "loss": 0.8257, "step": 4266 }, { "epoch": 0.9221958072185001, "grad_norm": 0.9130016565322876, "learning_rate": 6.31683535389327e-07, "loss": 0.9669, "step": 4267 }, { "epoch": 0.9224119299762265, "grad_norm": 1.104579210281372, "learning_rate": 6.281975190099055e-07, "loss": 1.077, "step": 4268 }, { "epoch": 0.9226280527339529, "grad_norm": 0.9432905912399292, "learning_rate": 6.247209947689192e-07, "loss": 0.9165, "step": 4269 }, { "epoch": 0.9228441754916793, "grad_norm": 1.0555541515350342, "learning_rate": 6.212539643698546e-07, "loss": 1.0482, "step": 4270 }, { "epoch": 0.9230602982494056, "grad_norm": 0.9618312120437622, "learning_rate": 6.177964295115502e-07, "loss": 0.856, "step": 4271 }, { "epoch": 0.923276421007132, "grad_norm": 0.9917889833450317, "learning_rate": 6.14348391888191e-07, "loss": 0.8548, "step": 4272 }, { "epoch": 0.9234925437648585, "grad_norm": 0.9678471088409424, "learning_rate": 6.109098531893076e-07, "loss": 0.8315, "step": 4273 }, { "epoch": 0.9237086665225849, "grad_norm": 1.0311787128448486, "learning_rate": 6.074808150997724e-07, "loss": 0.9404, "step": 4274 }, { "epoch": 0.9239247892803112, "grad_norm": 0.880062460899353, "learning_rate": 6.040612792998124e-07, "loss": 0.7252, "step": 4275 }, { "epoch": 0.9241409120380376, "grad_norm": 0.978182315826416, "learning_rate": 6.006512474649873e-07, "loss": 0.7556, "step": 4276 }, { "epoch": 0.924357034795764, "grad_norm": 1.063117504119873, "learning_rate": 5.972507212662048e-07, "loss": 1.0127, "step": 4277 }, { "epoch": 0.9245731575534903, "grad_norm": 1.0114563703536987, "learning_rate": 5.938597023697146e-07, "loss": 0.8317, "step": 4278 }, { "epoch": 0.9247892803112168, "grad_norm": 0.9993071556091309, "learning_rate": 5.904781924371117e-07, "loss": 0.9688, "step": 4279 }, { "epoch": 0.9250054030689432, "grad_norm": 0.8388939499855042, "learning_rate": 5.871061931253263e-07, "loss": 0.885, "step": 4280 }, { "epoch": 0.9252215258266695, "grad_norm": 1.0512640476226807, "learning_rate": 5.837437060866325e-07, "loss": 0.9746, "step": 4281 }, { "epoch": 0.9254376485843959, "grad_norm": 1.0189282894134521, "learning_rate": 5.803907329686342e-07, "loss": 0.837, "step": 4282 }, { "epoch": 0.9256537713421223, "grad_norm": 0.9870699048042297, "learning_rate": 5.770472754142886e-07, "loss": 0.9099, "step": 4283 }, { "epoch": 0.9258698940998488, "grad_norm": 1.0658068656921387, "learning_rate": 5.737133350618762e-07, "loss": 0.9256, "step": 4284 }, { "epoch": 0.9260860168575751, "grad_norm": 0.9333972334861755, "learning_rate": 5.703889135450258e-07, "loss": 0.7999, "step": 4285 }, { "epoch": 0.9263021396153015, "grad_norm": 1.1418362855911255, "learning_rate": 5.670740124926898e-07, "loss": 0.7904, "step": 4286 }, { "epoch": 0.9265182623730279, "grad_norm": 1.0817952156066895, "learning_rate": 5.63768633529167e-07, "loss": 1.0475, "step": 4287 }, { "epoch": 0.9267343851307542, "grad_norm": 0.9698407053947449, "learning_rate": 5.604727782740838e-07, "loss": 0.7099, "step": 4288 }, { "epoch": 0.9269505078884807, "grad_norm": 0.8985784649848938, "learning_rate": 5.571864483423994e-07, "loss": 0.8405, "step": 4289 }, { "epoch": 0.9271666306462071, "grad_norm": 1.0659029483795166, "learning_rate": 5.539096453444126e-07, "loss": 0.9841, "step": 4290 }, { "epoch": 0.9273827534039334, "grad_norm": 1.0063865184783936, "learning_rate": 5.506423708857456e-07, "loss": 1.1218, "step": 4291 }, { "epoch": 0.9275988761616598, "grad_norm": 1.0510445833206177, "learning_rate": 5.473846265673532e-07, "loss": 0.9045, "step": 4292 }, { "epoch": 0.9278149989193862, "grad_norm": 1.0389801263809204, "learning_rate": 5.441364139855321e-07, "loss": 0.6456, "step": 4293 }, { "epoch": 0.9280311216771125, "grad_norm": 0.9817002415657043, "learning_rate": 5.408977347318889e-07, "loss": 0.9881, "step": 4294 }, { "epoch": 0.928247244434839, "grad_norm": 0.9527703523635864, "learning_rate": 5.376685903933743e-07, "loss": 0.849, "step": 4295 }, { "epoch": 0.9284633671925654, "grad_norm": 1.1319552659988403, "learning_rate": 5.344489825522581e-07, "loss": 0.9348, "step": 4296 }, { "epoch": 0.9286794899502918, "grad_norm": 1.188833475112915, "learning_rate": 5.312389127861428e-07, "loss": 0.9795, "step": 4297 }, { "epoch": 0.9288956127080181, "grad_norm": 1.1184444427490234, "learning_rate": 5.280383826679591e-07, "loss": 0.9105, "step": 4298 }, { "epoch": 0.9291117354657445, "grad_norm": 1.1532635688781738, "learning_rate": 5.248473937659504e-07, "loss": 0.7492, "step": 4299 }, { "epoch": 0.929327858223471, "grad_norm": 0.9561310410499573, "learning_rate": 5.216659476436991e-07, "loss": 0.9266, "step": 4300 }, { "epoch": 0.9295439809811973, "grad_norm": 1.0445328950881958, "learning_rate": 5.184940458601073e-07, "loss": 0.8585, "step": 4301 }, { "epoch": 0.9297601037389237, "grad_norm": 0.8858436346054077, "learning_rate": 5.153316899693983e-07, "loss": 0.7677, "step": 4302 }, { "epoch": 0.9299762264966501, "grad_norm": 0.9820959568023682, "learning_rate": 5.121788815211193e-07, "loss": 1.0154, "step": 4303 }, { "epoch": 0.9301923492543764, "grad_norm": 0.9774004817008972, "learning_rate": 5.090356220601389e-07, "loss": 0.898, "step": 4304 }, { "epoch": 0.9304084720121029, "grad_norm": 0.9356990456581116, "learning_rate": 5.059019131266474e-07, "loss": 0.7913, "step": 4305 }, { "epoch": 0.9306245947698293, "grad_norm": 0.8759821653366089, "learning_rate": 5.027777562561542e-07, "loss": 0.8859, "step": 4306 }, { "epoch": 0.9308407175275557, "grad_norm": 1.1266238689422607, "learning_rate": 4.996631529794882e-07, "loss": 0.9109, "step": 4307 }, { "epoch": 0.931056840285282, "grad_norm": 0.9832292795181274, "learning_rate": 4.965581048227997e-07, "loss": 0.8228, "step": 4308 }, { "epoch": 0.9312729630430084, "grad_norm": 0.9165593385696411, "learning_rate": 4.93462613307556e-07, "loss": 0.7686, "step": 4309 }, { "epoch": 0.9314890858007349, "grad_norm": 1.1463444232940674, "learning_rate": 4.903766799505372e-07, "loss": 0.9074, "step": 4310 }, { "epoch": 0.9317052085584612, "grad_norm": 1.0148438215255737, "learning_rate": 4.873003062638471e-07, "loss": 1.0103, "step": 4311 }, { "epoch": 0.9319213313161876, "grad_norm": 0.9984873533248901, "learning_rate": 4.842334937548976e-07, "loss": 0.8455, "step": 4312 }, { "epoch": 0.932137454073914, "grad_norm": 1.06214439868927, "learning_rate": 4.811762439264244e-07, "loss": 0.8178, "step": 4313 }, { "epoch": 0.9323535768316403, "grad_norm": 1.0038710832595825, "learning_rate": 4.781285582764694e-07, "loss": 1.0596, "step": 4314 }, { "epoch": 0.9325696995893668, "grad_norm": 0.8882947564125061, "learning_rate": 4.750904382983934e-07, "loss": 0.8929, "step": 4315 }, { "epoch": 0.9327858223470932, "grad_norm": 0.9856219291687012, "learning_rate": 4.720618854808678e-07, "loss": 0.8802, "step": 4316 }, { "epoch": 0.9330019451048195, "grad_norm": 1.1882649660110474, "learning_rate": 4.690429013078768e-07, "loss": 1.1672, "step": 4317 }, { "epoch": 0.9332180678625459, "grad_norm": 0.9748342633247375, "learning_rate": 4.6603348725871244e-07, "loss": 1.0518, "step": 4318 }, { "epoch": 0.9334341906202723, "grad_norm": 0.9472171664237976, "learning_rate": 4.630336448079864e-07, "loss": 0.8179, "step": 4319 }, { "epoch": 0.9336503133779988, "grad_norm": 0.896943986415863, "learning_rate": 4.600433754256095e-07, "loss": 0.7086, "step": 4320 }, { "epoch": 0.9338664361357251, "grad_norm": 1.007301688194275, "learning_rate": 4.570626805768119e-07, "loss": 1.0587, "step": 4321 }, { "epoch": 0.9340825588934515, "grad_norm": 0.9737399816513062, "learning_rate": 4.540915617221187e-07, "loss": 0.6853, "step": 4322 }, { "epoch": 0.9342986816511779, "grad_norm": 1.0090069770812988, "learning_rate": 4.511300203173807e-07, "loss": 0.948, "step": 4323 }, { "epoch": 0.9345148044089042, "grad_norm": 0.9337702393531799, "learning_rate": 4.4817805781374177e-07, "loss": 0.808, "step": 4324 }, { "epoch": 0.9347309271666306, "grad_norm": 0.9156675934791565, "learning_rate": 4.4523567565765593e-07, "loss": 0.76, "step": 4325 }, { "epoch": 0.9349470499243571, "grad_norm": 1.023437261581421, "learning_rate": 4.4230287529088534e-07, "loss": 0.9403, "step": 4326 }, { "epoch": 0.9351631726820834, "grad_norm": 1.1438939571380615, "learning_rate": 4.393796581504961e-07, "loss": 0.8609, "step": 4327 }, { "epoch": 0.9353792954398098, "grad_norm": 0.9754688739776611, "learning_rate": 4.364660256688558e-07, "loss": 0.8773, "step": 4328 }, { "epoch": 0.9355954181975362, "grad_norm": 0.9838240146636963, "learning_rate": 4.3356197927363786e-07, "loss": 1.0625, "step": 4329 }, { "epoch": 0.9358115409552626, "grad_norm": 1.1422899961471558, "learning_rate": 4.306675203878219e-07, "loss": 0.8681, "step": 4330 }, { "epoch": 0.936027663712989, "grad_norm": 0.9374284744262695, "learning_rate": 4.2778265042968003e-07, "loss": 0.8327, "step": 4331 }, { "epoch": 0.9362437864707154, "grad_norm": 1.1640714406967163, "learning_rate": 4.2490737081279487e-07, "loss": 1.0628, "step": 4332 }, { "epoch": 0.9364599092284418, "grad_norm": 1.0389443635940552, "learning_rate": 4.220416829460505e-07, "loss": 1.0365, "step": 4333 }, { "epoch": 0.9366760319861681, "grad_norm": 0.9568149447441101, "learning_rate": 4.1918558823362155e-07, "loss": 0.8681, "step": 4334 }, { "epoch": 0.9368921547438945, "grad_norm": 0.9431428909301758, "learning_rate": 4.1633908807498847e-07, "loss": 0.9613, "step": 4335 }, { "epoch": 0.937108277501621, "grad_norm": 1.0188632011413574, "learning_rate": 4.1350218386493115e-07, "loss": 0.8431, "step": 4336 }, { "epoch": 0.9373244002593473, "grad_norm": 0.9922558665275574, "learning_rate": 4.106748769935287e-07, "loss": 0.8872, "step": 4337 }, { "epoch": 0.9375405230170737, "grad_norm": 0.856784999370575, "learning_rate": 4.07857168846153e-07, "loss": 0.9267, "step": 4338 }, { "epoch": 0.9377566457748001, "grad_norm": 0.8459962606430054, "learning_rate": 4.050490608034729e-07, "loss": 0.7388, "step": 4339 }, { "epoch": 0.9379727685325264, "grad_norm": 1.016809344291687, "learning_rate": 4.022505542414545e-07, "loss": 1.0432, "step": 4340 }, { "epoch": 0.9381888912902528, "grad_norm": 0.947068989276886, "learning_rate": 3.994616505313631e-07, "loss": 0.7964, "step": 4341 }, { "epoch": 0.9384050140479793, "grad_norm": 0.9337932467460632, "learning_rate": 3.966823510397522e-07, "loss": 0.8459, "step": 4342 }, { "epoch": 0.9386211368057057, "grad_norm": 1.0331088304519653, "learning_rate": 3.9391265712847236e-07, "loss": 0.8917, "step": 4343 }, { "epoch": 0.938837259563432, "grad_norm": 1.0333054065704346, "learning_rate": 3.9115257015466923e-07, "loss": 0.719, "step": 4344 }, { "epoch": 0.9390533823211584, "grad_norm": 1.0131577253341675, "learning_rate": 3.8840209147077866e-07, "loss": 0.7622, "step": 4345 }, { "epoch": 0.9392695050788848, "grad_norm": 0.9322049021720886, "learning_rate": 3.856612224245249e-07, "loss": 0.7806, "step": 4346 }, { "epoch": 0.9394856278366112, "grad_norm": 0.9983024001121521, "learning_rate": 3.8292996435893125e-07, "loss": 0.8483, "step": 4347 }, { "epoch": 0.9397017505943376, "grad_norm": 0.9335387945175171, "learning_rate": 3.8020831861230733e-07, "loss": 0.7184, "step": 4348 }, { "epoch": 0.939917873352064, "grad_norm": 1.0600119829177856, "learning_rate": 3.77496286518253e-07, "loss": 1.0294, "step": 4349 }, { "epoch": 0.9401339961097903, "grad_norm": 0.9346319437026978, "learning_rate": 3.747938694056585e-07, "loss": 0.9171, "step": 4350 }, { "epoch": 0.9403501188675167, "grad_norm": 1.1037440299987793, "learning_rate": 3.721010685987003e-07, "loss": 0.925, "step": 4351 }, { "epoch": 0.9405662416252432, "grad_norm": 0.9268629550933838, "learning_rate": 3.6941788541684507e-07, "loss": 0.8545, "step": 4352 }, { "epoch": 0.9407823643829695, "grad_norm": 1.0709277391433716, "learning_rate": 3.667443211748456e-07, "loss": 0.9047, "step": 4353 }, { "epoch": 0.9409984871406959, "grad_norm": 0.9865515232086182, "learning_rate": 3.640803771827428e-07, "loss": 0.9506, "step": 4354 }, { "epoch": 0.9412146098984223, "grad_norm": 1.1726608276367188, "learning_rate": 3.614260547458659e-07, "loss": 0.9874, "step": 4355 }, { "epoch": 0.9414307326561487, "grad_norm": 0.9988002777099609, "learning_rate": 3.587813551648256e-07, "loss": 1.0136, "step": 4356 }, { "epoch": 0.941646855413875, "grad_norm": 0.9838830232620239, "learning_rate": 3.561462797355142e-07, "loss": 0.9081, "step": 4357 }, { "epoch": 0.9418629781716015, "grad_norm": 0.8589175343513489, "learning_rate": 3.535208297491144e-07, "loss": 0.6842, "step": 4358 }, { "epoch": 0.9420791009293279, "grad_norm": 1.0426251888275146, "learning_rate": 3.509050064920949e-07, "loss": 1.096, "step": 4359 }, { "epoch": 0.9422952236870542, "grad_norm": 0.873805820941925, "learning_rate": 3.4829881124619933e-07, "loss": 0.8171, "step": 4360 }, { "epoch": 0.9425113464447806, "grad_norm": 0.9295645356178284, "learning_rate": 3.4570224528845953e-07, "loss": 0.891, "step": 4361 }, { "epoch": 0.942727469202507, "grad_norm": 0.9706746935844421, "learning_rate": 3.4311530989118215e-07, "loss": 0.7913, "step": 4362 }, { "epoch": 0.9429435919602334, "grad_norm": 0.9419608116149902, "learning_rate": 3.4053800632196434e-07, "loss": 0.8183, "step": 4363 }, { "epoch": 0.9431597147179598, "grad_norm": 0.983264684677124, "learning_rate": 3.379703358436781e-07, "loss": 0.9577, "step": 4364 }, { "epoch": 0.9433758374756862, "grad_norm": 0.9642937183380127, "learning_rate": 3.3541229971447487e-07, "loss": 0.8284, "step": 4365 }, { "epoch": 0.9435919602334126, "grad_norm": 0.954237163066864, "learning_rate": 3.328638991877853e-07, "loss": 1.0989, "step": 4366 }, { "epoch": 0.9438080829911389, "grad_norm": 1.041129469871521, "learning_rate": 3.303251355123238e-07, "loss": 0.7665, "step": 4367 }, { "epoch": 0.9440242057488654, "grad_norm": 0.9410345554351807, "learning_rate": 3.277960099320732e-07, "loss": 0.831, "step": 4368 }, { "epoch": 0.9442403285065918, "grad_norm": 1.1055059432983398, "learning_rate": 3.2527652368630426e-07, "loss": 0.8237, "step": 4369 }, { "epoch": 0.9444564512643181, "grad_norm": 0.9659721851348877, "learning_rate": 3.227666780095584e-07, "loss": 0.9486, "step": 4370 }, { "epoch": 0.9446725740220445, "grad_norm": 1.0501004457473755, "learning_rate": 3.202664741316519e-07, "loss": 0.922, "step": 4371 }, { "epoch": 0.9448886967797709, "grad_norm": 1.0579583644866943, "learning_rate": 3.177759132776781e-07, "loss": 0.9856, "step": 4372 }, { "epoch": 0.9451048195374973, "grad_norm": 0.9522593021392822, "learning_rate": 3.1529499666800965e-07, "loss": 0.8259, "step": 4373 }, { "epoch": 0.9453209422952237, "grad_norm": 0.997139036655426, "learning_rate": 3.1282372551828975e-07, "loss": 0.8049, "step": 4374 }, { "epoch": 0.9455370650529501, "grad_norm": 1.0258209705352783, "learning_rate": 3.10362101039432e-07, "loss": 0.8842, "step": 4375 }, { "epoch": 0.9457531878106764, "grad_norm": 1.07490873336792, "learning_rate": 3.0791012443762924e-07, "loss": 0.9115, "step": 4376 }, { "epoch": 0.9459693105684028, "grad_norm": 1.0216132402420044, "learning_rate": 3.054677969143449e-07, "loss": 0.8965, "step": 4377 }, { "epoch": 0.9461854333261293, "grad_norm": 0.8703557252883911, "learning_rate": 3.030351196663128e-07, "loss": 0.7417, "step": 4378 }, { "epoch": 0.9464015560838557, "grad_norm": 0.983020544052124, "learning_rate": 3.0061209388553945e-07, "loss": 0.9063, "step": 4379 }, { "epoch": 0.946617678841582, "grad_norm": 0.9806068539619446, "learning_rate": 2.981987207592996e-07, "loss": 0.8552, "step": 4380 }, { "epoch": 0.9468338015993084, "grad_norm": 0.9600115418434143, "learning_rate": 2.9579500147014496e-07, "loss": 0.8382, "step": 4381 }, { "epoch": 0.9470499243570348, "grad_norm": 0.8612217307090759, "learning_rate": 2.93400937195889e-07, "loss": 0.8051, "step": 4382 }, { "epoch": 0.9472660471147611, "grad_norm": 1.0909043550491333, "learning_rate": 2.9101652910961785e-07, "loss": 0.847, "step": 4383 }, { "epoch": 0.9474821698724876, "grad_norm": 0.9872725605964661, "learning_rate": 2.88641778379688e-07, "loss": 0.8161, "step": 4384 }, { "epoch": 0.947698292630214, "grad_norm": 0.8272393941879272, "learning_rate": 2.8627668616972194e-07, "loss": 0.865, "step": 4385 }, { "epoch": 0.9479144153879403, "grad_norm": 1.1792147159576416, "learning_rate": 2.83921253638606e-07, "loss": 0.978, "step": 4386 }, { "epoch": 0.9481305381456667, "grad_norm": 1.021734595298767, "learning_rate": 2.815754819404992e-07, "loss": 0.9533, "step": 4387 }, { "epoch": 0.9483466609033931, "grad_norm": 0.8528704643249512, "learning_rate": 2.7923937222482436e-07, "loss": 0.8307, "step": 4388 }, { "epoch": 0.9485627836611196, "grad_norm": 0.9459760189056396, "learning_rate": 2.7691292563627016e-07, "loss": 0.8365, "step": 4389 }, { "epoch": 0.9487789064188459, "grad_norm": 0.9810954332351685, "learning_rate": 2.74596143314787e-07, "loss": 0.8873, "step": 4390 }, { "epoch": 0.9489950291765723, "grad_norm": 0.9510730504989624, "learning_rate": 2.7228902639559575e-07, "loss": 0.7129, "step": 4391 }, { "epoch": 0.9492111519342987, "grad_norm": 1.1085160970687866, "learning_rate": 2.699915760091787e-07, "loss": 0.8964, "step": 4392 }, { "epoch": 0.949427274692025, "grad_norm": 1.085143804550171, "learning_rate": 2.6770379328127983e-07, "loss": 0.7079, "step": 4393 }, { "epoch": 0.9496433974497515, "grad_norm": 0.9611392617225647, "learning_rate": 2.654256793329069e-07, "loss": 0.9965, "step": 4394 }, { "epoch": 0.9498595202074779, "grad_norm": 1.0482733249664307, "learning_rate": 2.6315723528033133e-07, "loss": 1.0366, "step": 4395 }, { "epoch": 0.9500756429652042, "grad_norm": 0.9436819553375244, "learning_rate": 2.6089846223508853e-07, "loss": 1.0193, "step": 4396 }, { "epoch": 0.9502917657229306, "grad_norm": 1.0251249074935913, "learning_rate": 2.5864936130396647e-07, "loss": 0.8849, "step": 4397 }, { "epoch": 0.950507888480657, "grad_norm": 1.0731300115585327, "learning_rate": 2.564099335890191e-07, "loss": 0.9058, "step": 4398 }, { "epoch": 0.9507240112383833, "grad_norm": 1.1696685552597046, "learning_rate": 2.5418018018756876e-07, "loss": 0.8267, "step": 4399 }, { "epoch": 0.9509401339961098, "grad_norm": 1.0380321741104126, "learning_rate": 2.519601021921814e-07, "loss": 0.9227, "step": 4400 }, { "epoch": 0.9511562567538362, "grad_norm": 1.0092154741287231, "learning_rate": 2.497497006906957e-07, "loss": 0.9168, "step": 4401 }, { "epoch": 0.9513723795115626, "grad_norm": 1.201551079750061, "learning_rate": 2.4754897676619647e-07, "loss": 0.8225, "step": 4402 }, { "epoch": 0.9515885022692889, "grad_norm": 0.9235658645629883, "learning_rate": 2.4535793149704114e-07, "loss": 0.8592, "step": 4403 }, { "epoch": 0.9518046250270153, "grad_norm": 1.0101706981658936, "learning_rate": 2.4317656595683305e-07, "loss": 0.9089, "step": 4404 }, { "epoch": 0.9520207477847418, "grad_norm": 0.9426020979881287, "learning_rate": 2.41004881214435e-07, "loss": 0.8982, "step": 4405 }, { "epoch": 0.9522368705424681, "grad_norm": 0.9659827947616577, "learning_rate": 2.3884287833396915e-07, "loss": 0.8039, "step": 4406 }, { "epoch": 0.9524529933001945, "grad_norm": 0.9621565937995911, "learning_rate": 2.3669055837481247e-07, "loss": 0.9724, "step": 4407 }, { "epoch": 0.9526691160579209, "grad_norm": 0.9790732860565186, "learning_rate": 2.3454792239159474e-07, "loss": 0.8141, "step": 4408 }, { "epoch": 0.9528852388156472, "grad_norm": 0.9651133418083191, "learning_rate": 2.32414971434205e-07, "loss": 0.8466, "step": 4409 }, { "epoch": 0.9531013615733737, "grad_norm": 0.9662875533103943, "learning_rate": 2.3029170654778277e-07, "loss": 1.0337, "step": 4410 }, { "epoch": 0.9533174843311001, "grad_norm": 1.0735738277435303, "learning_rate": 2.2817812877272471e-07, "loss": 0.9644, "step": 4411 }, { "epoch": 0.9535336070888265, "grad_norm": 1.1095219850540161, "learning_rate": 2.2607423914467575e-07, "loss": 0.7762, "step": 4412 }, { "epoch": 0.9537497298465528, "grad_norm": 1.1492083072662354, "learning_rate": 2.239800386945401e-07, "loss": 1.0877, "step": 4413 }, { "epoch": 0.9539658526042792, "grad_norm": 1.0608298778533936, "learning_rate": 2.2189552844847027e-07, "loss": 0.8082, "step": 4414 }, { "epoch": 0.9541819753620057, "grad_norm": 1.0046215057373047, "learning_rate": 2.1982070942786927e-07, "loss": 1.0174, "step": 4415 }, { "epoch": 0.954398098119732, "grad_norm": 1.0800167322158813, "learning_rate": 2.1775558264939488e-07, "loss": 0.997, "step": 4416 }, { "epoch": 0.9546142208774584, "grad_norm": 1.068735122680664, "learning_rate": 2.1570014912495773e-07, "loss": 0.902, "step": 4417 }, { "epoch": 0.9548303436351848, "grad_norm": 0.9986612796783447, "learning_rate": 2.1365440986171215e-07, "loss": 0.9912, "step": 4418 }, { "epoch": 0.9550464663929111, "grad_norm": 1.009142518043518, "learning_rate": 2.1161836586206742e-07, "loss": 0.7025, "step": 4419 }, { "epoch": 0.9552625891506376, "grad_norm": 0.9367358088493347, "learning_rate": 2.0959201812367658e-07, "loss": 0.8448, "step": 4420 }, { "epoch": 0.955478711908364, "grad_norm": 0.9901167154312134, "learning_rate": 2.0757536763944985e-07, "loss": 0.9174, "step": 4421 }, { "epoch": 0.9556948346660903, "grad_norm": 1.069323182106018, "learning_rate": 2.0556841539753903e-07, "loss": 0.8929, "step": 4422 }, { "epoch": 0.9559109574238167, "grad_norm": 0.8847269415855408, "learning_rate": 2.0357116238134633e-07, "loss": 0.8808, "step": 4423 }, { "epoch": 0.9561270801815431, "grad_norm": 0.9327659606933594, "learning_rate": 2.0158360956952004e-07, "loss": 0.959, "step": 4424 }, { "epoch": 0.9563432029392696, "grad_norm": 1.008876085281372, "learning_rate": 1.9960575793595893e-07, "loss": 0.8631, "step": 4425 }, { "epoch": 0.9565593256969959, "grad_norm": 0.979472815990448, "learning_rate": 1.976376084498055e-07, "loss": 0.7011, "step": 4426 }, { "epoch": 0.9567754484547223, "grad_norm": 0.9732769727706909, "learning_rate": 1.9567916207544612e-07, "loss": 0.7911, "step": 4427 }, { "epoch": 0.9569915712124487, "grad_norm": 1.0720874071121216, "learning_rate": 1.9373041977251762e-07, "loss": 0.8465, "step": 4428 }, { "epoch": 0.957207693970175, "grad_norm": 1.0365686416625977, "learning_rate": 1.9179138249589836e-07, "loss": 1.0038, "step": 4429 }, { "epoch": 0.9574238167279014, "grad_norm": 1.026694655418396, "learning_rate": 1.8986205119571055e-07, "loss": 0.7158, "step": 4430 }, { "epoch": 0.9576399394856279, "grad_norm": 1.1193565130233765, "learning_rate": 1.8794242681732243e-07, "loss": 0.9759, "step": 4431 }, { "epoch": 0.9578560622433542, "grad_norm": 0.900276243686676, "learning_rate": 1.8603251030134606e-07, "loss": 0.7723, "step": 4432 }, { "epoch": 0.9580721850010806, "grad_norm": 0.9539914727210999, "learning_rate": 1.8413230258363946e-07, "loss": 0.8152, "step": 4433 }, { "epoch": 0.958288307758807, "grad_norm": 0.9081903696060181, "learning_rate": 1.8224180459529338e-07, "loss": 0.8354, "step": 4434 }, { "epoch": 0.9585044305165334, "grad_norm": 0.9369721412658691, "learning_rate": 1.8036101726265133e-07, "loss": 0.7079, "step": 4435 }, { "epoch": 0.9587205532742598, "grad_norm": 1.1081403493881226, "learning_rate": 1.7848994150729825e-07, "loss": 0.8549, "step": 4436 }, { "epoch": 0.9589366760319862, "grad_norm": 0.9961804747581482, "learning_rate": 1.7662857824604972e-07, "loss": 0.897, "step": 4437 }, { "epoch": 0.9591527987897126, "grad_norm": 0.8851146697998047, "learning_rate": 1.747769283909717e-07, "loss": 0.8173, "step": 4438 }, { "epoch": 0.9593689215474389, "grad_norm": 1.0285462141036987, "learning_rate": 1.7293499284937177e-07, "loss": 0.8903, "step": 4439 }, { "epoch": 0.9595850443051653, "grad_norm": 1.2063196897506714, "learning_rate": 1.7110277252379238e-07, "loss": 0.8745, "step": 4440 }, { "epoch": 0.9598011670628918, "grad_norm": 0.9206846952438354, "learning_rate": 1.692802683120154e-07, "loss": 0.9445, "step": 4441 }, { "epoch": 0.9600172898206181, "grad_norm": 1.0168124437332153, "learning_rate": 1.6746748110706422e-07, "loss": 0.672, "step": 4442 }, { "epoch": 0.9602334125783445, "grad_norm": 1.0631967782974243, "learning_rate": 1.656644117972017e-07, "loss": 0.8407, "step": 4443 }, { "epoch": 0.9604495353360709, "grad_norm": 1.1555129289627075, "learning_rate": 1.6387106126592778e-07, "loss": 0.8799, "step": 4444 }, { "epoch": 0.9606656580937972, "grad_norm": 0.9522283673286438, "learning_rate": 1.620874303919795e-07, "loss": 0.6942, "step": 4445 }, { "epoch": 0.9608817808515236, "grad_norm": 1.115195870399475, "learning_rate": 1.603135200493311e-07, "loss": 0.8846, "step": 4446 }, { "epoch": 0.9610979036092501, "grad_norm": 0.9391319155693054, "learning_rate": 1.5854933110719616e-07, "loss": 0.9165, "step": 4447 }, { "epoch": 0.9613140263669765, "grad_norm": 0.9205000996589661, "learning_rate": 1.56794864430021e-07, "loss": 0.9234, "step": 4448 }, { "epoch": 0.9615301491247028, "grad_norm": 1.1516467332839966, "learning_rate": 1.5505012087749126e-07, "loss": 1.0175, "step": 4449 }, { "epoch": 0.9617462718824292, "grad_norm": 1.1859296560287476, "learning_rate": 1.5331510130452752e-07, "loss": 1.2111, "step": 4450 }, { "epoch": 0.9619623946401556, "grad_norm": 1.063227653503418, "learning_rate": 1.515898065612853e-07, "loss": 0.9176, "step": 4451 }, { "epoch": 0.962178517397882, "grad_norm": 0.9186780452728271, "learning_rate": 1.49874237493155e-07, "loss": 0.759, "step": 4452 }, { "epoch": 0.9623946401556084, "grad_norm": 0.9617912769317627, "learning_rate": 1.4816839494076197e-07, "loss": 0.785, "step": 4453 }, { "epoch": 0.9626107629133348, "grad_norm": 1.1196167469024658, "learning_rate": 1.4647227973996425e-07, "loss": 0.8965, "step": 4454 }, { "epoch": 0.9628268856710611, "grad_norm": 0.9806433916091919, "learning_rate": 1.4478589272185483e-07, "loss": 0.8639, "step": 4455 }, { "epoch": 0.9630430084287875, "grad_norm": 0.9332010746002197, "learning_rate": 1.4310923471275717e-07, "loss": 0.9386, "step": 4456 }, { "epoch": 0.963259131186514, "grad_norm": 1.1104164123535156, "learning_rate": 1.4144230653423408e-07, "loss": 0.903, "step": 4457 }, { "epoch": 0.9634752539442403, "grad_norm": 0.959007203578949, "learning_rate": 1.3978510900307441e-07, "loss": 1.0107, "step": 4458 }, { "epoch": 0.9636913767019667, "grad_norm": 1.015206217765808, "learning_rate": 1.3813764293130194e-07, "loss": 0.8919, "step": 4459 }, { "epoch": 0.9639074994596931, "grad_norm": 0.9340243935585022, "learning_rate": 1.3649990912616873e-07, "loss": 0.7884, "step": 4460 }, { "epoch": 0.9641236222174195, "grad_norm": 0.8546795845031738, "learning_rate": 1.3487190839016394e-07, "loss": 0.7857, "step": 4461 }, { "epoch": 0.9643397449751459, "grad_norm": 1.1241455078125, "learning_rate": 1.3325364152100063e-07, "loss": 0.9149, "step": 4462 }, { "epoch": 0.9645558677328723, "grad_norm": 1.0332034826278687, "learning_rate": 1.3164510931162888e-07, "loss": 0.9935, "step": 4463 }, { "epoch": 0.9647719904905987, "grad_norm": 0.8962178826332092, "learning_rate": 1.3004631255022492e-07, "loss": 0.8437, "step": 4464 }, { "epoch": 0.964988113248325, "grad_norm": 0.8388503193855286, "learning_rate": 1.2845725202019322e-07, "loss": 0.7117, "step": 4465 }, { "epoch": 0.9652042360060514, "grad_norm": 1.0348695516586304, "learning_rate": 1.268779285001731e-07, "loss": 0.9797, "step": 4466 }, { "epoch": 0.9654203587637779, "grad_norm": 0.9365739822387695, "learning_rate": 1.2530834276402782e-07, "loss": 0.8525, "step": 4467 }, { "epoch": 0.9656364815215042, "grad_norm": 0.996942937374115, "learning_rate": 1.23748495580851e-07, "loss": 0.997, "step": 4468 }, { "epoch": 0.9658526042792306, "grad_norm": 1.0092296600341797, "learning_rate": 1.2219838771496462e-07, "loss": 0.8307, "step": 4469 }, { "epoch": 0.966068727036957, "grad_norm": 1.114109992980957, "learning_rate": 1.2065801992591663e-07, "loss": 0.9976, "step": 4470 }, { "epoch": 0.9662848497946834, "grad_norm": 1.0005512237548828, "learning_rate": 1.1912739296848552e-07, "loss": 0.99, "step": 4471 }, { "epoch": 0.9665009725524097, "grad_norm": 0.9434089064598083, "learning_rate": 1.1760650759267356e-07, "loss": 0.797, "step": 4472 }, { "epoch": 0.9667170953101362, "grad_norm": 0.9649157524108887, "learning_rate": 1.1609536454371129e-07, "loss": 0.97, "step": 4473 }, { "epoch": 0.9669332180678626, "grad_norm": 0.8413819670677185, "learning_rate": 1.1459396456205307e-07, "loss": 0.8336, "step": 4474 }, { "epoch": 0.9671493408255889, "grad_norm": 1.0013097524642944, "learning_rate": 1.1310230838338598e-07, "loss": 0.9761, "step": 4475 }, { "epoch": 0.9673654635833153, "grad_norm": 0.9233847260475159, "learning_rate": 1.1162039673861646e-07, "loss": 0.7964, "step": 4476 }, { "epoch": 0.9675815863410417, "grad_norm": 1.0927408933639526, "learning_rate": 1.10148230353877e-07, "loss": 0.9503, "step": 4477 }, { "epoch": 0.967797709098768, "grad_norm": 1.0073485374450684, "learning_rate": 1.0868580995052392e-07, "loss": 0.8874, "step": 4478 }, { "epoch": 0.9680138318564945, "grad_norm": 0.8882655501365662, "learning_rate": 1.0723313624514398e-07, "loss": 0.8, "step": 4479 }, { "epoch": 0.9682299546142209, "grad_norm": 1.0216253995895386, "learning_rate": 1.0579020994954114e-07, "loss": 0.956, "step": 4480 }, { "epoch": 0.9684460773719472, "grad_norm": 1.0553555488586426, "learning_rate": 1.0435703177074763e-07, "loss": 0.797, "step": 4481 }, { "epoch": 0.9686622001296736, "grad_norm": 1.1059043407440186, "learning_rate": 1.0293360241101502e-07, "loss": 0.955, "step": 4482 }, { "epoch": 0.9688783228874001, "grad_norm": 0.9645715951919556, "learning_rate": 1.0151992256782317e-07, "loss": 0.8712, "step": 4483 }, { "epoch": 0.9690944456451265, "grad_norm": 0.9705997109413147, "learning_rate": 1.0011599293386909e-07, "loss": 0.7353, "step": 4484 }, { "epoch": 0.9693105684028528, "grad_norm": 0.9964491128921509, "learning_rate": 9.872181419707805e-08, "loss": 0.8744, "step": 4485 }, { "epoch": 0.9695266911605792, "grad_norm": 0.9052468538284302, "learning_rate": 9.733738704059247e-08, "loss": 0.8135, "step": 4486 }, { "epoch": 0.9697428139183056, "grad_norm": 1.006423830986023, "learning_rate": 9.596271214277864e-08, "loss": 0.9931, "step": 4487 }, { "epoch": 0.969958936676032, "grad_norm": 1.1249778270721436, "learning_rate": 9.459779017722436e-08, "loss": 1.0073, "step": 4488 }, { "epoch": 0.9701750594337584, "grad_norm": 0.9946759343147278, "learning_rate": 9.324262181273691e-08, "loss": 0.9562, "step": 4489 }, { "epoch": 0.9703911821914848, "grad_norm": 0.898918867111206, "learning_rate": 9.189720771334954e-08, "loss": 0.9048, "step": 4490 }, { "epoch": 0.9706073049492111, "grad_norm": 0.940396249294281, "learning_rate": 9.056154853830823e-08, "loss": 0.9321, "step": 4491 }, { "epoch": 0.9708234277069375, "grad_norm": 0.8641870021820068, "learning_rate": 8.923564494208281e-08, "loss": 0.8102, "step": 4492 }, { "epoch": 0.971039550464664, "grad_norm": 0.9652795791625977, "learning_rate": 8.791949757436691e-08, "loss": 0.6587, "step": 4493 }, { "epoch": 0.9712556732223904, "grad_norm": 0.9301003217697144, "learning_rate": 8.661310708006688e-08, "loss": 0.9088, "step": 4494 }, { "epoch": 0.9714717959801167, "grad_norm": 1.0240460634231567, "learning_rate": 8.531647409931065e-08, "loss": 0.892, "step": 4495 }, { "epoch": 0.9716879187378431, "grad_norm": 1.031960129737854, "learning_rate": 8.402959926744337e-08, "loss": 0.8466, "step": 4496 }, { "epoch": 0.9719040414955695, "grad_norm": 0.915630578994751, "learning_rate": 8.275248321503615e-08, "loss": 0.9407, "step": 4497 }, { "epoch": 0.9721201642532958, "grad_norm": 0.9745843410491943, "learning_rate": 8.148512656787066e-08, "loss": 0.8825, "step": 4498 }, { "epoch": 0.9723362870110223, "grad_norm": 1.051472783088684, "learning_rate": 8.022752994694793e-08, "loss": 0.7516, "step": 4499 }, { "epoch": 0.9725524097687487, "grad_norm": 1.0768318176269531, "learning_rate": 7.897969396848615e-08, "loss": 0.93, "step": 4500 }, { "epoch": 0.972768532526475, "grad_norm": 1.0499401092529297, "learning_rate": 7.77416192439251e-08, "loss": 0.9373, "step": 4501 }, { "epoch": 0.9729846552842014, "grad_norm": 0.964851975440979, "learning_rate": 7.651330637991506e-08, "loss": 0.7312, "step": 4502 }, { "epoch": 0.9732007780419278, "grad_norm": 0.9900359511375427, "learning_rate": 7.529475597833013e-08, "loss": 0.8702, "step": 4503 }, { "epoch": 0.9734169007996542, "grad_norm": 0.9830164313316345, "learning_rate": 7.408596863625717e-08, "loss": 0.9838, "step": 4504 }, { "epoch": 0.9736330235573806, "grad_norm": 1.0824828147888184, "learning_rate": 7.288694494599347e-08, "loss": 1.0791, "step": 4505 }, { "epoch": 0.973849146315107, "grad_norm": 1.0393846035003662, "learning_rate": 7.169768549506461e-08, "loss": 0.847, "step": 4506 }, { "epoch": 0.9740652690728334, "grad_norm": 1.1033649444580078, "learning_rate": 7.051819086620004e-08, "loss": 0.9154, "step": 4507 }, { "epoch": 0.9742813918305597, "grad_norm": 0.9521198868751526, "learning_rate": 6.934846163735298e-08, "loss": 0.8702, "step": 4508 }, { "epoch": 0.9744975145882862, "grad_norm": 1.0471328496932983, "learning_rate": 6.818849838168718e-08, "loss": 0.7826, "step": 4509 }, { "epoch": 0.9747136373460126, "grad_norm": 0.9011777639389038, "learning_rate": 6.703830166758129e-08, "loss": 0.8816, "step": 4510 }, { "epoch": 0.9749297601037389, "grad_norm": 0.955214262008667, "learning_rate": 6.589787205862896e-08, "loss": 1.0, "step": 4511 }, { "epoch": 0.9751458828614653, "grad_norm": 0.938822865486145, "learning_rate": 6.476721011363873e-08, "loss": 0.9094, "step": 4512 }, { "epoch": 0.9753620056191917, "grad_norm": 1.0690059661865234, "learning_rate": 6.364631638663188e-08, "loss": 0.9433, "step": 4513 }, { "epoch": 0.975578128376918, "grad_norm": 0.9920691847801208, "learning_rate": 6.253519142684239e-08, "loss": 0.9111, "step": 4514 }, { "epoch": 0.9757942511346445, "grad_norm": 0.9803416132926941, "learning_rate": 6.143383577872142e-08, "loss": 0.8739, "step": 4515 }, { "epoch": 0.9760103738923709, "grad_norm": 0.9745467305183411, "learning_rate": 6.034224998193061e-08, "loss": 0.8991, "step": 4516 }, { "epoch": 0.9762264966500973, "grad_norm": 0.9180235266685486, "learning_rate": 5.926043457134212e-08, "loss": 0.8453, "step": 4517 }, { "epoch": 0.9764426194078236, "grad_norm": 0.8540080189704895, "learning_rate": 5.818839007704524e-08, "loss": 0.9471, "step": 4518 }, { "epoch": 0.97665874216555, "grad_norm": 0.9433836340904236, "learning_rate": 5.712611702433757e-08, "loss": 0.8529, "step": 4519 }, { "epoch": 0.9768748649232765, "grad_norm": 1.2008123397827148, "learning_rate": 5.6073615933731616e-08, "loss": 1.0432, "step": 4520 }, { "epoch": 0.9770909876810028, "grad_norm": 1.162864327430725, "learning_rate": 5.503088732095041e-08, "loss": 1.0488, "step": 4521 }, { "epoch": 0.9773071104387292, "grad_norm": 0.9588613510131836, "learning_rate": 5.399793169692968e-08, "loss": 0.8168, "step": 4522 }, { "epoch": 0.9775232331964556, "grad_norm": 1.075270652770996, "learning_rate": 5.2974749567811235e-08, "loss": 0.9058, "step": 4523 }, { "epoch": 0.9777393559541819, "grad_norm": 1.1643694639205933, "learning_rate": 5.1961341434956233e-08, "loss": 1.0175, "step": 4524 }, { "epoch": 0.9779554787119084, "grad_norm": 1.2145328521728516, "learning_rate": 5.09577077949297e-08, "loss": 0.9433, "step": 4525 }, { "epoch": 0.9781716014696348, "grad_norm": 1.0082900524139404, "learning_rate": 4.996384913951158e-08, "loss": 0.859, "step": 4526 }, { "epoch": 0.9783877242273611, "grad_norm": 0.8812754154205322, "learning_rate": 4.897976595568787e-08, "loss": 1.0174, "step": 4527 }, { "epoch": 0.9786038469850875, "grad_norm": 0.8338526487350464, "learning_rate": 4.800545872566176e-08, "loss": 0.9185, "step": 4528 }, { "epoch": 0.9788199697428139, "grad_norm": 0.9946125745773315, "learning_rate": 4.704092792683579e-08, "loss": 0.8096, "step": 4529 }, { "epoch": 0.9790360925005404, "grad_norm": 1.0745145082473755, "learning_rate": 4.608617403183191e-08, "loss": 0.7382, "step": 4530 }, { "epoch": 0.9792522152582667, "grad_norm": 1.0827641487121582, "learning_rate": 4.51411975084759e-08, "loss": 0.8046, "step": 4531 }, { "epoch": 0.9794683380159931, "grad_norm": 0.9236524701118469, "learning_rate": 4.420599881980403e-08, "loss": 0.9964, "step": 4532 }, { "epoch": 0.9796844607737195, "grad_norm": 0.957859218120575, "learning_rate": 4.328057842406086e-08, "loss": 0.9137, "step": 4533 }, { "epoch": 0.9799005835314458, "grad_norm": 0.9804733991622925, "learning_rate": 4.236493677470144e-08, "loss": 0.8111, "step": 4534 }, { "epoch": 0.9801167062891722, "grad_norm": 0.9219115972518921, "learning_rate": 4.145907432038909e-08, "loss": 0.7578, "step": 4535 }, { "epoch": 0.9803328290468987, "grad_norm": 0.9382902383804321, "learning_rate": 4.056299150499099e-08, "loss": 0.9013, "step": 4536 }, { "epoch": 0.980548951804625, "grad_norm": 1.0483931303024292, "learning_rate": 3.967668876758701e-08, "loss": 1.0505, "step": 4537 }, { "epoch": 0.9807650745623514, "grad_norm": 0.9215896725654602, "learning_rate": 3.880016654246532e-08, "loss": 0.7008, "step": 4538 }, { "epoch": 0.9809811973200778, "grad_norm": 0.9627508521080017, "learning_rate": 3.793342525911792e-08, "loss": 0.9846, "step": 4539 }, { "epoch": 0.9811973200778042, "grad_norm": 1.0096752643585205, "learning_rate": 3.7076465342247295e-08, "loss": 1.0354, "step": 4540 }, { "epoch": 0.9814134428355306, "grad_norm": 0.8894267678260803, "learning_rate": 3.622928721175978e-08, "loss": 0.9714, "step": 4541 }, { "epoch": 0.981629565593257, "grad_norm": 1.1433912515640259, "learning_rate": 3.539189128277221e-08, "loss": 0.9653, "step": 4542 }, { "epoch": 0.9818456883509834, "grad_norm": 0.9559594392776489, "learning_rate": 3.4564277965607465e-08, "loss": 0.9179, "step": 4543 }, { "epoch": 0.9820618111087097, "grad_norm": 1.0474815368652344, "learning_rate": 3.374644766579227e-08, "loss": 0.9376, "step": 4544 }, { "epoch": 0.9822779338664361, "grad_norm": 0.8644964694976807, "learning_rate": 3.293840078406163e-08, "loss": 0.8446, "step": 4545 }, { "epoch": 0.9824940566241626, "grad_norm": 0.8857249021530151, "learning_rate": 3.214013771635882e-08, "loss": 0.7733, "step": 4546 }, { "epoch": 0.9827101793818889, "grad_norm": 1.0087493658065796, "learning_rate": 3.135165885382874e-08, "loss": 0.9894, "step": 4547 }, { "epoch": 0.9829263021396153, "grad_norm": 1.016663670539856, "learning_rate": 3.057296458282677e-08, "loss": 1.0015, "step": 4548 }, { "epoch": 0.9831424248973417, "grad_norm": 1.002447485923767, "learning_rate": 2.9804055284907705e-08, "loss": 0.9862, "step": 4549 }, { "epoch": 0.983358547655068, "grad_norm": 1.0246262550354004, "learning_rate": 2.9044931336836834e-08, "loss": 1.0012, "step": 4550 }, { "epoch": 0.9835746704127944, "grad_norm": 1.0020081996917725, "learning_rate": 2.8295593110583275e-08, "loss": 0.9286, "step": 4551 }, { "epoch": 0.9837907931705209, "grad_norm": 1.1388698816299438, "learning_rate": 2.7556040973322206e-08, "loss": 0.9182, "step": 4552 }, { "epoch": 0.9840069159282473, "grad_norm": 1.0084162950515747, "learning_rate": 2.6826275287430426e-08, "loss": 0.9106, "step": 4553 }, { "epoch": 0.9842230386859736, "grad_norm": 0.9532014727592468, "learning_rate": 2.6106296410493005e-08, "loss": 0.9627, "step": 4554 }, { "epoch": 0.9844391614437, "grad_norm": 0.9719927310943604, "learning_rate": 2.539610469529885e-08, "loss": 0.8488, "step": 4555 }, { "epoch": 0.9846552842014265, "grad_norm": 1.0258097648620605, "learning_rate": 2.4695700489836273e-08, "loss": 0.8759, "step": 4556 }, { "epoch": 0.9848714069591528, "grad_norm": 0.9381598234176636, "learning_rate": 2.400508413730629e-08, "loss": 0.8614, "step": 4557 }, { "epoch": 0.9850875297168792, "grad_norm": 1.0903583765029907, "learning_rate": 2.3324255976104883e-08, "loss": 0.9688, "step": 4558 }, { "epoch": 0.9853036524746056, "grad_norm": 1.0507482290267944, "learning_rate": 2.2653216339840746e-08, "loss": 0.9657, "step": 4559 }, { "epoch": 0.9855197752323319, "grad_norm": 1.034676194190979, "learning_rate": 2.1991965557317528e-08, "loss": 0.918, "step": 4560 }, { "epoch": 0.9857358979900583, "grad_norm": 0.9329960346221924, "learning_rate": 2.1340503952551606e-08, "loss": 0.8797, "step": 4561 }, { "epoch": 0.9859520207477848, "grad_norm": 1.1639207601547241, "learning_rate": 2.0698831844752077e-08, "loss": 0.9585, "step": 4562 }, { "epoch": 0.9861681435055112, "grad_norm": 1.0641400814056396, "learning_rate": 2.0066949548340765e-08, "loss": 0.8128, "step": 4563 }, { "epoch": 0.9863842662632375, "grad_norm": 1.0153011083602905, "learning_rate": 1.9444857372936666e-08, "loss": 0.9178, "step": 4564 }, { "epoch": 0.9866003890209639, "grad_norm": 0.8482317924499512, "learning_rate": 1.8832555623364836e-08, "loss": 0.6754, "step": 4565 }, { "epoch": 0.9868165117786903, "grad_norm": 0.8743635416030884, "learning_rate": 1.8230044599651942e-08, "loss": 0.8353, "step": 4566 }, { "epoch": 0.9870326345364167, "grad_norm": 0.9658658504486084, "learning_rate": 1.763732459702405e-08, "loss": 0.9213, "step": 4567 }, { "epoch": 0.9872487572941431, "grad_norm": 0.8978521823883057, "learning_rate": 1.705439590591551e-08, "loss": 0.8257, "step": 4568 }, { "epoch": 0.9874648800518695, "grad_norm": 1.0447510480880737, "learning_rate": 1.6481258811957836e-08, "loss": 1.0086, "step": 4569 }, { "epoch": 0.9876810028095958, "grad_norm": 1.058210015296936, "learning_rate": 1.59179135959886e-08, "loss": 1.0297, "step": 4570 }, { "epoch": 0.9878971255673222, "grad_norm": 1.0691936016082764, "learning_rate": 1.5364360534046997e-08, "loss": 1.0244, "step": 4571 }, { "epoch": 0.9881132483250487, "grad_norm": 1.0271050930023193, "learning_rate": 1.4820599897369393e-08, "loss": 0.9619, "step": 4572 }, { "epoch": 0.988329371082775, "grad_norm": 1.0300852060317993, "learning_rate": 1.4286631952398212e-08, "loss": 0.8862, "step": 4573 }, { "epoch": 0.9885454938405014, "grad_norm": 1.0740405321121216, "learning_rate": 1.3762456960777492e-08, "loss": 0.9585, "step": 4574 }, { "epoch": 0.9887616165982278, "grad_norm": 1.0555627346038818, "learning_rate": 1.3248075179352893e-08, "loss": 1.0198, "step": 4575 }, { "epoch": 0.9889777393559542, "grad_norm": 1.0466147661209106, "learning_rate": 1.2743486860165022e-08, "loss": 0.9095, "step": 4576 }, { "epoch": 0.9891938621136805, "grad_norm": 0.9894844889640808, "learning_rate": 1.224869225046721e-08, "loss": 0.8757, "step": 4577 }, { "epoch": 0.989409984871407, "grad_norm": 0.9214833974838257, "learning_rate": 1.1763691592705517e-08, "loss": 0.8196, "step": 4578 }, { "epoch": 0.9896261076291334, "grad_norm": 0.9439073801040649, "learning_rate": 1.1288485124529847e-08, "loss": 0.804, "step": 4579 }, { "epoch": 0.9898422303868597, "grad_norm": 1.0329656600952148, "learning_rate": 1.0823073078787271e-08, "loss": 0.8119, "step": 4580 }, { "epoch": 0.9900583531445861, "grad_norm": 1.309882402420044, "learning_rate": 1.0367455683530924e-08, "loss": 0.9939, "step": 4581 }, { "epoch": 0.9902744759023125, "grad_norm": 0.9725418090820312, "learning_rate": 9.921633162011113e-09, "loss": 0.8914, "step": 4582 }, { "epoch": 0.9904905986600389, "grad_norm": 0.8949552774429321, "learning_rate": 9.48560573268198e-09, "loss": 0.9486, "step": 4583 }, { "epoch": 0.9907067214177653, "grad_norm": 0.8219007253646851, "learning_rate": 9.059373609194844e-09, "loss": 0.6703, "step": 4584 }, { "epoch": 0.9909228441754917, "grad_norm": 0.9638029336929321, "learning_rate": 8.64293700040264e-09, "loss": 0.9538, "step": 4585 }, { "epoch": 0.991138966933218, "grad_norm": 0.9765580296516418, "learning_rate": 8.23629611035548e-09, "loss": 0.736, "step": 4586 }, { "epoch": 0.9913550896909444, "grad_norm": 1.000381350517273, "learning_rate": 7.839451138311748e-09, "loss": 0.8744, "step": 4587 }, { "epoch": 0.9915712124486709, "grad_norm": 0.983529269695282, "learning_rate": 7.45240227872035e-09, "loss": 0.9069, "step": 4588 }, { "epoch": 0.9917873352063973, "grad_norm": 1.205679178237915, "learning_rate": 7.075149721236241e-09, "loss": 0.9098, "step": 4589 }, { "epoch": 0.9920034579641236, "grad_norm": 1.0421596765518188, "learning_rate": 6.707693650711555e-09, "loss": 0.8997, "step": 4590 }, { "epoch": 0.99221958072185, "grad_norm": 0.8940469622612, "learning_rate": 6.350034247197823e-09, "loss": 0.9408, "step": 4591 }, { "epoch": 0.9924357034795764, "grad_norm": 1.1142728328704834, "learning_rate": 6.002171685950408e-09, "loss": 0.9996, "step": 4592 }, { "epoch": 0.9926518262373027, "grad_norm": 0.8785488605499268, "learning_rate": 5.664106137419634e-09, "loss": 0.9027, "step": 4593 }, { "epoch": 0.9928679489950292, "grad_norm": 0.8924729228019714, "learning_rate": 5.335837767255214e-09, "loss": 0.7117, "step": 4594 }, { "epoch": 0.9930840717527556, "grad_norm": 1.0885940790176392, "learning_rate": 5.017366736308482e-09, "loss": 0.9792, "step": 4595 }, { "epoch": 0.9933001945104819, "grad_norm": 0.9672080278396606, "learning_rate": 4.708693200632386e-09, "loss": 0.8026, "step": 4596 }, { "epoch": 0.9935163172682083, "grad_norm": 0.9822454452514648, "learning_rate": 4.409817311474829e-09, "loss": 0.9591, "step": 4597 }, { "epoch": 0.9937324400259347, "grad_norm": 0.9059059619903564, "learning_rate": 4.120739215280889e-09, "loss": 0.9964, "step": 4598 }, { "epoch": 0.9939485627836612, "grad_norm": 1.1094274520874023, "learning_rate": 3.841459053703922e-09, "loss": 0.8858, "step": 4599 }, { "epoch": 0.9941646855413875, "grad_norm": 1.0185402631759644, "learning_rate": 3.5719769635855773e-09, "loss": 0.8135, "step": 4600 } ], "logging_steps": 1.0, "max_steps": 4627, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "total_flos": 1.5423137193824092e+21, "train_batch_size": 2, "trial_name": null, "trial_params": null }