{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999977398116359, "eval_steps": 5000, "global_step": 55305, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00018081506912786112, "grad_norm": 40.21875, "learning_rate": 9.999971747693341e-06, "loss": 17.52, "step": 10 }, { "epoch": 0.00036163013825572224, "grad_norm": 40.4375, "learning_rate": 9.999943495386682e-06, "loss": 17.8709, "step": 20 }, { "epoch": 0.0005424452073835833, "grad_norm": 40.875, "learning_rate": 9.999915243080021e-06, "loss": 17.6916, "step": 30 }, { "epoch": 0.0007232602765114445, "grad_norm": 39.96875, "learning_rate": 9.999886990773362e-06, "loss": 17.9391, "step": 40 }, { "epoch": 0.0009040753456393055, "grad_norm": 39.09375, "learning_rate": 9.999858738466704e-06, "loss": 17.3349, "step": 50 }, { "epoch": 0.0010848904147671666, "grad_norm": 43.1875, "learning_rate": 9.999830486160043e-06, "loss": 17.7175, "step": 60 }, { "epoch": 0.0012657054838950278, "grad_norm": 38.1875, "learning_rate": 9.999802233853383e-06, "loss": 17.9345, "step": 70 }, { "epoch": 0.001446520553022889, "grad_norm": 41.875, "learning_rate": 9.999773981546724e-06, "loss": 17.5544, "step": 80 }, { "epoch": 0.0016273356221507501, "grad_norm": 41.03125, "learning_rate": 9.999745729240065e-06, "loss": 17.255, "step": 90 }, { "epoch": 0.001808150691278611, "grad_norm": 38.90625, "learning_rate": 9.999717476933405e-06, "loss": 17.2562, "step": 100 }, { "epoch": 0.0019889657604064723, "grad_norm": 38.71875, "learning_rate": 9.999689224626746e-06, "loss": 17.4383, "step": 110 }, { "epoch": 0.002169780829534333, "grad_norm": 41.75, "learning_rate": 9.999660972320085e-06, "loss": 17.9809, "step": 120 }, { "epoch": 0.0023505958986621946, "grad_norm": 38.84375, "learning_rate": 9.999632720013426e-06, "loss": 17.7502, "step": 130 }, { "epoch": 0.0025314109677900556, "grad_norm": 41.21875, "learning_rate": 9.999604467706768e-06, "loss": 17.4189, "step": 140 }, { "epoch": 0.002712226036917917, "grad_norm": 39.1875, "learning_rate": 9.999576215400107e-06, "loss": 17.8842, "step": 150 }, { "epoch": 0.002893041106045778, "grad_norm": 40.75, "learning_rate": 9.999547963093447e-06, "loss": 17.6147, "step": 160 }, { "epoch": 0.003073856175173639, "grad_norm": 39.1875, "learning_rate": 9.999519710786788e-06, "loss": 17.6027, "step": 170 }, { "epoch": 0.0032546712443015002, "grad_norm": 39.375, "learning_rate": 9.999491458480129e-06, "loss": 17.581, "step": 180 }, { "epoch": 0.003435486313429361, "grad_norm": 40.15625, "learning_rate": 9.99946320617347e-06, "loss": 17.5783, "step": 190 }, { "epoch": 0.003616301382557222, "grad_norm": 39.53125, "learning_rate": 9.999434953866808e-06, "loss": 17.3805, "step": 200 }, { "epoch": 0.0037971164516850836, "grad_norm": 39.53125, "learning_rate": 9.999406701560149e-06, "loss": 17.3353, "step": 210 }, { "epoch": 0.0039779315208129445, "grad_norm": 40.25, "learning_rate": 9.999378449253491e-06, "loss": 17.6241, "step": 220 }, { "epoch": 0.004158746589940806, "grad_norm": 39.03125, "learning_rate": 9.999350196946832e-06, "loss": 17.5579, "step": 230 }, { "epoch": 0.004339561659068666, "grad_norm": 42.71875, "learning_rate": 9.99932194464017e-06, "loss": 17.7354, "step": 240 }, { "epoch": 0.004520376728196528, "grad_norm": 40.78125, "learning_rate": 9.999293692333511e-06, "loss": 17.3126, "step": 250 }, { "epoch": 0.004701191797324389, "grad_norm": 38.1875, "learning_rate": 9.999265440026852e-06, "loss": 17.5456, "step": 260 }, { "epoch": 0.00488200686645225, "grad_norm": 42.21875, "learning_rate": 9.999237187720193e-06, "loss": 17.7592, "step": 270 }, { "epoch": 0.005062821935580111, "grad_norm": 41.8125, "learning_rate": 9.999208935413533e-06, "loss": 17.3461, "step": 280 }, { "epoch": 0.0052436370047079725, "grad_norm": 44.0, "learning_rate": 9.999180683106872e-06, "loss": 17.643, "step": 290 }, { "epoch": 0.005424452073835834, "grad_norm": 40.21875, "learning_rate": 9.999152430800213e-06, "loss": 17.3788, "step": 300 }, { "epoch": 0.005605267142963694, "grad_norm": 42.9375, "learning_rate": 9.999124178493555e-06, "loss": 17.5415, "step": 310 }, { "epoch": 0.005786082212091556, "grad_norm": 43.09375, "learning_rate": 9.999095926186894e-06, "loss": 17.9753, "step": 320 }, { "epoch": 0.005966897281219417, "grad_norm": 38.375, "learning_rate": 9.999067673880235e-06, "loss": 17.3605, "step": 330 }, { "epoch": 0.006147712350347278, "grad_norm": 39.875, "learning_rate": 9.999039421573575e-06, "loss": 17.8647, "step": 340 }, { "epoch": 0.006328527419475139, "grad_norm": 40.0, "learning_rate": 9.999011169266916e-06, "loss": 17.6431, "step": 350 }, { "epoch": 0.0065093424886030005, "grad_norm": 42.40625, "learning_rate": 9.998982916960256e-06, "loss": 17.4284, "step": 360 }, { "epoch": 0.006690157557730861, "grad_norm": 41.46875, "learning_rate": 9.998954664653595e-06, "loss": 17.6202, "step": 370 }, { "epoch": 0.006870972626858722, "grad_norm": 42.3125, "learning_rate": 9.998926412346936e-06, "loss": 17.5108, "step": 380 }, { "epoch": 0.007051787695986584, "grad_norm": 41.4375, "learning_rate": 9.998898160040277e-06, "loss": 17.2915, "step": 390 }, { "epoch": 0.007232602765114444, "grad_norm": 41.625, "learning_rate": 9.998869907733619e-06, "loss": 17.2113, "step": 400 }, { "epoch": 0.007413417834242306, "grad_norm": 39.375, "learning_rate": 9.998841655426958e-06, "loss": 17.551, "step": 410 }, { "epoch": 0.007594232903370167, "grad_norm": 39.09375, "learning_rate": 9.998813403120298e-06, "loss": 17.6326, "step": 420 }, { "epoch": 0.007775047972498028, "grad_norm": 39.65625, "learning_rate": 9.998785150813639e-06, "loss": 17.1406, "step": 430 }, { "epoch": 0.007955863041625889, "grad_norm": 39.03125, "learning_rate": 9.99875689850698e-06, "loss": 17.8196, "step": 440 }, { "epoch": 0.00813667811075375, "grad_norm": 39.4375, "learning_rate": 9.99872864620032e-06, "loss": 17.2403, "step": 450 }, { "epoch": 0.008317493179881612, "grad_norm": 40.375, "learning_rate": 9.99870039389366e-06, "loss": 17.9381, "step": 460 }, { "epoch": 0.008498308249009472, "grad_norm": 41.3125, "learning_rate": 9.998672141587e-06, "loss": 17.7649, "step": 470 }, { "epoch": 0.008679123318137333, "grad_norm": 38.625, "learning_rate": 9.99864388928034e-06, "loss": 17.6915, "step": 480 }, { "epoch": 0.008859938387265195, "grad_norm": 39.78125, "learning_rate": 9.998615636973681e-06, "loss": 17.434, "step": 490 }, { "epoch": 0.009040753456393056, "grad_norm": 42.09375, "learning_rate": 9.998587384667022e-06, "loss": 17.4497, "step": 500 }, { "epoch": 0.009221568525520916, "grad_norm": 39.0, "learning_rate": 9.998559132360362e-06, "loss": 17.5049, "step": 510 }, { "epoch": 0.009402383594648778, "grad_norm": 39.4375, "learning_rate": 9.998530880053703e-06, "loss": 17.8357, "step": 520 }, { "epoch": 0.009583198663776639, "grad_norm": 40.9375, "learning_rate": 9.998502627747044e-06, "loss": 17.334, "step": 530 }, { "epoch": 0.0097640137329045, "grad_norm": 39.9375, "learning_rate": 9.998474375440384e-06, "loss": 17.6756, "step": 540 }, { "epoch": 0.009944828802032362, "grad_norm": 40.15625, "learning_rate": 9.998446123133723e-06, "loss": 18.0823, "step": 550 }, { "epoch": 0.010125643871160222, "grad_norm": 43.5, "learning_rate": 9.998417870827064e-06, "loss": 17.8315, "step": 560 }, { "epoch": 0.010306458940288083, "grad_norm": 40.0625, "learning_rate": 9.998389618520406e-06, "loss": 17.7426, "step": 570 }, { "epoch": 0.010487274009415945, "grad_norm": 39.5, "learning_rate": 9.998361366213745e-06, "loss": 17.3527, "step": 580 }, { "epoch": 0.010668089078543806, "grad_norm": 38.9375, "learning_rate": 9.998333113907086e-06, "loss": 17.8237, "step": 590 }, { "epoch": 0.010848904147671668, "grad_norm": 39.46875, "learning_rate": 9.998304861600426e-06, "loss": 17.406, "step": 600 }, { "epoch": 0.011029719216799528, "grad_norm": 40.6875, "learning_rate": 9.998276609293767e-06, "loss": 17.7848, "step": 610 }, { "epoch": 0.011210534285927389, "grad_norm": 40.5, "learning_rate": 9.998248356987108e-06, "loss": 17.4076, "step": 620 }, { "epoch": 0.011391349355055251, "grad_norm": 42.5, "learning_rate": 9.998220104680446e-06, "loss": 17.6821, "step": 630 }, { "epoch": 0.011572164424183112, "grad_norm": 40.125, "learning_rate": 9.998191852373787e-06, "loss": 17.5884, "step": 640 }, { "epoch": 0.011752979493310972, "grad_norm": 37.4375, "learning_rate": 9.998163600067128e-06, "loss": 17.4682, "step": 650 }, { "epoch": 0.011933794562438834, "grad_norm": 39.96875, "learning_rate": 9.99813534776047e-06, "loss": 17.4463, "step": 660 }, { "epoch": 0.012114609631566695, "grad_norm": 41.09375, "learning_rate": 9.998107095453809e-06, "loss": 17.7127, "step": 670 }, { "epoch": 0.012295424700694555, "grad_norm": 41.34375, "learning_rate": 9.99807884314715e-06, "loss": 17.7266, "step": 680 }, { "epoch": 0.012476239769822418, "grad_norm": 39.75, "learning_rate": 9.99805059084049e-06, "loss": 17.7013, "step": 690 }, { "epoch": 0.012657054838950278, "grad_norm": 37.25, "learning_rate": 9.99802233853383e-06, "loss": 17.6106, "step": 700 }, { "epoch": 0.012837869908078139, "grad_norm": 41.125, "learning_rate": 9.997994086227171e-06, "loss": 17.6656, "step": 710 }, { "epoch": 0.013018684977206001, "grad_norm": 43.5, "learning_rate": 9.99796583392051e-06, "loss": 17.5118, "step": 720 }, { "epoch": 0.013199500046333862, "grad_norm": 40.21875, "learning_rate": 9.997937581613851e-06, "loss": 17.6598, "step": 730 }, { "epoch": 0.013380315115461722, "grad_norm": 41.9375, "learning_rate": 9.997909329307192e-06, "loss": 17.5925, "step": 740 }, { "epoch": 0.013561130184589584, "grad_norm": 38.28125, "learning_rate": 9.997881077000532e-06, "loss": 17.5799, "step": 750 }, { "epoch": 0.013741945253717445, "grad_norm": 38.5, "learning_rate": 9.997852824693873e-06, "loss": 17.5959, "step": 760 }, { "epoch": 0.013922760322845305, "grad_norm": 38.21875, "learning_rate": 9.997824572387213e-06, "loss": 17.2816, "step": 770 }, { "epoch": 0.014103575391973168, "grad_norm": 39.53125, "learning_rate": 9.997796320080554e-06, "loss": 17.6993, "step": 780 }, { "epoch": 0.014284390461101028, "grad_norm": 40.71875, "learning_rate": 9.997768067773895e-06, "loss": 17.8703, "step": 790 }, { "epoch": 0.014465205530228889, "grad_norm": 40.8125, "learning_rate": 9.997739815467234e-06, "loss": 17.6, "step": 800 }, { "epoch": 0.014646020599356751, "grad_norm": 41.25, "learning_rate": 9.997711563160574e-06, "loss": 17.4995, "step": 810 }, { "epoch": 0.014826835668484611, "grad_norm": 37.78125, "learning_rate": 9.997683310853915e-06, "loss": 17.3134, "step": 820 }, { "epoch": 0.015007650737612472, "grad_norm": 40.28125, "learning_rate": 9.997655058547256e-06, "loss": 17.6378, "step": 830 }, { "epoch": 0.015188465806740334, "grad_norm": 41.625, "learning_rate": 9.997626806240596e-06, "loss": 18.0105, "step": 840 }, { "epoch": 0.015369280875868195, "grad_norm": 40.625, "learning_rate": 9.997598553933937e-06, "loss": 17.5278, "step": 850 }, { "epoch": 0.015550095944996055, "grad_norm": 43.21875, "learning_rate": 9.997570301627277e-06, "loss": 17.677, "step": 860 }, { "epoch": 0.015730911014123918, "grad_norm": 40.5, "learning_rate": 9.997542049320618e-06, "loss": 17.4019, "step": 870 }, { "epoch": 0.015911726083251778, "grad_norm": 40.5, "learning_rate": 9.997513797013959e-06, "loss": 17.5114, "step": 880 }, { "epoch": 0.01609254115237964, "grad_norm": 42.28125, "learning_rate": 9.997485544707298e-06, "loss": 17.2865, "step": 890 }, { "epoch": 0.0162733562215075, "grad_norm": 41.9375, "learning_rate": 9.997457292400638e-06, "loss": 17.6203, "step": 900 }, { "epoch": 0.016454171290635363, "grad_norm": 41.71875, "learning_rate": 9.997429040093979e-06, "loss": 17.9291, "step": 910 }, { "epoch": 0.016634986359763224, "grad_norm": 40.8125, "learning_rate": 9.99740078778732e-06, "loss": 17.7519, "step": 920 }, { "epoch": 0.016815801428891084, "grad_norm": 40.34375, "learning_rate": 9.99737253548066e-06, "loss": 17.777, "step": 930 }, { "epoch": 0.016996616498018945, "grad_norm": 40.15625, "learning_rate": 9.997344283174e-06, "loss": 17.1429, "step": 940 }, { "epoch": 0.017177431567146805, "grad_norm": 39.5625, "learning_rate": 9.997316030867341e-06, "loss": 17.6653, "step": 950 }, { "epoch": 0.017358246636274666, "grad_norm": 39.96875, "learning_rate": 9.997287778560682e-06, "loss": 17.7769, "step": 960 }, { "epoch": 0.01753906170540253, "grad_norm": 36.65625, "learning_rate": 9.997259526254023e-06, "loss": 17.4157, "step": 970 }, { "epoch": 0.01771987677453039, "grad_norm": 40.8125, "learning_rate": 9.997231273947361e-06, "loss": 17.7347, "step": 980 }, { "epoch": 0.01790069184365825, "grad_norm": 43.8125, "learning_rate": 9.997203021640702e-06, "loss": 17.3825, "step": 990 }, { "epoch": 0.01808150691278611, "grad_norm": 39.0625, "learning_rate": 9.997174769334043e-06, "loss": 17.6844, "step": 1000 }, { "epoch": 0.018262321981913972, "grad_norm": 40.84375, "learning_rate": 9.997146517027383e-06, "loss": 17.6222, "step": 1010 }, { "epoch": 0.018443137051041832, "grad_norm": 38.9375, "learning_rate": 9.997118264720724e-06, "loss": 17.6019, "step": 1020 }, { "epoch": 0.018623952120169696, "grad_norm": 39.96875, "learning_rate": 9.997090012414065e-06, "loss": 17.5789, "step": 1030 }, { "epoch": 0.018804767189297557, "grad_norm": 42.78125, "learning_rate": 9.997061760107405e-06, "loss": 17.1038, "step": 1040 }, { "epoch": 0.018985582258425417, "grad_norm": 41.65625, "learning_rate": 9.997033507800746e-06, "loss": 17.6586, "step": 1050 }, { "epoch": 0.019166397327553278, "grad_norm": 38.9375, "learning_rate": 9.997005255494085e-06, "loss": 17.4448, "step": 1060 }, { "epoch": 0.01934721239668114, "grad_norm": 41.21875, "learning_rate": 9.996977003187425e-06, "loss": 16.9077, "step": 1070 }, { "epoch": 0.019528027465809, "grad_norm": 36.1875, "learning_rate": 9.996948750880766e-06, "loss": 17.3949, "step": 1080 }, { "epoch": 0.019708842534936863, "grad_norm": 40.28125, "learning_rate": 9.996920498574107e-06, "loss": 18.0129, "step": 1090 }, { "epoch": 0.019889657604064723, "grad_norm": 39.625, "learning_rate": 9.996892246267447e-06, "loss": 17.3197, "step": 1100 }, { "epoch": 0.020070472673192584, "grad_norm": 40.46875, "learning_rate": 9.996863993960788e-06, "loss": 17.4881, "step": 1110 }, { "epoch": 0.020251287742320444, "grad_norm": 39.78125, "learning_rate": 9.996835741654128e-06, "loss": 17.6111, "step": 1120 }, { "epoch": 0.020432102811448305, "grad_norm": 41.59375, "learning_rate": 9.996807489347469e-06, "loss": 17.5654, "step": 1130 }, { "epoch": 0.020612917880576166, "grad_norm": 39.625, "learning_rate": 9.99677923704081e-06, "loss": 17.5175, "step": 1140 }, { "epoch": 0.02079373294970403, "grad_norm": 42.03125, "learning_rate": 9.996750984734149e-06, "loss": 18.2359, "step": 1150 }, { "epoch": 0.02097454801883189, "grad_norm": 41.8125, "learning_rate": 9.99672273242749e-06, "loss": 17.6192, "step": 1160 }, { "epoch": 0.02115536308795975, "grad_norm": 43.875, "learning_rate": 9.99669448012083e-06, "loss": 17.8928, "step": 1170 }, { "epoch": 0.02133617815708761, "grad_norm": 39.25, "learning_rate": 9.99666622781417e-06, "loss": 17.5018, "step": 1180 }, { "epoch": 0.02151699322621547, "grad_norm": 40.75, "learning_rate": 9.996637975507511e-06, "loss": 17.8533, "step": 1190 }, { "epoch": 0.021697808295343336, "grad_norm": 39.5, "learning_rate": 9.996609723200852e-06, "loss": 17.491, "step": 1200 }, { "epoch": 0.021878623364471196, "grad_norm": 38.75, "learning_rate": 9.996581470894192e-06, "loss": 17.6635, "step": 1210 }, { "epoch": 0.022059438433599057, "grad_norm": 38.0625, "learning_rate": 9.996553218587533e-06, "loss": 17.7756, "step": 1220 }, { "epoch": 0.022240253502726917, "grad_norm": 43.21875, "learning_rate": 9.996524966280872e-06, "loss": 17.5359, "step": 1230 }, { "epoch": 0.022421068571854778, "grad_norm": 38.53125, "learning_rate": 9.996496713974213e-06, "loss": 17.4919, "step": 1240 }, { "epoch": 0.022601883640982638, "grad_norm": 40.84375, "learning_rate": 9.996468461667553e-06, "loss": 17.6023, "step": 1250 }, { "epoch": 0.022782698710110502, "grad_norm": 41.0625, "learning_rate": 9.996440209360894e-06, "loss": 17.3854, "step": 1260 }, { "epoch": 0.022963513779238363, "grad_norm": 42.78125, "learning_rate": 9.996411957054234e-06, "loss": 17.7068, "step": 1270 }, { "epoch": 0.023144328848366223, "grad_norm": 40.46875, "learning_rate": 9.996383704747575e-06, "loss": 17.4914, "step": 1280 }, { "epoch": 0.023325143917494084, "grad_norm": 38.875, "learning_rate": 9.996355452440916e-06, "loss": 18.045, "step": 1290 }, { "epoch": 0.023505958986621944, "grad_norm": 42.40625, "learning_rate": 9.996327200134256e-06, "loss": 17.5793, "step": 1300 }, { "epoch": 0.023686774055749805, "grad_norm": 41.09375, "learning_rate": 9.996298947827597e-06, "loss": 17.5581, "step": 1310 }, { "epoch": 0.02386758912487767, "grad_norm": 41.59375, "learning_rate": 9.996270695520936e-06, "loss": 17.4817, "step": 1320 }, { "epoch": 0.02404840419400553, "grad_norm": 40.1875, "learning_rate": 9.996242443214276e-06, "loss": 17.7449, "step": 1330 }, { "epoch": 0.02422921926313339, "grad_norm": 42.84375, "learning_rate": 9.996214190907617e-06, "loss": 17.4616, "step": 1340 }, { "epoch": 0.02441003433226125, "grad_norm": 38.65625, "learning_rate": 9.996185938600958e-06, "loss": 18.0316, "step": 1350 }, { "epoch": 0.02459084940138911, "grad_norm": 44.84375, "learning_rate": 9.996157686294298e-06, "loss": 17.4498, "step": 1360 }, { "epoch": 0.02477166447051697, "grad_norm": 36.1875, "learning_rate": 9.996129433987639e-06, "loss": 17.7884, "step": 1370 }, { "epoch": 0.024952479539644835, "grad_norm": 37.9375, "learning_rate": 9.99610118168098e-06, "loss": 17.6003, "step": 1380 }, { "epoch": 0.025133294608772696, "grad_norm": 41.4375, "learning_rate": 9.99607292937432e-06, "loss": 17.8514, "step": 1390 }, { "epoch": 0.025314109677900556, "grad_norm": 38.875, "learning_rate": 9.99604467706766e-06, "loss": 17.455, "step": 1400 }, { "epoch": 0.025494924747028417, "grad_norm": 39.84375, "learning_rate": 9.996016424761e-06, "loss": 17.4442, "step": 1410 }, { "epoch": 0.025675739816156277, "grad_norm": 41.46875, "learning_rate": 9.99598817245434e-06, "loss": 17.4482, "step": 1420 }, { "epoch": 0.025856554885284138, "grad_norm": 38.0, "learning_rate": 9.995959920147681e-06, "loss": 17.3167, "step": 1430 }, { "epoch": 0.026037369954412002, "grad_norm": 40.40625, "learning_rate": 9.995931667841022e-06, "loss": 17.3963, "step": 1440 }, { "epoch": 0.026218185023539863, "grad_norm": 42.09375, "learning_rate": 9.995903415534362e-06, "loss": 17.8684, "step": 1450 }, { "epoch": 0.026399000092667723, "grad_norm": 39.0625, "learning_rate": 9.995875163227703e-06, "loss": 17.4063, "step": 1460 }, { "epoch": 0.026579815161795584, "grad_norm": 37.8125, "learning_rate": 9.995846910921044e-06, "loss": 17.4437, "step": 1470 }, { "epoch": 0.026760630230923444, "grad_norm": 41.8125, "learning_rate": 9.995818658614384e-06, "loss": 17.8306, "step": 1480 }, { "epoch": 0.026941445300051305, "grad_norm": 38.875, "learning_rate": 9.995790406307723e-06, "loss": 17.4603, "step": 1490 }, { "epoch": 0.02712226036917917, "grad_norm": 43.15625, "learning_rate": 9.995762154001064e-06, "loss": 17.4142, "step": 1500 }, { "epoch": 0.02730307543830703, "grad_norm": 40.59375, "learning_rate": 9.995733901694404e-06, "loss": 17.9184, "step": 1510 }, { "epoch": 0.02748389050743489, "grad_norm": 40.6875, "learning_rate": 9.995705649387745e-06, "loss": 17.6185, "step": 1520 }, { "epoch": 0.02766470557656275, "grad_norm": 40.40625, "learning_rate": 9.995677397081086e-06, "loss": 17.5548, "step": 1530 }, { "epoch": 0.02784552064569061, "grad_norm": 40.5, "learning_rate": 9.995649144774426e-06, "loss": 17.4174, "step": 1540 }, { "epoch": 0.028026335714818475, "grad_norm": 41.5, "learning_rate": 9.995620892467767e-06, "loss": 17.5004, "step": 1550 }, { "epoch": 0.028207150783946335, "grad_norm": 40.625, "learning_rate": 9.995592640161107e-06, "loss": 17.4335, "step": 1560 }, { "epoch": 0.028387965853074196, "grad_norm": 41.59375, "learning_rate": 9.995564387854448e-06, "loss": 17.5609, "step": 1570 }, { "epoch": 0.028568780922202056, "grad_norm": 41.6875, "learning_rate": 9.995536135547787e-06, "loss": 17.2265, "step": 1580 }, { "epoch": 0.028749595991329917, "grad_norm": 39.5, "learning_rate": 9.995507883241128e-06, "loss": 17.6429, "step": 1590 }, { "epoch": 0.028930411060457777, "grad_norm": 37.625, "learning_rate": 9.995479630934468e-06, "loss": 17.6421, "step": 1600 }, { "epoch": 0.02911122612958564, "grad_norm": 42.34375, "learning_rate": 9.995451378627809e-06, "loss": 17.8708, "step": 1610 }, { "epoch": 0.029292041198713502, "grad_norm": 38.375, "learning_rate": 9.99542312632115e-06, "loss": 17.6356, "step": 1620 }, { "epoch": 0.029472856267841362, "grad_norm": 37.03125, "learning_rate": 9.99539487401449e-06, "loss": 17.4266, "step": 1630 }, { "epoch": 0.029653671336969223, "grad_norm": 38.125, "learning_rate": 9.99536662170783e-06, "loss": 17.4123, "step": 1640 }, { "epoch": 0.029834486406097083, "grad_norm": 38.3125, "learning_rate": 9.995338369401171e-06, "loss": 17.4872, "step": 1650 }, { "epoch": 0.030015301475224944, "grad_norm": 40.0, "learning_rate": 9.99531011709451e-06, "loss": 17.6213, "step": 1660 }, { "epoch": 0.030196116544352808, "grad_norm": 39.46875, "learning_rate": 9.995281864787851e-06, "loss": 17.2009, "step": 1670 }, { "epoch": 0.03037693161348067, "grad_norm": 42.125, "learning_rate": 9.995253612481191e-06, "loss": 17.5288, "step": 1680 }, { "epoch": 0.03055774668260853, "grad_norm": 37.625, "learning_rate": 9.995225360174532e-06, "loss": 17.842, "step": 1690 }, { "epoch": 0.03073856175173639, "grad_norm": 37.90625, "learning_rate": 9.995197107867873e-06, "loss": 17.5651, "step": 1700 }, { "epoch": 0.03091937682086425, "grad_norm": 40.3125, "learning_rate": 9.995168855561213e-06, "loss": 17.3393, "step": 1710 }, { "epoch": 0.03110019188999211, "grad_norm": 38.65625, "learning_rate": 9.995140603254554e-06, "loss": 17.9935, "step": 1720 }, { "epoch": 0.03128100695911997, "grad_norm": 41.78125, "learning_rate": 9.995112350947895e-06, "loss": 17.5959, "step": 1730 }, { "epoch": 0.031461822028247835, "grad_norm": 40.25, "learning_rate": 9.995084098641235e-06, "loss": 17.9083, "step": 1740 }, { "epoch": 0.03164263709737569, "grad_norm": 39.9375, "learning_rate": 9.995055846334574e-06, "loss": 17.3785, "step": 1750 }, { "epoch": 0.031823452166503556, "grad_norm": 40.625, "learning_rate": 9.995027594027915e-06, "loss": 17.5619, "step": 1760 }, { "epoch": 0.03200426723563142, "grad_norm": 41.5625, "learning_rate": 9.994999341721255e-06, "loss": 17.3343, "step": 1770 }, { "epoch": 0.03218508230475928, "grad_norm": 40.65625, "learning_rate": 9.994971089414596e-06, "loss": 17.5575, "step": 1780 }, { "epoch": 0.03236589737388714, "grad_norm": 38.125, "learning_rate": 9.994942837107937e-06, "loss": 17.4286, "step": 1790 }, { "epoch": 0.032546712443015, "grad_norm": 40.96875, "learning_rate": 9.994914584801277e-06, "loss": 17.5324, "step": 1800 }, { "epoch": 0.03272752751214286, "grad_norm": 41.71875, "learning_rate": 9.994886332494618e-06, "loss": 17.3849, "step": 1810 }, { "epoch": 0.032908342581270726, "grad_norm": 41.25, "learning_rate": 9.994858080187959e-06, "loss": 17.6226, "step": 1820 }, { "epoch": 0.03308915765039858, "grad_norm": 40.59375, "learning_rate": 9.994829827881299e-06, "loss": 17.5047, "step": 1830 }, { "epoch": 0.03326997271952645, "grad_norm": 41.1875, "learning_rate": 9.994801575574638e-06, "loss": 17.7064, "step": 1840 }, { "epoch": 0.033450787788654304, "grad_norm": 40.0625, "learning_rate": 9.994773323267979e-06, "loss": 17.5485, "step": 1850 }, { "epoch": 0.03363160285778217, "grad_norm": 39.65625, "learning_rate": 9.99474507096132e-06, "loss": 17.5458, "step": 1860 }, { "epoch": 0.033812417926910025, "grad_norm": 39.1875, "learning_rate": 9.99471681865466e-06, "loss": 17.452, "step": 1870 }, { "epoch": 0.03399323299603789, "grad_norm": 40.6875, "learning_rate": 9.994688566348e-06, "loss": 18.0397, "step": 1880 }, { "epoch": 0.03417404806516575, "grad_norm": 41.1875, "learning_rate": 9.994660314041341e-06, "loss": 17.6931, "step": 1890 }, { "epoch": 0.03435486313429361, "grad_norm": 41.28125, "learning_rate": 9.994632061734682e-06, "loss": 17.7594, "step": 1900 }, { "epoch": 0.034535678203421474, "grad_norm": 41.15625, "learning_rate": 9.994603809428022e-06, "loss": 17.4341, "step": 1910 }, { "epoch": 0.03471649327254933, "grad_norm": 40.5, "learning_rate": 9.994575557121361e-06, "loss": 17.5042, "step": 1920 }, { "epoch": 0.034897308341677195, "grad_norm": 40.78125, "learning_rate": 9.994547304814702e-06, "loss": 17.4923, "step": 1930 }, { "epoch": 0.03507812341080506, "grad_norm": 40.8125, "learning_rate": 9.994519052508043e-06, "loss": 17.8844, "step": 1940 }, { "epoch": 0.035258938479932916, "grad_norm": 38.875, "learning_rate": 9.994490800201383e-06, "loss": 17.2922, "step": 1950 }, { "epoch": 0.03543975354906078, "grad_norm": 38.40625, "learning_rate": 9.994462547894724e-06, "loss": 17.5523, "step": 1960 }, { "epoch": 0.03562056861818864, "grad_norm": 38.71875, "learning_rate": 9.994434295588063e-06, "loss": 17.504, "step": 1970 }, { "epoch": 0.0358013836873165, "grad_norm": 40.1875, "learning_rate": 9.994406043281405e-06, "loss": 17.8244, "step": 1980 }, { "epoch": 0.035982198756444365, "grad_norm": 39.78125, "learning_rate": 9.994377790974746e-06, "loss": 17.5912, "step": 1990 }, { "epoch": 0.03616301382557222, "grad_norm": 37.8125, "learning_rate": 9.994349538668086e-06, "loss": 17.4715, "step": 2000 }, { "epoch": 0.036343828894700086, "grad_norm": 39.78125, "learning_rate": 9.994321286361425e-06, "loss": 17.6859, "step": 2010 }, { "epoch": 0.036524643963827944, "grad_norm": 38.96875, "learning_rate": 9.994293034054766e-06, "loss": 17.9146, "step": 2020 }, { "epoch": 0.03670545903295581, "grad_norm": 40.71875, "learning_rate": 9.994264781748106e-06, "loss": 17.4209, "step": 2030 }, { "epoch": 0.036886274102083665, "grad_norm": 38.4375, "learning_rate": 9.994236529441447e-06, "loss": 17.432, "step": 2040 }, { "epoch": 0.03706708917121153, "grad_norm": 40.875, "learning_rate": 9.994208277134788e-06, "loss": 17.6924, "step": 2050 }, { "epoch": 0.03724790424033939, "grad_norm": 40.34375, "learning_rate": 9.994180024828128e-06, "loss": 17.1496, "step": 2060 }, { "epoch": 0.03742871930946725, "grad_norm": 38.3125, "learning_rate": 9.994151772521469e-06, "loss": 17.0407, "step": 2070 }, { "epoch": 0.037609534378595114, "grad_norm": 39.40625, "learning_rate": 9.99412352021481e-06, "loss": 18.0761, "step": 2080 }, { "epoch": 0.03779034944772297, "grad_norm": 37.71875, "learning_rate": 9.994095267908149e-06, "loss": 17.3811, "step": 2090 }, { "epoch": 0.037971164516850835, "grad_norm": 41.65625, "learning_rate": 9.99406701560149e-06, "loss": 17.4023, "step": 2100 }, { "epoch": 0.0381519795859787, "grad_norm": 40.40625, "learning_rate": 9.99403876329483e-06, "loss": 17.4827, "step": 2110 }, { "epoch": 0.038332794655106556, "grad_norm": 41.96875, "learning_rate": 9.99401051098817e-06, "loss": 17.3661, "step": 2120 }, { "epoch": 0.03851360972423442, "grad_norm": 39.375, "learning_rate": 9.993982258681511e-06, "loss": 17.6976, "step": 2130 }, { "epoch": 0.03869442479336228, "grad_norm": 39.09375, "learning_rate": 9.993954006374852e-06, "loss": 17.472, "step": 2140 }, { "epoch": 0.03887523986249014, "grad_norm": 40.9375, "learning_rate": 9.993925754068192e-06, "loss": 17.3894, "step": 2150 }, { "epoch": 0.039056054931618, "grad_norm": 38.4375, "learning_rate": 9.993897501761533e-06, "loss": 17.4943, "step": 2160 }, { "epoch": 0.03923687000074586, "grad_norm": 40.0625, "learning_rate": 9.993869249454874e-06, "loss": 17.8319, "step": 2170 }, { "epoch": 0.039417685069873726, "grad_norm": 37.40625, "learning_rate": 9.993840997148212e-06, "loss": 17.5854, "step": 2180 }, { "epoch": 0.03959850013900158, "grad_norm": 41.96875, "learning_rate": 9.993812744841553e-06, "loss": 17.6529, "step": 2190 }, { "epoch": 0.03977931520812945, "grad_norm": 38.375, "learning_rate": 9.993784492534894e-06, "loss": 17.0541, "step": 2200 }, { "epoch": 0.039960130277257304, "grad_norm": 40.90625, "learning_rate": 9.993756240228234e-06, "loss": 17.2947, "step": 2210 }, { "epoch": 0.04014094534638517, "grad_norm": 40.5625, "learning_rate": 9.993727987921575e-06, "loss": 17.6078, "step": 2220 }, { "epoch": 0.04032176041551303, "grad_norm": 38.375, "learning_rate": 9.993699735614914e-06, "loss": 17.3897, "step": 2230 }, { "epoch": 0.04050257548464089, "grad_norm": 41.78125, "learning_rate": 9.993671483308256e-06, "loss": 17.3831, "step": 2240 }, { "epoch": 0.04068339055376875, "grad_norm": 41.6875, "learning_rate": 9.993643231001597e-06, "loss": 17.4514, "step": 2250 }, { "epoch": 0.04086420562289661, "grad_norm": 43.09375, "learning_rate": 9.993614978694937e-06, "loss": 17.9483, "step": 2260 }, { "epoch": 0.041045020692024474, "grad_norm": 39.53125, "learning_rate": 9.993586726388276e-06, "loss": 17.9368, "step": 2270 }, { "epoch": 0.04122583576115233, "grad_norm": 40.15625, "learning_rate": 9.993558474081617e-06, "loss": 17.7421, "step": 2280 }, { "epoch": 0.041406650830280195, "grad_norm": 42.21875, "learning_rate": 9.993530221774958e-06, "loss": 17.6865, "step": 2290 }, { "epoch": 0.04158746589940806, "grad_norm": 39.625, "learning_rate": 9.993501969468298e-06, "loss": 17.6578, "step": 2300 }, { "epoch": 0.041768280968535916, "grad_norm": 40.3125, "learning_rate": 9.993473717161639e-06, "loss": 17.5698, "step": 2310 }, { "epoch": 0.04194909603766378, "grad_norm": 39.3125, "learning_rate": 9.993445464854978e-06, "loss": 17.1494, "step": 2320 }, { "epoch": 0.04212991110679164, "grad_norm": 42.96875, "learning_rate": 9.99341721254832e-06, "loss": 17.4623, "step": 2330 }, { "epoch": 0.0423107261759195, "grad_norm": 40.5, "learning_rate": 9.99338896024166e-06, "loss": 17.5555, "step": 2340 }, { "epoch": 0.042491541245047365, "grad_norm": 42.125, "learning_rate": 9.993360707935e-06, "loss": 17.5949, "step": 2350 }, { "epoch": 0.04267235631417522, "grad_norm": 41.34375, "learning_rate": 9.99333245562834e-06, "loss": 17.2163, "step": 2360 }, { "epoch": 0.042853171383303086, "grad_norm": 41.3125, "learning_rate": 9.993304203321681e-06, "loss": 17.95, "step": 2370 }, { "epoch": 0.04303398645243094, "grad_norm": 43.40625, "learning_rate": 9.993275951015021e-06, "loss": 17.4045, "step": 2380 }, { "epoch": 0.04321480152155881, "grad_norm": 37.78125, "learning_rate": 9.993247698708362e-06, "loss": 17.2573, "step": 2390 }, { "epoch": 0.04339561659068667, "grad_norm": 42.15625, "learning_rate": 9.993219446401701e-06, "loss": 17.1407, "step": 2400 }, { "epoch": 0.04357643165981453, "grad_norm": 37.84375, "learning_rate": 9.993191194095043e-06, "loss": 17.4419, "step": 2410 }, { "epoch": 0.04375724672894239, "grad_norm": 39.46875, "learning_rate": 9.993162941788384e-06, "loss": 17.9597, "step": 2420 }, { "epoch": 0.04393806179807025, "grad_norm": 40.5, "learning_rate": 9.993134689481725e-06, "loss": 17.3377, "step": 2430 }, { "epoch": 0.04411887686719811, "grad_norm": 37.03125, "learning_rate": 9.993106437175064e-06, "loss": 17.517, "step": 2440 }, { "epoch": 0.04429969193632597, "grad_norm": 41.6875, "learning_rate": 9.993078184868404e-06, "loss": 17.1929, "step": 2450 }, { "epoch": 0.044480507005453834, "grad_norm": 38.75, "learning_rate": 9.993049932561745e-06, "loss": 17.5776, "step": 2460 }, { "epoch": 0.0446613220745817, "grad_norm": 40.28125, "learning_rate": 9.993021680255085e-06, "loss": 17.2231, "step": 2470 }, { "epoch": 0.044842137143709555, "grad_norm": 42.0625, "learning_rate": 9.992993427948426e-06, "loss": 17.4934, "step": 2480 }, { "epoch": 0.04502295221283742, "grad_norm": 41.21875, "learning_rate": 9.992965175641765e-06, "loss": 17.5628, "step": 2490 }, { "epoch": 0.045203767281965276, "grad_norm": 41.59375, "learning_rate": 9.992936923335107e-06, "loss": 17.804, "step": 2500 }, { "epoch": 0.04538458235109314, "grad_norm": 42.8125, "learning_rate": 9.992908671028448e-06, "loss": 17.9538, "step": 2510 }, { "epoch": 0.045565397420221004, "grad_norm": 39.09375, "learning_rate": 9.992880418721787e-06, "loss": 17.6877, "step": 2520 }, { "epoch": 0.04574621248934886, "grad_norm": 39.9375, "learning_rate": 9.992852166415127e-06, "loss": 18.0231, "step": 2530 }, { "epoch": 0.045927027558476725, "grad_norm": 43.3125, "learning_rate": 9.992823914108468e-06, "loss": 17.272, "step": 2540 }, { "epoch": 0.04610784262760458, "grad_norm": 40.6875, "learning_rate": 9.992795661801809e-06, "loss": 17.1814, "step": 2550 }, { "epoch": 0.046288657696732446, "grad_norm": 39.4375, "learning_rate": 9.99276740949515e-06, "loss": 17.5537, "step": 2560 }, { "epoch": 0.046469472765860304, "grad_norm": 39.75, "learning_rate": 9.992739157188488e-06, "loss": 17.979, "step": 2570 }, { "epoch": 0.04665028783498817, "grad_norm": 42.15625, "learning_rate": 9.992710904881829e-06, "loss": 17.4319, "step": 2580 }, { "epoch": 0.04683110290411603, "grad_norm": 38.28125, "learning_rate": 9.992682652575171e-06, "loss": 17.3438, "step": 2590 }, { "epoch": 0.04701191797324389, "grad_norm": 40.9375, "learning_rate": 9.992654400268512e-06, "loss": 17.3572, "step": 2600 }, { "epoch": 0.04719273304237175, "grad_norm": 41.59375, "learning_rate": 9.99262614796185e-06, "loss": 17.6413, "step": 2610 }, { "epoch": 0.04737354811149961, "grad_norm": 40.46875, "learning_rate": 9.992597895655191e-06, "loss": 17.2812, "step": 2620 }, { "epoch": 0.047554363180627474, "grad_norm": 41.09375, "learning_rate": 9.992569643348532e-06, "loss": 17.3716, "step": 2630 }, { "epoch": 0.04773517824975534, "grad_norm": 42.53125, "learning_rate": 9.992541391041873e-06, "loss": 17.6471, "step": 2640 }, { "epoch": 0.047915993318883195, "grad_norm": 39.65625, "learning_rate": 9.992513138735213e-06, "loss": 17.5608, "step": 2650 }, { "epoch": 0.04809680838801106, "grad_norm": 38.15625, "learning_rate": 9.992484886428552e-06, "loss": 17.6671, "step": 2660 }, { "epoch": 0.048277623457138916, "grad_norm": 38.21875, "learning_rate": 9.992456634121893e-06, "loss": 17.71, "step": 2670 }, { "epoch": 0.04845843852626678, "grad_norm": 40.46875, "learning_rate": 9.992428381815235e-06, "loss": 17.377, "step": 2680 }, { "epoch": 0.04863925359539464, "grad_norm": 40.5625, "learning_rate": 9.992400129508574e-06, "loss": 17.4225, "step": 2690 }, { "epoch": 0.0488200686645225, "grad_norm": 40.78125, "learning_rate": 9.992371877201915e-06, "loss": 17.4072, "step": 2700 }, { "epoch": 0.049000883733650365, "grad_norm": 40.4375, "learning_rate": 9.992343624895255e-06, "loss": 17.1139, "step": 2710 }, { "epoch": 0.04918169880277822, "grad_norm": 38.21875, "learning_rate": 9.992315372588596e-06, "loss": 17.3573, "step": 2720 }, { "epoch": 0.049362513871906086, "grad_norm": 39.53125, "learning_rate": 9.992287120281937e-06, "loss": 17.2752, "step": 2730 }, { "epoch": 0.04954332894103394, "grad_norm": 38.6875, "learning_rate": 9.992258867975277e-06, "loss": 17.4226, "step": 2740 }, { "epoch": 0.04972414401016181, "grad_norm": 40.25, "learning_rate": 9.992230615668616e-06, "loss": 17.6635, "step": 2750 }, { "epoch": 0.04990495907928967, "grad_norm": 42.0, "learning_rate": 9.992202363361958e-06, "loss": 17.8238, "step": 2760 }, { "epoch": 0.05008577414841753, "grad_norm": 39.90625, "learning_rate": 9.992174111055299e-06, "loss": 17.3426, "step": 2770 }, { "epoch": 0.05026658921754539, "grad_norm": 40.28125, "learning_rate": 9.992145858748638e-06, "loss": 17.4383, "step": 2780 }, { "epoch": 0.05044740428667325, "grad_norm": 40.46875, "learning_rate": 9.992117606441979e-06, "loss": 17.505, "step": 2790 }, { "epoch": 0.05062821935580111, "grad_norm": 42.5, "learning_rate": 9.99208935413532e-06, "loss": 17.3786, "step": 2800 }, { "epoch": 0.05080903442492898, "grad_norm": 43.625, "learning_rate": 9.99206110182866e-06, "loss": 17.5904, "step": 2810 }, { "epoch": 0.050989849494056834, "grad_norm": 39.0625, "learning_rate": 9.992032849522e-06, "loss": 17.7643, "step": 2820 }, { "epoch": 0.0511706645631847, "grad_norm": 39.875, "learning_rate": 9.99200459721534e-06, "loss": 17.3822, "step": 2830 }, { "epoch": 0.051351479632312555, "grad_norm": 40.78125, "learning_rate": 9.99197634490868e-06, "loss": 17.8304, "step": 2840 }, { "epoch": 0.05153229470144042, "grad_norm": 38.1875, "learning_rate": 9.991948092602022e-06, "loss": 17.4109, "step": 2850 }, { "epoch": 0.051713109770568276, "grad_norm": 43.90625, "learning_rate": 9.991919840295363e-06, "loss": 17.5073, "step": 2860 }, { "epoch": 0.05189392483969614, "grad_norm": 36.53125, "learning_rate": 9.991891587988702e-06, "loss": 17.6826, "step": 2870 }, { "epoch": 0.052074739908824004, "grad_norm": 39.5625, "learning_rate": 9.991863335682042e-06, "loss": 17.5374, "step": 2880 }, { "epoch": 0.05225555497795186, "grad_norm": 42.03125, "learning_rate": 9.991835083375383e-06, "loss": 17.7052, "step": 2890 }, { "epoch": 0.052436370047079725, "grad_norm": 39.3125, "learning_rate": 9.991806831068724e-06, "loss": 17.8823, "step": 2900 }, { "epoch": 0.05261718511620758, "grad_norm": 39.125, "learning_rate": 9.991778578762064e-06, "loss": 17.2184, "step": 2910 }, { "epoch": 0.052798000185335446, "grad_norm": 41.375, "learning_rate": 9.991750326455403e-06, "loss": 17.5451, "step": 2920 }, { "epoch": 0.05297881525446331, "grad_norm": 41.8125, "learning_rate": 9.991722074148744e-06, "loss": 17.6575, "step": 2930 }, { "epoch": 0.05315963032359117, "grad_norm": 41.625, "learning_rate": 9.991693821842086e-06, "loss": 17.2601, "step": 2940 }, { "epoch": 0.05334044539271903, "grad_norm": 39.6875, "learning_rate": 9.991665569535425e-06, "loss": 17.8104, "step": 2950 }, { "epoch": 0.05352126046184689, "grad_norm": 36.96875, "learning_rate": 9.991637317228766e-06, "loss": 17.6773, "step": 2960 }, { "epoch": 0.05370207553097475, "grad_norm": 39.15625, "learning_rate": 9.991609064922106e-06, "loss": 17.3477, "step": 2970 }, { "epoch": 0.05388289060010261, "grad_norm": 41.5, "learning_rate": 9.991580812615447e-06, "loss": 17.2803, "step": 2980 }, { "epoch": 0.05406370566923047, "grad_norm": 39.28125, "learning_rate": 9.991552560308788e-06, "loss": 17.2301, "step": 2990 }, { "epoch": 0.05424452073835834, "grad_norm": 37.625, "learning_rate": 9.991524308002127e-06, "loss": 17.5987, "step": 3000 }, { "epoch": 0.054425335807486194, "grad_norm": 39.34375, "learning_rate": 9.991496055695467e-06, "loss": 17.4069, "step": 3010 }, { "epoch": 0.05460615087661406, "grad_norm": 43.03125, "learning_rate": 9.991467803388808e-06, "loss": 17.0933, "step": 3020 }, { "epoch": 0.054786965945741915, "grad_norm": 38.65625, "learning_rate": 9.99143955108215e-06, "loss": 17.2305, "step": 3030 }, { "epoch": 0.05496778101486978, "grad_norm": 40.4375, "learning_rate": 9.991411298775489e-06, "loss": 17.7806, "step": 3040 }, { "epoch": 0.05514859608399764, "grad_norm": 39.28125, "learning_rate": 9.99138304646883e-06, "loss": 17.7382, "step": 3050 }, { "epoch": 0.0553294111531255, "grad_norm": 42.875, "learning_rate": 9.99135479416217e-06, "loss": 17.4769, "step": 3060 }, { "epoch": 0.055510226222253364, "grad_norm": 40.8125, "learning_rate": 9.991326541855511e-06, "loss": 17.1672, "step": 3070 }, { "epoch": 0.05569104129138122, "grad_norm": 38.9375, "learning_rate": 9.991298289548852e-06, "loss": 17.4499, "step": 3080 }, { "epoch": 0.055871856360509085, "grad_norm": 44.28125, "learning_rate": 9.99127003724219e-06, "loss": 17.5775, "step": 3090 }, { "epoch": 0.05605267142963695, "grad_norm": 41.03125, "learning_rate": 9.991241784935531e-06, "loss": 17.6459, "step": 3100 }, { "epoch": 0.056233486498764806, "grad_norm": 40.75, "learning_rate": 9.991213532628873e-06, "loss": 17.3404, "step": 3110 }, { "epoch": 0.05641430156789267, "grad_norm": 41.5, "learning_rate": 9.991185280322212e-06, "loss": 17.4062, "step": 3120 }, { "epoch": 0.05659511663702053, "grad_norm": 40.59375, "learning_rate": 9.991157028015553e-06, "loss": 17.7076, "step": 3130 }, { "epoch": 0.05677593170614839, "grad_norm": 38.625, "learning_rate": 9.991128775708894e-06, "loss": 17.6773, "step": 3140 }, { "epoch": 0.05695674677527625, "grad_norm": 42.0, "learning_rate": 9.991100523402234e-06, "loss": 17.3732, "step": 3150 }, { "epoch": 0.05713756184440411, "grad_norm": 39.59375, "learning_rate": 9.991072271095575e-06, "loss": 17.4142, "step": 3160 }, { "epoch": 0.057318376913531977, "grad_norm": 41.375, "learning_rate": 9.991044018788915e-06, "loss": 17.4713, "step": 3170 }, { "epoch": 0.057499191982659834, "grad_norm": 40.1875, "learning_rate": 9.991015766482254e-06, "loss": 17.4469, "step": 3180 }, { "epoch": 0.0576800070517877, "grad_norm": 37.5, "learning_rate": 9.990987514175595e-06, "loss": 17.2832, "step": 3190 }, { "epoch": 0.057860822120915555, "grad_norm": 40.53125, "learning_rate": 9.990959261868937e-06, "loss": 17.5141, "step": 3200 }, { "epoch": 0.05804163719004342, "grad_norm": 38.5625, "learning_rate": 9.990931009562276e-06, "loss": 17.3238, "step": 3210 }, { "epoch": 0.05822245225917128, "grad_norm": 40.375, "learning_rate": 9.990902757255617e-06, "loss": 17.4824, "step": 3220 }, { "epoch": 0.05840326732829914, "grad_norm": 40.34375, "learning_rate": 9.990874504948957e-06, "loss": 17.458, "step": 3230 }, { "epoch": 0.058584082397427004, "grad_norm": 41.15625, "learning_rate": 9.990846252642298e-06, "loss": 17.5533, "step": 3240 }, { "epoch": 0.05876489746655486, "grad_norm": 40.9375, "learning_rate": 9.990818000335639e-06, "loss": 17.7103, "step": 3250 }, { "epoch": 0.058945712535682725, "grad_norm": 40.15625, "learning_rate": 9.990789748028978e-06, "loss": 17.4866, "step": 3260 }, { "epoch": 0.05912652760481058, "grad_norm": 41.3125, "learning_rate": 9.990761495722318e-06, "loss": 17.6451, "step": 3270 }, { "epoch": 0.059307342673938446, "grad_norm": 39.9375, "learning_rate": 9.990733243415659e-06, "loss": 18.1264, "step": 3280 }, { "epoch": 0.05948815774306631, "grad_norm": 40.03125, "learning_rate": 9.990704991109001e-06, "loss": 17.2858, "step": 3290 }, { "epoch": 0.05966897281219417, "grad_norm": 43.34375, "learning_rate": 9.99067673880234e-06, "loss": 17.6059, "step": 3300 }, { "epoch": 0.05984978788132203, "grad_norm": 39.65625, "learning_rate": 9.99064848649568e-06, "loss": 17.8113, "step": 3310 }, { "epoch": 0.06003060295044989, "grad_norm": 42.0, "learning_rate": 9.990620234189021e-06, "loss": 17.2323, "step": 3320 }, { "epoch": 0.06021141801957775, "grad_norm": 39.40625, "learning_rate": 9.990591981882362e-06, "loss": 17.2295, "step": 3330 }, { "epoch": 0.060392233088705616, "grad_norm": 40.65625, "learning_rate": 9.990563729575703e-06, "loss": 17.2785, "step": 3340 }, { "epoch": 0.06057304815783347, "grad_norm": 38.9375, "learning_rate": 9.990535477269042e-06, "loss": 17.3906, "step": 3350 }, { "epoch": 0.06075386322696134, "grad_norm": 42.09375, "learning_rate": 9.990507224962382e-06, "loss": 17.4731, "step": 3360 }, { "epoch": 0.060934678296089194, "grad_norm": 39.21875, "learning_rate": 9.990478972655723e-06, "loss": 17.5231, "step": 3370 }, { "epoch": 0.06111549336521706, "grad_norm": 43.0625, "learning_rate": 9.990450720349063e-06, "loss": 17.6285, "step": 3380 }, { "epoch": 0.061296308434344915, "grad_norm": 39.1875, "learning_rate": 9.990422468042404e-06, "loss": 17.5402, "step": 3390 }, { "epoch": 0.06147712350347278, "grad_norm": 41.25, "learning_rate": 9.990394215735745e-06, "loss": 17.6009, "step": 3400 }, { "epoch": 0.06165793857260064, "grad_norm": 40.9375, "learning_rate": 9.990365963429085e-06, "loss": 17.5682, "step": 3410 }, { "epoch": 0.0618387536417285, "grad_norm": 41.125, "learning_rate": 9.990337711122426e-06, "loss": 17.8649, "step": 3420 }, { "epoch": 0.062019568710856364, "grad_norm": 41.28125, "learning_rate": 9.990309458815765e-06, "loss": 17.6951, "step": 3430 }, { "epoch": 0.06220038377998422, "grad_norm": 38.96875, "learning_rate": 9.990281206509105e-06, "loss": 17.3738, "step": 3440 }, { "epoch": 0.062381198849112085, "grad_norm": 40.03125, "learning_rate": 9.990252954202446e-06, "loss": 17.4536, "step": 3450 }, { "epoch": 0.06256201391823994, "grad_norm": 38.5625, "learning_rate": 9.990224701895788e-06, "loss": 17.4441, "step": 3460 }, { "epoch": 0.06274282898736781, "grad_norm": 41.59375, "learning_rate": 9.990196449589127e-06, "loss": 17.284, "step": 3470 }, { "epoch": 0.06292364405649567, "grad_norm": 41.25, "learning_rate": 9.990168197282468e-06, "loss": 17.529, "step": 3480 }, { "epoch": 0.06310445912562353, "grad_norm": 39.25, "learning_rate": 9.990139944975809e-06, "loss": 17.4169, "step": 3490 }, { "epoch": 0.06328527419475138, "grad_norm": 37.78125, "learning_rate": 9.99011169266915e-06, "loss": 17.2194, "step": 3500 }, { "epoch": 0.06346608926387926, "grad_norm": 43.84375, "learning_rate": 9.99008344036249e-06, "loss": 17.5379, "step": 3510 }, { "epoch": 0.06364690433300711, "grad_norm": 40.5, "learning_rate": 9.990055188055829e-06, "loss": 17.5937, "step": 3520 }, { "epoch": 0.06382771940213497, "grad_norm": 41.125, "learning_rate": 9.99002693574917e-06, "loss": 17.8142, "step": 3530 }, { "epoch": 0.06400853447126284, "grad_norm": 40.78125, "learning_rate": 9.98999868344251e-06, "loss": 17.6586, "step": 3540 }, { "epoch": 0.0641893495403907, "grad_norm": 41.03125, "learning_rate": 9.98997043113585e-06, "loss": 17.0579, "step": 3550 }, { "epoch": 0.06437016460951855, "grad_norm": 37.84375, "learning_rate": 9.989942178829191e-06, "loss": 17.0986, "step": 3560 }, { "epoch": 0.06455097967864643, "grad_norm": 37.84375, "learning_rate": 9.989913926522532e-06, "loss": 17.384, "step": 3570 }, { "epoch": 0.06473179474777428, "grad_norm": 41.625, "learning_rate": 9.989885674215872e-06, "loss": 17.5132, "step": 3580 }, { "epoch": 0.06491260981690214, "grad_norm": 40.5, "learning_rate": 9.989857421909213e-06, "loss": 17.5032, "step": 3590 }, { "epoch": 0.06509342488603, "grad_norm": 40.96875, "learning_rate": 9.989829169602554e-06, "loss": 17.913, "step": 3600 }, { "epoch": 0.06527423995515787, "grad_norm": 40.78125, "learning_rate": 9.989800917295893e-06, "loss": 17.4051, "step": 3610 }, { "epoch": 0.06545505502428572, "grad_norm": 38.3125, "learning_rate": 9.989772664989233e-06, "loss": 17.7376, "step": 3620 }, { "epoch": 0.06563587009341358, "grad_norm": 39.40625, "learning_rate": 9.989744412682574e-06, "loss": 17.9015, "step": 3630 }, { "epoch": 0.06581668516254145, "grad_norm": 39.53125, "learning_rate": 9.989716160375915e-06, "loss": 17.7884, "step": 3640 }, { "epoch": 0.06599750023166931, "grad_norm": 36.15625, "learning_rate": 9.989687908069255e-06, "loss": 17.105, "step": 3650 }, { "epoch": 0.06617831530079717, "grad_norm": 39.09375, "learning_rate": 9.989659655762596e-06, "loss": 17.6172, "step": 3660 }, { "epoch": 0.06635913036992502, "grad_norm": 42.15625, "learning_rate": 9.989631403455936e-06, "loss": 17.6919, "step": 3670 }, { "epoch": 0.0665399454390529, "grad_norm": 38.90625, "learning_rate": 9.989603151149277e-06, "loss": 17.4987, "step": 3680 }, { "epoch": 0.06672076050818075, "grad_norm": 40.875, "learning_rate": 9.989574898842616e-06, "loss": 17.4662, "step": 3690 }, { "epoch": 0.06690157557730861, "grad_norm": 40.0625, "learning_rate": 9.989546646535957e-06, "loss": 17.3278, "step": 3700 }, { "epoch": 0.06708239064643648, "grad_norm": 39.28125, "learning_rate": 9.989518394229297e-06, "loss": 17.5842, "step": 3710 }, { "epoch": 0.06726320571556434, "grad_norm": 42.03125, "learning_rate": 9.989490141922638e-06, "loss": 17.4774, "step": 3720 }, { "epoch": 0.0674440207846922, "grad_norm": 37.84375, "learning_rate": 9.989461889615978e-06, "loss": 17.6076, "step": 3730 }, { "epoch": 0.06762483585382005, "grad_norm": 39.59375, "learning_rate": 9.989433637309319e-06, "loss": 17.2923, "step": 3740 }, { "epoch": 0.06780565092294792, "grad_norm": 40.625, "learning_rate": 9.98940538500266e-06, "loss": 17.6126, "step": 3750 }, { "epoch": 0.06798646599207578, "grad_norm": 41.84375, "learning_rate": 9.989377132696e-06, "loss": 17.6107, "step": 3760 }, { "epoch": 0.06816728106120364, "grad_norm": 40.8125, "learning_rate": 9.989348880389341e-06, "loss": 17.3268, "step": 3770 }, { "epoch": 0.0683480961303315, "grad_norm": 39.25, "learning_rate": 9.98932062808268e-06, "loss": 17.3873, "step": 3780 }, { "epoch": 0.06852891119945936, "grad_norm": 41.4375, "learning_rate": 9.98929237577602e-06, "loss": 17.6123, "step": 3790 }, { "epoch": 0.06870972626858722, "grad_norm": 42.4375, "learning_rate": 9.989264123469361e-06, "loss": 17.2932, "step": 3800 }, { "epoch": 0.06889054133771509, "grad_norm": 39.71875, "learning_rate": 9.989235871162702e-06, "loss": 17.8571, "step": 3810 }, { "epoch": 0.06907135640684295, "grad_norm": 38.625, "learning_rate": 9.989207618856042e-06, "loss": 17.2305, "step": 3820 }, { "epoch": 0.0692521714759708, "grad_norm": 40.53125, "learning_rate": 9.989179366549383e-06, "loss": 17.0817, "step": 3830 }, { "epoch": 0.06943298654509866, "grad_norm": 41.34375, "learning_rate": 9.989151114242724e-06, "loss": 17.5897, "step": 3840 }, { "epoch": 0.06961380161422653, "grad_norm": 39.0625, "learning_rate": 9.989122861936064e-06, "loss": 17.8866, "step": 3850 }, { "epoch": 0.06979461668335439, "grad_norm": 38.96875, "learning_rate": 9.989094609629403e-06, "loss": 17.5472, "step": 3860 }, { "epoch": 0.06997543175248225, "grad_norm": 41.03125, "learning_rate": 9.989066357322744e-06, "loss": 17.7536, "step": 3870 }, { "epoch": 0.07015624682161012, "grad_norm": 39.5625, "learning_rate": 9.989038105016084e-06, "loss": 17.7344, "step": 3880 }, { "epoch": 0.07033706189073798, "grad_norm": 39.15625, "learning_rate": 9.989009852709425e-06, "loss": 17.0403, "step": 3890 }, { "epoch": 0.07051787695986583, "grad_norm": 40.09375, "learning_rate": 9.988981600402766e-06, "loss": 17.368, "step": 3900 }, { "epoch": 0.07069869202899369, "grad_norm": 37.21875, "learning_rate": 9.988953348096106e-06, "loss": 17.6145, "step": 3910 }, { "epoch": 0.07087950709812156, "grad_norm": 39.3125, "learning_rate": 9.988925095789447e-06, "loss": 17.3629, "step": 3920 }, { "epoch": 0.07106032216724942, "grad_norm": 42.5625, "learning_rate": 9.988896843482787e-06, "loss": 17.8597, "step": 3930 }, { "epoch": 0.07124113723637727, "grad_norm": 41.15625, "learning_rate": 9.988868591176128e-06, "loss": 17.4615, "step": 3940 }, { "epoch": 0.07142195230550515, "grad_norm": 42.1875, "learning_rate": 9.988840338869467e-06, "loss": 17.6934, "step": 3950 }, { "epoch": 0.071602767374633, "grad_norm": 43.25, "learning_rate": 9.988812086562808e-06, "loss": 17.637, "step": 3960 }, { "epoch": 0.07178358244376086, "grad_norm": 40.65625, "learning_rate": 9.988783834256148e-06, "loss": 17.3732, "step": 3970 }, { "epoch": 0.07196439751288873, "grad_norm": 41.5625, "learning_rate": 9.988755581949489e-06, "loss": 17.4592, "step": 3980 }, { "epoch": 0.07214521258201659, "grad_norm": 41.125, "learning_rate": 9.98872732964283e-06, "loss": 17.7686, "step": 3990 }, { "epoch": 0.07232602765114444, "grad_norm": 41.65625, "learning_rate": 9.98869907733617e-06, "loss": 17.8232, "step": 4000 }, { "epoch": 0.0725068427202723, "grad_norm": 40.3125, "learning_rate": 9.98867082502951e-06, "loss": 17.597, "step": 4010 }, { "epoch": 0.07268765778940017, "grad_norm": 40.71875, "learning_rate": 9.988642572722851e-06, "loss": 17.6848, "step": 4020 }, { "epoch": 0.07286847285852803, "grad_norm": 41.96875, "learning_rate": 9.988614320416192e-06, "loss": 17.1169, "step": 4030 }, { "epoch": 0.07304928792765589, "grad_norm": 41.0, "learning_rate": 9.988586068109531e-06, "loss": 17.301, "step": 4040 }, { "epoch": 0.07323010299678376, "grad_norm": 39.8125, "learning_rate": 9.988557815802872e-06, "loss": 17.5104, "step": 4050 }, { "epoch": 0.07341091806591162, "grad_norm": 40.1875, "learning_rate": 9.988529563496212e-06, "loss": 17.302, "step": 4060 }, { "epoch": 0.07359173313503947, "grad_norm": 39.625, "learning_rate": 9.988501311189553e-06, "loss": 17.4633, "step": 4070 }, { "epoch": 0.07377254820416733, "grad_norm": 41.84375, "learning_rate": 9.988473058882893e-06, "loss": 17.6813, "step": 4080 }, { "epoch": 0.0739533632732952, "grad_norm": 37.4375, "learning_rate": 9.988444806576234e-06, "loss": 17.2189, "step": 4090 }, { "epoch": 0.07413417834242306, "grad_norm": 39.5, "learning_rate": 9.988416554269575e-06, "loss": 17.2021, "step": 4100 }, { "epoch": 0.07431499341155091, "grad_norm": 38.96875, "learning_rate": 9.988388301962915e-06, "loss": 17.6964, "step": 4110 }, { "epoch": 0.07449580848067879, "grad_norm": 39.375, "learning_rate": 9.988360049656254e-06, "loss": 17.2008, "step": 4120 }, { "epoch": 0.07467662354980664, "grad_norm": 39.0, "learning_rate": 9.988331797349595e-06, "loss": 17.7864, "step": 4130 }, { "epoch": 0.0748574386189345, "grad_norm": 42.84375, "learning_rate": 9.988303545042935e-06, "loss": 17.8637, "step": 4140 }, { "epoch": 0.07503825368806236, "grad_norm": 43.34375, "learning_rate": 9.988275292736276e-06, "loss": 17.4759, "step": 4150 }, { "epoch": 0.07521906875719023, "grad_norm": 41.5, "learning_rate": 9.988247040429617e-06, "loss": 17.9535, "step": 4160 }, { "epoch": 0.07539988382631808, "grad_norm": 47.34375, "learning_rate": 9.988218788122957e-06, "loss": 17.3817, "step": 4170 }, { "epoch": 0.07558069889544594, "grad_norm": 40.53125, "learning_rate": 9.988190535816298e-06, "loss": 17.4104, "step": 4180 }, { "epoch": 0.07576151396457381, "grad_norm": 40.25, "learning_rate": 9.988162283509639e-06, "loss": 17.018, "step": 4190 }, { "epoch": 0.07594232903370167, "grad_norm": 40.9375, "learning_rate": 9.98813403120298e-06, "loss": 17.7528, "step": 4200 }, { "epoch": 0.07612314410282953, "grad_norm": 40.125, "learning_rate": 9.988105778896318e-06, "loss": 17.5826, "step": 4210 }, { "epoch": 0.0763039591719574, "grad_norm": 40.28125, "learning_rate": 9.988077526589659e-06, "loss": 17.3862, "step": 4220 }, { "epoch": 0.07648477424108525, "grad_norm": 38.6875, "learning_rate": 9.988049274283e-06, "loss": 17.879, "step": 4230 }, { "epoch": 0.07666558931021311, "grad_norm": 40.53125, "learning_rate": 9.98802102197634e-06, "loss": 17.4999, "step": 4240 }, { "epoch": 0.07684640437934097, "grad_norm": 42.0, "learning_rate": 9.98799276966968e-06, "loss": 17.3519, "step": 4250 }, { "epoch": 0.07702721944846884, "grad_norm": 40.78125, "learning_rate": 9.987964517363021e-06, "loss": 17.3579, "step": 4260 }, { "epoch": 0.0772080345175967, "grad_norm": 42.59375, "learning_rate": 9.987936265056362e-06, "loss": 17.225, "step": 4270 }, { "epoch": 0.07738884958672455, "grad_norm": 39.5625, "learning_rate": 9.987908012749702e-06, "loss": 17.7747, "step": 4280 }, { "epoch": 0.07756966465585242, "grad_norm": 41.15625, "learning_rate": 9.987879760443041e-06, "loss": 17.5272, "step": 4290 }, { "epoch": 0.07775047972498028, "grad_norm": 40.03125, "learning_rate": 9.987851508136382e-06, "loss": 17.151, "step": 4300 }, { "epoch": 0.07793129479410814, "grad_norm": 39.5625, "learning_rate": 9.987823255829723e-06, "loss": 17.4248, "step": 4310 }, { "epoch": 0.078112109863236, "grad_norm": 39.09375, "learning_rate": 9.987795003523063e-06, "loss": 17.5838, "step": 4320 }, { "epoch": 0.07829292493236387, "grad_norm": 40.125, "learning_rate": 9.987766751216404e-06, "loss": 17.3888, "step": 4330 }, { "epoch": 0.07847374000149172, "grad_norm": 43.3125, "learning_rate": 9.987738498909745e-06, "loss": 17.7687, "step": 4340 }, { "epoch": 0.07865455507061958, "grad_norm": 43.1875, "learning_rate": 9.987710246603085e-06, "loss": 17.5069, "step": 4350 }, { "epoch": 0.07883537013974745, "grad_norm": 38.375, "learning_rate": 9.987681994296426e-06, "loss": 17.6491, "step": 4360 }, { "epoch": 0.07901618520887531, "grad_norm": 39.75, "learning_rate": 9.987653741989766e-06, "loss": 17.67, "step": 4370 }, { "epoch": 0.07919700027800317, "grad_norm": 41.40625, "learning_rate": 9.987625489683105e-06, "loss": 17.4607, "step": 4380 }, { "epoch": 0.07937781534713104, "grad_norm": 41.09375, "learning_rate": 9.987597237376446e-06, "loss": 17.0653, "step": 4390 }, { "epoch": 0.0795586304162589, "grad_norm": 39.4375, "learning_rate": 9.987568985069787e-06, "loss": 17.7432, "step": 4400 }, { "epoch": 0.07973944548538675, "grad_norm": 40.34375, "learning_rate": 9.987540732763127e-06, "loss": 17.5271, "step": 4410 }, { "epoch": 0.07992026055451461, "grad_norm": 43.625, "learning_rate": 9.987512480456468e-06, "loss": 17.3105, "step": 4420 }, { "epoch": 0.08010107562364248, "grad_norm": 37.25, "learning_rate": 9.987484228149808e-06, "loss": 17.6018, "step": 4430 }, { "epoch": 0.08028189069277034, "grad_norm": 43.4375, "learning_rate": 9.987455975843149e-06, "loss": 17.2744, "step": 4440 }, { "epoch": 0.08046270576189819, "grad_norm": 37.03125, "learning_rate": 9.98742772353649e-06, "loss": 17.3233, "step": 4450 }, { "epoch": 0.08064352083102606, "grad_norm": 39.6875, "learning_rate": 9.98739947122983e-06, "loss": 17.4399, "step": 4460 }, { "epoch": 0.08082433590015392, "grad_norm": 40.5, "learning_rate": 9.98737121892317e-06, "loss": 16.9782, "step": 4470 }, { "epoch": 0.08100515096928178, "grad_norm": 38.1875, "learning_rate": 9.98734296661651e-06, "loss": 16.7517, "step": 4480 }, { "epoch": 0.08118596603840963, "grad_norm": 39.96875, "learning_rate": 9.98731471430985e-06, "loss": 17.4957, "step": 4490 }, { "epoch": 0.0813667811075375, "grad_norm": 41.03125, "learning_rate": 9.987286462003191e-06, "loss": 17.537, "step": 4500 }, { "epoch": 0.08154759617666536, "grad_norm": 40.09375, "learning_rate": 9.987258209696532e-06, "loss": 17.3668, "step": 4510 }, { "epoch": 0.08172841124579322, "grad_norm": 41.09375, "learning_rate": 9.987229957389872e-06, "loss": 17.4374, "step": 4520 }, { "epoch": 0.08190922631492109, "grad_norm": 43.90625, "learning_rate": 9.987201705083213e-06, "loss": 17.6475, "step": 4530 }, { "epoch": 0.08209004138404895, "grad_norm": 39.0625, "learning_rate": 9.987173452776554e-06, "loss": 17.6052, "step": 4540 }, { "epoch": 0.0822708564531768, "grad_norm": 36.59375, "learning_rate": 9.987145200469892e-06, "loss": 17.2227, "step": 4550 }, { "epoch": 0.08245167152230466, "grad_norm": 38.625, "learning_rate": 9.987116948163233e-06, "loss": 17.2557, "step": 4560 }, { "epoch": 0.08263248659143253, "grad_norm": 39.46875, "learning_rate": 9.987088695856574e-06, "loss": 17.3571, "step": 4570 }, { "epoch": 0.08281330166056039, "grad_norm": 43.625, "learning_rate": 9.987060443549914e-06, "loss": 17.6732, "step": 4580 }, { "epoch": 0.08299411672968825, "grad_norm": 38.9375, "learning_rate": 9.987032191243255e-06, "loss": 17.7018, "step": 4590 }, { "epoch": 0.08317493179881612, "grad_norm": 41.875, "learning_rate": 9.987003938936596e-06, "loss": 17.6475, "step": 4600 }, { "epoch": 0.08335574686794398, "grad_norm": 40.90625, "learning_rate": 9.986975686629936e-06, "loss": 17.6004, "step": 4610 }, { "epoch": 0.08353656193707183, "grad_norm": 41.4375, "learning_rate": 9.986947434323277e-06, "loss": 17.108, "step": 4620 }, { "epoch": 0.0837173770061997, "grad_norm": 39.03125, "learning_rate": 9.986919182016617e-06, "loss": 17.4432, "step": 4630 }, { "epoch": 0.08389819207532756, "grad_norm": 39.5625, "learning_rate": 9.986890929709956e-06, "loss": 17.2823, "step": 4640 }, { "epoch": 0.08407900714445542, "grad_norm": 38.40625, "learning_rate": 9.986862677403297e-06, "loss": 17.3734, "step": 4650 }, { "epoch": 0.08425982221358327, "grad_norm": 41.15625, "learning_rate": 9.986834425096638e-06, "loss": 17.538, "step": 4660 }, { "epoch": 0.08444063728271115, "grad_norm": 38.46875, "learning_rate": 9.986806172789978e-06, "loss": 16.9801, "step": 4670 }, { "epoch": 0.084621452351839, "grad_norm": 39.53125, "learning_rate": 9.986777920483319e-06, "loss": 17.5482, "step": 4680 }, { "epoch": 0.08480226742096686, "grad_norm": 39.46875, "learning_rate": 9.98674966817666e-06, "loss": 17.2156, "step": 4690 }, { "epoch": 0.08498308249009473, "grad_norm": 37.34375, "learning_rate": 9.98672141587e-06, "loss": 17.5817, "step": 4700 }, { "epoch": 0.08516389755922259, "grad_norm": 39.0, "learning_rate": 9.98669316356334e-06, "loss": 17.4391, "step": 4710 }, { "epoch": 0.08534471262835044, "grad_norm": 39.71875, "learning_rate": 9.98666491125668e-06, "loss": 17.4249, "step": 4720 }, { "epoch": 0.0855255276974783, "grad_norm": 38.25, "learning_rate": 9.98663665895002e-06, "loss": 17.2424, "step": 4730 }, { "epoch": 0.08570634276660617, "grad_norm": 40.40625, "learning_rate": 9.986608406643361e-06, "loss": 17.6555, "step": 4740 }, { "epoch": 0.08588715783573403, "grad_norm": 39.625, "learning_rate": 9.986580154336702e-06, "loss": 17.662, "step": 4750 }, { "epoch": 0.08606797290486189, "grad_norm": 38.4375, "learning_rate": 9.986551902030042e-06, "loss": 17.5323, "step": 4760 }, { "epoch": 0.08624878797398976, "grad_norm": 39.28125, "learning_rate": 9.986523649723381e-06, "loss": 17.3838, "step": 4770 }, { "epoch": 0.08642960304311761, "grad_norm": 40.09375, "learning_rate": 9.986495397416723e-06, "loss": 17.4733, "step": 4780 }, { "epoch": 0.08661041811224547, "grad_norm": 39.1875, "learning_rate": 9.986467145110064e-06, "loss": 17.9583, "step": 4790 }, { "epoch": 0.08679123318137334, "grad_norm": 40.65625, "learning_rate": 9.986438892803405e-06, "loss": 17.6036, "step": 4800 }, { "epoch": 0.0869720482505012, "grad_norm": 41.4375, "learning_rate": 9.986410640496744e-06, "loss": 17.6934, "step": 4810 }, { "epoch": 0.08715286331962906, "grad_norm": 38.84375, "learning_rate": 9.986382388190084e-06, "loss": 17.2059, "step": 4820 }, { "epoch": 0.08733367838875691, "grad_norm": 37.75, "learning_rate": 9.986354135883425e-06, "loss": 17.1567, "step": 4830 }, { "epoch": 0.08751449345788478, "grad_norm": 42.1875, "learning_rate": 9.986325883576765e-06, "loss": 17.3249, "step": 4840 }, { "epoch": 0.08769530852701264, "grad_norm": 38.25, "learning_rate": 9.986297631270106e-06, "loss": 17.2945, "step": 4850 }, { "epoch": 0.0878761235961405, "grad_norm": 40.0625, "learning_rate": 9.986269378963447e-06, "loss": 17.2515, "step": 4860 }, { "epoch": 0.08805693866526837, "grad_norm": 38.9375, "learning_rate": 9.986241126656787e-06, "loss": 17.6603, "step": 4870 }, { "epoch": 0.08823775373439623, "grad_norm": 42.0625, "learning_rate": 9.986212874350128e-06, "loss": 17.6273, "step": 4880 }, { "epoch": 0.08841856880352408, "grad_norm": 37.9375, "learning_rate": 9.986184622043467e-06, "loss": 17.3321, "step": 4890 }, { "epoch": 0.08859938387265194, "grad_norm": 38.15625, "learning_rate": 9.986156369736808e-06, "loss": 17.5066, "step": 4900 }, { "epoch": 0.08878019894177981, "grad_norm": 40.125, "learning_rate": 9.986128117430148e-06, "loss": 17.2144, "step": 4910 }, { "epoch": 0.08896101401090767, "grad_norm": 39.6875, "learning_rate": 9.986099865123489e-06, "loss": 17.3687, "step": 4920 }, { "epoch": 0.08914182908003553, "grad_norm": 41.3125, "learning_rate": 9.98607161281683e-06, "loss": 17.7729, "step": 4930 }, { "epoch": 0.0893226441491634, "grad_norm": 38.0, "learning_rate": 9.98604336051017e-06, "loss": 17.2783, "step": 4940 }, { "epoch": 0.08950345921829125, "grad_norm": 37.96875, "learning_rate": 9.98601510820351e-06, "loss": 17.4247, "step": 4950 }, { "epoch": 0.08968427428741911, "grad_norm": 38.84375, "learning_rate": 9.985986855896851e-06, "loss": 17.406, "step": 4960 }, { "epoch": 0.08986508935654697, "grad_norm": 40.875, "learning_rate": 9.985958603590192e-06, "loss": 17.4239, "step": 4970 }, { "epoch": 0.09004590442567484, "grad_norm": 38.9375, "learning_rate": 9.98593035128353e-06, "loss": 17.6854, "step": 4980 }, { "epoch": 0.0902267194948027, "grad_norm": 39.03125, "learning_rate": 9.985902098976871e-06, "loss": 17.2287, "step": 4990 }, { "epoch": 0.09040753456393055, "grad_norm": 39.625, "learning_rate": 9.985873846670212e-06, "loss": 17.9483, "step": 5000 }, { "epoch": 0.09040753456393055, "eval_loss": 2.186025381088257, "eval_runtime": 229.8838, "eval_samples_per_second": 3158.374, "eval_steps_per_second": 49.351, "step": 5000 }, { "epoch": 0.09058834963305842, "grad_norm": 38.96875, "learning_rate": 9.985845594363553e-06, "loss": 17.5609, "step": 5010 }, { "epoch": 0.09076916470218628, "grad_norm": 39.28125, "learning_rate": 9.985817342056893e-06, "loss": 17.8373, "step": 5020 }, { "epoch": 0.09094997977131414, "grad_norm": 39.90625, "learning_rate": 9.985789089750232e-06, "loss": 17.0478, "step": 5030 }, { "epoch": 0.09113079484044201, "grad_norm": 43.09375, "learning_rate": 9.985760837443575e-06, "loss": 17.4107, "step": 5040 }, { "epoch": 0.09131160990956987, "grad_norm": 37.3125, "learning_rate": 9.985732585136915e-06, "loss": 17.773, "step": 5050 }, { "epoch": 0.09149242497869772, "grad_norm": 38.21875, "learning_rate": 9.985704332830256e-06, "loss": 16.9747, "step": 5060 }, { "epoch": 0.09167324004782558, "grad_norm": 39.78125, "learning_rate": 9.985676080523595e-06, "loss": 16.9955, "step": 5070 }, { "epoch": 0.09185405511695345, "grad_norm": 41.53125, "learning_rate": 9.985647828216935e-06, "loss": 17.2063, "step": 5080 }, { "epoch": 0.09203487018608131, "grad_norm": 39.90625, "learning_rate": 9.985619575910276e-06, "loss": 17.2967, "step": 5090 }, { "epoch": 0.09221568525520916, "grad_norm": 39.34375, "learning_rate": 9.985591323603617e-06, "loss": 17.641, "step": 5100 }, { "epoch": 0.09239650032433704, "grad_norm": 40.5625, "learning_rate": 9.985563071296957e-06, "loss": 17.6372, "step": 5110 }, { "epoch": 0.09257731539346489, "grad_norm": 39.96875, "learning_rate": 9.985534818990296e-06, "loss": 17.3905, "step": 5120 }, { "epoch": 0.09275813046259275, "grad_norm": 38.375, "learning_rate": 9.985506566683638e-06, "loss": 17.634, "step": 5130 }, { "epoch": 0.09293894553172061, "grad_norm": 39.9375, "learning_rate": 9.985478314376979e-06, "loss": 17.8938, "step": 5140 }, { "epoch": 0.09311976060084848, "grad_norm": 39.84375, "learning_rate": 9.985450062070318e-06, "loss": 17.6147, "step": 5150 }, { "epoch": 0.09330057566997634, "grad_norm": 39.0625, "learning_rate": 9.985421809763659e-06, "loss": 17.5877, "step": 5160 }, { "epoch": 0.09348139073910419, "grad_norm": 38.65625, "learning_rate": 9.985393557457e-06, "loss": 17.0614, "step": 5170 }, { "epoch": 0.09366220580823206, "grad_norm": 41.8125, "learning_rate": 9.98536530515034e-06, "loss": 17.3577, "step": 5180 }, { "epoch": 0.09384302087735992, "grad_norm": 42.65625, "learning_rate": 9.98533705284368e-06, "loss": 17.6818, "step": 5190 }, { "epoch": 0.09402383594648778, "grad_norm": 41.125, "learning_rate": 9.98530880053702e-06, "loss": 17.375, "step": 5200 }, { "epoch": 0.09420465101561565, "grad_norm": 39.90625, "learning_rate": 9.985280548230362e-06, "loss": 17.1533, "step": 5210 }, { "epoch": 0.0943854660847435, "grad_norm": 37.84375, "learning_rate": 9.985252295923702e-06, "loss": 17.5257, "step": 5220 }, { "epoch": 0.09456628115387136, "grad_norm": 41.53125, "learning_rate": 9.985224043617043e-06, "loss": 17.6709, "step": 5230 }, { "epoch": 0.09474709622299922, "grad_norm": 41.71875, "learning_rate": 9.985195791310382e-06, "loss": 17.9804, "step": 5240 }, { "epoch": 0.09492791129212709, "grad_norm": 39.6875, "learning_rate": 9.985167539003723e-06, "loss": 17.0051, "step": 5250 }, { "epoch": 0.09510872636125495, "grad_norm": 39.0, "learning_rate": 9.985139286697063e-06, "loss": 17.7626, "step": 5260 }, { "epoch": 0.0952895414303828, "grad_norm": 39.0, "learning_rate": 9.985111034390404e-06, "loss": 17.0476, "step": 5270 }, { "epoch": 0.09547035649951068, "grad_norm": 40.15625, "learning_rate": 9.985082782083744e-06, "loss": 17.5495, "step": 5280 }, { "epoch": 0.09565117156863853, "grad_norm": 41.78125, "learning_rate": 9.985054529777083e-06, "loss": 17.558, "step": 5290 }, { "epoch": 0.09583198663776639, "grad_norm": 36.9375, "learning_rate": 9.985026277470426e-06, "loss": 17.4099, "step": 5300 }, { "epoch": 0.09601280170689425, "grad_norm": 43.0625, "learning_rate": 9.984998025163766e-06, "loss": 17.2957, "step": 5310 }, { "epoch": 0.09619361677602212, "grad_norm": 39.71875, "learning_rate": 9.984969772857105e-06, "loss": 17.5661, "step": 5320 }, { "epoch": 0.09637443184514997, "grad_norm": 41.65625, "learning_rate": 9.984941520550446e-06, "loss": 17.8033, "step": 5330 }, { "epoch": 0.09655524691427783, "grad_norm": 42.1875, "learning_rate": 9.984913268243786e-06, "loss": 17.4909, "step": 5340 }, { "epoch": 0.0967360619834057, "grad_norm": 38.46875, "learning_rate": 9.984885015937127e-06, "loss": 17.7694, "step": 5350 }, { "epoch": 0.09691687705253356, "grad_norm": 39.5, "learning_rate": 9.984856763630468e-06, "loss": 17.4628, "step": 5360 }, { "epoch": 0.09709769212166142, "grad_norm": 40.75, "learning_rate": 9.984828511323808e-06, "loss": 17.667, "step": 5370 }, { "epoch": 0.09727850719078927, "grad_norm": 40.0625, "learning_rate": 9.984800259017147e-06, "loss": 17.372, "step": 5380 }, { "epoch": 0.09745932225991714, "grad_norm": 41.0, "learning_rate": 9.98477200671049e-06, "loss": 17.2629, "step": 5390 }, { "epoch": 0.097640137329045, "grad_norm": 39.5, "learning_rate": 9.98474375440383e-06, "loss": 17.4202, "step": 5400 }, { "epoch": 0.09782095239817286, "grad_norm": 40.03125, "learning_rate": 9.984715502097169e-06, "loss": 17.287, "step": 5410 }, { "epoch": 0.09800176746730073, "grad_norm": 43.90625, "learning_rate": 9.98468724979051e-06, "loss": 17.6449, "step": 5420 }, { "epoch": 0.09818258253642859, "grad_norm": 40.5625, "learning_rate": 9.98465899748385e-06, "loss": 16.9249, "step": 5430 }, { "epoch": 0.09836339760555644, "grad_norm": 39.375, "learning_rate": 9.984630745177191e-06, "loss": 17.5399, "step": 5440 }, { "epoch": 0.09854421267468431, "grad_norm": 38.96875, "learning_rate": 9.984602492870532e-06, "loss": 17.6102, "step": 5450 }, { "epoch": 0.09872502774381217, "grad_norm": 39.1875, "learning_rate": 9.98457424056387e-06, "loss": 17.4345, "step": 5460 }, { "epoch": 0.09890584281294003, "grad_norm": 41.65625, "learning_rate": 9.984545988257211e-06, "loss": 17.5696, "step": 5470 }, { "epoch": 0.09908665788206789, "grad_norm": 41.75, "learning_rate": 9.984517735950553e-06, "loss": 17.2881, "step": 5480 }, { "epoch": 0.09926747295119576, "grad_norm": 39.4375, "learning_rate": 9.984489483643894e-06, "loss": 17.1251, "step": 5490 }, { "epoch": 0.09944828802032361, "grad_norm": 43.1875, "learning_rate": 9.984461231337233e-06, "loss": 17.6069, "step": 5500 }, { "epoch": 0.09962910308945147, "grad_norm": 40.8125, "learning_rate": 9.984432979030574e-06, "loss": 17.2116, "step": 5510 }, { "epoch": 0.09980991815857934, "grad_norm": 42.84375, "learning_rate": 9.984404726723914e-06, "loss": 17.4008, "step": 5520 }, { "epoch": 0.0999907332277072, "grad_norm": 40.09375, "learning_rate": 9.984376474417255e-06, "loss": 17.622, "step": 5530 }, { "epoch": 0.10017154829683506, "grad_norm": 41.125, "learning_rate": 9.984348222110595e-06, "loss": 17.6973, "step": 5540 }, { "epoch": 0.10035236336596291, "grad_norm": 42.34375, "learning_rate": 9.984319969803934e-06, "loss": 17.1871, "step": 5550 }, { "epoch": 0.10053317843509078, "grad_norm": 40.40625, "learning_rate": 9.984291717497277e-06, "loss": 17.496, "step": 5560 }, { "epoch": 0.10071399350421864, "grad_norm": 41.71875, "learning_rate": 9.984263465190617e-06, "loss": 17.375, "step": 5570 }, { "epoch": 0.1008948085733465, "grad_norm": 41.625, "learning_rate": 9.984235212883956e-06, "loss": 17.9584, "step": 5580 }, { "epoch": 0.10107562364247437, "grad_norm": 39.875, "learning_rate": 9.984206960577297e-06, "loss": 17.8407, "step": 5590 }, { "epoch": 0.10125643871160223, "grad_norm": 41.84375, "learning_rate": 9.984178708270638e-06, "loss": 17.1891, "step": 5600 }, { "epoch": 0.10143725378073008, "grad_norm": 40.5, "learning_rate": 9.984150455963978e-06, "loss": 17.3538, "step": 5610 }, { "epoch": 0.10161806884985795, "grad_norm": 39.09375, "learning_rate": 9.984122203657319e-06, "loss": 17.3117, "step": 5620 }, { "epoch": 0.10179888391898581, "grad_norm": 37.90625, "learning_rate": 9.984093951350658e-06, "loss": 17.2763, "step": 5630 }, { "epoch": 0.10197969898811367, "grad_norm": 40.46875, "learning_rate": 9.984065699043998e-06, "loss": 17.175, "step": 5640 }, { "epoch": 0.10216051405724152, "grad_norm": 37.46875, "learning_rate": 9.98403744673734e-06, "loss": 17.5182, "step": 5650 }, { "epoch": 0.1023413291263694, "grad_norm": 39.03125, "learning_rate": 9.984009194430681e-06, "loss": 17.2687, "step": 5660 }, { "epoch": 0.10252214419549725, "grad_norm": 39.875, "learning_rate": 9.98398094212402e-06, "loss": 17.5617, "step": 5670 }, { "epoch": 0.10270295926462511, "grad_norm": 40.375, "learning_rate": 9.98395268981736e-06, "loss": 17.4767, "step": 5680 }, { "epoch": 0.10288377433375298, "grad_norm": 40.40625, "learning_rate": 9.983924437510701e-06, "loss": 17.7439, "step": 5690 }, { "epoch": 0.10306458940288084, "grad_norm": 40.34375, "learning_rate": 9.983896185204042e-06, "loss": 17.0064, "step": 5700 }, { "epoch": 0.1032454044720087, "grad_norm": 39.90625, "learning_rate": 9.983867932897383e-06, "loss": 17.699, "step": 5710 }, { "epoch": 0.10342621954113655, "grad_norm": 38.4375, "learning_rate": 9.983839680590722e-06, "loss": 17.5337, "step": 5720 }, { "epoch": 0.10360703461026442, "grad_norm": 38.375, "learning_rate": 9.983811428284062e-06, "loss": 17.6822, "step": 5730 }, { "epoch": 0.10378784967939228, "grad_norm": 38.90625, "learning_rate": 9.983783175977405e-06, "loss": 17.4954, "step": 5740 }, { "epoch": 0.10396866474852014, "grad_norm": 42.28125, "learning_rate": 9.983754923670743e-06, "loss": 17.6689, "step": 5750 }, { "epoch": 0.10414947981764801, "grad_norm": 40.75, "learning_rate": 9.983726671364084e-06, "loss": 17.2288, "step": 5760 }, { "epoch": 0.10433029488677587, "grad_norm": 41.59375, "learning_rate": 9.983698419057425e-06, "loss": 17.3645, "step": 5770 }, { "epoch": 0.10451110995590372, "grad_norm": 40.65625, "learning_rate": 9.983670166750765e-06, "loss": 17.2011, "step": 5780 }, { "epoch": 0.10469192502503159, "grad_norm": 40.65625, "learning_rate": 9.983641914444106e-06, "loss": 17.4304, "step": 5790 }, { "epoch": 0.10487274009415945, "grad_norm": 39.40625, "learning_rate": 9.983613662137447e-06, "loss": 17.1882, "step": 5800 }, { "epoch": 0.10505355516328731, "grad_norm": 40.46875, "learning_rate": 9.983585409830785e-06, "loss": 17.7, "step": 5810 }, { "epoch": 0.10523437023241516, "grad_norm": 41.25, "learning_rate": 9.983557157524126e-06, "loss": 17.4315, "step": 5820 }, { "epoch": 0.10541518530154304, "grad_norm": 40.75, "learning_rate": 9.983528905217468e-06, "loss": 17.2083, "step": 5830 }, { "epoch": 0.10559600037067089, "grad_norm": 39.40625, "learning_rate": 9.983500652910807e-06, "loss": 17.4995, "step": 5840 }, { "epoch": 0.10577681543979875, "grad_norm": 41.59375, "learning_rate": 9.983472400604148e-06, "loss": 17.4644, "step": 5850 }, { "epoch": 0.10595763050892662, "grad_norm": 39.875, "learning_rate": 9.983444148297489e-06, "loss": 16.9171, "step": 5860 }, { "epoch": 0.10613844557805448, "grad_norm": 40.15625, "learning_rate": 9.98341589599083e-06, "loss": 17.2418, "step": 5870 }, { "epoch": 0.10631926064718233, "grad_norm": 41.6875, "learning_rate": 9.98338764368417e-06, "loss": 17.2949, "step": 5880 }, { "epoch": 0.10650007571631019, "grad_norm": 42.09375, "learning_rate": 9.983359391377509e-06, "loss": 17.4008, "step": 5890 }, { "epoch": 0.10668089078543806, "grad_norm": 40.59375, "learning_rate": 9.98333113907085e-06, "loss": 17.65, "step": 5900 }, { "epoch": 0.10686170585456592, "grad_norm": 38.59375, "learning_rate": 9.98330288676419e-06, "loss": 16.9491, "step": 5910 }, { "epoch": 0.10704252092369378, "grad_norm": 40.59375, "learning_rate": 9.983274634457532e-06, "loss": 17.8632, "step": 5920 }, { "epoch": 0.10722333599282165, "grad_norm": 39.875, "learning_rate": 9.983246382150871e-06, "loss": 17.4305, "step": 5930 }, { "epoch": 0.1074041510619495, "grad_norm": 40.5, "learning_rate": 9.983218129844212e-06, "loss": 17.4213, "step": 5940 }, { "epoch": 0.10758496613107736, "grad_norm": 39.25, "learning_rate": 9.983189877537553e-06, "loss": 17.216, "step": 5950 }, { "epoch": 0.10776578120020522, "grad_norm": 38.375, "learning_rate": 9.983161625230893e-06, "loss": 17.5143, "step": 5960 }, { "epoch": 0.10794659626933309, "grad_norm": 40.9375, "learning_rate": 9.983133372924234e-06, "loss": 17.9775, "step": 5970 }, { "epoch": 0.10812741133846095, "grad_norm": 40.09375, "learning_rate": 9.983105120617573e-06, "loss": 17.2851, "step": 5980 }, { "epoch": 0.1083082264075888, "grad_norm": 37.90625, "learning_rate": 9.983076868310913e-06, "loss": 17.2725, "step": 5990 }, { "epoch": 0.10848904147671667, "grad_norm": 36.4375, "learning_rate": 9.983048616004256e-06, "loss": 17.4561, "step": 6000 }, { "epoch": 0.10866985654584453, "grad_norm": 39.15625, "learning_rate": 9.983020363697595e-06, "loss": 17.251, "step": 6010 }, { "epoch": 0.10885067161497239, "grad_norm": 39.34375, "learning_rate": 9.982992111390935e-06, "loss": 17.4456, "step": 6020 }, { "epoch": 0.10903148668410026, "grad_norm": 41.3125, "learning_rate": 9.982963859084276e-06, "loss": 17.8138, "step": 6030 }, { "epoch": 0.10921230175322812, "grad_norm": 41.53125, "learning_rate": 9.982935606777616e-06, "loss": 17.5189, "step": 6040 }, { "epoch": 0.10939311682235597, "grad_norm": 40.59375, "learning_rate": 9.982907354470957e-06, "loss": 17.5637, "step": 6050 }, { "epoch": 0.10957393189148383, "grad_norm": 42.21875, "learning_rate": 9.982879102164296e-06, "loss": 17.297, "step": 6060 }, { "epoch": 0.1097547469606117, "grad_norm": 39.03125, "learning_rate": 9.982850849857637e-06, "loss": 17.6551, "step": 6070 }, { "epoch": 0.10993556202973956, "grad_norm": 37.84375, "learning_rate": 9.982822597550977e-06, "loss": 17.2923, "step": 6080 }, { "epoch": 0.11011637709886742, "grad_norm": 40.25, "learning_rate": 9.98279434524432e-06, "loss": 17.8248, "step": 6090 }, { "epoch": 0.11029719216799529, "grad_norm": 40.84375, "learning_rate": 9.982766092937658e-06, "loss": 17.666, "step": 6100 }, { "epoch": 0.11047800723712314, "grad_norm": 42.9375, "learning_rate": 9.982737840630999e-06, "loss": 17.6284, "step": 6110 }, { "epoch": 0.110658822306251, "grad_norm": 40.5625, "learning_rate": 9.98270958832434e-06, "loss": 16.9809, "step": 6120 }, { "epoch": 0.11083963737537886, "grad_norm": 42.5625, "learning_rate": 9.98268133601768e-06, "loss": 17.6463, "step": 6130 }, { "epoch": 0.11102045244450673, "grad_norm": 41.46875, "learning_rate": 9.982653083711021e-06, "loss": 17.5154, "step": 6140 }, { "epoch": 0.11120126751363459, "grad_norm": 39.40625, "learning_rate": 9.98262483140436e-06, "loss": 17.27, "step": 6150 }, { "epoch": 0.11138208258276244, "grad_norm": 39.96875, "learning_rate": 9.9825965790977e-06, "loss": 17.2718, "step": 6160 }, { "epoch": 0.11156289765189031, "grad_norm": 39.78125, "learning_rate": 9.982568326791041e-06, "loss": 17.3054, "step": 6170 }, { "epoch": 0.11174371272101817, "grad_norm": 37.53125, "learning_rate": 9.982540074484382e-06, "loss": 17.0941, "step": 6180 }, { "epoch": 0.11192452779014603, "grad_norm": 40.9375, "learning_rate": 9.982511822177722e-06, "loss": 16.9333, "step": 6190 }, { "epoch": 0.1121053428592739, "grad_norm": 41.40625, "learning_rate": 9.982483569871063e-06, "loss": 17.4357, "step": 6200 }, { "epoch": 0.11228615792840176, "grad_norm": 37.4375, "learning_rate": 9.982455317564404e-06, "loss": 17.1446, "step": 6210 }, { "epoch": 0.11246697299752961, "grad_norm": 40.0, "learning_rate": 9.982427065257744e-06, "loss": 17.581, "step": 6220 }, { "epoch": 0.11264778806665747, "grad_norm": 40.78125, "learning_rate": 9.982398812951085e-06, "loss": 17.6953, "step": 6230 }, { "epoch": 0.11282860313578534, "grad_norm": 42.96875, "learning_rate": 9.982370560644424e-06, "loss": 17.8289, "step": 6240 }, { "epoch": 0.1130094182049132, "grad_norm": 41.375, "learning_rate": 9.982342308337764e-06, "loss": 17.8248, "step": 6250 }, { "epoch": 0.11319023327404105, "grad_norm": 41.34375, "learning_rate": 9.982314056031105e-06, "loss": 17.7385, "step": 6260 }, { "epoch": 0.11337104834316893, "grad_norm": 39.9375, "learning_rate": 9.982285803724446e-06, "loss": 17.1312, "step": 6270 }, { "epoch": 0.11355186341229678, "grad_norm": 37.65625, "learning_rate": 9.982257551417786e-06, "loss": 17.5599, "step": 6280 }, { "epoch": 0.11373267848142464, "grad_norm": 37.71875, "learning_rate": 9.982229299111127e-06, "loss": 17.3697, "step": 6290 }, { "epoch": 0.1139134935505525, "grad_norm": 42.46875, "learning_rate": 9.982201046804468e-06, "loss": 17.8049, "step": 6300 }, { "epoch": 0.11409430861968037, "grad_norm": 40.0625, "learning_rate": 9.982172794497808e-06, "loss": 17.5022, "step": 6310 }, { "epoch": 0.11427512368880823, "grad_norm": 42.5, "learning_rate": 9.982144542191147e-06, "loss": 17.1551, "step": 6320 }, { "epoch": 0.11445593875793608, "grad_norm": 39.125, "learning_rate": 9.982116289884488e-06, "loss": 17.5444, "step": 6330 }, { "epoch": 0.11463675382706395, "grad_norm": 39.8125, "learning_rate": 9.982088037577828e-06, "loss": 16.926, "step": 6340 }, { "epoch": 0.11481756889619181, "grad_norm": 41.40625, "learning_rate": 9.98205978527117e-06, "loss": 17.1707, "step": 6350 }, { "epoch": 0.11499838396531967, "grad_norm": 38.71875, "learning_rate": 9.98203153296451e-06, "loss": 17.3683, "step": 6360 }, { "epoch": 0.11517919903444752, "grad_norm": 41.25, "learning_rate": 9.98200328065785e-06, "loss": 17.3285, "step": 6370 }, { "epoch": 0.1153600141035754, "grad_norm": 41.75, "learning_rate": 9.98197502835119e-06, "loss": 17.3364, "step": 6380 }, { "epoch": 0.11554082917270325, "grad_norm": 40.5, "learning_rate": 9.981946776044531e-06, "loss": 17.5414, "step": 6390 }, { "epoch": 0.11572164424183111, "grad_norm": 40.625, "learning_rate": 9.981918523737872e-06, "loss": 17.6574, "step": 6400 }, { "epoch": 0.11590245931095898, "grad_norm": 40.75, "learning_rate": 9.981890271431211e-06, "loss": 17.3751, "step": 6410 }, { "epoch": 0.11608327438008684, "grad_norm": 38.53125, "learning_rate": 9.981862019124552e-06, "loss": 16.8837, "step": 6420 }, { "epoch": 0.1162640894492147, "grad_norm": 41.625, "learning_rate": 9.981833766817892e-06, "loss": 17.3399, "step": 6430 }, { "epoch": 0.11644490451834257, "grad_norm": 41.375, "learning_rate": 9.981805514511233e-06, "loss": 17.4723, "step": 6440 }, { "epoch": 0.11662571958747042, "grad_norm": 38.75, "learning_rate": 9.981777262204573e-06, "loss": 17.4737, "step": 6450 }, { "epoch": 0.11680653465659828, "grad_norm": 38.96875, "learning_rate": 9.981749009897914e-06, "loss": 17.5064, "step": 6460 }, { "epoch": 0.11698734972572614, "grad_norm": 37.75, "learning_rate": 9.981720757591255e-06, "loss": 17.5465, "step": 6470 }, { "epoch": 0.11716816479485401, "grad_norm": 41.84375, "learning_rate": 9.981692505284595e-06, "loss": 17.2815, "step": 6480 }, { "epoch": 0.11734897986398186, "grad_norm": 41.25, "learning_rate": 9.981664252977934e-06, "loss": 17.0824, "step": 6490 }, { "epoch": 0.11752979493310972, "grad_norm": 40.78125, "learning_rate": 9.981636000671275e-06, "loss": 17.3115, "step": 6500 }, { "epoch": 0.11771061000223759, "grad_norm": 39.625, "learning_rate": 9.981607748364616e-06, "loss": 17.4792, "step": 6510 }, { "epoch": 0.11789142507136545, "grad_norm": 39.09375, "learning_rate": 9.981579496057956e-06, "loss": 17.1819, "step": 6520 }, { "epoch": 0.1180722401404933, "grad_norm": 37.71875, "learning_rate": 9.981551243751297e-06, "loss": 17.3972, "step": 6530 }, { "epoch": 0.11825305520962116, "grad_norm": 38.34375, "learning_rate": 9.981522991444637e-06, "loss": 17.372, "step": 6540 }, { "epoch": 0.11843387027874903, "grad_norm": 39.28125, "learning_rate": 9.981494739137978e-06, "loss": 17.452, "step": 6550 }, { "epoch": 0.11861468534787689, "grad_norm": 41.28125, "learning_rate": 9.981466486831319e-06, "loss": 17.7057, "step": 6560 }, { "epoch": 0.11879550041700475, "grad_norm": 40.375, "learning_rate": 9.98143823452466e-06, "loss": 17.2223, "step": 6570 }, { "epoch": 0.11897631548613262, "grad_norm": 41.78125, "learning_rate": 9.981409982217998e-06, "loss": 17.7114, "step": 6580 }, { "epoch": 0.11915713055526048, "grad_norm": 38.75, "learning_rate": 9.981381729911339e-06, "loss": 17.115, "step": 6590 }, { "epoch": 0.11933794562438833, "grad_norm": 38.3125, "learning_rate": 9.98135347760468e-06, "loss": 17.3579, "step": 6600 }, { "epoch": 0.1195187606935162, "grad_norm": 42.71875, "learning_rate": 9.98132522529802e-06, "loss": 17.7693, "step": 6610 }, { "epoch": 0.11969957576264406, "grad_norm": 39.25, "learning_rate": 9.98129697299136e-06, "loss": 17.7085, "step": 6620 }, { "epoch": 0.11988039083177192, "grad_norm": 39.09375, "learning_rate": 9.981268720684701e-06, "loss": 17.2631, "step": 6630 }, { "epoch": 0.12006120590089978, "grad_norm": 40.78125, "learning_rate": 9.981240468378042e-06, "loss": 17.6135, "step": 6640 }, { "epoch": 0.12024202097002765, "grad_norm": 38.34375, "learning_rate": 9.981212216071383e-06, "loss": 17.5299, "step": 6650 }, { "epoch": 0.1204228360391555, "grad_norm": 40.09375, "learning_rate": 9.981183963764723e-06, "loss": 17.2463, "step": 6660 }, { "epoch": 0.12060365110828336, "grad_norm": 39.75, "learning_rate": 9.981155711458062e-06, "loss": 17.5251, "step": 6670 }, { "epoch": 0.12078446617741123, "grad_norm": 38.8125, "learning_rate": 9.981127459151403e-06, "loss": 17.2288, "step": 6680 }, { "epoch": 0.12096528124653909, "grad_norm": 43.625, "learning_rate": 9.981099206844743e-06, "loss": 17.6455, "step": 6690 }, { "epoch": 0.12114609631566695, "grad_norm": 38.875, "learning_rate": 9.981070954538084e-06, "loss": 17.074, "step": 6700 }, { "epoch": 0.1213269113847948, "grad_norm": 41.8125, "learning_rate": 9.981042702231425e-06, "loss": 17.5405, "step": 6710 }, { "epoch": 0.12150772645392267, "grad_norm": 40.6875, "learning_rate": 9.981014449924765e-06, "loss": 17.4071, "step": 6720 }, { "epoch": 0.12168854152305053, "grad_norm": 39.75, "learning_rate": 9.980986197618106e-06, "loss": 17.665, "step": 6730 }, { "epoch": 0.12186935659217839, "grad_norm": 39.625, "learning_rate": 9.980957945311446e-06, "loss": 17.5599, "step": 6740 }, { "epoch": 0.12205017166130626, "grad_norm": 39.5625, "learning_rate": 9.980929693004785e-06, "loss": 17.3555, "step": 6750 }, { "epoch": 0.12223098673043412, "grad_norm": 38.625, "learning_rate": 9.980901440698126e-06, "loss": 17.6027, "step": 6760 }, { "epoch": 0.12241180179956197, "grad_norm": 37.125, "learning_rate": 9.980873188391467e-06, "loss": 17.5312, "step": 6770 }, { "epoch": 0.12259261686868983, "grad_norm": 41.4375, "learning_rate": 9.980844936084807e-06, "loss": 17.3242, "step": 6780 }, { "epoch": 0.1227734319378177, "grad_norm": 39.375, "learning_rate": 9.980816683778148e-06, "loss": 17.6528, "step": 6790 }, { "epoch": 0.12295424700694556, "grad_norm": 41.71875, "learning_rate": 9.980788431471488e-06, "loss": 17.2223, "step": 6800 }, { "epoch": 0.12313506207607341, "grad_norm": 40.0, "learning_rate": 9.980760179164829e-06, "loss": 17.4004, "step": 6810 }, { "epoch": 0.12331587714520129, "grad_norm": 41.09375, "learning_rate": 9.98073192685817e-06, "loss": 17.1266, "step": 6820 }, { "epoch": 0.12349669221432914, "grad_norm": 40.125, "learning_rate": 9.98070367455151e-06, "loss": 17.7438, "step": 6830 }, { "epoch": 0.123677507283457, "grad_norm": 37.96875, "learning_rate": 9.98067542224485e-06, "loss": 17.5249, "step": 6840 }, { "epoch": 0.12385832235258487, "grad_norm": 41.78125, "learning_rate": 9.98064716993819e-06, "loss": 17.5363, "step": 6850 }, { "epoch": 0.12403913742171273, "grad_norm": 39.8125, "learning_rate": 9.98061891763153e-06, "loss": 17.8214, "step": 6860 }, { "epoch": 0.12421995249084058, "grad_norm": 39.53125, "learning_rate": 9.980590665324871e-06, "loss": 17.4741, "step": 6870 }, { "epoch": 0.12440076755996844, "grad_norm": 41.5625, "learning_rate": 9.980562413018212e-06, "loss": 17.3098, "step": 6880 }, { "epoch": 0.12458158262909631, "grad_norm": 38.9375, "learning_rate": 9.980534160711552e-06, "loss": 17.4718, "step": 6890 }, { "epoch": 0.12476239769822417, "grad_norm": 39.40625, "learning_rate": 9.980505908404893e-06, "loss": 17.3257, "step": 6900 }, { "epoch": 0.12494321276735203, "grad_norm": 40.84375, "learning_rate": 9.980477656098234e-06, "loss": 17.6889, "step": 6910 }, { "epoch": 0.12512402783647988, "grad_norm": 42.15625, "learning_rate": 9.980449403791573e-06, "loss": 17.4467, "step": 6920 }, { "epoch": 0.12530484290560776, "grad_norm": 42.0625, "learning_rate": 9.980421151484913e-06, "loss": 17.0792, "step": 6930 }, { "epoch": 0.12548565797473563, "grad_norm": 40.75, "learning_rate": 9.980392899178254e-06, "loss": 17.2743, "step": 6940 }, { "epoch": 0.12566647304386347, "grad_norm": 42.0625, "learning_rate": 9.980364646871594e-06, "loss": 17.3742, "step": 6950 }, { "epoch": 0.12584728811299134, "grad_norm": 40.6875, "learning_rate": 9.980336394564935e-06, "loss": 17.4063, "step": 6960 }, { "epoch": 0.1260281031821192, "grad_norm": 38.96875, "learning_rate": 9.980308142258276e-06, "loss": 17.4938, "step": 6970 }, { "epoch": 0.12620891825124705, "grad_norm": 39.71875, "learning_rate": 9.980279889951616e-06, "loss": 17.3211, "step": 6980 }, { "epoch": 0.12638973332037493, "grad_norm": 39.125, "learning_rate": 9.980251637644957e-06, "loss": 17.2774, "step": 6990 }, { "epoch": 0.12657054838950277, "grad_norm": 43.125, "learning_rate": 9.980223385338298e-06, "loss": 17.5812, "step": 7000 }, { "epoch": 0.12675136345863064, "grad_norm": 40.75, "learning_rate": 9.980195133031636e-06, "loss": 17.6814, "step": 7010 }, { "epoch": 0.1269321785277585, "grad_norm": 36.75, "learning_rate": 9.980166880724977e-06, "loss": 17.2395, "step": 7020 }, { "epoch": 0.12711299359688635, "grad_norm": 40.15625, "learning_rate": 9.980138628418318e-06, "loss": 17.2053, "step": 7030 }, { "epoch": 0.12729380866601422, "grad_norm": 41.3125, "learning_rate": 9.980110376111658e-06, "loss": 17.4385, "step": 7040 }, { "epoch": 0.1274746237351421, "grad_norm": 42.375, "learning_rate": 9.980082123804999e-06, "loss": 17.9981, "step": 7050 }, { "epoch": 0.12765543880426994, "grad_norm": 41.9375, "learning_rate": 9.98005387149834e-06, "loss": 17.6429, "step": 7060 }, { "epoch": 0.1278362538733978, "grad_norm": 42.5625, "learning_rate": 9.98002561919168e-06, "loss": 17.5678, "step": 7070 }, { "epoch": 0.12801706894252568, "grad_norm": 39.5625, "learning_rate": 9.97999736688502e-06, "loss": 17.5928, "step": 7080 }, { "epoch": 0.12819788401165352, "grad_norm": 42.46875, "learning_rate": 9.979969114578361e-06, "loss": 17.4203, "step": 7090 }, { "epoch": 0.1283786990807814, "grad_norm": 43.09375, "learning_rate": 9.9799408622717e-06, "loss": 17.4189, "step": 7100 }, { "epoch": 0.12855951414990927, "grad_norm": 40.625, "learning_rate": 9.979912609965041e-06, "loss": 17.4796, "step": 7110 }, { "epoch": 0.1287403292190371, "grad_norm": 38.875, "learning_rate": 9.979884357658382e-06, "loss": 17.6867, "step": 7120 }, { "epoch": 0.12892114428816498, "grad_norm": 39.90625, "learning_rate": 9.979856105351722e-06, "loss": 17.7465, "step": 7130 }, { "epoch": 0.12910195935729285, "grad_norm": 39.125, "learning_rate": 9.979827853045063e-06, "loss": 17.5646, "step": 7140 }, { "epoch": 0.1292827744264207, "grad_norm": 38.46875, "learning_rate": 9.979799600738403e-06, "loss": 17.3168, "step": 7150 }, { "epoch": 0.12946358949554856, "grad_norm": 41.0, "learning_rate": 9.979771348431744e-06, "loss": 17.292, "step": 7160 }, { "epoch": 0.1296444045646764, "grad_norm": 42.75, "learning_rate": 9.979743096125085e-06, "loss": 17.3749, "step": 7170 }, { "epoch": 0.12982521963380428, "grad_norm": 39.96875, "learning_rate": 9.979714843818424e-06, "loss": 17.1644, "step": 7180 }, { "epoch": 0.13000603470293215, "grad_norm": 43.15625, "learning_rate": 9.979686591511764e-06, "loss": 17.7358, "step": 7190 }, { "epoch": 0.13018684977206, "grad_norm": 39.59375, "learning_rate": 9.979658339205105e-06, "loss": 17.423, "step": 7200 }, { "epoch": 0.13036766484118786, "grad_norm": 39.46875, "learning_rate": 9.979630086898446e-06, "loss": 17.3277, "step": 7210 }, { "epoch": 0.13054847991031573, "grad_norm": 40.9375, "learning_rate": 9.979601834591786e-06, "loss": 17.3163, "step": 7220 }, { "epoch": 0.13072929497944358, "grad_norm": 40.03125, "learning_rate": 9.979573582285127e-06, "loss": 17.2494, "step": 7230 }, { "epoch": 0.13091011004857145, "grad_norm": 42.1875, "learning_rate": 9.979545329978467e-06, "loss": 17.5062, "step": 7240 }, { "epoch": 0.13109092511769932, "grad_norm": 40.21875, "learning_rate": 9.979517077671808e-06, "loss": 17.5579, "step": 7250 }, { "epoch": 0.13127174018682716, "grad_norm": 40.25, "learning_rate": 9.979488825365149e-06, "loss": 17.6391, "step": 7260 }, { "epoch": 0.13145255525595503, "grad_norm": 39.78125, "learning_rate": 9.979460573058488e-06, "loss": 17.387, "step": 7270 }, { "epoch": 0.1316333703250829, "grad_norm": 41.9375, "learning_rate": 9.979432320751828e-06, "loss": 17.5774, "step": 7280 }, { "epoch": 0.13181418539421075, "grad_norm": 41.125, "learning_rate": 9.979404068445169e-06, "loss": 17.1301, "step": 7290 }, { "epoch": 0.13199500046333862, "grad_norm": 41.15625, "learning_rate": 9.97937581613851e-06, "loss": 17.3479, "step": 7300 }, { "epoch": 0.1321758155324665, "grad_norm": 39.1875, "learning_rate": 9.97934756383185e-06, "loss": 17.3513, "step": 7310 }, { "epoch": 0.13235663060159433, "grad_norm": 41.75, "learning_rate": 9.97931931152519e-06, "loss": 16.835, "step": 7320 }, { "epoch": 0.1325374456707222, "grad_norm": 40.0625, "learning_rate": 9.979291059218531e-06, "loss": 17.5654, "step": 7330 }, { "epoch": 0.13271826073985005, "grad_norm": 39.6875, "learning_rate": 9.979262806911872e-06, "loss": 17.58, "step": 7340 }, { "epoch": 0.13289907580897792, "grad_norm": 40.90625, "learning_rate": 9.97923455460521e-06, "loss": 17.6938, "step": 7350 }, { "epoch": 0.1330798908781058, "grad_norm": 39.90625, "learning_rate": 9.979206302298551e-06, "loss": 16.8821, "step": 7360 }, { "epoch": 0.13326070594723363, "grad_norm": 39.15625, "learning_rate": 9.979178049991892e-06, "loss": 17.4715, "step": 7370 }, { "epoch": 0.1334415210163615, "grad_norm": 41.09375, "learning_rate": 9.979149797685233e-06, "loss": 17.1391, "step": 7380 }, { "epoch": 0.13362233608548937, "grad_norm": 38.96875, "learning_rate": 9.979121545378573e-06, "loss": 17.4993, "step": 7390 }, { "epoch": 0.13380315115461722, "grad_norm": 40.90625, "learning_rate": 9.979093293071914e-06, "loss": 17.5661, "step": 7400 }, { "epoch": 0.1339839662237451, "grad_norm": 40.09375, "learning_rate": 9.979065040765255e-06, "loss": 17.7374, "step": 7410 }, { "epoch": 0.13416478129287296, "grad_norm": 42.15625, "learning_rate": 9.979036788458595e-06, "loss": 17.6661, "step": 7420 }, { "epoch": 0.1343455963620008, "grad_norm": 38.03125, "learning_rate": 9.979008536151936e-06, "loss": 16.8962, "step": 7430 }, { "epoch": 0.13452641143112867, "grad_norm": 41.15625, "learning_rate": 9.978980283845275e-06, "loss": 17.0925, "step": 7440 }, { "epoch": 0.13470722650025654, "grad_norm": 38.03125, "learning_rate": 9.978952031538615e-06, "loss": 17.3493, "step": 7450 }, { "epoch": 0.1348880415693844, "grad_norm": 40.75, "learning_rate": 9.978923779231956e-06, "loss": 17.4892, "step": 7460 }, { "epoch": 0.13506885663851226, "grad_norm": 40.3125, "learning_rate": 9.978895526925297e-06, "loss": 17.3092, "step": 7470 }, { "epoch": 0.1352496717076401, "grad_norm": 41.5, "learning_rate": 9.978867274618637e-06, "loss": 17.4271, "step": 7480 }, { "epoch": 0.13543048677676797, "grad_norm": 38.15625, "learning_rate": 9.978839022311978e-06, "loss": 17.9422, "step": 7490 }, { "epoch": 0.13561130184589584, "grad_norm": 39.90625, "learning_rate": 9.978810770005318e-06, "loss": 17.0924, "step": 7500 }, { "epoch": 0.1357921169150237, "grad_norm": 42.84375, "learning_rate": 9.978782517698659e-06, "loss": 17.3601, "step": 7510 }, { "epoch": 0.13597293198415156, "grad_norm": 39.3125, "learning_rate": 9.978754265391998e-06, "loss": 17.6276, "step": 7520 }, { "epoch": 0.13615374705327943, "grad_norm": 39.46875, "learning_rate": 9.978726013085339e-06, "loss": 17.1647, "step": 7530 }, { "epoch": 0.13633456212240727, "grad_norm": 40.625, "learning_rate": 9.97869776077868e-06, "loss": 17.5063, "step": 7540 }, { "epoch": 0.13651537719153514, "grad_norm": 40.0, "learning_rate": 9.97866950847202e-06, "loss": 17.4359, "step": 7550 }, { "epoch": 0.136696192260663, "grad_norm": 40.28125, "learning_rate": 9.97864125616536e-06, "loss": 17.7257, "step": 7560 }, { "epoch": 0.13687700732979086, "grad_norm": 41.8125, "learning_rate": 9.978613003858701e-06, "loss": 17.6415, "step": 7570 }, { "epoch": 0.13705782239891873, "grad_norm": 41.25, "learning_rate": 9.978584751552042e-06, "loss": 17.0992, "step": 7580 }, { "epoch": 0.1372386374680466, "grad_norm": 38.84375, "learning_rate": 9.978556499245382e-06, "loss": 17.3908, "step": 7590 }, { "epoch": 0.13741945253717444, "grad_norm": 39.09375, "learning_rate": 9.978528246938723e-06, "loss": 17.303, "step": 7600 }, { "epoch": 0.1376002676063023, "grad_norm": 39.40625, "learning_rate": 9.978499994632062e-06, "loss": 17.3098, "step": 7610 }, { "epoch": 0.13778108267543018, "grad_norm": 39.28125, "learning_rate": 9.978471742325403e-06, "loss": 17.2351, "step": 7620 }, { "epoch": 0.13796189774455803, "grad_norm": 39.8125, "learning_rate": 9.978443490018743e-06, "loss": 17.6227, "step": 7630 }, { "epoch": 0.1381427128136859, "grad_norm": 41.125, "learning_rate": 9.978415237712084e-06, "loss": 17.4154, "step": 7640 }, { "epoch": 0.13832352788281374, "grad_norm": 41.15625, "learning_rate": 9.978386985405424e-06, "loss": 17.1675, "step": 7650 }, { "epoch": 0.1385043429519416, "grad_norm": 39.875, "learning_rate": 9.978358733098763e-06, "loss": 17.3523, "step": 7660 }, { "epoch": 0.13868515802106948, "grad_norm": 40.625, "learning_rate": 9.978330480792106e-06, "loss": 17.9028, "step": 7670 }, { "epoch": 0.13886597309019733, "grad_norm": 40.1875, "learning_rate": 9.978302228485446e-06, "loss": 17.5639, "step": 7680 }, { "epoch": 0.1390467881593252, "grad_norm": 39.40625, "learning_rate": 9.978273976178787e-06, "loss": 17.2738, "step": 7690 }, { "epoch": 0.13922760322845307, "grad_norm": 40.1875, "learning_rate": 9.978245723872126e-06, "loss": 17.1962, "step": 7700 }, { "epoch": 0.1394084182975809, "grad_norm": 39.21875, "learning_rate": 9.978217471565466e-06, "loss": 17.376, "step": 7710 }, { "epoch": 0.13958923336670878, "grad_norm": 37.5, "learning_rate": 9.978189219258807e-06, "loss": 17.0739, "step": 7720 }, { "epoch": 0.13977004843583665, "grad_norm": 38.5, "learning_rate": 9.978160966952148e-06, "loss": 16.9097, "step": 7730 }, { "epoch": 0.1399508635049645, "grad_norm": 41.0625, "learning_rate": 9.978132714645488e-06, "loss": 17.4405, "step": 7740 }, { "epoch": 0.14013167857409237, "grad_norm": 43.0625, "learning_rate": 9.978104462338829e-06, "loss": 17.3024, "step": 7750 }, { "epoch": 0.14031249364322024, "grad_norm": 39.5625, "learning_rate": 9.97807621003217e-06, "loss": 17.0304, "step": 7760 }, { "epoch": 0.14049330871234808, "grad_norm": 41.3125, "learning_rate": 9.97804795772551e-06, "loss": 17.3244, "step": 7770 }, { "epoch": 0.14067412378147595, "grad_norm": 39.9375, "learning_rate": 9.978019705418849e-06, "loss": 17.3528, "step": 7780 }, { "epoch": 0.14085493885060382, "grad_norm": 39.40625, "learning_rate": 9.97799145311219e-06, "loss": 17.2244, "step": 7790 }, { "epoch": 0.14103575391973167, "grad_norm": 38.0625, "learning_rate": 9.97796320080553e-06, "loss": 16.99, "step": 7800 }, { "epoch": 0.14121656898885954, "grad_norm": 41.0625, "learning_rate": 9.977934948498871e-06, "loss": 17.0707, "step": 7810 }, { "epoch": 0.14139738405798738, "grad_norm": 40.28125, "learning_rate": 9.977906696192212e-06, "loss": 17.4142, "step": 7820 }, { "epoch": 0.14157819912711525, "grad_norm": 41.125, "learning_rate": 9.97787844388555e-06, "loss": 17.4165, "step": 7830 }, { "epoch": 0.14175901419624312, "grad_norm": 41.78125, "learning_rate": 9.977850191578893e-06, "loss": 17.5218, "step": 7840 }, { "epoch": 0.14193982926537096, "grad_norm": 38.90625, "learning_rate": 9.977821939272233e-06, "loss": 17.1668, "step": 7850 }, { "epoch": 0.14212064433449884, "grad_norm": 37.46875, "learning_rate": 9.977793686965574e-06, "loss": 16.9904, "step": 7860 }, { "epoch": 0.1423014594036267, "grad_norm": 37.21875, "learning_rate": 9.977765434658913e-06, "loss": 17.0856, "step": 7870 }, { "epoch": 0.14248227447275455, "grad_norm": 36.71875, "learning_rate": 9.977737182352254e-06, "loss": 17.1422, "step": 7880 }, { "epoch": 0.14266308954188242, "grad_norm": 42.21875, "learning_rate": 9.977708930045594e-06, "loss": 17.0785, "step": 7890 }, { "epoch": 0.1428439046110103, "grad_norm": 40.71875, "learning_rate": 9.977680677738935e-06, "loss": 17.4634, "step": 7900 }, { "epoch": 0.14302471968013813, "grad_norm": 42.5625, "learning_rate": 9.977652425432276e-06, "loss": 17.3942, "step": 7910 }, { "epoch": 0.143205534749266, "grad_norm": 43.75, "learning_rate": 9.977624173125614e-06, "loss": 17.7189, "step": 7920 }, { "epoch": 0.14338634981839388, "grad_norm": 41.53125, "learning_rate": 9.977595920818957e-06, "loss": 17.412, "step": 7930 }, { "epoch": 0.14356716488752172, "grad_norm": 42.875, "learning_rate": 9.977567668512297e-06, "loss": 17.591, "step": 7940 }, { "epoch": 0.1437479799566496, "grad_norm": 39.125, "learning_rate": 9.977539416205636e-06, "loss": 17.5684, "step": 7950 }, { "epoch": 0.14392879502577746, "grad_norm": 40.40625, "learning_rate": 9.977511163898977e-06, "loss": 17.3622, "step": 7960 }, { "epoch": 0.1441096100949053, "grad_norm": 40.53125, "learning_rate": 9.977482911592318e-06, "loss": 17.1179, "step": 7970 }, { "epoch": 0.14429042516403318, "grad_norm": 39.71875, "learning_rate": 9.977454659285658e-06, "loss": 17.5406, "step": 7980 }, { "epoch": 0.14447124023316102, "grad_norm": 42.40625, "learning_rate": 9.977426406978999e-06, "loss": 17.0921, "step": 7990 }, { "epoch": 0.1446520553022889, "grad_norm": 42.5625, "learning_rate": 9.97739815467234e-06, "loss": 17.3135, "step": 8000 }, { "epoch": 0.14483287037141676, "grad_norm": 40.5625, "learning_rate": 9.977369902365678e-06, "loss": 17.4295, "step": 8010 }, { "epoch": 0.1450136854405446, "grad_norm": 37.84375, "learning_rate": 9.97734165005902e-06, "loss": 17.4965, "step": 8020 }, { "epoch": 0.14519450050967248, "grad_norm": 39.5, "learning_rate": 9.977313397752361e-06, "loss": 17.6603, "step": 8030 }, { "epoch": 0.14537531557880035, "grad_norm": 40.25, "learning_rate": 9.9772851454457e-06, "loss": 16.9875, "step": 8040 }, { "epoch": 0.1455561306479282, "grad_norm": 42.625, "learning_rate": 9.977256893139041e-06, "loss": 17.1946, "step": 8050 }, { "epoch": 0.14573694571705606, "grad_norm": 40.46875, "learning_rate": 9.977228640832381e-06, "loss": 17.4784, "step": 8060 }, { "epoch": 0.14591776078618393, "grad_norm": 38.75, "learning_rate": 9.977200388525722e-06, "loss": 17.3109, "step": 8070 }, { "epoch": 0.14609857585531177, "grad_norm": 44.78125, "learning_rate": 9.977172136219063e-06, "loss": 17.7436, "step": 8080 }, { "epoch": 0.14627939092443965, "grad_norm": 42.53125, "learning_rate": 9.977143883912402e-06, "loss": 17.1768, "step": 8090 }, { "epoch": 0.14646020599356752, "grad_norm": 40.125, "learning_rate": 9.977115631605744e-06, "loss": 17.3665, "step": 8100 }, { "epoch": 0.14664102106269536, "grad_norm": 38.21875, "learning_rate": 9.977087379299085e-06, "loss": 17.5109, "step": 8110 }, { "epoch": 0.14682183613182323, "grad_norm": 39.15625, "learning_rate": 9.977059126992425e-06, "loss": 17.9137, "step": 8120 }, { "epoch": 0.1470026512009511, "grad_norm": 43.375, "learning_rate": 9.977030874685764e-06, "loss": 17.3533, "step": 8130 }, { "epoch": 0.14718346627007894, "grad_norm": 43.71875, "learning_rate": 9.977002622379105e-06, "loss": 17.4239, "step": 8140 }, { "epoch": 0.14736428133920682, "grad_norm": 39.4375, "learning_rate": 9.976974370072445e-06, "loss": 17.8833, "step": 8150 }, { "epoch": 0.14754509640833466, "grad_norm": 42.09375, "learning_rate": 9.976946117765786e-06, "loss": 17.5879, "step": 8160 }, { "epoch": 0.14772591147746253, "grad_norm": 40.875, "learning_rate": 9.976917865459127e-06, "loss": 17.5351, "step": 8170 }, { "epoch": 0.1479067265465904, "grad_norm": 38.78125, "learning_rate": 9.976889613152466e-06, "loss": 17.1707, "step": 8180 }, { "epoch": 0.14808754161571824, "grad_norm": 38.96875, "learning_rate": 9.976861360845808e-06, "loss": 17.5665, "step": 8190 }, { "epoch": 0.14826835668484611, "grad_norm": 41.84375, "learning_rate": 9.976833108539148e-06, "loss": 17.9471, "step": 8200 }, { "epoch": 0.14844917175397399, "grad_norm": 42.0625, "learning_rate": 9.976804856232487e-06, "loss": 17.341, "step": 8210 }, { "epoch": 0.14862998682310183, "grad_norm": 41.71875, "learning_rate": 9.976776603925828e-06, "loss": 17.2915, "step": 8220 }, { "epoch": 0.1488108018922297, "grad_norm": 40.15625, "learning_rate": 9.976748351619169e-06, "loss": 17.2771, "step": 8230 }, { "epoch": 0.14899161696135757, "grad_norm": 40.4375, "learning_rate": 9.97672009931251e-06, "loss": 17.4774, "step": 8240 }, { "epoch": 0.1491724320304854, "grad_norm": 38.84375, "learning_rate": 9.97669184700585e-06, "loss": 17.43, "step": 8250 }, { "epoch": 0.14935324709961328, "grad_norm": 39.875, "learning_rate": 9.976663594699189e-06, "loss": 17.3579, "step": 8260 }, { "epoch": 0.14953406216874116, "grad_norm": 39.125, "learning_rate": 9.97663534239253e-06, "loss": 17.5017, "step": 8270 }, { "epoch": 0.149714877237869, "grad_norm": 39.9375, "learning_rate": 9.976607090085872e-06, "loss": 17.2222, "step": 8280 }, { "epoch": 0.14989569230699687, "grad_norm": 38.375, "learning_rate": 9.976578837779212e-06, "loss": 17.222, "step": 8290 }, { "epoch": 0.1500765073761247, "grad_norm": 40.15625, "learning_rate": 9.976550585472551e-06, "loss": 17.5821, "step": 8300 }, { "epoch": 0.15025732244525258, "grad_norm": 39.03125, "learning_rate": 9.976522333165892e-06, "loss": 17.1667, "step": 8310 }, { "epoch": 0.15043813751438045, "grad_norm": 41.3125, "learning_rate": 9.976494080859233e-06, "loss": 17.5639, "step": 8320 }, { "epoch": 0.1506189525835083, "grad_norm": 40.84375, "learning_rate": 9.976465828552573e-06, "loss": 17.3709, "step": 8330 }, { "epoch": 0.15079976765263617, "grad_norm": 41.09375, "learning_rate": 9.976437576245914e-06, "loss": 17.223, "step": 8340 }, { "epoch": 0.15098058272176404, "grad_norm": 36.84375, "learning_rate": 9.976409323939253e-06, "loss": 17.4693, "step": 8350 }, { "epoch": 0.15116139779089188, "grad_norm": 41.71875, "learning_rate": 9.976381071632593e-06, "loss": 17.4322, "step": 8360 }, { "epoch": 0.15134221286001975, "grad_norm": 41.96875, "learning_rate": 9.976352819325936e-06, "loss": 17.8045, "step": 8370 }, { "epoch": 0.15152302792914762, "grad_norm": 40.59375, "learning_rate": 9.976324567019275e-06, "loss": 17.2138, "step": 8380 }, { "epoch": 0.15170384299827547, "grad_norm": 39.6875, "learning_rate": 9.976296314712615e-06, "loss": 17.234, "step": 8390 }, { "epoch": 0.15188465806740334, "grad_norm": 41.84375, "learning_rate": 9.976268062405956e-06, "loss": 17.3728, "step": 8400 }, { "epoch": 0.1520654731365312, "grad_norm": 38.75, "learning_rate": 9.976239810099296e-06, "loss": 17.6384, "step": 8410 }, { "epoch": 0.15224628820565905, "grad_norm": 40.46875, "learning_rate": 9.976211557792637e-06, "loss": 17.4172, "step": 8420 }, { "epoch": 0.15242710327478692, "grad_norm": 40.84375, "learning_rate": 9.976183305485978e-06, "loss": 17.0899, "step": 8430 }, { "epoch": 0.1526079183439148, "grad_norm": 39.71875, "learning_rate": 9.976155053179317e-06, "loss": 17.2965, "step": 8440 }, { "epoch": 0.15278873341304264, "grad_norm": 40.75, "learning_rate": 9.976126800872659e-06, "loss": 17.5926, "step": 8450 }, { "epoch": 0.1529695484821705, "grad_norm": 39.375, "learning_rate": 9.976098548566e-06, "loss": 17.4443, "step": 8460 }, { "epoch": 0.15315036355129835, "grad_norm": 39.46875, "learning_rate": 9.976070296259339e-06, "loss": 17.2535, "step": 8470 }, { "epoch": 0.15333117862042622, "grad_norm": 41.65625, "learning_rate": 9.976042043952679e-06, "loss": 17.4198, "step": 8480 }, { "epoch": 0.1535119936895541, "grad_norm": 39.84375, "learning_rate": 9.97601379164602e-06, "loss": 17.372, "step": 8490 }, { "epoch": 0.15369280875868194, "grad_norm": 40.28125, "learning_rate": 9.97598553933936e-06, "loss": 17.0233, "step": 8500 }, { "epoch": 0.1538736238278098, "grad_norm": 39.65625, "learning_rate": 9.975957287032701e-06, "loss": 17.4556, "step": 8510 }, { "epoch": 0.15405443889693768, "grad_norm": 40.1875, "learning_rate": 9.97592903472604e-06, "loss": 17.3115, "step": 8520 }, { "epoch": 0.15423525396606552, "grad_norm": 42.46875, "learning_rate": 9.97590078241938e-06, "loss": 17.3447, "step": 8530 }, { "epoch": 0.1544160690351934, "grad_norm": 37.8125, "learning_rate": 9.975872530112723e-06, "loss": 17.3876, "step": 8540 }, { "epoch": 0.15459688410432126, "grad_norm": 39.03125, "learning_rate": 9.975844277806064e-06, "loss": 17.5301, "step": 8550 }, { "epoch": 0.1547776991734491, "grad_norm": 38.96875, "learning_rate": 9.975816025499402e-06, "loss": 17.4396, "step": 8560 }, { "epoch": 0.15495851424257698, "grad_norm": 40.84375, "learning_rate": 9.975787773192743e-06, "loss": 17.4825, "step": 8570 }, { "epoch": 0.15513932931170485, "grad_norm": 39.96875, "learning_rate": 9.975759520886084e-06, "loss": 17.7308, "step": 8580 }, { "epoch": 0.1553201443808327, "grad_norm": 40.34375, "learning_rate": 9.975731268579424e-06, "loss": 17.5016, "step": 8590 }, { "epoch": 0.15550095944996056, "grad_norm": 40.40625, "learning_rate": 9.975703016272765e-06, "loss": 17.2755, "step": 8600 }, { "epoch": 0.15568177451908843, "grad_norm": 37.09375, "learning_rate": 9.975674763966104e-06, "loss": 17.3056, "step": 8610 }, { "epoch": 0.15586258958821628, "grad_norm": 41.09375, "learning_rate": 9.975646511659444e-06, "loss": 17.0374, "step": 8620 }, { "epoch": 0.15604340465734415, "grad_norm": 41.15625, "learning_rate": 9.975618259352787e-06, "loss": 17.2957, "step": 8630 }, { "epoch": 0.156224219726472, "grad_norm": 40.1875, "learning_rate": 9.975590007046126e-06, "loss": 17.1892, "step": 8640 }, { "epoch": 0.15640503479559986, "grad_norm": 37.3125, "learning_rate": 9.975561754739466e-06, "loss": 17.7739, "step": 8650 }, { "epoch": 0.15658584986472773, "grad_norm": 39.90625, "learning_rate": 9.975533502432807e-06, "loss": 17.861, "step": 8660 }, { "epoch": 0.15676666493385558, "grad_norm": 39.96875, "learning_rate": 9.975505250126148e-06, "loss": 17.779, "step": 8670 }, { "epoch": 0.15694748000298345, "grad_norm": 40.65625, "learning_rate": 9.975476997819488e-06, "loss": 17.6016, "step": 8680 }, { "epoch": 0.15712829507211132, "grad_norm": 41.34375, "learning_rate": 9.975448745512827e-06, "loss": 17.1837, "step": 8690 }, { "epoch": 0.15730911014123916, "grad_norm": 42.15625, "learning_rate": 9.975420493206168e-06, "loss": 17.3432, "step": 8700 }, { "epoch": 0.15748992521036703, "grad_norm": 40.5, "learning_rate": 9.975392240899508e-06, "loss": 17.4633, "step": 8710 }, { "epoch": 0.1576707402794949, "grad_norm": 40.875, "learning_rate": 9.97536398859285e-06, "loss": 17.3936, "step": 8720 }, { "epoch": 0.15785155534862275, "grad_norm": 39.34375, "learning_rate": 9.97533573628619e-06, "loss": 17.2713, "step": 8730 }, { "epoch": 0.15803237041775062, "grad_norm": 42.21875, "learning_rate": 9.97530748397953e-06, "loss": 17.163, "step": 8740 }, { "epoch": 0.1582131854868785, "grad_norm": 41.75, "learning_rate": 9.975279231672871e-06, "loss": 17.3989, "step": 8750 }, { "epoch": 0.15839400055600633, "grad_norm": 38.4375, "learning_rate": 9.975250979366211e-06, "loss": 17.3244, "step": 8760 }, { "epoch": 0.1585748156251342, "grad_norm": 41.0, "learning_rate": 9.975222727059552e-06, "loss": 17.2113, "step": 8770 }, { "epoch": 0.15875563069426207, "grad_norm": 37.875, "learning_rate": 9.975194474752891e-06, "loss": 17.142, "step": 8780 }, { "epoch": 0.15893644576338992, "grad_norm": 41.15625, "learning_rate": 9.975166222446232e-06, "loss": 17.2934, "step": 8790 }, { "epoch": 0.1591172608325178, "grad_norm": 41.5, "learning_rate": 9.975137970139574e-06, "loss": 17.4551, "step": 8800 }, { "epoch": 0.15929807590164563, "grad_norm": 40.90625, "learning_rate": 9.975109717832913e-06, "loss": 17.2555, "step": 8810 }, { "epoch": 0.1594788909707735, "grad_norm": 40.90625, "learning_rate": 9.975081465526254e-06, "loss": 17.4699, "step": 8820 }, { "epoch": 0.15965970603990137, "grad_norm": 43.53125, "learning_rate": 9.975053213219594e-06, "loss": 17.3129, "step": 8830 }, { "epoch": 0.15984052110902922, "grad_norm": 38.34375, "learning_rate": 9.975024960912935e-06, "loss": 17.7102, "step": 8840 }, { "epoch": 0.1600213361781571, "grad_norm": 37.28125, "learning_rate": 9.974996708606275e-06, "loss": 16.9641, "step": 8850 }, { "epoch": 0.16020215124728496, "grad_norm": 41.25, "learning_rate": 9.974968456299616e-06, "loss": 17.6302, "step": 8860 }, { "epoch": 0.1603829663164128, "grad_norm": 43.28125, "learning_rate": 9.974940203992955e-06, "loss": 17.493, "step": 8870 }, { "epoch": 0.16056378138554067, "grad_norm": 38.625, "learning_rate": 9.974911951686296e-06, "loss": 17.4427, "step": 8880 }, { "epoch": 0.16074459645466854, "grad_norm": 41.375, "learning_rate": 9.974883699379638e-06, "loss": 17.3359, "step": 8890 }, { "epoch": 0.16092541152379639, "grad_norm": 38.375, "learning_rate": 9.974855447072977e-06, "loss": 17.4055, "step": 8900 }, { "epoch": 0.16110622659292426, "grad_norm": 38.34375, "learning_rate": 9.974827194766317e-06, "loss": 17.0807, "step": 8910 }, { "epoch": 0.16128704166205213, "grad_norm": 42.09375, "learning_rate": 9.974798942459658e-06, "loss": 17.5087, "step": 8920 }, { "epoch": 0.16146785673117997, "grad_norm": 38.9375, "learning_rate": 9.974770690152999e-06, "loss": 17.475, "step": 8930 }, { "epoch": 0.16164867180030784, "grad_norm": 41.15625, "learning_rate": 9.97474243784634e-06, "loss": 17.2472, "step": 8940 }, { "epoch": 0.1618294868694357, "grad_norm": 40.28125, "learning_rate": 9.974714185539678e-06, "loss": 17.5978, "step": 8950 }, { "epoch": 0.16201030193856356, "grad_norm": 40.59375, "learning_rate": 9.974685933233019e-06, "loss": 17.1928, "step": 8960 }, { "epoch": 0.16219111700769143, "grad_norm": 39.4375, "learning_rate": 9.97465768092636e-06, "loss": 17.3915, "step": 8970 }, { "epoch": 0.16237193207681927, "grad_norm": 42.625, "learning_rate": 9.974629428619702e-06, "loss": 17.7899, "step": 8980 }, { "epoch": 0.16255274714594714, "grad_norm": 39.375, "learning_rate": 9.97460117631304e-06, "loss": 17.9558, "step": 8990 }, { "epoch": 0.162733562215075, "grad_norm": 38.84375, "learning_rate": 9.974572924006381e-06, "loss": 17.5389, "step": 9000 }, { "epoch": 0.16291437728420285, "grad_norm": 40.59375, "learning_rate": 9.974544671699722e-06, "loss": 17.6044, "step": 9010 }, { "epoch": 0.16309519235333073, "grad_norm": 39.28125, "learning_rate": 9.974516419393063e-06, "loss": 17.5651, "step": 9020 }, { "epoch": 0.1632760074224586, "grad_norm": 40.25, "learning_rate": 9.974488167086403e-06, "loss": 17.4637, "step": 9030 }, { "epoch": 0.16345682249158644, "grad_norm": 38.9375, "learning_rate": 9.974459914779742e-06, "loss": 17.348, "step": 9040 }, { "epoch": 0.1636376375607143, "grad_norm": 40.28125, "learning_rate": 9.974431662473083e-06, "loss": 17.2592, "step": 9050 }, { "epoch": 0.16381845262984218, "grad_norm": 39.75, "learning_rate": 9.974403410166423e-06, "loss": 17.5499, "step": 9060 }, { "epoch": 0.16399926769897002, "grad_norm": 37.625, "learning_rate": 9.974375157859764e-06, "loss": 17.1442, "step": 9070 }, { "epoch": 0.1641800827680979, "grad_norm": 40.5, "learning_rate": 9.974346905553105e-06, "loss": 17.0087, "step": 9080 }, { "epoch": 0.16436089783722577, "grad_norm": 38.125, "learning_rate": 9.974318653246445e-06, "loss": 17.2556, "step": 9090 }, { "epoch": 0.1645417129063536, "grad_norm": 37.9375, "learning_rate": 9.974290400939786e-06, "loss": 17.1482, "step": 9100 }, { "epoch": 0.16472252797548148, "grad_norm": 40.4375, "learning_rate": 9.974262148633126e-06, "loss": 17.108, "step": 9110 }, { "epoch": 0.16490334304460932, "grad_norm": 39.875, "learning_rate": 9.974233896326465e-06, "loss": 17.4758, "step": 9120 }, { "epoch": 0.1650841581137372, "grad_norm": 40.90625, "learning_rate": 9.974205644019806e-06, "loss": 17.3085, "step": 9130 }, { "epoch": 0.16526497318286507, "grad_norm": 38.59375, "learning_rate": 9.974177391713147e-06, "loss": 17.3685, "step": 9140 }, { "epoch": 0.1654457882519929, "grad_norm": 43.0, "learning_rate": 9.974149139406489e-06, "loss": 17.382, "step": 9150 }, { "epoch": 0.16562660332112078, "grad_norm": 41.53125, "learning_rate": 9.974120887099828e-06, "loss": 17.3543, "step": 9160 }, { "epoch": 0.16580741839024865, "grad_norm": 41.0, "learning_rate": 9.974092634793169e-06, "loss": 17.5792, "step": 9170 }, { "epoch": 0.1659882334593765, "grad_norm": 38.46875, "learning_rate": 9.97406438248651e-06, "loss": 17.8702, "step": 9180 }, { "epoch": 0.16616904852850437, "grad_norm": 39.03125, "learning_rate": 9.97403613017985e-06, "loss": 17.4478, "step": 9190 }, { "epoch": 0.16634986359763224, "grad_norm": 41.59375, "learning_rate": 9.97400787787319e-06, "loss": 17.2742, "step": 9200 }, { "epoch": 0.16653067866676008, "grad_norm": 40.15625, "learning_rate": 9.97397962556653e-06, "loss": 17.5207, "step": 9210 }, { "epoch": 0.16671149373588795, "grad_norm": 39.09375, "learning_rate": 9.97395137325987e-06, "loss": 17.5423, "step": 9220 }, { "epoch": 0.16689230880501582, "grad_norm": 40.40625, "learning_rate": 9.97392312095321e-06, "loss": 17.708, "step": 9230 }, { "epoch": 0.16707312387414366, "grad_norm": 43.5625, "learning_rate": 9.973894868646551e-06, "loss": 17.2694, "step": 9240 }, { "epoch": 0.16725393894327154, "grad_norm": 39.75, "learning_rate": 9.973866616339892e-06, "loss": 17.4095, "step": 9250 }, { "epoch": 0.1674347540123994, "grad_norm": 41.375, "learning_rate": 9.973838364033232e-06, "loss": 17.4842, "step": 9260 }, { "epoch": 0.16761556908152725, "grad_norm": 38.125, "learning_rate": 9.973810111726573e-06, "loss": 17.4138, "step": 9270 }, { "epoch": 0.16779638415065512, "grad_norm": 40.1875, "learning_rate": 9.973781859419914e-06, "loss": 17.4215, "step": 9280 }, { "epoch": 0.16797719921978296, "grad_norm": 41.1875, "learning_rate": 9.973753607113254e-06, "loss": 17.5086, "step": 9290 }, { "epoch": 0.16815801428891083, "grad_norm": 38.34375, "learning_rate": 9.973725354806593e-06, "loss": 17.5304, "step": 9300 }, { "epoch": 0.1683388293580387, "grad_norm": 42.34375, "learning_rate": 9.973697102499934e-06, "loss": 17.6362, "step": 9310 }, { "epoch": 0.16851964442716655, "grad_norm": 40.84375, "learning_rate": 9.973668850193274e-06, "loss": 17.7299, "step": 9320 }, { "epoch": 0.16870045949629442, "grad_norm": 40.1875, "learning_rate": 9.973640597886615e-06, "loss": 17.6307, "step": 9330 }, { "epoch": 0.1688812745654223, "grad_norm": 39.125, "learning_rate": 9.973612345579956e-06, "loss": 17.3122, "step": 9340 }, { "epoch": 0.16906208963455013, "grad_norm": 40.5625, "learning_rate": 9.973584093273296e-06, "loss": 17.3277, "step": 9350 }, { "epoch": 0.169242904703678, "grad_norm": 40.40625, "learning_rate": 9.973555840966637e-06, "loss": 17.4785, "step": 9360 }, { "epoch": 0.16942371977280588, "grad_norm": 39.90625, "learning_rate": 9.973527588659978e-06, "loss": 17.1665, "step": 9370 }, { "epoch": 0.16960453484193372, "grad_norm": 39.625, "learning_rate": 9.973499336353317e-06, "loss": 17.2563, "step": 9380 }, { "epoch": 0.1697853499110616, "grad_norm": 37.71875, "learning_rate": 9.973471084046657e-06, "loss": 17.3261, "step": 9390 }, { "epoch": 0.16996616498018946, "grad_norm": 38.5, "learning_rate": 9.973442831739998e-06, "loss": 17.3375, "step": 9400 }, { "epoch": 0.1701469800493173, "grad_norm": 43.25, "learning_rate": 9.973414579433338e-06, "loss": 17.1612, "step": 9410 }, { "epoch": 0.17032779511844517, "grad_norm": 41.4375, "learning_rate": 9.973386327126679e-06, "loss": 17.3999, "step": 9420 }, { "epoch": 0.17050861018757305, "grad_norm": 42.9375, "learning_rate": 9.97335807482002e-06, "loss": 17.4043, "step": 9430 }, { "epoch": 0.1706894252567009, "grad_norm": 42.65625, "learning_rate": 9.97332982251336e-06, "loss": 17.2566, "step": 9440 }, { "epoch": 0.17087024032582876, "grad_norm": 41.3125, "learning_rate": 9.973301570206701e-06, "loss": 17.311, "step": 9450 }, { "epoch": 0.1710510553949566, "grad_norm": 41.25, "learning_rate": 9.973273317900041e-06, "loss": 17.0837, "step": 9460 }, { "epoch": 0.17123187046408447, "grad_norm": 37.28125, "learning_rate": 9.97324506559338e-06, "loss": 17.1674, "step": 9470 }, { "epoch": 0.17141268553321234, "grad_norm": 41.40625, "learning_rate": 9.973216813286721e-06, "loss": 17.3728, "step": 9480 }, { "epoch": 0.1715935006023402, "grad_norm": 42.125, "learning_rate": 9.973188560980062e-06, "loss": 17.5858, "step": 9490 }, { "epoch": 0.17177431567146806, "grad_norm": 37.5625, "learning_rate": 9.973160308673402e-06, "loss": 17.2968, "step": 9500 }, { "epoch": 0.17195513074059593, "grad_norm": 41.84375, "learning_rate": 9.973132056366743e-06, "loss": 17.3369, "step": 9510 }, { "epoch": 0.17213594580972377, "grad_norm": 42.03125, "learning_rate": 9.973103804060084e-06, "loss": 17.0527, "step": 9520 }, { "epoch": 0.17231676087885164, "grad_norm": 40.75, "learning_rate": 9.973075551753424e-06, "loss": 17.159, "step": 9530 }, { "epoch": 0.17249757594797951, "grad_norm": 41.03125, "learning_rate": 9.973047299446765e-06, "loss": 17.5794, "step": 9540 }, { "epoch": 0.17267839101710736, "grad_norm": 42.0, "learning_rate": 9.973019047140104e-06, "loss": 17.6003, "step": 9550 }, { "epoch": 0.17285920608623523, "grad_norm": 42.1875, "learning_rate": 9.972990794833444e-06, "loss": 17.372, "step": 9560 }, { "epoch": 0.1730400211553631, "grad_norm": 40.84375, "learning_rate": 9.972962542526785e-06, "loss": 16.9928, "step": 9570 }, { "epoch": 0.17322083622449094, "grad_norm": 39.65625, "learning_rate": 9.972934290220126e-06, "loss": 17.6939, "step": 9580 }, { "epoch": 0.1734016512936188, "grad_norm": 44.5625, "learning_rate": 9.972906037913466e-06, "loss": 17.4905, "step": 9590 }, { "epoch": 0.17358246636274668, "grad_norm": 38.6875, "learning_rate": 9.972877785606807e-06, "loss": 17.1858, "step": 9600 }, { "epoch": 0.17376328143187453, "grad_norm": 39.125, "learning_rate": 9.972849533300147e-06, "loss": 17.3789, "step": 9610 }, { "epoch": 0.1739440965010024, "grad_norm": 38.4375, "learning_rate": 9.972821280993488e-06, "loss": 17.4596, "step": 9620 }, { "epoch": 0.17412491157013024, "grad_norm": 41.25, "learning_rate": 9.972793028686829e-06, "loss": 17.162, "step": 9630 }, { "epoch": 0.1743057266392581, "grad_norm": 42.25, "learning_rate": 9.972764776380168e-06, "loss": 17.4351, "step": 9640 }, { "epoch": 0.17448654170838598, "grad_norm": 40.625, "learning_rate": 9.972736524073508e-06, "loss": 17.6255, "step": 9650 }, { "epoch": 0.17466735677751383, "grad_norm": 40.34375, "learning_rate": 9.972708271766849e-06, "loss": 17.2791, "step": 9660 }, { "epoch": 0.1748481718466417, "grad_norm": 43.0, "learning_rate": 9.97268001946019e-06, "loss": 17.5448, "step": 9670 }, { "epoch": 0.17502898691576957, "grad_norm": 40.65625, "learning_rate": 9.97265176715353e-06, "loss": 17.4885, "step": 9680 }, { "epoch": 0.1752098019848974, "grad_norm": 38.9375, "learning_rate": 9.97262351484687e-06, "loss": 17.652, "step": 9690 }, { "epoch": 0.17539061705402528, "grad_norm": 41.09375, "learning_rate": 9.972595262540211e-06, "loss": 17.7164, "step": 9700 }, { "epoch": 0.17557143212315315, "grad_norm": 39.71875, "learning_rate": 9.972567010233552e-06, "loss": 17.4173, "step": 9710 }, { "epoch": 0.175752247192281, "grad_norm": 40.34375, "learning_rate": 9.972538757926891e-06, "loss": 17.8984, "step": 9720 }, { "epoch": 0.17593306226140887, "grad_norm": 40.53125, "learning_rate": 9.972510505620232e-06, "loss": 17.6743, "step": 9730 }, { "epoch": 0.17611387733053674, "grad_norm": 41.875, "learning_rate": 9.972482253313572e-06, "loss": 17.1897, "step": 9740 }, { "epoch": 0.17629469239966458, "grad_norm": 41.90625, "learning_rate": 9.972454001006913e-06, "loss": 17.4059, "step": 9750 }, { "epoch": 0.17647550746879245, "grad_norm": 40.84375, "learning_rate": 9.972425748700253e-06, "loss": 17.2298, "step": 9760 }, { "epoch": 0.17665632253792032, "grad_norm": 39.25, "learning_rate": 9.972397496393594e-06, "loss": 17.4787, "step": 9770 }, { "epoch": 0.17683713760704817, "grad_norm": 39.0, "learning_rate": 9.972369244086935e-06, "loss": 16.8661, "step": 9780 }, { "epoch": 0.17701795267617604, "grad_norm": 39.09375, "learning_rate": 9.972340991780275e-06, "loss": 16.9897, "step": 9790 }, { "epoch": 0.17719876774530388, "grad_norm": 40.625, "learning_rate": 9.972312739473616e-06, "loss": 17.3103, "step": 9800 }, { "epoch": 0.17737958281443175, "grad_norm": 41.75, "learning_rate": 9.972284487166955e-06, "loss": 17.4173, "step": 9810 }, { "epoch": 0.17756039788355962, "grad_norm": 42.34375, "learning_rate": 9.972256234860295e-06, "loss": 17.5176, "step": 9820 }, { "epoch": 0.17774121295268747, "grad_norm": 40.59375, "learning_rate": 9.972227982553636e-06, "loss": 17.8892, "step": 9830 }, { "epoch": 0.17792202802181534, "grad_norm": 40.15625, "learning_rate": 9.972199730246977e-06, "loss": 17.3229, "step": 9840 }, { "epoch": 0.1781028430909432, "grad_norm": 39.40625, "learning_rate": 9.972171477940317e-06, "loss": 17.4932, "step": 9850 }, { "epoch": 0.17828365816007105, "grad_norm": 39.40625, "learning_rate": 9.972143225633658e-06, "loss": 17.1002, "step": 9860 }, { "epoch": 0.17846447322919892, "grad_norm": 41.46875, "learning_rate": 9.972114973326999e-06, "loss": 17.3818, "step": 9870 }, { "epoch": 0.1786452882983268, "grad_norm": 40.25, "learning_rate": 9.97208672102034e-06, "loss": 17.3064, "step": 9880 }, { "epoch": 0.17882610336745464, "grad_norm": 41.71875, "learning_rate": 9.97205846871368e-06, "loss": 17.0175, "step": 9890 }, { "epoch": 0.1790069184365825, "grad_norm": 40.25, "learning_rate": 9.972030216407019e-06, "loss": 17.5675, "step": 9900 }, { "epoch": 0.17918773350571038, "grad_norm": 40.75, "learning_rate": 9.97200196410036e-06, "loss": 17.2624, "step": 9910 }, { "epoch": 0.17936854857483822, "grad_norm": 42.09375, "learning_rate": 9.9719737117937e-06, "loss": 16.7743, "step": 9920 }, { "epoch": 0.1795493636439661, "grad_norm": 41.21875, "learning_rate": 9.97194545948704e-06, "loss": 17.372, "step": 9930 }, { "epoch": 0.17973017871309394, "grad_norm": 41.09375, "learning_rate": 9.971917207180381e-06, "loss": 17.2744, "step": 9940 }, { "epoch": 0.1799109937822218, "grad_norm": 39.28125, "learning_rate": 9.971888954873722e-06, "loss": 17.3897, "step": 9950 }, { "epoch": 0.18009180885134968, "grad_norm": 38.6875, "learning_rate": 9.971860702567062e-06, "loss": 17.1437, "step": 9960 }, { "epoch": 0.18027262392047752, "grad_norm": 41.0, "learning_rate": 9.971832450260403e-06, "loss": 17.4114, "step": 9970 }, { "epoch": 0.1804534389896054, "grad_norm": 41.15625, "learning_rate": 9.971804197953742e-06, "loss": 17.2309, "step": 9980 }, { "epoch": 0.18063425405873326, "grad_norm": 38.46875, "learning_rate": 9.971775945647083e-06, "loss": 17.4542, "step": 9990 }, { "epoch": 0.1808150691278611, "grad_norm": 40.1875, "learning_rate": 9.971747693340423e-06, "loss": 17.4604, "step": 10000 }, { "epoch": 0.1808150691278611, "eval_loss": 2.173656463623047, "eval_runtime": 230.3398, "eval_samples_per_second": 3152.122, "eval_steps_per_second": 49.253, "step": 10000 }, { "epoch": 0.18099588419698898, "grad_norm": 38.78125, "learning_rate": 9.971719441033764e-06, "loss": 17.6182, "step": 10010 }, { "epoch": 0.18117669926611685, "grad_norm": 42.90625, "learning_rate": 9.971691188727104e-06, "loss": 17.3686, "step": 10020 }, { "epoch": 0.1813575143352447, "grad_norm": 41.15625, "learning_rate": 9.971662936420445e-06, "loss": 16.8379, "step": 10030 }, { "epoch": 0.18153832940437256, "grad_norm": 37.84375, "learning_rate": 9.971634684113786e-06, "loss": 17.3498, "step": 10040 }, { "epoch": 0.18171914447350043, "grad_norm": 38.84375, "learning_rate": 9.971606431807126e-06, "loss": 17.3983, "step": 10050 }, { "epoch": 0.18189995954262828, "grad_norm": 43.8125, "learning_rate": 9.971578179500467e-06, "loss": 17.4824, "step": 10060 }, { "epoch": 0.18208077461175615, "grad_norm": 39.46875, "learning_rate": 9.971549927193806e-06, "loss": 17.3409, "step": 10070 }, { "epoch": 0.18226158968088402, "grad_norm": 41.15625, "learning_rate": 9.971521674887147e-06, "loss": 17.2479, "step": 10080 }, { "epoch": 0.18244240475001186, "grad_norm": 43.90625, "learning_rate": 9.971493422580487e-06, "loss": 17.4112, "step": 10090 }, { "epoch": 0.18262321981913973, "grad_norm": 38.71875, "learning_rate": 9.971465170273828e-06, "loss": 17.4876, "step": 10100 }, { "epoch": 0.18280403488826757, "grad_norm": 39.125, "learning_rate": 9.971436917967168e-06, "loss": 17.2352, "step": 10110 }, { "epoch": 0.18298484995739545, "grad_norm": 40.34375, "learning_rate": 9.971408665660509e-06, "loss": 17.4973, "step": 10120 }, { "epoch": 0.18316566502652332, "grad_norm": 38.875, "learning_rate": 9.97138041335385e-06, "loss": 17.0968, "step": 10130 }, { "epoch": 0.18334648009565116, "grad_norm": 40.5625, "learning_rate": 9.97135216104719e-06, "loss": 17.0191, "step": 10140 }, { "epoch": 0.18352729516477903, "grad_norm": 42.21875, "learning_rate": 9.97132390874053e-06, "loss": 17.3471, "step": 10150 }, { "epoch": 0.1837081102339069, "grad_norm": 40.28125, "learning_rate": 9.97129565643387e-06, "loss": 17.1329, "step": 10160 }, { "epoch": 0.18388892530303474, "grad_norm": 39.4375, "learning_rate": 9.97126740412721e-06, "loss": 17.4335, "step": 10170 }, { "epoch": 0.18406974037216262, "grad_norm": 38.75, "learning_rate": 9.971239151820551e-06, "loss": 17.201, "step": 10180 }, { "epoch": 0.1842505554412905, "grad_norm": 41.125, "learning_rate": 9.971210899513892e-06, "loss": 17.3499, "step": 10190 }, { "epoch": 0.18443137051041833, "grad_norm": 39.90625, "learning_rate": 9.971182647207232e-06, "loss": 17.1541, "step": 10200 }, { "epoch": 0.1846121855795462, "grad_norm": 41.90625, "learning_rate": 9.971154394900573e-06, "loss": 17.4216, "step": 10210 }, { "epoch": 0.18479300064867407, "grad_norm": 42.4375, "learning_rate": 9.971126142593914e-06, "loss": 17.228, "step": 10220 }, { "epoch": 0.18497381571780191, "grad_norm": 42.0, "learning_rate": 9.971097890287254e-06, "loss": 17.3865, "step": 10230 }, { "epoch": 0.18515463078692979, "grad_norm": 40.4375, "learning_rate": 9.971069637980593e-06, "loss": 17.1773, "step": 10240 }, { "epoch": 0.18533544585605766, "grad_norm": 39.78125, "learning_rate": 9.971041385673934e-06, "loss": 17.2779, "step": 10250 }, { "epoch": 0.1855162609251855, "grad_norm": 42.59375, "learning_rate": 9.971013133367274e-06, "loss": 17.4225, "step": 10260 }, { "epoch": 0.18569707599431337, "grad_norm": 39.03125, "learning_rate": 9.970984881060615e-06, "loss": 17.1712, "step": 10270 }, { "epoch": 0.18587789106344121, "grad_norm": 42.34375, "learning_rate": 9.970956628753956e-06, "loss": 17.1064, "step": 10280 }, { "epoch": 0.18605870613256908, "grad_norm": 38.875, "learning_rate": 9.970928376447296e-06, "loss": 17.4685, "step": 10290 }, { "epoch": 0.18623952120169696, "grad_norm": 43.15625, "learning_rate": 9.970900124140637e-06, "loss": 17.152, "step": 10300 }, { "epoch": 0.1864203362708248, "grad_norm": 40.875, "learning_rate": 9.970871871833977e-06, "loss": 17.4354, "step": 10310 }, { "epoch": 0.18660115133995267, "grad_norm": 40.6875, "learning_rate": 9.970843619527318e-06, "loss": 17.0856, "step": 10320 }, { "epoch": 0.18678196640908054, "grad_norm": 38.21875, "learning_rate": 9.970815367220657e-06, "loss": 17.2967, "step": 10330 }, { "epoch": 0.18696278147820838, "grad_norm": 41.21875, "learning_rate": 9.970787114913998e-06, "loss": 17.6692, "step": 10340 }, { "epoch": 0.18714359654733626, "grad_norm": 41.71875, "learning_rate": 9.970758862607338e-06, "loss": 17.5644, "step": 10350 }, { "epoch": 0.18732441161646413, "grad_norm": 39.09375, "learning_rate": 9.970730610300679e-06, "loss": 17.5649, "step": 10360 }, { "epoch": 0.18750522668559197, "grad_norm": 40.59375, "learning_rate": 9.97070235799402e-06, "loss": 17.1662, "step": 10370 }, { "epoch": 0.18768604175471984, "grad_norm": 41.0625, "learning_rate": 9.97067410568736e-06, "loss": 17.7755, "step": 10380 }, { "epoch": 0.1878668568238477, "grad_norm": 39.40625, "learning_rate": 9.9706458533807e-06, "loss": 17.0753, "step": 10390 }, { "epoch": 0.18804767189297555, "grad_norm": 41.53125, "learning_rate": 9.970617601074041e-06, "loss": 17.3117, "step": 10400 }, { "epoch": 0.18822848696210343, "grad_norm": 41.34375, "learning_rate": 9.97058934876738e-06, "loss": 17.3471, "step": 10410 }, { "epoch": 0.1884093020312313, "grad_norm": 37.625, "learning_rate": 9.970561096460721e-06, "loss": 17.3984, "step": 10420 }, { "epoch": 0.18859011710035914, "grad_norm": 39.6875, "learning_rate": 9.970532844154062e-06, "loss": 17.0128, "step": 10430 }, { "epoch": 0.188770932169487, "grad_norm": 41.0625, "learning_rate": 9.970504591847402e-06, "loss": 17.2128, "step": 10440 }, { "epoch": 0.18895174723861485, "grad_norm": 41.21875, "learning_rate": 9.970476339540743e-06, "loss": 17.5359, "step": 10450 }, { "epoch": 0.18913256230774272, "grad_norm": 39.6875, "learning_rate": 9.970448087234082e-06, "loss": 16.8257, "step": 10460 }, { "epoch": 0.1893133773768706, "grad_norm": 40.3125, "learning_rate": 9.970419834927424e-06, "loss": 17.617, "step": 10470 }, { "epoch": 0.18949419244599844, "grad_norm": 40.0, "learning_rate": 9.970391582620765e-06, "loss": 18.0481, "step": 10480 }, { "epoch": 0.1896750075151263, "grad_norm": 42.375, "learning_rate": 9.970363330314105e-06, "loss": 17.4429, "step": 10490 }, { "epoch": 0.18985582258425418, "grad_norm": 39.1875, "learning_rate": 9.970335078007444e-06, "loss": 17.5725, "step": 10500 }, { "epoch": 0.19003663765338202, "grad_norm": 41.21875, "learning_rate": 9.970306825700785e-06, "loss": 17.3861, "step": 10510 }, { "epoch": 0.1902174527225099, "grad_norm": 38.875, "learning_rate": 9.970278573394125e-06, "loss": 17.8106, "step": 10520 }, { "epoch": 0.19039826779163777, "grad_norm": 40.46875, "learning_rate": 9.970250321087466e-06, "loss": 17.1235, "step": 10530 }, { "epoch": 0.1905790828607656, "grad_norm": 38.8125, "learning_rate": 9.970222068780807e-06, "loss": 17.1585, "step": 10540 }, { "epoch": 0.19075989792989348, "grad_norm": 39.65625, "learning_rate": 9.970193816474146e-06, "loss": 17.3798, "step": 10550 }, { "epoch": 0.19094071299902135, "grad_norm": 42.15625, "learning_rate": 9.970165564167488e-06, "loss": 16.9925, "step": 10560 }, { "epoch": 0.1911215280681492, "grad_norm": 41.09375, "learning_rate": 9.970137311860829e-06, "loss": 17.6112, "step": 10570 }, { "epoch": 0.19130234313727706, "grad_norm": 41.5, "learning_rate": 9.970109059554167e-06, "loss": 17.0915, "step": 10580 }, { "epoch": 0.19148315820640494, "grad_norm": 37.09375, "learning_rate": 9.970080807247508e-06, "loss": 17.5033, "step": 10590 }, { "epoch": 0.19166397327553278, "grad_norm": 40.15625, "learning_rate": 9.970052554940849e-06, "loss": 17.2897, "step": 10600 }, { "epoch": 0.19184478834466065, "grad_norm": 41.375, "learning_rate": 9.97002430263419e-06, "loss": 17.5192, "step": 10610 }, { "epoch": 0.1920256034137885, "grad_norm": 39.125, "learning_rate": 9.96999605032753e-06, "loss": 17.6841, "step": 10620 }, { "epoch": 0.19220641848291636, "grad_norm": 37.90625, "learning_rate": 9.96996779802087e-06, "loss": 17.2809, "step": 10630 }, { "epoch": 0.19238723355204423, "grad_norm": 40.53125, "learning_rate": 9.969939545714211e-06, "loss": 17.3026, "step": 10640 }, { "epoch": 0.19256804862117208, "grad_norm": 39.96875, "learning_rate": 9.969911293407552e-06, "loss": 17.5597, "step": 10650 }, { "epoch": 0.19274886369029995, "grad_norm": 40.90625, "learning_rate": 9.969883041100892e-06, "loss": 17.0397, "step": 10660 }, { "epoch": 0.19292967875942782, "grad_norm": 39.9375, "learning_rate": 9.969854788794231e-06, "loss": 17.7761, "step": 10670 }, { "epoch": 0.19311049382855566, "grad_norm": 42.3125, "learning_rate": 9.969826536487572e-06, "loss": 17.5117, "step": 10680 }, { "epoch": 0.19329130889768353, "grad_norm": 38.0, "learning_rate": 9.969798284180913e-06, "loss": 17.26, "step": 10690 }, { "epoch": 0.1934721239668114, "grad_norm": 40.40625, "learning_rate": 9.969770031874253e-06, "loss": 17.8356, "step": 10700 }, { "epoch": 0.19365293903593925, "grad_norm": 39.9375, "learning_rate": 9.969741779567594e-06, "loss": 17.7687, "step": 10710 }, { "epoch": 0.19383375410506712, "grad_norm": 40.84375, "learning_rate": 9.969713527260933e-06, "loss": 17.4783, "step": 10720 }, { "epoch": 0.194014569174195, "grad_norm": 43.03125, "learning_rate": 9.969685274954275e-06, "loss": 17.7937, "step": 10730 }, { "epoch": 0.19419538424332283, "grad_norm": 43.15625, "learning_rate": 9.969657022647616e-06, "loss": 17.655, "step": 10740 }, { "epoch": 0.1943761993124507, "grad_norm": 41.28125, "learning_rate": 9.969628770340956e-06, "loss": 17.4106, "step": 10750 }, { "epoch": 0.19455701438157855, "grad_norm": 40.75, "learning_rate": 9.969600518034295e-06, "loss": 17.2935, "step": 10760 }, { "epoch": 0.19473782945070642, "grad_norm": 38.75, "learning_rate": 9.969572265727636e-06, "loss": 17.3053, "step": 10770 }, { "epoch": 0.1949186445198343, "grad_norm": 39.78125, "learning_rate": 9.969544013420977e-06, "loss": 17.6152, "step": 10780 }, { "epoch": 0.19509945958896213, "grad_norm": 43.40625, "learning_rate": 9.969515761114317e-06, "loss": 17.6451, "step": 10790 }, { "epoch": 0.19528027465809, "grad_norm": 40.9375, "learning_rate": 9.969487508807658e-06, "loss": 17.323, "step": 10800 }, { "epoch": 0.19546108972721787, "grad_norm": 39.1875, "learning_rate": 9.969459256500997e-06, "loss": 17.0142, "step": 10810 }, { "epoch": 0.19564190479634572, "grad_norm": 41.34375, "learning_rate": 9.969431004194339e-06, "loss": 17.7659, "step": 10820 }, { "epoch": 0.1958227198654736, "grad_norm": 43.375, "learning_rate": 9.96940275188768e-06, "loss": 17.0923, "step": 10830 }, { "epoch": 0.19600353493460146, "grad_norm": 37.625, "learning_rate": 9.969374499581019e-06, "loss": 17.2729, "step": 10840 }, { "epoch": 0.1961843500037293, "grad_norm": 41.40625, "learning_rate": 9.96934624727436e-06, "loss": 17.4684, "step": 10850 }, { "epoch": 0.19636516507285717, "grad_norm": 38.5, "learning_rate": 9.9693179949677e-06, "loss": 17.3878, "step": 10860 }, { "epoch": 0.19654598014198504, "grad_norm": 40.71875, "learning_rate": 9.96928974266104e-06, "loss": 17.1617, "step": 10870 }, { "epoch": 0.1967267952111129, "grad_norm": 40.8125, "learning_rate": 9.969261490354381e-06, "loss": 17.1731, "step": 10880 }, { "epoch": 0.19690761028024076, "grad_norm": 41.03125, "learning_rate": 9.96923323804772e-06, "loss": 17.5819, "step": 10890 }, { "epoch": 0.19708842534936863, "grad_norm": 39.5, "learning_rate": 9.96920498574106e-06, "loss": 17.3935, "step": 10900 }, { "epoch": 0.19726924041849647, "grad_norm": 40.96875, "learning_rate": 9.969176733434403e-06, "loss": 17.4576, "step": 10910 }, { "epoch": 0.19745005548762434, "grad_norm": 38.1875, "learning_rate": 9.969148481127744e-06, "loss": 17.3355, "step": 10920 }, { "epoch": 0.1976308705567522, "grad_norm": 39.1875, "learning_rate": 9.969120228821082e-06, "loss": 17.1319, "step": 10930 }, { "epoch": 0.19781168562588006, "grad_norm": 38.9375, "learning_rate": 9.969091976514423e-06, "loss": 17.4143, "step": 10940 }, { "epoch": 0.19799250069500793, "grad_norm": 41.46875, "learning_rate": 9.969063724207764e-06, "loss": 17.4249, "step": 10950 }, { "epoch": 0.19817331576413577, "grad_norm": 41.5, "learning_rate": 9.969035471901104e-06, "loss": 17.1293, "step": 10960 }, { "epoch": 0.19835413083326364, "grad_norm": 41.6875, "learning_rate": 9.969007219594445e-06, "loss": 17.4503, "step": 10970 }, { "epoch": 0.1985349459023915, "grad_norm": 38.4375, "learning_rate": 9.968978967287784e-06, "loss": 17.0614, "step": 10980 }, { "epoch": 0.19871576097151936, "grad_norm": 39.09375, "learning_rate": 9.968950714981126e-06, "loss": 17.3571, "step": 10990 }, { "epoch": 0.19889657604064723, "grad_norm": 39.1875, "learning_rate": 9.968922462674467e-06, "loss": 17.5243, "step": 11000 }, { "epoch": 0.1990773911097751, "grad_norm": 38.9375, "learning_rate": 9.968894210367806e-06, "loss": 17.303, "step": 11010 }, { "epoch": 0.19925820617890294, "grad_norm": 40.375, "learning_rate": 9.968865958061146e-06, "loss": 17.3339, "step": 11020 }, { "epoch": 0.1994390212480308, "grad_norm": 40.75, "learning_rate": 9.968837705754487e-06, "loss": 17.4386, "step": 11030 }, { "epoch": 0.19961983631715868, "grad_norm": 38.96875, "learning_rate": 9.968809453447828e-06, "loss": 17.4172, "step": 11040 }, { "epoch": 0.19980065138628653, "grad_norm": 40.59375, "learning_rate": 9.968781201141168e-06, "loss": 17.1305, "step": 11050 }, { "epoch": 0.1999814664554144, "grad_norm": 39.625, "learning_rate": 9.968752948834509e-06, "loss": 17.4369, "step": 11060 }, { "epoch": 0.20016228152454227, "grad_norm": 41.59375, "learning_rate": 9.968724696527848e-06, "loss": 16.9403, "step": 11070 }, { "epoch": 0.2003430965936701, "grad_norm": 40.21875, "learning_rate": 9.96869644422119e-06, "loss": 16.5315, "step": 11080 }, { "epoch": 0.20052391166279798, "grad_norm": 41.40625, "learning_rate": 9.96866819191453e-06, "loss": 17.3027, "step": 11090 }, { "epoch": 0.20070472673192583, "grad_norm": 39.3125, "learning_rate": 9.96863993960787e-06, "loss": 17.1912, "step": 11100 }, { "epoch": 0.2008855418010537, "grad_norm": 43.53125, "learning_rate": 9.96861168730121e-06, "loss": 17.6024, "step": 11110 }, { "epoch": 0.20106635687018157, "grad_norm": 38.6875, "learning_rate": 9.968583434994551e-06, "loss": 17.4576, "step": 11120 }, { "epoch": 0.2012471719393094, "grad_norm": 39.84375, "learning_rate": 9.968555182687892e-06, "loss": 17.3747, "step": 11130 }, { "epoch": 0.20142798700843728, "grad_norm": 40.15625, "learning_rate": 9.968526930381232e-06, "loss": 17.0922, "step": 11140 }, { "epoch": 0.20160880207756515, "grad_norm": 39.5625, "learning_rate": 9.968498678074571e-06, "loss": 17.3724, "step": 11150 }, { "epoch": 0.201789617146693, "grad_norm": 39.96875, "learning_rate": 9.968470425767912e-06, "loss": 17.3961, "step": 11160 }, { "epoch": 0.20197043221582087, "grad_norm": 42.9375, "learning_rate": 9.968442173461254e-06, "loss": 17.4708, "step": 11170 }, { "epoch": 0.20215124728494874, "grad_norm": 41.28125, "learning_rate": 9.968413921154595e-06, "loss": 17.5796, "step": 11180 }, { "epoch": 0.20233206235407658, "grad_norm": 41.15625, "learning_rate": 9.968385668847934e-06, "loss": 17.803, "step": 11190 }, { "epoch": 0.20251287742320445, "grad_norm": 40.0625, "learning_rate": 9.968357416541274e-06, "loss": 17.0907, "step": 11200 }, { "epoch": 0.20269369249233232, "grad_norm": 38.6875, "learning_rate": 9.968329164234615e-06, "loss": 17.4098, "step": 11210 }, { "epoch": 0.20287450756146017, "grad_norm": 41.3125, "learning_rate": 9.968300911927955e-06, "loss": 17.3981, "step": 11220 }, { "epoch": 0.20305532263058804, "grad_norm": 42.40625, "learning_rate": 9.968272659621296e-06, "loss": 17.505, "step": 11230 }, { "epoch": 0.2032361376997159, "grad_norm": 40.03125, "learning_rate": 9.968244407314635e-06, "loss": 17.1375, "step": 11240 }, { "epoch": 0.20341695276884375, "grad_norm": 42.09375, "learning_rate": 9.968216155007976e-06, "loss": 17.1601, "step": 11250 }, { "epoch": 0.20359776783797162, "grad_norm": 39.0625, "learning_rate": 9.968187902701318e-06, "loss": 17.2334, "step": 11260 }, { "epoch": 0.20377858290709946, "grad_norm": 39.90625, "learning_rate": 9.968159650394657e-06, "loss": 16.8655, "step": 11270 }, { "epoch": 0.20395939797622734, "grad_norm": 43.40625, "learning_rate": 9.968131398087997e-06, "loss": 17.5414, "step": 11280 }, { "epoch": 0.2041402130453552, "grad_norm": 38.0, "learning_rate": 9.968103145781338e-06, "loss": 17.5637, "step": 11290 }, { "epoch": 0.20432102811448305, "grad_norm": 40.625, "learning_rate": 9.968074893474679e-06, "loss": 17.0316, "step": 11300 }, { "epoch": 0.20450184318361092, "grad_norm": 42.59375, "learning_rate": 9.96804664116802e-06, "loss": 17.5458, "step": 11310 }, { "epoch": 0.2046826582527388, "grad_norm": 38.9375, "learning_rate": 9.968018388861358e-06, "loss": 17.4147, "step": 11320 }, { "epoch": 0.20486347332186663, "grad_norm": 41.78125, "learning_rate": 9.967990136554699e-06, "loss": 17.158, "step": 11330 }, { "epoch": 0.2050442883909945, "grad_norm": 42.21875, "learning_rate": 9.967961884248041e-06, "loss": 17.5764, "step": 11340 }, { "epoch": 0.20522510346012238, "grad_norm": 39.40625, "learning_rate": 9.967933631941382e-06, "loss": 17.6255, "step": 11350 }, { "epoch": 0.20540591852925022, "grad_norm": 40.6875, "learning_rate": 9.96790537963472e-06, "loss": 17.2257, "step": 11360 }, { "epoch": 0.2055867335983781, "grad_norm": 40.625, "learning_rate": 9.967877127328061e-06, "loss": 17.3469, "step": 11370 }, { "epoch": 0.20576754866750596, "grad_norm": 41.9375, "learning_rate": 9.967848875021402e-06, "loss": 17.1934, "step": 11380 }, { "epoch": 0.2059483637366338, "grad_norm": 39.625, "learning_rate": 9.967820622714743e-06, "loss": 17.1529, "step": 11390 }, { "epoch": 0.20612917880576168, "grad_norm": 42.0, "learning_rate": 9.967792370408083e-06, "loss": 17.0926, "step": 11400 }, { "epoch": 0.20630999387488955, "grad_norm": 41.96875, "learning_rate": 9.967764118101422e-06, "loss": 17.0699, "step": 11410 }, { "epoch": 0.2064908089440174, "grad_norm": 41.6875, "learning_rate": 9.967735865794763e-06, "loss": 17.3271, "step": 11420 }, { "epoch": 0.20667162401314526, "grad_norm": 42.78125, "learning_rate": 9.967707613488105e-06, "loss": 17.2057, "step": 11430 }, { "epoch": 0.2068524390822731, "grad_norm": 38.71875, "learning_rate": 9.967679361181444e-06, "loss": 17.5981, "step": 11440 }, { "epoch": 0.20703325415140097, "grad_norm": 39.65625, "learning_rate": 9.967651108874785e-06, "loss": 16.9635, "step": 11450 }, { "epoch": 0.20721406922052885, "grad_norm": 40.375, "learning_rate": 9.967622856568125e-06, "loss": 17.6198, "step": 11460 }, { "epoch": 0.2073948842896567, "grad_norm": 38.75, "learning_rate": 9.967594604261466e-06, "loss": 17.2423, "step": 11470 }, { "epoch": 0.20757569935878456, "grad_norm": 43.15625, "learning_rate": 9.967566351954807e-06, "loss": 17.2828, "step": 11480 }, { "epoch": 0.20775651442791243, "grad_norm": 41.34375, "learning_rate": 9.967538099648147e-06, "loss": 17.5125, "step": 11490 }, { "epoch": 0.20793732949704027, "grad_norm": 42.625, "learning_rate": 9.967509847341486e-06, "loss": 17.0669, "step": 11500 }, { "epoch": 0.20811814456616815, "grad_norm": 41.84375, "learning_rate": 9.967481595034827e-06, "loss": 17.4618, "step": 11510 }, { "epoch": 0.20829895963529602, "grad_norm": 40.5625, "learning_rate": 9.967453342728169e-06, "loss": 17.1914, "step": 11520 }, { "epoch": 0.20847977470442386, "grad_norm": 39.09375, "learning_rate": 9.967425090421508e-06, "loss": 17.3149, "step": 11530 }, { "epoch": 0.20866058977355173, "grad_norm": 43.78125, "learning_rate": 9.967396838114849e-06, "loss": 17.5775, "step": 11540 }, { "epoch": 0.2088414048426796, "grad_norm": 41.28125, "learning_rate": 9.96736858580819e-06, "loss": 17.443, "step": 11550 }, { "epoch": 0.20902221991180744, "grad_norm": 40.40625, "learning_rate": 9.96734033350153e-06, "loss": 17.4273, "step": 11560 }, { "epoch": 0.20920303498093532, "grad_norm": 41.75, "learning_rate": 9.96731208119487e-06, "loss": 17.0814, "step": 11570 }, { "epoch": 0.20938385005006319, "grad_norm": 41.625, "learning_rate": 9.96728382888821e-06, "loss": 17.515, "step": 11580 }, { "epoch": 0.20956466511919103, "grad_norm": 38.03125, "learning_rate": 9.96725557658155e-06, "loss": 17.6116, "step": 11590 }, { "epoch": 0.2097454801883189, "grad_norm": 38.9375, "learning_rate": 9.96722732427489e-06, "loss": 17.3167, "step": 11600 }, { "epoch": 0.20992629525744674, "grad_norm": 40.8125, "learning_rate": 9.967199071968233e-06, "loss": 17.2356, "step": 11610 }, { "epoch": 0.21010711032657461, "grad_norm": 41.375, "learning_rate": 9.967170819661572e-06, "loss": 17.4695, "step": 11620 }, { "epoch": 0.21028792539570249, "grad_norm": 39.78125, "learning_rate": 9.967142567354912e-06, "loss": 17.5472, "step": 11630 }, { "epoch": 0.21046874046483033, "grad_norm": 42.46875, "learning_rate": 9.967114315048253e-06, "loss": 16.7093, "step": 11640 }, { "epoch": 0.2106495555339582, "grad_norm": 44.0625, "learning_rate": 9.967086062741594e-06, "loss": 17.725, "step": 11650 }, { "epoch": 0.21083037060308607, "grad_norm": 42.6875, "learning_rate": 9.967057810434934e-06, "loss": 17.4257, "step": 11660 }, { "epoch": 0.2110111856722139, "grad_norm": 38.25, "learning_rate": 9.967029558128273e-06, "loss": 17.2519, "step": 11670 }, { "epoch": 0.21119200074134178, "grad_norm": 38.46875, "learning_rate": 9.967001305821614e-06, "loss": 17.6406, "step": 11680 }, { "epoch": 0.21137281581046966, "grad_norm": 38.28125, "learning_rate": 9.966973053514956e-06, "loss": 17.1831, "step": 11690 }, { "epoch": 0.2115536308795975, "grad_norm": 42.40625, "learning_rate": 9.966944801208295e-06, "loss": 17.2882, "step": 11700 }, { "epoch": 0.21173444594872537, "grad_norm": 38.40625, "learning_rate": 9.966916548901636e-06, "loss": 17.6014, "step": 11710 }, { "epoch": 0.21191526101785324, "grad_norm": 42.71875, "learning_rate": 9.966888296594976e-06, "loss": 17.5026, "step": 11720 }, { "epoch": 0.21209607608698108, "grad_norm": 41.3125, "learning_rate": 9.966860044288317e-06, "loss": 17.4043, "step": 11730 }, { "epoch": 0.21227689115610895, "grad_norm": 40.40625, "learning_rate": 9.966831791981658e-06, "loss": 16.6975, "step": 11740 }, { "epoch": 0.2124577062252368, "grad_norm": 42.96875, "learning_rate": 9.966803539674997e-06, "loss": 17.4932, "step": 11750 }, { "epoch": 0.21263852129436467, "grad_norm": 41.8125, "learning_rate": 9.966775287368337e-06, "loss": 17.5085, "step": 11760 }, { "epoch": 0.21281933636349254, "grad_norm": 41.0, "learning_rate": 9.966747035061678e-06, "loss": 17.6466, "step": 11770 }, { "epoch": 0.21300015143262038, "grad_norm": 41.15625, "learning_rate": 9.96671878275502e-06, "loss": 17.0325, "step": 11780 }, { "epoch": 0.21318096650174825, "grad_norm": 39.46875, "learning_rate": 9.966690530448359e-06, "loss": 17.2715, "step": 11790 }, { "epoch": 0.21336178157087612, "grad_norm": 40.65625, "learning_rate": 9.9666622781417e-06, "loss": 17.3877, "step": 11800 }, { "epoch": 0.21354259664000397, "grad_norm": 41.3125, "learning_rate": 9.96663402583504e-06, "loss": 17.3444, "step": 11810 }, { "epoch": 0.21372341170913184, "grad_norm": 41.5625, "learning_rate": 9.966605773528381e-06, "loss": 17.11, "step": 11820 }, { "epoch": 0.2139042267782597, "grad_norm": 38.65625, "learning_rate": 9.966577521221722e-06, "loss": 17.4604, "step": 11830 }, { "epoch": 0.21408504184738755, "grad_norm": 40.90625, "learning_rate": 9.96654926891506e-06, "loss": 17.2247, "step": 11840 }, { "epoch": 0.21426585691651542, "grad_norm": 38.875, "learning_rate": 9.966521016608401e-06, "loss": 17.1911, "step": 11850 }, { "epoch": 0.2144466719856433, "grad_norm": 41.90625, "learning_rate": 9.966492764301742e-06, "loss": 17.3012, "step": 11860 }, { "epoch": 0.21462748705477114, "grad_norm": 42.0, "learning_rate": 9.966464511995082e-06, "loss": 16.8401, "step": 11870 }, { "epoch": 0.214808302123899, "grad_norm": 40.8125, "learning_rate": 9.966436259688423e-06, "loss": 17.2553, "step": 11880 }, { "epoch": 0.21498911719302688, "grad_norm": 40.625, "learning_rate": 9.966408007381764e-06, "loss": 17.3049, "step": 11890 }, { "epoch": 0.21516993226215472, "grad_norm": 40.875, "learning_rate": 9.966379755075104e-06, "loss": 17.5202, "step": 11900 }, { "epoch": 0.2153507473312826, "grad_norm": 39.34375, "learning_rate": 9.966351502768445e-06, "loss": 17.5647, "step": 11910 }, { "epoch": 0.21553156240041044, "grad_norm": 40.71875, "learning_rate": 9.966323250461785e-06, "loss": 17.4072, "step": 11920 }, { "epoch": 0.2157123774695383, "grad_norm": 41.0625, "learning_rate": 9.966294998155124e-06, "loss": 17.3908, "step": 11930 }, { "epoch": 0.21589319253866618, "grad_norm": 39.78125, "learning_rate": 9.966266745848465e-06, "loss": 17.5294, "step": 11940 }, { "epoch": 0.21607400760779402, "grad_norm": 37.75, "learning_rate": 9.966238493541806e-06, "loss": 17.1815, "step": 11950 }, { "epoch": 0.2162548226769219, "grad_norm": 37.875, "learning_rate": 9.966210241235146e-06, "loss": 17.3322, "step": 11960 }, { "epoch": 0.21643563774604976, "grad_norm": 36.75, "learning_rate": 9.966181988928487e-06, "loss": 16.9328, "step": 11970 }, { "epoch": 0.2166164528151776, "grad_norm": 40.5625, "learning_rate": 9.966153736621828e-06, "loss": 16.6897, "step": 11980 }, { "epoch": 0.21679726788430548, "grad_norm": 39.78125, "learning_rate": 9.966125484315168e-06, "loss": 17.0212, "step": 11990 }, { "epoch": 0.21697808295343335, "grad_norm": 41.71875, "learning_rate": 9.966097232008509e-06, "loss": 17.2172, "step": 12000 }, { "epoch": 0.2171588980225612, "grad_norm": 42.125, "learning_rate": 9.966068979701848e-06, "loss": 16.9982, "step": 12010 }, { "epoch": 0.21733971309168906, "grad_norm": 42.125, "learning_rate": 9.966040727395188e-06, "loss": 17.1451, "step": 12020 }, { "epoch": 0.21752052816081693, "grad_norm": 39.96875, "learning_rate": 9.966012475088529e-06, "loss": 17.435, "step": 12030 }, { "epoch": 0.21770134322994478, "grad_norm": 38.625, "learning_rate": 9.96598422278187e-06, "loss": 17.0784, "step": 12040 }, { "epoch": 0.21788215829907265, "grad_norm": 39.0, "learning_rate": 9.96595597047521e-06, "loss": 17.4566, "step": 12050 }, { "epoch": 0.21806297336820052, "grad_norm": 40.09375, "learning_rate": 9.96592771816855e-06, "loss": 17.1917, "step": 12060 }, { "epoch": 0.21824378843732836, "grad_norm": 41.21875, "learning_rate": 9.965899465861891e-06, "loss": 17.5761, "step": 12070 }, { "epoch": 0.21842460350645623, "grad_norm": 41.59375, "learning_rate": 9.965871213555232e-06, "loss": 17.2526, "step": 12080 }, { "epoch": 0.21860541857558408, "grad_norm": 40.34375, "learning_rate": 9.965842961248573e-06, "loss": 17.4308, "step": 12090 }, { "epoch": 0.21878623364471195, "grad_norm": 42.28125, "learning_rate": 9.965814708941912e-06, "loss": 17.687, "step": 12100 }, { "epoch": 0.21896704871383982, "grad_norm": 40.78125, "learning_rate": 9.965786456635252e-06, "loss": 17.2235, "step": 12110 }, { "epoch": 0.21914786378296766, "grad_norm": 40.78125, "learning_rate": 9.965758204328593e-06, "loss": 17.2053, "step": 12120 }, { "epoch": 0.21932867885209553, "grad_norm": 41.46875, "learning_rate": 9.965729952021933e-06, "loss": 16.9105, "step": 12130 }, { "epoch": 0.2195094939212234, "grad_norm": 39.875, "learning_rate": 9.965701699715274e-06, "loss": 17.7573, "step": 12140 }, { "epoch": 0.21969030899035125, "grad_norm": 44.09375, "learning_rate": 9.965673447408615e-06, "loss": 18.0731, "step": 12150 }, { "epoch": 0.21987112405947912, "grad_norm": 41.03125, "learning_rate": 9.965645195101955e-06, "loss": 16.8835, "step": 12160 }, { "epoch": 0.220051939128607, "grad_norm": 41.0625, "learning_rate": 9.965616942795296e-06, "loss": 17.0859, "step": 12170 }, { "epoch": 0.22023275419773483, "grad_norm": 40.25, "learning_rate": 9.965588690488635e-06, "loss": 17.2099, "step": 12180 }, { "epoch": 0.2204135692668627, "grad_norm": 43.40625, "learning_rate": 9.965560438181975e-06, "loss": 17.5276, "step": 12190 }, { "epoch": 0.22059438433599057, "grad_norm": 40.1875, "learning_rate": 9.965532185875316e-06, "loss": 17.3187, "step": 12200 }, { "epoch": 0.22077519940511842, "grad_norm": 42.46875, "learning_rate": 9.965503933568657e-06, "loss": 17.2944, "step": 12210 }, { "epoch": 0.2209560144742463, "grad_norm": 42.9375, "learning_rate": 9.965475681261997e-06, "loss": 17.1889, "step": 12220 }, { "epoch": 0.22113682954337416, "grad_norm": 40.4375, "learning_rate": 9.965447428955338e-06, "loss": 16.8959, "step": 12230 }, { "epoch": 0.221317644612502, "grad_norm": 45.375, "learning_rate": 9.965419176648679e-06, "loss": 17.3403, "step": 12240 }, { "epoch": 0.22149845968162987, "grad_norm": 37.5625, "learning_rate": 9.96539092434202e-06, "loss": 17.3305, "step": 12250 }, { "epoch": 0.22167927475075772, "grad_norm": 45.03125, "learning_rate": 9.96536267203536e-06, "loss": 17.6105, "step": 12260 }, { "epoch": 0.2218600898198856, "grad_norm": 41.875, "learning_rate": 9.965334419728699e-06, "loss": 17.2321, "step": 12270 }, { "epoch": 0.22204090488901346, "grad_norm": 39.4375, "learning_rate": 9.96530616742204e-06, "loss": 17.4887, "step": 12280 }, { "epoch": 0.2222217199581413, "grad_norm": 39.78125, "learning_rate": 9.96527791511538e-06, "loss": 17.6234, "step": 12290 }, { "epoch": 0.22240253502726917, "grad_norm": 40.28125, "learning_rate": 9.96524966280872e-06, "loss": 17.3018, "step": 12300 }, { "epoch": 0.22258335009639704, "grad_norm": 38.28125, "learning_rate": 9.965221410502061e-06, "loss": 17.2957, "step": 12310 }, { "epoch": 0.22276416516552489, "grad_norm": 41.625, "learning_rate": 9.965193158195402e-06, "loss": 17.1843, "step": 12320 }, { "epoch": 0.22294498023465276, "grad_norm": 38.96875, "learning_rate": 9.965164905888743e-06, "loss": 17.5083, "step": 12330 }, { "epoch": 0.22312579530378063, "grad_norm": 40.21875, "learning_rate": 9.965136653582083e-06, "loss": 17.018, "step": 12340 }, { "epoch": 0.22330661037290847, "grad_norm": 41.8125, "learning_rate": 9.965108401275422e-06, "loss": 17.7748, "step": 12350 }, { "epoch": 0.22348742544203634, "grad_norm": 41.46875, "learning_rate": 9.965080148968763e-06, "loss": 17.2274, "step": 12360 }, { "epoch": 0.2236682405111642, "grad_norm": 39.5625, "learning_rate": 9.965051896662103e-06, "loss": 17.2712, "step": 12370 }, { "epoch": 0.22384905558029206, "grad_norm": 41.15625, "learning_rate": 9.965023644355444e-06, "loss": 17.5226, "step": 12380 }, { "epoch": 0.22402987064941993, "grad_norm": 39.09375, "learning_rate": 9.964995392048785e-06, "loss": 17.2146, "step": 12390 }, { "epoch": 0.2242106857185478, "grad_norm": 41.59375, "learning_rate": 9.964967139742125e-06, "loss": 17.4363, "step": 12400 }, { "epoch": 0.22439150078767564, "grad_norm": 38.40625, "learning_rate": 9.964938887435466e-06, "loss": 17.2412, "step": 12410 }, { "epoch": 0.2245723158568035, "grad_norm": 37.0, "learning_rate": 9.964910635128806e-06, "loss": 17.4035, "step": 12420 }, { "epoch": 0.22475313092593135, "grad_norm": 41.03125, "learning_rate": 9.964882382822147e-06, "loss": 17.4648, "step": 12430 }, { "epoch": 0.22493394599505923, "grad_norm": 39.84375, "learning_rate": 9.964854130515486e-06, "loss": 17.2871, "step": 12440 }, { "epoch": 0.2251147610641871, "grad_norm": 41.40625, "learning_rate": 9.964825878208827e-06, "loss": 17.3342, "step": 12450 }, { "epoch": 0.22529557613331494, "grad_norm": 40.78125, "learning_rate": 9.964797625902167e-06, "loss": 17.5347, "step": 12460 }, { "epoch": 0.2254763912024428, "grad_norm": 37.28125, "learning_rate": 9.964769373595508e-06, "loss": 17.2245, "step": 12470 }, { "epoch": 0.22565720627157068, "grad_norm": 36.9375, "learning_rate": 9.964741121288848e-06, "loss": 16.9847, "step": 12480 }, { "epoch": 0.22583802134069852, "grad_norm": 40.65625, "learning_rate": 9.964712868982189e-06, "loss": 16.98, "step": 12490 }, { "epoch": 0.2260188364098264, "grad_norm": 38.8125, "learning_rate": 9.96468461667553e-06, "loss": 17.0928, "step": 12500 }, { "epoch": 0.22619965147895427, "grad_norm": 37.59375, "learning_rate": 9.96465636436887e-06, "loss": 17.3024, "step": 12510 }, { "epoch": 0.2263804665480821, "grad_norm": 41.75, "learning_rate": 9.964628112062211e-06, "loss": 17.6225, "step": 12520 }, { "epoch": 0.22656128161720998, "grad_norm": 41.625, "learning_rate": 9.96459985975555e-06, "loss": 17.4294, "step": 12530 }, { "epoch": 0.22674209668633785, "grad_norm": 40.78125, "learning_rate": 9.96457160744889e-06, "loss": 17.0309, "step": 12540 }, { "epoch": 0.2269229117554657, "grad_norm": 40.25, "learning_rate": 9.964543355142231e-06, "loss": 16.9067, "step": 12550 }, { "epoch": 0.22710372682459357, "grad_norm": 41.65625, "learning_rate": 9.964515102835572e-06, "loss": 17.5133, "step": 12560 }, { "epoch": 0.2272845418937214, "grad_norm": 40.71875, "learning_rate": 9.964486850528912e-06, "loss": 17.1679, "step": 12570 }, { "epoch": 0.22746535696284928, "grad_norm": 40.5, "learning_rate": 9.964458598222253e-06, "loss": 17.345, "step": 12580 }, { "epoch": 0.22764617203197715, "grad_norm": 41.53125, "learning_rate": 9.964430345915594e-06, "loss": 17.2944, "step": 12590 }, { "epoch": 0.227826987101105, "grad_norm": 42.375, "learning_rate": 9.964402093608934e-06, "loss": 16.8557, "step": 12600 }, { "epoch": 0.22800780217023287, "grad_norm": 40.25, "learning_rate": 9.964373841302273e-06, "loss": 17.3471, "step": 12610 }, { "epoch": 0.22818861723936074, "grad_norm": 42.28125, "learning_rate": 9.964345588995614e-06, "loss": 16.9808, "step": 12620 }, { "epoch": 0.22836943230848858, "grad_norm": 39.78125, "learning_rate": 9.964317336688954e-06, "loss": 17.1642, "step": 12630 }, { "epoch": 0.22855024737761645, "grad_norm": 40.84375, "learning_rate": 9.964289084382295e-06, "loss": 17.1546, "step": 12640 }, { "epoch": 0.22873106244674432, "grad_norm": 40.53125, "learning_rate": 9.964260832075636e-06, "loss": 17.4697, "step": 12650 }, { "epoch": 0.22891187751587216, "grad_norm": 41.40625, "learning_rate": 9.964232579768976e-06, "loss": 17.0869, "step": 12660 }, { "epoch": 0.22909269258500004, "grad_norm": 38.15625, "learning_rate": 9.964204327462317e-06, "loss": 17.1933, "step": 12670 }, { "epoch": 0.2292735076541279, "grad_norm": 41.71875, "learning_rate": 9.964176075155658e-06, "loss": 17.1562, "step": 12680 }, { "epoch": 0.22945432272325575, "grad_norm": 43.15625, "learning_rate": 9.964147822848998e-06, "loss": 17.2442, "step": 12690 }, { "epoch": 0.22963513779238362, "grad_norm": 42.46875, "learning_rate": 9.964119570542337e-06, "loss": 17.0829, "step": 12700 }, { "epoch": 0.2298159528615115, "grad_norm": 39.75, "learning_rate": 9.964091318235678e-06, "loss": 17.4495, "step": 12710 }, { "epoch": 0.22999676793063933, "grad_norm": 41.8125, "learning_rate": 9.964063065929018e-06, "loss": 17.1042, "step": 12720 }, { "epoch": 0.2301775829997672, "grad_norm": 41.28125, "learning_rate": 9.964034813622359e-06, "loss": 17.2907, "step": 12730 }, { "epoch": 0.23035839806889505, "grad_norm": 39.84375, "learning_rate": 9.9640065613157e-06, "loss": 17.3119, "step": 12740 }, { "epoch": 0.23053921313802292, "grad_norm": 42.0625, "learning_rate": 9.96397830900904e-06, "loss": 17.4088, "step": 12750 }, { "epoch": 0.2307200282071508, "grad_norm": 39.65625, "learning_rate": 9.96395005670238e-06, "loss": 17.0552, "step": 12760 }, { "epoch": 0.23090084327627863, "grad_norm": 37.9375, "learning_rate": 9.963921804395721e-06, "loss": 17.3551, "step": 12770 }, { "epoch": 0.2310816583454065, "grad_norm": 39.03125, "learning_rate": 9.96389355208906e-06, "loss": 17.0639, "step": 12780 }, { "epoch": 0.23126247341453438, "grad_norm": 37.71875, "learning_rate": 9.963865299782401e-06, "loss": 17.2184, "step": 12790 }, { "epoch": 0.23144328848366222, "grad_norm": 41.59375, "learning_rate": 9.963837047475742e-06, "loss": 17.7574, "step": 12800 }, { "epoch": 0.2316241035527901, "grad_norm": 37.375, "learning_rate": 9.963808795169082e-06, "loss": 17.421, "step": 12810 }, { "epoch": 0.23180491862191796, "grad_norm": 39.0625, "learning_rate": 9.963780542862423e-06, "loss": 17.1152, "step": 12820 }, { "epoch": 0.2319857336910458, "grad_norm": 42.28125, "learning_rate": 9.963752290555763e-06, "loss": 17.2219, "step": 12830 }, { "epoch": 0.23216654876017367, "grad_norm": 39.96875, "learning_rate": 9.963724038249104e-06, "loss": 17.1605, "step": 12840 }, { "epoch": 0.23234736382930155, "grad_norm": 40.96875, "learning_rate": 9.963695785942445e-06, "loss": 17.1662, "step": 12850 }, { "epoch": 0.2325281788984294, "grad_norm": 39.84375, "learning_rate": 9.963667533635785e-06, "loss": 17.369, "step": 12860 }, { "epoch": 0.23270899396755726, "grad_norm": 43.375, "learning_rate": 9.963639281329124e-06, "loss": 17.798, "step": 12870 }, { "epoch": 0.23288980903668513, "grad_norm": 41.5625, "learning_rate": 9.963611029022465e-06, "loss": 17.1347, "step": 12880 }, { "epoch": 0.23307062410581297, "grad_norm": 38.8125, "learning_rate": 9.963582776715805e-06, "loss": 17.398, "step": 12890 }, { "epoch": 0.23325143917494084, "grad_norm": 38.4375, "learning_rate": 9.963554524409146e-06, "loss": 17.3474, "step": 12900 }, { "epoch": 0.2334322542440687, "grad_norm": 39.59375, "learning_rate": 9.963526272102487e-06, "loss": 17.1073, "step": 12910 }, { "epoch": 0.23361306931319656, "grad_norm": 41.625, "learning_rate": 9.963498019795827e-06, "loss": 17.2598, "step": 12920 }, { "epoch": 0.23379388438232443, "grad_norm": 43.96875, "learning_rate": 9.963469767489168e-06, "loss": 17.114, "step": 12930 }, { "epoch": 0.23397469945145227, "grad_norm": 39.46875, "learning_rate": 9.963441515182509e-06, "loss": 17.214, "step": 12940 }, { "epoch": 0.23415551452058014, "grad_norm": 39.1875, "learning_rate": 9.96341326287585e-06, "loss": 17.0908, "step": 12950 }, { "epoch": 0.23433632958970801, "grad_norm": 39.21875, "learning_rate": 9.963385010569188e-06, "loss": 16.9238, "step": 12960 }, { "epoch": 0.23451714465883586, "grad_norm": 42.71875, "learning_rate": 9.963356758262529e-06, "loss": 17.0022, "step": 12970 }, { "epoch": 0.23469795972796373, "grad_norm": 38.84375, "learning_rate": 9.96332850595587e-06, "loss": 17.0961, "step": 12980 }, { "epoch": 0.2348787747970916, "grad_norm": 38.53125, "learning_rate": 9.96330025364921e-06, "loss": 17.5029, "step": 12990 }, { "epoch": 0.23505958986621944, "grad_norm": 38.3125, "learning_rate": 9.96327200134255e-06, "loss": 17.3472, "step": 13000 }, { "epoch": 0.2352404049353473, "grad_norm": 41.0625, "learning_rate": 9.963243749035891e-06, "loss": 17.238, "step": 13010 }, { "epoch": 0.23542122000447518, "grad_norm": 39.21875, "learning_rate": 9.963215496729232e-06, "loss": 17.2826, "step": 13020 }, { "epoch": 0.23560203507360303, "grad_norm": 40.15625, "learning_rate": 9.963187244422573e-06, "loss": 16.9839, "step": 13030 }, { "epoch": 0.2357828501427309, "grad_norm": 41.28125, "learning_rate": 9.963158992115911e-06, "loss": 17.0687, "step": 13040 }, { "epoch": 0.23596366521185877, "grad_norm": 40.6875, "learning_rate": 9.963130739809252e-06, "loss": 17.3794, "step": 13050 }, { "epoch": 0.2361444802809866, "grad_norm": 40.53125, "learning_rate": 9.963102487502593e-06, "loss": 17.0712, "step": 13060 }, { "epoch": 0.23632529535011448, "grad_norm": 40.3125, "learning_rate": 9.963074235195933e-06, "loss": 16.9269, "step": 13070 }, { "epoch": 0.23650611041924233, "grad_norm": 39.6875, "learning_rate": 9.963045982889274e-06, "loss": 17.0576, "step": 13080 }, { "epoch": 0.2366869254883702, "grad_norm": 40.4375, "learning_rate": 9.963017730582613e-06, "loss": 16.9084, "step": 13090 }, { "epoch": 0.23686774055749807, "grad_norm": 38.71875, "learning_rate": 9.962989478275955e-06, "loss": 17.7958, "step": 13100 }, { "epoch": 0.2370485556266259, "grad_norm": 40.125, "learning_rate": 9.962961225969296e-06, "loss": 17.2817, "step": 13110 }, { "epoch": 0.23722937069575378, "grad_norm": 41.40625, "learning_rate": 9.962932973662636e-06, "loss": 17.777, "step": 13120 }, { "epoch": 0.23741018576488165, "grad_norm": 39.3125, "learning_rate": 9.962904721355975e-06, "loss": 17.2679, "step": 13130 }, { "epoch": 0.2375910008340095, "grad_norm": 42.375, "learning_rate": 9.962876469049316e-06, "loss": 17.0529, "step": 13140 }, { "epoch": 0.23777181590313737, "grad_norm": 41.84375, "learning_rate": 9.962848216742657e-06, "loss": 17.0093, "step": 13150 }, { "epoch": 0.23795263097226524, "grad_norm": 41.46875, "learning_rate": 9.962819964435997e-06, "loss": 17.5174, "step": 13160 }, { "epoch": 0.23813344604139308, "grad_norm": 42.46875, "learning_rate": 9.962791712129338e-06, "loss": 17.5967, "step": 13170 }, { "epoch": 0.23831426111052095, "grad_norm": 38.90625, "learning_rate": 9.962763459822678e-06, "loss": 17.1299, "step": 13180 }, { "epoch": 0.23849507617964882, "grad_norm": 42.21875, "learning_rate": 9.962735207516019e-06, "loss": 17.0755, "step": 13190 }, { "epoch": 0.23867589124877667, "grad_norm": 39.375, "learning_rate": 9.96270695520936e-06, "loss": 17.0905, "step": 13200 }, { "epoch": 0.23885670631790454, "grad_norm": 41.25, "learning_rate": 9.962678702902699e-06, "loss": 17.5971, "step": 13210 }, { "epoch": 0.2390375213870324, "grad_norm": 41.28125, "learning_rate": 9.96265045059604e-06, "loss": 17.156, "step": 13220 }, { "epoch": 0.23921833645616025, "grad_norm": 38.21875, "learning_rate": 9.96262219828938e-06, "loss": 17.2076, "step": 13230 }, { "epoch": 0.23939915152528812, "grad_norm": 40.28125, "learning_rate": 9.96259394598272e-06, "loss": 17.1039, "step": 13240 }, { "epoch": 0.23957996659441597, "grad_norm": 41.21875, "learning_rate": 9.962565693676061e-06, "loss": 17.3425, "step": 13250 }, { "epoch": 0.23976078166354384, "grad_norm": 40.09375, "learning_rate": 9.962537441369402e-06, "loss": 17.5018, "step": 13260 }, { "epoch": 0.2399415967326717, "grad_norm": 41.03125, "learning_rate": 9.962509189062742e-06, "loss": 17.3076, "step": 13270 }, { "epoch": 0.24012241180179955, "grad_norm": 38.34375, "learning_rate": 9.962480936756083e-06, "loss": 17.1876, "step": 13280 }, { "epoch": 0.24030322687092742, "grad_norm": 36.90625, "learning_rate": 9.962452684449424e-06, "loss": 16.7979, "step": 13290 }, { "epoch": 0.2404840419400553, "grad_norm": 40.4375, "learning_rate": 9.962424432142763e-06, "loss": 17.3982, "step": 13300 }, { "epoch": 0.24066485700918314, "grad_norm": 41.53125, "learning_rate": 9.962396179836103e-06, "loss": 17.331, "step": 13310 }, { "epoch": 0.240845672078311, "grad_norm": 39.5, "learning_rate": 9.962367927529444e-06, "loss": 16.9671, "step": 13320 }, { "epoch": 0.24102648714743888, "grad_norm": 39.0, "learning_rate": 9.962339675222784e-06, "loss": 17.5099, "step": 13330 }, { "epoch": 0.24120730221656672, "grad_norm": 41.0, "learning_rate": 9.962311422916125e-06, "loss": 17.1498, "step": 13340 }, { "epoch": 0.2413881172856946, "grad_norm": 39.3125, "learning_rate": 9.962283170609464e-06, "loss": 17.5852, "step": 13350 }, { "epoch": 0.24156893235482246, "grad_norm": 43.8125, "learning_rate": 9.962254918302806e-06, "loss": 17.0536, "step": 13360 }, { "epoch": 0.2417497474239503, "grad_norm": 40.875, "learning_rate": 9.962226665996147e-06, "loss": 17.2196, "step": 13370 }, { "epoch": 0.24193056249307818, "grad_norm": 42.96875, "learning_rate": 9.962198413689488e-06, "loss": 17.5202, "step": 13380 }, { "epoch": 0.24211137756220602, "grad_norm": 41.625, "learning_rate": 9.962170161382826e-06, "loss": 17.5732, "step": 13390 }, { "epoch": 0.2422921926313339, "grad_norm": 40.59375, "learning_rate": 9.962141909076167e-06, "loss": 17.365, "step": 13400 }, { "epoch": 0.24247300770046176, "grad_norm": 41.09375, "learning_rate": 9.962113656769508e-06, "loss": 17.4699, "step": 13410 }, { "epoch": 0.2426538227695896, "grad_norm": 37.46875, "learning_rate": 9.962085404462848e-06, "loss": 17.4083, "step": 13420 }, { "epoch": 0.24283463783871748, "grad_norm": 39.0625, "learning_rate": 9.962057152156189e-06, "loss": 16.9631, "step": 13430 }, { "epoch": 0.24301545290784535, "grad_norm": 40.90625, "learning_rate": 9.962028899849528e-06, "loss": 17.1987, "step": 13440 }, { "epoch": 0.2431962679769732, "grad_norm": 42.125, "learning_rate": 9.96200064754287e-06, "loss": 17.1089, "step": 13450 }, { "epoch": 0.24337708304610106, "grad_norm": 39.875, "learning_rate": 9.96197239523621e-06, "loss": 17.1529, "step": 13460 }, { "epoch": 0.24355789811522893, "grad_norm": 41.8125, "learning_rate": 9.96194414292955e-06, "loss": 17.3545, "step": 13470 }, { "epoch": 0.24373871318435678, "grad_norm": 41.28125, "learning_rate": 9.96191589062289e-06, "loss": 17.4552, "step": 13480 }, { "epoch": 0.24391952825348465, "grad_norm": 40.46875, "learning_rate": 9.961887638316231e-06, "loss": 17.1754, "step": 13490 }, { "epoch": 0.24410034332261252, "grad_norm": 37.53125, "learning_rate": 9.961859386009572e-06, "loss": 16.8383, "step": 13500 }, { "epoch": 0.24428115839174036, "grad_norm": 41.90625, "learning_rate": 9.961831133702912e-06, "loss": 17.8071, "step": 13510 }, { "epoch": 0.24446197346086823, "grad_norm": 38.78125, "learning_rate": 9.961802881396251e-06, "loss": 17.6229, "step": 13520 }, { "epoch": 0.2446427885299961, "grad_norm": 40.34375, "learning_rate": 9.961774629089593e-06, "loss": 17.1821, "step": 13530 }, { "epoch": 0.24482360359912395, "grad_norm": 40.53125, "learning_rate": 9.961746376782934e-06, "loss": 17.358, "step": 13540 }, { "epoch": 0.24500441866825182, "grad_norm": 39.09375, "learning_rate": 9.961718124476275e-06, "loss": 17.3703, "step": 13550 }, { "epoch": 0.24518523373737966, "grad_norm": 40.0625, "learning_rate": 9.961689872169614e-06, "loss": 17.2847, "step": 13560 }, { "epoch": 0.24536604880650753, "grad_norm": 41.28125, "learning_rate": 9.961661619862954e-06, "loss": 16.8204, "step": 13570 }, { "epoch": 0.2455468638756354, "grad_norm": 40.96875, "learning_rate": 9.961633367556295e-06, "loss": 17.361, "step": 13580 }, { "epoch": 0.24572767894476324, "grad_norm": 42.96875, "learning_rate": 9.961605115249636e-06, "loss": 17.4287, "step": 13590 }, { "epoch": 0.24590849401389112, "grad_norm": 41.21875, "learning_rate": 9.961576862942976e-06, "loss": 17.5482, "step": 13600 }, { "epoch": 0.246089309083019, "grad_norm": 39.65625, "learning_rate": 9.961548610636315e-06, "loss": 17.2304, "step": 13610 }, { "epoch": 0.24627012415214683, "grad_norm": 38.84375, "learning_rate": 9.961520358329657e-06, "loss": 17.3173, "step": 13620 }, { "epoch": 0.2464509392212747, "grad_norm": 40.59375, "learning_rate": 9.961492106022998e-06, "loss": 17.6542, "step": 13630 }, { "epoch": 0.24663175429040257, "grad_norm": 40.3125, "learning_rate": 9.961463853716337e-06, "loss": 17.1626, "step": 13640 }, { "epoch": 0.24681256935953041, "grad_norm": 40.5, "learning_rate": 9.961435601409678e-06, "loss": 17.2235, "step": 13650 }, { "epoch": 0.24699338442865829, "grad_norm": 38.9375, "learning_rate": 9.961407349103018e-06, "loss": 17.0925, "step": 13660 }, { "epoch": 0.24717419949778616, "grad_norm": 38.71875, "learning_rate": 9.961379096796359e-06, "loss": 16.9831, "step": 13670 }, { "epoch": 0.247355014566914, "grad_norm": 36.78125, "learning_rate": 9.9613508444897e-06, "loss": 16.8113, "step": 13680 }, { "epoch": 0.24753582963604187, "grad_norm": 42.65625, "learning_rate": 9.96132259218304e-06, "loss": 17.2586, "step": 13690 }, { "epoch": 0.24771664470516974, "grad_norm": 39.625, "learning_rate": 9.961294339876379e-06, "loss": 17.7591, "step": 13700 }, { "epoch": 0.24789745977429758, "grad_norm": 38.84375, "learning_rate": 9.961266087569721e-06, "loss": 17.3166, "step": 13710 }, { "epoch": 0.24807827484342546, "grad_norm": 39.375, "learning_rate": 9.961237835263062e-06, "loss": 17.3201, "step": 13720 }, { "epoch": 0.2482590899125533, "grad_norm": 39.5625, "learning_rate": 9.9612095829564e-06, "loss": 16.998, "step": 13730 }, { "epoch": 0.24843990498168117, "grad_norm": 40.34375, "learning_rate": 9.961181330649741e-06, "loss": 17.3526, "step": 13740 }, { "epoch": 0.24862072005080904, "grad_norm": 40.59375, "learning_rate": 9.961153078343082e-06, "loss": 17.0846, "step": 13750 }, { "epoch": 0.24880153511993688, "grad_norm": 40.59375, "learning_rate": 9.961124826036423e-06, "loss": 17.3245, "step": 13760 }, { "epoch": 0.24898235018906476, "grad_norm": 39.09375, "learning_rate": 9.961096573729763e-06, "loss": 17.525, "step": 13770 }, { "epoch": 0.24916316525819263, "grad_norm": 40.46875, "learning_rate": 9.961068321423102e-06, "loss": 16.7753, "step": 13780 }, { "epoch": 0.24934398032732047, "grad_norm": 41.65625, "learning_rate": 9.961040069116443e-06, "loss": 17.5064, "step": 13790 }, { "epoch": 0.24952479539644834, "grad_norm": 41.0, "learning_rate": 9.961011816809785e-06, "loss": 16.8966, "step": 13800 }, { "epoch": 0.2497056104655762, "grad_norm": 42.15625, "learning_rate": 9.960983564503126e-06, "loss": 17.5192, "step": 13810 }, { "epoch": 0.24988642553470405, "grad_norm": 41.875, "learning_rate": 9.960955312196465e-06, "loss": 17.1112, "step": 13820 }, { "epoch": 0.2500672406038319, "grad_norm": 40.8125, "learning_rate": 9.960927059889805e-06, "loss": 17.2937, "step": 13830 }, { "epoch": 0.25024805567295977, "grad_norm": 42.8125, "learning_rate": 9.960898807583146e-06, "loss": 17.4666, "step": 13840 }, { "epoch": 0.25042887074208764, "grad_norm": 38.875, "learning_rate": 9.960870555276487e-06, "loss": 17.4754, "step": 13850 }, { "epoch": 0.2506096858112155, "grad_norm": 43.8125, "learning_rate": 9.960842302969827e-06, "loss": 17.234, "step": 13860 }, { "epoch": 0.2507905008803434, "grad_norm": 40.65625, "learning_rate": 9.960814050663166e-06, "loss": 17.5303, "step": 13870 }, { "epoch": 0.25097131594947125, "grad_norm": 41.71875, "learning_rate": 9.960785798356508e-06, "loss": 17.6465, "step": 13880 }, { "epoch": 0.25115213101859907, "grad_norm": 39.15625, "learning_rate": 9.960757546049849e-06, "loss": 17.1546, "step": 13890 }, { "epoch": 0.25133294608772694, "grad_norm": 41.28125, "learning_rate": 9.960729293743188e-06, "loss": 17.2995, "step": 13900 }, { "epoch": 0.2515137611568548, "grad_norm": 41.8125, "learning_rate": 9.960701041436529e-06, "loss": 17.6509, "step": 13910 }, { "epoch": 0.2516945762259827, "grad_norm": 39.90625, "learning_rate": 9.96067278912987e-06, "loss": 17.4684, "step": 13920 }, { "epoch": 0.25187539129511055, "grad_norm": 41.96875, "learning_rate": 9.96064453682321e-06, "loss": 17.4557, "step": 13930 }, { "epoch": 0.2520562063642384, "grad_norm": 40.84375, "learning_rate": 9.96061628451655e-06, "loss": 17.2042, "step": 13940 }, { "epoch": 0.25223702143336624, "grad_norm": 42.34375, "learning_rate": 9.96058803220989e-06, "loss": 17.3159, "step": 13950 }, { "epoch": 0.2524178365024941, "grad_norm": 40.65625, "learning_rate": 9.96055977990323e-06, "loss": 17.0903, "step": 13960 }, { "epoch": 0.252598651571622, "grad_norm": 38.28125, "learning_rate": 9.960531527596572e-06, "loss": 17.1521, "step": 13970 }, { "epoch": 0.25277946664074985, "grad_norm": 40.875, "learning_rate": 9.960503275289913e-06, "loss": 16.9971, "step": 13980 }, { "epoch": 0.2529602817098777, "grad_norm": 41.15625, "learning_rate": 9.960475022983252e-06, "loss": 17.4463, "step": 13990 }, { "epoch": 0.25314109677900554, "grad_norm": 41.46875, "learning_rate": 9.960446770676593e-06, "loss": 17.1802, "step": 14000 }, { "epoch": 0.2533219118481334, "grad_norm": 41.75, "learning_rate": 9.960418518369933e-06, "loss": 17.2515, "step": 14010 }, { "epoch": 0.2535027269172613, "grad_norm": 40.5625, "learning_rate": 9.960390266063274e-06, "loss": 17.5549, "step": 14020 }, { "epoch": 0.25368354198638915, "grad_norm": 39.8125, "learning_rate": 9.960362013756614e-06, "loss": 17.5882, "step": 14030 }, { "epoch": 0.253864357055517, "grad_norm": 40.65625, "learning_rate": 9.960333761449953e-06, "loss": 17.462, "step": 14040 }, { "epoch": 0.2540451721246449, "grad_norm": 39.78125, "learning_rate": 9.960305509143294e-06, "loss": 17.2972, "step": 14050 }, { "epoch": 0.2542259871937727, "grad_norm": 41.875, "learning_rate": 9.960277256836636e-06, "loss": 17.5263, "step": 14060 }, { "epoch": 0.2544068022629006, "grad_norm": 40.3125, "learning_rate": 9.960249004529975e-06, "loss": 17.6281, "step": 14070 }, { "epoch": 0.25458761733202845, "grad_norm": 37.40625, "learning_rate": 9.960220752223316e-06, "loss": 17.341, "step": 14080 }, { "epoch": 0.2547684324011563, "grad_norm": 39.8125, "learning_rate": 9.960192499916656e-06, "loss": 17.308, "step": 14090 }, { "epoch": 0.2549492474702842, "grad_norm": 37.71875, "learning_rate": 9.960164247609997e-06, "loss": 17.3255, "step": 14100 }, { "epoch": 0.25513006253941206, "grad_norm": 40.09375, "learning_rate": 9.960135995303338e-06, "loss": 17.5287, "step": 14110 }, { "epoch": 0.2553108776085399, "grad_norm": 43.15625, "learning_rate": 9.960107742996678e-06, "loss": 17.3311, "step": 14120 }, { "epoch": 0.25549169267766775, "grad_norm": 39.46875, "learning_rate": 9.960079490690017e-06, "loss": 16.9866, "step": 14130 }, { "epoch": 0.2556725077467956, "grad_norm": 41.25, "learning_rate": 9.960051238383358e-06, "loss": 17.3274, "step": 14140 }, { "epoch": 0.2558533228159235, "grad_norm": 40.0625, "learning_rate": 9.9600229860767e-06, "loss": 17.3333, "step": 14150 }, { "epoch": 0.25603413788505136, "grad_norm": 37.75, "learning_rate": 9.959994733770039e-06, "loss": 17.3042, "step": 14160 }, { "epoch": 0.2562149529541792, "grad_norm": 40.875, "learning_rate": 9.95996648146338e-06, "loss": 17.2533, "step": 14170 }, { "epoch": 0.25639576802330705, "grad_norm": 36.25, "learning_rate": 9.95993822915672e-06, "loss": 17.3347, "step": 14180 }, { "epoch": 0.2565765830924349, "grad_norm": 39.46875, "learning_rate": 9.959909976850061e-06, "loss": 17.7013, "step": 14190 }, { "epoch": 0.2567573981615628, "grad_norm": 41.25, "learning_rate": 9.959881724543402e-06, "loss": 17.4491, "step": 14200 }, { "epoch": 0.25693821323069066, "grad_norm": 41.4375, "learning_rate": 9.95985347223674e-06, "loss": 16.9943, "step": 14210 }, { "epoch": 0.25711902829981853, "grad_norm": 39.25, "learning_rate": 9.959825219930081e-06, "loss": 17.5549, "step": 14220 }, { "epoch": 0.25729984336894635, "grad_norm": 40.09375, "learning_rate": 9.959796967623423e-06, "loss": 17.2006, "step": 14230 }, { "epoch": 0.2574806584380742, "grad_norm": 39.1875, "learning_rate": 9.959768715316764e-06, "loss": 17.0657, "step": 14240 }, { "epoch": 0.2576614735072021, "grad_norm": 39.03125, "learning_rate": 9.959740463010103e-06, "loss": 16.9687, "step": 14250 }, { "epoch": 0.25784228857632996, "grad_norm": 39.8125, "learning_rate": 9.959712210703444e-06, "loss": 17.2125, "step": 14260 }, { "epoch": 0.25802310364545783, "grad_norm": 42.6875, "learning_rate": 9.959683958396784e-06, "loss": 17.1497, "step": 14270 }, { "epoch": 0.2582039187145857, "grad_norm": 42.0625, "learning_rate": 9.959655706090125e-06, "loss": 16.7245, "step": 14280 }, { "epoch": 0.2583847337837135, "grad_norm": 40.6875, "learning_rate": 9.959627453783466e-06, "loss": 17.2191, "step": 14290 }, { "epoch": 0.2585655488528414, "grad_norm": 38.09375, "learning_rate": 9.959599201476804e-06, "loss": 17.0521, "step": 14300 }, { "epoch": 0.25874636392196926, "grad_norm": 41.84375, "learning_rate": 9.959570949170145e-06, "loss": 17.1253, "step": 14310 }, { "epoch": 0.25892717899109713, "grad_norm": 39.125, "learning_rate": 9.959542696863487e-06, "loss": 16.9786, "step": 14320 }, { "epoch": 0.259107994060225, "grad_norm": 39.53125, "learning_rate": 9.959514444556826e-06, "loss": 17.5312, "step": 14330 }, { "epoch": 0.2592888091293528, "grad_norm": 36.75, "learning_rate": 9.959486192250167e-06, "loss": 17.2473, "step": 14340 }, { "epoch": 0.2594696241984807, "grad_norm": 40.0625, "learning_rate": 9.959457939943508e-06, "loss": 16.8098, "step": 14350 }, { "epoch": 0.25965043926760856, "grad_norm": 40.65625, "learning_rate": 9.959429687636848e-06, "loss": 17.3783, "step": 14360 }, { "epoch": 0.25983125433673643, "grad_norm": 39.625, "learning_rate": 9.959401435330189e-06, "loss": 17.593, "step": 14370 }, { "epoch": 0.2600120694058643, "grad_norm": 39.1875, "learning_rate": 9.959373183023528e-06, "loss": 17.406, "step": 14380 }, { "epoch": 0.26019288447499217, "grad_norm": 39.96875, "learning_rate": 9.959344930716868e-06, "loss": 17.3751, "step": 14390 }, { "epoch": 0.26037369954412, "grad_norm": 40.40625, "learning_rate": 9.959316678410209e-06, "loss": 17.2173, "step": 14400 }, { "epoch": 0.26055451461324786, "grad_norm": 40.53125, "learning_rate": 9.959288426103551e-06, "loss": 17.1151, "step": 14410 }, { "epoch": 0.2607353296823757, "grad_norm": 42.3125, "learning_rate": 9.95926017379689e-06, "loss": 17.3042, "step": 14420 }, { "epoch": 0.2609161447515036, "grad_norm": 42.4375, "learning_rate": 9.95923192149023e-06, "loss": 17.3013, "step": 14430 }, { "epoch": 0.26109695982063147, "grad_norm": 41.71875, "learning_rate": 9.959203669183571e-06, "loss": 17.1391, "step": 14440 }, { "epoch": 0.26127777488975934, "grad_norm": 41.71875, "learning_rate": 9.959175416876912e-06, "loss": 17.412, "step": 14450 }, { "epoch": 0.26145858995888716, "grad_norm": 41.1875, "learning_rate": 9.959147164570253e-06, "loss": 17.0796, "step": 14460 }, { "epoch": 0.261639405028015, "grad_norm": 41.65625, "learning_rate": 9.959118912263592e-06, "loss": 17.1053, "step": 14470 }, { "epoch": 0.2618202200971429, "grad_norm": 42.03125, "learning_rate": 9.959090659956932e-06, "loss": 17.2328, "step": 14480 }, { "epoch": 0.26200103516627077, "grad_norm": 39.59375, "learning_rate": 9.959062407650273e-06, "loss": 17.2747, "step": 14490 }, { "epoch": 0.26218185023539864, "grad_norm": 40.1875, "learning_rate": 9.959034155343614e-06, "loss": 17.1025, "step": 14500 }, { "epoch": 0.26236266530452645, "grad_norm": 40.25, "learning_rate": 9.959005903036954e-06, "loss": 17.8269, "step": 14510 }, { "epoch": 0.2625434803736543, "grad_norm": 39.65625, "learning_rate": 9.958977650730295e-06, "loss": 17.1326, "step": 14520 }, { "epoch": 0.2627242954427822, "grad_norm": 38.96875, "learning_rate": 9.958949398423635e-06, "loss": 17.4717, "step": 14530 }, { "epoch": 0.26290511051191007, "grad_norm": 40.96875, "learning_rate": 9.958921146116976e-06, "loss": 16.7587, "step": 14540 }, { "epoch": 0.26308592558103794, "grad_norm": 44.09375, "learning_rate": 9.958892893810315e-06, "loss": 17.5066, "step": 14550 }, { "epoch": 0.2632667406501658, "grad_norm": 40.53125, "learning_rate": 9.958864641503656e-06, "loss": 17.0883, "step": 14560 }, { "epoch": 0.2634475557192936, "grad_norm": 41.8125, "learning_rate": 9.958836389196996e-06, "loss": 17.5933, "step": 14570 }, { "epoch": 0.2636283707884215, "grad_norm": 42.0, "learning_rate": 9.958808136890338e-06, "loss": 17.4965, "step": 14580 }, { "epoch": 0.26380918585754937, "grad_norm": 38.625, "learning_rate": 9.958779884583677e-06, "loss": 17.28, "step": 14590 }, { "epoch": 0.26399000092667724, "grad_norm": 40.5, "learning_rate": 9.958751632277018e-06, "loss": 17.4132, "step": 14600 }, { "epoch": 0.2641708159958051, "grad_norm": 41.28125, "learning_rate": 9.958723379970359e-06, "loss": 17.2562, "step": 14610 }, { "epoch": 0.264351631064933, "grad_norm": 40.21875, "learning_rate": 9.9586951276637e-06, "loss": 17.5402, "step": 14620 }, { "epoch": 0.2645324461340608, "grad_norm": 41.6875, "learning_rate": 9.95866687535704e-06, "loss": 17.2135, "step": 14630 }, { "epoch": 0.26471326120318867, "grad_norm": 40.21875, "learning_rate": 9.958638623050379e-06, "loss": 17.0918, "step": 14640 }, { "epoch": 0.26489407627231654, "grad_norm": 40.0625, "learning_rate": 9.95861037074372e-06, "loss": 17.3941, "step": 14650 }, { "epoch": 0.2650748913414444, "grad_norm": 42.9375, "learning_rate": 9.95858211843706e-06, "loss": 17.3198, "step": 14660 }, { "epoch": 0.2652557064105723, "grad_norm": 38.8125, "learning_rate": 9.9585538661304e-06, "loss": 16.8643, "step": 14670 }, { "epoch": 0.2654365214797001, "grad_norm": 41.0, "learning_rate": 9.958525613823741e-06, "loss": 17.2676, "step": 14680 }, { "epoch": 0.26561733654882796, "grad_norm": 39.71875, "learning_rate": 9.958497361517082e-06, "loss": 17.462, "step": 14690 }, { "epoch": 0.26579815161795584, "grad_norm": 40.5625, "learning_rate": 9.958469109210423e-06, "loss": 17.0876, "step": 14700 }, { "epoch": 0.2659789666870837, "grad_norm": 37.8125, "learning_rate": 9.958440856903763e-06, "loss": 17.2175, "step": 14710 }, { "epoch": 0.2661597817562116, "grad_norm": 40.09375, "learning_rate": 9.958412604597104e-06, "loss": 17.4817, "step": 14720 }, { "epoch": 0.26634059682533945, "grad_norm": 40.09375, "learning_rate": 9.958384352290443e-06, "loss": 17.4135, "step": 14730 }, { "epoch": 0.26652141189446726, "grad_norm": 40.375, "learning_rate": 9.958356099983783e-06, "loss": 17.1609, "step": 14740 }, { "epoch": 0.26670222696359513, "grad_norm": 40.03125, "learning_rate": 9.958327847677124e-06, "loss": 17.1004, "step": 14750 }, { "epoch": 0.266883042032723, "grad_norm": 39.90625, "learning_rate": 9.958299595370465e-06, "loss": 17.1141, "step": 14760 }, { "epoch": 0.2670638571018509, "grad_norm": 40.6875, "learning_rate": 9.958271343063805e-06, "loss": 17.2939, "step": 14770 }, { "epoch": 0.26724467217097875, "grad_norm": 41.625, "learning_rate": 9.958243090757146e-06, "loss": 17.0918, "step": 14780 }, { "epoch": 0.26742548724010656, "grad_norm": 41.46875, "learning_rate": 9.958214838450486e-06, "loss": 17.1987, "step": 14790 }, { "epoch": 0.26760630230923443, "grad_norm": 41.3125, "learning_rate": 9.958186586143827e-06, "loss": 16.9342, "step": 14800 }, { "epoch": 0.2677871173783623, "grad_norm": 41.15625, "learning_rate": 9.958158333837166e-06, "loss": 17.0782, "step": 14810 }, { "epoch": 0.2679679324474902, "grad_norm": 42.9375, "learning_rate": 9.958130081530507e-06, "loss": 17.7101, "step": 14820 }, { "epoch": 0.26814874751661805, "grad_norm": 40.6875, "learning_rate": 9.958101829223847e-06, "loss": 17.6526, "step": 14830 }, { "epoch": 0.2683295625857459, "grad_norm": 43.46875, "learning_rate": 9.958073576917188e-06, "loss": 17.1204, "step": 14840 }, { "epoch": 0.26851037765487373, "grad_norm": 41.53125, "learning_rate": 9.958045324610529e-06, "loss": 17.2844, "step": 14850 }, { "epoch": 0.2686911927240016, "grad_norm": 39.28125, "learning_rate": 9.958017072303869e-06, "loss": 16.8951, "step": 14860 }, { "epoch": 0.2688720077931295, "grad_norm": 39.25, "learning_rate": 9.95798881999721e-06, "loss": 17.281, "step": 14870 }, { "epoch": 0.26905282286225735, "grad_norm": 42.03125, "learning_rate": 9.95796056769055e-06, "loss": 17.4636, "step": 14880 }, { "epoch": 0.2692336379313852, "grad_norm": 38.4375, "learning_rate": 9.957932315383891e-06, "loss": 17.1833, "step": 14890 }, { "epoch": 0.2694144530005131, "grad_norm": 41.375, "learning_rate": 9.95790406307723e-06, "loss": 17.3453, "step": 14900 }, { "epoch": 0.2695952680696409, "grad_norm": 42.125, "learning_rate": 9.95787581077057e-06, "loss": 17.3075, "step": 14910 }, { "epoch": 0.2697760831387688, "grad_norm": 41.1875, "learning_rate": 9.957847558463911e-06, "loss": 17.2296, "step": 14920 }, { "epoch": 0.26995689820789665, "grad_norm": 37.8125, "learning_rate": 9.957819306157252e-06, "loss": 17.2295, "step": 14930 }, { "epoch": 0.2701377132770245, "grad_norm": 42.84375, "learning_rate": 9.957791053850592e-06, "loss": 17.1168, "step": 14940 }, { "epoch": 0.2703185283461524, "grad_norm": 41.9375, "learning_rate": 9.957762801543933e-06, "loss": 16.8162, "step": 14950 }, { "epoch": 0.2704993434152802, "grad_norm": 44.0625, "learning_rate": 9.957734549237274e-06, "loss": 17.2402, "step": 14960 }, { "epoch": 0.2706801584844081, "grad_norm": 42.46875, "learning_rate": 9.957706296930614e-06, "loss": 17.6101, "step": 14970 }, { "epoch": 0.27086097355353594, "grad_norm": 42.6875, "learning_rate": 9.957678044623953e-06, "loss": 17.2303, "step": 14980 }, { "epoch": 0.2710417886226638, "grad_norm": 40.53125, "learning_rate": 9.957649792317294e-06, "loss": 17.1591, "step": 14990 }, { "epoch": 0.2712226036917917, "grad_norm": 39.25, "learning_rate": 9.957621540010634e-06, "loss": 17.2613, "step": 15000 }, { "epoch": 0.2712226036917917, "eval_loss": 2.1613612174987793, "eval_runtime": 229.1402, "eval_samples_per_second": 3168.623, "eval_steps_per_second": 49.511, "step": 15000 }, { "epoch": 0.27140341876091956, "grad_norm": 39.84375, "learning_rate": 9.957593287703975e-06, "loss": 17.1637, "step": 15010 }, { "epoch": 0.2715842338300474, "grad_norm": 38.71875, "learning_rate": 9.957565035397316e-06, "loss": 17.2246, "step": 15020 }, { "epoch": 0.27176504889917524, "grad_norm": 39.34375, "learning_rate": 9.957536783090656e-06, "loss": 17.1489, "step": 15030 }, { "epoch": 0.2719458639683031, "grad_norm": 40.78125, "learning_rate": 9.957508530783997e-06, "loss": 17.6059, "step": 15040 }, { "epoch": 0.272126679037431, "grad_norm": 39.53125, "learning_rate": 9.957480278477338e-06, "loss": 17.1587, "step": 15050 }, { "epoch": 0.27230749410655886, "grad_norm": 40.84375, "learning_rate": 9.957452026170678e-06, "loss": 17.5717, "step": 15060 }, { "epoch": 0.2724883091756867, "grad_norm": 40.15625, "learning_rate": 9.957423773864017e-06, "loss": 17.1256, "step": 15070 }, { "epoch": 0.27266912424481454, "grad_norm": 42.25, "learning_rate": 9.957395521557358e-06, "loss": 17.153, "step": 15080 }, { "epoch": 0.2728499393139424, "grad_norm": 39.5, "learning_rate": 9.957367269250698e-06, "loss": 16.9621, "step": 15090 }, { "epoch": 0.2730307543830703, "grad_norm": 40.8125, "learning_rate": 9.957339016944039e-06, "loss": 17.2758, "step": 15100 }, { "epoch": 0.27321156945219816, "grad_norm": 41.5, "learning_rate": 9.95731076463738e-06, "loss": 17.0574, "step": 15110 }, { "epoch": 0.273392384521326, "grad_norm": 41.625, "learning_rate": 9.95728251233072e-06, "loss": 17.3584, "step": 15120 }, { "epoch": 0.27357319959045384, "grad_norm": 41.90625, "learning_rate": 9.957254260024061e-06, "loss": 17.1221, "step": 15130 }, { "epoch": 0.2737540146595817, "grad_norm": 41.40625, "learning_rate": 9.957226007717401e-06, "loss": 17.4213, "step": 15140 }, { "epoch": 0.2739348297287096, "grad_norm": 38.59375, "learning_rate": 9.957197755410742e-06, "loss": 17.5993, "step": 15150 }, { "epoch": 0.27411564479783745, "grad_norm": 41.90625, "learning_rate": 9.957169503104081e-06, "loss": 17.2159, "step": 15160 }, { "epoch": 0.2742964598669653, "grad_norm": 41.25, "learning_rate": 9.957141250797422e-06, "loss": 17.6288, "step": 15170 }, { "epoch": 0.2744772749360932, "grad_norm": 40.3125, "learning_rate": 9.957112998490762e-06, "loss": 17.0387, "step": 15180 }, { "epoch": 0.274658090005221, "grad_norm": 41.1875, "learning_rate": 9.957084746184103e-06, "loss": 17.3209, "step": 15190 }, { "epoch": 0.2748389050743489, "grad_norm": 40.78125, "learning_rate": 9.957056493877444e-06, "loss": 16.9922, "step": 15200 }, { "epoch": 0.27501972014347675, "grad_norm": 40.15625, "learning_rate": 9.957028241570784e-06, "loss": 17.4538, "step": 15210 }, { "epoch": 0.2752005352126046, "grad_norm": 41.875, "learning_rate": 9.956999989264125e-06, "loss": 17.3762, "step": 15220 }, { "epoch": 0.2753813502817325, "grad_norm": 38.875, "learning_rate": 9.956971736957465e-06, "loss": 17.343, "step": 15230 }, { "epoch": 0.27556216535086037, "grad_norm": 40.90625, "learning_rate": 9.956943484650804e-06, "loss": 17.5539, "step": 15240 }, { "epoch": 0.2757429804199882, "grad_norm": 38.90625, "learning_rate": 9.956915232344145e-06, "loss": 16.9692, "step": 15250 }, { "epoch": 0.27592379548911605, "grad_norm": 40.8125, "learning_rate": 9.956886980037486e-06, "loss": 17.3419, "step": 15260 }, { "epoch": 0.2761046105582439, "grad_norm": 42.5, "learning_rate": 9.956858727730826e-06, "loss": 16.8046, "step": 15270 }, { "epoch": 0.2762854256273718, "grad_norm": 38.53125, "learning_rate": 9.956830475424167e-06, "loss": 17.2915, "step": 15280 }, { "epoch": 0.27646624069649967, "grad_norm": 41.78125, "learning_rate": 9.956802223117507e-06, "loss": 17.1039, "step": 15290 }, { "epoch": 0.2766470557656275, "grad_norm": 40.0, "learning_rate": 9.956773970810848e-06, "loss": 17.0082, "step": 15300 }, { "epoch": 0.27682787083475535, "grad_norm": 41.25, "learning_rate": 9.956745718504189e-06, "loss": 17.4862, "step": 15310 }, { "epoch": 0.2770086859038832, "grad_norm": 41.40625, "learning_rate": 9.95671746619753e-06, "loss": 17.2742, "step": 15320 }, { "epoch": 0.2771895009730111, "grad_norm": 42.0, "learning_rate": 9.956689213890868e-06, "loss": 17.3996, "step": 15330 }, { "epoch": 0.27737031604213896, "grad_norm": 41.625, "learning_rate": 9.956660961584209e-06, "loss": 17.3848, "step": 15340 }, { "epoch": 0.27755113111126684, "grad_norm": 39.75, "learning_rate": 9.95663270927755e-06, "loss": 17.3582, "step": 15350 }, { "epoch": 0.27773194618039465, "grad_norm": 44.0, "learning_rate": 9.95660445697089e-06, "loss": 17.5821, "step": 15360 }, { "epoch": 0.2779127612495225, "grad_norm": 41.3125, "learning_rate": 9.95657620466423e-06, "loss": 17.4158, "step": 15370 }, { "epoch": 0.2780935763186504, "grad_norm": 41.78125, "learning_rate": 9.956547952357571e-06, "loss": 17.2659, "step": 15380 }, { "epoch": 0.27827439138777826, "grad_norm": 41.90625, "learning_rate": 9.956519700050912e-06, "loss": 17.218, "step": 15390 }, { "epoch": 0.27845520645690613, "grad_norm": 40.15625, "learning_rate": 9.956491447744253e-06, "loss": 17.1191, "step": 15400 }, { "epoch": 0.278636021526034, "grad_norm": 43.625, "learning_rate": 9.956463195437592e-06, "loss": 17.2365, "step": 15410 }, { "epoch": 0.2788168365951618, "grad_norm": 41.8125, "learning_rate": 9.956434943130932e-06, "loss": 17.5236, "step": 15420 }, { "epoch": 0.2789976516642897, "grad_norm": 42.625, "learning_rate": 9.956406690824273e-06, "loss": 17.0254, "step": 15430 }, { "epoch": 0.27917846673341756, "grad_norm": 41.40625, "learning_rate": 9.956378438517613e-06, "loss": 17.2221, "step": 15440 }, { "epoch": 0.27935928180254543, "grad_norm": 39.84375, "learning_rate": 9.956350186210954e-06, "loss": 17.5745, "step": 15450 }, { "epoch": 0.2795400968716733, "grad_norm": 40.9375, "learning_rate": 9.956321933904295e-06, "loss": 16.9634, "step": 15460 }, { "epoch": 0.2797209119408011, "grad_norm": 40.0625, "learning_rate": 9.956293681597635e-06, "loss": 16.9116, "step": 15470 }, { "epoch": 0.279901727009929, "grad_norm": 39.34375, "learning_rate": 9.956265429290976e-06, "loss": 17.1836, "step": 15480 }, { "epoch": 0.28008254207905686, "grad_norm": 40.5, "learning_rate": 9.956237176984316e-06, "loss": 17.068, "step": 15490 }, { "epoch": 0.28026335714818473, "grad_norm": 41.0, "learning_rate": 9.956208924677655e-06, "loss": 17.2735, "step": 15500 }, { "epoch": 0.2804441722173126, "grad_norm": 40.09375, "learning_rate": 9.956180672370996e-06, "loss": 17.0922, "step": 15510 }, { "epoch": 0.2806249872864405, "grad_norm": 40.1875, "learning_rate": 9.956152420064337e-06, "loss": 16.9675, "step": 15520 }, { "epoch": 0.2808058023555683, "grad_norm": 40.28125, "learning_rate": 9.956124167757677e-06, "loss": 17.1198, "step": 15530 }, { "epoch": 0.28098661742469616, "grad_norm": 42.5, "learning_rate": 9.956095915451018e-06, "loss": 16.9641, "step": 15540 }, { "epoch": 0.28116743249382403, "grad_norm": 41.78125, "learning_rate": 9.956067663144359e-06, "loss": 17.2347, "step": 15550 }, { "epoch": 0.2813482475629519, "grad_norm": 39.0625, "learning_rate": 9.956039410837699e-06, "loss": 16.6774, "step": 15560 }, { "epoch": 0.2815290626320798, "grad_norm": 37.3125, "learning_rate": 9.95601115853104e-06, "loss": 17.3081, "step": 15570 }, { "epoch": 0.28170987770120764, "grad_norm": 40.8125, "learning_rate": 9.95598290622438e-06, "loss": 17.3257, "step": 15580 }, { "epoch": 0.28189069277033546, "grad_norm": 40.5, "learning_rate": 9.95595465391772e-06, "loss": 16.8696, "step": 15590 }, { "epoch": 0.28207150783946333, "grad_norm": 40.25, "learning_rate": 9.95592640161106e-06, "loss": 17.0273, "step": 15600 }, { "epoch": 0.2822523229085912, "grad_norm": 39.78125, "learning_rate": 9.9558981493044e-06, "loss": 17.3795, "step": 15610 }, { "epoch": 0.2824331379777191, "grad_norm": 41.8125, "learning_rate": 9.955869896997741e-06, "loss": 17.113, "step": 15620 }, { "epoch": 0.28261395304684694, "grad_norm": 39.65625, "learning_rate": 9.955841644691082e-06, "loss": 17.3427, "step": 15630 }, { "epoch": 0.28279476811597476, "grad_norm": 40.59375, "learning_rate": 9.955813392384422e-06, "loss": 17.609, "step": 15640 }, { "epoch": 0.28297558318510263, "grad_norm": 39.5, "learning_rate": 9.955785140077763e-06, "loss": 17.6259, "step": 15650 }, { "epoch": 0.2831563982542305, "grad_norm": 40.28125, "learning_rate": 9.955756887771104e-06, "loss": 17.0857, "step": 15660 }, { "epoch": 0.2833372133233584, "grad_norm": 42.125, "learning_rate": 9.955728635464443e-06, "loss": 17.0976, "step": 15670 }, { "epoch": 0.28351802839248624, "grad_norm": 42.71875, "learning_rate": 9.955700383157783e-06, "loss": 17.3903, "step": 15680 }, { "epoch": 0.2836988434616141, "grad_norm": 40.53125, "learning_rate": 9.955672130851124e-06, "loss": 17.3015, "step": 15690 }, { "epoch": 0.28387965853074193, "grad_norm": 38.8125, "learning_rate": 9.955643878544464e-06, "loss": 16.9424, "step": 15700 }, { "epoch": 0.2840604735998698, "grad_norm": 40.34375, "learning_rate": 9.955615626237805e-06, "loss": 17.3459, "step": 15710 }, { "epoch": 0.28424128866899767, "grad_norm": 44.75, "learning_rate": 9.955587373931146e-06, "loss": 17.1487, "step": 15720 }, { "epoch": 0.28442210373812554, "grad_norm": 40.53125, "learning_rate": 9.955559121624486e-06, "loss": 17.3224, "step": 15730 }, { "epoch": 0.2846029188072534, "grad_norm": 40.15625, "learning_rate": 9.955530869317827e-06, "loss": 17.1909, "step": 15740 }, { "epoch": 0.2847837338763813, "grad_norm": 41.0, "learning_rate": 9.955502617011168e-06, "loss": 16.998, "step": 15750 }, { "epoch": 0.2849645489455091, "grad_norm": 41.46875, "learning_rate": 9.955474364704507e-06, "loss": 17.4409, "step": 15760 }, { "epoch": 0.28514536401463697, "grad_norm": 38.0625, "learning_rate": 9.955446112397847e-06, "loss": 17.0526, "step": 15770 }, { "epoch": 0.28532617908376484, "grad_norm": 40.4375, "learning_rate": 9.955417860091188e-06, "loss": 16.7261, "step": 15780 }, { "epoch": 0.2855069941528927, "grad_norm": 41.375, "learning_rate": 9.955389607784528e-06, "loss": 17.4373, "step": 15790 }, { "epoch": 0.2856878092220206, "grad_norm": 39.65625, "learning_rate": 9.955361355477869e-06, "loss": 17.2315, "step": 15800 }, { "epoch": 0.2858686242911484, "grad_norm": 40.84375, "learning_rate": 9.95533310317121e-06, "loss": 17.264, "step": 15810 }, { "epoch": 0.28604943936027627, "grad_norm": 40.9375, "learning_rate": 9.95530485086455e-06, "loss": 17.5945, "step": 15820 }, { "epoch": 0.28623025442940414, "grad_norm": 40.0, "learning_rate": 9.955276598557891e-06, "loss": 17.2339, "step": 15830 }, { "epoch": 0.286411069498532, "grad_norm": 37.9375, "learning_rate": 9.95524834625123e-06, "loss": 17.3516, "step": 15840 }, { "epoch": 0.2865918845676599, "grad_norm": 39.125, "learning_rate": 9.95522009394457e-06, "loss": 17.5479, "step": 15850 }, { "epoch": 0.28677269963678775, "grad_norm": 39.96875, "learning_rate": 9.955191841637911e-06, "loss": 17.3622, "step": 15860 }, { "epoch": 0.28695351470591557, "grad_norm": 40.3125, "learning_rate": 9.955163589331252e-06, "loss": 17.4275, "step": 15870 }, { "epoch": 0.28713432977504344, "grad_norm": 41.46875, "learning_rate": 9.955135337024592e-06, "loss": 17.2381, "step": 15880 }, { "epoch": 0.2873151448441713, "grad_norm": 40.84375, "learning_rate": 9.955107084717933e-06, "loss": 17.163, "step": 15890 }, { "epoch": 0.2874959599132992, "grad_norm": 40.875, "learning_rate": 9.955078832411274e-06, "loss": 17.093, "step": 15900 }, { "epoch": 0.28767677498242705, "grad_norm": 39.25, "learning_rate": 9.955050580104614e-06, "loss": 17.1249, "step": 15910 }, { "epoch": 0.2878575900515549, "grad_norm": 39.625, "learning_rate": 9.955022327797955e-06, "loss": 17.3523, "step": 15920 }, { "epoch": 0.28803840512068274, "grad_norm": 41.875, "learning_rate": 9.954994075491294e-06, "loss": 17.2994, "step": 15930 }, { "epoch": 0.2882192201898106, "grad_norm": 39.65625, "learning_rate": 9.954965823184634e-06, "loss": 17.3658, "step": 15940 }, { "epoch": 0.2884000352589385, "grad_norm": 39.96875, "learning_rate": 9.954937570877975e-06, "loss": 17.1942, "step": 15950 }, { "epoch": 0.28858085032806635, "grad_norm": 40.53125, "learning_rate": 9.954909318571316e-06, "loss": 17.3323, "step": 15960 }, { "epoch": 0.2887616653971942, "grad_norm": 41.15625, "learning_rate": 9.954881066264656e-06, "loss": 17.148, "step": 15970 }, { "epoch": 0.28894248046632204, "grad_norm": 38.375, "learning_rate": 9.954852813957997e-06, "loss": 17.149, "step": 15980 }, { "epoch": 0.2891232955354499, "grad_norm": 40.8125, "learning_rate": 9.954824561651337e-06, "loss": 17.4588, "step": 15990 }, { "epoch": 0.2893041106045778, "grad_norm": 40.75, "learning_rate": 9.954796309344678e-06, "loss": 17.116, "step": 16000 }, { "epoch": 0.28948492567370565, "grad_norm": 38.5625, "learning_rate": 9.954768057038019e-06, "loss": 16.7949, "step": 16010 }, { "epoch": 0.2896657407428335, "grad_norm": 40.1875, "learning_rate": 9.954739804731358e-06, "loss": 17.0813, "step": 16020 }, { "epoch": 0.2898465558119614, "grad_norm": 39.625, "learning_rate": 9.954711552424698e-06, "loss": 17.6624, "step": 16030 }, { "epoch": 0.2900273708810892, "grad_norm": 40.0, "learning_rate": 9.954683300118039e-06, "loss": 17.3748, "step": 16040 }, { "epoch": 0.2902081859502171, "grad_norm": 39.71875, "learning_rate": 9.95465504781138e-06, "loss": 17.3348, "step": 16050 }, { "epoch": 0.29038900101934495, "grad_norm": 38.28125, "learning_rate": 9.95462679550472e-06, "loss": 16.9131, "step": 16060 }, { "epoch": 0.2905698160884728, "grad_norm": 39.03125, "learning_rate": 9.95459854319806e-06, "loss": 17.5051, "step": 16070 }, { "epoch": 0.2907506311576007, "grad_norm": 41.0625, "learning_rate": 9.954570290891401e-06, "loss": 17.3849, "step": 16080 }, { "epoch": 0.29093144622672856, "grad_norm": 40.09375, "learning_rate": 9.954542038584742e-06, "loss": 17.4643, "step": 16090 }, { "epoch": 0.2911122612958564, "grad_norm": 42.21875, "learning_rate": 9.954513786278081e-06, "loss": 17.4884, "step": 16100 }, { "epoch": 0.29129307636498425, "grad_norm": 39.875, "learning_rate": 9.954485533971422e-06, "loss": 17.3067, "step": 16110 }, { "epoch": 0.2914738914341121, "grad_norm": 38.53125, "learning_rate": 9.954457281664762e-06, "loss": 17.2178, "step": 16120 }, { "epoch": 0.29165470650324, "grad_norm": 42.875, "learning_rate": 9.954429029358103e-06, "loss": 17.2724, "step": 16130 }, { "epoch": 0.29183552157236786, "grad_norm": 42.96875, "learning_rate": 9.954400777051443e-06, "loss": 17.2351, "step": 16140 }, { "epoch": 0.2920163366414957, "grad_norm": 38.59375, "learning_rate": 9.954372524744782e-06, "loss": 17.3964, "step": 16150 }, { "epoch": 0.29219715171062355, "grad_norm": 41.0625, "learning_rate": 9.954344272438125e-06, "loss": 17.1551, "step": 16160 }, { "epoch": 0.2923779667797514, "grad_norm": 38.1875, "learning_rate": 9.954316020131465e-06, "loss": 17.0422, "step": 16170 }, { "epoch": 0.2925587818488793, "grad_norm": 40.5625, "learning_rate": 9.954287767824806e-06, "loss": 17.5605, "step": 16180 }, { "epoch": 0.29273959691800716, "grad_norm": 42.9375, "learning_rate": 9.954259515518145e-06, "loss": 17.3039, "step": 16190 }, { "epoch": 0.29292041198713503, "grad_norm": 39.9375, "learning_rate": 9.954231263211485e-06, "loss": 17.0426, "step": 16200 }, { "epoch": 0.29310122705626285, "grad_norm": 42.0, "learning_rate": 9.954203010904826e-06, "loss": 17.1135, "step": 16210 }, { "epoch": 0.2932820421253907, "grad_norm": 39.96875, "learning_rate": 9.954174758598167e-06, "loss": 17.2362, "step": 16220 }, { "epoch": 0.2934628571945186, "grad_norm": 42.84375, "learning_rate": 9.954146506291507e-06, "loss": 17.3939, "step": 16230 }, { "epoch": 0.29364367226364646, "grad_norm": 41.34375, "learning_rate": 9.954118253984846e-06, "loss": 17.2928, "step": 16240 }, { "epoch": 0.29382448733277433, "grad_norm": 42.125, "learning_rate": 9.954090001678189e-06, "loss": 17.3053, "step": 16250 }, { "epoch": 0.2940053024019022, "grad_norm": 39.9375, "learning_rate": 9.95406174937153e-06, "loss": 17.1759, "step": 16260 }, { "epoch": 0.29418611747103, "grad_norm": 42.5625, "learning_rate": 9.954033497064868e-06, "loss": 17.2078, "step": 16270 }, { "epoch": 0.2943669325401579, "grad_norm": 41.46875, "learning_rate": 9.954005244758209e-06, "loss": 17.2217, "step": 16280 }, { "epoch": 0.29454774760928576, "grad_norm": 41.21875, "learning_rate": 9.95397699245155e-06, "loss": 17.5906, "step": 16290 }, { "epoch": 0.29472856267841363, "grad_norm": 40.28125, "learning_rate": 9.95394874014489e-06, "loss": 17.3592, "step": 16300 }, { "epoch": 0.2949093777475415, "grad_norm": 43.0625, "learning_rate": 9.95392048783823e-06, "loss": 17.7649, "step": 16310 }, { "epoch": 0.2950901928166693, "grad_norm": 40.0625, "learning_rate": 9.953892235531571e-06, "loss": 17.0514, "step": 16320 }, { "epoch": 0.2952710078857972, "grad_norm": 38.96875, "learning_rate": 9.953863983224912e-06, "loss": 17.0892, "step": 16330 }, { "epoch": 0.29545182295492506, "grad_norm": 38.75, "learning_rate": 9.953835730918252e-06, "loss": 17.6396, "step": 16340 }, { "epoch": 0.29563263802405293, "grad_norm": 40.75, "learning_rate": 9.953807478611593e-06, "loss": 17.1078, "step": 16350 }, { "epoch": 0.2958134530931808, "grad_norm": 40.5625, "learning_rate": 9.953779226304932e-06, "loss": 17.0602, "step": 16360 }, { "epoch": 0.29599426816230867, "grad_norm": 40.9375, "learning_rate": 9.953750973998273e-06, "loss": 17.2024, "step": 16370 }, { "epoch": 0.2961750832314365, "grad_norm": 44.0, "learning_rate": 9.953722721691613e-06, "loss": 17.265, "step": 16380 }, { "epoch": 0.29635589830056436, "grad_norm": 40.53125, "learning_rate": 9.953694469384954e-06, "loss": 16.996, "step": 16390 }, { "epoch": 0.29653671336969223, "grad_norm": 41.15625, "learning_rate": 9.953666217078294e-06, "loss": 16.7257, "step": 16400 }, { "epoch": 0.2967175284388201, "grad_norm": 40.65625, "learning_rate": 9.953637964771633e-06, "loss": 17.1792, "step": 16410 }, { "epoch": 0.29689834350794797, "grad_norm": 42.90625, "learning_rate": 9.953609712464976e-06, "loss": 17.4675, "step": 16420 }, { "epoch": 0.2970791585770758, "grad_norm": 42.03125, "learning_rate": 9.953581460158316e-06, "loss": 17.298, "step": 16430 }, { "epoch": 0.29725997364620366, "grad_norm": 38.09375, "learning_rate": 9.953553207851657e-06, "loss": 17.1823, "step": 16440 }, { "epoch": 0.2974407887153315, "grad_norm": 40.90625, "learning_rate": 9.953524955544996e-06, "loss": 17.3904, "step": 16450 }, { "epoch": 0.2976216037844594, "grad_norm": 40.5, "learning_rate": 9.953496703238337e-06, "loss": 17.1084, "step": 16460 }, { "epoch": 0.29780241885358727, "grad_norm": 40.40625, "learning_rate": 9.953468450931677e-06, "loss": 17.1752, "step": 16470 }, { "epoch": 0.29798323392271514, "grad_norm": 38.0, "learning_rate": 9.953440198625018e-06, "loss": 17.1169, "step": 16480 }, { "epoch": 0.29816404899184296, "grad_norm": 40.03125, "learning_rate": 9.953411946318358e-06, "loss": 17.6988, "step": 16490 }, { "epoch": 0.2983448640609708, "grad_norm": 42.09375, "learning_rate": 9.953383694011697e-06, "loss": 17.0947, "step": 16500 }, { "epoch": 0.2985256791300987, "grad_norm": 40.90625, "learning_rate": 9.95335544170504e-06, "loss": 17.1475, "step": 16510 }, { "epoch": 0.29870649419922657, "grad_norm": 40.25, "learning_rate": 9.95332718939838e-06, "loss": 17.1444, "step": 16520 }, { "epoch": 0.29888730926835444, "grad_norm": 40.3125, "learning_rate": 9.95329893709172e-06, "loss": 17.1142, "step": 16530 }, { "epoch": 0.2990681243374823, "grad_norm": 38.78125, "learning_rate": 9.95327068478506e-06, "loss": 16.9027, "step": 16540 }, { "epoch": 0.2992489394066101, "grad_norm": 44.03125, "learning_rate": 9.9532424324784e-06, "loss": 17.0472, "step": 16550 }, { "epoch": 0.299429754475738, "grad_norm": 40.5625, "learning_rate": 9.953214180171741e-06, "loss": 16.765, "step": 16560 }, { "epoch": 0.29961056954486587, "grad_norm": 42.15625, "learning_rate": 9.953185927865082e-06, "loss": 16.9042, "step": 16570 }, { "epoch": 0.29979138461399374, "grad_norm": 38.90625, "learning_rate": 9.95315767555842e-06, "loss": 16.9619, "step": 16580 }, { "epoch": 0.2999721996831216, "grad_norm": 42.25, "learning_rate": 9.953129423251761e-06, "loss": 17.0627, "step": 16590 }, { "epoch": 0.3001530147522494, "grad_norm": 37.75, "learning_rate": 9.953101170945104e-06, "loss": 17.3962, "step": 16600 }, { "epoch": 0.3003338298213773, "grad_norm": 40.5, "learning_rate": 9.953072918638444e-06, "loss": 17.0685, "step": 16610 }, { "epoch": 0.30051464489050517, "grad_norm": 38.6875, "learning_rate": 9.953044666331783e-06, "loss": 17.456, "step": 16620 }, { "epoch": 0.30069545995963304, "grad_norm": 41.53125, "learning_rate": 9.953016414025124e-06, "loss": 17.1949, "step": 16630 }, { "epoch": 0.3008762750287609, "grad_norm": 39.09375, "learning_rate": 9.952988161718464e-06, "loss": 17.527, "step": 16640 }, { "epoch": 0.3010570900978888, "grad_norm": 39.8125, "learning_rate": 9.952959909411805e-06, "loss": 17.1713, "step": 16650 }, { "epoch": 0.3012379051670166, "grad_norm": 38.90625, "learning_rate": 9.952931657105146e-06, "loss": 16.9757, "step": 16660 }, { "epoch": 0.30141872023614447, "grad_norm": 41.78125, "learning_rate": 9.952903404798485e-06, "loss": 17.2477, "step": 16670 }, { "epoch": 0.30159953530527234, "grad_norm": 39.78125, "learning_rate": 9.952875152491827e-06, "loss": 17.256, "step": 16680 }, { "epoch": 0.3017803503744002, "grad_norm": 40.8125, "learning_rate": 9.952846900185167e-06, "loss": 17.1216, "step": 16690 }, { "epoch": 0.3019611654435281, "grad_norm": 38.625, "learning_rate": 9.952818647878506e-06, "loss": 17.2615, "step": 16700 }, { "epoch": 0.30214198051265595, "grad_norm": 39.625, "learning_rate": 9.952790395571847e-06, "loss": 17.2899, "step": 16710 }, { "epoch": 0.30232279558178377, "grad_norm": 41.375, "learning_rate": 9.952762143265188e-06, "loss": 17.8263, "step": 16720 }, { "epoch": 0.30250361065091164, "grad_norm": 42.5, "learning_rate": 9.952733890958528e-06, "loss": 17.4072, "step": 16730 }, { "epoch": 0.3026844257200395, "grad_norm": 44.25, "learning_rate": 9.952705638651869e-06, "loss": 17.9111, "step": 16740 }, { "epoch": 0.3028652407891674, "grad_norm": 40.40625, "learning_rate": 9.952677386345208e-06, "loss": 17.2915, "step": 16750 }, { "epoch": 0.30304605585829525, "grad_norm": 41.9375, "learning_rate": 9.952649134038548e-06, "loss": 17.3275, "step": 16760 }, { "epoch": 0.30322687092742306, "grad_norm": 39.125, "learning_rate": 9.95262088173189e-06, "loss": 17.4011, "step": 16770 }, { "epoch": 0.30340768599655094, "grad_norm": 41.25, "learning_rate": 9.952592629425231e-06, "loss": 17.0106, "step": 16780 }, { "epoch": 0.3035885010656788, "grad_norm": 41.53125, "learning_rate": 9.95256437711857e-06, "loss": 17.0403, "step": 16790 }, { "epoch": 0.3037693161348067, "grad_norm": 39.78125, "learning_rate": 9.952536124811911e-06, "loss": 17.1771, "step": 16800 }, { "epoch": 0.30395013120393455, "grad_norm": 43.78125, "learning_rate": 9.952507872505252e-06, "loss": 17.4583, "step": 16810 }, { "epoch": 0.3041309462730624, "grad_norm": 38.09375, "learning_rate": 9.952479620198592e-06, "loss": 17.1125, "step": 16820 }, { "epoch": 0.30431176134219023, "grad_norm": 40.25, "learning_rate": 9.952451367891933e-06, "loss": 17.0927, "step": 16830 }, { "epoch": 0.3044925764113181, "grad_norm": 41.09375, "learning_rate": 9.952423115585272e-06, "loss": 17.3882, "step": 16840 }, { "epoch": 0.304673391480446, "grad_norm": 40.71875, "learning_rate": 9.952394863278612e-06, "loss": 16.8834, "step": 16850 }, { "epoch": 0.30485420654957385, "grad_norm": 43.40625, "learning_rate": 9.952366610971955e-06, "loss": 17.6202, "step": 16860 }, { "epoch": 0.3050350216187017, "grad_norm": 39.28125, "learning_rate": 9.952338358665294e-06, "loss": 17.3385, "step": 16870 }, { "epoch": 0.3052158366878296, "grad_norm": 41.03125, "learning_rate": 9.952310106358634e-06, "loss": 17.5984, "step": 16880 }, { "epoch": 0.3053966517569574, "grad_norm": 40.59375, "learning_rate": 9.952281854051975e-06, "loss": 17.3033, "step": 16890 }, { "epoch": 0.3055774668260853, "grad_norm": 43.0, "learning_rate": 9.952253601745315e-06, "loss": 17.1927, "step": 16900 }, { "epoch": 0.30575828189521315, "grad_norm": 40.1875, "learning_rate": 9.952225349438656e-06, "loss": 17.7446, "step": 16910 }, { "epoch": 0.305939096964341, "grad_norm": 42.34375, "learning_rate": 9.952197097131997e-06, "loss": 17.2445, "step": 16920 }, { "epoch": 0.3061199120334689, "grad_norm": 43.65625, "learning_rate": 9.952168844825336e-06, "loss": 17.4012, "step": 16930 }, { "epoch": 0.3063007271025967, "grad_norm": 41.5, "learning_rate": 9.952140592518676e-06, "loss": 17.2224, "step": 16940 }, { "epoch": 0.3064815421717246, "grad_norm": 44.75, "learning_rate": 9.952112340212019e-06, "loss": 17.4899, "step": 16950 }, { "epoch": 0.30666235724085245, "grad_norm": 41.03125, "learning_rate": 9.952084087905357e-06, "loss": 16.8993, "step": 16960 }, { "epoch": 0.3068431723099803, "grad_norm": 41.4375, "learning_rate": 9.952055835598698e-06, "loss": 17.5821, "step": 16970 }, { "epoch": 0.3070239873791082, "grad_norm": 40.40625, "learning_rate": 9.952027583292039e-06, "loss": 17.31, "step": 16980 }, { "epoch": 0.30720480244823606, "grad_norm": 43.0, "learning_rate": 9.95199933098538e-06, "loss": 17.4335, "step": 16990 }, { "epoch": 0.3073856175173639, "grad_norm": 44.5, "learning_rate": 9.95197107867872e-06, "loss": 17.3602, "step": 17000 }, { "epoch": 0.30756643258649174, "grad_norm": 39.09375, "learning_rate": 9.951942826372059e-06, "loss": 16.9662, "step": 17010 }, { "epoch": 0.3077472476556196, "grad_norm": 39.625, "learning_rate": 9.9519145740654e-06, "loss": 17.0458, "step": 17020 }, { "epoch": 0.3079280627247475, "grad_norm": 41.0625, "learning_rate": 9.95188632175874e-06, "loss": 17.7801, "step": 17030 }, { "epoch": 0.30810887779387536, "grad_norm": 43.1875, "learning_rate": 9.951858069452082e-06, "loss": 17.4092, "step": 17040 }, { "epoch": 0.30828969286300323, "grad_norm": 42.03125, "learning_rate": 9.951829817145421e-06, "loss": 17.5656, "step": 17050 }, { "epoch": 0.30847050793213104, "grad_norm": 41.375, "learning_rate": 9.951801564838762e-06, "loss": 17.5283, "step": 17060 }, { "epoch": 0.3086513230012589, "grad_norm": 40.90625, "learning_rate": 9.951773312532103e-06, "loss": 17.3261, "step": 17070 }, { "epoch": 0.3088321380703868, "grad_norm": 40.46875, "learning_rate": 9.951745060225443e-06, "loss": 17.403, "step": 17080 }, { "epoch": 0.30901295313951466, "grad_norm": 39.71875, "learning_rate": 9.951716807918784e-06, "loss": 17.1059, "step": 17090 }, { "epoch": 0.3091937682086425, "grad_norm": 42.0625, "learning_rate": 9.951688555612123e-06, "loss": 17.2102, "step": 17100 }, { "epoch": 0.30937458327777034, "grad_norm": 40.6875, "learning_rate": 9.951660303305463e-06, "loss": 16.8268, "step": 17110 }, { "epoch": 0.3095553983468982, "grad_norm": 40.71875, "learning_rate": 9.951632050998806e-06, "loss": 16.661, "step": 17120 }, { "epoch": 0.3097362134160261, "grad_norm": 40.03125, "learning_rate": 9.951603798692145e-06, "loss": 17.2557, "step": 17130 }, { "epoch": 0.30991702848515396, "grad_norm": 38.5, "learning_rate": 9.951575546385485e-06, "loss": 17.03, "step": 17140 }, { "epoch": 0.3100978435542818, "grad_norm": 40.1875, "learning_rate": 9.951547294078826e-06, "loss": 16.9144, "step": 17150 }, { "epoch": 0.3102786586234097, "grad_norm": 41.03125, "learning_rate": 9.951519041772167e-06, "loss": 16.892, "step": 17160 }, { "epoch": 0.3104594736925375, "grad_norm": 43.9375, "learning_rate": 9.951490789465507e-06, "loss": 17.3718, "step": 17170 }, { "epoch": 0.3106402887616654, "grad_norm": 43.625, "learning_rate": 9.951462537158846e-06, "loss": 16.9441, "step": 17180 }, { "epoch": 0.31082110383079325, "grad_norm": 38.96875, "learning_rate": 9.951434284852187e-06, "loss": 16.8048, "step": 17190 }, { "epoch": 0.3110019188999211, "grad_norm": 40.5, "learning_rate": 9.951406032545527e-06, "loss": 17.1232, "step": 17200 }, { "epoch": 0.311182733969049, "grad_norm": 42.8125, "learning_rate": 9.95137778023887e-06, "loss": 16.9251, "step": 17210 }, { "epoch": 0.31136354903817687, "grad_norm": 39.5, "learning_rate": 9.951349527932209e-06, "loss": 17.2344, "step": 17220 }, { "epoch": 0.3115443641073047, "grad_norm": 42.46875, "learning_rate": 9.95132127562555e-06, "loss": 16.9758, "step": 17230 }, { "epoch": 0.31172517917643255, "grad_norm": 41.28125, "learning_rate": 9.95129302331889e-06, "loss": 17.0863, "step": 17240 }, { "epoch": 0.3119059942455604, "grad_norm": 36.375, "learning_rate": 9.95126477101223e-06, "loss": 17.3485, "step": 17250 }, { "epoch": 0.3120868093146883, "grad_norm": 39.875, "learning_rate": 9.951236518705571e-06, "loss": 17.0591, "step": 17260 }, { "epoch": 0.31226762438381617, "grad_norm": 44.5625, "learning_rate": 9.95120826639891e-06, "loss": 17.2459, "step": 17270 }, { "epoch": 0.312448439452944, "grad_norm": 41.5625, "learning_rate": 9.95118001409225e-06, "loss": 17.2446, "step": 17280 }, { "epoch": 0.31262925452207185, "grad_norm": 40.0, "learning_rate": 9.951151761785591e-06, "loss": 16.8575, "step": 17290 }, { "epoch": 0.3128100695911997, "grad_norm": 39.875, "learning_rate": 9.951123509478932e-06, "loss": 17.4136, "step": 17300 }, { "epoch": 0.3129908846603276, "grad_norm": 41.8125, "learning_rate": 9.951095257172272e-06, "loss": 17.2629, "step": 17310 }, { "epoch": 0.31317169972945547, "grad_norm": 37.375, "learning_rate": 9.951067004865613e-06, "loss": 17.0746, "step": 17320 }, { "epoch": 0.31335251479858334, "grad_norm": 40.84375, "learning_rate": 9.951038752558954e-06, "loss": 17.2394, "step": 17330 }, { "epoch": 0.31353332986771115, "grad_norm": 41.53125, "learning_rate": 9.951010500252294e-06, "loss": 17.4258, "step": 17340 }, { "epoch": 0.313714144936839, "grad_norm": 39.75, "learning_rate": 9.950982247945635e-06, "loss": 17.1237, "step": 17350 }, { "epoch": 0.3138949600059669, "grad_norm": 38.8125, "learning_rate": 9.950953995638974e-06, "loss": 16.8779, "step": 17360 }, { "epoch": 0.31407577507509477, "grad_norm": 38.59375, "learning_rate": 9.950925743332315e-06, "loss": 17.5397, "step": 17370 }, { "epoch": 0.31425659014422264, "grad_norm": 41.75, "learning_rate": 9.950897491025655e-06, "loss": 17.3225, "step": 17380 }, { "epoch": 0.3144374052133505, "grad_norm": 40.1875, "learning_rate": 9.950869238718996e-06, "loss": 17.1693, "step": 17390 }, { "epoch": 0.3146182202824783, "grad_norm": 38.5625, "learning_rate": 9.950840986412336e-06, "loss": 17.3698, "step": 17400 }, { "epoch": 0.3147990353516062, "grad_norm": 39.875, "learning_rate": 9.950812734105677e-06, "loss": 17.0733, "step": 17410 }, { "epoch": 0.31497985042073406, "grad_norm": 39.46875, "learning_rate": 9.950784481799018e-06, "loss": 16.9899, "step": 17420 }, { "epoch": 0.31516066548986194, "grad_norm": 40.0625, "learning_rate": 9.950756229492358e-06, "loss": 17.282, "step": 17430 }, { "epoch": 0.3153414805589898, "grad_norm": 39.96875, "learning_rate": 9.950727977185697e-06, "loss": 17.2829, "step": 17440 }, { "epoch": 0.3155222956281176, "grad_norm": 41.5625, "learning_rate": 9.950699724879038e-06, "loss": 17.432, "step": 17450 }, { "epoch": 0.3157031106972455, "grad_norm": 36.0, "learning_rate": 9.950671472572378e-06, "loss": 17.3085, "step": 17460 }, { "epoch": 0.31588392576637336, "grad_norm": 39.34375, "learning_rate": 9.95064322026572e-06, "loss": 17.0396, "step": 17470 }, { "epoch": 0.31606474083550123, "grad_norm": 41.15625, "learning_rate": 9.95061496795906e-06, "loss": 17.3051, "step": 17480 }, { "epoch": 0.3162455559046291, "grad_norm": 40.1875, "learning_rate": 9.9505867156524e-06, "loss": 17.087, "step": 17490 }, { "epoch": 0.316426370973757, "grad_norm": 38.1875, "learning_rate": 9.950558463345741e-06, "loss": 17.0963, "step": 17500 }, { "epoch": 0.3166071860428848, "grad_norm": 41.59375, "learning_rate": 9.950530211039082e-06, "loss": 17.3186, "step": 17510 }, { "epoch": 0.31678800111201266, "grad_norm": 40.71875, "learning_rate": 9.950501958732422e-06, "loss": 17.0498, "step": 17520 }, { "epoch": 0.31696881618114053, "grad_norm": 40.375, "learning_rate": 9.950473706425761e-06, "loss": 17.2796, "step": 17530 }, { "epoch": 0.3171496312502684, "grad_norm": 40.5625, "learning_rate": 9.950445454119102e-06, "loss": 16.9422, "step": 17540 }, { "epoch": 0.3173304463193963, "grad_norm": 40.4375, "learning_rate": 9.950417201812442e-06, "loss": 17.1075, "step": 17550 }, { "epoch": 0.31751126138852415, "grad_norm": 41.0, "learning_rate": 9.950388949505783e-06, "loss": 17.3609, "step": 17560 }, { "epoch": 0.31769207645765196, "grad_norm": 41.46875, "learning_rate": 9.950360697199124e-06, "loss": 17.1011, "step": 17570 }, { "epoch": 0.31787289152677983, "grad_norm": 41.34375, "learning_rate": 9.950332444892464e-06, "loss": 17.5317, "step": 17580 }, { "epoch": 0.3180537065959077, "grad_norm": 37.0, "learning_rate": 9.950304192585805e-06, "loss": 17.2281, "step": 17590 }, { "epoch": 0.3182345216650356, "grad_norm": 41.28125, "learning_rate": 9.950275940279145e-06, "loss": 16.8669, "step": 17600 }, { "epoch": 0.31841533673416345, "grad_norm": 42.1875, "learning_rate": 9.950247687972484e-06, "loss": 16.9876, "step": 17610 }, { "epoch": 0.31859615180329126, "grad_norm": 41.3125, "learning_rate": 9.950219435665825e-06, "loss": 17.7563, "step": 17620 }, { "epoch": 0.31877696687241913, "grad_norm": 39.59375, "learning_rate": 9.950191183359166e-06, "loss": 17.0608, "step": 17630 }, { "epoch": 0.318957781941547, "grad_norm": 40.09375, "learning_rate": 9.950162931052506e-06, "loss": 17.4674, "step": 17640 }, { "epoch": 0.3191385970106749, "grad_norm": 40.5, "learning_rate": 9.950134678745847e-06, "loss": 16.9903, "step": 17650 }, { "epoch": 0.31931941207980274, "grad_norm": 41.28125, "learning_rate": 9.950106426439187e-06, "loss": 17.3048, "step": 17660 }, { "epoch": 0.3195002271489306, "grad_norm": 39.25, "learning_rate": 9.950078174132528e-06, "loss": 17.2867, "step": 17670 }, { "epoch": 0.31968104221805843, "grad_norm": 42.09375, "learning_rate": 9.950049921825869e-06, "loss": 17.6517, "step": 17680 }, { "epoch": 0.3198618572871863, "grad_norm": 39.03125, "learning_rate": 9.95002166951921e-06, "loss": 17.5702, "step": 17690 }, { "epoch": 0.3200426723563142, "grad_norm": 41.4375, "learning_rate": 9.949993417212548e-06, "loss": 17.4709, "step": 17700 }, { "epoch": 0.32022348742544204, "grad_norm": 43.1875, "learning_rate": 9.949965164905889e-06, "loss": 17.372, "step": 17710 }, { "epoch": 0.3204043024945699, "grad_norm": 40.0625, "learning_rate": 9.94993691259923e-06, "loss": 17.1449, "step": 17720 }, { "epoch": 0.3205851175636978, "grad_norm": 39.875, "learning_rate": 9.94990866029257e-06, "loss": 16.9721, "step": 17730 }, { "epoch": 0.3207659326328256, "grad_norm": 40.34375, "learning_rate": 9.94988040798591e-06, "loss": 17.058, "step": 17740 }, { "epoch": 0.32094674770195347, "grad_norm": 40.59375, "learning_rate": 9.949852155679251e-06, "loss": 17.1496, "step": 17750 }, { "epoch": 0.32112756277108134, "grad_norm": 41.4375, "learning_rate": 9.949823903372592e-06, "loss": 17.3002, "step": 17760 }, { "epoch": 0.3213083778402092, "grad_norm": 41.03125, "learning_rate": 9.949795651065933e-06, "loss": 17.1647, "step": 17770 }, { "epoch": 0.3214891929093371, "grad_norm": 41.625, "learning_rate": 9.949767398759273e-06, "loss": 17.1988, "step": 17780 }, { "epoch": 0.3216700079784649, "grad_norm": 43.875, "learning_rate": 9.949739146452612e-06, "loss": 17.3825, "step": 17790 }, { "epoch": 0.32185082304759277, "grad_norm": 41.4375, "learning_rate": 9.949710894145953e-06, "loss": 17.05, "step": 17800 }, { "epoch": 0.32203163811672064, "grad_norm": 43.59375, "learning_rate": 9.949682641839293e-06, "loss": 17.0607, "step": 17810 }, { "epoch": 0.3222124531858485, "grad_norm": 40.6875, "learning_rate": 9.949654389532634e-06, "loss": 16.5678, "step": 17820 }, { "epoch": 0.3223932682549764, "grad_norm": 42.0, "learning_rate": 9.949626137225975e-06, "loss": 17.0074, "step": 17830 }, { "epoch": 0.32257408332410425, "grad_norm": 39.1875, "learning_rate": 9.949597884919315e-06, "loss": 17.5844, "step": 17840 }, { "epoch": 0.32275489839323207, "grad_norm": 39.3125, "learning_rate": 9.949569632612656e-06, "loss": 17.0044, "step": 17850 }, { "epoch": 0.32293571346235994, "grad_norm": 40.375, "learning_rate": 9.949541380305997e-06, "loss": 17.272, "step": 17860 }, { "epoch": 0.3231165285314878, "grad_norm": 41.75, "learning_rate": 9.949513127999335e-06, "loss": 17.2767, "step": 17870 }, { "epoch": 0.3232973436006157, "grad_norm": 40.125, "learning_rate": 9.949484875692676e-06, "loss": 16.9778, "step": 17880 }, { "epoch": 0.32347815866974355, "grad_norm": 40.84375, "learning_rate": 9.949456623386017e-06, "loss": 17.1859, "step": 17890 }, { "epoch": 0.3236589737388714, "grad_norm": 41.6875, "learning_rate": 9.949428371079357e-06, "loss": 16.9518, "step": 17900 }, { "epoch": 0.32383978880799924, "grad_norm": 43.09375, "learning_rate": 9.949400118772698e-06, "loss": 17.2181, "step": 17910 }, { "epoch": 0.3240206038771271, "grad_norm": 40.8125, "learning_rate": 9.949371866466039e-06, "loss": 17.0839, "step": 17920 }, { "epoch": 0.324201418946255, "grad_norm": 41.0, "learning_rate": 9.94934361415938e-06, "loss": 16.9002, "step": 17930 }, { "epoch": 0.32438223401538285, "grad_norm": 37.875, "learning_rate": 9.94931536185272e-06, "loss": 17.3268, "step": 17940 }, { "epoch": 0.3245630490845107, "grad_norm": 40.09375, "learning_rate": 9.94928710954606e-06, "loss": 17.4407, "step": 17950 }, { "epoch": 0.32474386415363854, "grad_norm": 41.40625, "learning_rate": 9.9492588572394e-06, "loss": 17.2294, "step": 17960 }, { "epoch": 0.3249246792227664, "grad_norm": 41.625, "learning_rate": 9.94923060493274e-06, "loss": 17.0065, "step": 17970 }, { "epoch": 0.3251054942918943, "grad_norm": 41.28125, "learning_rate": 9.94920235262608e-06, "loss": 17.3333, "step": 17980 }, { "epoch": 0.32528630936102215, "grad_norm": 41.09375, "learning_rate": 9.949174100319421e-06, "loss": 17.2219, "step": 17990 }, { "epoch": 0.32546712443015, "grad_norm": 41.09375, "learning_rate": 9.949145848012762e-06, "loss": 17.4031, "step": 18000 }, { "epoch": 0.3256479394992779, "grad_norm": 36.53125, "learning_rate": 9.949117595706102e-06, "loss": 17.2132, "step": 18010 }, { "epoch": 0.3258287545684057, "grad_norm": 37.28125, "learning_rate": 9.949089343399443e-06, "loss": 17.2204, "step": 18020 }, { "epoch": 0.3260095696375336, "grad_norm": 42.375, "learning_rate": 9.949061091092784e-06, "loss": 17.3199, "step": 18030 }, { "epoch": 0.32619038470666145, "grad_norm": 38.5, "learning_rate": 9.949032838786123e-06, "loss": 17.6373, "step": 18040 }, { "epoch": 0.3263711997757893, "grad_norm": 42.375, "learning_rate": 9.949004586479463e-06, "loss": 17.1415, "step": 18050 }, { "epoch": 0.3265520148449172, "grad_norm": 41.0, "learning_rate": 9.948976334172804e-06, "loss": 17.1106, "step": 18060 }, { "epoch": 0.32673282991404506, "grad_norm": 41.5625, "learning_rate": 9.948948081866145e-06, "loss": 17.5328, "step": 18070 }, { "epoch": 0.3269136449831729, "grad_norm": 40.53125, "learning_rate": 9.948919829559485e-06, "loss": 17.2637, "step": 18080 }, { "epoch": 0.32709446005230075, "grad_norm": 41.375, "learning_rate": 9.948891577252826e-06, "loss": 16.6566, "step": 18090 }, { "epoch": 0.3272752751214286, "grad_norm": 41.40625, "learning_rate": 9.948863324946166e-06, "loss": 17.204, "step": 18100 }, { "epoch": 0.3274560901905565, "grad_norm": 40.6875, "learning_rate": 9.948835072639507e-06, "loss": 17.4579, "step": 18110 }, { "epoch": 0.32763690525968436, "grad_norm": 42.15625, "learning_rate": 9.948806820332848e-06, "loss": 17.6162, "step": 18120 }, { "epoch": 0.3278177203288122, "grad_norm": 42.71875, "learning_rate": 9.948778568026187e-06, "loss": 17.0227, "step": 18130 }, { "epoch": 0.32799853539794005, "grad_norm": 39.09375, "learning_rate": 9.948750315719527e-06, "loss": 17.1382, "step": 18140 }, { "epoch": 0.3281793504670679, "grad_norm": 40.0625, "learning_rate": 9.948722063412868e-06, "loss": 17.3412, "step": 18150 }, { "epoch": 0.3283601655361958, "grad_norm": 39.21875, "learning_rate": 9.948693811106208e-06, "loss": 17.1184, "step": 18160 }, { "epoch": 0.32854098060532366, "grad_norm": 42.5, "learning_rate": 9.948665558799549e-06, "loss": 16.8094, "step": 18170 }, { "epoch": 0.32872179567445153, "grad_norm": 39.5, "learning_rate": 9.94863730649289e-06, "loss": 16.9699, "step": 18180 }, { "epoch": 0.32890261074357935, "grad_norm": 39.53125, "learning_rate": 9.94860905418623e-06, "loss": 17.0162, "step": 18190 }, { "epoch": 0.3290834258127072, "grad_norm": 39.875, "learning_rate": 9.948580801879571e-06, "loss": 17.3723, "step": 18200 }, { "epoch": 0.3292642408818351, "grad_norm": 39.78125, "learning_rate": 9.948552549572912e-06, "loss": 17.3077, "step": 18210 }, { "epoch": 0.32944505595096296, "grad_norm": 41.4375, "learning_rate": 9.94852429726625e-06, "loss": 16.9637, "step": 18220 }, { "epoch": 0.32962587102009083, "grad_norm": 40.96875, "learning_rate": 9.948496044959591e-06, "loss": 17.1387, "step": 18230 }, { "epoch": 0.32980668608921865, "grad_norm": 40.8125, "learning_rate": 9.948467792652932e-06, "loss": 17.3337, "step": 18240 }, { "epoch": 0.3299875011583465, "grad_norm": 44.0625, "learning_rate": 9.948439540346272e-06, "loss": 17.097, "step": 18250 }, { "epoch": 0.3301683162274744, "grad_norm": 43.46875, "learning_rate": 9.948411288039613e-06, "loss": 17.0158, "step": 18260 }, { "epoch": 0.33034913129660226, "grad_norm": 43.84375, "learning_rate": 9.948383035732954e-06, "loss": 17.2627, "step": 18270 }, { "epoch": 0.33052994636573013, "grad_norm": 41.65625, "learning_rate": 9.948354783426294e-06, "loss": 16.8991, "step": 18280 }, { "epoch": 0.330710761434858, "grad_norm": 40.9375, "learning_rate": 9.948326531119635e-06, "loss": 17.0619, "step": 18290 }, { "epoch": 0.3308915765039858, "grad_norm": 42.15625, "learning_rate": 9.948298278812974e-06, "loss": 17.3769, "step": 18300 }, { "epoch": 0.3310723915731137, "grad_norm": 43.15625, "learning_rate": 9.948270026506314e-06, "loss": 17.4126, "step": 18310 }, { "epoch": 0.33125320664224156, "grad_norm": 39.71875, "learning_rate": 9.948241774199655e-06, "loss": 17.0779, "step": 18320 }, { "epoch": 0.33143402171136943, "grad_norm": 39.1875, "learning_rate": 9.948213521892996e-06, "loss": 16.984, "step": 18330 }, { "epoch": 0.3316148367804973, "grad_norm": 38.96875, "learning_rate": 9.948185269586336e-06, "loss": 16.7094, "step": 18340 }, { "epoch": 0.3317956518496252, "grad_norm": 41.46875, "learning_rate": 9.948157017279677e-06, "loss": 17.0435, "step": 18350 }, { "epoch": 0.331976466918753, "grad_norm": 39.96875, "learning_rate": 9.948128764973017e-06, "loss": 17.2927, "step": 18360 }, { "epoch": 0.33215728198788086, "grad_norm": 43.46875, "learning_rate": 9.948100512666358e-06, "loss": 17.579, "step": 18370 }, { "epoch": 0.33233809705700873, "grad_norm": 42.9375, "learning_rate": 9.948072260359699e-06, "loss": 17.1167, "step": 18380 }, { "epoch": 0.3325189121261366, "grad_norm": 40.875, "learning_rate": 9.948044008053038e-06, "loss": 17.3039, "step": 18390 }, { "epoch": 0.33269972719526447, "grad_norm": 42.15625, "learning_rate": 9.948015755746378e-06, "loss": 17.0618, "step": 18400 }, { "epoch": 0.3328805422643923, "grad_norm": 38.0625, "learning_rate": 9.947987503439719e-06, "loss": 17.0005, "step": 18410 }, { "epoch": 0.33306135733352016, "grad_norm": 39.90625, "learning_rate": 9.94795925113306e-06, "loss": 17.2511, "step": 18420 }, { "epoch": 0.33324217240264803, "grad_norm": 37.9375, "learning_rate": 9.9479309988264e-06, "loss": 16.8679, "step": 18430 }, { "epoch": 0.3334229874717759, "grad_norm": 39.0625, "learning_rate": 9.94790274651974e-06, "loss": 17.434, "step": 18440 }, { "epoch": 0.33360380254090377, "grad_norm": 41.03125, "learning_rate": 9.947874494213081e-06, "loss": 17.287, "step": 18450 }, { "epoch": 0.33378461761003164, "grad_norm": 37.5625, "learning_rate": 9.947846241906422e-06, "loss": 17.7589, "step": 18460 }, { "epoch": 0.33396543267915946, "grad_norm": 41.09375, "learning_rate": 9.947817989599761e-06, "loss": 16.7847, "step": 18470 }, { "epoch": 0.33414624774828733, "grad_norm": 41.3125, "learning_rate": 9.947789737293102e-06, "loss": 16.9083, "step": 18480 }, { "epoch": 0.3343270628174152, "grad_norm": 39.8125, "learning_rate": 9.947761484986442e-06, "loss": 17.2558, "step": 18490 }, { "epoch": 0.33450787788654307, "grad_norm": 39.75, "learning_rate": 9.947733232679783e-06, "loss": 16.9924, "step": 18500 }, { "epoch": 0.33468869295567094, "grad_norm": 38.65625, "learning_rate": 9.947704980373123e-06, "loss": 17.0651, "step": 18510 }, { "epoch": 0.3348695080247988, "grad_norm": 42.875, "learning_rate": 9.947676728066464e-06, "loss": 16.8699, "step": 18520 }, { "epoch": 0.3350503230939266, "grad_norm": 37.375, "learning_rate": 9.947648475759805e-06, "loss": 17.208, "step": 18530 }, { "epoch": 0.3352311381630545, "grad_norm": 40.4375, "learning_rate": 9.947620223453145e-06, "loss": 17.3355, "step": 18540 }, { "epoch": 0.33541195323218237, "grad_norm": 40.6875, "learning_rate": 9.947591971146486e-06, "loss": 16.9956, "step": 18550 }, { "epoch": 0.33559276830131024, "grad_norm": 42.53125, "learning_rate": 9.947563718839825e-06, "loss": 16.8433, "step": 18560 }, { "epoch": 0.3357735833704381, "grad_norm": 38.96875, "learning_rate": 9.947535466533165e-06, "loss": 17.1173, "step": 18570 }, { "epoch": 0.3359543984395659, "grad_norm": 41.4375, "learning_rate": 9.947507214226506e-06, "loss": 17.3725, "step": 18580 }, { "epoch": 0.3361352135086938, "grad_norm": 38.9375, "learning_rate": 9.947478961919847e-06, "loss": 17.1542, "step": 18590 }, { "epoch": 0.33631602857782167, "grad_norm": 43.0625, "learning_rate": 9.947450709613187e-06, "loss": 17.6042, "step": 18600 }, { "epoch": 0.33649684364694954, "grad_norm": 43.5, "learning_rate": 9.947422457306528e-06, "loss": 17.1111, "step": 18610 }, { "epoch": 0.3366776587160774, "grad_norm": 39.6875, "learning_rate": 9.947394204999869e-06, "loss": 16.7568, "step": 18620 }, { "epoch": 0.3368584737852053, "grad_norm": 41.0625, "learning_rate": 9.94736595269321e-06, "loss": 16.7065, "step": 18630 }, { "epoch": 0.3370392888543331, "grad_norm": 39.90625, "learning_rate": 9.94733770038655e-06, "loss": 16.9225, "step": 18640 }, { "epoch": 0.33722010392346097, "grad_norm": 43.15625, "learning_rate": 9.947309448079889e-06, "loss": 17.0526, "step": 18650 }, { "epoch": 0.33740091899258884, "grad_norm": 38.625, "learning_rate": 9.94728119577323e-06, "loss": 17.6703, "step": 18660 }, { "epoch": 0.3375817340617167, "grad_norm": 40.3125, "learning_rate": 9.94725294346657e-06, "loss": 17.1176, "step": 18670 }, { "epoch": 0.3377625491308446, "grad_norm": 41.1875, "learning_rate": 9.94722469115991e-06, "loss": 17.3024, "step": 18680 }, { "epoch": 0.33794336419997245, "grad_norm": 40.25, "learning_rate": 9.947196438853251e-06, "loss": 17.4143, "step": 18690 }, { "epoch": 0.33812417926910027, "grad_norm": 38.5, "learning_rate": 9.947168186546592e-06, "loss": 17.0131, "step": 18700 }, { "epoch": 0.33830499433822814, "grad_norm": 40.5625, "learning_rate": 9.947139934239932e-06, "loss": 17.4532, "step": 18710 }, { "epoch": 0.338485809407356, "grad_norm": 42.84375, "learning_rate": 9.947111681933273e-06, "loss": 17.7063, "step": 18720 }, { "epoch": 0.3386666244764839, "grad_norm": 44.6875, "learning_rate": 9.947083429626612e-06, "loss": 17.6791, "step": 18730 }, { "epoch": 0.33884743954561175, "grad_norm": 37.96875, "learning_rate": 9.947055177319953e-06, "loss": 17.2725, "step": 18740 }, { "epoch": 0.33902825461473957, "grad_norm": 41.34375, "learning_rate": 9.947026925013293e-06, "loss": 17.5307, "step": 18750 }, { "epoch": 0.33920906968386744, "grad_norm": 38.25, "learning_rate": 9.946998672706634e-06, "loss": 17.2355, "step": 18760 }, { "epoch": 0.3393898847529953, "grad_norm": 40.34375, "learning_rate": 9.946970420399975e-06, "loss": 17.0688, "step": 18770 }, { "epoch": 0.3395706998221232, "grad_norm": 40.53125, "learning_rate": 9.946942168093313e-06, "loss": 17.016, "step": 18780 }, { "epoch": 0.33975151489125105, "grad_norm": 41.125, "learning_rate": 9.946913915786656e-06, "loss": 17.0653, "step": 18790 }, { "epoch": 0.3399323299603789, "grad_norm": 40.71875, "learning_rate": 9.946885663479996e-06, "loss": 17.0459, "step": 18800 }, { "epoch": 0.34011314502950674, "grad_norm": 41.03125, "learning_rate": 9.946857411173337e-06, "loss": 17.3077, "step": 18810 }, { "epoch": 0.3402939600986346, "grad_norm": 41.78125, "learning_rate": 9.946829158866676e-06, "loss": 17.0437, "step": 18820 }, { "epoch": 0.3404747751677625, "grad_norm": 40.25, "learning_rate": 9.946800906560017e-06, "loss": 17.9247, "step": 18830 }, { "epoch": 0.34065559023689035, "grad_norm": 41.625, "learning_rate": 9.946772654253357e-06, "loss": 17.3083, "step": 18840 }, { "epoch": 0.3408364053060182, "grad_norm": 42.3125, "learning_rate": 9.946744401946698e-06, "loss": 16.8552, "step": 18850 }, { "epoch": 0.3410172203751461, "grad_norm": 42.875, "learning_rate": 9.946716149640038e-06, "loss": 17.5379, "step": 18860 }, { "epoch": 0.3411980354442739, "grad_norm": 38.0625, "learning_rate": 9.946687897333379e-06, "loss": 17.1698, "step": 18870 }, { "epoch": 0.3413788505134018, "grad_norm": 41.53125, "learning_rate": 9.94665964502672e-06, "loss": 17.2349, "step": 18880 }, { "epoch": 0.34155966558252965, "grad_norm": 39.84375, "learning_rate": 9.94663139272006e-06, "loss": 17.123, "step": 18890 }, { "epoch": 0.3417404806516575, "grad_norm": 43.28125, "learning_rate": 9.9466031404134e-06, "loss": 17.0436, "step": 18900 }, { "epoch": 0.3419212957207854, "grad_norm": 41.40625, "learning_rate": 9.94657488810674e-06, "loss": 17.3219, "step": 18910 }, { "epoch": 0.3421021107899132, "grad_norm": 39.21875, "learning_rate": 9.94654663580008e-06, "loss": 17.252, "step": 18920 }, { "epoch": 0.3422829258590411, "grad_norm": 41.0625, "learning_rate": 9.946518383493421e-06, "loss": 17.3506, "step": 18930 }, { "epoch": 0.34246374092816895, "grad_norm": 39.78125, "learning_rate": 9.946490131186762e-06, "loss": 17.1539, "step": 18940 }, { "epoch": 0.3426445559972968, "grad_norm": 38.84375, "learning_rate": 9.946461878880102e-06, "loss": 17.2454, "step": 18950 }, { "epoch": 0.3428253710664247, "grad_norm": 43.6875, "learning_rate": 9.946433626573443e-06, "loss": 17.1604, "step": 18960 }, { "epoch": 0.34300618613555256, "grad_norm": 40.90625, "learning_rate": 9.946405374266784e-06, "loss": 17.216, "step": 18970 }, { "epoch": 0.3431870012046804, "grad_norm": 39.8125, "learning_rate": 9.946377121960124e-06, "loss": 17.0927, "step": 18980 }, { "epoch": 0.34336781627380825, "grad_norm": 39.65625, "learning_rate": 9.946348869653463e-06, "loss": 17.1234, "step": 18990 }, { "epoch": 0.3435486313429361, "grad_norm": 40.90625, "learning_rate": 9.946320617346804e-06, "loss": 17.0432, "step": 19000 }, { "epoch": 0.343729446412064, "grad_norm": 39.75, "learning_rate": 9.946292365040144e-06, "loss": 17.5622, "step": 19010 }, { "epoch": 0.34391026148119186, "grad_norm": 41.21875, "learning_rate": 9.946264112733485e-06, "loss": 17.1439, "step": 19020 }, { "epoch": 0.34409107655031973, "grad_norm": 42.125, "learning_rate": 9.946235860426826e-06, "loss": 16.7243, "step": 19030 }, { "epoch": 0.34427189161944755, "grad_norm": 36.53125, "learning_rate": 9.946207608120165e-06, "loss": 17.1458, "step": 19040 }, { "epoch": 0.3444527066885754, "grad_norm": 41.25, "learning_rate": 9.946179355813507e-06, "loss": 17.2407, "step": 19050 }, { "epoch": 0.3446335217577033, "grad_norm": 39.125, "learning_rate": 9.946151103506848e-06, "loss": 17.1062, "step": 19060 }, { "epoch": 0.34481433682683116, "grad_norm": 38.71875, "learning_rate": 9.946122851200188e-06, "loss": 17.1634, "step": 19070 }, { "epoch": 0.34499515189595903, "grad_norm": 41.125, "learning_rate": 9.946094598893527e-06, "loss": 17.13, "step": 19080 }, { "epoch": 0.34517596696508684, "grad_norm": 38.65625, "learning_rate": 9.946066346586868e-06, "loss": 17.0695, "step": 19090 }, { "epoch": 0.3453567820342147, "grad_norm": 39.625, "learning_rate": 9.946038094280208e-06, "loss": 16.5874, "step": 19100 }, { "epoch": 0.3455375971033426, "grad_norm": 42.8125, "learning_rate": 9.946009841973549e-06, "loss": 17.1617, "step": 19110 }, { "epoch": 0.34571841217247046, "grad_norm": 41.1875, "learning_rate": 9.94598158966689e-06, "loss": 17.3538, "step": 19120 }, { "epoch": 0.34589922724159833, "grad_norm": 40.6875, "learning_rate": 9.945953337360228e-06, "loss": 17.2054, "step": 19130 }, { "epoch": 0.3460800423107262, "grad_norm": 42.25, "learning_rate": 9.94592508505357e-06, "loss": 17.5536, "step": 19140 }, { "epoch": 0.346260857379854, "grad_norm": 43.9375, "learning_rate": 9.945896832746911e-06, "loss": 17.1485, "step": 19150 }, { "epoch": 0.3464416724489819, "grad_norm": 43.6875, "learning_rate": 9.94586858044025e-06, "loss": 17.3381, "step": 19160 }, { "epoch": 0.34662248751810976, "grad_norm": 39.53125, "learning_rate": 9.945840328133591e-06, "loss": 16.9011, "step": 19170 }, { "epoch": 0.3468033025872376, "grad_norm": 40.90625, "learning_rate": 9.945812075826932e-06, "loss": 16.9682, "step": 19180 }, { "epoch": 0.3469841176563655, "grad_norm": 39.65625, "learning_rate": 9.945783823520272e-06, "loss": 16.9983, "step": 19190 }, { "epoch": 0.34716493272549337, "grad_norm": 43.4375, "learning_rate": 9.945755571213613e-06, "loss": 17.5587, "step": 19200 }, { "epoch": 0.3473457477946212, "grad_norm": 41.8125, "learning_rate": 9.945727318906952e-06, "loss": 16.8602, "step": 19210 }, { "epoch": 0.34752656286374906, "grad_norm": 41.96875, "learning_rate": 9.945699066600294e-06, "loss": 17.4146, "step": 19220 }, { "epoch": 0.3477073779328769, "grad_norm": 42.375, "learning_rate": 9.945670814293635e-06, "loss": 16.7323, "step": 19230 }, { "epoch": 0.3478881930020048, "grad_norm": 38.6875, "learning_rate": 9.945642561986975e-06, "loss": 17.0783, "step": 19240 }, { "epoch": 0.34806900807113267, "grad_norm": 39.40625, "learning_rate": 9.945614309680314e-06, "loss": 16.9396, "step": 19250 }, { "epoch": 0.3482498231402605, "grad_norm": 40.78125, "learning_rate": 9.945586057373655e-06, "loss": 17.1608, "step": 19260 }, { "epoch": 0.34843063820938835, "grad_norm": 39.21875, "learning_rate": 9.945557805066995e-06, "loss": 17.3588, "step": 19270 }, { "epoch": 0.3486114532785162, "grad_norm": 39.15625, "learning_rate": 9.945529552760336e-06, "loss": 17.2244, "step": 19280 }, { "epoch": 0.3487922683476441, "grad_norm": 43.6875, "learning_rate": 9.945501300453677e-06, "loss": 17.6382, "step": 19290 }, { "epoch": 0.34897308341677197, "grad_norm": 41.21875, "learning_rate": 9.945473048147016e-06, "loss": 17.0822, "step": 19300 }, { "epoch": 0.34915389848589984, "grad_norm": 40.03125, "learning_rate": 9.945444795840358e-06, "loss": 17.418, "step": 19310 }, { "epoch": 0.34933471355502765, "grad_norm": 44.03125, "learning_rate": 9.945416543533699e-06, "loss": 17.2986, "step": 19320 }, { "epoch": 0.3495155286241555, "grad_norm": 41.28125, "learning_rate": 9.945388291227038e-06, "loss": 16.9621, "step": 19330 }, { "epoch": 0.3496963436932834, "grad_norm": 41.5, "learning_rate": 9.945360038920378e-06, "loss": 17.1927, "step": 19340 }, { "epoch": 0.34987715876241127, "grad_norm": 40.75, "learning_rate": 9.945331786613719e-06, "loss": 17.5507, "step": 19350 }, { "epoch": 0.35005797383153914, "grad_norm": 41.375, "learning_rate": 9.94530353430706e-06, "loss": 17.3923, "step": 19360 }, { "epoch": 0.350238788900667, "grad_norm": 40.6875, "learning_rate": 9.9452752820004e-06, "loss": 17.129, "step": 19370 }, { "epoch": 0.3504196039697948, "grad_norm": 39.375, "learning_rate": 9.945247029693739e-06, "loss": 17.591, "step": 19380 }, { "epoch": 0.3506004190389227, "grad_norm": 40.03125, "learning_rate": 9.94521877738708e-06, "loss": 17.3433, "step": 19390 }, { "epoch": 0.35078123410805057, "grad_norm": 38.09375, "learning_rate": 9.945190525080422e-06, "loss": 17.116, "step": 19400 }, { "epoch": 0.35096204917717844, "grad_norm": 41.65625, "learning_rate": 9.945162272773763e-06, "loss": 17.407, "step": 19410 }, { "epoch": 0.3511428642463063, "grad_norm": 38.21875, "learning_rate": 9.945134020467101e-06, "loss": 16.9265, "step": 19420 }, { "epoch": 0.3513236793154341, "grad_norm": 41.8125, "learning_rate": 9.945105768160442e-06, "loss": 16.8851, "step": 19430 }, { "epoch": 0.351504494384562, "grad_norm": 39.5, "learning_rate": 9.945077515853783e-06, "loss": 17.0815, "step": 19440 }, { "epoch": 0.35168530945368986, "grad_norm": 39.78125, "learning_rate": 9.945049263547123e-06, "loss": 17.43, "step": 19450 }, { "epoch": 0.35186612452281774, "grad_norm": 40.84375, "learning_rate": 9.945021011240464e-06, "loss": 16.987, "step": 19460 }, { "epoch": 0.3520469395919456, "grad_norm": 40.53125, "learning_rate": 9.944992758933803e-06, "loss": 16.6883, "step": 19470 }, { "epoch": 0.3522277546610735, "grad_norm": 41.8125, "learning_rate": 9.944964506627143e-06, "loss": 17.3894, "step": 19480 }, { "epoch": 0.3524085697302013, "grad_norm": 42.0625, "learning_rate": 9.944936254320486e-06, "loss": 17.1256, "step": 19490 }, { "epoch": 0.35258938479932916, "grad_norm": 41.34375, "learning_rate": 9.944908002013825e-06, "loss": 17.2746, "step": 19500 }, { "epoch": 0.35277019986845704, "grad_norm": 39.40625, "learning_rate": 9.944879749707165e-06, "loss": 17.3932, "step": 19510 }, { "epoch": 0.3529510149375849, "grad_norm": 38.0625, "learning_rate": 9.944851497400506e-06, "loss": 16.6536, "step": 19520 }, { "epoch": 0.3531318300067128, "grad_norm": 41.6875, "learning_rate": 9.944823245093847e-06, "loss": 17.1047, "step": 19530 }, { "epoch": 0.35331264507584065, "grad_norm": 40.21875, "learning_rate": 9.944794992787187e-06, "loss": 17.5294, "step": 19540 }, { "epoch": 0.35349346014496846, "grad_norm": 43.1875, "learning_rate": 9.944766740480528e-06, "loss": 16.927, "step": 19550 }, { "epoch": 0.35367427521409633, "grad_norm": 40.84375, "learning_rate": 9.944738488173867e-06, "loss": 17.1587, "step": 19560 }, { "epoch": 0.3538550902832242, "grad_norm": 41.8125, "learning_rate": 9.944710235867209e-06, "loss": 17.4372, "step": 19570 }, { "epoch": 0.3540359053523521, "grad_norm": 41.0, "learning_rate": 9.94468198356055e-06, "loss": 17.637, "step": 19580 }, { "epoch": 0.35421672042147995, "grad_norm": 42.125, "learning_rate": 9.944653731253889e-06, "loss": 17.1499, "step": 19590 }, { "epoch": 0.35439753549060776, "grad_norm": 37.25, "learning_rate": 9.94462547894723e-06, "loss": 17.0127, "step": 19600 }, { "epoch": 0.35457835055973563, "grad_norm": 39.53125, "learning_rate": 9.94459722664057e-06, "loss": 17.0926, "step": 19610 }, { "epoch": 0.3547591656288635, "grad_norm": 40.1875, "learning_rate": 9.94456897433391e-06, "loss": 17.227, "step": 19620 }, { "epoch": 0.3549399806979914, "grad_norm": 44.28125, "learning_rate": 9.944540722027251e-06, "loss": 16.7545, "step": 19630 }, { "epoch": 0.35512079576711925, "grad_norm": 40.625, "learning_rate": 9.94451246972059e-06, "loss": 17.1527, "step": 19640 }, { "epoch": 0.3553016108362471, "grad_norm": 39.5, "learning_rate": 9.94448421741393e-06, "loss": 17.1649, "step": 19650 }, { "epoch": 0.35548242590537493, "grad_norm": 39.9375, "learning_rate": 9.944455965107273e-06, "loss": 17.5156, "step": 19660 }, { "epoch": 0.3556632409745028, "grad_norm": 40.84375, "learning_rate": 9.944427712800614e-06, "loss": 16.9445, "step": 19670 }, { "epoch": 0.3558440560436307, "grad_norm": 42.1875, "learning_rate": 9.944399460493953e-06, "loss": 16.997, "step": 19680 }, { "epoch": 0.35602487111275855, "grad_norm": 40.15625, "learning_rate": 9.944371208187293e-06, "loss": 17.3185, "step": 19690 }, { "epoch": 0.3562056861818864, "grad_norm": 39.0625, "learning_rate": 9.944342955880634e-06, "loss": 17.1195, "step": 19700 }, { "epoch": 0.3563865012510143, "grad_norm": 40.53125, "learning_rate": 9.944314703573974e-06, "loss": 17.2379, "step": 19710 }, { "epoch": 0.3565673163201421, "grad_norm": 39.78125, "learning_rate": 9.944286451267315e-06, "loss": 17.1858, "step": 19720 }, { "epoch": 0.35674813138927, "grad_norm": 39.84375, "learning_rate": 9.944258198960654e-06, "loss": 17.3016, "step": 19730 }, { "epoch": 0.35692894645839784, "grad_norm": 43.34375, "learning_rate": 9.944229946653995e-06, "loss": 17.0505, "step": 19740 }, { "epoch": 0.3571097615275257, "grad_norm": 41.46875, "learning_rate": 9.944201694347337e-06, "loss": 16.6689, "step": 19750 }, { "epoch": 0.3572905765966536, "grad_norm": 43.46875, "learning_rate": 9.944173442040676e-06, "loss": 16.7929, "step": 19760 }, { "epoch": 0.3574713916657814, "grad_norm": 41.0625, "learning_rate": 9.944145189734016e-06, "loss": 17.0547, "step": 19770 }, { "epoch": 0.3576522067349093, "grad_norm": 39.34375, "learning_rate": 9.944116937427357e-06, "loss": 17.0405, "step": 19780 }, { "epoch": 0.35783302180403714, "grad_norm": 41.96875, "learning_rate": 9.944088685120698e-06, "loss": 17.3822, "step": 19790 }, { "epoch": 0.358013836873165, "grad_norm": 43.125, "learning_rate": 9.944060432814038e-06, "loss": 17.0451, "step": 19800 }, { "epoch": 0.3581946519422929, "grad_norm": 39.9375, "learning_rate": 9.944032180507377e-06, "loss": 17.159, "step": 19810 }, { "epoch": 0.35837546701142076, "grad_norm": 42.09375, "learning_rate": 9.944003928200718e-06, "loss": 16.8105, "step": 19820 }, { "epoch": 0.35855628208054857, "grad_norm": 40.46875, "learning_rate": 9.943975675894058e-06, "loss": 17.3335, "step": 19830 }, { "epoch": 0.35873709714967644, "grad_norm": 39.25, "learning_rate": 9.9439474235874e-06, "loss": 17.1033, "step": 19840 }, { "epoch": 0.3589179122188043, "grad_norm": 38.1875, "learning_rate": 9.94391917128074e-06, "loss": 17.2213, "step": 19850 }, { "epoch": 0.3590987272879322, "grad_norm": 42.09375, "learning_rate": 9.94389091897408e-06, "loss": 16.6604, "step": 19860 }, { "epoch": 0.35927954235706006, "grad_norm": 44.65625, "learning_rate": 9.943862666667421e-06, "loss": 17.556, "step": 19870 }, { "epoch": 0.35946035742618787, "grad_norm": 40.1875, "learning_rate": 9.943834414360762e-06, "loss": 16.9048, "step": 19880 }, { "epoch": 0.35964117249531574, "grad_norm": 41.625, "learning_rate": 9.943806162054102e-06, "loss": 17.0686, "step": 19890 }, { "epoch": 0.3598219875644436, "grad_norm": 42.21875, "learning_rate": 9.943777909747441e-06, "loss": 17.6033, "step": 19900 }, { "epoch": 0.3600028026335715, "grad_norm": 40.90625, "learning_rate": 9.943749657440782e-06, "loss": 16.8213, "step": 19910 }, { "epoch": 0.36018361770269935, "grad_norm": 43.375, "learning_rate": 9.943721405134124e-06, "loss": 17.6674, "step": 19920 }, { "epoch": 0.3603644327718272, "grad_norm": 39.0, "learning_rate": 9.943693152827463e-06, "loss": 17.1387, "step": 19930 }, { "epoch": 0.36054524784095504, "grad_norm": 40.21875, "learning_rate": 9.943664900520804e-06, "loss": 17.6029, "step": 19940 }, { "epoch": 0.3607260629100829, "grad_norm": 40.5, "learning_rate": 9.943636648214144e-06, "loss": 17.2961, "step": 19950 }, { "epoch": 0.3609068779792108, "grad_norm": 42.78125, "learning_rate": 9.943608395907485e-06, "loss": 17.1464, "step": 19960 }, { "epoch": 0.36108769304833865, "grad_norm": 41.625, "learning_rate": 9.943580143600825e-06, "loss": 17.2045, "step": 19970 }, { "epoch": 0.3612685081174665, "grad_norm": 39.4375, "learning_rate": 9.943551891294166e-06, "loss": 17.0232, "step": 19980 }, { "epoch": 0.3614493231865944, "grad_norm": 42.5625, "learning_rate": 9.943523638987505e-06, "loss": 16.7419, "step": 19990 }, { "epoch": 0.3616301382557222, "grad_norm": 39.1875, "learning_rate": 9.943495386680846e-06, "loss": 17.3945, "step": 20000 }, { "epoch": 0.3616301382557222, "eval_loss": 2.1501667499542236, "eval_runtime": 228.7367, "eval_samples_per_second": 3174.213, "eval_steps_per_second": 49.599, "step": 20000 }, { "epoch": 0.3618109533248501, "grad_norm": 40.1875, "learning_rate": 9.943467134374188e-06, "loss": 17.2655, "step": 20010 }, { "epoch": 0.36199176839397795, "grad_norm": 40.21875, "learning_rate": 9.943438882067527e-06, "loss": 17.4808, "step": 20020 }, { "epoch": 0.3621725834631058, "grad_norm": 42.1875, "learning_rate": 9.943410629760868e-06, "loss": 17.0917, "step": 20030 }, { "epoch": 0.3623533985322337, "grad_norm": 41.71875, "learning_rate": 9.943382377454208e-06, "loss": 16.8377, "step": 20040 }, { "epoch": 0.3625342136013615, "grad_norm": 44.3125, "learning_rate": 9.943354125147549e-06, "loss": 17.6013, "step": 20050 }, { "epoch": 0.3627150286704894, "grad_norm": 41.59375, "learning_rate": 9.94332587284089e-06, "loss": 17.57, "step": 20060 }, { "epoch": 0.36289584373961725, "grad_norm": 41.09375, "learning_rate": 9.943297620534228e-06, "loss": 17.0332, "step": 20070 }, { "epoch": 0.3630766588087451, "grad_norm": 40.9375, "learning_rate": 9.943269368227569e-06, "loss": 17.2701, "step": 20080 }, { "epoch": 0.363257473877873, "grad_norm": 40.21875, "learning_rate": 9.94324111592091e-06, "loss": 17.0229, "step": 20090 }, { "epoch": 0.36343828894700086, "grad_norm": 42.28125, "learning_rate": 9.943212863614252e-06, "loss": 17.3704, "step": 20100 }, { "epoch": 0.3636191040161287, "grad_norm": 42.84375, "learning_rate": 9.94318461130759e-06, "loss": 16.8609, "step": 20110 }, { "epoch": 0.36379991908525655, "grad_norm": 43.625, "learning_rate": 9.943156359000931e-06, "loss": 17.259, "step": 20120 }, { "epoch": 0.3639807341543844, "grad_norm": 41.4375, "learning_rate": 9.943128106694272e-06, "loss": 17.0671, "step": 20130 }, { "epoch": 0.3641615492235123, "grad_norm": 40.78125, "learning_rate": 9.943099854387613e-06, "loss": 17.6245, "step": 20140 }, { "epoch": 0.36434236429264016, "grad_norm": 43.84375, "learning_rate": 9.943071602080953e-06, "loss": 17.1875, "step": 20150 }, { "epoch": 0.36452317936176803, "grad_norm": 44.40625, "learning_rate": 9.943043349774292e-06, "loss": 17.0023, "step": 20160 }, { "epoch": 0.36470399443089585, "grad_norm": 40.0, "learning_rate": 9.943015097467633e-06, "loss": 16.8232, "step": 20170 }, { "epoch": 0.3648848095000237, "grad_norm": 38.3125, "learning_rate": 9.942986845160973e-06, "loss": 17.224, "step": 20180 }, { "epoch": 0.3650656245691516, "grad_norm": 41.9375, "learning_rate": 9.942958592854314e-06, "loss": 17.2585, "step": 20190 }, { "epoch": 0.36524643963827946, "grad_norm": 41.1875, "learning_rate": 9.942930340547655e-06, "loss": 17.0163, "step": 20200 }, { "epoch": 0.36542725470740733, "grad_norm": 44.5, "learning_rate": 9.942902088240995e-06, "loss": 16.949, "step": 20210 }, { "epoch": 0.36560806977653515, "grad_norm": 37.96875, "learning_rate": 9.942873835934336e-06, "loss": 17.4766, "step": 20220 }, { "epoch": 0.365788884845663, "grad_norm": 40.4375, "learning_rate": 9.942845583627677e-06, "loss": 17.3368, "step": 20230 }, { "epoch": 0.3659696999147909, "grad_norm": 43.8125, "learning_rate": 9.942817331321016e-06, "loss": 16.8237, "step": 20240 }, { "epoch": 0.36615051498391876, "grad_norm": 40.125, "learning_rate": 9.942789079014356e-06, "loss": 17.1638, "step": 20250 }, { "epoch": 0.36633133005304663, "grad_norm": 41.03125, "learning_rate": 9.942760826707697e-06, "loss": 17.4119, "step": 20260 }, { "epoch": 0.3665121451221745, "grad_norm": 39.5, "learning_rate": 9.942732574401039e-06, "loss": 16.9666, "step": 20270 }, { "epoch": 0.3666929601913023, "grad_norm": 38.9375, "learning_rate": 9.942704322094378e-06, "loss": 17.1098, "step": 20280 }, { "epoch": 0.3668737752604302, "grad_norm": 39.71875, "learning_rate": 9.942676069787719e-06, "loss": 17.1769, "step": 20290 }, { "epoch": 0.36705459032955806, "grad_norm": 39.3125, "learning_rate": 9.94264781748106e-06, "loss": 16.8182, "step": 20300 }, { "epoch": 0.36723540539868593, "grad_norm": 40.84375, "learning_rate": 9.9426195651744e-06, "loss": 17.4049, "step": 20310 }, { "epoch": 0.3674162204678138, "grad_norm": 42.21875, "learning_rate": 9.94259131286774e-06, "loss": 17.2833, "step": 20320 }, { "epoch": 0.3675970355369417, "grad_norm": 38.71875, "learning_rate": 9.94256306056108e-06, "loss": 17.001, "step": 20330 }, { "epoch": 0.3677778506060695, "grad_norm": 40.03125, "learning_rate": 9.94253480825442e-06, "loss": 17.0131, "step": 20340 }, { "epoch": 0.36795866567519736, "grad_norm": 40.53125, "learning_rate": 9.94250655594776e-06, "loss": 16.411, "step": 20350 }, { "epoch": 0.36813948074432523, "grad_norm": 41.875, "learning_rate": 9.942478303641101e-06, "loss": 17.2263, "step": 20360 }, { "epoch": 0.3683202958134531, "grad_norm": 42.0625, "learning_rate": 9.942450051334442e-06, "loss": 16.7313, "step": 20370 }, { "epoch": 0.368501110882581, "grad_norm": 40.9375, "learning_rate": 9.942421799027783e-06, "loss": 16.9421, "step": 20380 }, { "epoch": 0.3686819259517088, "grad_norm": 40.4375, "learning_rate": 9.942393546721123e-06, "loss": 17.3382, "step": 20390 }, { "epoch": 0.36886274102083666, "grad_norm": 42.75, "learning_rate": 9.942365294414464e-06, "loss": 17.1704, "step": 20400 }, { "epoch": 0.36904355608996453, "grad_norm": 39.84375, "learning_rate": 9.942337042107804e-06, "loss": 17.117, "step": 20410 }, { "epoch": 0.3692243711590924, "grad_norm": 40.03125, "learning_rate": 9.942308789801143e-06, "loss": 16.4973, "step": 20420 }, { "epoch": 0.3694051862282203, "grad_norm": 41.8125, "learning_rate": 9.942280537494484e-06, "loss": 17.1972, "step": 20430 }, { "epoch": 0.36958600129734814, "grad_norm": 43.1875, "learning_rate": 9.942252285187825e-06, "loss": 17.1071, "step": 20440 }, { "epoch": 0.36976681636647596, "grad_norm": 43.84375, "learning_rate": 9.942224032881165e-06, "loss": 16.8985, "step": 20450 }, { "epoch": 0.36994763143560383, "grad_norm": 41.15625, "learning_rate": 9.942195780574506e-06, "loss": 17.5709, "step": 20460 }, { "epoch": 0.3701284465047317, "grad_norm": 39.8125, "learning_rate": 9.942167528267846e-06, "loss": 17.0988, "step": 20470 }, { "epoch": 0.37030926157385957, "grad_norm": 44.625, "learning_rate": 9.942139275961187e-06, "loss": 17.2837, "step": 20480 }, { "epoch": 0.37049007664298744, "grad_norm": 41.03125, "learning_rate": 9.942111023654528e-06, "loss": 17.2057, "step": 20490 }, { "epoch": 0.3706708917121153, "grad_norm": 41.6875, "learning_rate": 9.942082771347867e-06, "loss": 16.946, "step": 20500 }, { "epoch": 0.37085170678124313, "grad_norm": 41.15625, "learning_rate": 9.942054519041207e-06, "loss": 17.1524, "step": 20510 }, { "epoch": 0.371032521850371, "grad_norm": 42.21875, "learning_rate": 9.942026266734548e-06, "loss": 16.8513, "step": 20520 }, { "epoch": 0.37121333691949887, "grad_norm": 42.65625, "learning_rate": 9.941998014427888e-06, "loss": 16.7765, "step": 20530 }, { "epoch": 0.37139415198862674, "grad_norm": 36.78125, "learning_rate": 9.941969762121229e-06, "loss": 17.0085, "step": 20540 }, { "epoch": 0.3715749670577546, "grad_norm": 37.75, "learning_rate": 9.94194150981457e-06, "loss": 16.9942, "step": 20550 }, { "epoch": 0.37175578212688243, "grad_norm": 42.90625, "learning_rate": 9.94191325750791e-06, "loss": 17.1815, "step": 20560 }, { "epoch": 0.3719365971960103, "grad_norm": 41.28125, "learning_rate": 9.941885005201251e-06, "loss": 16.9027, "step": 20570 }, { "epoch": 0.37211741226513817, "grad_norm": 40.96875, "learning_rate": 9.941856752894592e-06, "loss": 17.4756, "step": 20580 }, { "epoch": 0.37229822733426604, "grad_norm": 43.1875, "learning_rate": 9.94182850058793e-06, "loss": 17.3078, "step": 20590 }, { "epoch": 0.3724790424033939, "grad_norm": 44.0625, "learning_rate": 9.941800248281271e-06, "loss": 16.9457, "step": 20600 }, { "epoch": 0.3726598574725218, "grad_norm": 40.34375, "learning_rate": 9.941771995974612e-06, "loss": 16.969, "step": 20610 }, { "epoch": 0.3728406725416496, "grad_norm": 41.40625, "learning_rate": 9.941743743667952e-06, "loss": 16.8207, "step": 20620 }, { "epoch": 0.37302148761077747, "grad_norm": 42.53125, "learning_rate": 9.941715491361293e-06, "loss": 16.9767, "step": 20630 }, { "epoch": 0.37320230267990534, "grad_norm": 40.59375, "learning_rate": 9.941687239054634e-06, "loss": 16.9558, "step": 20640 }, { "epoch": 0.3733831177490332, "grad_norm": 38.375, "learning_rate": 9.941658986747974e-06, "loss": 16.9039, "step": 20650 }, { "epoch": 0.3735639328181611, "grad_norm": 44.9375, "learning_rate": 9.941630734441315e-06, "loss": 17.105, "step": 20660 }, { "epoch": 0.37374474788728895, "grad_norm": 41.0, "learning_rate": 9.941602482134654e-06, "loss": 16.8125, "step": 20670 }, { "epoch": 0.37392556295641677, "grad_norm": 42.84375, "learning_rate": 9.941574229827994e-06, "loss": 17.5724, "step": 20680 }, { "epoch": 0.37410637802554464, "grad_norm": 43.3125, "learning_rate": 9.941545977521335e-06, "loss": 17.3323, "step": 20690 }, { "epoch": 0.3742871930946725, "grad_norm": 39.96875, "learning_rate": 9.941517725214676e-06, "loss": 17.2595, "step": 20700 }, { "epoch": 0.3744680081638004, "grad_norm": 39.6875, "learning_rate": 9.941489472908016e-06, "loss": 16.9378, "step": 20710 }, { "epoch": 0.37464882323292825, "grad_norm": 40.71875, "learning_rate": 9.941461220601357e-06, "loss": 17.1729, "step": 20720 }, { "epoch": 0.37482963830205607, "grad_norm": 38.5625, "learning_rate": 9.941432968294698e-06, "loss": 17.1281, "step": 20730 }, { "epoch": 0.37501045337118394, "grad_norm": 42.4375, "learning_rate": 9.941404715988038e-06, "loss": 17.3943, "step": 20740 }, { "epoch": 0.3751912684403118, "grad_norm": 41.9375, "learning_rate": 9.941376463681379e-06, "loss": 17.0422, "step": 20750 }, { "epoch": 0.3753720835094397, "grad_norm": 42.90625, "learning_rate": 9.941348211374718e-06, "loss": 17.6543, "step": 20760 }, { "epoch": 0.37555289857856755, "grad_norm": 39.46875, "learning_rate": 9.941319959068058e-06, "loss": 17.1982, "step": 20770 }, { "epoch": 0.3757337136476954, "grad_norm": 43.0625, "learning_rate": 9.941291706761399e-06, "loss": 17.2206, "step": 20780 }, { "epoch": 0.37591452871682324, "grad_norm": 41.4375, "learning_rate": 9.94126345445474e-06, "loss": 16.8636, "step": 20790 }, { "epoch": 0.3760953437859511, "grad_norm": 41.1875, "learning_rate": 9.94123520214808e-06, "loss": 16.6194, "step": 20800 }, { "epoch": 0.376276158855079, "grad_norm": 42.25, "learning_rate": 9.94120694984142e-06, "loss": 17.0831, "step": 20810 }, { "epoch": 0.37645697392420685, "grad_norm": 39.96875, "learning_rate": 9.941178697534761e-06, "loss": 17.2419, "step": 20820 }, { "epoch": 0.3766377889933347, "grad_norm": 41.1875, "learning_rate": 9.941150445228102e-06, "loss": 17.0365, "step": 20830 }, { "epoch": 0.3768186040624626, "grad_norm": 39.90625, "learning_rate": 9.941122192921443e-06, "loss": 17.113, "step": 20840 }, { "epoch": 0.3769994191315904, "grad_norm": 41.03125, "learning_rate": 9.941093940614782e-06, "loss": 17.5729, "step": 20850 }, { "epoch": 0.3771802342007183, "grad_norm": 44.09375, "learning_rate": 9.941065688308122e-06, "loss": 17.2333, "step": 20860 }, { "epoch": 0.37736104926984615, "grad_norm": 40.96875, "learning_rate": 9.941037436001463e-06, "loss": 17.4125, "step": 20870 }, { "epoch": 0.377541864338974, "grad_norm": 40.53125, "learning_rate": 9.941009183694803e-06, "loss": 17.2689, "step": 20880 }, { "epoch": 0.3777226794081019, "grad_norm": 41.375, "learning_rate": 9.940980931388144e-06, "loss": 17.0753, "step": 20890 }, { "epoch": 0.3779034944772297, "grad_norm": 41.96875, "learning_rate": 9.940952679081485e-06, "loss": 17.0969, "step": 20900 }, { "epoch": 0.3780843095463576, "grad_norm": 41.5625, "learning_rate": 9.940924426774825e-06, "loss": 16.9405, "step": 20910 }, { "epoch": 0.37826512461548545, "grad_norm": 41.8125, "learning_rate": 9.940896174468166e-06, "loss": 16.7523, "step": 20920 }, { "epoch": 0.3784459396846133, "grad_norm": 38.96875, "learning_rate": 9.940867922161505e-06, "loss": 16.8263, "step": 20930 }, { "epoch": 0.3786267547537412, "grad_norm": 45.3125, "learning_rate": 9.940839669854846e-06, "loss": 17.2799, "step": 20940 }, { "epoch": 0.37880756982286906, "grad_norm": 40.25, "learning_rate": 9.940811417548186e-06, "loss": 16.8069, "step": 20950 }, { "epoch": 0.3789883848919969, "grad_norm": 40.875, "learning_rate": 9.940783165241527e-06, "loss": 17.2283, "step": 20960 }, { "epoch": 0.37916919996112475, "grad_norm": 39.28125, "learning_rate": 9.940754912934867e-06, "loss": 16.9905, "step": 20970 }, { "epoch": 0.3793500150302526, "grad_norm": 40.65625, "learning_rate": 9.940726660628208e-06, "loss": 17.0025, "step": 20980 }, { "epoch": 0.3795308300993805, "grad_norm": 42.28125, "learning_rate": 9.940698408321549e-06, "loss": 17.2225, "step": 20990 }, { "epoch": 0.37971164516850836, "grad_norm": 43.90625, "learning_rate": 9.94067015601489e-06, "loss": 17.4368, "step": 21000 }, { "epoch": 0.37989246023763623, "grad_norm": 40.375, "learning_rate": 9.94064190370823e-06, "loss": 17.1831, "step": 21010 }, { "epoch": 0.38007327530676405, "grad_norm": 39.40625, "learning_rate": 9.940613651401569e-06, "loss": 17.1583, "step": 21020 }, { "epoch": 0.3802540903758919, "grad_norm": 38.65625, "learning_rate": 9.94058539909491e-06, "loss": 17.0701, "step": 21030 }, { "epoch": 0.3804349054450198, "grad_norm": 39.4375, "learning_rate": 9.94055714678825e-06, "loss": 17.1009, "step": 21040 }, { "epoch": 0.38061572051414766, "grad_norm": 40.5, "learning_rate": 9.94052889448159e-06, "loss": 17.0964, "step": 21050 }, { "epoch": 0.38079653558327553, "grad_norm": 38.21875, "learning_rate": 9.940500642174931e-06, "loss": 17.2598, "step": 21060 }, { "epoch": 0.38097735065240335, "grad_norm": 43.15625, "learning_rate": 9.940472389868272e-06, "loss": 17.3123, "step": 21070 }, { "epoch": 0.3811581657215312, "grad_norm": 40.4375, "learning_rate": 9.940444137561613e-06, "loss": 17.112, "step": 21080 }, { "epoch": 0.3813389807906591, "grad_norm": 41.5, "learning_rate": 9.940415885254953e-06, "loss": 17.1988, "step": 21090 }, { "epoch": 0.38151979585978696, "grad_norm": 40.3125, "learning_rate": 9.940387632948292e-06, "loss": 17.2093, "step": 21100 }, { "epoch": 0.38170061092891483, "grad_norm": 40.09375, "learning_rate": 9.940359380641633e-06, "loss": 17.2864, "step": 21110 }, { "epoch": 0.3818814259980427, "grad_norm": 40.21875, "learning_rate": 9.940331128334973e-06, "loss": 16.7385, "step": 21120 }, { "epoch": 0.3820622410671705, "grad_norm": 40.0, "learning_rate": 9.940302876028314e-06, "loss": 17.4242, "step": 21130 }, { "epoch": 0.3822430561362984, "grad_norm": 41.8125, "learning_rate": 9.940274623721655e-06, "loss": 17.2833, "step": 21140 }, { "epoch": 0.38242387120542626, "grad_norm": 42.0, "learning_rate": 9.940246371414995e-06, "loss": 17.4265, "step": 21150 }, { "epoch": 0.38260468627455413, "grad_norm": 43.75, "learning_rate": 9.940218119108336e-06, "loss": 17.0023, "step": 21160 }, { "epoch": 0.382785501343682, "grad_norm": 38.40625, "learning_rate": 9.940189866801676e-06, "loss": 17.121, "step": 21170 }, { "epoch": 0.38296631641280987, "grad_norm": 40.09375, "learning_rate": 9.940161614495017e-06, "loss": 17.3334, "step": 21180 }, { "epoch": 0.3831471314819377, "grad_norm": 44.5, "learning_rate": 9.940133362188356e-06, "loss": 16.8784, "step": 21190 }, { "epoch": 0.38332794655106556, "grad_norm": 42.34375, "learning_rate": 9.940105109881697e-06, "loss": 17.4054, "step": 21200 }, { "epoch": 0.38350876162019343, "grad_norm": 42.8125, "learning_rate": 9.940076857575037e-06, "loss": 17.2301, "step": 21210 }, { "epoch": 0.3836895766893213, "grad_norm": 41.71875, "learning_rate": 9.940048605268378e-06, "loss": 17.1037, "step": 21220 }, { "epoch": 0.38387039175844917, "grad_norm": 40.375, "learning_rate": 9.940020352961719e-06, "loss": 17.263, "step": 21230 }, { "epoch": 0.384051206827577, "grad_norm": 38.4375, "learning_rate": 9.939992100655059e-06, "loss": 17.4868, "step": 21240 }, { "epoch": 0.38423202189670486, "grad_norm": 45.40625, "learning_rate": 9.9399638483484e-06, "loss": 17.1353, "step": 21250 }, { "epoch": 0.3844128369658327, "grad_norm": 40.15625, "learning_rate": 9.93993559604174e-06, "loss": 17.1201, "step": 21260 }, { "epoch": 0.3845936520349606, "grad_norm": 38.65625, "learning_rate": 9.939907343735081e-06, "loss": 17.1401, "step": 21270 }, { "epoch": 0.38477446710408847, "grad_norm": 40.03125, "learning_rate": 9.93987909142842e-06, "loss": 17.2789, "step": 21280 }, { "epoch": 0.38495528217321634, "grad_norm": 40.71875, "learning_rate": 9.93985083912176e-06, "loss": 16.8673, "step": 21290 }, { "epoch": 0.38513609724234416, "grad_norm": 41.375, "learning_rate": 9.939822586815101e-06, "loss": 16.7684, "step": 21300 }, { "epoch": 0.385316912311472, "grad_norm": 41.53125, "learning_rate": 9.939794334508442e-06, "loss": 17.5884, "step": 21310 }, { "epoch": 0.3854977273805999, "grad_norm": 44.71875, "learning_rate": 9.939766082201782e-06, "loss": 17.2755, "step": 21320 }, { "epoch": 0.38567854244972777, "grad_norm": 38.78125, "learning_rate": 9.939737829895123e-06, "loss": 17.0057, "step": 21330 }, { "epoch": 0.38585935751885564, "grad_norm": 41.6875, "learning_rate": 9.939709577588464e-06, "loss": 17.2807, "step": 21340 }, { "epoch": 0.3860401725879835, "grad_norm": 41.90625, "learning_rate": 9.939681325281804e-06, "loss": 17.0537, "step": 21350 }, { "epoch": 0.3862209876571113, "grad_norm": 41.96875, "learning_rate": 9.939653072975143e-06, "loss": 16.9537, "step": 21360 }, { "epoch": 0.3864018027262392, "grad_norm": 42.96875, "learning_rate": 9.939624820668484e-06, "loss": 16.8987, "step": 21370 }, { "epoch": 0.38658261779536707, "grad_norm": 40.34375, "learning_rate": 9.939596568361824e-06, "loss": 17.2109, "step": 21380 }, { "epoch": 0.38676343286449494, "grad_norm": 39.15625, "learning_rate": 9.939568316055165e-06, "loss": 17.2425, "step": 21390 }, { "epoch": 0.3869442479336228, "grad_norm": 39.8125, "learning_rate": 9.939540063748506e-06, "loss": 17.6312, "step": 21400 }, { "epoch": 0.3871250630027506, "grad_norm": 41.78125, "learning_rate": 9.939511811441846e-06, "loss": 16.8594, "step": 21410 }, { "epoch": 0.3873058780718785, "grad_norm": 39.65625, "learning_rate": 9.939483559135187e-06, "loss": 17.2082, "step": 21420 }, { "epoch": 0.38748669314100637, "grad_norm": 40.0625, "learning_rate": 9.939455306828528e-06, "loss": 16.9624, "step": 21430 }, { "epoch": 0.38766750821013424, "grad_norm": 41.625, "learning_rate": 9.939427054521868e-06, "loss": 16.6994, "step": 21440 }, { "epoch": 0.3878483232792621, "grad_norm": 41.09375, "learning_rate": 9.939398802215207e-06, "loss": 17.3257, "step": 21450 }, { "epoch": 0.38802913834839, "grad_norm": 43.0, "learning_rate": 9.939370549908548e-06, "loss": 17.1406, "step": 21460 }, { "epoch": 0.3882099534175178, "grad_norm": 37.6875, "learning_rate": 9.939342297601888e-06, "loss": 17.3857, "step": 21470 }, { "epoch": 0.38839076848664567, "grad_norm": 41.90625, "learning_rate": 9.939314045295229e-06, "loss": 16.8819, "step": 21480 }, { "epoch": 0.38857158355577354, "grad_norm": 42.34375, "learning_rate": 9.93928579298857e-06, "loss": 17.316, "step": 21490 }, { "epoch": 0.3887523986249014, "grad_norm": 40.21875, "learning_rate": 9.93925754068191e-06, "loss": 17.0337, "step": 21500 }, { "epoch": 0.3889332136940293, "grad_norm": 39.9375, "learning_rate": 9.93922928837525e-06, "loss": 17.1861, "step": 21510 }, { "epoch": 0.3891140287631571, "grad_norm": 41.8125, "learning_rate": 9.939201036068591e-06, "loss": 16.8454, "step": 21520 }, { "epoch": 0.38929484383228496, "grad_norm": 44.28125, "learning_rate": 9.93917278376193e-06, "loss": 17.6789, "step": 21530 }, { "epoch": 0.38947565890141284, "grad_norm": 39.78125, "learning_rate": 9.939144531455271e-06, "loss": 16.8952, "step": 21540 }, { "epoch": 0.3896564739705407, "grad_norm": 40.78125, "learning_rate": 9.939116279148612e-06, "loss": 17.1093, "step": 21550 }, { "epoch": 0.3898372890396686, "grad_norm": 42.25, "learning_rate": 9.939088026841952e-06, "loss": 17.1814, "step": 21560 }, { "epoch": 0.39001810410879645, "grad_norm": 42.4375, "learning_rate": 9.939059774535293e-06, "loss": 17.0269, "step": 21570 }, { "epoch": 0.39019891917792426, "grad_norm": 41.0625, "learning_rate": 9.939031522228632e-06, "loss": 17.3362, "step": 21580 }, { "epoch": 0.39037973424705213, "grad_norm": 40.125, "learning_rate": 9.939003269921974e-06, "loss": 16.7692, "step": 21590 }, { "epoch": 0.39056054931618, "grad_norm": 40.40625, "learning_rate": 9.938975017615315e-06, "loss": 17.1616, "step": 21600 }, { "epoch": 0.3907413643853079, "grad_norm": 44.03125, "learning_rate": 9.938946765308655e-06, "loss": 17.3161, "step": 21610 }, { "epoch": 0.39092217945443575, "grad_norm": 39.75, "learning_rate": 9.938918513001994e-06, "loss": 17.4791, "step": 21620 }, { "epoch": 0.3911029945235636, "grad_norm": 41.9375, "learning_rate": 9.938890260695335e-06, "loss": 17.2422, "step": 21630 }, { "epoch": 0.39128380959269143, "grad_norm": 42.125, "learning_rate": 9.938862008388676e-06, "loss": 17.1619, "step": 21640 }, { "epoch": 0.3914646246618193, "grad_norm": 40.9375, "learning_rate": 9.938833756082016e-06, "loss": 17.3982, "step": 21650 }, { "epoch": 0.3916454397309472, "grad_norm": 38.71875, "learning_rate": 9.938805503775357e-06, "loss": 17.2333, "step": 21660 }, { "epoch": 0.39182625480007505, "grad_norm": 41.6875, "learning_rate": 9.938777251468696e-06, "loss": 17.1, "step": 21670 }, { "epoch": 0.3920070698692029, "grad_norm": 39.4375, "learning_rate": 9.938748999162038e-06, "loss": 17.1815, "step": 21680 }, { "epoch": 0.39218788493833073, "grad_norm": 39.40625, "learning_rate": 9.938720746855379e-06, "loss": 16.9364, "step": 21690 }, { "epoch": 0.3923687000074586, "grad_norm": 42.46875, "learning_rate": 9.938692494548718e-06, "loss": 17.2628, "step": 21700 }, { "epoch": 0.3925495150765865, "grad_norm": 43.53125, "learning_rate": 9.938664242242058e-06, "loss": 17.1232, "step": 21710 }, { "epoch": 0.39273033014571435, "grad_norm": 39.6875, "learning_rate": 9.938635989935399e-06, "loss": 16.7432, "step": 21720 }, { "epoch": 0.3929111452148422, "grad_norm": 39.1875, "learning_rate": 9.93860773762874e-06, "loss": 17.1859, "step": 21730 }, { "epoch": 0.3930919602839701, "grad_norm": 43.5, "learning_rate": 9.93857948532208e-06, "loss": 17.0688, "step": 21740 }, { "epoch": 0.3932727753530979, "grad_norm": 41.875, "learning_rate": 9.93855123301542e-06, "loss": 17.1453, "step": 21750 }, { "epoch": 0.3934535904222258, "grad_norm": 43.71875, "learning_rate": 9.938522980708761e-06, "loss": 17.4998, "step": 21760 }, { "epoch": 0.39363440549135364, "grad_norm": 44.71875, "learning_rate": 9.938494728402102e-06, "loss": 17.1017, "step": 21770 }, { "epoch": 0.3938152205604815, "grad_norm": 42.6875, "learning_rate": 9.938466476095443e-06, "loss": 16.8154, "step": 21780 }, { "epoch": 0.3939960356296094, "grad_norm": 41.8125, "learning_rate": 9.938438223788781e-06, "loss": 17.0335, "step": 21790 }, { "epoch": 0.39417685069873726, "grad_norm": 41.375, "learning_rate": 9.938409971482122e-06, "loss": 17.3227, "step": 21800 }, { "epoch": 0.3943576657678651, "grad_norm": 40.3125, "learning_rate": 9.938381719175463e-06, "loss": 16.984, "step": 21810 }, { "epoch": 0.39453848083699294, "grad_norm": 40.96875, "learning_rate": 9.938353466868803e-06, "loss": 17.2131, "step": 21820 }, { "epoch": 0.3947192959061208, "grad_norm": 42.375, "learning_rate": 9.938325214562144e-06, "loss": 17.0346, "step": 21830 }, { "epoch": 0.3949001109752487, "grad_norm": 41.09375, "learning_rate": 9.938296962255483e-06, "loss": 17.0261, "step": 21840 }, { "epoch": 0.39508092604437656, "grad_norm": 42.0, "learning_rate": 9.938268709948825e-06, "loss": 17.3454, "step": 21850 }, { "epoch": 0.3952617411135044, "grad_norm": 39.8125, "learning_rate": 9.938240457642166e-06, "loss": 17.0362, "step": 21860 }, { "epoch": 0.39544255618263224, "grad_norm": 40.78125, "learning_rate": 9.938212205335506e-06, "loss": 17.1867, "step": 21870 }, { "epoch": 0.3956233712517601, "grad_norm": 43.84375, "learning_rate": 9.938183953028845e-06, "loss": 17.3699, "step": 21880 }, { "epoch": 0.395804186320888, "grad_norm": 40.03125, "learning_rate": 9.938155700722186e-06, "loss": 17.1582, "step": 21890 }, { "epoch": 0.39598500139001586, "grad_norm": 38.84375, "learning_rate": 9.938127448415527e-06, "loss": 17.2781, "step": 21900 }, { "epoch": 0.3961658164591437, "grad_norm": 43.09375, "learning_rate": 9.938099196108867e-06, "loss": 17.2928, "step": 21910 }, { "epoch": 0.39634663152827154, "grad_norm": 44.125, "learning_rate": 9.938070943802208e-06, "loss": 17.1416, "step": 21920 }, { "epoch": 0.3965274465973994, "grad_norm": 40.25, "learning_rate": 9.938042691495547e-06, "loss": 17.1793, "step": 21930 }, { "epoch": 0.3967082616665273, "grad_norm": 41.28125, "learning_rate": 9.938014439188889e-06, "loss": 17.5213, "step": 21940 }, { "epoch": 0.39688907673565516, "grad_norm": 40.4375, "learning_rate": 9.93798618688223e-06, "loss": 17.0786, "step": 21950 }, { "epoch": 0.397069891804783, "grad_norm": 40.625, "learning_rate": 9.937957934575569e-06, "loss": 17.1238, "step": 21960 }, { "epoch": 0.3972507068739109, "grad_norm": 40.5, "learning_rate": 9.93792968226891e-06, "loss": 17.5342, "step": 21970 }, { "epoch": 0.3974315219430387, "grad_norm": 43.0, "learning_rate": 9.93790142996225e-06, "loss": 17.2814, "step": 21980 }, { "epoch": 0.3976123370121666, "grad_norm": 40.75, "learning_rate": 9.93787317765559e-06, "loss": 17.0391, "step": 21990 }, { "epoch": 0.39779315208129445, "grad_norm": 42.125, "learning_rate": 9.937844925348931e-06, "loss": 17.3825, "step": 22000 }, { "epoch": 0.3979739671504223, "grad_norm": 40.875, "learning_rate": 9.93781667304227e-06, "loss": 17.0485, "step": 22010 }, { "epoch": 0.3981547822195502, "grad_norm": 37.71875, "learning_rate": 9.93778842073561e-06, "loss": 16.8926, "step": 22020 }, { "epoch": 0.398335597288678, "grad_norm": 40.71875, "learning_rate": 9.937760168428953e-06, "loss": 17.0525, "step": 22030 }, { "epoch": 0.3985164123578059, "grad_norm": 40.03125, "learning_rate": 9.937731916122294e-06, "loss": 16.8813, "step": 22040 }, { "epoch": 0.39869722742693375, "grad_norm": 41.5, "learning_rate": 9.937703663815633e-06, "loss": 16.917, "step": 22050 }, { "epoch": 0.3988780424960616, "grad_norm": 41.8125, "learning_rate": 9.937675411508973e-06, "loss": 17.3316, "step": 22060 }, { "epoch": 0.3990588575651895, "grad_norm": 43.0625, "learning_rate": 9.937647159202314e-06, "loss": 17.4144, "step": 22070 }, { "epoch": 0.39923967263431737, "grad_norm": 42.25, "learning_rate": 9.937618906895654e-06, "loss": 16.9716, "step": 22080 }, { "epoch": 0.3994204877034452, "grad_norm": 38.46875, "learning_rate": 9.937590654588995e-06, "loss": 17.2706, "step": 22090 }, { "epoch": 0.39960130277257305, "grad_norm": 42.09375, "learning_rate": 9.937562402282334e-06, "loss": 17.2526, "step": 22100 }, { "epoch": 0.3997821178417009, "grad_norm": 38.21875, "learning_rate": 9.937534149975676e-06, "loss": 16.7109, "step": 22110 }, { "epoch": 0.3999629329108288, "grad_norm": 40.0, "learning_rate": 9.937505897669017e-06, "loss": 16.9083, "step": 22120 }, { "epoch": 0.40014374797995667, "grad_norm": 41.59375, "learning_rate": 9.937477645362356e-06, "loss": 17.1417, "step": 22130 }, { "epoch": 0.40032456304908454, "grad_norm": 41.96875, "learning_rate": 9.937449393055696e-06, "loss": 16.9667, "step": 22140 }, { "epoch": 0.40050537811821235, "grad_norm": 41.375, "learning_rate": 9.937421140749037e-06, "loss": 16.957, "step": 22150 }, { "epoch": 0.4006861931873402, "grad_norm": 39.625, "learning_rate": 9.937392888442378e-06, "loss": 17.6841, "step": 22160 }, { "epoch": 0.4008670082564681, "grad_norm": 41.53125, "learning_rate": 9.937364636135718e-06, "loss": 16.9138, "step": 22170 }, { "epoch": 0.40104782332559596, "grad_norm": 40.625, "learning_rate": 9.937336383829059e-06, "loss": 17.0477, "step": 22180 }, { "epoch": 0.40122863839472384, "grad_norm": 41.625, "learning_rate": 9.937308131522398e-06, "loss": 17.344, "step": 22190 }, { "epoch": 0.40140945346385165, "grad_norm": 41.125, "learning_rate": 9.93727987921574e-06, "loss": 16.9657, "step": 22200 }, { "epoch": 0.4015902685329795, "grad_norm": 39.53125, "learning_rate": 9.937251626909081e-06, "loss": 16.5823, "step": 22210 }, { "epoch": 0.4017710836021074, "grad_norm": 39.78125, "learning_rate": 9.93722337460242e-06, "loss": 17.1726, "step": 22220 }, { "epoch": 0.40195189867123526, "grad_norm": 41.375, "learning_rate": 9.93719512229576e-06, "loss": 17.4138, "step": 22230 }, { "epoch": 0.40213271374036313, "grad_norm": 39.0, "learning_rate": 9.937166869989101e-06, "loss": 17.2267, "step": 22240 }, { "epoch": 0.402313528809491, "grad_norm": 40.0, "learning_rate": 9.937138617682442e-06, "loss": 17.1238, "step": 22250 }, { "epoch": 0.4024943438786188, "grad_norm": 46.125, "learning_rate": 9.937110365375782e-06, "loss": 16.9259, "step": 22260 }, { "epoch": 0.4026751589477467, "grad_norm": 39.28125, "learning_rate": 9.937082113069121e-06, "loss": 17.2861, "step": 22270 }, { "epoch": 0.40285597401687456, "grad_norm": 43.3125, "learning_rate": 9.937053860762462e-06, "loss": 17.508, "step": 22280 }, { "epoch": 0.40303678908600243, "grad_norm": 44.125, "learning_rate": 9.937025608455804e-06, "loss": 16.9261, "step": 22290 }, { "epoch": 0.4032176041551303, "grad_norm": 40.71875, "learning_rate": 9.936997356149145e-06, "loss": 17.3141, "step": 22300 }, { "epoch": 0.4033984192242582, "grad_norm": 42.34375, "learning_rate": 9.936969103842484e-06, "loss": 17.0033, "step": 22310 }, { "epoch": 0.403579234293386, "grad_norm": 38.65625, "learning_rate": 9.936940851535824e-06, "loss": 17.1212, "step": 22320 }, { "epoch": 0.40376004936251386, "grad_norm": 40.5, "learning_rate": 9.936912599229165e-06, "loss": 16.7634, "step": 22330 }, { "epoch": 0.40394086443164173, "grad_norm": 42.03125, "learning_rate": 9.936884346922506e-06, "loss": 17.2485, "step": 22340 }, { "epoch": 0.4041216795007696, "grad_norm": 42.0625, "learning_rate": 9.936856094615846e-06, "loss": 17.0581, "step": 22350 }, { "epoch": 0.4043024945698975, "grad_norm": 43.625, "learning_rate": 9.936827842309185e-06, "loss": 17.339, "step": 22360 }, { "epoch": 0.4044833096390253, "grad_norm": 38.9375, "learning_rate": 9.936799590002526e-06, "loss": 17.0071, "step": 22370 }, { "epoch": 0.40466412470815316, "grad_norm": 41.5, "learning_rate": 9.936771337695868e-06, "loss": 16.8362, "step": 22380 }, { "epoch": 0.40484493977728103, "grad_norm": 41.9375, "learning_rate": 9.936743085389207e-06, "loss": 17.1963, "step": 22390 }, { "epoch": 0.4050257548464089, "grad_norm": 39.75, "learning_rate": 9.936714833082548e-06, "loss": 16.811, "step": 22400 }, { "epoch": 0.4052065699155368, "grad_norm": 43.65625, "learning_rate": 9.936686580775888e-06, "loss": 17.4384, "step": 22410 }, { "epoch": 0.40538738498466464, "grad_norm": 40.53125, "learning_rate": 9.936658328469229e-06, "loss": 17.1916, "step": 22420 }, { "epoch": 0.40556820005379246, "grad_norm": 42.03125, "learning_rate": 9.93663007616257e-06, "loss": 17.2659, "step": 22430 }, { "epoch": 0.40574901512292033, "grad_norm": 42.25, "learning_rate": 9.936601823855908e-06, "loss": 17.0808, "step": 22440 }, { "epoch": 0.4059298301920482, "grad_norm": 41.53125, "learning_rate": 9.936573571549249e-06, "loss": 17.25, "step": 22450 }, { "epoch": 0.4061106452611761, "grad_norm": 42.4375, "learning_rate": 9.936545319242591e-06, "loss": 17.3886, "step": 22460 }, { "epoch": 0.40629146033030394, "grad_norm": 42.4375, "learning_rate": 9.936517066935932e-06, "loss": 17.0031, "step": 22470 }, { "epoch": 0.4064722753994318, "grad_norm": 46.25, "learning_rate": 9.936488814629271e-06, "loss": 17.1316, "step": 22480 }, { "epoch": 0.40665309046855963, "grad_norm": 43.34375, "learning_rate": 9.936460562322612e-06, "loss": 17.3826, "step": 22490 }, { "epoch": 0.4068339055376875, "grad_norm": 38.9375, "learning_rate": 9.936432310015952e-06, "loss": 16.9754, "step": 22500 }, { "epoch": 0.40701472060681537, "grad_norm": 41.15625, "learning_rate": 9.936404057709293e-06, "loss": 17.3494, "step": 22510 }, { "epoch": 0.40719553567594324, "grad_norm": 41.875, "learning_rate": 9.936375805402633e-06, "loss": 16.3543, "step": 22520 }, { "epoch": 0.4073763507450711, "grad_norm": 40.15625, "learning_rate": 9.936347553095972e-06, "loss": 17.2515, "step": 22530 }, { "epoch": 0.40755716581419893, "grad_norm": 43.6875, "learning_rate": 9.936319300789313e-06, "loss": 17.1219, "step": 22540 }, { "epoch": 0.4077379808833268, "grad_norm": 42.1875, "learning_rate": 9.936291048482655e-06, "loss": 17.0418, "step": 22550 }, { "epoch": 0.40791879595245467, "grad_norm": 41.5625, "learning_rate": 9.936262796175994e-06, "loss": 17.096, "step": 22560 }, { "epoch": 0.40809961102158254, "grad_norm": 41.25, "learning_rate": 9.936234543869335e-06, "loss": 17.0234, "step": 22570 }, { "epoch": 0.4082804260907104, "grad_norm": 43.71875, "learning_rate": 9.936206291562675e-06, "loss": 17.0583, "step": 22580 }, { "epoch": 0.4084612411598383, "grad_norm": 42.34375, "learning_rate": 9.936178039256016e-06, "loss": 17.0878, "step": 22590 }, { "epoch": 0.4086420562289661, "grad_norm": 41.8125, "learning_rate": 9.936149786949357e-06, "loss": 16.7648, "step": 22600 }, { "epoch": 0.40882287129809397, "grad_norm": 42.90625, "learning_rate": 9.936121534642697e-06, "loss": 17.1874, "step": 22610 }, { "epoch": 0.40900368636722184, "grad_norm": 43.6875, "learning_rate": 9.936093282336036e-06, "loss": 17.5475, "step": 22620 }, { "epoch": 0.4091845014363497, "grad_norm": 42.59375, "learning_rate": 9.936065030029377e-06, "loss": 17.4482, "step": 22630 }, { "epoch": 0.4093653165054776, "grad_norm": 38.4375, "learning_rate": 9.936036777722719e-06, "loss": 17.099, "step": 22640 }, { "epoch": 0.40954613157460545, "grad_norm": 41.0, "learning_rate": 9.936008525416058e-06, "loss": 17.336, "step": 22650 }, { "epoch": 0.40972694664373327, "grad_norm": 41.90625, "learning_rate": 9.935980273109399e-06, "loss": 17.0403, "step": 22660 }, { "epoch": 0.40990776171286114, "grad_norm": 42.09375, "learning_rate": 9.93595202080274e-06, "loss": 17.0826, "step": 22670 }, { "epoch": 0.410088576781989, "grad_norm": 41.625, "learning_rate": 9.93592376849608e-06, "loss": 17.2808, "step": 22680 }, { "epoch": 0.4102693918511169, "grad_norm": 41.25, "learning_rate": 9.93589551618942e-06, "loss": 16.7892, "step": 22690 }, { "epoch": 0.41045020692024475, "grad_norm": 43.28125, "learning_rate": 9.93586726388276e-06, "loss": 16.9339, "step": 22700 }, { "epoch": 0.41063102198937257, "grad_norm": 39.125, "learning_rate": 9.9358390115761e-06, "loss": 16.9299, "step": 22710 }, { "epoch": 0.41081183705850044, "grad_norm": 42.0625, "learning_rate": 9.93581075926944e-06, "loss": 17.1152, "step": 22720 }, { "epoch": 0.4109926521276283, "grad_norm": 42.53125, "learning_rate": 9.935782506962783e-06, "loss": 17.313, "step": 22730 }, { "epoch": 0.4111734671967562, "grad_norm": 43.0, "learning_rate": 9.935754254656122e-06, "loss": 17.5031, "step": 22740 }, { "epoch": 0.41135428226588405, "grad_norm": 41.40625, "learning_rate": 9.935726002349463e-06, "loss": 17.04, "step": 22750 }, { "epoch": 0.4115350973350119, "grad_norm": 40.40625, "learning_rate": 9.935697750042803e-06, "loss": 17.2801, "step": 22760 }, { "epoch": 0.41171591240413974, "grad_norm": 41.53125, "learning_rate": 9.935669497736144e-06, "loss": 17.1183, "step": 22770 }, { "epoch": 0.4118967274732676, "grad_norm": 38.28125, "learning_rate": 9.935641245429484e-06, "loss": 17.0931, "step": 22780 }, { "epoch": 0.4120775425423955, "grad_norm": 40.59375, "learning_rate": 9.935612993122823e-06, "loss": 17.5523, "step": 22790 }, { "epoch": 0.41225835761152335, "grad_norm": 42.5, "learning_rate": 9.935584740816164e-06, "loss": 16.6863, "step": 22800 }, { "epoch": 0.4124391726806512, "grad_norm": 42.84375, "learning_rate": 9.935556488509506e-06, "loss": 16.9507, "step": 22810 }, { "epoch": 0.4126199877497791, "grad_norm": 43.8125, "learning_rate": 9.935528236202845e-06, "loss": 16.8283, "step": 22820 }, { "epoch": 0.4128008028189069, "grad_norm": 43.28125, "learning_rate": 9.935499983896186e-06, "loss": 17.189, "step": 22830 }, { "epoch": 0.4129816178880348, "grad_norm": 41.21875, "learning_rate": 9.935471731589527e-06, "loss": 17.2389, "step": 22840 }, { "epoch": 0.41316243295716265, "grad_norm": 40.65625, "learning_rate": 9.935443479282867e-06, "loss": 16.7641, "step": 22850 }, { "epoch": 0.4133432480262905, "grad_norm": 40.28125, "learning_rate": 9.935415226976208e-06, "loss": 17.1253, "step": 22860 }, { "epoch": 0.4135240630954184, "grad_norm": 44.28125, "learning_rate": 9.935386974669547e-06, "loss": 17.3765, "step": 22870 }, { "epoch": 0.4137048781645462, "grad_norm": 44.28125, "learning_rate": 9.935358722362887e-06, "loss": 17.4771, "step": 22880 }, { "epoch": 0.4138856932336741, "grad_norm": 39.625, "learning_rate": 9.935330470056228e-06, "loss": 16.9489, "step": 22890 }, { "epoch": 0.41406650830280195, "grad_norm": 41.8125, "learning_rate": 9.93530221774957e-06, "loss": 17.4015, "step": 22900 }, { "epoch": 0.4142473233719298, "grad_norm": 39.28125, "learning_rate": 9.93527396544291e-06, "loss": 17.2297, "step": 22910 }, { "epoch": 0.4144281384410577, "grad_norm": 41.5625, "learning_rate": 9.93524571313625e-06, "loss": 17.3229, "step": 22920 }, { "epoch": 0.41460895351018556, "grad_norm": 42.6875, "learning_rate": 9.93521746082959e-06, "loss": 17.0622, "step": 22930 }, { "epoch": 0.4147897685793134, "grad_norm": 37.96875, "learning_rate": 9.935189208522931e-06, "loss": 17.4614, "step": 22940 }, { "epoch": 0.41497058364844125, "grad_norm": 41.15625, "learning_rate": 9.935160956216272e-06, "loss": 17.0732, "step": 22950 }, { "epoch": 0.4151513987175691, "grad_norm": 40.15625, "learning_rate": 9.93513270390961e-06, "loss": 17.2508, "step": 22960 }, { "epoch": 0.415332213786697, "grad_norm": 40.40625, "learning_rate": 9.935104451602951e-06, "loss": 16.7658, "step": 22970 }, { "epoch": 0.41551302885582486, "grad_norm": 41.1875, "learning_rate": 9.935076199296292e-06, "loss": 16.942, "step": 22980 }, { "epoch": 0.41569384392495273, "grad_norm": 41.46875, "learning_rate": 9.935047946989632e-06, "loss": 17.2235, "step": 22990 }, { "epoch": 0.41587465899408055, "grad_norm": 41.96875, "learning_rate": 9.935019694682973e-06, "loss": 16.4738, "step": 23000 }, { "epoch": 0.4160554740632084, "grad_norm": 42.5, "learning_rate": 9.934991442376314e-06, "loss": 17.0574, "step": 23010 }, { "epoch": 0.4162362891323363, "grad_norm": 42.46875, "learning_rate": 9.934963190069654e-06, "loss": 17.2055, "step": 23020 }, { "epoch": 0.41641710420146416, "grad_norm": 38.15625, "learning_rate": 9.934934937762995e-06, "loss": 16.9899, "step": 23030 }, { "epoch": 0.41659791927059203, "grad_norm": 43.375, "learning_rate": 9.934906685456336e-06, "loss": 17.4972, "step": 23040 }, { "epoch": 0.41677873433971985, "grad_norm": 39.09375, "learning_rate": 9.934878433149674e-06, "loss": 17.2599, "step": 23050 }, { "epoch": 0.4169595494088477, "grad_norm": 39.78125, "learning_rate": 9.934850180843015e-06, "loss": 17.2226, "step": 23060 }, { "epoch": 0.4171403644779756, "grad_norm": 40.8125, "learning_rate": 9.934821928536356e-06, "loss": 17.1681, "step": 23070 }, { "epoch": 0.41732117954710346, "grad_norm": 44.0, "learning_rate": 9.934793676229696e-06, "loss": 17.1996, "step": 23080 }, { "epoch": 0.41750199461623133, "grad_norm": 42.625, "learning_rate": 9.934765423923037e-06, "loss": 17.3154, "step": 23090 }, { "epoch": 0.4176828096853592, "grad_norm": 41.125, "learning_rate": 9.934737171616378e-06, "loss": 16.4046, "step": 23100 }, { "epoch": 0.417863624754487, "grad_norm": 42.15625, "learning_rate": 9.934708919309718e-06, "loss": 17.0241, "step": 23110 }, { "epoch": 0.4180444398236149, "grad_norm": 39.3125, "learning_rate": 9.934680667003059e-06, "loss": 16.9767, "step": 23120 }, { "epoch": 0.41822525489274276, "grad_norm": 38.71875, "learning_rate": 9.934652414696398e-06, "loss": 17.1326, "step": 23130 }, { "epoch": 0.41840606996187063, "grad_norm": 42.625, "learning_rate": 9.934624162389738e-06, "loss": 17.2872, "step": 23140 }, { "epoch": 0.4185868850309985, "grad_norm": 40.5, "learning_rate": 9.934595910083079e-06, "loss": 16.9717, "step": 23150 }, { "epoch": 0.41876770010012637, "grad_norm": 39.3125, "learning_rate": 9.934567657776421e-06, "loss": 17.3705, "step": 23160 }, { "epoch": 0.4189485151692542, "grad_norm": 40.84375, "learning_rate": 9.93453940546976e-06, "loss": 17.2579, "step": 23170 }, { "epoch": 0.41912933023838206, "grad_norm": 40.875, "learning_rate": 9.934511153163101e-06, "loss": 17.1618, "step": 23180 }, { "epoch": 0.41931014530750993, "grad_norm": 40.59375, "learning_rate": 9.934482900856442e-06, "loss": 17.0195, "step": 23190 }, { "epoch": 0.4194909603766378, "grad_norm": 38.8125, "learning_rate": 9.934454648549782e-06, "loss": 17.0593, "step": 23200 }, { "epoch": 0.41967177544576567, "grad_norm": 42.375, "learning_rate": 9.934426396243123e-06, "loss": 17.0742, "step": 23210 }, { "epoch": 0.4198525905148935, "grad_norm": 40.875, "learning_rate": 9.934398143936462e-06, "loss": 17.3838, "step": 23220 }, { "epoch": 0.42003340558402136, "grad_norm": 44.09375, "learning_rate": 9.934369891629802e-06, "loss": 17.1919, "step": 23230 }, { "epoch": 0.42021422065314923, "grad_norm": 42.75, "learning_rate": 9.934341639323143e-06, "loss": 16.9229, "step": 23240 }, { "epoch": 0.4203950357222771, "grad_norm": 40.21875, "learning_rate": 9.934313387016484e-06, "loss": 17.1713, "step": 23250 }, { "epoch": 0.42057585079140497, "grad_norm": 43.125, "learning_rate": 9.934285134709824e-06, "loss": 17.0081, "step": 23260 }, { "epoch": 0.42075666586053284, "grad_norm": 43.0625, "learning_rate": 9.934256882403165e-06, "loss": 17.02, "step": 23270 }, { "epoch": 0.42093748092966066, "grad_norm": 40.0625, "learning_rate": 9.934228630096505e-06, "loss": 17.4053, "step": 23280 }, { "epoch": 0.4211182959987885, "grad_norm": 42.46875, "learning_rate": 9.934200377789846e-06, "loss": 16.9431, "step": 23290 }, { "epoch": 0.4212991110679164, "grad_norm": 41.28125, "learning_rate": 9.934172125483185e-06, "loss": 16.5166, "step": 23300 }, { "epoch": 0.42147992613704427, "grad_norm": 37.84375, "learning_rate": 9.934143873176526e-06, "loss": 17.1852, "step": 23310 }, { "epoch": 0.42166074120617214, "grad_norm": 43.0625, "learning_rate": 9.934115620869866e-06, "loss": 16.831, "step": 23320 }, { "epoch": 0.42184155627529996, "grad_norm": 45.15625, "learning_rate": 9.934087368563207e-06, "loss": 17.0062, "step": 23330 }, { "epoch": 0.4220223713444278, "grad_norm": 41.9375, "learning_rate": 9.934059116256547e-06, "loss": 17.0704, "step": 23340 }, { "epoch": 0.4222031864135557, "grad_norm": 40.8125, "learning_rate": 9.934030863949888e-06, "loss": 17.3631, "step": 23350 }, { "epoch": 0.42238400148268357, "grad_norm": 40.53125, "learning_rate": 9.934002611643229e-06, "loss": 17.2416, "step": 23360 }, { "epoch": 0.42256481655181144, "grad_norm": 41.03125, "learning_rate": 9.93397435933657e-06, "loss": 16.8875, "step": 23370 }, { "epoch": 0.4227456316209393, "grad_norm": 42.09375, "learning_rate": 9.93394610702991e-06, "loss": 17.089, "step": 23380 }, { "epoch": 0.4229264466900671, "grad_norm": 41.09375, "learning_rate": 9.933917854723249e-06, "loss": 17.174, "step": 23390 }, { "epoch": 0.423107261759195, "grad_norm": 41.59375, "learning_rate": 9.93388960241659e-06, "loss": 16.8249, "step": 23400 }, { "epoch": 0.42328807682832287, "grad_norm": 42.4375, "learning_rate": 9.93386135010993e-06, "loss": 16.6637, "step": 23410 }, { "epoch": 0.42346889189745074, "grad_norm": 41.40625, "learning_rate": 9.93383309780327e-06, "loss": 17.1979, "step": 23420 }, { "epoch": 0.4236497069665786, "grad_norm": 43.125, "learning_rate": 9.933804845496611e-06, "loss": 16.7769, "step": 23430 }, { "epoch": 0.4238305220357065, "grad_norm": 41.1875, "learning_rate": 9.933776593189952e-06, "loss": 17.3441, "step": 23440 }, { "epoch": 0.4240113371048343, "grad_norm": 39.5, "learning_rate": 9.933748340883293e-06, "loss": 17.4445, "step": 23450 }, { "epoch": 0.42419215217396217, "grad_norm": 41.84375, "learning_rate": 9.933720088576633e-06, "loss": 16.9229, "step": 23460 }, { "epoch": 0.42437296724309004, "grad_norm": 41.28125, "learning_rate": 9.933691836269974e-06, "loss": 17.0078, "step": 23470 }, { "epoch": 0.4245537823122179, "grad_norm": 42.78125, "learning_rate": 9.933663583963313e-06, "loss": 16.8134, "step": 23480 }, { "epoch": 0.4247345973813458, "grad_norm": 42.5, "learning_rate": 9.933635331656653e-06, "loss": 16.873, "step": 23490 }, { "epoch": 0.4249154124504736, "grad_norm": 41.84375, "learning_rate": 9.933607079349994e-06, "loss": 16.9522, "step": 23500 }, { "epoch": 0.42509622751960147, "grad_norm": 42.78125, "learning_rate": 9.933578827043335e-06, "loss": 17.1385, "step": 23510 }, { "epoch": 0.42527704258872934, "grad_norm": 42.4375, "learning_rate": 9.933550574736675e-06, "loss": 17.2821, "step": 23520 }, { "epoch": 0.4254578576578572, "grad_norm": 41.09375, "learning_rate": 9.933522322430016e-06, "loss": 17.0284, "step": 23530 }, { "epoch": 0.4256386727269851, "grad_norm": 40.875, "learning_rate": 9.933494070123357e-06, "loss": 17.3555, "step": 23540 }, { "epoch": 0.42581948779611295, "grad_norm": 44.1875, "learning_rate": 9.933465817816697e-06, "loss": 16.8159, "step": 23550 }, { "epoch": 0.42600030286524077, "grad_norm": 41.6875, "learning_rate": 9.933437565510036e-06, "loss": 17.1709, "step": 23560 }, { "epoch": 0.42618111793436864, "grad_norm": 39.78125, "learning_rate": 9.933409313203377e-06, "loss": 17.1132, "step": 23570 }, { "epoch": 0.4263619330034965, "grad_norm": 40.875, "learning_rate": 9.933381060896717e-06, "loss": 16.997, "step": 23580 }, { "epoch": 0.4265427480726244, "grad_norm": 42.53125, "learning_rate": 9.933352808590058e-06, "loss": 17.2656, "step": 23590 }, { "epoch": 0.42672356314175225, "grad_norm": 38.875, "learning_rate": 9.933324556283399e-06, "loss": 17.3424, "step": 23600 }, { "epoch": 0.4269043782108801, "grad_norm": 42.9375, "learning_rate": 9.93329630397674e-06, "loss": 17.2155, "step": 23610 }, { "epoch": 0.42708519328000794, "grad_norm": 40.1875, "learning_rate": 9.93326805167008e-06, "loss": 17.0139, "step": 23620 }, { "epoch": 0.4272660083491358, "grad_norm": 42.125, "learning_rate": 9.93323979936342e-06, "loss": 17.278, "step": 23630 }, { "epoch": 0.4274468234182637, "grad_norm": 40.53125, "learning_rate": 9.933211547056761e-06, "loss": 17.0852, "step": 23640 }, { "epoch": 0.42762763848739155, "grad_norm": 40.28125, "learning_rate": 9.9331832947501e-06, "loss": 17.0743, "step": 23650 }, { "epoch": 0.4278084535565194, "grad_norm": 41.9375, "learning_rate": 9.93315504244344e-06, "loss": 17.3539, "step": 23660 }, { "epoch": 0.42798926862564723, "grad_norm": 39.5, "learning_rate": 9.933126790136781e-06, "loss": 16.9718, "step": 23670 }, { "epoch": 0.4281700836947751, "grad_norm": 44.4375, "learning_rate": 9.933098537830122e-06, "loss": 17.266, "step": 23680 }, { "epoch": 0.428350898763903, "grad_norm": 42.03125, "learning_rate": 9.933070285523462e-06, "loss": 17.0662, "step": 23690 }, { "epoch": 0.42853171383303085, "grad_norm": 46.09375, "learning_rate": 9.933042033216803e-06, "loss": 16.9782, "step": 23700 }, { "epoch": 0.4287125289021587, "grad_norm": 40.625, "learning_rate": 9.933013780910144e-06, "loss": 17.1737, "step": 23710 }, { "epoch": 0.4288933439712866, "grad_norm": 44.09375, "learning_rate": 9.932985528603484e-06, "loss": 17.0811, "step": 23720 }, { "epoch": 0.4290741590404144, "grad_norm": 41.78125, "learning_rate": 9.932957276296823e-06, "loss": 16.8908, "step": 23730 }, { "epoch": 0.4292549741095423, "grad_norm": 39.34375, "learning_rate": 9.932929023990164e-06, "loss": 17.0634, "step": 23740 }, { "epoch": 0.42943578917867015, "grad_norm": 39.28125, "learning_rate": 9.932900771683505e-06, "loss": 17.1466, "step": 23750 }, { "epoch": 0.429616604247798, "grad_norm": 41.53125, "learning_rate": 9.932872519376845e-06, "loss": 16.7681, "step": 23760 }, { "epoch": 0.4297974193169259, "grad_norm": 38.34375, "learning_rate": 9.932844267070186e-06, "loss": 17.4063, "step": 23770 }, { "epoch": 0.42997823438605376, "grad_norm": 42.46875, "learning_rate": 9.932816014763526e-06, "loss": 17.1721, "step": 23780 }, { "epoch": 0.4301590494551816, "grad_norm": 41.65625, "learning_rate": 9.932787762456867e-06, "loss": 17.095, "step": 23790 }, { "epoch": 0.43033986452430945, "grad_norm": 40.9375, "learning_rate": 9.932759510150208e-06, "loss": 17.3986, "step": 23800 }, { "epoch": 0.4305206795934373, "grad_norm": 44.25, "learning_rate": 9.932731257843548e-06, "loss": 17.5244, "step": 23810 }, { "epoch": 0.4307014946625652, "grad_norm": 41.65625, "learning_rate": 9.932703005536887e-06, "loss": 17.2414, "step": 23820 }, { "epoch": 0.43088230973169306, "grad_norm": 41.90625, "learning_rate": 9.932674753230228e-06, "loss": 16.83, "step": 23830 }, { "epoch": 0.4310631248008209, "grad_norm": 38.875, "learning_rate": 9.932646500923568e-06, "loss": 17.2673, "step": 23840 }, { "epoch": 0.43124393986994874, "grad_norm": 41.40625, "learning_rate": 9.932618248616909e-06, "loss": 17.1928, "step": 23850 }, { "epoch": 0.4314247549390766, "grad_norm": 40.15625, "learning_rate": 9.93258999631025e-06, "loss": 17.1712, "step": 23860 }, { "epoch": 0.4316055700082045, "grad_norm": 40.21875, "learning_rate": 9.93256174400359e-06, "loss": 16.9979, "step": 23870 }, { "epoch": 0.43178638507733236, "grad_norm": 40.78125, "learning_rate": 9.932533491696931e-06, "loss": 17.0087, "step": 23880 }, { "epoch": 0.43196720014646023, "grad_norm": 40.1875, "learning_rate": 9.932505239390272e-06, "loss": 16.8195, "step": 23890 }, { "epoch": 0.43214801521558804, "grad_norm": 38.15625, "learning_rate": 9.932476987083612e-06, "loss": 16.6816, "step": 23900 }, { "epoch": 0.4323288302847159, "grad_norm": 39.8125, "learning_rate": 9.932448734776951e-06, "loss": 16.6358, "step": 23910 }, { "epoch": 0.4325096453538438, "grad_norm": 40.84375, "learning_rate": 9.932420482470292e-06, "loss": 16.9947, "step": 23920 }, { "epoch": 0.43269046042297166, "grad_norm": 39.96875, "learning_rate": 9.932392230163632e-06, "loss": 17.3373, "step": 23930 }, { "epoch": 0.4328712754920995, "grad_norm": 41.09375, "learning_rate": 9.932363977856973e-06, "loss": 17.1195, "step": 23940 }, { "epoch": 0.4330520905612274, "grad_norm": 38.96875, "learning_rate": 9.932335725550314e-06, "loss": 17.2644, "step": 23950 }, { "epoch": 0.4332329056303552, "grad_norm": 40.5, "learning_rate": 9.932307473243654e-06, "loss": 16.8849, "step": 23960 }, { "epoch": 0.4334137206994831, "grad_norm": 41.40625, "learning_rate": 9.932279220936995e-06, "loss": 17.2072, "step": 23970 }, { "epoch": 0.43359453576861096, "grad_norm": 40.3125, "learning_rate": 9.932250968630335e-06, "loss": 16.8773, "step": 23980 }, { "epoch": 0.4337753508377388, "grad_norm": 42.375, "learning_rate": 9.932222716323674e-06, "loss": 17.1238, "step": 23990 }, { "epoch": 0.4339561659068667, "grad_norm": 41.5625, "learning_rate": 9.932194464017015e-06, "loss": 16.9992, "step": 24000 }, { "epoch": 0.4341369809759945, "grad_norm": 39.96875, "learning_rate": 9.932166211710356e-06, "loss": 17.4453, "step": 24010 }, { "epoch": 0.4343177960451224, "grad_norm": 39.875, "learning_rate": 9.932137959403696e-06, "loss": 16.7997, "step": 24020 }, { "epoch": 0.43449861111425025, "grad_norm": 39.28125, "learning_rate": 9.932109707097037e-06, "loss": 17.2229, "step": 24030 }, { "epoch": 0.4346794261833781, "grad_norm": 42.6875, "learning_rate": 9.932081454790377e-06, "loss": 17.3416, "step": 24040 }, { "epoch": 0.434860241252506, "grad_norm": 41.8125, "learning_rate": 9.932053202483718e-06, "loss": 17.0329, "step": 24050 }, { "epoch": 0.43504105632163387, "grad_norm": 41.96875, "learning_rate": 9.932024950177059e-06, "loss": 16.8602, "step": 24060 }, { "epoch": 0.4352218713907617, "grad_norm": 41.71875, "learning_rate": 9.9319966978704e-06, "loss": 17.5934, "step": 24070 }, { "epoch": 0.43540268645988955, "grad_norm": 41.4375, "learning_rate": 9.931968445563738e-06, "loss": 16.8142, "step": 24080 }, { "epoch": 0.4355835015290174, "grad_norm": 42.09375, "learning_rate": 9.931940193257079e-06, "loss": 17.3197, "step": 24090 }, { "epoch": 0.4357643165981453, "grad_norm": 41.5625, "learning_rate": 9.93191194095042e-06, "loss": 17.4021, "step": 24100 }, { "epoch": 0.43594513166727317, "grad_norm": 41.875, "learning_rate": 9.93188368864376e-06, "loss": 16.9198, "step": 24110 }, { "epoch": 0.43612594673640104, "grad_norm": 40.34375, "learning_rate": 9.9318554363371e-06, "loss": 17.3164, "step": 24120 }, { "epoch": 0.43630676180552885, "grad_norm": 42.25, "learning_rate": 9.931827184030441e-06, "loss": 17.3602, "step": 24130 }, { "epoch": 0.4364875768746567, "grad_norm": 43.96875, "learning_rate": 9.931798931723782e-06, "loss": 17.1367, "step": 24140 }, { "epoch": 0.4366683919437846, "grad_norm": 40.96875, "learning_rate": 9.931770679417123e-06, "loss": 17.2549, "step": 24150 }, { "epoch": 0.43684920701291247, "grad_norm": 40.8125, "learning_rate": 9.931742427110462e-06, "loss": 17.0768, "step": 24160 }, { "epoch": 0.43703002208204034, "grad_norm": 39.5625, "learning_rate": 9.931714174803802e-06, "loss": 16.9547, "step": 24170 }, { "epoch": 0.43721083715116815, "grad_norm": 42.15625, "learning_rate": 9.931685922497143e-06, "loss": 16.8944, "step": 24180 }, { "epoch": 0.437391652220296, "grad_norm": 38.4375, "learning_rate": 9.931657670190483e-06, "loss": 17.1201, "step": 24190 }, { "epoch": 0.4375724672894239, "grad_norm": 40.71875, "learning_rate": 9.931629417883824e-06, "loss": 16.9747, "step": 24200 }, { "epoch": 0.43775328235855177, "grad_norm": 42.3125, "learning_rate": 9.931601165577163e-06, "loss": 16.6857, "step": 24210 }, { "epoch": 0.43793409742767964, "grad_norm": 39.21875, "learning_rate": 9.931572913270505e-06, "loss": 17.3643, "step": 24220 }, { "epoch": 0.4381149124968075, "grad_norm": 43.375, "learning_rate": 9.931544660963846e-06, "loss": 17.2106, "step": 24230 }, { "epoch": 0.4382957275659353, "grad_norm": 38.8125, "learning_rate": 9.931516408657187e-06, "loss": 16.833, "step": 24240 }, { "epoch": 0.4384765426350632, "grad_norm": 43.90625, "learning_rate": 9.931488156350525e-06, "loss": 17.0473, "step": 24250 }, { "epoch": 0.43865735770419106, "grad_norm": 40.3125, "learning_rate": 9.931459904043866e-06, "loss": 16.9058, "step": 24260 }, { "epoch": 0.43883817277331894, "grad_norm": 42.65625, "learning_rate": 9.931431651737207e-06, "loss": 16.9209, "step": 24270 }, { "epoch": 0.4390189878424468, "grad_norm": 43.28125, "learning_rate": 9.931403399430547e-06, "loss": 17.0468, "step": 24280 }, { "epoch": 0.4391998029115747, "grad_norm": 40.25, "learning_rate": 9.931375147123888e-06, "loss": 17.075, "step": 24290 }, { "epoch": 0.4393806179807025, "grad_norm": 39.25, "learning_rate": 9.931346894817229e-06, "loss": 16.9849, "step": 24300 }, { "epoch": 0.43956143304983036, "grad_norm": 39.53125, "learning_rate": 9.93131864251057e-06, "loss": 17.4202, "step": 24310 }, { "epoch": 0.43974224811895823, "grad_norm": 42.21875, "learning_rate": 9.93129039020391e-06, "loss": 17.1817, "step": 24320 }, { "epoch": 0.4399230631880861, "grad_norm": 40.59375, "learning_rate": 9.931262137897249e-06, "loss": 17.1151, "step": 24330 }, { "epoch": 0.440103878257214, "grad_norm": 38.375, "learning_rate": 9.93123388559059e-06, "loss": 16.8332, "step": 24340 }, { "epoch": 0.4402846933263418, "grad_norm": 42.5625, "learning_rate": 9.93120563328393e-06, "loss": 17.2657, "step": 24350 }, { "epoch": 0.44046550839546966, "grad_norm": 45.21875, "learning_rate": 9.93117738097727e-06, "loss": 17.5942, "step": 24360 }, { "epoch": 0.44064632346459753, "grad_norm": 39.375, "learning_rate": 9.931149128670611e-06, "loss": 16.6795, "step": 24370 }, { "epoch": 0.4408271385337254, "grad_norm": 42.25, "learning_rate": 9.931120876363952e-06, "loss": 16.5306, "step": 24380 }, { "epoch": 0.4410079536028533, "grad_norm": 39.78125, "learning_rate": 9.931092624057292e-06, "loss": 16.996, "step": 24390 }, { "epoch": 0.44118876867198115, "grad_norm": 39.53125, "learning_rate": 9.931064371750633e-06, "loss": 17.0845, "step": 24400 }, { "epoch": 0.44136958374110896, "grad_norm": 40.5, "learning_rate": 9.931036119443974e-06, "loss": 16.9601, "step": 24410 }, { "epoch": 0.44155039881023683, "grad_norm": 41.78125, "learning_rate": 9.931007867137313e-06, "loss": 17.1927, "step": 24420 }, { "epoch": 0.4417312138793647, "grad_norm": 43.84375, "learning_rate": 9.930979614830653e-06, "loss": 17.0799, "step": 24430 }, { "epoch": 0.4419120289484926, "grad_norm": 39.3125, "learning_rate": 9.930951362523994e-06, "loss": 17.2743, "step": 24440 }, { "epoch": 0.44209284401762045, "grad_norm": 40.6875, "learning_rate": 9.930923110217335e-06, "loss": 17.1115, "step": 24450 }, { "epoch": 0.4422736590867483, "grad_norm": 42.09375, "learning_rate": 9.930894857910675e-06, "loss": 17.0342, "step": 24460 }, { "epoch": 0.44245447415587613, "grad_norm": 39.78125, "learning_rate": 9.930866605604014e-06, "loss": 17.1333, "step": 24470 }, { "epoch": 0.442635289225004, "grad_norm": 41.8125, "learning_rate": 9.930838353297356e-06, "loss": 17.1999, "step": 24480 }, { "epoch": 0.4428161042941319, "grad_norm": 38.75, "learning_rate": 9.930810100990697e-06, "loss": 17.1589, "step": 24490 }, { "epoch": 0.44299691936325974, "grad_norm": 43.4375, "learning_rate": 9.930781848684038e-06, "loss": 17.0211, "step": 24500 }, { "epoch": 0.4431777344323876, "grad_norm": 40.84375, "learning_rate": 9.930753596377377e-06, "loss": 16.4922, "step": 24510 }, { "epoch": 0.44335854950151543, "grad_norm": 39.8125, "learning_rate": 9.930725344070717e-06, "loss": 16.8577, "step": 24520 }, { "epoch": 0.4435393645706433, "grad_norm": 38.875, "learning_rate": 9.930697091764058e-06, "loss": 17.4463, "step": 24530 }, { "epoch": 0.4437201796397712, "grad_norm": 41.09375, "learning_rate": 9.930668839457398e-06, "loss": 17.0105, "step": 24540 }, { "epoch": 0.44390099470889904, "grad_norm": 38.8125, "learning_rate": 9.930640587150739e-06, "loss": 17.0146, "step": 24550 }, { "epoch": 0.4440818097780269, "grad_norm": 39.25, "learning_rate": 9.930612334844078e-06, "loss": 17.0366, "step": 24560 }, { "epoch": 0.4442626248471548, "grad_norm": 42.21875, "learning_rate": 9.93058408253742e-06, "loss": 16.7516, "step": 24570 }, { "epoch": 0.4444434399162826, "grad_norm": 40.125, "learning_rate": 9.930555830230761e-06, "loss": 16.9998, "step": 24580 }, { "epoch": 0.44462425498541047, "grad_norm": 39.5625, "learning_rate": 9.9305275779241e-06, "loss": 16.992, "step": 24590 }, { "epoch": 0.44480507005453834, "grad_norm": 42.34375, "learning_rate": 9.93049932561744e-06, "loss": 17.2084, "step": 24600 }, { "epoch": 0.4449858851236662, "grad_norm": 41.15625, "learning_rate": 9.930471073310781e-06, "loss": 17.1132, "step": 24610 }, { "epoch": 0.4451667001927941, "grad_norm": 40.78125, "learning_rate": 9.930442821004122e-06, "loss": 17.2638, "step": 24620 }, { "epoch": 0.44534751526192196, "grad_norm": 42.09375, "learning_rate": 9.930414568697462e-06, "loss": 17.0395, "step": 24630 }, { "epoch": 0.44552833033104977, "grad_norm": 40.0, "learning_rate": 9.930386316390801e-06, "loss": 17.0863, "step": 24640 }, { "epoch": 0.44570914540017764, "grad_norm": 43.78125, "learning_rate": 9.930358064084144e-06, "loss": 16.9601, "step": 24650 }, { "epoch": 0.4458899604693055, "grad_norm": 43.9375, "learning_rate": 9.930329811777484e-06, "loss": 17.36, "step": 24660 }, { "epoch": 0.4460707755384334, "grad_norm": 41.6875, "learning_rate": 9.930301559470825e-06, "loss": 17.5581, "step": 24670 }, { "epoch": 0.44625159060756125, "grad_norm": 39.40625, "learning_rate": 9.930273307164164e-06, "loss": 17.0764, "step": 24680 }, { "epoch": 0.44643240567668907, "grad_norm": 37.71875, "learning_rate": 9.930245054857504e-06, "loss": 17.2964, "step": 24690 }, { "epoch": 0.44661322074581694, "grad_norm": 41.09375, "learning_rate": 9.930216802550845e-06, "loss": 16.8406, "step": 24700 }, { "epoch": 0.4467940358149448, "grad_norm": 41.34375, "learning_rate": 9.930188550244186e-06, "loss": 16.8859, "step": 24710 }, { "epoch": 0.4469748508840727, "grad_norm": 39.375, "learning_rate": 9.930160297937526e-06, "loss": 17.282, "step": 24720 }, { "epoch": 0.44715566595320055, "grad_norm": 41.0625, "learning_rate": 9.930132045630865e-06, "loss": 16.5238, "step": 24730 }, { "epoch": 0.4473364810223284, "grad_norm": 40.0625, "learning_rate": 9.930103793324207e-06, "loss": 17.1698, "step": 24740 }, { "epoch": 0.44751729609145624, "grad_norm": 38.5625, "learning_rate": 9.930075541017548e-06, "loss": 17.1585, "step": 24750 }, { "epoch": 0.4476981111605841, "grad_norm": 41.46875, "learning_rate": 9.930047288710887e-06, "loss": 17.5576, "step": 24760 }, { "epoch": 0.447878926229712, "grad_norm": 39.875, "learning_rate": 9.930019036404228e-06, "loss": 17.3201, "step": 24770 }, { "epoch": 0.44805974129883985, "grad_norm": 43.0625, "learning_rate": 9.929990784097568e-06, "loss": 16.9252, "step": 24780 }, { "epoch": 0.4482405563679677, "grad_norm": 42.125, "learning_rate": 9.929962531790909e-06, "loss": 17.2519, "step": 24790 }, { "epoch": 0.4484213714370956, "grad_norm": 38.40625, "learning_rate": 9.92993427948425e-06, "loss": 17.0709, "step": 24800 }, { "epoch": 0.4486021865062234, "grad_norm": 42.59375, "learning_rate": 9.92990602717759e-06, "loss": 17.0579, "step": 24810 }, { "epoch": 0.4487830015753513, "grad_norm": 44.40625, "learning_rate": 9.929877774870929e-06, "loss": 16.8308, "step": 24820 }, { "epoch": 0.44896381664447915, "grad_norm": 39.15625, "learning_rate": 9.929849522564271e-06, "loss": 17.0908, "step": 24830 }, { "epoch": 0.449144631713607, "grad_norm": 40.53125, "learning_rate": 9.929821270257612e-06, "loss": 16.7871, "step": 24840 }, { "epoch": 0.4493254467827349, "grad_norm": 43.03125, "learning_rate": 9.929793017950951e-06, "loss": 17.3479, "step": 24850 }, { "epoch": 0.4495062618518627, "grad_norm": 40.65625, "learning_rate": 9.929764765644292e-06, "loss": 17.52, "step": 24860 }, { "epoch": 0.4496870769209906, "grad_norm": 41.875, "learning_rate": 9.929736513337632e-06, "loss": 17.0901, "step": 24870 }, { "epoch": 0.44986789199011845, "grad_norm": 42.40625, "learning_rate": 9.929708261030973e-06, "loss": 17.2132, "step": 24880 }, { "epoch": 0.4500487070592463, "grad_norm": 39.78125, "learning_rate": 9.929680008724313e-06, "loss": 17.1365, "step": 24890 }, { "epoch": 0.4502295221283742, "grad_norm": 38.59375, "learning_rate": 9.929651756417652e-06, "loss": 17.0028, "step": 24900 }, { "epoch": 0.45041033719750206, "grad_norm": 42.96875, "learning_rate": 9.929623504110993e-06, "loss": 17.0601, "step": 24910 }, { "epoch": 0.4505911522666299, "grad_norm": 39.6875, "learning_rate": 9.929595251804335e-06, "loss": 16.7189, "step": 24920 }, { "epoch": 0.45077196733575775, "grad_norm": 39.0, "learning_rate": 9.929566999497676e-06, "loss": 16.7454, "step": 24930 }, { "epoch": 0.4509527824048856, "grad_norm": 40.4375, "learning_rate": 9.929538747191015e-06, "loss": 17.6337, "step": 24940 }, { "epoch": 0.4511335974740135, "grad_norm": 42.34375, "learning_rate": 9.929510494884355e-06, "loss": 16.8392, "step": 24950 }, { "epoch": 0.45131441254314136, "grad_norm": 43.78125, "learning_rate": 9.929482242577696e-06, "loss": 16.8768, "step": 24960 }, { "epoch": 0.4514952276122692, "grad_norm": 39.84375, "learning_rate": 9.929453990271037e-06, "loss": 17.0484, "step": 24970 }, { "epoch": 0.45167604268139705, "grad_norm": 41.0625, "learning_rate": 9.929425737964377e-06, "loss": 17.2024, "step": 24980 }, { "epoch": 0.4518568577505249, "grad_norm": 39.90625, "learning_rate": 9.929397485657716e-06, "loss": 17.5434, "step": 24990 }, { "epoch": 0.4520376728196528, "grad_norm": 40.40625, "learning_rate": 9.929369233351059e-06, "loss": 16.9544, "step": 25000 }, { "epoch": 0.4520376728196528, "eval_loss": 2.1385836601257324, "eval_runtime": 229.8952, "eval_samples_per_second": 3158.217, "eval_steps_per_second": 49.349, "step": 25000 }, { "epoch": 0.45221848788878066, "grad_norm": 42.25, "learning_rate": 9.9293409810444e-06, "loss": 17.2312, "step": 25010 }, { "epoch": 0.45239930295790853, "grad_norm": 43.40625, "learning_rate": 9.929312728737738e-06, "loss": 16.9233, "step": 25020 }, { "epoch": 0.45258011802703635, "grad_norm": 42.34375, "learning_rate": 9.929284476431079e-06, "loss": 17.1353, "step": 25030 }, { "epoch": 0.4527609330961642, "grad_norm": 41.40625, "learning_rate": 9.92925622412442e-06, "loss": 17.2442, "step": 25040 }, { "epoch": 0.4529417481652921, "grad_norm": 42.09375, "learning_rate": 9.92922797181776e-06, "loss": 17.064, "step": 25050 }, { "epoch": 0.45312256323441996, "grad_norm": 39.34375, "learning_rate": 9.9291997195111e-06, "loss": 17.032, "step": 25060 }, { "epoch": 0.45330337830354783, "grad_norm": 43.34375, "learning_rate": 9.92917146720444e-06, "loss": 17.1714, "step": 25070 }, { "epoch": 0.4534841933726757, "grad_norm": 42.59375, "learning_rate": 9.92914321489778e-06, "loss": 17.0163, "step": 25080 }, { "epoch": 0.4536650084418035, "grad_norm": 41.78125, "learning_rate": 9.929114962591122e-06, "loss": 17.1397, "step": 25090 }, { "epoch": 0.4538458235109314, "grad_norm": 41.6875, "learning_rate": 9.929086710284463e-06, "loss": 16.8583, "step": 25100 }, { "epoch": 0.45402663858005926, "grad_norm": 41.0625, "learning_rate": 9.929058457977802e-06, "loss": 16.8965, "step": 25110 }, { "epoch": 0.45420745364918713, "grad_norm": 42.09375, "learning_rate": 9.929030205671143e-06, "loss": 17.3544, "step": 25120 }, { "epoch": 0.454388268718315, "grad_norm": 38.84375, "learning_rate": 9.929001953364483e-06, "loss": 17.2578, "step": 25130 }, { "epoch": 0.4545690837874428, "grad_norm": 38.90625, "learning_rate": 9.928973701057824e-06, "loss": 17.2073, "step": 25140 }, { "epoch": 0.4547498988565707, "grad_norm": 40.8125, "learning_rate": 9.928945448751165e-06, "loss": 16.996, "step": 25150 }, { "epoch": 0.45493071392569856, "grad_norm": 39.5625, "learning_rate": 9.928917196444503e-06, "loss": 17.1514, "step": 25160 }, { "epoch": 0.45511152899482643, "grad_norm": 39.5625, "learning_rate": 9.928888944137844e-06, "loss": 17.1125, "step": 25170 }, { "epoch": 0.4552923440639543, "grad_norm": 40.34375, "learning_rate": 9.928860691831186e-06, "loss": 16.7267, "step": 25180 }, { "epoch": 0.4554731591330822, "grad_norm": 40.75, "learning_rate": 9.928832439524525e-06, "loss": 17.3858, "step": 25190 }, { "epoch": 0.45565397420221, "grad_norm": 41.125, "learning_rate": 9.928804187217866e-06, "loss": 17.0973, "step": 25200 }, { "epoch": 0.45583478927133786, "grad_norm": 41.25, "learning_rate": 9.928775934911207e-06, "loss": 16.7575, "step": 25210 }, { "epoch": 0.45601560434046573, "grad_norm": 41.28125, "learning_rate": 9.928747682604547e-06, "loss": 16.9319, "step": 25220 }, { "epoch": 0.4561964194095936, "grad_norm": 40.0, "learning_rate": 9.928719430297888e-06, "loss": 16.9408, "step": 25230 }, { "epoch": 0.45637723447872147, "grad_norm": 40.8125, "learning_rate": 9.928691177991228e-06, "loss": 17.4637, "step": 25240 }, { "epoch": 0.45655804954784934, "grad_norm": 38.78125, "learning_rate": 9.928662925684567e-06, "loss": 17.4776, "step": 25250 }, { "epoch": 0.45673886461697716, "grad_norm": 41.125, "learning_rate": 9.928634673377908e-06, "loss": 16.9808, "step": 25260 }, { "epoch": 0.45691967968610503, "grad_norm": 41.40625, "learning_rate": 9.92860642107125e-06, "loss": 16.7948, "step": 25270 }, { "epoch": 0.4571004947552329, "grad_norm": 41.59375, "learning_rate": 9.92857816876459e-06, "loss": 17.1491, "step": 25280 }, { "epoch": 0.45728130982436077, "grad_norm": 41.78125, "learning_rate": 9.92854991645793e-06, "loss": 17.0664, "step": 25290 }, { "epoch": 0.45746212489348864, "grad_norm": 43.46875, "learning_rate": 9.92852166415127e-06, "loss": 17.0108, "step": 25300 }, { "epoch": 0.45764293996261646, "grad_norm": 43.03125, "learning_rate": 9.928493411844611e-06, "loss": 16.851, "step": 25310 }, { "epoch": 0.45782375503174433, "grad_norm": 41.34375, "learning_rate": 9.928465159537952e-06, "loss": 16.9911, "step": 25320 }, { "epoch": 0.4580045701008722, "grad_norm": 39.1875, "learning_rate": 9.92843690723129e-06, "loss": 17.1102, "step": 25330 }, { "epoch": 0.45818538517000007, "grad_norm": 39.75, "learning_rate": 9.928408654924631e-06, "loss": 16.7065, "step": 25340 }, { "epoch": 0.45836620023912794, "grad_norm": 41.59375, "learning_rate": 9.928380402617974e-06, "loss": 17.1513, "step": 25350 }, { "epoch": 0.4585470153082558, "grad_norm": 39.46875, "learning_rate": 9.928352150311314e-06, "loss": 17.343, "step": 25360 }, { "epoch": 0.4587278303773836, "grad_norm": 44.4375, "learning_rate": 9.928323898004653e-06, "loss": 16.954, "step": 25370 }, { "epoch": 0.4589086454465115, "grad_norm": 39.6875, "learning_rate": 9.928295645697994e-06, "loss": 16.6668, "step": 25380 }, { "epoch": 0.45908946051563937, "grad_norm": 41.0625, "learning_rate": 9.928267393391334e-06, "loss": 17.0833, "step": 25390 }, { "epoch": 0.45927027558476724, "grad_norm": 42.625, "learning_rate": 9.928239141084675e-06, "loss": 17.1839, "step": 25400 }, { "epoch": 0.4594510906538951, "grad_norm": 42.90625, "learning_rate": 9.928210888778016e-06, "loss": 17.6705, "step": 25410 }, { "epoch": 0.459631905723023, "grad_norm": 43.0625, "learning_rate": 9.928182636471355e-06, "loss": 17.2035, "step": 25420 }, { "epoch": 0.4598127207921508, "grad_norm": 39.96875, "learning_rate": 9.928154384164695e-06, "loss": 17.355, "step": 25430 }, { "epoch": 0.45999353586127867, "grad_norm": 42.0, "learning_rate": 9.928126131858037e-06, "loss": 17.029, "step": 25440 }, { "epoch": 0.46017435093040654, "grad_norm": 45.03125, "learning_rate": 9.928097879551376e-06, "loss": 16.6355, "step": 25450 }, { "epoch": 0.4603551659995344, "grad_norm": 42.40625, "learning_rate": 9.928069627244717e-06, "loss": 17.3171, "step": 25460 }, { "epoch": 0.4605359810686623, "grad_norm": 41.8125, "learning_rate": 9.928041374938058e-06, "loss": 16.8692, "step": 25470 }, { "epoch": 0.4607167961377901, "grad_norm": 41.21875, "learning_rate": 9.928013122631398e-06, "loss": 17.0069, "step": 25480 }, { "epoch": 0.46089761120691797, "grad_norm": 43.25, "learning_rate": 9.927984870324739e-06, "loss": 17.3876, "step": 25490 }, { "epoch": 0.46107842627604584, "grad_norm": 42.375, "learning_rate": 9.927956618018078e-06, "loss": 16.6849, "step": 25500 }, { "epoch": 0.4612592413451737, "grad_norm": 43.125, "learning_rate": 9.927928365711418e-06, "loss": 17.0812, "step": 25510 }, { "epoch": 0.4614400564143016, "grad_norm": 41.09375, "learning_rate": 9.927900113404759e-06, "loss": 17.2766, "step": 25520 }, { "epoch": 0.46162087148342945, "grad_norm": 41.9375, "learning_rate": 9.927871861098101e-06, "loss": 17.084, "step": 25530 }, { "epoch": 0.46180168655255727, "grad_norm": 38.625, "learning_rate": 9.92784360879144e-06, "loss": 17.2581, "step": 25540 }, { "epoch": 0.46198250162168514, "grad_norm": 41.28125, "learning_rate": 9.927815356484781e-06, "loss": 17.2412, "step": 25550 }, { "epoch": 0.462163316690813, "grad_norm": 41.28125, "learning_rate": 9.927787104178122e-06, "loss": 16.8625, "step": 25560 }, { "epoch": 0.4623441317599409, "grad_norm": 40.6875, "learning_rate": 9.927758851871462e-06, "loss": 17.4807, "step": 25570 }, { "epoch": 0.46252494682906875, "grad_norm": 43.40625, "learning_rate": 9.927730599564803e-06, "loss": 16.9297, "step": 25580 }, { "epoch": 0.4627057618981966, "grad_norm": 42.5, "learning_rate": 9.927702347258142e-06, "loss": 17.1685, "step": 25590 }, { "epoch": 0.46288657696732444, "grad_norm": 40.09375, "learning_rate": 9.927674094951482e-06, "loss": 17.0162, "step": 25600 }, { "epoch": 0.4630673920364523, "grad_norm": 42.0625, "learning_rate": 9.927645842644823e-06, "loss": 17.2933, "step": 25610 }, { "epoch": 0.4632482071055802, "grad_norm": 44.71875, "learning_rate": 9.927617590338164e-06, "loss": 16.9681, "step": 25620 }, { "epoch": 0.46342902217470805, "grad_norm": 39.5625, "learning_rate": 9.927589338031504e-06, "loss": 17.1248, "step": 25630 }, { "epoch": 0.4636098372438359, "grad_norm": 43.9375, "learning_rate": 9.927561085724845e-06, "loss": 17.0732, "step": 25640 }, { "epoch": 0.46379065231296374, "grad_norm": 42.6875, "learning_rate": 9.927532833418185e-06, "loss": 17.0435, "step": 25650 }, { "epoch": 0.4639714673820916, "grad_norm": 43.34375, "learning_rate": 9.927504581111526e-06, "loss": 17.0115, "step": 25660 }, { "epoch": 0.4641522824512195, "grad_norm": 39.28125, "learning_rate": 9.927476328804867e-06, "loss": 16.9102, "step": 25670 }, { "epoch": 0.46433309752034735, "grad_norm": 41.4375, "learning_rate": 9.927448076498206e-06, "loss": 16.9313, "step": 25680 }, { "epoch": 0.4645139125894752, "grad_norm": 41.15625, "learning_rate": 9.927419824191546e-06, "loss": 17.2224, "step": 25690 }, { "epoch": 0.4646947276586031, "grad_norm": 43.75, "learning_rate": 9.927391571884889e-06, "loss": 17.1843, "step": 25700 }, { "epoch": 0.4648755427277309, "grad_norm": 39.625, "learning_rate": 9.927363319578228e-06, "loss": 16.6983, "step": 25710 }, { "epoch": 0.4650563577968588, "grad_norm": 43.5, "learning_rate": 9.927335067271568e-06, "loss": 16.8113, "step": 25720 }, { "epoch": 0.46523717286598665, "grad_norm": 42.0, "learning_rate": 9.927306814964909e-06, "loss": 16.901, "step": 25730 }, { "epoch": 0.4654179879351145, "grad_norm": 39.875, "learning_rate": 9.92727856265825e-06, "loss": 17.2378, "step": 25740 }, { "epoch": 0.4655988030042424, "grad_norm": 40.125, "learning_rate": 9.92725031035159e-06, "loss": 17.2241, "step": 25750 }, { "epoch": 0.46577961807337026, "grad_norm": 40.5625, "learning_rate": 9.927222058044929e-06, "loss": 16.9758, "step": 25760 }, { "epoch": 0.4659604331424981, "grad_norm": 38.53125, "learning_rate": 9.92719380573827e-06, "loss": 16.8353, "step": 25770 }, { "epoch": 0.46614124821162595, "grad_norm": 40.625, "learning_rate": 9.92716555343161e-06, "loss": 16.9377, "step": 25780 }, { "epoch": 0.4663220632807538, "grad_norm": 41.5625, "learning_rate": 9.927137301124952e-06, "loss": 17.0842, "step": 25790 }, { "epoch": 0.4665028783498817, "grad_norm": 41.25, "learning_rate": 9.927109048818291e-06, "loss": 17.0646, "step": 25800 }, { "epoch": 0.46668369341900956, "grad_norm": 42.59375, "learning_rate": 9.927080796511632e-06, "loss": 17.0465, "step": 25810 }, { "epoch": 0.4668645084881374, "grad_norm": 38.1875, "learning_rate": 9.927052544204973e-06, "loss": 16.8757, "step": 25820 }, { "epoch": 0.46704532355726525, "grad_norm": 41.34375, "learning_rate": 9.927024291898313e-06, "loss": 17.617, "step": 25830 }, { "epoch": 0.4672261386263931, "grad_norm": 39.65625, "learning_rate": 9.926996039591654e-06, "loss": 16.8398, "step": 25840 }, { "epoch": 0.467406953695521, "grad_norm": 42.875, "learning_rate": 9.926967787284993e-06, "loss": 16.779, "step": 25850 }, { "epoch": 0.46758776876464886, "grad_norm": 41.75, "learning_rate": 9.926939534978333e-06, "loss": 16.9525, "step": 25860 }, { "epoch": 0.46776858383377673, "grad_norm": 41.3125, "learning_rate": 9.926911282671674e-06, "loss": 16.375, "step": 25870 }, { "epoch": 0.46794939890290455, "grad_norm": 40.71875, "learning_rate": 9.926883030365015e-06, "loss": 17.3035, "step": 25880 }, { "epoch": 0.4681302139720324, "grad_norm": 41.9375, "learning_rate": 9.926854778058355e-06, "loss": 17.2107, "step": 25890 }, { "epoch": 0.4683110290411603, "grad_norm": 45.1875, "learning_rate": 9.926826525751696e-06, "loss": 16.8853, "step": 25900 }, { "epoch": 0.46849184411028816, "grad_norm": 41.125, "learning_rate": 9.926798273445037e-06, "loss": 16.858, "step": 25910 }, { "epoch": 0.46867265917941603, "grad_norm": 40.21875, "learning_rate": 9.926770021138377e-06, "loss": 17.2101, "step": 25920 }, { "epoch": 0.4688534742485439, "grad_norm": 37.15625, "learning_rate": 9.926741768831716e-06, "loss": 16.781, "step": 25930 }, { "epoch": 0.4690342893176717, "grad_norm": 41.28125, "learning_rate": 9.926713516525057e-06, "loss": 16.9944, "step": 25940 }, { "epoch": 0.4692151043867996, "grad_norm": 42.0625, "learning_rate": 9.926685264218397e-06, "loss": 17.245, "step": 25950 }, { "epoch": 0.46939591945592746, "grad_norm": 42.6875, "learning_rate": 9.926657011911738e-06, "loss": 16.9312, "step": 25960 }, { "epoch": 0.46957673452505533, "grad_norm": 42.28125, "learning_rate": 9.926628759605079e-06, "loss": 17.4238, "step": 25970 }, { "epoch": 0.4697575495941832, "grad_norm": 42.34375, "learning_rate": 9.92660050729842e-06, "loss": 16.9905, "step": 25980 }, { "epoch": 0.469938364663311, "grad_norm": 39.40625, "learning_rate": 9.92657225499176e-06, "loss": 16.793, "step": 25990 }, { "epoch": 0.4701191797324389, "grad_norm": 41.15625, "learning_rate": 9.9265440026851e-06, "loss": 17.3917, "step": 26000 }, { "epoch": 0.47029999480156676, "grad_norm": 42.5625, "learning_rate": 9.926515750378441e-06, "loss": 17.1517, "step": 26010 }, { "epoch": 0.4704808098706946, "grad_norm": 39.21875, "learning_rate": 9.92648749807178e-06, "loss": 16.935, "step": 26020 }, { "epoch": 0.4706616249398225, "grad_norm": 41.90625, "learning_rate": 9.92645924576512e-06, "loss": 16.5918, "step": 26030 }, { "epoch": 0.47084244000895037, "grad_norm": 40.25, "learning_rate": 9.926430993458461e-06, "loss": 17.144, "step": 26040 }, { "epoch": 0.4710232550780782, "grad_norm": 39.34375, "learning_rate": 9.926402741151802e-06, "loss": 16.9929, "step": 26050 }, { "epoch": 0.47120407014720606, "grad_norm": 41.75, "learning_rate": 9.926374488845143e-06, "loss": 16.8202, "step": 26060 }, { "epoch": 0.4713848852163339, "grad_norm": 41.8125, "learning_rate": 9.926346236538483e-06, "loss": 16.9627, "step": 26070 }, { "epoch": 0.4715657002854618, "grad_norm": 39.46875, "learning_rate": 9.926317984231824e-06, "loss": 16.9011, "step": 26080 }, { "epoch": 0.47174651535458967, "grad_norm": 41.4375, "learning_rate": 9.926289731925164e-06, "loss": 17.0805, "step": 26090 }, { "epoch": 0.47192733042371754, "grad_norm": 43.375, "learning_rate": 9.926261479618505e-06, "loss": 17.2723, "step": 26100 }, { "epoch": 0.47210814549284535, "grad_norm": 41.875, "learning_rate": 9.926233227311844e-06, "loss": 17.1467, "step": 26110 }, { "epoch": 0.4722889605619732, "grad_norm": 43.25, "learning_rate": 9.926204975005185e-06, "loss": 16.8147, "step": 26120 }, { "epoch": 0.4724697756311011, "grad_norm": 42.03125, "learning_rate": 9.926176722698525e-06, "loss": 16.6152, "step": 26130 }, { "epoch": 0.47265059070022897, "grad_norm": 39.03125, "learning_rate": 9.926148470391866e-06, "loss": 16.9996, "step": 26140 }, { "epoch": 0.47283140576935684, "grad_norm": 41.875, "learning_rate": 9.926120218085206e-06, "loss": 16.8885, "step": 26150 }, { "epoch": 0.47301222083848465, "grad_norm": 40.9375, "learning_rate": 9.926091965778547e-06, "loss": 16.686, "step": 26160 }, { "epoch": 0.4731930359076125, "grad_norm": 40.34375, "learning_rate": 9.926063713471888e-06, "loss": 17.0671, "step": 26170 }, { "epoch": 0.4733738509767404, "grad_norm": 43.71875, "learning_rate": 9.926035461165228e-06, "loss": 16.7281, "step": 26180 }, { "epoch": 0.47355466604586827, "grad_norm": 41.8125, "learning_rate": 9.926007208858567e-06, "loss": 17.1318, "step": 26190 }, { "epoch": 0.47373548111499614, "grad_norm": 40.375, "learning_rate": 9.925978956551908e-06, "loss": 17.2749, "step": 26200 }, { "epoch": 0.473916296184124, "grad_norm": 44.0, "learning_rate": 9.925950704245248e-06, "loss": 17.1022, "step": 26210 }, { "epoch": 0.4740971112532518, "grad_norm": 43.9375, "learning_rate": 9.925922451938589e-06, "loss": 17.0693, "step": 26220 }, { "epoch": 0.4742779263223797, "grad_norm": 41.3125, "learning_rate": 9.92589419963193e-06, "loss": 16.8296, "step": 26230 }, { "epoch": 0.47445874139150757, "grad_norm": 42.03125, "learning_rate": 9.92586594732527e-06, "loss": 17.0046, "step": 26240 }, { "epoch": 0.47463955646063544, "grad_norm": 41.34375, "learning_rate": 9.925837695018611e-06, "loss": 17.1826, "step": 26250 }, { "epoch": 0.4748203715297633, "grad_norm": 41.59375, "learning_rate": 9.925809442711952e-06, "loss": 17.0491, "step": 26260 }, { "epoch": 0.4750011865988912, "grad_norm": 40.875, "learning_rate": 9.925781190405292e-06, "loss": 16.8475, "step": 26270 }, { "epoch": 0.475182001668019, "grad_norm": 41.78125, "learning_rate": 9.925752938098631e-06, "loss": 17.4165, "step": 26280 }, { "epoch": 0.47536281673714686, "grad_norm": 40.59375, "learning_rate": 9.925724685791972e-06, "loss": 17.1112, "step": 26290 }, { "epoch": 0.47554363180627474, "grad_norm": 40.0, "learning_rate": 9.925696433485312e-06, "loss": 17.0229, "step": 26300 }, { "epoch": 0.4757244468754026, "grad_norm": 38.0625, "learning_rate": 9.925668181178653e-06, "loss": 17.1572, "step": 26310 }, { "epoch": 0.4759052619445305, "grad_norm": 39.28125, "learning_rate": 9.925639928871994e-06, "loss": 17.0653, "step": 26320 }, { "epoch": 0.4760860770136583, "grad_norm": 41.5625, "learning_rate": 9.925611676565334e-06, "loss": 16.9044, "step": 26330 }, { "epoch": 0.47626689208278616, "grad_norm": 42.4375, "learning_rate": 9.925583424258675e-06, "loss": 16.9558, "step": 26340 }, { "epoch": 0.47644770715191403, "grad_norm": 40.625, "learning_rate": 9.925555171952015e-06, "loss": 17.1931, "step": 26350 }, { "epoch": 0.4766285222210419, "grad_norm": 39.59375, "learning_rate": 9.925526919645354e-06, "loss": 16.6998, "step": 26360 }, { "epoch": 0.4768093372901698, "grad_norm": 41.0, "learning_rate": 9.925498667338695e-06, "loss": 16.9934, "step": 26370 }, { "epoch": 0.47699015235929765, "grad_norm": 43.125, "learning_rate": 9.925470415032036e-06, "loss": 17.419, "step": 26380 }, { "epoch": 0.47717096742842546, "grad_norm": 41.875, "learning_rate": 9.925442162725376e-06, "loss": 17.275, "step": 26390 }, { "epoch": 0.47735178249755333, "grad_norm": 42.15625, "learning_rate": 9.925413910418717e-06, "loss": 17.2304, "step": 26400 }, { "epoch": 0.4775325975666812, "grad_norm": 39.96875, "learning_rate": 9.925385658112058e-06, "loss": 16.9184, "step": 26410 }, { "epoch": 0.4777134126358091, "grad_norm": 41.96875, "learning_rate": 9.925357405805398e-06, "loss": 16.8243, "step": 26420 }, { "epoch": 0.47789422770493695, "grad_norm": 42.375, "learning_rate": 9.925329153498739e-06, "loss": 17.2043, "step": 26430 }, { "epoch": 0.4780750427740648, "grad_norm": 42.0, "learning_rate": 9.92530090119208e-06, "loss": 16.9562, "step": 26440 }, { "epoch": 0.47825585784319263, "grad_norm": 43.40625, "learning_rate": 9.925272648885418e-06, "loss": 16.9889, "step": 26450 }, { "epoch": 0.4784366729123205, "grad_norm": 38.78125, "learning_rate": 9.925244396578759e-06, "loss": 17.1232, "step": 26460 }, { "epoch": 0.4786174879814484, "grad_norm": 41.96875, "learning_rate": 9.9252161442721e-06, "loss": 17.097, "step": 26470 }, { "epoch": 0.47879830305057625, "grad_norm": 39.59375, "learning_rate": 9.92518789196544e-06, "loss": 16.9876, "step": 26480 }, { "epoch": 0.4789791181197041, "grad_norm": 42.34375, "learning_rate": 9.92515963965878e-06, "loss": 17.3516, "step": 26490 }, { "epoch": 0.47915993318883193, "grad_norm": 40.3125, "learning_rate": 9.925131387352121e-06, "loss": 17.1503, "step": 26500 }, { "epoch": 0.4793407482579598, "grad_norm": 42.375, "learning_rate": 9.925103135045462e-06, "loss": 17.4379, "step": 26510 }, { "epoch": 0.4795215633270877, "grad_norm": 40.9375, "learning_rate": 9.925074882738803e-06, "loss": 16.7705, "step": 26520 }, { "epoch": 0.47970237839621555, "grad_norm": 40.3125, "learning_rate": 9.925046630432142e-06, "loss": 16.8241, "step": 26530 }, { "epoch": 0.4798831934653434, "grad_norm": 45.15625, "learning_rate": 9.925018378125482e-06, "loss": 17.3142, "step": 26540 }, { "epoch": 0.4800640085344713, "grad_norm": 42.59375, "learning_rate": 9.924990125818823e-06, "loss": 16.7404, "step": 26550 }, { "epoch": 0.4802448236035991, "grad_norm": 44.78125, "learning_rate": 9.924961873512163e-06, "loss": 17.6105, "step": 26560 }, { "epoch": 0.480425638672727, "grad_norm": 40.71875, "learning_rate": 9.924933621205504e-06, "loss": 16.9162, "step": 26570 }, { "epoch": 0.48060645374185484, "grad_norm": 42.03125, "learning_rate": 9.924905368898845e-06, "loss": 16.7582, "step": 26580 }, { "epoch": 0.4807872688109827, "grad_norm": 42.40625, "learning_rate": 9.924877116592185e-06, "loss": 17.1185, "step": 26590 }, { "epoch": 0.4809680838801106, "grad_norm": 43.03125, "learning_rate": 9.924848864285526e-06, "loss": 17.1527, "step": 26600 }, { "epoch": 0.48114889894923846, "grad_norm": 43.78125, "learning_rate": 9.924820611978867e-06, "loss": 17.2085, "step": 26610 }, { "epoch": 0.4813297140183663, "grad_norm": 42.09375, "learning_rate": 9.924792359672206e-06, "loss": 17.3011, "step": 26620 }, { "epoch": 0.48151052908749414, "grad_norm": 42.9375, "learning_rate": 9.924764107365546e-06, "loss": 17.2475, "step": 26630 }, { "epoch": 0.481691344156622, "grad_norm": 41.125, "learning_rate": 9.924735855058887e-06, "loss": 17.0385, "step": 26640 }, { "epoch": 0.4818721592257499, "grad_norm": 40.40625, "learning_rate": 9.924707602752227e-06, "loss": 16.8667, "step": 26650 }, { "epoch": 0.48205297429487776, "grad_norm": 42.09375, "learning_rate": 9.924679350445568e-06, "loss": 17.118, "step": 26660 }, { "epoch": 0.48223378936400557, "grad_norm": 39.8125, "learning_rate": 9.924651098138909e-06, "loss": 17.058, "step": 26670 }, { "epoch": 0.48241460443313344, "grad_norm": 41.34375, "learning_rate": 9.92462284583225e-06, "loss": 17.0949, "step": 26680 }, { "epoch": 0.4825954195022613, "grad_norm": 41.5, "learning_rate": 9.92459459352559e-06, "loss": 17.0175, "step": 26690 }, { "epoch": 0.4827762345713892, "grad_norm": 43.53125, "learning_rate": 9.92456634121893e-06, "loss": 16.6859, "step": 26700 }, { "epoch": 0.48295704964051706, "grad_norm": 40.1875, "learning_rate": 9.92453808891227e-06, "loss": 16.7084, "step": 26710 }, { "epoch": 0.4831378647096449, "grad_norm": 40.28125, "learning_rate": 9.92450983660561e-06, "loss": 16.9311, "step": 26720 }, { "epoch": 0.48331867977877274, "grad_norm": 39.1875, "learning_rate": 9.92448158429895e-06, "loss": 17.2466, "step": 26730 }, { "epoch": 0.4834994948479006, "grad_norm": 38.46875, "learning_rate": 9.924453331992291e-06, "loss": 16.9546, "step": 26740 }, { "epoch": 0.4836803099170285, "grad_norm": 40.65625, "learning_rate": 9.924425079685632e-06, "loss": 17.0234, "step": 26750 }, { "epoch": 0.48386112498615635, "grad_norm": 42.78125, "learning_rate": 9.924396827378973e-06, "loss": 17.4127, "step": 26760 }, { "epoch": 0.4840419400552842, "grad_norm": 42.78125, "learning_rate": 9.924368575072313e-06, "loss": 17.1631, "step": 26770 }, { "epoch": 0.48422275512441204, "grad_norm": 42.9375, "learning_rate": 9.924340322765654e-06, "loss": 16.9457, "step": 26780 }, { "epoch": 0.4844035701935399, "grad_norm": 41.4375, "learning_rate": 9.924312070458993e-06, "loss": 17.0132, "step": 26790 }, { "epoch": 0.4845843852626678, "grad_norm": 43.3125, "learning_rate": 9.924283818152333e-06, "loss": 17.3709, "step": 26800 }, { "epoch": 0.48476520033179565, "grad_norm": 41.25, "learning_rate": 9.924255565845674e-06, "loss": 17.4508, "step": 26810 }, { "epoch": 0.4849460154009235, "grad_norm": 42.9375, "learning_rate": 9.924227313539015e-06, "loss": 17.0839, "step": 26820 }, { "epoch": 0.4851268304700514, "grad_norm": 40.5625, "learning_rate": 9.924199061232355e-06, "loss": 17.0203, "step": 26830 }, { "epoch": 0.4853076455391792, "grad_norm": 43.46875, "learning_rate": 9.924170808925696e-06, "loss": 17.0355, "step": 26840 }, { "epoch": 0.4854884606083071, "grad_norm": 42.21875, "learning_rate": 9.924142556619036e-06, "loss": 17.1141, "step": 26850 }, { "epoch": 0.48566927567743495, "grad_norm": 41.03125, "learning_rate": 9.924114304312377e-06, "loss": 17.0769, "step": 26860 }, { "epoch": 0.4858500907465628, "grad_norm": 38.59375, "learning_rate": 9.924086052005718e-06, "loss": 16.6634, "step": 26870 }, { "epoch": 0.4860309058156907, "grad_norm": 44.1875, "learning_rate": 9.924057799699057e-06, "loss": 16.7654, "step": 26880 }, { "epoch": 0.48621172088481857, "grad_norm": 42.90625, "learning_rate": 9.924029547392397e-06, "loss": 17.1133, "step": 26890 }, { "epoch": 0.4863925359539464, "grad_norm": 44.125, "learning_rate": 9.924001295085738e-06, "loss": 17.0373, "step": 26900 }, { "epoch": 0.48657335102307425, "grad_norm": 40.78125, "learning_rate": 9.923973042779078e-06, "loss": 17.2041, "step": 26910 }, { "epoch": 0.4867541660922021, "grad_norm": 44.1875, "learning_rate": 9.923944790472419e-06, "loss": 16.9118, "step": 26920 }, { "epoch": 0.48693498116133, "grad_norm": 43.125, "learning_rate": 9.92391653816576e-06, "loss": 17.4285, "step": 26930 }, { "epoch": 0.48711579623045786, "grad_norm": 41.6875, "learning_rate": 9.9238882858591e-06, "loss": 17.1475, "step": 26940 }, { "epoch": 0.4872966112995857, "grad_norm": 40.625, "learning_rate": 9.923860033552441e-06, "loss": 17.7247, "step": 26950 }, { "epoch": 0.48747742636871355, "grad_norm": 42.71875, "learning_rate": 9.92383178124578e-06, "loss": 17.3309, "step": 26960 }, { "epoch": 0.4876582414378414, "grad_norm": 46.21875, "learning_rate": 9.92380352893912e-06, "loss": 17.1113, "step": 26970 }, { "epoch": 0.4878390565069693, "grad_norm": 41.15625, "learning_rate": 9.923775276632461e-06, "loss": 17.2221, "step": 26980 }, { "epoch": 0.48801987157609716, "grad_norm": 42.15625, "learning_rate": 9.923747024325802e-06, "loss": 17.3459, "step": 26990 }, { "epoch": 0.48820068664522503, "grad_norm": 42.40625, "learning_rate": 9.923718772019142e-06, "loss": 17.1826, "step": 27000 }, { "epoch": 0.48838150171435285, "grad_norm": 42.6875, "learning_rate": 9.923690519712483e-06, "loss": 16.9989, "step": 27010 }, { "epoch": 0.4885623167834807, "grad_norm": 44.6875, "learning_rate": 9.923662267405824e-06, "loss": 17.1884, "step": 27020 }, { "epoch": 0.4887431318526086, "grad_norm": 41.21875, "learning_rate": 9.923634015099164e-06, "loss": 17.0027, "step": 27030 }, { "epoch": 0.48892394692173646, "grad_norm": 43.625, "learning_rate": 9.923605762792505e-06, "loss": 16.6061, "step": 27040 }, { "epoch": 0.48910476199086433, "grad_norm": 42.875, "learning_rate": 9.923577510485844e-06, "loss": 17.0401, "step": 27050 }, { "epoch": 0.4892855770599922, "grad_norm": 41.28125, "learning_rate": 9.923549258179184e-06, "loss": 17.1379, "step": 27060 }, { "epoch": 0.48946639212912, "grad_norm": 40.84375, "learning_rate": 9.923521005872525e-06, "loss": 16.6934, "step": 27070 }, { "epoch": 0.4896472071982479, "grad_norm": 39.03125, "learning_rate": 9.923492753565866e-06, "loss": 16.9134, "step": 27080 }, { "epoch": 0.48982802226737576, "grad_norm": 40.59375, "learning_rate": 9.923464501259206e-06, "loss": 16.6742, "step": 27090 }, { "epoch": 0.49000883733650363, "grad_norm": 40.0625, "learning_rate": 9.923436248952547e-06, "loss": 17.0472, "step": 27100 }, { "epoch": 0.4901896524056315, "grad_norm": 43.65625, "learning_rate": 9.923407996645888e-06, "loss": 17.0729, "step": 27110 }, { "epoch": 0.4903704674747593, "grad_norm": 40.34375, "learning_rate": 9.923379744339228e-06, "loss": 17.1721, "step": 27120 }, { "epoch": 0.4905512825438872, "grad_norm": 41.5, "learning_rate": 9.923351492032569e-06, "loss": 16.6988, "step": 27130 }, { "epoch": 0.49073209761301506, "grad_norm": 38.90625, "learning_rate": 9.923323239725908e-06, "loss": 16.7262, "step": 27140 }, { "epoch": 0.49091291268214293, "grad_norm": 39.6875, "learning_rate": 9.923294987419248e-06, "loss": 17.3932, "step": 27150 }, { "epoch": 0.4910937277512708, "grad_norm": 40.34375, "learning_rate": 9.923266735112589e-06, "loss": 17.2486, "step": 27160 }, { "epoch": 0.4912745428203987, "grad_norm": 41.3125, "learning_rate": 9.92323848280593e-06, "loss": 16.8497, "step": 27170 }, { "epoch": 0.4914553578895265, "grad_norm": 40.625, "learning_rate": 9.92321023049927e-06, "loss": 16.9637, "step": 27180 }, { "epoch": 0.49163617295865436, "grad_norm": 40.125, "learning_rate": 9.92318197819261e-06, "loss": 16.8488, "step": 27190 }, { "epoch": 0.49181698802778223, "grad_norm": 42.0, "learning_rate": 9.923153725885951e-06, "loss": 17.0431, "step": 27200 }, { "epoch": 0.4919978030969101, "grad_norm": 42.21875, "learning_rate": 9.923125473579292e-06, "loss": 16.9398, "step": 27210 }, { "epoch": 0.492178618166038, "grad_norm": 42.71875, "learning_rate": 9.923097221272631e-06, "loss": 17.0839, "step": 27220 }, { "epoch": 0.49235943323516584, "grad_norm": 39.96875, "learning_rate": 9.923068968965972e-06, "loss": 17.1533, "step": 27230 }, { "epoch": 0.49254024830429366, "grad_norm": 41.5625, "learning_rate": 9.923040716659312e-06, "loss": 17.4095, "step": 27240 }, { "epoch": 0.49272106337342153, "grad_norm": 44.03125, "learning_rate": 9.923012464352653e-06, "loss": 16.9864, "step": 27250 }, { "epoch": 0.4929018784425494, "grad_norm": 41.78125, "learning_rate": 9.922984212045993e-06, "loss": 16.8673, "step": 27260 }, { "epoch": 0.4930826935116773, "grad_norm": 44.34375, "learning_rate": 9.922955959739332e-06, "loss": 17.4023, "step": 27270 }, { "epoch": 0.49326350858080514, "grad_norm": 42.40625, "learning_rate": 9.922927707432675e-06, "loss": 17.256, "step": 27280 }, { "epoch": 0.49344432364993296, "grad_norm": 41.9375, "learning_rate": 9.922899455126015e-06, "loss": 16.9397, "step": 27290 }, { "epoch": 0.49362513871906083, "grad_norm": 41.09375, "learning_rate": 9.922871202819356e-06, "loss": 16.7331, "step": 27300 }, { "epoch": 0.4938059537881887, "grad_norm": 43.65625, "learning_rate": 9.922842950512695e-06, "loss": 16.9543, "step": 27310 }, { "epoch": 0.49398676885731657, "grad_norm": 42.15625, "learning_rate": 9.922814698206036e-06, "loss": 17.0799, "step": 27320 }, { "epoch": 0.49416758392644444, "grad_norm": 40.78125, "learning_rate": 9.922786445899376e-06, "loss": 17.436, "step": 27330 }, { "epoch": 0.4943483989955723, "grad_norm": 41.75, "learning_rate": 9.922758193592717e-06, "loss": 17.0082, "step": 27340 }, { "epoch": 0.49452921406470013, "grad_norm": 45.0, "learning_rate": 9.922729941286057e-06, "loss": 16.635, "step": 27350 }, { "epoch": 0.494710029133828, "grad_norm": 43.3125, "learning_rate": 9.922701688979396e-06, "loss": 16.8168, "step": 27360 }, { "epoch": 0.49489084420295587, "grad_norm": 42.03125, "learning_rate": 9.922673436672739e-06, "loss": 17.2225, "step": 27370 }, { "epoch": 0.49507165927208374, "grad_norm": 39.46875, "learning_rate": 9.92264518436608e-06, "loss": 16.5935, "step": 27380 }, { "epoch": 0.4952524743412116, "grad_norm": 41.5625, "learning_rate": 9.922616932059418e-06, "loss": 16.63, "step": 27390 }, { "epoch": 0.4954332894103395, "grad_norm": 39.3125, "learning_rate": 9.922588679752759e-06, "loss": 16.9653, "step": 27400 }, { "epoch": 0.4956141044794673, "grad_norm": 40.1875, "learning_rate": 9.9225604274461e-06, "loss": 17.6395, "step": 27410 }, { "epoch": 0.49579491954859517, "grad_norm": 40.875, "learning_rate": 9.92253217513944e-06, "loss": 17.131, "step": 27420 }, { "epoch": 0.49597573461772304, "grad_norm": 43.71875, "learning_rate": 9.92250392283278e-06, "loss": 17.4824, "step": 27430 }, { "epoch": 0.4961565496868509, "grad_norm": 42.46875, "learning_rate": 9.922475670526121e-06, "loss": 17.3884, "step": 27440 }, { "epoch": 0.4963373647559788, "grad_norm": 39.84375, "learning_rate": 9.922447418219462e-06, "loss": 17.2447, "step": 27450 }, { "epoch": 0.4965181798251066, "grad_norm": 41.96875, "learning_rate": 9.922419165912803e-06, "loss": 16.9101, "step": 27460 }, { "epoch": 0.49669899489423447, "grad_norm": 43.84375, "learning_rate": 9.922390913606143e-06, "loss": 17.1153, "step": 27470 }, { "epoch": 0.49687980996336234, "grad_norm": 42.46875, "learning_rate": 9.922362661299482e-06, "loss": 17.0791, "step": 27480 }, { "epoch": 0.4970606250324902, "grad_norm": 39.0, "learning_rate": 9.922334408992823e-06, "loss": 17.3504, "step": 27490 }, { "epoch": 0.4972414401016181, "grad_norm": 43.40625, "learning_rate": 9.922306156686163e-06, "loss": 17.0333, "step": 27500 }, { "epoch": 0.49742225517074595, "grad_norm": 42.625, "learning_rate": 9.922277904379504e-06, "loss": 16.9336, "step": 27510 }, { "epoch": 0.49760307023987377, "grad_norm": 39.875, "learning_rate": 9.922249652072845e-06, "loss": 16.8369, "step": 27520 }, { "epoch": 0.49778388530900164, "grad_norm": 43.4375, "learning_rate": 9.922221399766184e-06, "loss": 17.2567, "step": 27530 }, { "epoch": 0.4979647003781295, "grad_norm": 39.71875, "learning_rate": 9.922193147459526e-06, "loss": 16.7613, "step": 27540 }, { "epoch": 0.4981455154472574, "grad_norm": 40.59375, "learning_rate": 9.922164895152866e-06, "loss": 17.1967, "step": 27550 }, { "epoch": 0.49832633051638525, "grad_norm": 41.625, "learning_rate": 9.922136642846207e-06, "loss": 17.1377, "step": 27560 }, { "epoch": 0.4985071455855131, "grad_norm": 41.25, "learning_rate": 9.922108390539546e-06, "loss": 16.5317, "step": 27570 }, { "epoch": 0.49868796065464094, "grad_norm": 39.75, "learning_rate": 9.922080138232887e-06, "loss": 17.3832, "step": 27580 }, { "epoch": 0.4988687757237688, "grad_norm": 41.53125, "learning_rate": 9.922051885926227e-06, "loss": 16.9755, "step": 27590 }, { "epoch": 0.4990495907928967, "grad_norm": 44.28125, "learning_rate": 9.922023633619568e-06, "loss": 16.8368, "step": 27600 }, { "epoch": 0.49923040586202455, "grad_norm": 42.4375, "learning_rate": 9.921995381312908e-06, "loss": 16.3951, "step": 27610 }, { "epoch": 0.4994112209311524, "grad_norm": 43.375, "learning_rate": 9.921967129006247e-06, "loss": 17.0727, "step": 27620 }, { "epoch": 0.49959203600028024, "grad_norm": 39.375, "learning_rate": 9.92193887669959e-06, "loss": 16.7404, "step": 27630 }, { "epoch": 0.4997728510694081, "grad_norm": 42.5625, "learning_rate": 9.92191062439293e-06, "loss": 16.76, "step": 27640 }, { "epoch": 0.499953666138536, "grad_norm": 42.21875, "learning_rate": 9.92188237208627e-06, "loss": 17.0706, "step": 27650 }, { "epoch": 0.5001344812076638, "grad_norm": 41.40625, "learning_rate": 9.92185411977961e-06, "loss": 16.7611, "step": 27660 }, { "epoch": 0.5003152962767917, "grad_norm": 39.84375, "learning_rate": 9.92182586747295e-06, "loss": 17.0832, "step": 27670 }, { "epoch": 0.5004961113459195, "grad_norm": 40.1875, "learning_rate": 9.921797615166291e-06, "loss": 16.9609, "step": 27680 }, { "epoch": 0.5006769264150475, "grad_norm": 40.0, "learning_rate": 9.921769362859632e-06, "loss": 17.1072, "step": 27690 }, { "epoch": 0.5008577414841753, "grad_norm": 39.40625, "learning_rate": 9.92174111055297e-06, "loss": 16.9236, "step": 27700 }, { "epoch": 0.5010385565533032, "grad_norm": 40.75, "learning_rate": 9.921712858246311e-06, "loss": 16.9665, "step": 27710 }, { "epoch": 0.501219371622431, "grad_norm": 42.8125, "learning_rate": 9.921684605939654e-06, "loss": 16.732, "step": 27720 }, { "epoch": 0.5014001866915588, "grad_norm": 39.90625, "learning_rate": 9.921656353632994e-06, "loss": 16.7196, "step": 27730 }, { "epoch": 0.5015810017606868, "grad_norm": 42.0625, "learning_rate": 9.921628101326333e-06, "loss": 17.0108, "step": 27740 }, { "epoch": 0.5017618168298146, "grad_norm": 40.25, "learning_rate": 9.921599849019674e-06, "loss": 16.9423, "step": 27750 }, { "epoch": 0.5019426318989425, "grad_norm": 39.46875, "learning_rate": 9.921571596713014e-06, "loss": 17.2849, "step": 27760 }, { "epoch": 0.5021234469680703, "grad_norm": 41.4375, "learning_rate": 9.921543344406355e-06, "loss": 17.3861, "step": 27770 }, { "epoch": 0.5023042620371981, "grad_norm": 43.09375, "learning_rate": 9.921515092099696e-06, "loss": 16.6662, "step": 27780 }, { "epoch": 0.5024850771063261, "grad_norm": 41.21875, "learning_rate": 9.921486839793035e-06, "loss": 16.7922, "step": 27790 }, { "epoch": 0.5026658921754539, "grad_norm": 41.9375, "learning_rate": 9.921458587486377e-06, "loss": 16.8959, "step": 27800 }, { "epoch": 0.5028467072445818, "grad_norm": 40.15625, "learning_rate": 9.921430335179718e-06, "loss": 17.2021, "step": 27810 }, { "epoch": 0.5030275223137096, "grad_norm": 40.6875, "learning_rate": 9.921402082873056e-06, "loss": 16.8186, "step": 27820 }, { "epoch": 0.5032083373828374, "grad_norm": 42.03125, "learning_rate": 9.921373830566397e-06, "loss": 16.8509, "step": 27830 }, { "epoch": 0.5033891524519654, "grad_norm": 41.09375, "learning_rate": 9.921345578259738e-06, "loss": 16.794, "step": 27840 }, { "epoch": 0.5035699675210932, "grad_norm": 40.5, "learning_rate": 9.921317325953078e-06, "loss": 17.2108, "step": 27850 }, { "epoch": 0.5037507825902211, "grad_norm": 41.84375, "learning_rate": 9.921289073646419e-06, "loss": 16.9348, "step": 27860 }, { "epoch": 0.5039315976593489, "grad_norm": 41.875, "learning_rate": 9.92126082133976e-06, "loss": 17.0763, "step": 27870 }, { "epoch": 0.5041124127284768, "grad_norm": 42.28125, "learning_rate": 9.921232569033099e-06, "loss": 16.7722, "step": 27880 }, { "epoch": 0.5042932277976047, "grad_norm": 40.53125, "learning_rate": 9.92120431672644e-06, "loss": 17.0283, "step": 27890 }, { "epoch": 0.5044740428667325, "grad_norm": 41.6875, "learning_rate": 9.921176064419781e-06, "loss": 17.3908, "step": 27900 }, { "epoch": 0.5046548579358604, "grad_norm": 44.21875, "learning_rate": 9.92114781211312e-06, "loss": 17.0255, "step": 27910 }, { "epoch": 0.5048356730049882, "grad_norm": 41.59375, "learning_rate": 9.921119559806461e-06, "loss": 16.8385, "step": 27920 }, { "epoch": 0.5050164880741161, "grad_norm": 40.25, "learning_rate": 9.921091307499802e-06, "loss": 17.0742, "step": 27930 }, { "epoch": 0.505197303143244, "grad_norm": 45.1875, "learning_rate": 9.921063055193142e-06, "loss": 17.5109, "step": 27940 }, { "epoch": 0.5053781182123718, "grad_norm": 44.15625, "learning_rate": 9.921034802886483e-06, "loss": 16.946, "step": 27950 }, { "epoch": 0.5055589332814997, "grad_norm": 42.1875, "learning_rate": 9.921006550579822e-06, "loss": 16.8611, "step": 27960 }, { "epoch": 0.5057397483506275, "grad_norm": 42.59375, "learning_rate": 9.920978298273162e-06, "loss": 16.8588, "step": 27970 }, { "epoch": 0.5059205634197554, "grad_norm": 41.0625, "learning_rate": 9.920950045966505e-06, "loss": 17.1637, "step": 27980 }, { "epoch": 0.5061013784888833, "grad_norm": 42.53125, "learning_rate": 9.920921793659845e-06, "loss": 16.828, "step": 27990 }, { "epoch": 0.5062821935580111, "grad_norm": 39.46875, "learning_rate": 9.920893541353184e-06, "loss": 17.0082, "step": 28000 }, { "epoch": 0.506463008627139, "grad_norm": 43.0625, "learning_rate": 9.920865289046525e-06, "loss": 17.4609, "step": 28010 }, { "epoch": 0.5066438236962668, "grad_norm": 44.125, "learning_rate": 9.920837036739866e-06, "loss": 17.0481, "step": 28020 }, { "epoch": 0.5068246387653947, "grad_norm": 41.65625, "learning_rate": 9.920808784433206e-06, "loss": 17.2568, "step": 28030 }, { "epoch": 0.5070054538345226, "grad_norm": 38.78125, "learning_rate": 9.920780532126547e-06, "loss": 17.1309, "step": 28040 }, { "epoch": 0.5071862689036505, "grad_norm": 40.90625, "learning_rate": 9.920752279819886e-06, "loss": 16.5908, "step": 28050 }, { "epoch": 0.5073670839727783, "grad_norm": 42.375, "learning_rate": 9.920724027513226e-06, "loss": 16.8779, "step": 28060 }, { "epoch": 0.5075478990419061, "grad_norm": 41.28125, "learning_rate": 9.920695775206569e-06, "loss": 17.1186, "step": 28070 }, { "epoch": 0.507728714111034, "grad_norm": 40.0, "learning_rate": 9.920667522899908e-06, "loss": 16.7213, "step": 28080 }, { "epoch": 0.5079095291801619, "grad_norm": 42.9375, "learning_rate": 9.920639270593248e-06, "loss": 17.0304, "step": 28090 }, { "epoch": 0.5080903442492898, "grad_norm": 44.5, "learning_rate": 9.920611018286589e-06, "loss": 17.2263, "step": 28100 }, { "epoch": 0.5082711593184176, "grad_norm": 39.625, "learning_rate": 9.92058276597993e-06, "loss": 17.4506, "step": 28110 }, { "epoch": 0.5084519743875454, "grad_norm": 46.53125, "learning_rate": 9.92055451367327e-06, "loss": 17.0361, "step": 28120 }, { "epoch": 0.5086327894566733, "grad_norm": 43.03125, "learning_rate": 9.920526261366609e-06, "loss": 17.1712, "step": 28130 }, { "epoch": 0.5088136045258012, "grad_norm": 44.75, "learning_rate": 9.92049800905995e-06, "loss": 17.1423, "step": 28140 }, { "epoch": 0.5089944195949291, "grad_norm": 42.34375, "learning_rate": 9.92046975675329e-06, "loss": 16.952, "step": 28150 }, { "epoch": 0.5091752346640569, "grad_norm": 43.34375, "learning_rate": 9.920441504446633e-06, "loss": 17.1141, "step": 28160 }, { "epoch": 0.5093560497331847, "grad_norm": 37.75, "learning_rate": 9.920413252139971e-06, "loss": 17.0306, "step": 28170 }, { "epoch": 0.5095368648023126, "grad_norm": 42.21875, "learning_rate": 9.920384999833312e-06, "loss": 17.3226, "step": 28180 }, { "epoch": 0.5097176798714405, "grad_norm": 42.4375, "learning_rate": 9.920356747526653e-06, "loss": 17.2329, "step": 28190 }, { "epoch": 0.5098984949405684, "grad_norm": 39.40625, "learning_rate": 9.920328495219993e-06, "loss": 17.0066, "step": 28200 }, { "epoch": 0.5100793100096962, "grad_norm": 41.0625, "learning_rate": 9.920300242913334e-06, "loss": 17.5045, "step": 28210 }, { "epoch": 0.5102601250788241, "grad_norm": 41.375, "learning_rate": 9.920271990606673e-06, "loss": 16.7508, "step": 28220 }, { "epoch": 0.5104409401479519, "grad_norm": 41.71875, "learning_rate": 9.920243738300014e-06, "loss": 17.258, "step": 28230 }, { "epoch": 0.5106217552170798, "grad_norm": 42.625, "learning_rate": 9.920215485993356e-06, "loss": 17.1619, "step": 28240 }, { "epoch": 0.5108025702862077, "grad_norm": 40.21875, "learning_rate": 9.920187233686695e-06, "loss": 16.9134, "step": 28250 }, { "epoch": 0.5109833853553355, "grad_norm": 41.75, "learning_rate": 9.920158981380035e-06, "loss": 17.2607, "step": 28260 }, { "epoch": 0.5111642004244634, "grad_norm": 45.40625, "learning_rate": 9.920130729073376e-06, "loss": 16.8008, "step": 28270 }, { "epoch": 0.5113450154935912, "grad_norm": 41.15625, "learning_rate": 9.920102476766717e-06, "loss": 17.2442, "step": 28280 }, { "epoch": 0.511525830562719, "grad_norm": 41.28125, "learning_rate": 9.920074224460057e-06, "loss": 16.9537, "step": 28290 }, { "epoch": 0.511706645631847, "grad_norm": 41.75, "learning_rate": 9.920045972153398e-06, "loss": 16.9558, "step": 28300 }, { "epoch": 0.5118874607009748, "grad_norm": 41.09375, "learning_rate": 9.920017719846737e-06, "loss": 16.932, "step": 28310 }, { "epoch": 0.5120682757701027, "grad_norm": 40.90625, "learning_rate": 9.919989467540077e-06, "loss": 17.1713, "step": 28320 }, { "epoch": 0.5122490908392305, "grad_norm": 41.0625, "learning_rate": 9.91996121523342e-06, "loss": 17.1424, "step": 28330 }, { "epoch": 0.5124299059083584, "grad_norm": 40.3125, "learning_rate": 9.919932962926759e-06, "loss": 16.8539, "step": 28340 }, { "epoch": 0.5126107209774863, "grad_norm": 38.71875, "learning_rate": 9.9199047106201e-06, "loss": 17.0396, "step": 28350 }, { "epoch": 0.5127915360466141, "grad_norm": 42.96875, "learning_rate": 9.91987645831344e-06, "loss": 16.9008, "step": 28360 }, { "epoch": 0.512972351115742, "grad_norm": 43.1875, "learning_rate": 9.91984820600678e-06, "loss": 16.977, "step": 28370 }, { "epoch": 0.5131531661848698, "grad_norm": 40.5, "learning_rate": 9.919819953700121e-06, "loss": 16.8402, "step": 28380 }, { "epoch": 0.5133339812539978, "grad_norm": 41.4375, "learning_rate": 9.91979170139346e-06, "loss": 17.1193, "step": 28390 }, { "epoch": 0.5135147963231256, "grad_norm": 40.40625, "learning_rate": 9.9197634490868e-06, "loss": 17.002, "step": 28400 }, { "epoch": 0.5136956113922534, "grad_norm": 43.6875, "learning_rate": 9.919735196780141e-06, "loss": 17.2712, "step": 28410 }, { "epoch": 0.5138764264613813, "grad_norm": 41.75, "learning_rate": 9.919706944473484e-06, "loss": 17.0316, "step": 28420 }, { "epoch": 0.5140572415305091, "grad_norm": 40.3125, "learning_rate": 9.919678692166823e-06, "loss": 17.0029, "step": 28430 }, { "epoch": 0.5142380565996371, "grad_norm": 39.40625, "learning_rate": 9.919650439860163e-06, "loss": 16.7383, "step": 28440 }, { "epoch": 0.5144188716687649, "grad_norm": 42.25, "learning_rate": 9.919622187553504e-06, "loss": 16.9989, "step": 28450 }, { "epoch": 0.5145996867378927, "grad_norm": 41.84375, "learning_rate": 9.919593935246844e-06, "loss": 17.0851, "step": 28460 }, { "epoch": 0.5147805018070206, "grad_norm": 40.75, "learning_rate": 9.919565682940185e-06, "loss": 16.9262, "step": 28470 }, { "epoch": 0.5149613168761484, "grad_norm": 39.3125, "learning_rate": 9.919537430633524e-06, "loss": 16.9286, "step": 28480 }, { "epoch": 0.5151421319452764, "grad_norm": 40.75, "learning_rate": 9.919509178326865e-06, "loss": 16.9974, "step": 28490 }, { "epoch": 0.5153229470144042, "grad_norm": 43.125, "learning_rate": 9.919480926020205e-06, "loss": 17.1487, "step": 28500 }, { "epoch": 0.515503762083532, "grad_norm": 38.4375, "learning_rate": 9.919452673713546e-06, "loss": 16.7718, "step": 28510 }, { "epoch": 0.5156845771526599, "grad_norm": 41.71875, "learning_rate": 9.919424421406886e-06, "loss": 16.9833, "step": 28520 }, { "epoch": 0.5158653922217877, "grad_norm": 42.40625, "learning_rate": 9.919396169100227e-06, "loss": 17.0211, "step": 28530 }, { "epoch": 0.5160462072909157, "grad_norm": 43.3125, "learning_rate": 9.919367916793568e-06, "loss": 17.1867, "step": 28540 }, { "epoch": 0.5162270223600435, "grad_norm": 42.90625, "learning_rate": 9.919339664486908e-06, "loss": 16.7948, "step": 28550 }, { "epoch": 0.5164078374291714, "grad_norm": 41.75, "learning_rate": 9.919311412180247e-06, "loss": 17.1321, "step": 28560 }, { "epoch": 0.5165886524982992, "grad_norm": 41.4375, "learning_rate": 9.919283159873588e-06, "loss": 17.0192, "step": 28570 }, { "epoch": 0.516769467567427, "grad_norm": 42.25, "learning_rate": 9.919254907566929e-06, "loss": 16.7417, "step": 28580 }, { "epoch": 0.516950282636555, "grad_norm": 41.5625, "learning_rate": 9.91922665526027e-06, "loss": 17.0249, "step": 28590 }, { "epoch": 0.5171310977056828, "grad_norm": 38.75, "learning_rate": 9.91919840295361e-06, "loss": 16.8818, "step": 28600 }, { "epoch": 0.5173119127748107, "grad_norm": 43.875, "learning_rate": 9.91917015064695e-06, "loss": 17.1732, "step": 28610 }, { "epoch": 0.5174927278439385, "grad_norm": 41.8125, "learning_rate": 9.919141898340291e-06, "loss": 17.4947, "step": 28620 }, { "epoch": 0.5176735429130663, "grad_norm": 40.1875, "learning_rate": 9.919113646033632e-06, "loss": 17.4457, "step": 28630 }, { "epoch": 0.5178543579821943, "grad_norm": 42.3125, "learning_rate": 9.919085393726972e-06, "loss": 16.9908, "step": 28640 }, { "epoch": 0.5180351730513221, "grad_norm": 40.46875, "learning_rate": 9.919057141420311e-06, "loss": 16.6074, "step": 28650 }, { "epoch": 0.51821598812045, "grad_norm": 42.625, "learning_rate": 9.919028889113652e-06, "loss": 16.7887, "step": 28660 }, { "epoch": 0.5183968031895778, "grad_norm": 42.1875, "learning_rate": 9.919000636806992e-06, "loss": 17.326, "step": 28670 }, { "epoch": 0.5185776182587056, "grad_norm": 40.25, "learning_rate": 9.918972384500333e-06, "loss": 16.7892, "step": 28680 }, { "epoch": 0.5187584333278336, "grad_norm": 40.25, "learning_rate": 9.918944132193674e-06, "loss": 17.2271, "step": 28690 }, { "epoch": 0.5189392483969614, "grad_norm": 41.90625, "learning_rate": 9.918915879887014e-06, "loss": 17.1251, "step": 28700 }, { "epoch": 0.5191200634660893, "grad_norm": 41.625, "learning_rate": 9.918887627580355e-06, "loss": 16.8376, "step": 28710 }, { "epoch": 0.5193008785352171, "grad_norm": 40.90625, "learning_rate": 9.918859375273696e-06, "loss": 17.0451, "step": 28720 }, { "epoch": 0.519481693604345, "grad_norm": 41.71875, "learning_rate": 9.918831122967034e-06, "loss": 17.2237, "step": 28730 }, { "epoch": 0.5196625086734729, "grad_norm": 40.65625, "learning_rate": 9.918802870660375e-06, "loss": 17.1952, "step": 28740 }, { "epoch": 0.5198433237426007, "grad_norm": 41.53125, "learning_rate": 9.918774618353716e-06, "loss": 16.9982, "step": 28750 }, { "epoch": 0.5200241388117286, "grad_norm": 40.125, "learning_rate": 9.918746366047056e-06, "loss": 17.1002, "step": 28760 }, { "epoch": 0.5202049538808564, "grad_norm": 43.25, "learning_rate": 9.918718113740397e-06, "loss": 16.8707, "step": 28770 }, { "epoch": 0.5203857689499843, "grad_norm": 41.3125, "learning_rate": 9.918689861433738e-06, "loss": 17.2035, "step": 28780 }, { "epoch": 0.5205665840191122, "grad_norm": 45.375, "learning_rate": 9.918661609127078e-06, "loss": 16.9605, "step": 28790 }, { "epoch": 0.52074739908824, "grad_norm": 41.53125, "learning_rate": 9.918633356820419e-06, "loss": 17.3515, "step": 28800 }, { "epoch": 0.5209282141573679, "grad_norm": 43.5625, "learning_rate": 9.91860510451376e-06, "loss": 16.8148, "step": 28810 }, { "epoch": 0.5211090292264957, "grad_norm": 42.75, "learning_rate": 9.918576852207098e-06, "loss": 16.5191, "step": 28820 }, { "epoch": 0.5212898442956236, "grad_norm": 42.96875, "learning_rate": 9.918548599900439e-06, "loss": 17.1934, "step": 28830 }, { "epoch": 0.5214706593647515, "grad_norm": 41.5, "learning_rate": 9.91852034759378e-06, "loss": 17.5222, "step": 28840 }, { "epoch": 0.5216514744338793, "grad_norm": 43.09375, "learning_rate": 9.91849209528712e-06, "loss": 17.0393, "step": 28850 }, { "epoch": 0.5218322895030072, "grad_norm": 44.875, "learning_rate": 9.918463842980461e-06, "loss": 17.2174, "step": 28860 }, { "epoch": 0.522013104572135, "grad_norm": 40.6875, "learning_rate": 9.918435590673801e-06, "loss": 17.1666, "step": 28870 }, { "epoch": 0.5221939196412629, "grad_norm": 40.65625, "learning_rate": 9.918407338367142e-06, "loss": 16.7025, "step": 28880 }, { "epoch": 0.5223747347103908, "grad_norm": 40.5, "learning_rate": 9.918379086060483e-06, "loss": 16.7877, "step": 28890 }, { "epoch": 0.5225555497795187, "grad_norm": 43.71875, "learning_rate": 9.918350833753823e-06, "loss": 17.1532, "step": 28900 }, { "epoch": 0.5227363648486465, "grad_norm": 40.96875, "learning_rate": 9.918322581447162e-06, "loss": 17.0564, "step": 28910 }, { "epoch": 0.5229171799177743, "grad_norm": 42.96875, "learning_rate": 9.918294329140503e-06, "loss": 16.9364, "step": 28920 }, { "epoch": 0.5230979949869022, "grad_norm": 43.40625, "learning_rate": 9.918266076833844e-06, "loss": 17.3431, "step": 28930 }, { "epoch": 0.52327881005603, "grad_norm": 43.53125, "learning_rate": 9.918237824527184e-06, "loss": 16.791, "step": 28940 }, { "epoch": 0.523459625125158, "grad_norm": 40.6875, "learning_rate": 9.918209572220525e-06, "loss": 17.3021, "step": 28950 }, { "epoch": 0.5236404401942858, "grad_norm": 41.40625, "learning_rate": 9.918181319913865e-06, "loss": 16.802, "step": 28960 }, { "epoch": 0.5238212552634136, "grad_norm": 43.5625, "learning_rate": 9.918153067607206e-06, "loss": 17.1349, "step": 28970 }, { "epoch": 0.5240020703325415, "grad_norm": 44.0625, "learning_rate": 9.918124815300547e-06, "loss": 16.9321, "step": 28980 }, { "epoch": 0.5241828854016694, "grad_norm": 41.25, "learning_rate": 9.918096562993886e-06, "loss": 17.3189, "step": 28990 }, { "epoch": 0.5243637004707973, "grad_norm": 42.96875, "learning_rate": 9.918068310687226e-06, "loss": 16.9273, "step": 29000 }, { "epoch": 0.5245445155399251, "grad_norm": 40.875, "learning_rate": 9.918040058380567e-06, "loss": 17.2656, "step": 29010 }, { "epoch": 0.5247253306090529, "grad_norm": 41.90625, "learning_rate": 9.918011806073907e-06, "loss": 17.2891, "step": 29020 }, { "epoch": 0.5249061456781808, "grad_norm": 43.78125, "learning_rate": 9.917983553767248e-06, "loss": 16.7634, "step": 29030 }, { "epoch": 0.5250869607473087, "grad_norm": 44.40625, "learning_rate": 9.917955301460589e-06, "loss": 16.9445, "step": 29040 }, { "epoch": 0.5252677758164366, "grad_norm": 41.09375, "learning_rate": 9.91792704915393e-06, "loss": 17.0999, "step": 29050 }, { "epoch": 0.5254485908855644, "grad_norm": 42.0625, "learning_rate": 9.91789879684727e-06, "loss": 17.1245, "step": 29060 }, { "epoch": 0.5256294059546923, "grad_norm": 42.09375, "learning_rate": 9.91787054454061e-06, "loss": 16.812, "step": 29070 }, { "epoch": 0.5258102210238201, "grad_norm": 38.9375, "learning_rate": 9.91784229223395e-06, "loss": 17.0611, "step": 29080 }, { "epoch": 0.525991036092948, "grad_norm": 40.53125, "learning_rate": 9.91781403992729e-06, "loss": 17.081, "step": 29090 }, { "epoch": 0.5261718511620759, "grad_norm": 40.90625, "learning_rate": 9.91778578762063e-06, "loss": 16.5785, "step": 29100 }, { "epoch": 0.5263526662312037, "grad_norm": 40.1875, "learning_rate": 9.917757535313971e-06, "loss": 16.8719, "step": 29110 }, { "epoch": 0.5265334813003316, "grad_norm": 42.1875, "learning_rate": 9.917729283007312e-06, "loss": 16.9369, "step": 29120 }, { "epoch": 0.5267142963694594, "grad_norm": 39.15625, "learning_rate": 9.917701030700653e-06, "loss": 17.2821, "step": 29130 }, { "epoch": 0.5268951114385872, "grad_norm": 38.15625, "learning_rate": 9.917672778393993e-06, "loss": 16.6713, "step": 29140 }, { "epoch": 0.5270759265077152, "grad_norm": 41.75, "learning_rate": 9.917644526087334e-06, "loss": 16.9598, "step": 29150 }, { "epoch": 0.527256741576843, "grad_norm": 38.96875, "learning_rate": 9.917616273780673e-06, "loss": 17.0933, "step": 29160 }, { "epoch": 0.5274375566459709, "grad_norm": 38.3125, "learning_rate": 9.917588021474013e-06, "loss": 17.3192, "step": 29170 }, { "epoch": 0.5276183717150987, "grad_norm": 42.65625, "learning_rate": 9.917559769167354e-06, "loss": 17.0815, "step": 29180 }, { "epoch": 0.5277991867842265, "grad_norm": 42.65625, "learning_rate": 9.917531516860695e-06, "loss": 17.0196, "step": 29190 }, { "epoch": 0.5279800018533545, "grad_norm": 39.875, "learning_rate": 9.917503264554035e-06, "loss": 16.7482, "step": 29200 }, { "epoch": 0.5281608169224823, "grad_norm": 43.65625, "learning_rate": 9.917475012247376e-06, "loss": 16.6207, "step": 29210 }, { "epoch": 0.5283416319916102, "grad_norm": 41.65625, "learning_rate": 9.917446759940716e-06, "loss": 17.0133, "step": 29220 }, { "epoch": 0.528522447060738, "grad_norm": 43.28125, "learning_rate": 9.917418507634057e-06, "loss": 17.2084, "step": 29230 }, { "epoch": 0.528703262129866, "grad_norm": 42.84375, "learning_rate": 9.917390255327398e-06, "loss": 17.0293, "step": 29240 }, { "epoch": 0.5288840771989938, "grad_norm": 41.03125, "learning_rate": 9.917362003020737e-06, "loss": 16.935, "step": 29250 }, { "epoch": 0.5290648922681216, "grad_norm": 41.9375, "learning_rate": 9.917333750714077e-06, "loss": 16.9455, "step": 29260 }, { "epoch": 0.5292457073372495, "grad_norm": 41.65625, "learning_rate": 9.917305498407418e-06, "loss": 17.1935, "step": 29270 }, { "epoch": 0.5294265224063773, "grad_norm": 40.625, "learning_rate": 9.917277246100759e-06, "loss": 17.0945, "step": 29280 }, { "epoch": 0.5296073374755053, "grad_norm": 40.59375, "learning_rate": 9.9172489937941e-06, "loss": 17.1142, "step": 29290 }, { "epoch": 0.5297881525446331, "grad_norm": 40.5625, "learning_rate": 9.91722074148744e-06, "loss": 17.0181, "step": 29300 }, { "epoch": 0.5299689676137609, "grad_norm": 41.40625, "learning_rate": 9.91719248918078e-06, "loss": 17.0982, "step": 29310 }, { "epoch": 0.5301497826828888, "grad_norm": 42.0625, "learning_rate": 9.917164236874121e-06, "loss": 17.1721, "step": 29320 }, { "epoch": 0.5303305977520166, "grad_norm": 40.5625, "learning_rate": 9.917135984567462e-06, "loss": 17.2262, "step": 29330 }, { "epoch": 0.5305114128211446, "grad_norm": 42.40625, "learning_rate": 9.9171077322608e-06, "loss": 17.0821, "step": 29340 }, { "epoch": 0.5306922278902724, "grad_norm": 41.96875, "learning_rate": 9.917079479954141e-06, "loss": 17.1578, "step": 29350 }, { "epoch": 0.5308730429594002, "grad_norm": 42.25, "learning_rate": 9.917051227647482e-06, "loss": 17.4814, "step": 29360 }, { "epoch": 0.5310538580285281, "grad_norm": 41.40625, "learning_rate": 9.917022975340822e-06, "loss": 17.022, "step": 29370 }, { "epoch": 0.5312346730976559, "grad_norm": 41.5, "learning_rate": 9.916994723034163e-06, "loss": 17.2217, "step": 29380 }, { "epoch": 0.5314154881667839, "grad_norm": 42.28125, "learning_rate": 9.916966470727504e-06, "loss": 17.1552, "step": 29390 }, { "epoch": 0.5315963032359117, "grad_norm": 44.09375, "learning_rate": 9.916938218420844e-06, "loss": 16.6741, "step": 29400 }, { "epoch": 0.5317771183050395, "grad_norm": 44.03125, "learning_rate": 9.916909966114185e-06, "loss": 17.2433, "step": 29410 }, { "epoch": 0.5319579333741674, "grad_norm": 46.09375, "learning_rate": 9.916881713807524e-06, "loss": 16.9512, "step": 29420 }, { "epoch": 0.5321387484432952, "grad_norm": 45.71875, "learning_rate": 9.916853461500864e-06, "loss": 17.1226, "step": 29430 }, { "epoch": 0.5323195635124232, "grad_norm": 41.3125, "learning_rate": 9.916825209194205e-06, "loss": 16.9795, "step": 29440 }, { "epoch": 0.532500378581551, "grad_norm": 41.625, "learning_rate": 9.916796956887546e-06, "loss": 16.7698, "step": 29450 }, { "epoch": 0.5326811936506789, "grad_norm": 41.84375, "learning_rate": 9.916768704580886e-06, "loss": 17.3518, "step": 29460 }, { "epoch": 0.5328620087198067, "grad_norm": 41.84375, "learning_rate": 9.916740452274227e-06, "loss": 16.6971, "step": 29470 }, { "epoch": 0.5330428237889345, "grad_norm": 42.34375, "learning_rate": 9.916712199967568e-06, "loss": 16.7716, "step": 29480 }, { "epoch": 0.5332236388580625, "grad_norm": 43.21875, "learning_rate": 9.916683947660908e-06, "loss": 17.3883, "step": 29490 }, { "epoch": 0.5334044539271903, "grad_norm": 41.03125, "learning_rate": 9.916655695354249e-06, "loss": 17.0945, "step": 29500 }, { "epoch": 0.5335852689963182, "grad_norm": 42.0, "learning_rate": 9.916627443047588e-06, "loss": 16.6286, "step": 29510 }, { "epoch": 0.533766084065446, "grad_norm": 39.6875, "learning_rate": 9.916599190740928e-06, "loss": 16.7957, "step": 29520 }, { "epoch": 0.5339468991345738, "grad_norm": 37.21875, "learning_rate": 9.916570938434269e-06, "loss": 16.9727, "step": 29530 }, { "epoch": 0.5341277142037018, "grad_norm": 40.6875, "learning_rate": 9.91654268612761e-06, "loss": 17.2793, "step": 29540 }, { "epoch": 0.5343085292728296, "grad_norm": 42.9375, "learning_rate": 9.91651443382095e-06, "loss": 16.8023, "step": 29550 }, { "epoch": 0.5344893443419575, "grad_norm": 42.09375, "learning_rate": 9.916486181514291e-06, "loss": 16.812, "step": 29560 }, { "epoch": 0.5346701594110853, "grad_norm": 41.75, "learning_rate": 9.916457929207632e-06, "loss": 17.0138, "step": 29570 }, { "epoch": 0.5348509744802131, "grad_norm": 40.65625, "learning_rate": 9.916429676900972e-06, "loss": 17.0572, "step": 29580 }, { "epoch": 0.535031789549341, "grad_norm": 40.90625, "learning_rate": 9.916401424594311e-06, "loss": 16.8175, "step": 29590 }, { "epoch": 0.5352126046184689, "grad_norm": 41.9375, "learning_rate": 9.916373172287652e-06, "loss": 17.1891, "step": 29600 }, { "epoch": 0.5353934196875968, "grad_norm": 41.21875, "learning_rate": 9.916344919980992e-06, "loss": 17.1858, "step": 29610 }, { "epoch": 0.5355742347567246, "grad_norm": 42.875, "learning_rate": 9.916316667674333e-06, "loss": 17.156, "step": 29620 }, { "epoch": 0.5357550498258525, "grad_norm": 44.15625, "learning_rate": 9.916288415367674e-06, "loss": 16.9915, "step": 29630 }, { "epoch": 0.5359358648949804, "grad_norm": 41.25, "learning_rate": 9.916260163061014e-06, "loss": 17.008, "step": 29640 }, { "epoch": 0.5361166799641082, "grad_norm": 41.0625, "learning_rate": 9.916231910754355e-06, "loss": 17.0597, "step": 29650 }, { "epoch": 0.5362974950332361, "grad_norm": 41.59375, "learning_rate": 9.916203658447695e-06, "loss": 17.3012, "step": 29660 }, { "epoch": 0.5364783101023639, "grad_norm": 39.8125, "learning_rate": 9.916175406141036e-06, "loss": 16.7412, "step": 29670 }, { "epoch": 0.5366591251714918, "grad_norm": 41.71875, "learning_rate": 9.916147153834375e-06, "loss": 16.666, "step": 29680 }, { "epoch": 0.5368399402406197, "grad_norm": 40.6875, "learning_rate": 9.916118901527716e-06, "loss": 17.0808, "step": 29690 }, { "epoch": 0.5370207553097475, "grad_norm": 41.84375, "learning_rate": 9.916090649221056e-06, "loss": 16.8892, "step": 29700 }, { "epoch": 0.5372015703788754, "grad_norm": 42.84375, "learning_rate": 9.916062396914397e-06, "loss": 16.88, "step": 29710 }, { "epoch": 0.5373823854480032, "grad_norm": 44.09375, "learning_rate": 9.916034144607737e-06, "loss": 16.8642, "step": 29720 }, { "epoch": 0.5375632005171311, "grad_norm": 43.59375, "learning_rate": 9.916005892301078e-06, "loss": 17.3012, "step": 29730 }, { "epoch": 0.537744015586259, "grad_norm": 40.28125, "learning_rate": 9.915977639994419e-06, "loss": 17.0513, "step": 29740 }, { "epoch": 0.5379248306553868, "grad_norm": 40.6875, "learning_rate": 9.91594938768776e-06, "loss": 17.2723, "step": 29750 }, { "epoch": 0.5381056457245147, "grad_norm": 41.625, "learning_rate": 9.9159211353811e-06, "loss": 17.2688, "step": 29760 }, { "epoch": 0.5382864607936425, "grad_norm": 38.59375, "learning_rate": 9.915892883074439e-06, "loss": 17.1386, "step": 29770 }, { "epoch": 0.5384672758627704, "grad_norm": 41.5625, "learning_rate": 9.91586463076778e-06, "loss": 16.8733, "step": 29780 }, { "epoch": 0.5386480909318982, "grad_norm": 40.375, "learning_rate": 9.91583637846112e-06, "loss": 17.0199, "step": 29790 }, { "epoch": 0.5388289060010262, "grad_norm": 44.59375, "learning_rate": 9.91580812615446e-06, "loss": 17.2014, "step": 29800 }, { "epoch": 0.539009721070154, "grad_norm": 42.0625, "learning_rate": 9.915779873847801e-06, "loss": 16.9838, "step": 29810 }, { "epoch": 0.5391905361392818, "grad_norm": 43.15625, "learning_rate": 9.915751621541142e-06, "loss": 17.3729, "step": 29820 }, { "epoch": 0.5393713512084097, "grad_norm": 41.875, "learning_rate": 9.915723369234483e-06, "loss": 17.075, "step": 29830 }, { "epoch": 0.5395521662775375, "grad_norm": 38.125, "learning_rate": 9.915695116927823e-06, "loss": 17.0875, "step": 29840 }, { "epoch": 0.5397329813466655, "grad_norm": 39.53125, "learning_rate": 9.915666864621162e-06, "loss": 16.6237, "step": 29850 }, { "epoch": 0.5399137964157933, "grad_norm": 41.84375, "learning_rate": 9.915638612314503e-06, "loss": 17.0236, "step": 29860 }, { "epoch": 0.5400946114849211, "grad_norm": 42.65625, "learning_rate": 9.915610360007843e-06, "loss": 17.3734, "step": 29870 }, { "epoch": 0.540275426554049, "grad_norm": 42.09375, "learning_rate": 9.915582107701184e-06, "loss": 16.5518, "step": 29880 }, { "epoch": 0.5404562416231768, "grad_norm": 41.1875, "learning_rate": 9.915553855394525e-06, "loss": 16.4954, "step": 29890 }, { "epoch": 0.5406370566923048, "grad_norm": 40.5, "learning_rate": 9.915525603087864e-06, "loss": 16.9942, "step": 29900 }, { "epoch": 0.5408178717614326, "grad_norm": 40.25, "learning_rate": 9.915497350781206e-06, "loss": 16.9093, "step": 29910 }, { "epoch": 0.5409986868305604, "grad_norm": 40.875, "learning_rate": 9.915469098474547e-06, "loss": 17.0349, "step": 29920 }, { "epoch": 0.5411795018996883, "grad_norm": 41.625, "learning_rate": 9.915440846167887e-06, "loss": 16.9415, "step": 29930 }, { "epoch": 0.5413603169688161, "grad_norm": 41.53125, "learning_rate": 9.915412593861226e-06, "loss": 16.9677, "step": 29940 }, { "epoch": 0.5415411320379441, "grad_norm": 39.9375, "learning_rate": 9.915384341554567e-06, "loss": 17.1662, "step": 29950 }, { "epoch": 0.5417219471070719, "grad_norm": 46.5, "learning_rate": 9.915356089247907e-06, "loss": 17.0126, "step": 29960 }, { "epoch": 0.5419027621761998, "grad_norm": 37.96875, "learning_rate": 9.915327836941248e-06, "loss": 17.1567, "step": 29970 }, { "epoch": 0.5420835772453276, "grad_norm": 42.28125, "learning_rate": 9.915299584634589e-06, "loss": 17.0694, "step": 29980 }, { "epoch": 0.5422643923144554, "grad_norm": 40.21875, "learning_rate": 9.91527133232793e-06, "loss": 16.7261, "step": 29990 }, { "epoch": 0.5424452073835834, "grad_norm": 41.6875, "learning_rate": 9.91524308002127e-06, "loss": 16.8142, "step": 30000 }, { "epoch": 0.5424452073835834, "eval_loss": 2.1271157264709473, "eval_runtime": 229.7564, "eval_samples_per_second": 3160.125, "eval_steps_per_second": 49.378, "step": 30000 }, { "epoch": 0.5426260224527112, "grad_norm": 44.59375, "learning_rate": 9.91521482771461e-06, "loss": 16.946, "step": 30010 }, { "epoch": 0.5428068375218391, "grad_norm": 42.71875, "learning_rate": 9.91518657540795e-06, "loss": 17.0405, "step": 30020 }, { "epoch": 0.5429876525909669, "grad_norm": 40.875, "learning_rate": 9.91515832310129e-06, "loss": 16.5716, "step": 30030 }, { "epoch": 0.5431684676600947, "grad_norm": 40.25, "learning_rate": 9.91513007079463e-06, "loss": 16.8354, "step": 30040 }, { "epoch": 0.5433492827292227, "grad_norm": 41.40625, "learning_rate": 9.915101818487971e-06, "loss": 17.1805, "step": 30050 }, { "epoch": 0.5435300977983505, "grad_norm": 42.5625, "learning_rate": 9.915073566181312e-06, "loss": 17.2318, "step": 30060 }, { "epoch": 0.5437109128674784, "grad_norm": 44.8125, "learning_rate": 9.915045313874652e-06, "loss": 16.813, "step": 30070 }, { "epoch": 0.5438917279366062, "grad_norm": 39.4375, "learning_rate": 9.915017061567993e-06, "loss": 16.9205, "step": 30080 }, { "epoch": 0.544072543005734, "grad_norm": 42.5625, "learning_rate": 9.914988809261334e-06, "loss": 16.9416, "step": 30090 }, { "epoch": 0.544253358074862, "grad_norm": 42.75, "learning_rate": 9.914960556954674e-06, "loss": 17.3216, "step": 30100 }, { "epoch": 0.5444341731439898, "grad_norm": 38.4375, "learning_rate": 9.914932304648013e-06, "loss": 17.1828, "step": 30110 }, { "epoch": 0.5446149882131177, "grad_norm": 44.34375, "learning_rate": 9.914904052341354e-06, "loss": 16.7072, "step": 30120 }, { "epoch": 0.5447958032822455, "grad_norm": 39.0625, "learning_rate": 9.914875800034694e-06, "loss": 17.0456, "step": 30130 }, { "epoch": 0.5449766183513735, "grad_norm": 42.1875, "learning_rate": 9.914847547728035e-06, "loss": 16.8821, "step": 30140 }, { "epoch": 0.5451574334205013, "grad_norm": 41.5, "learning_rate": 9.914819295421376e-06, "loss": 16.8177, "step": 30150 }, { "epoch": 0.5453382484896291, "grad_norm": 39.59375, "learning_rate": 9.914791043114715e-06, "loss": 16.8663, "step": 30160 }, { "epoch": 0.545519063558757, "grad_norm": 42.28125, "learning_rate": 9.914762790808057e-06, "loss": 17.1785, "step": 30170 }, { "epoch": 0.5456998786278848, "grad_norm": 42.25, "learning_rate": 9.914734538501398e-06, "loss": 16.9884, "step": 30180 }, { "epoch": 0.5458806936970128, "grad_norm": 42.75, "learning_rate": 9.914706286194738e-06, "loss": 16.6989, "step": 30190 }, { "epoch": 0.5460615087661406, "grad_norm": 42.65625, "learning_rate": 9.914678033888077e-06, "loss": 17.1729, "step": 30200 }, { "epoch": 0.5462423238352684, "grad_norm": 40.8125, "learning_rate": 9.914649781581418e-06, "loss": 17.0265, "step": 30210 }, { "epoch": 0.5464231389043963, "grad_norm": 40.34375, "learning_rate": 9.914621529274758e-06, "loss": 17.4852, "step": 30220 }, { "epoch": 0.5466039539735241, "grad_norm": 41.625, "learning_rate": 9.914593276968099e-06, "loss": 17.1923, "step": 30230 }, { "epoch": 0.546784769042652, "grad_norm": 41.375, "learning_rate": 9.91456502466144e-06, "loss": 17.2628, "step": 30240 }, { "epoch": 0.5469655841117799, "grad_norm": 40.6875, "learning_rate": 9.914536772354779e-06, "loss": 17.0136, "step": 30250 }, { "epoch": 0.5471463991809077, "grad_norm": 42.6875, "learning_rate": 9.914508520048121e-06, "loss": 16.633, "step": 30260 }, { "epoch": 0.5473272142500356, "grad_norm": 42.1875, "learning_rate": 9.914480267741462e-06, "loss": 16.9213, "step": 30270 }, { "epoch": 0.5475080293191634, "grad_norm": 44.15625, "learning_rate": 9.9144520154348e-06, "loss": 16.8474, "step": 30280 }, { "epoch": 0.5476888443882914, "grad_norm": 43.5625, "learning_rate": 9.914423763128141e-06, "loss": 16.8602, "step": 30290 }, { "epoch": 0.5478696594574192, "grad_norm": 39.46875, "learning_rate": 9.914395510821482e-06, "loss": 17.1963, "step": 30300 }, { "epoch": 0.5480504745265471, "grad_norm": 39.59375, "learning_rate": 9.914367258514822e-06, "loss": 17.1725, "step": 30310 }, { "epoch": 0.5482312895956749, "grad_norm": 40.40625, "learning_rate": 9.914339006208163e-06, "loss": 17.3516, "step": 30320 }, { "epoch": 0.5484121046648027, "grad_norm": 40.46875, "learning_rate": 9.914310753901502e-06, "loss": 17.3431, "step": 30330 }, { "epoch": 0.5485929197339307, "grad_norm": 38.40625, "learning_rate": 9.914282501594844e-06, "loss": 16.9414, "step": 30340 }, { "epoch": 0.5487737348030585, "grad_norm": 41.3125, "learning_rate": 9.914254249288185e-06, "loss": 16.793, "step": 30350 }, { "epoch": 0.5489545498721864, "grad_norm": 43.75, "learning_rate": 9.914225996981525e-06, "loss": 16.7673, "step": 30360 }, { "epoch": 0.5491353649413142, "grad_norm": 41.625, "learning_rate": 9.914197744674864e-06, "loss": 17.0738, "step": 30370 }, { "epoch": 0.549316180010442, "grad_norm": 39.75, "learning_rate": 9.914169492368205e-06, "loss": 17.0317, "step": 30380 }, { "epoch": 0.54949699507957, "grad_norm": 39.90625, "learning_rate": 9.914141240061546e-06, "loss": 17.0928, "step": 30390 }, { "epoch": 0.5496778101486978, "grad_norm": 41.1875, "learning_rate": 9.914112987754886e-06, "loss": 17.2458, "step": 30400 }, { "epoch": 0.5498586252178257, "grad_norm": 40.09375, "learning_rate": 9.914084735448227e-06, "loss": 17.1515, "step": 30410 }, { "epoch": 0.5500394402869535, "grad_norm": 43.03125, "learning_rate": 9.914056483141566e-06, "loss": 17.6208, "step": 30420 }, { "epoch": 0.5502202553560813, "grad_norm": 42.0, "learning_rate": 9.914028230834908e-06, "loss": 17.0189, "step": 30430 }, { "epoch": 0.5504010704252092, "grad_norm": 41.3125, "learning_rate": 9.913999978528249e-06, "loss": 16.9412, "step": 30440 }, { "epoch": 0.5505818854943371, "grad_norm": 44.5, "learning_rate": 9.913971726221588e-06, "loss": 16.8629, "step": 30450 }, { "epoch": 0.550762700563465, "grad_norm": 42.3125, "learning_rate": 9.913943473914928e-06, "loss": 17.4288, "step": 30460 }, { "epoch": 0.5509435156325928, "grad_norm": 39.1875, "learning_rate": 9.913915221608269e-06, "loss": 16.7881, "step": 30470 }, { "epoch": 0.5511243307017207, "grad_norm": 44.125, "learning_rate": 9.91388696930161e-06, "loss": 17.1656, "step": 30480 }, { "epoch": 0.5513051457708485, "grad_norm": 40.125, "learning_rate": 9.91385871699495e-06, "loss": 16.8119, "step": 30490 }, { "epoch": 0.5514859608399764, "grad_norm": 41.65625, "learning_rate": 9.91383046468829e-06, "loss": 16.8665, "step": 30500 }, { "epoch": 0.5516667759091043, "grad_norm": 40.21875, "learning_rate": 9.91380221238163e-06, "loss": 17.2542, "step": 30510 }, { "epoch": 0.5518475909782321, "grad_norm": 42.0625, "learning_rate": 9.913773960074972e-06, "loss": 16.7592, "step": 30520 }, { "epoch": 0.55202840604736, "grad_norm": 42.625, "learning_rate": 9.913745707768313e-06, "loss": 16.7518, "step": 30530 }, { "epoch": 0.5522092211164878, "grad_norm": 43.78125, "learning_rate": 9.913717455461652e-06, "loss": 16.7956, "step": 30540 }, { "epoch": 0.5523900361856157, "grad_norm": 38.28125, "learning_rate": 9.913689203154992e-06, "loss": 17.1653, "step": 30550 }, { "epoch": 0.5525708512547436, "grad_norm": 42.75, "learning_rate": 9.913660950848333e-06, "loss": 17.4375, "step": 30560 }, { "epoch": 0.5527516663238714, "grad_norm": 41.0, "learning_rate": 9.913632698541673e-06, "loss": 16.7577, "step": 30570 }, { "epoch": 0.5529324813929993, "grad_norm": 44.0, "learning_rate": 9.913604446235014e-06, "loss": 16.8749, "step": 30580 }, { "epoch": 0.5531132964621271, "grad_norm": 43.90625, "learning_rate": 9.913576193928353e-06, "loss": 17.3017, "step": 30590 }, { "epoch": 0.553294111531255, "grad_norm": 45.0625, "learning_rate": 9.913547941621694e-06, "loss": 16.8087, "step": 30600 }, { "epoch": 0.5534749266003829, "grad_norm": 45.84375, "learning_rate": 9.913519689315036e-06, "loss": 16.8531, "step": 30610 }, { "epoch": 0.5536557416695107, "grad_norm": 38.78125, "learning_rate": 9.913491437008377e-06, "loss": 16.864, "step": 30620 }, { "epoch": 0.5538365567386386, "grad_norm": 43.1875, "learning_rate": 9.913463184701715e-06, "loss": 16.9866, "step": 30630 }, { "epoch": 0.5540173718077664, "grad_norm": 42.25, "learning_rate": 9.913434932395056e-06, "loss": 17.0031, "step": 30640 }, { "epoch": 0.5541981868768944, "grad_norm": 39.34375, "learning_rate": 9.913406680088397e-06, "loss": 16.7895, "step": 30650 }, { "epoch": 0.5543790019460222, "grad_norm": 43.5, "learning_rate": 9.913378427781737e-06, "loss": 17.0026, "step": 30660 }, { "epoch": 0.55455981701515, "grad_norm": 40.25, "learning_rate": 9.913350175475078e-06, "loss": 17.2128, "step": 30670 }, { "epoch": 0.5547406320842779, "grad_norm": 43.96875, "learning_rate": 9.913321923168417e-06, "loss": 17.0078, "step": 30680 }, { "epoch": 0.5549214471534057, "grad_norm": 43.84375, "learning_rate": 9.91329367086176e-06, "loss": 17.1819, "step": 30690 }, { "epoch": 0.5551022622225337, "grad_norm": 43.5625, "learning_rate": 9.9132654185551e-06, "loss": 16.6908, "step": 30700 }, { "epoch": 0.5552830772916615, "grad_norm": 43.5, "learning_rate": 9.913237166248439e-06, "loss": 17.0772, "step": 30710 }, { "epoch": 0.5554638923607893, "grad_norm": 41.84375, "learning_rate": 9.91320891394178e-06, "loss": 17.0964, "step": 30720 }, { "epoch": 0.5556447074299172, "grad_norm": 43.875, "learning_rate": 9.91318066163512e-06, "loss": 16.7243, "step": 30730 }, { "epoch": 0.555825522499045, "grad_norm": 41.53125, "learning_rate": 9.91315240932846e-06, "loss": 17.2681, "step": 30740 }, { "epoch": 0.556006337568173, "grad_norm": 42.6875, "learning_rate": 9.913124157021801e-06, "loss": 17.031, "step": 30750 }, { "epoch": 0.5561871526373008, "grad_norm": 42.3125, "learning_rate": 9.91309590471514e-06, "loss": 16.7159, "step": 30760 }, { "epoch": 0.5563679677064286, "grad_norm": 43.84375, "learning_rate": 9.91306765240848e-06, "loss": 16.9925, "step": 30770 }, { "epoch": 0.5565487827755565, "grad_norm": 40.71875, "learning_rate": 9.913039400101823e-06, "loss": 16.9813, "step": 30780 }, { "epoch": 0.5567295978446843, "grad_norm": 40.40625, "learning_rate": 9.913011147795164e-06, "loss": 16.9735, "step": 30790 }, { "epoch": 0.5569104129138123, "grad_norm": 42.4375, "learning_rate": 9.912982895488503e-06, "loss": 17.1974, "step": 30800 }, { "epoch": 0.5570912279829401, "grad_norm": 42.96875, "learning_rate": 9.912954643181843e-06, "loss": 17.0436, "step": 30810 }, { "epoch": 0.557272043052068, "grad_norm": 43.375, "learning_rate": 9.912926390875184e-06, "loss": 17.1123, "step": 30820 }, { "epoch": 0.5574528581211958, "grad_norm": 43.21875, "learning_rate": 9.912898138568525e-06, "loss": 16.9163, "step": 30830 }, { "epoch": 0.5576336731903236, "grad_norm": 39.90625, "learning_rate": 9.912869886261865e-06, "loss": 17.1751, "step": 30840 }, { "epoch": 0.5578144882594516, "grad_norm": 42.90625, "learning_rate": 9.912841633955204e-06, "loss": 16.9831, "step": 30850 }, { "epoch": 0.5579953033285794, "grad_norm": 43.9375, "learning_rate": 9.912813381648545e-06, "loss": 16.887, "step": 30860 }, { "epoch": 0.5581761183977073, "grad_norm": 41.15625, "learning_rate": 9.912785129341887e-06, "loss": 16.9941, "step": 30870 }, { "epoch": 0.5583569334668351, "grad_norm": 44.25, "learning_rate": 9.912756877035226e-06, "loss": 16.9951, "step": 30880 }, { "epoch": 0.5585377485359629, "grad_norm": 43.5, "learning_rate": 9.912728624728567e-06, "loss": 17.0885, "step": 30890 }, { "epoch": 0.5587185636050909, "grad_norm": 38.4375, "learning_rate": 9.912700372421907e-06, "loss": 16.9381, "step": 30900 }, { "epoch": 0.5588993786742187, "grad_norm": 45.90625, "learning_rate": 9.912672120115248e-06, "loss": 16.8079, "step": 30910 }, { "epoch": 0.5590801937433466, "grad_norm": 45.1875, "learning_rate": 9.912643867808588e-06, "loss": 17.1373, "step": 30920 }, { "epoch": 0.5592610088124744, "grad_norm": 41.65625, "learning_rate": 9.912615615501929e-06, "loss": 17.3978, "step": 30930 }, { "epoch": 0.5594418238816022, "grad_norm": 40.0, "learning_rate": 9.912587363195268e-06, "loss": 17.0463, "step": 30940 }, { "epoch": 0.5596226389507302, "grad_norm": 40.25, "learning_rate": 9.912559110888609e-06, "loss": 17.0721, "step": 30950 }, { "epoch": 0.559803454019858, "grad_norm": 43.40625, "learning_rate": 9.912530858581951e-06, "loss": 17.0719, "step": 30960 }, { "epoch": 0.5599842690889859, "grad_norm": 39.9375, "learning_rate": 9.91250260627529e-06, "loss": 17.2505, "step": 30970 }, { "epoch": 0.5601650841581137, "grad_norm": 41.71875, "learning_rate": 9.91247435396863e-06, "loss": 17.0649, "step": 30980 }, { "epoch": 0.5603458992272417, "grad_norm": 42.21875, "learning_rate": 9.912446101661971e-06, "loss": 17.0292, "step": 30990 }, { "epoch": 0.5605267142963695, "grad_norm": 45.75, "learning_rate": 9.912417849355312e-06, "loss": 16.998, "step": 31000 }, { "epoch": 0.5607075293654973, "grad_norm": 39.71875, "learning_rate": 9.912389597048652e-06, "loss": 16.8837, "step": 31010 }, { "epoch": 0.5608883444346252, "grad_norm": 39.78125, "learning_rate": 9.912361344741991e-06, "loss": 17.1694, "step": 31020 }, { "epoch": 0.561069159503753, "grad_norm": 40.5, "learning_rate": 9.912333092435332e-06, "loss": 16.8809, "step": 31030 }, { "epoch": 0.561249974572881, "grad_norm": 42.125, "learning_rate": 9.912304840128674e-06, "loss": 17.1144, "step": 31040 }, { "epoch": 0.5614307896420088, "grad_norm": 40.25, "learning_rate": 9.912276587822015e-06, "loss": 17.2917, "step": 31050 }, { "epoch": 0.5616116047111366, "grad_norm": 44.6875, "learning_rate": 9.912248335515354e-06, "loss": 16.9495, "step": 31060 }, { "epoch": 0.5617924197802645, "grad_norm": 43.96875, "learning_rate": 9.912220083208694e-06, "loss": 17.3728, "step": 31070 }, { "epoch": 0.5619732348493923, "grad_norm": 41.6875, "learning_rate": 9.912191830902035e-06, "loss": 16.7284, "step": 31080 }, { "epoch": 0.5621540499185202, "grad_norm": 40.65625, "learning_rate": 9.912163578595376e-06, "loss": 16.6271, "step": 31090 }, { "epoch": 0.5623348649876481, "grad_norm": 40.375, "learning_rate": 9.912135326288716e-06, "loss": 16.8732, "step": 31100 }, { "epoch": 0.5625156800567759, "grad_norm": 42.21875, "learning_rate": 9.912107073982055e-06, "loss": 16.6208, "step": 31110 }, { "epoch": 0.5626964951259038, "grad_norm": 40.71875, "learning_rate": 9.912078821675396e-06, "loss": 17.2431, "step": 31120 }, { "epoch": 0.5628773101950316, "grad_norm": 40.15625, "learning_rate": 9.912050569368738e-06, "loss": 16.7951, "step": 31130 }, { "epoch": 0.5630581252641595, "grad_norm": 40.21875, "learning_rate": 9.912022317062077e-06, "loss": 17.2329, "step": 31140 }, { "epoch": 0.5632389403332874, "grad_norm": 39.90625, "learning_rate": 9.911994064755418e-06, "loss": 16.9604, "step": 31150 }, { "epoch": 0.5634197554024153, "grad_norm": 43.0, "learning_rate": 9.911965812448758e-06, "loss": 16.9863, "step": 31160 }, { "epoch": 0.5636005704715431, "grad_norm": 41.71875, "learning_rate": 9.911937560142099e-06, "loss": 16.8509, "step": 31170 }, { "epoch": 0.5637813855406709, "grad_norm": 39.96875, "learning_rate": 9.91190930783544e-06, "loss": 16.956, "step": 31180 }, { "epoch": 0.5639622006097988, "grad_norm": 41.53125, "learning_rate": 9.911881055528778e-06, "loss": 16.8709, "step": 31190 }, { "epoch": 0.5641430156789267, "grad_norm": 42.71875, "learning_rate": 9.911852803222119e-06, "loss": 16.7424, "step": 31200 }, { "epoch": 0.5643238307480546, "grad_norm": 43.0, "learning_rate": 9.91182455091546e-06, "loss": 16.9176, "step": 31210 }, { "epoch": 0.5645046458171824, "grad_norm": 43.34375, "learning_rate": 9.911796298608802e-06, "loss": 17.2313, "step": 31220 }, { "epoch": 0.5646854608863102, "grad_norm": 41.28125, "learning_rate": 9.911768046302141e-06, "loss": 17.1907, "step": 31230 }, { "epoch": 0.5648662759554381, "grad_norm": 39.9375, "learning_rate": 9.911739793995482e-06, "loss": 16.7637, "step": 31240 }, { "epoch": 0.565047091024566, "grad_norm": 41.0, "learning_rate": 9.911711541688822e-06, "loss": 17.0071, "step": 31250 }, { "epoch": 0.5652279060936939, "grad_norm": 41.53125, "learning_rate": 9.911683289382163e-06, "loss": 17.097, "step": 31260 }, { "epoch": 0.5654087211628217, "grad_norm": 45.09375, "learning_rate": 9.911655037075503e-06, "loss": 17.3105, "step": 31270 }, { "epoch": 0.5655895362319495, "grad_norm": 42.59375, "learning_rate": 9.911626784768842e-06, "loss": 17.0906, "step": 31280 }, { "epoch": 0.5657703513010774, "grad_norm": 39.96875, "learning_rate": 9.911598532462183e-06, "loss": 17.2088, "step": 31290 }, { "epoch": 0.5659511663702053, "grad_norm": 38.9375, "learning_rate": 9.911570280155524e-06, "loss": 16.8181, "step": 31300 }, { "epoch": 0.5661319814393332, "grad_norm": 38.9375, "learning_rate": 9.911542027848864e-06, "loss": 17.0384, "step": 31310 }, { "epoch": 0.566312796508461, "grad_norm": 42.4375, "learning_rate": 9.911513775542205e-06, "loss": 16.741, "step": 31320 }, { "epoch": 0.5664936115775889, "grad_norm": 39.3125, "learning_rate": 9.911485523235545e-06, "loss": 16.7638, "step": 31330 }, { "epoch": 0.5666744266467167, "grad_norm": 43.75, "learning_rate": 9.911457270928886e-06, "loss": 16.7134, "step": 31340 }, { "epoch": 0.5668552417158446, "grad_norm": 40.8125, "learning_rate": 9.911429018622227e-06, "loss": 16.9839, "step": 31350 }, { "epoch": 0.5670360567849725, "grad_norm": 42.96875, "learning_rate": 9.911400766315566e-06, "loss": 16.8958, "step": 31360 }, { "epoch": 0.5672168718541003, "grad_norm": 41.875, "learning_rate": 9.911372514008906e-06, "loss": 16.3046, "step": 31370 }, { "epoch": 0.5673976869232282, "grad_norm": 42.46875, "learning_rate": 9.911344261702247e-06, "loss": 17.342, "step": 31380 }, { "epoch": 0.567578501992356, "grad_norm": 41.65625, "learning_rate": 9.91131600939559e-06, "loss": 17.1161, "step": 31390 }, { "epoch": 0.5677593170614839, "grad_norm": 41.40625, "learning_rate": 9.911287757088928e-06, "loss": 16.7149, "step": 31400 }, { "epoch": 0.5679401321306118, "grad_norm": 40.59375, "learning_rate": 9.911259504782269e-06, "loss": 16.841, "step": 31410 }, { "epoch": 0.5681209471997396, "grad_norm": 40.625, "learning_rate": 9.91123125247561e-06, "loss": 16.8366, "step": 31420 }, { "epoch": 0.5683017622688675, "grad_norm": 40.125, "learning_rate": 9.91120300016895e-06, "loss": 17.2759, "step": 31430 }, { "epoch": 0.5684825773379953, "grad_norm": 42.09375, "learning_rate": 9.91117474786229e-06, "loss": 16.9346, "step": 31440 }, { "epoch": 0.5686633924071232, "grad_norm": 40.5, "learning_rate": 9.91114649555563e-06, "loss": 16.5734, "step": 31450 }, { "epoch": 0.5688442074762511, "grad_norm": 41.96875, "learning_rate": 9.91111824324897e-06, "loss": 16.8365, "step": 31460 }, { "epoch": 0.5690250225453789, "grad_norm": 43.65625, "learning_rate": 9.91108999094231e-06, "loss": 17.1106, "step": 31470 }, { "epoch": 0.5692058376145068, "grad_norm": 41.59375, "learning_rate": 9.911061738635651e-06, "loss": 16.7902, "step": 31480 }, { "epoch": 0.5693866526836346, "grad_norm": 42.59375, "learning_rate": 9.911033486328992e-06, "loss": 16.4987, "step": 31490 }, { "epoch": 0.5695674677527626, "grad_norm": 40.6875, "learning_rate": 9.911005234022333e-06, "loss": 16.7677, "step": 31500 }, { "epoch": 0.5697482828218904, "grad_norm": 41.46875, "learning_rate": 9.910976981715673e-06, "loss": 16.971, "step": 31510 }, { "epoch": 0.5699290978910182, "grad_norm": 40.9375, "learning_rate": 9.910948729409014e-06, "loss": 16.8891, "step": 31520 }, { "epoch": 0.5701099129601461, "grad_norm": 43.78125, "learning_rate": 9.910920477102355e-06, "loss": 16.5515, "step": 31530 }, { "epoch": 0.5702907280292739, "grad_norm": 41.34375, "learning_rate": 9.910892224795693e-06, "loss": 17.0996, "step": 31540 }, { "epoch": 0.5704715430984019, "grad_norm": 41.40625, "learning_rate": 9.910863972489034e-06, "loss": 17.3, "step": 31550 }, { "epoch": 0.5706523581675297, "grad_norm": 41.875, "learning_rate": 9.910835720182375e-06, "loss": 16.7294, "step": 31560 }, { "epoch": 0.5708331732366575, "grad_norm": 41.25, "learning_rate": 9.910807467875715e-06, "loss": 17.0507, "step": 31570 }, { "epoch": 0.5710139883057854, "grad_norm": 44.46875, "learning_rate": 9.910779215569056e-06, "loss": 16.7798, "step": 31580 }, { "epoch": 0.5711948033749132, "grad_norm": 40.875, "learning_rate": 9.910750963262397e-06, "loss": 16.729, "step": 31590 }, { "epoch": 0.5713756184440412, "grad_norm": 40.9375, "learning_rate": 9.910722710955737e-06, "loss": 16.7486, "step": 31600 }, { "epoch": 0.571556433513169, "grad_norm": 45.0, "learning_rate": 9.910694458649078e-06, "loss": 17.0809, "step": 31610 }, { "epoch": 0.5717372485822968, "grad_norm": 41.09375, "learning_rate": 9.910666206342417e-06, "loss": 17.0275, "step": 31620 }, { "epoch": 0.5719180636514247, "grad_norm": 40.5, "learning_rate": 9.910637954035757e-06, "loss": 16.9974, "step": 31630 }, { "epoch": 0.5720988787205525, "grad_norm": 40.84375, "learning_rate": 9.910609701729098e-06, "loss": 16.9911, "step": 31640 }, { "epoch": 0.5722796937896805, "grad_norm": 43.40625, "learning_rate": 9.910581449422439e-06, "loss": 16.7812, "step": 31650 }, { "epoch": 0.5724605088588083, "grad_norm": 44.40625, "learning_rate": 9.91055319711578e-06, "loss": 16.8301, "step": 31660 }, { "epoch": 0.5726413239279362, "grad_norm": 40.03125, "learning_rate": 9.91052494480912e-06, "loss": 16.7667, "step": 31670 }, { "epoch": 0.572822138997064, "grad_norm": 43.40625, "learning_rate": 9.91049669250246e-06, "loss": 16.5191, "step": 31680 }, { "epoch": 0.5730029540661918, "grad_norm": 39.8125, "learning_rate": 9.910468440195801e-06, "loss": 17.1354, "step": 31690 }, { "epoch": 0.5731837691353198, "grad_norm": 41.28125, "learning_rate": 9.910440187889142e-06, "loss": 16.8287, "step": 31700 }, { "epoch": 0.5733645842044476, "grad_norm": 41.84375, "learning_rate": 9.91041193558248e-06, "loss": 17.183, "step": 31710 }, { "epoch": 0.5735453992735755, "grad_norm": 42.9375, "learning_rate": 9.910383683275821e-06, "loss": 17.053, "step": 31720 }, { "epoch": 0.5737262143427033, "grad_norm": 41.96875, "learning_rate": 9.910355430969162e-06, "loss": 17.1543, "step": 31730 }, { "epoch": 0.5739070294118311, "grad_norm": 42.09375, "learning_rate": 9.910327178662503e-06, "loss": 16.8542, "step": 31740 }, { "epoch": 0.5740878444809591, "grad_norm": 39.15625, "learning_rate": 9.910298926355843e-06, "loss": 16.7303, "step": 31750 }, { "epoch": 0.5742686595500869, "grad_norm": 40.15625, "learning_rate": 9.910270674049184e-06, "loss": 16.62, "step": 31760 }, { "epoch": 0.5744494746192148, "grad_norm": 43.4375, "learning_rate": 9.910242421742524e-06, "loss": 17.0608, "step": 31770 }, { "epoch": 0.5746302896883426, "grad_norm": 39.90625, "learning_rate": 9.910214169435865e-06, "loss": 16.7006, "step": 31780 }, { "epoch": 0.5748111047574704, "grad_norm": 41.53125, "learning_rate": 9.910185917129204e-06, "loss": 17.0885, "step": 31790 }, { "epoch": 0.5749919198265984, "grad_norm": 43.0625, "learning_rate": 9.910157664822545e-06, "loss": 16.7022, "step": 31800 }, { "epoch": 0.5751727348957262, "grad_norm": 44.1875, "learning_rate": 9.910129412515885e-06, "loss": 16.9512, "step": 31810 }, { "epoch": 0.5753535499648541, "grad_norm": 42.375, "learning_rate": 9.910101160209226e-06, "loss": 16.9256, "step": 31820 }, { "epoch": 0.5755343650339819, "grad_norm": 42.34375, "learning_rate": 9.910072907902566e-06, "loss": 16.6284, "step": 31830 }, { "epoch": 0.5757151801031098, "grad_norm": 41.25, "learning_rate": 9.910044655595907e-06, "loss": 16.9051, "step": 31840 }, { "epoch": 0.5758959951722377, "grad_norm": 39.9375, "learning_rate": 9.910016403289248e-06, "loss": 16.7478, "step": 31850 }, { "epoch": 0.5760768102413655, "grad_norm": 39.9375, "learning_rate": 9.909988150982588e-06, "loss": 16.8593, "step": 31860 }, { "epoch": 0.5762576253104934, "grad_norm": 44.09375, "learning_rate": 9.909959898675929e-06, "loss": 16.7016, "step": 31870 }, { "epoch": 0.5764384403796212, "grad_norm": 43.1875, "learning_rate": 9.909931646369268e-06, "loss": 17.125, "step": 31880 }, { "epoch": 0.5766192554487491, "grad_norm": 41.53125, "learning_rate": 9.909903394062608e-06, "loss": 16.9871, "step": 31890 }, { "epoch": 0.576800070517877, "grad_norm": 40.96875, "learning_rate": 9.909875141755949e-06, "loss": 17.0037, "step": 31900 }, { "epoch": 0.5769808855870048, "grad_norm": 43.90625, "learning_rate": 9.90984688944929e-06, "loss": 17.2558, "step": 31910 }, { "epoch": 0.5771617006561327, "grad_norm": 42.0625, "learning_rate": 9.90981863714263e-06, "loss": 17.2594, "step": 31920 }, { "epoch": 0.5773425157252605, "grad_norm": 43.53125, "learning_rate": 9.909790384835971e-06, "loss": 16.8757, "step": 31930 }, { "epoch": 0.5775233307943884, "grad_norm": 39.40625, "learning_rate": 9.909762132529312e-06, "loss": 16.9364, "step": 31940 }, { "epoch": 0.5777041458635163, "grad_norm": 41.875, "learning_rate": 9.909733880222652e-06, "loss": 16.6703, "step": 31950 }, { "epoch": 0.5778849609326441, "grad_norm": 40.65625, "learning_rate": 9.909705627915993e-06, "loss": 16.8876, "step": 31960 }, { "epoch": 0.578065776001772, "grad_norm": 44.96875, "learning_rate": 9.909677375609332e-06, "loss": 16.8418, "step": 31970 }, { "epoch": 0.5782465910708998, "grad_norm": 43.34375, "learning_rate": 9.909649123302672e-06, "loss": 16.9372, "step": 31980 }, { "epoch": 0.5784274061400277, "grad_norm": 44.1875, "learning_rate": 9.909620870996013e-06, "loss": 16.7588, "step": 31990 }, { "epoch": 0.5786082212091556, "grad_norm": 42.09375, "learning_rate": 9.909592618689354e-06, "loss": 17.0603, "step": 32000 }, { "epoch": 0.5787890362782835, "grad_norm": 45.75, "learning_rate": 9.909564366382694e-06, "loss": 17.18, "step": 32010 }, { "epoch": 0.5789698513474113, "grad_norm": 43.34375, "learning_rate": 9.909536114076035e-06, "loss": 16.7754, "step": 32020 }, { "epoch": 0.5791506664165391, "grad_norm": 42.4375, "learning_rate": 9.909507861769375e-06, "loss": 17.1534, "step": 32030 }, { "epoch": 0.579331481485667, "grad_norm": 42.40625, "learning_rate": 9.909479609462716e-06, "loss": 17.2115, "step": 32040 }, { "epoch": 0.5795122965547949, "grad_norm": 43.1875, "learning_rate": 9.909451357156055e-06, "loss": 17.0768, "step": 32050 }, { "epoch": 0.5796931116239228, "grad_norm": 43.46875, "learning_rate": 9.909423104849396e-06, "loss": 17.2024, "step": 32060 }, { "epoch": 0.5798739266930506, "grad_norm": 45.84375, "learning_rate": 9.909394852542736e-06, "loss": 16.6366, "step": 32070 }, { "epoch": 0.5800547417621784, "grad_norm": 39.8125, "learning_rate": 9.909366600236077e-06, "loss": 17.1224, "step": 32080 }, { "epoch": 0.5802355568313063, "grad_norm": 40.15625, "learning_rate": 9.909338347929418e-06, "loss": 16.9536, "step": 32090 }, { "epoch": 0.5804163719004342, "grad_norm": 41.34375, "learning_rate": 9.909310095622758e-06, "loss": 16.8179, "step": 32100 }, { "epoch": 0.5805971869695621, "grad_norm": 42.28125, "learning_rate": 9.909281843316099e-06, "loss": 17.0561, "step": 32110 }, { "epoch": 0.5807780020386899, "grad_norm": 41.9375, "learning_rate": 9.90925359100944e-06, "loss": 16.8479, "step": 32120 }, { "epoch": 0.5809588171078177, "grad_norm": 41.34375, "learning_rate": 9.90922533870278e-06, "loss": 17.2642, "step": 32130 }, { "epoch": 0.5811396321769456, "grad_norm": 41.5, "learning_rate": 9.909197086396119e-06, "loss": 17.1699, "step": 32140 }, { "epoch": 0.5813204472460735, "grad_norm": 42.71875, "learning_rate": 9.90916883408946e-06, "loss": 16.8568, "step": 32150 }, { "epoch": 0.5815012623152014, "grad_norm": 39.0, "learning_rate": 9.9091405817828e-06, "loss": 16.9422, "step": 32160 }, { "epoch": 0.5816820773843292, "grad_norm": 39.3125, "learning_rate": 9.90911232947614e-06, "loss": 16.7037, "step": 32170 }, { "epoch": 0.5818628924534571, "grad_norm": 42.125, "learning_rate": 9.909084077169481e-06, "loss": 17.1012, "step": 32180 }, { "epoch": 0.5820437075225849, "grad_norm": 43.4375, "learning_rate": 9.909055824862822e-06, "loss": 16.9235, "step": 32190 }, { "epoch": 0.5822245225917128, "grad_norm": 40.90625, "learning_rate": 9.909027572556163e-06, "loss": 17.0028, "step": 32200 }, { "epoch": 0.5824053376608407, "grad_norm": 42.40625, "learning_rate": 9.908999320249503e-06, "loss": 17.3045, "step": 32210 }, { "epoch": 0.5825861527299685, "grad_norm": 41.5625, "learning_rate": 9.908971067942842e-06, "loss": 16.6335, "step": 32220 }, { "epoch": 0.5827669677990964, "grad_norm": 41.6875, "learning_rate": 9.908942815636183e-06, "loss": 17.434, "step": 32230 }, { "epoch": 0.5829477828682242, "grad_norm": 41.40625, "learning_rate": 9.908914563329523e-06, "loss": 16.849, "step": 32240 }, { "epoch": 0.5831285979373521, "grad_norm": 41.71875, "learning_rate": 9.908886311022864e-06, "loss": 17.0407, "step": 32250 }, { "epoch": 0.58330941300648, "grad_norm": 43.0, "learning_rate": 9.908858058716205e-06, "loss": 16.7928, "step": 32260 }, { "epoch": 0.5834902280756078, "grad_norm": 41.59375, "learning_rate": 9.908829806409545e-06, "loss": 16.5376, "step": 32270 }, { "epoch": 0.5836710431447357, "grad_norm": 40.84375, "learning_rate": 9.908801554102886e-06, "loss": 16.9865, "step": 32280 }, { "epoch": 0.5838518582138635, "grad_norm": 45.21875, "learning_rate": 9.908773301796227e-06, "loss": 16.6478, "step": 32290 }, { "epoch": 0.5840326732829914, "grad_norm": 38.8125, "learning_rate": 9.908745049489567e-06, "loss": 17.0553, "step": 32300 }, { "epoch": 0.5842134883521193, "grad_norm": 45.0625, "learning_rate": 9.908716797182906e-06, "loss": 16.9444, "step": 32310 }, { "epoch": 0.5843943034212471, "grad_norm": 45.0, "learning_rate": 9.908688544876247e-06, "loss": 17.1495, "step": 32320 }, { "epoch": 0.584575118490375, "grad_norm": 40.90625, "learning_rate": 9.908660292569587e-06, "loss": 17.0832, "step": 32330 }, { "epoch": 0.5847559335595028, "grad_norm": 38.375, "learning_rate": 9.908632040262928e-06, "loss": 16.2737, "step": 32340 }, { "epoch": 0.5849367486286308, "grad_norm": 44.0, "learning_rate": 9.908603787956269e-06, "loss": 16.8861, "step": 32350 }, { "epoch": 0.5851175636977586, "grad_norm": 41.28125, "learning_rate": 9.90857553564961e-06, "loss": 16.8836, "step": 32360 }, { "epoch": 0.5852983787668864, "grad_norm": 42.90625, "learning_rate": 9.90854728334295e-06, "loss": 17.1306, "step": 32370 }, { "epoch": 0.5854791938360143, "grad_norm": 43.375, "learning_rate": 9.90851903103629e-06, "loss": 17.097, "step": 32380 }, { "epoch": 0.5856600089051421, "grad_norm": 41.3125, "learning_rate": 9.908490778729631e-06, "loss": 16.9224, "step": 32390 }, { "epoch": 0.5858408239742701, "grad_norm": 45.375, "learning_rate": 9.90846252642297e-06, "loss": 16.8028, "step": 32400 }, { "epoch": 0.5860216390433979, "grad_norm": 39.15625, "learning_rate": 9.90843427411631e-06, "loss": 16.8687, "step": 32410 }, { "epoch": 0.5862024541125257, "grad_norm": 39.375, "learning_rate": 9.908406021809651e-06, "loss": 16.8831, "step": 32420 }, { "epoch": 0.5863832691816536, "grad_norm": 42.78125, "learning_rate": 9.908377769502992e-06, "loss": 16.6693, "step": 32430 }, { "epoch": 0.5865640842507814, "grad_norm": 41.125, "learning_rate": 9.908349517196333e-06, "loss": 16.9407, "step": 32440 }, { "epoch": 0.5867448993199094, "grad_norm": 46.59375, "learning_rate": 9.908321264889673e-06, "loss": 17.4105, "step": 32450 }, { "epoch": 0.5869257143890372, "grad_norm": 41.90625, "learning_rate": 9.908293012583014e-06, "loss": 16.7872, "step": 32460 }, { "epoch": 0.587106529458165, "grad_norm": 40.0, "learning_rate": 9.908264760276354e-06, "loss": 17.1423, "step": 32470 }, { "epoch": 0.5872873445272929, "grad_norm": 43.21875, "learning_rate": 9.908236507969693e-06, "loss": 17.2344, "step": 32480 }, { "epoch": 0.5874681595964207, "grad_norm": 40.5, "learning_rate": 9.908208255663034e-06, "loss": 17.0029, "step": 32490 }, { "epoch": 0.5876489746655487, "grad_norm": 46.3125, "learning_rate": 9.908180003356375e-06, "loss": 17.0556, "step": 32500 }, { "epoch": 0.5878297897346765, "grad_norm": 39.3125, "learning_rate": 9.908151751049715e-06, "loss": 16.9346, "step": 32510 }, { "epoch": 0.5880106048038044, "grad_norm": 45.0625, "learning_rate": 9.908123498743056e-06, "loss": 16.9969, "step": 32520 }, { "epoch": 0.5881914198729322, "grad_norm": 45.1875, "learning_rate": 9.908095246436396e-06, "loss": 17.1584, "step": 32530 }, { "epoch": 0.58837223494206, "grad_norm": 44.4375, "learning_rate": 9.908066994129737e-06, "loss": 16.9199, "step": 32540 }, { "epoch": 0.588553050011188, "grad_norm": 44.21875, "learning_rate": 9.908038741823078e-06, "loss": 16.8951, "step": 32550 }, { "epoch": 0.5887338650803158, "grad_norm": 41.53125, "learning_rate": 9.908010489516418e-06, "loss": 17.4404, "step": 32560 }, { "epoch": 0.5889146801494437, "grad_norm": 45.5625, "learning_rate": 9.907982237209757e-06, "loss": 17.1576, "step": 32570 }, { "epoch": 0.5890954952185715, "grad_norm": 42.625, "learning_rate": 9.907953984903098e-06, "loss": 16.656, "step": 32580 }, { "epoch": 0.5892763102876993, "grad_norm": 42.28125, "learning_rate": 9.907925732596438e-06, "loss": 16.8173, "step": 32590 }, { "epoch": 0.5894571253568273, "grad_norm": 41.375, "learning_rate": 9.907897480289779e-06, "loss": 16.9153, "step": 32600 }, { "epoch": 0.5896379404259551, "grad_norm": 40.78125, "learning_rate": 9.90786922798312e-06, "loss": 17.1193, "step": 32610 }, { "epoch": 0.589818755495083, "grad_norm": 43.59375, "learning_rate": 9.90784097567646e-06, "loss": 17.2083, "step": 32620 }, { "epoch": 0.5899995705642108, "grad_norm": 44.75, "learning_rate": 9.907812723369801e-06, "loss": 17.1789, "step": 32630 }, { "epoch": 0.5901803856333386, "grad_norm": 40.78125, "learning_rate": 9.907784471063142e-06, "loss": 16.5824, "step": 32640 }, { "epoch": 0.5903612007024666, "grad_norm": 39.375, "learning_rate": 9.90775621875648e-06, "loss": 16.6448, "step": 32650 }, { "epoch": 0.5905420157715944, "grad_norm": 44.125, "learning_rate": 9.907727966449821e-06, "loss": 17.0394, "step": 32660 }, { "epoch": 0.5907228308407223, "grad_norm": 43.6875, "learning_rate": 9.907699714143162e-06, "loss": 16.9122, "step": 32670 }, { "epoch": 0.5909036459098501, "grad_norm": 44.6875, "learning_rate": 9.907671461836502e-06, "loss": 16.9744, "step": 32680 }, { "epoch": 0.591084460978978, "grad_norm": 44.15625, "learning_rate": 9.907643209529843e-06, "loss": 16.7347, "step": 32690 }, { "epoch": 0.5912652760481059, "grad_norm": 44.84375, "learning_rate": 9.907614957223184e-06, "loss": 17.1523, "step": 32700 }, { "epoch": 0.5914460911172337, "grad_norm": 41.40625, "learning_rate": 9.907586704916524e-06, "loss": 16.64, "step": 32710 }, { "epoch": 0.5916269061863616, "grad_norm": 41.53125, "learning_rate": 9.907558452609865e-06, "loss": 16.8095, "step": 32720 }, { "epoch": 0.5918077212554894, "grad_norm": 43.71875, "learning_rate": 9.907530200303205e-06, "loss": 17.1435, "step": 32730 }, { "epoch": 0.5919885363246173, "grad_norm": 41.21875, "learning_rate": 9.907501947996544e-06, "loss": 17.3664, "step": 32740 }, { "epoch": 0.5921693513937452, "grad_norm": 42.25, "learning_rate": 9.907473695689885e-06, "loss": 17.0529, "step": 32750 }, { "epoch": 0.592350166462873, "grad_norm": 42.625, "learning_rate": 9.907445443383226e-06, "loss": 16.9959, "step": 32760 }, { "epoch": 0.5925309815320009, "grad_norm": 41.71875, "learning_rate": 9.907417191076566e-06, "loss": 16.3658, "step": 32770 }, { "epoch": 0.5927117966011287, "grad_norm": 42.90625, "learning_rate": 9.907388938769907e-06, "loss": 16.7799, "step": 32780 }, { "epoch": 0.5928926116702566, "grad_norm": 40.875, "learning_rate": 9.907360686463246e-06, "loss": 17.0065, "step": 32790 }, { "epoch": 0.5930734267393845, "grad_norm": 42.5625, "learning_rate": 9.907332434156588e-06, "loss": 16.8945, "step": 32800 }, { "epoch": 0.5932542418085123, "grad_norm": 41.875, "learning_rate": 9.907304181849929e-06, "loss": 16.9943, "step": 32810 }, { "epoch": 0.5934350568776402, "grad_norm": 38.78125, "learning_rate": 9.90727592954327e-06, "loss": 16.6761, "step": 32820 }, { "epoch": 0.593615871946768, "grad_norm": 42.96875, "learning_rate": 9.907247677236608e-06, "loss": 16.9836, "step": 32830 }, { "epoch": 0.5937966870158959, "grad_norm": 42.0, "learning_rate": 9.907219424929949e-06, "loss": 17.4699, "step": 32840 }, { "epoch": 0.5939775020850238, "grad_norm": 41.28125, "learning_rate": 9.90719117262329e-06, "loss": 17.0958, "step": 32850 }, { "epoch": 0.5941583171541516, "grad_norm": 41.34375, "learning_rate": 9.90716292031663e-06, "loss": 17.0023, "step": 32860 }, { "epoch": 0.5943391322232795, "grad_norm": 41.21875, "learning_rate": 9.90713466800997e-06, "loss": 16.894, "step": 32870 }, { "epoch": 0.5945199472924073, "grad_norm": 41.1875, "learning_rate": 9.907106415703311e-06, "loss": 17.3038, "step": 32880 }, { "epoch": 0.5947007623615352, "grad_norm": 41.5, "learning_rate": 9.907078163396652e-06, "loss": 16.8994, "step": 32890 }, { "epoch": 0.594881577430663, "grad_norm": 38.78125, "learning_rate": 9.907049911089993e-06, "loss": 17.6324, "step": 32900 }, { "epoch": 0.595062392499791, "grad_norm": 40.78125, "learning_rate": 9.907021658783332e-06, "loss": 16.7333, "step": 32910 }, { "epoch": 0.5952432075689188, "grad_norm": 43.0, "learning_rate": 9.906993406476672e-06, "loss": 16.869, "step": 32920 }, { "epoch": 0.5954240226380466, "grad_norm": 41.34375, "learning_rate": 9.906965154170013e-06, "loss": 16.7851, "step": 32930 }, { "epoch": 0.5956048377071745, "grad_norm": 40.8125, "learning_rate": 9.906936901863353e-06, "loss": 16.8765, "step": 32940 }, { "epoch": 0.5957856527763024, "grad_norm": 38.3125, "learning_rate": 9.906908649556694e-06, "loss": 17.119, "step": 32950 }, { "epoch": 0.5959664678454303, "grad_norm": 44.8125, "learning_rate": 9.906880397250033e-06, "loss": 16.8493, "step": 32960 }, { "epoch": 0.5961472829145581, "grad_norm": 42.625, "learning_rate": 9.906852144943375e-06, "loss": 17.293, "step": 32970 }, { "epoch": 0.5963280979836859, "grad_norm": 41.15625, "learning_rate": 9.906823892636716e-06, "loss": 16.4654, "step": 32980 }, { "epoch": 0.5965089130528138, "grad_norm": 46.65625, "learning_rate": 9.906795640330057e-06, "loss": 16.9846, "step": 32990 }, { "epoch": 0.5966897281219417, "grad_norm": 38.6875, "learning_rate": 9.906767388023396e-06, "loss": 17.0765, "step": 33000 }, { "epoch": 0.5968705431910696, "grad_norm": 45.40625, "learning_rate": 9.906739135716736e-06, "loss": 17.217, "step": 33010 }, { "epoch": 0.5970513582601974, "grad_norm": 41.4375, "learning_rate": 9.906710883410077e-06, "loss": 16.6237, "step": 33020 }, { "epoch": 0.5972321733293252, "grad_norm": 44.125, "learning_rate": 9.906682631103417e-06, "loss": 16.7351, "step": 33030 }, { "epoch": 0.5974129883984531, "grad_norm": 42.6875, "learning_rate": 9.906654378796758e-06, "loss": 16.7977, "step": 33040 }, { "epoch": 0.597593803467581, "grad_norm": 44.34375, "learning_rate": 9.906626126490097e-06, "loss": 17.443, "step": 33050 }, { "epoch": 0.5977746185367089, "grad_norm": 42.5625, "learning_rate": 9.90659787418344e-06, "loss": 16.9387, "step": 33060 }, { "epoch": 0.5979554336058367, "grad_norm": 38.90625, "learning_rate": 9.90656962187678e-06, "loss": 16.9259, "step": 33070 }, { "epoch": 0.5981362486749646, "grad_norm": 39.4375, "learning_rate": 9.906541369570119e-06, "loss": 16.7404, "step": 33080 }, { "epoch": 0.5983170637440924, "grad_norm": 42.65625, "learning_rate": 9.90651311726346e-06, "loss": 16.6768, "step": 33090 }, { "epoch": 0.5984978788132203, "grad_norm": 42.71875, "learning_rate": 9.9064848649568e-06, "loss": 16.6058, "step": 33100 }, { "epoch": 0.5986786938823482, "grad_norm": 42.65625, "learning_rate": 9.90645661265014e-06, "loss": 16.9653, "step": 33110 }, { "epoch": 0.598859508951476, "grad_norm": 41.34375, "learning_rate": 9.906428360343481e-06, "loss": 16.6989, "step": 33120 }, { "epoch": 0.5990403240206039, "grad_norm": 43.03125, "learning_rate": 9.906400108036822e-06, "loss": 16.8293, "step": 33130 }, { "epoch": 0.5992211390897317, "grad_norm": 42.25, "learning_rate": 9.90637185573016e-06, "loss": 16.9864, "step": 33140 }, { "epoch": 0.5994019541588596, "grad_norm": 40.78125, "learning_rate": 9.906343603423503e-06, "loss": 17.4234, "step": 33150 }, { "epoch": 0.5995827692279875, "grad_norm": 40.34375, "learning_rate": 9.906315351116844e-06, "loss": 17.622, "step": 33160 }, { "epoch": 0.5997635842971153, "grad_norm": 44.0625, "learning_rate": 9.906287098810183e-06, "loss": 17.2536, "step": 33170 }, { "epoch": 0.5999443993662432, "grad_norm": 43.9375, "learning_rate": 9.906258846503523e-06, "loss": 17.4132, "step": 33180 }, { "epoch": 0.600125214435371, "grad_norm": 41.90625, "learning_rate": 9.906230594196864e-06, "loss": 17.1735, "step": 33190 }, { "epoch": 0.6003060295044989, "grad_norm": 40.09375, "learning_rate": 9.906202341890205e-06, "loss": 16.8919, "step": 33200 }, { "epoch": 0.6004868445736268, "grad_norm": 41.6875, "learning_rate": 9.906174089583545e-06, "loss": 16.8507, "step": 33210 }, { "epoch": 0.6006676596427546, "grad_norm": 42.21875, "learning_rate": 9.906145837276884e-06, "loss": 16.8082, "step": 33220 }, { "epoch": 0.6008484747118825, "grad_norm": 42.34375, "learning_rate": 9.906117584970226e-06, "loss": 17.3458, "step": 33230 }, { "epoch": 0.6010292897810103, "grad_norm": 41.0, "learning_rate": 9.906089332663567e-06, "loss": 17.2312, "step": 33240 }, { "epoch": 0.6012101048501383, "grad_norm": 39.375, "learning_rate": 9.906061080356908e-06, "loss": 16.5374, "step": 33250 }, { "epoch": 0.6013909199192661, "grad_norm": 41.375, "learning_rate": 9.906032828050247e-06, "loss": 16.3421, "step": 33260 }, { "epoch": 0.6015717349883939, "grad_norm": 45.34375, "learning_rate": 9.906004575743587e-06, "loss": 16.4727, "step": 33270 }, { "epoch": 0.6017525500575218, "grad_norm": 43.03125, "learning_rate": 9.905976323436928e-06, "loss": 16.9591, "step": 33280 }, { "epoch": 0.6019333651266496, "grad_norm": 41.875, "learning_rate": 9.905948071130268e-06, "loss": 17.0089, "step": 33290 }, { "epoch": 0.6021141801957776, "grad_norm": 44.4375, "learning_rate": 9.905919818823609e-06, "loss": 17.0707, "step": 33300 }, { "epoch": 0.6022949952649054, "grad_norm": 42.1875, "learning_rate": 9.905891566516948e-06, "loss": 17.0987, "step": 33310 }, { "epoch": 0.6024758103340332, "grad_norm": 43.5, "learning_rate": 9.90586331421029e-06, "loss": 17.0295, "step": 33320 }, { "epoch": 0.6026566254031611, "grad_norm": 44.21875, "learning_rate": 9.905835061903631e-06, "loss": 16.9252, "step": 33330 }, { "epoch": 0.6028374404722889, "grad_norm": 42.78125, "learning_rate": 9.90580680959697e-06, "loss": 16.6911, "step": 33340 }, { "epoch": 0.6030182555414169, "grad_norm": 44.1875, "learning_rate": 9.90577855729031e-06, "loss": 17.1652, "step": 33350 }, { "epoch": 0.6031990706105447, "grad_norm": 42.03125, "learning_rate": 9.905750304983651e-06, "loss": 16.8341, "step": 33360 }, { "epoch": 0.6033798856796725, "grad_norm": 44.09375, "learning_rate": 9.905722052676992e-06, "loss": 16.6755, "step": 33370 }, { "epoch": 0.6035607007488004, "grad_norm": 44.25, "learning_rate": 9.905693800370332e-06, "loss": 16.725, "step": 33380 }, { "epoch": 0.6037415158179282, "grad_norm": 41.96875, "learning_rate": 9.905665548063671e-06, "loss": 16.9334, "step": 33390 }, { "epoch": 0.6039223308870562, "grad_norm": 42.71875, "learning_rate": 9.905637295757012e-06, "loss": 16.5482, "step": 33400 }, { "epoch": 0.604103145956184, "grad_norm": 40.34375, "learning_rate": 9.905609043450354e-06, "loss": 16.8465, "step": 33410 }, { "epoch": 0.6042839610253119, "grad_norm": 41.59375, "learning_rate": 9.905580791143695e-06, "loss": 16.6368, "step": 33420 }, { "epoch": 0.6044647760944397, "grad_norm": 41.75, "learning_rate": 9.905552538837034e-06, "loss": 16.757, "step": 33430 }, { "epoch": 0.6046455911635675, "grad_norm": 42.8125, "learning_rate": 9.905524286530374e-06, "loss": 17.0386, "step": 33440 }, { "epoch": 0.6048264062326955, "grad_norm": 41.84375, "learning_rate": 9.905496034223715e-06, "loss": 16.8365, "step": 33450 }, { "epoch": 0.6050072213018233, "grad_norm": 40.75, "learning_rate": 9.905467781917056e-06, "loss": 17.0499, "step": 33460 }, { "epoch": 0.6051880363709512, "grad_norm": 41.4375, "learning_rate": 9.905439529610396e-06, "loss": 17.1388, "step": 33470 }, { "epoch": 0.605368851440079, "grad_norm": 42.3125, "learning_rate": 9.905411277303735e-06, "loss": 17.1348, "step": 33480 }, { "epoch": 0.6055496665092068, "grad_norm": 39.34375, "learning_rate": 9.905383024997076e-06, "loss": 16.5983, "step": 33490 }, { "epoch": 0.6057304815783348, "grad_norm": 40.09375, "learning_rate": 9.905354772690418e-06, "loss": 17.5614, "step": 33500 }, { "epoch": 0.6059112966474626, "grad_norm": 41.4375, "learning_rate": 9.905326520383757e-06, "loss": 16.7848, "step": 33510 }, { "epoch": 0.6060921117165905, "grad_norm": 40.0, "learning_rate": 9.905298268077098e-06, "loss": 16.9091, "step": 33520 }, { "epoch": 0.6062729267857183, "grad_norm": 43.6875, "learning_rate": 9.905270015770438e-06, "loss": 16.7644, "step": 33530 }, { "epoch": 0.6064537418548461, "grad_norm": 43.25, "learning_rate": 9.905241763463779e-06, "loss": 16.7903, "step": 33540 }, { "epoch": 0.606634556923974, "grad_norm": 44.875, "learning_rate": 9.90521351115712e-06, "loss": 16.7235, "step": 33550 }, { "epoch": 0.6068153719931019, "grad_norm": 42.03125, "learning_rate": 9.905185258850458e-06, "loss": 16.9318, "step": 33560 }, { "epoch": 0.6069961870622298, "grad_norm": 43.1875, "learning_rate": 9.905157006543799e-06, "loss": 16.8678, "step": 33570 }, { "epoch": 0.6071770021313576, "grad_norm": 40.375, "learning_rate": 9.905128754237141e-06, "loss": 16.9585, "step": 33580 }, { "epoch": 0.6073578172004855, "grad_norm": 42.28125, "learning_rate": 9.905100501930482e-06, "loss": 16.8712, "step": 33590 }, { "epoch": 0.6075386322696134, "grad_norm": 42.46875, "learning_rate": 9.905072249623821e-06, "loss": 16.741, "step": 33600 }, { "epoch": 0.6077194473387412, "grad_norm": 42.40625, "learning_rate": 9.905043997317162e-06, "loss": 16.962, "step": 33610 }, { "epoch": 0.6079002624078691, "grad_norm": 41.25, "learning_rate": 9.905015745010502e-06, "loss": 16.7619, "step": 33620 }, { "epoch": 0.6080810774769969, "grad_norm": 42.375, "learning_rate": 9.904987492703843e-06, "loss": 16.7792, "step": 33630 }, { "epoch": 0.6082618925461248, "grad_norm": 44.28125, "learning_rate": 9.904959240397183e-06, "loss": 17.2013, "step": 33640 }, { "epoch": 0.6084427076152527, "grad_norm": 42.90625, "learning_rate": 9.904930988090522e-06, "loss": 17.1005, "step": 33650 }, { "epoch": 0.6086235226843805, "grad_norm": 42.90625, "learning_rate": 9.904902735783863e-06, "loss": 17.0336, "step": 33660 }, { "epoch": 0.6088043377535084, "grad_norm": 41.75, "learning_rate": 9.904874483477205e-06, "loss": 16.9484, "step": 33670 }, { "epoch": 0.6089851528226362, "grad_norm": 42.59375, "learning_rate": 9.904846231170544e-06, "loss": 16.7472, "step": 33680 }, { "epoch": 0.6091659678917641, "grad_norm": 41.34375, "learning_rate": 9.904817978863885e-06, "loss": 16.9834, "step": 33690 }, { "epoch": 0.609346782960892, "grad_norm": 43.96875, "learning_rate": 9.904789726557226e-06, "loss": 16.5895, "step": 33700 }, { "epoch": 0.6095275980300198, "grad_norm": 42.34375, "learning_rate": 9.904761474250566e-06, "loss": 16.9442, "step": 33710 }, { "epoch": 0.6097084130991477, "grad_norm": 43.75, "learning_rate": 9.904733221943907e-06, "loss": 17.1539, "step": 33720 }, { "epoch": 0.6098892281682755, "grad_norm": 39.625, "learning_rate": 9.904704969637247e-06, "loss": 17.1007, "step": 33730 }, { "epoch": 0.6100700432374034, "grad_norm": 42.1875, "learning_rate": 9.904676717330586e-06, "loss": 16.9644, "step": 33740 }, { "epoch": 0.6102508583065313, "grad_norm": 42.09375, "learning_rate": 9.904648465023927e-06, "loss": 16.9215, "step": 33750 }, { "epoch": 0.6104316733756592, "grad_norm": 43.15625, "learning_rate": 9.90462021271727e-06, "loss": 17.2342, "step": 33760 }, { "epoch": 0.610612488444787, "grad_norm": 42.3125, "learning_rate": 9.904591960410608e-06, "loss": 17.427, "step": 33770 }, { "epoch": 0.6107933035139148, "grad_norm": 39.9375, "learning_rate": 9.904563708103949e-06, "loss": 17.1716, "step": 33780 }, { "epoch": 0.6109741185830427, "grad_norm": 43.0625, "learning_rate": 9.90453545579729e-06, "loss": 17.059, "step": 33790 }, { "epoch": 0.6111549336521706, "grad_norm": 39.6875, "learning_rate": 9.90450720349063e-06, "loss": 17.2268, "step": 33800 }, { "epoch": 0.6113357487212985, "grad_norm": 43.15625, "learning_rate": 9.90447895118397e-06, "loss": 16.9237, "step": 33810 }, { "epoch": 0.6115165637904263, "grad_norm": 41.78125, "learning_rate": 9.90445069887731e-06, "loss": 17.0366, "step": 33820 }, { "epoch": 0.6116973788595541, "grad_norm": 41.8125, "learning_rate": 9.90442244657065e-06, "loss": 16.9747, "step": 33830 }, { "epoch": 0.611878193928682, "grad_norm": 43.90625, "learning_rate": 9.90439419426399e-06, "loss": 16.7878, "step": 33840 }, { "epoch": 0.6120590089978099, "grad_norm": 44.96875, "learning_rate": 9.904365941957333e-06, "loss": 16.8532, "step": 33850 }, { "epoch": 0.6122398240669378, "grad_norm": 41.6875, "learning_rate": 9.904337689650672e-06, "loss": 16.7214, "step": 33860 }, { "epoch": 0.6124206391360656, "grad_norm": 43.15625, "learning_rate": 9.904309437344013e-06, "loss": 16.9797, "step": 33870 }, { "epoch": 0.6126014542051934, "grad_norm": 41.75, "learning_rate": 9.904281185037353e-06, "loss": 16.8734, "step": 33880 }, { "epoch": 0.6127822692743213, "grad_norm": 42.34375, "learning_rate": 9.904252932730694e-06, "loss": 17.0882, "step": 33890 }, { "epoch": 0.6129630843434491, "grad_norm": 42.34375, "learning_rate": 9.904224680424035e-06, "loss": 16.4828, "step": 33900 }, { "epoch": 0.6131438994125771, "grad_norm": 41.59375, "learning_rate": 9.904196428117374e-06, "loss": 16.8991, "step": 33910 }, { "epoch": 0.6133247144817049, "grad_norm": 42.90625, "learning_rate": 9.904168175810714e-06, "loss": 16.8868, "step": 33920 }, { "epoch": 0.6135055295508328, "grad_norm": 40.625, "learning_rate": 9.904139923504056e-06, "loss": 16.8543, "step": 33930 }, { "epoch": 0.6136863446199606, "grad_norm": 42.9375, "learning_rate": 9.904111671197395e-06, "loss": 17.1237, "step": 33940 }, { "epoch": 0.6138671596890884, "grad_norm": 41.25, "learning_rate": 9.904083418890736e-06, "loss": 17.0901, "step": 33950 }, { "epoch": 0.6140479747582164, "grad_norm": 43.125, "learning_rate": 9.904055166584077e-06, "loss": 17.1514, "step": 33960 }, { "epoch": 0.6142287898273442, "grad_norm": 43.875, "learning_rate": 9.904026914277417e-06, "loss": 17.3739, "step": 33970 }, { "epoch": 0.6144096048964721, "grad_norm": 40.78125, "learning_rate": 9.903998661970758e-06, "loss": 17.0267, "step": 33980 }, { "epoch": 0.6145904199655999, "grad_norm": 41.0625, "learning_rate": 9.903970409664097e-06, "loss": 16.9841, "step": 33990 }, { "epoch": 0.6147712350347277, "grad_norm": 40.90625, "learning_rate": 9.903942157357437e-06, "loss": 16.8561, "step": 34000 }, { "epoch": 0.6149520501038557, "grad_norm": 39.78125, "learning_rate": 9.903913905050778e-06, "loss": 17.2727, "step": 34010 }, { "epoch": 0.6151328651729835, "grad_norm": 42.1875, "learning_rate": 9.90388565274412e-06, "loss": 16.6296, "step": 34020 }, { "epoch": 0.6153136802421114, "grad_norm": 41.71875, "learning_rate": 9.90385740043746e-06, "loss": 16.8987, "step": 34030 }, { "epoch": 0.6154944953112392, "grad_norm": 40.25, "learning_rate": 9.9038291481308e-06, "loss": 17.0696, "step": 34040 }, { "epoch": 0.615675310380367, "grad_norm": 42.96875, "learning_rate": 9.90380089582414e-06, "loss": 17.1479, "step": 34050 }, { "epoch": 0.615856125449495, "grad_norm": 41.21875, "learning_rate": 9.903772643517481e-06, "loss": 17.0279, "step": 34060 }, { "epoch": 0.6160369405186228, "grad_norm": 41.65625, "learning_rate": 9.903744391210822e-06, "loss": 17.0861, "step": 34070 }, { "epoch": 0.6162177555877507, "grad_norm": 41.25, "learning_rate": 9.90371613890416e-06, "loss": 17.1457, "step": 34080 }, { "epoch": 0.6163985706568785, "grad_norm": 42.21875, "learning_rate": 9.903687886597501e-06, "loss": 16.8054, "step": 34090 }, { "epoch": 0.6165793857260065, "grad_norm": 39.71875, "learning_rate": 9.903659634290842e-06, "loss": 16.8803, "step": 34100 }, { "epoch": 0.6167602007951343, "grad_norm": 40.0, "learning_rate": 9.903631381984183e-06, "loss": 17.2851, "step": 34110 }, { "epoch": 0.6169410158642621, "grad_norm": 42.15625, "learning_rate": 9.903603129677523e-06, "loss": 17.0829, "step": 34120 }, { "epoch": 0.61712183093339, "grad_norm": 43.625, "learning_rate": 9.903574877370864e-06, "loss": 16.758, "step": 34130 }, { "epoch": 0.6173026460025178, "grad_norm": 39.09375, "learning_rate": 9.903546625064204e-06, "loss": 17.0564, "step": 34140 }, { "epoch": 0.6174834610716458, "grad_norm": 41.625, "learning_rate": 9.903518372757545e-06, "loss": 16.8258, "step": 34150 }, { "epoch": 0.6176642761407736, "grad_norm": 44.84375, "learning_rate": 9.903490120450886e-06, "loss": 17.0703, "step": 34160 }, { "epoch": 0.6178450912099014, "grad_norm": 43.8125, "learning_rate": 9.903461868144225e-06, "loss": 16.8211, "step": 34170 }, { "epoch": 0.6180259062790293, "grad_norm": 40.15625, "learning_rate": 9.903433615837565e-06, "loss": 16.6875, "step": 34180 }, { "epoch": 0.6182067213481571, "grad_norm": 41.90625, "learning_rate": 9.903405363530906e-06, "loss": 16.5914, "step": 34190 }, { "epoch": 0.618387536417285, "grad_norm": 39.875, "learning_rate": 9.903377111224246e-06, "loss": 16.8584, "step": 34200 }, { "epoch": 0.6185683514864129, "grad_norm": 41.65625, "learning_rate": 9.903348858917587e-06, "loss": 16.7128, "step": 34210 }, { "epoch": 0.6187491665555407, "grad_norm": 41.875, "learning_rate": 9.903320606610928e-06, "loss": 16.6644, "step": 34220 }, { "epoch": 0.6189299816246686, "grad_norm": 40.96875, "learning_rate": 9.903292354304268e-06, "loss": 16.4386, "step": 34230 }, { "epoch": 0.6191107966937964, "grad_norm": 40.71875, "learning_rate": 9.903264101997609e-06, "loss": 16.9011, "step": 34240 }, { "epoch": 0.6192916117629244, "grad_norm": 41.0625, "learning_rate": 9.903235849690948e-06, "loss": 16.8479, "step": 34250 }, { "epoch": 0.6194724268320522, "grad_norm": 42.53125, "learning_rate": 9.903207597384289e-06, "loss": 17.3777, "step": 34260 }, { "epoch": 0.6196532419011801, "grad_norm": 45.84375, "learning_rate": 9.903179345077629e-06, "loss": 17.1973, "step": 34270 }, { "epoch": 0.6198340569703079, "grad_norm": 41.6875, "learning_rate": 9.903151092770971e-06, "loss": 17.3628, "step": 34280 }, { "epoch": 0.6200148720394357, "grad_norm": 41.46875, "learning_rate": 9.90312284046431e-06, "loss": 17.0504, "step": 34290 }, { "epoch": 0.6201956871085637, "grad_norm": 42.21875, "learning_rate": 9.903094588157651e-06, "loss": 17.6827, "step": 34300 }, { "epoch": 0.6203765021776915, "grad_norm": 42.96875, "learning_rate": 9.903066335850992e-06, "loss": 16.3788, "step": 34310 }, { "epoch": 0.6205573172468194, "grad_norm": 42.0625, "learning_rate": 9.903038083544332e-06, "loss": 16.5896, "step": 34320 }, { "epoch": 0.6207381323159472, "grad_norm": 41.46875, "learning_rate": 9.903009831237673e-06, "loss": 16.5937, "step": 34330 }, { "epoch": 0.620918947385075, "grad_norm": 39.8125, "learning_rate": 9.902981578931012e-06, "loss": 16.8482, "step": 34340 }, { "epoch": 0.621099762454203, "grad_norm": 42.21875, "learning_rate": 9.902953326624352e-06, "loss": 16.9854, "step": 34350 }, { "epoch": 0.6212805775233308, "grad_norm": 44.625, "learning_rate": 9.902925074317693e-06, "loss": 16.7246, "step": 34360 }, { "epoch": 0.6214613925924587, "grad_norm": 43.78125, "learning_rate": 9.902896822011034e-06, "loss": 16.9704, "step": 34370 }, { "epoch": 0.6216422076615865, "grad_norm": 38.78125, "learning_rate": 9.902868569704374e-06, "loss": 16.8478, "step": 34380 }, { "epoch": 0.6218230227307143, "grad_norm": 44.34375, "learning_rate": 9.902840317397715e-06, "loss": 16.8959, "step": 34390 }, { "epoch": 0.6220038377998423, "grad_norm": 42.25, "learning_rate": 9.902812065091056e-06, "loss": 16.4676, "step": 34400 }, { "epoch": 0.6221846528689701, "grad_norm": 40.59375, "learning_rate": 9.902783812784396e-06, "loss": 16.6884, "step": 34410 }, { "epoch": 0.622365467938098, "grad_norm": 44.34375, "learning_rate": 9.902755560477735e-06, "loss": 17.0956, "step": 34420 }, { "epoch": 0.6225462830072258, "grad_norm": 43.15625, "learning_rate": 9.902727308171076e-06, "loss": 16.7984, "step": 34430 }, { "epoch": 0.6227270980763537, "grad_norm": 40.53125, "learning_rate": 9.902699055864416e-06, "loss": 16.6023, "step": 34440 }, { "epoch": 0.6229079131454816, "grad_norm": 44.53125, "learning_rate": 9.902670803557757e-06, "loss": 16.9605, "step": 34450 }, { "epoch": 0.6230887282146094, "grad_norm": 41.0625, "learning_rate": 9.902642551251098e-06, "loss": 17.1605, "step": 34460 }, { "epoch": 0.6232695432837373, "grad_norm": 42.03125, "learning_rate": 9.902614298944438e-06, "loss": 16.8769, "step": 34470 }, { "epoch": 0.6234503583528651, "grad_norm": 40.8125, "learning_rate": 9.902586046637779e-06, "loss": 17.3661, "step": 34480 }, { "epoch": 0.623631173421993, "grad_norm": 41.59375, "learning_rate": 9.90255779433112e-06, "loss": 16.8391, "step": 34490 }, { "epoch": 0.6238119884911209, "grad_norm": 41.625, "learning_rate": 9.90252954202446e-06, "loss": 17.2878, "step": 34500 }, { "epoch": 0.6239928035602487, "grad_norm": 39.0625, "learning_rate": 9.902501289717799e-06, "loss": 17.1672, "step": 34510 }, { "epoch": 0.6241736186293766, "grad_norm": 41.21875, "learning_rate": 9.90247303741114e-06, "loss": 17.1609, "step": 34520 }, { "epoch": 0.6243544336985044, "grad_norm": 42.125, "learning_rate": 9.90244478510448e-06, "loss": 17.036, "step": 34530 }, { "epoch": 0.6245352487676323, "grad_norm": 42.40625, "learning_rate": 9.902416532797821e-06, "loss": 16.9945, "step": 34540 }, { "epoch": 0.6247160638367601, "grad_norm": 40.09375, "learning_rate": 9.902388280491161e-06, "loss": 16.446, "step": 34550 }, { "epoch": 0.624896878905888, "grad_norm": 42.6875, "learning_rate": 9.902360028184502e-06, "loss": 16.5309, "step": 34560 }, { "epoch": 0.6250776939750159, "grad_norm": 41.4375, "learning_rate": 9.902331775877843e-06, "loss": 17.0712, "step": 34570 }, { "epoch": 0.6252585090441437, "grad_norm": 40.9375, "learning_rate": 9.902303523571183e-06, "loss": 16.878, "step": 34580 }, { "epoch": 0.6254393241132716, "grad_norm": 43.28125, "learning_rate": 9.902275271264524e-06, "loss": 16.8346, "step": 34590 }, { "epoch": 0.6256201391823994, "grad_norm": 40.96875, "learning_rate": 9.902247018957863e-06, "loss": 16.9437, "step": 34600 }, { "epoch": 0.6258009542515274, "grad_norm": 43.0625, "learning_rate": 9.902218766651204e-06, "loss": 17.3732, "step": 34610 }, { "epoch": 0.6259817693206552, "grad_norm": 42.03125, "learning_rate": 9.902190514344544e-06, "loss": 17.4373, "step": 34620 }, { "epoch": 0.626162584389783, "grad_norm": 40.25, "learning_rate": 9.902162262037885e-06, "loss": 16.7355, "step": 34630 }, { "epoch": 0.6263433994589109, "grad_norm": 43.03125, "learning_rate": 9.902134009731225e-06, "loss": 16.743, "step": 34640 }, { "epoch": 0.6265242145280387, "grad_norm": 42.5625, "learning_rate": 9.902105757424566e-06, "loss": 16.8201, "step": 34650 }, { "epoch": 0.6267050295971667, "grad_norm": 43.625, "learning_rate": 9.902077505117907e-06, "loss": 17.0255, "step": 34660 }, { "epoch": 0.6268858446662945, "grad_norm": 41.6875, "learning_rate": 9.902049252811247e-06, "loss": 16.6278, "step": 34670 }, { "epoch": 0.6270666597354223, "grad_norm": 43.15625, "learning_rate": 9.902021000504586e-06, "loss": 16.7172, "step": 34680 }, { "epoch": 0.6272474748045502, "grad_norm": 42.90625, "learning_rate": 9.901992748197927e-06, "loss": 17.1172, "step": 34690 }, { "epoch": 0.627428289873678, "grad_norm": 38.4375, "learning_rate": 9.901964495891267e-06, "loss": 17.0091, "step": 34700 }, { "epoch": 0.627609104942806, "grad_norm": 43.625, "learning_rate": 9.901936243584608e-06, "loss": 16.8104, "step": 34710 }, { "epoch": 0.6277899200119338, "grad_norm": 40.1875, "learning_rate": 9.901907991277949e-06, "loss": 16.8872, "step": 34720 }, { "epoch": 0.6279707350810616, "grad_norm": 41.78125, "learning_rate": 9.90187973897129e-06, "loss": 17.1016, "step": 34730 }, { "epoch": 0.6281515501501895, "grad_norm": 41.59375, "learning_rate": 9.90185148666463e-06, "loss": 17.1182, "step": 34740 }, { "epoch": 0.6283323652193173, "grad_norm": 43.78125, "learning_rate": 9.90182323435797e-06, "loss": 16.9325, "step": 34750 }, { "epoch": 0.6285131802884453, "grad_norm": 43.8125, "learning_rate": 9.901794982051311e-06, "loss": 16.7678, "step": 34760 }, { "epoch": 0.6286939953575731, "grad_norm": 44.0, "learning_rate": 9.90176672974465e-06, "loss": 16.7194, "step": 34770 }, { "epoch": 0.628874810426701, "grad_norm": 41.4375, "learning_rate": 9.90173847743799e-06, "loss": 16.9852, "step": 34780 }, { "epoch": 0.6290556254958288, "grad_norm": 40.34375, "learning_rate": 9.901710225131331e-06, "loss": 17.3694, "step": 34790 }, { "epoch": 0.6292364405649566, "grad_norm": 45.625, "learning_rate": 9.901681972824672e-06, "loss": 16.7443, "step": 34800 }, { "epoch": 0.6294172556340846, "grad_norm": 42.53125, "learning_rate": 9.901653720518013e-06, "loss": 16.5887, "step": 34810 }, { "epoch": 0.6295980707032124, "grad_norm": 43.125, "learning_rate": 9.901625468211353e-06, "loss": 16.9845, "step": 34820 }, { "epoch": 0.6297788857723403, "grad_norm": 39.9375, "learning_rate": 9.901597215904694e-06, "loss": 16.8, "step": 34830 }, { "epoch": 0.6299597008414681, "grad_norm": 41.6875, "learning_rate": 9.901568963598034e-06, "loss": 17.1302, "step": 34840 }, { "epoch": 0.6301405159105959, "grad_norm": 43.90625, "learning_rate": 9.901540711291373e-06, "loss": 16.5467, "step": 34850 }, { "epoch": 0.6303213309797239, "grad_norm": 43.84375, "learning_rate": 9.901512458984714e-06, "loss": 16.4088, "step": 34860 }, { "epoch": 0.6305021460488517, "grad_norm": 42.09375, "learning_rate": 9.901484206678055e-06, "loss": 16.5629, "step": 34870 }, { "epoch": 0.6306829611179796, "grad_norm": 43.9375, "learning_rate": 9.901455954371395e-06, "loss": 16.8379, "step": 34880 }, { "epoch": 0.6308637761871074, "grad_norm": 43.03125, "learning_rate": 9.901427702064736e-06, "loss": 16.6736, "step": 34890 }, { "epoch": 0.6310445912562352, "grad_norm": 42.5, "learning_rate": 9.901399449758076e-06, "loss": 16.9239, "step": 34900 }, { "epoch": 0.6312254063253632, "grad_norm": 43.96875, "learning_rate": 9.901371197451417e-06, "loss": 17.1634, "step": 34910 }, { "epoch": 0.631406221394491, "grad_norm": 40.78125, "learning_rate": 9.901342945144758e-06, "loss": 17.1069, "step": 34920 }, { "epoch": 0.6315870364636189, "grad_norm": 42.875, "learning_rate": 9.901314692838098e-06, "loss": 16.9312, "step": 34930 }, { "epoch": 0.6317678515327467, "grad_norm": 41.09375, "learning_rate": 9.901286440531437e-06, "loss": 16.988, "step": 34940 }, { "epoch": 0.6319486666018747, "grad_norm": 43.1875, "learning_rate": 9.901258188224778e-06, "loss": 16.9073, "step": 34950 }, { "epoch": 0.6321294816710025, "grad_norm": 41.84375, "learning_rate": 9.901229935918119e-06, "loss": 17.0261, "step": 34960 }, { "epoch": 0.6323102967401303, "grad_norm": 41.8125, "learning_rate": 9.901201683611459e-06, "loss": 16.8868, "step": 34970 }, { "epoch": 0.6324911118092582, "grad_norm": 42.375, "learning_rate": 9.9011734313048e-06, "loss": 16.634, "step": 34980 }, { "epoch": 0.632671926878386, "grad_norm": 42.78125, "learning_rate": 9.90114517899814e-06, "loss": 16.5571, "step": 34990 }, { "epoch": 0.632852741947514, "grad_norm": 41.625, "learning_rate": 9.901116926691481e-06, "loss": 16.7899, "step": 35000 }, { "epoch": 0.632852741947514, "eval_loss": 2.1153111457824707, "eval_runtime": 229.0986, "eval_samples_per_second": 3169.199, "eval_steps_per_second": 49.52, "step": 35000 }, { "epoch": 0.6330335570166418, "grad_norm": 40.875, "learning_rate": 9.901088674384822e-06, "loss": 17.0118, "step": 35010 }, { "epoch": 0.6332143720857696, "grad_norm": 42.5, "learning_rate": 9.901060422078162e-06, "loss": 17.3374, "step": 35020 }, { "epoch": 0.6333951871548975, "grad_norm": 44.21875, "learning_rate": 9.901032169771501e-06, "loss": 17.1585, "step": 35030 }, { "epoch": 0.6335760022240253, "grad_norm": 43.28125, "learning_rate": 9.901003917464842e-06, "loss": 16.9927, "step": 35040 }, { "epoch": 0.6337568172931533, "grad_norm": 41.90625, "learning_rate": 9.900975665158182e-06, "loss": 16.7819, "step": 35050 }, { "epoch": 0.6339376323622811, "grad_norm": 42.65625, "learning_rate": 9.900947412851523e-06, "loss": 16.6033, "step": 35060 }, { "epoch": 0.6341184474314089, "grad_norm": 45.15625, "learning_rate": 9.900919160544864e-06, "loss": 16.9377, "step": 35070 }, { "epoch": 0.6342992625005368, "grad_norm": 41.34375, "learning_rate": 9.900890908238204e-06, "loss": 16.9437, "step": 35080 }, { "epoch": 0.6344800775696646, "grad_norm": 41.5625, "learning_rate": 9.900862655931545e-06, "loss": 16.9132, "step": 35090 }, { "epoch": 0.6346608926387926, "grad_norm": 39.625, "learning_rate": 9.900834403624886e-06, "loss": 16.9262, "step": 35100 }, { "epoch": 0.6348417077079204, "grad_norm": 43.03125, "learning_rate": 9.900806151318224e-06, "loss": 16.8212, "step": 35110 }, { "epoch": 0.6350225227770483, "grad_norm": 43.5625, "learning_rate": 9.900777899011565e-06, "loss": 16.7947, "step": 35120 }, { "epoch": 0.6352033378461761, "grad_norm": 39.59375, "learning_rate": 9.900749646704906e-06, "loss": 16.9076, "step": 35130 }, { "epoch": 0.6353841529153039, "grad_norm": 42.25, "learning_rate": 9.900721394398246e-06, "loss": 16.4483, "step": 35140 }, { "epoch": 0.6355649679844319, "grad_norm": 43.625, "learning_rate": 9.900693142091587e-06, "loss": 17.4467, "step": 35150 }, { "epoch": 0.6357457830535597, "grad_norm": 39.375, "learning_rate": 9.900664889784928e-06, "loss": 16.6712, "step": 35160 }, { "epoch": 0.6359265981226876, "grad_norm": 43.5625, "learning_rate": 9.900636637478268e-06, "loss": 16.6812, "step": 35170 }, { "epoch": 0.6361074131918154, "grad_norm": 43.25, "learning_rate": 9.900608385171609e-06, "loss": 16.7277, "step": 35180 }, { "epoch": 0.6362882282609432, "grad_norm": 42.75, "learning_rate": 9.90058013286495e-06, "loss": 17.0572, "step": 35190 }, { "epoch": 0.6364690433300711, "grad_norm": 39.90625, "learning_rate": 9.900551880558288e-06, "loss": 17.105, "step": 35200 }, { "epoch": 0.636649858399199, "grad_norm": 41.1875, "learning_rate": 9.900523628251629e-06, "loss": 16.6574, "step": 35210 }, { "epoch": 0.6368306734683269, "grad_norm": 42.03125, "learning_rate": 9.90049537594497e-06, "loss": 16.8217, "step": 35220 }, { "epoch": 0.6370114885374547, "grad_norm": 43.21875, "learning_rate": 9.90046712363831e-06, "loss": 16.7569, "step": 35230 }, { "epoch": 0.6371923036065825, "grad_norm": 42.21875, "learning_rate": 9.900438871331651e-06, "loss": 16.4877, "step": 35240 }, { "epoch": 0.6373731186757104, "grad_norm": 42.28125, "learning_rate": 9.900410619024991e-06, "loss": 16.7122, "step": 35250 }, { "epoch": 0.6375539337448383, "grad_norm": 40.65625, "learning_rate": 9.900382366718332e-06, "loss": 16.9515, "step": 35260 }, { "epoch": 0.6377347488139662, "grad_norm": 42.25, "learning_rate": 9.900354114411673e-06, "loss": 16.655, "step": 35270 }, { "epoch": 0.637915563883094, "grad_norm": 42.5625, "learning_rate": 9.900325862105012e-06, "loss": 16.847, "step": 35280 }, { "epoch": 0.6380963789522219, "grad_norm": 38.375, "learning_rate": 9.900297609798352e-06, "loss": 16.9473, "step": 35290 }, { "epoch": 0.6382771940213497, "grad_norm": 42.625, "learning_rate": 9.900269357491693e-06, "loss": 16.8541, "step": 35300 }, { "epoch": 0.6384580090904776, "grad_norm": 40.28125, "learning_rate": 9.900241105185034e-06, "loss": 16.9603, "step": 35310 }, { "epoch": 0.6386388241596055, "grad_norm": 42.21875, "learning_rate": 9.900212852878374e-06, "loss": 16.9738, "step": 35320 }, { "epoch": 0.6388196392287333, "grad_norm": 41.28125, "learning_rate": 9.900184600571715e-06, "loss": 17.0404, "step": 35330 }, { "epoch": 0.6390004542978612, "grad_norm": 43.46875, "learning_rate": 9.900156348265055e-06, "loss": 17.3818, "step": 35340 }, { "epoch": 0.639181269366989, "grad_norm": 40.90625, "learning_rate": 9.900128095958396e-06, "loss": 16.7372, "step": 35350 }, { "epoch": 0.6393620844361169, "grad_norm": 42.65625, "learning_rate": 9.900099843651737e-06, "loss": 16.9702, "step": 35360 }, { "epoch": 0.6395428995052448, "grad_norm": 45.375, "learning_rate": 9.900071591345076e-06, "loss": 17.1497, "step": 35370 }, { "epoch": 0.6397237145743726, "grad_norm": 43.3125, "learning_rate": 9.900043339038416e-06, "loss": 16.9083, "step": 35380 }, { "epoch": 0.6399045296435005, "grad_norm": 45.34375, "learning_rate": 9.900015086731757e-06, "loss": 16.762, "step": 35390 }, { "epoch": 0.6400853447126283, "grad_norm": 40.6875, "learning_rate": 9.899986834425097e-06, "loss": 17.2308, "step": 35400 }, { "epoch": 0.6402661597817562, "grad_norm": 43.96875, "learning_rate": 9.899958582118438e-06, "loss": 16.7875, "step": 35410 }, { "epoch": 0.6404469748508841, "grad_norm": 41.90625, "learning_rate": 9.899930329811779e-06, "loss": 16.6921, "step": 35420 }, { "epoch": 0.6406277899200119, "grad_norm": 43.15625, "learning_rate": 9.89990207750512e-06, "loss": 16.8197, "step": 35430 }, { "epoch": 0.6408086049891398, "grad_norm": 43.65625, "learning_rate": 9.89987382519846e-06, "loss": 16.9357, "step": 35440 }, { "epoch": 0.6409894200582676, "grad_norm": 43.40625, "learning_rate": 9.8998455728918e-06, "loss": 16.8973, "step": 35450 }, { "epoch": 0.6411702351273956, "grad_norm": 43.21875, "learning_rate": 9.89981732058514e-06, "loss": 16.7642, "step": 35460 }, { "epoch": 0.6413510501965234, "grad_norm": 46.84375, "learning_rate": 9.89978906827848e-06, "loss": 16.9361, "step": 35470 }, { "epoch": 0.6415318652656512, "grad_norm": 43.15625, "learning_rate": 9.89976081597182e-06, "loss": 17.1652, "step": 35480 }, { "epoch": 0.6417126803347791, "grad_norm": 43.71875, "learning_rate": 9.899732563665161e-06, "loss": 16.9174, "step": 35490 }, { "epoch": 0.6418934954039069, "grad_norm": 41.8125, "learning_rate": 9.899704311358502e-06, "loss": 16.7135, "step": 35500 }, { "epoch": 0.6420743104730349, "grad_norm": 38.71875, "learning_rate": 9.899676059051843e-06, "loss": 16.6854, "step": 35510 }, { "epoch": 0.6422551255421627, "grad_norm": 42.375, "learning_rate": 9.899647806745183e-06, "loss": 17.3483, "step": 35520 }, { "epoch": 0.6424359406112905, "grad_norm": 40.625, "learning_rate": 9.899619554438524e-06, "loss": 16.9488, "step": 35530 }, { "epoch": 0.6426167556804184, "grad_norm": 42.4375, "learning_rate": 9.899591302131863e-06, "loss": 17.0364, "step": 35540 }, { "epoch": 0.6427975707495462, "grad_norm": 44.1875, "learning_rate": 9.899563049825203e-06, "loss": 17.0453, "step": 35550 }, { "epoch": 0.6429783858186742, "grad_norm": 42.0, "learning_rate": 9.899534797518544e-06, "loss": 17.2179, "step": 35560 }, { "epoch": 0.643159200887802, "grad_norm": 42.40625, "learning_rate": 9.899506545211885e-06, "loss": 16.6237, "step": 35570 }, { "epoch": 0.6433400159569298, "grad_norm": 42.34375, "learning_rate": 9.899478292905225e-06, "loss": 16.5776, "step": 35580 }, { "epoch": 0.6435208310260577, "grad_norm": 43.28125, "learning_rate": 9.899450040598564e-06, "loss": 17.0083, "step": 35590 }, { "epoch": 0.6437016460951855, "grad_norm": 42.09375, "learning_rate": 9.899421788291906e-06, "loss": 16.8802, "step": 35600 }, { "epoch": 0.6438824611643135, "grad_norm": 43.15625, "learning_rate": 9.899393535985247e-06, "loss": 16.8426, "step": 35610 }, { "epoch": 0.6440632762334413, "grad_norm": 40.5, "learning_rate": 9.899365283678588e-06, "loss": 16.6048, "step": 35620 }, { "epoch": 0.6442440913025692, "grad_norm": 44.21875, "learning_rate": 9.899337031371927e-06, "loss": 16.8645, "step": 35630 }, { "epoch": 0.644424906371697, "grad_norm": 42.5, "learning_rate": 9.899308779065267e-06, "loss": 17.0983, "step": 35640 }, { "epoch": 0.6446057214408248, "grad_norm": 40.625, "learning_rate": 9.899280526758608e-06, "loss": 16.9962, "step": 35650 }, { "epoch": 0.6447865365099528, "grad_norm": 41.34375, "learning_rate": 9.899252274451949e-06, "loss": 16.5154, "step": 35660 }, { "epoch": 0.6449673515790806, "grad_norm": 41.53125, "learning_rate": 9.899224022145289e-06, "loss": 17.1147, "step": 35670 }, { "epoch": 0.6451481666482085, "grad_norm": 43.75, "learning_rate": 9.899195769838628e-06, "loss": 17.1466, "step": 35680 }, { "epoch": 0.6453289817173363, "grad_norm": 43.875, "learning_rate": 9.89916751753197e-06, "loss": 16.6516, "step": 35690 }, { "epoch": 0.6455097967864641, "grad_norm": 44.375, "learning_rate": 9.899139265225311e-06, "loss": 16.9173, "step": 35700 }, { "epoch": 0.6456906118555921, "grad_norm": 43.0625, "learning_rate": 9.89911101291865e-06, "loss": 16.9888, "step": 35710 }, { "epoch": 0.6458714269247199, "grad_norm": 40.125, "learning_rate": 9.89908276061199e-06, "loss": 16.8233, "step": 35720 }, { "epoch": 0.6460522419938478, "grad_norm": 44.1875, "learning_rate": 9.899054508305331e-06, "loss": 16.9166, "step": 35730 }, { "epoch": 0.6462330570629756, "grad_norm": 38.09375, "learning_rate": 9.899026255998672e-06, "loss": 16.7543, "step": 35740 }, { "epoch": 0.6464138721321034, "grad_norm": 41.96875, "learning_rate": 9.898998003692012e-06, "loss": 17.1185, "step": 35750 }, { "epoch": 0.6465946872012314, "grad_norm": 42.40625, "learning_rate": 9.898969751385353e-06, "loss": 16.4558, "step": 35760 }, { "epoch": 0.6467755022703592, "grad_norm": 44.28125, "learning_rate": 9.898941499078694e-06, "loss": 16.7642, "step": 35770 }, { "epoch": 0.6469563173394871, "grad_norm": 41.59375, "learning_rate": 9.898913246772034e-06, "loss": 16.9298, "step": 35780 }, { "epoch": 0.6471371324086149, "grad_norm": 42.5625, "learning_rate": 9.898884994465375e-06, "loss": 17.1463, "step": 35790 }, { "epoch": 0.6473179474777428, "grad_norm": 40.84375, "learning_rate": 9.898856742158714e-06, "loss": 17.0144, "step": 35800 }, { "epoch": 0.6474987625468707, "grad_norm": 42.59375, "learning_rate": 9.898828489852054e-06, "loss": 16.8369, "step": 35810 }, { "epoch": 0.6476795776159985, "grad_norm": 42.03125, "learning_rate": 9.898800237545395e-06, "loss": 16.9437, "step": 35820 }, { "epoch": 0.6478603926851264, "grad_norm": 42.28125, "learning_rate": 9.898771985238736e-06, "loss": 16.7855, "step": 35830 }, { "epoch": 0.6480412077542542, "grad_norm": 43.03125, "learning_rate": 9.898743732932076e-06, "loss": 16.9263, "step": 35840 }, { "epoch": 0.6482220228233821, "grad_norm": 45.625, "learning_rate": 9.898715480625415e-06, "loss": 16.818, "step": 35850 }, { "epoch": 0.64840283789251, "grad_norm": 39.59375, "learning_rate": 9.898687228318758e-06, "loss": 16.6835, "step": 35860 }, { "epoch": 0.6485836529616378, "grad_norm": 41.21875, "learning_rate": 9.898658976012098e-06, "loss": 16.8036, "step": 35870 }, { "epoch": 0.6487644680307657, "grad_norm": 41.5625, "learning_rate": 9.898630723705439e-06, "loss": 17.0221, "step": 35880 }, { "epoch": 0.6489452830998935, "grad_norm": 44.25, "learning_rate": 9.898602471398778e-06, "loss": 16.639, "step": 35890 }, { "epoch": 0.6491260981690214, "grad_norm": 47.71875, "learning_rate": 9.898574219092118e-06, "loss": 16.8011, "step": 35900 }, { "epoch": 0.6493069132381493, "grad_norm": 41.6875, "learning_rate": 9.898545966785459e-06, "loss": 16.8954, "step": 35910 }, { "epoch": 0.6494877283072771, "grad_norm": 41.6875, "learning_rate": 9.8985177144788e-06, "loss": 16.8521, "step": 35920 }, { "epoch": 0.649668543376405, "grad_norm": 42.84375, "learning_rate": 9.89848946217214e-06, "loss": 17.0667, "step": 35930 }, { "epoch": 0.6498493584455328, "grad_norm": 41.75, "learning_rate": 9.89846120986548e-06, "loss": 17.2669, "step": 35940 }, { "epoch": 0.6500301735146607, "grad_norm": 40.4375, "learning_rate": 9.898432957558821e-06, "loss": 16.4704, "step": 35950 }, { "epoch": 0.6502109885837886, "grad_norm": 45.40625, "learning_rate": 9.898404705252162e-06, "loss": 16.8687, "step": 35960 }, { "epoch": 0.6503918036529165, "grad_norm": 42.59375, "learning_rate": 9.898376452945501e-06, "loss": 17.2093, "step": 35970 }, { "epoch": 0.6505726187220443, "grad_norm": 43.125, "learning_rate": 9.898348200638842e-06, "loss": 17.1447, "step": 35980 }, { "epoch": 0.6507534337911721, "grad_norm": 46.625, "learning_rate": 9.898319948332182e-06, "loss": 16.7466, "step": 35990 }, { "epoch": 0.6509342488603, "grad_norm": 43.875, "learning_rate": 9.898291696025523e-06, "loss": 16.8183, "step": 36000 }, { "epoch": 0.6511150639294279, "grad_norm": 42.15625, "learning_rate": 9.898263443718864e-06, "loss": 17.1838, "step": 36010 }, { "epoch": 0.6512958789985558, "grad_norm": 42.6875, "learning_rate": 9.898235191412202e-06, "loss": 16.6245, "step": 36020 }, { "epoch": 0.6514766940676836, "grad_norm": 42.25, "learning_rate": 9.898206939105543e-06, "loss": 16.658, "step": 36030 }, { "epoch": 0.6516575091368114, "grad_norm": 45.125, "learning_rate": 9.898178686798885e-06, "loss": 17.396, "step": 36040 }, { "epoch": 0.6518383242059393, "grad_norm": 41.875, "learning_rate": 9.898150434492226e-06, "loss": 16.9268, "step": 36050 }, { "epoch": 0.6520191392750672, "grad_norm": 42.21875, "learning_rate": 9.898122182185565e-06, "loss": 16.686, "step": 36060 }, { "epoch": 0.6521999543441951, "grad_norm": 42.1875, "learning_rate": 9.898093929878906e-06, "loss": 16.705, "step": 36070 }, { "epoch": 0.6523807694133229, "grad_norm": 43.90625, "learning_rate": 9.898065677572246e-06, "loss": 16.6551, "step": 36080 }, { "epoch": 0.6525615844824507, "grad_norm": 43.28125, "learning_rate": 9.898037425265587e-06, "loss": 16.6608, "step": 36090 }, { "epoch": 0.6527423995515786, "grad_norm": 44.21875, "learning_rate": 9.898009172958927e-06, "loss": 17.0485, "step": 36100 }, { "epoch": 0.6529232146207065, "grad_norm": 42.0625, "learning_rate": 9.897980920652266e-06, "loss": 17.0704, "step": 36110 }, { "epoch": 0.6531040296898344, "grad_norm": 43.15625, "learning_rate": 9.897952668345609e-06, "loss": 16.8169, "step": 36120 }, { "epoch": 0.6532848447589622, "grad_norm": 42.5, "learning_rate": 9.89792441603895e-06, "loss": 16.8907, "step": 36130 }, { "epoch": 0.6534656598280901, "grad_norm": 42.8125, "learning_rate": 9.897896163732288e-06, "loss": 16.8032, "step": 36140 }, { "epoch": 0.6536464748972179, "grad_norm": 40.96875, "learning_rate": 9.897867911425629e-06, "loss": 16.6132, "step": 36150 }, { "epoch": 0.6538272899663458, "grad_norm": 41.53125, "learning_rate": 9.89783965911897e-06, "loss": 16.8102, "step": 36160 }, { "epoch": 0.6540081050354737, "grad_norm": 41.96875, "learning_rate": 9.89781140681231e-06, "loss": 17.1489, "step": 36170 }, { "epoch": 0.6541889201046015, "grad_norm": 44.90625, "learning_rate": 9.89778315450565e-06, "loss": 16.9509, "step": 36180 }, { "epoch": 0.6543697351737294, "grad_norm": 42.59375, "learning_rate": 9.89775490219899e-06, "loss": 17.1862, "step": 36190 }, { "epoch": 0.6545505502428572, "grad_norm": 42.78125, "learning_rate": 9.89772664989233e-06, "loss": 17.2136, "step": 36200 }, { "epoch": 0.6547313653119851, "grad_norm": 42.8125, "learning_rate": 9.897698397585673e-06, "loss": 16.7174, "step": 36210 }, { "epoch": 0.654912180381113, "grad_norm": 41.1875, "learning_rate": 9.897670145279013e-06, "loss": 16.752, "step": 36220 }, { "epoch": 0.6550929954502408, "grad_norm": 44.28125, "learning_rate": 9.897641892972352e-06, "loss": 16.7826, "step": 36230 }, { "epoch": 0.6552738105193687, "grad_norm": 42.875, "learning_rate": 9.897613640665693e-06, "loss": 16.906, "step": 36240 }, { "epoch": 0.6554546255884965, "grad_norm": 42.625, "learning_rate": 9.897585388359033e-06, "loss": 16.7243, "step": 36250 }, { "epoch": 0.6556354406576244, "grad_norm": 44.21875, "learning_rate": 9.897557136052374e-06, "loss": 17.1138, "step": 36260 }, { "epoch": 0.6558162557267523, "grad_norm": 43.8125, "learning_rate": 9.897528883745715e-06, "loss": 17.3506, "step": 36270 }, { "epoch": 0.6559970707958801, "grad_norm": 39.53125, "learning_rate": 9.897500631439054e-06, "loss": 17.1032, "step": 36280 }, { "epoch": 0.656177885865008, "grad_norm": 42.34375, "learning_rate": 9.897472379132394e-06, "loss": 16.862, "step": 36290 }, { "epoch": 0.6563587009341358, "grad_norm": 43.8125, "learning_rate": 9.897444126825736e-06, "loss": 17.4687, "step": 36300 }, { "epoch": 0.6565395160032637, "grad_norm": 42.71875, "learning_rate": 9.897415874519075e-06, "loss": 16.8392, "step": 36310 }, { "epoch": 0.6567203310723916, "grad_norm": 44.40625, "learning_rate": 9.897387622212416e-06, "loss": 17.142, "step": 36320 }, { "epoch": 0.6569011461415194, "grad_norm": 42.125, "learning_rate": 9.897359369905757e-06, "loss": 16.773, "step": 36330 }, { "epoch": 0.6570819612106473, "grad_norm": 41.375, "learning_rate": 9.897331117599097e-06, "loss": 16.9312, "step": 36340 }, { "epoch": 0.6572627762797751, "grad_norm": 44.90625, "learning_rate": 9.897302865292438e-06, "loss": 17.084, "step": 36350 }, { "epoch": 0.6574435913489031, "grad_norm": 46.78125, "learning_rate": 9.897274612985779e-06, "loss": 17.0247, "step": 36360 }, { "epoch": 0.6576244064180309, "grad_norm": 43.4375, "learning_rate": 9.897246360679117e-06, "loss": 16.9223, "step": 36370 }, { "epoch": 0.6578052214871587, "grad_norm": 40.59375, "learning_rate": 9.897218108372458e-06, "loss": 16.8791, "step": 36380 }, { "epoch": 0.6579860365562866, "grad_norm": 42.6875, "learning_rate": 9.8971898560658e-06, "loss": 17.133, "step": 36390 }, { "epoch": 0.6581668516254144, "grad_norm": 44.25, "learning_rate": 9.89716160375914e-06, "loss": 17.3585, "step": 36400 }, { "epoch": 0.6583476666945424, "grad_norm": 41.09375, "learning_rate": 9.89713335145248e-06, "loss": 16.954, "step": 36410 }, { "epoch": 0.6585284817636702, "grad_norm": 41.40625, "learning_rate": 9.89710509914582e-06, "loss": 16.6315, "step": 36420 }, { "epoch": 0.658709296832798, "grad_norm": 42.59375, "learning_rate": 9.897076846839161e-06, "loss": 17.1309, "step": 36430 }, { "epoch": 0.6588901119019259, "grad_norm": 43.75, "learning_rate": 9.897048594532502e-06, "loss": 16.5603, "step": 36440 }, { "epoch": 0.6590709269710537, "grad_norm": 43.3125, "learning_rate": 9.89702034222584e-06, "loss": 17.2705, "step": 36450 }, { "epoch": 0.6592517420401817, "grad_norm": 43.125, "learning_rate": 9.896992089919181e-06, "loss": 17.4744, "step": 36460 }, { "epoch": 0.6594325571093095, "grad_norm": 41.15625, "learning_rate": 9.896963837612524e-06, "loss": 16.9674, "step": 36470 }, { "epoch": 0.6596133721784373, "grad_norm": 44.5, "learning_rate": 9.896935585305864e-06, "loss": 17.0317, "step": 36480 }, { "epoch": 0.6597941872475652, "grad_norm": 42.71875, "learning_rate": 9.896907332999203e-06, "loss": 17.1149, "step": 36490 }, { "epoch": 0.659975002316693, "grad_norm": 43.75, "learning_rate": 9.896879080692544e-06, "loss": 16.7619, "step": 36500 }, { "epoch": 0.660155817385821, "grad_norm": 40.4375, "learning_rate": 9.896850828385884e-06, "loss": 16.9837, "step": 36510 }, { "epoch": 0.6603366324549488, "grad_norm": 43.4375, "learning_rate": 9.896822576079225e-06, "loss": 16.7384, "step": 36520 }, { "epoch": 0.6605174475240767, "grad_norm": 42.375, "learning_rate": 9.896794323772566e-06, "loss": 16.8103, "step": 36530 }, { "epoch": 0.6606982625932045, "grad_norm": 48.125, "learning_rate": 9.896766071465905e-06, "loss": 16.7246, "step": 36540 }, { "epoch": 0.6608790776623323, "grad_norm": 41.125, "learning_rate": 9.896737819159245e-06, "loss": 16.5504, "step": 36550 }, { "epoch": 0.6610598927314603, "grad_norm": 44.46875, "learning_rate": 9.896709566852588e-06, "loss": 17.1036, "step": 36560 }, { "epoch": 0.6612407078005881, "grad_norm": 42.0, "learning_rate": 9.896681314545927e-06, "loss": 16.7741, "step": 36570 }, { "epoch": 0.661421522869716, "grad_norm": 47.5, "learning_rate": 9.896653062239267e-06, "loss": 16.8923, "step": 36580 }, { "epoch": 0.6616023379388438, "grad_norm": 40.0625, "learning_rate": 9.896624809932608e-06, "loss": 16.797, "step": 36590 }, { "epoch": 0.6617831530079716, "grad_norm": 43.65625, "learning_rate": 9.896596557625948e-06, "loss": 16.9909, "step": 36600 }, { "epoch": 0.6619639680770996, "grad_norm": 41.40625, "learning_rate": 9.896568305319289e-06, "loss": 16.6533, "step": 36610 }, { "epoch": 0.6621447831462274, "grad_norm": 43.75, "learning_rate": 9.896540053012628e-06, "loss": 16.85, "step": 36620 }, { "epoch": 0.6623255982153553, "grad_norm": 45.125, "learning_rate": 9.896511800705969e-06, "loss": 16.742, "step": 36630 }, { "epoch": 0.6625064132844831, "grad_norm": 43.09375, "learning_rate": 9.89648354839931e-06, "loss": 16.9515, "step": 36640 }, { "epoch": 0.6626872283536109, "grad_norm": 42.75, "learning_rate": 9.896455296092652e-06, "loss": 17.0241, "step": 36650 }, { "epoch": 0.6628680434227389, "grad_norm": 43.65625, "learning_rate": 9.89642704378599e-06, "loss": 17.213, "step": 36660 }, { "epoch": 0.6630488584918667, "grad_norm": 44.09375, "learning_rate": 9.896398791479331e-06, "loss": 16.7815, "step": 36670 }, { "epoch": 0.6632296735609946, "grad_norm": 43.65625, "learning_rate": 9.896370539172672e-06, "loss": 17.3715, "step": 36680 }, { "epoch": 0.6634104886301224, "grad_norm": 45.6875, "learning_rate": 9.896342286866012e-06, "loss": 17.1651, "step": 36690 }, { "epoch": 0.6635913036992503, "grad_norm": 44.21875, "learning_rate": 9.896314034559353e-06, "loss": 17.0294, "step": 36700 }, { "epoch": 0.6637721187683782, "grad_norm": 42.03125, "learning_rate": 9.896285782252692e-06, "loss": 16.8683, "step": 36710 }, { "epoch": 0.663952933837506, "grad_norm": 43.6875, "learning_rate": 9.896257529946032e-06, "loss": 17.0183, "step": 36720 }, { "epoch": 0.6641337489066339, "grad_norm": 40.625, "learning_rate": 9.896229277639373e-06, "loss": 16.8197, "step": 36730 }, { "epoch": 0.6643145639757617, "grad_norm": 44.21875, "learning_rate": 9.896201025332714e-06, "loss": 16.8733, "step": 36740 }, { "epoch": 0.6644953790448896, "grad_norm": 40.78125, "learning_rate": 9.896172773026054e-06, "loss": 16.7025, "step": 36750 }, { "epoch": 0.6646761941140175, "grad_norm": 44.5625, "learning_rate": 9.896144520719395e-06, "loss": 16.9805, "step": 36760 }, { "epoch": 0.6648570091831453, "grad_norm": 42.28125, "learning_rate": 9.896116268412736e-06, "loss": 17.3065, "step": 36770 }, { "epoch": 0.6650378242522732, "grad_norm": 41.5625, "learning_rate": 9.896088016106076e-06, "loss": 17.0735, "step": 36780 }, { "epoch": 0.665218639321401, "grad_norm": 42.0625, "learning_rate": 9.896059763799417e-06, "loss": 17.0477, "step": 36790 }, { "epoch": 0.6653994543905289, "grad_norm": 43.09375, "learning_rate": 9.896031511492756e-06, "loss": 16.4885, "step": 36800 }, { "epoch": 0.6655802694596568, "grad_norm": 42.0, "learning_rate": 9.896003259186096e-06, "loss": 17.0502, "step": 36810 }, { "epoch": 0.6657610845287846, "grad_norm": 40.9375, "learning_rate": 9.895975006879439e-06, "loss": 16.9869, "step": 36820 }, { "epoch": 0.6659418995979125, "grad_norm": 42.53125, "learning_rate": 9.895946754572778e-06, "loss": 16.7416, "step": 36830 }, { "epoch": 0.6661227146670403, "grad_norm": 44.53125, "learning_rate": 9.895918502266118e-06, "loss": 16.9338, "step": 36840 }, { "epoch": 0.6663035297361682, "grad_norm": 42.8125, "learning_rate": 9.895890249959459e-06, "loss": 16.6768, "step": 36850 }, { "epoch": 0.6664843448052961, "grad_norm": 43.0625, "learning_rate": 9.8958619976528e-06, "loss": 16.521, "step": 36860 }, { "epoch": 0.666665159874424, "grad_norm": 38.9375, "learning_rate": 9.89583374534614e-06, "loss": 16.6117, "step": 36870 }, { "epoch": 0.6668459749435518, "grad_norm": 43.09375, "learning_rate": 9.895805493039479e-06, "loss": 16.6247, "step": 36880 }, { "epoch": 0.6670267900126796, "grad_norm": 41.25, "learning_rate": 9.89577724073282e-06, "loss": 16.7686, "step": 36890 }, { "epoch": 0.6672076050818075, "grad_norm": 43.40625, "learning_rate": 9.89574898842616e-06, "loss": 16.936, "step": 36900 }, { "epoch": 0.6673884201509354, "grad_norm": 39.03125, "learning_rate": 9.895720736119503e-06, "loss": 17.2432, "step": 36910 }, { "epoch": 0.6675692352200633, "grad_norm": 41.75, "learning_rate": 9.895692483812842e-06, "loss": 17.0423, "step": 36920 }, { "epoch": 0.6677500502891911, "grad_norm": 41.78125, "learning_rate": 9.895664231506182e-06, "loss": 16.7086, "step": 36930 }, { "epoch": 0.6679308653583189, "grad_norm": 40.09375, "learning_rate": 9.895635979199523e-06, "loss": 17.0203, "step": 36940 }, { "epoch": 0.6681116804274468, "grad_norm": 43.15625, "learning_rate": 9.895607726892863e-06, "loss": 16.739, "step": 36950 }, { "epoch": 0.6682924954965747, "grad_norm": 40.375, "learning_rate": 9.895579474586204e-06, "loss": 16.8941, "step": 36960 }, { "epoch": 0.6684733105657026, "grad_norm": 44.625, "learning_rate": 9.895551222279543e-06, "loss": 17.0405, "step": 36970 }, { "epoch": 0.6686541256348304, "grad_norm": 43.5, "learning_rate": 9.895522969972884e-06, "loss": 17.3071, "step": 36980 }, { "epoch": 0.6688349407039582, "grad_norm": 41.5, "learning_rate": 9.895494717666224e-06, "loss": 16.9758, "step": 36990 }, { "epoch": 0.6690157557730861, "grad_norm": 40.28125, "learning_rate": 9.895466465359565e-06, "loss": 17.0213, "step": 37000 }, { "epoch": 0.669196570842214, "grad_norm": 41.4375, "learning_rate": 9.895438213052905e-06, "loss": 16.3765, "step": 37010 }, { "epoch": 0.6693773859113419, "grad_norm": 43.9375, "learning_rate": 9.895409960746246e-06, "loss": 16.96, "step": 37020 }, { "epoch": 0.6695582009804697, "grad_norm": 44.1875, "learning_rate": 9.895381708439587e-06, "loss": 16.0563, "step": 37030 }, { "epoch": 0.6697390160495976, "grad_norm": 40.40625, "learning_rate": 9.895353456132927e-06, "loss": 16.8258, "step": 37040 }, { "epoch": 0.6699198311187254, "grad_norm": 41.625, "learning_rate": 9.895325203826266e-06, "loss": 16.9237, "step": 37050 }, { "epoch": 0.6701006461878533, "grad_norm": 43.15625, "learning_rate": 9.895296951519607e-06, "loss": 16.9691, "step": 37060 }, { "epoch": 0.6702814612569812, "grad_norm": 45.4375, "learning_rate": 9.895268699212947e-06, "loss": 16.9079, "step": 37070 }, { "epoch": 0.670462276326109, "grad_norm": 43.84375, "learning_rate": 9.895240446906288e-06, "loss": 16.7044, "step": 37080 }, { "epoch": 0.6706430913952369, "grad_norm": 42.5, "learning_rate": 9.895212194599629e-06, "loss": 16.7271, "step": 37090 }, { "epoch": 0.6708239064643647, "grad_norm": 41.75, "learning_rate": 9.89518394229297e-06, "loss": 16.8627, "step": 37100 }, { "epoch": 0.6710047215334926, "grad_norm": 43.5625, "learning_rate": 9.89515568998631e-06, "loss": 16.9436, "step": 37110 }, { "epoch": 0.6711855366026205, "grad_norm": 39.53125, "learning_rate": 9.89512743767965e-06, "loss": 16.9911, "step": 37120 }, { "epoch": 0.6713663516717483, "grad_norm": 41.0625, "learning_rate": 9.895099185372991e-06, "loss": 16.8614, "step": 37130 }, { "epoch": 0.6715471667408762, "grad_norm": 39.46875, "learning_rate": 9.89507093306633e-06, "loss": 16.749, "step": 37140 }, { "epoch": 0.671727981810004, "grad_norm": 41.96875, "learning_rate": 9.89504268075967e-06, "loss": 17.2616, "step": 37150 }, { "epoch": 0.6719087968791319, "grad_norm": 45.3125, "learning_rate": 9.895014428453011e-06, "loss": 17.2514, "step": 37160 }, { "epoch": 0.6720896119482598, "grad_norm": 41.96875, "learning_rate": 9.894986176146352e-06, "loss": 16.9123, "step": 37170 }, { "epoch": 0.6722704270173876, "grad_norm": 45.4375, "learning_rate": 9.894957923839693e-06, "loss": 16.9001, "step": 37180 }, { "epoch": 0.6724512420865155, "grad_norm": 42.75, "learning_rate": 9.894929671533033e-06, "loss": 16.7307, "step": 37190 }, { "epoch": 0.6726320571556433, "grad_norm": 43.5625, "learning_rate": 9.894901419226374e-06, "loss": 16.4719, "step": 37200 }, { "epoch": 0.6728128722247713, "grad_norm": 41.375, "learning_rate": 9.894873166919714e-06, "loss": 16.7455, "step": 37210 }, { "epoch": 0.6729936872938991, "grad_norm": 43.09375, "learning_rate": 9.894844914613055e-06, "loss": 17.014, "step": 37220 }, { "epoch": 0.6731745023630269, "grad_norm": 42.4375, "learning_rate": 9.894816662306394e-06, "loss": 16.5502, "step": 37230 }, { "epoch": 0.6733553174321548, "grad_norm": 41.34375, "learning_rate": 9.894788409999735e-06, "loss": 16.7081, "step": 37240 }, { "epoch": 0.6735361325012826, "grad_norm": 43.0, "learning_rate": 9.894760157693075e-06, "loss": 16.4107, "step": 37250 }, { "epoch": 0.6737169475704106, "grad_norm": 42.3125, "learning_rate": 9.894731905386416e-06, "loss": 16.7003, "step": 37260 }, { "epoch": 0.6738977626395384, "grad_norm": 42.4375, "learning_rate": 9.894703653079757e-06, "loss": 17.0678, "step": 37270 }, { "epoch": 0.6740785777086662, "grad_norm": 43.75, "learning_rate": 9.894675400773097e-06, "loss": 17.2967, "step": 37280 }, { "epoch": 0.6742593927777941, "grad_norm": 41.59375, "learning_rate": 9.894647148466438e-06, "loss": 16.9273, "step": 37290 }, { "epoch": 0.6744402078469219, "grad_norm": 40.59375, "learning_rate": 9.894618896159778e-06, "loss": 16.7979, "step": 37300 }, { "epoch": 0.6746210229160499, "grad_norm": 40.5625, "learning_rate": 9.894590643853117e-06, "loss": 16.8169, "step": 37310 }, { "epoch": 0.6748018379851777, "grad_norm": 39.21875, "learning_rate": 9.894562391546458e-06, "loss": 16.4699, "step": 37320 }, { "epoch": 0.6749826530543055, "grad_norm": 43.3125, "learning_rate": 9.894534139239799e-06, "loss": 17.2835, "step": 37330 }, { "epoch": 0.6751634681234334, "grad_norm": 44.0625, "learning_rate": 9.89450588693314e-06, "loss": 17.0986, "step": 37340 }, { "epoch": 0.6753442831925612, "grad_norm": 41.71875, "learning_rate": 9.89447763462648e-06, "loss": 16.5451, "step": 37350 }, { "epoch": 0.6755250982616892, "grad_norm": 38.75, "learning_rate": 9.89444938231982e-06, "loss": 16.5853, "step": 37360 }, { "epoch": 0.675705913330817, "grad_norm": 41.5625, "learning_rate": 9.894421130013161e-06, "loss": 16.5989, "step": 37370 }, { "epoch": 0.6758867283999449, "grad_norm": 43.78125, "learning_rate": 9.894392877706502e-06, "loss": 16.3647, "step": 37380 }, { "epoch": 0.6760675434690727, "grad_norm": 43.3125, "learning_rate": 9.894364625399842e-06, "loss": 16.8922, "step": 37390 }, { "epoch": 0.6762483585382005, "grad_norm": 42.53125, "learning_rate": 9.894336373093181e-06, "loss": 16.6042, "step": 37400 }, { "epoch": 0.6764291736073285, "grad_norm": 44.375, "learning_rate": 9.894308120786522e-06, "loss": 16.704, "step": 37410 }, { "epoch": 0.6766099886764563, "grad_norm": 43.21875, "learning_rate": 9.894279868479862e-06, "loss": 16.9412, "step": 37420 }, { "epoch": 0.6767908037455842, "grad_norm": 42.3125, "learning_rate": 9.894251616173203e-06, "loss": 16.9898, "step": 37430 }, { "epoch": 0.676971618814712, "grad_norm": 44.78125, "learning_rate": 9.894223363866544e-06, "loss": 17.0491, "step": 37440 }, { "epoch": 0.6771524338838398, "grad_norm": 44.1875, "learning_rate": 9.894195111559884e-06, "loss": 16.946, "step": 37450 }, { "epoch": 0.6773332489529678, "grad_norm": 42.53125, "learning_rate": 9.894166859253225e-06, "loss": 16.8964, "step": 37460 }, { "epoch": 0.6775140640220956, "grad_norm": 42.125, "learning_rate": 9.894138606946566e-06, "loss": 16.7398, "step": 37470 }, { "epoch": 0.6776948790912235, "grad_norm": 42.71875, "learning_rate": 9.894110354639905e-06, "loss": 17.4216, "step": 37480 }, { "epoch": 0.6778756941603513, "grad_norm": 42.03125, "learning_rate": 9.894082102333245e-06, "loss": 16.6143, "step": 37490 }, { "epoch": 0.6780565092294791, "grad_norm": 42.78125, "learning_rate": 9.894053850026586e-06, "loss": 16.975, "step": 37500 }, { "epoch": 0.6782373242986071, "grad_norm": 41.0, "learning_rate": 9.894025597719926e-06, "loss": 17.1048, "step": 37510 }, { "epoch": 0.6784181393677349, "grad_norm": 41.53125, "learning_rate": 9.893997345413267e-06, "loss": 16.8411, "step": 37520 }, { "epoch": 0.6785989544368628, "grad_norm": 43.34375, "learning_rate": 9.893969093106608e-06, "loss": 16.628, "step": 37530 }, { "epoch": 0.6787797695059906, "grad_norm": 42.875, "learning_rate": 9.893940840799948e-06, "loss": 16.8008, "step": 37540 }, { "epoch": 0.6789605845751185, "grad_norm": 42.71875, "learning_rate": 9.893912588493289e-06, "loss": 17.0204, "step": 37550 }, { "epoch": 0.6791413996442464, "grad_norm": 44.65625, "learning_rate": 9.89388433618663e-06, "loss": 16.8503, "step": 37560 }, { "epoch": 0.6793222147133742, "grad_norm": 42.65625, "learning_rate": 9.893856083879968e-06, "loss": 17.1805, "step": 37570 }, { "epoch": 0.6795030297825021, "grad_norm": 45.84375, "learning_rate": 9.893827831573309e-06, "loss": 17.1783, "step": 37580 }, { "epoch": 0.6796838448516299, "grad_norm": 42.65625, "learning_rate": 9.89379957926665e-06, "loss": 16.4853, "step": 37590 }, { "epoch": 0.6798646599207578, "grad_norm": 45.4375, "learning_rate": 9.89377132695999e-06, "loss": 16.8635, "step": 37600 }, { "epoch": 0.6800454749898857, "grad_norm": 40.90625, "learning_rate": 9.893743074653331e-06, "loss": 16.7238, "step": 37610 }, { "epoch": 0.6802262900590135, "grad_norm": 44.0, "learning_rate": 9.893714822346672e-06, "loss": 16.7515, "step": 37620 }, { "epoch": 0.6804071051281414, "grad_norm": 42.1875, "learning_rate": 9.893686570040012e-06, "loss": 16.5855, "step": 37630 }, { "epoch": 0.6805879201972692, "grad_norm": 45.28125, "learning_rate": 9.893658317733353e-06, "loss": 16.6717, "step": 37640 }, { "epoch": 0.6807687352663971, "grad_norm": 41.75, "learning_rate": 9.893630065426693e-06, "loss": 16.5964, "step": 37650 }, { "epoch": 0.680949550335525, "grad_norm": 41.9375, "learning_rate": 9.893601813120032e-06, "loss": 17.0906, "step": 37660 }, { "epoch": 0.6811303654046528, "grad_norm": 40.65625, "learning_rate": 9.893573560813373e-06, "loss": 16.9528, "step": 37670 }, { "epoch": 0.6813111804737807, "grad_norm": 40.46875, "learning_rate": 9.893545308506714e-06, "loss": 16.7316, "step": 37680 }, { "epoch": 0.6814919955429085, "grad_norm": 41.03125, "learning_rate": 9.893517056200054e-06, "loss": 17.0295, "step": 37690 }, { "epoch": 0.6816728106120364, "grad_norm": 42.71875, "learning_rate": 9.893488803893395e-06, "loss": 16.8099, "step": 37700 }, { "epoch": 0.6818536256811643, "grad_norm": 42.40625, "learning_rate": 9.893460551586735e-06, "loss": 17.2334, "step": 37710 }, { "epoch": 0.6820344407502922, "grad_norm": 43.90625, "learning_rate": 9.893432299280076e-06, "loss": 17.0922, "step": 37720 }, { "epoch": 0.68221525581942, "grad_norm": 42.84375, "learning_rate": 9.893404046973417e-06, "loss": 16.5968, "step": 37730 }, { "epoch": 0.6823960708885478, "grad_norm": 42.78125, "learning_rate": 9.893375794666756e-06, "loss": 16.5034, "step": 37740 }, { "epoch": 0.6825768859576757, "grad_norm": 41.03125, "learning_rate": 9.893347542360096e-06, "loss": 16.4676, "step": 37750 }, { "epoch": 0.6827577010268036, "grad_norm": 44.15625, "learning_rate": 9.893319290053437e-06, "loss": 17.131, "step": 37760 }, { "epoch": 0.6829385160959315, "grad_norm": 43.78125, "learning_rate": 9.893291037746777e-06, "loss": 16.6227, "step": 37770 }, { "epoch": 0.6831193311650593, "grad_norm": 39.53125, "learning_rate": 9.893262785440118e-06, "loss": 17.1105, "step": 37780 }, { "epoch": 0.6833001462341871, "grad_norm": 46.03125, "learning_rate": 9.893234533133459e-06, "loss": 16.6947, "step": 37790 }, { "epoch": 0.683480961303315, "grad_norm": 41.4375, "learning_rate": 9.8932062808268e-06, "loss": 16.7799, "step": 37800 }, { "epoch": 0.6836617763724429, "grad_norm": 42.59375, "learning_rate": 9.89317802852014e-06, "loss": 16.9629, "step": 37810 }, { "epoch": 0.6838425914415708, "grad_norm": 43.34375, "learning_rate": 9.89314977621348e-06, "loss": 16.9785, "step": 37820 }, { "epoch": 0.6840234065106986, "grad_norm": 47.46875, "learning_rate": 9.89312152390682e-06, "loss": 17.1893, "step": 37830 }, { "epoch": 0.6842042215798264, "grad_norm": 45.0625, "learning_rate": 9.89309327160016e-06, "loss": 16.745, "step": 37840 }, { "epoch": 0.6843850366489543, "grad_norm": 41.03125, "learning_rate": 9.8930650192935e-06, "loss": 16.7386, "step": 37850 }, { "epoch": 0.6845658517180822, "grad_norm": 44.5625, "learning_rate": 9.893036766986841e-06, "loss": 17.1872, "step": 37860 }, { "epoch": 0.6847466667872101, "grad_norm": 45.09375, "learning_rate": 9.893008514680182e-06, "loss": 17.1807, "step": 37870 }, { "epoch": 0.6849274818563379, "grad_norm": 40.8125, "learning_rate": 9.892980262373523e-06, "loss": 16.3967, "step": 37880 }, { "epoch": 0.6851082969254658, "grad_norm": 43.3125, "learning_rate": 9.892952010066863e-06, "loss": 17.071, "step": 37890 }, { "epoch": 0.6852891119945936, "grad_norm": 40.75, "learning_rate": 9.892923757760204e-06, "loss": 16.4375, "step": 37900 }, { "epoch": 0.6854699270637215, "grad_norm": 42.3125, "learning_rate": 9.892895505453543e-06, "loss": 16.9052, "step": 37910 }, { "epoch": 0.6856507421328494, "grad_norm": 41.1875, "learning_rate": 9.892867253146883e-06, "loss": 17.2202, "step": 37920 }, { "epoch": 0.6858315572019772, "grad_norm": 41.28125, "learning_rate": 9.892839000840224e-06, "loss": 16.2978, "step": 37930 }, { "epoch": 0.6860123722711051, "grad_norm": 42.3125, "learning_rate": 9.892810748533565e-06, "loss": 16.8237, "step": 37940 }, { "epoch": 0.6861931873402329, "grad_norm": 40.4375, "learning_rate": 9.892782496226905e-06, "loss": 16.7933, "step": 37950 }, { "epoch": 0.6863740024093608, "grad_norm": 41.09375, "learning_rate": 9.892754243920246e-06, "loss": 16.5888, "step": 37960 }, { "epoch": 0.6865548174784887, "grad_norm": 42.625, "learning_rate": 9.892725991613587e-06, "loss": 17.1759, "step": 37970 }, { "epoch": 0.6867356325476165, "grad_norm": 44.28125, "learning_rate": 9.892697739306927e-06, "loss": 16.5377, "step": 37980 }, { "epoch": 0.6869164476167444, "grad_norm": 41.9375, "learning_rate": 9.892669487000268e-06, "loss": 16.3997, "step": 37990 }, { "epoch": 0.6870972626858722, "grad_norm": 43.1875, "learning_rate": 9.892641234693607e-06, "loss": 16.5976, "step": 38000 }, { "epoch": 0.687278077755, "grad_norm": 45.53125, "learning_rate": 9.892612982386947e-06, "loss": 16.7352, "step": 38010 }, { "epoch": 0.687458892824128, "grad_norm": 44.0, "learning_rate": 9.892584730080288e-06, "loss": 17.0146, "step": 38020 }, { "epoch": 0.6876397078932558, "grad_norm": 41.5, "learning_rate": 9.892556477773629e-06, "loss": 16.9501, "step": 38030 }, { "epoch": 0.6878205229623837, "grad_norm": 40.03125, "learning_rate": 9.89252822546697e-06, "loss": 16.6916, "step": 38040 }, { "epoch": 0.6880013380315115, "grad_norm": 41.59375, "learning_rate": 9.89249997316031e-06, "loss": 16.8109, "step": 38050 }, { "epoch": 0.6881821531006395, "grad_norm": 43.09375, "learning_rate": 9.89247172085365e-06, "loss": 16.1281, "step": 38060 }, { "epoch": 0.6883629681697673, "grad_norm": 39.875, "learning_rate": 9.892443468546991e-06, "loss": 16.5465, "step": 38070 }, { "epoch": 0.6885437832388951, "grad_norm": 41.1875, "learning_rate": 9.892415216240332e-06, "loss": 16.8856, "step": 38080 }, { "epoch": 0.688724598308023, "grad_norm": 44.875, "learning_rate": 9.89238696393367e-06, "loss": 17.1266, "step": 38090 }, { "epoch": 0.6889054133771508, "grad_norm": 48.3125, "learning_rate": 9.892358711627011e-06, "loss": 17.2518, "step": 38100 }, { "epoch": 0.6890862284462788, "grad_norm": 45.1875, "learning_rate": 9.892330459320352e-06, "loss": 16.6606, "step": 38110 }, { "epoch": 0.6892670435154066, "grad_norm": 44.1875, "learning_rate": 9.892302207013692e-06, "loss": 17.1191, "step": 38120 }, { "epoch": 0.6894478585845344, "grad_norm": 42.03125, "learning_rate": 9.892273954707033e-06, "loss": 16.8843, "step": 38130 }, { "epoch": 0.6896286736536623, "grad_norm": 43.40625, "learning_rate": 9.892245702400374e-06, "loss": 17.0698, "step": 38140 }, { "epoch": 0.6898094887227901, "grad_norm": 42.8125, "learning_rate": 9.892217450093714e-06, "loss": 16.8898, "step": 38150 }, { "epoch": 0.6899903037919181, "grad_norm": 42.1875, "learning_rate": 9.892189197787055e-06, "loss": 16.6293, "step": 38160 }, { "epoch": 0.6901711188610459, "grad_norm": 40.1875, "learning_rate": 9.892160945480394e-06, "loss": 16.8833, "step": 38170 }, { "epoch": 0.6903519339301737, "grad_norm": 43.1875, "learning_rate": 9.892132693173735e-06, "loss": 16.8337, "step": 38180 }, { "epoch": 0.6905327489993016, "grad_norm": 43.46875, "learning_rate": 9.892104440867075e-06, "loss": 16.8272, "step": 38190 }, { "epoch": 0.6907135640684294, "grad_norm": 42.53125, "learning_rate": 9.892076188560416e-06, "loss": 16.7386, "step": 38200 }, { "epoch": 0.6908943791375574, "grad_norm": 41.09375, "learning_rate": 9.892047936253756e-06, "loss": 17.0917, "step": 38210 }, { "epoch": 0.6910751942066852, "grad_norm": 44.03125, "learning_rate": 9.892019683947097e-06, "loss": 16.8651, "step": 38220 }, { "epoch": 0.6912560092758131, "grad_norm": 43.9375, "learning_rate": 9.891991431640438e-06, "loss": 17.1524, "step": 38230 }, { "epoch": 0.6914368243449409, "grad_norm": 42.25, "learning_rate": 9.891963179333778e-06, "loss": 16.482, "step": 38240 }, { "epoch": 0.6916176394140687, "grad_norm": 40.8125, "learning_rate": 9.891934927027119e-06, "loss": 16.8363, "step": 38250 }, { "epoch": 0.6917984544831967, "grad_norm": 39.59375, "learning_rate": 9.891906674720458e-06, "loss": 16.6902, "step": 38260 }, { "epoch": 0.6919792695523245, "grad_norm": 43.1875, "learning_rate": 9.891878422413798e-06, "loss": 17.1459, "step": 38270 }, { "epoch": 0.6921600846214524, "grad_norm": 41.75, "learning_rate": 9.891850170107139e-06, "loss": 16.5241, "step": 38280 }, { "epoch": 0.6923408996905802, "grad_norm": 43.375, "learning_rate": 9.89182191780048e-06, "loss": 16.95, "step": 38290 }, { "epoch": 0.692521714759708, "grad_norm": 44.4375, "learning_rate": 9.89179366549382e-06, "loss": 16.9044, "step": 38300 }, { "epoch": 0.692702529828836, "grad_norm": 41.375, "learning_rate": 9.891765413187161e-06, "loss": 17.0597, "step": 38310 }, { "epoch": 0.6928833448979638, "grad_norm": 39.875, "learning_rate": 9.891737160880502e-06, "loss": 16.8813, "step": 38320 }, { "epoch": 0.6930641599670917, "grad_norm": 40.96875, "learning_rate": 9.891708908573842e-06, "loss": 17.1459, "step": 38330 }, { "epoch": 0.6932449750362195, "grad_norm": 42.5, "learning_rate": 9.891680656267181e-06, "loss": 16.6943, "step": 38340 }, { "epoch": 0.6934257901053473, "grad_norm": 42.625, "learning_rate": 9.891652403960522e-06, "loss": 16.8816, "step": 38350 }, { "epoch": 0.6936066051744753, "grad_norm": 42.3125, "learning_rate": 9.891624151653862e-06, "loss": 16.8835, "step": 38360 }, { "epoch": 0.6937874202436031, "grad_norm": 44.59375, "learning_rate": 9.891595899347203e-06, "loss": 17.0272, "step": 38370 }, { "epoch": 0.693968235312731, "grad_norm": 40.53125, "learning_rate": 9.891567647040544e-06, "loss": 16.7614, "step": 38380 }, { "epoch": 0.6941490503818588, "grad_norm": 42.125, "learning_rate": 9.891539394733883e-06, "loss": 16.4766, "step": 38390 }, { "epoch": 0.6943298654509867, "grad_norm": 40.4375, "learning_rate": 9.891511142427225e-06, "loss": 17.3731, "step": 38400 }, { "epoch": 0.6945106805201146, "grad_norm": 40.78125, "learning_rate": 9.891482890120565e-06, "loss": 16.6749, "step": 38410 }, { "epoch": 0.6946914955892424, "grad_norm": 43.84375, "learning_rate": 9.891454637813906e-06, "loss": 16.6835, "step": 38420 }, { "epoch": 0.6948723106583703, "grad_norm": 44.875, "learning_rate": 9.891426385507245e-06, "loss": 16.9625, "step": 38430 }, { "epoch": 0.6950531257274981, "grad_norm": 43.0, "learning_rate": 9.891398133200586e-06, "loss": 16.6631, "step": 38440 }, { "epoch": 0.695233940796626, "grad_norm": 41.0625, "learning_rate": 9.891369880893926e-06, "loss": 16.7724, "step": 38450 }, { "epoch": 0.6954147558657539, "grad_norm": 43.96875, "learning_rate": 9.891341628587267e-06, "loss": 16.9028, "step": 38460 }, { "epoch": 0.6955955709348817, "grad_norm": 42.5, "learning_rate": 9.891313376280607e-06, "loss": 17.0701, "step": 38470 }, { "epoch": 0.6957763860040096, "grad_norm": 42.34375, "learning_rate": 9.891285123973946e-06, "loss": 16.3494, "step": 38480 }, { "epoch": 0.6959572010731374, "grad_norm": 43.4375, "learning_rate": 9.891256871667289e-06, "loss": 16.4693, "step": 38490 }, { "epoch": 0.6961380161422653, "grad_norm": 43.25, "learning_rate": 9.89122861936063e-06, "loss": 16.8401, "step": 38500 }, { "epoch": 0.6963188312113932, "grad_norm": 42.8125, "learning_rate": 9.891200367053968e-06, "loss": 16.548, "step": 38510 }, { "epoch": 0.696499646280521, "grad_norm": 42.40625, "learning_rate": 9.891172114747309e-06, "loss": 17.2683, "step": 38520 }, { "epoch": 0.6966804613496489, "grad_norm": 43.71875, "learning_rate": 9.89114386244065e-06, "loss": 17.3941, "step": 38530 }, { "epoch": 0.6968612764187767, "grad_norm": 42.15625, "learning_rate": 9.89111561013399e-06, "loss": 17.3078, "step": 38540 }, { "epoch": 0.6970420914879046, "grad_norm": 41.0625, "learning_rate": 9.89108735782733e-06, "loss": 16.5073, "step": 38550 }, { "epoch": 0.6972229065570325, "grad_norm": 44.9375, "learning_rate": 9.891059105520671e-06, "loss": 16.5629, "step": 38560 }, { "epoch": 0.6974037216261604, "grad_norm": 42.96875, "learning_rate": 9.891030853214012e-06, "loss": 16.6385, "step": 38570 }, { "epoch": 0.6975845366952882, "grad_norm": 43.21875, "learning_rate": 9.891002600907353e-06, "loss": 16.8539, "step": 38580 }, { "epoch": 0.697765351764416, "grad_norm": 43.65625, "learning_rate": 9.890974348600693e-06, "loss": 16.9434, "step": 38590 }, { "epoch": 0.6979461668335439, "grad_norm": 43.3125, "learning_rate": 9.890946096294032e-06, "loss": 16.9127, "step": 38600 }, { "epoch": 0.6981269819026718, "grad_norm": 44.65625, "learning_rate": 9.890917843987373e-06, "loss": 16.9669, "step": 38610 }, { "epoch": 0.6983077969717997, "grad_norm": 45.53125, "learning_rate": 9.890889591680713e-06, "loss": 16.6195, "step": 38620 }, { "epoch": 0.6984886120409275, "grad_norm": 43.03125, "learning_rate": 9.890861339374054e-06, "loss": 16.8106, "step": 38630 }, { "epoch": 0.6986694271100553, "grad_norm": 41.15625, "learning_rate": 9.890833087067395e-06, "loss": 16.8928, "step": 38640 }, { "epoch": 0.6988502421791832, "grad_norm": 44.59375, "learning_rate": 9.890804834760734e-06, "loss": 16.7577, "step": 38650 }, { "epoch": 0.699031057248311, "grad_norm": 42.3125, "learning_rate": 9.890776582454076e-06, "loss": 16.6293, "step": 38660 }, { "epoch": 0.699211872317439, "grad_norm": 42.9375, "learning_rate": 9.890748330147417e-06, "loss": 16.8936, "step": 38670 }, { "epoch": 0.6993926873865668, "grad_norm": 43.125, "learning_rate": 9.890720077840757e-06, "loss": 16.863, "step": 38680 }, { "epoch": 0.6995735024556946, "grad_norm": 44.125, "learning_rate": 9.890691825534096e-06, "loss": 16.9209, "step": 38690 }, { "epoch": 0.6997543175248225, "grad_norm": 47.375, "learning_rate": 9.890663573227437e-06, "loss": 16.7577, "step": 38700 }, { "epoch": 0.6999351325939503, "grad_norm": 44.46875, "learning_rate": 9.890635320920777e-06, "loss": 16.7093, "step": 38710 }, { "epoch": 0.7001159476630783, "grad_norm": 43.75, "learning_rate": 9.890607068614118e-06, "loss": 16.6643, "step": 38720 }, { "epoch": 0.7002967627322061, "grad_norm": 42.28125, "learning_rate": 9.890578816307459e-06, "loss": 16.9856, "step": 38730 }, { "epoch": 0.700477577801334, "grad_norm": 41.59375, "learning_rate": 9.890550564000798e-06, "loss": 16.7979, "step": 38740 }, { "epoch": 0.7006583928704618, "grad_norm": 41.3125, "learning_rate": 9.89052231169414e-06, "loss": 17.2538, "step": 38750 }, { "epoch": 0.7008392079395896, "grad_norm": 46.84375, "learning_rate": 9.89049405938748e-06, "loss": 16.9212, "step": 38760 }, { "epoch": 0.7010200230087176, "grad_norm": 46.71875, "learning_rate": 9.89046580708082e-06, "loss": 17.0221, "step": 38770 }, { "epoch": 0.7012008380778454, "grad_norm": 43.25, "learning_rate": 9.89043755477416e-06, "loss": 17.1704, "step": 38780 }, { "epoch": 0.7013816531469733, "grad_norm": 41.65625, "learning_rate": 9.8904093024675e-06, "loss": 17.0322, "step": 38790 }, { "epoch": 0.7015624682161011, "grad_norm": 42.75, "learning_rate": 9.890381050160841e-06, "loss": 16.8613, "step": 38800 }, { "epoch": 0.701743283285229, "grad_norm": 40.375, "learning_rate": 9.890352797854182e-06, "loss": 16.7168, "step": 38810 }, { "epoch": 0.7019240983543569, "grad_norm": 40.125, "learning_rate": 9.89032454554752e-06, "loss": 16.8869, "step": 38820 }, { "epoch": 0.7021049134234847, "grad_norm": 42.0, "learning_rate": 9.890296293240861e-06, "loss": 17.2882, "step": 38830 }, { "epoch": 0.7022857284926126, "grad_norm": 40.0, "learning_rate": 9.890268040934204e-06, "loss": 16.2854, "step": 38840 }, { "epoch": 0.7024665435617404, "grad_norm": 41.4375, "learning_rate": 9.890239788627544e-06, "loss": 16.8362, "step": 38850 }, { "epoch": 0.7026473586308682, "grad_norm": 46.09375, "learning_rate": 9.890211536320883e-06, "loss": 16.914, "step": 38860 }, { "epoch": 0.7028281736999962, "grad_norm": 41.96875, "learning_rate": 9.890183284014224e-06, "loss": 16.5401, "step": 38870 }, { "epoch": 0.703008988769124, "grad_norm": 40.9375, "learning_rate": 9.890155031707565e-06, "loss": 16.6829, "step": 38880 }, { "epoch": 0.7031898038382519, "grad_norm": 40.0625, "learning_rate": 9.890126779400905e-06, "loss": 16.6298, "step": 38890 }, { "epoch": 0.7033706189073797, "grad_norm": 40.875, "learning_rate": 9.890098527094246e-06, "loss": 16.8991, "step": 38900 }, { "epoch": 0.7035514339765077, "grad_norm": 42.34375, "learning_rate": 9.890070274787585e-06, "loss": 16.7942, "step": 38910 }, { "epoch": 0.7037322490456355, "grad_norm": 43.28125, "learning_rate": 9.890042022480927e-06, "loss": 16.8306, "step": 38920 }, { "epoch": 0.7039130641147633, "grad_norm": 43.78125, "learning_rate": 9.890013770174268e-06, "loss": 16.7857, "step": 38930 }, { "epoch": 0.7040938791838912, "grad_norm": 41.8125, "learning_rate": 9.889985517867607e-06, "loss": 17.0068, "step": 38940 }, { "epoch": 0.704274694253019, "grad_norm": 45.0, "learning_rate": 9.889957265560947e-06, "loss": 17.0016, "step": 38950 }, { "epoch": 0.704455509322147, "grad_norm": 44.0, "learning_rate": 9.889929013254288e-06, "loss": 16.9943, "step": 38960 }, { "epoch": 0.7046363243912748, "grad_norm": 42.25, "learning_rate": 9.889900760947628e-06, "loss": 17.1728, "step": 38970 }, { "epoch": 0.7048171394604026, "grad_norm": 41.28125, "learning_rate": 9.889872508640969e-06, "loss": 16.7221, "step": 38980 }, { "epoch": 0.7049979545295305, "grad_norm": 42.125, "learning_rate": 9.88984425633431e-06, "loss": 17.306, "step": 38990 }, { "epoch": 0.7051787695986583, "grad_norm": 46.5625, "learning_rate": 9.889816004027649e-06, "loss": 16.8126, "step": 39000 }, { "epoch": 0.7053595846677863, "grad_norm": 47.84375, "learning_rate": 9.889787751720991e-06, "loss": 17.1228, "step": 39010 }, { "epoch": 0.7055403997369141, "grad_norm": 44.34375, "learning_rate": 9.889759499414332e-06, "loss": 16.9815, "step": 39020 }, { "epoch": 0.7057212148060419, "grad_norm": 46.75, "learning_rate": 9.88973124710767e-06, "loss": 16.8795, "step": 39030 }, { "epoch": 0.7059020298751698, "grad_norm": 41.25, "learning_rate": 9.889702994801011e-06, "loss": 16.5388, "step": 39040 }, { "epoch": 0.7060828449442976, "grad_norm": 38.8125, "learning_rate": 9.889674742494352e-06, "loss": 17.3997, "step": 39050 }, { "epoch": 0.7062636600134256, "grad_norm": 41.96875, "learning_rate": 9.889646490187692e-06, "loss": 17.334, "step": 39060 }, { "epoch": 0.7064444750825534, "grad_norm": 42.5625, "learning_rate": 9.889618237881033e-06, "loss": 16.7021, "step": 39070 }, { "epoch": 0.7066252901516813, "grad_norm": 42.75, "learning_rate": 9.889589985574372e-06, "loss": 16.7919, "step": 39080 }, { "epoch": 0.7068061052208091, "grad_norm": 42.375, "learning_rate": 9.889561733267713e-06, "loss": 16.9483, "step": 39090 }, { "epoch": 0.7069869202899369, "grad_norm": 44.0625, "learning_rate": 9.889533480961055e-06, "loss": 17.155, "step": 39100 }, { "epoch": 0.7071677353590649, "grad_norm": 43.46875, "learning_rate": 9.889505228654395e-06, "loss": 16.7376, "step": 39110 }, { "epoch": 0.7073485504281927, "grad_norm": 44.15625, "learning_rate": 9.889476976347734e-06, "loss": 16.9099, "step": 39120 }, { "epoch": 0.7075293654973206, "grad_norm": 41.03125, "learning_rate": 9.889448724041075e-06, "loss": 16.8503, "step": 39130 }, { "epoch": 0.7077101805664484, "grad_norm": 41.125, "learning_rate": 9.889420471734416e-06, "loss": 17.4304, "step": 39140 }, { "epoch": 0.7078909956355762, "grad_norm": 41.4375, "learning_rate": 9.889392219427756e-06, "loss": 16.8346, "step": 39150 }, { "epoch": 0.7080718107047042, "grad_norm": 40.125, "learning_rate": 9.889363967121097e-06, "loss": 17.1053, "step": 39160 }, { "epoch": 0.708252625773832, "grad_norm": 40.59375, "learning_rate": 9.889335714814436e-06, "loss": 17.1629, "step": 39170 }, { "epoch": 0.7084334408429599, "grad_norm": 40.5, "learning_rate": 9.889307462507776e-06, "loss": 16.7123, "step": 39180 }, { "epoch": 0.7086142559120877, "grad_norm": 41.75, "learning_rate": 9.889279210201119e-06, "loss": 16.7231, "step": 39190 }, { "epoch": 0.7087950709812155, "grad_norm": 42.6875, "learning_rate": 9.889250957894458e-06, "loss": 16.8257, "step": 39200 }, { "epoch": 0.7089758860503435, "grad_norm": 41.625, "learning_rate": 9.889222705587798e-06, "loss": 16.6544, "step": 39210 }, { "epoch": 0.7091567011194713, "grad_norm": 42.28125, "learning_rate": 9.889194453281139e-06, "loss": 16.5631, "step": 39220 }, { "epoch": 0.7093375161885992, "grad_norm": 45.625, "learning_rate": 9.88916620097448e-06, "loss": 16.7361, "step": 39230 }, { "epoch": 0.709518331257727, "grad_norm": 41.5625, "learning_rate": 9.88913794866782e-06, "loss": 16.61, "step": 39240 }, { "epoch": 0.7096991463268549, "grad_norm": 43.9375, "learning_rate": 9.889109696361159e-06, "loss": 16.7179, "step": 39250 }, { "epoch": 0.7098799613959828, "grad_norm": 43.4375, "learning_rate": 9.8890814440545e-06, "loss": 16.5795, "step": 39260 }, { "epoch": 0.7100607764651106, "grad_norm": 42.25, "learning_rate": 9.88905319174784e-06, "loss": 16.9953, "step": 39270 }, { "epoch": 0.7102415915342385, "grad_norm": 42.96875, "learning_rate": 9.889024939441183e-06, "loss": 17.0232, "step": 39280 }, { "epoch": 0.7104224066033663, "grad_norm": 42.03125, "learning_rate": 9.888996687134522e-06, "loss": 16.6105, "step": 39290 }, { "epoch": 0.7106032216724942, "grad_norm": 42.90625, "learning_rate": 9.888968434827862e-06, "loss": 16.4751, "step": 39300 }, { "epoch": 0.710784036741622, "grad_norm": 44.875, "learning_rate": 9.888940182521203e-06, "loss": 16.6902, "step": 39310 }, { "epoch": 0.7109648518107499, "grad_norm": 42.53125, "learning_rate": 9.888911930214543e-06, "loss": 16.8739, "step": 39320 }, { "epoch": 0.7111456668798778, "grad_norm": 42.34375, "learning_rate": 9.888883677907884e-06, "loss": 16.8996, "step": 39330 }, { "epoch": 0.7113264819490056, "grad_norm": 40.40625, "learning_rate": 9.888855425601223e-06, "loss": 17.1528, "step": 39340 }, { "epoch": 0.7115072970181335, "grad_norm": 43.8125, "learning_rate": 9.888827173294564e-06, "loss": 16.411, "step": 39350 }, { "epoch": 0.7116881120872613, "grad_norm": 42.90625, "learning_rate": 9.888798920987906e-06, "loss": 17.1764, "step": 39360 }, { "epoch": 0.7118689271563892, "grad_norm": 41.65625, "learning_rate": 9.888770668681245e-06, "loss": 16.745, "step": 39370 }, { "epoch": 0.7120497422255171, "grad_norm": 42.875, "learning_rate": 9.888742416374585e-06, "loss": 16.6895, "step": 39380 }, { "epoch": 0.7122305572946449, "grad_norm": 41.8125, "learning_rate": 9.888714164067926e-06, "loss": 16.6713, "step": 39390 }, { "epoch": 0.7124113723637728, "grad_norm": 42.6875, "learning_rate": 9.888685911761267e-06, "loss": 16.8277, "step": 39400 }, { "epoch": 0.7125921874329006, "grad_norm": 44.25, "learning_rate": 9.888657659454607e-06, "loss": 16.4465, "step": 39410 }, { "epoch": 0.7127730025020286, "grad_norm": 43.03125, "learning_rate": 9.888629407147948e-06, "loss": 17.0582, "step": 39420 }, { "epoch": 0.7129538175711564, "grad_norm": 43.21875, "learning_rate": 9.888601154841287e-06, "loss": 17.0685, "step": 39430 }, { "epoch": 0.7131346326402842, "grad_norm": 43.5, "learning_rate": 9.888572902534628e-06, "loss": 16.3677, "step": 39440 }, { "epoch": 0.7133154477094121, "grad_norm": 43.25, "learning_rate": 9.88854465022797e-06, "loss": 16.704, "step": 39450 }, { "epoch": 0.71349626277854, "grad_norm": 43.3125, "learning_rate": 9.888516397921309e-06, "loss": 17.0324, "step": 39460 }, { "epoch": 0.7136770778476679, "grad_norm": 42.125, "learning_rate": 9.88848814561465e-06, "loss": 17.0155, "step": 39470 }, { "epoch": 0.7138578929167957, "grad_norm": 41.34375, "learning_rate": 9.88845989330799e-06, "loss": 17.1789, "step": 39480 }, { "epoch": 0.7140387079859235, "grad_norm": 45.875, "learning_rate": 9.88843164100133e-06, "loss": 16.8495, "step": 39490 }, { "epoch": 0.7142195230550514, "grad_norm": 44.125, "learning_rate": 9.888403388694671e-06, "loss": 16.6137, "step": 39500 }, { "epoch": 0.7144003381241792, "grad_norm": 42.53125, "learning_rate": 9.88837513638801e-06, "loss": 16.8421, "step": 39510 }, { "epoch": 0.7145811531933072, "grad_norm": 45.21875, "learning_rate": 9.88834688408135e-06, "loss": 16.8688, "step": 39520 }, { "epoch": 0.714761968262435, "grad_norm": 42.75, "learning_rate": 9.888318631774691e-06, "loss": 16.5882, "step": 39530 }, { "epoch": 0.7149427833315628, "grad_norm": 43.46875, "learning_rate": 9.888290379468034e-06, "loss": 16.5181, "step": 39540 }, { "epoch": 0.7151235984006907, "grad_norm": 43.15625, "learning_rate": 9.888262127161373e-06, "loss": 17.2262, "step": 39550 }, { "epoch": 0.7153044134698185, "grad_norm": 44.9375, "learning_rate": 9.888233874854713e-06, "loss": 16.6752, "step": 39560 }, { "epoch": 0.7154852285389465, "grad_norm": 46.625, "learning_rate": 9.888205622548054e-06, "loss": 17.0998, "step": 39570 }, { "epoch": 0.7156660436080743, "grad_norm": 42.21875, "learning_rate": 9.888177370241395e-06, "loss": 16.7745, "step": 39580 }, { "epoch": 0.7158468586772022, "grad_norm": 42.6875, "learning_rate": 9.888149117934735e-06, "loss": 16.4374, "step": 39590 }, { "epoch": 0.71602767374633, "grad_norm": 41.71875, "learning_rate": 9.888120865628074e-06, "loss": 16.4207, "step": 39600 }, { "epoch": 0.7162084888154578, "grad_norm": 45.21875, "learning_rate": 9.888092613321415e-06, "loss": 16.7451, "step": 39610 }, { "epoch": 0.7163893038845858, "grad_norm": 39.90625, "learning_rate": 9.888064361014755e-06, "loss": 17.1233, "step": 39620 }, { "epoch": 0.7165701189537136, "grad_norm": 44.5, "learning_rate": 9.888036108708096e-06, "loss": 16.855, "step": 39630 }, { "epoch": 0.7167509340228415, "grad_norm": 40.84375, "learning_rate": 9.888007856401437e-06, "loss": 16.5099, "step": 39640 }, { "epoch": 0.7169317490919693, "grad_norm": 41.1875, "learning_rate": 9.887979604094777e-06, "loss": 17.1891, "step": 39650 }, { "epoch": 0.7171125641610971, "grad_norm": 43.21875, "learning_rate": 9.887951351788118e-06, "loss": 16.7949, "step": 39660 }, { "epoch": 0.7172933792302251, "grad_norm": 40.03125, "learning_rate": 9.887923099481458e-06, "loss": 16.9468, "step": 39670 }, { "epoch": 0.7174741942993529, "grad_norm": 42.0, "learning_rate": 9.887894847174797e-06, "loss": 16.891, "step": 39680 }, { "epoch": 0.7176550093684808, "grad_norm": 43.6875, "learning_rate": 9.887866594868138e-06, "loss": 17.1045, "step": 39690 }, { "epoch": 0.7178358244376086, "grad_norm": 43.125, "learning_rate": 9.887838342561479e-06, "loss": 17.314, "step": 39700 }, { "epoch": 0.7180166395067364, "grad_norm": 43.5625, "learning_rate": 9.887810090254821e-06, "loss": 16.633, "step": 39710 }, { "epoch": 0.7181974545758644, "grad_norm": 45.9375, "learning_rate": 9.88778183794816e-06, "loss": 16.8595, "step": 39720 }, { "epoch": 0.7183782696449922, "grad_norm": 42.78125, "learning_rate": 9.8877535856415e-06, "loss": 16.9203, "step": 39730 }, { "epoch": 0.7185590847141201, "grad_norm": 42.90625, "learning_rate": 9.887725333334841e-06, "loss": 16.9117, "step": 39740 }, { "epoch": 0.7187398997832479, "grad_norm": 44.5625, "learning_rate": 9.887697081028182e-06, "loss": 17.2357, "step": 39750 }, { "epoch": 0.7189207148523757, "grad_norm": 40.5625, "learning_rate": 9.887668828721522e-06, "loss": 16.8096, "step": 39760 }, { "epoch": 0.7191015299215037, "grad_norm": 42.625, "learning_rate": 9.887640576414861e-06, "loss": 16.8199, "step": 39770 }, { "epoch": 0.7192823449906315, "grad_norm": 44.71875, "learning_rate": 9.887612324108202e-06, "loss": 16.8323, "step": 39780 }, { "epoch": 0.7194631600597594, "grad_norm": 41.90625, "learning_rate": 9.887584071801543e-06, "loss": 16.9276, "step": 39790 }, { "epoch": 0.7196439751288872, "grad_norm": 41.65625, "learning_rate": 9.887555819494883e-06, "loss": 16.6748, "step": 39800 }, { "epoch": 0.7198247901980152, "grad_norm": 40.84375, "learning_rate": 9.887527567188224e-06, "loss": 16.8066, "step": 39810 }, { "epoch": 0.720005605267143, "grad_norm": 40.40625, "learning_rate": 9.887499314881564e-06, "loss": 17.2768, "step": 39820 }, { "epoch": 0.7201864203362708, "grad_norm": 44.8125, "learning_rate": 9.887471062574905e-06, "loss": 16.4379, "step": 39830 }, { "epoch": 0.7203672354053987, "grad_norm": 41.3125, "learning_rate": 9.887442810268246e-06, "loss": 16.8064, "step": 39840 }, { "epoch": 0.7205480504745265, "grad_norm": 41.875, "learning_rate": 9.887414557961586e-06, "loss": 16.7328, "step": 39850 }, { "epoch": 0.7207288655436545, "grad_norm": 42.8125, "learning_rate": 9.887386305654925e-06, "loss": 16.8048, "step": 39860 }, { "epoch": 0.7209096806127823, "grad_norm": 44.0, "learning_rate": 9.887358053348266e-06, "loss": 16.5774, "step": 39870 }, { "epoch": 0.7210904956819101, "grad_norm": 44.5625, "learning_rate": 9.887329801041606e-06, "loss": 16.6305, "step": 39880 }, { "epoch": 0.721271310751038, "grad_norm": 40.125, "learning_rate": 9.887301548734947e-06, "loss": 16.7518, "step": 39890 }, { "epoch": 0.7214521258201658, "grad_norm": 42.25, "learning_rate": 9.887273296428288e-06, "loss": 17.4451, "step": 39900 }, { "epoch": 0.7216329408892938, "grad_norm": 43.4375, "learning_rate": 9.887245044121628e-06, "loss": 16.9063, "step": 39910 }, { "epoch": 0.7218137559584216, "grad_norm": 43.40625, "learning_rate": 9.887216791814969e-06, "loss": 16.5798, "step": 39920 }, { "epoch": 0.7219945710275494, "grad_norm": 42.125, "learning_rate": 9.88718853950831e-06, "loss": 16.8749, "step": 39930 }, { "epoch": 0.7221753860966773, "grad_norm": 43.03125, "learning_rate": 9.887160287201648e-06, "loss": 16.9119, "step": 39940 }, { "epoch": 0.7223562011658051, "grad_norm": 42.5, "learning_rate": 9.887132034894989e-06, "loss": 16.7371, "step": 39950 }, { "epoch": 0.722537016234933, "grad_norm": 41.25, "learning_rate": 9.88710378258833e-06, "loss": 17.1396, "step": 39960 }, { "epoch": 0.7227178313040609, "grad_norm": 43.75, "learning_rate": 9.88707553028167e-06, "loss": 16.7851, "step": 39970 }, { "epoch": 0.7228986463731888, "grad_norm": 41.5625, "learning_rate": 9.887047277975011e-06, "loss": 17.002, "step": 39980 }, { "epoch": 0.7230794614423166, "grad_norm": 42.8125, "learning_rate": 9.887019025668352e-06, "loss": 17.0829, "step": 39990 }, { "epoch": 0.7232602765114444, "grad_norm": 39.84375, "learning_rate": 9.886990773361692e-06, "loss": 16.9125, "step": 40000 }, { "epoch": 0.7232602765114444, "eval_loss": 2.107973098754883, "eval_runtime": 229.6504, "eval_samples_per_second": 3161.584, "eval_steps_per_second": 49.401, "step": 40000 }, { "epoch": 0.7234410915805723, "grad_norm": 41.5, "learning_rate": 9.886962521055033e-06, "loss": 16.4671, "step": 40010 }, { "epoch": 0.7236219066497002, "grad_norm": 43.71875, "learning_rate": 9.886934268748373e-06, "loss": 16.8505, "step": 40020 }, { "epoch": 0.7238027217188281, "grad_norm": 40.4375, "learning_rate": 9.886906016441712e-06, "loss": 16.8149, "step": 40030 }, { "epoch": 0.7239835367879559, "grad_norm": 43.125, "learning_rate": 9.886877764135053e-06, "loss": 16.7169, "step": 40040 }, { "epoch": 0.7241643518570837, "grad_norm": 42.40625, "learning_rate": 9.886849511828394e-06, "loss": 16.8032, "step": 40050 }, { "epoch": 0.7243451669262116, "grad_norm": 43.6875, "learning_rate": 9.886821259521734e-06, "loss": 16.7357, "step": 40060 }, { "epoch": 0.7245259819953395, "grad_norm": 43.5, "learning_rate": 9.886793007215075e-06, "loss": 16.8738, "step": 40070 }, { "epoch": 0.7247067970644674, "grad_norm": 42.96875, "learning_rate": 9.886764754908416e-06, "loss": 16.9363, "step": 40080 }, { "epoch": 0.7248876121335952, "grad_norm": 44.875, "learning_rate": 9.886736502601756e-06, "loss": 16.6958, "step": 40090 }, { "epoch": 0.725068427202723, "grad_norm": 41.625, "learning_rate": 9.886708250295097e-06, "loss": 16.8281, "step": 40100 }, { "epoch": 0.725249242271851, "grad_norm": 43.09375, "learning_rate": 9.886679997988436e-06, "loss": 16.6294, "step": 40110 }, { "epoch": 0.7254300573409788, "grad_norm": 41.40625, "learning_rate": 9.886651745681776e-06, "loss": 17.3219, "step": 40120 }, { "epoch": 0.7256108724101067, "grad_norm": 44.75, "learning_rate": 9.886623493375117e-06, "loss": 17.1288, "step": 40130 }, { "epoch": 0.7257916874792345, "grad_norm": 43.0625, "learning_rate": 9.886595241068458e-06, "loss": 16.3775, "step": 40140 }, { "epoch": 0.7259725025483624, "grad_norm": 42.0625, "learning_rate": 9.886566988761798e-06, "loss": 16.2942, "step": 40150 }, { "epoch": 0.7261533176174902, "grad_norm": 42.1875, "learning_rate": 9.886538736455139e-06, "loss": 16.8152, "step": 40160 }, { "epoch": 0.7263341326866181, "grad_norm": 43.15625, "learning_rate": 9.88651048414848e-06, "loss": 17.041, "step": 40170 }, { "epoch": 0.726514947755746, "grad_norm": 43.0, "learning_rate": 9.88648223184182e-06, "loss": 17.1616, "step": 40180 }, { "epoch": 0.7266957628248738, "grad_norm": 43.90625, "learning_rate": 9.88645397953516e-06, "loss": 16.3568, "step": 40190 }, { "epoch": 0.7268765778940017, "grad_norm": 43.4375, "learning_rate": 9.8864257272285e-06, "loss": 16.744, "step": 40200 }, { "epoch": 0.7270573929631295, "grad_norm": 43.03125, "learning_rate": 9.88639747492184e-06, "loss": 16.7687, "step": 40210 }, { "epoch": 0.7272382080322574, "grad_norm": 42.75, "learning_rate": 9.88636922261518e-06, "loss": 16.7254, "step": 40220 }, { "epoch": 0.7274190231013853, "grad_norm": 47.21875, "learning_rate": 9.886340970308521e-06, "loss": 16.8152, "step": 40230 }, { "epoch": 0.7275998381705131, "grad_norm": 42.71875, "learning_rate": 9.886312718001862e-06, "loss": 16.6617, "step": 40240 }, { "epoch": 0.727780653239641, "grad_norm": 42.78125, "learning_rate": 9.886284465695203e-06, "loss": 16.8171, "step": 40250 }, { "epoch": 0.7279614683087688, "grad_norm": 41.09375, "learning_rate": 9.886256213388543e-06, "loss": 16.6807, "step": 40260 }, { "epoch": 0.7281422833778967, "grad_norm": 44.65625, "learning_rate": 9.886227961081884e-06, "loss": 16.8896, "step": 40270 }, { "epoch": 0.7283230984470246, "grad_norm": 44.75, "learning_rate": 9.886199708775225e-06, "loss": 17.2417, "step": 40280 }, { "epoch": 0.7285039135161524, "grad_norm": 40.6875, "learning_rate": 9.886171456468563e-06, "loss": 16.7568, "step": 40290 }, { "epoch": 0.7286847285852803, "grad_norm": 46.21875, "learning_rate": 9.886143204161904e-06, "loss": 16.9543, "step": 40300 }, { "epoch": 0.7288655436544081, "grad_norm": 41.09375, "learning_rate": 9.886114951855245e-06, "loss": 16.9995, "step": 40310 }, { "epoch": 0.7290463587235361, "grad_norm": 41.03125, "learning_rate": 9.886086699548585e-06, "loss": 16.5499, "step": 40320 }, { "epoch": 0.7292271737926639, "grad_norm": 45.96875, "learning_rate": 9.886058447241926e-06, "loss": 16.7233, "step": 40330 }, { "epoch": 0.7294079888617917, "grad_norm": 44.90625, "learning_rate": 9.886030194935267e-06, "loss": 17.1275, "step": 40340 }, { "epoch": 0.7295888039309196, "grad_norm": 44.4375, "learning_rate": 9.886001942628607e-06, "loss": 16.9403, "step": 40350 }, { "epoch": 0.7297696190000474, "grad_norm": 42.34375, "learning_rate": 9.885973690321948e-06, "loss": 16.5147, "step": 40360 }, { "epoch": 0.7299504340691754, "grad_norm": 40.28125, "learning_rate": 9.885945438015287e-06, "loss": 16.7906, "step": 40370 }, { "epoch": 0.7301312491383032, "grad_norm": 42.8125, "learning_rate": 9.885917185708627e-06, "loss": 17.0495, "step": 40380 }, { "epoch": 0.730312064207431, "grad_norm": 42.4375, "learning_rate": 9.885888933401968e-06, "loss": 17.0379, "step": 40390 }, { "epoch": 0.7304928792765589, "grad_norm": 43.09375, "learning_rate": 9.885860681095309e-06, "loss": 16.7473, "step": 40400 }, { "epoch": 0.7306736943456867, "grad_norm": 40.375, "learning_rate": 9.88583242878865e-06, "loss": 16.9922, "step": 40410 }, { "epoch": 0.7308545094148147, "grad_norm": 41.53125, "learning_rate": 9.88580417648199e-06, "loss": 16.7063, "step": 40420 }, { "epoch": 0.7310353244839425, "grad_norm": 40.5625, "learning_rate": 9.88577592417533e-06, "loss": 17.0254, "step": 40430 }, { "epoch": 0.7312161395530703, "grad_norm": 42.5, "learning_rate": 9.885747671868671e-06, "loss": 16.7434, "step": 40440 }, { "epoch": 0.7313969546221982, "grad_norm": 44.5, "learning_rate": 9.885719419562012e-06, "loss": 16.8146, "step": 40450 }, { "epoch": 0.731577769691326, "grad_norm": 41.625, "learning_rate": 9.88569116725535e-06, "loss": 17.4253, "step": 40460 }, { "epoch": 0.731758584760454, "grad_norm": 45.53125, "learning_rate": 9.885662914948691e-06, "loss": 16.398, "step": 40470 }, { "epoch": 0.7319393998295818, "grad_norm": 42.09375, "learning_rate": 9.885634662642032e-06, "loss": 16.5052, "step": 40480 }, { "epoch": 0.7321202148987097, "grad_norm": 42.75, "learning_rate": 9.885606410335373e-06, "loss": 16.7789, "step": 40490 }, { "epoch": 0.7323010299678375, "grad_norm": 44.78125, "learning_rate": 9.885578158028713e-06, "loss": 16.5454, "step": 40500 }, { "epoch": 0.7324818450369653, "grad_norm": 42.78125, "learning_rate": 9.885549905722054e-06, "loss": 16.8163, "step": 40510 }, { "epoch": 0.7326626601060933, "grad_norm": 42.125, "learning_rate": 9.885521653415394e-06, "loss": 17.143, "step": 40520 }, { "epoch": 0.7328434751752211, "grad_norm": 39.1875, "learning_rate": 9.885493401108735e-06, "loss": 17.0652, "step": 40530 }, { "epoch": 0.733024290244349, "grad_norm": 45.71875, "learning_rate": 9.885465148802074e-06, "loss": 17.1131, "step": 40540 }, { "epoch": 0.7332051053134768, "grad_norm": 44.09375, "learning_rate": 9.885436896495415e-06, "loss": 17.2092, "step": 40550 }, { "epoch": 0.7333859203826046, "grad_norm": 44.8125, "learning_rate": 9.885408644188755e-06, "loss": 17.1851, "step": 40560 }, { "epoch": 0.7335667354517326, "grad_norm": 41.4375, "learning_rate": 9.885380391882096e-06, "loss": 17.2057, "step": 40570 }, { "epoch": 0.7337475505208604, "grad_norm": 45.0625, "learning_rate": 9.885352139575436e-06, "loss": 16.8595, "step": 40580 }, { "epoch": 0.7339283655899883, "grad_norm": 42.71875, "learning_rate": 9.885323887268777e-06, "loss": 16.6233, "step": 40590 }, { "epoch": 0.7341091806591161, "grad_norm": 41.0625, "learning_rate": 9.885295634962118e-06, "loss": 16.6906, "step": 40600 }, { "epoch": 0.7342899957282439, "grad_norm": 41.75, "learning_rate": 9.885267382655458e-06, "loss": 16.5386, "step": 40610 }, { "epoch": 0.7344708107973719, "grad_norm": 42.46875, "learning_rate": 9.885239130348799e-06, "loss": 16.8199, "step": 40620 }, { "epoch": 0.7346516258664997, "grad_norm": 42.8125, "learning_rate": 9.885210878042138e-06, "loss": 16.6419, "step": 40630 }, { "epoch": 0.7348324409356276, "grad_norm": 44.34375, "learning_rate": 9.885182625735478e-06, "loss": 16.9355, "step": 40640 }, { "epoch": 0.7350132560047554, "grad_norm": 42.46875, "learning_rate": 9.885154373428819e-06, "loss": 16.7165, "step": 40650 }, { "epoch": 0.7351940710738833, "grad_norm": 44.8125, "learning_rate": 9.88512612112216e-06, "loss": 16.981, "step": 40660 }, { "epoch": 0.7353748861430112, "grad_norm": 40.3125, "learning_rate": 9.8850978688155e-06, "loss": 16.8327, "step": 40670 }, { "epoch": 0.735555701212139, "grad_norm": 42.3125, "learning_rate": 9.885069616508841e-06, "loss": 16.9216, "step": 40680 }, { "epoch": 0.7357365162812669, "grad_norm": 42.53125, "learning_rate": 9.885041364202182e-06, "loss": 17.251, "step": 40690 }, { "epoch": 0.7359173313503947, "grad_norm": 42.1875, "learning_rate": 9.885013111895522e-06, "loss": 16.3896, "step": 40700 }, { "epoch": 0.7360981464195226, "grad_norm": 42.90625, "learning_rate": 9.884984859588861e-06, "loss": 16.8334, "step": 40710 }, { "epoch": 0.7362789614886505, "grad_norm": 43.78125, "learning_rate": 9.884956607282202e-06, "loss": 16.6666, "step": 40720 }, { "epoch": 0.7364597765577783, "grad_norm": 45.0, "learning_rate": 9.884928354975542e-06, "loss": 17.068, "step": 40730 }, { "epoch": 0.7366405916269062, "grad_norm": 41.5, "learning_rate": 9.884900102668883e-06, "loss": 16.5892, "step": 40740 }, { "epoch": 0.736821406696034, "grad_norm": 41.8125, "learning_rate": 9.884871850362224e-06, "loss": 16.977, "step": 40750 }, { "epoch": 0.737002221765162, "grad_norm": 44.0625, "learning_rate": 9.884843598055564e-06, "loss": 17.0296, "step": 40760 }, { "epoch": 0.7371830368342898, "grad_norm": 41.75, "learning_rate": 9.884815345748905e-06, "loss": 16.9444, "step": 40770 }, { "epoch": 0.7373638519034176, "grad_norm": 43.15625, "learning_rate": 9.884787093442246e-06, "loss": 16.7186, "step": 40780 }, { "epoch": 0.7375446669725455, "grad_norm": 41.71875, "learning_rate": 9.884758841135586e-06, "loss": 16.5658, "step": 40790 }, { "epoch": 0.7377254820416733, "grad_norm": 41.21875, "learning_rate": 9.884730588828925e-06, "loss": 16.9532, "step": 40800 }, { "epoch": 0.7379062971108012, "grad_norm": 43.5, "learning_rate": 9.884702336522266e-06, "loss": 16.7144, "step": 40810 }, { "epoch": 0.7380871121799291, "grad_norm": 43.9375, "learning_rate": 9.884674084215606e-06, "loss": 16.8067, "step": 40820 }, { "epoch": 0.738267927249057, "grad_norm": 41.84375, "learning_rate": 9.884645831908947e-06, "loss": 16.7855, "step": 40830 }, { "epoch": 0.7384487423181848, "grad_norm": 40.875, "learning_rate": 9.884617579602288e-06, "loss": 16.8532, "step": 40840 }, { "epoch": 0.7386295573873126, "grad_norm": 43.84375, "learning_rate": 9.884589327295628e-06, "loss": 16.8848, "step": 40850 }, { "epoch": 0.7388103724564405, "grad_norm": 43.90625, "learning_rate": 9.884561074988969e-06, "loss": 16.41, "step": 40860 }, { "epoch": 0.7389911875255684, "grad_norm": 42.9375, "learning_rate": 9.88453282268231e-06, "loss": 16.931, "step": 40870 }, { "epoch": 0.7391720025946963, "grad_norm": 44.3125, "learning_rate": 9.88450457037565e-06, "loss": 16.4792, "step": 40880 }, { "epoch": 0.7393528176638241, "grad_norm": 41.6875, "learning_rate": 9.884476318068989e-06, "loss": 16.5623, "step": 40890 }, { "epoch": 0.7395336327329519, "grad_norm": 44.3125, "learning_rate": 9.88444806576233e-06, "loss": 16.9786, "step": 40900 }, { "epoch": 0.7397144478020798, "grad_norm": 44.46875, "learning_rate": 9.88441981345567e-06, "loss": 16.7459, "step": 40910 }, { "epoch": 0.7398952628712077, "grad_norm": 43.78125, "learning_rate": 9.88439156114901e-06, "loss": 17.09, "step": 40920 }, { "epoch": 0.7400760779403356, "grad_norm": 43.34375, "learning_rate": 9.884363308842351e-06, "loss": 17.1282, "step": 40930 }, { "epoch": 0.7402568930094634, "grad_norm": 40.875, "learning_rate": 9.884335056535692e-06, "loss": 16.7603, "step": 40940 }, { "epoch": 0.7404377080785912, "grad_norm": 44.4375, "learning_rate": 9.884306804229033e-06, "loss": 16.8682, "step": 40950 }, { "epoch": 0.7406185231477191, "grad_norm": 40.40625, "learning_rate": 9.884278551922373e-06, "loss": 17.0364, "step": 40960 }, { "epoch": 0.740799338216847, "grad_norm": 42.53125, "learning_rate": 9.884250299615712e-06, "loss": 17.0327, "step": 40970 }, { "epoch": 0.7409801532859749, "grad_norm": 43.15625, "learning_rate": 9.884222047309053e-06, "loss": 16.9583, "step": 40980 }, { "epoch": 0.7411609683551027, "grad_norm": 41.90625, "learning_rate": 9.884193795002394e-06, "loss": 16.6937, "step": 40990 }, { "epoch": 0.7413417834242306, "grad_norm": 43.65625, "learning_rate": 9.884165542695734e-06, "loss": 16.5979, "step": 41000 }, { "epoch": 0.7415225984933584, "grad_norm": 42.875, "learning_rate": 9.884137290389075e-06, "loss": 16.5445, "step": 41010 }, { "epoch": 0.7417034135624863, "grad_norm": 44.21875, "learning_rate": 9.884109038082414e-06, "loss": 16.9713, "step": 41020 }, { "epoch": 0.7418842286316142, "grad_norm": 42.8125, "learning_rate": 9.884080785775756e-06, "loss": 16.3581, "step": 41030 }, { "epoch": 0.742065043700742, "grad_norm": 42.3125, "learning_rate": 9.884052533469097e-06, "loss": 16.7363, "step": 41040 }, { "epoch": 0.7422458587698699, "grad_norm": 39.625, "learning_rate": 9.884024281162437e-06, "loss": 16.7999, "step": 41050 }, { "epoch": 0.7424266738389977, "grad_norm": 39.53125, "learning_rate": 9.883996028855776e-06, "loss": 16.8929, "step": 41060 }, { "epoch": 0.7426074889081256, "grad_norm": 45.46875, "learning_rate": 9.883967776549117e-06, "loss": 16.6393, "step": 41070 }, { "epoch": 0.7427883039772535, "grad_norm": 43.375, "learning_rate": 9.883939524242457e-06, "loss": 16.7282, "step": 41080 }, { "epoch": 0.7429691190463813, "grad_norm": 40.78125, "learning_rate": 9.883911271935798e-06, "loss": 17.0336, "step": 41090 }, { "epoch": 0.7431499341155092, "grad_norm": 45.5625, "learning_rate": 9.883883019629139e-06, "loss": 16.9114, "step": 41100 }, { "epoch": 0.743330749184637, "grad_norm": 45.5625, "learning_rate": 9.88385476732248e-06, "loss": 17.1745, "step": 41110 }, { "epoch": 0.7435115642537649, "grad_norm": 41.9375, "learning_rate": 9.88382651501582e-06, "loss": 16.4395, "step": 41120 }, { "epoch": 0.7436923793228928, "grad_norm": 44.25, "learning_rate": 9.88379826270916e-06, "loss": 16.8133, "step": 41130 }, { "epoch": 0.7438731943920206, "grad_norm": 42.6875, "learning_rate": 9.8837700104025e-06, "loss": 16.6626, "step": 41140 }, { "epoch": 0.7440540094611485, "grad_norm": 43.40625, "learning_rate": 9.88374175809584e-06, "loss": 16.7532, "step": 41150 }, { "epoch": 0.7442348245302763, "grad_norm": 41.0625, "learning_rate": 9.88371350578918e-06, "loss": 16.8214, "step": 41160 }, { "epoch": 0.7444156395994043, "grad_norm": 42.21875, "learning_rate": 9.883685253482521e-06, "loss": 16.7015, "step": 41170 }, { "epoch": 0.7445964546685321, "grad_norm": 43.96875, "learning_rate": 9.883657001175862e-06, "loss": 17.0512, "step": 41180 }, { "epoch": 0.7447772697376599, "grad_norm": 44.5, "learning_rate": 9.883628748869203e-06, "loss": 17.1258, "step": 41190 }, { "epoch": 0.7449580848067878, "grad_norm": 44.21875, "learning_rate": 9.883600496562543e-06, "loss": 16.8723, "step": 41200 }, { "epoch": 0.7451388998759156, "grad_norm": 42.09375, "learning_rate": 9.883572244255884e-06, "loss": 16.7487, "step": 41210 }, { "epoch": 0.7453197149450436, "grad_norm": 43.84375, "learning_rate": 9.883543991949224e-06, "loss": 16.6667, "step": 41220 }, { "epoch": 0.7455005300141714, "grad_norm": 41.90625, "learning_rate": 9.883515739642563e-06, "loss": 16.8356, "step": 41230 }, { "epoch": 0.7456813450832992, "grad_norm": 44.1875, "learning_rate": 9.883487487335904e-06, "loss": 16.8372, "step": 41240 }, { "epoch": 0.7458621601524271, "grad_norm": 44.84375, "learning_rate": 9.883459235029245e-06, "loss": 16.2635, "step": 41250 }, { "epoch": 0.7460429752215549, "grad_norm": 44.375, "learning_rate": 9.883430982722585e-06, "loss": 16.6561, "step": 41260 }, { "epoch": 0.7462237902906829, "grad_norm": 44.34375, "learning_rate": 9.883402730415926e-06, "loss": 16.6502, "step": 41270 }, { "epoch": 0.7464046053598107, "grad_norm": 42.09375, "learning_rate": 9.883374478109265e-06, "loss": 16.8927, "step": 41280 }, { "epoch": 0.7465854204289385, "grad_norm": 44.28125, "learning_rate": 9.883346225802607e-06, "loss": 16.7709, "step": 41290 }, { "epoch": 0.7467662354980664, "grad_norm": 43.71875, "learning_rate": 9.883317973495948e-06, "loss": 16.8387, "step": 41300 }, { "epoch": 0.7469470505671942, "grad_norm": 43.28125, "learning_rate": 9.883289721189288e-06, "loss": 16.9989, "step": 41310 }, { "epoch": 0.7471278656363222, "grad_norm": 40.96875, "learning_rate": 9.883261468882627e-06, "loss": 16.9145, "step": 41320 }, { "epoch": 0.74730868070545, "grad_norm": 43.6875, "learning_rate": 9.883233216575968e-06, "loss": 16.7206, "step": 41330 }, { "epoch": 0.7474894957745779, "grad_norm": 42.625, "learning_rate": 9.883204964269309e-06, "loss": 16.3599, "step": 41340 }, { "epoch": 0.7476703108437057, "grad_norm": 42.84375, "learning_rate": 9.883176711962649e-06, "loss": 16.4318, "step": 41350 }, { "epoch": 0.7478511259128335, "grad_norm": 43.0625, "learning_rate": 9.88314845965599e-06, "loss": 16.9316, "step": 41360 }, { "epoch": 0.7480319409819615, "grad_norm": 44.09375, "learning_rate": 9.883120207349329e-06, "loss": 16.7205, "step": 41370 }, { "epoch": 0.7482127560510893, "grad_norm": 40.25, "learning_rate": 9.883091955042671e-06, "loss": 16.3273, "step": 41380 }, { "epoch": 0.7483935711202172, "grad_norm": 42.40625, "learning_rate": 9.883063702736012e-06, "loss": 17.1665, "step": 41390 }, { "epoch": 0.748574386189345, "grad_norm": 43.59375, "learning_rate": 9.88303545042935e-06, "loss": 16.7555, "step": 41400 }, { "epoch": 0.7487552012584728, "grad_norm": 43.71875, "learning_rate": 9.883007198122691e-06, "loss": 16.8557, "step": 41410 }, { "epoch": 0.7489360163276008, "grad_norm": 40.375, "learning_rate": 9.882978945816032e-06, "loss": 16.3214, "step": 41420 }, { "epoch": 0.7491168313967286, "grad_norm": 43.6875, "learning_rate": 9.882950693509372e-06, "loss": 17.0458, "step": 41430 }, { "epoch": 0.7492976464658565, "grad_norm": 44.21875, "learning_rate": 9.882922441202713e-06, "loss": 17.0682, "step": 41440 }, { "epoch": 0.7494784615349843, "grad_norm": 42.78125, "learning_rate": 9.882894188896052e-06, "loss": 16.79, "step": 41450 }, { "epoch": 0.7496592766041121, "grad_norm": 44.40625, "learning_rate": 9.882865936589394e-06, "loss": 16.8722, "step": 41460 }, { "epoch": 0.7498400916732401, "grad_norm": 41.3125, "learning_rate": 9.882837684282735e-06, "loss": 16.351, "step": 41470 }, { "epoch": 0.7500209067423679, "grad_norm": 45.6875, "learning_rate": 9.882809431976076e-06, "loss": 16.8484, "step": 41480 }, { "epoch": 0.7502017218114958, "grad_norm": 43.125, "learning_rate": 9.882781179669414e-06, "loss": 16.6952, "step": 41490 }, { "epoch": 0.7503825368806236, "grad_norm": 46.5, "learning_rate": 9.882752927362755e-06, "loss": 16.8167, "step": 41500 }, { "epoch": 0.7505633519497515, "grad_norm": 40.0, "learning_rate": 9.882724675056096e-06, "loss": 16.6396, "step": 41510 }, { "epoch": 0.7507441670188794, "grad_norm": 43.9375, "learning_rate": 9.882696422749436e-06, "loss": 16.5496, "step": 41520 }, { "epoch": 0.7509249820880072, "grad_norm": 44.34375, "learning_rate": 9.882668170442777e-06, "loss": 17.0983, "step": 41530 }, { "epoch": 0.7511057971571351, "grad_norm": 40.84375, "learning_rate": 9.882639918136116e-06, "loss": 16.601, "step": 41540 }, { "epoch": 0.7512866122262629, "grad_norm": 41.75, "learning_rate": 9.882611665829458e-06, "loss": 16.7391, "step": 41550 }, { "epoch": 0.7514674272953908, "grad_norm": 41.71875, "learning_rate": 9.882583413522799e-06, "loss": 17.1427, "step": 41560 }, { "epoch": 0.7516482423645187, "grad_norm": 42.25, "learning_rate": 9.882555161216138e-06, "loss": 16.784, "step": 41570 }, { "epoch": 0.7518290574336465, "grad_norm": 43.65625, "learning_rate": 9.882526908909478e-06, "loss": 16.6273, "step": 41580 }, { "epoch": 0.7520098725027744, "grad_norm": 39.6875, "learning_rate": 9.882498656602819e-06, "loss": 16.5671, "step": 41590 }, { "epoch": 0.7521906875719022, "grad_norm": 44.03125, "learning_rate": 9.88247040429616e-06, "loss": 16.9705, "step": 41600 }, { "epoch": 0.7523715026410301, "grad_norm": 42.15625, "learning_rate": 9.8824421519895e-06, "loss": 16.6402, "step": 41610 }, { "epoch": 0.752552317710158, "grad_norm": 41.71875, "learning_rate": 9.882413899682841e-06, "loss": 16.7703, "step": 41620 }, { "epoch": 0.7527331327792858, "grad_norm": 42.34375, "learning_rate": 9.88238564737618e-06, "loss": 17.2249, "step": 41630 }, { "epoch": 0.7529139478484137, "grad_norm": 41.03125, "learning_rate": 9.882357395069522e-06, "loss": 16.6053, "step": 41640 }, { "epoch": 0.7530947629175415, "grad_norm": 46.1875, "learning_rate": 9.882329142762863e-06, "loss": 17.1259, "step": 41650 }, { "epoch": 0.7532755779866694, "grad_norm": 42.78125, "learning_rate": 9.882300890456202e-06, "loss": 16.6679, "step": 41660 }, { "epoch": 0.7534563930557973, "grad_norm": 44.9375, "learning_rate": 9.882272638149542e-06, "loss": 17.3633, "step": 41670 }, { "epoch": 0.7536372081249252, "grad_norm": 41.5625, "learning_rate": 9.882244385842883e-06, "loss": 16.7871, "step": 41680 }, { "epoch": 0.753818023194053, "grad_norm": 43.46875, "learning_rate": 9.882216133536224e-06, "loss": 16.3583, "step": 41690 }, { "epoch": 0.7539988382631808, "grad_norm": 41.84375, "learning_rate": 9.882187881229564e-06, "loss": 16.692, "step": 41700 }, { "epoch": 0.7541796533323087, "grad_norm": 41.09375, "learning_rate": 9.882159628922903e-06, "loss": 16.7707, "step": 41710 }, { "epoch": 0.7543604684014366, "grad_norm": 42.03125, "learning_rate": 9.882131376616244e-06, "loss": 17.0561, "step": 41720 }, { "epoch": 0.7545412834705645, "grad_norm": 44.3125, "learning_rate": 9.882103124309586e-06, "loss": 17.1578, "step": 41730 }, { "epoch": 0.7547220985396923, "grad_norm": 45.0, "learning_rate": 9.882074872002927e-06, "loss": 16.8074, "step": 41740 }, { "epoch": 0.7549029136088201, "grad_norm": 41.15625, "learning_rate": 9.882046619696266e-06, "loss": 16.7429, "step": 41750 }, { "epoch": 0.755083728677948, "grad_norm": 43.09375, "learning_rate": 9.882018367389606e-06, "loss": 16.8894, "step": 41760 }, { "epoch": 0.7552645437470759, "grad_norm": 42.6875, "learning_rate": 9.881990115082947e-06, "loss": 16.8988, "step": 41770 }, { "epoch": 0.7554453588162038, "grad_norm": 43.21875, "learning_rate": 9.881961862776287e-06, "loss": 16.8764, "step": 41780 }, { "epoch": 0.7556261738853316, "grad_norm": 44.1875, "learning_rate": 9.881933610469628e-06, "loss": 17.0994, "step": 41790 }, { "epoch": 0.7558069889544594, "grad_norm": 44.4375, "learning_rate": 9.881905358162967e-06, "loss": 16.8171, "step": 41800 }, { "epoch": 0.7559878040235873, "grad_norm": 42.90625, "learning_rate": 9.88187710585631e-06, "loss": 17.0823, "step": 41810 }, { "epoch": 0.7561686190927152, "grad_norm": 42.1875, "learning_rate": 9.88184885354965e-06, "loss": 16.7577, "step": 41820 }, { "epoch": 0.7563494341618431, "grad_norm": 41.0, "learning_rate": 9.881820601242989e-06, "loss": 17.2433, "step": 41830 }, { "epoch": 0.7565302492309709, "grad_norm": 43.90625, "learning_rate": 9.88179234893633e-06, "loss": 17.117, "step": 41840 }, { "epoch": 0.7567110643000988, "grad_norm": 42.625, "learning_rate": 9.88176409662967e-06, "loss": 16.6246, "step": 41850 }, { "epoch": 0.7568918793692266, "grad_norm": 41.71875, "learning_rate": 9.88173584432301e-06, "loss": 17.0378, "step": 41860 }, { "epoch": 0.7570726944383545, "grad_norm": 43.3125, "learning_rate": 9.881707592016351e-06, "loss": 16.559, "step": 41870 }, { "epoch": 0.7572535095074824, "grad_norm": 46.1875, "learning_rate": 9.88167933970969e-06, "loss": 16.9933, "step": 41880 }, { "epoch": 0.7574343245766102, "grad_norm": 41.65625, "learning_rate": 9.881651087403031e-06, "loss": 16.5829, "step": 41890 }, { "epoch": 0.7576151396457381, "grad_norm": 40.9375, "learning_rate": 9.881622835096373e-06, "loss": 16.7736, "step": 41900 }, { "epoch": 0.7577959547148659, "grad_norm": 44.59375, "learning_rate": 9.881594582789714e-06, "loss": 16.9979, "step": 41910 }, { "epoch": 0.7579767697839938, "grad_norm": 42.84375, "learning_rate": 9.881566330483053e-06, "loss": 16.8586, "step": 41920 }, { "epoch": 0.7581575848531217, "grad_norm": 41.96875, "learning_rate": 9.881538078176393e-06, "loss": 16.416, "step": 41930 }, { "epoch": 0.7583383999222495, "grad_norm": 43.375, "learning_rate": 9.881509825869734e-06, "loss": 17.0207, "step": 41940 }, { "epoch": 0.7585192149913774, "grad_norm": 45.6875, "learning_rate": 9.881481573563075e-06, "loss": 16.8822, "step": 41950 }, { "epoch": 0.7587000300605052, "grad_norm": 41.375, "learning_rate": 9.881453321256415e-06, "loss": 16.8448, "step": 41960 }, { "epoch": 0.758880845129633, "grad_norm": 45.3125, "learning_rate": 9.881425068949754e-06, "loss": 16.5718, "step": 41970 }, { "epoch": 0.759061660198761, "grad_norm": 44.125, "learning_rate": 9.881396816643095e-06, "loss": 16.2509, "step": 41980 }, { "epoch": 0.7592424752678888, "grad_norm": 43.5, "learning_rate": 9.881368564336437e-06, "loss": 16.4995, "step": 41990 }, { "epoch": 0.7594232903370167, "grad_norm": 42.21875, "learning_rate": 9.881340312029776e-06, "loss": 16.7124, "step": 42000 }, { "epoch": 0.7596041054061445, "grad_norm": 43.03125, "learning_rate": 9.881312059723117e-06, "loss": 16.6199, "step": 42010 }, { "epoch": 0.7597849204752725, "grad_norm": 41.125, "learning_rate": 9.881283807416457e-06, "loss": 17.145, "step": 42020 }, { "epoch": 0.7599657355444003, "grad_norm": 40.96875, "learning_rate": 9.881255555109798e-06, "loss": 16.6874, "step": 42030 }, { "epoch": 0.7601465506135281, "grad_norm": 43.1875, "learning_rate": 9.881227302803139e-06, "loss": 16.8522, "step": 42040 }, { "epoch": 0.760327365682656, "grad_norm": 41.6875, "learning_rate": 9.881199050496479e-06, "loss": 16.6918, "step": 42050 }, { "epoch": 0.7605081807517838, "grad_norm": 41.65625, "learning_rate": 9.881170798189818e-06, "loss": 16.6511, "step": 42060 }, { "epoch": 0.7606889958209118, "grad_norm": 45.875, "learning_rate": 9.881142545883159e-06, "loss": 16.3789, "step": 42070 }, { "epoch": 0.7608698108900396, "grad_norm": 45.34375, "learning_rate": 9.881114293576501e-06, "loss": 16.3309, "step": 42080 }, { "epoch": 0.7610506259591674, "grad_norm": 45.5625, "learning_rate": 9.88108604126984e-06, "loss": 17.0268, "step": 42090 }, { "epoch": 0.7612314410282953, "grad_norm": 41.84375, "learning_rate": 9.88105778896318e-06, "loss": 16.5562, "step": 42100 }, { "epoch": 0.7614122560974231, "grad_norm": 41.59375, "learning_rate": 9.881029536656521e-06, "loss": 16.671, "step": 42110 }, { "epoch": 0.7615930711665511, "grad_norm": 42.0625, "learning_rate": 9.881001284349862e-06, "loss": 16.8582, "step": 42120 }, { "epoch": 0.7617738862356789, "grad_norm": 41.65625, "learning_rate": 9.880973032043202e-06, "loss": 16.7876, "step": 42130 }, { "epoch": 0.7619547013048067, "grad_norm": 39.40625, "learning_rate": 9.880944779736541e-06, "loss": 16.9899, "step": 42140 }, { "epoch": 0.7621355163739346, "grad_norm": 43.4375, "learning_rate": 9.880916527429882e-06, "loss": 16.6254, "step": 42150 }, { "epoch": 0.7623163314430624, "grad_norm": 45.5, "learning_rate": 9.880888275123224e-06, "loss": 16.5524, "step": 42160 }, { "epoch": 0.7624971465121904, "grad_norm": 43.25, "learning_rate": 9.880860022816565e-06, "loss": 17.2899, "step": 42170 }, { "epoch": 0.7626779615813182, "grad_norm": 45.09375, "learning_rate": 9.880831770509904e-06, "loss": 16.8959, "step": 42180 }, { "epoch": 0.7628587766504461, "grad_norm": 43.28125, "learning_rate": 9.880803518203244e-06, "loss": 16.9736, "step": 42190 }, { "epoch": 0.7630395917195739, "grad_norm": 46.625, "learning_rate": 9.880775265896585e-06, "loss": 17.254, "step": 42200 }, { "epoch": 0.7632204067887017, "grad_norm": 43.28125, "learning_rate": 9.880747013589926e-06, "loss": 16.6953, "step": 42210 }, { "epoch": 0.7634012218578297, "grad_norm": 45.03125, "learning_rate": 9.880718761283266e-06, "loss": 16.8156, "step": 42220 }, { "epoch": 0.7635820369269575, "grad_norm": 41.25, "learning_rate": 9.880690508976605e-06, "loss": 16.7833, "step": 42230 }, { "epoch": 0.7637628519960854, "grad_norm": 45.25, "learning_rate": 9.880662256669946e-06, "loss": 17.0329, "step": 42240 }, { "epoch": 0.7639436670652132, "grad_norm": 42.5625, "learning_rate": 9.880634004363288e-06, "loss": 16.7803, "step": 42250 }, { "epoch": 0.764124482134341, "grad_norm": 46.65625, "learning_rate": 9.880605752056627e-06, "loss": 16.7741, "step": 42260 }, { "epoch": 0.764305297203469, "grad_norm": 44.46875, "learning_rate": 9.880577499749968e-06, "loss": 16.4529, "step": 42270 }, { "epoch": 0.7644861122725968, "grad_norm": 43.78125, "learning_rate": 9.880549247443308e-06, "loss": 16.7497, "step": 42280 }, { "epoch": 0.7646669273417247, "grad_norm": 40.59375, "learning_rate": 9.880520995136649e-06, "loss": 16.7111, "step": 42290 }, { "epoch": 0.7648477424108525, "grad_norm": 43.75, "learning_rate": 9.88049274282999e-06, "loss": 16.9267, "step": 42300 }, { "epoch": 0.7650285574799803, "grad_norm": 43.25, "learning_rate": 9.880464490523329e-06, "loss": 17.0684, "step": 42310 }, { "epoch": 0.7652093725491083, "grad_norm": 42.96875, "learning_rate": 9.88043623821667e-06, "loss": 16.6041, "step": 42320 }, { "epoch": 0.7653901876182361, "grad_norm": 41.40625, "learning_rate": 9.88040798591001e-06, "loss": 16.4228, "step": 42330 }, { "epoch": 0.765571002687364, "grad_norm": 43.8125, "learning_rate": 9.880379733603352e-06, "loss": 16.712, "step": 42340 }, { "epoch": 0.7657518177564918, "grad_norm": 42.375, "learning_rate": 9.880351481296691e-06, "loss": 16.6611, "step": 42350 }, { "epoch": 0.7659326328256197, "grad_norm": 43.0, "learning_rate": 9.880323228990032e-06, "loss": 16.5457, "step": 42360 }, { "epoch": 0.7661134478947476, "grad_norm": 44.78125, "learning_rate": 9.880294976683372e-06, "loss": 17.0666, "step": 42370 }, { "epoch": 0.7662942629638754, "grad_norm": 44.03125, "learning_rate": 9.880266724376713e-06, "loss": 16.7831, "step": 42380 }, { "epoch": 0.7664750780330033, "grad_norm": 44.34375, "learning_rate": 9.880238472070054e-06, "loss": 17.1136, "step": 42390 }, { "epoch": 0.7666558931021311, "grad_norm": 42.71875, "learning_rate": 9.880210219763392e-06, "loss": 16.8953, "step": 42400 }, { "epoch": 0.766836708171259, "grad_norm": 41.25, "learning_rate": 9.880181967456733e-06, "loss": 16.8162, "step": 42410 }, { "epoch": 0.7670175232403869, "grad_norm": 42.0, "learning_rate": 9.880153715150074e-06, "loss": 16.8969, "step": 42420 }, { "epoch": 0.7671983383095147, "grad_norm": 43.9375, "learning_rate": 9.880125462843414e-06, "loss": 16.7205, "step": 42430 }, { "epoch": 0.7673791533786426, "grad_norm": 47.90625, "learning_rate": 9.880097210536755e-06, "loss": 16.8168, "step": 42440 }, { "epoch": 0.7675599684477704, "grad_norm": 41.5, "learning_rate": 9.880068958230096e-06, "loss": 17.1686, "step": 42450 }, { "epoch": 0.7677407835168983, "grad_norm": 41.46875, "learning_rate": 9.880040705923436e-06, "loss": 16.6928, "step": 42460 }, { "epoch": 0.7679215985860262, "grad_norm": 40.46875, "learning_rate": 9.880012453616777e-06, "loss": 17.0065, "step": 42470 }, { "epoch": 0.768102413655154, "grad_norm": 45.15625, "learning_rate": 9.879984201310117e-06, "loss": 16.5089, "step": 42480 }, { "epoch": 0.7682832287242819, "grad_norm": 46.0, "learning_rate": 9.879955949003456e-06, "loss": 16.6789, "step": 42490 }, { "epoch": 0.7684640437934097, "grad_norm": 43.75, "learning_rate": 9.879927696696797e-06, "loss": 17.1577, "step": 42500 }, { "epoch": 0.7686448588625376, "grad_norm": 44.0625, "learning_rate": 9.87989944439014e-06, "loss": 16.6613, "step": 42510 }, { "epoch": 0.7688256739316655, "grad_norm": 42.625, "learning_rate": 9.879871192083478e-06, "loss": 16.8114, "step": 42520 }, { "epoch": 0.7690064890007934, "grad_norm": 41.15625, "learning_rate": 9.879842939776819e-06, "loss": 17.2868, "step": 42530 }, { "epoch": 0.7691873040699212, "grad_norm": 40.78125, "learning_rate": 9.87981468747016e-06, "loss": 16.7327, "step": 42540 }, { "epoch": 0.769368119139049, "grad_norm": 41.4375, "learning_rate": 9.8797864351635e-06, "loss": 16.9727, "step": 42550 }, { "epoch": 0.7695489342081769, "grad_norm": 42.375, "learning_rate": 9.87975818285684e-06, "loss": 16.9383, "step": 42560 }, { "epoch": 0.7697297492773048, "grad_norm": 43.375, "learning_rate": 9.87972993055018e-06, "loss": 16.681, "step": 42570 }, { "epoch": 0.7699105643464327, "grad_norm": 43.6875, "learning_rate": 9.87970167824352e-06, "loss": 17.1889, "step": 42580 }, { "epoch": 0.7700913794155605, "grad_norm": 42.25, "learning_rate": 9.879673425936861e-06, "loss": 16.4792, "step": 42590 }, { "epoch": 0.7702721944846883, "grad_norm": 44.46875, "learning_rate": 9.879645173630203e-06, "loss": 16.7619, "step": 42600 }, { "epoch": 0.7704530095538162, "grad_norm": 42.125, "learning_rate": 9.879616921323542e-06, "loss": 16.4408, "step": 42610 }, { "epoch": 0.770633824622944, "grad_norm": 43.3125, "learning_rate": 9.879588669016883e-06, "loss": 16.6894, "step": 42620 }, { "epoch": 0.770814639692072, "grad_norm": 42.46875, "learning_rate": 9.879560416710223e-06, "loss": 16.7997, "step": 42630 }, { "epoch": 0.7709954547611998, "grad_norm": 44.28125, "learning_rate": 9.879532164403564e-06, "loss": 17.1275, "step": 42640 }, { "epoch": 0.7711762698303276, "grad_norm": 41.75, "learning_rate": 9.879503912096905e-06, "loss": 16.7961, "step": 42650 }, { "epoch": 0.7713570848994555, "grad_norm": 45.59375, "learning_rate": 9.879475659790244e-06, "loss": 16.8425, "step": 42660 }, { "epoch": 0.7715378999685834, "grad_norm": 43.9375, "learning_rate": 9.879447407483584e-06, "loss": 16.9238, "step": 42670 }, { "epoch": 0.7717187150377113, "grad_norm": 44.0, "learning_rate": 9.879419155176925e-06, "loss": 16.7722, "step": 42680 }, { "epoch": 0.7718995301068391, "grad_norm": 44.96875, "learning_rate": 9.879390902870265e-06, "loss": 16.8247, "step": 42690 }, { "epoch": 0.772080345175967, "grad_norm": 40.78125, "learning_rate": 9.879362650563606e-06, "loss": 16.495, "step": 42700 }, { "epoch": 0.7722611602450948, "grad_norm": 41.15625, "learning_rate": 9.879334398256947e-06, "loss": 16.5344, "step": 42710 }, { "epoch": 0.7724419753142227, "grad_norm": 44.625, "learning_rate": 9.879306145950287e-06, "loss": 16.6817, "step": 42720 }, { "epoch": 0.7726227903833506, "grad_norm": 44.09375, "learning_rate": 9.879277893643628e-06, "loss": 16.883, "step": 42730 }, { "epoch": 0.7728036054524784, "grad_norm": 44.5625, "learning_rate": 9.879249641336967e-06, "loss": 16.7871, "step": 42740 }, { "epoch": 0.7729844205216063, "grad_norm": 43.625, "learning_rate": 9.879221389030307e-06, "loss": 16.6971, "step": 42750 }, { "epoch": 0.7731652355907341, "grad_norm": 44.53125, "learning_rate": 9.879193136723648e-06, "loss": 16.6874, "step": 42760 }, { "epoch": 0.773346050659862, "grad_norm": 43.84375, "learning_rate": 9.879164884416989e-06, "loss": 16.8721, "step": 42770 }, { "epoch": 0.7735268657289899, "grad_norm": 42.65625, "learning_rate": 9.87913663211033e-06, "loss": 16.7498, "step": 42780 }, { "epoch": 0.7737076807981177, "grad_norm": 41.5625, "learning_rate": 9.87910837980367e-06, "loss": 16.7373, "step": 42790 }, { "epoch": 0.7738884958672456, "grad_norm": 42.65625, "learning_rate": 9.87908012749701e-06, "loss": 16.5815, "step": 42800 }, { "epoch": 0.7740693109363734, "grad_norm": 40.15625, "learning_rate": 9.879051875190351e-06, "loss": 16.9589, "step": 42810 }, { "epoch": 0.7742501260055012, "grad_norm": 44.78125, "learning_rate": 9.879023622883692e-06, "loss": 17.089, "step": 42820 }, { "epoch": 0.7744309410746292, "grad_norm": 40.9375, "learning_rate": 9.87899537057703e-06, "loss": 16.7203, "step": 42830 }, { "epoch": 0.774611756143757, "grad_norm": 43.03125, "learning_rate": 9.878967118270371e-06, "loss": 16.8916, "step": 42840 }, { "epoch": 0.7747925712128849, "grad_norm": 43.4375, "learning_rate": 9.878938865963712e-06, "loss": 16.8227, "step": 42850 }, { "epoch": 0.7749733862820127, "grad_norm": 44.5625, "learning_rate": 9.878910613657053e-06, "loss": 16.8217, "step": 42860 }, { "epoch": 0.7751542013511407, "grad_norm": 42.375, "learning_rate": 9.878882361350393e-06, "loss": 16.9417, "step": 42870 }, { "epoch": 0.7753350164202685, "grad_norm": 43.46875, "learning_rate": 9.878854109043734e-06, "loss": 16.5462, "step": 42880 }, { "epoch": 0.7755158314893963, "grad_norm": 42.28125, "learning_rate": 9.878825856737074e-06, "loss": 16.7315, "step": 42890 }, { "epoch": 0.7756966465585242, "grad_norm": 43.28125, "learning_rate": 9.878797604430415e-06, "loss": 16.9322, "step": 42900 }, { "epoch": 0.775877461627652, "grad_norm": 41.28125, "learning_rate": 9.878769352123756e-06, "loss": 16.8821, "step": 42910 }, { "epoch": 0.77605827669678, "grad_norm": 42.3125, "learning_rate": 9.878741099817095e-06, "loss": 17.2593, "step": 42920 }, { "epoch": 0.7762390917659078, "grad_norm": 42.71875, "learning_rate": 9.878712847510435e-06, "loss": 16.8446, "step": 42930 }, { "epoch": 0.7764199068350356, "grad_norm": 41.90625, "learning_rate": 9.878684595203776e-06, "loss": 16.921, "step": 42940 }, { "epoch": 0.7766007219041635, "grad_norm": 42.4375, "learning_rate": 9.878656342897117e-06, "loss": 17.1003, "step": 42950 }, { "epoch": 0.7767815369732913, "grad_norm": 40.9375, "learning_rate": 9.878628090590457e-06, "loss": 16.8286, "step": 42960 }, { "epoch": 0.7769623520424193, "grad_norm": 43.5625, "learning_rate": 9.878599838283798e-06, "loss": 17.0732, "step": 42970 }, { "epoch": 0.7771431671115471, "grad_norm": 41.71875, "learning_rate": 9.878571585977138e-06, "loss": 16.7076, "step": 42980 }, { "epoch": 0.7773239821806749, "grad_norm": 42.5, "learning_rate": 9.878543333670479e-06, "loss": 16.9669, "step": 42990 }, { "epoch": 0.7775047972498028, "grad_norm": 46.78125, "learning_rate": 9.878515081363818e-06, "loss": 16.9687, "step": 43000 }, { "epoch": 0.7776856123189306, "grad_norm": 43.84375, "learning_rate": 9.878486829057159e-06, "loss": 16.8253, "step": 43010 }, { "epoch": 0.7778664273880586, "grad_norm": 41.25, "learning_rate": 9.8784585767505e-06, "loss": 16.6976, "step": 43020 }, { "epoch": 0.7780472424571864, "grad_norm": 42.53125, "learning_rate": 9.87843032444384e-06, "loss": 17.3396, "step": 43030 }, { "epoch": 0.7782280575263142, "grad_norm": 44.25, "learning_rate": 9.87840207213718e-06, "loss": 16.4274, "step": 43040 }, { "epoch": 0.7784088725954421, "grad_norm": 41.4375, "learning_rate": 9.878373819830521e-06, "loss": 16.6991, "step": 43050 }, { "epoch": 0.7785896876645699, "grad_norm": 43.4375, "learning_rate": 9.878345567523862e-06, "loss": 17.0445, "step": 43060 }, { "epoch": 0.7787705027336979, "grad_norm": 39.5, "learning_rate": 9.878317315217202e-06, "loss": 16.6232, "step": 43070 }, { "epoch": 0.7789513178028257, "grad_norm": 40.9375, "learning_rate": 9.878289062910543e-06, "loss": 16.6776, "step": 43080 }, { "epoch": 0.7791321328719536, "grad_norm": 44.0625, "learning_rate": 9.878260810603882e-06, "loss": 16.6483, "step": 43090 }, { "epoch": 0.7793129479410814, "grad_norm": 41.0625, "learning_rate": 9.878232558297222e-06, "loss": 17.0843, "step": 43100 }, { "epoch": 0.7794937630102092, "grad_norm": 42.4375, "learning_rate": 9.878204305990563e-06, "loss": 16.4456, "step": 43110 }, { "epoch": 0.7796745780793372, "grad_norm": 44.90625, "learning_rate": 9.878176053683904e-06, "loss": 16.9825, "step": 43120 }, { "epoch": 0.779855393148465, "grad_norm": 43.9375, "learning_rate": 9.878147801377244e-06, "loss": 17.033, "step": 43130 }, { "epoch": 0.7800362082175929, "grad_norm": 42.0, "learning_rate": 9.878119549070585e-06, "loss": 16.6665, "step": 43140 }, { "epoch": 0.7802170232867207, "grad_norm": 45.46875, "learning_rate": 9.878091296763926e-06, "loss": 16.7691, "step": 43150 }, { "epoch": 0.7803978383558485, "grad_norm": 42.3125, "learning_rate": 9.878063044457266e-06, "loss": 16.9256, "step": 43160 }, { "epoch": 0.7805786534249765, "grad_norm": 44.9375, "learning_rate": 9.878034792150605e-06, "loss": 16.8972, "step": 43170 }, { "epoch": 0.7807594684941043, "grad_norm": 43.15625, "learning_rate": 9.878006539843946e-06, "loss": 16.2868, "step": 43180 }, { "epoch": 0.7809402835632322, "grad_norm": 43.375, "learning_rate": 9.877978287537286e-06, "loss": 16.5691, "step": 43190 }, { "epoch": 0.78112109863236, "grad_norm": 44.09375, "learning_rate": 9.877950035230627e-06, "loss": 16.9558, "step": 43200 }, { "epoch": 0.7813019137014878, "grad_norm": 42.78125, "learning_rate": 9.877921782923968e-06, "loss": 16.5978, "step": 43210 }, { "epoch": 0.7814827287706158, "grad_norm": 43.5, "learning_rate": 9.877893530617308e-06, "loss": 16.3195, "step": 43220 }, { "epoch": 0.7816635438397436, "grad_norm": 42.78125, "learning_rate": 9.877865278310649e-06, "loss": 16.8766, "step": 43230 }, { "epoch": 0.7818443589088715, "grad_norm": 42.71875, "learning_rate": 9.87783702600399e-06, "loss": 16.7194, "step": 43240 }, { "epoch": 0.7820251739779993, "grad_norm": 43.4375, "learning_rate": 9.87780877369733e-06, "loss": 16.5889, "step": 43250 }, { "epoch": 0.7822059890471272, "grad_norm": 41.75, "learning_rate": 9.877780521390669e-06, "loss": 16.5907, "step": 43260 }, { "epoch": 0.782386804116255, "grad_norm": 44.21875, "learning_rate": 9.87775226908401e-06, "loss": 16.5148, "step": 43270 }, { "epoch": 0.7825676191853829, "grad_norm": 42.53125, "learning_rate": 9.87772401677735e-06, "loss": 17.0262, "step": 43280 }, { "epoch": 0.7827484342545108, "grad_norm": 40.875, "learning_rate": 9.877695764470691e-06, "loss": 16.7053, "step": 43290 }, { "epoch": 0.7829292493236386, "grad_norm": 42.65625, "learning_rate": 9.877667512164032e-06, "loss": 16.8643, "step": 43300 }, { "epoch": 0.7831100643927665, "grad_norm": 46.46875, "learning_rate": 9.877639259857372e-06, "loss": 16.8704, "step": 43310 }, { "epoch": 0.7832908794618944, "grad_norm": 45.46875, "learning_rate": 9.877611007550713e-06, "loss": 16.2131, "step": 43320 }, { "epoch": 0.7834716945310222, "grad_norm": 41.90625, "learning_rate": 9.877582755244053e-06, "loss": 17.0028, "step": 43330 }, { "epoch": 0.7836525096001501, "grad_norm": 43.5, "learning_rate": 9.877554502937392e-06, "loss": 17.1055, "step": 43340 }, { "epoch": 0.7838333246692779, "grad_norm": 42.8125, "learning_rate": 9.877526250630733e-06, "loss": 16.8805, "step": 43350 }, { "epoch": 0.7840141397384058, "grad_norm": 44.78125, "learning_rate": 9.877497998324074e-06, "loss": 16.7048, "step": 43360 }, { "epoch": 0.7841949548075337, "grad_norm": 40.875, "learning_rate": 9.877469746017414e-06, "loss": 16.7925, "step": 43370 }, { "epoch": 0.7843757698766615, "grad_norm": 43.75, "learning_rate": 9.877441493710755e-06, "loss": 16.9311, "step": 43380 }, { "epoch": 0.7845565849457894, "grad_norm": 42.15625, "learning_rate": 9.877413241404095e-06, "loss": 16.5517, "step": 43390 }, { "epoch": 0.7847374000149172, "grad_norm": 43.71875, "learning_rate": 9.877384989097436e-06, "loss": 16.7184, "step": 43400 }, { "epoch": 0.7849182150840451, "grad_norm": 43.625, "learning_rate": 9.877356736790777e-06, "loss": 16.433, "step": 43410 }, { "epoch": 0.785099030153173, "grad_norm": 43.84375, "learning_rate": 9.877328484484117e-06, "loss": 16.5557, "step": 43420 }, { "epoch": 0.7852798452223009, "grad_norm": 43.5625, "learning_rate": 9.877300232177456e-06, "loss": 17.1738, "step": 43430 }, { "epoch": 0.7854606602914287, "grad_norm": 43.09375, "learning_rate": 9.877271979870797e-06, "loss": 16.7636, "step": 43440 }, { "epoch": 0.7856414753605565, "grad_norm": 41.4375, "learning_rate": 9.877243727564137e-06, "loss": 16.6188, "step": 43450 }, { "epoch": 0.7858222904296844, "grad_norm": 46.0, "learning_rate": 9.877215475257478e-06, "loss": 16.8037, "step": 43460 }, { "epoch": 0.7860031054988122, "grad_norm": 42.34375, "learning_rate": 9.877187222950819e-06, "loss": 16.84, "step": 43470 }, { "epoch": 0.7861839205679402, "grad_norm": 40.71875, "learning_rate": 9.87715897064416e-06, "loss": 16.7468, "step": 43480 }, { "epoch": 0.786364735637068, "grad_norm": 42.71875, "learning_rate": 9.8771307183375e-06, "loss": 16.8372, "step": 43490 }, { "epoch": 0.7865455507061958, "grad_norm": 41.9375, "learning_rate": 9.87710246603084e-06, "loss": 16.5974, "step": 43500 }, { "epoch": 0.7867263657753237, "grad_norm": 41.90625, "learning_rate": 9.877074213724181e-06, "loss": 16.6528, "step": 43510 }, { "epoch": 0.7869071808444515, "grad_norm": 43.8125, "learning_rate": 9.87704596141752e-06, "loss": 16.7555, "step": 43520 }, { "epoch": 0.7870879959135795, "grad_norm": 41.5, "learning_rate": 9.87701770911086e-06, "loss": 17.2377, "step": 43530 }, { "epoch": 0.7872688109827073, "grad_norm": 43.375, "learning_rate": 9.876989456804201e-06, "loss": 17.0131, "step": 43540 }, { "epoch": 0.7874496260518351, "grad_norm": 43.8125, "learning_rate": 9.876961204497542e-06, "loss": 16.7978, "step": 43550 }, { "epoch": 0.787630441120963, "grad_norm": 44.0, "learning_rate": 9.876932952190883e-06, "loss": 17.0134, "step": 43560 }, { "epoch": 0.7878112561900908, "grad_norm": 45.40625, "learning_rate": 9.876904699884223e-06, "loss": 16.7749, "step": 43570 }, { "epoch": 0.7879920712592188, "grad_norm": 43.9375, "learning_rate": 9.876876447577564e-06, "loss": 16.8705, "step": 43580 }, { "epoch": 0.7881728863283466, "grad_norm": 42.8125, "learning_rate": 9.876848195270904e-06, "loss": 16.4949, "step": 43590 }, { "epoch": 0.7883537013974745, "grad_norm": 46.65625, "learning_rate": 9.876819942964243e-06, "loss": 17.022, "step": 43600 }, { "epoch": 0.7885345164666023, "grad_norm": 42.15625, "learning_rate": 9.876791690657584e-06, "loss": 16.8378, "step": 43610 }, { "epoch": 0.7887153315357301, "grad_norm": 42.53125, "learning_rate": 9.876763438350925e-06, "loss": 16.9004, "step": 43620 }, { "epoch": 0.7888961466048581, "grad_norm": 44.09375, "learning_rate": 9.876735186044265e-06, "loss": 16.8231, "step": 43630 }, { "epoch": 0.7890769616739859, "grad_norm": 44.375, "learning_rate": 9.876706933737606e-06, "loss": 16.7193, "step": 43640 }, { "epoch": 0.7892577767431138, "grad_norm": 44.1875, "learning_rate": 9.876678681430947e-06, "loss": 16.8568, "step": 43650 }, { "epoch": 0.7894385918122416, "grad_norm": 43.84375, "learning_rate": 9.876650429124287e-06, "loss": 16.5435, "step": 43660 }, { "epoch": 0.7896194068813694, "grad_norm": 43.5, "learning_rate": 9.876622176817628e-06, "loss": 16.9701, "step": 43670 }, { "epoch": 0.7898002219504974, "grad_norm": 44.15625, "learning_rate": 9.876593924510968e-06, "loss": 16.5421, "step": 43680 }, { "epoch": 0.7899810370196252, "grad_norm": 41.40625, "learning_rate": 9.876565672204307e-06, "loss": 16.8361, "step": 43690 }, { "epoch": 0.7901618520887531, "grad_norm": 43.8125, "learning_rate": 9.876537419897648e-06, "loss": 17.0753, "step": 43700 }, { "epoch": 0.7903426671578809, "grad_norm": 43.1875, "learning_rate": 9.876509167590989e-06, "loss": 16.8627, "step": 43710 }, { "epoch": 0.7905234822270087, "grad_norm": 43.40625, "learning_rate": 9.87648091528433e-06, "loss": 16.7451, "step": 43720 }, { "epoch": 0.7907042972961367, "grad_norm": 42.59375, "learning_rate": 9.87645266297767e-06, "loss": 16.8212, "step": 43730 }, { "epoch": 0.7908851123652645, "grad_norm": 41.8125, "learning_rate": 9.87642441067101e-06, "loss": 16.9218, "step": 43740 }, { "epoch": 0.7910659274343924, "grad_norm": 43.96875, "learning_rate": 9.876396158364351e-06, "loss": 16.8543, "step": 43750 }, { "epoch": 0.7912467425035202, "grad_norm": 42.3125, "learning_rate": 9.876367906057692e-06, "loss": 16.5328, "step": 43760 }, { "epoch": 0.7914275575726482, "grad_norm": 43.8125, "learning_rate": 9.87633965375103e-06, "loss": 16.5861, "step": 43770 }, { "epoch": 0.791608372641776, "grad_norm": 43.1875, "learning_rate": 9.876311401444371e-06, "loss": 16.7647, "step": 43780 }, { "epoch": 0.7917891877109038, "grad_norm": 41.5, "learning_rate": 9.876283149137712e-06, "loss": 16.6035, "step": 43790 }, { "epoch": 0.7919700027800317, "grad_norm": 44.40625, "learning_rate": 9.876254896831052e-06, "loss": 16.502, "step": 43800 }, { "epoch": 0.7921508178491595, "grad_norm": 42.5, "learning_rate": 9.876226644524393e-06, "loss": 16.6553, "step": 43810 }, { "epoch": 0.7923316329182875, "grad_norm": 43.21875, "learning_rate": 9.876198392217734e-06, "loss": 16.2923, "step": 43820 }, { "epoch": 0.7925124479874153, "grad_norm": 47.375, "learning_rate": 9.876170139911074e-06, "loss": 16.8679, "step": 43830 }, { "epoch": 0.7926932630565431, "grad_norm": 46.5, "learning_rate": 9.876141887604415e-06, "loss": 17.1426, "step": 43840 }, { "epoch": 0.792874078125671, "grad_norm": 43.5625, "learning_rate": 9.876113635297756e-06, "loss": 16.7616, "step": 43850 }, { "epoch": 0.7930548931947988, "grad_norm": 41.1875, "learning_rate": 9.876085382991095e-06, "loss": 16.7291, "step": 43860 }, { "epoch": 0.7932357082639268, "grad_norm": 41.75, "learning_rate": 9.876057130684435e-06, "loss": 16.9794, "step": 43870 }, { "epoch": 0.7934165233330546, "grad_norm": 45.90625, "learning_rate": 9.876028878377776e-06, "loss": 16.7668, "step": 43880 }, { "epoch": 0.7935973384021824, "grad_norm": 44.6875, "learning_rate": 9.876000626071116e-06, "loss": 17.1568, "step": 43890 }, { "epoch": 0.7937781534713103, "grad_norm": 42.34375, "learning_rate": 9.875972373764457e-06, "loss": 16.3285, "step": 43900 }, { "epoch": 0.7939589685404381, "grad_norm": 45.6875, "learning_rate": 9.875944121457796e-06, "loss": 16.5321, "step": 43910 }, { "epoch": 0.794139783609566, "grad_norm": 43.75, "learning_rate": 9.875915869151138e-06, "loss": 16.8059, "step": 43920 }, { "epoch": 0.7943205986786939, "grad_norm": 41.78125, "learning_rate": 9.875887616844479e-06, "loss": 16.7762, "step": 43930 }, { "epoch": 0.7945014137478218, "grad_norm": 39.8125, "learning_rate": 9.87585936453782e-06, "loss": 16.8121, "step": 43940 }, { "epoch": 0.7946822288169496, "grad_norm": 43.1875, "learning_rate": 9.875831112231158e-06, "loss": 16.7487, "step": 43950 }, { "epoch": 0.7948630438860774, "grad_norm": 46.40625, "learning_rate": 9.875802859924499e-06, "loss": 16.681, "step": 43960 }, { "epoch": 0.7950438589552054, "grad_norm": 44.375, "learning_rate": 9.87577460761784e-06, "loss": 16.903, "step": 43970 }, { "epoch": 0.7952246740243332, "grad_norm": 46.59375, "learning_rate": 9.87574635531118e-06, "loss": 17.0115, "step": 43980 }, { "epoch": 0.7954054890934611, "grad_norm": 43.5625, "learning_rate": 9.875718103004521e-06, "loss": 16.7663, "step": 43990 }, { "epoch": 0.7955863041625889, "grad_norm": 41.90625, "learning_rate": 9.875689850697862e-06, "loss": 16.5117, "step": 44000 }, { "epoch": 0.7957671192317167, "grad_norm": 42.5625, "learning_rate": 9.875661598391202e-06, "loss": 16.799, "step": 44010 }, { "epoch": 0.7959479343008447, "grad_norm": 43.96875, "learning_rate": 9.875633346084543e-06, "loss": 16.8954, "step": 44020 }, { "epoch": 0.7961287493699725, "grad_norm": 44.625, "learning_rate": 9.875605093777882e-06, "loss": 16.9782, "step": 44030 }, { "epoch": 0.7963095644391004, "grad_norm": 42.75, "learning_rate": 9.875576841471222e-06, "loss": 16.9463, "step": 44040 }, { "epoch": 0.7964903795082282, "grad_norm": 40.65625, "learning_rate": 9.875548589164563e-06, "loss": 16.6091, "step": 44050 }, { "epoch": 0.796671194577356, "grad_norm": 44.96875, "learning_rate": 9.875520336857904e-06, "loss": 16.6663, "step": 44060 }, { "epoch": 0.796852009646484, "grad_norm": 44.125, "learning_rate": 9.875492084551244e-06, "loss": 17.0986, "step": 44070 }, { "epoch": 0.7970328247156118, "grad_norm": 41.46875, "learning_rate": 9.875463832244583e-06, "loss": 17.1912, "step": 44080 }, { "epoch": 0.7972136397847397, "grad_norm": 42.09375, "learning_rate": 9.875435579937925e-06, "loss": 17.1274, "step": 44090 }, { "epoch": 0.7973944548538675, "grad_norm": 44.375, "learning_rate": 9.875407327631266e-06, "loss": 16.9897, "step": 44100 }, { "epoch": 0.7975752699229954, "grad_norm": 42.125, "learning_rate": 9.875379075324607e-06, "loss": 16.8096, "step": 44110 }, { "epoch": 0.7977560849921232, "grad_norm": 47.8125, "learning_rate": 9.875350823017946e-06, "loss": 17.1615, "step": 44120 }, { "epoch": 0.7979369000612511, "grad_norm": 43.75, "learning_rate": 9.875322570711286e-06, "loss": 16.8355, "step": 44130 }, { "epoch": 0.798117715130379, "grad_norm": 44.09375, "learning_rate": 9.875294318404627e-06, "loss": 17.0292, "step": 44140 }, { "epoch": 0.7982985301995068, "grad_norm": 43.5, "learning_rate": 9.875266066097967e-06, "loss": 16.3573, "step": 44150 }, { "epoch": 0.7984793452686347, "grad_norm": 42.34375, "learning_rate": 9.875237813791308e-06, "loss": 16.6646, "step": 44160 }, { "epoch": 0.7986601603377625, "grad_norm": 42.34375, "learning_rate": 9.875209561484647e-06, "loss": 16.396, "step": 44170 }, { "epoch": 0.7988409754068904, "grad_norm": 42.71875, "learning_rate": 9.87518130917799e-06, "loss": 16.947, "step": 44180 }, { "epoch": 0.7990217904760183, "grad_norm": 43.65625, "learning_rate": 9.87515305687133e-06, "loss": 16.8843, "step": 44190 }, { "epoch": 0.7992026055451461, "grad_norm": 43.71875, "learning_rate": 9.875124804564669e-06, "loss": 16.7936, "step": 44200 }, { "epoch": 0.799383420614274, "grad_norm": 41.53125, "learning_rate": 9.87509655225801e-06, "loss": 16.9738, "step": 44210 }, { "epoch": 0.7995642356834018, "grad_norm": 43.78125, "learning_rate": 9.87506829995135e-06, "loss": 17.1478, "step": 44220 }, { "epoch": 0.7997450507525297, "grad_norm": 42.40625, "learning_rate": 9.87504004764469e-06, "loss": 16.736, "step": 44230 }, { "epoch": 0.7999258658216576, "grad_norm": 46.125, "learning_rate": 9.875011795338031e-06, "loss": 16.9612, "step": 44240 }, { "epoch": 0.8001066808907854, "grad_norm": 44.625, "learning_rate": 9.874983543031372e-06, "loss": 17.1204, "step": 44250 }, { "epoch": 0.8002874959599133, "grad_norm": 44.40625, "learning_rate": 9.874955290724711e-06, "loss": 16.6089, "step": 44260 }, { "epoch": 0.8004683110290411, "grad_norm": 43.34375, "learning_rate": 9.874927038418053e-06, "loss": 16.6859, "step": 44270 }, { "epoch": 0.8006491260981691, "grad_norm": 41.78125, "learning_rate": 9.874898786111394e-06, "loss": 16.8503, "step": 44280 }, { "epoch": 0.8008299411672969, "grad_norm": 43.75, "learning_rate": 9.874870533804733e-06, "loss": 16.7074, "step": 44290 }, { "epoch": 0.8010107562364247, "grad_norm": 40.625, "learning_rate": 9.874842281498073e-06, "loss": 16.9856, "step": 44300 }, { "epoch": 0.8011915713055526, "grad_norm": 41.15625, "learning_rate": 9.874814029191414e-06, "loss": 17.1389, "step": 44310 }, { "epoch": 0.8013723863746804, "grad_norm": 43.4375, "learning_rate": 9.874785776884755e-06, "loss": 16.9076, "step": 44320 }, { "epoch": 0.8015532014438084, "grad_norm": 42.03125, "learning_rate": 9.874757524578095e-06, "loss": 16.6759, "step": 44330 }, { "epoch": 0.8017340165129362, "grad_norm": 39.84375, "learning_rate": 9.874729272271434e-06, "loss": 16.5483, "step": 44340 }, { "epoch": 0.801914831582064, "grad_norm": 43.125, "learning_rate": 9.874701019964777e-06, "loss": 16.9525, "step": 44350 }, { "epoch": 0.8020956466511919, "grad_norm": 45.78125, "learning_rate": 9.874672767658117e-06, "loss": 16.641, "step": 44360 }, { "epoch": 0.8022764617203197, "grad_norm": 42.34375, "learning_rate": 9.874644515351458e-06, "loss": 16.7242, "step": 44370 }, { "epoch": 0.8024572767894477, "grad_norm": 42.4375, "learning_rate": 9.874616263044797e-06, "loss": 16.9407, "step": 44380 }, { "epoch": 0.8026380918585755, "grad_norm": 44.0625, "learning_rate": 9.874588010738137e-06, "loss": 17.0624, "step": 44390 }, { "epoch": 0.8028189069277033, "grad_norm": 41.59375, "learning_rate": 9.874559758431478e-06, "loss": 16.4757, "step": 44400 }, { "epoch": 0.8029997219968312, "grad_norm": 43.15625, "learning_rate": 9.874531506124819e-06, "loss": 17.0101, "step": 44410 }, { "epoch": 0.803180537065959, "grad_norm": 44.9375, "learning_rate": 9.87450325381816e-06, "loss": 16.7205, "step": 44420 }, { "epoch": 0.803361352135087, "grad_norm": 41.90625, "learning_rate": 9.874475001511498e-06, "loss": 17.1587, "step": 44430 }, { "epoch": 0.8035421672042148, "grad_norm": 42.25, "learning_rate": 9.87444674920484e-06, "loss": 16.4366, "step": 44440 }, { "epoch": 0.8037229822733427, "grad_norm": 46.9375, "learning_rate": 9.874418496898181e-06, "loss": 17.0698, "step": 44450 }, { "epoch": 0.8039037973424705, "grad_norm": 42.5, "learning_rate": 9.87439024459152e-06, "loss": 16.8598, "step": 44460 }, { "epoch": 0.8040846124115983, "grad_norm": 43.65625, "learning_rate": 9.87436199228486e-06, "loss": 16.9069, "step": 44470 }, { "epoch": 0.8042654274807263, "grad_norm": 42.78125, "learning_rate": 9.874333739978201e-06, "loss": 16.4355, "step": 44480 }, { "epoch": 0.8044462425498541, "grad_norm": 47.53125, "learning_rate": 9.874305487671542e-06, "loss": 16.8449, "step": 44490 }, { "epoch": 0.804627057618982, "grad_norm": 42.75, "learning_rate": 9.874277235364882e-06, "loss": 17.022, "step": 44500 }, { "epoch": 0.8048078726881098, "grad_norm": 41.875, "learning_rate": 9.874248983058221e-06, "loss": 17.0746, "step": 44510 }, { "epoch": 0.8049886877572376, "grad_norm": 42.3125, "learning_rate": 9.874220730751562e-06, "loss": 17.0565, "step": 44520 }, { "epoch": 0.8051695028263656, "grad_norm": 40.78125, "learning_rate": 9.874192478444904e-06, "loss": 16.7014, "step": 44530 }, { "epoch": 0.8053503178954934, "grad_norm": 45.09375, "learning_rate": 9.874164226138245e-06, "loss": 16.5769, "step": 44540 }, { "epoch": 0.8055311329646213, "grad_norm": 45.5625, "learning_rate": 9.874135973831584e-06, "loss": 16.7823, "step": 44550 }, { "epoch": 0.8057119480337491, "grad_norm": 46.71875, "learning_rate": 9.874107721524925e-06, "loss": 16.4381, "step": 44560 }, { "epoch": 0.8058927631028769, "grad_norm": 48.0625, "learning_rate": 9.874079469218265e-06, "loss": 16.2223, "step": 44570 }, { "epoch": 0.8060735781720049, "grad_norm": 42.71875, "learning_rate": 9.874051216911606e-06, "loss": 16.6863, "step": 44580 }, { "epoch": 0.8062543932411327, "grad_norm": 41.875, "learning_rate": 9.874022964604946e-06, "loss": 16.6746, "step": 44590 }, { "epoch": 0.8064352083102606, "grad_norm": 40.25, "learning_rate": 9.873994712298285e-06, "loss": 16.7305, "step": 44600 }, { "epoch": 0.8066160233793884, "grad_norm": 47.125, "learning_rate": 9.873966459991626e-06, "loss": 16.9804, "step": 44610 }, { "epoch": 0.8067968384485164, "grad_norm": 46.21875, "learning_rate": 9.873938207684968e-06, "loss": 16.7462, "step": 44620 }, { "epoch": 0.8069776535176442, "grad_norm": 45.9375, "learning_rate": 9.873909955378307e-06, "loss": 16.8084, "step": 44630 }, { "epoch": 0.807158468586772, "grad_norm": 46.125, "learning_rate": 9.873881703071648e-06, "loss": 16.5409, "step": 44640 }, { "epoch": 0.8073392836558999, "grad_norm": 43.46875, "learning_rate": 9.873853450764988e-06, "loss": 16.5718, "step": 44650 }, { "epoch": 0.8075200987250277, "grad_norm": 38.34375, "learning_rate": 9.873825198458329e-06, "loss": 16.6985, "step": 44660 }, { "epoch": 0.8077009137941557, "grad_norm": 41.84375, "learning_rate": 9.87379694615167e-06, "loss": 16.5292, "step": 44670 }, { "epoch": 0.8078817288632835, "grad_norm": 43.65625, "learning_rate": 9.87376869384501e-06, "loss": 16.7325, "step": 44680 }, { "epoch": 0.8080625439324113, "grad_norm": 44.0625, "learning_rate": 9.87374044153835e-06, "loss": 17.2133, "step": 44690 }, { "epoch": 0.8082433590015392, "grad_norm": 44.9375, "learning_rate": 9.873712189231692e-06, "loss": 16.6545, "step": 44700 }, { "epoch": 0.808424174070667, "grad_norm": 44.5625, "learning_rate": 9.873683936925032e-06, "loss": 16.7823, "step": 44710 }, { "epoch": 0.808604989139795, "grad_norm": 41.6875, "learning_rate": 9.873655684618371e-06, "loss": 16.7078, "step": 44720 }, { "epoch": 0.8087858042089228, "grad_norm": 44.625, "learning_rate": 9.873627432311712e-06, "loss": 16.8342, "step": 44730 }, { "epoch": 0.8089666192780506, "grad_norm": 40.3125, "learning_rate": 9.873599180005052e-06, "loss": 17.0291, "step": 44740 }, { "epoch": 0.8091474343471785, "grad_norm": 42.90625, "learning_rate": 9.873570927698393e-06, "loss": 16.6447, "step": 44750 }, { "epoch": 0.8093282494163063, "grad_norm": 45.3125, "learning_rate": 9.873542675391734e-06, "loss": 17.0995, "step": 44760 }, { "epoch": 0.8095090644854342, "grad_norm": 44.21875, "learning_rate": 9.873514423085073e-06, "loss": 16.6301, "step": 44770 }, { "epoch": 0.8096898795545621, "grad_norm": 42.5625, "learning_rate": 9.873486170778413e-06, "loss": 17.1241, "step": 44780 }, { "epoch": 0.80987069462369, "grad_norm": 45.625, "learning_rate": 9.873457918471755e-06, "loss": 16.5711, "step": 44790 }, { "epoch": 0.8100515096928178, "grad_norm": 43.375, "learning_rate": 9.873429666165096e-06, "loss": 16.494, "step": 44800 }, { "epoch": 0.8102323247619456, "grad_norm": 43.875, "learning_rate": 9.873401413858435e-06, "loss": 16.9783, "step": 44810 }, { "epoch": 0.8104131398310735, "grad_norm": 42.09375, "learning_rate": 9.873373161551776e-06, "loss": 16.9719, "step": 44820 }, { "epoch": 0.8105939549002014, "grad_norm": 41.125, "learning_rate": 9.873344909245116e-06, "loss": 16.8303, "step": 44830 }, { "epoch": 0.8107747699693293, "grad_norm": 43.96875, "learning_rate": 9.873316656938457e-06, "loss": 16.6135, "step": 44840 }, { "epoch": 0.8109555850384571, "grad_norm": 47.21875, "learning_rate": 9.873288404631797e-06, "loss": 17.2661, "step": 44850 }, { "epoch": 0.8111364001075849, "grad_norm": 43.09375, "learning_rate": 9.873260152325136e-06, "loss": 16.436, "step": 44860 }, { "epoch": 0.8113172151767128, "grad_norm": 40.4375, "learning_rate": 9.873231900018477e-06, "loss": 16.6702, "step": 44870 }, { "epoch": 0.8114980302458407, "grad_norm": 42.25, "learning_rate": 9.87320364771182e-06, "loss": 16.866, "step": 44880 }, { "epoch": 0.8116788453149686, "grad_norm": 43.34375, "learning_rate": 9.873175395405158e-06, "loss": 16.8068, "step": 44890 }, { "epoch": 0.8118596603840964, "grad_norm": 42.4375, "learning_rate": 9.873147143098499e-06, "loss": 16.5403, "step": 44900 }, { "epoch": 0.8120404754532242, "grad_norm": 42.21875, "learning_rate": 9.87311889079184e-06, "loss": 17.0165, "step": 44910 }, { "epoch": 0.8122212905223521, "grad_norm": 44.25, "learning_rate": 9.87309063848518e-06, "loss": 16.3663, "step": 44920 }, { "epoch": 0.81240210559148, "grad_norm": 42.4375, "learning_rate": 9.87306238617852e-06, "loss": 16.5132, "step": 44930 }, { "epoch": 0.8125829206606079, "grad_norm": 42.21875, "learning_rate": 9.87303413387186e-06, "loss": 16.7811, "step": 44940 }, { "epoch": 0.8127637357297357, "grad_norm": 47.28125, "learning_rate": 9.8730058815652e-06, "loss": 17.3034, "step": 44950 }, { "epoch": 0.8129445507988636, "grad_norm": 42.6875, "learning_rate": 9.872977629258541e-06, "loss": 16.4767, "step": 44960 }, { "epoch": 0.8131253658679914, "grad_norm": 42.125, "learning_rate": 9.872949376951883e-06, "loss": 17.1173, "step": 44970 }, { "epoch": 0.8133061809371193, "grad_norm": 43.46875, "learning_rate": 9.872921124645222e-06, "loss": 16.4008, "step": 44980 }, { "epoch": 0.8134869960062472, "grad_norm": 43.09375, "learning_rate": 9.872892872338563e-06, "loss": 16.4453, "step": 44990 }, { "epoch": 0.813667811075375, "grad_norm": 45.5, "learning_rate": 9.872864620031903e-06, "loss": 16.954, "step": 45000 }, { "epoch": 0.813667811075375, "eval_loss": 2.1011931896209717, "eval_runtime": 229.8768, "eval_samples_per_second": 3158.471, "eval_steps_per_second": 49.353, "step": 45000 }, { "epoch": 0.8138486261445029, "grad_norm": 41.6875, "learning_rate": 9.872836367725244e-06, "loss": 16.1717, "step": 45010 }, { "epoch": 0.8140294412136307, "grad_norm": 40.21875, "learning_rate": 9.872808115418585e-06, "loss": 16.7626, "step": 45020 }, { "epoch": 0.8142102562827586, "grad_norm": 41.25, "learning_rate": 9.872779863111924e-06, "loss": 17.0021, "step": 45030 }, { "epoch": 0.8143910713518865, "grad_norm": 43.40625, "learning_rate": 9.872751610805264e-06, "loss": 16.884, "step": 45040 }, { "epoch": 0.8145718864210143, "grad_norm": 42.0625, "learning_rate": 9.872723358498607e-06, "loss": 16.9262, "step": 45050 }, { "epoch": 0.8147527014901422, "grad_norm": 44.0625, "learning_rate": 9.872695106191945e-06, "loss": 17.088, "step": 45060 }, { "epoch": 0.81493351655927, "grad_norm": 45.5625, "learning_rate": 9.872666853885286e-06, "loss": 16.5677, "step": 45070 }, { "epoch": 0.8151143316283979, "grad_norm": 44.6875, "learning_rate": 9.872638601578627e-06, "loss": 16.9741, "step": 45080 }, { "epoch": 0.8152951466975258, "grad_norm": 42.40625, "learning_rate": 9.872610349271967e-06, "loss": 16.5187, "step": 45090 }, { "epoch": 0.8154759617666536, "grad_norm": 41.84375, "learning_rate": 9.872582096965308e-06, "loss": 16.83, "step": 45100 }, { "epoch": 0.8156567768357815, "grad_norm": 45.0, "learning_rate": 9.872553844658649e-06, "loss": 16.7083, "step": 45110 }, { "epoch": 0.8158375919049093, "grad_norm": 46.1875, "learning_rate": 9.872525592351988e-06, "loss": 17.0987, "step": 45120 }, { "epoch": 0.8160184069740373, "grad_norm": 45.625, "learning_rate": 9.872497340045328e-06, "loss": 16.7546, "step": 45130 }, { "epoch": 0.8161992220431651, "grad_norm": 45.3125, "learning_rate": 9.87246908773867e-06, "loss": 17.1085, "step": 45140 }, { "epoch": 0.8163800371122929, "grad_norm": 43.5, "learning_rate": 9.87244083543201e-06, "loss": 16.7036, "step": 45150 }, { "epoch": 0.8165608521814208, "grad_norm": 45.78125, "learning_rate": 9.87241258312535e-06, "loss": 16.5432, "step": 45160 }, { "epoch": 0.8167416672505486, "grad_norm": 41.1875, "learning_rate": 9.87238433081869e-06, "loss": 16.7783, "step": 45170 }, { "epoch": 0.8169224823196766, "grad_norm": 44.875, "learning_rate": 9.872356078512031e-06, "loss": 17.3186, "step": 45180 }, { "epoch": 0.8171032973888044, "grad_norm": 45.9375, "learning_rate": 9.872327826205372e-06, "loss": 17.2104, "step": 45190 }, { "epoch": 0.8172841124579322, "grad_norm": 42.28125, "learning_rate": 9.87229957389871e-06, "loss": 16.3376, "step": 45200 }, { "epoch": 0.8174649275270601, "grad_norm": 42.46875, "learning_rate": 9.872271321592051e-06, "loss": 16.9167, "step": 45210 }, { "epoch": 0.8176457425961879, "grad_norm": 47.34375, "learning_rate": 9.872243069285392e-06, "loss": 17.1726, "step": 45220 }, { "epoch": 0.8178265576653159, "grad_norm": 43.21875, "learning_rate": 9.872214816978734e-06, "loss": 16.9629, "step": 45230 }, { "epoch": 0.8180073727344437, "grad_norm": 46.375, "learning_rate": 9.872186564672073e-06, "loss": 16.6152, "step": 45240 }, { "epoch": 0.8181881878035715, "grad_norm": 41.53125, "learning_rate": 9.872158312365414e-06, "loss": 16.5411, "step": 45250 }, { "epoch": 0.8183690028726994, "grad_norm": 41.28125, "learning_rate": 9.872130060058755e-06, "loss": 16.7691, "step": 45260 }, { "epoch": 0.8185498179418272, "grad_norm": 43.5, "learning_rate": 9.872101807752095e-06, "loss": 16.5727, "step": 45270 }, { "epoch": 0.8187306330109552, "grad_norm": 42.9375, "learning_rate": 9.872073555445436e-06, "loss": 16.6411, "step": 45280 }, { "epoch": 0.818911448080083, "grad_norm": 41.6875, "learning_rate": 9.872045303138775e-06, "loss": 16.804, "step": 45290 }, { "epoch": 0.8190922631492109, "grad_norm": 44.71875, "learning_rate": 9.872017050832115e-06, "loss": 16.286, "step": 45300 }, { "epoch": 0.8192730782183387, "grad_norm": 43.8125, "learning_rate": 9.871988798525456e-06, "loss": 16.8301, "step": 45310 }, { "epoch": 0.8194538932874665, "grad_norm": 40.5625, "learning_rate": 9.871960546218797e-06, "loss": 16.8716, "step": 45320 }, { "epoch": 0.8196347083565945, "grad_norm": 42.78125, "learning_rate": 9.871932293912137e-06, "loss": 16.4862, "step": 45330 }, { "epoch": 0.8198155234257223, "grad_norm": 46.46875, "learning_rate": 9.871904041605478e-06, "loss": 16.9789, "step": 45340 }, { "epoch": 0.8199963384948502, "grad_norm": 41.34375, "learning_rate": 9.871875789298818e-06, "loss": 16.7286, "step": 45350 }, { "epoch": 0.820177153563978, "grad_norm": 43.5625, "learning_rate": 9.871847536992159e-06, "loss": 16.843, "step": 45360 }, { "epoch": 0.8203579686331058, "grad_norm": 43.3125, "learning_rate": 9.871819284685498e-06, "loss": 16.6393, "step": 45370 }, { "epoch": 0.8205387837022338, "grad_norm": 48.25, "learning_rate": 9.871791032378839e-06, "loss": 16.6034, "step": 45380 }, { "epoch": 0.8207195987713616, "grad_norm": 43.90625, "learning_rate": 9.87176278007218e-06, "loss": 17.0632, "step": 45390 }, { "epoch": 0.8209004138404895, "grad_norm": 44.375, "learning_rate": 9.871734527765522e-06, "loss": 16.6226, "step": 45400 }, { "epoch": 0.8210812289096173, "grad_norm": 45.71875, "learning_rate": 9.87170627545886e-06, "loss": 16.8551, "step": 45410 }, { "epoch": 0.8212620439787451, "grad_norm": 43.25, "learning_rate": 9.871678023152201e-06, "loss": 17.2683, "step": 45420 }, { "epoch": 0.8214428590478731, "grad_norm": 39.3125, "learning_rate": 9.871649770845542e-06, "loss": 17.3095, "step": 45430 }, { "epoch": 0.8216236741170009, "grad_norm": 42.1875, "learning_rate": 9.871621518538882e-06, "loss": 16.9099, "step": 45440 }, { "epoch": 0.8218044891861288, "grad_norm": 42.25, "learning_rate": 9.871593266232223e-06, "loss": 16.8983, "step": 45450 }, { "epoch": 0.8219853042552566, "grad_norm": 44.3125, "learning_rate": 9.871565013925562e-06, "loss": 16.3534, "step": 45460 }, { "epoch": 0.8221661193243845, "grad_norm": 42.5, "learning_rate": 9.871536761618903e-06, "loss": 16.9496, "step": 45470 }, { "epoch": 0.8223469343935124, "grad_norm": 44.21875, "learning_rate": 9.871508509312243e-06, "loss": 16.7329, "step": 45480 }, { "epoch": 0.8225277494626402, "grad_norm": 43.78125, "learning_rate": 9.871480257005584e-06, "loss": 16.4478, "step": 45490 }, { "epoch": 0.8227085645317681, "grad_norm": 46.59375, "learning_rate": 9.871452004698924e-06, "loss": 16.8804, "step": 45500 }, { "epoch": 0.8228893796008959, "grad_norm": 41.25, "learning_rate": 9.871423752392265e-06, "loss": 16.4803, "step": 45510 }, { "epoch": 0.8230701946700238, "grad_norm": 42.375, "learning_rate": 9.871395500085606e-06, "loss": 17.0666, "step": 45520 }, { "epoch": 0.8232510097391517, "grad_norm": 42.03125, "learning_rate": 9.871367247778946e-06, "loss": 17.1151, "step": 45530 }, { "epoch": 0.8234318248082795, "grad_norm": 41.0625, "learning_rate": 9.871338995472285e-06, "loss": 17.0039, "step": 45540 }, { "epoch": 0.8236126398774074, "grad_norm": 45.0, "learning_rate": 9.871310743165626e-06, "loss": 16.9728, "step": 45550 }, { "epoch": 0.8237934549465352, "grad_norm": 48.59375, "learning_rate": 9.871282490858966e-06, "loss": 17.0749, "step": 45560 }, { "epoch": 0.8239742700156631, "grad_norm": 43.375, "learning_rate": 9.871254238552307e-06, "loss": 17.18, "step": 45570 }, { "epoch": 0.824155085084791, "grad_norm": 41.09375, "learning_rate": 9.871225986245648e-06, "loss": 17.0381, "step": 45580 }, { "epoch": 0.8243359001539188, "grad_norm": 42.9375, "learning_rate": 9.871197733938988e-06, "loss": 16.5253, "step": 45590 }, { "epoch": 0.8245167152230467, "grad_norm": 42.28125, "learning_rate": 9.871169481632329e-06, "loss": 16.8323, "step": 45600 }, { "epoch": 0.8246975302921745, "grad_norm": 40.5, "learning_rate": 9.87114122932567e-06, "loss": 16.5359, "step": 45610 }, { "epoch": 0.8248783453613024, "grad_norm": 41.90625, "learning_rate": 9.87111297701901e-06, "loss": 16.7246, "step": 45620 }, { "epoch": 0.8250591604304303, "grad_norm": 47.375, "learning_rate": 9.871084724712349e-06, "loss": 16.5078, "step": 45630 }, { "epoch": 0.8252399754995582, "grad_norm": 43.8125, "learning_rate": 9.87105647240569e-06, "loss": 16.6933, "step": 45640 }, { "epoch": 0.825420790568686, "grad_norm": 43.96875, "learning_rate": 9.87102822009903e-06, "loss": 16.3012, "step": 45650 }, { "epoch": 0.8256016056378138, "grad_norm": 44.125, "learning_rate": 9.870999967792371e-06, "loss": 16.8273, "step": 45660 }, { "epoch": 0.8257824207069417, "grad_norm": 42.65625, "learning_rate": 9.870971715485712e-06, "loss": 17.0442, "step": 45670 }, { "epoch": 0.8259632357760696, "grad_norm": 43.1875, "learning_rate": 9.870943463179052e-06, "loss": 16.7705, "step": 45680 }, { "epoch": 0.8261440508451975, "grad_norm": 42.0, "learning_rate": 9.870915210872393e-06, "loss": 16.8474, "step": 45690 }, { "epoch": 0.8263248659143253, "grad_norm": 45.09375, "learning_rate": 9.870886958565733e-06, "loss": 16.8813, "step": 45700 }, { "epoch": 0.8265056809834531, "grad_norm": 44.375, "learning_rate": 9.870858706259074e-06, "loss": 16.8363, "step": 45710 }, { "epoch": 0.826686496052581, "grad_norm": 43.71875, "learning_rate": 9.870830453952413e-06, "loss": 16.4847, "step": 45720 }, { "epoch": 0.8268673111217089, "grad_norm": 44.9375, "learning_rate": 9.870802201645754e-06, "loss": 16.3199, "step": 45730 }, { "epoch": 0.8270481261908368, "grad_norm": 40.875, "learning_rate": 9.870773949339094e-06, "loss": 17.1142, "step": 45740 }, { "epoch": 0.8272289412599646, "grad_norm": 42.46875, "learning_rate": 9.870745697032435e-06, "loss": 17.0783, "step": 45750 }, { "epoch": 0.8274097563290924, "grad_norm": 45.25, "learning_rate": 9.870717444725775e-06, "loss": 16.8826, "step": 45760 }, { "epoch": 0.8275905713982203, "grad_norm": 42.75, "learning_rate": 9.870689192419116e-06, "loss": 16.5429, "step": 45770 }, { "epoch": 0.8277713864673482, "grad_norm": 42.28125, "learning_rate": 9.870660940112457e-06, "loss": 16.6929, "step": 45780 }, { "epoch": 0.8279522015364761, "grad_norm": 43.5, "learning_rate": 9.870632687805797e-06, "loss": 17.2222, "step": 45790 }, { "epoch": 0.8281330166056039, "grad_norm": 41.3125, "learning_rate": 9.870604435499136e-06, "loss": 17.0767, "step": 45800 }, { "epoch": 0.8283138316747318, "grad_norm": 42.5625, "learning_rate": 9.870576183192477e-06, "loss": 16.9483, "step": 45810 }, { "epoch": 0.8284946467438596, "grad_norm": 43.25, "learning_rate": 9.870547930885818e-06, "loss": 17.0716, "step": 45820 }, { "epoch": 0.8286754618129875, "grad_norm": 42.375, "learning_rate": 9.870519678579158e-06, "loss": 16.5698, "step": 45830 }, { "epoch": 0.8288562768821154, "grad_norm": 40.625, "learning_rate": 9.870491426272499e-06, "loss": 16.5512, "step": 45840 }, { "epoch": 0.8290370919512432, "grad_norm": 43.65625, "learning_rate": 9.87046317396584e-06, "loss": 17.2864, "step": 45850 }, { "epoch": 0.8292179070203711, "grad_norm": 43.125, "learning_rate": 9.87043492165918e-06, "loss": 17.0127, "step": 45860 }, { "epoch": 0.8293987220894989, "grad_norm": 42.28125, "learning_rate": 9.87040666935252e-06, "loss": 16.7168, "step": 45870 }, { "epoch": 0.8295795371586268, "grad_norm": 43.21875, "learning_rate": 9.870378417045861e-06, "loss": 16.454, "step": 45880 }, { "epoch": 0.8297603522277547, "grad_norm": 43.78125, "learning_rate": 9.8703501647392e-06, "loss": 16.5775, "step": 45890 }, { "epoch": 0.8299411672968825, "grad_norm": 41.6875, "learning_rate": 9.87032191243254e-06, "loss": 16.7222, "step": 45900 }, { "epoch": 0.8301219823660104, "grad_norm": 42.4375, "learning_rate": 9.870293660125881e-06, "loss": 16.7991, "step": 45910 }, { "epoch": 0.8303027974351382, "grad_norm": 42.46875, "learning_rate": 9.870265407819222e-06, "loss": 16.6125, "step": 45920 }, { "epoch": 0.830483612504266, "grad_norm": 41.875, "learning_rate": 9.870237155512563e-06, "loss": 16.341, "step": 45930 }, { "epoch": 0.830664427573394, "grad_norm": 42.25, "learning_rate": 9.870208903205903e-06, "loss": 17.3, "step": 45940 }, { "epoch": 0.8308452426425218, "grad_norm": 43.34375, "learning_rate": 9.870180650899244e-06, "loss": 16.0941, "step": 45950 }, { "epoch": 0.8310260577116497, "grad_norm": 44.5, "learning_rate": 9.870152398592585e-06, "loss": 16.6848, "step": 45960 }, { "epoch": 0.8312068727807775, "grad_norm": 42.0, "learning_rate": 9.870124146285923e-06, "loss": 16.6799, "step": 45970 }, { "epoch": 0.8313876878499055, "grad_norm": 42.34375, "learning_rate": 9.870095893979264e-06, "loss": 16.3178, "step": 45980 }, { "epoch": 0.8315685029190333, "grad_norm": 41.96875, "learning_rate": 9.870067641672605e-06, "loss": 16.6089, "step": 45990 }, { "epoch": 0.8317493179881611, "grad_norm": 44.15625, "learning_rate": 9.870039389365945e-06, "loss": 17.0249, "step": 46000 }, { "epoch": 0.831930133057289, "grad_norm": 45.96875, "learning_rate": 9.870011137059286e-06, "loss": 16.8498, "step": 46010 }, { "epoch": 0.8321109481264168, "grad_norm": 45.53125, "learning_rate": 9.869982884752627e-06, "loss": 16.647, "step": 46020 }, { "epoch": 0.8322917631955448, "grad_norm": 43.25, "learning_rate": 9.869954632445967e-06, "loss": 16.6773, "step": 46030 }, { "epoch": 0.8324725782646726, "grad_norm": 43.5625, "learning_rate": 9.869926380139308e-06, "loss": 16.4971, "step": 46040 }, { "epoch": 0.8326533933338004, "grad_norm": 43.1875, "learning_rate": 9.869898127832648e-06, "loss": 16.3478, "step": 46050 }, { "epoch": 0.8328342084029283, "grad_norm": 43.8125, "learning_rate": 9.869869875525987e-06, "loss": 16.8095, "step": 46060 }, { "epoch": 0.8330150234720561, "grad_norm": 45.375, "learning_rate": 9.869841623219328e-06, "loss": 17.218, "step": 46070 }, { "epoch": 0.8331958385411841, "grad_norm": 43.0625, "learning_rate": 9.869813370912669e-06, "loss": 16.6927, "step": 46080 }, { "epoch": 0.8333766536103119, "grad_norm": 44.3125, "learning_rate": 9.86978511860601e-06, "loss": 16.999, "step": 46090 }, { "epoch": 0.8335574686794397, "grad_norm": 47.84375, "learning_rate": 9.86975686629935e-06, "loss": 16.9362, "step": 46100 }, { "epoch": 0.8337382837485676, "grad_norm": 47.09375, "learning_rate": 9.86972861399269e-06, "loss": 16.9018, "step": 46110 }, { "epoch": 0.8339190988176954, "grad_norm": 49.65625, "learning_rate": 9.869700361686031e-06, "loss": 17.1598, "step": 46120 }, { "epoch": 0.8340999138868234, "grad_norm": 44.375, "learning_rate": 9.869672109379372e-06, "loss": 16.3532, "step": 46130 }, { "epoch": 0.8342807289559512, "grad_norm": 42.125, "learning_rate": 9.869643857072712e-06, "loss": 16.8197, "step": 46140 }, { "epoch": 0.8344615440250791, "grad_norm": 43.375, "learning_rate": 9.869615604766051e-06, "loss": 16.5218, "step": 46150 }, { "epoch": 0.8346423590942069, "grad_norm": 41.4375, "learning_rate": 9.869587352459392e-06, "loss": 16.7873, "step": 46160 }, { "epoch": 0.8348231741633347, "grad_norm": 41.4375, "learning_rate": 9.869559100152733e-06, "loss": 16.6171, "step": 46170 }, { "epoch": 0.8350039892324627, "grad_norm": 41.59375, "learning_rate": 9.869530847846073e-06, "loss": 16.6384, "step": 46180 }, { "epoch": 0.8351848043015905, "grad_norm": 42.0, "learning_rate": 9.869502595539414e-06, "loss": 16.6035, "step": 46190 }, { "epoch": 0.8353656193707184, "grad_norm": 44.96875, "learning_rate": 9.869474343232754e-06, "loss": 16.9676, "step": 46200 }, { "epoch": 0.8355464344398462, "grad_norm": 42.96875, "learning_rate": 9.869446090926095e-06, "loss": 16.6818, "step": 46210 }, { "epoch": 0.835727249508974, "grad_norm": 41.6875, "learning_rate": 9.869417838619436e-06, "loss": 16.6355, "step": 46220 }, { "epoch": 0.835908064578102, "grad_norm": 44.5625, "learning_rate": 9.869389586312775e-06, "loss": 16.671, "step": 46230 }, { "epoch": 0.8360888796472298, "grad_norm": 42.625, "learning_rate": 9.869361334006115e-06, "loss": 16.6258, "step": 46240 }, { "epoch": 0.8362696947163577, "grad_norm": 43.3125, "learning_rate": 9.869333081699456e-06, "loss": 16.8311, "step": 46250 }, { "epoch": 0.8364505097854855, "grad_norm": 40.5, "learning_rate": 9.869304829392796e-06, "loss": 16.7952, "step": 46260 }, { "epoch": 0.8366313248546133, "grad_norm": 43.03125, "learning_rate": 9.869276577086137e-06, "loss": 16.7644, "step": 46270 }, { "epoch": 0.8368121399237413, "grad_norm": 43.71875, "learning_rate": 9.869248324779478e-06, "loss": 16.9682, "step": 46280 }, { "epoch": 0.8369929549928691, "grad_norm": 44.375, "learning_rate": 9.869220072472818e-06, "loss": 16.6565, "step": 46290 }, { "epoch": 0.837173770061997, "grad_norm": 45.3125, "learning_rate": 9.869191820166159e-06, "loss": 16.4278, "step": 46300 }, { "epoch": 0.8373545851311248, "grad_norm": 44.75, "learning_rate": 9.8691635678595e-06, "loss": 17.0361, "step": 46310 }, { "epoch": 0.8375354002002527, "grad_norm": 41.8125, "learning_rate": 9.869135315552838e-06, "loss": 16.7528, "step": 46320 }, { "epoch": 0.8377162152693806, "grad_norm": 43.84375, "learning_rate": 9.869107063246179e-06, "loss": 16.6941, "step": 46330 }, { "epoch": 0.8378970303385084, "grad_norm": 41.625, "learning_rate": 9.86907881093952e-06, "loss": 16.5204, "step": 46340 }, { "epoch": 0.8380778454076363, "grad_norm": 41.25, "learning_rate": 9.86905055863286e-06, "loss": 17.0408, "step": 46350 }, { "epoch": 0.8382586604767641, "grad_norm": 40.8125, "learning_rate": 9.869022306326201e-06, "loss": 16.3497, "step": 46360 }, { "epoch": 0.838439475545892, "grad_norm": 42.15625, "learning_rate": 9.868994054019542e-06, "loss": 16.5019, "step": 46370 }, { "epoch": 0.8386202906150199, "grad_norm": 41.8125, "learning_rate": 9.868965801712882e-06, "loss": 16.9828, "step": 46380 }, { "epoch": 0.8388011056841477, "grad_norm": 45.5, "learning_rate": 9.868937549406223e-06, "loss": 16.5874, "step": 46390 }, { "epoch": 0.8389819207532756, "grad_norm": 44.9375, "learning_rate": 9.868909297099562e-06, "loss": 17.0737, "step": 46400 }, { "epoch": 0.8391627358224034, "grad_norm": 40.84375, "learning_rate": 9.868881044792902e-06, "loss": 17.0793, "step": 46410 }, { "epoch": 0.8393435508915313, "grad_norm": 40.0625, "learning_rate": 9.868852792486243e-06, "loss": 16.9732, "step": 46420 }, { "epoch": 0.8395243659606592, "grad_norm": 41.78125, "learning_rate": 9.868824540179584e-06, "loss": 17.0003, "step": 46430 }, { "epoch": 0.839705181029787, "grad_norm": 41.96875, "learning_rate": 9.868796287872924e-06, "loss": 16.6613, "step": 46440 }, { "epoch": 0.8398859960989149, "grad_norm": 43.78125, "learning_rate": 9.868768035566265e-06, "loss": 16.735, "step": 46450 }, { "epoch": 0.8400668111680427, "grad_norm": 44.90625, "learning_rate": 9.868739783259605e-06, "loss": 16.3271, "step": 46460 }, { "epoch": 0.8402476262371706, "grad_norm": 42.40625, "learning_rate": 9.868711530952946e-06, "loss": 16.5614, "step": 46470 }, { "epoch": 0.8404284413062985, "grad_norm": 43.84375, "learning_rate": 9.868683278646287e-06, "loss": 16.7678, "step": 46480 }, { "epoch": 0.8406092563754263, "grad_norm": 44.21875, "learning_rate": 9.868655026339626e-06, "loss": 16.8397, "step": 46490 }, { "epoch": 0.8407900714445542, "grad_norm": 41.28125, "learning_rate": 9.868626774032966e-06, "loss": 16.9057, "step": 46500 }, { "epoch": 0.840970886513682, "grad_norm": 42.09375, "learning_rate": 9.868598521726307e-06, "loss": 16.3798, "step": 46510 }, { "epoch": 0.8411517015828099, "grad_norm": 42.3125, "learning_rate": 9.868570269419648e-06, "loss": 16.7016, "step": 46520 }, { "epoch": 0.8413325166519378, "grad_norm": 44.09375, "learning_rate": 9.868542017112988e-06, "loss": 16.4862, "step": 46530 }, { "epoch": 0.8415133317210657, "grad_norm": 45.59375, "learning_rate": 9.868513764806329e-06, "loss": 16.9339, "step": 46540 }, { "epoch": 0.8416941467901935, "grad_norm": 44.8125, "learning_rate": 9.86848551249967e-06, "loss": 17.1162, "step": 46550 }, { "epoch": 0.8418749618593213, "grad_norm": 43.96875, "learning_rate": 9.86845726019301e-06, "loss": 17.1667, "step": 46560 }, { "epoch": 0.8420557769284492, "grad_norm": 43.21875, "learning_rate": 9.86842900788635e-06, "loss": 16.5697, "step": 46570 }, { "epoch": 0.842236591997577, "grad_norm": 41.125, "learning_rate": 9.86840075557969e-06, "loss": 16.86, "step": 46580 }, { "epoch": 0.842417407066705, "grad_norm": 41.34375, "learning_rate": 9.86837250327303e-06, "loss": 16.8571, "step": 46590 }, { "epoch": 0.8425982221358328, "grad_norm": 45.9375, "learning_rate": 9.86834425096637e-06, "loss": 17.0934, "step": 46600 }, { "epoch": 0.8427790372049606, "grad_norm": 40.28125, "learning_rate": 9.868315998659711e-06, "loss": 17.0367, "step": 46610 }, { "epoch": 0.8429598522740885, "grad_norm": 43.9375, "learning_rate": 9.868287746353052e-06, "loss": 16.933, "step": 46620 }, { "epoch": 0.8431406673432164, "grad_norm": 45.53125, "learning_rate": 9.868259494046393e-06, "loss": 17.0869, "step": 46630 }, { "epoch": 0.8433214824123443, "grad_norm": 42.625, "learning_rate": 9.868231241739733e-06, "loss": 16.5878, "step": 46640 }, { "epoch": 0.8435022974814721, "grad_norm": 42.9375, "learning_rate": 9.868202989433074e-06, "loss": 16.6963, "step": 46650 }, { "epoch": 0.8436831125505999, "grad_norm": 43.3125, "learning_rate": 9.868174737126413e-06, "loss": 17.0261, "step": 46660 }, { "epoch": 0.8438639276197278, "grad_norm": 42.03125, "learning_rate": 9.868146484819753e-06, "loss": 16.5386, "step": 46670 }, { "epoch": 0.8440447426888557, "grad_norm": 40.4375, "learning_rate": 9.868118232513094e-06, "loss": 16.9873, "step": 46680 }, { "epoch": 0.8442255577579836, "grad_norm": 41.1875, "learning_rate": 9.868089980206435e-06, "loss": 16.8286, "step": 46690 }, { "epoch": 0.8444063728271114, "grad_norm": 40.84375, "learning_rate": 9.868061727899775e-06, "loss": 16.9142, "step": 46700 }, { "epoch": 0.8445871878962393, "grad_norm": 41.625, "learning_rate": 9.868033475593114e-06, "loss": 16.7071, "step": 46710 }, { "epoch": 0.8447680029653671, "grad_norm": 41.03125, "learning_rate": 9.868005223286457e-06, "loss": 17.1076, "step": 46720 }, { "epoch": 0.844948818034495, "grad_norm": 43.28125, "learning_rate": 9.867976970979797e-06, "loss": 16.5466, "step": 46730 }, { "epoch": 0.8451296331036229, "grad_norm": 44.875, "learning_rate": 9.867948718673138e-06, "loss": 17.1935, "step": 46740 }, { "epoch": 0.8453104481727507, "grad_norm": 42.96875, "learning_rate": 9.867920466366477e-06, "loss": 16.4253, "step": 46750 }, { "epoch": 0.8454912632418786, "grad_norm": 47.0, "learning_rate": 9.867892214059817e-06, "loss": 16.9154, "step": 46760 }, { "epoch": 0.8456720783110064, "grad_norm": 45.59375, "learning_rate": 9.867863961753158e-06, "loss": 16.6339, "step": 46770 }, { "epoch": 0.8458528933801343, "grad_norm": 40.65625, "learning_rate": 9.867835709446499e-06, "loss": 16.3904, "step": 46780 }, { "epoch": 0.8460337084492622, "grad_norm": 42.59375, "learning_rate": 9.86780745713984e-06, "loss": 16.8356, "step": 46790 }, { "epoch": 0.84621452351839, "grad_norm": 45.6875, "learning_rate": 9.867779204833178e-06, "loss": 16.9262, "step": 46800 }, { "epoch": 0.8463953385875179, "grad_norm": 42.8125, "learning_rate": 9.86775095252652e-06, "loss": 17.0825, "step": 46810 }, { "epoch": 0.8465761536566457, "grad_norm": 42.0625, "learning_rate": 9.867722700219861e-06, "loss": 16.8047, "step": 46820 }, { "epoch": 0.8467569687257736, "grad_norm": 43.9375, "learning_rate": 9.8676944479132e-06, "loss": 16.5422, "step": 46830 }, { "epoch": 0.8469377837949015, "grad_norm": 44.3125, "learning_rate": 9.86766619560654e-06, "loss": 16.8578, "step": 46840 }, { "epoch": 0.8471185988640293, "grad_norm": 42.9375, "learning_rate": 9.867637943299881e-06, "loss": 16.5696, "step": 46850 }, { "epoch": 0.8472994139331572, "grad_norm": 47.4375, "learning_rate": 9.867609690993222e-06, "loss": 16.7412, "step": 46860 }, { "epoch": 0.847480229002285, "grad_norm": 43.46875, "learning_rate": 9.867581438686563e-06, "loss": 16.9719, "step": 46870 }, { "epoch": 0.847661044071413, "grad_norm": 43.75, "learning_rate": 9.867553186379903e-06, "loss": 17.0033, "step": 46880 }, { "epoch": 0.8478418591405408, "grad_norm": 44.4375, "learning_rate": 9.867524934073244e-06, "loss": 16.7468, "step": 46890 }, { "epoch": 0.8480226742096686, "grad_norm": 43.0625, "learning_rate": 9.867496681766584e-06, "loss": 16.8236, "step": 46900 }, { "epoch": 0.8482034892787965, "grad_norm": 44.125, "learning_rate": 9.867468429459925e-06, "loss": 16.9039, "step": 46910 }, { "epoch": 0.8483843043479243, "grad_norm": 42.0, "learning_rate": 9.867440177153264e-06, "loss": 16.8074, "step": 46920 }, { "epoch": 0.8485651194170523, "grad_norm": 40.90625, "learning_rate": 9.867411924846605e-06, "loss": 16.4367, "step": 46930 }, { "epoch": 0.8487459344861801, "grad_norm": 45.90625, "learning_rate": 9.867383672539945e-06, "loss": 17.0261, "step": 46940 }, { "epoch": 0.8489267495553079, "grad_norm": 44.40625, "learning_rate": 9.867355420233286e-06, "loss": 16.4444, "step": 46950 }, { "epoch": 0.8491075646244358, "grad_norm": 42.25, "learning_rate": 9.867327167926626e-06, "loss": 16.2641, "step": 46960 }, { "epoch": 0.8492883796935636, "grad_norm": 42.5625, "learning_rate": 9.867298915619965e-06, "loss": 16.3875, "step": 46970 }, { "epoch": 0.8494691947626916, "grad_norm": 44.8125, "learning_rate": 9.867270663313308e-06, "loss": 16.8093, "step": 46980 }, { "epoch": 0.8496500098318194, "grad_norm": 43.6875, "learning_rate": 9.867242411006648e-06, "loss": 16.3896, "step": 46990 }, { "epoch": 0.8498308249009472, "grad_norm": 44.40625, "learning_rate": 9.867214158699989e-06, "loss": 16.6276, "step": 47000 }, { "epoch": 0.8500116399700751, "grad_norm": 41.5625, "learning_rate": 9.867185906393328e-06, "loss": 16.7573, "step": 47010 }, { "epoch": 0.8501924550392029, "grad_norm": 41.5625, "learning_rate": 9.867157654086668e-06, "loss": 17.2952, "step": 47020 }, { "epoch": 0.8503732701083309, "grad_norm": 42.25, "learning_rate": 9.867129401780009e-06, "loss": 17.0923, "step": 47030 }, { "epoch": 0.8505540851774587, "grad_norm": 41.78125, "learning_rate": 9.86710114947335e-06, "loss": 16.9049, "step": 47040 }, { "epoch": 0.8507349002465866, "grad_norm": 41.78125, "learning_rate": 9.86707289716669e-06, "loss": 16.667, "step": 47050 }, { "epoch": 0.8509157153157144, "grad_norm": 43.09375, "learning_rate": 9.86704464486003e-06, "loss": 16.6055, "step": 47060 }, { "epoch": 0.8510965303848422, "grad_norm": 42.40625, "learning_rate": 9.867016392553372e-06, "loss": 17.369, "step": 47070 }, { "epoch": 0.8512773454539702, "grad_norm": 44.5625, "learning_rate": 9.866988140246712e-06, "loss": 16.3215, "step": 47080 }, { "epoch": 0.851458160523098, "grad_norm": 43.90625, "learning_rate": 9.866959887940051e-06, "loss": 16.7921, "step": 47090 }, { "epoch": 0.8516389755922259, "grad_norm": 45.53125, "learning_rate": 9.866931635633392e-06, "loss": 16.9453, "step": 47100 }, { "epoch": 0.8518197906613537, "grad_norm": 44.0, "learning_rate": 9.866903383326732e-06, "loss": 17.1338, "step": 47110 }, { "epoch": 0.8520006057304815, "grad_norm": 40.0625, "learning_rate": 9.866875131020073e-06, "loss": 16.6403, "step": 47120 }, { "epoch": 0.8521814207996095, "grad_norm": 44.21875, "learning_rate": 9.866846878713414e-06, "loss": 16.5787, "step": 47130 }, { "epoch": 0.8523622358687373, "grad_norm": 47.53125, "learning_rate": 9.866818626406753e-06, "loss": 16.6966, "step": 47140 }, { "epoch": 0.8525430509378652, "grad_norm": 45.90625, "learning_rate": 9.866790374100093e-06, "loss": 16.2386, "step": 47150 }, { "epoch": 0.852723866006993, "grad_norm": 42.59375, "learning_rate": 9.866762121793436e-06, "loss": 16.7593, "step": 47160 }, { "epoch": 0.8529046810761208, "grad_norm": 44.3125, "learning_rate": 9.866733869486776e-06, "loss": 16.6689, "step": 47170 }, { "epoch": 0.8530854961452488, "grad_norm": 46.3125, "learning_rate": 9.866705617180115e-06, "loss": 16.603, "step": 47180 }, { "epoch": 0.8532663112143766, "grad_norm": 42.625, "learning_rate": 9.866677364873456e-06, "loss": 16.7621, "step": 47190 }, { "epoch": 0.8534471262835045, "grad_norm": 43.25, "learning_rate": 9.866649112566796e-06, "loss": 16.6054, "step": 47200 }, { "epoch": 0.8536279413526323, "grad_norm": 44.1875, "learning_rate": 9.866620860260137e-06, "loss": 16.8067, "step": 47210 }, { "epoch": 0.8538087564217602, "grad_norm": 44.40625, "learning_rate": 9.866592607953478e-06, "loss": 16.8855, "step": 47220 }, { "epoch": 0.853989571490888, "grad_norm": 41.1875, "learning_rate": 9.866564355646816e-06, "loss": 16.7962, "step": 47230 }, { "epoch": 0.8541703865600159, "grad_norm": 40.78125, "learning_rate": 9.866536103340159e-06, "loss": 16.797, "step": 47240 }, { "epoch": 0.8543512016291438, "grad_norm": 46.96875, "learning_rate": 9.8665078510335e-06, "loss": 16.5223, "step": 47250 }, { "epoch": 0.8545320166982716, "grad_norm": 45.8125, "learning_rate": 9.866479598726838e-06, "loss": 16.5319, "step": 47260 }, { "epoch": 0.8547128317673995, "grad_norm": 45.53125, "learning_rate": 9.866451346420179e-06, "loss": 16.6102, "step": 47270 }, { "epoch": 0.8548936468365274, "grad_norm": 43.78125, "learning_rate": 9.86642309411352e-06, "loss": 16.6636, "step": 47280 }, { "epoch": 0.8550744619056552, "grad_norm": 43.03125, "learning_rate": 9.86639484180686e-06, "loss": 16.6404, "step": 47290 }, { "epoch": 0.8552552769747831, "grad_norm": 42.9375, "learning_rate": 9.8663665895002e-06, "loss": 16.7613, "step": 47300 }, { "epoch": 0.8554360920439109, "grad_norm": 41.75, "learning_rate": 9.866338337193541e-06, "loss": 17.157, "step": 47310 }, { "epoch": 0.8556169071130388, "grad_norm": 46.71875, "learning_rate": 9.86631008488688e-06, "loss": 17.1103, "step": 47320 }, { "epoch": 0.8557977221821667, "grad_norm": 43.90625, "learning_rate": 9.866281832580223e-06, "loss": 16.653, "step": 47330 }, { "epoch": 0.8559785372512945, "grad_norm": 43.75, "learning_rate": 9.866253580273563e-06, "loss": 17.0226, "step": 47340 }, { "epoch": 0.8561593523204224, "grad_norm": 42.0625, "learning_rate": 9.866225327966902e-06, "loss": 16.7507, "step": 47350 }, { "epoch": 0.8563401673895502, "grad_norm": 44.90625, "learning_rate": 9.866197075660243e-06, "loss": 16.1328, "step": 47360 }, { "epoch": 0.8565209824586781, "grad_norm": 43.3125, "learning_rate": 9.866168823353583e-06, "loss": 16.8316, "step": 47370 }, { "epoch": 0.856701797527806, "grad_norm": 41.5, "learning_rate": 9.866140571046924e-06, "loss": 16.9856, "step": 47380 }, { "epoch": 0.8568826125969339, "grad_norm": 45.125, "learning_rate": 9.866112318740265e-06, "loss": 17.0456, "step": 47390 }, { "epoch": 0.8570634276660617, "grad_norm": 43.46875, "learning_rate": 9.866084066433604e-06, "loss": 17.0106, "step": 47400 }, { "epoch": 0.8572442427351895, "grad_norm": 40.4375, "learning_rate": 9.866055814126944e-06, "loss": 16.3447, "step": 47410 }, { "epoch": 0.8574250578043174, "grad_norm": 45.0625, "learning_rate": 9.866027561820287e-06, "loss": 17.0897, "step": 47420 }, { "epoch": 0.8576058728734453, "grad_norm": 42.21875, "learning_rate": 9.865999309513627e-06, "loss": 16.8414, "step": 47430 }, { "epoch": 0.8577866879425732, "grad_norm": 44.03125, "learning_rate": 9.865971057206966e-06, "loss": 16.7726, "step": 47440 }, { "epoch": 0.857967503011701, "grad_norm": 43.09375, "learning_rate": 9.865942804900307e-06, "loss": 16.839, "step": 47450 }, { "epoch": 0.8581483180808288, "grad_norm": 44.1875, "learning_rate": 9.865914552593647e-06, "loss": 16.4288, "step": 47460 }, { "epoch": 0.8583291331499567, "grad_norm": 42.90625, "learning_rate": 9.865886300286988e-06, "loss": 16.6905, "step": 47470 }, { "epoch": 0.8585099482190846, "grad_norm": 43.34375, "learning_rate": 9.865858047980329e-06, "loss": 16.5618, "step": 47480 }, { "epoch": 0.8586907632882125, "grad_norm": 46.125, "learning_rate": 9.865829795673668e-06, "loss": 16.7685, "step": 47490 }, { "epoch": 0.8588715783573403, "grad_norm": 42.5, "learning_rate": 9.865801543367008e-06, "loss": 16.5453, "step": 47500 }, { "epoch": 0.8590523934264681, "grad_norm": 40.78125, "learning_rate": 9.86577329106035e-06, "loss": 16.8157, "step": 47510 }, { "epoch": 0.859233208495596, "grad_norm": 45.9375, "learning_rate": 9.86574503875369e-06, "loss": 16.924, "step": 47520 }, { "epoch": 0.8594140235647239, "grad_norm": 42.09375, "learning_rate": 9.86571678644703e-06, "loss": 16.8491, "step": 47530 }, { "epoch": 0.8595948386338518, "grad_norm": 42.875, "learning_rate": 9.86568853414037e-06, "loss": 16.7195, "step": 47540 }, { "epoch": 0.8597756537029796, "grad_norm": 47.21875, "learning_rate": 9.865660281833711e-06, "loss": 16.9705, "step": 47550 }, { "epoch": 0.8599564687721075, "grad_norm": 43.375, "learning_rate": 9.865632029527052e-06, "loss": 16.7851, "step": 47560 }, { "epoch": 0.8601372838412353, "grad_norm": 43.625, "learning_rate": 9.865603777220391e-06, "loss": 17.0512, "step": 47570 }, { "epoch": 0.8603180989103631, "grad_norm": 43.34375, "learning_rate": 9.865575524913731e-06, "loss": 16.9528, "step": 47580 }, { "epoch": 0.8604989139794911, "grad_norm": 45.21875, "learning_rate": 9.865547272607074e-06, "loss": 16.8987, "step": 47590 }, { "epoch": 0.8606797290486189, "grad_norm": 41.9375, "learning_rate": 9.865519020300414e-06, "loss": 16.7751, "step": 47600 }, { "epoch": 0.8608605441177468, "grad_norm": 45.34375, "learning_rate": 9.865490767993753e-06, "loss": 16.5027, "step": 47610 }, { "epoch": 0.8610413591868746, "grad_norm": 45.4375, "learning_rate": 9.865462515687094e-06, "loss": 17.264, "step": 47620 }, { "epoch": 0.8612221742560024, "grad_norm": 44.40625, "learning_rate": 9.865434263380435e-06, "loss": 16.5418, "step": 47630 }, { "epoch": 0.8614029893251304, "grad_norm": 44.875, "learning_rate": 9.865406011073775e-06, "loss": 16.6337, "step": 47640 }, { "epoch": 0.8615838043942582, "grad_norm": 43.46875, "learning_rate": 9.865377758767116e-06, "loss": 16.822, "step": 47650 }, { "epoch": 0.8617646194633861, "grad_norm": 43.15625, "learning_rate": 9.865349506460455e-06, "loss": 17.0387, "step": 47660 }, { "epoch": 0.8619454345325139, "grad_norm": 40.4375, "learning_rate": 9.865321254153795e-06, "loss": 16.4247, "step": 47670 }, { "epoch": 0.8621262496016417, "grad_norm": 40.40625, "learning_rate": 9.865293001847138e-06, "loss": 16.9979, "step": 47680 }, { "epoch": 0.8623070646707697, "grad_norm": 42.25, "learning_rate": 9.865264749540477e-06, "loss": 16.5828, "step": 47690 }, { "epoch": 0.8624878797398975, "grad_norm": 42.53125, "learning_rate": 9.865236497233817e-06, "loss": 16.8847, "step": 47700 }, { "epoch": 0.8626686948090254, "grad_norm": 44.09375, "learning_rate": 9.865208244927158e-06, "loss": 16.9782, "step": 47710 }, { "epoch": 0.8628495098781532, "grad_norm": 44.875, "learning_rate": 9.865179992620498e-06, "loss": 16.5684, "step": 47720 }, { "epoch": 0.8630303249472812, "grad_norm": 44.8125, "learning_rate": 9.865151740313839e-06, "loss": 16.2784, "step": 47730 }, { "epoch": 0.863211140016409, "grad_norm": 45.40625, "learning_rate": 9.86512348800718e-06, "loss": 17.0211, "step": 47740 }, { "epoch": 0.8633919550855368, "grad_norm": 46.625, "learning_rate": 9.865095235700519e-06, "loss": 16.7793, "step": 47750 }, { "epoch": 0.8635727701546647, "grad_norm": 38.5625, "learning_rate": 9.86506698339386e-06, "loss": 16.9268, "step": 47760 }, { "epoch": 0.8637535852237925, "grad_norm": 39.9375, "learning_rate": 9.865038731087202e-06, "loss": 16.7704, "step": 47770 }, { "epoch": 0.8639344002929205, "grad_norm": 43.46875, "learning_rate": 9.86501047878054e-06, "loss": 16.9899, "step": 47780 }, { "epoch": 0.8641152153620483, "grad_norm": 42.09375, "learning_rate": 9.864982226473881e-06, "loss": 16.8435, "step": 47790 }, { "epoch": 0.8642960304311761, "grad_norm": 43.34375, "learning_rate": 9.864953974167222e-06, "loss": 16.5593, "step": 47800 }, { "epoch": 0.864476845500304, "grad_norm": 46.1875, "learning_rate": 9.864925721860562e-06, "loss": 16.7038, "step": 47810 }, { "epoch": 0.8646576605694318, "grad_norm": 43.21875, "learning_rate": 9.864897469553903e-06, "loss": 16.8785, "step": 47820 }, { "epoch": 0.8648384756385598, "grad_norm": 41.28125, "learning_rate": 9.864869217247242e-06, "loss": 16.9381, "step": 47830 }, { "epoch": 0.8650192907076876, "grad_norm": 43.96875, "learning_rate": 9.864840964940583e-06, "loss": 16.3128, "step": 47840 }, { "epoch": 0.8652001057768154, "grad_norm": 42.5625, "learning_rate": 9.864812712633923e-06, "loss": 16.5548, "step": 47850 }, { "epoch": 0.8653809208459433, "grad_norm": 46.09375, "learning_rate": 9.864784460327266e-06, "loss": 16.6328, "step": 47860 }, { "epoch": 0.8655617359150711, "grad_norm": 44.8125, "learning_rate": 9.864756208020604e-06, "loss": 16.7783, "step": 47870 }, { "epoch": 0.865742550984199, "grad_norm": 43.375, "learning_rate": 9.864727955713945e-06, "loss": 16.5969, "step": 47880 }, { "epoch": 0.8659233660533269, "grad_norm": 45.15625, "learning_rate": 9.864699703407286e-06, "loss": 16.7332, "step": 47890 }, { "epoch": 0.8661041811224548, "grad_norm": 44.25, "learning_rate": 9.864671451100626e-06, "loss": 16.8043, "step": 47900 }, { "epoch": 0.8662849961915826, "grad_norm": 42.4375, "learning_rate": 9.864643198793967e-06, "loss": 16.796, "step": 47910 }, { "epoch": 0.8664658112607104, "grad_norm": 43.21875, "learning_rate": 9.864614946487306e-06, "loss": 17.0849, "step": 47920 }, { "epoch": 0.8666466263298384, "grad_norm": 42.625, "learning_rate": 9.864586694180646e-06, "loss": 16.6788, "step": 47930 }, { "epoch": 0.8668274413989662, "grad_norm": 42.40625, "learning_rate": 9.864558441873989e-06, "loss": 16.5263, "step": 47940 }, { "epoch": 0.8670082564680941, "grad_norm": 45.59375, "learning_rate": 9.864530189567328e-06, "loss": 16.6813, "step": 47950 }, { "epoch": 0.8671890715372219, "grad_norm": 43.15625, "learning_rate": 9.864501937260668e-06, "loss": 16.7066, "step": 47960 }, { "epoch": 0.8673698866063497, "grad_norm": 43.75, "learning_rate": 9.864473684954009e-06, "loss": 16.4457, "step": 47970 }, { "epoch": 0.8675507016754777, "grad_norm": 44.6875, "learning_rate": 9.86444543264735e-06, "loss": 16.8559, "step": 47980 }, { "epoch": 0.8677315167446055, "grad_norm": 43.6875, "learning_rate": 9.86441718034069e-06, "loss": 16.6689, "step": 47990 }, { "epoch": 0.8679123318137334, "grad_norm": 42.46875, "learning_rate": 9.864388928034029e-06, "loss": 17.0778, "step": 48000 }, { "epoch": 0.8680931468828612, "grad_norm": 43.15625, "learning_rate": 9.86436067572737e-06, "loss": 16.6127, "step": 48010 }, { "epoch": 0.868273961951989, "grad_norm": 42.8125, "learning_rate": 9.86433242342071e-06, "loss": 16.7316, "step": 48020 }, { "epoch": 0.868454777021117, "grad_norm": 42.78125, "learning_rate": 9.864304171114053e-06, "loss": 17.0651, "step": 48030 }, { "epoch": 0.8686355920902448, "grad_norm": 39.875, "learning_rate": 9.864275918807392e-06, "loss": 16.8044, "step": 48040 }, { "epoch": 0.8688164071593727, "grad_norm": 46.4375, "learning_rate": 9.864247666500732e-06, "loss": 16.662, "step": 48050 }, { "epoch": 0.8689972222285005, "grad_norm": 46.34375, "learning_rate": 9.864219414194073e-06, "loss": 16.8479, "step": 48060 }, { "epoch": 0.8691780372976284, "grad_norm": 43.40625, "learning_rate": 9.864191161887413e-06, "loss": 16.6773, "step": 48070 }, { "epoch": 0.8693588523667563, "grad_norm": 45.0625, "learning_rate": 9.864162909580754e-06, "loss": 16.3427, "step": 48080 }, { "epoch": 0.8695396674358841, "grad_norm": 42.09375, "learning_rate": 9.864134657274093e-06, "loss": 16.5875, "step": 48090 }, { "epoch": 0.869720482505012, "grad_norm": 43.03125, "learning_rate": 9.864106404967434e-06, "loss": 16.8144, "step": 48100 }, { "epoch": 0.8699012975741398, "grad_norm": 42.1875, "learning_rate": 9.864078152660774e-06, "loss": 16.7123, "step": 48110 }, { "epoch": 0.8700821126432677, "grad_norm": 46.96875, "learning_rate": 9.864049900354115e-06, "loss": 16.5468, "step": 48120 }, { "epoch": 0.8702629277123956, "grad_norm": 43.3125, "learning_rate": 9.864021648047456e-06, "loss": 16.6123, "step": 48130 }, { "epoch": 0.8704437427815234, "grad_norm": 43.03125, "learning_rate": 9.863993395740796e-06, "loss": 16.6049, "step": 48140 }, { "epoch": 0.8706245578506513, "grad_norm": 42.03125, "learning_rate": 9.863965143434137e-06, "loss": 16.6403, "step": 48150 }, { "epoch": 0.8708053729197791, "grad_norm": 43.21875, "learning_rate": 9.863936891127477e-06, "loss": 16.3105, "step": 48160 }, { "epoch": 0.870986187988907, "grad_norm": 44.1875, "learning_rate": 9.863908638820816e-06, "loss": 16.4318, "step": 48170 }, { "epoch": 0.8711670030580349, "grad_norm": 44.90625, "learning_rate": 9.863880386514157e-06, "loss": 16.7314, "step": 48180 }, { "epoch": 0.8713478181271627, "grad_norm": 45.25, "learning_rate": 9.863852134207498e-06, "loss": 17.3316, "step": 48190 }, { "epoch": 0.8715286331962906, "grad_norm": 43.28125, "learning_rate": 9.863823881900838e-06, "loss": 16.8433, "step": 48200 }, { "epoch": 0.8717094482654184, "grad_norm": 42.75, "learning_rate": 9.863795629594179e-06, "loss": 16.7235, "step": 48210 }, { "epoch": 0.8718902633345463, "grad_norm": 43.4375, "learning_rate": 9.86376737728752e-06, "loss": 16.7464, "step": 48220 }, { "epoch": 0.8720710784036741, "grad_norm": 44.3125, "learning_rate": 9.86373912498086e-06, "loss": 16.5672, "step": 48230 }, { "epoch": 0.8722518934728021, "grad_norm": 42.1875, "learning_rate": 9.8637108726742e-06, "loss": 16.292, "step": 48240 }, { "epoch": 0.8724327085419299, "grad_norm": 42.5625, "learning_rate": 9.863682620367541e-06, "loss": 16.7786, "step": 48250 }, { "epoch": 0.8726135236110577, "grad_norm": 43.6875, "learning_rate": 9.86365436806088e-06, "loss": 16.6473, "step": 48260 }, { "epoch": 0.8727943386801856, "grad_norm": 42.53125, "learning_rate": 9.863626115754221e-06, "loss": 16.7557, "step": 48270 }, { "epoch": 0.8729751537493134, "grad_norm": 43.34375, "learning_rate": 9.863597863447561e-06, "loss": 17.0484, "step": 48280 }, { "epoch": 0.8731559688184414, "grad_norm": 43.125, "learning_rate": 9.863569611140902e-06, "loss": 16.5089, "step": 48290 }, { "epoch": 0.8733367838875692, "grad_norm": 45.5625, "learning_rate": 9.863541358834243e-06, "loss": 16.8514, "step": 48300 }, { "epoch": 0.873517598956697, "grad_norm": 42.8125, "learning_rate": 9.863513106527583e-06, "loss": 16.7409, "step": 48310 }, { "epoch": 0.8736984140258249, "grad_norm": 43.71875, "learning_rate": 9.863484854220924e-06, "loss": 16.4951, "step": 48320 }, { "epoch": 0.8738792290949527, "grad_norm": 42.53125, "learning_rate": 9.863456601914265e-06, "loss": 17.0963, "step": 48330 }, { "epoch": 0.8740600441640807, "grad_norm": 44.78125, "learning_rate": 9.863428349607605e-06, "loss": 17.1169, "step": 48340 }, { "epoch": 0.8742408592332085, "grad_norm": 44.59375, "learning_rate": 9.863400097300944e-06, "loss": 16.532, "step": 48350 }, { "epoch": 0.8744216743023363, "grad_norm": 42.6875, "learning_rate": 9.863371844994285e-06, "loss": 16.4468, "step": 48360 }, { "epoch": 0.8746024893714642, "grad_norm": 42.875, "learning_rate": 9.863343592687625e-06, "loss": 16.5803, "step": 48370 }, { "epoch": 0.874783304440592, "grad_norm": 40.125, "learning_rate": 9.863315340380966e-06, "loss": 16.6159, "step": 48380 }, { "epoch": 0.87496411950972, "grad_norm": 42.1875, "learning_rate": 9.863287088074307e-06, "loss": 16.6872, "step": 48390 }, { "epoch": 0.8751449345788478, "grad_norm": 41.84375, "learning_rate": 9.863258835767647e-06, "loss": 16.4338, "step": 48400 }, { "epoch": 0.8753257496479757, "grad_norm": 43.0625, "learning_rate": 9.863230583460988e-06, "loss": 16.7224, "step": 48410 }, { "epoch": 0.8755065647171035, "grad_norm": 44.21875, "learning_rate": 9.863202331154329e-06, "loss": 16.6712, "step": 48420 }, { "epoch": 0.8756873797862313, "grad_norm": 44.03125, "learning_rate": 9.863174078847667e-06, "loss": 17.0588, "step": 48430 }, { "epoch": 0.8758681948553593, "grad_norm": 41.5625, "learning_rate": 9.863145826541008e-06, "loss": 16.8368, "step": 48440 }, { "epoch": 0.8760490099244871, "grad_norm": 42.28125, "learning_rate": 9.863117574234349e-06, "loss": 16.9934, "step": 48450 }, { "epoch": 0.876229824993615, "grad_norm": 45.53125, "learning_rate": 9.86308932192769e-06, "loss": 17.0267, "step": 48460 }, { "epoch": 0.8764106400627428, "grad_norm": 46.5625, "learning_rate": 9.86306106962103e-06, "loss": 16.648, "step": 48470 }, { "epoch": 0.8765914551318706, "grad_norm": 44.625, "learning_rate": 9.86303281731437e-06, "loss": 16.7814, "step": 48480 }, { "epoch": 0.8767722702009986, "grad_norm": 43.25, "learning_rate": 9.863004565007711e-06, "loss": 16.8009, "step": 48490 }, { "epoch": 0.8769530852701264, "grad_norm": 43.625, "learning_rate": 9.862976312701052e-06, "loss": 16.8888, "step": 48500 }, { "epoch": 0.8771339003392543, "grad_norm": 44.46875, "learning_rate": 9.862948060394392e-06, "loss": 16.9263, "step": 48510 }, { "epoch": 0.8773147154083821, "grad_norm": 43.1875, "learning_rate": 9.862919808087731e-06, "loss": 16.7748, "step": 48520 }, { "epoch": 0.8774955304775099, "grad_norm": 46.65625, "learning_rate": 9.862891555781072e-06, "loss": 16.5806, "step": 48530 }, { "epoch": 0.8776763455466379, "grad_norm": 41.375, "learning_rate": 9.862863303474413e-06, "loss": 16.629, "step": 48540 }, { "epoch": 0.8778571606157657, "grad_norm": 44.15625, "learning_rate": 9.862835051167753e-06, "loss": 17.0033, "step": 48550 }, { "epoch": 0.8780379756848936, "grad_norm": 44.46875, "learning_rate": 9.862806798861094e-06, "loss": 17.1855, "step": 48560 }, { "epoch": 0.8782187907540214, "grad_norm": 43.8125, "learning_rate": 9.862778546554434e-06, "loss": 16.6759, "step": 48570 }, { "epoch": 0.8783996058231494, "grad_norm": 41.90625, "learning_rate": 9.862750294247775e-06, "loss": 16.7433, "step": 48580 }, { "epoch": 0.8785804208922772, "grad_norm": 44.28125, "learning_rate": 9.862722041941116e-06, "loss": 16.637, "step": 48590 }, { "epoch": 0.878761235961405, "grad_norm": 42.40625, "learning_rate": 9.862693789634455e-06, "loss": 16.9434, "step": 48600 }, { "epoch": 0.8789420510305329, "grad_norm": 42.78125, "learning_rate": 9.862665537327795e-06, "loss": 16.7419, "step": 48610 }, { "epoch": 0.8791228660996607, "grad_norm": 44.90625, "learning_rate": 9.862637285021136e-06, "loss": 16.7121, "step": 48620 }, { "epoch": 0.8793036811687887, "grad_norm": 41.40625, "learning_rate": 9.862609032714476e-06, "loss": 17.281, "step": 48630 }, { "epoch": 0.8794844962379165, "grad_norm": 45.46875, "learning_rate": 9.862580780407817e-06, "loss": 17.0728, "step": 48640 }, { "epoch": 0.8796653113070443, "grad_norm": 45.40625, "learning_rate": 9.862552528101158e-06, "loss": 17.1695, "step": 48650 }, { "epoch": 0.8798461263761722, "grad_norm": 43.34375, "learning_rate": 9.862524275794498e-06, "loss": 16.7729, "step": 48660 }, { "epoch": 0.8800269414453, "grad_norm": 42.59375, "learning_rate": 9.862496023487839e-06, "loss": 16.5975, "step": 48670 }, { "epoch": 0.880207756514428, "grad_norm": 41.96875, "learning_rate": 9.86246777118118e-06, "loss": 17.0676, "step": 48680 }, { "epoch": 0.8803885715835558, "grad_norm": 43.625, "learning_rate": 9.862439518874519e-06, "loss": 16.4909, "step": 48690 }, { "epoch": 0.8805693866526836, "grad_norm": 45.96875, "learning_rate": 9.86241126656786e-06, "loss": 16.2651, "step": 48700 }, { "epoch": 0.8807502017218115, "grad_norm": 43.59375, "learning_rate": 9.8623830142612e-06, "loss": 16.8944, "step": 48710 }, { "epoch": 0.8809310167909393, "grad_norm": 43.84375, "learning_rate": 9.86235476195454e-06, "loss": 16.936, "step": 48720 }, { "epoch": 0.8811118318600673, "grad_norm": 43.1875, "learning_rate": 9.862326509647881e-06, "loss": 16.6326, "step": 48730 }, { "epoch": 0.8812926469291951, "grad_norm": 42.65625, "learning_rate": 9.862298257341222e-06, "loss": 16.9422, "step": 48740 }, { "epoch": 0.881473461998323, "grad_norm": 41.375, "learning_rate": 9.862270005034562e-06, "loss": 16.3952, "step": 48750 }, { "epoch": 0.8816542770674508, "grad_norm": 43.0, "learning_rate": 9.862241752727903e-06, "loss": 16.612, "step": 48760 }, { "epoch": 0.8818350921365786, "grad_norm": 44.34375, "learning_rate": 9.862213500421244e-06, "loss": 17.0378, "step": 48770 }, { "epoch": 0.8820159072057066, "grad_norm": 44.09375, "learning_rate": 9.862185248114582e-06, "loss": 16.5489, "step": 48780 }, { "epoch": 0.8821967222748344, "grad_norm": 43.53125, "learning_rate": 9.862156995807923e-06, "loss": 16.7768, "step": 48790 }, { "epoch": 0.8823775373439623, "grad_norm": 47.0, "learning_rate": 9.862128743501264e-06, "loss": 16.9206, "step": 48800 }, { "epoch": 0.8825583524130901, "grad_norm": 39.8125, "learning_rate": 9.862100491194604e-06, "loss": 16.4764, "step": 48810 }, { "epoch": 0.8827391674822179, "grad_norm": 41.875, "learning_rate": 9.862072238887945e-06, "loss": 16.5945, "step": 48820 }, { "epoch": 0.8829199825513458, "grad_norm": 45.625, "learning_rate": 9.862043986581286e-06, "loss": 16.5602, "step": 48830 }, { "epoch": 0.8831007976204737, "grad_norm": 42.0, "learning_rate": 9.862015734274626e-06, "loss": 16.9655, "step": 48840 }, { "epoch": 0.8832816126896016, "grad_norm": 44.21875, "learning_rate": 9.861987481967967e-06, "loss": 16.6877, "step": 48850 }, { "epoch": 0.8834624277587294, "grad_norm": 43.375, "learning_rate": 9.861959229661306e-06, "loss": 16.6978, "step": 48860 }, { "epoch": 0.8836432428278572, "grad_norm": 44.3125, "learning_rate": 9.861930977354646e-06, "loss": 16.9109, "step": 48870 }, { "epoch": 0.8838240578969851, "grad_norm": 46.0625, "learning_rate": 9.861902725047987e-06, "loss": 16.019, "step": 48880 }, { "epoch": 0.884004872966113, "grad_norm": 45.90625, "learning_rate": 9.861874472741328e-06, "loss": 17.0793, "step": 48890 }, { "epoch": 0.8841856880352409, "grad_norm": 44.84375, "learning_rate": 9.861846220434668e-06, "loss": 16.832, "step": 48900 }, { "epoch": 0.8843665031043687, "grad_norm": 43.21875, "learning_rate": 9.861817968128009e-06, "loss": 16.9307, "step": 48910 }, { "epoch": 0.8845473181734966, "grad_norm": 43.21875, "learning_rate": 9.86178971582135e-06, "loss": 16.1817, "step": 48920 }, { "epoch": 0.8847281332426244, "grad_norm": 44.1875, "learning_rate": 9.86176146351469e-06, "loss": 17.1689, "step": 48930 }, { "epoch": 0.8849089483117523, "grad_norm": 42.375, "learning_rate": 9.86173321120803e-06, "loss": 16.9108, "step": 48940 }, { "epoch": 0.8850897633808802, "grad_norm": 42.6875, "learning_rate": 9.86170495890137e-06, "loss": 16.6475, "step": 48950 }, { "epoch": 0.885270578450008, "grad_norm": 42.71875, "learning_rate": 9.86167670659471e-06, "loss": 16.5359, "step": 48960 }, { "epoch": 0.8854513935191359, "grad_norm": 43.9375, "learning_rate": 9.861648454288051e-06, "loss": 16.8834, "step": 48970 }, { "epoch": 0.8856322085882637, "grad_norm": 43.0625, "learning_rate": 9.861620201981391e-06, "loss": 16.5986, "step": 48980 }, { "epoch": 0.8858130236573916, "grad_norm": 47.15625, "learning_rate": 9.861591949674732e-06, "loss": 16.5972, "step": 48990 }, { "epoch": 0.8859938387265195, "grad_norm": 43.8125, "learning_rate": 9.861563697368073e-06, "loss": 16.8942, "step": 49000 }, { "epoch": 0.8861746537956473, "grad_norm": 45.34375, "learning_rate": 9.861535445061413e-06, "loss": 16.5526, "step": 49010 }, { "epoch": 0.8863554688647752, "grad_norm": 41.34375, "learning_rate": 9.861507192754754e-06, "loss": 16.7797, "step": 49020 }, { "epoch": 0.886536283933903, "grad_norm": 43.28125, "learning_rate": 9.861478940448093e-06, "loss": 16.9787, "step": 49030 }, { "epoch": 0.8867170990030309, "grad_norm": 42.6875, "learning_rate": 9.861450688141434e-06, "loss": 16.6541, "step": 49040 }, { "epoch": 0.8868979140721588, "grad_norm": 43.90625, "learning_rate": 9.861422435834774e-06, "loss": 16.7543, "step": 49050 }, { "epoch": 0.8870787291412866, "grad_norm": 43.0, "learning_rate": 9.861394183528115e-06, "loss": 16.7508, "step": 49060 }, { "epoch": 0.8872595442104145, "grad_norm": 40.3125, "learning_rate": 9.861365931221455e-06, "loss": 16.7637, "step": 49070 }, { "epoch": 0.8874403592795423, "grad_norm": 40.28125, "learning_rate": 9.861337678914796e-06, "loss": 16.0263, "step": 49080 }, { "epoch": 0.8876211743486703, "grad_norm": 42.625, "learning_rate": 9.861309426608137e-06, "loss": 16.8237, "step": 49090 }, { "epoch": 0.8878019894177981, "grad_norm": 40.6875, "learning_rate": 9.861281174301477e-06, "loss": 17.5541, "step": 49100 }, { "epoch": 0.8879828044869259, "grad_norm": 44.96875, "learning_rate": 9.861252921994818e-06, "loss": 17.1185, "step": 49110 }, { "epoch": 0.8881636195560538, "grad_norm": 42.9375, "learning_rate": 9.861224669688157e-06, "loss": 16.7611, "step": 49120 }, { "epoch": 0.8883444346251816, "grad_norm": 42.84375, "learning_rate": 9.861196417381497e-06, "loss": 16.2662, "step": 49130 }, { "epoch": 0.8885252496943096, "grad_norm": 43.03125, "learning_rate": 9.861168165074838e-06, "loss": 16.8431, "step": 49140 }, { "epoch": 0.8887060647634374, "grad_norm": 43.09375, "learning_rate": 9.861139912768179e-06, "loss": 16.5856, "step": 49150 }, { "epoch": 0.8888868798325652, "grad_norm": 43.0625, "learning_rate": 9.86111166046152e-06, "loss": 17.3069, "step": 49160 }, { "epoch": 0.8890676949016931, "grad_norm": 43.5, "learning_rate": 9.86108340815486e-06, "loss": 16.948, "step": 49170 }, { "epoch": 0.8892485099708209, "grad_norm": 44.25, "learning_rate": 9.8610551558482e-06, "loss": 16.3898, "step": 49180 }, { "epoch": 0.8894293250399489, "grad_norm": 42.78125, "learning_rate": 9.861026903541541e-06, "loss": 16.5265, "step": 49190 }, { "epoch": 0.8896101401090767, "grad_norm": 45.71875, "learning_rate": 9.860998651234882e-06, "loss": 16.7786, "step": 49200 }, { "epoch": 0.8897909551782045, "grad_norm": 45.84375, "learning_rate": 9.86097039892822e-06, "loss": 16.3813, "step": 49210 }, { "epoch": 0.8899717702473324, "grad_norm": 41.21875, "learning_rate": 9.860942146621561e-06, "loss": 16.3776, "step": 49220 }, { "epoch": 0.8901525853164602, "grad_norm": 42.75, "learning_rate": 9.860913894314902e-06, "loss": 16.5008, "step": 49230 }, { "epoch": 0.8903334003855882, "grad_norm": 44.0, "learning_rate": 9.860885642008243e-06, "loss": 16.4967, "step": 49240 }, { "epoch": 0.890514215454716, "grad_norm": 42.09375, "learning_rate": 9.860857389701583e-06, "loss": 16.8697, "step": 49250 }, { "epoch": 0.8906950305238439, "grad_norm": 43.3125, "learning_rate": 9.860829137394924e-06, "loss": 16.6238, "step": 49260 }, { "epoch": 0.8908758455929717, "grad_norm": 44.28125, "learning_rate": 9.860800885088264e-06, "loss": 16.7561, "step": 49270 }, { "epoch": 0.8910566606620995, "grad_norm": 44.8125, "learning_rate": 9.860772632781605e-06, "loss": 17.2425, "step": 49280 }, { "epoch": 0.8912374757312275, "grad_norm": 43.90625, "learning_rate": 9.860744380474944e-06, "loss": 16.5298, "step": 49290 }, { "epoch": 0.8914182908003553, "grad_norm": 44.5625, "learning_rate": 9.860716128168285e-06, "loss": 16.7332, "step": 49300 }, { "epoch": 0.8915991058694832, "grad_norm": 44.5, "learning_rate": 9.860687875861625e-06, "loss": 17.0327, "step": 49310 }, { "epoch": 0.891779920938611, "grad_norm": 43.3125, "learning_rate": 9.860659623554966e-06, "loss": 16.7635, "step": 49320 }, { "epoch": 0.8919607360077388, "grad_norm": 42.96875, "learning_rate": 9.860631371248307e-06, "loss": 16.7297, "step": 49330 }, { "epoch": 0.8921415510768668, "grad_norm": 44.96875, "learning_rate": 9.860603118941647e-06, "loss": 16.9263, "step": 49340 }, { "epoch": 0.8923223661459946, "grad_norm": 43.96875, "learning_rate": 9.860574866634988e-06, "loss": 17.077, "step": 49350 }, { "epoch": 0.8925031812151225, "grad_norm": 43.1875, "learning_rate": 9.860546614328328e-06, "loss": 16.648, "step": 49360 }, { "epoch": 0.8926839962842503, "grad_norm": 42.0625, "learning_rate": 9.860518362021669e-06, "loss": 16.6563, "step": 49370 }, { "epoch": 0.8928648113533781, "grad_norm": 42.15625, "learning_rate": 9.860490109715008e-06, "loss": 16.6227, "step": 49380 }, { "epoch": 0.8930456264225061, "grad_norm": 44.0625, "learning_rate": 9.860461857408349e-06, "loss": 16.5707, "step": 49390 }, { "epoch": 0.8932264414916339, "grad_norm": 39.40625, "learning_rate": 9.86043360510169e-06, "loss": 17.3028, "step": 49400 }, { "epoch": 0.8934072565607618, "grad_norm": 44.25, "learning_rate": 9.86040535279503e-06, "loss": 16.9679, "step": 49410 }, { "epoch": 0.8935880716298896, "grad_norm": 46.0, "learning_rate": 9.86037710048837e-06, "loss": 16.7102, "step": 49420 }, { "epoch": 0.8937688866990176, "grad_norm": 44.65625, "learning_rate": 9.860348848181711e-06, "loss": 16.5409, "step": 49430 }, { "epoch": 0.8939497017681454, "grad_norm": 43.84375, "learning_rate": 9.860320595875052e-06, "loss": 16.6154, "step": 49440 }, { "epoch": 0.8941305168372732, "grad_norm": 44.125, "learning_rate": 9.860292343568392e-06, "loss": 16.6337, "step": 49450 }, { "epoch": 0.8943113319064011, "grad_norm": 43.0, "learning_rate": 9.860264091261731e-06, "loss": 16.6019, "step": 49460 }, { "epoch": 0.8944921469755289, "grad_norm": 41.84375, "learning_rate": 9.860235838955072e-06, "loss": 16.6053, "step": 49470 }, { "epoch": 0.8946729620446568, "grad_norm": 44.65625, "learning_rate": 9.860207586648412e-06, "loss": 16.5996, "step": 49480 }, { "epoch": 0.8948537771137847, "grad_norm": 44.25, "learning_rate": 9.860179334341753e-06, "loss": 16.5798, "step": 49490 }, { "epoch": 0.8950345921829125, "grad_norm": 43.5, "learning_rate": 9.860151082035094e-06, "loss": 16.9582, "step": 49500 }, { "epoch": 0.8952154072520404, "grad_norm": 41.96875, "learning_rate": 9.860122829728434e-06, "loss": 16.8579, "step": 49510 }, { "epoch": 0.8953962223211682, "grad_norm": 45.125, "learning_rate": 9.860094577421775e-06, "loss": 16.5535, "step": 49520 }, { "epoch": 0.8955770373902961, "grad_norm": 43.875, "learning_rate": 9.860066325115116e-06, "loss": 17.1519, "step": 49530 }, { "epoch": 0.895757852459424, "grad_norm": 42.375, "learning_rate": 9.860038072808456e-06, "loss": 16.4699, "step": 49540 }, { "epoch": 0.8959386675285518, "grad_norm": 42.78125, "learning_rate": 9.860009820501795e-06, "loss": 16.67, "step": 49550 }, { "epoch": 0.8961194825976797, "grad_norm": 45.5625, "learning_rate": 9.859981568195136e-06, "loss": 17.0471, "step": 49560 }, { "epoch": 0.8963002976668075, "grad_norm": 43.8125, "learning_rate": 9.859953315888476e-06, "loss": 17.0289, "step": 49570 }, { "epoch": 0.8964811127359354, "grad_norm": 46.0625, "learning_rate": 9.859925063581817e-06, "loss": 16.8848, "step": 49580 }, { "epoch": 0.8966619278050633, "grad_norm": 44.78125, "learning_rate": 9.859896811275158e-06, "loss": 17.0249, "step": 49590 }, { "epoch": 0.8968427428741912, "grad_norm": 42.03125, "learning_rate": 9.859868558968497e-06, "loss": 16.5212, "step": 49600 }, { "epoch": 0.897023557943319, "grad_norm": 43.34375, "learning_rate": 9.859840306661839e-06, "loss": 16.6439, "step": 49610 }, { "epoch": 0.8972043730124468, "grad_norm": 42.3125, "learning_rate": 9.85981205435518e-06, "loss": 16.4337, "step": 49620 }, { "epoch": 0.8973851880815747, "grad_norm": 45.1875, "learning_rate": 9.85978380204852e-06, "loss": 16.9367, "step": 49630 }, { "epoch": 0.8975660031507026, "grad_norm": 45.3125, "learning_rate": 9.859755549741859e-06, "loss": 17.0094, "step": 49640 }, { "epoch": 0.8977468182198305, "grad_norm": 48.46875, "learning_rate": 9.8597272974352e-06, "loss": 17.0798, "step": 49650 }, { "epoch": 0.8979276332889583, "grad_norm": 46.09375, "learning_rate": 9.85969904512854e-06, "loss": 16.8546, "step": 49660 }, { "epoch": 0.8981084483580861, "grad_norm": 42.65625, "learning_rate": 9.859670792821881e-06, "loss": 16.4153, "step": 49670 }, { "epoch": 0.898289263427214, "grad_norm": 46.1875, "learning_rate": 9.859642540515222e-06, "loss": 16.5445, "step": 49680 }, { "epoch": 0.8984700784963419, "grad_norm": 42.09375, "learning_rate": 9.859614288208562e-06, "loss": 16.6849, "step": 49690 }, { "epoch": 0.8986508935654698, "grad_norm": 42.90625, "learning_rate": 9.859586035901903e-06, "loss": 16.7131, "step": 49700 }, { "epoch": 0.8988317086345976, "grad_norm": 41.15625, "learning_rate": 9.859557783595243e-06, "loss": 16.6575, "step": 49710 }, { "epoch": 0.8990125237037254, "grad_norm": 43.46875, "learning_rate": 9.859529531288582e-06, "loss": 16.6249, "step": 49720 }, { "epoch": 0.8991933387728533, "grad_norm": 44.34375, "learning_rate": 9.859501278981923e-06, "loss": 16.9034, "step": 49730 }, { "epoch": 0.8993741538419812, "grad_norm": 43.46875, "learning_rate": 9.859473026675264e-06, "loss": 16.7501, "step": 49740 }, { "epoch": 0.8995549689111091, "grad_norm": 43.40625, "learning_rate": 9.859444774368604e-06, "loss": 16.6332, "step": 49750 }, { "epoch": 0.8997357839802369, "grad_norm": 44.1875, "learning_rate": 9.859416522061945e-06, "loss": 16.6388, "step": 49760 }, { "epoch": 0.8999165990493648, "grad_norm": 44.75, "learning_rate": 9.859388269755284e-06, "loss": 16.5714, "step": 49770 }, { "epoch": 0.9000974141184926, "grad_norm": 44.65625, "learning_rate": 9.859360017448626e-06, "loss": 16.5571, "step": 49780 }, { "epoch": 0.9002782291876205, "grad_norm": 45.5625, "learning_rate": 9.859331765141967e-06, "loss": 16.8097, "step": 49790 }, { "epoch": 0.9004590442567484, "grad_norm": 39.1875, "learning_rate": 9.859303512835307e-06, "loss": 17.1475, "step": 49800 }, { "epoch": 0.9006398593258762, "grad_norm": 43.875, "learning_rate": 9.859275260528646e-06, "loss": 17.1851, "step": 49810 }, { "epoch": 0.9008206743950041, "grad_norm": 41.71875, "learning_rate": 9.859247008221987e-06, "loss": 16.6011, "step": 49820 }, { "epoch": 0.9010014894641319, "grad_norm": 44.875, "learning_rate": 9.859218755915327e-06, "loss": 16.3234, "step": 49830 }, { "epoch": 0.9011823045332598, "grad_norm": 41.71875, "learning_rate": 9.859190503608668e-06, "loss": 16.7468, "step": 49840 }, { "epoch": 0.9013631196023877, "grad_norm": 49.09375, "learning_rate": 9.859162251302009e-06, "loss": 17.3395, "step": 49850 }, { "epoch": 0.9015439346715155, "grad_norm": 44.03125, "learning_rate": 9.859133998995348e-06, "loss": 16.6495, "step": 49860 }, { "epoch": 0.9017247497406434, "grad_norm": 44.53125, "learning_rate": 9.85910574668869e-06, "loss": 16.7057, "step": 49870 }, { "epoch": 0.9019055648097712, "grad_norm": 44.625, "learning_rate": 9.85907749438203e-06, "loss": 16.5376, "step": 49880 }, { "epoch": 0.9020863798788991, "grad_norm": 44.96875, "learning_rate": 9.85904924207537e-06, "loss": 17.2714, "step": 49890 }, { "epoch": 0.902267194948027, "grad_norm": 46.78125, "learning_rate": 9.85902098976871e-06, "loss": 16.6724, "step": 49900 }, { "epoch": 0.9024480100171548, "grad_norm": 43.875, "learning_rate": 9.85899273746205e-06, "loss": 16.9802, "step": 49910 }, { "epoch": 0.9026288250862827, "grad_norm": 45.1875, "learning_rate": 9.858964485155391e-06, "loss": 17.1209, "step": 49920 }, { "epoch": 0.9028096401554105, "grad_norm": 43.71875, "learning_rate": 9.858936232848732e-06, "loss": 16.8002, "step": 49930 }, { "epoch": 0.9029904552245384, "grad_norm": 43.125, "learning_rate": 9.858907980542073e-06, "loss": 16.709, "step": 49940 }, { "epoch": 0.9031712702936663, "grad_norm": 45.96875, "learning_rate": 9.858879728235412e-06, "loss": 16.9123, "step": 49950 }, { "epoch": 0.9033520853627941, "grad_norm": 46.0625, "learning_rate": 9.858851475928754e-06, "loss": 16.4468, "step": 49960 }, { "epoch": 0.903532900431922, "grad_norm": 43.34375, "learning_rate": 9.858823223622094e-06, "loss": 16.6946, "step": 49970 }, { "epoch": 0.9037137155010498, "grad_norm": 43.71875, "learning_rate": 9.858794971315433e-06, "loss": 16.6168, "step": 49980 }, { "epoch": 0.9038945305701778, "grad_norm": 43.5, "learning_rate": 9.858766719008774e-06, "loss": 16.8038, "step": 49990 }, { "epoch": 0.9040753456393056, "grad_norm": 41.90625, "learning_rate": 9.858738466702115e-06, "loss": 16.6773, "step": 50000 }, { "epoch": 0.9040753456393056, "eval_loss": 2.0931339263916016, "eval_runtime": 229.7481, "eval_samples_per_second": 3160.239, "eval_steps_per_second": 49.38, "step": 50000 }, { "epoch": 0.9042561607084334, "grad_norm": 42.5, "learning_rate": 9.858710214395455e-06, "loss": 16.8778, "step": 50010 }, { "epoch": 0.9044369757775613, "grad_norm": 45.40625, "learning_rate": 9.858681962088796e-06, "loss": 16.5687, "step": 50020 }, { "epoch": 0.9046177908466891, "grad_norm": 44.34375, "learning_rate": 9.858653709782135e-06, "loss": 16.6903, "step": 50030 }, { "epoch": 0.9047986059158171, "grad_norm": 44.15625, "learning_rate": 9.858625457475477e-06, "loss": 16.3654, "step": 50040 }, { "epoch": 0.9049794209849449, "grad_norm": 45.40625, "learning_rate": 9.858597205168818e-06, "loss": 16.9448, "step": 50050 }, { "epoch": 0.9051602360540727, "grad_norm": 41.9375, "learning_rate": 9.858568952862158e-06, "loss": 17.0241, "step": 50060 }, { "epoch": 0.9053410511232006, "grad_norm": 43.3125, "learning_rate": 9.858540700555497e-06, "loss": 17.1035, "step": 50070 }, { "epoch": 0.9055218661923284, "grad_norm": 44.34375, "learning_rate": 9.858512448248838e-06, "loss": 17.3552, "step": 50080 }, { "epoch": 0.9057026812614564, "grad_norm": 44.125, "learning_rate": 9.858484195942179e-06, "loss": 16.1245, "step": 50090 }, { "epoch": 0.9058834963305842, "grad_norm": 41.65625, "learning_rate": 9.85845594363552e-06, "loss": 16.7964, "step": 50100 }, { "epoch": 0.906064311399712, "grad_norm": 46.71875, "learning_rate": 9.85842769132886e-06, "loss": 16.774, "step": 50110 }, { "epoch": 0.9062451264688399, "grad_norm": 45.4375, "learning_rate": 9.858399439022199e-06, "loss": 16.5967, "step": 50120 }, { "epoch": 0.9064259415379677, "grad_norm": 45.03125, "learning_rate": 9.858371186715541e-06, "loss": 16.5357, "step": 50130 }, { "epoch": 0.9066067566070957, "grad_norm": 43.90625, "learning_rate": 9.858342934408882e-06, "loss": 16.9263, "step": 50140 }, { "epoch": 0.9067875716762235, "grad_norm": 45.0625, "learning_rate": 9.85831468210222e-06, "loss": 16.9574, "step": 50150 }, { "epoch": 0.9069683867453514, "grad_norm": 42.34375, "learning_rate": 9.858286429795561e-06, "loss": 16.55, "step": 50160 }, { "epoch": 0.9071492018144792, "grad_norm": 43.8125, "learning_rate": 9.858258177488902e-06, "loss": 16.7054, "step": 50170 }, { "epoch": 0.907330016883607, "grad_norm": 43.5625, "learning_rate": 9.858229925182242e-06, "loss": 16.5315, "step": 50180 }, { "epoch": 0.907510831952735, "grad_norm": 46.71875, "learning_rate": 9.858201672875583e-06, "loss": 17.2083, "step": 50190 }, { "epoch": 0.9076916470218628, "grad_norm": 45.96875, "learning_rate": 9.858173420568922e-06, "loss": 16.9714, "step": 50200 }, { "epoch": 0.9078724620909907, "grad_norm": 45.625, "learning_rate": 9.858145168262263e-06, "loss": 16.5915, "step": 50210 }, { "epoch": 0.9080532771601185, "grad_norm": 43.3125, "learning_rate": 9.858116915955605e-06, "loss": 16.7036, "step": 50220 }, { "epoch": 0.9082340922292463, "grad_norm": 46.09375, "learning_rate": 9.858088663648946e-06, "loss": 16.9555, "step": 50230 }, { "epoch": 0.9084149072983743, "grad_norm": 42.9375, "learning_rate": 9.858060411342284e-06, "loss": 16.5951, "step": 50240 }, { "epoch": 0.9085957223675021, "grad_norm": 43.15625, "learning_rate": 9.858032159035625e-06, "loss": 16.8463, "step": 50250 }, { "epoch": 0.90877653743663, "grad_norm": 42.0, "learning_rate": 9.858003906728966e-06, "loss": 16.7482, "step": 50260 }, { "epoch": 0.9089573525057578, "grad_norm": 42.90625, "learning_rate": 9.857975654422306e-06, "loss": 16.4641, "step": 50270 }, { "epoch": 0.9091381675748856, "grad_norm": 43.4375, "learning_rate": 9.857947402115647e-06, "loss": 17.0, "step": 50280 }, { "epoch": 0.9093189826440136, "grad_norm": 42.125, "learning_rate": 9.857919149808986e-06, "loss": 16.7016, "step": 50290 }, { "epoch": 0.9094997977131414, "grad_norm": 45.3125, "learning_rate": 9.857890897502327e-06, "loss": 16.5641, "step": 50300 }, { "epoch": 0.9096806127822693, "grad_norm": 41.78125, "learning_rate": 9.857862645195669e-06, "loss": 16.8491, "step": 50310 }, { "epoch": 0.9098614278513971, "grad_norm": 43.75, "learning_rate": 9.857834392889008e-06, "loss": 16.7857, "step": 50320 }, { "epoch": 0.910042242920525, "grad_norm": 42.21875, "learning_rate": 9.857806140582348e-06, "loss": 16.7927, "step": 50330 }, { "epoch": 0.9102230579896529, "grad_norm": 42.90625, "learning_rate": 9.857777888275689e-06, "loss": 17.1758, "step": 50340 }, { "epoch": 0.9104038730587807, "grad_norm": 41.0, "learning_rate": 9.85774963596903e-06, "loss": 16.2274, "step": 50350 }, { "epoch": 0.9105846881279086, "grad_norm": 44.1875, "learning_rate": 9.85772138366237e-06, "loss": 16.6145, "step": 50360 }, { "epoch": 0.9107655031970364, "grad_norm": 42.9375, "learning_rate": 9.85769313135571e-06, "loss": 16.3565, "step": 50370 }, { "epoch": 0.9109463182661643, "grad_norm": 44.09375, "learning_rate": 9.85766487904905e-06, "loss": 16.6859, "step": 50380 }, { "epoch": 0.9111271333352922, "grad_norm": 44.3125, "learning_rate": 9.85763662674239e-06, "loss": 16.6597, "step": 50390 }, { "epoch": 0.91130794840442, "grad_norm": 44.75, "learning_rate": 9.857608374435733e-06, "loss": 17.1254, "step": 50400 }, { "epoch": 0.9114887634735479, "grad_norm": 44.75, "learning_rate": 9.857580122129072e-06, "loss": 16.8251, "step": 50410 }, { "epoch": 0.9116695785426757, "grad_norm": 42.40625, "learning_rate": 9.857551869822412e-06, "loss": 16.9839, "step": 50420 }, { "epoch": 0.9118503936118036, "grad_norm": 43.875, "learning_rate": 9.857523617515753e-06, "loss": 16.8362, "step": 50430 }, { "epoch": 0.9120312086809315, "grad_norm": 44.8125, "learning_rate": 9.857495365209094e-06, "loss": 16.8484, "step": 50440 }, { "epoch": 0.9122120237500593, "grad_norm": 47.40625, "learning_rate": 9.857467112902434e-06, "loss": 16.4676, "step": 50450 }, { "epoch": 0.9123928388191872, "grad_norm": 43.0, "learning_rate": 9.857438860595773e-06, "loss": 16.7908, "step": 50460 }, { "epoch": 0.912573653888315, "grad_norm": 43.1875, "learning_rate": 9.857410608289114e-06, "loss": 16.5521, "step": 50470 }, { "epoch": 0.9127544689574429, "grad_norm": 41.875, "learning_rate": 9.857382355982456e-06, "loss": 16.5968, "step": 50480 }, { "epoch": 0.9129352840265708, "grad_norm": 46.65625, "learning_rate": 9.857354103675795e-06, "loss": 16.4031, "step": 50490 }, { "epoch": 0.9131160990956987, "grad_norm": 44.15625, "learning_rate": 9.857325851369136e-06, "loss": 17.1271, "step": 50500 }, { "epoch": 0.9132969141648265, "grad_norm": 43.0, "learning_rate": 9.857297599062476e-06, "loss": 16.6066, "step": 50510 }, { "epoch": 0.9134777292339543, "grad_norm": 42.09375, "learning_rate": 9.857269346755817e-06, "loss": 16.6022, "step": 50520 }, { "epoch": 0.9136585443030822, "grad_norm": 43.65625, "learning_rate": 9.857241094449157e-06, "loss": 16.5399, "step": 50530 }, { "epoch": 0.9138393593722101, "grad_norm": 41.21875, "learning_rate": 9.857212842142498e-06, "loss": 16.75, "step": 50540 }, { "epoch": 0.914020174441338, "grad_norm": 44.21875, "learning_rate": 9.857184589835837e-06, "loss": 17.1319, "step": 50550 }, { "epoch": 0.9142009895104658, "grad_norm": 44.46875, "learning_rate": 9.857156337529178e-06, "loss": 16.3081, "step": 50560 }, { "epoch": 0.9143818045795936, "grad_norm": 42.53125, "learning_rate": 9.85712808522252e-06, "loss": 16.4195, "step": 50570 }, { "epoch": 0.9145626196487215, "grad_norm": 47.9375, "learning_rate": 9.857099832915859e-06, "loss": 16.6389, "step": 50580 }, { "epoch": 0.9147434347178494, "grad_norm": 44.40625, "learning_rate": 9.8570715806092e-06, "loss": 16.4462, "step": 50590 }, { "epoch": 0.9149242497869773, "grad_norm": 46.78125, "learning_rate": 9.85704332830254e-06, "loss": 17.0027, "step": 50600 }, { "epoch": 0.9151050648561051, "grad_norm": 48.78125, "learning_rate": 9.85701507599588e-06, "loss": 17.1624, "step": 50610 }, { "epoch": 0.9152858799252329, "grad_norm": 42.5625, "learning_rate": 9.856986823689221e-06, "loss": 17.0643, "step": 50620 }, { "epoch": 0.9154666949943608, "grad_norm": 43.78125, "learning_rate": 9.85695857138256e-06, "loss": 16.2225, "step": 50630 }, { "epoch": 0.9156475100634887, "grad_norm": 45.375, "learning_rate": 9.856930319075901e-06, "loss": 16.9626, "step": 50640 }, { "epoch": 0.9158283251326166, "grad_norm": 40.28125, "learning_rate": 9.856902066769242e-06, "loss": 17.0447, "step": 50650 }, { "epoch": 0.9160091402017444, "grad_norm": 48.125, "learning_rate": 9.856873814462584e-06, "loss": 16.5066, "step": 50660 }, { "epoch": 0.9161899552708723, "grad_norm": 45.6875, "learning_rate": 9.856845562155923e-06, "loss": 16.6187, "step": 50670 }, { "epoch": 0.9163707703400001, "grad_norm": 46.34375, "learning_rate": 9.856817309849263e-06, "loss": 16.5057, "step": 50680 }, { "epoch": 0.916551585409128, "grad_norm": 45.8125, "learning_rate": 9.856789057542604e-06, "loss": 16.7443, "step": 50690 }, { "epoch": 0.9167324004782559, "grad_norm": 41.09375, "learning_rate": 9.856760805235945e-06, "loss": 16.4325, "step": 50700 }, { "epoch": 0.9169132155473837, "grad_norm": 44.65625, "learning_rate": 9.856732552929285e-06, "loss": 16.9035, "step": 50710 }, { "epoch": 0.9170940306165116, "grad_norm": 45.9375, "learning_rate": 9.856704300622624e-06, "loss": 16.5906, "step": 50720 }, { "epoch": 0.9172748456856394, "grad_norm": 46.90625, "learning_rate": 9.856676048315965e-06, "loss": 16.8493, "step": 50730 }, { "epoch": 0.9174556607547673, "grad_norm": 45.84375, "learning_rate": 9.856647796009305e-06, "loss": 16.6537, "step": 50740 }, { "epoch": 0.9176364758238952, "grad_norm": 43.09375, "learning_rate": 9.856619543702646e-06, "loss": 16.8398, "step": 50750 }, { "epoch": 0.917817290893023, "grad_norm": 43.3125, "learning_rate": 9.856591291395987e-06, "loss": 16.7299, "step": 50760 }, { "epoch": 0.9179981059621509, "grad_norm": 41.625, "learning_rate": 9.856563039089327e-06, "loss": 16.2667, "step": 50770 }, { "epoch": 0.9181789210312787, "grad_norm": 43.09375, "learning_rate": 9.856534786782668e-06, "loss": 16.7683, "step": 50780 }, { "epoch": 0.9183597361004066, "grad_norm": 45.15625, "learning_rate": 9.856506534476009e-06, "loss": 16.4613, "step": 50790 }, { "epoch": 0.9185405511695345, "grad_norm": 45.46875, "learning_rate": 9.856478282169347e-06, "loss": 16.8121, "step": 50800 }, { "epoch": 0.9187213662386623, "grad_norm": 44.5, "learning_rate": 9.856450029862688e-06, "loss": 16.7824, "step": 50810 }, { "epoch": 0.9189021813077902, "grad_norm": 46.375, "learning_rate": 9.856421777556029e-06, "loss": 16.6512, "step": 50820 }, { "epoch": 0.919082996376918, "grad_norm": 43.0625, "learning_rate": 9.856393525249371e-06, "loss": 16.9636, "step": 50830 }, { "epoch": 0.919263811446046, "grad_norm": 44.8125, "learning_rate": 9.85636527294271e-06, "loss": 16.3377, "step": 50840 }, { "epoch": 0.9194446265151738, "grad_norm": 40.8125, "learning_rate": 9.85633702063605e-06, "loss": 16.2992, "step": 50850 }, { "epoch": 0.9196254415843016, "grad_norm": 46.09375, "learning_rate": 9.856308768329391e-06, "loss": 16.8767, "step": 50860 }, { "epoch": 0.9198062566534295, "grad_norm": 42.8125, "learning_rate": 9.856280516022732e-06, "loss": 17.1975, "step": 50870 }, { "epoch": 0.9199870717225573, "grad_norm": 46.25, "learning_rate": 9.856252263716072e-06, "loss": 16.8003, "step": 50880 }, { "epoch": 0.9201678867916853, "grad_norm": 42.46875, "learning_rate": 9.856224011409411e-06, "loss": 16.5021, "step": 50890 }, { "epoch": 0.9203487018608131, "grad_norm": 43.28125, "learning_rate": 9.856195759102752e-06, "loss": 16.8396, "step": 50900 }, { "epoch": 0.9205295169299409, "grad_norm": 41.78125, "learning_rate": 9.856167506796093e-06, "loss": 16.3, "step": 50910 }, { "epoch": 0.9207103319990688, "grad_norm": 47.40625, "learning_rate": 9.856139254489433e-06, "loss": 17.3092, "step": 50920 }, { "epoch": 0.9208911470681966, "grad_norm": 43.96875, "learning_rate": 9.856111002182774e-06, "loss": 17.0263, "step": 50930 }, { "epoch": 0.9210719621373246, "grad_norm": 41.5625, "learning_rate": 9.856082749876115e-06, "loss": 16.6091, "step": 50940 }, { "epoch": 0.9212527772064524, "grad_norm": 46.5, "learning_rate": 9.856054497569455e-06, "loss": 17.2658, "step": 50950 }, { "epoch": 0.9214335922755802, "grad_norm": 43.1875, "learning_rate": 9.856026245262796e-06, "loss": 16.9387, "step": 50960 }, { "epoch": 0.9216144073447081, "grad_norm": 42.1875, "learning_rate": 9.855997992956136e-06, "loss": 16.5608, "step": 50970 }, { "epoch": 0.9217952224138359, "grad_norm": 44.375, "learning_rate": 9.855969740649475e-06, "loss": 16.4755, "step": 50980 }, { "epoch": 0.9219760374829639, "grad_norm": 44.625, "learning_rate": 9.855941488342816e-06, "loss": 16.774, "step": 50990 }, { "epoch": 0.9221568525520917, "grad_norm": 46.0, "learning_rate": 9.855913236036157e-06, "loss": 16.2279, "step": 51000 }, { "epoch": 0.9223376676212196, "grad_norm": 43.25, "learning_rate": 9.855884983729497e-06, "loss": 16.9542, "step": 51010 }, { "epoch": 0.9225184826903474, "grad_norm": 46.4375, "learning_rate": 9.855856731422838e-06, "loss": 17.0627, "step": 51020 }, { "epoch": 0.9226992977594752, "grad_norm": 45.15625, "learning_rate": 9.855828479116178e-06, "loss": 16.5087, "step": 51030 }, { "epoch": 0.9228801128286032, "grad_norm": 45.21875, "learning_rate": 9.855800226809519e-06, "loss": 16.333, "step": 51040 }, { "epoch": 0.923060927897731, "grad_norm": 44.90625, "learning_rate": 9.85577197450286e-06, "loss": 16.7786, "step": 51050 }, { "epoch": 0.9232417429668589, "grad_norm": 43.59375, "learning_rate": 9.855743722196199e-06, "loss": 16.6454, "step": 51060 }, { "epoch": 0.9234225580359867, "grad_norm": 44.71875, "learning_rate": 9.85571546988954e-06, "loss": 16.572, "step": 51070 }, { "epoch": 0.9236033731051145, "grad_norm": 41.09375, "learning_rate": 9.85568721758288e-06, "loss": 16.4866, "step": 51080 }, { "epoch": 0.9237841881742425, "grad_norm": 47.84375, "learning_rate": 9.85565896527622e-06, "loss": 16.9147, "step": 51090 }, { "epoch": 0.9239650032433703, "grad_norm": 44.46875, "learning_rate": 9.855630712969561e-06, "loss": 16.818, "step": 51100 }, { "epoch": 0.9241458183124982, "grad_norm": 47.375, "learning_rate": 9.855602460662902e-06, "loss": 16.7599, "step": 51110 }, { "epoch": 0.924326633381626, "grad_norm": 44.21875, "learning_rate": 9.855574208356242e-06, "loss": 16.9495, "step": 51120 }, { "epoch": 0.9245074484507538, "grad_norm": 44.65625, "learning_rate": 9.855545956049583e-06, "loss": 16.7892, "step": 51130 }, { "epoch": 0.9246882635198818, "grad_norm": 44.75, "learning_rate": 9.855517703742924e-06, "loss": 16.4863, "step": 51140 }, { "epoch": 0.9248690785890096, "grad_norm": 46.03125, "learning_rate": 9.855489451436262e-06, "loss": 16.3058, "step": 51150 }, { "epoch": 0.9250498936581375, "grad_norm": 44.1875, "learning_rate": 9.855461199129603e-06, "loss": 16.1729, "step": 51160 }, { "epoch": 0.9252307087272653, "grad_norm": 43.625, "learning_rate": 9.855432946822944e-06, "loss": 16.9115, "step": 51170 }, { "epoch": 0.9254115237963932, "grad_norm": 44.5, "learning_rate": 9.855404694516284e-06, "loss": 16.577, "step": 51180 }, { "epoch": 0.9255923388655211, "grad_norm": 46.84375, "learning_rate": 9.855376442209625e-06, "loss": 16.3869, "step": 51190 }, { "epoch": 0.9257731539346489, "grad_norm": 45.53125, "learning_rate": 9.855348189902966e-06, "loss": 16.6404, "step": 51200 }, { "epoch": 0.9259539690037768, "grad_norm": 45.46875, "learning_rate": 9.855319937596306e-06, "loss": 16.7318, "step": 51210 }, { "epoch": 0.9261347840729046, "grad_norm": 42.0, "learning_rate": 9.855291685289647e-06, "loss": 16.1691, "step": 51220 }, { "epoch": 0.9263155991420325, "grad_norm": 43.40625, "learning_rate": 9.855263432982986e-06, "loss": 16.7121, "step": 51230 }, { "epoch": 0.9264964142111604, "grad_norm": 44.0625, "learning_rate": 9.855235180676326e-06, "loss": 16.653, "step": 51240 }, { "epoch": 0.9266772292802882, "grad_norm": 44.875, "learning_rate": 9.855206928369667e-06, "loss": 16.7853, "step": 51250 }, { "epoch": 0.9268580443494161, "grad_norm": 42.4375, "learning_rate": 9.855178676063008e-06, "loss": 16.5499, "step": 51260 }, { "epoch": 0.9270388594185439, "grad_norm": 41.375, "learning_rate": 9.855150423756348e-06, "loss": 16.4913, "step": 51270 }, { "epoch": 0.9272196744876718, "grad_norm": 43.40625, "learning_rate": 9.855122171449689e-06, "loss": 16.9152, "step": 51280 }, { "epoch": 0.9274004895567997, "grad_norm": 43.53125, "learning_rate": 9.85509391914303e-06, "loss": 16.3459, "step": 51290 }, { "epoch": 0.9275813046259275, "grad_norm": 42.75, "learning_rate": 9.85506566683637e-06, "loss": 16.257, "step": 51300 }, { "epoch": 0.9277621196950554, "grad_norm": 43.0, "learning_rate": 9.85503741452971e-06, "loss": 16.853, "step": 51310 }, { "epoch": 0.9279429347641832, "grad_norm": 48.90625, "learning_rate": 9.85500916222305e-06, "loss": 16.3804, "step": 51320 }, { "epoch": 0.9281237498333111, "grad_norm": 40.75, "learning_rate": 9.85498090991639e-06, "loss": 16.1179, "step": 51330 }, { "epoch": 0.928304564902439, "grad_norm": 45.3125, "learning_rate": 9.854952657609731e-06, "loss": 16.7624, "step": 51340 }, { "epoch": 0.9284853799715669, "grad_norm": 43.78125, "learning_rate": 9.854924405303072e-06, "loss": 16.7898, "step": 51350 }, { "epoch": 0.9286661950406947, "grad_norm": 45.40625, "learning_rate": 9.854896152996412e-06, "loss": 16.8738, "step": 51360 }, { "epoch": 0.9288470101098225, "grad_norm": 43.25, "learning_rate": 9.854867900689753e-06, "loss": 16.4131, "step": 51370 }, { "epoch": 0.9290278251789504, "grad_norm": 44.03125, "learning_rate": 9.854839648383093e-06, "loss": 16.6141, "step": 51380 }, { "epoch": 0.9292086402480783, "grad_norm": 43.28125, "learning_rate": 9.854811396076434e-06, "loss": 16.2441, "step": 51390 }, { "epoch": 0.9293894553172062, "grad_norm": 43.53125, "learning_rate": 9.854783143769775e-06, "loss": 16.764, "step": 51400 }, { "epoch": 0.929570270386334, "grad_norm": 46.9375, "learning_rate": 9.854754891463114e-06, "loss": 17.3107, "step": 51410 }, { "epoch": 0.9297510854554618, "grad_norm": 46.21875, "learning_rate": 9.854726639156454e-06, "loss": 16.2477, "step": 51420 }, { "epoch": 0.9299319005245897, "grad_norm": 43.96875, "learning_rate": 9.854698386849795e-06, "loss": 16.8256, "step": 51430 }, { "epoch": 0.9301127155937176, "grad_norm": 45.75, "learning_rate": 9.854670134543135e-06, "loss": 16.806, "step": 51440 }, { "epoch": 0.9302935306628455, "grad_norm": 44.40625, "learning_rate": 9.854641882236476e-06, "loss": 16.7943, "step": 51450 }, { "epoch": 0.9304743457319733, "grad_norm": 46.4375, "learning_rate": 9.854613629929817e-06, "loss": 16.4803, "step": 51460 }, { "epoch": 0.9306551608011011, "grad_norm": 41.9375, "learning_rate": 9.854585377623157e-06, "loss": 17.0244, "step": 51470 }, { "epoch": 0.930835975870229, "grad_norm": 44.0625, "learning_rate": 9.854557125316498e-06, "loss": 16.4517, "step": 51480 }, { "epoch": 0.9310167909393569, "grad_norm": 44.84375, "learning_rate": 9.854528873009837e-06, "loss": 16.8978, "step": 51490 }, { "epoch": 0.9311976060084848, "grad_norm": 46.6875, "learning_rate": 9.854500620703178e-06, "loss": 16.7435, "step": 51500 }, { "epoch": 0.9313784210776126, "grad_norm": 44.90625, "learning_rate": 9.854472368396518e-06, "loss": 16.402, "step": 51510 }, { "epoch": 0.9315592361467405, "grad_norm": 48.34375, "learning_rate": 9.854444116089859e-06, "loss": 16.7344, "step": 51520 }, { "epoch": 0.9317400512158683, "grad_norm": 44.40625, "learning_rate": 9.8544158637832e-06, "loss": 16.4329, "step": 51530 }, { "epoch": 0.9319208662849962, "grad_norm": 42.4375, "learning_rate": 9.85438761147654e-06, "loss": 16.3157, "step": 51540 }, { "epoch": 0.9321016813541241, "grad_norm": 42.625, "learning_rate": 9.85435935916988e-06, "loss": 16.9422, "step": 51550 }, { "epoch": 0.9322824964232519, "grad_norm": 44.09375, "learning_rate": 9.854331106863221e-06, "loss": 16.4778, "step": 51560 }, { "epoch": 0.9324633114923798, "grad_norm": 44.8125, "learning_rate": 9.854302854556562e-06, "loss": 16.7903, "step": 51570 }, { "epoch": 0.9326441265615076, "grad_norm": 48.03125, "learning_rate": 9.8542746022499e-06, "loss": 16.8277, "step": 51580 }, { "epoch": 0.9328249416306355, "grad_norm": 44.15625, "learning_rate": 9.854246349943241e-06, "loss": 16.7422, "step": 51590 }, { "epoch": 0.9330057566997634, "grad_norm": 42.5, "learning_rate": 9.854218097636582e-06, "loss": 16.8482, "step": 51600 }, { "epoch": 0.9331865717688912, "grad_norm": 45.625, "learning_rate": 9.854189845329923e-06, "loss": 16.5671, "step": 51610 }, { "epoch": 0.9333673868380191, "grad_norm": 42.625, "learning_rate": 9.854161593023263e-06, "loss": 16.6159, "step": 51620 }, { "epoch": 0.9335482019071469, "grad_norm": 44.03125, "learning_rate": 9.854133340716604e-06, "loss": 16.8276, "step": 51630 }, { "epoch": 0.9337290169762748, "grad_norm": 41.96875, "learning_rate": 9.854105088409945e-06, "loss": 16.4779, "step": 51640 }, { "epoch": 0.9339098320454027, "grad_norm": 43.78125, "learning_rate": 9.854076836103285e-06, "loss": 16.5682, "step": 51650 }, { "epoch": 0.9340906471145305, "grad_norm": 42.96875, "learning_rate": 9.854048583796624e-06, "loss": 16.5536, "step": 51660 }, { "epoch": 0.9342714621836584, "grad_norm": 49.0625, "learning_rate": 9.854020331489965e-06, "loss": 16.8541, "step": 51670 }, { "epoch": 0.9344522772527862, "grad_norm": 44.1875, "learning_rate": 9.853992079183305e-06, "loss": 17.2924, "step": 51680 }, { "epoch": 0.9346330923219142, "grad_norm": 45.8125, "learning_rate": 9.853963826876646e-06, "loss": 16.4853, "step": 51690 }, { "epoch": 0.934813907391042, "grad_norm": 43.71875, "learning_rate": 9.853935574569987e-06, "loss": 16.4445, "step": 51700 }, { "epoch": 0.9349947224601698, "grad_norm": 46.5625, "learning_rate": 9.853907322263327e-06, "loss": 16.7954, "step": 51710 }, { "epoch": 0.9351755375292977, "grad_norm": 45.21875, "learning_rate": 9.853879069956668e-06, "loss": 16.6579, "step": 51720 }, { "epoch": 0.9353563525984255, "grad_norm": 46.5, "learning_rate": 9.853850817650008e-06, "loss": 17.1303, "step": 51730 }, { "epoch": 0.9355371676675535, "grad_norm": 44.75, "learning_rate": 9.853822565343349e-06, "loss": 16.8715, "step": 51740 }, { "epoch": 0.9357179827366813, "grad_norm": 43.75, "learning_rate": 9.853794313036688e-06, "loss": 16.7436, "step": 51750 }, { "epoch": 0.9358987978058091, "grad_norm": 45.5, "learning_rate": 9.853766060730029e-06, "loss": 16.4995, "step": 51760 }, { "epoch": 0.936079612874937, "grad_norm": 42.40625, "learning_rate": 9.85373780842337e-06, "loss": 16.6631, "step": 51770 }, { "epoch": 0.9362604279440648, "grad_norm": 41.625, "learning_rate": 9.85370955611671e-06, "loss": 16.1694, "step": 51780 }, { "epoch": 0.9364412430131928, "grad_norm": 43.40625, "learning_rate": 9.85368130381005e-06, "loss": 16.9699, "step": 51790 }, { "epoch": 0.9366220580823206, "grad_norm": 44.78125, "learning_rate": 9.853653051503391e-06, "loss": 16.5178, "step": 51800 }, { "epoch": 0.9368028731514484, "grad_norm": 42.875, "learning_rate": 9.853624799196732e-06, "loss": 16.3057, "step": 51810 }, { "epoch": 0.9369836882205763, "grad_norm": 45.28125, "learning_rate": 9.853596546890072e-06, "loss": 16.699, "step": 51820 }, { "epoch": 0.9371645032897041, "grad_norm": 41.8125, "learning_rate": 9.853568294583413e-06, "loss": 16.8024, "step": 51830 }, { "epoch": 0.9373453183588321, "grad_norm": 41.59375, "learning_rate": 9.853540042276752e-06, "loss": 16.6078, "step": 51840 }, { "epoch": 0.9375261334279599, "grad_norm": 47.21875, "learning_rate": 9.853511789970093e-06, "loss": 16.8264, "step": 51850 }, { "epoch": 0.9377069484970878, "grad_norm": 44.9375, "learning_rate": 9.853483537663433e-06, "loss": 16.8107, "step": 51860 }, { "epoch": 0.9378877635662156, "grad_norm": 43.34375, "learning_rate": 9.853455285356774e-06, "loss": 16.7995, "step": 51870 }, { "epoch": 0.9380685786353434, "grad_norm": 44.875, "learning_rate": 9.853427033050114e-06, "loss": 16.6142, "step": 51880 }, { "epoch": 0.9382493937044714, "grad_norm": 43.6875, "learning_rate": 9.853398780743455e-06, "loss": 16.7819, "step": 51890 }, { "epoch": 0.9384302087735992, "grad_norm": 44.125, "learning_rate": 9.853370528436796e-06, "loss": 16.7242, "step": 51900 }, { "epoch": 0.9386110238427271, "grad_norm": 43.84375, "learning_rate": 9.853342276130136e-06, "loss": 16.6088, "step": 51910 }, { "epoch": 0.9387918389118549, "grad_norm": 47.1875, "learning_rate": 9.853314023823475e-06, "loss": 16.1789, "step": 51920 }, { "epoch": 0.9389726539809827, "grad_norm": 43.9375, "learning_rate": 9.853285771516816e-06, "loss": 16.3855, "step": 51930 }, { "epoch": 0.9391534690501107, "grad_norm": 45.8125, "learning_rate": 9.853257519210156e-06, "loss": 17.0041, "step": 51940 }, { "epoch": 0.9393342841192385, "grad_norm": 41.3125, "learning_rate": 9.853229266903497e-06, "loss": 16.7801, "step": 51950 }, { "epoch": 0.9395150991883664, "grad_norm": 42.84375, "learning_rate": 9.853201014596838e-06, "loss": 16.7844, "step": 51960 }, { "epoch": 0.9396959142574942, "grad_norm": 42.125, "learning_rate": 9.853172762290178e-06, "loss": 16.6697, "step": 51970 }, { "epoch": 0.939876729326622, "grad_norm": 42.625, "learning_rate": 9.853144509983519e-06, "loss": 16.4901, "step": 51980 }, { "epoch": 0.94005754439575, "grad_norm": 44.375, "learning_rate": 9.85311625767686e-06, "loss": 16.4376, "step": 51990 }, { "epoch": 0.9402383594648778, "grad_norm": 41.625, "learning_rate": 9.8530880053702e-06, "loss": 16.743, "step": 52000 }, { "epoch": 0.9404191745340057, "grad_norm": 45.125, "learning_rate": 9.853059753063539e-06, "loss": 16.7142, "step": 52010 }, { "epoch": 0.9405999896031335, "grad_norm": 47.90625, "learning_rate": 9.85303150075688e-06, "loss": 16.6547, "step": 52020 }, { "epoch": 0.9407808046722614, "grad_norm": 44.53125, "learning_rate": 9.85300324845022e-06, "loss": 17.2, "step": 52030 }, { "epoch": 0.9409616197413893, "grad_norm": 46.03125, "learning_rate": 9.852974996143561e-06, "loss": 16.5735, "step": 52040 }, { "epoch": 0.9411424348105171, "grad_norm": 46.0625, "learning_rate": 9.852946743836902e-06, "loss": 16.9435, "step": 52050 }, { "epoch": 0.941323249879645, "grad_norm": 45.84375, "learning_rate": 9.852918491530242e-06, "loss": 16.5768, "step": 52060 }, { "epoch": 0.9415040649487728, "grad_norm": 43.09375, "learning_rate": 9.852890239223583e-06, "loss": 16.8057, "step": 52070 }, { "epoch": 0.9416848800179007, "grad_norm": 44.96875, "learning_rate": 9.852861986916923e-06, "loss": 16.56, "step": 52080 }, { "epoch": 0.9418656950870286, "grad_norm": 43.84375, "learning_rate": 9.852833734610262e-06, "loss": 17.1698, "step": 52090 }, { "epoch": 0.9420465101561564, "grad_norm": 45.8125, "learning_rate": 9.852805482303603e-06, "loss": 16.6837, "step": 52100 }, { "epoch": 0.9422273252252843, "grad_norm": 45.78125, "learning_rate": 9.852777229996944e-06, "loss": 16.8276, "step": 52110 }, { "epoch": 0.9424081402944121, "grad_norm": 42.4375, "learning_rate": 9.852748977690284e-06, "loss": 16.3848, "step": 52120 }, { "epoch": 0.94258895536354, "grad_norm": 43.28125, "learning_rate": 9.852720725383625e-06, "loss": 16.8509, "step": 52130 }, { "epoch": 0.9427697704326679, "grad_norm": 44.1875, "learning_rate": 9.852692473076965e-06, "loss": 16.6207, "step": 52140 }, { "epoch": 0.9429505855017957, "grad_norm": 44.53125, "learning_rate": 9.852664220770306e-06, "loss": 16.405, "step": 52150 }, { "epoch": 0.9431314005709236, "grad_norm": 43.96875, "learning_rate": 9.852635968463647e-06, "loss": 16.8411, "step": 52160 }, { "epoch": 0.9433122156400514, "grad_norm": 45.96875, "learning_rate": 9.852607716156987e-06, "loss": 16.647, "step": 52170 }, { "epoch": 0.9434930307091793, "grad_norm": 47.15625, "learning_rate": 9.852579463850326e-06, "loss": 16.826, "step": 52180 }, { "epoch": 0.9436738457783072, "grad_norm": 42.59375, "learning_rate": 9.852551211543667e-06, "loss": 16.9083, "step": 52190 }, { "epoch": 0.9438546608474351, "grad_norm": 45.46875, "learning_rate": 9.852522959237008e-06, "loss": 17.141, "step": 52200 }, { "epoch": 0.9440354759165629, "grad_norm": 42.875, "learning_rate": 9.852494706930348e-06, "loss": 16.7258, "step": 52210 }, { "epoch": 0.9442162909856907, "grad_norm": 44.34375, "learning_rate": 9.852466454623689e-06, "loss": 16.6493, "step": 52220 }, { "epoch": 0.9443971060548186, "grad_norm": 43.375, "learning_rate": 9.85243820231703e-06, "loss": 16.6304, "step": 52230 }, { "epoch": 0.9445779211239465, "grad_norm": 45.25, "learning_rate": 9.85240995001037e-06, "loss": 16.5853, "step": 52240 }, { "epoch": 0.9447587361930744, "grad_norm": 43.625, "learning_rate": 9.85238169770371e-06, "loss": 17.031, "step": 52250 }, { "epoch": 0.9449395512622022, "grad_norm": 45.625, "learning_rate": 9.852353445397051e-06, "loss": 16.8603, "step": 52260 }, { "epoch": 0.94512036633133, "grad_norm": 43.6875, "learning_rate": 9.85232519309039e-06, "loss": 16.9095, "step": 52270 }, { "epoch": 0.9453011814004579, "grad_norm": 43.53125, "learning_rate": 9.85229694078373e-06, "loss": 16.8246, "step": 52280 }, { "epoch": 0.9454819964695858, "grad_norm": 45.3125, "learning_rate": 9.852268688477071e-06, "loss": 16.8132, "step": 52290 }, { "epoch": 0.9456628115387137, "grad_norm": 48.875, "learning_rate": 9.852240436170412e-06, "loss": 16.8849, "step": 52300 }, { "epoch": 0.9458436266078415, "grad_norm": 44.75, "learning_rate": 9.852212183863753e-06, "loss": 16.3753, "step": 52310 }, { "epoch": 0.9460244416769693, "grad_norm": 42.59375, "learning_rate": 9.852183931557093e-06, "loss": 16.4101, "step": 52320 }, { "epoch": 0.9462052567460972, "grad_norm": 47.6875, "learning_rate": 9.852155679250434e-06, "loss": 16.9043, "step": 52330 }, { "epoch": 0.946386071815225, "grad_norm": 45.78125, "learning_rate": 9.852127426943775e-06, "loss": 16.5976, "step": 52340 }, { "epoch": 0.946566886884353, "grad_norm": 45.0625, "learning_rate": 9.852099174637113e-06, "loss": 16.648, "step": 52350 }, { "epoch": 0.9467477019534808, "grad_norm": 45.46875, "learning_rate": 9.852070922330454e-06, "loss": 16.923, "step": 52360 }, { "epoch": 0.9469285170226087, "grad_norm": 49.625, "learning_rate": 9.852042670023795e-06, "loss": 17.094, "step": 52370 }, { "epoch": 0.9471093320917365, "grad_norm": 45.75, "learning_rate": 9.852014417717135e-06, "loss": 17.2729, "step": 52380 }, { "epoch": 0.9472901471608643, "grad_norm": 44.53125, "learning_rate": 9.851986165410476e-06, "loss": 16.6978, "step": 52390 }, { "epoch": 0.9474709622299923, "grad_norm": 44.34375, "learning_rate": 9.851957913103815e-06, "loss": 16.9399, "step": 52400 }, { "epoch": 0.9476517772991201, "grad_norm": 44.8125, "learning_rate": 9.851929660797157e-06, "loss": 17.1842, "step": 52410 }, { "epoch": 0.947832592368248, "grad_norm": 45.09375, "learning_rate": 9.851901408490498e-06, "loss": 17.0201, "step": 52420 }, { "epoch": 0.9480134074373758, "grad_norm": 46.3125, "learning_rate": 9.851873156183838e-06, "loss": 16.8392, "step": 52430 }, { "epoch": 0.9481942225065036, "grad_norm": 44.6875, "learning_rate": 9.851844903877177e-06, "loss": 16.222, "step": 52440 }, { "epoch": 0.9483750375756316, "grad_norm": 46.53125, "learning_rate": 9.851816651570518e-06, "loss": 16.7396, "step": 52450 }, { "epoch": 0.9485558526447594, "grad_norm": 44.375, "learning_rate": 9.851788399263859e-06, "loss": 16.944, "step": 52460 }, { "epoch": 0.9487366677138873, "grad_norm": 43.8125, "learning_rate": 9.8517601469572e-06, "loss": 16.3753, "step": 52470 }, { "epoch": 0.9489174827830151, "grad_norm": 43.625, "learning_rate": 9.85173189465054e-06, "loss": 16.7334, "step": 52480 }, { "epoch": 0.949098297852143, "grad_norm": 43.4375, "learning_rate": 9.851703642343879e-06, "loss": 16.5398, "step": 52490 }, { "epoch": 0.9492791129212709, "grad_norm": 45.28125, "learning_rate": 9.851675390037221e-06, "loss": 16.5976, "step": 52500 }, { "epoch": 0.9494599279903987, "grad_norm": 42.5, "learning_rate": 9.851647137730562e-06, "loss": 16.487, "step": 52510 }, { "epoch": 0.9496407430595266, "grad_norm": 43.96875, "learning_rate": 9.8516188854239e-06, "loss": 17.0914, "step": 52520 }, { "epoch": 0.9498215581286544, "grad_norm": 43.5625, "learning_rate": 9.851590633117241e-06, "loss": 16.472, "step": 52530 }, { "epoch": 0.9500023731977824, "grad_norm": 46.90625, "learning_rate": 9.851562380810582e-06, "loss": 16.7584, "step": 52540 }, { "epoch": 0.9501831882669102, "grad_norm": 44.75, "learning_rate": 9.851534128503923e-06, "loss": 16.8644, "step": 52550 }, { "epoch": 0.950364003336038, "grad_norm": 43.875, "learning_rate": 9.851505876197263e-06, "loss": 16.5356, "step": 52560 }, { "epoch": 0.9505448184051659, "grad_norm": 43.875, "learning_rate": 9.851477623890604e-06, "loss": 16.2732, "step": 52570 }, { "epoch": 0.9507256334742937, "grad_norm": 44.84375, "learning_rate": 9.851449371583944e-06, "loss": 16.608, "step": 52580 }, { "epoch": 0.9509064485434217, "grad_norm": 42.40625, "learning_rate": 9.851421119277285e-06, "loss": 16.5287, "step": 52590 }, { "epoch": 0.9510872636125495, "grad_norm": 45.71875, "learning_rate": 9.851392866970626e-06, "loss": 17.0554, "step": 52600 }, { "epoch": 0.9512680786816773, "grad_norm": 43.8125, "learning_rate": 9.851364614663965e-06, "loss": 16.7669, "step": 52610 }, { "epoch": 0.9514488937508052, "grad_norm": 43.65625, "learning_rate": 9.851336362357305e-06, "loss": 16.934, "step": 52620 }, { "epoch": 0.951629708819933, "grad_norm": 44.78125, "learning_rate": 9.851308110050646e-06, "loss": 15.9467, "step": 52630 }, { "epoch": 0.951810523889061, "grad_norm": 45.28125, "learning_rate": 9.851279857743986e-06, "loss": 16.2832, "step": 52640 }, { "epoch": 0.9519913389581888, "grad_norm": 45.375, "learning_rate": 9.851251605437327e-06, "loss": 16.6556, "step": 52650 }, { "epoch": 0.9521721540273166, "grad_norm": 44.65625, "learning_rate": 9.851223353130666e-06, "loss": 16.8088, "step": 52660 }, { "epoch": 0.9523529690964445, "grad_norm": 46.03125, "learning_rate": 9.851195100824008e-06, "loss": 16.5832, "step": 52670 }, { "epoch": 0.9525337841655723, "grad_norm": 45.15625, "learning_rate": 9.851166848517349e-06, "loss": 17.0086, "step": 52680 }, { "epoch": 0.9527145992347003, "grad_norm": 45.71875, "learning_rate": 9.851138596210688e-06, "loss": 16.6411, "step": 52690 }, { "epoch": 0.9528954143038281, "grad_norm": 46.34375, "learning_rate": 9.851110343904028e-06, "loss": 16.5188, "step": 52700 }, { "epoch": 0.953076229372956, "grad_norm": 44.0625, "learning_rate": 9.851082091597369e-06, "loss": 16.6827, "step": 52710 }, { "epoch": 0.9532570444420838, "grad_norm": 46.4375, "learning_rate": 9.85105383929071e-06, "loss": 16.5534, "step": 52720 }, { "epoch": 0.9534378595112116, "grad_norm": 44.40625, "learning_rate": 9.85102558698405e-06, "loss": 16.7903, "step": 52730 }, { "epoch": 0.9536186745803396, "grad_norm": 43.0625, "learning_rate": 9.850997334677391e-06, "loss": 16.7192, "step": 52740 }, { "epoch": 0.9537994896494674, "grad_norm": 45.84375, "learning_rate": 9.85096908237073e-06, "loss": 16.6573, "step": 52750 }, { "epoch": 0.9539803047185953, "grad_norm": 45.625, "learning_rate": 9.850940830064072e-06, "loss": 16.2963, "step": 52760 }, { "epoch": 0.9541611197877231, "grad_norm": 45.09375, "learning_rate": 9.850912577757413e-06, "loss": 16.2455, "step": 52770 }, { "epoch": 0.9543419348568509, "grad_norm": 48.15625, "learning_rate": 9.850884325450752e-06, "loss": 16.774, "step": 52780 }, { "epoch": 0.9545227499259789, "grad_norm": 42.40625, "learning_rate": 9.850856073144092e-06, "loss": 16.7267, "step": 52790 }, { "epoch": 0.9547035649951067, "grad_norm": 43.46875, "learning_rate": 9.850827820837433e-06, "loss": 16.56, "step": 52800 }, { "epoch": 0.9548843800642346, "grad_norm": 41.96875, "learning_rate": 9.850799568530774e-06, "loss": 17.191, "step": 52810 }, { "epoch": 0.9550651951333624, "grad_norm": 43.75, "learning_rate": 9.850771316224114e-06, "loss": 16.2504, "step": 52820 }, { "epoch": 0.9552460102024902, "grad_norm": 44.59375, "learning_rate": 9.850743063917453e-06, "loss": 16.4462, "step": 52830 }, { "epoch": 0.9554268252716182, "grad_norm": 44.25, "learning_rate": 9.850714811610794e-06, "loss": 16.653, "step": 52840 }, { "epoch": 0.955607640340746, "grad_norm": 41.15625, "learning_rate": 9.850686559304136e-06, "loss": 16.4501, "step": 52850 }, { "epoch": 0.9557884554098739, "grad_norm": 45.09375, "learning_rate": 9.850658306997477e-06, "loss": 16.4311, "step": 52860 }, { "epoch": 0.9559692704790017, "grad_norm": 42.5, "learning_rate": 9.850630054690816e-06, "loss": 16.4035, "step": 52870 }, { "epoch": 0.9561500855481296, "grad_norm": 43.46875, "learning_rate": 9.850601802384156e-06, "loss": 16.8654, "step": 52880 }, { "epoch": 0.9563309006172575, "grad_norm": 43.125, "learning_rate": 9.850573550077497e-06, "loss": 16.8872, "step": 52890 }, { "epoch": 0.9565117156863853, "grad_norm": 45.59375, "learning_rate": 9.850545297770838e-06, "loss": 17.3379, "step": 52900 }, { "epoch": 0.9566925307555132, "grad_norm": 47.5, "learning_rate": 9.850517045464178e-06, "loss": 16.7555, "step": 52910 }, { "epoch": 0.956873345824641, "grad_norm": 47.375, "learning_rate": 9.850488793157517e-06, "loss": 16.522, "step": 52920 }, { "epoch": 0.9570541608937689, "grad_norm": 44.65625, "learning_rate": 9.85046054085086e-06, "loss": 16.7725, "step": 52930 }, { "epoch": 0.9572349759628968, "grad_norm": 42.53125, "learning_rate": 9.8504322885442e-06, "loss": 16.493, "step": 52940 }, { "epoch": 0.9574157910320246, "grad_norm": 43.34375, "learning_rate": 9.850404036237539e-06, "loss": 16.8442, "step": 52950 }, { "epoch": 0.9575966061011525, "grad_norm": 47.34375, "learning_rate": 9.85037578393088e-06, "loss": 16.7463, "step": 52960 }, { "epoch": 0.9577774211702803, "grad_norm": 44.40625, "learning_rate": 9.85034753162422e-06, "loss": 16.7918, "step": 52970 }, { "epoch": 0.9579582362394082, "grad_norm": 43.46875, "learning_rate": 9.85031927931756e-06, "loss": 16.6675, "step": 52980 }, { "epoch": 0.958139051308536, "grad_norm": 40.71875, "learning_rate": 9.850291027010901e-06, "loss": 16.662, "step": 52990 }, { "epoch": 0.9583198663776639, "grad_norm": 42.09375, "learning_rate": 9.85026277470424e-06, "loss": 16.813, "step": 53000 }, { "epoch": 0.9585006814467918, "grad_norm": 44.40625, "learning_rate": 9.850234522397581e-06, "loss": 16.8846, "step": 53010 }, { "epoch": 0.9586814965159196, "grad_norm": 45.625, "learning_rate": 9.850206270090923e-06, "loss": 17.0815, "step": 53020 }, { "epoch": 0.9588623115850475, "grad_norm": 47.40625, "learning_rate": 9.850178017784264e-06, "loss": 16.2415, "step": 53030 }, { "epoch": 0.9590431266541753, "grad_norm": 43.3125, "learning_rate": 9.850149765477603e-06, "loss": 16.5826, "step": 53040 }, { "epoch": 0.9592239417233033, "grad_norm": 45.4375, "learning_rate": 9.850121513170943e-06, "loss": 16.5556, "step": 53050 }, { "epoch": 0.9594047567924311, "grad_norm": 44.0625, "learning_rate": 9.850093260864284e-06, "loss": 16.7386, "step": 53060 }, { "epoch": 0.9595855718615589, "grad_norm": 41.21875, "learning_rate": 9.850065008557625e-06, "loss": 16.5449, "step": 53070 }, { "epoch": 0.9597663869306868, "grad_norm": 51.65625, "learning_rate": 9.850036756250965e-06, "loss": 16.669, "step": 53080 }, { "epoch": 0.9599472019998146, "grad_norm": 43.65625, "learning_rate": 9.850008503944304e-06, "loss": 16.748, "step": 53090 }, { "epoch": 0.9601280170689426, "grad_norm": 45.40625, "learning_rate": 9.849980251637645e-06, "loss": 17.1109, "step": 53100 }, { "epoch": 0.9603088321380704, "grad_norm": 40.03125, "learning_rate": 9.849951999330987e-06, "loss": 16.6088, "step": 53110 }, { "epoch": 0.9604896472071982, "grad_norm": 45.21875, "learning_rate": 9.849923747024326e-06, "loss": 16.7283, "step": 53120 }, { "epoch": 0.9606704622763261, "grad_norm": 46.03125, "learning_rate": 9.849895494717667e-06, "loss": 16.4558, "step": 53130 }, { "epoch": 0.960851277345454, "grad_norm": 46.09375, "learning_rate": 9.849867242411007e-06, "loss": 16.6179, "step": 53140 }, { "epoch": 0.9610320924145819, "grad_norm": 42.71875, "learning_rate": 9.849838990104348e-06, "loss": 16.864, "step": 53150 }, { "epoch": 0.9612129074837097, "grad_norm": 43.71875, "learning_rate": 9.849810737797689e-06, "loss": 16.7878, "step": 53160 }, { "epoch": 0.9613937225528375, "grad_norm": 44.59375, "learning_rate": 9.84978248549103e-06, "loss": 16.676, "step": 53170 }, { "epoch": 0.9615745376219654, "grad_norm": 44.0625, "learning_rate": 9.849754233184368e-06, "loss": 16.5465, "step": 53180 }, { "epoch": 0.9617553526910932, "grad_norm": 45.0, "learning_rate": 9.849725980877709e-06, "loss": 16.7175, "step": 53190 }, { "epoch": 0.9619361677602212, "grad_norm": 44.1875, "learning_rate": 9.849697728571051e-06, "loss": 16.9195, "step": 53200 }, { "epoch": 0.962116982829349, "grad_norm": 46.0625, "learning_rate": 9.84966947626439e-06, "loss": 17.0238, "step": 53210 }, { "epoch": 0.9622977978984769, "grad_norm": 44.78125, "learning_rate": 9.84964122395773e-06, "loss": 16.6907, "step": 53220 }, { "epoch": 0.9624786129676047, "grad_norm": 48.65625, "learning_rate": 9.849612971651071e-06, "loss": 16.7542, "step": 53230 }, { "epoch": 0.9626594280367325, "grad_norm": 45.90625, "learning_rate": 9.849584719344412e-06, "loss": 16.4365, "step": 53240 }, { "epoch": 0.9628402431058605, "grad_norm": 43.65625, "learning_rate": 9.849556467037753e-06, "loss": 16.5799, "step": 53250 }, { "epoch": 0.9630210581749883, "grad_norm": 42.71875, "learning_rate": 9.849528214731091e-06, "loss": 17.1928, "step": 53260 }, { "epoch": 0.9632018732441162, "grad_norm": 42.28125, "learning_rate": 9.849499962424432e-06, "loss": 16.5996, "step": 53270 }, { "epoch": 0.963382688313244, "grad_norm": 44.875, "learning_rate": 9.849471710117774e-06, "loss": 16.595, "step": 53280 }, { "epoch": 0.9635635033823718, "grad_norm": 42.9375, "learning_rate": 9.849443457811115e-06, "loss": 16.4589, "step": 53290 }, { "epoch": 0.9637443184514998, "grad_norm": 45.0, "learning_rate": 9.849415205504454e-06, "loss": 16.8769, "step": 53300 }, { "epoch": 0.9639251335206276, "grad_norm": 44.34375, "learning_rate": 9.849386953197795e-06, "loss": 16.4602, "step": 53310 }, { "epoch": 0.9641059485897555, "grad_norm": 39.125, "learning_rate": 9.849358700891135e-06, "loss": 17.0599, "step": 53320 }, { "epoch": 0.9642867636588833, "grad_norm": 46.09375, "learning_rate": 9.849330448584476e-06, "loss": 16.9556, "step": 53330 }, { "epoch": 0.9644675787280111, "grad_norm": 45.875, "learning_rate": 9.849302196277816e-06, "loss": 16.5521, "step": 53340 }, { "epoch": 0.9646483937971391, "grad_norm": 46.15625, "learning_rate": 9.849273943971155e-06, "loss": 16.6604, "step": 53350 }, { "epoch": 0.9648292088662669, "grad_norm": 43.8125, "learning_rate": 9.849245691664496e-06, "loss": 16.7637, "step": 53360 }, { "epoch": 0.9650100239353948, "grad_norm": 42.5, "learning_rate": 9.849217439357838e-06, "loss": 16.5801, "step": 53370 }, { "epoch": 0.9651908390045226, "grad_norm": 45.625, "learning_rate": 9.849189187051177e-06, "loss": 16.5078, "step": 53380 }, { "epoch": 0.9653716540736504, "grad_norm": 46.28125, "learning_rate": 9.849160934744518e-06, "loss": 16.5973, "step": 53390 }, { "epoch": 0.9655524691427784, "grad_norm": 47.65625, "learning_rate": 9.849132682437858e-06, "loss": 17.1258, "step": 53400 }, { "epoch": 0.9657332842119062, "grad_norm": 44.46875, "learning_rate": 9.849104430131199e-06, "loss": 16.6324, "step": 53410 }, { "epoch": 0.9659140992810341, "grad_norm": 42.90625, "learning_rate": 9.84907617782454e-06, "loss": 16.9065, "step": 53420 }, { "epoch": 0.9660949143501619, "grad_norm": 44.46875, "learning_rate": 9.849047925517879e-06, "loss": 16.5174, "step": 53430 }, { "epoch": 0.9662757294192899, "grad_norm": 45.03125, "learning_rate": 9.84901967321122e-06, "loss": 16.9315, "step": 53440 }, { "epoch": 0.9664565444884177, "grad_norm": 49.90625, "learning_rate": 9.84899142090456e-06, "loss": 16.6896, "step": 53450 }, { "epoch": 0.9666373595575455, "grad_norm": 43.5625, "learning_rate": 9.848963168597902e-06, "loss": 16.5972, "step": 53460 }, { "epoch": 0.9668181746266734, "grad_norm": 45.21875, "learning_rate": 9.848934916291241e-06, "loss": 16.7322, "step": 53470 }, { "epoch": 0.9669989896958012, "grad_norm": 45.25, "learning_rate": 9.848906663984582e-06, "loss": 16.9706, "step": 53480 }, { "epoch": 0.9671798047649292, "grad_norm": 46.5625, "learning_rate": 9.848878411677922e-06, "loss": 16.8125, "step": 53490 }, { "epoch": 0.967360619834057, "grad_norm": 45.75, "learning_rate": 9.848850159371263e-06, "loss": 16.3223, "step": 53500 }, { "epoch": 0.9675414349031848, "grad_norm": 40.0, "learning_rate": 9.848821907064604e-06, "loss": 16.235, "step": 53510 }, { "epoch": 0.9677222499723127, "grad_norm": 43.34375, "learning_rate": 9.848793654757943e-06, "loss": 16.5035, "step": 53520 }, { "epoch": 0.9679030650414405, "grad_norm": 46.15625, "learning_rate": 9.848765402451283e-06, "loss": 16.6695, "step": 53530 }, { "epoch": 0.9680838801105685, "grad_norm": 45.46875, "learning_rate": 9.848737150144624e-06, "loss": 16.551, "step": 53540 }, { "epoch": 0.9682646951796963, "grad_norm": 47.21875, "learning_rate": 9.848708897837964e-06, "loss": 16.9247, "step": 53550 }, { "epoch": 0.9684455102488241, "grad_norm": 44.34375, "learning_rate": 9.848680645531305e-06, "loss": 16.7716, "step": 53560 }, { "epoch": 0.968626325317952, "grad_norm": 42.125, "learning_rate": 9.848652393224646e-06, "loss": 17.0277, "step": 53570 }, { "epoch": 0.9688071403870798, "grad_norm": 42.84375, "learning_rate": 9.848624140917986e-06, "loss": 16.7511, "step": 53580 }, { "epoch": 0.9689879554562078, "grad_norm": 46.75, "learning_rate": 9.848595888611327e-06, "loss": 16.8811, "step": 53590 }, { "epoch": 0.9691687705253356, "grad_norm": 42.5625, "learning_rate": 9.848567636304668e-06, "loss": 16.6817, "step": 53600 }, { "epoch": 0.9693495855944635, "grad_norm": 46.46875, "learning_rate": 9.848539383998006e-06, "loss": 16.6221, "step": 53610 }, { "epoch": 0.9695304006635913, "grad_norm": 43.4375, "learning_rate": 9.848511131691347e-06, "loss": 17.0512, "step": 53620 }, { "epoch": 0.9697112157327191, "grad_norm": 40.8125, "learning_rate": 9.84848287938469e-06, "loss": 16.2973, "step": 53630 }, { "epoch": 0.969892030801847, "grad_norm": 44.71875, "learning_rate": 9.848454627078028e-06, "loss": 16.8274, "step": 53640 }, { "epoch": 0.9700728458709749, "grad_norm": 46.03125, "learning_rate": 9.848426374771369e-06, "loss": 16.3493, "step": 53650 }, { "epoch": 0.9702536609401028, "grad_norm": 45.1875, "learning_rate": 9.84839812246471e-06, "loss": 16.2608, "step": 53660 }, { "epoch": 0.9704344760092306, "grad_norm": 41.5625, "learning_rate": 9.84836987015805e-06, "loss": 16.3783, "step": 53670 }, { "epoch": 0.9706152910783584, "grad_norm": 44.75, "learning_rate": 9.84834161785139e-06, "loss": 16.6116, "step": 53680 }, { "epoch": 0.9707961061474863, "grad_norm": 43.5, "learning_rate": 9.84831336554473e-06, "loss": 16.6586, "step": 53690 }, { "epoch": 0.9709769212166142, "grad_norm": 44.71875, "learning_rate": 9.84828511323807e-06, "loss": 16.7844, "step": 53700 }, { "epoch": 0.9711577362857421, "grad_norm": 45.34375, "learning_rate": 9.848256860931411e-06, "loss": 16.8105, "step": 53710 }, { "epoch": 0.9713385513548699, "grad_norm": 45.25, "learning_rate": 9.848228608624753e-06, "loss": 16.7695, "step": 53720 }, { "epoch": 0.9715193664239977, "grad_norm": 44.59375, "learning_rate": 9.848200356318092e-06, "loss": 16.7423, "step": 53730 }, { "epoch": 0.9717001814931256, "grad_norm": 48.625, "learning_rate": 9.848172104011433e-06, "loss": 16.6176, "step": 53740 }, { "epoch": 0.9718809965622535, "grad_norm": 47.3125, "learning_rate": 9.848143851704773e-06, "loss": 16.8514, "step": 53750 }, { "epoch": 0.9720618116313814, "grad_norm": 43.8125, "learning_rate": 9.848115599398114e-06, "loss": 16.7589, "step": 53760 }, { "epoch": 0.9722426267005092, "grad_norm": 47.03125, "learning_rate": 9.848087347091455e-06, "loss": 16.797, "step": 53770 }, { "epoch": 0.9724234417696371, "grad_norm": 45.34375, "learning_rate": 9.848059094784794e-06, "loss": 16.6639, "step": 53780 }, { "epoch": 0.972604256838765, "grad_norm": 41.8125, "learning_rate": 9.848030842478134e-06, "loss": 16.494, "step": 53790 }, { "epoch": 0.9727850719078928, "grad_norm": 44.75, "learning_rate": 9.848002590171475e-06, "loss": 16.8343, "step": 53800 }, { "epoch": 0.9729658869770207, "grad_norm": 43.09375, "learning_rate": 9.847974337864816e-06, "loss": 16.5076, "step": 53810 }, { "epoch": 0.9731467020461485, "grad_norm": 45.3125, "learning_rate": 9.847946085558156e-06, "loss": 16.5628, "step": 53820 }, { "epoch": 0.9733275171152764, "grad_norm": 42.625, "learning_rate": 9.847917833251497e-06, "loss": 16.3621, "step": 53830 }, { "epoch": 0.9735083321844042, "grad_norm": 44.09375, "learning_rate": 9.847889580944837e-06, "loss": 16.68, "step": 53840 }, { "epoch": 0.9736891472535321, "grad_norm": 42.75, "learning_rate": 9.847861328638178e-06, "loss": 16.4268, "step": 53850 }, { "epoch": 0.97386996232266, "grad_norm": 46.90625, "learning_rate": 9.847833076331517e-06, "loss": 16.3298, "step": 53860 }, { "epoch": 0.9740507773917878, "grad_norm": 46.625, "learning_rate": 9.847804824024858e-06, "loss": 16.6696, "step": 53870 }, { "epoch": 0.9742315924609157, "grad_norm": 46.3125, "learning_rate": 9.847776571718198e-06, "loss": 16.8559, "step": 53880 }, { "epoch": 0.9744124075300435, "grad_norm": 40.90625, "learning_rate": 9.847748319411539e-06, "loss": 16.8465, "step": 53890 }, { "epoch": 0.9745932225991714, "grad_norm": 45.03125, "learning_rate": 9.84772006710488e-06, "loss": 16.295, "step": 53900 }, { "epoch": 0.9747740376682993, "grad_norm": 44.125, "learning_rate": 9.84769181479822e-06, "loss": 16.9551, "step": 53910 }, { "epoch": 0.9749548527374271, "grad_norm": 44.4375, "learning_rate": 9.84766356249156e-06, "loss": 16.4401, "step": 53920 }, { "epoch": 0.975135667806555, "grad_norm": 43.96875, "learning_rate": 9.847635310184901e-06, "loss": 16.5902, "step": 53930 }, { "epoch": 0.9753164828756828, "grad_norm": 45.71875, "learning_rate": 9.847607057878242e-06, "loss": 16.6108, "step": 53940 }, { "epoch": 0.9754972979448108, "grad_norm": 43.875, "learning_rate": 9.84757880557158e-06, "loss": 16.9275, "step": 53950 }, { "epoch": 0.9756781130139386, "grad_norm": 45.0, "learning_rate": 9.847550553264921e-06, "loss": 16.6905, "step": 53960 }, { "epoch": 0.9758589280830664, "grad_norm": 43.75, "learning_rate": 9.847522300958262e-06, "loss": 16.5468, "step": 53970 }, { "epoch": 0.9760397431521943, "grad_norm": 44.0625, "learning_rate": 9.847494048651603e-06, "loss": 16.9278, "step": 53980 }, { "epoch": 0.9762205582213221, "grad_norm": 43.8125, "learning_rate": 9.847465796344943e-06, "loss": 16.379, "step": 53990 }, { "epoch": 0.9764013732904501, "grad_norm": 43.40625, "learning_rate": 9.847437544038284e-06, "loss": 16.4727, "step": 54000 }, { "epoch": 0.9765821883595779, "grad_norm": 45.3125, "learning_rate": 9.847409291731625e-06, "loss": 16.854, "step": 54010 }, { "epoch": 0.9767630034287057, "grad_norm": 43.46875, "learning_rate": 9.847381039424965e-06, "loss": 16.6393, "step": 54020 }, { "epoch": 0.9769438184978336, "grad_norm": 45.65625, "learning_rate": 9.847352787118306e-06, "loss": 16.786, "step": 54030 }, { "epoch": 0.9771246335669614, "grad_norm": 43.09375, "learning_rate": 9.847324534811645e-06, "loss": 16.5585, "step": 54040 }, { "epoch": 0.9773054486360894, "grad_norm": 42.46875, "learning_rate": 9.847296282504985e-06, "loss": 16.6101, "step": 54050 }, { "epoch": 0.9774862637052172, "grad_norm": 45.53125, "learning_rate": 9.847268030198326e-06, "loss": 16.7073, "step": 54060 }, { "epoch": 0.977667078774345, "grad_norm": 44.78125, "learning_rate": 9.847239777891667e-06, "loss": 16.6391, "step": 54070 }, { "epoch": 0.9778478938434729, "grad_norm": 42.09375, "learning_rate": 9.847211525585007e-06, "loss": 16.8252, "step": 54080 }, { "epoch": 0.9780287089126007, "grad_norm": 44.375, "learning_rate": 9.847183273278348e-06, "loss": 16.2046, "step": 54090 }, { "epoch": 0.9782095239817287, "grad_norm": 45.4375, "learning_rate": 9.847155020971688e-06, "loss": 16.8368, "step": 54100 }, { "epoch": 0.9783903390508565, "grad_norm": 41.84375, "learning_rate": 9.847126768665029e-06, "loss": 16.9655, "step": 54110 }, { "epoch": 0.9785711541199844, "grad_norm": 42.34375, "learning_rate": 9.847098516358368e-06, "loss": 16.7057, "step": 54120 }, { "epoch": 0.9787519691891122, "grad_norm": 47.28125, "learning_rate": 9.847070264051709e-06, "loss": 16.7744, "step": 54130 }, { "epoch": 0.97893278425824, "grad_norm": 46.1875, "learning_rate": 9.84704201174505e-06, "loss": 16.8817, "step": 54140 }, { "epoch": 0.979113599327368, "grad_norm": 48.625, "learning_rate": 9.84701375943839e-06, "loss": 16.5609, "step": 54150 }, { "epoch": 0.9792944143964958, "grad_norm": 45.09375, "learning_rate": 9.84698550713173e-06, "loss": 16.8131, "step": 54160 }, { "epoch": 0.9794752294656237, "grad_norm": 42.09375, "learning_rate": 9.846957254825071e-06, "loss": 16.4902, "step": 54170 }, { "epoch": 0.9796560445347515, "grad_norm": 43.0625, "learning_rate": 9.846929002518412e-06, "loss": 16.5643, "step": 54180 }, { "epoch": 0.9798368596038793, "grad_norm": 46.84375, "learning_rate": 9.846900750211752e-06, "loss": 16.5155, "step": 54190 }, { "epoch": 0.9800176746730073, "grad_norm": 45.34375, "learning_rate": 9.846872497905093e-06, "loss": 16.5785, "step": 54200 }, { "epoch": 0.9801984897421351, "grad_norm": 46.4375, "learning_rate": 9.846844245598432e-06, "loss": 16.6293, "step": 54210 }, { "epoch": 0.980379304811263, "grad_norm": 42.21875, "learning_rate": 9.846815993291773e-06, "loss": 16.4009, "step": 54220 }, { "epoch": 0.9805601198803908, "grad_norm": 43.21875, "learning_rate": 9.846787740985113e-06, "loss": 16.6647, "step": 54230 }, { "epoch": 0.9807409349495186, "grad_norm": 45.5, "learning_rate": 9.846759488678454e-06, "loss": 16.7975, "step": 54240 }, { "epoch": 0.9809217500186466, "grad_norm": 43.875, "learning_rate": 9.846731236371794e-06, "loss": 16.7165, "step": 54250 }, { "epoch": 0.9811025650877744, "grad_norm": 45.5625, "learning_rate": 9.846702984065135e-06, "loss": 16.5906, "step": 54260 }, { "epoch": 0.9812833801569023, "grad_norm": 48.40625, "learning_rate": 9.846674731758476e-06, "loss": 16.5228, "step": 54270 }, { "epoch": 0.9814641952260301, "grad_norm": 45.40625, "learning_rate": 9.846646479451816e-06, "loss": 16.3795, "step": 54280 }, { "epoch": 0.981645010295158, "grad_norm": 46.78125, "learning_rate": 9.846618227145155e-06, "loss": 17.0372, "step": 54290 }, { "epoch": 0.9818258253642859, "grad_norm": 48.375, "learning_rate": 9.846589974838496e-06, "loss": 16.7747, "step": 54300 }, { "epoch": 0.9820066404334137, "grad_norm": 43.125, "learning_rate": 9.846561722531836e-06, "loss": 16.8236, "step": 54310 }, { "epoch": 0.9821874555025416, "grad_norm": 46.25, "learning_rate": 9.846533470225177e-06, "loss": 16.6047, "step": 54320 }, { "epoch": 0.9823682705716694, "grad_norm": 47.9375, "learning_rate": 9.846505217918518e-06, "loss": 16.911, "step": 54330 }, { "epoch": 0.9825490856407973, "grad_norm": 45.9375, "learning_rate": 9.846476965611858e-06, "loss": 16.969, "step": 54340 }, { "epoch": 0.9827299007099252, "grad_norm": 41.71875, "learning_rate": 9.846448713305199e-06, "loss": 16.8471, "step": 54350 }, { "epoch": 0.982910715779053, "grad_norm": 45.5, "learning_rate": 9.84642046099854e-06, "loss": 16.3527, "step": 54360 }, { "epoch": 0.9830915308481809, "grad_norm": 47.65625, "learning_rate": 9.84639220869188e-06, "loss": 16.3417, "step": 54370 }, { "epoch": 0.9832723459173087, "grad_norm": 43.6875, "learning_rate": 9.846363956385219e-06, "loss": 16.4029, "step": 54380 }, { "epoch": 0.9834531609864366, "grad_norm": 44.09375, "learning_rate": 9.84633570407856e-06, "loss": 16.9436, "step": 54390 }, { "epoch": 0.9836339760555645, "grad_norm": 42.4375, "learning_rate": 9.8463074517719e-06, "loss": 16.6759, "step": 54400 }, { "epoch": 0.9838147911246923, "grad_norm": 45.375, "learning_rate": 9.846279199465241e-06, "loss": 16.7103, "step": 54410 }, { "epoch": 0.9839956061938202, "grad_norm": 42.5625, "learning_rate": 9.846250947158582e-06, "loss": 16.3442, "step": 54420 }, { "epoch": 0.984176421262948, "grad_norm": 46.0625, "learning_rate": 9.846222694851922e-06, "loss": 16.9653, "step": 54430 }, { "epoch": 0.984357236332076, "grad_norm": 47.03125, "learning_rate": 9.846194442545263e-06, "loss": 16.7699, "step": 54440 }, { "epoch": 0.9845380514012038, "grad_norm": 46.34375, "learning_rate": 9.846166190238603e-06, "loss": 16.7369, "step": 54450 }, { "epoch": 0.9847188664703317, "grad_norm": 45.9375, "learning_rate": 9.846137937931944e-06, "loss": 17.2226, "step": 54460 }, { "epoch": 0.9848996815394595, "grad_norm": 45.03125, "learning_rate": 9.846109685625283e-06, "loss": 16.9318, "step": 54470 }, { "epoch": 0.9850804966085873, "grad_norm": 45.15625, "learning_rate": 9.846081433318624e-06, "loss": 16.7337, "step": 54480 }, { "epoch": 0.9852613116777152, "grad_norm": 45.6875, "learning_rate": 9.846053181011964e-06, "loss": 17.0956, "step": 54490 }, { "epoch": 0.9854421267468431, "grad_norm": 42.84375, "learning_rate": 9.846024928705305e-06, "loss": 16.4707, "step": 54500 }, { "epoch": 0.985622941815971, "grad_norm": 41.4375, "learning_rate": 9.845996676398646e-06, "loss": 16.2423, "step": 54510 }, { "epoch": 0.9858037568850988, "grad_norm": 44.90625, "learning_rate": 9.845968424091986e-06, "loss": 16.9379, "step": 54520 }, { "epoch": 0.9859845719542266, "grad_norm": 43.4375, "learning_rate": 9.845940171785327e-06, "loss": 16.7772, "step": 54530 }, { "epoch": 0.9861653870233545, "grad_norm": 43.125, "learning_rate": 9.845911919478667e-06, "loss": 16.7428, "step": 54540 }, { "epoch": 0.9863462020924824, "grad_norm": 45.59375, "learning_rate": 9.845883667172006e-06, "loss": 16.4671, "step": 54550 }, { "epoch": 0.9865270171616103, "grad_norm": 45.28125, "learning_rate": 9.845855414865347e-06, "loss": 16.8481, "step": 54560 }, { "epoch": 0.9867078322307381, "grad_norm": 40.65625, "learning_rate": 9.845827162558688e-06, "loss": 16.5104, "step": 54570 }, { "epoch": 0.9868886472998659, "grad_norm": 44.21875, "learning_rate": 9.845798910252028e-06, "loss": 16.8001, "step": 54580 }, { "epoch": 0.9870694623689938, "grad_norm": 43.78125, "learning_rate": 9.845770657945369e-06, "loss": 16.502, "step": 54590 }, { "epoch": 0.9872502774381217, "grad_norm": 44.59375, "learning_rate": 9.84574240563871e-06, "loss": 16.652, "step": 54600 }, { "epoch": 0.9874310925072496, "grad_norm": 45.875, "learning_rate": 9.84571415333205e-06, "loss": 16.5849, "step": 54610 }, { "epoch": 0.9876119075763774, "grad_norm": 46.46875, "learning_rate": 9.84568590102539e-06, "loss": 16.6386, "step": 54620 }, { "epoch": 0.9877927226455053, "grad_norm": 48.0, "learning_rate": 9.845657648718731e-06, "loss": 16.7037, "step": 54630 }, { "epoch": 0.9879735377146331, "grad_norm": 46.625, "learning_rate": 9.84562939641207e-06, "loss": 16.9638, "step": 54640 }, { "epoch": 0.988154352783761, "grad_norm": 43.34375, "learning_rate": 9.845601144105411e-06, "loss": 16.5736, "step": 54650 }, { "epoch": 0.9883351678528889, "grad_norm": 45.28125, "learning_rate": 9.845572891798751e-06, "loss": 16.9063, "step": 54660 }, { "epoch": 0.9885159829220167, "grad_norm": 42.28125, "learning_rate": 9.845544639492092e-06, "loss": 16.9911, "step": 54670 }, { "epoch": 0.9886967979911446, "grad_norm": 44.78125, "learning_rate": 9.845516387185433e-06, "loss": 16.7827, "step": 54680 }, { "epoch": 0.9888776130602724, "grad_norm": 46.53125, "learning_rate": 9.845488134878773e-06, "loss": 16.8007, "step": 54690 }, { "epoch": 0.9890584281294003, "grad_norm": 43.3125, "learning_rate": 9.845459882572114e-06, "loss": 16.2794, "step": 54700 }, { "epoch": 0.9892392431985282, "grad_norm": 45.59375, "learning_rate": 9.845431630265455e-06, "loss": 16.7441, "step": 54710 }, { "epoch": 0.989420058267656, "grad_norm": 43.25, "learning_rate": 9.845403377958794e-06, "loss": 16.602, "step": 54720 }, { "epoch": 0.9896008733367839, "grad_norm": 46.59375, "learning_rate": 9.845375125652134e-06, "loss": 16.721, "step": 54730 }, { "epoch": 0.9897816884059117, "grad_norm": 46.40625, "learning_rate": 9.845346873345475e-06, "loss": 17.2184, "step": 54740 }, { "epoch": 0.9899625034750396, "grad_norm": 41.8125, "learning_rate": 9.845318621038815e-06, "loss": 16.793, "step": 54750 }, { "epoch": 0.9901433185441675, "grad_norm": 43.5625, "learning_rate": 9.845290368732156e-06, "loss": 16.7583, "step": 54760 }, { "epoch": 0.9903241336132953, "grad_norm": 44.90625, "learning_rate": 9.845262116425497e-06, "loss": 16.8049, "step": 54770 }, { "epoch": 0.9905049486824232, "grad_norm": 42.28125, "learning_rate": 9.845233864118837e-06, "loss": 16.8066, "step": 54780 }, { "epoch": 0.990685763751551, "grad_norm": 42.3125, "learning_rate": 9.845205611812178e-06, "loss": 16.7789, "step": 54790 }, { "epoch": 0.990866578820679, "grad_norm": 46.125, "learning_rate": 9.845177359505518e-06, "loss": 16.5531, "step": 54800 }, { "epoch": 0.9910473938898068, "grad_norm": 43.96875, "learning_rate": 9.845149107198857e-06, "loss": 16.586, "step": 54810 }, { "epoch": 0.9912282089589346, "grad_norm": 44.5625, "learning_rate": 9.845120854892198e-06, "loss": 16.7009, "step": 54820 }, { "epoch": 0.9914090240280625, "grad_norm": 45.9375, "learning_rate": 9.845092602585539e-06, "loss": 16.6305, "step": 54830 }, { "epoch": 0.9915898390971903, "grad_norm": 43.34375, "learning_rate": 9.84506435027888e-06, "loss": 16.8697, "step": 54840 }, { "epoch": 0.9917706541663183, "grad_norm": 45.25, "learning_rate": 9.84503609797222e-06, "loss": 16.5857, "step": 54850 }, { "epoch": 0.9919514692354461, "grad_norm": 45.9375, "learning_rate": 9.84500784566556e-06, "loss": 16.4074, "step": 54860 }, { "epoch": 0.9921322843045739, "grad_norm": 48.34375, "learning_rate": 9.844979593358901e-06, "loss": 16.8042, "step": 54870 }, { "epoch": 0.9923130993737018, "grad_norm": 43.4375, "learning_rate": 9.844951341052242e-06, "loss": 16.6853, "step": 54880 }, { "epoch": 0.9924939144428296, "grad_norm": 46.5625, "learning_rate": 9.844923088745582e-06, "loss": 16.6639, "step": 54890 }, { "epoch": 0.9926747295119576, "grad_norm": 47.875, "learning_rate": 9.844894836438921e-06, "loss": 16.7631, "step": 54900 }, { "epoch": 0.9928555445810854, "grad_norm": 43.34375, "learning_rate": 9.844866584132262e-06, "loss": 17.0811, "step": 54910 }, { "epoch": 0.9930363596502132, "grad_norm": 47.59375, "learning_rate": 9.844838331825603e-06, "loss": 17.37, "step": 54920 }, { "epoch": 0.9932171747193411, "grad_norm": 46.125, "learning_rate": 9.844810079518943e-06, "loss": 16.7417, "step": 54930 }, { "epoch": 0.9933979897884689, "grad_norm": 47.75, "learning_rate": 9.844781827212284e-06, "loss": 16.3878, "step": 54940 }, { "epoch": 0.9935788048575969, "grad_norm": 42.65625, "learning_rate": 9.844753574905624e-06, "loss": 16.5961, "step": 54950 }, { "epoch": 0.9937596199267247, "grad_norm": 45.4375, "learning_rate": 9.844725322598965e-06, "loss": 16.6482, "step": 54960 }, { "epoch": 0.9939404349958526, "grad_norm": 45.8125, "learning_rate": 9.844697070292306e-06, "loss": 16.5548, "step": 54970 }, { "epoch": 0.9941212500649804, "grad_norm": 45.6875, "learning_rate": 9.844668817985645e-06, "loss": 16.468, "step": 54980 }, { "epoch": 0.9943020651341082, "grad_norm": 43.96875, "learning_rate": 9.844640565678985e-06, "loss": 16.5594, "step": 54990 }, { "epoch": 0.9944828802032362, "grad_norm": 45.40625, "learning_rate": 9.844612313372326e-06, "loss": 16.8028, "step": 55000 }, { "epoch": 0.9944828802032362, "eval_loss": 2.082907199859619, "eval_runtime": 228.736, "eval_samples_per_second": 3174.223, "eval_steps_per_second": 49.599, "step": 55000 }, { "epoch": 0.994663695272364, "grad_norm": 44.0, "learning_rate": 9.844584061065666e-06, "loss": 16.6952, "step": 55010 }, { "epoch": 0.9948445103414919, "grad_norm": 44.90625, "learning_rate": 9.844555808759007e-06, "loss": 16.4976, "step": 55020 }, { "epoch": 0.9950253254106197, "grad_norm": 45.875, "learning_rate": 9.844527556452346e-06, "loss": 16.4644, "step": 55030 }, { "epoch": 0.9952061404797475, "grad_norm": 44.0, "learning_rate": 9.844499304145688e-06, "loss": 16.7778, "step": 55040 }, { "epoch": 0.9953869555488755, "grad_norm": 44.3125, "learning_rate": 9.844471051839029e-06, "loss": 17.0716, "step": 55050 }, { "epoch": 0.9955677706180033, "grad_norm": 53.21875, "learning_rate": 9.84444279953237e-06, "loss": 17.1855, "step": 55060 }, { "epoch": 0.9957485856871312, "grad_norm": 45.09375, "learning_rate": 9.844414547225709e-06, "loss": 16.8956, "step": 55070 }, { "epoch": 0.995929400756259, "grad_norm": 45.03125, "learning_rate": 9.844386294919049e-06, "loss": 16.755, "step": 55080 }, { "epoch": 0.9961102158253868, "grad_norm": 46.8125, "learning_rate": 9.84435804261239e-06, "loss": 16.4342, "step": 55090 }, { "epoch": 0.9962910308945148, "grad_norm": 43.59375, "learning_rate": 9.84432979030573e-06, "loss": 16.5041, "step": 55100 }, { "epoch": 0.9964718459636426, "grad_norm": 44.65625, "learning_rate": 9.844301537999071e-06, "loss": 16.9493, "step": 55110 }, { "epoch": 0.9966526610327705, "grad_norm": 41.65625, "learning_rate": 9.844273285692412e-06, "loss": 16.5818, "step": 55120 }, { "epoch": 0.9968334761018983, "grad_norm": 44.625, "learning_rate": 9.844245033385752e-06, "loss": 16.5041, "step": 55130 }, { "epoch": 0.9970142911710262, "grad_norm": 43.3125, "learning_rate": 9.844216781079093e-06, "loss": 16.6332, "step": 55140 }, { "epoch": 0.9971951062401541, "grad_norm": 43.96875, "learning_rate": 9.844188528772432e-06, "loss": 16.9803, "step": 55150 }, { "epoch": 0.9973759213092819, "grad_norm": 46.53125, "learning_rate": 9.844160276465772e-06, "loss": 16.5326, "step": 55160 }, { "epoch": 0.9975567363784098, "grad_norm": 47.46875, "learning_rate": 9.844132024159113e-06, "loss": 16.9527, "step": 55170 }, { "epoch": 0.9977375514475376, "grad_norm": 49.3125, "learning_rate": 9.844103771852454e-06, "loss": 16.533, "step": 55180 }, { "epoch": 0.9979183665166655, "grad_norm": 46.78125, "learning_rate": 9.844075519545794e-06, "loss": 16.6507, "step": 55190 }, { "epoch": 0.9980991815857934, "grad_norm": 45.40625, "learning_rate": 9.844047267239133e-06, "loss": 16.4906, "step": 55200 }, { "epoch": 0.9982799966549212, "grad_norm": 44.3125, "learning_rate": 9.844019014932476e-06, "loss": 16.5156, "step": 55210 }, { "epoch": 0.9984608117240491, "grad_norm": 47.5625, "learning_rate": 9.843990762625816e-06, "loss": 16.7121, "step": 55220 }, { "epoch": 0.9986416267931769, "grad_norm": 44.28125, "learning_rate": 9.843962510319157e-06, "loss": 16.9591, "step": 55230 }, { "epoch": 0.9988224418623048, "grad_norm": 44.78125, "learning_rate": 9.843934258012496e-06, "loss": 16.4461, "step": 55240 }, { "epoch": 0.9990032569314327, "grad_norm": 44.40625, "learning_rate": 9.843906005705836e-06, "loss": 16.741, "step": 55250 }, { "epoch": 0.9991840720005605, "grad_norm": 44.03125, "learning_rate": 9.843877753399177e-06, "loss": 16.965, "step": 55260 }, { "epoch": 0.9993648870696884, "grad_norm": 41.5, "learning_rate": 9.843849501092518e-06, "loss": 16.5569, "step": 55270 }, { "epoch": 0.9995457021388162, "grad_norm": 47.46875, "learning_rate": 9.843821248785858e-06, "loss": 16.7305, "step": 55280 }, { "epoch": 0.9997265172079441, "grad_norm": 42.53125, "learning_rate": 9.843792996479197e-06, "loss": 16.5323, "step": 55290 }, { "epoch": 0.999907332277072, "grad_norm": 43.65625, "learning_rate": 9.84376474417254e-06, "loss": 16.207, "step": 55300 } ], "logging_steps": 10, "max_steps": 55305, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.653613618876383e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }