{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9249278152069298, "eval_steps": 500, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00019249278152069297, "grad_norm": 1.9932657480239868, "learning_rate": 0.00016, "loss": 2.962, "step": 5 }, { "epoch": 0.00038498556304138594, "grad_norm": 1.2827370166778564, "learning_rate": 0.000199999997073812, "loss": 2.2983, "step": 10 }, { "epoch": 0.0005774783445620789, "grad_norm": 1.3585917949676514, "learning_rate": 0.0001999999851861734, "loss": 1.9389, "step": 15 }, { "epoch": 0.0007699711260827719, "grad_norm": 2.117544412612915, "learning_rate": 0.00019999996415419864, "loss": 1.6659, "step": 20 }, { "epoch": 0.0009624639076034649, "grad_norm": 0.8802940249443054, "learning_rate": 0.0001999999339778896, "loss": 1.6015, "step": 25 }, { "epoch": 0.0011549566891241579, "grad_norm": 1.256873369216919, "learning_rate": 0.000199999894657249, "loss": 1.7428, "step": 30 }, { "epoch": 0.001347449470644851, "grad_norm": 1.9709804058074951, "learning_rate": 0.0001999998461922805, "loss": 1.4316, "step": 35 }, { "epoch": 0.0015399422521655437, "grad_norm": 1.2085392475128174, "learning_rate": 0.00019999978858298848, "loss": 1.8392, "step": 40 }, { "epoch": 0.0017324350336862368, "grad_norm": 0.9966161847114563, "learning_rate": 0.00019999972182937827, "loss": 1.6381, "step": 45 }, { "epoch": 0.0019249278152069298, "grad_norm": 1.5572378635406494, "learning_rate": 0.0001999996459314559, "loss": 1.6214, "step": 50 }, { "epoch": 0.0021174205967276227, "grad_norm": 0.9813450574874878, "learning_rate": 0.00019999956088922837, "loss": 1.5337, "step": 55 }, { "epoch": 0.0023099133782483157, "grad_norm": 1.140754222869873, "learning_rate": 0.00019999946670270341, "loss": 1.5865, "step": 60 }, { "epoch": 0.0025024061597690088, "grad_norm": 1.7033613920211792, "learning_rate": 0.0001999993633718897, "loss": 1.5483, "step": 65 
}, { "epoch": 0.002694898941289702, "grad_norm": 0.8782416582107544, "learning_rate": 0.00019999925089679658, "loss": 1.7574, "step": 70 }, { "epoch": 0.0028873917228103944, "grad_norm": 0.94110506772995, "learning_rate": 0.00019999912927743445, "loss": 1.747, "step": 75 }, { "epoch": 0.0030798845043310875, "grad_norm": 2.9130144119262695, "learning_rate": 0.00019999899851381436, "loss": 1.5482, "step": 80 }, { "epoch": 0.0032723772858517805, "grad_norm": 1.444981336593628, "learning_rate": 0.00019999885860594828, "loss": 1.7935, "step": 85 }, { "epoch": 0.0034648700673724736, "grad_norm": 0.8361923098564148, "learning_rate": 0.00019999870955384906, "loss": 1.5566, "step": 90 }, { "epoch": 0.0036573628488931666, "grad_norm": 1.0198391675949097, "learning_rate": 0.00019999855135753025, "loss": 1.6608, "step": 95 }, { "epoch": 0.0038498556304138597, "grad_norm": 0.9720978736877441, "learning_rate": 0.00019999838401700632, "loss": 1.4217, "step": 100 }, { "epoch": 0.004042348411934553, "grad_norm": 0.7735599279403687, "learning_rate": 0.00019999820753229263, "loss": 1.4195, "step": 105 }, { "epoch": 0.004234841193455245, "grad_norm": 1.1776920557022095, "learning_rate": 0.0001999980219034053, "loss": 1.7147, "step": 110 }, { "epoch": 0.004427333974975939, "grad_norm": 1.156069278717041, "learning_rate": 0.0001999978271303613, "loss": 1.7, "step": 115 }, { "epoch": 0.0046198267564966315, "grad_norm": 1.2335503101348877, "learning_rate": 0.0001999976232131784, "loss": 1.3309, "step": 120 }, { "epoch": 0.004812319538017324, "grad_norm": 1.0332967042922974, "learning_rate": 0.0001999974101518753, "loss": 1.7515, "step": 125 }, { "epoch": 0.0050048123195380175, "grad_norm": 1.561087727546692, "learning_rate": 0.00019999718794647145, "loss": 1.5517, "step": 130 }, { "epoch": 0.00519730510105871, "grad_norm": 1.3611408472061157, "learning_rate": 0.00019999695659698717, "loss": 1.5771, "step": 135 }, { "epoch": 0.005389797882579404, "grad_norm": 1.5531154870986938, 
"learning_rate": 0.0001999967161034437, "loss": 1.4217, "step": 140 }, { "epoch": 0.005582290664100096, "grad_norm": 1.5827676057815552, "learning_rate": 0.00019999646646586287, "loss": 1.611, "step": 145 }, { "epoch": 0.005774783445620789, "grad_norm": 1.1693483591079712, "learning_rate": 0.00019999620768426763, "loss": 1.3961, "step": 150 }, { "epoch": 0.005967276227141482, "grad_norm": 1.4277936220169067, "learning_rate": 0.00019999593975868164, "loss": 1.638, "step": 155 }, { "epoch": 0.006159769008662175, "grad_norm": 1.2951083183288574, "learning_rate": 0.00019999566268912933, "loss": 1.6187, "step": 160 }, { "epoch": 0.0063522617901828685, "grad_norm": 2.4885995388031006, "learning_rate": 0.0001999953764756361, "loss": 1.5669, "step": 165 }, { "epoch": 0.006544754571703561, "grad_norm": 1.3352105617523193, "learning_rate": 0.00019999508111822811, "loss": 1.3157, "step": 170 }, { "epoch": 0.006737247353224254, "grad_norm": 1.2560889720916748, "learning_rate": 0.00019999477661693233, "loss": 1.7011, "step": 175 }, { "epoch": 0.006929740134744947, "grad_norm": 2.4167582988739014, "learning_rate": 0.00019999446297177666, "loss": 1.4827, "step": 180 }, { "epoch": 0.00712223291626564, "grad_norm": 1.0598788261413574, "learning_rate": 0.00019999414018278974, "loss": 1.5718, "step": 185 }, { "epoch": 0.007314725697786333, "grad_norm": 1.5576567649841309, "learning_rate": 0.00019999380825000111, "loss": 1.7717, "step": 190 }, { "epoch": 0.007507218479307026, "grad_norm": 1.005711317062378, "learning_rate": 0.0001999934671734411, "loss": 1.5085, "step": 195 }, { "epoch": 0.007699711260827719, "grad_norm": 1.7211413383483887, "learning_rate": 0.00019999311695314095, "loss": 1.623, "step": 200 }, { "epoch": 0.007892204042348411, "grad_norm": 1.5765767097473145, "learning_rate": 0.00019999275758913261, "loss": 1.5982, "step": 205 }, { "epoch": 0.008084696823869105, "grad_norm": 1.0989298820495605, "learning_rate": 0.00019999238908144896, "loss": 1.3306, "step": 210 }, { 
"epoch": 0.008277189605389798, "grad_norm": 1.0234464406967163, "learning_rate": 0.0001999920114301238, "loss": 1.5856, "step": 215 }, { "epoch": 0.00846968238691049, "grad_norm": 1.6681355237960815, "learning_rate": 0.0001999916246351915, "loss": 1.4777, "step": 220 }, { "epoch": 0.008662175168431183, "grad_norm": 0.9723508358001709, "learning_rate": 0.00019999122869668754, "loss": 1.5357, "step": 225 }, { "epoch": 0.008854667949951878, "grad_norm": 0.8840959072113037, "learning_rate": 0.0001999908236146481, "loss": 1.5296, "step": 230 }, { "epoch": 0.00904716073147257, "grad_norm": 0.9913238883018494, "learning_rate": 0.0001999904093891102, "loss": 1.5846, "step": 235 }, { "epoch": 0.009239653512993263, "grad_norm": 1.129952073097229, "learning_rate": 0.00019998998602011178, "loss": 1.4455, "step": 240 }, { "epoch": 0.009432146294513956, "grad_norm": 1.0377521514892578, "learning_rate": 0.00019998955350769148, "loss": 1.4212, "step": 245 }, { "epoch": 0.009624639076034648, "grad_norm": 2.2103137969970703, "learning_rate": 0.00019998911185188886, "loss": 1.5812, "step": 250 }, { "epoch": 0.009817131857555342, "grad_norm": 0.8716953992843628, "learning_rate": 0.00019998866105274437, "loss": 1.5326, "step": 255 }, { "epoch": 0.010009624639076035, "grad_norm": 1.1956042051315308, "learning_rate": 0.00019998820111029916, "loss": 1.7183, "step": 260 }, { "epoch": 0.010202117420596728, "grad_norm": 2.747600555419922, "learning_rate": 0.00019998773202459534, "loss": 1.7952, "step": 265 }, { "epoch": 0.01039461020211742, "grad_norm": 1.3412338495254517, "learning_rate": 0.00019998725379567577, "loss": 1.3538, "step": 270 }, { "epoch": 0.010587102983638113, "grad_norm": 1.651822805404663, "learning_rate": 0.00019998676642358422, "loss": 1.5458, "step": 275 }, { "epoch": 0.010779595765158807, "grad_norm": 1.3036198616027832, "learning_rate": 0.00019998626990836522, "loss": 1.7305, "step": 280 }, { "epoch": 0.0109720885466795, "grad_norm": 0.8263657093048096, 
"learning_rate": 0.00019998576425006416, "loss": 1.3767, "step": 285 }, { "epoch": 0.011164581328200193, "grad_norm": 2.022136926651001, "learning_rate": 0.00019998524944872737, "loss": 1.5823, "step": 290 }, { "epoch": 0.011357074109720885, "grad_norm": 1.1224019527435303, "learning_rate": 0.00019998472550440178, "loss": 1.5723, "step": 295 }, { "epoch": 0.011549566891241578, "grad_norm": 1.375664234161377, "learning_rate": 0.00019998419241713542, "loss": 1.5224, "step": 300 }, { "epoch": 0.011742059672762272, "grad_norm": 1.2721813917160034, "learning_rate": 0.000199983650186977, "loss": 1.7217, "step": 305 }, { "epoch": 0.011934552454282965, "grad_norm": 1.4723321199417114, "learning_rate": 0.0001999830988139761, "loss": 1.4666, "step": 310 }, { "epoch": 0.012127045235803657, "grad_norm": 0.695198118686676, "learning_rate": 0.00019998253829818315, "loss": 1.2672, "step": 315 }, { "epoch": 0.01231953801732435, "grad_norm": 1.716638207435608, "learning_rate": 0.00019998196863964937, "loss": 1.3461, "step": 320 }, { "epoch": 0.012512030798845043, "grad_norm": 1.1060154438018799, "learning_rate": 0.0001999813898384269, "loss": 1.3816, "step": 325 }, { "epoch": 0.012704523580365737, "grad_norm": 1.6124354600906372, "learning_rate": 0.00019998080189456862, "loss": 1.5232, "step": 330 }, { "epoch": 0.01289701636188643, "grad_norm": 1.5060306787490845, "learning_rate": 0.00019998020480812832, "loss": 1.5767, "step": 335 }, { "epoch": 0.013089509143407122, "grad_norm": 1.1920175552368164, "learning_rate": 0.00019997959857916063, "loss": 1.6112, "step": 340 }, { "epoch": 0.013282001924927815, "grad_norm": 1.1669896841049194, "learning_rate": 0.00019997898320772096, "loss": 1.3679, "step": 345 }, { "epoch": 0.013474494706448507, "grad_norm": 1.1692086458206177, "learning_rate": 0.00019997835869386553, "loss": 1.4147, "step": 350 }, { "epoch": 0.013666987487969202, "grad_norm": 2.0466034412384033, "learning_rate": 0.00019997772503765153, "loss": 1.5261, "step": 355 }, { 
"epoch": 0.013859480269489894, "grad_norm": 1.1581529378890991, "learning_rate": 0.00019997708223913686, "loss": 1.5441, "step": 360 }, { "epoch": 0.014051973051010587, "grad_norm": 1.4370143413543701, "learning_rate": 0.0001999764302983803, "loss": 1.651, "step": 365 }, { "epoch": 0.01424446583253128, "grad_norm": 0.998635470867157, "learning_rate": 0.00019997576921544147, "loss": 1.4311, "step": 370 }, { "epoch": 0.014436958614051972, "grad_norm": 1.2625153064727783, "learning_rate": 0.00019997509899038086, "loss": 1.4634, "step": 375 }, { "epoch": 0.014629451395572667, "grad_norm": 1.171949863433838, "learning_rate": 0.00019997441962325968, "loss": 1.2474, "step": 380 }, { "epoch": 0.01482194417709336, "grad_norm": 1.4312052726745605, "learning_rate": 0.00019997373111414009, "loss": 1.4814, "step": 385 }, { "epoch": 0.015014436958614052, "grad_norm": 1.1508846282958984, "learning_rate": 0.00019997303346308508, "loss": 1.6291, "step": 390 }, { "epoch": 0.015206929740134744, "grad_norm": 1.2096014022827148, "learning_rate": 0.0001999723266701584, "loss": 1.5507, "step": 395 }, { "epoch": 0.015399422521655439, "grad_norm": 0.996391773223877, "learning_rate": 0.00019997161073542473, "loss": 1.6402, "step": 400 }, { "epoch": 0.015591915303176131, "grad_norm": 1.6977828741073608, "learning_rate": 0.00019997088565894947, "loss": 1.5706, "step": 405 }, { "epoch": 0.015784408084696822, "grad_norm": 1.4707343578338623, "learning_rate": 0.000199970151440799, "loss": 1.6348, "step": 410 }, { "epoch": 0.015976900866217517, "grad_norm": 1.5461647510528564, "learning_rate": 0.0001999694080810404, "loss": 1.4836, "step": 415 }, { "epoch": 0.01616939364773821, "grad_norm": 1.6253695487976074, "learning_rate": 0.00019996865557974166, "loss": 1.5834, "step": 420 }, { "epoch": 0.016361886429258902, "grad_norm": 1.671321988105774, "learning_rate": 0.00019996789393697165, "loss": 1.3816, "step": 425 }, { "epoch": 0.016554379210779596, "grad_norm": 0.9412807822227478, "learning_rate": 
0.00019996712315279992, "loss": 1.443, "step": 430 }, { "epoch": 0.016746871992300287, "grad_norm": 0.8705793023109436, "learning_rate": 0.000199966343227297, "loss": 1.4938, "step": 435 }, { "epoch": 0.01693936477382098, "grad_norm": 1.6019854545593262, "learning_rate": 0.00019996555416053422, "loss": 1.3622, "step": 440 }, { "epoch": 0.017131857555341676, "grad_norm": 1.0340136289596558, "learning_rate": 0.00019996475595258372, "loss": 1.5803, "step": 445 }, { "epoch": 0.017324350336862367, "grad_norm": 1.4469108581542969, "learning_rate": 0.0001999639486035185, "loss": 1.6074, "step": 450 }, { "epoch": 0.01751684311838306, "grad_norm": 1.3311457633972168, "learning_rate": 0.00019996313211341238, "loss": 1.5337, "step": 455 }, { "epoch": 0.017709335899903755, "grad_norm": 0.9691542387008667, "learning_rate": 0.00019996230648234003, "loss": 1.3835, "step": 460 }, { "epoch": 0.017901828681424446, "grad_norm": 1.0229564905166626, "learning_rate": 0.00019996147171037691, "loss": 1.4925, "step": 465 }, { "epoch": 0.01809432146294514, "grad_norm": 1.0120052099227905, "learning_rate": 0.00019996062779759942, "loss": 1.4781, "step": 470 }, { "epoch": 0.01828681424446583, "grad_norm": 0.8471246361732483, "learning_rate": 0.00019995977474408468, "loss": 1.4961, "step": 475 }, { "epoch": 0.018479307025986526, "grad_norm": 2.020277261734009, "learning_rate": 0.00019995891254991072, "loss": 1.5299, "step": 480 }, { "epoch": 0.01867179980750722, "grad_norm": 1.2169212102890015, "learning_rate": 0.00019995804121515637, "loss": 1.4626, "step": 485 }, { "epoch": 0.01886429258902791, "grad_norm": 2.31048321723938, "learning_rate": 0.00019995716073990133, "loss": 1.3653, "step": 490 }, { "epoch": 0.019056785370548605, "grad_norm": 1.8170429468154907, "learning_rate": 0.0001999562711242261, "loss": 1.3537, "step": 495 }, { "epoch": 0.019249278152069296, "grad_norm": 1.1187188625335693, "learning_rate": 0.00019995537236821198, "loss": 1.6358, "step": 500 }, { "epoch": 
0.01944177093358999, "grad_norm": 1.2112963199615479, "learning_rate": 0.0001999544644719412, "loss": 1.4565, "step": 505 }, { "epoch": 0.019634263715110685, "grad_norm": 1.3345009088516235, "learning_rate": 0.0001999535474354968, "loss": 1.647, "step": 510 }, { "epoch": 0.019826756496631376, "grad_norm": 1.3109021186828613, "learning_rate": 0.00019995262125896266, "loss": 1.5462, "step": 515 }, { "epoch": 0.02001924927815207, "grad_norm": 1.1681957244873047, "learning_rate": 0.00019995168594242338, "loss": 1.5292, "step": 520 }, { "epoch": 0.02021174205967276, "grad_norm": 0.9509350657463074, "learning_rate": 0.00019995074148596457, "loss": 1.5566, "step": 525 }, { "epoch": 0.020404234841193455, "grad_norm": 0.6594029664993286, "learning_rate": 0.00019994978788967255, "loss": 1.3693, "step": 530 }, { "epoch": 0.02059672762271415, "grad_norm": 0.8029458522796631, "learning_rate": 0.00019994882515363452, "loss": 1.4664, "step": 535 }, { "epoch": 0.02078922040423484, "grad_norm": 1.1551908254623413, "learning_rate": 0.00019994785327793856, "loss": 1.5342, "step": 540 }, { "epoch": 0.020981713185755535, "grad_norm": 1.3600980043411255, "learning_rate": 0.0001999468722626735, "loss": 1.5262, "step": 545 }, { "epoch": 0.021174205967276226, "grad_norm": 1.0333319902420044, "learning_rate": 0.00019994588210792906, "loss": 1.5079, "step": 550 }, { "epoch": 0.02136669874879692, "grad_norm": 1.2757694721221924, "learning_rate": 0.00019994488281379578, "loss": 1.7721, "step": 555 }, { "epoch": 0.021559191530317615, "grad_norm": 1.1292661428451538, "learning_rate": 0.00019994387438036505, "loss": 1.5077, "step": 560 }, { "epoch": 0.021751684311838305, "grad_norm": 1.105522871017456, "learning_rate": 0.00019994285680772906, "loss": 1.6468, "step": 565 }, { "epoch": 0.021944177093359, "grad_norm": 1.6378583908081055, "learning_rate": 0.00019994183009598086, "loss": 1.5432, "step": 570 }, { "epoch": 0.02213666987487969, "grad_norm": 0.931384801864624, "learning_rate": 
0.0001999407942452144, "loss": 1.3818, "step": 575 }, { "epoch": 0.022329162656400385, "grad_norm": 1.0986119508743286, "learning_rate": 0.0001999397492555243, "loss": 1.552, "step": 580 }, { "epoch": 0.02252165543792108, "grad_norm": 1.121957540512085, "learning_rate": 0.00019993869512700623, "loss": 1.5241, "step": 585 }, { "epoch": 0.02271414821944177, "grad_norm": 1.2508270740509033, "learning_rate": 0.00019993763185975646, "loss": 1.6431, "step": 590 }, { "epoch": 0.022906641000962465, "grad_norm": 1.293603777885437, "learning_rate": 0.00019993655945387234, "loss": 1.3788, "step": 595 }, { "epoch": 0.023099133782483156, "grad_norm": 1.3218696117401123, "learning_rate": 0.00019993547790945183, "loss": 1.398, "step": 600 }, { "epoch": 0.02329162656400385, "grad_norm": 0.8816308975219727, "learning_rate": 0.0001999343872265939, "loss": 1.4239, "step": 605 }, { "epoch": 0.023484119345524544, "grad_norm": 1.9127452373504639, "learning_rate": 0.00019993328740539824, "loss": 1.549, "step": 610 }, { "epoch": 0.023676612127045235, "grad_norm": 2.071992874145508, "learning_rate": 0.0001999321784459655, "loss": 1.6769, "step": 615 }, { "epoch": 0.02386910490856593, "grad_norm": 1.335153579711914, "learning_rate": 0.000199931060348397, "loss": 1.6157, "step": 620 }, { "epoch": 0.02406159769008662, "grad_norm": 1.1237496137619019, "learning_rate": 0.000199929933112795, "loss": 1.4733, "step": 625 }, { "epoch": 0.024254090471607315, "grad_norm": 1.2557927370071411, "learning_rate": 0.00019992879673926258, "loss": 1.3888, "step": 630 }, { "epoch": 0.02444658325312801, "grad_norm": 1.0877735614776611, "learning_rate": 0.00019992765122790371, "loss": 1.4241, "step": 635 }, { "epoch": 0.0246390760346487, "grad_norm": 1.0029325485229492, "learning_rate": 0.00019992649657882307, "loss": 1.6504, "step": 640 }, { "epoch": 0.024831568816169394, "grad_norm": 1.5832372903823853, "learning_rate": 0.00019992533279212626, "loss": 1.4662, "step": 645 }, { "epoch": 0.025024061597690085, 
"grad_norm": 1.1658433675765991, "learning_rate": 0.00019992415986791974, "loss": 1.3723, "step": 650 }, { "epoch": 0.02521655437921078, "grad_norm": 1.8895657062530518, "learning_rate": 0.00019992297780631072, "loss": 1.457, "step": 655 }, { "epoch": 0.025409047160731474, "grad_norm": 1.193961501121521, "learning_rate": 0.00019992178660740732, "loss": 1.623, "step": 660 }, { "epoch": 0.025601539942252165, "grad_norm": 0.9851275086402893, "learning_rate": 0.00019992058627131844, "loss": 1.6884, "step": 665 }, { "epoch": 0.02579403272377286, "grad_norm": 1.5353829860687256, "learning_rate": 0.00019991937679815386, "loss": 1.3246, "step": 670 }, { "epoch": 0.02598652550529355, "grad_norm": 1.2476325035095215, "learning_rate": 0.0001999181581880242, "loss": 1.596, "step": 675 }, { "epoch": 0.026179018286814244, "grad_norm": 1.1163430213928223, "learning_rate": 0.00019991693044104083, "loss": 1.5077, "step": 680 }, { "epoch": 0.02637151106833494, "grad_norm": 1.1388076543807983, "learning_rate": 0.0001999156935573161, "loss": 1.4827, "step": 685 }, { "epoch": 0.02656400384985563, "grad_norm": 0.9100907444953918, "learning_rate": 0.00019991444753696304, "loss": 1.3429, "step": 690 }, { "epoch": 0.026756496631376324, "grad_norm": 2.032510995864868, "learning_rate": 0.00019991319238009565, "loss": 1.5473, "step": 695 }, { "epoch": 0.026948989412897015, "grad_norm": 1.0866800546646118, "learning_rate": 0.00019991192808682868, "loss": 1.5552, "step": 700 }, { "epoch": 0.02714148219441771, "grad_norm": 1.3941971063613892, "learning_rate": 0.00019991065465727774, "loss": 1.4103, "step": 705 }, { "epoch": 0.027333974975938403, "grad_norm": 1.721247911453247, "learning_rate": 0.0001999093720915593, "loss": 1.4965, "step": 710 }, { "epoch": 0.027526467757459094, "grad_norm": 1.4090749025344849, "learning_rate": 0.00019990808038979058, "loss": 1.3159, "step": 715 }, { "epoch": 0.02771896053897979, "grad_norm": 1.731886625289917, "learning_rate": 0.00019990677955208973, "loss": 
1.4392, "step": 720 }, { "epoch": 0.02791145332050048, "grad_norm": 1.9695488214492798, "learning_rate": 0.00019990546957857576, "loss": 1.6206, "step": 725 }, { "epoch": 0.028103946102021174, "grad_norm": 0.7977893352508545, "learning_rate": 0.0001999041504693684, "loss": 1.5764, "step": 730 }, { "epoch": 0.02829643888354187, "grad_norm": 0.9448668360710144, "learning_rate": 0.00019990282222458826, "loss": 1.3149, "step": 735 }, { "epoch": 0.02848893166506256, "grad_norm": 1.0612679719924927, "learning_rate": 0.00019990148484435682, "loss": 1.4942, "step": 740 }, { "epoch": 0.028681424446583254, "grad_norm": 1.4038052558898926, "learning_rate": 0.0001999001383287964, "loss": 1.5184, "step": 745 }, { "epoch": 0.028873917228103944, "grad_norm": 1.0545177459716797, "learning_rate": 0.0001998987826780301, "loss": 1.5617, "step": 750 }, { "epoch": 0.02906641000962464, "grad_norm": 2.392878532409668, "learning_rate": 0.0001998974178921819, "loss": 1.3638, "step": 755 }, { "epoch": 0.029258902791145333, "grad_norm": 1.1004624366760254, "learning_rate": 0.0001998960439713766, "loss": 1.5162, "step": 760 }, { "epoch": 0.029451395572666024, "grad_norm": 1.2530279159545898, "learning_rate": 0.0001998946609157398, "loss": 1.5422, "step": 765 }, { "epoch": 0.02964388835418672, "grad_norm": 0.8240470290184021, "learning_rate": 0.00019989326872539803, "loss": 1.3828, "step": 770 }, { "epoch": 0.029836381135707413, "grad_norm": 0.9734111428260803, "learning_rate": 0.00019989186740047857, "loss": 1.7041, "step": 775 }, { "epoch": 0.030028873917228104, "grad_norm": 0.9785217642784119, "learning_rate": 0.00019989045694110953, "loss": 1.6267, "step": 780 }, { "epoch": 0.030221366698748798, "grad_norm": 1.3278164863586426, "learning_rate": 0.00019988903734741994, "loss": 1.5041, "step": 785 }, { "epoch": 0.03041385948026949, "grad_norm": 1.9143437147140503, "learning_rate": 0.00019988760861953958, "loss": 1.4728, "step": 790 }, { "epoch": 0.030606352261790183, "grad_norm": 
1.5717315673828125, "learning_rate": 0.0001998861707575991, "loss": 1.3824, "step": 795 }, { "epoch": 0.030798845043310877, "grad_norm": 1.0486010313034058, "learning_rate": 0.00019988472376173, "loss": 1.6186, "step": 800 }, { "epoch": 0.03099133782483157, "grad_norm": 1.1566083431243896, "learning_rate": 0.00019988326763206458, "loss": 1.3773, "step": 805 }, { "epoch": 0.031183830606352263, "grad_norm": 1.6336543560028076, "learning_rate": 0.00019988180236873602, "loss": 1.2998, "step": 810 }, { "epoch": 0.031376323387872954, "grad_norm": 1.4655206203460693, "learning_rate": 0.00019988032797187824, "loss": 1.3966, "step": 815 }, { "epoch": 0.031568816169393644, "grad_norm": 2.0325050354003906, "learning_rate": 0.00019987884444162618, "loss": 1.3464, "step": 820 }, { "epoch": 0.03176130895091434, "grad_norm": 1.254342794418335, "learning_rate": 0.0001998773517781154, "loss": 1.5236, "step": 825 }, { "epoch": 0.03195380173243503, "grad_norm": 0.8909908533096313, "learning_rate": 0.00019987584998148244, "loss": 1.4838, "step": 830 }, { "epoch": 0.032146294513955724, "grad_norm": 1.1440258026123047, "learning_rate": 0.00019987433905186458, "loss": 1.3952, "step": 835 }, { "epoch": 0.03233878729547642, "grad_norm": 1.2138668298721313, "learning_rate": 0.00019987281898940003, "loss": 1.5982, "step": 840 }, { "epoch": 0.03253128007699711, "grad_norm": 1.1847470998764038, "learning_rate": 0.00019987128979422782, "loss": 1.4313, "step": 845 }, { "epoch": 0.032723772858517804, "grad_norm": 1.4961762428283691, "learning_rate": 0.0001998697514664877, "loss": 1.5187, "step": 850 }, { "epoch": 0.0329162656400385, "grad_norm": 1.4735344648361206, "learning_rate": 0.00019986820400632043, "loss": 1.5443, "step": 855 }, { "epoch": 0.03310875842155919, "grad_norm": 1.1350771188735962, "learning_rate": 0.00019986664741386743, "loss": 1.5219, "step": 860 }, { "epoch": 0.03330125120307988, "grad_norm": 1.098781943321228, "learning_rate": 0.0001998650816892711, "loss": 1.6074, "step": 
865 }, { "epoch": 0.033493743984600574, "grad_norm": 1.9639078378677368, "learning_rate": 0.0001998635068326746, "loss": 1.342, "step": 870 }, { "epoch": 0.03368623676612127, "grad_norm": 1.1193336248397827, "learning_rate": 0.00019986192284422193, "loss": 1.5647, "step": 875 }, { "epoch": 0.03387872954764196, "grad_norm": 1.0558106899261475, "learning_rate": 0.00019986032972405793, "loss": 1.2448, "step": 880 }, { "epoch": 0.034071222329162654, "grad_norm": 1.1178051233291626, "learning_rate": 0.0001998587274723283, "loss": 1.3455, "step": 885 }, { "epoch": 0.03426371511068335, "grad_norm": 1.728400468826294, "learning_rate": 0.0001998571160891795, "loss": 1.44, "step": 890 }, { "epoch": 0.03445620789220404, "grad_norm": 1.158931016921997, "learning_rate": 0.000199855495574759, "loss": 1.4247, "step": 895 }, { "epoch": 0.03464870067372473, "grad_norm": 1.8745627403259277, "learning_rate": 0.0001998538659292149, "loss": 1.4036, "step": 900 }, { "epoch": 0.03484119345524543, "grad_norm": 1.4273000955581665, "learning_rate": 0.0001998522271526962, "loss": 1.4857, "step": 905 }, { "epoch": 0.03503368623676612, "grad_norm": 1.1671931743621826, "learning_rate": 0.0001998505792453528, "loss": 1.7199, "step": 910 }, { "epoch": 0.03522617901828681, "grad_norm": 1.1703475713729858, "learning_rate": 0.00019984892220733537, "loss": 1.5659, "step": 915 }, { "epoch": 0.03541867179980751, "grad_norm": 0.8550274968147278, "learning_rate": 0.00019984725603879546, "loss": 1.3608, "step": 920 }, { "epoch": 0.0356111645813282, "grad_norm": 1.676072359085083, "learning_rate": 0.0001998455807398854, "loss": 1.4841, "step": 925 }, { "epoch": 0.03580365736284889, "grad_norm": 1.362423062324524, "learning_rate": 0.00019984389631075842, "loss": 1.5501, "step": 930 }, { "epoch": 0.03599615014436958, "grad_norm": 1.1643259525299072, "learning_rate": 0.0001998422027515685, "loss": 1.4954, "step": 935 }, { "epoch": 0.03618864292589028, "grad_norm": 1.4984415769577026, "learning_rate": 
0.00019984050006247053, "loss": 1.337, "step": 940 }, { "epoch": 0.03638113570741097, "grad_norm": 1.399708867073059, "learning_rate": 0.00019983878824362023, "loss": 1.5546, "step": 945 }, { "epoch": 0.03657362848893166, "grad_norm": 1.8458516597747803, "learning_rate": 0.00019983706729517412, "loss": 1.5268, "step": 950 }, { "epoch": 0.03676612127045236, "grad_norm": 1.1428085565567017, "learning_rate": 0.00019983533721728956, "loss": 1.4454, "step": 955 }, { "epoch": 0.03695861405197305, "grad_norm": 1.2200374603271484, "learning_rate": 0.00019983359801012475, "loss": 1.5586, "step": 960 }, { "epoch": 0.03715110683349374, "grad_norm": 1.3679723739624023, "learning_rate": 0.00019983184967383875, "loss": 1.3948, "step": 965 }, { "epoch": 0.03734359961501444, "grad_norm": 1.489397644996643, "learning_rate": 0.00019983009220859142, "loss": 1.5154, "step": 970 }, { "epoch": 0.03753609239653513, "grad_norm": 1.0442456007003784, "learning_rate": 0.00019982832561454345, "loss": 1.5704, "step": 975 }, { "epoch": 0.03772858517805582, "grad_norm": 1.7480882406234741, "learning_rate": 0.00019982654989185642, "loss": 1.5235, "step": 980 }, { "epoch": 0.03792107795957651, "grad_norm": 1.0078760385513306, "learning_rate": 0.00019982476504069272, "loss": 1.3936, "step": 985 }, { "epoch": 0.03811357074109721, "grad_norm": 1.0461446046829224, "learning_rate": 0.0001998229710612155, "loss": 1.6994, "step": 990 }, { "epoch": 0.0383060635226179, "grad_norm": 2.1919922828674316, "learning_rate": 0.00019982116795358885, "loss": 1.5739, "step": 995 }, { "epoch": 0.03849855630413859, "grad_norm": 1.7092692852020264, "learning_rate": 0.00019981935571797768, "loss": 1.2746, "step": 1000 }, { "epoch": 0.03869104908565929, "grad_norm": 1.3044835329055786, "learning_rate": 0.00019981753435454764, "loss": 1.5254, "step": 1005 }, { "epoch": 0.03888354186717998, "grad_norm": 1.1550064086914062, "learning_rate": 0.0001998157038634653, "loss": 1.6154, "step": 1010 }, { "epoch": 
0.03907603464870067, "grad_norm": 2.0250370502471924, "learning_rate": 0.00019981386424489808, "loss": 1.4807, "step": 1015 }, { "epoch": 0.03926852743022137, "grad_norm": 1.036095380783081, "learning_rate": 0.00019981201549901419, "loss": 1.4124, "step": 1020 }, { "epoch": 0.03946102021174206, "grad_norm": 1.126434564590454, "learning_rate": 0.0001998101576259827, "loss": 1.4959, "step": 1025 }, { "epoch": 0.03965351299326275, "grad_norm": 1.2912375926971436, "learning_rate": 0.00019980829062597342, "loss": 1.5006, "step": 1030 }, { "epoch": 0.03984600577478344, "grad_norm": 1.5378974676132202, "learning_rate": 0.00019980641449915713, "loss": 1.3073, "step": 1035 }, { "epoch": 0.04003849855630414, "grad_norm": 1.52741277217865, "learning_rate": 0.0001998045292457054, "loss": 1.3709, "step": 1040 }, { "epoch": 0.04023099133782483, "grad_norm": 1.6989667415618896, "learning_rate": 0.00019980263486579064, "loss": 1.4784, "step": 1045 }, { "epoch": 0.04042348411934552, "grad_norm": 1.0623974800109863, "learning_rate": 0.00019980073135958607, "loss": 1.5163, "step": 1050 }, { "epoch": 0.04061597690086622, "grad_norm": 1.323283314704895, "learning_rate": 0.0001997988187272657, "loss": 1.4793, "step": 1055 }, { "epoch": 0.04080846968238691, "grad_norm": 1.4508922100067139, "learning_rate": 0.00019979689696900447, "loss": 1.4746, "step": 1060 }, { "epoch": 0.0410009624639076, "grad_norm": 1.159579873085022, "learning_rate": 0.0001997949660849781, "loss": 1.2928, "step": 1065 }, { "epoch": 0.0411934552454283, "grad_norm": 1.5187591314315796, "learning_rate": 0.0001997930260753632, "loss": 1.5116, "step": 1070 }, { "epoch": 0.04138594802694899, "grad_norm": 1.7137175798416138, "learning_rate": 0.0001997910769403371, "loss": 1.6406, "step": 1075 }, { "epoch": 0.04157844080846968, "grad_norm": 1.221326470375061, "learning_rate": 0.00019978911868007807, "loss": 1.418, "step": 1080 }, { "epoch": 0.04177093358999037, "grad_norm": 1.0666981935501099, "learning_rate": 
0.0001997871512947652, "loss": 1.3768, "step": 1085 }, { "epoch": 0.04196342637151107, "grad_norm": 0.9577809572219849, "learning_rate": 0.00019978517478457834, "loss": 1.4915, "step": 1090 }, { "epoch": 0.04215591915303176, "grad_norm": 2.3966264724731445, "learning_rate": 0.00019978318914969827, "loss": 1.7057, "step": 1095 }, { "epoch": 0.04234841193455245, "grad_norm": 1.0523775815963745, "learning_rate": 0.0001997811943903066, "loss": 1.3887, "step": 1100 }, { "epoch": 0.04254090471607315, "grad_norm": 1.3975977897644043, "learning_rate": 0.00019977919050658566, "loss": 1.5335, "step": 1105 }, { "epoch": 0.04273339749759384, "grad_norm": 1.5198701620101929, "learning_rate": 0.0001997771774987187, "loss": 1.3939, "step": 1110 }, { "epoch": 0.04292589027911453, "grad_norm": 0.7943345308303833, "learning_rate": 0.00019977515536688984, "loss": 1.5908, "step": 1115 }, { "epoch": 0.04311838306063523, "grad_norm": 0.9602519869804382, "learning_rate": 0.00019977312411128398, "loss": 1.3225, "step": 1120 }, { "epoch": 0.04331087584215592, "grad_norm": 1.0204732418060303, "learning_rate": 0.00019977108373208687, "loss": 1.518, "step": 1125 }, { "epoch": 0.04350336862367661, "grad_norm": 1.2130141258239746, "learning_rate": 0.00019976903422948503, "loss": 1.3693, "step": 1130 }, { "epoch": 0.0436958614051973, "grad_norm": 0.854958176612854, "learning_rate": 0.00019976697560366598, "loss": 1.4907, "step": 1135 }, { "epoch": 0.043888354186718, "grad_norm": 1.3699367046356201, "learning_rate": 0.00019976490785481789, "loss": 1.4448, "step": 1140 }, { "epoch": 0.04408084696823869, "grad_norm": 1.1766821146011353, "learning_rate": 0.00019976283098312983, "loss": 1.5171, "step": 1145 }, { "epoch": 0.04427333974975938, "grad_norm": 1.6543035507202148, "learning_rate": 0.00019976074498879174, "loss": 1.2751, "step": 1150 }, { "epoch": 0.04446583253128008, "grad_norm": 1.2228333950042725, "learning_rate": 0.0001997586498719944, "loss": 1.4522, "step": 1155 }, { "epoch": 
0.04465832531280077, "grad_norm": 1.2733262777328491, "learning_rate": 0.00019975654563292937, "loss": 1.6292, "step": 1160 }, { "epoch": 0.04485081809432146, "grad_norm": 1.3934366703033447, "learning_rate": 0.00019975443227178904, "loss": 1.433, "step": 1165 }, { "epoch": 0.04504331087584216, "grad_norm": 1.5495753288269043, "learning_rate": 0.00019975230978876672, "loss": 1.5803, "step": 1170 }, { "epoch": 0.04523580365736285, "grad_norm": 1.0099114179611206, "learning_rate": 0.00019975017818405646, "loss": 1.3434, "step": 1175 }, { "epoch": 0.04542829643888354, "grad_norm": 0.9009067416191101, "learning_rate": 0.0001997480374578532, "loss": 1.2312, "step": 1180 }, { "epoch": 0.04562078922040423, "grad_norm": 1.8678425550460815, "learning_rate": 0.00019974588761035266, "loss": 1.6331, "step": 1185 }, { "epoch": 0.04581328200192493, "grad_norm": 0.8258862495422363, "learning_rate": 0.00019974372864175148, "loss": 1.4584, "step": 1190 }, { "epoch": 0.04600577478344562, "grad_norm": 1.44557523727417, "learning_rate": 0.00019974156055224706, "loss": 1.4866, "step": 1195 }, { "epoch": 0.04619826756496631, "grad_norm": 1.7249491214752197, "learning_rate": 0.00019973938334203763, "loss": 1.3704, "step": 1200 }, { "epoch": 0.04639076034648701, "grad_norm": 1.005623698234558, "learning_rate": 0.0001997371970113223, "loss": 1.1993, "step": 1205 }, { "epoch": 0.0465832531280077, "grad_norm": 1.4596670866012573, "learning_rate": 0.00019973500156030105, "loss": 1.4996, "step": 1210 }, { "epoch": 0.04677574590952839, "grad_norm": 1.3085503578186035, "learning_rate": 0.00019973279698917454, "loss": 1.441, "step": 1215 }, { "epoch": 0.04696823869104909, "grad_norm": 0.9477142691612244, "learning_rate": 0.00019973058329814445, "loss": 1.5278, "step": 1220 }, { "epoch": 0.04716073147256978, "grad_norm": 0.9040088653564453, "learning_rate": 0.00019972836048741318, "loss": 1.5374, "step": 1225 }, { "epoch": 0.04735322425409047, "grad_norm": 1.7435801029205322, "learning_rate": 
0.00019972612855718395, "loss": 1.3884, "step": 1230 }, { "epoch": 0.04754571703561117, "grad_norm": 1.180665135383606, "learning_rate": 0.00019972388750766088, "loss": 1.2097, "step": 1235 }, { "epoch": 0.04773820981713186, "grad_norm": 1.066064715385437, "learning_rate": 0.00019972163733904895, "loss": 1.4299, "step": 1240 }, { "epoch": 0.04793070259865255, "grad_norm": 1.1051660776138306, "learning_rate": 0.00019971937805155382, "loss": 1.5055, "step": 1245 }, { "epoch": 0.04812319538017324, "grad_norm": 1.2021822929382324, "learning_rate": 0.0001997171096453822, "loss": 1.5842, "step": 1250 }, { "epoch": 0.04831568816169394, "grad_norm": 2.1715807914733887, "learning_rate": 0.00019971483212074146, "loss": 1.4096, "step": 1255 }, { "epoch": 0.04850818094321463, "grad_norm": 1.1615819931030273, "learning_rate": 0.00019971254547783987, "loss": 1.2554, "step": 1260 }, { "epoch": 0.04870067372473532, "grad_norm": 1.5363492965698242, "learning_rate": 0.00019971024971688652, "loss": 1.5773, "step": 1265 }, { "epoch": 0.04889316650625602, "grad_norm": 1.3774447441101074, "learning_rate": 0.00019970794483809137, "loss": 1.3441, "step": 1270 }, { "epoch": 0.04908565928777671, "grad_norm": 2.065901041030884, "learning_rate": 0.00019970563084166515, "loss": 1.6342, "step": 1275 }, { "epoch": 0.0492781520692974, "grad_norm": 1.3221025466918945, "learning_rate": 0.0001997033077278195, "loss": 1.4967, "step": 1280 }, { "epoch": 0.0494706448508181, "grad_norm": 1.6636276245117188, "learning_rate": 0.00019970097549676684, "loss": 1.4936, "step": 1285 }, { "epoch": 0.04966313763233879, "grad_norm": 1.4630615711212158, "learning_rate": 0.0001996986341487204, "loss": 1.4096, "step": 1290 }, { "epoch": 0.04985563041385948, "grad_norm": 1.9586588144302368, "learning_rate": 0.00019969628368389432, "loss": 1.5956, "step": 1295 }, { "epoch": 0.05004812319538017, "grad_norm": 1.0234311819076538, "learning_rate": 0.00019969392410250353, "loss": 1.247, "step": 1300 }, { "epoch": 
0.05024061597690087, "grad_norm": 1.7005319595336914, "learning_rate": 0.0001996915554047638, "loss": 1.4179, "step": 1305 }, { "epoch": 0.05043310875842156, "grad_norm": 1.3052936792373657, "learning_rate": 0.0001996891775908917, "loss": 1.4002, "step": 1310 }, { "epoch": 0.05062560153994225, "grad_norm": 1.0146903991699219, "learning_rate": 0.00019968679066110473, "loss": 1.5062, "step": 1315 }, { "epoch": 0.05081809432146295, "grad_norm": 0.9611810445785522, "learning_rate": 0.00019968439461562104, "loss": 1.5303, "step": 1320 }, { "epoch": 0.05101058710298364, "grad_norm": 0.8518236875534058, "learning_rate": 0.0001996819894546599, "loss": 1.3589, "step": 1325 }, { "epoch": 0.05120307988450433, "grad_norm": 1.6918632984161377, "learning_rate": 0.00019967957517844111, "loss": 1.4589, "step": 1330 }, { "epoch": 0.05139557266602503, "grad_norm": 1.4838560819625854, "learning_rate": 0.00019967715178718551, "loss": 1.2714, "step": 1335 }, { "epoch": 0.05158806544754572, "grad_norm": 1.291231632232666, "learning_rate": 0.00019967471928111465, "loss": 1.6378, "step": 1340 }, { "epoch": 0.05178055822906641, "grad_norm": 1.2091941833496094, "learning_rate": 0.00019967227766045102, "loss": 1.3985, "step": 1345 }, { "epoch": 0.0519730510105871, "grad_norm": 1.2294058799743652, "learning_rate": 0.00019966982692541785, "loss": 1.498, "step": 1350 }, { "epoch": 0.0521655437921078, "grad_norm": 1.1644397974014282, "learning_rate": 0.00019966736707623928, "loss": 1.4185, "step": 1355 }, { "epoch": 0.05235803657362849, "grad_norm": 1.7669397592544556, "learning_rate": 0.0001996648981131402, "loss": 1.3564, "step": 1360 }, { "epoch": 0.05255052935514918, "grad_norm": 0.7178487777709961, "learning_rate": 0.00019966242003634644, "loss": 1.2015, "step": 1365 }, { "epoch": 0.05274302213666988, "grad_norm": 0.8149698376655579, "learning_rate": 0.00019965993284608457, "loss": 1.4046, "step": 1370 }, { "epoch": 0.05293551491819057, "grad_norm": 1.3934742212295532, "learning_rate": 
0.00019965743654258198, "loss": 1.5289, "step": 1375 }, { "epoch": 0.05312800769971126, "grad_norm": 1.060002326965332, "learning_rate": 0.00019965493112606702, "loss": 1.391, "step": 1380 }, { "epoch": 0.05332050048123196, "grad_norm": 1.1154258251190186, "learning_rate": 0.00019965241659676875, "loss": 1.3004, "step": 1385 }, { "epoch": 0.05351299326275265, "grad_norm": 1.8101186752319336, "learning_rate": 0.00019964989295491713, "loss": 1.4968, "step": 1390 }, { "epoch": 0.05370548604427334, "grad_norm": 1.075211524963379, "learning_rate": 0.00019964736020074294, "loss": 1.5198, "step": 1395 }, { "epoch": 0.05389797882579403, "grad_norm": 2.0130980014801025, "learning_rate": 0.00019964481833447775, "loss": 1.5495, "step": 1400 }, { "epoch": 0.05409047160731473, "grad_norm": 1.214570164680481, "learning_rate": 0.000199642267356354, "loss": 1.5886, "step": 1405 }, { "epoch": 0.05428296438883542, "grad_norm": 1.6430037021636963, "learning_rate": 0.00019963970726660497, "loss": 1.5293, "step": 1410 }, { "epoch": 0.05447545717035611, "grad_norm": 0.94575035572052, "learning_rate": 0.00019963713806546478, "loss": 1.276, "step": 1415 }, { "epoch": 0.05466794995187681, "grad_norm": 1.1988322734832764, "learning_rate": 0.00019963455975316832, "loss": 1.3151, "step": 1420 }, { "epoch": 0.0548604427333975, "grad_norm": 1.2768787145614624, "learning_rate": 0.00019963197232995142, "loss": 1.5559, "step": 1425 }, { "epoch": 0.05505293551491819, "grad_norm": 1.5184259414672852, "learning_rate": 0.0001996293757960506, "loss": 1.2998, "step": 1430 }, { "epoch": 0.055245428296438887, "grad_norm": 6.240184783935547, "learning_rate": 0.0001996267701517034, "loss": 1.4497, "step": 1435 }, { "epoch": 0.05543792107795958, "grad_norm": 1.4356882572174072, "learning_rate": 0.00019962415539714803, "loss": 1.6364, "step": 1440 }, { "epoch": 0.05563041385948027, "grad_norm": 0.9310120940208435, "learning_rate": 0.00019962153153262358, "loss": 1.417, "step": 1445 }, { "epoch": 
0.05582290664100096, "grad_norm": 1.2131333351135254, "learning_rate": 0.00019961889855837, "loss": 1.4059, "step": 1450 }, { "epoch": 0.05601539942252166, "grad_norm": 1.2134804725646973, "learning_rate": 0.00019961625647462808, "loss": 1.458, "step": 1455 }, { "epoch": 0.05620789220404235, "grad_norm": 1.5725634098052979, "learning_rate": 0.0001996136052816394, "loss": 1.352, "step": 1460 }, { "epoch": 0.05640038498556304, "grad_norm": 0.9882212281227112, "learning_rate": 0.00019961094497964642, "loss": 1.1665, "step": 1465 }, { "epoch": 0.05659287776708374, "grad_norm": 1.055966854095459, "learning_rate": 0.00019960827556889235, "loss": 1.388, "step": 1470 }, { "epoch": 0.05678537054860443, "grad_norm": 1.0809309482574463, "learning_rate": 0.00019960559704962133, "loss": 1.4287, "step": 1475 }, { "epoch": 0.05697786333012512, "grad_norm": 1.0014935731887817, "learning_rate": 0.00019960290942207828, "loss": 1.5539, "step": 1480 }, { "epoch": 0.057170356111645816, "grad_norm": 1.1717151403427124, "learning_rate": 0.000199600212686509, "loss": 1.3619, "step": 1485 }, { "epoch": 0.05736284889316651, "grad_norm": 1.3981553316116333, "learning_rate": 0.00019959750684316, "loss": 1.3303, "step": 1490 }, { "epoch": 0.0575553416746872, "grad_norm": 0.7471413016319275, "learning_rate": 0.00019959479189227884, "loss": 1.4048, "step": 1495 }, { "epoch": 0.05774783445620789, "grad_norm": 1.1570223569869995, "learning_rate": 0.00019959206783411372, "loss": 1.6713, "step": 1500 }, { "epoch": 0.05794032723772859, "grad_norm": 1.4656585454940796, "learning_rate": 0.00019958933466891366, "loss": 1.3911, "step": 1505 }, { "epoch": 0.05813282001924928, "grad_norm": 1.5338329076766968, "learning_rate": 0.0001995865923969287, "loss": 1.578, "step": 1510 }, { "epoch": 0.05832531280076997, "grad_norm": 0.9481655955314636, "learning_rate": 0.0001995838410184096, "loss": 1.2903, "step": 1515 }, { "epoch": 0.058517805582290666, "grad_norm": 1.4928970336914062, "learning_rate": 
0.00019958108053360788, "loss": 1.4139, "step": 1520 }, { "epoch": 0.05871029836381136, "grad_norm": 1.015381932258606, "learning_rate": 0.00019957831094277604, "loss": 1.5427, "step": 1525 }, { "epoch": 0.05890279114533205, "grad_norm": 1.3471331596374512, "learning_rate": 0.0001995755322461673, "loss": 1.3763, "step": 1530 }, { "epoch": 0.059095283926852746, "grad_norm": 2.0942165851593018, "learning_rate": 0.00019957274444403576, "loss": 1.4669, "step": 1535 }, { "epoch": 0.05928777670837344, "grad_norm": 1.4853599071502686, "learning_rate": 0.00019956994753663634, "loss": 1.4259, "step": 1540 }, { "epoch": 0.05948026948989413, "grad_norm": 1.3337596654891968, "learning_rate": 0.0001995671415242248, "loss": 1.4169, "step": 1545 }, { "epoch": 0.059672762271414825, "grad_norm": 1.3816536664962769, "learning_rate": 0.00019956432640705777, "loss": 1.3679, "step": 1550 }, { "epoch": 0.059865255052935516, "grad_norm": 1.1726235151290894, "learning_rate": 0.00019956150218539262, "loss": 1.4076, "step": 1555 }, { "epoch": 0.06005774783445621, "grad_norm": 1.419520378112793, "learning_rate": 0.00019955866885948764, "loss": 1.3621, "step": 1560 }, { "epoch": 0.0602502406159769, "grad_norm": 1.4154486656188965, "learning_rate": 0.0001995558264296019, "loss": 1.4221, "step": 1565 }, { "epoch": 0.060442733397497596, "grad_norm": 1.4721988439559937, "learning_rate": 0.00019955297489599537, "loss": 1.3641, "step": 1570 }, { "epoch": 0.06063522617901829, "grad_norm": 1.1087952852249146, "learning_rate": 0.0001995501142589287, "loss": 1.3734, "step": 1575 }, { "epoch": 0.06082771896053898, "grad_norm": 1.4815518856048584, "learning_rate": 0.00019954724451866357, "loss": 1.4042, "step": 1580 }, { "epoch": 0.061020211742059675, "grad_norm": 1.835754632949829, "learning_rate": 0.00019954436567546236, "loss": 1.2457, "step": 1585 }, { "epoch": 0.061212704523580366, "grad_norm": 1.3139601945877075, "learning_rate": 0.00019954147772958836, "loss": 1.4457, "step": 1590 }, { "epoch": 
0.06140519730510106, "grad_norm": 1.155369758605957, "learning_rate": 0.0001995385806813056, "loss": 1.3483, "step": 1595 }, { "epoch": 0.061597690086621755, "grad_norm": 1.1897907257080078, "learning_rate": 0.00019953567453087902, "loss": 1.467, "step": 1600 }, { "epoch": 0.061790182868142446, "grad_norm": 1.0794181823730469, "learning_rate": 0.00019953275927857438, "loss": 1.5171, "step": 1605 }, { "epoch": 0.06198267564966314, "grad_norm": 0.9538444876670837, "learning_rate": 0.00019952983492465824, "loss": 1.2643, "step": 1610 }, { "epoch": 0.06217516843118383, "grad_norm": 1.1179461479187012, "learning_rate": 0.00019952690146939804, "loss": 1.408, "step": 1615 }, { "epoch": 0.062367661212704525, "grad_norm": 1.8034144639968872, "learning_rate": 0.00019952395891306197, "loss": 1.3685, "step": 1620 }, { "epoch": 0.06256015399422522, "grad_norm": 1.04547119140625, "learning_rate": 0.00019952100725591912, "loss": 1.4271, "step": 1625 }, { "epoch": 0.06275264677574591, "grad_norm": 1.3097724914550781, "learning_rate": 0.00019951804649823949, "loss": 1.3303, "step": 1630 }, { "epoch": 0.0629451395572666, "grad_norm": 1.8794469833374023, "learning_rate": 0.00019951507664029374, "loss": 1.5223, "step": 1635 }, { "epoch": 0.06313763233878729, "grad_norm": 1.4077703952789307, "learning_rate": 0.00019951209768235344, "loss": 1.5582, "step": 1640 }, { "epoch": 0.06333012512030799, "grad_norm": 1.2244471311569214, "learning_rate": 0.000199509109624691, "loss": 1.3437, "step": 1645 }, { "epoch": 0.06352261790182868, "grad_norm": 1.4610791206359863, "learning_rate": 0.00019950611246757972, "loss": 1.6944, "step": 1650 }, { "epoch": 0.06371511068334937, "grad_norm": 1.544989824295044, "learning_rate": 0.00019950310621129358, "loss": 1.3288, "step": 1655 }, { "epoch": 0.06390760346487007, "grad_norm": 1.4837945699691772, "learning_rate": 0.00019950009085610755, "loss": 1.1296, "step": 1660 }, { "epoch": 0.06410009624639076, "grad_norm": 2.2527410984039307, "learning_rate": 
0.0001994970664022973, "loss": 1.3105, "step": 1665 }, { "epoch": 0.06429258902791145, "grad_norm": 1.3723945617675781, "learning_rate": 0.00019949403285013948, "loss": 1.3976, "step": 1670 }, { "epoch": 0.06448508180943215, "grad_norm": 1.571265459060669, "learning_rate": 0.0001994909901999114, "loss": 1.4603, "step": 1675 }, { "epoch": 0.06467757459095284, "grad_norm": 1.2445194721221924, "learning_rate": 0.00019948793845189137, "loss": 1.3072, "step": 1680 }, { "epoch": 0.06487006737247353, "grad_norm": 2.068112373352051, "learning_rate": 0.00019948487760635842, "loss": 1.4638, "step": 1685 }, { "epoch": 0.06506256015399423, "grad_norm": 1.0896637439727783, "learning_rate": 0.00019948180766359244, "loss": 1.3184, "step": 1690 }, { "epoch": 0.06525505293551492, "grad_norm": 2.0666351318359375, "learning_rate": 0.00019947872862387413, "loss": 1.3944, "step": 1695 }, { "epoch": 0.06544754571703561, "grad_norm": 1.5204085111618042, "learning_rate": 0.00019947564048748508, "loss": 1.3795, "step": 1700 }, { "epoch": 0.0656400384985563, "grad_norm": 0.9768043160438538, "learning_rate": 0.00019947254325470768, "loss": 1.3329, "step": 1705 }, { "epoch": 0.065832531280077, "grad_norm": 1.3453469276428223, "learning_rate": 0.00019946943692582516, "loss": 1.304, "step": 1710 }, { "epoch": 0.06602502406159769, "grad_norm": 1.0725489854812622, "learning_rate": 0.00019946632150112152, "loss": 1.5547, "step": 1715 }, { "epoch": 0.06621751684311838, "grad_norm": 1.5973418951034546, "learning_rate": 0.0001994631969808817, "loss": 1.3263, "step": 1720 }, { "epoch": 0.06641000962463908, "grad_norm": 1.2451751232147217, "learning_rate": 0.0001994600633653914, "loss": 1.4935, "step": 1725 }, { "epoch": 0.06660250240615977, "grad_norm": 1.3474830389022827, "learning_rate": 0.00019945692065493717, "loss": 1.6282, "step": 1730 }, { "epoch": 0.06679499518768046, "grad_norm": 1.7913939952850342, "learning_rate": 0.00019945376884980643, "loss": 1.2935, "step": 1735 }, { "epoch": 
0.06698748796920115, "grad_norm": 1.0764446258544922, "learning_rate": 0.00019945060795028728, "loss": 1.6034, "step": 1740 }, { "epoch": 0.06717998075072185, "grad_norm": 1.0572975873947144, "learning_rate": 0.00019944743795666887, "loss": 1.3997, "step": 1745 }, { "epoch": 0.06737247353224254, "grad_norm": 1.3195079565048218, "learning_rate": 0.00019944425886924102, "loss": 1.4838, "step": 1750 }, { "epoch": 0.06756496631376323, "grad_norm": 1.0044989585876465, "learning_rate": 0.00019944107068829448, "loss": 1.388, "step": 1755 }, { "epoch": 0.06775745909528393, "grad_norm": 1.8276032209396362, "learning_rate": 0.0001994378734141207, "loss": 1.447, "step": 1760 }, { "epoch": 0.06794995187680462, "grad_norm": 1.5056366920471191, "learning_rate": 0.00019943466704701218, "loss": 1.5153, "step": 1765 }, { "epoch": 0.06814244465832531, "grad_norm": 1.6947304010391235, "learning_rate": 0.00019943145158726205, "loss": 1.5551, "step": 1770 }, { "epoch": 0.068334937439846, "grad_norm": 0.9702686667442322, "learning_rate": 0.00019942822703516433, "loss": 1.3168, "step": 1775 }, { "epoch": 0.0685274302213667, "grad_norm": 1.6755216121673584, "learning_rate": 0.0001994249933910139, "loss": 1.6223, "step": 1780 }, { "epoch": 0.06871992300288739, "grad_norm": 1.3666303157806396, "learning_rate": 0.00019942175065510643, "loss": 1.5748, "step": 1785 }, { "epoch": 0.06891241578440808, "grad_norm": 1.3785196542739868, "learning_rate": 0.0001994184988277385, "loss": 1.4033, "step": 1790 }, { "epoch": 0.06910490856592878, "grad_norm": 1.081828236579895, "learning_rate": 0.00019941523790920743, "loss": 1.4, "step": 1795 }, { "epoch": 0.06929740134744947, "grad_norm": 1.1024401187896729, "learning_rate": 0.0001994119678998114, "loss": 1.4751, "step": 1800 }, { "epoch": 0.06948989412897016, "grad_norm": 3.584055185317993, "learning_rate": 0.0001994086887998495, "loss": 1.3449, "step": 1805 }, { "epoch": 0.06968238691049086, "grad_norm": 0.9418397545814514, "learning_rate": 
0.0001994054006096215, "loss": 1.3217, "step": 1810 }, { "epoch": 0.06987487969201155, "grad_norm": 1.6071193218231201, "learning_rate": 0.00019940210332942813, "loss": 1.3636, "step": 1815 }, { "epoch": 0.07006737247353224, "grad_norm": 2.0080580711364746, "learning_rate": 0.00019939879695957084, "loss": 1.4779, "step": 1820 }, { "epoch": 0.07025986525505294, "grad_norm": 1.169058918952942, "learning_rate": 0.00019939548150035207, "loss": 1.4031, "step": 1825 }, { "epoch": 0.07045235803657363, "grad_norm": 0.9863006472587585, "learning_rate": 0.00019939215695207496, "loss": 1.3832, "step": 1830 }, { "epoch": 0.07064485081809432, "grad_norm": 1.2257460355758667, "learning_rate": 0.00019938882331504347, "loss": 1.4967, "step": 1835 }, { "epoch": 0.07083734359961502, "grad_norm": 1.0062893629074097, "learning_rate": 0.00019938548058956253, "loss": 1.2637, "step": 1840 }, { "epoch": 0.0710298363811357, "grad_norm": 1.4179530143737793, "learning_rate": 0.0001993821287759377, "loss": 1.2961, "step": 1845 }, { "epoch": 0.0712223291626564, "grad_norm": 1.2181779146194458, "learning_rate": 0.00019937876787447557, "loss": 1.4104, "step": 1850 }, { "epoch": 0.07141482194417709, "grad_norm": 1.6110061407089233, "learning_rate": 0.00019937539788548344, "loss": 1.4045, "step": 1855 }, { "epoch": 0.07160731472569778, "grad_norm": 1.2814903259277344, "learning_rate": 0.0001993720188092695, "loss": 1.4194, "step": 1860 }, { "epoch": 0.07179980750721848, "grad_norm": 1.382265329360962, "learning_rate": 0.00019936863064614268, "loss": 1.5848, "step": 1865 }, { "epoch": 0.07199230028873917, "grad_norm": 1.4708553552627563, "learning_rate": 0.00019936523339641286, "loss": 1.6196, "step": 1870 }, { "epoch": 0.07218479307025986, "grad_norm": 1.0691862106323242, "learning_rate": 0.0001993618270603907, "loss": 1.4939, "step": 1875 }, { "epoch": 0.07237728585178056, "grad_norm": 0.9476374387741089, "learning_rate": 0.0001993584116383876, "loss": 1.5043, "step": 1880 }, { "epoch": 
0.07256977863330125, "grad_norm": 1.37090003490448, "learning_rate": 0.000199354987130716, "loss": 1.4371, "step": 1885 }, { "epoch": 0.07276227141482194, "grad_norm": 1.2001820802688599, "learning_rate": 0.000199351553537689, "loss": 1.3048, "step": 1890 }, { "epoch": 0.07295476419634264, "grad_norm": 1.1123398542404175, "learning_rate": 0.00019934811085962055, "loss": 1.4398, "step": 1895 }, { "epoch": 0.07314725697786333, "grad_norm": 1.638574242591858, "learning_rate": 0.0001993446590968255, "loss": 1.3563, "step": 1900 }, { "epoch": 0.07333974975938402, "grad_norm": 1.9532630443572998, "learning_rate": 0.00019934119824961948, "loss": 1.3723, "step": 1905 }, { "epoch": 0.07353224254090472, "grad_norm": 1.3247241973876953, "learning_rate": 0.0001993377283183189, "loss": 1.4474, "step": 1910 }, { "epoch": 0.0737247353224254, "grad_norm": 1.203049659729004, "learning_rate": 0.00019933424930324118, "loss": 1.3347, "step": 1915 }, { "epoch": 0.0739172281039461, "grad_norm": 1.8858312368392944, "learning_rate": 0.00019933076120470436, "loss": 1.4754, "step": 1920 }, { "epoch": 0.0741097208854668, "grad_norm": 1.117814540863037, "learning_rate": 0.00019932726402302744, "loss": 1.4828, "step": 1925 }, { "epoch": 0.07430221366698748, "grad_norm": 1.0317554473876953, "learning_rate": 0.00019932375775853021, "loss": 1.5034, "step": 1930 }, { "epoch": 0.07449470644850818, "grad_norm": 2.315903902053833, "learning_rate": 0.00019932024241153332, "loss": 1.4311, "step": 1935 }, { "epoch": 0.07468719923002888, "grad_norm": 1.5780115127563477, "learning_rate": 0.00019931671798235817, "loss": 1.3917, "step": 1940 }, { "epoch": 0.07487969201154956, "grad_norm": 1.3360038995742798, "learning_rate": 0.00019931318447132706, "loss": 1.3634, "step": 1945 }, { "epoch": 0.07507218479307026, "grad_norm": 2.275620937347412, "learning_rate": 0.00019930964187876314, "loss": 1.414, "step": 1950 }, { "epoch": 0.07526467757459095, "grad_norm": 1.7956300973892212, "learning_rate": 
0.00019930609020499032, "loss": 1.5117, "step": 1955 }, { "epoch": 0.07545717035611164, "grad_norm": 1.6429657936096191, "learning_rate": 0.0001993025294503334, "loss": 1.4436, "step": 1960 }, { "epoch": 0.07564966313763234, "grad_norm": 1.432246446609497, "learning_rate": 0.000199298959615118, "loss": 1.3952, "step": 1965 }, { "epoch": 0.07584215591915303, "grad_norm": 1.0579869747161865, "learning_rate": 0.00019929538069967051, "loss": 1.4369, "step": 1970 }, { "epoch": 0.07603464870067372, "grad_norm": 1.766543984413147, "learning_rate": 0.00019929179270431824, "loss": 1.5033, "step": 1975 }, { "epoch": 0.07622714148219442, "grad_norm": 1.0774848461151123, "learning_rate": 0.00019928819562938928, "loss": 1.3399, "step": 1980 }, { "epoch": 0.0764196342637151, "grad_norm": 1.0951963663101196, "learning_rate": 0.00019928458947521252, "loss": 1.3656, "step": 1985 }, { "epoch": 0.0766121270452358, "grad_norm": 1.278283953666687, "learning_rate": 0.0001992809742421178, "loss": 1.3467, "step": 1990 }, { "epoch": 0.0768046198267565, "grad_norm": 1.139508605003357, "learning_rate": 0.00019927734993043566, "loss": 1.4316, "step": 1995 }, { "epoch": 0.07699711260827719, "grad_norm": 1.39482581615448, "learning_rate": 0.00019927371654049748, "loss": 1.2032, "step": 2000 }, { "epoch": 0.07718960538979788, "grad_norm": 0.9154567718505859, "learning_rate": 0.0001992700740726356, "loss": 1.5053, "step": 2005 }, { "epoch": 0.07738209817131858, "grad_norm": 1.5105671882629395, "learning_rate": 0.00019926642252718303, "loss": 1.5059, "step": 2010 }, { "epoch": 0.07757459095283926, "grad_norm": 1.4019540548324585, "learning_rate": 0.00019926276190447367, "loss": 1.4051, "step": 2015 }, { "epoch": 0.07776708373435996, "grad_norm": 1.619841456413269, "learning_rate": 0.00019925909220484234, "loss": 1.1784, "step": 2020 }, { "epoch": 0.07795957651588066, "grad_norm": 1.6128195524215698, "learning_rate": 0.0001992554134286245, "loss": 1.4623, "step": 2025 }, { "epoch": 
0.07815206929740134, "grad_norm": 1.2766104936599731, "learning_rate": 0.00019925172557615665, "loss": 1.3162, "step": 2030 }, { "epoch": 0.07834456207892204, "grad_norm": 1.2187426090240479, "learning_rate": 0.00019924802864777598, "loss": 1.2874, "step": 2035 }, { "epoch": 0.07853705486044274, "grad_norm": 1.1050268411636353, "learning_rate": 0.00019924432264382055, "loss": 1.433, "step": 2040 }, { "epoch": 0.07872954764196342, "grad_norm": 1.6128287315368652, "learning_rate": 0.00019924060756462925, "loss": 1.4698, "step": 2045 }, { "epoch": 0.07892204042348412, "grad_norm": 1.6588749885559082, "learning_rate": 0.00019923688341054176, "loss": 1.4972, "step": 2050 }, { "epoch": 0.0791145332050048, "grad_norm": 1.135289192199707, "learning_rate": 0.0001992331501818987, "loss": 1.3991, "step": 2055 }, { "epoch": 0.0793070259865255, "grad_norm": 1.757759928703308, "learning_rate": 0.00019922940787904137, "loss": 1.3736, "step": 2060 }, { "epoch": 0.0794995187680462, "grad_norm": 0.9943239092826843, "learning_rate": 0.00019922565650231207, "loss": 1.4476, "step": 2065 }, { "epoch": 0.07969201154956689, "grad_norm": 0.9459586143493652, "learning_rate": 0.00019922189605205379, "loss": 1.3913, "step": 2070 }, { "epoch": 0.07988450433108758, "grad_norm": 1.2325133085250854, "learning_rate": 0.00019921812652861037, "loss": 1.4658, "step": 2075 }, { "epoch": 0.08007699711260828, "grad_norm": 1.2397321462631226, "learning_rate": 0.00019921434793232658, "loss": 1.2552, "step": 2080 }, { "epoch": 0.08026948989412896, "grad_norm": 0.9636020660400391, "learning_rate": 0.0001992105602635479, "loss": 1.3296, "step": 2085 }, { "epoch": 0.08046198267564966, "grad_norm": 0.900841474533081, "learning_rate": 0.00019920676352262067, "loss": 1.2329, "step": 2090 }, { "epoch": 0.08065447545717036, "grad_norm": 1.0425807237625122, "learning_rate": 0.00019920295770989213, "loss": 1.1604, "step": 2095 }, { "epoch": 0.08084696823869104, "grad_norm": 1.1449722051620483, "learning_rate": 
0.00019919914282571024, "loss": 1.3233, "step": 2100 }, { "epoch": 0.08103946102021174, "grad_norm": 1.2076728343963623, "learning_rate": 0.00019919531887042387, "loss": 1.3449, "step": 2105 }, { "epoch": 0.08123195380173244, "grad_norm": 0.968323826789856, "learning_rate": 0.00019919148584438272, "loss": 1.4273, "step": 2110 }, { "epoch": 0.08142444658325312, "grad_norm": 1.7322039604187012, "learning_rate": 0.00019918764374793726, "loss": 1.4994, "step": 2115 }, { "epoch": 0.08161693936477382, "grad_norm": 1.4216794967651367, "learning_rate": 0.00019918379258143884, "loss": 1.4071, "step": 2120 }, { "epoch": 0.08180943214629452, "grad_norm": 1.2262970209121704, "learning_rate": 0.00019917993234523963, "loss": 1.3528, "step": 2125 }, { "epoch": 0.0820019249278152, "grad_norm": 1.3137859106063843, "learning_rate": 0.0001991760630396926, "loss": 1.4367, "step": 2130 }, { "epoch": 0.0821944177093359, "grad_norm": 1.364478588104248, "learning_rate": 0.00019917218466515156, "loss": 1.6896, "step": 2135 }, { "epoch": 0.0823869104908566, "grad_norm": 1.2037614583969116, "learning_rate": 0.00019916829722197124, "loss": 1.5371, "step": 2140 }, { "epoch": 0.08257940327237728, "grad_norm": 1.7590453624725342, "learning_rate": 0.00019916440071050706, "loss": 1.6331, "step": 2145 }, { "epoch": 0.08277189605389798, "grad_norm": 1.6112565994262695, "learning_rate": 0.00019916049513111532, "loss": 1.5066, "step": 2150 }, { "epoch": 0.08296438883541868, "grad_norm": 0.937174916267395, "learning_rate": 0.00019915658048415318, "loss": 1.4698, "step": 2155 }, { "epoch": 0.08315688161693936, "grad_norm": 1.8568309545516968, "learning_rate": 0.00019915265676997862, "loss": 1.3197, "step": 2160 }, { "epoch": 0.08334937439846006, "grad_norm": 1.9865350723266602, "learning_rate": 0.00019914872398895043, "loss": 1.4883, "step": 2165 }, { "epoch": 0.08354186717998074, "grad_norm": 1.0227729082107544, "learning_rate": 0.0001991447821414282, "loss": 1.3967, "step": 2170 }, { "epoch": 
0.08373435996150144, "grad_norm": 1.3028923273086548, "learning_rate": 0.00019914083122777245, "loss": 1.4296, "step": 2175 }, { "epoch": 0.08392685274302214, "grad_norm": 1.6131690740585327, "learning_rate": 0.00019913687124834442, "loss": 1.2983, "step": 2180 }, { "epoch": 0.08411934552454282, "grad_norm": 1.1791858673095703, "learning_rate": 0.00019913290220350622, "loss": 1.4632, "step": 2185 }, { "epoch": 0.08431183830606352, "grad_norm": 1.8457857370376587, "learning_rate": 0.00019912892409362085, "loss": 1.3623, "step": 2190 }, { "epoch": 0.08450433108758422, "grad_norm": 1.525680422782898, "learning_rate": 0.00019912493691905198, "loss": 1.2729, "step": 2195 }, { "epoch": 0.0846968238691049, "grad_norm": 1.3267451524734497, "learning_rate": 0.0001991209406801643, "loss": 1.3808, "step": 2200 }, { "epoch": 0.0848893166506256, "grad_norm": 1.37312912940979, "learning_rate": 0.00019911693537732323, "loss": 1.6072, "step": 2205 }, { "epoch": 0.0850818094321463, "grad_norm": 1.3433706760406494, "learning_rate": 0.000199112921010895, "loss": 1.4956, "step": 2210 }, { "epoch": 0.08527430221366698, "grad_norm": 1.220732569694519, "learning_rate": 0.00019910889758124672, "loss": 1.4875, "step": 2215 }, { "epoch": 0.08546679499518768, "grad_norm": 0.9385544657707214, "learning_rate": 0.00019910486508874627, "loss": 1.4202, "step": 2220 }, { "epoch": 0.08565928777670838, "grad_norm": 0.8727134466171265, "learning_rate": 0.0001991008235337624, "loss": 1.2268, "step": 2225 }, { "epoch": 0.08585178055822906, "grad_norm": 2.276063919067383, "learning_rate": 0.00019909677291666473, "loss": 1.3911, "step": 2230 }, { "epoch": 0.08604427333974976, "grad_norm": 1.2023353576660156, "learning_rate": 0.00019909271323782364, "loss": 1.4754, "step": 2235 }, { "epoch": 0.08623676612127046, "grad_norm": 0.9018556475639343, "learning_rate": 0.00019908864449761033, "loss": 1.4073, "step": 2240 }, { "epoch": 0.08642925890279114, "grad_norm": 1.2011221647262573, "learning_rate": 
0.00019908456669639687, "loss": 1.3213, "step": 2245 }, { "epoch": 0.08662175168431184, "grad_norm": 1.9858746528625488, "learning_rate": 0.0001990804798345562, "loss": 1.3403, "step": 2250 }, { "epoch": 0.08681424446583254, "grad_norm": 1.0072557926177979, "learning_rate": 0.000199076383912462, "loss": 1.3387, "step": 2255 }, { "epoch": 0.08700673724735322, "grad_norm": 1.4516913890838623, "learning_rate": 0.00019907227893048877, "loss": 1.3755, "step": 2260 }, { "epoch": 0.08719923002887392, "grad_norm": 1.0636364221572876, "learning_rate": 0.00019906816488901195, "loss": 1.2495, "step": 2265 }, { "epoch": 0.0873917228103946, "grad_norm": 1.8495078086853027, "learning_rate": 0.0001990640417884077, "loss": 1.4166, "step": 2270 }, { "epoch": 0.0875842155919153, "grad_norm": 2.327951431274414, "learning_rate": 0.00019905990962905312, "loss": 1.3934, "step": 2275 }, { "epoch": 0.087776708373436, "grad_norm": 1.5719425678253174, "learning_rate": 0.00019905576841132595, "loss": 1.3932, "step": 2280 }, { "epoch": 0.08796920115495668, "grad_norm": 1.5799787044525146, "learning_rate": 0.000199051618135605, "loss": 1.5148, "step": 2285 }, { "epoch": 0.08816169393647738, "grad_norm": 0.7972100377082825, "learning_rate": 0.00019904745880226966, "loss": 1.2456, "step": 2290 }, { "epoch": 0.08835418671799808, "grad_norm": 1.4252464771270752, "learning_rate": 0.00019904329041170042, "loss": 1.4287, "step": 2295 }, { "epoch": 0.08854667949951876, "grad_norm": 1.5532910823822021, "learning_rate": 0.00019903911296427834, "loss": 1.3685, "step": 2300 }, { "epoch": 0.08873917228103946, "grad_norm": 1.3019160032272339, "learning_rate": 0.00019903492646038544, "loss": 1.3928, "step": 2305 }, { "epoch": 0.08893166506256016, "grad_norm": 1.7292853593826294, "learning_rate": 0.00019903073090040457, "loss": 1.369, "step": 2310 }, { "epoch": 0.08912415784408084, "grad_norm": 1.1780908107757568, "learning_rate": 0.00019902652628471938, "loss": 1.2541, "step": 2315 }, { "epoch": 
0.08931665062560154, "grad_norm": 1.353721261024475, "learning_rate": 0.00019902231261371433, "loss": 1.2658, "step": 2320 }, { "epoch": 0.08950914340712224, "grad_norm": 1.0020657777786255, "learning_rate": 0.0001990180898877748, "loss": 1.3319, "step": 2325 }, { "epoch": 0.08970163618864292, "grad_norm": 1.1655325889587402, "learning_rate": 0.00019901385810728686, "loss": 1.3783, "step": 2330 }, { "epoch": 0.08989412897016362, "grad_norm": 1.2237039804458618, "learning_rate": 0.00019900961727263748, "loss": 1.2919, "step": 2335 }, { "epoch": 0.09008662175168432, "grad_norm": 1.6417179107666016, "learning_rate": 0.0001990053673842145, "loss": 1.471, "step": 2340 }, { "epoch": 0.090279114533205, "grad_norm": 1.2170498371124268, "learning_rate": 0.00019900110844240653, "loss": 1.3889, "step": 2345 }, { "epoch": 0.0904716073147257, "grad_norm": 1.1462334394454956, "learning_rate": 0.00019899684044760304, "loss": 1.4191, "step": 2350 }, { "epoch": 0.0906641000962464, "grad_norm": 0.961063802242279, "learning_rate": 0.00019899256340019425, "loss": 1.5019, "step": 2355 }, { "epoch": 0.09085659287776708, "grad_norm": 0.9323278069496155, "learning_rate": 0.0001989882773005713, "loss": 1.3988, "step": 2360 }, { "epoch": 0.09104908565928778, "grad_norm": 1.8326833248138428, "learning_rate": 0.00019898398214912612, "loss": 1.4211, "step": 2365 }, { "epoch": 0.09124157844080846, "grad_norm": 1.2725722789764404, "learning_rate": 0.00019897967794625153, "loss": 1.3274, "step": 2370 }, { "epoch": 0.09143407122232916, "grad_norm": 0.9105005860328674, "learning_rate": 0.00019897536469234102, "loss": 1.3309, "step": 2375 }, { "epoch": 0.09162656400384986, "grad_norm": 1.3157737255096436, "learning_rate": 0.00019897104238778907, "loss": 1.4086, "step": 2380 }, { "epoch": 0.09181905678537054, "grad_norm": 1.9295995235443115, "learning_rate": 0.00019896671103299094, "loss": 1.3849, "step": 2385 }, { "epoch": 0.09201154956689124, "grad_norm": 1.0183601379394531, "learning_rate": 
0.00019896237062834267, "loss": 1.4397, "step": 2390 }, { "epoch": 0.09220404234841194, "grad_norm": 1.118998646736145, "learning_rate": 0.00019895802117424118, "loss": 1.568, "step": 2395 }, { "epoch": 0.09239653512993262, "grad_norm": 1.6463871002197266, "learning_rate": 0.00019895366267108416, "loss": 1.2755, "step": 2400 }, { "epoch": 0.09258902791145332, "grad_norm": 1.3326902389526367, "learning_rate": 0.00019894929511927022, "loss": 1.4369, "step": 2405 }, { "epoch": 0.09278152069297402, "grad_norm": 1.4168566465377808, "learning_rate": 0.00019894491851919871, "loss": 1.4323, "step": 2410 }, { "epoch": 0.0929740134744947, "grad_norm": 1.3266388177871704, "learning_rate": 0.00019894053287126986, "loss": 1.17, "step": 2415 }, { "epoch": 0.0931665062560154, "grad_norm": 1.7362377643585205, "learning_rate": 0.0001989361381758847, "loss": 1.5996, "step": 2420 }, { "epoch": 0.0933589990375361, "grad_norm": 1.1684424877166748, "learning_rate": 0.00019893173443344511, "loss": 1.3486, "step": 2425 }, { "epoch": 0.09355149181905678, "grad_norm": 1.3784310817718506, "learning_rate": 0.00019892732164435376, "loss": 1.2775, "step": 2430 }, { "epoch": 0.09374398460057748, "grad_norm": 1.1288561820983887, "learning_rate": 0.00019892289980901414, "loss": 1.2044, "step": 2435 }, { "epoch": 0.09393647738209818, "grad_norm": 1.1601535081863403, "learning_rate": 0.00019891846892783067, "loss": 1.4937, "step": 2440 }, { "epoch": 0.09412897016361886, "grad_norm": 1.3866316080093384, "learning_rate": 0.0001989140290012085, "loss": 1.913, "step": 2445 }, { "epoch": 0.09432146294513956, "grad_norm": 1.4638808965682983, "learning_rate": 0.00019890958002955362, "loss": 1.4114, "step": 2450 }, { "epoch": 0.09451395572666026, "grad_norm": 1.4660701751708984, "learning_rate": 0.00019890512201327284, "loss": 1.3607, "step": 2455 }, { "epoch": 0.09470644850818094, "grad_norm": 0.9787619113922119, "learning_rate": 0.00019890065495277388, "loss": 1.3729, "step": 2460 }, { "epoch": 
0.09489894128970164, "grad_norm": 1.4845494031906128, "learning_rate": 0.00019889617884846517, "loss": 1.3326, "step": 2465 }, { "epoch": 0.09509143407122234, "grad_norm": 1.2955145835876465, "learning_rate": 0.000198891693700756, "loss": 1.3738, "step": 2470 }, { "epoch": 0.09528392685274302, "grad_norm": 1.7431209087371826, "learning_rate": 0.00019888719951005656, "loss": 1.3676, "step": 2475 }, { "epoch": 0.09547641963426372, "grad_norm": 0.923613965511322, "learning_rate": 0.00019888269627677777, "loss": 1.4142, "step": 2480 }, { "epoch": 0.0956689124157844, "grad_norm": 1.0258625745773315, "learning_rate": 0.0001988781840013315, "loss": 1.3868, "step": 2485 }, { "epoch": 0.0958614051973051, "grad_norm": 1.1365761756896973, "learning_rate": 0.00019887366268413025, "loss": 1.2871, "step": 2490 }, { "epoch": 0.0960538979788258, "grad_norm": 2.3250112533569336, "learning_rate": 0.00019886913232558754, "loss": 1.4345, "step": 2495 }, { "epoch": 0.09624639076034648, "grad_norm": 1.1625771522521973, "learning_rate": 0.00019886459292611767, "loss": 1.5796, "step": 2500 }, { "epoch": 0.09643888354186718, "grad_norm": 1.7454233169555664, "learning_rate": 0.00019886004448613562, "loss": 1.6151, "step": 2505 }, { "epoch": 0.09663137632338788, "grad_norm": 1.3514907360076904, "learning_rate": 0.00019885548700605745, "loss": 1.4529, "step": 2510 }, { "epoch": 0.09682386910490856, "grad_norm": 1.9735958576202393, "learning_rate": 0.00019885092048629982, "loss": 1.4945, "step": 2515 }, { "epoch": 0.09701636188642926, "grad_norm": 1.190207600593567, "learning_rate": 0.00019884634492728037, "loss": 1.473, "step": 2520 }, { "epoch": 0.09720885466794996, "grad_norm": 1.1596134901046753, "learning_rate": 0.00019884176032941743, "loss": 1.3745, "step": 2525 }, { "epoch": 0.09740134744947064, "grad_norm": 1.0496324300765991, "learning_rate": 0.0001988371666931303, "loss": 1.3853, "step": 2530 }, { "epoch": 0.09759384023099134, "grad_norm": 1.2820552587509155, "learning_rate": 
0.000198832564018839, "loss": 1.4205, "step": 2535 }, { "epoch": 0.09778633301251204, "grad_norm": 0.9559310674667358, "learning_rate": 0.00019882795230696446, "loss": 1.2517, "step": 2540 }, { "epoch": 0.09797882579403272, "grad_norm": 1.026782751083374, "learning_rate": 0.00019882333155792835, "loss": 1.335, "step": 2545 }, { "epoch": 0.09817131857555342, "grad_norm": 1.3378793001174927, "learning_rate": 0.00019881870177215319, "loss": 1.3419, "step": 2550 }, { "epoch": 0.09836381135707412, "grad_norm": 1.0646761655807495, "learning_rate": 0.00019881406295006238, "loss": 1.3793, "step": 2555 }, { "epoch": 0.0985563041385948, "grad_norm": 1.3302899599075317, "learning_rate": 0.00019880941509208005, "loss": 1.3056, "step": 2560 }, { "epoch": 0.0987487969201155, "grad_norm": 1.3029305934906006, "learning_rate": 0.00019880475819863134, "loss": 1.3028, "step": 2565 }, { "epoch": 0.0989412897016362, "grad_norm": 1.6653764247894287, "learning_rate": 0.00019880009227014197, "loss": 1.4698, "step": 2570 }, { "epoch": 0.09913378248315688, "grad_norm": 1.5575610399246216, "learning_rate": 0.00019879541730703865, "loss": 1.2843, "step": 2575 }, { "epoch": 0.09932627526467758, "grad_norm": 1.1219451427459717, "learning_rate": 0.0001987907333097489, "loss": 1.2824, "step": 2580 }, { "epoch": 0.09951876804619826, "grad_norm": 1.680050253868103, "learning_rate": 0.000198786040278701, "loss": 1.431, "step": 2585 }, { "epoch": 0.09971126082771896, "grad_norm": 2.5341451168060303, "learning_rate": 0.00019878133821432412, "loss": 1.3925, "step": 2590 }, { "epoch": 0.09990375360923966, "grad_norm": 1.132542610168457, "learning_rate": 0.00019877662711704824, "loss": 1.4082, "step": 2595 }, { "epoch": 0.10009624639076034, "grad_norm": 1.0605584383010864, "learning_rate": 0.0001987719069873041, "loss": 1.2904, "step": 2600 }, { "epoch": 0.10028873917228104, "grad_norm": 1.161116361618042, "learning_rate": 0.0001987671778255234, "loss": 1.2922, "step": 2605 }, { "epoch": 
0.10048123195380174, "grad_norm": 2.2763168811798096, "learning_rate": 0.0001987624396321386, "loss": 1.4692, "step": 2610 }, { "epoch": 0.10067372473532242, "grad_norm": 1.547316312789917, "learning_rate": 0.00019875769240758286, "loss": 1.458, "step": 2615 }, { "epoch": 0.10086621751684312, "grad_norm": 1.0679529905319214, "learning_rate": 0.0001987529361522904, "loss": 1.3075, "step": 2620 }, { "epoch": 0.10105871029836382, "grad_norm": 1.9426227807998657, "learning_rate": 0.0001987481708666961, "loss": 1.4985, "step": 2625 }, { "epoch": 0.1012512030798845, "grad_norm": 1.1619765758514404, "learning_rate": 0.00019874339655123575, "loss": 1.329, "step": 2630 }, { "epoch": 0.1014436958614052, "grad_norm": 0.8115332722663879, "learning_rate": 0.00019873861320634587, "loss": 1.218, "step": 2635 }, { "epoch": 0.1016361886429259, "grad_norm": 1.2575538158416748, "learning_rate": 0.0001987338208324639, "loss": 1.3133, "step": 2640 }, { "epoch": 0.10182868142444658, "grad_norm": 0.9605635404586792, "learning_rate": 0.00019872901943002806, "loss": 1.4462, "step": 2645 }, { "epoch": 0.10202117420596728, "grad_norm": 1.7909116744995117, "learning_rate": 0.00019872420899947742, "loss": 1.257, "step": 2650 }, { "epoch": 0.10221366698748797, "grad_norm": 1.5501129627227783, "learning_rate": 0.00019871938954125185, "loss": 1.2825, "step": 2655 }, { "epoch": 0.10240615976900866, "grad_norm": 1.4636069536209106, "learning_rate": 0.00019871456105579208, "loss": 1.3909, "step": 2660 }, { "epoch": 0.10259865255052936, "grad_norm": 1.4283297061920166, "learning_rate": 0.0001987097235435396, "loss": 1.2148, "step": 2665 }, { "epoch": 0.10279114533205005, "grad_norm": 1.316149115562439, "learning_rate": 0.00019870487700493684, "loss": 1.393, "step": 2670 }, { "epoch": 0.10298363811357074, "grad_norm": 0.8449459671974182, "learning_rate": 0.00019870002144042689, "loss": 1.4969, "step": 2675 }, { "epoch": 0.10317613089509144, "grad_norm": 1.3309835195541382, "learning_rate": 
0.00019869515685045383, "loss": 1.4927, "step": 2680 }, { "epoch": 0.10336862367661212, "grad_norm": 0.9159907102584839, "learning_rate": 0.00019869028323546246, "loss": 1.3526, "step": 2685 }, { "epoch": 0.10356111645813282, "grad_norm": 2.2842464447021484, "learning_rate": 0.00019868540059589845, "loss": 1.3646, "step": 2690 }, { "epoch": 0.10375360923965352, "grad_norm": 0.9444146156311035, "learning_rate": 0.00019868050893220832, "loss": 1.349, "step": 2695 }, { "epoch": 0.1039461020211742, "grad_norm": 1.8546898365020752, "learning_rate": 0.0001986756082448393, "loss": 1.3195, "step": 2700 }, { "epoch": 0.1041385948026949, "grad_norm": 1.310783863067627, "learning_rate": 0.00019867069853423961, "loss": 1.6065, "step": 2705 }, { "epoch": 0.1043310875842156, "grad_norm": 1.248542308807373, "learning_rate": 0.00019866577980085813, "loss": 1.1987, "step": 2710 }, { "epoch": 0.10452358036573628, "grad_norm": 1.421844482421875, "learning_rate": 0.00019866085204514472, "loss": 1.3576, "step": 2715 }, { "epoch": 0.10471607314725698, "grad_norm": 1.1641993522644043, "learning_rate": 0.00019865591526754996, "loss": 1.436, "step": 2720 }, { "epoch": 0.10490856592877768, "grad_norm": 1.1122993230819702, "learning_rate": 0.0001986509694685253, "loss": 1.4218, "step": 2725 }, { "epoch": 0.10510105871029836, "grad_norm": 1.222016453742981, "learning_rate": 0.00019864601464852295, "loss": 1.2965, "step": 2730 }, { "epoch": 0.10529355149181906, "grad_norm": 1.6765378713607788, "learning_rate": 0.00019864105080799602, "loss": 1.3908, "step": 2735 }, { "epoch": 0.10548604427333975, "grad_norm": 1.8405592441558838, "learning_rate": 0.00019863607794739845, "loss": 1.2583, "step": 2740 }, { "epoch": 0.10567853705486044, "grad_norm": 1.3908604383468628, "learning_rate": 0.00019863109606718497, "loss": 1.2726, "step": 2745 }, { "epoch": 0.10587102983638114, "grad_norm": 1.3825894594192505, "learning_rate": 0.0001986261051678111, "loss": 1.3234, "step": 2750 }, { "epoch": 
0.10606352261790183, "grad_norm": 1.5409029722213745, "learning_rate": 0.00019862110524973328, "loss": 1.4151, "step": 2755 }, { "epoch": 0.10625601539942252, "grad_norm": 2.1902191638946533, "learning_rate": 0.00019861609631340868, "loss": 1.3865, "step": 2760 }, { "epoch": 0.10644850818094322, "grad_norm": 0.9851712584495544, "learning_rate": 0.00019861107835929533, "loss": 1.4799, "step": 2765 }, { "epoch": 0.10664100096246391, "grad_norm": 1.2206732034683228, "learning_rate": 0.0001986060513878521, "loss": 1.3456, "step": 2770 }, { "epoch": 0.1068334937439846, "grad_norm": 1.3443645238876343, "learning_rate": 0.0001986010153995387, "loss": 1.2586, "step": 2775 }, { "epoch": 0.1070259865255053, "grad_norm": 1.1602864265441895, "learning_rate": 0.00019859597039481561, "loss": 1.1789, "step": 2780 }, { "epoch": 0.107218479307026, "grad_norm": 0.8068190813064575, "learning_rate": 0.00019859091637414414, "loss": 1.4228, "step": 2785 }, { "epoch": 0.10741097208854668, "grad_norm": 1.4439321756362915, "learning_rate": 0.0001985858533379865, "loss": 1.4365, "step": 2790 }, { "epoch": 0.10760346487006738, "grad_norm": 1.0814299583435059, "learning_rate": 0.00019858078128680564, "loss": 1.2755, "step": 2795 }, { "epoch": 0.10779595765158806, "grad_norm": 1.7848068475723267, "learning_rate": 0.00019857570022106536, "loss": 1.4061, "step": 2800 }, { "epoch": 0.10798845043310876, "grad_norm": 1.3163549900054932, "learning_rate": 0.0001985706101412303, "loss": 1.3599, "step": 2805 }, { "epoch": 0.10818094321462945, "grad_norm": 1.439104437828064, "learning_rate": 0.0001985655110477659, "loss": 1.3054, "step": 2810 }, { "epoch": 0.10837343599615014, "grad_norm": 0.892706036567688, "learning_rate": 0.0001985604029411385, "loss": 1.3504, "step": 2815 }, { "epoch": 0.10856592877767084, "grad_norm": 1.102704405784607, "learning_rate": 0.0001985552858218151, "loss": 1.3902, "step": 2820 }, { "epoch": 0.10875842155919153, "grad_norm": 1.21804678440094, "learning_rate": 
0.0001985501596902637, "loss": 1.36, "step": 2825 }, { "epoch": 0.10895091434071222, "grad_norm": 1.6015477180480957, "learning_rate": 0.00019854502454695302, "loss": 1.6163, "step": 2830 }, { "epoch": 0.10914340712223292, "grad_norm": 1.3947224617004395, "learning_rate": 0.00019853988039235265, "loss": 1.2207, "step": 2835 }, { "epoch": 0.10933589990375361, "grad_norm": 1.616458535194397, "learning_rate": 0.00019853472722693302, "loss": 1.2081, "step": 2840 }, { "epoch": 0.1095283926852743, "grad_norm": 2.1588330268859863, "learning_rate": 0.00019852956505116528, "loss": 1.4428, "step": 2845 }, { "epoch": 0.109720885466795, "grad_norm": 1.2287509441375732, "learning_rate": 0.00019852439386552152, "loss": 1.4548, "step": 2850 }, { "epoch": 0.1099133782483157, "grad_norm": 1.7198657989501953, "learning_rate": 0.00019851921367047463, "loss": 1.2034, "step": 2855 }, { "epoch": 0.11010587102983638, "grad_norm": 1.4924067258834839, "learning_rate": 0.00019851402446649825, "loss": 1.3635, "step": 2860 }, { "epoch": 0.11029836381135708, "grad_norm": 1.3675332069396973, "learning_rate": 0.00019850882625406695, "loss": 1.29, "step": 2865 }, { "epoch": 0.11049085659287777, "grad_norm": 1.2170599699020386, "learning_rate": 0.00019850361903365603, "loss": 1.3495, "step": 2870 }, { "epoch": 0.11068334937439846, "grad_norm": 1.6067026853561401, "learning_rate": 0.00019849840280574167, "loss": 1.4679, "step": 2875 }, { "epoch": 0.11087584215591915, "grad_norm": 1.0457261800765991, "learning_rate": 0.00019849317757080092, "loss": 1.3289, "step": 2880 }, { "epoch": 0.11106833493743985, "grad_norm": 0.6958736181259155, "learning_rate": 0.00019848794332931146, "loss": 0.9412, "step": 2885 }, { "epoch": 0.11126082771896054, "grad_norm": 0.9687005281448364, "learning_rate": 0.00019848270008175205, "loss": 1.2777, "step": 2890 }, { "epoch": 0.11145332050048123, "grad_norm": 0.8073298931121826, "learning_rate": 0.00019847744782860213, "loss": 1.4295, "step": 2895 }, { "epoch": 
0.11164581328200192, "grad_norm": 0.8794350624084473, "learning_rate": 0.00019847218657034193, "loss": 1.2199, "step": 2900 }, { "epoch": 0.11183830606352262, "grad_norm": 1.644554853439331, "learning_rate": 0.00019846691630745258, "loss": 1.3076, "step": 2905 }, { "epoch": 0.11203079884504331, "grad_norm": 1.0819231271743774, "learning_rate": 0.00019846163704041603, "loss": 1.385, "step": 2910 }, { "epoch": 0.112223291626564, "grad_norm": 1.4424269199371338, "learning_rate": 0.000198456348769715, "loss": 1.4287, "step": 2915 }, { "epoch": 0.1124157844080847, "grad_norm": 1.289413332939148, "learning_rate": 0.00019845105149583308, "loss": 1.25, "step": 2920 }, { "epoch": 0.1126082771896054, "grad_norm": 1.4669229984283447, "learning_rate": 0.00019844574521925474, "loss": 1.5371, "step": 2925 }, { "epoch": 0.11280076997112608, "grad_norm": 2.102736473083496, "learning_rate": 0.0001984404299404651, "loss": 1.5017, "step": 2930 }, { "epoch": 0.11299326275264678, "grad_norm": 1.1487330198287964, "learning_rate": 0.00019843510565995025, "loss": 1.3164, "step": 2935 }, { "epoch": 0.11318575553416747, "grad_norm": 1.259538173675537, "learning_rate": 0.00019842977237819707, "loss": 1.2946, "step": 2940 }, { "epoch": 0.11337824831568816, "grad_norm": 2.3158466815948486, "learning_rate": 0.00019842443009569324, "loss": 1.4614, "step": 2945 }, { "epoch": 0.11357074109720885, "grad_norm": 1.5077046155929565, "learning_rate": 0.0001984190788129273, "loss": 1.3478, "step": 2950 }, { "epoch": 0.11376323387872955, "grad_norm": 1.2548809051513672, "learning_rate": 0.00019841371853038852, "loss": 1.3351, "step": 2955 }, { "epoch": 0.11395572666025024, "grad_norm": 1.4622430801391602, "learning_rate": 0.00019840834924856715, "loss": 1.2788, "step": 2960 }, { "epoch": 0.11414821944177093, "grad_norm": 0.9759154319763184, "learning_rate": 0.00019840297096795415, "loss": 1.2793, "step": 2965 }, { "epoch": 0.11434071222329163, "grad_norm": 1.2217987775802612, "learning_rate": 
0.00019839758368904128, "loss": 1.284, "step": 2970 }, { "epoch": 0.11453320500481232, "grad_norm": 2.180697441101074, "learning_rate": 0.00019839326738746614, "loss": 1.4163, "step": 2975 }, { "epoch": 0.11472569778633301, "grad_norm": 1.156293511390686, "learning_rate": 0.00019838786391285554, "loss": 1.3045, "step": 2980 }, { "epoch": 0.11491819056785371, "grad_norm": 1.1444417238235474, "learning_rate": 0.00019838245144132658, "loss": 1.4522, "step": 2985 }, { "epoch": 0.1151106833493744, "grad_norm": 1.3959949016571045, "learning_rate": 0.00019837702997337414, "loss": 1.3959, "step": 2990 }, { "epoch": 0.1153031761308951, "grad_norm": 1.2789435386657715, "learning_rate": 0.00019837159950949402, "loss": 1.2951, "step": 2995 }, { "epoch": 0.11549566891241578, "grad_norm": 1.0902299880981445, "learning_rate": 0.00019836616005018275, "loss": 1.4573, "step": 3000 }, { "epoch": 0.11568816169393648, "grad_norm": 1.452920913696289, "learning_rate": 0.0001983607115959378, "loss": 1.4688, "step": 3005 }, { "epoch": 0.11588065447545717, "grad_norm": 2.192514419555664, "learning_rate": 0.0001983552541472573, "loss": 1.4282, "step": 3010 }, { "epoch": 0.11607314725697786, "grad_norm": 1.938883900642395, "learning_rate": 0.0001983497877046404, "loss": 1.6123, "step": 3015 }, { "epoch": 0.11626564003849855, "grad_norm": 2.4365732669830322, "learning_rate": 0.0001983443122685869, "loss": 1.4987, "step": 3020 }, { "epoch": 0.11645813282001925, "grad_norm": 1.827972173690796, "learning_rate": 0.0001983388278395975, "loss": 1.2196, "step": 3025 }, { "epoch": 0.11665062560153994, "grad_norm": 1.6184618473052979, "learning_rate": 0.00019833333441817374, "loss": 1.5257, "step": 3030 }, { "epoch": 0.11684311838306063, "grad_norm": 1.0191036462783813, "learning_rate": 0.00019832783200481797, "loss": 1.4799, "step": 3035 }, { "epoch": 0.11703561116458133, "grad_norm": 1.1552925109863281, "learning_rate": 0.0001983223206000333, "loss": 1.2014, "step": 3040 }, { "epoch": 
0.11722810394610202, "grad_norm": 0.9793531894683838, "learning_rate": 0.00019831680020432376, "loss": 1.2092, "step": 3045 }, { "epoch": 0.11742059672762271, "grad_norm": 1.480634331703186, "learning_rate": 0.0001983112708181941, "loss": 1.3238, "step": 3050 }, { "epoch": 0.11761308950914341, "grad_norm": 1.5112073421478271, "learning_rate": 0.00019830573244215, "loss": 1.5513, "step": 3055 }, { "epoch": 0.1178055822906641, "grad_norm": 1.4130852222442627, "learning_rate": 0.00019830018507669786, "loss": 1.4368, "step": 3060 }, { "epoch": 0.1179980750721848, "grad_norm": 1.401934027671814, "learning_rate": 0.000198294628722345, "loss": 1.243, "step": 3065 }, { "epoch": 0.11819056785370549, "grad_norm": 1.8309379816055298, "learning_rate": 0.00019828906337959946, "loss": 1.1656, "step": 3070 }, { "epoch": 0.11838306063522618, "grad_norm": 0.8511875867843628, "learning_rate": 0.0001982834890489702, "loss": 1.406, "step": 3075 }, { "epoch": 0.11857555341674687, "grad_norm": 1.4291598796844482, "learning_rate": 0.00019827790573096694, "loss": 1.3963, "step": 3080 }, { "epoch": 0.11876804619826757, "grad_norm": 0.6835631132125854, "learning_rate": 0.0001982723134261002, "loss": 1.1238, "step": 3085 }, { "epoch": 0.11896053897978826, "grad_norm": 1.6569236516952515, "learning_rate": 0.00019826671213488145, "loss": 1.3335, "step": 3090 }, { "epoch": 0.11915303176130895, "grad_norm": 1.0488132238388062, "learning_rate": 0.00019826110185782277, "loss": 1.3009, "step": 3095 }, { "epoch": 0.11934552454282965, "grad_norm": 1.3253639936447144, "learning_rate": 0.00019825548259543726, "loss": 1.3863, "step": 3100 }, { "epoch": 0.11953801732435033, "grad_norm": 0.9408076405525208, "learning_rate": 0.00019824985434823878, "loss": 1.3184, "step": 3105 }, { "epoch": 0.11973051010587103, "grad_norm": 0.9649772644042969, "learning_rate": 0.00019824421711674194, "loss": 1.2427, "step": 3110 }, { "epoch": 0.11992300288739172, "grad_norm": 1.7673052549362183, "learning_rate": 
0.00019823857090146225, "loss": 1.2804, "step": 3115 }, { "epoch": 0.12011549566891241, "grad_norm": 1.230724811553955, "learning_rate": 0.00019823291570291604, "loss": 1.3527, "step": 3120 }, { "epoch": 0.12030798845043311, "grad_norm": 2.382617473602295, "learning_rate": 0.0001982272515216204, "loss": 1.4123, "step": 3125 }, { "epoch": 0.1205004812319538, "grad_norm": 1.2811720371246338, "learning_rate": 0.00019822157835809332, "loss": 1.3935, "step": 3130 }, { "epoch": 0.1206929740134745, "grad_norm": 1.9592630863189697, "learning_rate": 0.00019821589621285356, "loss": 1.2387, "step": 3135 }, { "epoch": 0.12088546679499519, "grad_norm": 1.659197449684143, "learning_rate": 0.0001982102050864207, "loss": 1.4228, "step": 3140 }, { "epoch": 0.12107795957651588, "grad_norm": 1.2591451406478882, "learning_rate": 0.00019820450497931517, "loss": 1.3192, "step": 3145 }, { "epoch": 0.12127045235803657, "grad_norm": 1.1670453548431396, "learning_rate": 0.00019819879589205822, "loss": 1.2593, "step": 3150 }, { "epoch": 0.12146294513955727, "grad_norm": 1.680776834487915, "learning_rate": 0.0001981930778251719, "loss": 1.5809, "step": 3155 }, { "epoch": 0.12165543792107796, "grad_norm": 1.388492226600647, "learning_rate": 0.00019818735077917904, "loss": 1.5646, "step": 3160 }, { "epoch": 0.12184793070259865, "grad_norm": 1.3851470947265625, "learning_rate": 0.00019818161475460342, "loss": 1.3282, "step": 3165 }, { "epoch": 0.12204042348411935, "grad_norm": 1.252103567123413, "learning_rate": 0.0001981758697519695, "loss": 1.3326, "step": 3170 }, { "epoch": 0.12223291626564003, "grad_norm": 2.6637227535247803, "learning_rate": 0.0001981701157718027, "loss": 1.4247, "step": 3175 }, { "epoch": 0.12242540904716073, "grad_norm": 1.4228829145431519, "learning_rate": 0.00019816435281462907, "loss": 1.3287, "step": 3180 }, { "epoch": 0.12261790182868143, "grad_norm": 1.0654631853103638, "learning_rate": 0.00019815858088097565, "loss": 1.3651, "step": 3185 }, { "epoch": 
0.12281039461020211, "grad_norm": 1.1779879331588745, "learning_rate": 0.00019815279997137028, "loss": 1.2699, "step": 3190 }, { "epoch": 0.12300288739172281, "grad_norm": 0.966482937335968, "learning_rate": 0.0001981470100863416, "loss": 1.3029, "step": 3195 }, { "epoch": 0.12319538017324351, "grad_norm": 1.13119375705719, "learning_rate": 0.00019814121122641894, "loss": 1.3431, "step": 3200 }, { "epoch": 0.1233878729547642, "grad_norm": 1.0690468549728394, "learning_rate": 0.00019813540339213263, "loss": 1.237, "step": 3205 }, { "epoch": 0.12358036573628489, "grad_norm": 1.169592022895813, "learning_rate": 0.00019812958658401382, "loss": 1.3341, "step": 3210 }, { "epoch": 0.12377285851780558, "grad_norm": 0.9310591816902161, "learning_rate": 0.00019812376080259435, "loss": 1.3168, "step": 3215 }, { "epoch": 0.12396535129932627, "grad_norm": 1.1262513399124146, "learning_rate": 0.00019811792604840694, "loss": 1.322, "step": 3220 }, { "epoch": 0.12415784408084697, "grad_norm": 1.0723376274108887, "learning_rate": 0.00019811208232198518, "loss": 1.2814, "step": 3225 }, { "epoch": 0.12435033686236766, "grad_norm": 1.5084266662597656, "learning_rate": 0.00019810622962386344, "loss": 1.3136, "step": 3230 }, { "epoch": 0.12454282964388835, "grad_norm": 1.5219266414642334, "learning_rate": 0.0001981003679545769, "loss": 1.2971, "step": 3235 }, { "epoch": 0.12473532242540905, "grad_norm": 1.8135708570480347, "learning_rate": 0.00019809449731466154, "loss": 1.3987, "step": 3240 }, { "epoch": 0.12492781520692973, "grad_norm": 1.9838290214538574, "learning_rate": 0.00019808861770465424, "loss": 1.4063, "step": 3245 }, { "epoch": 0.12512030798845045, "grad_norm": 0.9821895956993103, "learning_rate": 0.00019808272912509258, "loss": 1.4336, "step": 3250 }, { "epoch": 0.12531280076997112, "grad_norm": 1.0371532440185547, "learning_rate": 0.00019807683157651513, "loss": 1.4659, "step": 3255 }, { "epoch": 0.12550529355149181, "grad_norm": 1.2441003322601318, "learning_rate": 
0.0001980709250594611, "loss": 1.3807, "step": 3260 }, { "epoch": 0.1256977863330125, "grad_norm": 1.6097456216812134, "learning_rate": 0.00019806500957447067, "loss": 1.4115, "step": 3265 }, { "epoch": 0.1258902791145332, "grad_norm": 1.4005634784698486, "learning_rate": 0.0001980590851220847, "loss": 1.6008, "step": 3270 }, { "epoch": 0.1260827718960539, "grad_norm": 1.1883544921875, "learning_rate": 0.00019805315170284498, "loss": 1.3768, "step": 3275 }, { "epoch": 0.12627526467757458, "grad_norm": 1.2404242753982544, "learning_rate": 0.00019804720931729413, "loss": 1.463, "step": 3280 }, { "epoch": 0.12646775745909528, "grad_norm": 0.625027596950531, "learning_rate": 0.00019804125796597544, "loss": 1.3286, "step": 3285 }, { "epoch": 0.12666025024061597, "grad_norm": 1.5616633892059326, "learning_rate": 0.0001980352976494332, "loss": 1.4161, "step": 3290 }, { "epoch": 0.12685274302213667, "grad_norm": 0.8003360629081726, "learning_rate": 0.0001980293283682124, "loss": 1.4117, "step": 3295 }, { "epoch": 0.12704523580365737, "grad_norm": 1.0671011209487915, "learning_rate": 0.0001980233501228589, "loss": 1.4192, "step": 3300 }, { "epoch": 0.12723772858517807, "grad_norm": 1.4135669469833374, "learning_rate": 0.0001980173629139194, "loss": 1.3046, "step": 3305 }, { "epoch": 0.12743022136669874, "grad_norm": 1.0450470447540283, "learning_rate": 0.00019801136674194134, "loss": 1.4156, "step": 3310 }, { "epoch": 0.12762271414821943, "grad_norm": 1.1435261964797974, "learning_rate": 0.00019800536160747306, "loss": 1.2311, "step": 3315 }, { "epoch": 0.12781520692974013, "grad_norm": 1.5508229732513428, "learning_rate": 0.0001979993475110637, "loss": 1.4224, "step": 3320 }, { "epoch": 0.12800769971126083, "grad_norm": 0.9542085528373718, "learning_rate": 0.0001979933244532632, "loss": 1.2423, "step": 3325 }, { "epoch": 0.12820019249278153, "grad_norm": 1.5797593593597412, "learning_rate": 0.0001979872924346223, "loss": 1.3357, "step": 3330 }, { "epoch": 
0.12839268527430223, "grad_norm": 1.0982688665390015, "learning_rate": 0.00019798125145569263, "loss": 1.2404, "step": 3335 }, { "epoch": 0.1285851780558229, "grad_norm": 1.5471248626708984, "learning_rate": 0.0001979752015170266, "loss": 1.3556, "step": 3340 }, { "epoch": 0.1287776708373436, "grad_norm": 1.64442777633667, "learning_rate": 0.0001979691426191774, "loss": 1.3407, "step": 3345 }, { "epoch": 0.1289701636188643, "grad_norm": 1.494186520576477, "learning_rate": 0.0001979630747626991, "loss": 1.4509, "step": 3350 }, { "epoch": 0.129162656400385, "grad_norm": 0.9598186612129211, "learning_rate": 0.00019795699794814654, "loss": 1.3221, "step": 3355 }, { "epoch": 0.1293551491819057, "grad_norm": 1.1328315734863281, "learning_rate": 0.00019795091217607544, "loss": 1.5129, "step": 3360 }, { "epoch": 0.12954764196342639, "grad_norm": 1.0476043224334717, "learning_rate": 0.00019794481744704227, "loss": 1.3448, "step": 3365 }, { "epoch": 0.12974013474494706, "grad_norm": 1.2570463418960571, "learning_rate": 0.0001979387137616044, "loss": 1.2726, "step": 3370 }, { "epoch": 0.12993262752646775, "grad_norm": 1.395627498626709, "learning_rate": 0.00019793260112031992, "loss": 1.1469, "step": 3375 }, { "epoch": 0.13012512030798845, "grad_norm": 2.2382960319519043, "learning_rate": 0.00019792647952374782, "loss": 1.3375, "step": 3380 }, { "epoch": 0.13031761308950915, "grad_norm": 1.4930087327957153, "learning_rate": 0.00019792034897244784, "loss": 1.3684, "step": 3385 }, { "epoch": 0.13051010587102985, "grad_norm": 0.9732452034950256, "learning_rate": 0.00019791420946698064, "loss": 1.0792, "step": 3390 }, { "epoch": 0.13070259865255052, "grad_norm": 1.9484987258911133, "learning_rate": 0.0001979080610079076, "loss": 1.4284, "step": 3395 }, { "epoch": 0.13089509143407121, "grad_norm": 1.3746837377548218, "learning_rate": 0.00019790190359579097, "loss": 1.4393, "step": 3400 }, { "epoch": 0.1310875842155919, "grad_norm": 1.2191319465637207, "learning_rate": 
0.0001978957372311938, "loss": 1.2184, "step": 3405 }, { "epoch": 0.1312800769971126, "grad_norm": 1.0825196504592896, "learning_rate": 0.00019788956191467994, "loss": 1.3891, "step": 3410 }, { "epoch": 0.1314725697786333, "grad_norm": 1.9972898960113525, "learning_rate": 0.00019788337764681412, "loss": 1.3207, "step": 3415 }, { "epoch": 0.131665062560154, "grad_norm": 1.3864003419876099, "learning_rate": 0.00019787718442816182, "loss": 1.3791, "step": 3420 }, { "epoch": 0.13185755534167468, "grad_norm": 1.3315006494522095, "learning_rate": 0.0001978709822592894, "loss": 1.4253, "step": 3425 }, { "epoch": 0.13205004812319537, "grad_norm": 1.0171843767166138, "learning_rate": 0.00019786477114076397, "loss": 1.2974, "step": 3430 }, { "epoch": 0.13224254090471607, "grad_norm": 1.293380618095398, "learning_rate": 0.00019785855107315353, "loss": 1.3616, "step": 3435 }, { "epoch": 0.13243503368623677, "grad_norm": 2.0498528480529785, "learning_rate": 0.00019785232205702681, "loss": 1.3431, "step": 3440 }, { "epoch": 0.13262752646775747, "grad_norm": 0.8635803461074829, "learning_rate": 0.0001978460840929535, "loss": 1.3672, "step": 3445 }, { "epoch": 0.13282001924927817, "grad_norm": 0.9983857274055481, "learning_rate": 0.00019783983718150392, "loss": 1.4856, "step": 3450 }, { "epoch": 0.13301251203079884, "grad_norm": 4.542407989501953, "learning_rate": 0.00019783358132324937, "loss": 1.4599, "step": 3455 }, { "epoch": 0.13320500481231953, "grad_norm": 1.5495860576629639, "learning_rate": 0.00019782731651876194, "loss": 1.3641, "step": 3460 }, { "epoch": 0.13339749759384023, "grad_norm": 1.2070780992507935, "learning_rate": 0.00019782104276861443, "loss": 1.3596, "step": 3465 }, { "epoch": 0.13358999037536093, "grad_norm": 1.1749752759933472, "learning_rate": 0.00019781476007338058, "loss": 1.2387, "step": 3470 }, { "epoch": 0.13378248315688163, "grad_norm": 1.8580079078674316, "learning_rate": 0.00019780846843363485, "loss": 1.3966, "step": 3475 }, { "epoch": 
0.1339749759384023, "grad_norm": 1.9713795185089111, "learning_rate": 0.00019780216784995265, "loss": 1.2541, "step": 3480 }, { "epoch": 0.134167468719923, "grad_norm": 1.4017597436904907, "learning_rate": 0.00019779585832291002, "loss": 1.4827, "step": 3485 }, { "epoch": 0.1343599615014437, "grad_norm": 1.188761591911316, "learning_rate": 0.00019778953985308406, "loss": 1.3972, "step": 3490 }, { "epoch": 0.1345524542829644, "grad_norm": 1.0930372476577759, "learning_rate": 0.00019778321244105242, "loss": 1.4706, "step": 3495 }, { "epoch": 0.1347449470644851, "grad_norm": 1.3041532039642334, "learning_rate": 0.0001977768760873938, "loss": 1.1929, "step": 3500 }, { "epoch": 0.13493743984600579, "grad_norm": 2.6741833686828613, "learning_rate": 0.00019777053079268753, "loss": 1.268, "step": 3505 }, { "epoch": 0.13512993262752646, "grad_norm": 1.091823935508728, "learning_rate": 0.0001977641765575139, "loss": 1.2776, "step": 3510 }, { "epoch": 0.13532242540904715, "grad_norm": 0.9205764532089233, "learning_rate": 0.00019775781338245398, "loss": 1.3007, "step": 3515 }, { "epoch": 0.13551491819056785, "grad_norm": 1.6321576833724976, "learning_rate": 0.00019775144126808958, "loss": 1.4214, "step": 3520 }, { "epoch": 0.13570741097208855, "grad_norm": 1.7947146892547607, "learning_rate": 0.00019774506021500343, "loss": 1.3895, "step": 3525 }, { "epoch": 0.13589990375360925, "grad_norm": 1.6696717739105225, "learning_rate": 0.00019773867022377902, "loss": 1.3968, "step": 3530 }, { "epoch": 0.13609239653512994, "grad_norm": 1.1003444194793701, "learning_rate": 0.0001977322712950007, "loss": 1.4084, "step": 3535 }, { "epoch": 0.13628488931665061, "grad_norm": 1.0268352031707764, "learning_rate": 0.00019772586342925357, "loss": 1.254, "step": 3540 }, { "epoch": 0.1364773820981713, "grad_norm": 1.3906810283660889, "learning_rate": 0.0001977194466271236, "loss": 1.3266, "step": 3545 }, { "epoch": 0.136669874879692, "grad_norm": 1.1786664724349976, "learning_rate": 
0.00019771302088919757, "loss": 1.3114, "step": 3550 }, { "epoch": 0.1368623676612127, "grad_norm": 1.0252714157104492, "learning_rate": 0.00019770658621606307, "loss": 1.2089, "step": 3555 }, { "epoch": 0.1370548604427334, "grad_norm": 0.8099033236503601, "learning_rate": 0.00019770014260830853, "loss": 1.2607, "step": 3560 }, { "epoch": 0.1372473532242541, "grad_norm": 1.3679542541503906, "learning_rate": 0.0001976936900665231, "loss": 1.376, "step": 3565 }, { "epoch": 0.13743984600577477, "grad_norm": 1.7685283422470093, "learning_rate": 0.00019768722859129693, "loss": 1.4522, "step": 3570 }, { "epoch": 0.13763233878729547, "grad_norm": 1.0158277750015259, "learning_rate": 0.00019768075818322081, "loss": 1.2714, "step": 3575 }, { "epoch": 0.13782483156881617, "grad_norm": 1.7043020725250244, "learning_rate": 0.00019767427884288642, "loss": 1.5669, "step": 3580 }, { "epoch": 0.13801732435033687, "grad_norm": 1.8171344995498657, "learning_rate": 0.00019766779057088627, "loss": 1.4186, "step": 3585 }, { "epoch": 0.13820981713185757, "grad_norm": 1.0524088144302368, "learning_rate": 0.00019766129336781365, "loss": 1.167, "step": 3590 }, { "epoch": 0.13840230991337824, "grad_norm": 1.558383584022522, "learning_rate": 0.0001976547872342627, "loss": 1.5015, "step": 3595 }, { "epoch": 0.13859480269489893, "grad_norm": 1.9925919771194458, "learning_rate": 0.00019764827217082838, "loss": 1.3661, "step": 3600 }, { "epoch": 0.13878729547641963, "grad_norm": 1.5693559646606445, "learning_rate": 0.0001976417481781064, "loss": 1.3389, "step": 3605 }, { "epoch": 0.13897978825794033, "grad_norm": 1.2609871625900269, "learning_rate": 0.00019763521525669343, "loss": 1.2883, "step": 3610 }, { "epoch": 0.13917228103946103, "grad_norm": 1.4910306930541992, "learning_rate": 0.00019762867340718674, "loss": 1.4237, "step": 3615 }, { "epoch": 0.13936477382098172, "grad_norm": 0.9409481287002563, "learning_rate": 0.0001976221226301846, "loss": 1.4289, "step": 3620 }, { "epoch": 
0.1395572666025024, "grad_norm": 0.9263445138931274, "learning_rate": 0.00019761556292628604, "loss": 1.2987, "step": 3625 }, { "epoch": 0.1397497593840231, "grad_norm": 0.9329832792282104, "learning_rate": 0.0001976089942960909, "loss": 1.3709, "step": 3630 }, { "epoch": 0.1399422521655438, "grad_norm": 1.7852829694747925, "learning_rate": 0.00019760241674019984, "loss": 1.2282, "step": 3635 }, { "epoch": 0.1401347449470645, "grad_norm": 1.0068609714508057, "learning_rate": 0.0001975958302592143, "loss": 1.3143, "step": 3640 }, { "epoch": 0.14032723772858519, "grad_norm": 2.1680188179016113, "learning_rate": 0.0001975892348537366, "loss": 1.4447, "step": 3645 }, { "epoch": 0.14051973051010588, "grad_norm": 1.633169412612915, "learning_rate": 0.00019758263052436988, "loss": 1.2633, "step": 3650 }, { "epoch": 0.14071222329162655, "grad_norm": 1.3609623908996582, "learning_rate": 0.000197576017271718, "loss": 1.3352, "step": 3655 }, { "epoch": 0.14090471607314725, "grad_norm": 1.50294828414917, "learning_rate": 0.00019756939509638573, "loss": 1.3557, "step": 3660 }, { "epoch": 0.14109720885466795, "grad_norm": 0.9931232333183289, "learning_rate": 0.0001975627639989786, "loss": 1.4719, "step": 3665 }, { "epoch": 0.14128970163618865, "grad_norm": 1.3870011568069458, "learning_rate": 0.000197556123980103, "loss": 1.5173, "step": 3670 }, { "epoch": 0.14148219441770934, "grad_norm": 1.274064540863037, "learning_rate": 0.00019754947504036608, "loss": 1.3951, "step": 3675 }, { "epoch": 0.14167468719923004, "grad_norm": 1.6096014976501465, "learning_rate": 0.00019754281718037593, "loss": 1.4478, "step": 3680 }, { "epoch": 0.1418671799807507, "grad_norm": 1.155772089958191, "learning_rate": 0.00019753615040074131, "loss": 1.229, "step": 3685 }, { "epoch": 0.1420596727622714, "grad_norm": 1.123856544494629, "learning_rate": 0.0001975294747020718, "loss": 1.5036, "step": 3690 }, { "epoch": 0.1422521655437921, "grad_norm": 1.541308879852295, "learning_rate": 
0.00019752279008497796, "loss": 1.1174, "step": 3695 }, { "epoch": 0.1424446583253128, "grad_norm": 1.8912441730499268, "learning_rate": 0.00019751609655007098, "loss": 1.3753, "step": 3700 }, { "epoch": 0.1426371511068335, "grad_norm": 1.7746648788452148, "learning_rate": 0.00019750939409796293, "loss": 1.3115, "step": 3705 }, { "epoch": 0.14282964388835417, "grad_norm": 1.2228045463562012, "learning_rate": 0.00019750268272926676, "loss": 1.3477, "step": 3710 }, { "epoch": 0.14302213666987487, "grad_norm": 1.5031695365905762, "learning_rate": 0.00019749596244459614, "loss": 1.1905, "step": 3715 }, { "epoch": 0.14321462945139557, "grad_norm": 2.871879816055298, "learning_rate": 0.0001974892332445656, "loss": 1.3334, "step": 3720 }, { "epoch": 0.14340712223291627, "grad_norm": 1.1911511421203613, "learning_rate": 0.00019748249512979048, "loss": 1.2528, "step": 3725 }, { "epoch": 0.14359961501443697, "grad_norm": 1.2722115516662598, "learning_rate": 0.00019747574810088697, "loss": 1.3314, "step": 3730 }, { "epoch": 0.14379210779595766, "grad_norm": 1.0464539527893066, "learning_rate": 0.00019746899215847198, "loss": 1.1621, "step": 3735 }, { "epoch": 0.14398460057747833, "grad_norm": 1.8877158164978027, "learning_rate": 0.00019746222730316338, "loss": 1.2534, "step": 3740 }, { "epoch": 0.14417709335899903, "grad_norm": 1.5137780904769897, "learning_rate": 0.00019745545353557967, "loss": 1.1738, "step": 3745 }, { "epoch": 0.14436958614051973, "grad_norm": 1.7104227542877197, "learning_rate": 0.00019744867085634034, "loss": 1.2868, "step": 3750 }, { "epoch": 0.14456207892204043, "grad_norm": 1.2920212745666504, "learning_rate": 0.00019744187926606558, "loss": 1.3054, "step": 3755 }, { "epoch": 0.14475457170356112, "grad_norm": 2.3661959171295166, "learning_rate": 0.00019743507876537647, "loss": 1.3187, "step": 3760 }, { "epoch": 0.14494706448508182, "grad_norm": 1.4622807502746582, "learning_rate": 0.00019742826935489487, "loss": 1.1548, "step": 3765 }, { "epoch": 
0.1451395572666025, "grad_norm": 1.7818437814712524, "learning_rate": 0.00019742145103524342, "loss": 1.4081, "step": 3770 }, { "epoch": 0.1453320500481232, "grad_norm": 1.023716926574707, "learning_rate": 0.00019741462380704566, "loss": 1.3367, "step": 3775 }, { "epoch": 0.1455245428296439, "grad_norm": 1.4382961988449097, "learning_rate": 0.00019740778767092585, "loss": 1.3498, "step": 3780 }, { "epoch": 0.14571703561116459, "grad_norm": 1.5282870531082153, "learning_rate": 0.0001974009426275091, "loss": 1.2685, "step": 3785 }, { "epoch": 0.14590952839268528, "grad_norm": 1.2222365140914917, "learning_rate": 0.0001973940886774214, "loss": 1.2273, "step": 3790 }, { "epoch": 0.14610202117420595, "grad_norm": 1.3231360912322998, "learning_rate": 0.00019738722582128944, "loss": 1.5449, "step": 3795 }, { "epoch": 0.14629451395572665, "grad_norm": 1.2198995351791382, "learning_rate": 0.00019738035405974085, "loss": 1.4927, "step": 3800 }, { "epoch": 0.14648700673724735, "grad_norm": 1.1108288764953613, "learning_rate": 0.00019737347339340394, "loss": 1.3894, "step": 3805 }, { "epoch": 0.14667949951876805, "grad_norm": 1.1478091478347778, "learning_rate": 0.0001973665838229079, "loss": 1.342, "step": 3810 }, { "epoch": 0.14687199230028875, "grad_norm": 1.555680751800537, "learning_rate": 0.0001973596853488828, "loss": 1.269, "step": 3815 }, { "epoch": 0.14706448508180944, "grad_norm": 1.2819339036941528, "learning_rate": 0.0001973527779719594, "loss": 1.3462, "step": 3820 }, { "epoch": 0.1472569778633301, "grad_norm": 1.6733057498931885, "learning_rate": 0.00019734586169276939, "loss": 1.3179, "step": 3825 }, { "epoch": 0.1474494706448508, "grad_norm": 1.8622225522994995, "learning_rate": 0.00019733893651194517, "loss": 1.452, "step": 3830 }, { "epoch": 0.1476419634263715, "grad_norm": 1.2225052118301392, "learning_rate": 0.00019733200243012006, "loss": 1.2925, "step": 3835 }, { "epoch": 0.1478344562078922, "grad_norm": 0.7980884313583374, "learning_rate": 
0.00019732505944792804, "loss": 1.1505, "step": 3840 }, { "epoch": 0.1480269489894129, "grad_norm": 1.3874131441116333, "learning_rate": 0.00019731810756600405, "loss": 1.2989, "step": 3845 }, { "epoch": 0.1482194417709336, "grad_norm": 1.4387590885162354, "learning_rate": 0.00019731114678498378, "loss": 1.3295, "step": 3850 }, { "epoch": 0.14841193455245427, "grad_norm": 1.8189646005630493, "learning_rate": 0.00019730417710550383, "loss": 1.2926, "step": 3855 }, { "epoch": 0.14860442733397497, "grad_norm": 0.9577664732933044, "learning_rate": 0.0001972971985282014, "loss": 1.2375, "step": 3860 }, { "epoch": 0.14879692011549567, "grad_norm": 1.7154825925827026, "learning_rate": 0.00019729021105371474, "loss": 1.2853, "step": 3865 }, { "epoch": 0.14898941289701637, "grad_norm": 2.1061089038848877, "learning_rate": 0.00019728321468268277, "loss": 1.3391, "step": 3870 }, { "epoch": 0.14918190567853706, "grad_norm": 1.0177017450332642, "learning_rate": 0.00019727620941574524, "loss": 1.2801, "step": 3875 }, { "epoch": 0.14937439846005776, "grad_norm": 1.0773547887802124, "learning_rate": 0.00019726919525354277, "loss": 1.3063, "step": 3880 }, { "epoch": 0.14956689124157843, "grad_norm": 0.9082854986190796, "learning_rate": 0.00019726217219671673, "loss": 1.3601, "step": 3885 }, { "epoch": 0.14975938402309913, "grad_norm": 1.341280221939087, "learning_rate": 0.00019725514024590934, "loss": 1.4052, "step": 3890 }, { "epoch": 0.14995187680461983, "grad_norm": 2.240399122238159, "learning_rate": 0.00019724809940176364, "loss": 1.1955, "step": 3895 }, { "epoch": 0.15014436958614052, "grad_norm": 1.549137830734253, "learning_rate": 0.00019724104966492348, "loss": 1.3089, "step": 3900 }, { "epoch": 0.15033686236766122, "grad_norm": 1.6887294054031372, "learning_rate": 0.00019723399103603346, "loss": 1.4147, "step": 3905 }, { "epoch": 0.1505293551491819, "grad_norm": 1.793087363243103, "learning_rate": 0.0001972269235157391, "loss": 1.2674, "step": 3910 }, { "epoch": 
0.1507218479307026, "grad_norm": 1.718336820602417, "learning_rate": 0.00019721984710468663, "loss": 1.2716, "step": 3915 }, { "epoch": 0.1509143407122233, "grad_norm": 2.2342288494110107, "learning_rate": 0.0001972127618035232, "loss": 0.965, "step": 3920 }, { "epoch": 0.15110683349374399, "grad_norm": 1.5450822114944458, "learning_rate": 0.00019720566761289665, "loss": 1.3461, "step": 3925 }, { "epoch": 0.15129932627526468, "grad_norm": 1.4395346641540527, "learning_rate": 0.0001971985645334557, "loss": 1.3462, "step": 3930 }, { "epoch": 0.15149181905678538, "grad_norm": 1.1160500049591064, "learning_rate": 0.00019719145256584994, "loss": 1.3334, "step": 3935 }, { "epoch": 0.15168431183830605, "grad_norm": 1.0270999670028687, "learning_rate": 0.00019718433171072967, "loss": 1.2737, "step": 3940 }, { "epoch": 0.15187680461982675, "grad_norm": 1.4266023635864258, "learning_rate": 0.00019717720196874608, "loss": 1.3639, "step": 3945 }, { "epoch": 0.15206929740134745, "grad_norm": 1.552283525466919, "learning_rate": 0.00019717006334055108, "loss": 1.301, "step": 3950 }, { "epoch": 0.15226179018286815, "grad_norm": 1.5459437370300293, "learning_rate": 0.0001971629158267975, "loss": 1.265, "step": 3955 }, { "epoch": 0.15245428296438884, "grad_norm": 1.4866915941238403, "learning_rate": 0.00019715575942813888, "loss": 1.5694, "step": 3960 }, { "epoch": 0.15264677574590954, "grad_norm": 1.1116254329681396, "learning_rate": 0.00019714859414522967, "loss": 1.4858, "step": 3965 }, { "epoch": 0.1528392685274302, "grad_norm": 1.1708245277404785, "learning_rate": 0.0001971414199787251, "loss": 1.3582, "step": 3970 }, { "epoch": 0.1530317613089509, "grad_norm": 1.1672711372375488, "learning_rate": 0.00019713423692928114, "loss": 1.3393, "step": 3975 }, { "epoch": 0.1532242540904716, "grad_norm": 1.4800153970718384, "learning_rate": 0.0001971270449975547, "loss": 1.22, "step": 3980 }, { "epoch": 0.1534167468719923, "grad_norm": 1.92826509475708, "learning_rate": 
0.00019711984418420338, "loss": 1.3902, "step": 3985 }, { "epoch": 0.153609239653513, "grad_norm": 1.2292252779006958, "learning_rate": 0.00019711263448988567, "loss": 1.2327, "step": 3990 }, { "epoch": 0.1538017324350337, "grad_norm": 1.1007169485092163, "learning_rate": 0.00019710541591526085, "loss": 1.4284, "step": 3995 }, { "epoch": 0.15399422521655437, "grad_norm": 0.9456301927566528, "learning_rate": 0.00019709818846098905, "loss": 1.1589, "step": 4000 }, { "epoch": 0.15418671799807507, "grad_norm": 1.518704891204834, "learning_rate": 0.0001970909521277311, "loss": 1.3976, "step": 4005 }, { "epoch": 0.15437921077959577, "grad_norm": 1.3318589925765991, "learning_rate": 0.00019708370691614872, "loss": 1.3635, "step": 4010 }, { "epoch": 0.15457170356111646, "grad_norm": 1.752626657485962, "learning_rate": 0.0001970764528269045, "loss": 1.3175, "step": 4015 }, { "epoch": 0.15476419634263716, "grad_norm": 2.055469512939453, "learning_rate": 0.00019706918986066172, "loss": 1.2873, "step": 4020 }, { "epoch": 0.15495668912415783, "grad_norm": 2.1063289642333984, "learning_rate": 0.00019706191801808457, "loss": 1.3208, "step": 4025 }, { "epoch": 0.15514918190567853, "grad_norm": 1.2449209690093994, "learning_rate": 0.00019705463729983798, "loss": 1.2863, "step": 4030 }, { "epoch": 0.15534167468719923, "grad_norm": 1.4950852394104004, "learning_rate": 0.00019704734770658778, "loss": 1.2338, "step": 4035 }, { "epoch": 0.15553416746871992, "grad_norm": 0.9372254014015198, "learning_rate": 0.00019704004923900046, "loss": 1.2105, "step": 4040 }, { "epoch": 0.15572666025024062, "grad_norm": 1.2273038625717163, "learning_rate": 0.00019703274189774347, "loss": 1.3584, "step": 4045 }, { "epoch": 0.15591915303176132, "grad_norm": 1.1560612916946411, "learning_rate": 0.00019702542568348502, "loss": 1.432, "step": 4050 }, { "epoch": 0.156111645813282, "grad_norm": 1.2214939594268799, "learning_rate": 0.00019701810059689415, "loss": 1.3237, "step": 4055 }, { "epoch": 
0.1563041385948027, "grad_norm": 1.255182147026062, "learning_rate": 0.00019701076663864066, "loss": 1.5111, "step": 4060 }, { "epoch": 0.1564966313763234, "grad_norm": 1.2496423721313477, "learning_rate": 0.0001970034238093952, "loss": 1.3917, "step": 4065 }, { "epoch": 0.15668912415784408, "grad_norm": 2.773935556411743, "learning_rate": 0.00019699607210982918, "loss": 1.3072, "step": 4070 }, { "epoch": 0.15688161693936478, "grad_norm": 2.5853006839752197, "learning_rate": 0.00019698871154061497, "loss": 1.2737, "step": 4075 }, { "epoch": 0.15707410972088548, "grad_norm": 0.9573465585708618, "learning_rate": 0.00019698134210242553, "loss": 1.411, "step": 4080 }, { "epoch": 0.15726660250240615, "grad_norm": 2.204242467880249, "learning_rate": 0.00019697396379593482, "loss": 1.2493, "step": 4085 }, { "epoch": 0.15745909528392685, "grad_norm": 1.4688855409622192, "learning_rate": 0.0001969665766218175, "loss": 1.273, "step": 4090 }, { "epoch": 0.15765158806544755, "grad_norm": 2.1439919471740723, "learning_rate": 0.0001969591805807491, "loss": 1.4691, "step": 4095 }, { "epoch": 0.15784408084696824, "grad_norm": 1.4877434968948364, "learning_rate": 0.00019695177567340594, "loss": 1.4427, "step": 4100 }, { "epoch": 0.15803657362848894, "grad_norm": 1.3709458112716675, "learning_rate": 0.00019694436190046514, "loss": 1.2713, "step": 4105 }, { "epoch": 0.1582290664100096, "grad_norm": 2.1676931381225586, "learning_rate": 0.00019693693926260464, "loss": 1.1888, "step": 4110 }, { "epoch": 0.1584215591915303, "grad_norm": 1.1726205348968506, "learning_rate": 0.0001969295077605032, "loss": 1.3544, "step": 4115 }, { "epoch": 0.158614051973051, "grad_norm": 1.2441811561584473, "learning_rate": 0.00019692206739484037, "loss": 1.4796, "step": 4120 }, { "epoch": 0.1588065447545717, "grad_norm": 1.4889960289001465, "learning_rate": 0.00019691461816629652, "loss": 1.418, "step": 4125 }, { "epoch": 0.1589990375360924, "grad_norm": 1.3810794353485107, "learning_rate": 
0.00019690716007555282, "loss": 1.6398, "step": 4130 }, { "epoch": 0.1591915303176131, "grad_norm": 1.589390754699707, "learning_rate": 0.00019689969312329132, "loss": 1.3203, "step": 4135 }, { "epoch": 0.15938402309913377, "grad_norm": 0.8731974959373474, "learning_rate": 0.00019689221731019477, "loss": 1.2408, "step": 4140 }, { "epoch": 0.15957651588065447, "grad_norm": 1.046852707862854, "learning_rate": 0.00019688473263694678, "loss": 1.1249, "step": 4145 }, { "epoch": 0.15976900866217517, "grad_norm": 0.8767102360725403, "learning_rate": 0.0001968772391042318, "loss": 1.2611, "step": 4150 }, { "epoch": 0.15996150144369586, "grad_norm": 1.1452685594558716, "learning_rate": 0.0001968697367127351, "loss": 1.2992, "step": 4155 }, { "epoch": 0.16015399422521656, "grad_norm": 0.9254185557365417, "learning_rate": 0.00019686222546314266, "loss": 1.3894, "step": 4160 }, { "epoch": 0.16034648700673726, "grad_norm": 0.9607768654823303, "learning_rate": 0.00019685470535614133, "loss": 1.3076, "step": 4165 }, { "epoch": 0.16053897978825793, "grad_norm": 1.2880384922027588, "learning_rate": 0.0001968471763924188, "loss": 1.3868, "step": 4170 }, { "epoch": 0.16073147256977863, "grad_norm": 1.1116464138031006, "learning_rate": 0.00019683963857266356, "loss": 1.2489, "step": 4175 }, { "epoch": 0.16092396535129933, "grad_norm": 0.9132522940635681, "learning_rate": 0.0001968320918975649, "loss": 1.3788, "step": 4180 }, { "epoch": 0.16111645813282002, "grad_norm": 1.1793001890182495, "learning_rate": 0.00019682453636781283, "loss": 1.4742, "step": 4185 }, { "epoch": 0.16130895091434072, "grad_norm": 1.1624877452850342, "learning_rate": 0.00019681697198409835, "loss": 1.3547, "step": 4190 }, { "epoch": 0.16150144369586142, "grad_norm": 1.1367181539535522, "learning_rate": 0.00019680939874711312, "loss": 1.3692, "step": 4195 }, { "epoch": 0.1616939364773821, "grad_norm": 1.0168886184692383, "learning_rate": 0.00019680181665754972, "loss": 1.4148, "step": 4200 }, { "epoch": 
0.1618864292589028, "grad_norm": 1.3179705142974854, "learning_rate": 0.0001967942257161014, "loss": 1.2674, "step": 4205 }, { "epoch": 0.16207892204042348, "grad_norm": 0.8679062724113464, "learning_rate": 0.00019678662592346235, "loss": 1.4001, "step": 4210 }, { "epoch": 0.16227141482194418, "grad_norm": 0.8477693200111389, "learning_rate": 0.00019677901728032754, "loss": 1.3527, "step": 4215 }, { "epoch": 0.16246390760346488, "grad_norm": 1.280357003211975, "learning_rate": 0.00019677139978739266, "loss": 1.2576, "step": 4220 }, { "epoch": 0.16265640038498555, "grad_norm": 3.5572381019592285, "learning_rate": 0.00019676377344535434, "loss": 1.3059, "step": 4225 }, { "epoch": 0.16284889316650625, "grad_norm": 0.9162838459014893, "learning_rate": 0.0001967561382549099, "loss": 1.3655, "step": 4230 }, { "epoch": 0.16304138594802695, "grad_norm": 1.0635076761245728, "learning_rate": 0.00019674849421675764, "loss": 1.2356, "step": 4235 }, { "epoch": 0.16323387872954764, "grad_norm": 2.3638720512390137, "learning_rate": 0.00019674084133159642, "loss": 1.3598, "step": 4240 }, { "epoch": 0.16342637151106834, "grad_norm": 1.013108730316162, "learning_rate": 0.00019673317960012615, "loss": 1.6119, "step": 4245 }, { "epoch": 0.16361886429258904, "grad_norm": 1.391450047492981, "learning_rate": 0.00019672550902304737, "loss": 1.2481, "step": 4250 }, { "epoch": 0.1638113570741097, "grad_norm": 1.5574865341186523, "learning_rate": 0.00019671782960106157, "loss": 1.345, "step": 4255 }, { "epoch": 0.1640038498556304, "grad_norm": 1.8456825017929077, "learning_rate": 0.00019671014133487095, "loss": 1.3582, "step": 4260 }, { "epoch": 0.1641963426371511, "grad_norm": 1.4087297916412354, "learning_rate": 0.00019670244422517855, "loss": 1.3162, "step": 4265 }, { "epoch": 0.1643888354186718, "grad_norm": 1.167403221130371, "learning_rate": 0.0001966947382726882, "loss": 1.3841, "step": 4270 }, { "epoch": 0.1645813282001925, "grad_norm": 1.3395906686782837, "learning_rate": 
0.0001966870234781046, "loss": 1.1306, "step": 4275 }, { "epoch": 0.1647738209817132, "grad_norm": 0.8549813628196716, "learning_rate": 0.00019667929984213317, "loss": 1.3017, "step": 4280 }, { "epoch": 0.16496631376323387, "grad_norm": 0.8681890368461609, "learning_rate": 0.00019667156736548021, "loss": 1.2152, "step": 4285 }, { "epoch": 0.16515880654475457, "grad_norm": 1.8476097583770752, "learning_rate": 0.00019666382604885283, "loss": 1.2571, "step": 4290 }, { "epoch": 0.16535129932627526, "grad_norm": 1.6583194732666016, "learning_rate": 0.00019665607589295888, "loss": 1.3866, "step": 4295 }, { "epoch": 0.16554379210779596, "grad_norm": 1.6784121990203857, "learning_rate": 0.00019664831689850712, "loss": 1.2966, "step": 4300 }, { "epoch": 0.16573628488931666, "grad_norm": 1.5268521308898926, "learning_rate": 0.00019664054906620696, "loss": 1.3086, "step": 4305 }, { "epoch": 0.16592877767083736, "grad_norm": 2.0114951133728027, "learning_rate": 0.00019663277239676877, "loss": 1.2137, "step": 4310 }, { "epoch": 0.16612127045235803, "grad_norm": 1.4572757482528687, "learning_rate": 0.00019662498689090372, "loss": 1.2505, "step": 4315 }, { "epoch": 0.16631376323387873, "grad_norm": 1.4267566204071045, "learning_rate": 0.00019661719254932369, "loss": 1.1485, "step": 4320 }, { "epoch": 0.16650625601539942, "grad_norm": 0.9921162128448486, "learning_rate": 0.00019660938937274142, "loss": 1.304, "step": 4325 }, { "epoch": 0.16669874879692012, "grad_norm": 1.3901869058609009, "learning_rate": 0.00019660157736187047, "loss": 1.4347, "step": 4330 }, { "epoch": 0.16689124157844082, "grad_norm": 1.5446443557739258, "learning_rate": 0.0001965937565174252, "loss": 1.3157, "step": 4335 }, { "epoch": 0.1670837343599615, "grad_norm": 1.2553350925445557, "learning_rate": 0.0001965859268401208, "loss": 1.1882, "step": 4340 }, { "epoch": 0.1672762271414822, "grad_norm": 1.9385195970535278, "learning_rate": 0.0001965780883306732, "loss": 1.4522, "step": 4345 }, { "epoch": 
0.16746871992300288, "grad_norm": 1.426032543182373, "learning_rate": 0.00019657024098979916, "loss": 1.1029, "step": 4350 }, { "epoch": 0.16766121270452358, "grad_norm": 1.5562461614608765, "learning_rate": 0.0001965623848182163, "loss": 1.4837, "step": 4355 }, { "epoch": 0.16785370548604428, "grad_norm": 1.0057613849639893, "learning_rate": 0.00019655451981664306, "loss": 1.3095, "step": 4360 }, { "epoch": 0.16804619826756498, "grad_norm": 1.447845697402954, "learning_rate": 0.00019654664598579857, "loss": 1.4002, "step": 4365 }, { "epoch": 0.16823869104908565, "grad_norm": 0.9452415108680725, "learning_rate": 0.00019653876332640288, "loss": 1.3324, "step": 4370 }, { "epoch": 0.16843118383060635, "grad_norm": 1.7831186056137085, "learning_rate": 0.00019653087183917677, "loss": 1.3004, "step": 4375 }, { "epoch": 0.16862367661212704, "grad_norm": 1.0656229257583618, "learning_rate": 0.0001965229715248419, "loss": 1.5165, "step": 4380 }, { "epoch": 0.16881616939364774, "grad_norm": 1.0360915660858154, "learning_rate": 0.0001965150623841207, "loss": 1.2842, "step": 4385 }, { "epoch": 0.16900866217516844, "grad_norm": 1.286447525024414, "learning_rate": 0.00019650714441773643, "loss": 1.2902, "step": 4390 }, { "epoch": 0.16920115495668914, "grad_norm": 1.2435790300369263, "learning_rate": 0.00019649921762641306, "loss": 1.3049, "step": 4395 }, { "epoch": 0.1693936477382098, "grad_norm": 1.9299678802490234, "learning_rate": 0.0001964912820108755, "loss": 1.3057, "step": 4400 }, { "epoch": 0.1695861405197305, "grad_norm": 1.7493208646774292, "learning_rate": 0.0001964833375718494, "loss": 1.3225, "step": 4405 }, { "epoch": 0.1697786333012512, "grad_norm": 1.3697878122329712, "learning_rate": 0.0001964753843100612, "loss": 1.3518, "step": 4410 }, { "epoch": 0.1699711260827719, "grad_norm": 1.343985676765442, "learning_rate": 0.0001964674222262382, "loss": 1.3195, "step": 4415 }, { "epoch": 0.1701636188642926, "grad_norm": 1.0094975233078003, "learning_rate": 
0.00019645945132110853, "loss": 1.3184, "step": 4420 }, { "epoch": 0.17035611164581327, "grad_norm": 1.6048771142959595, "learning_rate": 0.00019645147159540096, "loss": 1.3307, "step": 4425 }, { "epoch": 0.17054860442733397, "grad_norm": 2.14099383354187, "learning_rate": 0.00019644348304984524, "loss": 1.3221, "step": 4430 }, { "epoch": 0.17074109720885466, "grad_norm": 2.5571303367614746, "learning_rate": 0.00019643548568517192, "loss": 1.3092, "step": 4435 }, { "epoch": 0.17093358999037536, "grad_norm": 1.1076972484588623, "learning_rate": 0.00019642747950211225, "loss": 1.1981, "step": 4440 }, { "epoch": 0.17112608277189606, "grad_norm": 1.1315946578979492, "learning_rate": 0.00019641946450139831, "loss": 1.335, "step": 4445 }, { "epoch": 0.17131857555341676, "grad_norm": 1.33171808719635, "learning_rate": 0.00019641144068376312, "loss": 1.4677, "step": 4450 }, { "epoch": 0.17151106833493743, "grad_norm": 0.87531977891922, "learning_rate": 0.0001964034080499403, "loss": 1.1795, "step": 4455 }, { "epoch": 0.17170356111645813, "grad_norm": 1.6923136711120605, "learning_rate": 0.00019639536660066446, "loss": 1.2491, "step": 4460 }, { "epoch": 0.17189605389797882, "grad_norm": 1.481703519821167, "learning_rate": 0.0001963873163366709, "loss": 1.2894, "step": 4465 }, { "epoch": 0.17208854667949952, "grad_norm": 3.3689515590667725, "learning_rate": 0.00019637925725869576, "loss": 1.3785, "step": 4470 }, { "epoch": 0.17228103946102022, "grad_norm": 2.498059034347534, "learning_rate": 0.000196371189367476, "loss": 1.2854, "step": 4475 }, { "epoch": 0.17247353224254092, "grad_norm": 1.2852959632873535, "learning_rate": 0.00019636311266374939, "loss": 1.2272, "step": 4480 }, { "epoch": 0.1726660250240616, "grad_norm": 0.9257192015647888, "learning_rate": 0.00019635502714825446, "loss": 1.1707, "step": 4485 }, { "epoch": 0.17285851780558228, "grad_norm": 0.989142656326294, "learning_rate": 0.00019634693282173058, "loss": 1.3174, "step": 4490 }, { "epoch": 
0.17305101058710298, "grad_norm": 1.4923882484436035, "learning_rate": 0.00019633882968491794, "loss": 1.2334, "step": 4495 }, { "epoch": 0.17324350336862368, "grad_norm": 1.2684218883514404, "learning_rate": 0.0001963307177385575, "loss": 1.2468, "step": 4500 }, { "epoch": 0.17343599615014438, "grad_norm": 0.9474775791168213, "learning_rate": 0.0001963225969833911, "loss": 1.2767, "step": 4505 }, { "epoch": 0.17362848893166508, "grad_norm": 2.477541446685791, "learning_rate": 0.00019631446742016126, "loss": 1.4144, "step": 4510 }, { "epoch": 0.17382098171318575, "grad_norm": 1.040477991104126, "learning_rate": 0.00019630632904961138, "loss": 1.5665, "step": 4515 }, { "epoch": 0.17401347449470644, "grad_norm": 1.3127304315567017, "learning_rate": 0.0001962981818724857, "loss": 1.3511, "step": 4520 }, { "epoch": 0.17420596727622714, "grad_norm": 1.6968106031417847, "learning_rate": 0.0001962900258895292, "loss": 1.3202, "step": 4525 }, { "epoch": 0.17439846005774784, "grad_norm": 2.2431318759918213, "learning_rate": 0.0001962818611014877, "loss": 1.351, "step": 4530 }, { "epoch": 0.17459095283926854, "grad_norm": 1.2938642501831055, "learning_rate": 0.00019627368750910779, "loss": 1.276, "step": 4535 }, { "epoch": 0.1747834456207892, "grad_norm": 1.1331931352615356, "learning_rate": 0.00019626550511313694, "loss": 1.4734, "step": 4540 }, { "epoch": 0.1749759384023099, "grad_norm": 1.4755507707595825, "learning_rate": 0.00019625731391432333, "loss": 1.24, "step": 4545 }, { "epoch": 0.1751684311838306, "grad_norm": 1.5442554950714111, "learning_rate": 0.00019624911391341604, "loss": 1.0894, "step": 4550 }, { "epoch": 0.1753609239653513, "grad_norm": 1.2970473766326904, "learning_rate": 0.00019624090511116481, "loss": 1.3262, "step": 4555 }, { "epoch": 0.175553416746872, "grad_norm": 2.1946523189544678, "learning_rate": 0.0001962326875083204, "loss": 1.4652, "step": 4560 }, { "epoch": 0.1757459095283927, "grad_norm": 1.1216411590576172, "learning_rate": 
0.00019622446110563417, "loss": 1.1608, "step": 4565 }, { "epoch": 0.17593840230991337, "grad_norm": 1.996535301208496, "learning_rate": 0.00019621622590385842, "loss": 1.2568, "step": 4570 }, { "epoch": 0.17613089509143406, "grad_norm": 1.9742660522460938, "learning_rate": 0.0001962079819037462, "loss": 1.3335, "step": 4575 }, { "epoch": 0.17632338787295476, "grad_norm": 1.985192060470581, "learning_rate": 0.00019619972910605134, "loss": 1.3529, "step": 4580 }, { "epoch": 0.17651588065447546, "grad_norm": 0.8765020966529846, "learning_rate": 0.00019619146751152848, "loss": 1.3956, "step": 4585 }, { "epoch": 0.17670837343599616, "grad_norm": 1.483407974243164, "learning_rate": 0.00019618319712093319, "loss": 1.4396, "step": 4590 }, { "epoch": 0.17690086621751686, "grad_norm": 1.5663124322891235, "learning_rate": 0.00019617491793502164, "loss": 1.3896, "step": 4595 }, { "epoch": 0.17709335899903753, "grad_norm": 1.3831099271774292, "learning_rate": 0.00019616662995455096, "loss": 1.2669, "step": 4600 }, { "epoch": 0.17728585178055822, "grad_norm": 0.8688403964042664, "learning_rate": 0.00019615833318027898, "loss": 1.2098, "step": 4605 }, { "epoch": 0.17747834456207892, "grad_norm": 1.9218660593032837, "learning_rate": 0.00019615002761296446, "loss": 1.1568, "step": 4610 }, { "epoch": 0.17767083734359962, "grad_norm": 1.5095698833465576, "learning_rate": 0.00019614171325336684, "loss": 1.0516, "step": 4615 }, { "epoch": 0.17786333012512032, "grad_norm": 0.9288404583930969, "learning_rate": 0.00019613339010224646, "loss": 1.075, "step": 4620 }, { "epoch": 0.17805582290664101, "grad_norm": 1.414787769317627, "learning_rate": 0.00019612505816036434, "loss": 1.2158, "step": 4625 }, { "epoch": 0.17824831568816168, "grad_norm": 1.3182802200317383, "learning_rate": 0.0001961167174284824, "loss": 1.3719, "step": 4630 }, { "epoch": 0.17844080846968238, "grad_norm": 1.1671231985092163, "learning_rate": 0.0001961083679073634, "loss": 1.3067, "step": 4635 }, { "epoch": 
0.17863330125120308, "grad_norm": 1.11225163936615, "learning_rate": 0.0001961000095977708, "loss": 1.1593, "step": 4640 }, { "epoch": 0.17882579403272378, "grad_norm": 1.235335111618042, "learning_rate": 0.00019609164250046894, "loss": 1.2232, "step": 4645 }, { "epoch": 0.17901828681424448, "grad_norm": 1.0023348331451416, "learning_rate": 0.00019608326661622291, "loss": 1.2926, "step": 4650 }, { "epoch": 0.17921077959576515, "grad_norm": 1.7143383026123047, "learning_rate": 0.00019607488194579867, "loss": 1.3149, "step": 4655 }, { "epoch": 0.17940327237728584, "grad_norm": 1.135324478149414, "learning_rate": 0.00019606648848996287, "loss": 1.4155, "step": 4660 }, { "epoch": 0.17959576515880654, "grad_norm": 0.7830592393875122, "learning_rate": 0.0001960580862494831, "loss": 1.2632, "step": 4665 }, { "epoch": 0.17978825794032724, "grad_norm": 1.546481966972351, "learning_rate": 0.0001960496752251277, "loss": 1.4674, "step": 4670 }, { "epoch": 0.17998075072184794, "grad_norm": 1.5377360582351685, "learning_rate": 0.00019604125541766574, "loss": 1.0782, "step": 4675 }, { "epoch": 0.18017324350336864, "grad_norm": 2.1382510662078857, "learning_rate": 0.0001960328268278672, "loss": 1.3008, "step": 4680 }, { "epoch": 0.1803657362848893, "grad_norm": 1.4963937997817993, "learning_rate": 0.00019602438945650277, "loss": 1.2601, "step": 4685 }, { "epoch": 0.18055822906641, "grad_norm": 1.4736862182617188, "learning_rate": 0.00019601594330434405, "loss": 1.163, "step": 4690 }, { "epoch": 0.1807507218479307, "grad_norm": 0.9905889630317688, "learning_rate": 0.00019600748837216337, "loss": 1.3675, "step": 4695 }, { "epoch": 0.1809432146294514, "grad_norm": 1.1800122261047363, "learning_rate": 0.00019599902466073385, "loss": 1.3252, "step": 4700 }, { "epoch": 0.1811357074109721, "grad_norm": 1.1933966875076294, "learning_rate": 0.00019599055217082949, "loss": 1.2163, "step": 4705 }, { "epoch": 0.1813282001924928, "grad_norm": 1.3980772495269775, "learning_rate": 
0.000195982070903225, "loss": 1.2807, "step": 4710 }, { "epoch": 0.18152069297401346, "grad_norm": 2.541808605194092, "learning_rate": 0.00019597358085869594, "loss": 1.1333, "step": 4715 }, { "epoch": 0.18171318575553416, "grad_norm": 1.616479516029358, "learning_rate": 0.0001959650820380187, "loss": 1.2991, "step": 4720 }, { "epoch": 0.18190567853705486, "grad_norm": 0.9473749399185181, "learning_rate": 0.00019595657444197037, "loss": 1.2273, "step": 4725 }, { "epoch": 0.18209817131857556, "grad_norm": 1.3119609355926514, "learning_rate": 0.000195948058071329, "loss": 1.2754, "step": 4730 }, { "epoch": 0.18229066410009626, "grad_norm": 1.0062682628631592, "learning_rate": 0.00019593953292687332, "loss": 1.2494, "step": 4735 }, { "epoch": 0.18248315688161693, "grad_norm": 1.2124086618423462, "learning_rate": 0.0001959309990093829, "loss": 1.3725, "step": 4740 }, { "epoch": 0.18267564966313762, "grad_norm": 1.2050824165344238, "learning_rate": 0.0001959224563196381, "loss": 1.5103, "step": 4745 }, { "epoch": 0.18286814244465832, "grad_norm": 0.9262427091598511, "learning_rate": 0.00019591390485842008, "loss": 1.4155, "step": 4750 }, { "epoch": 0.18306063522617902, "grad_norm": 1.5612881183624268, "learning_rate": 0.00019590534462651086, "loss": 1.2289, "step": 4755 }, { "epoch": 0.18325312800769972, "grad_norm": 1.5384646654129028, "learning_rate": 0.00019589677562469312, "loss": 1.2474, "step": 4760 }, { "epoch": 0.18344562078922041, "grad_norm": 1.397716999053955, "learning_rate": 0.00019588819785375057, "loss": 1.4273, "step": 4765 }, { "epoch": 0.18363811357074108, "grad_norm": 1.169207215309143, "learning_rate": 0.00019587961131446754, "loss": 1.3963, "step": 4770 }, { "epoch": 0.18383060635226178, "grad_norm": 1.5064833164215088, "learning_rate": 0.00019587101600762916, "loss": 1.5192, "step": 4775 }, { "epoch": 0.18402309913378248, "grad_norm": 0.9700071811676025, "learning_rate": 0.00019586241193402147, "loss": 1.2697, "step": 4780 }, { "epoch": 
0.18421559191530318, "grad_norm": 1.2304507493972778, "learning_rate": 0.00019585379909443123, "loss": 1.3025, "step": 4785 }, { "epoch": 0.18440808469682388, "grad_norm": 1.3768020868301392, "learning_rate": 0.00019584517748964605, "loss": 1.3785, "step": 4790 }, { "epoch": 0.18460057747834457, "grad_norm": 1.062251091003418, "learning_rate": 0.0001958365471204543, "loss": 1.5416, "step": 4795 }, { "epoch": 0.18479307025986524, "grad_norm": 0.9126803874969482, "learning_rate": 0.00019582790798764518, "loss": 1.1479, "step": 4800 }, { "epoch": 0.18498556304138594, "grad_norm": 1.579830288887024, "learning_rate": 0.00019581926009200866, "loss": 1.3315, "step": 4805 }, { "epoch": 0.18517805582290664, "grad_norm": 2.351717710494995, "learning_rate": 0.00019581060343433555, "loss": 1.2503, "step": 4810 }, { "epoch": 0.18537054860442734, "grad_norm": 1.1480222940444946, "learning_rate": 0.00019580193801541746, "loss": 1.2048, "step": 4815 }, { "epoch": 0.18556304138594804, "grad_norm": 1.606439471244812, "learning_rate": 0.00019579326383604675, "loss": 1.5204, "step": 4820 }, { "epoch": 0.18575553416746873, "grad_norm": 1.520969271659851, "learning_rate": 0.00019578458089701664, "loss": 1.2584, "step": 4825 }, { "epoch": 0.1859480269489894, "grad_norm": 1.9096931219100952, "learning_rate": 0.00019577588919912113, "loss": 1.5508, "step": 4830 }, { "epoch": 0.1861405197305101, "grad_norm": 1.004654884338379, "learning_rate": 0.00019576718874315501, "loss": 1.2249, "step": 4835 }, { "epoch": 0.1863330125120308, "grad_norm": 1.0160667896270752, "learning_rate": 0.00019575847952991388, "loss": 1.0782, "step": 4840 }, { "epoch": 0.1865255052935515, "grad_norm": 1.4719328880310059, "learning_rate": 0.0001957497615601941, "loss": 1.4679, "step": 4845 }, { "epoch": 0.1867179980750722, "grad_norm": 1.229625940322876, "learning_rate": 0.00019574103483479296, "loss": 1.347, "step": 4850 }, { "epoch": 0.18691049085659286, "grad_norm": 3.0996217727661133, "learning_rate": 
0.00019573229935450842, "loss": 1.3325, "step": 4855 }, { "epoch": 0.18710298363811356, "grad_norm": 1.59645676612854, "learning_rate": 0.00019572355512013922, "loss": 1.2983, "step": 4860 }, { "epoch": 0.18729547641963426, "grad_norm": 1.373542070388794, "learning_rate": 0.00019571480213248504, "loss": 1.3285, "step": 4865 }, { "epoch": 0.18748796920115496, "grad_norm": 0.9625198245048523, "learning_rate": 0.00019570604039234626, "loss": 1.2823, "step": 4870 }, { "epoch": 0.18768046198267566, "grad_norm": 1.1096363067626953, "learning_rate": 0.00019569726990052407, "loss": 1.2508, "step": 4875 }, { "epoch": 0.18787295476419635, "grad_norm": 1.2040042877197266, "learning_rate": 0.0001956884906578205, "loss": 1.3767, "step": 4880 }, { "epoch": 0.18806544754571702, "grad_norm": 1.103530764579773, "learning_rate": 0.00019567970266503833, "loss": 1.4559, "step": 4885 }, { "epoch": 0.18825794032723772, "grad_norm": 1.1266409158706665, "learning_rate": 0.0001956709059229812, "loss": 1.0687, "step": 4890 }, { "epoch": 0.18845043310875842, "grad_norm": 1.2266972064971924, "learning_rate": 0.00019566210043245344, "loss": 1.1801, "step": 4895 }, { "epoch": 0.18864292589027912, "grad_norm": 1.416676640510559, "learning_rate": 0.0001956532861942603, "loss": 1.346, "step": 4900 }, { "epoch": 0.18883541867179982, "grad_norm": 1.5538910627365112, "learning_rate": 0.0001956444632092078, "loss": 1.3498, "step": 4905 }, { "epoch": 0.1890279114533205, "grad_norm": 1.1525146961212158, "learning_rate": 0.00019563563147810274, "loss": 1.39, "step": 4910 }, { "epoch": 0.18922040423484118, "grad_norm": 1.6796061992645264, "learning_rate": 0.00019562679100175266, "loss": 1.3377, "step": 4915 }, { "epoch": 0.18941289701636188, "grad_norm": 1.6094450950622559, "learning_rate": 0.00019561794178096607, "loss": 1.3057, "step": 4920 }, { "epoch": 0.18960538979788258, "grad_norm": 1.8123548030853271, "learning_rate": 0.00019560908381655208, "loss": 1.1257, "step": 4925 }, { "epoch": 
0.18979788257940328, "grad_norm": 1.5495673418045044, "learning_rate": 0.00019560021710932074, "loss": 1.303, "step": 4930 }, { "epoch": 0.18999037536092397, "grad_norm": 1.623429298400879, "learning_rate": 0.00019559134166008283, "loss": 1.1491, "step": 4935 }, { "epoch": 0.19018286814244467, "grad_norm": 1.2682925462722778, "learning_rate": 0.00019558245746964997, "loss": 1.3774, "step": 4940 }, { "epoch": 0.19037536092396534, "grad_norm": 0.9362719058990479, "learning_rate": 0.00019557356453883456, "loss": 1.2936, "step": 4945 }, { "epoch": 0.19056785370548604, "grad_norm": 1.4271594285964966, "learning_rate": 0.00019556466286844976, "loss": 1.3865, "step": 4950 }, { "epoch": 0.19076034648700674, "grad_norm": 1.4094691276550293, "learning_rate": 0.00019555575245930963, "loss": 1.2941, "step": 4955 }, { "epoch": 0.19095283926852744, "grad_norm": 0.9695935249328613, "learning_rate": 0.00019554683331222893, "loss": 1.1724, "step": 4960 }, { "epoch": 0.19114533205004813, "grad_norm": 1.110616683959961, "learning_rate": 0.00019553790542802327, "loss": 1.3999, "step": 4965 }, { "epoch": 0.1913378248315688, "grad_norm": 1.5389796495437622, "learning_rate": 0.000195528968807509, "loss": 1.2693, "step": 4970 }, { "epoch": 0.1915303176130895, "grad_norm": 1.921168565750122, "learning_rate": 0.00019552002345150338, "loss": 1.2392, "step": 4975 }, { "epoch": 0.1917228103946102, "grad_norm": 1.3342314958572388, "learning_rate": 0.00019551106936082437, "loss": 1.2477, "step": 4980 }, { "epoch": 0.1919153031761309, "grad_norm": 1.745754361152649, "learning_rate": 0.0001955021065362908, "loss": 1.7169, "step": 4985 }, { "epoch": 0.1921077959576516, "grad_norm": 1.090145468711853, "learning_rate": 0.0001954931349787222, "loss": 1.1156, "step": 4990 }, { "epoch": 0.1923002887391723, "grad_norm": 1.5357612371444702, "learning_rate": 0.00019548415468893899, "loss": 1.5436, "step": 4995 }, { "epoch": 0.19249278152069296, "grad_norm": 1.0309633016586304, "learning_rate": 
0.00019547516566776238, "loss": 1.3212, "step": 5000 }, { "epoch": 0.19268527430221366, "grad_norm": 1.000688076019287, "learning_rate": 0.0001954661679160143, "loss": 1.2821, "step": 5005 }, { "epoch": 0.19287776708373436, "grad_norm": 1.268754243850708, "learning_rate": 0.0001954571614345176, "loss": 1.2168, "step": 5010 }, { "epoch": 0.19307025986525506, "grad_norm": 1.3859111070632935, "learning_rate": 0.00019544814622409582, "loss": 1.0701, "step": 5015 }, { "epoch": 0.19326275264677575, "grad_norm": 2.248309850692749, "learning_rate": 0.00019543912228557337, "loss": 1.3548, "step": 5020 }, { "epoch": 0.19345524542829645, "grad_norm": 1.0269944667816162, "learning_rate": 0.00019543008961977538, "loss": 1.213, "step": 5025 }, { "epoch": 0.19364773820981712, "grad_norm": 1.0082924365997314, "learning_rate": 0.00019542104822752789, "loss": 1.2395, "step": 5030 }, { "epoch": 0.19384023099133782, "grad_norm": 2.1287014484405518, "learning_rate": 0.00019541199810965766, "loss": 1.3794, "step": 5035 }, { "epoch": 0.19403272377285852, "grad_norm": 1.230859637260437, "learning_rate": 0.0001954029392669922, "loss": 1.3985, "step": 5040 }, { "epoch": 0.19422521655437922, "grad_norm": 1.0987460613250732, "learning_rate": 0.00019539387170035996, "loss": 1.2637, "step": 5045 }, { "epoch": 0.1944177093358999, "grad_norm": 1.2570157051086426, "learning_rate": 0.00019538479541059007, "loss": 1.2752, "step": 5050 }, { "epoch": 0.19461020211742058, "grad_norm": 0.5122241377830505, "learning_rate": 0.00019537571039851252, "loss": 1.1927, "step": 5055 }, { "epoch": 0.19480269489894128, "grad_norm": 1.7925124168395996, "learning_rate": 0.00019536661666495807, "loss": 1.1414, "step": 5060 }, { "epoch": 0.19499518768046198, "grad_norm": 0.8517950773239136, "learning_rate": 0.00019535751421075826, "loss": 1.2359, "step": 5065 }, { "epoch": 0.19518768046198268, "grad_norm": 0.582260012626648, "learning_rate": 0.00019534840303674544, "loss": 1.3528, "step": 5070 }, { "epoch": 
0.19538017324350337, "grad_norm": 1.3547414541244507, "learning_rate": 0.0001953392831437528, "loss": 1.296, "step": 5075 }, { "epoch": 0.19557266602502407, "grad_norm": null, "learning_rate": 0.0001953319809522536, "loss": 1.4074, "step": 5080 }, { "epoch": 0.19576515880654474, "grad_norm": 2.2984917163848877, "learning_rate": 0.00019532284536719936, "loss": 1.2002, "step": 5085 }, { "epoch": 0.19595765158806544, "grad_norm": 1.4113095998764038, "learning_rate": 0.0001953137010655024, "loss": 1.2755, "step": 5090 }, { "epoch": 0.19615014436958614, "grad_norm": 1.921242594718933, "learning_rate": 0.00019530454804799881, "loss": 1.2431, "step": 5095 }, { "epoch": 0.19634263715110684, "grad_norm": 1.3097113370895386, "learning_rate": 0.0001952953863155257, "loss": 1.415, "step": 5100 }, { "epoch": 0.19653512993262753, "grad_norm": 2.1493217945098877, "learning_rate": 0.00019528621586892072, "loss": 1.4282, "step": 5105 }, { "epoch": 0.19672762271414823, "grad_norm": 1.2487257719039917, "learning_rate": 0.0001952770367090226, "loss": 1.3512, "step": 5110 }, { "epoch": 0.1969201154956689, "grad_norm": 0.9984391331672668, "learning_rate": 0.00019526784883667055, "loss": 1.5437, "step": 5115 }, { "epoch": 0.1971126082771896, "grad_norm": 1.241417646408081, "learning_rate": 0.00019525865225270486, "loss": 1.2399, "step": 5120 }, { "epoch": 0.1973051010587103, "grad_norm": 1.5192227363586426, "learning_rate": 0.00019524944695796642, "loss": 1.3236, "step": 5125 }, { "epoch": 0.197497593840231, "grad_norm": 1.7465555667877197, "learning_rate": 0.00019524023295329704, "loss": 1.4247, "step": 5130 }, { "epoch": 0.1976900866217517, "grad_norm": 1.455175757408142, "learning_rate": 0.00019523101023953925, "loss": 1.5053, "step": 5135 }, { "epoch": 0.1978825794032724, "grad_norm": 2.164982318878174, "learning_rate": 0.00019522177881753643, "loss": 1.2796, "step": 5140 }, { "epoch": 0.19807507218479306, "grad_norm": 1.58863365650177, "learning_rate": 0.00019521253868813273, 
"loss": 1.349, "step": 5145 }, { "epoch": 0.19826756496631376, "grad_norm": 1.5380641222000122, "learning_rate": 0.0001952032898521731, "loss": 1.3107, "step": 5150 }, { "epoch": 0.19846005774783446, "grad_norm": 1.1790603399276733, "learning_rate": 0.00019519403231050327, "loss": 1.2178, "step": 5155 }, { "epoch": 0.19865255052935515, "grad_norm": 1.7905482053756714, "learning_rate": 0.0001951847660639698, "loss": 1.3579, "step": 5160 }, { "epoch": 0.19884504331087585, "grad_norm": 1.1262041330337524, "learning_rate": 0.00019517549111342, "loss": 1.2988, "step": 5165 }, { "epoch": 0.19903753609239652, "grad_norm": 1.6370010375976562, "learning_rate": 0.00019516620745970199, "loss": 1.2326, "step": 5170 }, { "epoch": 0.19923002887391722, "grad_norm": 1.1789335012435913, "learning_rate": 0.00019515691510366476, "loss": 1.1357, "step": 5175 }, { "epoch": 0.19942252165543792, "grad_norm": 1.167226791381836, "learning_rate": 0.000195147614046158, "loss": 1.4007, "step": 5180 }, { "epoch": 0.19961501443695862, "grad_norm": 1.3708933591842651, "learning_rate": 0.00019513830428803225, "loss": 1.3029, "step": 5185 }, { "epoch": 0.1998075072184793, "grad_norm": 1.6595165729522705, "learning_rate": 0.00019512898583013875, "loss": 1.3159, "step": 5190 }, { "epoch": 0.2, "grad_norm": 1.1252923011779785, "learning_rate": 0.00019511965867332972, "loss": 1.1894, "step": 5195 }, { "epoch": 0.20019249278152068, "grad_norm": 0.8440331816673279, "learning_rate": 0.00019511032281845797, "loss": 1.2108, "step": 5200 }, { "epoch": 0.20038498556304138, "grad_norm": 1.427147626876831, "learning_rate": 0.0001951009782663773, "loss": 1.197, "step": 5205 }, { "epoch": 0.20057747834456208, "grad_norm": 1.3509503602981567, "learning_rate": 0.00019509162501794213, "loss": 1.3348, "step": 5210 }, { "epoch": 0.20076997112608277, "grad_norm": 1.533103108406067, "learning_rate": 0.00019508226307400777, "loss": 1.1919, "step": 5215 }, { "epoch": 0.20096246390760347, "grad_norm": 1.1347332000732422, 
"learning_rate": 0.0001950728924354303, "loss": 1.2954, "step": 5220 }, { "epoch": 0.20115495668912417, "grad_norm": 1.65277099609375, "learning_rate": 0.00019506351310306664, "loss": 1.2686, "step": 5225 }, { "epoch": 0.20134744947064484, "grad_norm": 1.0601050853729248, "learning_rate": 0.00019505412507777442, "loss": 1.4066, "step": 5230 }, { "epoch": 0.20153994225216554, "grad_norm": 0.9429787397384644, "learning_rate": 0.00019504472836041217, "loss": 1.208, "step": 5235 }, { "epoch": 0.20173243503368624, "grad_norm": 0.9101033806800842, "learning_rate": 0.00019503532295183908, "loss": 1.3172, "step": 5240 }, { "epoch": 0.20192492781520693, "grad_norm": 1.1404805183410645, "learning_rate": 0.0001950259088529153, "loss": 1.1539, "step": 5245 }, { "epoch": 0.20211742059672763, "grad_norm": 1.1555522680282593, "learning_rate": 0.00019501648606450161, "loss": 1.3754, "step": 5250 }, { "epoch": 0.20230991337824833, "grad_norm": 1.5473912954330444, "learning_rate": 0.00019500705458745974, "loss": 1.1878, "step": 5255 }, { "epoch": 0.202502406159769, "grad_norm": 1.8766716718673706, "learning_rate": 0.00019499761442265208, "loss": 1.2445, "step": 5260 }, { "epoch": 0.2026948989412897, "grad_norm": 1.7951183319091797, "learning_rate": 0.00019498816557094188, "loss": 1.3496, "step": 5265 }, { "epoch": 0.2028873917228104, "grad_norm": 1.6615973711013794, "learning_rate": 0.00019497870803319317, "loss": 1.2919, "step": 5270 }, { "epoch": 0.2030798845043311, "grad_norm": 1.2885236740112305, "learning_rate": 0.00019496924181027078, "loss": 1.1807, "step": 5275 }, { "epoch": 0.2032723772858518, "grad_norm": 0.9546861052513123, "learning_rate": 0.00019495976690304034, "loss": 1.309, "step": 5280 }, { "epoch": 0.20346487006737246, "grad_norm": 1.6904189586639404, "learning_rate": 0.0001949502833123683, "loss": 1.2244, "step": 5285 }, { "epoch": 0.20365736284889316, "grad_norm": 1.394254446029663, "learning_rate": 0.0001949407910391218, "loss": 1.2877, "step": 5290 }, { 
"epoch": 0.20384985563041386, "grad_norm": 0.8937919735908508, "learning_rate": 0.0001949312900841689, "loss": 1.2389, "step": 5295 }, { "epoch": 0.20404234841193455, "grad_norm": 1.1096867322921753, "learning_rate": 0.00019492178044837837, "loss": 1.3766, "step": 5300 }, { "epoch": 0.20423484119345525, "grad_norm": 1.009758472442627, "learning_rate": 0.00019491226213261983, "loss": 1.2281, "step": 5305 }, { "epoch": 0.20442733397497595, "grad_norm": 1.4888296127319336, "learning_rate": 0.00019490273513776365, "loss": 1.0624, "step": 5310 }, { "epoch": 0.20461982675649662, "grad_norm": 1.4901612997055054, "learning_rate": 0.00019489319946468104, "loss": 1.1554, "step": 5315 }, { "epoch": 0.20481231953801732, "grad_norm": 1.2920863628387451, "learning_rate": 0.0001948836551142439, "loss": 1.2103, "step": 5320 }, { "epoch": 0.20500481231953802, "grad_norm": 1.3616580963134766, "learning_rate": 0.00019487410208732508, "loss": 1.3246, "step": 5325 }, { "epoch": 0.2051973051010587, "grad_norm": 1.0202921628952026, "learning_rate": 0.0001948645403847981, "loss": 1.3046, "step": 5330 }, { "epoch": 0.2053897978825794, "grad_norm": 1.0083186626434326, "learning_rate": 0.00019485497000753735, "loss": 1.2541, "step": 5335 }, { "epoch": 0.2055822906641001, "grad_norm": 1.137617588043213, "learning_rate": 0.0001948453909564179, "loss": 1.3143, "step": 5340 }, { "epoch": 0.20577478344562078, "grad_norm": 1.6331067085266113, "learning_rate": 0.00019483580323231578, "loss": 1.1129, "step": 5345 }, { "epoch": 0.20596727622714148, "grad_norm": 1.4032361507415771, "learning_rate": 0.00019482620683610767, "loss": 1.3412, "step": 5350 }, { "epoch": 0.20615976900866217, "grad_norm": 1.3207452297210693, "learning_rate": 0.00019481660176867108, "loss": 1.4614, "step": 5355 }, { "epoch": 0.20635226179018287, "grad_norm": 0.9236577749252319, "learning_rate": 0.0001948069880308844, "loss": 1.3131, "step": 5360 }, { "epoch": 0.20654475457170357, "grad_norm": 2.2021703720092773, 
"learning_rate": 0.0001947973656236267, "loss": 1.2434, "step": 5365 }, { "epoch": 0.20673724735322424, "grad_norm": 1.5074305534362793, "learning_rate": 0.00019478773454777789, "loss": 1.4204, "step": 5370 }, { "epoch": 0.20692974013474494, "grad_norm": 1.5073877573013306, "learning_rate": 0.00019477809480421865, "loss": 1.4193, "step": 5375 }, { "epoch": 0.20712223291626564, "grad_norm": 1.0522600412368774, "learning_rate": 0.00019476844639383049, "loss": 1.228, "step": 5380 }, { "epoch": 0.20731472569778633, "grad_norm": 1.1478843688964844, "learning_rate": 0.0001947587893174957, "loss": 1.2315, "step": 5385 }, { "epoch": 0.20750721847930703, "grad_norm": 0.922837495803833, "learning_rate": 0.00019474912357609733, "loss": 1.2567, "step": 5390 }, { "epoch": 0.20769971126082773, "grad_norm": 1.156615972518921, "learning_rate": 0.0001947394491705193, "loss": 1.443, "step": 5395 }, { "epoch": 0.2078922040423484, "grad_norm": 1.909555435180664, "learning_rate": 0.0001947297661016462, "loss": 1.1625, "step": 5400 }, { "epoch": 0.2080846968238691, "grad_norm": 1.8379411697387695, "learning_rate": 0.00019472007437036352, "loss": 1.3015, "step": 5405 }, { "epoch": 0.2082771896053898, "grad_norm": 1.188402771949768, "learning_rate": 0.00019471037397755754, "loss": 1.3294, "step": 5410 }, { "epoch": 0.2084696823869105, "grad_norm": 1.597538948059082, "learning_rate": 0.00019470066492411521, "loss": 1.3824, "step": 5415 }, { "epoch": 0.2086621751684312, "grad_norm": 1.0081026554107666, "learning_rate": 0.00019469094721092444, "loss": 1.2914, "step": 5420 }, { "epoch": 0.2088546679499519, "grad_norm": 1.3790476322174072, "learning_rate": 0.0001946812208388738, "loss": 1.2817, "step": 5425 }, { "epoch": 0.20904716073147256, "grad_norm": 1.777570128440857, "learning_rate": 0.00019467148580885272, "loss": 1.2253, "step": 5430 }, { "epoch": 0.20923965351299326, "grad_norm": 1.1196024417877197, "learning_rate": 0.00019466174212175142, "loss": 1.2956, "step": 5435 }, { "epoch": 
0.20943214629451395, "grad_norm": 2.940906524658203, "learning_rate": 0.00019465198977846086, "loss": 1.3912, "step": 5440 }, { "epoch": 0.20962463907603465, "grad_norm": 1.9075424671173096, "learning_rate": 0.00019464222877987286, "loss": 1.2518, "step": 5445 }, { "epoch": 0.20981713185755535, "grad_norm": 1.0282469987869263, "learning_rate": 0.00019463245912687996, "loss": 1.2569, "step": 5450 }, { "epoch": 0.21000962463907605, "grad_norm": 1.1651009321212769, "learning_rate": 0.0001946226808203756, "loss": 1.4676, "step": 5455 }, { "epoch": 0.21020211742059672, "grad_norm": 1.1911680698394775, "learning_rate": 0.00019461289386125388, "loss": 1.3822, "step": 5460 }, { "epoch": 0.21039461020211742, "grad_norm": 0.7187578082084656, "learning_rate": 0.00019460309825040974, "loss": 1.1462, "step": 5465 }, { "epoch": 0.2105871029836381, "grad_norm": 2.401764154434204, "learning_rate": 0.000194593293988739, "loss": 1.3187, "step": 5470 }, { "epoch": 0.2107795957651588, "grad_norm": 1.783333659172058, "learning_rate": 0.0001945834810771381, "loss": 1.3539, "step": 5475 }, { "epoch": 0.2109720885466795, "grad_norm": 0.9923986196517944, "learning_rate": 0.00019457365951650445, "loss": 1.4837, "step": 5480 }, { "epoch": 0.21116458132820018, "grad_norm": 1.0704642534255981, "learning_rate": 0.00019456382930773612, "loss": 1.2345, "step": 5485 }, { "epoch": 0.21135707410972088, "grad_norm": 1.5242959260940552, "learning_rate": 0.000194553990451732, "loss": 1.2113, "step": 5490 }, { "epoch": 0.21154956689124157, "grad_norm": 1.3185608386993408, "learning_rate": 0.00019454414294939185, "loss": 1.4083, "step": 5495 }, { "epoch": 0.21174205967276227, "grad_norm": 1.1448662281036377, "learning_rate": 0.00019453428680161615, "loss": 1.4091, "step": 5500 }, { "epoch": 0.21193455245428297, "grad_norm": 1.172396183013916, "learning_rate": 0.0001945244220093061, "loss": 1.1414, "step": 5505 }, { "epoch": 0.21212704523580367, "grad_norm": 2.988346576690674, "learning_rate": 
0.00019451454857336383, "loss": 1.3968, "step": 5510 }, { "epoch": 0.21231953801732434, "grad_norm": 0.8824801445007324, "learning_rate": 0.00019450466649469222, "loss": 1.2229, "step": 5515 }, { "epoch": 0.21251203079884504, "grad_norm": 1.7703745365142822, "learning_rate": 0.00019449477577419488, "loss": 1.3073, "step": 5520 }, { "epoch": 0.21270452358036573, "grad_norm": 1.3374749422073364, "learning_rate": 0.00019448487641277629, "loss": 1.3908, "step": 5525 }, { "epoch": 0.21289701636188643, "grad_norm": 1.2366503477096558, "learning_rate": 0.00019447496841134163, "loss": 1.2764, "step": 5530 }, { "epoch": 0.21308950914340713, "grad_norm": 1.242353081703186, "learning_rate": 0.00019446505177079696, "loss": 1.3136, "step": 5535 }, { "epoch": 0.21328200192492783, "grad_norm": 1.046583652496338, "learning_rate": 0.00019445512649204907, "loss": 1.1483, "step": 5540 }, { "epoch": 0.2134744947064485, "grad_norm": 1.6280517578125, "learning_rate": 0.00019444519257600558, "loss": 1.4076, "step": 5545 }, { "epoch": 0.2136669874879692, "grad_norm": 1.7472679615020752, "learning_rate": 0.00019443525002357486, "loss": 1.2842, "step": 5550 }, { "epoch": 0.2138594802694899, "grad_norm": 1.101185917854309, "learning_rate": 0.00019442529883566612, "loss": 1.3037, "step": 5555 }, { "epoch": 0.2140519730510106, "grad_norm": 1.8548834323883057, "learning_rate": 0.0001944153390131893, "loss": 1.4081, "step": 5560 }, { "epoch": 0.2142444658325313, "grad_norm": 1.4205219745635986, "learning_rate": 0.00019440537055705515, "loss": 1.3419, "step": 5565 }, { "epoch": 0.214436958614052, "grad_norm": 1.135933756828308, "learning_rate": 0.0001943953934681753, "loss": 0.9906, "step": 5570 }, { "epoch": 0.21462945139557266, "grad_norm": 1.7350742816925049, "learning_rate": 0.00019438540774746198, "loss": 1.1193, "step": 5575 }, { "epoch": 0.21482194417709335, "grad_norm": 1.891998291015625, "learning_rate": 0.00019437541339582836, "loss": 1.2271, "step": 5580 }, { "epoch": 
0.21501443695861405, "grad_norm": 1.2564722299575806, "learning_rate": 0.0001943654104141884, "loss": 1.5134, "step": 5585 }, { "epoch": 0.21520692974013475, "grad_norm": 1.3632197380065918, "learning_rate": 0.00019435539880345673, "loss": 1.1772, "step": 5590 }, { "epoch": 0.21539942252165545, "grad_norm": 1.8670414686203003, "learning_rate": 0.00019434537856454894, "loss": 1.2685, "step": 5595 }, { "epoch": 0.21559191530317612, "grad_norm": 2.5948314666748047, "learning_rate": 0.00019433534969838122, "loss": 1.487, "step": 5600 }, { "epoch": 0.21578440808469682, "grad_norm": 1.2312328815460205, "learning_rate": 0.00019432531220587071, "loss": 1.3394, "step": 5605 }, { "epoch": 0.2159769008662175, "grad_norm": 0.9402896165847778, "learning_rate": 0.0001943152660879352, "loss": 1.1471, "step": 5610 }, { "epoch": 0.2161693936477382, "grad_norm": 0.3871050477027893, "learning_rate": 0.00019430521134549346, "loss": 0.9597, "step": 5615 }, { "epoch": 0.2163618864292589, "grad_norm": 0.9395222067832947, "learning_rate": 0.0001942951479794648, "loss": 1.3055, "step": 5620 }, { "epoch": 0.2165543792107796, "grad_norm": 0.8928638696670532, "learning_rate": 0.00019428507599076955, "loss": 1.4099, "step": 5625 }, { "epoch": 0.21674687199230028, "grad_norm": 1.8891551494598389, "learning_rate": 0.00019427499538032865, "loss": 1.5009, "step": 5630 }, { "epoch": 0.21693936477382098, "grad_norm": 0.6684243679046631, "learning_rate": 0.00019426490614906394, "loss": 1.2251, "step": 5635 }, { "epoch": 0.21713185755534167, "grad_norm": 1.5765355825424194, "learning_rate": 0.00019425480829789803, "loss": 1.1114, "step": 5640 }, { "epoch": 0.21732435033686237, "grad_norm": 0.9966096878051758, "learning_rate": 0.00019424470182775427, "loss": 1.2907, "step": 5645 }, { "epoch": 0.21751684311838307, "grad_norm": 1.263469934463501, "learning_rate": 0.00019423458673955684, "loss": 1.1443, "step": 5650 }, { "epoch": 0.21770933589990377, "grad_norm": 1.5138813257217407, "learning_rate": 
0.0001942244630342307, "loss": 1.2699, "step": 5655 }, { "epoch": 0.21790182868142444, "grad_norm": 1.0215526819229126, "learning_rate": 0.00019421433071270156, "loss": 1.4265, "step": 5660 }, { "epoch": 0.21809432146294513, "grad_norm": 0.7587301731109619, "learning_rate": 0.00019420418977589605, "loss": 1.1706, "step": 5665 }, { "epoch": 0.21828681424446583, "grad_norm": 0.9531148672103882, "learning_rate": 0.0001941940402247414, "loss": 1.4041, "step": 5670 }, { "epoch": 0.21847930702598653, "grad_norm": 1.098739743232727, "learning_rate": 0.00019418388206016575, "loss": 1.3476, "step": 5675 }, { "epoch": 0.21867179980750723, "grad_norm": 1.0307271480560303, "learning_rate": 0.000194173715283098, "loss": 1.2333, "step": 5680 }, { "epoch": 0.2188642925890279, "grad_norm": 1.538256049156189, "learning_rate": 0.00019416353989446785, "loss": 1.4489, "step": 5685 }, { "epoch": 0.2190567853705486, "grad_norm": 1.5411714315414429, "learning_rate": 0.00019415335589520574, "loss": 1.2597, "step": 5690 }, { "epoch": 0.2192492781520693, "grad_norm": 1.3543205261230469, "learning_rate": 0.00019414316328624293, "loss": 1.265, "step": 5695 }, { "epoch": 0.21944177093359, "grad_norm": 0.7644770741462708, "learning_rate": 0.0001941329620685115, "loss": 1.1888, "step": 5700 }, { "epoch": 0.2196342637151107, "grad_norm": 2.1122093200683594, "learning_rate": 0.00019412275224294423, "loss": 1.1301, "step": 5705 }, { "epoch": 0.2198267564966314, "grad_norm": 1.4159448146820068, "learning_rate": 0.00019411253381047477, "loss": 1.209, "step": 5710 }, { "epoch": 0.22001924927815206, "grad_norm": 1.4212615489959717, "learning_rate": 0.00019410230677203755, "loss": 1.3268, "step": 5715 }, { "epoch": 0.22021174205967275, "grad_norm": 1.2042075395584106, "learning_rate": 0.00019409207112856778, "loss": 1.1976, "step": 5720 }, { "epoch": 0.22040423484119345, "grad_norm": 1.5765044689178467, "learning_rate": 0.00019408182688100136, "loss": 1.3631, "step": 5725 }, { "epoch": 
0.22059672762271415, "grad_norm": 2.197000026702881, "learning_rate": 0.00019407157403027514, "loss": 1.2964, "step": 5730 }, { "epoch": 0.22078922040423485, "grad_norm": 1.3434042930603027, "learning_rate": 0.00019406131257732664, "loss": 1.244, "step": 5735 }, { "epoch": 0.22098171318575555, "grad_norm": 1.2889900207519531, "learning_rate": 0.0001940510425230942, "loss": 1.1333, "step": 5740 }, { "epoch": 0.22117420596727622, "grad_norm": 0.8795220851898193, "learning_rate": 0.00019404076386851692, "loss": 1.2635, "step": 5745 }, { "epoch": 0.22136669874879691, "grad_norm": 1.0312747955322266, "learning_rate": 0.00019403047661453477, "loss": 1.3195, "step": 5750 }, { "epoch": 0.2215591915303176, "grad_norm": 1.5083264112472534, "learning_rate": 0.00019402018076208845, "loss": 1.3417, "step": 5755 }, { "epoch": 0.2217516843118383, "grad_norm": 1.1538232564926147, "learning_rate": 0.00019400987631211936, "loss": 1.2956, "step": 5760 }, { "epoch": 0.221944177093359, "grad_norm": 1.975381851196289, "learning_rate": 0.0001939995632655699, "loss": 1.4641, "step": 5765 }, { "epoch": 0.2221366698748797, "grad_norm": 1.3251721858978271, "learning_rate": 0.00019398924162338305, "loss": 1.3429, "step": 5770 }, { "epoch": 0.22232916265640038, "grad_norm": 1.1281229257583618, "learning_rate": 0.0001939789113865027, "loss": 1.2155, "step": 5775 }, { "epoch": 0.22252165543792107, "grad_norm": 2.6070075035095215, "learning_rate": 0.00019396857255587344, "loss": 1.2634, "step": 5780 }, { "epoch": 0.22271414821944177, "grad_norm": 1.0815184116363525, "learning_rate": 0.00019395822513244067, "loss": 1.1176, "step": 5785 }, { "epoch": 0.22290664100096247, "grad_norm": 2.819180965423584, "learning_rate": 0.0001939478691171507, "loss": 1.2624, "step": 5790 }, { "epoch": 0.22309913378248317, "grad_norm": 1.180055022239685, "learning_rate": 0.0001939375045109504, "loss": 1.3433, "step": 5795 }, { "epoch": 0.22329162656400384, "grad_norm": 1.1582396030426025, "learning_rate": 
0.0001939271313147876, "loss": 1.2815, "step": 5800 }, { "epoch": 0.22348411934552453, "grad_norm": 2.32379412651062, "learning_rate": 0.00019391674952961085, "loss": 1.4095, "step": 5805 }, { "epoch": 0.22367661212704523, "grad_norm": 1.5146657228469849, "learning_rate": 0.0001939063591563695, "loss": 1.2434, "step": 5810 }, { "epoch": 0.22386910490856593, "grad_norm": 1.6434500217437744, "learning_rate": 0.00019389596019601365, "loss": 1.1739, "step": 5815 }, { "epoch": 0.22406159769008663, "grad_norm": 1.7917993068695068, "learning_rate": 0.0001938855526494943, "loss": 1.5106, "step": 5820 }, { "epoch": 0.22425409047160733, "grad_norm": 1.10679030418396, "learning_rate": 0.00019387513651776303, "loss": 1.284, "step": 5825 }, { "epoch": 0.224446583253128, "grad_norm": 1.521506905555725, "learning_rate": 0.00019386471180177247, "loss": 1.4129, "step": 5830 }, { "epoch": 0.2246390760346487, "grad_norm": 1.4055581092834473, "learning_rate": 0.00019385427850247572, "loss": 1.2476, "step": 5835 }, { "epoch": 0.2248315688161694, "grad_norm": 0.9506363868713379, "learning_rate": 0.00019384383662082703, "loss": 1.3105, "step": 5840 }, { "epoch": 0.2250240615976901, "grad_norm": 1.354658842086792, "learning_rate": 0.00019383338615778107, "loss": 1.29, "step": 5845 }, { "epoch": 0.2252165543792108, "grad_norm": 0.8972203135490417, "learning_rate": 0.00019382292711429353, "loss": 1.3407, "step": 5850 }, { "epoch": 0.22540904716073148, "grad_norm": 0.9989115595817566, "learning_rate": 0.00019381245949132085, "loss": 1.1662, "step": 5855 }, { "epoch": 0.22560153994225216, "grad_norm": 1.1133052110671997, "learning_rate": 0.0001938019832898202, "loss": 1.2674, "step": 5860 }, { "epoch": 0.22579403272377285, "grad_norm": 1.3640556335449219, "learning_rate": 0.00019379149851074957, "loss": 1.1989, "step": 5865 }, { "epoch": 0.22598652550529355, "grad_norm": 1.2812589406967163, "learning_rate": 0.0001937810051550677, "loss": 1.4749, "step": 5870 }, { "epoch": 0.22617901828681425, 
"grad_norm": 1.223944902420044, "learning_rate": 0.00019377050322373412, "loss": 1.305, "step": 5875 }, { "epoch": 0.22637151106833495, "grad_norm": 1.3493690490722656, "learning_rate": 0.00019375999271770925, "loss": 1.458, "step": 5880 }, { "epoch": 0.22656400384985564, "grad_norm": 1.4042202234268188, "learning_rate": 0.0001937494736379541, "loss": 1.1714, "step": 5885 }, { "epoch": 0.22675649663137631, "grad_norm": 1.6239880323410034, "learning_rate": 0.00019373894598543066, "loss": 1.3224, "step": 5890 }, { "epoch": 0.226948989412897, "grad_norm": 1.096960425376892, "learning_rate": 0.00019372840976110154, "loss": 1.128, "step": 5895 }, { "epoch": 0.2271414821944177, "grad_norm": 1.6740233898162842, "learning_rate": 0.00019371786496593028, "loss": 1.195, "step": 5900 }, { "epoch": 0.2273339749759384, "grad_norm": 1.454030156135559, "learning_rate": 0.00019370731160088105, "loss": 1.2641, "step": 5905 }, { "epoch": 0.2275264677574591, "grad_norm": 1.4465221166610718, "learning_rate": 0.00019369674966691897, "loss": 1.331, "step": 5910 }, { "epoch": 0.22771896053897978, "grad_norm": 1.6115851402282715, "learning_rate": 0.00019368617916500978, "loss": 1.4061, "step": 5915 }, { "epoch": 0.22791145332050047, "grad_norm": 1.0165706872940063, "learning_rate": 0.00019367560009612013, "loss": 1.177, "step": 5920 }, { "epoch": 0.22810394610202117, "grad_norm": 1.5200728178024292, "learning_rate": 0.00019366501246121737, "loss": 1.1323, "step": 5925 }, { "epoch": 0.22829643888354187, "grad_norm": 1.4613386392593384, "learning_rate": 0.00019365441626126976, "loss": 1.4626, "step": 5930 }, { "epoch": 0.22848893166506257, "grad_norm": 1.2502466440200806, "learning_rate": 0.00019364381149724613, "loss": 1.2797, "step": 5935 }, { "epoch": 0.22868142444658326, "grad_norm": 1.2946960926055908, "learning_rate": 0.0001936331981701163, "loss": 1.3844, "step": 5940 }, { "epoch": 0.22887391722810393, "grad_norm": 1.2478231191635132, "learning_rate": 0.00019362257628085074, "loss": 
1.2855, "step": 5945 }, { "epoch": 0.22906641000962463, "grad_norm": 1.0097830295562744, "learning_rate": 0.0001936119458304208, "loss": 1.1223, "step": 5950 }, { "epoch": 0.22925890279114533, "grad_norm": 1.3235141038894653, "learning_rate": 0.00019360130681979852, "loss": 1.284, "step": 5955 }, { "epoch": 0.22945139557266603, "grad_norm": 1.6869986057281494, "learning_rate": 0.00019359065924995678, "loss": 1.517, "step": 5960 }, { "epoch": 0.22964388835418673, "grad_norm": 0.9644334316253662, "learning_rate": 0.00019358000312186925, "loss": 1.0607, "step": 5965 }, { "epoch": 0.22983638113570742, "grad_norm": 1.063192367553711, "learning_rate": 0.0001935693384365103, "loss": 0.9187, "step": 5970 }, { "epoch": 0.2300288739172281, "grad_norm": 1.0339081287384033, "learning_rate": 0.00019355866519485523, "loss": 1.2946, "step": 5975 }, { "epoch": 0.2302213666987488, "grad_norm": 1.3194791078567505, "learning_rate": 0.00019354798339788, "loss": 1.4293, "step": 5980 }, { "epoch": 0.2304138594802695, "grad_norm": 1.8870794773101807, "learning_rate": 0.00019353729304656136, "loss": 1.4124, "step": 5985 }, { "epoch": 0.2306063522617902, "grad_norm": 1.132385015487671, "learning_rate": 0.00019352659414187694, "loss": 1.1949, "step": 5990 }, { "epoch": 0.23079884504331089, "grad_norm": 2.763613700866699, "learning_rate": 0.000193515886684805, "loss": 1.2341, "step": 5995 }, { "epoch": 0.23099133782483156, "grad_norm": 1.6793404817581177, "learning_rate": 0.00019350517067632473, "loss": 1.3597, "step": 6000 }, { "epoch": 0.23118383060635225, "grad_norm": 1.1538963317871094, "learning_rate": 0.000193494446117416, "loss": 1.1981, "step": 6005 }, { "epoch": 0.23137632338787295, "grad_norm": 1.0233584642410278, "learning_rate": 0.00019348371300905955, "loss": 1.2821, "step": 6010 }, { "epoch": 0.23156881616939365, "grad_norm": 1.3905096054077148, "learning_rate": 0.0001934729713522368, "loss": 1.3471, "step": 6015 }, { "epoch": 0.23176130895091435, "grad_norm": 
1.345563292503357, "learning_rate": 0.00019346222114793, "loss": 1.0454, "step": 6020 }, { "epoch": 0.23195380173243504, "grad_norm": 0.739811897277832, "learning_rate": 0.00019345146239712225, "loss": 1.3125, "step": 6025 }, { "epoch": 0.23214629451395571, "grad_norm": 1.977918028831482, "learning_rate": 0.0001934406951007973, "loss": 1.3328, "step": 6030 }, { "epoch": 0.2323387872954764, "grad_norm": 0.9505223035812378, "learning_rate": 0.00019342991925993977, "loss": 1.1388, "step": 6035 }, { "epoch": 0.2325312800769971, "grad_norm": 1.257755160331726, "learning_rate": 0.00019341913487553502, "loss": 1.3064, "step": 6040 }, { "epoch": 0.2327237728585178, "grad_norm": 1.2003203630447388, "learning_rate": 0.00019340834194856926, "loss": 1.4369, "step": 6045 }, { "epoch": 0.2329162656400385, "grad_norm": 1.2289738655090332, "learning_rate": 0.0001933975404800294, "loss": 1.1462, "step": 6050 }, { "epoch": 0.2331087584215592, "grad_norm": 1.227171540260315, "learning_rate": 0.00019338673047090317, "loss": 1.1829, "step": 6055 }, { "epoch": 0.23330125120307987, "grad_norm": 1.2766560316085815, "learning_rate": 0.00019337591192217904, "loss": 1.2572, "step": 6060 }, { "epoch": 0.23349374398460057, "grad_norm": 2.6716904640197754, "learning_rate": 0.00019336508483484634, "loss": 1.0195, "step": 6065 }, { "epoch": 0.23368623676612127, "grad_norm": 1.1586931943893433, "learning_rate": 0.00019335424920989512, "loss": 1.4932, "step": 6070 }, { "epoch": 0.23387872954764197, "grad_norm": 1.0196670293807983, "learning_rate": 0.00019334340504831624, "loss": 1.3497, "step": 6075 }, { "epoch": 0.23407122232916266, "grad_norm": 1.6527109146118164, "learning_rate": 0.00019333255235110127, "loss": 1.1239, "step": 6080 }, { "epoch": 0.23426371511068336, "grad_norm": 0.9913870096206665, "learning_rate": 0.00019332169111924271, "loss": 1.2757, "step": 6085 }, { "epoch": 0.23445620789220403, "grad_norm": 1.1027697324752808, "learning_rate": 0.00019331082135373367, "loss": 1.2512, 
"step": 6090 }, { "epoch": 0.23464870067372473, "grad_norm": 1.9269218444824219, "learning_rate": 0.00019329994305556815, "loss": 1.4698, "step": 6095 }, { "epoch": 0.23484119345524543, "grad_norm": 1.1504942178726196, "learning_rate": 0.00019328905622574086, "loss": 1.4844, "step": 6100 }, { "epoch": 0.23503368623676613, "grad_norm": 1.1164321899414062, "learning_rate": 0.0001932781608652474, "loss": 1.2972, "step": 6105 }, { "epoch": 0.23522617901828682, "grad_norm": 1.283000111579895, "learning_rate": 0.00019326725697508407, "loss": 1.3117, "step": 6110 }, { "epoch": 0.2354186717998075, "grad_norm": 1.3553595542907715, "learning_rate": 0.00019325634455624787, "loss": 1.027, "step": 6115 }, { "epoch": 0.2356111645813282, "grad_norm": 2.1605517864227295, "learning_rate": 0.00019324542360973674, "loss": 1.2211, "step": 6120 }, { "epoch": 0.2358036573628489, "grad_norm": 1.1028283834457397, "learning_rate": 0.00019323449413654933, "loss": 1.3034, "step": 6125 }, { "epoch": 0.2359961501443696, "grad_norm": 1.1728841066360474, "learning_rate": 0.00019322355613768505, "loss": 1.3135, "step": 6130 }, { "epoch": 0.23618864292589029, "grad_norm": 1.7304178476333618, "learning_rate": 0.0001932126096141441, "loss": 1.3516, "step": 6135 }, { "epoch": 0.23638113570741098, "grad_norm": 1.3326451778411865, "learning_rate": 0.00019320165456692748, "loss": 1.3371, "step": 6140 }, { "epoch": 0.23657362848893165, "grad_norm": 1.6894330978393555, "learning_rate": 0.00019319069099703697, "loss": 1.2126, "step": 6145 }, { "epoch": 0.23676612127045235, "grad_norm": 1.7248213291168213, "learning_rate": 0.0001931797189054751, "loss": 1.193, "step": 6150 }, { "epoch": 0.23695861405197305, "grad_norm": 1.1517174243927002, "learning_rate": 0.0001931687382932452, "loss": 1.1472, "step": 6155 }, { "epoch": 0.23715110683349375, "grad_norm": 2.4606590270996094, "learning_rate": 0.00019315774916135134, "loss": 1.524, "step": 6160 }, { "epoch": 0.23734359961501444, "grad_norm": 
1.6130386590957642, "learning_rate": 0.00019314675151079844, "loss": 1.052, "step": 6165 }, { "epoch": 0.23753609239653514, "grad_norm": 1.3845412731170654, "learning_rate": 0.00019313574534259216, "loss": 1.2557, "step": 6170 }, { "epoch": 0.2377285851780558, "grad_norm": 1.3509567975997925, "learning_rate": 0.00019312473065773893, "loss": 1.3083, "step": 6175 }, { "epoch": 0.2379210779595765, "grad_norm": 1.358113408088684, "learning_rate": 0.000193113707457246, "loss": 1.2226, "step": 6180 }, { "epoch": 0.2381135707410972, "grad_norm": 0.9598337411880493, "learning_rate": 0.00019310267574212134, "loss": 1.1861, "step": 6185 }, { "epoch": 0.2383060635226179, "grad_norm": 1.347159743309021, "learning_rate": 0.0001930916355133737, "loss": 1.2782, "step": 6190 }, { "epoch": 0.2384985563041386, "grad_norm": 1.0227164030075073, "learning_rate": 0.0001930805867720127, "loss": 1.2909, "step": 6195 }, { "epoch": 0.2386910490856593, "grad_norm": 1.8373135328292847, "learning_rate": 0.00019306952951904865, "loss": 1.3371, "step": 6200 }, { "epoch": 0.23888354186717997, "grad_norm": 2.130218267440796, "learning_rate": 0.00019305846375549263, "loss": 1.3275, "step": 6205 }, { "epoch": 0.23907603464870067, "grad_norm": 1.3699109554290771, "learning_rate": 0.00019304738948235656, "loss": 1.172, "step": 6210 }, { "epoch": 0.23926852743022137, "grad_norm": 1.8254964351654053, "learning_rate": 0.0001930363067006531, "loss": 1.166, "step": 6215 }, { "epoch": 0.23946102021174206, "grad_norm": 2.6475026607513428, "learning_rate": 0.00019302521541139571, "loss": 1.3168, "step": 6220 }, { "epoch": 0.23965351299326276, "grad_norm": 1.4869440793991089, "learning_rate": 0.0001930141156155986, "loss": 1.1112, "step": 6225 }, { "epoch": 0.23984600577478343, "grad_norm": 1.0316526889801025, "learning_rate": 0.00019300300731427678, "loss": 1.3845, "step": 6230 }, { "epoch": 0.24003849855630413, "grad_norm": 1.1549556255340576, "learning_rate": 0.00019299189050844603, "loss": 1.378, "step": 
6235 }, { "epoch": 0.24023099133782483, "grad_norm": 1.9833987951278687, "learning_rate": 0.00019298076519912294, "loss": 1.2631, "step": 6240 }, { "epoch": 0.24042348411934553, "grad_norm": 1.1354988813400269, "learning_rate": 0.00019296963138732478, "loss": 1.6525, "step": 6245 }, { "epoch": 0.24061597690086622, "grad_norm": 1.6483670473098755, "learning_rate": 0.0001929584890740697, "loss": 0.9828, "step": 6250 }, { "epoch": 0.24080846968238692, "grad_norm": 1.537610650062561, "learning_rate": 0.00019294733826037659, "loss": 1.3566, "step": 6255 }, { "epoch": 0.2410009624639076, "grad_norm": 1.207406759262085, "learning_rate": 0.0001929361789472651, "loss": 1.3306, "step": 6260 }, { "epoch": 0.2411934552454283, "grad_norm": 1.4772666692733765, "learning_rate": 0.00019292501113575572, "loss": 1.3117, "step": 6265 }, { "epoch": 0.241385948026949, "grad_norm": 1.8285613059997559, "learning_rate": 0.00019291383482686962, "loss": 1.3711, "step": 6270 }, { "epoch": 0.24157844080846969, "grad_norm": 0.9223503470420837, "learning_rate": 0.00019290265002162884, "loss": 1.1712, "step": 6275 }, { "epoch": 0.24177093358999038, "grad_norm": 2.1818087100982666, "learning_rate": 0.00019289145672105612, "loss": 1.1596, "step": 6280 }, { "epoch": 0.24196342637151108, "grad_norm": 0.8749092817306519, "learning_rate": 0.00019288025492617504, "loss": 1.0726, "step": 6285 }, { "epoch": 0.24215591915303175, "grad_norm": 1.1598855257034302, "learning_rate": 0.00019286904463800995, "loss": 1.2931, "step": 6290 }, { "epoch": 0.24234841193455245, "grad_norm": 1.4357101917266846, "learning_rate": 0.0001928578258575859, "loss": 1.2612, "step": 6295 }, { "epoch": 0.24254090471607315, "grad_norm": 0.9731203317642212, "learning_rate": 0.0001928465985859288, "loss": 1.178, "step": 6300 }, { "epoch": 0.24273339749759384, "grad_norm": 1.1217381954193115, "learning_rate": 0.00019283536282406534, "loss": 1.285, "step": 6305 }, { "epoch": 0.24292589027911454, "grad_norm": 1.415860891342163, 
"learning_rate": 0.0001928241185730229, "loss": 1.399, "step": 6310 }, { "epoch": 0.2431183830606352, "grad_norm": 0.9067175388336182, "learning_rate": 0.00019281286583382973, "loss": 1.2336, "step": 6315 }, { "epoch": 0.2433108758421559, "grad_norm": 1.6320233345031738, "learning_rate": 0.0001928016046075148, "loss": 1.4348, "step": 6320 }, { "epoch": 0.2435033686236766, "grad_norm": 1.3945854902267456, "learning_rate": 0.0001927903348951079, "loss": 1.1614, "step": 6325 }, { "epoch": 0.2436958614051973, "grad_norm": 1.37948477268219, "learning_rate": 0.00019277905669763952, "loss": 1.2058, "step": 6330 }, { "epoch": 0.243888354186718, "grad_norm": 1.3325083255767822, "learning_rate": 0.00019276777001614104, "loss": 1.2737, "step": 6335 }, { "epoch": 0.2440808469682387, "grad_norm": 1.5902581214904785, "learning_rate": 0.00019275647485164453, "loss": 1.3706, "step": 6340 }, { "epoch": 0.24427333974975937, "grad_norm": 1.1309142112731934, "learning_rate": 0.00019274517120518284, "loss": 1.2408, "step": 6345 }, { "epoch": 0.24446583253128007, "grad_norm": 1.9998489618301392, "learning_rate": 0.0001927338590777896, "loss": 1.3079, "step": 6350 }, { "epoch": 0.24465832531280077, "grad_norm": 1.569667100906372, "learning_rate": 0.00019272253847049927, "loss": 1.2365, "step": 6355 }, { "epoch": 0.24485081809432147, "grad_norm": 1.2294694185256958, "learning_rate": 0.00019271120938434702, "loss": 1.3544, "step": 6360 }, { "epoch": 0.24504331087584216, "grad_norm": 1.9876806735992432, "learning_rate": 0.00019269987182036883, "loss": 1.3675, "step": 6365 }, { "epoch": 0.24523580365736286, "grad_norm": 1.3317819833755493, "learning_rate": 0.0001926885257796015, "loss": 1.0949, "step": 6370 }, { "epoch": 0.24542829643888353, "grad_norm": 1.7602546215057373, "learning_rate": 0.00019267717126308242, "loss": 1.3168, "step": 6375 }, { "epoch": 0.24562078922040423, "grad_norm": 1.5651274919509888, "learning_rate": 0.00019266580827184996, "loss": 1.2802, "step": 6380 }, { "epoch": 
0.24581328200192493, "grad_norm": 0.9537544846534729, "learning_rate": 0.0001926544368069432, "loss": 1.1876, "step": 6385 }, { "epoch": 0.24600577478344562, "grad_norm": 0.9649773240089417, "learning_rate": 0.000192643056869402, "loss": 1.1378, "step": 6390 }, { "epoch": 0.24619826756496632, "grad_norm": 1.6363686323165894, "learning_rate": 0.00019263166846026692, "loss": 1.3284, "step": 6395 }, { "epoch": 0.24639076034648702, "grad_norm": 1.748897910118103, "learning_rate": 0.00019262027158057943, "loss": 1.4314, "step": 6400 }, { "epoch": 0.2465832531280077, "grad_norm": 2.138967990875244, "learning_rate": 0.00019260886623138164, "loss": 1.2244, "step": 6405 }, { "epoch": 0.2467757459095284, "grad_norm": 2.517312526702881, "learning_rate": 0.0001925974524137165, "loss": 1.3394, "step": 6410 }, { "epoch": 0.24696823869104909, "grad_norm": 1.7510714530944824, "learning_rate": 0.00019258603012862772, "loss": 1.3369, "step": 6415 }, { "epoch": 0.24716073147256978, "grad_norm": 1.1651504039764404, "learning_rate": 0.00019257459937715985, "loss": 1.2953, "step": 6420 }, { "epoch": 0.24735322425409048, "grad_norm": 1.325554609298706, "learning_rate": 0.0001925631601603581, "loss": 1.3062, "step": 6425 }, { "epoch": 0.24754571703561115, "grad_norm": 1.0340043306350708, "learning_rate": 0.00019255171247926852, "loss": 1.337, "step": 6430 }, { "epoch": 0.24773820981713185, "grad_norm": 1.677131175994873, "learning_rate": 0.00019254025633493792, "loss": 1.3179, "step": 6435 }, { "epoch": 0.24793070259865255, "grad_norm": 2.475339651107788, "learning_rate": 0.00019252879172841395, "loss": 1.4765, "step": 6440 }, { "epoch": 0.24812319538017324, "grad_norm": 1.1302917003631592, "learning_rate": 0.00019251731866074486, "loss": 1.3029, "step": 6445 }, { "epoch": 0.24831568816169394, "grad_norm": 1.3425379991531372, "learning_rate": 0.0001925058371329799, "loss": 1.1263, "step": 6450 }, { "epoch": 0.24850818094321464, "grad_norm": 1.0058633089065552, "learning_rate": 
0.0001924943471461689, "loss": 1.1059, "step": 6455 }, { "epoch": 0.2487006737247353, "grad_norm": 1.9793190956115723, "learning_rate": 0.0001924828487013626, "loss": 1.5268, "step": 6460 }, { "epoch": 0.248893166506256, "grad_norm": 1.0673744678497314, "learning_rate": 0.00019247134179961242, "loss": 1.2199, "step": 6465 }, { "epoch": 0.2490856592877767, "grad_norm": 1.1182838678359985, "learning_rate": 0.00019245982644197057, "loss": 1.5456, "step": 6470 }, { "epoch": 0.2492781520692974, "grad_norm": 0.9264312982559204, "learning_rate": 0.00019244830262949014, "loss": 1.2367, "step": 6475 }, { "epoch": 0.2494706448508181, "grad_norm": 1.2094528675079346, "learning_rate": 0.00019243677036322478, "loss": 1.2026, "step": 6480 }, { "epoch": 0.2496631376323388, "grad_norm": 1.275902509689331, "learning_rate": 0.00019242522964422917, "loss": 1.206, "step": 6485 }, { "epoch": 0.24985563041385947, "grad_norm": 1.515559434890747, "learning_rate": 0.00019241368047355853, "loss": 1.2222, "step": 6490 }, { "epoch": 0.25004812319538017, "grad_norm": 0.9974495768547058, "learning_rate": 0.000192402122852269, "loss": 1.5274, "step": 6495 }, { "epoch": 0.2502406159769009, "grad_norm": 1.8940407037734985, "learning_rate": 0.00019239055678141746, "loss": 1.3639, "step": 6500 }, { "epoch": 0.25043310875842156, "grad_norm": 1.7484371662139893, "learning_rate": 0.00019237898226206153, "loss": 1.3517, "step": 6505 }, { "epoch": 0.25062560153994223, "grad_norm": 1.004660725593567, "learning_rate": 0.00019236739929525963, "loss": 1.0603, "step": 6510 }, { "epoch": 0.25081809432146296, "grad_norm": 0.9729489684104919, "learning_rate": 0.00019235580788207093, "loss": 1.3252, "step": 6515 }, { "epoch": 0.25101058710298363, "grad_norm": 0.4645654857158661, "learning_rate": 0.00019234420802355539, "loss": 1.1804, "step": 6520 }, { "epoch": 0.25120307988450435, "grad_norm": 1.0810743570327759, "learning_rate": 0.00019233259972077378, "loss": 1.3045, "step": 6525 }, { "epoch": 
0.251395572666025, "grad_norm": 1.1666224002838135, "learning_rate": 0.00019232098297478756, "loss": 1.324, "step": 6530 }, { "epoch": 0.2515880654475457, "grad_norm": 1.06947660446167, "learning_rate": 0.000192309357786659, "loss": 1.3131, "step": 6535 }, { "epoch": 0.2517805582290664, "grad_norm": 1.1774028539657593, "learning_rate": 0.0001922977241574512, "loss": 1.301, "step": 6540 }, { "epoch": 0.2519730510105871, "grad_norm": 1.528041958808899, "learning_rate": 0.0001922860820882279, "loss": 1.2542, "step": 6545 }, { "epoch": 0.2521655437921078, "grad_norm": 1.1932915449142456, "learning_rate": 0.00019227443158005377, "loss": 1.125, "step": 6550 }, { "epoch": 0.2523580365736285, "grad_norm": 1.3258370161056519, "learning_rate": 0.0001922627726339941, "loss": 1.3776, "step": 6555 }, { "epoch": 0.25255052935514916, "grad_norm": 0.994076132774353, "learning_rate": 0.0001922511052511151, "loss": 1.0908, "step": 6560 }, { "epoch": 0.2527430221366699, "grad_norm": 1.0820032358169556, "learning_rate": 0.00019223942943248358, "loss": 1.215, "step": 6565 }, { "epoch": 0.25293551491819055, "grad_norm": 0.9792138338088989, "learning_rate": 0.00019222774517916734, "loss": 1.2413, "step": 6570 }, { "epoch": 0.2531280076997113, "grad_norm": 1.1704801321029663, "learning_rate": 0.0001922160524922347, "loss": 1.5203, "step": 6575 }, { "epoch": 0.25332050048123195, "grad_norm": 1.6249198913574219, "learning_rate": 0.00019220435137275494, "loss": 1.2771, "step": 6580 }, { "epoch": 0.2535129932627527, "grad_norm": 1.3218034505844116, "learning_rate": 0.00019219264182179804, "loss": 1.4433, "step": 6585 }, { "epoch": 0.25370548604427334, "grad_norm": 1.7230724096298218, "learning_rate": 0.0001921809238404348, "loss": 1.1069, "step": 6590 }, { "epoch": 0.253897978825794, "grad_norm": 1.3148738145828247, "learning_rate": 0.00019216919742973669, "loss": 1.2386, "step": 6595 }, { "epoch": 0.25409047160731474, "grad_norm": 1.257513403892517, "learning_rate": 0.00019215746259077605, 
"loss": 1.3476, "step": 6600 }, { "epoch": 0.2542829643888354, "grad_norm": 0.965403139591217, "learning_rate": 0.00019214571932462592, "loss": 1.1045, "step": 6605 }, { "epoch": 0.25447545717035613, "grad_norm": 0.8903887867927551, "learning_rate": 0.0001921339676323602, "loss": 1.1481, "step": 6610 }, { "epoch": 0.2546679499518768, "grad_norm": 1.284529209136963, "learning_rate": 0.00019212220751505345, "loss": 1.3179, "step": 6615 }, { "epoch": 0.2548604427333975, "grad_norm": 2.3491082191467285, "learning_rate": 0.0001921104389737811, "loss": 1.3042, "step": 6620 }, { "epoch": 0.2550529355149182, "grad_norm": 1.4170057773590088, "learning_rate": 0.00019209866200961927, "loss": 1.3775, "step": 6625 }, { "epoch": 0.25524542829643887, "grad_norm": 1.4182847738265991, "learning_rate": 0.00019208687662364488, "loss": 1.3895, "step": 6630 }, { "epoch": 0.2554379210779596, "grad_norm": 1.2162110805511475, "learning_rate": 0.00019207508281693568, "loss": 1.0754, "step": 6635 }, { "epoch": 0.25563041385948027, "grad_norm": 1.473873257637024, "learning_rate": 0.00019206328059057006, "loss": 1.3323, "step": 6640 }, { "epoch": 0.25582290664100094, "grad_norm": 1.2990386486053467, "learning_rate": 0.0001920514699456273, "loss": 1.2304, "step": 6645 }, { "epoch": 0.25601539942252166, "grad_norm": 1.2828303575515747, "learning_rate": 0.00019203965088318743, "loss": 1.2566, "step": 6650 }, { "epoch": 0.25620789220404233, "grad_norm": 0.9165570735931396, "learning_rate": 0.00019202782340433115, "loss": 1.2186, "step": 6655 }, { "epoch": 0.25640038498556306, "grad_norm": 2.0381886959075928, "learning_rate": 0.00019201598751014006, "loss": 1.114, "step": 6660 }, { "epoch": 0.2565928777670837, "grad_norm": 1.252790093421936, "learning_rate": 0.00019200414320169647, "loss": 1.2354, "step": 6665 }, { "epoch": 0.25678537054860445, "grad_norm": 1.1557594537734985, "learning_rate": 0.00019199229048008347, "loss": 1.3652, "step": 6670 }, { "epoch": 0.2569778633301251, "grad_norm": 
1.356181025505066, "learning_rate": 0.0001919804293463849, "loss": 1.1026, "step": 6675 }, { "epoch": 0.2571703561116458, "grad_norm": 1.2493314743041992, "learning_rate": 0.00019196855980168536, "loss": 1.2225, "step": 6680 }, { "epoch": 0.2573628488931665, "grad_norm": 1.7480677366256714, "learning_rate": 0.00019195668184707025, "loss": 1.2898, "step": 6685 }, { "epoch": 0.2575553416746872, "grad_norm": 1.0522620677947998, "learning_rate": 0.00019194479548362577, "loss": 1.1404, "step": 6690 }, { "epoch": 0.2577478344562079, "grad_norm": 1.4085676670074463, "learning_rate": 0.00019193290071243882, "loss": 1.5024, "step": 6695 }, { "epoch": 0.2579403272377286, "grad_norm": 1.393096923828125, "learning_rate": 0.0001919209975345971, "loss": 1.2555, "step": 6700 }, { "epoch": 0.25813282001924925, "grad_norm": 1.5740808248519897, "learning_rate": 0.00019190908595118907, "loss": 1.2362, "step": 6705 }, { "epoch": 0.25832531280077, "grad_norm": 1.3243273496627808, "learning_rate": 0.00019189716596330395, "loss": 1.2517, "step": 6710 }, { "epoch": 0.25851780558229065, "grad_norm": 2.5867626667022705, "learning_rate": 0.00019188523757203177, "loss": 1.3509, "step": 6715 }, { "epoch": 0.2587102983638114, "grad_norm": 1.450181484222412, "learning_rate": 0.00019187330077846334, "loss": 1.3451, "step": 6720 }, { "epoch": 0.25890279114533205, "grad_norm": 1.4387754201889038, "learning_rate": 0.0001918613555836901, "loss": 1.2518, "step": 6725 }, { "epoch": 0.25909528392685277, "grad_norm": 1.427882432937622, "learning_rate": 0.00019184940198880448, "loss": 1.235, "step": 6730 }, { "epoch": 0.25928777670837344, "grad_norm": 1.060436487197876, "learning_rate": 0.00019183743999489947, "loss": 1.4583, "step": 6735 }, { "epoch": 0.2594802694898941, "grad_norm": 1.0780494213104248, "learning_rate": 0.00019182546960306893, "loss": 1.1134, "step": 6740 }, { "epoch": 0.25967276227141484, "grad_norm": 1.3795710802078247, "learning_rate": 0.0001918134908144075, "loss": 1.2979, "step": 
6745 }, { "epoch": 0.2598652550529355, "grad_norm": 2.0972957611083984, "learning_rate": 0.00019180150363001051, "loss": 1.6512, "step": 6750 }, { "epoch": 0.26005774783445623, "grad_norm": 1.129204273223877, "learning_rate": 0.00019178950805097416, "loss": 1.2263, "step": 6755 }, { "epoch": 0.2602502406159769, "grad_norm": 0.8816843628883362, "learning_rate": 0.00019177750407839536, "loss": 1.2265, "step": 6760 }, { "epoch": 0.26044273339749757, "grad_norm": 1.5167860984802246, "learning_rate": 0.00019176549171337178, "loss": 1.226, "step": 6765 }, { "epoch": 0.2606352261790183, "grad_norm": 1.329172968864441, "learning_rate": 0.00019175347095700188, "loss": 1.3375, "step": 6770 }, { "epoch": 0.26082771896053897, "grad_norm": 1.8215051889419556, "learning_rate": 0.00019174144181038485, "loss": 1.2453, "step": 6775 }, { "epoch": 0.2610202117420597, "grad_norm": 1.147878646850586, "learning_rate": 0.00019172940427462072, "loss": 1.3137, "step": 6780 }, { "epoch": 0.26121270452358036, "grad_norm": 1.5783206224441528, "learning_rate": 0.0001917173583508102, "loss": 1.1803, "step": 6785 }, { "epoch": 0.26140519730510103, "grad_norm": 1.7433182001113892, "learning_rate": 0.00019170530404005485, "loss": 1.171, "step": 6790 }, { "epoch": 0.26159769008662176, "grad_norm": 1.5278960466384888, "learning_rate": 0.0001916932413434569, "loss": 1.2274, "step": 6795 }, { "epoch": 0.26179018286814243, "grad_norm": 1.375710368156433, "learning_rate": 0.00019168117026211948, "loss": 1.241, "step": 6800 }, { "epoch": 0.26198267564966315, "grad_norm": 2.146165370941162, "learning_rate": 0.00019166909079714636, "loss": 1.2778, "step": 6805 }, { "epoch": 0.2621751684311838, "grad_norm": 1.7670506238937378, "learning_rate": 0.00019165700294964216, "loss": 1.3293, "step": 6810 }, { "epoch": 0.26236766121270455, "grad_norm": 1.5492186546325684, "learning_rate": 0.00019164490672071217, "loss": 1.2808, "step": 6815 }, { "epoch": 0.2625601539942252, "grad_norm": 1.4138727188110352, 
"learning_rate": 0.00019163280211146257, "loss": 1.2352, "step": 6820 }, { "epoch": 0.2627526467757459, "grad_norm": 1.185674786567688, "learning_rate": 0.00019162068912300024, "loss": 1.1883, "step": 6825 }, { "epoch": 0.2629451395572666, "grad_norm": 1.717349886894226, "learning_rate": 0.0001916085677564328, "loss": 1.1329, "step": 6830 }, { "epoch": 0.2631376323387873, "grad_norm": 1.1391080617904663, "learning_rate": 0.00019159643801286872, "loss": 1.4104, "step": 6835 }, { "epoch": 0.263330125120308, "grad_norm": 1.0915690660476685, "learning_rate": 0.00019158429989341716, "loss": 1.2813, "step": 6840 }, { "epoch": 0.2635226179018287, "grad_norm": 1.120492696762085, "learning_rate": 0.000191572153399188, "loss": 1.2669, "step": 6845 }, { "epoch": 0.26371511068334935, "grad_norm": 1.0648150444030762, "learning_rate": 0.0001915599985312921, "loss": 1.2581, "step": 6850 }, { "epoch": 0.2639076034648701, "grad_norm": 1.7173513174057007, "learning_rate": 0.0001915478352908408, "loss": 1.2081, "step": 6855 }, { "epoch": 0.26410009624639075, "grad_norm": 1.3801002502441406, "learning_rate": 0.00019153566367894644, "loss": 1.4625, "step": 6860 }, { "epoch": 0.2642925890279115, "grad_norm": 2.5863940715789795, "learning_rate": 0.00019152348369672203, "loss": 1.4777, "step": 6865 }, { "epoch": 0.26448508180943214, "grad_norm": 1.5995707511901855, "learning_rate": 0.0001915112953452813, "loss": 1.2089, "step": 6870 }, { "epoch": 0.2646775745909528, "grad_norm": 1.2661023139953613, "learning_rate": 0.0001914990986257388, "loss": 1.1937, "step": 6875 }, { "epoch": 0.26487006737247354, "grad_norm": 1.4782702922821045, "learning_rate": 0.00019148689353920987, "loss": 1.2462, "step": 6880 }, { "epoch": 0.2650625601539942, "grad_norm": 1.8557063341140747, "learning_rate": 0.0001914746800868106, "loss": 1.425, "step": 6885 }, { "epoch": 0.26525505293551493, "grad_norm": 2.825359582901001, "learning_rate": 0.00019146245826965775, "loss": 1.3628, "step": 6890 }, { "epoch": 
0.2654475457170356, "grad_norm": 1.7262654304504395, "learning_rate": 0.00019145022808886902, "loss": 1.2902, "step": 6895 }, { "epoch": 0.26564003849855633, "grad_norm": 0.9676236510276794, "learning_rate": 0.00019143798954556268, "loss": 1.3342, "step": 6900 }, { "epoch": 0.265832531280077, "grad_norm": 1.4607850313186646, "learning_rate": 0.00019142574264085797, "loss": 1.3084, "step": 6905 }, { "epoch": 0.26602502406159767, "grad_norm": 2.181511878967285, "learning_rate": 0.0001914134873758747, "loss": 1.1746, "step": 6910 }, { "epoch": 0.2662175168431184, "grad_norm": 1.4534579515457153, "learning_rate": 0.00019140122375173362, "loss": 1.3071, "step": 6915 }, { "epoch": 0.26641000962463907, "grad_norm": 1.607039213180542, "learning_rate": 0.00019138895176955604, "loss": 1.2883, "step": 6920 }, { "epoch": 0.2666025024061598, "grad_norm": 0.9929762482643127, "learning_rate": 0.00019137667143046425, "loss": 1.1122, "step": 6925 }, { "epoch": 0.26679499518768046, "grad_norm": 1.6732393503189087, "learning_rate": 0.0001913643827355812, "loss": 1.149, "step": 6930 }, { "epoch": 0.26698748796920113, "grad_norm": 1.3785120248794556, "learning_rate": 0.0001913520856860305, "loss": 1.3759, "step": 6935 }, { "epoch": 0.26717998075072186, "grad_norm": 1.8252770900726318, "learning_rate": 0.0001913397802829368, "loss": 1.2633, "step": 6940 }, { "epoch": 0.2673724735322425, "grad_norm": 1.6789536476135254, "learning_rate": 0.0001913274665274252, "loss": 1.2741, "step": 6945 }, { "epoch": 0.26756496631376325, "grad_norm": 2.0153861045837402, "learning_rate": 0.00019131514442062184, "loss": 1.196, "step": 6950 }, { "epoch": 0.2677574590952839, "grad_norm": 1.0000704526901245, "learning_rate": 0.0001913028139636534, "loss": 1.1872, "step": 6955 }, { "epoch": 0.2679499518768046, "grad_norm": 1.2803142070770264, "learning_rate": 0.00019129047515764743, "loss": 1.2655, "step": 6960 }, { "epoch": 0.2681424446583253, "grad_norm": 0.9827659130096436, "learning_rate": 
0.00019127812800373225, "loss": 1.3503, "step": 6965 }, { "epoch": 0.268334937439846, "grad_norm": 1.3766348361968994, "learning_rate": 0.00019126577250303697, "loss": 1.2851, "step": 6970 }, { "epoch": 0.2685274302213667, "grad_norm": 2.285708427429199, "learning_rate": 0.00019125340865669134, "loss": 1.3247, "step": 6975 }, { "epoch": 0.2687199230028874, "grad_norm": 1.79937744140625, "learning_rate": 0.000191241036465826, "loss": 1.0306, "step": 6980 }, { "epoch": 0.2689124157844081, "grad_norm": 1.6062885522842407, "learning_rate": 0.0001912286559315723, "loss": 1.2068, "step": 6985 }, { "epoch": 0.2691049085659288, "grad_norm": 1.9590744972229004, "learning_rate": 0.00019121626705506233, "loss": 1.2195, "step": 6990 }, { "epoch": 0.26929740134744945, "grad_norm": 1.366186261177063, "learning_rate": 0.000191203869837429, "loss": 1.1627, "step": 6995 }, { "epoch": 0.2694898941289702, "grad_norm": 0.9655261635780334, "learning_rate": 0.00019119146427980593, "loss": 1.053, "step": 7000 }, { "epoch": 0.26968238691049085, "grad_norm": 1.4636151790618896, "learning_rate": 0.00019117905038332756, "loss": 1.0954, "step": 7005 }, { "epoch": 0.26987487969201157, "grad_norm": 1.4435783624649048, "learning_rate": 0.00019116662814912903, "loss": 1.2102, "step": 7010 }, { "epoch": 0.27006737247353224, "grad_norm": 0.9880768060684204, "learning_rate": 0.00019115419757834628, "loss": 1.0698, "step": 7015 }, { "epoch": 0.2702598652550529, "grad_norm": 1.516515851020813, "learning_rate": 0.000191141758672116, "loss": 1.3894, "step": 7020 }, { "epoch": 0.27045235803657364, "grad_norm": 2.1763806343078613, "learning_rate": 0.00019112931143157563, "loss": 1.3794, "step": 7025 }, { "epoch": 0.2706448508180943, "grad_norm": 1.2275705337524414, "learning_rate": 0.00019111685585786344, "loss": 1.2897, "step": 7030 }, { "epoch": 0.27083734359961503, "grad_norm": 0.966526985168457, "learning_rate": 0.00019110439195211835, "loss": 1.2112, "step": 7035 }, { "epoch": 0.2710298363811357, 
"grad_norm": 1.251911997795105, "learning_rate": 0.00019109191971548016, "loss": 1.2481, "step": 7040 }, { "epoch": 0.27122232916265643, "grad_norm": 2.3555140495300293, "learning_rate": 0.0001910794391490893, "loss": 1.3372, "step": 7045 }, { "epoch": 0.2714148219441771, "grad_norm": 1.229268193244934, "learning_rate": 0.0001910669502540871, "loss": 1.4362, "step": 7050 }, { "epoch": 0.27160731472569777, "grad_norm": 1.2356593608856201, "learning_rate": 0.00019105445303161555, "loss": 1.379, "step": 7055 }, { "epoch": 0.2717998075072185, "grad_norm": 1.910232424736023, "learning_rate": 0.00019104194748281747, "loss": 1.2902, "step": 7060 }, { "epoch": 0.27199230028873916, "grad_norm": 1.9058904647827148, "learning_rate": 0.0001910294336088364, "loss": 1.3313, "step": 7065 }, { "epoch": 0.2721847930702599, "grad_norm": 0.8631892800331116, "learning_rate": 0.0001910169114108166, "loss": 1.2843, "step": 7070 }, { "epoch": 0.27237728585178056, "grad_norm": 1.2212119102478027, "learning_rate": 0.0001910043808899032, "loss": 1.2588, "step": 7075 }, { "epoch": 0.27256977863330123, "grad_norm": 2.3140738010406494, "learning_rate": 0.00019099184204724202, "loss": 1.1781, "step": 7080 }, { "epoch": 0.27276227141482196, "grad_norm": 1.0162906646728516, "learning_rate": 0.00019097929488397965, "loss": 1.3433, "step": 7085 }, { "epoch": 0.2729547641963426, "grad_norm": 1.719766616821289, "learning_rate": 0.00019096673940126343, "loss": 1.1469, "step": 7090 }, { "epoch": 0.27314725697786335, "grad_norm": 1.5173147916793823, "learning_rate": 0.00019095417560024153, "loss": 1.1663, "step": 7095 }, { "epoch": 0.273339749759384, "grad_norm": 2.1228654384613037, "learning_rate": 0.00019094160348206277, "loss": 1.3433, "step": 7100 }, { "epoch": 0.2735322425409047, "grad_norm": 1.3896198272705078, "learning_rate": 0.00019092902304787679, "loss": 1.1782, "step": 7105 }, { "epoch": 0.2737247353224254, "grad_norm": 1.6935322284698486, "learning_rate": 0.00019091643429883402, "loss": 
1.1867, "step": 7110 }, { "epoch": 0.2739172281039461, "grad_norm": 1.5454139709472656, "learning_rate": 0.00019090383723608558, "loss": 1.3938, "step": 7115 }, { "epoch": 0.2741097208854668, "grad_norm": 1.1493245363235474, "learning_rate": 0.00019089123186078342, "loss": 1.2127, "step": 7120 }, { "epoch": 0.2743022136669875, "grad_norm": 1.7321335077285767, "learning_rate": 0.00019087861817408021, "loss": 1.3068, "step": 7125 }, { "epoch": 0.2744947064485082, "grad_norm": 1.7654987573623657, "learning_rate": 0.00019086599617712936, "loss": 1.3236, "step": 7130 }, { "epoch": 0.2746871992300289, "grad_norm": 1.0047959089279175, "learning_rate": 0.0001908533658710851, "loss": 1.404, "step": 7135 }, { "epoch": 0.27487969201154955, "grad_norm": 1.9708582162857056, "learning_rate": 0.0001908407272571024, "loss": 1.2387, "step": 7140 }, { "epoch": 0.2750721847930703, "grad_norm": 2.097369432449341, "learning_rate": 0.00019082808033633696, "loss": 1.189, "step": 7145 }, { "epoch": 0.27526467757459094, "grad_norm": 1.1789932250976562, "learning_rate": 0.00019081542510994523, "loss": 1.4815, "step": 7150 }, { "epoch": 0.27545717035611167, "grad_norm": 1.7205069065093994, "learning_rate": 0.00019080276157908447, "loss": 1.2906, "step": 7155 }, { "epoch": 0.27564966313763234, "grad_norm": 1.7320606708526611, "learning_rate": 0.0001907900897449127, "loss": 1.339, "step": 7160 }, { "epoch": 0.275842155919153, "grad_norm": 2.100649356842041, "learning_rate": 0.00019077740960858863, "loss": 1.3145, "step": 7165 }, { "epoch": 0.27603464870067373, "grad_norm": 1.9302312135696411, "learning_rate": 0.00019076472117127182, "loss": 1.3082, "step": 7170 }, { "epoch": 0.2762271414821944, "grad_norm": 0.5863549113273621, "learning_rate": 0.0001907520244341225, "loss": 1.0183, "step": 7175 }, { "epoch": 0.27641963426371513, "grad_norm": 1.0428977012634277, "learning_rate": 0.00019073931939830174, "loss": 1.2488, "step": 7180 }, { "epoch": 0.2766121270452358, "grad_norm": 
1.1643081903457642, "learning_rate": 0.0001907266060649713, "loss": 1.476, "step": 7185 }, { "epoch": 0.27680461982675647, "grad_norm": 1.0771207809448242, "learning_rate": 0.00019071388443529376, "loss": 1.3134, "step": 7190 }, { "epoch": 0.2769971126082772, "grad_norm": 1.9787309169769287, "learning_rate": 0.00019070115451043238, "loss": 1.3884, "step": 7195 }, { "epoch": 0.27718960538979787, "grad_norm": 2.095546245574951, "learning_rate": 0.0001906884162915513, "loss": 1.1221, "step": 7200 }, { "epoch": 0.2773820981713186, "grad_norm": 2.0389225482940674, "learning_rate": 0.00019067566977981528, "loss": 1.0463, "step": 7205 }, { "epoch": 0.27757459095283926, "grad_norm": 0.9991855621337891, "learning_rate": 0.00019066291497638993, "loss": 1.341, "step": 7210 }, { "epoch": 0.27776708373436, "grad_norm": 1.411401391029358, "learning_rate": 0.0001906501518824416, "loss": 1.434, "step": 7215 }, { "epoch": 0.27795957651588066, "grad_norm": 1.61775803565979, "learning_rate": 0.0001906373804991374, "loss": 1.1553, "step": 7220 }, { "epoch": 0.2781520692974013, "grad_norm": 2.546022653579712, "learning_rate": 0.00019062460082764515, "loss": 1.2496, "step": 7225 }, { "epoch": 0.27834456207892205, "grad_norm": 1.2731270790100098, "learning_rate": 0.00019061181286913348, "loss": 1.3236, "step": 7230 }, { "epoch": 0.2785370548604427, "grad_norm": 1.0163904428482056, "learning_rate": 0.00019059901662477177, "loss": 1.2854, "step": 7235 }, { "epoch": 0.27872954764196345, "grad_norm": 1.0653849840164185, "learning_rate": 0.0001905862120957302, "loss": 1.6351, "step": 7240 }, { "epoch": 0.2789220404234841, "grad_norm": 1.081264853477478, "learning_rate": 0.00019057339928317958, "loss": 1.2466, "step": 7245 }, { "epoch": 0.2791145332050048, "grad_norm": 1.3285462856292725, "learning_rate": 0.00019056057818829156, "loss": 1.2087, "step": 7250 }, { "epoch": 0.2793070259865255, "grad_norm": 1.067254900932312, "learning_rate": 0.0001905477488122386, "loss": 1.3877, "step": 7255 }, 
{ "epoch": 0.2794995187680462, "grad_norm": 0.9383085370063782, "learning_rate": 0.0001905349111561938, "loss": 1.0643, "step": 7260 }, { "epoch": 0.2796920115495669, "grad_norm": 2.7797493934631348, "learning_rate": 0.00019052206522133117, "loss": 1.3828, "step": 7265 }, { "epoch": 0.2798845043310876, "grad_norm": 1.410261631011963, "learning_rate": 0.0001905092110088253, "loss": 1.3019, "step": 7270 }, { "epoch": 0.28007699711260825, "grad_norm": 2.313541889190674, "learning_rate": 0.0001904963485198517, "loss": 1.2058, "step": 7275 }, { "epoch": 0.280269489894129, "grad_norm": 1.4474842548370361, "learning_rate": 0.00019048347775558645, "loss": 1.2187, "step": 7280 }, { "epoch": 0.28046198267564965, "grad_norm": 1.5846171379089355, "learning_rate": 0.00019047059871720657, "loss": 1.0326, "step": 7285 }, { "epoch": 0.28065447545717037, "grad_norm": 1.1118413209915161, "learning_rate": 0.00019045771140588976, "loss": 1.2881, "step": 7290 }, { "epoch": 0.28084696823869104, "grad_norm": 2.5894134044647217, "learning_rate": 0.00019044481582281448, "loss": 1.3885, "step": 7295 }, { "epoch": 0.28103946102021177, "grad_norm": 1.6019679307937622, "learning_rate": 0.00019043191196915993, "loss": 1.3247, "step": 7300 }, { "epoch": 0.28123195380173244, "grad_norm": 1.3384417295455933, "learning_rate": 0.00019041899984610606, "loss": 1.346, "step": 7305 }, { "epoch": 0.2814244465832531, "grad_norm": 1.3584142923355103, "learning_rate": 0.00019040607945483367, "loss": 1.3418, "step": 7310 }, { "epoch": 0.28161693936477383, "grad_norm": 1.379162073135376, "learning_rate": 0.00019039315079652416, "loss": 1.293, "step": 7315 }, { "epoch": 0.2818094321462945, "grad_norm": 1.499841570854187, "learning_rate": 0.00019038021387235982, "loss": 1.2131, "step": 7320 }, { "epoch": 0.28200192492781523, "grad_norm": 1.9813991785049438, "learning_rate": 0.00019036726868352366, "loss": 1.3282, "step": 7325 }, { "epoch": 0.2821944177093359, "grad_norm": 1.404096245765686, "learning_rate": 
0.00019035431523119938, "loss": 1.2238, "step": 7330 }, { "epoch": 0.28238691049085657, "grad_norm": 1.1089609861373901, "learning_rate": 0.00019034135351657152, "loss": 1.1705, "step": 7335 }, { "epoch": 0.2825794032723773, "grad_norm": 1.0567266941070557, "learning_rate": 0.00019032838354082535, "loss": 1.1228, "step": 7340 }, { "epoch": 0.28277189605389796, "grad_norm": 1.2407151460647583, "learning_rate": 0.00019031540530514685, "loss": 1.1154, "step": 7345 }, { "epoch": 0.2829643888354187, "grad_norm": 1.3094842433929443, "learning_rate": 0.00019030241881072283, "loss": 1.2251, "step": 7350 }, { "epoch": 0.28315688161693936, "grad_norm": 0.9434831142425537, "learning_rate": 0.00019028942405874082, "loss": 1.0644, "step": 7355 }, { "epoch": 0.2833493743984601, "grad_norm": 1.107958197593689, "learning_rate": 0.0001902764210503891, "loss": 1.295, "step": 7360 }, { "epoch": 0.28354186717998076, "grad_norm": 1.4402803182601929, "learning_rate": 0.00019026340978685666, "loss": 1.3339, "step": 7365 }, { "epoch": 0.2837343599615014, "grad_norm": 1.1564158201217651, "learning_rate": 0.0001902503902693334, "loss": 1.252, "step": 7370 }, { "epoch": 0.28392685274302215, "grad_norm": 1.8258494138717651, "learning_rate": 0.00019023736249900973, "loss": 1.3495, "step": 7375 }, { "epoch": 0.2841193455245428, "grad_norm": 1.1436362266540527, "learning_rate": 0.00019022432647707708, "loss": 1.4295, "step": 7380 }, { "epoch": 0.28431183830606355, "grad_norm": 1.1649361848831177, "learning_rate": 0.00019021128220472747, "loss": 1.3438, "step": 7385 }, { "epoch": 0.2845043310875842, "grad_norm": 1.7044711112976074, "learning_rate": 0.00019019822968315364, "loss": 1.2735, "step": 7390 }, { "epoch": 0.2846968238691049, "grad_norm": 0.8998376727104187, "learning_rate": 0.00019018516891354924, "loss": 1.1817, "step": 7395 }, { "epoch": 0.2848893166506256, "grad_norm": 1.8617538213729858, "learning_rate": 0.00019017209989710855, "loss": 1.3235, "step": 7400 }, { "epoch": 
0.2850818094321463, "grad_norm": 0.9981639981269836, "learning_rate": 0.00019015902263502669, "loss": 1.1171, "step": 7405 }, { "epoch": 0.285274302213667, "grad_norm": 0.935457170009613, "learning_rate": 0.00019014593712849944, "loss": 1.1926, "step": 7410 }, { "epoch": 0.2854667949951877, "grad_norm": 1.3465532064437866, "learning_rate": 0.00019013284337872341, "loss": 1.5102, "step": 7415 }, { "epoch": 0.28565928777670835, "grad_norm": 1.3213337659835815, "learning_rate": 0.00019011974138689595, "loss": 1.2597, "step": 7420 }, { "epoch": 0.2858517805582291, "grad_norm": 1.655229091644287, "learning_rate": 0.0001901066311542151, "loss": 1.0345, "step": 7425 }, { "epoch": 0.28604427333974974, "grad_norm": 1.0165207386016846, "learning_rate": 0.00019009351268187974, "loss": 1.2854, "step": 7430 }, { "epoch": 0.28623676612127047, "grad_norm": 1.3425116539001465, "learning_rate": 0.00019008038597108945, "loss": 1.381, "step": 7435 }, { "epoch": 0.28642925890279114, "grad_norm": 1.2017732858657837, "learning_rate": 0.0001900672510230446, "loss": 1.2171, "step": 7440 }, { "epoch": 0.28662175168431187, "grad_norm": 1.4958349466323853, "learning_rate": 0.00019005410783894626, "loss": 1.3524, "step": 7445 }, { "epoch": 0.28681424446583254, "grad_norm": 1.1109000444412231, "learning_rate": 0.00019004095641999636, "loss": 1.2046, "step": 7450 }, { "epoch": 0.2870067372473532, "grad_norm": 1.5347834825515747, "learning_rate": 0.00019002779676739745, "loss": 1.2295, "step": 7455 }, { "epoch": 0.28719923002887393, "grad_norm": 1.5204600095748901, "learning_rate": 0.00019001462888235286, "loss": 1.0319, "step": 7460 }, { "epoch": 0.2873917228103946, "grad_norm": 2.0644850730895996, "learning_rate": 0.00019000145276606677, "loss": 1.2371, "step": 7465 }, { "epoch": 0.2875842155919153, "grad_norm": 1.5903024673461914, "learning_rate": 0.00018998826841974407, "loss": 1.3781, "step": 7470 }, { "epoch": 0.287776708373436, "grad_norm": 1.045086145401001, "learning_rate": 
0.00018997507584459032, "loss": 1.0918, "step": 7475 }, { "epoch": 0.28796920115495667, "grad_norm": 1.499211311340332, "learning_rate": 0.0001899618750418119, "loss": 1.2377, "step": 7480 }, { "epoch": 0.2881616939364774, "grad_norm": 1.2885223627090454, "learning_rate": 0.00018994866601261597, "loss": 1.2936, "step": 7485 }, { "epoch": 0.28835418671799806, "grad_norm": 1.9687073230743408, "learning_rate": 0.00018993544875821035, "loss": 1.2043, "step": 7490 }, { "epoch": 0.2885466794995188, "grad_norm": 0.9758608937263489, "learning_rate": 0.00018992222327980375, "loss": 1.0775, "step": 7495 }, { "epoch": 0.28873917228103946, "grad_norm": 1.4256442785263062, "learning_rate": 0.00018990898957860547, "loss": 1.2608, "step": 7500 }, { "epoch": 0.28893166506256013, "grad_norm": 1.267991304397583, "learning_rate": 0.00018989574765582572, "loss": 1.3826, "step": 7505 }, { "epoch": 0.28912415784408085, "grad_norm": 1.4104158878326416, "learning_rate": 0.00018988249751267534, "loss": 1.1589, "step": 7510 }, { "epoch": 0.2893166506256015, "grad_norm": 0.9540778994560242, "learning_rate": 0.000189869239150366, "loss": 1.196, "step": 7515 }, { "epoch": 0.28950914340712225, "grad_norm": 4.175881385803223, "learning_rate": 0.00018985597257011006, "loss": 1.3408, "step": 7520 }, { "epoch": 0.2897016361886429, "grad_norm": 1.79558527469635, "learning_rate": 0.00018984269777312066, "loss": 1.0596, "step": 7525 }, { "epoch": 0.28989412897016364, "grad_norm": 1.5449460744857788, "learning_rate": 0.0001898294147606117, "loss": 1.2628, "step": 7530 }, { "epoch": 0.2900866217516843, "grad_norm": 1.5056041479110718, "learning_rate": 0.00018981612353379784, "loss": 1.132, "step": 7535 }, { "epoch": 0.290279114533205, "grad_norm": 1.7045507431030273, "learning_rate": 0.00018980282409389445, "loss": 1.1663, "step": 7540 }, { "epoch": 0.2904716073147257, "grad_norm": 1.203892469406128, "learning_rate": 0.00018978951644211766, "loss": 1.1168, "step": 7545 }, { "epoch": 0.2906641000962464, 
"grad_norm": 0.9239038228988647, "learning_rate": 0.0001897762005796844, "loss": 1.3328, "step": 7550 }, { "epoch": 0.2908565928777671, "grad_norm": 1.3521167039871216, "learning_rate": 0.00018976287650781238, "loss": 1.2766, "step": 7555 }, { "epoch": 0.2910490856592878, "grad_norm": 1.3824992179870605, "learning_rate": 0.00018974954422771987, "loss": 1.0153, "step": 7560 }, { "epoch": 0.29124157844080845, "grad_norm": 0.9183006286621094, "learning_rate": 0.00018973620374062607, "loss": 1.0558, "step": 7565 }, { "epoch": 0.29143407122232917, "grad_norm": 1.7128045558929443, "learning_rate": 0.0001897228550477509, "loss": 1.316, "step": 7570 }, { "epoch": 0.29162656400384984, "grad_norm": 1.3998011350631714, "learning_rate": 0.000189709498150315, "loss": 1.2359, "step": 7575 }, { "epoch": 0.29181905678537057, "grad_norm": 1.2251836061477661, "learning_rate": 0.00018969613304953975, "loss": 1.2464, "step": 7580 }, { "epoch": 0.29201154956689124, "grad_norm": 1.3014954328536987, "learning_rate": 0.00018968275974664734, "loss": 1.0624, "step": 7585 }, { "epoch": 0.2922040423484119, "grad_norm": 1.8785862922668457, "learning_rate": 0.00018966937824286062, "loss": 1.3491, "step": 7590 }, { "epoch": 0.29239653512993263, "grad_norm": 1.0634154081344604, "learning_rate": 0.00018965598853940327, "loss": 1.1012, "step": 7595 }, { "epoch": 0.2925890279114533, "grad_norm": 0.9114715456962585, "learning_rate": 0.00018964259063749967, "loss": 1.3738, "step": 7600 }, { "epoch": 0.29278152069297403, "grad_norm": 1.9063506126403809, "learning_rate": 0.00018962918453837503, "loss": 1.1161, "step": 7605 }, { "epoch": 0.2929740134744947, "grad_norm": 1.12264084815979, "learning_rate": 0.00018961577024325516, "loss": 1.4191, "step": 7610 }, { "epoch": 0.2931665062560154, "grad_norm": 1.4751306772232056, "learning_rate": 0.00018960234775336677, "loss": 1.2153, "step": 7615 }, { "epoch": 0.2933589990375361, "grad_norm": 1.4374860525131226, "learning_rate": 0.00018958891706993724, "loss": 
1.1999, "step": 7620 }, { "epoch": 0.29355149181905676, "grad_norm": 1.5792250633239746, "learning_rate": 0.0001895754781941947, "loss": 1.266, "step": 7625 }, { "epoch": 0.2937439846005775, "grad_norm": 1.3390734195709229, "learning_rate": 0.00018956203112736807, "loss": 1.2703, "step": 7630 }, { "epoch": 0.29393647738209816, "grad_norm": 1.2470978498458862, "learning_rate": 0.00018954857587068701, "loss": 1.0415, "step": 7635 }, { "epoch": 0.2941289701636189, "grad_norm": 1.6102235317230225, "learning_rate": 0.00018953511242538186, "loss": 1.2707, "step": 7640 }, { "epoch": 0.29432146294513956, "grad_norm": 1.334554672241211, "learning_rate": 0.0001895216407926838, "loss": 1.2672, "step": 7645 }, { "epoch": 0.2945139557266602, "grad_norm": 1.2881218194961548, "learning_rate": 0.00018950816097382475, "loss": 1.1641, "step": 7650 }, { "epoch": 0.29470644850818095, "grad_norm": 1.2150179147720337, "learning_rate": 0.00018949467297003732, "loss": 1.2636, "step": 7655 }, { "epoch": 0.2948989412897016, "grad_norm": 1.1388130187988281, "learning_rate": 0.00018948117678255485, "loss": 1.2354, "step": 7660 }, { "epoch": 0.29509143407122235, "grad_norm": 0.785776674747467, "learning_rate": 0.0001894676724126115, "loss": 1.2621, "step": 7665 }, { "epoch": 0.295283926852743, "grad_norm": 1.005819320678711, "learning_rate": 0.00018945415986144223, "loss": 1.1175, "step": 7670 }, { "epoch": 0.29547641963426374, "grad_norm": 2.2892065048217773, "learning_rate": 0.00018944063913028264, "loss": 1.148, "step": 7675 }, { "epoch": 0.2956689124157844, "grad_norm": 2.0920302867889404, "learning_rate": 0.00018942711022036903, "loss": 1.178, "step": 7680 }, { "epoch": 0.2958614051973051, "grad_norm": 1.228538155555725, "learning_rate": 0.00018941357313293863, "loss": 1.2499, "step": 7685 }, { "epoch": 0.2960538979788258, "grad_norm": 1.8671079874038696, "learning_rate": 0.00018940002786922925, "loss": 1.2361, "step": 7690 }, { "epoch": 0.2962463907603465, "grad_norm": 
1.7283247709274292, "learning_rate": 0.00018938647443047957, "loss": 1.2695, "step": 7695 }, { "epoch": 0.2964388835418672, "grad_norm": 1.9629713296890259, "learning_rate": 0.0001893729128179289, "loss": 1.5226, "step": 7700 }, { "epoch": 0.2966313763233879, "grad_norm": 1.2868784666061401, "learning_rate": 0.00018935934303281743, "loss": 1.3237, "step": 7705 }, { "epoch": 0.29682386910490854, "grad_norm": 1.3925827741622925, "learning_rate": 0.000189345765076386, "loss": 1.4075, "step": 7710 }, { "epoch": 0.29701636188642927, "grad_norm": 1.1560002565383911, "learning_rate": 0.0001893321789498762, "loss": 1.3212, "step": 7715 }, { "epoch": 0.29720885466794994, "grad_norm": 1.207263708114624, "learning_rate": 0.0001893185846545304, "loss": 1.3106, "step": 7720 }, { "epoch": 0.29740134744947067, "grad_norm": null, "learning_rate": 0.00018930770333752716, "loss": 1.5499, "step": 7725 }, { "epoch": 0.29759384023099134, "grad_norm": 1.2437909841537476, "learning_rate": 0.0001892940943414097, "loss": 1.2797, "step": 7730 }, { "epoch": 0.297786333012512, "grad_norm": 0.8919286131858826, "learning_rate": 0.00018928047717993885, "loss": 1.1074, "step": 7735 }, { "epoch": 0.29797882579403273, "grad_norm": 1.219995379447937, "learning_rate": 0.00018926685185435978, "loss": 1.0856, "step": 7740 }, { "epoch": 0.2981713185755534, "grad_norm": 0.8819857835769653, "learning_rate": 0.00018925321836591846, "loss": 1.3518, "step": 7745 }, { "epoch": 0.2983638113570741, "grad_norm": 1.2268033027648926, "learning_rate": 0.00018923957671586154, "loss": 1.3786, "step": 7750 }, { "epoch": 0.2985563041385948, "grad_norm": 0.9456066489219666, "learning_rate": 0.0001892259269054365, "loss": 1.3424, "step": 7755 }, { "epoch": 0.2987487969201155, "grad_norm": 1.5397047996520996, "learning_rate": 0.0001892122689358915, "loss": 1.3618, "step": 7760 }, { "epoch": 0.2989412897016362, "grad_norm": 1.3874872922897339, "learning_rate": 0.0001891986028084755, "loss": 1.2717, "step": 7765 }, { 
"epoch": 0.29913378248315686, "grad_norm": 1.1725342273712158, "learning_rate": 0.00018918492852443817, "loss": 1.4347, "step": 7770 }, { "epoch": 0.2993262752646776, "grad_norm": 1.2135777473449707, "learning_rate": 0.0001891712460850299, "loss": 1.1892, "step": 7775 }, { "epoch": 0.29951876804619826, "grad_norm": 1.549715280532837, "learning_rate": 0.00018915755549150188, "loss": 1.2041, "step": 7780 }, { "epoch": 0.299711260827719, "grad_norm": 0.9927541613578796, "learning_rate": 0.00018914385674510605, "loss": 1.2198, "step": 7785 }, { "epoch": 0.29990375360923965, "grad_norm": 1.3314557075500488, "learning_rate": 0.00018913014984709502, "loss": 1.1805, "step": 7790 }, { "epoch": 0.3000962463907603, "grad_norm": 1.4021222591400146, "learning_rate": 0.00018911643479872225, "loss": 1.3375, "step": 7795 }, { "epoch": 0.30028873917228105, "grad_norm": 1.0226534605026245, "learning_rate": 0.00018910271160124182, "loss": 1.329, "step": 7800 }, { "epoch": 0.3004812319538017, "grad_norm": 0.8493847846984863, "learning_rate": 0.0001890889802559087, "loss": 1.4581, "step": 7805 }, { "epoch": 0.30067372473532245, "grad_norm": 1.0437967777252197, "learning_rate": 0.00018907524076397847, "loss": 1.409, "step": 7810 }, { "epoch": 0.3008662175168431, "grad_norm": 2.574695110321045, "learning_rate": 0.00018906149312670754, "loss": 1.3962, "step": 7815 }, { "epoch": 0.3010587102983638, "grad_norm": 1.3757768869400024, "learning_rate": 0.00018904773734535306, "loss": 1.4098, "step": 7820 }, { "epoch": 0.3012512030798845, "grad_norm": 1.2249635457992554, "learning_rate": 0.0001890339734211729, "loss": 1.1643, "step": 7825 }, { "epoch": 0.3014436958614052, "grad_norm": 1.6329936981201172, "learning_rate": 0.00018902020135542564, "loss": 1.1914, "step": 7830 }, { "epoch": 0.3016361886429259, "grad_norm": 1.0217385292053223, "learning_rate": 0.0001890064211493707, "loss": 1.043, "step": 7835 }, { "epoch": 0.3018286814244466, "grad_norm": 1.448754072189331, "learning_rate": 
0.0001889926328042681, "loss": 1.0953, "step": 7840 }, { "epoch": 0.3020211742059673, "grad_norm": 0.9284221529960632, "learning_rate": 0.00018897883632137881, "loss": 1.321, "step": 7845 }, { "epoch": 0.30221366698748797, "grad_norm": 1.4679608345031738, "learning_rate": 0.00018896503170196435, "loss": 1.2266, "step": 7850 }, { "epoch": 0.30240615976900864, "grad_norm": 1.1148631572723389, "learning_rate": 0.00018895121894728709, "loss": 1.1666, "step": 7855 }, { "epoch": 0.30259865255052937, "grad_norm": 1.0431932210922241, "learning_rate": 0.00018893739805861008, "loss": 1.2986, "step": 7860 }, { "epoch": 0.30279114533205004, "grad_norm": 1.5691524744033813, "learning_rate": 0.00018892356903719718, "loss": 1.3928, "step": 7865 }, { "epoch": 0.30298363811357076, "grad_norm": 1.6849128007888794, "learning_rate": 0.000188909731884313, "loss": 1.3569, "step": 7870 }, { "epoch": 0.30317613089509143, "grad_norm": 1.1832456588745117, "learning_rate": 0.00018889588660122276, "loss": 1.2984, "step": 7875 }, { "epoch": 0.3033686236766121, "grad_norm": 1.3270272016525269, "learning_rate": 0.0001888820331891926, "loss": 1.1498, "step": 7880 }, { "epoch": 0.30356111645813283, "grad_norm": 1.6383373737335205, "learning_rate": 0.0001888681716494893, "loss": 1.4725, "step": 7885 }, { "epoch": 0.3037536092396535, "grad_norm": 1.1068469285964966, "learning_rate": 0.00018885430198338038, "loss": 1.3326, "step": 7890 }, { "epoch": 0.3039461020211742, "grad_norm": 1.8454192876815796, "learning_rate": 0.00018884042419213412, "loss": 1.2307, "step": 7895 }, { "epoch": 0.3041385948026949, "grad_norm": 1.160762906074524, "learning_rate": 0.00018882653827701965, "loss": 1.6025, "step": 7900 }, { "epoch": 0.30433108758421556, "grad_norm": 1.9325065612792969, "learning_rate": 0.00018881264423930663, "loss": 1.3071, "step": 7905 }, { "epoch": 0.3045235803657363, "grad_norm": 0.9047966003417969, "learning_rate": 0.00018879874208026562, "loss": 1.3166, "step": 7910 }, { "epoch": 
0.30471607314725696, "grad_norm": 0.9753623008728027, "learning_rate": 0.00018878483180116793, "loss": 1.3702, "step": 7915 }, { "epoch": 0.3049085659287777, "grad_norm": 1.210321307182312, "learning_rate": 0.00018877091340328549, "loss": 1.3775, "step": 7920 }, { "epoch": 0.30510105871029836, "grad_norm": 1.287484049797058, "learning_rate": 0.00018875698688789106, "loss": 1.3534, "step": 7925 }, { "epoch": 0.3052935514918191, "grad_norm": 1.1604797840118408, "learning_rate": 0.00018874305225625814, "loss": 1.2154, "step": 7930 }, { "epoch": 0.30548604427333975, "grad_norm": 1.4771429300308228, "learning_rate": 0.00018872910950966097, "loss": 1.2438, "step": 7935 }, { "epoch": 0.3056785370548604, "grad_norm": 1.1472980976104736, "learning_rate": 0.00018871515864937453, "loss": 1.0805, "step": 7940 }, { "epoch": 0.30587102983638115, "grad_norm": 1.1015262603759766, "learning_rate": 0.0001887011996766745, "loss": 1.0594, "step": 7945 }, { "epoch": 0.3060635226179018, "grad_norm": 1.5410771369934082, "learning_rate": 0.00018868723259283737, "loss": 1.2624, "step": 7950 }, { "epoch": 0.30625601539942254, "grad_norm": 1.2014496326446533, "learning_rate": 0.0001886732573991403, "loss": 1.2259, "step": 7955 }, { "epoch": 0.3064485081809432, "grad_norm": 2.0007143020629883, "learning_rate": 0.0001886592740968612, "loss": 1.3877, "step": 7960 }, { "epoch": 0.3066410009624639, "grad_norm": 1.2455111742019653, "learning_rate": 0.00018864528268727887, "loss": 1.3254, "step": 7965 }, { "epoch": 0.3068334937439846, "grad_norm": 1.2766424417495728, "learning_rate": 0.00018863128317167264, "loss": 1.2663, "step": 7970 }, { "epoch": 0.3070259865255053, "grad_norm": 1.2151165008544922, "learning_rate": 0.0001886172755513227, "loss": 1.3597, "step": 7975 }, { "epoch": 0.307218479307026, "grad_norm": 1.1774568557739258, "learning_rate": 0.0001886032598275099, "loss": 1.1311, "step": 7980 }, { "epoch": 0.3074109720885467, "grad_norm": 1.43276846408844, "learning_rate": 
0.00018858923600151596, "loss": 1.1123, "step": 7985 }, { "epoch": 0.3076034648700674, "grad_norm": 1.691684603691101, "learning_rate": 0.00018857520407462326, "loss": 1.4089, "step": 7990 }, { "epoch": 0.30779595765158807, "grad_norm": 1.7944872379302979, "learning_rate": 0.00018856116404811487, "loss": 1.3098, "step": 7995 }, { "epoch": 0.30798845043310874, "grad_norm": 1.2894377708435059, "learning_rate": 0.00018854711592327473, "loss": 1.2128, "step": 8000 }, { "epoch": 0.30818094321462947, "grad_norm": 2.52504301071167, "learning_rate": 0.00018853305970138737, "loss": 1.4214, "step": 8005 }, { "epoch": 0.30837343599615014, "grad_norm": 1.0757540464401245, "learning_rate": 0.0001885189953837382, "loss": 1.1836, "step": 8010 }, { "epoch": 0.30856592877767086, "grad_norm": 0.9253488183021545, "learning_rate": 0.0001885049229716133, "loss": 1.0756, "step": 8015 }, { "epoch": 0.30875842155919153, "grad_norm": 2.042194366455078, "learning_rate": 0.00018849084246629945, "loss": 1.4017, "step": 8020 }, { "epoch": 0.3089509143407122, "grad_norm": 1.750023603439331, "learning_rate": 0.00018847675386908427, "loss": 1.2352, "step": 8025 }, { "epoch": 0.3091434071222329, "grad_norm": 1.5334408283233643, "learning_rate": 0.00018846265718125605, "loss": 1.3053, "step": 8030 }, { "epoch": 0.3093358999037536, "grad_norm": 1.262428641319275, "learning_rate": 0.00018844855240410387, "loss": 1.28, "step": 8035 }, { "epoch": 0.3095283926852743, "grad_norm": 1.1430000066757202, "learning_rate": 0.0001884344395389175, "loss": 1.2133, "step": 8040 }, { "epoch": 0.309720885466795, "grad_norm": 1.792740821838379, "learning_rate": 0.0001884203185869874, "loss": 1.3004, "step": 8045 }, { "epoch": 0.30991337824831566, "grad_norm": 1.7067112922668457, "learning_rate": 0.00018840618954960495, "loss": 1.4131, "step": 8050 }, { "epoch": 0.3101058710298364, "grad_norm": 1.5428810119628906, "learning_rate": 0.00018839205242806206, "loss": 1.2361, "step": 8055 }, { "epoch": 0.31029836381135706, 
"grad_norm": 1.078902244567871, "learning_rate": 0.00018837790722365152, "loss": 1.2126, "step": 8060 }, { "epoch": 0.3104908565928778, "grad_norm": 1.5348985195159912, "learning_rate": 0.00018836375393766684, "loss": 1.2591, "step": 8065 }, { "epoch": 0.31068334937439845, "grad_norm": 1.2026286125183105, "learning_rate": 0.00018834959257140222, "loss": 1.3059, "step": 8070 }, { "epoch": 0.3108758421559192, "grad_norm": 1.3559043407440186, "learning_rate": 0.0001883354231261526, "loss": 1.2006, "step": 8075 }, { "epoch": 0.31106833493743985, "grad_norm": 1.2358171939849854, "learning_rate": 0.00018832124560321374, "loss": 1.2656, "step": 8080 }, { "epoch": 0.3112608277189605, "grad_norm": 1.720358967781067, "learning_rate": 0.00018830706000388202, "loss": 1.3493, "step": 8085 }, { "epoch": 0.31145332050048125, "grad_norm": 1.4281798601150513, "learning_rate": 0.00018829286632945463, "loss": 1.1485, "step": 8090 }, { "epoch": 0.3116458132820019, "grad_norm": 1.6174485683441162, "learning_rate": 0.00018827866458122951, "loss": 1.4384, "step": 8095 }, { "epoch": 0.31183830606352264, "grad_norm": 1.0020065307617188, "learning_rate": 0.00018826445476050532, "loss": 1.0489, "step": 8100 }, { "epoch": 0.3120307988450433, "grad_norm": 1.8663140535354614, "learning_rate": 0.0001882502368685814, "loss": 1.3252, "step": 8105 }, { "epoch": 0.312223291626564, "grad_norm": 1.4404470920562744, "learning_rate": 0.00018823601090675796, "loss": 1.1452, "step": 8110 }, { "epoch": 0.3124157844080847, "grad_norm": 1.3358442783355713, "learning_rate": 0.00018822177687633583, "loss": 1.1581, "step": 8115 }, { "epoch": 0.3126082771896054, "grad_norm": 1.6938860416412354, "learning_rate": 0.00018820753477861662, "loss": 1.5378, "step": 8120 }, { "epoch": 0.3128007699711261, "grad_norm": 1.1914762258529663, "learning_rate": 0.00018819328461490268, "loss": 1.172, "step": 8125 }, { "epoch": 0.3129932627526468, "grad_norm": 2.0504634380340576, "learning_rate": 0.0001881790263864971, "loss": 
1.2462, "step": 8130 }, { "epoch": 0.31318575553416744, "grad_norm": 1.548021912574768, "learning_rate": 0.00018816476009470367, "loss": 1.271, "step": 8135 }, { "epoch": 0.31337824831568817, "grad_norm": 1.2875434160232544, "learning_rate": 0.00018815048574082698, "loss": 1.2484, "step": 8140 }, { "epoch": 0.31357074109720884, "grad_norm": 0.936850905418396, "learning_rate": 0.00018813620332617227, "loss": 1.2765, "step": 8145 }, { "epoch": 0.31376323387872956, "grad_norm": 1.2823413610458374, "learning_rate": 0.00018812191285204566, "loss": 1.1859, "step": 8150 }, { "epoch": 0.31395572666025023, "grad_norm": 2.052490472793579, "learning_rate": 0.00018810761431975386, "loss": 1.2033, "step": 8155 }, { "epoch": 0.31414821944177096, "grad_norm": 2.4439830780029297, "learning_rate": 0.00018809330773060442, "loss": 1.3678, "step": 8160 }, { "epoch": 0.31434071222329163, "grad_norm": 1.9978455305099487, "learning_rate": 0.0001880789930859055, "loss": 1.25, "step": 8165 }, { "epoch": 0.3145332050048123, "grad_norm": 1.2606321573257446, "learning_rate": 0.00018806467038696615, "loss": 1.4966, "step": 8170 }, { "epoch": 0.314725697786333, "grad_norm": 1.4588353633880615, "learning_rate": 0.00018805033963509605, "loss": 1.1843, "step": 8175 }, { "epoch": 0.3149181905678537, "grad_norm": 2.8686156272888184, "learning_rate": 0.00018803600083160574, "loss": 1.3017, "step": 8180 }, { "epoch": 0.3151106833493744, "grad_norm": 1.812328815460205, "learning_rate": 0.00018802165397780626, "loss": 1.4141, "step": 8185 }, { "epoch": 0.3153031761308951, "grad_norm": 1.4686119556427002, "learning_rate": 0.00018800729907500968, "loss": 1.4522, "step": 8190 }, { "epoch": 0.31549566891241576, "grad_norm": 1.766160249710083, "learning_rate": 0.00018799293612452856, "loss": 1.1501, "step": 8195 }, { "epoch": 0.3156881616939365, "grad_norm": 1.5843030214309692, "learning_rate": 0.00018797856512767634, "loss": 1.2997, "step": 8200 }, { "epoch": 0.31588065447545716, "grad_norm": 
1.2028679847717285, "learning_rate": 0.00018796418608576712, "loss": 1.108, "step": 8205 }, { "epoch": 0.3160731472569779, "grad_norm": 1.4626559019088745, "learning_rate": 0.0001879497990001158, "loss": 1.116, "step": 8210 }, { "epoch": 0.31626564003849855, "grad_norm": 1.956745982170105, "learning_rate": 0.000187935403872038, "loss": 1.2741, "step": 8215 }, { "epoch": 0.3164581328200192, "grad_norm": 1.1932622194290161, "learning_rate": 0.00018792100070285002, "loss": 1.1966, "step": 8220 }, { "epoch": 0.31665062560153995, "grad_norm": 2.212184429168701, "learning_rate": 0.00018790658949386892, "loss": 1.1485, "step": 8225 }, { "epoch": 0.3168431183830606, "grad_norm": 0.867708146572113, "learning_rate": 0.00018789217024641256, "loss": 1.2457, "step": 8230 }, { "epoch": 0.31703561116458134, "grad_norm": 2.4929304122924805, "learning_rate": 0.0001878777429617995, "loss": 1.1819, "step": 8235 }, { "epoch": 0.317228103946102, "grad_norm": 1.4232670068740845, "learning_rate": 0.00018786330764134897, "loss": 1.2189, "step": 8240 }, { "epoch": 0.31742059672762274, "grad_norm": 1.8306447267532349, "learning_rate": 0.00018784886428638094, "loss": 1.2939, "step": 8245 }, { "epoch": 0.3176130895091434, "grad_norm": 0.9103988409042358, "learning_rate": 0.00018783441289821627, "loss": 1.2982, "step": 8250 }, { "epoch": 0.3178055822906641, "grad_norm": 1.08035409450531, "learning_rate": 0.0001878199534781764, "loss": 1.2777, "step": 8255 }, { "epoch": 0.3179980750721848, "grad_norm": 1.1342133283615112, "learning_rate": 0.0001878054860275835, "loss": 1.1476, "step": 8260 }, { "epoch": 0.3181905678537055, "grad_norm": 1.7727190256118774, "learning_rate": 0.0001877910105477606, "loss": 1.1887, "step": 8265 }, { "epoch": 0.3183830606352262, "grad_norm": 2.5168001651763916, "learning_rate": 0.0001877765270400313, "loss": 1.0494, "step": 8270 }, { "epoch": 0.31857555341674687, "grad_norm": 1.2397305965423584, "learning_rate": 0.0001877620355057201, "loss": 1.321, "step": 8275 }, { 
"epoch": 0.31876804619826754, "grad_norm": 1.3002814054489136, "learning_rate": 0.0001877475359461521, "loss": 1.1543, "step": 8280 }, { "epoch": 0.31896053897978827, "grad_norm": 1.5683960914611816, "learning_rate": 0.00018773302836265322, "loss": 1.1987, "step": 8285 }, { "epoch": 0.31915303176130894, "grad_norm": 1.6934245824813843, "learning_rate": 0.00018771851275655008, "loss": 1.2946, "step": 8290 }, { "epoch": 0.31934552454282966, "grad_norm": 1.4387637376785278, "learning_rate": 0.00018770398912917004, "loss": 1.2151, "step": 8295 }, { "epoch": 0.31953801732435033, "grad_norm": 1.3155730962753296, "learning_rate": 0.00018768945748184117, "loss": 1.1692, "step": 8300 }, { "epoch": 0.31973051010587106, "grad_norm": 1.039670467376709, "learning_rate": 0.0001876749178158923, "loss": 1.2783, "step": 8305 }, { "epoch": 0.3199230028873917, "grad_norm": 1.1988794803619385, "learning_rate": 0.00018766037013265302, "loss": 1.1775, "step": 8310 }, { "epoch": 0.3201154956689124, "grad_norm": 1.39814031124115, "learning_rate": 0.00018764581443345355, "loss": 1.2256, "step": 8315 }, { "epoch": 0.3203079884504331, "grad_norm": 1.7934690713882446, "learning_rate": 0.00018763125071962495, "loss": 1.3505, "step": 8320 }, { "epoch": 0.3205004812319538, "grad_norm": 1.5974578857421875, "learning_rate": 0.00018761667899249899, "loss": 1.1725, "step": 8325 }, { "epoch": 0.3206929740134745, "grad_norm": 0.9480400085449219, "learning_rate": 0.00018760209925340818, "loss": 1.2059, "step": 8330 }, { "epoch": 0.3208854667949952, "grad_norm": 1.9734187126159668, "learning_rate": 0.00018758751150368564, "loss": 1.2116, "step": 8335 }, { "epoch": 0.32107795957651586, "grad_norm": 0.9984979033470154, "learning_rate": 0.00018757291574466543, "loss": 1.1347, "step": 8340 }, { "epoch": 0.3212704523580366, "grad_norm": 0.96681147813797, "learning_rate": 0.00018755831197768215, "loss": 1.2824, "step": 8345 }, { "epoch": 0.32146294513955725, "grad_norm": 1.5365724563598633, "learning_rate": 
0.00018754370020407127, "loss": 1.3718, "step": 8350 }, { "epoch": 0.321655437921078, "grad_norm": 1.6202696561813354, "learning_rate": 0.00018752908042516897, "loss": 1.3233, "step": 8355 }, { "epoch": 0.32184793070259865, "grad_norm": 2.0272514820098877, "learning_rate": 0.00018751445264231207, "loss": 1.3406, "step": 8360 }, { "epoch": 0.3220404234841193, "grad_norm": 1.1724604368209839, "learning_rate": 0.0001874998168568382, "loss": 1.2649, "step": 8365 }, { "epoch": 0.32223291626564005, "grad_norm": 1.0908805131912231, "learning_rate": 0.00018748517307008573, "loss": 1.2924, "step": 8370 }, { "epoch": 0.3224254090471607, "grad_norm": 1.0658169984817505, "learning_rate": 0.0001874705212833937, "loss": 1.1266, "step": 8375 }, { "epoch": 0.32261790182868144, "grad_norm": 1.2267755270004272, "learning_rate": 0.00018745586149810194, "loss": 1.172, "step": 8380 }, { "epoch": 0.3228103946102021, "grad_norm": 0.9808927178382874, "learning_rate": 0.000187441193715551, "loss": 1.1241, "step": 8385 }, { "epoch": 0.32300288739172284, "grad_norm": 1.2251529693603516, "learning_rate": 0.00018742651793708212, "loss": 1.1649, "step": 8390 }, { "epoch": 0.3231953801732435, "grad_norm": 1.7396290302276611, "learning_rate": 0.00018741183416403734, "loss": 1.173, "step": 8395 }, { "epoch": 0.3233878729547642, "grad_norm": 1.1498087644577026, "learning_rate": 0.00018739714239775936, "loss": 1.266, "step": 8400 }, { "epoch": 0.3235803657362849, "grad_norm": 0.9458256959915161, "learning_rate": 0.0001873824426395917, "loss": 1.1651, "step": 8405 }, { "epoch": 0.3237728585178056, "grad_norm": 1.701441764831543, "learning_rate": 0.00018736773489087845, "loss": 1.4314, "step": 8410 }, { "epoch": 0.3239653512993263, "grad_norm": 1.3168058395385742, "learning_rate": 0.00018735301915296466, "loss": 1.3837, "step": 8415 }, { "epoch": 0.32415784408084697, "grad_norm": 1.2277673482894897, "learning_rate": 0.0001873382954271959, "loss": 1.2433, "step": 8420 }, { "epoch": 0.32435033686236764, 
"grad_norm": 1.3443776369094849, "learning_rate": 0.00018732356371491858, "loss": 1.1514, "step": 8425 }, { "epoch": 0.32454282964388836, "grad_norm": 1.3421462774276733, "learning_rate": 0.00018730882401747984, "loss": 1.2908, "step": 8430 }, { "epoch": 0.32473532242540903, "grad_norm": 2.7043700218200684, "learning_rate": 0.0001872940763362275, "loss": 1.426, "step": 8435 }, { "epoch": 0.32492781520692976, "grad_norm": 1.2363086938858032, "learning_rate": 0.00018727932067251016, "loss": 1.2172, "step": 8440 }, { "epoch": 0.32512030798845043, "grad_norm": 1.7551484107971191, "learning_rate": 0.00018726455702767713, "loss": 1.2379, "step": 8445 }, { "epoch": 0.3253128007699711, "grad_norm": 1.2935433387756348, "learning_rate": 0.00018724978540307844, "loss": 1.2109, "step": 8450 }, { "epoch": 0.3255052935514918, "grad_norm": 1.723219871520996, "learning_rate": 0.00018723500580006483, "loss": 1.3996, "step": 8455 }, { "epoch": 0.3256977863330125, "grad_norm": 1.1455639600753784, "learning_rate": 0.0001872202182199878, "loss": 1.1223, "step": 8460 }, { "epoch": 0.3258902791145332, "grad_norm": 1.194926381111145, "learning_rate": 0.0001872054226641996, "loss": 1.3301, "step": 8465 }, { "epoch": 0.3260827718960539, "grad_norm": 1.9672341346740723, "learning_rate": 0.00018719061913405322, "loss": 1.3884, "step": 8470 }, { "epoch": 0.3262752646775746, "grad_norm": 1.5594457387924194, "learning_rate": 0.0001871758076309023, "loss": 1.1862, "step": 8475 }, { "epoch": 0.3264677574590953, "grad_norm": 1.141787052154541, "learning_rate": 0.0001871609881561012, "loss": 1.2375, "step": 8480 }, { "epoch": 0.32666025024061596, "grad_norm": 1.1914411783218384, "learning_rate": 0.0001871461607110052, "loss": 1.397, "step": 8485 }, { "epoch": 0.3268527430221367, "grad_norm": 1.2841687202453613, "learning_rate": 0.00018713132529697007, "loss": 1.3052, "step": 8490 }, { "epoch": 0.32704523580365735, "grad_norm": 2.2977144718170166, "learning_rate": 0.0001871164819153524, "loss": 
1.2819, "step": 8495 }, { "epoch": 0.3272377285851781, "grad_norm": 1.62446928024292, "learning_rate": 0.00018710163056750957, "loss": 1.1739, "step": 8500 }, { "epoch": 0.32743022136669875, "grad_norm": 1.471348524093628, "learning_rate": 0.00018708677125479963, "loss": 1.0684, "step": 8505 }, { "epoch": 0.3276227141482194, "grad_norm": 1.0703455209732056, "learning_rate": 0.00018707190397858133, "loss": 1.0832, "step": 8510 }, { "epoch": 0.32781520692974014, "grad_norm": 1.3942466974258423, "learning_rate": 0.00018705702874021425, "loss": 1.1855, "step": 8515 }, { "epoch": 0.3280076997112608, "grad_norm": 1.1790398359298706, "learning_rate": 0.00018704214554105856, "loss": 1.1459, "step": 8520 }, { "epoch": 0.32820019249278154, "grad_norm": 1.2982394695281982, "learning_rate": 0.00018702725438247527, "loss": 1.2642, "step": 8525 }, { "epoch": 0.3283926852743022, "grad_norm": 1.4757968187332153, "learning_rate": 0.00018701235526582608, "loss": 1.291, "step": 8530 }, { "epoch": 0.3285851780558229, "grad_norm": 1.6837409734725952, "learning_rate": 0.0001870004302436148, "loss": 1.3796, "step": 8535 }, { "epoch": 0.3287776708373436, "grad_norm": 1.1914480924606323, "learning_rate": 0.00018698551680588075, "loss": 1.2608, "step": 8540 }, { "epoch": 0.3289701636188643, "grad_norm": 1.2581427097320557, "learning_rate": 0.00018697059541389742, "loss": 1.3011, "step": 8545 }, { "epoch": 0.329162656400385, "grad_norm": 1.5642743110656738, "learning_rate": 0.0001869556660690293, "loss": 1.2273, "step": 8550 }, { "epoch": 0.32935514918190567, "grad_norm": 1.621721863746643, "learning_rate": 0.0001869407287726415, "loss": 1.1648, "step": 8555 }, { "epoch": 0.3295476419634264, "grad_norm": 0.9840386509895325, "learning_rate": 0.00018692578352610002, "loss": 1.2741, "step": 8560 }, { "epoch": 0.32974013474494707, "grad_norm": 1.5852268934249878, "learning_rate": 0.00018691083033077144, "loss": 1.2913, "step": 8565 }, { "epoch": 0.32993262752646774, "grad_norm": 
1.280247688293457, "learning_rate": 0.00018689586918802314, "loss": 1.172, "step": 8570 }, { "epoch": 0.33012512030798846, "grad_norm": 1.3940321207046509, "learning_rate": 0.0001868809000992233, "loss": 1.175, "step": 8575 }, { "epoch": 0.33031761308950913, "grad_norm": 1.0753341913223267, "learning_rate": 0.00018686592306574063, "loss": 1.3922, "step": 8580 }, { "epoch": 0.33051010587102986, "grad_norm": 1.5959515571594238, "learning_rate": 0.00018685093808894476, "loss": 1.2741, "step": 8585 }, { "epoch": 0.33070259865255053, "grad_norm": 1.1567896604537964, "learning_rate": 0.00018683594517020593, "loss": 1.1325, "step": 8590 }, { "epoch": 0.3308950914340712, "grad_norm": 1.202486276626587, "learning_rate": 0.0001868209443108951, "loss": 1.1915, "step": 8595 }, { "epoch": 0.3310875842155919, "grad_norm": 1.6866669654846191, "learning_rate": 0.00018680593551238412, "loss": 1.2806, "step": 8600 }, { "epoch": 0.3312800769971126, "grad_norm": 1.1932209730148315, "learning_rate": 0.00018679091877604536, "loss": 1.2254, "step": 8605 }, { "epoch": 0.3314725697786333, "grad_norm": 1.5348761081695557, "learning_rate": 0.000186775894103252, "loss": 1.1519, "step": 8610 }, { "epoch": 0.331665062560154, "grad_norm": 1.908500075340271, "learning_rate": 0.00018676086149537792, "loss": 1.3105, "step": 8615 }, { "epoch": 0.3318575553416747, "grad_norm": 2.0427961349487305, "learning_rate": 0.00018674582095379788, "loss": 1.1415, "step": 8620 }, { "epoch": 0.3320500481231954, "grad_norm": 1.0964915752410889, "learning_rate": 0.00018673077247988707, "loss": 1.2041, "step": 8625 }, { "epoch": 0.33224254090471605, "grad_norm": 1.2229498624801636, "learning_rate": 0.00018671571607502168, "loss": 1.2975, "step": 8630 }, { "epoch": 0.3324350336862368, "grad_norm": 1.3551470041275024, "learning_rate": 0.00018670065174057854, "loss": 1.1592, "step": 8635 }, { "epoch": 0.33262752646775745, "grad_norm": 0.8810299634933472, "learning_rate": 0.0001866855794779351, "loss": 1.1414, "step": 
8640 }, { "epoch": 0.3328200192492782, "grad_norm": 1.5907199382781982, "learning_rate": 0.00018667049928846967, "loss": 1.2191, "step": 8645 }, { "epoch": 0.33301251203079885, "grad_norm": 2.042478561401367, "learning_rate": 0.0001866554111735612, "loss": 1.1619, "step": 8650 }, { "epoch": 0.3332050048123195, "grad_norm": 1.6686564683914185, "learning_rate": 0.00018664031513458942, "loss": 1.2534, "step": 8655 }, { "epoch": 0.33339749759384024, "grad_norm": 1.7643070220947266, "learning_rate": 0.0001866252111729348, "loss": 1.2631, "step": 8660 }, { "epoch": 0.3335899903753609, "grad_norm": 1.4883722066879272, "learning_rate": 0.0001866100992899784, "loss": 1.1786, "step": 8665 }, { "epoch": 0.33378248315688164, "grad_norm": 0.9850770235061646, "learning_rate": 0.00018659497948710218, "loss": 1.4181, "step": 8670 }, { "epoch": 0.3339749759384023, "grad_norm": 0.9056932926177979, "learning_rate": 0.00018657985176568875, "loss": 1.0365, "step": 8675 }, { "epoch": 0.334167468719923, "grad_norm": 1.9456449747085571, "learning_rate": 0.00018656471612712137, "loss": 1.227, "step": 8680 }, { "epoch": 0.3343599615014437, "grad_norm": 1.289870262145996, "learning_rate": 0.00018654957257278415, "loss": 1.32, "step": 8685 }, { "epoch": 0.3345524542829644, "grad_norm": 1.048143744468689, "learning_rate": 0.00018653442110406189, "loss": 1.2123, "step": 8690 }, { "epoch": 0.3347449470644851, "grad_norm": 1.1696733236312866, "learning_rate": 0.00018651926172234004, "loss": 1.0226, "step": 8695 }, { "epoch": 0.33493743984600577, "grad_norm": 1.4806257486343384, "learning_rate": 0.00018650409442900486, "loss": 1.1715, "step": 8700 }, { "epoch": 0.3351299326275265, "grad_norm": 1.525719404220581, "learning_rate": 0.00018648891922544325, "loss": 1.2037, "step": 8705 }, { "epoch": 0.33532242540904716, "grad_norm": 1.3378442525863647, "learning_rate": 0.00018647373611304293, "loss": 1.2188, "step": 8710 }, { "epoch": 0.33551491819056783, "grad_norm": 0.870988130569458, 
"learning_rate": 0.00018645854509319226, "loss": 1.2153, "step": 8715 }, { "epoch": 0.33570741097208856, "grad_norm": 1.5496007204055786, "learning_rate": 0.00018644334616728042, "loss": 1.1974, "step": 8720 }, { "epoch": 0.33589990375360923, "grad_norm": 1.0248416662216187, "learning_rate": 0.00018642813933669717, "loss": 1.2845, "step": 8725 }, { "epoch": 0.33609239653512996, "grad_norm": 1.9984816312789917, "learning_rate": 0.00018641292460283313, "loss": 1.3144, "step": 8730 }, { "epoch": 0.3362848893166506, "grad_norm": 1.3114112615585327, "learning_rate": 0.00018639770196707955, "loss": 1.209, "step": 8735 }, { "epoch": 0.3364773820981713, "grad_norm": 1.1683485507965088, "learning_rate": 0.00018638247143082848, "loss": 1.2688, "step": 8740 }, { "epoch": 0.336669874879692, "grad_norm": 1.507900595664978, "learning_rate": 0.0001863672329954726, "loss": 1.1325, "step": 8745 }, { "epoch": 0.3368623676612127, "grad_norm": 1.3393852710723877, "learning_rate": 0.00018635198666240542, "loss": 1.1573, "step": 8750 }, { "epoch": 0.3370548604427334, "grad_norm": 1.0203709602355957, "learning_rate": 0.00018633673243302108, "loss": 1.2922, "step": 8755 }, { "epoch": 0.3372473532242541, "grad_norm": 0.8483877778053284, "learning_rate": 0.00018632147030871448, "loss": 1.2252, "step": 8760 }, { "epoch": 0.33743984600577476, "grad_norm": 0.983748197555542, "learning_rate": 0.00018630620029088125, "loss": 1.2027, "step": 8765 }, { "epoch": 0.3376323387872955, "grad_norm": 1.2489101886749268, "learning_rate": 0.00018629092238091775, "loss": 1.1962, "step": 8770 }, { "epoch": 0.33782483156881615, "grad_norm": 1.4553676843643188, "learning_rate": 0.000186275636580221, "loss": 1.3698, "step": 8775 }, { "epoch": 0.3380173243503369, "grad_norm": 0.9494854807853699, "learning_rate": 0.0001862603428901888, "loss": 1.25, "step": 8780 }, { "epoch": 0.33820981713185755, "grad_norm": 0.8667522072792053, "learning_rate": 0.00018624504131221968, "loss": 1.222, "step": 8785 }, { "epoch": 
0.3384023099133783, "grad_norm": 1.4215630292892456, "learning_rate": 0.00018622973184771285, "loss": 1.2592, "step": 8790 }, { "epoch": 0.33859480269489894, "grad_norm": 0.9913888573646545, "learning_rate": 0.00018621441449806828, "loss": 1.2904, "step": 8795 }, { "epoch": 0.3387872954764196, "grad_norm": 0.9612273573875427, "learning_rate": 0.00018619908926468664, "loss": 1.24, "step": 8800 }, { "epoch": 0.33897978825794034, "grad_norm": 1.656568169593811, "learning_rate": 0.00018618375614896926, "loss": 1.1763, "step": 8805 }, { "epoch": 0.339172281039461, "grad_norm": 1.4496088027954102, "learning_rate": 0.0001861684151523183, "loss": 1.2045, "step": 8810 }, { "epoch": 0.33936477382098174, "grad_norm": 1.3886058330535889, "learning_rate": 0.0001861530662761366, "loss": 1.3111, "step": 8815 }, { "epoch": 0.3395572666025024, "grad_norm": 1.644887089729309, "learning_rate": 0.0001861377095218277, "loss": 1.3172, "step": 8820 }, { "epoch": 0.3397497593840231, "grad_norm": 1.1925910711288452, "learning_rate": 0.00018612234489079587, "loss": 1.3268, "step": 8825 }, { "epoch": 0.3399422521655438, "grad_norm": 1.1367309093475342, "learning_rate": 0.0001861069723844461, "loss": 1.1209, "step": 8830 }, { "epoch": 0.34013474494706447, "grad_norm": 1.0649480819702148, "learning_rate": 0.00018609159200418414, "loss": 1.1514, "step": 8835 }, { "epoch": 0.3403272377285852, "grad_norm": 1.1887884140014648, "learning_rate": 0.00018607620375141637, "loss": 1.1026, "step": 8840 }, { "epoch": 0.34051973051010587, "grad_norm": 1.9125694036483765, "learning_rate": 0.00018606080762754995, "loss": 1.4718, "step": 8845 }, { "epoch": 0.34071222329162654, "grad_norm": 1.1742594242095947, "learning_rate": 0.00018604540363399282, "loss": 1.3206, "step": 8850 }, { "epoch": 0.34090471607314726, "grad_norm": 1.504146695137024, "learning_rate": 0.0001860299917721535, "loss": 1.1639, "step": 8855 }, { "epoch": 0.34109720885466793, "grad_norm": 0.8869237899780273, "learning_rate": 
0.00018601457204344131, "loss": 1.2674, "step": 8860 }, { "epoch": 0.34128970163618866, "grad_norm": 0.8492304682731628, "learning_rate": 0.00018599914444926636, "loss": 1.2732, "step": 8865 }, { "epoch": 0.34148219441770933, "grad_norm": 1.1681571006774902, "learning_rate": 0.00018598370899103932, "loss": 1.2995, "step": 8870 }, { "epoch": 0.34167468719923005, "grad_norm": 1.6912837028503418, "learning_rate": 0.00018596826567017166, "loss": 1.3217, "step": 8875 }, { "epoch": 0.3418671799807507, "grad_norm": 1.0427602529525757, "learning_rate": 0.0001859528144880756, "loss": 1.05, "step": 8880 }, { "epoch": 0.3420596727622714, "grad_norm": 1.9644991159439087, "learning_rate": 0.00018593735544616404, "loss": 1.1087, "step": 8885 }, { "epoch": 0.3422521655437921, "grad_norm": 1.966264247894287, "learning_rate": 0.0001859218885458506, "loss": 1.2221, "step": 8890 }, { "epoch": 0.3424446583253128, "grad_norm": 1.9770557880401611, "learning_rate": 0.00018590641378854965, "loss": 1.2489, "step": 8895 }, { "epoch": 0.3426371511068335, "grad_norm": 1.4175180196762085, "learning_rate": 0.00018589093117567625, "loss": 1.1292, "step": 8900 }, { "epoch": 0.3428296438883542, "grad_norm": 1.066177487373352, "learning_rate": 0.00018587544070864612, "loss": 1.1182, "step": 8905 }, { "epoch": 0.34302213666987486, "grad_norm": 2.6207172870635986, "learning_rate": 0.00018585994238887586, "loss": 1.1, "step": 8910 }, { "epoch": 0.3432146294513956, "grad_norm": 1.6905888319015503, "learning_rate": 0.0001858444362177826, "loss": 1.3135, "step": 8915 }, { "epoch": 0.34340712223291625, "grad_norm": 1.117883324623108, "learning_rate": 0.00018582892219678435, "loss": 1.3394, "step": 8920 }, { "epoch": 0.343599615014437, "grad_norm": 1.549805760383606, "learning_rate": 0.00018581340032729972, "loss": 1.1957, "step": 8925 }, { "epoch": 0.34379210779595765, "grad_norm": 1.165260672569275, "learning_rate": 0.00018579787061074807, "loss": 1.2406, "step": 8930 }, { "epoch": 0.34398460057747837, 
"grad_norm": 1.1872533559799194, "learning_rate": 0.00018578233304854952, "loss": 1.1831, "step": 8935 }, { "epoch": 0.34417709335899904, "grad_norm": 0.8727648854255676, "learning_rate": 0.00018576678764212489, "loss": 1.2645, "step": 8940 }, { "epoch": 0.3443695861405197, "grad_norm": 1.1179304122924805, "learning_rate": 0.00018575123439289567, "loss": 1.297, "step": 8945 }, { "epoch": 0.34456207892204044, "grad_norm": 1.9064927101135254, "learning_rate": 0.0001857356733022841, "loss": 1.3917, "step": 8950 }, { "epoch": 0.3447545717035611, "grad_norm": 2.100154399871826, "learning_rate": 0.00018572010437171315, "loss": 1.1723, "step": 8955 }, { "epoch": 0.34494706448508183, "grad_norm": 1.0105838775634766, "learning_rate": 0.00018570452760260654, "loss": 1.0851, "step": 8960 }, { "epoch": 0.3451395572666025, "grad_norm": 1.760038137435913, "learning_rate": 0.0001856889429963886, "loss": 1.0612, "step": 8965 }, { "epoch": 0.3453320500481232, "grad_norm": 1.5740501880645752, "learning_rate": 0.00018567335055448444, "loss": 1.117, "step": 8970 }, { "epoch": 0.3455245428296439, "grad_norm": 1.4148597717285156, "learning_rate": 0.00018565775027831993, "loss": 1.2003, "step": 8975 }, { "epoch": 0.34571703561116457, "grad_norm": 1.2243534326553345, "learning_rate": 0.00018564214216932159, "loss": 1.2106, "step": 8980 }, { "epoch": 0.3459095283926853, "grad_norm": 1.3532603979110718, "learning_rate": 0.00018562652622891666, "loss": 1.1703, "step": 8985 }, { "epoch": 0.34610202117420596, "grad_norm": 1.6701220273971558, "learning_rate": 0.00018561090245853315, "loss": 1.2409, "step": 8990 }, { "epoch": 0.34629451395572663, "grad_norm": 1.6342322826385498, "learning_rate": 0.00018559527085959968, "loss": 1.2981, "step": 8995 }, { "epoch": 0.34648700673724736, "grad_norm": 2.4354701042175293, "learning_rate": 0.00018557963143354576, "loss": 1.1021, "step": 9000 }, { "epoch": 0.34667949951876803, "grad_norm": 1.5688186883926392, "learning_rate": 0.00018556398418180146, 
"loss": 1.2649, "step": 9005 }, { "epoch": 0.34687199230028876, "grad_norm": 2.2158894538879395, "learning_rate": 0.0001855483291057976, "loss": 1.2335, "step": 9010 }, { "epoch": 0.3470644850818094, "grad_norm": 1.7294437885284424, "learning_rate": 0.00018553266620696573, "loss": 1.3235, "step": 9015 }, { "epoch": 0.34725697786333015, "grad_norm": 1.1023756265640259, "learning_rate": 0.00018551699548673814, "loss": 1.3515, "step": 9020 }, { "epoch": 0.3474494706448508, "grad_norm": 1.4505863189697266, "learning_rate": 0.00018550131694654784, "loss": 1.3773, "step": 9025 }, { "epoch": 0.3476419634263715, "grad_norm": 2.221957206726074, "learning_rate": 0.00018548563058782847, "loss": 1.0896, "step": 9030 }, { "epoch": 0.3478344562078922, "grad_norm": 0.917010486125946, "learning_rate": 0.0001854699364120145, "loss": 1.1569, "step": 9035 }, { "epoch": 0.3480269489894129, "grad_norm": 1.4631186723709106, "learning_rate": 0.00018545423442054105, "loss": 1.2169, "step": 9040 }, { "epoch": 0.3482194417709336, "grad_norm": 1.0917268991470337, "learning_rate": 0.0001854385246148439, "loss": 1.2425, "step": 9045 }, { "epoch": 0.3484119345524543, "grad_norm": 1.5985426902770996, "learning_rate": 0.00018542280699635968, "loss": 1.0944, "step": 9050 }, { "epoch": 0.34860442733397495, "grad_norm": 1.5402495861053467, "learning_rate": 0.0001854070815665256, "loss": 1.1497, "step": 9055 }, { "epoch": 0.3487969201154957, "grad_norm": 1.211295485496521, "learning_rate": 0.00018539134832677972, "loss": 1.0403, "step": 9060 }, { "epoch": 0.34898941289701635, "grad_norm": 1.0569374561309814, "learning_rate": 0.00018537560727856068, "loss": 1.2886, "step": 9065 }, { "epoch": 0.3491819056785371, "grad_norm": 1.550212025642395, "learning_rate": 0.00018535985842330793, "loss": 1.2654, "step": 9070 }, { "epoch": 0.34937439846005774, "grad_norm": 1.7941083908081055, "learning_rate": 0.00018534410176246154, "loss": 1.2757, "step": 9075 }, { "epoch": 0.3495668912415784, "grad_norm": 
0.9004856944084167, "learning_rate": 0.00018532833729746243, "loss": 1.2045, "step": 9080 }, { "epoch": 0.34975938402309914, "grad_norm": 0.9916037321090698, "learning_rate": 0.00018531256502975216, "loss": 1.1788, "step": 9085 }, { "epoch": 0.3499518768046198, "grad_norm": 1.0524908304214478, "learning_rate": 0.00018529678496077292, "loss": 1.3298, "step": 9090 }, { "epoch": 0.35014436958614054, "grad_norm": 2.7244019508361816, "learning_rate": 0.00018528099709196774, "loss": 1.3274, "step": 9095 }, { "epoch": 0.3503368623676612, "grad_norm": 1.4286680221557617, "learning_rate": 0.0001852652014247803, "loss": 1.193, "step": 9100 }, { "epoch": 0.35052935514918193, "grad_norm": 1.0943810939788818, "learning_rate": 0.00018524939796065503, "loss": 1.2953, "step": 9105 }, { "epoch": 0.3507218479307026, "grad_norm": 1.1513092517852783, "learning_rate": 0.00018523358670103704, "loss": 1.3436, "step": 9110 }, { "epoch": 0.35091434071222327, "grad_norm": 2.142829656600952, "learning_rate": 0.00018521776764737218, "loss": 1.2998, "step": 9115 }, { "epoch": 0.351106833493744, "grad_norm": 0.9734616875648499, "learning_rate": 0.00018520194080110699, "loss": 1.2794, "step": 9120 }, { "epoch": 0.35129932627526467, "grad_norm": 1.0793628692626953, "learning_rate": 0.00018518610616368868, "loss": 1.2574, "step": 9125 }, { "epoch": 0.3514918190567854, "grad_norm": 2.409484386444092, "learning_rate": 0.00018517026373656532, "loss": 1.1601, "step": 9130 }, { "epoch": 0.35168431183830606, "grad_norm": 1.1166318655014038, "learning_rate": 0.0001851544135211855, "loss": 1.2705, "step": 9135 }, { "epoch": 0.35187680461982673, "grad_norm": 1.183131217956543, "learning_rate": 0.0001851385555189987, "loss": 1.132, "step": 9140 }, { "epoch": 0.35206929740134746, "grad_norm": 1.3792176246643066, "learning_rate": 0.00018512268973145497, "loss": 1.1271, "step": 9145 }, { "epoch": 0.35226179018286813, "grad_norm": 1.3978809118270874, "learning_rate": 0.00018510681616000513, "loss": 1.3828, 
"step": 9150 }, { "epoch": 0.35245428296438885, "grad_norm": 1.0242118835449219, "learning_rate": 0.00018509093480610078, "loss": 1.1982, "step": 9155 }, { "epoch": 0.3526467757459095, "grad_norm": 1.326621174812317, "learning_rate": 0.00018507504567119408, "loss": 1.0175, "step": 9160 }, { "epoch": 0.3528392685274302, "grad_norm": 1.1905460357666016, "learning_rate": 0.00018505914875673805, "loss": 1.3367, "step": 9165 }, { "epoch": 0.3530317613089509, "grad_norm": 1.5423171520233154, "learning_rate": 0.0001850432440641863, "loss": 1.1721, "step": 9170 }, { "epoch": 0.3532242540904716, "grad_norm": 1.0577900409698486, "learning_rate": 0.00018502733159499326, "loss": 1.2173, "step": 9175 }, { "epoch": 0.3534167468719923, "grad_norm": 0.8053417205810547, "learning_rate": 0.000185011411350614, "loss": 1.1492, "step": 9180 }, { "epoch": 0.353609239653513, "grad_norm": 1.076053261756897, "learning_rate": 0.0001849954833325043, "loss": 1.2117, "step": 9185 }, { "epoch": 0.3538017324350337, "grad_norm": 1.206359624862671, "learning_rate": 0.0001849795475421207, "loss": 1.1659, "step": 9190 }, { "epoch": 0.3539942252165544, "grad_norm": 1.4652369022369385, "learning_rate": 0.00018496360398092046, "loss": 1.2605, "step": 9195 }, { "epoch": 0.35418671799807505, "grad_norm": 1.158055067062378, "learning_rate": 0.00018494765265036144, "loss": 1.414, "step": 9200 }, { "epoch": 0.3543792107795958, "grad_norm": 2.4634461402893066, "learning_rate": 0.0001849316935519023, "loss": 1.1982, "step": 9205 }, { "epoch": 0.35457170356111645, "grad_norm": 1.875139594078064, "learning_rate": 0.00018491572668700242, "loss": 1.4133, "step": 9210 }, { "epoch": 0.3547641963426372, "grad_norm": 1.0054875612258911, "learning_rate": 0.00018489975205712185, "loss": 1.2294, "step": 9215 }, { "epoch": 0.35495668912415784, "grad_norm": 2.2620842456817627, "learning_rate": 0.00018488376966372134, "loss": 1.2672, "step": 9220 }, { "epoch": 0.3551491819056785, "grad_norm": 1.584251880645752, 
"learning_rate": 0.00018486777950826243, "loss": 1.4366, "step": 9225 }, { "epoch": 0.35534167468719924, "grad_norm": 1.6498923301696777, "learning_rate": 0.00018485178159220725, "loss": 1.3502, "step": 9230 }, { "epoch": 0.3555341674687199, "grad_norm": 1.6700108051300049, "learning_rate": 0.00018483577591701876, "loss": 1.2462, "step": 9235 }, { "epoch": 0.35572666025024063, "grad_norm": 1.6976680755615234, "learning_rate": 0.00018481976248416052, "loss": 1.4637, "step": 9240 }, { "epoch": 0.3559191530317613, "grad_norm": 0.9686551094055176, "learning_rate": 0.0001848037412950969, "loss": 1.1902, "step": 9245 }, { "epoch": 0.35611164581328203, "grad_norm": 1.2102336883544922, "learning_rate": 0.00018478771235129292, "loss": 1.586, "step": 9250 }, { "epoch": 0.3563041385948027, "grad_norm": 1.7220674753189087, "learning_rate": 0.0001847716756542143, "loss": 1.2324, "step": 9255 }, { "epoch": 0.35649663137632337, "grad_norm": 1.7433216571807861, "learning_rate": 0.0001847556312053275, "loss": 1.4454, "step": 9260 }, { "epoch": 0.3566891241578441, "grad_norm": 0.9930455088615417, "learning_rate": 0.0001847395790060997, "loss": 1.1601, "step": 9265 }, { "epoch": 0.35688161693936477, "grad_norm": 1.1169023513793945, "learning_rate": 0.00018472351905799873, "loss": 1.2534, "step": 9270 }, { "epoch": 0.3570741097208855, "grad_norm": 1.238748550415039, "learning_rate": 0.00018470745136249316, "loss": 1.2174, "step": 9275 }, { "epoch": 0.35726660250240616, "grad_norm": 2.130223035812378, "learning_rate": 0.00018469137592105235, "loss": 1.3975, "step": 9280 }, { "epoch": 0.35745909528392683, "grad_norm": 1.4341787099838257, "learning_rate": 0.0001846752927351462, "loss": 1.1725, "step": 9285 }, { "epoch": 0.35765158806544756, "grad_norm": 1.948145866394043, "learning_rate": 0.00018465920180624548, "loss": 1.2741, "step": 9290 }, { "epoch": 0.3578440808469682, "grad_norm": 1.0314382314682007, "learning_rate": 0.00018464310313582157, "loss": 1.0998, "step": 9295 }, { 
"epoch": 0.35803657362848895, "grad_norm": 1.0461472272872925, "learning_rate": 0.0001846269967253466, "loss": 1.1953, "step": 9300 }, { "epoch": 0.3582290664100096, "grad_norm": 1.781084656715393, "learning_rate": 0.00018461088257629334, "loss": 1.3629, "step": 9305 }, { "epoch": 0.3584215591915303, "grad_norm": 1.9082306623458862, "learning_rate": 0.00018459476069013537, "loss": 1.2675, "step": 9310 }, { "epoch": 0.358614051973051, "grad_norm": 1.803348422050476, "learning_rate": 0.00018457863106834693, "loss": 1.2303, "step": 9315 }, { "epoch": 0.3588065447545717, "grad_norm": 1.5346139669418335, "learning_rate": 0.000184562493712403, "loss": 1.3354, "step": 9320 }, { "epoch": 0.3589990375360924, "grad_norm": 1.3731290102005005, "learning_rate": 0.00018454634862377916, "loss": 1.4874, "step": 9325 }, { "epoch": 0.3591915303176131, "grad_norm": 1.186759352684021, "learning_rate": 0.0001845301958039518, "loss": 1.29, "step": 9330 }, { "epoch": 0.3593840230991338, "grad_norm": 3.729174852371216, "learning_rate": 0.00018451403525439802, "loss": 1.2589, "step": 9335 }, { "epoch": 0.3595765158806545, "grad_norm": 2.46051025390625, "learning_rate": 0.00018449786697659554, "loss": 1.1818, "step": 9340 }, { "epoch": 0.35976900866217515, "grad_norm": 1.6652323007583618, "learning_rate": 0.00018448169097202288, "loss": 1.2719, "step": 9345 }, { "epoch": 0.3599615014436959, "grad_norm": 1.375410556793213, "learning_rate": 0.00018446550724215922, "loss": 1.2687, "step": 9350 }, { "epoch": 0.36015399422521654, "grad_norm": 1.9113675355911255, "learning_rate": 0.00018444931578848447, "loss": 1.2475, "step": 9355 }, { "epoch": 0.36034648700673727, "grad_norm": 1.8949065208435059, "learning_rate": 0.0001844331166124792, "loss": 1.3439, "step": 9360 }, { "epoch": 0.36053897978825794, "grad_norm": 1.0940630435943604, "learning_rate": 0.00018441690971562476, "loss": 1.203, "step": 9365 }, { "epoch": 0.3607314725697786, "grad_norm": 1.2999101877212524, "learning_rate": 
0.00018440069509940315, "loss": 1.2729, "step": 9370 }, { "epoch": 0.36092396535129934, "grad_norm": 1.3675721883773804, "learning_rate": 0.00018438447276529702, "loss": 1.2024, "step": 9375 }, { "epoch": 0.36111645813282, "grad_norm": 1.6651533842086792, "learning_rate": 0.00018436824271478988, "loss": 1.2235, "step": 9380 }, { "epoch": 0.36130895091434073, "grad_norm": 2.16670823097229, "learning_rate": 0.00018435200494936585, "loss": 1.4486, "step": 9385 }, { "epoch": 0.3615014436958614, "grad_norm": 1.3305730819702148, "learning_rate": 0.00018433575947050972, "loss": 1.2003, "step": 9390 }, { "epoch": 0.36169393647738207, "grad_norm": 1.5913615226745605, "learning_rate": 0.00018431950627970708, "loss": 1.2722, "step": 9395 }, { "epoch": 0.3618864292589028, "grad_norm": 0.9965779781341553, "learning_rate": 0.00018430324537844415, "loss": 1.0604, "step": 9400 }, { "epoch": 0.36207892204042347, "grad_norm": 1.7614198923110962, "learning_rate": 0.00018428697676820788, "loss": 1.2734, "step": 9405 }, { "epoch": 0.3622714148219442, "grad_norm": 1.190706491470337, "learning_rate": 0.00018427070045048594, "loss": 1.2309, "step": 9410 }, { "epoch": 0.36246390760346486, "grad_norm": 1.1487165689468384, "learning_rate": 0.00018425441642676667, "loss": 1.2049, "step": 9415 }, { "epoch": 0.3626564003849856, "grad_norm": 1.0437067747116089, "learning_rate": 0.00018423812469853918, "loss": 1.3632, "step": 9420 }, { "epoch": 0.36284889316650626, "grad_norm": 1.7774686813354492, "learning_rate": 0.00018422182526729318, "loss": 1.1797, "step": 9425 }, { "epoch": 0.36304138594802693, "grad_norm": 1.3748910427093506, "learning_rate": 0.0001842055181345192, "loss": 1.4438, "step": 9430 }, { "epoch": 0.36323387872954765, "grad_norm": 0.891248881816864, "learning_rate": 0.00018418920330170842, "loss": 1.3017, "step": 9435 }, { "epoch": 0.3634263715110683, "grad_norm": 1.5410393476486206, "learning_rate": 0.00018417288077035267, "loss": 1.2239, "step": 9440 }, { "epoch": 
0.36361886429258905, "grad_norm": 1.3638213872909546, "learning_rate": 0.00018415655054194457, "loss": 1.2245, "step": 9445 }, { "epoch": 0.3638113570741097, "grad_norm": 1.84505033493042, "learning_rate": 0.00018414021261797743, "loss": 1.1362, "step": 9450 }, { "epoch": 0.3640038498556304, "grad_norm": 1.5999794006347656, "learning_rate": 0.00018412386699994518, "loss": 1.1647, "step": 9455 }, { "epoch": 0.3641963426371511, "grad_norm": 1.55308997631073, "learning_rate": 0.0001841075136893426, "loss": 1.2612, "step": 9460 }, { "epoch": 0.3643888354186718, "grad_norm": 1.3549528121948242, "learning_rate": 0.00018409115268766505, "loss": 1.2095, "step": 9465 }, { "epoch": 0.3645813282001925, "grad_norm": 1.123184323310852, "learning_rate": 0.00018407478399640862, "loss": 1.3047, "step": 9470 }, { "epoch": 0.3647738209817132, "grad_norm": 1.3776748180389404, "learning_rate": 0.00018405840761707016, "loss": 1.1064, "step": 9475 }, { "epoch": 0.36496631376323385, "grad_norm": 1.3778200149536133, "learning_rate": 0.00018404202355114718, "loss": 1.0956, "step": 9480 }, { "epoch": 0.3651588065447546, "grad_norm": 0.9069898128509521, "learning_rate": 0.00018402563180013783, "loss": 1.141, "step": 9485 }, { "epoch": 0.36535129932627525, "grad_norm": 1.3908804655075073, "learning_rate": 0.0001840092323655411, "loss": 1.2679, "step": 9490 }, { "epoch": 0.365543792107796, "grad_norm": 1.3785732984542847, "learning_rate": 0.00018399282524885654, "loss": 1.22, "step": 9495 }, { "epoch": 0.36573628488931664, "grad_norm": 1.1326193809509277, "learning_rate": 0.00018397641045158453, "loss": 1.2289, "step": 9500 }, { "epoch": 0.36592877767083737, "grad_norm": 1.2267814874649048, "learning_rate": 0.0001839599879752261, "loss": 1.1337, "step": 9505 }, { "epoch": 0.36612127045235804, "grad_norm": 0.8690314888954163, "learning_rate": 0.00018394355782128295, "loss": 1.2535, "step": 9510 }, { "epoch": 0.3663137632338787, "grad_norm": 1.448415994644165, "learning_rate": 
0.00018392711999125748, "loss": 1.1405, "step": 9515 }, { "epoch": 0.36650625601539943, "grad_norm": 1.8989317417144775, "learning_rate": 0.00018391067448665288, "loss": 1.091, "step": 9520 }, { "epoch": 0.3666987487969201, "grad_norm": 1.2263299226760864, "learning_rate": 0.00018389422130897295, "loss": 1.1925, "step": 9525 }, { "epoch": 0.36689124157844083, "grad_norm": 0.8818153142929077, "learning_rate": 0.00018387776045972225, "loss": 1.2961, "step": 9530 }, { "epoch": 0.3670837343599615, "grad_norm": 1.0975017547607422, "learning_rate": 0.00018386129194040597, "loss": 1.414, "step": 9535 }, { "epoch": 0.36727622714148217, "grad_norm": 2.2097692489624023, "learning_rate": 0.00018384481575253004, "loss": 1.1941, "step": 9540 }, { "epoch": 0.3674687199230029, "grad_norm": 1.2249376773834229, "learning_rate": 0.0001838283318976012, "loss": 1.4472, "step": 9545 }, { "epoch": 0.36766121270452357, "grad_norm": 1.0000889301300049, "learning_rate": 0.0001838118403771267, "loss": 1.2399, "step": 9550 }, { "epoch": 0.3678537054860443, "grad_norm": 1.0249544382095337, "learning_rate": 0.00018379534119261458, "loss": 1.3182, "step": 9555 }, { "epoch": 0.36804619826756496, "grad_norm": 1.2347283363342285, "learning_rate": 0.00018377883434557362, "loss": 1.1313, "step": 9560 }, { "epoch": 0.3682386910490857, "grad_norm": 1.1021714210510254, "learning_rate": 0.0001837623198375132, "loss": 1.2381, "step": 9565 }, { "epoch": 0.36843118383060636, "grad_norm": 1.0923985242843628, "learning_rate": 0.00018374579766994355, "loss": 1.3386, "step": 9570 }, { "epoch": 0.368623676612127, "grad_norm": 1.7709978818893433, "learning_rate": 0.00018372926784437547, "loss": 1.2405, "step": 9575 }, { "epoch": 0.36881616939364775, "grad_norm": 1.316901683807373, "learning_rate": 0.00018371273036232047, "loss": 1.1244, "step": 9580 }, { "epoch": 0.3690086621751684, "grad_norm": 1.7281345129013062, "learning_rate": 0.00018369618522529085, "loss": 1.2979, "step": 9585 }, { "epoch": 
0.36920115495668915, "grad_norm": 1.6363762617111206, "learning_rate": 0.00018367963243479953, "loss": 1.1528, "step": 9590 }, { "epoch": 0.3693936477382098, "grad_norm": 1.7078179121017456, "learning_rate": 0.00018366307199236013, "loss": 1.2833, "step": 9595 }, { "epoch": 0.3695861405197305, "grad_norm": 1.9110232591629028, "learning_rate": 0.000183646503899487, "loss": 1.4191, "step": 9600 }, { "epoch": 0.3697786333012512, "grad_norm": 0.952301025390625, "learning_rate": 0.00018362992815769525, "loss": 1.1504, "step": 9605 }, { "epoch": 0.3699711260827719, "grad_norm": 0.9142165780067444, "learning_rate": 0.0001836133447685005, "loss": 1.2617, "step": 9610 }, { "epoch": 0.3701636188642926, "grad_norm": 1.5571134090423584, "learning_rate": 0.0001835967537334193, "loss": 1.3054, "step": 9615 }, { "epoch": 0.3703561116458133, "grad_norm": 1.799795389175415, "learning_rate": 0.00018358015505396877, "loss": 1.0603, "step": 9620 }, { "epoch": 0.37054860442733395, "grad_norm": 1.6660315990447998, "learning_rate": 0.0001835635487316667, "loss": 1.1757, "step": 9625 }, { "epoch": 0.3707410972088547, "grad_norm": 0.9840423464775085, "learning_rate": 0.00018354693476803168, "loss": 0.9815, "step": 9630 }, { "epoch": 0.37093358999037535, "grad_norm": 2.0538954734802246, "learning_rate": 0.00018353031316458286, "loss": 1.2396, "step": 9635 }, { "epoch": 0.37112608277189607, "grad_norm": 1.2079198360443115, "learning_rate": 0.0001835136839228403, "loss": 1.2731, "step": 9640 }, { "epoch": 0.37131857555341674, "grad_norm": 1.7076921463012695, "learning_rate": 0.00018349704704432457, "loss": 1.1388, "step": 9645 }, { "epoch": 0.37151106833493747, "grad_norm": 1.0324435234069824, "learning_rate": 0.00018348040253055698, "loss": 0.9949, "step": 9650 }, { "epoch": 0.37170356111645814, "grad_norm": 1.3635584115982056, "learning_rate": 0.0001834637503830596, "loss": 1.307, "step": 9655 }, { "epoch": 0.3718960538979788, "grad_norm": 1.6683429479599, "learning_rate": 
0.00018344709060335513, "loss": 1.1687, "step": 9660 }, { "epoch": 0.37208854667949953, "grad_norm": 2.3687121868133545, "learning_rate": 0.00018343042319296702, "loss": 1.4163, "step": 9665 }, { "epoch": 0.3722810394610202, "grad_norm": 1.9078242778778076, "learning_rate": 0.00018341374815341937, "loss": 1.2986, "step": 9670 }, { "epoch": 0.37247353224254093, "grad_norm": 1.6381220817565918, "learning_rate": 0.00018339706548623706, "loss": 1.5092, "step": 9675 }, { "epoch": 0.3726660250240616, "grad_norm": 1.3529161214828491, "learning_rate": 0.00018338037519294553, "loss": 1.2296, "step": 9680 }, { "epoch": 0.37285851780558227, "grad_norm": 1.1034053564071655, "learning_rate": 0.00018336367727507104, "loss": 1.2774, "step": 9685 }, { "epoch": 0.373051010587103, "grad_norm": 2.0935397148132324, "learning_rate": 0.0001833469717341405, "loss": 1.2247, "step": 9690 }, { "epoch": 0.37324350336862366, "grad_norm": 1.6294866800308228, "learning_rate": 0.0001833302585716815, "loss": 1.3766, "step": 9695 }, { "epoch": 0.3734359961501444, "grad_norm": 1.6927978992462158, "learning_rate": 0.0001833135377892224, "loss": 1.3069, "step": 9700 }, { "epoch": 0.37362848893166506, "grad_norm": 0.8497247695922852, "learning_rate": 0.00018329680938829212, "loss": 1.0906, "step": 9705 }, { "epoch": 0.37382098171318573, "grad_norm": 1.9347554445266724, "learning_rate": 0.00018328007337042046, "loss": 1.277, "step": 9710 }, { "epoch": 0.37401347449470645, "grad_norm": 1.023130178451538, "learning_rate": 0.00018326332973713776, "loss": 1.254, "step": 9715 }, { "epoch": 0.3742059672762271, "grad_norm": 1.7206385135650635, "learning_rate": 0.0001832465784899751, "loss": 1.2141, "step": 9720 }, { "epoch": 0.37439846005774785, "grad_norm": 1.2445294857025146, "learning_rate": 0.00018322981963046433, "loss": 1.3817, "step": 9725 }, { "epoch": 0.3745909528392685, "grad_norm": 1.832334280014038, "learning_rate": 0.00018321305316013788, "loss": 1.3584, "step": 9730 }, { "epoch": 
0.37478344562078925, "grad_norm": 1.2087010145187378, "learning_rate": 0.00018319627908052898, "loss": 1.116, "step": 9735 }, { "epoch": 0.3749759384023099, "grad_norm": 1.286687970161438, "learning_rate": 0.00018317949739317147, "loss": 1.1913, "step": 9740 }, { "epoch": 0.3751684311838306, "grad_norm": 1.44833242893219, "learning_rate": 0.00018316270809959993, "loss": 1.2713, "step": 9745 }, { "epoch": 0.3753609239653513, "grad_norm": 1.1395667791366577, "learning_rate": 0.00018314591120134963, "loss": 1.2912, "step": 9750 }, { "epoch": 0.375553416746872, "grad_norm": 1.1399837732315063, "learning_rate": 0.00018312910669995654, "loss": 1.2804, "step": 9755 }, { "epoch": 0.3757459095283927, "grad_norm": 1.814249038696289, "learning_rate": 0.00018311229459695735, "loss": 1.1062, "step": 9760 }, { "epoch": 0.3759384023099134, "grad_norm": 1.4851144552230835, "learning_rate": 0.00018309547489388933, "loss": 1.2826, "step": 9765 }, { "epoch": 0.37613089509143405, "grad_norm": 0.9308827519416809, "learning_rate": 0.00018307864759229065, "loss": 1.3706, "step": 9770 }, { "epoch": 0.3763233878729548, "grad_norm": 3.707566261291504, "learning_rate": 0.00018306181269369998, "loss": 1.2292, "step": 9775 }, { "epoch": 0.37651588065447544, "grad_norm": 2.6666324138641357, "learning_rate": 0.00018304497019965677, "loss": 1.4645, "step": 9780 }, { "epoch": 0.37670837343599617, "grad_norm": 1.5997512340545654, "learning_rate": 0.00018302812011170114, "loss": 1.2812, "step": 9785 }, { "epoch": 0.37690086621751684, "grad_norm": 0.8998873233795166, "learning_rate": 0.00018301126243137395, "loss": 1.195, "step": 9790 }, { "epoch": 0.3770933589990375, "grad_norm": 1.407524585723877, "learning_rate": 0.0001829943971602167, "loss": 1.1793, "step": 9795 }, { "epoch": 0.37728585178055823, "grad_norm": 1.1469497680664062, "learning_rate": 0.00018297752429977164, "loss": 1.3624, "step": 9800 }, { "epoch": 0.3774783445620789, "grad_norm": 1.4583423137664795, "learning_rate": 
0.00018296064385158164, "loss": 1.2033, "step": 9805 }, { "epoch": 0.37767083734359963, "grad_norm": 1.0782575607299805, "learning_rate": 0.00018294375581719036, "loss": 1.1823, "step": 9810 }, { "epoch": 0.3778633301251203, "grad_norm": 1.1890922784805298, "learning_rate": 0.00018292686019814202, "loss": 1.2711, "step": 9815 }, { "epoch": 0.378055822906641, "grad_norm": 0.854491651058197, "learning_rate": 0.00018290995699598165, "loss": 1.1953, "step": 9820 }, { "epoch": 0.3782483156881617, "grad_norm": 1.2184374332427979, "learning_rate": 0.00018289304621225497, "loss": 1.2052, "step": 9825 }, { "epoch": 0.37844080846968237, "grad_norm": 1.1952948570251465, "learning_rate": 0.0001828761278485083, "loss": 1.2516, "step": 9830 }, { "epoch": 0.3786333012512031, "grad_norm": 2.1117265224456787, "learning_rate": 0.00018285920190628879, "loss": 1.2834, "step": 9835 }, { "epoch": 0.37882579403272376, "grad_norm": 1.1815403699874878, "learning_rate": 0.00018284226838714412, "loss": 1.0574, "step": 9840 }, { "epoch": 0.3790182868142445, "grad_norm": 1.3763145208358765, "learning_rate": 0.00018282532729262278, "loss": 1.2813, "step": 9845 }, { "epoch": 0.37921077959576516, "grad_norm": 1.5308822393417358, "learning_rate": 0.00018280837862427393, "loss": 1.2118, "step": 9850 }, { "epoch": 0.3794032723772858, "grad_norm": 1.1991111040115356, "learning_rate": 0.00018279142238364745, "loss": 1.0999, "step": 9855 }, { "epoch": 0.37959576515880655, "grad_norm": 1.7062435150146484, "learning_rate": 0.0001827744585722938, "loss": 1.2103, "step": 9860 }, { "epoch": 0.3797882579403272, "grad_norm": 1.5572453737258911, "learning_rate": 0.00018275748719176425, "loss": 1.112, "step": 9865 }, { "epoch": 0.37998075072184795, "grad_norm": 0.9328321218490601, "learning_rate": 0.00018274050824361072, "loss": 1.2688, "step": 9870 }, { "epoch": 0.3801732435033686, "grad_norm": 1.290634036064148, "learning_rate": 0.0001827235217293858, "loss": 1.1486, "step": 9875 }, { "epoch": 
0.38036573628488934, "grad_norm": 1.7471963167190552, "learning_rate": 0.00018270652765064283, "loss": 1.2584, "step": 9880 }, { "epoch": 0.38055822906641, "grad_norm": 1.4827409982681274, "learning_rate": 0.00018268952600893577, "loss": 1.3655, "step": 9885 }, { "epoch": 0.3807507218479307, "grad_norm": 1.0229063034057617, "learning_rate": 0.00018267251680581935, "loss": 1.1955, "step": 9890 }, { "epoch": 0.3809432146294514, "grad_norm": 1.3075898885726929, "learning_rate": 0.0001826555000428489, "loss": 0.9779, "step": 9895 }, { "epoch": 0.3811357074109721, "grad_norm": 1.5942119359970093, "learning_rate": 0.00018263847572158053, "loss": 1.2556, "step": 9900 }, { "epoch": 0.3813282001924928, "grad_norm": 0.9223330616950989, "learning_rate": 0.00018262144384357097, "loss": 1.1109, "step": 9905 }, { "epoch": 0.3815206929740135, "grad_norm": 1.7757457494735718, "learning_rate": 0.00018260440441037766, "loss": 1.2219, "step": 9910 }, { "epoch": 0.38171318575553415, "grad_norm": 1.4870551824569702, "learning_rate": 0.00018258735742355883, "loss": 1.3312, "step": 9915 }, { "epoch": 0.38190567853705487, "grad_norm": 1.2982031106948853, "learning_rate": 0.00018257030288467322, "loss": 1.2421, "step": 9920 }, { "epoch": 0.38209817131857554, "grad_norm": 1.016822338104248, "learning_rate": 0.0001825532407952804, "loss": 1.3542, "step": 9925 }, { "epoch": 0.38229066410009627, "grad_norm": 1.0763219594955444, "learning_rate": 0.00018253617115694058, "loss": 1.2579, "step": 9930 }, { "epoch": 0.38248315688161694, "grad_norm": 1.7673341035842896, "learning_rate": 0.00018251909397121464, "loss": 1.1875, "step": 9935 }, { "epoch": 0.3826756496631376, "grad_norm": 1.3719041347503662, "learning_rate": 0.00018250200923966423, "loss": 1.1493, "step": 9940 }, { "epoch": 0.38286814244465833, "grad_norm": 1.8589760065078735, "learning_rate": 0.00018248491696385157, "loss": 1.2751, "step": 9945 }, { "epoch": 0.383060635226179, "grad_norm": 1.6069539785385132, "learning_rate": 
0.0001824678171453397, "loss": 1.415, "step": 9950 }, { "epoch": 0.38325312800769973, "grad_norm": 1.7131226062774658, "learning_rate": 0.0001824507097856922, "loss": 1.1773, "step": 9955 }, { "epoch": 0.3834456207892204, "grad_norm": 0.7622759342193604, "learning_rate": 0.0001824335948864735, "loss": 1.1588, "step": 9960 }, { "epoch": 0.3836381135707411, "grad_norm": 1.6202800273895264, "learning_rate": 0.0001824164724492486, "loss": 1.3064, "step": 9965 }, { "epoch": 0.3838306063522618, "grad_norm": 1.5452194213867188, "learning_rate": 0.0001823993424755833, "loss": 1.2993, "step": 9970 }, { "epoch": 0.38402309913378246, "grad_norm": 1.013929009437561, "learning_rate": 0.00018238220496704396, "loss": 1.3123, "step": 9975 }, { "epoch": 0.3842155919153032, "grad_norm": 0.9624648094177246, "learning_rate": 0.0001823650599251977, "loss": 1.0517, "step": 9980 }, { "epoch": 0.38440808469682386, "grad_norm": 1.2065962553024292, "learning_rate": 0.00018234790735161232, "loss": 1.1954, "step": 9985 }, { "epoch": 0.3846005774783446, "grad_norm": 1.425376057624817, "learning_rate": 0.00018233074724785634, "loss": 1.069, "step": 9990 }, { "epoch": 0.38479307025986526, "grad_norm": 1.0355112552642822, "learning_rate": 0.00018231357961549888, "loss": 1.0839, "step": 9995 }, { "epoch": 0.3849855630413859, "grad_norm": 1.7273633480072021, "learning_rate": 0.00018229640445610988, "loss": 1.1324, "step": 10000 }, { "epoch": 0.38517805582290665, "grad_norm": 1.413021445274353, "learning_rate": 0.00018227922177125984, "loss": 1.0402, "step": 10005 }, { "epoch": 0.3853705486044273, "grad_norm": 1.125299334526062, "learning_rate": 0.00018226203156252005, "loss": 1.271, "step": 10010 }, { "epoch": 0.38556304138594805, "grad_norm": 1.2611075639724731, "learning_rate": 0.00018224483383146237, "loss": 1.2228, "step": 10015 }, { "epoch": 0.3857555341674687, "grad_norm": 1.0332306623458862, "learning_rate": 0.00018222762857965944, "loss": 1.2059, "step": 10020 }, { "epoch": 
0.3859480269489894, "grad_norm": 1.965288758277893, "learning_rate": 0.00018221041580868464, "loss": 1.217, "step": 10025 }, { "epoch": 0.3861405197305101, "grad_norm": 0.8059799075126648, "learning_rate": 0.00018219319552011186, "loss": 1.2039, "step": 10030 }, { "epoch": 0.3863330125120308, "grad_norm": 1.4955195188522339, "learning_rate": 0.00018217596771551584, "loss": 1.2206, "step": 10035 }, { "epoch": 0.3865255052935515, "grad_norm": 0.987479567527771, "learning_rate": 0.00018215873239647197, "loss": 1.3134, "step": 10040 }, { "epoch": 0.3867179980750722, "grad_norm": 1.7247464656829834, "learning_rate": 0.00018214148956455627, "loss": 1.1786, "step": 10045 }, { "epoch": 0.3869104908565929, "grad_norm": 0.9822973608970642, "learning_rate": 0.00018212423922134546, "loss": 1.0866, "step": 10050 }, { "epoch": 0.3871029836381136, "grad_norm": 1.1217613220214844, "learning_rate": 0.000182106981368417, "loss": 1.3292, "step": 10055 }, { "epoch": 0.38729547641963424, "grad_norm": 1.2722941637039185, "learning_rate": 0.000182089716007349, "loss": 1.0294, "step": 10060 }, { "epoch": 0.38748796920115497, "grad_norm": 1.6616365909576416, "learning_rate": 0.00018207244313972026, "loss": 1.3691, "step": 10065 }, { "epoch": 0.38768046198267564, "grad_norm": 4.093936443328857, "learning_rate": 0.0001820551627671103, "loss": 1.1916, "step": 10070 }, { "epoch": 0.38787295476419636, "grad_norm": 1.9061866998672485, "learning_rate": 0.00018203787489109926, "loss": 1.3733, "step": 10075 }, { "epoch": 0.38806544754571703, "grad_norm": 1.6439005136489868, "learning_rate": 0.00018202057951326804, "loss": 1.3533, "step": 10080 }, { "epoch": 0.3882579403272377, "grad_norm": 1.535980224609375, "learning_rate": 0.0001820032766351981, "loss": 1.3916, "step": 10085 }, { "epoch": 0.38845043310875843, "grad_norm": 1.6342761516571045, "learning_rate": 0.00018198596625847177, "loss": 1.335, "step": 10090 }, { "epoch": 0.3886429258902791, "grad_norm": 2.2760815620422363, "learning_rate": 
0.00018196864838467192, "loss": 1.0399, "step": 10095 }, { "epoch": 0.3888354186717998, "grad_norm": 1.173302412033081, "learning_rate": 0.0001819513230153822, "loss": 1.3414, "step": 10100 }, { "epoch": 0.3890279114533205, "grad_norm": 1.7409497499465942, "learning_rate": 0.00018193399015218684, "loss": 1.3377, "step": 10105 }, { "epoch": 0.38922040423484117, "grad_norm": 1.3547555208206177, "learning_rate": 0.00018191664979667085, "loss": 1.2576, "step": 10110 }, { "epoch": 0.3894128970163619, "grad_norm": 2.2421867847442627, "learning_rate": 0.0001818993019504199, "loss": 1.1624, "step": 10115 }, { "epoch": 0.38960538979788256, "grad_norm": 1.5812993049621582, "learning_rate": 0.00018188194661502029, "loss": 1.2319, "step": 10120 }, { "epoch": 0.3897978825794033, "grad_norm": 1.8024287223815918, "learning_rate": 0.00018186458379205908, "loss": 1.4016, "step": 10125 }, { "epoch": 0.38999037536092396, "grad_norm": 0.9069392681121826, "learning_rate": 0.000181847213483124, "loss": 1.1683, "step": 10130 }, { "epoch": 0.3901828681424447, "grad_norm": 1.6808935403823853, "learning_rate": 0.00018182983568980346, "loss": 1.3519, "step": 10135 }, { "epoch": 0.39037536092396535, "grad_norm": 2.584958553314209, "learning_rate": 0.0001818124504136865, "loss": 1.3804, "step": 10140 }, { "epoch": 0.390567853705486, "grad_norm": 1.4569361209869385, "learning_rate": 0.00018179505765636287, "loss": 1.2862, "step": 10145 }, { "epoch": 0.39076034648700675, "grad_norm": 2.0809457302093506, "learning_rate": 0.0001817776574194231, "loss": 1.1108, "step": 10150 }, { "epoch": 0.3909528392685274, "grad_norm": 1.7902493476867676, "learning_rate": 0.00018176024970445828, "loss": 1.0611, "step": 10155 }, { "epoch": 0.39114533205004814, "grad_norm": 0.9953207969665527, "learning_rate": 0.00018174283451306025, "loss": 1.1883, "step": 10160 }, { "epoch": 0.3913378248315688, "grad_norm": 1.0629642009735107, "learning_rate": 0.00018172541184682147, "loss": 1.3, "step": 10165 }, { "epoch": 
0.3915303176130895, "grad_norm": 1.546132206916809, "learning_rate": 0.0001817079817073352, "loss": 1.2446, "step": 10170 }, { "epoch": 0.3917228103946102, "grad_norm": 1.379883050918579, "learning_rate": 0.0001816905440961952, "loss": 1.2964, "step": 10175 }, { "epoch": 0.3919153031761309, "grad_norm": 1.132592797279358, "learning_rate": 0.00018167309901499613, "loss": 1.3951, "step": 10180 }, { "epoch": 0.3921077959576516, "grad_norm": 1.4765934944152832, "learning_rate": 0.00018165564646533322, "loss": 1.2278, "step": 10185 }, { "epoch": 0.3923002887391723, "grad_norm": 1.5826079845428467, "learning_rate": 0.00018163818644880233, "loss": 1.2615, "step": 10190 }, { "epoch": 0.392492781520693, "grad_norm": 1.5647984743118286, "learning_rate": 0.00018162071896700007, "loss": 1.4696, "step": 10195 }, { "epoch": 0.39268527430221367, "grad_norm": 1.0377607345581055, "learning_rate": 0.0001816032440215238, "loss": 1.1309, "step": 10200 }, { "epoch": 0.39287776708373434, "grad_norm": 1.1878221035003662, "learning_rate": 0.0001815857616139714, "loss": 1.1442, "step": 10205 }, { "epoch": 0.39307025986525507, "grad_norm": 1.5119047164916992, "learning_rate": 0.00018156827174594157, "loss": 1.2436, "step": 10210 }, { "epoch": 0.39326275264677574, "grad_norm": 1.6624690294265747, "learning_rate": 0.00018155077441903364, "loss": 1.1726, "step": 10215 }, { "epoch": 0.39345524542829646, "grad_norm": 1.2995012998580933, "learning_rate": 0.0001815332696348476, "loss": 1.3053, "step": 10220 }, { "epoch": 0.39364773820981713, "grad_norm": 1.3727355003356934, "learning_rate": 0.00018151575739498417, "loss": 1.4224, "step": 10225 }, { "epoch": 0.3938402309913378, "grad_norm": 1.1980619430541992, "learning_rate": 0.0001814982377010447, "loss": 1.0973, "step": 10230 }, { "epoch": 0.39403272377285853, "grad_norm": 1.4235668182373047, "learning_rate": 0.00018148071055463128, "loss": 1.1659, "step": 10235 }, { "epoch": 0.3942252165543792, "grad_norm": 1.1501004695892334, "learning_rate": 
0.00018146317595734663, "loss": 1.2738, "step": 10240 }, { "epoch": 0.3944177093358999, "grad_norm": 1.1686300039291382, "learning_rate": 0.00018144563391079419, "loss": 1.1691, "step": 10245 }, { "epoch": 0.3946102021174206, "grad_norm": 1.3350188732147217, "learning_rate": 0.00018142808441657806, "loss": 1.2344, "step": 10250 }, { "epoch": 0.39480269489894126, "grad_norm": 1.0583946704864502, "learning_rate": 0.00018141052747630302, "loss": 1.1358, "step": 10255 }, { "epoch": 0.394995187680462, "grad_norm": 1.0637165307998657, "learning_rate": 0.00018139296309157454, "loss": 1.2589, "step": 10260 }, { "epoch": 0.39518768046198266, "grad_norm": 1.971304178237915, "learning_rate": 0.00018137539126399874, "loss": 1.1413, "step": 10265 }, { "epoch": 0.3953801732435034, "grad_norm": 1.1685267686843872, "learning_rate": 0.0001813578119951825, "loss": 1.1702, "step": 10270 }, { "epoch": 0.39557266602502406, "grad_norm": 1.620936393737793, "learning_rate": 0.0001813402252867333, "loss": 1.2636, "step": 10275 }, { "epoch": 0.3957651588065448, "grad_norm": 1.553240180015564, "learning_rate": 0.00018132263114025934, "loss": 1.4167, "step": 10280 }, { "epoch": 0.39595765158806545, "grad_norm": 1.260498285293579, "learning_rate": 0.00018130502955736942, "loss": 1.2984, "step": 10285 }, { "epoch": 0.3961501443695861, "grad_norm": 1.7073127031326294, "learning_rate": 0.0001812874205396732, "loss": 1.29, "step": 10290 }, { "epoch": 0.39634263715110685, "grad_norm": 0.900610625743866, "learning_rate": 0.00018126980408878082, "loss": 1.0423, "step": 10295 }, { "epoch": 0.3965351299326275, "grad_norm": 1.359563946723938, "learning_rate": 0.00018125218020630324, "loss": 1.1576, "step": 10300 }, { "epoch": 0.39672762271414824, "grad_norm": 0.9399506449699402, "learning_rate": 0.000181234548893852, "loss": 1.1481, "step": 10305 }, { "epoch": 0.3969201154956689, "grad_norm": 1.4632538557052612, "learning_rate": 0.00018121691015303944, "loss": 1.1404, "step": 10310 }, { "epoch": 
0.3971126082771896, "grad_norm": 1.644718050956726, "learning_rate": 0.00018119926398547839, "loss": 1.1783, "step": 10315 }, { "epoch": 0.3973051010587103, "grad_norm": 1.299018144607544, "learning_rate": 0.00018118161039278258, "loss": 1.2076, "step": 10320 }, { "epoch": 0.397497593840231, "grad_norm": 1.5833697319030762, "learning_rate": 0.00018116394937656632, "loss": 1.0825, "step": 10325 }, { "epoch": 0.3976900866217517, "grad_norm": 1.4813597202301025, "learning_rate": 0.0001811462809384445, "loss": 1.263, "step": 10330 }, { "epoch": 0.3978825794032724, "grad_norm": 1.8714033365249634, "learning_rate": 0.00018112860508003284, "loss": 1.2425, "step": 10335 }, { "epoch": 0.39807507218479304, "grad_norm": 1.5847947597503662, "learning_rate": 0.0001811109218029477, "loss": 1.0863, "step": 10340 }, { "epoch": 0.39826756496631377, "grad_norm": 1.339046597480774, "learning_rate": 0.00018109323110880604, "loss": 1.3871, "step": 10345 }, { "epoch": 0.39846005774783444, "grad_norm": 2.370396375656128, "learning_rate": 0.0001810755329992256, "loss": 1.2629, "step": 10350 }, { "epoch": 0.39865255052935517, "grad_norm": 1.2930303812026978, "learning_rate": 0.00018105782747582474, "loss": 1.1281, "step": 10355 }, { "epoch": 0.39884504331087584, "grad_norm": 1.2590947151184082, "learning_rate": 0.0001810401145402225, "loss": 1.2229, "step": 10360 }, { "epoch": 0.39903753609239656, "grad_norm": 0.8280492424964905, "learning_rate": 0.00018102239419403866, "loss": 1.2601, "step": 10365 }, { "epoch": 0.39923002887391723, "grad_norm": 1.6567853689193726, "learning_rate": 0.0001810046664388936, "loss": 1.1296, "step": 10370 }, { "epoch": 0.3994225216554379, "grad_norm": 1.2103195190429688, "learning_rate": 0.00018098693127640834, "loss": 1.1524, "step": 10375 }, { "epoch": 0.3996150144369586, "grad_norm": 1.4716650247573853, "learning_rate": 0.00018096918870820475, "loss": 1.1805, "step": 10380 }, { "epoch": 0.3998075072184793, "grad_norm": 1.291873574256897, "learning_rate": 
0.00018095143873590524, "loss": 1.2877, "step": 10385 }, { "epoch": 0.4, "grad_norm": 0.8508723974227905, "learning_rate": 0.0001809336813611329, "loss": 0.9215, "step": 10390 }, { "epoch": 0.4001924927815207, "grad_norm": 1.1256935596466064, "learning_rate": 0.00018091591658551154, "loss": 1.3286, "step": 10395 }, { "epoch": 0.40038498556304136, "grad_norm": 1.1910960674285889, "learning_rate": 0.0001808981444106656, "loss": 1.1078, "step": 10400 }, { "epoch": 0.4005774783445621, "grad_norm": 2.188884735107422, "learning_rate": 0.00018088036483822028, "loss": 1.2762, "step": 10405 }, { "epoch": 0.40076997112608276, "grad_norm": 0.9240724444389343, "learning_rate": 0.00018086257786980136, "loss": 1.1288, "step": 10410 }, { "epoch": 0.4009624639076035, "grad_norm": 1.961204171180725, "learning_rate": 0.00018084478350703537, "loss": 1.1863, "step": 10415 }, { "epoch": 0.40115495668912415, "grad_norm": 1.5713763236999512, "learning_rate": 0.00018082698175154947, "loss": 1.2157, "step": 10420 }, { "epoch": 0.4013474494706448, "grad_norm": 2.006776809692383, "learning_rate": 0.00018080917260497153, "loss": 1.1671, "step": 10425 }, { "epoch": 0.40153994225216555, "grad_norm": 1.511513352394104, "learning_rate": 0.00018079135606893006, "loss": 1.2428, "step": 10430 }, { "epoch": 0.4017324350336862, "grad_norm": 1.5270637273788452, "learning_rate": 0.00018077353214505427, "loss": 1.2887, "step": 10435 }, { "epoch": 0.40192492781520694, "grad_norm": 1.470389723777771, "learning_rate": 0.00018075570083497407, "loss": 1.2739, "step": 10440 }, { "epoch": 0.4021174205967276, "grad_norm": 1.224330186843872, "learning_rate": 0.00018073786214031992, "loss": 1.0882, "step": 10445 }, { "epoch": 0.40230991337824834, "grad_norm": 2.0693979263305664, "learning_rate": 0.00018072001606272316, "loss": 1.5091, "step": 10450 }, { "epoch": 0.402502406159769, "grad_norm": 1.418346643447876, "learning_rate": 0.00018070216260381567, "loss": 1.2886, "step": 10455 }, { "epoch": 
0.4026948989412897, "grad_norm": 1.8632601499557495, "learning_rate": 0.00018068430176522998, "loss": 1.1809, "step": 10460 }, { "epoch": 0.4028873917228104, "grad_norm": 1.6064730882644653, "learning_rate": 0.00018066643354859937, "loss": 1.2394, "step": 10465 }, { "epoch": 0.4030798845043311, "grad_norm": 1.2319833040237427, "learning_rate": 0.0001806485579555578, "loss": 1.1979, "step": 10470 }, { "epoch": 0.4032723772858518, "grad_norm": 1.5506865978240967, "learning_rate": 0.00018063067498773987, "loss": 1.1899, "step": 10475 }, { "epoch": 0.40346487006737247, "grad_norm": 1.360120415687561, "learning_rate": 0.00018061278464678082, "loss": 1.0995, "step": 10480 }, { "epoch": 0.40365736284889314, "grad_norm": 1.133346438407898, "learning_rate": 0.00018059488693431664, "loss": 1.1972, "step": 10485 }, { "epoch": 0.40384985563041387, "grad_norm": 1.6961482763290405, "learning_rate": 0.00018057698185198394, "loss": 1.0823, "step": 10490 }, { "epoch": 0.40404234841193454, "grad_norm": 1.0126832723617554, "learning_rate": 0.00018055906940142, "loss": 1.3294, "step": 10495 }, { "epoch": 0.40423484119345526, "grad_norm": 1.378825068473816, "learning_rate": 0.00018054114958426283, "loss": 1.3188, "step": 10500 }, { "epoch": 0.40442733397497593, "grad_norm": 1.1392402648925781, "learning_rate": 0.00018052322240215104, "loss": 1.2428, "step": 10505 }, { "epoch": 0.40461982675649666, "grad_norm": 1.0441240072250366, "learning_rate": 0.00018050528785672402, "loss": 1.2997, "step": 10510 }, { "epoch": 0.40481231953801733, "grad_norm": 1.3564190864562988, "learning_rate": 0.00018048734594962171, "loss": 1.3018, "step": 10515 }, { "epoch": 0.405004812319538, "grad_norm": 1.3429349660873413, "learning_rate": 0.0001804693966824848, "loss": 1.0567, "step": 10520 }, { "epoch": 0.4051973051010587, "grad_norm": 0.920313835144043, "learning_rate": 0.00018045144005695462, "loss": 1.1386, "step": 10525 }, { "epoch": 0.4053897978825794, "grad_norm": 2.402700662612915, "learning_rate": 
0.00018043347607467317, "loss": 1.2837, "step": 10530 }, { "epoch": 0.4055822906641001, "grad_norm": 1.7154083251953125, "learning_rate": 0.00018041550473728318, "loss": 1.3188, "step": 10535 }, { "epoch": 0.4057747834456208, "grad_norm": 0.8770251274108887, "learning_rate": 0.000180397526046428, "loss": 1.1641, "step": 10540 }, { "epoch": 0.40596727622714146, "grad_norm": 0.9887571334838867, "learning_rate": 0.0001803795400037516, "loss": 1.0042, "step": 10545 }, { "epoch": 0.4061597690086622, "grad_norm": 2.665354013442993, "learning_rate": 0.00018036154661089877, "loss": 1.2579, "step": 10550 }, { "epoch": 0.40635226179018286, "grad_norm": 2.6088809967041016, "learning_rate": 0.00018034354586951486, "loss": 1.1098, "step": 10555 }, { "epoch": 0.4065447545717036, "grad_norm": 1.4641830921173096, "learning_rate": 0.00018032553778124586, "loss": 1.1108, "step": 10560 }, { "epoch": 0.40673724735322425, "grad_norm": 1.0744770765304565, "learning_rate": 0.00018030752234773854, "loss": 1.1234, "step": 10565 }, { "epoch": 0.4069297401347449, "grad_norm": 1.2617886066436768, "learning_rate": 0.00018028949957064034, "loss": 1.1753, "step": 10570 }, { "epoch": 0.40712223291626565, "grad_norm": 1.4641857147216797, "learning_rate": 0.00018027146945159923, "loss": 1.2671, "step": 10575 }, { "epoch": 0.4073147256977863, "grad_norm": 1.4347914457321167, "learning_rate": 0.00018025343199226402, "loss": 1.1348, "step": 10580 }, { "epoch": 0.40750721847930704, "grad_norm": 1.434019923210144, "learning_rate": 0.00018023538719428407, "loss": 1.2439, "step": 10585 }, { "epoch": 0.4076997112608277, "grad_norm": 1.1034338474273682, "learning_rate": 0.00018021733505930944, "loss": 1.0502, "step": 10590 }, { "epoch": 0.40789220404234844, "grad_norm": 1.591850996017456, "learning_rate": 0.00018019927558899097, "loss": 1.178, "step": 10595 }, { "epoch": 0.4080846968238691, "grad_norm": 1.672735333442688, "learning_rate": 0.00018018120878498, "loss": 1.2363, "step": 10600 }, { "epoch": 
0.4082771896053898, "grad_norm": 1.8779442310333252, "learning_rate": 0.00018016313464892862, "loss": 1.2537, "step": 10605 }, { "epoch": 0.4084696823869105, "grad_norm": 1.075453281402588, "learning_rate": 0.00018014505318248963, "loss": 1.081, "step": 10610 }, { "epoch": 0.4086621751684312, "grad_norm": 1.350914478302002, "learning_rate": 0.0001801269643873164, "loss": 1.3958, "step": 10615 }, { "epoch": 0.4088546679499519, "grad_norm": 1.6566729545593262, "learning_rate": 0.0001801088682650631, "loss": 1.3208, "step": 10620 }, { "epoch": 0.40904716073147257, "grad_norm": 1.243171215057373, "learning_rate": 0.00018009076481738446, "loss": 1.17, "step": 10625 }, { "epoch": 0.40923965351299324, "grad_norm": 1.110456109046936, "learning_rate": 0.00018007265404593593, "loss": 1.1311, "step": 10630 }, { "epoch": 0.40943214629451397, "grad_norm": 2.485719919204712, "learning_rate": 0.00018005453595237362, "loss": 1.3703, "step": 10635 }, { "epoch": 0.40962463907603464, "grad_norm": 1.3115043640136719, "learning_rate": 0.00018003641053835435, "loss": 1.2551, "step": 10640 }, { "epoch": 0.40981713185755536, "grad_norm": 1.530535340309143, "learning_rate": 0.0001800182778055355, "loss": 1.3577, "step": 10645 }, { "epoch": 0.41000962463907603, "grad_norm": 1.2339287996292114, "learning_rate": 0.00018000013775557521, "loss": 1.2539, "step": 10650 }, { "epoch": 0.4102021174205967, "grad_norm": 1.579942226409912, "learning_rate": 0.00017998199039013225, "loss": 1.3568, "step": 10655 }, { "epoch": 0.4103946102021174, "grad_norm": 1.831764817237854, "learning_rate": 0.00017996383571086612, "loss": 1.3662, "step": 10660 }, { "epoch": 0.4105871029836381, "grad_norm": 2.1747963428497314, "learning_rate": 0.00017994567371943697, "loss": 1.1333, "step": 10665 }, { "epoch": 0.4107795957651588, "grad_norm": 1.5603039264678955, "learning_rate": 0.00017992750441750549, "loss": 1.2327, "step": 10670 }, { "epoch": 0.4109720885466795, "grad_norm": 1.7836112976074219, "learning_rate": 
0.00017990932780673324, "loss": 1.0281, "step": 10675 }, { "epoch": 0.4111645813282002, "grad_norm": 1.5049426555633545, "learning_rate": 0.0001798911438887823, "loss": 1.3338, "step": 10680 }, { "epoch": 0.4113570741097209, "grad_norm": 1.6236990690231323, "learning_rate": 0.00017987295266531548, "loss": 1.3937, "step": 10685 }, { "epoch": 0.41154956689124156, "grad_norm": 1.2450697422027588, "learning_rate": 0.00017985475413799623, "loss": 1.3456, "step": 10690 }, { "epoch": 0.4117420596727623, "grad_norm": 1.031137228012085, "learning_rate": 0.00017983654830848873, "loss": 1.0254, "step": 10695 }, { "epoch": 0.41193455245428295, "grad_norm": 1.588884949684143, "learning_rate": 0.00017981833517845773, "loss": 1.0554, "step": 10700 }, { "epoch": 0.4121270452358037, "grad_norm": 1.2405824661254883, "learning_rate": 0.00017980011474956874, "loss": 1.4561, "step": 10705 }, { "epoch": 0.41231953801732435, "grad_norm": 2.03009295463562, "learning_rate": 0.00017978188702348792, "loss": 1.2479, "step": 10710 }, { "epoch": 0.412512030798845, "grad_norm": 0.9755954146385193, "learning_rate": 0.00017976365200188198, "loss": 1.1632, "step": 10715 }, { "epoch": 0.41270452358036575, "grad_norm": 1.3121798038482666, "learning_rate": 0.00017974540968641848, "loss": 1.2069, "step": 10720 }, { "epoch": 0.4128970163618864, "grad_norm": 1.880199909210205, "learning_rate": 0.00017972716007876556, "loss": 1.32, "step": 10725 }, { "epoch": 0.41308950914340714, "grad_norm": 2.1090636253356934, "learning_rate": 0.00017970890318059194, "loss": 1.2943, "step": 10730 }, { "epoch": 0.4132820019249278, "grad_norm": 1.2155611515045166, "learning_rate": 0.00017969063899356716, "loss": 1.3022, "step": 10735 }, { "epoch": 0.4134744947064485, "grad_norm": 1.191871166229248, "learning_rate": 0.00017967236751936135, "loss": 1.2699, "step": 10740 }, { "epoch": 0.4136669874879692, "grad_norm": 1.4702094793319702, "learning_rate": 0.00017965408875964534, "loss": 1.3936, "step": 10745 }, { "epoch": 
0.4138594802694899, "grad_norm": 1.7658724784851074, "learning_rate": 0.00017963580271609052, "loss": 1.1633, "step": 10750 }, { "epoch": 0.4140519730510106, "grad_norm": 1.5030126571655273, "learning_rate": 0.00017961750939036913, "loss": 1.4213, "step": 10755 }, { "epoch": 0.4142444658325313, "grad_norm": 1.5616711378097534, "learning_rate": 0.0001795992087841539, "loss": 1.3342, "step": 10760 }, { "epoch": 0.414436958614052, "grad_norm": 1.2506111860275269, "learning_rate": 0.0001795809008991183, "loss": 1.0034, "step": 10765 }, { "epoch": 0.41462945139557267, "grad_norm": 1.1011154651641846, "learning_rate": 0.00017956258573693657, "loss": 1.0936, "step": 10770 }, { "epoch": 0.41482194417709334, "grad_norm": 1.2040156126022339, "learning_rate": 0.00017954426329928335, "loss": 1.1974, "step": 10775 }, { "epoch": 0.41501443695861406, "grad_norm": 1.5271620750427246, "learning_rate": 0.0001795259335878342, "loss": 1.2563, "step": 10780 }, { "epoch": 0.41520692974013473, "grad_norm": 1.342129111289978, "learning_rate": 0.00017950759660426523, "loss": 1.2319, "step": 10785 }, { "epoch": 0.41539942252165546, "grad_norm": 0.9986871480941772, "learning_rate": 0.00017948925235025326, "loss": 1.0781, "step": 10790 }, { "epoch": 0.41559191530317613, "grad_norm": 1.107088327407837, "learning_rate": 0.00017947090082747573, "loss": 1.1499, "step": 10795 }, { "epoch": 0.4157844080846968, "grad_norm": 1.5566056966781616, "learning_rate": 0.00017945254203761076, "loss": 1.0997, "step": 10800 }, { "epoch": 0.4159769008662175, "grad_norm": 1.4681777954101562, "learning_rate": 0.00017943417598233715, "loss": 1.3307, "step": 10805 }, { "epoch": 0.4161693936477382, "grad_norm": 1.4198453426361084, "learning_rate": 0.00017941580266333433, "loss": 0.9664, "step": 10810 }, { "epoch": 0.4163618864292589, "grad_norm": 1.1474230289459229, "learning_rate": 0.00017939742208228246, "loss": 1.2454, "step": 10815 }, { "epoch": 0.4165543792107796, "grad_norm": 1.186672568321228, 
"learning_rate": 0.00017937903424086228, "loss": 1.3311, "step": 10820 }, { "epoch": 0.4167468719923003, "grad_norm": 1.4548507928848267, "learning_rate": 0.00017936063914075526, "loss": 1.2508, "step": 10825 }, { "epoch": 0.416939364773821, "grad_norm": 1.0224876403808594, "learning_rate": 0.00017934223678364353, "loss": 0.9364, "step": 10830 }, { "epoch": 0.41713185755534166, "grad_norm": 1.5561485290527344, "learning_rate": 0.00017932382717120984, "loss": 1.1686, "step": 10835 }, { "epoch": 0.4173243503368624, "grad_norm": 1.9549082517623901, "learning_rate": 0.00017930541030513762, "loss": 1.2678, "step": 10840 }, { "epoch": 0.41751684311838305, "grad_norm": 1.2266019582748413, "learning_rate": 0.00017928698618711094, "loss": 1.2963, "step": 10845 }, { "epoch": 0.4177093358999038, "grad_norm": 0.6992445588111877, "learning_rate": 0.00017926855481881465, "loss": 1.1042, "step": 10850 }, { "epoch": 0.41790182868142445, "grad_norm": 1.515512466430664, "learning_rate": 0.00017925011620193408, "loss": 1.0718, "step": 10855 }, { "epoch": 0.4180943214629451, "grad_norm": 1.5123271942138672, "learning_rate": 0.0001792316703381554, "loss": 1.1307, "step": 10860 }, { "epoch": 0.41828681424446584, "grad_norm": 1.3709865808486938, "learning_rate": 0.00017921321722916535, "loss": 1.3652, "step": 10865 }, { "epoch": 0.4184793070259865, "grad_norm": 1.3327142000198364, "learning_rate": 0.0001791947568766513, "loss": 1.2644, "step": 10870 }, { "epoch": 0.41867179980750724, "grad_norm": 1.460595726966858, "learning_rate": 0.00017917628928230134, "loss": 1.2783, "step": 10875 }, { "epoch": 0.4188642925890279, "grad_norm": 1.1008737087249756, "learning_rate": 0.00017915781444780425, "loss": 1.2889, "step": 10880 }, { "epoch": 0.4190567853705486, "grad_norm": 1.8467929363250732, "learning_rate": 0.00017913933237484936, "loss": 1.1897, "step": 10885 }, { "epoch": 0.4192492781520693, "grad_norm": 1.286544680595398, "learning_rate": 0.00017912084306512683, "loss": 1.1239, "step": 
10890 }, { "epoch": 0.41944177093359, "grad_norm": 1.8240995407104492, "learning_rate": 0.00017910234652032726, "loss": 1.3085, "step": 10895 }, { "epoch": 0.4196342637151107, "grad_norm": 1.1262156963348389, "learning_rate": 0.00017908384274214215, "loss": 1.3779, "step": 10900 }, { "epoch": 0.41982675649663137, "grad_norm": 1.2274012565612793, "learning_rate": 0.0001790653317322635, "loss": 1.3361, "step": 10905 }, { "epoch": 0.4200192492781521, "grad_norm": 2.0522284507751465, "learning_rate": 0.000179046813492384, "loss": 1.2329, "step": 10910 }, { "epoch": 0.42021174205967277, "grad_norm": 1.927666187286377, "learning_rate": 0.0001790282880241971, "loss": 1.2217, "step": 10915 }, { "epoch": 0.42040423484119344, "grad_norm": 2.254720687866211, "learning_rate": 0.0001790097553293967, "loss": 1.2867, "step": 10920 }, { "epoch": 0.42059672762271416, "grad_norm": 1.9560370445251465, "learning_rate": 0.0001789912154096776, "loss": 1.2959, "step": 10925 }, { "epoch": 0.42078922040423483, "grad_norm": 1.109393835067749, "learning_rate": 0.00017897266826673517, "loss": 1.2397, "step": 10930 }, { "epoch": 0.42098171318575556, "grad_norm": 1.1880956888198853, "learning_rate": 0.00017895411390226527, "loss": 1.192, "step": 10935 }, { "epoch": 0.4211742059672762, "grad_norm": 1.851517677307129, "learning_rate": 0.00017893555231796477, "loss": 1.1866, "step": 10940 }, { "epoch": 0.4213666987487969, "grad_norm": 1.1871724128723145, "learning_rate": 0.0001789169835155309, "loss": 1.1627, "step": 10945 }, { "epoch": 0.4215591915303176, "grad_norm": 0.9478880167007446, "learning_rate": 0.0001788984074966616, "loss": 1.198, "step": 10950 }, { "epoch": 0.4217516843118383, "grad_norm": 1.753989577293396, "learning_rate": 0.00017887982426305566, "loss": 1.2923, "step": 10955 }, { "epoch": 0.421944177093359, "grad_norm": 2.161820650100708, "learning_rate": 0.00017886123381641227, "loss": 1.2651, "step": 10960 }, { "epoch": 0.4221366698748797, "grad_norm": 1.203307867050171, 
"learning_rate": 0.00017884263615843145, "loss": 1.1854, "step": 10965 }, { "epoch": 0.42232916265640036, "grad_norm": 1.6671913862228394, "learning_rate": 0.0001788240312908139, "loss": 1.2466, "step": 10970 }, { "epoch": 0.4225216554379211, "grad_norm": 1.643796443939209, "learning_rate": 0.0001788054192152608, "loss": 1.202, "step": 10975 }, { "epoch": 0.42271414821944175, "grad_norm": 1.024296522140503, "learning_rate": 0.00017878679993347415, "loss": 1.2392, "step": 10980 }, { "epoch": 0.4229066410009625, "grad_norm": 1.363425612449646, "learning_rate": 0.0001787681734471566, "loss": 1.3577, "step": 10985 }, { "epoch": 0.42309913378248315, "grad_norm": 1.7815190553665161, "learning_rate": 0.00017874953975801134, "loss": 0.9826, "step": 10990 }, { "epoch": 0.4232916265640039, "grad_norm": 1.6736468076705933, "learning_rate": 0.00017873089886774236, "loss": 1.168, "step": 10995 }, { "epoch": 0.42348411934552455, "grad_norm": 1.3047553300857544, "learning_rate": 0.0001787122507780542, "loss": 1.1839, "step": 11000 }, { "epoch": 0.4236766121270452, "grad_norm": 1.5737935304641724, "learning_rate": 0.00017869359549065216, "loss": 1.0693, "step": 11005 }, { "epoch": 0.42386910490856594, "grad_norm": 0.9130328893661499, "learning_rate": 0.00017867493300724208, "loss": 1.1609, "step": 11010 }, { "epoch": 0.4240615976900866, "grad_norm": 2.444490432739258, "learning_rate": 0.00017865626332953056, "loss": 1.2422, "step": 11015 }, { "epoch": 0.42425409047160734, "grad_norm": 1.4214091300964355, "learning_rate": 0.00017863758645922481, "loss": 1.2028, "step": 11020 }, { "epoch": 0.424446583253128, "grad_norm": 1.3986276388168335, "learning_rate": 0.0001786189023980327, "loss": 0.9271, "step": 11025 }, { "epoch": 0.4246390760346487, "grad_norm": 1.6309832334518433, "learning_rate": 0.00017860021114766275, "loss": 1.1242, "step": 11030 }, { "epoch": 0.4248315688161694, "grad_norm": 1.0703374147415161, "learning_rate": 0.00017858151270982423, "loss": 1.1688, "step": 11035 }, 
{ "epoch": 0.4250240615976901, "grad_norm": 0.9345492720603943, "learning_rate": 0.00017856280708622687, "loss": 1.0759, "step": 11040 }, { "epoch": 0.4252165543792108, "grad_norm": 1.1012792587280273, "learning_rate": 0.00017854409427858124, "loss": 1.3299, "step": 11045 }, { "epoch": 0.42540904716073147, "grad_norm": 1.087344765663147, "learning_rate": 0.00017852537428859853, "loss": 1.1188, "step": 11050 }, { "epoch": 0.42560153994225214, "grad_norm": 1.0374698638916016, "learning_rate": 0.0001785066471179905, "loss": 1.2403, "step": 11055 }, { "epoch": 0.42579403272377286, "grad_norm": 1.2250018119812012, "learning_rate": 0.00017848791276846963, "loss": 1.1217, "step": 11060 }, { "epoch": 0.42598652550529353, "grad_norm": 1.9863545894622803, "learning_rate": 0.0001784691712417491, "loss": 1.0159, "step": 11065 }, { "epoch": 0.42617901828681426, "grad_norm": 1.3587582111358643, "learning_rate": 0.0001784504225395427, "loss": 1.1266, "step": 11070 }, { "epoch": 0.42637151106833493, "grad_norm": 1.3274664878845215, "learning_rate": 0.0001784316666635648, "loss": 1.2295, "step": 11075 }, { "epoch": 0.42656400384985566, "grad_norm": 1.594498872756958, "learning_rate": 0.00017841290361553057, "loss": 1.2942, "step": 11080 }, { "epoch": 0.4267564966313763, "grad_norm": 2.5940325260162354, "learning_rate": 0.00017839413339715572, "loss": 1.3333, "step": 11085 }, { "epoch": 0.426948989412897, "grad_norm": 1.5368024110794067, "learning_rate": 0.0001783753560101567, "loss": 1.2738, "step": 11090 }, { "epoch": 0.4271414821944177, "grad_norm": 1.8095320463180542, "learning_rate": 0.00017835657145625055, "loss": 1.3245, "step": 11095 }, { "epoch": 0.4273339749759384, "grad_norm": 1.4597771167755127, "learning_rate": 0.000178337779737155, "loss": 1.3837, "step": 11100 }, { "epoch": 0.4275264677574591, "grad_norm": 1.052746057510376, "learning_rate": 0.00017831898085458842, "loss": 1.1603, "step": 11105 }, { "epoch": 0.4277189605389798, "grad_norm": 1.547523856163025, 
"learning_rate": 0.0001783001748102699, "loss": 1.2277, "step": 11110 }, { "epoch": 0.42791145332050046, "grad_norm": 2.109560012817383, "learning_rate": 0.00017828136160591906, "loss": 1.1299, "step": 11115 }, { "epoch": 0.4281039461020212, "grad_norm": 0.9221099019050598, "learning_rate": 0.00017826254124325626, "loss": 1.1447, "step": 11120 }, { "epoch": 0.42829643888354185, "grad_norm": 1.1257829666137695, "learning_rate": 0.00017824371372400255, "loss": 1.0844, "step": 11125 }, { "epoch": 0.4284889316650626, "grad_norm": 1.9643393754959106, "learning_rate": 0.00017822487904987948, "loss": 1.1511, "step": 11130 }, { "epoch": 0.42868142444658325, "grad_norm": 1.2279611825942993, "learning_rate": 0.00017820603722260944, "loss": 1.3039, "step": 11135 }, { "epoch": 0.428873917228104, "grad_norm": 1.8037766218185425, "learning_rate": 0.00017818718824391536, "loss": 1.2338, "step": 11140 }, { "epoch": 0.42906641000962464, "grad_norm": 2.1256327629089355, "learning_rate": 0.00017816833211552085, "loss": 1.2502, "step": 11145 }, { "epoch": 0.4292589027911453, "grad_norm": 1.1520932912826538, "learning_rate": 0.0001781494688391502, "loss": 1.121, "step": 11150 }, { "epoch": 0.42945139557266604, "grad_norm": 1.1287842988967896, "learning_rate": 0.00017813059841652833, "loss": 1.2012, "step": 11155 }, { "epoch": 0.4296438883541867, "grad_norm": 1.2584294080734253, "learning_rate": 0.00017811172084938076, "loss": 1.3221, "step": 11160 }, { "epoch": 0.42983638113570743, "grad_norm": 1.901994228363037, "learning_rate": 0.0001780928361394338, "loss": 1.1184, "step": 11165 }, { "epoch": 0.4300288739172281, "grad_norm": 1.564501166343689, "learning_rate": 0.00017807394428841428, "loss": 1.12, "step": 11170 }, { "epoch": 0.4302213666987488, "grad_norm": 2.138155221939087, "learning_rate": 0.00017805504529804975, "loss": 1.1928, "step": 11175 }, { "epoch": 0.4304138594802695, "grad_norm": 1.3132466077804565, "learning_rate": 0.00017803613917006841, "loss": 1.2674, "step": 11180 
}, { "epoch": 0.43060635226179017, "grad_norm": 1.1847275495529175, "learning_rate": 0.00017801722590619903, "loss": 1.1457, "step": 11185 }, { "epoch": 0.4307988450433109, "grad_norm": 1.6100077629089355, "learning_rate": 0.00017799830550817124, "loss": 1.3779, "step": 11190 }, { "epoch": 0.43099133782483157, "grad_norm": 2.1193013191223145, "learning_rate": 0.00017797937797771503, "loss": 1.0515, "step": 11195 }, { "epoch": 0.43118383060635224, "grad_norm": 1.6185005903244019, "learning_rate": 0.0001779604433165613, "loss": 1.078, "step": 11200 }, { "epoch": 0.43137632338787296, "grad_norm": 1.275046467781067, "learning_rate": 0.00017794150152644148, "loss": 2.2652, "step": 11205 }, { "epoch": 0.43156881616939363, "grad_norm": 1.4507300853729248, "learning_rate": 0.00017792255260908765, "loss": 1.3556, "step": 11210 }, { "epoch": 0.43176130895091436, "grad_norm": 1.5722453594207764, "learning_rate": 0.00017790359656623256, "loss": 1.1115, "step": 11215 }, { "epoch": 0.431953801732435, "grad_norm": 1.802585244178772, "learning_rate": 0.00017788463339960962, "loss": 1.1885, "step": 11220 }, { "epoch": 0.43214629451395575, "grad_norm": 1.0945521593093872, "learning_rate": 0.00017786566311095295, "loss": 1.2419, "step": 11225 }, { "epoch": 0.4323387872954764, "grad_norm": 1.6798467636108398, "learning_rate": 0.00017784668570199714, "loss": 1.0404, "step": 11230 }, { "epoch": 0.4325312800769971, "grad_norm": 1.9263988733291626, "learning_rate": 0.00017782770117447764, "loss": 1.2925, "step": 11235 }, { "epoch": 0.4327237728585178, "grad_norm": 1.3327709436416626, "learning_rate": 0.0001778087095301304, "loss": 1.2621, "step": 11240 }, { "epoch": 0.4329162656400385, "grad_norm": 1.540216088294983, "learning_rate": 0.00017778971077069214, "loss": 1.2733, "step": 11245 }, { "epoch": 0.4331087584215592, "grad_norm": 0.8980332612991333, "learning_rate": 0.00017777070489790014, "loss": 1.1849, "step": 11250 }, { "epoch": 0.4333012512030799, "grad_norm": 1.1286743879318237, 
"learning_rate": 0.00017775169191349238, "loss": 1.0491, "step": 11255 }, { "epoch": 0.43349374398460055, "grad_norm": 1.5880367755889893, "learning_rate": 0.0001777326718192074, "loss": 1.1371, "step": 11260 }, { "epoch": 0.4336862367661213, "grad_norm": 1.8634532690048218, "learning_rate": 0.00017771364461678454, "loss": 1.3491, "step": 11265 }, { "epoch": 0.43387872954764195, "grad_norm": 1.13876473903656, "learning_rate": 0.0001776946103079637, "loss": 1.1284, "step": 11270 }, { "epoch": 0.4340712223291627, "grad_norm": 1.1511520147323608, "learning_rate": 0.0001776755688944854, "loss": 0.9705, "step": 11275 }, { "epoch": 0.43426371511068335, "grad_norm": 2.0832314491271973, "learning_rate": 0.00017765652037809087, "loss": 1.1134, "step": 11280 }, { "epoch": 0.434456207892204, "grad_norm": 1.3219777345657349, "learning_rate": 0.000177637464760522, "loss": 1.1519, "step": 11285 }, { "epoch": 0.43464870067372474, "grad_norm": 1.2205532789230347, "learning_rate": 0.0001776184020435213, "loss": 1.1526, "step": 11290 }, { "epoch": 0.4348411934552454, "grad_norm": 1.1612414121627808, "learning_rate": 0.00017759933222883187, "loss": 1.2236, "step": 11295 }, { "epoch": 0.43503368623676614, "grad_norm": 2.214245319366455, "learning_rate": 0.00017758025531819756, "loss": 1.1962, "step": 11300 }, { "epoch": 0.4352261790182868, "grad_norm": 1.1582585573196411, "learning_rate": 0.00017756117131336284, "loss": 1.1488, "step": 11305 }, { "epoch": 0.43541867179980753, "grad_norm": 1.6610682010650635, "learning_rate": 0.0001775420802160728, "loss": 1.2349, "step": 11310 }, { "epoch": 0.4356111645813282, "grad_norm": 1.2163527011871338, "learning_rate": 0.00017752298202807317, "loss": 1.0914, "step": 11315 }, { "epoch": 0.4358036573628489, "grad_norm": 1.3684804439544678, "learning_rate": 0.00017750387675111043, "loss": 1.1035, "step": 11320 }, { "epoch": 0.4359961501443696, "grad_norm": 2.0042598247528076, "learning_rate": 0.00017748476438693151, "loss": 1.1783, "step": 11325 
}, { "epoch": 0.43618864292589027, "grad_norm": 1.4552195072174072, "learning_rate": 0.00017746564493728424, "loss": 1.1373, "step": 11330 }, { "epoch": 0.436381135707411, "grad_norm": 1.1513317823410034, "learning_rate": 0.00017744651840391685, "loss": 1.122, "step": 11335 }, { "epoch": 0.43657362848893166, "grad_norm": 1.1842467784881592, "learning_rate": 0.0001774273847885784, "loss": 1.085, "step": 11340 }, { "epoch": 0.43676612127045233, "grad_norm": 1.5492455959320068, "learning_rate": 0.00017740824409301852, "loss": 1.1355, "step": 11345 }, { "epoch": 0.43695861405197306, "grad_norm": 1.6276592016220093, "learning_rate": 0.00017738909631898753, "loss": 1.3922, "step": 11350 }, { "epoch": 0.43715110683349373, "grad_norm": 1.5947320461273193, "learning_rate": 0.0001773699414682363, "loss": 1.1952, "step": 11355 }, { "epoch": 0.43734359961501446, "grad_norm": 1.0628368854522705, "learning_rate": 0.00017735077954251648, "loss": 1.3908, "step": 11360 }, { "epoch": 0.4375360923965351, "grad_norm": 1.6347852945327759, "learning_rate": 0.00017733161054358027, "loss": 1.3614, "step": 11365 }, { "epoch": 0.4377285851780558, "grad_norm": 0.98406583070755, "learning_rate": 0.00017731243447318055, "loss": 1.0818, "step": 11370 }, { "epoch": 0.4379210779595765, "grad_norm": 2.522155284881592, "learning_rate": 0.0001772932513330708, "loss": 1.1043, "step": 11375 }, { "epoch": 0.4381135707410972, "grad_norm": 1.3053642511367798, "learning_rate": 0.0001772740611250053, "loss": 1.1731, "step": 11380 }, { "epoch": 0.4383060635226179, "grad_norm": 1.540334701538086, "learning_rate": 0.0001772548638507388, "loss": 1.0897, "step": 11385 }, { "epoch": 0.4384985563041386, "grad_norm": 1.756795048713684, "learning_rate": 0.00017723565951202673, "loss": 1.26, "step": 11390 }, { "epoch": 0.4386910490856593, "grad_norm": 2.263253688812256, "learning_rate": 0.00017721644811062524, "loss": 1.2498, "step": 11395 }, { "epoch": 0.43888354186718, "grad_norm": 1.2686541080474854, 
"learning_rate": 0.0001771972296482911, "loss": 1.2002, "step": 11400 }, { "epoch": 0.43907603464870065, "grad_norm": 1.7692358493804932, "learning_rate": 0.00017717800412678168, "loss": 1.2989, "step": 11405 }, { "epoch": 0.4392685274302214, "grad_norm": 0.9414786100387573, "learning_rate": 0.00017715877154785505, "loss": 1.0743, "step": 11410 }, { "epoch": 0.43946102021174205, "grad_norm": 1.6488560438156128, "learning_rate": 0.0001771395319132699, "loss": 1.3217, "step": 11415 }, { "epoch": 0.4396535129932628, "grad_norm": 0.9546147584915161, "learning_rate": 0.00017712028522478556, "loss": 1.1849, "step": 11420 }, { "epoch": 0.43984600577478344, "grad_norm": 1.9460307359695435, "learning_rate": 0.000177101031484162, "loss": 1.3702, "step": 11425 }, { "epoch": 0.4400384985563041, "grad_norm": 0.8990427255630493, "learning_rate": 0.00017708177069315987, "loss": 1.2009, "step": 11430 }, { "epoch": 0.44023099133782484, "grad_norm": 1.3581219911575317, "learning_rate": 0.0001770625028535404, "loss": 1.1846, "step": 11435 }, { "epoch": 0.4404234841193455, "grad_norm": 1.259728193283081, "learning_rate": 0.00017704322796706557, "loss": 1.2683, "step": 11440 }, { "epoch": 0.44061597690086624, "grad_norm": 1.1262446641921997, "learning_rate": 0.00017702394603549788, "loss": 1.0015, "step": 11445 }, { "epoch": 0.4408084696823869, "grad_norm": 2.5833356380462646, "learning_rate": 0.0001770046570606006, "loss": 1.1348, "step": 11450 }, { "epoch": 0.44100096246390763, "grad_norm": 0.9725410342216492, "learning_rate": 0.00017698536104413749, "loss": 1.2214, "step": 11455 }, { "epoch": 0.4411934552454283, "grad_norm": 1.0890756845474243, "learning_rate": 0.00017696605798787313, "loss": 1.1178, "step": 11460 }, { "epoch": 0.44138594802694897, "grad_norm": 1.3130367994308472, "learning_rate": 0.0001769467478935726, "loss": 1.1582, "step": 11465 }, { "epoch": 0.4415784408084697, "grad_norm": 2.401630163192749, "learning_rate": 0.00017692743076300172, "loss": 1.3043, "step": 
11470 }, { "epoch": 0.44177093358999037, "grad_norm": 1.2928016185760498, "learning_rate": 0.00017690810659792686, "loss": 1.1354, "step": 11475 }, { "epoch": 0.4419634263715111, "grad_norm": 1.6433988809585571, "learning_rate": 0.00017688877540011517, "loss": 1.2385, "step": 11480 }, { "epoch": 0.44215591915303176, "grad_norm": 1.5450482368469238, "learning_rate": 0.00017686943717133428, "loss": 1.3096, "step": 11485 }, { "epoch": 0.44234841193455243, "grad_norm": 2.0231974124908447, "learning_rate": 0.00017685009191335257, "loss": 1.2129, "step": 11490 }, { "epoch": 0.44254090471607316, "grad_norm": 1.2831270694732666, "learning_rate": 0.00017683073962793908, "loss": 1.2213, "step": 11495 }, { "epoch": 0.44273339749759383, "grad_norm": 1.035520076751709, "learning_rate": 0.00017681138031686337, "loss": 1.248, "step": 11500 }, { "epoch": 0.44292589027911455, "grad_norm": 1.113934874534607, "learning_rate": 0.00017679201398189577, "loss": 1.0017, "step": 11505 }, { "epoch": 0.4431183830606352, "grad_norm": 1.202412724494934, "learning_rate": 0.0001767726406248072, "loss": 1.0873, "step": 11510 }, { "epoch": 0.4433108758421559, "grad_norm": 1.2946287393569946, "learning_rate": 0.0001767532602473692, "loss": 1.3873, "step": 11515 }, { "epoch": 0.4435033686236766, "grad_norm": 1.2840358018875122, "learning_rate": 0.00017673387285135398, "loss": 1.2559, "step": 11520 }, { "epoch": 0.4436958614051973, "grad_norm": 0.9422056078910828, "learning_rate": 0.00017671447843853444, "loss": 1.1179, "step": 11525 }, { "epoch": 0.443888354186718, "grad_norm": 1.9112647771835327, "learning_rate": 0.000176695077010684, "loss": 1.1519, "step": 11530 }, { "epoch": 0.4440808469682387, "grad_norm": 0.9463594555854797, "learning_rate": 0.00017667566856957687, "loss": 1.1175, "step": 11535 }, { "epoch": 0.4442733397497594, "grad_norm": 2.1585206985473633, "learning_rate": 0.00017665625311698776, "loss": 1.1535, "step": 11540 }, { "epoch": 0.4444658325312801, "grad_norm": 
1.038095474243164, "learning_rate": 0.0001766368306546921, "loss": 1.1633, "step": 11545 }, { "epoch": 0.44465832531280075, "grad_norm": 1.4679070711135864, "learning_rate": 0.00017661740118446594, "loss": 1.3792, "step": 11550 }, { "epoch": 0.4448508180943215, "grad_norm": 1.3058511018753052, "learning_rate": 0.00017659796470808597, "loss": 1.2802, "step": 11555 }, { "epoch": 0.44504331087584215, "grad_norm": 1.0330942869186401, "learning_rate": 0.0001765785212273296, "loss": 1.1621, "step": 11560 }, { "epoch": 0.44523580365736287, "grad_norm": 1.6481776237487793, "learning_rate": 0.0001765590707439747, "loss": 1.1098, "step": 11565 }, { "epoch": 0.44542829643888354, "grad_norm": 1.3850781917572021, "learning_rate": 0.00017653961325979998, "loss": 1.3687, "step": 11570 }, { "epoch": 0.4456207892204042, "grad_norm": 1.6551322937011719, "learning_rate": 0.0001765201487765846, "loss": 1.3436, "step": 11575 }, { "epoch": 0.44581328200192494, "grad_norm": 1.0752167701721191, "learning_rate": 0.00017650067729610856, "loss": 1.1667, "step": 11580 }, { "epoch": 0.4460057747834456, "grad_norm": 1.4762775897979736, "learning_rate": 0.00017648119882015232, "loss": 1.0119, "step": 11585 }, { "epoch": 0.44619826756496633, "grad_norm": 0.7833762764930725, "learning_rate": 0.0001764617133504971, "loss": 1.1047, "step": 11590 }, { "epoch": 0.446390760346487, "grad_norm": 1.1666022539138794, "learning_rate": 0.00017644222088892473, "loss": 1.2339, "step": 11595 }, { "epoch": 0.4465832531280077, "grad_norm": 1.7897813320159912, "learning_rate": 0.0001764227214372176, "loss": 1.205, "step": 11600 }, { "epoch": 0.4467757459095284, "grad_norm": 1.2021222114562988, "learning_rate": 0.00017640321499715888, "loss": 1.2518, "step": 11605 }, { "epoch": 0.44696823869104907, "grad_norm": 2.9843320846557617, "learning_rate": 0.00017638370157053228, "loss": 0.9705, "step": 11610 }, { "epoch": 0.4471607314725698, "grad_norm": 1.2910903692245483, "learning_rate": 0.00017636418115912213, "loss": 
1.4018, "step": 11615 }, { "epoch": 0.44735322425409046, "grad_norm": 1.0188699960708618, "learning_rate": 0.00017634855980214943, "loss": 1.6206, "step": 11620 }, { "epoch": 0.4475457170356112, "grad_norm": 1.0419138669967651, "learning_rate": 0.00017632902682262764, "loss": 1.2483, "step": 11625 }, { "epoch": 0.44773820981713186, "grad_norm": 1.665586233139038, "learning_rate": 0.000176309486863322, "loss": 1.1838, "step": 11630 }, { "epoch": 0.44793070259865253, "grad_norm": 2.3444008827209473, "learning_rate": 0.00017628993992601925, "loss": 1.291, "step": 11635 }, { "epoch": 0.44812319538017326, "grad_norm": 1.960339069366455, "learning_rate": 0.00017627038601250686, "loss": 1.2312, "step": 11640 }, { "epoch": 0.4483156881616939, "grad_norm": 1.5672719478607178, "learning_rate": 0.00017625082512457297, "loss": 1.2281, "step": 11645 }, { "epoch": 0.44850818094321465, "grad_norm": 1.5053352117538452, "learning_rate": 0.00017623125726400621, "loss": 1.1688, "step": 11650 }, { "epoch": 0.4487006737247353, "grad_norm": 1.841610312461853, "learning_rate": 0.00017621168243259596, "loss": 1.1607, "step": 11655 }, { "epoch": 0.448893166506256, "grad_norm": 1.1526665687561035, "learning_rate": 0.0001761921006321322, "loss": 1.1788, "step": 11660 }, { "epoch": 0.4490856592877767, "grad_norm": 1.4064139127731323, "learning_rate": 0.00017617251186440556, "loss": 1.0825, "step": 11665 }, { "epoch": 0.4492781520692974, "grad_norm": 1.1119096279144287, "learning_rate": 0.00017615291613120736, "loss": 1.2768, "step": 11670 }, { "epoch": 0.4494706448508181, "grad_norm": 1.2367806434631348, "learning_rate": 0.00017613331343432938, "loss": 1.2612, "step": 11675 }, { "epoch": 0.4496631376323388, "grad_norm": 1.093410611152649, "learning_rate": 0.00017611370377556423, "loss": 1.3075, "step": 11680 }, { "epoch": 0.44985563041385945, "grad_norm": 1.0085220336914062, "learning_rate": 0.00017609408715670512, "loss": 1.2391, "step": 11685 }, { "epoch": 0.4500481231953802, "grad_norm": 
1.4346550703048706, "learning_rate": 0.0001760744635795458, "loss": 1.2241, "step": 11690 }, { "epoch": 0.45024061597690085, "grad_norm": 1.483905553817749, "learning_rate": 0.0001760548330458807, "loss": 1.2696, "step": 11695 }, { "epoch": 0.4504331087584216, "grad_norm": 1.6455215215682983, "learning_rate": 0.00017603519555750498, "loss": 1.2113, "step": 11700 }, { "epoch": 0.45062560153994224, "grad_norm": 1.7613027095794678, "learning_rate": 0.00017601555111621428, "loss": 1.1581, "step": 11705 }, { "epoch": 0.45081809432146297, "grad_norm": 1.5872759819030762, "learning_rate": 0.000175995899723805, "loss": 1.0977, "step": 11710 }, { "epoch": 0.45101058710298364, "grad_norm": 1.5521520376205444, "learning_rate": 0.00017597624138207413, "loss": 1.3003, "step": 11715 }, { "epoch": 0.4512030798845043, "grad_norm": 2.1746668815612793, "learning_rate": 0.0001759565760928193, "loss": 1.1861, "step": 11720 }, { "epoch": 0.45139557266602504, "grad_norm": 1.73439359664917, "learning_rate": 0.00017593690385783866, "loss": 1.242, "step": 11725 }, { "epoch": 0.4515880654475457, "grad_norm": 1.6027134656906128, "learning_rate": 0.0001759172246789313, "loss": 1.2936, "step": 11730 }, { "epoch": 0.45178055822906643, "grad_norm": 1.62489652633667, "learning_rate": 0.0001758975385578966, "loss": 1.3521, "step": 11735 }, { "epoch": 0.4519730510105871, "grad_norm": 1.3407773971557617, "learning_rate": 0.00017587784549653477, "loss": 1.1653, "step": 11740 }, { "epoch": 0.45216554379210777, "grad_norm": 2.064875364303589, "learning_rate": 0.00017585814549664664, "loss": 1.2321, "step": 11745 }, { "epoch": 0.4523580365736285, "grad_norm": 1.115850806236267, "learning_rate": 0.0001758384385600336, "loss": 1.0289, "step": 11750 }, { "epoch": 0.45255052935514917, "grad_norm": 1.3943949937820435, "learning_rate": 0.00017581872468849777, "loss": 1.2846, "step": 11755 }, { "epoch": 0.4527430221366699, "grad_norm": 1.0405654907226562, "learning_rate": 0.0001757990038838418, "loss": 1.1209, 
"step": 11760 }, { "epoch": 0.45293551491819056, "grad_norm": 1.0115854740142822, "learning_rate": 0.00017577927614786902, "loss": 1.0178, "step": 11765 }, { "epoch": 0.4531280076997113, "grad_norm": 2.48100209236145, "learning_rate": 0.00017575954148238345, "loss": 1.2485, "step": 11770 }, { "epoch": 0.45332050048123196, "grad_norm": 1.5187568664550781, "learning_rate": 0.00017573979988918967, "loss": 1.3345, "step": 11775 }, { "epoch": 0.45351299326275263, "grad_norm": 1.2286217212677002, "learning_rate": 0.00017572005137009292, "loss": 1.1079, "step": 11780 }, { "epoch": 0.45370548604427335, "grad_norm": 1.5858092308044434, "learning_rate": 0.00017570029592689908, "loss": 1.4054, "step": 11785 }, { "epoch": 0.453897978825794, "grad_norm": 2.0436697006225586, "learning_rate": 0.00017568053356141464, "loss": 1.3221, "step": 11790 }, { "epoch": 0.45409047160731475, "grad_norm": 1.6980565786361694, "learning_rate": 0.00017566076427544673, "loss": 1.2384, "step": 11795 }, { "epoch": 0.4542829643888354, "grad_norm": 1.3811545372009277, "learning_rate": 0.00017564098807080315, "loss": 1.171, "step": 11800 }, { "epoch": 0.4544754571703561, "grad_norm": 1.2215286493301392, "learning_rate": 0.00017562120494929228, "loss": 1.1781, "step": 11805 }, { "epoch": 0.4546679499518768, "grad_norm": 1.1313782930374146, "learning_rate": 0.00017560141491272319, "loss": 1.2166, "step": 11810 }, { "epoch": 0.4548604427333975, "grad_norm": 1.2630988359451294, "learning_rate": 0.0001755816179629055, "loss": 1.2652, "step": 11815 }, { "epoch": 0.4550529355149182, "grad_norm": 1.0977842807769775, "learning_rate": 0.0001755618141016495, "loss": 1.3057, "step": 11820 }, { "epoch": 0.4552454282964389, "grad_norm": 0.8517459034919739, "learning_rate": 0.0001755420033307662, "loss": 1.1769, "step": 11825 }, { "epoch": 0.45543792107795955, "grad_norm": 0.7195164561271667, "learning_rate": 0.00017552218565206707, "loss": 0.9777, "step": 11830 }, { "epoch": 0.4556304138594803, "grad_norm": 
1.125056266784668, "learning_rate": 0.00017550236106736436, "loss": 1.1008, "step": 11835 }, { "epoch": 0.45582290664100095, "grad_norm": 0.8211593627929688, "learning_rate": 0.00017548252957847092, "loss": 1.1539, "step": 11840 }, { "epoch": 0.4560153994225217, "grad_norm": 1.8936784267425537, "learning_rate": 0.00017546269118720015, "loss": 1.143, "step": 11845 }, { "epoch": 0.45620789220404234, "grad_norm": 1.5479308366775513, "learning_rate": 0.00017544284589536617, "loss": 1.1481, "step": 11850 }, { "epoch": 0.45640038498556307, "grad_norm": 1.4597593545913696, "learning_rate": 0.00017542299370478372, "loss": 1.2907, "step": 11855 }, { "epoch": 0.45659287776708374, "grad_norm": 1.4036239385604858, "learning_rate": 0.0001754031346172681, "loss": 1.2927, "step": 11860 }, { "epoch": 0.4567853705486044, "grad_norm": 0.9842814207077026, "learning_rate": 0.00017538326863463533, "loss": 1.0571, "step": 11865 }, { "epoch": 0.45697786333012513, "grad_norm": 2.478254556655884, "learning_rate": 0.000175363395758702, "loss": 1.3115, "step": 11870 }, { "epoch": 0.4571703561116458, "grad_norm": 1.000182032585144, "learning_rate": 0.00017534351599128538, "loss": 1.3071, "step": 11875 }, { "epoch": 0.45736284889316653, "grad_norm": 1.8669004440307617, "learning_rate": 0.0001753236293342033, "loss": 1.2386, "step": 11880 }, { "epoch": 0.4575553416746872, "grad_norm": 1.6287200450897217, "learning_rate": 0.00017530373578927432, "loss": 1.2196, "step": 11885 }, { "epoch": 0.45774783445620787, "grad_norm": 2.1733322143554688, "learning_rate": 0.00017528383535831755, "loss": 1.6165, "step": 11890 }, { "epoch": 0.4579403272377286, "grad_norm": 1.0370094776153564, "learning_rate": 0.00017526392804315273, "loss": 1.1799, "step": 11895 }, { "epoch": 0.45813282001924927, "grad_norm": 1.3969937562942505, "learning_rate": 0.00017524401384560025, "loss": 1.2224, "step": 11900 }, { "epoch": 0.45832531280077, "grad_norm": 1.3850924968719482, "learning_rate": 0.00017522409276748117, "loss": 
1.4161, "step": 11905 }, { "epoch": 0.45851780558229066, "grad_norm": 1.4318947792053223, "learning_rate": 0.00017520416481061712, "loss": 1.4166, "step": 11910 }, { "epoch": 0.45871029836381133, "grad_norm": 1.525709629058838, "learning_rate": 0.00017518422997683038, "loss": 1.255, "step": 11915 }, { "epoch": 0.45890279114533206, "grad_norm": 0.9193233847618103, "learning_rate": 0.00017516428826794384, "loss": 1.2299, "step": 11920 }, { "epoch": 0.4590952839268527, "grad_norm": 1.8636525869369507, "learning_rate": 0.00017514433968578107, "loss": 1.1992, "step": 11925 }, { "epoch": 0.45928777670837345, "grad_norm": 1.3876943588256836, "learning_rate": 0.00017512438423216624, "loss": 1.2022, "step": 11930 }, { "epoch": 0.4594802694898941, "grad_norm": 1.5370129346847534, "learning_rate": 0.00017510442190892412, "loss": 1.319, "step": 11935 }, { "epoch": 0.45967276227141485, "grad_norm": 1.8562203645706177, "learning_rate": 0.00017508445271788013, "loss": 1.0784, "step": 11940 }, { "epoch": 0.4598652550529355, "grad_norm": 1.1265978813171387, "learning_rate": 0.0001750644766608603, "loss": 1.1591, "step": 11945 }, { "epoch": 0.4600577478344562, "grad_norm": 1.3049321174621582, "learning_rate": 0.00017504449373969137, "loss": 1.2567, "step": 11950 }, { "epoch": 0.4602502406159769, "grad_norm": 1.4252487421035767, "learning_rate": 0.0001750245039562006, "loss": 1.0848, "step": 11955 }, { "epoch": 0.4604427333974976, "grad_norm": 1.888185977935791, "learning_rate": 0.00017500450731221592, "loss": 1.1976, "step": 11960 }, { "epoch": 0.4606352261790183, "grad_norm": 1.4043982028961182, "learning_rate": 0.00017498450380956594, "loss": 1.2038, "step": 11965 }, { "epoch": 0.460827718960539, "grad_norm": 1.182576060295105, "learning_rate": 0.00017496449345007982, "loss": 1.3408, "step": 11970 }, { "epoch": 0.46102021174205965, "grad_norm": 2.084197521209717, "learning_rate": 0.00017494447623558733, "loss": 1.269, "step": 11975 }, { "epoch": 0.4612127045235804, "grad_norm": 
1.709518551826477, "learning_rate": 0.00017492445216791896, "loss": 1.3736, "step": 11980 }, { "epoch": 0.46140519730510104, "grad_norm": 1.1446977853775024, "learning_rate": 0.00017490442124890577, "loss": 1.2449, "step": 11985 }, { "epoch": 0.46159769008662177, "grad_norm": 1.9139240980148315, "learning_rate": 0.00017488438348037946, "loss": 1.0845, "step": 11990 }, { "epoch": 0.46179018286814244, "grad_norm": 1.6536133289337158, "learning_rate": 0.00017486433886417234, "loss": 1.4398, "step": 11995 }, { "epoch": 0.4619826756496631, "grad_norm": 1.0629438161849976, "learning_rate": 0.00017484428740211736, "loss": 1.2631, "step": 12000 }, { "epoch": 0.46217516843118384, "grad_norm": 1.1966623067855835, "learning_rate": 0.00017482422909604809, "loss": 1.088, "step": 12005 }, { "epoch": 0.4623676612127045, "grad_norm": 1.1087130308151245, "learning_rate": 0.00017480416394779878, "loss": 1.2133, "step": 12010 }, { "epoch": 0.46256015399422523, "grad_norm": 1.9408375024795532, "learning_rate": 0.00017478409195920413, "loss": 1.1677, "step": 12015 }, { "epoch": 0.4627526467757459, "grad_norm": 1.2703943252563477, "learning_rate": 0.00017476401313209973, "loss": 1.232, "step": 12020 }, { "epoch": 0.4629451395572666, "grad_norm": 1.7841099500656128, "learning_rate": 0.0001747439274683216, "loss": 1.1688, "step": 12025 }, { "epoch": 0.4631376323387873, "grad_norm": 1.9395395517349243, "learning_rate": 0.0001747238349697064, "loss": 1.2336, "step": 12030 }, { "epoch": 0.46333012512030797, "grad_norm": 1.5011239051818848, "learning_rate": 0.0001747037356380915, "loss": 1.1849, "step": 12035 }, { "epoch": 0.4635226179018287, "grad_norm": 1.6130584478378296, "learning_rate": 0.00017468362947531486, "loss": 1.3113, "step": 12040 }, { "epoch": 0.46371511068334936, "grad_norm": 1.5666422843933105, "learning_rate": 0.000174663516483215, "loss": 1.2178, "step": 12045 }, { "epoch": 0.4639076034648701, "grad_norm": 1.490662932395935, "learning_rate": 0.0001746433966636312, "loss": 
1.3034, "step": 12050 }, { "epoch": 0.46410009624639076, "grad_norm": 1.1972042322158813, "learning_rate": 0.00017462327001840322, "loss": 1.1732, "step": 12055 }, { "epoch": 0.46429258902791143, "grad_norm": 1.5201470851898193, "learning_rate": 0.00017460313654937154, "loss": 1.1545, "step": 12060 }, { "epoch": 0.46448508180943215, "grad_norm": 0.8927121758460999, "learning_rate": 0.00017458299625837723, "loss": 1.1516, "step": 12065 }, { "epoch": 0.4646775745909528, "grad_norm": 1.394187092781067, "learning_rate": 0.00017456284914726196, "loss": 1.2791, "step": 12070 }, { "epoch": 0.46487006737247355, "grad_norm": 1.8900322914123535, "learning_rate": 0.00017454269521786808, "loss": 1.244, "step": 12075 }, { "epoch": 0.4650625601539942, "grad_norm": 2.20624041557312, "learning_rate": 0.00017452253447203852, "loss": 1.2526, "step": 12080 }, { "epoch": 0.46525505293551495, "grad_norm": 1.404261827468872, "learning_rate": 0.00017450236691161686, "loss": 1.1711, "step": 12085 }, { "epoch": 0.4654475457170356, "grad_norm": 1.6828880310058594, "learning_rate": 0.00017448219253844726, "loss": 1.3007, "step": 12090 }, { "epoch": 0.4656400384985563, "grad_norm": 1.0239325761795044, "learning_rate": 0.00017446201135437456, "loss": 1.1359, "step": 12095 }, { "epoch": 0.465832531280077, "grad_norm": 0.9242125749588013, "learning_rate": 0.0001744418233612442, "loss": 1.1848, "step": 12100 }, { "epoch": 0.4660250240615977, "grad_norm": 2.9907031059265137, "learning_rate": 0.0001744216285609022, "loss": 1.155, "step": 12105 }, { "epoch": 0.4662175168431184, "grad_norm": 0.9708018898963928, "learning_rate": 0.0001744014269551953, "loss": 1.4752, "step": 12110 }, { "epoch": 0.4664100096246391, "grad_norm": 1.1917387247085571, "learning_rate": 0.00017438121854597075, "loss": 1.1197, "step": 12115 }, { "epoch": 0.46660250240615975, "grad_norm": 1.5464357137680054, "learning_rate": 0.00017436100333507648, "loss": 1.1908, "step": 12120 }, { "epoch": 0.4667949951876805, "grad_norm": 
1.9502155780792236, "learning_rate": 0.00017434078132436107, "loss": 1.1888, "step": 12125 }, { "epoch": 0.46698748796920114, "grad_norm": 2.054029941558838, "learning_rate": 0.00017432055251567365, "loss": 1.2771, "step": 12130 }, { "epoch": 0.46717998075072187, "grad_norm": 1.276356816291809, "learning_rate": 0.00017430031691086407, "loss": 1.2392, "step": 12135 }, { "epoch": 0.46737247353224254, "grad_norm": 1.4474079608917236, "learning_rate": 0.00017428007451178267, "loss": 1.2111, "step": 12140 }, { "epoch": 0.4675649663137632, "grad_norm": 1.389797568321228, "learning_rate": 0.00017425982532028053, "loss": 1.2094, "step": 12145 }, { "epoch": 0.46775745909528393, "grad_norm": 1.2491530179977417, "learning_rate": 0.00017423956933820928, "loss": 1.2374, "step": 12150 }, { "epoch": 0.4679499518768046, "grad_norm": 1.0517950057983398, "learning_rate": 0.00017421930656742122, "loss": 1.1003, "step": 12155 }, { "epoch": 0.46814244465832533, "grad_norm": 1.410630226135254, "learning_rate": 0.00017419903700976924, "loss": 1.2722, "step": 12160 }, { "epoch": 0.468334937439846, "grad_norm": 1.5544359683990479, "learning_rate": 0.00017417876066710682, "loss": 1.1961, "step": 12165 }, { "epoch": 0.4685274302213667, "grad_norm": 1.3200881481170654, "learning_rate": 0.00017415847754128817, "loss": 1.0058, "step": 12170 }, { "epoch": 0.4687199230028874, "grad_norm": 1.998949646949768, "learning_rate": 0.00017413818763416795, "loss": 1.1513, "step": 12175 }, { "epoch": 0.46891241578440807, "grad_norm": 1.4105117321014404, "learning_rate": 0.0001741178909476016, "loss": 1.2993, "step": 12180 }, { "epoch": 0.4691049085659288, "grad_norm": 1.4521151781082153, "learning_rate": 0.00017409758748344515, "loss": 1.1659, "step": 12185 }, { "epoch": 0.46929740134744946, "grad_norm": 1.3822886943817139, "learning_rate": 0.00017407727724355515, "loss": 1.3419, "step": 12190 }, { "epoch": 0.4694898941289702, "grad_norm": 1.5602283477783203, "learning_rate": 0.00017405696022978885, 
"loss": 1.1506, "step": 12195 }, { "epoch": 0.46968238691049086, "grad_norm": 1.2674669027328491, "learning_rate": 0.00017403663644400413, "loss": 1.2992, "step": 12200 }, { "epoch": 0.4698748796920115, "grad_norm": 1.6091759204864502, "learning_rate": 0.00017401630588805947, "loss": 1.1105, "step": 12205 }, { "epoch": 0.47006737247353225, "grad_norm": 1.591635823249817, "learning_rate": 0.00017399596856381395, "loss": 1.2884, "step": 12210 }, { "epoch": 0.4702598652550529, "grad_norm": 1.5781102180480957, "learning_rate": 0.00017397562447312725, "loss": 1.1476, "step": 12215 }, { "epoch": 0.47045235803657365, "grad_norm": 1.4029310941696167, "learning_rate": 0.00017395527361785976, "loss": 1.4271, "step": 12220 }, { "epoch": 0.4706448508180943, "grad_norm": 1.8287990093231201, "learning_rate": 0.0001739349159998724, "loss": 1.2079, "step": 12225 }, { "epoch": 0.470837343599615, "grad_norm": 0.9693268537521362, "learning_rate": 0.00017391455162102677, "loss": 1.2341, "step": 12230 }, { "epoch": 0.4710298363811357, "grad_norm": 1.4181095361709595, "learning_rate": 0.00017389418048318502, "loss": 1.2796, "step": 12235 }, { "epoch": 0.4712223291626564, "grad_norm": 1.9247058629989624, "learning_rate": 0.00017387380258820993, "loss": 1.1858, "step": 12240 }, { "epoch": 0.4714148219441771, "grad_norm": 1.0236104726791382, "learning_rate": 0.00017385341793796502, "loss": 1.1713, "step": 12245 }, { "epoch": 0.4716073147256978, "grad_norm": 1.0250846147537231, "learning_rate": 0.00017383302653431427, "loss": 1.3036, "step": 12250 }, { "epoch": 0.4717998075072185, "grad_norm": 1.1760774850845337, "learning_rate": 0.00017381262837912228, "loss": 1.1779, "step": 12255 }, { "epoch": 0.4719923002887392, "grad_norm": 1.6482713222503662, "learning_rate": 0.00017379222347425446, "loss": 1.151, "step": 12260 }, { "epoch": 0.47218479307025985, "grad_norm": 1.3430352210998535, "learning_rate": 0.00017377181182157657, "loss": 1.3512, "step": 12265 }, { "epoch": 0.47237728585178057, 
"grad_norm": 2.4042775630950928, "learning_rate": 0.00017375139342295522, "loss": 1.3002, "step": 12270 }, { "epoch": 0.47256977863330124, "grad_norm": 0.967472493648529, "learning_rate": 0.00017373096828025752, "loss": 1.0813, "step": 12275 }, { "epoch": 0.47276227141482197, "grad_norm": 1.9774664640426636, "learning_rate": 0.00017371053639535117, "loss": 1.2232, "step": 12280 }, { "epoch": 0.47295476419634264, "grad_norm": 1.2525962591171265, "learning_rate": 0.00017369009777010454, "loss": 1.3974, "step": 12285 }, { "epoch": 0.4731472569778633, "grad_norm": 2.268892765045166, "learning_rate": 0.00017366965240638664, "loss": 1.2812, "step": 12290 }, { "epoch": 0.47333974975938403, "grad_norm": 1.143028974533081, "learning_rate": 0.000173649200306067, "loss": 1.2017, "step": 12295 }, { "epoch": 0.4735322425409047, "grad_norm": 0.9833802580833435, "learning_rate": 0.00017362874147101596, "loss": 1.1669, "step": 12300 }, { "epoch": 0.4737247353224254, "grad_norm": 1.5986253023147583, "learning_rate": 0.0001736082759031042, "loss": 1.2275, "step": 12305 }, { "epoch": 0.4739172281039461, "grad_norm": 1.8394620418548584, "learning_rate": 0.0001735878036042032, "loss": 1.1077, "step": 12310 }, { "epoch": 0.47410972088546677, "grad_norm": 2.2321078777313232, "learning_rate": 0.00017356732457618506, "loss": 1.1385, "step": 12315 }, { "epoch": 0.4743022136669875, "grad_norm": 1.2479119300842285, "learning_rate": 0.00017354683882092245, "loss": 1.2189, "step": 12320 }, { "epoch": 0.47449470644850816, "grad_norm": 1.6812646389007568, "learning_rate": 0.0001735263463402886, "loss": 1.3836, "step": 12325 }, { "epoch": 0.4746871992300289, "grad_norm": 1.4916552305221558, "learning_rate": 0.00017350584713615746, "loss": 1.1306, "step": 12330 }, { "epoch": 0.47487969201154956, "grad_norm": 1.7067712545394897, "learning_rate": 0.00017348534121040354, "loss": 1.3352, "step": 12335 }, { "epoch": 0.4750721847930703, "grad_norm": 1.1849184036254883, "learning_rate": 
0.00017346482856490196, "loss": 0.9746, "step": 12340 }, { "epoch": 0.47526467757459095, "grad_norm": 1.700038194656372, "learning_rate": 0.00017344430920152845, "loss": 1.3462, "step": 12345 }, { "epoch": 0.4754571703561116, "grad_norm": 1.4579262733459473, "learning_rate": 0.0001734237831221594, "loss": 1.2296, "step": 12350 }, { "epoch": 0.47564966313763235, "grad_norm": 1.230469822883606, "learning_rate": 0.00017340325032867178, "loss": 1.1615, "step": 12355 }, { "epoch": 0.475842155919153, "grad_norm": 1.4839364290237427, "learning_rate": 0.00017338271082294315, "loss": 1.2143, "step": 12360 }, { "epoch": 0.47603464870067375, "grad_norm": 0.8386423587799072, "learning_rate": 0.00017336216460685173, "loss": 1.1173, "step": 12365 }, { "epoch": 0.4762271414821944, "grad_norm": 1.9203957319259644, "learning_rate": 0.00017334161168227634, "loss": 1.2371, "step": 12370 }, { "epoch": 0.4764196342637151, "grad_norm": 1.752314567565918, "learning_rate": 0.00017332105205109641, "loss": 1.1022, "step": 12375 }, { "epoch": 0.4766121270452358, "grad_norm": 1.2998472452163696, "learning_rate": 0.00017330048571519198, "loss": 1.3008, "step": 12380 }, { "epoch": 0.4768046198267565, "grad_norm": 1.8506637811660767, "learning_rate": 0.0001732799126764437, "loss": 1.0814, "step": 12385 }, { "epoch": 0.4769971126082772, "grad_norm": 1.4652866125106812, "learning_rate": 0.00017325933293673283, "loss": 1.3528, "step": 12390 }, { "epoch": 0.4771896053897979, "grad_norm": 1.0838465690612793, "learning_rate": 0.00017323874649794127, "loss": 1.1435, "step": 12395 }, { "epoch": 0.4773820981713186, "grad_norm": 1.1437288522720337, "learning_rate": 0.0001732181533619515, "loss": 1.2403, "step": 12400 }, { "epoch": 0.4775745909528393, "grad_norm": 1.5026469230651855, "learning_rate": 0.00017319755353064665, "loss": 1.3211, "step": 12405 }, { "epoch": 0.47776708373435994, "grad_norm": 1.477759838104248, "learning_rate": 0.00017317694700591041, "loss": 1.062, "step": 12410 }, { "epoch": 
0.47795957651588067, "grad_norm": 1.6099724769592285, "learning_rate": 0.00017315633378962712, "loss": 1.322, "step": 12415 }, { "epoch": 0.47815206929740134, "grad_norm": 1.2413129806518555, "learning_rate": 0.00017313571388368173, "loss": 1.3106, "step": 12420 }, { "epoch": 0.47834456207892206, "grad_norm": 1.2218198776245117, "learning_rate": 0.00017311508728995976, "loss": 1.3899, "step": 12425 }, { "epoch": 0.47853705486044273, "grad_norm": 1.337332844734192, "learning_rate": 0.0001730944540103474, "loss": 1.2143, "step": 12430 }, { "epoch": 0.4787295476419634, "grad_norm": 1.132523775100708, "learning_rate": 0.00017307381404673143, "loss": 1.2243, "step": 12435 }, { "epoch": 0.47892204042348413, "grad_norm": 1.481467366218567, "learning_rate": 0.00017305316740099928, "loss": 1.1563, "step": 12440 }, { "epoch": 0.4791145332050048, "grad_norm": 1.3262776136398315, "learning_rate": 0.00017303251407503885, "loss": 1.1995, "step": 12445 }, { "epoch": 0.4793070259865255, "grad_norm": 1.8554911613464355, "learning_rate": 0.00017301185407073884, "loss": 1.2679, "step": 12450 }, { "epoch": 0.4794995187680462, "grad_norm": 1.5468156337738037, "learning_rate": 0.0001729911873899884, "loss": 1.1839, "step": 12455 }, { "epoch": 0.47969201154956687, "grad_norm": 1.1333458423614502, "learning_rate": 0.00017297051403467745, "loss": 1.2927, "step": 12460 }, { "epoch": 0.4798845043310876, "grad_norm": 1.5019558668136597, "learning_rate": 0.00017294983400669632, "loss": 1.1634, "step": 12465 }, { "epoch": 0.48007699711260826, "grad_norm": 0.9122928977012634, "learning_rate": 0.00017292914730793614, "loss": 1.0842, "step": 12470 }, { "epoch": 0.480269489894129, "grad_norm": 3.623866558074951, "learning_rate": 0.00017290845394028853, "loss": 1.2303, "step": 12475 }, { "epoch": 0.48046198267564966, "grad_norm": 1.1163458824157715, "learning_rate": 0.0001728877539056458, "loss": 1.1877, "step": 12480 }, { "epoch": 0.4806544754571704, "grad_norm": 1.1737778186798096, 
"learning_rate": 0.00017286704720590083, "loss": 1.2183, "step": 12485 }, { "epoch": 0.48084696823869105, "grad_norm": 1.0381931066513062, "learning_rate": 0.0001728463338429471, "loss": 1.103, "step": 12490 }, { "epoch": 0.4810394610202117, "grad_norm": 1.4400640726089478, "learning_rate": 0.00017282561381867865, "loss": 1.0941, "step": 12495 }, { "epoch": 0.48123195380173245, "grad_norm": 1.774886131286621, "learning_rate": 0.00017280488713499029, "loss": 1.2161, "step": 12500 }, { "epoch": 0.4814244465832531, "grad_norm": 1.680755376815796, "learning_rate": 0.00017278415379377724, "loss": 1.2248, "step": 12505 }, { "epoch": 0.48161693936477384, "grad_norm": 1.0955753326416016, "learning_rate": 0.00017276341379693553, "loss": 1.1558, "step": 12510 }, { "epoch": 0.4818094321462945, "grad_norm": 1.9817503690719604, "learning_rate": 0.00017274266714636163, "loss": 0.9682, "step": 12515 }, { "epoch": 0.4820019249278152, "grad_norm": 1.2484976053237915, "learning_rate": 0.00017272191384395266, "loss": 1.1304, "step": 12520 }, { "epoch": 0.4821944177093359, "grad_norm": 1.263295292854309, "learning_rate": 0.00017270115389160645, "loss": 1.1851, "step": 12525 }, { "epoch": 0.4823869104908566, "grad_norm": 1.749971628189087, "learning_rate": 0.00017268038729122126, "loss": 1.2665, "step": 12530 }, { "epoch": 0.4825794032723773, "grad_norm": 2.1695139408111572, "learning_rate": 0.0001726596140446962, "loss": 1.2351, "step": 12535 }, { "epoch": 0.482771896053898, "grad_norm": 1.8199032545089722, "learning_rate": 0.00017263883415393069, "loss": 1.0922, "step": 12540 }, { "epoch": 0.48296438883541865, "grad_norm": 1.5746350288391113, "learning_rate": 0.00017261804762082501, "loss": 1.2759, "step": 12545 }, { "epoch": 0.48315688161693937, "grad_norm": 1.1286424398422241, "learning_rate": 0.00017259725444727995, "loss": 1.2286, "step": 12550 }, { "epoch": 0.48334937439846004, "grad_norm": 1.1256860494613647, "learning_rate": 0.00017257645463519686, "loss": 1.1408, "step": 
12555 }, { "epoch": 0.48354186717998077, "grad_norm": 0.907913863658905, "learning_rate": 0.00017255564818647776, "loss": 1.1889, "step": 12560 }, { "epoch": 0.48373435996150144, "grad_norm": 1.0480241775512695, "learning_rate": 0.0001725348351030253, "loss": 1.0954, "step": 12565 }, { "epoch": 0.48392685274302216, "grad_norm": 1.4278559684753418, "learning_rate": 0.0001725140153867426, "loss": 1.1367, "step": 12570 }, { "epoch": 0.48411934552454283, "grad_norm": 0.9501696825027466, "learning_rate": 0.00017249318903953364, "loss": 1.2135, "step": 12575 }, { "epoch": 0.4843118383060635, "grad_norm": 1.8626338243484497, "learning_rate": 0.00017247235606330271, "loss": 1.2106, "step": 12580 }, { "epoch": 0.48450433108758423, "grad_norm": 1.3876720666885376, "learning_rate": 0.00017245151645995494, "loss": 1.3711, "step": 12585 }, { "epoch": 0.4846968238691049, "grad_norm": 1.6943193674087524, "learning_rate": 0.0001724306702313959, "loss": 1.117, "step": 12590 }, { "epoch": 0.4848893166506256, "grad_norm": 0.9052426218986511, "learning_rate": 0.00017240981737953192, "loss": 1.2578, "step": 12595 }, { "epoch": 0.4850818094321463, "grad_norm": 0.8325613141059875, "learning_rate": 0.00017238895790626976, "loss": 1.1599, "step": 12600 }, { "epoch": 0.48527430221366696, "grad_norm": 1.2736178636550903, "learning_rate": 0.00017236809181351697, "loss": 1.266, "step": 12605 }, { "epoch": 0.4854667949951877, "grad_norm": 1.8093243837356567, "learning_rate": 0.00017234721910318158, "loss": 1.2076, "step": 12610 }, { "epoch": 0.48565928777670836, "grad_norm": 1.7740625143051147, "learning_rate": 0.00017232633977717226, "loss": 1.2431, "step": 12615 }, { "epoch": 0.4858517805582291, "grad_norm": 0.83774334192276, "learning_rate": 0.00017230545383739826, "loss": 1.1987, "step": 12620 }, { "epoch": 0.48604427333974976, "grad_norm": 0.987789511680603, "learning_rate": 0.0001722845612857695, "loss": 1.0, "step": 12625 }, { "epoch": 0.4862367661212704, "grad_norm": 1.1741127967834473, 
"learning_rate": 0.00017226366212419645, "loss": 1.1961, "step": 12630 }, { "epoch": 0.48642925890279115, "grad_norm": 1.9914991855621338, "learning_rate": 0.00017224275635459023, "loss": 1.3657, "step": 12635 }, { "epoch": 0.4866217516843118, "grad_norm": 1.187045693397522, "learning_rate": 0.00017222184397886245, "loss": 1.1048, "step": 12640 }, { "epoch": 0.48681424446583255, "grad_norm": 1.1656368970870972, "learning_rate": 0.0001722009249989255, "loss": 1.1226, "step": 12645 }, { "epoch": 0.4870067372473532, "grad_norm": 1.050398349761963, "learning_rate": 0.00017217999941669226, "loss": 1.2018, "step": 12650 }, { "epoch": 0.48719923002887394, "grad_norm": 1.4217538833618164, "learning_rate": 0.00017215906723407618, "loss": 1.0039, "step": 12655 }, { "epoch": 0.4873917228103946, "grad_norm": 1.1657346487045288, "learning_rate": 0.00017213812845299145, "loss": 1.3153, "step": 12660 }, { "epoch": 0.4875842155919153, "grad_norm": 1.0622743368148804, "learning_rate": 0.0001721171830753527, "loss": 1.1889, "step": 12665 }, { "epoch": 0.487776708373436, "grad_norm": 1.3200461864471436, "learning_rate": 0.00017209623110307534, "loss": 1.2643, "step": 12670 }, { "epoch": 0.4879692011549567, "grad_norm": 1.3201463222503662, "learning_rate": 0.0001720752725380752, "loss": 1.1021, "step": 12675 }, { "epoch": 0.4881616939364774, "grad_norm": 1.3110108375549316, "learning_rate": 0.00017205430738226885, "loss": 1.297, "step": 12680 }, { "epoch": 0.4883541867179981, "grad_norm": 1.9574589729309082, "learning_rate": 0.00017203333563757344, "loss": 1.0589, "step": 12685 }, { "epoch": 0.48854667949951874, "grad_norm": 2.1387152671813965, "learning_rate": 0.00017201235730590663, "loss": 1.1929, "step": 12690 }, { "epoch": 0.48873917228103947, "grad_norm": 1.007409691810608, "learning_rate": 0.0001719913723891868, "loss": 1.1213, "step": 12695 }, { "epoch": 0.48893166506256014, "grad_norm": 0.9808946847915649, "learning_rate": 0.00017197038088933285, "loss": 1.2553, "step": 12700 
}, { "epoch": 0.48912415784408086, "grad_norm": 1.276231050491333, "learning_rate": 0.00017194938280826433, "loss": 1.2783, "step": 12705 }, { "epoch": 0.48931665062560153, "grad_norm": 2.288778305053711, "learning_rate": 0.0001719283781479014, "loss": 1.2736, "step": 12710 }, { "epoch": 0.48950914340712226, "grad_norm": 1.3838545083999634, "learning_rate": 0.00017190736691016475, "loss": 1.1101, "step": 12715 }, { "epoch": 0.48970163618864293, "grad_norm": 1.5680103302001953, "learning_rate": 0.00017188634909697572, "loss": 1.3685, "step": 12720 }, { "epoch": 0.4898941289701636, "grad_norm": 0.9690240621566772, "learning_rate": 0.00017186532471025626, "loss": 1.1677, "step": 12725 }, { "epoch": 0.4900866217516843, "grad_norm": 1.617100477218628, "learning_rate": 0.00017184429375192894, "loss": 1.3773, "step": 12730 }, { "epoch": 0.490279114533205, "grad_norm": 1.751895785331726, "learning_rate": 0.0001718232562239169, "loss": 1.1795, "step": 12735 }, { "epoch": 0.4904716073147257, "grad_norm": 1.1766438484191895, "learning_rate": 0.00017180221212814386, "loss": 1.0915, "step": 12740 }, { "epoch": 0.4906641000962464, "grad_norm": 1.384392499923706, "learning_rate": 0.00017178116146653415, "loss": 1.2823, "step": 12745 }, { "epoch": 0.49085659287776706, "grad_norm": 1.2379616498947144, "learning_rate": 0.00017176010424101274, "loss": 1.1182, "step": 12750 }, { "epoch": 0.4910490856592878, "grad_norm": 1.0620834827423096, "learning_rate": 0.00017173904045350515, "loss": 1.0414, "step": 12755 }, { "epoch": 0.49124157844080846, "grad_norm": 1.2448549270629883, "learning_rate": 0.00017171797010593755, "loss": 1.2487, "step": 12760 }, { "epoch": 0.4914340712223292, "grad_norm": 1.1862956285476685, "learning_rate": 0.00017169689320023666, "loss": 1.2117, "step": 12765 }, { "epoch": 0.49162656400384985, "grad_norm": 0.8380292057991028, "learning_rate": 0.00017167580973832984, "loss": 1.1396, "step": 12770 }, { "epoch": 0.4918190567853705, "grad_norm": 1.807305097579956, 
"learning_rate": 0.00017165471972214506, "loss": 1.2316, "step": 12775 }, { "epoch": 0.49201154956689125, "grad_norm": 1.5265247821807861, "learning_rate": 0.00017163362315361077, "loss": 1.1191, "step": 12780 }, { "epoch": 0.4922040423484119, "grad_norm": 1.4569288492202759, "learning_rate": 0.00017161252003465626, "loss": 1.3273, "step": 12785 }, { "epoch": 0.49239653512993264, "grad_norm": 1.0343568325042725, "learning_rate": 0.00017159141036721112, "loss": 1.3457, "step": 12790 }, { "epoch": 0.4925890279114533, "grad_norm": 1.4867749214172363, "learning_rate": 0.00017157029415320577, "loss": 1.1783, "step": 12795 }, { "epoch": 0.49278152069297404, "grad_norm": 1.0775165557861328, "learning_rate": 0.0001715491713945711, "loss": 1.2355, "step": 12800 }, { "epoch": 0.4929740134744947, "grad_norm": 1.1424553394317627, "learning_rate": 0.0001715280420932387, "loss": 1.1305, "step": 12805 }, { "epoch": 0.4931665062560154, "grad_norm": 2.403656482696533, "learning_rate": 0.00017150690625114065, "loss": 1.2118, "step": 12810 }, { "epoch": 0.4933589990375361, "grad_norm": 1.2673543691635132, "learning_rate": 0.00017148576387020976, "loss": 1.163, "step": 12815 }, { "epoch": 0.4935514918190568, "grad_norm": 1.4545459747314453, "learning_rate": 0.0001714646149523793, "loss": 1.1661, "step": 12820 }, { "epoch": 0.4937439846005775, "grad_norm": 1.8716140985488892, "learning_rate": 0.0001714434594995832, "loss": 1.0518, "step": 12825 }, { "epoch": 0.49393647738209817, "grad_norm": 1.4617652893066406, "learning_rate": 0.000171422297513756, "loss": 0.9978, "step": 12830 }, { "epoch": 0.49412897016361884, "grad_norm": 1.8650813102722168, "learning_rate": 0.00017140112899683284, "loss": 1.0077, "step": 12835 }, { "epoch": 0.49432146294513957, "grad_norm": 1.4080079793930054, "learning_rate": 0.00017137995395074938, "loss": 1.2906, "step": 12840 }, { "epoch": 0.49451395572666024, "grad_norm": 1.2144896984100342, "learning_rate": 0.000171358772377442, "loss": 1.1323, "step": 12845 
}, { "epoch": 0.49470644850818096, "grad_norm": 1.3294404745101929, "learning_rate": 0.0001713375842788476, "loss": 1.1987, "step": 12850 }, { "epoch": 0.49489894128970163, "grad_norm": 1.3663264513015747, "learning_rate": 0.00017131638965690372, "loss": 1.224, "step": 12855 }, { "epoch": 0.4950914340712223, "grad_norm": 2.325491428375244, "learning_rate": 0.0001712951885135484, "loss": 1.2449, "step": 12860 }, { "epoch": 0.49528392685274303, "grad_norm": 1.3226628303527832, "learning_rate": 0.00017127398085072039, "loss": 1.2321, "step": 12865 }, { "epoch": 0.4954764196342637, "grad_norm": 1.309049367904663, "learning_rate": 0.00017125276667035895, "loss": 1.1242, "step": 12870 }, { "epoch": 0.4956689124157844, "grad_norm": 2.189549446105957, "learning_rate": 0.00017123154597440402, "loss": 1.1479, "step": 12875 }, { "epoch": 0.4958614051973051, "grad_norm": 1.258787989616394, "learning_rate": 0.00017121031876479606, "loss": 1.3375, "step": 12880 }, { "epoch": 0.4960538979788258, "grad_norm": 1.4555957317352295, "learning_rate": 0.00017118908504347623, "loss": 1.29, "step": 12885 }, { "epoch": 0.4962463907603465, "grad_norm": 1.4220309257507324, "learning_rate": 0.00017116784481238613, "loss": 1.2438, "step": 12890 }, { "epoch": 0.49643888354186716, "grad_norm": 1.1141269207000732, "learning_rate": 0.00017114659807346803, "loss": 1.1286, "step": 12895 }, { "epoch": 0.4966313763233879, "grad_norm": 2.7541897296905518, "learning_rate": 0.00017112534482866486, "loss": 1.2506, "step": 12900 }, { "epoch": 0.49682386910490856, "grad_norm": 1.8478270769119263, "learning_rate": 0.00017110408507992007, "loss": 1.2337, "step": 12905 }, { "epoch": 0.4970163618864293, "grad_norm": 1.3013496398925781, "learning_rate": 0.00017108281882917775, "loss": 1.109, "step": 12910 }, { "epoch": 0.49720885466794995, "grad_norm": 1.6363266706466675, "learning_rate": 0.00017106154607838249, "loss": 1.2546, "step": 12915 }, { "epoch": 0.4974013474494706, "grad_norm": 1.4996516704559326, 
"learning_rate": 0.0001710402668294796, "loss": 1.3066, "step": 12920 }, { "epoch": 0.49759384023099135, "grad_norm": 1.2411632537841797, "learning_rate": 0.0001710189810844149, "loss": 1.2678, "step": 12925 }, { "epoch": 0.497786333012512, "grad_norm": 1.197771430015564, "learning_rate": 0.00017099768884513484, "loss": 1.1069, "step": 12930 }, { "epoch": 0.49797882579403274, "grad_norm": 1.1568015813827515, "learning_rate": 0.00017097639011358644, "loss": 1.1863, "step": 12935 }, { "epoch": 0.4981713185755534, "grad_norm": 1.8848886489868164, "learning_rate": 0.00017095508489171736, "loss": 1.3294, "step": 12940 }, { "epoch": 0.4983638113570741, "grad_norm": 1.4993650913238525, "learning_rate": 0.00017093377318147578, "loss": 1.1768, "step": 12945 }, { "epoch": 0.4985563041385948, "grad_norm": 1.1212975978851318, "learning_rate": 0.00017091245498481055, "loss": 1.2018, "step": 12950 }, { "epoch": 0.4987487969201155, "grad_norm": 1.086147665977478, "learning_rate": 0.00017089113030367107, "loss": 1.3022, "step": 12955 }, { "epoch": 0.4989412897016362, "grad_norm": 2.185974359512329, "learning_rate": 0.00017086979914000732, "loss": 1.239, "step": 12960 }, { "epoch": 0.4991337824831569, "grad_norm": 1.0493237972259521, "learning_rate": 0.00017084846149576993, "loss": 1.1614, "step": 12965 }, { "epoch": 0.4993262752646776, "grad_norm": 2.034449815750122, "learning_rate": 0.00017082711737291005, "loss": 1.2636, "step": 12970 }, { "epoch": 0.49951876804619827, "grad_norm": 1.6736334562301636, "learning_rate": 0.00017080576677337944, "loss": 1.1721, "step": 12975 }, { "epoch": 0.49971126082771894, "grad_norm": 1.8189449310302734, "learning_rate": 0.00017078440969913055, "loss": 1.3739, "step": 12980 }, { "epoch": 0.49990375360923966, "grad_norm": 1.500243067741394, "learning_rate": 0.00017076304615211627, "loss": 1.0446, "step": 12985 }, { "epoch": 0.5000962463907603, "grad_norm": 1.2671639919281006, "learning_rate": 0.0001707416761342902, "loss": 1.1841, "step": 12990 
}, { "epoch": 0.500288739172281, "grad_norm": 1.3602843284606934, "learning_rate": 0.00017072029964760644, "loss": 1.3311, "step": 12995 }, { "epoch": 0.5004812319538018, "grad_norm": 1.106224775314331, "learning_rate": 0.00017069891669401982, "loss": 1.1083, "step": 13000 }, { "epoch": 0.5006737247353225, "grad_norm": 1.3517072200775146, "learning_rate": 0.00017067752727548555, "loss": 1.2598, "step": 13005 }, { "epoch": 0.5008662175168431, "grad_norm": 1.1175580024719238, "learning_rate": 0.00017065613139395958, "loss": 1.0876, "step": 13010 }, { "epoch": 0.5010587102983638, "grad_norm": 1.8314218521118164, "learning_rate": 0.00017063472905139854, "loss": 1.3194, "step": 13015 }, { "epoch": 0.5012512030798845, "grad_norm": 0.7892528176307678, "learning_rate": 0.00017061332024975937, "loss": 1.0172, "step": 13020 }, { "epoch": 0.5014436958614052, "grad_norm": 0.8713880777359009, "learning_rate": 0.00017059190499099986, "loss": 1.0775, "step": 13025 }, { "epoch": 0.5016361886429259, "grad_norm": 2.4726779460906982, "learning_rate": 0.00017057048327707826, "loss": 1.3351, "step": 13030 }, { "epoch": 0.5018286814244466, "grad_norm": 1.1800824403762817, "learning_rate": 0.00017054905510995342, "loss": 1.3102, "step": 13035 }, { "epoch": 0.5020211742059673, "grad_norm": 1.583617091178894, "learning_rate": 0.0001705276204915849, "loss": 1.3633, "step": 13040 }, { "epoch": 0.5022136669874879, "grad_norm": 2.0497727394104004, "learning_rate": 0.00017050617942393264, "loss": 1.2055, "step": 13045 }, { "epoch": 0.5024061597690087, "grad_norm": 1.775793433189392, "learning_rate": 0.00017048473190895734, "loss": 1.1747, "step": 13050 }, { "epoch": 0.5025986525505294, "grad_norm": 1.8494744300842285, "learning_rate": 0.00017046327794862024, "loss": 1.2159, "step": 13055 }, { "epoch": 0.50279114533205, "grad_norm": 1.6188912391662598, "learning_rate": 0.00017044181754488315, "loss": 1.156, "step": 13060 }, { "epoch": 0.5029836381135707, "grad_norm": 1.575838565826416, 
"learning_rate": 0.00017042035069970846, "loss": 1.2103, "step": 13065 }, { "epoch": 0.5031761308950914, "grad_norm": 1.7594157457351685, "learning_rate": 0.0001703988774150592, "loss": 1.4271, "step": 13070 }, { "epoch": 0.5033686236766122, "grad_norm": 2.066418170928955, "learning_rate": 0.00017037739769289894, "loss": 1.215, "step": 13075 }, { "epoch": 0.5035611164581328, "grad_norm": 1.8343390226364136, "learning_rate": 0.0001703559115351919, "loss": 1.3318, "step": 13080 }, { "epoch": 0.5037536092396535, "grad_norm": 1.177186369895935, "learning_rate": 0.00017033441894390278, "loss": 1.2845, "step": 13085 }, { "epoch": 0.5039461020211742, "grad_norm": 1.7650407552719116, "learning_rate": 0.000170312919920997, "loss": 1.2133, "step": 13090 }, { "epoch": 0.5041385948026949, "grad_norm": 1.0483806133270264, "learning_rate": 0.00017029141446844043, "loss": 1.2309, "step": 13095 }, { "epoch": 0.5043310875842156, "grad_norm": 1.1729573011398315, "learning_rate": 0.00017026990258819968, "loss": 1.2975, "step": 13100 }, { "epoch": 0.5045235803657363, "grad_norm": 1.8557977676391602, "learning_rate": 0.00017024838428224184, "loss": 1.1332, "step": 13105 }, { "epoch": 0.504716073147257, "grad_norm": 2.2607064247131348, "learning_rate": 0.00017022685955253458, "loss": 1.186, "step": 13110 }, { "epoch": 0.5049085659287776, "grad_norm": 1.0992141962051392, "learning_rate": 0.00017020532840104625, "loss": 1.0708, "step": 13115 }, { "epoch": 0.5051010587102983, "grad_norm": 0.9550696611404419, "learning_rate": 0.0001701837908297457, "loss": 1.2069, "step": 13120 }, { "epoch": 0.5052935514918191, "grad_norm": 1.9301509857177734, "learning_rate": 0.00017016224684060242, "loss": 1.1152, "step": 13125 }, { "epoch": 0.5054860442733398, "grad_norm": 1.2657769918441772, "learning_rate": 0.0001701406964355864, "loss": 1.1028, "step": 13130 }, { "epoch": 0.5056785370548604, "grad_norm": 1.549902081489563, "learning_rate": 0.00017011913961666837, "loss": 1.1677, "step": 13135 }, { 
"epoch": 0.5058710298363811, "grad_norm": 1.5015727281570435, "learning_rate": 0.00017009757638581952, "loss": 1.1799, "step": 13140 }, { "epoch": 0.5060635226179019, "grad_norm": 1.206145167350769, "learning_rate": 0.00017007600674501166, "loss": 1.131, "step": 13145 }, { "epoch": 0.5062560153994226, "grad_norm": 1.1034317016601562, "learning_rate": 0.00017005443069621716, "loss": 1.0504, "step": 13150 }, { "epoch": 0.5064485081809432, "grad_norm": 1.0830001831054688, "learning_rate": 0.00017003284824140908, "loss": 1.1653, "step": 13155 }, { "epoch": 0.5066410009624639, "grad_norm": 1.8119686841964722, "learning_rate": 0.00017001125938256094, "loss": 1.1646, "step": 13160 }, { "epoch": 0.5068334937439846, "grad_norm": 1.1078890562057495, "learning_rate": 0.00016998966412164692, "loss": 1.3281, "step": 13165 }, { "epoch": 0.5070259865255053, "grad_norm": 2.1965198516845703, "learning_rate": 0.00016996806246064174, "loss": 1.1042, "step": 13170 }, { "epoch": 0.507218479307026, "grad_norm": 1.3997282981872559, "learning_rate": 0.00016994645440152075, "loss": 1.1662, "step": 13175 }, { "epoch": 0.5074109720885467, "grad_norm": 1.2493458986282349, "learning_rate": 0.00016992483994625985, "loss": 1.1594, "step": 13180 }, { "epoch": 0.5076034648700674, "grad_norm": 0.8307852745056152, "learning_rate": 0.00016990321909683557, "loss": 1.1701, "step": 13185 }, { "epoch": 0.507795957651588, "grad_norm": 1.1798492670059204, "learning_rate": 0.00016988159185522497, "loss": 1.2217, "step": 13190 }, { "epoch": 0.5079884504331088, "grad_norm": 2.131786823272705, "learning_rate": 0.00016985995822340567, "loss": 1.277, "step": 13195 }, { "epoch": 0.5081809432146295, "grad_norm": 1.0258443355560303, "learning_rate": 0.00016983831820335603, "loss": 1.0954, "step": 13200 }, { "epoch": 0.5083734359961501, "grad_norm": 2.4005777835845947, "learning_rate": 0.0001698166717970548, "loss": 1.3484, "step": 13205 }, { "epoch": 0.5085659287776708, "grad_norm": 1.3329745531082153, 
"learning_rate": 0.00016979501900648143, "loss": 1.2548, "step": 13210 }, { "epoch": 0.5087584215591915, "grad_norm": 1.2792582511901855, "learning_rate": 0.00016977335983361594, "loss": 1.1056, "step": 13215 }, { "epoch": 0.5089509143407123, "grad_norm": 1.1146180629730225, "learning_rate": 0.00016975169428043888, "loss": 1.1728, "step": 13220 }, { "epoch": 0.5091434071222329, "grad_norm": 1.155003309249878, "learning_rate": 0.0001697300223489314, "loss": 1.2469, "step": 13225 }, { "epoch": 0.5093358999037536, "grad_norm": 1.8456053733825684, "learning_rate": 0.00016970834404107535, "loss": 1.1515, "step": 13230 }, { "epoch": 0.5095283926852743, "grad_norm": 1.3863856792449951, "learning_rate": 0.000169686659358853, "loss": 1.2561, "step": 13235 }, { "epoch": 0.509720885466795, "grad_norm": 2.099985361099243, "learning_rate": 0.00016966496830424728, "loss": 1.2639, "step": 13240 }, { "epoch": 0.5099133782483157, "grad_norm": 1.4132083654403687, "learning_rate": 0.0001696432708792417, "loss": 0.9859, "step": 13245 }, { "epoch": 0.5101058710298364, "grad_norm": 1.0421473979949951, "learning_rate": 0.00016962156708582037, "loss": 1.1239, "step": 13250 }, { "epoch": 0.5102983638113571, "grad_norm": 1.4971591234207153, "learning_rate": 0.0001695998569259679, "loss": 1.183, "step": 13255 }, { "epoch": 0.5104908565928777, "grad_norm": 1.7850632667541504, "learning_rate": 0.00016957814040166955, "loss": 1.2342, "step": 13260 }, { "epoch": 0.5106833493743984, "grad_norm": 2.817624092102051, "learning_rate": 0.0001695564175149112, "loss": 1.1795, "step": 13265 }, { "epoch": 0.5108758421559192, "grad_norm": 1.4107112884521484, "learning_rate": 0.00016953468826767925, "loss": 1.2791, "step": 13270 }, { "epoch": 0.5110683349374399, "grad_norm": 1.4817914962768555, "learning_rate": 0.00016951295266196063, "loss": 1.0816, "step": 13275 }, { "epoch": 0.5112608277189605, "grad_norm": 1.054870367050171, "learning_rate": 0.00016949121069974302, "loss": 1.2114, "step": 13280 }, { 
"epoch": 0.5114533205004812, "grad_norm": 1.2629690170288086, "learning_rate": 0.00016946946238301453, "loss": 1.1014, "step": 13285 }, { "epoch": 0.5116458132820019, "grad_norm": 0.9189853668212891, "learning_rate": 0.00016944770771376387, "loss": 1.319, "step": 13290 }, { "epoch": 0.5118383060635227, "grad_norm": 1.4326847791671753, "learning_rate": 0.0001694259466939804, "loss": 1.299, "step": 13295 }, { "epoch": 0.5120307988450433, "grad_norm": 1.1833186149597168, "learning_rate": 0.00016940417932565402, "loss": 1.2863, "step": 13300 }, { "epoch": 0.512223291626564, "grad_norm": 1.1329289674758911, "learning_rate": 0.0001693824056107752, "loss": 1.2228, "step": 13305 }, { "epoch": 0.5124157844080847, "grad_norm": 1.2103817462921143, "learning_rate": 0.000169360625551335, "loss": 1.042, "step": 13310 }, { "epoch": 0.5126082771896054, "grad_norm": 1.2664172649383545, "learning_rate": 0.00016933883914932506, "loss": 1.299, "step": 13315 }, { "epoch": 0.5128007699711261, "grad_norm": 1.8509985208511353, "learning_rate": 0.0001693170464067376, "loss": 1.135, "step": 13320 }, { "epoch": 0.5129932627526468, "grad_norm": 2.1004250049591064, "learning_rate": 0.00016929524732556546, "loss": 1.1321, "step": 13325 }, { "epoch": 0.5131857555341675, "grad_norm": 1.1648815870285034, "learning_rate": 0.00016927344190780197, "loss": 1.094, "step": 13330 }, { "epoch": 0.5133782483156881, "grad_norm": 0.9492617249488831, "learning_rate": 0.0001692516301554411, "loss": 1.2055, "step": 13335 }, { "epoch": 0.5135707410972089, "grad_norm": 1.7911789417266846, "learning_rate": 0.00016922981207047742, "loss": 1.1726, "step": 13340 }, { "epoch": 0.5137632338787296, "grad_norm": 1.2055487632751465, "learning_rate": 0.00016920798765490601, "loss": 1.158, "step": 13345 }, { "epoch": 0.5139557266602502, "grad_norm": 1.1120411157608032, "learning_rate": 0.0001691861569107226, "loss": 1.2508, "step": 13350 }, { "epoch": 0.5141482194417709, "grad_norm": 1.1816275119781494, "learning_rate": 
0.0001691643198399235, "loss": 1.0294, "step": 13355 }, { "epoch": 0.5143407122232916, "grad_norm": 1.1714962720870972, "learning_rate": 0.00016914247644450546, "loss": 1.2843, "step": 13360 }, { "epoch": 0.5145332050048124, "grad_norm": 2.0812292098999023, "learning_rate": 0.000169120626726466, "loss": 1.284, "step": 13365 }, { "epoch": 0.514725697786333, "grad_norm": 1.7628620862960815, "learning_rate": 0.00016909877068780314, "loss": 1.1104, "step": 13370 }, { "epoch": 0.5149181905678537, "grad_norm": 1.7429643869400024, "learning_rate": 0.0001690769083305154, "loss": 1.1387, "step": 13375 }, { "epoch": 0.5151106833493744, "grad_norm": 2.087916612625122, "learning_rate": 0.00016905503965660196, "loss": 1.2737, "step": 13380 }, { "epoch": 0.515303176130895, "grad_norm": 1.5689221620559692, "learning_rate": 0.00016903316466806265, "loss": 1.145, "step": 13385 }, { "epoch": 0.5154956689124158, "grad_norm": 1.0740375518798828, "learning_rate": 0.0001690112833668977, "loss": 0.9748, "step": 13390 }, { "epoch": 0.5156881616939365, "grad_norm": 1.4595876932144165, "learning_rate": 0.00016898939575510805, "loss": 1.1378, "step": 13395 }, { "epoch": 0.5158806544754572, "grad_norm": 1.9210182428359985, "learning_rate": 0.00016896750183469517, "loss": 1.2581, "step": 13400 }, { "epoch": 0.5160731472569778, "grad_norm": 1.0922927856445312, "learning_rate": 0.00016894560160766117, "loss": 1.1601, "step": 13405 }, { "epoch": 0.5162656400384985, "grad_norm": 2.037611246109009, "learning_rate": 0.00016892369507600855, "loss": 1.2394, "step": 13410 }, { "epoch": 0.5164581328200193, "grad_norm": 1.1577821969985962, "learning_rate": 0.0001689017822417406, "loss": 1.2428, "step": 13415 }, { "epoch": 0.51665062560154, "grad_norm": 1.1762430667877197, "learning_rate": 0.00016887986310686114, "loss": 1.1648, "step": 13420 }, { "epoch": 0.5168431183830606, "grad_norm": 1.8631316423416138, "learning_rate": 0.00016885793767337445, "loss": 1.2288, "step": 13425 }, { "epoch": 
0.5170356111645813, "grad_norm": 1.191747784614563, "learning_rate": 0.0001688360059432855, "loss": 1.1287, "step": 13430 }, { "epoch": 0.517228103946102, "grad_norm": 1.092367172241211, "learning_rate": 0.00016881406791859985, "loss": 1.2073, "step": 13435 }, { "epoch": 0.5174205967276228, "grad_norm": 0.9805938601493835, "learning_rate": 0.00016879212360132345, "loss": 1.3199, "step": 13440 }, { "epoch": 0.5176130895091434, "grad_norm": 1.0042074918746948, "learning_rate": 0.00016877017299346314, "loss": 1.1389, "step": 13445 }, { "epoch": 0.5178055822906641, "grad_norm": 1.3087821006774902, "learning_rate": 0.00016874821609702605, "loss": 1.2112, "step": 13450 }, { "epoch": 0.5179980750721848, "grad_norm": 1.4208637475967407, "learning_rate": 0.00016872625291401998, "loss": 1.119, "step": 13455 }, { "epoch": 0.5181905678537055, "grad_norm": 0.9211226105690002, "learning_rate": 0.0001687042834464534, "loss": 1.1458, "step": 13460 }, { "epoch": 0.5183830606352262, "grad_norm": 1.1774996519088745, "learning_rate": 0.00016868230769633518, "loss": 1.195, "step": 13465 }, { "epoch": 0.5185755534167469, "grad_norm": 1.395883321762085, "learning_rate": 0.0001686603256656749, "loss": 1.1633, "step": 13470 }, { "epoch": 0.5187680461982676, "grad_norm": 2.2554938793182373, "learning_rate": 0.00016863833735648268, "loss": 1.1345, "step": 13475 }, { "epoch": 0.5189605389797882, "grad_norm": 1.2396293878555298, "learning_rate": 0.00016861634277076922, "loss": 1.1109, "step": 13480 }, { "epoch": 0.519153031761309, "grad_norm": 1.2292909622192383, "learning_rate": 0.00016859434191054574, "loss": 1.1029, "step": 13485 }, { "epoch": 0.5193455245428297, "grad_norm": 1.145571231842041, "learning_rate": 0.00016857233477782409, "loss": 1.1734, "step": 13490 }, { "epoch": 0.5195380173243503, "grad_norm": 1.7307795286178589, "learning_rate": 0.00016855032137461667, "loss": 1.1476, "step": 13495 }, { "epoch": 0.519730510105871, "grad_norm": 1.611140489578247, "learning_rate": 
0.0001685283017029365, "loss": 1.1304, "step": 13500 }, { "epoch": 0.5199230028873917, "grad_norm": 1.3966014385223389, "learning_rate": 0.00016850627576479705, "loss": 1.1231, "step": 13505 }, { "epoch": 0.5201154956689125, "grad_norm": 1.505765676498413, "learning_rate": 0.0001684842435622125, "loss": 1.0741, "step": 13510 }, { "epoch": 0.5203079884504331, "grad_norm": 1.791595220565796, "learning_rate": 0.00016846220509719755, "loss": 1.1928, "step": 13515 }, { "epoch": 0.5205004812319538, "grad_norm": 1.2992479801177979, "learning_rate": 0.00016844016037176744, "loss": 1.1523, "step": 13520 }, { "epoch": 0.5206929740134745, "grad_norm": 1.8747221231460571, "learning_rate": 0.00016841810938793807, "loss": 1.0704, "step": 13525 }, { "epoch": 0.5208854667949951, "grad_norm": 1.3441274166107178, "learning_rate": 0.00016839605214772583, "loss": 1.1979, "step": 13530 }, { "epoch": 0.5210779595765159, "grad_norm": 0.8640159964561462, "learning_rate": 0.0001683739886531477, "loss": 1.1577, "step": 13535 }, { "epoch": 0.5212704523580366, "grad_norm": 1.7198442220687866, "learning_rate": 0.00016835191890622123, "loss": 1.2623, "step": 13540 }, { "epoch": 0.5214629451395573, "grad_norm": 1.2651041746139526, "learning_rate": 0.0001683298429089646, "loss": 1.2428, "step": 13545 }, { "epoch": 0.5216554379210779, "grad_norm": 1.9191710948944092, "learning_rate": 0.00016830776066339642, "loss": 1.2872, "step": 13550 }, { "epoch": 0.5218479307025986, "grad_norm": 1.8098481893539429, "learning_rate": 0.00016828567217153605, "loss": 1.2838, "step": 13555 }, { "epoch": 0.5220404234841194, "grad_norm": 1.732160210609436, "learning_rate": 0.00016826357743540332, "loss": 1.0766, "step": 13560 }, { "epoch": 0.5222329162656401, "grad_norm": 1.4580518007278442, "learning_rate": 0.00016824147645701863, "loss": 1.2825, "step": 13565 }, { "epoch": 0.5224254090471607, "grad_norm": 1.5836480855941772, "learning_rate": 0.000168219369238403, "loss": 1.1772, "step": 13570 }, { "epoch": 
0.5226179018286814, "grad_norm": 1.5529143810272217, "learning_rate": 0.00016819725578157794, "loss": 1.2795, "step": 13575 }, { "epoch": 0.5228103946102021, "grad_norm": 1.1405484676361084, "learning_rate": 0.0001681751360885656, "loss": 1.2133, "step": 13580 }, { "epoch": 0.5230028873917228, "grad_norm": 1.0912057161331177, "learning_rate": 0.00016815301016138873, "loss": 1.0493, "step": 13585 }, { "epoch": 0.5231953801732435, "grad_norm": 0.9384201169013977, "learning_rate": 0.0001681308780020705, "loss": 1.1638, "step": 13590 }, { "epoch": 0.5233878729547642, "grad_norm": 1.3467286825180054, "learning_rate": 0.0001681087396126348, "loss": 1.1927, "step": 13595 }, { "epoch": 0.5235803657362849, "grad_norm": 0.9008259773254395, "learning_rate": 0.00016808659499510607, "loss": 1.2158, "step": 13600 }, { "epoch": 0.5237728585178055, "grad_norm": 1.1013727188110352, "learning_rate": 0.00016806444415150927, "loss": 1.1575, "step": 13605 }, { "epoch": 0.5239653512993263, "grad_norm": 1.160654902458191, "learning_rate": 0.00016804228708386992, "loss": 1.1662, "step": 13610 }, { "epoch": 0.524157844080847, "grad_norm": 1.5752032995224, "learning_rate": 0.00016802012379421414, "loss": 1.1596, "step": 13615 }, { "epoch": 0.5243503368623676, "grad_norm": 1.1819881200790405, "learning_rate": 0.00016799795428456865, "loss": 1.1686, "step": 13620 }, { "epoch": 0.5245428296438883, "grad_norm": 0.9841921329498291, "learning_rate": 0.00016797577855696069, "loss": 1.0872, "step": 13625 }, { "epoch": 0.5247353224254091, "grad_norm": 1.2292228937149048, "learning_rate": 0.00016795359661341808, "loss": 1.3943, "step": 13630 }, { "epoch": 0.5249278152069298, "grad_norm": 1.2674068212509155, "learning_rate": 0.0001679314084559692, "loss": 1.232, "step": 13635 }, { "epoch": 0.5251203079884504, "grad_norm": 1.1942312717437744, "learning_rate": 0.00016790921408664302, "loss": 1.2223, "step": 13640 }, { "epoch": 0.5253128007699711, "grad_norm": 1.5753337144851685, "learning_rate": 
0.00016788701350746907, "loss": 1.2936, "step": 13645 }, { "epoch": 0.5255052935514918, "grad_norm": 1.1031461954116821, "learning_rate": 0.00016786480672047744, "loss": 1.2651, "step": 13650 }, { "epoch": 0.5256977863330126, "grad_norm": 3.8325674533843994, "learning_rate": 0.00016784259372769884, "loss": 1.1693, "step": 13655 }, { "epoch": 0.5258902791145332, "grad_norm": 1.6535909175872803, "learning_rate": 0.0001678203745311644, "loss": 1.1606, "step": 13660 }, { "epoch": 0.5260827718960539, "grad_norm": 1.6406097412109375, "learning_rate": 0.000167798149132906, "loss": 1.3232, "step": 13665 }, { "epoch": 0.5262752646775746, "grad_norm": 1.6994904279708862, "learning_rate": 0.000167775917534956, "loss": 1.0998, "step": 13670 }, { "epoch": 0.5264677574590952, "grad_norm": 1.6446374654769897, "learning_rate": 0.0001677536797393473, "loss": 1.2997, "step": 13675 }, { "epoch": 0.526660250240616, "grad_norm": 1.6050851345062256, "learning_rate": 0.0001677314357481134, "loss": 1.19, "step": 13680 }, { "epoch": 0.5268527430221367, "grad_norm": 1.473940134048462, "learning_rate": 0.00016770918556328844, "loss": 1.2007, "step": 13685 }, { "epoch": 0.5270452358036574, "grad_norm": 1.1209567785263062, "learning_rate": 0.00016768692918690695, "loss": 1.2956, "step": 13690 }, { "epoch": 0.527237728585178, "grad_norm": 1.4143558740615845, "learning_rate": 0.00016766466662100415, "loss": 1.1734, "step": 13695 }, { "epoch": 0.5274302213666987, "grad_norm": 1.138107180595398, "learning_rate": 0.00016764239786761585, "loss": 1.1318, "step": 13700 }, { "epoch": 0.5276227141482195, "grad_norm": 1.5194774866104126, "learning_rate": 0.00016762012292877835, "loss": 1.1525, "step": 13705 }, { "epoch": 0.5278152069297402, "grad_norm": 1.136946439743042, "learning_rate": 0.00016759784180652858, "loss": 1.1289, "step": 13710 }, { "epoch": 0.5280076997112608, "grad_norm": 1.5263949632644653, "learning_rate": 0.00016757555450290396, "loss": 1.2811, "step": 13715 }, { "epoch": 
0.5282001924927815, "grad_norm": 2.306833505630493, "learning_rate": 0.00016755326101994248, "loss": 1.0326, "step": 13720 }, { "epoch": 0.5283926852743022, "grad_norm": 1.4330452680587769, "learning_rate": 0.0001675309613596828, "loss": 1.0126, "step": 13725 }, { "epoch": 0.528585178055823, "grad_norm": 0.8746087551116943, "learning_rate": 0.00016750865552416408, "loss": 1.224, "step": 13730 }, { "epoch": 0.5287776708373436, "grad_norm": 2.576612949371338, "learning_rate": 0.000167486343515426, "loss": 1.178, "step": 13735 }, { "epoch": 0.5289701636188643, "grad_norm": 1.3074976205825806, "learning_rate": 0.00016746402533550887, "loss": 1.2453, "step": 13740 }, { "epoch": 0.529162656400385, "grad_norm": 1.0941317081451416, "learning_rate": 0.00016744170098645353, "loss": 0.9341, "step": 13745 }, { "epoch": 0.5293551491819056, "grad_norm": 1.6738418340682983, "learning_rate": 0.00016741937047030139, "loss": 1.1423, "step": 13750 }, { "epoch": 0.5295476419634264, "grad_norm": 1.9735844135284424, "learning_rate": 0.00016739703378909444, "loss": 1.0691, "step": 13755 }, { "epoch": 0.5297401347449471, "grad_norm": 1.0063233375549316, "learning_rate": 0.00016737469094487518, "loss": 1.0096, "step": 13760 }, { "epoch": 0.5299326275264677, "grad_norm": 1.2500115633010864, "learning_rate": 0.00016735234193968678, "loss": 1.1627, "step": 13765 }, { "epoch": 0.5301251203079884, "grad_norm": 1.0908536911010742, "learning_rate": 0.00016732998677557287, "loss": 1.0477, "step": 13770 }, { "epoch": 0.5303176130895092, "grad_norm": 1.609208106994629, "learning_rate": 0.0001673076254545777, "loss": 0.964, "step": 13775 }, { "epoch": 0.5305101058710299, "grad_norm": 1.0210634469985962, "learning_rate": 0.00016728525797874607, "loss": 1.2982, "step": 13780 }, { "epoch": 0.5307025986525505, "grad_norm": 2.0595545768737793, "learning_rate": 0.0001672628843501233, "loss": 1.1969, "step": 13785 }, { "epoch": 0.5308950914340712, "grad_norm": 1.7514983415603638, "learning_rate": 
0.00016724050457075533, "loss": 1.2918, "step": 13790 }, { "epoch": 0.5310875842155919, "grad_norm": 1.483798861503601, "learning_rate": 0.00016721811864268865, "loss": 1.0163, "step": 13795 }, { "epoch": 0.5312800769971127, "grad_norm": 1.4174484014511108, "learning_rate": 0.0001671957265679703, "loss": 1.1936, "step": 13800 }, { "epoch": 0.5314725697786333, "grad_norm": 1.4664232730865479, "learning_rate": 0.00016717332834864787, "loss": 1.2553, "step": 13805 }, { "epoch": 0.531665062560154, "grad_norm": 0.6863868832588196, "learning_rate": 0.00016715092398676958, "loss": 0.8998, "step": 13810 }, { "epoch": 0.5318575553416747, "grad_norm": 2.3511574268341064, "learning_rate": 0.00016712851348438408, "loss": 1.4484, "step": 13815 }, { "epoch": 0.5320500481231953, "grad_norm": 1.418361783027649, "learning_rate": 0.00016710609684354074, "loss": 1.1139, "step": 13820 }, { "epoch": 0.5322425409047161, "grad_norm": 1.5918070077896118, "learning_rate": 0.00016708367406628938, "loss": 1.1045, "step": 13825 }, { "epoch": 0.5324350336862368, "grad_norm": 1.1937044858932495, "learning_rate": 0.00016706124515468042, "loss": 1.2665, "step": 13830 }, { "epoch": 0.5326275264677575, "grad_norm": 1.60366952419281, "learning_rate": 0.00016703881011076482, "loss": 1.3277, "step": 13835 }, { "epoch": 0.5328200192492781, "grad_norm": 1.2769535779953003, "learning_rate": 0.00016701636893659414, "loss": 1.2517, "step": 13840 }, { "epoch": 0.5330125120307988, "grad_norm": 1.3906430006027222, "learning_rate": 0.00016699392163422043, "loss": 1.3485, "step": 13845 }, { "epoch": 0.5332050048123196, "grad_norm": 1.461391568183899, "learning_rate": 0.0001669714682056964, "loss": 1.1297, "step": 13850 }, { "epoch": 0.5333974975938403, "grad_norm": 1.3566093444824219, "learning_rate": 0.00016694900865307525, "loss": 1.2833, "step": 13855 }, { "epoch": 0.5335899903753609, "grad_norm": 1.4480105638504028, "learning_rate": 0.00016692654297841076, "loss": 1.0877, "step": 13860 }, { "epoch": 
0.5337824831568816, "grad_norm": 1.0896391868591309, "learning_rate": 0.00016690407118375724, "loss": 1.1286, "step": 13865 }, { "epoch": 0.5339749759384023, "grad_norm": 1.101636290550232, "learning_rate": 0.00016688159327116962, "loss": 1.0802, "step": 13870 }, { "epoch": 0.534167468719923, "grad_norm": 1.1488208770751953, "learning_rate": 0.00016685910924270337, "loss": 1.144, "step": 13875 }, { "epoch": 0.5343599615014437, "grad_norm": 1.4691115617752075, "learning_rate": 0.00016683661910041445, "loss": 1.2133, "step": 13880 }, { "epoch": 0.5345524542829644, "grad_norm": 0.9920752048492432, "learning_rate": 0.0001668141228463595, "loss": 1.1326, "step": 13885 }, { "epoch": 0.534744947064485, "grad_norm": 1.2828654050827026, "learning_rate": 0.00016679162048259557, "loss": 1.2162, "step": 13890 }, { "epoch": 0.5349374398460057, "grad_norm": 1.3294516801834106, "learning_rate": 0.00016676911201118043, "loss": 1.1797, "step": 13895 }, { "epoch": 0.5351299326275265, "grad_norm": 1.5326685905456543, "learning_rate": 0.00016674659743417232, "loss": 1.1147, "step": 13900 }, { "epoch": 0.5353224254090472, "grad_norm": 1.9222960472106934, "learning_rate": 0.00016672407675363, "loss": 1.1615, "step": 13905 }, { "epoch": 0.5355149181905678, "grad_norm": 1.412458062171936, "learning_rate": 0.00016670154997161288, "loss": 1.1556, "step": 13910 }, { "epoch": 0.5357074109720885, "grad_norm": 1.230669617652893, "learning_rate": 0.00016667901709018087, "loss": 1.062, "step": 13915 }, { "epoch": 0.5358999037536092, "grad_norm": 1.431746006011963, "learning_rate": 0.00016665647811139444, "loss": 1.0561, "step": 13920 }, { "epoch": 0.53609239653513, "grad_norm": 1.6623647212982178, "learning_rate": 0.00016663393303731466, "loss": 1.1495, "step": 13925 }, { "epoch": 0.5362848893166506, "grad_norm": 1.5261880159378052, "learning_rate": 0.00016661138187000312, "loss": 1.3093, "step": 13930 }, { "epoch": 0.5364773820981713, "grad_norm": 1.5623407363891602, "learning_rate": 
0.00016658882461152195, "loss": 1.0859, "step": 13935 }, { "epoch": 0.536669874879692, "grad_norm": 1.2155213356018066, "learning_rate": 0.0001665662612639339, "loss": 1.2502, "step": 13940 }, { "epoch": 0.5368623676612128, "grad_norm": 0.7948794364929199, "learning_rate": 0.0001665436918293022, "loss": 1.1741, "step": 13945 }, { "epoch": 0.5370548604427334, "grad_norm": 1.370322585105896, "learning_rate": 0.0001665211163096907, "loss": 1.2727, "step": 13950 }, { "epoch": 0.5372473532242541, "grad_norm": 1.146519660949707, "learning_rate": 0.00016649853470716378, "loss": 1.2603, "step": 13955 }, { "epoch": 0.5374398460057748, "grad_norm": 1.1492048501968384, "learning_rate": 0.00016647594702378637, "loss": 1.1772, "step": 13960 }, { "epoch": 0.5376323387872954, "grad_norm": 2.4730112552642822, "learning_rate": 0.00016645335326162397, "loss": 1.4024, "step": 13965 }, { "epoch": 0.5378248315688162, "grad_norm": 1.411889910697937, "learning_rate": 0.00016643075342274264, "loss": 1.1121, "step": 13970 }, { "epoch": 0.5380173243503369, "grad_norm": 1.0818617343902588, "learning_rate": 0.00016640814750920895, "loss": 1.2139, "step": 13975 }, { "epoch": 0.5382098171318576, "grad_norm": 1.1196002960205078, "learning_rate": 0.0001663855355230901, "loss": 1.0877, "step": 13980 }, { "epoch": 0.5384023099133782, "grad_norm": 1.5476993322372437, "learning_rate": 0.00016636291746645378, "loss": 1.1055, "step": 13985 }, { "epoch": 0.5385948026948989, "grad_norm": 0.924186646938324, "learning_rate": 0.00016634029334136827, "loss": 1.0307, "step": 13990 }, { "epoch": 0.5387872954764197, "grad_norm": 1.157355546951294, "learning_rate": 0.0001663176631499024, "loss": 1.0783, "step": 13995 }, { "epoch": 0.5389797882579404, "grad_norm": 1.1704423427581787, "learning_rate": 0.00016629502689412555, "loss": 1.3452, "step": 14000 }, { "epoch": 0.539172281039461, "grad_norm": 2.0251457691192627, "learning_rate": 0.00016627238457610766, "loss": 1.3611, "step": 14005 }, { "epoch": 
0.5393647738209817, "grad_norm": 1.018612265586853, "learning_rate": 0.0001662497361979192, "loss": 1.115, "step": 14010 }, { "epoch": 0.5395572666025024, "grad_norm": 1.2389349937438965, "learning_rate": 0.00016622708176163126, "loss": 1.2055, "step": 14015 }, { "epoch": 0.5397497593840231, "grad_norm": 2.2555086612701416, "learning_rate": 0.0001662044212693154, "loss": 1.0512, "step": 14020 }, { "epoch": 0.5399422521655438, "grad_norm": 1.059856653213501, "learning_rate": 0.00016618175472304375, "loss": 1.2114, "step": 14025 }, { "epoch": 0.5401347449470645, "grad_norm": 1.484417200088501, "learning_rate": 0.00016615908212488906, "loss": 1.1872, "step": 14030 }, { "epoch": 0.5403272377285852, "grad_norm": 1.4816780090332031, "learning_rate": 0.00016613640347692458, "loss": 1.1261, "step": 14035 }, { "epoch": 0.5405197305101058, "grad_norm": 1.6735597848892212, "learning_rate": 0.00016611371878122412, "loss": 1.2311, "step": 14040 }, { "epoch": 0.5407122232916266, "grad_norm": 1.8882919549942017, "learning_rate": 0.00016609102803986204, "loss": 1.3099, "step": 14045 }, { "epoch": 0.5409047160731473, "grad_norm": 1.4272384643554688, "learning_rate": 0.00016606833125491327, "loss": 1.2343, "step": 14050 }, { "epoch": 0.5410972088546679, "grad_norm": 1.2361105680465698, "learning_rate": 0.0001660456284284532, "loss": 1.155, "step": 14055 }, { "epoch": 0.5412897016361886, "grad_norm": 1.294826626777649, "learning_rate": 0.000166022919562558, "loss": 1.0691, "step": 14060 }, { "epoch": 0.5414821944177093, "grad_norm": 2.163748264312744, "learning_rate": 0.00016600020465930415, "loss": 1.4603, "step": 14065 }, { "epoch": 0.5416746871992301, "grad_norm": 2.8181777000427246, "learning_rate": 0.00016597748372076878, "loss": 1.1513, "step": 14070 }, { "epoch": 0.5418671799807507, "grad_norm": 1.558497667312622, "learning_rate": 0.00016595475674902957, "loss": 1.1758, "step": 14075 }, { "epoch": 0.5420596727622714, "grad_norm": 1.5868738889694214, "learning_rate": 
0.0001659320237461648, "loss": 1.1867, "step": 14080 }, { "epoch": 0.5422521655437921, "grad_norm": 0.850387692451477, "learning_rate": 0.0001659092847142532, "loss": 0.8849, "step": 14085 }, { "epoch": 0.5424446583253129, "grad_norm": 1.334726095199585, "learning_rate": 0.00016588653965537412, "loss": 1.252, "step": 14090 }, { "epoch": 0.5426371511068335, "grad_norm": 1.1548973321914673, "learning_rate": 0.00016586378857160743, "loss": 1.2255, "step": 14095 }, { "epoch": 0.5428296438883542, "grad_norm": 1.3282769918441772, "learning_rate": 0.00016584103146503364, "loss": 1.0991, "step": 14100 }, { "epoch": 0.5430221366698749, "grad_norm": 1.635657548904419, "learning_rate": 0.00016581826833773363, "loss": 1.1963, "step": 14105 }, { "epoch": 0.5432146294513955, "grad_norm": 1.7892380952835083, "learning_rate": 0.00016579549919178903, "loss": 1.0593, "step": 14110 }, { "epoch": 0.5434071222329163, "grad_norm": 2.381394147872925, "learning_rate": 0.00016577272402928183, "loss": 1.2743, "step": 14115 }, { "epoch": 0.543599615014437, "grad_norm": 1.1770328283309937, "learning_rate": 0.00016574994285229478, "loss": 1.1433, "step": 14120 }, { "epoch": 0.5437921077959577, "grad_norm": 1.9077178239822388, "learning_rate": 0.00016572715566291098, "loss": 1.2422, "step": 14125 }, { "epoch": 0.5439846005774783, "grad_norm": 1.2600334882736206, "learning_rate": 0.00016570436246321417, "loss": 1.1479, "step": 14130 }, { "epoch": 0.544177093358999, "grad_norm": 1.0997780561447144, "learning_rate": 0.0001656815632552887, "loss": 1.2516, "step": 14135 }, { "epoch": 0.5443695861405198, "grad_norm": 1.1767383813858032, "learning_rate": 0.00016565875804121935, "loss": 1.1713, "step": 14140 }, { "epoch": 0.5445620789220404, "grad_norm": 1.62860906124115, "learning_rate": 0.00016563594682309152, "loss": 1.2017, "step": 14145 }, { "epoch": 0.5447545717035611, "grad_norm": 1.6149252653121948, "learning_rate": 0.0001656131296029912, "loss": 1.2104, "step": 14150 }, { "epoch": 
0.5449470644850818, "grad_norm": 1.0693351030349731, "learning_rate": 0.0001655903063830048, "loss": 1.2656, "step": 14155 }, { "epoch": 0.5451395572666025, "grad_norm": 1.4624438285827637, "learning_rate": 0.00016556747716521937, "loss": 1.2323, "step": 14160 }, { "epoch": 0.5453320500481232, "grad_norm": 1.8848096132278442, "learning_rate": 0.0001655446419517225, "loss": 1.2104, "step": 14165 }, { "epoch": 0.5455245428296439, "grad_norm": 1.076907753944397, "learning_rate": 0.00016552180074460231, "loss": 1.2503, "step": 14170 }, { "epoch": 0.5457170356111646, "grad_norm": 2.496718645095825, "learning_rate": 0.00016549895354594748, "loss": 1.146, "step": 14175 }, { "epoch": 0.5459095283926853, "grad_norm": 1.8133556842803955, "learning_rate": 0.00016547610035784724, "loss": 1.274, "step": 14180 }, { "epoch": 0.5461020211742059, "grad_norm": 1.1353720426559448, "learning_rate": 0.0001654532411823914, "loss": 1.3842, "step": 14185 }, { "epoch": 0.5462945139557267, "grad_norm": 2.368894577026367, "learning_rate": 0.00016543037602167017, "loss": 1.3566, "step": 14190 }, { "epoch": 0.5464870067372474, "grad_norm": 1.884104609489441, "learning_rate": 0.00016540750487777455, "loss": 1.15, "step": 14195 }, { "epoch": 0.546679499518768, "grad_norm": 1.1348326206207275, "learning_rate": 0.00016538462775279587, "loss": 1.1782, "step": 14200 }, { "epoch": 0.5468719923002887, "grad_norm": 1.2342017889022827, "learning_rate": 0.00016536174464882613, "loss": 1.1361, "step": 14205 }, { "epoch": 0.5470644850818094, "grad_norm": 1.0037345886230469, "learning_rate": 0.0001653388555679578, "loss": 1.1282, "step": 14210 }, { "epoch": 0.5472569778633302, "grad_norm": 2.8669965267181396, "learning_rate": 0.000165315960512284, "loss": 1.184, "step": 14215 }, { "epoch": 0.5474494706448508, "grad_norm": 1.0212280750274658, "learning_rate": 0.00016529305948389825, "loss": 1.1422, "step": 14220 }, { "epoch": 0.5476419634263715, "grad_norm": 1.1197772026062012, "learning_rate": 
0.00016527015248489474, "loss": 1.077, "step": 14225 }, { "epoch": 0.5478344562078922, "grad_norm": 1.4821882247924805, "learning_rate": 0.0001652472395173682, "loss": 1.3187, "step": 14230 }, { "epoch": 0.5480269489894128, "grad_norm": 1.1993844509124756, "learning_rate": 0.00016522432058341377, "loss": 1.1834, "step": 14235 }, { "epoch": 0.5482194417709336, "grad_norm": 1.9386481046676636, "learning_rate": 0.00016520139568512734, "loss": 1.1461, "step": 14240 }, { "epoch": 0.5484119345524543, "grad_norm": 0.8914703130722046, "learning_rate": 0.00016517846482460517, "loss": 1.4175, "step": 14245 }, { "epoch": 0.548604427333975, "grad_norm": 1.8703666925430298, "learning_rate": 0.00016515552800394417, "loss": 1.2483, "step": 14250 }, { "epoch": 0.5487969201154956, "grad_norm": 1.1656851768493652, "learning_rate": 0.00016513258522524177, "loss": 1.2293, "step": 14255 }, { "epoch": 0.5489894128970164, "grad_norm": 1.402370810508728, "learning_rate": 0.0001651096364905959, "loss": 1.3326, "step": 14260 }, { "epoch": 0.5491819056785371, "grad_norm": 1.8804208040237427, "learning_rate": 0.00016508668180210506, "loss": 1.1033, "step": 14265 }, { "epoch": 0.5493743984600578, "grad_norm": 1.0970590114593506, "learning_rate": 0.00016506372116186836, "loss": 1.1887, "step": 14270 }, { "epoch": 0.5495668912415784, "grad_norm": 1.3364982604980469, "learning_rate": 0.00016504075457198533, "loss": 1.1183, "step": 14275 }, { "epoch": 0.5497593840230991, "grad_norm": 1.4718800783157349, "learning_rate": 0.0001650177820345562, "loss": 1.1305, "step": 14280 }, { "epoch": 0.5499518768046199, "grad_norm": 1.3023836612701416, "learning_rate": 0.00016499480355168156, "loss": 1.2267, "step": 14285 }, { "epoch": 0.5501443695861405, "grad_norm": 1.809346079826355, "learning_rate": 0.0001649718191254627, "loss": 1.081, "step": 14290 }, { "epoch": 0.5503368623676612, "grad_norm": 1.3828262090682983, "learning_rate": 0.0001649488287580014, "loss": 1.209, "step": 14295 }, { "epoch": 
0.5505293551491819, "grad_norm": 1.4741365909576416, "learning_rate": 0.00016492583245139995, "loss": 1.0607, "step": 14300 }, { "epoch": 0.5507218479307026, "grad_norm": 1.057210922241211, "learning_rate": 0.0001649028302077612, "loss": 1.281, "step": 14305 }, { "epoch": 0.5509143407122233, "grad_norm": 2.588911294937134, "learning_rate": 0.00016487982202918858, "loss": 1.382, "step": 14310 }, { "epoch": 0.551106833493744, "grad_norm": 2.2811248302459717, "learning_rate": 0.00016485680791778604, "loss": 1.3173, "step": 14315 }, { "epoch": 0.5512993262752647, "grad_norm": 1.675776481628418, "learning_rate": 0.00016483378787565802, "loss": 1.1948, "step": 14320 }, { "epoch": 0.5514918190567853, "grad_norm": 1.1149309873580933, "learning_rate": 0.0001648107619049096, "loss": 1.1406, "step": 14325 }, { "epoch": 0.551684311838306, "grad_norm": 1.0165066719055176, "learning_rate": 0.00016478773000764635, "loss": 1.1491, "step": 14330 }, { "epoch": 0.5518768046198268, "grad_norm": 1.8692020177841187, "learning_rate": 0.00016476469218597433, "loss": 1.0848, "step": 14335 }, { "epoch": 0.5520692974013475, "grad_norm": 0.9627811908721924, "learning_rate": 0.0001647416484420003, "loss": 1.2159, "step": 14340 }, { "epoch": 0.5522617901828681, "grad_norm": 2.1085097789764404, "learning_rate": 0.00016471859877783133, "loss": 1.1551, "step": 14345 }, { "epoch": 0.5524542829643888, "grad_norm": 2.2478790283203125, "learning_rate": 0.00016469554319557527, "loss": 1.3081, "step": 14350 }, { "epoch": 0.5526467757459095, "grad_norm": 1.0580302476882935, "learning_rate": 0.00016467248169734037, "loss": 1.3293, "step": 14355 }, { "epoch": 0.5528392685274303, "grad_norm": 1.3953101634979248, "learning_rate": 0.00016464941428523538, "loss": 1.1256, "step": 14360 }, { "epoch": 0.5530317613089509, "grad_norm": 0.9302542209625244, "learning_rate": 0.0001646263409613697, "loss": 0.9645, "step": 14365 }, { "epoch": 0.5532242540904716, "grad_norm": 1.9415937662124634, "learning_rate": 
0.00016460326172785332, "loss": 1.3428, "step": 14370 }, { "epoch": 0.5534167468719923, "grad_norm": 0.9449756145477295, "learning_rate": 0.00016458017658679656, "loss": 1.3183, "step": 14375 }, { "epoch": 0.5536092396535129, "grad_norm": 1.2944326400756836, "learning_rate": 0.00016455708554031047, "loss": 1.1277, "step": 14380 }, { "epoch": 0.5538017324350337, "grad_norm": 1.3632171154022217, "learning_rate": 0.00016453398859050657, "loss": 1.3262, "step": 14385 }, { "epoch": 0.5539942252165544, "grad_norm": 1.119086503982544, "learning_rate": 0.00016451088573949692, "loss": 1.2639, "step": 14390 }, { "epoch": 0.5541867179980751, "grad_norm": 1.3261640071868896, "learning_rate": 0.00016448777698939407, "loss": 1.0911, "step": 14395 }, { "epoch": 0.5543792107795957, "grad_norm": 1.6098653078079224, "learning_rate": 0.00016446466234231125, "loss": 1.2942, "step": 14400 }, { "epoch": 0.5545717035611165, "grad_norm": 2.1425249576568604, "learning_rate": 0.0001644415418003621, "loss": 1.1566, "step": 14405 }, { "epoch": 0.5547641963426372, "grad_norm": 1.0087484121322632, "learning_rate": 0.0001644184153656608, "loss": 1.1515, "step": 14410 }, { "epoch": 0.5549566891241579, "grad_norm": 1.3792825937271118, "learning_rate": 0.00016439528304032218, "loss": 1.3815, "step": 14415 }, { "epoch": 0.5551491819056785, "grad_norm": 1.0076264142990112, "learning_rate": 0.0001643721448264615, "loss": 1.1996, "step": 14420 }, { "epoch": 0.5553416746871992, "grad_norm": 1.6108455657958984, "learning_rate": 0.0001643490007261946, "loss": 1.2801, "step": 14425 }, { "epoch": 0.55553416746872, "grad_norm": 1.4850428104400635, "learning_rate": 0.00016432585074163783, "loss": 1.1272, "step": 14430 }, { "epoch": 0.5557266602502406, "grad_norm": 1.482926607131958, "learning_rate": 0.0001643026948749082, "loss": 0.9271, "step": 14435 }, { "epoch": 0.5559191530317613, "grad_norm": 1.404266119003296, "learning_rate": 0.000164279533128123, "loss": 1.2476, "step": 14440 }, { "epoch": 
0.556111645813282, "grad_norm": 1.5951578617095947, "learning_rate": 0.00016425636550340035, "loss": 1.036, "step": 14445 }, { "epoch": 0.5563041385948027, "grad_norm": 1.11802339553833, "learning_rate": 0.00016423319200285877, "loss": 1.2595, "step": 14450 }, { "epoch": 0.5564966313763234, "grad_norm": 0.9702684283256531, "learning_rate": 0.00016421001262861723, "loss": 1.1478, "step": 14455 }, { "epoch": 0.5566891241578441, "grad_norm": 1.4077606201171875, "learning_rate": 0.00016418682738279542, "loss": 1.2807, "step": 14460 }, { "epoch": 0.5568816169393648, "grad_norm": 1.5000783205032349, "learning_rate": 0.00016416363626751344, "loss": 1.3231, "step": 14465 }, { "epoch": 0.5570741097208854, "grad_norm": 1.0804152488708496, "learning_rate": 0.00016414043928489195, "loss": 1.2609, "step": 14470 }, { "epoch": 0.5572666025024061, "grad_norm": 2.0902814865112305, "learning_rate": 0.0001641172364370522, "loss": 1.1007, "step": 14475 }, { "epoch": 0.5574590952839269, "grad_norm": 0.9129114151000977, "learning_rate": 0.0001640940277261159, "loss": 1.2385, "step": 14480 }, { "epoch": 0.5576515880654476, "grad_norm": 1.5251227617263794, "learning_rate": 0.0001640708131542054, "loss": 1.3068, "step": 14485 }, { "epoch": 0.5578440808469682, "grad_norm": 1.7822771072387695, "learning_rate": 0.00016404759272344342, "loss": 1.2942, "step": 14490 }, { "epoch": 0.5580365736284889, "grad_norm": 1.7675615549087524, "learning_rate": 0.00016402436643595336, "loss": 0.9753, "step": 14495 }, { "epoch": 0.5582290664100096, "grad_norm": 1.4113742113113403, "learning_rate": 0.0001640011342938591, "loss": 1.2727, "step": 14500 }, { "epoch": 0.5584215591915304, "grad_norm": 2.213724136352539, "learning_rate": 0.0001639778962992851, "loss": 1.1275, "step": 14505 }, { "epoch": 0.558614051973051, "grad_norm": 0.777229368686676, "learning_rate": 0.0001639546524543563, "loss": 1.0434, "step": 14510 }, { "epoch": 0.5588065447545717, "grad_norm": 1.0420740842819214, "learning_rate": 
0.00016393140276119817, "loss": 1.1202, "step": 14515 }, { "epoch": 0.5589990375360924, "grad_norm": 1.4241138696670532, "learning_rate": 0.00016390814722193678, "loss": 1.0245, "step": 14520 }, { "epoch": 0.559191530317613, "grad_norm": 1.1826037168502808, "learning_rate": 0.00016388488583869872, "loss": 1.1894, "step": 14525 }, { "epoch": 0.5593840230991338, "grad_norm": 1.136072039604187, "learning_rate": 0.000163861618613611, "loss": 1.2093, "step": 14530 }, { "epoch": 0.5595765158806545, "grad_norm": 1.0932581424713135, "learning_rate": 0.0001638383455488013, "loss": 1.26, "step": 14535 }, { "epoch": 0.5597690086621752, "grad_norm": 1.4892606735229492, "learning_rate": 0.00016381506664639784, "loss": 1.0244, "step": 14540 }, { "epoch": 0.5599615014436958, "grad_norm": 1.4259272813796997, "learning_rate": 0.0001637917819085292, "loss": 0.9896, "step": 14545 }, { "epoch": 0.5601539942252165, "grad_norm": 1.0615971088409424, "learning_rate": 0.00016376849133732473, "loss": 1.1619, "step": 14550 }, { "epoch": 0.5603464870067373, "grad_norm": 0.8815811276435852, "learning_rate": 0.00016374519493491413, "loss": 1.1123, "step": 14555 }, { "epoch": 0.560538979788258, "grad_norm": 1.2956461906433105, "learning_rate": 0.00016372189270342778, "loss": 1.1978, "step": 14560 }, { "epoch": 0.5607314725697786, "grad_norm": 1.8797427415847778, "learning_rate": 0.00016369858464499641, "loss": 1.4186, "step": 14565 }, { "epoch": 0.5609239653512993, "grad_norm": 1.6631108522415161, "learning_rate": 0.00016367527076175143, "loss": 0.9839, "step": 14570 }, { "epoch": 0.5611164581328201, "grad_norm": 1.8200160264968872, "learning_rate": 0.0001636519510558248, "loss": 1.1272, "step": 14575 }, { "epoch": 0.5613089509143407, "grad_norm": 1.884712815284729, "learning_rate": 0.00016362862552934886, "loss": 1.15, "step": 14580 }, { "epoch": 0.5615014436958614, "grad_norm": 0.7094476222991943, "learning_rate": 0.00016360529418445662, "loss": 0.9581, "step": 14585 }, { "epoch": 
0.5616939364773821, "grad_norm": 0.9652591347694397, "learning_rate": 0.00016358195702328158, "loss": 1.0858, "step": 14590 }, { "epoch": 0.5618864292589028, "grad_norm": 1.3010308742523193, "learning_rate": 0.00016355861404795778, "loss": 1.2491, "step": 14595 }, { "epoch": 0.5620789220404235, "grad_norm": 1.459953784942627, "learning_rate": 0.00016353526526061973, "loss": 1.1194, "step": 14600 }, { "epoch": 0.5622714148219442, "grad_norm": 1.0818215608596802, "learning_rate": 0.0001635119106634026, "loss": 1.202, "step": 14605 }, { "epoch": 0.5624639076034649, "grad_norm": 1.0625619888305664, "learning_rate": 0.0001634885502584419, "loss": 1.3284, "step": 14610 }, { "epoch": 0.5626564003849855, "grad_norm": 1.5708478689193726, "learning_rate": 0.0001634651840478739, "loss": 1.036, "step": 14615 }, { "epoch": 0.5628488931665062, "grad_norm": 1.2847293615341187, "learning_rate": 0.00016344181203383523, "loss": 1.0858, "step": 14620 }, { "epoch": 0.563041385948027, "grad_norm": 1.082689881324768, "learning_rate": 0.00016341843421846313, "loss": 1.3457, "step": 14625 }, { "epoch": 0.5632338787295477, "grad_norm": 1.9000965356826782, "learning_rate": 0.0001633950506038953, "loss": 1.3901, "step": 14630 }, { "epoch": 0.5634263715110683, "grad_norm": 1.4664018154144287, "learning_rate": 0.0001633716611922701, "loss": 1.0836, "step": 14635 }, { "epoch": 0.563618864292589, "grad_norm": 1.6126337051391602, "learning_rate": 0.0001633482659857262, "loss": 1.0794, "step": 14640 }, { "epoch": 0.5638113570741097, "grad_norm": 1.865504503250122, "learning_rate": 0.00016332486498640307, "loss": 0.9427, "step": 14645 }, { "epoch": 0.5640038498556305, "grad_norm": 1.4346791505813599, "learning_rate": 0.0001633014581964405, "loss": 1.1952, "step": 14650 }, { "epoch": 0.5641963426371511, "grad_norm": 1.3558484315872192, "learning_rate": 0.00016327804561797895, "loss": 1.1679, "step": 14655 }, { "epoch": 0.5643888354186718, "grad_norm": 1.3297834396362305, "learning_rate": 
0.00016325462725315926, "loss": 1.2225, "step": 14660 }, { "epoch": 0.5645813282001925, "grad_norm": 2.106694221496582, "learning_rate": 0.00016323120310412297, "loss": 1.072, "step": 14665 }, { "epoch": 0.5647738209817131, "grad_norm": 1.284629225730896, "learning_rate": 0.00016320777317301198, "loss": 1.0004, "step": 14670 }, { "epoch": 0.5649663137632339, "grad_norm": 1.4289201498031616, "learning_rate": 0.0001631843374619689, "loss": 1.1239, "step": 14675 }, { "epoch": 0.5651588065447546, "grad_norm": 1.9027820825576782, "learning_rate": 0.0001631608959731367, "loss": 1.2137, "step": 14680 }, { "epoch": 0.5653512993262753, "grad_norm": 1.878009557723999, "learning_rate": 0.00016313744870865895, "loss": 1.247, "step": 14685 }, { "epoch": 0.5655437921077959, "grad_norm": 1.4919451475143433, "learning_rate": 0.00016311399567067974, "loss": 1.3506, "step": 14690 }, { "epoch": 0.5657362848893166, "grad_norm": 2.0583205223083496, "learning_rate": 0.00016309053686134378, "loss": 1.1191, "step": 14695 }, { "epoch": 0.5659287776708374, "grad_norm": 1.1545616388320923, "learning_rate": 0.00016306707228279615, "loss": 1.2105, "step": 14700 }, { "epoch": 0.566121270452358, "grad_norm": 0.8714199662208557, "learning_rate": 0.0001630436019371825, "loss": 1.0834, "step": 14705 }, { "epoch": 0.5663137632338787, "grad_norm": 2.1866228580474854, "learning_rate": 0.0001630201258266491, "loss": 1.3334, "step": 14710 }, { "epoch": 0.5665062560153994, "grad_norm": 1.4117622375488281, "learning_rate": 0.00016299664395334266, "loss": 1.1353, "step": 14715 }, { "epoch": 0.5666987487969202, "grad_norm": 1.5454515218734741, "learning_rate": 0.00016297315631941045, "loss": 1.096, "step": 14720 }, { "epoch": 0.5668912415784408, "grad_norm": 1.1799986362457275, "learning_rate": 0.00016294966292700026, "loss": 1.214, "step": 14725 }, { "epoch": 0.5670837343599615, "grad_norm": 1.2906007766723633, "learning_rate": 0.00016292616377826038, "loss": 1.2613, "step": 14730 }, { "epoch": 
0.5672762271414822, "grad_norm": 2.8731329441070557, "learning_rate": 0.00016290265887533968, "loss": 1.3257, "step": 14735 }, { "epoch": 0.5674687199230029, "grad_norm": 1.0078117847442627, "learning_rate": 0.0001628791482203875, "loss": 1.2053, "step": 14740 }, { "epoch": 0.5676612127045236, "grad_norm": 1.05767023563385, "learning_rate": 0.0001628556318155538, "loss": 0.9775, "step": 14745 }, { "epoch": 0.5678537054860443, "grad_norm": 2.118110418319702, "learning_rate": 0.0001628321096629889, "loss": 1.1801, "step": 14750 }, { "epoch": 0.568046198267565, "grad_norm": 1.1577699184417725, "learning_rate": 0.00016280858176484384, "loss": 1.2156, "step": 14755 }, { "epoch": 0.5682386910490856, "grad_norm": 1.5565030574798584, "learning_rate": 0.00016278504812327002, "loss": 1.0586, "step": 14760 }, { "epoch": 0.5684311838306063, "grad_norm": 1.5205986499786377, "learning_rate": 0.00016276150874041946, "loss": 1.3679, "step": 14765 }, { "epoch": 0.5686236766121271, "grad_norm": 0.9402291774749756, "learning_rate": 0.00016273796361844468, "loss": 1.0996, "step": 14770 }, { "epoch": 0.5688161693936478, "grad_norm": 1.3806294202804565, "learning_rate": 0.00016271441275949875, "loss": 1.1815, "step": 14775 }, { "epoch": 0.5690086621751684, "grad_norm": 2.0714609622955322, "learning_rate": 0.0001626908561657352, "loss": 1.1945, "step": 14780 }, { "epoch": 0.5692011549566891, "grad_norm": 0.9732249975204468, "learning_rate": 0.00016266729383930816, "loss": 1.0233, "step": 14785 }, { "epoch": 0.5693936477382098, "grad_norm": 1.3748955726623535, "learning_rate": 0.0001626437257823722, "loss": 1.25, "step": 14790 }, { "epoch": 0.5695861405197306, "grad_norm": 1.9781707525253296, "learning_rate": 0.00016262015199708252, "loss": 1.1745, "step": 14795 }, { "epoch": 0.5697786333012512, "grad_norm": 1.5062282085418701, "learning_rate": 0.00016259657248559475, "loss": 1.098, "step": 14800 }, { "epoch": 0.5699711260827719, "grad_norm": 1.7073885202407837, "learning_rate": 
0.0001625729872500651, "loss": 1.1191, "step": 14805 }, { "epoch": 0.5701636188642926, "grad_norm": 2.0891575813293457, "learning_rate": 0.00016254939629265026, "loss": 1.2533, "step": 14810 }, { "epoch": 0.5703561116458132, "grad_norm": 1.6380434036254883, "learning_rate": 0.0001625257996155075, "loss": 1.2756, "step": 14815 }, { "epoch": 0.570548604427334, "grad_norm": 1.1182715892791748, "learning_rate": 0.00016250219722079452, "loss": 1.3084, "step": 14820 }, { "epoch": 0.5707410972088547, "grad_norm": 1.2113651037216187, "learning_rate": 0.0001624785891106697, "loss": 1.287, "step": 14825 }, { "epoch": 0.5709335899903754, "grad_norm": 1.1726208925247192, "learning_rate": 0.00016245497528729174, "loss": 1.1491, "step": 14830 }, { "epoch": 0.571126082771896, "grad_norm": 1.0203557014465332, "learning_rate": 0.00016243135575282004, "loss": 1.0809, "step": 14835 }, { "epoch": 0.5713185755534167, "grad_norm": 1.2878923416137695, "learning_rate": 0.00016240773050941443, "loss": 1.1848, "step": 14840 }, { "epoch": 0.5715110683349375, "grad_norm": 1.5805665254592896, "learning_rate": 0.00016238409955923527, "loss": 1.1191, "step": 14845 }, { "epoch": 0.5717035611164581, "grad_norm": 1.089296579360962, "learning_rate": 0.00016236046290444347, "loss": 1.066, "step": 14850 }, { "epoch": 0.5718960538979788, "grad_norm": 1.1492708921432495, "learning_rate": 0.0001623368205472004, "loss": 1.174, "step": 14855 }, { "epoch": 0.5720885466794995, "grad_norm": 1.9744573831558228, "learning_rate": 0.00016231317248966809, "loss": 1.248, "step": 14860 }, { "epoch": 0.5722810394610202, "grad_norm": 2.2061898708343506, "learning_rate": 0.0001622895187340089, "loss": 1.2028, "step": 14865 }, { "epoch": 0.5724735322425409, "grad_norm": 1.0993640422821045, "learning_rate": 0.0001622658592823859, "loss": 1.059, "step": 14870 }, { "epoch": 0.5726660250240616, "grad_norm": 1.5680936574935913, "learning_rate": 0.00016224219413696252, "loss": 1.4181, "step": 14875 }, { "epoch": 
0.5728585178055823, "grad_norm": 1.3295773267745972, "learning_rate": 0.00016221852329990276, "loss": 1.214, "step": 14880 }, { "epoch": 0.573051010587103, "grad_norm": 1.2004729509353638, "learning_rate": 0.00016219484677337126, "loss": 1.1474, "step": 14885 }, { "epoch": 0.5732435033686237, "grad_norm": 1.3868520259857178, "learning_rate": 0.000162171164559533, "loss": 1.2373, "step": 14890 }, { "epoch": 0.5734359961501444, "grad_norm": 1.2218377590179443, "learning_rate": 0.00016214747666055358, "loss": 1.1009, "step": 14895 }, { "epoch": 0.5736284889316651, "grad_norm": 1.1113415956497192, "learning_rate": 0.00016212378307859914, "loss": 1.2191, "step": 14900 }, { "epoch": 0.5738209817131857, "grad_norm": 1.099223256111145, "learning_rate": 0.00016210008381583623, "loss": 1.2024, "step": 14905 }, { "epoch": 0.5740134744947064, "grad_norm": 1.3597705364227295, "learning_rate": 0.00016207637887443208, "loss": 1.1785, "step": 14910 }, { "epoch": 0.5742059672762272, "grad_norm": 1.675276279449463, "learning_rate": 0.00016205266825655427, "loss": 1.1492, "step": 14915 }, { "epoch": 0.5743984600577479, "grad_norm": 1.5977553129196167, "learning_rate": 0.000162028951964371, "loss": 1.1355, "step": 14920 }, { "epoch": 0.5745909528392685, "grad_norm": 2.0862395763397217, "learning_rate": 0.000162005230000051, "loss": 1.1747, "step": 14925 }, { "epoch": 0.5747834456207892, "grad_norm": 0.8812354803085327, "learning_rate": 0.00016198150236576347, "loss": 1.2876, "step": 14930 }, { "epoch": 0.5749759384023099, "grad_norm": 1.3878661394119263, "learning_rate": 0.0001619577690636781, "loss": 1.2269, "step": 14935 }, { "epoch": 0.5751684311838307, "grad_norm": 1.0739976167678833, "learning_rate": 0.0001619340300959652, "loss": 1.2467, "step": 14940 }, { "epoch": 0.5753609239653513, "grad_norm": 0.766392707824707, "learning_rate": 0.0001619102854647955, "loss": 1.0829, "step": 14945 }, { "epoch": 0.575553416746872, "grad_norm": 1.2837680578231812, "learning_rate": 
0.00016188653517234036, "loss": 1.2027, "step": 14950 }, { "epoch": 0.5757459095283927, "grad_norm": 2.2257256507873535, "learning_rate": 0.00016186277922077152, "loss": 1.1181, "step": 14955 }, { "epoch": 0.5759384023099133, "grad_norm": 1.257380723953247, "learning_rate": 0.00016183901761226133, "loss": 1.1899, "step": 14960 }, { "epoch": 0.5761308950914341, "grad_norm": 1.1324365139007568, "learning_rate": 0.00016181525034898261, "loss": 1.0823, "step": 14965 }, { "epoch": 0.5763233878729548, "grad_norm": 0.9696788787841797, "learning_rate": 0.00016179147743310872, "loss": 1.176, "step": 14970 }, { "epoch": 0.5765158806544755, "grad_norm": 1.8557454347610474, "learning_rate": 0.00016176769886681357, "loss": 1.1396, "step": 14975 }, { "epoch": 0.5767083734359961, "grad_norm": 1.2395600080490112, "learning_rate": 0.00016174391465227154, "loss": 1.0799, "step": 14980 }, { "epoch": 0.5769008662175168, "grad_norm": 1.8957431316375732, "learning_rate": 0.00016172012479165752, "loss": 1.146, "step": 14985 }, { "epoch": 0.5770933589990376, "grad_norm": 1.191486120223999, "learning_rate": 0.00016169632928714697, "loss": 1.0166, "step": 14990 }, { "epoch": 0.5772858517805582, "grad_norm": 1.7964496612548828, "learning_rate": 0.0001616725281409158, "loss": 1.2131, "step": 14995 }, { "epoch": 0.5774783445620789, "grad_norm": 1.4722768068313599, "learning_rate": 0.00016164872135514044, "loss": 1.0148, "step": 15000 }, { "epoch": 0.5776708373435996, "grad_norm": 1.265663981437683, "learning_rate": 0.00016162490893199791, "loss": 1.1166, "step": 15005 }, { "epoch": 0.5778633301251203, "grad_norm": 1.2796491384506226, "learning_rate": 0.0001616010908736657, "loss": 1.2911, "step": 15010 }, { "epoch": 0.578055822906641, "grad_norm": 1.025158166885376, "learning_rate": 0.00016157726718232177, "loss": 1.0723, "step": 15015 }, { "epoch": 0.5782483156881617, "grad_norm": 1.5206444263458252, "learning_rate": 0.00016155343786014472, "loss": 1.0406, "step": 15020 }, { "epoch": 
0.5784408084696824, "grad_norm": 1.5212637186050415, "learning_rate": 0.0001615296029093135, "loss": 1.0445, "step": 15025 }, { "epoch": 0.578633301251203, "grad_norm": 1.3746932744979858, "learning_rate": 0.0001615057623320077, "loss": 1.1385, "step": 15030 }, { "epoch": 0.5788257940327238, "grad_norm": 1.8660439252853394, "learning_rate": 0.00016148191613040734, "loss": 1.0786, "step": 15035 }, { "epoch": 0.5790182868142445, "grad_norm": 2.343719720840454, "learning_rate": 0.0001614580643066931, "loss": 1.276, "step": 15040 }, { "epoch": 0.5792107795957652, "grad_norm": 1.1358321905136108, "learning_rate": 0.00016143420686304594, "loss": 1.3055, "step": 15045 }, { "epoch": 0.5794032723772858, "grad_norm": 1.6678638458251953, "learning_rate": 0.00016141034380164754, "loss": 1.0694, "step": 15050 }, { "epoch": 0.5795957651588065, "grad_norm": 1.6096512079238892, "learning_rate": 0.00016138647512468004, "loss": 1.4079, "step": 15055 }, { "epoch": 0.5797882579403273, "grad_norm": 2.3922042846679688, "learning_rate": 0.000161362600834326, "loss": 1.2739, "step": 15060 }, { "epoch": 0.579980750721848, "grad_norm": 1.167476773262024, "learning_rate": 0.0001613387209327686, "loss": 1.2336, "step": 15065 }, { "epoch": 0.5801732435033686, "grad_norm": 0.9550272226333618, "learning_rate": 0.00016131483542219152, "loss": 1.1557, "step": 15070 }, { "epoch": 0.5803657362848893, "grad_norm": 1.1105631589889526, "learning_rate": 0.00016129094430477893, "loss": 1.0289, "step": 15075 }, { "epoch": 0.58055822906641, "grad_norm": 1.3411318063735962, "learning_rate": 0.00016126704758271548, "loss": 1.2454, "step": 15080 }, { "epoch": 0.5807507218479308, "grad_norm": 1.2867335081100464, "learning_rate": 0.00016124314525818635, "loss": 1.2983, "step": 15085 }, { "epoch": 0.5809432146294514, "grad_norm": 1.6035441160202026, "learning_rate": 0.00016121923733337736, "loss": 1.2227, "step": 15090 }, { "epoch": 0.5811357074109721, "grad_norm": 1.6657713651657104, "learning_rate": 
0.0001611953238104746, "loss": 1.1144, "step": 15095 }, { "epoch": 0.5813282001924928, "grad_norm": 1.8781518936157227, "learning_rate": 0.00016117140469166486, "loss": 1.1393, "step": 15100 }, { "epoch": 0.5815206929740134, "grad_norm": 1.542438268661499, "learning_rate": 0.00016114747997913542, "loss": 1.188, "step": 15105 }, { "epoch": 0.5817131857555342, "grad_norm": 2.148175001144409, "learning_rate": 0.00016112354967507398, "loss": 1.1323, "step": 15110 }, { "epoch": 0.5819056785370549, "grad_norm": 1.3092713356018066, "learning_rate": 0.0001610996137816688, "loss": 1.1799, "step": 15115 }, { "epoch": 0.5820981713185756, "grad_norm": 1.4203580617904663, "learning_rate": 0.00016107567230110874, "loss": 1.0916, "step": 15120 }, { "epoch": 0.5822906641000962, "grad_norm": 1.2932054996490479, "learning_rate": 0.00016105172523558301, "loss": 1.13, "step": 15125 }, { "epoch": 0.5824831568816169, "grad_norm": 2.218705654144287, "learning_rate": 0.00016102777258728142, "loss": 1.153, "step": 15130 }, { "epoch": 0.5826756496631377, "grad_norm": 1.180166244506836, "learning_rate": 0.00016100381435839433, "loss": 1.2611, "step": 15135 }, { "epoch": 0.5828681424446583, "grad_norm": 2.007887125015259, "learning_rate": 0.00016097985055111256, "loss": 1.2046, "step": 15140 }, { "epoch": 0.583060635226179, "grad_norm": 1.20327889919281, "learning_rate": 0.00016095588116762734, "loss": 1.3217, "step": 15145 }, { "epoch": 0.5832531280076997, "grad_norm": 1.2758903503417969, "learning_rate": 0.00016093190621013063, "loss": 1.1277, "step": 15150 }, { "epoch": 0.5834456207892204, "grad_norm": 1.851881980895996, "learning_rate": 0.00016090792568081473, "loss": 1.1701, "step": 15155 }, { "epoch": 0.5836381135707411, "grad_norm": 1.6895406246185303, "learning_rate": 0.00016088393958187247, "loss": 1.3331, "step": 15160 }, { "epoch": 0.5838306063522618, "grad_norm": 1.4138762950897217, "learning_rate": 0.0001608599479154973, "loss": 1.3016, "step": 15165 }, { "epoch": 
0.5840230991337825, "grad_norm": 1.3571628332138062, "learning_rate": 0.00016083595068388303, "loss": 1.1407, "step": 15170 }, { "epoch": 0.5842155919153031, "grad_norm": 1.3217098712921143, "learning_rate": 0.00016081194788922405, "loss": 1.0032, "step": 15175 }, { "epoch": 0.5844080846968238, "grad_norm": 0.9765079617500305, "learning_rate": 0.00016078793953371533, "loss": 1.1543, "step": 15180 }, { "epoch": 0.5846005774783446, "grad_norm": 1.0757596492767334, "learning_rate": 0.0001607639256195522, "loss": 1.0828, "step": 15185 }, { "epoch": 0.5847930702598653, "grad_norm": 1.2296372652053833, "learning_rate": 0.00016073990614893057, "loss": 1.2089, "step": 15190 }, { "epoch": 0.5849855630413859, "grad_norm": 1.8743308782577515, "learning_rate": 0.00016071588112404693, "loss": 1.2195, "step": 15195 }, { "epoch": 0.5851780558229066, "grad_norm": 1.347332239151001, "learning_rate": 0.00016069185054709814, "loss": 1.1664, "step": 15200 }, { "epoch": 0.5853705486044274, "grad_norm": 1.629981279373169, "learning_rate": 0.00016066781442028165, "loss": 1.1888, "step": 15205 }, { "epoch": 0.5855630413859481, "grad_norm": 1.353702425956726, "learning_rate": 0.00016064377274579544, "loss": 1.265, "step": 15210 }, { "epoch": 0.5857555341674687, "grad_norm": 0.9861169457435608, "learning_rate": 0.00016061972552583795, "loss": 1.0908, "step": 15215 }, { "epoch": 0.5859480269489894, "grad_norm": 1.1305365562438965, "learning_rate": 0.00016059567276260813, "loss": 1.0076, "step": 15220 }, { "epoch": 0.5861405197305101, "grad_norm": 1.4098013639450073, "learning_rate": 0.00016057161445830542, "loss": 1.1882, "step": 15225 }, { "epoch": 0.5863330125120308, "grad_norm": 1.1900111436843872, "learning_rate": 0.00016054755061512986, "loss": 1.1961, "step": 15230 }, { "epoch": 0.5865255052935515, "grad_norm": 1.0856738090515137, "learning_rate": 0.00016052348123528183, "loss": 1.2169, "step": 15235 }, { "epoch": 0.5867179980750722, "grad_norm": 1.109937071800232, "learning_rate": 
0.0001604994063209624, "loss": 1.0818, "step": 15240 }, { "epoch": 0.5869104908565929, "grad_norm": 1.9059746265411377, "learning_rate": 0.00016047532587437304, "loss": 1.3035, "step": 15245 }, { "epoch": 0.5871029836381135, "grad_norm": 1.089796781539917, "learning_rate": 0.00016045123989771575, "loss": 1.0872, "step": 15250 }, { "epoch": 0.5872954764196343, "grad_norm": 1.3014196157455444, "learning_rate": 0.00016042714839319298, "loss": 1.1809, "step": 15255 }, { "epoch": 0.587487969201155, "grad_norm": 1.5097154378890991, "learning_rate": 0.00016040305136300783, "loss": 1.1026, "step": 15260 }, { "epoch": 0.5876804619826757, "grad_norm": 1.9508148431777954, "learning_rate": 0.00016037894880936376, "loss": 1.0489, "step": 15265 }, { "epoch": 0.5878729547641963, "grad_norm": 1.2007025480270386, "learning_rate": 0.0001603548407344648, "loss": 1.2376, "step": 15270 }, { "epoch": 0.588065447545717, "grad_norm": 4.035842418670654, "learning_rate": 0.00016033072714051545, "loss": 1.2894, "step": 15275 }, { "epoch": 0.5882579403272378, "grad_norm": 1.2279680967330933, "learning_rate": 0.00016030660802972074, "loss": 1.1945, "step": 15280 }, { "epoch": 0.5884504331087584, "grad_norm": 1.0882714986801147, "learning_rate": 0.00016028248340428625, "loss": 1.0842, "step": 15285 }, { "epoch": 0.5886429258902791, "grad_norm": 1.3169769048690796, "learning_rate": 0.00016025835326641797, "loss": 1.0085, "step": 15290 }, { "epoch": 0.5888354186717998, "grad_norm": 1.3032643795013428, "learning_rate": 0.00016023421761832246, "loss": 1.1994, "step": 15295 }, { "epoch": 0.5890279114533205, "grad_norm": 1.053415060043335, "learning_rate": 0.00016021007646220678, "loss": 1.0983, "step": 15300 }, { "epoch": 0.5892204042348412, "grad_norm": 1.483736515045166, "learning_rate": 0.00016018592980027846, "loss": 1.1709, "step": 15305 }, { "epoch": 0.5894128970163619, "grad_norm": 1.4688469171524048, "learning_rate": 0.00016016177763474555, "loss": 1.0505, "step": 15310 }, { "epoch": 
0.5896053897978826, "grad_norm": 1.7809165716171265, "learning_rate": 0.00016013761996781661, "loss": 1.1585, "step": 15315 }, { "epoch": 0.5897978825794032, "grad_norm": 1.5344901084899902, "learning_rate": 0.00016011345680170072, "loss": 1.1269, "step": 15320 }, { "epoch": 0.5899903753609239, "grad_norm": 1.298094630241394, "learning_rate": 0.0001600892881386074, "loss": 1.1804, "step": 15325 }, { "epoch": 0.5901828681424447, "grad_norm": 1.8283668756484985, "learning_rate": 0.0001600651139807467, "loss": 1.2059, "step": 15330 }, { "epoch": 0.5903753609239654, "grad_norm": 1.3290801048278809, "learning_rate": 0.00016004093433032924, "loss": 1.2334, "step": 15335 }, { "epoch": 0.590567853705486, "grad_norm": 1.461422324180603, "learning_rate": 0.00016001674918956612, "loss": 1.2987, "step": 15340 }, { "epoch": 0.5907603464870067, "grad_norm": 1.6681803464889526, "learning_rate": 0.00015999255856066885, "loss": 1.0221, "step": 15345 }, { "epoch": 0.5909528392685275, "grad_norm": 1.1714918613433838, "learning_rate": 0.00015996836244584948, "loss": 1.0144, "step": 15350 }, { "epoch": 0.5911453320500482, "grad_norm": 0.9316911697387695, "learning_rate": 0.00015994416084732062, "loss": 1.3241, "step": 15355 }, { "epoch": 0.5913378248315688, "grad_norm": 2.429568290710449, "learning_rate": 0.00015991995376729535, "loss": 1.3155, "step": 15360 }, { "epoch": 0.5915303176130895, "grad_norm": 1.3793234825134277, "learning_rate": 0.00015989574120798725, "loss": 1.2822, "step": 15365 }, { "epoch": 0.5917228103946102, "grad_norm": 1.1756724119186401, "learning_rate": 0.0001598715231716104, "loss": 1.0682, "step": 15370 }, { "epoch": 0.591915303176131, "grad_norm": 1.9872701168060303, "learning_rate": 0.00015984729966037934, "loss": 1.2034, "step": 15375 }, { "epoch": 0.5921077959576516, "grad_norm": 1.5333032608032227, "learning_rate": 0.00015982307067650918, "loss": 1.3922, "step": 15380 }, { "epoch": 0.5923002887391723, "grad_norm": 1.1813582181930542, "learning_rate": 
0.00015979883622221555, "loss": 1.1811, "step": 15385 }, { "epoch": 0.592492781520693, "grad_norm": 1.632565975189209, "learning_rate": 0.00015977459629971442, "loss": 1.0877, "step": 15390 }, { "epoch": 0.5926852743022136, "grad_norm": 1.0945332050323486, "learning_rate": 0.00015975035091122245, "loss": 1.0836, "step": 15395 }, { "epoch": 0.5928777670837344, "grad_norm": 0.8069517016410828, "learning_rate": 0.0001597261000589567, "loss": 1.1574, "step": 15400 }, { "epoch": 0.5930702598652551, "grad_norm": 1.8364413976669312, "learning_rate": 0.00015970184374513476, "loss": 1.1935, "step": 15405 }, { "epoch": 0.5932627526467757, "grad_norm": 1.5146484375, "learning_rate": 0.00015967758197197468, "loss": 1.06, "step": 15410 }, { "epoch": 0.5934552454282964, "grad_norm": 1.5792328119277954, "learning_rate": 0.00015965331474169508, "loss": 1.1464, "step": 15415 }, { "epoch": 0.5936477382098171, "grad_norm": 1.887292742729187, "learning_rate": 0.00015962904205651495, "loss": 1.2039, "step": 15420 }, { "epoch": 0.5938402309913379, "grad_norm": 1.8241037130355835, "learning_rate": 0.000159604763918654, "loss": 1.2011, "step": 15425 }, { "epoch": 0.5940327237728585, "grad_norm": 1.2130569219589233, "learning_rate": 0.0001595804803303322, "loss": 1.2401, "step": 15430 }, { "epoch": 0.5942252165543792, "grad_norm": 1.1083897352218628, "learning_rate": 0.00015955619129377017, "loss": 1.2919, "step": 15435 }, { "epoch": 0.5944177093358999, "grad_norm": 1.8266736268997192, "learning_rate": 0.00015953189681118895, "loss": 1.1609, "step": 15440 }, { "epoch": 0.5946102021174205, "grad_norm": 1.5710999965667725, "learning_rate": 0.0001595075968848102, "loss": 1.2178, "step": 15445 }, { "epoch": 0.5948026948989413, "grad_norm": 2.023061752319336, "learning_rate": 0.00015948329151685583, "loss": 1.2577, "step": 15450 }, { "epoch": 0.594995187680462, "grad_norm": 1.3245149850845337, "learning_rate": 0.00015945898070954853, "loss": 0.9832, "step": 15455 }, { "epoch": 
0.5951876804619827, "grad_norm": 1.7696577310562134, "learning_rate": 0.00015943466446511132, "loss": 1.1991, "step": 15460 }, { "epoch": 0.5953801732435033, "grad_norm": 1.0893733501434326, "learning_rate": 0.00015941034278576775, "loss": 1.3321, "step": 15465 }, { "epoch": 0.595572666025024, "grad_norm": 1.294731616973877, "learning_rate": 0.0001593860156737419, "loss": 1.0485, "step": 15470 }, { "epoch": 0.5957651588065448, "grad_norm": 1.1282588243484497, "learning_rate": 0.00015936168313125833, "loss": 1.0585, "step": 15475 }, { "epoch": 0.5959576515880655, "grad_norm": 0.9207860231399536, "learning_rate": 0.00015933734516054203, "loss": 1.1343, "step": 15480 }, { "epoch": 0.5961501443695861, "grad_norm": 2.2860140800476074, "learning_rate": 0.00015931300176381865, "loss": 1.3317, "step": 15485 }, { "epoch": 0.5963426371511068, "grad_norm": 1.2698768377304077, "learning_rate": 0.00015928865294331413, "loss": 1.175, "step": 15490 }, { "epoch": 0.5965351299326275, "grad_norm": 1.0986465215682983, "learning_rate": 0.00015926429870125505, "loss": 1.1309, "step": 15495 }, { "epoch": 0.5967276227141483, "grad_norm": 1.5664902925491333, "learning_rate": 0.00015923993903986844, "loss": 1.0117, "step": 15500 }, { "epoch": 0.5969201154956689, "grad_norm": 1.3162322044372559, "learning_rate": 0.00015921557396138188, "loss": 1.1964, "step": 15505 }, { "epoch": 0.5971126082771896, "grad_norm": 0.8635309934616089, "learning_rate": 0.0001591912034680233, "loss": 1.1119, "step": 15510 }, { "epoch": 0.5973051010587103, "grad_norm": 1.3118690252304077, "learning_rate": 0.00015916682756202127, "loss": 1.0618, "step": 15515 }, { "epoch": 0.597497593840231, "grad_norm": 1.0313913822174072, "learning_rate": 0.00015914244624560481, "loss": 1.0686, "step": 15520 }, { "epoch": 0.5976900866217517, "grad_norm": 1.3414394855499268, "learning_rate": 0.00015911805952100347, "loss": 1.2013, "step": 15525 }, { "epoch": 0.5978825794032724, "grad_norm": 1.2710504531860352, "learning_rate": 
0.00015909366739044715, "loss": 1.3748, "step": 15530 }, { "epoch": 0.598075072184793, "grad_norm": 1.6694974899291992, "learning_rate": 0.0001590692698561664, "loss": 0.9833, "step": 15535 }, { "epoch": 0.5982675649663137, "grad_norm": 1.5924476385116577, "learning_rate": 0.00015904486692039227, "loss": 1.2046, "step": 15540 }, { "epoch": 0.5984600577478345, "grad_norm": 2.3105616569519043, "learning_rate": 0.00015902045858535616, "loss": 1.233, "step": 15545 }, { "epoch": 0.5986525505293552, "grad_norm": 1.3003478050231934, "learning_rate": 0.00015899604485329012, "loss": 1.1891, "step": 15550 }, { "epoch": 0.5988450433108758, "grad_norm": 1.2988343238830566, "learning_rate": 0.00015897162572642656, "loss": 1.0767, "step": 15555 }, { "epoch": 0.5990375360923965, "grad_norm": 1.0845260620117188, "learning_rate": 0.00015894720120699849, "loss": 1.2702, "step": 15560 }, { "epoch": 0.5992300288739172, "grad_norm": 1.0050013065338135, "learning_rate": 0.00015892277129723935, "loss": 1.2267, "step": 15565 }, { "epoch": 0.599422521655438, "grad_norm": 1.3145102262496948, "learning_rate": 0.0001588983359993831, "loss": 1.1086, "step": 15570 }, { "epoch": 0.5996150144369586, "grad_norm": 1.817396640777588, "learning_rate": 0.00015887389531566424, "loss": 1.0999, "step": 15575 }, { "epoch": 0.5998075072184793, "grad_norm": 1.4001067876815796, "learning_rate": 0.0001588494492483176, "loss": 1.2802, "step": 15580 }, { "epoch": 0.6, "grad_norm": 2.1305971145629883, "learning_rate": 0.00015882499779957868, "loss": 1.2481, "step": 15585 }, { "epoch": 0.6001924927815206, "grad_norm": 1.5675426721572876, "learning_rate": 0.00015880054097168337, "loss": 1.2555, "step": 15590 }, { "epoch": 0.6003849855630414, "grad_norm": 1.3107160329818726, "learning_rate": 0.00015877607876686815, "loss": 1.273, "step": 15595 }, { "epoch": 0.6005774783445621, "grad_norm": 0.5348256230354309, "learning_rate": 0.00015875161118736986, "loss": 0.9708, "step": 15600 }, { "epoch": 0.6007699711260828, 
"grad_norm": 1.0877107381820679, "learning_rate": 0.00015872713823542593, "loss": 1.1419, "step": 15605 }, { "epoch": 0.6009624639076034, "grad_norm": 1.0563950538635254, "learning_rate": 0.00015870265991327424, "loss": 1.0216, "step": 15610 }, { "epoch": 0.6011549566891241, "grad_norm": 1.0346797704696655, "learning_rate": 0.00015867817622315316, "loss": 1.205, "step": 15615 }, { "epoch": 0.6013474494706449, "grad_norm": 1.67006254196167, "learning_rate": 0.00015865368716730158, "loss": 1.2875, "step": 15620 }, { "epoch": 0.6015399422521656, "grad_norm": 1.8183788061141968, "learning_rate": 0.00015862919274795884, "loss": 1.1703, "step": 15625 }, { "epoch": 0.6017324350336862, "grad_norm": 1.1460903882980347, "learning_rate": 0.00015860469296736482, "loss": 1.1998, "step": 15630 }, { "epoch": 0.6019249278152069, "grad_norm": 1.5365129709243774, "learning_rate": 0.00015858018782775985, "loss": 1.054, "step": 15635 }, { "epoch": 0.6021174205967276, "grad_norm": 1.4886486530303955, "learning_rate": 0.00015855567733138478, "loss": 1.2914, "step": 15640 }, { "epoch": 0.6023099133782484, "grad_norm": 1.519114375114441, "learning_rate": 0.00015853116148048087, "loss": 1.0586, "step": 15645 }, { "epoch": 0.602502406159769, "grad_norm": 1.2735627889633179, "learning_rate": 0.00015850664027729, "loss": 1.1287, "step": 15650 }, { "epoch": 0.6026948989412897, "grad_norm": 2.464672327041626, "learning_rate": 0.00015848211372405444, "loss": 1.0616, "step": 15655 }, { "epoch": 0.6028873917228104, "grad_norm": 0.9507278800010681, "learning_rate": 0.000158457581823017, "loss": 1.0118, "step": 15660 }, { "epoch": 0.6030798845043311, "grad_norm": 1.155150294303894, "learning_rate": 0.00015843304457642093, "loss": 1.0563, "step": 15665 }, { "epoch": 0.6032723772858518, "grad_norm": 2.669029474258423, "learning_rate": 0.00015840850198651002, "loss": 1.1918, "step": 15670 }, { "epoch": 0.6034648700673725, "grad_norm": 1.4008570909500122, "learning_rate": 0.00015838395405552854, "loss": 
1.2122, "step": 15675 }, { "epoch": 0.6036573628488932, "grad_norm": 1.4199731349945068, "learning_rate": 0.0001583594007857212, "loss": 1.2546, "step": 15680 }, { "epoch": 0.6038498556304138, "grad_norm": 2.2346031665802, "learning_rate": 0.0001583348421793333, "loss": 1.1184, "step": 15685 }, { "epoch": 0.6040423484119346, "grad_norm": 1.1559759378433228, "learning_rate": 0.00015831027823861048, "loss": 1.157, "step": 15690 }, { "epoch": 0.6042348411934553, "grad_norm": 1.9930438995361328, "learning_rate": 0.00015828570896579897, "loss": 1.1095, "step": 15695 }, { "epoch": 0.6044273339749759, "grad_norm": 1.040358304977417, "learning_rate": 0.00015826113436314548, "loss": 1.062, "step": 15700 }, { "epoch": 0.6046198267564966, "grad_norm": 0.8409137725830078, "learning_rate": 0.00015823655443289724, "loss": 1.0204, "step": 15705 }, { "epoch": 0.6048123195380173, "grad_norm": 1.477950930595398, "learning_rate": 0.00015821196917730184, "loss": 1.2479, "step": 15710 }, { "epoch": 0.6050048123195381, "grad_norm": 1.5752694606781006, "learning_rate": 0.00015818737859860752, "loss": 1.343, "step": 15715 }, { "epoch": 0.6051973051010587, "grad_norm": 1.505356788635254, "learning_rate": 0.00015816278269906284, "loss": 1.0742, "step": 15720 }, { "epoch": 0.6053897978825794, "grad_norm": 1.165273904800415, "learning_rate": 0.000158138181480917, "loss": 1.1023, "step": 15725 }, { "epoch": 0.6055822906641001, "grad_norm": 1.7088487148284912, "learning_rate": 0.00015811357494641958, "loss": 1.2899, "step": 15730 }, { "epoch": 0.6057747834456207, "grad_norm": 1.6200921535491943, "learning_rate": 0.0001580889630978207, "loss": 1.0353, "step": 15735 }, { "epoch": 0.6059672762271415, "grad_norm": 1.1059575080871582, "learning_rate": 0.00015806434593737095, "loss": 0.8117, "step": 15740 }, { "epoch": 0.6061597690086622, "grad_norm": 1.3026262521743774, "learning_rate": 0.00015803972346732143, "loss": 1.1648, "step": 15745 }, { "epoch": 0.6063522617901829, "grad_norm": 
1.316931128501892, "learning_rate": 0.00015801509568992366, "loss": 1.0999, "step": 15750 }, { "epoch": 0.6065447545717035, "grad_norm": 0.9396672248840332, "learning_rate": 0.00015799046260742968, "loss": 1.0374, "step": 15755 }, { "epoch": 0.6067372473532242, "grad_norm": 1.1851413249969482, "learning_rate": 0.00015796582422209206, "loss": 1.1861, "step": 15760 }, { "epoch": 0.606929740134745, "grad_norm": 2.0202128887176514, "learning_rate": 0.00015794118053616383, "loss": 1.139, "step": 15765 }, { "epoch": 0.6071222329162657, "grad_norm": 1.642561912536621, "learning_rate": 0.00015791653155189841, "loss": 1.1811, "step": 15770 }, { "epoch": 0.6073147256977863, "grad_norm": 1.4148608446121216, "learning_rate": 0.0001578918772715499, "loss": 1.3261, "step": 15775 }, { "epoch": 0.607507218479307, "grad_norm": 1.160662293434143, "learning_rate": 0.0001578672176973727, "loss": 1.2117, "step": 15780 }, { "epoch": 0.6076997112608277, "grad_norm": 1.4699779748916626, "learning_rate": 0.00015784255283162176, "loss": 1.1937, "step": 15785 }, { "epoch": 0.6078922040423484, "grad_norm": 1.579142451286316, "learning_rate": 0.00015781788267655252, "loss": 1.2722, "step": 15790 }, { "epoch": 0.6080846968238691, "grad_norm": 1.3598978519439697, "learning_rate": 0.00015779320723442096, "loss": 1.0829, "step": 15795 }, { "epoch": 0.6082771896053898, "grad_norm": 1.1840283870697021, "learning_rate": 0.0001577734630755471, "loss": 1.3599, "step": 15800 }, { "epoch": 0.6084696823869105, "grad_norm": 1.1004847288131714, "learning_rate": 0.00015774877812238972, "loss": 1.3756, "step": 15805 }, { "epoch": 0.6086621751684311, "grad_norm": 1.6455458402633667, "learning_rate": 0.00015772408788848914, "loss": 1.1067, "step": 15810 }, { "epoch": 0.6088546679499519, "grad_norm": 1.6274205446243286, "learning_rate": 0.00015769939237610312, "loss": 1.3339, "step": 15815 }, { "epoch": 0.6090471607314726, "grad_norm": 1.2150076627731323, "learning_rate": 0.00015767469158748987, "loss": 0.9821, 
"step": 15820 }, { "epoch": 0.6092396535129933, "grad_norm": 1.2452518939971924, "learning_rate": 0.00015764998552490815, "loss": 0.9994, "step": 15825 }, { "epoch": 0.6094321462945139, "grad_norm": 1.4766079187393188, "learning_rate": 0.00015762527419061715, "loss": 1.0281, "step": 15830 }, { "epoch": 0.6096246390760347, "grad_norm": 1.1288725137710571, "learning_rate": 0.00015760055758687655, "loss": 1.082, "step": 15835 }, { "epoch": 0.6098171318575554, "grad_norm": 1.181159496307373, "learning_rate": 0.00015757583571594653, "loss": 1.1515, "step": 15840 }, { "epoch": 0.610009624639076, "grad_norm": 1.2939519882202148, "learning_rate": 0.00015755110858008773, "loss": 0.9892, "step": 15845 }, { "epoch": 0.6102021174205967, "grad_norm": 2.3088269233703613, "learning_rate": 0.0001575263761815613, "loss": 0.9798, "step": 15850 }, { "epoch": 0.6103946102021174, "grad_norm": 1.4175939559936523, "learning_rate": 0.00015750163852262886, "loss": 1.1408, "step": 15855 }, { "epoch": 0.6105871029836382, "grad_norm": 1.0206336975097656, "learning_rate": 0.00015747689560555248, "loss": 1.2078, "step": 15860 }, { "epoch": 0.6107795957651588, "grad_norm": 0.9995696544647217, "learning_rate": 0.0001574521474325948, "loss": 1.1922, "step": 15865 }, { "epoch": 0.6109720885466795, "grad_norm": 1.6652652025222778, "learning_rate": 0.00015742739400601872, "loss": 1.1039, "step": 15870 }, { "epoch": 0.6111645813282002, "grad_norm": 1.3411548137664795, "learning_rate": 0.00015740263532808792, "loss": 1.1592, "step": 15875 }, { "epoch": 0.6113570741097208, "grad_norm": 0.9215561151504517, "learning_rate": 0.0001573778714010664, "loss": 1.1379, "step": 15880 }, { "epoch": 0.6115495668912416, "grad_norm": 1.269482970237732, "learning_rate": 0.00015735310222721863, "loss": 1.2042, "step": 15885 }, { "epoch": 0.6117420596727623, "grad_norm": 1.316909909248352, "learning_rate": 0.00015732832780880957, "loss": 1.2702, "step": 15890 }, { "epoch": 0.611934552454283, "grad_norm": 
1.2689425945281982, "learning_rate": 0.0001573035481481047, "loss": 1.0755, "step": 15895 }, { "epoch": 0.6121270452358036, "grad_norm": 1.0369685888290405, "learning_rate": 0.00015727876324736996, "loss": 1.0574, "step": 15900 }, { "epoch": 0.6123195380173243, "grad_norm": 1.0056127309799194, "learning_rate": 0.00015725397310887174, "loss": 1.2219, "step": 15905 }, { "epoch": 0.6125120307988451, "grad_norm": 1.3123587369918823, "learning_rate": 0.00015722917773487702, "loss": 1.2203, "step": 15910 }, { "epoch": 0.6127045235803658, "grad_norm": 1.0959875583648682, "learning_rate": 0.00015720437712765306, "loss": 1.2516, "step": 15915 }, { "epoch": 0.6128970163618864, "grad_norm": 2.0152196884155273, "learning_rate": 0.00015717957128946774, "loss": 1.2099, "step": 15920 }, { "epoch": 0.6130895091434071, "grad_norm": 2.816568374633789, "learning_rate": 0.00015715476022258942, "loss": 1.1093, "step": 15925 }, { "epoch": 0.6132820019249278, "grad_norm": 1.8223321437835693, "learning_rate": 0.00015712994392928689, "loss": 1.0474, "step": 15930 }, { "epoch": 0.6134744947064485, "grad_norm": 1.2718263864517212, "learning_rate": 0.00015710512241182945, "loss": 1.1405, "step": 15935 }, { "epoch": 0.6136669874879692, "grad_norm": 1.2518097162246704, "learning_rate": 0.00015708029567248683, "loss": 1.13, "step": 15940 }, { "epoch": 0.6138594802694899, "grad_norm": 0.8542113900184631, "learning_rate": 0.0001570554637135293, "loss": 1.0871, "step": 15945 }, { "epoch": 0.6140519730510106, "grad_norm": 1.0798470973968506, "learning_rate": 0.00015703062653722757, "loss": 1.1563, "step": 15950 }, { "epoch": 0.6142444658325312, "grad_norm": 1.123974084854126, "learning_rate": 0.00015700578414585284, "loss": 1.1253, "step": 15955 }, { "epoch": 0.614436958614052, "grad_norm": 1.2129628658294678, "learning_rate": 0.0001569809365416768, "loss": 1.1123, "step": 15960 }, { "epoch": 0.6146294513955727, "grad_norm": 1.4137890338897705, "learning_rate": 0.00015695608372697154, "loss": 1.115, 
"step": 15965 }, { "epoch": 0.6148219441770933, "grad_norm": 1.2815289497375488, "learning_rate": 0.00015693122570400975, "loss": 1.0876, "step": 15970 }, { "epoch": 0.615014436958614, "grad_norm": 0.9300668835639954, "learning_rate": 0.00015690636247506448, "loss": 1.1442, "step": 15975 }, { "epoch": 0.6152069297401348, "grad_norm": 0.9866906404495239, "learning_rate": 0.00015688149404240938, "loss": 1.0664, "step": 15980 }, { "epoch": 0.6153994225216555, "grad_norm": 1.1951825618743896, "learning_rate": 0.0001568566204083184, "loss": 1.0933, "step": 15985 }, { "epoch": 0.6155919153031761, "grad_norm": 1.4439541101455688, "learning_rate": 0.00015683174157506616, "loss": 1.1618, "step": 15990 }, { "epoch": 0.6157844080846968, "grad_norm": 1.242619276046753, "learning_rate": 0.00015680685754492762, "loss": 1.0794, "step": 15995 }, { "epoch": 0.6159769008662175, "grad_norm": 1.9631248712539673, "learning_rate": 0.00015678196832017823, "loss": 1.1082, "step": 16000 }, { "epoch": 0.6161693936477383, "grad_norm": 1.056715488433838, "learning_rate": 0.00015675707390309403, "loss": 1.0893, "step": 16005 }, { "epoch": 0.6163618864292589, "grad_norm": 2.3864753246307373, "learning_rate": 0.00015673217429595143, "loss": 1.3378, "step": 16010 }, { "epoch": 0.6165543792107796, "grad_norm": 1.3226178884506226, "learning_rate": 0.00015670726950102725, "loss": 1.1959, "step": 16015 }, { "epoch": 0.6167468719923003, "grad_norm": 2.254422426223755, "learning_rate": 0.00015668235952059892, "loss": 1.1495, "step": 16020 }, { "epoch": 0.6169393647738209, "grad_norm": 1.6376910209655762, "learning_rate": 0.00015665744435694435, "loss": 1.0027, "step": 16025 }, { "epoch": 0.6171318575553417, "grad_norm": 1.190169334411621, "learning_rate": 0.00015663252401234177, "loss": 1.0419, "step": 16030 }, { "epoch": 0.6173243503368624, "grad_norm": 1.6388911008834839, "learning_rate": 0.00015660759848907008, "loss": 1.3868, "step": 16035 }, { "epoch": 0.6175168431183831, "grad_norm": 
0.9445647597312927, "learning_rate": 0.00015658266778940843, "loss": 1.1382, "step": 16040 }, { "epoch": 0.6177093358999037, "grad_norm": 0.9717797636985779, "learning_rate": 0.00015655773191563664, "loss": 1.3385, "step": 16045 }, { "epoch": 0.6179018286814244, "grad_norm": 1.7297828197479248, "learning_rate": 0.000156532790870035, "loss": 1.2607, "step": 16050 }, { "epoch": 0.6180943214629452, "grad_norm": 1.3885836601257324, "learning_rate": 0.00015650784465488405, "loss": 1.2271, "step": 16055 }, { "epoch": 0.6182868142444659, "grad_norm": 1.3968501091003418, "learning_rate": 0.00015648289327246508, "loss": 1.255, "step": 16060 }, { "epoch": 0.6184793070259865, "grad_norm": 1.7532678842544556, "learning_rate": 0.00015645793672505967, "loss": 1.2088, "step": 16065 }, { "epoch": 0.6186717998075072, "grad_norm": 1.4146851301193237, "learning_rate": 0.00015643297501494999, "loss": 0.9797, "step": 16070 }, { "epoch": 0.6188642925890279, "grad_norm": 1.4249024391174316, "learning_rate": 0.00015640800814441851, "loss": 1.1446, "step": 16075 }, { "epoch": 0.6190567853705486, "grad_norm": 1.3387399911880493, "learning_rate": 0.0001563830361157484, "loss": 1.2204, "step": 16080 }, { "epoch": 0.6192492781520693, "grad_norm": 1.137149691581726, "learning_rate": 0.00015635805893122312, "loss": 1.1626, "step": 16085 }, { "epoch": 0.61944177093359, "grad_norm": 1.8353437185287476, "learning_rate": 0.0001563330765931267, "loss": 1.2476, "step": 16090 }, { "epoch": 0.6196342637151107, "grad_norm": 0.969289243221283, "learning_rate": 0.00015630808910374358, "loss": 1.026, "step": 16095 }, { "epoch": 0.6198267564966313, "grad_norm": 1.0529965162277222, "learning_rate": 0.0001562830964653587, "loss": 1.15, "step": 16100 }, { "epoch": 0.6200192492781521, "grad_norm": 1.2508490085601807, "learning_rate": 0.00015625809868025756, "loss": 1.08, "step": 16105 }, { "epoch": 0.6202117420596728, "grad_norm": 1.1188933849334717, "learning_rate": 0.0001562330957507259, "loss": 1.2273, 
"step": 16110 }, { "epoch": 0.6204042348411934, "grad_norm": 1.9137325286865234, "learning_rate": 0.00015620808767905018, "loss": 1.1073, "step": 16115 }, { "epoch": 0.6205967276227141, "grad_norm": 1.146921157836914, "learning_rate": 0.0001561830744675172, "loss": 1.0292, "step": 16120 }, { "epoch": 0.6207892204042348, "grad_norm": 1.6574608087539673, "learning_rate": 0.00015615805611841424, "loss": 1.2067, "step": 16125 }, { "epoch": 0.6209817131857556, "grad_norm": 1.599156379699707, "learning_rate": 0.00015613303263402903, "loss": 1.4416, "step": 16130 }, { "epoch": 0.6211742059672762, "grad_norm": 1.9472912549972534, "learning_rate": 0.00015610800401664988, "loss": 0.9591, "step": 16135 }, { "epoch": 0.6213666987487969, "grad_norm": 1.2037914991378784, "learning_rate": 0.00015608297026856538, "loss": 1.0899, "step": 16140 }, { "epoch": 0.6215591915303176, "grad_norm": 1.0116618871688843, "learning_rate": 0.0001560579313920648, "loss": 1.2294, "step": 16145 }, { "epoch": 0.6217516843118384, "grad_norm": 1.6344687938690186, "learning_rate": 0.00015603288738943774, "loss": 1.1918, "step": 16150 }, { "epoch": 0.621944177093359, "grad_norm": 1.3862853050231934, "learning_rate": 0.0001560078382629743, "loss": 1.157, "step": 16155 }, { "epoch": 0.6221366698748797, "grad_norm": 0.9576367139816284, "learning_rate": 0.00015598278401496508, "loss": 1.0759, "step": 16160 }, { "epoch": 0.6223291626564004, "grad_norm": 1.2092609405517578, "learning_rate": 0.0001559577246477011, "loss": 1.1928, "step": 16165 }, { "epoch": 0.622521655437921, "grad_norm": 1.594510793685913, "learning_rate": 0.0001559326601634739, "loss": 1.2336, "step": 16170 }, { "epoch": 0.6227141482194418, "grad_norm": 0.851620078086853, "learning_rate": 0.00015590759056457546, "loss": 1.1646, "step": 16175 }, { "epoch": 0.6229066410009625, "grad_norm": 1.1468600034713745, "learning_rate": 0.0001558825158532982, "loss": 1.1879, "step": 16180 }, { "epoch": 0.6230991337824832, "grad_norm": 1.934251308441162, 
"learning_rate": 0.00015585743603193505, "loss": 1.1207, "step": 16185 }, { "epoch": 0.6232916265640038, "grad_norm": 0.9963223934173584, "learning_rate": 0.00015583235110277943, "loss": 1.068, "step": 16190 }, { "epoch": 0.6234841193455245, "grad_norm": 0.8857359290122986, "learning_rate": 0.00015580726106812512, "loss": 1.1148, "step": 16195 }, { "epoch": 0.6236766121270453, "grad_norm": 1.2589722871780396, "learning_rate": 0.00015578216593026647, "loss": 1.0485, "step": 16200 }, { "epoch": 0.623869104908566, "grad_norm": 1.0346484184265137, "learning_rate": 0.0001557570656914983, "loss": 1.1276, "step": 16205 }, { "epoch": 0.6240615976900866, "grad_norm": 0.8794786334037781, "learning_rate": 0.0001557319603541158, "loss": 1.2591, "step": 16210 }, { "epoch": 0.6242540904716073, "grad_norm": 1.0909137725830078, "learning_rate": 0.00015570684992041473, "loss": 1.1197, "step": 16215 }, { "epoch": 0.624446583253128, "grad_norm": 1.3499592542648315, "learning_rate": 0.0001556817343926913, "loss": 1.0165, "step": 16220 }, { "epoch": 0.6246390760346487, "grad_norm": 1.5356526374816895, "learning_rate": 0.00015565661377324203, "loss": 1.0144, "step": 16225 }, { "epoch": 0.6248315688161694, "grad_norm": 1.849442958831787, "learning_rate": 0.0001556314880643642, "loss": 1.2191, "step": 16230 }, { "epoch": 0.6250240615976901, "grad_norm": 1.1928755044937134, "learning_rate": 0.00015560635726835525, "loss": 1.2685, "step": 16235 }, { "epoch": 0.6252165543792108, "grad_norm": 1.1445300579071045, "learning_rate": 0.00015558122138751332, "loss": 1.445, "step": 16240 }, { "epoch": 0.6254090471607314, "grad_norm": 1.7465559244155884, "learning_rate": 0.00015555608042413689, "loss": 1.2479, "step": 16245 }, { "epoch": 0.6256015399422522, "grad_norm": 1.1695505380630493, "learning_rate": 0.0001555309343805249, "loss": 1.2347, "step": 16250 }, { "epoch": 0.6257940327237729, "grad_norm": 1.2655342817306519, "learning_rate": 0.00015550578325897687, "loss": 1.2343, "step": 16255 }, { 
"epoch": 0.6259865255052935, "grad_norm": 1.569800853729248, "learning_rate": 0.0001554806270617926, "loss": 1.0798, "step": 16260 }, { "epoch": 0.6261790182868142, "grad_norm": 2.0027542114257812, "learning_rate": 0.00015545546579127256, "loss": 1.0084, "step": 16265 }, { "epoch": 0.6263715110683349, "grad_norm": 2.259096145629883, "learning_rate": 0.0001554302994497175, "loss": 1.1921, "step": 16270 }, { "epoch": 0.6265640038498557, "grad_norm": 1.092046856880188, "learning_rate": 0.00015540512803942878, "loss": 1.112, "step": 16275 }, { "epoch": 0.6267564966313763, "grad_norm": 1.67642343044281, "learning_rate": 0.00015537995156270808, "loss": 1.3709, "step": 16280 }, { "epoch": 0.626948989412897, "grad_norm": 1.7039928436279297, "learning_rate": 0.0001553547700218577, "loss": 1.2211, "step": 16285 }, { "epoch": 0.6271414821944177, "grad_norm": 2.0744543075561523, "learning_rate": 0.00015532958341918027, "loss": 1.2324, "step": 16290 }, { "epoch": 0.6273339749759385, "grad_norm": 1.2610362768173218, "learning_rate": 0.00015530439175697898, "loss": 1.1924, "step": 16295 }, { "epoch": 0.6275264677574591, "grad_norm": 1.8385295867919922, "learning_rate": 0.00015527919503755742, "loss": 1.2602, "step": 16300 }, { "epoch": 0.6277189605389798, "grad_norm": 1.62607741355896, "learning_rate": 0.00015525399326321966, "loss": 1.2135, "step": 16305 }, { "epoch": 0.6279114533205005, "grad_norm": 1.164507508277893, "learning_rate": 0.00015522878643627023, "loss": 1.07, "step": 16310 }, { "epoch": 0.6281039461020211, "grad_norm": 0.9871059060096741, "learning_rate": 0.0001552035745590142, "loss": 1.1749, "step": 16315 }, { "epoch": 0.6282964388835419, "grad_norm": 1.1414002180099487, "learning_rate": 0.00015517835763375688, "loss": 1.233, "step": 16320 }, { "epoch": 0.6284889316650626, "grad_norm": 1.1266084909439087, "learning_rate": 0.00015515313566280428, "loss": 1.1642, "step": 16325 }, { "epoch": 0.6286814244465833, "grad_norm": 1.8156638145446777, "learning_rate": 
0.00015512790864846286, "loss": 1.0328, "step": 16330 }, { "epoch": 0.6288739172281039, "grad_norm": 1.9357597827911377, "learning_rate": 0.00015510267659303933, "loss": 1.3325, "step": 16335 }, { "epoch": 0.6290664100096246, "grad_norm": 1.767910122871399, "learning_rate": 0.00015507743949884104, "loss": 1.2381, "step": 16340 }, { "epoch": 0.6292589027911454, "grad_norm": 2.1196887493133545, "learning_rate": 0.0001550521973681758, "loss": 1.2286, "step": 16345 }, { "epoch": 0.629451395572666, "grad_norm": 1.7220022678375244, "learning_rate": 0.00015502695020335177, "loss": 1.1699, "step": 16350 }, { "epoch": 0.6296438883541867, "grad_norm": 1.9612696170806885, "learning_rate": 0.00015500169800667765, "loss": 1.0786, "step": 16355 }, { "epoch": 0.6298363811357074, "grad_norm": 1.666223406791687, "learning_rate": 0.00015497644078046261, "loss": 1.2211, "step": 16360 }, { "epoch": 0.6300288739172281, "grad_norm": 1.7156059741973877, "learning_rate": 0.00015495117852701626, "loss": 1.0621, "step": 16365 }, { "epoch": 0.6302213666987488, "grad_norm": 1.5840719938278198, "learning_rate": 0.00015492591124864865, "loss": 1.2364, "step": 16370 }, { "epoch": 0.6304138594802695, "grad_norm": 1.1821776628494263, "learning_rate": 0.0001549006389476703, "loss": 1.1479, "step": 16375 }, { "epoch": 0.6306063522617902, "grad_norm": 1.2549364566802979, "learning_rate": 0.00015487536162639223, "loss": 1.0564, "step": 16380 }, { "epoch": 0.6307988450433109, "grad_norm": 1.5308479070663452, "learning_rate": 0.0001548500792871258, "loss": 1.1825, "step": 16385 }, { "epoch": 0.6309913378248315, "grad_norm": 1.6546053886413574, "learning_rate": 0.000154824791932183, "loss": 1.1673, "step": 16390 }, { "epoch": 0.6311838306063523, "grad_norm": 1.1561111211776733, "learning_rate": 0.00015479949956387617, "loss": 1.1014, "step": 16395 }, { "epoch": 0.631376323387873, "grad_norm": 1.6901589632034302, "learning_rate": 0.0001547742021845181, "loss": 1.2377, "step": 16400 }, { "epoch": 
0.6315688161693936, "grad_norm": 1.2808809280395508, "learning_rate": 0.0001547488997964221, "loss": 1.2976, "step": 16405 }, { "epoch": 0.6317613089509143, "grad_norm": 0.9793625473976135, "learning_rate": 0.0001547235924019019, "loss": 1.0328, "step": 16410 }, { "epoch": 0.631953801732435, "grad_norm": 1.6001505851745605, "learning_rate": 0.00015469828000327164, "loss": 1.0232, "step": 16415 }, { "epoch": 0.6321462945139558, "grad_norm": 1.3900479078292847, "learning_rate": 0.00015467296260284605, "loss": 1.2412, "step": 16420 }, { "epoch": 0.6323387872954764, "grad_norm": 1.6030535697937012, "learning_rate": 0.0001546476402029402, "loss": 1.178, "step": 16425 }, { "epoch": 0.6325312800769971, "grad_norm": 1.5602627992630005, "learning_rate": 0.00015462231280586965, "loss": 1.2834, "step": 16430 }, { "epoch": 0.6327237728585178, "grad_norm": 1.3648455142974854, "learning_rate": 0.00015459698041395045, "loss": 1.1425, "step": 16435 }, { "epoch": 0.6329162656400384, "grad_norm": 1.4346479177474976, "learning_rate": 0.00015457164302949908, "loss": 1.0076, "step": 16440 }, { "epoch": 0.6331087584215592, "grad_norm": 0.9692068696022034, "learning_rate": 0.00015454630065483242, "loss": 1.0133, "step": 16445 }, { "epoch": 0.6333012512030799, "grad_norm": 1.479915976524353, "learning_rate": 0.0001545209532922679, "loss": 1.131, "step": 16450 }, { "epoch": 0.6334937439846006, "grad_norm": 1.0446960926055908, "learning_rate": 0.00015449560094412342, "loss": 1.2545, "step": 16455 }, { "epoch": 0.6336862367661212, "grad_norm": 1.458414077758789, "learning_rate": 0.00015447024361271721, "loss": 1.325, "step": 16460 }, { "epoch": 0.633878729547642, "grad_norm": 1.2071151733398438, "learning_rate": 0.00015444488130036802, "loss": 1.2303, "step": 16465 }, { "epoch": 0.6340712223291627, "grad_norm": 1.9108256101608276, "learning_rate": 0.00015441951400939515, "loss": 1.2031, "step": 16470 }, { "epoch": 0.6342637151106834, "grad_norm": 1.1393382549285889, "learning_rate": 
0.0001543941417421182, "loss": 1.016, "step": 16475 }, { "epoch": 0.634456207892204, "grad_norm": 2.0735628604888916, "learning_rate": 0.00015436876450085728, "loss": 1.1619, "step": 16480 }, { "epoch": 0.6346487006737247, "grad_norm": 1.6895620822906494, "learning_rate": 0.00015434338228793306, "loss": 1.1621, "step": 16485 }, { "epoch": 0.6348411934552455, "grad_norm": 1.9663159847259521, "learning_rate": 0.0001543179951056665, "loss": 1.2465, "step": 16490 }, { "epoch": 0.6350336862367661, "grad_norm": 1.1372085809707642, "learning_rate": 0.0001542926029563791, "loss": 1.1643, "step": 16495 }, { "epoch": 0.6352261790182868, "grad_norm": 0.5948193669319153, "learning_rate": 0.00015426720584239283, "loss": 0.9659, "step": 16500 }, { "epoch": 0.6354186717998075, "grad_norm": 1.829047441482544, "learning_rate": 0.00015424180376603008, "loss": 1.1334, "step": 16505 }, { "epoch": 0.6356111645813282, "grad_norm": 1.4863371849060059, "learning_rate": 0.00015421639672961367, "loss": 1.1206, "step": 16510 }, { "epoch": 0.6358036573628489, "grad_norm": 1.2481038570404053, "learning_rate": 0.00015419098473546696, "loss": 1.1101, "step": 16515 }, { "epoch": 0.6359961501443696, "grad_norm": 1.8721559047698975, "learning_rate": 0.00015416556778591363, "loss": 1.1293, "step": 16520 }, { "epoch": 0.6361886429258903, "grad_norm": 1.5730985403060913, "learning_rate": 0.000154140145883278, "loss": 1.277, "step": 16525 }, { "epoch": 0.636381135707411, "grad_norm": 1.4351321458816528, "learning_rate": 0.00015411471902988463, "loss": 1.2475, "step": 16530 }, { "epoch": 0.6365736284889316, "grad_norm": 0.8733989596366882, "learning_rate": 0.00015408928722805874, "loss": 1.0728, "step": 16535 }, { "epoch": 0.6367661212704524, "grad_norm": 1.454068899154663, "learning_rate": 0.00015406385048012577, "loss": 1.0163, "step": 16540 }, { "epoch": 0.6369586140519731, "grad_norm": 0.9600105285644531, "learning_rate": 0.00015403840878841182, "loss": 1.097, "step": 16545 }, { "epoch": 
0.6371511068334937, "grad_norm": 2.419609546661377, "learning_rate": 0.00015401296215524345, "loss": 1.2003, "step": 16550 }, { "epoch": 0.6373435996150144, "grad_norm": 1.313755989074707, "learning_rate": 0.0001539875105829474, "loss": 1.1276, "step": 16555 }, { "epoch": 0.6375360923965351, "grad_norm": 1.6932001113891602, "learning_rate": 0.00015396205407385116, "loss": 1.1689, "step": 16560 }, { "epoch": 0.6377285851780559, "grad_norm": 1.076905608177185, "learning_rate": 0.00015393659263028257, "loss": 1.189, "step": 16565 }, { "epoch": 0.6379210779595765, "grad_norm": 1.2433785200119019, "learning_rate": 0.00015391112625456983, "loss": 0.9797, "step": 16570 }, { "epoch": 0.6381135707410972, "grad_norm": 1.1299281120300293, "learning_rate": 0.00015388565494904176, "loss": 1.1399, "step": 16575 }, { "epoch": 0.6383060635226179, "grad_norm": 1.0440160036087036, "learning_rate": 0.0001538601787160275, "loss": 1.2491, "step": 16580 }, { "epoch": 0.6384985563041385, "grad_norm": 1.1874500513076782, "learning_rate": 0.00015383469755785668, "loss": 1.2762, "step": 16585 }, { "epoch": 0.6386910490856593, "grad_norm": 1.2737995386123657, "learning_rate": 0.0001538092114768594, "loss": 1.2102, "step": 16590 }, { "epoch": 0.63888354186718, "grad_norm": 1.8649038076400757, "learning_rate": 0.0001537837204753662, "loss": 1.0711, "step": 16595 }, { "epoch": 0.6390760346487007, "grad_norm": 1.0375845432281494, "learning_rate": 0.000153758224555708, "loss": 1.0349, "step": 16600 }, { "epoch": 0.6392685274302213, "grad_norm": 1.4500465393066406, "learning_rate": 0.0001537327237202163, "loss": 1.1501, "step": 16605 }, { "epoch": 0.6394610202117421, "grad_norm": 1.5905102491378784, "learning_rate": 0.000153707217971223, "loss": 1.1946, "step": 16610 }, { "epoch": 0.6396535129932628, "grad_norm": 1.224752426147461, "learning_rate": 0.00015368170731106036, "loss": 1.1101, "step": 16615 }, { "epoch": 0.6398460057747835, "grad_norm": 2.605717182159424, "learning_rate": 
0.00015365619174206117, "loss": 1.0483, "step": 16620 }, { "epoch": 0.6400384985563041, "grad_norm": 1.2829294204711914, "learning_rate": 0.00015363067126655873, "loss": 1.2265, "step": 16625 }, { "epoch": 0.6402309913378248, "grad_norm": 1.1748125553131104, "learning_rate": 0.00015360514588688665, "loss": 1.0909, "step": 16630 }, { "epoch": 0.6404234841193456, "grad_norm": 1.0052121877670288, "learning_rate": 0.00015357961560537908, "loss": 1.3145, "step": 16635 }, { "epoch": 0.6406159769008662, "grad_norm": 1.1692798137664795, "learning_rate": 0.00015355408042437061, "loss": 1.3134, "step": 16640 }, { "epoch": 0.6408084696823869, "grad_norm": 1.1379728317260742, "learning_rate": 0.00015352854034619622, "loss": 1.0519, "step": 16645 }, { "epoch": 0.6410009624639076, "grad_norm": 1.067920207977295, "learning_rate": 0.00015350299537319147, "loss": 1.17, "step": 16650 }, { "epoch": 0.6411934552454283, "grad_norm": 1.9951469898223877, "learning_rate": 0.00015347744550769216, "loss": 1.0478, "step": 16655 }, { "epoch": 0.641385948026949, "grad_norm": 1.036605715751648, "learning_rate": 0.00015345189075203477, "loss": 1.1288, "step": 16660 }, { "epoch": 0.6415784408084697, "grad_norm": 0.5938658714294434, "learning_rate": 0.000153426331108556, "loss": 1.0589, "step": 16665 }, { "epoch": 0.6417709335899904, "grad_norm": 1.212049961090088, "learning_rate": 0.00015340076657959317, "loss": 1.1104, "step": 16670 }, { "epoch": 0.641963426371511, "grad_norm": 1.3548222780227661, "learning_rate": 0.00015337519716748403, "loss": 1.2639, "step": 16675 }, { "epoch": 0.6421559191530317, "grad_norm": 1.210879921913147, "learning_rate": 0.00015334962287456665, "loss": 1.0576, "step": 16680 }, { "epoch": 0.6423484119345525, "grad_norm": 2.2316668033599854, "learning_rate": 0.00015332404370317965, "loss": 1.2075, "step": 16685 }, { "epoch": 0.6425409047160732, "grad_norm": 1.0065557956695557, "learning_rate": 0.00015329845965566215, "loss": 1.0872, "step": 16690 }, { "epoch": 
0.6427333974975938, "grad_norm": 1.36894953250885, "learning_rate": 0.00015327287073435355, "loss": 0.9866, "step": 16695 }, { "epoch": 0.6429258902791145, "grad_norm": 0.9726212620735168, "learning_rate": 0.0001532472769415938, "loss": 1.0069, "step": 16700 }, { "epoch": 0.6431183830606352, "grad_norm": 0.9447348117828369, "learning_rate": 0.00015322167827972334, "loss": 1.3184, "step": 16705 }, { "epoch": 0.643310875842156, "grad_norm": 1.7236000299453735, "learning_rate": 0.00015319607475108296, "loss": 1.2547, "step": 16710 }, { "epoch": 0.6435033686236766, "grad_norm": 2.3541550636291504, "learning_rate": 0.00015317046635801392, "loss": 1.2886, "step": 16715 }, { "epoch": 0.6436958614051973, "grad_norm": 1.8849072456359863, "learning_rate": 0.00015314485310285796, "loss": 1.1295, "step": 16720 }, { "epoch": 0.643888354186718, "grad_norm": 4.183611869812012, "learning_rate": 0.00015311923498795724, "loss": 1.1109, "step": 16725 }, { "epoch": 0.6440808469682386, "grad_norm": 1.4037699699401855, "learning_rate": 0.00015309361201565436, "loss": 1.1097, "step": 16730 }, { "epoch": 0.6442733397497594, "grad_norm": 1.626489520072937, "learning_rate": 0.00015306798418829236, "loss": 1.2515, "step": 16735 }, { "epoch": 0.6444658325312801, "grad_norm": 2.0744874477386475, "learning_rate": 0.00015304235150821475, "loss": 1.2196, "step": 16740 }, { "epoch": 0.6446583253128008, "grad_norm": 1.2196972370147705, "learning_rate": 0.0001530167139777655, "loss": 1.0935, "step": 16745 }, { "epoch": 0.6448508180943214, "grad_norm": 2.1287968158721924, "learning_rate": 0.00015299107159928897, "loss": 0.9476, "step": 16750 }, { "epoch": 0.6450433108758421, "grad_norm": 1.6050670146942139, "learning_rate": 0.00015296542437512995, "loss": 1.2276, "step": 16755 }, { "epoch": 0.6452358036573629, "grad_norm": 1.316373348236084, "learning_rate": 0.0001529397723076337, "loss": 1.121, "step": 16760 }, { "epoch": 0.6454282964388836, "grad_norm": 1.4219224452972412, "learning_rate": 
0.00015291411539914603, "loss": 1.3219, "step": 16765 }, { "epoch": 0.6456207892204042, "grad_norm": 1.3470525741577148, "learning_rate": 0.00015288845365201299, "loss": 1.0538, "step": 16770 }, { "epoch": 0.6458132820019249, "grad_norm": 1.6893870830535889, "learning_rate": 0.0001528627870685812, "loss": 1.1907, "step": 16775 }, { "epoch": 0.6460057747834457, "grad_norm": 1.7264561653137207, "learning_rate": 0.00015283711565119775, "loss": 1.167, "step": 16780 }, { "epoch": 0.6461982675649663, "grad_norm": 1.1093302965164185, "learning_rate": 0.0001528114394022101, "loss": 1.1477, "step": 16785 }, { "epoch": 0.646390760346487, "grad_norm": 1.1114470958709717, "learning_rate": 0.00015278575832396613, "loss": 1.1224, "step": 16790 }, { "epoch": 0.6465832531280077, "grad_norm": 2.0239744186401367, "learning_rate": 0.00015276007241881424, "loss": 1.1655, "step": 16795 }, { "epoch": 0.6467757459095284, "grad_norm": 1.0726968050003052, "learning_rate": 0.00015273438168910322, "loss": 0.9021, "step": 16800 }, { "epoch": 0.6469682386910491, "grad_norm": 1.2715688943862915, "learning_rate": 0.00015270868613718238, "loss": 1.1776, "step": 16805 }, { "epoch": 0.6471607314725698, "grad_norm": 1.4808478355407715, "learning_rate": 0.00015268298576540129, "loss": 1.1023, "step": 16810 }, { "epoch": 0.6473532242540905, "grad_norm": 1.63973069190979, "learning_rate": 0.0001526572805761102, "loss": 1.0025, "step": 16815 }, { "epoch": 0.6475457170356111, "grad_norm": 1.0935505628585815, "learning_rate": 0.0001526315705716596, "loss": 1.1039, "step": 16820 }, { "epoch": 0.6477382098171318, "grad_norm": 1.0586233139038086, "learning_rate": 0.00015260585575440052, "loss": 1.0884, "step": 16825 }, { "epoch": 0.6479307025986526, "grad_norm": 1.0608752965927124, "learning_rate": 0.0001525801361266844, "loss": 1.2997, "step": 16830 }, { "epoch": 0.6481231953801733, "grad_norm": 1.0017322301864624, "learning_rate": 0.00015255441169086318, "loss": 1.3023, "step": 16835 }, { "epoch": 
0.6483156881616939, "grad_norm": 0.9409940242767334, "learning_rate": 0.00015252868244928914, "loss": 1.2462, "step": 16840 }, { "epoch": 0.6485081809432146, "grad_norm": 1.646735429763794, "learning_rate": 0.00015250294840431504, "loss": 1.1759, "step": 16845 }, { "epoch": 0.6487006737247353, "grad_norm": 2.878627300262451, "learning_rate": 0.00015247720955829412, "loss": 1.2458, "step": 16850 }, { "epoch": 0.6488931665062561, "grad_norm": 1.6578867435455322, "learning_rate": 0.00015245146591358002, "loss": 1.297, "step": 16855 }, { "epoch": 0.6490856592877767, "grad_norm": 1.9454634189605713, "learning_rate": 0.00015242571747252682, "loss": 1.2366, "step": 16860 }, { "epoch": 0.6492781520692974, "grad_norm": 1.8211311101913452, "learning_rate": 0.00015239996423748906, "loss": 1.1163, "step": 16865 }, { "epoch": 0.6494706448508181, "grad_norm": 1.5382091999053955, "learning_rate": 0.00015237420621082163, "loss": 1.0103, "step": 16870 }, { "epoch": 0.6496631376323387, "grad_norm": 1.7348453998565674, "learning_rate": 0.00015234844339488004, "loss": 1.1667, "step": 16875 }, { "epoch": 0.6498556304138595, "grad_norm": 1.0255297422409058, "learning_rate": 0.0001523226757920201, "loss": 1.1472, "step": 16880 }, { "epoch": 0.6500481231953802, "grad_norm": 1.730460524559021, "learning_rate": 0.00015229690340459802, "loss": 1.2442, "step": 16885 }, { "epoch": 0.6502406159769009, "grad_norm": 1.6826850175857544, "learning_rate": 0.00015227112623497058, "loss": 1.2426, "step": 16890 }, { "epoch": 0.6504331087584215, "grad_norm": 1.6523195505142212, "learning_rate": 0.00015224534428549488, "loss": 1.1543, "step": 16895 }, { "epoch": 0.6506256015399422, "grad_norm": 2.3335843086242676, "learning_rate": 0.00015221955755852858, "loss": 1.115, "step": 16900 }, { "epoch": 0.650818094321463, "grad_norm": 1.0122956037521362, "learning_rate": 0.00015219376605642962, "loss": 1.2913, "step": 16905 }, { "epoch": 0.6510105871029837, "grad_norm": 1.5100213289260864, "learning_rate": 
0.00015216796978155655, "loss": 1.0309, "step": 16910 }, { "epoch": 0.6512030798845043, "grad_norm": 1.1331759691238403, "learning_rate": 0.0001521421687362682, "loss": 1.0732, "step": 16915 }, { "epoch": 0.651395572666025, "grad_norm": 0.9450187087059021, "learning_rate": 0.00015211636292292394, "loss": 1.2011, "step": 16920 }, { "epoch": 0.6515880654475458, "grad_norm": 1.1546697616577148, "learning_rate": 0.00015209055234388354, "loss": 1.1368, "step": 16925 }, { "epoch": 0.6517805582290664, "grad_norm": 1.5972734689712524, "learning_rate": 0.00015206473700150717, "loss": 1.0546, "step": 16930 }, { "epoch": 0.6519730510105871, "grad_norm": 1.1828382015228271, "learning_rate": 0.0001520389168981555, "loss": 1.0311, "step": 16935 }, { "epoch": 0.6521655437921078, "grad_norm": 1.0515602827072144, "learning_rate": 0.00015201309203618962, "loss": 1.3763, "step": 16940 }, { "epoch": 0.6523580365736285, "grad_norm": 1.0648945569992065, "learning_rate": 0.00015198726241797103, "loss": 1.136, "step": 16945 }, { "epoch": 0.6525505293551492, "grad_norm": 1.3983291387557983, "learning_rate": 0.00015196142804586166, "loss": 1.121, "step": 16950 }, { "epoch": 0.6527430221366699, "grad_norm": 1.1980384588241577, "learning_rate": 0.00015193558892222394, "loss": 1.1442, "step": 16955 }, { "epoch": 0.6529355149181906, "grad_norm": 0.92877596616745, "learning_rate": 0.00015190974504942064, "loss": 1.1025, "step": 16960 }, { "epoch": 0.6531280076997112, "grad_norm": 1.3868606090545654, "learning_rate": 0.00015188389642981502, "loss": 1.0714, "step": 16965 }, { "epoch": 0.6533205004812319, "grad_norm": 2.058389663696289, "learning_rate": 0.00015185804306577075, "loss": 1.3543, "step": 16970 }, { "epoch": 0.6535129932627527, "grad_norm": 0.5963343381881714, "learning_rate": 0.00015183218495965202, "loss": 0.9247, "step": 16975 }, { "epoch": 0.6537054860442734, "grad_norm": 1.6353943347930908, "learning_rate": 0.0001518063221138233, "loss": 1.1284, "step": 16980 }, { "epoch": 
0.653897978825794, "grad_norm": 2.303635597229004, "learning_rate": 0.00015178045453064962, "loss": 1.3496, "step": 16985 }, { "epoch": 0.6540904716073147, "grad_norm": 0.9238683581352234, "learning_rate": 0.00015175458221249638, "loss": 1.1348, "step": 16990 }, { "epoch": 0.6542829643888354, "grad_norm": 1.4203814268112183, "learning_rate": 0.00015172870516172942, "loss": 1.1032, "step": 16995 }, { "epoch": 0.6544754571703562, "grad_norm": 1.018648386001587, "learning_rate": 0.0001517028233807151, "loss": 1.237, "step": 17000 }, { "epoch": 0.6546679499518768, "grad_norm": 1.4779586791992188, "learning_rate": 0.00015167693687182, "loss": 1.173, "step": 17005 }, { "epoch": 0.6548604427333975, "grad_norm": 1.7097437381744385, "learning_rate": 0.0001516510456374114, "loss": 1.1935, "step": 17010 }, { "epoch": 0.6550529355149182, "grad_norm": 1.4055527448654175, "learning_rate": 0.00015162514967985682, "loss": 1.0832, "step": 17015 }, { "epoch": 0.6552454282964388, "grad_norm": 1.5012494325637817, "learning_rate": 0.00015159924900152432, "loss": 1.3221, "step": 17020 }, { "epoch": 0.6554379210779596, "grad_norm": 1.13307785987854, "learning_rate": 0.00015157334360478228, "loss": 1.2599, "step": 17025 }, { "epoch": 0.6556304138594803, "grad_norm": 2.10911226272583, "learning_rate": 0.0001515474334919996, "loss": 1.1446, "step": 17030 }, { "epoch": 0.655822906641001, "grad_norm": 1.4689563512802124, "learning_rate": 0.00015152151866554563, "loss": 1.3851, "step": 17035 }, { "epoch": 0.6560153994225216, "grad_norm": 1.3363420963287354, "learning_rate": 0.00015149559912779005, "loss": 1.1939, "step": 17040 }, { "epoch": 0.6562078922040423, "grad_norm": 1.665319561958313, "learning_rate": 0.00015146967488110307, "loss": 1.3353, "step": 17045 }, { "epoch": 0.6564003849855631, "grad_norm": 1.03946852684021, "learning_rate": 0.00015144374592785528, "loss": 1.0736, "step": 17050 }, { "epoch": 0.6565928777670837, "grad_norm": 1.941311240196228, "learning_rate": 
0.0001514178122704177, "loss": 1.1745, "step": 17055 }, { "epoch": 0.6567853705486044, "grad_norm": 2.091871738433838, "learning_rate": 0.00015139187391116182, "loss": 0.9826, "step": 17060 }, { "epoch": 0.6569778633301251, "grad_norm": 1.3722056150436401, "learning_rate": 0.0001513659308524595, "loss": 1.0969, "step": 17065 }, { "epoch": 0.6571703561116458, "grad_norm": 1.9604045152664185, "learning_rate": 0.00015133998309668306, "loss": 1.0726, "step": 17070 }, { "epoch": 0.6573628488931665, "grad_norm": 1.1731983423233032, "learning_rate": 0.00015131403064620527, "loss": 1.0909, "step": 17075 }, { "epoch": 0.6575553416746872, "grad_norm": 1.3418563604354858, "learning_rate": 0.0001512880735033993, "loss": 1.2574, "step": 17080 }, { "epoch": 0.6577478344562079, "grad_norm": 2.054722785949707, "learning_rate": 0.00015126211167063876, "loss": 1.1705, "step": 17085 }, { "epoch": 0.6579403272377286, "grad_norm": 1.1431398391723633, "learning_rate": 0.00015123614515029772, "loss": 1.2606, "step": 17090 }, { "epoch": 0.6581328200192493, "grad_norm": 1.4750339984893799, "learning_rate": 0.0001512101739447506, "loss": 1.1471, "step": 17095 }, { "epoch": 0.65832531280077, "grad_norm": 1.6877497434616089, "learning_rate": 0.00015118419805637228, "loss": 0.986, "step": 17100 }, { "epoch": 0.6585178055822907, "grad_norm": 0.6538336873054504, "learning_rate": 0.0001511582174875381, "loss": 0.9426, "step": 17105 }, { "epoch": 0.6587102983638113, "grad_norm": 1.1754498481750488, "learning_rate": 0.00015113223224062384, "loss": 1.0994, "step": 17110 }, { "epoch": 0.658902791145332, "grad_norm": 2.219837188720703, "learning_rate": 0.00015110624231800567, "loss": 1.1205, "step": 17115 }, { "epoch": 0.6590952839268528, "grad_norm": 1.826324701309204, "learning_rate": 0.0001510802477220602, "loss": 1.2335, "step": 17120 }, { "epoch": 0.6592877767083735, "grad_norm": 1.8668159246444702, "learning_rate": 0.00015105424845516445, "loss": 1.2609, "step": 17125 }, { "epoch": 
0.6594802694898941, "grad_norm": 0.9887051582336426, "learning_rate": 0.00015102824451969585, "loss": 1.0539, "step": 17130 }, { "epoch": 0.6596727622714148, "grad_norm": 1.2473443746566772, "learning_rate": 0.00015100223591803236, "loss": 1.2355, "step": 17135 }, { "epoch": 0.6598652550529355, "grad_norm": 1.2736021280288696, "learning_rate": 0.00015097622265255222, "loss": 1.3073, "step": 17140 }, { "epoch": 0.6600577478344563, "grad_norm": 1.0870583057403564, "learning_rate": 0.00015095020472563424, "loss": 0.8381, "step": 17145 }, { "epoch": 0.6602502406159769, "grad_norm": 1.6099382638931274, "learning_rate": 0.0001509241821396575, "loss": 1.2738, "step": 17150 }, { "epoch": 0.6604427333974976, "grad_norm": 1.3321658372879028, "learning_rate": 0.0001508981548970017, "loss": 1.2924, "step": 17155 }, { "epoch": 0.6606352261790183, "grad_norm": 1.0399209260940552, "learning_rate": 0.00015087212300004678, "loss": 0.9254, "step": 17160 }, { "epoch": 0.6608277189605389, "grad_norm": 0.9332255721092224, "learning_rate": 0.0001508460864511732, "loss": 1.2693, "step": 17165 }, { "epoch": 0.6610202117420597, "grad_norm": 1.408109188079834, "learning_rate": 0.00015082004525276185, "loss": 1.0394, "step": 17170 }, { "epoch": 0.6612127045235804, "grad_norm": 1.3958436250686646, "learning_rate": 0.00015079399940719402, "loss": 1.1119, "step": 17175 }, { "epoch": 0.6614051973051011, "grad_norm": 1.3326903581619263, "learning_rate": 0.00015076794891685143, "loss": 1.0996, "step": 17180 }, { "epoch": 0.6615976900866217, "grad_norm": 1.1485531330108643, "learning_rate": 0.00015074189378411622, "loss": 1.1617, "step": 17185 }, { "epoch": 0.6617901828681424, "grad_norm": 1.9735444784164429, "learning_rate": 0.00015071583401137092, "loss": 1.1168, "step": 17190 }, { "epoch": 0.6619826756496632, "grad_norm": 1.6123241186141968, "learning_rate": 0.00015068976960099862, "loss": 1.1232, "step": 17195 }, { "epoch": 0.6621751684311838, "grad_norm": 1.3553659915924072, "learning_rate": 
0.0001506637005553826, "loss": 0.9969, "step": 17200 }, { "epoch": 0.6623676612127045, "grad_norm": 1.3059508800506592, "learning_rate": 0.00015063762687690684, "loss": 1.0852, "step": 17205 }, { "epoch": 0.6625601539942252, "grad_norm": 0.9797844290733337, "learning_rate": 0.00015061154856795553, "loss": 0.8927, "step": 17210 }, { "epoch": 0.6627526467757459, "grad_norm": 1.2405691146850586, "learning_rate": 0.00015058546563091337, "loss": 1.1381, "step": 17215 }, { "epoch": 0.6629451395572666, "grad_norm": 0.7226620316505432, "learning_rate": 0.00015055937806816548, "loss": 0.9773, "step": 17220 }, { "epoch": 0.6631376323387873, "grad_norm": 1.302935004234314, "learning_rate": 0.0001505332858820974, "loss": 1.2386, "step": 17225 }, { "epoch": 0.663330125120308, "grad_norm": 0.8981648683547974, "learning_rate": 0.00015050718907509505, "loss": 1.1499, "step": 17230 }, { "epoch": 0.6635226179018286, "grad_norm": 1.6177557706832886, "learning_rate": 0.00015048108764954487, "loss": 1.0118, "step": 17235 }, { "epoch": 0.6637151106833494, "grad_norm": 1.4030743837356567, "learning_rate": 0.00015045498160783362, "loss": 1.2892, "step": 17240 }, { "epoch": 0.6639076034648701, "grad_norm": 1.3468968868255615, "learning_rate": 0.00015042887095234852, "loss": 1.2397, "step": 17245 }, { "epoch": 0.6641000962463908, "grad_norm": 0.9706347584724426, "learning_rate": 0.00015040275568547728, "loss": 1.0251, "step": 17250 }, { "epoch": 0.6642925890279114, "grad_norm": 1.623147964477539, "learning_rate": 0.00015037663580960787, "loss": 1.1651, "step": 17255 }, { "epoch": 0.6644850818094321, "grad_norm": 0.9518052935600281, "learning_rate": 0.00015035051132712883, "loss": 1.1605, "step": 17260 }, { "epoch": 0.6646775745909529, "grad_norm": 1.36576509475708, "learning_rate": 0.00015032438224042908, "loss": 1.1485, "step": 17265 }, { "epoch": 0.6648700673724736, "grad_norm": 1.4218300580978394, "learning_rate": 0.00015029824855189797, "loss": 1.0527, "step": 17270 }, { "epoch": 
0.6650625601539942, "grad_norm": 1.573996663093567, "learning_rate": 0.0001502721102639252, "loss": 1.1692, "step": 17275 }, { "epoch": 0.6652550529355149, "grad_norm": 1.1809152364730835, "learning_rate": 0.00015024596737890097, "loss": 1.0801, "step": 17280 }, { "epoch": 0.6654475457170356, "grad_norm": 1.043346881866455, "learning_rate": 0.00015021981989921587, "loss": 1.181, "step": 17285 }, { "epoch": 0.6656400384985564, "grad_norm": 0.9252155423164368, "learning_rate": 0.00015019366782726093, "loss": 1.0204, "step": 17290 }, { "epoch": 0.665832531280077, "grad_norm": 1.4319888353347778, "learning_rate": 0.00015016751116542757, "loss": 1.2009, "step": 17295 }, { "epoch": 0.6660250240615977, "grad_norm": 0.6749492287635803, "learning_rate": 0.00015014134991610766, "loss": 1.1157, "step": 17300 }, { "epoch": 0.6662175168431184, "grad_norm": 0.9866890907287598, "learning_rate": 0.0001501151840816934, "loss": 1.1943, "step": 17305 }, { "epoch": 0.666410009624639, "grad_norm": 1.4207334518432617, "learning_rate": 0.00015008901366457756, "loss": 1.1103, "step": 17310 }, { "epoch": 0.6666025024061598, "grad_norm": 1.0321522951126099, "learning_rate": 0.00015006283866715326, "loss": 1.079, "step": 17315 }, { "epoch": 0.6667949951876805, "grad_norm": 1.6033141613006592, "learning_rate": 0.000150036659091814, "loss": 0.982, "step": 17320 }, { "epoch": 0.6669874879692012, "grad_norm": 1.503190279006958, "learning_rate": 0.00015001047494095368, "loss": 1.1371, "step": 17325 }, { "epoch": 0.6671799807507218, "grad_norm": 1.2487331628799438, "learning_rate": 0.00014998428621696677, "loss": 1.1328, "step": 17330 }, { "epoch": 0.6673724735322425, "grad_norm": 1.2876261472702026, "learning_rate": 0.00014995809292224797, "loss": 1.2034, "step": 17335 }, { "epoch": 0.6675649663137633, "grad_norm": 1.0377410650253296, "learning_rate": 0.0001499318950591925, "loss": 1.2794, "step": 17340 }, { "epoch": 0.667757459095284, "grad_norm": 2.4566397666931152, "learning_rate": 
0.00014990569263019602, "loss": 1.1211, "step": 17345 }, { "epoch": 0.6679499518768046, "grad_norm": 1.3069671392440796, "learning_rate": 0.00014987948563765455, "loss": 1.1101, "step": 17350 }, { "epoch": 0.6681424446583253, "grad_norm": 1.0914125442504883, "learning_rate": 0.0001498532740839645, "loss": 1.0383, "step": 17355 }, { "epoch": 0.668334937439846, "grad_norm": 1.1379315853118896, "learning_rate": 0.00014982705797152285, "loss": 1.0903, "step": 17360 }, { "epoch": 0.6685274302213667, "grad_norm": 0.9188007712364197, "learning_rate": 0.00014980083730272675, "loss": 1.0696, "step": 17365 }, { "epoch": 0.6687199230028874, "grad_norm": 1.2434134483337402, "learning_rate": 0.00014977461207997403, "loss": 1.2438, "step": 17370 }, { "epoch": 0.6689124157844081, "grad_norm": 1.1543229818344116, "learning_rate": 0.00014974838230566274, "loss": 1.12, "step": 17375 }, { "epoch": 0.6691049085659287, "grad_norm": 1.4789245128631592, "learning_rate": 0.00014972214798219144, "loss": 1.0437, "step": 17380 }, { "epoch": 0.6692974013474494, "grad_norm": 1.4191787242889404, "learning_rate": 0.0001496959091119591, "loss": 1.1827, "step": 17385 }, { "epoch": 0.6694898941289702, "grad_norm": 1.749631404876709, "learning_rate": 0.00014966966569736508, "loss": 1.0353, "step": 17390 }, { "epoch": 0.6696823869104909, "grad_norm": 1.4120956659317017, "learning_rate": 0.00014964341774080912, "loss": 1.2257, "step": 17395 }, { "epoch": 0.6698748796920115, "grad_norm": 1.6030794382095337, "learning_rate": 0.00014961716524469152, "loss": 1.0767, "step": 17400 }, { "epoch": 0.6700673724735322, "grad_norm": 1.4263496398925781, "learning_rate": 0.00014959090821141282, "loss": 1.1188, "step": 17405 }, { "epoch": 0.670259865255053, "grad_norm": 1.1514267921447754, "learning_rate": 0.00014956464664337408, "loss": 1.0731, "step": 17410 }, { "epoch": 0.6704523580365737, "grad_norm": 1.5985325574874878, "learning_rate": 0.00014953838054297672, "loss": 1.1342, "step": 17415 }, { "epoch": 
0.6706448508180943, "grad_norm": 2.1868584156036377, "learning_rate": 0.00014951210991262262, "loss": 1.1169, "step": 17420 }, { "epoch": 0.670837343599615, "grad_norm": 1.1203131675720215, "learning_rate": 0.0001494858347547141, "loss": 1.051, "step": 17425 }, { "epoch": 0.6710298363811357, "grad_norm": 1.3077278137207031, "learning_rate": 0.00014945955507165377, "loss": 1.19, "step": 17430 }, { "epoch": 0.6712223291626565, "grad_norm": 1.1149485111236572, "learning_rate": 0.00014943327086584476, "loss": 1.3471, "step": 17435 }, { "epoch": 0.6714148219441771, "grad_norm": 1.7210713624954224, "learning_rate": 0.00014940698213969063, "loss": 1.0918, "step": 17440 }, { "epoch": 0.6716073147256978, "grad_norm": 1.265023946762085, "learning_rate": 0.00014938068889559526, "loss": 1.0716, "step": 17445 }, { "epoch": 0.6717998075072185, "grad_norm": 1.37469482421875, "learning_rate": 0.00014935439113596298, "loss": 1.1524, "step": 17450 }, { "epoch": 0.6719923002887391, "grad_norm": 1.189141035079956, "learning_rate": 0.0001493280888631986, "loss": 1.1097, "step": 17455 }, { "epoch": 0.6721847930702599, "grad_norm": 1.5825908184051514, "learning_rate": 0.00014930178207970727, "loss": 1.2842, "step": 17460 }, { "epoch": 0.6723772858517806, "grad_norm": 1.1093425750732422, "learning_rate": 0.00014927547078789452, "loss": 1.0679, "step": 17465 }, { "epoch": 0.6725697786333013, "grad_norm": 1.3306807279586792, "learning_rate": 0.00014924915499016646, "loss": 1.2877, "step": 17470 }, { "epoch": 0.6727622714148219, "grad_norm": 1.9391852617263794, "learning_rate": 0.00014922283468892935, "loss": 1.1743, "step": 17475 }, { "epoch": 0.6729547641963426, "grad_norm": 1.5213755369186401, "learning_rate": 0.0001491965098865901, "loss": 1.1793, "step": 17480 }, { "epoch": 0.6731472569778634, "grad_norm": 1.6637414693832397, "learning_rate": 0.00014917018058555593, "loss": 1.1441, "step": 17485 }, { "epoch": 0.673339749759384, "grad_norm": 1.7859970331192017, "learning_rate": 
0.00014914384678823447, "loss": 1.1376, "step": 17490 }, { "epoch": 0.6735322425409047, "grad_norm": 0.9251899719238281, "learning_rate": 0.00014911750849703378, "loss": 1.0523, "step": 17495 }, { "epoch": 0.6737247353224254, "grad_norm": 2.6382827758789062, "learning_rate": 0.00014909116571436228, "loss": 1.311, "step": 17500 }, { "epoch": 0.673917228103946, "grad_norm": 2.1472413539886475, "learning_rate": 0.00014906481844262888, "loss": 1.3515, "step": 17505 }, { "epoch": 0.6741097208854668, "grad_norm": 1.6070085763931274, "learning_rate": 0.0001490384666842429, "loss": 1.121, "step": 17510 }, { "epoch": 0.6743022136669875, "grad_norm": 1.637009620666504, "learning_rate": 0.00014901211044161393, "loss": 1.1249, "step": 17515 }, { "epoch": 0.6744947064485082, "grad_norm": 1.4050389528274536, "learning_rate": 0.00014898574971715218, "loss": 1.1719, "step": 17520 }, { "epoch": 0.6746871992300288, "grad_norm": 1.7863889932632446, "learning_rate": 0.0001489593845132681, "loss": 1.2576, "step": 17525 }, { "epoch": 0.6748796920115495, "grad_norm": 1.149431586265564, "learning_rate": 0.00014893301483237263, "loss": 1.0863, "step": 17530 }, { "epoch": 0.6750721847930703, "grad_norm": 1.4066704511642456, "learning_rate": 0.0001489066406768771, "loss": 1.1338, "step": 17535 }, { "epoch": 0.675264677574591, "grad_norm": 1.2270228862762451, "learning_rate": 0.00014888026204919327, "loss": 1.1118, "step": 17540 }, { "epoch": 0.6754571703561116, "grad_norm": 1.6182643175125122, "learning_rate": 0.0001488538789517333, "loss": 1.3269, "step": 17545 }, { "epoch": 0.6756496631376323, "grad_norm": 2.3642048835754395, "learning_rate": 0.0001488274913869097, "loss": 1.439, "step": 17550 }, { "epoch": 0.6758421559191531, "grad_norm": 1.8097171783447266, "learning_rate": 0.00014880109935713548, "loss": 1.093, "step": 17555 }, { "epoch": 0.6760346487006738, "grad_norm": 0.8650147914886475, "learning_rate": 0.00014877470286482397, "loss": 1.0413, "step": 17560 }, { "epoch": 
0.6762271414821944, "grad_norm": 1.2217522859573364, "learning_rate": 0.00014874830191238903, "loss": 1.1818, "step": 17565 }, { "epoch": 0.6764196342637151, "grad_norm": 1.1500258445739746, "learning_rate": 0.00014872189650224477, "loss": 1.0607, "step": 17570 }, { "epoch": 0.6766121270452358, "grad_norm": 1.1867146492004395, "learning_rate": 0.00014869548663680584, "loss": 0.9716, "step": 17575 }, { "epoch": 0.6768046198267565, "grad_norm": 1.0046483278274536, "learning_rate": 0.00014866907231848723, "loss": 1.1875, "step": 17580 }, { "epoch": 0.6769971126082772, "grad_norm": 2.1072323322296143, "learning_rate": 0.00014864265354970436, "loss": 1.194, "step": 17585 }, { "epoch": 0.6771896053897979, "grad_norm": 1.4290494918823242, "learning_rate": 0.00014861623033287307, "loss": 1.2389, "step": 17590 }, { "epoch": 0.6773820981713186, "grad_norm": 0.8890597820281982, "learning_rate": 0.00014858980267040957, "loss": 0.9362, "step": 17595 }, { "epoch": 0.6775745909528392, "grad_norm": 0.9515128135681152, "learning_rate": 0.00014856337056473045, "loss": 1.039, "step": 17600 }, { "epoch": 0.67776708373436, "grad_norm": 1.540008544921875, "learning_rate": 0.00014853693401825283, "loss": 1.1778, "step": 17605 }, { "epoch": 0.6779595765158807, "grad_norm": 1.0766023397445679, "learning_rate": 0.00014851049303339414, "loss": 0.9362, "step": 17610 }, { "epoch": 0.6781520692974013, "grad_norm": 1.854201078414917, "learning_rate": 0.00014848404761257217, "loss": 0.9427, "step": 17615 }, { "epoch": 0.678344562078922, "grad_norm": 2.292722463607788, "learning_rate": 0.00014845759775820527, "loss": 1.0835, "step": 17620 }, { "epoch": 0.6785370548604427, "grad_norm": 1.768997311592102, "learning_rate": 0.00014843114347271204, "loss": 1.0976, "step": 17625 }, { "epoch": 0.6787295476419635, "grad_norm": 2.223881721496582, "learning_rate": 0.00014840468475851154, "loss": 1.1417, "step": 17630 }, { "epoch": 0.6789220404234841, "grad_norm": 1.1589646339416504, "learning_rate": 
0.0001483782216180233, "loss": 1.2536, "step": 17635 }, { "epoch": 0.6791145332050048, "grad_norm": 1.6478285789489746, "learning_rate": 0.00014835175405366718, "loss": 1.1534, "step": 17640 }, { "epoch": 0.6793070259865255, "grad_norm": 1.6837091445922852, "learning_rate": 0.00014832528206786344, "loss": 1.3415, "step": 17645 }, { "epoch": 0.6794995187680462, "grad_norm": 1.6697105169296265, "learning_rate": 0.00014829880566303273, "loss": 1.0241, "step": 17650 }, { "epoch": 0.6796920115495669, "grad_norm": 1.08551025390625, "learning_rate": 0.00014827232484159624, "loss": 1.2322, "step": 17655 }, { "epoch": 0.6798845043310876, "grad_norm": 1.9399616718292236, "learning_rate": 0.00014824583960597543, "loss": 1.393, "step": 17660 }, { "epoch": 0.6800769971126083, "grad_norm": 1.0628485679626465, "learning_rate": 0.00014821934995859216, "loss": 1.2078, "step": 17665 }, { "epoch": 0.6802694898941289, "grad_norm": 0.9613144397735596, "learning_rate": 0.00014819285590186875, "loss": 1.1234, "step": 17670 }, { "epoch": 0.6804619826756496, "grad_norm": 0.9686816930770874, "learning_rate": 0.00014816635743822795, "loss": 1.1959, "step": 17675 }, { "epoch": 0.6806544754571704, "grad_norm": 1.4415709972381592, "learning_rate": 0.00014813985457009282, "loss": 1.0775, "step": 17680 }, { "epoch": 0.6808469682386911, "grad_norm": 1.5800002813339233, "learning_rate": 0.00014811334729988688, "loss": 1.0802, "step": 17685 }, { "epoch": 0.6810394610202117, "grad_norm": 1.1061028242111206, "learning_rate": 0.0001480868356300341, "loss": 1.0415, "step": 17690 }, { "epoch": 0.6812319538017324, "grad_norm": 2.3262946605682373, "learning_rate": 0.00014806031956295868, "loss": 1.2431, "step": 17695 }, { "epoch": 0.6814244465832531, "grad_norm": 1.6517562866210938, "learning_rate": 0.00014803379910108543, "loss": 1.1792, "step": 17700 }, { "epoch": 0.6816169393647739, "grad_norm": 1.3823506832122803, "learning_rate": 0.00014800727424683948, "loss": 1.1293, "step": 17705 }, { "epoch": 
0.6818094321462945, "grad_norm": 1.5448585748672485, "learning_rate": 0.00014798074500264627, "loss": 1.2126, "step": 17710 }, { "epoch": 0.6820019249278152, "grad_norm": 1.2395973205566406, "learning_rate": 0.0001479542113709318, "loss": 1.3002, "step": 17715 }, { "epoch": 0.6821944177093359, "grad_norm": 1.8366637229919434, "learning_rate": 0.00014792767335412233, "loss": 1.1798, "step": 17720 }, { "epoch": 0.6823869104908566, "grad_norm": 1.3830804824829102, "learning_rate": 0.00014790113095464465, "loss": 1.3001, "step": 17725 }, { "epoch": 0.6825794032723773, "grad_norm": 10.001764297485352, "learning_rate": 0.0001478745841749259, "loss": 1.1643, "step": 17730 }, { "epoch": 0.682771896053898, "grad_norm": 1.0113561153411865, "learning_rate": 0.00014784803301739352, "loss": 1.1725, "step": 17735 }, { "epoch": 0.6829643888354187, "grad_norm": 2.7240827083587646, "learning_rate": 0.00014782147748447554, "loss": 1.2348, "step": 17740 }, { "epoch": 0.6831568816169393, "grad_norm": 1.0802150964736938, "learning_rate": 0.00014779491757860015, "loss": 1.3556, "step": 17745 }, { "epoch": 0.6833493743984601, "grad_norm": 1.6339032649993896, "learning_rate": 0.00014776835330219623, "loss": 0.9967, "step": 17750 }, { "epoch": 0.6835418671799808, "grad_norm": 1.6983892917633057, "learning_rate": 0.0001477417846576928, "loss": 1.041, "step": 17755 }, { "epoch": 0.6837343599615014, "grad_norm": 1.6230486631393433, "learning_rate": 0.00014771521164751942, "loss": 1.2298, "step": 17760 }, { "epoch": 0.6839268527430221, "grad_norm": 1.1079175472259521, "learning_rate": 0.00014768863427410604, "loss": 1.214, "step": 17765 }, { "epoch": 0.6841193455245428, "grad_norm": 1.1601203680038452, "learning_rate": 0.00014766205253988294, "loss": 1.2399, "step": 17770 }, { "epoch": 0.6843118383060636, "grad_norm": 2.2776849269866943, "learning_rate": 0.00014763546644728088, "loss": 1.0071, "step": 17775 }, { "epoch": 0.6845043310875842, "grad_norm": 1.362021565437317, "learning_rate": 
0.00014760887599873094, "loss": 1.1233, "step": 17780 }, { "epoch": 0.6846968238691049, "grad_norm": 1.933518409729004, "learning_rate": 0.00014758228119666472, "loss": 1.0854, "step": 17785 }, { "epoch": 0.6848893166506256, "grad_norm": 1.148533582687378, "learning_rate": 0.00014755568204351407, "loss": 1.0694, "step": 17790 }, { "epoch": 0.6850818094321462, "grad_norm": 1.2880831956863403, "learning_rate": 0.0001475290785417113, "loss": 1.0814, "step": 17795 }, { "epoch": 0.685274302213667, "grad_norm": 1.5790437459945679, "learning_rate": 0.0001475024706936892, "loss": 1.0467, "step": 17800 }, { "epoch": 0.6854667949951877, "grad_norm": 1.636828899383545, "learning_rate": 0.0001474758585018808, "loss": 1.3419, "step": 17805 }, { "epoch": 0.6856592877767084, "grad_norm": 1.0403766632080078, "learning_rate": 0.00014744924196871963, "loss": 1.1468, "step": 17810 }, { "epoch": 0.685851780558229, "grad_norm": 1.1266472339630127, "learning_rate": 0.0001474226210966396, "loss": 1.2723, "step": 17815 }, { "epoch": 0.6860442733397497, "grad_norm": 1.352543830871582, "learning_rate": 0.00014739599588807506, "loss": 1.1345, "step": 17820 }, { "epoch": 0.6862367661212705, "grad_norm": 1.674023151397705, "learning_rate": 0.00014736936634546062, "loss": 1.2522, "step": 17825 }, { "epoch": 0.6864292589027912, "grad_norm": 1.3684656620025635, "learning_rate": 0.00014734273247123144, "loss": 1.1169, "step": 17830 }, { "epoch": 0.6866217516843118, "grad_norm": 1.917075514793396, "learning_rate": 0.00014731609426782297, "loss": 1.2523, "step": 17835 }, { "epoch": 0.6868142444658325, "grad_norm": 1.5463966131210327, "learning_rate": 0.00014728945173767116, "loss": 0.9929, "step": 17840 }, { "epoch": 0.6870067372473532, "grad_norm": 1.7427698373794556, "learning_rate": 0.00014726280488321222, "loss": 1.22, "step": 17845 }, { "epoch": 0.687199230028874, "grad_norm": 1.8021422624588013, "learning_rate": 0.0001472361537068829, "loss": 1.0429, "step": 17850 }, { "epoch": 
0.6873917228103946, "grad_norm": 1.571053147315979, "learning_rate": 0.0001472094982111202, "loss": 1.17, "step": 17855 }, { "epoch": 0.6875842155919153, "grad_norm": 1.3607596158981323, "learning_rate": 0.00014718283839836166, "loss": 1.0644, "step": 17860 }, { "epoch": 0.687776708373436, "grad_norm": 0.9396845102310181, "learning_rate": 0.00014715617427104504, "loss": 1.0807, "step": 17865 }, { "epoch": 0.6879692011549567, "grad_norm": 1.605432152748108, "learning_rate": 0.00014712950583160872, "loss": 1.0641, "step": 17870 }, { "epoch": 0.6881616939364774, "grad_norm": 1.4847965240478516, "learning_rate": 0.0001471081679769722, "loss": 1.1625, "step": 17875 }, { "epoch": 0.6883541867179981, "grad_norm": 1.930336594581604, "learning_rate": 0.00014708149178186593, "loss": 1.3346, "step": 17880 }, { "epoch": 0.6885466794995188, "grad_norm": 1.7398570775985718, "learning_rate": 0.00014705481128146917, "loss": 1.2316, "step": 17885 }, { "epoch": 0.6887391722810394, "grad_norm": 1.5817015171051025, "learning_rate": 0.00014702812647822162, "loss": 1.0292, "step": 17890 }, { "epoch": 0.6889316650625602, "grad_norm": 3.2520430088043213, "learning_rate": 0.00014700143737456342, "loss": 1.088, "step": 17895 }, { "epoch": 0.6891241578440809, "grad_norm": 2.165456533432007, "learning_rate": 0.00014697474397293517, "loss": 0.9452, "step": 17900 }, { "epoch": 0.6893166506256015, "grad_norm": 0.9637191295623779, "learning_rate": 0.00014694804627577771, "loss": 1.266, "step": 17905 }, { "epoch": 0.6895091434071222, "grad_norm": 1.9606934785842896, "learning_rate": 0.00014692134428553248, "loss": 1.0773, "step": 17910 }, { "epoch": 0.6897016361886429, "grad_norm": 1.1911338567733765, "learning_rate": 0.0001468946380046411, "loss": 1.1359, "step": 17915 }, { "epoch": 0.6898941289701637, "grad_norm": 1.3913235664367676, "learning_rate": 0.00014686792743554575, "loss": 1.3053, "step": 17920 }, { "epoch": 0.6900866217516843, "grad_norm": 1.2314075231552124, "learning_rate": 
0.00014684121258068888, "loss": 1.0624, "step": 17925 }, { "epoch": 0.690279114533205, "grad_norm": 2.1499176025390625, "learning_rate": 0.00014681449344251338, "loss": 1.2147, "step": 17930 }, { "epoch": 0.6904716073147257, "grad_norm": 1.6417664289474487, "learning_rate": 0.00014678777002346264, "loss": 1.1139, "step": 17935 }, { "epoch": 0.6906641000962463, "grad_norm": 1.181154727935791, "learning_rate": 0.00014676104232598026, "loss": 1.0503, "step": 17940 }, { "epoch": 0.6908565928777671, "grad_norm": 1.7786331176757812, "learning_rate": 0.00014673431035251027, "loss": 1.05, "step": 17945 }, { "epoch": 0.6910490856592878, "grad_norm": 0.948625922203064, "learning_rate": 0.00014670757410549724, "loss": 1.0888, "step": 17950 }, { "epoch": 0.6912415784408085, "grad_norm": 1.9812164306640625, "learning_rate": 0.00014668083358738597, "loss": 1.1467, "step": 17955 }, { "epoch": 0.6914340712223291, "grad_norm": 0.9091313481330872, "learning_rate": 0.0001466540888006217, "loss": 1.1226, "step": 17960 }, { "epoch": 0.6916265640038498, "grad_norm": 2.100114583969116, "learning_rate": 0.00014662733974765005, "loss": 1.1233, "step": 17965 }, { "epoch": 0.6918190567853706, "grad_norm": 2.0999033451080322, "learning_rate": 0.00014660058643091702, "loss": 1.086, "step": 17970 }, { "epoch": 0.6920115495668913, "grad_norm": 1.543411374092102, "learning_rate": 0.0001465738288528691, "loss": 1.218, "step": 17975 }, { "epoch": 0.6922040423484119, "grad_norm": 2.6429097652435303, "learning_rate": 0.00014654706701595305, "loss": 1.1425, "step": 17980 }, { "epoch": 0.6923965351299326, "grad_norm": 1.258535385131836, "learning_rate": 0.00014652030092261606, "loss": 1.124, "step": 17985 }, { "epoch": 0.6925890279114533, "grad_norm": 0.9203128814697266, "learning_rate": 0.00014649353057530573, "loss": 1.0035, "step": 17990 }, { "epoch": 0.692781520692974, "grad_norm": 1.7482789754867554, "learning_rate": 0.00014646675597647003, "loss": 1.2393, "step": 17995 }, { "epoch": 
0.6929740134744947, "grad_norm": 1.3026279211044312, "learning_rate": 0.0001464399771285573, "loss": 1.294, "step": 18000 }, { "epoch": 0.6931665062560154, "grad_norm": 1.5518649816513062, "learning_rate": 0.00014641319403401628, "loss": 1.2397, "step": 18005 }, { "epoch": 0.6933589990375361, "grad_norm": 1.3904852867126465, "learning_rate": 0.00014638640669529615, "loss": 1.079, "step": 18010 }, { "epoch": 0.6935514918190567, "grad_norm": 0.7677931189537048, "learning_rate": 0.0001463596151148464, "loss": 1.1485, "step": 18015 }, { "epoch": 0.6937439846005775, "grad_norm": 1.1935845613479614, "learning_rate": 0.00014633281929511696, "loss": 1.167, "step": 18020 }, { "epoch": 0.6939364773820982, "grad_norm": 1.8612521886825562, "learning_rate": 0.00014630601923855814, "loss": 1.2335, "step": 18025 }, { "epoch": 0.6941289701636189, "grad_norm": 1.9979881048202515, "learning_rate": 0.00014627921494762055, "loss": 1.0421, "step": 18030 }, { "epoch": 0.6943214629451395, "grad_norm": 1.9426400661468506, "learning_rate": 0.00014625240642475538, "loss": 1.0918, "step": 18035 }, { "epoch": 0.6945139557266603, "grad_norm": 0.9990954399108887, "learning_rate": 0.000146225593672414, "loss": 1.3456, "step": 18040 }, { "epoch": 0.694706448508181, "grad_norm": 2.187206745147705, "learning_rate": 0.00014619877669304834, "loss": 1.0926, "step": 18045 }, { "epoch": 0.6948989412897016, "grad_norm": 1.5417639017105103, "learning_rate": 0.00014617195548911053, "loss": 1.2796, "step": 18050 }, { "epoch": 0.6950914340712223, "grad_norm": 1.476150631904602, "learning_rate": 0.0001461451300630533, "loss": 1.1623, "step": 18055 }, { "epoch": 0.695283926852743, "grad_norm": 1.6524615287780762, "learning_rate": 0.0001461183004173296, "loss": 1.0976, "step": 18060 }, { "epoch": 0.6954764196342638, "grad_norm": 1.4800169467926025, "learning_rate": 0.0001460914665543928, "loss": 1.2641, "step": 18065 }, { "epoch": 0.6956689124157844, "grad_norm": 1.2046303749084473, "learning_rate": 
0.00014606462847669674, "loss": 1.0037, "step": 18070 }, { "epoch": 0.6958614051973051, "grad_norm": 1.3457711935043335, "learning_rate": 0.00014603778618669556, "loss": 1.1599, "step": 18075 }, { "epoch": 0.6960538979788258, "grad_norm": 1.8690896034240723, "learning_rate": 0.0001460109396868438, "loss": 1.3016, "step": 18080 }, { "epoch": 0.6962463907603464, "grad_norm": 0.8788353204727173, "learning_rate": 0.00014598408897959639, "loss": 1.0261, "step": 18085 }, { "epoch": 0.6964388835418672, "grad_norm": 1.064239501953125, "learning_rate": 0.00014595723406740868, "loss": 1.1159, "step": 18090 }, { "epoch": 0.6966313763233879, "grad_norm": 0.9102209210395813, "learning_rate": 0.00014593037495273635, "loss": 1.1263, "step": 18095 }, { "epoch": 0.6968238691049086, "grad_norm": 1.4841855764389038, "learning_rate": 0.00014590351163803545, "loss": 1.0526, "step": 18100 }, { "epoch": 0.6970163618864292, "grad_norm": 2.282543182373047, "learning_rate": 0.00014587664412576254, "loss": 1.0876, "step": 18105 }, { "epoch": 0.6972088546679499, "grad_norm": 1.149782657623291, "learning_rate": 0.0001458497724183744, "loss": 1.2092, "step": 18110 }, { "epoch": 0.6974013474494707, "grad_norm": 1.6531153917312622, "learning_rate": 0.0001458228965183283, "loss": 1.2421, "step": 18115 }, { "epoch": 0.6975938402309914, "grad_norm": 2.376281976699829, "learning_rate": 0.00014579601642808192, "loss": 1.2179, "step": 18120 }, { "epoch": 0.697786333012512, "grad_norm": 1.9077723026275635, "learning_rate": 0.0001457691321500932, "loss": 1.1962, "step": 18125 }, { "epoch": 0.6979788257940327, "grad_norm": 1.3130842447280884, "learning_rate": 0.00014574224368682048, "loss": 1.3169, "step": 18130 }, { "epoch": 0.6981713185755534, "grad_norm": 1.0211979150772095, "learning_rate": 0.00014571535104072262, "loss": 1.0256, "step": 18135 }, { "epoch": 0.6983638113570741, "grad_norm": 1.7479397058486938, "learning_rate": 0.00014568845421425875, "loss": 1.0906, "step": 18140 }, { "epoch": 
0.6985563041385948, "grad_norm": 1.3305407762527466, "learning_rate": 0.00014566155320988838, "loss": 1.206, "step": 18145 }, { "epoch": 0.6987487969201155, "grad_norm": 1.2185992002487183, "learning_rate": 0.00014563464803007145, "loss": 1.2765, "step": 18150 }, { "epoch": 0.6989412897016362, "grad_norm": 1.3256112337112427, "learning_rate": 0.00014560773867726827, "loss": 1.0899, "step": 18155 }, { "epoch": 0.6991337824831568, "grad_norm": 1.9090956449508667, "learning_rate": 0.0001455808251539395, "loss": 1.1944, "step": 18160 }, { "epoch": 0.6993262752646776, "grad_norm": 1.078116774559021, "learning_rate": 0.00014555390746254622, "loss": 1.1393, "step": 18165 }, { "epoch": 0.6995187680461983, "grad_norm": 1.21144437789917, "learning_rate": 0.00014552698560554988, "loss": 1.0835, "step": 18170 }, { "epoch": 0.699711260827719, "grad_norm": 1.4013081789016724, "learning_rate": 0.00014550005958541227, "loss": 1.0785, "step": 18175 }, { "epoch": 0.6999037536092396, "grad_norm": 1.102122187614441, "learning_rate": 0.00014547312940459562, "loss": 1.0839, "step": 18180 }, { "epoch": 0.7000962463907604, "grad_norm": 1.602994680404663, "learning_rate": 0.00014544619506556256, "loss": 1.2608, "step": 18185 }, { "epoch": 0.7002887391722811, "grad_norm": 2.8694801330566406, "learning_rate": 0.000145419256570776, "loss": 1.3161, "step": 18190 }, { "epoch": 0.7004812319538017, "grad_norm": 1.5687551498413086, "learning_rate": 0.00014539231392269927, "loss": 1.0668, "step": 18195 }, { "epoch": 0.7006737247353224, "grad_norm": 1.1013094186782837, "learning_rate": 0.00014536536712379618, "loss": 1.0829, "step": 18200 }, { "epoch": 0.7008662175168431, "grad_norm": 1.4294344186782837, "learning_rate": 0.00014533841617653075, "loss": 1.0003, "step": 18205 }, { "epoch": 0.7010587102983639, "grad_norm": 1.168997049331665, "learning_rate": 0.0001453114610833675, "loss": 1.2252, "step": 18210 }, { "epoch": 0.7012512030798845, "grad_norm": 1.21929132938385, "learning_rate": 
0.0001452845018467713, "loss": 1.102, "step": 18215 }, { "epoch": 0.7014436958614052, "grad_norm": 1.0682016611099243, "learning_rate": 0.00014525753846920738, "loss": 1.0219, "step": 18220 }, { "epoch": 0.7016361886429259, "grad_norm": 1.210161566734314, "learning_rate": 0.00014523057095314142, "loss": 1.0666, "step": 18225 }, { "epoch": 0.7018286814244465, "grad_norm": 0.9966996312141418, "learning_rate": 0.0001452035993010393, "loss": 1.1343, "step": 18230 }, { "epoch": 0.7020211742059673, "grad_norm": 1.2477959394454956, "learning_rate": 0.00014517662351536752, "loss": 1.2147, "step": 18235 }, { "epoch": 0.702213666987488, "grad_norm": 1.8020172119140625, "learning_rate": 0.00014514964359859276, "loss": 1.1945, "step": 18240 }, { "epoch": 0.7024061597690087, "grad_norm": 1.0535303354263306, "learning_rate": 0.0001451226595531822, "loss": 1.0792, "step": 18245 }, { "epoch": 0.7025986525505293, "grad_norm": 1.913590431213379, "learning_rate": 0.0001450956713816033, "loss": 1.1344, "step": 18250 }, { "epoch": 0.70279114533205, "grad_norm": 0.998621940612793, "learning_rate": 0.00014506867908632403, "loss": 1.1139, "step": 18255 }, { "epoch": 0.7029836381135708, "grad_norm": 1.8913546800613403, "learning_rate": 0.0001450416826698126, "loss": 1.0621, "step": 18260 }, { "epoch": 0.7031761308950915, "grad_norm": 1.0329716205596924, "learning_rate": 0.00014501468213453763, "loss": 1.2732, "step": 18265 }, { "epoch": 0.7033686236766121, "grad_norm": 0.9243387579917908, "learning_rate": 0.0001449876774829682, "loss": 1.2272, "step": 18270 }, { "epoch": 0.7035611164581328, "grad_norm": 1.6289262771606445, "learning_rate": 0.0001449606687175737, "loss": 1.0912, "step": 18275 }, { "epoch": 0.7037536092396535, "grad_norm": 2.005293607711792, "learning_rate": 0.00014493365584082384, "loss": 1.018, "step": 18280 }, { "epoch": 0.7039461020211742, "grad_norm": 1.2743504047393799, "learning_rate": 0.00014490663885518881, "loss": 1.0026, "step": 18285 }, { "epoch": 
0.7041385948026949, "grad_norm": 1.4915635585784912, "learning_rate": 0.00014487961776313922, "loss": 1.0489, "step": 18290 }, { "epoch": 0.7043310875842156, "grad_norm": 0.9605044722557068, "learning_rate": 0.00014485259256714577, "loss": 1.1053, "step": 18295 }, { "epoch": 0.7045235803657363, "grad_norm": 1.8121784925460815, "learning_rate": 0.0001448255632696799, "loss": 1.1862, "step": 18300 }, { "epoch": 0.7047160731472569, "grad_norm": 1.2540571689605713, "learning_rate": 0.00014479852987321322, "loss": 1.1361, "step": 18305 }, { "epoch": 0.7049085659287777, "grad_norm": 1.4160270690917969, "learning_rate": 0.00014477149238021776, "loss": 1.0917, "step": 18310 }, { "epoch": 0.7051010587102984, "grad_norm": 1.4298075437545776, "learning_rate": 0.0001447444507931659, "loss": 1.1407, "step": 18315 }, { "epoch": 0.705293551491819, "grad_norm": 1.0214334726333618, "learning_rate": 0.00014471740511453037, "loss": 1.0714, "step": 18320 }, { "epoch": 0.7054860442733397, "grad_norm": 1.6246428489685059, "learning_rate": 0.00014469035534678444, "loss": 1.258, "step": 18325 }, { "epoch": 0.7056785370548604, "grad_norm": 1.5467473268508911, "learning_rate": 0.0001446633014924015, "loss": 1.1811, "step": 18330 }, { "epoch": 0.7058710298363812, "grad_norm": 2.038041114807129, "learning_rate": 0.00014463624355385557, "loss": 1.1339, "step": 18335 }, { "epoch": 0.7060635226179018, "grad_norm": 1.5328725576400757, "learning_rate": 0.0001446091815336208, "loss": 1.2261, "step": 18340 }, { "epoch": 0.7062560153994225, "grad_norm": 0.9550712704658508, "learning_rate": 0.0001445821154341719, "loss": 1.0973, "step": 18345 }, { "epoch": 0.7064485081809432, "grad_norm": 1.4610974788665771, "learning_rate": 0.0001445550452579839, "loss": 1.2341, "step": 18350 }, { "epoch": 0.706641000962464, "grad_norm": 1.9539941549301147, "learning_rate": 0.00014452797100753212, "loss": 1.1115, "step": 18355 }, { "epoch": 0.7068334937439846, "grad_norm": 1.136670708656311, "learning_rate": 
0.0001445008926852924, "loss": 1.1883, "step": 18360 }, { "epoch": 0.7070259865255053, "grad_norm": 1.2136088609695435, "learning_rate": 0.00014447381029374082, "loss": 1.1384, "step": 18365 }, { "epoch": 0.707218479307026, "grad_norm": 1.3836339712142944, "learning_rate": 0.00014444672383535388, "loss": 1.2371, "step": 18370 }, { "epoch": 0.7074109720885466, "grad_norm": 1.6226662397384644, "learning_rate": 0.00014441963331260848, "loss": 1.3057, "step": 18375 }, { "epoch": 0.7076034648700674, "grad_norm": 1.249576449394226, "learning_rate": 0.0001443925387279819, "loss": 1.0849, "step": 18380 }, { "epoch": 0.7077959576515881, "grad_norm": 1.9330114126205444, "learning_rate": 0.0001443654400839517, "loss": 0.9933, "step": 18385 }, { "epoch": 0.7079884504331088, "grad_norm": 1.4878582954406738, "learning_rate": 0.0001443383373829959, "loss": 0.8842, "step": 18390 }, { "epoch": 0.7081809432146294, "grad_norm": 2.3553292751312256, "learning_rate": 0.00014431123062759286, "loss": 1.1733, "step": 18395 }, { "epoch": 0.7083734359961501, "grad_norm": 0.8834003210067749, "learning_rate": 0.00014428411982022135, "loss": 1.1275, "step": 18400 }, { "epoch": 0.7085659287776709, "grad_norm": 1.331040620803833, "learning_rate": 0.00014425700496336038, "loss": 1.0753, "step": 18405 }, { "epoch": 0.7087584215591916, "grad_norm": 1.0972214937210083, "learning_rate": 0.0001442298860594895, "loss": 1.2045, "step": 18410 }, { "epoch": 0.7089509143407122, "grad_norm": 1.5350794792175293, "learning_rate": 0.00014420276311108857, "loss": 1.0097, "step": 18415 }, { "epoch": 0.7091434071222329, "grad_norm": 1.8360435962677002, "learning_rate": 0.00014417563612063777, "loss": 1.177, "step": 18420 }, { "epoch": 0.7093358999037536, "grad_norm": 1.0898863077163696, "learning_rate": 0.00014414850509061764, "loss": 1.0374, "step": 18425 }, { "epoch": 0.7095283926852743, "grad_norm": 1.2654744386672974, "learning_rate": 0.00014412137002350919, "loss": 1.1494, "step": 18430 }, { "epoch": 
0.709720885466795, "grad_norm": 1.8603087663650513, "learning_rate": 0.00014409423092179375, "loss": 1.2723, "step": 18435 }, { "epoch": 0.7099133782483157, "grad_norm": 0.9974476099014282, "learning_rate": 0.00014406708778795296, "loss": 1.1139, "step": 18440 }, { "epoch": 0.7101058710298364, "grad_norm": 0.998330295085907, "learning_rate": 0.00014403994062446893, "loss": 1.2881, "step": 18445 }, { "epoch": 0.710298363811357, "grad_norm": 2.04758882522583, "learning_rate": 0.00014401278943382406, "loss": 1.0089, "step": 18450 }, { "epoch": 0.7104908565928778, "grad_norm": 1.301059603691101, "learning_rate": 0.0001439856342185012, "loss": 1.1405, "step": 18455 }, { "epoch": 0.7106833493743985, "grad_norm": 1.684041142463684, "learning_rate": 0.00014395847498098338, "loss": 1.1387, "step": 18460 }, { "epoch": 0.7108758421559191, "grad_norm": 1.95292067527771, "learning_rate": 0.0001439313117237543, "loss": 1.1659, "step": 18465 }, { "epoch": 0.7110683349374398, "grad_norm": 1.1917790174484253, "learning_rate": 0.00014390414444929775, "loss": 1.0497, "step": 18470 }, { "epoch": 0.7112608277189605, "grad_norm": 1.1583658456802368, "learning_rate": 0.000143876973160098, "loss": 1.2276, "step": 18475 }, { "epoch": 0.7114533205004813, "grad_norm": 1.116721749305725, "learning_rate": 0.00014384979785863976, "loss": 1.3688, "step": 18480 }, { "epoch": 0.7116458132820019, "grad_norm": 1.1651076078414917, "learning_rate": 0.00014382261854740795, "loss": 1.3093, "step": 18485 }, { "epoch": 0.7118383060635226, "grad_norm": 1.2162317037582397, "learning_rate": 0.00014379543522888798, "loss": 1.1324, "step": 18490 }, { "epoch": 0.7120307988450433, "grad_norm": 1.5792020559310913, "learning_rate": 0.0001437682479055656, "loss": 1.123, "step": 18495 }, { "epoch": 0.7122232916265641, "grad_norm": 0.9636641144752502, "learning_rate": 0.00014374105657992688, "loss": 1.0547, "step": 18500 }, { "epoch": 0.7124157844080847, "grad_norm": 1.1409319639205933, "learning_rate": 
0.00014371386125445828, "loss": 1.1277, "step": 18505 }, { "epoch": 0.7126082771896054, "grad_norm": 1.074267029762268, "learning_rate": 0.00014368666193164664, "loss": 1.1041, "step": 18510 }, { "epoch": 0.7128007699711261, "grad_norm": 1.2324203252792358, "learning_rate": 0.00014365945861397918, "loss": 1.1274, "step": 18515 }, { "epoch": 0.7129932627526467, "grad_norm": 1.2441449165344238, "learning_rate": 0.00014363225130394343, "loss": 1.0739, "step": 18520 }, { "epoch": 0.7131857555341675, "grad_norm": 1.0249239206314087, "learning_rate": 0.00014360504000402737, "loss": 1.1945, "step": 18525 }, { "epoch": 0.7133782483156882, "grad_norm": 1.0297977924346924, "learning_rate": 0.00014357782471671922, "loss": 1.1694, "step": 18530 }, { "epoch": 0.7135707410972089, "grad_norm": 1.6610252857208252, "learning_rate": 0.00014355060544450767, "loss": 1.2034, "step": 18535 }, { "epoch": 0.7137632338787295, "grad_norm": 1.290869951248169, "learning_rate": 0.0001435233821898818, "loss": 1.1195, "step": 18540 }, { "epoch": 0.7139557266602502, "grad_norm": 1.4730745553970337, "learning_rate": 0.0001434961549553309, "loss": 1.1237, "step": 18545 }, { "epoch": 0.714148219441771, "grad_norm": 1.0857551097869873, "learning_rate": 0.00014346892374334479, "loss": 1.013, "step": 18550 }, { "epoch": 0.7143407122232917, "grad_norm": 1.0761737823486328, "learning_rate": 0.00014344168855641356, "loss": 0.9948, "step": 18555 }, { "epoch": 0.7145332050048123, "grad_norm": 2.012099027633667, "learning_rate": 0.00014341444939702767, "loss": 1.1598, "step": 18560 }, { "epoch": 0.714725697786333, "grad_norm": 1.837538242340088, "learning_rate": 0.000143387206267678, "loss": 1.1389, "step": 18565 }, { "epoch": 0.7149181905678537, "grad_norm": 1.1099295616149902, "learning_rate": 0.0001433599591708557, "loss": 1.0835, "step": 18570 }, { "epoch": 0.7151106833493744, "grad_norm": 0.9746969938278198, "learning_rate": 0.00014333270810905238, "loss": 0.973, "step": 18575 }, { "epoch": 
0.7153031761308951, "grad_norm": 1.9786537885665894, "learning_rate": 0.00014330545308475996, "loss": 1.1564, "step": 18580 }, { "epoch": 0.7154956689124158, "grad_norm": 1.020973801612854, "learning_rate": 0.0001432781941004707, "loss": 1.0202, "step": 18585 }, { "epoch": 0.7156881616939365, "grad_norm": 1.2314329147338867, "learning_rate": 0.0001432509311586773, "loss": 1.2654, "step": 18590 }, { "epoch": 0.7158806544754571, "grad_norm": 1.1897294521331787, "learning_rate": 0.00014322366426187277, "loss": 1.3241, "step": 18595 }, { "epoch": 0.7160731472569779, "grad_norm": 1.2122468948364258, "learning_rate": 0.00014319639341255048, "loss": 1.0044, "step": 18600 }, { "epoch": 0.7162656400384986, "grad_norm": 1.5471996068954468, "learning_rate": 0.00014316911861320415, "loss": 1.2251, "step": 18605 }, { "epoch": 0.7164581328200192, "grad_norm": 1.4441865682601929, "learning_rate": 0.00014314183986632788, "loss": 1.1717, "step": 18610 }, { "epoch": 0.7166506256015399, "grad_norm": 1.092637300491333, "learning_rate": 0.00014311455717441616, "loss": 1.0724, "step": 18615 }, { "epoch": 0.7168431183830606, "grad_norm": 1.0974675416946411, "learning_rate": 0.00014308727053996377, "loss": 1.0623, "step": 18620 }, { "epoch": 0.7170356111645814, "grad_norm": 1.513769507408142, "learning_rate": 0.00014305997996546594, "loss": 1.1027, "step": 18625 }, { "epoch": 0.717228103946102, "grad_norm": 1.0637279748916626, "learning_rate": 0.00014303268545341817, "loss": 1.0313, "step": 18630 }, { "epoch": 0.7174205967276227, "grad_norm": 1.3569130897521973, "learning_rate": 0.00014300538700631643, "loss": 1.0324, "step": 18635 }, { "epoch": 0.7176130895091434, "grad_norm": 1.0008260011672974, "learning_rate": 0.00014297808462665688, "loss": 1.0383, "step": 18640 }, { "epoch": 0.717805582290664, "grad_norm": 1.291493535041809, "learning_rate": 0.0001429507783169362, "loss": 1.2128, "step": 18645 }, { "epoch": 0.7179980750721848, "grad_norm": 2.5597760677337646, "learning_rate": 
0.0001429234680796514, "loss": 1.146, "step": 18650 }, { "epoch": 0.7181905678537055, "grad_norm": 2.4308478832244873, "learning_rate": 0.00014289615391729974, "loss": 1.3797, "step": 18655 }, { "epoch": 0.7183830606352262, "grad_norm": 1.1110010147094727, "learning_rate": 0.00014286883583237896, "loss": 1.2471, "step": 18660 }, { "epoch": 0.7185755534167468, "grad_norm": 1.075013279914856, "learning_rate": 0.00014284151382738718, "loss": 1.0836, "step": 18665 }, { "epoch": 0.7187680461982676, "grad_norm": 1.9422922134399414, "learning_rate": 0.00014281418790482273, "loss": 1.3271, "step": 18670 }, { "epoch": 0.7189605389797883, "grad_norm": 1.58540678024292, "learning_rate": 0.00014278685806718442, "loss": 1.0762, "step": 18675 }, { "epoch": 0.719153031761309, "grad_norm": 1.1696521043777466, "learning_rate": 0.00014275952431697138, "loss": 1.1783, "step": 18680 }, { "epoch": 0.7193455245428296, "grad_norm": 1.6518898010253906, "learning_rate": 0.0001427321866566831, "loss": 0.9509, "step": 18685 }, { "epoch": 0.7195380173243503, "grad_norm": 1.2448405027389526, "learning_rate": 0.0001427048450888194, "loss": 1.1316, "step": 18690 }, { "epoch": 0.7197305101058711, "grad_norm": 0.9715486168861389, "learning_rate": 0.00014267749961588053, "loss": 1.1547, "step": 18695 }, { "epoch": 0.7199230028873917, "grad_norm": 1.176511287689209, "learning_rate": 0.00014265015024036702, "loss": 1.0325, "step": 18700 }, { "epoch": 0.7201154956689124, "grad_norm": 1.096604824066162, "learning_rate": 0.0001426227969647798, "loss": 0.9389, "step": 18705 }, { "epoch": 0.7203079884504331, "grad_norm": 1.2895269393920898, "learning_rate": 0.00014259543979162017, "loss": 1.1157, "step": 18710 }, { "epoch": 0.7205004812319538, "grad_norm": 1.1590831279754639, "learning_rate": 0.00014256807872338974, "loss": 1.0154, "step": 18715 }, { "epoch": 0.7206929740134745, "grad_norm": 1.1659713983535767, "learning_rate": 0.00014254071376259046, "loss": 1.0744, "step": 18720 }, { "epoch": 
0.7208854667949952, "grad_norm": 1.3548671007156372, "learning_rate": 0.00014251334491172473, "loss": 0.9823, "step": 18725 }, { "epoch": 0.7210779595765159, "grad_norm": 1.5639405250549316, "learning_rate": 0.00014248597217329526, "loss": 1.1793, "step": 18730 }, { "epoch": 0.7212704523580366, "grad_norm": 1.9836759567260742, "learning_rate": 0.00014245859554980504, "loss": 1.1827, "step": 18735 }, { "epoch": 0.7214629451395572, "grad_norm": 1.2241086959838867, "learning_rate": 0.00014243121504375753, "loss": 1.2403, "step": 18740 }, { "epoch": 0.721655437921078, "grad_norm": 1.1298317909240723, "learning_rate": 0.0001424038306576565, "loss": 1.1577, "step": 18745 }, { "epoch": 0.7218479307025987, "grad_norm": 1.325210452079773, "learning_rate": 0.00014237644239400605, "loss": 1.1232, "step": 18750 }, { "epoch": 0.7220404234841193, "grad_norm": 1.613929033279419, "learning_rate": 0.00014234905025531066, "loss": 0.9627, "step": 18755 }, { "epoch": 0.72223291626564, "grad_norm": 1.6307876110076904, "learning_rate": 0.00014232165424407517, "loss": 1.0229, "step": 18760 }, { "epoch": 0.7224254090471607, "grad_norm": 0.8971173167228699, "learning_rate": 0.00014229425436280475, "loss": 1.1371, "step": 18765 }, { "epoch": 0.7226179018286815, "grad_norm": 1.3740814924240112, "learning_rate": 0.00014226685061400496, "loss": 1.1431, "step": 18770 }, { "epoch": 0.7228103946102021, "grad_norm": 1.7433820962905884, "learning_rate": 0.00014223944300018163, "loss": 1.1876, "step": 18775 }, { "epoch": 0.7230028873917228, "grad_norm": 1.1470065116882324, "learning_rate": 0.0001422120315238411, "loss": 1.1971, "step": 18780 }, { "epoch": 0.7231953801732435, "grad_norm": 2.0566489696502686, "learning_rate": 0.00014218461618748987, "loss": 1.0274, "step": 18785 }, { "epoch": 0.7233878729547641, "grad_norm": 1.589087724685669, "learning_rate": 0.00014215719699363496, "loss": 1.067, "step": 18790 }, { "epoch": 0.7235803657362849, "grad_norm": 2.473461866378784, "learning_rate": 
0.00014212977394478365, "loss": 1.2185, "step": 18795 }, { "epoch": 0.7237728585178056, "grad_norm": 1.3214609622955322, "learning_rate": 0.00014210234704344359, "loss": 1.2501, "step": 18800 }, { "epoch": 0.7239653512993263, "grad_norm": 2.281226873397827, "learning_rate": 0.0001420749162921228, "loss": 1.2262, "step": 18805 }, { "epoch": 0.7241578440808469, "grad_norm": 1.188148021697998, "learning_rate": 0.0001420474816933296, "loss": 1.3338, "step": 18810 }, { "epoch": 0.7243503368623677, "grad_norm": 2.0242867469787598, "learning_rate": 0.00014202004324957279, "loss": 1.1157, "step": 18815 }, { "epoch": 0.7245428296438884, "grad_norm": 1.2399152517318726, "learning_rate": 0.00014199260096336134, "loss": 1.119, "step": 18820 }, { "epoch": 0.7247353224254091, "grad_norm": 1.7323557138442993, "learning_rate": 0.00014196515483720477, "loss": 1.1672, "step": 18825 }, { "epoch": 0.7249278152069297, "grad_norm": 1.4888850450515747, "learning_rate": 0.00014193770487361273, "loss": 0.9814, "step": 18830 }, { "epoch": 0.7251203079884504, "grad_norm": 1.5241479873657227, "learning_rate": 0.0001419102510750954, "loss": 1.0614, "step": 18835 }, { "epoch": 0.7253128007699712, "grad_norm": 1.2932441234588623, "learning_rate": 0.00014188279344416323, "loss": 1.1905, "step": 18840 }, { "epoch": 0.7255052935514918, "grad_norm": 1.4357131719589233, "learning_rate": 0.0001418553319833271, "loss": 1.1303, "step": 18845 }, { "epoch": 0.7256977863330125, "grad_norm": 2.1818439960479736, "learning_rate": 0.00014182786669509806, "loss": 1.2141, "step": 18850 }, { "epoch": 0.7258902791145332, "grad_norm": 2.111520528793335, "learning_rate": 0.00014180039758198774, "loss": 1.165, "step": 18855 }, { "epoch": 0.7260827718960539, "grad_norm": 1.3923039436340332, "learning_rate": 0.00014177292464650796, "loss": 1.3364, "step": 18860 }, { "epoch": 0.7262752646775746, "grad_norm": 1.905661702156067, "learning_rate": 0.0001417454478911709, "loss": 1.1535, "step": 18865 }, { "epoch": 
0.7264677574590953, "grad_norm": 1.1814746856689453, "learning_rate": 0.0001417179673184892, "loss": 1.2141, "step": 18870 }, { "epoch": 0.726660250240616, "grad_norm": 1.4515434503555298, "learning_rate": 0.00014169048293097576, "loss": 1.0955, "step": 18875 }, { "epoch": 0.7268527430221366, "grad_norm": 1.2174112796783447, "learning_rate": 0.0001416629947311438, "loss": 1.0399, "step": 18880 }, { "epoch": 0.7270452358036573, "grad_norm": 1.3769662380218506, "learning_rate": 0.00014163550272150698, "loss": 1.2164, "step": 18885 }, { "epoch": 0.7272377285851781, "grad_norm": 1.3401464223861694, "learning_rate": 0.00014160800690457927, "loss": 1.1039, "step": 18890 }, { "epoch": 0.7274302213666988, "grad_norm": 1.1210380792617798, "learning_rate": 0.0001415805072828749, "loss": 1.0771, "step": 18895 }, { "epoch": 0.7276227141482194, "grad_norm": 1.3425636291503906, "learning_rate": 0.00014155300385890863, "loss": 1.1506, "step": 18900 }, { "epoch": 0.7278152069297401, "grad_norm": 1.155220866203308, "learning_rate": 0.0001415254966351954, "loss": 1.1321, "step": 18905 }, { "epoch": 0.7280076997112608, "grad_norm": 1.440024733543396, "learning_rate": 0.0001414979856142506, "loss": 1.2324, "step": 18910 }, { "epoch": 0.7282001924927816, "grad_norm": 1.6521823406219482, "learning_rate": 0.0001414704707985899, "loss": 1.196, "step": 18915 }, { "epoch": 0.7283926852743022, "grad_norm": 3.4958372116088867, "learning_rate": 0.00014144295219072937, "loss": 0.9906, "step": 18920 }, { "epoch": 0.7285851780558229, "grad_norm": 0.9254593849182129, "learning_rate": 0.00014141542979318538, "loss": 1.2552, "step": 18925 }, { "epoch": 0.7287776708373436, "grad_norm": 1.519364833831787, "learning_rate": 0.00014138790360847473, "loss": 1.0491, "step": 18930 }, { "epoch": 0.7289701636188642, "grad_norm": 1.199167013168335, "learning_rate": 0.0001413603736391144, "loss": 0.9939, "step": 18935 }, { "epoch": 0.729162656400385, "grad_norm": 1.0213391780853271, "learning_rate": 
0.00014133283988762192, "loss": 1.222, "step": 18940 }, { "epoch": 0.7293551491819057, "grad_norm": 1.27894127368927, "learning_rate": 0.00014130530235651506, "loss": 1.2881, "step": 18945 }, { "epoch": 0.7295476419634264, "grad_norm": 1.1660070419311523, "learning_rate": 0.0001412777610483119, "loss": 1.1839, "step": 18950 }, { "epoch": 0.729740134744947, "grad_norm": 0.9614414572715759, "learning_rate": 0.00014125021596553093, "loss": 1.0397, "step": 18955 }, { "epoch": 0.7299326275264677, "grad_norm": 1.5278538465499878, "learning_rate": 0.00014122266711069095, "loss": 1.2835, "step": 18960 }, { "epoch": 0.7301251203079885, "grad_norm": 1.2992238998413086, "learning_rate": 0.00014119511448631118, "loss": 1.2873, "step": 18965 }, { "epoch": 0.7303176130895092, "grad_norm": 1.0794028043746948, "learning_rate": 0.00014116755809491104, "loss": 1.1677, "step": 18970 }, { "epoch": 0.7305101058710298, "grad_norm": 1.672555685043335, "learning_rate": 0.00014113999793901046, "loss": 0.9295, "step": 18975 }, { "epoch": 0.7307025986525505, "grad_norm": 1.630053997039795, "learning_rate": 0.00014111243402112957, "loss": 1.1635, "step": 18980 }, { "epoch": 0.7308950914340713, "grad_norm": 1.3171367645263672, "learning_rate": 0.00014108486634378895, "loss": 1.065, "step": 18985 }, { "epoch": 0.731087584215592, "grad_norm": 1.1997402906417847, "learning_rate": 0.00014105729490950948, "loss": 1.0747, "step": 18990 }, { "epoch": 0.7312800769971126, "grad_norm": 1.6320029497146606, "learning_rate": 0.00014102971972081233, "loss": 1.2414, "step": 18995 }, { "epoch": 0.7314725697786333, "grad_norm": 1.3852897882461548, "learning_rate": 0.00014100214078021915, "loss": 1.0307, "step": 19000 }, { "epoch": 0.731665062560154, "grad_norm": 1.29547119140625, "learning_rate": 0.00014097455809025178, "loss": 1.1411, "step": 19005 }, { "epoch": 0.7318575553416747, "grad_norm": 1.0764034986495972, "learning_rate": 0.00014094697165343252, "loss": 1.1789, "step": 19010 }, { "epoch": 
0.7320500481231954, "grad_norm": 1.7445317506790161, "learning_rate": 0.00014091938147228395, "loss": 1.2379, "step": 19015 }, { "epoch": 0.7322425409047161, "grad_norm": 1.844789743423462, "learning_rate": 0.00014089178754932898, "loss": 1.0126, "step": 19020 }, { "epoch": 0.7324350336862367, "grad_norm": 1.370970368385315, "learning_rate": 0.00014086418988709095, "loss": 1.3182, "step": 19025 }, { "epoch": 0.7326275264677574, "grad_norm": 1.2565025091171265, "learning_rate": 0.00014083658848809347, "loss": 1.1753, "step": 19030 }, { "epoch": 0.7328200192492782, "grad_norm": 1.7159111499786377, "learning_rate": 0.00014080898335486046, "loss": 1.0572, "step": 19035 }, { "epoch": 0.7330125120307989, "grad_norm": 1.8323345184326172, "learning_rate": 0.0001407813744899163, "loss": 1.0822, "step": 19040 }, { "epoch": 0.7332050048123195, "grad_norm": 1.6878646612167358, "learning_rate": 0.00014075376189578553, "loss": 1.1133, "step": 19045 }, { "epoch": 0.7333974975938402, "grad_norm": 1.7448841333389282, "learning_rate": 0.00014072614557499323, "loss": 1.0922, "step": 19050 }, { "epoch": 0.7335899903753609, "grad_norm": 0.9125509262084961, "learning_rate": 0.00014069852553006472, "loss": 1.1788, "step": 19055 }, { "epoch": 0.7337824831568817, "grad_norm": 1.8741627931594849, "learning_rate": 0.00014067090176352563, "loss": 1.0538, "step": 19060 }, { "epoch": 0.7339749759384023, "grad_norm": 3.1138720512390137, "learning_rate": 0.00014064327427790201, "loss": 1.256, "step": 19065 }, { "epoch": 0.734167468719923, "grad_norm": 1.3083161115646362, "learning_rate": 0.00014061564307572022, "loss": 0.976, "step": 19070 }, { "epoch": 0.7343599615014437, "grad_norm": 1.176721215248108, "learning_rate": 0.00014058800815950687, "loss": 1.1733, "step": 19075 }, { "epoch": 0.7345524542829643, "grad_norm": 0.9016759395599365, "learning_rate": 0.00014056036953178906, "loss": 1.0671, "step": 19080 }, { "epoch": 0.7347449470644851, "grad_norm": 1.4011337757110596, "learning_rate": 
0.00014053272719509417, "loss": 1.1453, "step": 19085 }, { "epoch": 0.7349374398460058, "grad_norm": 1.2671010494232178, "learning_rate": 0.00014050508115194988, "loss": 1.1453, "step": 19090 }, { "epoch": 0.7351299326275265, "grad_norm": 1.3316471576690674, "learning_rate": 0.00014047743140488422, "loss": 0.9451, "step": 19095 }, { "epoch": 0.7353224254090471, "grad_norm": 1.963815689086914, "learning_rate": 0.0001404497779564256, "loss": 0.993, "step": 19100 }, { "epoch": 0.7355149181905678, "grad_norm": 1.4354350566864014, "learning_rate": 0.00014042212080910276, "loss": 1.1263, "step": 19105 }, { "epoch": 0.7357074109720886, "grad_norm": 1.6670982837677002, "learning_rate": 0.00014039445996544473, "loss": 1.0964, "step": 19110 }, { "epoch": 0.7358999037536093, "grad_norm": 0.9805311560630798, "learning_rate": 0.00014036679542798092, "loss": 1.056, "step": 19115 }, { "epoch": 0.7360923965351299, "grad_norm": 1.4659690856933594, "learning_rate": 0.0001403391271992411, "loss": 1.0984, "step": 19120 }, { "epoch": 0.7362848893166506, "grad_norm": 0.5292593240737915, "learning_rate": 0.00014031145528175525, "loss": 1.0774, "step": 19125 }, { "epoch": 0.7364773820981714, "grad_norm": 1.9471726417541504, "learning_rate": 0.00014028377967805392, "loss": 1.1648, "step": 19130 }, { "epoch": 0.736669874879692, "grad_norm": 1.2082020044326782, "learning_rate": 0.0001402561003906678, "loss": 1.0764, "step": 19135 }, { "epoch": 0.7368623676612127, "grad_norm": 1.558237075805664, "learning_rate": 0.00014022841742212792, "loss": 1.3944, "step": 19140 }, { "epoch": 0.7370548604427334, "grad_norm": 1.7463306188583374, "learning_rate": 0.0001402007307749658, "loss": 1.0599, "step": 19145 }, { "epoch": 0.737247353224254, "grad_norm": 1.2820191383361816, "learning_rate": 0.00014017304045171316, "loss": 1.042, "step": 19150 }, { "epoch": 0.7374398460057748, "grad_norm": 1.617754340171814, "learning_rate": 0.00014014534645490206, "loss": 1.1031, "step": 19155 }, { "epoch": 
0.7376323387872955, "grad_norm": 1.0561091899871826, "learning_rate": 0.00014011764878706497, "loss": 1.1711, "step": 19160 }, { "epoch": 0.7378248315688162, "grad_norm": 1.0614964962005615, "learning_rate": 0.00014008994745073468, "loss": 1.0783, "step": 19165 }, { "epoch": 0.7380173243503368, "grad_norm": 1.5456453561782837, "learning_rate": 0.0001400622424484442, "loss": 1.0303, "step": 19170 }, { "epoch": 0.7382098171318575, "grad_norm": 1.4854921102523804, "learning_rate": 0.00014003453378272712, "loss": 1.0719, "step": 19175 }, { "epoch": 0.7384023099133783, "grad_norm": 1.4764469861984253, "learning_rate": 0.00014000682145611708, "loss": 1.2755, "step": 19180 }, { "epoch": 0.738594802694899, "grad_norm": 1.6524717807769775, "learning_rate": 0.00013997910547114826, "loss": 1.1086, "step": 19185 }, { "epoch": 0.7387872954764196, "grad_norm": 1.264930248260498, "learning_rate": 0.00013995138583035508, "loss": 1.1087, "step": 19190 }, { "epoch": 0.7389797882579403, "grad_norm": 1.8001179695129395, "learning_rate": 0.0001399236625362723, "loss": 1.1736, "step": 19195 }, { "epoch": 0.739172281039461, "grad_norm": 1.0975139141082764, "learning_rate": 0.00013989593559143507, "loss": 1.1669, "step": 19200 }, { "epoch": 0.7393647738209818, "grad_norm": 1.078940987586975, "learning_rate": 0.0001398682049983788, "loss": 1.1259, "step": 19205 }, { "epoch": 0.7395572666025024, "grad_norm": 1.0370323657989502, "learning_rate": 0.0001398404707596393, "loss": 1.2454, "step": 19210 }, { "epoch": 0.7397497593840231, "grad_norm": 1.8001567125320435, "learning_rate": 0.00013981273287775266, "loss": 1.2803, "step": 19215 }, { "epoch": 0.7399422521655438, "grad_norm": 1.00836181640625, "learning_rate": 0.00013978499135525535, "loss": 1.2406, "step": 19220 }, { "epoch": 0.7401347449470644, "grad_norm": 1.169600009918213, "learning_rate": 0.00013975724619468414, "loss": 1.2738, "step": 19225 }, { "epoch": 0.7403272377285852, "grad_norm": 1.6229758262634277, "learning_rate": 
0.00013972949739857613, "loss": 1.1428, "step": 19230 }, { "epoch": 0.7405197305101059, "grad_norm": 1.573930263519287, "learning_rate": 0.00013970174496946873, "loss": 1.1467, "step": 19235 }, { "epoch": 0.7407122232916266, "grad_norm": 1.4224984645843506, "learning_rate": 0.00013967398890989979, "loss": 1.1335, "step": 19240 }, { "epoch": 0.7409047160731472, "grad_norm": 1.5381492376327515, "learning_rate": 0.00013964622922240736, "loss": 1.1332, "step": 19245 }, { "epoch": 0.7410972088546679, "grad_norm": 1.7980502843856812, "learning_rate": 0.0001396184659095299, "loss": 1.2107, "step": 19250 }, { "epoch": 0.7412897016361887, "grad_norm": 0.8735668063163757, "learning_rate": 0.00013959069897380617, "loss": 1.0948, "step": 19255 }, { "epoch": 0.7414821944177094, "grad_norm": 1.1920636892318726, "learning_rate": 0.0001395629284177753, "loss": 1.1663, "step": 19260 }, { "epoch": 0.74167468719923, "grad_norm": 1.3055362701416016, "learning_rate": 0.0001395351542439767, "loss": 1.1108, "step": 19265 }, { "epoch": 0.7418671799807507, "grad_norm": 1.8382583856582642, "learning_rate": 0.00013950737645495014, "loss": 1.0279, "step": 19270 }, { "epoch": 0.7420596727622714, "grad_norm": 0.865042507648468, "learning_rate": 0.00013947959505323577, "loss": 1.1005, "step": 19275 }, { "epoch": 0.7422521655437921, "grad_norm": 1.182671070098877, "learning_rate": 0.0001394518100413739, "loss": 1.199, "step": 19280 }, { "epoch": 0.7424446583253128, "grad_norm": 1.5641695261001587, "learning_rate": 0.00013942402142190532, "loss": 1.098, "step": 19285 }, { "epoch": 0.7426371511068335, "grad_norm": 0.924503743648529, "learning_rate": 0.0001393962291973712, "loss": 1.2765, "step": 19290 }, { "epoch": 0.7428296438883542, "grad_norm": 1.1100239753723145, "learning_rate": 0.00013936843337031287, "loss": 1.2573, "step": 19295 }, { "epoch": 0.7430221366698749, "grad_norm": 1.2185837030410767, "learning_rate": 0.0001393406339432721, "loss": 1.2064, "step": 19300 }, { "epoch": 
0.7432146294513956, "grad_norm": 1.5463718175888062, "learning_rate": 0.000139312830918791, "loss": 1.1559, "step": 19305 }, { "epoch": 0.7434071222329163, "grad_norm": 1.8315119743347168, "learning_rate": 0.00013928502429941188, "loss": 1.2292, "step": 19310 }, { "epoch": 0.7435996150144369, "grad_norm": 1.303144097328186, "learning_rate": 0.00013925721408767757, "loss": 1.1463, "step": 19315 }, { "epoch": 0.7437921077959576, "grad_norm": 1.2040412425994873, "learning_rate": 0.00013922940028613106, "loss": 0.9717, "step": 19320 }, { "epoch": 0.7439846005774784, "grad_norm": 1.0547009706497192, "learning_rate": 0.0001392015828973158, "loss": 1.0389, "step": 19325 }, { "epoch": 0.7441770933589991, "grad_norm": 1.597541093826294, "learning_rate": 0.00013917376192377543, "loss": 1.0937, "step": 19330 }, { "epoch": 0.7443695861405197, "grad_norm": 0.9714812636375427, "learning_rate": 0.00013914593736805402, "loss": 1.1641, "step": 19335 }, { "epoch": 0.7445620789220404, "grad_norm": 1.2114696502685547, "learning_rate": 0.00013911810923269603, "loss": 1.2167, "step": 19340 }, { "epoch": 0.7447545717035611, "grad_norm": 1.330718755722046, "learning_rate": 0.000139090277520246, "loss": 1.0583, "step": 19345 }, { "epoch": 0.7449470644850819, "grad_norm": 2.2889277935028076, "learning_rate": 0.0001390624422332491, "loss": 0.9747, "step": 19350 }, { "epoch": 0.7451395572666025, "grad_norm": 1.2337167263031006, "learning_rate": 0.0001390346033742506, "loss": 1.0294, "step": 19355 }, { "epoch": 0.7453320500481232, "grad_norm": 1.0727423429489136, "learning_rate": 0.0001390067609457962, "loss": 1.0961, "step": 19360 }, { "epoch": 0.7455245428296439, "grad_norm": 1.7654608488082886, "learning_rate": 0.0001389789149504319, "loss": 1.0775, "step": 19365 }, { "epoch": 0.7457170356111645, "grad_norm": 1.2609182596206665, "learning_rate": 0.0001389510653907041, "loss": 1.0706, "step": 19370 }, { "epoch": 0.7459095283926853, "grad_norm": 1.906533122062683, "learning_rate": 
0.00013892321226915933, "loss": 1.1035, "step": 19375 }, { "epoch": 0.746102021174206, "grad_norm": 1.1768391132354736, "learning_rate": 0.00013889535558834462, "loss": 1.097, "step": 19380 }, { "epoch": 0.7462945139557267, "grad_norm": 1.7764432430267334, "learning_rate": 0.00013886749535080737, "loss": 1.2136, "step": 19385 }, { "epoch": 0.7464870067372473, "grad_norm": 2.2302021980285645, "learning_rate": 0.0001388396315590951, "loss": 1.1236, "step": 19390 }, { "epoch": 0.746679499518768, "grad_norm": 1.0161263942718506, "learning_rate": 0.00013881176421575583, "loss": 1.3818, "step": 19395 }, { "epoch": 0.7468719923002888, "grad_norm": 1.002767562866211, "learning_rate": 0.00013878389332333784, "loss": 1.1468, "step": 19400 }, { "epoch": 0.7470644850818094, "grad_norm": 1.637412667274475, "learning_rate": 0.00013875601888438968, "loss": 1.2074, "step": 19405 }, { "epoch": 0.7472569778633301, "grad_norm": 1.440507173538208, "learning_rate": 0.00013872814090146036, "loss": 0.9903, "step": 19410 }, { "epoch": 0.7474494706448508, "grad_norm": 1.8063361644744873, "learning_rate": 0.00013870025937709913, "loss": 1.1508, "step": 19415 }, { "epoch": 0.7476419634263715, "grad_norm": 1.0809664726257324, "learning_rate": 0.0001386723743138555, "loss": 1.1509, "step": 19420 }, { "epoch": 0.7478344562078922, "grad_norm": 0.7736053466796875, "learning_rate": 0.00013864448571427945, "loss": 1.048, "step": 19425 }, { "epoch": 0.7480269489894129, "grad_norm": 1.588610291481018, "learning_rate": 0.00013861659358092117, "loss": 1.1118, "step": 19430 }, { "epoch": 0.7482194417709336, "grad_norm": 1.5141923427581787, "learning_rate": 0.00013858869791633124, "loss": 1.1847, "step": 19435 }, { "epoch": 0.7484119345524542, "grad_norm": 1.6033471822738647, "learning_rate": 0.00013856079872306046, "loss": 1.1109, "step": 19440 }, { "epoch": 0.748604427333975, "grad_norm": 1.1898064613342285, "learning_rate": 0.0001385328960036601, "loss": 1.1758, "step": 19445 }, { "epoch": 
0.7487969201154957, "grad_norm": 1.8286123275756836, "learning_rate": 0.00013850498976068166, "loss": 1.1827, "step": 19450 }, { "epoch": 0.7489894128970164, "grad_norm": 1.6806395053863525, "learning_rate": 0.00013847707999667698, "loss": 0.9877, "step": 19455 }, { "epoch": 0.749181905678537, "grad_norm": 2.1087961196899414, "learning_rate": 0.00013844916671419823, "loss": 1.2052, "step": 19460 }, { "epoch": 0.7493743984600577, "grad_norm": 1.4643951654434204, "learning_rate": 0.0001384212499157979, "loss": 0.9652, "step": 19465 }, { "epoch": 0.7495668912415785, "grad_norm": 1.4248661994934082, "learning_rate": 0.00013839332960402872, "loss": 1.1903, "step": 19470 }, { "epoch": 0.7497593840230992, "grad_norm": 1.8101911544799805, "learning_rate": 0.0001383709908267036, "loss": 1.0335, "step": 19475 }, { "epoch": 0.7499518768046198, "grad_norm": 1.9175690412521362, "learning_rate": 0.00013834306419730473, "loss": 1.2055, "step": 19480 }, { "epoch": 0.7501443695861405, "grad_norm": 0.930147647857666, "learning_rate": 0.00013831513406168663, "loss": 1.2383, "step": 19485 }, { "epoch": 0.7503368623676612, "grad_norm": 2.496994733810425, "learning_rate": 0.00013828720042240338, "loss": 1.2861, "step": 19490 }, { "epoch": 0.750529355149182, "grad_norm": 1.9224547147750854, "learning_rate": 0.00013825926328200926, "loss": 1.247, "step": 19495 }, { "epoch": 0.7507218479307026, "grad_norm": 1.7266567945480347, "learning_rate": 0.00013823132264305894, "loss": 1.1127, "step": 19500 }, { "epoch": 0.7509143407122233, "grad_norm": 1.2304484844207764, "learning_rate": 0.00013820337850810744, "loss": 1.1432, "step": 19505 }, { "epoch": 0.751106833493744, "grad_norm": 2.311600685119629, "learning_rate": 0.00013817543087971004, "loss": 1.2405, "step": 19510 }, { "epoch": 0.7512993262752646, "grad_norm": 2.358445644378662, "learning_rate": 0.0001381474797604224, "loss": 1.2407, "step": 19515 }, { "epoch": 0.7514918190567854, "grad_norm": 1.2297358512878418, "learning_rate": 
0.00013811952515280042, "loss": 0.9701, "step": 19520 }, { "epoch": 0.7516843118383061, "grad_norm": 1.260389804840088, "learning_rate": 0.00013809156705940037, "loss": 1.1711, "step": 19525 }, { "epoch": 0.7518768046198268, "grad_norm": 0.9936877489089966, "learning_rate": 0.00013806360548277886, "loss": 0.9045, "step": 19530 }, { "epoch": 0.7520692974013474, "grad_norm": 1.951540470123291, "learning_rate": 0.0001380356404254928, "loss": 1.0988, "step": 19535 }, { "epoch": 0.7522617901828681, "grad_norm": 1.0897135734558105, "learning_rate": 0.00013800767189009935, "loss": 1.0216, "step": 19540 }, { "epoch": 0.7524542829643889, "grad_norm": 1.3618708848953247, "learning_rate": 0.00013797969987915608, "loss": 1.0604, "step": 19545 }, { "epoch": 0.7526467757459095, "grad_norm": 1.413282871246338, "learning_rate": 0.00013795172439522087, "loss": 1.2045, "step": 19550 }, { "epoch": 0.7528392685274302, "grad_norm": 1.4086360931396484, "learning_rate": 0.00013792374544085187, "loss": 1.0724, "step": 19555 }, { "epoch": 0.7530317613089509, "grad_norm": 1.2165982723236084, "learning_rate": 0.00013789576301860757, "loss": 1.1886, "step": 19560 }, { "epoch": 0.7532242540904716, "grad_norm": 1.4711132049560547, "learning_rate": 0.00013786777713104678, "loss": 1.1847, "step": 19565 }, { "epoch": 0.7534167468719923, "grad_norm": 1.0978587865829468, "learning_rate": 0.00013783978778072862, "loss": 1.1521, "step": 19570 }, { "epoch": 0.753609239653513, "grad_norm": 1.1508560180664062, "learning_rate": 0.00013781179497021251, "loss": 1.089, "step": 19575 }, { "epoch": 0.7538017324350337, "grad_norm": 1.3086730241775513, "learning_rate": 0.00013778379870205829, "loss": 1.2293, "step": 19580 }, { "epoch": 0.7539942252165543, "grad_norm": 1.63782799243927, "learning_rate": 0.0001377557989788259, "loss": 1.0373, "step": 19585 }, { "epoch": 0.754186717998075, "grad_norm": 1.4707633256912231, "learning_rate": 0.00013772779580307584, "loss": 1.0481, "step": 19590 }, { "epoch": 
0.7543792107795958, "grad_norm": 1.6030997037887573, "learning_rate": 0.0001376997891773688, "loss": 1.0953, "step": 19595 }, { "epoch": 0.7545717035611165, "grad_norm": 1.0709367990493774, "learning_rate": 0.00013767177910426574, "loss": 1.1094, "step": 19600 }, { "epoch": 0.7547641963426371, "grad_norm": 1.2302757501602173, "learning_rate": 0.00013764376558632807, "loss": 0.9815, "step": 19605 }, { "epoch": 0.7549566891241578, "grad_norm": 2.4043121337890625, "learning_rate": 0.00013761574862611737, "loss": 1.1146, "step": 19610 }, { "epoch": 0.7551491819056786, "grad_norm": 1.2333440780639648, "learning_rate": 0.00013758772822619565, "loss": 1.367, "step": 19615 }, { "epoch": 0.7553416746871993, "grad_norm": 2.032453775405884, "learning_rate": 0.0001375597043891252, "loss": 1.1401, "step": 19620 }, { "epoch": 0.7555341674687199, "grad_norm": 1.1483811140060425, "learning_rate": 0.00013753167711746858, "loss": 1.0757, "step": 19625 }, { "epoch": 0.7557266602502406, "grad_norm": 2.314659833908081, "learning_rate": 0.0001375036464137887, "loss": 1.162, "step": 19630 }, { "epoch": 0.7559191530317613, "grad_norm": 1.460924744606018, "learning_rate": 0.0001374756122806488, "loss": 1.1596, "step": 19635 }, { "epoch": 0.756111645813282, "grad_norm": 1.628796935081482, "learning_rate": 0.0001374475747206124, "loss": 1.2437, "step": 19640 }, { "epoch": 0.7563041385948027, "grad_norm": 0.9428819417953491, "learning_rate": 0.0001374195337362434, "loss": 1.1804, "step": 19645 }, { "epoch": 0.7564966313763234, "grad_norm": 1.1497089862823486, "learning_rate": 0.00013739148933010587, "loss": 1.0776, "step": 19650 }, { "epoch": 0.7566891241578441, "grad_norm": 1.2695974111557007, "learning_rate": 0.00013736344150476435, "loss": 1.2446, "step": 19655 }, { "epoch": 0.7568816169393647, "grad_norm": 1.4802236557006836, "learning_rate": 0.00013733539026278364, "loss": 1.066, "step": 19660 }, { "epoch": 0.7570741097208855, "grad_norm": 1.7089695930480957, "learning_rate": 
0.0001373073356067288, "loss": 1.0265, "step": 19665 }, { "epoch": 0.7572666025024062, "grad_norm": 1.4578391313552856, "learning_rate": 0.00013727927753916523, "loss": 1.0214, "step": 19670 }, { "epoch": 0.7574590952839269, "grad_norm": 0.7848085761070251, "learning_rate": 0.00013725121606265872, "loss": 1.0254, "step": 19675 }, { "epoch": 0.7576515880654475, "grad_norm": 1.5217918157577515, "learning_rate": 0.00013722315117977525, "loss": 1.2226, "step": 19680 }, { "epoch": 0.7578440808469682, "grad_norm": 1.0195049047470093, "learning_rate": 0.00013719508289308118, "loss": 1.0981, "step": 19685 }, { "epoch": 0.758036573628489, "grad_norm": 0.8907167315483093, "learning_rate": 0.00013716701120514323, "loss": 1.0012, "step": 19690 }, { "epoch": 0.7582290664100096, "grad_norm": 1.6701477766036987, "learning_rate": 0.00013713893611852824, "loss": 1.1048, "step": 19695 }, { "epoch": 0.7584215591915303, "grad_norm": 1.4811270236968994, "learning_rate": 0.0001371108576358036, "loss": 1.2534, "step": 19700 }, { "epoch": 0.758614051973051, "grad_norm": 2.0855724811553955, "learning_rate": 0.00013708277575953686, "loss": 0.968, "step": 19705 }, { "epoch": 0.7588065447545717, "grad_norm": 1.4841949939727783, "learning_rate": 0.00013705469049229594, "loss": 1.1039, "step": 19710 }, { "epoch": 0.7589990375360924, "grad_norm": 1.2720580101013184, "learning_rate": 0.000137026601836649, "loss": 0.9966, "step": 19715 }, { "epoch": 0.7591915303176131, "grad_norm": 2.275491714477539, "learning_rate": 0.00013699850979516465, "loss": 1.1684, "step": 19720 }, { "epoch": 0.7593840230991338, "grad_norm": 1.2187795639038086, "learning_rate": 0.00013697041437041167, "loss": 0.9793, "step": 19725 }, { "epoch": 0.7595765158806544, "grad_norm": 1.1858078241348267, "learning_rate": 0.00013694231556495915, "loss": 1.1833, "step": 19730 }, { "epoch": 0.7597690086621751, "grad_norm": 1.2739187479019165, "learning_rate": 0.00013691421338137664, "loss": 1.1139, "step": 19735 }, { "epoch": 
0.7599615014436959, "grad_norm": 1.7635918855667114, "learning_rate": 0.00013688610782223382, "loss": 1.0935, "step": 19740 }, { "epoch": 0.7601539942252166, "grad_norm": 1.7312551736831665, "learning_rate": 0.00013685799889010084, "loss": 1.036, "step": 19745 }, { "epoch": 0.7603464870067372, "grad_norm": 1.222069501876831, "learning_rate": 0.00013682988658754797, "loss": 1.1653, "step": 19750 }, { "epoch": 0.7605389797882579, "grad_norm": 2.664635181427002, "learning_rate": 0.00013680177091714596, "loss": 1.281, "step": 19755 }, { "epoch": 0.7607314725697787, "grad_norm": 1.2842050790786743, "learning_rate": 0.00013677365188146577, "loss": 1.1799, "step": 19760 }, { "epoch": 0.7609239653512994, "grad_norm": 1.45145583152771, "learning_rate": 0.00013674552948307874, "loss": 1.1625, "step": 19765 }, { "epoch": 0.76111645813282, "grad_norm": 1.8897767066955566, "learning_rate": 0.00013671740372455648, "loss": 1.1714, "step": 19770 }, { "epoch": 0.7613089509143407, "grad_norm": 1.171235203742981, "learning_rate": 0.00013668927460847084, "loss": 1.2752, "step": 19775 }, { "epoch": 0.7615014436958614, "grad_norm": 0.9240397810935974, "learning_rate": 0.00013666114213739408, "loss": 1.0669, "step": 19780 }, { "epoch": 0.7616939364773821, "grad_norm": 1.654099941253662, "learning_rate": 0.0001366330063138988, "loss": 1.1941, "step": 19785 }, { "epoch": 0.7618864292589028, "grad_norm": 1.2961543798446655, "learning_rate": 0.00013660486714055768, "loss": 1.2552, "step": 19790 }, { "epoch": 0.7620789220404235, "grad_norm": 1.2810674905776978, "learning_rate": 0.00013657672461994398, "loss": 1.1035, "step": 19795 }, { "epoch": 0.7622714148219442, "grad_norm": 3.574240207672119, "learning_rate": 0.00013654857875463111, "loss": 1.1724, "step": 19800 }, { "epoch": 0.7624639076034648, "grad_norm": 1.0426640510559082, "learning_rate": 0.00013652042954719282, "loss": 1.351, "step": 19805 }, { "epoch": 0.7626564003849856, "grad_norm": 0.9059193730354309, "learning_rate": 
0.00013649227700020318, "loss": 1.1989, "step": 19810 }, { "epoch": 0.7628488931665063, "grad_norm": 2.0250661373138428, "learning_rate": 0.00013646412111623657, "loss": 1.5794, "step": 19815 }, { "epoch": 0.763041385948027, "grad_norm": 1.030274748802185, "learning_rate": 0.00013643596189786758, "loss": 0.965, "step": 19820 }, { "epoch": 0.7632338787295476, "grad_norm": 1.976044774055481, "learning_rate": 0.0001364077993476713, "loss": 1.1147, "step": 19825 }, { "epoch": 0.7634263715110683, "grad_norm": 1.6923823356628418, "learning_rate": 0.00013637963346822292, "loss": 1.1618, "step": 19830 }, { "epoch": 0.7636188642925891, "grad_norm": 1.3266521692276, "learning_rate": 0.00013635146426209805, "loss": 1.1782, "step": 19835 }, { "epoch": 0.7638113570741097, "grad_norm": 1.6700036525726318, "learning_rate": 0.00013632329173187256, "loss": 0.9154, "step": 19840 }, { "epoch": 0.7640038498556304, "grad_norm": 1.5041186809539795, "learning_rate": 0.00013629511588012273, "loss": 1.1082, "step": 19845 }, { "epoch": 0.7641963426371511, "grad_norm": 1.4730234146118164, "learning_rate": 0.0001362669367094249, "loss": 1.128, "step": 19850 }, { "epoch": 0.7643888354186718, "grad_norm": 1.415727972984314, "learning_rate": 0.00013623875422235602, "loss": 1.1844, "step": 19855 }, { "epoch": 0.7645813282001925, "grad_norm": 1.9785696268081665, "learning_rate": 0.00013621056842149306, "loss": 1.1183, "step": 19860 }, { "epoch": 0.7647738209817132, "grad_norm": 1.0615553855895996, "learning_rate": 0.00013618237930941357, "loss": 1.1257, "step": 19865 }, { "epoch": 0.7649663137632339, "grad_norm": 1.749930500984192, "learning_rate": 0.00013615418688869512, "loss": 0.9933, "step": 19870 }, { "epoch": 0.7651588065447545, "grad_norm": 1.5585590600967407, "learning_rate": 0.0001361259911619158, "loss": 1.1877, "step": 19875 }, { "epoch": 0.7653512993262752, "grad_norm": 1.4237456321716309, "learning_rate": 0.00013609779213165393, "loss": 1.0918, "step": 19880 }, { "epoch": 
0.765543792107796, "grad_norm": 1.2364110946655273, "learning_rate": 0.00013606958980048805, "loss": 1.0557, "step": 19885 }, { "epoch": 0.7657362848893167, "grad_norm": 1.0982424020767212, "learning_rate": 0.00013604138417099712, "loss": 1.1845, "step": 19890 }, { "epoch": 0.7659287776708373, "grad_norm": 0.8089034557342529, "learning_rate": 0.00013601317524576038, "loss": 1.139, "step": 19895 }, { "epoch": 0.766121270452358, "grad_norm": 1.0913941860198975, "learning_rate": 0.0001359849630273573, "loss": 1.2198, "step": 19900 }, { "epoch": 0.7663137632338787, "grad_norm": 1.3398661613464355, "learning_rate": 0.00013595674751836777, "loss": 0.9453, "step": 19905 }, { "epoch": 0.7665062560153995, "grad_norm": 2.1962482929229736, "learning_rate": 0.00013592852872137186, "loss": 1.3174, "step": 19910 }, { "epoch": 0.7666987487969201, "grad_norm": 1.4308804273605347, "learning_rate": 0.00013590030663895001, "loss": 1.1015, "step": 19915 }, { "epoch": 0.7668912415784408, "grad_norm": 0.915403425693512, "learning_rate": 0.00013587208127368292, "loss": 1.0555, "step": 19920 }, { "epoch": 0.7670837343599615, "grad_norm": 1.0108091831207275, "learning_rate": 0.00013584385262815164, "loss": 1.1591, "step": 19925 }, { "epoch": 0.7672762271414822, "grad_norm": 1.7001339197158813, "learning_rate": 0.00013581562070493747, "loss": 1.1671, "step": 19930 }, { "epoch": 0.7674687199230029, "grad_norm": 0.7533661723136902, "learning_rate": 0.00013578738550662207, "loss": 0.9644, "step": 19935 }, { "epoch": 0.7676612127045236, "grad_norm": 1.1101553440093994, "learning_rate": 0.0001357591470357873, "loss": 1.0671, "step": 19940 }, { "epoch": 0.7678537054860443, "grad_norm": 2.112529993057251, "learning_rate": 0.00013573090529501544, "loss": 1.1979, "step": 19945 }, { "epoch": 0.7680461982675649, "grad_norm": 1.2636981010437012, "learning_rate": 0.000135702660286889, "loss": 1.1494, "step": 19950 }, { "epoch": 0.7682386910490857, "grad_norm": 1.5712652206420898, "learning_rate": 
0.00013567441201399073, "loss": 1.1171, "step": 19955 }, { "epoch": 0.7684311838306064, "grad_norm": 1.0548748970031738, "learning_rate": 0.00013564616047890383, "loss": 0.9806, "step": 19960 }, { "epoch": 0.768623676612127, "grad_norm": 1.828020453453064, "learning_rate": 0.00013561790568421172, "loss": 1.2924, "step": 19965 }, { "epoch": 0.7688161693936477, "grad_norm": 1.037383794784546, "learning_rate": 0.00013558964763249804, "loss": 1.0602, "step": 19970 }, { "epoch": 0.7690086621751684, "grad_norm": 1.3003454208374023, "learning_rate": 0.00013556138632634686, "loss": 1.0049, "step": 19975 }, { "epoch": 0.7692011549566892, "grad_norm": 1.0770816802978516, "learning_rate": 0.00013553312176834247, "loss": 1.2497, "step": 19980 }, { "epoch": 0.7693936477382098, "grad_norm": 1.5512239933013916, "learning_rate": 0.00013550485396106947, "loss": 1.053, "step": 19985 }, { "epoch": 0.7695861405197305, "grad_norm": 1.0486184358596802, "learning_rate": 0.0001354765829071128, "loss": 1.2014, "step": 19990 }, { "epoch": 0.7697786333012512, "grad_norm": 1.2066504955291748, "learning_rate": 0.00013544830860905762, "loss": 1.1933, "step": 19995 }, { "epoch": 0.7699711260827719, "grad_norm": 1.090703010559082, "learning_rate": 0.00013542003106948943, "loss": 1.1504, "step": 20000 }, { "epoch": 0.7701636188642926, "grad_norm": 1.804743766784668, "learning_rate": 0.00013539175029099405, "loss": 1.1263, "step": 20005 }, { "epoch": 0.7703561116458133, "grad_norm": 1.006145715713501, "learning_rate": 0.00013536346627615755, "loss": 1.2072, "step": 20010 }, { "epoch": 0.770548604427334, "grad_norm": 1.6577104330062866, "learning_rate": 0.0001353351790275663, "loss": 0.9571, "step": 20015 }, { "epoch": 0.7707410972088546, "grad_norm": 1.1842409372329712, "learning_rate": 0.00013530688854780705, "loss": 1.0826, "step": 20020 }, { "epoch": 0.7709335899903753, "grad_norm": 2.0414535999298096, "learning_rate": 0.00013527859483946668, "loss": 0.9676, "step": 20025 }, { "epoch": 
0.7711260827718961, "grad_norm": 1.6052886247634888, "learning_rate": 0.00013525029790513254, "loss": 1.211, "step": 20030 }, { "epoch": 0.7713185755534168, "grad_norm": 1.241951823234558, "learning_rate": 0.00013522199774739218, "loss": 1.0928, "step": 20035 }, { "epoch": 0.7715110683349374, "grad_norm": 1.220537543296814, "learning_rate": 0.0001351936943688334, "loss": 1.0877, "step": 20040 }, { "epoch": 0.7717035611164581, "grad_norm": 1.6344817876815796, "learning_rate": 0.00013516538777204445, "loss": 0.9949, "step": 20045 }, { "epoch": 0.7718960538979788, "grad_norm": 1.798012137413025, "learning_rate": 0.00013513707795961374, "loss": 1.1155, "step": 20050 }, { "epoch": 0.7720885466794996, "grad_norm": 1.1736531257629395, "learning_rate": 0.00013510876493413, "loss": 1.1333, "step": 20055 }, { "epoch": 0.7722810394610202, "grad_norm": 1.3480515480041504, "learning_rate": 0.00013508044869818225, "loss": 1.316, "step": 20060 }, { "epoch": 0.7724735322425409, "grad_norm": 1.5204951763153076, "learning_rate": 0.00013505212925435994, "loss": 1.157, "step": 20065 }, { "epoch": 0.7726660250240616, "grad_norm": 1.116921067237854, "learning_rate": 0.00013502380660525255, "loss": 0.9601, "step": 20070 }, { "epoch": 0.7728585178055823, "grad_norm": 2.0013625621795654, "learning_rate": 0.00013499548075345002, "loss": 0.9889, "step": 20075 }, { "epoch": 0.773051010587103, "grad_norm": 2.779212474822998, "learning_rate": 0.00013496715170154266, "loss": 1.1228, "step": 20080 }, { "epoch": 0.7732435033686237, "grad_norm": 1.983289361000061, "learning_rate": 0.00013493881945212088, "loss": 1.1228, "step": 20085 }, { "epoch": 0.7734359961501444, "grad_norm": 1.5862370729446411, "learning_rate": 0.0001349104840077755, "loss": 1.2704, "step": 20090 }, { "epoch": 0.773628488931665, "grad_norm": 1.318428874015808, "learning_rate": 0.00013488214537109766, "loss": 1.0183, "step": 20095 }, { "epoch": 0.7738209817131858, "grad_norm": 1.2089256048202515, "learning_rate": 
0.00013485380354467868, "loss": 1.2204, "step": 20100 }, { "epoch": 0.7740134744947065, "grad_norm": 1.587889552116394, "learning_rate": 0.00013482545853111023, "loss": 1.1076, "step": 20105 }, { "epoch": 0.7742059672762271, "grad_norm": 1.168034553527832, "learning_rate": 0.00013479711033298432, "loss": 1.1339, "step": 20110 }, { "epoch": 0.7743984600577478, "grad_norm": 1.2924379110336304, "learning_rate": 0.0001347687589528932, "loss": 1.1975, "step": 20115 }, { "epoch": 0.7745909528392685, "grad_norm": 1.0745187997817993, "learning_rate": 0.00013474040439342933, "loss": 1.185, "step": 20120 }, { "epoch": 0.7747834456207893, "grad_norm": 1.319265365600586, "learning_rate": 0.00013471204665718568, "loss": 0.9424, "step": 20125 }, { "epoch": 0.7749759384023099, "grad_norm": 1.1227604150772095, "learning_rate": 0.00013468368574675528, "loss": 1.1125, "step": 20130 }, { "epoch": 0.7751684311838306, "grad_norm": 0.9614366888999939, "learning_rate": 0.00013465532166473157, "loss": 1.2149, "step": 20135 }, { "epoch": 0.7753609239653513, "grad_norm": 1.4985253810882568, "learning_rate": 0.00013462695441370827, "loss": 1.0558, "step": 20140 }, { "epoch": 0.775553416746872, "grad_norm": 2.495373487472534, "learning_rate": 0.00013459858399627938, "loss": 1.1317, "step": 20145 }, { "epoch": 0.7757459095283927, "grad_norm": 1.7242252826690674, "learning_rate": 0.00013457021041503916, "loss": 1.1204, "step": 20150 }, { "epoch": 0.7759384023099134, "grad_norm": 1.0122432708740234, "learning_rate": 0.00013454183367258223, "loss": 1.0719, "step": 20155 }, { "epoch": 0.7761308950914341, "grad_norm": 1.057358741760254, "learning_rate": 0.00013451345377150342, "loss": 1.0651, "step": 20160 }, { "epoch": 0.7763233878729547, "grad_norm": 0.9141941666603088, "learning_rate": 0.00013448507071439788, "loss": 1.0958, "step": 20165 }, { "epoch": 0.7765158806544754, "grad_norm": 1.0692884922027588, "learning_rate": 0.00013445668450386106, "loss": 0.9905, "step": 20170 }, { "epoch": 
0.7767083734359962, "grad_norm": 1.3968685865402222, "learning_rate": 0.00013442829514248873, "loss": 1.0801, "step": 20175 }, { "epoch": 0.7769008662175169, "grad_norm": 1.427119255065918, "learning_rate": 0.00013439990263287686, "loss": 1.2859, "step": 20180 }, { "epoch": 0.7770933589990375, "grad_norm": 1.9501906633377075, "learning_rate": 0.00013437150697762177, "loss": 1.1413, "step": 20185 }, { "epoch": 0.7772858517805582, "grad_norm": 0.9638822674751282, "learning_rate": 0.00013434310817932006, "loss": 1.0041, "step": 20190 }, { "epoch": 0.7774783445620789, "grad_norm": 1.9460463523864746, "learning_rate": 0.00013431470624056862, "loss": 1.1464, "step": 20195 }, { "epoch": 0.7776708373435997, "grad_norm": 1.4077537059783936, "learning_rate": 0.00013428630116396457, "loss": 1.086, "step": 20200 }, { "epoch": 0.7778633301251203, "grad_norm": 1.545357346534729, "learning_rate": 0.00013425789295210545, "loss": 1.0626, "step": 20205 }, { "epoch": 0.778055822906641, "grad_norm": 1.826687216758728, "learning_rate": 0.00013422948160758894, "loss": 1.1287, "step": 20210 }, { "epoch": 0.7782483156881617, "grad_norm": 1.4467684030532837, "learning_rate": 0.00013420106713301307, "loss": 1.0774, "step": 20215 }, { "epoch": 0.7784408084696823, "grad_norm": 1.0557719469070435, "learning_rate": 0.00013417264953097618, "loss": 0.968, "step": 20220 }, { "epoch": 0.7786333012512031, "grad_norm": 1.5922484397888184, "learning_rate": 0.00013414422880407686, "loss": 1.1911, "step": 20225 }, { "epoch": 0.7788257940327238, "grad_norm": 1.2360528707504272, "learning_rate": 0.000134115804954914, "loss": 1.123, "step": 20230 }, { "epoch": 0.7790182868142445, "grad_norm": 1.3299078941345215, "learning_rate": 0.0001340873779860868, "loss": 1.1586, "step": 20235 }, { "epoch": 0.7792107795957651, "grad_norm": 1.2143315076828003, "learning_rate": 0.00013405894790019467, "loss": 1.0069, "step": 20240 }, { "epoch": 0.7794032723772859, "grad_norm": 0.9542683959007263, "learning_rate": 
0.00013403051469983737, "loss": 1.2013, "step": 20245 }, { "epoch": 0.7795957651588066, "grad_norm": 1.3608813285827637, "learning_rate": 0.00013400207838761496, "loss": 1.1684, "step": 20250 }, { "epoch": 0.7797882579403272, "grad_norm": 0.4999949038028717, "learning_rate": 0.0001339736389661277, "loss": 1.1161, "step": 20255 }, { "epoch": 0.7799807507218479, "grad_norm": 1.8794244527816772, "learning_rate": 0.00013394519643797622, "loss": 0.8838, "step": 20260 }, { "epoch": 0.7801732435033686, "grad_norm": 0.8917430639266968, "learning_rate": 0.00013391675080576138, "loss": 0.9322, "step": 20265 }, { "epoch": 0.7803657362848894, "grad_norm": 0.8630626797676086, "learning_rate": 0.0001338883020720844, "loss": 1.0805, "step": 20270 }, { "epoch": 0.78055822906641, "grad_norm": 1.7655322551727295, "learning_rate": 0.00013385985023954664, "loss": 1.0011, "step": 20275 }, { "epoch": 0.7807507218479307, "grad_norm": 2.459364652633667, "learning_rate": 0.00013383139531074987, "loss": 1.2634, "step": 20280 }, { "epoch": 0.7809432146294514, "grad_norm": 0.9355961084365845, "learning_rate": 0.00013380293728829613, "loss": 1.1082, "step": 20285 }, { "epoch": 0.781135707410972, "grad_norm": 1.9479714632034302, "learning_rate": 0.00013377447617478772, "loss": 1.2329, "step": 20290 }, { "epoch": 0.7813282001924928, "grad_norm": 1.408583164215088, "learning_rate": 0.00013374601197282715, "loss": 1.0494, "step": 20295 }, { "epoch": 0.7815206929740135, "grad_norm": 1.0006494522094727, "learning_rate": 0.00013371754468501735, "loss": 1.0935, "step": 20300 }, { "epoch": 0.7817131857555342, "grad_norm": 1.8096888065338135, "learning_rate": 0.00013368907431396147, "loss": 1.14, "step": 20305 }, { "epoch": 0.7819056785370548, "grad_norm": 1.373366117477417, "learning_rate": 0.00013366060086226288, "loss": 1.0523, "step": 20310 }, { "epoch": 0.7820981713185755, "grad_norm": 1.7965810298919678, "learning_rate": 0.00013363212433252538, "loss": 1.1531, "step": 20315 }, { "epoch": 
0.7822906641000963, "grad_norm": 1.3974156379699707, "learning_rate": 0.00013360364472735285, "loss": 1.0295, "step": 20320 }, { "epoch": 0.782483156881617, "grad_norm": 2.2229206562042236, "learning_rate": 0.00013357516204934961, "loss": 1.2168, "step": 20325 }, { "epoch": 0.7826756496631376, "grad_norm": 2.140164852142334, "learning_rate": 0.00013354667630112026, "loss": 1.1943, "step": 20330 }, { "epoch": 0.7828681424446583, "grad_norm": 1.12675940990448, "learning_rate": 0.00013351818748526953, "loss": 1.1229, "step": 20335 }, { "epoch": 0.783060635226179, "grad_norm": 0.9767451286315918, "learning_rate": 0.00013348969560440262, "loss": 1.0961, "step": 20340 }, { "epoch": 0.7832531280076998, "grad_norm": 2.367725372314453, "learning_rate": 0.00013346120066112492, "loss": 0.9969, "step": 20345 }, { "epoch": 0.7834456207892204, "grad_norm": 1.4590439796447754, "learning_rate": 0.00013343270265804205, "loss": 1.2416, "step": 20350 }, { "epoch": 0.7836381135707411, "grad_norm": 1.0241880416870117, "learning_rate": 0.00013340420159776, "loss": 1.1929, "step": 20355 }, { "epoch": 0.7838306063522618, "grad_norm": 1.1249408721923828, "learning_rate": 0.000133375697482885, "loss": 1.1346, "step": 20360 }, { "epoch": 0.7840230991337824, "grad_norm": 0.3698437809944153, "learning_rate": 0.00013334719031602357, "loss": 0.9347, "step": 20365 }, { "epoch": 0.7842155919153032, "grad_norm": 0.9476606845855713, "learning_rate": 0.00013331868009978248, "loss": 1.0438, "step": 20370 }, { "epoch": 0.7844080846968239, "grad_norm": 1.432447910308838, "learning_rate": 0.0001332901668367688, "loss": 1.0981, "step": 20375 }, { "epoch": 0.7846005774783446, "grad_norm": 0.9563504457473755, "learning_rate": 0.0001332616505295899, "loss": 1.0899, "step": 20380 }, { "epoch": 0.7847930702598652, "grad_norm": 1.3649405241012573, "learning_rate": 0.00013323313118085341, "loss": 1.2495, "step": 20385 }, { "epoch": 0.784985563041386, "grad_norm": 1.109165906906128, "learning_rate": 
0.00013320460879316719, "loss": 1.122, "step": 20390 }, { "epoch": 0.7851780558229067, "grad_norm": 1.2185308933258057, "learning_rate": 0.0001331760833691395, "loss": 1.0217, "step": 20395 }, { "epoch": 0.7853705486044273, "grad_norm": 1.0969536304473877, "learning_rate": 0.00013314755491137872, "loss": 1.0269, "step": 20400 }, { "epoch": 0.785563041385948, "grad_norm": 1.2872892618179321, "learning_rate": 0.00013311902342249364, "loss": 1.2322, "step": 20405 }, { "epoch": 0.7857555341674687, "grad_norm": 1.7610993385314941, "learning_rate": 0.0001330904889050933, "loss": 0.9873, "step": 20410 }, { "epoch": 0.7859480269489895, "grad_norm": 2.3899195194244385, "learning_rate": 0.00013306195136178687, "loss": 1.4362, "step": 20415 }, { "epoch": 0.7861405197305101, "grad_norm": 0.9143069982528687, "learning_rate": 0.00013303341079518404, "loss": 1.0994, "step": 20420 }, { "epoch": 0.7863330125120308, "grad_norm": 0.9000052809715271, "learning_rate": 0.00013300486720789465, "loss": 1.0955, "step": 20425 }, { "epoch": 0.7865255052935515, "grad_norm": 1.6691547632217407, "learning_rate": 0.00013297632060252875, "loss": 1.1152, "step": 20430 }, { "epoch": 0.7867179980750721, "grad_norm": 1.8236645460128784, "learning_rate": 0.0001329477709816968, "loss": 0.9309, "step": 20435 }, { "epoch": 0.7869104908565929, "grad_norm": 1.151060700416565, "learning_rate": 0.00013291921834800947, "loss": 1.0847, "step": 20440 }, { "epoch": 0.7871029836381136, "grad_norm": 0.9802745580673218, "learning_rate": 0.00013289066270407766, "loss": 1.18, "step": 20445 }, { "epoch": 0.7872954764196343, "grad_norm": 1.4023500680923462, "learning_rate": 0.00013286210405251262, "loss": 1.1138, "step": 20450 }, { "epoch": 0.7874879692011549, "grad_norm": 2.108933925628662, "learning_rate": 0.0001328335423959259, "loss": 1.245, "step": 20455 }, { "epoch": 0.7876804619826756, "grad_norm": 1.586288332939148, "learning_rate": 0.0001328049777369292, "loss": 1.1811, "step": 20460 }, { "epoch": 
0.7878729547641964, "grad_norm": 1.2598310708999634, "learning_rate": 0.00013277641007813457, "loss": 0.9954, "step": 20465 }, { "epoch": 0.7880654475457171, "grad_norm": 0.8444984555244446, "learning_rate": 0.00013274783942215443, "loss": 0.9792, "step": 20470 }, { "epoch": 0.7882579403272377, "grad_norm": 1.5442594289779663, "learning_rate": 0.00013271926577160126, "loss": 1.1249, "step": 20475 }, { "epoch": 0.7884504331087584, "grad_norm": 1.1979268789291382, "learning_rate": 0.000132690689129088, "loss": 1.1101, "step": 20480 }, { "epoch": 0.7886429258902791, "grad_norm": 1.0047119855880737, "learning_rate": 0.00013266210949722777, "loss": 1.2296, "step": 20485 }, { "epoch": 0.7888354186717998, "grad_norm": 0.9286414980888367, "learning_rate": 0.000132633526878634, "loss": 1.0575, "step": 20490 }, { "epoch": 0.7890279114533205, "grad_norm": 1.0984097719192505, "learning_rate": 0.00013260494127592036, "loss": 1.2199, "step": 20495 }, { "epoch": 0.7892204042348412, "grad_norm": 2.0137503147125244, "learning_rate": 0.00013257635269170082, "loss": 1.1294, "step": 20500 }, { "epoch": 0.7894128970163619, "grad_norm": 1.5907988548278809, "learning_rate": 0.00013254776112858966, "loss": 1.0603, "step": 20505 }, { "epoch": 0.7896053897978825, "grad_norm": 1.6451369524002075, "learning_rate": 0.00013251916658920133, "loss": 1.2061, "step": 20510 }, { "epoch": 0.7897978825794033, "grad_norm": 1.7516169548034668, "learning_rate": 0.00013249056907615065, "loss": 1.0927, "step": 20515 }, { "epoch": 0.789990375360924, "grad_norm": 2.2118046283721924, "learning_rate": 0.00013246196859205265, "loss": 1.2632, "step": 20520 }, { "epoch": 0.7901828681424446, "grad_norm": 1.0801787376403809, "learning_rate": 0.00013243336513952265, "loss": 1.104, "step": 20525 }, { "epoch": 0.7903753609239653, "grad_norm": 1.737065076828003, "learning_rate": 0.00013240475872117625, "loss": 1.1356, "step": 20530 }, { "epoch": 0.790567853705486, "grad_norm": 2.103365659713745, "learning_rate": 
0.0001323761493396294, "loss": 1.1386, "step": 20535 }, { "epoch": 0.7907603464870068, "grad_norm": 1.2037795782089233, "learning_rate": 0.0001323475369974981, "loss": 1.1696, "step": 20540 }, { "epoch": 0.7909528392685274, "grad_norm": 2.0184690952301025, "learning_rate": 0.0001323189216973988, "loss": 1.2378, "step": 20545 }, { "epoch": 0.7911453320500481, "grad_norm": 1.1056318283081055, "learning_rate": 0.0001322903034419483, "loss": 1.0001, "step": 20550 }, { "epoch": 0.7913378248315688, "grad_norm": 1.8512266874313354, "learning_rate": 0.00013226168223376335, "loss": 1.2393, "step": 20555 }, { "epoch": 0.7915303176130896, "grad_norm": 1.0075215101242065, "learning_rate": 0.00013223305807546134, "loss": 1.0399, "step": 20560 }, { "epoch": 0.7917228103946102, "grad_norm": 1.206949234008789, "learning_rate": 0.00013220443096965967, "loss": 1.131, "step": 20565 }, { "epoch": 0.7919153031761309, "grad_norm": 1.0401040315628052, "learning_rate": 0.00013217580091897613, "loss": 1.0425, "step": 20570 }, { "epoch": 0.7921077959576516, "grad_norm": 1.1868935823440552, "learning_rate": 0.00013214716792602873, "loss": 1.2813, "step": 20575 }, { "epoch": 0.7923002887391722, "grad_norm": 1.0238397121429443, "learning_rate": 0.00013211853199343577, "loss": 1.1308, "step": 20580 }, { "epoch": 0.792492781520693, "grad_norm": 1.0374101400375366, "learning_rate": 0.00013208989312381585, "loss": 0.9725, "step": 20585 }, { "epoch": 0.7926852743022137, "grad_norm": 1.1706335544586182, "learning_rate": 0.00013206125131978778, "loss": 1.2237, "step": 20590 }, { "epoch": 0.7928777670837344, "grad_norm": 1.3591363430023193, "learning_rate": 0.00013203260658397066, "loss": 1.217, "step": 20595 }, { "epoch": 0.793070259865255, "grad_norm": 1.0355279445648193, "learning_rate": 0.00013200395891898388, "loss": 1.1943, "step": 20600 }, { "epoch": 0.7932627526467757, "grad_norm": 1.4872701168060303, "learning_rate": 0.000131975308327447, "loss": 1.0404, "step": 20605 }, { "epoch": 
0.7934552454282965, "grad_norm": 1.1777255535125732, "learning_rate": 0.00013194665481198006, "loss": 1.1498, "step": 20610 }, { "epoch": 0.7936477382098172, "grad_norm": 1.446821689605713, "learning_rate": 0.00013191799837520312, "loss": 1.1557, "step": 20615 }, { "epoch": 0.7938402309913378, "grad_norm": 1.1746444702148438, "learning_rate": 0.00013188933901973668, "loss": 1.0335, "step": 20620 }, { "epoch": 0.7940327237728585, "grad_norm": 1.7785364389419556, "learning_rate": 0.00013186067674820145, "loss": 1.2201, "step": 20625 }, { "epoch": 0.7942252165543792, "grad_norm": 2.1019060611724854, "learning_rate": 0.0001318320115632184, "loss": 1.3937, "step": 20630 }, { "epoch": 0.7944177093359, "grad_norm": 0.9918488264083862, "learning_rate": 0.0001318033434674087, "loss": 0.899, "step": 20635 }, { "epoch": 0.7946102021174206, "grad_norm": 1.4679549932479858, "learning_rate": 0.00013177467246339395, "loss": 1.1425, "step": 20640 }, { "epoch": 0.7948026948989413, "grad_norm": 1.2556777000427246, "learning_rate": 0.0001317459985537959, "loss": 1.0685, "step": 20645 }, { "epoch": 0.794995187680462, "grad_norm": 1.1215819120407104, "learning_rate": 0.00013171732174123657, "loss": 1.1001, "step": 20650 }, { "epoch": 0.7951876804619826, "grad_norm": 1.128730297088623, "learning_rate": 0.0001316886420283383, "loss": 1.1104, "step": 20655 }, { "epoch": 0.7953801732435034, "grad_norm": 0.9813867807388306, "learning_rate": 0.0001316599594177236, "loss": 1.1989, "step": 20660 }, { "epoch": 0.7955726660250241, "grad_norm": 1.1886361837387085, "learning_rate": 0.00013163127391201536, "loss": 1.1553, "step": 20665 }, { "epoch": 0.7957651588065447, "grad_norm": 1.1876790523529053, "learning_rate": 0.00013160258551383664, "loss": 0.9751, "step": 20670 }, { "epoch": 0.7959576515880654, "grad_norm": 0.8354418873786926, "learning_rate": 0.00013157389422581087, "loss": 1.1553, "step": 20675 }, { "epoch": 0.7961501443695861, "grad_norm": 1.0195808410644531, "learning_rate": 
0.00013154520005056157, "loss": 1.173, "step": 20680 }, { "epoch": 0.7963426371511069, "grad_norm": 1.6569643020629883, "learning_rate": 0.0001315165029907127, "loss": 1.0836, "step": 20685 }, { "epoch": 0.7965351299326275, "grad_norm": 1.1490013599395752, "learning_rate": 0.00013148780304888846, "loss": 1.1071, "step": 20690 }, { "epoch": 0.7967276227141482, "grad_norm": 1.1535710096359253, "learning_rate": 0.0001314591002277132, "loss": 0.9335, "step": 20695 }, { "epoch": 0.7969201154956689, "grad_norm": 1.2786650657653809, "learning_rate": 0.00013143039452981158, "loss": 1.0348, "step": 20700 }, { "epoch": 0.7971126082771897, "grad_norm": 1.0382118225097656, "learning_rate": 0.00013140168595780863, "loss": 1.0676, "step": 20705 }, { "epoch": 0.7973051010587103, "grad_norm": 1.2393159866333008, "learning_rate": 0.00013137297451432952, "loss": 1.0786, "step": 20710 }, { "epoch": 0.797497593840231, "grad_norm": 1.04021155834198, "learning_rate": 0.00013134426020199968, "loss": 1.1149, "step": 20715 }, { "epoch": 0.7976900866217517, "grad_norm": 0.871838390827179, "learning_rate": 0.0001313155430234449, "loss": 1.1709, "step": 20720 }, { "epoch": 0.7978825794032723, "grad_norm": 1.0589998960494995, "learning_rate": 0.00013128682298129118, "loss": 1.0185, "step": 20725 }, { "epoch": 0.7980750721847931, "grad_norm": 1.4314393997192383, "learning_rate": 0.0001312581000781647, "loss": 1.0613, "step": 20730 }, { "epoch": 0.7982675649663138, "grad_norm": 1.0706419944763184, "learning_rate": 0.00013122937431669208, "loss": 1.3543, "step": 20735 }, { "epoch": 0.7984600577478345, "grad_norm": 1.705357313156128, "learning_rate": 0.00013120064569950004, "loss": 1.0586, "step": 20740 }, { "epoch": 0.7986525505293551, "grad_norm": 1.2182518243789673, "learning_rate": 0.00013117191422921566, "loss": 1.1202, "step": 20745 }, { "epoch": 0.7988450433108758, "grad_norm": 1.1450048685073853, "learning_rate": 0.00013114317990846618, "loss": 1.2812, "step": 20750 }, { "epoch": 
0.7990375360923966, "grad_norm": 1.8270835876464844, "learning_rate": 0.00013111444273987923, "loss": 1.1169, "step": 20755 }, { "epoch": 0.7992300288739173, "grad_norm": 2.412691593170166, "learning_rate": 0.0001310857027260826, "loss": 1.2125, "step": 20760 }, { "epoch": 0.7994225216554379, "grad_norm": 1.1688083410263062, "learning_rate": 0.00013105695986970434, "loss": 1.1396, "step": 20765 }, { "epoch": 0.7996150144369586, "grad_norm": 1.3766731023788452, "learning_rate": 0.00013102821417337288, "loss": 1.0521, "step": 20770 }, { "epoch": 0.7998075072184793, "grad_norm": 1.4227287769317627, "learning_rate": 0.00013099946563971678, "loss": 1.2142, "step": 20775 }, { "epoch": 0.8, "grad_norm": 1.2115936279296875, "learning_rate": 0.00013097071427136485, "loss": 1.1114, "step": 20780 }, { "epoch": 0.8001924927815207, "grad_norm": 1.5830806493759155, "learning_rate": 0.00013094196007094633, "loss": 1.015, "step": 20785 }, { "epoch": 0.8003849855630414, "grad_norm": 1.4669450521469116, "learning_rate": 0.0001309132030410905, "loss": 1.1081, "step": 20790 }, { "epoch": 0.800577478344562, "grad_norm": 1.534693956375122, "learning_rate": 0.00013088444318442704, "loss": 1.1994, "step": 20795 }, { "epoch": 0.8007699711260827, "grad_norm": 1.6019155979156494, "learning_rate": 0.0001308556805035858, "loss": 1.1598, "step": 20800 }, { "epoch": 0.8009624639076035, "grad_norm": 2.0105199813842773, "learning_rate": 0.00013082691500119703, "loss": 1.0093, "step": 20805 }, { "epoch": 0.8011549566891242, "grad_norm": 0.9521466493606567, "learning_rate": 0.00013079814667989108, "loss": 0.9551, "step": 20810 }, { "epoch": 0.8013474494706448, "grad_norm": 1.155561089515686, "learning_rate": 0.00013076937554229866, "loss": 1.012, "step": 20815 }, { "epoch": 0.8015399422521655, "grad_norm": 2.6006276607513428, "learning_rate": 0.00013074060159105061, "loss": 1.2012, "step": 20820 }, { "epoch": 0.8017324350336862, "grad_norm": 1.318688154220581, "learning_rate": 0.0001307118248287782, 
"loss": 1.0862, "step": 20825 }, { "epoch": 0.801924927815207, "grad_norm": 1.0366714000701904, "learning_rate": 0.0001306830452581129, "loss": 0.8681, "step": 20830 }, { "epoch": 0.8021174205967276, "grad_norm": 1.463513731956482, "learning_rate": 0.00013065426288168635, "loss": 1.0677, "step": 20835 }, { "epoch": 0.8023099133782483, "grad_norm": 1.2675501108169556, "learning_rate": 0.00013062547770213053, "loss": 1.0329, "step": 20840 }, { "epoch": 0.802502406159769, "grad_norm": 1.9978399276733398, "learning_rate": 0.00013059668972207762, "loss": 1.106, "step": 20845 }, { "epoch": 0.8026948989412896, "grad_norm": 0.831674337387085, "learning_rate": 0.00013056789894416013, "loss": 1.1393, "step": 20850 }, { "epoch": 0.8028873917228104, "grad_norm": 1.492956280708313, "learning_rate": 0.00013053910537101076, "loss": 1.1106, "step": 20855 }, { "epoch": 0.8030798845043311, "grad_norm": 1.5051852464675903, "learning_rate": 0.00013051030900526256, "loss": 1.1677, "step": 20860 }, { "epoch": 0.8032723772858518, "grad_norm": 1.2556471824645996, "learning_rate": 0.00013048150984954867, "loss": 1.006, "step": 20865 }, { "epoch": 0.8034648700673724, "grad_norm": 1.4677566289901733, "learning_rate": 0.00013045270790650263, "loss": 1.2083, "step": 20870 }, { "epoch": 0.8036573628488932, "grad_norm": 1.2184150218963623, "learning_rate": 0.00013042390317875822, "loss": 1.0107, "step": 20875 }, { "epoch": 0.8038498556304139, "grad_norm": 1.0294650793075562, "learning_rate": 0.00013039509566894935, "loss": 1.1263, "step": 20880 }, { "epoch": 0.8040423484119346, "grad_norm": 1.0486441850662231, "learning_rate": 0.00013036628537971037, "loss": 1.2297, "step": 20885 }, { "epoch": 0.8042348411934552, "grad_norm": 1.209472894668579, "learning_rate": 0.00013033747231367573, "loss": 1.0295, "step": 20890 }, { "epoch": 0.8044273339749759, "grad_norm": 1.3195816278457642, "learning_rate": 0.00013030865647348022, "loss": 1.1223, "step": 20895 }, { "epoch": 0.8046198267564967, "grad_norm": 
1.127369999885559, "learning_rate": 0.00013027983786175883, "loss": 1.1293, "step": 20900 }, { "epoch": 0.8048123195380174, "grad_norm": 1.7267670631408691, "learning_rate": 0.00013025101648114692, "loss": 1.1115, "step": 20905 }, { "epoch": 0.805004812319538, "grad_norm": 1.1385564804077148, "learning_rate": 0.0001302221923342799, "loss": 1.2283, "step": 20910 }, { "epoch": 0.8051973051010587, "grad_norm": 2.3153762817382812, "learning_rate": 0.0001301933654237936, "loss": 1.0369, "step": 20915 }, { "epoch": 0.8053897978825794, "grad_norm": 2.7795004844665527, "learning_rate": 0.000130164535752324, "loss": 1.1383, "step": 20920 }, { "epoch": 0.8055822906641001, "grad_norm": 1.4328944683074951, "learning_rate": 0.00013013570332250752, "loss": 1.0496, "step": 20925 }, { "epoch": 0.8057747834456208, "grad_norm": 1.35623037815094, "learning_rate": 0.00013010686813698055, "loss": 1.034, "step": 20930 }, { "epoch": 0.8059672762271415, "grad_norm": 2.241270065307617, "learning_rate": 0.0001300780301983799, "loss": 1.1334, "step": 20935 }, { "epoch": 0.8061597690086622, "grad_norm": 1.0069741010665894, "learning_rate": 0.00013004918950934268, "loss": 1.0983, "step": 20940 }, { "epoch": 0.8063522617901828, "grad_norm": 2.6190080642700195, "learning_rate": 0.0001300203460725061, "loss": 1.0864, "step": 20945 }, { "epoch": 0.8065447545717036, "grad_norm": 1.0321654081344604, "learning_rate": 0.00012999149989050777, "loss": 1.1463, "step": 20950 }, { "epoch": 0.8067372473532243, "grad_norm": 1.1893829107284546, "learning_rate": 0.00012996265096598545, "loss": 1.1653, "step": 20955 }, { "epoch": 0.8069297401347449, "grad_norm": 1.0437567234039307, "learning_rate": 0.00012993379930157717, "loss": 1.0993, "step": 20960 }, { "epoch": 0.8071222329162656, "grad_norm": 0.9310737252235413, "learning_rate": 0.00012990494489992124, "loss": 1.0351, "step": 20965 }, { "epoch": 0.8073147256977863, "grad_norm": 1.0145392417907715, "learning_rate": 0.00012987608776365622, "loss": 1.0316, 
"step": 20970 }, { "epoch": 0.8075072184793071, "grad_norm": 0.8872208595275879, "learning_rate": 0.00012984722789542084, "loss": 0.9912, "step": 20975 }, { "epoch": 0.8076997112608277, "grad_norm": 1.4148224592208862, "learning_rate": 0.00012981836529785422, "loss": 1.1786, "step": 20980 }, { "epoch": 0.8078922040423484, "grad_norm": 0.9235820770263672, "learning_rate": 0.00012978949997359565, "loss": 1.1649, "step": 20985 }, { "epoch": 0.8080846968238691, "grad_norm": 1.932556390762329, "learning_rate": 0.0001297606319252846, "loss": 1.1976, "step": 20990 }, { "epoch": 0.8082771896053897, "grad_norm": 1.1439324617385864, "learning_rate": 0.00012973176115556092, "loss": 0.9586, "step": 20995 }, { "epoch": 0.8084696823869105, "grad_norm": 1.4145176410675049, "learning_rate": 0.00012970288766706465, "loss": 1.1235, "step": 21000 }, { "epoch": 0.8086621751684312, "grad_norm": 1.2468702793121338, "learning_rate": 0.00012967401146243604, "loss": 1.2867, "step": 21005 }, { "epoch": 0.8088546679499519, "grad_norm": 1.230528473854065, "learning_rate": 0.00012964513254431567, "loss": 1.1464, "step": 21010 }, { "epoch": 0.8090471607314725, "grad_norm": 1.3284807205200195, "learning_rate": 0.00012961625091534432, "loss": 1.2665, "step": 21015 }, { "epoch": 0.8092396535129933, "grad_norm": 1.5851751565933228, "learning_rate": 0.000129587366578163, "loss": 1.1016, "step": 21020 }, { "epoch": 0.809432146294514, "grad_norm": 1.7818470001220703, "learning_rate": 0.00012955847953541303, "loss": 1.1393, "step": 21025 }, { "epoch": 0.8096246390760347, "grad_norm": 2.6962087154388428, "learning_rate": 0.00012952958978973585, "loss": 1.1494, "step": 21030 }, { "epoch": 0.8098171318575553, "grad_norm": 1.0526777505874634, "learning_rate": 0.00012950069734377336, "loss": 1.1411, "step": 21035 }, { "epoch": 0.810009624639076, "grad_norm": 0.8282346129417419, "learning_rate": 0.0001294718022001675, "loss": 1.2306, "step": 21040 }, { "epoch": 0.8102021174205968, "grad_norm": 
0.9357605576515198, "learning_rate": 0.00012944290436156056, "loss": 1.0414, "step": 21045 }, { "epoch": 0.8103946102021174, "grad_norm": 1.5050805807113647, "learning_rate": 0.00012941400383059508, "loss": 1.0919, "step": 21050 }, { "epoch": 0.8105871029836381, "grad_norm": 1.8481179475784302, "learning_rate": 0.00012938510060991377, "loss": 1.2084, "step": 21055 }, { "epoch": 0.8107795957651588, "grad_norm": 1.1937155723571777, "learning_rate": 0.00012935619470215968, "loss": 1.2939, "step": 21060 }, { "epoch": 0.8109720885466795, "grad_norm": 1.077461838722229, "learning_rate": 0.00012932728610997606, "loss": 1.1172, "step": 21065 }, { "epoch": 0.8111645813282002, "grad_norm": 1.2674756050109863, "learning_rate": 0.00012929837483600638, "loss": 1.0764, "step": 21070 }, { "epoch": 0.8113570741097209, "grad_norm": 1.3384912014007568, "learning_rate": 0.00012926946088289443, "loss": 1.0626, "step": 21075 }, { "epoch": 0.8115495668912416, "grad_norm": 1.9473553895950317, "learning_rate": 0.00012924054425328415, "loss": 0.9904, "step": 21080 }, { "epoch": 0.8117420596727623, "grad_norm": 0.9565866589546204, "learning_rate": 0.00012921162494981982, "loss": 1.015, "step": 21085 }, { "epoch": 0.8119345524542829, "grad_norm": 2.1136703491210938, "learning_rate": 0.00012918270297514588, "loss": 1.1105, "step": 21090 }, { "epoch": 0.8121270452358037, "grad_norm": 1.2925735712051392, "learning_rate": 0.0001291537783319071, "loss": 1.1225, "step": 21095 }, { "epoch": 0.8123195380173244, "grad_norm": 1.0134514570236206, "learning_rate": 0.00012912485102274836, "loss": 1.1333, "step": 21100 }, { "epoch": 0.812512030798845, "grad_norm": 1.2157081365585327, "learning_rate": 0.00012909592105031495, "loss": 1.2096, "step": 21105 }, { "epoch": 0.8127045235803657, "grad_norm": 1.0096231698989868, "learning_rate": 0.00012906698841725234, "loss": 1.1434, "step": 21110 }, { "epoch": 0.8128970163618864, "grad_norm": 1.3600200414657593, "learning_rate": 0.00012903805312620617, "loss": 
1.0407, "step": 21115 }, { "epoch": 0.8130895091434072, "grad_norm": 1.7842247486114502, "learning_rate": 0.00012900911517982238, "loss": 1.0577, "step": 21120 }, { "epoch": 0.8132820019249278, "grad_norm": 1.8753024339675903, "learning_rate": 0.0001289801745807472, "loss": 1.1792, "step": 21125 }, { "epoch": 0.8134744947064485, "grad_norm": 1.000182867050171, "learning_rate": 0.00012895123133162704, "loss": 1.0124, "step": 21130 }, { "epoch": 0.8136669874879692, "grad_norm": 0.9506773948669434, "learning_rate": 0.00012892228543510856, "loss": 1.2867, "step": 21135 }, { "epoch": 0.8138594802694898, "grad_norm": 1.084410548210144, "learning_rate": 0.00012889333689383863, "loss": 1.2061, "step": 21140 }, { "epoch": 0.8140519730510106, "grad_norm": 2.0268046855926514, "learning_rate": 0.00012886438571046448, "loss": 0.9941, "step": 21145 }, { "epoch": 0.8142444658325313, "grad_norm": 1.1148359775543213, "learning_rate": 0.00012883543188763345, "loss": 1.3397, "step": 21150 }, { "epoch": 0.814436958614052, "grad_norm": 1.990718126296997, "learning_rate": 0.00012880647542799317, "loss": 1.1802, "step": 21155 }, { "epoch": 0.8146294513955726, "grad_norm": 0.9613543152809143, "learning_rate": 0.00012877751633419158, "loss": 1.0403, "step": 21160 }, { "epoch": 0.8148219441770933, "grad_norm": 1.0529913902282715, "learning_rate": 0.00012874855460887674, "loss": 1.0797, "step": 21165 }, { "epoch": 0.8150144369586141, "grad_norm": 1.083970069885254, "learning_rate": 0.000128719590254697, "loss": 1.0772, "step": 21170 }, { "epoch": 0.8152069297401348, "grad_norm": 1.39369535446167, "learning_rate": 0.000128690623274301, "loss": 1.1004, "step": 21175 }, { "epoch": 0.8153994225216554, "grad_norm": 1.5318938493728638, "learning_rate": 0.00012866165367033755, "loss": 1.0723, "step": 21180 }, { "epoch": 0.8155919153031761, "grad_norm": 2.069422483444214, "learning_rate": 0.00012863268144545575, "loss": 1.1655, "step": 21185 }, { "epoch": 0.8157844080846969, "grad_norm": 
2.672980785369873, "learning_rate": 0.0001286037066023049, "loss": 1.1651, "step": 21190 }, { "epoch": 0.8159769008662175, "grad_norm": 1.552042007446289, "learning_rate": 0.00012857472914353456, "loss": 1.1443, "step": 21195 }, { "epoch": 0.8161693936477382, "grad_norm": 1.6215529441833496, "learning_rate": 0.00012854574907179454, "loss": 1.0127, "step": 21200 }, { "epoch": 0.8163618864292589, "grad_norm": 1.9641019105911255, "learning_rate": 0.00012851676638973486, "loss": 1.1913, "step": 21205 }, { "epoch": 0.8165543792107796, "grad_norm": 1.0362906455993652, "learning_rate": 0.00012848778110000582, "loss": 1.0131, "step": 21210 }, { "epoch": 0.8167468719923003, "grad_norm": 1.3487643003463745, "learning_rate": 0.00012845879320525787, "loss": 1.3209, "step": 21215 }, { "epoch": 0.816939364773821, "grad_norm": 1.2834949493408203, "learning_rate": 0.00012842980270814182, "loss": 0.9396, "step": 21220 }, { "epoch": 0.8171318575553417, "grad_norm": 1.530375599861145, "learning_rate": 0.00012840080961130864, "loss": 0.9925, "step": 21225 }, { "epoch": 0.8173243503368623, "grad_norm": 1.6313562393188477, "learning_rate": 0.00012837181391740958, "loss": 1.1684, "step": 21230 }, { "epoch": 0.817516843118383, "grad_norm": 2.681976556777954, "learning_rate": 0.0001283428156290961, "loss": 1.0749, "step": 21235 }, { "epoch": 0.8177093358999038, "grad_norm": 1.3075588941574097, "learning_rate": 0.0001283138147490198, "loss": 1.2704, "step": 21240 }, { "epoch": 0.8179018286814245, "grad_norm": 1.4993854761123657, "learning_rate": 0.0001282848112798328, "loss": 1.1457, "step": 21245 }, { "epoch": 0.8180943214629451, "grad_norm": 0.991303563117981, "learning_rate": 0.00012825580522418716, "loss": 1.0834, "step": 21250 }, { "epoch": 0.8182868142444658, "grad_norm": 1.5046969652175903, "learning_rate": 0.0001282267965847353, "loss": 1.1156, "step": 21255 }, { "epoch": 0.8184793070259865, "grad_norm": 2.711761713027954, "learning_rate": 0.00012819778536412983, "loss": 1.0736, 
"step": 21260 }, { "epoch": 0.8186717998075073, "grad_norm": 2.3247745037078857, "learning_rate": 0.00012816877156502374, "loss": 1.0703, "step": 21265 }, { "epoch": 0.8188642925890279, "grad_norm": 1.8839764595031738, "learning_rate": 0.00012813975519007005, "loss": 1.2301, "step": 21270 }, { "epoch": 0.8190567853705486, "grad_norm": 1.3224703073501587, "learning_rate": 0.00012811073624192218, "loss": 0.942, "step": 21275 }, { "epoch": 0.8192492781520693, "grad_norm": 1.2975091934204102, "learning_rate": 0.00012808171472323366, "loss": 1.0815, "step": 21280 }, { "epoch": 0.8194417709335899, "grad_norm": 1.1744860410690308, "learning_rate": 0.0001280526906366584, "loss": 1.2635, "step": 21285 }, { "epoch": 0.8196342637151107, "grad_norm": 1.3168641328811646, "learning_rate": 0.0001280236639848504, "loss": 1.1034, "step": 21290 }, { "epoch": 0.8198267564966314, "grad_norm": 1.7554851770401, "learning_rate": 0.00012799463477046396, "loss": 1.215, "step": 21295 }, { "epoch": 0.8200192492781521, "grad_norm": 1.458289384841919, "learning_rate": 0.00012796560299615365, "loss": 1.0511, "step": 21300 }, { "epoch": 0.8202117420596727, "grad_norm": 1.4137495756149292, "learning_rate": 0.00012793656866457416, "loss": 0.9379, "step": 21305 }, { "epoch": 0.8204042348411934, "grad_norm": 1.4759835004806519, "learning_rate": 0.00012790753177838052, "loss": 1.1523, "step": 21310 }, { "epoch": 0.8205967276227142, "grad_norm": 1.7477853298187256, "learning_rate": 0.00012787849234022802, "loss": 1.1982, "step": 21315 }, { "epoch": 0.8207892204042349, "grad_norm": 1.6621637344360352, "learning_rate": 0.00012784945035277206, "loss": 1.1225, "step": 21320 }, { "epoch": 0.8209817131857555, "grad_norm": 1.2455633878707886, "learning_rate": 0.0001278204058186683, "loss": 1.1079, "step": 21325 }, { "epoch": 0.8211742059672762, "grad_norm": 2.1116111278533936, "learning_rate": 0.0001277913587405728, "loss": 1.16, "step": 21330 }, { "epoch": 0.821366698748797, "grad_norm": 1.7187306880950928, 
"learning_rate": 0.00012776230912114157, "loss": 1.0653, "step": 21335 }, { "epoch": 0.8215591915303176, "grad_norm": 1.4385313987731934, "learning_rate": 0.00012773325696303114, "loss": 1.1132, "step": 21340 }, { "epoch": 0.8217516843118383, "grad_norm": 1.1039965152740479, "learning_rate": 0.00012770420226889802, "loss": 1.072, "step": 21345 }, { "epoch": 0.821944177093359, "grad_norm": 1.4439443349838257, "learning_rate": 0.0001276751450413992, "loss": 1.2279, "step": 21350 }, { "epoch": 0.8221366698748797, "grad_norm": 1.0179024934768677, "learning_rate": 0.00012764608528319165, "loss": 1.0026, "step": 21355 }, { "epoch": 0.8223291626564004, "grad_norm": 1.2904207706451416, "learning_rate": 0.00012761702299693274, "loss": 1.0848, "step": 21360 }, { "epoch": 0.8225216554379211, "grad_norm": 0.8580986261367798, "learning_rate": 0.00012758795818528003, "loss": 1.0943, "step": 21365 }, { "epoch": 0.8227141482194418, "grad_norm": 1.2557419538497925, "learning_rate": 0.00012755889085089127, "loss": 1.1489, "step": 21370 }, { "epoch": 0.8229066410009624, "grad_norm": 1.0299915075302124, "learning_rate": 0.00012752982099642455, "loss": 1.0721, "step": 21375 }, { "epoch": 0.8230991337824831, "grad_norm": 0.9946519136428833, "learning_rate": 0.00012750074862453802, "loss": 1.096, "step": 21380 }, { "epoch": 0.8232916265640039, "grad_norm": 1.6168420314788818, "learning_rate": 0.00012747167373789023, "loss": 1.275, "step": 21385 }, { "epoch": 0.8234841193455246, "grad_norm": 1.7367397546768188, "learning_rate": 0.00012744259633913987, "loss": 1.1666, "step": 21390 }, { "epoch": 0.8236766121270452, "grad_norm": 1.6572107076644897, "learning_rate": 0.00012741351643094583, "loss": 1.0053, "step": 21395 }, { "epoch": 0.8238691049085659, "grad_norm": 0.7522270083427429, "learning_rate": 0.00012738443401596734, "loss": 1.0007, "step": 21400 }, { "epoch": 0.8240615976900866, "grad_norm": 1.8706845045089722, "learning_rate": 0.00012735534909686375, "loss": 1.2394, "step": 21405 
}, { "epoch": 0.8242540904716074, "grad_norm": 2.571775197982788, "learning_rate": 0.0001273262616762947, "loss": 1.4034, "step": 21410 }, { "epoch": 0.824446583253128, "grad_norm": 1.1532241106033325, "learning_rate": 0.00012729717175692006, "loss": 1.095, "step": 21415 }, { "epoch": 0.8246390760346487, "grad_norm": 1.6400253772735596, "learning_rate": 0.00012726807934139987, "loss": 1.1114, "step": 21420 }, { "epoch": 0.8248315688161694, "grad_norm": 1.572492003440857, "learning_rate": 0.00012723898443239447, "loss": 1.0996, "step": 21425 }, { "epoch": 0.82502406159769, "grad_norm": 1.266436219215393, "learning_rate": 0.00012720988703256438, "loss": 1.1825, "step": 21430 }, { "epoch": 0.8252165543792108, "grad_norm": 1.0934292078018188, "learning_rate": 0.00012718078714457035, "loss": 1.1494, "step": 21435 }, { "epoch": 0.8254090471607315, "grad_norm": 1.5081112384796143, "learning_rate": 0.0001271516847710734, "loss": 1.2056, "step": 21440 }, { "epoch": 0.8256015399422522, "grad_norm": 0.9314339756965637, "learning_rate": 0.00012712257991473477, "loss": 1.105, "step": 21445 }, { "epoch": 0.8257940327237728, "grad_norm": 1.483852744102478, "learning_rate": 0.00012709347257821587, "loss": 1.2253, "step": 21450 }, { "epoch": 0.8259865255052935, "grad_norm": 0.9682250022888184, "learning_rate": 0.00012706436276417837, "loss": 1.0754, "step": 21455 }, { "epoch": 0.8261790182868143, "grad_norm": 1.4954203367233276, "learning_rate": 0.00012703525047528418, "loss": 1.1777, "step": 21460 }, { "epoch": 0.826371511068335, "grad_norm": 1.303179383277893, "learning_rate": 0.00012700613571419543, "loss": 1.1226, "step": 21465 }, { "epoch": 0.8265640038498556, "grad_norm": 1.0216439962387085, "learning_rate": 0.0001269770184835745, "loss": 1.0702, "step": 21470 }, { "epoch": 0.8267564966313763, "grad_norm": 1.5021692514419556, "learning_rate": 0.00012694789878608386, "loss": 1.1696, "step": 21475 }, { "epoch": 0.826948989412897, "grad_norm": 1.1756644248962402, 
"learning_rate": 0.00012691877662438642, "loss": 1.1441, "step": 21480 }, { "epoch": 0.8271414821944177, "grad_norm": 1.7375133037567139, "learning_rate": 0.0001268896520011452, "loss": 1.0854, "step": 21485 }, { "epoch": 0.8273339749759384, "grad_norm": 0.894169270992279, "learning_rate": 0.00012686052491902344, "loss": 1.0767, "step": 21490 }, { "epoch": 0.8275264677574591, "grad_norm": 2.3879435062408447, "learning_rate": 0.00012683139538068457, "loss": 1.0683, "step": 21495 }, { "epoch": 0.8277189605389798, "grad_norm": 0.7615637183189392, "learning_rate": 0.00012680226338879238, "loss": 1.0482, "step": 21500 }, { "epoch": 0.8279114533205005, "grad_norm": 1.056795597076416, "learning_rate": 0.0001267731289460107, "loss": 1.0303, "step": 21505 }, { "epoch": 0.8281039461020212, "grad_norm": 1.099278211593628, "learning_rate": 0.00012674399205500375, "loss": 1.1923, "step": 21510 }, { "epoch": 0.8282964388835419, "grad_norm": 1.1515510082244873, "learning_rate": 0.0001267148527184359, "loss": 1.1697, "step": 21515 }, { "epoch": 0.8284889316650625, "grad_norm": 1.8237887620925903, "learning_rate": 0.00012668571093897175, "loss": 1.0131, "step": 21520 }, { "epoch": 0.8286814244465832, "grad_norm": 1.1378989219665527, "learning_rate": 0.0001266565667192761, "loss": 1.1541, "step": 21525 }, { "epoch": 0.828873917228104, "grad_norm": 1.1044882535934448, "learning_rate": 0.000126627420062014, "loss": 0.9193, "step": 21530 }, { "epoch": 0.8290664100096247, "grad_norm": 1.0803862810134888, "learning_rate": 0.00012659827096985073, "loss": 1.0778, "step": 21535 }, { "epoch": 0.8292589027911453, "grad_norm": 1.004387617111206, "learning_rate": 0.00012656911944545178, "loss": 1.0134, "step": 21540 }, { "epoch": 0.829451395572666, "grad_norm": 1.0931042432785034, "learning_rate": 0.00012653996549148285, "loss": 1.0738, "step": 21545 }, { "epoch": 0.8296438883541867, "grad_norm": 1.053134560585022, "learning_rate": 0.00012651080911060993, "loss": 0.9885, "step": 21550 }, { 
"epoch": 0.8298363811357075, "grad_norm": 1.2585800886154175, "learning_rate": 0.00012648165030549908, "loss": 1.0844, "step": 21555 }, { "epoch": 0.8300288739172281, "grad_norm": 1.301685094833374, "learning_rate": 0.00012645248907881677, "loss": 1.1168, "step": 21560 }, { "epoch": 0.8302213666987488, "grad_norm": 2.356616973876953, "learning_rate": 0.00012642332543322954, "loss": 1.3397, "step": 21565 }, { "epoch": 0.8304138594802695, "grad_norm": 1.0727852582931519, "learning_rate": 0.00012639415937140428, "loss": 1.1311, "step": 21570 }, { "epoch": 0.8306063522617901, "grad_norm": 1.3422895669937134, "learning_rate": 0.00012636499089600794, "loss": 1.2474, "step": 21575 }, { "epoch": 0.8307988450433109, "grad_norm": 1.1913752555847168, "learning_rate": 0.00012633582000970788, "loss": 1.1439, "step": 21580 }, { "epoch": 0.8309913378248316, "grad_norm": 1.068538784980774, "learning_rate": 0.0001263066467151715, "loss": 1.0762, "step": 21585 }, { "epoch": 0.8311838306063523, "grad_norm": 1.4798871278762817, "learning_rate": 0.00012627747101506655, "loss": 1.1836, "step": 21590 }, { "epoch": 0.8313763233878729, "grad_norm": 0.9586134552955627, "learning_rate": 0.00012624829291206098, "loss": 1.154, "step": 21595 }, { "epoch": 0.8315688161693936, "grad_norm": 1.3402488231658936, "learning_rate": 0.00012621911240882287, "loss": 1.0223, "step": 21600 }, { "epoch": 0.8317613089509144, "grad_norm": 0.896938145160675, "learning_rate": 0.00012618992950802062, "loss": 1.0489, "step": 21605 }, { "epoch": 0.831953801732435, "grad_norm": 1.466774821281433, "learning_rate": 0.00012616074421232281, "loss": 1.0963, "step": 21610 }, { "epoch": 0.8321462945139557, "grad_norm": 1.714902639389038, "learning_rate": 0.00012613155652439826, "loss": 1.1946, "step": 21615 }, { "epoch": 0.8323387872954764, "grad_norm": 1.3679184913635254, "learning_rate": 0.00012610236644691592, "loss": 1.1356, "step": 21620 }, { "epoch": 0.8325312800769971, "grad_norm": 2.8363022804260254, 
"learning_rate": 0.00012607317398254515, "loss": 1.2324, "step": 21625 }, { "epoch": 0.8327237728585178, "grad_norm": 1.7993220090866089, "learning_rate": 0.00012604397913395528, "loss": 1.0687, "step": 21630 }, { "epoch": 0.8329162656400385, "grad_norm": 1.42396879196167, "learning_rate": 0.00012601478190381608, "loss": 1.1426, "step": 21635 }, { "epoch": 0.8331087584215592, "grad_norm": 0.9980899691581726, "learning_rate": 0.0001259855822947974, "loss": 1.0239, "step": 21640 }, { "epoch": 0.8333012512030799, "grad_norm": 1.641063928604126, "learning_rate": 0.00012595638030956936, "loss": 1.1006, "step": 21645 }, { "epoch": 0.8334937439846006, "grad_norm": 0.7932726740837097, "learning_rate": 0.00012592717595080226, "loss": 0.9438, "step": 21650 }, { "epoch": 0.8336862367661213, "grad_norm": 0.988121747970581, "learning_rate": 0.0001258979692211667, "loss": 1.0918, "step": 21655 }, { "epoch": 0.833878729547642, "grad_norm": 2.024733066558838, "learning_rate": 0.0001258687601233334, "loss": 1.1982, "step": 21660 }, { "epoch": 0.8340712223291626, "grad_norm": 1.4302330017089844, "learning_rate": 0.00012583954865997337, "loss": 1.0746, "step": 21665 }, { "epoch": 0.8342637151106833, "grad_norm": 1.084583044052124, "learning_rate": 0.00012581033483375777, "loss": 1.173, "step": 21670 }, { "epoch": 0.8344562078922041, "grad_norm": 1.3885962963104248, "learning_rate": 0.00012578111864735802, "loss": 1.0817, "step": 21675 }, { "epoch": 0.8346487006737248, "grad_norm": 1.2596712112426758, "learning_rate": 0.00012575190010344578, "loss": 1.0657, "step": 21680 }, { "epoch": 0.8348411934552454, "grad_norm": 1.376128911972046, "learning_rate": 0.00012572267920469283, "loss": 1.0961, "step": 21685 }, { "epoch": 0.8350336862367661, "grad_norm": 1.203218936920166, "learning_rate": 0.00012569345595377128, "loss": 1.2123, "step": 21690 }, { "epoch": 0.8352261790182868, "grad_norm": 2.2045395374298096, "learning_rate": 0.00012566423035335338, "loss": 1.2024, "step": 21695 }, { 
"epoch": 0.8354186717998076, "grad_norm": 2.305617094039917, "learning_rate": 0.00012563500240611166, "loss": 1.027, "step": 21700 }, { "epoch": 0.8356111645813282, "grad_norm": 0.8593419194221497, "learning_rate": 0.00012560577211471875, "loss": 1.0436, "step": 21705 }, { "epoch": 0.8358036573628489, "grad_norm": 1.3252314329147339, "learning_rate": 0.00012557653948184761, "loss": 0.9099, "step": 21710 }, { "epoch": 0.8359961501443696, "grad_norm": 1.569014072418213, "learning_rate": 0.0001255473045101714, "loss": 1.1202, "step": 21715 }, { "epoch": 0.8361886429258902, "grad_norm": 0.9022922515869141, "learning_rate": 0.00012551806720236338, "loss": 1.1666, "step": 21720 }, { "epoch": 0.836381135707411, "grad_norm": 1.5258845090866089, "learning_rate": 0.00012548882756109717, "loss": 1.1072, "step": 21725 }, { "epoch": 0.8365736284889317, "grad_norm": 1.4463955163955688, "learning_rate": 0.0001254595855890465, "loss": 1.0444, "step": 21730 }, { "epoch": 0.8367661212704524, "grad_norm": 1.6336601972579956, "learning_rate": 0.00012543034128888544, "loss": 1.2973, "step": 21735 }, { "epoch": 0.836958614051973, "grad_norm": 1.2576451301574707, "learning_rate": 0.0001254010946632881, "loss": 1.0968, "step": 21740 }, { "epoch": 0.8371511068334937, "grad_norm": 1.2530792951583862, "learning_rate": 0.0001253718457149289, "loss": 0.9215, "step": 21745 }, { "epoch": 0.8373435996150145, "grad_norm": 1.3370553255081177, "learning_rate": 0.0001253425944464825, "loss": 1.1147, "step": 21750 }, { "epoch": 0.8375360923965351, "grad_norm": 1.5159647464752197, "learning_rate": 0.00012531334086062374, "loss": 1.1326, "step": 21755 }, { "epoch": 0.8377285851780558, "grad_norm": 1.4643933773040771, "learning_rate": 0.0001252840849600276, "loss": 0.9886, "step": 21760 }, { "epoch": 0.8379210779595765, "grad_norm": 1.5805963277816772, "learning_rate": 0.00012525482674736942, "loss": 1.1807, "step": 21765 }, { "epoch": 0.8381135707410972, "grad_norm": 1.3167610168457031, "learning_rate": 
0.00012522556622532462, "loss": 0.93, "step": 21770 }, { "epoch": 0.8383060635226179, "grad_norm": 1.4402116537094116, "learning_rate": 0.00012519630339656888, "loss": 1.1223, "step": 21775 }, { "epoch": 0.8384985563041386, "grad_norm": 1.5734583139419556, "learning_rate": 0.00012516703826377814, "loss": 1.1476, "step": 21780 }, { "epoch": 0.8386910490856593, "grad_norm": 1.581139326095581, "learning_rate": 0.00012513777082962842, "loss": 1.3, "step": 21785 }, { "epoch": 0.83888354186718, "grad_norm": 1.0609793663024902, "learning_rate": 0.0001251085010967961, "loss": 1.1791, "step": 21790 }, { "epoch": 0.8390760346487006, "grad_norm": 1.130085825920105, "learning_rate": 0.00012507922906795772, "loss": 1.0816, "step": 21795 }, { "epoch": 0.8392685274302214, "grad_norm": 2.4827651977539062, "learning_rate": 0.00012504995474578993, "loss": 1.1677, "step": 21800 }, { "epoch": 0.8394610202117421, "grad_norm": 1.187456727027893, "learning_rate": 0.00012502067813296972, "loss": 0.9263, "step": 21805 }, { "epoch": 0.8396535129932627, "grad_norm": 1.375260353088379, "learning_rate": 0.00012499139923217425, "loss": 1.1236, "step": 21810 }, { "epoch": 0.8398460057747834, "grad_norm": 1.0707255601882935, "learning_rate": 0.00012496211804608089, "loss": 1.0672, "step": 21815 }, { "epoch": 0.8400384985563042, "grad_norm": 1.7111786603927612, "learning_rate": 0.00012493283457736716, "loss": 1.0458, "step": 21820 }, { "epoch": 0.8402309913378249, "grad_norm": 0.9973586201667786, "learning_rate": 0.0001249035488287109, "loss": 0.9639, "step": 21825 }, { "epoch": 0.8404234841193455, "grad_norm": 1.5254673957824707, "learning_rate": 0.00012487426080279006, "loss": 1.2324, "step": 21830 }, { "epoch": 0.8406159769008662, "grad_norm": 1.4964022636413574, "learning_rate": 0.00012484497050228285, "loss": 1.1236, "step": 21835 }, { "epoch": 0.8408084696823869, "grad_norm": 1.157464623451233, "learning_rate": 0.00012481567792986767, "loss": 1.0155, "step": 21840 }, { "epoch": 
0.8410009624639077, "grad_norm": 1.8712735176086426, "learning_rate": 0.0001247863830882231, "loss": 1.0772, "step": 21845 }, { "epoch": 0.8411934552454283, "grad_norm": 1.2394201755523682, "learning_rate": 0.00012475708598002805, "loss": 1.2253, "step": 21850 }, { "epoch": 0.841385948026949, "grad_norm": 1.3483343124389648, "learning_rate": 0.00012472778660796145, "loss": 1.0598, "step": 21855 }, { "epoch": 0.8415784408084697, "grad_norm": 1.1623843908309937, "learning_rate": 0.0001246984849747026, "loss": 1.0261, "step": 21860 }, { "epoch": 0.8417709335899903, "grad_norm": 5.310647964477539, "learning_rate": 0.0001246691810829309, "loss": 1.094, "step": 21865 }, { "epoch": 0.8419634263715111, "grad_norm": 1.5004624128341675, "learning_rate": 0.000124639874935326, "loss": 1.1192, "step": 21870 }, { "epoch": 0.8421559191530318, "grad_norm": 1.1160995960235596, "learning_rate": 0.00012461056653456775, "loss": 1.1409, "step": 21875 }, { "epoch": 0.8423484119345525, "grad_norm": 1.3328487873077393, "learning_rate": 0.0001245812558833362, "loss": 0.7442, "step": 21880 }, { "epoch": 0.8425409047160731, "grad_norm": 1.2014681100845337, "learning_rate": 0.0001245519429843117, "loss": 1.0333, "step": 21885 }, { "epoch": 0.8427333974975938, "grad_norm": 0.916928231716156, "learning_rate": 0.00012452262784017464, "loss": 1.0076, "step": 21890 }, { "epoch": 0.8429258902791146, "grad_norm": 1.1074447631835938, "learning_rate": 0.0001244933104536057, "loss": 1.1731, "step": 21895 }, { "epoch": 0.8431183830606352, "grad_norm": 1.059792160987854, "learning_rate": 0.00012446399082728578, "loss": 1.0673, "step": 21900 }, { "epoch": 0.8433108758421559, "grad_norm": 0.8817097544670105, "learning_rate": 0.00012443466896389595, "loss": 0.9764, "step": 21905 }, { "epoch": 0.8435033686236766, "grad_norm": 1.0615062713623047, "learning_rate": 0.0001244053448661175, "loss": 1.162, "step": 21910 }, { "epoch": 0.8436958614051973, "grad_norm": 1.173344612121582, "learning_rate": 
0.0001243760185366319, "loss": 0.997, "step": 21915 }, { "epoch": 0.843888354186718, "grad_norm": 1.5543683767318726, "learning_rate": 0.00012434668997812092, "loss": 1.1386, "step": 21920 }, { "epoch": 0.8440808469682387, "grad_norm": 1.753692388534546, "learning_rate": 0.00012431735919326645, "loss": 1.24, "step": 21925 }, { "epoch": 0.8442733397497594, "grad_norm": 1.3150075674057007, "learning_rate": 0.00012428802618475053, "loss": 1.0783, "step": 21930 }, { "epoch": 0.84446583253128, "grad_norm": 1.4009346961975098, "learning_rate": 0.00012425869095525552, "loss": 0.9953, "step": 21935 }, { "epoch": 0.8446583253128007, "grad_norm": 1.7203071117401123, "learning_rate": 0.00012422935350746395, "loss": 0.9115, "step": 21940 }, { "epoch": 0.8448508180943215, "grad_norm": 1.1231714487075806, "learning_rate": 0.00012420001384405845, "loss": 1.1767, "step": 21945 }, { "epoch": 0.8450433108758422, "grad_norm": 1.0938594341278076, "learning_rate": 0.00012417067196772202, "loss": 1.0433, "step": 21950 }, { "epoch": 0.8452358036573628, "grad_norm": 1.9310616254806519, "learning_rate": 0.00012414132788113777, "loss": 1.0809, "step": 21955 }, { "epoch": 0.8454282964388835, "grad_norm": 1.4712507724761963, "learning_rate": 0.00012411198158698898, "loss": 1.0663, "step": 21960 }, { "epoch": 0.8456207892204043, "grad_norm": 0.9529402256011963, "learning_rate": 0.00012408263308795922, "loss": 1.0486, "step": 21965 }, { "epoch": 0.845813282001925, "grad_norm": 0.8903969526290894, "learning_rate": 0.0001240532823867322, "loss": 1.0795, "step": 21970 }, { "epoch": 0.8460057747834456, "grad_norm": 1.0439949035644531, "learning_rate": 0.00012402392948599185, "loss": 1.0456, "step": 21975 }, { "epoch": 0.8461982675649663, "grad_norm": 1.5263192653656006, "learning_rate": 0.00012399457438842226, "loss": 1.004, "step": 21980 }, { "epoch": 0.846390760346487, "grad_norm": 1.3312830924987793, "learning_rate": 0.00012396521709670782, "loss": 1.1062, "step": 21985 }, { "epoch": 
0.8465832531280078, "grad_norm": 1.6657592058181763, "learning_rate": 0.00012393585761353302, "loss": 1.019, "step": 21990 }, { "epoch": 0.8467757459095284, "grad_norm": 0.9013869166374207, "learning_rate": 0.0001239064959415826, "loss": 1.3499, "step": 21995 }, { "epoch": 0.8469682386910491, "grad_norm": 0.9990372061729431, "learning_rate": 0.0001238771320835415, "loss": 1.064, "step": 22000 }, { "epoch": 0.8471607314725698, "grad_norm": 1.070052146911621, "learning_rate": 0.00012384776604209484, "loss": 1.0608, "step": 22005 }, { "epoch": 0.8473532242540904, "grad_norm": 2.112750291824341, "learning_rate": 0.00012381839781992797, "loss": 1.1134, "step": 22010 }, { "epoch": 0.8475457170356112, "grad_norm": 1.3599921464920044, "learning_rate": 0.00012378902741972636, "loss": 0.964, "step": 22015 }, { "epoch": 0.8477382098171319, "grad_norm": 1.8712466955184937, "learning_rate": 0.00012375965484417582, "loss": 1.129, "step": 22020 }, { "epoch": 0.8479307025986526, "grad_norm": 2.496819019317627, "learning_rate": 0.00012373028009596222, "loss": 1.1981, "step": 22025 }, { "epoch": 0.8481231953801732, "grad_norm": 1.8566465377807617, "learning_rate": 0.00012370090317777172, "loss": 1.1229, "step": 22030 }, { "epoch": 0.8483156881616939, "grad_norm": 1.0162265300750732, "learning_rate": 0.0001236715240922906, "loss": 1.2759, "step": 22035 }, { "epoch": 0.8485081809432147, "grad_norm": 1.4011484384536743, "learning_rate": 0.0001236421428422054, "loss": 1.0725, "step": 22040 }, { "epoch": 0.8487006737247353, "grad_norm": 0.9119108319282532, "learning_rate": 0.0001236127594302029, "loss": 1.0087, "step": 22045 }, { "epoch": 0.848893166506256, "grad_norm": 1.4366172552108765, "learning_rate": 0.00012358337385896993, "loss": 0.9379, "step": 22050 }, { "epoch": 0.8490856592877767, "grad_norm": 1.162611961364746, "learning_rate": 0.00012355398613119367, "loss": 1.0068, "step": 22055 }, { "epoch": 0.8492781520692974, "grad_norm": 1.5164228677749634, "learning_rate": 
0.0001235245962495614, "loss": 1.2239, "step": 22060 }, { "epoch": 0.8494706448508181, "grad_norm": 1.0570234060287476, "learning_rate": 0.00012349520421676066, "loss": 1.0332, "step": 22065 }, { "epoch": 0.8496631376323388, "grad_norm": 1.4047578573226929, "learning_rate": 0.0001234658100354791, "loss": 1.051, "step": 22070 }, { "epoch": 0.8498556304138595, "grad_norm": 1.317559003829956, "learning_rate": 0.00012343641370840465, "loss": 1.0702, "step": 22075 }, { "epoch": 0.8500481231953801, "grad_norm": 1.0683611631393433, "learning_rate": 0.0001234070152382255, "loss": 0.9898, "step": 22080 }, { "epoch": 0.8502406159769008, "grad_norm": 0.960689902305603, "learning_rate": 0.00012337761462762978, "loss": 1.377, "step": 22085 }, { "epoch": 0.8504331087584216, "grad_norm": 1.2345051765441895, "learning_rate": 0.0001233482118793061, "loss": 1.0417, "step": 22090 }, { "epoch": 0.8506256015399423, "grad_norm": 1.9396436214447021, "learning_rate": 0.0001233188069959431, "loss": 1.204, "step": 22095 }, { "epoch": 0.8508180943214629, "grad_norm": 1.8735712766647339, "learning_rate": 0.0001232893999802297, "loss": 0.9903, "step": 22100 }, { "epoch": 0.8510105871029836, "grad_norm": 2.0837035179138184, "learning_rate": 0.00012325999083485494, "loss": 1.01, "step": 22105 }, { "epoch": 0.8512030798845043, "grad_norm": 1.9637258052825928, "learning_rate": 0.00012323057956250806, "loss": 1.1652, "step": 22110 }, { "epoch": 0.8513955726660251, "grad_norm": 1.3333475589752197, "learning_rate": 0.0001232011661658786, "loss": 0.9568, "step": 22115 }, { "epoch": 0.8515880654475457, "grad_norm": 1.8034814596176147, "learning_rate": 0.0001231717506476562, "loss": 1.1863, "step": 22120 }, { "epoch": 0.8517805582290664, "grad_norm": 0.7985187768936157, "learning_rate": 0.00012314233301053068, "loss": 0.9462, "step": 22125 }, { "epoch": 0.8519730510105871, "grad_norm": 1.7575490474700928, "learning_rate": 0.00012311291325719213, "loss": 1.1151, "step": 22130 }, { "epoch": 
0.8521655437921078, "grad_norm": 1.058147668838501, "learning_rate": 0.00012308349139033076, "loss": 1.071, "step": 22135 }, { "epoch": 0.8523580365736285, "grad_norm": 1.0178147554397583, "learning_rate": 0.00012305406741263701, "loss": 1.2246, "step": 22140 }, { "epoch": 0.8525505293551492, "grad_norm": 1.0183746814727783, "learning_rate": 0.0001230246413268015, "loss": 1.076, "step": 22145 }, { "epoch": 0.8527430221366699, "grad_norm": 1.4708818197250366, "learning_rate": 0.0001229952131355151, "loss": 1.2221, "step": 22150 }, { "epoch": 0.8529355149181905, "grad_norm": 1.1673319339752197, "learning_rate": 0.00012296578284146879, "loss": 1.0985, "step": 22155 }, { "epoch": 0.8531280076997113, "grad_norm": 1.2929120063781738, "learning_rate": 0.00012293635044735373, "loss": 1.1093, "step": 22160 }, { "epoch": 0.853320500481232, "grad_norm": 1.097374677658081, "learning_rate": 0.0001229069159558614, "loss": 1.1361, "step": 22165 }, { "epoch": 0.8535129932627527, "grad_norm": 1.5961623191833496, "learning_rate": 0.00012287747936968335, "loss": 1.0493, "step": 22170 }, { "epoch": 0.8537054860442733, "grad_norm": 1.378281593322754, "learning_rate": 0.00012284804069151133, "loss": 1.0436, "step": 22175 }, { "epoch": 0.853897978825794, "grad_norm": 0.4067041277885437, "learning_rate": 0.00012281859992403736, "loss": 1.0238, "step": 22180 }, { "epoch": 0.8540904716073148, "grad_norm": 1.24373197555542, "learning_rate": 0.00012278915706995358, "loss": 1.0049, "step": 22185 }, { "epoch": 0.8542829643888354, "grad_norm": 2.2100603580474854, "learning_rate": 0.00012275971213195237, "loss": 1.1251, "step": 22190 }, { "epoch": 0.8544754571703561, "grad_norm": 1.0543677806854248, "learning_rate": 0.00012273026511272622, "loss": 1.0744, "step": 22195 }, { "epoch": 0.8546679499518768, "grad_norm": 1.5290037393569946, "learning_rate": 0.00012270081601496792, "loss": 1.1105, "step": 22200 }, { "epoch": 0.8548604427333975, "grad_norm": 1.0767719745635986, "learning_rate": 
0.00012267136484137042, "loss": 1.0246, "step": 22205 }, { "epoch": 0.8550529355149182, "grad_norm": 1.285439133644104, "learning_rate": 0.00012264191159462674, "loss": 0.9625, "step": 22210 }, { "epoch": 0.8552454282964389, "grad_norm": 1.0118284225463867, "learning_rate": 0.00012261245627743022, "loss": 0.8992, "step": 22215 }, { "epoch": 0.8554379210779596, "grad_norm": 1.4398612976074219, "learning_rate": 0.0001225829988924744, "loss": 1.2539, "step": 22220 }, { "epoch": 0.8556304138594802, "grad_norm": 0.7380401492118835, "learning_rate": 0.00012255353944245294, "loss": 0.9303, "step": 22225 }, { "epoch": 0.8558229066410009, "grad_norm": 1.2834627628326416, "learning_rate": 0.00012252407793005964, "loss": 1.0884, "step": 22230 }, { "epoch": 0.8560153994225217, "grad_norm": 1.7945185899734497, "learning_rate": 0.00012249461435798867, "loss": 1.1546, "step": 22235 }, { "epoch": 0.8562078922040424, "grad_norm": 1.4909627437591553, "learning_rate": 0.00012246514872893424, "loss": 1.097, "step": 22240 }, { "epoch": 0.856400384985563, "grad_norm": 1.3793891668319702, "learning_rate": 0.00012243568104559075, "loss": 1.0008, "step": 22245 }, { "epoch": 0.8565928777670837, "grad_norm": 1.5637750625610352, "learning_rate": 0.00012240621131065287, "loss": 1.0054, "step": 22250 }, { "epoch": 0.8567853705486044, "grad_norm": 1.0803687572479248, "learning_rate": 0.00012237673952681538, "loss": 1.0349, "step": 22255 }, { "epoch": 0.8569778633301252, "grad_norm": 1.220694661140442, "learning_rate": 0.00012234726569677328, "loss": 1.0886, "step": 22260 }, { "epoch": 0.8571703561116458, "grad_norm": 1.5394152402877808, "learning_rate": 0.0001223177898232218, "loss": 1.1316, "step": 22265 }, { "epoch": 0.8573628488931665, "grad_norm": 1.927856206893921, "learning_rate": 0.00012228831190885627, "loss": 1.1063, "step": 22270 }, { "epoch": 0.8575553416746872, "grad_norm": 2.0876247882843018, "learning_rate": 0.00012225883195637224, "loss": 1.0356, "step": 22275 }, { "epoch": 
0.857747834456208, "grad_norm": 1.3493059873580933, "learning_rate": 0.00012222934996846551, "loss": 1.191, "step": 22280 }, { "epoch": 0.8579403272377286, "grad_norm": 2.021245002746582, "learning_rate": 0.00012219986594783194, "loss": 1.2042, "step": 22285 }, { "epoch": 0.8581328200192493, "grad_norm": 0.9374911189079285, "learning_rate": 0.00012217037989716774, "loss": 1.1407, "step": 22290 }, { "epoch": 0.85832531280077, "grad_norm": 1.2450497150421143, "learning_rate": 0.00012214089181916915, "loss": 1.0325, "step": 22295 }, { "epoch": 0.8585178055822906, "grad_norm": 1.8689814805984497, "learning_rate": 0.00012211140171653265, "loss": 1.086, "step": 22300 }, { "epoch": 0.8587102983638114, "grad_norm": 1.4810429811477661, "learning_rate": 0.00012208190959195496, "loss": 1.1968, "step": 22305 }, { "epoch": 0.8589027911453321, "grad_norm": 0.8686331510543823, "learning_rate": 0.00012205241544813289, "loss": 1.1492, "step": 22310 }, { "epoch": 0.8590952839268527, "grad_norm": 1.288923740386963, "learning_rate": 0.00012202291928776355, "loss": 1.1514, "step": 22315 }, { "epoch": 0.8592877767083734, "grad_norm": 1.5871915817260742, "learning_rate": 0.00012199342111354411, "loss": 0.9645, "step": 22320 }, { "epoch": 0.8594802694898941, "grad_norm": 1.8444538116455078, "learning_rate": 0.00012196392092817202, "loss": 1.173, "step": 22325 }, { "epoch": 0.8596727622714149, "grad_norm": 1.4887306690216064, "learning_rate": 0.00012193441873434481, "loss": 1.0281, "step": 22330 }, { "epoch": 0.8598652550529355, "grad_norm": 1.3933312892913818, "learning_rate": 0.00012190491453476036, "loss": 0.9568, "step": 22335 }, { "epoch": 0.8600577478344562, "grad_norm": 1.9519412517547607, "learning_rate": 0.00012187540833211657, "loss": 1.0294, "step": 22340 }, { "epoch": 0.8602502406159769, "grad_norm": 1.5969189405441284, "learning_rate": 0.00012184590012911158, "loss": 1.111, "step": 22345 }, { "epoch": 0.8604427333974976, "grad_norm": 1.068150281906128, "learning_rate": 
0.00012181638992844377, "loss": 1.119, "step": 22350 }, { "epoch": 0.8606352261790183, "grad_norm": 1.8505868911743164, "learning_rate": 0.00012178687773281159, "loss": 1.1762, "step": 22355 }, { "epoch": 0.860827718960539, "grad_norm": 0.8779274225234985, "learning_rate": 0.00012175736354491381, "loss": 1.0762, "step": 22360 }, { "epoch": 0.8610202117420597, "grad_norm": 0.7734150886535645, "learning_rate": 0.0001217278473674492, "loss": 1.139, "step": 22365 }, { "epoch": 0.8612127045235803, "grad_norm": 1.2648577690124512, "learning_rate": 0.00012169832920311693, "loss": 0.9826, "step": 22370 }, { "epoch": 0.861405197305101, "grad_norm": 1.2812930345535278, "learning_rate": 0.00012166880905461618, "loss": 1.0287, "step": 22375 }, { "epoch": 0.8615976900866218, "grad_norm": 1.3110228776931763, "learning_rate": 0.00012163928692464637, "loss": 1.1135, "step": 22380 }, { "epoch": 0.8617901828681425, "grad_norm": 1.1708343029022217, "learning_rate": 0.00012160976281590713, "loss": 1.2012, "step": 22385 }, { "epoch": 0.8619826756496631, "grad_norm": 1.004078984260559, "learning_rate": 0.00012158023673109824, "loss": 0.978, "step": 22390 }, { "epoch": 0.8621751684311838, "grad_norm": 1.4461795091629028, "learning_rate": 0.00012155070867291965, "loss": 1.2483, "step": 22395 }, { "epoch": 0.8623676612127045, "grad_norm": 1.3903266191482544, "learning_rate": 0.00012152117864407149, "loss": 1.2962, "step": 22400 }, { "epoch": 0.8625601539942253, "grad_norm": 1.88125479221344, "learning_rate": 0.00012149164664725414, "loss": 1.1184, "step": 22405 }, { "epoch": 0.8627526467757459, "grad_norm": 1.5932334661483765, "learning_rate": 0.00012146211268516805, "loss": 1.117, "step": 22410 }, { "epoch": 0.8629451395572666, "grad_norm": 1.5401246547698975, "learning_rate": 0.00012143257676051394, "loss": 1.1546, "step": 22415 }, { "epoch": 0.8631376323387873, "grad_norm": 1.215257167816162, "learning_rate": 0.00012140303887599269, "loss": 1.139, "step": 22420 }, { "epoch": 
0.8633301251203079, "grad_norm": 1.1622364521026611, "learning_rate": 0.00012137349903430528, "loss": 1.1076, "step": 22425 }, { "epoch": 0.8635226179018287, "grad_norm": 1.5525301694869995, "learning_rate": 0.00012134395723815297, "loss": 0.8998, "step": 22430 }, { "epoch": 0.8637151106833494, "grad_norm": 1.1511106491088867, "learning_rate": 0.00012131441349023718, "loss": 1.0576, "step": 22435 }, { "epoch": 0.86390760346487, "grad_norm": 1.3346484899520874, "learning_rate": 0.00012128486779325947, "loss": 1.1717, "step": 22440 }, { "epoch": 0.8641000962463907, "grad_norm": 1.0038443803787231, "learning_rate": 0.0001212553201499216, "loss": 1.0569, "step": 22445 }, { "epoch": 0.8642925890279115, "grad_norm": 1.3039393424987793, "learning_rate": 0.00012122577056292548, "loss": 1.117, "step": 22450 }, { "epoch": 0.8644850818094322, "grad_norm": 1.9241334199905396, "learning_rate": 0.00012119621903497329, "loss": 1.3177, "step": 22455 }, { "epoch": 0.8646775745909528, "grad_norm": 1.7553646564483643, "learning_rate": 0.00012116666556876725, "loss": 1.1536, "step": 22460 }, { "epoch": 0.8648700673724735, "grad_norm": 1.4490256309509277, "learning_rate": 0.00012113711016700986, "loss": 1.0066, "step": 22465 }, { "epoch": 0.8650625601539942, "grad_norm": 0.8614715337753296, "learning_rate": 0.0001211075528324038, "loss": 1.0534, "step": 22470 }, { "epoch": 0.865255052935515, "grad_norm": 1.0804800987243652, "learning_rate": 0.00012107799356765181, "loss": 1.0743, "step": 22475 }, { "epoch": 0.8654475457170356, "grad_norm": 1.186589241027832, "learning_rate": 0.00012104843237545695, "loss": 1.1916, "step": 22480 }, { "epoch": 0.8656400384985563, "grad_norm": 1.1526156663894653, "learning_rate": 0.00012101886925852242, "loss": 1.069, "step": 22485 }, { "epoch": 0.865832531280077, "grad_norm": 1.382684350013733, "learning_rate": 0.0001209893042195515, "loss": 1.1266, "step": 22490 }, { "epoch": 0.8660250240615976, "grad_norm": 1.9565086364746094, "learning_rate": 
0.00012095973726124774, "loss": 1.3461, "step": 22495 }, { "epoch": 0.8662175168431184, "grad_norm": 0.820969820022583, "learning_rate": 0.0001209301683863149, "loss": 0.9658, "step": 22500 }, { "epoch": 0.8664100096246391, "grad_norm": 1.1080729961395264, "learning_rate": 0.00012090059759745677, "loss": 1.0718, "step": 22505 }, { "epoch": 0.8666025024061598, "grad_norm": 1.6945521831512451, "learning_rate": 0.00012087102489737746, "loss": 1.1481, "step": 22510 }, { "epoch": 0.8667949951876804, "grad_norm": 1.9991236925125122, "learning_rate": 0.00012084145028878117, "loss": 1.06, "step": 22515 }, { "epoch": 0.8669874879692011, "grad_norm": 1.1101261377334595, "learning_rate": 0.00012081187377437233, "loss": 1.1425, "step": 22520 }, { "epoch": 0.8671799807507219, "grad_norm": 0.9426703453063965, "learning_rate": 0.00012078229535685548, "loss": 1.1868, "step": 22525 }, { "epoch": 0.8673724735322426, "grad_norm": 1.0097079277038574, "learning_rate": 0.0001207527150389354, "loss": 1.1158, "step": 22530 }, { "epoch": 0.8675649663137632, "grad_norm": 1.3798326253890991, "learning_rate": 0.000120723132823317, "loss": 1.217, "step": 22535 }, { "epoch": 0.8677574590952839, "grad_norm": 1.17965567111969, "learning_rate": 0.00012069354871270538, "loss": 1.1504, "step": 22540 }, { "epoch": 0.8679499518768046, "grad_norm": 0.9790163040161133, "learning_rate": 0.00012066396270980583, "loss": 1.1189, "step": 22545 }, { "epoch": 0.8681424446583254, "grad_norm": 1.134660005569458, "learning_rate": 0.00012063437481732375, "loss": 1.1093, "step": 22550 }, { "epoch": 0.868334937439846, "grad_norm": 2.053043842315674, "learning_rate": 0.0001206047850379648, "loss": 1.2373, "step": 22555 }, { "epoch": 0.8685274302213667, "grad_norm": 1.0980230569839478, "learning_rate": 0.00012057519337443477, "loss": 1.0597, "step": 22560 }, { "epoch": 0.8687199230028874, "grad_norm": 0.8968497514724731, "learning_rate": 0.00012054559982943958, "loss": 1.0963, "step": 22565 }, { "epoch": 
0.868912415784408, "grad_norm": 1.183481216430664, "learning_rate": 0.00012051600440568538, "loss": 1.0797, "step": 22570 }, { "epoch": 0.8691049085659288, "grad_norm": 1.0228111743927002, "learning_rate": 0.00012048640710587848, "loss": 1.1906, "step": 22575 }, { "epoch": 0.8692974013474495, "grad_norm": 1.070470929145813, "learning_rate": 0.00012045680793272537, "loss": 1.0835, "step": 22580 }, { "epoch": 0.8694898941289702, "grad_norm": 1.7659672498703003, "learning_rate": 0.0001204272068889327, "loss": 1.0718, "step": 22585 }, { "epoch": 0.8696823869104908, "grad_norm": 1.503348469734192, "learning_rate": 0.00012039760397720727, "loss": 1.052, "step": 22590 }, { "epoch": 0.8698748796920116, "grad_norm": 1.4245517253875732, "learning_rate": 0.00012036799920025607, "loss": 1.1893, "step": 22595 }, { "epoch": 0.8700673724735323, "grad_norm": 1.6989365816116333, "learning_rate": 0.0001203383925607863, "loss": 1.1963, "step": 22600 }, { "epoch": 0.870259865255053, "grad_norm": 0.9346055388450623, "learning_rate": 0.0001203087840615052, "loss": 1.2617, "step": 22605 }, { "epoch": 0.8704523580365736, "grad_norm": 1.197245717048645, "learning_rate": 0.00012027917370512039, "loss": 1.0629, "step": 22610 }, { "epoch": 0.8706448508180943, "grad_norm": 1.3511501550674438, "learning_rate": 0.00012024956149433946, "loss": 1.2573, "step": 22615 }, { "epoch": 0.8708373435996151, "grad_norm": 1.259965181350708, "learning_rate": 0.00012021994743187026, "loss": 1.03, "step": 22620 }, { "epoch": 0.8710298363811357, "grad_norm": 0.90989089012146, "learning_rate": 0.00012019033152042083, "loss": 1.1222, "step": 22625 }, { "epoch": 0.8712223291626564, "grad_norm": 1.5417457818984985, "learning_rate": 0.00012016071376269932, "loss": 1.2125, "step": 22630 }, { "epoch": 0.8714148219441771, "grad_norm": 1.0979586839675903, "learning_rate": 0.00012013109416141408, "loss": 1.1149, "step": 22635 }, { "epoch": 0.8716073147256977, "grad_norm": 1.3153108358383179, "learning_rate": 
0.00012010147271927367, "loss": 0.9733, "step": 22640 }, { "epoch": 0.8717998075072185, "grad_norm": 1.1540855169296265, "learning_rate": 0.0001200718494389867, "loss": 1.1337, "step": 22645 }, { "epoch": 0.8719923002887392, "grad_norm": 2.1098320484161377, "learning_rate": 0.00012004222432326211, "loss": 1.2008, "step": 22650 }, { "epoch": 0.8721847930702599, "grad_norm": 2.331286668777466, "learning_rate": 0.00012001259737480883, "loss": 1.3357, "step": 22655 }, { "epoch": 0.8723772858517805, "grad_norm": 0.9849653244018555, "learning_rate": 0.00011998296859633613, "loss": 0.9834, "step": 22660 }, { "epoch": 0.8725697786333012, "grad_norm": 1.9478217363357544, "learning_rate": 0.0001199533379905533, "loss": 1.1192, "step": 22665 }, { "epoch": 0.872762271414822, "grad_norm": 1.009040355682373, "learning_rate": 0.0001199237055601699, "loss": 1.2274, "step": 22670 }, { "epoch": 0.8729547641963427, "grad_norm": 1.4086748361587524, "learning_rate": 0.00011989407130789558, "loss": 1.028, "step": 22675 }, { "epoch": 0.8731472569778633, "grad_norm": 1.224057674407959, "learning_rate": 0.00011986443523644025, "loss": 1.046, "step": 22680 }, { "epoch": 0.873339749759384, "grad_norm": 1.7258949279785156, "learning_rate": 0.00011983479734851393, "loss": 1.1056, "step": 22685 }, { "epoch": 0.8735322425409047, "grad_norm": 1.5146769285202026, "learning_rate": 0.00011980515764682674, "loss": 1.3491, "step": 22690 }, { "epoch": 0.8737247353224254, "grad_norm": 1.7950037717819214, "learning_rate": 0.0001197755161340891, "loss": 1.0302, "step": 22695 }, { "epoch": 0.8739172281039461, "grad_norm": 1.0561470985412598, "learning_rate": 0.00011974587281301151, "loss": 1.026, "step": 22700 }, { "epoch": 0.8741097208854668, "grad_norm": 2.0204522609710693, "learning_rate": 0.00011971622768630466, "loss": 0.999, "step": 22705 }, { "epoch": 0.8743022136669875, "grad_norm": 1.23675537109375, "learning_rate": 0.00011968658075667938, "loss": 0.9875, "step": 22710 }, { "epoch": 
0.8744947064485081, "grad_norm": 1.2208534479141235, "learning_rate": 0.00011965693202684671, "loss": 1.1022, "step": 22715 }, { "epoch": 0.8746871992300289, "grad_norm": 1.2948905229568481, "learning_rate": 0.00011962728149951785, "loss": 1.1026, "step": 22720 }, { "epoch": 0.8748796920115496, "grad_norm": 1.0135208368301392, "learning_rate": 0.00011959762917740407, "loss": 1.1093, "step": 22725 }, { "epoch": 0.8750721847930703, "grad_norm": 1.3146969079971313, "learning_rate": 0.00011956797506321695, "loss": 1.0676, "step": 22730 }, { "epoch": 0.8752646775745909, "grad_norm": 1.5172390937805176, "learning_rate": 0.00011953831915966813, "loss": 1.0981, "step": 22735 }, { "epoch": 0.8754571703561116, "grad_norm": 1.0460716485977173, "learning_rate": 0.00011950866146946946, "loss": 1.1096, "step": 22740 }, { "epoch": 0.8756496631376324, "grad_norm": 1.5505667924880981, "learning_rate": 0.00011947900199533291, "loss": 1.1211, "step": 22745 }, { "epoch": 0.875842155919153, "grad_norm": 1.3323757648468018, "learning_rate": 0.00011944934073997069, "loss": 1.0103, "step": 22750 }, { "epoch": 0.8760346487006737, "grad_norm": 1.1330286264419556, "learning_rate": 0.0001194196777060951, "loss": 1.0935, "step": 22755 }, { "epoch": 0.8762271414821944, "grad_norm": 1.3528751134872437, "learning_rate": 0.00011939001289641863, "loss": 1.0812, "step": 22760 }, { "epoch": 0.8764196342637152, "grad_norm": 1.3828253746032715, "learning_rate": 0.0001193603463136539, "loss": 1.0722, "step": 22765 }, { "epoch": 0.8766121270452358, "grad_norm": 1.1661975383758545, "learning_rate": 0.00011933067796051378, "loss": 1.0631, "step": 22770 }, { "epoch": 0.8768046198267565, "grad_norm": 1.3716634511947632, "learning_rate": 0.00011930100783971118, "loss": 1.0528, "step": 22775 }, { "epoch": 0.8769971126082772, "grad_norm": 1.4899778366088867, "learning_rate": 0.00011927133595395932, "loss": 1.0373, "step": 22780 }, { "epoch": 0.8771896053897978, "grad_norm": 1.0731201171875, "learning_rate": 
0.0001192416623059714, "loss": 1.1288, "step": 22785 }, { "epoch": 0.8773820981713186, "grad_norm": 1.1406874656677246, "learning_rate": 0.00011921198689846094, "loss": 1.0467, "step": 22790 }, { "epoch": 0.8775745909528393, "grad_norm": 1.203275442123413, "learning_rate": 0.00011918230973414157, "loss": 1.1394, "step": 22795 }, { "epoch": 0.87776708373436, "grad_norm": 1.0929608345031738, "learning_rate": 0.000119152630815727, "loss": 1.0472, "step": 22800 }, { "epoch": 0.8779595765158806, "grad_norm": 0.9629132747650146, "learning_rate": 0.00011912295014593124, "loss": 1.0325, "step": 22805 }, { "epoch": 0.8781520692974013, "grad_norm": 1.3864428997039795, "learning_rate": 0.00011909326772746839, "loss": 0.9702, "step": 22810 }, { "epoch": 0.8783445620789221, "grad_norm": 2.243774175643921, "learning_rate": 0.00011906358356305265, "loss": 1.1504, "step": 22815 }, { "epoch": 0.8785370548604428, "grad_norm": 1.1354432106018066, "learning_rate": 0.0001190338976553985, "loss": 1.0098, "step": 22820 }, { "epoch": 0.8787295476419634, "grad_norm": 1.9807575941085815, "learning_rate": 0.00011900421000722049, "loss": 1.1305, "step": 22825 }, { "epoch": 0.8789220404234841, "grad_norm": 1.0899982452392578, "learning_rate": 0.00011897452062123338, "loss": 1.0502, "step": 22830 }, { "epoch": 0.8791145332050048, "grad_norm": 1.194968819618225, "learning_rate": 0.00011894482950015202, "loss": 1.0947, "step": 22835 }, { "epoch": 0.8793070259865255, "grad_norm": 1.032558560371399, "learning_rate": 0.00011891513664669152, "loss": 1.3069, "step": 22840 }, { "epoch": 0.8794995187680462, "grad_norm": 1.0530674457550049, "learning_rate": 0.00011888544206356709, "loss": 1.0373, "step": 22845 }, { "epoch": 0.8796920115495669, "grad_norm": 2.1762609481811523, "learning_rate": 0.00011885574575349407, "loss": 1.1478, "step": 22850 }, { "epoch": 0.8798845043310876, "grad_norm": 1.5496394634246826, "learning_rate": 0.00011882604771918802, "loss": 1.078, "step": 22855 }, { "epoch": 
0.8800769971126082, "grad_norm": 1.5198618173599243, "learning_rate": 0.0001187963479633646, "loss": 0.9666, "step": 22860 }, { "epoch": 0.880269489894129, "grad_norm": 2.006168842315674, "learning_rate": 0.00011876664648873969, "loss": 1.0162, "step": 22865 }, { "epoch": 0.8804619826756497, "grad_norm": 1.6335991621017456, "learning_rate": 0.0001187369432980293, "loss": 1.2807, "step": 22870 }, { "epoch": 0.8806544754571703, "grad_norm": 1.4370218515396118, "learning_rate": 0.00011870723839394953, "loss": 1.0569, "step": 22875 }, { "epoch": 0.880846968238691, "grad_norm": 1.4292523860931396, "learning_rate": 0.00011867753177921675, "loss": 1.0509, "step": 22880 }, { "epoch": 0.8810394610202117, "grad_norm": 0.8672935366630554, "learning_rate": 0.00011864782345654739, "loss": 1.0674, "step": 22885 }, { "epoch": 0.8812319538017325, "grad_norm": 2.000182628631592, "learning_rate": 0.00011861811342865814, "loss": 1.142, "step": 22890 }, { "epoch": 0.8814244465832531, "grad_norm": 1.2838269472122192, "learning_rate": 0.00011858840169826573, "loss": 1.2496, "step": 22895 }, { "epoch": 0.8816169393647738, "grad_norm": 0.8586519956588745, "learning_rate": 0.00011855868826808712, "loss": 1.0047, "step": 22900 }, { "epoch": 0.8818094321462945, "grad_norm": 0.9435043334960938, "learning_rate": 0.00011852897314083942, "loss": 0.9603, "step": 22905 }, { "epoch": 0.8820019249278153, "grad_norm": 1.5908135175704956, "learning_rate": 0.00011849925631923986, "loss": 1.1064, "step": 22910 }, { "epoch": 0.8821944177093359, "grad_norm": 1.6797462701797485, "learning_rate": 0.00011846953780600585, "loss": 1.1746, "step": 22915 }, { "epoch": 0.8823869104908566, "grad_norm": 1.5768578052520752, "learning_rate": 0.00011843981760385496, "loss": 1.153, "step": 22920 }, { "epoch": 0.8825794032723773, "grad_norm": 1.136809229850769, "learning_rate": 0.0001184100957155049, "loss": 1.1523, "step": 22925 }, { "epoch": 0.8827718960538979, "grad_norm": 1.0501734018325806, "learning_rate": 
0.00011838037214367354, "loss": 1.0341, "step": 22930 }, { "epoch": 0.8829643888354187, "grad_norm": 1.3567155599594116, "learning_rate": 0.00011835064689107893, "loss": 1.1462, "step": 22935 }, { "epoch": 0.8831568816169394, "grad_norm": 2.095165729522705, "learning_rate": 0.00011832091996043921, "loss": 1.1365, "step": 22940 }, { "epoch": 0.8833493743984601, "grad_norm": 1.119279384613037, "learning_rate": 0.00011829119135447274, "loss": 1.1334, "step": 22945 }, { "epoch": 0.8835418671799807, "grad_norm": 0.8329470157623291, "learning_rate": 0.00011826146107589795, "loss": 0.9907, "step": 22950 }, { "epoch": 0.8837343599615014, "grad_norm": 1.6559773683547974, "learning_rate": 0.00011823172912743355, "loss": 1.0937, "step": 22955 }, { "epoch": 0.8839268527430222, "grad_norm": 1.1509395837783813, "learning_rate": 0.00011820199551179827, "loss": 1.213, "step": 22960 }, { "epoch": 0.8841193455245429, "grad_norm": 1.926554799079895, "learning_rate": 0.00011817226023171109, "loss": 1.1967, "step": 22965 }, { "epoch": 0.8843118383060635, "grad_norm": 1.3691015243530273, "learning_rate": 0.00011814252328989111, "loss": 1.2489, "step": 22970 }, { "epoch": 0.8845043310875842, "grad_norm": 1.807802438735962, "learning_rate": 0.00011811278468905753, "loss": 1.2371, "step": 22975 }, { "epoch": 0.8846968238691049, "grad_norm": 1.0577672719955444, "learning_rate": 0.00011808304443192982, "loss": 0.9763, "step": 22980 }, { "epoch": 0.8848893166506256, "grad_norm": 1.69723379611969, "learning_rate": 0.00011805330252122743, "loss": 1.3159, "step": 22985 }, { "epoch": 0.8850818094321463, "grad_norm": 2.4447522163391113, "learning_rate": 0.00011802355895967017, "loss": 1.1288, "step": 22990 }, { "epoch": 0.885274302213667, "grad_norm": 1.1180788278579712, "learning_rate": 0.0001179938137499778, "loss": 1.2414, "step": 22995 }, { "epoch": 0.8854667949951877, "grad_norm": 1.8794695138931274, "learning_rate": 0.00011796406689487038, "loss": 1.1329, "step": 23000 }, { "epoch": 
0.8856592877767083, "grad_norm": 1.1928154230117798, "learning_rate": 0.00011793431839706803, "loss": 1.0499, "step": 23005 }, { "epoch": 0.8858517805582291, "grad_norm": 0.8071653842926025, "learning_rate": 0.00011790456825929106, "loss": 0.9116, "step": 23010 }, { "epoch": 0.8860442733397498, "grad_norm": 1.4556511640548706, "learning_rate": 0.00011787481648425995, "loss": 0.9842, "step": 23015 }, { "epoch": 0.8862367661212704, "grad_norm": 1.4011625051498413, "learning_rate": 0.00011784506307469527, "loss": 1.1377, "step": 23020 }, { "epoch": 0.8864292589027911, "grad_norm": 1.596177339553833, "learning_rate": 0.00011781530803331778, "loss": 1.2584, "step": 23025 }, { "epoch": 0.8866217516843118, "grad_norm": 1.4121986627578735, "learning_rate": 0.00011778555136284839, "loss": 1.0889, "step": 23030 }, { "epoch": 0.8868142444658326, "grad_norm": 1.7569849491119385, "learning_rate": 0.00011775579306600814, "loss": 1.0882, "step": 23035 }, { "epoch": 0.8870067372473532, "grad_norm": 2.0657474994659424, "learning_rate": 0.00011772603314551825, "loss": 1.0512, "step": 23040 }, { "epoch": 0.8871992300288739, "grad_norm": 1.419628620147705, "learning_rate": 0.00011769627160410006, "loss": 1.2821, "step": 23045 }, { "epoch": 0.8873917228103946, "grad_norm": 2.094897747039795, "learning_rate": 0.00011766650844447505, "loss": 1.0668, "step": 23050 }, { "epoch": 0.8875842155919152, "grad_norm": 1.6981192827224731, "learning_rate": 0.00011763674366936485, "loss": 1.1257, "step": 23055 }, { "epoch": 0.887776708373436, "grad_norm": 2.0729424953460693, "learning_rate": 0.0001176069772814913, "loss": 1.1388, "step": 23060 }, { "epoch": 0.8879692011549567, "grad_norm": 1.7522398233413696, "learning_rate": 0.0001175772092835763, "loss": 1.3847, "step": 23065 }, { "epoch": 0.8881616939364774, "grad_norm": 1.2705082893371582, "learning_rate": 0.00011754743967834197, "loss": 1.042, "step": 23070 }, { "epoch": 0.888354186717998, "grad_norm": 1.9361348152160645, "learning_rate": 
0.0001175176684685105, "loss": 1.1387, "step": 23075 }, { "epoch": 0.8885466794995188, "grad_norm": 2.4146616458892822, "learning_rate": 0.00011748789565680429, "loss": 1.1017, "step": 23080 }, { "epoch": 0.8887391722810395, "grad_norm": 1.01850163936615, "learning_rate": 0.00011745812124594589, "loss": 0.9676, "step": 23085 }, { "epoch": 0.8889316650625602, "grad_norm": 1.8511348962783813, "learning_rate": 0.00011742834523865796, "loss": 1.0274, "step": 23090 }, { "epoch": 0.8891241578440808, "grad_norm": 1.0579948425292969, "learning_rate": 0.0001173985676376633, "loss": 0.9528, "step": 23095 }, { "epoch": 0.8893166506256015, "grad_norm": 1.9102215766906738, "learning_rate": 0.00011736878844568486, "loss": 1.0718, "step": 23100 }, { "epoch": 0.8895091434071223, "grad_norm": 1.7665621042251587, "learning_rate": 0.00011733900766544579, "loss": 0.9721, "step": 23105 }, { "epoch": 0.889701636188643, "grad_norm": 1.6327630281448364, "learning_rate": 0.00011730922529966934, "loss": 1.1314, "step": 23110 }, { "epoch": 0.8898941289701636, "grad_norm": 1.567986011505127, "learning_rate": 0.00011727944135107889, "loss": 1.1881, "step": 23115 }, { "epoch": 0.8900866217516843, "grad_norm": 1.3827725648880005, "learning_rate": 0.00011724965582239798, "loss": 1.0747, "step": 23120 }, { "epoch": 0.890279114533205, "grad_norm": 1.1218315362930298, "learning_rate": 0.00011721986871635034, "loss": 1.0224, "step": 23125 }, { "epoch": 0.8904716073147257, "grad_norm": 0.9669145941734314, "learning_rate": 0.00011719008003565975, "loss": 1.1214, "step": 23130 }, { "epoch": 0.8906641000962464, "grad_norm": 2.2118616104125977, "learning_rate": 0.00011716028978305023, "loss": 1.0058, "step": 23135 }, { "epoch": 0.8908565928777671, "grad_norm": 1.0324665307998657, "learning_rate": 0.00011713049796124589, "loss": 1.0645, "step": 23140 }, { "epoch": 0.8910490856592878, "grad_norm": 0.9733279347419739, "learning_rate": 0.00011710070457297097, "loss": 1.1258, "step": 23145 }, { "epoch": 
0.8912415784408084, "grad_norm": 0.8568596243858337, "learning_rate": 0.00011707090962094991, "loss": 1.046, "step": 23150 }, { "epoch": 0.8914340712223292, "grad_norm": 1.0557111501693726, "learning_rate": 0.00011704111310790728, "loss": 1.0856, "step": 23155 }, { "epoch": 0.8916265640038499, "grad_norm": 1.436998724937439, "learning_rate": 0.00011701131503656771, "loss": 1.0475, "step": 23160 }, { "epoch": 0.8918190567853705, "grad_norm": 1.3652065992355347, "learning_rate": 0.00011698151540965607, "loss": 1.2369, "step": 23165 }, { "epoch": 0.8920115495668912, "grad_norm": 1.6808583736419678, "learning_rate": 0.00011695171422989735, "loss": 1.2898, "step": 23170 }, { "epoch": 0.8922040423484119, "grad_norm": 1.468636155128479, "learning_rate": 0.00011692191150001666, "loss": 1.1976, "step": 23175 }, { "epoch": 0.8923965351299327, "grad_norm": 1.6479716300964355, "learning_rate": 0.00011689210722273925, "loss": 0.9855, "step": 23180 }, { "epoch": 0.8925890279114533, "grad_norm": 1.1517993211746216, "learning_rate": 0.00011686230140079054, "loss": 1.0893, "step": 23185 }, { "epoch": 0.892781520692974, "grad_norm": 1.0213438272476196, "learning_rate": 0.00011683249403689606, "loss": 0.9994, "step": 23190 }, { "epoch": 0.8929740134744947, "grad_norm": 0.9689280390739441, "learning_rate": 0.00011680268513378152, "loss": 0.9765, "step": 23195 }, { "epoch": 0.8931665062560153, "grad_norm": 1.0091511011123657, "learning_rate": 0.00011677287469417272, "loss": 1.143, "step": 23200 }, { "epoch": 0.8933589990375361, "grad_norm": 1.0775808095932007, "learning_rate": 0.00011674306272079567, "loss": 0.9407, "step": 23205 }, { "epoch": 0.8935514918190568, "grad_norm": 2.0991709232330322, "learning_rate": 0.00011671324921637641, "loss": 1.1681, "step": 23210 }, { "epoch": 0.8937439846005775, "grad_norm": 1.5666394233703613, "learning_rate": 0.00011668343418364122, "loss": 1.2306, "step": 23215 }, { "epoch": 0.8939364773820981, "grad_norm": 1.256421685218811, "learning_rate": 
0.00011665361762531653, "loss": 1.092, "step": 23220 }, { "epoch": 0.8941289701636189, "grad_norm": 1.5252572298049927, "learning_rate": 0.00011662379954412879, "loss": 1.0102, "step": 23225 }, { "epoch": 0.8943214629451396, "grad_norm": 1.1907771825790405, "learning_rate": 0.00011659397994280472, "loss": 1.0496, "step": 23230 }, { "epoch": 0.8945139557266603, "grad_norm": 1.2852728366851807, "learning_rate": 0.00011656415882407113, "loss": 1.093, "step": 23235 }, { "epoch": 0.8947064485081809, "grad_norm": 1.3600996732711792, "learning_rate": 0.0001165343361906549, "loss": 1.0328, "step": 23240 }, { "epoch": 0.8948989412897016, "grad_norm": 1.609398603439331, "learning_rate": 0.00011650451204528316, "loss": 1.2157, "step": 23245 }, { "epoch": 0.8950914340712224, "grad_norm": 0.9227400422096252, "learning_rate": 0.00011647468639068318, "loss": 1.0264, "step": 23250 }, { "epoch": 0.895283926852743, "grad_norm": 1.7690601348876953, "learning_rate": 0.00011644485922958222, "loss": 1.3007, "step": 23255 }, { "epoch": 0.8954764196342637, "grad_norm": 0.9615120887756348, "learning_rate": 0.00011641503056470782, "loss": 0.9696, "step": 23260 }, { "epoch": 0.8956689124157844, "grad_norm": 1.3075053691864014, "learning_rate": 0.00011638520039878762, "loss": 1.0539, "step": 23265 }, { "epoch": 0.8958614051973051, "grad_norm": 1.8990029096603394, "learning_rate": 0.00011635536873454937, "loss": 1.2406, "step": 23270 }, { "epoch": 0.8960538979788258, "grad_norm": 1.562389850616455, "learning_rate": 0.00011632553557472101, "loss": 1.1273, "step": 23275 }, { "epoch": 0.8962463907603465, "grad_norm": 0.8242204785346985, "learning_rate": 0.00011629570092203059, "loss": 0.991, "step": 23280 }, { "epoch": 0.8964388835418672, "grad_norm": 1.8995356559753418, "learning_rate": 0.00011626586477920625, "loss": 1.3523, "step": 23285 }, { "epoch": 0.8966313763233879, "grad_norm": 1.2772774696350098, "learning_rate": 0.00011623602714897632, "loss": 1.1427, "step": 23290 }, { "epoch": 
0.8968238691049085, "grad_norm": 1.5422502756118774, "learning_rate": 0.00011620618803406929, "loss": 1.0688, "step": 23295 }, { "epoch": 0.8970163618864293, "grad_norm": 1.4164756536483765, "learning_rate": 0.00011617634743721369, "loss": 1.0575, "step": 23300 }, { "epoch": 0.89720885466795, "grad_norm": 0.9892400503158569, "learning_rate": 0.0001161465053611383, "loss": 1.0522, "step": 23305 }, { "epoch": 0.8974013474494706, "grad_norm": 1.217403531074524, "learning_rate": 0.00011611666180857193, "loss": 1.2071, "step": 23310 }, { "epoch": 0.8975938402309913, "grad_norm": 1.9891669750213623, "learning_rate": 0.00011608681678224362, "loss": 1.0608, "step": 23315 }, { "epoch": 0.897786333012512, "grad_norm": 1.7623512744903564, "learning_rate": 0.00011605697028488244, "loss": 1.0479, "step": 23320 }, { "epoch": 0.8979788257940328, "grad_norm": 1.8815721273422241, "learning_rate": 0.00011602712231921775, "loss": 1.2632, "step": 23325 }, { "epoch": 0.8981713185755534, "grad_norm": 1.2247025966644287, "learning_rate": 0.00011599727288797888, "loss": 1.0253, "step": 23330 }, { "epoch": 0.8983638113570741, "grad_norm": 1.4108113050460815, "learning_rate": 0.00011596742199389534, "loss": 1.1154, "step": 23335 }, { "epoch": 0.8985563041385948, "grad_norm": 1.6885722875595093, "learning_rate": 0.00011593756963969686, "loss": 1.0641, "step": 23340 }, { "epoch": 0.8987487969201154, "grad_norm": 1.6542564630508423, "learning_rate": 0.00011590771582811324, "loss": 0.914, "step": 23345 }, { "epoch": 0.8989412897016362, "grad_norm": 1.0524113178253174, "learning_rate": 0.00011587786056187435, "loss": 1.0983, "step": 23350 }, { "epoch": 0.8991337824831569, "grad_norm": 1.223191499710083, "learning_rate": 0.00011584800384371028, "loss": 0.996, "step": 23355 }, { "epoch": 0.8993262752646776, "grad_norm": 1.6196818351745605, "learning_rate": 0.00011581814567635128, "loss": 0.973, "step": 23360 }, { "epoch": 0.8995187680461982, "grad_norm": 1.0416576862335205, "learning_rate": 
0.00011578828606252763, "loss": 1.0055, "step": 23365 }, { "epoch": 0.8997112608277189, "grad_norm": 0.8480489253997803, "learning_rate": 0.00011575842500496979, "loss": 1.039, "step": 23370 }, { "epoch": 0.8999037536092397, "grad_norm": 1.4879505634307861, "learning_rate": 0.0001157285625064084, "loss": 1.1408, "step": 23375 }, { "epoch": 0.9000962463907604, "grad_norm": 1.2672336101531982, "learning_rate": 0.00011569869856957416, "loss": 0.9923, "step": 23380 }, { "epoch": 0.900288739172281, "grad_norm": 1.147540807723999, "learning_rate": 0.0001156688331971979, "loss": 1.04, "step": 23385 }, { "epoch": 0.9004812319538017, "grad_norm": 1.0331413745880127, "learning_rate": 0.00011563896639201068, "loss": 1.0502, "step": 23390 }, { "epoch": 0.9006737247353225, "grad_norm": 1.2713568210601807, "learning_rate": 0.00011560909815674354, "loss": 1.109, "step": 23395 }, { "epoch": 0.9008662175168431, "grad_norm": 1.5601911544799805, "learning_rate": 0.0001155792284941278, "loss": 0.9567, "step": 23400 }, { "epoch": 0.9010587102983638, "grad_norm": 2.2069153785705566, "learning_rate": 0.00011554935740689483, "loss": 1.1025, "step": 23405 }, { "epoch": 0.9012512030798845, "grad_norm": 1.4782518148422241, "learning_rate": 0.00011551948489777611, "loss": 1.1219, "step": 23410 }, { "epoch": 0.9014436958614052, "grad_norm": 1.1145074367523193, "learning_rate": 0.00011548961096950332, "loss": 0.9797, "step": 23415 }, { "epoch": 0.9016361886429259, "grad_norm": 1.0069323778152466, "learning_rate": 0.00011545973562480821, "loss": 1.1028, "step": 23420 }, { "epoch": 0.9018286814244466, "grad_norm": 1.3039555549621582, "learning_rate": 0.00011542985886642268, "loss": 0.9536, "step": 23425 }, { "epoch": 0.9020211742059673, "grad_norm": 1.2636560201644897, "learning_rate": 0.00011539998069707876, "loss": 1.1687, "step": 23430 }, { "epoch": 0.902213666987488, "grad_norm": 1.4051100015640259, "learning_rate": 0.00011537010111950866, "loss": 1.1032, "step": 23435 }, { "epoch": 
0.9024061597690086, "grad_norm": 1.3477351665496826, "learning_rate": 0.00011534022013644459, "loss": 1.0785, "step": 23440 }, { "epoch": 0.9025986525505294, "grad_norm": 1.7776321172714233, "learning_rate": 0.00011531033775061901, "loss": 0.9295, "step": 23445 }, { "epoch": 0.9027911453320501, "grad_norm": 1.9729288816452026, "learning_rate": 0.0001152804539647645, "loss": 1.1657, "step": 23450 }, { "epoch": 0.9029836381135707, "grad_norm": 0.9185901284217834, "learning_rate": 0.00011525056878161367, "loss": 1.0089, "step": 23455 }, { "epoch": 0.9031761308950914, "grad_norm": 1.6784918308258057, "learning_rate": 0.00011522068220389934, "loss": 1.0847, "step": 23460 }, { "epoch": 0.9033686236766121, "grad_norm": 1.261899709701538, "learning_rate": 0.00011519079423435449, "loss": 1.0281, "step": 23465 }, { "epoch": 0.9035611164581329, "grad_norm": 1.1247740983963013, "learning_rate": 0.0001151609048757121, "loss": 1.143, "step": 23470 }, { "epoch": 0.9037536092396535, "grad_norm": 1.3260306119918823, "learning_rate": 0.0001151310141307054, "loss": 1.022, "step": 23475 }, { "epoch": 0.9039461020211742, "grad_norm": 1.0106364488601685, "learning_rate": 0.00011510112200206769, "loss": 1.0949, "step": 23480 }, { "epoch": 0.9041385948026949, "grad_norm": 1.2663025856018066, "learning_rate": 0.00011507122849253243, "loss": 1.1526, "step": 23485 }, { "epoch": 0.9043310875842155, "grad_norm": 1.2372430562973022, "learning_rate": 0.00011504731269249489, "loss": 1.0767, "step": 23490 }, { "epoch": 0.9045235803657363, "grad_norm": 1.1163527965545654, "learning_rate": 0.00011501741670423267, "loss": 1.0479, "step": 23495 }, { "epoch": 0.904716073147257, "grad_norm": 1.766180157661438, "learning_rate": 0.00011498751934272714, "loss": 1.0545, "step": 23500 }, { "epoch": 0.9049085659287777, "grad_norm": 1.2410964965820312, "learning_rate": 0.00011495762061071227, "loss": 1.0771, "step": 23505 }, { "epoch": 0.9051010587102983, "grad_norm": 1.8052672147750854, "learning_rate": 
0.00011492772051092206, "loss": 1.2158, "step": 23510 }, { "epoch": 0.905293551491819, "grad_norm": 1.9236223697662354, "learning_rate": 0.00011489781904609071, "loss": 1.147, "step": 23515 }, { "epoch": 0.9054860442733398, "grad_norm": 1.454115867614746, "learning_rate": 0.0001148679162189525, "loss": 1.2127, "step": 23520 }, { "epoch": 0.9056785370548605, "grad_norm": 1.20502769947052, "learning_rate": 0.00011483801203224185, "loss": 1.2464, "step": 23525 }, { "epoch": 0.9058710298363811, "grad_norm": 1.5403794050216675, "learning_rate": 0.00011480810648869327, "loss": 0.9593, "step": 23530 }, { "epoch": 0.9060635226179018, "grad_norm": 0.9871656894683838, "learning_rate": 0.00011477819959104145, "loss": 1.0469, "step": 23535 }, { "epoch": 0.9062560153994226, "grad_norm": 1.037180781364441, "learning_rate": 0.00011474829134202121, "loss": 0.8791, "step": 23540 }, { "epoch": 0.9064485081809432, "grad_norm": 0.9241686463356018, "learning_rate": 0.00011471838174436738, "loss": 1.1382, "step": 23545 }, { "epoch": 0.9066410009624639, "grad_norm": 1.891224980354309, "learning_rate": 0.00011468847080081506, "loss": 1.0493, "step": 23550 }, { "epoch": 0.9068334937439846, "grad_norm": 1.9200764894485474, "learning_rate": 0.00011465855851409939, "loss": 1.0265, "step": 23555 }, { "epoch": 0.9070259865255053, "grad_norm": 0.8929172158241272, "learning_rate": 0.00011462864488695563, "loss": 1.1777, "step": 23560 }, { "epoch": 0.907218479307026, "grad_norm": 2.08207368850708, "learning_rate": 0.00011459872992211922, "loss": 0.9146, "step": 23565 }, { "epoch": 0.9074109720885467, "grad_norm": 1.6728849411010742, "learning_rate": 0.00011456881362232567, "loss": 1.1298, "step": 23570 }, { "epoch": 0.9076034648700674, "grad_norm": 2.403190851211548, "learning_rate": 0.0001145388959903106, "loss": 1.1824, "step": 23575 }, { "epoch": 0.907795957651588, "grad_norm": 1.7039401531219482, "learning_rate": 0.00011450897702880981, "loss": 1.1583, "step": 23580 }, { "epoch": 
0.9079884504331087, "grad_norm": 1.2045179605484009, "learning_rate": 0.0001144790567405592, "loss": 1.2031, "step": 23585 }, { "epoch": 0.9081809432146295, "grad_norm": 1.4418444633483887, "learning_rate": 0.00011444913512829476, "loss": 1.1499, "step": 23590 }, { "epoch": 0.9083734359961502, "grad_norm": 2.100736379623413, "learning_rate": 0.00011441921219475264, "loss": 1.1585, "step": 23595 }, { "epoch": 0.9085659287776708, "grad_norm": 1.10848867893219, "learning_rate": 0.00011438928794266905, "loss": 1.2494, "step": 23600 }, { "epoch": 0.9087584215591915, "grad_norm": 1.8016185760498047, "learning_rate": 0.00011435936237478045, "loss": 1.061, "step": 23605 }, { "epoch": 0.9089509143407122, "grad_norm": 1.1875159740447998, "learning_rate": 0.00011432943549382325, "loss": 1.1222, "step": 23610 }, { "epoch": 0.909143407122233, "grad_norm": 1.8396415710449219, "learning_rate": 0.00011429950730253407, "loss": 1.1891, "step": 23615 }, { "epoch": 0.9093358999037536, "grad_norm": 1.5621596574783325, "learning_rate": 0.00011426957780364973, "loss": 1.0585, "step": 23620 }, { "epoch": 0.9095283926852743, "grad_norm": 1.253409504890442, "learning_rate": 0.000114239646999907, "loss": 1.1791, "step": 23625 }, { "epoch": 0.909720885466795, "grad_norm": 2.3588509559631348, "learning_rate": 0.00011420971489404287, "loss": 1.1373, "step": 23630 }, { "epoch": 0.9099133782483156, "grad_norm": 1.597448706626892, "learning_rate": 0.00011417978148879445, "loss": 1.2299, "step": 23635 }, { "epoch": 0.9101058710298364, "grad_norm": 0.90355384349823, "learning_rate": 0.00011414984678689895, "loss": 0.9769, "step": 23640 }, { "epoch": 0.9102983638113571, "grad_norm": 1.7792707681655884, "learning_rate": 0.00011411991079109368, "loss": 1.0331, "step": 23645 }, { "epoch": 0.9104908565928778, "grad_norm": 1.5718694925308228, "learning_rate": 0.00011408997350411614, "loss": 1.1637, "step": 23650 }, { "epoch": 0.9106833493743984, "grad_norm": 1.0493242740631104, "learning_rate": 
0.00011406003492870383, "loss": 0.9587, "step": 23655 }, { "epoch": 0.9108758421559191, "grad_norm": 1.3035274744033813, "learning_rate": 0.00011403009506759445, "loss": 1.0251, "step": 23660 }, { "epoch": 0.9110683349374399, "grad_norm": 1.4814205169677734, "learning_rate": 0.00011400015392352585, "loss": 1.0466, "step": 23665 }, { "epoch": 0.9112608277189606, "grad_norm": 1.178098440170288, "learning_rate": 0.0001139702114992359, "loss": 0.9776, "step": 23670 }, { "epoch": 0.9114533205004812, "grad_norm": 1.1865342855453491, "learning_rate": 0.00011394026779746267, "loss": 1.0895, "step": 23675 }, { "epoch": 0.9116458132820019, "grad_norm": 0.8956665992736816, "learning_rate": 0.00011391032282094429, "loss": 1.0819, "step": 23680 }, { "epoch": 0.9118383060635226, "grad_norm": 1.0805063247680664, "learning_rate": 0.00011388037657241904, "loss": 0.9655, "step": 23685 }, { "epoch": 0.9120307988450433, "grad_norm": 1.116804838180542, "learning_rate": 0.00011385042905462526, "loss": 0.945, "step": 23690 }, { "epoch": 0.912223291626564, "grad_norm": 1.1748266220092773, "learning_rate": 0.00011382048027030155, "loss": 1.0803, "step": 23695 }, { "epoch": 0.9124157844080847, "grad_norm": 1.5685302019119263, "learning_rate": 0.00011379053022218645, "loss": 1.1678, "step": 23700 }, { "epoch": 0.9126082771896054, "grad_norm": 1.1496498584747314, "learning_rate": 0.00011376057891301873, "loss": 1.007, "step": 23705 }, { "epoch": 0.9128007699711261, "grad_norm": 1.3414727449417114, "learning_rate": 0.0001137306263455372, "loss": 1.0309, "step": 23710 }, { "epoch": 0.9129932627526468, "grad_norm": 0.9307123422622681, "learning_rate": 0.00011370067252248085, "loss": 1.2324, "step": 23715 }, { "epoch": 0.9131857555341675, "grad_norm": 2.051027536392212, "learning_rate": 0.00011367071744658875, "loss": 1.2293, "step": 23720 }, { "epoch": 0.9133782483156881, "grad_norm": 1.3794739246368408, "learning_rate": 0.00011364076112060014, "loss": 1.1828, "step": 23725 }, { "epoch": 
0.9135707410972088, "grad_norm": 1.082683801651001, "learning_rate": 0.00011361080354725424, "loss": 0.9434, "step": 23730 }, { "epoch": 0.9137632338787296, "grad_norm": 2.3093974590301514, "learning_rate": 0.00011358084472929054, "loss": 1.1994, "step": 23735 }, { "epoch": 0.9139557266602503, "grad_norm": 1.007186770439148, "learning_rate": 0.00011355088466944855, "loss": 1.0195, "step": 23740 }, { "epoch": 0.9141482194417709, "grad_norm": 1.239039659500122, "learning_rate": 0.00011352092337046791, "loss": 1.1732, "step": 23745 }, { "epoch": 0.9143407122232916, "grad_norm": 1.795091986656189, "learning_rate": 0.00011349096083508841, "loss": 1.1553, "step": 23750 }, { "epoch": 0.9145332050048123, "grad_norm": 1.2720720767974854, "learning_rate": 0.00011346099706604991, "loss": 1.0455, "step": 23755 }, { "epoch": 0.9147256977863331, "grad_norm": 1.3282806873321533, "learning_rate": 0.00011343103206609241, "loss": 0.9086, "step": 23760 }, { "epoch": 0.9149181905678537, "grad_norm": 1.055714726448059, "learning_rate": 0.00011340106583795597, "loss": 1.2688, "step": 23765 }, { "epoch": 0.9151106833493744, "grad_norm": 1.3557606935501099, "learning_rate": 0.00011337109838438085, "loss": 1.1387, "step": 23770 }, { "epoch": 0.9153031761308951, "grad_norm": 1.2776196002960205, "learning_rate": 0.00011334112970810737, "loss": 0.9351, "step": 23775 }, { "epoch": 0.9154956689124157, "grad_norm": 1.4037102460861206, "learning_rate": 0.00011331115981187595, "loss": 1.0117, "step": 23780 }, { "epoch": 0.9156881616939365, "grad_norm": 0.8925278782844543, "learning_rate": 0.00011328118869842712, "loss": 1.108, "step": 23785 }, { "epoch": 0.9158806544754572, "grad_norm": 2.050219774246216, "learning_rate": 0.0001132512163705016, "loss": 1.1364, "step": 23790 }, { "epoch": 0.9160731472569779, "grad_norm": 1.0239925384521484, "learning_rate": 0.0001132212428308401, "loss": 1.0919, "step": 23795 }, { "epoch": 0.9162656400384985, "grad_norm": 1.554990530014038, "learning_rate": 
0.00011319126808218355, "loss": 1.1526, "step": 23800 }, { "epoch": 0.9164581328200192, "grad_norm": 1.433929443359375, "learning_rate": 0.00011316129212727292, "loss": 1.1302, "step": 23805 }, { "epoch": 0.91665062560154, "grad_norm": 1.6558189392089844, "learning_rate": 0.00011313131496884927, "loss": 1.1572, "step": 23810 }, { "epoch": 0.9168431183830607, "grad_norm": 1.2385213375091553, "learning_rate": 0.00011310133660965387, "loss": 1.0693, "step": 23815 }, { "epoch": 0.9170356111645813, "grad_norm": 1.636640191078186, "learning_rate": 0.00011307135705242806, "loss": 1.0358, "step": 23820 }, { "epoch": 0.917228103946102, "grad_norm": 1.5593379735946655, "learning_rate": 0.0001130413762999132, "loss": 1.1899, "step": 23825 }, { "epoch": 0.9174205967276227, "grad_norm": 1.4657301902770996, "learning_rate": 0.00011301139435485086, "loss": 1.0543, "step": 23830 }, { "epoch": 0.9176130895091434, "grad_norm": 1.0801408290863037, "learning_rate": 0.00011298141121998273, "loss": 1.181, "step": 23835 }, { "epoch": 0.9178055822906641, "grad_norm": 1.6339335441589355, "learning_rate": 0.00011295142689805052, "loss": 1.197, "step": 23840 }, { "epoch": 0.9179980750721848, "grad_norm": 1.4383010864257812, "learning_rate": 0.00011292144139179612, "loss": 0.98, "step": 23845 }, { "epoch": 0.9181905678537055, "grad_norm": 1.440516471862793, "learning_rate": 0.00011289145470396152, "loss": 1.1183, "step": 23850 }, { "epoch": 0.9183830606352262, "grad_norm": 0.940071165561676, "learning_rate": 0.00011286146683728876, "loss": 1.0116, "step": 23855 }, { "epoch": 0.9185755534167469, "grad_norm": 1.1691455841064453, "learning_rate": 0.00011283147779452003, "loss": 1.0608, "step": 23860 }, { "epoch": 0.9187680461982676, "grad_norm": 1.218925952911377, "learning_rate": 0.00011280148757839771, "loss": 1.0606, "step": 23865 }, { "epoch": 0.9189605389797882, "grad_norm": 0.890487790107727, "learning_rate": 0.00011277149619166414, "loss": 0.8924, "step": 23870 }, { "epoch": 
0.9191530317613089, "grad_norm": 1.4061552286148071, "learning_rate": 0.00011274150363706182, "loss": 1.1201, "step": 23875 }, { "epoch": 0.9193455245428297, "grad_norm": 2.254596471786499, "learning_rate": 0.00011271150991733341, "loss": 1.1457, "step": 23880 }, { "epoch": 0.9195380173243504, "grad_norm": 1.2079321146011353, "learning_rate": 0.00011268151503522164, "loss": 1.0976, "step": 23885 }, { "epoch": 0.919730510105871, "grad_norm": 1.8789191246032715, "learning_rate": 0.00011265151899346931, "loss": 1.0205, "step": 23890 }, { "epoch": 0.9199230028873917, "grad_norm": 1.5862555503845215, "learning_rate": 0.00011262152179481938, "loss": 1.1655, "step": 23895 }, { "epoch": 0.9201154956689124, "grad_norm": 1.342571496963501, "learning_rate": 0.00011259152344201492, "loss": 1.1423, "step": 23900 }, { "epoch": 0.9203079884504332, "grad_norm": 1.4979714155197144, "learning_rate": 0.00011256152393779901, "loss": 1.0377, "step": 23905 }, { "epoch": 0.9205004812319538, "grad_norm": 2.0960614681243896, "learning_rate": 0.00011253152328491496, "loss": 1.0474, "step": 23910 }, { "epoch": 0.9206929740134745, "grad_norm": 1.4610568284988403, "learning_rate": 0.00011250152148610613, "loss": 1.1822, "step": 23915 }, { "epoch": 0.9208854667949952, "grad_norm": 1.4984396696090698, "learning_rate": 0.000112471518544116, "loss": 0.9195, "step": 23920 }, { "epoch": 0.9210779595765158, "grad_norm": 1.0438648462295532, "learning_rate": 0.00011244151446168807, "loss": 0.9973, "step": 23925 }, { "epoch": 0.9212704523580366, "grad_norm": 1.51102876663208, "learning_rate": 0.00011241150924156609, "loss": 1.1294, "step": 23930 }, { "epoch": 0.9214629451395573, "grad_norm": 1.743852138519287, "learning_rate": 0.00011238150288649381, "loss": 1.0525, "step": 23935 }, { "epoch": 0.921655437921078, "grad_norm": 1.6502209901809692, "learning_rate": 0.00011235149539921508, "loss": 1.0904, "step": 23940 }, { "epoch": 0.9218479307025986, "grad_norm": 1.1279150247573853, "learning_rate": 
0.00011232148678247393, "loss": 1.0176, "step": 23945 }, { "epoch": 0.9220404234841193, "grad_norm": 1.2196617126464844, "learning_rate": 0.00011229147703901444, "loss": 0.8492, "step": 23950 }, { "epoch": 0.9222329162656401, "grad_norm": 1.4295985698699951, "learning_rate": 0.0001122614661715808, "loss": 1.1331, "step": 23955 }, { "epoch": 0.9224254090471607, "grad_norm": 1.8270727396011353, "learning_rate": 0.00011223145418291731, "loss": 1.0813, "step": 23960 }, { "epoch": 0.9226179018286814, "grad_norm": 1.327848196029663, "learning_rate": 0.00011220144107576834, "loss": 0.9626, "step": 23965 }, { "epoch": 0.9228103946102021, "grad_norm": 1.5928614139556885, "learning_rate": 0.00011217142685287842, "loss": 1.1328, "step": 23970 }, { "epoch": 0.9230028873917228, "grad_norm": 1.3863136768341064, "learning_rate": 0.00011214141151699215, "loss": 1.1175, "step": 23975 }, { "epoch": 0.9231953801732435, "grad_norm": 1.4480311870574951, "learning_rate": 0.00011211139507085422, "loss": 0.9841, "step": 23980 }, { "epoch": 0.9233878729547642, "grad_norm": 1.1457321643829346, "learning_rate": 0.00011208137751720944, "loss": 1.0934, "step": 23985 }, { "epoch": 0.9235803657362849, "grad_norm": 1.614317774772644, "learning_rate": 0.00011205135885880272, "loss": 1.0216, "step": 23990 }, { "epoch": 0.9237728585178056, "grad_norm": 1.0232197046279907, "learning_rate": 0.00011202133909837906, "loss": 1.1024, "step": 23995 }, { "epoch": 0.9239653512993262, "grad_norm": 1.317587971687317, "learning_rate": 0.00011199131823868358, "loss": 1.2039, "step": 24000 }, { "epoch": 0.924157844080847, "grad_norm": 1.0058887004852295, "learning_rate": 0.00011196129628246148, "loss": 1.068, "step": 24005 }, { "epoch": 0.9243503368623677, "grad_norm": 1.1201086044311523, "learning_rate": 0.00011193127323245809, "loss": 1.0745, "step": 24010 }, { "epoch": 0.9245428296438883, "grad_norm": 1.1013514995574951, "learning_rate": 0.00011190124909141877, "loss": 1.1211, "step": 24015 }, { "epoch": 
0.924735322425409, "grad_norm": 1.3184977769851685, "learning_rate": 0.00011187122386208908, "loss": 1.2223, "step": 24020 }, { "epoch": 0.9249278152069298, "grad_norm": 1.053260087966919, "learning_rate": 0.0001118411975472146, "loss": 0.9556, "step": 24025 }, { "epoch": 0.9251203079884505, "grad_norm": 1.6878530979156494, "learning_rate": 0.00011181117014954105, "loss": 0.9915, "step": 24030 }, { "epoch": 0.9253128007699711, "grad_norm": 0.6977512836456299, "learning_rate": 0.00011178114167181423, "loss": 1.0629, "step": 24035 }, { "epoch": 0.9255052935514918, "grad_norm": 1.4478118419647217, "learning_rate": 0.00011175111211678006, "loss": 1.1585, "step": 24040 }, { "epoch": 0.9256977863330125, "grad_norm": 1.1715469360351562, "learning_rate": 0.0001117210814871845, "loss": 0.9811, "step": 24045 }, { "epoch": 0.9258902791145333, "grad_norm": 1.2581676244735718, "learning_rate": 0.00011169104978577369, "loss": 1.0722, "step": 24050 }, { "epoch": 0.9260827718960539, "grad_norm": 1.064126968383789, "learning_rate": 0.00011166101701529385, "loss": 0.983, "step": 24055 }, { "epoch": 0.9262752646775746, "grad_norm": 1.0658310651779175, "learning_rate": 0.00011163098317849123, "loss": 1.0436, "step": 24060 }, { "epoch": 0.9264677574590953, "grad_norm": 1.1407309770584106, "learning_rate": 0.00011160094827811223, "loss": 0.957, "step": 24065 }, { "epoch": 0.9266602502406159, "grad_norm": 1.3511934280395508, "learning_rate": 0.0001115709123169034, "loss": 1.0751, "step": 24070 }, { "epoch": 0.9268527430221367, "grad_norm": 1.416675329208374, "learning_rate": 0.00011154087529761125, "loss": 1.2558, "step": 24075 }, { "epoch": 0.9270452358036574, "grad_norm": 1.5210598707199097, "learning_rate": 0.00011151083722298252, "loss": 0.945, "step": 24080 }, { "epoch": 0.9272377285851781, "grad_norm": 1.1831562519073486, "learning_rate": 0.00011148079809576399, "loss": 1.0632, "step": 24085 }, { "epoch": 0.9274302213666987, "grad_norm": 1.1255745887756348, "learning_rate": 
0.00011145075791870252, "loss": 1.0556, "step": 24090 }, { "epoch": 0.9276227141482194, "grad_norm": 1.213739275932312, "learning_rate": 0.00011142071669454507, "loss": 1.1672, "step": 24095 }, { "epoch": 0.9278152069297402, "grad_norm": 1.0633070468902588, "learning_rate": 0.00011139067442603877, "loss": 1.1084, "step": 24100 }, { "epoch": 0.9280076997112608, "grad_norm": 0.8085178136825562, "learning_rate": 0.00011136063111593073, "loss": 0.9406, "step": 24105 }, { "epoch": 0.9282001924927815, "grad_norm": 1.0284137725830078, "learning_rate": 0.00011133058676696823, "loss": 1.1271, "step": 24110 }, { "epoch": 0.9283926852743022, "grad_norm": 1.3331536054611206, "learning_rate": 0.00011130054138189863, "loss": 1.1798, "step": 24115 }, { "epoch": 0.9285851780558229, "grad_norm": 1.1535316705703735, "learning_rate": 0.00011127049496346939, "loss": 0.9878, "step": 24120 }, { "epoch": 0.9287776708373436, "grad_norm": 1.4867428541183472, "learning_rate": 0.00011124044751442803, "loss": 1.0329, "step": 24125 }, { "epoch": 0.9289701636188643, "grad_norm": 0.9730262756347656, "learning_rate": 0.00011121039903752224, "loss": 1.0598, "step": 24130 }, { "epoch": 0.929162656400385, "grad_norm": 2.8743958473205566, "learning_rate": 0.0001111803495354997, "loss": 0.9686, "step": 24135 }, { "epoch": 0.9293551491819056, "grad_norm": 2.40010404586792, "learning_rate": 0.00011115029901110825, "loss": 1.1852, "step": 24140 }, { "epoch": 0.9295476419634263, "grad_norm": 1.7937008142471313, "learning_rate": 0.00011112024746709581, "loss": 1.0724, "step": 24145 }, { "epoch": 0.9297401347449471, "grad_norm": 1.1320335865020752, "learning_rate": 0.00011109019490621044, "loss": 1.0574, "step": 24150 }, { "epoch": 0.9299326275264678, "grad_norm": 1.4164587259292603, "learning_rate": 0.0001110601413312002, "loss": 1.1617, "step": 24155 }, { "epoch": 0.9301251203079884, "grad_norm": 1.665640950202942, "learning_rate": 0.00011103008674481328, "loss": 1.1355, "step": 24160 }, { "epoch": 
0.9303176130895091, "grad_norm": 1.509874701499939, "learning_rate": 0.00011100003114979802, "loss": 1.0042, "step": 24165 }, { "epoch": 0.9305101058710299, "grad_norm": 1.702763319015503, "learning_rate": 0.00011096997454890275, "loss": 1.1039, "step": 24170 }, { "epoch": 0.9307025986525506, "grad_norm": 0.9513006210327148, "learning_rate": 0.00011093991694487597, "loss": 1.1699, "step": 24175 }, { "epoch": 0.9308950914340712, "grad_norm": 0.8490985035896301, "learning_rate": 0.0001109098583404663, "loss": 1.0261, "step": 24180 }, { "epoch": 0.9310875842155919, "grad_norm": 1.2935947179794312, "learning_rate": 0.00011087979873842233, "loss": 1.1481, "step": 24185 }, { "epoch": 0.9312800769971126, "grad_norm": 0.9786511063575745, "learning_rate": 0.00011084973814149284, "loss": 0.9385, "step": 24190 }, { "epoch": 0.9314725697786334, "grad_norm": 1.1575534343719482, "learning_rate": 0.00011081967655242668, "loss": 0.966, "step": 24195 }, { "epoch": 0.931665062560154, "grad_norm": 1.2462104558944702, "learning_rate": 0.00011078961397397276, "loss": 1.0768, "step": 24200 }, { "epoch": 0.9318575553416747, "grad_norm": 1.0118372440338135, "learning_rate": 0.00011075955040888011, "loss": 1.033, "step": 24205 }, { "epoch": 0.9320500481231954, "grad_norm": 1.2791298627853394, "learning_rate": 0.00011072948585989789, "loss": 0.9917, "step": 24210 }, { "epoch": 0.932242540904716, "grad_norm": 1.0876747369766235, "learning_rate": 0.00011069942032977522, "loss": 1.1617, "step": 24215 }, { "epoch": 0.9324350336862368, "grad_norm": 2.205045223236084, "learning_rate": 0.00011066935382126144, "loss": 0.996, "step": 24220 }, { "epoch": 0.9326275264677575, "grad_norm": 1.4552040100097656, "learning_rate": 0.00011063928633710596, "loss": 1.0561, "step": 24225 }, { "epoch": 0.9328200192492782, "grad_norm": 1.6502306461334229, "learning_rate": 0.0001106092178800582, "loss": 1.0455, "step": 24230 }, { "epoch": 0.9330125120307988, "grad_norm": 1.861480474472046, "learning_rate": 
0.00011057914845286777, "loss": 0.9918, "step": 24235 }, { "epoch": 0.9332050048123195, "grad_norm": 1.1051427125930786, "learning_rate": 0.00011054907805828427, "loss": 1.0833, "step": 24240 }, { "epoch": 0.9333974975938403, "grad_norm": 1.3723036050796509, "learning_rate": 0.00011051900669905748, "loss": 1.1338, "step": 24245 }, { "epoch": 0.933589990375361, "grad_norm": 2.1225595474243164, "learning_rate": 0.00011048893437793721, "loss": 1.0879, "step": 24250 }, { "epoch": 0.9337824831568816, "grad_norm": 1.4194490909576416, "learning_rate": 0.00011045886109767336, "loss": 1.1349, "step": 24255 }, { "epoch": 0.9339749759384023, "grad_norm": 1.5220705270767212, "learning_rate": 0.00011042878686101597, "loss": 1.0283, "step": 24260 }, { "epoch": 0.934167468719923, "grad_norm": 1.7352094650268555, "learning_rate": 0.00011039871167071507, "loss": 1.2378, "step": 24265 }, { "epoch": 0.9343599615014437, "grad_norm": 2.2150251865386963, "learning_rate": 0.00011036863552952088, "loss": 1.1452, "step": 24270 }, { "epoch": 0.9345524542829644, "grad_norm": 0.9917442202568054, "learning_rate": 0.00011033855844018368, "loss": 0.8594, "step": 24275 }, { "epoch": 0.9347449470644851, "grad_norm": 1.0165492296218872, "learning_rate": 0.00011030848040545378, "loss": 1.2048, "step": 24280 }, { "epoch": 0.9349374398460057, "grad_norm": 1.1605945825576782, "learning_rate": 0.00011027840142808163, "loss": 1.0497, "step": 24285 }, { "epoch": 0.9351299326275264, "grad_norm": 1.9988510608673096, "learning_rate": 0.00011024832151081778, "loss": 1.1581, "step": 24290 }, { "epoch": 0.9353224254090472, "grad_norm": 1.6264874935150146, "learning_rate": 0.0001102182406564128, "loss": 1.089, "step": 24295 }, { "epoch": 0.9355149181905679, "grad_norm": 1.006515622138977, "learning_rate": 0.0001101881588676174, "loss": 0.9477, "step": 24300 }, { "epoch": 0.9357074109720885, "grad_norm": 1.7896440029144287, "learning_rate": 0.00011015807614718236, "loss": 0.9834, "step": 24305 }, { "epoch": 
0.9358999037536092, "grad_norm": 1.7942471504211426, "learning_rate": 0.00011012799249785854, "loss": 1.1321, "step": 24310 }, { "epoch": 0.9360923965351299, "grad_norm": 1.4186915159225464, "learning_rate": 0.00011009790792239692, "loss": 1.1373, "step": 24315 }, { "epoch": 0.9362848893166507, "grad_norm": 0.9895558953285217, "learning_rate": 0.00011006782242354852, "loss": 1.0627, "step": 24320 }, { "epoch": 0.9364773820981713, "grad_norm": 1.438604474067688, "learning_rate": 0.00011003773600406442, "loss": 1.0535, "step": 24325 }, { "epoch": 0.936669874879692, "grad_norm": 1.1675355434417725, "learning_rate": 0.00011000764866669586, "loss": 1.1195, "step": 24330 }, { "epoch": 0.9368623676612127, "grad_norm": 1.4219048023223877, "learning_rate": 0.00010997756041419416, "loss": 1.0578, "step": 24335 }, { "epoch": 0.9370548604427335, "grad_norm": 2.0969905853271484, "learning_rate": 0.00010994747124931062, "loss": 1.1122, "step": 24340 }, { "epoch": 0.9372473532242541, "grad_norm": 1.433214545249939, "learning_rate": 0.00010991738117479673, "loss": 1.0633, "step": 24345 }, { "epoch": 0.9374398460057748, "grad_norm": 1.5070579051971436, "learning_rate": 0.00010988729019340407, "loss": 0.9797, "step": 24350 }, { "epoch": 0.9376323387872955, "grad_norm": 0.9930442571640015, "learning_rate": 0.00010985719830788417, "loss": 0.9648, "step": 24355 }, { "epoch": 0.9378248315688161, "grad_norm": 1.7221930027008057, "learning_rate": 0.00010982710552098883, "loss": 0.9712, "step": 24360 }, { "epoch": 0.9380173243503369, "grad_norm": 1.0316636562347412, "learning_rate": 0.00010979701183546976, "loss": 1.0854, "step": 24365 }, { "epoch": 0.9382098171318576, "grad_norm": 1.5038282871246338, "learning_rate": 0.00010976691725407886, "loss": 1.1366, "step": 24370 }, { "epoch": 0.9384023099133783, "grad_norm": 1.6560473442077637, "learning_rate": 0.00010973682177956808, "loss": 1.1497, "step": 24375 }, { "epoch": 0.9385948026948989, "grad_norm": 1.3170095682144165, "learning_rate": 
0.00010970672541468943, "loss": 1.1246, "step": 24380 }, { "epoch": 0.9387872954764196, "grad_norm": 1.8806647062301636, "learning_rate": 0.00010967662816219506, "loss": 1.1005, "step": 24385 }, { "epoch": 0.9389797882579404, "grad_norm": 1.4266135692596436, "learning_rate": 0.00010964653002483713, "loss": 1.0151, "step": 24390 }, { "epoch": 0.939172281039461, "grad_norm": 2.0234365463256836, "learning_rate": 0.00010961643100536794, "loss": 1.1368, "step": 24395 }, { "epoch": 0.9393647738209817, "grad_norm": 1.403439998626709, "learning_rate": 0.00010958633110653987, "loss": 1.2229, "step": 24400 }, { "epoch": 0.9395572666025024, "grad_norm": 1.2399364709854126, "learning_rate": 0.0001095562303311053, "loss": 1.0342, "step": 24405 }, { "epoch": 0.939749759384023, "grad_norm": 1.1271092891693115, "learning_rate": 0.00010952612868181673, "loss": 1.1703, "step": 24410 }, { "epoch": 0.9399422521655438, "grad_norm": 1.1748933792114258, "learning_rate": 0.00010949602616142685, "loss": 1.1191, "step": 24415 }, { "epoch": 0.9401347449470645, "grad_norm": 1.4523221254348755, "learning_rate": 0.00010946592277268825, "loss": 1.1226, "step": 24420 }, { "epoch": 0.9403272377285852, "grad_norm": 2.0243780612945557, "learning_rate": 0.00010943581851835373, "loss": 1.1353, "step": 24425 }, { "epoch": 0.9405197305101058, "grad_norm": 1.318712830543518, "learning_rate": 0.00010940571340117613, "loss": 1.1688, "step": 24430 }, { "epoch": 0.9407122232916265, "grad_norm": 1.6720144748687744, "learning_rate": 0.00010937560742390833, "loss": 1.1818, "step": 24435 }, { "epoch": 0.9409047160731473, "grad_norm": 0.8121243119239807, "learning_rate": 0.00010934550058930336, "loss": 0.9643, "step": 24440 }, { "epoch": 0.941097208854668, "grad_norm": 1.427445650100708, "learning_rate": 0.00010931539290011425, "loss": 1.0619, "step": 24445 }, { "epoch": 0.9412897016361886, "grad_norm": 1.4542162418365479, "learning_rate": 0.00010928528435909415, "loss": 1.278, "step": 24450 }, { "epoch": 
0.9414821944177093, "grad_norm": 0.9350261688232422, "learning_rate": 0.00010925517496899633, "loss": 1.1934, "step": 24455 }, { "epoch": 0.94167468719923, "grad_norm": 0.9128903150558472, "learning_rate": 0.00010922506473257408, "loss": 0.991, "step": 24460 }, { "epoch": 0.9418671799807508, "grad_norm": 2.040079355239868, "learning_rate": 0.00010919495365258077, "loss": 1.2565, "step": 24465 }, { "epoch": 0.9420596727622714, "grad_norm": 1.4088531732559204, "learning_rate": 0.00010916484173176984, "loss": 1.0567, "step": 24470 }, { "epoch": 0.9422521655437921, "grad_norm": 1.0955448150634766, "learning_rate": 0.00010913472897289485, "loss": 0.9855, "step": 24475 }, { "epoch": 0.9424446583253128, "grad_norm": 1.6121997833251953, "learning_rate": 0.00010910461537870942, "loss": 1.0122, "step": 24480 }, { "epoch": 0.9426371511068335, "grad_norm": 1.548582911491394, "learning_rate": 0.00010907450095196718, "loss": 0.9924, "step": 24485 }, { "epoch": 0.9428296438883542, "grad_norm": 1.2104709148406982, "learning_rate": 0.00010904438569542202, "loss": 1.0338, "step": 24490 }, { "epoch": 0.9430221366698749, "grad_norm": 1.3403939008712769, "learning_rate": 0.00010901426961182764, "loss": 1.0474, "step": 24495 }, { "epoch": 0.9432146294513956, "grad_norm": 1.8973404169082642, "learning_rate": 0.00010898415270393802, "loss": 0.9729, "step": 24500 }, { "epoch": 0.9434071222329162, "grad_norm": 0.9071372747421265, "learning_rate": 0.00010895403497450716, "loss": 1.2973, "step": 24505 }, { "epoch": 0.943599615014437, "grad_norm": 2.5191125869750977, "learning_rate": 0.00010892391642628912, "loss": 1.1838, "step": 24510 }, { "epoch": 0.9437921077959577, "grad_norm": 1.3101541996002197, "learning_rate": 0.00010889379706203804, "loss": 1.0826, "step": 24515 }, { "epoch": 0.9439846005774783, "grad_norm": 1.1167513132095337, "learning_rate": 0.00010886367688450811, "loss": 0.9787, "step": 24520 }, { "epoch": 0.944177093358999, "grad_norm": 1.8214826583862305, "learning_rate": 
0.0001088335558964537, "loss": 1.1172, "step": 24525 }, { "epoch": 0.9443695861405197, "grad_norm": 0.8352447748184204, "learning_rate": 0.00010880343410062908, "loss": 0.9896, "step": 24530 }, { "epoch": 0.9445620789220405, "grad_norm": 1.6729201078414917, "learning_rate": 0.00010877331149978873, "loss": 1.1613, "step": 24535 }, { "epoch": 0.9447545717035611, "grad_norm": 1.173158049583435, "learning_rate": 0.00010874318809668718, "loss": 1.206, "step": 24540 }, { "epoch": 0.9449470644850818, "grad_norm": 2.4930291175842285, "learning_rate": 0.00010871306389407898, "loss": 1.2144, "step": 24545 }, { "epoch": 0.9451395572666025, "grad_norm": 0.8936457633972168, "learning_rate": 0.00010868293889471881, "loss": 1.0495, "step": 24550 }, { "epoch": 0.9453320500481232, "grad_norm": 1.9174963235855103, "learning_rate": 0.00010865281310136142, "loss": 1.2971, "step": 24555 }, { "epoch": 0.9455245428296439, "grad_norm": 0.8766136169433594, "learning_rate": 0.00010862268651676155, "loss": 1.3837, "step": 24560 }, { "epoch": 0.9457170356111646, "grad_norm": 1.577832818031311, "learning_rate": 0.00010859255914367414, "loss": 0.9897, "step": 24565 }, { "epoch": 0.9459095283926853, "grad_norm": 1.1107361316680908, "learning_rate": 0.00010856243098485412, "loss": 1.0474, "step": 24570 }, { "epoch": 0.9461020211742059, "grad_norm": 1.0004538297653198, "learning_rate": 0.00010853230204305651, "loss": 1.0261, "step": 24575 }, { "epoch": 0.9462945139557266, "grad_norm": 0.9005370736122131, "learning_rate": 0.00010850217232103639, "loss": 0.9002, "step": 24580 }, { "epoch": 0.9464870067372474, "grad_norm": 0.3695490062236786, "learning_rate": 0.00010847204182154895, "loss": 0.9869, "step": 24585 }, { "epoch": 0.9466794995187681, "grad_norm": 0.9990583658218384, "learning_rate": 0.00010844191054734938, "loss": 1.0765, "step": 24590 }, { "epoch": 0.9468719923002887, "grad_norm": 1.0551097393035889, "learning_rate": 0.00010841177850119301, "loss": 1.0853, "step": 24595 }, { "epoch": 
0.9470644850818094, "grad_norm": 1.0011540651321411, "learning_rate": 0.00010838164568583526, "loss": 1.2651, "step": 24600 }, { "epoch": 0.9472569778633301, "grad_norm": 1.1266576051712036, "learning_rate": 0.0001083515121040315, "loss": 0.9916, "step": 24605 }, { "epoch": 0.9474494706448509, "grad_norm": 1.543762445449829, "learning_rate": 0.00010832137775853728, "loss": 1.2188, "step": 24610 }, { "epoch": 0.9476419634263715, "grad_norm": 1.1634615659713745, "learning_rate": 0.00010829124265210822, "loss": 0.953, "step": 24615 }, { "epoch": 0.9478344562078922, "grad_norm": 2.0008814334869385, "learning_rate": 0.00010826110678749992, "loss": 1.1433, "step": 24620 }, { "epoch": 0.9480269489894129, "grad_norm": 1.6230239868164062, "learning_rate": 0.00010823097016746813, "loss": 1.0228, "step": 24625 }, { "epoch": 0.9482194417709335, "grad_norm": 0.9347220659255981, "learning_rate": 0.00010820083279476865, "loss": 1.0337, "step": 24630 }, { "epoch": 0.9484119345524543, "grad_norm": 1.8142660856246948, "learning_rate": 0.00010817069467215732, "loss": 1.0862, "step": 24635 }, { "epoch": 0.948604427333975, "grad_norm": 1.6012142896652222, "learning_rate": 0.0001081405558023901, "loss": 1.0592, "step": 24640 }, { "epoch": 0.9487969201154957, "grad_norm": 2.480301856994629, "learning_rate": 0.00010811041618822297, "loss": 1.1688, "step": 24645 }, { "epoch": 0.9489894128970163, "grad_norm": 1.339879035949707, "learning_rate": 0.00010808027583241203, "loss": 1.1535, "step": 24650 }, { "epoch": 0.9491819056785371, "grad_norm": 2.2561163902282715, "learning_rate": 0.00010805013473771337, "loss": 1.1379, "step": 24655 }, { "epoch": 0.9493743984600578, "grad_norm": 0.9310626983642578, "learning_rate": 0.00010801999290688323, "loss": 1.117, "step": 24660 }, { "epoch": 0.9495668912415784, "grad_norm": 1.099223256111145, "learning_rate": 0.00010798985034267786, "loss": 0.9384, "step": 24665 }, { "epoch": 0.9497593840230991, "grad_norm": 1.5492980480194092, "learning_rate": 
0.0001079597070478536, "loss": 1.0908, "step": 24670 }, { "epoch": 0.9499518768046198, "grad_norm": 0.9033131003379822, "learning_rate": 0.00010792956302516688, "loss": 1.0303, "step": 24675 }, { "epoch": 0.9501443695861406, "grad_norm": 1.0277695655822754, "learning_rate": 0.00010789941827737411, "loss": 1.1463, "step": 24680 }, { "epoch": 0.9503368623676612, "grad_norm": 1.6218613386154175, "learning_rate": 0.00010786927280723192, "loss": 1.0647, "step": 24685 }, { "epoch": 0.9505293551491819, "grad_norm": 0.9777045249938965, "learning_rate": 0.00010783912661749682, "loss": 0.9672, "step": 24690 }, { "epoch": 0.9507218479307026, "grad_norm": 1.7008765935897827, "learning_rate": 0.00010780897971092554, "loss": 1.0837, "step": 24695 }, { "epoch": 0.9509143407122232, "grad_norm": 1.286529779434204, "learning_rate": 0.00010777883209027477, "loss": 1.0237, "step": 24700 }, { "epoch": 0.951106833493744, "grad_norm": 1.0446587800979614, "learning_rate": 0.00010774868375830133, "loss": 1.1015, "step": 24705 }, { "epoch": 0.9512993262752647, "grad_norm": 1.1524755954742432, "learning_rate": 0.00010771853471776215, "loss": 1.0399, "step": 24710 }, { "epoch": 0.9514918190567854, "grad_norm": 1.3211500644683838, "learning_rate": 0.00010768838497141404, "loss": 1.1109, "step": 24715 }, { "epoch": 0.951684311838306, "grad_norm": 1.840817928314209, "learning_rate": 0.00010765823452201406, "loss": 1.1012, "step": 24720 }, { "epoch": 0.9518768046198267, "grad_norm": 1.2010953426361084, "learning_rate": 0.00010762808337231931, "loss": 0.9257, "step": 24725 }, { "epoch": 0.9520692974013475, "grad_norm": 1.1567620038986206, "learning_rate": 0.00010759793152508684, "loss": 1.1649, "step": 24730 }, { "epoch": 0.9522617901828682, "grad_norm": 1.1958009004592896, "learning_rate": 0.00010756777898307384, "loss": 1.0035, "step": 24735 }, { "epoch": 0.9524542829643888, "grad_norm": 1.7795593738555908, "learning_rate": 0.00010753762574903763, "loss": 1.1245, "step": 24740 }, { "epoch": 
0.9526467757459095, "grad_norm": 1.2421553134918213, "learning_rate": 0.00010750747182573544, "loss": 1.1169, "step": 24745 }, { "epoch": 0.9528392685274302, "grad_norm": 2.212799549102783, "learning_rate": 0.00010747731721592469, "loss": 1.1195, "step": 24750 }, { "epoch": 0.953031761308951, "grad_norm": 1.3483097553253174, "learning_rate": 0.00010744716192236284, "loss": 1.0309, "step": 24755 }, { "epoch": 0.9532242540904716, "grad_norm": 0.8752015233039856, "learning_rate": 0.00010741700594780734, "loss": 0.9972, "step": 24760 }, { "epoch": 0.9534167468719923, "grad_norm": 0.778307318687439, "learning_rate": 0.00010738684929501576, "loss": 0.9989, "step": 24765 }, { "epoch": 0.953609239653513, "grad_norm": 1.3148306608200073, "learning_rate": 0.00010735669196674578, "loss": 1.0519, "step": 24770 }, { "epoch": 0.9538017324350336, "grad_norm": 1.7633293867111206, "learning_rate": 0.00010732653396575504, "loss": 1.0377, "step": 24775 }, { "epoch": 0.9539942252165544, "grad_norm": 1.5333178043365479, "learning_rate": 0.00010729637529480132, "loss": 0.9923, "step": 24780 }, { "epoch": 0.9541867179980751, "grad_norm": 1.2737149000167847, "learning_rate": 0.0001072662159566424, "loss": 1.2048, "step": 24785 }, { "epoch": 0.9543792107795958, "grad_norm": 1.3000686168670654, "learning_rate": 0.00010723605595403616, "loss": 0.8916, "step": 24790 }, { "epoch": 0.9545717035611164, "grad_norm": 1.2083548307418823, "learning_rate": 0.00010720589528974056, "loss": 1.0176, "step": 24795 }, { "epoch": 0.9547641963426372, "grad_norm": 1.8047492504119873, "learning_rate": 0.00010717573396651355, "loss": 1.094, "step": 24800 }, { "epoch": 0.9549566891241579, "grad_norm": 1.2499585151672363, "learning_rate": 0.00010714557198711321, "loss": 1.2239, "step": 24805 }, { "epoch": 0.9551491819056785, "grad_norm": 1.58943510055542, "learning_rate": 0.00010711540935429764, "loss": 1.1283, "step": 24810 }, { "epoch": 0.9553416746871992, "grad_norm": 1.0315110683441162, "learning_rate": 
0.00010708524607082502, "loss": 0.9449, "step": 24815 }, { "epoch": 0.9555341674687199, "grad_norm": 1.182955265045166, "learning_rate": 0.00010705508213945362, "loss": 0.9075, "step": 24820 }, { "epoch": 0.9557266602502407, "grad_norm": 1.0133094787597656, "learning_rate": 0.00010702491756294164, "loss": 1.0881, "step": 24825 }, { "epoch": 0.9559191530317613, "grad_norm": 1.5667768716812134, "learning_rate": 0.00010699475234404749, "loss": 1.0341, "step": 24830 }, { "epoch": 0.956111645813282, "grad_norm": 1.2236697673797607, "learning_rate": 0.0001069645864855296, "loss": 1.1377, "step": 24835 }, { "epoch": 0.9563041385948027, "grad_norm": 1.6470654010772705, "learning_rate": 0.0001069344199901464, "loss": 1.1478, "step": 24840 }, { "epoch": 0.9564966313763233, "grad_norm": 0.8241544365882874, "learning_rate": 0.00010690425286065642, "loss": 1.018, "step": 24845 }, { "epoch": 0.9566891241578441, "grad_norm": 1.580249547958374, "learning_rate": 0.00010687408509981827, "loss": 1.2341, "step": 24850 }, { "epoch": 0.9568816169393648, "grad_norm": 2.250305414199829, "learning_rate": 0.00010684391671039056, "loss": 1.0795, "step": 24855 }, { "epoch": 0.9570741097208855, "grad_norm": 1.0578948259353638, "learning_rate": 0.000106813747695132, "loss": 1.0914, "step": 24860 }, { "epoch": 0.9572666025024061, "grad_norm": 1.2390888929367065, "learning_rate": 0.00010678357805680137, "loss": 1.2108, "step": 24865 }, { "epoch": 0.9574590952839268, "grad_norm": 2.08738112449646, "learning_rate": 0.00010675340779815745, "loss": 1.064, "step": 24870 }, { "epoch": 0.9576515880654476, "grad_norm": 1.882812738418579, "learning_rate": 0.00010672323692195912, "loss": 0.9825, "step": 24875 }, { "epoch": 0.9578440808469683, "grad_norm": 1.5878349542617798, "learning_rate": 0.00010669306543096534, "loss": 1.1894, "step": 24880 }, { "epoch": 0.9580365736284889, "grad_norm": 1.1730328798294067, "learning_rate": 0.00010666289332793503, "loss": 1.0882, "step": 24885 }, { "epoch": 
0.9582290664100096, "grad_norm": 1.14130699634552, "learning_rate": 0.00010663272061562726, "loss": 0.9501, "step": 24890 }, { "epoch": 0.9584215591915303, "grad_norm": 1.1763651371002197, "learning_rate": 0.00010660254729680117, "loss": 1.1355, "step": 24895 }, { "epoch": 0.958614051973051, "grad_norm": 1.5443150997161865, "learning_rate": 0.00010657237337421582, "loss": 0.8956, "step": 24900 }, { "epoch": 0.9588065447545717, "grad_norm": 1.0410224199295044, "learning_rate": 0.0001065421988506305, "loss": 1.0776, "step": 24905 }, { "epoch": 0.9589990375360924, "grad_norm": 0.9984595775604248, "learning_rate": 0.0001065120237288044, "loss": 1.1329, "step": 24910 }, { "epoch": 0.9591915303176131, "grad_norm": 1.8508329391479492, "learning_rate": 0.00010648184801149689, "loss": 1.0724, "step": 24915 }, { "epoch": 0.9593840230991337, "grad_norm": 2.0840296745300293, "learning_rate": 0.00010645167170146733, "loss": 1.0316, "step": 24920 }, { "epoch": 0.9595765158806545, "grad_norm": 1.6280990839004517, "learning_rate": 0.00010642149480147509, "loss": 0.9811, "step": 24925 }, { "epoch": 0.9597690086621752, "grad_norm": 1.7951642274856567, "learning_rate": 0.00010639131731427974, "loss": 1.0428, "step": 24930 }, { "epoch": 0.9599615014436959, "grad_norm": 1.0857096910476685, "learning_rate": 0.00010636113924264073, "loss": 0.9805, "step": 24935 }, { "epoch": 0.9601539942252165, "grad_norm": 1.919179916381836, "learning_rate": 0.00010633096058931766, "loss": 1.0721, "step": 24940 }, { "epoch": 0.9603464870067372, "grad_norm": 0.9968999028205872, "learning_rate": 0.0001063007813570702, "loss": 1.0928, "step": 24945 }, { "epoch": 0.960538979788258, "grad_norm": 1.4042326211929321, "learning_rate": 0.00010627060154865802, "loss": 1.1576, "step": 24950 }, { "epoch": 0.9607314725697786, "grad_norm": 1.0694444179534912, "learning_rate": 0.00010624042116684088, "loss": 1.0468, "step": 24955 }, { "epoch": 0.9609239653512993, "grad_norm": 1.127734899520874, "learning_rate": 
0.00010621024021437855, "loss": 1.1967, "step": 24960 }, { "epoch": 0.96111645813282, "grad_norm": 0.9498047232627869, "learning_rate": 0.0001061800586940309, "loss": 1.0848, "step": 24965 }, { "epoch": 0.9613089509143408, "grad_norm": 1.6244068145751953, "learning_rate": 0.0001061498766085578, "loss": 1.1116, "step": 24970 }, { "epoch": 0.9615014436958614, "grad_norm": 1.0797539949417114, "learning_rate": 0.00010611969396071926, "loss": 1.025, "step": 24975 }, { "epoch": 0.9616939364773821, "grad_norm": 1.1946651935577393, "learning_rate": 0.00010608951075327522, "loss": 1.3113, "step": 24980 }, { "epoch": 0.9618864292589028, "grad_norm": 0.915337324142456, "learning_rate": 0.00010605932698898576, "loss": 1.1864, "step": 24985 }, { "epoch": 0.9620789220404234, "grad_norm": 0.821142315864563, "learning_rate": 0.00010602914267061101, "loss": 0.9772, "step": 24990 }, { "epoch": 0.9622714148219442, "grad_norm": 1.0821641683578491, "learning_rate": 0.00010599895780091106, "loss": 0.9488, "step": 24995 }, { "epoch": 0.9624639076034649, "grad_norm": 1.600436806678772, "learning_rate": 0.0001059687723826462, "loss": 1.1065, "step": 25000 }, { "epoch": 0.9626564003849856, "grad_norm": 1.296973466873169, "learning_rate": 0.00010593858641857664, "loss": 1.0329, "step": 25005 }, { "epoch": 0.9628488931665062, "grad_norm": 1.2150559425354004, "learning_rate": 0.00010590839991146269, "loss": 1.176, "step": 25010 }, { "epoch": 0.9630413859480269, "grad_norm": 1.5039284229278564, "learning_rate": 0.00010587821286406469, "loss": 1.055, "step": 25015 }, { "epoch": 0.9632338787295477, "grad_norm": 1.5378166437149048, "learning_rate": 0.00010584802527914308, "loss": 1.1067, "step": 25020 }, { "epoch": 0.9634263715110684, "grad_norm": 1.1952167749404907, "learning_rate": 0.0001058178371594583, "loss": 1.1889, "step": 25025 }, { "epoch": 0.963618864292589, "grad_norm": 1.8712360858917236, "learning_rate": 0.00010578764850777084, "loss": 1.2401, "step": 25030 }, { "epoch": 
0.9638113570741097, "grad_norm": 1.0512574911117554, "learning_rate": 0.00010575745932684125, "loss": 1.1021, "step": 25035 }, { "epoch": 0.9640038498556304, "grad_norm": 1.1200644969940186, "learning_rate": 0.00010572726961943017, "loss": 1.1023, "step": 25040 }, { "epoch": 0.9641963426371511, "grad_norm": 1.448231816291809, "learning_rate": 0.00010569707938829821, "loss": 0.979, "step": 25045 }, { "epoch": 0.9643888354186718, "grad_norm": 2.4415132999420166, "learning_rate": 0.00010566688863620608, "loss": 1.037, "step": 25050 }, { "epoch": 0.9645813282001925, "grad_norm": 1.2661056518554688, "learning_rate": 0.00010563669736591453, "loss": 1.2452, "step": 25055 }, { "epoch": 0.9647738209817132, "grad_norm": 1.1040503978729248, "learning_rate": 0.00010560650558018434, "loss": 1.0118, "step": 25060 }, { "epoch": 0.9649663137632338, "grad_norm": 1.2488709688186646, "learning_rate": 0.00010557631328177636, "loss": 1.0467, "step": 25065 }, { "epoch": 0.9651588065447546, "grad_norm": 1.267828106880188, "learning_rate": 0.00010554612047345147, "loss": 0.9722, "step": 25070 }, { "epoch": 0.9653512993262753, "grad_norm": 1.5606194734573364, "learning_rate": 0.00010551592715797058, "loss": 1.0212, "step": 25075 }, { "epoch": 0.965543792107796, "grad_norm": 1.2547963857650757, "learning_rate": 0.0001054857333380947, "loss": 0.9649, "step": 25080 }, { "epoch": 0.9657362848893166, "grad_norm": 1.1586534976959229, "learning_rate": 0.00010545553901658486, "loss": 1.1205, "step": 25085 }, { "epoch": 0.9659287776708373, "grad_norm": 1.03237783908844, "learning_rate": 0.00010542534419620214, "loss": 0.9806, "step": 25090 }, { "epoch": 0.9661212704523581, "grad_norm": 1.0828287601470947, "learning_rate": 0.00010539514887970758, "loss": 1.1297, "step": 25095 }, { "epoch": 0.9663137632338787, "grad_norm": 1.0693068504333496, "learning_rate": 0.00010536495306986243, "loss": 0.9167, "step": 25100 }, { "epoch": 0.9665062560153994, "grad_norm": 0.9770272374153137, "learning_rate": 
0.00010533475676942785, "loss": 1.1324, "step": 25105 }, { "epoch": 0.9666987487969201, "grad_norm": 1.3809349536895752, "learning_rate": 0.00010530455998116511, "loss": 1.2411, "step": 25110 }, { "epoch": 0.9668912415784409, "grad_norm": 1.8168786764144897, "learning_rate": 0.00010527436270783551, "loss": 1.0129, "step": 25115 }, { "epoch": 0.9670837343599615, "grad_norm": 1.3123118877410889, "learning_rate": 0.00010524416495220035, "loss": 1.0677, "step": 25120 }, { "epoch": 0.9672762271414822, "grad_norm": 1.329199194908142, "learning_rate": 0.00010521396671702106, "loss": 1.0804, "step": 25125 }, { "epoch": 0.9674687199230029, "grad_norm": 1.5670257806777954, "learning_rate": 0.00010518376800505907, "loss": 1.1608, "step": 25130 }, { "epoch": 0.9676612127045235, "grad_norm": 1.2058695554733276, "learning_rate": 0.00010515356881907581, "loss": 1.2903, "step": 25135 }, { "epoch": 0.9678537054860443, "grad_norm": 1.4438813924789429, "learning_rate": 0.0001051233691618328, "loss": 1.2552, "step": 25140 }, { "epoch": 0.968046198267565, "grad_norm": 1.1188582181930542, "learning_rate": 0.00010509316903609167, "loss": 1.1011, "step": 25145 }, { "epoch": 0.9682386910490857, "grad_norm": 1.0020146369934082, "learning_rate": 0.00010506296844461394, "loss": 1.3431, "step": 25150 }, { "epoch": 0.9684311838306063, "grad_norm": 1.0120351314544678, "learning_rate": 0.00010503276739016128, "loss": 1.0744, "step": 25155 }, { "epoch": 0.968623676612127, "grad_norm": 1.0021151304244995, "learning_rate": 0.0001050025658754954, "loss": 1.0356, "step": 25160 }, { "epoch": 0.9688161693936478, "grad_norm": 1.2012884616851807, "learning_rate": 0.00010497236390337801, "loss": 1.0556, "step": 25165 }, { "epoch": 0.9690086621751685, "grad_norm": 2.0097708702087402, "learning_rate": 0.00010494216147657086, "loss": 1.1924, "step": 25170 }, { "epoch": 0.9692011549566891, "grad_norm": 1.8217684030532837, "learning_rate": 0.0001049119585978358, "loss": 1.1365, "step": 25175 }, { "epoch": 
0.9693936477382098, "grad_norm": 2.422804832458496, "learning_rate": 0.00010488175526993466, "loss": 1.1675, "step": 25180 }, { "epoch": 0.9695861405197305, "grad_norm": 1.59537672996521, "learning_rate": 0.00010485155149562933, "loss": 1.018, "step": 25185 }, { "epoch": 0.9697786333012512, "grad_norm": 1.0549684762954712, "learning_rate": 0.00010482134727768175, "loss": 1.076, "step": 25190 }, { "epoch": 0.9699711260827719, "grad_norm": 1.1994887590408325, "learning_rate": 0.00010479114261885395, "loss": 1.0012, "step": 25195 }, { "epoch": 0.9701636188642926, "grad_norm": 1.6719814538955688, "learning_rate": 0.00010476093752190784, "loss": 1.1797, "step": 25200 }, { "epoch": 0.9703561116458133, "grad_norm": 1.5435569286346436, "learning_rate": 0.00010473073198960555, "loss": 1.0019, "step": 25205 }, { "epoch": 0.9705486044273339, "grad_norm": 1.0406394004821777, "learning_rate": 0.00010470052602470917, "loss": 1.0301, "step": 25210 }, { "epoch": 0.9707410972088547, "grad_norm": 0.8994914889335632, "learning_rate": 0.0001046703196299808, "loss": 0.9683, "step": 25215 }, { "epoch": 0.9709335899903754, "grad_norm": 1.6628937721252441, "learning_rate": 0.00010464011280818266, "loss": 1.2508, "step": 25220 }, { "epoch": 0.971126082771896, "grad_norm": 1.7515249252319336, "learning_rate": 0.00010460990556207693, "loss": 1.3141, "step": 25225 }, { "epoch": 0.9713185755534167, "grad_norm": 1.4734959602355957, "learning_rate": 0.00010457969789442587, "loss": 1.0589, "step": 25230 }, { "epoch": 0.9715110683349374, "grad_norm": 1.102239966392517, "learning_rate": 0.00010454948980799179, "loss": 1.0115, "step": 25235 }, { "epoch": 0.9717035611164582, "grad_norm": 1.9879227876663208, "learning_rate": 0.000104519281305537, "loss": 0.9781, "step": 25240 }, { "epoch": 0.9718960538979788, "grad_norm": 1.2674641609191895, "learning_rate": 0.00010448907238982387, "loss": 1.1459, "step": 25245 }, { "epoch": 0.9720885466794995, "grad_norm": 1.922810673713684, "learning_rate": 
0.00010445886306361479, "loss": 1.0625, "step": 25250 }, { "epoch": 0.9722810394610202, "grad_norm": 1.0224851369857788, "learning_rate": 0.00010442865332967225, "loss": 0.9894, "step": 25255 }, { "epoch": 0.9724735322425409, "grad_norm": 1.3521889448165894, "learning_rate": 0.00010439844319075868, "loss": 1.0474, "step": 25260 }, { "epoch": 0.9726660250240616, "grad_norm": 1.4089123010635376, "learning_rate": 0.00010436823264963662, "loss": 1.0874, "step": 25265 }, { "epoch": 0.9728585178055823, "grad_norm": 1.0368316173553467, "learning_rate": 0.00010433802170906863, "loss": 1.0466, "step": 25270 }, { "epoch": 0.973051010587103, "grad_norm": 1.541629672050476, "learning_rate": 0.00010430781037181727, "loss": 1.0565, "step": 25275 }, { "epoch": 0.9732435033686236, "grad_norm": 0.7525898814201355, "learning_rate": 0.00010427759864064521, "loss": 0.9438, "step": 25280 }, { "epoch": 0.9734359961501444, "grad_norm": 1.5569766759872437, "learning_rate": 0.00010424738651831507, "loss": 1.1061, "step": 25285 }, { "epoch": 0.9736284889316651, "grad_norm": 1.4111804962158203, "learning_rate": 0.0001042171740075896, "loss": 1.2118, "step": 25290 }, { "epoch": 0.9738209817131858, "grad_norm": 1.2276591062545776, "learning_rate": 0.00010418696111123148, "loss": 1.1494, "step": 25295 }, { "epoch": 0.9740134744947064, "grad_norm": 1.320253849029541, "learning_rate": 0.00010415674783200349, "loss": 1.1241, "step": 25300 }, { "epoch": 0.9742059672762271, "grad_norm": 1.6126363277435303, "learning_rate": 0.00010412653417266849, "loss": 1.068, "step": 25305 }, { "epoch": 0.9743984600577479, "grad_norm": 1.2316113710403442, "learning_rate": 0.00010409632013598924, "loss": 1.124, "step": 25310 }, { "epoch": 0.9745909528392686, "grad_norm": 1.3497854471206665, "learning_rate": 0.00010406610572472866, "loss": 1.0544, "step": 25315 }, { "epoch": 0.9747834456207892, "grad_norm": 1.1057459115982056, "learning_rate": 0.00010403589094164966, "loss": 0.982, "step": 25320 }, { "epoch": 
0.9749759384023099, "grad_norm": 1.654336929321289, "learning_rate": 0.00010400567578951515, "loss": 1.1425, "step": 25325 }, { "epoch": 0.9751684311838306, "grad_norm": 0.8302839398384094, "learning_rate": 0.00010397546027108814, "loss": 1.0543, "step": 25330 }, { "epoch": 0.9753609239653513, "grad_norm": 3.0035483837127686, "learning_rate": 0.00010394524438913161, "loss": 1.0277, "step": 25335 }, { "epoch": 0.975553416746872, "grad_norm": 1.4531986713409424, "learning_rate": 0.00010391502814640864, "loss": 1.1209, "step": 25340 }, { "epoch": 0.9757459095283927, "grad_norm": 1.7825361490249634, "learning_rate": 0.00010388481154568224, "loss": 0.9739, "step": 25345 }, { "epoch": 0.9759384023099134, "grad_norm": 0.9865066409111023, "learning_rate": 0.00010385459458971558, "loss": 1.0587, "step": 25350 }, { "epoch": 0.976130895091434, "grad_norm": 1.5732641220092773, "learning_rate": 0.00010382437728127176, "loss": 1.1509, "step": 25355 }, { "epoch": 0.9763233878729548, "grad_norm": 1.3935140371322632, "learning_rate": 0.000103794159623114, "loss": 1.0582, "step": 25360 }, { "epoch": 0.9765158806544755, "grad_norm": 1.156906247138977, "learning_rate": 0.0001037639416180055, "loss": 0.9355, "step": 25365 }, { "epoch": 0.9767083734359961, "grad_norm": 1.3732504844665527, "learning_rate": 0.0001037337232687094, "loss": 1.085, "step": 25370 }, { "epoch": 0.9769008662175168, "grad_norm": 1.41510009765625, "learning_rate": 0.00010370350457798907, "loss": 1.0622, "step": 25375 }, { "epoch": 0.9770933589990375, "grad_norm": 2.79821515083313, "learning_rate": 0.00010367328554860783, "loss": 1.1103, "step": 25380 }, { "epoch": 0.9772858517805583, "grad_norm": 0.6538259387016296, "learning_rate": 0.00010364306618332889, "loss": 0.8355, "step": 25385 }, { "epoch": 0.9774783445620789, "grad_norm": 1.126719355583191, "learning_rate": 0.00010361284648491571, "loss": 1.0243, "step": 25390 }, { "epoch": 0.9776708373435996, "grad_norm": 1.7321900129318237, "learning_rate": 
0.00010358262645613166, "loss": 1.1714, "step": 25395 }, { "epoch": 0.9778633301251203, "grad_norm": 1.1369127035140991, "learning_rate": 0.00010355240609974015, "loss": 0.9301, "step": 25400 }, { "epoch": 0.978055822906641, "grad_norm": 1.0569565296173096, "learning_rate": 0.00010352218541850461, "loss": 1.1441, "step": 25405 }, { "epoch": 0.9782483156881617, "grad_norm": 1.2385330200195312, "learning_rate": 0.00010349196441518855, "loss": 1.0675, "step": 25410 }, { "epoch": 0.9784408084696824, "grad_norm": 1.161966323852539, "learning_rate": 0.00010346174309255552, "loss": 1.0511, "step": 25415 }, { "epoch": 0.9786333012512031, "grad_norm": 1.3913847208023071, "learning_rate": 0.00010343152145336899, "loss": 1.2011, "step": 25420 }, { "epoch": 0.9788257940327237, "grad_norm": 2.150585412979126, "learning_rate": 0.00010340129950039253, "loss": 0.9328, "step": 25425 }, { "epoch": 0.9790182868142445, "grad_norm": 0.9360420107841492, "learning_rate": 0.00010337107723638979, "loss": 1.0759, "step": 25430 }, { "epoch": 0.9792107795957652, "grad_norm": 1.2746261358261108, "learning_rate": 0.00010334085466412435, "loss": 1.0884, "step": 25435 }, { "epoch": 0.9794032723772859, "grad_norm": 1.4159737825393677, "learning_rate": 0.00010331063178635991, "loss": 0.8767, "step": 25440 }, { "epoch": 0.9795957651588065, "grad_norm": 1.0822529792785645, "learning_rate": 0.00010328040860586013, "loss": 1.0087, "step": 25445 }, { "epoch": 0.9797882579403272, "grad_norm": 1.3765298128128052, "learning_rate": 0.00010325018512538868, "loss": 1.1506, "step": 25450 }, { "epoch": 0.979980750721848, "grad_norm": 1.1696947813034058, "learning_rate": 0.00010321996134770935, "loss": 1.1162, "step": 25455 }, { "epoch": 0.9801732435033687, "grad_norm": 1.0629351139068604, "learning_rate": 0.0001031897372755859, "loss": 1.0682, "step": 25460 }, { "epoch": 0.9803657362848893, "grad_norm": 1.7273883819580078, "learning_rate": 0.00010315951291178208, "loss": 1.0015, "step": 25465 }, { "epoch": 
0.98055822906641, "grad_norm": 1.154524803161621, "learning_rate": 0.00010312928825906172, "loss": 1.0113, "step": 25470 }, { "epoch": 0.9807507218479307, "grad_norm": 1.5868383646011353, "learning_rate": 0.0001030990633201887, "loss": 1.1021, "step": 25475 }, { "epoch": 0.9809432146294514, "grad_norm": 1.425208568572998, "learning_rate": 0.00010306883809792687, "loss": 1.0993, "step": 25480 }, { "epoch": 0.9811357074109721, "grad_norm": 1.5670615434646606, "learning_rate": 0.00010303861259504011, "loss": 1.1032, "step": 25485 }, { "epoch": 0.9813282001924928, "grad_norm": 0.9901431202888489, "learning_rate": 0.00010300838681429239, "loss": 1.0594, "step": 25490 }, { "epoch": 0.9815206929740135, "grad_norm": 1.2887781858444214, "learning_rate": 0.0001029781607584476, "loss": 1.0748, "step": 25495 }, { "epoch": 0.9817131857555341, "grad_norm": 1.1845282316207886, "learning_rate": 0.00010294793443026974, "loss": 1.0229, "step": 25500 }, { "epoch": 0.9819056785370549, "grad_norm": 1.3194280862808228, "learning_rate": 0.0001029177078325228, "loss": 1.0796, "step": 25505 }, { "epoch": 0.9820981713185756, "grad_norm": 1.5246963500976562, "learning_rate": 0.0001028874809679708, "loss": 1.2291, "step": 25510 }, { "epoch": 0.9822906641000962, "grad_norm": 1.210331678390503, "learning_rate": 0.00010285725383937782, "loss": 0.9774, "step": 25515 }, { "epoch": 0.9824831568816169, "grad_norm": 1.1501210927963257, "learning_rate": 0.00010282702644950788, "loss": 0.8948, "step": 25520 }, { "epoch": 0.9826756496631376, "grad_norm": 1.2343779802322388, "learning_rate": 0.0001027967988011251, "loss": 1.1353, "step": 25525 }, { "epoch": 0.9828681424446584, "grad_norm": 0.9154959321022034, "learning_rate": 0.00010276657089699359, "loss": 1.1039, "step": 25530 }, { "epoch": 0.983060635226179, "grad_norm": 2.102421283721924, "learning_rate": 0.00010273634273987754, "loss": 1.1101, "step": 25535 }, { "epoch": 0.9832531280076997, "grad_norm": 1.5680655241012573, "learning_rate": 
0.00010270611433254102, "loss": 0.9902, "step": 25540 }, { "epoch": 0.9834456207892204, "grad_norm": 0.9244110584259033, "learning_rate": 0.0001026758856777483, "loss": 0.9179, "step": 25545 }, { "epoch": 0.983638113570741, "grad_norm": 0.8917207717895508, "learning_rate": 0.00010264565677826356, "loss": 0.987, "step": 25550 }, { "epoch": 0.9838306063522618, "grad_norm": 1.52036452293396, "learning_rate": 0.00010261542763685104, "loss": 0.9879, "step": 25555 }, { "epoch": 0.9840230991337825, "grad_norm": 1.535866618156433, "learning_rate": 0.000102585198256275, "loss": 1.121, "step": 25560 }, { "epoch": 0.9842155919153032, "grad_norm": 1.3526102304458618, "learning_rate": 0.00010255496863929965, "loss": 1.0613, "step": 25565 }, { "epoch": 0.9844080846968238, "grad_norm": 1.4781625270843506, "learning_rate": 0.0001025247387886894, "loss": 1.0461, "step": 25570 }, { "epoch": 0.9846005774783445, "grad_norm": 1.0873816013336182, "learning_rate": 0.00010249450870720849, "loss": 0.9743, "step": 25575 }, { "epoch": 0.9847930702598653, "grad_norm": 1.3945918083190918, "learning_rate": 0.00010246427839762127, "loss": 1.1183, "step": 25580 }, { "epoch": 0.984985563041386, "grad_norm": 2.6019561290740967, "learning_rate": 0.00010243404786269215, "loss": 1.0604, "step": 25585 }, { "epoch": 0.9851780558229066, "grad_norm": 1.1407113075256348, "learning_rate": 0.00010240381710518542, "loss": 0.9804, "step": 25590 }, { "epoch": 0.9853705486044273, "grad_norm": 1.5416691303253174, "learning_rate": 0.00010237358612786558, "loss": 1.097, "step": 25595 }, { "epoch": 0.9855630413859481, "grad_norm": 1.0081915855407715, "learning_rate": 0.00010234335493349703, "loss": 1.0342, "step": 25600 }, { "epoch": 0.9857555341674687, "grad_norm": 1.0994783639907837, "learning_rate": 0.00010231312352484417, "loss": 1.2357, "step": 25605 }, { "epoch": 0.9859480269489894, "grad_norm": 1.106605887413025, "learning_rate": 0.00010228289190467146, "loss": 1.0279, "step": 25610 }, { "epoch": 
0.9861405197305101, "grad_norm": 0.9887366890907288, "learning_rate": 0.00010225266007574345, "loss": 1.1127, "step": 25615 }, { "epoch": 0.9863330125120308, "grad_norm": 1.079148530960083, "learning_rate": 0.00010222242804082458, "loss": 1.1353, "step": 25620 }, { "epoch": 0.9865255052935515, "grad_norm": 1.2472405433654785, "learning_rate": 0.00010219219580267938, "loss": 1.1579, "step": 25625 }, { "epoch": 0.9867179980750722, "grad_norm": 1.0981230735778809, "learning_rate": 0.00010216196336407242, "loss": 1.1264, "step": 25630 }, { "epoch": 0.9869104908565929, "grad_norm": 1.4295231103897095, "learning_rate": 0.00010213173072776823, "loss": 1.0099, "step": 25635 }, { "epoch": 0.9871029836381136, "grad_norm": 1.3902168273925781, "learning_rate": 0.00010210149789653137, "loss": 1.1308, "step": 25640 }, { "epoch": 0.9872954764196342, "grad_norm": 1.2584228515625, "learning_rate": 0.00010207126487312646, "loss": 1.0638, "step": 25645 }, { "epoch": 0.987487969201155, "grad_norm": 1.27899968624115, "learning_rate": 0.00010204103166031809, "loss": 1.0931, "step": 25650 }, { "epoch": 0.9876804619826757, "grad_norm": 2.330070972442627, "learning_rate": 0.00010201079826087088, "loss": 1.1429, "step": 25655 }, { "epoch": 0.9878729547641963, "grad_norm": 1.0945899486541748, "learning_rate": 0.00010198056467754953, "loss": 1.0337, "step": 25660 }, { "epoch": 0.988065447545717, "grad_norm": 1.3564465045928955, "learning_rate": 0.00010195033091311866, "loss": 1.1632, "step": 25665 }, { "epoch": 0.9882579403272377, "grad_norm": 1.0468459129333496, "learning_rate": 0.0001019200969703429, "loss": 1.1247, "step": 25670 }, { "epoch": 0.9884504331087585, "grad_norm": 1.7445849180221558, "learning_rate": 0.00010188986285198703, "loss": 0.9937, "step": 25675 }, { "epoch": 0.9886429258902791, "grad_norm": 0.9142084121704102, "learning_rate": 0.0001018596285608157, "loss": 1.1336, "step": 25680 }, { "epoch": 0.9888354186717998, "grad_norm": 1.7198858261108398, "learning_rate": 
0.00010182939409959366, "loss": 1.1145, "step": 25685 }, { "epoch": 0.9890279114533205, "grad_norm": 1.204819679260254, "learning_rate": 0.00010179915947108565, "loss": 1.0323, "step": 25690 }, { "epoch": 0.9892204042348411, "grad_norm": 0.8539313673973083, "learning_rate": 0.00010176892467805646, "loss": 0.8969, "step": 25695 }, { "epoch": 0.9894128970163619, "grad_norm": 1.1470470428466797, "learning_rate": 0.00010173868972327079, "loss": 1.1075, "step": 25700 }, { "epoch": 0.9896053897978826, "grad_norm": 1.3425363302230835, "learning_rate": 0.00010170845460949345, "loss": 1.178, "step": 25705 }, { "epoch": 0.9897978825794033, "grad_norm": 1.2167271375656128, "learning_rate": 0.00010167821933948929, "loss": 0.9844, "step": 25710 }, { "epoch": 0.9899903753609239, "grad_norm": 1.0317187309265137, "learning_rate": 0.00010164798391602306, "loss": 1.1865, "step": 25715 }, { "epoch": 0.9901828681424446, "grad_norm": 1.983253836631775, "learning_rate": 0.00010161774834185962, "loss": 1.1188, "step": 25720 }, { "epoch": 0.9903753609239654, "grad_norm": 0.8479856848716736, "learning_rate": 0.00010158751261976382, "loss": 1.1431, "step": 25725 }, { "epoch": 0.9905678537054861, "grad_norm": 1.3559181690216064, "learning_rate": 0.0001015572767525005, "loss": 1.1233, "step": 25730 }, { "epoch": 0.9907603464870067, "grad_norm": 0.8876566290855408, "learning_rate": 0.00010152704074283454, "loss": 1.0564, "step": 25735 }, { "epoch": 0.9909528392685274, "grad_norm": 1.189932942390442, "learning_rate": 0.00010149680459353083, "loss": 1.0271, "step": 25740 }, { "epoch": 0.9911453320500482, "grad_norm": 2.000798225402832, "learning_rate": 0.00010146656830735424, "loss": 1.3183, "step": 25745 }, { "epoch": 0.9913378248315688, "grad_norm": 1.558982253074646, "learning_rate": 0.00010143633188706969, "loss": 0.9362, "step": 25750 }, { "epoch": 0.9915303176130895, "grad_norm": 1.8294328451156616, "learning_rate": 0.00010140609533544215, "loss": 0.94, "step": 25755 }, { "epoch": 
0.9917228103946102, "grad_norm": 1.833295226097107, "learning_rate": 0.00010137585865523644, "loss": 0.8496, "step": 25760 }, { "epoch": 0.9919153031761309, "grad_norm": 0.8973735570907593, "learning_rate": 0.00010134562184921761, "loss": 1.2634, "step": 25765 }, { "epoch": 0.9921077959576516, "grad_norm": 1.6813448667526245, "learning_rate": 0.00010131538492015056, "loss": 1.1065, "step": 25770 }, { "epoch": 0.9923002887391723, "grad_norm": 1.0313218832015991, "learning_rate": 0.0001012851478708003, "loss": 1.026, "step": 25775 }, { "epoch": 0.992492781520693, "grad_norm": 1.324073314666748, "learning_rate": 0.00010125491070393176, "loss": 1.168, "step": 25780 }, { "epoch": 0.9926852743022136, "grad_norm": 1.5872528553009033, "learning_rate": 0.00010122467342230997, "loss": 0.9647, "step": 25785 }, { "epoch": 0.9928777670837343, "grad_norm": 1.108878254890442, "learning_rate": 0.00010119443602869987, "loss": 0.9983, "step": 25790 }, { "epoch": 0.9930702598652551, "grad_norm": 0.7914887070655823, "learning_rate": 0.00010116419852586652, "loss": 0.974, "step": 25795 }, { "epoch": 0.9932627526467758, "grad_norm": 1.9735599756240845, "learning_rate": 0.00010113396091657492, "loss": 1.1217, "step": 25800 }, { "epoch": 0.9934552454282964, "grad_norm": 1.5698872804641724, "learning_rate": 0.00010110372320359014, "loss": 1.1984, "step": 25805 }, { "epoch": 0.9936477382098171, "grad_norm": 1.5581351518630981, "learning_rate": 0.00010107348538967714, "loss": 1.0301, "step": 25810 }, { "epoch": 0.9938402309913378, "grad_norm": 0.868083655834198, "learning_rate": 0.00010104324747760104, "loss": 1.1439, "step": 25815 }, { "epoch": 0.9940327237728586, "grad_norm": 1.065421462059021, "learning_rate": 0.00010101300947012686, "loss": 1.0275, "step": 25820 }, { "epoch": 0.9942252165543792, "grad_norm": 1.3174570798873901, "learning_rate": 0.00010098277137001967, "loss": 1.0925, "step": 25825 }, { "epoch": 0.9944177093358999, "grad_norm": 1.1705584526062012, "learning_rate": 
0.00010095253318004457, "loss": 1.1513, "step": 25830 }, { "epoch": 0.9946102021174206, "grad_norm": 1.9153684377670288, "learning_rate": 0.00010092229490296661, "loss": 0.9746, "step": 25835 }, { "epoch": 0.9948026948989412, "grad_norm": 1.1694215536117554, "learning_rate": 0.00010089205654155087, "loss": 1.1149, "step": 25840 }, { "epoch": 0.994995187680462, "grad_norm": 1.2398862838745117, "learning_rate": 0.00010086181809856248, "loss": 1.0225, "step": 25845 }, { "epoch": 0.9951876804619827, "grad_norm": 0.803766131401062, "learning_rate": 0.00010083157957676657, "loss": 0.9342, "step": 25850 }, { "epoch": 0.9953801732435034, "grad_norm": 1.1521053314208984, "learning_rate": 0.00010080134097892817, "loss": 1.1817, "step": 25855 }, { "epoch": 0.995572666025024, "grad_norm": 1.6817054748535156, "learning_rate": 0.00010077110230781246, "loss": 0.9607, "step": 25860 }, { "epoch": 0.9957651588065447, "grad_norm": 1.8072839975357056, "learning_rate": 0.00010074086356618457, "loss": 1.2474, "step": 25865 }, { "epoch": 0.9959576515880655, "grad_norm": 1.1039899587631226, "learning_rate": 0.00010071062475680959, "loss": 1.0931, "step": 25870 }, { "epoch": 0.9961501443695862, "grad_norm": 1.3213703632354736, "learning_rate": 0.0001006803858824527, "loss": 0.9709, "step": 25875 }, { "epoch": 0.9963426371511068, "grad_norm": 2.1169626712799072, "learning_rate": 0.00010065014694587902, "loss": 1.0185, "step": 25880 }, { "epoch": 0.9965351299326275, "grad_norm": 1.997347116470337, "learning_rate": 0.00010061990794985372, "loss": 0.9641, "step": 25885 }, { "epoch": 0.9967276227141482, "grad_norm": 2.4005537033081055, "learning_rate": 0.00010058966889714192, "loss": 0.956, "step": 25890 }, { "epoch": 0.996920115495669, "grad_norm": 1.1593470573425293, "learning_rate": 0.00010055942979050886, "loss": 0.9971, "step": 25895 }, { "epoch": 0.9971126082771896, "grad_norm": 1.5145149230957031, "learning_rate": 0.0001005291906327196, "loss": 1.029, "step": 25900 }, { "epoch": 
0.9973051010587103, "grad_norm": 1.6127921342849731, "learning_rate": 0.00010049895142653936, "loss": 1.0643, "step": 25905 }, { "epoch": 0.997497593840231, "grad_norm": 1.326854944229126, "learning_rate": 0.00010046871217473334, "loss": 1.0119, "step": 25910 }, { "epoch": 0.9976900866217517, "grad_norm": 1.0918645858764648, "learning_rate": 0.00010043847288006666, "loss": 1.2797, "step": 25915 }, { "epoch": 0.9978825794032724, "grad_norm": 1.0558148622512817, "learning_rate": 0.00010040823354530457, "loss": 1.0526, "step": 25920 }, { "epoch": 0.9980750721847931, "grad_norm": 1.1978880167007446, "learning_rate": 0.00010037799417321222, "loss": 1.0564, "step": 25925 }, { "epoch": 0.9982675649663137, "grad_norm": 1.9178636074066162, "learning_rate": 0.00010034775476655482, "loss": 1.068, "step": 25930 }, { "epoch": 0.9984600577478344, "grad_norm": 1.7356677055358887, "learning_rate": 0.00010031751532809755, "loss": 1.2817, "step": 25935 }, { "epoch": 0.9986525505293552, "grad_norm": 1.0780870914459229, "learning_rate": 0.00010028727586060558, "loss": 1.0834, "step": 25940 }, { "epoch": 0.9988450433108759, "grad_norm": 1.4201254844665527, "learning_rate": 0.00010025703636684416, "loss": 0.9698, "step": 25945 }, { "epoch": 0.9990375360923965, "grad_norm": 1.0446133613586426, "learning_rate": 0.00010022679684957845, "loss": 1.1268, "step": 25950 }, { "epoch": 0.9992300288739172, "grad_norm": 1.2714285850524902, "learning_rate": 0.0001001965573115737, "loss": 1.2019, "step": 25955 }, { "epoch": 0.9994225216554379, "grad_norm": 0.9263586401939392, "learning_rate": 0.00010016631775559506, "loss": 1.1232, "step": 25960 }, { "epoch": 0.9996150144369587, "grad_norm": 2.6559371948242188, "learning_rate": 0.00010013607818440775, "loss": 1.1367, "step": 25965 }, { "epoch": 0.9998075072184793, "grad_norm": 1.6373353004455566, "learning_rate": 0.00010010583860077703, "loss": 1.002, "step": 25970 }, { "epoch": 1.0, "grad_norm": 2.38175630569458, "learning_rate": 
0.00010007559900746805, "loss": 1.198, "step": 25975 }, { "epoch": 1.0001924927815207, "grad_norm": 0.9677699208259583, "learning_rate": 0.00010004535940724604, "loss": 0.9076, "step": 25980 }, { "epoch": 1.0003849855630413, "grad_norm": 1.5017547607421875, "learning_rate": 0.00010001511980287623, "loss": 0.9611, "step": 25985 }, { "epoch": 1.000577478344562, "grad_norm": 0.8973893523216248, "learning_rate": 9.998488019712379e-05, "loss": 0.8939, "step": 25990 }, { "epoch": 1.0007699711260827, "grad_norm": 1.614953875541687, "learning_rate": 9.995464059275396e-05, "loss": 0.6977, "step": 25995 }, { "epoch": 1.0009624639076036, "grad_norm": 2.5739526748657227, "learning_rate": 9.992440099253196e-05, "loss": 0.967, "step": 26000 }, { "epoch": 1.0011549566891242, "grad_norm": 1.2019327878952026, "learning_rate": 9.989416139922301e-05, "loss": 0.8387, "step": 26005 }, { "epoch": 1.001347449470645, "grad_norm": 1.8093628883361816, "learning_rate": 9.986392181559223e-05, "loss": 0.9027, "step": 26010 }, { "epoch": 1.0015399422521656, "grad_norm": 1.4864177703857422, "learning_rate": 9.983368224440496e-05, "loss": 0.7639, "step": 26015 }, { "epoch": 1.0017324350336863, "grad_norm": 1.123429775238037, "learning_rate": 9.980344268842634e-05, "loss": 0.7783, "step": 26020 }, { "epoch": 1.001924927815207, "grad_norm": 1.5506818294525146, "learning_rate": 9.977320315042154e-05, "loss": 0.7143, "step": 26025 }, { "epoch": 1.0021174205967276, "grad_norm": 2.1665854454040527, "learning_rate": 9.974296363315585e-05, "loss": 0.9101, "step": 26030 }, { "epoch": 1.0023099133782483, "grad_norm": 1.8006185293197632, "learning_rate": 9.971272413939444e-05, "loss": 0.9212, "step": 26035 }, { "epoch": 1.002502406159769, "grad_norm": 1.2385761737823486, "learning_rate": 9.968248467190245e-05, "loss": 0.8715, "step": 26040 }, { "epoch": 1.0026948989412896, "grad_norm": 1.4337763786315918, "learning_rate": 9.965224523344519e-05, "loss": 0.6949, "step": 26045 }, { "epoch": 1.0028873917228105, 
"grad_norm": 1.586151123046875, "learning_rate": 9.96220058267878e-05, "loss": 0.913, "step": 26050 }, { "epoch": 1.0030798845043312, "grad_norm": 1.3005743026733398, "learning_rate": 9.959176645469542e-05, "loss": 0.8218, "step": 26055 }, { "epoch": 1.0032723772858518, "grad_norm": 0.735365629196167, "learning_rate": 9.956152711993335e-05, "loss": 0.7206, "step": 26060 }, { "epoch": 1.0034648700673725, "grad_norm": 1.7162381410598755, "learning_rate": 9.95312878252667e-05, "loss": 0.7916, "step": 26065 }, { "epoch": 1.0036573628488932, "grad_norm": 0.894173800945282, "learning_rate": 9.950104857346064e-05, "loss": 0.9683, "step": 26070 }, { "epoch": 1.0038498556304138, "grad_norm": 1.6856434345245361, "learning_rate": 9.947080936728044e-05, "loss": 0.7997, "step": 26075 }, { "epoch": 1.0040423484119345, "grad_norm": 1.2891976833343506, "learning_rate": 9.94405702094912e-05, "loss": 0.802, "step": 26080 }, { "epoch": 1.0042348411934552, "grad_norm": 1.5862544775009155, "learning_rate": 9.941033110285809e-05, "loss": 0.857, "step": 26085 }, { "epoch": 1.0044273339749759, "grad_norm": 2.165689706802368, "learning_rate": 9.93800920501463e-05, "loss": 1.01, "step": 26090 }, { "epoch": 1.0046198267564965, "grad_norm": 1.3247888088226318, "learning_rate": 9.9349853054121e-05, "loss": 0.872, "step": 26095 }, { "epoch": 1.0048123195380174, "grad_norm": 1.7285022735595703, "learning_rate": 9.931961411754732e-05, "loss": 0.913, "step": 26100 }, { "epoch": 1.005004812319538, "grad_norm": 1.504143238067627, "learning_rate": 9.928937524319043e-05, "loss": 0.7396, "step": 26105 }, { "epoch": 1.0051973051010588, "grad_norm": 1.633792519569397, "learning_rate": 9.925913643381546e-05, "loss": 0.8946, "step": 26110 }, { "epoch": 1.0053897978825794, "grad_norm": 1.8839080333709717, "learning_rate": 9.922889769218755e-05, "loss": 0.8844, "step": 26115 }, { "epoch": 1.0055822906641, "grad_norm": 0.9367867708206177, "learning_rate": 9.919865902107185e-05, "loss": 0.8064, "step": 26120 
}, { "epoch": 1.0057747834456208, "grad_norm": 1.0992298126220703, "learning_rate": 9.916842042323348e-05, "loss": 0.8674, "step": 26125 }, { "epoch": 1.0059672762271414, "grad_norm": 1.031310796737671, "learning_rate": 9.913818190143751e-05, "loss": 0.8082, "step": 26130 }, { "epoch": 1.006159769008662, "grad_norm": 1.5799235105514526, "learning_rate": 9.910794345844914e-05, "loss": 0.7128, "step": 26135 }, { "epoch": 1.0063522617901828, "grad_norm": 1.5742192268371582, "learning_rate": 9.907770509703344e-05, "loss": 0.8554, "step": 26140 }, { "epoch": 1.0065447545717037, "grad_norm": 1.3698316812515259, "learning_rate": 9.904746681995544e-05, "loss": 0.6932, "step": 26145 }, { "epoch": 1.0067372473532243, "grad_norm": 1.2731399536132812, "learning_rate": 9.901722862998034e-05, "loss": 0.9624, "step": 26150 }, { "epoch": 1.006929740134745, "grad_norm": 1.0184792280197144, "learning_rate": 9.898699052987318e-05, "loss": 0.8248, "step": 26155 }, { "epoch": 1.0071222329162657, "grad_norm": 1.1166132688522339, "learning_rate": 9.895675252239896e-05, "loss": 0.7907, "step": 26160 }, { "epoch": 1.0073147256977864, "grad_norm": 2.4963834285736084, "learning_rate": 9.892651461032287e-05, "loss": 0.8758, "step": 26165 }, { "epoch": 1.007507218479307, "grad_norm": 2.192413091659546, "learning_rate": 9.889627679640991e-05, "loss": 0.9262, "step": 26170 }, { "epoch": 1.0076997112608277, "grad_norm": 1.760149598121643, "learning_rate": 9.886603908342508e-05, "loss": 0.8604, "step": 26175 }, { "epoch": 1.0078922040423484, "grad_norm": 1.2598868608474731, "learning_rate": 9.88358014741335e-05, "loss": 0.9674, "step": 26180 }, { "epoch": 1.008084696823869, "grad_norm": 1.29741632938385, "learning_rate": 9.880556397130018e-05, "loss": 0.8107, "step": 26185 }, { "epoch": 1.0082771896053897, "grad_norm": 1.4447206258773804, "learning_rate": 9.877532657769006e-05, "loss": 0.9182, "step": 26190 }, { "epoch": 1.0084696823869106, "grad_norm": 1.8422036170959473, "learning_rate": 
9.874508929606827e-05, "loss": 1.0373, "step": 26195 }, { "epoch": 1.0086621751684313, "grad_norm": 1.393976092338562, "learning_rate": 9.871485212919974e-05, "loss": 0.8484, "step": 26200 }, { "epoch": 1.008854667949952, "grad_norm": 1.280700922012329, "learning_rate": 9.868461507984945e-05, "loss": 0.7288, "step": 26205 }, { "epoch": 1.0090471607314726, "grad_norm": 1.8476749658584595, "learning_rate": 9.86543781507824e-05, "loss": 0.9356, "step": 26210 }, { "epoch": 1.0092396535129933, "grad_norm": 1.1410245895385742, "learning_rate": 9.862414134476358e-05, "loss": 0.7967, "step": 26215 }, { "epoch": 1.009432146294514, "grad_norm": 1.401632308959961, "learning_rate": 9.859390466455789e-05, "loss": 0.825, "step": 26220 }, { "epoch": 1.0096246390760346, "grad_norm": 1.705074667930603, "learning_rate": 9.856366811293033e-05, "loss": 1.0509, "step": 26225 }, { "epoch": 1.0098171318575553, "grad_norm": 1.788540244102478, "learning_rate": 9.853343169264581e-05, "loss": 0.9138, "step": 26230 }, { "epoch": 1.010009624639076, "grad_norm": 1.9128284454345703, "learning_rate": 9.850319540646919e-05, "loss": 0.931, "step": 26235 }, { "epoch": 1.0102021174205966, "grad_norm": 1.1917674541473389, "learning_rate": 9.847295925716548e-05, "loss": 0.7869, "step": 26240 }, { "epoch": 1.0103946102021175, "grad_norm": 1.351189136505127, "learning_rate": 9.844272324749955e-05, "loss": 0.868, "step": 26245 }, { "epoch": 1.0105871029836382, "grad_norm": 1.3934807777404785, "learning_rate": 9.841248738023619e-05, "loss": 0.9278, "step": 26250 }, { "epoch": 1.0107795957651589, "grad_norm": 1.337436556816101, "learning_rate": 9.83822516581404e-05, "loss": 0.9855, "step": 26255 }, { "epoch": 1.0109720885466795, "grad_norm": 1.5759284496307373, "learning_rate": 9.835201608397695e-05, "loss": 0.6804, "step": 26260 }, { "epoch": 1.0111645813282002, "grad_norm": 1.530190110206604, "learning_rate": 9.832178066051075e-05, "loss": 1.1206, "step": 26265 }, { "epoch": 1.0113570741097209, 
"grad_norm": 1.9554476737976074, "learning_rate": 9.829154539050657e-05, "loss": 1.0103, "step": 26270 }, { "epoch": 1.0115495668912415, "grad_norm": 1.9727224111557007, "learning_rate": 9.826131027672922e-05, "loss": 0.9634, "step": 26275 }, { "epoch": 1.0117420596727622, "grad_norm": 1.2419236898422241, "learning_rate": 9.823107532194358e-05, "loss": 0.6752, "step": 26280 }, { "epoch": 1.0119345524542829, "grad_norm": 1.707638144493103, "learning_rate": 9.820084052891436e-05, "loss": 0.8883, "step": 26285 }, { "epoch": 1.0121270452358035, "grad_norm": 1.57963228225708, "learning_rate": 9.817060590040633e-05, "loss": 0.8674, "step": 26290 }, { "epoch": 1.0123195380173244, "grad_norm": 0.9437096118927002, "learning_rate": 9.81403714391843e-05, "loss": 0.7648, "step": 26295 }, { "epoch": 1.012512030798845, "grad_norm": 1.6244466304779053, "learning_rate": 9.8110137148013e-05, "loss": 0.8088, "step": 26300 }, { "epoch": 1.0127045235803658, "grad_norm": 1.200982689857483, "learning_rate": 9.807990302965712e-05, "loss": 0.9869, "step": 26305 }, { "epoch": 1.0128970163618864, "grad_norm": 1.7576247453689575, "learning_rate": 9.804966908688137e-05, "loss": 0.7445, "step": 26310 }, { "epoch": 1.0130895091434071, "grad_norm": 1.2843434810638428, "learning_rate": 9.801943532245049e-05, "loss": 0.7024, "step": 26315 }, { "epoch": 1.0132820019249278, "grad_norm": 1.0596975088119507, "learning_rate": 9.79892017391291e-05, "loss": 0.8457, "step": 26320 }, { "epoch": 1.0134744947064485, "grad_norm": 1.8278613090515137, "learning_rate": 9.795896833968193e-05, "loss": 0.9703, "step": 26325 }, { "epoch": 1.0136669874879691, "grad_norm": 1.732739806175232, "learning_rate": 9.792873512687359e-05, "loss": 0.8173, "step": 26330 }, { "epoch": 1.0138594802694898, "grad_norm": 0.9648169279098511, "learning_rate": 9.789850210346864e-05, "loss": 0.8673, "step": 26335 }, { "epoch": 1.0140519730510107, "grad_norm": 1.4606342315673828, "learning_rate": 9.78682692722318e-05, "loss": 0.9835, 
"step": 26340 }, { "epoch": 1.0142444658325314, "grad_norm": 1.4847335815429688, "learning_rate": 9.783803663592762e-05, "loss": 0.9562, "step": 26345 }, { "epoch": 1.014436958614052, "grad_norm": 1.0714442729949951, "learning_rate": 9.78078041973206e-05, "loss": 0.8459, "step": 26350 }, { "epoch": 1.0146294513955727, "grad_norm": 1.2830302715301514, "learning_rate": 9.777757195917544e-05, "loss": 1.0389, "step": 26355 }, { "epoch": 1.0148219441770934, "grad_norm": 1.007949948310852, "learning_rate": 9.774733992425659e-05, "loss": 0.8296, "step": 26360 }, { "epoch": 1.015014436958614, "grad_norm": 1.4365791082382202, "learning_rate": 9.771710809532853e-05, "loss": 0.8113, "step": 26365 }, { "epoch": 1.0152069297401347, "grad_norm": 1.213368535041809, "learning_rate": 9.768687647515587e-05, "loss": 0.829, "step": 26370 }, { "epoch": 1.0153994225216554, "grad_norm": 1.6301283836364746, "learning_rate": 9.765664506650302e-05, "loss": 0.8657, "step": 26375 }, { "epoch": 1.015591915303176, "grad_norm": 1.4944705963134766, "learning_rate": 9.762641387213442e-05, "loss": 0.8043, "step": 26380 }, { "epoch": 1.0157844080846967, "grad_norm": 1.2845187187194824, "learning_rate": 9.759618289481459e-05, "loss": 0.6866, "step": 26385 }, { "epoch": 1.0159769008662176, "grad_norm": 1.635873556137085, "learning_rate": 9.75659521373079e-05, "loss": 0.9432, "step": 26390 }, { "epoch": 1.0161693936477383, "grad_norm": 0.9494144320487976, "learning_rate": 9.753572160237873e-05, "loss": 0.7428, "step": 26395 }, { "epoch": 1.016361886429259, "grad_norm": 1.2726250886917114, "learning_rate": 9.750549129279153e-05, "loss": 0.6935, "step": 26400 }, { "epoch": 1.0165543792107796, "grad_norm": 1.4167382717132568, "learning_rate": 9.747526121131064e-05, "loss": 1.0576, "step": 26405 }, { "epoch": 1.0167468719923003, "grad_norm": 1.8464453220367432, "learning_rate": 9.744503136070036e-05, "loss": 0.8568, "step": 26410 }, { "epoch": 1.016939364773821, "grad_norm": 1.164482593536377, 
"learning_rate": 9.741480174372504e-05, "loss": 0.8255, "step": 26415 }, { "epoch": 1.0171318575553416, "grad_norm": 0.8602749109268188, "learning_rate": 9.738457236314898e-05, "loss": 0.7958, "step": 26420 }, { "epoch": 1.0173243503368623, "grad_norm": 1.5463776588439941, "learning_rate": 9.735434322173645e-05, "loss": 0.79, "step": 26425 }, { "epoch": 1.017516843118383, "grad_norm": 1.2181867361068726, "learning_rate": 9.732411432225173e-05, "loss": 0.8152, "step": 26430 }, { "epoch": 1.0177093358999039, "grad_norm": 1.4031577110290527, "learning_rate": 9.729388566745899e-05, "loss": 0.8809, "step": 26435 }, { "epoch": 1.0179018286814245, "grad_norm": 0.7895151376724243, "learning_rate": 9.726365726012249e-05, "loss": 0.8356, "step": 26440 }, { "epoch": 1.0180943214629452, "grad_norm": 1.0852110385894775, "learning_rate": 9.723342910300642e-05, "loss": 0.71, "step": 26445 }, { "epoch": 1.0182868142444659, "grad_norm": 1.8769862651824951, "learning_rate": 9.720320119887494e-05, "loss": 0.8581, "step": 26450 }, { "epoch": 1.0184793070259865, "grad_norm": 1.0951234102249146, "learning_rate": 9.717297355049215e-05, "loss": 0.9184, "step": 26455 }, { "epoch": 1.0186717998075072, "grad_norm": 1.4983422756195068, "learning_rate": 9.714274616062222e-05, "loss": 0.9141, "step": 26460 }, { "epoch": 1.0188642925890279, "grad_norm": 1.0044662952423096, "learning_rate": 9.711251903202923e-05, "loss": 0.77, "step": 26465 }, { "epoch": 1.0190567853705486, "grad_norm": 1.339004635810852, "learning_rate": 9.708229216747721e-05, "loss": 1.0225, "step": 26470 }, { "epoch": 1.0192492781520692, "grad_norm": 1.5618289709091187, "learning_rate": 9.70520655697303e-05, "loss": 0.9058, "step": 26475 }, { "epoch": 1.01944177093359, "grad_norm": 0.7431265115737915, "learning_rate": 9.702183924155246e-05, "loss": 0.8251, "step": 26480 }, { "epoch": 1.0196342637151108, "grad_norm": 1.1725512742996216, "learning_rate": 9.699161318570765e-05, "loss": 0.8755, "step": 26485 }, { "epoch": 
1.0198267564966315, "grad_norm": 2.5678439140319824, "learning_rate": 9.696138740495991e-05, "loss": 0.9383, "step": 26490 }, { "epoch": 1.0200192492781521, "grad_norm": 1.203439474105835, "learning_rate": 9.693116190207318e-05, "loss": 0.8122, "step": 26495 }, { "epoch": 1.0202117420596728, "grad_norm": 1.7654101848602295, "learning_rate": 9.690093667981132e-05, "loss": 0.8668, "step": 26500 }, { "epoch": 1.0204042348411935, "grad_norm": 1.5297460556030273, "learning_rate": 9.687071174093831e-05, "loss": 0.9342, "step": 26505 }, { "epoch": 1.0205967276227141, "grad_norm": 1.2737789154052734, "learning_rate": 9.684048708821796e-05, "loss": 0.8387, "step": 26510 }, { "epoch": 1.0207892204042348, "grad_norm": 1.0593212842941284, "learning_rate": 9.681026272441414e-05, "loss": 0.9148, "step": 26515 }, { "epoch": 1.0209817131857555, "grad_norm": 1.3698254823684692, "learning_rate": 9.678003865229067e-05, "loss": 0.9545, "step": 26520 }, { "epoch": 1.0211742059672761, "grad_norm": 1.5303738117218018, "learning_rate": 9.674981487461133e-05, "loss": 0.98, "step": 26525 }, { "epoch": 1.0213666987487968, "grad_norm": 0.8624149560928345, "learning_rate": 9.671959139413991e-05, "loss": 0.8921, "step": 26530 }, { "epoch": 1.0215591915303177, "grad_norm": 0.9601109027862549, "learning_rate": 9.66893682136401e-05, "loss": 0.8469, "step": 26535 }, { "epoch": 1.0217516843118384, "grad_norm": 1.3074990510940552, "learning_rate": 9.665914533587563e-05, "loss": 0.8174, "step": 26540 }, { "epoch": 1.021944177093359, "grad_norm": 1.4614131450653076, "learning_rate": 9.662892276361022e-05, "loss": 1.0461, "step": 26545 }, { "epoch": 1.0221366698748797, "grad_norm": 2.2266509532928467, "learning_rate": 9.65987004996075e-05, "loss": 0.9664, "step": 26550 }, { "epoch": 1.0223291626564004, "grad_norm": 1.8726714849472046, "learning_rate": 9.656847854663102e-05, "loss": 0.9988, "step": 26555 }, { "epoch": 1.022521655437921, "grad_norm": 1.4316071271896362, "learning_rate": 
9.65382569074445e-05, "loss": 0.9426, "step": 26560 }, { "epoch": 1.0227141482194417, "grad_norm": 0.9609633088111877, "learning_rate": 9.650803558481146e-05, "loss": 0.8867, "step": 26565 }, { "epoch": 1.0229066410009624, "grad_norm": 2.933589458465576, "learning_rate": 9.647781458149539e-05, "loss": 0.9714, "step": 26570 }, { "epoch": 1.023099133782483, "grad_norm": 1.019455075263977, "learning_rate": 9.644759390025988e-05, "loss": 0.7742, "step": 26575 }, { "epoch": 1.0232916265640037, "grad_norm": 1.2443771362304688, "learning_rate": 9.641737354386839e-05, "loss": 0.7977, "step": 26580 }, { "epoch": 1.0234841193455246, "grad_norm": 1.4607973098754883, "learning_rate": 9.63871535150843e-05, "loss": 0.9563, "step": 26585 }, { "epoch": 1.0236766121270453, "grad_norm": 1.0155866146087646, "learning_rate": 9.635693381667112e-05, "loss": 0.8839, "step": 26590 }, { "epoch": 1.023869104908566, "grad_norm": 1.453011155128479, "learning_rate": 9.632671445139223e-05, "loss": 0.8905, "step": 26595 }, { "epoch": 1.0240615976900866, "grad_norm": 1.598872184753418, "learning_rate": 9.629649542201091e-05, "loss": 0.8839, "step": 26600 }, { "epoch": 1.0242540904716073, "grad_norm": 1.3655000925064087, "learning_rate": 9.626627673129062e-05, "loss": 0.8606, "step": 26605 }, { "epoch": 1.024446583253128, "grad_norm": 0.9747033715248108, "learning_rate": 9.623605838199457e-05, "loss": 0.9387, "step": 26610 }, { "epoch": 1.0246390760346487, "grad_norm": 1.2741761207580566, "learning_rate": 9.6205840376886e-05, "loss": 0.8915, "step": 26615 }, { "epoch": 1.0248315688161693, "grad_norm": 1.0303744077682495, "learning_rate": 9.617562271872825e-05, "loss": 0.9422, "step": 26620 }, { "epoch": 1.02502406159769, "grad_norm": 1.2387011051177979, "learning_rate": 9.614540541028445e-05, "loss": 0.9212, "step": 26625 }, { "epoch": 1.0252165543792109, "grad_norm": 1.31088387966156, "learning_rate": 9.611518845431778e-05, "loss": 0.8258, "step": 26630 }, { "epoch": 1.0254090471607316, 
"grad_norm": 1.3650875091552734, "learning_rate": 9.60849718535914e-05, "loss": 0.8836, "step": 26635 }, { "epoch": 1.0256015399422522, "grad_norm": 1.539516568183899, "learning_rate": 9.605475561086842e-05, "loss": 0.8616, "step": 26640 }, { "epoch": 1.025794032723773, "grad_norm": 1.0243744850158691, "learning_rate": 9.602453972891189e-05, "loss": 0.7763, "step": 26645 }, { "epoch": 1.0259865255052936, "grad_norm": 1.4782140254974365, "learning_rate": 9.599432421048488e-05, "loss": 0.8951, "step": 26650 }, { "epoch": 1.0261790182868142, "grad_norm": 1.0604002475738525, "learning_rate": 9.596410905835037e-05, "loss": 0.8381, "step": 26655 }, { "epoch": 1.026371511068335, "grad_norm": 1.6597970724105835, "learning_rate": 9.593389427527135e-05, "loss": 0.9625, "step": 26660 }, { "epoch": 1.0265640038498556, "grad_norm": 0.9541493654251099, "learning_rate": 9.590367986401078e-05, "loss": 0.8285, "step": 26665 }, { "epoch": 1.0267564966313762, "grad_norm": 1.096489667892456, "learning_rate": 9.587346582733155e-05, "loss": 0.8462, "step": 26670 }, { "epoch": 1.026948989412897, "grad_norm": 1.7347831726074219, "learning_rate": 9.58432521679965e-05, "loss": 0.9057, "step": 26675 }, { "epoch": 1.0271414821944178, "grad_norm": 1.02500319480896, "learning_rate": 9.581303888876855e-05, "loss": 0.8081, "step": 26680 }, { "epoch": 1.0273339749759385, "grad_norm": 1.090671181678772, "learning_rate": 9.578282599241044e-05, "loss": 0.829, "step": 26685 }, { "epoch": 1.0275264677574591, "grad_norm": 1.345054030418396, "learning_rate": 9.575261348168493e-05, "loss": 0.9518, "step": 26690 }, { "epoch": 1.0277189605389798, "grad_norm": 1.129591703414917, "learning_rate": 9.572240135935483e-05, "loss": 0.8178, "step": 26695 }, { "epoch": 1.0279114533205005, "grad_norm": 1.1437878608703613, "learning_rate": 9.569218962818276e-05, "loss": 0.8266, "step": 26700 }, { "epoch": 1.0281039461020212, "grad_norm": 1.2581342458724976, "learning_rate": 9.566197829093138e-05, "loss": 0.8247, 
"step": 26705 }, { "epoch": 1.0282964388835418, "grad_norm": 1.0760700702667236, "learning_rate": 9.56317673503634e-05, "loss": 1.003, "step": 26710 }, { "epoch": 1.0284889316650625, "grad_norm": 1.10942804813385, "learning_rate": 9.560155680924137e-05, "loss": 0.7992, "step": 26715 }, { "epoch": 1.0286814244465832, "grad_norm": 1.9202826023101807, "learning_rate": 9.557134667032776e-05, "loss": 0.989, "step": 26720 }, { "epoch": 1.0288739172281038, "grad_norm": 2.0237245559692383, "learning_rate": 9.554113693638522e-05, "loss": 0.9404, "step": 26725 }, { "epoch": 1.0290664100096247, "grad_norm": 2.128678798675537, "learning_rate": 9.551092761017618e-05, "loss": 1.0402, "step": 26730 }, { "epoch": 1.0292589027911454, "grad_norm": 0.9286008477210999, "learning_rate": 9.5480718694463e-05, "loss": 0.9927, "step": 26735 }, { "epoch": 1.029451395572666, "grad_norm": 1.2808765172958374, "learning_rate": 9.545051019200823e-05, "loss": 0.9026, "step": 26740 }, { "epoch": 1.0296438883541867, "grad_norm": 1.4116573333740234, "learning_rate": 9.542030210557415e-05, "loss": 0.8285, "step": 26745 }, { "epoch": 1.0298363811357074, "grad_norm": 2.100184440612793, "learning_rate": 9.539009443792309e-05, "loss": 0.925, "step": 26750 }, { "epoch": 1.030028873917228, "grad_norm": 1.3617011308670044, "learning_rate": 9.535988719181736e-05, "loss": 0.8356, "step": 26755 }, { "epoch": 1.0302213666987488, "grad_norm": 0.9685754776000977, "learning_rate": 9.532968037001923e-05, "loss": 0.8056, "step": 26760 }, { "epoch": 1.0304138594802694, "grad_norm": 1.2491365671157837, "learning_rate": 9.529947397529086e-05, "loss": 0.9138, "step": 26765 }, { "epoch": 1.03060635226179, "grad_norm": 0.8603072166442871, "learning_rate": 9.526926801039449e-05, "loss": 0.9497, "step": 26770 }, { "epoch": 1.030798845043311, "grad_norm": 0.8559325337409973, "learning_rate": 9.523906247809216e-05, "loss": 0.6938, "step": 26775 }, { "epoch": 1.0309913378248317, "grad_norm": 1.330196738243103, "learning_rate": 
9.520885738114609e-05, "loss": 0.8141, "step": 26780 }, { "epoch": 1.0311838306063523, "grad_norm": 1.7451072931289673, "learning_rate": 9.517865272231827e-05, "loss": 0.8798, "step": 26785 }, { "epoch": 1.031376323387873, "grad_norm": 2.3189198970794678, "learning_rate": 9.514844850437066e-05, "loss": 0.9944, "step": 26790 }, { "epoch": 1.0315688161693937, "grad_norm": 1.4511535167694092, "learning_rate": 9.511824473006535e-05, "loss": 0.8759, "step": 26795 }, { "epoch": 1.0317613089509143, "grad_norm": 1.5764187574386597, "learning_rate": 9.508804140216423e-05, "loss": 0.8792, "step": 26800 }, { "epoch": 1.031953801732435, "grad_norm": 1.368013620376587, "learning_rate": 9.505783852342913e-05, "loss": 0.8946, "step": 26805 }, { "epoch": 1.0321462945139557, "grad_norm": 1.747062087059021, "learning_rate": 9.502763609662201e-05, "loss": 0.9188, "step": 26810 }, { "epoch": 1.0323387872954763, "grad_norm": 1.7886524200439453, "learning_rate": 9.499743412450463e-05, "loss": 0.8947, "step": 26815 }, { "epoch": 1.032531280076997, "grad_norm": 1.3724089860916138, "learning_rate": 9.496723260983871e-05, "loss": 0.909, "step": 26820 }, { "epoch": 1.032723772858518, "grad_norm": 1.7624560594558716, "learning_rate": 9.493703155538607e-05, "loss": 0.6966, "step": 26825 }, { "epoch": 1.0329162656400386, "grad_norm": 1.3632899522781372, "learning_rate": 9.490683096390836e-05, "loss": 0.7867, "step": 26830 }, { "epoch": 1.0331087584215592, "grad_norm": 0.9639708399772644, "learning_rate": 9.487663083816718e-05, "loss": 1.0168, "step": 26835 }, { "epoch": 1.03330125120308, "grad_norm": 1.6078749895095825, "learning_rate": 9.484643118092422e-05, "loss": 0.8775, "step": 26840 }, { "epoch": 1.0334937439846006, "grad_norm": 0.9812796711921692, "learning_rate": 9.481623199494098e-05, "loss": 0.8573, "step": 26845 }, { "epoch": 1.0336862367661213, "grad_norm": 1.926906943321228, "learning_rate": 9.478603328297896e-05, "loss": 0.8621, "step": 26850 }, { "epoch": 1.033878729547642, 
"grad_norm": 1.4582041501998901, "learning_rate": 9.475583504779966e-05, "loss": 0.8978, "step": 26855 }, { "epoch": 1.0340712223291626, "grad_norm": 1.22900390625, "learning_rate": 9.472563729216453e-05, "loss": 0.8079, "step": 26860 }, { "epoch": 1.0342637151106833, "grad_norm": 1.8301913738250732, "learning_rate": 9.46954400188349e-05, "loss": 0.8844, "step": 26865 }, { "epoch": 1.034456207892204, "grad_norm": 0.9074123501777649, "learning_rate": 9.466524323057217e-05, "loss": 0.8437, "step": 26870 }, { "epoch": 1.0346487006737248, "grad_norm": 1.1448841094970703, "learning_rate": 9.463504693013761e-05, "loss": 0.9197, "step": 26875 }, { "epoch": 1.0348411934552455, "grad_norm": 1.2825102806091309, "learning_rate": 9.460485112029241e-05, "loss": 0.8754, "step": 26880 }, { "epoch": 1.0350336862367662, "grad_norm": 1.4492766857147217, "learning_rate": 9.45746558037979e-05, "loss": 1.068, "step": 26885 }, { "epoch": 1.0352261790182868, "grad_norm": 1.4964579343795776, "learning_rate": 9.454446098341516e-05, "loss": 0.7369, "step": 26890 }, { "epoch": 1.0354186717998075, "grad_norm": 1.2019277811050415, "learning_rate": 9.45142666619053e-05, "loss": 0.8437, "step": 26895 }, { "epoch": 1.0356111645813282, "grad_norm": 1.22611665725708, "learning_rate": 9.448407284202943e-05, "loss": 0.7403, "step": 26900 }, { "epoch": 1.0358036573628489, "grad_norm": 1.3472671508789062, "learning_rate": 9.445387952654857e-05, "loss": 0.8897, "step": 26905 }, { "epoch": 1.0359961501443695, "grad_norm": 0.9485424160957336, "learning_rate": 9.442368671822364e-05, "loss": 1.0716, "step": 26910 }, { "epoch": 1.0361886429258902, "grad_norm": 1.5943937301635742, "learning_rate": 9.439349441981569e-05, "loss": 0.797, "step": 26915 }, { "epoch": 1.036381135707411, "grad_norm": 1.312227487564087, "learning_rate": 9.436330263408551e-05, "loss": 0.7671, "step": 26920 }, { "epoch": 1.0365736284889318, "grad_norm": 1.4809763431549072, "learning_rate": 9.433311136379393e-05, "loss": 0.958, "step": 
26925 }, { "epoch": 1.0367661212704524, "grad_norm": 1.0630615949630737, "learning_rate": 9.430292061170181e-05, "loss": 0.8594, "step": 26930 }, { "epoch": 1.036958614051973, "grad_norm": 2.018148422241211, "learning_rate": 9.427273038056988e-05, "loss": 0.8566, "step": 26935 }, { "epoch": 1.0371511068334938, "grad_norm": 1.4871729612350464, "learning_rate": 9.424254067315875e-05, "loss": 1.0174, "step": 26940 }, { "epoch": 1.0373435996150144, "grad_norm": 1.2022002935409546, "learning_rate": 9.421235149222919e-05, "loss": 0.7991, "step": 26945 }, { "epoch": 1.037536092396535, "grad_norm": 1.7707716226577759, "learning_rate": 9.418216284054174e-05, "loss": 0.881, "step": 26950 }, { "epoch": 1.0377285851780558, "grad_norm": 1.1901010274887085, "learning_rate": 9.415197472085693e-05, "loss": 0.7872, "step": 26955 }, { "epoch": 1.0379210779595764, "grad_norm": 1.460284948348999, "learning_rate": 9.412178713593532e-05, "loss": 0.9017, "step": 26960 }, { "epoch": 1.0381135707410971, "grad_norm": 1.8410388231277466, "learning_rate": 9.409160008853735e-05, "loss": 0.8422, "step": 26965 }, { "epoch": 1.038306063522618, "grad_norm": 1.3502047061920166, "learning_rate": 9.406141358142339e-05, "loss": 0.6949, "step": 26970 }, { "epoch": 1.0384985563041387, "grad_norm": 1.433706521987915, "learning_rate": 9.403122761735382e-05, "loss": 0.8941, "step": 26975 }, { "epoch": 1.0386910490856593, "grad_norm": 1.3440196514129639, "learning_rate": 9.400104219908895e-05, "loss": 1.1485, "step": 26980 }, { "epoch": 1.03888354186718, "grad_norm": 1.2331663370132446, "learning_rate": 9.397085732938902e-05, "loss": 0.7441, "step": 26985 }, { "epoch": 1.0390760346487007, "grad_norm": 0.953507125377655, "learning_rate": 9.394067301101425e-05, "loss": 0.9023, "step": 26990 }, { "epoch": 1.0392685274302214, "grad_norm": 1.993162751197815, "learning_rate": 9.391048924672483e-05, "loss": 0.9112, "step": 26995 }, { "epoch": 1.039461020211742, "grad_norm": 1.193826675415039, "learning_rate": 
9.388030603928077e-05, "loss": 0.9214, "step": 27000 }, { "epoch": 1.0396535129932627, "grad_norm": 1.5678527355194092, "learning_rate": 9.385012339144221e-05, "loss": 0.8798, "step": 27005 }, { "epoch": 1.0398460057747834, "grad_norm": 1.09235680103302, "learning_rate": 9.381994130596916e-05, "loss": 0.9111, "step": 27010 }, { "epoch": 1.040038498556304, "grad_norm": 1.3763002157211304, "learning_rate": 9.378975978562147e-05, "loss": 0.8024, "step": 27015 }, { "epoch": 1.040230991337825, "grad_norm": 2.1225831508636475, "learning_rate": 9.375957883315916e-05, "loss": 0.8644, "step": 27020 }, { "epoch": 1.0404234841193456, "grad_norm": 1.7183401584625244, "learning_rate": 9.372939845134197e-05, "loss": 0.6908, "step": 27025 }, { "epoch": 1.0406159769008663, "grad_norm": 1.1370265483856201, "learning_rate": 9.369921864292981e-05, "loss": 0.7209, "step": 27030 }, { "epoch": 1.040808469682387, "grad_norm": 1.4425493478775024, "learning_rate": 9.366903941068237e-05, "loss": 0.9938, "step": 27035 }, { "epoch": 1.0410009624639076, "grad_norm": 1.009415864944458, "learning_rate": 9.363886075735928e-05, "loss": 0.822, "step": 27040 }, { "epoch": 1.0411934552454283, "grad_norm": 1.7961349487304688, "learning_rate": 9.36086826857203e-05, "loss": 0.8856, "step": 27045 }, { "epoch": 1.041385948026949, "grad_norm": 1.6287988424301147, "learning_rate": 9.357850519852492e-05, "loss": 0.6491, "step": 27050 }, { "epoch": 1.0415784408084696, "grad_norm": 0.8782758116722107, "learning_rate": 9.354832829853268e-05, "loss": 0.7966, "step": 27055 }, { "epoch": 1.0417709335899903, "grad_norm": 2.51961612701416, "learning_rate": 9.351815198850312e-05, "loss": 0.7717, "step": 27060 }, { "epoch": 1.041963426371511, "grad_norm": 1.5812909603118896, "learning_rate": 9.348797627119561e-05, "loss": 0.8912, "step": 27065 }, { "epoch": 1.0421559191530319, "grad_norm": 2.106905460357666, "learning_rate": 9.345780114936951e-05, "loss": 0.8447, "step": 27070 }, { "epoch": 1.0423484119345525, 
"grad_norm": 1.7063692808151245, "learning_rate": 9.342762662578419e-05, "loss": 0.9389, "step": 27075 }, { "epoch": 1.0425409047160732, "grad_norm": 1.5984817743301392, "learning_rate": 9.339745270319886e-05, "loss": 0.9812, "step": 27080 }, { "epoch": 1.0427333974975939, "grad_norm": 1.0404373407363892, "learning_rate": 9.336727938437274e-05, "loss": 0.8982, "step": 27085 }, { "epoch": 1.0429258902791145, "grad_norm": 1.7776751518249512, "learning_rate": 9.333710667206498e-05, "loss": 0.7817, "step": 27090 }, { "epoch": 1.0431183830606352, "grad_norm": 1.092366099357605, "learning_rate": 9.330693456903471e-05, "loss": 0.8735, "step": 27095 }, { "epoch": 1.0433108758421559, "grad_norm": 1.3183677196502686, "learning_rate": 9.327676307804088e-05, "loss": 0.855, "step": 27100 }, { "epoch": 1.0435033686236765, "grad_norm": 1.6006138324737549, "learning_rate": 9.324659220184256e-05, "loss": 0.9884, "step": 27105 }, { "epoch": 1.0436958614051972, "grad_norm": 2.0356695652008057, "learning_rate": 9.321642194319867e-05, "loss": 0.9388, "step": 27110 }, { "epoch": 1.043888354186718, "grad_norm": 1.6827491521835327, "learning_rate": 9.3186252304868e-05, "loss": 0.8646, "step": 27115 }, { "epoch": 1.0440808469682388, "grad_norm": 1.6442832946777344, "learning_rate": 9.315608328960946e-05, "loss": 0.9892, "step": 27120 }, { "epoch": 1.0442733397497594, "grad_norm": 2.0704457759857178, "learning_rate": 9.312591490018176e-05, "loss": 0.7849, "step": 27125 }, { "epoch": 1.0444658325312801, "grad_norm": 1.5209687948226929, "learning_rate": 9.309574713934359e-05, "loss": 0.7079, "step": 27130 }, { "epoch": 1.0446583253128008, "grad_norm": 1.6896909475326538, "learning_rate": 9.306558000985363e-05, "loss": 0.8909, "step": 27135 }, { "epoch": 1.0448508180943215, "grad_norm": 1.2594019174575806, "learning_rate": 9.303541351447043e-05, "loss": 0.9134, "step": 27140 }, { "epoch": 1.0450433108758421, "grad_norm": 1.4927109479904175, "learning_rate": 9.300524765595252e-05, "loss": 
0.9467, "step": 27145 }, { "epoch": 1.0452358036573628, "grad_norm": 2.307347059249878, "learning_rate": 9.297508243705838e-05, "loss": 0.9415, "step": 27150 }, { "epoch": 1.0454282964388835, "grad_norm": 1.2363544702529907, "learning_rate": 9.294491786054645e-05, "loss": 0.8342, "step": 27155 }, { "epoch": 1.0456207892204041, "grad_norm": 1.7037842273712158, "learning_rate": 9.291475392917498e-05, "loss": 0.8995, "step": 27160 }, { "epoch": 1.045813282001925, "grad_norm": 1.9591143131256104, "learning_rate": 9.288459064570238e-05, "loss": 0.9448, "step": 27165 }, { "epoch": 1.0460057747834457, "grad_norm": 1.5714846849441528, "learning_rate": 9.285442801288681e-05, "loss": 0.7211, "step": 27170 }, { "epoch": 1.0461982675649664, "grad_norm": 1.2620651721954346, "learning_rate": 9.282426603348648e-05, "loss": 0.8678, "step": 27175 }, { "epoch": 1.046390760346487, "grad_norm": 1.3091076612472534, "learning_rate": 9.279410471025946e-05, "loss": 0.947, "step": 27180 }, { "epoch": 1.0465832531280077, "grad_norm": 1.055066704750061, "learning_rate": 9.276394404596384e-05, "loss": 0.8502, "step": 27185 }, { "epoch": 1.0467757459095284, "grad_norm": 1.4299228191375732, "learning_rate": 9.27337840433576e-05, "loss": 0.8472, "step": 27190 }, { "epoch": 1.046968238691049, "grad_norm": 1.315097451210022, "learning_rate": 9.27036247051987e-05, "loss": 0.9291, "step": 27195 }, { "epoch": 1.0471607314725697, "grad_norm": 1.491268515586853, "learning_rate": 9.267346603424497e-05, "loss": 0.8054, "step": 27200 }, { "epoch": 1.0473532242540904, "grad_norm": 1.2065061330795288, "learning_rate": 9.264330803325422e-05, "loss": 0.7778, "step": 27205 }, { "epoch": 1.0475457170356113, "grad_norm": 1.1134014129638672, "learning_rate": 9.261315070498425e-05, "loss": 0.9711, "step": 27210 }, { "epoch": 1.047738209817132, "grad_norm": 1.3288688659667969, "learning_rate": 9.258299405219271e-05, "loss": 0.833, "step": 27215 }, { "epoch": 1.0479307025986526, "grad_norm": 1.440891146659851, 
"learning_rate": 9.255283807763719e-05, "loss": 0.9198, "step": 27220 }, { "epoch": 1.0481231953801733, "grad_norm": 1.3492053747177124, "learning_rate": 9.252268278407532e-05, "loss": 0.7503, "step": 27225 }, { "epoch": 1.048315688161694, "grad_norm": 0.9512596726417542, "learning_rate": 9.249252817426459e-05, "loss": 0.8406, "step": 27230 }, { "epoch": 1.0485081809432146, "grad_norm": 1.8228788375854492, "learning_rate": 9.24623742509624e-05, "loss": 0.8661, "step": 27235 }, { "epoch": 1.0487006737247353, "grad_norm": 1.9597275257110596, "learning_rate": 9.243222101692617e-05, "loss": 0.8972, "step": 27240 }, { "epoch": 1.048893166506256, "grad_norm": 0.8910306692123413, "learning_rate": 9.24020684749132e-05, "loss": 0.7583, "step": 27245 }, { "epoch": 1.0490856592877766, "grad_norm": 1.1714658737182617, "learning_rate": 9.237191662768071e-05, "loss": 0.7962, "step": 27250 }, { "epoch": 1.0492781520692973, "grad_norm": 1.0180914402008057, "learning_rate": 9.234176547798595e-05, "loss": 0.7615, "step": 27255 }, { "epoch": 1.0494706448508182, "grad_norm": 1.5986478328704834, "learning_rate": 9.231161502858599e-05, "loss": 0.6797, "step": 27260 }, { "epoch": 1.0496631376323389, "grad_norm": 1.1098225116729736, "learning_rate": 9.228146528223787e-05, "loss": 0.867, "step": 27265 }, { "epoch": 1.0498556304138595, "grad_norm": 1.7311850786209106, "learning_rate": 9.225131624169868e-05, "loss": 0.8742, "step": 27270 }, { "epoch": 1.0500481231953802, "grad_norm": 1.0296027660369873, "learning_rate": 9.222116790972526e-05, "loss": 0.9372, "step": 27275 }, { "epoch": 1.0502406159769009, "grad_norm": 1.6226547956466675, "learning_rate": 9.219102028907448e-05, "loss": 0.7823, "step": 27280 }, { "epoch": 1.0504331087584216, "grad_norm": 1.745197057723999, "learning_rate": 9.216087338250321e-05, "loss": 0.7434, "step": 27285 }, { "epoch": 1.0506256015399422, "grad_norm": 1.3191584348678589, "learning_rate": 9.21307271927681e-05, "loss": 1.0177, "step": 27290 }, { "epoch": 
1.050818094321463, "grad_norm": 1.4447453022003174, "learning_rate": 9.21005817226259e-05, "loss": 0.7972, "step": 27295 }, { "epoch": 1.0510105871029836, "grad_norm": 1.4396947622299194, "learning_rate": 9.207043697483315e-05, "loss": 0.901, "step": 27300 }, { "epoch": 1.0512030798845042, "grad_norm": 2.1123616695404053, "learning_rate": 9.204029295214641e-05, "loss": 1.0325, "step": 27305 }, { "epoch": 1.0513955726660251, "grad_norm": 1.1420029401779175, "learning_rate": 9.201014965732216e-05, "loss": 0.8349, "step": 27310 }, { "epoch": 1.0515880654475458, "grad_norm": 1.5591840744018555, "learning_rate": 9.198000709311681e-05, "loss": 0.8653, "step": 27315 }, { "epoch": 1.0517805582290665, "grad_norm": 1.5964603424072266, "learning_rate": 9.194986526228662e-05, "loss": 0.9203, "step": 27320 }, { "epoch": 1.0519730510105871, "grad_norm": 1.5072814226150513, "learning_rate": 9.191972416758798e-05, "loss": 0.8917, "step": 27325 }, { "epoch": 1.0521655437921078, "grad_norm": 1.7254325151443481, "learning_rate": 9.188958381177705e-05, "loss": 0.7569, "step": 27330 }, { "epoch": 1.0523580365736285, "grad_norm": 1.856126070022583, "learning_rate": 9.18594441976099e-05, "loss": 0.8379, "step": 27335 }, { "epoch": 1.0525505293551491, "grad_norm": 1.1711968183517456, "learning_rate": 9.18293053278427e-05, "loss": 0.9595, "step": 27340 }, { "epoch": 1.0527430221366698, "grad_norm": 0.8440997004508972, "learning_rate": 9.179916720523138e-05, "loss": 0.8221, "step": 27345 }, { "epoch": 1.0529355149181905, "grad_norm": 1.532020092010498, "learning_rate": 9.176902983253187e-05, "loss": 0.8155, "step": 27350 }, { "epoch": 1.0531280076997112, "grad_norm": 1.3105454444885254, "learning_rate": 9.173889321250009e-05, "loss": 1.0119, "step": 27355 }, { "epoch": 1.053320500481232, "grad_norm": 1.6066848039627075, "learning_rate": 9.170875734789182e-05, "loss": 0.8037, "step": 27360 }, { "epoch": 1.0535129932627527, "grad_norm": 0.9645727872848511, "learning_rate": 
9.167862224146271e-05, "loss": 0.8052, "step": 27365 }, { "epoch": 1.0537054860442734, "grad_norm": 1.2492599487304688, "learning_rate": 9.164848789596851e-05, "loss": 0.8299, "step": 27370 }, { "epoch": 1.053897978825794, "grad_norm": 1.861069679260254, "learning_rate": 9.161835431416478e-05, "loss": 0.8103, "step": 27375 }, { "epoch": 1.0540904716073147, "grad_norm": 1.3065990209579468, "learning_rate": 9.158822149880699e-05, "loss": 0.7432, "step": 27380 }, { "epoch": 1.0542829643888354, "grad_norm": 1.0527342557907104, "learning_rate": 9.155808945265063e-05, "loss": 0.9543, "step": 27385 }, { "epoch": 1.054475457170356, "grad_norm": 1.0329846143722534, "learning_rate": 9.152795817845109e-05, "loss": 0.7798, "step": 27390 }, { "epoch": 1.0546679499518767, "grad_norm": 1.2137219905853271, "learning_rate": 9.149782767896364e-05, "loss": 0.8731, "step": 27395 }, { "epoch": 1.0548604427333974, "grad_norm": 1.7884348630905151, "learning_rate": 9.146769795694351e-05, "loss": 0.7893, "step": 27400 }, { "epoch": 1.0550529355149183, "grad_norm": 1.842133641242981, "learning_rate": 9.143756901514591e-05, "loss": 1.0068, "step": 27405 }, { "epoch": 1.055245428296439, "grad_norm": 1.8429181575775146, "learning_rate": 9.140744085632587e-05, "loss": 0.8311, "step": 27410 }, { "epoch": 1.0554379210779596, "grad_norm": 1.6385420560836792, "learning_rate": 9.137731348323848e-05, "loss": 0.9761, "step": 27415 }, { "epoch": 1.0556304138594803, "grad_norm": 1.289536714553833, "learning_rate": 9.134718689863863e-05, "loss": 0.9112, "step": 27420 }, { "epoch": 1.055822906641001, "grad_norm": 1.3178956508636475, "learning_rate": 9.13170611052812e-05, "loss": 0.9767, "step": 27425 }, { "epoch": 1.0560153994225217, "grad_norm": 1.588117003440857, "learning_rate": 9.128693610592104e-05, "loss": 0.9646, "step": 27430 }, { "epoch": 1.0562078922040423, "grad_norm": 2.409613609313965, "learning_rate": 9.125681190331287e-05, "loss": 0.9672, "step": 27435 }, { "epoch": 1.056400384985563, 
"grad_norm": 2.1366968154907227, "learning_rate": 9.123271311673885e-05, "loss": 1.0157, "step": 27440 }, { "epoch": 1.0565928777670837, "grad_norm": 1.3184314966201782, "learning_rate": 9.12025903552259e-05, "loss": 0.7433, "step": 27445 }, { "epoch": 1.0567853705486043, "grad_norm": 1.710648775100708, "learning_rate": 9.117246839817776e-05, "loss": 1.145, "step": 27450 }, { "epoch": 1.0569778633301252, "grad_norm": 1.0037586688995361, "learning_rate": 9.114234724834895e-05, "loss": 0.8956, "step": 27455 }, { "epoch": 1.057170356111646, "grad_norm": 1.9379615783691406, "learning_rate": 9.11122269084938e-05, "loss": 0.8227, "step": 27460 }, { "epoch": 1.0573628488931666, "grad_norm": 1.1451090574264526, "learning_rate": 9.108210738136661e-05, "loss": 1.0623, "step": 27465 }, { "epoch": 1.0575553416746872, "grad_norm": 1.3829764127731323, "learning_rate": 9.105198866972162e-05, "loss": 0.6903, "step": 27470 }, { "epoch": 1.057747834456208, "grad_norm": 0.7273270487785339, "learning_rate": 9.102187077631302e-05, "loss": 0.7338, "step": 27475 }, { "epoch": 1.0579403272377286, "grad_norm": 1.3922228813171387, "learning_rate": 9.099175370389482e-05, "loss": 0.9767, "step": 27480 }, { "epoch": 1.0581328200192492, "grad_norm": 1.5572834014892578, "learning_rate": 9.096163745522112e-05, "loss": 0.7942, "step": 27485 }, { "epoch": 1.05832531280077, "grad_norm": 1.2158164978027344, "learning_rate": 9.093152203304578e-05, "loss": 0.8304, "step": 27490 }, { "epoch": 1.0585178055822906, "grad_norm": 1.2715733051300049, "learning_rate": 9.090140744012265e-05, "loss": 0.7802, "step": 27495 }, { "epoch": 1.0587102983638113, "grad_norm": 1.2508288621902466, "learning_rate": 9.087129367920557e-05, "loss": 0.8579, "step": 27500 }, { "epoch": 1.0589027911453321, "grad_norm": 1.9734135866165161, "learning_rate": 9.084118075304821e-05, "loss": 0.8703, "step": 27505 }, { "epoch": 1.0590952839268528, "grad_norm": 1.4569380283355713, "learning_rate": 9.081106866440415e-05, "loss": 0.9245, 
"step": 27510 }, { "epoch": 1.0592877767083735, "grad_norm": 1.2966618537902832, "learning_rate": 9.078095741602704e-05, "loss": 0.8588, "step": 27515 }, { "epoch": 1.0594802694898942, "grad_norm": 1.5596932172775269, "learning_rate": 9.07508470106703e-05, "loss": 0.8499, "step": 27520 }, { "epoch": 1.0596727622714148, "grad_norm": 1.3186569213867188, "learning_rate": 9.07207374510873e-05, "loss": 0.825, "step": 27525 }, { "epoch": 1.0598652550529355, "grad_norm": 1.393056869506836, "learning_rate": 9.069062874003141e-05, "loss": 0.7853, "step": 27530 }, { "epoch": 1.0600577478344562, "grad_norm": 1.091132402420044, "learning_rate": 9.066052088025587e-05, "loss": 0.9176, "step": 27535 }, { "epoch": 1.0602502406159768, "grad_norm": 2.1310064792633057, "learning_rate": 9.063041387451378e-05, "loss": 0.6352, "step": 27540 }, { "epoch": 1.0604427333974975, "grad_norm": 1.153936505317688, "learning_rate": 9.060030772555833e-05, "loss": 0.9243, "step": 27545 }, { "epoch": 1.0606352261790182, "grad_norm": 1.3241852521896362, "learning_rate": 9.05702024361424e-05, "loss": 0.7682, "step": 27550 }, { "epoch": 1.060827718960539, "grad_norm": 1.4337151050567627, "learning_rate": 9.054009800901906e-05, "loss": 0.8711, "step": 27555 }, { "epoch": 1.0610202117420597, "grad_norm": 1.3560181856155396, "learning_rate": 9.050999444694108e-05, "loss": 0.8178, "step": 27560 }, { "epoch": 1.0612127045235804, "grad_norm": 1.4069291353225708, "learning_rate": 9.047989175266123e-05, "loss": 0.7403, "step": 27565 }, { "epoch": 1.061405197305101, "grad_norm": 2.0509390830993652, "learning_rate": 9.044978992893219e-05, "loss": 1.0252, "step": 27570 }, { "epoch": 1.0615976900866217, "grad_norm": 0.6099883913993835, "learning_rate": 9.041968897850664e-05, "loss": 0.8478, "step": 27575 }, { "epoch": 1.0617901828681424, "grad_norm": 1.1845272779464722, "learning_rate": 9.038958890413705e-05, "loss": 0.9869, "step": 27580 }, { "epoch": 1.061982675649663, "grad_norm": 1.2324453592300415, 
"learning_rate": 9.035948970857589e-05, "loss": 0.8353, "step": 27585 }, { "epoch": 1.0621751684311838, "grad_norm": 2.6329379081726074, "learning_rate": 9.032939139457556e-05, "loss": 0.8846, "step": 27590 }, { "epoch": 1.0623676612127044, "grad_norm": 1.4631036520004272, "learning_rate": 9.029929396488826e-05, "loss": 0.8486, "step": 27595 }, { "epoch": 1.0625601539942253, "grad_norm": 2.0869596004486084, "learning_rate": 9.026919742226633e-05, "loss": 0.9341, "step": 27600 }, { "epoch": 1.062752646775746, "grad_norm": 1.2778350114822388, "learning_rate": 9.023910176946182e-05, "loss": 0.7416, "step": 27605 }, { "epoch": 1.0629451395572667, "grad_norm": 1.2002792358398438, "learning_rate": 9.020900700922675e-05, "loss": 0.8239, "step": 27610 }, { "epoch": 1.0631376323387873, "grad_norm": 1.811063528060913, "learning_rate": 9.017891314431318e-05, "loss": 0.961, "step": 27615 }, { "epoch": 1.063330125120308, "grad_norm": 1.2313002347946167, "learning_rate": 9.014882017747294e-05, "loss": 0.987, "step": 27620 }, { "epoch": 1.0635226179018287, "grad_norm": 1.319166898727417, "learning_rate": 9.01187281114578e-05, "loss": 0.8669, "step": 27625 }, { "epoch": 1.0637151106833493, "grad_norm": 1.418129801750183, "learning_rate": 9.008863694901955e-05, "loss": 0.8651, "step": 27630 }, { "epoch": 1.06390760346487, "grad_norm": 1.349688172340393, "learning_rate": 9.00585466929098e-05, "loss": 0.7572, "step": 27635 }, { "epoch": 1.0641000962463907, "grad_norm": 1.4476646184921265, "learning_rate": 9.002845734588005e-05, "loss": 0.8944, "step": 27640 }, { "epoch": 1.0642925890279114, "grad_norm": 2.580551862716675, "learning_rate": 8.999836891068187e-05, "loss": 0.6778, "step": 27645 }, { "epoch": 1.0644850818094322, "grad_norm": 1.5499801635742188, "learning_rate": 8.99682813900666e-05, "loss": 0.7813, "step": 27650 }, { "epoch": 1.064677574590953, "grad_norm": 3.0829336643218994, "learning_rate": 8.993819478678549e-05, "loss": 0.9905, "step": 27655 }, { "epoch": 
1.0648700673724736, "grad_norm": 1.489709734916687, "learning_rate": 8.990810910358987e-05, "loss": 0.9231, "step": 27660 }, { "epoch": 1.0650625601539943, "grad_norm": 1.199367642402649, "learning_rate": 8.987802434323081e-05, "loss": 0.971, "step": 27665 }, { "epoch": 1.065255052935515, "grad_norm": 1.2478957176208496, "learning_rate": 8.984794050845933e-05, "loss": 0.9586, "step": 27670 }, { "epoch": 1.0654475457170356, "grad_norm": 1.5783430337905884, "learning_rate": 8.981785760202647e-05, "loss": 0.7706, "step": 27675 }, { "epoch": 1.0656400384985563, "grad_norm": 1.6337023973464966, "learning_rate": 8.97877756266831e-05, "loss": 0.7538, "step": 27680 }, { "epoch": 1.065832531280077, "grad_norm": 1.195513367652893, "learning_rate": 8.975769458517997e-05, "loss": 0.9013, "step": 27685 }, { "epoch": 1.0660250240615976, "grad_norm": 1.2982577085494995, "learning_rate": 8.972761448026785e-05, "loss": 0.9172, "step": 27690 }, { "epoch": 1.0662175168431185, "grad_norm": 1.3517059087753296, "learning_rate": 8.96975353146973e-05, "loss": 0.8085, "step": 27695 }, { "epoch": 1.0664100096246392, "grad_norm": 0.9628300666809082, "learning_rate": 8.966745709121893e-05, "loss": 0.9518, "step": 27700 }, { "epoch": 1.0666025024061598, "grad_norm": 1.872689127922058, "learning_rate": 8.963737981258315e-05, "loss": 0.8118, "step": 27705 }, { "epoch": 1.0667949951876805, "grad_norm": 1.307530403137207, "learning_rate": 8.960730348154037e-05, "loss": 0.7289, "step": 27710 }, { "epoch": 1.0669874879692012, "grad_norm": 1.127726435661316, "learning_rate": 8.957722810084077e-05, "loss": 0.8415, "step": 27715 }, { "epoch": 1.0671799807507218, "grad_norm": 1.2474530935287476, "learning_rate": 8.954715367323468e-05, "loss": 0.9341, "step": 27720 }, { "epoch": 1.0673724735322425, "grad_norm": 1.6957672834396362, "learning_rate": 8.951708020147212e-05, "loss": 0.9228, "step": 27725 }, { "epoch": 1.0675649663137632, "grad_norm": 2.220019817352295, "learning_rate": 8.94870076883031e-05, 
"loss": 0.9219, "step": 27730 }, { "epoch": 1.0677574590952839, "grad_norm": 0.9045952558517456, "learning_rate": 8.945693613647763e-05, "loss": 0.6171, "step": 27735 }, { "epoch": 1.0679499518768045, "grad_norm": 1.5750988721847534, "learning_rate": 8.94268655487455e-05, "loss": 0.9442, "step": 27740 }, { "epoch": 1.0681424446583252, "grad_norm": 1.328931450843811, "learning_rate": 8.939679592785646e-05, "loss": 0.8479, "step": 27745 }, { "epoch": 1.068334937439846, "grad_norm": 1.809605598449707, "learning_rate": 8.936672727656021e-05, "loss": 0.8478, "step": 27750 }, { "epoch": 1.0685274302213668, "grad_norm": 1.630718469619751, "learning_rate": 8.933665959760632e-05, "loss": 0.7898, "step": 27755 }, { "epoch": 1.0687199230028874, "grad_norm": 1.7636911869049072, "learning_rate": 8.930659289374422e-05, "loss": 0.8724, "step": 27760 }, { "epoch": 1.068912415784408, "grad_norm": 1.659645915031433, "learning_rate": 8.927652716772342e-05, "loss": 1.0174, "step": 27765 }, { "epoch": 1.0691049085659288, "grad_norm": 2.0495598316192627, "learning_rate": 8.924646242229317e-05, "loss": 0.8524, "step": 27770 }, { "epoch": 1.0692974013474494, "grad_norm": 0.9576983451843262, "learning_rate": 8.921639866020264e-05, "loss": 0.8567, "step": 27775 }, { "epoch": 1.06948989412897, "grad_norm": 1.3908922672271729, "learning_rate": 8.918633588420106e-05, "loss": 0.9384, "step": 27780 }, { "epoch": 1.0696823869104908, "grad_norm": 1.0620615482330322, "learning_rate": 8.915627409703745e-05, "loss": 0.9836, "step": 27785 }, { "epoch": 1.0698748796920114, "grad_norm": 1.0709110498428345, "learning_rate": 8.912621330146071e-05, "loss": 0.9535, "step": 27790 }, { "epoch": 1.0700673724735323, "grad_norm": 1.297035813331604, "learning_rate": 8.909615350021973e-05, "loss": 0.8007, "step": 27795 }, { "epoch": 1.070259865255053, "grad_norm": 0.9443962574005127, "learning_rate": 8.906609469606329e-05, "loss": 0.9376, "step": 27800 }, { "epoch": 1.0704523580365737, "grad_norm": 
2.0467529296875, "learning_rate": 8.903603689174006e-05, "loss": 0.9165, "step": 27805 }, { "epoch": 1.0706448508180944, "grad_norm": 1.3403770923614502, "learning_rate": 8.900598008999864e-05, "loss": 0.8009, "step": 27810 }, { "epoch": 1.070837343599615, "grad_norm": 1.8648827075958252, "learning_rate": 8.897592429358746e-05, "loss": 0.8834, "step": 27815 }, { "epoch": 1.0710298363811357, "grad_norm": 1.1583093404769897, "learning_rate": 8.894586950525502e-05, "loss": 0.8895, "step": 27820 }, { "epoch": 1.0712223291626564, "grad_norm": 1.1083035469055176, "learning_rate": 8.89158157277496e-05, "loss": 0.9152, "step": 27825 }, { "epoch": 1.071414821944177, "grad_norm": 0.8488582372665405, "learning_rate": 8.888576296381936e-05, "loss": 0.6838, "step": 27830 }, { "epoch": 1.0716073147256977, "grad_norm": 2.6488595008850098, "learning_rate": 8.885571121621251e-05, "loss": 0.9074, "step": 27835 }, { "epoch": 1.0717998075072184, "grad_norm": 1.0296202898025513, "learning_rate": 8.882566048767705e-05, "loss": 0.8427, "step": 27840 }, { "epoch": 1.0719923002887393, "grad_norm": 1.2265279293060303, "learning_rate": 8.879561078096088e-05, "loss": 0.9545, "step": 27845 }, { "epoch": 1.07218479307026, "grad_norm": 1.3146073818206787, "learning_rate": 8.87655620988119e-05, "loss": 0.8112, "step": 27850 }, { "epoch": 1.0723772858517806, "grad_norm": 1.1017197370529175, "learning_rate": 8.873551444397785e-05, "loss": 0.739, "step": 27855 }, { "epoch": 1.0725697786333013, "grad_norm": 1.3772103786468506, "learning_rate": 8.870546781920636e-05, "loss": 0.86, "step": 27860 }, { "epoch": 1.072762271414822, "grad_norm": 1.4853140115737915, "learning_rate": 8.867542222724504e-05, "loss": 0.9195, "step": 27865 }, { "epoch": 1.0729547641963426, "grad_norm": 1.3116743564605713, "learning_rate": 8.864537767084135e-05, "loss": 0.8492, "step": 27870 }, { "epoch": 1.0731472569778633, "grad_norm": 1.78131103515625, "learning_rate": 8.861533415274262e-05, "loss": 0.8794, "step": 27875 }, { 
"epoch": 1.073339749759384, "grad_norm": 1.1533085107803345, "learning_rate": 8.858529167569619e-05, "loss": 0.952, "step": 27880 }, { "epoch": 1.0735322425409046, "grad_norm": 2.657715320587158, "learning_rate": 8.855525024244923e-05, "loss": 0.885, "step": 27885 }, { "epoch": 1.0737247353224255, "grad_norm": 1.6451362371444702, "learning_rate": 8.85252098557488e-05, "loss": 0.823, "step": 27890 }, { "epoch": 1.0739172281039462, "grad_norm": 1.7528831958770752, "learning_rate": 8.84951705183419e-05, "loss": 0.7666, "step": 27895 }, { "epoch": 1.0741097208854669, "grad_norm": 1.086206078529358, "learning_rate": 8.846513223297549e-05, "loss": 0.8837, "step": 27900 }, { "epoch": 1.0743022136669875, "grad_norm": 1.413942575454712, "learning_rate": 8.843509500239628e-05, "loss": 0.737, "step": 27905 }, { "epoch": 1.0744947064485082, "grad_norm": 1.2918885946273804, "learning_rate": 8.840505882935106e-05, "loss": 0.7301, "step": 27910 }, { "epoch": 1.0746871992300289, "grad_norm": 1.7935266494750977, "learning_rate": 8.837502371658639e-05, "loss": 0.9754, "step": 27915 }, { "epoch": 1.0748796920115495, "grad_norm": 1.8554896116256714, "learning_rate": 8.834498966684876e-05, "loss": 1.0135, "step": 27920 }, { "epoch": 1.0750721847930702, "grad_norm": 1.3628082275390625, "learning_rate": 8.831495668288467e-05, "loss": 0.836, "step": 27925 }, { "epoch": 1.0752646775745909, "grad_norm": 1.181231141090393, "learning_rate": 8.828492476744037e-05, "loss": 1.0394, "step": 27930 }, { "epoch": 1.0754571703561115, "grad_norm": 1.8281370401382446, "learning_rate": 8.825489392326205e-05, "loss": 0.8396, "step": 27935 }, { "epoch": 1.0756496631376324, "grad_norm": 1.069940209388733, "learning_rate": 8.822486415309592e-05, "loss": 0.9147, "step": 27940 }, { "epoch": 1.075842155919153, "grad_norm": 1.711498737335205, "learning_rate": 8.819483545968797e-05, "loss": 0.7523, "step": 27945 }, { "epoch": 1.0760346487006738, "grad_norm": 1.5465821027755737, "learning_rate": 
8.816480784578406e-05, "loss": 0.8782, "step": 27950 }, { "epoch": 1.0762271414821944, "grad_norm": 1.0662082433700562, "learning_rate": 8.813478131413013e-05, "loss": 0.6925, "step": 27955 }, { "epoch": 1.0764196342637151, "grad_norm": 0.8745738863945007, "learning_rate": 8.810475586747183e-05, "loss": 0.7974, "step": 27960 }, { "epoch": 1.0766121270452358, "grad_norm": 1.5703550577163696, "learning_rate": 8.807473150855476e-05, "loss": 0.7965, "step": 27965 }, { "epoch": 1.0768046198267565, "grad_norm": 2.1191911697387695, "learning_rate": 8.804470824012456e-05, "loss": 0.8466, "step": 27970 }, { "epoch": 1.0769971126082771, "grad_norm": 2.149782419204712, "learning_rate": 8.801468606492659e-05, "loss": 1.009, "step": 27975 }, { "epoch": 1.0771896053897978, "grad_norm": 1.0900071859359741, "learning_rate": 8.798466498570614e-05, "loss": 0.8092, "step": 27980 }, { "epoch": 1.0773820981713187, "grad_norm": 1.022477149963379, "learning_rate": 8.795464500520851e-05, "loss": 0.9038, "step": 27985 }, { "epoch": 1.0775745909528394, "grad_norm": 1.4904948472976685, "learning_rate": 8.792462612617882e-05, "loss": 0.8276, "step": 27990 }, { "epoch": 1.07776708373436, "grad_norm": 1.8655019998550415, "learning_rate": 8.789460835136203e-05, "loss": 0.9897, "step": 27995 }, { "epoch": 1.0779595765158807, "grad_norm": 1.3288123607635498, "learning_rate": 8.786459168350316e-05, "loss": 0.7634, "step": 28000 }, { "epoch": 1.0781520692974014, "grad_norm": 1.4979158639907837, "learning_rate": 8.783457612534699e-05, "loss": 1.0058, "step": 28005 }, { "epoch": 1.078344562078922, "grad_norm": 0.987000048160553, "learning_rate": 8.780456167963821e-05, "loss": 0.7555, "step": 28010 }, { "epoch": 1.0785370548604427, "grad_norm": 1.124505639076233, "learning_rate": 8.77745483491215e-05, "loss": 0.8965, "step": 28015 }, { "epoch": 1.0787295476419634, "grad_norm": 1.8975436687469482, "learning_rate": 8.774453613654136e-05, "loss": 0.7971, "step": 28020 }, { "epoch": 1.078922040423484, 
"grad_norm": 2.273435115814209, "learning_rate": 8.77145250446422e-05, "loss": 0.9065, "step": 28025 }, { "epoch": 1.0791145332050047, "grad_norm": 2.141667366027832, "learning_rate": 8.768451507616834e-05, "loss": 0.937, "step": 28030 }, { "epoch": 1.0793070259865254, "grad_norm": 1.4163758754730225, "learning_rate": 8.7654506233864e-05, "loss": 0.8959, "step": 28035 }, { "epoch": 1.0794995187680463, "grad_norm": 2.3507914543151855, "learning_rate": 8.762449852047326e-05, "loss": 0.9258, "step": 28040 }, { "epoch": 1.079692011549567, "grad_norm": 1.0547107458114624, "learning_rate": 8.759449193874018e-05, "loss": 0.9966, "step": 28045 }, { "epoch": 1.0798845043310876, "grad_norm": 1.1531494855880737, "learning_rate": 8.756448649140864e-05, "loss": 0.8557, "step": 28050 }, { "epoch": 1.0800769971126083, "grad_norm": 0.93568354845047, "learning_rate": 8.753448218122239e-05, "loss": 0.9067, "step": 28055 }, { "epoch": 1.080269489894129, "grad_norm": 1.3804124593734741, "learning_rate": 8.75044790109252e-05, "loss": 0.9757, "step": 28060 }, { "epoch": 1.0804619826756496, "grad_norm": 1.065398097038269, "learning_rate": 8.747447698326058e-05, "loss": 0.8644, "step": 28065 }, { "epoch": 1.0806544754571703, "grad_norm": 2.9915854930877686, "learning_rate": 8.744447610097213e-05, "loss": 0.8151, "step": 28070 }, { "epoch": 1.080846968238691, "grad_norm": 1.506426453590393, "learning_rate": 8.741447636680317e-05, "loss": 1.007, "step": 28075 }, { "epoch": 1.0810394610202116, "grad_norm": 0.8236234784126282, "learning_rate": 8.738447778349692e-05, "loss": 0.8457, "step": 28080 }, { "epoch": 1.0812319538017325, "grad_norm": 1.224062442779541, "learning_rate": 8.735448035379665e-05, "loss": 0.8969, "step": 28085 }, { "epoch": 1.0814244465832532, "grad_norm": 1.5686476230621338, "learning_rate": 8.732448408044539e-05, "loss": 0.8932, "step": 28090 }, { "epoch": 1.0816169393647739, "grad_norm": 1.104203462600708, "learning_rate": 8.729448896618603e-05, "loss": 0.8526, "step": 
28095 }, { "epoch": 1.0818094321462945, "grad_norm": 0.723469614982605, "learning_rate": 8.726449501376153e-05, "loss": 0.761, "step": 28100 }, { "epoch": 1.0820019249278152, "grad_norm": 1.4393385648727417, "learning_rate": 8.723450222591462e-05, "loss": 0.876, "step": 28105 }, { "epoch": 1.0821944177093359, "grad_norm": 1.0453604459762573, "learning_rate": 8.720451060538789e-05, "loss": 0.7178, "step": 28110 }, { "epoch": 1.0823869104908566, "grad_norm": 1.7050689458847046, "learning_rate": 8.717452015492388e-05, "loss": 0.8199, "step": 28115 }, { "epoch": 1.0825794032723772, "grad_norm": 1.400423526763916, "learning_rate": 8.71445308772651e-05, "loss": 0.8114, "step": 28120 }, { "epoch": 1.082771896053898, "grad_norm": 1.93320631980896, "learning_rate": 8.711454277515377e-05, "loss": 0.7295, "step": 28125 }, { "epoch": 1.0829643888354186, "grad_norm": 1.5179084539413452, "learning_rate": 8.708455585133216e-05, "loss": 1.083, "step": 28130 }, { "epoch": 1.0831568816169395, "grad_norm": 2.2808666229248047, "learning_rate": 8.705457010854235e-05, "loss": 0.9567, "step": 28135 }, { "epoch": 1.0833493743984601, "grad_norm": 1.0833265781402588, "learning_rate": 8.702458554952635e-05, "loss": 0.8316, "step": 28140 }, { "epoch": 1.0835418671799808, "grad_norm": 1.4321517944335938, "learning_rate": 8.699460217702606e-05, "loss": 0.7606, "step": 28145 }, { "epoch": 1.0837343599615015, "grad_norm": 0.9563835263252258, "learning_rate": 8.696461999378324e-05, "loss": 0.8625, "step": 28150 }, { "epoch": 1.0839268527430221, "grad_norm": 1.0265910625457764, "learning_rate": 8.693463900253954e-05, "loss": 0.842, "step": 28155 }, { "epoch": 1.0841193455245428, "grad_norm": 1.2975974082946777, "learning_rate": 8.690465920603659e-05, "loss": 0.9416, "step": 28160 }, { "epoch": 1.0843118383060635, "grad_norm": 0.9792843461036682, "learning_rate": 8.687468060701582e-05, "loss": 0.786, "step": 28165 }, { "epoch": 1.0845043310875842, "grad_norm": 1.5455747842788696, "learning_rate": 
8.684470320821851e-05, "loss": 0.9023, "step": 28170 }, { "epoch": 1.0846968238691048, "grad_norm": 1.3106191158294678, "learning_rate": 8.681472701238599e-05, "loss": 0.793, "step": 28175 }, { "epoch": 1.0848893166506257, "grad_norm": 1.0658930540084839, "learning_rate": 8.678475202225933e-05, "loss": 0.8117, "step": 28180 }, { "epoch": 1.0850818094321464, "grad_norm": 1.1267075538635254, "learning_rate": 8.675477824057953e-05, "loss": 0.9166, "step": 28185 }, { "epoch": 1.085274302213667, "grad_norm": 2.638794183731079, "learning_rate": 8.672480567008753e-05, "loss": 0.9889, "step": 28190 }, { "epoch": 1.0854667949951877, "grad_norm": 1.0422507524490356, "learning_rate": 8.669483431352414e-05, "loss": 0.8901, "step": 28195 }, { "epoch": 1.0856592877767084, "grad_norm": 1.2292778491973877, "learning_rate": 8.666486417362996e-05, "loss": 0.917, "step": 28200 }, { "epoch": 1.085851780558229, "grad_norm": 1.1497111320495605, "learning_rate": 8.663489525314568e-05, "loss": 0.7443, "step": 28205 }, { "epoch": 1.0860442733397497, "grad_norm": 1.712801456451416, "learning_rate": 8.660492755481167e-05, "loss": 1.0358, "step": 28210 }, { "epoch": 1.0862367661212704, "grad_norm": 1.057096004486084, "learning_rate": 8.657496108136826e-05, "loss": 0.829, "step": 28215 }, { "epoch": 1.086429258902791, "grad_norm": 0.9071182012557983, "learning_rate": 8.654499583555576e-05, "loss": 0.8827, "step": 28220 }, { "epoch": 1.0866217516843117, "grad_norm": 1.586111068725586, "learning_rate": 8.651503182011427e-05, "loss": 0.8244, "step": 28225 }, { "epoch": 1.0868142444658326, "grad_norm": 0.845116913318634, "learning_rate": 8.648506903778377e-05, "loss": 0.9803, "step": 28230 }, { "epoch": 1.0870067372473533, "grad_norm": 1.4213730096817017, "learning_rate": 8.64551074913042e-05, "loss": 0.7278, "step": 28235 }, { "epoch": 1.087199230028874, "grad_norm": 1.0875608921051025, "learning_rate": 8.64251471834153e-05, "loss": 0.8259, "step": 28240 }, { "epoch": 1.0873917228103946, 
"grad_norm": 0.9991816878318787, "learning_rate": 8.639518811685676e-05, "loss": 0.847, "step": 28245 }, { "epoch": 1.0875842155919153, "grad_norm": 1.6519125699996948, "learning_rate": 8.636523029436819e-05, "loss": 0.8359, "step": 28250 }, { "epoch": 1.087776708373436, "grad_norm": 1.6119956970214844, "learning_rate": 8.633527371868896e-05, "loss": 0.9016, "step": 28255 }, { "epoch": 1.0879692011549567, "grad_norm": 0.7314580678939819, "learning_rate": 8.630531839255839e-05, "loss": 0.6791, "step": 28260 }, { "epoch": 1.0881616939364773, "grad_norm": 0.9393051266670227, "learning_rate": 8.627536431871578e-05, "loss": 0.8801, "step": 28265 }, { "epoch": 1.088354186717998, "grad_norm": 2.3234903812408447, "learning_rate": 8.624541149990018e-05, "loss": 0.8656, "step": 28270 }, { "epoch": 1.0885466794995187, "grad_norm": 0.9072073101997375, "learning_rate": 8.621545993885054e-05, "loss": 0.8702, "step": 28275 }, { "epoch": 1.0887391722810396, "grad_norm": 1.1024999618530273, "learning_rate": 8.618550963830583e-05, "loss": 0.9067, "step": 28280 }, { "epoch": 1.0889316650625602, "grad_norm": 1.7317997217178345, "learning_rate": 8.615556060100474e-05, "loss": 0.975, "step": 28285 }, { "epoch": 1.089124157844081, "grad_norm": 1.9279747009277344, "learning_rate": 8.612561282968588e-05, "loss": 0.9101, "step": 28290 }, { "epoch": 1.0893166506256016, "grad_norm": 1.848134994506836, "learning_rate": 8.609566632708786e-05, "loss": 0.7642, "step": 28295 }, { "epoch": 1.0895091434071222, "grad_norm": 1.3254327774047852, "learning_rate": 8.606572109594906e-05, "loss": 1.0743, "step": 28300 }, { "epoch": 1.089701636188643, "grad_norm": 1.483266830444336, "learning_rate": 8.60357771390077e-05, "loss": 0.7679, "step": 28305 }, { "epoch": 1.0898941289701636, "grad_norm": 1.0175541639328003, "learning_rate": 8.600583445900206e-05, "loss": 0.8647, "step": 28310 }, { "epoch": 1.0900866217516842, "grad_norm": 1.3284434080123901, "learning_rate": 8.597589305867012e-05, "loss": 0.8266, 
"step": 28315 }, { "epoch": 1.090279114533205, "grad_norm": 1.2873848676681519, "learning_rate": 8.59459529407499e-05, "loss": 0.9412, "step": 28320 }, { "epoch": 1.0904716073147256, "grad_norm": 1.6508309841156006, "learning_rate": 8.591601410797918e-05, "loss": 0.8731, "step": 28325 }, { "epoch": 1.0906641000962465, "grad_norm": 1.7631590366363525, "learning_rate": 8.588607656309565e-05, "loss": 0.8443, "step": 28330 }, { "epoch": 1.0908565928777672, "grad_norm": 1.6212012767791748, "learning_rate": 8.585614030883695e-05, "loss": 0.9399, "step": 28335 }, { "epoch": 1.0910490856592878, "grad_norm": 1.2359917163848877, "learning_rate": 8.58262053479405e-05, "loss": 0.8504, "step": 28340 }, { "epoch": 1.0912415784408085, "grad_norm": 1.0911660194396973, "learning_rate": 8.579627168314368e-05, "loss": 0.7694, "step": 28345 }, { "epoch": 1.0914340712223292, "grad_norm": 1.2516300678253174, "learning_rate": 8.576633931718374e-05, "loss": 0.7347, "step": 28350 }, { "epoch": 1.0916265640038498, "grad_norm": 1.3555577993392944, "learning_rate": 8.573640825279779e-05, "loss": 0.9815, "step": 28355 }, { "epoch": 1.0918190567853705, "grad_norm": 1.2513625621795654, "learning_rate": 8.570647849272276e-05, "loss": 0.856, "step": 28360 }, { "epoch": 1.0920115495668912, "grad_norm": 1.3024554252624512, "learning_rate": 8.567655003969563e-05, "loss": 0.8517, "step": 28365 }, { "epoch": 1.0922040423484118, "grad_norm": 0.9453557729721069, "learning_rate": 8.564662289645313e-05, "loss": 0.9392, "step": 28370 }, { "epoch": 1.0923965351299327, "grad_norm": 1.9402847290039062, "learning_rate": 8.561669706573184e-05, "loss": 0.9265, "step": 28375 }, { "epoch": 1.0925890279114534, "grad_norm": 0.8357999324798584, "learning_rate": 8.558677255026835e-05, "loss": 0.8744, "step": 28380 }, { "epoch": 1.092781520692974, "grad_norm": 1.287455439567566, "learning_rate": 8.555684935279904e-05, "loss": 0.7733, "step": 28385 }, { "epoch": 1.0929740134744947, "grad_norm": 1.2665926218032837, 
"learning_rate": 8.552692747606014e-05, "loss": 0.8229, "step": 28390 }, { "epoch": 1.0931665062560154, "grad_norm": 1.5380029678344727, "learning_rate": 8.549700692278788e-05, "loss": 0.9208, "step": 28395 }, { "epoch": 1.093358999037536, "grad_norm": 1.7858625650405884, "learning_rate": 8.546708769571827e-05, "loss": 0.8756, "step": 28400 }, { "epoch": 1.0935514918190568, "grad_norm": 2.022096633911133, "learning_rate": 8.543716979758717e-05, "loss": 0.828, "step": 28405 }, { "epoch": 1.0937439846005774, "grad_norm": 1.1538034677505493, "learning_rate": 8.540725323113047e-05, "loss": 0.8699, "step": 28410 }, { "epoch": 1.093936477382098, "grad_norm": 2.0683727264404297, "learning_rate": 8.537733799908379e-05, "loss": 0.9554, "step": 28415 }, { "epoch": 1.0941289701636188, "grad_norm": 1.3001796007156372, "learning_rate": 8.534742410418265e-05, "loss": 0.9327, "step": 28420 }, { "epoch": 1.0943214629451397, "grad_norm": 0.7399016618728638, "learning_rate": 8.531751154916254e-05, "loss": 0.904, "step": 28425 }, { "epoch": 1.0945139557266603, "grad_norm": 1.1065526008605957, "learning_rate": 8.528760033675875e-05, "loss": 0.7723, "step": 28430 }, { "epoch": 1.094706448508181, "grad_norm": 1.9489480257034302, "learning_rate": 8.525769046970642e-05, "loss": 0.8233, "step": 28435 }, { "epoch": 1.0948989412897017, "grad_norm": 2.0606977939605713, "learning_rate": 8.522778195074064e-05, "loss": 0.9149, "step": 28440 }, { "epoch": 1.0950914340712223, "grad_norm": 1.784341812133789, "learning_rate": 8.519787478259637e-05, "loss": 0.8731, "step": 28445 }, { "epoch": 1.095283926852743, "grad_norm": 0.9500823020935059, "learning_rate": 8.516796896800836e-05, "loss": 0.8144, "step": 28450 }, { "epoch": 1.0954764196342637, "grad_norm": 1.5665167570114136, "learning_rate": 8.513806450971137e-05, "loss": 0.7198, "step": 28455 }, { "epoch": 1.0956689124157843, "grad_norm": 0.8766529560089111, "learning_rate": 8.51081614104399e-05, "loss": 0.8412, "step": 28460 }, { "epoch": 
1.095861405197305, "grad_norm": 1.1104719638824463, "learning_rate": 8.507825967292842e-05, "loss": 0.7384, "step": 28465 }, { "epoch": 1.096053897978826, "grad_norm": 1.2092657089233398, "learning_rate": 8.504835929991128e-05, "loss": 0.9372, "step": 28470 }, { "epoch": 1.0962463907603466, "grad_norm": 1.244201898574829, "learning_rate": 8.501846029412262e-05, "loss": 1.0071, "step": 28475 }, { "epoch": 1.0964388835418672, "grad_norm": 1.6996936798095703, "learning_rate": 8.49885626582965e-05, "loss": 0.8313, "step": 28480 }, { "epoch": 1.096631376323388, "grad_norm": 1.2669320106506348, "learning_rate": 8.49586663951669e-05, "loss": 0.8763, "step": 28485 }, { "epoch": 1.0968238691049086, "grad_norm": 1.1832565069198608, "learning_rate": 8.492877150746764e-05, "loss": 0.9931, "step": 28490 }, { "epoch": 1.0970163618864293, "grad_norm": 0.8875114321708679, "learning_rate": 8.489887799793232e-05, "loss": 0.9127, "step": 28495 }, { "epoch": 1.09720885466795, "grad_norm": 0.9826710224151611, "learning_rate": 8.486898586929464e-05, "loss": 0.8378, "step": 28500 }, { "epoch": 1.0974013474494706, "grad_norm": 1.0573769807815552, "learning_rate": 8.483909512428796e-05, "loss": 0.8276, "step": 28505 }, { "epoch": 1.0975938402309913, "grad_norm": 0.9481573104858398, "learning_rate": 8.480920576564555e-05, "loss": 0.8358, "step": 28510 }, { "epoch": 1.097786333012512, "grad_norm": 1.1130627393722534, "learning_rate": 8.477931779610068e-05, "loss": 0.9346, "step": 28515 }, { "epoch": 1.0979788257940326, "grad_norm": 2.038252115249634, "learning_rate": 8.474943121838638e-05, "loss": 0.787, "step": 28520 }, { "epoch": 1.0981713185755535, "grad_norm": 1.3713279962539673, "learning_rate": 8.471954603523553e-05, "loss": 0.805, "step": 28525 }, { "epoch": 1.0983638113570742, "grad_norm": 1.3389123678207397, "learning_rate": 8.468966224938101e-05, "loss": 0.8322, "step": 28530 }, { "epoch": 1.0985563041385948, "grad_norm": 2.199444532394409, "learning_rate": 8.465977986355545e-05, 
"loss": 1.0354, "step": 28535 }, { "epoch": 1.0987487969201155, "grad_norm": 1.5610674619674683, "learning_rate": 8.462989888049137e-05, "loss": 0.9338, "step": 28540 }, { "epoch": 1.0989412897016362, "grad_norm": 1.1883718967437744, "learning_rate": 8.460001930292125e-05, "loss": 0.7661, "step": 28545 }, { "epoch": 1.0991337824831569, "grad_norm": 1.4481984376907349, "learning_rate": 8.457014113357736e-05, "loss": 0.8787, "step": 28550 }, { "epoch": 1.0993262752646775, "grad_norm": 1.3184305429458618, "learning_rate": 8.454026437519182e-05, "loss": 0.789, "step": 28555 }, { "epoch": 1.0995187680461982, "grad_norm": 1.6092246770858765, "learning_rate": 8.45103890304967e-05, "loss": 0.9798, "step": 28560 }, { "epoch": 1.0997112608277189, "grad_norm": 1.512444019317627, "learning_rate": 8.448051510222391e-05, "loss": 0.7606, "step": 28565 }, { "epoch": 1.0999037536092398, "grad_norm": 0.880029022693634, "learning_rate": 8.445064259310518e-05, "loss": 0.7627, "step": 28570 }, { "epoch": 1.1000962463907604, "grad_norm": 1.796959638595581, "learning_rate": 8.442077150587222e-05, "loss": 0.8827, "step": 28575 }, { "epoch": 1.100288739172281, "grad_norm": 1.1779733896255493, "learning_rate": 8.439090184325644e-05, "loss": 0.9346, "step": 28580 }, { "epoch": 1.1004812319538018, "grad_norm": 0.7998082041740417, "learning_rate": 8.436103360798934e-05, "loss": 0.799, "step": 28585 }, { "epoch": 1.1006737247353224, "grad_norm": 1.9549007415771484, "learning_rate": 8.433116680280212e-05, "loss": 1.0649, "step": 28590 }, { "epoch": 1.100866217516843, "grad_norm": 1.2117373943328857, "learning_rate": 8.430130143042584e-05, "loss": 0.8325, "step": 28595 }, { "epoch": 1.1010587102983638, "grad_norm": 1.4036140441894531, "learning_rate": 8.427143749359161e-05, "loss": 0.7584, "step": 28600 }, { "epoch": 1.1012512030798844, "grad_norm": 0.979111909866333, "learning_rate": 8.424157499503022e-05, "loss": 0.8124, "step": 28605 }, { "epoch": 1.1014436958614051, "grad_norm": 
1.6154838800430298, "learning_rate": 8.421171393747237e-05, "loss": 1.0092, "step": 28610 }, { "epoch": 1.1016361886429258, "grad_norm": 1.6151224374771118, "learning_rate": 8.418185432364873e-05, "loss": 0.8584, "step": 28615 }, { "epoch": 1.1018286814244467, "grad_norm": 0.9842272996902466, "learning_rate": 8.415199615628974e-05, "loss": 0.819, "step": 28620 }, { "epoch": 1.1020211742059673, "grad_norm": 1.6019426584243774, "learning_rate": 8.412213943812566e-05, "loss": 0.7806, "step": 28625 }, { "epoch": 1.102213666987488, "grad_norm": 1.4778478145599365, "learning_rate": 8.40922841718868e-05, "loss": 0.878, "step": 28630 }, { "epoch": 1.1024061597690087, "grad_norm": 1.3348811864852905, "learning_rate": 8.406243036030316e-05, "loss": 0.8364, "step": 28635 }, { "epoch": 1.1025986525505294, "grad_norm": 2.31394100189209, "learning_rate": 8.403257800610465e-05, "loss": 0.8119, "step": 28640 }, { "epoch": 1.10279114533205, "grad_norm": 2.239318370819092, "learning_rate": 8.400272711202115e-05, "loss": 0.865, "step": 28645 }, { "epoch": 1.1029836381135707, "grad_norm": 2.2287495136260986, "learning_rate": 8.397287768078227e-05, "loss": 0.9477, "step": 28650 }, { "epoch": 1.1031761308950914, "grad_norm": 1.1342699527740479, "learning_rate": 8.394302971511757e-05, "loss": 0.8375, "step": 28655 }, { "epoch": 1.103368623676612, "grad_norm": 1.326915979385376, "learning_rate": 8.39131832177564e-05, "loss": 0.8521, "step": 28660 }, { "epoch": 1.103561116458133, "grad_norm": 1.2437760829925537, "learning_rate": 8.38833381914281e-05, "loss": 0.8906, "step": 28665 }, { "epoch": 1.1037536092396536, "grad_norm": 1.5128203630447388, "learning_rate": 8.385349463886174e-05, "loss": 0.9937, "step": 28670 }, { "epoch": 1.1039461020211743, "grad_norm": 1.7990190982818604, "learning_rate": 8.382365256278635e-05, "loss": 0.8797, "step": 28675 }, { "epoch": 1.104138594802695, "grad_norm": 0.9128474593162537, "learning_rate": 8.379381196593075e-05, "loss": 0.8568, "step": 28680 }, { 
"epoch": 1.1043310875842156, "grad_norm": 0.8986829519271851, "learning_rate": 8.376397285102369e-05, "loss": 0.6961, "step": 28685 }, { "epoch": 1.1045235803657363, "grad_norm": 1.1345797777175903, "learning_rate": 8.373413522079377e-05, "loss": 0.8966, "step": 28690 }, { "epoch": 1.104716073147257, "grad_norm": 0.927941083908081, "learning_rate": 8.370429907796945e-05, "loss": 0.955, "step": 28695 }, { "epoch": 1.1049085659287776, "grad_norm": 1.2426468133926392, "learning_rate": 8.367446442527898e-05, "loss": 0.6989, "step": 28700 }, { "epoch": 1.1051010587102983, "grad_norm": 1.2944527864456177, "learning_rate": 8.364463126545064e-05, "loss": 0.8288, "step": 28705 }, { "epoch": 1.105293551491819, "grad_norm": 0.8285412788391113, "learning_rate": 8.361479960121242e-05, "loss": 0.7919, "step": 28710 }, { "epoch": 1.1054860442733399, "grad_norm": 1.3904072046279907, "learning_rate": 8.358496943529219e-05, "loss": 0.8751, "step": 28715 }, { "epoch": 1.1056785370548605, "grad_norm": 1.4166022539138794, "learning_rate": 8.355514077041782e-05, "loss": 0.8239, "step": 28720 }, { "epoch": 1.1058710298363812, "grad_norm": 1.4481518268585205, "learning_rate": 8.352531360931688e-05, "loss": 0.7997, "step": 28725 }, { "epoch": 1.1060635226179019, "grad_norm": 1.293131709098816, "learning_rate": 8.349548795471683e-05, "loss": 0.8535, "step": 28730 }, { "epoch": 1.1062560153994225, "grad_norm": 1.2604767084121704, "learning_rate": 8.346566380934511e-05, "loss": 0.892, "step": 28735 }, { "epoch": 1.1064485081809432, "grad_norm": 1.0578190088272095, "learning_rate": 8.343584117592894e-05, "loss": 0.8556, "step": 28740 }, { "epoch": 1.1066410009624639, "grad_norm": 0.7822481989860535, "learning_rate": 8.340602005719529e-05, "loss": 0.8676, "step": 28745 }, { "epoch": 1.1068334937439845, "grad_norm": 1.1127487421035767, "learning_rate": 8.337620045587123e-05, "loss": 0.8864, "step": 28750 }, { "epoch": 1.1070259865255052, "grad_norm": 1.4301708936691284, "learning_rate": 
8.334638237468352e-05, "loss": 0.7653, "step": 28755 }, { "epoch": 1.107218479307026, "grad_norm": 1.5563634634017944, "learning_rate": 8.331656581635879e-05, "loss": 0.8272, "step": 28760 }, { "epoch": 1.1074109720885468, "grad_norm": 1.9292504787445068, "learning_rate": 8.328675078362363e-05, "loss": 0.9858, "step": 28765 }, { "epoch": 1.1076034648700674, "grad_norm": 0.9912068247795105, "learning_rate": 8.325693727920438e-05, "loss": 0.805, "step": 28770 }, { "epoch": 1.1077959576515881, "grad_norm": 1.0305575132369995, "learning_rate": 8.32271253058273e-05, "loss": 0.9662, "step": 28775 }, { "epoch": 1.1079884504331088, "grad_norm": 1.2367445230484009, "learning_rate": 8.31973148662185e-05, "loss": 0.9497, "step": 28780 }, { "epoch": 1.1081809432146295, "grad_norm": 1.2530933618545532, "learning_rate": 8.316750596310396e-05, "loss": 0.8991, "step": 28785 }, { "epoch": 1.1083734359961501, "grad_norm": 0.8720499873161316, "learning_rate": 8.313769859920947e-05, "loss": 0.7811, "step": 28790 }, { "epoch": 1.1085659287776708, "grad_norm": 1.3998817205429077, "learning_rate": 8.310789277726079e-05, "loss": 0.8827, "step": 28795 }, { "epoch": 1.1087584215591915, "grad_norm": 2.0500388145446777, "learning_rate": 8.30780884999834e-05, "loss": 0.8259, "step": 28800 }, { "epoch": 1.1089509143407121, "grad_norm": 1.5865284204483032, "learning_rate": 8.304828577010266e-05, "loss": 0.7836, "step": 28805 }, { "epoch": 1.1091434071222328, "grad_norm": 1.2251750230789185, "learning_rate": 8.301848459034396e-05, "loss": 0.773, "step": 28810 }, { "epoch": 1.1093358999037537, "grad_norm": 1.2303904294967651, "learning_rate": 8.298868496343234e-05, "loss": 0.9684, "step": 28815 }, { "epoch": 1.1095283926852744, "grad_norm": 1.153835415840149, "learning_rate": 8.295888689209274e-05, "loss": 0.834, "step": 28820 }, { "epoch": 1.109720885466795, "grad_norm": 1.5492242574691772, "learning_rate": 8.29290903790501e-05, "loss": 0.7547, "step": 28825 }, { "epoch": 1.1099133782483157, 
"grad_norm": 1.5805435180664062, "learning_rate": 8.289929542702905e-05, "loss": 0.8278, "step": 28830 }, { "epoch": 1.1101058710298364, "grad_norm": 1.0030450820922852, "learning_rate": 8.286950203875412e-05, "loss": 0.8725, "step": 28835 }, { "epoch": 1.110298363811357, "grad_norm": 1.6949737071990967, "learning_rate": 8.28397102169498e-05, "loss": 0.8892, "step": 28840 }, { "epoch": 1.1104908565928777, "grad_norm": 1.040964126586914, "learning_rate": 8.280991996434025e-05, "loss": 0.9165, "step": 28845 }, { "epoch": 1.1106833493743984, "grad_norm": 1.1363362073898315, "learning_rate": 8.278013128364967e-05, "loss": 0.8674, "step": 28850 }, { "epoch": 1.110875842155919, "grad_norm": 0.8567915558815002, "learning_rate": 8.275034417760205e-05, "loss": 0.8689, "step": 28855 }, { "epoch": 1.11106833493744, "grad_norm": 1.597185492515564, "learning_rate": 8.272055864892112e-05, "loss": 0.7718, "step": 28860 }, { "epoch": 1.1112608277189606, "grad_norm": 0.7887008786201477, "learning_rate": 8.269077470033068e-05, "loss": 0.8011, "step": 28865 }, { "epoch": 1.1114533205004813, "grad_norm": 1.3229820728302002, "learning_rate": 8.266099233455423e-05, "loss": 0.8735, "step": 28870 }, { "epoch": 1.111645813282002, "grad_norm": 1.3275078535079956, "learning_rate": 8.263121155431515e-05, "loss": 0.9547, "step": 28875 }, { "epoch": 1.1118383060635226, "grad_norm": 1.2389519214630127, "learning_rate": 8.260143236233673e-05, "loss": 1.079, "step": 28880 }, { "epoch": 1.1120307988450433, "grad_norm": 1.1708885431289673, "learning_rate": 8.257165476134207e-05, "loss": 0.9086, "step": 28885 }, { "epoch": 1.112223291626564, "grad_norm": 1.1817975044250488, "learning_rate": 8.254187875405411e-05, "loss": 0.914, "step": 28890 }, { "epoch": 1.1124157844080846, "grad_norm": 1.4715931415557861, "learning_rate": 8.251210434319572e-05, "loss": 0.9382, "step": 28895 }, { "epoch": 1.1126082771896053, "grad_norm": 1.0971523523330688, "learning_rate": 8.248233153148953e-05, "loss": 0.6749, 
"step": 28900 }, { "epoch": 1.112800769971126, "grad_norm": 0.9213184714317322, "learning_rate": 8.245256032165804e-05, "loss": 0.8894, "step": 28905 }, { "epoch": 1.1129932627526469, "grad_norm": 1.3410868644714355, "learning_rate": 8.242279071642371e-05, "loss": 0.8031, "step": 28910 }, { "epoch": 1.1131857555341675, "grad_norm": 0.9468138813972473, "learning_rate": 8.239302271850874e-05, "loss": 0.9081, "step": 28915 }, { "epoch": 1.1133782483156882, "grad_norm": 0.9489649534225464, "learning_rate": 8.236325633063515e-05, "loss": 0.9047, "step": 28920 }, { "epoch": 1.1135707410972089, "grad_norm": 1.7119163274765015, "learning_rate": 8.233349155552499e-05, "loss": 0.7558, "step": 28925 }, { "epoch": 1.1137632338787296, "grad_norm": 1.610944151878357, "learning_rate": 8.230372839589998e-05, "loss": 0.868, "step": 28930 }, { "epoch": 1.1139557266602502, "grad_norm": 1.2964144945144653, "learning_rate": 8.227396685448176e-05, "loss": 0.7636, "step": 28935 }, { "epoch": 1.114148219441771, "grad_norm": 1.2506333589553833, "learning_rate": 8.224420693399187e-05, "loss": 0.8933, "step": 28940 }, { "epoch": 1.1143407122232916, "grad_norm": 1.260901689529419, "learning_rate": 8.221444863715165e-05, "loss": 0.8687, "step": 28945 }, { "epoch": 1.1145332050048122, "grad_norm": 0.6778833866119385, "learning_rate": 8.218469196668222e-05, "loss": 0.7826, "step": 28950 }, { "epoch": 1.1147256977863331, "grad_norm": 2.43918514251709, "learning_rate": 8.215493692530475e-05, "loss": 1.0159, "step": 28955 }, { "epoch": 1.1149181905678538, "grad_norm": 1.1964274644851685, "learning_rate": 8.21251835157401e-05, "loss": 0.8936, "step": 28960 }, { "epoch": 1.1151106833493745, "grad_norm": 1.0071650743484497, "learning_rate": 8.209543174070894e-05, "loss": 0.894, "step": 28965 }, { "epoch": 1.1153031761308951, "grad_norm": 1.0765222311019897, "learning_rate": 8.2065681602932e-05, "loss": 0.943, "step": 28970 }, { "epoch": 1.1154956689124158, "grad_norm": 1.2114105224609375, 
"learning_rate": 8.203593310512967e-05, "loss": 0.9682, "step": 28975 }, { "epoch": 1.1156881616939365, "grad_norm": 0.9356218576431274, "learning_rate": 8.200618625002223e-05, "loss": 0.7903, "step": 28980 }, { "epoch": 1.1158806544754571, "grad_norm": 1.1432327032089233, "learning_rate": 8.197644104032987e-05, "loss": 0.79, "step": 28985 }, { "epoch": 1.1160731472569778, "grad_norm": 1.7446037530899048, "learning_rate": 8.194669747877259e-05, "loss": 0.8792, "step": 28990 }, { "epoch": 1.1162656400384985, "grad_norm": 2.04817533493042, "learning_rate": 8.191695556807022e-05, "loss": 0.9906, "step": 28995 }, { "epoch": 1.1164581328200192, "grad_norm": 1.7263697385787964, "learning_rate": 8.18872153109425e-05, "loss": 0.7607, "step": 29000 }, { "epoch": 1.1166506256015398, "grad_norm": 1.177629828453064, "learning_rate": 8.185747671010891e-05, "loss": 0.7068, "step": 29005 }, { "epoch": 1.1168431183830607, "grad_norm": 0.9758390188217163, "learning_rate": 8.18277397682889e-05, "loss": 0.9517, "step": 29010 }, { "epoch": 1.1170356111645814, "grad_norm": 1.2482577562332153, "learning_rate": 8.179800448820174e-05, "loss": 0.839, "step": 29015 }, { "epoch": 1.117228103946102, "grad_norm": 1.2977570295333862, "learning_rate": 8.176827087256649e-05, "loss": 0.8067, "step": 29020 }, { "epoch": 1.1174205967276227, "grad_norm": 1.2474614381790161, "learning_rate": 8.173853892410206e-05, "loss": 0.947, "step": 29025 }, { "epoch": 1.1176130895091434, "grad_norm": 1.5093929767608643, "learning_rate": 8.17088086455273e-05, "loss": 0.67, "step": 29030 }, { "epoch": 1.117805582290664, "grad_norm": 1.944604754447937, "learning_rate": 8.167908003956082e-05, "loss": 0.8075, "step": 29035 }, { "epoch": 1.1179980750721847, "grad_norm": 1.2519726753234863, "learning_rate": 8.164935310892108e-05, "loss": 0.9158, "step": 29040 }, { "epoch": 1.1181905678537054, "grad_norm": 1.3411145210266113, "learning_rate": 8.161962785632646e-05, "loss": 0.9831, "step": 29045 }, { "epoch": 
1.118383060635226, "grad_norm": 1.7142786979675293, "learning_rate": 8.158990428449514e-05, "loss": 0.8387, "step": 29050 }, { "epoch": 1.118575553416747, "grad_norm": 1.1233665943145752, "learning_rate": 8.156018239614505e-05, "loss": 0.8295, "step": 29055 }, { "epoch": 1.1187680461982676, "grad_norm": 1.210729718208313, "learning_rate": 8.153046219399418e-05, "loss": 0.8221, "step": 29060 }, { "epoch": 1.1189605389797883, "grad_norm": 1.134706735610962, "learning_rate": 8.150074368076019e-05, "loss": 0.8517, "step": 29065 }, { "epoch": 1.119153031761309, "grad_norm": 1.5711884498596191, "learning_rate": 8.14710268591606e-05, "loss": 0.8291, "step": 29070 }, { "epoch": 1.1193455245428297, "grad_norm": 1.2242764234542847, "learning_rate": 8.144131173191292e-05, "loss": 0.8554, "step": 29075 }, { "epoch": 1.1195380173243503, "grad_norm": 0.9937723875045776, "learning_rate": 8.141159830173432e-05, "loss": 0.9085, "step": 29080 }, { "epoch": 1.119730510105871, "grad_norm": 1.4553319215774536, "learning_rate": 8.138188657134188e-05, "loss": 0.7981, "step": 29085 }, { "epoch": 1.1199230028873917, "grad_norm": 1.1583729982376099, "learning_rate": 8.135217654345263e-05, "loss": 0.8145, "step": 29090 }, { "epoch": 1.1201154956689123, "grad_norm": 1.2025479078292847, "learning_rate": 8.132246822078328e-05, "loss": 0.925, "step": 29095 }, { "epoch": 1.120307988450433, "grad_norm": 1.4865069389343262, "learning_rate": 8.12927616060505e-05, "loss": 0.7075, "step": 29100 }, { "epoch": 1.120500481231954, "grad_norm": 2.3236899375915527, "learning_rate": 8.126305670197073e-05, "loss": 0.9911, "step": 29105 }, { "epoch": 1.1206929740134746, "grad_norm": 0.9563505053520203, "learning_rate": 8.12333535112603e-05, "loss": 0.7803, "step": 29110 }, { "epoch": 1.1208854667949952, "grad_norm": 1.071954607963562, "learning_rate": 8.12036520366354e-05, "loss": 0.808, "step": 29115 }, { "epoch": 1.121077959576516, "grad_norm": 0.5228383541107178, "learning_rate": 8.117395228081202e-05, 
"loss": 0.8721, "step": 29120 }, { "epoch": 1.1212704523580366, "grad_norm": 1.238578200340271, "learning_rate": 8.114425424650592e-05, "loss": 0.8783, "step": 29125 }, { "epoch": 1.1214629451395572, "grad_norm": 0.9781158566474915, "learning_rate": 8.111455793643292e-05, "loss": 0.8673, "step": 29130 }, { "epoch": 1.121655437921078, "grad_norm": 1.529806137084961, "learning_rate": 8.10848633533085e-05, "loss": 0.9377, "step": 29135 }, { "epoch": 1.1218479307025986, "grad_norm": 1.0390987396240234, "learning_rate": 8.105517049984797e-05, "loss": 0.8467, "step": 29140 }, { "epoch": 1.1220404234841193, "grad_norm": 1.066085696220398, "learning_rate": 8.102547937876664e-05, "loss": 0.7405, "step": 29145 }, { "epoch": 1.1222329162656401, "grad_norm": 0.9266438484191895, "learning_rate": 8.099578999277953e-05, "loss": 0.8576, "step": 29150 }, { "epoch": 1.1224254090471608, "grad_norm": 1.52492356300354, "learning_rate": 8.09661023446015e-05, "loss": 0.86, "step": 29155 }, { "epoch": 1.1226179018286815, "grad_norm": 1.8862872123718262, "learning_rate": 8.093641643694736e-05, "loss": 0.9879, "step": 29160 }, { "epoch": 1.1228103946102022, "grad_norm": 1.223181128501892, "learning_rate": 8.090673227253165e-05, "loss": 0.8792, "step": 29165 }, { "epoch": 1.1230028873917228, "grad_norm": 1.4018183946609497, "learning_rate": 8.087704985406875e-05, "loss": 0.9087, "step": 29170 }, { "epoch": 1.1231953801732435, "grad_norm": 0.8999775052070618, "learning_rate": 8.084736918427302e-05, "loss": 0.9368, "step": 29175 }, { "epoch": 1.1233878729547642, "grad_norm": 1.3538589477539062, "learning_rate": 8.081769026585848e-05, "loss": 0.7531, "step": 29180 }, { "epoch": 1.1235803657362848, "grad_norm": 1.039942741394043, "learning_rate": 8.078801310153906e-05, "loss": 0.89, "step": 29185 }, { "epoch": 1.1237728585178055, "grad_norm": 1.8016685247421265, "learning_rate": 8.075833769402861e-05, "loss": 0.8751, "step": 29190 }, { "epoch": 1.1239653512993262, "grad_norm": 
1.5245671272277832, "learning_rate": 8.072866404604073e-05, "loss": 0.7489, "step": 29195 }, { "epoch": 1.124157844080847, "grad_norm": 1.9634850025177002, "learning_rate": 8.069899216028883e-05, "loss": 0.7756, "step": 29200 }, { "epoch": 1.1243503368623677, "grad_norm": 1.0487682819366455, "learning_rate": 8.066932203948625e-05, "loss": 0.697, "step": 29205 }, { "epoch": 1.1245428296438884, "grad_norm": 1.292322039604187, "learning_rate": 8.063965368634611e-05, "loss": 0.8285, "step": 29210 }, { "epoch": 1.124735322425409, "grad_norm": 1.1624624729156494, "learning_rate": 8.06099871035814e-05, "loss": 0.7489, "step": 29215 }, { "epoch": 1.1249278152069297, "grad_norm": 0.9536521434783936, "learning_rate": 8.058032229390493e-05, "loss": 0.8356, "step": 29220 }, { "epoch": 1.1251203079884504, "grad_norm": 1.499011516571045, "learning_rate": 8.055065926002933e-05, "loss": 0.964, "step": 29225 }, { "epoch": 1.125312800769971, "grad_norm": 2.0605900287628174, "learning_rate": 8.052099800466708e-05, "loss": 0.8371, "step": 29230 }, { "epoch": 1.1255052935514918, "grad_norm": 1.7930225133895874, "learning_rate": 8.049133853053057e-05, "loss": 0.7997, "step": 29235 }, { "epoch": 1.1256977863330124, "grad_norm": 2.043825626373291, "learning_rate": 8.04616808403319e-05, "loss": 0.8135, "step": 29240 }, { "epoch": 1.1258902791145333, "grad_norm": 1.0916283130645752, "learning_rate": 8.043202493678306e-05, "loss": 0.8819, "step": 29245 }, { "epoch": 1.126082771896054, "grad_norm": 1.748055100440979, "learning_rate": 8.040237082259594e-05, "loss": 0.8833, "step": 29250 }, { "epoch": 1.1262752646775747, "grad_norm": 1.4773916006088257, "learning_rate": 8.03727185004822e-05, "loss": 0.7417, "step": 29255 }, { "epoch": 1.1264677574590953, "grad_norm": 0.9700600504875183, "learning_rate": 8.034306797315328e-05, "loss": 0.7682, "step": 29260 }, { "epoch": 1.126660250240616, "grad_norm": 1.6670438051223755, "learning_rate": 8.031341924332063e-05, "loss": 0.8024, "step": 29265 }, { 
"epoch": 1.1268527430221367, "grad_norm": 1.4361203908920288, "learning_rate": 8.028377231369538e-05, "loss": 0.9223, "step": 29270 }, { "epoch": 1.1270452358036573, "grad_norm": 2.1577651500701904, "learning_rate": 8.02541271869885e-05, "loss": 0.7592, "step": 29275 }, { "epoch": 1.127237728585178, "grad_norm": 1.7753071784973145, "learning_rate": 8.022448386591093e-05, "loss": 0.7609, "step": 29280 }, { "epoch": 1.1274302213666987, "grad_norm": 1.2456077337265015, "learning_rate": 8.01948423531733e-05, "loss": 0.9491, "step": 29285 }, { "epoch": 1.1276227141482194, "grad_norm": 1.634460687637329, "learning_rate": 8.01652026514861e-05, "loss": 0.9136, "step": 29290 }, { "epoch": 1.12781520692974, "grad_norm": 1.8183555603027344, "learning_rate": 8.013556476355976e-05, "loss": 0.8587, "step": 29295 }, { "epoch": 1.128007699711261, "grad_norm": 1.5357935428619385, "learning_rate": 8.010592869210445e-05, "loss": 0.8325, "step": 29300 }, { "epoch": 1.1282001924927816, "grad_norm": 1.0693864822387695, "learning_rate": 8.007629443983013e-05, "loss": 0.7546, "step": 29305 }, { "epoch": 1.1283926852743023, "grad_norm": 1.100164771080017, "learning_rate": 8.004666200944673e-05, "loss": 0.9358, "step": 29310 }, { "epoch": 1.128585178055823, "grad_norm": 1.2228190898895264, "learning_rate": 8.00170314036639e-05, "loss": 0.9255, "step": 29315 }, { "epoch": 1.1287776708373436, "grad_norm": 1.408209204673767, "learning_rate": 7.998740262519118e-05, "loss": 0.7362, "step": 29320 }, { "epoch": 1.1289701636188643, "grad_norm": 2.5063939094543457, "learning_rate": 7.995777567673791e-05, "loss": 0.7319, "step": 29325 }, { "epoch": 1.129162656400385, "grad_norm": 1.9858168363571167, "learning_rate": 7.992815056101331e-05, "loss": 0.9686, "step": 29330 }, { "epoch": 1.1293551491819056, "grad_norm": 1.276520013809204, "learning_rate": 7.989852728072635e-05, "loss": 0.8679, "step": 29335 }, { "epoch": 1.1295476419634265, "grad_norm": 1.9740073680877686, "learning_rate": 
7.986890583858593e-05, "loss": 0.9984, "step": 29340 }, { "epoch": 1.1297401347449472, "grad_norm": 0.7693697214126587, "learning_rate": 7.983928623730073e-05, "loss": 0.7382, "step": 29345 }, { "epoch": 1.1299326275264678, "grad_norm": 1.349514365196228, "learning_rate": 7.980966847957918e-05, "loss": 0.9728, "step": 29350 }, { "epoch": 1.1301251203079885, "grad_norm": 1.5095326900482178, "learning_rate": 7.978005256812977e-05, "loss": 0.9931, "step": 29355 }, { "epoch": 1.1303176130895092, "grad_norm": 1.1477704048156738, "learning_rate": 7.975043850566053e-05, "loss": 0.7388, "step": 29360 }, { "epoch": 1.1305101058710298, "grad_norm": 1.9314098358154297, "learning_rate": 7.972082629487962e-05, "loss": 0.8511, "step": 29365 }, { "epoch": 1.1307025986525505, "grad_norm": 2.503371477127075, "learning_rate": 7.969121593849481e-05, "loss": 0.8943, "step": 29370 }, { "epoch": 1.1308950914340712, "grad_norm": 1.379692792892456, "learning_rate": 7.966160743921371e-05, "loss": 0.8356, "step": 29375 }, { "epoch": 1.1310875842155919, "grad_norm": 1.3109577894210815, "learning_rate": 7.963200079974394e-05, "loss": 0.9496, "step": 29380 }, { "epoch": 1.1312800769971125, "grad_norm": 1.2980083227157593, "learning_rate": 7.960239602279275e-05, "loss": 0.9289, "step": 29385 }, { "epoch": 1.1314725697786332, "grad_norm": 1.131455659866333, "learning_rate": 7.95727931110673e-05, "loss": 0.8283, "step": 29390 }, { "epoch": 1.131665062560154, "grad_norm": 2.513030529022217, "learning_rate": 7.954319206727464e-05, "loss": 0.8418, "step": 29395 }, { "epoch": 1.1318575553416748, "grad_norm": 1.2375028133392334, "learning_rate": 7.951359289412154e-05, "loss": 0.9583, "step": 29400 }, { "epoch": 1.1320500481231954, "grad_norm": 1.336858868598938, "learning_rate": 7.948399559431463e-05, "loss": 0.8757, "step": 29405 }, { "epoch": 1.132242540904716, "grad_norm": 1.1518652439117432, "learning_rate": 7.945440017056045e-05, "loss": 0.8316, "step": 29410 }, { "epoch": 1.1324350336862368, 
"grad_norm": 1.9742542505264282, "learning_rate": 7.942480662556527e-05, "loss": 0.7784, "step": 29415 }, { "epoch": 1.1326275264677574, "grad_norm": 1.131545066833496, "learning_rate": 7.939521496203521e-05, "loss": 0.974, "step": 29420 }, { "epoch": 1.132820019249278, "grad_norm": 1.558993935585022, "learning_rate": 7.936562518267625e-05, "loss": 1.0205, "step": 29425 }, { "epoch": 1.1330125120307988, "grad_norm": 1.082209825515747, "learning_rate": 7.93360372901942e-05, "loss": 1.0551, "step": 29430 }, { "epoch": 1.1332050048123194, "grad_norm": 1.8491493463516235, "learning_rate": 7.930645128729462e-05, "loss": 0.8227, "step": 29435 }, { "epoch": 1.1333974975938403, "grad_norm": 1.647204875946045, "learning_rate": 7.927686717668302e-05, "loss": 0.9909, "step": 29440 }, { "epoch": 1.133589990375361, "grad_norm": 1.1486252546310425, "learning_rate": 7.924728496106464e-05, "loss": 0.8855, "step": 29445 }, { "epoch": 1.1337824831568817, "grad_norm": 1.3618193864822388, "learning_rate": 7.921770464314453e-05, "loss": 0.8427, "step": 29450 }, { "epoch": 1.1339749759384024, "grad_norm": 1.4775711297988892, "learning_rate": 7.91881262256277e-05, "loss": 0.8925, "step": 29455 }, { "epoch": 1.134167468719923, "grad_norm": 1.0450371503829956, "learning_rate": 7.915854971121886e-05, "loss": 0.6872, "step": 29460 }, { "epoch": 1.1343599615014437, "grad_norm": 1.4951140880584717, "learning_rate": 7.912897510262255e-05, "loss": 0.7814, "step": 29465 }, { "epoch": 1.1345524542829644, "grad_norm": 0.9859576225280762, "learning_rate": 7.909940240254325e-05, "loss": 0.7146, "step": 29470 }, { "epoch": 1.134744947064485, "grad_norm": 2.4736711978912354, "learning_rate": 7.906983161368515e-05, "loss": 1.0436, "step": 29475 }, { "epoch": 1.1349374398460057, "grad_norm": 0.912487268447876, "learning_rate": 7.904026273875225e-05, "loss": 0.8881, "step": 29480 }, { "epoch": 1.1351299326275264, "grad_norm": 1.1811585426330566, "learning_rate": 7.901069578044852e-05, "loss": 0.8878, 
"step": 29485 }, { "epoch": 1.135322425409047, "grad_norm": 1.4744383096694946, "learning_rate": 7.898113074147762e-05, "loss": 0.8272, "step": 29490 }, { "epoch": 1.135514918190568, "grad_norm": 1.061719536781311, "learning_rate": 7.895748009403729e-05, "loss": 1.3432, "step": 29495 }, { "epoch": 1.1357074109720886, "grad_norm": 1.2878315448760986, "learning_rate": 7.892791851667824e-05, "loss": 1.045, "step": 29500 }, { "epoch": 1.1358999037536093, "grad_norm": 1.3372122049331665, "learning_rate": 7.889835886622145e-05, "loss": 0.8942, "step": 29505 }, { "epoch": 1.13609239653513, "grad_norm": 1.7610641717910767, "learning_rate": 7.886880114536997e-05, "loss": 0.9568, "step": 29510 }, { "epoch": 1.1362848893166506, "grad_norm": 1.9298161268234253, "learning_rate": 7.883924535682665e-05, "loss": 0.8845, "step": 29515 }, { "epoch": 1.1364773820981713, "grad_norm": 1.6231752634048462, "learning_rate": 7.880969150329412e-05, "loss": 0.7979, "step": 29520 }, { "epoch": 1.136669874879692, "grad_norm": 2.3804054260253906, "learning_rate": 7.878013958747496e-05, "loss": 1.0056, "step": 29525 }, { "epoch": 1.1368623676612126, "grad_norm": 1.7176682949066162, "learning_rate": 7.875058961207148e-05, "loss": 0.8073, "step": 29530 }, { "epoch": 1.1370548604427335, "grad_norm": 1.6357464790344238, "learning_rate": 7.872104157978576e-05, "loss": 1.0359, "step": 29535 }, { "epoch": 1.1372473532242542, "grad_norm": 0.9791454076766968, "learning_rate": 7.869149549331989e-05, "loss": 0.5608, "step": 29540 }, { "epoch": 1.1374398460057749, "grad_norm": 1.282942533493042, "learning_rate": 7.866195135537558e-05, "loss": 0.8483, "step": 29545 }, { "epoch": 1.1376323387872955, "grad_norm": 1.7176309823989868, "learning_rate": 7.863240916865445e-05, "loss": 0.8454, "step": 29550 }, { "epoch": 1.1378248315688162, "grad_norm": 2.1249489784240723, "learning_rate": 7.860286893585798e-05, "loss": 1.0242, "step": 29555 }, { "epoch": 1.1380173243503369, "grad_norm": 1.37616765499115, 
"learning_rate": 7.85733306596874e-05, "loss": 0.7752, "step": 29560 }, { "epoch": 1.1382098171318575, "grad_norm": 1.312436580657959, "learning_rate": 7.854379434284375e-05, "loss": 0.7942, "step": 29565 }, { "epoch": 1.1384023099133782, "grad_norm": 1.2645299434661865, "learning_rate": 7.851425998802801e-05, "loss": 0.8355, "step": 29570 }, { "epoch": 1.1385948026948989, "grad_norm": 1.6107802391052246, "learning_rate": 7.848472759794087e-05, "loss": 0.7958, "step": 29575 }, { "epoch": 1.1387872954764195, "grad_norm": 1.874605655670166, "learning_rate": 7.845519717528284e-05, "loss": 1.0369, "step": 29580 }, { "epoch": 1.1389797882579402, "grad_norm": 0.8068329691886902, "learning_rate": 7.842566872275433e-05, "loss": 0.7825, "step": 29585 }, { "epoch": 1.139172281039461, "grad_norm": 0.9913658499717712, "learning_rate": 7.839614224305551e-05, "loss": 0.8762, "step": 29590 }, { "epoch": 1.1393647738209818, "grad_norm": 1.0012116432189941, "learning_rate": 7.836661773888632e-05, "loss": 0.7111, "step": 29595 }, { "epoch": 1.1395572666025024, "grad_norm": 1.5434329509735107, "learning_rate": 7.833709521294668e-05, "loss": 0.7117, "step": 29600 }, { "epoch": 1.1397497593840231, "grad_norm": 1.6689229011535645, "learning_rate": 7.830757466793617e-05, "loss": 0.878, "step": 29605 }, { "epoch": 1.1399422521655438, "grad_norm": 1.267446756362915, "learning_rate": 7.827805610655423e-05, "loss": 0.8429, "step": 29610 }, { "epoch": 1.1401347449470645, "grad_norm": 1.4829270839691162, "learning_rate": 7.824853953150019e-05, "loss": 0.9683, "step": 29615 }, { "epoch": 1.1403272377285851, "grad_norm": 1.028507113456726, "learning_rate": 7.821902494547309e-05, "loss": 0.7345, "step": 29620 }, { "epoch": 1.1405197305101058, "grad_norm": 1.30324125289917, "learning_rate": 7.818951235117191e-05, "loss": 0.8416, "step": 29625 }, { "epoch": 1.1407122232916265, "grad_norm": 1.0241384506225586, "learning_rate": 7.816000175129534e-05, "loss": 0.9229, "step": 29630 }, { "epoch": 
1.1409047160731474, "grad_norm": 1.6417129039764404, "learning_rate": 7.813049314854189e-05, "loss": 1.075, "step": 29635 }, { "epoch": 1.141097208854668, "grad_norm": 0.99880450963974, "learning_rate": 7.810098654561002e-05, "loss": 0.6836, "step": 29640 }, { "epoch": 1.1412897016361887, "grad_norm": 1.3350822925567627, "learning_rate": 7.807148194519784e-05, "loss": 0.8302, "step": 29645 }, { "epoch": 1.1414821944177094, "grad_norm": 1.4701656103134155, "learning_rate": 7.804197935000336e-05, "loss": 0.8758, "step": 29650 }, { "epoch": 1.14167468719923, "grad_norm": 1.867395281791687, "learning_rate": 7.801247876272444e-05, "loss": 0.8234, "step": 29655 }, { "epoch": 1.1418671799807507, "grad_norm": 1.0658506155014038, "learning_rate": 7.798298018605868e-05, "loss": 0.854, "step": 29660 }, { "epoch": 1.1420596727622714, "grad_norm": 1.4265475273132324, "learning_rate": 7.795348362270349e-05, "loss": 0.8507, "step": 29665 }, { "epoch": 1.142252165543792, "grad_norm": 1.2551696300506592, "learning_rate": 7.792398907535622e-05, "loss": 0.9168, "step": 29670 }, { "epoch": 1.1424446583253127, "grad_norm": 1.5808075666427612, "learning_rate": 7.789449654671391e-05, "loss": 0.9197, "step": 29675 }, { "epoch": 1.1426371511068334, "grad_norm": 1.2171685695648193, "learning_rate": 7.786500603947342e-05, "loss": 0.8965, "step": 29680 }, { "epoch": 1.142829643888354, "grad_norm": 1.4671955108642578, "learning_rate": 7.783551755633152e-05, "loss": 0.84, "step": 29685 }, { "epoch": 1.143022136669875, "grad_norm": 0.9525561332702637, "learning_rate": 7.780603109998475e-05, "loss": 0.9211, "step": 29690 }, { "epoch": 1.1432146294513956, "grad_norm": 2.0476224422454834, "learning_rate": 7.777654667312934e-05, "loss": 0.8695, "step": 29695 }, { "epoch": 1.1434071222329163, "grad_norm": 0.7733902335166931, "learning_rate": 7.774706427846157e-05, "loss": 0.8636, "step": 29700 }, { "epoch": 1.143599615014437, "grad_norm": 2.125204086303711, "learning_rate": 7.771758391867736e-05, 
"loss": 1.0092, "step": 29705 }, { "epoch": 1.1437921077959576, "grad_norm": 1.576104998588562, "learning_rate": 7.768810559647248e-05, "loss": 0.7684, "step": 29710 }, { "epoch": 1.1439846005774783, "grad_norm": 1.8463459014892578, "learning_rate": 7.765862931454256e-05, "loss": 0.9924, "step": 29715 }, { "epoch": 1.144177093358999, "grad_norm": 1.1519972085952759, "learning_rate": 7.762915507558295e-05, "loss": 0.8446, "step": 29720 }, { "epoch": 1.1443695861405196, "grad_norm": 1.6794402599334717, "learning_rate": 7.759968288228892e-05, "loss": 0.8016, "step": 29725 }, { "epoch": 1.1445620789220405, "grad_norm": 0.8587988018989563, "learning_rate": 7.757021273735554e-05, "loss": 0.7753, "step": 29730 }, { "epoch": 1.1447545717035612, "grad_norm": 1.1401095390319824, "learning_rate": 7.754074464347761e-05, "loss": 0.7876, "step": 29735 }, { "epoch": 1.1449470644850819, "grad_norm": 0.9994805455207825, "learning_rate": 7.751127860334977e-05, "loss": 0.9192, "step": 29740 }, { "epoch": 1.1451395572666025, "grad_norm": 1.1678563356399536, "learning_rate": 7.748181461966658e-05, "loss": 0.8725, "step": 29745 }, { "epoch": 1.1453320500481232, "grad_norm": 0.8279195427894592, "learning_rate": 7.745235269512224e-05, "loss": 0.7797, "step": 29750 }, { "epoch": 1.1455245428296439, "grad_norm": 1.5446690320968628, "learning_rate": 7.742289283241086e-05, "loss": 0.7591, "step": 29755 }, { "epoch": 1.1457170356111646, "grad_norm": 1.552101969718933, "learning_rate": 7.739343503422641e-05, "loss": 0.958, "step": 29760 }, { "epoch": 1.1459095283926852, "grad_norm": 1.6127655506134033, "learning_rate": 7.736397930326259e-05, "loss": 0.8331, "step": 29765 }, { "epoch": 1.146102021174206, "grad_norm": 1.63381028175354, "learning_rate": 7.733452564221284e-05, "loss": 1.0676, "step": 29770 }, { "epoch": 1.1462945139557266, "grad_norm": 2.0071661472320557, "learning_rate": 7.730507405377062e-05, "loss": 0.8955, "step": 29775 }, { "epoch": 1.1464870067372472, "grad_norm": 
1.577379822731018, "learning_rate": 7.727562454062906e-05, "loss": 0.9404, "step": 29780 }, { "epoch": 1.1466794995187681, "grad_norm": 1.0836290121078491, "learning_rate": 7.724617710548104e-05, "loss": 0.9939, "step": 29785 }, { "epoch": 1.1468719923002888, "grad_norm": 1.5348353385925293, "learning_rate": 7.721673175101944e-05, "loss": 0.8655, "step": 29790 }, { "epoch": 1.1470644850818095, "grad_norm": 1.7705544233322144, "learning_rate": 7.718728847993679e-05, "loss": 0.9213, "step": 29795 }, { "epoch": 1.1472569778633301, "grad_norm": 1.228938341140747, "learning_rate": 7.715784729492546e-05, "loss": 0.8888, "step": 29800 }, { "epoch": 1.1474494706448508, "grad_norm": 1.3519344329833984, "learning_rate": 7.712840819867771e-05, "loss": 0.7919, "step": 29805 }, { "epoch": 1.1476419634263715, "grad_norm": 2.427093267440796, "learning_rate": 7.709897119388553e-05, "loss": 0.9861, "step": 29810 }, { "epoch": 1.1478344562078922, "grad_norm": 1.0887788534164429, "learning_rate": 7.706953628324069e-05, "loss": 0.9012, "step": 29815 }, { "epoch": 1.1480269489894128, "grad_norm": 1.7134822607040405, "learning_rate": 7.704010346943488e-05, "loss": 0.849, "step": 29820 }, { "epoch": 1.1482194417709337, "grad_norm": 0.9797286987304688, "learning_rate": 7.701067275515952e-05, "loss": 0.8742, "step": 29825 }, { "epoch": 1.1484119345524544, "grad_norm": 1.1243619918823242, "learning_rate": 7.698124414310584e-05, "loss": 0.7453, "step": 29830 }, { "epoch": 1.148604427333975, "grad_norm": 0.9771775603294373, "learning_rate": 7.695181763596489e-05, "loss": 0.8668, "step": 29835 }, { "epoch": 1.1487969201154957, "grad_norm": 1.8043612241744995, "learning_rate": 7.692239323642753e-05, "loss": 0.9907, "step": 29840 }, { "epoch": 1.1489894128970164, "grad_norm": 2.071789503097534, "learning_rate": 7.689297094718444e-05, "loss": 0.8891, "step": 29845 }, { "epoch": 1.149181905678537, "grad_norm": 1.8060542345046997, "learning_rate": 7.686355077092611e-05, "loss": 1.0377, "step": 
29850 }, { "epoch": 1.1493743984600577, "grad_norm": 1.0479457378387451, "learning_rate": 7.683413271034279e-05, "loss": 0.7771, "step": 29855 }, { "epoch": 1.1495668912415784, "grad_norm": 1.4982253313064575, "learning_rate": 7.680471676812453e-05, "loss": 0.8573, "step": 29860 }, { "epoch": 1.149759384023099, "grad_norm": 1.1916199922561646, "learning_rate": 7.677530294696131e-05, "loss": 0.726, "step": 29865 }, { "epoch": 1.1499518768046197, "grad_norm": 1.0968197584152222, "learning_rate": 7.674589124954274e-05, "loss": 0.7354, "step": 29870 }, { "epoch": 1.1501443695861404, "grad_norm": 1.3688973188400269, "learning_rate": 7.671648167855842e-05, "loss": 0.7691, "step": 29875 }, { "epoch": 1.1503368623676613, "grad_norm": 2.174705743789673, "learning_rate": 7.66870742366976e-05, "loss": 0.8993, "step": 29880 }, { "epoch": 1.150529355149182, "grad_norm": 1.718237280845642, "learning_rate": 7.665766892664938e-05, "loss": 0.8576, "step": 29885 }, { "epoch": 1.1507218479307026, "grad_norm": 1.2384010553359985, "learning_rate": 7.662826575110276e-05, "loss": 0.8853, "step": 29890 }, { "epoch": 1.1509143407122233, "grad_norm": 1.3564085960388184, "learning_rate": 7.65988647127464e-05, "loss": 0.7828, "step": 29895 }, { "epoch": 1.151106833493744, "grad_norm": 1.820794701576233, "learning_rate": 7.65694658142688e-05, "loss": 0.893, "step": 29900 }, { "epoch": 1.1512993262752647, "grad_norm": 2.1679587364196777, "learning_rate": 7.654006905835838e-05, "loss": 0.8594, "step": 29905 }, { "epoch": 1.1514918190567853, "grad_norm": 2.045226573944092, "learning_rate": 7.651067444770324e-05, "loss": 1.0744, "step": 29910 }, { "epoch": 1.151684311838306, "grad_norm": 1.4716484546661377, "learning_rate": 7.648128198499128e-05, "loss": 0.6354, "step": 29915 }, { "epoch": 1.1518768046198267, "grad_norm": 2.3998184204101562, "learning_rate": 7.64518916729103e-05, "loss": 0.7672, "step": 29920 }, { "epoch": 1.1520692974013476, "grad_norm": 1.5186283588409424, "learning_rate": 
7.642250351414786e-05, "loss": 0.8835, "step": 29925 }, { "epoch": 1.1522617901828682, "grad_norm": 1.9617729187011719, "learning_rate": 7.639311751139127e-05, "loss": 0.828, "step": 29930 }, { "epoch": 1.152454282964389, "grad_norm": 0.981577455997467, "learning_rate": 7.636373366732771e-05, "loss": 0.8872, "step": 29935 }, { "epoch": 1.1526467757459096, "grad_norm": 1.1515179872512817, "learning_rate": 7.633435198464411e-05, "loss": 0.9887, "step": 29940 }, { "epoch": 1.1528392685274302, "grad_norm": 1.667133092880249, "learning_rate": 7.630497246602725e-05, "loss": 0.8211, "step": 29945 }, { "epoch": 1.153031761308951, "grad_norm": 0.8391001224517822, "learning_rate": 7.627559511416373e-05, "loss": 0.8981, "step": 29950 }, { "epoch": 1.1532242540904716, "grad_norm": 1.4484329223632812, "learning_rate": 7.624621993173985e-05, "loss": 0.865, "step": 29955 }, { "epoch": 1.1534167468719922, "grad_norm": 1.7156023979187012, "learning_rate": 7.621684692144178e-05, "loss": 0.8699, "step": 29960 }, { "epoch": 1.153609239653513, "grad_norm": 1.1260522603988647, "learning_rate": 7.618747608595555e-05, "loss": 0.9687, "step": 29965 }, { "epoch": 1.1538017324350336, "grad_norm": 0.9241002202033997, "learning_rate": 7.615810742796688e-05, "loss": 0.7649, "step": 29970 }, { "epoch": 1.1539942252165543, "grad_norm": 1.7365343570709229, "learning_rate": 7.612874095016132e-05, "loss": 0.9088, "step": 29975 }, { "epoch": 1.1541867179980752, "grad_norm": 1.0506408214569092, "learning_rate": 7.60993766552243e-05, "loss": 0.9671, "step": 29980 }, { "epoch": 1.1543792107795958, "grad_norm": 1.6173326969146729, "learning_rate": 7.607001454584097e-05, "loss": 0.9204, "step": 29985 }, { "epoch": 1.1545717035611165, "grad_norm": 0.9955803155899048, "learning_rate": 7.604065462469623e-05, "loss": 0.923, "step": 29990 }, { "epoch": 1.1547641963426372, "grad_norm": 1.820084571838379, "learning_rate": 7.601129689447495e-05, "loss": 0.9913, "step": 29995 }, { "epoch": 1.1549566891241578, 
"grad_norm": 1.0505925416946411, "learning_rate": 7.598194135786166e-05, "loss": 0.8086, "step": 30000 }, { "epoch": 1.1551491819056785, "grad_norm": 1.5883291959762573, "learning_rate": 7.59525880175407e-05, "loss": 0.7994, "step": 30005 }, { "epoch": 1.1553416746871992, "grad_norm": 0.8642548322677612, "learning_rate": 7.59232368761963e-05, "loss": 0.8237, "step": 30010 }, { "epoch": 1.1555341674687198, "grad_norm": 1.0051319599151611, "learning_rate": 7.589388793651239e-05, "loss": 0.8595, "step": 30015 }, { "epoch": 1.1557266602502407, "grad_norm": 0.859375, "learning_rate": 7.586454120117271e-05, "loss": 0.8389, "step": 30020 }, { "epoch": 1.1559191530317614, "grad_norm": 1.0464012622833252, "learning_rate": 7.583519667286088e-05, "loss": 0.9032, "step": 30025 }, { "epoch": 1.156111645813282, "grad_norm": 1.8586502075195312, "learning_rate": 7.580585435426024e-05, "loss": 0.8104, "step": 30030 }, { "epoch": 1.1563041385948027, "grad_norm": 1.273821234703064, "learning_rate": 7.577651424805392e-05, "loss": 0.7211, "step": 30035 }, { "epoch": 1.1564966313763234, "grad_norm": 1.5293164253234863, "learning_rate": 7.574717635692492e-05, "loss": 0.8558, "step": 30040 }, { "epoch": 1.156689124157844, "grad_norm": 1.440649390220642, "learning_rate": 7.5717840683556e-05, "loss": 0.816, "step": 30045 }, { "epoch": 1.1568816169393648, "grad_norm": 2.2141449451446533, "learning_rate": 7.568850723062967e-05, "loss": 0.8884, "step": 30050 }, { "epoch": 1.1570741097208854, "grad_norm": 1.564818024635315, "learning_rate": 7.565917600082833e-05, "loss": 1.0443, "step": 30055 }, { "epoch": 1.157266602502406, "grad_norm": 0.9042505621910095, "learning_rate": 7.562984699683408e-05, "loss": 0.6834, "step": 30060 }, { "epoch": 1.1574590952839268, "grad_norm": 1.246706485748291, "learning_rate": 7.560052022132889e-05, "loss": 0.9267, "step": 30065 }, { "epoch": 1.1576515880654474, "grad_norm": 1.4773999452590942, "learning_rate": 7.557119567699452e-05, "loss": 0.7131, "step": 30070 
}, { "epoch": 1.1578440808469683, "grad_norm": 1.9604829549789429, "learning_rate": 7.554187336651247e-05, "loss": 0.8189, "step": 30075 }, { "epoch": 1.158036573628489, "grad_norm": 1.0815962553024292, "learning_rate": 7.551255329256402e-05, "loss": 0.8375, "step": 30080 }, { "epoch": 1.1582290664100097, "grad_norm": 1.2420703172683716, "learning_rate": 7.548323545783042e-05, "loss": 0.9146, "step": 30085 }, { "epoch": 1.1584215591915303, "grad_norm": 1.9062912464141846, "learning_rate": 7.545391986499252e-05, "loss": 0.8919, "step": 30090 }, { "epoch": 1.158614051973051, "grad_norm": 1.7222148180007935, "learning_rate": 7.5424606516731e-05, "loss": 0.7967, "step": 30095 }, { "epoch": 1.1588065447545717, "grad_norm": 1.2561261653900146, "learning_rate": 7.539529541572647e-05, "loss": 0.837, "step": 30100 }, { "epoch": 1.1589990375360923, "grad_norm": 1.5790022611618042, "learning_rate": 7.536598656465918e-05, "loss": 0.8983, "step": 30105 }, { "epoch": 1.159191530317613, "grad_norm": 1.3885726928710938, "learning_rate": 7.533667996620919e-05, "loss": 0.8999, "step": 30110 }, { "epoch": 1.1593840230991337, "grad_norm": 1.676992416381836, "learning_rate": 7.530737562305649e-05, "loss": 1.0169, "step": 30115 }, { "epoch": 1.1595765158806546, "grad_norm": 1.5144734382629395, "learning_rate": 7.52780735378807e-05, "loss": 0.9365, "step": 30120 }, { "epoch": 1.1597690086621752, "grad_norm": 1.8382542133331299, "learning_rate": 7.524877371336129e-05, "loss": 0.8854, "step": 30125 }, { "epoch": 1.159961501443696, "grad_norm": 2.0379273891448975, "learning_rate": 7.52194761521776e-05, "loss": 0.8692, "step": 30130 }, { "epoch": 1.1601539942252166, "grad_norm": 1.4084811210632324, "learning_rate": 7.519018085700861e-05, "loss": 0.9688, "step": 30135 }, { "epoch": 1.1603464870067373, "grad_norm": 1.7691874504089355, "learning_rate": 7.516088783053327e-05, "loss": 1.031, "step": 30140 }, { "epoch": 1.160538979788258, "grad_norm": 1.5990632772445679, "learning_rate": 
7.51315970754302e-05, "loss": 0.9808, "step": 30145 }, { "epoch": 1.1607314725697786, "grad_norm": 1.0263117551803589, "learning_rate": 7.510230859437781e-05, "loss": 0.85, "step": 30150 }, { "epoch": 1.1609239653512993, "grad_norm": 0.9199804067611694, "learning_rate": 7.50730223900544e-05, "loss": 0.8131, "step": 30155 }, { "epoch": 1.16111645813282, "grad_norm": 1.6857373714447021, "learning_rate": 7.504373846513796e-05, "loss": 0.9652, "step": 30160 }, { "epoch": 1.1613089509143406, "grad_norm": 0.9786146283149719, "learning_rate": 7.501445682230628e-05, "loss": 0.8816, "step": 30165 }, { "epoch": 1.1615014436958615, "grad_norm": 1.5863782167434692, "learning_rate": 7.498517746423706e-05, "loss": 0.8624, "step": 30170 }, { "epoch": 1.1616939364773822, "grad_norm": 1.287121295928955, "learning_rate": 7.495590039360763e-05, "loss": 0.9224, "step": 30175 }, { "epoch": 1.1618864292589028, "grad_norm": 2.6649892330169678, "learning_rate": 7.492662561309518e-05, "loss": 0.8616, "step": 30180 }, { "epoch": 1.1620789220404235, "grad_norm": 1.1567127704620361, "learning_rate": 7.489735312537676e-05, "loss": 0.759, "step": 30185 }, { "epoch": 1.1622714148219442, "grad_norm": 1.2174935340881348, "learning_rate": 7.48680829331291e-05, "loss": 0.7874, "step": 30190 }, { "epoch": 1.1624639076034649, "grad_norm": 1.6268326044082642, "learning_rate": 7.483881503902874e-05, "loss": 0.8314, "step": 30195 }, { "epoch": 1.1626564003849855, "grad_norm": 1.1430530548095703, "learning_rate": 7.480954944575212e-05, "loss": 0.8684, "step": 30200 }, { "epoch": 1.1628488931665062, "grad_norm": 1.0805143117904663, "learning_rate": 7.478028615597532e-05, "loss": 0.9936, "step": 30205 }, { "epoch": 1.1630413859480269, "grad_norm": 1.8118839263916016, "learning_rate": 7.475102517237424e-05, "loss": 0.7458, "step": 30210 }, { "epoch": 1.1632338787295478, "grad_norm": 0.9031025171279907, "learning_rate": 7.47217664976247e-05, "loss": 0.7417, "step": 30215 }, { "epoch": 1.1634263715110684, 
"grad_norm": 1.5473326444625854, "learning_rate": 7.469251013440215e-05, "loss": 0.8992, "step": 30220 }, { "epoch": 1.163618864292589, "grad_norm": 1.164165735244751, "learning_rate": 7.466325608538185e-05, "loss": 0.9047, "step": 30225 }, { "epoch": 1.1638113570741098, "grad_norm": 1.0037180185317993, "learning_rate": 7.463400435323899e-05, "loss": 0.7691, "step": 30230 }, { "epoch": 1.1640038498556304, "grad_norm": 1.021618127822876, "learning_rate": 7.460475494064841e-05, "loss": 0.6932, "step": 30235 }, { "epoch": 1.164196342637151, "grad_norm": 1.689186930656433, "learning_rate": 7.457550785028472e-05, "loss": 0.891, "step": 30240 }, { "epoch": 1.1643888354186718, "grad_norm": 1.9816436767578125, "learning_rate": 7.454626308482244e-05, "loss": 0.8508, "step": 30245 }, { "epoch": 1.1645813282001924, "grad_norm": 1.265624761581421, "learning_rate": 7.45170206469358e-05, "loss": 0.8929, "step": 30250 }, { "epoch": 1.1647738209817131, "grad_norm": 1.2222243547439575, "learning_rate": 7.44877805392988e-05, "loss": 0.9135, "step": 30255 }, { "epoch": 1.1649663137632338, "grad_norm": 2.1236441135406494, "learning_rate": 7.445854276458527e-05, "loss": 0.7737, "step": 30260 }, { "epoch": 1.1651588065447545, "grad_norm": 0.9599865078926086, "learning_rate": 7.44293073254688e-05, "loss": 0.8891, "step": 30265 }, { "epoch": 1.1653512993262753, "grad_norm": 1.1516355276107788, "learning_rate": 7.440007422462276e-05, "loss": 0.7167, "step": 30270 }, { "epoch": 1.165543792107796, "grad_norm": 1.8902909755706787, "learning_rate": 7.43708434647204e-05, "loss": 0.8657, "step": 30275 }, { "epoch": 1.1657362848893167, "grad_norm": 2.002458095550537, "learning_rate": 7.434161504843461e-05, "loss": 0.7881, "step": 30280 }, { "epoch": 1.1659287776708374, "grad_norm": 1.3423494100570679, "learning_rate": 7.431238897843811e-05, "loss": 0.9164, "step": 30285 }, { "epoch": 1.166121270452358, "grad_norm": 1.7833929061889648, "learning_rate": 7.428316525740353e-05, "loss": 0.9904, 
"step": 30290 }, { "epoch": 1.1663137632338787, "grad_norm": 1.0303772687911987, "learning_rate": 7.425394388800311e-05, "loss": 0.7141, "step": 30295 }, { "epoch": 1.1665062560153994, "grad_norm": 1.3508719205856323, "learning_rate": 7.422472487290893e-05, "loss": 0.792, "step": 30300 }, { "epoch": 1.16669874879692, "grad_norm": 1.598431944847107, "learning_rate": 7.419550821479298e-05, "loss": 0.8109, "step": 30305 }, { "epoch": 1.166891241578441, "grad_norm": 1.6909544467926025, "learning_rate": 7.416629391632683e-05, "loss": 0.9068, "step": 30310 }, { "epoch": 1.1670837343599616, "grad_norm": 1.0492467880249023, "learning_rate": 7.413708198018195e-05, "loss": 0.9566, "step": 30315 }, { "epoch": 1.1672762271414823, "grad_norm": 2.0640323162078857, "learning_rate": 7.410787240902963e-05, "loss": 0.818, "step": 30320 }, { "epoch": 1.167468719923003, "grad_norm": 1.4553757905960083, "learning_rate": 7.407866520554087e-05, "loss": 0.8928, "step": 30325 }, { "epoch": 1.1676612127045236, "grad_norm": 1.5272727012634277, "learning_rate": 7.404946037238641e-05, "loss": 0.8591, "step": 30330 }, { "epoch": 1.1678537054860443, "grad_norm": 1.040766716003418, "learning_rate": 7.402025791223694e-05, "loss": 0.851, "step": 30335 }, { "epoch": 1.168046198267565, "grad_norm": 0.9818170666694641, "learning_rate": 7.399105782776276e-05, "loss": 0.7963, "step": 30340 }, { "epoch": 1.1682386910490856, "grad_norm": 1.3725941181182861, "learning_rate": 7.396186012163404e-05, "loss": 0.7677, "step": 30345 }, { "epoch": 1.1684311838306063, "grad_norm": 1.8234846591949463, "learning_rate": 7.393266479652075e-05, "loss": 0.7203, "step": 30350 }, { "epoch": 1.168623676612127, "grad_norm": 1.143553376197815, "learning_rate": 7.390347185509258e-05, "loss": 0.8207, "step": 30355 }, { "epoch": 1.1688161693936476, "grad_norm": 1.3585904836654663, "learning_rate": 7.387428130001904e-05, "loss": 0.8421, "step": 30360 }, { "epoch": 1.1690086621751685, "grad_norm": 2.451633930206299, 
"learning_rate": 7.384509313396939e-05, "loss": 0.8745, "step": 30365 }, { "epoch": 1.1692011549566892, "grad_norm": 0.9068706035614014, "learning_rate": 7.381590735961272e-05, "loss": 1.0768, "step": 30370 }, { "epoch": 1.1693936477382099, "grad_norm": 1.411170244216919, "learning_rate": 7.378672397961788e-05, "loss": 0.9386, "step": 30375 }, { "epoch": 1.1695861405197305, "grad_norm": 2.274181842803955, "learning_rate": 7.375754299665348e-05, "loss": 0.9783, "step": 30380 }, { "epoch": 1.1697786333012512, "grad_norm": 2.0918445587158203, "learning_rate": 7.372836441338789e-05, "loss": 0.8331, "step": 30385 }, { "epoch": 1.1699711260827719, "grad_norm": 0.9770554900169373, "learning_rate": 7.36991882324894e-05, "loss": 0.8223, "step": 30390 }, { "epoch": 1.1701636188642925, "grad_norm": 1.2381200790405273, "learning_rate": 7.367001445662591e-05, "loss": 0.8515, "step": 30395 }, { "epoch": 1.1703561116458132, "grad_norm": 1.2155394554138184, "learning_rate": 7.364084308846512e-05, "loss": 0.9158, "step": 30400 }, { "epoch": 1.1705486044273339, "grad_norm": 1.4437912702560425, "learning_rate": 7.361167413067469e-05, "loss": 0.8522, "step": 30405 }, { "epoch": 1.1707410972088548, "grad_norm": 1.226899266242981, "learning_rate": 7.358250758592184e-05, "loss": 0.9088, "step": 30410 }, { "epoch": 1.1709335899903754, "grad_norm": 0.8239467144012451, "learning_rate": 7.355334345687361e-05, "loss": 0.8154, "step": 30415 }, { "epoch": 1.1711260827718961, "grad_norm": 1.354842185974121, "learning_rate": 7.3524181746197e-05, "loss": 0.7883, "step": 30420 }, { "epoch": 1.1713185755534168, "grad_norm": 0.8536742329597473, "learning_rate": 7.349502245655857e-05, "loss": 0.6467, "step": 30425 }, { "epoch": 1.1715110683349375, "grad_norm": 1.2554560899734497, "learning_rate": 7.346586559062472e-05, "loss": 0.8991, "step": 30430 }, { "epoch": 1.1717035611164581, "grad_norm": 1.260736107826233, "learning_rate": 7.343671115106172e-05, "loss": 0.7645, "step": 30435 }, { "epoch": 
1.1718960538979788, "grad_norm": 1.7142833471298218, "learning_rate": 7.340755914053552e-05, "loss": 0.8953, "step": 30440 }, { "epoch": 1.1720885466794995, "grad_norm": 1.4862356185913086, "learning_rate": 7.337840956171184e-05, "loss": 0.8362, "step": 30445 }, { "epoch": 1.1722810394610201, "grad_norm": 1.4640238285064697, "learning_rate": 7.33492624172563e-05, "loss": 0.7173, "step": 30450 }, { "epoch": 1.1724735322425408, "grad_norm": 1.3464069366455078, "learning_rate": 7.332011770983417e-05, "loss": 0.8988, "step": 30455 }, { "epoch": 1.1726660250240615, "grad_norm": 1.3081461191177368, "learning_rate": 7.32909754421105e-05, "loss": 1.0021, "step": 30460 }, { "epoch": 1.1728585178055824, "grad_norm": 0.9563549160957336, "learning_rate": 7.326183561675022e-05, "loss": 0.7904, "step": 30465 }, { "epoch": 1.173051010587103, "grad_norm": 1.8011226654052734, "learning_rate": 7.323269823641794e-05, "loss": 0.9917, "step": 30470 }, { "epoch": 1.1732435033686237, "grad_norm": 1.0634485483169556, "learning_rate": 7.320356330377809e-05, "loss": 0.8297, "step": 30475 }, { "epoch": 1.1734359961501444, "grad_norm": 1.195955753326416, "learning_rate": 7.317443082149488e-05, "loss": 1.7867, "step": 30480 }, { "epoch": 1.173628488931665, "grad_norm": 0.9291029572486877, "learning_rate": 7.314530079223225e-05, "loss": 0.7805, "step": 30485 }, { "epoch": 1.1738209817131857, "grad_norm": 1.8289291858673096, "learning_rate": 7.311617321865396e-05, "loss": 0.8179, "step": 30490 }, { "epoch": 1.1740134744947064, "grad_norm": 2.2788593769073486, "learning_rate": 7.308704810342357e-05, "loss": 1.0026, "step": 30495 }, { "epoch": 1.174205967276227, "grad_norm": 0.9969417452812195, "learning_rate": 7.305792544920433e-05, "loss": 0.9294, "step": 30500 }, { "epoch": 1.174398460057748, "grad_norm": 1.9698654413223267, "learning_rate": 7.302880525865932e-05, "loss": 1.031, "step": 30505 }, { "epoch": 1.1745909528392686, "grad_norm": 2.6228976249694824, "learning_rate": 
7.299968753445142e-05, "loss": 0.8685, "step": 30510 }, { "epoch": 1.1747834456207893, "grad_norm": 1.7787238359451294, "learning_rate": 7.297057227924324e-05, "loss": 0.7516, "step": 30515 }, { "epoch": 1.17497593840231, "grad_norm": 1.7885974645614624, "learning_rate": 7.294145949569713e-05, "loss": 0.9863, "step": 30520 }, { "epoch": 1.1751684311838306, "grad_norm": 0.9006327986717224, "learning_rate": 7.291234918647534e-05, "loss": 0.8484, "step": 30525 }, { "epoch": 1.1753609239653513, "grad_norm": 1.1137930154800415, "learning_rate": 7.288324135423979e-05, "loss": 0.8275, "step": 30530 }, { "epoch": 1.175553416746872, "grad_norm": 0.8534238338470459, "learning_rate": 7.285413600165214e-05, "loss": 0.8755, "step": 30535 }, { "epoch": 1.1757459095283926, "grad_norm": 1.3716295957565308, "learning_rate": 7.282503313137397e-05, "loss": 0.7926, "step": 30540 }, { "epoch": 1.1759384023099133, "grad_norm": 2.3967831134796143, "learning_rate": 7.27959327460665e-05, "loss": 0.7856, "step": 30545 }, { "epoch": 1.176130895091434, "grad_norm": 0.8649664521217346, "learning_rate": 7.276683484839074e-05, "loss": 0.6645, "step": 30550 }, { "epoch": 1.1763233878729547, "grad_norm": 1.004795789718628, "learning_rate": 7.273773944100755e-05, "loss": 0.9662, "step": 30555 }, { "epoch": 1.1765158806544755, "grad_norm": 1.4656823873519897, "learning_rate": 7.27086465265775e-05, "loss": 1.0357, "step": 30560 }, { "epoch": 1.1767083734359962, "grad_norm": 1.2406914234161377, "learning_rate": 7.267955610776089e-05, "loss": 0.7643, "step": 30565 }, { "epoch": 1.1769008662175169, "grad_norm": 0.761400043964386, "learning_rate": 7.265046818721795e-05, "loss": 0.7157, "step": 30570 }, { "epoch": 1.1770933589990376, "grad_norm": 1.4222087860107422, "learning_rate": 7.26213827676085e-05, "loss": 0.7819, "step": 30575 }, { "epoch": 1.1772858517805582, "grad_norm": 1.604976773262024, "learning_rate": 7.259229985159223e-05, "loss": 0.8072, "step": 30580 }, { "epoch": 1.177478344562079, 
"grad_norm": 1.5902528762817383, "learning_rate": 7.256321944182856e-05, "loss": 0.8419, "step": 30585 }, { "epoch": 1.1776708373435996, "grad_norm": 1.3055870532989502, "learning_rate": 7.253414154097675e-05, "loss": 0.8395, "step": 30590 }, { "epoch": 1.1778633301251202, "grad_norm": 1.5313003063201904, "learning_rate": 7.250506615169573e-05, "loss": 0.853, "step": 30595 }, { "epoch": 1.1780558229066411, "grad_norm": 2.564984083175659, "learning_rate": 7.24759932766443e-05, "loss": 0.8832, "step": 30600 }, { "epoch": 1.1782483156881618, "grad_norm": 1.0367460250854492, "learning_rate": 7.244692291848091e-05, "loss": 0.7639, "step": 30605 }, { "epoch": 1.1784408084696825, "grad_norm": 1.0948158502578735, "learning_rate": 7.241785507986392e-05, "loss": 0.8525, "step": 30610 }, { "epoch": 1.1786333012512031, "grad_norm": 1.3061574697494507, "learning_rate": 7.23887897634514e-05, "loss": 0.7501, "step": 30615 }, { "epoch": 1.1788257940327238, "grad_norm": 2.5141634941101074, "learning_rate": 7.235972697190112e-05, "loss": 0.8892, "step": 30620 }, { "epoch": 1.1790182868142445, "grad_norm": 1.4427481889724731, "learning_rate": 7.233066670787068e-05, "loss": 0.8994, "step": 30625 }, { "epoch": 1.1792107795957651, "grad_norm": 0.6231327652931213, "learning_rate": 7.230160897401752e-05, "loss": 0.711, "step": 30630 }, { "epoch": 1.1794032723772858, "grad_norm": 1.6313964128494263, "learning_rate": 7.227255377299873e-05, "loss": 0.9548, "step": 30635 }, { "epoch": 1.1795957651588065, "grad_norm": 1.321081280708313, "learning_rate": 7.224350110747118e-05, "loss": 0.7634, "step": 30640 }, { "epoch": 1.1797882579403272, "grad_norm": 1.1919338703155518, "learning_rate": 7.221445098009163e-05, "loss": 0.8379, "step": 30645 }, { "epoch": 1.1799807507218478, "grad_norm": 1.954206109046936, "learning_rate": 7.218540339351643e-05, "loss": 0.6995, "step": 30650 }, { "epoch": 1.1801732435033687, "grad_norm": 1.4222872257232666, "learning_rate": 7.215635835040187e-05, "loss": 0.7785, 
"step": 30655 }, { "epoch": 1.1803657362848894, "grad_norm": 1.8065252304077148, "learning_rate": 7.21273158534039e-05, "loss": 0.8699, "step": 30660 }, { "epoch": 1.18055822906641, "grad_norm": 0.9798047542572021, "learning_rate": 7.209827590517822e-05, "loss": 0.9183, "step": 30665 }, { "epoch": 1.1807507218479307, "grad_norm": 1.1454569101333618, "learning_rate": 7.206923850838041e-05, "loss": 0.8424, "step": 30670 }, { "epoch": 1.1809432146294514, "grad_norm": 0.8481330871582031, "learning_rate": 7.204020366566571e-05, "loss": 0.8378, "step": 30675 }, { "epoch": 1.181135707410972, "grad_norm": 1.2855151891708374, "learning_rate": 7.201117137968915e-05, "loss": 0.8317, "step": 30680 }, { "epoch": 1.1813282001924927, "grad_norm": 2.0627200603485107, "learning_rate": 7.198214165310555e-05, "loss": 0.7981, "step": 30685 }, { "epoch": 1.1815206929740134, "grad_norm": 0.8609490394592285, "learning_rate": 7.195311448856952e-05, "loss": 0.7964, "step": 30690 }, { "epoch": 1.181713185755534, "grad_norm": 1.0228421688079834, "learning_rate": 7.192408988873537e-05, "loss": 0.914, "step": 30695 }, { "epoch": 1.181905678537055, "grad_norm": 1.3062362670898438, "learning_rate": 7.189506785625722e-05, "loss": 0.8929, "step": 30700 }, { "epoch": 1.1820981713185756, "grad_norm": 0.9503874182701111, "learning_rate": 7.186604839378891e-05, "loss": 0.8451, "step": 30705 }, { "epoch": 1.1822906641000963, "grad_norm": 1.9427149295806885, "learning_rate": 7.183703150398414e-05, "loss": 1.0375, "step": 30710 }, { "epoch": 1.182483156881617, "grad_norm": 1.4807151556015015, "learning_rate": 7.180801718949626e-05, "loss": 0.9003, "step": 30715 }, { "epoch": 1.1826756496631377, "grad_norm": 1.048420786857605, "learning_rate": 7.177900545297846e-05, "loss": 0.775, "step": 30720 }, { "epoch": 1.1828681424446583, "grad_norm": 1.6969749927520752, "learning_rate": 7.174999629708363e-05, "loss": 0.9417, "step": 30725 }, { "epoch": 1.183060635226179, "grad_norm": 1.2834903001785278, 
"learning_rate": 7.172098972446453e-05, "loss": 0.9584, "step": 30730 }, { "epoch": 1.1832531280076997, "grad_norm": 2.063692331314087, "learning_rate": 7.169198573777361e-05, "loss": 0.8202, "step": 30735 }, { "epoch": 1.1834456207892203, "grad_norm": 0.9396731853485107, "learning_rate": 7.166298433966301e-05, "loss": 0.9421, "step": 30740 }, { "epoch": 1.183638113570741, "grad_norm": 1.4655632972717285, "learning_rate": 7.163398553278483e-05, "loss": 0.8273, "step": 30745 }, { "epoch": 1.1838306063522617, "grad_norm": 2.0412590503692627, "learning_rate": 7.160498931979076e-05, "loss": 0.8433, "step": 30750 }, { "epoch": 1.1840230991337826, "grad_norm": 1.9432737827301025, "learning_rate": 7.157599570333226e-05, "loss": 0.9141, "step": 30755 }, { "epoch": 1.1842155919153032, "grad_norm": 0.863294243812561, "learning_rate": 7.154700468606073e-05, "loss": 0.9083, "step": 30760 }, { "epoch": 1.184408084696824, "grad_norm": 1.1578996181488037, "learning_rate": 7.151801627062713e-05, "loss": 0.8211, "step": 30765 }, { "epoch": 1.1846005774783446, "grad_norm": 1.4375747442245483, "learning_rate": 7.148903045968221e-05, "loss": 0.7579, "step": 30770 }, { "epoch": 1.1847930702598652, "grad_norm": 1.566175937652588, "learning_rate": 7.146004725587664e-05, "loss": 0.7958, "step": 30775 }, { "epoch": 1.184985563041386, "grad_norm": 2.4917242527008057, "learning_rate": 7.143106666186068e-05, "loss": 0.9255, "step": 30780 }, { "epoch": 1.1851780558229066, "grad_norm": 1.2194336652755737, "learning_rate": 7.14020886802844e-05, "loss": 0.715, "step": 30785 }, { "epoch": 1.1853705486044273, "grad_norm": 0.9946492314338684, "learning_rate": 7.137311331379769e-05, "loss": 0.8673, "step": 30790 }, { "epoch": 1.1855630413859481, "grad_norm": 1.6159164905548096, "learning_rate": 7.134414056505015e-05, "loss": 0.7092, "step": 30795 }, { "epoch": 1.1857555341674688, "grad_norm": 1.3039904832839966, "learning_rate": 7.131517043669108e-05, "loss": 0.9336, "step": 30800 }, { "epoch": 
1.1859480269489895, "grad_norm": 0.7158260345458984, "learning_rate": 7.12862029313697e-05, "loss": 0.9515, "step": 30805 }, { "epoch": 1.1861405197305102, "grad_norm": 1.4139379262924194, "learning_rate": 7.12572380517348e-05, "loss": 0.8344, "step": 30810 }, { "epoch": 1.1863330125120308, "grad_norm": 1.963030219078064, "learning_rate": 7.122827580043509e-05, "loss": 0.8987, "step": 30815 }, { "epoch": 1.1865255052935515, "grad_norm": 1.060595989227295, "learning_rate": 7.1199316180119e-05, "loss": 0.7739, "step": 30820 }, { "epoch": 1.1867179980750722, "grad_norm": 1.1344969272613525, "learning_rate": 7.117035919343464e-05, "loss": 0.9343, "step": 30825 }, { "epoch": 1.1869104908565928, "grad_norm": 1.3690916299819946, "learning_rate": 7.114140484302992e-05, "loss": 0.6558, "step": 30830 }, { "epoch": 1.1871029836381135, "grad_norm": 1.053550124168396, "learning_rate": 7.11124531315526e-05, "loss": 0.7993, "step": 30835 }, { "epoch": 1.1872954764196342, "grad_norm": 1.509711503982544, "learning_rate": 7.108350406165007e-05, "loss": 1.0615, "step": 30840 }, { "epoch": 1.1874879692011548, "grad_norm": 1.440314769744873, "learning_rate": 7.10545576359695e-05, "loss": 0.7368, "step": 30845 }, { "epoch": 1.1876804619826757, "grad_norm": 1.1829510927200317, "learning_rate": 7.102561385715794e-05, "loss": 0.7871, "step": 30850 }, { "epoch": 1.1878729547641964, "grad_norm": 1.2442659139633179, "learning_rate": 7.099667272786205e-05, "loss": 0.7765, "step": 30855 }, { "epoch": 1.188065447545717, "grad_norm": 1.2763118743896484, "learning_rate": 7.096773425072827e-05, "loss": 0.9741, "step": 30860 }, { "epoch": 1.1882579403272377, "grad_norm": 1.0801767110824585, "learning_rate": 7.093879842840289e-05, "loss": 0.7907, "step": 30865 }, { "epoch": 1.1884504331087584, "grad_norm": 1.863099217414856, "learning_rate": 7.090986526353192e-05, "loss": 0.7885, "step": 30870 }, { "epoch": 1.188642925890279, "grad_norm": 2.064089298248291, "learning_rate": 7.088093475876098e-05, 
"loss": 0.9183, "step": 30875 }, { "epoch": 1.1888354186717998, "grad_norm": 1.4984971284866333, "learning_rate": 7.085200691673573e-05, "loss": 0.9088, "step": 30880 }, { "epoch": 1.1890279114533204, "grad_norm": 1.2373710870742798, "learning_rate": 7.082308174010138e-05, "loss": 0.8277, "step": 30885 }, { "epoch": 1.189220404234841, "grad_norm": 0.864380955696106, "learning_rate": 7.079415923150285e-05, "loss": 0.7536, "step": 30890 }, { "epoch": 1.189412897016362, "grad_norm": 1.1806195974349976, "learning_rate": 7.076523939358504e-05, "loss": 0.7892, "step": 30895 }, { "epoch": 1.1896053897978827, "grad_norm": 1.486262559890747, "learning_rate": 7.07363222289924e-05, "loss": 0.7652, "step": 30900 }, { "epoch": 1.1897978825794033, "grad_norm": 1.093450665473938, "learning_rate": 7.070740774036926e-05, "loss": 0.7741, "step": 30905 }, { "epoch": 1.189990375360924, "grad_norm": 0.9283474683761597, "learning_rate": 7.067849593035962e-05, "loss": 0.7752, "step": 30910 }, { "epoch": 1.1901828681424447, "grad_norm": 1.1739362478256226, "learning_rate": 7.064958680160729e-05, "loss": 0.762, "step": 30915 }, { "epoch": 1.1903753609239653, "grad_norm": 1.4015986919403076, "learning_rate": 7.062068035675584e-05, "loss": 0.866, "step": 30920 }, { "epoch": 1.190567853705486, "grad_norm": 1.121469259262085, "learning_rate": 7.059177659844853e-05, "loss": 0.7242, "step": 30925 }, { "epoch": 1.1907603464870067, "grad_norm": 1.6382770538330078, "learning_rate": 7.056287552932842e-05, "loss": 0.8558, "step": 30930 }, { "epoch": 1.1909528392685274, "grad_norm": 1.2235866785049438, "learning_rate": 7.053397715203837e-05, "loss": 1.0149, "step": 30935 }, { "epoch": 1.191145332050048, "grad_norm": 1.0109368562698364, "learning_rate": 7.050508146922093e-05, "loss": 0.8432, "step": 30940 }, { "epoch": 1.1913378248315687, "grad_norm": 1.2103551626205444, "learning_rate": 7.047618848351835e-05, "loss": 0.8176, "step": 30945 }, { "epoch": 1.1915303176130896, "grad_norm": 
1.0425639152526855, "learning_rate": 7.044729819757279e-05, "loss": 0.7266, "step": 30950 }, { "epoch": 1.1917228103946103, "grad_norm": 1.1277648210525513, "learning_rate": 7.041841061402606e-05, "loss": 0.743, "step": 30955 }, { "epoch": 1.191915303176131, "grad_norm": 1.4034432172775269, "learning_rate": 7.038952573551967e-05, "loss": 0.9021, "step": 30960 }, { "epoch": 1.1921077959576516, "grad_norm": 0.9935101270675659, "learning_rate": 7.036064356469504e-05, "loss": 0.7794, "step": 30965 }, { "epoch": 1.1923002887391723, "grad_norm": 1.6952626705169678, "learning_rate": 7.033176410419322e-05, "loss": 0.8085, "step": 30970 }, { "epoch": 1.192492781520693, "grad_norm": 1.1734607219696045, "learning_rate": 7.030288735665498e-05, "loss": 0.742, "step": 30975 }, { "epoch": 1.1926852743022136, "grad_norm": 1.4427086114883423, "learning_rate": 7.027401332472102e-05, "loss": 0.9636, "step": 30980 }, { "epoch": 1.1928777670837343, "grad_norm": 1.4568814039230347, "learning_rate": 7.024514201103163e-05, "loss": 0.857, "step": 30985 }, { "epoch": 1.1930702598652552, "grad_norm": 0.9864609241485596, "learning_rate": 7.021627341822684e-05, "loss": 0.7251, "step": 30990 }, { "epoch": 1.1932627526467758, "grad_norm": 1.3467034101486206, "learning_rate": 7.018740754894659e-05, "loss": 0.8612, "step": 30995 }, { "epoch": 1.1934552454282965, "grad_norm": 0.9315931797027588, "learning_rate": 7.015854440583044e-05, "loss": 0.7092, "step": 31000 }, { "epoch": 1.1936477382098172, "grad_norm": 1.0659290552139282, "learning_rate": 7.012968399151769e-05, "loss": 0.8342, "step": 31005 }, { "epoch": 1.1938402309913378, "grad_norm": 1.1278982162475586, "learning_rate": 7.010082630864748e-05, "loss": 0.8266, "step": 31010 }, { "epoch": 1.1940327237728585, "grad_norm": 1.739660382270813, "learning_rate": 7.007197135985865e-05, "loss": 0.8522, "step": 31015 }, { "epoch": 1.1942252165543792, "grad_norm": 1.020524263381958, "learning_rate": 7.004311914778977e-05, "loss": 0.7267, "step": 
31020 }, { "epoch": 1.1944177093358999, "grad_norm": 1.6146787405014038, "learning_rate": 7.001426967507921e-05, "loss": 0.8447, "step": 31025 }, { "epoch": 1.1946102021174205, "grad_norm": 2.2342722415924072, "learning_rate": 6.998542294436504e-05, "loss": 0.9637, "step": 31030 }, { "epoch": 1.1948026948989412, "grad_norm": 1.4456665515899658, "learning_rate": 6.995657895828511e-05, "loss": 0.8051, "step": 31035 }, { "epoch": 1.1949951876804619, "grad_norm": 3.0097768306732178, "learning_rate": 6.992773771947703e-05, "loss": 0.9212, "step": 31040 }, { "epoch": 1.1951876804619828, "grad_norm": 2.814483165740967, "learning_rate": 6.989889923057813e-05, "loss": 0.7442, "step": 31045 }, { "epoch": 1.1953801732435034, "grad_norm": 1.2800745964050293, "learning_rate": 6.987006349422546e-05, "loss": 0.8654, "step": 31050 }, { "epoch": 1.195572666025024, "grad_norm": 1.355048418045044, "learning_rate": 6.98412305130559e-05, "loss": 0.7145, "step": 31055 }, { "epoch": 1.1957651588065448, "grad_norm": 1.7989205121994019, "learning_rate": 6.981240028970607e-05, "loss": 1.0386, "step": 31060 }, { "epoch": 1.1959576515880654, "grad_norm": 2.1455464363098145, "learning_rate": 6.97835728268122e-05, "loss": 1.0023, "step": 31065 }, { "epoch": 1.196150144369586, "grad_norm": 1.548008918762207, "learning_rate": 6.975474812701047e-05, "loss": 0.834, "step": 31070 }, { "epoch": 1.1963426371511068, "grad_norm": 1.2741403579711914, "learning_rate": 6.972592619293665e-05, "loss": 0.6641, "step": 31075 }, { "epoch": 1.1965351299326275, "grad_norm": 1.5108489990234375, "learning_rate": 6.969710702722632e-05, "loss": 0.8214, "step": 31080 }, { "epoch": 1.1967276227141483, "grad_norm": 1.490020513534546, "learning_rate": 6.966829063251484e-05, "loss": 0.8304, "step": 31085 }, { "epoch": 1.196920115495669, "grad_norm": 0.9961896538734436, "learning_rate": 6.963947701143724e-05, "loss": 0.804, "step": 31090 }, { "epoch": 1.1971126082771897, "grad_norm": 1.3393563032150269, "learning_rate": 
6.961066616662834e-05, "loss": 0.882, "step": 31095 }, { "epoch": 1.1973051010587104, "grad_norm": 0.9488278031349182, "learning_rate": 6.958185810072273e-05, "loss": 0.8017, "step": 31100 }, { "epoch": 1.197497593840231, "grad_norm": 1.5525720119476318, "learning_rate": 6.955305281635469e-05, "loss": 0.8404, "step": 31105 }, { "epoch": 1.1976900866217517, "grad_norm": 1.5819134712219238, "learning_rate": 6.952425031615823e-05, "loss": 0.8679, "step": 31110 }, { "epoch": 1.1978825794032724, "grad_norm": 1.090757966041565, "learning_rate": 6.949545060276726e-05, "loss": 0.6385, "step": 31115 }, { "epoch": 1.198075072184793, "grad_norm": 1.378661870956421, "learning_rate": 6.946665367881523e-05, "loss": 0.8704, "step": 31120 }, { "epoch": 1.1982675649663137, "grad_norm": 1.3583415746688843, "learning_rate": 6.943785954693547e-05, "loss": 0.992, "step": 31125 }, { "epoch": 1.1984600577478344, "grad_norm": 1.195250153541565, "learning_rate": 6.940906820976097e-05, "loss": 0.8763, "step": 31130 }, { "epoch": 1.198652550529355, "grad_norm": 1.0746605396270752, "learning_rate": 6.938027966992458e-05, "loss": 0.7867, "step": 31135 }, { "epoch": 1.198845043310876, "grad_norm": 1.0139212608337402, "learning_rate": 6.935149393005873e-05, "loss": 0.8423, "step": 31140 }, { "epoch": 1.1990375360923966, "grad_norm": 1.3249413967132568, "learning_rate": 6.932271099279576e-05, "loss": 0.8433, "step": 31145 }, { "epoch": 1.1992300288739173, "grad_norm": 0.8639804124832153, "learning_rate": 6.929393086076765e-05, "loss": 0.9309, "step": 31150 }, { "epoch": 1.199422521655438, "grad_norm": 1.7848960161209106, "learning_rate": 6.926515353660614e-05, "loss": 0.9963, "step": 31155 }, { "epoch": 1.1996150144369586, "grad_norm": 1.1610658168792725, "learning_rate": 6.923637902294275e-05, "loss": 0.8316, "step": 31160 }, { "epoch": 1.1998075072184793, "grad_norm": 1.335810899734497, "learning_rate": 6.920760732240868e-05, "loss": 0.9464, "step": 31165 }, { "epoch": 1.2, "grad_norm": 
1.3204543590545654, "learning_rate": 6.917883843763497e-05, "loss": 0.7098, "step": 31170 }, { "epoch": 1.2001924927815206, "grad_norm": 1.3988627195358276, "learning_rate": 6.915007237125233e-05, "loss": 0.8002, "step": 31175 }, { "epoch": 1.2003849855630413, "grad_norm": 1.4686270952224731, "learning_rate": 6.912130912589116e-05, "loss": 0.7664, "step": 31180 }, { "epoch": 1.2005774783445622, "grad_norm": 1.4970102310180664, "learning_rate": 6.909254870418176e-05, "loss": 0.8932, "step": 31185 }, { "epoch": 1.2007699711260829, "grad_norm": 1.4880247116088867, "learning_rate": 6.906379110875403e-05, "loss": 0.8988, "step": 31190 }, { "epoch": 1.2009624639076035, "grad_norm": 1.4801645278930664, "learning_rate": 6.903503634223764e-05, "loss": 0.9222, "step": 31195 }, { "epoch": 1.2011549566891242, "grad_norm": 1.630956768989563, "learning_rate": 6.900628440726209e-05, "loss": 0.8345, "step": 31200 }, { "epoch": 1.2013474494706449, "grad_norm": 1.5143691301345825, "learning_rate": 6.897753530645652e-05, "loss": 0.7211, "step": 31205 }, { "epoch": 1.2015399422521655, "grad_norm": 0.8206589818000793, "learning_rate": 6.894878904244979e-05, "loss": 0.8318, "step": 31210 }, { "epoch": 1.2017324350336862, "grad_norm": 1.4505168199539185, "learning_rate": 6.892004561787064e-05, "loss": 0.8583, "step": 31215 }, { "epoch": 1.2019249278152069, "grad_norm": 1.4092106819152832, "learning_rate": 6.889130503534745e-05, "loss": 0.9484, "step": 31220 }, { "epoch": 1.2021174205967275, "grad_norm": 1.2476885318756104, "learning_rate": 6.886256729750832e-05, "loss": 0.7712, "step": 31225 }, { "epoch": 1.2023099133782482, "grad_norm": 0.6989136338233948, "learning_rate": 6.883383240698114e-05, "loss": 0.7654, "step": 31230 }, { "epoch": 1.2025024061597689, "grad_norm": 1.4014019966125488, "learning_rate": 6.880510036639354e-05, "loss": 0.9665, "step": 31235 }, { "epoch": 1.2026948989412898, "grad_norm": 2.1451468467712402, "learning_rate": 6.877637117837286e-05, "loss": 0.9765, 
"step": 31240 }, { "epoch": 1.2028873917228105, "grad_norm": 1.5151429176330566, "learning_rate": 6.87476448455462e-05, "loss": 0.8647, "step": 31245 }, { "epoch": 1.2030798845043311, "grad_norm": 0.9325594902038574, "learning_rate": 6.871892137054038e-05, "loss": 0.8579, "step": 31250 }, { "epoch": 1.2032723772858518, "grad_norm": 1.5600671768188477, "learning_rate": 6.869020075598198e-05, "loss": 0.7753, "step": 31255 }, { "epoch": 1.2034648700673725, "grad_norm": 1.1633321046829224, "learning_rate": 6.866148300449733e-05, "loss": 0.8108, "step": 31260 }, { "epoch": 1.2036573628488931, "grad_norm": 1.3864375352859497, "learning_rate": 6.863276811871246e-05, "loss": 0.8359, "step": 31265 }, { "epoch": 1.2038498556304138, "grad_norm": 1.111289381980896, "learning_rate": 6.860405610125313e-05, "loss": 0.8911, "step": 31270 }, { "epoch": 1.2040423484119345, "grad_norm": 1.1889983415603638, "learning_rate": 6.857534695474492e-05, "loss": 0.9569, "step": 31275 }, { "epoch": 1.2042348411934554, "grad_norm": 1.9969587326049805, "learning_rate": 6.854664068181307e-05, "loss": 0.9898, "step": 31280 }, { "epoch": 1.204427333974976, "grad_norm": 1.4256417751312256, "learning_rate": 6.851793728508251e-05, "loss": 0.8671, "step": 31285 }, { "epoch": 1.2046198267564967, "grad_norm": 1.4675942659378052, "learning_rate": 6.84892367671781e-05, "loss": 0.8998, "step": 31290 }, { "epoch": 1.2048123195380174, "grad_norm": 1.571075201034546, "learning_rate": 6.846053913072423e-05, "loss": 0.8697, "step": 31295 }, { "epoch": 1.205004812319538, "grad_norm": 1.4728553295135498, "learning_rate": 6.84318443783451e-05, "loss": 0.874, "step": 31300 }, { "epoch": 1.2051973051010587, "grad_norm": 1.1161712408065796, "learning_rate": 6.840315251266472e-05, "loss": 0.7356, "step": 31305 }, { "epoch": 1.2053897978825794, "grad_norm": 1.4653940200805664, "learning_rate": 6.837446353630672e-05, "loss": 0.8024, "step": 31310 }, { "epoch": 1.2055822906641, "grad_norm": 1.2116683721542358, 
"learning_rate": 6.83457774518945e-05, "loss": 0.8066, "step": 31315 }, { "epoch": 1.2057747834456207, "grad_norm": 1.4532692432403564, "learning_rate": 6.831709426205128e-05, "loss": 0.8092, "step": 31320 }, { "epoch": 1.2059672762271414, "grad_norm": 1.5983264446258545, "learning_rate": 6.82884139693999e-05, "loss": 1.2661, "step": 31325 }, { "epoch": 1.206159769008662, "grad_norm": 1.1187528371810913, "learning_rate": 6.825973657656297e-05, "loss": 0.8836, "step": 31330 }, { "epoch": 1.206352261790183, "grad_norm": 1.9153698682785034, "learning_rate": 6.82310620861629e-05, "loss": 0.8676, "step": 31335 }, { "epoch": 1.2065447545717036, "grad_norm": 1.0382484197616577, "learning_rate": 6.820239050082175e-05, "loss": 0.7845, "step": 31340 }, { "epoch": 1.2067372473532243, "grad_norm": 1.228818416595459, "learning_rate": 6.817372182316135e-05, "loss": 0.9074, "step": 31345 }, { "epoch": 1.206929740134745, "grad_norm": 1.1787536144256592, "learning_rate": 6.814505605580323e-05, "loss": 0.8794, "step": 31350 }, { "epoch": 1.2071222329162656, "grad_norm": 1.3236230611801147, "learning_rate": 6.811639320136876e-05, "loss": 0.7517, "step": 31355 }, { "epoch": 1.2073147256977863, "grad_norm": 1.2837519645690918, "learning_rate": 6.808773326247888e-05, "loss": 1.0796, "step": 31360 }, { "epoch": 1.207507218479307, "grad_norm": 1.7796859741210938, "learning_rate": 6.805907624175443e-05, "loss": 0.8648, "step": 31365 }, { "epoch": 1.2076997112608276, "grad_norm": 1.2428163290023804, "learning_rate": 6.803042214181586e-05, "loss": 0.9098, "step": 31370 }, { "epoch": 1.2078922040423483, "grad_norm": 0.7685776352882385, "learning_rate": 6.800177096528337e-05, "loss": 0.935, "step": 31375 }, { "epoch": 1.2080846968238692, "grad_norm": 1.5274989604949951, "learning_rate": 6.797312271477699e-05, "loss": 0.8187, "step": 31380 }, { "epoch": 1.2082771896053899, "grad_norm": 1.497298240661621, "learning_rate": 6.794447739291639e-05, "loss": 0.7576, "step": 31385 }, { "epoch": 
1.2084696823869105, "grad_norm": 1.8094556331634521, "learning_rate": 6.791583500232092e-05, "loss": 0.8676, "step": 31390 }, { "epoch": 1.2086621751684312, "grad_norm": 1.844341516494751, "learning_rate": 6.788719554560987e-05, "loss": 0.8661, "step": 31395 }, { "epoch": 1.2088546679499519, "grad_norm": 2.015148639678955, "learning_rate": 6.785855902540205e-05, "loss": 0.8985, "step": 31400 }, { "epoch": 1.2090471607314726, "grad_norm": 1.5119047164916992, "learning_rate": 6.782992544431603e-05, "loss": 0.8247, "step": 31405 }, { "epoch": 1.2092396535129932, "grad_norm": 1.3965736627578735, "learning_rate": 6.780129480497028e-05, "loss": 0.7286, "step": 31410 }, { "epoch": 1.209432146294514, "grad_norm": 1.238343358039856, "learning_rate": 6.777266710998283e-05, "loss": 0.9662, "step": 31415 }, { "epoch": 1.2096246390760346, "grad_norm": 0.8886631727218628, "learning_rate": 6.774404236197144e-05, "loss": 0.7578, "step": 31420 }, { "epoch": 1.2098171318575552, "grad_norm": 1.1155580282211304, "learning_rate": 6.771542056355373e-05, "loss": 0.6954, "step": 31425 }, { "epoch": 1.2100096246390761, "grad_norm": 1.5056712627410889, "learning_rate": 6.768680171734692e-05, "loss": 0.9816, "step": 31430 }, { "epoch": 1.2102021174205968, "grad_norm": 1.3402817249298096, "learning_rate": 6.765818582596805e-05, "loss": 0.9098, "step": 31435 }, { "epoch": 1.2103946102021175, "grad_norm": 1.0524393320083618, "learning_rate": 6.762957289203386e-05, "loss": 0.8848, "step": 31440 }, { "epoch": 1.2105871029836381, "grad_norm": 0.9701569676399231, "learning_rate": 6.760096291816078e-05, "loss": 0.9192, "step": 31445 }, { "epoch": 1.2107795957651588, "grad_norm": 2.2335519790649414, "learning_rate": 6.757235590696503e-05, "loss": 0.8811, "step": 31450 }, { "epoch": 1.2109720885466795, "grad_norm": 1.1316584348678589, "learning_rate": 6.75437518610625e-05, "loss": 0.796, "step": 31455 }, { "epoch": 1.2111645813282002, "grad_norm": 1.6016689538955688, "learning_rate": 
6.751515078306887e-05, "loss": 0.8402, "step": 31460 }, { "epoch": 1.2113570741097208, "grad_norm": 1.7846043109893799, "learning_rate": 6.748655267559951e-05, "loss": 0.8634, "step": 31465 }, { "epoch": 1.2115495668912415, "grad_norm": 1.7141379117965698, "learning_rate": 6.745795754126953e-05, "loss": 1.0208, "step": 31470 }, { "epoch": 1.2117420596727624, "grad_norm": 1.225437879562378, "learning_rate": 6.742936538269373e-05, "loss": 0.9122, "step": 31475 }, { "epoch": 1.211934552454283, "grad_norm": 1.7134424448013306, "learning_rate": 6.740077620248675e-05, "loss": 0.8984, "step": 31480 }, { "epoch": 1.2121270452358037, "grad_norm": 1.0409084558486938, "learning_rate": 6.737219000326283e-05, "loss": 0.9374, "step": 31485 }, { "epoch": 1.2123195380173244, "grad_norm": 1.527655005455017, "learning_rate": 6.734360678763593e-05, "loss": 0.9277, "step": 31490 }, { "epoch": 1.212512030798845, "grad_norm": 1.9206295013427734, "learning_rate": 6.731502655821993e-05, "loss": 0.7926, "step": 31495 }, { "epoch": 1.2127045235803657, "grad_norm": 1.035962462425232, "learning_rate": 6.728644931762824e-05, "loss": 0.8946, "step": 31500 }, { "epoch": 1.2128970163618864, "grad_norm": 1.146831750869751, "learning_rate": 6.725787506847399e-05, "loss": 0.9099, "step": 31505 }, { "epoch": 1.213089509143407, "grad_norm": 1.0527504682540894, "learning_rate": 6.723501782474154e-05, "loss": 0.7984, "step": 31510 }, { "epoch": 1.2132820019249277, "grad_norm": 1.390851378440857, "learning_rate": 6.72064489667592e-05, "loss": 0.9729, "step": 31515 }, { "epoch": 1.2134744947064484, "grad_norm": 1.2551788091659546, "learning_rate": 6.717788310752988e-05, "loss": 0.945, "step": 31520 }, { "epoch": 1.213666987487969, "grad_norm": 1.2362979650497437, "learning_rate": 6.714932024966574e-05, "loss": 1.068, "step": 31525 }, { "epoch": 1.21385948026949, "grad_norm": 1.221686601638794, "learning_rate": 6.712076039577862e-05, "loss": 0.897, "step": 31530 }, { "epoch": 1.2140519730510106, 
"grad_norm": 1.4022241830825806, "learning_rate": 6.709220354848018e-05, "loss": 0.8292, "step": 31535 }, { "epoch": 1.2142444658325313, "grad_norm": 1.1567379236221313, "learning_rate": 6.706364971038173e-05, "loss": 0.838, "step": 31540 }, { "epoch": 1.214436958614052, "grad_norm": 1.1204020977020264, "learning_rate": 6.70350988840943e-05, "loss": 0.9189, "step": 31545 }, { "epoch": 1.2146294513955727, "grad_norm": 1.358654260635376, "learning_rate": 6.700655107222874e-05, "loss": 0.8147, "step": 31550 }, { "epoch": 1.2148219441770933, "grad_norm": 1.1854889392852783, "learning_rate": 6.697800627739552e-05, "loss": 0.7513, "step": 31555 }, { "epoch": 1.215014436958614, "grad_norm": 1.1493016481399536, "learning_rate": 6.694946450220483e-05, "loss": 1.0312, "step": 31560 }, { "epoch": 1.2152069297401347, "grad_norm": 1.5674629211425781, "learning_rate": 6.692092574926673e-05, "loss": 0.7985, "step": 31565 }, { "epoch": 1.2153994225216556, "grad_norm": 1.1671700477600098, "learning_rate": 6.689239002119084e-05, "loss": 0.9128, "step": 31570 }, { "epoch": 1.2155919153031762, "grad_norm": 1.3727703094482422, "learning_rate": 6.686385732058651e-05, "loss": 0.8517, "step": 31575 }, { "epoch": 1.215784408084697, "grad_norm": 0.9869505763053894, "learning_rate": 6.683532765006296e-05, "loss": 0.8142, "step": 31580 }, { "epoch": 1.2159769008662176, "grad_norm": 1.681252121925354, "learning_rate": 6.680680101222901e-05, "loss": 0.7783, "step": 31585 }, { "epoch": 1.2161693936477382, "grad_norm": 1.7720850706100464, "learning_rate": 6.677827740969317e-05, "loss": 0.922, "step": 31590 }, { "epoch": 1.216361886429259, "grad_norm": 1.2633975744247437, "learning_rate": 6.674975684506384e-05, "loss": 0.8357, "step": 31595 }, { "epoch": 1.2165543792107796, "grad_norm": 1.2901790142059326, "learning_rate": 6.672123932094898e-05, "loss": 0.856, "step": 31600 }, { "epoch": 1.2167468719923002, "grad_norm": 0.9209010004997253, "learning_rate": 6.66927248399563e-05, "loss": 0.8126, 
"step": 31605 }, { "epoch": 1.216939364773821, "grad_norm": 1.8657188415527344, "learning_rate": 6.666421340469331e-05, "loss": 0.8026, "step": 31610 }, { "epoch": 1.2171318575553416, "grad_norm": 0.8723480701446533, "learning_rate": 6.66357050177672e-05, "loss": 0.6801, "step": 31615 }, { "epoch": 1.2173243503368623, "grad_norm": 1.3046056032180786, "learning_rate": 6.660719968178479e-05, "loss": 0.7513, "step": 31620 }, { "epoch": 1.2175168431183832, "grad_norm": 1.1734675168991089, "learning_rate": 6.657869739935282e-05, "loss": 0.7972, "step": 31625 }, { "epoch": 1.2177093358999038, "grad_norm": 1.3330581188201904, "learning_rate": 6.655019817307758e-05, "loss": 0.8381, "step": 31630 }, { "epoch": 1.2179018286814245, "grad_norm": 1.2588646411895752, "learning_rate": 6.652170200556508e-05, "loss": 0.8309, "step": 31635 }, { "epoch": 1.2180943214629452, "grad_norm": 1.5502039194107056, "learning_rate": 6.649320889942121e-05, "loss": 0.9067, "step": 31640 }, { "epoch": 1.2182868142444658, "grad_norm": 1.2267836332321167, "learning_rate": 6.646471885725139e-05, "loss": 0.9091, "step": 31645 }, { "epoch": 1.2184793070259865, "grad_norm": 0.8720155954360962, "learning_rate": 6.643623188166088e-05, "loss": 0.9406, "step": 31650 }, { "epoch": 1.2186717998075072, "grad_norm": 2.29374623298645, "learning_rate": 6.640774797525464e-05, "loss": 0.976, "step": 31655 }, { "epoch": 1.2188642925890278, "grad_norm": 1.3115676641464233, "learning_rate": 6.637926714063734e-05, "loss": 0.7881, "step": 31660 }, { "epoch": 1.2190567853705485, "grad_norm": 1.2413272857666016, "learning_rate": 6.635078938041328e-05, "loss": 0.8841, "step": 31665 }, { "epoch": 1.2192492781520694, "grad_norm": 1.0983569622039795, "learning_rate": 6.632231469718668e-05, "loss": 0.8903, "step": 31670 }, { "epoch": 1.21944177093359, "grad_norm": 1.3633029460906982, "learning_rate": 6.62938430935613e-05, "loss": 0.7965, "step": 31675 }, { "epoch": 1.2196342637151107, "grad_norm": 1.1858634948730469, 
"learning_rate": 6.626537457214064e-05, "loss": 0.7937, "step": 31680 }, { "epoch": 1.2198267564966314, "grad_norm": 1.2882673740386963, "learning_rate": 6.623690913552804e-05, "loss": 0.9946, "step": 31685 }, { "epoch": 1.220019249278152, "grad_norm": 0.9842979311943054, "learning_rate": 6.620844678632641e-05, "loss": 0.7972, "step": 31690 }, { "epoch": 1.2202117420596728, "grad_norm": 1.0719999074935913, "learning_rate": 6.617998752713852e-05, "loss": 0.823, "step": 31695 }, { "epoch": 1.2204042348411934, "grad_norm": 1.3692537546157837, "learning_rate": 6.615153136056674e-05, "loss": 0.7786, "step": 31700 }, { "epoch": 1.220596727622714, "grad_norm": 1.2221060991287231, "learning_rate": 6.612307828921313e-05, "loss": 0.7953, "step": 31705 }, { "epoch": 1.2207892204042348, "grad_norm": 1.3177229166030884, "learning_rate": 6.609462831567964e-05, "loss": 0.7488, "step": 31710 }, { "epoch": 1.2209817131857554, "grad_norm": 0.9634941816329956, "learning_rate": 6.60661814425678e-05, "loss": 0.7675, "step": 31715 }, { "epoch": 1.221174205967276, "grad_norm": 0.9454881548881531, "learning_rate": 6.603773767247885e-05, "loss": 0.7102, "step": 31720 }, { "epoch": 1.221366698748797, "grad_norm": 1.579321026802063, "learning_rate": 6.60092970080138e-05, "loss": 0.9614, "step": 31725 }, { "epoch": 1.2215591915303177, "grad_norm": 1.7888600826263428, "learning_rate": 6.59808594517734e-05, "loss": 0.96, "step": 31730 }, { "epoch": 1.2217516843118383, "grad_norm": 1.8311741352081299, "learning_rate": 6.595242500635804e-05, "loss": 0.9041, "step": 31735 }, { "epoch": 1.221944177093359, "grad_norm": 1.2068012952804565, "learning_rate": 6.592399367436787e-05, "loss": 0.9126, "step": 31740 }, { "epoch": 1.2221366698748797, "grad_norm": 0.8985161781311035, "learning_rate": 6.589556545840274e-05, "loss": 0.9111, "step": 31745 }, { "epoch": 1.2223291626564003, "grad_norm": 1.460936427116394, "learning_rate": 6.586714036106221e-05, "loss": 0.6821, "step": 31750 }, { "epoch": 
1.222521655437921, "grad_norm": 1.1933438777923584, "learning_rate": 6.583871838494562e-05, "loss": 0.7843, "step": 31755 }, { "epoch": 1.2227141482194417, "grad_norm": 1.5719155073165894, "learning_rate": 6.581029953265192e-05, "loss": 0.8344, "step": 31760 }, { "epoch": 1.2229066410009626, "grad_norm": 1.3877325057983398, "learning_rate": 6.578188380677981e-05, "loss": 0.7456, "step": 31765 }, { "epoch": 1.2230991337824832, "grad_norm": 1.7668566703796387, "learning_rate": 6.575347120992779e-05, "loss": 1.0394, "step": 31770 }, { "epoch": 1.223291626564004, "grad_norm": 1.813501000404358, "learning_rate": 6.572506174469398e-05, "loss": 0.8027, "step": 31775 }, { "epoch": 1.2234841193455246, "grad_norm": 0.9930652976036072, "learning_rate": 6.569665541367616e-05, "loss": 0.9634, "step": 31780 }, { "epoch": 1.2236766121270453, "grad_norm": 1.6402193307876587, "learning_rate": 6.566825221947201e-05, "loss": 0.8719, "step": 31785 }, { "epoch": 1.223869104908566, "grad_norm": 2.223097562789917, "learning_rate": 6.563985216467879e-05, "loss": 0.8185, "step": 31790 }, { "epoch": 1.2240615976900866, "grad_norm": 1.260113000869751, "learning_rate": 6.56114552518934e-05, "loss": 0.835, "step": 31795 }, { "epoch": 1.2242540904716073, "grad_norm": 2.260995864868164, "learning_rate": 6.558306148371269e-05, "loss": 0.8712, "step": 31800 }, { "epoch": 1.224446583253128, "grad_norm": 1.3736824989318848, "learning_rate": 6.5554670862733e-05, "loss": 0.8764, "step": 31805 }, { "epoch": 1.2246390760346486, "grad_norm": 1.0253816843032837, "learning_rate": 6.552628339155044e-05, "loss": 0.8936, "step": 31810 }, { "epoch": 1.2248315688161693, "grad_norm": 1.0663061141967773, "learning_rate": 6.549789907276094e-05, "loss": 0.8789, "step": 31815 }, { "epoch": 1.2250240615976902, "grad_norm": 1.781774640083313, "learning_rate": 6.546951790896003e-05, "loss": 0.8116, "step": 31820 }, { "epoch": 1.2252165543792108, "grad_norm": 1.6696699857711792, "learning_rate": 6.544113990274291e-05, 
"loss": 0.9066, "step": 31825 }, { "epoch": 1.2254090471607315, "grad_norm": 1.6726038455963135, "learning_rate": 6.541276505670466e-05, "loss": 0.7497, "step": 31830 }, { "epoch": 1.2256015399422522, "grad_norm": 1.851981282234192, "learning_rate": 6.538439337343991e-05, "loss": 0.98, "step": 31835 }, { "epoch": 1.2257940327237729, "grad_norm": 1.3425476551055908, "learning_rate": 6.535602485554307e-05, "loss": 0.8501, "step": 31840 }, { "epoch": 1.2259865255052935, "grad_norm": 1.2194117307662964, "learning_rate": 6.532765950560827e-05, "loss": 0.8375, "step": 31845 }, { "epoch": 1.2261790182868142, "grad_norm": 1.3500804901123047, "learning_rate": 6.529929732622932e-05, "loss": 0.9349, "step": 31850 }, { "epoch": 1.2263715110683349, "grad_norm": 1.5260719060897827, "learning_rate": 6.527093831999977e-05, "loss": 0.8235, "step": 31855 }, { "epoch": 1.2265640038498558, "grad_norm": 1.9417953491210938, "learning_rate": 6.524258248951285e-05, "loss": 0.8308, "step": 31860 }, { "epoch": 1.2267564966313764, "grad_norm": 1.3070498704910278, "learning_rate": 6.521422983736151e-05, "loss": 0.8632, "step": 31865 }, { "epoch": 1.226948989412897, "grad_norm": 0.9319446086883545, "learning_rate": 6.51858803661384e-05, "loss": 0.8022, "step": 31870 }, { "epoch": 1.2271414821944178, "grad_norm": 1.5047639608383179, "learning_rate": 6.515753407843595e-05, "loss": 0.9273, "step": 31875 }, { "epoch": 1.2273339749759384, "grad_norm": 1.0930204391479492, "learning_rate": 6.512919097684617e-05, "loss": 0.8992, "step": 31880 }, { "epoch": 1.227526467757459, "grad_norm": 1.2178794145584106, "learning_rate": 6.510085106396085e-05, "loss": 0.8333, "step": 31885 }, { "epoch": 1.2277189605389798, "grad_norm": 1.62590491771698, "learning_rate": 6.507251434237155e-05, "loss": 0.7445, "step": 31890 }, { "epoch": 1.2279114533205004, "grad_norm": 3.1164746284484863, "learning_rate": 6.504418081466945e-05, "loss": 1.0109, "step": 31895 }, { "epoch": 1.2281039461020211, "grad_norm": 
1.2229804992675781, "learning_rate": 6.50158504834454e-05, "loss": 0.8863, "step": 31900 }, { "epoch": 1.2282964388835418, "grad_norm": 1.2148635387420654, "learning_rate": 6.498752335129014e-05, "loss": 0.817, "step": 31905 }, { "epoch": 1.2284889316650625, "grad_norm": 0.9945757389068604, "learning_rate": 6.495919942079391e-05, "loss": 0.819, "step": 31910 }, { "epoch": 1.2286814244465833, "grad_norm": 2.1877007484436035, "learning_rate": 6.493087869454673e-05, "loss": 0.8977, "step": 31915 }, { "epoch": 1.228873917228104, "grad_norm": 2.055363178253174, "learning_rate": 6.490256117513845e-05, "loss": 0.9108, "step": 31920 }, { "epoch": 1.2290664100096247, "grad_norm": 1.7472233772277832, "learning_rate": 6.487424686515842e-05, "loss": 1.0029, "step": 31925 }, { "epoch": 1.2292589027911454, "grad_norm": 0.9974002838134766, "learning_rate": 6.48459357671958e-05, "loss": 0.8006, "step": 31930 }, { "epoch": 1.229451395572666, "grad_norm": 1.9604872465133667, "learning_rate": 6.481762788383951e-05, "loss": 0.9948, "step": 31935 }, { "epoch": 1.2296438883541867, "grad_norm": 0.9676084518432617, "learning_rate": 6.478932321767808e-05, "loss": 0.7514, "step": 31940 }, { "epoch": 1.2298363811357074, "grad_norm": 2.2556610107421875, "learning_rate": 6.476102177129978e-05, "loss": 0.8251, "step": 31945 }, { "epoch": 1.230028873917228, "grad_norm": 0.8798750042915344, "learning_rate": 6.473272354729263e-05, "loss": 0.772, "step": 31950 }, { "epoch": 1.2302213666987487, "grad_norm": 1.4595062732696533, "learning_rate": 6.470442854824425e-05, "loss": 0.9378, "step": 31955 }, { "epoch": 1.2304138594802696, "grad_norm": 0.9831540584564209, "learning_rate": 6.467613677674212e-05, "loss": 0.752, "step": 31960 }, { "epoch": 1.2306063522617903, "grad_norm": 2.3248252868652344, "learning_rate": 6.464784823537324e-05, "loss": 0.8944, "step": 31965 }, { "epoch": 1.230798845043311, "grad_norm": 1.3205488920211792, "learning_rate": 6.461956292672447e-05, "loss": 0.7635, "step": 31970 }, 
{ "epoch": 1.2309913378248316, "grad_norm": 2.114743232727051, "learning_rate": 6.459128085338229e-05, "loss": 0.8587, "step": 31975 }, { "epoch": 1.2311838306063523, "grad_norm": 1.4038870334625244, "learning_rate": 6.456300201793292e-05, "loss": 0.8453, "step": 31980 }, { "epoch": 1.231376323387873, "grad_norm": 1.8678901195526123, "learning_rate": 6.453472642296226e-05, "loss": 0.9314, "step": 31985 }, { "epoch": 1.2315688161693936, "grad_norm": 1.9399640560150146, "learning_rate": 6.450645407105594e-05, "loss": 0.9189, "step": 31990 }, { "epoch": 1.2317613089509143, "grad_norm": 1.849877953529358, "learning_rate": 6.447818496479927e-05, "loss": 0.8623, "step": 31995 }, { "epoch": 1.231953801732435, "grad_norm": 0.9687787890434265, "learning_rate": 6.444991910677725e-05, "loss": 0.7928, "step": 32000 }, { "epoch": 1.2321462945139556, "grad_norm": 1.2318834066390991, "learning_rate": 6.442165649957467e-05, "loss": 0.9324, "step": 32005 }, { "epoch": 1.2323387872954763, "grad_norm": 1.2354921102523804, "learning_rate": 6.439339714577592e-05, "loss": 0.9398, "step": 32010 }, { "epoch": 1.2325312800769972, "grad_norm": 1.4701021909713745, "learning_rate": 6.436514104796507e-05, "loss": 0.8559, "step": 32015 }, { "epoch": 1.2327237728585179, "grad_norm": 0.9806448221206665, "learning_rate": 6.433688820872607e-05, "loss": 0.8319, "step": 32020 }, { "epoch": 1.2329162656400385, "grad_norm": 1.2079403400421143, "learning_rate": 6.430863863064238e-05, "loss": 0.8578, "step": 32025 }, { "epoch": 1.2331087584215592, "grad_norm": 1.240198016166687, "learning_rate": 6.428039231629723e-05, "loss": 0.8681, "step": 32030 }, { "epoch": 1.2333012512030799, "grad_norm": 1.0847597122192383, "learning_rate": 6.425214926827361e-05, "loss": 0.8595, "step": 32035 }, { "epoch": 1.2334937439846005, "grad_norm": 1.1608355045318604, "learning_rate": 6.422390948915414e-05, "loss": 0.7778, "step": 32040 }, { "epoch": 1.2336862367661212, "grad_norm": 0.8766851425170898, "learning_rate": 
6.419567298152111e-05, "loss": 0.8284, "step": 32045 }, { "epoch": 1.2338787295476419, "grad_norm": 1.4966709613800049, "learning_rate": 6.416743974795665e-05, "loss": 0.7442, "step": 32050 }, { "epoch": 1.2340712223291628, "grad_norm": 1.342774748802185, "learning_rate": 6.413920979104244e-05, "loss": 0.875, "step": 32055 }, { "epoch": 1.2342637151106834, "grad_norm": 1.4193564653396606, "learning_rate": 6.411098311335993e-05, "loss": 0.7753, "step": 32060 }, { "epoch": 1.2344562078922041, "grad_norm": 1.7735865116119385, "learning_rate": 6.408275971749027e-05, "loss": 0.7883, "step": 32065 }, { "epoch": 1.2346487006737248, "grad_norm": 1.5808089971542358, "learning_rate": 6.405453960601432e-05, "loss": 0.7633, "step": 32070 }, { "epoch": 1.2348411934552455, "grad_norm": 1.0840727090835571, "learning_rate": 6.402632278151259e-05, "loss": 0.8083, "step": 32075 }, { "epoch": 1.2350336862367661, "grad_norm": 1.475110650062561, "learning_rate": 6.399810924656537e-05, "loss": 0.6657, "step": 32080 }, { "epoch": 1.2352261790182868, "grad_norm": 0.9028719663619995, "learning_rate": 6.396989900375256e-05, "loss": 0.9991, "step": 32085 }, { "epoch": 1.2354186717998075, "grad_norm": 1.3939464092254639, "learning_rate": 6.394169205565377e-05, "loss": 0.8523, "step": 32090 }, { "epoch": 1.2356111645813281, "grad_norm": 1.7557820081710815, "learning_rate": 6.391348840484841e-05, "loss": 0.7734, "step": 32095 }, { "epoch": 1.2358036573628488, "grad_norm": 1.2897764444351196, "learning_rate": 6.388528805391548e-05, "loss": 0.7278, "step": 32100 }, { "epoch": 1.2359961501443695, "grad_norm": 1.3506861925125122, "learning_rate": 6.38570910054337e-05, "loss": 0.795, "step": 32105 }, { "epoch": 1.2361886429258904, "grad_norm": 2.072033405303955, "learning_rate": 6.382889726198154e-05, "loss": 0.8527, "step": 32110 }, { "epoch": 1.236381135707411, "grad_norm": 1.8020820617675781, "learning_rate": 6.380070682613711e-05, "loss": 0.9461, "step": 32115 }, { "epoch": 1.2365736284889317, 
"grad_norm": 2.3181891441345215, "learning_rate": 6.377251970047822e-05, "loss": 0.8269, "step": 32120 }, { "epoch": 1.2367661212704524, "grad_norm": 1.5341979265213013, "learning_rate": 6.374433588758246e-05, "loss": 0.9718, "step": 32125 }, { "epoch": 1.236958614051973, "grad_norm": 1.0951958894729614, "learning_rate": 6.3716155390027e-05, "loss": 0.8376, "step": 32130 }, { "epoch": 1.2371511068334937, "grad_norm": 1.2054345607757568, "learning_rate": 6.368797821038874e-05, "loss": 0.7169, "step": 32135 }, { "epoch": 1.2373435996150144, "grad_norm": 1.4558501243591309, "learning_rate": 6.365980435124435e-05, "loss": 0.8068, "step": 32140 }, { "epoch": 1.237536092396535, "grad_norm": 1.592523217201233, "learning_rate": 6.363163381517015e-05, "loss": 0.8709, "step": 32145 }, { "epoch": 1.2377285851780557, "grad_norm": 1.191009759902954, "learning_rate": 6.360346660474206e-05, "loss": 0.9404, "step": 32150 }, { "epoch": 1.2379210779595766, "grad_norm": 1.2547787427902222, "learning_rate": 6.357530272253587e-05, "loss": 0.7077, "step": 32155 }, { "epoch": 1.2381135707410973, "grad_norm": 1.442322850227356, "learning_rate": 6.354714217112698e-05, "loss": 0.8027, "step": 32160 }, { "epoch": 1.238306063522618, "grad_norm": 1.3657112121582031, "learning_rate": 6.351898495309041e-05, "loss": 0.8016, "step": 32165 }, { "epoch": 1.2384985563041386, "grad_norm": 1.7618088722229004, "learning_rate": 6.349083107100104e-05, "loss": 0.8553, "step": 32170 }, { "epoch": 1.2386910490856593, "grad_norm": 1.7059003114700317, "learning_rate": 6.346268052743331e-05, "loss": 0.5883, "step": 32175 }, { "epoch": 1.23888354186718, "grad_norm": 1.33328115940094, "learning_rate": 6.343453332496141e-05, "loss": 0.7425, "step": 32180 }, { "epoch": 1.2390760346487006, "grad_norm": 1.3579884767532349, "learning_rate": 6.340638946615922e-05, "loss": 0.7521, "step": 32185 }, { "epoch": 1.2392685274302213, "grad_norm": 1.7783567905426025, "learning_rate": 6.337824895360028e-05, "loss": 0.8292, 
"step": 32190 }, { "epoch": 1.239461020211742, "grad_norm": 1.4413247108459473, "learning_rate": 6.335011178985788e-05, "loss": 0.781, "step": 32195 }, { "epoch": 1.2396535129932627, "grad_norm": 1.817164421081543, "learning_rate": 6.3321977977505e-05, "loss": 0.8307, "step": 32200 }, { "epoch": 1.2398460057747833, "grad_norm": 0.7775023579597473, "learning_rate": 6.329384751911422e-05, "loss": 0.8496, "step": 32205 }, { "epoch": 1.2400384985563042, "grad_norm": 1.8241838216781616, "learning_rate": 6.326572041725795e-05, "loss": 0.8688, "step": 32210 }, { "epoch": 1.2402309913378249, "grad_norm": 0.9852082133293152, "learning_rate": 6.323759667450824e-05, "loss": 0.8371, "step": 32215 }, { "epoch": 1.2404234841193456, "grad_norm": 1.4170035123825073, "learning_rate": 6.320947629343672e-05, "loss": 1.0588, "step": 32220 }, { "epoch": 1.2406159769008662, "grad_norm": 1.4265270233154297, "learning_rate": 6.318135927661493e-05, "loss": 0.8352, "step": 32225 }, { "epoch": 1.240808469682387, "grad_norm": 0.9820341467857361, "learning_rate": 6.315324562661393e-05, "loss": 0.9075, "step": 32230 }, { "epoch": 1.2410009624639076, "grad_norm": 1.0748177766799927, "learning_rate": 6.312513534600448e-05, "loss": 0.9917, "step": 32235 }, { "epoch": 1.2411934552454282, "grad_norm": 1.1158158779144287, "learning_rate": 6.309702843735719e-05, "loss": 0.851, "step": 32240 }, { "epoch": 1.241385948026949, "grad_norm": 1.3649555444717407, "learning_rate": 6.306892490324217e-05, "loss": 0.9673, "step": 32245 }, { "epoch": 1.2415784408084698, "grad_norm": 1.7820628881454468, "learning_rate": 6.30408247462293e-05, "loss": 0.8305, "step": 32250 }, { "epoch": 1.2417709335899905, "grad_norm": 1.373089075088501, "learning_rate": 6.30127279688882e-05, "loss": 0.881, "step": 32255 }, { "epoch": 1.2419634263715111, "grad_norm": 1.3298135995864868, "learning_rate": 6.298463457378812e-05, "loss": 0.9557, "step": 32260 }, { "epoch": 1.2421559191530318, "grad_norm": 1.0167731046676636, 
"learning_rate": 6.295654456349794e-05, "loss": 0.9262, "step": 32265 }, { "epoch": 1.2423484119345525, "grad_norm": 0.9300015568733215, "learning_rate": 6.292845794058644e-05, "loss": 0.8913, "step": 32270 }, { "epoch": 1.2425409047160731, "grad_norm": 2.170581102371216, "learning_rate": 6.290037470762186e-05, "loss": 0.8422, "step": 32275 }, { "epoch": 1.2427333974975938, "grad_norm": 2.3894753456115723, "learning_rate": 6.287229486717224e-05, "loss": 0.8744, "step": 32280 }, { "epoch": 1.2429258902791145, "grad_norm": 1.2343543767929077, "learning_rate": 6.284421842180534e-05, "loss": 0.757, "step": 32285 }, { "epoch": 1.2431183830606352, "grad_norm": 1.0302842855453491, "learning_rate": 6.281614537408849e-05, "loss": 0.8185, "step": 32290 }, { "epoch": 1.2433108758421558, "grad_norm": 1.0366548299789429, "learning_rate": 6.278807572658883e-05, "loss": 0.9143, "step": 32295 }, { "epoch": 1.2435033686236765, "grad_norm": 1.3391799926757812, "learning_rate": 6.276000948187317e-05, "loss": 0.8389, "step": 32300 }, { "epoch": 1.2436958614051974, "grad_norm": 1.0500547885894775, "learning_rate": 6.273194664250795e-05, "loss": 0.7855, "step": 32305 }, { "epoch": 1.243888354186718, "grad_norm": 1.3002054691314697, "learning_rate": 6.270388721105927e-05, "loss": 0.8311, "step": 32310 }, { "epoch": 1.2440808469682387, "grad_norm": 1.2565590143203735, "learning_rate": 6.267583119009309e-05, "loss": 0.7857, "step": 32315 }, { "epoch": 1.2442733397497594, "grad_norm": 1.8590493202209473, "learning_rate": 6.26477785821749e-05, "loss": 0.9076, "step": 32320 }, { "epoch": 1.24446583253128, "grad_norm": 2.736550807952881, "learning_rate": 6.261972938986989e-05, "loss": 0.8985, "step": 32325 }, { "epoch": 1.2446583253128007, "grad_norm": 1.4446461200714111, "learning_rate": 6.259168361574303e-05, "loss": 0.848, "step": 32330 }, { "epoch": 1.2448508180943214, "grad_norm": 2.1344213485717773, "learning_rate": 6.256364126235892e-05, "loss": 0.9543, "step": 32335 }, { "epoch": 
1.245043310875842, "grad_norm": 1.068670392036438, "learning_rate": 6.253560233228176e-05, "loss": 0.8341, "step": 32340 }, { "epoch": 1.245235803657363, "grad_norm": 0.9258108735084534, "learning_rate": 6.250756682807564e-05, "loss": 0.7102, "step": 32345 }, { "epoch": 1.2454282964388836, "grad_norm": 1.4921131134033203, "learning_rate": 6.247953475230417e-05, "loss": 0.854, "step": 32350 }, { "epoch": 1.2456207892204043, "grad_norm": 1.102432131767273, "learning_rate": 6.245150610753066e-05, "loss": 0.9141, "step": 32355 }, { "epoch": 1.245813282001925, "grad_norm": 1.5257620811462402, "learning_rate": 6.24234808963182e-05, "loss": 0.8949, "step": 32360 }, { "epoch": 1.2460057747834457, "grad_norm": 2.053269624710083, "learning_rate": 6.239545912122951e-05, "loss": 0.9681, "step": 32365 }, { "epoch": 1.2461982675649663, "grad_norm": 2.117687702178955, "learning_rate": 6.236744078482693e-05, "loss": 0.8536, "step": 32370 }, { "epoch": 1.246390760346487, "grad_norm": 1.0575151443481445, "learning_rate": 6.233942588967264e-05, "loss": 0.6996, "step": 32375 }, { "epoch": 1.2465832531280077, "grad_norm": 1.6774235963821411, "learning_rate": 6.231141443832835e-05, "loss": 0.7842, "step": 32380 }, { "epoch": 1.2467757459095283, "grad_norm": 0.9772216081619263, "learning_rate": 6.228340643335554e-05, "loss": 0.825, "step": 32385 }, { "epoch": 1.246968238691049, "grad_norm": 1.221349835395813, "learning_rate": 6.225540187731538e-05, "loss": 0.8491, "step": 32390 }, { "epoch": 1.2471607314725697, "grad_norm": 1.2611428499221802, "learning_rate": 6.222740077276869e-05, "loss": 0.8939, "step": 32395 }, { "epoch": 1.2473532242540906, "grad_norm": 1.5732476711273193, "learning_rate": 6.219940312227596e-05, "loss": 0.7606, "step": 32400 }, { "epoch": 1.2475457170356112, "grad_norm": 1.725197196006775, "learning_rate": 6.217140892839744e-05, "loss": 0.8467, "step": 32405 }, { "epoch": 1.247738209817132, "grad_norm": 1.427253246307373, "learning_rate": 6.214341819369294e-05, 
"loss": 0.7289, "step": 32410 }, { "epoch": 1.2479307025986526, "grad_norm": 1.0685710906982422, "learning_rate": 6.211543092072209e-05, "loss": 0.9277, "step": 32415 }, { "epoch": 1.2481231953801732, "grad_norm": 1.519148826599121, "learning_rate": 6.208744711204413e-05, "loss": 0.9213, "step": 32420 }, { "epoch": 1.248315688161694, "grad_norm": 0.941789448261261, "learning_rate": 6.205946677021797e-05, "loss": 0.8961, "step": 32425 }, { "epoch": 1.2485081809432146, "grad_norm": 1.4877368211746216, "learning_rate": 6.203148989780223e-05, "loss": 0.8506, "step": 32430 }, { "epoch": 1.2487006737247353, "grad_norm": 1.1104483604431152, "learning_rate": 6.200351649735524e-05, "loss": 0.859, "step": 32435 }, { "epoch": 1.248893166506256, "grad_norm": 1.6220526695251465, "learning_rate": 6.197554657143496e-05, "loss": 0.978, "step": 32440 }, { "epoch": 1.2490856592877768, "grad_norm": 1.7555328607559204, "learning_rate": 6.194758012259903e-05, "loss": 1.0804, "step": 32445 }, { "epoch": 1.2492781520692975, "grad_norm": 1.939076542854309, "learning_rate": 6.191961715340487e-05, "loss": 0.8503, "step": 32450 }, { "epoch": 1.2494706448508182, "grad_norm": 1.9745993614196777, "learning_rate": 6.18916576664094e-05, "loss": 0.9226, "step": 32455 }, { "epoch": 1.2496631376323388, "grad_norm": 0.7877691984176636, "learning_rate": 6.186370166416943e-05, "loss": 0.8174, "step": 32460 }, { "epoch": 1.2498556304138595, "grad_norm": 1.0937036275863647, "learning_rate": 6.183574914924135e-05, "loss": 0.9914, "step": 32465 }, { "epoch": 1.2500481231953802, "grad_norm": 1.2272846698760986, "learning_rate": 6.180780012418112e-05, "loss": 0.8287, "step": 32470 }, { "epoch": 1.2502406159769008, "grad_norm": 1.2231870889663696, "learning_rate": 6.177985459154462e-05, "loss": 0.889, "step": 32475 }, { "epoch": 1.2504331087584215, "grad_norm": 1.420955777168274, "learning_rate": 6.175191255388723e-05, "loss": 0.9343, "step": 32480 }, { "epoch": 1.2506256015399422, "grad_norm": 
0.7461833953857422, "learning_rate": 6.172397401376404e-05, "loss": 0.7098, "step": 32485 }, { "epoch": 1.2508180943214628, "grad_norm": 0.9062157869338989, "learning_rate": 6.169603897372988e-05, "loss": 0.8073, "step": 32490 }, { "epoch": 1.2510105871029835, "grad_norm": 0.5169772505760193, "learning_rate": 6.166810743633924e-05, "loss": 0.9059, "step": 32495 }, { "epoch": 1.2512030798845044, "grad_norm": 1.1018903255462646, "learning_rate": 6.164017940414621e-05, "loss": 0.9242, "step": 32500 }, { "epoch": 1.251395572666025, "grad_norm": 1.6499050855636597, "learning_rate": 6.16122548797047e-05, "loss": 0.7794, "step": 32505 }, { "epoch": 1.2515880654475458, "grad_norm": 1.915618658065796, "learning_rate": 6.158433386556817e-05, "loss": 0.8513, "step": 32510 }, { "epoch": 1.2517805582290664, "grad_norm": 1.2275880575180054, "learning_rate": 6.155641636428981e-05, "loss": 0.8407, "step": 32515 }, { "epoch": 1.251973051010587, "grad_norm": 1.4273251295089722, "learning_rate": 6.152850237842255e-05, "loss": 0.7142, "step": 32520 }, { "epoch": 1.2521655437921078, "grad_norm": 1.4185856580734253, "learning_rate": 6.15005919105189e-05, "loss": 0.7348, "step": 32525 }, { "epoch": 1.2523580365736284, "grad_norm": 1.3497728109359741, "learning_rate": 6.147268496313101e-05, "loss": 0.9843, "step": 32530 }, { "epoch": 1.252550529355149, "grad_norm": 1.5742275714874268, "learning_rate": 6.144478153881093e-05, "loss": 0.9799, "step": 32535 }, { "epoch": 1.25274302213667, "grad_norm": 1.4493976831436157, "learning_rate": 6.141688164011014e-05, "loss": 0.8815, "step": 32540 }, { "epoch": 1.2529355149181907, "grad_norm": 1.0895041227340698, "learning_rate": 6.138898526957993e-05, "loss": 0.8332, "step": 32545 }, { "epoch": 1.2531280076997113, "grad_norm": 2.0370841026306152, "learning_rate": 6.136109242977126e-05, "loss": 1.0292, "step": 32550 }, { "epoch": 1.253320500481232, "grad_norm": 1.0882296562194824, "learning_rate": 6.133320312323473e-05, "loss": 0.7347, "step": 32555 
}, { "epoch": 1.2535129932627527, "grad_norm": 1.2545933723449707, "learning_rate": 6.13053173525206e-05, "loss": 0.7906, "step": 32560 }, { "epoch": 1.2537054860442733, "grad_norm": 1.7179350852966309, "learning_rate": 6.127743512017892e-05, "loss": 0.9072, "step": 32565 }, { "epoch": 1.253897978825794, "grad_norm": 1.0224990844726562, "learning_rate": 6.124955642875927e-05, "loss": 0.8066, "step": 32570 }, { "epoch": 1.2540904716073147, "grad_norm": 1.681639313697815, "learning_rate": 6.122168128081096e-05, "loss": 0.9829, "step": 32575 }, { "epoch": 1.2542829643888354, "grad_norm": 2.0990216732025146, "learning_rate": 6.119380967888304e-05, "loss": 1.0276, "step": 32580 }, { "epoch": 1.254475457170356, "grad_norm": 1.4491196870803833, "learning_rate": 6.116594162552416e-05, "loss": 0.8186, "step": 32585 }, { "epoch": 1.2546679499518767, "grad_norm": 1.1606724262237549, "learning_rate": 6.113807712328261e-05, "loss": 0.857, "step": 32590 }, { "epoch": 1.2548604427333974, "grad_norm": 1.064584493637085, "learning_rate": 6.111021617470654e-05, "loss": 0.7753, "step": 32595 }, { "epoch": 1.2550529355149183, "grad_norm": 1.3536344766616821, "learning_rate": 6.108235878234357e-05, "loss": 0.8481, "step": 32600 }, { "epoch": 1.255245428296439, "grad_norm": 1.1210103034973145, "learning_rate": 6.105450494874105e-05, "loss": 0.8809, "step": 32605 }, { "epoch": 1.2554379210779596, "grad_norm": 1.9485646486282349, "learning_rate": 6.102665467644607e-05, "loss": 0.909, "step": 32610 }, { "epoch": 1.2556304138594803, "grad_norm": 0.9706501960754395, "learning_rate": 6.099880796800537e-05, "loss": 0.9546, "step": 32615 }, { "epoch": 1.255822906641001, "grad_norm": 0.9644052386283875, "learning_rate": 6.097096482596529e-05, "loss": 0.7045, "step": 32620 }, { "epoch": 1.2560153994225216, "grad_norm": 1.0382925271987915, "learning_rate": 6.094312525287197e-05, "loss": 0.7066, "step": 32625 }, { "epoch": 1.2562078922040423, "grad_norm": 1.0624562501907349, "learning_rate": 
6.0915289251271076e-05, "loss": 0.7864, "step": 32630 }, { "epoch": 1.2564003849855632, "grad_norm": 0.8789560794830322, "learning_rate": 6.088745682370809e-05, "loss": 0.9018, "step": 32635 }, { "epoch": 1.2565928777670838, "grad_norm": 1.3996667861938477, "learning_rate": 6.0859627972728086e-05, "loss": 0.8411, "step": 32640 }, { "epoch": 1.2567853705486045, "grad_norm": 1.1187728643417358, "learning_rate": 6.083180270087583e-05, "loss": 0.7984, "step": 32645 }, { "epoch": 1.2569778633301252, "grad_norm": 1.481068730354309, "learning_rate": 6.080398101069571e-05, "loss": 0.7356, "step": 32650 }, { "epoch": 1.2571703561116458, "grad_norm": 1.1698410511016846, "learning_rate": 6.0776162904731915e-05, "loss": 0.9059, "step": 32655 }, { "epoch": 1.2573628488931665, "grad_norm": 1.0608761310577393, "learning_rate": 6.0748348385528185e-05, "loss": 0.8176, "step": 32660 }, { "epoch": 1.2575553416746872, "grad_norm": 1.4496049880981445, "learning_rate": 6.0720537455627944e-05, "loss": 1.139, "step": 32665 }, { "epoch": 1.2577478344562079, "grad_norm": 1.7745342254638672, "learning_rate": 6.069273011757439e-05, "loss": 0.924, "step": 32670 }, { "epoch": 1.2579403272377285, "grad_norm": 1.3103641271591187, "learning_rate": 6.066492637391028e-05, "loss": 0.9955, "step": 32675 }, { "epoch": 1.2581328200192492, "grad_norm": 1.4893795251846313, "learning_rate": 6.063712622717803e-05, "loss": 0.7122, "step": 32680 }, { "epoch": 1.2583253128007699, "grad_norm": 1.8530791997909546, "learning_rate": 6.060932967991988e-05, "loss": 0.7563, "step": 32685 }, { "epoch": 1.2585178055822905, "grad_norm": 0.8076843619346619, "learning_rate": 6.058153673467759e-05, "loss": 0.7344, "step": 32690 }, { "epoch": 1.2587102983638114, "grad_norm": 1.2442243099212646, "learning_rate": 6.055374739399261e-05, "loss": 0.8517, "step": 32695 }, { "epoch": 1.258902791145332, "grad_norm": 1.8538851737976074, "learning_rate": 6.052596166040616e-05, "loss": 0.799, "step": 32700 }, { "epoch": 
1.2590952839268528, "grad_norm": 1.2174726724624634, "learning_rate": 6.049817953645902e-05, "loss": 0.734, "step": 32705 }, { "epoch": 1.2592877767083734, "grad_norm": 1.1436805725097656, "learning_rate": 6.0470401024691646e-05, "loss": 0.8512, "step": 32710 }, { "epoch": 1.259480269489894, "grad_norm": 0.9243611097335815, "learning_rate": 6.044262612764429e-05, "loss": 0.7716, "step": 32715 }, { "epoch": 1.2596727622714148, "grad_norm": 1.3061074018478394, "learning_rate": 6.0414854847856694e-05, "loss": 0.7524, "step": 32720 }, { "epoch": 1.2598652550529355, "grad_norm": 1.4081029891967773, "learning_rate": 6.038708718786843e-05, "loss": 0.8902, "step": 32725 }, { "epoch": 1.2600577478344563, "grad_norm": 1.0270228385925293, "learning_rate": 6.0359323150218616e-05, "loss": 1.0107, "step": 32730 }, { "epoch": 1.260250240615977, "grad_norm": 1.0451138019561768, "learning_rate": 6.033156273744607e-05, "loss": 0.9819, "step": 32735 }, { "epoch": 1.2604427333974977, "grad_norm": 0.9528657793998718, "learning_rate": 6.030380595208938e-05, "loss": 0.8202, "step": 32740 }, { "epoch": 1.2606352261790184, "grad_norm": 2.1417109966278076, "learning_rate": 6.027605279668666e-05, "loss": 0.8817, "step": 32745 }, { "epoch": 1.260827718960539, "grad_norm": 1.5614452362060547, "learning_rate": 6.024830327377573e-05, "loss": 0.8378, "step": 32750 }, { "epoch": 1.2610202117420597, "grad_norm": 1.5928235054016113, "learning_rate": 6.0220557385894184e-05, "loss": 0.9096, "step": 32755 }, { "epoch": 1.2612127045235804, "grad_norm": 0.8466683030128479, "learning_rate": 6.019281513557913e-05, "loss": 0.9457, "step": 32760 }, { "epoch": 1.261405197305101, "grad_norm": 1.4196317195892334, "learning_rate": 6.01650765253674e-05, "loss": 0.7927, "step": 32765 }, { "epoch": 1.2615976900866217, "grad_norm": 0.9929688572883606, "learning_rate": 6.01373415577956e-05, "loss": 0.8208, "step": 32770 }, { "epoch": 1.2617901828681424, "grad_norm": 1.321770429611206, "learning_rate": 
6.0109610235399826e-05, "loss": 0.9258, "step": 32775 }, { "epoch": 1.261982675649663, "grad_norm": 1.1568975448608398, "learning_rate": 6.008188256071592e-05, "loss": 0.9201, "step": 32780 }, { "epoch": 1.2621751684311837, "grad_norm": 1.2595579624176025, "learning_rate": 6.0054158536279446e-05, "loss": 0.8037, "step": 32785 }, { "epoch": 1.2623676612127046, "grad_norm": 1.0337523221969604, "learning_rate": 6.0026438164625565e-05, "loss": 0.7372, "step": 32790 }, { "epoch": 1.2625601539942253, "grad_norm": 2.097609758377075, "learning_rate": 5.999872144828907e-05, "loss": 0.9208, "step": 32795 }, { "epoch": 1.262752646775746, "grad_norm": 0.9220959544181824, "learning_rate": 5.997100838980456e-05, "loss": 0.7774, "step": 32800 }, { "epoch": 1.2629451395572666, "grad_norm": 0.9968023896217346, "learning_rate": 5.994329899170617e-05, "loss": 0.8029, "step": 32805 }, { "epoch": 1.2631376323387873, "grad_norm": 2.015770196914673, "learning_rate": 5.991559325652769e-05, "loss": 0.8083, "step": 32810 }, { "epoch": 1.263330125120308, "grad_norm": 1.5899488925933838, "learning_rate": 5.988789118680272e-05, "loss": 0.7113, "step": 32815 }, { "epoch": 1.2635226179018286, "grad_norm": 1.1025646924972534, "learning_rate": 5.986019278506436e-05, "loss": 0.8005, "step": 32820 }, { "epoch": 1.2637151106833493, "grad_norm": 1.4543554782867432, "learning_rate": 5.983249805384545e-05, "loss": 0.7893, "step": 32825 }, { "epoch": 1.2639076034648702, "grad_norm": 1.1230753660202026, "learning_rate": 5.980480699567854e-05, "loss": 0.9324, "step": 32830 }, { "epoch": 1.2641000962463909, "grad_norm": 1.583644986152649, "learning_rate": 5.9777119613095755e-05, "loss": 0.7477, "step": 32835 }, { "epoch": 1.2642925890279115, "grad_norm": 1.9501726627349854, "learning_rate": 5.9749435908628915e-05, "loss": 0.8344, "step": 32840 }, { "epoch": 1.2644850818094322, "grad_norm": 0.6011807918548584, "learning_rate": 5.972175588480954e-05, "loss": 0.7556, "step": 32845 }, { "epoch": 
1.2646775745909529, "grad_norm": 1.0539261102676392, "learning_rate": 5.9694079544168766e-05, "loss": 0.8686, "step": 32850 }, { "epoch": 1.2648700673724735, "grad_norm": 1.184226393699646, "learning_rate": 5.966640688923739e-05, "loss": 0.908, "step": 32855 }, { "epoch": 1.2650625601539942, "grad_norm": 0.9844930768013, "learning_rate": 5.963873792254595e-05, "loss": 0.724, "step": 32860 }, { "epoch": 1.2652550529355149, "grad_norm": 1.4224261045455933, "learning_rate": 5.9611072646624565e-05, "loss": 0.8247, "step": 32865 }, { "epoch": 1.2654475457170355, "grad_norm": 1.5510427951812744, "learning_rate": 5.9583411064002995e-05, "loss": 0.8445, "step": 32870 }, { "epoch": 1.2656400384985562, "grad_norm": 1.473929524421692, "learning_rate": 5.955575317721078e-05, "loss": 0.7831, "step": 32875 }, { "epoch": 1.265832531280077, "grad_norm": 1.2767577171325684, "learning_rate": 5.9528098988777024e-05, "loss": 0.7781, "step": 32880 }, { "epoch": 1.2660250240615976, "grad_norm": 1.8396121263504028, "learning_rate": 5.950044850123047e-05, "loss": 0.856, "step": 32885 }, { "epoch": 1.2662175168431185, "grad_norm": 0.8659001588821411, "learning_rate": 5.947280171709966e-05, "loss": 0.9269, "step": 32890 }, { "epoch": 1.2664100096246391, "grad_norm": 2.1501402854919434, "learning_rate": 5.9445158638912646e-05, "loss": 0.8831, "step": 32895 }, { "epoch": 1.2666025024061598, "grad_norm": 0.7777537703514099, "learning_rate": 5.941751926919721e-05, "loss": 0.7503, "step": 32900 }, { "epoch": 1.2667949951876805, "grad_norm": 1.1741917133331299, "learning_rate": 5.9389883610480835e-05, "loss": 0.9508, "step": 32905 }, { "epoch": 1.2669874879692011, "grad_norm": 1.2577232122421265, "learning_rate": 5.936225166529057e-05, "loss": 0.7789, "step": 32910 }, { "epoch": 1.2671799807507218, "grad_norm": 1.0245388746261597, "learning_rate": 5.933462343615317e-05, "loss": 0.8104, "step": 32915 }, { "epoch": 1.2673724735322425, "grad_norm": 1.5200345516204834, "learning_rate": 
5.9306998925595105e-05, "loss": 0.8895, "step": 32920 }, { "epoch": 1.2675649663137634, "grad_norm": 1.0815972089767456, "learning_rate": 5.927937813614241e-05, "loss": 0.746, "step": 32925 }, { "epoch": 1.267757459095284, "grad_norm": 1.1192761659622192, "learning_rate": 5.9251761070320845e-05, "loss": 0.8543, "step": 32930 }, { "epoch": 1.2679499518768047, "grad_norm": 1.0540046691894531, "learning_rate": 5.9224147730655766e-05, "loss": 0.9522, "step": 32935 }, { "epoch": 1.2681424446583254, "grad_norm": 1.5147613286972046, "learning_rate": 5.9196538119672297e-05, "loss": 0.868, "step": 32940 }, { "epoch": 1.268334937439846, "grad_norm": 2.390244960784912, "learning_rate": 5.91689322398951e-05, "loss": 0.7859, "step": 32945 }, { "epoch": 1.2685274302213667, "grad_norm": 1.3347294330596924, "learning_rate": 5.914133009384859e-05, "loss": 0.8369, "step": 32950 }, { "epoch": 1.2687199230028874, "grad_norm": 1.1025590896606445, "learning_rate": 5.911373168405676e-05, "loss": 0.7856, "step": 32955 }, { "epoch": 1.268912415784408, "grad_norm": 1.335947871208191, "learning_rate": 5.908613701304331e-05, "loss": 0.8423, "step": 32960 }, { "epoch": 1.2691049085659287, "grad_norm": 1.9916443824768066, "learning_rate": 5.905854608333163e-05, "loss": 0.7697, "step": 32965 }, { "epoch": 1.2692974013474494, "grad_norm": 1.1374703645706177, "learning_rate": 5.903095889744466e-05, "loss": 0.6431, "step": 32970 }, { "epoch": 1.26948989412897, "grad_norm": 1.484373688697815, "learning_rate": 5.900337545790513e-05, "loss": 0.8287, "step": 32975 }, { "epoch": 1.2696823869104907, "grad_norm": 1.3706483840942383, "learning_rate": 5.8975795767235354e-05, "loss": 0.9413, "step": 32980 }, { "epoch": 1.2698748796920116, "grad_norm": 1.4376710653305054, "learning_rate": 5.894821982795724e-05, "loss": 0.7843, "step": 32985 }, { "epoch": 1.2700673724735323, "grad_norm": 1.0955665111541748, "learning_rate": 5.892064764259253e-05, "loss": 0.8953, "step": 32990 }, { "epoch": 1.270259865255053, 
"grad_norm": 1.9772429466247559, "learning_rate": 5.889307921366245e-05, "loss": 0.7771, "step": 32995 }, { "epoch": 1.2704523580365736, "grad_norm": 0.92624431848526, "learning_rate": 5.886551454368794e-05, "loss": 0.8009, "step": 33000 }, { "epoch": 1.2706448508180943, "grad_norm": 1.8379896879196167, "learning_rate": 5.883795363518968e-05, "loss": 1.0357, "step": 33005 }, { "epoch": 1.270837343599615, "grad_norm": 0.9164409041404724, "learning_rate": 5.881039649068788e-05, "loss": 0.8232, "step": 33010 }, { "epoch": 1.2710298363811356, "grad_norm": 1.9721314907073975, "learning_rate": 5.8782843112702436e-05, "loss": 0.6249, "step": 33015 }, { "epoch": 1.2712223291626565, "grad_norm": 1.4756965637207031, "learning_rate": 5.8755293503752975e-05, "loss": 0.7346, "step": 33020 }, { "epoch": 1.2714148219441772, "grad_norm": 1.0704786777496338, "learning_rate": 5.8727747666358704e-05, "loss": 0.8372, "step": 33025 }, { "epoch": 1.2716073147256979, "grad_norm": 0.9195441007614136, "learning_rate": 5.8700205603038484e-05, "loss": 0.8975, "step": 33030 }, { "epoch": 1.2717998075072185, "grad_norm": 0.9442548155784607, "learning_rate": 5.867266731631088e-05, "loss": 0.8154, "step": 33035 }, { "epoch": 1.2719923002887392, "grad_norm": 1.0243034362792969, "learning_rate": 5.86451328086941e-05, "loss": 0.9787, "step": 33040 }, { "epoch": 1.27218479307026, "grad_norm": 1.9684053659439087, "learning_rate": 5.8617602082705955e-05, "loss": 0.8663, "step": 33045 }, { "epoch": 1.2723772858517806, "grad_norm": 2.0619711875915527, "learning_rate": 5.859007514086399e-05, "loss": 0.8495, "step": 33050 }, { "epoch": 1.2725697786333012, "grad_norm": 1.4054616689682007, "learning_rate": 5.856255198568532e-05, "loss": 0.8247, "step": 33055 }, { "epoch": 1.272762271414822, "grad_norm": 1.5287714004516602, "learning_rate": 5.853503261968676e-05, "loss": 0.9497, "step": 33060 }, { "epoch": 1.2729547641963426, "grad_norm": 2.1714093685150146, "learning_rate": 5.850751704538482e-05, "loss": 
0.8473, "step": 33065 }, { "epoch": 1.2731472569778632, "grad_norm": 2.1262662410736084, "learning_rate": 5.848000526529558e-05, "loss": 0.9971, "step": 33070 }, { "epoch": 1.273339749759384, "grad_norm": 0.8755722641944885, "learning_rate": 5.845249728193476e-05, "loss": 0.8989, "step": 33075 }, { "epoch": 1.2735322425409046, "grad_norm": 1.1650525331497192, "learning_rate": 5.842499309781789e-05, "loss": 0.8994, "step": 33080 }, { "epoch": 1.2737247353224255, "grad_norm": 1.2620556354522705, "learning_rate": 5.839749271545999e-05, "loss": 0.8267, "step": 33085 }, { "epoch": 1.2739172281039461, "grad_norm": 1.1734013557434082, "learning_rate": 5.836999613737577e-05, "loss": 0.9163, "step": 33090 }, { "epoch": 1.2741097208854668, "grad_norm": 2.227886199951172, "learning_rate": 5.8342503366079624e-05, "loss": 0.878, "step": 33095 }, { "epoch": 1.2743022136669875, "grad_norm": 0.975045382976532, "learning_rate": 5.83150144040856e-05, "loss": 0.873, "step": 33100 }, { "epoch": 1.2744947064485082, "grad_norm": 0.9172561764717102, "learning_rate": 5.828752925390737e-05, "loss": 0.7909, "step": 33105 }, { "epoch": 1.2746871992300288, "grad_norm": 1.7539204359054565, "learning_rate": 5.82600479180583e-05, "loss": 0.915, "step": 33110 }, { "epoch": 1.2748796920115495, "grad_norm": 2.9440081119537354, "learning_rate": 5.823257039905131e-05, "loss": 0.8617, "step": 33115 }, { "epoch": 1.2750721847930704, "grad_norm": 1.3572540283203125, "learning_rate": 5.8205096699399074e-05, "loss": 0.8082, "step": 33120 }, { "epoch": 1.275264677574591, "grad_norm": 1.449960470199585, "learning_rate": 5.8177626821613876e-05, "loss": 0.8901, "step": 33125 }, { "epoch": 1.2754571703561117, "grad_norm": 1.7100377082824707, "learning_rate": 5.8150160768207704e-05, "loss": 0.9157, "step": 33130 }, { "epoch": 1.2756496631376324, "grad_norm": 0.9856832027435303, "learning_rate": 5.812269854169201e-05, "loss": 0.7319, "step": 33135 }, { "epoch": 1.275842155919153, "grad_norm": 2.1571691036224365, 
"learning_rate": 5.809524014457821e-05, "loss": 0.8611, "step": 33140 }, { "epoch": 1.2760346487006737, "grad_norm": 1.537744402885437, "learning_rate": 5.806778557937707e-05, "loss": 0.967, "step": 33145 }, { "epoch": 1.2762271414821944, "grad_norm": 2.2577595710754395, "learning_rate": 5.8040334848599166e-05, "loss": 0.8516, "step": 33150 }, { "epoch": 1.276419634263715, "grad_norm": 1.4101483821868896, "learning_rate": 5.801288795475466e-05, "loss": 0.8992, "step": 33155 }, { "epoch": 1.2766121270452357, "grad_norm": 1.332582950592041, "learning_rate": 5.798544490035347e-05, "loss": 0.8865, "step": 33160 }, { "epoch": 1.2768046198267564, "grad_norm": 1.6516834497451782, "learning_rate": 5.795800568790495e-05, "loss": 0.8555, "step": 33165 }, { "epoch": 1.276997112608277, "grad_norm": 1.0843029022216797, "learning_rate": 5.793057031991836e-05, "loss": 0.7221, "step": 33170 }, { "epoch": 1.2771896053897978, "grad_norm": 0.715171754360199, "learning_rate": 5.790313879890241e-05, "loss": 0.8228, "step": 33175 }, { "epoch": 1.2773820981713186, "grad_norm": 1.1554157733917236, "learning_rate": 5.787571112736554e-05, "loss": 0.8982, "step": 33180 }, { "epoch": 1.2775745909528393, "grad_norm": 1.4646668434143066, "learning_rate": 5.784828730781585e-05, "loss": 0.9118, "step": 33185 }, { "epoch": 1.27776708373436, "grad_norm": 1.4582480192184448, "learning_rate": 5.782086734276109e-05, "loss": 0.6899, "step": 33190 }, { "epoch": 1.2779595765158807, "grad_norm": 0.8803904056549072, "learning_rate": 5.779345123470853e-05, "loss": 0.918, "step": 33195 }, { "epoch": 1.2781520692974013, "grad_norm": 1.423751711845398, "learning_rate": 5.7766038986165325e-05, "loss": 0.7573, "step": 33200 }, { "epoch": 1.278344562078922, "grad_norm": 1.1513595581054688, "learning_rate": 5.7738630599638054e-05, "loss": 0.8907, "step": 33205 }, { "epoch": 1.2785370548604427, "grad_norm": 0.7591530084609985, "learning_rate": 5.7711226077633065e-05, "loss": 0.7822, "step": 33210 }, { "epoch": 
1.2787295476419636, "grad_norm": 1.3467390537261963, "learning_rate": 5.768382542265629e-05, "loss": 0.97, "step": 33215 }, { "epoch": 1.2789220404234842, "grad_norm": 0.9523210525512695, "learning_rate": 5.765642863721341e-05, "loss": 0.861, "step": 33220 }, { "epoch": 1.279114533205005, "grad_norm": 1.2797752618789673, "learning_rate": 5.76290357238096e-05, "loss": 0.8446, "step": 33225 }, { "epoch": 1.2793070259865256, "grad_norm": 2.0043842792510986, "learning_rate": 5.7601646684949784e-05, "loss": 0.8505, "step": 33230 }, { "epoch": 1.2794995187680462, "grad_norm": 1.1926679611206055, "learning_rate": 5.7574261523138514e-05, "loss": 1.039, "step": 33235 }, { "epoch": 1.279692011549567, "grad_norm": 1.4371801614761353, "learning_rate": 5.754688024088e-05, "loss": 0.975, "step": 33240 }, { "epoch": 1.2798845043310876, "grad_norm": 0.7704469561576843, "learning_rate": 5.751950284067809e-05, "loss": 0.6496, "step": 33245 }, { "epoch": 1.2800769971126083, "grad_norm": 1.707034707069397, "learning_rate": 5.749212932503614e-05, "loss": 0.8091, "step": 33250 }, { "epoch": 1.280269489894129, "grad_norm": 1.1753286123275757, "learning_rate": 5.746475969645747e-05, "loss": 0.7916, "step": 33255 }, { "epoch": 1.2804619826756496, "grad_norm": 1.4952518939971924, "learning_rate": 5.7437393957444716e-05, "loss": 0.8581, "step": 33260 }, { "epoch": 1.2806544754571703, "grad_norm": 1.7860945463180542, "learning_rate": 5.7410032110500335e-05, "loss": 0.6714, "step": 33265 }, { "epoch": 1.280846968238691, "grad_norm": 1.6719321012496948, "learning_rate": 5.7382674158126385e-05, "loss": 0.9253, "step": 33270 }, { "epoch": 1.2810394610202118, "grad_norm": 1.7569420337677002, "learning_rate": 5.735532010282461e-05, "loss": 0.8774, "step": 33275 }, { "epoch": 1.2812319538017325, "grad_norm": 0.898158073425293, "learning_rate": 5.732796994709623e-05, "loss": 0.8034, "step": 33280 }, { "epoch": 1.2814244465832532, "grad_norm": 1.5631740093231201, "learning_rate": 5.73006236934424e-05, 
"loss": 0.8423, "step": 33285 }, { "epoch": 1.2816169393647738, "grad_norm": 1.7265678644180298, "learning_rate": 5.727328134436364e-05, "loss": 0.8724, "step": 33290 }, { "epoch": 1.2818094321462945, "grad_norm": 0.971207320690155, "learning_rate": 5.7245942902360274e-05, "loss": 0.7923, "step": 33295 }, { "epoch": 1.2820019249278152, "grad_norm": 1.0335525274276733, "learning_rate": 5.721860836993218e-05, "loss": 0.6989, "step": 33300 }, { "epoch": 1.2821944177093358, "grad_norm": 0.9981820583343506, "learning_rate": 5.719127774957902e-05, "loss": 0.9665, "step": 33305 }, { "epoch": 1.2823869104908565, "grad_norm": 2.178272247314453, "learning_rate": 5.716395104379988e-05, "loss": 0.8747, "step": 33310 }, { "epoch": 1.2825794032723774, "grad_norm": 1.31416654586792, "learning_rate": 5.713662825509365e-05, "loss": 0.8943, "step": 33315 }, { "epoch": 1.282771896053898, "grad_norm": 1.1709874868392944, "learning_rate": 5.710930938595882e-05, "loss": 0.8149, "step": 33320 }, { "epoch": 1.2829643888354187, "grad_norm": 1.7127902507781982, "learning_rate": 5.708199443889353e-05, "loss": 0.8275, "step": 33325 }, { "epoch": 1.2831568816169394, "grad_norm": 0.7726919054985046, "learning_rate": 5.705468341639557e-05, "loss": 0.7617, "step": 33330 }, { "epoch": 1.28334937439846, "grad_norm": 1.722057819366455, "learning_rate": 5.702737632096229e-05, "loss": 0.8446, "step": 33335 }, { "epoch": 1.2835418671799808, "grad_norm": 1.3741867542266846, "learning_rate": 5.700007315509078e-05, "loss": 0.9602, "step": 33340 }, { "epoch": 1.2837343599615014, "grad_norm": 2.013511896133423, "learning_rate": 5.6972773921277734e-05, "loss": 0.8998, "step": 33345 }, { "epoch": 1.283926852743022, "grad_norm": 2.3441731929779053, "learning_rate": 5.6945478622019524e-05, "loss": 0.857, "step": 33350 }, { "epoch": 1.2841193455245428, "grad_norm": 1.1220638751983643, "learning_rate": 5.691818725981199e-05, "loss": 0.9617, "step": 33355 }, { "epoch": 1.2843118383060634, "grad_norm": 
1.3120940923690796, "learning_rate": 5.6890899837150944e-05, "loss": 0.8752, "step": 33360 }, { "epoch": 1.284504331087584, "grad_norm": 1.2576730251312256, "learning_rate": 5.686361635653148e-05, "loss": 0.8307, "step": 33365 }, { "epoch": 1.2846968238691048, "grad_norm": 1.0636831521987915, "learning_rate": 5.6836336820448556e-05, "loss": 0.7714, "step": 33370 }, { "epoch": 1.2848893166506257, "grad_norm": 0.9599676728248596, "learning_rate": 5.680906123139669e-05, "loss": 0.8351, "step": 33375 }, { "epoch": 1.2850818094321463, "grad_norm": 1.8764914274215698, "learning_rate": 5.67817895918701e-05, "loss": 0.9712, "step": 33380 }, { "epoch": 1.285274302213667, "grad_norm": 1.3599872589111328, "learning_rate": 5.675452190436248e-05, "loss": 0.9119, "step": 33385 }, { "epoch": 1.2854667949951877, "grad_norm": 0.9683537483215332, "learning_rate": 5.672725817136744e-05, "loss": 0.8115, "step": 33390 }, { "epoch": 1.2856592877767083, "grad_norm": 1.1662638187408447, "learning_rate": 5.669999839537794e-05, "loss": 0.9159, "step": 33395 }, { "epoch": 1.285851780558229, "grad_norm": 2.2945556640625, "learning_rate": 5.667274257888675e-05, "loss": 0.8131, "step": 33400 }, { "epoch": 1.2860442733397497, "grad_norm": 0.9132623076438904, "learning_rate": 5.664549072438624e-05, "loss": 0.799, "step": 33405 }, { "epoch": 1.2862367661212706, "grad_norm": 0.9698240756988525, "learning_rate": 5.661824283436844e-05, "loss": 0.9786, "step": 33410 }, { "epoch": 1.2864292589027913, "grad_norm": 0.7265766263008118, "learning_rate": 5.659099891132488e-05, "loss": 0.7017, "step": 33415 }, { "epoch": 1.286621751684312, "grad_norm": 0.8965588212013245, "learning_rate": 5.656375895774699e-05, "loss": 0.8478, "step": 33420 }, { "epoch": 1.2868142444658326, "grad_norm": 1.5300185680389404, "learning_rate": 5.653652297612556e-05, "loss": 0.851, "step": 33425 }, { "epoch": 1.2870067372473533, "grad_norm": 1.6665253639221191, "learning_rate": 5.650929096895119e-05, "loss": 0.8494, "step": 33430 
}, { "epoch": 1.287199230028874, "grad_norm": 1.2362685203552246, "learning_rate": 5.6482062938714095e-05, "loss": 0.7511, "step": 33435 }, { "epoch": 1.2873917228103946, "grad_norm": 1.0025197267532349, "learning_rate": 5.645483888790404e-05, "loss": 0.8445, "step": 33440 }, { "epoch": 1.2875842155919153, "grad_norm": 3.0761525630950928, "learning_rate": 5.6427618819010486e-05, "loss": 0.9236, "step": 33445 }, { "epoch": 1.287776708373436, "grad_norm": 1.8657230138778687, "learning_rate": 5.640040273452256e-05, "loss": 0.925, "step": 33450 }, { "epoch": 1.2879692011549566, "grad_norm": 1.3410662412643433, "learning_rate": 5.637319063692903e-05, "loss": 0.7961, "step": 33455 }, { "epoch": 1.2881616939364773, "grad_norm": 1.056356430053711, "learning_rate": 5.6345982528718125e-05, "loss": 0.8616, "step": 33460 }, { "epoch": 1.288354186717998, "grad_norm": 2.1360607147216797, "learning_rate": 5.6318778412378024e-05, "loss": 0.8894, "step": 33465 }, { "epoch": 1.2885466794995188, "grad_norm": 1.3970694541931152, "learning_rate": 5.629157829039623e-05, "loss": 1.0804, "step": 33470 }, { "epoch": 1.2887391722810395, "grad_norm": 1.7947211265563965, "learning_rate": 5.6264382165260065e-05, "loss": 0.6816, "step": 33475 }, { "epoch": 1.2889316650625602, "grad_norm": 1.3382033109664917, "learning_rate": 5.6237190039456425e-05, "loss": 0.9228, "step": 33480 }, { "epoch": 1.2891241578440809, "grad_norm": 1.0116480588912964, "learning_rate": 5.6210001915471896e-05, "loss": 0.9483, "step": 33485 }, { "epoch": 1.2893166506256015, "grad_norm": 1.6364978551864624, "learning_rate": 5.618281779579253e-05, "loss": 1.0702, "step": 33490 }, { "epoch": 1.2895091434071222, "grad_norm": 1.1366705894470215, "learning_rate": 5.6155637682904284e-05, "loss": 0.8214, "step": 33495 }, { "epoch": 1.2897016361886429, "grad_norm": 1.1931660175323486, "learning_rate": 5.6128461579292456e-05, "loss": 0.6575, "step": 33500 }, { "epoch": 1.2898941289701638, "grad_norm": 1.7498856782913208, 
"learning_rate": 5.610128948744229e-05, "loss": 0.843, "step": 33505 }, { "epoch": 1.2900866217516844, "grad_norm": 1.446738839149475, "learning_rate": 5.6074121409838345e-05, "loss": 0.7902, "step": 33510 }, { "epoch": 1.290279114533205, "grad_norm": 1.2919175624847412, "learning_rate": 5.6046957348965014e-05, "loss": 0.8396, "step": 33515 }, { "epoch": 1.2904716073147258, "grad_norm": 1.1369450092315674, "learning_rate": 5.6019797307306264e-05, "loss": 0.9185, "step": 33520 }, { "epoch": 1.2906641000962464, "grad_norm": 1.1856272220611572, "learning_rate": 5.599264128734576e-05, "loss": 0.8774, "step": 33525 }, { "epoch": 1.290856592877767, "grad_norm": 2.4234628677368164, "learning_rate": 5.5965489291566645e-05, "loss": 1.0206, "step": 33530 }, { "epoch": 1.2910490856592878, "grad_norm": 1.700831651687622, "learning_rate": 5.593834132245183e-05, "loss": 0.8308, "step": 33535 }, { "epoch": 1.2912415784408084, "grad_norm": 0.7165193557739258, "learning_rate": 5.591119738248382e-05, "loss": 0.7948, "step": 33540 }, { "epoch": 1.2914340712223291, "grad_norm": 1.512609601020813, "learning_rate": 5.5884057474144736e-05, "loss": 0.8691, "step": 33545 }, { "epoch": 1.2916265640038498, "grad_norm": 1.0967670679092407, "learning_rate": 5.585692159991641e-05, "loss": 0.7975, "step": 33550 }, { "epoch": 1.2918190567853705, "grad_norm": 1.0149413347244263, "learning_rate": 5.582978976228015e-05, "loss": 0.6579, "step": 33555 }, { "epoch": 1.2920115495668911, "grad_norm": 1.2751359939575195, "learning_rate": 5.5802661963716994e-05, "loss": 0.8688, "step": 33560 }, { "epoch": 1.2922040423484118, "grad_norm": 0.7386071681976318, "learning_rate": 5.5775538206707636e-05, "loss": 0.819, "step": 33565 }, { "epoch": 1.2923965351299327, "grad_norm": 1.0043171644210815, "learning_rate": 5.574841849373238e-05, "loss": 0.7907, "step": 33570 }, { "epoch": 1.2925890279114534, "grad_norm": 0.9456561803817749, "learning_rate": 5.5721302827271035e-05, "loss": 1.0571, "step": 33575 }, { 
"epoch": 1.292781520692974, "grad_norm": 2.149979591369629, "learning_rate": 5.5694191209803313e-05, "loss": 0.8367, "step": 33580 }, { "epoch": 1.2929740134744947, "grad_norm": 1.2153743505477905, "learning_rate": 5.566708364380826e-05, "loss": 0.7986, "step": 33585 }, { "epoch": 1.2931665062560154, "grad_norm": 1.090076208114624, "learning_rate": 5.563998013176474e-05, "loss": 0.7111, "step": 33590 }, { "epoch": 1.293358999037536, "grad_norm": 1.1182845830917358, "learning_rate": 5.5612880676151154e-05, "loss": 0.6748, "step": 33595 }, { "epoch": 1.2935514918190567, "grad_norm": 1.2427207231521606, "learning_rate": 5.5585785279445654e-05, "loss": 0.8749, "step": 33600 }, { "epoch": 1.2937439846005776, "grad_norm": 1.2770304679870605, "learning_rate": 5.555869394412578e-05, "loss": 0.9485, "step": 33605 }, { "epoch": 1.2939364773820983, "grad_norm": 1.2712188959121704, "learning_rate": 5.5531606672669045e-05, "loss": 0.7984, "step": 33610 }, { "epoch": 1.294128970163619, "grad_norm": 1.0220839977264404, "learning_rate": 5.550452346755225e-05, "loss": 0.8363, "step": 33615 }, { "epoch": 1.2943214629451396, "grad_norm": 2.0478289127349854, "learning_rate": 5.547744433125204e-05, "loss": 0.8417, "step": 33620 }, { "epoch": 1.2945139557266603, "grad_norm": 0.9348976612091064, "learning_rate": 5.5450369266244595e-05, "loss": 0.8596, "step": 33625 }, { "epoch": 1.294706448508181, "grad_norm": 1.4484078884124756, "learning_rate": 5.542329827500581e-05, "loss": 0.8122, "step": 33630 }, { "epoch": 1.2948989412897016, "grad_norm": 1.7183796167373657, "learning_rate": 5.5396231360011074e-05, "loss": 0.857, "step": 33635 }, { "epoch": 1.2950914340712223, "grad_norm": 0.8953412175178528, "learning_rate": 5.5369168523735505e-05, "loss": 1.0084, "step": 33640 }, { "epoch": 1.295283926852743, "grad_norm": 0.8453150987625122, "learning_rate": 5.5342109768653815e-05, "loss": 0.8156, "step": 33645 }, { "epoch": 1.2954764196342636, "grad_norm": 1.296000599861145, "learning_rate": 
5.531505509724036e-05, "loss": 0.808, "step": 33650 }, { "epoch": 1.2956689124157843, "grad_norm": null, "learning_rate": 5.529341430201327e-05, "loss": 0.8415, "step": 33655 }, { "epoch": 1.295861405197305, "grad_norm": 1.4675425291061401, "learning_rate": 5.526636698743678e-05, "loss": 0.8757, "step": 33660 }, { "epoch": 1.2960538979788259, "grad_norm": 1.3060146570205688, "learning_rate": 5.5239323763454696e-05, "loss": 0.8328, "step": 33665 }, { "epoch": 1.2962463907603465, "grad_norm": 1.034527063369751, "learning_rate": 5.5212284632539976e-05, "loss": 0.9086, "step": 33670 }, { "epoch": 1.2964388835418672, "grad_norm": 1.3816114664077759, "learning_rate": 5.518524959716507e-05, "loss": 0.8794, "step": 33675 }, { "epoch": 1.2966313763233879, "grad_norm": 1.4892889261245728, "learning_rate": 5.515821865980228e-05, "loss": 0.8859, "step": 33680 }, { "epoch": 1.2968238691049085, "grad_norm": 1.8635764122009277, "learning_rate": 5.513119182292332e-05, "loss": 0.8079, "step": 33685 }, { "epoch": 1.2970163618864292, "grad_norm": 1.203260898590088, "learning_rate": 5.5104169088999644e-05, "loss": 0.7971, "step": 33690 }, { "epoch": 1.2972088546679499, "grad_norm": 1.3224972486495972, "learning_rate": 5.507715046050228e-05, "loss": 0.916, "step": 33695 }, { "epoch": 1.2974013474494708, "grad_norm": 1.774021029472351, "learning_rate": 5.505013593990197e-05, "loss": 0.9424, "step": 33700 }, { "epoch": 1.2975938402309914, "grad_norm": 0.8528000712394714, "learning_rate": 5.502312552966892e-05, "loss": 0.7671, "step": 33705 }, { "epoch": 1.2977863330125121, "grad_norm": 2.2666587829589844, "learning_rate": 5.49961192322731e-05, "loss": 0.8172, "step": 33710 }, { "epoch": 1.2979788257940328, "grad_norm": 1.4332960844039917, "learning_rate": 5.496911705018404e-05, "loss": 0.9298, "step": 33715 }, { "epoch": 1.2981713185755535, "grad_norm": 2.359644889831543, "learning_rate": 5.494211898587094e-05, "loss": 0.9154, "step": 33720 }, { "epoch": 1.2983638113570741, 
"grad_norm": 1.5822657346725464, "learning_rate": 5.491512504180261e-05, "loss": 0.8183, "step": 33725 }, { "epoch": 1.2985563041385948, "grad_norm": 1.935848593711853, "learning_rate": 5.488813522044739e-05, "loss": 0.813, "step": 33730 }, { "epoch": 1.2987487969201155, "grad_norm": 2.000776529312134, "learning_rate": 5.486114952427337e-05, "loss": 0.7426, "step": 33735 }, { "epoch": 1.2989412897016361, "grad_norm": 1.2055097818374634, "learning_rate": 5.4834167955748204e-05, "loss": 0.82, "step": 33740 }, { "epoch": 1.2991337824831568, "grad_norm": 1.9020949602127075, "learning_rate": 5.480719051733918e-05, "loss": 0.788, "step": 33745 }, { "epoch": 1.2993262752646775, "grad_norm": 1.5985333919525146, "learning_rate": 5.478021721151323e-05, "loss": 0.7221, "step": 33750 }, { "epoch": 1.2995187680461981, "grad_norm": 1.5660638809204102, "learning_rate": 5.475324804073687e-05, "loss": 0.8852, "step": 33755 }, { "epoch": 1.299711260827719, "grad_norm": 1.3215242624282837, "learning_rate": 5.4726283007476196e-05, "loss": 0.7963, "step": 33760 }, { "epoch": 1.2999037536092397, "grad_norm": 1.6176220178604126, "learning_rate": 5.4699322114197084e-05, "loss": 0.8704, "step": 33765 }, { "epoch": 1.3000962463907604, "grad_norm": 1.5978866815567017, "learning_rate": 5.4672365363364855e-05, "loss": 0.8481, "step": 33770 }, { "epoch": 1.300288739172281, "grad_norm": 0.9393962621688843, "learning_rate": 5.4645412757444525e-05, "loss": 0.7539, "step": 33775 }, { "epoch": 1.3004812319538017, "grad_norm": 1.5960298776626587, "learning_rate": 5.461846429890077e-05, "loss": 0.8225, "step": 33780 }, { "epoch": 1.3006737247353224, "grad_norm": 1.2632452249526978, "learning_rate": 5.459151999019787e-05, "loss": 0.8851, "step": 33785 }, { "epoch": 1.300866217516843, "grad_norm": 0.8271371722221375, "learning_rate": 5.456457983379957e-05, "loss": 0.8399, "step": 33790 }, { "epoch": 1.3010587102983637, "grad_norm": 1.2971397638320923, "learning_rate": 5.453764383216955e-05, "loss": 
0.7467, "step": 33795 }, { "epoch": 1.3012512030798846, "grad_norm": 1.0965392589569092, "learning_rate": 5.45107119877708e-05, "loss": 0.7249, "step": 33800 }, { "epoch": 1.3014436958614053, "grad_norm": 1.258906364440918, "learning_rate": 5.4483784303066096e-05, "loss": 0.8473, "step": 33805 }, { "epoch": 1.301636188642926, "grad_norm": 1.6706708669662476, "learning_rate": 5.44568607805178e-05, "loss": 0.8842, "step": 33810 }, { "epoch": 1.3018286814244466, "grad_norm": 1.3727566003799438, "learning_rate": 5.442994142258794e-05, "loss": 0.8538, "step": 33815 }, { "epoch": 1.3020211742059673, "grad_norm": 0.9679449796676636, "learning_rate": 5.440302623173801e-05, "loss": 0.739, "step": 33820 }, { "epoch": 1.302213666987488, "grad_norm": 1.0687127113342285, "learning_rate": 5.437611521042929e-05, "loss": 0.665, "step": 33825 }, { "epoch": 1.3024061597690086, "grad_norm": 1.3849055767059326, "learning_rate": 5.4349208361122604e-05, "loss": 0.9492, "step": 33830 }, { "epoch": 1.3025986525505293, "grad_norm": 2.082472562789917, "learning_rate": 5.4322305686278386e-05, "loss": 0.8852, "step": 33835 }, { "epoch": 1.30279114533205, "grad_norm": 1.0276436805725098, "learning_rate": 5.4295407188356784e-05, "loss": 0.7791, "step": 33840 }, { "epoch": 1.3029836381135707, "grad_norm": 1.8117910623550415, "learning_rate": 5.426851286981738e-05, "loss": 1.0149, "step": 33845 }, { "epoch": 1.3031761308950913, "grad_norm": 1.362642526626587, "learning_rate": 5.4241622733119545e-05, "loss": 0.8697, "step": 33850 }, { "epoch": 1.303368623676612, "grad_norm": 1.1220524311065674, "learning_rate": 5.421473678072217e-05, "loss": 0.7154, "step": 33855 }, { "epoch": 1.3035611164581329, "grad_norm": 1.0112050771713257, "learning_rate": 5.4187855015083875e-05, "loss": 0.7251, "step": 33860 }, { "epoch": 1.3037536092396536, "grad_norm": 1.8056666851043701, "learning_rate": 5.4160977438662665e-05, "loss": 1.0671, "step": 33865 }, { "epoch": 1.3039461020211742, "grad_norm": 
1.237334966659546, "learning_rate": 5.41341040539165e-05, "loss": 0.7798, "step": 33870 }, { "epoch": 1.304138594802695, "grad_norm": 1.337863802909851, "learning_rate": 5.410723486330265e-05, "loss": 0.8027, "step": 33875 }, { "epoch": 1.3043310875842156, "grad_norm": 1.7887320518493652, "learning_rate": 5.408036986927816e-05, "loss": 0.8194, "step": 33880 }, { "epoch": 1.3045235803657362, "grad_norm": 1.657067894935608, "learning_rate": 5.405350907429965e-05, "loss": 0.9871, "step": 33885 }, { "epoch": 1.304716073147257, "grad_norm": 1.5351759195327759, "learning_rate": 5.40266524808234e-05, "loss": 0.9261, "step": 33890 }, { "epoch": 1.3049085659287778, "grad_norm": 1.6100406646728516, "learning_rate": 5.399980009130516e-05, "loss": 0.8397, "step": 33895 }, { "epoch": 1.3051010587102985, "grad_norm": 1.196283221244812, "learning_rate": 5.397295190820058e-05, "loss": 0.832, "step": 33900 }, { "epoch": 1.3052935514918191, "grad_norm": 1.709205150604248, "learning_rate": 5.3946107933964576e-05, "loss": 0.9753, "step": 33905 }, { "epoch": 1.3054860442733398, "grad_norm": 1.0625784397125244, "learning_rate": 5.391926817105194e-05, "loss": 0.7821, "step": 33910 }, { "epoch": 1.3056785370548605, "grad_norm": 1.3273873329162598, "learning_rate": 5.389243262191697e-05, "loss": 0.9206, "step": 33915 }, { "epoch": 1.3058710298363811, "grad_norm": 2.023766040802002, "learning_rate": 5.3865601289013636e-05, "loss": 0.8539, "step": 33920 }, { "epoch": 1.3060635226179018, "grad_norm": 1.1720694303512573, "learning_rate": 5.383877417479541e-05, "loss": 0.8685, "step": 33925 }, { "epoch": 1.3062560153994225, "grad_norm": 1.6969107389450073, "learning_rate": 5.38119512817155e-05, "loss": 0.8275, "step": 33930 }, { "epoch": 1.3064485081809432, "grad_norm": 1.802259922027588, "learning_rate": 5.378513261222669e-05, "loss": 0.8782, "step": 33935 }, { "epoch": 1.3066410009624638, "grad_norm": 0.5480902791023254, "learning_rate": 5.375831816878134e-05, "loss": 0.8165, "step": 33940 }, 
{ "epoch": 1.3068334937439845, "grad_norm": 2.5565297603607178, "learning_rate": 5.3731507953831514e-05, "loss": 0.8427, "step": 33945 }, { "epoch": 1.3070259865255052, "grad_norm": 1.1750694513320923, "learning_rate": 5.3704701969828754e-05, "loss": 0.9518, "step": 33950 }, { "epoch": 1.307218479307026, "grad_norm": 0.9000421762466431, "learning_rate": 5.367790021922431e-05, "loss": 0.7622, "step": 33955 }, { "epoch": 1.3074109720885467, "grad_norm": 1.3734017610549927, "learning_rate": 5.365110270446904e-05, "loss": 0.835, "step": 33960 }, { "epoch": 1.3076034648700674, "grad_norm": 1.1932064294815063, "learning_rate": 5.362430942801343e-05, "loss": 0.8661, "step": 33965 }, { "epoch": 1.307795957651588, "grad_norm": 1.3349897861480713, "learning_rate": 5.3597520392307434e-05, "loss": 0.7571, "step": 33970 }, { "epoch": 1.3079884504331087, "grad_norm": 1.5428650379180908, "learning_rate": 5.357073559980089e-05, "loss": 0.7853, "step": 33975 }, { "epoch": 1.3081809432146294, "grad_norm": 1.3346575498580933, "learning_rate": 5.354395505294297e-05, "loss": 0.7985, "step": 33980 }, { "epoch": 1.30837343599615, "grad_norm": 1.840759038925171, "learning_rate": 5.351717875418263e-05, "loss": 0.9832, "step": 33985 }, { "epoch": 1.308565928777671, "grad_norm": 1.7884091138839722, "learning_rate": 5.349040670596835e-05, "loss": 0.8081, "step": 33990 }, { "epoch": 1.3087584215591916, "grad_norm": 1.082604169845581, "learning_rate": 5.346363891074833e-05, "loss": 0.7615, "step": 33995 }, { "epoch": 1.3089509143407123, "grad_norm": 1.071516990661621, "learning_rate": 5.3436875370970176e-05, "loss": 0.9025, "step": 34000 }, { "epoch": 1.309143407122233, "grad_norm": 0.9769271612167358, "learning_rate": 5.3410116089081394e-05, "loss": 0.7979, "step": 34005 }, { "epoch": 1.3093358999037537, "grad_norm": 1.50300133228302, "learning_rate": 5.3383361067528795e-05, "loss": 0.9068, "step": 34010 }, { "epoch": 1.3095283926852743, "grad_norm": 1.6212974786758423, "learning_rate": 
5.335661030875909e-05, "loss": 0.8189, "step": 34015 }, { "epoch": 1.309720885466795, "grad_norm": 1.2834120988845825, "learning_rate": 5.3329863815218354e-05, "loss": 0.9268, "step": 34020 }, { "epoch": 1.3099133782483157, "grad_norm": 1.2864872217178345, "learning_rate": 5.33031215893524e-05, "loss": 0.8804, "step": 34025 }, { "epoch": 1.3101058710298363, "grad_norm": 1.1591218709945679, "learning_rate": 5.3276383633606656e-05, "loss": 0.9498, "step": 34030 }, { "epoch": 1.310298363811357, "grad_norm": 1.4199464321136475, "learning_rate": 5.324964995042614e-05, "loss": 0.7704, "step": 34035 }, { "epoch": 1.3104908565928777, "grad_norm": 1.1240346431732178, "learning_rate": 5.322292054225539e-05, "loss": 0.846, "step": 34040 }, { "epoch": 1.3106833493743983, "grad_norm": 1.1645770072937012, "learning_rate": 5.3196195411538706e-05, "loss": 0.8291, "step": 34045 }, { "epoch": 1.3108758421559192, "grad_norm": 1.1007463932037354, "learning_rate": 5.316947456071994e-05, "loss": 0.7795, "step": 34050 }, { "epoch": 1.31106833493744, "grad_norm": 1.4447979927062988, "learning_rate": 5.314275799224243e-05, "loss": 0.8513, "step": 34055 }, { "epoch": 1.3112608277189606, "grad_norm": 1.0073038339614868, "learning_rate": 5.311604570854938e-05, "loss": 0.7007, "step": 34060 }, { "epoch": 1.3114533205004812, "grad_norm": 1.1897684335708618, "learning_rate": 5.308933771208332e-05, "loss": 0.8345, "step": 34065 }, { "epoch": 1.311645813282002, "grad_norm": 0.9960253834724426, "learning_rate": 5.3062634005286586e-05, "loss": 0.7782, "step": 34070 }, { "epoch": 1.3118383060635226, "grad_norm": 1.1287857294082642, "learning_rate": 5.303593459060103e-05, "loss": 0.8248, "step": 34075 }, { "epoch": 1.3120307988450433, "grad_norm": 1.133621096611023, "learning_rate": 5.300923947046819e-05, "loss": 0.7083, "step": 34080 }, { "epoch": 1.312223291626564, "grad_norm": 0.9852873086929321, "learning_rate": 5.2982548647329034e-05, "loss": 0.8222, "step": 34085 }, { "epoch": 
1.3124157844080848, "grad_norm": 1.1880803108215332, "learning_rate": 5.295586212362442e-05, "loss": 1.0247, "step": 34090 }, { "epoch": 1.3126082771896055, "grad_norm": 2.150893449783325, "learning_rate": 5.292917990179453e-05, "loss": 0.8991, "step": 34095 }, { "epoch": 1.3128007699711262, "grad_norm": 1.5206340551376343, "learning_rate": 5.290250198427934e-05, "loss": 0.8353, "step": 34100 }, { "epoch": 1.3129932627526468, "grad_norm": 1.733988642692566, "learning_rate": 5.2875828373518344e-05, "loss": 0.8553, "step": 34105 }, { "epoch": 1.3131857555341675, "grad_norm": 2.8545048236846924, "learning_rate": 5.2849159071950716e-05, "loss": 0.835, "step": 34110 }, { "epoch": 1.3133782483156882, "grad_norm": 1.2156729698181152, "learning_rate": 5.282249408201505e-05, "loss": 0.8605, "step": 34115 }, { "epoch": 1.3135707410972088, "grad_norm": 1.5454976558685303, "learning_rate": 5.2795833406149876e-05, "loss": 0.7758, "step": 34120 }, { "epoch": 1.3137632338787295, "grad_norm": 1.7499946355819702, "learning_rate": 5.276917704679299e-05, "loss": 0.8126, "step": 34125 }, { "epoch": 1.3139557266602502, "grad_norm": 1.7527188062667847, "learning_rate": 5.2742525006381994e-05, "loss": 0.8095, "step": 34130 }, { "epoch": 1.3141482194417708, "grad_norm": 1.1449981927871704, "learning_rate": 5.271587728735402e-05, "loss": 0.9137, "step": 34135 }, { "epoch": 1.3143407122232915, "grad_norm": 1.3999851942062378, "learning_rate": 5.268923389214588e-05, "loss": 0.8175, "step": 34140 }, { "epoch": 1.3145332050048122, "grad_norm": 1.119730830192566, "learning_rate": 5.2662594823193865e-05, "loss": 0.9228, "step": 34145 }, { "epoch": 1.314725697786333, "grad_norm": 0.8965536952018738, "learning_rate": 5.263596008293398e-05, "loss": 0.8623, "step": 34150 }, { "epoch": 1.3149181905678538, "grad_norm": 1.6485599279403687, "learning_rate": 5.260932967380178e-05, "loss": 0.7832, "step": 34155 }, { "epoch": 1.3151106833493744, "grad_norm": 1.0954639911651611, "learning_rate": 
5.2582703598232444e-05, "loss": 0.7878, "step": 34160 }, { "epoch": 1.315303176130895, "grad_norm": 1.3316006660461426, "learning_rate": 5.255608185866079e-05, "loss": 0.8128, "step": 34165 }, { "epoch": 1.3154956689124158, "grad_norm": 0.8699563145637512, "learning_rate": 5.252946445752113e-05, "loss": 0.8295, "step": 34170 }, { "epoch": 1.3156881616939364, "grad_norm": 1.7148654460906982, "learning_rate": 5.2502851397247476e-05, "loss": 0.9354, "step": 34175 }, { "epoch": 1.315880654475457, "grad_norm": 1.602002501487732, "learning_rate": 5.247624268027342e-05, "loss": 0.6457, "step": 34180 }, { "epoch": 1.316073147256978, "grad_norm": 0.8863468170166016, "learning_rate": 5.24496383090322e-05, "loss": 0.8983, "step": 34185 }, { "epoch": 1.3162656400384987, "grad_norm": 1.1766276359558105, "learning_rate": 5.242303828595649e-05, "loss": 0.7426, "step": 34190 }, { "epoch": 1.3164581328200193, "grad_norm": 1.0617400407791138, "learning_rate": 5.2396442613478825e-05, "loss": 0.8074, "step": 34195 }, { "epoch": 1.31665062560154, "grad_norm": 1.5441138744354248, "learning_rate": 5.236985129403112e-05, "loss": 0.7365, "step": 34200 }, { "epoch": 1.3168431183830607, "grad_norm": 1.1525336503982544, "learning_rate": 5.234326433004497e-05, "loss": 0.8009, "step": 34205 }, { "epoch": 1.3170356111645813, "grad_norm": 1.6178624629974365, "learning_rate": 5.231668172395161e-05, "loss": 0.8423, "step": 34210 }, { "epoch": 1.317228103946102, "grad_norm": 0.7218144536018372, "learning_rate": 5.229010347818187e-05, "loss": 0.5935, "step": 34215 }, { "epoch": 1.3174205967276227, "grad_norm": 1.203621506690979, "learning_rate": 5.226352959516605e-05, "loss": 0.812, "step": 34220 }, { "epoch": 1.3176130895091434, "grad_norm": 1.475656509399414, "learning_rate": 5.2236960077334296e-05, "loss": 1.0383, "step": 34225 }, { "epoch": 1.317805582290664, "grad_norm": 1.2111153602600098, "learning_rate": 5.2210394927116105e-05, "loss": 0.8406, "step": 34230 }, { "epoch": 1.3179980750721847, 
"grad_norm": 2.3121533393859863, "learning_rate": 5.21838341469407e-05, "loss": 0.8353, "step": 34235 }, { "epoch": 1.3181905678537054, "grad_norm": 1.2453131675720215, "learning_rate": 5.215727773923693e-05, "loss": 0.8723, "step": 34240 }, { "epoch": 1.3183830606352263, "grad_norm": 1.351366639137268, "learning_rate": 5.2130725706433224e-05, "loss": 0.905, "step": 34245 }, { "epoch": 1.318575553416747, "grad_norm": 1.8964182138442993, "learning_rate": 5.21041780509575e-05, "loss": 0.9963, "step": 34250 }, { "epoch": 1.3187680461982676, "grad_norm": 1.1896933317184448, "learning_rate": 5.2077634775237415e-05, "loss": 0.8957, "step": 34255 }, { "epoch": 1.3189605389797883, "grad_norm": 2.013298511505127, "learning_rate": 5.205109588170016e-05, "loss": 0.8777, "step": 34260 }, { "epoch": 1.319153031761309, "grad_norm": 1.3011314868927002, "learning_rate": 5.202456137277256e-05, "loss": 1.0396, "step": 34265 }, { "epoch": 1.3193455245428296, "grad_norm": 1.5230119228363037, "learning_rate": 5.199803125088107e-05, "loss": 1.0502, "step": 34270 }, { "epoch": 1.3195380173243503, "grad_norm": 1.011749505996704, "learning_rate": 5.197150551845155e-05, "loss": 0.7362, "step": 34275 }, { "epoch": 1.3197305101058712, "grad_norm": 1.0205798149108887, "learning_rate": 5.1944984177909765e-05, "loss": 0.8891, "step": 34280 }, { "epoch": 1.3199230028873918, "grad_norm": 1.5326029062271118, "learning_rate": 5.1918467231680815e-05, "loss": 0.7624, "step": 34285 }, { "epoch": 1.3201154956689125, "grad_norm": 1.5170354843139648, "learning_rate": 5.1891954682189505e-05, "loss": 0.8149, "step": 34290 }, { "epoch": 1.3203079884504332, "grad_norm": 0.9941301345825195, "learning_rate": 5.186544653186026e-05, "loss": 0.9011, "step": 34295 }, { "epoch": 1.3205004812319538, "grad_norm": 3.1168370246887207, "learning_rate": 5.183894278311712e-05, "loss": 0.9253, "step": 34300 }, { "epoch": 1.3206929740134745, "grad_norm": 0.9092430472373962, "learning_rate": 5.181244343838353e-05, "loss": 
0.8621, "step": 34305 }, { "epoch": 1.3208854667949952, "grad_norm": 0.8857255578041077, "learning_rate": 5.178594850008286e-05, "loss": 0.7869, "step": 34310 }, { "epoch": 1.3210779595765159, "grad_norm": 0.8203316330909729, "learning_rate": 5.175945797063777e-05, "loss": 0.9274, "step": 34315 }, { "epoch": 1.3212704523580365, "grad_norm": 1.180347204208374, "learning_rate": 5.173297185247068e-05, "loss": 0.7436, "step": 34320 }, { "epoch": 1.3214629451395572, "grad_norm": 2.1619889736175537, "learning_rate": 5.170649014800358e-05, "loss": 0.7501, "step": 34325 }, { "epoch": 1.3216554379210779, "grad_norm": 1.5437921285629272, "learning_rate": 5.168001285965808e-05, "loss": 0.8124, "step": 34330 }, { "epoch": 1.3218479307025985, "grad_norm": 1.1066104173660278, "learning_rate": 5.165353998985523e-05, "loss": 0.8066, "step": 34335 }, { "epoch": 1.3220404234841192, "grad_norm": 0.8616192936897278, "learning_rate": 5.162707154101597e-05, "loss": 0.7576, "step": 34340 }, { "epoch": 1.32223291626564, "grad_norm": 0.889975368976593, "learning_rate": 5.160060751556053e-05, "loss": 0.7436, "step": 34345 }, { "epoch": 1.3224254090471608, "grad_norm": 2.0296168327331543, "learning_rate": 5.157414791590891e-05, "loss": 0.6742, "step": 34350 }, { "epoch": 1.3226179018286814, "grad_norm": 1.0081722736358643, "learning_rate": 5.154769274448068e-05, "loss": 0.7889, "step": 34355 }, { "epoch": 1.3228103946102021, "grad_norm": 1.0939741134643555, "learning_rate": 5.152124200369503e-05, "loss": 0.8367, "step": 34360 }, { "epoch": 1.3230028873917228, "grad_norm": 1.5666990280151367, "learning_rate": 5.14947956959706e-05, "loss": 0.8282, "step": 34365 }, { "epoch": 1.3231953801732435, "grad_norm": 1.164833903312683, "learning_rate": 5.146835382372579e-05, "loss": 0.8148, "step": 34370 }, { "epoch": 1.3233878729547641, "grad_norm": 1.489606499671936, "learning_rate": 5.144191638937854e-05, "loss": 0.8357, "step": 34375 }, { "epoch": 1.323580365736285, "grad_norm": 1.4213351011276245, 
"learning_rate": 5.1415483395346356e-05, "loss": 0.9297, "step": 34380 }, { "epoch": 1.3237728585178057, "grad_norm": 1.414014220237732, "learning_rate": 5.138905484404641e-05, "loss": 0.9808, "step": 34385 }, { "epoch": 1.3239653512993264, "grad_norm": 1.255434274673462, "learning_rate": 5.136263073789536e-05, "loss": 0.8107, "step": 34390 }, { "epoch": 1.324157844080847, "grad_norm": 1.3263583183288574, "learning_rate": 5.133621107930951e-05, "loss": 0.837, "step": 34395 }, { "epoch": 1.3243503368623677, "grad_norm": 1.525387167930603, "learning_rate": 5.1309795870704815e-05, "loss": 0.8036, "step": 34400 }, { "epoch": 1.3245428296438884, "grad_norm": 0.9029211401939392, "learning_rate": 5.128338511449676e-05, "loss": 0.9465, "step": 34405 }, { "epoch": 1.324735322425409, "grad_norm": 1.5629209280014038, "learning_rate": 5.1256978813100354e-05, "loss": 0.8984, "step": 34410 }, { "epoch": 1.3249278152069297, "grad_norm": 1.4381479024887085, "learning_rate": 5.123057696893042e-05, "loss": 0.7762, "step": 34415 }, { "epoch": 1.3251203079884504, "grad_norm": 1.8477442264556885, "learning_rate": 5.1204179584401115e-05, "loss": 0.8943, "step": 34420 }, { "epoch": 1.325312800769971, "grad_norm": 1.091605305671692, "learning_rate": 5.117778666192634e-05, "loss": 0.7989, "step": 34425 }, { "epoch": 1.3255052935514917, "grad_norm": 1.6211384534835815, "learning_rate": 5.1151398203919564e-05, "loss": 0.7188, "step": 34430 }, { "epoch": 1.3256977863330124, "grad_norm": 1.434945821762085, "learning_rate": 5.1125014212793854e-05, "loss": 0.8346, "step": 34435 }, { "epoch": 1.3258902791145333, "grad_norm": 1.272206425666809, "learning_rate": 5.1098634690961765e-05, "loss": 0.8576, "step": 34440 }, { "epoch": 1.326082771896054, "grad_norm": 0.9219827055931091, "learning_rate": 5.107225964083566e-05, "loss": 0.7725, "step": 34445 }, { "epoch": 1.3262752646775746, "grad_norm": 1.1530919075012207, "learning_rate": 5.1045889064827255e-05, "loss": 0.9481, "step": 34450 }, { "epoch": 
1.3264677574590953, "grad_norm": 0.9978432655334473, "learning_rate": 5.101952296534802e-05, "loss": 0.8516, "step": 34455 }, { "epoch": 1.326660250240616, "grad_norm": 1.018656611442566, "learning_rate": 5.0993161344808924e-05, "loss": 0.9756, "step": 34460 }, { "epoch": 1.3268527430221366, "grad_norm": 1.7046258449554443, "learning_rate": 5.0966804205620635e-05, "loss": 1.018, "step": 34465 }, { "epoch": 1.3270452358036573, "grad_norm": 1.1494272947311401, "learning_rate": 5.094045155019325e-05, "loss": 0.9536, "step": 34470 }, { "epoch": 1.3272377285851782, "grad_norm": 2.197441339492798, "learning_rate": 5.0914103380936564e-05, "loss": 0.7673, "step": 34475 }, { "epoch": 1.3274302213666989, "grad_norm": 1.758847713470459, "learning_rate": 5.0887759700259965e-05, "loss": 0.8132, "step": 34480 }, { "epoch": 1.3276227141482195, "grad_norm": 1.2169286012649536, "learning_rate": 5.086142051057241e-05, "loss": 0.7716, "step": 34485 }, { "epoch": 1.3278152069297402, "grad_norm": 1.7917555570602417, "learning_rate": 5.083508581428247e-05, "loss": 0.9653, "step": 34490 }, { "epoch": 1.3280076997112609, "grad_norm": 1.4146066904067993, "learning_rate": 5.080875561379821e-05, "loss": 0.8737, "step": 34495 }, { "epoch": 1.3282001924927815, "grad_norm": 1.2591365575790405, "learning_rate": 5.0782429911527374e-05, "loss": 0.7709, "step": 34500 }, { "epoch": 1.3283926852743022, "grad_norm": 1.0664514303207397, "learning_rate": 5.07561087098773e-05, "loss": 0.8534, "step": 34505 }, { "epoch": 1.3285851780558229, "grad_norm": 2.1911308765411377, "learning_rate": 5.072979201125491e-05, "loss": 0.944, "step": 34510 }, { "epoch": 1.3287776708373435, "grad_norm": 0.8972740769386292, "learning_rate": 5.070347981806657e-05, "loss": 0.7574, "step": 34515 }, { "epoch": 1.3289701636188642, "grad_norm": 1.0241605043411255, "learning_rate": 5.067717213271852e-05, "loss": 0.9743, "step": 34520 }, { "epoch": 1.329162656400385, "grad_norm": 1.0732407569885254, "learning_rate": 
5.065086895761628e-05, "loss": 0.7564, "step": 34525 }, { "epoch": 1.3293551491819056, "grad_norm": 1.0235189199447632, "learning_rate": 5.062457029516523e-05, "loss": 0.9152, "step": 34530 }, { "epoch": 1.3295476419634265, "grad_norm": 2.0122148990631104, "learning_rate": 5.059827614777011e-05, "loss": 1.0821, "step": 34535 }, { "epoch": 1.3297401347449471, "grad_norm": 0.8569071292877197, "learning_rate": 5.057198651783538e-05, "loss": 0.7275, "step": 34540 }, { "epoch": 1.3299326275264678, "grad_norm": 1.707134485244751, "learning_rate": 5.0545701407765045e-05, "loss": 0.9075, "step": 34545 }, { "epoch": 1.3301251203079885, "grad_norm": 1.5125768184661865, "learning_rate": 5.051942081996276e-05, "loss": 0.6996, "step": 34550 }, { "epoch": 1.3303176130895091, "grad_norm": 0.8091332316398621, "learning_rate": 5.049314475683158e-05, "loss": 0.6558, "step": 34555 }, { "epoch": 1.3305101058710298, "grad_norm": 1.32486093044281, "learning_rate": 5.046687322077444e-05, "loss": 0.8852, "step": 34560 }, { "epoch": 1.3307025986525505, "grad_norm": 1.4415032863616943, "learning_rate": 5.0440606214193574e-05, "loss": 0.8404, "step": 34565 }, { "epoch": 1.3308950914340711, "grad_norm": 1.403547763824463, "learning_rate": 5.0414343739490975e-05, "loss": 0.7527, "step": 34570 }, { "epoch": 1.331087584215592, "grad_norm": 2.1569442749023438, "learning_rate": 5.038808579906816e-05, "loss": 0.7985, "step": 34575 }, { "epoch": 1.3312800769971127, "grad_norm": 1.6453166007995605, "learning_rate": 5.036183239532629e-05, "loss": 1.0465, "step": 34580 }, { "epoch": 1.3314725697786334, "grad_norm": 1.2388137578964233, "learning_rate": 5.0335583530665985e-05, "loss": 0.8442, "step": 34585 }, { "epoch": 1.331665062560154, "grad_norm": 1.0725327730178833, "learning_rate": 5.0309339207487574e-05, "loss": 0.7288, "step": 34590 }, { "epoch": 1.3318575553416747, "grad_norm": 2.725146532058716, "learning_rate": 5.028309942819091e-05, "loss": 0.89, "step": 34595 }, { "epoch": 
1.3320500481231954, "grad_norm": 0.9890113472938538, "learning_rate": 5.025686419517548e-05, "loss": 0.7072, "step": 34600 }, { "epoch": 1.332242540904716, "grad_norm": 1.8415369987487793, "learning_rate": 5.023063351084033e-05, "loss": 0.8123, "step": 34605 }, { "epoch": 1.3324350336862367, "grad_norm": 0.9944572448730469, "learning_rate": 5.020440737758401e-05, "loss": 0.9136, "step": 34610 }, { "epoch": 1.3326275264677574, "grad_norm": 1.131773591041565, "learning_rate": 5.017818579780478e-05, "loss": 0.8766, "step": 34615 }, { "epoch": 1.332820019249278, "grad_norm": 1.1717694997787476, "learning_rate": 5.01519687739004e-05, "loss": 0.8695, "step": 34620 }, { "epoch": 1.3330125120307987, "grad_norm": 2.2559797763824463, "learning_rate": 5.0125756308268324e-05, "loss": 0.7504, "step": 34625 }, { "epoch": 1.3332050048123194, "grad_norm": 1.2070955038070679, "learning_rate": 5.0099548403305354e-05, "loss": 0.7427, "step": 34630 }, { "epoch": 1.3333974975938403, "grad_norm": 1.3688490390777588, "learning_rate": 5.0073345061408205e-05, "loss": 0.8539, "step": 34635 }, { "epoch": 1.333589990375361, "grad_norm": 0.8127152919769287, "learning_rate": 5.004714628497288e-05, "loss": 0.8662, "step": 34640 }, { "epoch": 1.3337824831568816, "grad_norm": 1.9359740018844604, "learning_rate": 5.0020952076395124e-05, "loss": 0.9428, "step": 34645 }, { "epoch": 1.3339749759384023, "grad_norm": 2.281846523284912, "learning_rate": 4.999476243807021e-05, "loss": 0.9462, "step": 34650 }, { "epoch": 1.334167468719923, "grad_norm": 0.9672248959541321, "learning_rate": 4.996857737239305e-05, "loss": 0.6946, "step": 34655 }, { "epoch": 1.3343599615014436, "grad_norm": 1.2799361944198608, "learning_rate": 4.994239688175799e-05, "loss": 0.8645, "step": 34660 }, { "epoch": 1.3345524542829643, "grad_norm": 1.4629347324371338, "learning_rate": 4.991622096855923e-05, "loss": 0.7476, "step": 34665 }, { "epoch": 1.3347449470644852, "grad_norm": 1.2526726722717285, "learning_rate": 
4.9890049635190216e-05, "loss": 0.6983, "step": 34670 }, { "epoch": 1.3349374398460059, "grad_norm": 1.9672549962997437, "learning_rate": 4.9863882884044234e-05, "loss": 0.9105, "step": 34675 }, { "epoch": 1.3351299326275265, "grad_norm": 1.3764963150024414, "learning_rate": 4.983772071751405e-05, "loss": 0.8268, "step": 34680 }, { "epoch": 1.3353224254090472, "grad_norm": 1.1114428043365479, "learning_rate": 4.9811563137992036e-05, "loss": 0.6683, "step": 34685 }, { "epoch": 1.335514918190568, "grad_norm": 1.1706123352050781, "learning_rate": 4.978541014787006e-05, "loss": 0.8398, "step": 34690 }, { "epoch": 1.3357074109720886, "grad_norm": 1.0030032396316528, "learning_rate": 4.9759261749539695e-05, "loss": 0.7736, "step": 34695 }, { "epoch": 1.3358999037536092, "grad_norm": 2.052016258239746, "learning_rate": 4.9733117945392026e-05, "loss": 0.9667, "step": 34700 }, { "epoch": 1.33609239653513, "grad_norm": 2.7888071537017822, "learning_rate": 4.970697873781774e-05, "loss": 1.0169, "step": 34705 }, { "epoch": 1.3362848893166506, "grad_norm": 1.691266655921936, "learning_rate": 4.968084412920712e-05, "loss": 0.6926, "step": 34710 }, { "epoch": 1.3364773820981712, "grad_norm": 1.3733407258987427, "learning_rate": 4.965471412194993e-05, "loss": 0.7997, "step": 34715 }, { "epoch": 1.336669874879692, "grad_norm": 2.068976879119873, "learning_rate": 4.9628588718435634e-05, "loss": 0.9425, "step": 34720 }, { "epoch": 1.3368623676612126, "grad_norm": 1.9062495231628418, "learning_rate": 4.960246792105322e-05, "loss": 0.8659, "step": 34725 }, { "epoch": 1.3370548604427335, "grad_norm": 1.3141814470291138, "learning_rate": 4.957635173219129e-05, "loss": 0.7289, "step": 34730 }, { "epoch": 1.3372473532242541, "grad_norm": 0.9774174094200134, "learning_rate": 4.955024015423789e-05, "loss": 0.7244, "step": 34735 }, { "epoch": 1.3374398460057748, "grad_norm": 1.086158037185669, "learning_rate": 4.952413318958092e-05, "loss": 0.9397, "step": 34740 }, { "epoch": 
1.3376323387872955, "grad_norm": 1.092448353767395, "learning_rate": 4.9498030840607547e-05, "loss": 0.7571, "step": 34745 }, { "epoch": 1.3378248315688162, "grad_norm": 1.8962229490280151, "learning_rate": 4.947193310970471e-05, "loss": 0.8264, "step": 34750 }, { "epoch": 1.3380173243503368, "grad_norm": 1.5247009992599487, "learning_rate": 4.944583999925888e-05, "loss": 0.7856, "step": 34755 }, { "epoch": 1.3382098171318575, "grad_norm": 1.6550134420394897, "learning_rate": 4.941975151165613e-05, "loss": 1.024, "step": 34760 }, { "epoch": 1.3384023099133784, "grad_norm": 1.2675317525863647, "learning_rate": 4.939366764928196e-05, "loss": 0.8908, "step": 34765 }, { "epoch": 1.338594802694899, "grad_norm": 1.3318907022476196, "learning_rate": 4.9367588414521714e-05, "loss": 0.7813, "step": 34770 }, { "epoch": 1.3387872954764197, "grad_norm": 1.605064034461975, "learning_rate": 4.934151380976007e-05, "loss": 0.9145, "step": 34775 }, { "epoch": 1.3389797882579404, "grad_norm": 1.6478382349014282, "learning_rate": 4.9315443837381417e-05, "loss": 1.0189, "step": 34780 }, { "epoch": 1.339172281039461, "grad_norm": 1.617727279663086, "learning_rate": 4.9289378499769655e-05, "loss": 0.7663, "step": 34785 }, { "epoch": 1.3393647738209817, "grad_norm": 2.157989025115967, "learning_rate": 4.9263317799308305e-05, "loss": 0.9308, "step": 34790 }, { "epoch": 1.3395572666025024, "grad_norm": 1.5868369340896606, "learning_rate": 4.923726173838048e-05, "loss": 1.0234, "step": 34795 }, { "epoch": 1.339749759384023, "grad_norm": 1.0689306259155273, "learning_rate": 4.921121031936876e-05, "loss": 0.9376, "step": 34800 }, { "epoch": 1.3399422521655437, "grad_norm": 1.227166771888733, "learning_rate": 4.918516354465541e-05, "loss": 0.9385, "step": 34805 }, { "epoch": 1.3401347449470644, "grad_norm": 0.9989410638809204, "learning_rate": 4.915912141662225e-05, "loss": 0.865, "step": 34810 }, { "epoch": 1.340327237728585, "grad_norm": 1.1852025985717773, "learning_rate": 
4.913308393765066e-05, "loss": 0.9824, "step": 34815 }, { "epoch": 1.3405197305101058, "grad_norm": 1.3979310989379883, "learning_rate": 4.910705111012153e-05, "loss": 0.9297, "step": 34820 }, { "epoch": 1.3407122232916264, "grad_norm": 1.6673941612243652, "learning_rate": 4.90810229364155e-05, "loss": 0.777, "step": 34825 }, { "epoch": 1.3409047160731473, "grad_norm": 1.139458417892456, "learning_rate": 4.9054999418912586e-05, "loss": 0.7229, "step": 34830 }, { "epoch": 1.341097208854668, "grad_norm": 0.8161736726760864, "learning_rate": 4.902898055999249e-05, "loss": 0.7305, "step": 34835 }, { "epoch": 1.3412897016361887, "grad_norm": 1.6782662868499756, "learning_rate": 4.9002966362034464e-05, "loss": 0.8107, "step": 34840 }, { "epoch": 1.3414821944177093, "grad_norm": 1.5833643674850464, "learning_rate": 4.897695682741739e-05, "loss": 0.7968, "step": 34845 }, { "epoch": 1.34167468719923, "grad_norm": 1.409138798713684, "learning_rate": 4.895095195851953e-05, "loss": 0.9225, "step": 34850 }, { "epoch": 1.3418671799807507, "grad_norm": 1.4280591011047363, "learning_rate": 4.892495175771903e-05, "loss": 0.7304, "step": 34855 }, { "epoch": 1.3420596727622713, "grad_norm": 1.0369322299957275, "learning_rate": 4.889895622739331e-05, "loss": 0.884, "step": 34860 }, { "epoch": 1.3422521655437922, "grad_norm": 0.9493328928947449, "learning_rate": 4.887296536991953e-05, "loss": 0.8159, "step": 34865 }, { "epoch": 1.342444658325313, "grad_norm": 1.258876085281372, "learning_rate": 4.884697918767438e-05, "loss": 0.8475, "step": 34870 }, { "epoch": 1.3426371511068336, "grad_norm": 1.2037972211837769, "learning_rate": 4.8820997683034166e-05, "loss": 0.8062, "step": 34875 }, { "epoch": 1.3428296438883542, "grad_norm": 1.7686017751693726, "learning_rate": 4.879502085837461e-05, "loss": 0.8544, "step": 34880 }, { "epoch": 1.343022136669875, "grad_norm": 1.291748285293579, "learning_rate": 4.8769048716071264e-05, "loss": 0.8541, "step": 34885 }, { "epoch": 1.3432146294513956, 
"grad_norm": 1.2345646619796753, "learning_rate": 4.8743081258499005e-05, "loss": 0.9746, "step": 34890 }, { "epoch": 1.3434071222329163, "grad_norm": 2.3298258781433105, "learning_rate": 4.871711848803241e-05, "loss": 0.9702, "step": 34895 }, { "epoch": 1.343599615014437, "grad_norm": 1.7724379301071167, "learning_rate": 4.869116040704562e-05, "loss": 0.8757, "step": 34900 }, { "epoch": 1.3437921077959576, "grad_norm": 1.737610101699829, "learning_rate": 4.866520701791235e-05, "loss": 0.7819, "step": 34905 }, { "epoch": 1.3439846005774783, "grad_norm": 1.1426078081130981, "learning_rate": 4.863925832300581e-05, "loss": 0.8039, "step": 34910 }, { "epoch": 1.344177093358999, "grad_norm": 1.7456549406051636, "learning_rate": 4.8613314324698855e-05, "loss": 0.899, "step": 34915 }, { "epoch": 1.3443695861405196, "grad_norm": 1.9409685134887695, "learning_rate": 4.8587375025363914e-05, "loss": 0.9121, "step": 34920 }, { "epoch": 1.3445620789220405, "grad_norm": 0.8889964818954468, "learning_rate": 4.856144042737293e-05, "loss": 0.8982, "step": 34925 }, { "epoch": 1.3447545717035612, "grad_norm": 1.207217812538147, "learning_rate": 4.8535510533097516e-05, "loss": 0.9708, "step": 34930 }, { "epoch": 1.3449470644850818, "grad_norm": 1.7888151407241821, "learning_rate": 4.8509585344908705e-05, "loss": 0.7838, "step": 34935 }, { "epoch": 1.3451395572666025, "grad_norm": 1.8165149688720703, "learning_rate": 4.8483664865177226e-05, "loss": 0.8495, "step": 34940 }, { "epoch": 1.3453320500481232, "grad_norm": 1.4321086406707764, "learning_rate": 4.845774909627332e-05, "loss": 0.8093, "step": 34945 }, { "epoch": 1.3455245428296438, "grad_norm": 1.2221522331237793, "learning_rate": 4.843183804056687e-05, "loss": 0.906, "step": 34950 }, { "epoch": 1.3457170356111645, "grad_norm": 1.2078337669372559, "learning_rate": 4.8405931700427145e-05, "loss": 0.7406, "step": 34955 }, { "epoch": 1.3459095283926854, "grad_norm": 1.0744653940200806, "learning_rate": 4.838003007822326e-05, "loss": 
0.8133, "step": 34960 }, { "epoch": 1.346102021174206, "grad_norm": 1.2182228565216064, "learning_rate": 4.835413317632363e-05, "loss": 0.8024, "step": 34965 }, { "epoch": 1.3462945139557267, "grad_norm": 1.1697922945022583, "learning_rate": 4.8328240997096406e-05, "loss": 0.9592, "step": 34970 }, { "epoch": 1.3464870067372474, "grad_norm": 1.4923276901245117, "learning_rate": 4.830235354290925e-05, "loss": 0.7919, "step": 34975 }, { "epoch": 1.346679499518768, "grad_norm": 1.7563990354537964, "learning_rate": 4.827647081612944e-05, "loss": 0.8882, "step": 34980 }, { "epoch": 1.3468719923002888, "grad_norm": 2.2947778701782227, "learning_rate": 4.825059281912365e-05, "loss": 0.9188, "step": 34985 }, { "epoch": 1.3470644850818094, "grad_norm": 1.5248280763626099, "learning_rate": 4.822471955425841e-05, "loss": 0.965, "step": 34990 }, { "epoch": 1.34725697786333, "grad_norm": 0.8243402242660522, "learning_rate": 4.819885102389956e-05, "loss": 0.8224, "step": 34995 }, { "epoch": 1.3474494706448508, "grad_norm": 0.7794885039329529, "learning_rate": 4.817298723041264e-05, "loss": 0.7754, "step": 35000 }, { "epoch": 1.3476419634263714, "grad_norm": 1.2712163925170898, "learning_rate": 4.8147128176162695e-05, "loss": 0.8612, "step": 35005 }, { "epoch": 1.347834456207892, "grad_norm": 0.9318971633911133, "learning_rate": 4.8121273863514435e-05, "loss": 0.7837, "step": 35010 }, { "epoch": 1.3480269489894128, "grad_norm": 1.127020001411438, "learning_rate": 4.809542429483197e-05, "loss": 0.7845, "step": 35015 }, { "epoch": 1.3482194417709337, "grad_norm": 1.16558039188385, "learning_rate": 4.806957947247912e-05, "loss": 0.8318, "step": 35020 }, { "epoch": 1.3484119345524543, "grad_norm": 1.0364631414413452, "learning_rate": 4.804373939881922e-05, "loss": 0.9216, "step": 35025 }, { "epoch": 1.348604427333975, "grad_norm": 1.3050929307937622, "learning_rate": 4.801790407621518e-05, "loss": 0.9627, "step": 35030 }, { "epoch": 1.3487969201154957, "grad_norm": 1.1998785734176636, 
"learning_rate": 4.799207350702949e-05, "loss": 0.884, "step": 35035 }, { "epoch": 1.3489894128970163, "grad_norm": 0.8209556341171265, "learning_rate": 4.796624769362409e-05, "loss": 0.6925, "step": 35040 }, { "epoch": 1.349181905678537, "grad_norm": 2.4278218746185303, "learning_rate": 4.794042663836071e-05, "loss": 0.7594, "step": 35045 }, { "epoch": 1.3493743984600577, "grad_norm": 1.8275748491287231, "learning_rate": 4.791461034360043e-05, "loss": 0.9516, "step": 35050 }, { "epoch": 1.3495668912415784, "grad_norm": 1.21636962890625, "learning_rate": 4.7888798811703985e-05, "loss": 0.9819, "step": 35055 }, { "epoch": 1.3497593840230993, "grad_norm": 1.5131886005401611, "learning_rate": 4.7862992045031684e-05, "loss": 0.955, "step": 35060 }, { "epoch": 1.34995187680462, "grad_norm": 1.186279296875, "learning_rate": 4.7837190045943436e-05, "loss": 0.8632, "step": 35065 }, { "epoch": 1.3501443695861406, "grad_norm": 1.2006460428237915, "learning_rate": 4.7811392816798525e-05, "loss": 0.8132, "step": 35070 }, { "epoch": 1.3503368623676613, "grad_norm": 1.6792789697647095, "learning_rate": 4.7785600359956096e-05, "loss": 0.8756, "step": 35075 }, { "epoch": 1.350529355149182, "grad_norm": 1.5003536939620972, "learning_rate": 4.77598126777746e-05, "loss": 0.7711, "step": 35080 }, { "epoch": 1.3507218479307026, "grad_norm": 1.4191226959228516, "learning_rate": 4.7734029772612165e-05, "loss": 0.7895, "step": 35085 }, { "epoch": 1.3509143407122233, "grad_norm": 1.391128659248352, "learning_rate": 4.7708251646826476e-05, "loss": 0.8829, "step": 35090 }, { "epoch": 1.351106833493744, "grad_norm": 1.1470931768417358, "learning_rate": 4.7682478302774816e-05, "loss": 0.8812, "step": 35095 }, { "epoch": 1.3512993262752646, "grad_norm": 1.3453543186187744, "learning_rate": 4.765670974281386e-05, "loss": 0.9689, "step": 35100 }, { "epoch": 1.3514918190567853, "grad_norm": 1.4172368049621582, "learning_rate": 4.763094596930014e-05, "loss": 0.9129, "step": 35105 }, { "epoch": 
1.351684311838306, "grad_norm": 1.895308494567871, "learning_rate": 4.7605186984589456e-05, "loss": 0.8221, "step": 35110 }, { "epoch": 1.3518768046198266, "grad_norm": 1.4038105010986328, "learning_rate": 4.7579432791037335e-05, "loss": 0.9491, "step": 35115 }, { "epoch": 1.3520692974013475, "grad_norm": 1.111763834953308, "learning_rate": 4.755368339099884e-05, "loss": 0.7322, "step": 35120 }, { "epoch": 1.3522617901828682, "grad_norm": 1.7646887302398682, "learning_rate": 4.752793878682861e-05, "loss": 0.8792, "step": 35125 }, { "epoch": 1.3524542829643889, "grad_norm": 1.1743024587631226, "learning_rate": 4.750219898088073e-05, "loss": 0.9847, "step": 35130 }, { "epoch": 1.3526467757459095, "grad_norm": 1.2739955186843872, "learning_rate": 4.7476463975509e-05, "loss": 0.7783, "step": 35135 }, { "epoch": 1.3528392685274302, "grad_norm": 1.317272424697876, "learning_rate": 4.74507337730667e-05, "loss": 0.9875, "step": 35140 }, { "epoch": 1.3530317613089509, "grad_norm": 0.9840208888053894, "learning_rate": 4.74250083759067e-05, "loss": 0.8396, "step": 35145 }, { "epoch": 1.3532242540904715, "grad_norm": 1.4527745246887207, "learning_rate": 4.739928778638143e-05, "loss": 0.8583, "step": 35150 }, { "epoch": 1.3534167468719924, "grad_norm": 1.6825000047683716, "learning_rate": 4.7373572006842806e-05, "loss": 0.9443, "step": 35155 }, { "epoch": 1.353609239653513, "grad_norm": 1.2413737773895264, "learning_rate": 4.734786103964242e-05, "loss": 0.9722, "step": 35160 }, { "epoch": 1.3538017324350338, "grad_norm": 1.2684810161590576, "learning_rate": 4.732215488713133e-05, "loss": 0.7281, "step": 35165 }, { "epoch": 1.3539942252165544, "grad_norm": 1.8992153406143188, "learning_rate": 4.729645355166027e-05, "loss": 0.8221, "step": 35170 }, { "epoch": 1.354186717998075, "grad_norm": 1.6467632055282593, "learning_rate": 4.7270757035579325e-05, "loss": 0.8439, "step": 35175 }, { "epoch": 1.3543792107795958, "grad_norm": 1.0282069444656372, "learning_rate": 
4.724506534123843e-05, "loss": 0.8158, "step": 35180 }, { "epoch": 1.3545717035611164, "grad_norm": 1.7379052639007568, "learning_rate": 4.72193784709868e-05, "loss": 0.8245, "step": 35185 }, { "epoch": 1.3547641963426371, "grad_norm": 2.1506638526916504, "learning_rate": 4.719369642717336e-05, "loss": 0.8818, "step": 35190 }, { "epoch": 1.3549566891241578, "grad_norm": 1.284745454788208, "learning_rate": 4.7168019212146576e-05, "loss": 0.8457, "step": 35195 }, { "epoch": 1.3551491819056785, "grad_norm": 0.897658109664917, "learning_rate": 4.71423468282545e-05, "loss": 0.8173, "step": 35200 }, { "epoch": 1.3553416746871991, "grad_norm": 2.404270887374878, "learning_rate": 4.711667927784458e-05, "loss": 0.8396, "step": 35205 }, { "epoch": 1.3555341674687198, "grad_norm": 1.8264572620391846, "learning_rate": 4.7091016563264087e-05, "loss": 0.9482, "step": 35210 }, { "epoch": 1.3557266602502407, "grad_norm": 2.137230396270752, "learning_rate": 4.70653586868596e-05, "loss": 0.8982, "step": 35215 }, { "epoch": 1.3559191530317614, "grad_norm": 1.1221061944961548, "learning_rate": 4.703970565097742e-05, "loss": 0.8983, "step": 35220 }, { "epoch": 1.356111645813282, "grad_norm": 1.6483477354049683, "learning_rate": 4.7014057457963315e-05, "loss": 0.801, "step": 35225 }, { "epoch": 1.3563041385948027, "grad_norm": 1.8643723726272583, "learning_rate": 4.698841411016269e-05, "loss": 1.0251, "step": 35230 }, { "epoch": 1.3564966313763234, "grad_norm": 1.128015398979187, "learning_rate": 4.6962775609920394e-05, "loss": 0.844, "step": 35235 }, { "epoch": 1.356689124157844, "grad_norm": 0.9045073390007019, "learning_rate": 4.693714195958092e-05, "loss": 0.869, "step": 35240 }, { "epoch": 1.3568816169393647, "grad_norm": 1.8441210985183716, "learning_rate": 4.691151316148832e-05, "loss": 0.9395, "step": 35245 }, { "epoch": 1.3570741097208856, "grad_norm": 1.437767505645752, "learning_rate": 4.688588921798616e-05, "loss": 0.6946, "step": 35250 }, { "epoch": 1.3572666025024063, 
"grad_norm": 1.447969675064087, "learning_rate": 4.68602701314176e-05, "loss": 0.8183, "step": 35255 }, { "epoch": 1.357459095283927, "grad_norm": 2.16900634765625, "learning_rate": 4.68346559041253e-05, "loss": 0.7718, "step": 35260 }, { "epoch": 1.3576515880654476, "grad_norm": 1.2576295137405396, "learning_rate": 4.680904653845152e-05, "loss": 0.8408, "step": 35265 }, { "epoch": 1.3578440808469683, "grad_norm": 1.3232860565185547, "learning_rate": 4.678344203673808e-05, "loss": 0.8454, "step": 35270 }, { "epoch": 1.358036573628489, "grad_norm": 1.5161672830581665, "learning_rate": 4.675784240132638e-05, "loss": 0.8271, "step": 35275 }, { "epoch": 1.3582290664100096, "grad_norm": 1.339381217956543, "learning_rate": 4.6732247634557214e-05, "loss": 0.8369, "step": 35280 }, { "epoch": 1.3584215591915303, "grad_norm": 1.2155077457427979, "learning_rate": 4.670665773877121e-05, "loss": 0.8823, "step": 35285 }, { "epoch": 1.358614051973051, "grad_norm": 1.6625285148620605, "learning_rate": 4.6681072716308285e-05, "loss": 0.9456, "step": 35290 }, { "epoch": 1.3588065447545716, "grad_norm": 0.9515489339828491, "learning_rate": 4.6655492569508056e-05, "loss": 0.889, "step": 35295 }, { "epoch": 1.3589990375360923, "grad_norm": 1.9020076990127563, "learning_rate": 4.662991730070966e-05, "loss": 0.8547, "step": 35300 }, { "epoch": 1.359191530317613, "grad_norm": 1.41860032081604, "learning_rate": 4.660434691225177e-05, "loss": 0.8674, "step": 35305 }, { "epoch": 1.3593840230991339, "grad_norm": 1.2650240659713745, "learning_rate": 4.657878140647265e-05, "loss": 0.834, "step": 35310 }, { "epoch": 1.3595765158806545, "grad_norm": 1.1325031518936157, "learning_rate": 4.655322078571013e-05, "loss": 0.8641, "step": 35315 }, { "epoch": 1.3597690086621752, "grad_norm": 1.4202800989151, "learning_rate": 4.652766505230143e-05, "loss": 0.8613, "step": 35320 }, { "epoch": 1.3599615014436959, "grad_norm": 1.1674113273620605, "learning_rate": 4.650211420858361e-05, "loss": 0.9188, 
"step": 35325 }, { "epoch": 1.3601539942252165, "grad_norm": 1.0588910579681396, "learning_rate": 4.6476568256893025e-05, "loss": 0.9119, "step": 35330 }, { "epoch": 1.3603464870067372, "grad_norm": 2.010552167892456, "learning_rate": 4.645102719956572e-05, "loss": 0.7566, "step": 35335 }, { "epoch": 1.3605389797882579, "grad_norm": 1.1530786752700806, "learning_rate": 4.6425491038937244e-05, "loss": 0.8898, "step": 35340 }, { "epoch": 1.3607314725697786, "grad_norm": 1.016413927078247, "learning_rate": 4.6399959777342746e-05, "loss": 0.9265, "step": 35345 }, { "epoch": 1.3609239653512994, "grad_norm": 0.9319851398468018, "learning_rate": 4.6374433417116826e-05, "loss": 0.8908, "step": 35350 }, { "epoch": 1.3611164581328201, "grad_norm": 1.6301521062850952, "learning_rate": 4.6348911960593736e-05, "loss": 0.8063, "step": 35355 }, { "epoch": 1.3613089509143408, "grad_norm": 1.0370879173278809, "learning_rate": 4.632339541010726e-05, "loss": 0.8456, "step": 35360 }, { "epoch": 1.3615014436958615, "grad_norm": 1.1396170854568481, "learning_rate": 4.629788376799065e-05, "loss": 0.7626, "step": 35365 }, { "epoch": 1.3616939364773821, "grad_norm": 1.1902436017990112, "learning_rate": 4.6272377036576886e-05, "loss": 0.8911, "step": 35370 }, { "epoch": 1.3618864292589028, "grad_norm": 1.496978521347046, "learning_rate": 4.6246875218198294e-05, "loss": 0.7299, "step": 35375 }, { "epoch": 1.3620789220404235, "grad_norm": 1.5510118007659912, "learning_rate": 4.622137831518688e-05, "loss": 0.7804, "step": 35380 }, { "epoch": 1.3622714148219441, "grad_norm": 1.1670730113983154, "learning_rate": 4.619588632987416e-05, "loss": 0.8348, "step": 35385 }, { "epoch": 1.3624639076034648, "grad_norm": 1.505469799041748, "learning_rate": 4.617039926459127e-05, "loss": 0.7221, "step": 35390 }, { "epoch": 1.3626564003849855, "grad_norm": 1.3715969324111938, "learning_rate": 4.61449171216687e-05, "loss": 0.9265, "step": 35395 }, { "epoch": 1.3628488931665061, "grad_norm": 
0.8129710555076599, "learning_rate": 4.611943990343677e-05, "loss": 0.8726, "step": 35400 }, { "epoch": 1.3630413859480268, "grad_norm": 0.745360791683197, "learning_rate": 4.60939676122251e-05, "loss": 0.8237, "step": 35405 }, { "epoch": 1.3632338787295477, "grad_norm": 1.287010908126831, "learning_rate": 4.606850025036299e-05, "loss": 0.8159, "step": 35410 }, { "epoch": 1.3634263715110684, "grad_norm": 1.7442882061004639, "learning_rate": 4.604303782017928e-05, "loss": 0.6109, "step": 35415 }, { "epoch": 1.363618864292589, "grad_norm": 2.1345295906066895, "learning_rate": 4.6017580324002364e-05, "loss": 0.9068, "step": 35420 }, { "epoch": 1.3638113570741097, "grad_norm": 1.987751841545105, "learning_rate": 4.5992127764160054e-05, "loss": 0.932, "step": 35425 }, { "epoch": 1.3640038498556304, "grad_norm": 1.3692355155944824, "learning_rate": 4.5966680142979954e-05, "loss": 0.7202, "step": 35430 }, { "epoch": 1.364196342637151, "grad_norm": 1.3024131059646606, "learning_rate": 4.594123746278899e-05, "loss": 0.9063, "step": 35435 }, { "epoch": 1.3643888354186717, "grad_norm": 1.8773390054702759, "learning_rate": 4.591579972591376e-05, "loss": 0.9294, "step": 35440 }, { "epoch": 1.3645813282001926, "grad_norm": 1.8753331899642944, "learning_rate": 4.589036693468035e-05, "loss": 0.9329, "step": 35445 }, { "epoch": 1.3647738209817133, "grad_norm": 0.8658643364906311, "learning_rate": 4.5864939091414495e-05, "loss": 0.7045, "step": 35450 }, { "epoch": 1.364966313763234, "grad_norm": 1.2768117189407349, "learning_rate": 4.5839516198441304e-05, "loss": 0.9337, "step": 35455 }, { "epoch": 1.3651588065447546, "grad_norm": 1.145644187927246, "learning_rate": 4.581409825808557e-05, "loss": 0.8107, "step": 35460 }, { "epoch": 1.3653512993262753, "grad_norm": 1.1863698959350586, "learning_rate": 4.5788685272671605e-05, "loss": 0.6387, "step": 35465 }, { "epoch": 1.365543792107796, "grad_norm": 1.2812061309814453, "learning_rate": 4.576327724452326e-05, "loss": 0.756, "step": 
35470 }, { "epoch": 1.3657362848893166, "grad_norm": 1.6907789707183838, "learning_rate": 4.5737874175963956e-05, "loss": 0.9023, "step": 35475 }, { "epoch": 1.3659287776708373, "grad_norm": 1.9304261207580566, "learning_rate": 4.5712476069316576e-05, "loss": 0.8313, "step": 35480 }, { "epoch": 1.366121270452358, "grad_norm": 1.2450244426727295, "learning_rate": 4.568708292690364e-05, "loss": 0.8494, "step": 35485 }, { "epoch": 1.3663137632338787, "grad_norm": 1.577697515487671, "learning_rate": 4.566169475104717e-05, "loss": 0.8129, "step": 35490 }, { "epoch": 1.3665062560153993, "grad_norm": 2.367910146713257, "learning_rate": 4.56363115440688e-05, "loss": 0.8501, "step": 35495 }, { "epoch": 1.36669874879692, "grad_norm": 1.588900089263916, "learning_rate": 4.561093330828954e-05, "loss": 0.9732, "step": 35500 }, { "epoch": 1.3668912415784409, "grad_norm": 1.3308700323104858, "learning_rate": 4.558556004603019e-05, "loss": 0.8681, "step": 35505 }, { "epoch": 1.3670837343599616, "grad_norm": 1.5678372383117676, "learning_rate": 4.556019175961091e-05, "loss": 0.8269, "step": 35510 }, { "epoch": 1.3672762271414822, "grad_norm": 1.0886204242706299, "learning_rate": 4.553482845135143e-05, "loss": 0.9315, "step": 35515 }, { "epoch": 1.367468719923003, "grad_norm": 1.2117904424667358, "learning_rate": 4.5509470123571095e-05, "loss": 0.8139, "step": 35520 }, { "epoch": 1.3676612127045236, "grad_norm": 1.5415525436401367, "learning_rate": 4.5484116778588807e-05, "loss": 0.8667, "step": 35525 }, { "epoch": 1.3678537054860442, "grad_norm": 1.6939245462417603, "learning_rate": 4.545876841872281e-05, "loss": 0.8011, "step": 35530 }, { "epoch": 1.368046198267565, "grad_norm": 1.9691619873046875, "learning_rate": 4.5433425046291224e-05, "loss": 0.9255, "step": 35535 }, { "epoch": 1.3682386910490858, "grad_norm": 2.5566859245300293, "learning_rate": 4.54080866636114e-05, "loss": 0.998, "step": 35540 }, { "epoch": 1.3684311838306065, "grad_norm": 1.200829029083252, 
"learning_rate": 4.538275327300042e-05, "loss": 0.7522, "step": 35545 }, { "epoch": 1.3686236766121271, "grad_norm": 1.0914881229400635, "learning_rate": 4.535742487677485e-05, "loss": 0.8378, "step": 35550 }, { "epoch": 1.3688161693936478, "grad_norm": 1.1408578157424927, "learning_rate": 4.5332101477250796e-05, "loss": 0.8735, "step": 35555 }, { "epoch": 1.3690086621751685, "grad_norm": 0.8551640510559082, "learning_rate": 4.5306783076743955e-05, "loss": 0.8452, "step": 35560 }, { "epoch": 1.3692011549566891, "grad_norm": 2.295720100402832, "learning_rate": 4.5281469677569456e-05, "loss": 1.0684, "step": 35565 }, { "epoch": 1.3693936477382098, "grad_norm": 1.2390626668930054, "learning_rate": 4.5256161282042085e-05, "loss": 0.7534, "step": 35570 }, { "epoch": 1.3695861405197305, "grad_norm": 1.0765421390533447, "learning_rate": 4.5230857892476106e-05, "loss": 0.8554, "step": 35575 }, { "epoch": 1.3697786333012512, "grad_norm": 1.3477513790130615, "learning_rate": 4.5205559511185415e-05, "loss": 0.8043, "step": 35580 }, { "epoch": 1.3699711260827718, "grad_norm": 1.3972609043121338, "learning_rate": 4.518026614048324e-05, "loss": 0.7081, "step": 35585 }, { "epoch": 1.3701636188642925, "grad_norm": 0.9186348915100098, "learning_rate": 4.515497778268266e-05, "loss": 0.6965, "step": 35590 }, { "epoch": 1.3703561116458132, "grad_norm": 1.2708579301834106, "learning_rate": 4.5129694440096005e-05, "loss": 0.801, "step": 35595 }, { "epoch": 1.3705486044273338, "grad_norm": 1.0227978229522705, "learning_rate": 4.5104416115035306e-05, "loss": 0.7764, "step": 35600 }, { "epoch": 1.3707410972088547, "grad_norm": 1.2434272766113281, "learning_rate": 4.507914280981211e-05, "loss": 0.7463, "step": 35605 }, { "epoch": 1.3709335899903754, "grad_norm": 1.1386263370513916, "learning_rate": 4.505387452673753e-05, "loss": 0.8806, "step": 35610 }, { "epoch": 1.371126082771896, "grad_norm": 0.9424572587013245, "learning_rate": 4.502861126812205e-05, "loss": 0.9189, "step": 35615 }, { 
"epoch": 1.3713185755534167, "grad_norm": 0.7779520153999329, "learning_rate": 4.500335303627601e-05, "loss": 0.8486, "step": 35620 }, { "epoch": 1.3715110683349374, "grad_norm": 1.1644350290298462, "learning_rate": 4.497809983350897e-05, "loss": 0.7089, "step": 35625 }, { "epoch": 1.371703561116458, "grad_norm": 1.2244153022766113, "learning_rate": 4.4952851662130216e-05, "loss": 0.9906, "step": 35630 }, { "epoch": 1.3718960538979788, "grad_norm": 2.0696568489074707, "learning_rate": 4.4927608524448515e-05, "loss": 0.7632, "step": 35635 }, { "epoch": 1.3720885466794996, "grad_norm": 1.5171512365341187, "learning_rate": 4.4902370422772233e-05, "loss": 0.9423, "step": 35640 }, { "epoch": 1.3722810394610203, "grad_norm": 1.1163785457611084, "learning_rate": 4.4877137359409116e-05, "loss": 0.7411, "step": 35645 }, { "epoch": 1.372473532242541, "grad_norm": 1.2916820049285889, "learning_rate": 4.485190933666671e-05, "loss": 0.8892, "step": 35650 }, { "epoch": 1.3726660250240617, "grad_norm": 1.1933842897415161, "learning_rate": 4.4826686356851834e-05, "loss": 0.8643, "step": 35655 }, { "epoch": 1.3728585178055823, "grad_norm": 1.0002435445785522, "learning_rate": 4.4801468422271e-05, "loss": 0.7632, "step": 35660 }, { "epoch": 1.373051010587103, "grad_norm": 1.1150939464569092, "learning_rate": 4.4776255535230216e-05, "loss": 0.6594, "step": 35665 }, { "epoch": 1.3732435033686237, "grad_norm": 1.2958896160125732, "learning_rate": 4.4751047698035075e-05, "loss": 0.8391, "step": 35670 }, { "epoch": 1.3734359961501443, "grad_norm": 1.687971591949463, "learning_rate": 4.47258449129906e-05, "loss": 0.6359, "step": 35675 }, { "epoch": 1.373628488931665, "grad_norm": 1.6192028522491455, "learning_rate": 4.4700647182401456e-05, "loss": 0.8449, "step": 35680 }, { "epoch": 1.3738209817131857, "grad_norm": 1.6572599411010742, "learning_rate": 4.467545450857179e-05, "loss": 0.8733, "step": 35685 }, { "epoch": 1.3740134744947063, "grad_norm": 1.175234317779541, "learning_rate": 
4.465026689380532e-05, "loss": 0.8851, "step": 35690 }, { "epoch": 1.374205967276227, "grad_norm": 0.9541772603988647, "learning_rate": 4.4625084340405333e-05, "loss": 0.852, "step": 35695 }, { "epoch": 1.374398460057748, "grad_norm": 1.063652515411377, "learning_rate": 4.4599906850674514e-05, "loss": 0.8568, "step": 35700 }, { "epoch": 1.3745909528392686, "grad_norm": 1.2114187479019165, "learning_rate": 4.457473442691522e-05, "loss": 0.731, "step": 35705 }, { "epoch": 1.3747834456207892, "grad_norm": 1.2288764715194702, "learning_rate": 4.454956707142931e-05, "loss": 0.7752, "step": 35710 }, { "epoch": 1.37497593840231, "grad_norm": 1.9316095113754272, "learning_rate": 4.452440478651819e-05, "loss": 0.7302, "step": 35715 }, { "epoch": 1.3751684311838306, "grad_norm": 1.061178207397461, "learning_rate": 4.449924757448269e-05, "loss": 0.8386, "step": 35720 }, { "epoch": 1.3753609239653513, "grad_norm": 1.0967941284179688, "learning_rate": 4.447409543762342e-05, "loss": 0.7719, "step": 35725 }, { "epoch": 1.375553416746872, "grad_norm": 2.174457311630249, "learning_rate": 4.4448948378240264e-05, "loss": 0.9424, "step": 35730 }, { "epoch": 1.3757459095283928, "grad_norm": 1.3424992561340332, "learning_rate": 4.442380639863277e-05, "loss": 0.7166, "step": 35735 }, { "epoch": 1.3759384023099135, "grad_norm": 1.0383292436599731, "learning_rate": 4.4398669501100044e-05, "loss": 0.8558, "step": 35740 }, { "epoch": 1.3761308950914342, "grad_norm": 0.9943356513977051, "learning_rate": 4.437353768794069e-05, "loss": 0.6549, "step": 35745 }, { "epoch": 1.3763233878729548, "grad_norm": 2.162888526916504, "learning_rate": 4.4348410961452744e-05, "loss": 0.8268, "step": 35750 }, { "epoch": 1.3765158806544755, "grad_norm": 0.9995039701461792, "learning_rate": 4.432328932393405e-05, "loss": 0.8245, "step": 35755 }, { "epoch": 1.3767083734359962, "grad_norm": 1.1602789163589478, "learning_rate": 4.429817277768167e-05, "loss": 0.8199, "step": 35760 }, { "epoch": 1.3769008662175168, 
"grad_norm": 1.8660534620285034, "learning_rate": 4.42730613249924e-05, "loss": 0.9195, "step": 35765 }, { "epoch": 1.3770933589990375, "grad_norm": 1.5461866855621338, "learning_rate": 4.424795496816252e-05, "loss": 0.801, "step": 35770 }, { "epoch": 1.3772858517805582, "grad_norm": 1.3390212059020996, "learning_rate": 4.4222853709487866e-05, "loss": 0.8652, "step": 35775 }, { "epoch": 1.3774783445620788, "grad_norm": 1.2544982433319092, "learning_rate": 4.419775755126372e-05, "loss": 0.7289, "step": 35780 }, { "epoch": 1.3776708373435995, "grad_norm": 1.8691519498825073, "learning_rate": 4.4172666495784984e-05, "loss": 0.8905, "step": 35785 }, { "epoch": 1.3778633301251202, "grad_norm": 2.0388388633728027, "learning_rate": 4.414758054534608e-05, "loss": 1.0152, "step": 35790 }, { "epoch": 1.378055822906641, "grad_norm": 1.040433645248413, "learning_rate": 4.4122499702240946e-05, "loss": 0.8018, "step": 35795 }, { "epoch": 1.3782483156881618, "grad_norm": 1.2068662643432617, "learning_rate": 4.409742396876309e-05, "loss": 0.6547, "step": 35800 }, { "epoch": 1.3784408084696824, "grad_norm": 1.445438265800476, "learning_rate": 4.4072353347205466e-05, "loss": 0.8732, "step": 35805 }, { "epoch": 1.378633301251203, "grad_norm": 0.9814010858535767, "learning_rate": 4.404728783986063e-05, "loss": 0.8397, "step": 35810 }, { "epoch": 1.3788257940327238, "grad_norm": 2.0966150760650635, "learning_rate": 4.4022227449020684e-05, "loss": 0.936, "step": 35815 }, { "epoch": 1.3790182868142444, "grad_norm": 1.3904740810394287, "learning_rate": 4.39971721769772e-05, "loss": 0.9165, "step": 35820 }, { "epoch": 1.379210779595765, "grad_norm": 1.3555712699890137, "learning_rate": 4.3972122026021346e-05, "loss": 1.0844, "step": 35825 }, { "epoch": 1.3794032723772858, "grad_norm": 1.2570695877075195, "learning_rate": 4.394707699844381e-05, "loss": 0.9331, "step": 35830 }, { "epoch": 1.3795957651588067, "grad_norm": 0.8406626582145691, "learning_rate": 4.392203709653471e-05, "loss": 
0.8221, "step": 35835 }, { "epoch": 1.3797882579403273, "grad_norm": 1.3139702081680298, "learning_rate": 4.3897002322583894e-05, "loss": 0.9063, "step": 35840 }, { "epoch": 1.379980750721848, "grad_norm": 0.7752715945243835, "learning_rate": 4.3871972678880535e-05, "loss": 0.7454, "step": 35845 }, { "epoch": 1.3801732435033687, "grad_norm": 1.247069239616394, "learning_rate": 4.384694816771345e-05, "loss": 0.7338, "step": 35850 }, { "epoch": 1.3803657362848893, "grad_norm": 1.1150643825531006, "learning_rate": 4.3821928791370995e-05, "loss": 0.9423, "step": 35855 }, { "epoch": 1.38055822906641, "grad_norm": 1.191602349281311, "learning_rate": 4.3796914552141035e-05, "loss": 0.7278, "step": 35860 }, { "epoch": 1.3807507218479307, "grad_norm": 1.868516206741333, "learning_rate": 4.3771905452310844e-05, "loss": 0.8649, "step": 35865 }, { "epoch": 1.3809432146294514, "grad_norm": 1.443765640258789, "learning_rate": 4.37469014941675e-05, "loss": 0.8188, "step": 35870 }, { "epoch": 1.381135707410972, "grad_norm": 1.2555128335952759, "learning_rate": 4.372190267999734e-05, "loss": 0.8372, "step": 35875 }, { "epoch": 1.3813282001924927, "grad_norm": 1.0106040239334106, "learning_rate": 4.369690901208637e-05, "loss": 0.769, "step": 35880 }, { "epoch": 1.3815206929740134, "grad_norm": 0.8901906609535217, "learning_rate": 4.3671920492720095e-05, "loss": 0.8127, "step": 35885 }, { "epoch": 1.381713185755534, "grad_norm": 1.0237152576446533, "learning_rate": 4.3646937124183594e-05, "loss": 0.8046, "step": 35890 }, { "epoch": 1.381905678537055, "grad_norm": 2.2792160511016846, "learning_rate": 4.362195890876135e-05, "loss": 0.8081, "step": 35895 }, { "epoch": 1.3820981713185756, "grad_norm": 1.6603785753250122, "learning_rate": 4.359698584873749e-05, "loss": 0.8406, "step": 35900 }, { "epoch": 1.3822906641000963, "grad_norm": 0.9667415618896484, "learning_rate": 4.357201794639568e-05, "loss": 0.7888, "step": 35905 }, { "epoch": 1.382483156881617, "grad_norm": 
1.3200799226760864, "learning_rate": 4.354705520401895e-05, "loss": 0.7056, "step": 35910 }, { "epoch": 1.3826756496631376, "grad_norm": 1.4623115062713623, "learning_rate": 4.352209762389013e-05, "loss": 0.8487, "step": 35915 }, { "epoch": 1.3828681424446583, "grad_norm": 1.7032670974731445, "learning_rate": 4.3497145208291314e-05, "loss": 0.9328, "step": 35920 }, { "epoch": 1.383060635226179, "grad_norm": 0.9201510548591614, "learning_rate": 4.347219795950427e-05, "loss": 0.8634, "step": 35925 }, { "epoch": 1.3832531280076998, "grad_norm": 1.2735239267349243, "learning_rate": 4.3447255879810266e-05, "loss": 0.7324, "step": 35930 }, { "epoch": 1.3834456207892205, "grad_norm": 1.2430258989334106, "learning_rate": 4.3422318971490116e-05, "loss": 0.8445, "step": 35935 }, { "epoch": 1.3836381135707412, "grad_norm": 0.880877673625946, "learning_rate": 4.3397387236824025e-05, "loss": 0.7624, "step": 35940 }, { "epoch": 1.3838306063522618, "grad_norm": 0.9846094250679016, "learning_rate": 4.3372460678091984e-05, "loss": 0.7539, "step": 35945 }, { "epoch": 1.3840230991337825, "grad_norm": 2.2208034992218018, "learning_rate": 4.334753929757327e-05, "loss": 0.7707, "step": 35950 }, { "epoch": 1.3842155919153032, "grad_norm": 1.152341365814209, "learning_rate": 4.332262309754679e-05, "loss": 0.86, "step": 35955 }, { "epoch": 1.3844080846968239, "grad_norm": 1.2790590524673462, "learning_rate": 4.3297712080290975e-05, "loss": 0.7621, "step": 35960 }, { "epoch": 1.3846005774783445, "grad_norm": 1.3545982837677002, "learning_rate": 4.327280624808381e-05, "loss": 0.7659, "step": 35965 }, { "epoch": 1.3847930702598652, "grad_norm": 1.7217894792556763, "learning_rate": 4.324790560320265e-05, "loss": 0.8812, "step": 35970 }, { "epoch": 1.3849855630413859, "grad_norm": 1.3170634508132935, "learning_rate": 4.322798882370276e-05, "loss": 0.9111, "step": 35975 }, { "epoch": 1.3851780558229065, "grad_norm": 1.05010986328125, "learning_rate": 4.320309752174627e-05, "loss": 0.8722, 
"step": 35980 }, { "epoch": 1.3853705486044272, "grad_norm": 1.8216160535812378, "learning_rate": 4.317821141349036e-05, "loss": 0.9751, "step": 35985 }, { "epoch": 1.385563041385948, "grad_norm": 1.5039763450622559, "learning_rate": 4.315333050121055e-05, "loss": 0.9311, "step": 35990 }, { "epoch": 1.3857555341674688, "grad_norm": 1.1297041177749634, "learning_rate": 4.312845478718211e-05, "loss": 0.8074, "step": 35995 }, { "epoch": 1.3859480269489894, "grad_norm": 1.2953757047653198, "learning_rate": 4.310358427367972e-05, "loss": 0.7828, "step": 36000 }, { "epoch": 1.3861405197305101, "grad_norm": 1.7586846351623535, "learning_rate": 4.3078718962977684e-05, "loss": 0.9055, "step": 36005 }, { "epoch": 1.3863330125120308, "grad_norm": 1.59012770652771, "learning_rate": 4.305385885734966e-05, "loss": 0.7722, "step": 36010 }, { "epoch": 1.3865255052935515, "grad_norm": 0.7984145879745483, "learning_rate": 4.302900395906909e-05, "loss": 0.803, "step": 36015 }, { "epoch": 1.3867179980750721, "grad_norm": 2.04715895652771, "learning_rate": 4.3004154270408666e-05, "loss": 0.9151, "step": 36020 }, { "epoch": 1.386910490856593, "grad_norm": 1.13097083568573, "learning_rate": 4.2979309793640776e-05, "loss": 0.7337, "step": 36025 }, { "epoch": 1.3871029836381137, "grad_norm": 1.4123458862304688, "learning_rate": 4.295447053103727e-05, "loss": 0.7956, "step": 36030 }, { "epoch": 1.3872954764196344, "grad_norm": 1.3871421813964844, "learning_rate": 4.2929636484869586e-05, "loss": 0.8623, "step": 36035 }, { "epoch": 1.387487969201155, "grad_norm": 0.9964479804039001, "learning_rate": 4.290480765740851e-05, "loss": 0.7375, "step": 36040 }, { "epoch": 1.3876804619826757, "grad_norm": 2.126817464828491, "learning_rate": 4.287998405092463e-05, "loss": 0.9251, "step": 36045 }, { "epoch": 1.3878729547641964, "grad_norm": 1.7415242195129395, "learning_rate": 4.285516566768779e-05, "loss": 0.88, "step": 36050 }, { "epoch": 1.388065447545717, "grad_norm": 1.341607689857483, 
"learning_rate": 4.2830352509967486e-05, "loss": 0.8688, "step": 36055 }, { "epoch": 1.3882579403272377, "grad_norm": 2.5146100521087646, "learning_rate": 4.280554458003274e-05, "loss": 0.7789, "step": 36060 }, { "epoch": 1.3884504331087584, "grad_norm": 1.7495235204696655, "learning_rate": 4.2780741880152106e-05, "loss": 0.749, "step": 36065 }, { "epoch": 1.388642925890279, "grad_norm": 1.4821887016296387, "learning_rate": 4.275594441259354e-05, "loss": 0.8738, "step": 36070 }, { "epoch": 1.3888354186717997, "grad_norm": 1.3802404403686523, "learning_rate": 4.273115217962466e-05, "loss": 0.8245, "step": 36075 }, { "epoch": 1.3890279114533204, "grad_norm": 1.0245096683502197, "learning_rate": 4.270636518351252e-05, "loss": 0.8295, "step": 36080 }, { "epoch": 1.389220404234841, "grad_norm": 1.633852243423462, "learning_rate": 4.268158342652376e-05, "loss": 0.8341, "step": 36085 }, { "epoch": 1.389412897016362, "grad_norm": 1.1715857982635498, "learning_rate": 4.265680691092454e-05, "loss": 0.8579, "step": 36090 }, { "epoch": 1.3896053897978826, "grad_norm": 1.3279740810394287, "learning_rate": 4.263203563898038e-05, "loss": 0.8271, "step": 36095 }, { "epoch": 1.3897978825794033, "grad_norm": 0.87353515625, "learning_rate": 4.2607269612956615e-05, "loss": 0.7767, "step": 36100 }, { "epoch": 1.389990375360924, "grad_norm": 0.9114276170730591, "learning_rate": 4.258250883511782e-05, "loss": 0.6756, "step": 36105 }, { "epoch": 1.3901828681424446, "grad_norm": 1.5339330434799194, "learning_rate": 4.255775330772822e-05, "loss": 0.6985, "step": 36110 }, { "epoch": 1.3903753609239653, "grad_norm": 1.8716709613800049, "learning_rate": 4.253300303305157e-05, "loss": 0.8967, "step": 36115 }, { "epoch": 1.390567853705486, "grad_norm": 1.4840834140777588, "learning_rate": 4.250825801335114e-05, "loss": 0.7657, "step": 36120 }, { "epoch": 1.3907603464870069, "grad_norm": 1.0822495222091675, "learning_rate": 4.24835182508896e-05, "loss": 0.7959, "step": 36125 }, { "epoch": 
1.3909528392685275, "grad_norm": 1.4410738945007324, "learning_rate": 4.2458783747929375e-05, "loss": 0.8108, "step": 36130 }, { "epoch": 1.3911453320500482, "grad_norm": 1.0785428285598755, "learning_rate": 4.2434054506732157e-05, "loss": 0.8385, "step": 36135 }, { "epoch": 1.3913378248315689, "grad_norm": 1.1617215871810913, "learning_rate": 4.240933052955932e-05, "loss": 0.8655, "step": 36140 }, { "epoch": 1.3915303176130895, "grad_norm": 1.7867512702941895, "learning_rate": 4.238461181867171e-05, "loss": 0.9933, "step": 36145 }, { "epoch": 1.3917228103946102, "grad_norm": 1.6082379817962646, "learning_rate": 4.235989837632971e-05, "loss": 0.7886, "step": 36150 }, { "epoch": 1.3919153031761309, "grad_norm": 1.7708418369293213, "learning_rate": 4.23351902047931e-05, "loss": 0.9419, "step": 36155 }, { "epoch": 1.3921077959576516, "grad_norm": 2.8401410579681396, "learning_rate": 4.231048730632142e-05, "loss": 0.9881, "step": 36160 }, { "epoch": 1.3923002887391722, "grad_norm": 2.1983916759490967, "learning_rate": 4.228578968317349e-05, "loss": 0.8503, "step": 36165 }, { "epoch": 1.392492781520693, "grad_norm": 1.9838260412216187, "learning_rate": 4.226109733760777e-05, "loss": 0.9066, "step": 36170 }, { "epoch": 1.3926852743022136, "grad_norm": 1.032500147819519, "learning_rate": 4.223641027188226e-05, "loss": 0.7624, "step": 36175 }, { "epoch": 1.3928777670837342, "grad_norm": 2.5539069175720215, "learning_rate": 4.221172848825432e-05, "loss": 0.971, "step": 36180 }, { "epoch": 1.3930702598652551, "grad_norm": 1.1746537685394287, "learning_rate": 4.218705198898102e-05, "loss": 0.8605, "step": 36185 }, { "epoch": 1.3932627526467758, "grad_norm": 1.7174090147018433, "learning_rate": 4.216238077631882e-05, "loss": 0.8074, "step": 36190 }, { "epoch": 1.3934552454282965, "grad_norm": 1.3414026498794556, "learning_rate": 4.2137714852523814e-05, "loss": 0.8017, "step": 36195 }, { "epoch": 1.3936477382098171, "grad_norm": 1.387629747390747, "learning_rate": 
4.21130542198514e-05, "loss": 0.8292, "step": 36200 }, { "epoch": 1.3938402309913378, "grad_norm": 1.5130243301391602, "learning_rate": 4.2088398880556786e-05, "loss": 0.8602, "step": 36205 }, { "epoch": 1.3940327237728585, "grad_norm": 1.6352779865264893, "learning_rate": 4.206374883689444e-05, "loss": 0.7357, "step": 36210 }, { "epoch": 1.3942252165543791, "grad_norm": 1.0258371829986572, "learning_rate": 4.203910409111845e-05, "loss": 0.9261, "step": 36215 }, { "epoch": 1.3944177093359, "grad_norm": 1.0647536516189575, "learning_rate": 4.2014464645482444e-05, "loss": 0.7186, "step": 36220 }, { "epoch": 1.3946102021174207, "grad_norm": 1.0326642990112305, "learning_rate": 4.198983050223957e-05, "loss": 0.7769, "step": 36225 }, { "epoch": 1.3948026948989414, "grad_norm": 2.0613622665405273, "learning_rate": 4.196520166364233e-05, "loss": 0.8248, "step": 36230 }, { "epoch": 1.394995187680462, "grad_norm": 2.12548565864563, "learning_rate": 4.194057813194303e-05, "loss": 0.872, "step": 36235 }, { "epoch": 1.3951876804619827, "grad_norm": 2.1166744232177734, "learning_rate": 4.191595990939322e-05, "loss": 0.8744, "step": 36240 }, { "epoch": 1.3953801732435034, "grad_norm": 1.4747822284698486, "learning_rate": 4.18913469982441e-05, "loss": 0.8514, "step": 36245 }, { "epoch": 1.395572666025024, "grad_norm": 1.3667523860931396, "learning_rate": 4.186673940074637e-05, "loss": 0.9452, "step": 36250 }, { "epoch": 1.3957651588065447, "grad_norm": 1.1476492881774902, "learning_rate": 4.1842137119150246e-05, "loss": 0.7274, "step": 36255 }, { "epoch": 1.3959576515880654, "grad_norm": 2.258908987045288, "learning_rate": 4.181754015570536e-05, "loss": 1.1584, "step": 36260 }, { "epoch": 1.396150144369586, "grad_norm": 1.5475572347640991, "learning_rate": 4.179294851266107e-05, "loss": 0.8509, "step": 36265 }, { "epoch": 1.3963426371511067, "grad_norm": 0.9235596656799316, "learning_rate": 4.176836219226602e-05, "loss": 0.6882, "step": 36270 }, { "epoch": 1.3965351299326274, 
"grad_norm": 2.50726580619812, "learning_rate": 4.17437811967685e-05, "loss": 0.9346, "step": 36275 }, { "epoch": 1.3967276227141483, "grad_norm": 2.3889918327331543, "learning_rate": 4.171920552841627e-05, "loss": 0.7456, "step": 36280 }, { "epoch": 1.396920115495669, "grad_norm": 1.233176350593567, "learning_rate": 4.169463518945667e-05, "loss": 0.8134, "step": 36285 }, { "epoch": 1.3971126082771896, "grad_norm": 1.5915166139602661, "learning_rate": 4.167007018213639e-05, "loss": 0.9035, "step": 36290 }, { "epoch": 1.3973051010587103, "grad_norm": 1.1252813339233398, "learning_rate": 4.1645510508701805e-05, "loss": 0.7778, "step": 36295 }, { "epoch": 1.397497593840231, "grad_norm": 1.1730810403823853, "learning_rate": 4.162095617139871e-05, "loss": 0.8332, "step": 36300 }, { "epoch": 1.3976900866217516, "grad_norm": 1.0456668138504028, "learning_rate": 4.159640717247245e-05, "loss": 0.8277, "step": 36305 }, { "epoch": 1.3978825794032723, "grad_norm": 1.0216035842895508, "learning_rate": 4.157186351416791e-05, "loss": 0.8409, "step": 36310 }, { "epoch": 1.398075072184793, "grad_norm": 2.1484153270721436, "learning_rate": 4.154732519872936e-05, "loss": 0.9005, "step": 36315 }, { "epoch": 1.3982675649663139, "grad_norm": 0.9150620698928833, "learning_rate": 4.15227922284007e-05, "loss": 0.8182, "step": 36320 }, { "epoch": 1.3984600577478346, "grad_norm": 2.366243362426758, "learning_rate": 4.149826460542532e-05, "loss": 0.8423, "step": 36325 }, { "epoch": 1.3986525505293552, "grad_norm": 1.0907059907913208, "learning_rate": 4.147374233204611e-05, "loss": 0.8498, "step": 36330 }, { "epoch": 1.398845043310876, "grad_norm": 1.077556848526001, "learning_rate": 4.1449225410505456e-05, "loss": 0.8998, "step": 36335 }, { "epoch": 1.3990375360923966, "grad_norm": 1.197625756263733, "learning_rate": 4.1424713843045305e-05, "loss": 0.7545, "step": 36340 }, { "epoch": 1.3992300288739172, "grad_norm": 1.4147288799285889, "learning_rate": 4.1400207631906985e-05, "loss": 0.8045, 
"step": 36345 }, { "epoch": 1.399422521655438, "grad_norm": 2.3983185291290283, "learning_rate": 4.1375706779331556e-05, "loss": 1.0072, "step": 36350 }, { "epoch": 1.3996150144369586, "grad_norm": 1.1587196588516235, "learning_rate": 4.1351211287559366e-05, "loss": 0.7277, "step": 36355 }, { "epoch": 1.3998075072184792, "grad_norm": 1.1984206438064575, "learning_rate": 4.132672115883037e-05, "loss": 0.8874, "step": 36360 }, { "epoch": 1.4, "grad_norm": 1.4345682859420776, "learning_rate": 4.130223639538406e-05, "loss": 0.9031, "step": 36365 }, { "epoch": 1.4001924927815206, "grad_norm": 1.10869300365448, "learning_rate": 4.127775699945944e-05, "loss": 0.7777, "step": 36370 }, { "epoch": 1.4003849855630413, "grad_norm": 0.9464773535728455, "learning_rate": 4.12532829732949e-05, "loss": 0.7748, "step": 36375 }, { "epoch": 1.4005774783445621, "grad_norm": 1.2995601892471313, "learning_rate": 4.122881431912847e-05, "loss": 0.9252, "step": 36380 }, { "epoch": 1.4007699711260828, "grad_norm": 1.4381591081619263, "learning_rate": 4.120435103919765e-05, "loss": 0.7513, "step": 36385 }, { "epoch": 1.4009624639076035, "grad_norm": 1.7021411657333374, "learning_rate": 4.117989313573943e-05, "loss": 0.9105, "step": 36390 }, { "epoch": 1.4011549566891242, "grad_norm": 1.6943551301956177, "learning_rate": 4.115544061099039e-05, "loss": 0.7316, "step": 36395 }, { "epoch": 1.4013474494706448, "grad_norm": 1.5277372598648071, "learning_rate": 4.1130993467186454e-05, "loss": 0.8301, "step": 36400 }, { "epoch": 1.4015399422521655, "grad_norm": 1.3319824934005737, "learning_rate": 4.110655170656319e-05, "loss": 0.78, "step": 36405 }, { "epoch": 1.4017324350336862, "grad_norm": 1.3265659809112549, "learning_rate": 4.108211533135563e-05, "loss": 0.8302, "step": 36410 }, { "epoch": 1.401924927815207, "grad_norm": 1.4805601835250854, "learning_rate": 4.1057684343798376e-05, "loss": 0.8165, "step": 36415 }, { "epoch": 1.4021174205967277, "grad_norm": 1.1178621053695679, "learning_rate": 
4.103325874612536e-05, "loss": 0.7693, "step": 36420 }, { "epoch": 1.4023099133782484, "grad_norm": 2.39166522026062, "learning_rate": 4.1008838540570284e-05, "loss": 0.8679, "step": 36425 }, { "epoch": 1.402502406159769, "grad_norm": 1.941292405128479, "learning_rate": 4.098442372936611e-05, "loss": 0.8451, "step": 36430 }, { "epoch": 1.4026948989412897, "grad_norm": 0.9209627509117126, "learning_rate": 4.0960014314745435e-05, "loss": 0.8751, "step": 36435 }, { "epoch": 1.4028873917228104, "grad_norm": 1.5222151279449463, "learning_rate": 4.093561029894034e-05, "loss": 0.8304, "step": 36440 }, { "epoch": 1.403079884504331, "grad_norm": 1.1667454242706299, "learning_rate": 4.091121168418247e-05, "loss": 0.8462, "step": 36445 }, { "epoch": 1.4032723772858517, "grad_norm": 1.9360030889511108, "learning_rate": 4.088681847270278e-05, "loss": 0.9054, "step": 36450 }, { "epoch": 1.4034648700673724, "grad_norm": 0.9800208806991577, "learning_rate": 4.086243066673203e-05, "loss": 0.8068, "step": 36455 }, { "epoch": 1.403657362848893, "grad_norm": 1.7991399765014648, "learning_rate": 4.083804826850022e-05, "loss": 0.6918, "step": 36460 }, { "epoch": 1.4038498556304138, "grad_norm": 1.230858564376831, "learning_rate": 4.081367128023697e-05, "loss": 0.7422, "step": 36465 }, { "epoch": 1.4040423484119344, "grad_norm": 1.1397539377212524, "learning_rate": 4.0789299704171405e-05, "loss": 0.8296, "step": 36470 }, { "epoch": 1.4042348411934553, "grad_norm": 0.9111887812614441, "learning_rate": 4.076493354253219e-05, "loss": 0.7259, "step": 36475 }, { "epoch": 1.404427333974976, "grad_norm": 1.0511916875839233, "learning_rate": 4.074057279754733e-05, "loss": 0.7495, "step": 36480 }, { "epoch": 1.4046198267564967, "grad_norm": 1.596725344657898, "learning_rate": 4.071621747144461e-05, "loss": 0.8516, "step": 36485 }, { "epoch": 1.4048123195380173, "grad_norm": 1.4566305875778198, "learning_rate": 4.069186756645104e-05, "loss": 0.9033, "step": 36490 }, { "epoch": 1.405004812319538, 
"grad_norm": 1.7779717445373535, "learning_rate": 4.06675230847933e-05, "loss": 0.9614, "step": 36495 }, { "epoch": 1.4051973051010587, "grad_norm": 1.1408967971801758, "learning_rate": 4.064318402869753e-05, "loss": 0.8547, "step": 36500 }, { "epoch": 1.4053897978825793, "grad_norm": 0.9641178846359253, "learning_rate": 4.061885040038942e-05, "loss": 0.7535, "step": 36505 }, { "epoch": 1.4055822906641002, "grad_norm": 1.1024551391601562, "learning_rate": 4.059452220209403e-05, "loss": 0.7627, "step": 36510 }, { "epoch": 1.405774783445621, "grad_norm": 1.3117225170135498, "learning_rate": 4.057019943603607e-05, "loss": 0.96, "step": 36515 }, { "epoch": 1.4059672762271416, "grad_norm": 1.1285604238510132, "learning_rate": 4.054588210443969e-05, "loss": 0.8481, "step": 36520 }, { "epoch": 1.4061597690086622, "grad_norm": 1.3916871547698975, "learning_rate": 4.0521570209528534e-05, "loss": 0.8622, "step": 36525 }, { "epoch": 1.406352261790183, "grad_norm": 1.8372492790222168, "learning_rate": 4.049726375352582e-05, "loss": 0.8732, "step": 36530 }, { "epoch": 1.4065447545717036, "grad_norm": 1.3047436475753784, "learning_rate": 4.047296273865412e-05, "loss": 0.8297, "step": 36535 }, { "epoch": 1.4067372473532243, "grad_norm": 1.984266757965088, "learning_rate": 4.044866716713565e-05, "loss": 0.7249, "step": 36540 }, { "epoch": 1.406929740134745, "grad_norm": 1.0713486671447754, "learning_rate": 4.0424377041192075e-05, "loss": 0.7389, "step": 36545 }, { "epoch": 1.4071222329162656, "grad_norm": 1.0382349491119385, "learning_rate": 4.0400092363044594e-05, "loss": 0.7965, "step": 36550 }, { "epoch": 1.4073147256977863, "grad_norm": 2.4277896881103516, "learning_rate": 4.03758131349138e-05, "loss": 0.7452, "step": 36555 }, { "epoch": 1.407507218479307, "grad_norm": 1.7688560485839844, "learning_rate": 4.0351539359019985e-05, "loss": 0.8284, "step": 36560 }, { "epoch": 1.4076997112608276, "grad_norm": 1.1814532279968262, "learning_rate": 4.0327271037582726e-05, "loss": 
0.872, "step": 36565 }, { "epoch": 1.4078922040423485, "grad_norm": 2.220003128051758, "learning_rate": 4.0303008172821235e-05, "loss": 0.8533, "step": 36570 }, { "epoch": 1.4080846968238692, "grad_norm": 1.1941807270050049, "learning_rate": 4.0278750766954176e-05, "loss": 0.7296, "step": 36575 }, { "epoch": 1.4082771896053898, "grad_norm": 1.4177144765853882, "learning_rate": 4.025449882219979e-05, "loss": 0.8877, "step": 36580 }, { "epoch": 1.4084696823869105, "grad_norm": 1.0108013153076172, "learning_rate": 4.023025234077564e-05, "loss": 0.9344, "step": 36585 }, { "epoch": 1.4086621751684312, "grad_norm": 1.2472012042999268, "learning_rate": 4.0206011324899054e-05, "loss": 0.9062, "step": 36590 }, { "epoch": 1.4088546679499518, "grad_norm": 1.353990077972412, "learning_rate": 4.018177577678658e-05, "loss": 0.7711, "step": 36595 }, { "epoch": 1.4090471607314725, "grad_norm": 1.1395666599273682, "learning_rate": 4.0157545698654465e-05, "loss": 0.6574, "step": 36600 }, { "epoch": 1.4092396535129932, "grad_norm": 1.177996039390564, "learning_rate": 4.013332109271837e-05, "loss": 0.8385, "step": 36605 }, { "epoch": 1.409432146294514, "grad_norm": 1.8291305303573608, "learning_rate": 4.0109101961193474e-05, "loss": 0.8298, "step": 36610 }, { "epoch": 1.4096246390760347, "grad_norm": 1.83578622341156, "learning_rate": 4.008488830629451e-05, "loss": 1.0385, "step": 36615 }, { "epoch": 1.4098171318575554, "grad_norm": 1.2734320163726807, "learning_rate": 4.0060680130235564e-05, "loss": 0.8062, "step": 36620 }, { "epoch": 1.410009624639076, "grad_norm": 0.9485237002372742, "learning_rate": 4.003647743523037e-05, "loss": 0.958, "step": 36625 }, { "epoch": 1.4102021174205968, "grad_norm": 1.1491374969482422, "learning_rate": 4.0012280223492084e-05, "loss": 0.7825, "step": 36630 }, { "epoch": 1.4103946102021174, "grad_norm": 1.9539427757263184, "learning_rate": 3.9988088497233424e-05, "loss": 0.7944, "step": 36635 }, { "epoch": 1.410587102983638, "grad_norm": 
1.9131194353103638, "learning_rate": 3.9963902258666465e-05, "loss": 0.9911, "step": 36640 }, { "epoch": 1.4107795957651588, "grad_norm": 0.8639621734619141, "learning_rate": 3.9939721510003e-05, "loss": 0.7923, "step": 36645 }, { "epoch": 1.4109720885466794, "grad_norm": 1.7485369443893433, "learning_rate": 3.991554625345412e-05, "loss": 0.7266, "step": 36650 }, { "epoch": 1.4111645813282, "grad_norm": 0.921247124671936, "learning_rate": 3.989137649123051e-05, "loss": 0.7662, "step": 36655 }, { "epoch": 1.4113570741097208, "grad_norm": 0.8794036507606506, "learning_rate": 3.986721222554232e-05, "loss": 0.6367, "step": 36660 }, { "epoch": 1.4115495668912414, "grad_norm": 1.2151905298233032, "learning_rate": 3.984305345859928e-05, "loss": 0.7589, "step": 36665 }, { "epoch": 1.4117420596727623, "grad_norm": 1.3154112100601196, "learning_rate": 3.981890019261042e-05, "loss": 1.0049, "step": 36670 }, { "epoch": 1.411934552454283, "grad_norm": 1.4974151849746704, "learning_rate": 3.979475242978454e-05, "loss": 0.7273, "step": 36675 }, { "epoch": 1.4121270452358037, "grad_norm": 1.543226718902588, "learning_rate": 3.97706101723297e-05, "loss": 0.8911, "step": 36680 }, { "epoch": 1.4123195380173243, "grad_norm": 1.6745073795318604, "learning_rate": 3.974647342245358e-05, "loss": 0.7785, "step": 36685 }, { "epoch": 1.412512030798845, "grad_norm": 0.8186720609664917, "learning_rate": 3.972234218236331e-05, "loss": 0.8083, "step": 36690 }, { "epoch": 1.4127045235803657, "grad_norm": 1.7771086692810059, "learning_rate": 3.969821645426559e-05, "loss": 0.6911, "step": 36695 }, { "epoch": 1.4128970163618864, "grad_norm": 1.0022178888320923, "learning_rate": 3.9674096240366445e-05, "loss": 0.9787, "step": 36700 }, { "epoch": 1.4130895091434073, "grad_norm": 0.9852539896965027, "learning_rate": 3.964998154287164e-05, "loss": 0.7145, "step": 36705 }, { "epoch": 1.413282001924928, "grad_norm": 1.1110121011734009, "learning_rate": 3.9625872363986205e-05, "loss": 0.7933, "step": 36710 
}, { "epoch": 1.4134744947064486, "grad_norm": 1.6256468296051025, "learning_rate": 3.96017687059148e-05, "loss": 1.0778, "step": 36715 }, { "epoch": 1.4136669874879693, "grad_norm": 0.9198029041290283, "learning_rate": 3.957767057086158e-05, "loss": 0.6291, "step": 36720 }, { "epoch": 1.41385948026949, "grad_norm": 0.8681755065917969, "learning_rate": 3.9553577961030094e-05, "loss": 0.8225, "step": 36725 }, { "epoch": 1.4140519730510106, "grad_norm": 0.9288125038146973, "learning_rate": 3.952949087862349e-05, "loss": 0.7758, "step": 36730 }, { "epoch": 1.4142444658325313, "grad_norm": 1.3477758169174194, "learning_rate": 3.9505409325844346e-05, "loss": 0.8854, "step": 36735 }, { "epoch": 1.414436958614052, "grad_norm": 1.035380482673645, "learning_rate": 3.948133330489483e-05, "loss": 0.9035, "step": 36740 }, { "epoch": 1.4146294513955726, "grad_norm": 1.4395008087158203, "learning_rate": 3.9457262817976405e-05, "loss": 0.8584, "step": 36745 }, { "epoch": 1.4148219441770933, "grad_norm": 1.6281189918518066, "learning_rate": 3.943319786729031e-05, "loss": 0.8451, "step": 36750 }, { "epoch": 1.415014436958614, "grad_norm": 1.3493483066558838, "learning_rate": 3.940913845503701e-05, "loss": 0.9527, "step": 36755 }, { "epoch": 1.4152069297401346, "grad_norm": 1.200760841369629, "learning_rate": 3.938508458341663e-05, "loss": 0.812, "step": 36760 }, { "epoch": 1.4153994225216555, "grad_norm": 1.3952566385269165, "learning_rate": 3.9361036254628726e-05, "loss": 0.9364, "step": 36765 }, { "epoch": 1.4155919153031762, "grad_norm": 1.607851266860962, "learning_rate": 3.9336993470872385e-05, "loss": 0.8368, "step": 36770 }, { "epoch": 1.4157844080846969, "grad_norm": 1.7063467502593994, "learning_rate": 3.931295623434608e-05, "loss": 0.8981, "step": 36775 }, { "epoch": 1.4159769008662175, "grad_norm": 2.502572536468506, "learning_rate": 3.928892454724797e-05, "loss": 0.8994, "step": 36780 }, { "epoch": 1.4161693936477382, "grad_norm": 2.07956862449646, "learning_rate": 
3.926489841177551e-05, "loss": 1.1487, "step": 36785 }, { "epoch": 1.4163618864292589, "grad_norm": 1.2496376037597656, "learning_rate": 3.924087783012575e-05, "loss": 0.7492, "step": 36790 }, { "epoch": 1.4165543792107795, "grad_norm": 1.3253594636917114, "learning_rate": 3.921686280449523e-05, "loss": 0.8016, "step": 36795 }, { "epoch": 1.4167468719923004, "grad_norm": 0.9799445271492004, "learning_rate": 3.919285333707998e-05, "loss": 0.7401, "step": 36800 }, { "epoch": 1.416939364773821, "grad_norm": 0.9721969366073608, "learning_rate": 3.916884943007541e-05, "loss": 0.6816, "step": 36805 }, { "epoch": 1.4171318575553418, "grad_norm": 0.9141532778739929, "learning_rate": 3.914485108567667e-05, "loss": 0.8165, "step": 36810 }, { "epoch": 1.4173243503368624, "grad_norm": 1.3266597986221313, "learning_rate": 3.912085830607813e-05, "loss": 0.8702, "step": 36815 }, { "epoch": 1.417516843118383, "grad_norm": 1.6577297449111938, "learning_rate": 3.909687109347382e-05, "loss": 0.7582, "step": 36820 }, { "epoch": 1.4177093358999038, "grad_norm": 1.3083659410476685, "learning_rate": 3.9072889450057194e-05, "loss": 0.7892, "step": 36825 }, { "epoch": 1.4179018286814244, "grad_norm": 1.1180342435836792, "learning_rate": 3.904891337802126e-05, "loss": 0.9944, "step": 36830 }, { "epoch": 1.4180943214629451, "grad_norm": 1.2931442260742188, "learning_rate": 3.902494287955839e-05, "loss": 0.769, "step": 36835 }, { "epoch": 1.4182868142444658, "grad_norm": 1.012529730796814, "learning_rate": 3.900097795686057e-05, "loss": 0.6058, "step": 36840 }, { "epoch": 1.4184793070259865, "grad_norm": 1.3702960014343262, "learning_rate": 3.897701861211924e-05, "loss": 0.8077, "step": 36845 }, { "epoch": 1.4186717998075071, "grad_norm": 1.4221330881118774, "learning_rate": 3.8953064847525324e-05, "loss": 0.7043, "step": 36850 }, { "epoch": 1.4188642925890278, "grad_norm": 1.2348641157150269, "learning_rate": 3.892911666526926e-05, "loss": 0.9082, "step": 36855 }, { "epoch": 
1.4190567853705485, "grad_norm": 1.2621009349822998, "learning_rate": 3.890517406754085e-05, "loss": 0.8831, "step": 36860 }, { "epoch": 1.4192492781520694, "grad_norm": 1.2773290872573853, "learning_rate": 3.888123705652962e-05, "loss": 0.7864, "step": 36865 }, { "epoch": 1.41944177093359, "grad_norm": 1.2234888076782227, "learning_rate": 3.8857305634424356e-05, "loss": 0.7859, "step": 36870 }, { "epoch": 1.4196342637151107, "grad_norm": 1.1090830564498901, "learning_rate": 3.8833379803413463e-05, "loss": 0.8045, "step": 36875 }, { "epoch": 1.4198267564966314, "grad_norm": 1.4224649667739868, "learning_rate": 3.8809459565684784e-05, "loss": 0.8793, "step": 36880 }, { "epoch": 1.420019249278152, "grad_norm": 1.293678879737854, "learning_rate": 3.878554492342572e-05, "loss": 0.7928, "step": 36885 }, { "epoch": 1.4202117420596727, "grad_norm": 1.27838134765625, "learning_rate": 3.876163587882299e-05, "loss": 0.8078, "step": 36890 }, { "epoch": 1.4204042348411934, "grad_norm": 0.9022307991981506, "learning_rate": 3.873773243406306e-05, "loss": 0.8416, "step": 36895 }, { "epoch": 1.4205967276227143, "grad_norm": 1.2606749534606934, "learning_rate": 3.8713834591331646e-05, "loss": 0.816, "step": 36900 }, { "epoch": 1.420789220404235, "grad_norm": 1.7780064344406128, "learning_rate": 3.868994235281407e-05, "loss": 0.9733, "step": 36905 }, { "epoch": 1.4209817131857556, "grad_norm": 1.242295742034912, "learning_rate": 3.8666055720695126e-05, "loss": 0.6507, "step": 36910 }, { "epoch": 1.4211742059672763, "grad_norm": 1.5196983814239502, "learning_rate": 3.864217469715912e-05, "loss": 0.8056, "step": 36915 }, { "epoch": 1.421366698748797, "grad_norm": 0.9677949547767639, "learning_rate": 3.8618299284389747e-05, "loss": 0.8748, "step": 36920 }, { "epoch": 1.4215591915303176, "grad_norm": 0.9580836892127991, "learning_rate": 3.859442948457028e-05, "loss": 0.8544, "step": 36925 }, { "epoch": 1.4217516843118383, "grad_norm": 1.5484107732772827, "learning_rate": 
3.857056529988347e-05, "loss": 0.8237, "step": 36930 }, { "epoch": 1.421944177093359, "grad_norm": 0.9140622019767761, "learning_rate": 3.854670673251153e-05, "loss": 0.6624, "step": 36935 }, { "epoch": 1.4221366698748796, "grad_norm": 1.0796998739242554, "learning_rate": 3.852285378463619e-05, "loss": 0.7723, "step": 36940 }, { "epoch": 1.4223291626564003, "grad_norm": 0.9047008752822876, "learning_rate": 3.84990064584386e-05, "loss": 0.787, "step": 36945 }, { "epoch": 1.422521655437921, "grad_norm": 2.0904667377471924, "learning_rate": 3.847516475609947e-05, "loss": 0.8196, "step": 36950 }, { "epoch": 1.4227141482194416, "grad_norm": 1.2479698657989502, "learning_rate": 3.845132867979895e-05, "loss": 0.8103, "step": 36955 }, { "epoch": 1.4229066410009625, "grad_norm": 1.6270309686660767, "learning_rate": 3.842749823171673e-05, "loss": 0.8852, "step": 36960 }, { "epoch": 1.4230991337824832, "grad_norm": 1.9030733108520508, "learning_rate": 3.840367341403185e-05, "loss": 0.9027, "step": 36965 }, { "epoch": 1.4232916265640039, "grad_norm": 1.098138689994812, "learning_rate": 3.837985422892307e-05, "loss": 0.8737, "step": 36970 }, { "epoch": 1.4234841193455245, "grad_norm": 1.9470241069793701, "learning_rate": 3.8356040678568393e-05, "loss": 0.7401, "step": 36975 }, { "epoch": 1.4236766121270452, "grad_norm": 1.122239589691162, "learning_rate": 3.833223276514544e-05, "loss": 0.873, "step": 36980 }, { "epoch": 1.4238691049085659, "grad_norm": 1.3224730491638184, "learning_rate": 3.830843049083128e-05, "loss": 0.7331, "step": 36985 }, { "epoch": 1.4240615976900866, "grad_norm": 1.4504386186599731, "learning_rate": 3.8284633857802524e-05, "loss": 0.7491, "step": 36990 }, { "epoch": 1.4242540904716074, "grad_norm": 1.9830818176269531, "learning_rate": 3.82608428682351e-05, "loss": 0.8795, "step": 36995 }, { "epoch": 1.4244465832531281, "grad_norm": 0.9479761719703674, "learning_rate": 3.823705752430469e-05, "loss": 0.972, "step": 37000 }, { "epoch": 1.4246390760346488, 
"grad_norm": 2.649916172027588, "learning_rate": 3.821327782818619e-05, "loss": 0.8017, "step": 37005 }, { "epoch": 1.4248315688161695, "grad_norm": 1.3527096509933472, "learning_rate": 3.8189503782054135e-05, "loss": 0.796, "step": 37010 }, { "epoch": 1.4250240615976901, "grad_norm": 1.6791696548461914, "learning_rate": 3.816573538808249e-05, "loss": 0.7578, "step": 37015 }, { "epoch": 1.4252165543792108, "grad_norm": 1.8477129936218262, "learning_rate": 3.814197264844478e-05, "loss": 0.7141, "step": 37020 }, { "epoch": 1.4254090471607315, "grad_norm": 1.5426512956619263, "learning_rate": 3.811821556531382e-05, "loss": 0.8485, "step": 37025 }, { "epoch": 1.4256015399422521, "grad_norm": 1.2095710039138794, "learning_rate": 3.809446414086218e-05, "loss": 0.8202, "step": 37030 }, { "epoch": 1.4257940327237728, "grad_norm": 1.1557666063308716, "learning_rate": 3.8070718377261696e-05, "loss": 0.7613, "step": 37035 }, { "epoch": 1.4259865255052935, "grad_norm": 1.4314980506896973, "learning_rate": 3.8046978276683756e-05, "loss": 0.8334, "step": 37040 }, { "epoch": 1.4261790182868141, "grad_norm": 0.7171056866645813, "learning_rate": 3.802324384129925e-05, "loss": 0.886, "step": 37045 }, { "epoch": 1.4263715110683348, "grad_norm": 2.3960771560668945, "learning_rate": 3.799951507327858e-05, "loss": 0.857, "step": 37050 }, { "epoch": 1.4265640038498557, "grad_norm": 1.7112245559692383, "learning_rate": 3.7975791974791494e-05, "loss": 0.7698, "step": 37055 }, { "epoch": 1.4267564966313764, "grad_norm": 1.118683934211731, "learning_rate": 3.795207454800737e-05, "loss": 0.6801, "step": 37060 }, { "epoch": 1.426948989412897, "grad_norm": 1.9905716180801392, "learning_rate": 3.7928362795095e-05, "loss": 0.793, "step": 37065 }, { "epoch": 1.4271414821944177, "grad_norm": 1.2646980285644531, "learning_rate": 3.790465671822265e-05, "loss": 0.8383, "step": 37070 }, { "epoch": 1.4273339749759384, "grad_norm": 1.1731975078582764, "learning_rate": 3.7880956319558146e-05, "loss": 
0.7778, "step": 37075 }, { "epoch": 1.427526467757459, "grad_norm": 1.5719605684280396, "learning_rate": 3.785726160126864e-05, "loss": 0.8979, "step": 37080 }, { "epoch": 1.4277189605389797, "grad_norm": 1.1388436555862427, "learning_rate": 3.7833572565520924e-05, "loss": 0.8414, "step": 37085 }, { "epoch": 1.4279114533205004, "grad_norm": 1.2077659368515015, "learning_rate": 3.7809889214481165e-05, "loss": 0.8771, "step": 37090 }, { "epoch": 1.4281039461020213, "grad_norm": 1.342726469039917, "learning_rate": 3.778621155031511e-05, "loss": 0.724, "step": 37095 }, { "epoch": 1.428296438883542, "grad_norm": 0.8151179552078247, "learning_rate": 3.77625395751878e-05, "loss": 0.7926, "step": 37100 }, { "epoch": 1.4284889316650626, "grad_norm": 1.3730394840240479, "learning_rate": 3.773887329126404e-05, "loss": 0.9403, "step": 37105 }, { "epoch": 1.4286814244465833, "grad_norm": 1.6528422832489014, "learning_rate": 3.77152127007078e-05, "loss": 0.796, "step": 37110 }, { "epoch": 1.428873917228104, "grad_norm": 1.3892961740493774, "learning_rate": 3.769155780568283e-05, "loss": 0.8341, "step": 37115 }, { "epoch": 1.4290664100096246, "grad_norm": 0.6289898753166199, "learning_rate": 3.766790860835211e-05, "loss": 0.7489, "step": 37120 }, { "epoch": 1.4292589027911453, "grad_norm": 1.1175183057785034, "learning_rate": 3.764426511087823e-05, "loss": 0.9842, "step": 37125 }, { "epoch": 1.429451395572666, "grad_norm": 1.6422615051269531, "learning_rate": 3.762062731542324e-05, "loss": 0.8007, "step": 37130 }, { "epoch": 1.4296438883541867, "grad_norm": 2.0091776847839355, "learning_rate": 3.759699522414869e-05, "loss": 0.7446, "step": 37135 }, { "epoch": 1.4298363811357073, "grad_norm": 1.1759101152420044, "learning_rate": 3.757336883921552e-05, "loss": 0.7502, "step": 37140 }, { "epoch": 1.430028873917228, "grad_norm": 1.3325209617614746, "learning_rate": 3.754974816278422e-05, "loss": 0.7311, "step": 37145 }, { "epoch": 1.4302213666987487, "grad_norm": 0.9399605989456177, 
"learning_rate": 3.752613319701476e-05, "loss": 0.7261, "step": 37150 }, { "epoch": 1.4304138594802696, "grad_norm": 0.8700345754623413, "learning_rate": 3.750252394406656e-05, "loss": 0.7377, "step": 37155 }, { "epoch": 1.4306063522617902, "grad_norm": 1.1583389043807983, "learning_rate": 3.747892040609858e-05, "loss": 0.6968, "step": 37160 }, { "epoch": 1.430798845043311, "grad_norm": 0.9834827184677124, "learning_rate": 3.7455322585269125e-05, "loss": 0.8028, "step": 37165 }, { "epoch": 1.4309913378248316, "grad_norm": 1.4735242128372192, "learning_rate": 3.74317304837361e-05, "loss": 0.8179, "step": 37170 }, { "epoch": 1.4311838306063522, "grad_norm": 1.5621691942214966, "learning_rate": 3.740814410365685e-05, "loss": 0.8056, "step": 37175 }, { "epoch": 1.431376323387873, "grad_norm": 3.325570583343506, "learning_rate": 3.7384563447188226e-05, "loss": 0.9204, "step": 37180 }, { "epoch": 1.4315688161693936, "grad_norm": 0.9473647475242615, "learning_rate": 3.736098851648641e-05, "loss": 0.9339, "step": 37185 }, { "epoch": 1.4317613089509145, "grad_norm": 1.8556329011917114, "learning_rate": 3.733741931370731e-05, "loss": 0.8082, "step": 37190 }, { "epoch": 1.4319538017324351, "grad_norm": 1.158944845199585, "learning_rate": 3.73138558410061e-05, "loss": 0.8515, "step": 37195 }, { "epoch": 1.4321462945139558, "grad_norm": 1.7924774885177612, "learning_rate": 3.729029810053749e-05, "loss": 0.904, "step": 37200 }, { "epoch": 1.4323387872954765, "grad_norm": 1.753733515739441, "learning_rate": 3.726674609445572e-05, "loss": 0.9027, "step": 37205 }, { "epoch": 1.4325312800769971, "grad_norm": 1.4341967105865479, "learning_rate": 3.7243199824914484e-05, "loss": 1.0106, "step": 37210 }, { "epoch": 1.4327237728585178, "grad_norm": 1.0536222457885742, "learning_rate": 3.721965929406682e-05, "loss": 0.8197, "step": 37215 }, { "epoch": 1.4329162656400385, "grad_norm": 1.952141284942627, "learning_rate": 3.719612450406551e-05, "loss": 0.8157, "step": 37220 }, { "epoch": 
1.4331087584215592, "grad_norm": 2.054227113723755, "learning_rate": 3.717259545706254e-05, "loss": 0.859, "step": 37225 }, { "epoch": 1.4333012512030798, "grad_norm": 0.9729071259498596, "learning_rate": 3.714907215520952e-05, "loss": 0.8556, "step": 37230 }, { "epoch": 1.4334937439846005, "grad_norm": 1.8992177248001099, "learning_rate": 3.712555460065751e-05, "loss": 0.97, "step": 37235 }, { "epoch": 1.4336862367661212, "grad_norm": 1.2505042552947998, "learning_rate": 3.710204279555705e-05, "loss": 0.8497, "step": 37240 }, { "epoch": 1.4338787295476418, "grad_norm": 1.4273964166641235, "learning_rate": 3.707853674205806e-05, "loss": 0.8451, "step": 37245 }, { "epoch": 1.4340712223291627, "grad_norm": 1.104282259941101, "learning_rate": 3.705503644231012e-05, "loss": 0.8491, "step": 37250 }, { "epoch": 1.4342637151106834, "grad_norm": 1.683293104171753, "learning_rate": 3.7031541898462096e-05, "loss": 0.8464, "step": 37255 }, { "epoch": 1.434456207892204, "grad_norm": 1.0647557973861694, "learning_rate": 3.700805311266244e-05, "loss": 0.7783, "step": 37260 }, { "epoch": 1.4346487006737247, "grad_norm": 1.2208470106124878, "learning_rate": 3.6984570087059045e-05, "loss": 0.7758, "step": 37265 }, { "epoch": 1.4348411934552454, "grad_norm": 1.9771546125411987, "learning_rate": 3.6961092823799314e-05, "loss": 0.9314, "step": 37270 }, { "epoch": 1.435033686236766, "grad_norm": 0.9171108603477478, "learning_rate": 3.6937621325030016e-05, "loss": 0.7365, "step": 37275 }, { "epoch": 1.4352261790182868, "grad_norm": 1.75246000289917, "learning_rate": 3.691415559289749e-05, "loss": 0.8452, "step": 37280 }, { "epoch": 1.4354186717998076, "grad_norm": 1.3954969644546509, "learning_rate": 3.6890695629547564e-05, "loss": 0.8532, "step": 37285 }, { "epoch": 1.4356111645813283, "grad_norm": 1.296316146850586, "learning_rate": 3.68672414371254e-05, "loss": 0.7224, "step": 37290 }, { "epoch": 1.435803657362849, "grad_norm": 1.0747795104980469, "learning_rate": 
3.684379301777585e-05, "loss": 0.9024, "step": 37295 }, { "epoch": 1.4359961501443697, "grad_norm": 1.5699114799499512, "learning_rate": 3.682035037364303e-05, "loss": 0.9244, "step": 37300 }, { "epoch": 1.4361886429258903, "grad_norm": 2.6069118976593018, "learning_rate": 3.679691350687064e-05, "loss": 0.645, "step": 37305 }, { "epoch": 1.436381135707411, "grad_norm": 1.1996586322784424, "learning_rate": 3.6773482419601826e-05, "loss": 0.7952, "step": 37310 }, { "epoch": 1.4365736284889317, "grad_norm": 1.3153495788574219, "learning_rate": 3.675005711397924e-05, "loss": 0.9398, "step": 37315 }, { "epoch": 1.4367661212704523, "grad_norm": 1.404503583908081, "learning_rate": 3.672663759214487e-05, "loss": 0.845, "step": 37320 }, { "epoch": 1.436958614051973, "grad_norm": 0.9732365012168884, "learning_rate": 3.670322385624042e-05, "loss": 0.8258, "step": 37325 }, { "epoch": 1.4371511068334937, "grad_norm": 1.2123562097549438, "learning_rate": 3.667981590840681e-05, "loss": 0.8841, "step": 37330 }, { "epoch": 1.4373435996150143, "grad_norm": 2.156731367111206, "learning_rate": 3.6656413750784565e-05, "loss": 0.9473, "step": 37335 }, { "epoch": 1.437536092396535, "grad_norm": 1.4987213611602783, "learning_rate": 3.6633017385513676e-05, "loss": 0.7732, "step": 37340 }, { "epoch": 1.4377285851780557, "grad_norm": 1.517491102218628, "learning_rate": 3.660962681473362e-05, "loss": 0.8491, "step": 37345 }, { "epoch": 1.4379210779595766, "grad_norm": 1.0631245374679565, "learning_rate": 3.65862420405832e-05, "loss": 0.6431, "step": 37350 }, { "epoch": 1.4381135707410972, "grad_norm": 1.114632248878479, "learning_rate": 3.656286306520094e-05, "loss": 0.8981, "step": 37355 }, { "epoch": 1.438306063522618, "grad_norm": 2.538287401199341, "learning_rate": 3.6539489890724575e-05, "loss": 0.8646, "step": 37360 }, { "epoch": 1.4384985563041386, "grad_norm": 1.4105244874954224, "learning_rate": 3.651612251929147e-05, "loss": 0.9686, "step": 37365 }, { "epoch": 1.4386910490856593, 
"grad_norm": 1.3305569887161255, "learning_rate": 3.649276095303843e-05, "loss": 0.9057, "step": 37370 }, { "epoch": 1.43888354186718, "grad_norm": 1.292600154876709, "learning_rate": 3.646940519410169e-05, "loss": 0.8515, "step": 37375 }, { "epoch": 1.4390760346487006, "grad_norm": 1.3273907899856567, "learning_rate": 3.644605524461704e-05, "loss": 0.9926, "step": 37380 }, { "epoch": 1.4392685274302215, "grad_norm": 1.1350206136703491, "learning_rate": 3.6422711106719595e-05, "loss": 0.9411, "step": 37385 }, { "epoch": 1.4394610202117422, "grad_norm": 1.1308443546295166, "learning_rate": 3.639937278254406e-05, "loss": 0.7784, "step": 37390 }, { "epoch": 1.4396535129932628, "grad_norm": 1.2704259157180786, "learning_rate": 3.637604027422457e-05, "loss": 0.7807, "step": 37395 }, { "epoch": 1.4398460057747835, "grad_norm": 1.5792492628097534, "learning_rate": 3.6352713583894746e-05, "loss": 0.7525, "step": 37400 }, { "epoch": 1.4400384985563042, "grad_norm": 1.706715703010559, "learning_rate": 3.632939271368758e-05, "loss": 0.856, "step": 37405 }, { "epoch": 1.4402309913378248, "grad_norm": 1.0315080881118774, "learning_rate": 3.630607766573574e-05, "loss": 0.907, "step": 37410 }, { "epoch": 1.4404234841193455, "grad_norm": 1.4221409559249878, "learning_rate": 3.628276844217113e-05, "loss": 0.7504, "step": 37415 }, { "epoch": 1.4406159769008662, "grad_norm": 0.8785042762756348, "learning_rate": 3.6259465045125265e-05, "loss": 0.8181, "step": 37420 }, { "epoch": 1.4408084696823868, "grad_norm": 1.8334077596664429, "learning_rate": 3.623616747672907e-05, "loss": 0.8067, "step": 37425 }, { "epoch": 1.4410009624639075, "grad_norm": 0.9262105226516724, "learning_rate": 3.621287573911299e-05, "loss": 0.8618, "step": 37430 }, { "epoch": 1.4411934552454282, "grad_norm": 1.228710651397705, "learning_rate": 3.618958983440682e-05, "loss": 0.8697, "step": 37435 }, { "epoch": 1.4413859480269489, "grad_norm": 1.6522730588912964, "learning_rate": 3.616630976474003e-05, "loss": 
0.9191, "step": 37440 }, { "epoch": 1.4415784408084698, "grad_norm": 1.1182198524475098, "learning_rate": 3.6143035532241296e-05, "loss": 0.818, "step": 37445 }, { "epoch": 1.4417709335899904, "grad_norm": 0.9378697872161865, "learning_rate": 3.611976713903895e-05, "loss": 0.8066, "step": 37450 }, { "epoch": 1.441963426371511, "grad_norm": 1.22636079788208, "learning_rate": 3.609650458726075e-05, "loss": 0.6601, "step": 37455 }, { "epoch": 1.4421559191530318, "grad_norm": 1.1185452938079834, "learning_rate": 3.60732478790339e-05, "loss": 0.7541, "step": 37460 }, { "epoch": 1.4423484119345524, "grad_norm": 0.9403701424598694, "learning_rate": 3.6049997016485024e-05, "loss": 0.7677, "step": 37465 }, { "epoch": 1.442540904716073, "grad_norm": 0.9934571981430054, "learning_rate": 3.602675200174031e-05, "loss": 0.8691, "step": 37470 }, { "epoch": 1.4427333974975938, "grad_norm": 1.63606858253479, "learning_rate": 3.600351283692531e-05, "loss": 0.7052, "step": 37475 }, { "epoch": 1.4429258902791147, "grad_norm": 1.6457669734954834, "learning_rate": 3.598027952416515e-05, "loss": 0.8816, "step": 37480 }, { "epoch": 1.4431183830606353, "grad_norm": 1.1504909992218018, "learning_rate": 3.595705206558435e-05, "loss": 0.8399, "step": 37485 }, { "epoch": 1.443310875842156, "grad_norm": 1.7006773948669434, "learning_rate": 3.593383046330687e-05, "loss": 0.8129, "step": 37490 }, { "epoch": 1.4435033686236767, "grad_norm": 0.8277364373207092, "learning_rate": 3.591061471945619e-05, "loss": 0.7758, "step": 37495 }, { "epoch": 1.4436958614051973, "grad_norm": 1.3335161209106445, "learning_rate": 3.5887404836155244e-05, "loss": 0.8057, "step": 37500 }, { "epoch": 1.443888354186718, "grad_norm": 1.1840698719024658, "learning_rate": 3.586420081552646e-05, "loss": 0.681, "step": 37505 }, { "epoch": 1.4440808469682387, "grad_norm": 1.2329528331756592, "learning_rate": 3.584100265969157e-05, "loss": 0.899, "step": 37510 }, { "epoch": 1.4442733397497594, "grad_norm": 1.9120075702667236, 
"learning_rate": 3.5817810370772046e-05, "loss": 0.8751, "step": 37515 }, { "epoch": 1.44446583253128, "grad_norm": 1.4686700105667114, "learning_rate": 3.579462395088856e-05, "loss": 0.9473, "step": 37520 }, { "epoch": 1.4446583253128007, "grad_norm": 0.9519988298416138, "learning_rate": 3.5771443402161396e-05, "loss": 0.782, "step": 37525 }, { "epoch": 1.4448508180943214, "grad_norm": 1.1774522066116333, "learning_rate": 3.5748268726710254e-05, "loss": 0.914, "step": 37530 }, { "epoch": 1.445043310875842, "grad_norm": 1.1277164220809937, "learning_rate": 3.5725099926654334e-05, "loss": 0.8537, "step": 37535 }, { "epoch": 1.445235803657363, "grad_norm": 1.815774917602539, "learning_rate": 3.570193700411219e-05, "loss": 0.7308, "step": 37540 }, { "epoch": 1.4454282964388836, "grad_norm": 1.1836425065994263, "learning_rate": 3.5678779961202035e-05, "loss": 0.7268, "step": 37545 }, { "epoch": 1.4456207892204043, "grad_norm": 0.964604914188385, "learning_rate": 3.565562880004133e-05, "loss": 0.689, "step": 37550 }, { "epoch": 1.445813282001925, "grad_norm": 1.3372588157653809, "learning_rate": 3.563248352274712e-05, "loss": 0.8704, "step": 37555 }, { "epoch": 1.4460057747834456, "grad_norm": 1.824810266494751, "learning_rate": 3.56093441314359e-05, "loss": 0.8523, "step": 37560 }, { "epoch": 1.4461982675649663, "grad_norm": 1.0827716588974, "learning_rate": 3.558621062822365e-05, "loss": 0.9851, "step": 37565 }, { "epoch": 1.446390760346487, "grad_norm": 1.1135975122451782, "learning_rate": 3.556308301522566e-05, "loss": 0.8532, "step": 37570 }, { "epoch": 1.4465832531280076, "grad_norm": 1.0616776943206787, "learning_rate": 3.553996129455694e-05, "loss": 0.6833, "step": 37575 }, { "epoch": 1.4467757459095285, "grad_norm": 0.9578964114189148, "learning_rate": 3.551684546833173e-05, "loss": 0.7893, "step": 37580 }, { "epoch": 1.4469682386910492, "grad_norm": 1.2795662879943848, "learning_rate": 3.549373553866383e-05, "loss": 0.8313, "step": 37585 }, { "epoch": 
1.4471607314725699, "grad_norm": 0.9473350048065186, "learning_rate": 3.547063150766651e-05, "loss": 0.7281, "step": 37590 }, { "epoch": 1.4473532242540905, "grad_norm": 2.0121681690216064, "learning_rate": 3.544753337745249e-05, "loss": 0.9083, "step": 37595 }, { "epoch": 1.4475457170356112, "grad_norm": 1.5119409561157227, "learning_rate": 3.542444115013389e-05, "loss": 0.9774, "step": 37600 }, { "epoch": 1.4477382098171319, "grad_norm": 1.390446424484253, "learning_rate": 3.5401354827822386e-05, "loss": 0.8337, "step": 37605 }, { "epoch": 1.4479307025986525, "grad_norm": 2.891700267791748, "learning_rate": 3.537827441262904e-05, "loss": 0.7639, "step": 37610 }, { "epoch": 1.4481231953801732, "grad_norm": 0.9751268625259399, "learning_rate": 3.5355199906664426e-05, "loss": 0.7479, "step": 37615 }, { "epoch": 1.4483156881616939, "grad_norm": 1.9345892667770386, "learning_rate": 3.533213131203859e-05, "loss": 0.8192, "step": 37620 }, { "epoch": 1.4485081809432145, "grad_norm": 0.8891172409057617, "learning_rate": 3.5309068630860886e-05, "loss": 0.7572, "step": 37625 }, { "epoch": 1.4487006737247352, "grad_norm": 1.0947364568710327, "learning_rate": 3.528601186524038e-05, "loss": 0.8355, "step": 37630 }, { "epoch": 1.4488931665062559, "grad_norm": 1.883512020111084, "learning_rate": 3.5262961017285365e-05, "loss": 0.705, "step": 37635 }, { "epoch": 1.4490856592877768, "grad_norm": 1.5759938955307007, "learning_rate": 3.523991608910373e-05, "loss": 0.7667, "step": 37640 }, { "epoch": 1.4492781520692974, "grad_norm": 1.6118040084838867, "learning_rate": 3.521687708280277e-05, "loss": 0.799, "step": 37645 }, { "epoch": 1.4494706448508181, "grad_norm": 1.8567378520965576, "learning_rate": 3.5193844000489275e-05, "loss": 0.8733, "step": 37650 }, { "epoch": 1.4496631376323388, "grad_norm": 1.5645298957824707, "learning_rate": 3.5170816844269386e-05, "loss": 0.833, "step": 37655 }, { "epoch": 1.4498556304138595, "grad_norm": 1.3709477186203003, "learning_rate": 
3.5147795616248916e-05, "loss": 0.8262, "step": 37660 }, { "epoch": 1.4500481231953801, "grad_norm": 1.078057885169983, "learning_rate": 3.512478031853288e-05, "loss": 0.7562, "step": 37665 }, { "epoch": 1.4502406159769008, "grad_norm": 2.0203640460968018, "learning_rate": 3.510177095322593e-05, "loss": 0.7648, "step": 37670 }, { "epoch": 1.4504331087584217, "grad_norm": 2.2243845462799072, "learning_rate": 3.5078767522432124e-05, "loss": 0.6971, "step": 37675 }, { "epoch": 1.4506256015399424, "grad_norm": 1.4907315969467163, "learning_rate": 3.5055770028254995e-05, "loss": 0.7486, "step": 37680 }, { "epoch": 1.450818094321463, "grad_norm": 1.167829155921936, "learning_rate": 3.503277847279745e-05, "loss": 1.0481, "step": 37685 }, { "epoch": 1.4510105871029837, "grad_norm": 2.3201534748077393, "learning_rate": 3.500979285816194e-05, "loss": 0.8633, "step": 37690 }, { "epoch": 1.4512030798845044, "grad_norm": 1.0884878635406494, "learning_rate": 3.498681318645035e-05, "loss": 0.7201, "step": 37695 }, { "epoch": 1.451395572666025, "grad_norm": 1.033949375152588, "learning_rate": 3.496383945976403e-05, "loss": 0.828, "step": 37700 }, { "epoch": 1.4515880654475457, "grad_norm": 1.7160718441009521, "learning_rate": 3.494087168020378e-05, "loss": 1.0329, "step": 37705 }, { "epoch": 1.4517805582290664, "grad_norm": 1.1474641561508179, "learning_rate": 3.491790984986982e-05, "loss": 0.7973, "step": 37710 }, { "epoch": 1.451973051010587, "grad_norm": 1.491860270500183, "learning_rate": 3.4894953970861875e-05, "loss": 0.8382, "step": 37715 }, { "epoch": 1.4521655437921077, "grad_norm": 1.2758644819259644, "learning_rate": 3.487200404527911e-05, "loss": 0.7288, "step": 37720 }, { "epoch": 1.4523580365736284, "grad_norm": 1.046302318572998, "learning_rate": 3.484906007522017e-05, "loss": 0.787, "step": 37725 }, { "epoch": 1.452550529355149, "grad_norm": 1.3662068843841553, "learning_rate": 3.4826122062783026e-05, "loss": 0.859, "step": 37730 }, { "epoch": 1.45274302213667, 
"grad_norm": 1.0735856294631958, "learning_rate": 3.480319001006536e-05, "loss": 0.882, "step": 37735 }, { "epoch": 1.4529355149181906, "grad_norm": 3.124530553817749, "learning_rate": 3.4780263919164046e-05, "loss": 1.0391, "step": 37740 }, { "epoch": 1.4531280076997113, "grad_norm": 1.3612589836120605, "learning_rate": 3.475734379217555e-05, "loss": 0.731, "step": 37745 }, { "epoch": 1.453320500481232, "grad_norm": 1.0555542707443237, "learning_rate": 3.473442963119576e-05, "loss": 0.9877, "step": 37750 }, { "epoch": 1.4535129932627526, "grad_norm": 1.4260815382003784, "learning_rate": 3.4711521438320094e-05, "loss": 0.8009, "step": 37755 }, { "epoch": 1.4537054860442733, "grad_norm": 1.2460854053497314, "learning_rate": 3.4688619215643215e-05, "loss": 0.8803, "step": 37760 }, { "epoch": 1.453897978825794, "grad_norm": 0.8238735198974609, "learning_rate": 3.4665722965259525e-05, "loss": 0.7875, "step": 37765 }, { "epoch": 1.4540904716073149, "grad_norm": 1.1429873704910278, "learning_rate": 3.464283268926264e-05, "loss": 0.7947, "step": 37770 }, { "epoch": 1.4542829643888355, "grad_norm": 1.3068310022354126, "learning_rate": 3.461994838974576e-05, "loss": 0.826, "step": 37775 }, { "epoch": 1.4544754571703562, "grad_norm": 1.26819908618927, "learning_rate": 3.4597070068801484e-05, "loss": 0.9455, "step": 37780 }, { "epoch": 1.4546679499518769, "grad_norm": 1.3983910083770752, "learning_rate": 3.457419772852194e-05, "loss": 0.8876, "step": 37785 }, { "epoch": 1.4548604427333975, "grad_norm": 1.6775517463684082, "learning_rate": 3.455133137099853e-05, "loss": 0.7722, "step": 37790 }, { "epoch": 1.4550529355149182, "grad_norm": 1.2544530630111694, "learning_rate": 3.452847099832238e-05, "loss": 0.8891, "step": 37795 }, { "epoch": 1.4552454282964389, "grad_norm": 0.9618650674819946, "learning_rate": 3.4505616612583824e-05, "loss": 0.7674, "step": 37800 }, { "epoch": 1.4554379210779596, "grad_norm": 1.694305658340454, "learning_rate": 3.448276821587275e-05, "loss": 
0.8635, "step": 37805 }, { "epoch": 1.4556304138594802, "grad_norm": 1.1212447881698608, "learning_rate": 3.445992581027853e-05, "loss": 0.7715, "step": 37810 }, { "epoch": 1.455822906641001, "grad_norm": 1.3266422748565674, "learning_rate": 3.443708939788995e-05, "loss": 0.8759, "step": 37815 }, { "epoch": 1.4560153994225216, "grad_norm": 1.0106375217437744, "learning_rate": 3.44142589807952e-05, "loss": 0.639, "step": 37820 }, { "epoch": 1.4562078922040422, "grad_norm": 1.1478214263916016, "learning_rate": 3.4391434561082e-05, "loss": 0.9026, "step": 37825 }, { "epoch": 1.4564003849855631, "grad_norm": 1.7715827226638794, "learning_rate": 3.436861614083753e-05, "loss": 0.7835, "step": 37830 }, { "epoch": 1.4565928777670838, "grad_norm": 1.5840331315994263, "learning_rate": 3.434580372214829e-05, "loss": 0.9205, "step": 37835 }, { "epoch": 1.4567853705486045, "grad_norm": 1.0657918453216553, "learning_rate": 3.432299730710042e-05, "loss": 0.8256, "step": 37840 }, { "epoch": 1.4569778633301251, "grad_norm": 1.3026148080825806, "learning_rate": 3.430019689777936e-05, "loss": 0.8559, "step": 37845 }, { "epoch": 1.4571703561116458, "grad_norm": 1.7707239389419556, "learning_rate": 3.4277402496270075e-05, "loss": 0.8683, "step": 37850 }, { "epoch": 1.4573628488931665, "grad_norm": 1.134018063545227, "learning_rate": 3.4254614104656945e-05, "loss": 0.7637, "step": 37855 }, { "epoch": 1.4575553416746871, "grad_norm": 1.713789463043213, "learning_rate": 3.423183172502388e-05, "loss": 1.0235, "step": 37860 }, { "epoch": 1.4577478344562078, "grad_norm": 1.0064624547958374, "learning_rate": 3.420905535945406e-05, "loss": 0.8631, "step": 37865 }, { "epoch": 1.4579403272377287, "grad_norm": 1.2187049388885498, "learning_rate": 3.418628501003036e-05, "loss": 0.7412, "step": 37870 }, { "epoch": 1.4581328200192494, "grad_norm": 0.9734949469566345, "learning_rate": 3.416352067883489e-05, "loss": 0.7388, "step": 37875 }, { "epoch": 1.45832531280077, "grad_norm": 2.005460262298584, 
"learning_rate": 3.414076236794933e-05, "loss": 0.9135, "step": 37880 }, { "epoch": 1.4585178055822907, "grad_norm": 1.063448429107666, "learning_rate": 3.4118010079454775e-05, "loss": 0.8584, "step": 37885 }, { "epoch": 1.4587102983638114, "grad_norm": 1.4160773754119873, "learning_rate": 3.4095263815431765e-05, "loss": 0.871, "step": 37890 }, { "epoch": 1.458902791145332, "grad_norm": 2.008450508117676, "learning_rate": 3.40725235779603e-05, "loss": 0.886, "step": 37895 }, { "epoch": 1.4590952839268527, "grad_norm": 3.2115590572357178, "learning_rate": 3.4049789369119866e-05, "loss": 0.7679, "step": 37900 }, { "epoch": 1.4592877767083734, "grad_norm": 1.1200793981552124, "learning_rate": 3.402706119098927e-05, "loss": 0.7649, "step": 37905 }, { "epoch": 1.459480269489894, "grad_norm": 1.700742483139038, "learning_rate": 3.4004339045646906e-05, "loss": 0.7951, "step": 37910 }, { "epoch": 1.4596727622714147, "grad_norm": 1.5388984680175781, "learning_rate": 3.3981622935170554e-05, "loss": 0.6965, "step": 37915 }, { "epoch": 1.4598652550529354, "grad_norm": 1.1668038368225098, "learning_rate": 3.3958912861637446e-05, "loss": 0.807, "step": 37920 }, { "epoch": 1.460057747834456, "grad_norm": 1.614430546760559, "learning_rate": 3.3936208827124316e-05, "loss": 0.7649, "step": 37925 }, { "epoch": 1.460250240615977, "grad_norm": 1.4020650386810303, "learning_rate": 3.3913510833707215e-05, "loss": 0.8886, "step": 37930 }, { "epoch": 1.4604427333974976, "grad_norm": 1.0239553451538086, "learning_rate": 3.3890818883461774e-05, "loss": 0.8201, "step": 37935 }, { "epoch": 1.4606352261790183, "grad_norm": 1.2521649599075317, "learning_rate": 3.386813297846301e-05, "loss": 0.8144, "step": 37940 }, { "epoch": 1.460827718960539, "grad_norm": 1.6655640602111816, "learning_rate": 3.384545312078543e-05, "loss": 0.7052, "step": 37945 }, { "epoch": 1.4610202117420596, "grad_norm": 1.3284400701522827, "learning_rate": 3.382277931250287e-05, "loss": 0.7968, "step": 37950 }, { "epoch": 
1.4612127045235803, "grad_norm": 1.4124058485031128, "learning_rate": 3.380011155568882e-05, "loss": 0.7928, "step": 37955 }, { "epoch": 1.461405197305101, "grad_norm": 1.6810020208358765, "learning_rate": 3.3777449852416e-05, "loss": 0.8213, "step": 37960 }, { "epoch": 1.4615976900866219, "grad_norm": 0.6957064867019653, "learning_rate": 3.375479420475671e-05, "loss": 0.6974, "step": 37965 }, { "epoch": 1.4617901828681426, "grad_norm": 1.094000220298767, "learning_rate": 3.3732144614782655e-05, "loss": 0.7123, "step": 37970 }, { "epoch": 1.4619826756496632, "grad_norm": 1.638156771659851, "learning_rate": 3.370950108456502e-05, "loss": 0.9115, "step": 37975 }, { "epoch": 1.462175168431184, "grad_norm": 1.41256582736969, "learning_rate": 3.368686361617431e-05, "loss": 0.7888, "step": 37980 }, { "epoch": 1.4623676612127046, "grad_norm": 1.2503211498260498, "learning_rate": 3.366423221168071e-05, "loss": 0.7508, "step": 37985 }, { "epoch": 1.4625601539942252, "grad_norm": 1.3764451742172241, "learning_rate": 3.3641606873153596e-05, "loss": 0.8037, "step": 37990 }, { "epoch": 1.462752646775746, "grad_norm": 0.9157146215438843, "learning_rate": 3.3618987602661966e-05, "loss": 0.7469, "step": 37995 }, { "epoch": 1.4629451395572666, "grad_norm": 1.5734952688217163, "learning_rate": 3.359637440227418e-05, "loss": 0.8036, "step": 38000 }, { "epoch": 1.4631376323387872, "grad_norm": 1.2893953323364258, "learning_rate": 3.357376727405809e-05, "loss": 0.7856, "step": 38005 }, { "epoch": 1.463330125120308, "grad_norm": 1.133774995803833, "learning_rate": 3.3551166220080896e-05, "loss": 0.8766, "step": 38010 }, { "epoch": 1.4635226179018286, "grad_norm": 1.4209762811660767, "learning_rate": 3.3528571242409435e-05, "loss": 0.8307, "step": 38015 }, { "epoch": 1.4637151106833493, "grad_norm": 1.2726255655288696, "learning_rate": 3.350598234310977e-05, "loss": 0.8027, "step": 38020 }, { "epoch": 1.4639076034648701, "grad_norm": 1.623399257659912, "learning_rate": 
3.3483399524247525e-05, "loss": 0.8838, "step": 38025 }, { "epoch": 1.4641000962463908, "grad_norm": 1.2219460010528564, "learning_rate": 3.3460822787887805e-05, "loss": 1.0114, "step": 38030 }, { "epoch": 1.4642925890279115, "grad_norm": 0.8296359777450562, "learning_rate": 3.3438252136095016e-05, "loss": 0.7348, "step": 38035 }, { "epoch": 1.4644850818094322, "grad_norm": 1.6310055255889893, "learning_rate": 3.341568757093314e-05, "loss": 0.892, "step": 38040 }, { "epoch": 1.4646775745909528, "grad_norm": 0.8464129567146301, "learning_rate": 3.339312909446557e-05, "loss": 0.8913, "step": 38045 }, { "epoch": 1.4648700673724735, "grad_norm": 1.2375149726867676, "learning_rate": 3.337057670875513e-05, "loss": 0.6927, "step": 38050 }, { "epoch": 1.4650625601539942, "grad_norm": 2.361973285675049, "learning_rate": 3.334803041586402e-05, "loss": 0.7511, "step": 38055 }, { "epoch": 1.465255052935515, "grad_norm": 2.223909616470337, "learning_rate": 3.332999776976661e-05, "loss": 0.9986, "step": 38060 }, { "epoch": 1.4654475457170357, "grad_norm": 0.8594219088554382, "learning_rate": 3.330746244914553e-05, "loss": 0.6887, "step": 38065 }, { "epoch": 1.4656400384985564, "grad_norm": 1.6795843839645386, "learning_rate": 3.3284933227115236e-05, "loss": 0.7934, "step": 38070 }, { "epoch": 1.465832531280077, "grad_norm": 0.9740133881568909, "learning_rate": 3.3262410105735864e-05, "loss": 0.7993, "step": 38075 }, { "epoch": 1.4660250240615977, "grad_norm": 1.578036904335022, "learning_rate": 3.323989308706693e-05, "loss": 0.8793, "step": 38080 }, { "epoch": 1.4662175168431184, "grad_norm": 1.1103001832962036, "learning_rate": 3.32173821731676e-05, "loss": 0.8391, "step": 38085 }, { "epoch": 1.466410009624639, "grad_norm": 1.6756675243377686, "learning_rate": 3.3194877366096246e-05, "loss": 0.955, "step": 38090 }, { "epoch": 1.4666025024061597, "grad_norm": 1.257323980331421, "learning_rate": 3.3172378667910796e-05, "loss": 0.7475, "step": 38095 }, { "epoch": 
1.4667949951876804, "grad_norm": 1.2056502103805542, "learning_rate": 3.314988608066867e-05, "loss": 0.8694, "step": 38100 }, { "epoch": 1.466987487969201, "grad_norm": 1.552820086479187, "learning_rate": 3.312739960642659e-05, "loss": 0.8827, "step": 38105 }, { "epoch": 1.4671799807507218, "grad_norm": 0.8146559596061707, "learning_rate": 3.310491924724082e-05, "loss": 0.592, "step": 38110 }, { "epoch": 1.4673724735322424, "grad_norm": 0.8254255056381226, "learning_rate": 3.3082445005167053e-05, "loss": 0.7371, "step": 38115 }, { "epoch": 1.467564966313763, "grad_norm": 1.7715586423873901, "learning_rate": 3.3059976882260424e-05, "loss": 0.7059, "step": 38120 }, { "epoch": 1.467757459095284, "grad_norm": 1.2485114336013794, "learning_rate": 3.303751488057541e-05, "loss": 0.7397, "step": 38125 }, { "epoch": 1.4679499518768047, "grad_norm": 1.529093623161316, "learning_rate": 3.301505900216614e-05, "loss": 0.8473, "step": 38130 }, { "epoch": 1.4681424446583253, "grad_norm": 1.5459060668945312, "learning_rate": 3.299260924908596e-05, "loss": 0.906, "step": 38135 }, { "epoch": 1.468334937439846, "grad_norm": 1.5474683046340942, "learning_rate": 3.2970165623387785e-05, "loss": 0.8314, "step": 38140 }, { "epoch": 1.4685274302213667, "grad_norm": 2.399840831756592, "learning_rate": 3.2947728127123924e-05, "loss": 0.8761, "step": 38145 }, { "epoch": 1.4687199230028873, "grad_norm": 0.8112540245056152, "learning_rate": 3.292529676234615e-05, "loss": 0.7489, "step": 38150 }, { "epoch": 1.468912415784408, "grad_norm": 1.178676962852478, "learning_rate": 3.290287153110565e-05, "loss": 0.8148, "step": 38155 }, { "epoch": 1.469104908565929, "grad_norm": 1.379562258720398, "learning_rate": 3.28804524354531e-05, "loss": 0.9524, "step": 38160 }, { "epoch": 1.4692974013474496, "grad_norm": 2.1788909435272217, "learning_rate": 3.28580394774385e-05, "loss": 0.7175, "step": 38165 }, { "epoch": 1.4694898941289702, "grad_norm": 1.2615634202957153, "learning_rate": 3.283563265911147e-05, 
"loss": 1.0649, "step": 38170 }, { "epoch": 1.469682386910491, "grad_norm": 1.2906595468521118, "learning_rate": 3.281323198252087e-05, "loss": 0.6275, "step": 38175 }, { "epoch": 1.4698748796920116, "grad_norm": 0.8177803158760071, "learning_rate": 3.279083744971515e-05, "loss": 0.786, "step": 38180 }, { "epoch": 1.4700673724735323, "grad_norm": 1.317015290260315, "learning_rate": 3.276844906274211e-05, "loss": 0.8074, "step": 38185 }, { "epoch": 1.470259865255053, "grad_norm": 1.0974969863891602, "learning_rate": 3.274606682364908e-05, "loss": 0.851, "step": 38190 }, { "epoch": 1.4704523580365736, "grad_norm": 0.8745707273483276, "learning_rate": 3.272369073448269e-05, "loss": 0.9159, "step": 38195 }, { "epoch": 1.4706448508180943, "grad_norm": 1.3758927583694458, "learning_rate": 3.2701320797289114e-05, "loss": 0.7093, "step": 38200 }, { "epoch": 1.470837343599615, "grad_norm": 1.6409497261047363, "learning_rate": 3.267895701411393e-05, "loss": 0.7493, "step": 38205 }, { "epoch": 1.4710298363811356, "grad_norm": 1.0862737894058228, "learning_rate": 3.2656599387002176e-05, "loss": 0.9189, "step": 38210 }, { "epoch": 1.4712223291626563, "grad_norm": 0.9804477095603943, "learning_rate": 3.263424791799833e-05, "loss": 0.8635, "step": 38215 }, { "epoch": 1.4714148219441772, "grad_norm": 1.2791975736618042, "learning_rate": 3.2611902609146215e-05, "loss": 0.8745, "step": 38220 }, { "epoch": 1.4716073147256978, "grad_norm": 1.2094603776931763, "learning_rate": 3.2589563462489214e-05, "loss": 0.8492, "step": 38225 }, { "epoch": 1.4717998075072185, "grad_norm": 0.9160768389701843, "learning_rate": 3.256723048007006e-05, "loss": 0.8906, "step": 38230 }, { "epoch": 1.4719923002887392, "grad_norm": 2.3353841304779053, "learning_rate": 3.254490366393104e-05, "loss": 0.8709, "step": 38235 }, { "epoch": 1.4721847930702598, "grad_norm": 1.8757665157318115, "learning_rate": 3.2522583016113636e-05, "loss": 0.6895, "step": 38240 }, { "epoch": 1.4723772858517805, "grad_norm": 
1.9597855806350708, "learning_rate": 3.250026853865911e-05, "loss": 0.7915, "step": 38245 }, { "epoch": 1.4725697786333012, "grad_norm": 1.1252086162567139, "learning_rate": 3.247796023360783e-05, "loss": 0.8083, "step": 38250 }, { "epoch": 1.472762271414822, "grad_norm": 1.7507824897766113, "learning_rate": 3.2455658102999796e-05, "loss": 0.8343, "step": 38255 }, { "epoch": 1.4729547641963427, "grad_norm": 0.8124133944511414, "learning_rate": 3.243336214887439e-05, "loss": 0.7325, "step": 38260 }, { "epoch": 1.4731472569778634, "grad_norm": 0.9057835936546326, "learning_rate": 3.241107237327047e-05, "loss": 0.8326, "step": 38265 }, { "epoch": 1.473339749759384, "grad_norm": 2.141209602355957, "learning_rate": 3.238878877822616e-05, "loss": 0.8098, "step": 38270 }, { "epoch": 1.4735322425409048, "grad_norm": 2.2603015899658203, "learning_rate": 3.236651136577932e-05, "loss": 0.7639, "step": 38275 }, { "epoch": 1.4737247353224254, "grad_norm": 1.785561203956604, "learning_rate": 3.234424013796694e-05, "loss": 0.9348, "step": 38280 }, { "epoch": 1.473917228103946, "grad_norm": 1.9484220743179321, "learning_rate": 3.232197509682562e-05, "loss": 1.0465, "step": 38285 }, { "epoch": 1.4741097208854668, "grad_norm": 1.0257092714309692, "learning_rate": 3.2299716244391356e-05, "loss": 0.7771, "step": 38290 }, { "epoch": 1.4743022136669874, "grad_norm": 0.8890891075134277, "learning_rate": 3.2277463582699595e-05, "loss": 0.7485, "step": 38295 }, { "epoch": 1.474494706448508, "grad_norm": 1.7376580238342285, "learning_rate": 3.225521711378514e-05, "loss": 0.7943, "step": 38300 }, { "epoch": 1.4746871992300288, "grad_norm": 0.7426727414131165, "learning_rate": 3.2232976839682316e-05, "loss": 0.7996, "step": 38305 }, { "epoch": 1.4748796920115494, "grad_norm": 1.1797425746917725, "learning_rate": 3.221074276242484e-05, "loss": 0.7686, "step": 38310 }, { "epoch": 1.4750721847930703, "grad_norm": 1.5532947778701782, "learning_rate": 3.2188514884045885e-05, "loss": 0.6781, 
"step": 38315 }, { "epoch": 1.475264677574591, "grad_norm": 1.4531184434890747, "learning_rate": 3.216629320657806e-05, "loss": 0.9722, "step": 38320 }, { "epoch": 1.4754571703561117, "grad_norm": 1.0439348220825195, "learning_rate": 3.214407773205333e-05, "loss": 0.9018, "step": 38325 }, { "epoch": 1.4756496631376324, "grad_norm": 1.6608530282974243, "learning_rate": 3.212186846250318e-05, "loss": 0.8168, "step": 38330 }, { "epoch": 1.475842155919153, "grad_norm": 1.7269642353057861, "learning_rate": 3.209966539995851e-05, "loss": 0.9625, "step": 38335 }, { "epoch": 1.4760346487006737, "grad_norm": 1.6941852569580078, "learning_rate": 3.2077468546449684e-05, "loss": 0.8007, "step": 38340 }, { "epoch": 1.4762271414821944, "grad_norm": 1.0302637815475464, "learning_rate": 3.205527790400634e-05, "loss": 0.9428, "step": 38345 }, { "epoch": 1.476419634263715, "grad_norm": 1.4871275424957275, "learning_rate": 3.2033093474657806e-05, "loss": 0.9217, "step": 38350 }, { "epoch": 1.476612127045236, "grad_norm": 1.700111746788025, "learning_rate": 3.201091526043261e-05, "loss": 0.791, "step": 38355 }, { "epoch": 1.4768046198267566, "grad_norm": 0.7985191345214844, "learning_rate": 3.198874326335881e-05, "loss": 0.6741, "step": 38360 }, { "epoch": 1.4769971126082773, "grad_norm": 1.5285080671310425, "learning_rate": 3.1966577485463913e-05, "loss": 0.8607, "step": 38365 }, { "epoch": 1.477189605389798, "grad_norm": 1.746016263961792, "learning_rate": 3.1944417928774864e-05, "loss": 0.71, "step": 38370 }, { "epoch": 1.4773820981713186, "grad_norm": 2.4140141010284424, "learning_rate": 3.1922264595317895e-05, "loss": 0.9232, "step": 38375 }, { "epoch": 1.4775745909528393, "grad_norm": 1.8177671432495117, "learning_rate": 3.190011748711892e-05, "loss": 0.7186, "step": 38380 }, { "epoch": 1.47776708373436, "grad_norm": 1.3410675525665283, "learning_rate": 3.187797660620305e-05, "loss": 0.8414, "step": 38385 }, { "epoch": 1.4779595765158806, "grad_norm": 0.9143676161766052, 
"learning_rate": 3.185584195459496e-05, "loss": 0.7884, "step": 38390 }, { "epoch": 1.4781520692974013, "grad_norm": 1.212738037109375, "learning_rate": 3.18337135343187e-05, "loss": 0.7888, "step": 38395 }, { "epoch": 1.478344562078922, "grad_norm": 1.165589690208435, "learning_rate": 3.181159134739777e-05, "loss": 0.8875, "step": 38400 }, { "epoch": 1.4785370548604426, "grad_norm": 1.0738500356674194, "learning_rate": 3.178947539585512e-05, "loss": 0.7586, "step": 38405 }, { "epoch": 1.4787295476419633, "grad_norm": 1.069633960723877, "learning_rate": 3.1767365681713123e-05, "loss": 0.9111, "step": 38410 }, { "epoch": 1.4789220404234842, "grad_norm": 1.1516032218933105, "learning_rate": 3.17452622069935e-05, "loss": 0.9261, "step": 38415 }, { "epoch": 1.4791145332050049, "grad_norm": 1.2986743450164795, "learning_rate": 3.172316497371749e-05, "loss": 0.8498, "step": 38420 }, { "epoch": 1.4793070259865255, "grad_norm": 1.1140365600585938, "learning_rate": 3.170107398390576e-05, "loss": 0.8164, "step": 38425 }, { "epoch": 1.4794995187680462, "grad_norm": 0.8522436022758484, "learning_rate": 3.167898923957838e-05, "loss": 0.7513, "step": 38430 }, { "epoch": 1.4796920115495669, "grad_norm": 1.4208568334579468, "learning_rate": 3.1656910742754876e-05, "loss": 0.8429, "step": 38435 }, { "epoch": 1.4798845043310875, "grad_norm": 1.4376440048217773, "learning_rate": 3.163483849545412e-05, "loss": 0.7375, "step": 38440 }, { "epoch": 1.4800769971126082, "grad_norm": 1.1318036317825317, "learning_rate": 3.161277249969451e-05, "loss": 0.8024, "step": 38445 }, { "epoch": 1.480269489894129, "grad_norm": 1.1824818849563599, "learning_rate": 3.159071275749382e-05, "loss": 0.7278, "step": 38450 }, { "epoch": 1.4804619826756498, "grad_norm": 1.4010226726531982, "learning_rate": 3.1568659270869315e-05, "loss": 0.7994, "step": 38455 }, { "epoch": 1.4806544754571704, "grad_norm": 1.4055135250091553, "learning_rate": 3.154661204183755e-05, "loss": 0.8096, "step": 38460 }, { "epoch": 
1.480846968238691, "grad_norm": 1.6588884592056274, "learning_rate": 3.152457107241471e-05, "loss": 0.858, "step": 38465 }, { "epoch": 1.4810394610202118, "grad_norm": 0.7389982342720032, "learning_rate": 3.15025363646162e-05, "loss": 0.8122, "step": 38470 }, { "epoch": 1.4812319538017324, "grad_norm": 1.2149773836135864, "learning_rate": 3.148050792045699e-05, "loss": 0.8371, "step": 38475 }, { "epoch": 1.4814244465832531, "grad_norm": 0.945489227771759, "learning_rate": 3.1458485741951425e-05, "loss": 0.9891, "step": 38480 }, { "epoch": 1.4816169393647738, "grad_norm": 1.013566255569458, "learning_rate": 3.1436469831113334e-05, "loss": 0.8877, "step": 38485 }, { "epoch": 1.4818094321462945, "grad_norm": 1.1482200622558594, "learning_rate": 3.1414460189955805e-05, "loss": 0.9323, "step": 38490 }, { "epoch": 1.4820019249278151, "grad_norm": 1.5675556659698486, "learning_rate": 3.139245682049163e-05, "loss": 0.7917, "step": 38495 }, { "epoch": 1.4821944177093358, "grad_norm": 1.299532413482666, "learning_rate": 3.137045972473277e-05, "loss": 0.7983, "step": 38500 }, { "epoch": 1.4823869104908565, "grad_norm": 1.051574945449829, "learning_rate": 3.134846890469073e-05, "loss": 0.7102, "step": 38505 }, { "epoch": 1.4825794032723774, "grad_norm": 1.790773868560791, "learning_rate": 3.132648436237644e-05, "loss": 0.8816, "step": 38510 }, { "epoch": 1.482771896053898, "grad_norm": 1.0724399089813232, "learning_rate": 3.1304506099800256e-05, "loss": 0.9583, "step": 38515 }, { "epoch": 1.4829643888354187, "grad_norm": 2.1715545654296875, "learning_rate": 3.1282534118971893e-05, "loss": 0.907, "step": 38520 }, { "epoch": 1.4831568816169394, "grad_norm": 1.1639543771743774, "learning_rate": 3.126056842190058e-05, "loss": 0.8888, "step": 38525 }, { "epoch": 1.48334937439846, "grad_norm": 1.2022185325622559, "learning_rate": 3.123860901059493e-05, "loss": 0.7886, "step": 38530 }, { "epoch": 1.4835418671799807, "grad_norm": 0.8449472188949585, "learning_rate": 
3.121665588706297e-05, "loss": 0.7561, "step": 38535 }, { "epoch": 1.4837343599615014, "grad_norm": 1.3795840740203857, "learning_rate": 3.119470905331223e-05, "loss": 0.9203, "step": 38540 }, { "epoch": 1.4839268527430223, "grad_norm": 1.0770927667617798, "learning_rate": 3.1172768511349514e-05, "loss": 0.9815, "step": 38545 }, { "epoch": 1.484119345524543, "grad_norm": 1.2355643510818481, "learning_rate": 3.1150834263181187e-05, "loss": 0.7474, "step": 38550 }, { "epoch": 1.4843118383060636, "grad_norm": 1.1445075273513794, "learning_rate": 3.112890631081297e-05, "loss": 1.0919, "step": 38555 }, { "epoch": 1.4845043310875843, "grad_norm": 1.6212882995605469, "learning_rate": 3.1106984656250074e-05, "loss": 0.7342, "step": 38560 }, { "epoch": 1.484696823869105, "grad_norm": 1.2851066589355469, "learning_rate": 3.1085069301496993e-05, "loss": 0.7242, "step": 38565 }, { "epoch": 1.4848893166506256, "grad_norm": 1.7971413135528564, "learning_rate": 3.1063160248557876e-05, "loss": 0.7274, "step": 38570 }, { "epoch": 1.4850818094321463, "grad_norm": 0.9864533543586731, "learning_rate": 3.104125749943605e-05, "loss": 0.7071, "step": 38575 }, { "epoch": 1.485274302213667, "grad_norm": 1.3613252639770508, "learning_rate": 3.101936105613442e-05, "loss": 0.8422, "step": 38580 }, { "epoch": 1.4854667949951876, "grad_norm": 1.0878537893295288, "learning_rate": 3.099747092065527e-05, "loss": 0.8569, "step": 38585 }, { "epoch": 1.4856592877767083, "grad_norm": 2.1633172035217285, "learning_rate": 3.0975587095000335e-05, "loss": 0.904, "step": 38590 }, { "epoch": 1.485851780558229, "grad_norm": 1.3097665309906006, "learning_rate": 3.095370958117064e-05, "loss": 0.8018, "step": 38595 }, { "epoch": 1.4860442733397496, "grad_norm": 1.019307017326355, "learning_rate": 3.093183838116689e-05, "loss": 0.8721, "step": 38600 }, { "epoch": 1.4862367661212703, "grad_norm": 1.0038857460021973, "learning_rate": 3.090997349698895e-05, "loss": 0.7122, "step": 38605 }, { "epoch": 
1.4864292589027912, "grad_norm": 1.9515637159347534, "learning_rate": 3.0888114930636256e-05, "loss": 0.7896, "step": 38610 }, { "epoch": 1.4866217516843119, "grad_norm": 1.1651268005371094, "learning_rate": 3.0866262684107626e-05, "loss": 0.7926, "step": 38615 }, { "epoch": 1.4868142444658325, "grad_norm": 1.1827764511108398, "learning_rate": 3.084441675940134e-05, "loss": 0.8771, "step": 38620 }, { "epoch": 1.4870067372473532, "grad_norm": 1.2885782718658447, "learning_rate": 3.0822577158514954e-05, "loss": 0.8086, "step": 38625 }, { "epoch": 1.4871992300288739, "grad_norm": 1.6408069133758545, "learning_rate": 3.0800743883445703e-05, "loss": 0.8421, "step": 38630 }, { "epoch": 1.4873917228103946, "grad_norm": 1.5079654455184937, "learning_rate": 3.077891693618998e-05, "loss": 0.951, "step": 38635 }, { "epoch": 1.4875842155919152, "grad_norm": 1.0820412635803223, "learning_rate": 3.075709631874376e-05, "loss": 0.9189, "step": 38640 }, { "epoch": 1.4877767083734361, "grad_norm": 1.5331305265426636, "learning_rate": 3.073528203310242e-05, "loss": 0.8078, "step": 38645 }, { "epoch": 1.4879692011549568, "grad_norm": 1.087971568107605, "learning_rate": 3.0713474081260674e-05, "loss": 0.9791, "step": 38650 }, { "epoch": 1.4881616939364775, "grad_norm": 1.125899076461792, "learning_rate": 3.069167246521273e-05, "loss": 0.855, "step": 38655 }, { "epoch": 1.4883541867179981, "grad_norm": 1.5379077196121216, "learning_rate": 3.0669877186952226e-05, "loss": 0.8884, "step": 38660 }, { "epoch": 1.4885466794995188, "grad_norm": 1.4419198036193848, "learning_rate": 3.064808824847217e-05, "loss": 0.9177, "step": 38665 }, { "epoch": 1.4887391722810395, "grad_norm": 1.242751955986023, "learning_rate": 3.062630565176504e-05, "loss": 0.7661, "step": 38670 }, { "epoch": 1.4889316650625601, "grad_norm": 0.9697130918502808, "learning_rate": 3.060452939882273e-05, "loss": 0.7302, "step": 38675 }, { "epoch": 1.4891241578440808, "grad_norm": 1.1777362823486328, "learning_rate": 
3.0582759491636445e-05, "loss": 0.8163, "step": 38680 }, { "epoch": 1.4893166506256015, "grad_norm": 1.943617582321167, "learning_rate": 3.0560995932197015e-05, "loss": 0.8965, "step": 38685 }, { "epoch": 1.4895091434071221, "grad_norm": 1.0683708190917969, "learning_rate": 3.053923872249448e-05, "loss": 0.9112, "step": 38690 }, { "epoch": 1.4897016361886428, "grad_norm": 1.1417663097381592, "learning_rate": 3.0517487864518436e-05, "loss": 0.8912, "step": 38695 }, { "epoch": 1.4898941289701635, "grad_norm": 1.4740487337112427, "learning_rate": 3.0495743360257845e-05, "loss": 0.8111, "step": 38700 }, { "epoch": 1.4900866217516844, "grad_norm": 2.3702943325042725, "learning_rate": 3.047400521170113e-05, "loss": 0.9583, "step": 38705 }, { "epoch": 1.490279114533205, "grad_norm": 1.1470707654953003, "learning_rate": 3.0452273420836007e-05, "loss": 0.9308, "step": 38710 }, { "epoch": 1.4904716073147257, "grad_norm": 1.7966952323913574, "learning_rate": 3.0430547989649827e-05, "loss": 0.9539, "step": 38715 }, { "epoch": 1.4906641000962464, "grad_norm": 1.2520999908447266, "learning_rate": 3.0408828920129152e-05, "loss": 0.9079, "step": 38720 }, { "epoch": 1.490856592877767, "grad_norm": 0.930090606212616, "learning_rate": 3.038711621426007e-05, "loss": 0.7571, "step": 38725 }, { "epoch": 1.4910490856592877, "grad_norm": 1.0837591886520386, "learning_rate": 3.0365409874028074e-05, "loss": 0.756, "step": 38730 }, { "epoch": 1.4912415784408084, "grad_norm": 1.3901774883270264, "learning_rate": 3.0343709901418084e-05, "loss": 0.8403, "step": 38735 }, { "epoch": 1.4914340712223293, "grad_norm": 1.2014509439468384, "learning_rate": 3.032201629841437e-05, "loss": 0.9118, "step": 38740 }, { "epoch": 1.49162656400385, "grad_norm": 1.7022885084152222, "learning_rate": 3.0300329067000677e-05, "loss": 0.7848, "step": 38745 }, { "epoch": 1.4918190567853706, "grad_norm": 1.2013940811157227, "learning_rate": 3.0278648209160176e-05, "loss": 0.8609, "step": 38750 }, { "epoch": 
1.4920115495668913, "grad_norm": 1.0092146396636963, "learning_rate": 3.0256973726875436e-05, "loss": 0.8774, "step": 38755 }, { "epoch": 1.492204042348412, "grad_norm": 1.953743577003479, "learning_rate": 3.0235305622128483e-05, "loss": 0.8166, "step": 38760 }, { "epoch": 1.4923965351299326, "grad_norm": 2.1745972633361816, "learning_rate": 3.0213643896900646e-05, "loss": 0.7682, "step": 38765 }, { "epoch": 1.4925890279114533, "grad_norm": 1.1667450666427612, "learning_rate": 3.0191988553172778e-05, "loss": 0.8615, "step": 38770 }, { "epoch": 1.492781520692974, "grad_norm": 1.3305031061172485, "learning_rate": 3.0170339592925125e-05, "loss": 0.8538, "step": 38775 }, { "epoch": 1.4929740134744947, "grad_norm": 1.1499173641204834, "learning_rate": 3.0148697018137374e-05, "loss": 0.8741, "step": 38780 }, { "epoch": 1.4931665062560153, "grad_norm": 1.1782283782958984, "learning_rate": 3.0127060830788477e-05, "loss": 0.7251, "step": 38785 }, { "epoch": 1.493358999037536, "grad_norm": 0.9500432014465332, "learning_rate": 3.010543103285708e-05, "loss": 0.8529, "step": 38790 }, { "epoch": 1.4935514918190567, "grad_norm": 1.2924103736877441, "learning_rate": 3.008380762632097e-05, "loss": 0.8166, "step": 38795 }, { "epoch": 1.4937439846005776, "grad_norm": 1.125877022743225, "learning_rate": 3.0062190613157505e-05, "loss": 0.8048, "step": 38800 }, { "epoch": 1.4939364773820982, "grad_norm": 1.3560032844543457, "learning_rate": 3.00405799953434e-05, "loss": 0.8974, "step": 38805 }, { "epoch": 1.494128970163619, "grad_norm": 1.8842239379882812, "learning_rate": 3.0018975774854864e-05, "loss": 0.8511, "step": 38810 }, { "epoch": 1.4943214629451396, "grad_norm": 1.4684277772903442, "learning_rate": 2.9997377953667337e-05, "loss": 0.7795, "step": 38815 }, { "epoch": 1.4945139557266602, "grad_norm": 1.0859155654907227, "learning_rate": 2.9975786533755935e-05, "loss": 0.8679, "step": 38820 }, { "epoch": 1.494706448508181, "grad_norm": 1.0024845600128174, "learning_rate": 
2.995420151709496e-05, "loss": 0.99, "step": 38825 }, { "epoch": 1.4948989412897016, "grad_norm": 1.1407462358474731, "learning_rate": 2.9932622905658238e-05, "loss": 0.9279, "step": 38830 }, { "epoch": 1.4950914340712222, "grad_norm": 1.0193146467208862, "learning_rate": 2.9911050701419007e-05, "loss": 0.7641, "step": 38835 }, { "epoch": 1.4952839268527431, "grad_norm": 0.9654618501663208, "learning_rate": 2.9889484906349918e-05, "loss": 0.745, "step": 38840 }, { "epoch": 1.4954764196342638, "grad_norm": 0.9131496548652649, "learning_rate": 2.9867925522422957e-05, "loss": 0.8413, "step": 38845 }, { "epoch": 1.4956689124157845, "grad_norm": 1.6795357465744019, "learning_rate": 2.9846372551609637e-05, "loss": 0.8643, "step": 38850 }, { "epoch": 1.4958614051973051, "grad_norm": 0.6970282793045044, "learning_rate": 2.982482599588081e-05, "loss": 0.671, "step": 38855 }, { "epoch": 1.4960538979788258, "grad_norm": 2.4015212059020996, "learning_rate": 2.9803285857206787e-05, "loss": 1.0025, "step": 38860 }, { "epoch": 1.4962463907603465, "grad_norm": 0.9597976207733154, "learning_rate": 2.9781752137557296e-05, "loss": 0.8127, "step": 38865 }, { "epoch": 1.4964388835418672, "grad_norm": 1.1318256855010986, "learning_rate": 2.9760224838901372e-05, "loss": 0.7509, "step": 38870 }, { "epoch": 1.4966313763233878, "grad_norm": 1.4549952745437622, "learning_rate": 2.9738703963207604e-05, "loss": 0.8471, "step": 38875 }, { "epoch": 1.4968238691049085, "grad_norm": 1.4533936977386475, "learning_rate": 2.9717189512443922e-05, "loss": 0.9046, "step": 38880 }, { "epoch": 1.4970163618864292, "grad_norm": 1.5385301113128662, "learning_rate": 2.9695681488577697e-05, "loss": 0.8941, "step": 38885 }, { "epoch": 1.4972088546679498, "grad_norm": 1.1736408472061157, "learning_rate": 2.9674179893575626e-05, "loss": 0.7051, "step": 38890 }, { "epoch": 1.4974013474494705, "grad_norm": 1.6953740119934082, "learning_rate": 2.965268472940399e-05, "loss": 0.8457, "step": 38895 }, { "epoch": 
1.4975938402309914, "grad_norm": 0.6899955868721008, "learning_rate": 2.9631195998028305e-05, "loss": 0.7057, "step": 38900 }, { "epoch": 1.497786333012512, "grad_norm": 1.721205472946167, "learning_rate": 2.960971370141359e-05, "loss": 0.9267, "step": 38905 }, { "epoch": 1.4979788257940327, "grad_norm": 1.408977746963501, "learning_rate": 2.958823784152426e-05, "loss": 0.6861, "step": 38910 }, { "epoch": 1.4981713185755534, "grad_norm": 1.3313335180282593, "learning_rate": 2.9566768420324188e-05, "loss": 0.8315, "step": 38915 }, { "epoch": 1.498363811357074, "grad_norm": 1.7257198095321655, "learning_rate": 2.9545305439776494e-05, "loss": 0.7739, "step": 38920 }, { "epoch": 1.4985563041385948, "grad_norm": 1.2223293781280518, "learning_rate": 2.9523848901843953e-05, "loss": 0.8514, "step": 38925 }, { "epoch": 1.4987487969201154, "grad_norm": 0.9924798607826233, "learning_rate": 2.950239880848852e-05, "loss": 0.8442, "step": 38930 }, { "epoch": 1.4989412897016363, "grad_norm": 1.178801417350769, "learning_rate": 2.9480955161671774e-05, "loss": 0.9034, "step": 38935 }, { "epoch": 1.499133782483157, "grad_norm": 0.8374056220054626, "learning_rate": 2.94595179633545e-05, "loss": 0.7436, "step": 38940 }, { "epoch": 1.4993262752646777, "grad_norm": 1.187247633934021, "learning_rate": 2.9438087215497013e-05, "loss": 0.8352, "step": 38945 }, { "epoch": 1.4995187680461983, "grad_norm": 0.9191497564315796, "learning_rate": 2.9416662920059024e-05, "loss": 0.7596, "step": 38950 }, { "epoch": 1.499711260827719, "grad_norm": 0.9378131628036499, "learning_rate": 2.9395245078999665e-05, "loss": 0.7416, "step": 38955 }, { "epoch": 1.4999037536092397, "grad_norm": 1.5277785062789917, "learning_rate": 2.937383369427741e-05, "loss": 0.6751, "step": 38960 }, { "epoch": 1.5000962463907603, "grad_norm": 0.8396899104118347, "learning_rate": 2.9352428767850204e-05, "loss": 0.8376, "step": 38965 }, { "epoch": 1.500288739172281, "grad_norm": 1.3302197456359863, "learning_rate": 
2.9331030301675388e-05, "loss": 0.875, "step": 38970 }, { "epoch": 1.5004812319538017, "grad_norm": 1.8585106134414673, "learning_rate": 2.9309638297709708e-05, "loss": 0.804, "step": 38975 }, { "epoch": 1.5006737247353223, "grad_norm": 1.9096689224243164, "learning_rate": 2.928825275790936e-05, "loss": 0.8378, "step": 38980 }, { "epoch": 1.500866217516843, "grad_norm": 1.947161078453064, "learning_rate": 2.9266873684229846e-05, "loss": 0.9819, "step": 38985 }, { "epoch": 1.5010587102983637, "grad_norm": 1.3019959926605225, "learning_rate": 2.924550107862617e-05, "loss": 0.7941, "step": 38990 }, { "epoch": 1.5012512030798844, "grad_norm": 1.5726149082183838, "learning_rate": 2.9224134943052717e-05, "loss": 0.8298, "step": 38995 }, { "epoch": 1.5014436958614052, "grad_norm": 1.7521274089813232, "learning_rate": 2.92027752794633e-05, "loss": 0.7795, "step": 39000 }, { "epoch": 1.501636188642926, "grad_norm": 1.5771510601043701, "learning_rate": 2.918142208981104e-05, "loss": 0.7677, "step": 39005 }, { "epoch": 1.5018286814244466, "grad_norm": 1.1696386337280273, "learning_rate": 2.9160075376048656e-05, "loss": 0.735, "step": 39010 }, { "epoch": 1.5020211742059673, "grad_norm": 1.4408166408538818, "learning_rate": 2.913873514012807e-05, "loss": 0.8714, "step": 39015 }, { "epoch": 1.502213666987488, "grad_norm": 0.9149922728538513, "learning_rate": 2.9117401384000753e-05, "loss": 0.6836, "step": 39020 }, { "epoch": 1.5024061597690088, "grad_norm": 1.5898869037628174, "learning_rate": 2.9096074109617522e-05, "loss": 0.7895, "step": 39025 }, { "epoch": 1.5025986525505295, "grad_norm": 1.2088052034378052, "learning_rate": 2.907475331892865e-05, "loss": 0.7443, "step": 39030 }, { "epoch": 1.5027911453320502, "grad_norm": 1.8943687677383423, "learning_rate": 2.905343901388369e-05, "loss": 0.9301, "step": 39035 }, { "epoch": 1.5029836381135708, "grad_norm": 1.088670253753662, "learning_rate": 2.903213119643181e-05, "loss": 0.5987, "step": 39040 }, { "epoch": 
1.5031761308950915, "grad_norm": 1.1137605905532837, "learning_rate": 2.9010829868521393e-05, "loss": 0.8678, "step": 39045 }, { "epoch": 1.5033686236766122, "grad_norm": 1.1481714248657227, "learning_rate": 2.8989535032100312e-05, "loss": 0.8033, "step": 39050 }, { "epoch": 1.5035611164581328, "grad_norm": 1.2706315517425537, "learning_rate": 2.8968246689115862e-05, "loss": 0.7202, "step": 39055 }, { "epoch": 1.5037536092396535, "grad_norm": 1.0178372859954834, "learning_rate": 2.8946964841514735e-05, "loss": 0.8806, "step": 39060 }, { "epoch": 1.5039461020211742, "grad_norm": 1.0535964965820312, "learning_rate": 2.8925689491242958e-05, "loss": 0.8236, "step": 39065 }, { "epoch": 1.5041385948026949, "grad_norm": 0.9003209471702576, "learning_rate": 2.8904420640246054e-05, "loss": 0.7783, "step": 39070 }, { "epoch": 1.5043310875842155, "grad_norm": 1.5490949153900146, "learning_rate": 2.8883158290468916e-05, "loss": 0.8359, "step": 39075 }, { "epoch": 1.5045235803657362, "grad_norm": 0.9985781908035278, "learning_rate": 2.886190244385585e-05, "loss": 1.091, "step": 39080 }, { "epoch": 1.5047160731472569, "grad_norm": 1.7629387378692627, "learning_rate": 2.884065310235059e-05, "loss": 0.9146, "step": 39085 }, { "epoch": 1.5049085659287775, "grad_norm": 1.2450439929962158, "learning_rate": 2.8819410267896173e-05, "loss": 0.7949, "step": 39090 }, { "epoch": 1.5051010587102982, "grad_norm": 1.3784174919128418, "learning_rate": 2.879817394243517e-05, "loss": 0.8738, "step": 39095 }, { "epoch": 1.505293551491819, "grad_norm": 1.2022943496704102, "learning_rate": 2.877694412790949e-05, "loss": 0.8506, "step": 39100 }, { "epoch": 1.5054860442733398, "grad_norm": 1.1159878969192505, "learning_rate": 2.8755720826260492e-05, "loss": 0.7559, "step": 39105 }, { "epoch": 1.5056785370548604, "grad_norm": 1.8302503824234009, "learning_rate": 2.8734504039428813e-05, "loss": 0.839, "step": 39110 }, { "epoch": 1.505871029836381, "grad_norm": 1.0273211002349854, "learning_rate": 
2.871329376935471e-05, "loss": 0.7604, "step": 39115 }, { "epoch": 1.506063522617902, "grad_norm": 2.0029349327087402, "learning_rate": 2.8692090017977626e-05, "loss": 0.8672, "step": 39120 }, { "epoch": 1.5062560153994227, "grad_norm": 1.7396565675735474, "learning_rate": 2.8670892787236536e-05, "loss": 0.8613, "step": 39125 }, { "epoch": 1.5064485081809433, "grad_norm": 1.0118446350097656, "learning_rate": 2.8649702079069797e-05, "loss": 0.7605, "step": 39130 }, { "epoch": 1.506641000962464, "grad_norm": 1.3765640258789062, "learning_rate": 2.8628517895415173e-05, "loss": 0.7897, "step": 39135 }, { "epoch": 1.5068334937439847, "grad_norm": 0.9701485633850098, "learning_rate": 2.8607340238209747e-05, "loss": 0.7784, "step": 39140 }, { "epoch": 1.5070259865255053, "grad_norm": 1.591834306716919, "learning_rate": 2.858616910939017e-05, "loss": 0.837, "step": 39145 }, { "epoch": 1.507218479307026, "grad_norm": 1.928481936454773, "learning_rate": 2.8565004510892336e-05, "loss": 0.8828, "step": 39150 }, { "epoch": 1.5074109720885467, "grad_norm": 1.1933850049972534, "learning_rate": 2.854384644465161e-05, "loss": 0.7229, "step": 39155 }, { "epoch": 1.5076034648700674, "grad_norm": 1.4722340106964111, "learning_rate": 2.852269491260279e-05, "loss": 0.7789, "step": 39160 }, { "epoch": 1.507795957651588, "grad_norm": 1.0103994607925415, "learning_rate": 2.8501549916680047e-05, "loss": 0.8624, "step": 39165 }, { "epoch": 1.5079884504331087, "grad_norm": 1.9095921516418457, "learning_rate": 2.848041145881687e-05, "loss": 0.9677, "step": 39170 }, { "epoch": 1.5081809432146294, "grad_norm": 0.9631972312927246, "learning_rate": 2.845927954094635e-05, "loss": 0.6126, "step": 39175 }, { "epoch": 1.50837343599615, "grad_norm": 0.9686654210090637, "learning_rate": 2.8438154165000774e-05, "loss": 0.7955, "step": 39180 }, { "epoch": 1.5085659287776707, "grad_norm": 1.5519697666168213, "learning_rate": 2.8417035332911946e-05, "loss": 0.8444, "step": 39185 }, { "epoch": 
1.5087584215591914, "grad_norm": 0.9993698596954346, "learning_rate": 2.839592304661107e-05, "loss": 0.8372, "step": 39190 }, { "epoch": 1.5089509143407123, "grad_norm": 2.2285327911376953, "learning_rate": 2.8374817308028645e-05, "loss": 0.8041, "step": 39195 }, { "epoch": 1.509143407122233, "grad_norm": 1.6649866104125977, "learning_rate": 2.8353718119094762e-05, "loss": 0.8547, "step": 39200 }, { "epoch": 1.5093358999037536, "grad_norm": 1.4915626049041748, "learning_rate": 2.8332625481738705e-05, "loss": 0.9186, "step": 39205 }, { "epoch": 1.5095283926852743, "grad_norm": 0.9645226001739502, "learning_rate": 2.8311539397889307e-05, "loss": 0.6819, "step": 39210 }, { "epoch": 1.509720885466795, "grad_norm": 1.2806873321533203, "learning_rate": 2.829045986947474e-05, "loss": 0.8868, "step": 39215 }, { "epoch": 1.5099133782483158, "grad_norm": 1.3241313695907593, "learning_rate": 2.82693868984226e-05, "loss": 0.7592, "step": 39220 }, { "epoch": 1.5101058710298365, "grad_norm": 0.9667055606842041, "learning_rate": 2.824832048665981e-05, "loss": 0.8439, "step": 39225 }, { "epoch": 1.5102983638113572, "grad_norm": 1.5384224653244019, "learning_rate": 2.822726063611285e-05, "loss": 0.7319, "step": 39230 }, { "epoch": 1.5104908565928779, "grad_norm": 1.0840857028961182, "learning_rate": 2.8206207348707435e-05, "loss": 0.7401, "step": 39235 }, { "epoch": 1.5106833493743985, "grad_norm": 0.7421225905418396, "learning_rate": 2.8185160626368755e-05, "loss": 0.7408, "step": 39240 }, { "epoch": 1.5108758421559192, "grad_norm": 1.7684946060180664, "learning_rate": 2.8164120471021417e-05, "loss": 0.8469, "step": 39245 }, { "epoch": 1.5110683349374399, "grad_norm": 1.4627081155776978, "learning_rate": 2.814308688458942e-05, "loss": 0.8621, "step": 39250 }, { "epoch": 1.5112608277189605, "grad_norm": 0.941478967666626, "learning_rate": 2.8122059868996055e-05, "loss": 0.7465, "step": 39255 }, { "epoch": 1.5114533205004812, "grad_norm": 1.5532718896865845, "learning_rate": 
2.810103942616423e-05, "loss": 0.7821, "step": 39260 }, { "epoch": 1.5116458132820019, "grad_norm": 0.9734495282173157, "learning_rate": 2.8080025558016033e-05, "loss": 0.8102, "step": 39265 }, { "epoch": 1.5118383060635225, "grad_norm": 1.1670575141906738, "learning_rate": 2.8059018266473068e-05, "loss": 0.8069, "step": 39270 }, { "epoch": 1.5120307988450432, "grad_norm": 1.0015208721160889, "learning_rate": 2.803801755345633e-05, "loss": 0.8015, "step": 39275 }, { "epoch": 1.5122232916265639, "grad_norm": 1.1417772769927979, "learning_rate": 2.8017023420886202e-05, "loss": 0.7996, "step": 39280 }, { "epoch": 1.5124157844080846, "grad_norm": 1.6754858493804932, "learning_rate": 2.7996035870682402e-05, "loss": 0.7731, "step": 39285 }, { "epoch": 1.5126082771896054, "grad_norm": 2.357158899307251, "learning_rate": 2.797505490476415e-05, "loss": 0.817, "step": 39290 }, { "epoch": 1.5128007699711261, "grad_norm": 0.9653787612915039, "learning_rate": 2.7954080525050007e-05, "loss": 0.7779, "step": 39295 }, { "epoch": 1.5129932627526468, "grad_norm": 0.9732620716094971, "learning_rate": 2.7933112733457933e-05, "loss": 0.8127, "step": 39300 }, { "epoch": 1.5131857555341675, "grad_norm": 0.9965852499008179, "learning_rate": 2.791215153190533e-05, "loss": 0.9347, "step": 39305 }, { "epoch": 1.5133782483156881, "grad_norm": 1.4929567575454712, "learning_rate": 2.7891196922308905e-05, "loss": 0.8412, "step": 39310 }, { "epoch": 1.513570741097209, "grad_norm": 1.590889811515808, "learning_rate": 2.787024890658484e-05, "loss": 0.792, "step": 39315 }, { "epoch": 1.5137632338787297, "grad_norm": 1.5452625751495361, "learning_rate": 2.7849307486648702e-05, "loss": 0.8094, "step": 39320 }, { "epoch": 1.5139557266602504, "grad_norm": 1.8608287572860718, "learning_rate": 2.782837266441546e-05, "loss": 0.8371, "step": 39325 }, { "epoch": 1.514148219441771, "grad_norm": 1.141189455986023, "learning_rate": 2.780744444179939e-05, "loss": 0.8949, "step": 39330 }, { "epoch": 
1.5143407122232917, "grad_norm": 1.3165732622146606, "learning_rate": 2.7786522820714355e-05, "loss": 0.8983, "step": 39335 }, { "epoch": 1.5145332050048124, "grad_norm": 1.809031367301941, "learning_rate": 2.776560780307341e-05, "loss": 0.7893, "step": 39340 }, { "epoch": 1.514725697786333, "grad_norm": 0.9109099507331848, "learning_rate": 2.774469939078912e-05, "loss": 0.7076, "step": 39345 }, { "epoch": 1.5149181905678537, "grad_norm": 1.9604289531707764, "learning_rate": 2.7723797585773426e-05, "loss": 0.916, "step": 39350 }, { "epoch": 1.5151106833493744, "grad_norm": 1.2984347343444824, "learning_rate": 2.770290238993768e-05, "loss": 0.9168, "step": 39355 }, { "epoch": 1.515303176130895, "grad_norm": 2.4720041751861572, "learning_rate": 2.768201380519253e-05, "loss": 0.8866, "step": 39360 }, { "epoch": 1.5154956689124157, "grad_norm": 0.9475586414337158, "learning_rate": 2.7661131833448216e-05, "loss": 0.8357, "step": 39365 }, { "epoch": 1.5156881616939364, "grad_norm": 1.0659955739974976, "learning_rate": 2.7640256476614157e-05, "loss": 0.8456, "step": 39370 }, { "epoch": 1.515880654475457, "grad_norm": 1.2105602025985718, "learning_rate": 2.7619387736599312e-05, "loss": 0.7651, "step": 39375 }, { "epoch": 1.5160731472569777, "grad_norm": 1.759903907775879, "learning_rate": 2.7598525615311975e-05, "loss": 0.7607, "step": 39380 }, { "epoch": 1.5162656400384984, "grad_norm": 0.6724603176116943, "learning_rate": 2.7577670114659892e-05, "loss": 0.7695, "step": 39385 }, { "epoch": 1.5164581328200193, "grad_norm": 1.3016246557235718, "learning_rate": 2.755682123655009e-05, "loss": 0.8609, "step": 39390 }, { "epoch": 1.51665062560154, "grad_norm": 1.1983976364135742, "learning_rate": 2.75359789828891e-05, "loss": 0.8225, "step": 39395 }, { "epoch": 1.5168431183830606, "grad_norm": 1.0239381790161133, "learning_rate": 2.7515143355582786e-05, "loss": 0.6805, "step": 39400 }, { "epoch": 1.5170356111645813, "grad_norm": 1.6102731227874756, "learning_rate": 
2.7494314356536456e-05, "loss": 0.8922, "step": 39405 }, { "epoch": 1.517228103946102, "grad_norm": 1.7071436643600464, "learning_rate": 2.7473491987654798e-05, "loss": 0.9025, "step": 39410 }, { "epoch": 1.5174205967276229, "grad_norm": 1.8176323175430298, "learning_rate": 2.7452676250841824e-05, "loss": 0.9699, "step": 39415 }, { "epoch": 1.5176130895091435, "grad_norm": 2.1587765216827393, "learning_rate": 2.7431867148001024e-05, "loss": 1.0543, "step": 39420 }, { "epoch": 1.5178055822906642, "grad_norm": 1.8511626720428467, "learning_rate": 2.741106468103526e-05, "loss": 0.7427, "step": 39425 }, { "epoch": 1.5179980750721849, "grad_norm": 1.2232860326766968, "learning_rate": 2.739026885184679e-05, "loss": 0.834, "step": 39430 }, { "epoch": 1.5181905678537055, "grad_norm": 0.9527449011802673, "learning_rate": 2.7369479662337195e-05, "loss": 0.8507, "step": 39435 }, { "epoch": 1.5183830606352262, "grad_norm": 1.3037556409835815, "learning_rate": 2.7348697114407595e-05, "loss": 0.8492, "step": 39440 }, { "epoch": 1.5185755534167469, "grad_norm": 0.8813561797142029, "learning_rate": 2.732792120995832e-05, "loss": 0.8644, "step": 39445 }, { "epoch": 1.5187680461982676, "grad_norm": 1.0147926807403564, "learning_rate": 2.7307151950889308e-05, "loss": 0.7337, "step": 39450 }, { "epoch": 1.5189605389797882, "grad_norm": 1.2371786832809448, "learning_rate": 2.728638933909966e-05, "loss": 0.7856, "step": 39455 }, { "epoch": 1.519153031761309, "grad_norm": 1.3520861864089966, "learning_rate": 2.726563337648803e-05, "loss": 0.8224, "step": 39460 }, { "epoch": 1.5193455245428296, "grad_norm": 1.1333503723144531, "learning_rate": 2.7244884064952414e-05, "loss": 0.7827, "step": 39465 }, { "epoch": 1.5195380173243502, "grad_norm": 1.4535999298095703, "learning_rate": 2.7224141406390214e-05, "loss": 0.9174, "step": 39470 }, { "epoch": 1.519730510105871, "grad_norm": 1.9915587902069092, "learning_rate": 2.7203405402698133e-05, "loss": 0.9327, "step": 39475 }, { "epoch": 
1.5199230028873916, "grad_norm": 1.0400042533874512, "learning_rate": 2.7182676055772448e-05, "loss": 0.7821, "step": 39480 }, { "epoch": 1.5201154956689125, "grad_norm": 1.4389541149139404, "learning_rate": 2.7161953367508643e-05, "loss": 0.826, "step": 39485 }, { "epoch": 1.5203079884504331, "grad_norm": 1.9297611713409424, "learning_rate": 2.7141237339801685e-05, "loss": 0.9919, "step": 39490 }, { "epoch": 1.5205004812319538, "grad_norm": 1.1149492263793945, "learning_rate": 2.712052797454594e-05, "loss": 0.7048, "step": 39495 }, { "epoch": 1.5206929740134745, "grad_norm": 1.5707048177719116, "learning_rate": 2.709982527363515e-05, "loss": 0.8483, "step": 39500 }, { "epoch": 1.5208854667949951, "grad_norm": 1.289869785308838, "learning_rate": 2.7079129238962386e-05, "loss": 0.6966, "step": 39505 }, { "epoch": 1.521077959576516, "grad_norm": 1.144384741783142, "learning_rate": 2.7058439872420204e-05, "loss": 0.7713, "step": 39510 }, { "epoch": 1.5212704523580367, "grad_norm": 0.8240516781806946, "learning_rate": 2.7037757175900503e-05, "loss": 0.7334, "step": 39515 }, { "epoch": 1.5214629451395574, "grad_norm": 2.318265199661255, "learning_rate": 2.7017081151294576e-05, "loss": 0.8006, "step": 39520 }, { "epoch": 1.521655437921078, "grad_norm": 1.3979312181472778, "learning_rate": 2.6996411800493137e-05, "loss": 0.7323, "step": 39525 }, { "epoch": 1.5218479307025987, "grad_norm": 0.9698436856269836, "learning_rate": 2.6975749125386207e-05, "loss": 0.952, "step": 39530 }, { "epoch": 1.5220404234841194, "grad_norm": 1.6334707736968994, "learning_rate": 2.6955093127863272e-05, "loss": 0.7626, "step": 39535 }, { "epoch": 1.52223291626564, "grad_norm": 1.1066991090774536, "learning_rate": 2.6934443809813203e-05, "loss": 0.7948, "step": 39540 }, { "epoch": 1.5224254090471607, "grad_norm": 1.206701636314392, "learning_rate": 2.6913801173124253e-05, "loss": 0.6289, "step": 39545 }, { "epoch": 1.5226179018286814, "grad_norm": 1.3689944744110107, "learning_rate": 
2.689316521968398e-05, "loss": 0.7864, "step": 39550 }, { "epoch": 1.522810394610202, "grad_norm": 2.103961944580078, "learning_rate": 2.687253595137952e-05, "loss": 0.6763, "step": 39555 }, { "epoch": 1.5230028873917227, "grad_norm": 1.0298928022384644, "learning_rate": 2.6851913370097194e-05, "loss": 0.7179, "step": 39560 }, { "epoch": 1.5231953801732434, "grad_norm": 1.2706328630447388, "learning_rate": 2.6831297477722815e-05, "loss": 0.6922, "step": 39565 }, { "epoch": 1.523387872954764, "grad_norm": 0.9494262933731079, "learning_rate": 2.6810688276141592e-05, "loss": 0.8778, "step": 39570 }, { "epoch": 1.5235803657362847, "grad_norm": 0.9296857118606567, "learning_rate": 2.6790085767238126e-05, "loss": 0.8307, "step": 39575 }, { "epoch": 1.5237728585178054, "grad_norm": 1.7679259777069092, "learning_rate": 2.6769489952896275e-05, "loss": 0.8162, "step": 39580 }, { "epoch": 1.5239653512993263, "grad_norm": 1.1842856407165527, "learning_rate": 2.6748900834999534e-05, "loss": 0.724, "step": 39585 }, { "epoch": 1.524157844080847, "grad_norm": 0.9693066477775574, "learning_rate": 2.672831841543053e-05, "loss": 0.8326, "step": 39590 }, { "epoch": 1.5243503368623676, "grad_norm": 1.6108996868133545, "learning_rate": 2.6707742696071425e-05, "loss": 0.7797, "step": 39595 }, { "epoch": 1.5245428296438883, "grad_norm": 2.4159984588623047, "learning_rate": 2.6687173678803734e-05, "loss": 0.9232, "step": 39600 }, { "epoch": 1.5247353224254092, "grad_norm": 1.5150139331817627, "learning_rate": 2.6666611365508388e-05, "loss": 0.7605, "step": 39605 }, { "epoch": 1.5249278152069299, "grad_norm": 1.5143542289733887, "learning_rate": 2.6646055758065613e-05, "loss": 0.9481, "step": 39610 }, { "epoch": 1.5251203079884506, "grad_norm": 0.9554632306098938, "learning_rate": 2.662550685835511e-05, "loss": 0.9355, "step": 39615 }, { "epoch": 1.5253128007699712, "grad_norm": 1.936998963356018, "learning_rate": 2.660496466825595e-05, "loss": 0.7781, "step": 39620 }, { "epoch": 
1.525505293551492, "grad_norm": 2.4360251426696777, "learning_rate": 2.6584429189646576e-05, "loss": 0.8181, "step": 39625 }, { "epoch": 1.5256977863330126, "grad_norm": 1.0304756164550781, "learning_rate": 2.6563900424404843e-05, "loss": 0.6611, "step": 39630 }, { "epoch": 1.5258902791145332, "grad_norm": 1.5006756782531738, "learning_rate": 2.6543378374407913e-05, "loss": 0.8139, "step": 39635 }, { "epoch": 1.526082771896054, "grad_norm": 2.475623846054077, "learning_rate": 2.652286304153243e-05, "loss": 0.8565, "step": 39640 }, { "epoch": 1.5262752646775746, "grad_norm": 1.3540338277816772, "learning_rate": 2.6502354427654375e-05, "loss": 0.9392, "step": 39645 }, { "epoch": 1.5264677574590952, "grad_norm": 1.5708937644958496, "learning_rate": 2.6481852534649165e-05, "loss": 0.7956, "step": 39650 }, { "epoch": 1.526660250240616, "grad_norm": 1.0419323444366455, "learning_rate": 2.646135736439147e-05, "loss": 0.7812, "step": 39655 }, { "epoch": 1.5268527430221366, "grad_norm": 1.3944579362869263, "learning_rate": 2.6440868918755556e-05, "loss": 0.9234, "step": 39660 }, { "epoch": 1.5270452358036573, "grad_norm": 1.2345746755599976, "learning_rate": 2.642038719961486e-05, "loss": 0.7003, "step": 39665 }, { "epoch": 1.527237728585178, "grad_norm": 1.7590235471725464, "learning_rate": 2.6399912208842337e-05, "loss": 1.0322, "step": 39670 }, { "epoch": 1.5274302213666986, "grad_norm": 1.5209788084030151, "learning_rate": 2.637944394831028e-05, "loss": 0.7132, "step": 39675 }, { "epoch": 1.5276227141482195, "grad_norm": 1.2953201532363892, "learning_rate": 2.635898241989042e-05, "loss": 0.8862, "step": 39680 }, { "epoch": 1.5278152069297402, "grad_norm": 0.8863399028778076, "learning_rate": 2.6338527625453725e-05, "loss": 0.8335, "step": 39685 }, { "epoch": 1.5280076997112608, "grad_norm": 0.9998905062675476, "learning_rate": 2.631807956687078e-05, "loss": 0.8523, "step": 39690 }, { "epoch": 1.5282001924927815, "grad_norm": 1.509939432144165, "learning_rate": 
2.62976382460113e-05, "loss": 0.7919, "step": 39695 }, { "epoch": 1.5283926852743022, "grad_norm": 1.2834575176239014, "learning_rate": 2.627720366474463e-05, "loss": 0.8921, "step": 39700 }, { "epoch": 1.528585178055823, "grad_norm": 0.9761720299720764, "learning_rate": 2.6256775824939283e-05, "loss": 0.7869, "step": 39705 }, { "epoch": 1.5287776708373437, "grad_norm": 2.2494795322418213, "learning_rate": 2.6236354728463286e-05, "loss": 0.765, "step": 39710 }, { "epoch": 1.5289701636188644, "grad_norm": 1.8769516944885254, "learning_rate": 2.6215940377184002e-05, "loss": 0.6916, "step": 39715 }, { "epoch": 1.529162656400385, "grad_norm": 0.8097474575042725, "learning_rate": 2.6195532772968235e-05, "loss": 0.7983, "step": 39720 }, { "epoch": 1.5293551491819057, "grad_norm": 1.8454664945602417, "learning_rate": 2.617513191768205e-05, "loss": 0.8183, "step": 39725 }, { "epoch": 1.5295476419634264, "grad_norm": 1.6850463151931763, "learning_rate": 2.615473781319101e-05, "loss": 1.0079, "step": 39730 }, { "epoch": 1.529740134744947, "grad_norm": 1.442956566810608, "learning_rate": 2.6134350461360013e-05, "loss": 0.7613, "step": 39735 }, { "epoch": 1.5299326275264677, "grad_norm": 1.101926326751709, "learning_rate": 2.6113969864053356e-05, "loss": 0.7822, "step": 39740 }, { "epoch": 1.5301251203079884, "grad_norm": 0.8331781625747681, "learning_rate": 2.6093596023134736e-05, "loss": 0.8167, "step": 39745 }, { "epoch": 1.530317613089509, "grad_norm": 1.0116935968399048, "learning_rate": 2.6073228940467142e-05, "loss": 0.9155, "step": 39750 }, { "epoch": 1.5305101058710298, "grad_norm": 1.2244682312011719, "learning_rate": 2.605286861791304e-05, "loss": 0.7738, "step": 39755 }, { "epoch": 1.5307025986525504, "grad_norm": 1.583099126815796, "learning_rate": 2.603251505733424e-05, "loss": 0.8342, "step": 39760 }, { "epoch": 1.530895091434071, "grad_norm": 2.7618401050567627, "learning_rate": 2.6012168260591997e-05, "loss": 0.7933, "step": 39765 }, { "epoch": 
1.5310875842155918, "grad_norm": 1.7654789686203003, "learning_rate": 2.5991828229546766e-05, "loss": 0.9115, "step": 39770 }, { "epoch": 1.5312800769971127, "grad_norm": 1.1165564060211182, "learning_rate": 2.597149496605865e-05, "loss": 0.8808, "step": 39775 }, { "epoch": 1.5314725697786333, "grad_norm": 2.018768310546875, "learning_rate": 2.5951168471986896e-05, "loss": 0.6884, "step": 39780 }, { "epoch": 1.531665062560154, "grad_norm": 1.567855954170227, "learning_rate": 2.593084874919025e-05, "loss": 0.7812, "step": 39785 }, { "epoch": 1.5318575553416747, "grad_norm": 1.3749490976333618, "learning_rate": 2.5910535799526834e-05, "loss": 0.8049, "step": 39790 }, { "epoch": 1.5320500481231953, "grad_norm": 0.9157447814941406, "learning_rate": 2.5890229624854146e-05, "loss": 0.7877, "step": 39795 }, { "epoch": 1.5322425409047162, "grad_norm": 1.0359165668487549, "learning_rate": 2.586993022702897e-05, "loss": 0.6694, "step": 39800 }, { "epoch": 1.532435033686237, "grad_norm": 1.5438929796218872, "learning_rate": 2.584963760790766e-05, "loss": 0.7098, "step": 39805 }, { "epoch": 1.5326275264677576, "grad_norm": 1.700568675994873, "learning_rate": 2.5829351769345765e-05, "loss": 0.6406, "step": 39810 }, { "epoch": 1.5328200192492782, "grad_norm": 1.7474377155303955, "learning_rate": 2.5809072713198313e-05, "loss": 0.7675, "step": 39815 }, { "epoch": 1.533012512030799, "grad_norm": 1.255887746810913, "learning_rate": 2.5788800441319693e-05, "loss": 0.8331, "step": 39820 }, { "epoch": 1.5332050048123196, "grad_norm": 1.3320515155792236, "learning_rate": 2.5768534955563694e-05, "loss": 0.685, "step": 39825 }, { "epoch": 1.5333974975938403, "grad_norm": 1.2977993488311768, "learning_rate": 2.574827625778341e-05, "loss": 0.9356, "step": 39830 }, { "epoch": 1.533589990375361, "grad_norm": 1.2286765575408936, "learning_rate": 2.572802434983139e-05, "loss": 0.7974, "step": 39835 }, { "epoch": 1.5337824831568816, "grad_norm": 1.5404809713363647, "learning_rate": 
2.5707779233559537e-05, "loss": 0.8141, "step": 39840 }, { "epoch": 1.5339749759384023, "grad_norm": 2.166562795639038, "learning_rate": 2.568754091081913e-05, "loss": 0.7906, "step": 39845 }, { "epoch": 1.534167468719923, "grad_norm": 2.0202114582061768, "learning_rate": 2.5667309383460857e-05, "loss": 0.8012, "step": 39850 }, { "epoch": 1.5343599615014436, "grad_norm": 1.2766706943511963, "learning_rate": 2.5647084653334707e-05, "loss": 0.8452, "step": 39855 }, { "epoch": 1.5345524542829643, "grad_norm": 1.7136098146438599, "learning_rate": 2.562686672229012e-05, "loss": 0.7912, "step": 39860 }, { "epoch": 1.534744947064485, "grad_norm": 1.3089460134506226, "learning_rate": 2.56066555921759e-05, "loss": 0.7512, "step": 39865 }, { "epoch": 1.5349374398460056, "grad_norm": 1.317051649093628, "learning_rate": 2.558645126484024e-05, "loss": 0.9469, "step": 39870 }, { "epoch": 1.5351299326275265, "grad_norm": 1.0458593368530273, "learning_rate": 2.5566253742130608e-05, "loss": 0.8018, "step": 39875 }, { "epoch": 1.5353224254090472, "grad_norm": 0.9164959788322449, "learning_rate": 2.5546063025894053e-05, "loss": 0.7815, "step": 39880 }, { "epoch": 1.5355149181905678, "grad_norm": 1.2855099439620972, "learning_rate": 2.5525879117976794e-05, "loss": 0.7955, "step": 39885 }, { "epoch": 1.5357074109720885, "grad_norm": 1.3438894748687744, "learning_rate": 2.5505702020224542e-05, "loss": 0.724, "step": 39890 }, { "epoch": 1.5358999037536092, "grad_norm": 1.1192864179611206, "learning_rate": 2.548553173448236e-05, "loss": 0.8516, "step": 39895 }, { "epoch": 1.53609239653513, "grad_norm": 1.1312519311904907, "learning_rate": 2.5465368262594726e-05, "loss": 0.8709, "step": 39900 }, { "epoch": 1.5362848893166507, "grad_norm": 1.1640228033065796, "learning_rate": 2.544521160640535e-05, "loss": 0.8806, "step": 39905 }, { "epoch": 1.5364773820981714, "grad_norm": 0.9546281695365906, "learning_rate": 2.5425061767757553e-05, "loss": 0.7814, "step": 39910 }, { "epoch": 
1.536669874879692, "grad_norm": 1.2589070796966553, "learning_rate": 2.540491874849381e-05, "loss": 0.8007, "step": 39915 }, { "epoch": 1.5368623676612128, "grad_norm": 1.1835721731185913, "learning_rate": 2.53847825504561e-05, "loss": 0.8729, "step": 39920 }, { "epoch": 1.5370548604427334, "grad_norm": 1.403292179107666, "learning_rate": 2.5364653175485754e-05, "loss": 0.8297, "step": 39925 }, { "epoch": 1.537247353224254, "grad_norm": 1.9332057237625122, "learning_rate": 2.534453062542348e-05, "loss": 0.8723, "step": 39930 }, { "epoch": 1.5374398460057748, "grad_norm": 1.8926912546157837, "learning_rate": 2.5324414902109316e-05, "loss": 0.732, "step": 39935 }, { "epoch": 1.5376323387872954, "grad_norm": 1.3487472534179688, "learning_rate": 2.5304306007382716e-05, "loss": 0.863, "step": 39940 }, { "epoch": 1.537824831568816, "grad_norm": 1.4540005922317505, "learning_rate": 2.528420394308253e-05, "loss": 0.6911, "step": 39945 }, { "epoch": 1.5380173243503368, "grad_norm": 1.1179677248001099, "learning_rate": 2.526410871104694e-05, "loss": 0.779, "step": 39950 }, { "epoch": 1.5382098171318574, "grad_norm": 1.332623839378357, "learning_rate": 2.5244020313113558e-05, "loss": 0.6558, "step": 39955 }, { "epoch": 1.5384023099133781, "grad_norm": 2.073150634765625, "learning_rate": 2.5223938751119248e-05, "loss": 0.9246, "step": 39960 }, { "epoch": 1.5385948026948988, "grad_norm": 1.640775203704834, "learning_rate": 2.5203864026900448e-05, "loss": 0.7962, "step": 39965 }, { "epoch": 1.5387872954764197, "grad_norm": 1.4655123949050903, "learning_rate": 2.518379614229278e-05, "loss": 0.8892, "step": 39970 }, { "epoch": 1.5389797882579404, "grad_norm": 0.9323534369468689, "learning_rate": 2.5163735099131336e-05, "loss": 0.7165, "step": 39975 }, { "epoch": 1.539172281039461, "grad_norm": 1.2594026327133179, "learning_rate": 2.5143680899250577e-05, "loss": 0.6767, "step": 39980 }, { "epoch": 1.5393647738209817, "grad_norm": 1.3983330726623535, "learning_rate": 
2.5123633544484347e-05, "loss": 0.7979, "step": 39985 }, { "epoch": 1.5395572666025024, "grad_norm": 1.1931166648864746, "learning_rate": 2.510359303666576e-05, "loss": 0.8501, "step": 39990 }, { "epoch": 1.5397497593840233, "grad_norm": 1.4925061464309692, "learning_rate": 2.5083559377627518e-05, "loss": 0.8053, "step": 39995 }, { "epoch": 1.539942252165544, "grad_norm": 1.4643807411193848, "learning_rate": 2.5063532569201454e-05, "loss": 0.765, "step": 40000 }, { "epoch": 1.5401347449470646, "grad_norm": 1.959747076034546, "learning_rate": 2.5043512613218933e-05, "loss": 0.8532, "step": 40005 }, { "epoch": 1.5403272377285853, "grad_norm": 1.0300486087799072, "learning_rate": 2.502349951151064e-05, "loss": 0.7775, "step": 40010 }, { "epoch": 1.540519730510106, "grad_norm": 1.6674004793167114, "learning_rate": 2.5003493265906664e-05, "loss": 0.9549, "step": 40015 }, { "epoch": 1.5407122232916266, "grad_norm": 1.486302137374878, "learning_rate": 2.4983493878236374e-05, "loss": 0.7536, "step": 40020 }, { "epoch": 1.5409047160731473, "grad_norm": 0.8989318013191223, "learning_rate": 2.496350135032869e-05, "loss": 0.805, "step": 40025 }, { "epoch": 1.541097208854668, "grad_norm": 1.692155361175537, "learning_rate": 2.49435156840117e-05, "loss": 0.8884, "step": 40030 }, { "epoch": 1.5412897016361886, "grad_norm": 0.7467209100723267, "learning_rate": 2.4923536881112997e-05, "loss": 0.7268, "step": 40035 }, { "epoch": 1.5414821944177093, "grad_norm": 1.0695582628250122, "learning_rate": 2.490356494345951e-05, "loss": 0.6906, "step": 40040 }, { "epoch": 1.54167468719923, "grad_norm": 1.119673728942871, "learning_rate": 2.4883599872877583e-05, "loss": 0.8238, "step": 40045 }, { "epoch": 1.5418671799807506, "grad_norm": 1.4105931520462036, "learning_rate": 2.4863641671192806e-05, "loss": 0.7352, "step": 40050 }, { "epoch": 1.5420596727622713, "grad_norm": 0.8062414526939392, "learning_rate": 2.4843690340230265e-05, "loss": 0.8423, "step": 40055 }, { "epoch": 
1.542252165543792, "grad_norm": 1.100274682044983, "learning_rate": 2.4823745881814374e-05, "loss": 0.7665, "step": 40060 }, { "epoch": 1.5424446583253129, "grad_norm": 1.5957090854644775, "learning_rate": 2.4803808297768937e-05, "loss": 0.7665, "step": 40065 }, { "epoch": 1.5426371511068335, "grad_norm": 1.4533089399337769, "learning_rate": 2.4783877589917125e-05, "loss": 0.7874, "step": 40070 }, { "epoch": 1.5428296438883542, "grad_norm": 1.0134419202804565, "learning_rate": 2.4763953760081414e-05, "loss": 0.8836, "step": 40075 }, { "epoch": 1.5430221366698749, "grad_norm": 1.2606679201126099, "learning_rate": 2.4744036810083738e-05, "loss": 0.8233, "step": 40080 }, { "epoch": 1.5432146294513955, "grad_norm": 1.0272793769836426, "learning_rate": 2.472412674174538e-05, "loss": 0.8782, "step": 40085 }, { "epoch": 1.5434071222329164, "grad_norm": 0.853097140789032, "learning_rate": 2.4704223556886998e-05, "loss": 0.7858, "step": 40090 }, { "epoch": 1.543599615014437, "grad_norm": 1.5345498323440552, "learning_rate": 2.4684327257328522e-05, "loss": 0.7227, "step": 40095 }, { "epoch": 1.5437921077959578, "grad_norm": 1.565811276435852, "learning_rate": 2.4664437844889454e-05, "loss": 0.8012, "step": 40100 }, { "epoch": 1.5439846005774784, "grad_norm": 1.7416857481002808, "learning_rate": 2.4644555321388462e-05, "loss": 0.7459, "step": 40105 }, { "epoch": 1.544177093358999, "grad_norm": 1.2711718082427979, "learning_rate": 2.4624679688643716e-05, "loss": 0.7688, "step": 40110 }, { "epoch": 1.5443695861405198, "grad_norm": 1.38042414188385, "learning_rate": 2.4604810948472677e-05, "loss": 0.8042, "step": 40115 }, { "epoch": 1.5445620789220404, "grad_norm": 1.0315542221069336, "learning_rate": 2.458494910269228e-05, "loss": 0.8793, "step": 40120 }, { "epoch": 1.5447545717035611, "grad_norm": 1.4990910291671753, "learning_rate": 2.456509415311864e-05, "loss": 0.766, "step": 40125 }, { "epoch": 1.5449470644850818, "grad_norm": 1.0002154111862183, "learning_rate": 
2.4545246101567476e-05, "loss": 0.8452, "step": 40130 }, { "epoch": 1.5451395572666025, "grad_norm": 1.0912269353866577, "learning_rate": 2.4525404949853702e-05, "loss": 0.71, "step": 40135 }, { "epoch": 1.5453320500481231, "grad_norm": 1.2968130111694336, "learning_rate": 2.4505570699791668e-05, "loss": 0.6927, "step": 40140 }, { "epoch": 1.5455245428296438, "grad_norm": 0.9810501933097839, "learning_rate": 2.448574335319508e-05, "loss": 0.821, "step": 40145 }, { "epoch": 1.5457170356111645, "grad_norm": 1.6552939414978027, "learning_rate": 2.446592291187706e-05, "loss": 0.8898, "step": 40150 }, { "epoch": 1.5459095283926851, "grad_norm": 2.9171223640441895, "learning_rate": 2.4446109377649996e-05, "loss": 0.8895, "step": 40155 }, { "epoch": 1.5461020211742058, "grad_norm": 1.7031809091567993, "learning_rate": 2.4426302752325735e-05, "loss": 0.7551, "step": 40160 }, { "epoch": 1.5462945139557267, "grad_norm": 0.709255039691925, "learning_rate": 2.4406503037715445e-05, "loss": 0.7308, "step": 40165 }, { "epoch": 1.5464870067372474, "grad_norm": 1.2134255170822144, "learning_rate": 2.4386710235629708e-05, "loss": 0.8311, "step": 40170 }, { "epoch": 1.546679499518768, "grad_norm": 0.7918189764022827, "learning_rate": 2.436692434787844e-05, "loss": 0.7764, "step": 40175 }, { "epoch": 1.5468719923002887, "grad_norm": 1.3109943866729736, "learning_rate": 2.4347145376270896e-05, "loss": 0.8472, "step": 40180 }, { "epoch": 1.5470644850818094, "grad_norm": 1.0307000875473022, "learning_rate": 2.4327373322615754e-05, "loss": 0.8618, "step": 40185 }, { "epoch": 1.5472569778633303, "grad_norm": 0.8745335936546326, "learning_rate": 2.430760818872103e-05, "loss": 0.7465, "step": 40190 }, { "epoch": 1.547449470644851, "grad_norm": 1.271261215209961, "learning_rate": 2.428784997639415e-05, "loss": 0.6754, "step": 40195 }, { "epoch": 1.5476419634263716, "grad_norm": 0.9863643646240234, "learning_rate": 2.42680986874418e-05, "loss": 0.7717, "step": 40200 }, { "epoch": 
1.5478344562078923, "grad_norm": 1.5643993616104126, "learning_rate": 2.4248354323670185e-05, "loss": 0.8209, "step": 40205 }, { "epoch": 1.548026948989413, "grad_norm": 1.2394016981124878, "learning_rate": 2.4228616886884713e-05, "loss": 0.8473, "step": 40210 }, { "epoch": 1.5482194417709336, "grad_norm": 1.1060245037078857, "learning_rate": 2.420888637889034e-05, "loss": 0.6498, "step": 40215 }, { "epoch": 1.5484119345524543, "grad_norm": 1.6194653511047363, "learning_rate": 2.4189162801491206e-05, "loss": 0.839, "step": 40220 }, { "epoch": 1.548604427333975, "grad_norm": 0.9702569842338562, "learning_rate": 2.4169446156490938e-05, "loss": 0.8054, "step": 40225 }, { "epoch": 1.5487969201154956, "grad_norm": 0.964542031288147, "learning_rate": 2.4149736445692483e-05, "loss": 0.7966, "step": 40230 }, { "epoch": 1.5489894128970163, "grad_norm": 1.230900526046753, "learning_rate": 2.413003367089821e-05, "loss": 0.8332, "step": 40235 }, { "epoch": 1.549181905678537, "grad_norm": 0.8581804633140564, "learning_rate": 2.411033783390969e-05, "loss": 0.8305, "step": 40240 }, { "epoch": 1.5493743984600576, "grad_norm": 1.232013463973999, "learning_rate": 2.4090648936528125e-05, "loss": 0.7663, "step": 40245 }, { "epoch": 1.5495668912415783, "grad_norm": 1.6917791366577148, "learning_rate": 2.407096698055382e-05, "loss": 0.8289, "step": 40250 }, { "epoch": 1.549759384023099, "grad_norm": 1.128301978111267, "learning_rate": 2.4051291967786605e-05, "loss": 0.8291, "step": 40255 }, { "epoch": 1.5499518768046199, "grad_norm": 1.8056789636611938, "learning_rate": 2.4031623900025624e-05, "loss": 0.7495, "step": 40260 }, { "epoch": 1.5501443695861405, "grad_norm": 1.2374507188796997, "learning_rate": 2.4011962779069432e-05, "loss": 0.7374, "step": 40265 }, { "epoch": 1.5503368623676612, "grad_norm": 1.8159735202789307, "learning_rate": 2.3992308606715828e-05, "loss": 0.7937, "step": 40270 }, { "epoch": 1.5505293551491819, "grad_norm": 1.4417248964309692, "learning_rate": 
2.3972661384762096e-05, "loss": 0.8844, "step": 40275 }, { "epoch": 1.5507218479307026, "grad_norm": 1.61116623878479, "learning_rate": 2.3953021115004858e-05, "loss": 0.8133, "step": 40280 }, { "epoch": 1.5509143407122234, "grad_norm": 1.6744754314422607, "learning_rate": 2.393338779924006e-05, "loss": 0.7884, "step": 40285 }, { "epoch": 1.5511068334937441, "grad_norm": 1.0945570468902588, "learning_rate": 2.3913761439263095e-05, "loss": 0.9085, "step": 40290 }, { "epoch": 1.5512993262752648, "grad_norm": 1.8907279968261719, "learning_rate": 2.3894142036868583e-05, "loss": 0.9115, "step": 40295 }, { "epoch": 1.5514918190567855, "grad_norm": 1.0637871026992798, "learning_rate": 2.3874529593850624e-05, "loss": 0.7011, "step": 40300 }, { "epoch": 1.5516843118383061, "grad_norm": 1.661790132522583, "learning_rate": 2.3854924112002665e-05, "loss": 0.7207, "step": 40305 }, { "epoch": 1.5518768046198268, "grad_norm": 1.138431429862976, "learning_rate": 2.3835325593117498e-05, "loss": 0.8999, "step": 40310 }, { "epoch": 1.5520692974013475, "grad_norm": 1.0955361127853394, "learning_rate": 2.381573403898719e-05, "loss": 0.8033, "step": 40315 }, { "epoch": 1.5522617901828681, "grad_norm": 0.4663851857185364, "learning_rate": 2.3796149451403405e-05, "loss": 0.8223, "step": 40320 }, { "epoch": 1.5524542829643888, "grad_norm": 0.9485166072845459, "learning_rate": 2.3776571832156914e-05, "loss": 0.8846, "step": 40325 }, { "epoch": 1.5526467757459095, "grad_norm": 0.8865009546279907, "learning_rate": 2.3757001183037998e-05, "loss": 0.7741, "step": 40330 }, { "epoch": 1.5528392685274301, "grad_norm": 1.5837032794952393, "learning_rate": 2.3737437505836257e-05, "loss": 0.8743, "step": 40335 }, { "epoch": 1.5530317613089508, "grad_norm": 1.4707670211791992, "learning_rate": 2.3717880802340698e-05, "loss": 0.8894, "step": 40340 }, { "epoch": 1.5532242540904715, "grad_norm": 1.321226954460144, "learning_rate": 2.3698331074339553e-05, "loss": 0.9362, "step": 40345 }, { "epoch": 
1.5534167468719922, "grad_norm": 1.60404372215271, "learning_rate": 2.3678788323620638e-05, "loss": 0.7989, "step": 40350 }, { "epoch": 1.5536092396535128, "grad_norm": 1.4875679016113281, "learning_rate": 2.3659252551970922e-05, "loss": 0.8456, "step": 40355 }, { "epoch": 1.5538017324350337, "grad_norm": 1.7115007638931274, "learning_rate": 2.3639723761176847e-05, "loss": 0.8346, "step": 40360 }, { "epoch": 1.5539942252165544, "grad_norm": 1.4487700462341309, "learning_rate": 2.362020195302419e-05, "loss": 0.7359, "step": 40365 }, { "epoch": 1.554186717998075, "grad_norm": 1.6501834392547607, "learning_rate": 2.3600687129298126e-05, "loss": 0.769, "step": 40370 }, { "epoch": 1.5543792107795957, "grad_norm": 1.4506645202636719, "learning_rate": 2.3581179291783094e-05, "loss": 0.9698, "step": 40375 }, { "epoch": 1.5545717035611166, "grad_norm": 1.098969578742981, "learning_rate": 2.356167844226299e-05, "loss": 0.8277, "step": 40380 }, { "epoch": 1.5547641963426373, "grad_norm": 0.9754076600074768, "learning_rate": 2.3542184582521034e-05, "loss": 0.754, "step": 40385 }, { "epoch": 1.554956689124158, "grad_norm": 1.5836515426635742, "learning_rate": 2.3522697714339814e-05, "loss": 0.8996, "step": 40390 }, { "epoch": 1.5551491819056786, "grad_norm": 1.1244460344314575, "learning_rate": 2.3503217839501302e-05, "loss": 0.721, "step": 40395 }, { "epoch": 1.5553416746871993, "grad_norm": 0.96147620677948, "learning_rate": 2.3483744959786735e-05, "loss": 0.7998, "step": 40400 }, { "epoch": 1.55553416746872, "grad_norm": 0.9615457653999329, "learning_rate": 2.3464279076976837e-05, "loss": 0.9238, "step": 40405 }, { "epoch": 1.5557266602502406, "grad_norm": 2.3235690593719482, "learning_rate": 2.344482019285159e-05, "loss": 0.9334, "step": 40410 }, { "epoch": 1.5559191530317613, "grad_norm": 1.226501703262329, "learning_rate": 2.3425368309190455e-05, "loss": 0.8288, "step": 40415 }, { "epoch": 1.556111645813282, "grad_norm": 1.4257621765136719, "learning_rate": 
2.3405923427772057e-05, "loss": 0.5693, "step": 40420 }, { "epoch": 1.5563041385948027, "grad_norm": 1.0261151790618896, "learning_rate": 2.3386485550374636e-05, "loss": 0.9288, "step": 40425 }, { "epoch": 1.5564966313763233, "grad_norm": 1.0929063558578491, "learning_rate": 2.3367054678775557e-05, "loss": 0.7563, "step": 40430 }, { "epoch": 1.556689124157844, "grad_norm": 0.8762004375457764, "learning_rate": 2.3347630814751687e-05, "loss": 0.7824, "step": 40435 }, { "epoch": 1.5568816169393647, "grad_norm": 1.4772645235061646, "learning_rate": 2.3328213960079205e-05, "loss": 0.8197, "step": 40440 }, { "epoch": 1.5570741097208853, "grad_norm": 1.4557557106018066, "learning_rate": 2.330880411653368e-05, "loss": 0.8008, "step": 40445 }, { "epoch": 1.557266602502406, "grad_norm": 1.2183635234832764, "learning_rate": 2.3289401285889934e-05, "loss": 0.769, "step": 40450 }, { "epoch": 1.557459095283927, "grad_norm": 0.9675846695899963, "learning_rate": 2.327000546992233e-05, "loss": 0.8019, "step": 40455 }, { "epoch": 1.5576515880654476, "grad_norm": 1.5185277462005615, "learning_rate": 2.32506166704044e-05, "loss": 0.6456, "step": 40460 }, { "epoch": 1.5578440808469682, "grad_norm": 1.6056936979293823, "learning_rate": 2.3231234889109165e-05, "loss": 0.8468, "step": 40465 }, { "epoch": 1.558036573628489, "grad_norm": 1.182332992553711, "learning_rate": 2.3211860127808948e-05, "loss": 0.991, "step": 40470 }, { "epoch": 1.5582290664100096, "grad_norm": 1.1876064538955688, "learning_rate": 2.3192492388275454e-05, "loss": 0.7254, "step": 40475 }, { "epoch": 1.5584215591915305, "grad_norm": 1.7529163360595703, "learning_rate": 2.3173131672279725e-05, "loss": 0.9351, "step": 40480 }, { "epoch": 1.5586140519730511, "grad_norm": 1.0285496711730957, "learning_rate": 2.3153777981592207e-05, "loss": 0.835, "step": 40485 }, { "epoch": 1.5588065447545718, "grad_norm": 1.4586035013198853, "learning_rate": 2.3134431317982597e-05, "loss": 0.7694, "step": 40490 }, { "epoch": 
1.5589990375360925, "grad_norm": 1.656645655632019, "learning_rate": 2.311509168322006e-05, "loss": 0.7945, "step": 40495 }, { "epoch": 1.5591915303176132, "grad_norm": 1.66303551197052, "learning_rate": 2.30957590790731e-05, "loss": 0.7964, "step": 40500 }, { "epoch": 1.5593840230991338, "grad_norm": 1.375352382659912, "learning_rate": 2.307643350730947e-05, "loss": 0.9546, "step": 40505 }, { "epoch": 1.5595765158806545, "grad_norm": 1.1943459510803223, "learning_rate": 2.305711496969648e-05, "loss": 0.6616, "step": 40510 }, { "epoch": 1.5597690086621752, "grad_norm": 1.4341318607330322, "learning_rate": 2.3037803468000597e-05, "loss": 0.8164, "step": 40515 }, { "epoch": 1.5599615014436958, "grad_norm": 1.3383629322052002, "learning_rate": 2.301849900398776e-05, "loss": 0.8711, "step": 40520 }, { "epoch": 1.5601539942252165, "grad_norm": 1.8261584043502808, "learning_rate": 2.2999201579423236e-05, "loss": 0.8195, "step": 40525 }, { "epoch": 1.5603464870067372, "grad_norm": 2.052516222000122, "learning_rate": 2.2979911196071668e-05, "loss": 0.9955, "step": 40530 }, { "epoch": 1.5605389797882578, "grad_norm": 1.2053110599517822, "learning_rate": 2.2960627855696958e-05, "loss": 0.8021, "step": 40535 }, { "epoch": 1.5607314725697785, "grad_norm": 1.301857590675354, "learning_rate": 2.294135156006255e-05, "loss": 0.8293, "step": 40540 }, { "epoch": 1.5609239653512992, "grad_norm": 1.1416163444519043, "learning_rate": 2.2922082310931036e-05, "loss": 0.8486, "step": 40545 }, { "epoch": 1.56111645813282, "grad_norm": 1.3656333684921265, "learning_rate": 2.2902820110064503e-05, "loss": 0.7598, "step": 40550 }, { "epoch": 1.5613089509143407, "grad_norm": 2.20866060256958, "learning_rate": 2.288356495922436e-05, "loss": 0.7617, "step": 40555 }, { "epoch": 1.5615014436958614, "grad_norm": 1.5742281675338745, "learning_rate": 2.2864316860171375e-05, "loss": 0.9051, "step": 40560 }, { "epoch": 1.561693936477382, "grad_norm": 1.7272670269012451, "learning_rate": 
2.2845075814665573e-05, "loss": 0.8531, "step": 40565 }, { "epoch": 1.5618864292589028, "grad_norm": 1.8699864149093628, "learning_rate": 2.2825841824466543e-05, "loss": 0.7353, "step": 40570 }, { "epoch": 1.5620789220404236, "grad_norm": 1.5581755638122559, "learning_rate": 2.2806614891333022e-05, "loss": 0.7466, "step": 40575 }, { "epoch": 1.5622714148219443, "grad_norm": 1.340266227722168, "learning_rate": 2.2787395017023205e-05, "loss": 0.7033, "step": 40580 }, { "epoch": 1.562463907603465, "grad_norm": 2.0326457023620605, "learning_rate": 2.276818220329463e-05, "loss": 0.8551, "step": 40585 }, { "epoch": 1.5626564003849857, "grad_norm": 1.2885262966156006, "learning_rate": 2.2748976451904203e-05, "loss": 0.7386, "step": 40590 }, { "epoch": 1.5628488931665063, "grad_norm": 0.5644479393959045, "learning_rate": 2.2729777764608108e-05, "loss": 0.618, "step": 40595 }, { "epoch": 1.563041385948027, "grad_norm": 1.572131633758545, "learning_rate": 2.2710586143161972e-05, "loss": 0.7215, "step": 40600 }, { "epoch": 1.5632338787295477, "grad_norm": 1.1839364767074585, "learning_rate": 2.2691401589320737e-05, "loss": 0.7607, "step": 40605 }, { "epoch": 1.5634263715110683, "grad_norm": 1.3653178215026855, "learning_rate": 2.2672224104838713e-05, "loss": 0.6762, "step": 40610 }, { "epoch": 1.563618864292589, "grad_norm": 1.8901535272598267, "learning_rate": 2.2653053691469563e-05, "loss": 0.8515, "step": 40615 }, { "epoch": 1.5638113570741097, "grad_norm": 1.8246430158615112, "learning_rate": 2.2633890350966248e-05, "loss": 0.85, "step": 40620 }, { "epoch": 1.5640038498556303, "grad_norm": 1.524446725845337, "learning_rate": 2.2614734085081158e-05, "loss": 0.7622, "step": 40625 }, { "epoch": 1.564196342637151, "grad_norm": 1.142157793045044, "learning_rate": 2.2595584895566e-05, "loss": 0.7196, "step": 40630 }, { "epoch": 1.5643888354186717, "grad_norm": 1.0782500505447388, "learning_rate": 2.2576442784171892e-05, "loss": 0.7491, "step": 40635 }, { "epoch": 
1.5645813282001924, "grad_norm": 1.034097671508789, "learning_rate": 2.2557307752649137e-05, "loss": 0.851, "step": 40640 }, { "epoch": 1.564773820981713, "grad_norm": 1.894054889678955, "learning_rate": 2.253817980274764e-05, "loss": 0.6389, "step": 40645 }, { "epoch": 1.564966313763234, "grad_norm": 0.6015737056732178, "learning_rate": 2.251905893621642e-05, "loss": 0.749, "step": 40650 }, { "epoch": 1.5651588065447546, "grad_norm": 0.9904623627662659, "learning_rate": 2.2499945154804013e-05, "loss": 0.836, "step": 40655 }, { "epoch": 1.5653512993262753, "grad_norm": 1.3582662343978882, "learning_rate": 2.2480838460258226e-05, "loss": 0.8239, "step": 40660 }, { "epoch": 1.565543792107796, "grad_norm": 1.4158751964569092, "learning_rate": 2.2461738854326263e-05, "loss": 0.8528, "step": 40665 }, { "epoch": 1.5657362848893166, "grad_norm": 1.0276498794555664, "learning_rate": 2.244264633875459e-05, "loss": 0.7761, "step": 40670 }, { "epoch": 1.5659287776708375, "grad_norm": 1.4849311113357544, "learning_rate": 2.24235609152892e-05, "loss": 0.934, "step": 40675 }, { "epoch": 1.5661212704523582, "grad_norm": 1.024477481842041, "learning_rate": 2.2404482585675225e-05, "loss": 0.675, "step": 40680 }, { "epoch": 1.5663137632338788, "grad_norm": 1.2070591449737549, "learning_rate": 2.2385411351657303e-05, "loss": 0.7506, "step": 40685 }, { "epoch": 1.5665062560153995, "grad_norm": 1.2458285093307495, "learning_rate": 2.2366347214979366e-05, "loss": 0.6916, "step": 40690 }, { "epoch": 1.5666987487969202, "grad_norm": 1.7189850807189941, "learning_rate": 2.2347290177384726e-05, "loss": 0.7032, "step": 40695 }, { "epoch": 1.5668912415784408, "grad_norm": 1.3378642797470093, "learning_rate": 2.2328240240615972e-05, "loss": 0.6995, "step": 40700 }, { "epoch": 1.5670837343599615, "grad_norm": 1.4483795166015625, "learning_rate": 2.2309197406415117e-05, "loss": 0.7532, "step": 40705 }, { "epoch": 1.5672762271414822, "grad_norm": 1.1107079982757568, "learning_rate": 
2.2290161676523503e-05, "loss": 0.7787, "step": 40710 }, { "epoch": 1.5674687199230029, "grad_norm": 1.3404992818832397, "learning_rate": 2.2271133052681825e-05, "loss": 0.7247, "step": 40715 }, { "epoch": 1.5676612127045235, "grad_norm": 1.0901662111282349, "learning_rate": 2.2252111536630148e-05, "loss": 0.8229, "step": 40720 }, { "epoch": 1.5678537054860442, "grad_norm": 1.3521157503128052, "learning_rate": 2.2233097130107782e-05, "loss": 0.6774, "step": 40725 }, { "epoch": 1.5680461982675649, "grad_norm": 1.1512130498886108, "learning_rate": 2.221408983485358e-05, "loss": 0.8416, "step": 40730 }, { "epoch": 1.5682386910490855, "grad_norm": 1.2217384576797485, "learning_rate": 2.219508965260555e-05, "loss": 0.8276, "step": 40735 }, { "epoch": 1.5684311838306062, "grad_norm": 1.0364052057266235, "learning_rate": 2.217609658510117e-05, "loss": 0.7916, "step": 40740 }, { "epoch": 1.568623676612127, "grad_norm": 1.143720269203186, "learning_rate": 2.2157110634077215e-05, "loss": 0.98, "step": 40745 }, { "epoch": 1.5688161693936478, "grad_norm": 1.7398204803466797, "learning_rate": 2.2138131801269857e-05, "loss": 0.8718, "step": 40750 }, { "epoch": 1.5690086621751684, "grad_norm": 1.4039459228515625, "learning_rate": 2.2119160088414502e-05, "loss": 1.0399, "step": 40755 }, { "epoch": 1.569201154956689, "grad_norm": 1.1382685899734497, "learning_rate": 2.2100195497246103e-05, "loss": 0.8066, "step": 40760 }, { "epoch": 1.5693936477382098, "grad_norm": 1.2807059288024902, "learning_rate": 2.208123802949875e-05, "loss": 0.8231, "step": 40765 }, { "epoch": 1.5695861405197307, "grad_norm": 1.0869920253753662, "learning_rate": 2.2062287686906026e-05, "loss": 0.7107, "step": 40770 }, { "epoch": 1.5697786333012513, "grad_norm": 1.4672589302062988, "learning_rate": 2.20433444712008e-05, "loss": 0.6363, "step": 40775 }, { "epoch": 1.569971126082772, "grad_norm": 1.185530185699463, "learning_rate": 2.2024408384115337e-05, "loss": 0.7595, "step": 40780 }, { "epoch": 
1.5701636188642927, "grad_norm": 1.210688829421997, "learning_rate": 2.2005479427381126e-05, "loss": 0.7845, "step": 40785 }, { "epoch": 1.5703561116458133, "grad_norm": 0.9549643397331238, "learning_rate": 2.1986557602729207e-05, "loss": 0.7966, "step": 40790 }, { "epoch": 1.570548604427334, "grad_norm": 1.4014770984649658, "learning_rate": 2.1967642911889787e-05, "loss": 0.7121, "step": 40795 }, { "epoch": 1.5707410972088547, "grad_norm": 1.2537885904312134, "learning_rate": 2.1948735356592497e-05, "loss": 0.8161, "step": 40800 }, { "epoch": 1.5709335899903754, "grad_norm": 1.4344686269760132, "learning_rate": 2.1929834938566317e-05, "loss": 0.9185, "step": 40805 }, { "epoch": 1.571126082771896, "grad_norm": 1.5772898197174072, "learning_rate": 2.1910941659539585e-05, "loss": 0.7617, "step": 40810 }, { "epoch": 1.5713185755534167, "grad_norm": 0.9699328541755676, "learning_rate": 2.189205552123993e-05, "loss": 0.7618, "step": 40815 }, { "epoch": 1.5715110683349374, "grad_norm": 1.8218029737472534, "learning_rate": 2.1873176525394378e-05, "loss": 0.8044, "step": 40820 }, { "epoch": 1.571703561116458, "grad_norm": 1.0945191383361816, "learning_rate": 2.1854304673729287e-05, "loss": 0.8599, "step": 40825 }, { "epoch": 1.5718960538979787, "grad_norm": 1.4021522998809814, "learning_rate": 2.183543996797036e-05, "loss": 0.8188, "step": 40830 }, { "epoch": 1.5720885466794994, "grad_norm": 0.9821120500564575, "learning_rate": 2.181658240984269e-05, "loss": 0.8562, "step": 40835 }, { "epoch": 1.57228103946102, "grad_norm": 1.1095430850982666, "learning_rate": 2.1797732001070613e-05, "loss": 0.7987, "step": 40840 }, { "epoch": 1.572473532242541, "grad_norm": 1.1971663236618042, "learning_rate": 2.17788887433779e-05, "loss": 0.942, "step": 40845 }, { "epoch": 1.5726660250240616, "grad_norm": 1.4306426048278809, "learning_rate": 2.176005263848765e-05, "loss": 0.922, "step": 40850 }, { "epoch": 1.5728585178055823, "grad_norm": 1.3114017248153687, "learning_rate": 
2.1741223688122313e-05, "loss": 0.76, "step": 40855 }, { "epoch": 1.573051010587103, "grad_norm": 1.3243577480316162, "learning_rate": 2.1722401894003608e-05, "loss": 0.7896, "step": 40860 }, { "epoch": 1.5732435033686238, "grad_norm": 1.723612666130066, "learning_rate": 2.1703587257852755e-05, "loss": 0.9003, "step": 40865 }, { "epoch": 1.5734359961501445, "grad_norm": 1.3303959369659424, "learning_rate": 2.1684779781390152e-05, "loss": 0.8007, "step": 40870 }, { "epoch": 1.5736284889316652, "grad_norm": 1.080396056175232, "learning_rate": 2.166597946633565e-05, "loss": 0.7348, "step": 40875 }, { "epoch": 1.5738209817131859, "grad_norm": 0.7364131808280945, "learning_rate": 2.1647186314408408e-05, "loss": 0.8049, "step": 40880 }, { "epoch": 1.5740134744947065, "grad_norm": 0.581088662147522, "learning_rate": 2.162840032732697e-05, "loss": 0.7795, "step": 40885 }, { "epoch": 1.5742059672762272, "grad_norm": 1.5997105836868286, "learning_rate": 2.1609621506809097e-05, "loss": 0.7561, "step": 40890 }, { "epoch": 1.5743984600577479, "grad_norm": 1.5176271200180054, "learning_rate": 2.1590849854572114e-05, "loss": 0.6649, "step": 40895 }, { "epoch": 1.5745909528392685, "grad_norm": 2.400883197784424, "learning_rate": 2.1572085372332463e-05, "loss": 0.8173, "step": 40900 }, { "epoch": 1.5747834456207892, "grad_norm": 1.7215204238891602, "learning_rate": 2.1553328061806065e-05, "loss": 0.8483, "step": 40905 }, { "epoch": 1.5749759384023099, "grad_norm": 1.2602962255477905, "learning_rate": 2.1534577924708155e-05, "loss": 0.7841, "step": 40910 }, { "epoch": 1.5751684311838305, "grad_norm": 2.122758388519287, "learning_rate": 2.1515834962753346e-05, "loss": 0.7834, "step": 40915 }, { "epoch": 1.5753609239653512, "grad_norm": 1.6246029138565063, "learning_rate": 2.1497099177655476e-05, "loss": 0.7015, "step": 40920 }, { "epoch": 1.5755534167468719, "grad_norm": 1.3783003091812134, "learning_rate": 2.147837057112786e-05, "loss": 0.9372, "step": 40925 }, { "epoch": 
1.5757459095283926, "grad_norm": 1.430508017539978, "learning_rate": 2.14596491448831e-05, "loss": 0.8844, "step": 40930 }, { "epoch": 1.5759384023099132, "grad_norm": 1.1913694143295288, "learning_rate": 2.1440934900633148e-05, "loss": 0.8196, "step": 40935 }, { "epoch": 1.5761308950914341, "grad_norm": 0.9103003740310669, "learning_rate": 2.1422227840089303e-05, "loss": 0.8644, "step": 40940 }, { "epoch": 1.5763233878729548, "grad_norm": 1.2136942148208618, "learning_rate": 2.1403527964962176e-05, "loss": 0.8063, "step": 40945 }, { "epoch": 1.5765158806544755, "grad_norm": 1.0555609464645386, "learning_rate": 2.1384835276961767e-05, "loss": 0.7176, "step": 40950 }, { "epoch": 1.5767083734359961, "grad_norm": 1.4605985879898071, "learning_rate": 2.1369886302441366e-05, "loss": 0.8852, "step": 40955 }, { "epoch": 1.5769008662175168, "grad_norm": 1.9542622566223145, "learning_rate": 2.1351206555576065e-05, "loss": 0.8881, "step": 40960 }, { "epoch": 1.5770933589990377, "grad_norm": 0.9330739974975586, "learning_rate": 2.1332534000621972e-05, "loss": 0.701, "step": 40965 }, { "epoch": 1.5772858517805584, "grad_norm": 1.3597526550292969, "learning_rate": 2.1313868639286494e-05, "loss": 0.928, "step": 40970 }, { "epoch": 1.577478344562079, "grad_norm": 1.327373743057251, "learning_rate": 2.1295210473276484e-05, "loss": 0.8114, "step": 40975 }, { "epoch": 1.5776708373435997, "grad_norm": 1.1426680088043213, "learning_rate": 2.12765595042981e-05, "loss": 0.8321, "step": 40980 }, { "epoch": 1.5778633301251204, "grad_norm": 1.3431953191757202, "learning_rate": 2.1257915734056854e-05, "loss": 0.7583, "step": 40985 }, { "epoch": 1.578055822906641, "grad_norm": 1.6750779151916504, "learning_rate": 2.123927916425763e-05, "loss": 0.7295, "step": 40990 }, { "epoch": 1.5782483156881617, "grad_norm": 1.191678762435913, "learning_rate": 2.122064979660455e-05, "loss": 0.731, "step": 40995 }, { "epoch": 1.5784408084696824, "grad_norm": 1.6331219673156738, "learning_rate": 
2.120202763280118e-05, "loss": 0.7803, "step": 41000 }, { "epoch": 1.578633301251203, "grad_norm": 1.1647868156433105, "learning_rate": 2.1183412674550396e-05, "loss": 0.8047, "step": 41005 }, { "epoch": 1.5788257940327237, "grad_norm": 1.082160472869873, "learning_rate": 2.1164804923554438e-05, "loss": 0.7405, "step": 41010 }, { "epoch": 1.5790182868142444, "grad_norm": 2.297400951385498, "learning_rate": 2.1146204381514788e-05, "loss": 1.0183, "step": 41015 }, { "epoch": 1.579210779595765, "grad_norm": 1.0821828842163086, "learning_rate": 2.1127611050132435e-05, "loss": 0.9013, "step": 41020 }, { "epoch": 1.5794032723772857, "grad_norm": 0.7487876415252686, "learning_rate": 2.1109024931107547e-05, "loss": 0.7388, "step": 41025 }, { "epoch": 1.5795957651588064, "grad_norm": 1.8875432014465332, "learning_rate": 2.109044602613971e-05, "loss": 0.9369, "step": 41030 }, { "epoch": 1.5797882579403273, "grad_norm": 1.49079167842865, "learning_rate": 2.1071874336927876e-05, "loss": 0.8084, "step": 41035 }, { "epoch": 1.579980750721848, "grad_norm": 1.307343602180481, "learning_rate": 2.10533098651703e-05, "loss": 0.8797, "step": 41040 }, { "epoch": 1.5801732435033686, "grad_norm": 1.5751981735229492, "learning_rate": 2.1034752612564503e-05, "loss": 0.6406, "step": 41045 }, { "epoch": 1.5803657362848893, "grad_norm": 1.0212968587875366, "learning_rate": 2.1016202580807544e-05, "loss": 0.8681, "step": 41050 }, { "epoch": 1.58055822906641, "grad_norm": 0.9244958758354187, "learning_rate": 2.0997659771595612e-05, "loss": 0.76, "step": 41055 }, { "epoch": 1.5807507218479309, "grad_norm": 2.024540662765503, "learning_rate": 2.0979124186624356e-05, "loss": 0.8869, "step": 41060 }, { "epoch": 1.5809432146294515, "grad_norm": 1.2309986352920532, "learning_rate": 2.0960595827588713e-05, "loss": 0.8485, "step": 41065 }, { "epoch": 1.5811357074109722, "grad_norm": 1.1824150085449219, "learning_rate": 2.0942074696183033e-05, "loss": 0.8635, "step": 41070 }, { "epoch": 
1.5813282001924929, "grad_norm": 1.0491746664047241, "learning_rate": 2.092356079410086e-05, "loss": 0.8331, "step": 41075 }, { "epoch": 1.5815206929740135, "grad_norm": 1.1368030309677124, "learning_rate": 2.090505412303526e-05, "loss": 0.6937, "step": 41080 }, { "epoch": 1.5817131857555342, "grad_norm": 1.2097387313842773, "learning_rate": 2.0886554684678485e-05, "loss": 0.7318, "step": 41085 }, { "epoch": 1.5819056785370549, "grad_norm": 2.0431954860687256, "learning_rate": 2.0868062480722206e-05, "loss": 0.8352, "step": 41090 }, { "epoch": 1.5820981713185756, "grad_norm": 1.763728141784668, "learning_rate": 2.084957751285741e-05, "loss": 0.7479, "step": 41095 }, { "epoch": 1.5822906641000962, "grad_norm": 1.1527445316314697, "learning_rate": 2.0831099782774455e-05, "loss": 0.8316, "step": 41100 }, { "epoch": 1.582483156881617, "grad_norm": 1.493467092514038, "learning_rate": 2.0812629292162955e-05, "loss": 0.772, "step": 41105 }, { "epoch": 1.5826756496631376, "grad_norm": 1.3418041467666626, "learning_rate": 2.0794166042711936e-05, "loss": 0.7486, "step": 41110 }, { "epoch": 1.5828681424446582, "grad_norm": 0.8369565606117249, "learning_rate": 2.0775710036109762e-05, "loss": 0.7444, "step": 41115 }, { "epoch": 1.583060635226179, "grad_norm": 1.1210306882858276, "learning_rate": 2.0757261274044048e-05, "loss": 0.7875, "step": 41120 }, { "epoch": 1.5832531280076996, "grad_norm": 1.1858896017074585, "learning_rate": 2.07388197582019e-05, "loss": 0.7042, "step": 41125 }, { "epoch": 1.5834456207892202, "grad_norm": 0.9110993146896362, "learning_rate": 2.072038549026961e-05, "loss": 0.7668, "step": 41130 }, { "epoch": 1.5836381135707411, "grad_norm": 1.5564924478530884, "learning_rate": 2.070195847193288e-05, "loss": 0.7743, "step": 41135 }, { "epoch": 1.5838306063522618, "grad_norm": 1.6773813962936401, "learning_rate": 2.068353870487675e-05, "loss": 0.7443, "step": 41140 }, { "epoch": 1.5840230991337825, "grad_norm": 1.0506762266159058, "learning_rate": 
2.066512619078561e-05, "loss": 0.681, "step": 41145 }, { "epoch": 1.5842155919153031, "grad_norm": 1.599478006362915, "learning_rate": 2.0646720931343078e-05, "loss": 0.7801, "step": 41150 }, { "epoch": 1.5844080846968238, "grad_norm": 0.9615776538848877, "learning_rate": 2.06283229282323e-05, "loss": 0.6929, "step": 41155 }, { "epoch": 1.5846005774783447, "grad_norm": 1.4779179096221924, "learning_rate": 2.0609932183135582e-05, "loss": 0.8112, "step": 41160 }, { "epoch": 1.5847930702598654, "grad_norm": 1.1211433410644531, "learning_rate": 2.059154869773465e-05, "loss": 0.7598, "step": 41165 }, { "epoch": 1.584985563041386, "grad_norm": 1.0643943548202515, "learning_rate": 2.0573172473710567e-05, "loss": 0.8607, "step": 41170 }, { "epoch": 1.5851780558229067, "grad_norm": 2.5210201740264893, "learning_rate": 2.0554803512743724e-05, "loss": 0.9052, "step": 41175 }, { "epoch": 1.5853705486044274, "grad_norm": 1.6734479665756226, "learning_rate": 2.053644181651376e-05, "loss": 0.8745, "step": 41180 }, { "epoch": 1.585563041385948, "grad_norm": 1.5669529438018799, "learning_rate": 2.051808738669987e-05, "loss": 0.7996, "step": 41185 }, { "epoch": 1.5857555341674687, "grad_norm": 1.2366806268692017, "learning_rate": 2.0499740224980325e-05, "loss": 0.719, "step": 41190 }, { "epoch": 1.5859480269489894, "grad_norm": 1.0737662315368652, "learning_rate": 2.0481400333032897e-05, "loss": 0.6766, "step": 41195 }, { "epoch": 1.58614051973051, "grad_norm": 2.486666679382324, "learning_rate": 2.0463067712534656e-05, "loss": 0.9854, "step": 41200 }, { "epoch": 1.5863330125120307, "grad_norm": 1.0239930152893066, "learning_rate": 2.0444742365162005e-05, "loss": 0.8435, "step": 41205 }, { "epoch": 1.5865255052935514, "grad_norm": 1.0103557109832764, "learning_rate": 2.042642429259064e-05, "loss": 0.8264, "step": 41210 }, { "epoch": 1.586717998075072, "grad_norm": 1.7183647155761719, "learning_rate": 2.040811349649564e-05, "loss": 0.781, "step": 41215 }, { "epoch": 
1.5869104908565927, "grad_norm": 1.1959103345870972, "learning_rate": 2.038980997855142e-05, "loss": 0.7821, "step": 41220 }, { "epoch": 1.5871029836381134, "grad_norm": 1.4438652992248535, "learning_rate": 2.03715137404317e-05, "loss": 0.8455, "step": 41225 }, { "epoch": 1.5872954764196343, "grad_norm": 1.0840461254119873, "learning_rate": 2.0353224783809587e-05, "loss": 0.7846, "step": 41230 }, { "epoch": 1.587487969201155, "grad_norm": 1.6811954975128174, "learning_rate": 2.033494311035744e-05, "loss": 0.8039, "step": 41235 }, { "epoch": 1.5876804619826757, "grad_norm": 1.2145577669143677, "learning_rate": 2.0316668721747e-05, "loss": 0.8147, "step": 41240 }, { "epoch": 1.5878729547641963, "grad_norm": 1.1736549139022827, "learning_rate": 2.0298401619649353e-05, "loss": 1.0046, "step": 41245 }, { "epoch": 1.588065447545717, "grad_norm": 1.1645379066467285, "learning_rate": 2.028014180573491e-05, "loss": 0.8703, "step": 41250 }, { "epoch": 1.5882579403272379, "grad_norm": 1.2913013696670532, "learning_rate": 2.0261889281673397e-05, "loss": 0.8041, "step": 41255 }, { "epoch": 1.5884504331087586, "grad_norm": 1.1715826988220215, "learning_rate": 2.0243644049133926e-05, "loss": 0.8894, "step": 41260 }, { "epoch": 1.5886429258902792, "grad_norm": 1.0594674348831177, "learning_rate": 2.0225406109784805e-05, "loss": 0.8548, "step": 41265 }, { "epoch": 1.5888354186718, "grad_norm": 2.4181666374206543, "learning_rate": 2.0207175465293904e-05, "loss": 0.8165, "step": 41270 }, { "epoch": 1.5890279114533206, "grad_norm": 0.9365816712379456, "learning_rate": 2.0188952117328186e-05, "loss": 0.8741, "step": 41275 }, { "epoch": 1.5892204042348412, "grad_norm": 2.1108558177948, "learning_rate": 2.0170736067554108e-05, "loss": 0.8202, "step": 41280 }, { "epoch": 1.589412897016362, "grad_norm": 1.5129166841506958, "learning_rate": 2.0152527317637394e-05, "loss": 0.7616, "step": 41285 }, { "epoch": 1.5896053897978826, "grad_norm": 1.32972252368927, "learning_rate": 
2.0134325869243144e-05, "loss": 0.905, "step": 41290 }, { "epoch": 1.5897978825794032, "grad_norm": 0.950168788433075, "learning_rate": 2.011613172403567e-05, "loss": 0.6316, "step": 41295 }, { "epoch": 1.589990375360924, "grad_norm": 0.9195626378059387, "learning_rate": 2.009794488367882e-05, "loss": 0.8385, "step": 41300 }, { "epoch": 1.5901828681424446, "grad_norm": 1.176831603050232, "learning_rate": 2.007976534983559e-05, "loss": 0.8977, "step": 41305 }, { "epoch": 1.5903753609239653, "grad_norm": 1.0982027053833008, "learning_rate": 2.0061593124168398e-05, "loss": 0.8981, "step": 41310 }, { "epoch": 1.590567853705486, "grad_norm": 1.0368854999542236, "learning_rate": 2.0043428208338987e-05, "loss": 0.6795, "step": 41315 }, { "epoch": 1.5907603464870066, "grad_norm": 0.7842391729354858, "learning_rate": 2.0025270604008384e-05, "loss": 0.6644, "step": 41320 }, { "epoch": 1.5909528392685275, "grad_norm": 1.4070217609405518, "learning_rate": 2.0007120312837e-05, "loss": 0.7587, "step": 41325 }, { "epoch": 1.5911453320500482, "grad_norm": 1.3950802087783813, "learning_rate": 1.998897733648456e-05, "loss": 0.8808, "step": 41330 }, { "epoch": 1.5913378248315688, "grad_norm": 0.873643696308136, "learning_rate": 1.9970841676610143e-05, "loss": 0.8078, "step": 41335 }, { "epoch": 1.5915303176130895, "grad_norm": 1.5144951343536377, "learning_rate": 1.995271333487205e-05, "loss": 0.7164, "step": 41340 }, { "epoch": 1.5917228103946102, "grad_norm": 1.031936526298523, "learning_rate": 1.993459231292811e-05, "loss": 0.6438, "step": 41345 }, { "epoch": 1.591915303176131, "grad_norm": 1.1276479959487915, "learning_rate": 1.991647861243531e-05, "loss": 0.8856, "step": 41350 }, { "epoch": 1.5921077959576517, "grad_norm": 0.9697341322898865, "learning_rate": 1.9898372235050022e-05, "loss": 0.788, "step": 41355 }, { "epoch": 1.5923002887391724, "grad_norm": 1.0358269214630127, "learning_rate": 1.9880273182427965e-05, "loss": 0.7337, "step": 41360 }, { "epoch": 1.592492781520693, 
"grad_norm": 1.1483445167541504, "learning_rate": 1.9862181456224216e-05, "loss": 0.8329, "step": 41365 }, { "epoch": 1.5926852743022137, "grad_norm": 0.7864820957183838, "learning_rate": 1.9844097058093047e-05, "loss": 0.664, "step": 41370 }, { "epoch": 1.5928777670837344, "grad_norm": 0.8901979327201843, "learning_rate": 1.9826019989688283e-05, "loss": 0.7352, "step": 41375 }, { "epoch": 1.593070259865255, "grad_norm": 1.5492757558822632, "learning_rate": 1.9807950252662854e-05, "loss": 0.8297, "step": 41380 }, { "epoch": 1.5932627526467757, "grad_norm": 1.5439116954803467, "learning_rate": 1.9789887848669143e-05, "loss": 0.7502, "step": 41385 }, { "epoch": 1.5934552454282964, "grad_norm": 1.6057932376861572, "learning_rate": 1.9771832779358857e-05, "loss": 0.8005, "step": 41390 }, { "epoch": 1.593647738209817, "grad_norm": 1.489683985710144, "learning_rate": 1.9753785046383022e-05, "loss": 0.8509, "step": 41395 }, { "epoch": 1.5938402309913378, "grad_norm": 1.2398594617843628, "learning_rate": 1.9735744651391906e-05, "loss": 0.839, "step": 41400 }, { "epoch": 1.5940327237728584, "grad_norm": 0.5610536932945251, "learning_rate": 1.9717711596035292e-05, "loss": 0.686, "step": 41405 }, { "epoch": 1.594225216554379, "grad_norm": 1.1076955795288086, "learning_rate": 1.9699685881962115e-05, "loss": 0.7216, "step": 41410 }, { "epoch": 1.5944177093358998, "grad_norm": 0.8716223835945129, "learning_rate": 1.9681667510820713e-05, "loss": 0.8062, "step": 41415 }, { "epoch": 1.5946102021174204, "grad_norm": 0.9298526048660278, "learning_rate": 1.9663656484258764e-05, "loss": 0.8884, "step": 41420 }, { "epoch": 1.5948026948989413, "grad_norm": 1.1228723526000977, "learning_rate": 1.9645652803923266e-05, "loss": 0.7249, "step": 41425 }, { "epoch": 1.594995187680462, "grad_norm": 1.0350871086120605, "learning_rate": 1.9627656471460498e-05, "loss": 0.7171, "step": 41430 }, { "epoch": 1.5951876804619827, "grad_norm": 1.5256117582321167, "learning_rate": 1.9609667488516138e-05, 
"loss": 1.0133, "step": 41435 }, { "epoch": 1.5953801732435033, "grad_norm": 1.4316469430923462, "learning_rate": 1.9591685856735144e-05, "loss": 0.7591, "step": 41440 }, { "epoch": 1.595572666025024, "grad_norm": 1.8641667366027832, "learning_rate": 1.9573711577761812e-05, "loss": 0.8817, "step": 41445 }, { "epoch": 1.595765158806545, "grad_norm": 1.348741054534912, "learning_rate": 1.9555744653239815e-05, "loss": 0.9214, "step": 41450 }, { "epoch": 1.5959576515880656, "grad_norm": 1.2496349811553955, "learning_rate": 1.9537785084812044e-05, "loss": 0.7907, "step": 41455 }, { "epoch": 1.5961501443695862, "grad_norm": 1.1491204500198364, "learning_rate": 1.9519832874120824e-05, "loss": 0.8712, "step": 41460 }, { "epoch": 1.596342637151107, "grad_norm": 0.7588009238243103, "learning_rate": 1.9501888022807745e-05, "loss": 0.7473, "step": 41465 }, { "epoch": 1.5965351299326276, "grad_norm": 1.0782458782196045, "learning_rate": 1.9483950532513783e-05, "loss": 0.7309, "step": 41470 }, { "epoch": 1.5967276227141483, "grad_norm": 1.52632474899292, "learning_rate": 1.9466020404879127e-05, "loss": 0.8698, "step": 41475 }, { "epoch": 1.596920115495669, "grad_norm": 1.286037564277649, "learning_rate": 1.9448097641543462e-05, "loss": 0.8768, "step": 41480 }, { "epoch": 1.5971126082771896, "grad_norm": 0.8476093411445618, "learning_rate": 1.9430182244145646e-05, "loss": 0.9017, "step": 41485 }, { "epoch": 1.5973051010587103, "grad_norm": 1.0609407424926758, "learning_rate": 1.9412274214323923e-05, "loss": 0.8112, "step": 41490 }, { "epoch": 1.597497593840231, "grad_norm": 1.7310614585876465, "learning_rate": 1.9394373553715885e-05, "loss": 1.0244, "step": 41495 }, { "epoch": 1.5976900866217516, "grad_norm": 1.7635456323623657, "learning_rate": 1.9376480263958453e-05, "loss": 0.8217, "step": 41500 }, { "epoch": 1.5978825794032723, "grad_norm": 0.9183743000030518, "learning_rate": 1.9358594346687765e-05, "loss": 0.8512, "step": 41505 }, { "epoch": 1.598075072184793, "grad_norm": 
1.0998347997665405, "learning_rate": 1.9340715803539466e-05, "loss": 0.7804, "step": 41510 }, { "epoch": 1.5982675649663136, "grad_norm": 1.388878345489502, "learning_rate": 1.9322844636148375e-05, "loss": 0.8285, "step": 41515 }, { "epoch": 1.5984600577478345, "grad_norm": 1.1632678508758545, "learning_rate": 1.930498084614869e-05, "loss": 0.7176, "step": 41520 }, { "epoch": 1.5986525505293552, "grad_norm": 0.8704983592033386, "learning_rate": 1.9287124435173964e-05, "loss": 0.7018, "step": 41525 }, { "epoch": 1.5988450433108758, "grad_norm": 1.2163587808609009, "learning_rate": 1.9269275404857022e-05, "loss": 0.6076, "step": 41530 }, { "epoch": 1.5990375360923965, "grad_norm": 0.8491012454032898, "learning_rate": 1.9251433756830095e-05, "loss": 0.7527, "step": 41535 }, { "epoch": 1.5992300288739172, "grad_norm": 0.996997058391571, "learning_rate": 1.9233599492724607e-05, "loss": 0.6848, "step": 41540 }, { "epoch": 1.599422521655438, "grad_norm": 1.3385977745056152, "learning_rate": 1.9215772614171413e-05, "loss": 0.8753, "step": 41545 }, { "epoch": 1.5996150144369587, "grad_norm": 0.9963300228118896, "learning_rate": 1.919795312280067e-05, "loss": 0.8053, "step": 41550 }, { "epoch": 1.5998075072184794, "grad_norm": 0.8091634511947632, "learning_rate": 1.918014102024187e-05, "loss": 0.8183, "step": 41555 }, { "epoch": 1.6, "grad_norm": 1.6417735815048218, "learning_rate": 1.916233630812374e-05, "loss": 0.7796, "step": 41560 }, { "epoch": 1.6001924927815208, "grad_norm": 0.8539758324623108, "learning_rate": 1.914453898807451e-05, "loss": 0.7709, "step": 41565 }, { "epoch": 1.6003849855630414, "grad_norm": 1.14285409450531, "learning_rate": 1.912674906172155e-05, "loss": 0.762, "step": 41570 }, { "epoch": 1.600577478344562, "grad_norm": 1.2252355813980103, "learning_rate": 1.910896653069165e-05, "loss": 0.9033, "step": 41575 }, { "epoch": 1.6007699711260828, "grad_norm": 1.3306511640548706, "learning_rate": 1.9091191396610895e-05, "loss": 0.8857, "step": 41580 }, { 
"epoch": 1.6009624639076034, "grad_norm": 0.9147464632987976, "learning_rate": 1.9073423661104762e-05, "loss": 0.7447, "step": 41585 }, { "epoch": 1.601154956689124, "grad_norm": 1.4393258094787598, "learning_rate": 1.9055663325797877e-05, "loss": 0.7681, "step": 41590 }, { "epoch": 1.6013474494706448, "grad_norm": 1.5441241264343262, "learning_rate": 1.903791039231443e-05, "loss": 0.9034, "step": 41595 }, { "epoch": 1.6015399422521654, "grad_norm": 1.5993388891220093, "learning_rate": 1.9020164862277724e-05, "loss": 0.7531, "step": 41600 }, { "epoch": 1.6017324350336861, "grad_norm": 1.3545401096343994, "learning_rate": 1.900242673731051e-05, "loss": 0.7417, "step": 41605 }, { "epoch": 1.6019249278152068, "grad_norm": 2.0168793201446533, "learning_rate": 1.89846960190348e-05, "loss": 0.83, "step": 41610 }, { "epoch": 1.6021174205967275, "grad_norm": 0.8939989805221558, "learning_rate": 1.8966972709071985e-05, "loss": 0.7973, "step": 41615 }, { "epoch": 1.6023099133782484, "grad_norm": 1.6125478744506836, "learning_rate": 1.894925680904268e-05, "loss": 0.8518, "step": 41620 }, { "epoch": 1.602502406159769, "grad_norm": 1.4295742511749268, "learning_rate": 1.8931548320566972e-05, "loss": 0.782, "step": 41625 }, { "epoch": 1.6026948989412897, "grad_norm": 0.9275886416435242, "learning_rate": 1.8913847245264116e-05, "loss": 0.8498, "step": 41630 }, { "epoch": 1.6028873917228104, "grad_norm": 1.2459815740585327, "learning_rate": 1.8896153584752785e-05, "loss": 0.8219, "step": 41635 }, { "epoch": 1.6030798845043313, "grad_norm": 1.0280636548995972, "learning_rate": 1.887846734065094e-05, "loss": 0.7993, "step": 41640 }, { "epoch": 1.603272377285852, "grad_norm": 0.9304599761962891, "learning_rate": 1.886078851457591e-05, "loss": 0.8449, "step": 41645 }, { "epoch": 1.6034648700673726, "grad_norm": 1.3438136577606201, "learning_rate": 1.884311710814425e-05, "loss": 0.706, "step": 41650 }, { "epoch": 1.6036573628488933, "grad_norm": 1.3000657558441162, "learning_rate": 
1.8825453122971904e-05, "loss": 0.8801, "step": 41655 }, { "epoch": 1.603849855630414, "grad_norm": 0.9097469449043274, "learning_rate": 1.880779656067414e-05, "loss": 0.7417, "step": 41660 }, { "epoch": 1.6040423484119346, "grad_norm": 1.5576119422912598, "learning_rate": 1.8790147422865532e-05, "loss": 0.8143, "step": 41665 }, { "epoch": 1.6042348411934553, "grad_norm": 1.0649263858795166, "learning_rate": 1.8772505711160003e-05, "loss": 0.6215, "step": 41670 }, { "epoch": 1.604427333974976, "grad_norm": 1.2119357585906982, "learning_rate": 1.8754871427170716e-05, "loss": 0.7959, "step": 41675 }, { "epoch": 1.6046198267564966, "grad_norm": 1.5147475004196167, "learning_rate": 1.8737244572510238e-05, "loss": 0.6919, "step": 41680 }, { "epoch": 1.6048123195380173, "grad_norm": 1.540199637413025, "learning_rate": 1.8719625148790432e-05, "loss": 0.9114, "step": 41685 }, { "epoch": 1.605004812319538, "grad_norm": 1.3708090782165527, "learning_rate": 1.8702013157622488e-05, "loss": 0.7575, "step": 41690 }, { "epoch": 1.6051973051010586, "grad_norm": 0.8762415647506714, "learning_rate": 1.8684408600616855e-05, "loss": 0.7135, "step": 41695 }, { "epoch": 1.6053897978825793, "grad_norm": 1.0719411373138428, "learning_rate": 1.866681147938343e-05, "loss": 0.7409, "step": 41700 }, { "epoch": 1.6055822906641, "grad_norm": 1.3425824642181396, "learning_rate": 1.864922179553128e-05, "loss": 1.0134, "step": 41705 }, { "epoch": 1.6057747834456206, "grad_norm": 1.2449208498001099, "learning_rate": 1.8631639550668912e-05, "loss": 0.8516, "step": 41710 }, { "epoch": 1.6059672762271415, "grad_norm": 0.9909605979919434, "learning_rate": 1.861406474640408e-05, "loss": 0.8218, "step": 41715 }, { "epoch": 1.6061597690086622, "grad_norm": 1.1478825807571411, "learning_rate": 1.8596497384343926e-05, "loss": 0.9639, "step": 41720 }, { "epoch": 1.6063522617901829, "grad_norm": 1.305647611618042, "learning_rate": 1.857893746609478e-05, "loss": 0.8432, "step": 41725 }, { "epoch": 
1.6065447545717035, "grad_norm": 1.028073787689209, "learning_rate": 1.8561384993262497e-05, "loss": 0.7462, "step": 41730 }, { "epoch": 1.6067372473532242, "grad_norm": 1.058986783027649, "learning_rate": 1.8543839967452047e-05, "loss": 0.8153, "step": 41735 }, { "epoch": 1.606929740134745, "grad_norm": 1.5460129976272583, "learning_rate": 1.8526302390267836e-05, "loss": 0.7346, "step": 41740 }, { "epoch": 1.6071222329162658, "grad_norm": 1.0355900526046753, "learning_rate": 1.8508772263313556e-05, "loss": 0.8171, "step": 41745 }, { "epoch": 1.6073147256977864, "grad_norm": 1.0379014015197754, "learning_rate": 1.849124958819224e-05, "loss": 0.7432, "step": 41750 }, { "epoch": 1.607507218479307, "grad_norm": 1.5380711555480957, "learning_rate": 1.847373436650619e-05, "loss": 0.9295, "step": 41755 }, { "epoch": 1.6076997112608278, "grad_norm": 1.3490667343139648, "learning_rate": 1.8456226599857064e-05, "loss": 0.9094, "step": 41760 }, { "epoch": 1.6078922040423484, "grad_norm": 1.5740834474563599, "learning_rate": 1.8438726289845833e-05, "loss": 0.7674, "step": 41765 }, { "epoch": 1.6080846968238691, "grad_norm": 0.9285767674446106, "learning_rate": 1.8421233438072795e-05, "loss": 0.7368, "step": 41770 }, { "epoch": 1.6082771896053898, "grad_norm": 1.3774783611297607, "learning_rate": 1.840374804613757e-05, "loss": 0.7023, "step": 41775 }, { "epoch": 1.6084696823869105, "grad_norm": 1.00751793384552, "learning_rate": 1.8386270115639013e-05, "loss": 0.7488, "step": 41780 }, { "epoch": 1.6086621751684311, "grad_norm": 1.7420424222946167, "learning_rate": 1.836879964817546e-05, "loss": 0.9531, "step": 41785 }, { "epoch": 1.6088546679499518, "grad_norm": 1.4065346717834473, "learning_rate": 1.8351336645344408e-05, "loss": 0.8083, "step": 41790 }, { "epoch": 1.6090471607314725, "grad_norm": 1.2782877683639526, "learning_rate": 1.8333881108742736e-05, "loss": 0.9427, "step": 41795 }, { "epoch": 1.6092396535129931, "grad_norm": 1.675103783607483, "learning_rate": 
1.8316433039966653e-05, "loss": 0.9521, "step": 41800 }, { "epoch": 1.6094321462945138, "grad_norm": 1.5068132877349854, "learning_rate": 1.8298992440611686e-05, "loss": 0.8201, "step": 41805 }, { "epoch": 1.6096246390760347, "grad_norm": 1.5667356252670288, "learning_rate": 1.828155931227259e-05, "loss": 0.882, "step": 41810 }, { "epoch": 1.6098171318575554, "grad_norm": 1.710042119026184, "learning_rate": 1.8264133656543613e-05, "loss": 0.867, "step": 41815 }, { "epoch": 1.610009624639076, "grad_norm": 1.5348536968231201, "learning_rate": 1.824671547501814e-05, "loss": 0.8304, "step": 41820 }, { "epoch": 1.6102021174205967, "grad_norm": 1.3562618494033813, "learning_rate": 1.8229304769288956e-05, "loss": 0.8852, "step": 41825 }, { "epoch": 1.6103946102021174, "grad_norm": 1.1753439903259277, "learning_rate": 1.8211901540948183e-05, "loss": 0.8705, "step": 41830 }, { "epoch": 1.6105871029836383, "grad_norm": 0.7911993265151978, "learning_rate": 1.8194505791587245e-05, "loss": 0.642, "step": 41835 }, { "epoch": 1.610779595765159, "grad_norm": 1.2742297649383545, "learning_rate": 1.8177117522796784e-05, "loss": 0.7778, "step": 41840 }, { "epoch": 1.6109720885466796, "grad_norm": 0.5650262236595154, "learning_rate": 1.8159736736166943e-05, "loss": 0.6833, "step": 41845 }, { "epoch": 1.6111645813282003, "grad_norm": 0.8346420526504517, "learning_rate": 1.8142363433287026e-05, "loss": 0.7115, "step": 41850 }, { "epoch": 1.611357074109721, "grad_norm": 2.0539052486419678, "learning_rate": 1.812499761574571e-05, "loss": 0.9054, "step": 41855 }, { "epoch": 1.6115495668912416, "grad_norm": 1.3676276206970215, "learning_rate": 1.8107639285131005e-05, "loss": 0.7522, "step": 41860 }, { "epoch": 1.6117420596727623, "grad_norm": 0.8344974517822266, "learning_rate": 1.809028844303018e-05, "loss": 0.7912, "step": 41865 }, { "epoch": 1.611934552454283, "grad_norm": 2.75577449798584, "learning_rate": 1.807294509102988e-05, "loss": 0.7293, "step": 41870 }, { "epoch": 
1.6121270452358036, "grad_norm": 1.7650033235549927, "learning_rate": 1.8055609230716032e-05, "loss": 0.7737, "step": 41875 }, { "epoch": 1.6123195380173243, "grad_norm": 1.5389347076416016, "learning_rate": 1.8038280863673907e-05, "loss": 0.7563, "step": 41880 }, { "epoch": 1.612512030798845, "grad_norm": 1.5038697719573975, "learning_rate": 1.8020959991488006e-05, "loss": 0.7828, "step": 41885 }, { "epoch": 1.6127045235803656, "grad_norm": 1.3098371028900146, "learning_rate": 1.8003646615742308e-05, "loss": 0.7512, "step": 41890 }, { "epoch": 1.6128970163618863, "grad_norm": 1.2375686168670654, "learning_rate": 1.7986340738019912e-05, "loss": 0.8669, "step": 41895 }, { "epoch": 1.613089509143407, "grad_norm": 1.7652937173843384, "learning_rate": 1.7969042359903376e-05, "loss": 0.6875, "step": 41900 }, { "epoch": 1.6132820019249277, "grad_norm": 1.404327630996704, "learning_rate": 1.795175148297451e-05, "loss": 0.786, "step": 41905 }, { "epoch": 1.6134744947064485, "grad_norm": 1.2358356714248657, "learning_rate": 1.7934468108814472e-05, "loss": 0.7852, "step": 41910 }, { "epoch": 1.6136669874879692, "grad_norm": 1.1059602499008179, "learning_rate": 1.7917192239003644e-05, "loss": 0.7523, "step": 41915 }, { "epoch": 1.6138594802694899, "grad_norm": 1.4840894937515259, "learning_rate": 1.7899923875121882e-05, "loss": 0.7958, "step": 41920 }, { "epoch": 1.6140519730510106, "grad_norm": 1.5999846458435059, "learning_rate": 1.7882663018748193e-05, "loss": 0.8017, "step": 41925 }, { "epoch": 1.6142444658325312, "grad_norm": 1.9015750885009766, "learning_rate": 1.7865409671460996e-05, "loss": 0.8296, "step": 41930 }, { "epoch": 1.6144369586140521, "grad_norm": 1.0894391536712646, "learning_rate": 1.7848163834837995e-05, "loss": 0.8116, "step": 41935 }, { "epoch": 1.6146294513955728, "grad_norm": 1.131320834159851, "learning_rate": 1.783092551045623e-05, "loss": 0.7442, "step": 41940 }, { "epoch": 1.6148219441770935, "grad_norm": 2.2992284297943115, "learning_rate": 
1.781369469989196e-05, "loss": 0.8746, "step": 41945 }, { "epoch": 1.6150144369586141, "grad_norm": 1.1256681680679321, "learning_rate": 1.7796471404720916e-05, "loss": 0.721, "step": 41950 }, { "epoch": 1.6152069297401348, "grad_norm": 1.115007758140564, "learning_rate": 1.7779255626518e-05, "loss": 0.7711, "step": 41955 }, { "epoch": 1.6153994225216555, "grad_norm": 1.9384006261825562, "learning_rate": 1.7762047366857483e-05, "loss": 1.0144, "step": 41960 }, { "epoch": 1.6155919153031761, "grad_norm": 1.0422606468200684, "learning_rate": 1.7744846627312962e-05, "loss": 0.8137, "step": 41965 }, { "epoch": 1.6157844080846968, "grad_norm": 0.9894946217536926, "learning_rate": 1.7727653409457358e-05, "loss": 0.7198, "step": 41970 }, { "epoch": 1.6159769008662175, "grad_norm": 1.2634601593017578, "learning_rate": 1.771046771486281e-05, "loss": 0.9481, "step": 41975 }, { "epoch": 1.6161693936477382, "grad_norm": 1.1557183265686035, "learning_rate": 1.7693289545100876e-05, "loss": 0.8676, "step": 41980 }, { "epoch": 1.6163618864292588, "grad_norm": 1.1935524940490723, "learning_rate": 1.767611890174238e-05, "loss": 0.8162, "step": 41985 }, { "epoch": 1.6165543792107795, "grad_norm": 1.1359078884124756, "learning_rate": 1.7658955786357455e-05, "loss": 0.8553, "step": 41990 }, { "epoch": 1.6167468719923002, "grad_norm": 1.311218500137329, "learning_rate": 1.76418002005156e-05, "loss": 0.745, "step": 41995 }, { "epoch": 1.6169393647738208, "grad_norm": 1.595218300819397, "learning_rate": 1.7624652145785523e-05, "loss": 0.6425, "step": 42000 }, { "epoch": 1.6171318575553417, "grad_norm": 1.5510958433151245, "learning_rate": 1.7607511623735317e-05, "loss": 0.7513, "step": 42005 }, { "epoch": 1.6173243503368624, "grad_norm": 1.0332719087600708, "learning_rate": 1.759037863593237e-05, "loss": 0.7869, "step": 42010 }, { "epoch": 1.617516843118383, "grad_norm": 1.556477427482605, "learning_rate": 1.7573253183943404e-05, "loss": 0.6109, "step": 42015 }, { "epoch": 
1.6177093358999037, "grad_norm": 3.525594472885132, "learning_rate": 1.755613526933436e-05, "loss": 0.9172, "step": 42020 }, { "epoch": 1.6179018286814244, "grad_norm": 2.301964044570923, "learning_rate": 1.7539024893670664e-05, "loss": 1.1278, "step": 42025 }, { "epoch": 1.6180943214629453, "grad_norm": 0.9457361698150635, "learning_rate": 1.7521922058516827e-05, "loss": 0.6383, "step": 42030 }, { "epoch": 1.618286814244466, "grad_norm": 2.5374155044555664, "learning_rate": 1.7504826765436898e-05, "loss": 0.74, "step": 42035 }, { "epoch": 1.6184793070259866, "grad_norm": 1.9584330320358276, "learning_rate": 1.7487739015994064e-05, "loss": 0.693, "step": 42040 }, { "epoch": 1.6186717998075073, "grad_norm": 1.7019546031951904, "learning_rate": 1.7470658811750905e-05, "loss": 0.8639, "step": 42045 }, { "epoch": 1.618864292589028, "grad_norm": 0.9637229442596436, "learning_rate": 1.7453586154269287e-05, "loss": 0.7905, "step": 42050 }, { "epoch": 1.6190567853705486, "grad_norm": 1.0330854654312134, "learning_rate": 1.7436521045110422e-05, "loss": 0.6918, "step": 42055 }, { "epoch": 1.6192492781520693, "grad_norm": 1.4338631629943848, "learning_rate": 1.741946348583474e-05, "loss": 0.7246, "step": 42060 }, { "epoch": 1.61944177093359, "grad_norm": 1.057032823562622, "learning_rate": 1.740241347800209e-05, "loss": 0.8211, "step": 42065 }, { "epoch": 1.6196342637151107, "grad_norm": 1.3909273147583008, "learning_rate": 1.738537102317156e-05, "loss": 0.8421, "step": 42070 }, { "epoch": 1.6198267564966313, "grad_norm": 1.2499020099639893, "learning_rate": 1.7368336122901573e-05, "loss": 0.7806, "step": 42075 }, { "epoch": 1.620019249278152, "grad_norm": 0.8923901319503784, "learning_rate": 1.7351308778749897e-05, "loss": 0.6891, "step": 42080 }, { "epoch": 1.6202117420596727, "grad_norm": 1.582690954208374, "learning_rate": 1.7334288992273505e-05, "loss": 0.8041, "step": 42085 }, { "epoch": 1.6204042348411933, "grad_norm": 2.02708101272583, "learning_rate": 
1.731727676502878e-05, "loss": 0.9111, "step": 42090 }, { "epoch": 1.620596727622714, "grad_norm": 0.7452544569969177, "learning_rate": 1.730027209857137e-05, "loss": 0.7931, "step": 42095 }, { "epoch": 1.6207892204042347, "grad_norm": 1.4981223344802856, "learning_rate": 1.7283274994456267e-05, "loss": 0.8637, "step": 42100 }, { "epoch": 1.6209817131857556, "grad_norm": 1.1747374534606934, "learning_rate": 1.7266285454237664e-05, "loss": 0.8657, "step": 42105 }, { "epoch": 1.6211742059672762, "grad_norm": 1.1444958448410034, "learning_rate": 1.7249303479469247e-05, "loss": 0.8245, "step": 42110 }, { "epoch": 1.621366698748797, "grad_norm": 1.2287684679031372, "learning_rate": 1.7232329071703833e-05, "loss": 0.8287, "step": 42115 }, { "epoch": 1.6215591915303176, "grad_norm": 2.1734228134155273, "learning_rate": 1.7215362232493638e-05, "loss": 0.8472, "step": 42120 }, { "epoch": 1.6217516843118385, "grad_norm": 1.0564271211624146, "learning_rate": 1.719840296339017e-05, "loss": 0.7898, "step": 42125 }, { "epoch": 1.6219441770933591, "grad_norm": 1.1211154460906982, "learning_rate": 1.718145126594426e-05, "loss": 0.8268, "step": 42130 }, { "epoch": 1.6221366698748798, "grad_norm": 1.5351982116699219, "learning_rate": 1.7164507141705967e-05, "loss": 0.8677, "step": 42135 }, { "epoch": 1.6223291626564005, "grad_norm": 1.2579121589660645, "learning_rate": 1.7147570592224803e-05, "loss": 0.9406, "step": 42140 }, { "epoch": 1.6225216554379212, "grad_norm": 1.0373250246047974, "learning_rate": 1.7130641619049436e-05, "loss": 0.7835, "step": 42145 }, { "epoch": 1.6227141482194418, "grad_norm": 1.5844688415527344, "learning_rate": 1.7113720223727937e-05, "loss": 0.8265, "step": 42150 }, { "epoch": 1.6229066410009625, "grad_norm": 1.3220007419586182, "learning_rate": 1.7096806407807653e-05, "loss": 0.8124, "step": 42155 }, { "epoch": 1.6230991337824832, "grad_norm": 1.0909702777862549, "learning_rate": 1.7079900172835263e-05, "loss": 0.7539, "step": 42160 }, { "epoch": 
1.6232916265640038, "grad_norm": 1.1731175184249878, "learning_rate": 1.7063001520356658e-05, "loss": 0.8448, "step": 42165 }, { "epoch": 1.6234841193455245, "grad_norm": 1.3320856094360352, "learning_rate": 1.7046110451917207e-05, "loss": 0.81, "step": 42170 }, { "epoch": 1.6236766121270452, "grad_norm": 1.6085408926010132, "learning_rate": 1.7029226969061407e-05, "loss": 0.8147, "step": 42175 }, { "epoch": 1.6238691049085658, "grad_norm": 1.2450767755508423, "learning_rate": 1.7012351073333168e-05, "loss": 0.7533, "step": 42180 }, { "epoch": 1.6240615976900865, "grad_norm": 0.9313931465148926, "learning_rate": 1.6995482766275682e-05, "loss": 0.8205, "step": 42185 }, { "epoch": 1.6242540904716072, "grad_norm": 2.4100301265716553, "learning_rate": 1.697862204943148e-05, "loss": 0.8043, "step": 42190 }, { "epoch": 1.6244465832531279, "grad_norm": 1.371580958366394, "learning_rate": 1.6965138941945725e-05, "loss": 1.6104, "step": 42195 }, { "epoch": 1.6246390760346487, "grad_norm": 0.8591129779815674, "learning_rate": 1.6948291891370227e-05, "loss": 0.7858, "step": 42200 }, { "epoch": 1.6248315688161694, "grad_norm": 1.4864912033081055, "learning_rate": 1.6931452435323226e-05, "loss": 0.8206, "step": 42205 }, { "epoch": 1.62502406159769, "grad_norm": 1.010176420211792, "learning_rate": 1.6914620575344663e-05, "loss": 0.766, "step": 42210 }, { "epoch": 1.6252165543792108, "grad_norm": 1.7786433696746826, "learning_rate": 1.6897796312973634e-05, "loss": 0.8346, "step": 42215 }, { "epoch": 1.6254090471607314, "grad_norm": 0.9494479894638062, "learning_rate": 1.688097964974863e-05, "loss": 0.8556, "step": 42220 }, { "epoch": 1.6256015399422523, "grad_norm": 1.153810739517212, "learning_rate": 1.686417058720743e-05, "loss": 0.7756, "step": 42225 }, { "epoch": 1.625794032723773, "grad_norm": 1.232348918914795, "learning_rate": 1.6847369126887124e-05, "loss": 0.8343, "step": 42230 }, { "epoch": 1.6259865255052937, "grad_norm": 1.010253667831421, "learning_rate": 
1.6830575270324022e-05, "loss": 0.9215, "step": 42235 }, { "epoch": 1.6261790182868143, "grad_norm": 2.2589335441589355, "learning_rate": 1.6813789019053926e-05, "loss": 0.7625, "step": 42240 }, { "epoch": 1.626371511068335, "grad_norm": 2.0202784538269043, "learning_rate": 1.679701037461173e-05, "loss": 0.8108, "step": 42245 }, { "epoch": 1.6265640038498557, "grad_norm": 1.7455151081085205, "learning_rate": 1.6780239338531777e-05, "loss": 0.946, "step": 42250 }, { "epoch": 1.6267564966313763, "grad_norm": 1.2754290103912354, "learning_rate": 1.676347591234765e-05, "loss": 0.9888, "step": 42255 }, { "epoch": 1.626948989412897, "grad_norm": 1.6412559747695923, "learning_rate": 1.6746720097592285e-05, "loss": 0.8801, "step": 42260 }, { "epoch": 1.6271414821944177, "grad_norm": 1.385617733001709, "learning_rate": 1.6729971895797835e-05, "loss": 0.8472, "step": 42265 }, { "epoch": 1.6273339749759383, "grad_norm": 1.6804052591323853, "learning_rate": 1.6713231308495846e-05, "loss": 0.8365, "step": 42270 }, { "epoch": 1.627526467757459, "grad_norm": 1.2974660396575928, "learning_rate": 1.6696498337217125e-05, "loss": 0.7316, "step": 42275 }, { "epoch": 1.6277189605389797, "grad_norm": 1.354999303817749, "learning_rate": 1.6679772983491804e-05, "loss": 0.9026, "step": 42280 }, { "epoch": 1.6279114533205004, "grad_norm": 1.667365312576294, "learning_rate": 1.666305524884931e-05, "loss": 0.8071, "step": 42285 }, { "epoch": 1.628103946102021, "grad_norm": 0.764616847038269, "learning_rate": 1.664634513481832e-05, "loss": 0.6431, "step": 42290 }, { "epoch": 1.628296438883542, "grad_norm": 1.2956533432006836, "learning_rate": 1.6629642642926947e-05, "loss": 0.8066, "step": 42295 }, { "epoch": 1.6284889316650626, "grad_norm": 1.1321804523468018, "learning_rate": 1.661294777470245e-05, "loss": 0.7923, "step": 42300 }, { "epoch": 1.6286814244465833, "grad_norm": 1.4168885946273804, "learning_rate": 1.659626053167149e-05, "loss": 0.6876, "step": 42305 }, { "epoch": 
1.628873917228104, "grad_norm": 1.1318153142929077, "learning_rate": 1.6579580915360003e-05, "loss": 1.0224, "step": 42310 }, { "epoch": 1.6290664100096246, "grad_norm": 1.2682236433029175, "learning_rate": 1.656290892729325e-05, "loss": 0.6783, "step": 42315 }, { "epoch": 1.6292589027911455, "grad_norm": 1.0369542837142944, "learning_rate": 1.654624456899572e-05, "loss": 0.746, "step": 42320 }, { "epoch": 1.6294513955726662, "grad_norm": 1.284934639930725, "learning_rate": 1.6529587841991336e-05, "loss": 0.7735, "step": 42325 }, { "epoch": 1.6296438883541868, "grad_norm": 0.9802597761154175, "learning_rate": 1.6512938747803186e-05, "loss": 0.7332, "step": 42330 }, { "epoch": 1.6298363811357075, "grad_norm": 1.6991342306137085, "learning_rate": 1.649629728795372e-05, "loss": 0.8675, "step": 42335 }, { "epoch": 1.6300288739172282, "grad_norm": 1.371645212173462, "learning_rate": 1.6479663463964722e-05, "loss": 0.7313, "step": 42340 }, { "epoch": 1.6302213666987488, "grad_norm": 1.8463810682296753, "learning_rate": 1.646303727735724e-05, "loss": 0.9124, "step": 42345 }, { "epoch": 1.6304138594802695, "grad_norm": 2.088982582092285, "learning_rate": 1.6446418729651604e-05, "loss": 0.8852, "step": 42350 }, { "epoch": 1.6306063522617902, "grad_norm": 1.3795658349990845, "learning_rate": 1.6429807822367482e-05, "loss": 0.838, "step": 42355 }, { "epoch": 1.6307988450433109, "grad_norm": 1.5469810962677002, "learning_rate": 1.641320455702383e-05, "loss": 0.9927, "step": 42360 }, { "epoch": 1.6309913378248315, "grad_norm": 1.1996501684188843, "learning_rate": 1.6396608935138902e-05, "loss": 0.7755, "step": 42365 }, { "epoch": 1.6311838306063522, "grad_norm": 0.9938015937805176, "learning_rate": 1.63800209582303e-05, "loss": 0.7619, "step": 42370 }, { "epoch": 1.6313763233878729, "grad_norm": 1.932384729385376, "learning_rate": 1.636344062781482e-05, "loss": 0.8774, "step": 42375 }, { "epoch": 1.6315688161693935, "grad_norm": 0.921647310256958, "learning_rate": 
1.6346867945408662e-05, "loss": 0.7319, "step": 42380 }, { "epoch": 1.6317613089509142, "grad_norm": 1.4289840459823608, "learning_rate": 1.6330302912527263e-05, "loss": 0.7474, "step": 42385 }, { "epoch": 1.6319538017324349, "grad_norm": 2.0691914558410645, "learning_rate": 1.6313745530685443e-05, "loss": 0.9581, "step": 42390 }, { "epoch": 1.6321462945139558, "grad_norm": 1.3220711946487427, "learning_rate": 1.6297195801397157e-05, "loss": 0.6706, "step": 42395 }, { "epoch": 1.6323387872954764, "grad_norm": 1.0470640659332275, "learning_rate": 1.6280653726175897e-05, "loss": 0.6795, "step": 42400 }, { "epoch": 1.632531280076997, "grad_norm": 1.722261667251587, "learning_rate": 1.626411930653423e-05, "loss": 0.8447, "step": 42405 }, { "epoch": 1.6327237728585178, "grad_norm": 2.425245761871338, "learning_rate": 1.624759254398417e-05, "loss": 0.9633, "step": 42410 }, { "epoch": 1.6329162656400384, "grad_norm": 1.6226695775985718, "learning_rate": 1.6231073440036947e-05, "loss": 0.8426, "step": 42415 }, { "epoch": 1.6331087584215593, "grad_norm": 2.568894147872925, "learning_rate": 1.621456199620317e-05, "loss": 0.729, "step": 42420 }, { "epoch": 1.63330125120308, "grad_norm": 1.7268556356430054, "learning_rate": 1.6198058213992617e-05, "loss": 0.7958, "step": 42425 }, { "epoch": 1.6334937439846007, "grad_norm": 1.1411241292953491, "learning_rate": 1.618156209491456e-05, "loss": 0.8699, "step": 42430 }, { "epoch": 1.6336862367661213, "grad_norm": 1.1613513231277466, "learning_rate": 1.6165073640477368e-05, "loss": 0.8642, "step": 42435 }, { "epoch": 1.633878729547642, "grad_norm": 1.3415052890777588, "learning_rate": 1.6148592852188838e-05, "loss": 0.9454, "step": 42440 }, { "epoch": 1.6340712223291627, "grad_norm": 0.5342742204666138, "learning_rate": 1.613211973155604e-05, "loss": 0.8002, "step": 42445 }, { "epoch": 1.6342637151106834, "grad_norm": 1.4904274940490723, "learning_rate": 1.6115654280085335e-05, "loss": 0.8969, "step": 42450 }, { "epoch": 
1.634456207892204, "grad_norm": 0.9641575813293457, "learning_rate": 1.609919649928231e-05, "loss": 0.7795, "step": 42455 }, { "epoch": 1.6346487006737247, "grad_norm": 1.267978310585022, "learning_rate": 1.6082746390652026e-05, "loss": 0.7523, "step": 42460 }, { "epoch": 1.6348411934552454, "grad_norm": 1.0590537786483765, "learning_rate": 1.606630395569866e-05, "loss": 0.7176, "step": 42465 }, { "epoch": 1.635033686236766, "grad_norm": 0.7965215444564819, "learning_rate": 1.604986919592578e-05, "loss": 0.8919, "step": 42470 }, { "epoch": 1.6352261790182867, "grad_norm": 0.9418613314628601, "learning_rate": 1.603344211283625e-05, "loss": 0.7038, "step": 42475 }, { "epoch": 1.6354186717998074, "grad_norm": 1.7966370582580566, "learning_rate": 1.6017022707932237e-05, "loss": 0.7553, "step": 42480 }, { "epoch": 1.635611164581328, "grad_norm": 0.9546367526054382, "learning_rate": 1.6000610982715135e-05, "loss": 0.7796, "step": 42485 }, { "epoch": 1.635803657362849, "grad_norm": 1.3808022737503052, "learning_rate": 1.598420693868571e-05, "loss": 0.6758, "step": 42490 }, { "epoch": 1.6359961501443696, "grad_norm": 0.980531632900238, "learning_rate": 1.5967810577344034e-05, "loss": 0.8068, "step": 42495 }, { "epoch": 1.6361886429258903, "grad_norm": 1.2579573392868042, "learning_rate": 1.5951421900189366e-05, "loss": 0.9325, "step": 42500 }, { "epoch": 1.636381135707411, "grad_norm": 1.0082838535308838, "learning_rate": 1.5935040908720455e-05, "loss": 0.8162, "step": 42505 }, { "epoch": 1.6365736284889316, "grad_norm": 1.9807016849517822, "learning_rate": 1.5918667604435132e-05, "loss": 0.7665, "step": 42510 }, { "epoch": 1.6367661212704525, "grad_norm": 1.1579536199569702, "learning_rate": 1.5902301988830682e-05, "loss": 0.856, "step": 42515 }, { "epoch": 1.6369586140519732, "grad_norm": 1.3920001983642578, "learning_rate": 1.588594406340361e-05, "loss": 0.8391, "step": 42520 }, { "epoch": 1.6371511068334939, "grad_norm": 1.4542222023010254, "learning_rate": 
1.5869593829649787e-05, "loss": 0.81, "step": 42525 }, { "epoch": 1.6373435996150145, "grad_norm": 1.2081284523010254, "learning_rate": 1.5853251289064242e-05, "loss": 0.8316, "step": 42530 }, { "epoch": 1.6375360923965352, "grad_norm": 3.1090595722198486, "learning_rate": 1.583691644314148e-05, "loss": 0.9793, "step": 42535 }, { "epoch": 1.6377285851780559, "grad_norm": 1.5080655813217163, "learning_rate": 1.5820589293375142e-05, "loss": 0.9199, "step": 42540 }, { "epoch": 1.6379210779595765, "grad_norm": 1.2855561971664429, "learning_rate": 1.5804269841258323e-05, "loss": 0.7934, "step": 42545 }, { "epoch": 1.6381135707410972, "grad_norm": 1.6619151830673218, "learning_rate": 1.578795808828326e-05, "loss": 0.8615, "step": 42550 }, { "epoch": 1.6383060635226179, "grad_norm": 1.0249853134155273, "learning_rate": 1.5771654035941574e-05, "loss": 0.8015, "step": 42555 }, { "epoch": 1.6384985563041385, "grad_norm": 1.0512340068817139, "learning_rate": 1.575535768572416e-05, "loss": 0.7564, "step": 42560 }, { "epoch": 1.6386910490856592, "grad_norm": 1.487186074256897, "learning_rate": 1.5739069039121245e-05, "loss": 0.8638, "step": 42565 }, { "epoch": 1.6388835418671799, "grad_norm": 1.0381027460098267, "learning_rate": 1.5722788097622267e-05, "loss": 0.917, "step": 42570 }, { "epoch": 1.6390760346487006, "grad_norm": 1.8956249952316284, "learning_rate": 1.5706514862716028e-05, "loss": 0.8835, "step": 42575 }, { "epoch": 1.6392685274302212, "grad_norm": 0.9786503314971924, "learning_rate": 1.5690249335890605e-05, "loss": 0.7697, "step": 42580 }, { "epoch": 1.6394610202117421, "grad_norm": 1.1818338632583618, "learning_rate": 1.567399151863339e-05, "loss": 0.8998, "step": 42585 }, { "epoch": 1.6396535129932628, "grad_norm": 1.3527082204818726, "learning_rate": 1.565774141243106e-05, "loss": 0.7461, "step": 42590 }, { "epoch": 1.6398460057747835, "grad_norm": 1.5790470838546753, "learning_rate": 1.5641499018769545e-05, "loss": 0.8563, "step": 42595 }, { "epoch": 
1.6400384985563041, "grad_norm": 0.7987781763076782, "learning_rate": 1.5625264339134115e-05, "loss": 0.7464, "step": 42600 }, { "epoch": 1.6402309913378248, "grad_norm": 1.0493252277374268, "learning_rate": 1.5609037375009326e-05, "loss": 0.6797, "step": 42605 }, { "epoch": 1.6404234841193457, "grad_norm": 1.0951621532440186, "learning_rate": 1.559281812787906e-05, "loss": 0.7957, "step": 42610 }, { "epoch": 1.6406159769008664, "grad_norm": 1.529111623764038, "learning_rate": 1.5576606599226383e-05, "loss": 0.894, "step": 42615 }, { "epoch": 1.640808469682387, "grad_norm": 0.8548210263252258, "learning_rate": 1.5560402790533823e-05, "loss": 0.7031, "step": 42620 }, { "epoch": 1.6410009624639077, "grad_norm": 1.763548493385315, "learning_rate": 1.554420670328305e-05, "loss": 0.7808, "step": 42625 }, { "epoch": 1.6411934552454284, "grad_norm": 1.7161123752593994, "learning_rate": 1.5528018338955098e-05, "loss": 0.686, "step": 42630 }, { "epoch": 1.641385948026949, "grad_norm": 2.433823347091675, "learning_rate": 1.5511837699030295e-05, "loss": 0.788, "step": 42635 }, { "epoch": 1.6415784408084697, "grad_norm": 1.3295754194259644, "learning_rate": 1.549566478498827e-05, "loss": 0.765, "step": 42640 }, { "epoch": 1.6417709335899904, "grad_norm": 1.3078267574310303, "learning_rate": 1.547949959830787e-05, "loss": 0.7929, "step": 42645 }, { "epoch": 1.641963426371511, "grad_norm": 2.7279410362243652, "learning_rate": 1.5463342140467373e-05, "loss": 0.9705, "step": 42650 }, { "epoch": 1.6421559191530317, "grad_norm": 1.5727055072784424, "learning_rate": 1.5447192412944223e-05, "loss": 0.8038, "step": 42655 }, { "epoch": 1.6423484119345524, "grad_norm": 1.5005487203598022, "learning_rate": 1.5431050417215208e-05, "loss": 0.8304, "step": 42660 }, { "epoch": 1.642540904716073, "grad_norm": 1.3737221956253052, "learning_rate": 1.541491615475642e-05, "loss": 0.9049, "step": 42665 }, { "epoch": 1.6427333974975937, "grad_norm": 0.9405394196510315, "learning_rate": 
1.5398789627043243e-05, "loss": 0.7638, "step": 42670 }, { "epoch": 1.6429258902791144, "grad_norm": 1.3767449855804443, "learning_rate": 1.5382670835550293e-05, "loss": 0.791, "step": 42675 }, { "epoch": 1.643118383060635, "grad_norm": 0.9052772521972656, "learning_rate": 1.5366559781751566e-05, "loss": 0.8165, "step": 42680 }, { "epoch": 1.643310875842156, "grad_norm": 1.9760810136795044, "learning_rate": 1.53504564671203e-05, "loss": 1.0684, "step": 42685 }, { "epoch": 1.6435033686236766, "grad_norm": 0.9905810952186584, "learning_rate": 1.533436089312904e-05, "loss": 0.7722, "step": 42690 }, { "epoch": 1.6436958614051973, "grad_norm": 1.814131259918213, "learning_rate": 1.531827306124963e-05, "loss": 0.7637, "step": 42695 }, { "epoch": 1.643888354186718, "grad_norm": 1.4231481552124023, "learning_rate": 1.530219297295318e-05, "loss": 0.8173, "step": 42700 }, { "epoch": 1.6440808469682386, "grad_norm": 1.3890655040740967, "learning_rate": 1.5286120629710098e-05, "loss": 0.7146, "step": 42705 }, { "epoch": 1.6442733397497595, "grad_norm": 1.4815847873687744, "learning_rate": 1.527005603299011e-05, "loss": 0.8625, "step": 42710 }, { "epoch": 1.6444658325312802, "grad_norm": 1.5338302850723267, "learning_rate": 1.5253999184262235e-05, "loss": 0.6705, "step": 42715 }, { "epoch": 1.6446583253128009, "grad_norm": 1.2656974792480469, "learning_rate": 1.523795008499469e-05, "loss": 0.9487, "step": 42720 }, { "epoch": 1.6448508180943215, "grad_norm": 1.0494598150253296, "learning_rate": 1.5221908736655167e-05, "loss": 0.7775, "step": 42725 }, { "epoch": 1.6450433108758422, "grad_norm": 1.1466418504714966, "learning_rate": 1.5205875140710458e-05, "loss": 0.9629, "step": 42730 }, { "epoch": 1.6452358036573629, "grad_norm": 1.3125535249710083, "learning_rate": 1.5189849298626769e-05, "loss": 0.9284, "step": 42735 }, { "epoch": 1.6454282964388836, "grad_norm": 1.2060669660568237, "learning_rate": 1.5173831211869539e-05, "loss": 0.8197, "step": 42740 }, { "epoch": 
1.6456207892204042, "grad_norm": 1.393114447593689, "learning_rate": 1.5157820881903539e-05, "loss": 0.8467, "step": 42745 }, { "epoch": 1.645813282001925, "grad_norm": 2.256540060043335, "learning_rate": 1.5141818310192756e-05, "loss": 0.9494, "step": 42750 }, { "epoch": 1.6460057747834456, "grad_norm": 1.1511859893798828, "learning_rate": 1.5125823498200598e-05, "loss": 0.8653, "step": 42755 }, { "epoch": 1.6461982675649662, "grad_norm": 1.7524446249008179, "learning_rate": 1.5109836447389613e-05, "loss": 0.7703, "step": 42760 }, { "epoch": 1.646390760346487, "grad_norm": 1.9260464906692505, "learning_rate": 1.5093857159221747e-05, "loss": 0.8431, "step": 42765 }, { "epoch": 1.6465832531280076, "grad_norm": 1.6984754800796509, "learning_rate": 1.5077885635158185e-05, "loss": 0.7436, "step": 42770 }, { "epoch": 1.6467757459095282, "grad_norm": 1.1173107624053955, "learning_rate": 1.5061921876659446e-05, "loss": 0.7743, "step": 42775 }, { "epoch": 1.6469682386910491, "grad_norm": 1.5459610223770142, "learning_rate": 1.5045965885185253e-05, "loss": 0.7828, "step": 42780 }, { "epoch": 1.6471607314725698, "grad_norm": 1.2092872858047485, "learning_rate": 1.503001766219475e-05, "loss": 0.8356, "step": 42785 }, { "epoch": 1.6473532242540905, "grad_norm": 1.4177119731903076, "learning_rate": 1.5014077209146227e-05, "loss": 0.6802, "step": 42790 }, { "epoch": 1.6475457170356111, "grad_norm": 1.1195024251937866, "learning_rate": 1.499814452749737e-05, "loss": 0.8973, "step": 42795 }, { "epoch": 1.6477382098171318, "grad_norm": 1.1029400825500488, "learning_rate": 1.4982219618705119e-05, "loss": 0.7615, "step": 42800 }, { "epoch": 1.6479307025986527, "grad_norm": 1.2463111877441406, "learning_rate": 1.4966302484225681e-05, "loss": 0.8533, "step": 42805 }, { "epoch": 1.6481231953801734, "grad_norm": 1.6013331413269043, "learning_rate": 1.4950393125514605e-05, "loss": 0.7185, "step": 42810 }, { "epoch": 1.648315688161694, "grad_norm": 1.4675661325454712, "learning_rate": 
1.4934491544026663e-05, "loss": 0.6616, "step": 42815 }, { "epoch": 1.6485081809432147, "grad_norm": 1.2098175287246704, "learning_rate": 1.4918597741215957e-05, "loss": 0.7673, "step": 42820 }, { "epoch": 1.6487006737247354, "grad_norm": 0.9863401651382446, "learning_rate": 1.4902711718535866e-05, "loss": 0.7465, "step": 42825 }, { "epoch": 1.648893166506256, "grad_norm": 1.357666254043579, "learning_rate": 1.4886833477439099e-05, "loss": 0.8188, "step": 42830 }, { "epoch": 1.6490856592877767, "grad_norm": 2.8012890815734863, "learning_rate": 1.4870963019377548e-05, "loss": 0.8675, "step": 42835 }, { "epoch": 1.6492781520692974, "grad_norm": 2.035005807876587, "learning_rate": 1.4855100345802542e-05, "loss": 0.9065, "step": 42840 }, { "epoch": 1.649470644850818, "grad_norm": 1.256974458694458, "learning_rate": 1.4839245458164553e-05, "loss": 0.8464, "step": 42845 }, { "epoch": 1.6496631376323387, "grad_norm": 1.1360011100769043, "learning_rate": 1.4823398357913432e-05, "loss": 0.8485, "step": 42850 }, { "epoch": 1.6498556304138594, "grad_norm": 1.2088253498077393, "learning_rate": 1.4807559046498287e-05, "loss": 0.8786, "step": 42855 }, { "epoch": 1.65004812319538, "grad_norm": 1.3639460802078247, "learning_rate": 1.4791727525367539e-05, "loss": 0.9163, "step": 42860 }, { "epoch": 1.6502406159769007, "grad_norm": 0.8863676190376282, "learning_rate": 1.4775903795968804e-05, "loss": 0.7723, "step": 42865 }, { "epoch": 1.6504331087584214, "grad_norm": 1.2062976360321045, "learning_rate": 1.476008785974916e-05, "loss": 0.7728, "step": 42870 }, { "epoch": 1.650625601539942, "grad_norm": 2.016284227371216, "learning_rate": 1.4744279718154797e-05, "loss": 0.8003, "step": 42875 }, { "epoch": 1.650818094321463, "grad_norm": 2.4403231143951416, "learning_rate": 1.4728479372631287e-05, "loss": 0.8282, "step": 42880 }, { "epoch": 1.6510105871029837, "grad_norm": 1.3682914972305298, "learning_rate": 1.4712686824623466e-05, "loss": 0.8877, "step": 42885 }, { "epoch": 
1.6512030798845043, "grad_norm": 1.542219877243042, "learning_rate": 1.469690207557548e-05, "loss": 0.8553, "step": 42890 }, { "epoch": 1.651395572666025, "grad_norm": 1.1081209182739258, "learning_rate": 1.4681125126930695e-05, "loss": 0.8886, "step": 42895 }, { "epoch": 1.6515880654475459, "grad_norm": 2.007857084274292, "learning_rate": 1.4665355980131834e-05, "loss": 0.8171, "step": 42900 }, { "epoch": 1.6517805582290666, "grad_norm": 1.1662068367004395, "learning_rate": 1.4649594636620878e-05, "loss": 0.8906, "step": 42905 }, { "epoch": 1.6519730510105872, "grad_norm": 1.0259616374969482, "learning_rate": 1.4633841097839096e-05, "loss": 0.6473, "step": 42910 }, { "epoch": 1.652165543792108, "grad_norm": 0.8987205624580383, "learning_rate": 1.4618095365227069e-05, "loss": 0.7654, "step": 42915 }, { "epoch": 1.6523580365736286, "grad_norm": 1.9955406188964844, "learning_rate": 1.460235744022459e-05, "loss": 0.882, "step": 42920 }, { "epoch": 1.6525505293551492, "grad_norm": 1.3027300834655762, "learning_rate": 1.458662732427083e-05, "loss": 0.9002, "step": 42925 }, { "epoch": 1.65274302213667, "grad_norm": 0.7334115505218506, "learning_rate": 1.457090501880417e-05, "loss": 0.8804, "step": 42930 }, { "epoch": 1.6529355149181906, "grad_norm": 1.0303765535354614, "learning_rate": 1.4555190525262363e-05, "loss": 0.7637, "step": 42935 }, { "epoch": 1.6531280076997112, "grad_norm": 0.9630566239356995, "learning_rate": 1.4539483845082324e-05, "loss": 0.8624, "step": 42940 }, { "epoch": 1.653320500481232, "grad_norm": 1.3009661436080933, "learning_rate": 1.4523784979700395e-05, "loss": 0.799, "step": 42945 }, { "epoch": 1.6535129932627526, "grad_norm": 1.6627331972122192, "learning_rate": 1.4508093930552092e-05, "loss": 0.9133, "step": 42950 }, { "epoch": 1.6537054860442733, "grad_norm": 1.2457365989685059, "learning_rate": 1.4492410699072256e-05, "loss": 0.8653, "step": 42955 }, { "epoch": 1.653897978825794, "grad_norm": 2.1877925395965576, "learning_rate": 
1.447673528669503e-05, "loss": 0.9323, "step": 42960 }, { "epoch": 1.6540904716073146, "grad_norm": 1.2046828269958496, "learning_rate": 1.4461067694853847e-05, "loss": 0.7993, "step": 42965 }, { "epoch": 1.6542829643888353, "grad_norm": 1.9960542917251587, "learning_rate": 1.4445407924981325e-05, "loss": 0.7632, "step": 42970 }, { "epoch": 1.6544754571703562, "grad_norm": 1.4354302883148193, "learning_rate": 1.4429755978509551e-05, "loss": 0.7538, "step": 42975 }, { "epoch": 1.6546679499518768, "grad_norm": 1.3290390968322754, "learning_rate": 1.4414111856869727e-05, "loss": 0.7158, "step": 42980 }, { "epoch": 1.6548604427333975, "grad_norm": 0.9498152136802673, "learning_rate": 1.4398475561492409e-05, "loss": 0.7065, "step": 42985 }, { "epoch": 1.6550529355149182, "grad_norm": 1.9382153749465942, "learning_rate": 1.438284709380745e-05, "loss": 0.6704, "step": 42990 }, { "epoch": 1.6552454282964388, "grad_norm": 1.109632968902588, "learning_rate": 1.4367226455243988e-05, "loss": 0.8012, "step": 42995 }, { "epoch": 1.6554379210779597, "grad_norm": 1.8244105577468872, "learning_rate": 1.4351613647230344e-05, "loss": 0.7368, "step": 43000 }, { "epoch": 1.6556304138594804, "grad_norm": 1.2546757459640503, "learning_rate": 1.4336008671194311e-05, "loss": 0.7095, "step": 43005 }, { "epoch": 1.655822906641001, "grad_norm": 1.7501899003982544, "learning_rate": 1.4320411528562806e-05, "loss": 0.6884, "step": 43010 }, { "epoch": 1.6560153994225217, "grad_norm": 1.3198050260543823, "learning_rate": 1.430482222076207e-05, "loss": 0.679, "step": 43015 }, { "epoch": 1.6562078922040424, "grad_norm": 1.1397435665130615, "learning_rate": 1.428924074921768e-05, "loss": 0.7621, "step": 43020 }, { "epoch": 1.656400384985563, "grad_norm": 1.1400476694107056, "learning_rate": 1.427366711535445e-05, "loss": 0.7215, "step": 43025 }, { "epoch": 1.6565928777670837, "grad_norm": 1.151114821434021, "learning_rate": 1.4258101320596462e-05, "loss": 0.856, "step": 43030 }, { "epoch": 
1.6567853705486044, "grad_norm": 1.7196004390716553, "learning_rate": 1.4242543366367122e-05, "loss": 0.8217, "step": 43035 }, { "epoch": 1.656977863330125, "grad_norm": 1.8382436037063599, "learning_rate": 1.4226993254089127e-05, "loss": 0.8671, "step": 43040 }, { "epoch": 1.6571703561116458, "grad_norm": 0.8144475221633911, "learning_rate": 1.4211450985184349e-05, "loss": 0.9136, "step": 43045 }, { "epoch": 1.6573628488931664, "grad_norm": 1.0420215129852295, "learning_rate": 1.4195916561074129e-05, "loss": 0.8111, "step": 43050 }, { "epoch": 1.657555341674687, "grad_norm": 1.1034629344940186, "learning_rate": 1.41803899831789e-05, "loss": 0.7063, "step": 43055 }, { "epoch": 1.6577478344562078, "grad_norm": 1.1261588335037231, "learning_rate": 1.4164871252918544e-05, "loss": 0.9457, "step": 43060 }, { "epoch": 1.6579403272377284, "grad_norm": 1.1537283658981323, "learning_rate": 1.4149360371712084e-05, "loss": 1.0019, "step": 43065 }, { "epoch": 1.6581328200192493, "grad_norm": 1.0885237455368042, "learning_rate": 1.4133857340977908e-05, "loss": 0.8041, "step": 43070 }, { "epoch": 1.65832531280077, "grad_norm": 1.2921056747436523, "learning_rate": 1.411836216213367e-05, "loss": 0.8121, "step": 43075 }, { "epoch": 1.6585178055822907, "grad_norm": 1.4436019659042358, "learning_rate": 1.4102874836596325e-05, "loss": 0.7213, "step": 43080 }, { "epoch": 1.6587102983638113, "grad_norm": 1.3824903964996338, "learning_rate": 1.4087395365782008e-05, "loss": 0.8929, "step": 43085 }, { "epoch": 1.658902791145332, "grad_norm": 1.0888464450836182, "learning_rate": 1.407192375110632e-05, "loss": 0.828, "step": 43090 }, { "epoch": 1.659095283926853, "grad_norm": 1.3013052940368652, "learning_rate": 1.405645999398395e-05, "loss": 0.7308, "step": 43095 }, { "epoch": 1.6592877767083736, "grad_norm": 1.4749294519424438, "learning_rate": 1.4041004095828992e-05, "loss": 0.8799, "step": 43100 }, { "epoch": 1.6594802694898942, "grad_norm": 1.4504752159118652, "learning_rate": 
1.4025556058054789e-05, "loss": 0.7252, "step": 43105 }, { "epoch": 1.659672762271415, "grad_norm": 1.064914584159851, "learning_rate": 1.4010115882073971e-05, "loss": 0.8309, "step": 43110 }, { "epoch": 1.6598652550529356, "grad_norm": 1.1333822011947632, "learning_rate": 1.3994683569298406e-05, "loss": 0.8019, "step": 43115 }, { "epoch": 1.6600577478344563, "grad_norm": 1.1221379041671753, "learning_rate": 1.39792591211393e-05, "loss": 0.9147, "step": 43120 }, { "epoch": 1.660250240615977, "grad_norm": 1.3781883716583252, "learning_rate": 1.3963842539007098e-05, "loss": 0.7619, "step": 43125 }, { "epoch": 1.6604427333974976, "grad_norm": 0.8834213614463806, "learning_rate": 1.394843382431158e-05, "loss": 0.9033, "step": 43130 }, { "epoch": 1.6606352261790183, "grad_norm": 1.049788236618042, "learning_rate": 1.3933032978461757e-05, "loss": 0.6857, "step": 43135 }, { "epoch": 1.660827718960539, "grad_norm": 1.520080804824829, "learning_rate": 1.3917640002865905e-05, "loss": 0.8151, "step": 43140 }, { "epoch": 1.6610202117420596, "grad_norm": 1.1372543573379517, "learning_rate": 1.3902254898931633e-05, "loss": 0.7423, "step": 43145 }, { "epoch": 1.6612127045235803, "grad_norm": 0.8767010569572449, "learning_rate": 1.3886877668065802e-05, "loss": 0.874, "step": 43150 }, { "epoch": 1.661405197305101, "grad_norm": 1.2810707092285156, "learning_rate": 1.3871508311674587e-05, "loss": 0.7602, "step": 43155 }, { "epoch": 1.6615976900866216, "grad_norm": 1.0464438199996948, "learning_rate": 1.3856146831163341e-05, "loss": 0.6807, "step": 43160 }, { "epoch": 1.6617901828681423, "grad_norm": 2.142082452774048, "learning_rate": 1.3840793227936867e-05, "loss": 0.7548, "step": 43165 }, { "epoch": 1.6619826756496632, "grad_norm": 0.9313360452651978, "learning_rate": 1.382544750339907e-05, "loss": 0.804, "step": 43170 }, { "epoch": 1.6621751684311838, "grad_norm": 0.9089387059211731, "learning_rate": 1.3810109658953252e-05, "loss": 0.6871, "step": 43175 }, { "epoch": 
1.6623676612127045, "grad_norm": 1.2716861963272095, "learning_rate": 1.3794779696001948e-05, "loss": 0.914, "step": 43180 }, { "epoch": 1.6625601539942252, "grad_norm": 1.5578489303588867, "learning_rate": 1.3779457615947e-05, "loss": 0.7148, "step": 43185 }, { "epoch": 1.6627526467757459, "grad_norm": 0.8890004754066467, "learning_rate": 1.3764143420189457e-05, "loss": 0.7453, "step": 43190 }, { "epoch": 1.6629451395572667, "grad_norm": 1.7845152616500854, "learning_rate": 1.3748837110129774e-05, "loss": 0.8449, "step": 43195 }, { "epoch": 1.6631376323387874, "grad_norm": 1.1087902784347534, "learning_rate": 1.3733538687167558e-05, "loss": 0.8144, "step": 43200 }, { "epoch": 1.663330125120308, "grad_norm": 1.7164037227630615, "learning_rate": 1.3718248152701773e-05, "loss": 0.8841, "step": 43205 }, { "epoch": 1.6635226179018288, "grad_norm": 1.1538746356964111, "learning_rate": 1.3702965508130616e-05, "loss": 0.9051, "step": 43210 }, { "epoch": 1.6637151106833494, "grad_norm": 1.328781247138977, "learning_rate": 1.3687690754851634e-05, "loss": 0.7957, "step": 43215 }, { "epoch": 1.66390760346487, "grad_norm": 1.2953364849090576, "learning_rate": 1.367242389426151e-05, "loss": 0.8322, "step": 43220 }, { "epoch": 1.6641000962463908, "grad_norm": 1.3127518892288208, "learning_rate": 1.3657164927756405e-05, "loss": 0.7086, "step": 43225 }, { "epoch": 1.6642925890279114, "grad_norm": 1.5213780403137207, "learning_rate": 1.3641913856731569e-05, "loss": 0.8327, "step": 43230 }, { "epoch": 1.664485081809432, "grad_norm": 1.8218622207641602, "learning_rate": 1.3626670682581655e-05, "loss": 0.9758, "step": 43235 }, { "epoch": 1.6646775745909528, "grad_norm": 1.4787392616271973, "learning_rate": 1.3611435406700546e-05, "loss": 0.6978, "step": 43240 }, { "epoch": 1.6648700673724735, "grad_norm": 1.1849435567855835, "learning_rate": 1.3596208030481372e-05, "loss": 0.9329, "step": 43245 }, { "epoch": 1.6650625601539941, "grad_norm": 0.34040671586990356, "learning_rate": 
1.358098855531661e-05, "loss": 0.67, "step": 43250 }, { "epoch": 1.6652550529355148, "grad_norm": 1.1814277172088623, "learning_rate": 1.3565776982597966e-05, "loss": 0.8672, "step": 43255 }, { "epoch": 1.6654475457170355, "grad_norm": 1.2392293214797974, "learning_rate": 1.3550573313716463e-05, "loss": 0.9071, "step": 43260 }, { "epoch": 1.6656400384985564, "grad_norm": 1.1878242492675781, "learning_rate": 1.3535377550062323e-05, "loss": 0.7641, "step": 43265 }, { "epoch": 1.665832531280077, "grad_norm": 0.885995090007782, "learning_rate": 1.3520189693025164e-05, "loss": 0.7289, "step": 43270 }, { "epoch": 1.6660250240615977, "grad_norm": 1.223071575164795, "learning_rate": 1.3505009743993757e-05, "loss": 0.6685, "step": 43275 }, { "epoch": 1.6662175168431184, "grad_norm": 1.0630251169204712, "learning_rate": 1.3489837704356235e-05, "loss": 0.8338, "step": 43280 }, { "epoch": 1.666410009624639, "grad_norm": 1.187340497970581, "learning_rate": 1.3474673575499986e-05, "loss": 0.6967, "step": 43285 }, { "epoch": 1.66660250240616, "grad_norm": 2.237581729888916, "learning_rate": 1.3459517358811668e-05, "loss": 0.8451, "step": 43290 }, { "epoch": 1.6667949951876806, "grad_norm": 1.1633507013320923, "learning_rate": 1.3444369055677175e-05, "loss": 0.7811, "step": 43295 }, { "epoch": 1.6669874879692013, "grad_norm": 1.9150440692901611, "learning_rate": 1.3429228667481797e-05, "loss": 0.8467, "step": 43300 }, { "epoch": 1.667179980750722, "grad_norm": 0.9145646691322327, "learning_rate": 1.341409619560996e-05, "loss": 0.8669, "step": 43305 }, { "epoch": 1.6673724735322426, "grad_norm": 1.4156630039215088, "learning_rate": 1.3398971641445434e-05, "loss": 0.7688, "step": 43310 }, { "epoch": 1.6675649663137633, "grad_norm": 1.122641682624817, "learning_rate": 1.3383855006371281e-05, "loss": 0.9059, "step": 43315 }, { "epoch": 1.667757459095284, "grad_norm": 1.1816550493240356, "learning_rate": 1.3368746291769806e-05, "loss": 0.6822, "step": 43320 }, { "epoch": 
1.6679499518768046, "grad_norm": 1.2884527444839478, "learning_rate": 1.3353645499022605e-05, "loss": 0.989, "step": 43325 }, { "epoch": 1.6681424446583253, "grad_norm": 2.128404140472412, "learning_rate": 1.333855262951056e-05, "loss": 0.835, "step": 43330 }, { "epoch": 1.668334937439846, "grad_norm": 1.3156408071517944, "learning_rate": 1.3323467684613789e-05, "loss": 0.7899, "step": 43335 }, { "epoch": 1.6685274302213666, "grad_norm": 2.0468761920928955, "learning_rate": 1.3308390665711701e-05, "loss": 0.7012, "step": 43340 }, { "epoch": 1.6687199230028873, "grad_norm": 1.0322657823562622, "learning_rate": 1.3293321574183016e-05, "loss": 0.6967, "step": 43345 }, { "epoch": 1.668912415784408, "grad_norm": 0.9102632999420166, "learning_rate": 1.3278260411405697e-05, "loss": 0.7604, "step": 43350 }, { "epoch": 1.6691049085659286, "grad_norm": 1.8231315612792969, "learning_rate": 1.3263207178756997e-05, "loss": 0.6853, "step": 43355 }, { "epoch": 1.6692974013474493, "grad_norm": 1.3293402194976807, "learning_rate": 1.3248161877613408e-05, "loss": 0.9419, "step": 43360 }, { "epoch": 1.6694898941289702, "grad_norm": 1.4297016859054565, "learning_rate": 1.3233124509350736e-05, "loss": 0.9883, "step": 43365 }, { "epoch": 1.6696823869104909, "grad_norm": 1.4253541231155396, "learning_rate": 1.3218095075344051e-05, "loss": 0.8432, "step": 43370 }, { "epoch": 1.6698748796920115, "grad_norm": 1.1191109418869019, "learning_rate": 1.3203073576967717e-05, "loss": 0.87, "step": 43375 }, { "epoch": 1.6700673724735322, "grad_norm": 1.2235162258148193, "learning_rate": 1.3188060015595271e-05, "loss": 0.943, "step": 43380 }, { "epoch": 1.670259865255053, "grad_norm": 0.8941404223442078, "learning_rate": 1.3173054392599715e-05, "loss": 0.7541, "step": 43385 }, { "epoch": 1.6704523580365738, "grad_norm": 1.3058192729949951, "learning_rate": 1.3158056709353139e-05, "loss": 0.7509, "step": 43390 }, { "epoch": 1.6706448508180944, "grad_norm": 1.0127733945846558, "learning_rate": 
1.3143066967226992e-05, "loss": 0.6995, "step": 43395 }, { "epoch": 1.670837343599615, "grad_norm": 1.4045414924621582, "learning_rate": 1.3128085167592007e-05, "loss": 0.751, "step": 43400 }, { "epoch": 1.6710298363811358, "grad_norm": 1.8028631210327148, "learning_rate": 1.3113111311818171e-05, "loss": 0.8459, "step": 43405 }, { "epoch": 1.6712223291626565, "grad_norm": 2.1013052463531494, "learning_rate": 1.3098145401274697e-05, "loss": 0.8916, "step": 43410 }, { "epoch": 1.6714148219441771, "grad_norm": 1.4260973930358887, "learning_rate": 1.3083187437330192e-05, "loss": 0.6563, "step": 43415 }, { "epoch": 1.6716073147256978, "grad_norm": 1.321631908416748, "learning_rate": 1.3068237421352414e-05, "loss": 0.7359, "step": 43420 }, { "epoch": 1.6717998075072185, "grad_norm": 1.628443717956543, "learning_rate": 1.3053295354708439e-05, "loss": 0.7925, "step": 43425 }, { "epoch": 1.6719923002887391, "grad_norm": 1.8474700450897217, "learning_rate": 1.3038361238764641e-05, "loss": 0.8156, "step": 43430 }, { "epoch": 1.6721847930702598, "grad_norm": 1.3773387670516968, "learning_rate": 1.3023435074886658e-05, "loss": 0.7901, "step": 43435 }, { "epoch": 1.6723772858517805, "grad_norm": 0.8466984033584595, "learning_rate": 1.3008516864439357e-05, "loss": 0.8321, "step": 43440 }, { "epoch": 1.6725697786333011, "grad_norm": 1.169967770576477, "learning_rate": 1.2993606608786913e-05, "loss": 0.7029, "step": 43445 }, { "epoch": 1.6727622714148218, "grad_norm": 1.8645579814910889, "learning_rate": 1.2978704309292789e-05, "loss": 0.8151, "step": 43450 }, { "epoch": 1.6729547641963425, "grad_norm": 0.84092777967453, "learning_rate": 1.2963809967319685e-05, "loss": 0.8796, "step": 43455 }, { "epoch": 1.6731472569778634, "grad_norm": 1.5797427892684937, "learning_rate": 1.2948923584229622e-05, "loss": 0.8069, "step": 43460 }, { "epoch": 1.673339749759384, "grad_norm": 1.3241761922836304, "learning_rate": 1.2934045161383824e-05, "loss": 0.9362, "step": 43465 }, { "epoch": 
1.6735322425409047, "grad_norm": 1.459402322769165, "learning_rate": 1.2919174700142822e-05, "loss": 0.8827, "step": 43470 }, { "epoch": 1.6737247353224254, "grad_norm": 1.3563963174819946, "learning_rate": 1.2904312201866443e-05, "loss": 0.7035, "step": 43475 }, { "epoch": 1.673917228103946, "grad_norm": 1.3299168348312378, "learning_rate": 1.2889457667913785e-05, "loss": 0.8429, "step": 43480 }, { "epoch": 1.674109720885467, "grad_norm": 1.492050051689148, "learning_rate": 1.2874611099643108e-05, "loss": 0.9821, "step": 43485 }, { "epoch": 1.6743022136669876, "grad_norm": 0.9983692169189453, "learning_rate": 1.2859772498412149e-05, "loss": 0.6268, "step": 43490 }, { "epoch": 1.6744947064485083, "grad_norm": 1.0508897304534912, "learning_rate": 1.2844941865577719e-05, "loss": 0.8499, "step": 43495 }, { "epoch": 1.674687199230029, "grad_norm": 1.9004470109939575, "learning_rate": 1.2830119202496016e-05, "loss": 0.8397, "step": 43500 }, { "epoch": 1.6748796920115496, "grad_norm": 1.5322777032852173, "learning_rate": 1.2815304510522453e-05, "loss": 0.9092, "step": 43505 }, { "epoch": 1.6750721847930703, "grad_norm": 1.5270432233810425, "learning_rate": 1.2800497791011768e-05, "loss": 0.739, "step": 43510 }, { "epoch": 1.675264677574591, "grad_norm": 1.1705281734466553, "learning_rate": 1.2785699045317878e-05, "loss": 0.7841, "step": 43515 }, { "epoch": 1.6754571703561116, "grad_norm": 1.2318379878997803, "learning_rate": 1.2770908274794102e-05, "loss": 0.7705, "step": 43520 }, { "epoch": 1.6756496631376323, "grad_norm": 1.481648325920105, "learning_rate": 1.2756125480792912e-05, "loss": 0.92, "step": 43525 }, { "epoch": 1.675842155919153, "grad_norm": 1.4268490076065063, "learning_rate": 1.2741350664666108e-05, "loss": 0.8993, "step": 43530 }, { "epoch": 1.6760346487006736, "grad_norm": 1.973768711090088, "learning_rate": 1.2726583827764748e-05, "loss": 0.7195, "step": 43535 }, { "epoch": 1.6762271414821943, "grad_norm": 1.2458196878433228, "learning_rate": 
1.271182497143919e-05, "loss": 0.7451, "step": 43540 }, { "epoch": 1.676419634263715, "grad_norm": 1.4529706239700317, "learning_rate": 1.2697074097038964e-05, "loss": 0.726, "step": 43545 }, { "epoch": 1.6766121270452357, "grad_norm": 2.0508511066436768, "learning_rate": 1.2682331205913012e-05, "loss": 0.8247, "step": 43550 }, { "epoch": 1.6768046198267565, "grad_norm": 1.2535037994384766, "learning_rate": 1.2667596299409434e-05, "loss": 0.8559, "step": 43555 }, { "epoch": 1.6769971126082772, "grad_norm": 1.0279282331466675, "learning_rate": 1.2652869378875654e-05, "loss": 0.7823, "step": 43560 }, { "epoch": 1.6771896053897979, "grad_norm": 0.9795730113983154, "learning_rate": 1.2638150445658337e-05, "loss": 0.6945, "step": 43565 }, { "epoch": 1.6773820981713186, "grad_norm": 1.2508692741394043, "learning_rate": 1.2623439501103452e-05, "loss": 0.6393, "step": 43570 }, { "epoch": 1.6775745909528392, "grad_norm": 1.5805447101593018, "learning_rate": 1.260873654655622e-05, "loss": 0.7677, "step": 43575 }, { "epoch": 1.6777670837343601, "grad_norm": 1.477809190750122, "learning_rate": 1.2594041583361105e-05, "loss": 0.8169, "step": 43580 }, { "epoch": 1.6779595765158808, "grad_norm": 1.8884755373001099, "learning_rate": 1.257935461286187e-05, "loss": 0.7033, "step": 43585 }, { "epoch": 1.6781520692974015, "grad_norm": 1.1278042793273926, "learning_rate": 1.2564675636401557e-05, "loss": 0.6912, "step": 43590 }, { "epoch": 1.6783445620789221, "grad_norm": 1.1749690771102905, "learning_rate": 1.2550004655322457e-05, "loss": 0.8726, "step": 43595 }, { "epoch": 1.6785370548604428, "grad_norm": 2.014747142791748, "learning_rate": 1.2535341670966094e-05, "loss": 0.835, "step": 43600 }, { "epoch": 1.6787295476419635, "grad_norm": 1.0013171434402466, "learning_rate": 1.2520686684673377e-05, "loss": 0.7554, "step": 43605 }, { "epoch": 1.6789220404234841, "grad_norm": 2.000584602355957, "learning_rate": 1.2506039697784345e-05, "loss": 0.9073, "step": 43610 }, { "epoch": 
1.6791145332050048, "grad_norm": 1.03412926197052, "learning_rate": 1.2491400711638378e-05, "loss": 0.9339, "step": 43615 }, { "epoch": 1.6793070259865255, "grad_norm": 1.905938744544983, "learning_rate": 1.2476769727574133e-05, "loss": 0.7793, "step": 43620 }, { "epoch": 1.6794995187680462, "grad_norm": 1.2626748085021973, "learning_rate": 1.2462146746929538e-05, "loss": 0.6789, "step": 43625 }, { "epoch": 1.6796920115495668, "grad_norm": 1.5359333753585815, "learning_rate": 1.2447531771041677e-05, "loss": 0.7657, "step": 43630 }, { "epoch": 1.6798845043310875, "grad_norm": 0.68184894323349, "learning_rate": 1.2432924801247115e-05, "loss": 1.0114, "step": 43635 }, { "epoch": 1.6800769971126082, "grad_norm": 2.0016860961914062, "learning_rate": 1.2418325838881462e-05, "loss": 0.7352, "step": 43640 }, { "epoch": 1.6802694898941288, "grad_norm": 1.29106605052948, "learning_rate": 1.240373488527975e-05, "loss": 0.743, "step": 43645 }, { "epoch": 1.6804619826756495, "grad_norm": 1.8269158601760864, "learning_rate": 1.23891519417762e-05, "loss": 0.8502, "step": 43650 }, { "epoch": 1.6806544754571704, "grad_norm": 0.9514836072921753, "learning_rate": 1.2374577009704357e-05, "loss": 0.8288, "step": 43655 }, { "epoch": 1.680846968238691, "grad_norm": 1.3545150756835938, "learning_rate": 1.2360010090396968e-05, "loss": 0.9582, "step": 43660 }, { "epoch": 1.6810394610202117, "grad_norm": 1.4287010431289673, "learning_rate": 1.2345451185186097e-05, "loss": 0.8761, "step": 43665 }, { "epoch": 1.6812319538017324, "grad_norm": 0.9901794195175171, "learning_rate": 1.2330900295403048e-05, "loss": 0.878, "step": 43670 }, { "epoch": 1.681424446583253, "grad_norm": 1.1503381729125977, "learning_rate": 1.231635742237841e-05, "loss": 0.747, "step": 43675 }, { "epoch": 1.681616939364774, "grad_norm": 0.8054229617118835, "learning_rate": 1.2301822567442067e-05, "loss": 0.724, "step": 43680 }, { "epoch": 1.6818094321462946, "grad_norm": 2.6240620613098145, "learning_rate": 
1.2287295731923077e-05, "loss": 0.7472, "step": 43685 }, { "epoch": 1.6820019249278153, "grad_norm": 0.9073877930641174, "learning_rate": 1.2272776917149841e-05, "loss": 0.7837, "step": 43690 }, { "epoch": 1.682194417709336, "grad_norm": 0.9025770425796509, "learning_rate": 1.2258266124450024e-05, "loss": 0.8009, "step": 43695 }, { "epoch": 1.6823869104908566, "grad_norm": 0.9586546421051025, "learning_rate": 1.224376335515055e-05, "loss": 0.8342, "step": 43700 }, { "epoch": 1.6825794032723773, "grad_norm": 1.0272797346115112, "learning_rate": 1.222926861057755e-05, "loss": 0.8561, "step": 43705 }, { "epoch": 1.682771896053898, "grad_norm": 1.4825763702392578, "learning_rate": 1.2214781892056548e-05, "loss": 0.938, "step": 43710 }, { "epoch": 1.6829643888354187, "grad_norm": 1.5734376907348633, "learning_rate": 1.2200303200912199e-05, "loss": 0.7255, "step": 43715 }, { "epoch": 1.6831568816169393, "grad_norm": 1.2651904821395874, "learning_rate": 1.21858325384685e-05, "loss": 0.762, "step": 43720 }, { "epoch": 1.68334937439846, "grad_norm": 1.516546368598938, "learning_rate": 1.2171369906048703e-05, "loss": 0.7557, "step": 43725 }, { "epoch": 1.6835418671799807, "grad_norm": 1.8338733911514282, "learning_rate": 1.2156915304975325e-05, "loss": 0.7487, "step": 43730 }, { "epoch": 1.6837343599615013, "grad_norm": 2.001396417617798, "learning_rate": 1.21424687365701e-05, "loss": 0.8106, "step": 43735 }, { "epoch": 1.683926852743022, "grad_norm": 1.8002668619155884, "learning_rate": 1.212803020215415e-05, "loss": 0.6761, "step": 43740 }, { "epoch": 1.6841193455245427, "grad_norm": 1.0602316856384277, "learning_rate": 1.2113599703047728e-05, "loss": 0.7588, "step": 43745 }, { "epoch": 1.6843118383060636, "grad_norm": 1.10280442237854, "learning_rate": 1.2099177240570403e-05, "loss": 0.8926, "step": 43750 }, { "epoch": 1.6845043310875842, "grad_norm": 1.1837788820266724, "learning_rate": 1.208476281604104e-05, "loss": 0.8216, "step": 43755 }, { "epoch": 1.684696823869105, 
"grad_norm": 1.3022428750991821, "learning_rate": 1.2070356430777752e-05, "loss": 0.6659, "step": 43760 }, { "epoch": 1.6848893166506256, "grad_norm": 1.8487025499343872, "learning_rate": 1.205595808609784e-05, "loss": 0.7536, "step": 43765 }, { "epoch": 1.6850818094321462, "grad_norm": 1.2362842559814453, "learning_rate": 1.2041567783318031e-05, "loss": 0.8764, "step": 43770 }, { "epoch": 1.6852743022136671, "grad_norm": 1.0950088500976562, "learning_rate": 1.2027185523754159e-05, "loss": 0.9292, "step": 43775 }, { "epoch": 1.6854667949951878, "grad_norm": 1.8685953617095947, "learning_rate": 1.2012811308721395e-05, "loss": 0.6901, "step": 43780 }, { "epoch": 1.6856592877767085, "grad_norm": 1.1124467849731445, "learning_rate": 1.1998445139534209e-05, "loss": 0.835, "step": 43785 }, { "epoch": 1.6858517805582292, "grad_norm": 1.07558012008667, "learning_rate": 1.1984087017506228e-05, "loss": 0.8145, "step": 43790 }, { "epoch": 1.6860442733397498, "grad_norm": 1.631927490234375, "learning_rate": 1.1969736943950439e-05, "loss": 0.9775, "step": 43795 }, { "epoch": 1.6862367661212705, "grad_norm": 1.3678085803985596, "learning_rate": 1.1955394920179053e-05, "loss": 0.7821, "step": 43800 }, { "epoch": 1.6864292589027912, "grad_norm": 1.4719831943511963, "learning_rate": 1.1941060947503591e-05, "loss": 0.7762, "step": 43805 }, { "epoch": 1.6866217516843118, "grad_norm": 1.012670636177063, "learning_rate": 1.1926735027234726e-05, "loss": 0.8692, "step": 43810 }, { "epoch": 1.6868142444658325, "grad_norm": 1.3539128303527832, "learning_rate": 1.1912417160682543e-05, "loss": 0.6571, "step": 43815 }, { "epoch": 1.6870067372473532, "grad_norm": 1.2712576389312744, "learning_rate": 1.1898107349156274e-05, "loss": 0.7417, "step": 43820 }, { "epoch": 1.6871992300288738, "grad_norm": 2.003711462020874, "learning_rate": 1.188380559396446e-05, "loss": 0.8174, "step": 43825 }, { "epoch": 1.6873917228103945, "grad_norm": 2.019716739654541, "learning_rate": 1.186951189641491e-05, 
"loss": 0.6646, "step": 43830 }, { "epoch": 1.6875842155919152, "grad_norm": 1.0991610288619995, "learning_rate": 1.1855226257814688e-05, "loss": 0.7247, "step": 43835 }, { "epoch": 1.6877767083734359, "grad_norm": 1.6221376657485962, "learning_rate": 1.184094867947011e-05, "loss": 0.8466, "step": 43840 }, { "epoch": 1.6879692011549567, "grad_norm": 1.4020202159881592, "learning_rate": 1.1826679162686805e-05, "loss": 0.8511, "step": 43845 }, { "epoch": 1.6881616939364774, "grad_norm": 1.3694734573364258, "learning_rate": 1.1812417708769552e-05, "loss": 0.8935, "step": 43850 }, { "epoch": 1.688354186717998, "grad_norm": 1.3666578531265259, "learning_rate": 1.1798164319022554e-05, "loss": 0.8224, "step": 43855 }, { "epoch": 1.6885466794995188, "grad_norm": 1.5298011302947998, "learning_rate": 1.1783918994749122e-05, "loss": 0.834, "step": 43860 }, { "epoch": 1.6887391722810394, "grad_norm": 1.589240550994873, "learning_rate": 1.1769681737251914e-05, "loss": 0.7738, "step": 43865 }, { "epoch": 1.6889316650625603, "grad_norm": 1.132411241531372, "learning_rate": 1.1755452547832846e-05, "loss": 0.8, "step": 43870 }, { "epoch": 1.689124157844081, "grad_norm": 0.9924208521842957, "learning_rate": 1.1741231427793097e-05, "loss": 0.7401, "step": 43875 }, { "epoch": 1.6893166506256017, "grad_norm": 1.3757164478302002, "learning_rate": 1.172701837843304e-05, "loss": 0.9371, "step": 43880 }, { "epoch": 1.6895091434071223, "grad_norm": 1.4748291969299316, "learning_rate": 1.1712813401052414e-05, "loss": 0.7713, "step": 43885 }, { "epoch": 1.689701636188643, "grad_norm": 2.083977460861206, "learning_rate": 1.1698616496950143e-05, "loss": 0.7284, "step": 43890 }, { "epoch": 1.6898941289701637, "grad_norm": 1.163346529006958, "learning_rate": 1.1684427667424458e-05, "loss": 0.7961, "step": 43895 }, { "epoch": 1.6900866217516843, "grad_norm": 1.0689622163772583, "learning_rate": 1.1670246913772841e-05, "loss": 0.8038, "step": 43900 }, { "epoch": 1.690279114533205, "grad_norm": 
1.596536636352539, "learning_rate": 1.1656074237291991e-05, "loss": 0.852, "step": 43905 }, { "epoch": 1.6904716073147257, "grad_norm": 2.0381526947021484, "learning_rate": 1.1641909639277936e-05, "loss": 0.8369, "step": 43910 }, { "epoch": 1.6906641000962463, "grad_norm": 0.8638001084327698, "learning_rate": 1.162775312102592e-05, "loss": 0.5935, "step": 43915 }, { "epoch": 1.690856592877767, "grad_norm": 1.7793620824813843, "learning_rate": 1.161360468383048e-05, "loss": 0.8603, "step": 43920 }, { "epoch": 1.6910490856592877, "grad_norm": 1.2274523973464966, "learning_rate": 1.1599464328985355e-05, "loss": 0.8225, "step": 43925 }, { "epoch": 1.6912415784408084, "grad_norm": 1.3787938356399536, "learning_rate": 1.158533205778366e-05, "loss": 0.9415, "step": 43930 }, { "epoch": 1.691434071222329, "grad_norm": 1.7021337747573853, "learning_rate": 1.157120787151763e-05, "loss": 0.9423, "step": 43935 }, { "epoch": 1.6916265640038497, "grad_norm": 1.1812869310379028, "learning_rate": 1.1557091771478855e-05, "loss": 0.8007, "step": 43940 }, { "epoch": 1.6918190567853706, "grad_norm": 1.0627899169921875, "learning_rate": 1.1542983758958148e-05, "loss": 0.8179, "step": 43945 }, { "epoch": 1.6920115495668913, "grad_norm": 1.115240216255188, "learning_rate": 1.152888383524563e-05, "loss": 0.811, "step": 43950 }, { "epoch": 1.692204042348412, "grad_norm": 0.7895986437797546, "learning_rate": 1.151479200163058e-05, "loss": 0.8463, "step": 43955 }, { "epoch": 1.6923965351299326, "grad_norm": 1.0171902179718018, "learning_rate": 1.1500708259401682e-05, "loss": 0.7925, "step": 43960 }, { "epoch": 1.6925890279114533, "grad_norm": 0.9279621243476868, "learning_rate": 1.1486632609846726e-05, "loss": 0.7073, "step": 43965 }, { "epoch": 1.6927815206929742, "grad_norm": 1.056172490119934, "learning_rate": 1.1472565054252882e-05, "loss": 0.7971, "step": 43970 }, { "epoch": 1.6929740134744948, "grad_norm": 1.2332504987716675, "learning_rate": 1.1458505593906522e-05, "loss": 0.8648, 
"step": 43975 }, { "epoch": 1.6931665062560155, "grad_norm": 1.2039694786071777, "learning_rate": 1.1444454230093315e-05, "loss": 0.7207, "step": 43980 }, { "epoch": 1.6933589990375362, "grad_norm": 1.3070398569107056, "learning_rate": 1.1430410964098115e-05, "loss": 0.6398, "step": 43985 }, { "epoch": 1.6935514918190568, "grad_norm": 1.1500900983810425, "learning_rate": 1.1416375797205114e-05, "loss": 0.7082, "step": 43990 }, { "epoch": 1.6937439846005775, "grad_norm": 2.088250160217285, "learning_rate": 1.1402348730697731e-05, "loss": 0.7245, "step": 43995 }, { "epoch": 1.6939364773820982, "grad_norm": 1.7187893390655518, "learning_rate": 1.1388329765858651e-05, "loss": 0.7806, "step": 44000 }, { "epoch": 1.6941289701636189, "grad_norm": 1.2111575603485107, "learning_rate": 1.137431890396985e-05, "loss": 0.6781, "step": 44005 }, { "epoch": 1.6943214629451395, "grad_norm": 1.4132143259048462, "learning_rate": 1.1360316146312455e-05, "loss": 1.0329, "step": 44010 }, { "epoch": 1.6945139557266602, "grad_norm": 1.3241591453552246, "learning_rate": 1.1346321494166978e-05, "loss": 0.8164, "step": 44015 }, { "epoch": 1.6947064485081809, "grad_norm": 2.9741406440734863, "learning_rate": 1.1332334948813117e-05, "loss": 0.8342, "step": 44020 }, { "epoch": 1.6948989412897015, "grad_norm": 1.5332659482955933, "learning_rate": 1.1318356511529871e-05, "loss": 0.6292, "step": 44025 }, { "epoch": 1.6950914340712222, "grad_norm": 1.2463070154190063, "learning_rate": 1.1304386183595428e-05, "loss": 0.7732, "step": 44030 }, { "epoch": 1.6952839268527429, "grad_norm": 1.6358565092086792, "learning_rate": 1.1290423966287345e-05, "loss": 0.7089, "step": 44035 }, { "epoch": 1.6954764196342638, "grad_norm": 1.1293914318084717, "learning_rate": 1.1276469860882332e-05, "loss": 0.8117, "step": 44040 }, { "epoch": 1.6956689124157844, "grad_norm": 1.2490708827972412, "learning_rate": 1.1262523868656405e-05, "loss": 0.8166, "step": 44045 }, { "epoch": 1.695861405197305, "grad_norm": 
1.3659740686416626, "learning_rate": 1.124858599088484e-05, "loss": 0.7455, "step": 44050 }, { "epoch": 1.6960538979788258, "grad_norm": 1.2188682556152344, "learning_rate": 1.1234656228842177e-05, "loss": 0.8226, "step": 44055 }, { "epoch": 1.6962463907603464, "grad_norm": 1.0454777479171753, "learning_rate": 1.122073458380215e-05, "loss": 0.8888, "step": 44060 }, { "epoch": 1.6964388835418673, "grad_norm": 2.2984609603881836, "learning_rate": 1.1206821057037886e-05, "loss": 0.8094, "step": 44065 }, { "epoch": 1.696631376323388, "grad_norm": 1.107490062713623, "learning_rate": 1.11929156498216e-05, "loss": 0.8469, "step": 44070 }, { "epoch": 1.6968238691049087, "grad_norm": 0.9354246854782104, "learning_rate": 1.1179018363424899e-05, "loss": 0.6261, "step": 44075 }, { "epoch": 1.6970163618864293, "grad_norm": 1.164993405342102, "learning_rate": 1.1165129199118574e-05, "loss": 0.61, "step": 44080 }, { "epoch": 1.69720885466795, "grad_norm": 0.9452062845230103, "learning_rate": 1.1151248158172722e-05, "loss": 0.7721, "step": 44085 }, { "epoch": 1.6974013474494707, "grad_norm": 0.8486310839653015, "learning_rate": 1.1137375241856619e-05, "loss": 0.9994, "step": 44090 }, { "epoch": 1.6975938402309914, "grad_norm": 1.195550560951233, "learning_rate": 1.1123510451438934e-05, "loss": 0.812, "step": 44095 }, { "epoch": 1.697786333012512, "grad_norm": 1.6935982704162598, "learning_rate": 1.1109653788187447e-05, "loss": 0.7816, "step": 44100 }, { "epoch": 1.6979788257940327, "grad_norm": 1.238194465637207, "learning_rate": 1.1095805253369274e-05, "loss": 0.8888, "step": 44105 }, { "epoch": 1.6981713185755534, "grad_norm": 1.1349773406982422, "learning_rate": 1.108196484825077e-05, "loss": 0.6549, "step": 44110 }, { "epoch": 1.698363811357074, "grad_norm": 1.0032782554626465, "learning_rate": 1.1068132574097557e-05, "loss": 0.7298, "step": 44115 }, { "epoch": 1.6985563041385947, "grad_norm": 2.31643009185791, "learning_rate": 1.1054308432174521e-05, "loss": 0.871, "step": 
44120 }, { "epoch": 1.6987487969201154, "grad_norm": 1.0429223775863647, "learning_rate": 1.1040492423745752e-05, "loss": 0.7892, "step": 44125 }, { "epoch": 1.698941289701636, "grad_norm": 1.4634356498718262, "learning_rate": 1.102668455007464e-05, "loss": 0.679, "step": 44130 }, { "epoch": 1.6991337824831567, "grad_norm": 1.5524640083312988, "learning_rate": 1.1012884812423829e-05, "loss": 0.9072, "step": 44135 }, { "epoch": 1.6993262752646776, "grad_norm": 0.9184067845344543, "learning_rate": 1.0999093212055244e-05, "loss": 0.6238, "step": 44140 }, { "epoch": 1.6995187680461983, "grad_norm": 1.144820213317871, "learning_rate": 1.0985309750229966e-05, "loss": 0.7566, "step": 44145 }, { "epoch": 1.699711260827719, "grad_norm": 1.1043323278427124, "learning_rate": 1.0971534428208485e-05, "loss": 0.7834, "step": 44150 }, { "epoch": 1.6999037536092396, "grad_norm": 0.9373242259025574, "learning_rate": 1.0957767247250395e-05, "loss": 0.7216, "step": 44155 }, { "epoch": 1.7000962463907605, "grad_norm": 0.977449357509613, "learning_rate": 1.0944008208614643e-05, "loss": 0.7585, "step": 44160 }, { "epoch": 1.7002887391722812, "grad_norm": 1.5112518072128296, "learning_rate": 1.09302573135594e-05, "loss": 0.7718, "step": 44165 }, { "epoch": 1.7004812319538019, "grad_norm": 1.095468282699585, "learning_rate": 1.0916514563342106e-05, "loss": 0.9487, "step": 44170 }, { "epoch": 1.7006737247353225, "grad_norm": 1.084921956062317, "learning_rate": 1.0902779959219401e-05, "loss": 0.8464, "step": 44175 }, { "epoch": 1.7008662175168432, "grad_norm": 2.3119983673095703, "learning_rate": 1.0889053502447278e-05, "loss": 0.7722, "step": 44180 }, { "epoch": 1.7010587102983639, "grad_norm": 0.5821973085403442, "learning_rate": 1.08753351942809e-05, "loss": 0.715, "step": 44185 }, { "epoch": 1.7012512030798845, "grad_norm": 1.1047406196594238, "learning_rate": 1.086162503597472e-05, "loss": 0.7629, "step": 44190 }, { "epoch": 1.7014436958614052, "grad_norm": 1.3281866312026978, 
"learning_rate": 1.0847923028782437e-05, "loss": 0.7576, "step": 44195 }, { "epoch": 1.7016361886429259, "grad_norm": 1.2818950414657593, "learning_rate": 1.0834229173957045e-05, "loss": 0.722, "step": 44200 }, { "epoch": 1.7018286814244465, "grad_norm": 0.4923422336578369, "learning_rate": 1.08205434727507e-05, "loss": 0.7202, "step": 44205 }, { "epoch": 1.7020211742059672, "grad_norm": 1.5802128314971924, "learning_rate": 1.0806865926414889e-05, "loss": 0.7272, "step": 44210 }, { "epoch": 1.7022136669874879, "grad_norm": 2.5249600410461426, "learning_rate": 1.0793196536200346e-05, "loss": 0.8254, "step": 44215 }, { "epoch": 1.7024061597690086, "grad_norm": 1.4376665353775024, "learning_rate": 1.0779535303357035e-05, "loss": 0.8114, "step": 44220 }, { "epoch": 1.7025986525505292, "grad_norm": 1.4501975774765015, "learning_rate": 1.0765882229134205e-05, "loss": 0.953, "step": 44225 }, { "epoch": 1.70279114533205, "grad_norm": 1.0345442295074463, "learning_rate": 1.0752237314780311e-05, "loss": 0.7878, "step": 44230 }, { "epoch": 1.7029836381135708, "grad_norm": 0.9220666289329529, "learning_rate": 1.07386005615431e-05, "loss": 0.8297, "step": 44235 }, { "epoch": 1.7031761308950915, "grad_norm": 1.8300734758377075, "learning_rate": 1.0724971970669561e-05, "loss": 0.7797, "step": 44240 }, { "epoch": 1.7033686236766121, "grad_norm": 1.5990431308746338, "learning_rate": 1.0711351543405967e-05, "loss": 0.79, "step": 44245 }, { "epoch": 1.7035611164581328, "grad_norm": 1.114249587059021, "learning_rate": 1.0697739280997753e-05, "loss": 0.7867, "step": 44250 }, { "epoch": 1.7037536092396535, "grad_norm": 1.217193603515625, "learning_rate": 1.0684135184689748e-05, "loss": 0.756, "step": 44255 }, { "epoch": 1.7039461020211744, "grad_norm": 1.1771777868270874, "learning_rate": 1.0670539255725886e-05, "loss": 0.7378, "step": 44260 }, { "epoch": 1.704138594802695, "grad_norm": 1.1569862365722656, "learning_rate": 1.0656951495349466e-05, "loss": 0.8281, "step": 44265 }, { 
"epoch": 1.7043310875842157, "grad_norm": 1.5429112911224365, "learning_rate": 1.064337190480299e-05, "loss": 0.7918, "step": 44270 }, { "epoch": 1.7045235803657364, "grad_norm": 1.192523717880249, "learning_rate": 1.0629800485328235e-05, "loss": 0.8646, "step": 44275 }, { "epoch": 1.704716073147257, "grad_norm": 2.4051706790924072, "learning_rate": 1.061623723816616e-05, "loss": 0.7809, "step": 44280 }, { "epoch": 1.7049085659287777, "grad_norm": 1.8021824359893799, "learning_rate": 1.0602682164557121e-05, "loss": 0.8048, "step": 44285 }, { "epoch": 1.7051010587102984, "grad_norm": 0.9928067922592163, "learning_rate": 1.0589135265740569e-05, "loss": 0.8615, "step": 44290 }, { "epoch": 1.705293551491819, "grad_norm": 0.8101016879081726, "learning_rate": 1.0575596542955312e-05, "loss": 0.6574, "step": 44295 }, { "epoch": 1.7054860442733397, "grad_norm": 1.1880675554275513, "learning_rate": 1.0562065997439364e-05, "loss": 0.7205, "step": 44300 }, { "epoch": 1.7056785370548604, "grad_norm": 1.8503644466400146, "learning_rate": 1.054854363043003e-05, "loss": 0.8311, "step": 44305 }, { "epoch": 1.705871029836381, "grad_norm": 1.2988847494125366, "learning_rate": 1.053502944316378e-05, "loss": 0.8, "step": 44310 }, { "epoch": 1.7060635226179017, "grad_norm": 1.75754714012146, "learning_rate": 1.0521523436876479e-05, "loss": 0.7782, "step": 44315 }, { "epoch": 1.7062560153994224, "grad_norm": 1.1873159408569336, "learning_rate": 1.0508025612803096e-05, "loss": 0.8162, "step": 44320 }, { "epoch": 1.706448508180943, "grad_norm": 1.8230105638504028, "learning_rate": 1.0494535972177932e-05, "loss": 0.7841, "step": 44325 }, { "epoch": 1.706641000962464, "grad_norm": 1.1108636856079102, "learning_rate": 1.0481054516234546e-05, "loss": 0.7711, "step": 44330 }, { "epoch": 1.7068334937439846, "grad_norm": 1.4503856897354126, "learning_rate": 1.0467581246205726e-05, "loss": 0.8515, "step": 44335 }, { "epoch": 1.7070259865255053, "grad_norm": 1.6712833642959595, "learning_rate": 
1.0454116163323491e-05, "loss": 0.7857, "step": 44340 }, { "epoch": 1.707218479307026, "grad_norm": 2.288127899169922, "learning_rate": 1.0440659268819143e-05, "loss": 0.6903, "step": 44345 }, { "epoch": 1.7074109720885466, "grad_norm": 1.7935099601745605, "learning_rate": 1.0427210563923228e-05, "loss": 0.7954, "step": 44350 }, { "epoch": 1.7076034648700675, "grad_norm": 1.5855534076690674, "learning_rate": 1.0413770049865546e-05, "loss": 0.8879, "step": 44355 }, { "epoch": 1.7077959576515882, "grad_norm": 0.9473410844802856, "learning_rate": 1.0400337727875153e-05, "loss": 1.0308, "step": 44360 }, { "epoch": 1.7079884504331089, "grad_norm": 1.1841708421707153, "learning_rate": 1.0386913599180293e-05, "loss": 0.9523, "step": 44365 }, { "epoch": 1.7081809432146295, "grad_norm": 1.8651456832885742, "learning_rate": 1.037349766500859e-05, "loss": 0.8538, "step": 44370 }, { "epoch": 1.7083734359961502, "grad_norm": 1.2280970811843872, "learning_rate": 1.036008992658679e-05, "loss": 0.7538, "step": 44375 }, { "epoch": 1.7085659287776709, "grad_norm": 1.300763726234436, "learning_rate": 1.0346690385140956e-05, "loss": 0.9075, "step": 44380 }, { "epoch": 1.7087584215591916, "grad_norm": 1.2974258661270142, "learning_rate": 1.0333299041896383e-05, "loss": 0.8188, "step": 44385 }, { "epoch": 1.7089509143407122, "grad_norm": 1.9830495119094849, "learning_rate": 1.0319915898077648e-05, "loss": 0.7887, "step": 44390 }, { "epoch": 1.709143407122233, "grad_norm": 1.0923196077346802, "learning_rate": 1.0306540954908483e-05, "loss": 0.8392, "step": 44395 }, { "epoch": 1.7093358999037536, "grad_norm": 1.537200689315796, "learning_rate": 1.0293174213612023e-05, "loss": 0.8336, "step": 44400 }, { "epoch": 1.7095283926852742, "grad_norm": 1.2134389877319336, "learning_rate": 1.027981567541051e-05, "loss": 0.7743, "step": 44405 }, { "epoch": 1.709720885466795, "grad_norm": 0.6608363389968872, "learning_rate": 1.026646534152551e-05, "loss": 0.6123, "step": 44410 }, { "epoch": 
1.7099133782483156, "grad_norm": 1.0624284744262695, "learning_rate": 1.0253123213177828e-05, "loss": 0.6536, "step": 44415 }, { "epoch": 1.7101058710298362, "grad_norm": 1.9946283102035522, "learning_rate": 1.0239789291587531e-05, "loss": 0.8706, "step": 44420 }, { "epoch": 1.710298363811357, "grad_norm": 1.8715145587921143, "learning_rate": 1.0226463577973877e-05, "loss": 0.8116, "step": 44425 }, { "epoch": 1.7104908565928778, "grad_norm": 1.597334384918213, "learning_rate": 1.0213146073555424e-05, "loss": 0.6751, "step": 44430 }, { "epoch": 1.7106833493743985, "grad_norm": 1.811631202697754, "learning_rate": 1.0199836779549987e-05, "loss": 0.779, "step": 44435 }, { "epoch": 1.7108758421559191, "grad_norm": 1.2666516304016113, "learning_rate": 1.0186535697174603e-05, "loss": 0.8132, "step": 44440 }, { "epoch": 1.7110683349374398, "grad_norm": 1.3092554807662964, "learning_rate": 1.017324282764559e-05, "loss": 0.6895, "step": 44445 }, { "epoch": 1.7112608277189605, "grad_norm": 1.3765099048614502, "learning_rate": 1.0159958172178452e-05, "loss": 0.7155, "step": 44450 }, { "epoch": 1.7114533205004814, "grad_norm": 1.301037311553955, "learning_rate": 1.0146681731988006e-05, "loss": 0.827, "step": 44455 }, { "epoch": 1.711645813282002, "grad_norm": 1.229777455329895, "learning_rate": 1.0133413508288292e-05, "loss": 0.7375, "step": 44460 }, { "epoch": 1.7118383060635227, "grad_norm": 1.2582491636276245, "learning_rate": 1.0120153502292618e-05, "loss": 0.9079, "step": 44465 }, { "epoch": 1.7120307988450434, "grad_norm": 1.4134953022003174, "learning_rate": 1.0106901715213468e-05, "loss": 0.7504, "step": 44470 }, { "epoch": 1.712223291626564, "grad_norm": 1.3563551902770996, "learning_rate": 1.0093658148262709e-05, "loss": 0.7363, "step": 44475 }, { "epoch": 1.7124157844080847, "grad_norm": 1.642959713935852, "learning_rate": 1.0080422802651312e-05, "loss": 0.7629, "step": 44480 }, { "epoch": 1.7126082771896054, "grad_norm": 1.9216701984405518, "learning_rate": 
1.0067195679589591e-05, "loss": 0.8328, "step": 44485 }, { "epoch": 1.712800769971126, "grad_norm": 0.9144423007965088, "learning_rate": 1.0053976780287078e-05, "loss": 0.7265, "step": 44490 }, { "epoch": 1.7129932627526467, "grad_norm": 1.0702438354492188, "learning_rate": 1.0040766105952559e-05, "loss": 0.8567, "step": 44495 }, { "epoch": 1.7131857555341674, "grad_norm": 1.9012739658355713, "learning_rate": 1.0027563657794026e-05, "loss": 0.8425, "step": 44500 }, { "epoch": 1.713378248315688, "grad_norm": 1.2741745710372925, "learning_rate": 1.0014369437018823e-05, "loss": 0.8694, "step": 44505 }, { "epoch": 1.7135707410972087, "grad_norm": 2.203899383544922, "learning_rate": 1.0001183444833417e-05, "loss": 0.7839, "step": 44510 }, { "epoch": 1.7137632338787294, "grad_norm": 1.059478998184204, "learning_rate": 9.98800568244359e-06, "loss": 0.7702, "step": 44515 }, { "epoch": 1.71395572666025, "grad_norm": 2.4111287593841553, "learning_rate": 9.974836151054367e-06, "loss": 0.7798, "step": 44520 }, { "epoch": 1.714148219441771, "grad_norm": 1.2681207656860352, "learning_rate": 9.96167485187004e-06, "loss": 0.818, "step": 44525 }, { "epoch": 1.7143407122232917, "grad_norm": 1.0277059078216553, "learning_rate": 9.948521786094079e-06, "loss": 0.6552, "step": 44530 }, { "epoch": 1.7145332050048123, "grad_norm": 0.9331089854240417, "learning_rate": 9.935376954929265e-06, "loss": 0.8268, "step": 44535 }, { "epoch": 1.714725697786333, "grad_norm": 1.3255749940872192, "learning_rate": 9.922240359577606e-06, "loss": 0.8625, "step": 44540 }, { "epoch": 1.7149181905678537, "grad_norm": 1.65230393409729, "learning_rate": 9.90911200124035e-06, "loss": 0.7516, "step": 44545 }, { "epoch": 1.7151106833493746, "grad_norm": 1.164333462715149, "learning_rate": 9.895991881118028e-06, "loss": 0.8635, "step": 44550 }, { "epoch": 1.7153031761308952, "grad_norm": 1.0087164640426636, "learning_rate": 9.882880000410344e-06, "loss": 0.703, "step": 44555 }, { "epoch": 1.715495668912416, 
"grad_norm": 1.272603988647461, "learning_rate": 9.869776360316307e-06, "loss": 0.6907, "step": 44560 }, { "epoch": 1.7156881616939366, "grad_norm": 0.8092613816261292, "learning_rate": 9.85668096203417e-06, "loss": 0.7481, "step": 44565 }, { "epoch": 1.7158806544754572, "grad_norm": 2.1582627296447754, "learning_rate": 9.84359380676143e-06, "loss": 0.9793, "step": 44570 }, { "epoch": 1.716073147256978, "grad_norm": 2.2021024227142334, "learning_rate": 9.830514895694775e-06, "loss": 0.7774, "step": 44575 }, { "epoch": 1.7162656400384986, "grad_norm": 0.9477373361587524, "learning_rate": 9.817444230030247e-06, "loss": 0.7769, "step": 44580 }, { "epoch": 1.7164581328200192, "grad_norm": 1.2492976188659668, "learning_rate": 9.804381810963015e-06, "loss": 0.7954, "step": 44585 }, { "epoch": 1.71665062560154, "grad_norm": 1.2578282356262207, "learning_rate": 9.791327639687587e-06, "loss": 0.9006, "step": 44590 }, { "epoch": 1.7168431183830606, "grad_norm": 1.1162382364273071, "learning_rate": 9.778281717397652e-06, "loss": 0.755, "step": 44595 }, { "epoch": 1.7170356111645813, "grad_norm": 1.123497724533081, "learning_rate": 9.765244045286227e-06, "loss": 0.711, "step": 44600 }, { "epoch": 1.717228103946102, "grad_norm": 1.5501790046691895, "learning_rate": 9.75221462454543e-06, "loss": 0.8798, "step": 44605 }, { "epoch": 1.7174205967276226, "grad_norm": 1.510923147201538, "learning_rate": 9.739193456366813e-06, "loss": 0.8363, "step": 44610 }, { "epoch": 1.7176130895091433, "grad_norm": 1.6598632335662842, "learning_rate": 9.726180541941e-06, "loss": 0.8614, "step": 44615 }, { "epoch": 1.717805582290664, "grad_norm": 1.1419432163238525, "learning_rate": 9.713175882458003e-06, "loss": 0.8118, "step": 44620 }, { "epoch": 1.7179980750721848, "grad_norm": 1.0711899995803833, "learning_rate": 9.700179479106953e-06, "loss": 0.7971, "step": 44625 }, { "epoch": 1.7181905678537055, "grad_norm": 1.0233666896820068, "learning_rate": 9.687191333076306e-06, "loss": 0.7606, "step": 
44630 }, { "epoch": 1.7183830606352262, "grad_norm": 2.493830919265747, "learning_rate": 9.674211445553738e-06, "loss": 0.9428, "step": 44635 }, { "epoch": 1.7185755534167468, "grad_norm": 1.0951639413833618, "learning_rate": 9.661239817726209e-06, "loss": 0.9051, "step": 44640 }, { "epoch": 1.7187680461982677, "grad_norm": 1.3658279180526733, "learning_rate": 9.648276450779836e-06, "loss": 0.5982, "step": 44645 }, { "epoch": 1.7189605389797884, "grad_norm": 1.604722499847412, "learning_rate": 9.635321345900061e-06, "loss": 0.8538, "step": 44650 }, { "epoch": 1.719153031761309, "grad_norm": 1.2309184074401855, "learning_rate": 9.622374504271536e-06, "loss": 0.7353, "step": 44655 }, { "epoch": 1.7193455245428297, "grad_norm": 1.8740421533584595, "learning_rate": 9.60943592707817e-06, "loss": 0.8622, "step": 44660 }, { "epoch": 1.7195380173243504, "grad_norm": 1.5069630146026611, "learning_rate": 9.596505615503116e-06, "loss": 0.7259, "step": 44665 }, { "epoch": 1.719730510105871, "grad_norm": 0.8005820512771606, "learning_rate": 9.583583570728738e-06, "loss": 0.876, "step": 44670 }, { "epoch": 1.7199230028873917, "grad_norm": 1.9598007202148438, "learning_rate": 9.570669793936694e-06, "loss": 0.799, "step": 44675 }, { "epoch": 1.7201154956689124, "grad_norm": 1.1604948043823242, "learning_rate": 9.557764286307846e-06, "loss": 0.7464, "step": 44680 }, { "epoch": 1.720307988450433, "grad_norm": 0.9628298282623291, "learning_rate": 9.544867049022354e-06, "loss": 0.7404, "step": 44685 }, { "epoch": 1.7205004812319538, "grad_norm": 1.5390831232070923, "learning_rate": 9.53197808325953e-06, "loss": 0.8366, "step": 44690 }, { "epoch": 1.7206929740134744, "grad_norm": 0.8520711660385132, "learning_rate": 9.51909739019804e-06, "loss": 0.7651, "step": 44695 }, { "epoch": 1.720885466794995, "grad_norm": 1.1799733638763428, "learning_rate": 9.506224971015709e-06, "loss": 0.7417, "step": 44700 }, { "epoch": 1.7210779595765158, "grad_norm": 1.2342482805252075, "learning_rate": 
9.49336082688962e-06, "loss": 0.829, "step": 44705 }, { "epoch": 1.7212704523580364, "grad_norm": 1.1500221490859985, "learning_rate": 9.48050495899615e-06, "loss": 0.8516, "step": 44710 }, { "epoch": 1.721462945139557, "grad_norm": 1.6604115962982178, "learning_rate": 9.467657368510874e-06, "loss": 0.8381, "step": 44715 }, { "epoch": 1.721655437921078, "grad_norm": 1.3249857425689697, "learning_rate": 9.454818056608573e-06, "loss": 0.7726, "step": 44720 }, { "epoch": 1.7218479307025987, "grad_norm": 1.1568611860275269, "learning_rate": 9.441987024463384e-06, "loss": 0.7704, "step": 44725 }, { "epoch": 1.7220404234841193, "grad_norm": 1.5718170404434204, "learning_rate": 9.429164273248581e-06, "loss": 0.7952, "step": 44730 }, { "epoch": 1.72223291626564, "grad_norm": 1.3416763544082642, "learning_rate": 9.416349804136726e-06, "loss": 0.77, "step": 44735 }, { "epoch": 1.7224254090471607, "grad_norm": 1.5594481229782104, "learning_rate": 9.403543618299614e-06, "loss": 0.7476, "step": 44740 }, { "epoch": 1.7226179018286816, "grad_norm": 1.517449975013733, "learning_rate": 9.390745716908312e-06, "loss": 0.6838, "step": 44745 }, { "epoch": 1.7228103946102022, "grad_norm": 0.9632403254508972, "learning_rate": 9.377956101133068e-06, "loss": 0.7085, "step": 44750 }, { "epoch": 1.723002887391723, "grad_norm": 1.5700565576553345, "learning_rate": 9.365174772143426e-06, "loss": 0.8404, "step": 44755 }, { "epoch": 1.7231953801732436, "grad_norm": 1.4765150547027588, "learning_rate": 9.35240173110814e-06, "loss": 0.811, "step": 44760 }, { "epoch": 1.7233878729547643, "grad_norm": 1.0645157098770142, "learning_rate": 9.339636979195244e-06, "loss": 0.7075, "step": 44765 }, { "epoch": 1.723580365736285, "grad_norm": 1.7466483116149902, "learning_rate": 9.326880517571978e-06, "loss": 0.8613, "step": 44770 }, { "epoch": 1.7237728585178056, "grad_norm": 1.016494870185852, "learning_rate": 9.314132347404824e-06, "loss": 0.697, "step": 44775 }, { "epoch": 1.7239653512993263, 
"grad_norm": 1.2336843013763428, "learning_rate": 9.301392469859527e-06, "loss": 0.8933, "step": 44780 }, { "epoch": 1.724157844080847, "grad_norm": 1.7032203674316406, "learning_rate": 9.288660886101075e-06, "loss": 0.7887, "step": 44785 }, { "epoch": 1.7243503368623676, "grad_norm": 0.8340054154396057, "learning_rate": 9.275937597293682e-06, "loss": 0.8859, "step": 44790 }, { "epoch": 1.7245428296438883, "grad_norm": 1.1701608896255493, "learning_rate": 9.263222604600774e-06, "loss": 0.7429, "step": 44795 }, { "epoch": 1.724735322425409, "grad_norm": 0.891636073589325, "learning_rate": 9.25051590918512e-06, "loss": 0.7672, "step": 44800 }, { "epoch": 1.7249278152069296, "grad_norm": 1.2118922472000122, "learning_rate": 9.237817512208602e-06, "loss": 0.8563, "step": 44805 }, { "epoch": 1.7251203079884503, "grad_norm": 1.1670253276824951, "learning_rate": 9.225127414832436e-06, "loss": 0.8383, "step": 44810 }, { "epoch": 1.7253128007699712, "grad_norm": 1.722845435142517, "learning_rate": 9.212445618217035e-06, "loss": 0.8429, "step": 44815 }, { "epoch": 1.7255052935514918, "grad_norm": 1.2573529481887817, "learning_rate": 9.199772123522088e-06, "loss": 0.9031, "step": 44820 }, { "epoch": 1.7256977863330125, "grad_norm": 1.844647765159607, "learning_rate": 9.187106931906442e-06, "loss": 0.8546, "step": 44825 }, { "epoch": 1.7258902791145332, "grad_norm": 2.288663864135742, "learning_rate": 9.174450044528327e-06, "loss": 0.7559, "step": 44830 }, { "epoch": 1.7260827718960539, "grad_norm": 1.2201417684555054, "learning_rate": 9.161801462545084e-06, "loss": 0.7306, "step": 44835 }, { "epoch": 1.7262752646775748, "grad_norm": 1.2007523775100708, "learning_rate": 9.14916118711333e-06, "loss": 0.7666, "step": 44840 }, { "epoch": 1.7264677574590954, "grad_norm": 0.9047454595565796, "learning_rate": 9.136529219388968e-06, "loss": 0.7994, "step": 44845 }, { "epoch": 1.726660250240616, "grad_norm": 0.8384250402450562, "learning_rate": 9.123905560527102e-06, "loss": 0.6711, 
"step": 44850 }, { "epoch": 1.7268527430221368, "grad_norm": 1.0188740491867065, "learning_rate": 9.111290211682044e-06, "loss": 0.7907, "step": 44855 }, { "epoch": 1.7270452358036574, "grad_norm": 1.1332343816757202, "learning_rate": 9.09868317400745e-06, "loss": 0.7105, "step": 44860 }, { "epoch": 1.727237728585178, "grad_norm": 0.6927266716957092, "learning_rate": 9.086084448656095e-06, "loss": 0.9879, "step": 44865 }, { "epoch": 1.7274302213666988, "grad_norm": 1.233681559562683, "learning_rate": 9.073494036780062e-06, "loss": 0.7542, "step": 44870 }, { "epoch": 1.7276227141482194, "grad_norm": 1.4534273147583008, "learning_rate": 9.06091193953067e-06, "loss": 0.7775, "step": 44875 }, { "epoch": 1.72781520692974, "grad_norm": 1.0983202457427979, "learning_rate": 9.048338158058467e-06, "loss": 0.9082, "step": 44880 }, { "epoch": 1.7280076997112608, "grad_norm": 0.7827137112617493, "learning_rate": 9.035772693513256e-06, "loss": 0.7013, "step": 44885 }, { "epoch": 1.7282001924927815, "grad_norm": 1.1753028631210327, "learning_rate": 9.023215547044028e-06, "loss": 0.6837, "step": 44890 }, { "epoch": 1.7283926852743021, "grad_norm": 1.4475585222244263, "learning_rate": 9.010666719799077e-06, "loss": 0.7162, "step": 44895 }, { "epoch": 1.7285851780558228, "grad_norm": 1.2361037731170654, "learning_rate": 8.998126212925906e-06, "loss": 0.7424, "step": 44900 }, { "epoch": 1.7287776708373435, "grad_norm": 1.0601775646209717, "learning_rate": 8.985594027571276e-06, "loss": 0.6877, "step": 44905 }, { "epoch": 1.7289701636188641, "grad_norm": 1.1087630987167358, "learning_rate": 8.973070164881126e-06, "loss": 0.7211, "step": 44910 }, { "epoch": 1.729162656400385, "grad_norm": 1.3524638414382935, "learning_rate": 8.96055462600074e-06, "loss": 0.7259, "step": 44915 }, { "epoch": 1.7293551491819057, "grad_norm": 1.305503487586975, "learning_rate": 8.948047412074534e-06, "loss": 0.8636, "step": 44920 }, { "epoch": 1.7295476419634264, "grad_norm": 1.1058146953582764, 
"learning_rate": 8.935548524246229e-06, "loss": 0.8791, "step": 44925 }, { "epoch": 1.729740134744947, "grad_norm": 1.93928062915802, "learning_rate": 8.923057963658766e-06, "loss": 0.7892, "step": 44930 }, { "epoch": 1.7299326275264677, "grad_norm": 0.9320961236953735, "learning_rate": 8.91057573145434e-06, "loss": 0.8925, "step": 44935 }, { "epoch": 1.7301251203079886, "grad_norm": 1.0383516550064087, "learning_rate": 8.898101828774318e-06, "loss": 0.7483, "step": 44940 }, { "epoch": 1.7303176130895093, "grad_norm": 1.3876311779022217, "learning_rate": 8.885636256759422e-06, "loss": 0.8479, "step": 44945 }, { "epoch": 1.73051010587103, "grad_norm": 2.2601609230041504, "learning_rate": 8.873179016549505e-06, "loss": 0.9106, "step": 44950 }, { "epoch": 1.7307025986525506, "grad_norm": 0.9025186896324158, "learning_rate": 8.8607301092837e-06, "loss": 0.7625, "step": 44955 }, { "epoch": 1.7308950914340713, "grad_norm": 2.425801992416382, "learning_rate": 8.848289536100374e-06, "loss": 0.9814, "step": 44960 }, { "epoch": 1.731087584215592, "grad_norm": 1.1960424184799194, "learning_rate": 8.835857298137173e-06, "loss": 0.7639, "step": 44965 }, { "epoch": 1.7312800769971126, "grad_norm": 1.3602185249328613, "learning_rate": 8.8234333965309e-06, "loss": 0.7965, "step": 44970 }, { "epoch": 1.7314725697786333, "grad_norm": 1.1339243650436401, "learning_rate": 8.811017832417645e-06, "loss": 0.9131, "step": 44975 }, { "epoch": 1.731665062560154, "grad_norm": 1.8668417930603027, "learning_rate": 8.798610606932744e-06, "loss": 0.8395, "step": 44980 }, { "epoch": 1.7318575553416746, "grad_norm": 2.117316246032715, "learning_rate": 8.786211721210747e-06, "loss": 0.6666, "step": 44985 }, { "epoch": 1.7320500481231953, "grad_norm": 0.8929471373558044, "learning_rate": 8.773821176385466e-06, "loss": 0.829, "step": 44990 }, { "epoch": 1.732242540904716, "grad_norm": 2.004077196121216, "learning_rate": 8.761438973589908e-06, "loss": 0.8636, "step": 44995 }, { "epoch": 
1.7324350336862366, "grad_norm": 0.8660832643508911, "learning_rate": 8.749065113956357e-06, "loss": 0.7696, "step": 45000 }, { "epoch": 1.7326275264677573, "grad_norm": 1.3923887014389038, "learning_rate": 8.736699598616305e-06, "loss": 0.8972, "step": 45005 }, { "epoch": 1.7328200192492782, "grad_norm": 0.835381805896759, "learning_rate": 8.72434242870055e-06, "loss": 0.7019, "step": 45010 }, { "epoch": 1.7330125120307989, "grad_norm": 0.8896954655647278, "learning_rate": 8.711993605338985e-06, "loss": 0.8025, "step": 45015 }, { "epoch": 1.7332050048123195, "grad_norm": 1.4621267318725586, "learning_rate": 8.699653129660912e-06, "loss": 0.7648, "step": 45020 }, { "epoch": 1.7333974975938402, "grad_norm": 1.1056898832321167, "learning_rate": 8.687321002794734e-06, "loss": 0.7427, "step": 45025 }, { "epoch": 1.7335899903753609, "grad_norm": 1.546595573425293, "learning_rate": 8.674997225868165e-06, "loss": 0.8088, "step": 45030 }, { "epoch": 1.7337824831568818, "grad_norm": 0.7969051599502563, "learning_rate": 8.662681800008121e-06, "loss": 0.7576, "step": 45035 }, { "epoch": 1.7339749759384024, "grad_norm": 0.9694302678108215, "learning_rate": 8.650374726340793e-06, "loss": 0.9295, "step": 45040 }, { "epoch": 1.734167468719923, "grad_norm": 0.5235655903816223, "learning_rate": 8.638076005991524e-06, "loss": 0.7925, "step": 45045 }, { "epoch": 1.7343599615014438, "grad_norm": 1.4736652374267578, "learning_rate": 8.625785640085026e-06, "loss": 0.8665, "step": 45050 }, { "epoch": 1.7345524542829645, "grad_norm": 1.5209852457046509, "learning_rate": 8.613503629745113e-06, "loss": 0.8142, "step": 45055 }, { "epoch": 1.7347449470644851, "grad_norm": 1.1320667266845703, "learning_rate": 8.601229976094904e-06, "loss": 0.7985, "step": 45060 }, { "epoch": 1.7349374398460058, "grad_norm": 1.1330043077468872, "learning_rate": 8.588964680256761e-06, "loss": 0.8037, "step": 45065 }, { "epoch": 1.7351299326275265, "grad_norm": 1.6776443719863892, "learning_rate": 
8.576707743352275e-06, "loss": 0.8429, "step": 45070 }, { "epoch": 1.7353224254090471, "grad_norm": 0.8801172971725464, "learning_rate": 8.564459166502204e-06, "loss": 0.8425, "step": 45075 }, { "epoch": 1.7355149181905678, "grad_norm": 0.9202898740768433, "learning_rate": 8.552218950826662e-06, "loss": 0.7558, "step": 45080 }, { "epoch": 1.7357074109720885, "grad_norm": 0.8619052767753601, "learning_rate": 8.539987097444912e-06, "loss": 0.7801, "step": 45085 }, { "epoch": 1.7358999037536091, "grad_norm": 1.3250690698623657, "learning_rate": 8.527763607475459e-06, "loss": 0.8215, "step": 45090 }, { "epoch": 1.7360923965351298, "grad_norm": 1.4232019186019897, "learning_rate": 8.515548482036106e-06, "loss": 0.9854, "step": 45095 }, { "epoch": 1.7362848893166505, "grad_norm": 0.8504436016082764, "learning_rate": 8.503341722243785e-06, "loss": 0.7121, "step": 45100 }, { "epoch": 1.7364773820981714, "grad_norm": 1.5012342929840088, "learning_rate": 8.491143329214768e-06, "loss": 0.8461, "step": 45105 }, { "epoch": 1.736669874879692, "grad_norm": 1.4541362524032593, "learning_rate": 8.478953304064485e-06, "loss": 0.7671, "step": 45110 }, { "epoch": 1.7368623676612127, "grad_norm": 1.0640056133270264, "learning_rate": 8.466771647907679e-06, "loss": 0.7623, "step": 45115 }, { "epoch": 1.7370548604427334, "grad_norm": 1.897068738937378, "learning_rate": 8.454598361858223e-06, "loss": 0.8152, "step": 45120 }, { "epoch": 1.737247353224254, "grad_norm": 1.7493306398391724, "learning_rate": 8.44243344702934e-06, "loss": 0.8049, "step": 45125 }, { "epoch": 1.737439846005775, "grad_norm": 1.0308282375335693, "learning_rate": 8.430276904533375e-06, "loss": 0.9274, "step": 45130 }, { "epoch": 1.7376323387872956, "grad_norm": 1.5391184091567993, "learning_rate": 8.418128735482033e-06, "loss": 0.8391, "step": 45135 }, { "epoch": 1.7378248315688163, "grad_norm": 1.622266173362732, "learning_rate": 8.40598894098612e-06, "loss": 0.7576, "step": 45140 }, { "epoch": 1.738017324350337, 
"grad_norm": 1.7522730827331543, "learning_rate": 8.393857522155758e-06, "loss": 0.6953, "step": 45145 }, { "epoch": 1.7382098171318576, "grad_norm": 1.5102694034576416, "learning_rate": 8.3817344801003e-06, "loss": 0.7033, "step": 45150 }, { "epoch": 1.7384023099133783, "grad_norm": 1.0262799263000488, "learning_rate": 8.369619815928321e-06, "loss": 0.8477, "step": 45155 }, { "epoch": 1.738594802694899, "grad_norm": 1.1589692831039429, "learning_rate": 8.357513530747585e-06, "loss": 0.6228, "step": 45160 }, { "epoch": 1.7387872954764196, "grad_norm": 1.038752555847168, "learning_rate": 8.345415625665187e-06, "loss": 0.8333, "step": 45165 }, { "epoch": 1.7389797882579403, "grad_norm": 1.9438576698303223, "learning_rate": 8.333326101787365e-06, "loss": 0.8507, "step": 45170 }, { "epoch": 1.739172281039461, "grad_norm": 1.3162816762924194, "learning_rate": 8.321244960219632e-06, "loss": 0.8264, "step": 45175 }, { "epoch": 1.7393647738209816, "grad_norm": 1.4058345556259155, "learning_rate": 8.309172202066728e-06, "loss": 0.8086, "step": 45180 }, { "epoch": 1.7395572666025023, "grad_norm": 1.2361959218978882, "learning_rate": 8.297107828432649e-06, "loss": 0.6796, "step": 45185 }, { "epoch": 1.739749759384023, "grad_norm": 1.3547639846801758, "learning_rate": 8.285051840420565e-06, "loss": 0.8473, "step": 45190 }, { "epoch": 1.7399422521655437, "grad_norm": 1.3733632564544678, "learning_rate": 8.273004239132932e-06, "loss": 0.7467, "step": 45195 }, { "epoch": 1.7401347449470643, "grad_norm": 1.2235511541366577, "learning_rate": 8.26096502567143e-06, "loss": 0.8072, "step": 45200 }, { "epoch": 1.7403272377285852, "grad_norm": 1.30812406539917, "learning_rate": 8.24893420113696e-06, "loss": 0.8308, "step": 45205 }, { "epoch": 1.740519730510106, "grad_norm": 1.1475272178649902, "learning_rate": 8.236911766629674e-06, "loss": 0.7597, "step": 45210 }, { "epoch": 1.7407122232916266, "grad_norm": 1.2318791151046753, "learning_rate": 8.224897723248926e-06, "loss": 0.7935, 
"step": 45215 }, { "epoch": 1.7409047160731472, "grad_norm": 0.8889015913009644, "learning_rate": 8.212892072093314e-06, "loss": 0.7292, "step": 45220 }, { "epoch": 1.741097208854668, "grad_norm": 2.2487030029296875, "learning_rate": 8.200894814260695e-06, "loss": 0.8981, "step": 45225 }, { "epoch": 1.7412897016361888, "grad_norm": 2.454103469848633, "learning_rate": 8.188905950848157e-06, "loss": 0.9552, "step": 45230 }, { "epoch": 1.7414821944177095, "grad_norm": 1.027070164680481, "learning_rate": 8.176925482951925e-06, "loss": 0.8115, "step": 45235 }, { "epoch": 1.7416746871992301, "grad_norm": 1.2418460845947266, "learning_rate": 8.16495341166763e-06, "loss": 0.7263, "step": 45240 }, { "epoch": 1.7418671799807508, "grad_norm": 1.0683330297470093, "learning_rate": 8.152989738089978e-06, "loss": 0.8064, "step": 45245 }, { "epoch": 1.7420596727622715, "grad_norm": 0.9566577672958374, "learning_rate": 8.14103446331298e-06, "loss": 0.7102, "step": 45250 }, { "epoch": 1.7422521655437921, "grad_norm": 0.8572260737419128, "learning_rate": 8.129087588429873e-06, "loss": 0.8575, "step": 45255 }, { "epoch": 1.7424446583253128, "grad_norm": 0.9579412341117859, "learning_rate": 8.11714911453314e-06, "loss": 0.8609, "step": 45260 }, { "epoch": 1.7426371511068335, "grad_norm": 1.0199538469314575, "learning_rate": 8.105219042714406e-06, "loss": 0.7884, "step": 45265 }, { "epoch": 1.7428296438883542, "grad_norm": 1.9061774015426636, "learning_rate": 8.093297374064679e-06, "loss": 0.7509, "step": 45270 }, { "epoch": 1.7430221366698748, "grad_norm": 1.1046141386032104, "learning_rate": 8.081384109674073e-06, "loss": 0.861, "step": 45275 }, { "epoch": 1.7432146294513955, "grad_norm": 0.9744033217430115, "learning_rate": 8.069479250631972e-06, "loss": 0.8627, "step": 45280 }, { "epoch": 1.7434071222329162, "grad_norm": 1.1140841245651245, "learning_rate": 8.057582798027019e-06, "loss": 0.8167, "step": 45285 }, { "epoch": 1.7435996150144368, "grad_norm": 0.7895367741584778, 
"learning_rate": 8.045694752947076e-06, "loss": 0.7456, "step": 45290 }, { "epoch": 1.7437921077959575, "grad_norm": 1.113579511642456, "learning_rate": 8.033815116479182e-06, "loss": 0.7822, "step": 45295 }, { "epoch": 1.7439846005774784, "grad_norm": 1.654111385345459, "learning_rate": 8.021943889709682e-06, "loss": 0.8276, "step": 45300 }, { "epoch": 1.744177093358999, "grad_norm": 1.1563928127288818, "learning_rate": 8.010081073724107e-06, "loss": 0.7169, "step": 45305 }, { "epoch": 1.7443695861405197, "grad_norm": 1.4437462091445923, "learning_rate": 7.998226669607245e-06, "loss": 0.759, "step": 45310 }, { "epoch": 1.7445620789220404, "grad_norm": 1.2180962562561035, "learning_rate": 7.986380678443117e-06, "loss": 0.8452, "step": 45315 }, { "epoch": 1.744754571703561, "grad_norm": 1.4761182069778442, "learning_rate": 7.974543101314912e-06, "loss": 0.8095, "step": 45320 }, { "epoch": 1.744947064485082, "grad_norm": 0.9857079982757568, "learning_rate": 7.962713939305143e-06, "loss": 0.7167, "step": 45325 }, { "epoch": 1.7451395572666026, "grad_norm": 1.562577724456787, "learning_rate": 7.95089319349549e-06, "loss": 0.829, "step": 45330 }, { "epoch": 1.7453320500481233, "grad_norm": 1.0676238536834717, "learning_rate": 7.939080864966897e-06, "loss": 0.7187, "step": 45335 }, { "epoch": 1.745524542829644, "grad_norm": 1.6058528423309326, "learning_rate": 7.92727695479948e-06, "loss": 0.7018, "step": 45340 }, { "epoch": 1.7457170356111646, "grad_norm": 1.4000190496444702, "learning_rate": 7.915481464072694e-06, "loss": 0.7528, "step": 45345 }, { "epoch": 1.7459095283926853, "grad_norm": 1.476828694343567, "learning_rate": 7.903694393865102e-06, "loss": 0.6115, "step": 45350 }, { "epoch": 1.746102021174206, "grad_norm": 0.9761121273040771, "learning_rate": 7.891915745254574e-06, "loss": 0.775, "step": 45355 }, { "epoch": 1.7462945139557267, "grad_norm": 1.4030824899673462, "learning_rate": 7.880145519318205e-06, "loss": 0.7931, "step": 45360 }, { "epoch": 
1.7464870067372473, "grad_norm": 2.230025291442871, "learning_rate": 7.868383717132299e-06, "loss": 0.9373, "step": 45365 }, { "epoch": 1.746679499518768, "grad_norm": 1.5030139684677124, "learning_rate": 7.856630339772341e-06, "loss": 0.8815, "step": 45370 }, { "epoch": 1.7468719923002887, "grad_norm": 1.0745930671691895, "learning_rate": 7.844885388313194e-06, "loss": 0.7682, "step": 45375 }, { "epoch": 1.7470644850818093, "grad_norm": 1.3051297664642334, "learning_rate": 7.833148863828766e-06, "loss": 0.7516, "step": 45380 }, { "epoch": 1.74725697786333, "grad_norm": 1.2218395471572876, "learning_rate": 7.821420767392362e-06, "loss": 0.7248, "step": 45385 }, { "epoch": 1.7474494706448507, "grad_norm": 1.7142714262008667, "learning_rate": 7.809701100076384e-06, "loss": 0.953, "step": 45390 }, { "epoch": 1.7476419634263713, "grad_norm": 1.6819877624511719, "learning_rate": 7.797989862952525e-06, "loss": 0.8154, "step": 45395 }, { "epoch": 1.7478344562078922, "grad_norm": 2.180309295654297, "learning_rate": 7.786287057091723e-06, "loss": 0.7794, "step": 45400 }, { "epoch": 1.748026948989413, "grad_norm": 1.5509096384048462, "learning_rate": 7.77459268356413e-06, "loss": 0.9624, "step": 45405 }, { "epoch": 1.7482194417709336, "grad_norm": 1.4458515644073486, "learning_rate": 7.762906743439069e-06, "loss": 0.836, "step": 45410 }, { "epoch": 1.7484119345524542, "grad_norm": 1.2456862926483154, "learning_rate": 7.751229237785173e-06, "loss": 0.863, "step": 45415 }, { "epoch": 1.7486044273339751, "grad_norm": 1.0490598678588867, "learning_rate": 7.739560167670279e-06, "loss": 0.8947, "step": 45420 }, { "epoch": 1.7487969201154958, "grad_norm": 1.548740029335022, "learning_rate": 7.727899534161431e-06, "loss": 0.8212, "step": 45425 }, { "epoch": 1.7489894128970165, "grad_norm": 1.1737068891525269, "learning_rate": 7.716247338324945e-06, "loss": 0.6706, "step": 45430 }, { "epoch": 1.7491819056785372, "grad_norm": 1.5018718242645264, "learning_rate": 7.7046035812263e-06, 
"loss": 0.7855, "step": 45435 }, { "epoch": 1.7493743984600578, "grad_norm": 1.2772295475006104, "learning_rate": 7.692968263930255e-06, "loss": 0.7608, "step": 45440 }, { "epoch": 1.7495668912415785, "grad_norm": 1.0926804542541504, "learning_rate": 7.681341387500784e-06, "loss": 0.7587, "step": 45445 }, { "epoch": 1.7497593840230992, "grad_norm": 1.2796525955200195, "learning_rate": 7.669722953001113e-06, "loss": 0.8821, "step": 45450 }, { "epoch": 1.7499518768046198, "grad_norm": 0.9413089752197266, "learning_rate": 7.658112961493602e-06, "loss": 0.7892, "step": 45455 }, { "epoch": 1.7501443695861405, "grad_norm": 1.1582038402557373, "learning_rate": 7.646511414039981e-06, "loss": 0.759, "step": 45460 }, { "epoch": 1.7503368623676612, "grad_norm": 1.9622395038604736, "learning_rate": 7.6349183117011e-06, "loss": 0.7499, "step": 45465 }, { "epoch": 1.7505293551491818, "grad_norm": 1.9417619705200195, "learning_rate": 7.623333655537068e-06, "loss": 0.8312, "step": 45470 }, { "epoch": 1.7507218479307025, "grad_norm": 1.0554540157318115, "learning_rate": 7.611757446607238e-06, "loss": 0.8, "step": 45475 }, { "epoch": 1.7509143407122232, "grad_norm": 0.9951033592224121, "learning_rate": 7.6001896859701935e-06, "loss": 0.7422, "step": 45480 }, { "epoch": 1.7511068334937439, "grad_norm": 1.446395993232727, "learning_rate": 7.58863037468367e-06, "loss": 0.8357, "step": 45485 }, { "epoch": 1.7512993262752645, "grad_norm": 1.0408679246902466, "learning_rate": 7.577079513804764e-06, "loss": 0.8824, "step": 45490 }, { "epoch": 1.7514918190567854, "grad_norm": 2.0275330543518066, "learning_rate": 7.565537104389675e-06, "loss": 0.865, "step": 45495 }, { "epoch": 1.751684311838306, "grad_norm": 0.5688580870628357, "learning_rate": 7.5540031474938936e-06, "loss": 0.7585, "step": 45500 }, { "epoch": 1.7518768046198268, "grad_norm": 0.9278910160064697, "learning_rate": 7.54247764417213e-06, "loss": 0.8563, "step": 45505 }, { "epoch": 1.7520692974013474, "grad_norm": 
1.2069793939590454, "learning_rate": 7.53096059547832e-06, "loss": 0.8896, "step": 45510 }, { "epoch": 1.752261790182868, "grad_norm": 1.3929320573806763, "learning_rate": 7.51945200246561e-06, "loss": 0.6949, "step": 45515 }, { "epoch": 1.752454282964389, "grad_norm": 1.403387427330017, "learning_rate": 7.507951866186369e-06, "loss": 0.8077, "step": 45520 }, { "epoch": 1.7526467757459097, "grad_norm": 1.3101508617401123, "learning_rate": 7.496460187692233e-06, "loss": 0.8342, "step": 45525 }, { "epoch": 1.7528392685274303, "grad_norm": 1.3768970966339111, "learning_rate": 7.484976968034041e-06, "loss": 0.7386, "step": 45530 }, { "epoch": 1.753031761308951, "grad_norm": 1.012230396270752, "learning_rate": 7.473502208261862e-06, "loss": 0.7605, "step": 45535 }, { "epoch": 1.7532242540904717, "grad_norm": 0.9598119258880615, "learning_rate": 7.462035909424947e-06, "loss": 0.7581, "step": 45540 }, { "epoch": 1.7534167468719923, "grad_norm": 1.7553858757019043, "learning_rate": 7.450578072571857e-06, "loss": 0.6762, "step": 45545 }, { "epoch": 1.753609239653513, "grad_norm": 1.4688769578933716, "learning_rate": 7.439128698750309e-06, "loss": 0.661, "step": 45550 }, { "epoch": 1.7538017324350337, "grad_norm": 1.9736151695251465, "learning_rate": 7.427687789007299e-06, "loss": 0.8019, "step": 45555 }, { "epoch": 1.7539942252165543, "grad_norm": 0.9081621170043945, "learning_rate": 7.416255344388967e-06, "loss": 0.8455, "step": 45560 }, { "epoch": 1.754186717998075, "grad_norm": 0.812998354434967, "learning_rate": 7.404831365940812e-06, "loss": 0.8729, "step": 45565 }, { "epoch": 1.7543792107795957, "grad_norm": 1.4121613502502441, "learning_rate": 7.393415854707419e-06, "loss": 0.7995, "step": 45570 }, { "epoch": 1.7545717035611164, "grad_norm": 1.363328456878662, "learning_rate": 7.382008811732688e-06, "loss": 0.7346, "step": 45575 }, { "epoch": 1.754764196342637, "grad_norm": 1.5376365184783936, "learning_rate": 7.370610238059716e-06, "loss": 0.703, "step": 45580 }, { 
"epoch": 1.7549566891241577, "grad_norm": 1.3588625192642212, "learning_rate": 7.359220134730838e-06, "loss": 0.8671, "step": 45585 }, { "epoch": 1.7551491819056786, "grad_norm": 1.2630447149276733, "learning_rate": 7.347838502787563e-06, "loss": 0.8949, "step": 45590 }, { "epoch": 1.7553416746871993, "grad_norm": 1.2255010604858398, "learning_rate": 7.336465343270715e-06, "loss": 0.8899, "step": 45595 }, { "epoch": 1.75553416746872, "grad_norm": 0.93909752368927, "learning_rate": 7.325100657220263e-06, "loss": 0.6891, "step": 45600 }, { "epoch": 1.7557266602502406, "grad_norm": 1.0947200059890747, "learning_rate": 7.31374444567543e-06, "loss": 0.7669, "step": 45605 }, { "epoch": 1.7559191530317613, "grad_norm": 1.0405867099761963, "learning_rate": 7.302396709674686e-06, "loss": 0.9153, "step": 45610 }, { "epoch": 1.7561116458132822, "grad_norm": 1.1569056510925293, "learning_rate": 7.291057450255712e-06, "loss": 0.7694, "step": 45615 }, { "epoch": 1.7563041385948028, "grad_norm": 2.7271223068237305, "learning_rate": 7.279726668455366e-06, "loss": 0.9885, "step": 45620 }, { "epoch": 1.7564966313763235, "grad_norm": 1.421797752380371, "learning_rate": 7.268404365309822e-06, "loss": 0.9199, "step": 45625 }, { "epoch": 1.7566891241578442, "grad_norm": 1.7297556400299072, "learning_rate": 7.257090541854405e-06, "loss": 0.8034, "step": 45630 }, { "epoch": 1.7568816169393648, "grad_norm": 1.0657942295074463, "learning_rate": 7.245785199123689e-06, "loss": 0.7211, "step": 45635 }, { "epoch": 1.7570741097208855, "grad_norm": 1.2450711727142334, "learning_rate": 7.23448833815148e-06, "loss": 0.9191, "step": 45640 }, { "epoch": 1.7572666025024062, "grad_norm": 1.1879651546478271, "learning_rate": 7.223199959970784e-06, "loss": 0.7244, "step": 45645 }, { "epoch": 1.7574590952839269, "grad_norm": 1.0603852272033691, "learning_rate": 7.211920065613875e-06, "loss": 0.757, "step": 45650 }, { "epoch": 1.7576515880654475, "grad_norm": 1.9512847661972046, "learning_rate": 
7.200648656112208e-06, "loss": 0.6762, "step": 45655 }, { "epoch": 1.7578440808469682, "grad_norm": 1.7029409408569336, "learning_rate": 7.189385732496479e-06, "loss": 0.7148, "step": 45660 }, { "epoch": 1.7580365736284889, "grad_norm": 1.464708685874939, "learning_rate": 7.178131295796609e-06, "loss": 0.7853, "step": 45665 }, { "epoch": 1.7582290664100095, "grad_norm": 1.0993046760559082, "learning_rate": 7.166885347041763e-06, "loss": 0.8098, "step": 45670 }, { "epoch": 1.7584215591915302, "grad_norm": 1.787937045097351, "learning_rate": 7.155647887260253e-06, "loss": 0.9171, "step": 45675 }, { "epoch": 1.7586140519730509, "grad_norm": 1.478559136390686, "learning_rate": 7.144418917479734e-06, "loss": 0.7519, "step": 45680 }, { "epoch": 1.7588065447545715, "grad_norm": 1.5054091215133667, "learning_rate": 7.133198438726985e-06, "loss": 0.6784, "step": 45685 }, { "epoch": 1.7589990375360924, "grad_norm": 1.190794587135315, "learning_rate": 7.121986452028051e-06, "loss": 0.7847, "step": 45690 }, { "epoch": 1.759191530317613, "grad_norm": 1.474478006362915, "learning_rate": 7.110782958408191e-06, "loss": 0.7256, "step": 45695 }, { "epoch": 1.7593840230991338, "grad_norm": 0.9803133010864258, "learning_rate": 7.0995879588919176e-06, "loss": 0.7769, "step": 45700 }, { "epoch": 1.7595765158806544, "grad_norm": 1.1921695470809937, "learning_rate": 7.08840145450288e-06, "loss": 0.7835, "step": 45705 }, { "epoch": 1.7597690086621751, "grad_norm": 0.8569211363792419, "learning_rate": 7.077223446264081e-06, "loss": 0.7431, "step": 45710 }, { "epoch": 1.759961501443696, "grad_norm": 1.798243522644043, "learning_rate": 7.066053935197625e-06, "loss": 0.9124, "step": 45715 }, { "epoch": 1.7601539942252167, "grad_norm": 1.0873985290527344, "learning_rate": 7.054892922324896e-06, "loss": 0.7808, "step": 45720 }, { "epoch": 1.7603464870067373, "grad_norm": 0.899448573589325, "learning_rate": 7.043740408666511e-06, "loss": 0.6514, "step": 45725 }, { "epoch": 1.760538979788258, 
"grad_norm": 0.8346114754676819, "learning_rate": 7.032596395242308e-06, "loss": 0.7472, "step": 45730 }, { "epoch": 1.7607314725697787, "grad_norm": 1.0554609298706055, "learning_rate": 7.021460883071296e-06, "loss": 0.7476, "step": 45735 }, { "epoch": 1.7609239653512994, "grad_norm": 2.069948196411133, "learning_rate": 7.010333873171749e-06, "loss": 0.8979, "step": 45740 }, { "epoch": 1.76111645813282, "grad_norm": 1.2894495725631714, "learning_rate": 6.999215366561174e-06, "loss": 0.8933, "step": 45745 }, { "epoch": 1.7613089509143407, "grad_norm": 0.8491725325584412, "learning_rate": 6.988105364256281e-06, "loss": 0.7834, "step": 45750 }, { "epoch": 1.7615014436958614, "grad_norm": 1.3714752197265625, "learning_rate": 6.977003867273024e-06, "loss": 0.8987, "step": 45755 }, { "epoch": 1.761693936477382, "grad_norm": 1.4427109956741333, "learning_rate": 6.965910876626525e-06, "loss": 0.7007, "step": 45760 }, { "epoch": 1.7618864292589027, "grad_norm": 2.3159167766571045, "learning_rate": 6.954826393331182e-06, "loss": 0.7829, "step": 45765 }, { "epoch": 1.7620789220404234, "grad_norm": 1.3657716512680054, "learning_rate": 6.9437504184005964e-06, "loss": 0.8195, "step": 45770 }, { "epoch": 1.762271414821944, "grad_norm": 1.9438031911849976, "learning_rate": 6.932682952847602e-06, "loss": 0.851, "step": 45775 }, { "epoch": 1.7624639076034647, "grad_norm": 1.6237000226974487, "learning_rate": 6.9216239976842125e-06, "loss": 0.7393, "step": 45780 }, { "epoch": 1.7626564003849856, "grad_norm": 1.7042378187179565, "learning_rate": 6.910573553921762e-06, "loss": 0.7954, "step": 45785 }, { "epoch": 1.7628488931665063, "grad_norm": 1.3898320198059082, "learning_rate": 6.899531622570665e-06, "loss": 0.7336, "step": 45790 }, { "epoch": 1.763041385948027, "grad_norm": 0.9973713159561157, "learning_rate": 6.888498204640681e-06, "loss": 0.7636, "step": 45795 }, { "epoch": 1.7632338787295476, "grad_norm": 1.350479006767273, "learning_rate": 6.877473301140725e-06, "loss": 
0.7871, "step": 45800 }, { "epoch": 1.7634263715110683, "grad_norm": 0.9988759756088257, "learning_rate": 6.866456913078967e-06, "loss": 0.7535, "step": 45805 }, { "epoch": 1.7636188642925892, "grad_norm": 1.9600189924240112, "learning_rate": 6.855449041462736e-06, "loss": 0.8776, "step": 45810 }, { "epoch": 1.7638113570741099, "grad_norm": 1.4199215173721313, "learning_rate": 6.844449687298704e-06, "loss": 0.8597, "step": 45815 }, { "epoch": 1.7640038498556305, "grad_norm": 0.9643407464027405, "learning_rate": 6.833458851592633e-06, "loss": 0.8721, "step": 45820 }, { "epoch": 1.7641963426371512, "grad_norm": 1.3059306144714355, "learning_rate": 6.822476535349576e-06, "loss": 0.8427, "step": 45825 }, { "epoch": 1.7643888354186719, "grad_norm": 1.1498661041259766, "learning_rate": 6.811502739573794e-06, "loss": 0.8962, "step": 45830 }, { "epoch": 1.7645813282001925, "grad_norm": 1.7601451873779297, "learning_rate": 6.800537465268786e-06, "loss": 0.6432, "step": 45835 }, { "epoch": 1.7647738209817132, "grad_norm": 1.7763153314590454, "learning_rate": 6.7895807134372265e-06, "loss": 0.7724, "step": 45840 }, { "epoch": 1.7649663137632339, "grad_norm": 0.8977097868919373, "learning_rate": 6.778632485081038e-06, "loss": 0.9375, "step": 45845 }, { "epoch": 1.7651588065447545, "grad_norm": 2.03190541267395, "learning_rate": 6.767692781201385e-06, "loss": 0.8175, "step": 45850 }, { "epoch": 1.7653512993262752, "grad_norm": 1.9310617446899414, "learning_rate": 6.756761602798611e-06, "loss": 0.7512, "step": 45855 }, { "epoch": 1.7655437921077959, "grad_norm": 1.507844090461731, "learning_rate": 6.7458389508723295e-06, "loss": 0.989, "step": 45860 }, { "epoch": 1.7657362848893166, "grad_norm": 1.0742762088775635, "learning_rate": 6.734924826421296e-06, "loss": 0.7757, "step": 45865 }, { "epoch": 1.7659287776708372, "grad_norm": 3.061866283416748, "learning_rate": 6.724019230443579e-06, "loss": 0.8364, "step": 45870 }, { "epoch": 1.766121270452358, "grad_norm": 
1.1504889726638794, "learning_rate": 6.713122163936392e-06, "loss": 0.7237, "step": 45875 }, { "epoch": 1.7663137632338786, "grad_norm": 0.9810017347335815, "learning_rate": 6.702233627896237e-06, "loss": 0.8952, "step": 45880 }, { "epoch": 1.7665062560153995, "grad_norm": 1.719298005104065, "learning_rate": 6.691353623318752e-06, "loss": 0.9511, "step": 45885 }, { "epoch": 1.7666987487969201, "grad_norm": 1.0837730169296265, "learning_rate": 6.680482151198886e-06, "loss": 1.0137, "step": 45890 }, { "epoch": 1.7668912415784408, "grad_norm": 1.030633807182312, "learning_rate": 6.669619212530709e-06, "loss": 0.8197, "step": 45895 }, { "epoch": 1.7670837343599615, "grad_norm": 1.0519740581512451, "learning_rate": 6.658764808307638e-06, "loss": 0.7061, "step": 45900 }, { "epoch": 1.7672762271414824, "grad_norm": 2.0290040969848633, "learning_rate": 6.647918939522168e-06, "loss": 0.8013, "step": 45905 }, { "epoch": 1.767468719923003, "grad_norm": 1.4919228553771973, "learning_rate": 6.637081607166129e-06, "loss": 0.7577, "step": 45910 }, { "epoch": 1.7676612127045237, "grad_norm": 1.4189355373382568, "learning_rate": 6.626252812230494e-06, "loss": 0.6824, "step": 45915 }, { "epoch": 1.7678537054860444, "grad_norm": 1.654653787612915, "learning_rate": 6.615432555705503e-06, "loss": 0.7618, "step": 45920 }, { "epoch": 1.768046198267565, "grad_norm": 1.591550350189209, "learning_rate": 6.604620838580566e-06, "loss": 0.8461, "step": 45925 }, { "epoch": 1.7682386910490857, "grad_norm": 1.0825369358062744, "learning_rate": 6.593817661844393e-06, "loss": 0.8319, "step": 45930 }, { "epoch": 1.7684311838306064, "grad_norm": 1.893295407295227, "learning_rate": 6.5830230264848245e-06, "loss": 0.8209, "step": 45935 }, { "epoch": 1.768623676612127, "grad_norm": 0.8829330205917358, "learning_rate": 6.572236933488962e-06, "loss": 0.7583, "step": 45940 }, { "epoch": 1.7688161693936477, "grad_norm": 1.44454026222229, "learning_rate": 6.561459383843138e-06, "loss": 0.8129, "step": 45945 
}, { "epoch": 1.7690086621751684, "grad_norm": 1.0190069675445557, "learning_rate": 6.5506903785328865e-06, "loss": 0.7915, "step": 45950 }, { "epoch": 1.769201154956689, "grad_norm": 1.2443015575408936, "learning_rate": 6.539929918542953e-06, "loss": 0.6906, "step": 45955 }, { "epoch": 1.7693936477382097, "grad_norm": 1.2522984743118286, "learning_rate": 6.529178004857295e-06, "loss": 0.7546, "step": 45960 }, { "epoch": 1.7695861405197304, "grad_norm": 0.4453613758087158, "learning_rate": 6.5184346384591365e-06, "loss": 0.7618, "step": 45965 }, { "epoch": 1.769778633301251, "grad_norm": 1.1776552200317383, "learning_rate": 6.507699820330859e-06, "loss": 0.7245, "step": 45970 }, { "epoch": 1.7699711260827717, "grad_norm": 0.9290987253189087, "learning_rate": 6.4969735514541216e-06, "loss": 0.8716, "step": 45975 }, { "epoch": 1.7701636188642926, "grad_norm": 1.227397084236145, "learning_rate": 6.48625583280974e-06, "loss": 0.9719, "step": 45980 }, { "epoch": 1.7703561116458133, "grad_norm": 1.5179232358932495, "learning_rate": 6.4755466653777965e-06, "loss": 0.7412, "step": 45985 }, { "epoch": 1.770548604427334, "grad_norm": 1.2842813730239868, "learning_rate": 6.464846050137552e-06, "loss": 0.8631, "step": 45990 }, { "epoch": 1.7707410972088546, "grad_norm": 1.102016806602478, "learning_rate": 6.454153988067557e-06, "loss": 0.7781, "step": 45995 }, { "epoch": 1.7709335899903753, "grad_norm": 1.2582801580429077, "learning_rate": 6.443470480145452e-06, "loss": 0.8891, "step": 46000 }, { "epoch": 1.7711260827718962, "grad_norm": 1.6584640741348267, "learning_rate": 6.432795527348246e-06, "loss": 0.845, "step": 46005 }, { "epoch": 1.7713185755534169, "grad_norm": 0.9719191789627075, "learning_rate": 6.422129130652055e-06, "loss": 0.7511, "step": 46010 }, { "epoch": 1.7715110683349375, "grad_norm": 0.968356192111969, "learning_rate": 6.411471291032245e-06, "loss": 0.8289, "step": 46015 }, { "epoch": 1.7717035611164582, "grad_norm": 1.0656431913375854, "learning_rate": 
6.400822009463437e-06, "loss": 0.8338, "step": 46020 }, { "epoch": 1.7718960538979789, "grad_norm": 0.6920823454856873, "learning_rate": 6.3901812869194165e-06, "loss": 0.6625, "step": 46025 }, { "epoch": 1.7720885466794996, "grad_norm": 1.6432719230651855, "learning_rate": 6.379549124373185e-06, "loss": 0.8689, "step": 46030 }, { "epoch": 1.7722810394610202, "grad_norm": 1.784363865852356, "learning_rate": 6.368925522797042e-06, "loss": 0.8466, "step": 46035 }, { "epoch": 1.772473532242541, "grad_norm": 0.9978721141815186, "learning_rate": 6.358310483162389e-06, "loss": 0.6407, "step": 46040 }, { "epoch": 1.7726660250240616, "grad_norm": 1.793863296508789, "learning_rate": 6.347704006439936e-06, "loss": 0.7827, "step": 46045 }, { "epoch": 1.7728585178055822, "grad_norm": 0.9259586930274963, "learning_rate": 6.337106093599543e-06, "loss": 0.8073, "step": 46050 }, { "epoch": 1.773051010587103, "grad_norm": 1.971684455871582, "learning_rate": 6.326516745610367e-06, "loss": 0.7771, "step": 46055 }, { "epoch": 1.7732435033686236, "grad_norm": 1.9362154006958008, "learning_rate": 6.315935963440689e-06, "loss": 0.9013, "step": 46060 }, { "epoch": 1.7734359961501442, "grad_norm": 1.4866061210632324, "learning_rate": 6.305363748058057e-06, "loss": 0.9351, "step": 46065 }, { "epoch": 1.773628488931665, "grad_norm": 0.8735952973365784, "learning_rate": 6.2948001004292435e-06, "loss": 0.7682, "step": 46070 }, { "epoch": 1.7738209817131858, "grad_norm": 1.655439019203186, "learning_rate": 6.2842450215202295e-06, "loss": 0.9094, "step": 46075 }, { "epoch": 1.7740134744947065, "grad_norm": 1.2115354537963867, "learning_rate": 6.2736985122962e-06, "loss": 0.8336, "step": 46080 }, { "epoch": 1.7742059672762271, "grad_norm": 1.3702316284179688, "learning_rate": 6.263160573721561e-06, "loss": 0.7794, "step": 46085 }, { "epoch": 1.7743984600577478, "grad_norm": 0.8656373023986816, "learning_rate": 6.252631206759929e-06, "loss": 0.6995, "step": 46090 }, { "epoch": 1.7745909528392685, 
"grad_norm": 1.3100272417068481, "learning_rate": 6.242110412374158e-06, "loss": 1.0182, "step": 46095 }, { "epoch": 1.7747834456207894, "grad_norm": 0.7958429455757141, "learning_rate": 6.231598191526311e-06, "loss": 0.7499, "step": 46100 }, { "epoch": 1.77497593840231, "grad_norm": 1.7569608688354492, "learning_rate": 6.221094545177619e-06, "loss": 0.6442, "step": 46105 }, { "epoch": 1.7751684311838307, "grad_norm": 1.1067934036254883, "learning_rate": 6.2105994742886365e-06, "loss": 0.7889, "step": 46110 }, { "epoch": 1.7753609239653514, "grad_norm": 1.4211006164550781, "learning_rate": 6.200112979819017e-06, "loss": 0.7786, "step": 46115 }, { "epoch": 1.775553416746872, "grad_norm": 0.9069014191627502, "learning_rate": 6.189635062727695e-06, "loss": 0.8214, "step": 46120 }, { "epoch": 1.7757459095283927, "grad_norm": 1.2809123992919922, "learning_rate": 6.179165723972824e-06, "loss": 0.8923, "step": 46125 }, { "epoch": 1.7759384023099134, "grad_norm": 1.3749951124191284, "learning_rate": 6.16870496451174e-06, "loss": 0.7566, "step": 46130 }, { "epoch": 1.776130895091434, "grad_norm": 1.8189775943756104, "learning_rate": 6.158252785300988e-06, "loss": 0.9776, "step": 46135 }, { "epoch": 1.7763233878729547, "grad_norm": 1.2725715637207031, "learning_rate": 6.147809187296405e-06, "loss": 0.7726, "step": 46140 }, { "epoch": 1.7765158806544754, "grad_norm": 1.479418396949768, "learning_rate": 6.137374171452948e-06, "loss": 0.8255, "step": 46145 }, { "epoch": 1.776708373435996, "grad_norm": 0.9075431227684021, "learning_rate": 6.1269477387248306e-06, "loss": 0.9074, "step": 46150 }, { "epoch": 1.7769008662175168, "grad_norm": 1.1781271696090698, "learning_rate": 6.116529890065492e-06, "loss": 0.7702, "step": 46155 }, { "epoch": 1.7770933589990374, "grad_norm": 1.5959038734436035, "learning_rate": 6.1061206264275805e-06, "loss": 0.7611, "step": 46160 }, { "epoch": 1.777285851780558, "grad_norm": 1.5484418869018555, "learning_rate": 6.095719948762934e-06, "loss": 
0.8188, "step": 46165 }, { "epoch": 1.7774783445620788, "grad_norm": 1.0949671268463135, "learning_rate": 6.085327858022672e-06, "loss": 0.7638, "step": 46170 }, { "epoch": 1.7776708373435997, "grad_norm": 1.3612221479415894, "learning_rate": 6.07494435515702e-06, "loss": 0.8475, "step": 46175 }, { "epoch": 1.7778633301251203, "grad_norm": 1.385720133781433, "learning_rate": 6.06456944111552e-06, "loss": 0.7755, "step": 46180 }, { "epoch": 1.778055822906641, "grad_norm": 1.639599323272705, "learning_rate": 6.05420311684689e-06, "loss": 0.7855, "step": 46185 }, { "epoch": 1.7782483156881617, "grad_norm": 1.1654934883117676, "learning_rate": 6.043845383299018e-06, "loss": 0.9169, "step": 46190 }, { "epoch": 1.7784408084696823, "grad_norm": 0.8145331740379333, "learning_rate": 6.033496241419112e-06, "loss": 0.7727, "step": 46195 }, { "epoch": 1.7786333012512032, "grad_norm": 1.7755072116851807, "learning_rate": 6.0231556921534925e-06, "loss": 0.9423, "step": 46200 }, { "epoch": 1.778825794032724, "grad_norm": 0.9639081358909607, "learning_rate": 6.012823736447748e-06, "loss": 0.7255, "step": 46205 }, { "epoch": 1.7790182868142446, "grad_norm": 1.6836614608764648, "learning_rate": 6.002500375246667e-06, "loss": 0.7516, "step": 46210 }, { "epoch": 1.7792107795957652, "grad_norm": 0.9299353957176208, "learning_rate": 5.9921856094942604e-06, "loss": 0.7697, "step": 46215 }, { "epoch": 1.779403272377286, "grad_norm": 1.0631067752838135, "learning_rate": 5.981879440133709e-06, "loss": 0.732, "step": 46220 }, { "epoch": 1.7795957651588066, "grad_norm": 1.403578519821167, "learning_rate": 5.97158186810749e-06, "loss": 0.7957, "step": 46225 }, { "epoch": 1.7797882579403272, "grad_norm": 2.0105881690979004, "learning_rate": 5.961292894357217e-06, "loss": 0.681, "step": 46230 }, { "epoch": 1.779980750721848, "grad_norm": 1.8827532529830933, "learning_rate": 5.951012519823762e-06, "loss": 0.9873, "step": 46235 }, { "epoch": 1.7801732435033686, "grad_norm": 0.4478518068790436, 
"learning_rate": 5.940740745447193e-06, "loss": 0.7578, "step": 46240 }, { "epoch": 1.7803657362848893, "grad_norm": 1.727774977684021, "learning_rate": 5.930477572166815e-06, "loss": 0.7855, "step": 46245 }, { "epoch": 1.78055822906641, "grad_norm": 0.8507458567619324, "learning_rate": 5.920223000921077e-06, "loss": 0.7533, "step": 46250 }, { "epoch": 1.7807507218479306, "grad_norm": 1.1110725402832031, "learning_rate": 5.909977032647762e-06, "loss": 0.9928, "step": 46255 }, { "epoch": 1.7809432146294513, "grad_norm": 1.1344168186187744, "learning_rate": 5.899739668283732e-06, "loss": 0.7474, "step": 46260 }, { "epoch": 1.781135707410972, "grad_norm": 1.6794506311416626, "learning_rate": 5.8895109087651616e-06, "loss": 0.7804, "step": 46265 }, { "epoch": 1.7813282001924928, "grad_norm": 1.0263005495071411, "learning_rate": 5.879290755027378e-06, "loss": 0.8814, "step": 46270 }, { "epoch": 1.7815206929740135, "grad_norm": 1.7495372295379639, "learning_rate": 5.869079208004991e-06, "loss": 0.9627, "step": 46275 }, { "epoch": 1.7817131857555342, "grad_norm": 1.242300271987915, "learning_rate": 5.85887626863173e-06, "loss": 0.8365, "step": 46280 }, { "epoch": 1.7819056785370548, "grad_norm": 2.084526777267456, "learning_rate": 5.848681937840605e-06, "loss": 0.8406, "step": 46285 }, { "epoch": 1.7820981713185755, "grad_norm": 0.9705522656440735, "learning_rate": 5.8384962165638265e-06, "loss": 0.7655, "step": 46290 }, { "epoch": 1.7822906641000964, "grad_norm": 1.0004565715789795, "learning_rate": 5.8283191057328045e-06, "loss": 0.7498, "step": 46295 }, { "epoch": 1.782483156881617, "grad_norm": 1.0826503038406372, "learning_rate": 5.818150606278183e-06, "loss": 0.8392, "step": 46300 }, { "epoch": 1.7826756496631377, "grad_norm": 2.7211012840270996, "learning_rate": 5.807990719129786e-06, "loss": 0.8496, "step": 46305 }, { "epoch": 1.7828681424446584, "grad_norm": 2.3229408264160156, "learning_rate": 5.797839445216657e-06, "loss": 0.7426, "step": 46310 }, { "epoch": 
1.783060635226179, "grad_norm": 2.3727757930755615, "learning_rate": 5.78769678546709e-06, "loss": 0.7056, "step": 46315 }, { "epoch": 1.7832531280076998, "grad_norm": 1.557446837425232, "learning_rate": 5.777562740808562e-06, "loss": 0.8112, "step": 46320 }, { "epoch": 1.7834456207892204, "grad_norm": 1.3999435901641846, "learning_rate": 5.7674373121677226e-06, "loss": 0.7352, "step": 46325 }, { "epoch": 1.783638113570741, "grad_norm": 1.531625509262085, "learning_rate": 5.757320500470542e-06, "loss": 0.6754, "step": 46330 }, { "epoch": 1.7838306063522618, "grad_norm": 1.059308648109436, "learning_rate": 5.74721230664208e-06, "loss": 0.7609, "step": 46335 }, { "epoch": 1.7840230991337824, "grad_norm": 1.5941221714019775, "learning_rate": 5.737112731606698e-06, "loss": 0.8795, "step": 46340 }, { "epoch": 1.784215591915303, "grad_norm": 1.1354868412017822, "learning_rate": 5.7270217762879105e-06, "loss": 0.7511, "step": 46345 }, { "epoch": 1.7844080846968238, "grad_norm": 1.1096879243850708, "learning_rate": 5.716939441608504e-06, "loss": 0.6635, "step": 46350 }, { "epoch": 1.7846005774783444, "grad_norm": 2.472426176071167, "learning_rate": 5.706865728490373e-06, "loss": 0.805, "step": 46355 }, { "epoch": 1.784793070259865, "grad_norm": 1.060925841331482, "learning_rate": 5.696800637854782e-06, "loss": 0.8253, "step": 46360 }, { "epoch": 1.784985563041386, "grad_norm": 1.1766963005065918, "learning_rate": 5.686744170622049e-06, "loss": 0.6249, "step": 46365 }, { "epoch": 1.7851780558229067, "grad_norm": 1.531556248664856, "learning_rate": 5.676696327711795e-06, "loss": 0.7294, "step": 46370 }, { "epoch": 1.7853705486044273, "grad_norm": 1.9163962602615356, "learning_rate": 5.666657110042828e-06, "loss": 0.7667, "step": 46375 }, { "epoch": 1.785563041385948, "grad_norm": 1.7495805025100708, "learning_rate": 5.65662651853317e-06, "loss": 0.8198, "step": 46380 }, { "epoch": 1.7857555341674687, "grad_norm": 0.7979353666305542, "learning_rate": 5.6466045541000546e-06, 
"loss": 0.7781, "step": 46385 }, { "epoch": 1.7859480269489896, "grad_norm": 1.199220895767212, "learning_rate": 5.636591217659903e-06, "loss": 0.8034, "step": 46390 }, { "epoch": 1.7861405197305102, "grad_norm": 1.2027233839035034, "learning_rate": 5.626586510128384e-06, "loss": 0.7879, "step": 46395 }, { "epoch": 1.786333012512031, "grad_norm": 0.7976201772689819, "learning_rate": 5.616590432420376e-06, "loss": 0.6871, "step": 46400 }, { "epoch": 1.7865255052935516, "grad_norm": 1.0275331735610962, "learning_rate": 5.606602985449949e-06, "loss": 0.7664, "step": 46405 }, { "epoch": 1.7867179980750723, "grad_norm": 2.2883615493774414, "learning_rate": 5.59662417013036e-06, "loss": 0.957, "step": 46410 }, { "epoch": 1.786910490856593, "grad_norm": 0.8585185408592224, "learning_rate": 5.586653987374125e-06, "loss": 0.6444, "step": 46415 }, { "epoch": 1.7871029836381136, "grad_norm": 2.087956666946411, "learning_rate": 5.576692438092956e-06, "loss": 0.8373, "step": 46420 }, { "epoch": 1.7872954764196343, "grad_norm": 1.8517224788665771, "learning_rate": 5.566739523197762e-06, "loss": 0.8641, "step": 46425 }, { "epoch": 1.787487969201155, "grad_norm": 1.3989286422729492, "learning_rate": 5.556795243598678e-06, "loss": 0.8464, "step": 46430 }, { "epoch": 1.7876804619826756, "grad_norm": 1.8883378505706787, "learning_rate": 5.546859600205057e-06, "loss": 0.8401, "step": 46435 }, { "epoch": 1.7878729547641963, "grad_norm": 1.1194407939910889, "learning_rate": 5.536932593925403e-06, "loss": 0.8559, "step": 46440 }, { "epoch": 1.788065447545717, "grad_norm": 1.501118779182434, "learning_rate": 5.527014225667526e-06, "loss": 0.7472, "step": 46445 }, { "epoch": 1.7882579403272376, "grad_norm": 1.1292697191238403, "learning_rate": 5.517104496338365e-06, "loss": 0.8151, "step": 46450 }, { "epoch": 1.7884504331087583, "grad_norm": 1.5353429317474365, "learning_rate": 5.507203406844097e-06, "loss": 0.693, "step": 46455 }, { "epoch": 1.788642925890279, "grad_norm": 
1.1052043437957764, "learning_rate": 5.497310958090129e-06, "loss": 0.74, "step": 46460 }, { "epoch": 1.7888354186717998, "grad_norm": 1.2323806285858154, "learning_rate": 5.487427150981061e-06, "loss": 0.7457, "step": 46465 }, { "epoch": 1.7890279114533205, "grad_norm": 1.003144383430481, "learning_rate": 5.477551986420659e-06, "loss": 0.6708, "step": 46470 }, { "epoch": 1.7892204042348412, "grad_norm": 1.9035112857818604, "learning_rate": 5.467685465312e-06, "loss": 0.7999, "step": 46475 }, { "epoch": 1.7894128970163619, "grad_norm": 2.7132394313812256, "learning_rate": 5.457827588557285e-06, "loss": 0.7343, "step": 46480 }, { "epoch": 1.7896053897978825, "grad_norm": 2.29404878616333, "learning_rate": 5.4479783570579366e-06, "loss": 0.8014, "step": 46485 }, { "epoch": 1.7897978825794034, "grad_norm": 1.4308083057403564, "learning_rate": 5.438137771714635e-06, "loss": 0.888, "step": 46490 }, { "epoch": 1.789990375360924, "grad_norm": 1.0539064407348633, "learning_rate": 5.428305833427216e-06, "loss": 0.8976, "step": 46495 }, { "epoch": 1.7901828681424448, "grad_norm": 1.6149170398712158, "learning_rate": 5.41848254309475e-06, "loss": 0.8853, "step": 46500 }, { "epoch": 1.7903753609239654, "grad_norm": 0.8334898948669434, "learning_rate": 5.408667901615494e-06, "loss": 0.6841, "step": 46505 }, { "epoch": 1.790567853705486, "grad_norm": 1.9171321392059326, "learning_rate": 5.398861909886965e-06, "loss": 0.6301, "step": 46510 }, { "epoch": 1.7907603464870068, "grad_norm": 1.2018779516220093, "learning_rate": 5.391023344927259e-06, "loss": 1.3233, "step": 46515 }, { "epoch": 1.7909528392685274, "grad_norm": 1.1932569742202759, "learning_rate": 5.381232925009128e-06, "loss": 0.7556, "step": 46520 }, { "epoch": 1.7911453320500481, "grad_norm": 0.987067699432373, "learning_rate": 5.37145115735046e-06, "loss": 0.6867, "step": 46525 }, { "epoch": 1.7913378248315688, "grad_norm": 1.0372260808944702, "learning_rate": 5.361678042845731e-06, "loss": 0.7445, "step": 46530 }, { 
"epoch": 1.7915303176130895, "grad_norm": 0.9253237843513489, "learning_rate": 5.351913582388635e-06, "loss": 0.7732, "step": 46535 }, { "epoch": 1.7917228103946101, "grad_norm": 0.9296650886535645, "learning_rate": 5.342157776872025e-06, "loss": 0.7397, "step": 46540 }, { "epoch": 1.7919153031761308, "grad_norm": 2.767267942428589, "learning_rate": 5.332410627188067e-06, "loss": 0.7794, "step": 46545 }, { "epoch": 1.7921077959576515, "grad_norm": 1.5188485383987427, "learning_rate": 5.322672134228024e-06, "loss": 0.7275, "step": 46550 }, { "epoch": 1.7923002887391721, "grad_norm": 1.1152452230453491, "learning_rate": 5.312942298882439e-06, "loss": 0.7639, "step": 46555 }, { "epoch": 1.792492781520693, "grad_norm": 1.1599907875061035, "learning_rate": 5.303221122041036e-06, "loss": 0.7895, "step": 46560 }, { "epoch": 1.7926852743022137, "grad_norm": 1.0555464029312134, "learning_rate": 5.293508604592768e-06, "loss": 0.8518, "step": 46565 }, { "epoch": 1.7928777670837344, "grad_norm": 1.0384188890457153, "learning_rate": 5.283804747425747e-06, "loss": 0.7783, "step": 46570 }, { "epoch": 1.793070259865255, "grad_norm": 1.148019790649414, "learning_rate": 5.274109551427342e-06, "loss": 0.8909, "step": 46575 }, { "epoch": 1.7932627526467757, "grad_norm": 2.0219950675964355, "learning_rate": 5.264423017484122e-06, "loss": 0.7428, "step": 46580 }, { "epoch": 1.7934552454282966, "grad_norm": 1.3461076021194458, "learning_rate": 5.254745146481843e-06, "loss": 0.675, "step": 46585 }, { "epoch": 1.7936477382098173, "grad_norm": 1.6037160158157349, "learning_rate": 5.2450759393055104e-06, "loss": 0.8687, "step": 46590 }, { "epoch": 1.793840230991338, "grad_norm": 0.929437518119812, "learning_rate": 5.235415396839283e-06, "loss": 0.9081, "step": 46595 }, { "epoch": 1.7940327237728586, "grad_norm": 0.9498231410980225, "learning_rate": 5.225763519966542e-06, "loss": 0.7378, "step": 46600 }, { "epoch": 1.7942252165543793, "grad_norm": 1.3678570985794067, "learning_rate": 
5.216120309569917e-06, "loss": 1.001, "step": 46605 }, { "epoch": 1.7944177093359, "grad_norm": 1.7113560438156128, "learning_rate": 5.206485766531222e-06, "loss": 0.8138, "step": 46610 }, { "epoch": 1.7946102021174206, "grad_norm": 1.4284687042236328, "learning_rate": 5.19685989173142e-06, "loss": 0.7692, "step": 46615 }, { "epoch": 1.7948026948989413, "grad_norm": 1.2268130779266357, "learning_rate": 5.1872426860507975e-06, "loss": 0.7408, "step": 46620 }, { "epoch": 1.794995187680462, "grad_norm": 1.4930412769317627, "learning_rate": 5.177634150368738e-06, "loss": 0.9012, "step": 46625 }, { "epoch": 1.7951876804619826, "grad_norm": 1.3684518337249756, "learning_rate": 5.1680342855638945e-06, "loss": 0.927, "step": 46630 }, { "epoch": 1.7953801732435033, "grad_norm": 1.5305689573287964, "learning_rate": 5.158443092514109e-06, "loss": 0.8608, "step": 46635 }, { "epoch": 1.795572666025024, "grad_norm": 1.6696339845657349, "learning_rate": 5.148860572096459e-06, "loss": 0.8078, "step": 46640 }, { "epoch": 1.7957651588065446, "grad_norm": 1.3109546899795532, "learning_rate": 5.139286725187143e-06, "loss": 0.7532, "step": 46645 }, { "epoch": 1.7959576515880653, "grad_norm": 1.0849995613098145, "learning_rate": 5.129721552661681e-06, "loss": 0.9083, "step": 46650 }, { "epoch": 1.796150144369586, "grad_norm": 2.3269553184509277, "learning_rate": 5.120165055394721e-06, "loss": 0.7511, "step": 46655 }, { "epoch": 1.7963426371511069, "grad_norm": 1.75252103805542, "learning_rate": 5.110617234260151e-06, "loss": 0.7646, "step": 46660 }, { "epoch": 1.7965351299326275, "grad_norm": 1.0314127206802368, "learning_rate": 5.1010780901310395e-06, "loss": 0.6695, "step": 46665 }, { "epoch": 1.7967276227141482, "grad_norm": 1.0665658712387085, "learning_rate": 5.091547623879711e-06, "loss": 0.8726, "step": 46670 }, { "epoch": 1.7969201154956689, "grad_norm": 1.8659719228744507, "learning_rate": 5.082025836377624e-06, "loss": 0.8587, "step": 46675 }, { "epoch": 1.7971126082771898, 
"grad_norm": 1.0363880395889282, "learning_rate": 5.072512728495493e-06, "loss": 0.7135, "step": 46680 }, { "epoch": 1.7973051010587104, "grad_norm": 1.0198173522949219, "learning_rate": 5.063008301103245e-06, "loss": 0.8571, "step": 46685 }, { "epoch": 1.7974975938402311, "grad_norm": 1.313674807548523, "learning_rate": 5.0535125550699834e-06, "loss": 0.8776, "step": 46690 }, { "epoch": 1.7976900866217518, "grad_norm": 0.9799014925956726, "learning_rate": 5.044025491264049e-06, "loss": 0.8095, "step": 46695 }, { "epoch": 1.7978825794032725, "grad_norm": 0.9720066785812378, "learning_rate": 5.0345471105529345e-06, "loss": 0.7772, "step": 46700 }, { "epoch": 1.7980750721847931, "grad_norm": 1.2402182817459106, "learning_rate": 5.025077413803425e-06, "loss": 0.7378, "step": 46705 }, { "epoch": 1.7982675649663138, "grad_norm": 1.4397085905075073, "learning_rate": 5.015616401881418e-06, "loss": 0.8355, "step": 46710 }, { "epoch": 1.7984600577478345, "grad_norm": 1.2105292081832886, "learning_rate": 5.0061640756520754e-06, "loss": 0.8271, "step": 46715 }, { "epoch": 1.7986525505293551, "grad_norm": 1.2763471603393555, "learning_rate": 4.996720435979763e-06, "loss": 0.7776, "step": 46720 }, { "epoch": 1.7988450433108758, "grad_norm": 1.6655856370925903, "learning_rate": 4.987285483728033e-06, "loss": 0.7638, "step": 46725 }, { "epoch": 1.7990375360923965, "grad_norm": 1.3366689682006836, "learning_rate": 4.977859219759617e-06, "loss": 0.6957, "step": 46730 }, { "epoch": 1.7992300288739171, "grad_norm": 0.9900131225585938, "learning_rate": 4.968441644936539e-06, "loss": 0.7898, "step": 46735 }, { "epoch": 1.7994225216554378, "grad_norm": 1.191714882850647, "learning_rate": 4.95903276011993e-06, "loss": 0.8838, "step": 46740 }, { "epoch": 1.7996150144369585, "grad_norm": 1.0521550178527832, "learning_rate": 4.949632566170181e-06, "loss": 0.7767, "step": 46745 }, { "epoch": 1.7998075072184792, "grad_norm": 1.6969459056854248, "learning_rate": 4.940241063946893e-06, "loss": 
0.8962, "step": 46750 }, { "epoch": 1.8, "grad_norm": 1.4018580913543701, "learning_rate": 4.930858254308856e-06, "loss": 0.8324, "step": 46755 }, { "epoch": 1.8001924927815207, "grad_norm": 1.4890011548995972, "learning_rate": 4.921484138114029e-06, "loss": 0.6562, "step": 46760 }, { "epoch": 1.8003849855630414, "grad_norm": 1.6725586652755737, "learning_rate": 4.912118716219672e-06, "loss": 0.7311, "step": 46765 }, { "epoch": 1.800577478344562, "grad_norm": 1.5450921058654785, "learning_rate": 4.9027619894821404e-06, "loss": 0.8046, "step": 46770 }, { "epoch": 1.8007699711260827, "grad_norm": 1.5963780879974365, "learning_rate": 4.893413958757065e-06, "loss": 0.7562, "step": 46775 }, { "epoch": 1.8009624639076036, "grad_norm": 2.59673810005188, "learning_rate": 4.88407462489926e-06, "loss": 0.7906, "step": 46780 }, { "epoch": 1.8011549566891243, "grad_norm": 1.0897094011306763, "learning_rate": 4.874743988762753e-06, "loss": 0.6483, "step": 46785 }, { "epoch": 1.801347449470645, "grad_norm": 1.5644094944000244, "learning_rate": 4.865422051200752e-06, "loss": 0.668, "step": 46790 }, { "epoch": 1.8015399422521656, "grad_norm": 1.7178467512130737, "learning_rate": 4.856108813065696e-06, "loss": 0.7188, "step": 46795 }, { "epoch": 1.8017324350336863, "grad_norm": 1.8276166915893555, "learning_rate": 4.846804275209216e-06, "loss": 0.7897, "step": 46800 }, { "epoch": 1.801924927815207, "grad_norm": 1.107142448425293, "learning_rate": 4.8375084384821635e-06, "loss": 0.6513, "step": 46805 }, { "epoch": 1.8021174205967276, "grad_norm": 1.1322238445281982, "learning_rate": 4.82822130373457e-06, "loss": 0.8737, "step": 46810 }, { "epoch": 1.8023099133782483, "grad_norm": 1.5215245485305786, "learning_rate": 4.818942871815679e-06, "loss": 0.7255, "step": 46815 }, { "epoch": 1.802502406159769, "grad_norm": 1.0915231704711914, "learning_rate": 4.809673143573934e-06, "loss": 0.6991, "step": 46820 }, { "epoch": 1.8026948989412896, "grad_norm": 1.1544078588485718, 
"learning_rate": 4.800412119857012e-06, "loss": 0.8788, "step": 46825 }, { "epoch": 1.8028873917228103, "grad_norm": 1.521331787109375, "learning_rate": 4.791159801511769e-06, "loss": 0.8318, "step": 46830 }, { "epoch": 1.803079884504331, "grad_norm": 1.9487007856369019, "learning_rate": 4.781916189384239e-06, "loss": 0.7977, "step": 46835 }, { "epoch": 1.8032723772858517, "grad_norm": 0.9875978827476501, "learning_rate": 4.772681284319736e-06, "loss": 0.7773, "step": 46840 }, { "epoch": 1.8034648700673723, "grad_norm": 0.9986359477043152, "learning_rate": 4.7634550871626935e-06, "loss": 0.7418, "step": 46845 }, { "epoch": 1.8036573628488932, "grad_norm": 0.9883870482444763, "learning_rate": 4.754237598756806e-06, "loss": 0.6378, "step": 46850 }, { "epoch": 1.803849855630414, "grad_norm": 1.2948468923568726, "learning_rate": 4.745028819944941e-06, "loss": 0.6721, "step": 46855 }, { "epoch": 1.8040423484119346, "grad_norm": 1.422836422920227, "learning_rate": 4.735828751569194e-06, "loss": 0.77, "step": 46860 }, { "epoch": 1.8042348411934552, "grad_norm": 0.956969678401947, "learning_rate": 4.726637394470812e-06, "loss": 0.7077, "step": 46865 }, { "epoch": 1.804427333974976, "grad_norm": 1.3407295942306519, "learning_rate": 4.717454749490336e-06, "loss": 0.7287, "step": 46870 }, { "epoch": 1.8046198267564968, "grad_norm": 1.0364344120025635, "learning_rate": 4.7082808174674255e-06, "loss": 0.8322, "step": 46875 }, { "epoch": 1.8048123195380175, "grad_norm": 1.2396444082260132, "learning_rate": 4.6991155992409885e-06, "loss": 0.6264, "step": 46880 }, { "epoch": 1.8050048123195381, "grad_norm": 1.7426707744598389, "learning_rate": 4.6899590956491105e-06, "loss": 0.6881, "step": 46885 }, { "epoch": 1.8051973051010588, "grad_norm": 1.3045077323913574, "learning_rate": 4.68081130752912e-06, "loss": 0.7758, "step": 46890 }, { "epoch": 1.8053897978825795, "grad_norm": 1.462531566619873, "learning_rate": 4.671672235717494e-06, "loss": 0.9664, "step": 46895 }, { "epoch": 
1.8055822906641001, "grad_norm": 1.063500165939331, "learning_rate": 4.662541881049942e-06, "loss": 0.8255, "step": 46900 }, { "epoch": 1.8057747834456208, "grad_norm": 1.0657175779342651, "learning_rate": 4.653420244361395e-06, "loss": 0.8206, "step": 46905 }, { "epoch": 1.8059672762271415, "grad_norm": 1.0056036710739136, "learning_rate": 4.644307326485941e-06, "loss": 0.8588, "step": 46910 }, { "epoch": 1.8061597690086622, "grad_norm": 1.1666234731674194, "learning_rate": 4.635203128256927e-06, "loss": 0.5983, "step": 46915 }, { "epoch": 1.8063522617901828, "grad_norm": 1.6504344940185547, "learning_rate": 4.626107650506839e-06, "loss": 0.7481, "step": 46920 }, { "epoch": 1.8065447545717035, "grad_norm": 1.4112893342971802, "learning_rate": 4.617020894067403e-06, "loss": 0.9829, "step": 46925 }, { "epoch": 1.8067372473532242, "grad_norm": 1.058632493019104, "learning_rate": 4.607942859769565e-06, "loss": 0.7069, "step": 46930 }, { "epoch": 1.8069297401347448, "grad_norm": 1.1201543807983398, "learning_rate": 4.598873548443427e-06, "loss": 0.8061, "step": 46935 }, { "epoch": 1.8071222329162655, "grad_norm": 0.8915508389472961, "learning_rate": 4.589812960918338e-06, "loss": 0.82, "step": 46940 }, { "epoch": 1.8073147256977862, "grad_norm": 2.371730327606201, "learning_rate": 4.580761098022835e-06, "loss": 0.8294, "step": 46945 }, { "epoch": 1.807507218479307, "grad_norm": 1.3790137767791748, "learning_rate": 4.571717960584598e-06, "loss": 0.7928, "step": 46950 }, { "epoch": 1.8076997112608277, "grad_norm": 1.2915151119232178, "learning_rate": 4.562683549430624e-06, "loss": 0.7858, "step": 46955 }, { "epoch": 1.8078922040423484, "grad_norm": 1.653926968574524, "learning_rate": 4.553657865387018e-06, "loss": 0.8588, "step": 46960 }, { "epoch": 1.808084696823869, "grad_norm": 1.2625577449798584, "learning_rate": 4.54464090927913e-06, "loss": 0.8673, "step": 46965 }, { "epoch": 1.8082771896053897, "grad_norm": 0.9362401962280273, "learning_rate": 
4.5356326819314894e-06, "loss": 0.7839, "step": 46970 }, { "epoch": 1.8084696823869106, "grad_norm": 1.078352451324463, "learning_rate": 4.526633184167861e-06, "loss": 0.8862, "step": 46975 }, { "epoch": 1.8086621751684313, "grad_norm": 1.3022172451019287, "learning_rate": 4.517642416811152e-06, "loss": 0.8553, "step": 46980 }, { "epoch": 1.808854667949952, "grad_norm": 1.2438225746154785, "learning_rate": 4.50866038068356e-06, "loss": 0.7513, "step": 46985 }, { "epoch": 1.8090471607314726, "grad_norm": 0.9540335536003113, "learning_rate": 4.4996870766063845e-06, "loss": 0.7287, "step": 46990 }, { "epoch": 1.8092396535129933, "grad_norm": 1.5209485292434692, "learning_rate": 4.490722505400191e-06, "loss": 0.8749, "step": 46995 }, { "epoch": 1.809432146294514, "grad_norm": 1.1515183448791504, "learning_rate": 4.481766667884757e-06, "loss": 0.8147, "step": 47000 }, { "epoch": 1.8096246390760347, "grad_norm": 1.2854846715927124, "learning_rate": 4.472819564878994e-06, "loss": 0.8026, "step": 47005 }, { "epoch": 1.8098171318575553, "grad_norm": 1.7539061307907104, "learning_rate": 4.4638811972010696e-06, "loss": 0.8365, "step": 47010 }, { "epoch": 1.810009624639076, "grad_norm": 1.8201215267181396, "learning_rate": 4.454951565668341e-06, "loss": 0.9819, "step": 47015 }, { "epoch": 1.8102021174205967, "grad_norm": 1.3828089237213135, "learning_rate": 4.4460306710973786e-06, "loss": 0.7404, "step": 47020 }, { "epoch": 1.8103946102021173, "grad_norm": 2.6402223110198975, "learning_rate": 4.437118514303895e-06, "loss": 0.887, "step": 47025 }, { "epoch": 1.810587102983638, "grad_norm": 1.3061926364898682, "learning_rate": 4.428215096102906e-06, "loss": 0.8308, "step": 47030 }, { "epoch": 1.8107795957651587, "grad_norm": 1.3701341152191162, "learning_rate": 4.419320417308526e-06, "loss": 0.7879, "step": 47035 }, { "epoch": 1.8109720885466793, "grad_norm": 1.355481505393982, "learning_rate": 4.410434478734127e-06, "loss": 0.8011, "step": 47040 }, { "epoch": 
1.8111645813282002, "grad_norm": 1.3704595565795898, "learning_rate": 4.401557281192281e-06, "loss": 0.8246, "step": 47045 }, { "epoch": 1.811357074109721, "grad_norm": 1.3024649620056152, "learning_rate": 4.39268882549474e-06, "loss": 0.8316, "step": 47050 }, { "epoch": 1.8115495668912416, "grad_norm": 1.1199791431427002, "learning_rate": 4.383829112452454e-06, "loss": 0.8883, "step": 47055 }, { "epoch": 1.8117420596727623, "grad_norm": 1.2826857566833496, "learning_rate": 4.374978142875608e-06, "loss": 0.7167, "step": 47060 }, { "epoch": 1.811934552454283, "grad_norm": 0.9372978806495667, "learning_rate": 4.366135917573555e-06, "loss": 0.7517, "step": 47065 }, { "epoch": 1.8121270452358038, "grad_norm": 1.2647088766098022, "learning_rate": 4.357302437354848e-06, "loss": 0.6803, "step": 47070 }, { "epoch": 1.8123195380173245, "grad_norm": 0.9146122932434082, "learning_rate": 4.348477703027254e-06, "loss": 0.8121, "step": 47075 }, { "epoch": 1.8125120307988452, "grad_norm": 1.3470911979675293, "learning_rate": 4.3396617153977585e-06, "loss": 0.7432, "step": 47080 }, { "epoch": 1.8127045235803658, "grad_norm": 1.119855284690857, "learning_rate": 4.330854475272483e-06, "loss": 0.694, "step": 47085 }, { "epoch": 1.8128970163618865, "grad_norm": 0.8852443099021912, "learning_rate": 4.322055983456841e-06, "loss": 0.8118, "step": 47090 }, { "epoch": 1.8130895091434072, "grad_norm": 1.6684072017669678, "learning_rate": 4.313266240755354e-06, "loss": 0.9023, "step": 47095 }, { "epoch": 1.8132820019249278, "grad_norm": 1.1562302112579346, "learning_rate": 4.304485247971812e-06, "loss": 0.7498, "step": 47100 }, { "epoch": 1.8134744947064485, "grad_norm": 1.1458892822265625, "learning_rate": 4.2957130059091635e-06, "loss": 0.7468, "step": 47105 }, { "epoch": 1.8136669874879692, "grad_norm": 1.3492836952209473, "learning_rate": 4.286949515369587e-06, "loss": 0.7202, "step": 47110 }, { "epoch": 1.8138594802694898, "grad_norm": 1.3468396663665771, "learning_rate": 
4.278194777154432e-06, "loss": 0.9034, "step": 47115 }, { "epoch": 1.8140519730510105, "grad_norm": 1.1316415071487427, "learning_rate": 4.269448792064257e-06, "loss": 0.8748, "step": 47120 }, { "epoch": 1.8142444658325312, "grad_norm": 2.3300271034240723, "learning_rate": 4.260711560898833e-06, "loss": 0.6797, "step": 47125 }, { "epoch": 1.8144369586140519, "grad_norm": 1.528640866279602, "learning_rate": 4.251983084457134e-06, "loss": 0.8478, "step": 47130 }, { "epoch": 1.8146294513955725, "grad_norm": 1.0944819450378418, "learning_rate": 4.2432633635373084e-06, "loss": 0.7424, "step": 47135 }, { "epoch": 1.8148219441770932, "grad_norm": 1.4346030950546265, "learning_rate": 4.234552398936709e-06, "loss": 0.7892, "step": 47140 }, { "epoch": 1.815014436958614, "grad_norm": 1.987533450126648, "learning_rate": 4.225850191451908e-06, "loss": 0.728, "step": 47145 }, { "epoch": 1.8152069297401348, "grad_norm": 1.4819614887237549, "learning_rate": 4.217156741878658e-06, "loss": 0.8437, "step": 47150 }, { "epoch": 1.8153994225216554, "grad_norm": 1.6680911779403687, "learning_rate": 4.2084720510119355e-06, "loss": 0.9606, "step": 47155 }, { "epoch": 1.815591915303176, "grad_norm": 1.2909523248672485, "learning_rate": 4.19979611964586e-06, "loss": 0.8903, "step": 47160 }, { "epoch": 1.815784408084697, "grad_norm": 1.6894687414169312, "learning_rate": 4.19112894857383e-06, "loss": 0.8166, "step": 47165 }, { "epoch": 1.8159769008662177, "grad_norm": 1.0353437662124634, "learning_rate": 4.1824705385883786e-06, "loss": 0.6971, "step": 47170 }, { "epoch": 1.8161693936477383, "grad_norm": 1.8149842023849487, "learning_rate": 4.17382089048125e-06, "loss": 0.8447, "step": 47175 }, { "epoch": 1.816361886429259, "grad_norm": 1.26813805103302, "learning_rate": 4.165180005043423e-06, "loss": 0.7669, "step": 47180 }, { "epoch": 1.8165543792107797, "grad_norm": 1.110489010810852, "learning_rate": 4.156547883065043e-06, "loss": 0.7015, "step": 47185 }, { "epoch": 1.8167468719923003, 
"grad_norm": 1.940280795097351, "learning_rate": 4.147924525335434e-06, "loss": 0.7819, "step": 47190 }, { "epoch": 1.816939364773821, "grad_norm": 0.8619933724403381, "learning_rate": 4.139309932643187e-06, "loss": 0.9221, "step": 47195 }, { "epoch": 1.8171318575553417, "grad_norm": 1.2719132900238037, "learning_rate": 4.130704105776018e-06, "loss": 0.7197, "step": 47200 }, { "epoch": 1.8173243503368623, "grad_norm": 1.1760082244873047, "learning_rate": 4.1221070455208735e-06, "loss": 0.8961, "step": 47205 }, { "epoch": 1.817516843118383, "grad_norm": 1.3913214206695557, "learning_rate": 4.113518752663914e-06, "loss": 0.787, "step": 47210 }, { "epoch": 1.8177093358999037, "grad_norm": 1.941649079322815, "learning_rate": 4.104939227990478e-06, "loss": 0.918, "step": 47215 }, { "epoch": 1.8179018286814244, "grad_norm": 0.8367112278938293, "learning_rate": 4.096368472285106e-06, "loss": 0.636, "step": 47220 }, { "epoch": 1.818094321462945, "grad_norm": 1.0153110027313232, "learning_rate": 4.0878064863315356e-06, "loss": 0.8449, "step": 47225 }, { "epoch": 1.8182868142444657, "grad_norm": 1.1490223407745361, "learning_rate": 4.079253270912687e-06, "loss": 0.7839, "step": 47230 }, { "epoch": 1.8184793070259864, "grad_norm": 1.2465404272079468, "learning_rate": 4.070708826810721e-06, "loss": 0.6734, "step": 47235 }, { "epoch": 1.8186717998075073, "grad_norm": 1.5955170392990112, "learning_rate": 4.06217315480697e-06, "loss": 0.868, "step": 47240 }, { "epoch": 1.818864292589028, "grad_norm": 2.5519890785217285, "learning_rate": 4.05364625568192e-06, "loss": 0.6769, "step": 47245 }, { "epoch": 1.8190567853705486, "grad_norm": 0.968748152256012, "learning_rate": 4.04512813021537e-06, "loss": 0.7728, "step": 47250 }, { "epoch": 1.8192492781520693, "grad_norm": 1.381900668144226, "learning_rate": 4.0366187791861855e-06, "loss": 0.6947, "step": 47255 }, { "epoch": 1.81944177093359, "grad_norm": 0.9397858381271362, "learning_rate": 4.028118203372521e-06, "loss": 0.7413, 
"step": 47260 }, { "epoch": 1.8196342637151108, "grad_norm": 1.2155348062515259, "learning_rate": 4.019626403551691e-06, "loss": 0.8101, "step": 47265 }, { "epoch": 1.8198267564966315, "grad_norm": 1.4671454429626465, "learning_rate": 4.011143380500226e-06, "loss": 0.6367, "step": 47270 }, { "epoch": 1.8200192492781522, "grad_norm": 1.2641961574554443, "learning_rate": 4.002669134993808e-06, "loss": 0.8871, "step": 47275 }, { "epoch": 1.8202117420596728, "grad_norm": 1.482218861579895, "learning_rate": 3.994203667807395e-06, "loss": 0.7126, "step": 47280 }, { "epoch": 1.8204042348411935, "grad_norm": 1.0232921838760376, "learning_rate": 3.985746979715066e-06, "loss": 0.7237, "step": 47285 }, { "epoch": 1.8205967276227142, "grad_norm": 2.7515757083892822, "learning_rate": 3.977299071490148e-06, "loss": 0.7837, "step": 47290 }, { "epoch": 1.8207892204042349, "grad_norm": 1.110560417175293, "learning_rate": 3.968859943905124e-06, "loss": 0.7886, "step": 47295 }, { "epoch": 1.8209817131857555, "grad_norm": 1.8675193786621094, "learning_rate": 3.96042959773173e-06, "loss": 0.6506, "step": 47300 }, { "epoch": 1.8211742059672762, "grad_norm": 1.233778953552246, "learning_rate": 3.952008033740817e-06, "loss": 0.7465, "step": 47305 }, { "epoch": 1.8213666987487969, "grad_norm": 1.6183221340179443, "learning_rate": 3.943595252702537e-06, "loss": 0.7255, "step": 47310 }, { "epoch": 1.8215591915303175, "grad_norm": 1.736594796180725, "learning_rate": 3.935191255386139e-06, "loss": 0.6987, "step": 47315 }, { "epoch": 1.8217516843118382, "grad_norm": 1.2381728887557983, "learning_rate": 3.926796042560133e-06, "loss": 0.9369, "step": 47320 }, { "epoch": 1.8219441770933589, "grad_norm": 0.9554552435874939, "learning_rate": 3.9184096149922025e-06, "loss": 0.8767, "step": 47325 }, { "epoch": 1.8221366698748795, "grad_norm": 1.7278703451156616, "learning_rate": 3.9100319734492465e-06, "loss": 0.8719, "step": 47330 }, { "epoch": 1.8223291626564004, "grad_norm": 0.6862698197364807, 
"learning_rate": 3.901663118697308e-06, "loss": 0.8551, "step": 47335 }, { "epoch": 1.822521655437921, "grad_norm": 1.7460417747497559, "learning_rate": 3.893303051501685e-06, "loss": 0.7529, "step": 47340 }, { "epoch": 1.8227141482194418, "grad_norm": 1.0978227853775024, "learning_rate": 3.884951772626854e-06, "loss": 0.6831, "step": 47345 }, { "epoch": 1.8229066410009624, "grad_norm": 1.4301860332489014, "learning_rate": 3.8766092828364856e-06, "loss": 0.7509, "step": 47350 }, { "epoch": 1.8230991337824831, "grad_norm": 1.623776912689209, "learning_rate": 3.868275582893444e-06, "loss": 0.891, "step": 47355 }, { "epoch": 1.823291626564004, "grad_norm": 0.9064698219299316, "learning_rate": 3.859950673559765e-06, "loss": 0.923, "step": 47360 }, { "epoch": 1.8234841193455247, "grad_norm": 1.351654052734375, "learning_rate": 3.8516345555967396e-06, "loss": 0.8076, "step": 47365 }, { "epoch": 1.8236766121270453, "grad_norm": 1.3697307109832764, "learning_rate": 3.8433272297648035e-06, "loss": 0.8001, "step": 47370 }, { "epoch": 1.823869104908566, "grad_norm": 1.3666245937347412, "learning_rate": 3.835028696823628e-06, "loss": 0.7855, "step": 47375 }, { "epoch": 1.8240615976900867, "grad_norm": 1.0923709869384766, "learning_rate": 3.826738957532017e-06, "loss": 0.8134, "step": 47380 }, { "epoch": 1.8242540904716074, "grad_norm": 1.4689079523086548, "learning_rate": 3.818458012648063e-06, "loss": 0.8976, "step": 47385 }, { "epoch": 1.824446583253128, "grad_norm": 0.9157153367996216, "learning_rate": 3.810185862928972e-06, "loss": 0.7001, "step": 47390 }, { "epoch": 1.8246390760346487, "grad_norm": 1.6523871421813965, "learning_rate": 3.801922509131184e-06, "loss": 0.7818, "step": 47395 }, { "epoch": 1.8248315688161694, "grad_norm": 1.4929468631744385, "learning_rate": 3.793667952010327e-06, "loss": 0.7585, "step": 47400 }, { "epoch": 1.82502406159769, "grad_norm": 0.9369055032730103, "learning_rate": 3.7854221923212307e-06, "loss": 0.6929, "step": 47405 }, { "epoch": 
1.8252165543792107, "grad_norm": 2.0922114849090576, "learning_rate": 3.7771852308178925e-06, "loss": 0.8816, "step": 47410 }, { "epoch": 1.8254090471607314, "grad_norm": 1.3968013525009155, "learning_rate": 3.7689570682535758e-06, "loss": 0.8213, "step": 47415 }, { "epoch": 1.825601539942252, "grad_norm": 1.271930456161499, "learning_rate": 3.7607377053806567e-06, "loss": 0.7507, "step": 47420 }, { "epoch": 1.8257940327237727, "grad_norm": 1.283036231994629, "learning_rate": 3.752527142950735e-06, "loss": 0.7738, "step": 47425 }, { "epoch": 1.8259865255052934, "grad_norm": 1.001495122909546, "learning_rate": 3.7443253817146306e-06, "loss": 0.8228, "step": 47430 }, { "epoch": 1.8261790182868143, "grad_norm": 1.5130256414413452, "learning_rate": 3.736132422422345e-06, "loss": 0.8288, "step": 47435 }, { "epoch": 1.826371511068335, "grad_norm": 0.9629761576652527, "learning_rate": 3.7279482658230445e-06, "loss": 0.819, "step": 47440 }, { "epoch": 1.8265640038498556, "grad_norm": 1.0789672136306763, "learning_rate": 3.7197729126651317e-06, "loss": 0.8386, "step": 47445 }, { "epoch": 1.8267564966313763, "grad_norm": 1.1328613758087158, "learning_rate": 3.7116063636961964e-06, "loss": 0.916, "step": 47450 }, { "epoch": 1.826948989412897, "grad_norm": 0.8800186514854431, "learning_rate": 3.703448619662997e-06, "loss": 0.7408, "step": 47455 }, { "epoch": 1.8271414821944179, "grad_norm": 1.6841456890106201, "learning_rate": 3.695299681311537e-06, "loss": 0.8142, "step": 47460 }, { "epoch": 1.8273339749759385, "grad_norm": 1.3652926683425903, "learning_rate": 3.6871595493869316e-06, "loss": 0.8027, "step": 47465 }, { "epoch": 1.8275264677574592, "grad_norm": 1.3112471103668213, "learning_rate": 3.679028224633596e-06, "loss": 0.9375, "step": 47470 }, { "epoch": 1.8277189605389799, "grad_norm": 1.2834017276763916, "learning_rate": 3.670905707795047e-06, "loss": 0.8115, "step": 47475 }, { "epoch": 1.8279114533205005, "grad_norm": 1.075247049331665, "learning_rate": 
3.6627919996140457e-06, "loss": 0.8848, "step": 47480 }, { "epoch": 1.8281039461020212, "grad_norm": 1.0291975736618042, "learning_rate": 3.6546871008325433e-06, "loss": 0.9769, "step": 47485 }, { "epoch": 1.8282964388835419, "grad_norm": 1.429732084274292, "learning_rate": 3.646591012191691e-06, "loss": 0.6725, "step": 47490 }, { "epoch": 1.8284889316650625, "grad_norm": 1.1338505744934082, "learning_rate": 3.6385037344317862e-06, "loss": 0.8288, "step": 47495 }, { "epoch": 1.8286814244465832, "grad_norm": 1.1439357995986938, "learning_rate": 3.6304252682924036e-06, "loss": 0.8643, "step": 47500 }, { "epoch": 1.8288739172281039, "grad_norm": 2.053274631500244, "learning_rate": 3.6223556145122186e-06, "loss": 0.9137, "step": 47505 }, { "epoch": 1.8290664100096246, "grad_norm": 0.9340258836746216, "learning_rate": 3.6142947738291744e-06, "loss": 0.702, "step": 47510 }, { "epoch": 1.8292589027911452, "grad_norm": 2.1745176315307617, "learning_rate": 3.6062427469803705e-06, "loss": 0.8425, "step": 47515 }, { "epoch": 1.829451395572666, "grad_norm": 1.1325429677963257, "learning_rate": 3.5981995347021403e-06, "loss": 0.8262, "step": 47520 }, { "epoch": 1.8296438883541866, "grad_norm": 1.0720462799072266, "learning_rate": 3.5901651377299285e-06, "loss": 0.8925, "step": 47525 }, { "epoch": 1.8298363811357075, "grad_norm": 0.8257774114608765, "learning_rate": 3.5821395567984805e-06, "loss": 0.7577, "step": 47530 }, { "epoch": 1.8300288739172281, "grad_norm": 1.3935045003890991, "learning_rate": 3.5741227926416545e-06, "loss": 0.7907, "step": 47535 }, { "epoch": 1.8302213666987488, "grad_norm": 1.0141493082046509, "learning_rate": 3.56611484599253e-06, "loss": 0.6785, "step": 47540 }, { "epoch": 1.8304138594802695, "grad_norm": 1.2392221689224243, "learning_rate": 3.558115717583388e-06, "loss": 0.9234, "step": 47545 }, { "epoch": 1.8306063522617901, "grad_norm": 1.5585402250289917, "learning_rate": 3.5501254081457104e-06, "loss": 0.9621, "step": 47550 }, { "epoch": 
1.830798845043311, "grad_norm": 1.6715527772903442, "learning_rate": 3.5421439184101234e-06, "loss": 0.869, "step": 47555 }, { "epoch": 1.8309913378248317, "grad_norm": 0.8488251566886902, "learning_rate": 3.5341712491065103e-06, "loss": 0.8197, "step": 47560 }, { "epoch": 1.8311838306063524, "grad_norm": 2.0500576496124268, "learning_rate": 3.52620740096391e-06, "loss": 0.9029, "step": 47565 }, { "epoch": 1.831376323387873, "grad_norm": 2.1723897457122803, "learning_rate": 3.518252374710551e-06, "loss": 0.7979, "step": 47570 }, { "epoch": 1.8315688161693937, "grad_norm": 0.6698965430259705, "learning_rate": 3.510306171073896e-06, "loss": 0.8421, "step": 47575 }, { "epoch": 1.8317613089509144, "grad_norm": 1.0468194484710693, "learning_rate": 3.5023687907805415e-06, "loss": 0.7916, "step": 47580 }, { "epoch": 1.831953801732435, "grad_norm": 1.206809163093567, "learning_rate": 3.4944402345563177e-06, "loss": 0.7774, "step": 47585 }, { "epoch": 1.8321462945139557, "grad_norm": 1.2247503995895386, "learning_rate": 3.486520503126256e-06, "loss": 0.8677, "step": 47590 }, { "epoch": 1.8323387872954764, "grad_norm": 1.510204553604126, "learning_rate": 3.4786095972145547e-06, "loss": 0.8027, "step": 47595 }, { "epoch": 1.832531280076997, "grad_norm": 0.9858173131942749, "learning_rate": 3.4707075175445915e-06, "loss": 0.7336, "step": 47600 }, { "epoch": 1.8327237728585177, "grad_norm": 1.1785179376602173, "learning_rate": 3.462814264838998e-06, "loss": 0.7863, "step": 47605 }, { "epoch": 1.8329162656400384, "grad_norm": 1.1377897262573242, "learning_rate": 3.4549298398195316e-06, "loss": 0.7096, "step": 47610 }, { "epoch": 1.833108758421559, "grad_norm": 1.5963387489318848, "learning_rate": 3.4470542432071704e-06, "loss": 0.8752, "step": 47615 }, { "epoch": 1.8333012512030797, "grad_norm": 1.3239431381225586, "learning_rate": 3.4391874757221054e-06, "loss": 0.7784, "step": 47620 }, { "epoch": 1.8334937439846006, "grad_norm": 1.5421568155288696, "learning_rate": 
3.431329538083694e-06, "loss": 0.7401, "step": 47625 }, { "epoch": 1.8336862367661213, "grad_norm": 1.7763359546661377, "learning_rate": 3.423480431010462e-06, "loss": 0.739, "step": 47630 }, { "epoch": 1.833878729547642, "grad_norm": 1.8040746450424194, "learning_rate": 3.415640155220212e-06, "loss": 0.848, "step": 47635 }, { "epoch": 1.8340712223291626, "grad_norm": 1.0281445980072021, "learning_rate": 3.4078087114298495e-06, "loss": 0.7374, "step": 47640 }, { "epoch": 1.8342637151106833, "grad_norm": 1.061551570892334, "learning_rate": 3.399986100355501e-06, "loss": 0.7567, "step": 47645 }, { "epoch": 1.8344562078922042, "grad_norm": 0.8794161677360535, "learning_rate": 3.392172322712517e-06, "loss": 0.7726, "step": 47650 }, { "epoch": 1.8346487006737249, "grad_norm": 2.0824060440063477, "learning_rate": 3.384367379215425e-06, "loss": 0.9414, "step": 47655 }, { "epoch": 1.8348411934552455, "grad_norm": 1.3683054447174072, "learning_rate": 3.3765712705778884e-06, "loss": 0.7369, "step": 47660 }, { "epoch": 1.8350336862367662, "grad_norm": 1.2808324098587036, "learning_rate": 3.3687839975128477e-06, "loss": 0.7476, "step": 47665 }, { "epoch": 1.8352261790182869, "grad_norm": 1.3409979343414307, "learning_rate": 3.3610055607323887e-06, "loss": 0.7112, "step": 47670 }, { "epoch": 1.8354186717998076, "grad_norm": 1.0510163307189941, "learning_rate": 3.353235960947787e-06, "loss": 0.7489, "step": 47675 }, { "epoch": 1.8356111645813282, "grad_norm": 0.9734552502632141, "learning_rate": 3.345475198869552e-06, "loss": 0.6948, "step": 47680 }, { "epoch": 1.835803657362849, "grad_norm": 0.9429619908332825, "learning_rate": 3.337723275207316e-06, "loss": 0.5987, "step": 47685 }, { "epoch": 1.8359961501443696, "grad_norm": 1.4983386993408203, "learning_rate": 3.3299801906699567e-06, "loss": 0.809, "step": 47690 }, { "epoch": 1.8361886429258902, "grad_norm": 2.2176480293273926, "learning_rate": 3.3222459459655297e-06, "loss": 0.8457, "step": 47695 }, { "epoch": 
1.836381135707411, "grad_norm": 0.8564404249191284, "learning_rate": 3.3145205418012915e-06, "loss": 0.8729, "step": 47700 }, { "epoch": 1.8365736284889316, "grad_norm": 1.057340383529663, "learning_rate": 3.3068039788836435e-06, "loss": 0.7122, "step": 47705 }, { "epoch": 1.8367661212704522, "grad_norm": 1.1386967897415161, "learning_rate": 3.299096257918255e-06, "loss": 0.7144, "step": 47710 }, { "epoch": 1.836958614051973, "grad_norm": 1.4860408306121826, "learning_rate": 3.2913973796099174e-06, "loss": 0.6879, "step": 47715 }, { "epoch": 1.8371511068334936, "grad_norm": 1.474790096282959, "learning_rate": 3.2837073446626677e-06, "loss": 0.9557, "step": 47720 }, { "epoch": 1.8373435996150145, "grad_norm": 1.0030909776687622, "learning_rate": 3.276026153779688e-06, "loss": 0.7611, "step": 47725 }, { "epoch": 1.8375360923965351, "grad_norm": 1.4009109735488892, "learning_rate": 3.2683538076633714e-06, "loss": 0.7646, "step": 47730 }, { "epoch": 1.8377285851780558, "grad_norm": 1.6726889610290527, "learning_rate": 3.2606903070153127e-06, "loss": 0.8261, "step": 47735 }, { "epoch": 1.8379210779595765, "grad_norm": 1.5140515565872192, "learning_rate": 3.253035652536307e-06, "loss": 0.8136, "step": 47740 }, { "epoch": 1.8381135707410972, "grad_norm": 1.0216679573059082, "learning_rate": 3.2453898449262834e-06, "loss": 0.8699, "step": 47745 }, { "epoch": 1.838306063522618, "grad_norm": 0.9776056408882141, "learning_rate": 3.2377528848844154e-06, "loss": 0.6758, "step": 47750 }, { "epoch": 1.8384985563041387, "grad_norm": 1.2438172101974487, "learning_rate": 3.2301247731090557e-06, "loss": 0.848, "step": 47755 }, { "epoch": 1.8386910490856594, "grad_norm": 1.5708495378494263, "learning_rate": 3.2225055102977464e-06, "loss": 0.7504, "step": 47760 }, { "epoch": 1.83888354186718, "grad_norm": 1.5115718841552734, "learning_rate": 3.2148950971472302e-06, "loss": 0.8011, "step": 47765 }, { "epoch": 1.8390760346487007, "grad_norm": 1.2615517377853394, "learning_rate": 
3.207293534353395e-06, "loss": 0.8359, "step": 47770 }, { "epoch": 1.8392685274302214, "grad_norm": 1.3792976140975952, "learning_rate": 3.1997008226113734e-06, "loss": 0.8824, "step": 47775 }, { "epoch": 1.839461020211742, "grad_norm": 0.8459932804107666, "learning_rate": 3.192116962615477e-06, "loss": 0.7326, "step": 47780 }, { "epoch": 1.8396535129932627, "grad_norm": 1.6874253749847412, "learning_rate": 3.184541955059195e-06, "loss": 0.9437, "step": 47785 }, { "epoch": 1.8398460057747834, "grad_norm": 1.0827080011367798, "learning_rate": 3.1769758006351846e-06, "loss": 0.7969, "step": 47790 }, { "epoch": 1.840038498556304, "grad_norm": 1.4325987100601196, "learning_rate": 3.1694185000353703e-06, "loss": 0.6621, "step": 47795 }, { "epoch": 1.8402309913378248, "grad_norm": 0.8930256366729736, "learning_rate": 3.1618700539507774e-06, "loss": 0.8178, "step": 47800 }, { "epoch": 1.8404234841193454, "grad_norm": 1.8248862028121948, "learning_rate": 3.154330463071675e-06, "loss": 0.6502, "step": 47805 }, { "epoch": 1.840615976900866, "grad_norm": 1.318647027015686, "learning_rate": 3.146799728087513e-06, "loss": 0.7669, "step": 47810 }, { "epoch": 1.8408084696823868, "grad_norm": 1.4266585111618042, "learning_rate": 3.139277849686928e-06, "loss": 0.8331, "step": 47815 }, { "epoch": 1.8410009624639077, "grad_norm": 1.6008580923080444, "learning_rate": 3.131764828557715e-06, "loss": 0.7556, "step": 47820 }, { "epoch": 1.8411934552454283, "grad_norm": 1.1239802837371826, "learning_rate": 3.1242606653869355e-06, "loss": 0.7312, "step": 47825 }, { "epoch": 1.841385948026949, "grad_norm": 1.142177939414978, "learning_rate": 3.116765360860774e-06, "loss": 0.7444, "step": 47830 }, { "epoch": 1.8415784408084697, "grad_norm": 1.0553315877914429, "learning_rate": 3.109278915664615e-06, "loss": 0.8287, "step": 47835 }, { "epoch": 1.8417709335899903, "grad_norm": 1.4490617513656616, "learning_rate": 3.101801330483067e-06, "loss": 0.9347, "step": 47840 }, { "epoch": 
1.8419634263715112, "grad_norm": 2.255176305770874, "learning_rate": 3.0943326059999056e-06, "loss": 0.8066, "step": 47845 }, { "epoch": 1.842155919153032, "grad_norm": 1.7092933654785156, "learning_rate": 3.0868727428980617e-06, "loss": 0.9151, "step": 47850 }, { "epoch": 1.8423484119345526, "grad_norm": 1.135002613067627, "learning_rate": 3.079421741859734e-06, "loss": 0.873, "step": 47855 }, { "epoch": 1.8425409047160732, "grad_norm": 1.5216232538223267, "learning_rate": 3.071979603566233e-06, "loss": 0.9477, "step": 47860 }, { "epoch": 1.842733397497594, "grad_norm": 1.8921546936035156, "learning_rate": 3.0645463286981148e-06, "loss": 0.7256, "step": 47865 }, { "epoch": 1.8429258902791146, "grad_norm": 1.7546651363372803, "learning_rate": 3.0571219179351016e-06, "loss": 0.7983, "step": 47870 }, { "epoch": 1.8431183830606352, "grad_norm": 1.1996418237686157, "learning_rate": 3.0497063719561068e-06, "loss": 0.7587, "step": 47875 }, { "epoch": 1.843310875842156, "grad_norm": 1.1013127565383911, "learning_rate": 3.0422996914392098e-06, "loss": 0.7332, "step": 47880 }, { "epoch": 1.8435033686236766, "grad_norm": 1.216601014137268, "learning_rate": 3.0349018770617354e-06, "loss": 0.8414, "step": 47885 }, { "epoch": 1.8436958614051973, "grad_norm": 0.8273448944091797, "learning_rate": 3.0275129295001315e-06, "loss": 0.7199, "step": 47890 }, { "epoch": 1.843888354186718, "grad_norm": 1.1838254928588867, "learning_rate": 3.020132849430102e-06, "loss": 0.7555, "step": 47895 }, { "epoch": 1.8440808469682386, "grad_norm": 0.9097992181777954, "learning_rate": 3.0127616375264956e-06, "loss": 0.7774, "step": 47900 }, { "epoch": 1.8442733397497593, "grad_norm": 3.2226083278656006, "learning_rate": 3.0053992944633404e-06, "loss": 0.8008, "step": 47905 }, { "epoch": 1.84446583253128, "grad_norm": 1.555508017539978, "learning_rate": 2.998045820913886e-06, "loss": 0.814, "step": 47910 }, { "epoch": 1.8446583253128006, "grad_norm": 1.5878740549087524, "learning_rate": 
2.990701217550573e-06, "loss": 0.7585, "step": 47915 }, { "epoch": 1.8448508180943215, "grad_norm": 1.0371932983398438, "learning_rate": 2.983365485045009e-06, "loss": 0.8261, "step": 47920 }, { "epoch": 1.8450433108758422, "grad_norm": 0.9782282114028931, "learning_rate": 2.976038624067978e-06, "loss": 0.6692, "step": 47925 }, { "epoch": 1.8452358036573628, "grad_norm": 1.5847156047821045, "learning_rate": 2.9687206352895125e-06, "loss": 0.846, "step": 47930 }, { "epoch": 1.8454282964388835, "grad_norm": 0.9324734807014465, "learning_rate": 2.961411519378754e-06, "loss": 0.8455, "step": 47935 }, { "epoch": 1.8456207892204044, "grad_norm": 1.0669317245483398, "learning_rate": 2.9541112770041013e-06, "loss": 0.944, "step": 47940 }, { "epoch": 1.845813282001925, "grad_norm": 1.0065456628799438, "learning_rate": 2.9468199088330985e-06, "loss": 0.7346, "step": 47945 }, { "epoch": 1.8460057747834457, "grad_norm": 1.4775437116622925, "learning_rate": 2.9395374155325007e-06, "loss": 0.6656, "step": 47950 }, { "epoch": 1.8461982675649664, "grad_norm": 0.8717533349990845, "learning_rate": 2.93226379776822e-06, "loss": 0.696, "step": 47955 }, { "epoch": 1.846390760346487, "grad_norm": 0.9198839664459229, "learning_rate": 2.924999056205424e-06, "loss": 0.8188, "step": 47960 }, { "epoch": 1.8465832531280078, "grad_norm": 1.2768006324768066, "learning_rate": 2.917743191508393e-06, "loss": 0.8166, "step": 47965 }, { "epoch": 1.8467757459095284, "grad_norm": 1.1566869020462036, "learning_rate": 2.9104962043406293e-06, "loss": 0.7985, "step": 47970 }, { "epoch": 1.846968238691049, "grad_norm": 1.2734941244125366, "learning_rate": 2.9032580953648357e-06, "loss": 0.8511, "step": 47975 }, { "epoch": 1.8471607314725698, "grad_norm": 0.7918460369110107, "learning_rate": 2.8960288652428726e-06, "loss": 0.7173, "step": 47980 }, { "epoch": 1.8473532242540904, "grad_norm": 1.3652775287628174, "learning_rate": 2.8888085146358324e-06, "loss": 0.7498, "step": 47985 }, { "epoch": 
1.847545717035611, "grad_norm": 1.2592366933822632, "learning_rate": 2.881597044203943e-06, "loss": 0.8583, "step": 47990 }, { "epoch": 1.8477382098171318, "grad_norm": 2.0500288009643555, "learning_rate": 2.8743944546066437e-06, "loss": 0.854, "step": 47995 }, { "epoch": 1.8479307025986524, "grad_norm": 1.140693187713623, "learning_rate": 2.867200746502585e-06, "loss": 0.6055, "step": 48000 }, { "epoch": 1.8481231953801731, "grad_norm": 1.287409782409668, "learning_rate": 2.8600159205495748e-06, "loss": 0.8655, "step": 48005 }, { "epoch": 1.8483156881616938, "grad_norm": 1.279222846031189, "learning_rate": 2.8528399774045977e-06, "loss": 0.7086, "step": 48010 }, { "epoch": 1.8485081809432147, "grad_norm": 1.2571754455566406, "learning_rate": 2.845672917723885e-06, "loss": 0.8281, "step": 48015 }, { "epoch": 1.8487006737247353, "grad_norm": 1.407504916191101, "learning_rate": 2.838514742162779e-06, "loss": 0.8478, "step": 48020 }, { "epoch": 1.848893166506256, "grad_norm": 1.116768479347229, "learning_rate": 2.831365451375867e-06, "loss": 0.7778, "step": 48025 }, { "epoch": 1.8490856592877767, "grad_norm": 1.208387017250061, "learning_rate": 2.824225046016904e-06, "loss": 0.7465, "step": 48030 }, { "epoch": 1.8492781520692974, "grad_norm": 1.3610413074493408, "learning_rate": 2.8170935267388343e-06, "loss": 0.7879, "step": 48035 }, { "epoch": 1.8494706448508182, "grad_norm": 0.9448495507240295, "learning_rate": 2.8099708941937697e-06, "loss": 0.7607, "step": 48040 }, { "epoch": 1.849663137632339, "grad_norm": 1.0731843709945679, "learning_rate": 2.8028571490330556e-06, "loss": 0.7323, "step": 48045 }, { "epoch": 1.8498556304138596, "grad_norm": 1.2336262464523315, "learning_rate": 2.795752291907183e-06, "loss": 0.827, "step": 48050 }, { "epoch": 1.8500481231953803, "grad_norm": 1.0452556610107422, "learning_rate": 2.7886563234658327e-06, "loss": 0.7965, "step": 48055 }, { "epoch": 1.850240615976901, "grad_norm": 1.5202009677886963, "learning_rate": 
2.7815692443579066e-06, "loss": 0.7943, "step": 48060 }, { "epoch": 1.8504331087584216, "grad_norm": 1.1691776514053345, "learning_rate": 2.774491055231465e-06, "loss": 0.7282, "step": 48065 }, { "epoch": 1.8506256015399423, "grad_norm": 2.0975561141967773, "learning_rate": 2.7674217567337348e-06, "loss": 0.8538, "step": 48070 }, { "epoch": 1.850818094321463, "grad_norm": 1.0445414781570435, "learning_rate": 2.760361349511198e-06, "loss": 0.9576, "step": 48075 }, { "epoch": 1.8510105871029836, "grad_norm": 1.9726266860961914, "learning_rate": 2.75330983420945e-06, "loss": 0.8439, "step": 48080 }, { "epoch": 1.8512030798845043, "grad_norm": 1.502845048904419, "learning_rate": 2.746267211473319e-06, "loss": 0.9805, "step": 48085 }, { "epoch": 1.851395572666025, "grad_norm": 1.754075050354004, "learning_rate": 2.7392334819468123e-06, "loss": 0.9215, "step": 48090 }, { "epoch": 1.8515880654475456, "grad_norm": 0.9861617088317871, "learning_rate": 2.7322086462731157e-06, "loss": 0.9203, "step": 48095 }, { "epoch": 1.8517805582290663, "grad_norm": 1.416224718093872, "learning_rate": 2.7251927050945813e-06, "loss": 0.834, "step": 48100 }, { "epoch": 1.851973051010587, "grad_norm": 1.3885353803634644, "learning_rate": 2.7181856590527967e-06, "loss": 0.8002, "step": 48105 }, { "epoch": 1.8521655437921078, "grad_norm": 1.1199766397476196, "learning_rate": 2.7111875087885042e-06, "loss": 0.8423, "step": 48110 }, { "epoch": 1.8523580365736285, "grad_norm": 0.9421118497848511, "learning_rate": 2.7041982549416144e-06, "loss": 0.7188, "step": 48115 }, { "epoch": 1.8525505293551492, "grad_norm": 2.011782646179199, "learning_rate": 2.697217898151294e-06, "loss": 0.8969, "step": 48120 }, { "epoch": 1.8527430221366699, "grad_norm": 1.3184289932250977, "learning_rate": 2.6902464390558103e-06, "loss": 0.6961, "step": 48125 }, { "epoch": 1.8529355149181905, "grad_norm": 1.5041390657424927, "learning_rate": 2.683283878292675e-06, "loss": 0.7917, "step": 48130 }, { "epoch": 
1.8531280076997114, "grad_norm": 1.7774921655654907, "learning_rate": 2.6763302164985573e-06, "loss": 1.0091, "step": 48135 }, { "epoch": 1.853320500481232, "grad_norm": 1.642179250717163, "learning_rate": 2.6693854543093476e-06, "loss": 0.7543, "step": 48140 }, { "epoch": 1.8535129932627528, "grad_norm": 1.1686973571777344, "learning_rate": 2.662449592360061e-06, "loss": 0.7695, "step": 48145 }, { "epoch": 1.8537054860442734, "grad_norm": 0.9292591214179993, "learning_rate": 2.655522631284979e-06, "loss": 0.7245, "step": 48150 }, { "epoch": 1.853897978825794, "grad_norm": 2.467902660369873, "learning_rate": 2.6486045717174836e-06, "loss": 0.8604, "step": 48155 }, { "epoch": 1.8540904716073148, "grad_norm": 1.306139349937439, "learning_rate": 2.641695414290224e-06, "loss": 0.7734, "step": 48160 }, { "epoch": 1.8542829643888354, "grad_norm": 1.376107096672058, "learning_rate": 2.634795159634962e-06, "loss": 0.6813, "step": 48165 }, { "epoch": 1.8544754571703561, "grad_norm": 1.3315315246582031, "learning_rate": 2.6279038083827146e-06, "loss": 0.7047, "step": 48170 }, { "epoch": 1.8546679499518768, "grad_norm": 1.155645489692688, "learning_rate": 2.6210213611636115e-06, "loss": 0.7626, "step": 48175 }, { "epoch": 1.8548604427333975, "grad_norm": 1.1847624778747559, "learning_rate": 2.6141478186070487e-06, "loss": 0.8735, "step": 48180 }, { "epoch": 1.8550529355149181, "grad_norm": 1.0727204084396362, "learning_rate": 2.6072831813415354e-06, "loss": 0.7699, "step": 48185 }, { "epoch": 1.8552454282964388, "grad_norm": 1.2296266555786133, "learning_rate": 2.600427449994813e-06, "loss": 0.5567, "step": 48190 }, { "epoch": 1.8554379210779595, "grad_norm": 1.0693732500076294, "learning_rate": 2.593580625193781e-06, "loss": 0.6918, "step": 48195 }, { "epoch": 1.8556304138594801, "grad_norm": 1.5725680589675903, "learning_rate": 2.58674270756456e-06, "loss": 0.835, "step": 48200 }, { "epoch": 1.8558229066410008, "grad_norm": 1.0935879945755005, "learning_rate": 
2.5799136977323948e-06, "loss": 0.822, "step": 48205 }, { "epoch": 1.8560153994225217, "grad_norm": 0.9157683849334717, "learning_rate": 2.573093596321774e-06, "loss": 0.8057, "step": 48210 }, { "epoch": 1.8562078922040424, "grad_norm": 1.6122345924377441, "learning_rate": 2.566282403956355e-06, "loss": 0.9039, "step": 48215 }, { "epoch": 1.856400384985563, "grad_norm": 1.6490685939788818, "learning_rate": 2.5594801212589613e-06, "loss": 0.9311, "step": 48220 }, { "epoch": 1.8565928777670837, "grad_norm": 1.5705087184906006, "learning_rate": 2.5526867488516513e-06, "loss": 0.6325, "step": 48225 }, { "epoch": 1.8567853705486044, "grad_norm": 1.2620983123779297, "learning_rate": 2.5459022873555726e-06, "loss": 0.7455, "step": 48230 }, { "epoch": 1.8569778633301253, "grad_norm": 1.1116305589675903, "learning_rate": 2.5391267373911842e-06, "loss": 0.65, "step": 48235 }, { "epoch": 1.857170356111646, "grad_norm": 1.5795679092407227, "learning_rate": 2.532360099578024e-06, "loss": 0.8738, "step": 48240 }, { "epoch": 1.8573628488931666, "grad_norm": 1.2260464429855347, "learning_rate": 2.5256023745348746e-06, "loss": 0.9088, "step": 48245 }, { "epoch": 1.8575553416746873, "grad_norm": 1.582663893699646, "learning_rate": 2.518853562879675e-06, "loss": 0.7772, "step": 48250 }, { "epoch": 1.857747834456208, "grad_norm": 1.497031807899475, "learning_rate": 2.5121136652295764e-06, "loss": 0.8043, "step": 48255 }, { "epoch": 1.8579403272377286, "grad_norm": 1.0930918455123901, "learning_rate": 2.505382682200863e-06, "loss": 0.7846, "step": 48260 }, { "epoch": 1.8581328200192493, "grad_norm": 1.2151498794555664, "learning_rate": 2.4986606144090762e-06, "loss": 0.7133, "step": 48265 }, { "epoch": 1.85832531280077, "grad_norm": 1.189026951789856, "learning_rate": 2.49194746246888e-06, "loss": 0.7892, "step": 48270 }, { "epoch": 1.8585178055822906, "grad_norm": 1.6892609596252441, "learning_rate": 2.4852432269941607e-06, "loss": 0.8151, "step": 48275 }, { "epoch": 
1.8587102983638113, "grad_norm": 1.126964807510376, "learning_rate": 2.4785479085979724e-06, "loss": 1.0332, "step": 48280 }, { "epoch": 1.858902791145332, "grad_norm": 1.3323931694030762, "learning_rate": 2.471861507892559e-06, "loss": 0.7541, "step": 48285 }, { "epoch": 1.8590952839268526, "grad_norm": 1.3149107694625854, "learning_rate": 2.465184025489331e-06, "loss": 0.7269, "step": 48290 }, { "epoch": 1.8592877767083733, "grad_norm": 1.9822214841842651, "learning_rate": 2.458515461998945e-06, "loss": 0.7885, "step": 48295 }, { "epoch": 1.859480269489894, "grad_norm": 1.3917293548583984, "learning_rate": 2.4518558180311456e-06, "loss": 0.7677, "step": 48300 }, { "epoch": 1.8596727622714149, "grad_norm": 0.9052494764328003, "learning_rate": 2.4452050941949357e-06, "loss": 0.7665, "step": 48305 }, { "epoch": 1.8598652550529355, "grad_norm": 1.0716874599456787, "learning_rate": 2.4385632910984834e-06, "loss": 0.6622, "step": 48310 }, { "epoch": 1.8600577478344562, "grad_norm": 1.5617578029632568, "learning_rate": 2.431930409349137e-06, "loss": 0.7643, "step": 48315 }, { "epoch": 1.8602502406159769, "grad_norm": 1.0250791311264038, "learning_rate": 2.4253064495534106e-06, "loss": 0.8006, "step": 48320 }, { "epoch": 1.8604427333974976, "grad_norm": 1.2734397649765015, "learning_rate": 2.4186914123170423e-06, "loss": 0.6411, "step": 48325 }, { "epoch": 1.8606352261790184, "grad_norm": 0.9825404286384583, "learning_rate": 2.412085298244937e-06, "loss": 0.7262, "step": 48330 }, { "epoch": 1.8608277189605391, "grad_norm": 1.2708152532577515, "learning_rate": 2.4054881079411564e-06, "loss": 0.7606, "step": 48335 }, { "epoch": 1.8610202117420598, "grad_norm": 1.1390200853347778, "learning_rate": 2.3988998420089947e-06, "loss": 0.734, "step": 48340 }, { "epoch": 1.8612127045235805, "grad_norm": 1.1848487854003906, "learning_rate": 2.3923205010508932e-06, "loss": 0.742, "step": 48345 }, { "epoch": 1.8614051973051011, "grad_norm": 1.2743182182312012, "learning_rate": 
2.385750085668481e-06, "loss": 0.7798, "step": 48350 }, { "epoch": 1.8615976900866218, "grad_norm": 0.963897168636322, "learning_rate": 2.3791885964625894e-06, "loss": 0.8682, "step": 48355 }, { "epoch": 1.8617901828681425, "grad_norm": 1.480070948600769, "learning_rate": 2.3726360340332376e-06, "loss": 0.7644, "step": 48360 }, { "epoch": 1.8619826756496631, "grad_norm": 0.9536148905754089, "learning_rate": 2.3660923989795803e-06, "loss": 0.8048, "step": 48365 }, { "epoch": 1.8621751684311838, "grad_norm": 1.9504202604293823, "learning_rate": 2.3595576919000163e-06, "loss": 0.8524, "step": 48370 }, { "epoch": 1.8623676612127045, "grad_norm": 0.9313097596168518, "learning_rate": 2.3530319133920896e-06, "loss": 0.8765, "step": 48375 }, { "epoch": 1.8625601539942251, "grad_norm": 1.1792408227920532, "learning_rate": 2.3465150640525456e-06, "loss": 0.7123, "step": 48380 }, { "epoch": 1.8627526467757458, "grad_norm": 0.8612557649612427, "learning_rate": 2.3400071444772964e-06, "loss": 0.6412, "step": 48385 }, { "epoch": 1.8629451395572665, "grad_norm": 1.3216156959533691, "learning_rate": 2.3335081552614768e-06, "loss": 0.7946, "step": 48390 }, { "epoch": 1.8631376323387872, "grad_norm": 0.8052036166191101, "learning_rate": 2.3270180969993226e-06, "loss": 0.8188, "step": 48395 }, { "epoch": 1.8633301251203078, "grad_norm": 1.8680044412612915, "learning_rate": 2.3205369702843703e-06, "loss": 0.767, "step": 48400 }, { "epoch": 1.8635226179018287, "grad_norm": 0.8687813878059387, "learning_rate": 2.314064775709224e-06, "loss": 0.751, "step": 48405 }, { "epoch": 1.8637151106833494, "grad_norm": 1.280096173286438, "learning_rate": 2.3076015138657537e-06, "loss": 0.8564, "step": 48410 }, { "epoch": 1.86390760346487, "grad_norm": 0.4581018090248108, "learning_rate": 2.301147185344965e-06, "loss": 0.6742, "step": 48415 }, { "epoch": 1.8641000962463907, "grad_norm": 1.88551926612854, "learning_rate": 2.294701790737086e-06, "loss": 0.7482, "step": 48420 }, { "epoch": 
1.8642925890279116, "grad_norm": 1.5057963132858276, "learning_rate": 2.2882653306314673e-06, "loss": 0.8031, "step": 48425 }, { "epoch": 1.8644850818094323, "grad_norm": 1.2725811004638672, "learning_rate": 2.2818378056167155e-06, "loss": 0.7703, "step": 48430 }, { "epoch": 1.864677574590953, "grad_norm": 1.0294675827026367, "learning_rate": 2.275419216280572e-06, "loss": 0.8374, "step": 48435 }, { "epoch": 1.8648700673724736, "grad_norm": 1.523382306098938, "learning_rate": 2.2690095632099785e-06, "loss": 0.9034, "step": 48440 }, { "epoch": 1.8650625601539943, "grad_norm": 1.2364825010299683, "learning_rate": 2.2626088469910547e-06, "loss": 0.6834, "step": 48445 }, { "epoch": 1.865255052935515, "grad_norm": 1.2433017492294312, "learning_rate": 2.2562170682090877e-06, "loss": 0.7689, "step": 48450 }, { "epoch": 1.8654475457170356, "grad_norm": 1.3385852575302124, "learning_rate": 2.2498342274485774e-06, "loss": 0.8025, "step": 48455 }, { "epoch": 1.8656400384985563, "grad_norm": 2.2688310146331787, "learning_rate": 2.2434603252932006e-06, "loss": 0.8375, "step": 48460 }, { "epoch": 1.865832531280077, "grad_norm": 1.4485090970993042, "learning_rate": 2.237095362325803e-06, "loss": 0.9097, "step": 48465 }, { "epoch": 1.8660250240615976, "grad_norm": 1.457838773727417, "learning_rate": 2.230739339128396e-06, "loss": 0.7724, "step": 48470 }, { "epoch": 1.8662175168431183, "grad_norm": 1.2256953716278076, "learning_rate": 2.2243922562822374e-06, "loss": 0.769, "step": 48475 }, { "epoch": 1.866410009624639, "grad_norm": 1.5888760089874268, "learning_rate": 2.218054114367685e-06, "loss": 0.7524, "step": 48480 }, { "epoch": 1.8666025024061597, "grad_norm": 1.6216797828674316, "learning_rate": 2.2117249139643415e-06, "loss": 0.8443, "step": 48485 }, { "epoch": 1.8667949951876803, "grad_norm": 0.9898841381072998, "learning_rate": 2.2054046556509666e-06, "loss": 0.7376, "step": 48490 }, { "epoch": 1.866987487969201, "grad_norm": 1.0620770454406738, "learning_rate": 
2.1990933400055093e-06, "loss": 0.8391, "step": 48495 }, { "epoch": 1.867179980750722, "grad_norm": 0.9419720768928528, "learning_rate": 2.192790967605085e-06, "loss": 0.7434, "step": 48500 }, { "epoch": 1.8673724735322426, "grad_norm": 2.1842215061187744, "learning_rate": 2.1864975390260334e-06, "loss": 0.9107, "step": 48505 }, { "epoch": 1.8675649663137632, "grad_norm": 2.277456283569336, "learning_rate": 2.180213054843816e-06, "loss": 0.8018, "step": 48510 }, { "epoch": 1.867757459095284, "grad_norm": 1.2959650754928589, "learning_rate": 2.1739375156331176e-06, "loss": 0.6821, "step": 48515 }, { "epoch": 1.8679499518768046, "grad_norm": 1.474094271659851, "learning_rate": 2.1676709219677905e-06, "loss": 0.7479, "step": 48520 }, { "epoch": 1.8681424446583255, "grad_norm": 3.1948869228363037, "learning_rate": 2.161413274420876e-06, "loss": 1.0174, "step": 48525 }, { "epoch": 1.8683349374398461, "grad_norm": 1.1724430322647095, "learning_rate": 2.1551645735646053e-06, "loss": 0.8481, "step": 48530 }, { "epoch": 1.8685274302213668, "grad_norm": 1.5295108556747437, "learning_rate": 2.1489248199703773e-06, "loss": 0.7603, "step": 48535 }, { "epoch": 1.8687199230028875, "grad_norm": 1.8773775100708008, "learning_rate": 2.142694014208757e-06, "loss": 0.8019, "step": 48540 }, { "epoch": 1.8689124157844081, "grad_norm": 1.0877622365951538, "learning_rate": 2.136472156849523e-06, "loss": 0.8444, "step": 48545 }, { "epoch": 1.8691049085659288, "grad_norm": 1.389959692955017, "learning_rate": 2.130259248461641e-06, "loss": 0.9192, "step": 48550 }, { "epoch": 1.8692974013474495, "grad_norm": 0.84064120054245, "learning_rate": 2.1240552896131918e-06, "loss": 0.7529, "step": 48555 }, { "epoch": 1.8694898941289702, "grad_norm": 0.8932579159736633, "learning_rate": 2.117860280871542e-06, "loss": 0.7907, "step": 48560 }, { "epoch": 1.8696823869104908, "grad_norm": 1.7808706760406494, "learning_rate": 2.1116742228031616e-06, "loss": 0.8738, "step": 48565 }, { "epoch": 
1.8698748796920115, "grad_norm": 1.6530510187149048, "learning_rate": 2.105497115973709e-06, "loss": 0.8155, "step": 48570 }, { "epoch": 1.8700673724735322, "grad_norm": 1.3979318141937256, "learning_rate": 2.0993289609480547e-06, "loss": 0.8067, "step": 48575 }, { "epoch": 1.8702598652550528, "grad_norm": 1.452545404434204, "learning_rate": 2.0931697582902476e-06, "loss": 0.8966, "step": 48580 }, { "epoch": 1.8704523580365735, "grad_norm": 1.182625651359558, "learning_rate": 2.087019508563481e-06, "loss": 0.7396, "step": 48585 }, { "epoch": 1.8706448508180942, "grad_norm": 1.7071011066436768, "learning_rate": 2.080878212330173e-06, "loss": 0.773, "step": 48590 }, { "epoch": 1.870837343599615, "grad_norm": 0.9710677862167358, "learning_rate": 2.074745870151895e-06, "loss": 0.762, "step": 48595 }, { "epoch": 1.8710298363811357, "grad_norm": 1.9075227975845337, "learning_rate": 2.068622482589411e-06, "loss": 0.7325, "step": 48600 }, { "epoch": 1.8712223291626564, "grad_norm": 1.70927894115448, "learning_rate": 2.0625080502026606e-06, "loss": 0.7394, "step": 48605 }, { "epoch": 1.871414821944177, "grad_norm": 1.2238922119140625, "learning_rate": 2.0564025735507864e-06, "loss": 0.8871, "step": 48610 }, { "epoch": 1.8716073147256977, "grad_norm": 1.3306856155395508, "learning_rate": 2.0503060531920528e-06, "loss": 0.7314, "step": 48615 }, { "epoch": 1.8717998075072186, "grad_norm": 1.1816201210021973, "learning_rate": 2.0442184896840023e-06, "loss": 0.7342, "step": 48620 }, { "epoch": 1.8719923002887393, "grad_norm": 1.3129255771636963, "learning_rate": 2.0381398835832567e-06, "loss": 0.6599, "step": 48625 }, { "epoch": 1.87218479307026, "grad_norm": 2.268023729324341, "learning_rate": 2.032070235445682e-06, "loss": 0.9488, "step": 48630 }, { "epoch": 1.8723772858517806, "grad_norm": 1.5048736333847046, "learning_rate": 2.0260095458263128e-06, "loss": 0.7311, "step": 48635 }, { "epoch": 1.8725697786333013, "grad_norm": 2.065556287765503, "learning_rate": 
2.019957815279361e-06, "loss": 0.9459, "step": 48640 }, { "epoch": 1.872762271414822, "grad_norm": 1.1562378406524658, "learning_rate": 2.0139150443581944e-06, "loss": 0.8719, "step": 48645 }, { "epoch": 1.8729547641963427, "grad_norm": 1.01041579246521, "learning_rate": 2.007881233615394e-06, "loss": 0.8623, "step": 48650 }, { "epoch": 1.8731472569778633, "grad_norm": 1.1639820337295532, "learning_rate": 2.0018563836027293e-06, "loss": 0.7525, "step": 48655 }, { "epoch": 1.873339749759384, "grad_norm": 1.1873496770858765, "learning_rate": 1.995840494871104e-06, "loss": 0.7482, "step": 48660 }, { "epoch": 1.8735322425409047, "grad_norm": 1.128056526184082, "learning_rate": 1.989833567970667e-06, "loss": 0.8526, "step": 48665 }, { "epoch": 1.8737247353224253, "grad_norm": 2.4238860607147217, "learning_rate": 1.98383560345069e-06, "loss": 0.848, "step": 48670 }, { "epoch": 1.873917228103946, "grad_norm": 2.036635160446167, "learning_rate": 1.977846601859634e-06, "loss": 0.8757, "step": 48675 }, { "epoch": 1.8741097208854667, "grad_norm": 1.680403709411621, "learning_rate": 1.9718665637451726e-06, "loss": 0.9098, "step": 48680 }, { "epoch": 1.8743022136669873, "grad_norm": 0.7588726878166199, "learning_rate": 1.965895489654157e-06, "loss": 0.7572, "step": 48685 }, { "epoch": 1.874494706448508, "grad_norm": 1.095284104347229, "learning_rate": 1.9599333801325505e-06, "loss": 0.8009, "step": 48690 }, { "epoch": 1.874687199230029, "grad_norm": 1.7757399082183838, "learning_rate": 1.9539802357256055e-06, "loss": 0.7788, "step": 48695 }, { "epoch": 1.8748796920115496, "grad_norm": 1.0243725776672363, "learning_rate": 1.9480360569776647e-06, "loss": 0.8988, "step": 48700 }, { "epoch": 1.8750721847930703, "grad_norm": 1.7366422414779663, "learning_rate": 1.942100844432293e-06, "loss": 0.7865, "step": 48705 }, { "epoch": 1.875264677574591, "grad_norm": 1.935095191001892, "learning_rate": 1.936174598632212e-06, "loss": 0.9178, "step": 48710 }, { "epoch": 1.8754571703561116, 
"grad_norm": 0.9639614224433899, "learning_rate": 1.9302573201193776e-06, "loss": 0.6945, "step": 48715 }, { "epoch": 1.8756496631376325, "grad_norm": 1.3461066484451294, "learning_rate": 1.924349009434834e-06, "loss": 0.7487, "step": 48720 }, { "epoch": 1.8758421559191532, "grad_norm": 1.1384443044662476, "learning_rate": 1.9184496671188933e-06, "loss": 0.7095, "step": 48725 }, { "epoch": 1.8760346487006738, "grad_norm": 0.8311898112297058, "learning_rate": 1.9125592937109916e-06, "loss": 0.7814, "step": 48730 }, { "epoch": 1.8762271414821945, "grad_norm": 1.4966212511062622, "learning_rate": 1.906677889749775e-06, "loss": 0.9245, "step": 48735 }, { "epoch": 1.8764196342637152, "grad_norm": 1.414469599723816, "learning_rate": 1.900805455773058e-06, "loss": 0.8376, "step": 48740 }, { "epoch": 1.8766121270452358, "grad_norm": 1.3187638521194458, "learning_rate": 1.8949419923178336e-06, "loss": 0.7, "step": 48745 }, { "epoch": 1.8768046198267565, "grad_norm": 1.3405956029891968, "learning_rate": 1.8890874999202946e-06, "loss": 0.9212, "step": 48750 }, { "epoch": 1.8769971126082772, "grad_norm": 1.8978711366653442, "learning_rate": 1.8832419791157574e-06, "loss": 0.8214, "step": 48755 }, { "epoch": 1.8771896053897978, "grad_norm": 1.8074771165847778, "learning_rate": 1.8774054304387834e-06, "loss": 0.8833, "step": 48760 }, { "epoch": 1.8773820981713185, "grad_norm": 1.301705241203308, "learning_rate": 1.871577854423079e-06, "loss": 0.7134, "step": 48765 }, { "epoch": 1.8775745909528392, "grad_norm": 0.8135727047920227, "learning_rate": 1.8657592516015398e-06, "loss": 0.6364, "step": 48770 }, { "epoch": 1.8777670837343599, "grad_norm": 0.7817474007606506, "learning_rate": 1.8599496225062296e-06, "loss": 0.6671, "step": 48775 }, { "epoch": 1.8779595765158805, "grad_norm": 1.2370343208312988, "learning_rate": 1.8541489676684232e-06, "loss": 0.8199, "step": 48780 }, { "epoch": 1.8781520692974012, "grad_norm": 1.585356593132019, "learning_rate": 1.8483572876185296e-06, 
"loss": 0.9021, "step": 48785 }, { "epoch": 1.878344562078922, "grad_norm": 1.4099061489105225, "learning_rate": 1.8425745828861585e-06, "loss": 0.7381, "step": 48790 }, { "epoch": 1.8785370548604428, "grad_norm": 2.376617670059204, "learning_rate": 1.8368008540001203e-06, "loss": 0.7274, "step": 48795 }, { "epoch": 1.8787295476419634, "grad_norm": 2.1324985027313232, "learning_rate": 1.8310361014883703e-06, "loss": 0.953, "step": 48800 }, { "epoch": 1.878922040423484, "grad_norm": 2.0138044357299805, "learning_rate": 1.8252803258780538e-06, "loss": 0.8029, "step": 48805 }, { "epoch": 1.8791145332050048, "grad_norm": 1.857802152633667, "learning_rate": 1.8195335276955162e-06, "loss": 0.8503, "step": 48810 }, { "epoch": 1.8793070259865257, "grad_norm": 1.9087456464767456, "learning_rate": 1.813795707466237e-06, "loss": 0.8598, "step": 48815 }, { "epoch": 1.8794995187680463, "grad_norm": 1.3068690299987793, "learning_rate": 1.8080668657149192e-06, "loss": 0.6669, "step": 48820 }, { "epoch": 1.879692011549567, "grad_norm": 1.2544077634811401, "learning_rate": 1.802347002965421e-06, "loss": 0.851, "step": 48825 }, { "epoch": 1.8798845043310877, "grad_norm": 1.29012930393219, "learning_rate": 1.7966361197408022e-06, "loss": 0.8332, "step": 48830 }, { "epoch": 1.8800769971126083, "grad_norm": 1.0903306007385254, "learning_rate": 1.7909342165632558e-06, "loss": 0.6718, "step": 48835 }, { "epoch": 1.880269489894129, "grad_norm": 1.1692110300064087, "learning_rate": 1.7852412939542208e-06, "loss": 0.8295, "step": 48840 }, { "epoch": 1.8804619826756497, "grad_norm": 1.5184317827224731, "learning_rate": 1.7795573524342356e-06, "loss": 0.7913, "step": 48845 }, { "epoch": 1.8806544754571703, "grad_norm": 1.50426185131073, "learning_rate": 1.7738823925230964e-06, "loss": 0.8514, "step": 48850 }, { "epoch": 1.880846968238691, "grad_norm": 1.971408486366272, "learning_rate": 1.7682164147397208e-06, "loss": 0.8384, "step": 48855 }, { "epoch": 1.8810394610202117, "grad_norm": 
0.9322729706764221, "learning_rate": 1.7625594196022166e-06, "loss": 0.7074, "step": 48860 }, { "epoch": 1.8812319538017324, "grad_norm": 1.4018663167953491, "learning_rate": 1.7569114076278924e-06, "loss": 0.7446, "step": 48865 }, { "epoch": 1.881424446583253, "grad_norm": 1.2637794017791748, "learning_rate": 1.7512723793332242e-06, "loss": 0.7912, "step": 48870 }, { "epoch": 1.8816169393647737, "grad_norm": 0.7229753732681274, "learning_rate": 1.7456423352338658e-06, "loss": 0.7339, "step": 48875 }, { "epoch": 1.8818094321462944, "grad_norm": 0.9690874814987183, "learning_rate": 1.7400212758446276e-06, "loss": 0.727, "step": 48880 }, { "epoch": 1.8820019249278153, "grad_norm": 1.093895673751831, "learning_rate": 1.7344092016795433e-06, "loss": 0.7939, "step": 48885 }, { "epoch": 1.882194417709336, "grad_norm": 1.0549845695495605, "learning_rate": 1.7288061132517686e-06, "loss": 0.8103, "step": 48890 }, { "epoch": 1.8823869104908566, "grad_norm": 1.6850600242614746, "learning_rate": 1.723212011073705e-06, "loss": 0.7877, "step": 48895 }, { "epoch": 1.8825794032723773, "grad_norm": 1.4307023286819458, "learning_rate": 1.7176268956568653e-06, "loss": 0.7448, "step": 48900 }, { "epoch": 1.882771896053898, "grad_norm": 1.4105719327926636, "learning_rate": 1.7120507675120078e-06, "loss": 0.9085, "step": 48905 }, { "epoch": 1.8829643888354188, "grad_norm": 1.330667495727539, "learning_rate": 1.7064836271489803e-06, "loss": 0.8304, "step": 48910 }, { "epoch": 1.8831568816169395, "grad_norm": 1.1804107427597046, "learning_rate": 1.7009254750769088e-06, "loss": 0.8184, "step": 48915 }, { "epoch": 1.8833493743984602, "grad_norm": 0.7987260818481445, "learning_rate": 1.695376311804031e-06, "loss": 0.6531, "step": 48920 }, { "epoch": 1.8835418671799808, "grad_norm": 0.7400739789009094, "learning_rate": 1.6898361378377747e-06, "loss": 0.7269, "step": 48925 }, { "epoch": 1.8837343599615015, "grad_norm": 1.043596863746643, "learning_rate": 1.6843049536847567e-06, "loss": 0.7467, 
"step": 48930 }, { "epoch": 1.8839268527430222, "grad_norm": 1.6760101318359375, "learning_rate": 1.6787827598507721e-06, "loss": 0.6619, "step": 48935 }, { "epoch": 1.8841193455245429, "grad_norm": 1.3724150657653809, "learning_rate": 1.6732695568407842e-06, "loss": 0.7495, "step": 48940 }, { "epoch": 1.8843118383060635, "grad_norm": 0.6713613867759705, "learning_rate": 1.6677653451589448e-06, "loss": 0.7356, "step": 48945 }, { "epoch": 1.8845043310875842, "grad_norm": 1.0061167478561401, "learning_rate": 1.6622701253085626e-06, "loss": 0.8709, "step": 48950 }, { "epoch": 1.8846968238691049, "grad_norm": 1.6838310956954956, "learning_rate": 1.656783897792158e-06, "loss": 0.8505, "step": 48955 }, { "epoch": 1.8848893166506255, "grad_norm": 1.262903094291687, "learning_rate": 1.651306663111396e-06, "loss": 0.8333, "step": 48960 }, { "epoch": 1.8850818094321462, "grad_norm": 0.9866868257522583, "learning_rate": 1.645838421767154e-06, "loss": 0.8363, "step": 48965 }, { "epoch": 1.8852743022136669, "grad_norm": 1.7241532802581787, "learning_rate": 1.6403791742594433e-06, "loss": 0.8477, "step": 48970 }, { "epoch": 1.8854667949951875, "grad_norm": 1.6335369348526, "learning_rate": 1.6349289210874752e-06, "loss": 0.7239, "step": 48975 }, { "epoch": 1.8856592877767082, "grad_norm": 2.1081769466400146, "learning_rate": 1.6294876627496624e-06, "loss": 0.6491, "step": 48980 }, { "epoch": 1.885851780558229, "grad_norm": 1.3898777961730957, "learning_rate": 1.6240553997435403e-06, "loss": 0.7792, "step": 48985 }, { "epoch": 1.8860442733397498, "grad_norm": 0.9727959632873535, "learning_rate": 1.6186321325658893e-06, "loss": 0.7878, "step": 48990 }, { "epoch": 1.8862367661212704, "grad_norm": 1.1837464570999146, "learning_rate": 1.6132178617126016e-06, "loss": 1.0175, "step": 48995 }, { "epoch": 1.8864292589027911, "grad_norm": 2.5681560039520264, "learning_rate": 1.607812587678792e-06, "loss": 0.9884, "step": 49000 }, { "epoch": 1.8866217516843118, "grad_norm": 
0.915593683719635, "learning_rate": 1.602416310958732e-06, "loss": 0.677, "step": 49005 }, { "epoch": 1.8868142444658327, "grad_norm": 1.0929181575775146, "learning_rate": 1.5970290320458715e-06, "loss": 0.7596, "step": 49010 }, { "epoch": 1.8870067372473533, "grad_norm": 1.0402964353561401, "learning_rate": 1.5916507514328494e-06, "loss": 0.6634, "step": 49015 }, { "epoch": 1.887199230028874, "grad_norm": 1.7485926151275635, "learning_rate": 1.5862814696114836e-06, "loss": 0.7347, "step": 49020 }, { "epoch": 1.8873917228103947, "grad_norm": 2.60465669631958, "learning_rate": 1.5809211870727259e-06, "loss": 0.9895, "step": 49025 }, { "epoch": 1.8875842155919154, "grad_norm": 1.0043087005615234, "learning_rate": 1.5755699043067728e-06, "loss": 0.7079, "step": 49030 }, { "epoch": 1.887776708373436, "grad_norm": 0.876469075679779, "learning_rate": 1.5702276218029444e-06, "loss": 0.6647, "step": 49035 }, { "epoch": 1.8879692011549567, "grad_norm": 1.5398415327072144, "learning_rate": 1.5648943400497495e-06, "loss": 0.8207, "step": 49040 }, { "epoch": 1.8881616939364774, "grad_norm": 0.8192068934440613, "learning_rate": 1.5595700595349093e-06, "loss": 0.8108, "step": 49045 }, { "epoch": 1.888354186717998, "grad_norm": 2.2673983573913574, "learning_rate": 1.554254780745279e-06, "loss": 0.8681, "step": 49050 }, { "epoch": 1.8885466794995187, "grad_norm": 1.8705366849899292, "learning_rate": 1.5489485041669026e-06, "loss": 0.7649, "step": 49055 }, { "epoch": 1.8887391722810394, "grad_norm": 1.0904033184051514, "learning_rate": 1.5436512302850148e-06, "loss": 0.7311, "step": 49060 }, { "epoch": 1.88893166506256, "grad_norm": 1.5426052808761597, "learning_rate": 1.5383629595839944e-06, "loss": 0.8157, "step": 49065 }, { "epoch": 1.8891241578440807, "grad_norm": 1.1552188396453857, "learning_rate": 1.5330836925474434e-06, "loss": 0.8184, "step": 49070 }, { "epoch": 1.8893166506256014, "grad_norm": 1.0506844520568848, "learning_rate": 1.5278134296580981e-06, "loss": 0.7162, 
"step": 49075 }, { "epoch": 1.8895091434071223, "grad_norm": 1.1629083156585693, "learning_rate": 1.5225521713979063e-06, "loss": 0.8053, "step": 49080 }, { "epoch": 1.889701636188643, "grad_norm": 1.4255765676498413, "learning_rate": 1.5172999182479496e-06, "loss": 0.7563, "step": 49085 }, { "epoch": 1.8898941289701636, "grad_norm": 1.1012247800827026, "learning_rate": 1.512056670688533e-06, "loss": 0.7282, "step": 49090 }, { "epoch": 1.8900866217516843, "grad_norm": 1.0495306253433228, "learning_rate": 1.5068224291991174e-06, "loss": 0.9035, "step": 49095 }, { "epoch": 1.890279114533205, "grad_norm": 1.2971009016036987, "learning_rate": 1.5015971942583196e-06, "loss": 0.7806, "step": 49100 }, { "epoch": 1.8904716073147259, "grad_norm": 1.4070019721984863, "learning_rate": 1.4963809663439908e-06, "loss": 0.7514, "step": 49105 }, { "epoch": 1.8906641000962465, "grad_norm": 1.4686723947525024, "learning_rate": 1.4911737459330722e-06, "loss": 0.8591, "step": 49110 }, { "epoch": 1.8908565928777672, "grad_norm": 1.885849952697754, "learning_rate": 1.48597553350176e-06, "loss": 0.771, "step": 49115 }, { "epoch": 1.8910490856592879, "grad_norm": 1.6949317455291748, "learning_rate": 1.4807863295253965e-06, "loss": 0.9033, "step": 49120 }, { "epoch": 1.8912415784408085, "grad_norm": 1.8095852136611938, "learning_rate": 1.4756061344784912e-06, "loss": 0.7384, "step": 49125 }, { "epoch": 1.8914340712223292, "grad_norm": 1.4184739589691162, "learning_rate": 1.4704349488347313e-06, "loss": 0.8234, "step": 49130 }, { "epoch": 1.8916265640038499, "grad_norm": 1.2031188011169434, "learning_rate": 1.4652727730670058e-06, "loss": 0.6538, "step": 49135 }, { "epoch": 1.8918190567853705, "grad_norm": 0.8883029222488403, "learning_rate": 1.4601196076473478e-06, "loss": 0.778, "step": 49140 }, { "epoch": 1.8920115495668912, "grad_norm": 1.1456712484359741, "learning_rate": 1.4549754530469805e-06, "loss": 0.8069, "step": 49145 }, { "epoch": 1.8922040423484119, "grad_norm": 
1.2302826642990112, "learning_rate": 1.4498403097363167e-06, "loss": 0.6927, "step": 49150 }, { "epoch": 1.8923965351299326, "grad_norm": 1.2416324615478516, "learning_rate": 1.4457386835319498e-06, "loss": 0.8545, "step": 49155 }, { "epoch": 1.8925890279114532, "grad_norm": 1.3639918565750122, "learning_rate": 1.4406197617255034e-06, "loss": 0.9177, "step": 49160 }, { "epoch": 1.892781520692974, "grad_norm": 1.4873005151748657, "learning_rate": 1.4355098525214883e-06, "loss": 0.7928, "step": 49165 }, { "epoch": 1.8929740134744946, "grad_norm": 0.8439772725105286, "learning_rate": 1.4304089563871525e-06, "loss": 0.9517, "step": 49170 }, { "epoch": 1.8931665062560152, "grad_norm": 1.5179928541183472, "learning_rate": 1.4253170737889787e-06, "loss": 0.8324, "step": 49175 }, { "epoch": 1.8933589990375361, "grad_norm": 2.050947904586792, "learning_rate": 1.4202342051925387e-06, "loss": 0.8277, "step": 49180 }, { "epoch": 1.8935514918190568, "grad_norm": 1.4222488403320312, "learning_rate": 1.415160351062661e-06, "loss": 0.8468, "step": 49185 }, { "epoch": 1.8937439846005775, "grad_norm": 1.9091302156448364, "learning_rate": 1.4100955118632964e-06, "loss": 0.8484, "step": 49190 }, { "epoch": 1.8939364773820981, "grad_norm": 1.2363388538360596, "learning_rate": 1.4050396880576189e-06, "loss": 0.7329, "step": 49195 }, { "epoch": 1.894128970163619, "grad_norm": 1.5010732412338257, "learning_rate": 1.3999928801079033e-06, "loss": 0.7513, "step": 49200 }, { "epoch": 1.8943214629451397, "grad_norm": 0.9450666904449463, "learning_rate": 1.3949550884756913e-06, "loss": 0.7275, "step": 49205 }, { "epoch": 1.8945139557266604, "grad_norm": 1.6268372535705566, "learning_rate": 1.3899263136216367e-06, "loss": 0.773, "step": 49210 }, { "epoch": 1.894706448508181, "grad_norm": 1.2723169326782227, "learning_rate": 1.384906556005583e-06, "loss": 0.7464, "step": 49215 }, { "epoch": 1.8948989412897017, "grad_norm": 1.1981759071350098, "learning_rate": 1.3798958160865517e-06, "loss": 
0.8826, "step": 49220 }, { "epoch": 1.8950914340712224, "grad_norm": 1.8858412504196167, "learning_rate": 1.374894094322765e-06, "loss": 0.7508, "step": 49225 }, { "epoch": 1.895283926852743, "grad_norm": 1.544263482093811, "learning_rate": 1.3699013911715685e-06, "loss": 0.7827, "step": 49230 }, { "epoch": 1.8954764196342637, "grad_norm": 1.369147777557373, "learning_rate": 1.3649177070895414e-06, "loss": 0.8175, "step": 49235 }, { "epoch": 1.8956689124157844, "grad_norm": 1.682911992073059, "learning_rate": 1.3599430425323856e-06, "loss": 0.6453, "step": 49240 }, { "epoch": 1.895861405197305, "grad_norm": 1.029921293258667, "learning_rate": 1.354977397955004e-06, "loss": 0.7134, "step": 49245 }, { "epoch": 1.8960538979788257, "grad_norm": 1.0889908075332642, "learning_rate": 1.3500207738114777e-06, "loss": 0.7324, "step": 49250 }, { "epoch": 1.8962463907603464, "grad_norm": 1.1377679109573364, "learning_rate": 1.3450731705550557e-06, "loss": 0.7995, "step": 49255 }, { "epoch": 1.896438883541867, "grad_norm": 1.6833134889602661, "learning_rate": 1.3401345886381644e-06, "loss": 0.6787, "step": 49260 }, { "epoch": 1.8966313763233877, "grad_norm": 1.080384373664856, "learning_rate": 1.3352050285123986e-06, "loss": 0.7251, "step": 49265 }, { "epoch": 1.8968238691049084, "grad_norm": 1.5266867876052856, "learning_rate": 1.3302844906285417e-06, "loss": 0.8563, "step": 49270 }, { "epoch": 1.8970163618864293, "grad_norm": 1.8439712524414062, "learning_rate": 1.325372975436545e-06, "loss": 0.7046, "step": 49275 }, { "epoch": 1.89720885466795, "grad_norm": 1.3293750286102295, "learning_rate": 1.3204704833855275e-06, "loss": 0.8011, "step": 49280 }, { "epoch": 1.8974013474494706, "grad_norm": 0.9964110255241394, "learning_rate": 1.3155770149237856e-06, "loss": 0.7107, "step": 49285 }, { "epoch": 1.8975938402309913, "grad_norm": 1.0830775499343872, "learning_rate": 1.310692570498806e-06, "loss": 0.9186, "step": 49290 }, { "epoch": 1.897786333012512, "grad_norm": 
2.34660267829895, "learning_rate": 1.3058171505572424e-06, "loss": 0.8195, "step": 49295 }, { "epoch": 1.8979788257940329, "grad_norm": 1.1595919132232666, "learning_rate": 1.300950755544894e-06, "loss": 0.8621, "step": 49300 }, { "epoch": 1.8981713185755535, "grad_norm": 1.1527278423309326, "learning_rate": 1.2960933859067937e-06, "loss": 0.7031, "step": 49305 }, { "epoch": 1.8983638113570742, "grad_norm": 1.7028868198394775, "learning_rate": 1.291245042087097e-06, "loss": 0.8185, "step": 49310 }, { "epoch": 1.8985563041385949, "grad_norm": 1.3725398778915405, "learning_rate": 1.2864057245291384e-06, "loss": 0.7387, "step": 49315 }, { "epoch": 1.8987487969201156, "grad_norm": 0.7600288987159729, "learning_rate": 1.2815754336754748e-06, "loss": 0.7169, "step": 49320 }, { "epoch": 1.8989412897016362, "grad_norm": 1.943352460861206, "learning_rate": 1.2767541699677865e-06, "loss": 0.8994, "step": 49325 }, { "epoch": 1.899133782483157, "grad_norm": 1.6857982873916626, "learning_rate": 1.271941933846943e-06, "loss": 0.8606, "step": 49330 }, { "epoch": 1.8993262752646776, "grad_norm": 1.2063673734664917, "learning_rate": 1.2671387257530033e-06, "loss": 0.8712, "step": 49335 }, { "epoch": 1.8995187680461982, "grad_norm": 1.1214938163757324, "learning_rate": 1.2623445461251826e-06, "loss": 0.8094, "step": 49340 }, { "epoch": 1.899711260827719, "grad_norm": 1.7402042150497437, "learning_rate": 1.257559395401875e-06, "loss": 0.7574, "step": 49345 }, { "epoch": 1.8999037536092396, "grad_norm": 2.6430463790893555, "learning_rate": 1.2527832740206413e-06, "loss": 0.89, "step": 49350 }, { "epoch": 1.9000962463907602, "grad_norm": 1.625260829925537, "learning_rate": 1.2480161824182435e-06, "loss": 0.8766, "step": 49355 }, { "epoch": 1.900288739172281, "grad_norm": 1.6484109163284302, "learning_rate": 1.2432581210305883e-06, "loss": 0.6991, "step": 49360 }, { "epoch": 1.9004812319538016, "grad_norm": 1.4141255617141724, "learning_rate": 1.2385090902927943e-06, "loss": 0.8387, 
"step": 49365 }, { "epoch": 1.9006737247353225, "grad_norm": 2.3338122367858887, "learning_rate": 1.233769090639092e-06, "loss": 0.9757, "step": 49370 }, { "epoch": 1.9008662175168431, "grad_norm": 1.6034566164016724, "learning_rate": 1.2290381225029345e-06, "loss": 0.8093, "step": 49375 }, { "epoch": 1.9010587102983638, "grad_norm": 1.138555645942688, "learning_rate": 1.224316186316954e-06, "loss": 0.7738, "step": 49380 }, { "epoch": 1.9012512030798845, "grad_norm": 1.7570353746414185, "learning_rate": 1.2196032825129377e-06, "loss": 0.8496, "step": 49385 }, { "epoch": 1.9014436958614052, "grad_norm": 0.8414403796195984, "learning_rate": 1.2148994115218194e-06, "loss": 0.8599, "step": 49390 }, { "epoch": 1.901636188642926, "grad_norm": 1.4490456581115723, "learning_rate": 1.2102045737737655e-06, "loss": 0.7314, "step": 49395 }, { "epoch": 1.9018286814244467, "grad_norm": 1.2178351879119873, "learning_rate": 1.2055187696980885e-06, "loss": 0.8655, "step": 49400 }, { "epoch": 1.9020211742059674, "grad_norm": 1.1484450101852417, "learning_rate": 1.2008419997232567e-06, "loss": 0.7352, "step": 49405 }, { "epoch": 1.902213666987488, "grad_norm": 2.422426462173462, "learning_rate": 1.1961742642769502e-06, "loss": 0.8654, "step": 49410 }, { "epoch": 1.9024061597690087, "grad_norm": 1.6864680051803589, "learning_rate": 1.1915155637859942e-06, "loss": 0.7319, "step": 49415 }, { "epoch": 1.9025986525505294, "grad_norm": 1.0465269088745117, "learning_rate": 1.1868658986763704e-06, "loss": 0.6556, "step": 49420 }, { "epoch": 1.90279114533205, "grad_norm": 1.5697258710861206, "learning_rate": 1.182225269373305e-06, "loss": 0.8959, "step": 49425 }, { "epoch": 1.9029836381135707, "grad_norm": 1.0091673135757446, "learning_rate": 1.1775936763011252e-06, "loss": 0.8843, "step": 49430 }, { "epoch": 1.9031761308950914, "grad_norm": 1.5295612812042236, "learning_rate": 1.1729711198833592e-06, "loss": 0.7747, "step": 49435 }, { "epoch": 1.903368623676612, "grad_norm": 
1.655747413635254, "learning_rate": 1.1683576005427243e-06, "loss": 0.7618, "step": 49440 }, { "epoch": 1.9035611164581328, "grad_norm": 1.3846567869186401, "learning_rate": 1.163753118701083e-06, "loss": 0.7765, "step": 49445 }, { "epoch": 1.9037536092396534, "grad_norm": 1.6685348749160767, "learning_rate": 1.1591576747794874e-06, "loss": 0.7638, "step": 49450 }, { "epoch": 1.903946102021174, "grad_norm": 1.5088547468185425, "learning_rate": 1.154571269198168e-06, "loss": 0.7608, "step": 49455 }, { "epoch": 1.9041385948026948, "grad_norm": 2.2280006408691406, "learning_rate": 1.1499939023765116e-06, "loss": 0.8825, "step": 49460 }, { "epoch": 1.9043310875842154, "grad_norm": 1.4215261936187744, "learning_rate": 1.1454255747330834e-06, "loss": 0.953, "step": 49465 }, { "epoch": 1.9045235803657363, "grad_norm": 1.827169418334961, "learning_rate": 1.1408662866856379e-06, "loss": 0.7853, "step": 49470 }, { "epoch": 1.904716073147257, "grad_norm": 1.3283016681671143, "learning_rate": 1.1363160386510975e-06, "loss": 0.6103, "step": 49475 }, { "epoch": 1.9049085659287777, "grad_norm": 1.2778635025024414, "learning_rate": 1.13177483104554e-06, "loss": 0.7057, "step": 49480 }, { "epoch": 1.9051010587102983, "grad_norm": 1.315544605255127, "learning_rate": 1.1272426642842337e-06, "loss": 0.7023, "step": 49485 }, { "epoch": 1.905293551491819, "grad_norm": 1.4004102945327759, "learning_rate": 1.1227195387816136e-06, "loss": 0.6889, "step": 49490 }, { "epoch": 1.90548604427334, "grad_norm": 2.1009254455566406, "learning_rate": 1.118205454951271e-06, "loss": 0.8759, "step": 49495 }, { "epoch": 1.9056785370548606, "grad_norm": 1.2553610801696777, "learning_rate": 1.1137004132060314e-06, "loss": 0.7435, "step": 49500 }, { "epoch": 1.9058710298363812, "grad_norm": 0.8491818308830261, "learning_rate": 1.1092044139578205e-06, "loss": 0.6341, "step": 49505 }, { "epoch": 1.906063522617902, "grad_norm": 1.680105447769165, "learning_rate": 1.1047174576177654e-06, "loss": 0.8919, 
"step": 49510 }, { "epoch": 1.9062560153994226, "grad_norm": 1.8832811117172241, "learning_rate": 1.1002395445961932e-06, "loss": 0.7662, "step": 49515 }, { "epoch": 1.9064485081809432, "grad_norm": 1.2031275033950806, "learning_rate": 1.0957706753025432e-06, "loss": 0.7988, "step": 49520 }, { "epoch": 1.906641000962464, "grad_norm": 2.169948101043701, "learning_rate": 1.0913108501454993e-06, "loss": 0.8608, "step": 49525 }, { "epoch": 1.9068334937439846, "grad_norm": 1.2307795286178589, "learning_rate": 1.086860069532869e-06, "loss": 0.7195, "step": 49530 }, { "epoch": 1.9070259865255053, "grad_norm": 1.0904724597930908, "learning_rate": 1.0824183338716377e-06, "loss": 0.7586, "step": 49535 }, { "epoch": 1.907218479307026, "grad_norm": 2.3190550804138184, "learning_rate": 1.0779856435679913e-06, "loss": 0.9954, "step": 49540 }, { "epoch": 1.9074109720885466, "grad_norm": 1.5595754384994507, "learning_rate": 1.0735619990272506e-06, "loss": 0.7631, "step": 49545 }, { "epoch": 1.9076034648700673, "grad_norm": 1.5422192811965942, "learning_rate": 1.069147400653936e-06, "loss": 0.7709, "step": 49550 }, { "epoch": 1.907795957651588, "grad_norm": 1.085741639137268, "learning_rate": 1.0647418488517358e-06, "loss": 0.6323, "step": 49555 }, { "epoch": 1.9079884504331086, "grad_norm": 1.0943596363067627, "learning_rate": 1.0603453440235168e-06, "loss": 0.7902, "step": 49560 }, { "epoch": 1.9081809432146295, "grad_norm": 0.9542067646980286, "learning_rate": 1.0559578865712905e-06, "loss": 0.8609, "step": 49565 }, { "epoch": 1.9083734359961502, "grad_norm": 2.1979501247406006, "learning_rate": 1.0515794768962806e-06, "loss": 0.7887, "step": 49570 }, { "epoch": 1.9085659287776708, "grad_norm": 1.0749517679214478, "learning_rate": 1.0472101153988446e-06, "loss": 0.8218, "step": 49575 }, { "epoch": 1.9087584215591915, "grad_norm": 1.4718987941741943, "learning_rate": 1.0428498024785404e-06, "loss": 0.8026, "step": 49580 }, { "epoch": 1.9089509143407122, "grad_norm": 
1.4935468435287476, "learning_rate": 1.038498538534105e-06, "loss": 0.7906, "step": 49585 }, { "epoch": 1.909143407122233, "grad_norm": 1.2869309186935425, "learning_rate": 1.034156323963409e-06, "loss": 0.7009, "step": 49590 }, { "epoch": 1.9093358999037537, "grad_norm": 1.7238849401474, "learning_rate": 1.0298231591635232e-06, "loss": 0.6669, "step": 49595 }, { "epoch": 1.9095283926852744, "grad_norm": 1.0271257162094116, "learning_rate": 1.0254990445306978e-06, "loss": 0.893, "step": 49600 }, { "epoch": 1.909720885466795, "grad_norm": 1.4019641876220703, "learning_rate": 1.0211839804603385e-06, "loss": 0.7958, "step": 49605 }, { "epoch": 1.9099133782483158, "grad_norm": 2.0241641998291016, "learning_rate": 1.0168779673470296e-06, "loss": 0.9105, "step": 49610 }, { "epoch": 1.9101058710298364, "grad_norm": 1.333714485168457, "learning_rate": 1.0125810055845231e-06, "loss": 0.7325, "step": 49615 }, { "epoch": 1.910298363811357, "grad_norm": 1.3105881214141846, "learning_rate": 1.00829309556576e-06, "loss": 0.8457, "step": 49620 }, { "epoch": 1.9104908565928778, "grad_norm": 1.3533077239990234, "learning_rate": 1.0040142376828266e-06, "loss": 0.7181, "step": 49625 }, { "epoch": 1.9106833493743984, "grad_norm": 1.5712995529174805, "learning_rate": 9.9974443232701e-07, "loss": 0.9438, "step": 49630 }, { "epoch": 1.910875842155919, "grad_norm": 1.0359176397323608, "learning_rate": 9.954836798887424e-07, "loss": 0.7692, "step": 49635 }, { "epoch": 1.9110683349374398, "grad_norm": 1.355884313583374, "learning_rate": 9.912319807576452e-07, "loss": 0.856, "step": 49640 }, { "epoch": 1.9112608277189604, "grad_norm": 1.1907180547714233, "learning_rate": 9.86989335322519e-07, "loss": 0.7598, "step": 49645 }, { "epoch": 1.9114533205004811, "grad_norm": 1.5118666887283325, "learning_rate": 9.827557439713086e-07, "loss": 0.9634, "step": 49650 }, { "epoch": 1.9116458132820018, "grad_norm": 1.2580753564834595, "learning_rate": 9.7853120709116e-07, "loss": 0.7613, "step": 49655 }, 
{ "epoch": 1.9118383060635225, "grad_norm": 1.043454885482788, "learning_rate": 9.743157250683644e-07, "loss": 0.7669, "step": 49660 }, { "epoch": 1.9120307988450433, "grad_norm": 1.603769302368164, "learning_rate": 9.701092982884351e-07, "loss": 0.8249, "step": 49665 }, { "epoch": 1.912223291626564, "grad_norm": 1.0182993412017822, "learning_rate": 9.659119271359762e-07, "loss": 0.7097, "step": 49670 }, { "epoch": 1.9124157844080847, "grad_norm": 1.6166123151779175, "learning_rate": 9.617236119948358e-07, "loss": 0.6695, "step": 49675 }, { "epoch": 1.9126082771896054, "grad_norm": 1.0179489850997925, "learning_rate": 9.575443532480076e-07, "loss": 0.7893, "step": 49680 }, { "epoch": 1.9128007699711262, "grad_norm": 1.4863917827606201, "learning_rate": 9.533741512776417e-07, "loss": 0.7844, "step": 49685 }, { "epoch": 1.912993262752647, "grad_norm": 2.095845937728882, "learning_rate": 9.492130064650995e-07, "loss": 0.7687, "step": 49690 }, { "epoch": 1.9131857555341676, "grad_norm": 0.9106748104095459, "learning_rate": 9.450609191908766e-07, "loss": 0.8065, "step": 49695 }, { "epoch": 1.9133782483156883, "grad_norm": 0.5192959308624268, "learning_rate": 9.40917889834636e-07, "loss": 0.8384, "step": 49700 }, { "epoch": 1.913570741097209, "grad_norm": 0.8835710287094116, "learning_rate": 9.367839187752636e-07, "loss": 0.8304, "step": 49705 }, { "epoch": 1.9137632338787296, "grad_norm": 1.3475233316421509, "learning_rate": 9.326590063907681e-07, "loss": 0.8051, "step": 49710 }, { "epoch": 1.9139557266602503, "grad_norm": 1.0911017656326294, "learning_rate": 9.285431530583366e-07, "loss": 0.6483, "step": 49715 }, { "epoch": 1.914148219441771, "grad_norm": 1.183143973350525, "learning_rate": 9.244363591543459e-07, "loss": 0.9217, "step": 49720 }, { "epoch": 1.9143407122232916, "grad_norm": 1.7121336460113525, "learning_rate": 9.2033862505434e-07, "loss": 0.7959, "step": 49725 }, { "epoch": 1.9145332050048123, "grad_norm": 1.2456363439559937, "learning_rate": 
9.162499511330192e-07, "loss": 0.7527, "step": 49730 }, { "epoch": 1.914725697786333, "grad_norm": 2.138845205307007, "learning_rate": 9.121703377642732e-07, "loss": 0.9166, "step": 49735 }, { "epoch": 1.9149181905678536, "grad_norm": 1.7342472076416016, "learning_rate": 9.080997853211592e-07, "loss": 0.7697, "step": 49740 }, { "epoch": 1.9151106833493743, "grad_norm": 1.9589825868606567, "learning_rate": 9.040382941758907e-07, "loss": 0.86, "step": 49745 }, { "epoch": 1.915303176130895, "grad_norm": 0.9614568948745728, "learning_rate": 8.999858646998704e-07, "loss": 0.8351, "step": 49750 }, { "epoch": 1.9154956689124156, "grad_norm": 2.190661668777466, "learning_rate": 8.959424972636576e-07, "loss": 0.8423, "step": 49755 }, { "epoch": 1.9156881616939365, "grad_norm": 1.1599905490875244, "learning_rate": 8.91908192237012e-07, "loss": 0.9698, "step": 49760 }, { "epoch": 1.9158806544754572, "grad_norm": 0.9257279634475708, "learning_rate": 8.878829499888164e-07, "loss": 0.7918, "step": 49765 }, { "epoch": 1.9160731472569779, "grad_norm": 1.8219150304794312, "learning_rate": 8.838667708871873e-07, "loss": 0.7945, "step": 49770 }, { "epoch": 1.9162656400384985, "grad_norm": 1.6939747333526611, "learning_rate": 8.798596552993421e-07, "loss": 0.8348, "step": 49775 }, { "epoch": 1.9164581328200192, "grad_norm": 1.0439796447753906, "learning_rate": 8.758616035917211e-07, "loss": 0.8166, "step": 49780 }, { "epoch": 1.91665062560154, "grad_norm": 1.6584985256195068, "learning_rate": 8.718726161299206e-07, "loss": 0.7026, "step": 49785 }, { "epoch": 1.9168431183830608, "grad_norm": 1.5818768739700317, "learning_rate": 8.678926932787157e-07, "loss": 0.7577, "step": 49790 }, { "epoch": 1.9170356111645814, "grad_norm": 1.8825992345809937, "learning_rate": 8.639218354020151e-07, "loss": 0.6868, "step": 49795 }, { "epoch": 1.917228103946102, "grad_norm": 1.7052052021026611, "learning_rate": 8.599600428629617e-07, "loss": 0.705, "step": 49800 }, { "epoch": 1.9174205967276228, 
"grad_norm": 1.2492605447769165, "learning_rate": 8.560073160238213e-07, "loss": 0.7604, "step": 49805 }, { "epoch": 1.9176130895091434, "grad_norm": 1.3016527891159058, "learning_rate": 8.520636552460381e-07, "loss": 0.681, "step": 49810 }, { "epoch": 1.9178055822906641, "grad_norm": 1.7560960054397583, "learning_rate": 8.481290608902348e-07, "loss": 0.8405, "step": 49815 }, { "epoch": 1.9179980750721848, "grad_norm": 1.0738979578018188, "learning_rate": 8.442035333162013e-07, "loss": 0.9153, "step": 49820 }, { "epoch": 1.9181905678537055, "grad_norm": 2.123007297515869, "learning_rate": 8.402870728829282e-07, "loss": 0.8122, "step": 49825 }, { "epoch": 1.9183830606352261, "grad_norm": 1.9177762269973755, "learning_rate": 8.363796799485069e-07, "loss": 0.9144, "step": 49830 }, { "epoch": 1.9185755534167468, "grad_norm": 1.4648776054382324, "learning_rate": 8.324813548702847e-07, "loss": 0.8814, "step": 49835 }, { "epoch": 1.9187680461982675, "grad_norm": 1.2725647687911987, "learning_rate": 8.285920980047102e-07, "loss": 0.8659, "step": 49840 }, { "epoch": 1.9189605389797881, "grad_norm": 0.6527199149131775, "learning_rate": 8.247119097074319e-07, "loss": 0.7217, "step": 49845 }, { "epoch": 1.9191530317613088, "grad_norm": 1.6857417821884155, "learning_rate": 8.208407903332771e-07, "loss": 0.8141, "step": 49850 }, { "epoch": 1.9193455245428297, "grad_norm": 0.8140450716018677, "learning_rate": 8.169787402362406e-07, "loss": 0.7364, "step": 49855 }, { "epoch": 1.9195380173243504, "grad_norm": 1.138654112815857, "learning_rate": 8.131257597694508e-07, "loss": 0.9553, "step": 49860 }, { "epoch": 1.919730510105871, "grad_norm": 1.369598150253296, "learning_rate": 8.092818492852705e-07, "loss": 0.7387, "step": 49865 }, { "epoch": 1.9199230028873917, "grad_norm": 1.1333906650543213, "learning_rate": 8.054470091351851e-07, "loss": 0.7376, "step": 49870 }, { "epoch": 1.9201154956689124, "grad_norm": 1.7450077533721924, "learning_rate": 8.016212396698697e-07, "loss": 
0.9635, "step": 49875 }, { "epoch": 1.9203079884504333, "grad_norm": 0.9076559543609619, "learning_rate": 7.978045412391555e-07, "loss": 0.7919, "step": 49880 }, { "epoch": 1.920500481231954, "grad_norm": 1.402205228805542, "learning_rate": 7.939969141920744e-07, "loss": 0.8091, "step": 49885 }, { "epoch": 1.9206929740134746, "grad_norm": 1.2806485891342163, "learning_rate": 7.901983588767814e-07, "loss": 0.7798, "step": 49890 }, { "epoch": 1.9208854667949953, "grad_norm": 2.2308027744293213, "learning_rate": 7.864088756406429e-07, "loss": 0.829, "step": 49895 }, { "epoch": 1.921077959576516, "grad_norm": 1.4024001359939575, "learning_rate": 7.826284648301929e-07, "loss": 0.7619, "step": 49900 }, { "epoch": 1.9212704523580366, "grad_norm": 1.3569003343582153, "learning_rate": 7.788571267911104e-07, "loss": 0.7127, "step": 49905 }, { "epoch": 1.9214629451395573, "grad_norm": 2.439872980117798, "learning_rate": 7.75094861868264e-07, "loss": 0.9349, "step": 49910 }, { "epoch": 1.921655437921078, "grad_norm": 1.2316006422042847, "learning_rate": 7.713416704056897e-07, "loss": 0.8283, "step": 49915 }, { "epoch": 1.9218479307025986, "grad_norm": 1.0408631563186646, "learning_rate": 7.675975527465906e-07, "loss": 0.765, "step": 49920 }, { "epoch": 1.9220404234841193, "grad_norm": 1.4583711624145508, "learning_rate": 7.638625092333373e-07, "loss": 0.8184, "step": 49925 }, { "epoch": 1.92223291626564, "grad_norm": 2.3234689235687256, "learning_rate": 7.601365402074789e-07, "loss": 0.8176, "step": 49930 }, { "epoch": 1.9224254090471606, "grad_norm": 1.4219454526901245, "learning_rate": 7.564196460097316e-07, "loss": 0.6931, "step": 49935 }, { "epoch": 1.9226179018286813, "grad_norm": 1.5232850313186646, "learning_rate": 7.527118269799793e-07, "loss": 0.8985, "step": 49940 }, { "epoch": 1.922810394610202, "grad_norm": 1.4048689603805542, "learning_rate": 7.490130834572728e-07, "loss": 0.9178, "step": 49945 }, { "epoch": 1.9230028873917226, "grad_norm": 0.891716718673706, 
"learning_rate": 7.453234157798416e-07, "loss": 0.9839, "step": 49950 }, { "epoch": 1.9231953801732435, "grad_norm": 1.1254078149795532, "learning_rate": 7.416428242850937e-07, "loss": 0.928, "step": 49955 }, { "epoch": 1.9233878729547642, "grad_norm": 1.3273284435272217, "learning_rate": 7.379713093095708e-07, "loss": 0.7957, "step": 49960 }, { "epoch": 1.9235803657362849, "grad_norm": 1.1084377765655518, "learning_rate": 7.343088711890267e-07, "loss": 0.9596, "step": 49965 }, { "epoch": 1.9237728585178056, "grad_norm": 1.641728162765503, "learning_rate": 7.306555102583601e-07, "loss": 0.8442, "step": 49970 }, { "epoch": 1.9239653512993262, "grad_norm": 1.5067929029464722, "learning_rate": 7.270112268516483e-07, "loss": 0.7893, "step": 49975 }, { "epoch": 1.9241578440808471, "grad_norm": 0.8102709054946899, "learning_rate": 7.233760213021357e-07, "loss": 0.7961, "step": 49980 }, { "epoch": 1.9243503368623678, "grad_norm": 2.6088552474975586, "learning_rate": 7.197498939422343e-07, "loss": 0.8979, "step": 49985 }, { "epoch": 1.9245428296438885, "grad_norm": 1.865166187286377, "learning_rate": 7.161328451035454e-07, "loss": 0.8434, "step": 49990 }, { "epoch": 1.9247353224254091, "grad_norm": 1.1561760902404785, "learning_rate": 7.125248751167934e-07, "loss": 0.7265, "step": 49995 }, { "epoch": 1.9249278152069298, "grad_norm": 1.1306687593460083, "learning_rate": 7.089259843119478e-07, "loss": 0.9502, "step": 50000 } ], "logging_steps": 5, "max_steps": 51950, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 10000, "total_flos": 1.5612551596802458e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }