{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.374257307497089, "eval_steps": 1000, "global_step": 180000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.9856985041650495e-05, "grad_norm": 8.064935684204102, "learning_rate": 0.0, "loss": 3.4849, "step": 1 }, { "epoch": 0.0014928492520825246, "grad_norm": 3.3433420658111572, "learning_rate": 0.00019998742849959144, "loss": 1.9038, "step": 50 }, { "epoch": 0.0029856985041650493, "grad_norm": 2.907883405685425, "learning_rate": 0.0001999731427036726, "loss": 1.608, "step": 100 }, { "epoch": 0.004478547756247574, "grad_norm": 3.2614288330078125, "learning_rate": 0.00019995885690775376, "loss": 1.5505, "step": 150 }, { "epoch": 0.005971397008330099, "grad_norm": 3.8400654792785645, "learning_rate": 0.00019994457111183493, "loss": 1.5737, "step": 200 }, { "epoch": 0.007464246260412624, "grad_norm": 3.3358442783355713, "learning_rate": 0.00019993028531591612, "loss": 1.567, "step": 250 }, { "epoch": 0.008957095512495149, "grad_norm": 2.131911277770996, "learning_rate": 0.00019991599951999726, "loss": 1.5208, "step": 300 }, { "epoch": 0.010449944764577673, "grad_norm": 3.180992364883423, "learning_rate": 0.00019990171372407845, "loss": 1.5586, "step": 350 }, { "epoch": 0.011942794016660197, "grad_norm": 3.024989128112793, "learning_rate": 0.0001998874279281596, "loss": 1.5267, "step": 400 }, { "epoch": 0.013435643268742723, "grad_norm": 3.4935102462768555, "learning_rate": 0.00019987314213224078, "loss": 1.5609, "step": 450 }, { "epoch": 0.014928492520825247, "grad_norm": 3.565504550933838, "learning_rate": 0.00019985885633632194, "loss": 1.5112, "step": 500 }, { "epoch": 0.01642134177290777, "grad_norm": 2.0692882537841797, "learning_rate": 0.00019984457054040308, "loss": 1.5087, "step": 550 }, { "epoch": 0.017914191024990297, "grad_norm": 4.110323905944824, "learning_rate": 0.00019983028474448427, "loss": 1.5073, "step": 600 }, { "epoch": 0.01940704027707282, "grad_norm": 2.8632736206054688, "learning_rate": 0.0001998159989485654, "loss": 1.472, "step": 650 }, { "epoch": 0.020899889529155346, "grad_norm": 2.629347324371338, "learning_rate": 0.0001998017131526466, "loss": 1.5236, "step": 700 }, { "epoch": 0.022392738781237872, "grad_norm": 3.696873188018799, "learning_rate": 0.00019978742735672774, "loss": 1.5216, "step": 750 }, { "epoch": 0.023885588033320394, "grad_norm": 3.253305435180664, "learning_rate": 0.00019977314156080893, "loss": 1.5915, "step": 800 }, { "epoch": 0.02537843728540292, "grad_norm": 2.9587886333465576, "learning_rate": 0.0001997588557648901, "loss": 1.4862, "step": 850 }, { "epoch": 0.026871286537485446, "grad_norm": 3.3311073780059814, "learning_rate": 0.00019974456996897126, "loss": 1.4703, "step": 900 }, { "epoch": 0.02836413578956797, "grad_norm": 2.5960264205932617, "learning_rate": 0.00019973028417305243, "loss": 1.4818, "step": 950 }, { "epoch": 0.029856985041650495, "grad_norm": 3.3142144680023193, "learning_rate": 0.0001997159983771336, "loss": 1.4746, "step": 1000 }, { "epoch": 0.03134983429373302, "grad_norm": 3.5049827098846436, "learning_rate": 0.00019970171258121476, "loss": 1.442, "step": 1050 }, { "epoch": 0.03284268354581554, "grad_norm": 3.0218605995178223, "learning_rate": 0.00019968742678529592, "loss": 1.5265, "step": 1100 }, { "epoch": 0.03433553279789807, "grad_norm": 2.936182975769043, "learning_rate": 0.00019967314098937709, "loss": 1.5174, "step": 1150 }, { "epoch": 0.035828382049980595, "grad_norm": 2.877253293991089, "learning_rate": 0.00019965885519345825, "loss": 1.4499, "step": 1200 }, { "epoch": 0.03732123130206312, "grad_norm": 6.07016658782959, "learning_rate": 0.00019964456939753941, "loss": 1.4542, "step": 1250 }, { "epoch": 0.03881408055414564, "grad_norm": 2.1618189811706543, "learning_rate": 0.0001996302836016206, "loss": 1.4343, "step": 1300 }, { "epoch": 0.040306929806228166, "grad_norm": 2.8267719745635986, "learning_rate": 0.00019961599780570174, "loss": 1.463, "step": 1350 }, { "epoch": 0.04179977905831069, "grad_norm": 2.6036462783813477, "learning_rate": 0.00019960171200978294, "loss": 1.4557, "step": 1400 }, { "epoch": 0.04329262831039322, "grad_norm": 3.0187127590179443, "learning_rate": 0.00019958742621386407, "loss": 1.4472, "step": 1450 }, { "epoch": 0.044785477562475744, "grad_norm": 3.9822633266448975, "learning_rate": 0.00019957314041794526, "loss": 1.4384, "step": 1500 }, { "epoch": 0.04627832681455826, "grad_norm": 2.919654607772827, "learning_rate": 0.0001995588546220264, "loss": 1.4969, "step": 1550 }, { "epoch": 0.04777117606664079, "grad_norm": 2.922963857650757, "learning_rate": 0.0001995445688261076, "loss": 1.4987, "step": 1600 }, { "epoch": 0.049264025318723315, "grad_norm": 2.9638512134552, "learning_rate": 0.00019953028303018876, "loss": 1.4722, "step": 1650 }, { "epoch": 0.05075687457080584, "grad_norm": 3.408391237258911, "learning_rate": 0.00019951599723426992, "loss": 1.4723, "step": 1700 }, { "epoch": 0.052249723822888366, "grad_norm": 3.023597240447998, "learning_rate": 0.0001995017114383511, "loss": 1.4478, "step": 1750 }, { "epoch": 0.05374257307497089, "grad_norm": 2.1655213832855225, "learning_rate": 0.00019948742564243225, "loss": 1.4071, "step": 1800 }, { "epoch": 0.05523542232705341, "grad_norm": 3.796663999557495, "learning_rate": 0.00019947313984651342, "loss": 1.446, "step": 1850 }, { "epoch": 0.05672827157913594, "grad_norm": 3.0415594577789307, "learning_rate": 0.00019945885405059458, "loss": 1.4324, "step": 1900 }, { "epoch": 0.05822112083121846, "grad_norm": 2.524627685546875, "learning_rate": 0.00019944456825467575, "loss": 1.3963, "step": 1950 }, { "epoch": 0.05971397008330099, "grad_norm": 3.2881991863250732, "learning_rate": 0.0001994302824587569, "loss": 1.4522, "step": 2000 }, { "epoch": 0.061206819335383515, "grad_norm": 3.392430067062378, "learning_rate": 0.00019941599666283808, "loss": 1.4329, "step": 2050 }, { "epoch": 0.06269966858746603, "grad_norm": 3.9426393508911133, "learning_rate": 0.00019940171086691927, "loss": 1.5203, "step": 2100 }, { "epoch": 0.06419251783954856, "grad_norm": 3.3737235069274902, "learning_rate": 0.0001993874250710004, "loss": 1.3674, "step": 2150 }, { "epoch": 0.06568536709163109, "grad_norm": 3.783085346221924, "learning_rate": 0.0001993731392750816, "loss": 1.4339, "step": 2200 }, { "epoch": 0.06717821634371361, "grad_norm": 3.4819202423095703, "learning_rate": 0.00019935885347916273, "loss": 1.4436, "step": 2250 }, { "epoch": 0.06867106559579614, "grad_norm": 3.141775608062744, "learning_rate": 0.00019934456768324393, "loss": 1.4683, "step": 2300 }, { "epoch": 0.07016391484787866, "grad_norm": 3.2881035804748535, "learning_rate": 0.00019933028188732506, "loss": 1.4395, "step": 2350 }, { "epoch": 0.07165676409996119, "grad_norm": 3.718122959136963, "learning_rate": 0.00019931599609140626, "loss": 1.4396, "step": 2400 }, { "epoch": 0.07314961335204372, "grad_norm": 4.3829474449157715, "learning_rate": 0.00019930171029548742, "loss": 1.4477, "step": 2450 }, { "epoch": 0.07464246260412624, "grad_norm": 3.3698525428771973, "learning_rate": 0.00019928742449956858, "loss": 1.3529, "step": 2500 }, { "epoch": 0.07613531185620875, "grad_norm": 3.7569565773010254, "learning_rate": 0.00019927313870364975, "loss": 1.4246, "step": 2550 }, { "epoch": 0.07762816110829128, "grad_norm": 3.1486406326293945, "learning_rate": 0.00019925885290773091, "loss": 1.3813, "step": 2600 }, { "epoch": 0.0791210103603738, "grad_norm": 4.0635480880737305, "learning_rate": 0.00019924456711181208, "loss": 1.4259, "step": 2650 }, { "epoch": 0.08061385961245633, "grad_norm": 3.2710611820220947, "learning_rate": 0.00019923028131589324, "loss": 1.3747, "step": 2700 }, { "epoch": 0.08210670886453886, "grad_norm": 3.4968345165252686, "learning_rate": 0.0001992159955199744, "loss": 1.4721, "step": 2750 }, { "epoch": 0.08359955811662138, "grad_norm": 4.274214267730713, "learning_rate": 0.00019920170972405557, "loss": 1.437, "step": 2800 }, { "epoch": 0.08509240736870391, "grad_norm": 2.970602512359619, "learning_rate": 0.00019918742392813674, "loss": 1.3336, "step": 2850 }, { "epoch": 0.08658525662078644, "grad_norm": 4.143342971801758, "learning_rate": 0.00019917313813221793, "loss": 1.4264, "step": 2900 }, { "epoch": 0.08807810587286896, "grad_norm": 3.7546920776367188, "learning_rate": 0.00019915885233629907, "loss": 1.441, "step": 2950 }, { "epoch": 0.08957095512495149, "grad_norm": 3.9160516262054443, "learning_rate": 0.00019914456654038026, "loss": 1.4261, "step": 3000 }, { "epoch": 0.09106380437703401, "grad_norm": 3.842073917388916, "learning_rate": 0.0001991302807444614, "loss": 1.4076, "step": 3050 }, { "epoch": 0.09255665362911653, "grad_norm": 4.392395496368408, "learning_rate": 0.0001991159949485426, "loss": 1.3789, "step": 3100 }, { "epoch": 0.09404950288119905, "grad_norm": 3.822425603866577, "learning_rate": 0.00019910170915262373, "loss": 1.3877, "step": 3150 }, { "epoch": 0.09554235213328158, "grad_norm": 3.1348562240600586, "learning_rate": 0.0001990874233567049, "loss": 1.4081, "step": 3200 }, { "epoch": 0.0970352013853641, "grad_norm": 3.453887939453125, "learning_rate": 0.00019907313756078608, "loss": 1.4143, "step": 3250 }, { "epoch": 0.09852805063744663, "grad_norm": 3.5057384967803955, "learning_rate": 0.00019905885176486722, "loss": 1.4264, "step": 3300 }, { "epoch": 0.10002089988952916, "grad_norm": 3.145796060562134, "learning_rate": 0.0001990445659689484, "loss": 1.4368, "step": 3350 }, { "epoch": 0.10151374914161168, "grad_norm": 3.4077043533325195, "learning_rate": 0.00019903028017302955, "loss": 1.388, "step": 3400 }, { "epoch": 0.10300659839369421, "grad_norm": 3.65567946434021, "learning_rate": 0.00019901599437711074, "loss": 1.42, "step": 3450 }, { "epoch": 0.10449944764577673, "grad_norm": 4.460702419281006, "learning_rate": 0.0001990017085811919, "loss": 1.3991, "step": 3500 }, { "epoch": 0.10599229689785926, "grad_norm": 4.155653476715088, "learning_rate": 0.00019898742278527307, "loss": 1.371, "step": 3550 }, { "epoch": 0.10748514614994178, "grad_norm": 3.8904318809509277, "learning_rate": 0.00019897313698935423, "loss": 1.4378, "step": 3600 }, { "epoch": 0.1089779954020243, "grad_norm": 4.0509233474731445, "learning_rate": 0.0001989588511934354, "loss": 1.3945, "step": 3650 }, { "epoch": 0.11047084465410682, "grad_norm": 3.785123109817505, "learning_rate": 0.00019894456539751656, "loss": 1.436, "step": 3700 }, { "epoch": 0.11196369390618935, "grad_norm": 3.4556167125701904, "learning_rate": 0.00019893027960159773, "loss": 1.3794, "step": 3750 }, { "epoch": 0.11345654315827187, "grad_norm": 4.0479559898376465, "learning_rate": 0.0001989159938056789, "loss": 1.4734, "step": 3800 }, { "epoch": 0.1149493924103544, "grad_norm": 3.890805721282959, "learning_rate": 0.00019890170800976006, "loss": 1.4341, "step": 3850 }, { "epoch": 0.11644224166243693, "grad_norm": 3.8178727626800537, "learning_rate": 0.00019888742221384122, "loss": 1.4754, "step": 3900 }, { "epoch": 0.11793509091451945, "grad_norm": 2.456165075302124, "learning_rate": 0.00019887313641792241, "loss": 1.3887, "step": 3950 }, { "epoch": 0.11942794016660198, "grad_norm": 3.5763051509857178, "learning_rate": 0.00019885885062200355, "loss": 1.3901, "step": 4000 }, { "epoch": 0.1209207894186845, "grad_norm": 3.885662317276001, "learning_rate": 0.00019884456482608474, "loss": 1.3856, "step": 4050 }, { "epoch": 0.12241363867076703, "grad_norm": 3.6095409393310547, "learning_rate": 0.00019883027903016588, "loss": 1.448, "step": 4100 }, { "epoch": 0.12390648792284956, "grad_norm": 3.7112534046173096, "learning_rate": 0.00019881599323424707, "loss": 1.3537, "step": 4150 }, { "epoch": 0.12539933717493207, "grad_norm": 3.3566672801971436, "learning_rate": 0.0001988017074383282, "loss": 1.4389, "step": 4200 }, { "epoch": 0.1268921864270146, "grad_norm": 4.570401191711426, "learning_rate": 0.0001987874216424094, "loss": 1.4191, "step": 4250 }, { "epoch": 0.12838503567909712, "grad_norm": 4.455029010772705, "learning_rate": 0.00019877313584649057, "loss": 1.3677, "step": 4300 }, { "epoch": 0.12987788493117966, "grad_norm": 3.0861828327178955, "learning_rate": 0.00019875885005057173, "loss": 1.3677, "step": 4350 }, { "epoch": 0.13137073418326217, "grad_norm": 4.419896602630615, "learning_rate": 0.0001987445642546529, "loss": 1.4524, "step": 4400 }, { "epoch": 0.1328635834353447, "grad_norm": 5.187576770782471, "learning_rate": 0.00019873027845873406, "loss": 1.3868, "step": 4450 }, { "epoch": 0.13435643268742722, "grad_norm": 5.111696243286133, "learning_rate": 0.00019871599266281523, "loss": 1.4458, "step": 4500 }, { "epoch": 0.13584928193950974, "grad_norm": 3.2652997970581055, "learning_rate": 0.0001987017068668964, "loss": 1.4529, "step": 4550 }, { "epoch": 0.13734213119159228, "grad_norm": 4.190273761749268, "learning_rate": 0.00019868742107097755, "loss": 1.3991, "step": 4600 }, { "epoch": 0.1388349804436748, "grad_norm": 4.85620641708374, "learning_rate": 0.00019867313527505872, "loss": 1.3916, "step": 4650 }, { "epoch": 0.14032782969575733, "grad_norm": 3.030954360961914, "learning_rate": 0.00019865884947913988, "loss": 1.3805, "step": 4700 }, { "epoch": 0.14182067894783984, "grad_norm": 3.264406681060791, "learning_rate": 0.00019864456368322108, "loss": 1.4048, "step": 4750 }, { "epoch": 0.14331352819992238, "grad_norm": 3.2138588428497314, "learning_rate": 0.0001986302778873022, "loss": 1.4092, "step": 4800 }, { "epoch": 0.1448063774520049, "grad_norm": 3.847222328186035, "learning_rate": 0.0001986159920913834, "loss": 1.3871, "step": 4850 }, { "epoch": 0.14629922670408743, "grad_norm": 4.004987716674805, "learning_rate": 0.00019860170629546454, "loss": 1.3845, "step": 4900 }, { "epoch": 0.14779207595616994, "grad_norm": 3.5088725090026855, "learning_rate": 0.00019858742049954573, "loss": 1.379, "step": 4950 }, { "epoch": 0.14928492520825248, "grad_norm": 3.275099277496338, "learning_rate": 0.00019857313470362687, "loss": 1.3628, "step": 5000 }, { "epoch": 0.150777774460335, "grad_norm": 3.7903060913085938, "learning_rate": 0.00019855884890770806, "loss": 1.3804, "step": 5050 }, { "epoch": 0.1522706237124175, "grad_norm": 4.294798374176025, "learning_rate": 0.00019854456311178923, "loss": 1.3988, "step": 5100 }, { "epoch": 0.15376347296450005, "grad_norm": 3.2719295024871826, "learning_rate": 0.0001985302773158704, "loss": 1.387, "step": 5150 }, { "epoch": 0.15525632221658256, "grad_norm": 4.143224239349365, "learning_rate": 0.00019851599151995156, "loss": 1.393, "step": 5200 }, { "epoch": 0.1567491714686651, "grad_norm": 3.404754638671875, "learning_rate": 0.00019850170572403272, "loss": 1.4205, "step": 5250 }, { "epoch": 0.1582420207207476, "grad_norm": 3.607126474380493, "learning_rate": 0.0001984874199281139, "loss": 1.4239, "step": 5300 }, { "epoch": 0.15973486997283015, "grad_norm": 4.140823841094971, "learning_rate": 0.00019847313413219505, "loss": 1.4204, "step": 5350 }, { "epoch": 0.16122771922491266, "grad_norm": 3.893251419067383, "learning_rate": 0.00019845884833627622, "loss": 1.392, "step": 5400 }, { "epoch": 0.1627205684769952, "grad_norm": 4.304211139678955, "learning_rate": 0.00019844456254035738, "loss": 1.44, "step": 5450 }, { "epoch": 0.16421341772907772, "grad_norm": 5.273501873016357, "learning_rate": 0.00019843027674443855, "loss": 1.445, "step": 5500 }, { "epoch": 0.16570626698116026, "grad_norm": 4.787700176239014, "learning_rate": 0.00019841599094851974, "loss": 1.3668, "step": 5550 }, { "epoch": 0.16719911623324277, "grad_norm": 3.7984108924865723, "learning_rate": 0.00019840170515260087, "loss": 1.3523, "step": 5600 }, { "epoch": 0.16869196548532528, "grad_norm": 3.885608673095703, "learning_rate": 0.00019838741935668207, "loss": 1.3917, "step": 5650 }, { "epoch": 0.17018481473740782, "grad_norm": 3.459803342819214, "learning_rate": 0.0001983731335607632, "loss": 1.3833, "step": 5700 }, { "epoch": 0.17167766398949033, "grad_norm": 3.7103006839752197, "learning_rate": 0.0001983588477648444, "loss": 1.4473, "step": 5750 }, { "epoch": 0.17317051324157287, "grad_norm": 6.645928382873535, "learning_rate": 0.00019834456196892553, "loss": 1.3706, "step": 5800 }, { "epoch": 0.17466336249365538, "grad_norm": 3.7201037406921387, "learning_rate": 0.0001983302761730067, "loss": 1.3733, "step": 5850 }, { "epoch": 0.17615621174573792, "grad_norm": 4.050106048583984, "learning_rate": 0.0001983159903770879, "loss": 1.4096, "step": 5900 }, { "epoch": 0.17764906099782043, "grad_norm": 4.190842628479004, "learning_rate": 0.00019830170458116903, "loss": 1.4404, "step": 5950 }, { "epoch": 0.17914191024990297, "grad_norm": 4.393162727355957, "learning_rate": 0.00019828741878525022, "loss": 1.4443, "step": 6000 }, { "epoch": 0.1806347595019855, "grad_norm": 3.597520351409912, "learning_rate": 0.00019827313298933136, "loss": 1.4063, "step": 6050 }, { "epoch": 0.18212760875406803, "grad_norm": 3.608085870742798, "learning_rate": 0.00019825884719341255, "loss": 1.3857, "step": 6100 }, { "epoch": 0.18362045800615054, "grad_norm": 3.7055492401123047, "learning_rate": 0.0001982445613974937, "loss": 1.3997, "step": 6150 }, { "epoch": 0.18511330725823305, "grad_norm": 3.875457763671875, "learning_rate": 0.00019823027560157488, "loss": 1.4296, "step": 6200 }, { "epoch": 0.1866061565103156, "grad_norm": 5.074592590332031, "learning_rate": 0.00019821598980565604, "loss": 1.3785, "step": 6250 }, { "epoch": 0.1880990057623981, "grad_norm": 6.013392448425293, "learning_rate": 0.0001982017040097372, "loss": 1.4391, "step": 6300 }, { "epoch": 0.18959185501448064, "grad_norm": 5.679958820343018, "learning_rate": 0.00019818741821381837, "loss": 1.367, "step": 6350 }, { "epoch": 0.19108470426656315, "grad_norm": 3.6182546615600586, "learning_rate": 0.00019817313241789954, "loss": 1.4508, "step": 6400 }, { "epoch": 0.1925775535186457, "grad_norm": 5.209213733673096, "learning_rate": 0.0001981588466219807, "loss": 1.3878, "step": 6450 }, { "epoch": 0.1940704027707282, "grad_norm": 3.0043230056762695, "learning_rate": 0.00019814456082606187, "loss": 1.4248, "step": 6500 }, { "epoch": 0.19556325202281075, "grad_norm": 3.157851219177246, "learning_rate": 0.00019813027503014303, "loss": 1.3725, "step": 6550 }, { "epoch": 0.19705610127489326, "grad_norm": 3.5292418003082275, "learning_rate": 0.0001981159892342242, "loss": 1.3932, "step": 6600 }, { "epoch": 0.1985489505269758, "grad_norm": 3.2819600105285645, "learning_rate": 0.00019810170343830536, "loss": 1.3495, "step": 6650 }, { "epoch": 0.2000417997790583, "grad_norm": 3.0243399143218994, "learning_rate": 0.00019808741764238655, "loss": 1.3689, "step": 6700 }, { "epoch": 0.20153464903114082, "grad_norm": 3.4495368003845215, "learning_rate": 0.0001980731318464677, "loss": 1.3725, "step": 6750 }, { "epoch": 0.20302749828322336, "grad_norm": 3.538259744644165, "learning_rate": 0.00019805884605054888, "loss": 1.3905, "step": 6800 }, { "epoch": 0.20452034753530587, "grad_norm": 4.162181377410889, "learning_rate": 0.00019804456025463002, "loss": 1.4129, "step": 6850 }, { "epoch": 0.20601319678738841, "grad_norm": 4.592432022094727, "learning_rate": 0.0001980302744587112, "loss": 1.3634, "step": 6900 }, { "epoch": 0.20750604603947093, "grad_norm": 3.45967960357666, "learning_rate": 0.00019801598866279237, "loss": 1.416, "step": 6950 }, { "epoch": 0.20899889529155347, "grad_norm": 4.221930503845215, "learning_rate": 0.00019800170286687354, "loss": 1.4051, "step": 7000 }, { "epoch": 0.21049174454363598, "grad_norm": 4.144239902496338, "learning_rate": 0.0001979874170709547, "loss": 1.4219, "step": 7050 }, { "epoch": 0.21198459379571852, "grad_norm": 4.7492570877075195, "learning_rate": 0.00019797313127503587, "loss": 1.4028, "step": 7100 }, { "epoch": 0.21347744304780103, "grad_norm": 3.5841355323791504, "learning_rate": 0.00019795884547911703, "loss": 1.4361, "step": 7150 }, { "epoch": 0.21497029229988357, "grad_norm": 4.662593364715576, "learning_rate": 0.0001979445596831982, "loss": 1.3816, "step": 7200 }, { "epoch": 0.21646314155196608, "grad_norm": 4.700701713562012, "learning_rate": 0.00019793027388727936, "loss": 1.4226, "step": 7250 }, { "epoch": 0.2179559908040486, "grad_norm": 4.025181293487549, "learning_rate": 0.00019791598809136053, "loss": 1.4291, "step": 7300 }, { "epoch": 0.21944884005613113, "grad_norm": 3.064573049545288, "learning_rate": 0.0001979017022954417, "loss": 1.4293, "step": 7350 }, { "epoch": 0.22094168930821365, "grad_norm": 6.342152118682861, "learning_rate": 0.00019788741649952288, "loss": 1.4173, "step": 7400 }, { "epoch": 0.22243453856029619, "grad_norm": 5.89996337890625, "learning_rate": 0.00019787313070360402, "loss": 1.396, "step": 7450 }, { "epoch": 0.2239273878123787, "grad_norm": 4.462945938110352, "learning_rate": 0.0001978588449076852, "loss": 1.3868, "step": 7500 }, { "epoch": 0.22542023706446124, "grad_norm": 3.6449055671691895, "learning_rate": 0.00019784455911176635, "loss": 1.396, "step": 7550 }, { "epoch": 0.22691308631654375, "grad_norm": 4.674243927001953, "learning_rate": 0.00019783027331584754, "loss": 1.395, "step": 7600 }, { "epoch": 0.2284059355686263, "grad_norm": 3.6160385608673096, "learning_rate": 0.00019781598751992868, "loss": 1.3918, "step": 7650 }, { "epoch": 0.2298987848207088, "grad_norm": 4.326193332672119, "learning_rate": 0.00019780170172400987, "loss": 1.3947, "step": 7700 }, { "epoch": 0.23139163407279134, "grad_norm": 5.4003777503967285, "learning_rate": 0.00019778741592809104, "loss": 1.344, "step": 7750 }, { "epoch": 0.23288448332487385, "grad_norm": 4.711580753326416, "learning_rate": 0.0001977731301321722, "loss": 1.3959, "step": 7800 }, { "epoch": 0.23437733257695637, "grad_norm": 3.4752814769744873, "learning_rate": 0.00019775884433625337, "loss": 1.3722, "step": 7850 }, { "epoch": 0.2358701818290389, "grad_norm": 4.028527736663818, "learning_rate": 0.00019774455854033453, "loss": 1.3683, "step": 7900 }, { "epoch": 0.23736303108112142, "grad_norm": 4.094334602355957, "learning_rate": 0.0001977302727444157, "loss": 1.3607, "step": 7950 }, { "epoch": 0.23885588033320396, "grad_norm": 5.232580661773682, "learning_rate": 0.00019771598694849686, "loss": 1.4354, "step": 8000 }, { "epoch": 0.24034872958528647, "grad_norm": 4.269852161407471, "learning_rate": 0.00019770170115257802, "loss": 1.4372, "step": 8050 }, { "epoch": 0.241841578837369, "grad_norm": 3.312541961669922, "learning_rate": 0.0001976874153566592, "loss": 1.3826, "step": 8100 }, { "epoch": 0.24333442808945152, "grad_norm": 3.8900692462921143, "learning_rate": 0.00019767312956074035, "loss": 1.4189, "step": 8150 }, { "epoch": 0.24482727734153406, "grad_norm": 3.894512414932251, "learning_rate": 0.00019765884376482155, "loss": 1.3365, "step": 8200 }, { "epoch": 0.24632012659361657, "grad_norm": 4.644411563873291, "learning_rate": 0.00019764455796890268, "loss": 1.4311, "step": 8250 }, { "epoch": 0.2478129758456991, "grad_norm": 8.174029350280762, "learning_rate": 0.00019763027217298387, "loss": 1.361, "step": 8300 }, { "epoch": 0.24930582509778162, "grad_norm": 4.615732192993164, "learning_rate": 0.000197615986377065, "loss": 1.4552, "step": 8350 }, { "epoch": 0.25079867434986414, "grad_norm": 4.421249866485596, "learning_rate": 0.0001976017005811462, "loss": 1.3463, "step": 8400 }, { "epoch": 0.2522915236019467, "grad_norm": 2.8386716842651367, "learning_rate": 0.00019758741478522734, "loss": 1.348, "step": 8450 }, { "epoch": 0.2537843728540292, "grad_norm": 4.3141703605651855, "learning_rate": 0.0001975731289893085, "loss": 1.4306, "step": 8500 }, { "epoch": 0.2552772221061117, "grad_norm": 3.947331428527832, "learning_rate": 0.0001975588431933897, "loss": 1.3823, "step": 8550 }, { "epoch": 0.25677007135819424, "grad_norm": 3.2268636226654053, "learning_rate": 0.00019754455739747084, "loss": 1.4199, "step": 8600 }, { "epoch": 0.2582629206102768, "grad_norm": 4.0353102684021, "learning_rate": 0.00019753027160155203, "loss": 1.3927, "step": 8650 }, { "epoch": 0.2597557698623593, "grad_norm": 3.490560293197632, "learning_rate": 0.00019751598580563316, "loss": 1.401, "step": 8700 }, { "epoch": 0.2612486191144418, "grad_norm": 5.577207088470459, "learning_rate": 0.00019750170000971436, "loss": 1.3586, "step": 8750 }, { "epoch": 0.26274146836652434, "grad_norm": 4.168467998504639, "learning_rate": 0.0001974874142137955, "loss": 1.3303, "step": 8800 }, { "epoch": 0.2642343176186069, "grad_norm": 3.812627077102661, "learning_rate": 0.00019747312841787669, "loss": 1.3717, "step": 8850 }, { "epoch": 0.2657271668706894, "grad_norm": 4.875237464904785, "learning_rate": 0.00019745884262195785, "loss": 1.3873, "step": 8900 }, { "epoch": 0.2672200161227719, "grad_norm": 4.048189163208008, "learning_rate": 0.00019744455682603902, "loss": 1.3775, "step": 8950 }, { "epoch": 0.26871286537485445, "grad_norm": 3.9090261459350586, "learning_rate": 0.00019743027103012018, "loss": 1.4296, "step": 9000 }, { "epoch": 0.270205714626937, "grad_norm": 2.8476953506469727, "learning_rate": 0.00019741598523420134, "loss": 1.4175, "step": 9050 }, { "epoch": 0.2716985638790195, "grad_norm": 5.782102584838867, "learning_rate": 0.0001974016994382825, "loss": 1.3835, "step": 9100 }, { "epoch": 0.273191413131102, "grad_norm": 4.640264987945557, "learning_rate": 0.00019738741364236367, "loss": 1.4524, "step": 9150 }, { "epoch": 0.27468426238318455, "grad_norm": 4.81790828704834, "learning_rate": 0.00019737312784644484, "loss": 1.3183, "step": 9200 }, { "epoch": 0.2761771116352671, "grad_norm": 2.685009717941284, "learning_rate": 0.000197358842050526, "loss": 1.3243, "step": 9250 }, { "epoch": 0.2776699608873496, "grad_norm": 5.321321487426758, "learning_rate": 0.00019734455625460717, "loss": 1.4086, "step": 9300 }, { "epoch": 0.2791628101394321, "grad_norm": 3.065791368484497, "learning_rate": 0.00019733027045868836, "loss": 1.337, "step": 9350 }, { "epoch": 0.28065565939151466, "grad_norm": 4.3569817543029785, "learning_rate": 0.0001973159846627695, "loss": 1.4082, "step": 9400 }, { "epoch": 0.2821485086435972, "grad_norm": 4.67582368850708, "learning_rate": 0.0001973016988668507, "loss": 1.3832, "step": 9450 }, { "epoch": 0.2836413578956797, "grad_norm": 4.942144870758057, "learning_rate": 0.00019728741307093183, "loss": 1.3734, "step": 9500 }, { "epoch": 0.2851342071477622, "grad_norm": 4.853246688842773, "learning_rate": 0.00019727312727501302, "loss": 1.4111, "step": 9550 }, { "epoch": 0.28662705639984476, "grad_norm": 3.071237325668335, "learning_rate": 0.00019725884147909418, "loss": 1.3746, "step": 9600 }, { "epoch": 0.28811990565192724, "grad_norm": 4.844615459442139, "learning_rate": 0.00019724455568317535, "loss": 1.3051, "step": 9650 }, { "epoch": 0.2896127549040098, "grad_norm": 5.954223155975342, "learning_rate": 0.0001972302698872565, "loss": 1.4131, "step": 9700 }, { "epoch": 0.2911056041560923, "grad_norm": 3.6717801094055176, "learning_rate": 0.00019721598409133768, "loss": 1.4166, "step": 9750 }, { "epoch": 0.29259845340817486, "grad_norm": 3.6257095336914062, "learning_rate": 0.00019720169829541884, "loss": 1.3679, "step": 9800 }, { "epoch": 0.29409130266025735, "grad_norm": 4.245635032653809, "learning_rate": 0.0001971874124995, "loss": 1.3171, "step": 9850 }, { "epoch": 0.2955841519123399, "grad_norm": 5.362602710723877, "learning_rate": 0.00019717312670358117, "loss": 1.3932, "step": 9900 }, { "epoch": 0.2970770011644224, "grad_norm": 4.6283721923828125, "learning_rate": 0.00019715884090766234, "loss": 1.3757, "step": 9950 }, { "epoch": 0.29856985041650497, "grad_norm": 4.299574851989746, "learning_rate": 0.0001971445551117435, "loss": 1.4018, "step": 10000 }, { "epoch": 0.30006269966858745, "grad_norm": 4.7913641929626465, "learning_rate": 0.0001971302693158247, "loss": 1.4228, "step": 10050 }, { "epoch": 0.30155554892067, "grad_norm": 5.312823295593262, "learning_rate": 0.00019711598351990583, "loss": 1.3866, "step": 10100 }, { "epoch": 0.30304839817275253, "grad_norm": 4.099662780761719, "learning_rate": 0.00019710169772398702, "loss": 1.454, "step": 10150 }, { "epoch": 0.304541247424835, "grad_norm": 4.254878520965576, "learning_rate": 0.00019708741192806816, "loss": 1.3526, "step": 10200 }, { "epoch": 0.30603409667691756, "grad_norm": 4.056606292724609, "learning_rate": 0.00019707312613214935, "loss": 1.4167, "step": 10250 }, { "epoch": 0.3075269459290001, "grad_norm": 3.790809154510498, "learning_rate": 0.0001970588403362305, "loss": 1.3536, "step": 10300 }, { "epoch": 0.30901979518108263, "grad_norm": 4.46298360824585, "learning_rate": 0.00019704455454031168, "loss": 1.3613, "step": 10350 }, { "epoch": 0.3105126444331651, "grad_norm": 4.52452278137207, "learning_rate": 0.00019703026874439284, "loss": 1.4591, "step": 10400 }, { "epoch": 0.31200549368524766, "grad_norm": 4.735177040100098, "learning_rate": 0.000197015982948474, "loss": 1.4617, "step": 10450 }, { "epoch": 0.3134983429373302, "grad_norm": 4.48261833190918, "learning_rate": 0.00019700169715255517, "loss": 1.4072, "step": 10500 }, { "epoch": 0.31499119218941274, "grad_norm": 3.3441503047943115, "learning_rate": 0.00019698741135663634, "loss": 1.4082, "step": 10550 }, { "epoch": 0.3164840414414952, "grad_norm": 3.9771218299865723, "learning_rate": 0.0001969731255607175, "loss": 1.4184, "step": 10600 }, { "epoch": 0.31797689069357776, "grad_norm": 6.366194725036621, "learning_rate": 0.00019695883976479867, "loss": 1.387, "step": 10650 }, { "epoch": 0.3194697399456603, "grad_norm": 5.072678089141846, "learning_rate": 0.00019694455396887983, "loss": 1.3996, "step": 10700 }, { "epoch": 0.3209625891977428, "grad_norm": 3.7204978466033936, "learning_rate": 0.000196930268172961, "loss": 1.3774, "step": 10750 }, { "epoch": 0.3224554384498253, "grad_norm": 4.47731351852417, "learning_rate": 0.00019691598237704216, "loss": 1.3552, "step": 10800 }, { "epoch": 0.32394828770190787, "grad_norm": 3.4569220542907715, "learning_rate": 0.00019690169658112335, "loss": 1.3794, "step": 10850 }, { "epoch": 0.3254411369539904, "grad_norm": 4.344145774841309, "learning_rate": 0.0001968874107852045, "loss": 1.4419, "step": 10900 }, { "epoch": 0.3269339862060729, "grad_norm": 4.089848041534424, "learning_rate": 0.00019687312498928568, "loss": 1.4116, "step": 10950 }, { "epoch": 0.32842683545815543, "grad_norm": 3.995945930480957, "learning_rate": 0.00019685883919336682, "loss": 1.3532, "step": 11000 }, { "epoch": 0.32991968471023797, "grad_norm": 3.8309378623962402, "learning_rate": 0.00019684455339744798, "loss": 1.3957, "step": 11050 }, { "epoch": 0.3314125339623205, "grad_norm": 4.386235237121582, "learning_rate": 0.00019683026760152915, "loss": 1.3616, "step": 11100 }, { "epoch": 0.332905383214403, "grad_norm": 5.133239269256592, "learning_rate": 0.00019681598180561031, "loss": 1.3959, "step": 11150 }, { "epoch": 0.33439823246648553, "grad_norm": 4.216183662414551, "learning_rate": 0.0001968016960096915, "loss": 1.3446, "step": 11200 }, { "epoch": 0.3358910817185681, "grad_norm": 3.631131172180176, "learning_rate": 0.00019678741021377264, "loss": 1.3779, "step": 11250 }, { "epoch": 0.33738393097065056, "grad_norm": 4.603448390960693, "learning_rate": 0.00019677312441785384, "loss": 1.3962, "step": 11300 }, { "epoch": 0.3388767802227331, "grad_norm": 3.6482913494110107, "learning_rate": 0.00019675883862193497, "loss": 1.3931, "step": 11350 }, { "epoch": 0.34036962947481564, "grad_norm": 5.040388107299805, "learning_rate": 0.00019674455282601616, "loss": 1.3465, "step": 11400 }, { "epoch": 0.3418624787268982, "grad_norm": 5.762825012207031, "learning_rate": 0.0001967302670300973, "loss": 1.3577, "step": 11450 }, { "epoch": 0.34335532797898066, "grad_norm": 4.941501617431641, "learning_rate": 0.0001967159812341785, "loss": 1.3676, "step": 11500 }, { "epoch": 0.3448481772310632, "grad_norm": 5.368370532989502, "learning_rate": 0.00019670169543825966, "loss": 1.4265, "step": 11550 }, { "epoch": 0.34634102648314574, "grad_norm": 4.931522369384766, "learning_rate": 0.00019668740964234082, "loss": 1.3551, "step": 11600 }, { "epoch": 0.3478338757352283, "grad_norm": 3.9685990810394287, "learning_rate": 0.000196673123846422, "loss": 1.402, "step": 11650 }, { "epoch": 0.34932672498731077, "grad_norm": 5.771200656890869, "learning_rate": 0.00019665883805050315, "loss": 1.3596, "step": 11700 }, { "epoch": 0.3508195742393933, "grad_norm": 5.142852306365967, "learning_rate": 0.00019664455225458432, "loss": 1.385, "step": 11750 }, { "epoch": 0.35231242349147585, "grad_norm": 3.295628786087036, "learning_rate": 0.00019663026645866548, "loss": 1.3454, "step": 11800 }, { "epoch": 0.35380527274355833, "grad_norm": 4.34658145904541, "learning_rate": 0.00019661598066274665, "loss": 1.3976, "step": 11850 }, { "epoch": 0.35529812199564087, "grad_norm": 4.032591819763184, "learning_rate": 0.0001966016948668278, "loss": 1.3571, "step": 11900 }, { "epoch": 0.3567909712477234, "grad_norm": 3.9286158084869385, "learning_rate": 0.00019658740907090898, "loss": 1.398, "step": 11950 }, { "epoch": 0.35828382049980595, "grad_norm": 5.184597492218018, "learning_rate": 0.00019657312327499017, "loss": 1.3827, "step": 12000 }, { "epoch": 0.35977666975188843, "grad_norm": 4.4749226570129395, "learning_rate": 0.0001965588374790713, "loss": 1.373, "step": 12050 }, { "epoch": 0.361269519003971, "grad_norm": 3.5633764266967773, "learning_rate": 0.0001965445516831525, "loss": 1.3704, "step": 12100 }, { "epoch": 0.3627623682560535, "grad_norm": 7.570897102355957, "learning_rate": 0.00019653026588723363, "loss": 1.3515, "step": 12150 }, { "epoch": 0.36425521750813605, "grad_norm": 4.239411354064941, "learning_rate": 0.00019651598009131483, "loss": 1.3813, "step": 12200 }, { "epoch": 0.36574806676021854, "grad_norm": 3.8941049575805664, "learning_rate": 0.00019650169429539596, "loss": 1.3954, "step": 12250 }, { "epoch": 0.3672409160123011, "grad_norm": 4.8694586753845215, "learning_rate": 0.00019648740849947716, "loss": 1.3531, "step": 12300 }, { "epoch": 0.3687337652643836, "grad_norm": 3.914964437484741, "learning_rate": 0.00019647312270355832, "loss": 1.3954, "step": 12350 }, { "epoch": 0.3702266145164661, "grad_norm": 3.4050538539886475, "learning_rate": 0.00019645883690763948, "loss": 1.3498, "step": 12400 }, { "epoch": 0.37171946376854864, "grad_norm": 4.436797618865967, "learning_rate": 0.00019644455111172065, "loss": 1.3889, "step": 12450 }, { "epoch": 0.3732123130206312, "grad_norm": 2.7660670280456543, "learning_rate": 0.00019643026531580181, "loss": 1.3392, "step": 12500 }, { "epoch": 0.3747051622727137, "grad_norm": 5.364072799682617, "learning_rate": 0.00019641597951988298, "loss": 1.3157, "step": 12550 }, { "epoch": 0.3761980115247962, "grad_norm": 5.123339653015137, "learning_rate": 0.00019640169372396414, "loss": 1.3523, "step": 12600 }, { "epoch": 0.37769086077687875, "grad_norm": 3.4495439529418945, "learning_rate": 0.0001963874079280453, "loss": 1.3331, "step": 12650 }, { "epoch": 0.3791837100289613, "grad_norm": 4.613680362701416, "learning_rate": 0.00019637312213212647, "loss": 1.3707, "step": 12700 }, { "epoch": 0.3806765592810438, "grad_norm": 3.5819404125213623, "learning_rate": 0.00019635883633620764, "loss": 1.4023, "step": 12750 }, { "epoch": 0.3821694085331263, "grad_norm": 3.5075576305389404, "learning_rate": 0.00019634455054028883, "loss": 1.3815, "step": 12800 }, { "epoch": 0.38366225778520885, "grad_norm": 4.425256729125977, "learning_rate": 0.00019633026474436997, "loss": 1.3801, "step": 12850 }, { "epoch": 0.3851551070372914, "grad_norm": 3.5711112022399902, "learning_rate": 0.00019631597894845116, "loss": 1.3375, "step": 12900 }, { "epoch": 0.3866479562893739, "grad_norm": 5.728016376495361, "learning_rate": 0.0001963016931525323, "loss": 1.3914, "step": 12950 }, { "epoch": 0.3881408055414564, "grad_norm": 4.294504642486572, "learning_rate": 0.0001962874073566135, "loss": 1.4561, "step": 13000 }, { "epoch": 0.38963365479353895, "grad_norm": 5.329941749572754, "learning_rate": 0.00019627312156069465, "loss": 1.4018, "step": 13050 }, { "epoch": 0.3911265040456215, "grad_norm": 4.166362762451172, "learning_rate": 0.00019625883576477582, "loss": 1.4334, "step": 13100 }, { "epoch": 0.392619353297704, "grad_norm": 3.3922691345214844, "learning_rate": 0.00019624454996885698, "loss": 1.3956, "step": 13150 }, { "epoch": 0.3941122025497865, "grad_norm": 4.340898036956787, "learning_rate": 0.00019623026417293815, "loss": 1.3643, "step": 13200 }, { "epoch": 0.39560505180186906, "grad_norm": 6.623823165893555, "learning_rate": 0.0001962159783770193, "loss": 1.362, "step": 13250 }, { "epoch": 0.3970979010539516, "grad_norm": 4.490639686584473, "learning_rate": 0.00019620169258110048, "loss": 1.3742, "step": 13300 }, { "epoch": 0.3985907503060341, "grad_norm": 4.179808139801025, "learning_rate": 0.00019618740678518164, "loss": 1.4191, "step": 13350 }, { "epoch": 0.4000835995581166, "grad_norm": 5.623187065124512, "learning_rate": 0.0001961731209892628, "loss": 1.3793, "step": 13400 }, { "epoch": 0.40157644881019916, "grad_norm": 3.9650678634643555, "learning_rate": 0.00019615883519334397, "loss": 1.3917, "step": 13450 }, { "epoch": 0.40306929806228164, "grad_norm": 5.047702312469482, "learning_rate": 0.00019614454939742516, "loss": 1.4501, "step": 13500 }, { "epoch": 0.4045621473143642, "grad_norm": 3.46647310256958, "learning_rate": 0.0001961302636015063, "loss": 1.4091, "step": 13550 }, { "epoch": 0.4060549965664467, "grad_norm": 3.2203481197357178, "learning_rate": 0.0001961159778055875, "loss": 1.4003, "step": 13600 }, { "epoch": 0.40754784581852926, "grad_norm": 3.727679967880249, "learning_rate": 0.00019610169200966863, "loss": 1.4004, "step": 13650 }, { "epoch": 0.40904069507061175, "grad_norm": 4.469257831573486, "learning_rate": 0.0001960874062137498, "loss": 1.4061, "step": 13700 }, { "epoch": 0.4105335443226943, "grad_norm": 4.041538715362549, "learning_rate": 0.00019607312041783096, "loss": 1.411, "step": 13750 }, { "epoch": 0.41202639357477683, "grad_norm": 5.2691779136657715, "learning_rate": 0.00019605883462191212, "loss": 1.3396, "step": 13800 }, { "epoch": 0.41351924282685937, "grad_norm": 6.236726760864258, "learning_rate": 0.00019604454882599331, "loss": 1.4295, "step": 13850 }, { "epoch": 0.41501209207894185, "grad_norm": 6.703745365142822, "learning_rate": 0.00019603026303007445, "loss": 1.4105, "step": 13900 }, { "epoch": 0.4165049413310244, "grad_norm": 3.997664451599121, "learning_rate": 0.00019601597723415564, "loss": 1.347, "step": 13950 }, { "epoch": 0.41799779058310693, "grad_norm": 5.311407566070557, "learning_rate": 0.00019600169143823678, "loss": 1.4546, "step": 14000 }, { "epoch": 0.4194906398351894, "grad_norm": 4.0283098220825195, "learning_rate": 0.00019598740564231797, "loss": 1.4513, "step": 14050 }, { "epoch": 0.42098348908727196, "grad_norm": 7.345764636993408, "learning_rate": 0.0001959731198463991, "loss": 1.3832, "step": 14100 }, { "epoch": 0.4224763383393545, "grad_norm": 4.324542045593262, "learning_rate": 0.0001959588340504803, "loss": 1.3751, "step": 14150 }, { "epoch": 0.42396918759143704, "grad_norm": 3.8322675228118896, "learning_rate": 0.00019594454825456147, "loss": 1.344, "step": 14200 }, { "epoch": 0.4254620368435195, "grad_norm": 4.62548303604126, "learning_rate": 0.00019593026245864263, "loss": 1.4346, "step": 14250 }, { "epoch": 0.42695488609560206, "grad_norm": 4.585489273071289, "learning_rate": 0.0001959159766627238, "loss": 1.4145, "step": 14300 }, { "epoch": 0.4284477353476846, "grad_norm": 3.64227557182312, "learning_rate": 0.00019590169086680496, "loss": 1.367, "step": 14350 }, { "epoch": 0.42994058459976714, "grad_norm": 4.730580806732178, "learning_rate": 0.00019588740507088613, "loss": 1.3653, "step": 14400 }, { "epoch": 0.4314334338518496, "grad_norm": 4.4075398445129395, "learning_rate": 0.0001958731192749673, "loss": 1.4081, "step": 14450 }, { "epoch": 0.43292628310393216, "grad_norm": 3.0498785972595215, "learning_rate": 0.00019585883347904845, "loss": 1.343, "step": 14500 }, { "epoch": 0.4344191323560147, "grad_norm": 4.179199695587158, "learning_rate": 0.00019584454768312962, "loss": 1.3662, "step": 14550 }, { "epoch": 0.4359119816080972, "grad_norm": 4.1148786544799805, "learning_rate": 0.00019583026188721078, "loss": 1.3785, "step": 14600 }, { "epoch": 0.4374048308601797, "grad_norm": 4.012060165405273, "learning_rate": 0.00019581597609129198, "loss": 1.4056, "step": 14650 }, { "epoch": 0.43889768011226227, "grad_norm": 7.186342716217041, "learning_rate": 0.0001958016902953731, "loss": 1.3943, "step": 14700 }, { "epoch": 0.4403905293643448, "grad_norm": 3.951267957687378, "learning_rate": 0.0001957874044994543, "loss": 1.3976, "step": 14750 }, { "epoch": 0.4418833786164273, "grad_norm": 5.276801586151123, "learning_rate": 0.00019577311870353544, "loss": 1.4137, "step": 14800 }, { "epoch": 0.44337622786850983, "grad_norm": 4.107429504394531, "learning_rate": 0.00019575883290761663, "loss": 1.388, "step": 14850 }, { "epoch": 0.44486907712059237, "grad_norm": 4.274941444396973, "learning_rate": 0.00019574454711169777, "loss": 1.3446, "step": 14900 }, { "epoch": 0.4463619263726749, "grad_norm": 4.174200534820557, "learning_rate": 0.00019573026131577896, "loss": 1.3703, "step": 14950 }, { "epoch": 0.4478547756247574, "grad_norm": 3.867125988006592, "learning_rate": 0.00019571597551986013, "loss": 1.3745, "step": 15000 }, { "epoch": 0.44934762487683994, "grad_norm": 6.454402923583984, "learning_rate": 0.0001957016897239413, "loss": 1.3081, "step": 15050 }, { "epoch": 0.4508404741289225, "grad_norm": 3.8726885318756104, "learning_rate": 0.00019568740392802246, "loss": 1.3527, "step": 15100 }, { "epoch": 0.45233332338100496, "grad_norm": 3.6218361854553223, "learning_rate": 0.00019567311813210362, "loss": 1.4549, "step": 15150 }, { "epoch": 0.4538261726330875, "grad_norm": 5.2475361824035645, "learning_rate": 0.0001956588323361848, "loss": 1.361, "step": 15200 }, { "epoch": 0.45531902188517004, "grad_norm": 4.388748645782471, "learning_rate": 0.00019564454654026595, "loss": 1.3164, "step": 15250 }, { "epoch": 0.4568118711372526, "grad_norm": 4.97973108291626, "learning_rate": 0.00019563026074434712, "loss": 1.3755, "step": 15300 }, { "epoch": 0.45830472038933506, "grad_norm": 4.538138389587402, "learning_rate": 0.00019561597494842828, "loss": 1.4339, "step": 15350 }, { "epoch": 0.4597975696414176, "grad_norm": 4.389719009399414, "learning_rate": 0.00019560168915250945, "loss": 1.4163, "step": 15400 }, { "epoch": 0.46129041889350014, "grad_norm": 4.347919464111328, "learning_rate": 0.00019558740335659064, "loss": 1.3663, "step": 15450 }, { "epoch": 0.4627832681455827, "grad_norm": 4.820595741271973, "learning_rate": 0.00019557311756067177, "loss": 1.3441, "step": 15500 }, { "epoch": 0.46427611739766517, "grad_norm": 2.6965413093566895, "learning_rate": 0.00019555883176475297, "loss": 1.3657, "step": 15550 }, { "epoch": 0.4657689666497477, "grad_norm": 4.741116523742676, "learning_rate": 0.0001955445459688341, "loss": 1.4414, "step": 15600 }, { "epoch": 0.46726181590183025, "grad_norm": 3.9512829780578613, "learning_rate": 0.0001955302601729153, "loss": 1.354, "step": 15650 }, { "epoch": 0.46875466515391273, "grad_norm": 7.704863548278809, "learning_rate": 0.00019551597437699646, "loss": 1.4107, "step": 15700 }, { "epoch": 0.47024751440599527, "grad_norm": 3.502988338470459, "learning_rate": 0.00019550168858107763, "loss": 1.415, "step": 15750 }, { "epoch": 0.4717403636580778, "grad_norm": 4.246065139770508, "learning_rate": 0.0001954874027851588, "loss": 1.3414, "step": 15800 }, { "epoch": 0.47323321291016035, "grad_norm": 3.4039735794067383, "learning_rate": 0.00019547311698923995, "loss": 1.3303, "step": 15850 }, { "epoch": 0.47472606216224283, "grad_norm": 3.279521942138672, "learning_rate": 0.00019545883119332112, "loss": 1.3447, "step": 15900 }, { "epoch": 0.4762189114143254, "grad_norm": 2.9335134029388428, "learning_rate": 0.00019544454539740228, "loss": 1.3998, "step": 15950 }, { "epoch": 0.4777117606664079, "grad_norm": 3.317011833190918, "learning_rate": 0.00019543025960148345, "loss": 1.3506, "step": 16000 }, { "epoch": 0.47920460991849045, "grad_norm": 3.5980935096740723, "learning_rate": 0.0001954159738055646, "loss": 1.3622, "step": 16050 }, { "epoch": 0.48069745917057294, "grad_norm": 4.726743698120117, "learning_rate": 0.00019540168800964578, "loss": 1.3414, "step": 16100 }, { "epoch": 0.4821903084226555, "grad_norm": 5.129758358001709, "learning_rate": 0.00019538740221372694, "loss": 1.3873, "step": 16150 }, { "epoch": 0.483683157674738, "grad_norm": 5.122271537780762, "learning_rate": 0.0001953731164178081, "loss": 1.428, "step": 16200 }, { "epoch": 0.4851760069268205, "grad_norm": 3.359868049621582, "learning_rate": 0.0001953588306218893, "loss": 1.3111, "step": 16250 }, { "epoch": 0.48666885617890304, "grad_norm": 5.066514492034912, "learning_rate": 0.00019534454482597044, "loss": 1.2961, "step": 16300 }, { "epoch": 0.4881617054309856, "grad_norm": 4.902595520019531, "learning_rate": 0.0001953302590300516, "loss": 1.3682, "step": 16350 }, { "epoch": 0.4896545546830681, "grad_norm": 5.0537028312683105, "learning_rate": 0.00019531597323413277, "loss": 1.315, "step": 16400 }, { "epoch": 0.4911474039351506, "grad_norm": 3.7002792358398438, "learning_rate": 0.00019530168743821393, "loss": 1.3441, "step": 16450 }, { "epoch": 0.49264025318723315, "grad_norm": 4.845950603485107, "learning_rate": 0.00019528740164229512, "loss": 1.3887, "step": 16500 }, { "epoch": 0.4941331024393157, "grad_norm": 4.933434963226318, "learning_rate": 0.00019527311584637626, "loss": 1.3865, "step": 16550 }, { "epoch": 0.4956259516913982, "grad_norm": 3.8103625774383545, "learning_rate": 0.00019525883005045745, "loss": 1.3757, "step": 16600 }, { "epoch": 0.4971188009434807, "grad_norm": 4.501999855041504, "learning_rate": 0.0001952445442545386, "loss": 1.4482, "step": 16650 }, { "epoch": 0.49861165019556325, "grad_norm": 5.600002765655518, "learning_rate": 0.00019523025845861978, "loss": 1.4209, "step": 16700 }, { "epoch": 0.5001044994476458, "grad_norm": 5.138682842254639, "learning_rate": 0.00019521597266270092, "loss": 1.4287, "step": 16750 }, { "epoch": 0.5015973486997283, "grad_norm": 5.575449466705322, "learning_rate": 0.0001952016868667821, "loss": 1.3992, "step": 16800 }, { "epoch": 0.5030901979518109, "grad_norm": 3.6443893909454346, "learning_rate": 0.00019518740107086327, "loss": 1.3661, "step": 16850 }, { "epoch": 0.5045830472038934, "grad_norm": 3.904905319213867, "learning_rate": 0.00019517311527494444, "loss": 1.4448, "step": 16900 }, { "epoch": 0.5060758964559758, "grad_norm": 4.380904197692871, "learning_rate": 0.0001951588294790256, "loss": 1.4076, "step": 16950 }, { "epoch": 0.5075687457080584, "grad_norm": 3.5924415588378906, "learning_rate": 0.00019514454368310677, "loss": 1.3833, "step": 17000 }, { "epoch": 0.5090615949601409, "grad_norm": 3.7007193565368652, "learning_rate": 0.00019513025788718793, "loss": 1.391, "step": 17050 }, { "epoch": 0.5105544442122234, "grad_norm": 3.9879095554351807, "learning_rate": 0.0001951159720912691, "loss": 1.3419, "step": 17100 }, { "epoch": 0.512047293464306, "grad_norm": 5.663998126983643, "learning_rate": 0.00019510168629535026, "loss": 1.3481, "step": 17150 }, { "epoch": 0.5135401427163885, "grad_norm": 3.9803707599639893, "learning_rate": 0.00019508740049943143, "loss": 1.4034, "step": 17200 }, { "epoch": 0.5150329919684711, "grad_norm": 3.718477725982666, "learning_rate": 0.0001950731147035126, "loss": 1.406, "step": 17250 }, { "epoch": 0.5165258412205536, "grad_norm": 4.864751815795898, "learning_rate": 0.00019505882890759378, "loss": 1.375, "step": 17300 }, { "epoch": 0.518018690472636, "grad_norm": 3.697645664215088, "learning_rate": 0.00019504454311167492, "loss": 1.4283, "step": 17350 }, { "epoch": 0.5195115397247186, "grad_norm": 4.063074111938477, "learning_rate": 0.0001950302573157561, "loss": 1.378, "step": 17400 }, { "epoch": 0.5210043889768011, "grad_norm": 4.223004341125488, "learning_rate": 0.00019501597151983725, "loss": 1.3789, "step": 17450 }, { "epoch": 0.5224972382288836, "grad_norm": 3.329366683959961, "learning_rate": 0.00019500168572391844, "loss": 1.346, "step": 17500 }, { "epoch": 0.5239900874809662, "grad_norm": 4.774710178375244, "learning_rate": 0.00019498739992799958, "loss": 1.3895, "step": 17550 }, { "epoch": 0.5254829367330487, "grad_norm": 6.2145490646362305, "learning_rate": 0.00019497311413208077, "loss": 1.3715, "step": 17600 }, { "epoch": 0.5269757859851312, "grad_norm": 3.9069626331329346, "learning_rate": 0.00019495882833616194, "loss": 1.4572, "step": 17650 }, { "epoch": 0.5284686352372138, "grad_norm": 3.347576141357422, "learning_rate": 0.0001949445425402431, "loss": 1.359, "step": 17700 }, { "epoch": 0.5299614844892963, "grad_norm": 5.305202484130859, "learning_rate": 0.00019493025674432427, "loss": 1.4038, "step": 17750 }, { "epoch": 0.5314543337413788, "grad_norm": 3.865619659423828, "learning_rate": 0.00019491597094840543, "loss": 1.3913, "step": 17800 }, { "epoch": 0.5329471829934613, "grad_norm": 4.791336536407471, "learning_rate": 0.0001949016851524866, "loss": 1.3852, "step": 17850 }, { "epoch": 0.5344400322455438, "grad_norm": 3.7827060222625732, "learning_rate": 0.00019488739935656776, "loss": 1.3108, "step": 17900 }, { "epoch": 0.5359328814976264, "grad_norm": 4.945117473602295, "learning_rate": 0.00019487311356064892, "loss": 1.346, "step": 17950 }, { "epoch": 0.5374257307497089, "grad_norm": 4.561169147491455, "learning_rate": 0.0001948588277647301, "loss": 1.3904, "step": 18000 }, { "epoch": 0.5389185800017914, "grad_norm": 4.608798027038574, "learning_rate": 0.00019484454196881125, "loss": 1.4133, "step": 18050 }, { "epoch": 0.540411429253874, "grad_norm": 4.303143501281738, "learning_rate": 0.00019483025617289245, "loss": 1.3994, "step": 18100 }, { "epoch": 0.5419042785059565, "grad_norm": 5.815835952758789, "learning_rate": 0.00019481597037697358, "loss": 1.392, "step": 18150 }, { "epoch": 0.543397127758039, "grad_norm": 5.349491596221924, "learning_rate": 0.00019480168458105477, "loss": 1.418, "step": 18200 }, { "epoch": 0.5448899770101215, "grad_norm": 3.7407824993133545, "learning_rate": 0.0001947873987851359, "loss": 1.3747, "step": 18250 }, { "epoch": 0.546382826262204, "grad_norm": 5.2810163497924805, "learning_rate": 0.0001947731129892171, "loss": 1.4023, "step": 18300 }, { "epoch": 0.5478756755142866, "grad_norm": 4.417948246002197, "learning_rate": 0.00019475882719329824, "loss": 1.4005, "step": 18350 }, { "epoch": 0.5493685247663691, "grad_norm": 5.287749290466309, "learning_rate": 0.00019474454139737943, "loss": 1.4211, "step": 18400 }, { "epoch": 0.5508613740184516, "grad_norm": 4.0996809005737305, "learning_rate": 0.0001947302556014606, "loss": 1.3559, "step": 18450 }, { "epoch": 0.5523542232705342, "grad_norm": 5.229327201843262, "learning_rate": 0.00019471596980554176, "loss": 1.4549, "step": 18500 }, { "epoch": 0.5538470725226167, "grad_norm": 4.409546852111816, "learning_rate": 0.00019470168400962293, "loss": 1.3683, "step": 18550 }, { "epoch": 0.5553399217746992, "grad_norm": 5.4077229499816895, "learning_rate": 0.0001946873982137041, "loss": 1.398, "step": 18600 }, { "epoch": 0.5568327710267817, "grad_norm": 5.208966255187988, "learning_rate": 0.00019467311241778526, "loss": 1.3276, "step": 18650 }, { "epoch": 0.5583256202788642, "grad_norm": 4.8162617683410645, "learning_rate": 0.00019465882662186642, "loss": 1.3314, "step": 18700 }, { "epoch": 0.5598184695309467, "grad_norm": 5.04697322845459, "learning_rate": 0.00019464454082594759, "loss": 1.3883, "step": 18750 }, { "epoch": 0.5613113187830293, "grad_norm": 4.038108825683594, "learning_rate": 0.00019463025503002875, "loss": 1.4377, "step": 18800 }, { "epoch": 0.5628041680351118, "grad_norm": 4.910576820373535, "learning_rate": 0.00019461596923410992, "loss": 1.3713, "step": 18850 }, { "epoch": 0.5642970172871944, "grad_norm": 5.3433756828308105, "learning_rate": 0.00019460168343819108, "loss": 1.4359, "step": 18900 }, { "epoch": 0.5657898665392769, "grad_norm": 3.9515552520751953, "learning_rate": 0.00019458739764227224, "loss": 1.3608, "step": 18950 }, { "epoch": 0.5672827157913594, "grad_norm": 4.515705108642578, "learning_rate": 0.0001945731118463534, "loss": 1.3274, "step": 19000 }, { "epoch": 0.568775565043442, "grad_norm": 4.434077262878418, "learning_rate": 0.00019455882605043457, "loss": 1.3681, "step": 19050 }, { "epoch": 0.5702684142955244, "grad_norm": 4.534008979797363, "learning_rate": 0.00019454454025451574, "loss": 1.3863, "step": 19100 }, { "epoch": 0.5717612635476069, "grad_norm": 4.200322151184082, "learning_rate": 0.00019453025445859693, "loss": 1.3743, "step": 19150 }, { "epoch": 0.5732541127996895, "grad_norm": 5.686845779418945, "learning_rate": 0.00019451596866267807, "loss": 1.3225, "step": 19200 }, { "epoch": 0.574746962051772, "grad_norm": 7.821211814880371, "learning_rate": 0.00019450168286675926, "loss": 1.3775, "step": 19250 }, { "epoch": 0.5762398113038545, "grad_norm": 5.200834274291992, "learning_rate": 0.0001944873970708404, "loss": 1.3423, "step": 19300 }, { "epoch": 0.5777326605559371, "grad_norm": 5.26302433013916, "learning_rate": 0.0001944731112749216, "loss": 1.3813, "step": 19350 }, { "epoch": 0.5792255098080196, "grad_norm": 3.3207719326019287, "learning_rate": 0.00019445882547900273, "loss": 1.3922, "step": 19400 }, { "epoch": 0.5807183590601022, "grad_norm": 4.619020938873291, "learning_rate": 0.00019444453968308392, "loss": 1.3533, "step": 19450 }, { "epoch": 0.5822112083121846, "grad_norm": 5.780002593994141, "learning_rate": 0.00019443025388716508, "loss": 1.4035, "step": 19500 }, { "epoch": 0.5837040575642671, "grad_norm": 4.961215496063232, "learning_rate": 0.00019441596809124625, "loss": 1.3687, "step": 19550 }, { "epoch": 0.5851969068163497, "grad_norm": 4.50115442276001, "learning_rate": 0.0001944016822953274, "loss": 1.342, "step": 19600 }, { "epoch": 0.5866897560684322, "grad_norm": 3.9477944374084473, "learning_rate": 0.00019438739649940858, "loss": 1.394, "step": 19650 }, { "epoch": 0.5881826053205147, "grad_norm": 3.7466814517974854, "learning_rate": 0.00019437311070348974, "loss": 1.3414, "step": 19700 }, { "epoch": 0.5896754545725973, "grad_norm": 4.382058143615723, "learning_rate": 0.0001943588249075709, "loss": 1.3669, "step": 19750 }, { "epoch": 0.5911683038246798, "grad_norm": 3.7016665935516357, "learning_rate": 0.00019434453911165207, "loss": 1.4548, "step": 19800 }, { "epoch": 0.5926611530767623, "grad_norm": 4.4738030433654785, "learning_rate": 0.00019433025331573324, "loss": 1.4273, "step": 19850 }, { "epoch": 0.5941540023288449, "grad_norm": 5.2445454597473145, "learning_rate": 0.0001943159675198144, "loss": 1.3746, "step": 19900 }, { "epoch": 0.5956468515809273, "grad_norm": 3.766219139099121, "learning_rate": 0.0001943016817238956, "loss": 1.4391, "step": 19950 }, { "epoch": 0.5971397008330099, "grad_norm": 6.310808181762695, "learning_rate": 0.00019428739592797673, "loss": 1.3316, "step": 20000 }, { "epoch": 0.5986325500850924, "grad_norm": 4.055521488189697, "learning_rate": 0.00019427311013205792, "loss": 1.3784, "step": 20050 }, { "epoch": 0.6001253993371749, "grad_norm": 4.933177471160889, "learning_rate": 0.00019425882433613906, "loss": 1.3352, "step": 20100 }, { "epoch": 0.6016182485892575, "grad_norm": 3.8867061138153076, "learning_rate": 0.00019424453854022025, "loss": 1.4169, "step": 20150 }, { "epoch": 0.60311109784134, "grad_norm": 3.364475727081299, "learning_rate": 0.0001942302527443014, "loss": 1.3767, "step": 20200 }, { "epoch": 0.6046039470934225, "grad_norm": 3.48152232170105, "learning_rate": 0.00019421596694838258, "loss": 1.3659, "step": 20250 }, { "epoch": 0.6060967963455051, "grad_norm": 3.3658649921417236, "learning_rate": 0.00019420168115246374, "loss": 1.3745, "step": 20300 }, { "epoch": 0.6075896455975875, "grad_norm": 4.441917896270752, "learning_rate": 0.0001941873953565449, "loss": 1.3827, "step": 20350 }, { "epoch": 0.60908249484967, "grad_norm": 5.014800548553467, "learning_rate": 0.00019417310956062607, "loss": 1.3792, "step": 20400 }, { "epoch": 0.6105753441017526, "grad_norm": 5.472316265106201, "learning_rate": 0.00019415882376470724, "loss": 1.394, "step": 20450 }, { "epoch": 0.6120681933538351, "grad_norm": 5.35073184967041, "learning_rate": 0.0001941445379687884, "loss": 1.401, "step": 20500 }, { "epoch": 0.6135610426059177, "grad_norm": 4.284445762634277, "learning_rate": 0.00019413025217286957, "loss": 1.4354, "step": 20550 }, { "epoch": 0.6150538918580002, "grad_norm": 3.561774492263794, "learning_rate": 0.00019411596637695073, "loss": 1.3295, "step": 20600 }, { "epoch": 0.6165467411100827, "grad_norm": 4.479186534881592, "learning_rate": 0.0001941016805810319, "loss": 1.3629, "step": 20650 }, { "epoch": 0.6180395903621653, "grad_norm": 4.186618804931641, "learning_rate": 0.00019408739478511306, "loss": 1.3399, "step": 20700 }, { "epoch": 0.6195324396142478, "grad_norm": 3.589655637741089, "learning_rate": 0.00019407310898919425, "loss": 1.3516, "step": 20750 }, { "epoch": 0.6210252888663302, "grad_norm": 4.330646514892578, "learning_rate": 0.0001940588231932754, "loss": 1.3148, "step": 20800 }, { "epoch": 0.6225181381184128, "grad_norm": 6.324933052062988, "learning_rate": 0.00019404453739735658, "loss": 1.331, "step": 20850 }, { "epoch": 0.6240109873704953, "grad_norm": 4.652800559997559, "learning_rate": 0.00019403025160143772, "loss": 1.3604, "step": 20900 }, { "epoch": 0.6255038366225778, "grad_norm": 4.831106662750244, "learning_rate": 0.0001940159658055189, "loss": 1.3322, "step": 20950 }, { "epoch": 0.6269966858746604, "grad_norm": 6.0938920974731445, "learning_rate": 0.00019400168000960005, "loss": 1.4106, "step": 21000 }, { "epoch": 0.6284895351267429, "grad_norm": 4.424108028411865, "learning_rate": 0.00019398739421368124, "loss": 1.4001, "step": 21050 }, { "epoch": 0.6299823843788255, "grad_norm": 4.329803466796875, "learning_rate": 0.0001939731084177624, "loss": 1.4048, "step": 21100 }, { "epoch": 0.631475233630908, "grad_norm": 3.915818929672241, "learning_rate": 0.00019395882262184357, "loss": 1.3619, "step": 21150 }, { "epoch": 0.6329680828829904, "grad_norm": 3.9562571048736572, "learning_rate": 0.00019394453682592474, "loss": 1.3636, "step": 21200 }, { "epoch": 0.634460932135073, "grad_norm": 5.229249954223633, "learning_rate": 0.0001939302510300059, "loss": 1.3502, "step": 21250 }, { "epoch": 0.6359537813871555, "grad_norm": 4.149145126342773, "learning_rate": 0.00019391596523408706, "loss": 1.3603, "step": 21300 }, { "epoch": 0.637446630639238, "grad_norm": 4.068868637084961, "learning_rate": 0.00019390167943816823, "loss": 1.3945, "step": 21350 }, { "epoch": 0.6389394798913206, "grad_norm": 3.6808931827545166, "learning_rate": 0.0001938873936422494, "loss": 1.4015, "step": 21400 }, { "epoch": 0.6404323291434031, "grad_norm": 4.391795635223389, "learning_rate": 0.00019387310784633056, "loss": 1.3428, "step": 21450 }, { "epoch": 0.6419251783954856, "grad_norm": 5.109655380249023, "learning_rate": 0.00019385882205041172, "loss": 1.3874, "step": 21500 }, { "epoch": 0.6434180276475682, "grad_norm": 4.560513496398926, "learning_rate": 0.0001938445362544929, "loss": 1.3833, "step": 21550 }, { "epoch": 0.6449108768996507, "grad_norm": 3.6108620166778564, "learning_rate": 0.00019383025045857405, "loss": 1.3981, "step": 21600 }, { "epoch": 0.6464037261517332, "grad_norm": 4.100367546081543, "learning_rate": 0.00019381596466265522, "loss": 1.3139, "step": 21650 }, { "epoch": 0.6478965754038157, "grad_norm": 3.1513540744781494, "learning_rate": 0.00019380167886673638, "loss": 1.3359, "step": 21700 }, { "epoch": 0.6493894246558982, "grad_norm": 4.793807506561279, "learning_rate": 0.00019378739307081755, "loss": 1.4025, "step": 21750 }, { "epoch": 0.6508822739079808, "grad_norm": 4.403114318847656, "learning_rate": 0.0001937731072748987, "loss": 1.349, "step": 21800 }, { "epoch": 0.6523751231600633, "grad_norm": 4.445423603057861, "learning_rate": 0.00019375882147897988, "loss": 1.4641, "step": 21850 }, { "epoch": 0.6538679724121458, "grad_norm": 5.721547603607178, "learning_rate": 0.00019374453568306107, "loss": 1.3451, "step": 21900 }, { "epoch": 0.6553608216642284, "grad_norm": 4.411465644836426, "learning_rate": 0.0001937302498871422, "loss": 1.3893, "step": 21950 }, { "epoch": 0.6568536709163109, "grad_norm": 3.181412696838379, "learning_rate": 0.0001937159640912234, "loss": 1.385, "step": 22000 }, { "epoch": 0.6583465201683933, "grad_norm": 7.374556541442871, "learning_rate": 0.00019370167829530453, "loss": 1.4051, "step": 22050 }, { "epoch": 0.6598393694204759, "grad_norm": 4.452084064483643, "learning_rate": 0.00019368739249938573, "loss": 1.3389, "step": 22100 }, { "epoch": 0.6613322186725584, "grad_norm": 5.60634708404541, "learning_rate": 0.0001936731067034669, "loss": 1.3924, "step": 22150 }, { "epoch": 0.662825067924641, "grad_norm": 4.35741662979126, "learning_rate": 0.00019365882090754806, "loss": 1.3957, "step": 22200 }, { "epoch": 0.6643179171767235, "grad_norm": 4.173916816711426, "learning_rate": 0.00019364453511162922, "loss": 1.3669, "step": 22250 }, { "epoch": 0.665810766428806, "grad_norm": 3.8707377910614014, "learning_rate": 0.00019363024931571038, "loss": 1.3478, "step": 22300 }, { "epoch": 0.6673036156808886, "grad_norm": 4.765937328338623, "learning_rate": 0.00019361596351979155, "loss": 1.4007, "step": 22350 }, { "epoch": 0.6687964649329711, "grad_norm": 4.315809726715088, "learning_rate": 0.00019360167772387271, "loss": 1.3622, "step": 22400 }, { "epoch": 0.6702893141850536, "grad_norm": 5.977219104766846, "learning_rate": 0.00019358739192795388, "loss": 1.3836, "step": 22450 }, { "epoch": 0.6717821634371361, "grad_norm": 4.420149803161621, "learning_rate": 0.00019357310613203504, "loss": 1.3488, "step": 22500 }, { "epoch": 0.6732750126892186, "grad_norm": 3.9537293910980225, "learning_rate": 0.0001935588203361162, "loss": 1.3892, "step": 22550 }, { "epoch": 0.6747678619413011, "grad_norm": 5.468355655670166, "learning_rate": 0.0001935445345401974, "loss": 1.4475, "step": 22600 }, { "epoch": 0.6762607111933837, "grad_norm": 4.3148674964904785, "learning_rate": 0.00019353024874427854, "loss": 1.3866, "step": 22650 }, { "epoch": 0.6777535604454662, "grad_norm": 4.004809379577637, "learning_rate": 0.00019351596294835973, "loss": 1.3811, "step": 22700 }, { "epoch": 0.6792464096975488, "grad_norm": 4.404988765716553, "learning_rate": 0.00019350167715244087, "loss": 1.3648, "step": 22750 }, { "epoch": 0.6807392589496313, "grad_norm": 5.115052223205566, "learning_rate": 0.00019348739135652206, "loss": 1.3523, "step": 22800 }, { "epoch": 0.6822321082017138, "grad_norm": 5.083719730377197, "learning_rate": 0.0001934731055606032, "loss": 1.3978, "step": 22850 }, { "epoch": 0.6837249574537964, "grad_norm": 4.038282871246338, "learning_rate": 0.0001934588197646844, "loss": 1.2788, "step": 22900 }, { "epoch": 0.6852178067058788, "grad_norm": 5.438407897949219, "learning_rate": 0.00019344453396876555, "loss": 1.4181, "step": 22950 }, { "epoch": 0.6867106559579613, "grad_norm": 5.381191730499268, "learning_rate": 0.00019343024817284672, "loss": 1.4145, "step": 23000 }, { "epoch": 0.6882035052100439, "grad_norm": 3.183706283569336, "learning_rate": 0.00019341596237692788, "loss": 1.3271, "step": 23050 }, { "epoch": 0.6896963544621264, "grad_norm": 4.063404083251953, "learning_rate": 0.00019340167658100905, "loss": 1.3717, "step": 23100 }, { "epoch": 0.6911892037142089, "grad_norm": 5.677481651306152, "learning_rate": 0.0001933873907850902, "loss": 1.344, "step": 23150 }, { "epoch": 0.6926820529662915, "grad_norm": 5.376470565795898, "learning_rate": 0.00019337310498917138, "loss": 1.295, "step": 23200 }, { "epoch": 0.694174902218374, "grad_norm": 3.8844990730285645, "learning_rate": 0.00019335881919325254, "loss": 1.381, "step": 23250 }, { "epoch": 0.6956677514704566, "grad_norm": 5.285017013549805, "learning_rate": 0.0001933445333973337, "loss": 1.3505, "step": 23300 }, { "epoch": 0.697160600722539, "grad_norm": 4.133642196655273, "learning_rate": 0.00019333024760141487, "loss": 1.3604, "step": 23350 }, { "epoch": 0.6986534499746215, "grad_norm": 5.037447929382324, "learning_rate": 0.00019331596180549606, "loss": 1.3209, "step": 23400 }, { "epoch": 0.7001462992267041, "grad_norm": 4.916257858276367, "learning_rate": 0.0001933016760095772, "loss": 1.4084, "step": 23450 }, { "epoch": 0.7016391484787866, "grad_norm": 3.469505786895752, "learning_rate": 0.0001932873902136584, "loss": 1.3705, "step": 23500 }, { "epoch": 0.7031319977308691, "grad_norm": 3.624896287918091, "learning_rate": 0.00019327310441773953, "loss": 1.3507, "step": 23550 }, { "epoch": 0.7046248469829517, "grad_norm": 4.352174758911133, "learning_rate": 0.00019325881862182072, "loss": 1.3184, "step": 23600 }, { "epoch": 0.7061176962350342, "grad_norm": 6.4549150466918945, "learning_rate": 0.00019324453282590186, "loss": 1.4161, "step": 23650 }, { "epoch": 0.7076105454871167, "grad_norm": 6.221735000610352, "learning_rate": 0.00019323024702998305, "loss": 1.3425, "step": 23700 }, { "epoch": 0.7091033947391993, "grad_norm": 4.285811901092529, "learning_rate": 0.00019321596123406421, "loss": 1.3418, "step": 23750 }, { "epoch": 0.7105962439912817, "grad_norm": 4.533527374267578, "learning_rate": 0.00019320167543814538, "loss": 1.3723, "step": 23800 }, { "epoch": 0.7120890932433643, "grad_norm": 3.838109016418457, "learning_rate": 0.00019318738964222654, "loss": 1.3624, "step": 23850 }, { "epoch": 0.7135819424954468, "grad_norm": 3.8401126861572266, "learning_rate": 0.0001931731038463077, "loss": 1.4239, "step": 23900 }, { "epoch": 0.7150747917475293, "grad_norm": 5.9049787521362305, "learning_rate": 0.00019315881805038887, "loss": 1.3642, "step": 23950 }, { "epoch": 0.7165676409996119, "grad_norm": 5.397033214569092, "learning_rate": 0.00019314453225447004, "loss": 1.3298, "step": 24000 }, { "epoch": 0.7180604902516944, "grad_norm": 5.649387359619141, "learning_rate": 0.0001931302464585512, "loss": 1.4102, "step": 24050 }, { "epoch": 0.7195533395037769, "grad_norm": 5.697938919067383, "learning_rate": 0.00019311596066263237, "loss": 1.3765, "step": 24100 }, { "epoch": 0.7210461887558595, "grad_norm": 4.244998455047607, "learning_rate": 0.00019310167486671353, "loss": 1.3471, "step": 24150 }, { "epoch": 0.722539038007942, "grad_norm": 4.579226493835449, "learning_rate": 0.0001930873890707947, "loss": 1.3897, "step": 24200 }, { "epoch": 0.7240318872600244, "grad_norm": 8.80657958984375, "learning_rate": 0.00019307310327487586, "loss": 1.3628, "step": 24250 }, { "epoch": 0.725524736512107, "grad_norm": 5.42709493637085, "learning_rate": 0.00019305881747895703, "loss": 1.3587, "step": 24300 }, { "epoch": 0.7270175857641895, "grad_norm": 4.2680840492248535, "learning_rate": 0.0001930445316830382, "loss": 1.3414, "step": 24350 }, { "epoch": 0.7285104350162721, "grad_norm": 5.641107559204102, "learning_rate": 0.00019303024588711935, "loss": 1.4095, "step": 24400 }, { "epoch": 0.7300032842683546, "grad_norm": 5.29530143737793, "learning_rate": 0.00019301596009120052, "loss": 1.389, "step": 24450 }, { "epoch": 0.7314961335204371, "grad_norm": 4.110136032104492, "learning_rate": 0.00019300167429528168, "loss": 1.4641, "step": 24500 }, { "epoch": 0.7329889827725197, "grad_norm": 4.390302658081055, "learning_rate": 0.00019298738849936288, "loss": 1.4028, "step": 24550 }, { "epoch": 0.7344818320246022, "grad_norm": 3.717409372329712, "learning_rate": 0.000192973102703444, "loss": 1.436, "step": 24600 }, { "epoch": 0.7359746812766846, "grad_norm": 5.399808406829834, "learning_rate": 0.0001929588169075252, "loss": 1.2887, "step": 24650 }, { "epoch": 0.7374675305287672, "grad_norm": 5.027936935424805, "learning_rate": 0.00019294453111160634, "loss": 1.3591, "step": 24700 }, { "epoch": 0.7389603797808497, "grad_norm": 5.12235689163208, "learning_rate": 0.00019293024531568753, "loss": 1.4305, "step": 24750 }, { "epoch": 0.7404532290329322, "grad_norm": 4.445746898651123, "learning_rate": 0.0001929159595197687, "loss": 1.3609, "step": 24800 }, { "epoch": 0.7419460782850148, "grad_norm": 4.590288162231445, "learning_rate": 0.00019290167372384986, "loss": 1.3811, "step": 24850 }, { "epoch": 0.7434389275370973, "grad_norm": 3.841573476791382, "learning_rate": 0.00019288738792793103, "loss": 1.3632, "step": 24900 }, { "epoch": 0.7449317767891799, "grad_norm": 5.2155327796936035, "learning_rate": 0.0001928731021320122, "loss": 1.4422, "step": 24950 }, { "epoch": 0.7464246260412624, "grad_norm": 4.942720413208008, "learning_rate": 0.00019285881633609336, "loss": 1.3916, "step": 25000 }, { "epoch": 0.7479174752933448, "grad_norm": 5.292357921600342, "learning_rate": 0.00019284453054017452, "loss": 1.3048, "step": 25050 }, { "epoch": 0.7494103245454274, "grad_norm": 6.690375804901123, "learning_rate": 0.0001928302447442557, "loss": 1.3967, "step": 25100 }, { "epoch": 0.7509031737975099, "grad_norm": 4.590394496917725, "learning_rate": 0.00019281595894833685, "loss": 1.3925, "step": 25150 }, { "epoch": 0.7523960230495924, "grad_norm": 4.220889091491699, "learning_rate": 0.00019280167315241802, "loss": 1.3263, "step": 25200 }, { "epoch": 0.753888872301675, "grad_norm": 4.847371578216553, "learning_rate": 0.00019278738735649918, "loss": 1.4141, "step": 25250 }, { "epoch": 0.7553817215537575, "grad_norm": 5.392793655395508, "learning_rate": 0.00019277310156058035, "loss": 1.3147, "step": 25300 }, { "epoch": 0.75687457080584, "grad_norm": 4.261468887329102, "learning_rate": 0.00019275881576466154, "loss": 1.3102, "step": 25350 }, { "epoch": 0.7583674200579226, "grad_norm": 4.498802661895752, "learning_rate": 0.00019274452996874267, "loss": 1.3912, "step": 25400 }, { "epoch": 0.759860269310005, "grad_norm": 5.2235283851623535, "learning_rate": 0.00019273024417282387, "loss": 1.3375, "step": 25450 }, { "epoch": 0.7613531185620876, "grad_norm": 6.409016132354736, "learning_rate": 0.000192715958376905, "loss": 1.3339, "step": 25500 }, { "epoch": 0.7628459678141701, "grad_norm": 4.4392805099487305, "learning_rate": 0.0001927016725809862, "loss": 1.3599, "step": 25550 }, { "epoch": 0.7643388170662526, "grad_norm": 5.580776691436768, "learning_rate": 0.00019268738678506736, "loss": 1.3812, "step": 25600 }, { "epoch": 0.7658316663183352, "grad_norm": 5.473046779632568, "learning_rate": 0.00019267310098914852, "loss": 1.3582, "step": 25650 }, { "epoch": 0.7673245155704177, "grad_norm": 6.353076934814453, "learning_rate": 0.0001926588151932297, "loss": 1.3388, "step": 25700 }, { "epoch": 0.7688173648225002, "grad_norm": 4.247453689575195, "learning_rate": 0.00019264452939731085, "loss": 1.3745, "step": 25750 }, { "epoch": 0.7703102140745828, "grad_norm": 5.048892498016357, "learning_rate": 0.00019263024360139202, "loss": 1.405, "step": 25800 }, { "epoch": 0.7718030633266653, "grad_norm": 4.883440017700195, "learning_rate": 0.00019261595780547318, "loss": 1.4082, "step": 25850 }, { "epoch": 0.7732959125787477, "grad_norm": 4.221151828765869, "learning_rate": 0.00019260167200955435, "loss": 1.3766, "step": 25900 }, { "epoch": 0.7747887618308303, "grad_norm": 8.68738079071045, "learning_rate": 0.0001925873862136355, "loss": 1.4046, "step": 25950 }, { "epoch": 0.7762816110829128, "grad_norm": 4.479017734527588, "learning_rate": 0.00019257310041771668, "loss": 1.3452, "step": 26000 }, { "epoch": 0.7777744603349954, "grad_norm": 4.299473285675049, "learning_rate": 0.00019255881462179787, "loss": 1.3702, "step": 26050 }, { "epoch": 0.7792673095870779, "grad_norm": 5.272589683532715, "learning_rate": 0.000192544528825879, "loss": 1.3725, "step": 26100 }, { "epoch": 0.7807601588391604, "grad_norm": 4.644061088562012, "learning_rate": 0.0001925302430299602, "loss": 1.4143, "step": 26150 }, { "epoch": 0.782253008091243, "grad_norm": 4.7033185958862305, "learning_rate": 0.00019251595723404134, "loss": 1.3666, "step": 26200 }, { "epoch": 0.7837458573433255, "grad_norm": 3.5262560844421387, "learning_rate": 0.00019250167143812253, "loss": 1.4135, "step": 26250 }, { "epoch": 0.785238706595408, "grad_norm": 3.8599159717559814, "learning_rate": 0.00019248738564220367, "loss": 1.3258, "step": 26300 }, { "epoch": 0.7867315558474905, "grad_norm": 5.743364334106445, "learning_rate": 0.00019247309984628486, "loss": 1.3946, "step": 26350 }, { "epoch": 0.788224405099573, "grad_norm": 5.478078365325928, "learning_rate": 0.00019245881405036602, "loss": 1.3881, "step": 26400 }, { "epoch": 0.7897172543516555, "grad_norm": 5.912649631500244, "learning_rate": 0.0001924445282544472, "loss": 1.4139, "step": 26450 }, { "epoch": 0.7912101036037381, "grad_norm": 3.753570079803467, "learning_rate": 0.00019243024245852835, "loss": 1.3267, "step": 26500 }, { "epoch": 0.7927029528558206, "grad_norm": 4.155190944671631, "learning_rate": 0.00019241595666260952, "loss": 1.3539, "step": 26550 }, { "epoch": 0.7941958021079032, "grad_norm": 4.314638614654541, "learning_rate": 0.00019240167086669068, "loss": 1.3635, "step": 26600 }, { "epoch": 0.7956886513599857, "grad_norm": 4.177329063415527, "learning_rate": 0.00019238738507077185, "loss": 1.3642, "step": 26650 }, { "epoch": 0.7971815006120682, "grad_norm": 4.3052144050598145, "learning_rate": 0.000192373099274853, "loss": 1.3011, "step": 26700 }, { "epoch": 0.7986743498641508, "grad_norm": 5.800657272338867, "learning_rate": 0.00019235881347893417, "loss": 1.3247, "step": 26750 }, { "epoch": 0.8001671991162332, "grad_norm": 4.211179733276367, "learning_rate": 0.00019234452768301534, "loss": 1.3399, "step": 26800 }, { "epoch": 0.8016600483683157, "grad_norm": 4.8080315589904785, "learning_rate": 0.0001923302418870965, "loss": 1.3439, "step": 26850 }, { "epoch": 0.8031528976203983, "grad_norm": 5.03045129776001, "learning_rate": 0.00019231595609117767, "loss": 1.4009, "step": 26900 }, { "epoch": 0.8046457468724808, "grad_norm": 8.932162284851074, "learning_rate": 0.00019230167029525883, "loss": 1.3298, "step": 26950 }, { "epoch": 0.8061385961245633, "grad_norm": 5.651429176330566, "learning_rate": 0.00019228738449934, "loss": 1.4044, "step": 27000 }, { "epoch": 0.8076314453766459, "grad_norm": 4.428074836730957, "learning_rate": 0.00019227309870342116, "loss": 1.3988, "step": 27050 }, { "epoch": 0.8091242946287284, "grad_norm": 6.512781620025635, "learning_rate": 0.00019225881290750233, "loss": 1.3529, "step": 27100 }, { "epoch": 0.810617143880811, "grad_norm": 5.393406867980957, "learning_rate": 0.0001922445271115835, "loss": 1.3266, "step": 27150 }, { "epoch": 0.8121099931328934, "grad_norm": 3.74702525138855, "learning_rate": 0.00019223024131566468, "loss": 1.416, "step": 27200 }, { "epoch": 0.8136028423849759, "grad_norm": 3.856290817260742, "learning_rate": 0.00019221595551974582, "loss": 1.4082, "step": 27250 }, { "epoch": 0.8150956916370585, "grad_norm": 4.597848415374756, "learning_rate": 0.000192201669723827, "loss": 1.3751, "step": 27300 }, { "epoch": 0.816588540889141, "grad_norm": 3.875593662261963, "learning_rate": 0.00019218738392790815, "loss": 1.4024, "step": 27350 }, { "epoch": 0.8180813901412235, "grad_norm": 5.470495700836182, "learning_rate": 0.00019217309813198934, "loss": 1.394, "step": 27400 }, { "epoch": 0.8195742393933061, "grad_norm": 4.562834739685059, "learning_rate": 0.00019215881233607048, "loss": 1.3161, "step": 27450 }, { "epoch": 0.8210670886453886, "grad_norm": 4.183352470397949, "learning_rate": 0.00019214452654015167, "loss": 1.3427, "step": 27500 }, { "epoch": 0.8225599378974711, "grad_norm": 5.345236301422119, "learning_rate": 0.00019213024074423284, "loss": 1.3551, "step": 27550 }, { "epoch": 0.8240527871495537, "grad_norm": 5.217405319213867, "learning_rate": 0.000192115954948314, "loss": 1.3174, "step": 27600 }, { "epoch": 0.8255456364016361, "grad_norm": 4.486965179443359, "learning_rate": 0.00019210166915239517, "loss": 1.3931, "step": 27650 }, { "epoch": 0.8270384856537187, "grad_norm": 4.183075904846191, "learning_rate": 0.00019208738335647633, "loss": 1.3672, "step": 27700 }, { "epoch": 0.8285313349058012, "grad_norm": 4.916398525238037, "learning_rate": 0.0001920730975605575, "loss": 1.3993, "step": 27750 }, { "epoch": 0.8300241841578837, "grad_norm": 3.99765682220459, "learning_rate": 0.00019205881176463866, "loss": 1.362, "step": 27800 }, { "epoch": 0.8315170334099663, "grad_norm": 3.948981523513794, "learning_rate": 0.00019204452596871982, "loss": 1.3912, "step": 27850 }, { "epoch": 0.8330098826620488, "grad_norm": 6.524683475494385, "learning_rate": 0.000192030240172801, "loss": 1.3845, "step": 27900 }, { "epoch": 0.8345027319141313, "grad_norm": 4.598319053649902, "learning_rate": 0.00019201595437688215, "loss": 1.38, "step": 27950 }, { "epoch": 0.8359955811662139, "grad_norm": 4.287365436553955, "learning_rate": 0.00019200166858096335, "loss": 1.3687, "step": 28000 }, { "epoch": 0.8374884304182963, "grad_norm": 5.497170448303223, "learning_rate": 0.00019198738278504448, "loss": 1.4324, "step": 28050 }, { "epoch": 0.8389812796703788, "grad_norm": 3.9373042583465576, "learning_rate": 0.00019197309698912567, "loss": 1.3892, "step": 28100 }, { "epoch": 0.8404741289224614, "grad_norm": 5.157922267913818, "learning_rate": 0.0001919588111932068, "loss": 1.3764, "step": 28150 }, { "epoch": 0.8419669781745439, "grad_norm": 4.0024895668029785, "learning_rate": 0.000191944525397288, "loss": 1.3571, "step": 28200 }, { "epoch": 0.8434598274266265, "grad_norm": 4.894957542419434, "learning_rate": 0.00019193023960136917, "loss": 1.3997, "step": 28250 }, { "epoch": 0.844952676678709, "grad_norm": 4.829247951507568, "learning_rate": 0.00019191595380545033, "loss": 1.3751, "step": 28300 }, { "epoch": 0.8464455259307915, "grad_norm": 4.123593330383301, "learning_rate": 0.0001919016680095315, "loss": 1.4183, "step": 28350 }, { "epoch": 0.8479383751828741, "grad_norm": 4.012912273406982, "learning_rate": 0.00019188738221361266, "loss": 1.3757, "step": 28400 }, { "epoch": 0.8494312244349566, "grad_norm": 5.17979097366333, "learning_rate": 0.00019187309641769383, "loss": 1.3975, "step": 28450 }, { "epoch": 0.850924073687039, "grad_norm": 4.665642261505127, "learning_rate": 0.000191858810621775, "loss": 1.4539, "step": 28500 }, { "epoch": 0.8524169229391216, "grad_norm": 4.333798408508301, "learning_rate": 0.00019184452482585616, "loss": 1.3711, "step": 28550 }, { "epoch": 0.8539097721912041, "grad_norm": 4.485865116119385, "learning_rate": 0.00019183023902993732, "loss": 1.4299, "step": 28600 }, { "epoch": 0.8554026214432866, "grad_norm": 6.3318328857421875, "learning_rate": 0.00019181595323401849, "loss": 1.3621, "step": 28650 }, { "epoch": 0.8568954706953692, "grad_norm": 3.915356159210205, "learning_rate": 0.00019180166743809965, "loss": 1.4561, "step": 28700 }, { "epoch": 0.8583883199474517, "grad_norm": 4.613978862762451, "learning_rate": 0.00019178738164218081, "loss": 1.3267, "step": 28750 }, { "epoch": 0.8598811691995343, "grad_norm": 4.935427188873291, "learning_rate": 0.000191773095846262, "loss": 1.3962, "step": 28800 }, { "epoch": 0.8613740184516168, "grad_norm": 4.204285144805908, "learning_rate": 0.00019175881005034314, "loss": 1.3644, "step": 28850 }, { "epoch": 0.8628668677036992, "grad_norm": 3.901266098022461, "learning_rate": 0.00019174452425442434, "loss": 1.344, "step": 28900 }, { "epoch": 0.8643597169557818, "grad_norm": 5.1740922927856445, "learning_rate": 0.00019173023845850547, "loss": 1.3852, "step": 28950 }, { "epoch": 0.8658525662078643, "grad_norm": 4.984529972076416, "learning_rate": 0.00019171595266258667, "loss": 1.3853, "step": 29000 }, { "epoch": 0.8673454154599468, "grad_norm": 4.719232082366943, "learning_rate": 0.00019170166686666783, "loss": 1.3962, "step": 29050 }, { "epoch": 0.8688382647120294, "grad_norm": 6.084499359130859, "learning_rate": 0.000191687381070749, "loss": 1.3377, "step": 29100 }, { "epoch": 0.8703311139641119, "grad_norm": 5.261173248291016, "learning_rate": 0.00019167309527483016, "loss": 1.3673, "step": 29150 }, { "epoch": 0.8718239632161944, "grad_norm": 5.18539571762085, "learning_rate": 0.00019165880947891132, "loss": 1.358, "step": 29200 }, { "epoch": 0.873316812468277, "grad_norm": 4.789880275726318, "learning_rate": 0.0001916445236829925, "loss": 1.3426, "step": 29250 }, { "epoch": 0.8748096617203595, "grad_norm": 3.2945287227630615, "learning_rate": 0.00019163023788707365, "loss": 1.3308, "step": 29300 }, { "epoch": 0.876302510972442, "grad_norm": 5.060371398925781, "learning_rate": 0.00019161595209115482, "loss": 1.3497, "step": 29350 }, { "epoch": 0.8777953602245245, "grad_norm": 4.771635055541992, "learning_rate": 0.00019160166629523598, "loss": 1.3517, "step": 29400 }, { "epoch": 0.879288209476607, "grad_norm": 4.408042907714844, "learning_rate": 0.00019158738049931715, "loss": 1.3636, "step": 29450 }, { "epoch": 0.8807810587286896, "grad_norm": 5.183899879455566, "learning_rate": 0.0001915730947033983, "loss": 1.3569, "step": 29500 }, { "epoch": 0.8822739079807721, "grad_norm": 4.176016807556152, "learning_rate": 0.00019155880890747948, "loss": 1.3659, "step": 29550 }, { "epoch": 0.8837667572328546, "grad_norm": 4.783830165863037, "learning_rate": 0.00019154452311156064, "loss": 1.368, "step": 29600 }, { "epoch": 0.8852596064849372, "grad_norm": 3.7985739707946777, "learning_rate": 0.0001915302373156418, "loss": 1.4091, "step": 29650 }, { "epoch": 0.8867524557370197, "grad_norm": 4.401581287384033, "learning_rate": 0.00019151595151972297, "loss": 1.3613, "step": 29700 }, { "epoch": 0.8882453049891021, "grad_norm": 6.214754581451416, "learning_rate": 0.00019150166572380414, "loss": 1.4077, "step": 29750 }, { "epoch": 0.8897381542411847, "grad_norm": 4.197826862335205, "learning_rate": 0.0001914873799278853, "loss": 1.347, "step": 29800 }, { "epoch": 0.8912310034932672, "grad_norm": 3.1535027027130127, "learning_rate": 0.0001914730941319665, "loss": 1.3452, "step": 29850 }, { "epoch": 0.8927238527453498, "grad_norm": 4.618538856506348, "learning_rate": 0.00019145880833604763, "loss": 1.3417, "step": 29900 }, { "epoch": 0.8942167019974323, "grad_norm": 3.913506031036377, "learning_rate": 0.00019144452254012882, "loss": 1.3503, "step": 29950 }, { "epoch": 0.8957095512495148, "grad_norm": 4.013761043548584, "learning_rate": 0.00019143023674420996, "loss": 1.4109, "step": 30000 }, { "epoch": 0.8972024005015974, "grad_norm": 4.6257758140563965, "learning_rate": 0.00019141595094829115, "loss": 1.3679, "step": 30050 }, { "epoch": 0.8986952497536799, "grad_norm": 4.422520637512207, "learning_rate": 0.0001914016651523723, "loss": 1.3893, "step": 30100 }, { "epoch": 0.9001880990057624, "grad_norm": 4.911538124084473, "learning_rate": 0.00019138737935645348, "loss": 1.3346, "step": 30150 }, { "epoch": 0.901680948257845, "grad_norm": 6.410045623779297, "learning_rate": 0.00019137309356053464, "loss": 1.3651, "step": 30200 }, { "epoch": 0.9031737975099274, "grad_norm": 3.8565444946289062, "learning_rate": 0.0001913588077646158, "loss": 1.3436, "step": 30250 }, { "epoch": 0.9046666467620099, "grad_norm": 4.632288455963135, "learning_rate": 0.00019134452196869697, "loss": 1.3858, "step": 30300 }, { "epoch": 0.9061594960140925, "grad_norm": 5.382070064544678, "learning_rate": 0.00019133023617277814, "loss": 1.3872, "step": 30350 }, { "epoch": 0.907652345266175, "grad_norm": 4.425014019012451, "learning_rate": 0.0001913159503768593, "loss": 1.4488, "step": 30400 }, { "epoch": 0.9091451945182576, "grad_norm": 3.7266223430633545, "learning_rate": 0.00019130166458094047, "loss": 1.376, "step": 30450 }, { "epoch": 0.9106380437703401, "grad_norm": 7.25547981262207, "learning_rate": 0.00019128737878502163, "loss": 1.3488, "step": 30500 }, { "epoch": 0.9121308930224226, "grad_norm": 5.824578285217285, "learning_rate": 0.0001912730929891028, "loss": 1.4001, "step": 30550 }, { "epoch": 0.9136237422745052, "grad_norm": 5.808544635772705, "learning_rate": 0.00019125880719318396, "loss": 1.3132, "step": 30600 }, { "epoch": 0.9151165915265876, "grad_norm": 4.343785285949707, "learning_rate": 0.00019124452139726515, "loss": 1.3592, "step": 30650 }, { "epoch": 0.9166094407786701, "grad_norm": 5.0626325607299805, "learning_rate": 0.0001912302356013463, "loss": 1.4418, "step": 30700 }, { "epoch": 0.9181022900307527, "grad_norm": 4.336055755615234, "learning_rate": 0.00019121594980542748, "loss": 1.3624, "step": 30750 }, { "epoch": 0.9195951392828352, "grad_norm": 6.215260982513428, "learning_rate": 0.00019120166400950862, "loss": 1.4053, "step": 30800 }, { "epoch": 0.9210879885349177, "grad_norm": 4.496364116668701, "learning_rate": 0.0001911873782135898, "loss": 1.4117, "step": 30850 }, { "epoch": 0.9225808377870003, "grad_norm": 4.023138046264648, "learning_rate": 0.00019117309241767095, "loss": 1.3783, "step": 30900 }, { "epoch": 0.9240736870390828, "grad_norm": 3.8177783489227295, "learning_rate": 0.00019115880662175214, "loss": 1.3127, "step": 30950 }, { "epoch": 0.9255665362911654, "grad_norm": 3.893087387084961, "learning_rate": 0.0001911445208258333, "loss": 1.3359, "step": 31000 }, { "epoch": 0.9270593855432478, "grad_norm": 3.9967384338378906, "learning_rate": 0.00019113023502991447, "loss": 1.3897, "step": 31050 }, { "epoch": 0.9285522347953303, "grad_norm": 3.2351789474487305, "learning_rate": 0.00019111594923399563, "loss": 1.3406, "step": 31100 }, { "epoch": 0.9300450840474129, "grad_norm": 4.571500778198242, "learning_rate": 0.0001911016634380768, "loss": 1.3783, "step": 31150 }, { "epoch": 0.9315379332994954, "grad_norm": 6.241518020629883, "learning_rate": 0.00019108737764215796, "loss": 1.3659, "step": 31200 }, { "epoch": 0.9330307825515779, "grad_norm": 3.2547826766967773, "learning_rate": 0.00019107309184623913, "loss": 1.3137, "step": 31250 }, { "epoch": 0.9345236318036605, "grad_norm": 5.386812686920166, "learning_rate": 0.0001910588060503203, "loss": 1.3974, "step": 31300 }, { "epoch": 0.936016481055743, "grad_norm": 4.239005088806152, "learning_rate": 0.00019104452025440146, "loss": 1.3789, "step": 31350 }, { "epoch": 0.9375093303078255, "grad_norm": 3.7252326011657715, "learning_rate": 0.00019103023445848262, "loss": 1.3622, "step": 31400 }, { "epoch": 0.9390021795599081, "grad_norm": 5.890969276428223, "learning_rate": 0.00019101594866256381, "loss": 1.3905, "step": 31450 }, { "epoch": 0.9404950288119905, "grad_norm": 5.796853542327881, "learning_rate": 0.00019100166286664495, "loss": 1.3797, "step": 31500 }, { "epoch": 0.9419878780640731, "grad_norm": 4.130026817321777, "learning_rate": 0.00019098737707072614, "loss": 1.3365, "step": 31550 }, { "epoch": 0.9434807273161556, "grad_norm": 3.65081524848938, "learning_rate": 0.00019097309127480728, "loss": 1.4347, "step": 31600 }, { "epoch": 0.9449735765682381, "grad_norm": 4.91404390335083, "learning_rate": 0.00019095880547888847, "loss": 1.4056, "step": 31650 }, { "epoch": 0.9464664258203207, "grad_norm": 4.7272114753723145, "learning_rate": 0.00019094451968296964, "loss": 1.2828, "step": 31700 }, { "epoch": 0.9479592750724032, "grad_norm": 5.111660957336426, "learning_rate": 0.0001909302338870508, "loss": 1.3486, "step": 31750 }, { "epoch": 0.9494521243244857, "grad_norm": 3.1206741333007812, "learning_rate": 0.00019091594809113197, "loss": 1.3416, "step": 31800 }, { "epoch": 0.9509449735765683, "grad_norm": 4.359163761138916, "learning_rate": 0.00019090166229521313, "loss": 1.3471, "step": 31850 }, { "epoch": 0.9524378228286507, "grad_norm": 4.667807102203369, "learning_rate": 0.0001908873764992943, "loss": 1.4042, "step": 31900 }, { "epoch": 0.9539306720807332, "grad_norm": 4.410369873046875, "learning_rate": 0.00019087309070337546, "loss": 1.3909, "step": 31950 }, { "epoch": 0.9554235213328158, "grad_norm": 4.987631320953369, "learning_rate": 0.00019085880490745663, "loss": 1.3548, "step": 32000 }, { "epoch": 0.9569163705848983, "grad_norm": 4.66121768951416, "learning_rate": 0.0001908445191115378, "loss": 1.3753, "step": 32050 }, { "epoch": 0.9584092198369809, "grad_norm": 6.010061740875244, "learning_rate": 0.00019083023331561896, "loss": 1.4128, "step": 32100 }, { "epoch": 0.9599020690890634, "grad_norm": 3.118246078491211, "learning_rate": 0.00019081594751970012, "loss": 1.3625, "step": 32150 }, { "epoch": 0.9613949183411459, "grad_norm": 3.821732759475708, "learning_rate": 0.00019080166172378128, "loss": 1.3691, "step": 32200 }, { "epoch": 0.9628877675932285, "grad_norm": 5.321105480194092, "learning_rate": 0.00019078737592786245, "loss": 1.3499, "step": 32250 }, { "epoch": 0.964380616845311, "grad_norm": 5.963992595672607, "learning_rate": 0.00019077309013194361, "loss": 1.3145, "step": 32300 }, { "epoch": 0.9658734660973934, "grad_norm": 4.071386337280273, "learning_rate": 0.00019075880433602478, "loss": 1.3215, "step": 32350 }, { "epoch": 0.967366315349476, "grad_norm": 3.7641048431396484, "learning_rate": 0.00019074451854010594, "loss": 1.3846, "step": 32400 }, { "epoch": 0.9688591646015585, "grad_norm": 5.985085487365723, "learning_rate": 0.0001907302327441871, "loss": 1.3383, "step": 32450 }, { "epoch": 0.970352013853641, "grad_norm": 6.602528095245361, "learning_rate": 0.0001907159469482683, "loss": 1.4195, "step": 32500 }, { "epoch": 0.9718448631057236, "grad_norm": 5.370469570159912, "learning_rate": 0.00019070166115234944, "loss": 1.3485, "step": 32550 }, { "epoch": 0.9733377123578061, "grad_norm": 4.588457107543945, "learning_rate": 0.00019068737535643063, "loss": 1.3896, "step": 32600 }, { "epoch": 0.9748305616098887, "grad_norm": 6.389341354370117, "learning_rate": 0.00019067308956051177, "loss": 1.3118, "step": 32650 }, { "epoch": 0.9763234108619712, "grad_norm": 4.051975250244141, "learning_rate": 0.00019065880376459296, "loss": 1.4065, "step": 32700 }, { "epoch": 0.9778162601140536, "grad_norm": 5.474813938140869, "learning_rate": 0.0001906445179686741, "loss": 1.3967, "step": 32750 }, { "epoch": 0.9793091093661362, "grad_norm": 4.033454895019531, "learning_rate": 0.0001906302321727553, "loss": 1.3212, "step": 32800 }, { "epoch": 0.9808019586182187, "grad_norm": 4.46946382522583, "learning_rate": 0.00019061594637683645, "loss": 1.3917, "step": 32850 }, { "epoch": 0.9822948078703012, "grad_norm": 4.98955774307251, "learning_rate": 0.00019060166058091762, "loss": 1.3814, "step": 32900 }, { "epoch": 0.9837876571223838, "grad_norm": 4.879435062408447, "learning_rate": 0.00019058737478499878, "loss": 1.3486, "step": 32950 }, { "epoch": 0.9852805063744663, "grad_norm": 4.0941925048828125, "learning_rate": 0.00019057308898907995, "loss": 1.3289, "step": 33000 }, { "epoch": 0.9867733556265488, "grad_norm": 5.2992706298828125, "learning_rate": 0.0001905588031931611, "loss": 1.3458, "step": 33050 }, { "epoch": 0.9882662048786314, "grad_norm": 4.284411430358887, "learning_rate": 0.00019054451739724228, "loss": 1.3932, "step": 33100 }, { "epoch": 0.9897590541307139, "grad_norm": 4.969634532928467, "learning_rate": 0.00019053023160132344, "loss": 1.4044, "step": 33150 }, { "epoch": 0.9912519033827965, "grad_norm": 5.532953262329102, "learning_rate": 0.0001905159458054046, "loss": 1.3658, "step": 33200 }, { "epoch": 0.9927447526348789, "grad_norm": 4.402670383453369, "learning_rate": 0.00019050166000948577, "loss": 1.343, "step": 33250 }, { "epoch": 0.9942376018869614, "grad_norm": 4.458900451660156, "learning_rate": 0.00019048737421356696, "loss": 1.4002, "step": 33300 }, { "epoch": 0.995730451139044, "grad_norm": 3.934340238571167, "learning_rate": 0.0001904730884176481, "loss": 1.413, "step": 33350 }, { "epoch": 0.9972233003911265, "grad_norm": 4.274466514587402, "learning_rate": 0.0001904588026217293, "loss": 1.3552, "step": 33400 }, { "epoch": 0.998716149643209, "grad_norm": 4.774006366729736, "learning_rate": 0.00019044451682581043, "loss": 1.3825, "step": 33450 }, { "epoch": 1.0002089988952916, "grad_norm": 3.870762825012207, "learning_rate": 0.00019043023102989162, "loss": 1.3715, "step": 33500 }, { "epoch": 1.001701848147374, "grad_norm": 3.595686912536621, "learning_rate": 0.00019041594523397276, "loss": 1.3352, "step": 33550 }, { "epoch": 1.0031946973994565, "grad_norm": 5.492071151733398, "learning_rate": 0.00019040165943805395, "loss": 1.2927, "step": 33600 }, { "epoch": 1.004687546651539, "grad_norm": 5.381584167480469, "learning_rate": 0.00019038737364213511, "loss": 1.2545, "step": 33650 }, { "epoch": 1.0061803959036217, "grad_norm": 5.983656883239746, "learning_rate": 0.00019037308784621628, "loss": 1.2393, "step": 33700 }, { "epoch": 1.0076732451557042, "grad_norm": 4.956115245819092, "learning_rate": 0.00019035880205029744, "loss": 1.2931, "step": 33750 }, { "epoch": 1.0091660944077867, "grad_norm": 4.604572296142578, "learning_rate": 0.0001903445162543786, "loss": 1.2786, "step": 33800 }, { "epoch": 1.0106589436598692, "grad_norm": 5.043067932128906, "learning_rate": 0.00019033023045845977, "loss": 1.2933, "step": 33850 }, { "epoch": 1.0121517929119517, "grad_norm": 4.53336238861084, "learning_rate": 0.00019031594466254094, "loss": 1.3114, "step": 33900 }, { "epoch": 1.0136446421640344, "grad_norm": 5.106605052947998, "learning_rate": 0.0001903016588666221, "loss": 1.2994, "step": 33950 }, { "epoch": 1.0151374914161169, "grad_norm": 5.796351909637451, "learning_rate": 0.00019028737307070327, "loss": 1.2555, "step": 34000 }, { "epoch": 1.0166303406681994, "grad_norm": 4.970977306365967, "learning_rate": 0.00019027308727478443, "loss": 1.3343, "step": 34050 }, { "epoch": 1.0181231899202818, "grad_norm": 4.233397960662842, "learning_rate": 0.00019025880147886562, "loss": 1.2485, "step": 34100 }, { "epoch": 1.0196160391723643, "grad_norm": 4.012045383453369, "learning_rate": 0.00019024451568294676, "loss": 1.2816, "step": 34150 }, { "epoch": 1.0211088884244468, "grad_norm": 4.715073585510254, "learning_rate": 0.00019023022988702795, "loss": 1.2541, "step": 34200 }, { "epoch": 1.0226017376765295, "grad_norm": 3.7285947799682617, "learning_rate": 0.0001902159440911091, "loss": 1.3094, "step": 34250 }, { "epoch": 1.024094586928612, "grad_norm": 4.012038707733154, "learning_rate": 0.00019020165829519028, "loss": 1.2791, "step": 34300 }, { "epoch": 1.0255874361806945, "grad_norm": 4.04899263381958, "learning_rate": 0.00019018737249927142, "loss": 1.3085, "step": 34350 }, { "epoch": 1.027080285432777, "grad_norm": 4.380834102630615, "learning_rate": 0.0001901730867033526, "loss": 1.3212, "step": 34400 }, { "epoch": 1.0285731346848594, "grad_norm": 5.275148868560791, "learning_rate": 0.00019015880090743378, "loss": 1.2749, "step": 34450 }, { "epoch": 1.0300659839369422, "grad_norm": 4.162272930145264, "learning_rate": 0.00019014451511151494, "loss": 1.296, "step": 34500 }, { "epoch": 1.0315588331890246, "grad_norm": 4.128006935119629, "learning_rate": 0.0001901302293155961, "loss": 1.2547, "step": 34550 }, { "epoch": 1.0330516824411071, "grad_norm": 3.930121421813965, "learning_rate": 0.00019011594351967727, "loss": 1.2831, "step": 34600 }, { "epoch": 1.0345445316931896, "grad_norm": 4.515873908996582, "learning_rate": 0.00019010165772375843, "loss": 1.2747, "step": 34650 }, { "epoch": 1.036037380945272, "grad_norm": 4.484467506408691, "learning_rate": 0.0001900873719278396, "loss": 1.289, "step": 34700 }, { "epoch": 1.0375302301973546, "grad_norm": 3.8937742710113525, "learning_rate": 0.00019007308613192076, "loss": 1.2939, "step": 34750 }, { "epoch": 1.0390230794494373, "grad_norm": 4.367059230804443, "learning_rate": 0.00019005880033600193, "loss": 1.2522, "step": 34800 }, { "epoch": 1.0405159287015198, "grad_norm": 5.143396854400635, "learning_rate": 0.0001900445145400831, "loss": 1.2996, "step": 34850 }, { "epoch": 1.0420087779536022, "grad_norm": 5.060800552368164, "learning_rate": 0.00019003022874416426, "loss": 1.3122, "step": 34900 }, { "epoch": 1.0435016272056847, "grad_norm": 4.775914192199707, "learning_rate": 0.00019001594294824542, "loss": 1.2595, "step": 34950 }, { "epoch": 1.0449944764577672, "grad_norm": 5.256932258605957, "learning_rate": 0.0001900016571523266, "loss": 1.3148, "step": 35000 }, { "epoch": 1.04648732570985, "grad_norm": 4.253088474273682, "learning_rate": 0.00018998737135640775, "loss": 1.2575, "step": 35050 }, { "epoch": 1.0479801749619324, "grad_norm": 4.784812927246094, "learning_rate": 0.00018997308556048892, "loss": 1.2911, "step": 35100 }, { "epoch": 1.049473024214015, "grad_norm": 3.9015183448791504, "learning_rate": 0.0001899587997645701, "loss": 1.2647, "step": 35150 }, { "epoch": 1.0509658734660974, "grad_norm": 4.729675769805908, "learning_rate": 0.00018994451396865125, "loss": 1.2876, "step": 35200 }, { "epoch": 1.0524587227181799, "grad_norm": 4.921074867248535, "learning_rate": 0.00018993022817273244, "loss": 1.2956, "step": 35250 }, { "epoch": 1.0539515719702623, "grad_norm": 4.466576099395752, "learning_rate": 0.00018991594237681357, "loss": 1.2938, "step": 35300 }, { "epoch": 1.055444421222345, "grad_norm": 4.142183780670166, "learning_rate": 0.00018990165658089477, "loss": 1.282, "step": 35350 }, { "epoch": 1.0569372704744275, "grad_norm": 4.372234344482422, "learning_rate": 0.0001898873707849759, "loss": 1.2793, "step": 35400 }, { "epoch": 1.05843011972651, "grad_norm": 5.349823474884033, "learning_rate": 0.0001898730849890571, "loss": 1.3678, "step": 35450 }, { "epoch": 1.0599229689785925, "grad_norm": 4.57612419128418, "learning_rate": 0.00018985879919313826, "loss": 1.3256, "step": 35500 }, { "epoch": 1.061415818230675, "grad_norm": 5.3342156410217285, "learning_rate": 0.00018984451339721942, "loss": 1.2897, "step": 35550 }, { "epoch": 1.0629086674827577, "grad_norm": 5.361871242523193, "learning_rate": 0.0001898302276013006, "loss": 1.2933, "step": 35600 }, { "epoch": 1.0644015167348402, "grad_norm": 3.6657068729400635, "learning_rate": 0.00018981594180538175, "loss": 1.3359, "step": 35650 }, { "epoch": 1.0658943659869227, "grad_norm": 4.428374290466309, "learning_rate": 0.00018980165600946292, "loss": 1.3278, "step": 35700 }, { "epoch": 1.0673872152390051, "grad_norm": 5.079537391662598, "learning_rate": 0.00018978737021354408, "loss": 1.2699, "step": 35750 }, { "epoch": 1.0688800644910876, "grad_norm": 5.303153038024902, "learning_rate": 0.00018977308441762525, "loss": 1.2985, "step": 35800 }, { "epoch": 1.0703729137431701, "grad_norm": 4.319901943206787, "learning_rate": 0.0001897587986217064, "loss": 1.2661, "step": 35850 }, { "epoch": 1.0718657629952528, "grad_norm": 6.0582451820373535, "learning_rate": 0.00018974451282578758, "loss": 1.2837, "step": 35900 }, { "epoch": 1.0733586122473353, "grad_norm": 5.738245964050293, "learning_rate": 0.00018973022702986877, "loss": 1.3075, "step": 35950 }, { "epoch": 1.0748514614994178, "grad_norm": 4.602933883666992, "learning_rate": 0.0001897159412339499, "loss": 1.2585, "step": 36000 }, { "epoch": 1.0763443107515003, "grad_norm": 4.273153305053711, "learning_rate": 0.0001897016554380311, "loss": 1.3382, "step": 36050 }, { "epoch": 1.0778371600035828, "grad_norm": 4.225873947143555, "learning_rate": 0.00018968736964211224, "loss": 1.2964, "step": 36100 }, { "epoch": 1.0793300092556652, "grad_norm": 4.828727722167969, "learning_rate": 0.00018967308384619343, "loss": 1.3734, "step": 36150 }, { "epoch": 1.080822858507748, "grad_norm": 3.7251532077789307, "learning_rate": 0.00018965879805027457, "loss": 1.2706, "step": 36200 }, { "epoch": 1.0823157077598304, "grad_norm": 4.92055606842041, "learning_rate": 0.00018964451225435576, "loss": 1.3187, "step": 36250 }, { "epoch": 1.083808557011913, "grad_norm": 4.827970027923584, "learning_rate": 0.00018963022645843692, "loss": 1.2574, "step": 36300 }, { "epoch": 1.0853014062639954, "grad_norm": 4.742151737213135, "learning_rate": 0.00018961594066251809, "loss": 1.3076, "step": 36350 }, { "epoch": 1.086794255516078, "grad_norm": 3.70794939994812, "learning_rate": 0.00018960165486659925, "loss": 1.3075, "step": 36400 }, { "epoch": 1.0882871047681606, "grad_norm": 4.062809944152832, "learning_rate": 0.00018958736907068042, "loss": 1.2628, "step": 36450 }, { "epoch": 1.089779954020243, "grad_norm": 5.072466850280762, "learning_rate": 0.00018957308327476158, "loss": 1.3278, "step": 36500 }, { "epoch": 1.0912728032723256, "grad_norm": 4.530171871185303, "learning_rate": 0.00018955879747884274, "loss": 1.3233, "step": 36550 }, { "epoch": 1.092765652524408, "grad_norm": 3.9391825199127197, "learning_rate": 0.0001895445116829239, "loss": 1.2973, "step": 36600 }, { "epoch": 1.0942585017764905, "grad_norm": 6.774008274078369, "learning_rate": 0.00018953022588700507, "loss": 1.2801, "step": 36650 }, { "epoch": 1.0957513510285732, "grad_norm": 4.565464496612549, "learning_rate": 0.00018951594009108624, "loss": 1.3718, "step": 36700 }, { "epoch": 1.0972442002806557, "grad_norm": 5.68241548538208, "learning_rate": 0.00018950165429516743, "loss": 1.2928, "step": 36750 }, { "epoch": 1.0987370495327382, "grad_norm": 4.235448360443115, "learning_rate": 0.00018948736849924857, "loss": 1.3097, "step": 36800 }, { "epoch": 1.1002298987848207, "grad_norm": 4.672369003295898, "learning_rate": 0.00018947308270332976, "loss": 1.2875, "step": 36850 }, { "epoch": 1.1017227480369032, "grad_norm": 6.449750900268555, "learning_rate": 0.0001894587969074109, "loss": 1.2823, "step": 36900 }, { "epoch": 1.1032155972889857, "grad_norm": 5.093149662017822, "learning_rate": 0.0001894445111114921, "loss": 1.315, "step": 36950 }, { "epoch": 1.1047084465410684, "grad_norm": 5.433828353881836, "learning_rate": 0.00018943022531557323, "loss": 1.2781, "step": 37000 }, { "epoch": 1.1062012957931509, "grad_norm": 5.176681995391846, "learning_rate": 0.00018941593951965442, "loss": 1.2652, "step": 37050 }, { "epoch": 1.1076941450452333, "grad_norm": 4.09026575088501, "learning_rate": 0.00018940165372373558, "loss": 1.3045, "step": 37100 }, { "epoch": 1.1091869942973158, "grad_norm": 5.223085880279541, "learning_rate": 0.00018938736792781675, "loss": 1.3009, "step": 37150 }, { "epoch": 1.1106798435493983, "grad_norm": 5.105752944946289, "learning_rate": 0.0001893730821318979, "loss": 1.2915, "step": 37200 }, { "epoch": 1.1121726928014808, "grad_norm": 3.945962905883789, "learning_rate": 0.00018935879633597908, "loss": 1.341, "step": 37250 }, { "epoch": 1.1136655420535635, "grad_norm": 4.856802463531494, "learning_rate": 0.00018934451054006024, "loss": 1.3318, "step": 37300 }, { "epoch": 1.115158391305646, "grad_norm": 4.858597755432129, "learning_rate": 0.0001893302247441414, "loss": 1.2756, "step": 37350 }, { "epoch": 1.1166512405577285, "grad_norm": 4.594838619232178, "learning_rate": 0.00018931593894822257, "loss": 1.2712, "step": 37400 }, { "epoch": 1.118144089809811, "grad_norm": 4.745405673980713, "learning_rate": 0.00018930165315230374, "loss": 1.2878, "step": 37450 }, { "epoch": 1.1196369390618934, "grad_norm": 4.112718105316162, "learning_rate": 0.0001892873673563849, "loss": 1.3337, "step": 37500 }, { "epoch": 1.1211297883139761, "grad_norm": 4.297769069671631, "learning_rate": 0.00018927308156046607, "loss": 1.3324, "step": 37550 }, { "epoch": 1.1226226375660586, "grad_norm": 5.46763801574707, "learning_rate": 0.00018925879576454723, "loss": 1.276, "step": 37600 }, { "epoch": 1.124115486818141, "grad_norm": 5.648976802825928, "learning_rate": 0.0001892445099686284, "loss": 1.3238, "step": 37650 }, { "epoch": 1.1256083360702236, "grad_norm": 4.185100078582764, "learning_rate": 0.00018923022417270956, "loss": 1.2848, "step": 37700 }, { "epoch": 1.127101185322306, "grad_norm": 4.676313877105713, "learning_rate": 0.00018921593837679072, "loss": 1.3038, "step": 37750 }, { "epoch": 1.1285940345743888, "grad_norm": 4.426568508148193, "learning_rate": 0.00018920165258087192, "loss": 1.3343, "step": 37800 }, { "epoch": 1.1300868838264713, "grad_norm": 4.887205600738525, "learning_rate": 0.00018918736678495305, "loss": 1.3456, "step": 37850 }, { "epoch": 1.1315797330785538, "grad_norm": 5.455615043640137, "learning_rate": 0.00018917308098903424, "loss": 1.3258, "step": 37900 }, { "epoch": 1.1330725823306362, "grad_norm": 5.676678657531738, "learning_rate": 0.00018915879519311538, "loss": 1.3702, "step": 37950 }, { "epoch": 1.1345654315827187, "grad_norm": 3.9528415203094482, "learning_rate": 0.00018914450939719657, "loss": 1.3097, "step": 38000 }, { "epoch": 1.1360582808348014, "grad_norm": 4.538026332855225, "learning_rate": 0.0001891302236012777, "loss": 1.316, "step": 38050 }, { "epoch": 1.137551130086884, "grad_norm": 4.46422815322876, "learning_rate": 0.0001891159378053589, "loss": 1.2479, "step": 38100 }, { "epoch": 1.1390439793389664, "grad_norm": 4.823958396911621, "learning_rate": 0.00018910165200944007, "loss": 1.2616, "step": 38150 }, { "epoch": 1.1405368285910489, "grad_norm": 5.7128167152404785, "learning_rate": 0.00018908736621352123, "loss": 1.2853, "step": 38200 }, { "epoch": 1.1420296778431314, "grad_norm": 3.5420877933502197, "learning_rate": 0.0001890730804176024, "loss": 1.3008, "step": 38250 }, { "epoch": 1.1435225270952138, "grad_norm": 4.617981910705566, "learning_rate": 0.00018905879462168356, "loss": 1.2888, "step": 38300 }, { "epoch": 1.1450153763472963, "grad_norm": 4.542544364929199, "learning_rate": 0.00018904450882576473, "loss": 1.3009, "step": 38350 }, { "epoch": 1.146508225599379, "grad_norm": 4.301428318023682, "learning_rate": 0.0001890302230298459, "loss": 1.2834, "step": 38400 }, { "epoch": 1.1480010748514615, "grad_norm": 4.136764049530029, "learning_rate": 0.00018901593723392706, "loss": 1.3895, "step": 38450 }, { "epoch": 1.149493924103544, "grad_norm": 5.59256649017334, "learning_rate": 0.00018900165143800822, "loss": 1.295, "step": 38500 }, { "epoch": 1.1509867733556265, "grad_norm": 3.4925365447998047, "learning_rate": 0.00018898736564208939, "loss": 1.3385, "step": 38550 }, { "epoch": 1.152479622607709, "grad_norm": 4.884555816650391, "learning_rate": 0.00018897307984617058, "loss": 1.2776, "step": 38600 }, { "epoch": 1.1539724718597917, "grad_norm": 3.866908550262451, "learning_rate": 0.00018895879405025171, "loss": 1.3173, "step": 38650 }, { "epoch": 1.1554653211118742, "grad_norm": 4.8104939460754395, "learning_rate": 0.0001889445082543329, "loss": 1.2988, "step": 38700 }, { "epoch": 1.1569581703639567, "grad_norm": 3.814675807952881, "learning_rate": 0.00018893022245841404, "loss": 1.348, "step": 38750 }, { "epoch": 1.1584510196160391, "grad_norm": 5.720306873321533, "learning_rate": 0.00018891593666249524, "loss": 1.2844, "step": 38800 }, { "epoch": 1.1599438688681216, "grad_norm": 4.06850528717041, "learning_rate": 0.00018890165086657637, "loss": 1.3514, "step": 38850 }, { "epoch": 1.1614367181202043, "grad_norm": 6.193358898162842, "learning_rate": 0.00018888736507065757, "loss": 1.3587, "step": 38900 }, { "epoch": 1.1629295673722868, "grad_norm": 4.8998212814331055, "learning_rate": 0.00018887307927473873, "loss": 1.317, "step": 38950 }, { "epoch": 1.1644224166243693, "grad_norm": 4.4293107986450195, "learning_rate": 0.0001888587934788199, "loss": 1.3473, "step": 39000 }, { "epoch": 1.1659152658764518, "grad_norm": 6.795536994934082, "learning_rate": 0.00018884450768290106, "loss": 1.2793, "step": 39050 }, { "epoch": 1.1674081151285343, "grad_norm": 4.158294200897217, "learning_rate": 0.00018883022188698222, "loss": 1.3175, "step": 39100 }, { "epoch": 1.168900964380617, "grad_norm": 5.839204788208008, "learning_rate": 0.0001888159360910634, "loss": 1.2931, "step": 39150 }, { "epoch": 1.1703938136326995, "grad_norm": 6.633917331695557, "learning_rate": 0.00018880165029514455, "loss": 1.3053, "step": 39200 }, { "epoch": 1.171886662884782, "grad_norm": 4.409125328063965, "learning_rate": 0.00018878736449922572, "loss": 1.3074, "step": 39250 }, { "epoch": 1.1733795121368644, "grad_norm": 4.820318698883057, "learning_rate": 0.00018877307870330688, "loss": 1.329, "step": 39300 }, { "epoch": 1.174872361388947, "grad_norm": 5.104337215423584, "learning_rate": 0.00018875879290738805, "loss": 1.2628, "step": 39350 }, { "epoch": 1.1763652106410294, "grad_norm": 5.449405670166016, "learning_rate": 0.00018874450711146924, "loss": 1.2672, "step": 39400 }, { "epoch": 1.1778580598931119, "grad_norm": 5.3521504402160645, "learning_rate": 0.00018873022131555038, "loss": 1.3236, "step": 39450 }, { "epoch": 1.1793509091451946, "grad_norm": 5.748175144195557, "learning_rate": 0.00018871593551963157, "loss": 1.284, "step": 39500 }, { "epoch": 1.180843758397277, "grad_norm": 4.418118953704834, "learning_rate": 0.0001887016497237127, "loss": 1.299, "step": 39550 }, { "epoch": 1.1823366076493596, "grad_norm": 4.267107009887695, "learning_rate": 0.0001886873639277939, "loss": 1.3287, "step": 39600 }, { "epoch": 1.183829456901442, "grad_norm": 4.607259273529053, "learning_rate": 0.00018867307813187503, "loss": 1.3087, "step": 39650 }, { "epoch": 1.1853223061535245, "grad_norm": 4.254667282104492, "learning_rate": 0.00018865879233595623, "loss": 1.3015, "step": 39700 }, { "epoch": 1.1868151554056072, "grad_norm": 5.307114124298096, "learning_rate": 0.0001886445065400374, "loss": 1.2802, "step": 39750 }, { "epoch": 1.1883080046576897, "grad_norm": 3.9305639266967773, "learning_rate": 0.00018863022074411856, "loss": 1.2975, "step": 39800 }, { "epoch": 1.1898008539097722, "grad_norm": 4.650544166564941, "learning_rate": 0.00018861593494819972, "loss": 1.3239, "step": 39850 }, { "epoch": 1.1912937031618547, "grad_norm": 4.182717800140381, "learning_rate": 0.00018860164915228086, "loss": 1.3948, "step": 39900 }, { "epoch": 1.1927865524139372, "grad_norm": 5.322524547576904, "learning_rate": 0.00018858736335636205, "loss": 1.3411, "step": 39950 }, { "epoch": 1.1942794016660199, "grad_norm": 5.221969127655029, "learning_rate": 0.0001885730775604432, "loss": 1.3047, "step": 40000 }, { "epoch": 1.1957722509181024, "grad_norm": 4.373313903808594, "learning_rate": 0.00018855879176452438, "loss": 1.366, "step": 40050 }, { "epoch": 1.1972651001701848, "grad_norm": 4.842396259307861, "learning_rate": 0.00018854450596860554, "loss": 1.3294, "step": 40100 }, { "epoch": 1.1987579494222673, "grad_norm": 5.024688720703125, "learning_rate": 0.0001885302201726867, "loss": 1.3531, "step": 40150 }, { "epoch": 1.2002507986743498, "grad_norm": 5.733693599700928, "learning_rate": 0.00018851593437676787, "loss": 1.3177, "step": 40200 }, { "epoch": 1.2017436479264325, "grad_norm": 4.634098529815674, "learning_rate": 0.00018850164858084904, "loss": 1.3534, "step": 40250 }, { "epoch": 1.203236497178515, "grad_norm": 5.662175178527832, "learning_rate": 0.0001884873627849302, "loss": 1.2813, "step": 40300 }, { "epoch": 1.2047293464305975, "grad_norm": 4.25814151763916, "learning_rate": 0.00018847307698901137, "loss": 1.3704, "step": 40350 }, { "epoch": 1.20622219568268, "grad_norm": 4.733590126037598, "learning_rate": 0.00018845879119309253, "loss": 1.2999, "step": 40400 }, { "epoch": 1.2077150449347624, "grad_norm": 5.873198509216309, "learning_rate": 0.0001884445053971737, "loss": 1.3697, "step": 40450 }, { "epoch": 1.209207894186845, "grad_norm": 4.847556114196777, "learning_rate": 0.00018843021960125486, "loss": 1.2706, "step": 40500 }, { "epoch": 1.2107007434389274, "grad_norm": 4.76710319519043, "learning_rate": 0.00018841593380533605, "loss": 1.3873, "step": 40550 }, { "epoch": 1.2121935926910101, "grad_norm": 4.693939685821533, "learning_rate": 0.0001884016480094172, "loss": 1.2752, "step": 40600 }, { "epoch": 1.2136864419430926, "grad_norm": 5.377971649169922, "learning_rate": 0.00018838736221349838, "loss": 1.3083, "step": 40650 }, { "epoch": 1.215179291195175, "grad_norm": 4.257247447967529, "learning_rate": 0.00018837307641757952, "loss": 1.3318, "step": 40700 }, { "epoch": 1.2166721404472576, "grad_norm": 4.820394039154053, "learning_rate": 0.0001883587906216607, "loss": 1.3063, "step": 40750 }, { "epoch": 1.21816498969934, "grad_norm": 3.7783172130584717, "learning_rate": 0.00018834450482574188, "loss": 1.2765, "step": 40800 }, { "epoch": 1.2196578389514228, "grad_norm": 5.447601795196533, "learning_rate": 0.00018833021902982304, "loss": 1.3067, "step": 40850 }, { "epoch": 1.2211506882035053, "grad_norm": 4.7725510597229, "learning_rate": 0.0001883159332339042, "loss": 1.3366, "step": 40900 }, { "epoch": 1.2226435374555877, "grad_norm": 5.6868672370910645, "learning_rate": 0.00018830164743798537, "loss": 1.3441, "step": 40950 }, { "epoch": 1.2241363867076702, "grad_norm": 8.460538864135742, "learning_rate": 0.00018828736164206653, "loss": 1.3305, "step": 41000 }, { "epoch": 1.2256292359597527, "grad_norm": 5.594083309173584, "learning_rate": 0.0001882730758461477, "loss": 1.2825, "step": 41050 }, { "epoch": 1.2271220852118354, "grad_norm": 6.125982284545898, "learning_rate": 0.00018825879005022886, "loss": 1.3253, "step": 41100 }, { "epoch": 1.228614934463918, "grad_norm": 4.173381328582764, "learning_rate": 0.00018824450425431003, "loss": 1.3363, "step": 41150 }, { "epoch": 1.2301077837160004, "grad_norm": 4.884373664855957, "learning_rate": 0.0001882302184583912, "loss": 1.3124, "step": 41200 }, { "epoch": 1.2316006329680829, "grad_norm": 6.418700695037842, "learning_rate": 0.00018821593266247239, "loss": 1.3123, "step": 41250 }, { "epoch": 1.2330934822201653, "grad_norm": 4.717264175415039, "learning_rate": 0.00018820164686655352, "loss": 1.287, "step": 41300 }, { "epoch": 1.234586331472248, "grad_norm": 5.55648136138916, "learning_rate": 0.00018818736107063471, "loss": 1.3426, "step": 41350 }, { "epoch": 1.2360791807243305, "grad_norm": 5.190313339233398, "learning_rate": 0.00018817307527471585, "loss": 1.2945, "step": 41400 }, { "epoch": 1.237572029976413, "grad_norm": 6.027787208557129, "learning_rate": 0.00018815878947879704, "loss": 1.3313, "step": 41450 }, { "epoch": 1.2390648792284955, "grad_norm": 4.4704413414001465, "learning_rate": 0.00018814450368287818, "loss": 1.3031, "step": 41500 }, { "epoch": 1.240557728480578, "grad_norm": 4.408693790435791, "learning_rate": 0.00018813021788695937, "loss": 1.287, "step": 41550 }, { "epoch": 1.2420505777326605, "grad_norm": 5.230643272399902, "learning_rate": 0.00018811593209104054, "loss": 1.2846, "step": 41600 }, { "epoch": 1.243543426984743, "grad_norm": 4.442893981933594, "learning_rate": 0.0001881016462951217, "loss": 1.3199, "step": 41650 }, { "epoch": 1.2450362762368257, "grad_norm": 3.9362857341766357, "learning_rate": 0.00018808736049920287, "loss": 1.3301, "step": 41700 }, { "epoch": 1.2465291254889082, "grad_norm": 4.765340805053711, "learning_rate": 0.00018807307470328403, "loss": 1.3092, "step": 41750 }, { "epoch": 1.2480219747409906, "grad_norm": 4.7868146896362305, "learning_rate": 0.0001880587889073652, "loss": 1.3077, "step": 41800 }, { "epoch": 1.2495148239930731, "grad_norm": 5.724763870239258, "learning_rate": 0.00018804450311144636, "loss": 1.3035, "step": 41850 }, { "epoch": 1.2510076732451556, "grad_norm": 5.832367897033691, "learning_rate": 0.00018803021731552753, "loss": 1.3703, "step": 41900 }, { "epoch": 1.2525005224972383, "grad_norm": 5.079206466674805, "learning_rate": 0.0001880159315196087, "loss": 1.3328, "step": 41950 }, { "epoch": 1.2539933717493208, "grad_norm": 5.704753398895264, "learning_rate": 0.00018800164572368986, "loss": 1.354, "step": 42000 }, { "epoch": 1.2554862210014033, "grad_norm": 5.396346569061279, "learning_rate": 0.00018798735992777105, "loss": 1.2794, "step": 42050 }, { "epoch": 1.2569790702534858, "grad_norm": 5.461711406707764, "learning_rate": 0.00018797307413185218, "loss": 1.3335, "step": 42100 }, { "epoch": 1.2584719195055682, "grad_norm": 3.3528285026550293, "learning_rate": 0.00018795878833593338, "loss": 1.296, "step": 42150 }, { "epoch": 1.259964768757651, "grad_norm": 4.413696765899658, "learning_rate": 0.00018794450254001451, "loss": 1.2828, "step": 42200 }, { "epoch": 1.2614576180097334, "grad_norm": 5.430131435394287, "learning_rate": 0.0001879302167440957, "loss": 1.3107, "step": 42250 }, { "epoch": 1.262950467261816, "grad_norm": 4.623260974884033, "learning_rate": 0.00018791593094817684, "loss": 1.3252, "step": 42300 }, { "epoch": 1.2644433165138984, "grad_norm": 5.377962112426758, "learning_rate": 0.00018790164515225803, "loss": 1.3073, "step": 42350 }, { "epoch": 1.265936165765981, "grad_norm": 5.34269905090332, "learning_rate": 0.0001878873593563392, "loss": 1.3705, "step": 42400 }, { "epoch": 1.2674290150180636, "grad_norm": 5.574904918670654, "learning_rate": 0.00018787307356042036, "loss": 1.3848, "step": 42450 }, { "epoch": 1.268921864270146, "grad_norm": 4.49995231628418, "learning_rate": 0.00018785878776450153, "loss": 1.3169, "step": 42500 }, { "epoch": 1.2704147135222286, "grad_norm": 3.9160735607147217, "learning_rate": 0.00018784450196858267, "loss": 1.2994, "step": 42550 }, { "epoch": 1.271907562774311, "grad_norm": 6.018070697784424, "learning_rate": 0.00018783021617266386, "loss": 1.3213, "step": 42600 }, { "epoch": 1.2734004120263935, "grad_norm": 5.017271041870117, "learning_rate": 0.000187815930376745, "loss": 1.2986, "step": 42650 }, { "epoch": 1.274893261278476, "grad_norm": 4.147556304931641, "learning_rate": 0.0001878016445808262, "loss": 1.2738, "step": 42700 }, { "epoch": 1.2763861105305585, "grad_norm": 4.864907741546631, "learning_rate": 0.00018778735878490735, "loss": 1.3414, "step": 42750 }, { "epoch": 1.2778789597826412, "grad_norm": 4.564859390258789, "learning_rate": 0.00018777307298898852, "loss": 1.3028, "step": 42800 }, { "epoch": 1.2793718090347237, "grad_norm": 4.96591854095459, "learning_rate": 0.00018775878719306968, "loss": 1.3197, "step": 42850 }, { "epoch": 1.2808646582868062, "grad_norm": 4.384543418884277, "learning_rate": 0.00018774450139715085, "loss": 1.2802, "step": 42900 }, { "epoch": 1.2823575075388887, "grad_norm": 3.956608295440674, "learning_rate": 0.000187730215601232, "loss": 1.2764, "step": 42950 }, { "epoch": 1.2838503567909711, "grad_norm": 4.815617561340332, "learning_rate": 0.00018771592980531318, "loss": 1.2384, "step": 43000 }, { "epoch": 1.2853432060430539, "grad_norm": 4.381842136383057, "learning_rate": 0.00018770164400939434, "loss": 1.3016, "step": 43050 }, { "epoch": 1.2868360552951363, "grad_norm": 6.777477741241455, "learning_rate": 0.0001876873582134755, "loss": 1.2956, "step": 43100 }, { "epoch": 1.2883289045472188, "grad_norm": 4.342850208282471, "learning_rate": 0.00018767307241755667, "loss": 1.338, "step": 43150 }, { "epoch": 1.2898217537993013, "grad_norm": 5.081860065460205, "learning_rate": 0.00018765878662163786, "loss": 1.292, "step": 43200 }, { "epoch": 1.2913146030513838, "grad_norm": 4.7942986488342285, "learning_rate": 0.000187644500825719, "loss": 1.2969, "step": 43250 }, { "epoch": 1.2928074523034665, "grad_norm": 3.766878604888916, "learning_rate": 0.0001876302150298002, "loss": 1.3561, "step": 43300 }, { "epoch": 1.294300301555549, "grad_norm": 5.662395000457764, "learning_rate": 0.00018761592923388133, "loss": 1.2993, "step": 43350 }, { "epoch": 1.2957931508076315, "grad_norm": 4.3839545249938965, "learning_rate": 0.00018760164343796252, "loss": 1.3283, "step": 43400 }, { "epoch": 1.297286000059714, "grad_norm": 5.862764835357666, "learning_rate": 0.00018758735764204368, "loss": 1.3876, "step": 43450 }, { "epoch": 1.2987788493117964, "grad_norm": 4.883128643035889, "learning_rate": 0.00018757307184612485, "loss": 1.3456, "step": 43500 }, { "epoch": 1.3002716985638791, "grad_norm": 5.6903533935546875, "learning_rate": 0.000187558786050206, "loss": 1.351, "step": 43550 }, { "epoch": 1.3017645478159616, "grad_norm": 3.8554911613464355, "learning_rate": 0.00018754450025428718, "loss": 1.3385, "step": 43600 }, { "epoch": 1.303257397068044, "grad_norm": 4.693499565124512, "learning_rate": 0.00018753021445836834, "loss": 1.2923, "step": 43650 }, { "epoch": 1.3047502463201266, "grad_norm": 4.351410388946533, "learning_rate": 0.0001875159286624495, "loss": 1.37, "step": 43700 }, { "epoch": 1.306243095572209, "grad_norm": 3.915044069290161, "learning_rate": 0.00018750164286653067, "loss": 1.3378, "step": 43750 }, { "epoch": 1.3077359448242916, "grad_norm": 5.484678745269775, "learning_rate": 0.00018748735707061184, "loss": 1.2753, "step": 43800 }, { "epoch": 1.309228794076374, "grad_norm": 4.64120626449585, "learning_rate": 0.000187473071274693, "loss": 1.346, "step": 43850 }, { "epoch": 1.3107216433284568, "grad_norm": 6.287013053894043, "learning_rate": 0.00018745878547877417, "loss": 1.3233, "step": 43900 }, { "epoch": 1.3122144925805392, "grad_norm": 4.840941905975342, "learning_rate": 0.00018744449968285533, "loss": 1.3651, "step": 43950 }, { "epoch": 1.3137073418326217, "grad_norm": 4.027961730957031, "learning_rate": 0.00018743021388693652, "loss": 1.3241, "step": 44000 }, { "epoch": 1.3152001910847042, "grad_norm": 5.4013190269470215, "learning_rate": 0.00018741592809101766, "loss": 1.3048, "step": 44050 }, { "epoch": 1.3166930403367867, "grad_norm": 5.291625022888184, "learning_rate": 0.00018740164229509885, "loss": 1.3171, "step": 44100 }, { "epoch": 1.3181858895888694, "grad_norm": 3.7480239868164062, "learning_rate": 0.00018738735649918, "loss": 1.3527, "step": 44150 }, { "epoch": 1.3196787388409519, "grad_norm": 3.080198287963867, "learning_rate": 0.00018737307070326118, "loss": 1.3782, "step": 44200 }, { "epoch": 1.3211715880930344, "grad_norm": 5.115420818328857, "learning_rate": 0.00018735878490734235, "loss": 1.3359, "step": 44250 }, { "epoch": 1.3226644373451169, "grad_norm": 4.522632598876953, "learning_rate": 0.0001873444991114235, "loss": 1.3557, "step": 44300 }, { "epoch": 1.3241572865971993, "grad_norm": 5.980693817138672, "learning_rate": 0.00018733021331550468, "loss": 1.3356, "step": 44350 }, { "epoch": 1.325650135849282, "grad_norm": 4.4641947746276855, "learning_rate": 0.00018731592751958584, "loss": 1.3179, "step": 44400 }, { "epoch": 1.3271429851013645, "grad_norm": 5.057164669036865, "learning_rate": 0.000187301641723667, "loss": 1.2966, "step": 44450 }, { "epoch": 1.328635834353447, "grad_norm": 4.383616924285889, "learning_rate": 0.00018728735592774817, "loss": 1.317, "step": 44500 }, { "epoch": 1.3301286836055295, "grad_norm": 5.158847332000732, "learning_rate": 0.00018727307013182933, "loss": 1.3278, "step": 44550 }, { "epoch": 1.331621532857612, "grad_norm": 4.217287063598633, "learning_rate": 0.0001872587843359105, "loss": 1.2695, "step": 44600 }, { "epoch": 1.3331143821096947, "grad_norm": 3.9984827041625977, "learning_rate": 0.00018724449853999166, "loss": 1.287, "step": 44650 }, { "epoch": 1.3346072313617772, "grad_norm": 3.536656379699707, "learning_rate": 0.00018723021274407285, "loss": 1.3528, "step": 44700 }, { "epoch": 1.3361000806138597, "grad_norm": 8.800326347351074, "learning_rate": 0.000187215926948154, "loss": 1.3632, "step": 44750 }, { "epoch": 1.3375929298659421, "grad_norm": 5.808169364929199, "learning_rate": 0.00018720164115223518, "loss": 1.3802, "step": 44800 }, { "epoch": 1.3390857791180246, "grad_norm": 3.631208896636963, "learning_rate": 0.00018718735535631632, "loss": 1.3953, "step": 44850 }, { "epoch": 1.340578628370107, "grad_norm": 3.4300222396850586, "learning_rate": 0.0001871730695603975, "loss": 1.3154, "step": 44900 }, { "epoch": 1.3420714776221896, "grad_norm": 4.281446933746338, "learning_rate": 0.00018715878376447865, "loss": 1.3453, "step": 44950 }, { "epoch": 1.3435643268742723, "grad_norm": 4.406055450439453, "learning_rate": 0.00018714449796855984, "loss": 1.3131, "step": 45000 }, { "epoch": 1.3450571761263548, "grad_norm": 4.559666156768799, "learning_rate": 0.000187130212172641, "loss": 1.3005, "step": 45050 }, { "epoch": 1.3465500253784373, "grad_norm": 4.3830342292785645, "learning_rate": 0.00018711592637672217, "loss": 1.2796, "step": 45100 }, { "epoch": 1.3480428746305197, "grad_norm": 3.840522289276123, "learning_rate": 0.00018710164058080334, "loss": 1.3683, "step": 45150 }, { "epoch": 1.3495357238826022, "grad_norm": 6.221634387969971, "learning_rate": 0.00018708735478488447, "loss": 1.3393, "step": 45200 }, { "epoch": 1.351028573134685, "grad_norm": 5.086376190185547, "learning_rate": 0.00018707306898896567, "loss": 1.3007, "step": 45250 }, { "epoch": 1.3525214223867674, "grad_norm": 5.3263373374938965, "learning_rate": 0.0001870587831930468, "loss": 1.3334, "step": 45300 }, { "epoch": 1.35401427163885, "grad_norm": 3.790532350540161, "learning_rate": 0.000187044497397128, "loss": 1.3282, "step": 45350 }, { "epoch": 1.3555071208909324, "grad_norm": 5.929101943969727, "learning_rate": 0.00018703021160120916, "loss": 1.3048, "step": 45400 }, { "epoch": 1.3569999701430149, "grad_norm": 4.900948524475098, "learning_rate": 0.00018701592580529032, "loss": 1.3282, "step": 45450 }, { "epoch": 1.3584928193950976, "grad_norm": 4.735415935516357, "learning_rate": 0.0001870016400093715, "loss": 1.3076, "step": 45500 }, { "epoch": 1.35998566864718, "grad_norm": 3.394000768661499, "learning_rate": 0.00018698735421345265, "loss": 1.3283, "step": 45550 }, { "epoch": 1.3614785178992626, "grad_norm": 4.902571678161621, "learning_rate": 0.00018697306841753382, "loss": 1.3456, "step": 45600 }, { "epoch": 1.362971367151345, "grad_norm": 3.773273229598999, "learning_rate": 0.00018695878262161498, "loss": 1.3095, "step": 45650 }, { "epoch": 1.3644642164034275, "grad_norm": 3.6063497066497803, "learning_rate": 0.00018694449682569615, "loss": 1.3088, "step": 45700 }, { "epoch": 1.3659570656555102, "grad_norm": 5.380884170532227, "learning_rate": 0.0001869302110297773, "loss": 1.2992, "step": 45750 }, { "epoch": 1.3674499149075927, "grad_norm": 3.108400821685791, "learning_rate": 0.00018691592523385848, "loss": 1.3638, "step": 45800 }, { "epoch": 1.3689427641596752, "grad_norm": 5.948930740356445, "learning_rate": 0.00018690163943793967, "loss": 1.4066, "step": 45850 }, { "epoch": 1.3704356134117577, "grad_norm": 3.5984768867492676, "learning_rate": 0.0001868873536420208, "loss": 1.3167, "step": 45900 }, { "epoch": 1.3719284626638402, "grad_norm": 4.595841884613037, "learning_rate": 0.000186873067846102, "loss": 1.2317, "step": 45950 }, { "epoch": 1.3734213119159226, "grad_norm": 4.499626159667969, "learning_rate": 0.00018685878205018314, "loss": 1.3094, "step": 46000 }, { "epoch": 1.3749141611680051, "grad_norm": 4.876091480255127, "learning_rate": 0.00018684449625426433, "loss": 1.3723, "step": 46050 }, { "epoch": 1.3764070104200878, "grad_norm": 5.354704856872559, "learning_rate": 0.00018683021045834547, "loss": 1.3672, "step": 46100 }, { "epoch": 1.3778998596721703, "grad_norm": 4.629252910614014, "learning_rate": 0.00018681592466242666, "loss": 1.3295, "step": 46150 }, { "epoch": 1.3793927089242528, "grad_norm": 3.6931405067443848, "learning_rate": 0.00018680163886650782, "loss": 1.2989, "step": 46200 }, { "epoch": 1.3808855581763353, "grad_norm": 6.378023624420166, "learning_rate": 0.00018678735307058899, "loss": 1.3022, "step": 46250 }, { "epoch": 1.3823784074284178, "grad_norm": 4.780524253845215, "learning_rate": 0.00018677306727467015, "loss": 1.298, "step": 46300 }, { "epoch": 1.3838712566805005, "grad_norm": 4.4194560050964355, "learning_rate": 0.00018675878147875132, "loss": 1.3396, "step": 46350 }, { "epoch": 1.385364105932583, "grad_norm": 5.250397205352783, "learning_rate": 0.00018674449568283248, "loss": 1.347, "step": 46400 }, { "epoch": 1.3868569551846655, "grad_norm": 5.2740607261657715, "learning_rate": 0.00018673020988691364, "loss": 1.3962, "step": 46450 }, { "epoch": 1.388349804436748, "grad_norm": 4.3260884284973145, "learning_rate": 0.0001867159240909948, "loss": 1.3236, "step": 46500 }, { "epoch": 1.3898426536888304, "grad_norm": 4.773256778717041, "learning_rate": 0.00018670163829507597, "loss": 1.324, "step": 46550 }, { "epoch": 1.3913355029409131, "grad_norm": 6.666245460510254, "learning_rate": 0.00018668735249915714, "loss": 1.3242, "step": 46600 }, { "epoch": 1.3928283521929956, "grad_norm": 7.051351547241211, "learning_rate": 0.00018667306670323833, "loss": 1.3228, "step": 46650 }, { "epoch": 1.394321201445078, "grad_norm": 7.103764057159424, "learning_rate": 0.00018665878090731947, "loss": 1.3815, "step": 46700 }, { "epoch": 1.3958140506971606, "grad_norm": 4.093582630157471, "learning_rate": 0.00018664449511140066, "loss": 1.3352, "step": 46750 }, { "epoch": 1.397306899949243, "grad_norm": 4.276589870452881, "learning_rate": 0.0001866302093154818, "loss": 1.2578, "step": 46800 }, { "epoch": 1.3987997492013258, "grad_norm": 4.325264930725098, "learning_rate": 0.000186615923519563, "loss": 1.297, "step": 46850 }, { "epoch": 1.4002925984534083, "grad_norm": 4.978733062744141, "learning_rate": 0.00018660163772364415, "loss": 1.3142, "step": 46900 }, { "epoch": 1.4017854477054907, "grad_norm": 4.2968854904174805, "learning_rate": 0.00018658735192772532, "loss": 1.3085, "step": 46950 }, { "epoch": 1.4032782969575732, "grad_norm": 3.587477922439575, "learning_rate": 0.00018657306613180648, "loss": 1.3348, "step": 47000 }, { "epoch": 1.4047711462096557, "grad_norm": 5.918453693389893, "learning_rate": 0.00018655878033588765, "loss": 1.3163, "step": 47050 }, { "epoch": 1.4062639954617382, "grad_norm": 4.217932224273682, "learning_rate": 0.0001865444945399688, "loss": 1.358, "step": 47100 }, { "epoch": 1.4077568447138207, "grad_norm": 4.987464904785156, "learning_rate": 0.00018653020874404998, "loss": 1.2865, "step": 47150 }, { "epoch": 1.4092496939659034, "grad_norm": 4.03493070602417, "learning_rate": 0.00018651592294813114, "loss": 1.2961, "step": 47200 }, { "epoch": 1.4107425432179859, "grad_norm": 4.768476486206055, "learning_rate": 0.0001865016371522123, "loss": 1.3301, "step": 47250 }, { "epoch": 1.4122353924700684, "grad_norm": 4.223608016967773, "learning_rate": 0.00018648735135629347, "loss": 1.358, "step": 47300 }, { "epoch": 1.4137282417221508, "grad_norm": 5.97542142868042, "learning_rate": 0.00018647306556037464, "loss": 1.3001, "step": 47350 }, { "epoch": 1.4152210909742333, "grad_norm": 3.4876928329467773, "learning_rate": 0.0001864587797644558, "loss": 1.358, "step": 47400 }, { "epoch": 1.416713940226316, "grad_norm": 3.8894033432006836, "learning_rate": 0.000186444493968537, "loss": 1.3079, "step": 47450 }, { "epoch": 1.4182067894783985, "grad_norm": 5.399030685424805, "learning_rate": 0.00018643020817261813, "loss": 1.3059, "step": 47500 }, { "epoch": 1.419699638730481, "grad_norm": 3.9021189212799072, "learning_rate": 0.00018641592237669932, "loss": 1.2862, "step": 47550 }, { "epoch": 1.4211924879825635, "grad_norm": 4.103232383728027, "learning_rate": 0.00018640163658078046, "loss": 1.3533, "step": 47600 }, { "epoch": 1.422685337234646, "grad_norm": 4.994762897491455, "learning_rate": 0.00018638735078486165, "loss": 1.3515, "step": 47650 }, { "epoch": 1.4241781864867287, "grad_norm": 4.363476276397705, "learning_rate": 0.00018637306498894282, "loss": 1.3524, "step": 47700 }, { "epoch": 1.4256710357388112, "grad_norm": 4.2915873527526855, "learning_rate": 0.00018635877919302398, "loss": 1.3245, "step": 47750 }, { "epoch": 1.4271638849908936, "grad_norm": 4.654321670532227, "learning_rate": 0.00018634449339710514, "loss": 1.369, "step": 47800 }, { "epoch": 1.4286567342429761, "grad_norm": 4.786460876464844, "learning_rate": 0.00018633020760118628, "loss": 1.3485, "step": 47850 }, { "epoch": 1.4301495834950586, "grad_norm": 4.434101104736328, "learning_rate": 0.00018631592180526747, "loss": 1.3131, "step": 47900 }, { "epoch": 1.4316424327471413, "grad_norm": 4.622028827667236, "learning_rate": 0.0001863016360093486, "loss": 1.3383, "step": 47950 }, { "epoch": 1.4331352819992238, "grad_norm": 5.579216480255127, "learning_rate": 0.0001862873502134298, "loss": 1.3214, "step": 48000 }, { "epoch": 1.4346281312513063, "grad_norm": 5.444978713989258, "learning_rate": 0.00018627306441751097, "loss": 1.3243, "step": 48050 }, { "epoch": 1.4361209805033888, "grad_norm": 4.104014873504639, "learning_rate": 0.00018625877862159213, "loss": 1.241, "step": 48100 }, { "epoch": 1.4376138297554713, "grad_norm": 4.517495632171631, "learning_rate": 0.0001862444928256733, "loss": 1.4197, "step": 48150 }, { "epoch": 1.4391066790075537, "grad_norm": 5.38925838470459, "learning_rate": 0.00018623020702975446, "loss": 1.3125, "step": 48200 }, { "epoch": 1.4405995282596362, "grad_norm": 6.211074352264404, "learning_rate": 0.00018621592123383563, "loss": 1.2659, "step": 48250 }, { "epoch": 1.442092377511719, "grad_norm": 5.461385726928711, "learning_rate": 0.0001862016354379168, "loss": 1.3043, "step": 48300 }, { "epoch": 1.4435852267638014, "grad_norm": 8.296045303344727, "learning_rate": 0.00018618734964199796, "loss": 1.3422, "step": 48350 }, { "epoch": 1.445078076015884, "grad_norm": 4.640946388244629, "learning_rate": 0.00018617306384607912, "loss": 1.3317, "step": 48400 }, { "epoch": 1.4465709252679664, "grad_norm": 4.698127269744873, "learning_rate": 0.00018615877805016029, "loss": 1.3241, "step": 48450 }, { "epoch": 1.4480637745200489, "grad_norm": 4.810885429382324, "learning_rate": 0.00018614449225424148, "loss": 1.3204, "step": 48500 }, { "epoch": 1.4495566237721316, "grad_norm": 3.57252836227417, "learning_rate": 0.00018613020645832261, "loss": 1.3412, "step": 48550 }, { "epoch": 1.451049473024214, "grad_norm": 4.79307222366333, "learning_rate": 0.0001861159206624038, "loss": 1.325, "step": 48600 }, { "epoch": 1.4525423222762965, "grad_norm": 5.495840072631836, "learning_rate": 0.00018610163486648494, "loss": 1.3191, "step": 48650 }, { "epoch": 1.454035171528379, "grad_norm": 4.352155685424805, "learning_rate": 0.00018608734907056614, "loss": 1.3129, "step": 48700 }, { "epoch": 1.4555280207804615, "grad_norm": 4.606874942779541, "learning_rate": 0.00018607306327464727, "loss": 1.3212, "step": 48750 }, { "epoch": 1.4570208700325442, "grad_norm": 5.2679219245910645, "learning_rate": 0.00018605877747872846, "loss": 1.3531, "step": 48800 }, { "epoch": 1.4585137192846267, "grad_norm": 4.821314811706543, "learning_rate": 0.00018604449168280963, "loss": 1.3195, "step": 48850 }, { "epoch": 1.4600065685367092, "grad_norm": 4.137834072113037, "learning_rate": 0.0001860302058868908, "loss": 1.2751, "step": 48900 }, { "epoch": 1.4614994177887917, "grad_norm": 6.577256202697754, "learning_rate": 0.00018601592009097196, "loss": 1.3971, "step": 48950 }, { "epoch": 1.4629922670408742, "grad_norm": 4.749688148498535, "learning_rate": 0.00018600163429505312, "loss": 1.3557, "step": 49000 }, { "epoch": 1.4644851162929569, "grad_norm": 5.410492897033691, "learning_rate": 0.0001859873484991343, "loss": 1.2467, "step": 49050 }, { "epoch": 1.4659779655450393, "grad_norm": 4.303316593170166, "learning_rate": 0.00018597306270321545, "loss": 1.3166, "step": 49100 }, { "epoch": 1.4674708147971218, "grad_norm": 5.683919906616211, "learning_rate": 0.00018595877690729662, "loss": 1.2932, "step": 49150 }, { "epoch": 1.4689636640492043, "grad_norm": 5.251864433288574, "learning_rate": 0.00018594449111137778, "loss": 1.3179, "step": 49200 }, { "epoch": 1.4704565133012868, "grad_norm": 4.565097808837891, "learning_rate": 0.00018593020531545895, "loss": 1.327, "step": 49250 }, { "epoch": 1.4719493625533693, "grad_norm": 5.532310485839844, "learning_rate": 0.00018591591951954014, "loss": 1.3868, "step": 49300 }, { "epoch": 1.4734422118054518, "grad_norm": 4.5699782371521, "learning_rate": 0.00018590163372362128, "loss": 1.2583, "step": 49350 }, { "epoch": 1.4749350610575345, "grad_norm": 4.792230606079102, "learning_rate": 0.00018588734792770247, "loss": 1.3805, "step": 49400 }, { "epoch": 1.476427910309617, "grad_norm": 4.52764368057251, "learning_rate": 0.0001858730621317836, "loss": 1.3465, "step": 49450 }, { "epoch": 1.4779207595616994, "grad_norm": 5.21061372756958, "learning_rate": 0.0001858587763358648, "loss": 1.3778, "step": 49500 }, { "epoch": 1.479413608813782, "grad_norm": 4.855043888092041, "learning_rate": 0.00018584449053994593, "loss": 1.3313, "step": 49550 }, { "epoch": 1.4809064580658644, "grad_norm": 4.446802616119385, "learning_rate": 0.00018583020474402713, "loss": 1.3224, "step": 49600 }, { "epoch": 1.4823993073179471, "grad_norm": 5.054227828979492, "learning_rate": 0.0001858159189481083, "loss": 1.3782, "step": 49650 }, { "epoch": 1.4838921565700296, "grad_norm": 6.877427101135254, "learning_rate": 0.00018580163315218946, "loss": 1.3399, "step": 49700 }, { "epoch": 1.485385005822112, "grad_norm": 6.227908611297607, "learning_rate": 0.00018578734735627062, "loss": 1.2996, "step": 49750 }, { "epoch": 1.4868778550741946, "grad_norm": 6.2445759773254395, "learning_rate": 0.00018577306156035179, "loss": 1.3005, "step": 49800 }, { "epoch": 1.488370704326277, "grad_norm": 6.488719463348389, "learning_rate": 0.00018575877576443295, "loss": 1.3755, "step": 49850 }, { "epoch": 1.4898635535783598, "grad_norm": 5.3022589683532715, "learning_rate": 0.00018574448996851411, "loss": 1.3402, "step": 49900 }, { "epoch": 1.4913564028304422, "grad_norm": 4.986827373504639, "learning_rate": 0.00018573020417259528, "loss": 1.3695, "step": 49950 }, { "epoch": 1.4928492520825247, "grad_norm": 5.669780731201172, "learning_rate": 0.00018571591837667644, "loss": 1.314, "step": 50000 }, { "epoch": 1.4943421013346072, "grad_norm": 5.21511697769165, "learning_rate": 0.0001857016325807576, "loss": 1.363, "step": 50050 }, { "epoch": 1.4958349505866897, "grad_norm": 4.303940296173096, "learning_rate": 0.0001856873467848388, "loss": 1.2926, "step": 50100 }, { "epoch": 1.4973277998387724, "grad_norm": 4.7364020347595215, "learning_rate": 0.00018567306098891994, "loss": 1.3325, "step": 50150 }, { "epoch": 1.4988206490908549, "grad_norm": 4.325379371643066, "learning_rate": 0.00018565877519300113, "loss": 1.31, "step": 50200 }, { "epoch": 1.5003134983429374, "grad_norm": 5.442165374755859, "learning_rate": 0.00018564448939708227, "loss": 1.3445, "step": 50250 }, { "epoch": 1.5018063475950199, "grad_norm": 5.861702919006348, "learning_rate": 0.00018563020360116346, "loss": 1.3392, "step": 50300 }, { "epoch": 1.5032991968471023, "grad_norm": 4.277650356292725, "learning_rate": 0.00018561591780524462, "loss": 1.3127, "step": 50350 }, { "epoch": 1.504792046099185, "grad_norm": 5.670056343078613, "learning_rate": 0.00018560163200932576, "loss": 1.346, "step": 50400 }, { "epoch": 1.5062848953512673, "grad_norm": 4.479572772979736, "learning_rate": 0.00018558734621340695, "loss": 1.3254, "step": 50450 }, { "epoch": 1.50777774460335, "grad_norm": 4.820215225219727, "learning_rate": 0.0001855730604174881, "loss": 1.3043, "step": 50500 }, { "epoch": 1.5092705938554325, "grad_norm": 5.21779203414917, "learning_rate": 0.00018555877462156928, "loss": 1.2953, "step": 50550 }, { "epoch": 1.510763443107515, "grad_norm": 4.987244129180908, "learning_rate": 0.00018554448882565042, "loss": 1.3008, "step": 50600 }, { "epoch": 1.5122562923595977, "grad_norm": 4.658651828765869, "learning_rate": 0.0001855302030297316, "loss": 1.3146, "step": 50650 }, { "epoch": 1.51374914161168, "grad_norm": 4.724532604217529, "learning_rate": 0.00018551591723381278, "loss": 1.3467, "step": 50700 }, { "epoch": 1.5152419908637627, "grad_norm": 6.199383735656738, "learning_rate": 0.00018550163143789394, "loss": 1.3244, "step": 50750 }, { "epoch": 1.5167348401158451, "grad_norm": 4.255542278289795, "learning_rate": 0.0001854873456419751, "loss": 1.3373, "step": 50800 }, { "epoch": 1.5182276893679276, "grad_norm": 5.011137962341309, "learning_rate": 0.00018547305984605627, "loss": 1.2945, "step": 50850 }, { "epoch": 1.51972053862001, "grad_norm": 4.276299476623535, "learning_rate": 0.00018545877405013743, "loss": 1.3503, "step": 50900 }, { "epoch": 1.5212133878720926, "grad_norm": 5.867751598358154, "learning_rate": 0.0001854444882542186, "loss": 1.345, "step": 50950 }, { "epoch": 1.5227062371241753, "grad_norm": 3.6653764247894287, "learning_rate": 0.00018543020245829976, "loss": 1.3353, "step": 51000 }, { "epoch": 1.5241990863762576, "grad_norm": 4.758911609649658, "learning_rate": 0.00018541591666238093, "loss": 1.3488, "step": 51050 }, { "epoch": 1.5256919356283403, "grad_norm": 4.028106689453125, "learning_rate": 0.0001854016308664621, "loss": 1.3215, "step": 51100 }, { "epoch": 1.5271847848804228, "grad_norm": 4.353278636932373, "learning_rate": 0.00018538734507054329, "loss": 1.2984, "step": 51150 }, { "epoch": 1.5286776341325052, "grad_norm": 7.112466812133789, "learning_rate": 0.00018537305927462442, "loss": 1.3057, "step": 51200 }, { "epoch": 1.530170483384588, "grad_norm": 4.703035354614258, "learning_rate": 0.00018535877347870561, "loss": 1.3289, "step": 51250 }, { "epoch": 1.5316633326366702, "grad_norm": 4.110894203186035, "learning_rate": 0.00018534448768278675, "loss": 1.3587, "step": 51300 }, { "epoch": 1.533156181888753, "grad_norm": 5.426900863647461, "learning_rate": 0.00018533020188686794, "loss": 1.316, "step": 51350 }, { "epoch": 1.5346490311408354, "grad_norm": 4.927971839904785, "learning_rate": 0.00018531591609094908, "loss": 1.3775, "step": 51400 }, { "epoch": 1.5361418803929179, "grad_norm": 4.867948055267334, "learning_rate": 0.00018530163029503027, "loss": 1.3038, "step": 51450 }, { "epoch": 1.5376347296450006, "grad_norm": 4.8490471839904785, "learning_rate": 0.00018528734449911144, "loss": 1.3851, "step": 51500 }, { "epoch": 1.5391275788970828, "grad_norm": 4.634105682373047, "learning_rate": 0.0001852730587031926, "loss": 1.3215, "step": 51550 }, { "epoch": 1.5406204281491656, "grad_norm": 3.7799713611602783, "learning_rate": 0.00018525877290727377, "loss": 1.3371, "step": 51600 }, { "epoch": 1.542113277401248, "grad_norm": 3.9134316444396973, "learning_rate": 0.00018524448711135493, "loss": 1.3073, "step": 51650 }, { "epoch": 1.5436061266533305, "grad_norm": 3.5114309787750244, "learning_rate": 0.0001852302013154361, "loss": 1.3859, "step": 51700 }, { "epoch": 1.5450989759054132, "grad_norm": 3.7832348346710205, "learning_rate": 0.00018521591551951726, "loss": 1.369, "step": 51750 }, { "epoch": 1.5465918251574955, "grad_norm": 3.666222333908081, "learning_rate": 0.00018520162972359843, "loss": 1.3552, "step": 51800 }, { "epoch": 1.5480846744095782, "grad_norm": 4.288083076477051, "learning_rate": 0.0001851873439276796, "loss": 1.3139, "step": 51850 }, { "epoch": 1.5495775236616607, "grad_norm": 6.1794939041137695, "learning_rate": 0.00018517305813176075, "loss": 1.2984, "step": 51900 }, { "epoch": 1.5510703729137432, "grad_norm": 4.89127779006958, "learning_rate": 0.00018515877233584195, "loss": 1.3251, "step": 51950 }, { "epoch": 1.5525632221658257, "grad_norm": 3.1737000942230225, "learning_rate": 0.00018514448653992308, "loss": 1.342, "step": 52000 }, { "epoch": 1.5540560714179081, "grad_norm": 5.618998050689697, "learning_rate": 0.00018513020074400428, "loss": 1.3234, "step": 52050 }, { "epoch": 1.5555489206699908, "grad_norm": 4.3047685623168945, "learning_rate": 0.0001851159149480854, "loss": 1.3305, "step": 52100 }, { "epoch": 1.557041769922073, "grad_norm": 5.245848178863525, "learning_rate": 0.0001851016291521666, "loss": 1.3052, "step": 52150 }, { "epoch": 1.5585346191741558, "grad_norm": 5.337231636047363, "learning_rate": 0.00018508734335624774, "loss": 1.3353, "step": 52200 }, { "epoch": 1.5600274684262383, "grad_norm": 4.924198627471924, "learning_rate": 0.00018507305756032893, "loss": 1.3022, "step": 52250 }, { "epoch": 1.5615203176783208, "grad_norm": 4.464673042297363, "learning_rate": 0.0001850587717644101, "loss": 1.3168, "step": 52300 }, { "epoch": 1.5630131669304035, "grad_norm": 3.7909131050109863, "learning_rate": 0.00018504448596849126, "loss": 1.3307, "step": 52350 }, { "epoch": 1.5645060161824857, "grad_norm": 4.331950664520264, "learning_rate": 0.00018503020017257243, "loss": 1.3574, "step": 52400 }, { "epoch": 1.5659988654345685, "grad_norm": 4.650835990905762, "learning_rate": 0.0001850159143766536, "loss": 1.3356, "step": 52450 }, { "epoch": 1.567491714686651, "grad_norm": 5.968230247497559, "learning_rate": 0.00018500162858073476, "loss": 1.3515, "step": 52500 }, { "epoch": 1.5689845639387334, "grad_norm": 3.9560632705688477, "learning_rate": 0.00018498734278481592, "loss": 1.3262, "step": 52550 }, { "epoch": 1.5704774131908161, "grad_norm": 3.6332058906555176, "learning_rate": 0.0001849730569888971, "loss": 1.3708, "step": 52600 }, { "epoch": 1.5719702624428984, "grad_norm": 5.171941757202148, "learning_rate": 0.00018495877119297825, "loss": 1.3201, "step": 52650 }, { "epoch": 1.573463111694981, "grad_norm": 4.531052589416504, "learning_rate": 0.00018494448539705942, "loss": 1.364, "step": 52700 }, { "epoch": 1.5749559609470636, "grad_norm": 3.860273838043213, "learning_rate": 0.0001849301996011406, "loss": 1.3729, "step": 52750 }, { "epoch": 1.576448810199146, "grad_norm": 4.562480926513672, "learning_rate": 0.00018491591380522175, "loss": 1.2745, "step": 52800 }, { "epoch": 1.5779416594512288, "grad_norm": 4.759164333343506, "learning_rate": 0.00018490162800930294, "loss": 1.3399, "step": 52850 }, { "epoch": 1.579434508703311, "grad_norm": 6.885429859161377, "learning_rate": 0.00018488734221338408, "loss": 1.2915, "step": 52900 }, { "epoch": 1.5809273579553937, "grad_norm": 4.555217742919922, "learning_rate": 0.00018487305641746527, "loss": 1.3897, "step": 52950 }, { "epoch": 1.5824202072074762, "grad_norm": 5.686909198760986, "learning_rate": 0.0001848587706215464, "loss": 1.3234, "step": 53000 }, { "epoch": 1.5839130564595587, "grad_norm": 3.994685411453247, "learning_rate": 0.00018484448482562757, "loss": 1.3265, "step": 53050 }, { "epoch": 1.5854059057116412, "grad_norm": 4.623023986816406, "learning_rate": 0.00018483019902970876, "loss": 1.3479, "step": 53100 }, { "epoch": 1.5868987549637237, "grad_norm": 6.000095844268799, "learning_rate": 0.0001848159132337899, "loss": 1.3333, "step": 53150 }, { "epoch": 1.5883916042158064, "grad_norm": 7.388947010040283, "learning_rate": 0.0001848016274378711, "loss": 1.306, "step": 53200 }, { "epoch": 1.5898844534678886, "grad_norm": 4.04311990737915, "learning_rate": 0.00018478734164195223, "loss": 1.3263, "step": 53250 }, { "epoch": 1.5913773027199714, "grad_norm": 5.370228290557861, "learning_rate": 0.00018477305584603342, "loss": 1.3279, "step": 53300 }, { "epoch": 1.5928701519720538, "grad_norm": 5.453476905822754, "learning_rate": 0.00018475877005011458, "loss": 1.3477, "step": 53350 }, { "epoch": 1.5943630012241363, "grad_norm": 3.9221408367156982, "learning_rate": 0.00018474448425419575, "loss": 1.3391, "step": 53400 }, { "epoch": 1.595855850476219, "grad_norm": 4.4245781898498535, "learning_rate": 0.0001847301984582769, "loss": 1.3263, "step": 53450 }, { "epoch": 1.5973486997283013, "grad_norm": 4.352360725402832, "learning_rate": 0.00018471591266235808, "loss": 1.3059, "step": 53500 }, { "epoch": 1.598841548980384, "grad_norm": 5.2914509773254395, "learning_rate": 0.00018470162686643924, "loss": 1.3353, "step": 53550 }, { "epoch": 1.6003343982324665, "grad_norm": 5.199918746948242, "learning_rate": 0.0001846873410705204, "loss": 1.3667, "step": 53600 }, { "epoch": 1.601827247484549, "grad_norm": 4.404350280761719, "learning_rate": 0.00018467305527460157, "loss": 1.3245, "step": 53650 }, { "epoch": 1.6033200967366317, "grad_norm": 4.452194690704346, "learning_rate": 0.00018465876947868274, "loss": 1.3194, "step": 53700 }, { "epoch": 1.604812945988714, "grad_norm": 4.387017726898193, "learning_rate": 0.0001846444836827639, "loss": 1.2448, "step": 53750 }, { "epoch": 1.6063057952407966, "grad_norm": 5.375933647155762, "learning_rate": 0.0001846301978868451, "loss": 1.3515, "step": 53800 }, { "epoch": 1.6077986444928791, "grad_norm": 4.564199447631836, "learning_rate": 0.00018461591209092623, "loss": 1.3182, "step": 53850 }, { "epoch": 1.6092914937449616, "grad_norm": 5.435900688171387, "learning_rate": 0.00018460162629500742, "loss": 1.3071, "step": 53900 }, { "epoch": 1.6107843429970443, "grad_norm": 3.600592613220215, "learning_rate": 0.00018458734049908856, "loss": 1.4009, "step": 53950 }, { "epoch": 1.6122771922491266, "grad_norm": 5.12654447555542, "learning_rate": 0.00018457305470316975, "loss": 1.3285, "step": 54000 }, { "epoch": 1.6137700415012093, "grad_norm": 3.6472108364105225, "learning_rate": 0.0001845587689072509, "loss": 1.32, "step": 54050 }, { "epoch": 1.6152628907532918, "grad_norm": 5.0967559814453125, "learning_rate": 0.00018454448311133208, "loss": 1.3018, "step": 54100 }, { "epoch": 1.6167557400053743, "grad_norm": 5.078010082244873, "learning_rate": 0.00018453019731541325, "loss": 1.3539, "step": 54150 }, { "epoch": 1.6182485892574567, "grad_norm": 4.601762294769287, "learning_rate": 0.0001845159115194944, "loss": 1.3153, "step": 54200 }, { "epoch": 1.6197414385095392, "grad_norm": 3.8307271003723145, "learning_rate": 0.00018450162572357557, "loss": 1.34, "step": 54250 }, { "epoch": 1.621234287761622, "grad_norm": 6.145284175872803, "learning_rate": 0.00018448733992765674, "loss": 1.3315, "step": 54300 }, { "epoch": 1.6227271370137042, "grad_norm": 9.96362590789795, "learning_rate": 0.0001844730541317379, "loss": 1.3269, "step": 54350 }, { "epoch": 1.624219986265787, "grad_norm": 5.126537322998047, "learning_rate": 0.00018445876833581907, "loss": 1.3604, "step": 54400 }, { "epoch": 1.6257128355178694, "grad_norm": 3.604433059692383, "learning_rate": 0.00018444448253990023, "loss": 1.3651, "step": 54450 }, { "epoch": 1.6272056847699519, "grad_norm": 4.835113048553467, "learning_rate": 0.0001844301967439814, "loss": 1.2787, "step": 54500 }, { "epoch": 1.6286985340220346, "grad_norm": 4.534168243408203, "learning_rate": 0.00018441591094806256, "loss": 1.3354, "step": 54550 }, { "epoch": 1.6301913832741168, "grad_norm": 7.929208755493164, "learning_rate": 0.00018440162515214375, "loss": 1.3361, "step": 54600 }, { "epoch": 1.6316842325261995, "grad_norm": 4.582381248474121, "learning_rate": 0.0001843873393562249, "loss": 1.354, "step": 54650 }, { "epoch": 1.633177081778282, "grad_norm": 4.781309604644775, "learning_rate": 0.00018437305356030608, "loss": 1.3035, "step": 54700 }, { "epoch": 1.6346699310303645, "grad_norm": 6.0308308601379395, "learning_rate": 0.00018435876776438722, "loss": 1.3392, "step": 54750 }, { "epoch": 1.6361627802824472, "grad_norm": 5.485566139221191, "learning_rate": 0.0001843444819684684, "loss": 1.3408, "step": 54800 }, { "epoch": 1.6376556295345295, "grad_norm": 6.658644676208496, "learning_rate": 0.00018433019617254955, "loss": 1.3772, "step": 54850 }, { "epoch": 1.6391484787866122, "grad_norm": 4.54780912399292, "learning_rate": 0.00018431591037663074, "loss": 1.3511, "step": 54900 }, { "epoch": 1.6406413280386947, "grad_norm": 5.600758075714111, "learning_rate": 0.0001843016245807119, "loss": 1.3301, "step": 54950 }, { "epoch": 1.6421341772907772, "grad_norm": 4.194091796875, "learning_rate": 0.00018428733878479307, "loss": 1.3159, "step": 55000 }, { "epoch": 1.6436270265428599, "grad_norm": 6.306137561798096, "learning_rate": 0.00018427305298887424, "loss": 1.3401, "step": 55050 }, { "epoch": 1.6451198757949421, "grad_norm": 4.377103328704834, "learning_rate": 0.0001842587671929554, "loss": 1.34, "step": 55100 }, { "epoch": 1.6466127250470248, "grad_norm": 3.450533390045166, "learning_rate": 0.00018424448139703657, "loss": 1.3367, "step": 55150 }, { "epoch": 1.6481055742991073, "grad_norm": 3.5845348834991455, "learning_rate": 0.00018423019560111773, "loss": 1.3755, "step": 55200 }, { "epoch": 1.6495984235511898, "grad_norm": 4.741848468780518, "learning_rate": 0.0001842159098051989, "loss": 1.3116, "step": 55250 }, { "epoch": 1.6510912728032723, "grad_norm": 4.1155619621276855, "learning_rate": 0.00018420162400928006, "loss": 1.3682, "step": 55300 }, { "epoch": 1.6525841220553548, "grad_norm": 4.3722028732299805, "learning_rate": 0.00018418733821336122, "loss": 1.2916, "step": 55350 }, { "epoch": 1.6540769713074375, "grad_norm": 4.6453118324279785, "learning_rate": 0.00018417305241744242, "loss": 1.3743, "step": 55400 }, { "epoch": 1.6555698205595197, "grad_norm": 4.749643802642822, "learning_rate": 0.00018415876662152355, "loss": 1.3649, "step": 55450 }, { "epoch": 1.6570626698116024, "grad_norm": 3.9023261070251465, "learning_rate": 0.00018414448082560475, "loss": 1.4002, "step": 55500 }, { "epoch": 1.658555519063685, "grad_norm": 4.34132719039917, "learning_rate": 0.00018413019502968588, "loss": 1.3344, "step": 55550 }, { "epoch": 1.6600483683157674, "grad_norm": 5.479841709136963, "learning_rate": 0.00018411590923376707, "loss": 1.3511, "step": 55600 }, { "epoch": 1.6615412175678501, "grad_norm": 3.340822696685791, "learning_rate": 0.0001841016234378482, "loss": 1.3789, "step": 55650 }, { "epoch": 1.6630340668199324, "grad_norm": 5.259148597717285, "learning_rate": 0.00018408733764192938, "loss": 1.3092, "step": 55700 }, { "epoch": 1.664526916072015, "grad_norm": 3.590052843093872, "learning_rate": 0.00018407305184601057, "loss": 1.3596, "step": 55750 }, { "epoch": 1.6660197653240976, "grad_norm": 5.688297748565674, "learning_rate": 0.0001840587660500917, "loss": 1.3629, "step": 55800 }, { "epoch": 1.66751261457618, "grad_norm": 5.826660633087158, "learning_rate": 0.0001840444802541729, "loss": 1.3757, "step": 55850 }, { "epoch": 1.6690054638282628, "grad_norm": 3.0926566123962402, "learning_rate": 0.00018403019445825404, "loss": 1.3507, "step": 55900 }, { "epoch": 1.670498313080345, "grad_norm": 7.726663589477539, "learning_rate": 0.00018401590866233523, "loss": 1.3162, "step": 55950 }, { "epoch": 1.6719911623324277, "grad_norm": 4.687432765960693, "learning_rate": 0.0001840016228664164, "loss": 1.289, "step": 56000 }, { "epoch": 1.6734840115845102, "grad_norm": 4.279799461364746, "learning_rate": 0.00018398733707049756, "loss": 1.3438, "step": 56050 }, { "epoch": 1.6749768608365927, "grad_norm": 6.4994797706604, "learning_rate": 0.00018397305127457872, "loss": 1.3373, "step": 56100 }, { "epoch": 1.6764697100886754, "grad_norm": 4.511581897735596, "learning_rate": 0.00018395876547865989, "loss": 1.3549, "step": 56150 }, { "epoch": 1.6779625593407577, "grad_norm": 5.1127190589904785, "learning_rate": 0.00018394447968274105, "loss": 1.2734, "step": 56200 }, { "epoch": 1.6794554085928404, "grad_norm": 4.7369561195373535, "learning_rate": 0.00018393019388682222, "loss": 1.3193, "step": 56250 }, { "epoch": 1.6809482578449229, "grad_norm": 5.101438522338867, "learning_rate": 0.00018391590809090338, "loss": 1.3311, "step": 56300 }, { "epoch": 1.6824411070970053, "grad_norm": 5.68798303604126, "learning_rate": 0.00018390162229498454, "loss": 1.3723, "step": 56350 }, { "epoch": 1.6839339563490878, "grad_norm": 5.620144367218018, "learning_rate": 0.0001838873364990657, "loss": 1.3051, "step": 56400 }, { "epoch": 1.6854268056011703, "grad_norm": 4.335910797119141, "learning_rate": 0.00018387305070314687, "loss": 1.2625, "step": 56450 }, { "epoch": 1.686919654853253, "grad_norm": 5.112996578216553, "learning_rate": 0.00018385876490722804, "loss": 1.3288, "step": 56500 }, { "epoch": 1.6884125041053353, "grad_norm": 5.485559463500977, "learning_rate": 0.00018384447911130923, "loss": 1.3171, "step": 56550 }, { "epoch": 1.689905353357418, "grad_norm": 5.58554744720459, "learning_rate": 0.00018383019331539037, "loss": 1.3002, "step": 56600 }, { "epoch": 1.6913982026095005, "grad_norm": 4.202605247497559, "learning_rate": 0.00018381590751947156, "loss": 1.3018, "step": 56650 }, { "epoch": 1.692891051861583, "grad_norm": 5.206381797790527, "learning_rate": 0.0001838016217235527, "loss": 1.3696, "step": 56700 }, { "epoch": 1.6943839011136657, "grad_norm": 5.87765645980835, "learning_rate": 0.0001837873359276339, "loss": 1.3616, "step": 56750 }, { "epoch": 1.695876750365748, "grad_norm": 3.4311394691467285, "learning_rate": 0.00018377305013171505, "loss": 1.3249, "step": 56800 }, { "epoch": 1.6973695996178306, "grad_norm": 2.7731411457061768, "learning_rate": 0.00018375876433579622, "loss": 1.3715, "step": 56850 }, { "epoch": 1.698862448869913, "grad_norm": 6.399103164672852, "learning_rate": 0.00018374447853987738, "loss": 1.3654, "step": 56900 }, { "epoch": 1.7003552981219956, "grad_norm": 3.8202717304229736, "learning_rate": 0.00018373019274395855, "loss": 1.3565, "step": 56950 }, { "epoch": 1.7018481473740783, "grad_norm": 4.040468215942383, "learning_rate": 0.0001837159069480397, "loss": 1.3373, "step": 57000 }, { "epoch": 1.7033409966261606, "grad_norm": 4.522890090942383, "learning_rate": 0.00018370162115212088, "loss": 1.3393, "step": 57050 }, { "epoch": 1.7048338458782433, "grad_norm": 4.771418571472168, "learning_rate": 0.00018368733535620204, "loss": 1.2437, "step": 57100 }, { "epoch": 1.7063266951303258, "grad_norm": 4.333567142486572, "learning_rate": 0.0001836730495602832, "loss": 1.2876, "step": 57150 }, { "epoch": 1.7078195443824082, "grad_norm": 5.1498703956604, "learning_rate": 0.00018365876376436437, "loss": 1.4321, "step": 57200 }, { "epoch": 1.709312393634491, "grad_norm": 3.8978734016418457, "learning_rate": 0.00018364447796844556, "loss": 1.2815, "step": 57250 }, { "epoch": 1.7108052428865732, "grad_norm": 5.365299224853516, "learning_rate": 0.0001836301921725267, "loss": 1.2443, "step": 57300 }, { "epoch": 1.712298092138656, "grad_norm": 4.474836826324463, "learning_rate": 0.0001836159063766079, "loss": 1.3634, "step": 57350 }, { "epoch": 1.7137909413907384, "grad_norm": 3.8736836910247803, "learning_rate": 0.00018360162058068903, "loss": 1.3889, "step": 57400 }, { "epoch": 1.7152837906428209, "grad_norm": 4.919806957244873, "learning_rate": 0.00018358733478477022, "loss": 1.3216, "step": 57450 }, { "epoch": 1.7167766398949034, "grad_norm": 6.2346954345703125, "learning_rate": 0.00018357304898885136, "loss": 1.3294, "step": 57500 }, { "epoch": 1.7182694891469859, "grad_norm": 4.782268524169922, "learning_rate": 0.00018355876319293255, "loss": 1.363, "step": 57550 }, { "epoch": 1.7197623383990686, "grad_norm": 5.770125389099121, "learning_rate": 0.00018354447739701372, "loss": 1.3441, "step": 57600 }, { "epoch": 1.721255187651151, "grad_norm": 5.742440223693848, "learning_rate": 0.00018353019160109488, "loss": 1.302, "step": 57650 }, { "epoch": 1.7227480369032335, "grad_norm": 5.430489540100098, "learning_rate": 0.00018351590580517604, "loss": 1.3183, "step": 57700 }, { "epoch": 1.724240886155316, "grad_norm": 6.17573356628418, "learning_rate": 0.0001835016200092572, "loss": 1.3421, "step": 57750 }, { "epoch": 1.7257337354073985, "grad_norm": 5.362539291381836, "learning_rate": 0.00018348733421333837, "loss": 1.3333, "step": 57800 }, { "epoch": 1.7272265846594812, "grad_norm": 4.4574294090271, "learning_rate": 0.00018347304841741954, "loss": 1.2792, "step": 57850 }, { "epoch": 1.7287194339115635, "grad_norm": 4.277164936065674, "learning_rate": 0.0001834587626215007, "loss": 1.342, "step": 57900 }, { "epoch": 1.7302122831636462, "grad_norm": 4.775753498077393, "learning_rate": 0.00018344447682558187, "loss": 1.3618, "step": 57950 }, { "epoch": 1.7317051324157287, "grad_norm": 4.7877702713012695, "learning_rate": 0.00018343019102966303, "loss": 1.3631, "step": 58000 }, { "epoch": 1.7331979816678111, "grad_norm": 5.616472244262695, "learning_rate": 0.00018341590523374422, "loss": 1.3558, "step": 58050 }, { "epoch": 1.7346908309198938, "grad_norm": 5.436590671539307, "learning_rate": 0.00018340161943782536, "loss": 1.3999, "step": 58100 }, { "epoch": 1.736183680171976, "grad_norm": 4.245835304260254, "learning_rate": 0.00018338733364190655, "loss": 1.3592, "step": 58150 }, { "epoch": 1.7376765294240588, "grad_norm": 4.440322399139404, "learning_rate": 0.0001833730478459877, "loss": 1.312, "step": 58200 }, { "epoch": 1.7391693786761413, "grad_norm": 4.439347743988037, "learning_rate": 0.00018335876205006888, "loss": 1.3769, "step": 58250 }, { "epoch": 1.7406622279282238, "grad_norm": 4.374370098114014, "learning_rate": 0.00018334447625415002, "loss": 1.3328, "step": 58300 }, { "epoch": 1.7421550771803065, "grad_norm": 3.9568932056427, "learning_rate": 0.00018333019045823119, "loss": 1.2992, "step": 58350 }, { "epoch": 1.7436479264323888, "grad_norm": 4.133091926574707, "learning_rate": 0.00018331590466231238, "loss": 1.3685, "step": 58400 }, { "epoch": 1.7451407756844715, "grad_norm": 4.492006778717041, "learning_rate": 0.00018330161886639351, "loss": 1.3543, "step": 58450 }, { "epoch": 1.746633624936554, "grad_norm": 2.6073009967803955, "learning_rate": 0.0001832873330704747, "loss": 1.36, "step": 58500 }, { "epoch": 1.7481264741886364, "grad_norm": 4.137593746185303, "learning_rate": 0.00018327304727455584, "loss": 1.3599, "step": 58550 }, { "epoch": 1.749619323440719, "grad_norm": 4.932790756225586, "learning_rate": 0.00018325876147863704, "loss": 1.3265, "step": 58600 }, { "epoch": 1.7511121726928014, "grad_norm": 4.623594760894775, "learning_rate": 0.00018324447568271817, "loss": 1.3186, "step": 58650 }, { "epoch": 1.752605021944884, "grad_norm": 4.066229343414307, "learning_rate": 0.00018323018988679936, "loss": 1.3196, "step": 58700 }, { "epoch": 1.7540978711969666, "grad_norm": 4.105048656463623, "learning_rate": 0.00018321590409088053, "loss": 1.3663, "step": 58750 }, { "epoch": 1.755590720449049, "grad_norm": 4.454251766204834, "learning_rate": 0.0001832016182949617, "loss": 1.2823, "step": 58800 }, { "epoch": 1.7570835697011316, "grad_norm": 4.3030242919921875, "learning_rate": 0.00018318733249904286, "loss": 1.3267, "step": 58850 }, { "epoch": 1.758576418953214, "grad_norm": 5.350499629974365, "learning_rate": 0.00018317304670312402, "loss": 1.2992, "step": 58900 }, { "epoch": 1.7600692682052967, "grad_norm": 4.497105598449707, "learning_rate": 0.0001831587609072052, "loss": 1.2759, "step": 58950 }, { "epoch": 1.761562117457379, "grad_norm": 4.683063983917236, "learning_rate": 0.00018314447511128635, "loss": 1.3342, "step": 59000 }, { "epoch": 1.7630549667094617, "grad_norm": 3.4937946796417236, "learning_rate": 0.00018313018931536752, "loss": 1.2969, "step": 59050 }, { "epoch": 1.7645478159615442, "grad_norm": 5.422043800354004, "learning_rate": 0.00018311590351944868, "loss": 1.3116, "step": 59100 }, { "epoch": 1.7660406652136267, "grad_norm": 3.8882923126220703, "learning_rate": 0.00018310161772352985, "loss": 1.3073, "step": 59150 }, { "epoch": 1.7675335144657094, "grad_norm": 5.60578727722168, "learning_rate": 0.00018308733192761104, "loss": 1.31, "step": 59200 }, { "epoch": 1.7690263637177917, "grad_norm": 5.257917881011963, "learning_rate": 0.00018307304613169218, "loss": 1.3617, "step": 59250 }, { "epoch": 1.7705192129698744, "grad_norm": 4.838101863861084, "learning_rate": 0.00018305876033577337, "loss": 1.367, "step": 59300 }, { "epoch": 1.7720120622219568, "grad_norm": 4.63686990737915, "learning_rate": 0.0001830444745398545, "loss": 1.3653, "step": 59350 }, { "epoch": 1.7735049114740393, "grad_norm": 3.8766531944274902, "learning_rate": 0.0001830301887439357, "loss": 1.298, "step": 59400 }, { "epoch": 1.774997760726122, "grad_norm": 5.356032848358154, "learning_rate": 0.00018301590294801686, "loss": 1.3132, "step": 59450 }, { "epoch": 1.7764906099782043, "grad_norm": 4.647774696350098, "learning_rate": 0.00018300161715209803, "loss": 1.3594, "step": 59500 }, { "epoch": 1.777983459230287, "grad_norm": 3.6505117416381836, "learning_rate": 0.0001829873313561792, "loss": 1.3178, "step": 59550 }, { "epoch": 1.7794763084823695, "grad_norm": 4.109731674194336, "learning_rate": 0.00018297304556026036, "loss": 1.2996, "step": 59600 }, { "epoch": 1.780969157734452, "grad_norm": 3.942424774169922, "learning_rate": 0.00018295875976434152, "loss": 1.3066, "step": 59650 }, { "epoch": 1.7824620069865345, "grad_norm": 5.511881351470947, "learning_rate": 0.00018294447396842268, "loss": 1.3009, "step": 59700 }, { "epoch": 1.783954856238617, "grad_norm": 4.0874552726745605, "learning_rate": 0.00018293018817250385, "loss": 1.3426, "step": 59750 }, { "epoch": 1.7854477054906996, "grad_norm": 4.787434101104736, "learning_rate": 0.00018291590237658501, "loss": 1.3765, "step": 59800 }, { "epoch": 1.7869405547427821, "grad_norm": 4.477612018585205, "learning_rate": 0.00018290161658066618, "loss": 1.3215, "step": 59850 }, { "epoch": 1.7884334039948646, "grad_norm": 6.102437973022461, "learning_rate": 0.00018288733078474737, "loss": 1.2529, "step": 59900 }, { "epoch": 1.789926253246947, "grad_norm": 4.351520538330078, "learning_rate": 0.0001828730449888285, "loss": 1.2889, "step": 59950 }, { "epoch": 1.7914191024990296, "grad_norm": 5.043869972229004, "learning_rate": 0.0001828587591929097, "loss": 1.3149, "step": 60000 }, { "epoch": 1.7929119517511123, "grad_norm": 4.172428607940674, "learning_rate": 0.00018284447339699084, "loss": 1.2703, "step": 60050 }, { "epoch": 1.7944048010031945, "grad_norm": 5.1870903968811035, "learning_rate": 0.00018283018760107203, "loss": 1.3107, "step": 60100 }, { "epoch": 1.7958976502552773, "grad_norm": 4.141382694244385, "learning_rate": 0.00018281590180515317, "loss": 1.3252, "step": 60150 }, { "epoch": 1.7973904995073597, "grad_norm": 3.9119277000427246, "learning_rate": 0.00018280161600923436, "loss": 1.3875, "step": 60200 }, { "epoch": 1.7988833487594422, "grad_norm": 4.159456729888916, "learning_rate": 0.00018278733021331552, "loss": 1.3871, "step": 60250 }, { "epoch": 1.800376198011525, "grad_norm": 4.536618709564209, "learning_rate": 0.0001827730444173967, "loss": 1.2892, "step": 60300 }, { "epoch": 1.8018690472636072, "grad_norm": 7.839147090911865, "learning_rate": 0.00018275875862147785, "loss": 1.2953, "step": 60350 }, { "epoch": 1.80336189651569, "grad_norm": 3.6673262119293213, "learning_rate": 0.00018274447282555902, "loss": 1.3245, "step": 60400 }, { "epoch": 1.8048547457677724, "grad_norm": 4.275418281555176, "learning_rate": 0.00018273018702964018, "loss": 1.3061, "step": 60450 }, { "epoch": 1.8063475950198549, "grad_norm": 4.154748916625977, "learning_rate": 0.00018271590123372135, "loss": 1.3127, "step": 60500 }, { "epoch": 1.8078404442719376, "grad_norm": 4.65784215927124, "learning_rate": 0.0001827016154378025, "loss": 1.3717, "step": 60550 }, { "epoch": 1.8093332935240198, "grad_norm": 6.874159812927246, "learning_rate": 0.00018268732964188368, "loss": 1.3043, "step": 60600 }, { "epoch": 1.8108261427761025, "grad_norm": 5.431716442108154, "learning_rate": 0.00018267304384596484, "loss": 1.3138, "step": 60650 }, { "epoch": 1.812318992028185, "grad_norm": 4.631954669952393, "learning_rate": 0.00018265875805004603, "loss": 1.3241, "step": 60700 }, { "epoch": 1.8138118412802675, "grad_norm": 4.336903095245361, "learning_rate": 0.00018264447225412717, "loss": 1.2709, "step": 60750 }, { "epoch": 1.81530469053235, "grad_norm": 4.103146553039551, "learning_rate": 0.00018263018645820836, "loss": 1.2912, "step": 60800 }, { "epoch": 1.8167975397844325, "grad_norm": 4.709009170532227, "learning_rate": 0.0001826159006622895, "loss": 1.304, "step": 60850 }, { "epoch": 1.8182903890365152, "grad_norm": 5.0871148109436035, "learning_rate": 0.00018260161486637066, "loss": 1.301, "step": 60900 }, { "epoch": 1.8197832382885977, "grad_norm": 4.265575408935547, "learning_rate": 0.00018258732907045183, "loss": 1.3914, "step": 60950 }, { "epoch": 1.8212760875406802, "grad_norm": 4.705594539642334, "learning_rate": 0.000182573043274533, "loss": 1.3056, "step": 61000 }, { "epoch": 1.8227689367927626, "grad_norm": 3.86602783203125, "learning_rate": 0.00018255875747861418, "loss": 1.3394, "step": 61050 }, { "epoch": 1.8242617860448451, "grad_norm": 3.9397497177124023, "learning_rate": 0.00018254447168269532, "loss": 1.3625, "step": 61100 }, { "epoch": 1.8257546352969278, "grad_norm": 4.313930988311768, "learning_rate": 0.00018253018588677651, "loss": 1.3445, "step": 61150 }, { "epoch": 1.82724748454901, "grad_norm": 5.848763942718506, "learning_rate": 0.00018251590009085765, "loss": 1.3896, "step": 61200 }, { "epoch": 1.8287403338010928, "grad_norm": 4.323721885681152, "learning_rate": 0.00018250161429493884, "loss": 1.3616, "step": 61250 }, { "epoch": 1.8302331830531753, "grad_norm": 5.140015125274658, "learning_rate": 0.00018248732849901998, "loss": 1.2641, "step": 61300 }, { "epoch": 1.8317260323052578, "grad_norm": 5.685877799987793, "learning_rate": 0.00018247304270310117, "loss": 1.3396, "step": 61350 }, { "epoch": 1.8332188815573405, "grad_norm": 4.268862724304199, "learning_rate": 0.00018245875690718234, "loss": 1.3201, "step": 61400 }, { "epoch": 1.8347117308094227, "grad_norm": 5.0206732749938965, "learning_rate": 0.0001824444711112635, "loss": 1.372, "step": 61450 }, { "epoch": 1.8362045800615054, "grad_norm": 4.32529354095459, "learning_rate": 0.00018243018531534467, "loss": 1.3, "step": 61500 }, { "epoch": 1.837697429313588, "grad_norm": 5.349672317504883, "learning_rate": 0.00018241589951942583, "loss": 1.2957, "step": 61550 }, { "epoch": 1.8391902785656704, "grad_norm": 4.735100746154785, "learning_rate": 0.000182401613723507, "loss": 1.3546, "step": 61600 }, { "epoch": 1.8406831278177531, "grad_norm": 4.489027500152588, "learning_rate": 0.00018238732792758816, "loss": 1.2695, "step": 61650 }, { "epoch": 1.8421759770698354, "grad_norm": 3.2674996852874756, "learning_rate": 0.00018237304213166933, "loss": 1.3425, "step": 61700 }, { "epoch": 1.843668826321918, "grad_norm": 3.9815094470977783, "learning_rate": 0.0001823587563357505, "loss": 1.3396, "step": 61750 }, { "epoch": 1.8451616755740006, "grad_norm": 3.9253129959106445, "learning_rate": 0.00018234447053983165, "loss": 1.3641, "step": 61800 }, { "epoch": 1.846654524826083, "grad_norm": 6.536600112915039, "learning_rate": 0.00018233018474391285, "loss": 1.2578, "step": 61850 }, { "epoch": 1.8481473740781655, "grad_norm": 4.420536518096924, "learning_rate": 0.00018231589894799398, "loss": 1.3635, "step": 61900 }, { "epoch": 1.849640223330248, "grad_norm": 3.770279884338379, "learning_rate": 0.00018230161315207518, "loss": 1.2869, "step": 61950 }, { "epoch": 1.8511330725823307, "grad_norm": 4.154125213623047, "learning_rate": 0.0001822873273561563, "loss": 1.3246, "step": 62000 }, { "epoch": 1.8526259218344132, "grad_norm": 3.884587049484253, "learning_rate": 0.0001822730415602375, "loss": 1.329, "step": 62050 }, { "epoch": 1.8541187710864957, "grad_norm": 5.329457759857178, "learning_rate": 0.00018225875576431864, "loss": 1.3787, "step": 62100 }, { "epoch": 1.8556116203385782, "grad_norm": 5.966410160064697, "learning_rate": 0.00018224446996839983, "loss": 1.3574, "step": 62150 }, { "epoch": 1.8571044695906607, "grad_norm": 5.356328010559082, "learning_rate": 0.000182230184172481, "loss": 1.3113, "step": 62200 }, { "epoch": 1.8585973188427434, "grad_norm": 3.6018612384796143, "learning_rate": 0.00018221589837656216, "loss": 1.3296, "step": 62250 }, { "epoch": 1.8600901680948256, "grad_norm": 5.247616767883301, "learning_rate": 0.00018220161258064333, "loss": 1.3518, "step": 62300 }, { "epoch": 1.8615830173469083, "grad_norm": 5.828862190246582, "learning_rate": 0.0001821873267847245, "loss": 1.3027, "step": 62350 }, { "epoch": 1.8630758665989908, "grad_norm": 5.451898574829102, "learning_rate": 0.00018217304098880566, "loss": 1.3647, "step": 62400 }, { "epoch": 1.8645687158510733, "grad_norm": 3.905299663543701, "learning_rate": 0.00018215875519288682, "loss": 1.3528, "step": 62450 }, { "epoch": 1.866061565103156, "grad_norm": 4.312819004058838, "learning_rate": 0.000182144469396968, "loss": 1.3487, "step": 62500 }, { "epoch": 1.8675544143552383, "grad_norm": 4.3358073234558105, "learning_rate": 0.00018213018360104915, "loss": 1.2925, "step": 62550 }, { "epoch": 1.869047263607321, "grad_norm": 5.664048194885254, "learning_rate": 0.00018211589780513032, "loss": 1.3822, "step": 62600 }, { "epoch": 1.8705401128594035, "grad_norm": 4.992645263671875, "learning_rate": 0.0001821016120092115, "loss": 1.3819, "step": 62650 }, { "epoch": 1.872032962111486, "grad_norm": 4.036514759063721, "learning_rate": 0.00018208732621329265, "loss": 1.3193, "step": 62700 }, { "epoch": 1.8735258113635687, "grad_norm": 4.094419956207275, "learning_rate": 0.00018207304041737384, "loss": 1.3649, "step": 62750 }, { "epoch": 1.875018660615651, "grad_norm": 5.437168121337891, "learning_rate": 0.00018205875462145497, "loss": 1.3445, "step": 62800 }, { "epoch": 1.8765115098677336, "grad_norm": 4.937995433807373, "learning_rate": 0.00018204446882553617, "loss": 1.3588, "step": 62850 }, { "epoch": 1.8780043591198161, "grad_norm": 3.918588876724243, "learning_rate": 0.00018203018302961733, "loss": 1.3195, "step": 62900 }, { "epoch": 1.8794972083718986, "grad_norm": 4.650214672088623, "learning_rate": 0.0001820158972336985, "loss": 1.3812, "step": 62950 }, { "epoch": 1.880990057623981, "grad_norm": 3.844249725341797, "learning_rate": 0.00018200161143777966, "loss": 1.2877, "step": 63000 }, { "epoch": 1.8824829068760636, "grad_norm": 4.988340854644775, "learning_rate": 0.00018198732564186083, "loss": 1.2705, "step": 63050 }, { "epoch": 1.8839757561281463, "grad_norm": 4.229214668273926, "learning_rate": 0.000181973039845942, "loss": 1.3314, "step": 63100 }, { "epoch": 1.8854686053802288, "grad_norm": 4.095324993133545, "learning_rate": 0.00018195875405002315, "loss": 1.3779, "step": 63150 }, { "epoch": 1.8869614546323112, "grad_norm": 5.325993537902832, "learning_rate": 0.00018194446825410432, "loss": 1.3163, "step": 63200 }, { "epoch": 1.8884543038843937, "grad_norm": 5.301263809204102, "learning_rate": 0.00018193018245818548, "loss": 1.4235, "step": 63250 }, { "epoch": 1.8899471531364762, "grad_norm": 5.347028732299805, "learning_rate": 0.00018191589666226665, "loss": 1.356, "step": 63300 }, { "epoch": 1.891440002388559, "grad_norm": 4.957263469696045, "learning_rate": 0.00018190161086634784, "loss": 1.2915, "step": 63350 }, { "epoch": 1.8929328516406412, "grad_norm": 3.9531173706054688, "learning_rate": 0.00018188732507042898, "loss": 1.2905, "step": 63400 }, { "epoch": 1.8944257008927239, "grad_norm": 5.983827590942383, "learning_rate": 0.00018187303927451017, "loss": 1.3129, "step": 63450 }, { "epoch": 1.8959185501448064, "grad_norm": 4.4479522705078125, "learning_rate": 0.0001818587534785913, "loss": 1.3528, "step": 63500 }, { "epoch": 1.8974113993968889, "grad_norm": 4.956698417663574, "learning_rate": 0.00018184446768267247, "loss": 1.2675, "step": 63550 }, { "epoch": 1.8989042486489716, "grad_norm": 3.7053678035736084, "learning_rate": 0.00018183018188675364, "loss": 1.3316, "step": 63600 }, { "epoch": 1.9003970979010538, "grad_norm": 5.076517105102539, "learning_rate": 0.0001818158960908348, "loss": 1.3603, "step": 63650 }, { "epoch": 1.9018899471531365, "grad_norm": 4.377649307250977, "learning_rate": 0.000181801610294916, "loss": 1.3491, "step": 63700 }, { "epoch": 1.903382796405219, "grad_norm": 5.296698570251465, "learning_rate": 0.00018178732449899713, "loss": 1.3768, "step": 63750 }, { "epoch": 1.9048756456573015, "grad_norm": 4.674185752868652, "learning_rate": 0.00018177303870307832, "loss": 1.3599, "step": 63800 }, { "epoch": 1.9063684949093842, "grad_norm": 5.758728981018066, "learning_rate": 0.00018175875290715946, "loss": 1.3239, "step": 63850 }, { "epoch": 1.9078613441614665, "grad_norm": 4.195470333099365, "learning_rate": 0.00018174446711124065, "loss": 1.3132, "step": 63900 }, { "epoch": 1.9093541934135492, "grad_norm": 4.118239402770996, "learning_rate": 0.0001817301813153218, "loss": 1.2887, "step": 63950 }, { "epoch": 1.9108470426656317, "grad_norm": 3.598353147506714, "learning_rate": 0.00018171589551940298, "loss": 1.3341, "step": 64000 }, { "epoch": 1.9123398919177141, "grad_norm": 3.8190245628356934, "learning_rate": 0.00018170160972348415, "loss": 1.3011, "step": 64050 }, { "epoch": 1.9138327411697966, "grad_norm": 6.505800724029541, "learning_rate": 0.0001816873239275653, "loss": 1.3583, "step": 64100 }, { "epoch": 1.915325590421879, "grad_norm": 4.134885311126709, "learning_rate": 0.00018167303813164647, "loss": 1.3363, "step": 64150 }, { "epoch": 1.9168184396739618, "grad_norm": 3.753544807434082, "learning_rate": 0.00018165875233572764, "loss": 1.3415, "step": 64200 }, { "epoch": 1.9183112889260443, "grad_norm": 3.716143846511841, "learning_rate": 0.0001816444665398088, "loss": 1.3276, "step": 64250 }, { "epoch": 1.9198041381781268, "grad_norm": 4.619287490844727, "learning_rate": 0.00018163018074388997, "loss": 1.3624, "step": 64300 }, { "epoch": 1.9212969874302093, "grad_norm": 3.8095598220825195, "learning_rate": 0.00018161589494797113, "loss": 1.3488, "step": 64350 }, { "epoch": 1.9227898366822918, "grad_norm": 6.268500328063965, "learning_rate": 0.0001816016091520523, "loss": 1.3481, "step": 64400 }, { "epoch": 1.9242826859343745, "grad_norm": 6.642594814300537, "learning_rate": 0.00018158732335613346, "loss": 1.3392, "step": 64450 }, { "epoch": 1.9257755351864567, "grad_norm": 5.921909332275391, "learning_rate": 0.00018157303756021465, "loss": 1.3218, "step": 64500 }, { "epoch": 1.9272683844385394, "grad_norm": 4.467897415161133, "learning_rate": 0.0001815587517642958, "loss": 1.3087, "step": 64550 }, { "epoch": 1.928761233690622, "grad_norm": 6.775788307189941, "learning_rate": 0.00018154446596837698, "loss": 1.3712, "step": 64600 }, { "epoch": 1.9302540829427044, "grad_norm": 4.561954975128174, "learning_rate": 0.00018153018017245812, "loss": 1.3214, "step": 64650 }, { "epoch": 1.931746932194787, "grad_norm": 5.288097381591797, "learning_rate": 0.0001815158943765393, "loss": 1.3926, "step": 64700 }, { "epoch": 1.9332397814468694, "grad_norm": 4.883842945098877, "learning_rate": 0.00018150160858062045, "loss": 1.2545, "step": 64750 }, { "epoch": 1.934732630698952, "grad_norm": 4.528720378875732, "learning_rate": 0.00018148732278470164, "loss": 1.3733, "step": 64800 }, { "epoch": 1.9362254799510346, "grad_norm": 3.9458446502685547, "learning_rate": 0.0001814730369887828, "loss": 1.3685, "step": 64850 }, { "epoch": 1.937718329203117, "grad_norm": 5.117423057556152, "learning_rate": 0.00018145875119286397, "loss": 1.3712, "step": 64900 }, { "epoch": 1.9392111784551997, "grad_norm": 4.276913166046143, "learning_rate": 0.00018144446539694514, "loss": 1.2996, "step": 64950 }, { "epoch": 1.940704027707282, "grad_norm": 3.9713099002838135, "learning_rate": 0.0001814301796010263, "loss": 1.301, "step": 65000 }, { "epoch": 1.9421968769593647, "grad_norm": 4.240658760070801, "learning_rate": 0.00018141589380510747, "loss": 1.4013, "step": 65050 }, { "epoch": 1.9436897262114472, "grad_norm": 4.406320571899414, "learning_rate": 0.00018140160800918863, "loss": 1.3481, "step": 65100 }, { "epoch": 1.9451825754635297, "grad_norm": 4.139687538146973, "learning_rate": 0.0001813873222132698, "loss": 1.3179, "step": 65150 }, { "epoch": 1.9466754247156122, "grad_norm": 3.8167126178741455, "learning_rate": 0.00018137303641735096, "loss": 1.2714, "step": 65200 }, { "epoch": 1.9481682739676947, "grad_norm": 4.680548191070557, "learning_rate": 0.00018135875062143212, "loss": 1.3571, "step": 65250 }, { "epoch": 1.9496611232197774, "grad_norm": 8.530779838562012, "learning_rate": 0.00018134446482551332, "loss": 1.323, "step": 65300 }, { "epoch": 1.9511539724718598, "grad_norm": 4.576430320739746, "learning_rate": 0.00018133017902959445, "loss": 1.3709, "step": 65350 }, { "epoch": 1.9526468217239423, "grad_norm": 5.320089817047119, "learning_rate": 0.00018131589323367565, "loss": 1.3857, "step": 65400 }, { "epoch": 1.9541396709760248, "grad_norm": 4.247587203979492, "learning_rate": 0.00018130160743775678, "loss": 1.3637, "step": 65450 }, { "epoch": 1.9556325202281073, "grad_norm": 4.2958903312683105, "learning_rate": 0.00018128732164183797, "loss": 1.2861, "step": 65500 }, { "epoch": 1.95712536948019, "grad_norm": 4.818902969360352, "learning_rate": 0.00018127303584591914, "loss": 1.3264, "step": 65550 }, { "epoch": 1.9586182187322723, "grad_norm": 4.755589962005615, "learning_rate": 0.0001812587500500003, "loss": 1.3821, "step": 65600 }, { "epoch": 1.960111067984355, "grad_norm": 3.589580774307251, "learning_rate": 0.00018124446425408147, "loss": 1.2799, "step": 65650 }, { "epoch": 1.9616039172364375, "grad_norm": 5.7323431968688965, "learning_rate": 0.00018123017845816263, "loss": 1.2616, "step": 65700 }, { "epoch": 1.96309676648852, "grad_norm": 6.535925388336182, "learning_rate": 0.0001812158926622438, "loss": 1.3391, "step": 65750 }, { "epoch": 1.9645896157406026, "grad_norm": 3.783618688583374, "learning_rate": 0.00018120160686632496, "loss": 1.2743, "step": 65800 }, { "epoch": 1.966082464992685, "grad_norm": 4.055028438568115, "learning_rate": 0.00018118732107040613, "loss": 1.3319, "step": 65850 }, { "epoch": 1.9675753142447676, "grad_norm": 4.827310562133789, "learning_rate": 0.0001811730352744873, "loss": 1.3518, "step": 65900 }, { "epoch": 1.96906816349685, "grad_norm": 4.298334121704102, "learning_rate": 0.00018115874947856846, "loss": 1.3755, "step": 65950 }, { "epoch": 1.9705610127489326, "grad_norm": 4.718268871307373, "learning_rate": 0.00018114446368264962, "loss": 1.2615, "step": 66000 }, { "epoch": 1.9720538620010153, "grad_norm": 4.387974262237549, "learning_rate": 0.00018113017788673079, "loss": 1.3475, "step": 66050 }, { "epoch": 1.9735467112530976, "grad_norm": 4.896945476531982, "learning_rate": 0.00018111589209081198, "loss": 1.3145, "step": 66100 }, { "epoch": 1.9750395605051803, "grad_norm": 5.453362941741943, "learning_rate": 0.00018110160629489312, "loss": 1.3377, "step": 66150 }, { "epoch": 1.9765324097572627, "grad_norm": 6.6892499923706055, "learning_rate": 0.00018108732049897428, "loss": 1.2734, "step": 66200 }, { "epoch": 1.9780252590093452, "grad_norm": 4.2804765701293945, "learning_rate": 0.00018107303470305544, "loss": 1.3742, "step": 66250 }, { "epoch": 1.9795181082614277, "grad_norm": 4.235034465789795, "learning_rate": 0.0001810587489071366, "loss": 1.2921, "step": 66300 }, { "epoch": 1.9810109575135102, "grad_norm": 4.113801956176758, "learning_rate": 0.0001810444631112178, "loss": 1.3475, "step": 66350 }, { "epoch": 1.982503806765593, "grad_norm": 4.276202201843262, "learning_rate": 0.00018103017731529894, "loss": 1.2955, "step": 66400 }, { "epoch": 1.9839966560176754, "grad_norm": 4.72022008895874, "learning_rate": 0.00018101589151938013, "loss": 1.3676, "step": 66450 }, { "epoch": 1.9854895052697579, "grad_norm": 4.182514190673828, "learning_rate": 0.00018100160572346127, "loss": 1.3706, "step": 66500 }, { "epoch": 1.9869823545218404, "grad_norm": 3.8737592697143555, "learning_rate": 0.00018098731992754246, "loss": 1.3103, "step": 66550 }, { "epoch": 1.9884752037739228, "grad_norm": 6.266764163970947, "learning_rate": 0.0001809730341316236, "loss": 1.3175, "step": 66600 }, { "epoch": 1.9899680530260055, "grad_norm": 5.243070125579834, "learning_rate": 0.0001809587483357048, "loss": 1.3484, "step": 66650 }, { "epoch": 1.9914609022780878, "grad_norm": 4.289467811584473, "learning_rate": 0.00018094446253978595, "loss": 1.3806, "step": 66700 }, { "epoch": 1.9929537515301705, "grad_norm": 5.450198173522949, "learning_rate": 0.00018093017674386712, "loss": 1.3155, "step": 66750 }, { "epoch": 1.994446600782253, "grad_norm": 4.389066219329834, "learning_rate": 0.00018091589094794828, "loss": 1.293, "step": 66800 }, { "epoch": 1.9959394500343355, "grad_norm": 4.606175422668457, "learning_rate": 0.00018090160515202945, "loss": 1.3558, "step": 66850 }, { "epoch": 1.9974322992864182, "grad_norm": 4.8083319664001465, "learning_rate": 0.0001808873193561106, "loss": 1.2621, "step": 66900 }, { "epoch": 1.9989251485385005, "grad_norm": 3.760852813720703, "learning_rate": 0.00018087303356019178, "loss": 1.3539, "step": 66950 }, { "epoch": 2.000417997790583, "grad_norm": 3.6912026405334473, "learning_rate": 0.00018085874776427294, "loss": 1.2619, "step": 67000 }, { "epoch": 2.0019108470426654, "grad_norm": 4.681028842926025, "learning_rate": 0.0001808444619683541, "loss": 1.2457, "step": 67050 }, { "epoch": 2.003403696294748, "grad_norm": 4.194242477416992, "learning_rate": 0.00018083017617243527, "loss": 1.297, "step": 67100 }, { "epoch": 2.004896545546831, "grad_norm": 3.831331729888916, "learning_rate": 0.00018081589037651646, "loss": 1.2989, "step": 67150 }, { "epoch": 2.006389394798913, "grad_norm": 4.249265193939209, "learning_rate": 0.0001808016045805976, "loss": 1.2078, "step": 67200 }, { "epoch": 2.007882244050996, "grad_norm": 7.441315650939941, "learning_rate": 0.0001807873187846788, "loss": 1.2945, "step": 67250 }, { "epoch": 2.009375093303078, "grad_norm": 4.2568745613098145, "learning_rate": 0.00018077303298875993, "loss": 1.2295, "step": 67300 }, { "epoch": 2.0108679425551608, "grad_norm": 5.394452095031738, "learning_rate": 0.00018075874719284112, "loss": 1.2135, "step": 67350 }, { "epoch": 2.0123607918072435, "grad_norm": 3.199411630630493, "learning_rate": 0.00018074446139692226, "loss": 1.2314, "step": 67400 }, { "epoch": 2.0138536410593257, "grad_norm": 3.7905383110046387, "learning_rate": 0.00018073017560100345, "loss": 1.2227, "step": 67450 }, { "epoch": 2.0153464903114084, "grad_norm": 7.900835990905762, "learning_rate": 0.00018071588980508462, "loss": 1.2067, "step": 67500 }, { "epoch": 2.0168393395634907, "grad_norm": 4.803965091705322, "learning_rate": 0.00018070160400916578, "loss": 1.2894, "step": 67550 }, { "epoch": 2.0183321888155734, "grad_norm": 4.046397686004639, "learning_rate": 0.00018068731821324694, "loss": 1.2627, "step": 67600 }, { "epoch": 2.019825038067656, "grad_norm": 4.9588518142700195, "learning_rate": 0.0001806730324173281, "loss": 1.1931, "step": 67650 }, { "epoch": 2.0213178873197384, "grad_norm": 3.5710065364837646, "learning_rate": 0.00018065874662140927, "loss": 1.1937, "step": 67700 }, { "epoch": 2.022810736571821, "grad_norm": 4.107451915740967, "learning_rate": 0.00018064446082549044, "loss": 1.2236, "step": 67750 }, { "epoch": 2.0243035858239034, "grad_norm": 4.8303422927856445, "learning_rate": 0.0001806301750295716, "loss": 1.2223, "step": 67800 }, { "epoch": 2.025796435075986, "grad_norm": 6.539272308349609, "learning_rate": 0.00018061588923365277, "loss": 1.3199, "step": 67850 }, { "epoch": 2.0272892843280688, "grad_norm": 4.415231227874756, "learning_rate": 0.00018060160343773393, "loss": 1.2988, "step": 67900 }, { "epoch": 2.028782133580151, "grad_norm": 4.396175861358643, "learning_rate": 0.00018058731764181512, "loss": 1.1952, "step": 67950 }, { "epoch": 2.0302749828322337, "grad_norm": 3.2673914432525635, "learning_rate": 0.00018057303184589626, "loss": 1.2285, "step": 68000 }, { "epoch": 2.031767832084316, "grad_norm": 3.6252634525299072, "learning_rate": 0.00018055874604997745, "loss": 1.314, "step": 68050 }, { "epoch": 2.0332606813363987, "grad_norm": 4.550121307373047, "learning_rate": 0.0001805444602540586, "loss": 1.2846, "step": 68100 }, { "epoch": 2.034753530588481, "grad_norm": 4.809107303619385, "learning_rate": 0.00018053017445813978, "loss": 1.3165, "step": 68150 }, { "epoch": 2.0362463798405637, "grad_norm": 4.466585159301758, "learning_rate": 0.00018051588866222092, "loss": 1.2543, "step": 68200 }, { "epoch": 2.0377392290926464, "grad_norm": 4.243835926055908, "learning_rate": 0.0001805016028663021, "loss": 1.2908, "step": 68250 }, { "epoch": 2.0392320783447286, "grad_norm": 4.369659423828125, "learning_rate": 0.00018048731707038328, "loss": 1.2167, "step": 68300 }, { "epoch": 2.0407249275968113, "grad_norm": 4.6665496826171875, "learning_rate": 0.00018047303127446444, "loss": 1.2453, "step": 68350 }, { "epoch": 2.0422177768488936, "grad_norm": 4.3825507164001465, "learning_rate": 0.0001804587454785456, "loss": 1.2272, "step": 68400 }, { "epoch": 2.0437106261009763, "grad_norm": 4.299841403961182, "learning_rate": 0.00018044445968262677, "loss": 1.2198, "step": 68450 }, { "epoch": 2.045203475353059, "grad_norm": 3.937131881713867, "learning_rate": 0.00018043017388670794, "loss": 1.2763, "step": 68500 }, { "epoch": 2.0466963246051413, "grad_norm": 6.704258918762207, "learning_rate": 0.0001804158880907891, "loss": 1.3289, "step": 68550 }, { "epoch": 2.048189173857224, "grad_norm": 3.8973612785339355, "learning_rate": 0.00018040160229487026, "loss": 1.2907, "step": 68600 }, { "epoch": 2.0496820231093063, "grad_norm": 8.648911476135254, "learning_rate": 0.00018038731649895143, "loss": 1.2488, "step": 68650 }, { "epoch": 2.051174872361389, "grad_norm": 4.553196430206299, "learning_rate": 0.0001803730307030326, "loss": 1.2489, "step": 68700 }, { "epoch": 2.0526677216134717, "grad_norm": 4.379563808441162, "learning_rate": 0.00018035874490711376, "loss": 1.253, "step": 68750 }, { "epoch": 2.054160570865554, "grad_norm": 6.393334865570068, "learning_rate": 0.00018034445911119492, "loss": 1.1783, "step": 68800 }, { "epoch": 2.0556534201176366, "grad_norm": 4.846323013305664, "learning_rate": 0.0001803301733152761, "loss": 1.2583, "step": 68850 }, { "epoch": 2.057146269369719, "grad_norm": 5.24686861038208, "learning_rate": 0.00018031588751935725, "loss": 1.333, "step": 68900 }, { "epoch": 2.0586391186218016, "grad_norm": 4.824219226837158, "learning_rate": 0.00018030160172343842, "loss": 1.2931, "step": 68950 }, { "epoch": 2.0601319678738843, "grad_norm": 5.001037120819092, "learning_rate": 0.0001802873159275196, "loss": 1.2625, "step": 69000 }, { "epoch": 2.0616248171259666, "grad_norm": 4.036411762237549, "learning_rate": 0.00018027303013160075, "loss": 1.2585, "step": 69050 }, { "epoch": 2.0631176663780493, "grad_norm": 4.597596645355225, "learning_rate": 0.00018025874433568194, "loss": 1.2812, "step": 69100 }, { "epoch": 2.0646105156301315, "grad_norm": 7.785471439361572, "learning_rate": 0.00018024445853976308, "loss": 1.2749, "step": 69150 }, { "epoch": 2.0661033648822142, "grad_norm": 4.905806541442871, "learning_rate": 0.00018023017274384427, "loss": 1.2619, "step": 69200 }, { "epoch": 2.0675962141342965, "grad_norm": 5.271987438201904, "learning_rate": 0.0001802158869479254, "loss": 1.2769, "step": 69250 }, { "epoch": 2.069089063386379, "grad_norm": 4.129353046417236, "learning_rate": 0.0001802016011520066, "loss": 1.2552, "step": 69300 }, { "epoch": 2.070581912638462, "grad_norm": 4.855686187744141, "learning_rate": 0.00018018731535608776, "loss": 1.2452, "step": 69350 }, { "epoch": 2.072074761890544, "grad_norm": 4.6511383056640625, "learning_rate": 0.00018017302956016893, "loss": 1.2529, "step": 69400 }, { "epoch": 2.073567611142627, "grad_norm": 4.4711995124816895, "learning_rate": 0.0001801587437642501, "loss": 1.2721, "step": 69450 }, { "epoch": 2.075060460394709, "grad_norm": 4.466591835021973, "learning_rate": 0.00018014445796833126, "loss": 1.2654, "step": 69500 }, { "epoch": 2.076553309646792, "grad_norm": 5.165157318115234, "learning_rate": 0.00018013017217241242, "loss": 1.2522, "step": 69550 }, { "epoch": 2.0780461588988746, "grad_norm": 4.762765407562256, "learning_rate": 0.00018011588637649358, "loss": 1.2318, "step": 69600 }, { "epoch": 2.079539008150957, "grad_norm": 5.024138927459717, "learning_rate": 0.00018010160058057475, "loss": 1.2713, "step": 69650 }, { "epoch": 2.0810318574030395, "grad_norm": 5.054821014404297, "learning_rate": 0.00018008731478465591, "loss": 1.2714, "step": 69700 }, { "epoch": 2.082524706655122, "grad_norm": 5.052605152130127, "learning_rate": 0.00018007302898873708, "loss": 1.2001, "step": 69750 }, { "epoch": 2.0840175559072045, "grad_norm": 6.121365547180176, "learning_rate": 0.00018005874319281827, "loss": 1.2372, "step": 69800 }, { "epoch": 2.085510405159287, "grad_norm": 4.6751627922058105, "learning_rate": 0.0001800444573968994, "loss": 1.2366, "step": 69850 }, { "epoch": 2.0870032544113695, "grad_norm": 4.396819591522217, "learning_rate": 0.0001800301716009806, "loss": 1.2878, "step": 69900 }, { "epoch": 2.088496103663452, "grad_norm": 5.096658229827881, "learning_rate": 0.00018001588580506174, "loss": 1.2265, "step": 69950 }, { "epoch": 2.0899889529155344, "grad_norm": 4.45212459564209, "learning_rate": 0.00018000160000914293, "loss": 1.2261, "step": 70000 }, { "epoch": 2.091481802167617, "grad_norm": 4.837035179138184, "learning_rate": 0.00017998731421322407, "loss": 1.2857, "step": 70050 }, { "epoch": 2.0929746514197, "grad_norm": 4.402126789093018, "learning_rate": 0.00017997302841730526, "loss": 1.2614, "step": 70100 }, { "epoch": 2.094467500671782, "grad_norm": 3.9073615074157715, "learning_rate": 0.00017995874262138642, "loss": 1.2485, "step": 70150 }, { "epoch": 2.095960349923865, "grad_norm": 5.109444618225098, "learning_rate": 0.0001799444568254676, "loss": 1.2674, "step": 70200 }, { "epoch": 2.097453199175947, "grad_norm": 6.542872428894043, "learning_rate": 0.00017993017102954875, "loss": 1.2715, "step": 70250 }, { "epoch": 2.09894604842803, "grad_norm": 5.090799808502197, "learning_rate": 0.00017991588523362992, "loss": 1.2821, "step": 70300 }, { "epoch": 2.100438897680112, "grad_norm": 5.429018497467041, "learning_rate": 0.00017990159943771108, "loss": 1.2994, "step": 70350 }, { "epoch": 2.1019317469321948, "grad_norm": 8.873945236206055, "learning_rate": 0.00017988731364179225, "loss": 1.2912, "step": 70400 }, { "epoch": 2.1034245961842775, "grad_norm": 5.144947528839111, "learning_rate": 0.0001798730278458734, "loss": 1.2744, "step": 70450 }, { "epoch": 2.1049174454363597, "grad_norm": 5.346638202667236, "learning_rate": 0.00017985874204995458, "loss": 1.311, "step": 70500 }, { "epoch": 2.1064102946884424, "grad_norm": 5.384441375732422, "learning_rate": 0.00017984445625403574, "loss": 1.2662, "step": 70550 }, { "epoch": 2.1079031439405247, "grad_norm": 4.645060062408447, "learning_rate": 0.00017983017045811693, "loss": 1.2366, "step": 70600 }, { "epoch": 2.1093959931926074, "grad_norm": 4.9864349365234375, "learning_rate": 0.00017981588466219807, "loss": 1.2531, "step": 70650 }, { "epoch": 2.11088884244469, "grad_norm": 4.057093143463135, "learning_rate": 0.00017980159886627926, "loss": 1.2477, "step": 70700 }, { "epoch": 2.1123816916967724, "grad_norm": 4.121668815612793, "learning_rate": 0.0001797873130703604, "loss": 1.2533, "step": 70750 }, { "epoch": 2.113874540948855, "grad_norm": 5.140614032745361, "learning_rate": 0.0001797730272744416, "loss": 1.1833, "step": 70800 }, { "epoch": 2.1153673902009373, "grad_norm": 5.469926834106445, "learning_rate": 0.00017975874147852273, "loss": 1.2746, "step": 70850 }, { "epoch": 2.11686023945302, "grad_norm": 3.0967893600463867, "learning_rate": 0.00017974445568260392, "loss": 1.2449, "step": 70900 }, { "epoch": 2.1183530887051027, "grad_norm": 4.644883155822754, "learning_rate": 0.00017973016988668508, "loss": 1.2304, "step": 70950 }, { "epoch": 2.119845937957185, "grad_norm": 4.095651626586914, "learning_rate": 0.00017971588409076625, "loss": 1.3378, "step": 71000 }, { "epoch": 2.1213387872092677, "grad_norm": 5.656352519989014, "learning_rate": 0.00017970159829484741, "loss": 1.268, "step": 71050 }, { "epoch": 2.12283163646135, "grad_norm": 5.86411190032959, "learning_rate": 0.00017968731249892858, "loss": 1.3146, "step": 71100 }, { "epoch": 2.1243244857134327, "grad_norm": 4.751001358032227, "learning_rate": 0.00017967302670300974, "loss": 1.2861, "step": 71150 }, { "epoch": 2.1258173349655154, "grad_norm": 4.90724515914917, "learning_rate": 0.0001796587409070909, "loss": 1.2307, "step": 71200 }, { "epoch": 2.1273101842175977, "grad_norm": 4.715629577636719, "learning_rate": 0.00017964445511117207, "loss": 1.2602, "step": 71250 }, { "epoch": 2.1288030334696804, "grad_norm": 3.9297828674316406, "learning_rate": 0.00017963016931525324, "loss": 1.2705, "step": 71300 }, { "epoch": 2.1302958827217626, "grad_norm": 4.373486518859863, "learning_rate": 0.0001796158835193344, "loss": 1.264, "step": 71350 }, { "epoch": 2.1317887319738453, "grad_norm": 5.277180194854736, "learning_rate": 0.00017960159772341557, "loss": 1.2703, "step": 71400 }, { "epoch": 2.133281581225928, "grad_norm": 3.8668055534362793, "learning_rate": 0.00017958731192749673, "loss": 1.2655, "step": 71450 }, { "epoch": 2.1347744304780103, "grad_norm": 4.491888046264648, "learning_rate": 0.0001795730261315779, "loss": 1.2659, "step": 71500 }, { "epoch": 2.136267279730093, "grad_norm": 4.461415767669678, "learning_rate": 0.00017955874033565906, "loss": 1.3075, "step": 71550 }, { "epoch": 2.1377601289821753, "grad_norm": 5.062393665313721, "learning_rate": 0.00017954445453974023, "loss": 1.2328, "step": 71600 }, { "epoch": 2.139252978234258, "grad_norm": 5.79408073425293, "learning_rate": 0.0001795301687438214, "loss": 1.2528, "step": 71650 }, { "epoch": 2.1407458274863402, "grad_norm": 5.496243476867676, "learning_rate": 0.00017951588294790255, "loss": 1.2409, "step": 71700 }, { "epoch": 2.142238676738423, "grad_norm": 5.3767805099487305, "learning_rate": 0.00017950159715198375, "loss": 1.2383, "step": 71750 }, { "epoch": 2.1437315259905056, "grad_norm": 6.0737199783325195, "learning_rate": 0.00017948731135606488, "loss": 1.2936, "step": 71800 }, { "epoch": 2.145224375242588, "grad_norm": 5.363690376281738, "learning_rate": 0.00017947302556014608, "loss": 1.2774, "step": 71850 }, { "epoch": 2.1467172244946706, "grad_norm": 4.2048773765563965, "learning_rate": 0.0001794587397642272, "loss": 1.2476, "step": 71900 }, { "epoch": 2.148210073746753, "grad_norm": 5.775049686431885, "learning_rate": 0.0001794444539683084, "loss": 1.2646, "step": 71950 }, { "epoch": 2.1497029229988356, "grad_norm": 4.7285475730896, "learning_rate": 0.00017943016817238957, "loss": 1.3038, "step": 72000 }, { "epoch": 2.1511957722509183, "grad_norm": 4.310266971588135, "learning_rate": 0.00017941588237647073, "loss": 1.2478, "step": 72050 }, { "epoch": 2.1526886215030006, "grad_norm": 5.618819713592529, "learning_rate": 0.0001794015965805519, "loss": 1.2788, "step": 72100 }, { "epoch": 2.1541814707550833, "grad_norm": 5.624669075012207, "learning_rate": 0.00017938731078463306, "loss": 1.3053, "step": 72150 }, { "epoch": 2.1556743200071655, "grad_norm": 3.442650079727173, "learning_rate": 0.00017937302498871423, "loss": 1.2774, "step": 72200 }, { "epoch": 2.1571671692592482, "grad_norm": 5.232537746429443, "learning_rate": 0.0001793587391927954, "loss": 1.3006, "step": 72250 }, { "epoch": 2.1586600185113305, "grad_norm": 4.750761985778809, "learning_rate": 0.00017934445339687656, "loss": 1.2696, "step": 72300 }, { "epoch": 2.160152867763413, "grad_norm": 5.479654788970947, "learning_rate": 0.00017933016760095772, "loss": 1.2896, "step": 72350 }, { "epoch": 2.161645717015496, "grad_norm": 4.499126434326172, "learning_rate": 0.0001793158818050389, "loss": 1.2962, "step": 72400 }, { "epoch": 2.163138566267578, "grad_norm": 5.156326770782471, "learning_rate": 0.00017930159600912008, "loss": 1.2852, "step": 72450 }, { "epoch": 2.164631415519661, "grad_norm": 4.520088195800781, "learning_rate": 0.00017928731021320122, "loss": 1.2527, "step": 72500 }, { "epoch": 2.166124264771743, "grad_norm": 4.284939289093018, "learning_rate": 0.0001792730244172824, "loss": 1.3213, "step": 72550 }, { "epoch": 2.167617114023826, "grad_norm": 4.373524188995361, "learning_rate": 0.00017925873862136355, "loss": 1.2853, "step": 72600 }, { "epoch": 2.1691099632759085, "grad_norm": 3.9682133197784424, "learning_rate": 0.00017924445282544474, "loss": 1.2707, "step": 72650 }, { "epoch": 2.170602812527991, "grad_norm": 5.218033313751221, "learning_rate": 0.00017923016702952587, "loss": 1.2492, "step": 72700 }, { "epoch": 2.1720956617800735, "grad_norm": 4.900534152984619, "learning_rate": 0.00017921588123360707, "loss": 1.256, "step": 72750 }, { "epoch": 2.173588511032156, "grad_norm": 4.519747257232666, "learning_rate": 0.00017920159543768823, "loss": 1.2367, "step": 72800 }, { "epoch": 2.1750813602842385, "grad_norm": 4.465813159942627, "learning_rate": 0.0001791873096417694, "loss": 1.286, "step": 72850 }, { "epoch": 2.176574209536321, "grad_norm": 6.142160892486572, "learning_rate": 0.00017917302384585056, "loss": 1.2959, "step": 72900 }, { "epoch": 2.1780670587884035, "grad_norm": 3.851306200027466, "learning_rate": 0.00017915873804993173, "loss": 1.2511, "step": 72950 }, { "epoch": 2.179559908040486, "grad_norm": 5.0324788093566895, "learning_rate": 0.0001791444522540129, "loss": 1.2652, "step": 73000 }, { "epoch": 2.1810527572925684, "grad_norm": 4.664618492126465, "learning_rate": 0.00017913016645809405, "loss": 1.2639, "step": 73050 }, { "epoch": 2.182545606544651, "grad_norm": 4.418179988861084, "learning_rate": 0.00017911588066217522, "loss": 1.2948, "step": 73100 }, { "epoch": 2.184038455796734, "grad_norm": 4.895113945007324, "learning_rate": 0.00017910159486625638, "loss": 1.2283, "step": 73150 }, { "epoch": 2.185531305048816, "grad_norm": 4.2365946769714355, "learning_rate": 0.00017908730907033755, "loss": 1.2772, "step": 73200 }, { "epoch": 2.187024154300899, "grad_norm": 6.705420017242432, "learning_rate": 0.00017907302327441874, "loss": 1.273, "step": 73250 }, { "epoch": 2.188517003552981, "grad_norm": 5.094226360321045, "learning_rate": 0.00017905873747849988, "loss": 1.2391, "step": 73300 }, { "epoch": 2.1900098528050638, "grad_norm": 4.424404621124268, "learning_rate": 0.00017904445168258107, "loss": 1.2198, "step": 73350 }, { "epoch": 2.1915027020571465, "grad_norm": 5.0152201652526855, "learning_rate": 0.0001790301658866622, "loss": 1.3167, "step": 73400 }, { "epoch": 2.1929955513092287, "grad_norm": 4.916745662689209, "learning_rate": 0.0001790158800907434, "loss": 1.2482, "step": 73450 }, { "epoch": 2.1944884005613114, "grad_norm": 4.396121978759766, "learning_rate": 0.00017900159429482454, "loss": 1.2425, "step": 73500 }, { "epoch": 2.1959812498133937, "grad_norm": 4.289492607116699, "learning_rate": 0.00017898730849890573, "loss": 1.2647, "step": 73550 }, { "epoch": 2.1974740990654764, "grad_norm": 6.336133003234863, "learning_rate": 0.0001789730227029869, "loss": 1.2513, "step": 73600 }, { "epoch": 2.198966948317559, "grad_norm": 4.202422618865967, "learning_rate": 0.00017895873690706806, "loss": 1.2657, "step": 73650 }, { "epoch": 2.2004597975696414, "grad_norm": 4.372437953948975, "learning_rate": 0.00017894445111114922, "loss": 1.2432, "step": 73700 }, { "epoch": 2.201952646821724, "grad_norm": 5.008980751037598, "learning_rate": 0.0001789301653152304, "loss": 1.2848, "step": 73750 }, { "epoch": 2.2034454960738064, "grad_norm": 5.251125812530518, "learning_rate": 0.00017891587951931155, "loss": 1.3296, "step": 73800 }, { "epoch": 2.204938345325889, "grad_norm": 3.898746967315674, "learning_rate": 0.00017890159372339272, "loss": 1.2639, "step": 73850 }, { "epoch": 2.2064311945779713, "grad_norm": 5.178562164306641, "learning_rate": 0.00017888730792747388, "loss": 1.3065, "step": 73900 }, { "epoch": 2.207924043830054, "grad_norm": 5.667030334472656, "learning_rate": 0.00017887302213155505, "loss": 1.2954, "step": 73950 }, { "epoch": 2.2094168930821367, "grad_norm": 7.237538814544678, "learning_rate": 0.0001788587363356362, "loss": 1.2558, "step": 74000 }, { "epoch": 2.210909742334219, "grad_norm": 4.029843330383301, "learning_rate": 0.00017884445053971737, "loss": 1.3004, "step": 74050 }, { "epoch": 2.2124025915863017, "grad_norm": 5.686726093292236, "learning_rate": 0.00017883016474379854, "loss": 1.278, "step": 74100 }, { "epoch": 2.213895440838384, "grad_norm": 5.373737812042236, "learning_rate": 0.0001788158789478797, "loss": 1.3089, "step": 74150 }, { "epoch": 2.2153882900904667, "grad_norm": 4.493228912353516, "learning_rate": 0.00017880159315196087, "loss": 1.2856, "step": 74200 }, { "epoch": 2.2168811393425494, "grad_norm": 5.083597183227539, "learning_rate": 0.00017878730735604203, "loss": 1.2792, "step": 74250 }, { "epoch": 2.2183739885946316, "grad_norm": 5.1655144691467285, "learning_rate": 0.0001787730215601232, "loss": 1.2852, "step": 74300 }, { "epoch": 2.2198668378467143, "grad_norm": 3.2413179874420166, "learning_rate": 0.00017875873576420436, "loss": 1.2617, "step": 74350 }, { "epoch": 2.2213596870987966, "grad_norm": 4.30172061920166, "learning_rate": 0.00017874444996828555, "loss": 1.2949, "step": 74400 }, { "epoch": 2.2228525363508793, "grad_norm": 4.27219295501709, "learning_rate": 0.0001787301641723667, "loss": 1.2293, "step": 74450 }, { "epoch": 2.2243453856029616, "grad_norm": 5.430578708648682, "learning_rate": 0.00017871587837644788, "loss": 1.2662, "step": 74500 }, { "epoch": 2.2258382348550443, "grad_norm": 5.0630292892456055, "learning_rate": 0.00017870159258052902, "loss": 1.308, "step": 74550 }, { "epoch": 2.227331084107127, "grad_norm": 4.326575756072998, "learning_rate": 0.0001786873067846102, "loss": 1.3244, "step": 74600 }, { "epoch": 2.2288239333592093, "grad_norm": 4.2859206199646, "learning_rate": 0.00017867302098869138, "loss": 1.2879, "step": 74650 }, { "epoch": 2.230316782611292, "grad_norm": 3.931136131286621, "learning_rate": 0.00017865873519277254, "loss": 1.3046, "step": 74700 }, { "epoch": 2.231809631863374, "grad_norm": 3.9645185470581055, "learning_rate": 0.0001786444493968537, "loss": 1.2712, "step": 74750 }, { "epoch": 2.233302481115457, "grad_norm": 5.313785076141357, "learning_rate": 0.00017863016360093487, "loss": 1.2514, "step": 74800 }, { "epoch": 2.2347953303675396, "grad_norm": 4.994843482971191, "learning_rate": 0.00017861587780501604, "loss": 1.2803, "step": 74850 }, { "epoch": 2.236288179619622, "grad_norm": 6.394142150878906, "learning_rate": 0.0001786015920090972, "loss": 1.2424, "step": 74900 }, { "epoch": 2.2377810288717046, "grad_norm": 4.67982816696167, "learning_rate": 0.00017858730621317837, "loss": 1.2775, "step": 74950 }, { "epoch": 2.239273878123787, "grad_norm": 5.697408199310303, "learning_rate": 0.00017857302041725953, "loss": 1.2665, "step": 75000 }, { "epoch": 2.2407667273758696, "grad_norm": 6.522719383239746, "learning_rate": 0.0001785587346213407, "loss": 1.3017, "step": 75050 }, { "epoch": 2.2422595766279523, "grad_norm": 3.7727861404418945, "learning_rate": 0.00017854444882542186, "loss": 1.3516, "step": 75100 }, { "epoch": 2.2437524258800345, "grad_norm": 4.810925483703613, "learning_rate": 0.00017853016302950302, "loss": 1.277, "step": 75150 }, { "epoch": 2.2452452751321172, "grad_norm": 3.6915650367736816, "learning_rate": 0.00017851587723358422, "loss": 1.3159, "step": 75200 }, { "epoch": 2.2467381243841995, "grad_norm": 4.427685260772705, "learning_rate": 0.00017850159143766535, "loss": 1.2725, "step": 75250 }, { "epoch": 2.248230973636282, "grad_norm": 7.222316265106201, "learning_rate": 0.00017848730564174655, "loss": 1.3272, "step": 75300 }, { "epoch": 2.249723822888365, "grad_norm": 4.113325595855713, "learning_rate": 0.00017847301984582768, "loss": 1.2861, "step": 75350 }, { "epoch": 2.251216672140447, "grad_norm": 3.8266818523406982, "learning_rate": 0.00017845873404990887, "loss": 1.2688, "step": 75400 }, { "epoch": 2.25270952139253, "grad_norm": 5.281492710113525, "learning_rate": 0.00017844444825399004, "loss": 1.2743, "step": 75450 }, { "epoch": 2.254202370644612, "grad_norm": 5.222156047821045, "learning_rate": 0.0001784301624580712, "loss": 1.2324, "step": 75500 }, { "epoch": 2.255695219896695, "grad_norm": 4.961234092712402, "learning_rate": 0.00017841587666215237, "loss": 1.3248, "step": 75550 }, { "epoch": 2.2571880691487776, "grad_norm": 5.501556396484375, "learning_rate": 0.00017840159086623353, "loss": 1.2868, "step": 75600 }, { "epoch": 2.25868091840086, "grad_norm": 4.440338611602783, "learning_rate": 0.0001783873050703147, "loss": 1.237, "step": 75650 }, { "epoch": 2.2601737676529425, "grad_norm": 7.373712539672852, "learning_rate": 0.00017837301927439586, "loss": 1.2931, "step": 75700 }, { "epoch": 2.261666616905025, "grad_norm": 4.324766635894775, "learning_rate": 0.00017835873347847703, "loss": 1.2762, "step": 75750 }, { "epoch": 2.2631594661571075, "grad_norm": 7.707190990447998, "learning_rate": 0.0001783444476825582, "loss": 1.2565, "step": 75800 }, { "epoch": 2.26465231540919, "grad_norm": 3.9111223220825195, "learning_rate": 0.00017833016188663936, "loss": 1.2134, "step": 75850 }, { "epoch": 2.2661451646612725, "grad_norm": 4.735013961791992, "learning_rate": 0.00017831587609072055, "loss": 1.1908, "step": 75900 }, { "epoch": 2.267638013913355, "grad_norm": 5.743906021118164, "learning_rate": 0.00017830159029480169, "loss": 1.294, "step": 75950 }, { "epoch": 2.2691308631654374, "grad_norm": 3.792663097381592, "learning_rate": 0.00017828730449888288, "loss": 1.3725, "step": 76000 }, { "epoch": 2.27062371241752, "grad_norm": 4.77357292175293, "learning_rate": 0.00017827301870296402, "loss": 1.2511, "step": 76050 }, { "epoch": 2.272116561669603, "grad_norm": 5.1196675300598145, "learning_rate": 0.0001782587329070452, "loss": 1.3099, "step": 76100 }, { "epoch": 2.273609410921685, "grad_norm": 5.5518574714660645, "learning_rate": 0.00017824444711112634, "loss": 1.2841, "step": 76150 }, { "epoch": 2.275102260173768, "grad_norm": 4.075642108917236, "learning_rate": 0.00017823016131520754, "loss": 1.2857, "step": 76200 }, { "epoch": 2.27659510942585, "grad_norm": 6.330162048339844, "learning_rate": 0.0001782158755192887, "loss": 1.253, "step": 76250 }, { "epoch": 2.278087958677933, "grad_norm": 3.8504278659820557, "learning_rate": 0.00017820158972336987, "loss": 1.3167, "step": 76300 }, { "epoch": 2.279580807930015, "grad_norm": 5.074968338012695, "learning_rate": 0.00017818730392745103, "loss": 1.3109, "step": 76350 }, { "epoch": 2.2810736571820978, "grad_norm": 4.519435405731201, "learning_rate": 0.0001781730181315322, "loss": 1.2721, "step": 76400 }, { "epoch": 2.2825665064341805, "grad_norm": 5.285332202911377, "learning_rate": 0.00017815873233561336, "loss": 1.3091, "step": 76450 }, { "epoch": 2.2840593556862627, "grad_norm": 6.193671226501465, "learning_rate": 0.00017814444653969452, "loss": 1.2676, "step": 76500 }, { "epoch": 2.2855522049383454, "grad_norm": 4.4518961906433105, "learning_rate": 0.0001781301607437757, "loss": 1.2637, "step": 76550 }, { "epoch": 2.2870450541904277, "grad_norm": 5.584630966186523, "learning_rate": 0.00017811587494785685, "loss": 1.2906, "step": 76600 }, { "epoch": 2.2885379034425104, "grad_norm": 5.528042793273926, "learning_rate": 0.00017810158915193802, "loss": 1.2701, "step": 76650 }, { "epoch": 2.2900307526945927, "grad_norm": 5.087693214416504, "learning_rate": 0.00017808730335601918, "loss": 1.33, "step": 76700 }, { "epoch": 2.2915236019466754, "grad_norm": 4.278517723083496, "learning_rate": 0.00017807301756010035, "loss": 1.2854, "step": 76750 }, { "epoch": 2.293016451198758, "grad_norm": 4.385780334472656, "learning_rate": 0.0001780587317641815, "loss": 1.2567, "step": 76800 }, { "epoch": 2.2945093004508403, "grad_norm": 4.630320072174072, "learning_rate": 0.00017804444596826268, "loss": 1.2706, "step": 76850 }, { "epoch": 2.296002149702923, "grad_norm": 6.519657135009766, "learning_rate": 0.00017803016017234384, "loss": 1.2892, "step": 76900 }, { "epoch": 2.2974949989550053, "grad_norm": 5.163790702819824, "learning_rate": 0.000178015874376425, "loss": 1.3251, "step": 76950 }, { "epoch": 2.298987848207088, "grad_norm": 4.371760368347168, "learning_rate": 0.00017800158858050617, "loss": 1.3014, "step": 77000 }, { "epoch": 2.3004806974591707, "grad_norm": 4.700384616851807, "learning_rate": 0.00017798730278458736, "loss": 1.2648, "step": 77050 }, { "epoch": 2.301973546711253, "grad_norm": 4.602656364440918, "learning_rate": 0.0001779730169886685, "loss": 1.3043, "step": 77100 }, { "epoch": 2.3034663959633357, "grad_norm": 4.436239719390869, "learning_rate": 0.0001779587311927497, "loss": 1.2702, "step": 77150 }, { "epoch": 2.304959245215418, "grad_norm": 5.805758476257324, "learning_rate": 0.00017794444539683083, "loss": 1.2556, "step": 77200 }, { "epoch": 2.3064520944675007, "grad_norm": 5.682462215423584, "learning_rate": 0.00017793015960091202, "loss": 1.2422, "step": 77250 }, { "epoch": 2.3079449437195834, "grad_norm": 5.270015716552734, "learning_rate": 0.00017791587380499316, "loss": 1.2666, "step": 77300 }, { "epoch": 2.3094377929716656, "grad_norm": 5.771697998046875, "learning_rate": 0.00017790158800907435, "loss": 1.2735, "step": 77350 }, { "epoch": 2.3109306422237483, "grad_norm": 4.319996356964111, "learning_rate": 0.00017788730221315551, "loss": 1.2362, "step": 77400 }, { "epoch": 2.3124234914758306, "grad_norm": 7.646838188171387, "learning_rate": 0.00017787301641723668, "loss": 1.2998, "step": 77450 }, { "epoch": 2.3139163407279133, "grad_norm": 4.850773334503174, "learning_rate": 0.00017785873062131784, "loss": 1.2505, "step": 77500 }, { "epoch": 2.315409189979996, "grad_norm": 8.903724670410156, "learning_rate": 0.000177844444825399, "loss": 1.2483, "step": 77550 }, { "epoch": 2.3169020392320783, "grad_norm": 4.112529754638672, "learning_rate": 0.00017783015902948017, "loss": 1.2622, "step": 77600 }, { "epoch": 2.318394888484161, "grad_norm": 5.173572063446045, "learning_rate": 0.00017781587323356134, "loss": 1.2783, "step": 77650 }, { "epoch": 2.3198877377362432, "grad_norm": 3.9499335289001465, "learning_rate": 0.0001778015874376425, "loss": 1.3203, "step": 77700 }, { "epoch": 2.321380586988326, "grad_norm": 4.4116926193237305, "learning_rate": 0.00017778730164172367, "loss": 1.2819, "step": 77750 }, { "epoch": 2.3228734362404087, "grad_norm": 5.434197902679443, "learning_rate": 0.00017777301584580483, "loss": 1.2591, "step": 77800 }, { "epoch": 2.324366285492491, "grad_norm": 3.9472904205322266, "learning_rate": 0.00017775873004988602, "loss": 1.2609, "step": 77850 }, { "epoch": 2.3258591347445736, "grad_norm": 4.981082439422607, "learning_rate": 0.00017774444425396716, "loss": 1.3558, "step": 77900 }, { "epoch": 2.327351983996656, "grad_norm": 4.96250057220459, "learning_rate": 0.00017773015845804835, "loss": 1.2478, "step": 77950 }, { "epoch": 2.3288448332487386, "grad_norm": 7.093445301055908, "learning_rate": 0.0001777158726621295, "loss": 1.2659, "step": 78000 }, { "epoch": 2.3303376825008213, "grad_norm": 4.415938854217529, "learning_rate": 0.00017770158686621068, "loss": 1.2994, "step": 78050 }, { "epoch": 2.3318305317529036, "grad_norm": 6.983063697814941, "learning_rate": 0.00017768730107029185, "loss": 1.2994, "step": 78100 }, { "epoch": 2.3333233810049863, "grad_norm": 6.34420919418335, "learning_rate": 0.000177673015274373, "loss": 1.3172, "step": 78150 }, { "epoch": 2.3348162302570685, "grad_norm": 4.156967639923096, "learning_rate": 0.00017765872947845418, "loss": 1.2425, "step": 78200 }, { "epoch": 2.3363090795091512, "grad_norm": 4.001344680786133, "learning_rate": 0.00017764444368253534, "loss": 1.3034, "step": 78250 }, { "epoch": 2.337801928761234, "grad_norm": 5.008638381958008, "learning_rate": 0.0001776301578866165, "loss": 1.2912, "step": 78300 }, { "epoch": 2.339294778013316, "grad_norm": 3.917297840118408, "learning_rate": 0.00017761587209069767, "loss": 1.2807, "step": 78350 }, { "epoch": 2.340787627265399, "grad_norm": 4.353655815124512, "learning_rate": 0.00017760158629477884, "loss": 1.3237, "step": 78400 }, { "epoch": 2.342280476517481, "grad_norm": 5.5648698806762695, "learning_rate": 0.00017758730049886, "loss": 1.3145, "step": 78450 }, { "epoch": 2.343773325769564, "grad_norm": 3.928065538406372, "learning_rate": 0.00017757301470294116, "loss": 1.297, "step": 78500 }, { "epoch": 2.345266175021646, "grad_norm": 4.029613494873047, "learning_rate": 0.00017755872890702233, "loss": 1.2477, "step": 78550 }, { "epoch": 2.346759024273729, "grad_norm": 3.9108121395111084, "learning_rate": 0.0001775444431111035, "loss": 1.2537, "step": 78600 }, { "epoch": 2.3482518735258116, "grad_norm": 4.727762699127197, "learning_rate": 0.00017753015731518469, "loss": 1.2956, "step": 78650 }, { "epoch": 2.349744722777894, "grad_norm": 4.287507057189941, "learning_rate": 0.00017751587151926582, "loss": 1.2689, "step": 78700 }, { "epoch": 2.3512375720299765, "grad_norm": 4.162285804748535, "learning_rate": 0.00017750158572334701, "loss": 1.2803, "step": 78750 }, { "epoch": 2.352730421282059, "grad_norm": 5.458852291107178, "learning_rate": 0.00017748729992742815, "loss": 1.2789, "step": 78800 }, { "epoch": 2.3542232705341415, "grad_norm": 4.103569984436035, "learning_rate": 0.00017747301413150934, "loss": 1.3299, "step": 78850 }, { "epoch": 2.3557161197862238, "grad_norm": 4.286417007446289, "learning_rate": 0.0001774587283355905, "loss": 1.3333, "step": 78900 }, { "epoch": 2.3572089690383065, "grad_norm": 4.811729431152344, "learning_rate": 0.00017744444253967167, "loss": 1.3414, "step": 78950 }, { "epoch": 2.358701818290389, "grad_norm": 3.960171699523926, "learning_rate": 0.00017743015674375284, "loss": 1.2661, "step": 79000 }, { "epoch": 2.3601946675424714, "grad_norm": 5.161068439483643, "learning_rate": 0.000177415870947834, "loss": 1.2622, "step": 79050 }, { "epoch": 2.361687516794554, "grad_norm": 5.81963586807251, "learning_rate": 0.00017740158515191517, "loss": 1.3336, "step": 79100 }, { "epoch": 2.3631803660466364, "grad_norm": 5.815190315246582, "learning_rate": 0.00017738729935599633, "loss": 1.3078, "step": 79150 }, { "epoch": 2.364673215298719, "grad_norm": 3.994666814804077, "learning_rate": 0.0001773730135600775, "loss": 1.2237, "step": 79200 }, { "epoch": 2.366166064550802, "grad_norm": 5.232024669647217, "learning_rate": 0.00017735872776415866, "loss": 1.3267, "step": 79250 }, { "epoch": 2.367658913802884, "grad_norm": 4.517995357513428, "learning_rate": 0.00017734444196823983, "loss": 1.3168, "step": 79300 }, { "epoch": 2.3691517630549668, "grad_norm": 4.49736213684082, "learning_rate": 0.000177330156172321, "loss": 1.3403, "step": 79350 }, { "epoch": 2.370644612307049, "grad_norm": 5.295841693878174, "learning_rate": 0.00017731587037640216, "loss": 1.2639, "step": 79400 }, { "epoch": 2.3721374615591317, "grad_norm": 6.010612487792969, "learning_rate": 0.00017730158458048332, "loss": 1.273, "step": 79450 }, { "epoch": 2.3736303108112144, "grad_norm": 4.014612674713135, "learning_rate": 0.00017728729878456448, "loss": 1.3027, "step": 79500 }, { "epoch": 2.3751231600632967, "grad_norm": 5.472930908203125, "learning_rate": 0.00017727301298864565, "loss": 1.2927, "step": 79550 }, { "epoch": 2.3766160093153794, "grad_norm": 5.090315341949463, "learning_rate": 0.00017725872719272681, "loss": 1.2994, "step": 79600 }, { "epoch": 2.3781088585674617, "grad_norm": 4.583268165588379, "learning_rate": 0.00017724444139680798, "loss": 1.3249, "step": 79650 }, { "epoch": 2.3796017078195444, "grad_norm": 5.765147686004639, "learning_rate": 0.00017723015560088917, "loss": 1.2582, "step": 79700 }, { "epoch": 2.381094557071627, "grad_norm": 4.751801490783691, "learning_rate": 0.0001772158698049703, "loss": 1.2497, "step": 79750 }, { "epoch": 2.3825874063237094, "grad_norm": 6.316953182220459, "learning_rate": 0.0001772015840090515, "loss": 1.2949, "step": 79800 }, { "epoch": 2.384080255575792, "grad_norm": 4.6090779304504395, "learning_rate": 0.00017718729821313264, "loss": 1.2922, "step": 79850 }, { "epoch": 2.3855731048278743, "grad_norm": 5.004429340362549, "learning_rate": 0.00017717301241721383, "loss": 1.336, "step": 79900 }, { "epoch": 2.387065954079957, "grad_norm": 5.7317352294921875, "learning_rate": 0.00017715872662129497, "loss": 1.2296, "step": 79950 }, { "epoch": 2.3885588033320397, "grad_norm": 4.938337326049805, "learning_rate": 0.00017714444082537616, "loss": 1.3118, "step": 80000 }, { "epoch": 2.390051652584122, "grad_norm": 7.3811869621276855, "learning_rate": 0.00017713015502945732, "loss": 1.2989, "step": 80050 }, { "epoch": 2.3915445018362047, "grad_norm": 4.3925604820251465, "learning_rate": 0.0001771158692335385, "loss": 1.3183, "step": 80100 }, { "epoch": 2.393037351088287, "grad_norm": 3.382279396057129, "learning_rate": 0.00017710158343761965, "loss": 1.2749, "step": 80150 }, { "epoch": 2.3945302003403697, "grad_norm": 6.085289478302002, "learning_rate": 0.00017708729764170082, "loss": 1.2881, "step": 80200 }, { "epoch": 2.3960230495924524, "grad_norm": 3.96476149559021, "learning_rate": 0.00017707301184578198, "loss": 1.2656, "step": 80250 }, { "epoch": 2.3975158988445346, "grad_norm": 4.456584930419922, "learning_rate": 0.00017705872604986315, "loss": 1.2969, "step": 80300 }, { "epoch": 2.3990087480966173, "grad_norm": 5.09492301940918, "learning_rate": 0.0001770444402539443, "loss": 1.2679, "step": 80350 }, { "epoch": 2.4005015973486996, "grad_norm": 4.211208820343018, "learning_rate": 0.00017703015445802548, "loss": 1.2235, "step": 80400 }, { "epoch": 2.4019944466007823, "grad_norm": 4.683793067932129, "learning_rate": 0.00017701586866210664, "loss": 1.2791, "step": 80450 }, { "epoch": 2.403487295852865, "grad_norm": 4.003592014312744, "learning_rate": 0.00017700158286618783, "loss": 1.2498, "step": 80500 }, { "epoch": 2.4049801451049473, "grad_norm": 5.065978527069092, "learning_rate": 0.00017698729707026897, "loss": 1.2681, "step": 80550 }, { "epoch": 2.40647299435703, "grad_norm": 4.453160762786865, "learning_rate": 0.00017697301127435016, "loss": 1.3567, "step": 80600 }, { "epoch": 2.4079658436091123, "grad_norm": 4.852217197418213, "learning_rate": 0.0001769587254784313, "loss": 1.2808, "step": 80650 }, { "epoch": 2.409458692861195, "grad_norm": 4.629857063293457, "learning_rate": 0.0001769444396825125, "loss": 1.2994, "step": 80700 }, { "epoch": 2.4109515421132772, "grad_norm": 4.611933708190918, "learning_rate": 0.00017693015388659363, "loss": 1.2477, "step": 80750 }, { "epoch": 2.41244439136536, "grad_norm": 4.3425822257995605, "learning_rate": 0.00017691586809067482, "loss": 1.2861, "step": 80800 }, { "epoch": 2.4139372406174426, "grad_norm": 5.061252593994141, "learning_rate": 0.00017690158229475598, "loss": 1.345, "step": 80850 }, { "epoch": 2.415430089869525, "grad_norm": 5.230067253112793, "learning_rate": 0.00017688729649883715, "loss": 1.3097, "step": 80900 }, { "epoch": 2.4169229391216076, "grad_norm": 4.092568874359131, "learning_rate": 0.00017687301070291831, "loss": 1.2609, "step": 80950 }, { "epoch": 2.41841578837369, "grad_norm": 7.115472316741943, "learning_rate": 0.00017685872490699948, "loss": 1.3306, "step": 81000 }, { "epoch": 2.4199086376257726, "grad_norm": 4.142023086547852, "learning_rate": 0.00017684443911108064, "loss": 1.2715, "step": 81050 }, { "epoch": 2.421401486877855, "grad_norm": 4.5259575843811035, "learning_rate": 0.0001768301533151618, "loss": 1.3097, "step": 81100 }, { "epoch": 2.4228943361299375, "grad_norm": 4.749493598937988, "learning_rate": 0.00017681586751924297, "loss": 1.2413, "step": 81150 }, { "epoch": 2.4243871853820202, "grad_norm": 3.844097137451172, "learning_rate": 0.00017680158172332414, "loss": 1.3365, "step": 81200 }, { "epoch": 2.4258800346341025, "grad_norm": 4.504172325134277, "learning_rate": 0.0001767872959274053, "loss": 1.2889, "step": 81250 }, { "epoch": 2.427372883886185, "grad_norm": 4.036706447601318, "learning_rate": 0.0001767730101314865, "loss": 1.3041, "step": 81300 }, { "epoch": 2.4288657331382675, "grad_norm": 4.100889205932617, "learning_rate": 0.00017675872433556763, "loss": 1.2886, "step": 81350 }, { "epoch": 2.43035858239035, "grad_norm": 5.082368850708008, "learning_rate": 0.00017674443853964882, "loss": 1.333, "step": 81400 }, { "epoch": 2.431851431642433, "grad_norm": 4.644407749176025, "learning_rate": 0.00017673015274372996, "loss": 1.3191, "step": 81450 }, { "epoch": 2.433344280894515, "grad_norm": 4.445605754852295, "learning_rate": 0.00017671586694781115, "loss": 1.2961, "step": 81500 }, { "epoch": 2.434837130146598, "grad_norm": 4.809011936187744, "learning_rate": 0.00017670158115189232, "loss": 1.2496, "step": 81550 }, { "epoch": 2.43632997939868, "grad_norm": 5.294357776641846, "learning_rate": 0.00017668729535597348, "loss": 1.2885, "step": 81600 }, { "epoch": 2.437822828650763, "grad_norm": 4.103408336639404, "learning_rate": 0.00017667300956005465, "loss": 1.2612, "step": 81650 }, { "epoch": 2.4393156779028455, "grad_norm": 4.427671909332275, "learning_rate": 0.0001766587237641358, "loss": 1.2849, "step": 81700 }, { "epoch": 2.440808527154928, "grad_norm": 5.336637496948242, "learning_rate": 0.00017664443796821698, "loss": 1.2923, "step": 81750 }, { "epoch": 2.4423013764070105, "grad_norm": 5.235950946807861, "learning_rate": 0.00017663015217229814, "loss": 1.3086, "step": 81800 }, { "epoch": 2.4437942256590928, "grad_norm": 5.3591766357421875, "learning_rate": 0.0001766158663763793, "loss": 1.3326, "step": 81850 }, { "epoch": 2.4452870749111755, "grad_norm": 5.432180881500244, "learning_rate": 0.00017660158058046047, "loss": 1.2918, "step": 81900 }, { "epoch": 2.446779924163258, "grad_norm": 5.184196949005127, "learning_rate": 0.00017658729478454163, "loss": 1.2652, "step": 81950 }, { "epoch": 2.4482727734153404, "grad_norm": 4.962531566619873, "learning_rate": 0.0001765730089886228, "loss": 1.2579, "step": 82000 }, { "epoch": 2.449765622667423, "grad_norm": 3.7370457649230957, "learning_rate": 0.00017655872319270396, "loss": 1.3204, "step": 82050 }, { "epoch": 2.4512584719195054, "grad_norm": 4.21932315826416, "learning_rate": 0.00017654443739678513, "loss": 1.333, "step": 82100 }, { "epoch": 2.452751321171588, "grad_norm": 4.366473197937012, "learning_rate": 0.0001765301516008663, "loss": 1.2594, "step": 82150 }, { "epoch": 2.454244170423671, "grad_norm": 5.760409832000732, "learning_rate": 0.00017651586580494746, "loss": 1.3004, "step": 82200 }, { "epoch": 2.455737019675753, "grad_norm": 4.652368545532227, "learning_rate": 0.00017650158000902862, "loss": 1.3092, "step": 82250 }, { "epoch": 2.457229868927836, "grad_norm": 3.5619237422943115, "learning_rate": 0.0001764872942131098, "loss": 1.3315, "step": 82300 }, { "epoch": 2.458722718179918, "grad_norm": 3.285752534866333, "learning_rate": 0.00017647300841719098, "loss": 1.2906, "step": 82350 }, { "epoch": 2.4602155674320008, "grad_norm": 4.153928279876709, "learning_rate": 0.00017645872262127212, "loss": 1.3055, "step": 82400 }, { "epoch": 2.4617084166840835, "grad_norm": 3.767779588699341, "learning_rate": 0.0001764444368253533, "loss": 1.264, "step": 82450 }, { "epoch": 2.4632012659361657, "grad_norm": 4.4513983726501465, "learning_rate": 0.00017643015102943445, "loss": 1.2749, "step": 82500 }, { "epoch": 2.4646941151882484, "grad_norm": 4.600104808807373, "learning_rate": 0.00017641586523351564, "loss": 1.3194, "step": 82550 }, { "epoch": 2.4661869644403307, "grad_norm": 5.654660701751709, "learning_rate": 0.00017640157943759677, "loss": 1.2525, "step": 82600 }, { "epoch": 2.4676798136924134, "grad_norm": 5.852449893951416, "learning_rate": 0.00017638729364167797, "loss": 1.2609, "step": 82650 }, { "epoch": 2.469172662944496, "grad_norm": 4.8410139083862305, "learning_rate": 0.00017637300784575913, "loss": 1.2892, "step": 82700 }, { "epoch": 2.4706655121965784, "grad_norm": 4.150029182434082, "learning_rate": 0.0001763587220498403, "loss": 1.2893, "step": 82750 }, { "epoch": 2.472158361448661, "grad_norm": 4.355432033538818, "learning_rate": 0.00017634443625392146, "loss": 1.294, "step": 82800 }, { "epoch": 2.4736512107007433, "grad_norm": 4.689866542816162, "learning_rate": 0.00017633015045800262, "loss": 1.3327, "step": 82850 }, { "epoch": 2.475144059952826, "grad_norm": 5.386442184448242, "learning_rate": 0.0001763158646620838, "loss": 1.3027, "step": 82900 }, { "epoch": 2.4766369092049083, "grad_norm": 4.756774425506592, "learning_rate": 0.00017630157886616495, "loss": 1.2994, "step": 82950 }, { "epoch": 2.478129758456991, "grad_norm": 5.8331451416015625, "learning_rate": 0.00017628729307024612, "loss": 1.298, "step": 83000 }, { "epoch": 2.4796226077090737, "grad_norm": 4.216109275817871, "learning_rate": 0.00017627300727432728, "loss": 1.3288, "step": 83050 }, { "epoch": 2.481115456961156, "grad_norm": 5.123735427856445, "learning_rate": 0.00017625872147840845, "loss": 1.3103, "step": 83100 }, { "epoch": 2.4826083062132387, "grad_norm": 3.7272541522979736, "learning_rate": 0.00017624443568248964, "loss": 1.2753, "step": 83150 }, { "epoch": 2.484101155465321, "grad_norm": 5.656236171722412, "learning_rate": 0.00017623014988657078, "loss": 1.3013, "step": 83200 }, { "epoch": 2.4855940047174037, "grad_norm": 4.702991962432861, "learning_rate": 0.00017621586409065197, "loss": 1.3073, "step": 83250 }, { "epoch": 2.487086853969486, "grad_norm": 4.512374401092529, "learning_rate": 0.0001762015782947331, "loss": 1.2951, "step": 83300 }, { "epoch": 2.4885797032215686, "grad_norm": 3.560082197189331, "learning_rate": 0.0001761872924988143, "loss": 1.3083, "step": 83350 }, { "epoch": 2.4900725524736513, "grad_norm": 5.189292907714844, "learning_rate": 0.00017617300670289544, "loss": 1.3153, "step": 83400 }, { "epoch": 2.4915654017257336, "grad_norm": 4.624311447143555, "learning_rate": 0.00017615872090697663, "loss": 1.2606, "step": 83450 }, { "epoch": 2.4930582509778163, "grad_norm": 4.030190944671631, "learning_rate": 0.0001761444351110578, "loss": 1.2552, "step": 83500 }, { "epoch": 2.4945511002298986, "grad_norm": 5.9866042137146, "learning_rate": 0.00017613014931513896, "loss": 1.2818, "step": 83550 }, { "epoch": 2.4960439494819813, "grad_norm": 4.025645732879639, "learning_rate": 0.00017611586351922012, "loss": 1.325, "step": 83600 }, { "epoch": 2.497536798734064, "grad_norm": 5.12217903137207, "learning_rate": 0.0001761015777233013, "loss": 1.2656, "step": 83650 }, { "epoch": 2.4990296479861462, "grad_norm": 3.5725717544555664, "learning_rate": 0.00017608729192738245, "loss": 1.2605, "step": 83700 }, { "epoch": 2.500522497238229, "grad_norm": 6.810322284698486, "learning_rate": 0.00017607300613146362, "loss": 1.3275, "step": 83750 }, { "epoch": 2.502015346490311, "grad_norm": 6.723217487335205, "learning_rate": 0.00017605872033554478, "loss": 1.3038, "step": 83800 }, { "epoch": 2.503508195742394, "grad_norm": 5.044365882873535, "learning_rate": 0.00017604443453962595, "loss": 1.2821, "step": 83850 }, { "epoch": 2.5050010449944766, "grad_norm": 4.397944450378418, "learning_rate": 0.0001760301487437071, "loss": 1.3281, "step": 83900 }, { "epoch": 2.506493894246559, "grad_norm": 3.9087088108062744, "learning_rate": 0.0001760158629477883, "loss": 1.2674, "step": 83950 }, { "epoch": 2.5079867434986416, "grad_norm": 4.3125128746032715, "learning_rate": 0.00017600157715186944, "loss": 1.2654, "step": 84000 }, { "epoch": 2.509479592750724, "grad_norm": 5.792150020599365, "learning_rate": 0.00017598729135595063, "loss": 1.3846, "step": 84050 }, { "epoch": 2.5109724420028066, "grad_norm": 4.449563026428223, "learning_rate": 0.00017597300556003177, "loss": 1.2402, "step": 84100 }, { "epoch": 2.5124652912548893, "grad_norm": 3.286771774291992, "learning_rate": 0.00017595871976411296, "loss": 1.2547, "step": 84150 }, { "epoch": 2.5139581405069715, "grad_norm": 4.84338903427124, "learning_rate": 0.0001759444339681941, "loss": 1.2127, "step": 84200 }, { "epoch": 2.5154509897590542, "grad_norm": 4.913907051086426, "learning_rate": 0.0001759301481722753, "loss": 1.3453, "step": 84250 }, { "epoch": 2.5169438390111365, "grad_norm": 3.828268527984619, "learning_rate": 0.00017591586237635645, "loss": 1.3056, "step": 84300 }, { "epoch": 2.518436688263219, "grad_norm": 4.86277961730957, "learning_rate": 0.00017590157658043762, "loss": 1.2613, "step": 84350 }, { "epoch": 2.519929537515302, "grad_norm": 3.3477306365966797, "learning_rate": 0.00017588729078451878, "loss": 1.2784, "step": 84400 }, { "epoch": 2.521422386767384, "grad_norm": 3.982935905456543, "learning_rate": 0.00017587300498859995, "loss": 1.279, "step": 84450 }, { "epoch": 2.522915236019467, "grad_norm": 4.321762561798096, "learning_rate": 0.0001758587191926811, "loss": 1.2706, "step": 84500 }, { "epoch": 2.524408085271549, "grad_norm": 3.989497423171997, "learning_rate": 0.00017584443339676228, "loss": 1.2431, "step": 84550 }, { "epoch": 2.525900934523632, "grad_norm": 4.457726955413818, "learning_rate": 0.00017583014760084344, "loss": 1.2686, "step": 84600 }, { "epoch": 2.5273937837757146, "grad_norm": 4.100541114807129, "learning_rate": 0.0001758158618049246, "loss": 1.2379, "step": 84650 }, { "epoch": 2.528886633027797, "grad_norm": 5.386853218078613, "learning_rate": 0.00017580157600900577, "loss": 1.2767, "step": 84700 }, { "epoch": 2.5303794822798795, "grad_norm": 4.51826286315918, "learning_rate": 0.00017578729021308694, "loss": 1.2072, "step": 84750 }, { "epoch": 2.531872331531962, "grad_norm": 7.255303382873535, "learning_rate": 0.0001757730044171681, "loss": 1.2324, "step": 84800 }, { "epoch": 2.5333651807840445, "grad_norm": 4.7532572746276855, "learning_rate": 0.00017575871862124927, "loss": 1.2534, "step": 84850 }, { "epoch": 2.534858030036127, "grad_norm": 4.200450897216797, "learning_rate": 0.00017574443282533043, "loss": 1.2732, "step": 84900 }, { "epoch": 2.5363508792882095, "grad_norm": 4.062733173370361, "learning_rate": 0.0001757301470294116, "loss": 1.2987, "step": 84950 }, { "epoch": 2.537843728540292, "grad_norm": 4.786027908325195, "learning_rate": 0.00017571586123349279, "loss": 1.2593, "step": 85000 }, { "epoch": 2.5393365777923744, "grad_norm": 6.346192359924316, "learning_rate": 0.00017570157543757392, "loss": 1.2854, "step": 85050 }, { "epoch": 2.540829427044457, "grad_norm": 5.030992031097412, "learning_rate": 0.00017568728964165512, "loss": 1.2498, "step": 85100 }, { "epoch": 2.54232227629654, "grad_norm": 3.7949612140655518, "learning_rate": 0.00017567300384573625, "loss": 1.275, "step": 85150 }, { "epoch": 2.543815125548622, "grad_norm": 4.442857265472412, "learning_rate": 0.00017565871804981745, "loss": 1.2156, "step": 85200 }, { "epoch": 2.5453079748007044, "grad_norm": 5.143223285675049, "learning_rate": 0.00017564443225389858, "loss": 1.3554, "step": 85250 }, { "epoch": 2.546800824052787, "grad_norm": 4.240771770477295, "learning_rate": 0.00017563014645797977, "loss": 1.3288, "step": 85300 }, { "epoch": 2.5482936733048698, "grad_norm": 5.060192108154297, "learning_rate": 0.00017561586066206094, "loss": 1.3284, "step": 85350 }, { "epoch": 2.549786522556952, "grad_norm": 4.155733585357666, "learning_rate": 0.0001756015748661421, "loss": 1.2422, "step": 85400 }, { "epoch": 2.5512793718090347, "grad_norm": 5.4249749183654785, "learning_rate": 0.00017558728907022327, "loss": 1.262, "step": 85450 }, { "epoch": 2.552772221061117, "grad_norm": 5.704741954803467, "learning_rate": 0.00017557300327430443, "loss": 1.308, "step": 85500 }, { "epoch": 2.5542650703131997, "grad_norm": 4.013110637664795, "learning_rate": 0.0001755587174783856, "loss": 1.2667, "step": 85550 }, { "epoch": 2.5557579195652824, "grad_norm": 4.5984673500061035, "learning_rate": 0.00017554443168246676, "loss": 1.2756, "step": 85600 }, { "epoch": 2.5572507688173647, "grad_norm": 4.716318130493164, "learning_rate": 0.00017553014588654793, "loss": 1.2896, "step": 85650 }, { "epoch": 2.5587436180694474, "grad_norm": 3.9019134044647217, "learning_rate": 0.0001755158600906291, "loss": 1.3419, "step": 85700 }, { "epoch": 2.5602364673215297, "grad_norm": 4.762269020080566, "learning_rate": 0.00017550157429471026, "loss": 1.2856, "step": 85750 }, { "epoch": 2.5617293165736124, "grad_norm": 4.8936567306518555, "learning_rate": 0.00017548728849879145, "loss": 1.3033, "step": 85800 }, { "epoch": 2.563222165825695, "grad_norm": 5.605986595153809, "learning_rate": 0.00017547300270287259, "loss": 1.2928, "step": 85850 }, { "epoch": 2.5647150150777773, "grad_norm": 5.456994533538818, "learning_rate": 0.00017545871690695378, "loss": 1.2646, "step": 85900 }, { "epoch": 2.56620786432986, "grad_norm": 5.125797748565674, "learning_rate": 0.00017544443111103491, "loss": 1.3337, "step": 85950 }, { "epoch": 2.5677007135819423, "grad_norm": 6.698281288146973, "learning_rate": 0.0001754301453151161, "loss": 1.231, "step": 86000 }, { "epoch": 2.569193562834025, "grad_norm": 4.8110175132751465, "learning_rate": 0.00017541585951919724, "loss": 1.2829, "step": 86050 }, { "epoch": 2.5706864120861077, "grad_norm": 4.389436721801758, "learning_rate": 0.00017540157372327844, "loss": 1.3394, "step": 86100 }, { "epoch": 2.57217926133819, "grad_norm": 4.567478179931641, "learning_rate": 0.0001753872879273596, "loss": 1.3179, "step": 86150 }, { "epoch": 2.5736721105902727, "grad_norm": 6.0036444664001465, "learning_rate": 0.00017537300213144077, "loss": 1.2837, "step": 86200 }, { "epoch": 2.575164959842355, "grad_norm": 4.954610824584961, "learning_rate": 0.00017535871633552193, "loss": 1.3057, "step": 86250 }, { "epoch": 2.5766578090944376, "grad_norm": 3.623244524002075, "learning_rate": 0.0001753444305396031, "loss": 1.3203, "step": 86300 }, { "epoch": 2.5781506583465204, "grad_norm": 3.969407081604004, "learning_rate": 0.00017533014474368426, "loss": 1.2465, "step": 86350 }, { "epoch": 2.5796435075986026, "grad_norm": 4.700910568237305, "learning_rate": 0.00017531585894776542, "loss": 1.2673, "step": 86400 }, { "epoch": 2.5811363568506853, "grad_norm": 4.05939245223999, "learning_rate": 0.0001753015731518466, "loss": 1.2177, "step": 86450 }, { "epoch": 2.5826292061027676, "grad_norm": 4.144320011138916, "learning_rate": 0.00017528728735592775, "loss": 1.2198, "step": 86500 }, { "epoch": 2.5841220553548503, "grad_norm": 4.483496189117432, "learning_rate": 0.00017527300156000892, "loss": 1.3403, "step": 86550 }, { "epoch": 2.585614904606933, "grad_norm": 6.164941787719727, "learning_rate": 0.0001752587157640901, "loss": 1.3121, "step": 86600 }, { "epoch": 2.5871077538590153, "grad_norm": 5.265034198760986, "learning_rate": 0.00017524442996817125, "loss": 1.2484, "step": 86650 }, { "epoch": 2.588600603111098, "grad_norm": 4.718167304992676, "learning_rate": 0.00017523014417225244, "loss": 1.3256, "step": 86700 }, { "epoch": 2.5900934523631802, "grad_norm": 3.9518258571624756, "learning_rate": 0.00017521585837633358, "loss": 1.271, "step": 86750 }, { "epoch": 2.591586301615263, "grad_norm": 4.604781627655029, "learning_rate": 0.00017520157258041477, "loss": 1.2821, "step": 86800 }, { "epoch": 2.5930791508673456, "grad_norm": 3.5907840728759766, "learning_rate": 0.0001751872867844959, "loss": 1.2958, "step": 86850 }, { "epoch": 2.594572000119428, "grad_norm": 5.262489318847656, "learning_rate": 0.0001751730009885771, "loss": 1.3186, "step": 86900 }, { "epoch": 2.5960648493715106, "grad_norm": 4.812371253967285, "learning_rate": 0.00017515871519265826, "loss": 1.2586, "step": 86950 }, { "epoch": 2.597557698623593, "grad_norm": 4.645204544067383, "learning_rate": 0.00017514442939673943, "loss": 1.2656, "step": 87000 }, { "epoch": 2.5990505478756756, "grad_norm": 5.977488040924072, "learning_rate": 0.0001751301436008206, "loss": 1.3167, "step": 87050 }, { "epoch": 2.6005433971277583, "grad_norm": 4.534546375274658, "learning_rate": 0.00017511585780490176, "loss": 1.3263, "step": 87100 }, { "epoch": 2.6020362463798405, "grad_norm": 4.562127113342285, "learning_rate": 0.00017510157200898292, "loss": 1.3122, "step": 87150 }, { "epoch": 2.6035290956319233, "grad_norm": 3.588327646255493, "learning_rate": 0.00017508728621306409, "loss": 1.2953, "step": 87200 }, { "epoch": 2.6050219448840055, "grad_norm": 5.136357307434082, "learning_rate": 0.00017507300041714525, "loss": 1.2831, "step": 87250 }, { "epoch": 2.606514794136088, "grad_norm": 4.8900299072265625, "learning_rate": 0.00017505871462122641, "loss": 1.2847, "step": 87300 }, { "epoch": 2.608007643388171, "grad_norm": 3.7218592166900635, "learning_rate": 0.00017504442882530758, "loss": 1.2828, "step": 87350 }, { "epoch": 2.609500492640253, "grad_norm": 3.4648168087005615, "learning_rate": 0.00017503014302938874, "loss": 1.2576, "step": 87400 }, { "epoch": 2.6109933418923355, "grad_norm": 3.6955080032348633, "learning_rate": 0.0001750158572334699, "loss": 1.3208, "step": 87450 }, { "epoch": 2.612486191144418, "grad_norm": 4.296437740325928, "learning_rate": 0.00017500157143755107, "loss": 1.2455, "step": 87500 }, { "epoch": 2.613979040396501, "grad_norm": 4.3054518699646, "learning_rate": 0.00017498728564163224, "loss": 1.2722, "step": 87550 }, { "epoch": 2.615471889648583, "grad_norm": 6.258134841918945, "learning_rate": 0.0001749729998457134, "loss": 1.2777, "step": 87600 }, { "epoch": 2.616964738900666, "grad_norm": 5.092291355133057, "learning_rate": 0.0001749587140497946, "loss": 1.2906, "step": 87650 }, { "epoch": 2.618457588152748, "grad_norm": 3.986844778060913, "learning_rate": 0.00017494442825387573, "loss": 1.3326, "step": 87700 }, { "epoch": 2.619950437404831, "grad_norm": 5.269211292266846, "learning_rate": 0.00017493014245795692, "loss": 1.3253, "step": 87750 }, { "epoch": 2.6214432866569135, "grad_norm": 3.6939330101013184, "learning_rate": 0.00017491585666203806, "loss": 1.318, "step": 87800 }, { "epoch": 2.6229361359089958, "grad_norm": 5.028196334838867, "learning_rate": 0.00017490157086611925, "loss": 1.3089, "step": 87850 }, { "epoch": 2.6244289851610785, "grad_norm": 5.179739475250244, "learning_rate": 0.0001748872850702004, "loss": 1.2471, "step": 87900 }, { "epoch": 2.6259218344131607, "grad_norm": 4.581385135650635, "learning_rate": 0.00017487299927428158, "loss": 1.2703, "step": 87950 }, { "epoch": 2.6274146836652434, "grad_norm": 4.185626983642578, "learning_rate": 0.00017485871347836275, "loss": 1.2967, "step": 88000 }, { "epoch": 2.628907532917326, "grad_norm": 4.166527271270752, "learning_rate": 0.0001748444276824439, "loss": 1.282, "step": 88050 }, { "epoch": 2.6304003821694084, "grad_norm": 5.894084930419922, "learning_rate": 0.00017483014188652508, "loss": 1.3337, "step": 88100 }, { "epoch": 2.631893231421491, "grad_norm": 5.775843620300293, "learning_rate": 0.00017481585609060624, "loss": 1.3186, "step": 88150 }, { "epoch": 2.6333860806735734, "grad_norm": 4.601424217224121, "learning_rate": 0.0001748015702946874, "loss": 1.2961, "step": 88200 }, { "epoch": 2.634878929925656, "grad_norm": 4.345365524291992, "learning_rate": 0.00017478728449876857, "loss": 1.2776, "step": 88250 }, { "epoch": 2.636371779177739, "grad_norm": 4.751996040344238, "learning_rate": 0.00017477299870284973, "loss": 1.2847, "step": 88300 }, { "epoch": 2.637864628429821, "grad_norm": 4.37894344329834, "learning_rate": 0.0001747587129069309, "loss": 1.2631, "step": 88350 }, { "epoch": 2.6393574776819038, "grad_norm": 4.729623794555664, "learning_rate": 0.00017474442711101206, "loss": 1.3288, "step": 88400 }, { "epoch": 2.640850326933986, "grad_norm": 4.7951788902282715, "learning_rate": 0.00017473014131509326, "loss": 1.3207, "step": 88450 }, { "epoch": 2.6423431761860687, "grad_norm": 3.692270040512085, "learning_rate": 0.0001747158555191744, "loss": 1.3038, "step": 88500 }, { "epoch": 2.6438360254381514, "grad_norm": 4.1009297370910645, "learning_rate": 0.00017470156972325559, "loss": 1.2874, "step": 88550 }, { "epoch": 2.6453288746902337, "grad_norm": 4.919840335845947, "learning_rate": 0.00017468728392733672, "loss": 1.294, "step": 88600 }, { "epoch": 2.6468217239423164, "grad_norm": 3.7501938343048096, "learning_rate": 0.00017467299813141791, "loss": 1.3196, "step": 88650 }, { "epoch": 2.6483145731943987, "grad_norm": 4.218129634857178, "learning_rate": 0.00017465871233549905, "loss": 1.2823, "step": 88700 }, { "epoch": 2.6498074224464814, "grad_norm": 5.627344131469727, "learning_rate": 0.00017464442653958024, "loss": 1.2796, "step": 88750 }, { "epoch": 2.651300271698564, "grad_norm": 4.543562412261963, "learning_rate": 0.0001746301407436614, "loss": 1.2605, "step": 88800 }, { "epoch": 2.6527931209506463, "grad_norm": 4.705561637878418, "learning_rate": 0.00017461585494774257, "loss": 1.2822, "step": 88850 }, { "epoch": 2.654285970202729, "grad_norm": 4.356922149658203, "learning_rate": 0.00017460156915182374, "loss": 1.3247, "step": 88900 }, { "epoch": 2.6557788194548113, "grad_norm": 4.276957988739014, "learning_rate": 0.0001745872833559049, "loss": 1.3103, "step": 88950 }, { "epoch": 2.657271668706894, "grad_norm": 4.122916221618652, "learning_rate": 0.00017457299755998607, "loss": 1.302, "step": 89000 }, { "epoch": 2.6587645179589767, "grad_norm": 4.473240852355957, "learning_rate": 0.00017455871176406723, "loss": 1.274, "step": 89050 }, { "epoch": 2.660257367211059, "grad_norm": 5.299698829650879, "learning_rate": 0.0001745444259681484, "loss": 1.2879, "step": 89100 }, { "epoch": 2.6617502164631417, "grad_norm": 5.3960347175598145, "learning_rate": 0.00017453014017222956, "loss": 1.3083, "step": 89150 }, { "epoch": 2.663243065715224, "grad_norm": 9.475055694580078, "learning_rate": 0.00017451585437631073, "loss": 1.3123, "step": 89200 }, { "epoch": 2.6647359149673067, "grad_norm": 4.01664924621582, "learning_rate": 0.00017450156858039192, "loss": 1.2673, "step": 89250 }, { "epoch": 2.6662287642193894, "grad_norm": 4.74378776550293, "learning_rate": 0.00017448728278447306, "loss": 1.2704, "step": 89300 }, { "epoch": 2.6677216134714716, "grad_norm": 3.8527019023895264, "learning_rate": 0.00017447299698855425, "loss": 1.2638, "step": 89350 }, { "epoch": 2.6692144627235543, "grad_norm": 5.14741849899292, "learning_rate": 0.00017445871119263538, "loss": 1.2843, "step": 89400 }, { "epoch": 2.6707073119756366, "grad_norm": 4.814597129821777, "learning_rate": 0.00017444442539671658, "loss": 1.2955, "step": 89450 }, { "epoch": 2.6722001612277193, "grad_norm": 3.8238747119903564, "learning_rate": 0.00017443013960079771, "loss": 1.2921, "step": 89500 }, { "epoch": 2.673693010479802, "grad_norm": 5.118923187255859, "learning_rate": 0.0001744158538048789, "loss": 1.3114, "step": 89550 }, { "epoch": 2.6751858597318843, "grad_norm": 4.3184380531311035, "learning_rate": 0.00017440156800896007, "loss": 1.2387, "step": 89600 }, { "epoch": 2.6766787089839665, "grad_norm": 4.6351728439331055, "learning_rate": 0.00017438728221304123, "loss": 1.2524, "step": 89650 }, { "epoch": 2.6781715582360492, "grad_norm": 4.391300678253174, "learning_rate": 0.0001743729964171224, "loss": 1.2591, "step": 89700 }, { "epoch": 2.679664407488132, "grad_norm": 3.749375104904175, "learning_rate": 0.00017435871062120354, "loss": 1.247, "step": 89750 }, { "epoch": 2.681157256740214, "grad_norm": 4.260756492614746, "learning_rate": 0.00017434442482528473, "loss": 1.2672, "step": 89800 }, { "epoch": 2.682650105992297, "grad_norm": 3.6358819007873535, "learning_rate": 0.00017433013902936587, "loss": 1.2941, "step": 89850 }, { "epoch": 2.684142955244379, "grad_norm": 4.653028964996338, "learning_rate": 0.00017431585323344706, "loss": 1.3237, "step": 89900 }, { "epoch": 2.685635804496462, "grad_norm": 4.692080497741699, "learning_rate": 0.00017430156743752822, "loss": 1.325, "step": 89950 }, { "epoch": 2.6871286537485446, "grad_norm": 4.317993640899658, "learning_rate": 0.0001742872816416094, "loss": 1.2491, "step": 90000 }, { "epoch": 2.688621503000627, "grad_norm": 3.7262911796569824, "learning_rate": 0.00017427299584569055, "loss": 1.3218, "step": 90050 }, { "epoch": 2.6901143522527096, "grad_norm": 4.5458855628967285, "learning_rate": 0.00017425871004977172, "loss": 1.3188, "step": 90100 }, { "epoch": 2.691607201504792, "grad_norm": 4.8145599365234375, "learning_rate": 0.00017424442425385288, "loss": 1.2746, "step": 90150 }, { "epoch": 2.6931000507568745, "grad_norm": 5.353755474090576, "learning_rate": 0.00017423013845793405, "loss": 1.2153, "step": 90200 }, { "epoch": 2.6945929000089572, "grad_norm": 5.111592769622803, "learning_rate": 0.0001742158526620152, "loss": 1.2482, "step": 90250 }, { "epoch": 2.6960857492610395, "grad_norm": 5.903563022613525, "learning_rate": 0.00017420156686609638, "loss": 1.2749, "step": 90300 }, { "epoch": 2.697578598513122, "grad_norm": 5.421234130859375, "learning_rate": 0.00017418728107017754, "loss": 1.3023, "step": 90350 }, { "epoch": 2.6990714477652045, "grad_norm": 4.8063154220581055, "learning_rate": 0.00017417299527425873, "loss": 1.3413, "step": 90400 }, { "epoch": 2.700564297017287, "grad_norm": 4.831643104553223, "learning_rate": 0.00017415870947833987, "loss": 1.3733, "step": 90450 }, { "epoch": 2.70205714626937, "grad_norm": 5.314348220825195, "learning_rate": 0.00017414442368242106, "loss": 1.2819, "step": 90500 }, { "epoch": 2.703549995521452, "grad_norm": 5.592355728149414, "learning_rate": 0.0001741301378865022, "loss": 1.2959, "step": 90550 }, { "epoch": 2.705042844773535, "grad_norm": 4.042795181274414, "learning_rate": 0.0001741158520905834, "loss": 1.28, "step": 90600 }, { "epoch": 2.706535694025617, "grad_norm": 4.347578525543213, "learning_rate": 0.00017410156629466456, "loss": 1.3491, "step": 90650 }, { "epoch": 2.7080285432777, "grad_norm": 5.4727678298950195, "learning_rate": 0.00017408728049874572, "loss": 1.3294, "step": 90700 }, { "epoch": 2.7095213925297825, "grad_norm": 4.353364944458008, "learning_rate": 0.00017407299470282688, "loss": 1.27, "step": 90750 }, { "epoch": 2.711014241781865, "grad_norm": 3.916985034942627, "learning_rate": 0.00017405870890690805, "loss": 1.297, "step": 90800 }, { "epoch": 2.7125070910339475, "grad_norm": 4.451666831970215, "learning_rate": 0.00017404442311098921, "loss": 1.3051, "step": 90850 }, { "epoch": 2.7139999402860298, "grad_norm": 5.726661205291748, "learning_rate": 0.00017403013731507038, "loss": 1.3353, "step": 90900 }, { "epoch": 2.7154927895381125, "grad_norm": 4.238119125366211, "learning_rate": 0.00017401585151915154, "loss": 1.3279, "step": 90950 }, { "epoch": 2.716985638790195, "grad_norm": 4.4539642333984375, "learning_rate": 0.0001740015657232327, "loss": 1.2492, "step": 91000 }, { "epoch": 2.7184784880422774, "grad_norm": 5.241584300994873, "learning_rate": 0.00017398727992731387, "loss": 1.3039, "step": 91050 }, { "epoch": 2.71997133729436, "grad_norm": 7.063384056091309, "learning_rate": 0.00017397299413139506, "loss": 1.2849, "step": 91100 }, { "epoch": 2.7214641865464424, "grad_norm": 3.5526702404022217, "learning_rate": 0.0001739587083354762, "loss": 1.3074, "step": 91150 }, { "epoch": 2.722957035798525, "grad_norm": 4.808590888977051, "learning_rate": 0.0001739444225395574, "loss": 1.3124, "step": 91200 }, { "epoch": 2.724449885050608, "grad_norm": 6.006200790405273, "learning_rate": 0.00017393013674363853, "loss": 1.2375, "step": 91250 }, { "epoch": 2.72594273430269, "grad_norm": 3.5086710453033447, "learning_rate": 0.00017391585094771972, "loss": 1.2535, "step": 91300 }, { "epoch": 2.727435583554773, "grad_norm": 4.1073832511901855, "learning_rate": 0.00017390156515180086, "loss": 1.3369, "step": 91350 }, { "epoch": 2.728928432806855, "grad_norm": 4.759340286254883, "learning_rate": 0.00017388727935588205, "loss": 1.2975, "step": 91400 }, { "epoch": 2.7304212820589377, "grad_norm": 4.313277244567871, "learning_rate": 0.00017387299355996322, "loss": 1.2607, "step": 91450 }, { "epoch": 2.7319141313110205, "grad_norm": 4.681273937225342, "learning_rate": 0.00017385870776404438, "loss": 1.2994, "step": 91500 }, { "epoch": 2.7334069805631027, "grad_norm": 5.569629192352295, "learning_rate": 0.00017384442196812555, "loss": 1.29, "step": 91550 }, { "epoch": 2.7348998298151854, "grad_norm": 5.809197425842285, "learning_rate": 0.0001738301361722067, "loss": 1.3245, "step": 91600 }, { "epoch": 2.7363926790672677, "grad_norm": 3.6838326454162598, "learning_rate": 0.00017381585037628788, "loss": 1.2904, "step": 91650 }, { "epoch": 2.7378855283193504, "grad_norm": 5.378135681152344, "learning_rate": 0.00017380156458036904, "loss": 1.2837, "step": 91700 }, { "epoch": 2.739378377571433, "grad_norm": 5.717269420623779, "learning_rate": 0.0001737872787844502, "loss": 1.301, "step": 91750 }, { "epoch": 2.7408712268235154, "grad_norm": 5.068012237548828, "learning_rate": 0.00017377299298853137, "loss": 1.2899, "step": 91800 }, { "epoch": 2.7423640760755976, "grad_norm": 5.394426345825195, "learning_rate": 0.00017375870719261253, "loss": 1.2719, "step": 91850 }, { "epoch": 2.7438569253276803, "grad_norm": 4.564161777496338, "learning_rate": 0.00017374442139669373, "loss": 1.2951, "step": 91900 }, { "epoch": 2.745349774579763, "grad_norm": 5.253499507904053, "learning_rate": 0.00017373013560077486, "loss": 1.2543, "step": 91950 }, { "epoch": 2.7468426238318453, "grad_norm": 3.995630979537964, "learning_rate": 0.00017371584980485605, "loss": 1.2854, "step": 92000 }, { "epoch": 2.748335473083928, "grad_norm": 3.954859733581543, "learning_rate": 0.0001737015640089372, "loss": 1.287, "step": 92050 }, { "epoch": 2.7498283223360103, "grad_norm": 4.409485340118408, "learning_rate": 0.00017368727821301838, "loss": 1.2889, "step": 92100 }, { "epoch": 2.751321171588093, "grad_norm": 5.021519660949707, "learning_rate": 0.00017367299241709952, "loss": 1.3849, "step": 92150 }, { "epoch": 2.7528140208401757, "grad_norm": 4.707982063293457, "learning_rate": 0.00017365870662118071, "loss": 1.3099, "step": 92200 }, { "epoch": 2.754306870092258, "grad_norm": 3.8836755752563477, "learning_rate": 0.00017364442082526188, "loss": 1.2707, "step": 92250 }, { "epoch": 2.7557997193443406, "grad_norm": 4.692137718200684, "learning_rate": 0.00017363013502934304, "loss": 1.3051, "step": 92300 }, { "epoch": 2.757292568596423, "grad_norm": 4.50584602355957, "learning_rate": 0.0001736158492334242, "loss": 1.2969, "step": 92350 }, { "epoch": 2.7587854178485056, "grad_norm": 5.027088642120361, "learning_rate": 0.00017360156343750535, "loss": 1.2846, "step": 92400 }, { "epoch": 2.7602782671005883, "grad_norm": 5.339383125305176, "learning_rate": 0.00017358727764158654, "loss": 1.3153, "step": 92450 }, { "epoch": 2.7617711163526706, "grad_norm": 3.3670814037323, "learning_rate": 0.00017357299184566767, "loss": 1.2268, "step": 92500 }, { "epoch": 2.7632639656047533, "grad_norm": 4.426408290863037, "learning_rate": 0.00017355870604974887, "loss": 1.2737, "step": 92550 }, { "epoch": 2.7647568148568356, "grad_norm": 4.831822395324707, "learning_rate": 0.00017354442025383003, "loss": 1.3036, "step": 92600 }, { "epoch": 2.7662496641089183, "grad_norm": 5.008368015289307, "learning_rate": 0.0001735301344579112, "loss": 1.2846, "step": 92650 }, { "epoch": 2.767742513361001, "grad_norm": 4.761803150177002, "learning_rate": 0.00017351584866199236, "loss": 1.2715, "step": 92700 }, { "epoch": 2.7692353626130832, "grad_norm": 4.651097774505615, "learning_rate": 0.00017350156286607352, "loss": 1.2337, "step": 92750 }, { "epoch": 2.770728211865166, "grad_norm": 4.635573387145996, "learning_rate": 0.0001734872770701547, "loss": 1.3133, "step": 92800 }, { "epoch": 2.772221061117248, "grad_norm": 4.60062313079834, "learning_rate": 0.00017347299127423585, "loss": 1.2319, "step": 92850 }, { "epoch": 2.773713910369331, "grad_norm": 5.222136497497559, "learning_rate": 0.00017345870547831702, "loss": 1.3058, "step": 92900 }, { "epoch": 2.7752067596214136, "grad_norm": 5.110498428344727, "learning_rate": 0.00017344441968239818, "loss": 1.2217, "step": 92950 }, { "epoch": 2.776699608873496, "grad_norm": 4.382105350494385, "learning_rate": 0.00017343013388647935, "loss": 1.2964, "step": 93000 }, { "epoch": 2.7781924581255786, "grad_norm": 4.167068958282471, "learning_rate": 0.00017341584809056054, "loss": 1.3275, "step": 93050 }, { "epoch": 2.779685307377661, "grad_norm": 2.9642536640167236, "learning_rate": 0.00017340156229464168, "loss": 1.3143, "step": 93100 }, { "epoch": 2.7811781566297435, "grad_norm": 3.9589407444000244, "learning_rate": 0.00017338727649872287, "loss": 1.2388, "step": 93150 }, { "epoch": 2.7826710058818263, "grad_norm": 4.6660051345825195, "learning_rate": 0.000173372990702804, "loss": 1.3026, "step": 93200 }, { "epoch": 2.7841638551339085, "grad_norm": 4.213698387145996, "learning_rate": 0.0001733587049068852, "loss": 1.3192, "step": 93250 }, { "epoch": 2.7856567043859912, "grad_norm": 4.1265645027160645, "learning_rate": 0.00017334441911096636, "loss": 1.2899, "step": 93300 }, { "epoch": 2.7871495536380735, "grad_norm": 5.5079545974731445, "learning_rate": 0.00017333013331504753, "loss": 1.2968, "step": 93350 }, { "epoch": 2.788642402890156, "grad_norm": 4.825951099395752, "learning_rate": 0.0001733158475191287, "loss": 1.2432, "step": 93400 }, { "epoch": 2.790135252142239, "grad_norm": 3.7230324745178223, "learning_rate": 0.00017330156172320986, "loss": 1.385, "step": 93450 }, { "epoch": 2.791628101394321, "grad_norm": 4.764435291290283, "learning_rate": 0.00017328727592729102, "loss": 1.2362, "step": 93500 }, { "epoch": 2.793120950646404, "grad_norm": 5.750154972076416, "learning_rate": 0.00017327299013137219, "loss": 1.2868, "step": 93550 }, { "epoch": 2.794613799898486, "grad_norm": 3.704388380050659, "learning_rate": 0.00017325870433545335, "loss": 1.2839, "step": 93600 }, { "epoch": 2.796106649150569, "grad_norm": 5.4798407554626465, "learning_rate": 0.00017324441853953452, "loss": 1.3227, "step": 93650 }, { "epoch": 2.7975994984026515, "grad_norm": 4.121763229370117, "learning_rate": 0.00017323013274361568, "loss": 1.2249, "step": 93700 }, { "epoch": 2.799092347654734, "grad_norm": 4.939391136169434, "learning_rate": 0.00017321584694769684, "loss": 1.3017, "step": 93750 }, { "epoch": 2.8005851969068165, "grad_norm": 4.476954936981201, "learning_rate": 0.000173201561151778, "loss": 1.2851, "step": 93800 }, { "epoch": 2.8020780461588988, "grad_norm": 4.639068603515625, "learning_rate": 0.0001731872753558592, "loss": 1.3026, "step": 93850 }, { "epoch": 2.8035708954109815, "grad_norm": 4.282186031341553, "learning_rate": 0.00017317298955994034, "loss": 1.2374, "step": 93900 }, { "epoch": 2.805063744663064, "grad_norm": 5.077556610107422, "learning_rate": 0.00017315870376402153, "loss": 1.3172, "step": 93950 }, { "epoch": 2.8065565939151464, "grad_norm": 5.154215335845947, "learning_rate": 0.00017314441796810267, "loss": 1.2998, "step": 94000 }, { "epoch": 2.8080494431672287, "grad_norm": 6.019194602966309, "learning_rate": 0.00017313013217218386, "loss": 1.3121, "step": 94050 }, { "epoch": 2.8095422924193114, "grad_norm": 4.445097923278809, "learning_rate": 0.00017311584637626502, "loss": 1.2887, "step": 94100 }, { "epoch": 2.811035141671394, "grad_norm": 3.711930990219116, "learning_rate": 0.0001731015605803462, "loss": 1.3031, "step": 94150 }, { "epoch": 2.8125279909234764, "grad_norm": 7.101813316345215, "learning_rate": 0.00017308727478442735, "loss": 1.2877, "step": 94200 }, { "epoch": 2.814020840175559, "grad_norm": 5.068753719329834, "learning_rate": 0.00017307298898850852, "loss": 1.2459, "step": 94250 }, { "epoch": 2.8155136894276414, "grad_norm": 5.185253143310547, "learning_rate": 0.00017305870319258968, "loss": 1.2292, "step": 94300 }, { "epoch": 2.817006538679724, "grad_norm": 4.654400825500488, "learning_rate": 0.00017304441739667085, "loss": 1.325, "step": 94350 }, { "epoch": 2.8184993879318068, "grad_norm": 4.575232028961182, "learning_rate": 0.000173030131600752, "loss": 1.3077, "step": 94400 }, { "epoch": 2.819992237183889, "grad_norm": 5.062366485595703, "learning_rate": 0.00017301584580483318, "loss": 1.319, "step": 94450 }, { "epoch": 2.8214850864359717, "grad_norm": 3.9217934608459473, "learning_rate": 0.00017300156000891434, "loss": 1.3042, "step": 94500 }, { "epoch": 2.822977935688054, "grad_norm": 7.488821506500244, "learning_rate": 0.00017298727421299553, "loss": 1.302, "step": 94550 }, { "epoch": 2.8244707849401367, "grad_norm": 4.662975311279297, "learning_rate": 0.00017297298841707667, "loss": 1.2965, "step": 94600 }, { "epoch": 2.8259636341922194, "grad_norm": 4.872460842132568, "learning_rate": 0.00017295870262115786, "loss": 1.2729, "step": 94650 }, { "epoch": 2.8274564834443017, "grad_norm": 4.52310848236084, "learning_rate": 0.000172944416825239, "loss": 1.2872, "step": 94700 }, { "epoch": 2.8289493326963844, "grad_norm": 4.012574672698975, "learning_rate": 0.0001729301310293202, "loss": 1.2537, "step": 94750 }, { "epoch": 2.8304421819484666, "grad_norm": 5.984830379486084, "learning_rate": 0.00017291584523340133, "loss": 1.3413, "step": 94800 }, { "epoch": 2.8319350312005493, "grad_norm": 4.736281394958496, "learning_rate": 0.00017290155943748252, "loss": 1.3116, "step": 94850 }, { "epoch": 2.833427880452632, "grad_norm": 4.227638244628906, "learning_rate": 0.00017288727364156369, "loss": 1.2607, "step": 94900 }, { "epoch": 2.8349207297047143, "grad_norm": 4.373624324798584, "learning_rate": 0.00017287298784564485, "loss": 1.3173, "step": 94950 }, { "epoch": 2.836413578956797, "grad_norm": 3.9513514041900635, "learning_rate": 0.00017285870204972602, "loss": 1.2344, "step": 95000 }, { "epoch": 2.8379064282088793, "grad_norm": 4.8411664962768555, "learning_rate": 0.00017284441625380715, "loss": 1.2618, "step": 95050 }, { "epoch": 2.839399277460962, "grad_norm": 6.098468780517578, "learning_rate": 0.00017283013045788834, "loss": 1.2512, "step": 95100 }, { "epoch": 2.8408921267130447, "grad_norm": 5.991581439971924, "learning_rate": 0.00017281584466196948, "loss": 1.306, "step": 95150 }, { "epoch": 2.842384975965127, "grad_norm": 4.810219764709473, "learning_rate": 0.00017280155886605067, "loss": 1.3146, "step": 95200 }, { "epoch": 2.8438778252172097, "grad_norm": 4.411154747009277, "learning_rate": 0.00017278727307013184, "loss": 1.3104, "step": 95250 }, { "epoch": 2.845370674469292, "grad_norm": 5.2504472732543945, "learning_rate": 0.000172772987274213, "loss": 1.3055, "step": 95300 }, { "epoch": 2.8468635237213746, "grad_norm": 3.6604950428009033, "learning_rate": 0.00017275870147829417, "loss": 1.2861, "step": 95350 }, { "epoch": 2.8483563729734573, "grad_norm": 4.993444442749023, "learning_rate": 0.00017274441568237533, "loss": 1.3199, "step": 95400 }, { "epoch": 2.8498492222255396, "grad_norm": 4.28807258605957, "learning_rate": 0.0001727301298864565, "loss": 1.2676, "step": 95450 }, { "epoch": 2.8513420714776223, "grad_norm": 4.000975608825684, "learning_rate": 0.00017271584409053766, "loss": 1.2916, "step": 95500 }, { "epoch": 2.8528349207297046, "grad_norm": 3.3593053817749023, "learning_rate": 0.00017270155829461883, "loss": 1.2802, "step": 95550 }, { "epoch": 2.8543277699817873, "grad_norm": 4.179107666015625, "learning_rate": 0.0001726872724987, "loss": 1.3147, "step": 95600 }, { "epoch": 2.85582061923387, "grad_norm": 3.5861895084381104, "learning_rate": 0.00017267298670278116, "loss": 1.2653, "step": 95650 }, { "epoch": 2.8573134684859522, "grad_norm": 4.41680383682251, "learning_rate": 0.00017265870090686235, "loss": 1.2961, "step": 95700 }, { "epoch": 2.858806317738035, "grad_norm": 6.727049827575684, "learning_rate": 0.00017264441511094349, "loss": 1.2936, "step": 95750 }, { "epoch": 2.860299166990117, "grad_norm": 4.895215034484863, "learning_rate": 0.00017263012931502468, "loss": 1.2528, "step": 95800 }, { "epoch": 2.8617920162422, "grad_norm": 5.4271955490112305, "learning_rate": 0.00017261584351910581, "loss": 1.3365, "step": 95850 }, { "epoch": 2.8632848654942826, "grad_norm": 4.273880481719971, "learning_rate": 0.000172601557723187, "loss": 1.2779, "step": 95900 }, { "epoch": 2.864777714746365, "grad_norm": 4.767480850219727, "learning_rate": 0.00017258727192726814, "loss": 1.2716, "step": 95950 }, { "epoch": 2.8662705639984476, "grad_norm": 4.384129524230957, "learning_rate": 0.00017257298613134934, "loss": 1.2969, "step": 96000 }, { "epoch": 2.86776341325053, "grad_norm": 4.527589797973633, "learning_rate": 0.0001725587003354305, "loss": 1.266, "step": 96050 }, { "epoch": 2.8692562625026126, "grad_norm": 4.149820804595947, "learning_rate": 0.00017254441453951167, "loss": 1.31, "step": 96100 }, { "epoch": 2.8707491117546953, "grad_norm": 4.574423313140869, "learning_rate": 0.00017253012874359283, "loss": 1.3, "step": 96150 }, { "epoch": 2.8722419610067775, "grad_norm": 4.80661153793335, "learning_rate": 0.000172515842947674, "loss": 1.304, "step": 96200 }, { "epoch": 2.87373481025886, "grad_norm": 4.065107822418213, "learning_rate": 0.00017250155715175516, "loss": 1.2581, "step": 96250 }, { "epoch": 2.8752276595109425, "grad_norm": 4.727302551269531, "learning_rate": 0.00017248727135583632, "loss": 1.321, "step": 96300 }, { "epoch": 2.876720508763025, "grad_norm": 4.85087251663208, "learning_rate": 0.0001724729855599175, "loss": 1.3013, "step": 96350 }, { "epoch": 2.8782133580151075, "grad_norm": 5.125801086425781, "learning_rate": 0.00017245869976399865, "loss": 1.2956, "step": 96400 }, { "epoch": 2.87970620726719, "grad_norm": 4.144227504730225, "learning_rate": 0.00017244441396807982, "loss": 1.2929, "step": 96450 }, { "epoch": 2.8811990565192724, "grad_norm": 4.702892303466797, "learning_rate": 0.000172430128172161, "loss": 1.361, "step": 96500 }, { "epoch": 2.882691905771355, "grad_norm": 5.188230991363525, "learning_rate": 0.00017241584237624215, "loss": 1.2474, "step": 96550 }, { "epoch": 2.884184755023438, "grad_norm": 5.263503551483154, "learning_rate": 0.00017240155658032334, "loss": 1.2728, "step": 96600 }, { "epoch": 2.88567760427552, "grad_norm": 3.8955039978027344, "learning_rate": 0.00017238727078440448, "loss": 1.2638, "step": 96650 }, { "epoch": 2.887170453527603, "grad_norm": 4.1187238693237305, "learning_rate": 0.00017237298498848567, "loss": 1.2934, "step": 96700 }, { "epoch": 2.888663302779685, "grad_norm": 6.000136375427246, "learning_rate": 0.00017235869919256683, "loss": 1.293, "step": 96750 }, { "epoch": 2.890156152031768, "grad_norm": 5.494427680969238, "learning_rate": 0.000172344413396648, "loss": 1.331, "step": 96800 }, { "epoch": 2.8916490012838505, "grad_norm": 5.483157157897949, "learning_rate": 0.00017233012760072916, "loss": 1.3299, "step": 96850 }, { "epoch": 2.8931418505359328, "grad_norm": 4.264294147491455, "learning_rate": 0.00017231584180481033, "loss": 1.2966, "step": 96900 }, { "epoch": 2.8946346997880155, "grad_norm": 3.7987403869628906, "learning_rate": 0.0001723015560088915, "loss": 1.3028, "step": 96950 }, { "epoch": 2.8961275490400977, "grad_norm": 4.712928771972656, "learning_rate": 0.00017228727021297266, "loss": 1.2154, "step": 97000 }, { "epoch": 2.8976203982921804, "grad_norm": 5.0749640464782715, "learning_rate": 0.00017227298441705382, "loss": 1.269, "step": 97050 }, { "epoch": 2.899113247544263, "grad_norm": 4.288541316986084, "learning_rate": 0.00017225869862113499, "loss": 1.2575, "step": 97100 }, { "epoch": 2.9006060967963454, "grad_norm": 4.452602386474609, "learning_rate": 0.00017224441282521615, "loss": 1.2613, "step": 97150 }, { "epoch": 2.902098946048428, "grad_norm": 4.485718250274658, "learning_rate": 0.00017223012702929731, "loss": 1.2678, "step": 97200 }, { "epoch": 2.9035917953005104, "grad_norm": 3.692293643951416, "learning_rate": 0.00017221584123337848, "loss": 1.2929, "step": 97250 }, { "epoch": 2.905084644552593, "grad_norm": 4.699619770050049, "learning_rate": 0.00017220155543745967, "loss": 1.2601, "step": 97300 }, { "epoch": 2.906577493804676, "grad_norm": 4.477214336395264, "learning_rate": 0.0001721872696415408, "loss": 1.3272, "step": 97350 }, { "epoch": 2.908070343056758, "grad_norm": 4.274310111999512, "learning_rate": 0.000172172983845622, "loss": 1.3221, "step": 97400 }, { "epoch": 2.9095631923088408, "grad_norm": 4.423600196838379, "learning_rate": 0.00017215869804970314, "loss": 1.3161, "step": 97450 }, { "epoch": 2.911056041560923, "grad_norm": 4.693662166595459, "learning_rate": 0.00017214441225378433, "loss": 1.3374, "step": 97500 }, { "epoch": 2.9125488908130057, "grad_norm": 5.746738910675049, "learning_rate": 0.0001721301264578655, "loss": 1.3171, "step": 97550 }, { "epoch": 2.9140417400650884, "grad_norm": 5.049290657043457, "learning_rate": 0.00017211584066194666, "loss": 1.2713, "step": 97600 }, { "epoch": 2.9155345893171707, "grad_norm": 4.0549468994140625, "learning_rate": 0.00017210155486602782, "loss": 1.3214, "step": 97650 }, { "epoch": 2.9170274385692534, "grad_norm": 5.128767013549805, "learning_rate": 0.00017208726907010896, "loss": 1.2717, "step": 97700 }, { "epoch": 2.9185202878213357, "grad_norm": 5.148744106292725, "learning_rate": 0.00017207298327419015, "loss": 1.2602, "step": 97750 }, { "epoch": 2.9200131370734184, "grad_norm": 3.682936429977417, "learning_rate": 0.0001720586974782713, "loss": 1.2941, "step": 97800 }, { "epoch": 2.921505986325501, "grad_norm": 4.665682315826416, "learning_rate": 0.00017204441168235248, "loss": 1.2959, "step": 97850 }, { "epoch": 2.9229988355775833, "grad_norm": 4.523177146911621, "learning_rate": 0.00017203012588643365, "loss": 1.2755, "step": 97900 }, { "epoch": 2.924491684829666, "grad_norm": 4.719842433929443, "learning_rate": 0.0001720158400905148, "loss": 1.2523, "step": 97950 }, { "epoch": 2.9259845340817483, "grad_norm": 3.7076125144958496, "learning_rate": 0.00017200155429459598, "loss": 1.2909, "step": 98000 }, { "epoch": 2.927477383333831, "grad_norm": 8.15734577178955, "learning_rate": 0.00017198726849867714, "loss": 1.2369, "step": 98050 }, { "epoch": 2.9289702325859137, "grad_norm": 4.653754711151123, "learning_rate": 0.0001719729827027583, "loss": 1.3744, "step": 98100 }, { "epoch": 2.930463081837996, "grad_norm": 4.599735736846924, "learning_rate": 0.00017195869690683947, "loss": 1.333, "step": 98150 }, { "epoch": 2.9319559310900787, "grad_norm": 5.794803619384766, "learning_rate": 0.00017194441111092063, "loss": 1.3269, "step": 98200 }, { "epoch": 2.933448780342161, "grad_norm": 4.360086441040039, "learning_rate": 0.0001719301253150018, "loss": 1.3025, "step": 98250 }, { "epoch": 2.9349416295942437, "grad_norm": 4.956981182098389, "learning_rate": 0.00017191583951908296, "loss": 1.2484, "step": 98300 }, { "epoch": 2.9364344788463264, "grad_norm": 3.210866928100586, "learning_rate": 0.00017190155372316416, "loss": 1.2796, "step": 98350 }, { "epoch": 2.9379273280984086, "grad_norm": 5.565187931060791, "learning_rate": 0.0001718872679272453, "loss": 1.3243, "step": 98400 }, { "epoch": 2.939420177350491, "grad_norm": 4.15312385559082, "learning_rate": 0.00017187298213132649, "loss": 1.2922, "step": 98450 }, { "epoch": 2.9409130266025736, "grad_norm": 4.836455345153809, "learning_rate": 0.00017185869633540762, "loss": 1.2495, "step": 98500 }, { "epoch": 2.9424058758546563, "grad_norm": 5.803861618041992, "learning_rate": 0.00017184441053948881, "loss": 1.3052, "step": 98550 }, { "epoch": 2.9438987251067386, "grad_norm": 5.279322147369385, "learning_rate": 0.00017183012474356995, "loss": 1.299, "step": 98600 }, { "epoch": 2.9453915743588213, "grad_norm": 3.5537776947021484, "learning_rate": 0.00017181583894765114, "loss": 1.3192, "step": 98650 }, { "epoch": 2.9468844236109035, "grad_norm": 4.724547386169434, "learning_rate": 0.0001718015531517323, "loss": 1.2877, "step": 98700 }, { "epoch": 2.9483772728629862, "grad_norm": 3.863722801208496, "learning_rate": 0.00017178726735581347, "loss": 1.2713, "step": 98750 }, { "epoch": 2.949870122115069, "grad_norm": 3.8161838054656982, "learning_rate": 0.00017177298155989464, "loss": 1.2484, "step": 98800 }, { "epoch": 2.951362971367151, "grad_norm": 4.72680139541626, "learning_rate": 0.0001717586957639758, "loss": 1.3184, "step": 98850 }, { "epoch": 2.952855820619234, "grad_norm": 4.270802021026611, "learning_rate": 0.00017174440996805697, "loss": 1.2655, "step": 98900 }, { "epoch": 2.954348669871316, "grad_norm": 4.669827461242676, "learning_rate": 0.00017173012417213813, "loss": 1.3366, "step": 98950 }, { "epoch": 2.955841519123399, "grad_norm": 4.791524887084961, "learning_rate": 0.0001717158383762193, "loss": 1.2887, "step": 99000 }, { "epoch": 2.9573343683754816, "grad_norm": 5.6565423011779785, "learning_rate": 0.00017170155258030046, "loss": 1.2693, "step": 99050 }, { "epoch": 2.958827217627564, "grad_norm": 3.3838722705841064, "learning_rate": 0.00017168726678438163, "loss": 1.3281, "step": 99100 }, { "epoch": 2.9603200668796466, "grad_norm": 5.198723793029785, "learning_rate": 0.00017167298098846282, "loss": 1.2931, "step": 99150 }, { "epoch": 2.961812916131729, "grad_norm": 3.6596312522888184, "learning_rate": 0.00017165869519254395, "loss": 1.235, "step": 99200 }, { "epoch": 2.9633057653838115, "grad_norm": 4.816101551055908, "learning_rate": 0.00017164440939662515, "loss": 1.3069, "step": 99250 }, { "epoch": 2.9647986146358942, "grad_norm": 4.188026428222656, "learning_rate": 0.00017163012360070628, "loss": 1.3104, "step": 99300 }, { "epoch": 2.9662914638879765, "grad_norm": 4.76021671295166, "learning_rate": 0.00017161583780478748, "loss": 1.3125, "step": 99350 }, { "epoch": 2.967784313140059, "grad_norm": 4.8385138511657715, "learning_rate": 0.00017160155200886861, "loss": 1.2871, "step": 99400 }, { "epoch": 2.9692771623921415, "grad_norm": 10.232873916625977, "learning_rate": 0.0001715872662129498, "loss": 1.2901, "step": 99450 }, { "epoch": 2.970770011644224, "grad_norm": 4.217905521392822, "learning_rate": 0.00017157298041703097, "loss": 1.3533, "step": 99500 }, { "epoch": 2.972262860896307, "grad_norm": 4.916757583618164, "learning_rate": 0.00017155869462111213, "loss": 1.3062, "step": 99550 }, { "epoch": 2.973755710148389, "grad_norm": 4.588007926940918, "learning_rate": 0.0001715444088251933, "loss": 1.2755, "step": 99600 }, { "epoch": 2.975248559400472, "grad_norm": 7.835936546325684, "learning_rate": 0.00017153012302927446, "loss": 1.2472, "step": 99650 }, { "epoch": 2.976741408652554, "grad_norm": 3.825573444366455, "learning_rate": 0.00017151583723335563, "loss": 1.252, "step": 99700 }, { "epoch": 2.978234257904637, "grad_norm": 4.329678535461426, "learning_rate": 0.0001715015514374368, "loss": 1.2689, "step": 99750 }, { "epoch": 2.9797271071567195, "grad_norm": 5.514796733856201, "learning_rate": 0.00017148726564151796, "loss": 1.2776, "step": 99800 }, { "epoch": 2.9812199564088018, "grad_norm": 5.769738674163818, "learning_rate": 0.00017147297984559912, "loss": 1.2717, "step": 99850 }, { "epoch": 2.9827128056608845, "grad_norm": 5.01526403427124, "learning_rate": 0.0001714586940496803, "loss": 1.2571, "step": 99900 }, { "epoch": 2.9842056549129667, "grad_norm": 4.526731491088867, "learning_rate": 0.00017144440825376148, "loss": 1.3401, "step": 99950 }, { "epoch": 2.9856985041650494, "grad_norm": 3.969651222229004, "learning_rate": 0.00017143012245784262, "loss": 1.2814, "step": 100000 }, { "epoch": 2.987191353417132, "grad_norm": 4.796939849853516, "learning_rate": 0.0001714158366619238, "loss": 1.286, "step": 100050 }, { "epoch": 2.9886842026692144, "grad_norm": 5.400031089782715, "learning_rate": 0.00017140155086600495, "loss": 1.2939, "step": 100100 }, { "epoch": 2.990177051921297, "grad_norm": 4.253983497619629, "learning_rate": 0.00017138726507008614, "loss": 1.2753, "step": 100150 }, { "epoch": 2.9916699011733794, "grad_norm": 4.808609485626221, "learning_rate": 0.0001713729792741673, "loss": 1.3019, "step": 100200 }, { "epoch": 2.993162750425462, "grad_norm": 4.482404708862305, "learning_rate": 0.00017135869347824844, "loss": 1.2642, "step": 100250 }, { "epoch": 2.994655599677545, "grad_norm": 5.035706043243408, "learning_rate": 0.00017134440768232963, "loss": 1.2927, "step": 100300 }, { "epoch": 2.996148448929627, "grad_norm": 4.565020561218262, "learning_rate": 0.00017133012188641077, "loss": 1.3068, "step": 100350 }, { "epoch": 2.9976412981817098, "grad_norm": 5.071930408477783, "learning_rate": 0.00017131583609049196, "loss": 1.3127, "step": 100400 }, { "epoch": 2.999134147433792, "grad_norm": 5.127132415771484, "learning_rate": 0.0001713015502945731, "loss": 1.3057, "step": 100450 }, { "epoch": 3.0006269966858747, "grad_norm": 5.550358772277832, "learning_rate": 0.0001712872644986543, "loss": 1.2978, "step": 100500 }, { "epoch": 3.002119845937957, "grad_norm": 5.580805778503418, "learning_rate": 0.00017127297870273545, "loss": 1.2057, "step": 100550 }, { "epoch": 3.0036126951900397, "grad_norm": 3.326228618621826, "learning_rate": 0.00017125869290681662, "loss": 1.1558, "step": 100600 }, { "epoch": 3.0051055444421224, "grad_norm": 4.807900905609131, "learning_rate": 0.00017124440711089778, "loss": 1.1667, "step": 100650 }, { "epoch": 3.0065983936942047, "grad_norm": 4.419902801513672, "learning_rate": 0.00017123012131497895, "loss": 1.1765, "step": 100700 }, { "epoch": 3.0080912429462874, "grad_norm": 4.567293643951416, "learning_rate": 0.0001712158355190601, "loss": 1.2552, "step": 100750 }, { "epoch": 3.0095840921983696, "grad_norm": 3.129995346069336, "learning_rate": 0.00017120154972314128, "loss": 1.2222, "step": 100800 }, { "epoch": 3.0110769414504523, "grad_norm": 4.515715599060059, "learning_rate": 0.00017118726392722244, "loss": 1.2794, "step": 100850 }, { "epoch": 3.012569790702535, "grad_norm": 4.2073283195495605, "learning_rate": 0.0001711729781313036, "loss": 1.2108, "step": 100900 }, { "epoch": 3.0140626399546173, "grad_norm": 4.649296760559082, "learning_rate": 0.00017115869233538477, "loss": 1.2171, "step": 100950 }, { "epoch": 3.0155554892067, "grad_norm": 5.268732070922852, "learning_rate": 0.00017114440653946596, "loss": 1.2566, "step": 101000 }, { "epoch": 3.0170483384587823, "grad_norm": 4.506618976593018, "learning_rate": 0.0001711301207435471, "loss": 1.1528, "step": 101050 }, { "epoch": 3.018541187710865, "grad_norm": 5.6522016525268555, "learning_rate": 0.0001711158349476283, "loss": 1.2399, "step": 101100 }, { "epoch": 3.0200340369629477, "grad_norm": 4.122422695159912, "learning_rate": 0.00017110154915170943, "loss": 1.2239, "step": 101150 }, { "epoch": 3.02152688621503, "grad_norm": 3.759425401687622, "learning_rate": 0.00017108726335579062, "loss": 1.2463, "step": 101200 }, { "epoch": 3.0230197354671127, "grad_norm": 3.6985554695129395, "learning_rate": 0.00017107297755987176, "loss": 1.1758, "step": 101250 }, { "epoch": 3.024512584719195, "grad_norm": 4.267302513122559, "learning_rate": 0.00017105869176395295, "loss": 1.1626, "step": 101300 }, { "epoch": 3.0260054339712776, "grad_norm": 4.210064888000488, "learning_rate": 0.00017104440596803412, "loss": 1.1989, "step": 101350 }, { "epoch": 3.02749828322336, "grad_norm": 4.68282413482666, "learning_rate": 0.00017103012017211528, "loss": 1.1906, "step": 101400 }, { "epoch": 3.0289911324754426, "grad_norm": 4.420328617095947, "learning_rate": 0.00017101583437619645, "loss": 1.2236, "step": 101450 }, { "epoch": 3.0304839817275253, "grad_norm": 4.2961931228637695, "learning_rate": 0.0001710015485802776, "loss": 1.159, "step": 101500 }, { "epoch": 3.0319768309796076, "grad_norm": 5.494664669036865, "learning_rate": 0.00017098726278435878, "loss": 1.2286, "step": 101550 }, { "epoch": 3.0334696802316903, "grad_norm": 4.834953308105469, "learning_rate": 0.00017097297698843994, "loss": 1.1724, "step": 101600 }, { "epoch": 3.0349625294837725, "grad_norm": 4.731471538543701, "learning_rate": 0.0001709586911925211, "loss": 1.2089, "step": 101650 }, { "epoch": 3.0364553787358552, "grad_norm": 3.613417863845825, "learning_rate": 0.00017094440539660227, "loss": 1.2864, "step": 101700 }, { "epoch": 3.037948227987938, "grad_norm": 4.939467430114746, "learning_rate": 0.00017093011960068343, "loss": 1.2509, "step": 101750 }, { "epoch": 3.03944107724002, "grad_norm": 4.377845287322998, "learning_rate": 0.00017091583380476463, "loss": 1.2377, "step": 101800 }, { "epoch": 3.040933926492103, "grad_norm": 5.249380111694336, "learning_rate": 0.00017090154800884576, "loss": 1.1936, "step": 101850 }, { "epoch": 3.042426775744185, "grad_norm": 5.340421676635742, "learning_rate": 0.00017088726221292695, "loss": 1.2575, "step": 101900 }, { "epoch": 3.043919624996268, "grad_norm": 4.105010986328125, "learning_rate": 0.0001708729764170081, "loss": 1.1913, "step": 101950 }, { "epoch": 3.0454124742483506, "grad_norm": 4.164525032043457, "learning_rate": 0.00017085869062108928, "loss": 1.2782, "step": 102000 }, { "epoch": 3.046905323500433, "grad_norm": 5.213939666748047, "learning_rate": 0.00017084440482517042, "loss": 1.2888, "step": 102050 }, { "epoch": 3.0483981727525156, "grad_norm": 6.272274494171143, "learning_rate": 0.0001708301190292516, "loss": 1.1914, "step": 102100 }, { "epoch": 3.049891022004598, "grad_norm": 4.293304443359375, "learning_rate": 0.00017081583323333278, "loss": 1.2037, "step": 102150 }, { "epoch": 3.0513838712566805, "grad_norm": 4.839235305786133, "learning_rate": 0.00017080154743741394, "loss": 1.2241, "step": 102200 }, { "epoch": 3.0528767205087632, "grad_norm": 5.0241498947143555, "learning_rate": 0.0001707872616414951, "loss": 1.2341, "step": 102250 }, { "epoch": 3.0543695697608455, "grad_norm": 6.890449047088623, "learning_rate": 0.00017077297584557627, "loss": 1.2411, "step": 102300 }, { "epoch": 3.055862419012928, "grad_norm": 5.459249496459961, "learning_rate": 0.00017075869004965744, "loss": 1.2553, "step": 102350 }, { "epoch": 3.0573552682650105, "grad_norm": 4.772298336029053, "learning_rate": 0.0001707444042537386, "loss": 1.1836, "step": 102400 }, { "epoch": 3.058848117517093, "grad_norm": 4.730257511138916, "learning_rate": 0.00017073011845781977, "loss": 1.2091, "step": 102450 }, { "epoch": 3.0603409667691754, "grad_norm": 4.203242301940918, "learning_rate": 0.00017071583266190093, "loss": 1.2106, "step": 102500 }, { "epoch": 3.061833816021258, "grad_norm": 5.499764442443848, "learning_rate": 0.0001707015468659821, "loss": 1.2492, "step": 102550 }, { "epoch": 3.063326665273341, "grad_norm": 4.593760967254639, "learning_rate": 0.0001706872610700633, "loss": 1.2849, "step": 102600 }, { "epoch": 3.064819514525423, "grad_norm": 4.561386585235596, "learning_rate": 0.00017067297527414442, "loss": 1.2404, "step": 102650 }, { "epoch": 3.066312363777506, "grad_norm": 5.402337551116943, "learning_rate": 0.00017065868947822562, "loss": 1.1952, "step": 102700 }, { "epoch": 3.067805213029588, "grad_norm": 5.212750434875488, "learning_rate": 0.00017064440368230675, "loss": 1.2505, "step": 102750 }, { "epoch": 3.069298062281671, "grad_norm": 5.780417442321777, "learning_rate": 0.00017063011788638795, "loss": 1.2653, "step": 102800 }, { "epoch": 3.0707909115337535, "grad_norm": 5.937560081481934, "learning_rate": 0.00017061583209046908, "loss": 1.2068, "step": 102850 }, { "epoch": 3.0722837607858358, "grad_norm": 4.921445369720459, "learning_rate": 0.00017060154629455025, "loss": 1.1634, "step": 102900 }, { "epoch": 3.0737766100379185, "grad_norm": 5.346931457519531, "learning_rate": 0.00017058726049863144, "loss": 1.2136, "step": 102950 }, { "epoch": 3.0752694592900007, "grad_norm": 4.446613788604736, "learning_rate": 0.00017057297470271258, "loss": 1.1925, "step": 103000 }, { "epoch": 3.0767623085420834, "grad_norm": 4.870328903198242, "learning_rate": 0.00017055868890679377, "loss": 1.1855, "step": 103050 }, { "epoch": 3.078255157794166, "grad_norm": 5.902019500732422, "learning_rate": 0.0001705444031108749, "loss": 1.2081, "step": 103100 }, { "epoch": 3.0797480070462484, "grad_norm": 4.221609592437744, "learning_rate": 0.0001705301173149561, "loss": 1.2125, "step": 103150 }, { "epoch": 3.081240856298331, "grad_norm": 4.263118267059326, "learning_rate": 0.00017051583151903726, "loss": 1.2164, "step": 103200 }, { "epoch": 3.0827337055504134, "grad_norm": 7.199416160583496, "learning_rate": 0.00017050154572311843, "loss": 1.2191, "step": 103250 }, { "epoch": 3.084226554802496, "grad_norm": 5.3002119064331055, "learning_rate": 0.0001704872599271996, "loss": 1.2828, "step": 103300 }, { "epoch": 3.0857194040545783, "grad_norm": 4.3020405769348145, "learning_rate": 0.00017047297413128076, "loss": 1.2341, "step": 103350 }, { "epoch": 3.087212253306661, "grad_norm": 5.1998443603515625, "learning_rate": 0.00017045868833536192, "loss": 1.2271, "step": 103400 }, { "epoch": 3.0887051025587438, "grad_norm": 4.382910251617432, "learning_rate": 0.00017044440253944309, "loss": 1.1994, "step": 103450 }, { "epoch": 3.090197951810826, "grad_norm": 5.094580173492432, "learning_rate": 0.00017043011674352425, "loss": 1.2557, "step": 103500 }, { "epoch": 3.0916908010629087, "grad_norm": 3.9512152671813965, "learning_rate": 0.00017041583094760542, "loss": 1.283, "step": 103550 }, { "epoch": 3.093183650314991, "grad_norm": 6.366419792175293, "learning_rate": 0.00017040154515168658, "loss": 1.2327, "step": 103600 }, { "epoch": 3.0946764995670737, "grad_norm": 3.0555920600891113, "learning_rate": 0.00017038725935576777, "loss": 1.1865, "step": 103650 }, { "epoch": 3.0961693488191564, "grad_norm": 5.861559867858887, "learning_rate": 0.0001703729735598489, "loss": 1.2394, "step": 103700 }, { "epoch": 3.0976621980712387, "grad_norm": 4.790325164794922, "learning_rate": 0.0001703586877639301, "loss": 1.2256, "step": 103750 }, { "epoch": 3.0991550473233214, "grad_norm": 4.111683368682861, "learning_rate": 0.00017034440196801124, "loss": 1.2228, "step": 103800 }, { "epoch": 3.1006478965754036, "grad_norm": 4.152289390563965, "learning_rate": 0.00017033011617209243, "loss": 1.2547, "step": 103850 }, { "epoch": 3.1021407458274863, "grad_norm": 5.741196155548096, "learning_rate": 0.00017031583037617357, "loss": 1.2049, "step": 103900 }, { "epoch": 3.103633595079569, "grad_norm": 6.526316165924072, "learning_rate": 0.00017030154458025476, "loss": 1.2845, "step": 103950 }, { "epoch": 3.1051264443316513, "grad_norm": 4.736927509307861, "learning_rate": 0.00017028725878433592, "loss": 1.2777, "step": 104000 }, { "epoch": 3.106619293583734, "grad_norm": 4.231018543243408, "learning_rate": 0.0001702729729884171, "loss": 1.2387, "step": 104050 }, { "epoch": 3.1081121428358163, "grad_norm": 4.244640350341797, "learning_rate": 0.00017025868719249825, "loss": 1.1534, "step": 104100 }, { "epoch": 3.109604992087899, "grad_norm": 4.443108558654785, "learning_rate": 0.00017024440139657942, "loss": 1.198, "step": 104150 }, { "epoch": 3.1110978413399817, "grad_norm": 4.030060768127441, "learning_rate": 0.00017023011560066058, "loss": 1.2005, "step": 104200 }, { "epoch": 3.112590690592064, "grad_norm": 4.423096179962158, "learning_rate": 0.00017021582980474175, "loss": 1.2502, "step": 104250 }, { "epoch": 3.1140835398441467, "grad_norm": 4.914687156677246, "learning_rate": 0.0001702015440088229, "loss": 1.2291, "step": 104300 }, { "epoch": 3.115576389096229, "grad_norm": 6.267758369445801, "learning_rate": 0.00017018725821290408, "loss": 1.2694, "step": 104350 }, { "epoch": 3.1170692383483116, "grad_norm": 4.870142936706543, "learning_rate": 0.00017017297241698524, "loss": 1.1742, "step": 104400 }, { "epoch": 3.1185620876003943, "grad_norm": 4.136995792388916, "learning_rate": 0.00017015868662106643, "loss": 1.2683, "step": 104450 }, { "epoch": 3.1200549368524766, "grad_norm": 3.877889633178711, "learning_rate": 0.00017014440082514757, "loss": 1.2226, "step": 104500 }, { "epoch": 3.1215477861045593, "grad_norm": 5.544692039489746, "learning_rate": 0.00017013011502922876, "loss": 1.2163, "step": 104550 }, { "epoch": 3.1230406353566416, "grad_norm": 4.616161346435547, "learning_rate": 0.0001701158292333099, "loss": 1.2569, "step": 104600 }, { "epoch": 3.1245334846087243, "grad_norm": 4.393960475921631, "learning_rate": 0.0001701015434373911, "loss": 1.277, "step": 104650 }, { "epoch": 3.126026333860807, "grad_norm": 5.330219268798828, "learning_rate": 0.00017008725764147223, "loss": 1.1929, "step": 104700 }, { "epoch": 3.1275191831128892, "grad_norm": 5.1908979415893555, "learning_rate": 0.00017007297184555342, "loss": 1.2356, "step": 104750 }, { "epoch": 3.129012032364972, "grad_norm": 3.941892623901367, "learning_rate": 0.00017005868604963459, "loss": 1.1774, "step": 104800 }, { "epoch": 3.130504881617054, "grad_norm": 5.030502796173096, "learning_rate": 0.00017004440025371575, "loss": 1.2153, "step": 104850 }, { "epoch": 3.131997730869137, "grad_norm": 3.437596082687378, "learning_rate": 0.00017003011445779692, "loss": 1.2438, "step": 104900 }, { "epoch": 3.133490580121219, "grad_norm": 4.600546836853027, "learning_rate": 0.00017001582866187808, "loss": 1.2477, "step": 104950 }, { "epoch": 3.134983429373302, "grad_norm": 7.366837501525879, "learning_rate": 0.00017000154286595924, "loss": 1.2411, "step": 105000 }, { "epoch": 3.1364762786253846, "grad_norm": 4.779984474182129, "learning_rate": 0.0001699872570700404, "loss": 1.1974, "step": 105050 }, { "epoch": 3.137969127877467, "grad_norm": 3.6350603103637695, "learning_rate": 0.00016997297127412157, "loss": 1.2811, "step": 105100 }, { "epoch": 3.1394619771295496, "grad_norm": 3.3783535957336426, "learning_rate": 0.00016995868547820274, "loss": 1.2377, "step": 105150 }, { "epoch": 3.140954826381632, "grad_norm": 5.765346527099609, "learning_rate": 0.0001699443996822839, "loss": 1.2648, "step": 105200 }, { "epoch": 3.1424476756337145, "grad_norm": 6.171380996704102, "learning_rate": 0.0001699301138863651, "loss": 1.2653, "step": 105250 }, { "epoch": 3.1439405248857972, "grad_norm": 4.658510684967041, "learning_rate": 0.00016991582809044623, "loss": 1.2838, "step": 105300 }, { "epoch": 3.1454333741378795, "grad_norm": 4.044935703277588, "learning_rate": 0.00016990154229452742, "loss": 1.2807, "step": 105350 }, { "epoch": 3.146926223389962, "grad_norm": 3.6364893913269043, "learning_rate": 0.00016988725649860856, "loss": 1.2631, "step": 105400 }, { "epoch": 3.1484190726420445, "grad_norm": 4.018857955932617, "learning_rate": 0.00016987297070268975, "loss": 1.2301, "step": 105450 }, { "epoch": 3.149911921894127, "grad_norm": 4.4695725440979, "learning_rate": 0.0001698586849067709, "loss": 1.2299, "step": 105500 }, { "epoch": 3.1514047711462094, "grad_norm": 3.9617760181427, "learning_rate": 0.00016984439911085206, "loss": 1.2364, "step": 105550 }, { "epoch": 3.152897620398292, "grad_norm": 4.522684574127197, "learning_rate": 0.00016983011331493325, "loss": 1.2737, "step": 105600 }, { "epoch": 3.154390469650375, "grad_norm": 4.221574783325195, "learning_rate": 0.00016981582751901439, "loss": 1.2214, "step": 105650 }, { "epoch": 3.155883318902457, "grad_norm": 4.848055839538574, "learning_rate": 0.00016980154172309558, "loss": 1.1756, "step": 105700 }, { "epoch": 3.15737616815454, "grad_norm": 4.364899635314941, "learning_rate": 0.00016978725592717671, "loss": 1.2188, "step": 105750 }, { "epoch": 3.158869017406622, "grad_norm": 3.952366352081299, "learning_rate": 0.0001697729701312579, "loss": 1.2422, "step": 105800 }, { "epoch": 3.1603618666587048, "grad_norm": 4.575440883636475, "learning_rate": 0.00016975868433533907, "loss": 1.2711, "step": 105850 }, { "epoch": 3.1618547159107875, "grad_norm": 5.710911273956299, "learning_rate": 0.00016974439853942024, "loss": 1.1996, "step": 105900 }, { "epoch": 3.1633475651628697, "grad_norm": 3.885944128036499, "learning_rate": 0.0001697301127435014, "loss": 1.2633, "step": 105950 }, { "epoch": 3.1648404144149525, "grad_norm": 4.504218578338623, "learning_rate": 0.00016971582694758256, "loss": 1.2028, "step": 106000 }, { "epoch": 3.1663332636670347, "grad_norm": 5.381597995758057, "learning_rate": 0.00016970154115166373, "loss": 1.2445, "step": 106050 }, { "epoch": 3.1678261129191174, "grad_norm": 3.7343199253082275, "learning_rate": 0.0001696872553557449, "loss": 1.2553, "step": 106100 }, { "epoch": 3.1693189621712, "grad_norm": 4.142834186553955, "learning_rate": 0.00016967296955982606, "loss": 1.2319, "step": 106150 }, { "epoch": 3.1708118114232824, "grad_norm": 3.856889486312866, "learning_rate": 0.00016965868376390722, "loss": 1.2911, "step": 106200 }, { "epoch": 3.172304660675365, "grad_norm": 4.8363938331604, "learning_rate": 0.0001696443979679884, "loss": 1.2712, "step": 106250 }, { "epoch": 3.1737975099274474, "grad_norm": 5.556827068328857, "learning_rate": 0.00016963011217206955, "loss": 1.2295, "step": 106300 }, { "epoch": 3.17529035917953, "grad_norm": 4.371210098266602, "learning_rate": 0.00016961582637615072, "loss": 1.2745, "step": 106350 }, { "epoch": 3.1767832084316128, "grad_norm": 4.789484024047852, "learning_rate": 0.0001696015405802319, "loss": 1.232, "step": 106400 }, { "epoch": 3.178276057683695, "grad_norm": 3.9675486087799072, "learning_rate": 0.00016958725478431305, "loss": 1.2131, "step": 106450 }, { "epoch": 3.1797689069357777, "grad_norm": 6.18342924118042, "learning_rate": 0.00016957296898839424, "loss": 1.2337, "step": 106500 }, { "epoch": 3.18126175618786, "grad_norm": 3.3635101318359375, "learning_rate": 0.00016955868319247538, "loss": 1.2632, "step": 106550 }, { "epoch": 3.1827546054399427, "grad_norm": 3.658357620239258, "learning_rate": 0.00016954439739655657, "loss": 1.2306, "step": 106600 }, { "epoch": 3.1842474546920254, "grad_norm": 5.325497627258301, "learning_rate": 0.00016953011160063773, "loss": 1.2381, "step": 106650 }, { "epoch": 3.1857403039441077, "grad_norm": 5.2231268882751465, "learning_rate": 0.0001695158258047189, "loss": 1.2236, "step": 106700 }, { "epoch": 3.1872331531961904, "grad_norm": 5.6110334396362305, "learning_rate": 0.00016950154000880006, "loss": 1.2479, "step": 106750 }, { "epoch": 3.1887260024482726, "grad_norm": 4.795280933380127, "learning_rate": 0.00016948725421288123, "loss": 1.1846, "step": 106800 }, { "epoch": 3.1902188517003554, "grad_norm": 5.203749179840088, "learning_rate": 0.0001694729684169624, "loss": 1.2531, "step": 106850 }, { "epoch": 3.191711700952438, "grad_norm": 6.236663818359375, "learning_rate": 0.00016945868262104356, "loss": 1.1977, "step": 106900 }, { "epoch": 3.1932045502045203, "grad_norm": 4.13303279876709, "learning_rate": 0.00016944439682512472, "loss": 1.2418, "step": 106950 }, { "epoch": 3.194697399456603, "grad_norm": 6.106944561004639, "learning_rate": 0.00016943011102920589, "loss": 1.1926, "step": 107000 }, { "epoch": 3.1961902487086853, "grad_norm": 4.1147332191467285, "learning_rate": 0.00016941582523328705, "loss": 1.2549, "step": 107050 }, { "epoch": 3.197683097960768, "grad_norm": 4.951442718505859, "learning_rate": 0.00016940153943736824, "loss": 1.2964, "step": 107100 }, { "epoch": 3.1991759472128503, "grad_norm": 6.196277141571045, "learning_rate": 0.00016938725364144938, "loss": 1.2541, "step": 107150 }, { "epoch": 3.200668796464933, "grad_norm": 5.174891948699951, "learning_rate": 0.00016937296784553057, "loss": 1.238, "step": 107200 }, { "epoch": 3.2021616457170157, "grad_norm": 6.037936687469482, "learning_rate": 0.0001693586820496117, "loss": 1.2919, "step": 107250 }, { "epoch": 3.203654494969098, "grad_norm": 4.104307651519775, "learning_rate": 0.0001693443962536929, "loss": 1.178, "step": 107300 }, { "epoch": 3.2051473442211806, "grad_norm": 4.1462273597717285, "learning_rate": 0.00016933011045777404, "loss": 1.2468, "step": 107350 }, { "epoch": 3.206640193473263, "grad_norm": 3.701998233795166, "learning_rate": 0.00016931582466185523, "loss": 1.2262, "step": 107400 }, { "epoch": 3.2081330427253456, "grad_norm": 4.66901159286499, "learning_rate": 0.0001693015388659364, "loss": 1.2413, "step": 107450 }, { "epoch": 3.2096258919774283, "grad_norm": 5.96376371383667, "learning_rate": 0.00016928725307001756, "loss": 1.1554, "step": 107500 }, { "epoch": 3.2111187412295106, "grad_norm": 3.801471710205078, "learning_rate": 0.00016927296727409872, "loss": 1.2821, "step": 107550 }, { "epoch": 3.2126115904815933, "grad_norm": 5.026028633117676, "learning_rate": 0.0001692586814781799, "loss": 1.2307, "step": 107600 }, { "epoch": 3.2141044397336755, "grad_norm": 5.475889682769775, "learning_rate": 0.00016924439568226105, "loss": 1.2, "step": 107650 }, { "epoch": 3.2155972889857583, "grad_norm": 6.249619007110596, "learning_rate": 0.00016923010988634222, "loss": 1.2448, "step": 107700 }, { "epoch": 3.2170901382378405, "grad_norm": 4.803418159484863, "learning_rate": 0.00016921582409042338, "loss": 1.2669, "step": 107750 }, { "epoch": 3.218582987489923, "grad_norm": 3.9626855850219727, "learning_rate": 0.00016920153829450455, "loss": 1.2167, "step": 107800 }, { "epoch": 3.220075836742006, "grad_norm": 3.5219650268554688, "learning_rate": 0.0001691872524985857, "loss": 1.2554, "step": 107850 }, { "epoch": 3.221568685994088, "grad_norm": 4.510613441467285, "learning_rate": 0.0001691729667026669, "loss": 1.1923, "step": 107900 }, { "epoch": 3.223061535246171, "grad_norm": 5.354753494262695, "learning_rate": 0.00016915868090674804, "loss": 1.1883, "step": 107950 }, { "epoch": 3.224554384498253, "grad_norm": 4.148642063140869, "learning_rate": 0.00016914439511082923, "loss": 1.2497, "step": 108000 }, { "epoch": 3.226047233750336, "grad_norm": 4.1666646003723145, "learning_rate": 0.00016913010931491037, "loss": 1.1872, "step": 108050 }, { "epoch": 3.2275400830024186, "grad_norm": 4.137986183166504, "learning_rate": 0.00016911582351899156, "loss": 1.2788, "step": 108100 }, { "epoch": 3.229032932254501, "grad_norm": 3.901366710662842, "learning_rate": 0.0001691015377230727, "loss": 1.2651, "step": 108150 }, { "epoch": 3.2305257815065835, "grad_norm": 3.96573543548584, "learning_rate": 0.00016908725192715386, "loss": 1.2277, "step": 108200 }, { "epoch": 3.232018630758666, "grad_norm": 4.226304054260254, "learning_rate": 0.00016907296613123506, "loss": 1.2792, "step": 108250 }, { "epoch": 3.2335114800107485, "grad_norm": 5.907839775085449, "learning_rate": 0.0001690586803353162, "loss": 1.2123, "step": 108300 }, { "epoch": 3.235004329262831, "grad_norm": 3.7594401836395264, "learning_rate": 0.00016904439453939738, "loss": 1.1956, "step": 108350 }, { "epoch": 3.2364971785149135, "grad_norm": 3.6791820526123047, "learning_rate": 0.00016903010874347852, "loss": 1.2103, "step": 108400 }, { "epoch": 3.237990027766996, "grad_norm": 4.249289512634277, "learning_rate": 0.00016901582294755971, "loss": 1.2348, "step": 108450 }, { "epoch": 3.2394828770190784, "grad_norm": 5.203214645385742, "learning_rate": 0.00016900153715164085, "loss": 1.2122, "step": 108500 }, { "epoch": 3.240975726271161, "grad_norm": 5.2223591804504395, "learning_rate": 0.00016898725135572204, "loss": 1.1984, "step": 108550 }, { "epoch": 3.242468575523244, "grad_norm": 3.9512999057769775, "learning_rate": 0.0001689729655598032, "loss": 1.2791, "step": 108600 }, { "epoch": 3.243961424775326, "grad_norm": 5.953644752502441, "learning_rate": 0.00016895867976388437, "loss": 1.2582, "step": 108650 }, { "epoch": 3.245454274027409, "grad_norm": 4.990479469299316, "learning_rate": 0.00016894439396796554, "loss": 1.2594, "step": 108700 }, { "epoch": 3.246947123279491, "grad_norm": 4.3011651039123535, "learning_rate": 0.0001689301081720467, "loss": 1.2351, "step": 108750 }, { "epoch": 3.248439972531574, "grad_norm": 4.554538249969482, "learning_rate": 0.00016891582237612787, "loss": 1.2482, "step": 108800 }, { "epoch": 3.2499328217836565, "grad_norm": 4.715415000915527, "learning_rate": 0.00016890153658020903, "loss": 1.2691, "step": 108850 }, { "epoch": 3.2514256710357388, "grad_norm": 5.759798049926758, "learning_rate": 0.0001688872507842902, "loss": 1.2224, "step": 108900 }, { "epoch": 3.2529185202878215, "grad_norm": 3.7031774520874023, "learning_rate": 0.00016887296498837136, "loss": 1.2166, "step": 108950 }, { "epoch": 3.2544113695399037, "grad_norm": 5.22156286239624, "learning_rate": 0.00016885867919245253, "loss": 1.2465, "step": 109000 }, { "epoch": 3.2559042187919864, "grad_norm": 5.56991720199585, "learning_rate": 0.00016884439339653372, "loss": 1.2274, "step": 109050 }, { "epoch": 3.257397068044069, "grad_norm": 4.750823020935059, "learning_rate": 0.00016883010760061485, "loss": 1.1885, "step": 109100 }, { "epoch": 3.2588899172961514, "grad_norm": 4.139292240142822, "learning_rate": 0.00016881582180469605, "loss": 1.2308, "step": 109150 }, { "epoch": 3.260382766548234, "grad_norm": 3.652226209640503, "learning_rate": 0.00016880153600877718, "loss": 1.212, "step": 109200 }, { "epoch": 3.2618756158003164, "grad_norm": 5.819267272949219, "learning_rate": 0.00016878725021285838, "loss": 1.1852, "step": 109250 }, { "epoch": 3.263368465052399, "grad_norm": 4.4434027671813965, "learning_rate": 0.00016877296441693954, "loss": 1.2124, "step": 109300 }, { "epoch": 3.2648613143044813, "grad_norm": 3.6519103050231934, "learning_rate": 0.0001687586786210207, "loss": 1.2033, "step": 109350 }, { "epoch": 3.266354163556564, "grad_norm": 5.874778747558594, "learning_rate": 0.00016874439282510187, "loss": 1.2745, "step": 109400 }, { "epoch": 3.2678470128086468, "grad_norm": 4.622878551483154, "learning_rate": 0.00016873010702918303, "loss": 1.2599, "step": 109450 }, { "epoch": 3.269339862060729, "grad_norm": 4.694761276245117, "learning_rate": 0.0001687158212332642, "loss": 1.2233, "step": 109500 }, { "epoch": 3.2708327113128117, "grad_norm": 5.996031284332275, "learning_rate": 0.00016870153543734536, "loss": 1.2452, "step": 109550 }, { "epoch": 3.272325560564894, "grad_norm": 3.6771366596221924, "learning_rate": 0.00016868724964142653, "loss": 1.253, "step": 109600 }, { "epoch": 3.2738184098169767, "grad_norm": 3.622293710708618, "learning_rate": 0.0001686729638455077, "loss": 1.2456, "step": 109650 }, { "epoch": 3.2753112590690594, "grad_norm": 6.092465400695801, "learning_rate": 0.00016865867804958886, "loss": 1.2742, "step": 109700 }, { "epoch": 3.2768041083211417, "grad_norm": 4.962725639343262, "learning_rate": 0.00016864439225367005, "loss": 1.2426, "step": 109750 }, { "epoch": 3.2782969575732244, "grad_norm": 4.702395915985107, "learning_rate": 0.0001686301064577512, "loss": 1.2324, "step": 109800 }, { "epoch": 3.2797898068253066, "grad_norm": 4.226596832275391, "learning_rate": 0.00016861582066183238, "loss": 1.2791, "step": 109850 }, { "epoch": 3.2812826560773893, "grad_norm": 4.105664253234863, "learning_rate": 0.00016860153486591352, "loss": 1.2606, "step": 109900 }, { "epoch": 3.2827755053294716, "grad_norm": 4.2797369956970215, "learning_rate": 0.0001685872490699947, "loss": 1.3038, "step": 109950 }, { "epoch": 3.2842683545815543, "grad_norm": 5.301023483276367, "learning_rate": 0.00016857296327407585, "loss": 1.1694, "step": 110000 }, { "epoch": 3.285761203833637, "grad_norm": 4.061399459838867, "learning_rate": 0.00016855867747815704, "loss": 1.2366, "step": 110050 }, { "epoch": 3.2872540530857193, "grad_norm": 4.011737823486328, "learning_rate": 0.0001685443916822382, "loss": 1.2354, "step": 110100 }, { "epoch": 3.288746902337802, "grad_norm": 4.765646457672119, "learning_rate": 0.00016853010588631937, "loss": 1.2935, "step": 110150 }, { "epoch": 3.2902397515898842, "grad_norm": 4.911751747131348, "learning_rate": 0.00016851582009040053, "loss": 1.2079, "step": 110200 }, { "epoch": 3.291732600841967, "grad_norm": 5.475641250610352, "learning_rate": 0.0001685015342944817, "loss": 1.2861, "step": 110250 }, { "epoch": 3.2932254500940497, "grad_norm": 4.465792655944824, "learning_rate": 0.00016848724849856286, "loss": 1.239, "step": 110300 }, { "epoch": 3.294718299346132, "grad_norm": 6.820103645324707, "learning_rate": 0.00016847296270264403, "loss": 1.1682, "step": 110350 }, { "epoch": 3.2962111485982146, "grad_norm": 6.0468010902404785, "learning_rate": 0.0001684586769067252, "loss": 1.2195, "step": 110400 }, { "epoch": 3.297703997850297, "grad_norm": 6.527349472045898, "learning_rate": 0.00016844439111080635, "loss": 1.2655, "step": 110450 }, { "epoch": 3.2991968471023796, "grad_norm": 4.763921737670898, "learning_rate": 0.00016843010531488752, "loss": 1.2427, "step": 110500 }, { "epoch": 3.3006896963544623, "grad_norm": 5.6205949783325195, "learning_rate": 0.0001684158195189687, "loss": 1.2629, "step": 110550 }, { "epoch": 3.3021825456065446, "grad_norm": 4.788918972015381, "learning_rate": 0.00016840153372304985, "loss": 1.246, "step": 110600 }, { "epoch": 3.3036753948586273, "grad_norm": 4.326768398284912, "learning_rate": 0.00016838724792713104, "loss": 1.23, "step": 110650 }, { "epoch": 3.3051682441107095, "grad_norm": 4.26240873336792, "learning_rate": 0.00016837296213121218, "loss": 1.2487, "step": 110700 }, { "epoch": 3.3066610933627922, "grad_norm": 5.802476406097412, "learning_rate": 0.00016835867633529334, "loss": 1.2401, "step": 110750 }, { "epoch": 3.308153942614875, "grad_norm": 3.884272575378418, "learning_rate": 0.0001683443905393745, "loss": 1.2199, "step": 110800 }, { "epoch": 3.309646791866957, "grad_norm": 5.074997425079346, "learning_rate": 0.00016833010474345567, "loss": 1.2285, "step": 110850 }, { "epoch": 3.31113964111904, "grad_norm": 3.9242136478424072, "learning_rate": 0.00016831581894753686, "loss": 1.2276, "step": 110900 }, { "epoch": 3.312632490371122, "grad_norm": 5.645691394805908, "learning_rate": 0.000168301533151618, "loss": 1.2031, "step": 110950 }, { "epoch": 3.314125339623205, "grad_norm": 6.003003120422363, "learning_rate": 0.0001682872473556992, "loss": 1.2442, "step": 111000 }, { "epoch": 3.3156181888752876, "grad_norm": 4.822206020355225, "learning_rate": 0.00016827296155978033, "loss": 1.2398, "step": 111050 }, { "epoch": 3.31711103812737, "grad_norm": 3.79050350189209, "learning_rate": 0.00016825867576386152, "loss": 1.2108, "step": 111100 }, { "epoch": 3.3186038873794526, "grad_norm": 5.795800685882568, "learning_rate": 0.00016824438996794266, "loss": 1.2537, "step": 111150 }, { "epoch": 3.320096736631535, "grad_norm": 4.554315090179443, "learning_rate": 0.00016823010417202385, "loss": 1.2485, "step": 111200 }, { "epoch": 3.3215895858836175, "grad_norm": 3.562445640563965, "learning_rate": 0.00016821581837610502, "loss": 1.2802, "step": 111250 }, { "epoch": 3.3230824351357002, "grad_norm": 4.7466278076171875, "learning_rate": 0.00016820153258018618, "loss": 1.2108, "step": 111300 }, { "epoch": 3.3245752843877825, "grad_norm": 4.278189659118652, "learning_rate": 0.00016818724678426735, "loss": 1.2035, "step": 111350 }, { "epoch": 3.326068133639865, "grad_norm": 3.9288856983184814, "learning_rate": 0.0001681729609883485, "loss": 1.2155, "step": 111400 }, { "epoch": 3.3275609828919475, "grad_norm": 5.247483253479004, "learning_rate": 0.00016815867519242967, "loss": 1.2523, "step": 111450 }, { "epoch": 3.32905383214403, "grad_norm": 5.946502685546875, "learning_rate": 0.00016814438939651084, "loss": 1.2254, "step": 111500 }, { "epoch": 3.3305466813961124, "grad_norm": 4.208354473114014, "learning_rate": 0.000168130103600592, "loss": 1.2168, "step": 111550 }, { "epoch": 3.332039530648195, "grad_norm": 6.033560752868652, "learning_rate": 0.00016811581780467317, "loss": 1.2876, "step": 111600 }, { "epoch": 3.333532379900278, "grad_norm": 4.848031520843506, "learning_rate": 0.00016810153200875433, "loss": 1.2404, "step": 111650 }, { "epoch": 3.33502522915236, "grad_norm": 3.873622179031372, "learning_rate": 0.00016808724621283553, "loss": 1.2489, "step": 111700 }, { "epoch": 3.336518078404443, "grad_norm": 4.604708671569824, "learning_rate": 0.00016807296041691666, "loss": 1.2255, "step": 111750 }, { "epoch": 3.338010927656525, "grad_norm": 5.416150093078613, "learning_rate": 0.00016805867462099785, "loss": 1.2108, "step": 111800 }, { "epoch": 3.339503776908608, "grad_norm": 4.228431224822998, "learning_rate": 0.000168044388825079, "loss": 1.2447, "step": 111850 }, { "epoch": 3.3409966261606905, "grad_norm": 4.664206027984619, "learning_rate": 0.00016803010302916018, "loss": 1.2296, "step": 111900 }, { "epoch": 3.3424894754127727, "grad_norm": 4.114374160766602, "learning_rate": 0.00016801581723324132, "loss": 1.1755, "step": 111950 }, { "epoch": 3.3439823246648555, "grad_norm": 5.543557643890381, "learning_rate": 0.0001680015314373225, "loss": 1.2252, "step": 112000 }, { "epoch": 3.3454751739169377, "grad_norm": 4.992297649383545, "learning_rate": 0.00016798724564140368, "loss": 1.2215, "step": 112050 }, { "epoch": 3.3469680231690204, "grad_norm": 5.890657901763916, "learning_rate": 0.00016797295984548484, "loss": 1.258, "step": 112100 }, { "epoch": 3.3484608724211027, "grad_norm": 8.049378395080566, "learning_rate": 0.000167958674049566, "loss": 1.2354, "step": 112150 }, { "epoch": 3.3499537216731854, "grad_norm": 4.3732075691223145, "learning_rate": 0.00016794438825364717, "loss": 1.1945, "step": 112200 }, { "epoch": 3.351446570925268, "grad_norm": 6.537196159362793, "learning_rate": 0.00016793010245772834, "loss": 1.2857, "step": 112250 }, { "epoch": 3.3529394201773504, "grad_norm": 4.264357089996338, "learning_rate": 0.0001679158166618095, "loss": 1.2425, "step": 112300 }, { "epoch": 3.354432269429433, "grad_norm": 5.0003662109375, "learning_rate": 0.00016790153086589067, "loss": 1.2781, "step": 112350 }, { "epoch": 3.3559251186815153, "grad_norm": 3.599029064178467, "learning_rate": 0.00016788724506997183, "loss": 1.1866, "step": 112400 }, { "epoch": 3.357417967933598, "grad_norm": 5.207790374755859, "learning_rate": 0.000167872959274053, "loss": 1.248, "step": 112450 }, { "epoch": 3.3589108171856807, "grad_norm": 4.075089931488037, "learning_rate": 0.0001678586734781342, "loss": 1.195, "step": 112500 }, { "epoch": 3.360403666437763, "grad_norm": 5.74088716506958, "learning_rate": 0.00016784438768221532, "loss": 1.2343, "step": 112550 }, { "epoch": 3.3618965156898457, "grad_norm": 4.6949262619018555, "learning_rate": 0.00016783010188629652, "loss": 1.2295, "step": 112600 }, { "epoch": 3.363389364941928, "grad_norm": 5.432741165161133, "learning_rate": 0.00016781581609037765, "loss": 1.3118, "step": 112650 }, { "epoch": 3.3648822141940107, "grad_norm": 4.652493000030518, "learning_rate": 0.00016780153029445885, "loss": 1.2686, "step": 112700 }, { "epoch": 3.3663750634460934, "grad_norm": 4.528727054595947, "learning_rate": 0.00016778724449854, "loss": 1.2235, "step": 112750 }, { "epoch": 3.3678679126981756, "grad_norm": 4.215224266052246, "learning_rate": 0.00016777295870262117, "loss": 1.2993, "step": 112800 }, { "epoch": 3.3693607619502584, "grad_norm": 4.306272506713867, "learning_rate": 0.00016775867290670234, "loss": 1.2298, "step": 112850 }, { "epoch": 3.3708536112023406, "grad_norm": 3.477867841720581, "learning_rate": 0.0001677443871107835, "loss": 1.2557, "step": 112900 }, { "epoch": 3.3723464604544233, "grad_norm": 4.306337833404541, "learning_rate": 0.00016773010131486467, "loss": 1.1917, "step": 112950 }, { "epoch": 3.373839309706506, "grad_norm": 4.982789993286133, "learning_rate": 0.00016771581551894583, "loss": 1.2572, "step": 113000 }, { "epoch": 3.3753321589585883, "grad_norm": 4.850037097930908, "learning_rate": 0.000167701529723027, "loss": 1.2686, "step": 113050 }, { "epoch": 3.376825008210671, "grad_norm": 4.561743259429932, "learning_rate": 0.00016768724392710816, "loss": 1.2696, "step": 113100 }, { "epoch": 3.3783178574627533, "grad_norm": 4.380636692047119, "learning_rate": 0.00016767295813118933, "loss": 1.2335, "step": 113150 }, { "epoch": 3.379810706714836, "grad_norm": 5.846193313598633, "learning_rate": 0.00016765867233527052, "loss": 1.2353, "step": 113200 }, { "epoch": 3.3813035559669187, "grad_norm": 3.9056570529937744, "learning_rate": 0.00016764438653935166, "loss": 1.3195, "step": 113250 }, { "epoch": 3.382796405219001, "grad_norm": 5.165102958679199, "learning_rate": 0.00016763010074343285, "loss": 1.2193, "step": 113300 }, { "epoch": 3.3842892544710836, "grad_norm": 4.945791244506836, "learning_rate": 0.00016761581494751399, "loss": 1.22, "step": 113350 }, { "epoch": 3.385782103723166, "grad_norm": 6.68800687789917, "learning_rate": 0.00016760152915159515, "loss": 1.2546, "step": 113400 }, { "epoch": 3.3872749529752486, "grad_norm": 8.491449356079102, "learning_rate": 0.00016758724335567632, "loss": 1.2185, "step": 113450 }, { "epoch": 3.3887678022273313, "grad_norm": 5.642709255218506, "learning_rate": 0.00016757295755975748, "loss": 1.2071, "step": 113500 }, { "epoch": 3.3902606514794136, "grad_norm": 4.33923864364624, "learning_rate": 0.00016755867176383867, "loss": 1.2902, "step": 113550 }, { "epoch": 3.3917535007314963, "grad_norm": 9.632731437683105, "learning_rate": 0.0001675443859679198, "loss": 1.2163, "step": 113600 }, { "epoch": 3.3932463499835785, "grad_norm": 4.608974456787109, "learning_rate": 0.000167530100172001, "loss": 1.2247, "step": 113650 }, { "epoch": 3.3947391992356613, "grad_norm": 3.9156320095062256, "learning_rate": 0.00016751581437608214, "loss": 1.2184, "step": 113700 }, { "epoch": 3.3962320484877435, "grad_norm": 3.8621175289154053, "learning_rate": 0.00016750152858016333, "loss": 1.1579, "step": 113750 }, { "epoch": 3.397724897739826, "grad_norm": 4.917006969451904, "learning_rate": 0.00016748724278424447, "loss": 1.2958, "step": 113800 }, { "epoch": 3.399217746991909, "grad_norm": 4.837984561920166, "learning_rate": 0.00016747295698832566, "loss": 1.25, "step": 113850 }, { "epoch": 3.400710596243991, "grad_norm": 5.688441753387451, "learning_rate": 0.00016745867119240682, "loss": 1.2573, "step": 113900 }, { "epoch": 3.402203445496074, "grad_norm": 5.892030239105225, "learning_rate": 0.000167444385396488, "loss": 1.2146, "step": 113950 }, { "epoch": 3.403696294748156, "grad_norm": 5.0287652015686035, "learning_rate": 0.00016743009960056915, "loss": 1.2544, "step": 114000 }, { "epoch": 3.405189144000239, "grad_norm": 3.9118645191192627, "learning_rate": 0.00016741581380465032, "loss": 1.2881, "step": 114050 }, { "epoch": 3.4066819932523216, "grad_norm": 4.481536865234375, "learning_rate": 0.00016740152800873148, "loss": 1.2773, "step": 114100 }, { "epoch": 3.408174842504404, "grad_norm": 4.5398125648498535, "learning_rate": 0.00016738724221281265, "loss": 1.2502, "step": 114150 }, { "epoch": 3.4096676917564865, "grad_norm": 4.823166847229004, "learning_rate": 0.0001673729564168938, "loss": 1.2654, "step": 114200 }, { "epoch": 3.411160541008569, "grad_norm": 6.366767406463623, "learning_rate": 0.00016735867062097498, "loss": 1.2552, "step": 114250 }, { "epoch": 3.4126533902606515, "grad_norm": 4.806399822235107, "learning_rate": 0.00016734438482505614, "loss": 1.1956, "step": 114300 }, { "epoch": 3.4141462395127338, "grad_norm": 7.901057720184326, "learning_rate": 0.00016733009902913733, "loss": 1.2192, "step": 114350 }, { "epoch": 3.4156390887648165, "grad_norm": 4.8132500648498535, "learning_rate": 0.00016731581323321847, "loss": 1.2719, "step": 114400 }, { "epoch": 3.417131938016899, "grad_norm": 4.0474629402160645, "learning_rate": 0.00016730152743729966, "loss": 1.2212, "step": 114450 }, { "epoch": 3.4186247872689814, "grad_norm": 5.218587875366211, "learning_rate": 0.0001672872416413808, "loss": 1.2162, "step": 114500 }, { "epoch": 3.420117636521064, "grad_norm": 4.62558650970459, "learning_rate": 0.000167272955845462, "loss": 1.2715, "step": 114550 }, { "epoch": 3.4216104857731464, "grad_norm": 5.178062915802002, "learning_rate": 0.00016725867004954313, "loss": 1.2799, "step": 114600 }, { "epoch": 3.423103335025229, "grad_norm": 4.345101833343506, "learning_rate": 0.00016724438425362432, "loss": 1.2453, "step": 114650 }, { "epoch": 3.424596184277312, "grad_norm": 4.486167907714844, "learning_rate": 0.00016723009845770549, "loss": 1.2591, "step": 114700 }, { "epoch": 3.426089033529394, "grad_norm": 4.727272033691406, "learning_rate": 0.00016721581266178665, "loss": 1.3124, "step": 114750 }, { "epoch": 3.427581882781477, "grad_norm": 3.8477623462677, "learning_rate": 0.00016720152686586782, "loss": 1.2389, "step": 114800 }, { "epoch": 3.429074732033559, "grad_norm": 3.8839187622070312, "learning_rate": 0.00016718724106994898, "loss": 1.2635, "step": 114850 }, { "epoch": 3.4305675812856418, "grad_norm": 4.389588356018066, "learning_rate": 0.00016717295527403014, "loss": 1.3258, "step": 114900 }, { "epoch": 3.4320604305377245, "grad_norm": 4.100683689117432, "learning_rate": 0.0001671586694781113, "loss": 1.1715, "step": 114950 }, { "epoch": 3.4335532797898067, "grad_norm": 4.422749042510986, "learning_rate": 0.00016714438368219247, "loss": 1.2415, "step": 115000 }, { "epoch": 3.4350461290418894, "grad_norm": 5.368492126464844, "learning_rate": 0.00016713009788627364, "loss": 1.2644, "step": 115050 }, { "epoch": 3.4365389782939717, "grad_norm": 4.524289608001709, "learning_rate": 0.0001671158120903548, "loss": 1.2413, "step": 115100 }, { "epoch": 3.4380318275460544, "grad_norm": 6.099424362182617, "learning_rate": 0.000167101526294436, "loss": 1.2915, "step": 115150 }, { "epoch": 3.439524676798137, "grad_norm": 4.018702507019043, "learning_rate": 0.00016708724049851713, "loss": 1.2868, "step": 115200 }, { "epoch": 3.4410175260502194, "grad_norm": 4.0083112716674805, "learning_rate": 0.00016707295470259832, "loss": 1.1582, "step": 115250 }, { "epoch": 3.442510375302302, "grad_norm": 4.636148452758789, "learning_rate": 0.00016705866890667946, "loss": 1.2637, "step": 115300 }, { "epoch": 3.4440032245543843, "grad_norm": 4.421489238739014, "learning_rate": 0.00016704438311076065, "loss": 1.2649, "step": 115350 }, { "epoch": 3.445496073806467, "grad_norm": 5.654540538787842, "learning_rate": 0.00016703009731484182, "loss": 1.3091, "step": 115400 }, { "epoch": 3.4469889230585498, "grad_norm": 4.134968280792236, "learning_rate": 0.00016701581151892298, "loss": 1.2496, "step": 115450 }, { "epoch": 3.448481772310632, "grad_norm": 4.880650043487549, "learning_rate": 0.00016700152572300415, "loss": 1.2744, "step": 115500 }, { "epoch": 3.4499746215627147, "grad_norm": 6.422858238220215, "learning_rate": 0.0001669872399270853, "loss": 1.1798, "step": 115550 }, { "epoch": 3.451467470814797, "grad_norm": 5.5306315422058105, "learning_rate": 0.00016697295413116648, "loss": 1.2321, "step": 115600 }, { "epoch": 3.4529603200668797, "grad_norm": 3.598203659057617, "learning_rate": 0.00016695866833524764, "loss": 1.2494, "step": 115650 }, { "epoch": 3.4544531693189624, "grad_norm": 4.61868143081665, "learning_rate": 0.0001669443825393288, "loss": 1.2525, "step": 115700 }, { "epoch": 3.4559460185710447, "grad_norm": 3.731189727783203, "learning_rate": 0.00016693009674340997, "loss": 1.2407, "step": 115750 }, { "epoch": 3.4574388678231274, "grad_norm": 5.61367654800415, "learning_rate": 0.00016691581094749114, "loss": 1.176, "step": 115800 }, { "epoch": 3.4589317170752096, "grad_norm": 4.095551490783691, "learning_rate": 0.0001669015251515723, "loss": 1.2203, "step": 115850 }, { "epoch": 3.4604245663272923, "grad_norm": 5.298232555389404, "learning_rate": 0.00016688723935565346, "loss": 1.2516, "step": 115900 }, { "epoch": 3.4619174155793746, "grad_norm": 5.313197135925293, "learning_rate": 0.00016687295355973466, "loss": 1.2375, "step": 115950 }, { "epoch": 3.4634102648314573, "grad_norm": 5.016303062438965, "learning_rate": 0.0001668586677638158, "loss": 1.2515, "step": 116000 }, { "epoch": 3.46490311408354, "grad_norm": 4.23463249206543, "learning_rate": 0.00016684438196789696, "loss": 1.2218, "step": 116050 }, { "epoch": 3.4663959633356223, "grad_norm": 4.841723918914795, "learning_rate": 0.00016683009617197812, "loss": 1.2503, "step": 116100 }, { "epoch": 3.467888812587705, "grad_norm": 4.741985321044922, "learning_rate": 0.0001668158103760593, "loss": 1.257, "step": 116150 }, { "epoch": 3.4693816618397872, "grad_norm": 5.977380275726318, "learning_rate": 0.00016680152458014048, "loss": 1.2774, "step": 116200 }, { "epoch": 3.47087451109187, "grad_norm": 4.758930683135986, "learning_rate": 0.00016678723878422162, "loss": 1.2718, "step": 116250 }, { "epoch": 3.4723673603439527, "grad_norm": 3.923983097076416, "learning_rate": 0.0001667729529883028, "loss": 1.2274, "step": 116300 }, { "epoch": 3.473860209596035, "grad_norm": 4.535523891448975, "learning_rate": 0.00016675866719238395, "loss": 1.2749, "step": 116350 }, { "epoch": 3.4753530588481176, "grad_norm": 3.575572967529297, "learning_rate": 0.00016674438139646514, "loss": 1.2378, "step": 116400 }, { "epoch": 3.4768459081002, "grad_norm": 4.385488510131836, "learning_rate": 0.00016673009560054628, "loss": 1.278, "step": 116450 }, { "epoch": 3.4783387573522826, "grad_norm": 3.994697332382202, "learning_rate": 0.00016671580980462747, "loss": 1.2324, "step": 116500 }, { "epoch": 3.479831606604365, "grad_norm": 4.184852600097656, "learning_rate": 0.00016670152400870863, "loss": 1.2528, "step": 116550 }, { "epoch": 3.4813244558564476, "grad_norm": 4.509354591369629, "learning_rate": 0.0001666872382127898, "loss": 1.2764, "step": 116600 }, { "epoch": 3.4828173051085303, "grad_norm": 4.86820125579834, "learning_rate": 0.00016667295241687096, "loss": 1.2332, "step": 116650 }, { "epoch": 3.4843101543606125, "grad_norm": 5.312245845794678, "learning_rate": 0.00016665866662095213, "loss": 1.2443, "step": 116700 }, { "epoch": 3.4858030036126952, "grad_norm": 4.096804618835449, "learning_rate": 0.0001666443808250333, "loss": 1.2352, "step": 116750 }, { "epoch": 3.4872958528647775, "grad_norm": 4.2911200523376465, "learning_rate": 0.00016663009502911446, "loss": 1.2366, "step": 116800 }, { "epoch": 3.48878870211686, "grad_norm": 5.922074794769287, "learning_rate": 0.00016661580923319562, "loss": 1.28, "step": 116850 }, { "epoch": 3.490281551368943, "grad_norm": 4.1473612785339355, "learning_rate": 0.00016660152343727678, "loss": 1.1911, "step": 116900 }, { "epoch": 3.491774400621025, "grad_norm": 4.428061485290527, "learning_rate": 0.00016658723764135795, "loss": 1.2649, "step": 116950 }, { "epoch": 3.493267249873108, "grad_norm": 4.338399887084961, "learning_rate": 0.00016657295184543914, "loss": 1.2596, "step": 117000 }, { "epoch": 3.49476009912519, "grad_norm": 5.209356307983398, "learning_rate": 0.00016655866604952028, "loss": 1.2898, "step": 117050 }, { "epoch": 3.496252948377273, "grad_norm": 4.742622375488281, "learning_rate": 0.00016654438025360147, "loss": 1.2243, "step": 117100 }, { "epoch": 3.4977457976293556, "grad_norm": 5.037703037261963, "learning_rate": 0.0001665300944576826, "loss": 1.2425, "step": 117150 }, { "epoch": 3.499238646881438, "grad_norm": 4.114877700805664, "learning_rate": 0.0001665158086617638, "loss": 1.2498, "step": 117200 }, { "epoch": 3.5007314961335205, "grad_norm": 3.778136968612671, "learning_rate": 0.00016650152286584494, "loss": 1.2793, "step": 117250 }, { "epoch": 3.502224345385603, "grad_norm": 3.3782308101654053, "learning_rate": 0.00016648723706992613, "loss": 1.234, "step": 117300 }, { "epoch": 3.5037171946376855, "grad_norm": 3.4435694217681885, "learning_rate": 0.0001664729512740073, "loss": 1.2218, "step": 117350 }, { "epoch": 3.505210043889768, "grad_norm": 6.202013969421387, "learning_rate": 0.00016645866547808846, "loss": 1.2541, "step": 117400 }, { "epoch": 3.5067028931418505, "grad_norm": 4.274670600891113, "learning_rate": 0.00016644437968216962, "loss": 1.257, "step": 117450 }, { "epoch": 3.508195742393933, "grad_norm": 4.702452182769775, "learning_rate": 0.0001664300938862508, "loss": 1.2419, "step": 117500 }, { "epoch": 3.5096885916460154, "grad_norm": 2.897954225540161, "learning_rate": 0.00016641580809033195, "loss": 1.2716, "step": 117550 }, { "epoch": 3.511181440898098, "grad_norm": 4.723432540893555, "learning_rate": 0.00016640152229441312, "loss": 1.2357, "step": 117600 }, { "epoch": 3.512674290150181, "grad_norm": 4.234017848968506, "learning_rate": 0.00016638723649849428, "loss": 1.239, "step": 117650 }, { "epoch": 3.514167139402263, "grad_norm": 6.463681221008301, "learning_rate": 0.00016637295070257545, "loss": 1.2579, "step": 117700 }, { "epoch": 3.515659988654346, "grad_norm": 5.568453311920166, "learning_rate": 0.0001663586649066566, "loss": 1.2486, "step": 117750 }, { "epoch": 3.517152837906428, "grad_norm": 4.636147975921631, "learning_rate": 0.0001663443791107378, "loss": 1.2411, "step": 117800 }, { "epoch": 3.518645687158511, "grad_norm": 5.019303798675537, "learning_rate": 0.00016633009331481894, "loss": 1.2608, "step": 117850 }, { "epoch": 3.5201385364105935, "grad_norm": 5.414350986480713, "learning_rate": 0.00016631580751890013, "loss": 1.2642, "step": 117900 }, { "epoch": 3.5216313856626758, "grad_norm": 3.891533136367798, "learning_rate": 0.00016630152172298127, "loss": 1.2433, "step": 117950 }, { "epoch": 3.5231242349147585, "grad_norm": 3.9345791339874268, "learning_rate": 0.00016628723592706246, "loss": 1.2654, "step": 118000 }, { "epoch": 3.5246170841668407, "grad_norm": 3.8173060417175293, "learning_rate": 0.0001662729501311436, "loss": 1.2531, "step": 118050 }, { "epoch": 3.5261099334189234, "grad_norm": 4.534266471862793, "learning_rate": 0.0001662586643352248, "loss": 1.2924, "step": 118100 }, { "epoch": 3.527602782671006, "grad_norm": 5.261129856109619, "learning_rate": 0.00016624437853930596, "loss": 1.2433, "step": 118150 }, { "epoch": 3.5290956319230884, "grad_norm": 4.014307022094727, "learning_rate": 0.00016623009274338712, "loss": 1.2501, "step": 118200 }, { "epoch": 3.5305884811751707, "grad_norm": 4.939244270324707, "learning_rate": 0.00016621580694746828, "loss": 1.2156, "step": 118250 }, { "epoch": 3.5320813304272534, "grad_norm": 7.338710308074951, "learning_rate": 0.00016620152115154945, "loss": 1.2571, "step": 118300 }, { "epoch": 3.533574179679336, "grad_norm": 4.462497711181641, "learning_rate": 0.00016618723535563061, "loss": 1.2532, "step": 118350 }, { "epoch": 3.5350670289314188, "grad_norm": 3.8068175315856934, "learning_rate": 0.00016617294955971178, "loss": 1.2707, "step": 118400 }, { "epoch": 3.536559878183501, "grad_norm": 4.011793613433838, "learning_rate": 0.00016615866376379294, "loss": 1.2012, "step": 118450 }, { "epoch": 3.5380527274355833, "grad_norm": 4.955504894256592, "learning_rate": 0.0001661443779678741, "loss": 1.2312, "step": 118500 }, { "epoch": 3.539545576687666, "grad_norm": 5.355716228485107, "learning_rate": 0.00016613009217195527, "loss": 1.2677, "step": 118550 }, { "epoch": 3.5410384259397487, "grad_norm": 3.9078450202941895, "learning_rate": 0.00016611580637603644, "loss": 1.2216, "step": 118600 }, { "epoch": 3.542531275191831, "grad_norm": 4.30654764175415, "learning_rate": 0.0001661015205801176, "loss": 1.2864, "step": 118650 }, { "epoch": 3.5440241244439137, "grad_norm": 5.134391784667969, "learning_rate": 0.00016608723478419877, "loss": 1.2577, "step": 118700 }, { "epoch": 3.545516973695996, "grad_norm": 3.4655020236968994, "learning_rate": 0.00016607294898827993, "loss": 1.2427, "step": 118750 }, { "epoch": 3.5470098229480787, "grad_norm": 4.221212863922119, "learning_rate": 0.0001660586631923611, "loss": 1.243, "step": 118800 }, { "epoch": 3.5485026722001614, "grad_norm": 5.412088394165039, "learning_rate": 0.0001660443773964423, "loss": 1.2377, "step": 118850 }, { "epoch": 3.5499955214522436, "grad_norm": 5.42820930480957, "learning_rate": 0.00016603009160052343, "loss": 1.245, "step": 118900 }, { "epoch": 3.5514883707043263, "grad_norm": 4.585496425628662, "learning_rate": 0.00016601580580460462, "loss": 1.2885, "step": 118950 }, { "epoch": 3.5529812199564086, "grad_norm": 4.376836776733398, "learning_rate": 0.00016600152000868575, "loss": 1.2915, "step": 119000 }, { "epoch": 3.5544740692084913, "grad_norm": 5.338039875030518, "learning_rate": 0.00016598723421276695, "loss": 1.1785, "step": 119050 }, { "epoch": 3.555966918460574, "grad_norm": 4.422656059265137, "learning_rate": 0.00016597294841684808, "loss": 1.235, "step": 119100 }, { "epoch": 3.5574597677126563, "grad_norm": 5.377553939819336, "learning_rate": 0.00016595866262092928, "loss": 1.2796, "step": 119150 }, { "epoch": 3.558952616964739, "grad_norm": 4.3863959312438965, "learning_rate": 0.00016594437682501044, "loss": 1.2497, "step": 119200 }, { "epoch": 3.5604454662168212, "grad_norm": 5.522170066833496, "learning_rate": 0.0001659300910290916, "loss": 1.2899, "step": 119250 }, { "epoch": 3.561938315468904, "grad_norm": 3.6835429668426514, "learning_rate": 0.00016591580523317277, "loss": 1.2272, "step": 119300 }, { "epoch": 3.5634311647209866, "grad_norm": 3.4317328929901123, "learning_rate": 0.00016590151943725393, "loss": 1.2315, "step": 119350 }, { "epoch": 3.564924013973069, "grad_norm": 4.688202381134033, "learning_rate": 0.0001658872336413351, "loss": 1.2938, "step": 119400 }, { "epoch": 3.5664168632251516, "grad_norm": 3.5351858139038086, "learning_rate": 0.00016587294784541626, "loss": 1.2247, "step": 119450 }, { "epoch": 3.567909712477234, "grad_norm": 4.518115043640137, "learning_rate": 0.00016585866204949743, "loss": 1.3105, "step": 119500 }, { "epoch": 3.5694025617293166, "grad_norm": 3.226865530014038, "learning_rate": 0.0001658443762535786, "loss": 1.2498, "step": 119550 }, { "epoch": 3.5708954109813993, "grad_norm": 5.850762367248535, "learning_rate": 0.00016583009045765976, "loss": 1.2802, "step": 119600 }, { "epoch": 3.5723882602334815, "grad_norm": 6.21754789352417, "learning_rate": 0.00016581580466174095, "loss": 1.23, "step": 119650 }, { "epoch": 3.5738811094855643, "grad_norm": 5.319993019104004, "learning_rate": 0.0001658015188658221, "loss": 1.3102, "step": 119700 }, { "epoch": 3.5753739587376465, "grad_norm": 4.379292964935303, "learning_rate": 0.00016578723306990328, "loss": 1.203, "step": 119750 }, { "epoch": 3.5768668079897292, "grad_norm": 3.9974188804626465, "learning_rate": 0.00016577294727398442, "loss": 1.29, "step": 119800 }, { "epoch": 3.578359657241812, "grad_norm": 4.0016188621521, "learning_rate": 0.0001657586614780656, "loss": 1.2131, "step": 119850 }, { "epoch": 3.579852506493894, "grad_norm": 5.10989236831665, "learning_rate": 0.00016574437568214675, "loss": 1.2519, "step": 119900 }, { "epoch": 3.581345355745977, "grad_norm": 3.3042585849761963, "learning_rate": 0.00016573008988622794, "loss": 1.2462, "step": 119950 }, { "epoch": 3.582838204998059, "grad_norm": 6.814448356628418, "learning_rate": 0.0001657158040903091, "loss": 1.2539, "step": 120000 }, { "epoch": 3.584331054250142, "grad_norm": 6.447416305541992, "learning_rate": 0.00016570151829439027, "loss": 1.2407, "step": 120050 }, { "epoch": 3.5858239035022246, "grad_norm": 4.890771389007568, "learning_rate": 0.00016568723249847143, "loss": 1.282, "step": 120100 }, { "epoch": 3.587316752754307, "grad_norm": 3.6665565967559814, "learning_rate": 0.0001656729467025526, "loss": 1.269, "step": 120150 }, { "epoch": 3.5888096020063895, "grad_norm": 3.8311381340026855, "learning_rate": 0.00016565866090663376, "loss": 1.2546, "step": 120200 }, { "epoch": 3.590302451258472, "grad_norm": 3.9717676639556885, "learning_rate": 0.00016564437511071493, "loss": 1.2402, "step": 120250 }, { "epoch": 3.5917953005105545, "grad_norm": 5.8756585121154785, "learning_rate": 0.0001656300893147961, "loss": 1.2934, "step": 120300 }, { "epoch": 3.593288149762637, "grad_norm": 4.234692573547363, "learning_rate": 0.00016561580351887725, "loss": 1.2773, "step": 120350 }, { "epoch": 3.5947809990147195, "grad_norm": 5.237219333648682, "learning_rate": 0.00016560151772295842, "loss": 1.2638, "step": 120400 }, { "epoch": 3.5962738482668017, "grad_norm": 3.977701187133789, "learning_rate": 0.0001655872319270396, "loss": 1.2452, "step": 120450 }, { "epoch": 3.5977666975188844, "grad_norm": 4.238033294677734, "learning_rate": 0.00016557294613112075, "loss": 1.2813, "step": 120500 }, { "epoch": 3.599259546770967, "grad_norm": 5.174859523773193, "learning_rate": 0.00016555866033520194, "loss": 1.2477, "step": 120550 }, { "epoch": 3.60075239602305, "grad_norm": 4.832043647766113, "learning_rate": 0.00016554437453928308, "loss": 1.2081, "step": 120600 }, { "epoch": 3.602245245275132, "grad_norm": 5.991222858428955, "learning_rate": 0.00016553008874336427, "loss": 1.2275, "step": 120650 }, { "epoch": 3.6037380945272144, "grad_norm": 5.4362101554870605, "learning_rate": 0.0001655158029474454, "loss": 1.262, "step": 120700 }, { "epoch": 3.605230943779297, "grad_norm": 5.659782409667969, "learning_rate": 0.0001655015171515266, "loss": 1.3115, "step": 120750 }, { "epoch": 3.60672379303138, "grad_norm": 4.6319146156311035, "learning_rate": 0.00016548723135560776, "loss": 1.3261, "step": 120800 }, { "epoch": 3.608216642283462, "grad_norm": 4.129281520843506, "learning_rate": 0.00016547294555968893, "loss": 1.3001, "step": 120850 }, { "epoch": 3.6097094915355448, "grad_norm": 4.629218101501465, "learning_rate": 0.0001654586597637701, "loss": 1.2856, "step": 120900 }, { "epoch": 3.611202340787627, "grad_norm": 4.741670608520508, "learning_rate": 0.00016544437396785126, "loss": 1.2671, "step": 120950 }, { "epoch": 3.6126951900397097, "grad_norm": 3.8376450538635254, "learning_rate": 0.00016543008817193242, "loss": 1.1981, "step": 121000 }, { "epoch": 3.6141880392917924, "grad_norm": 5.130064010620117, "learning_rate": 0.0001654158023760136, "loss": 1.2174, "step": 121050 }, { "epoch": 3.6156808885438747, "grad_norm": 4.116199970245361, "learning_rate": 0.00016540151658009475, "loss": 1.2786, "step": 121100 }, { "epoch": 3.6171737377959574, "grad_norm": 3.8612489700317383, "learning_rate": 0.00016538723078417592, "loss": 1.2788, "step": 121150 }, { "epoch": 3.6186665870480397, "grad_norm": 4.064988136291504, "learning_rate": 0.00016537294498825708, "loss": 1.2465, "step": 121200 }, { "epoch": 3.6201594363001224, "grad_norm": 4.097548007965088, "learning_rate": 0.00016535865919233825, "loss": 1.2659, "step": 121250 }, { "epoch": 3.621652285552205, "grad_norm": 4.937625885009766, "learning_rate": 0.0001653443733964194, "loss": 1.2071, "step": 121300 }, { "epoch": 3.6231451348042873, "grad_norm": 4.893852710723877, "learning_rate": 0.00016533008760050057, "loss": 1.2962, "step": 121350 }, { "epoch": 3.62463798405637, "grad_norm": 5.306116580963135, "learning_rate": 0.00016531580180458174, "loss": 1.27, "step": 121400 }, { "epoch": 3.6261308333084523, "grad_norm": 4.8264360427856445, "learning_rate": 0.0001653015160086629, "loss": 1.2717, "step": 121450 }, { "epoch": 3.627623682560535, "grad_norm": 4.300417423248291, "learning_rate": 0.00016528723021274407, "loss": 1.3015, "step": 121500 }, { "epoch": 3.6291165318126177, "grad_norm": 4.886301517486572, "learning_rate": 0.00016527294441682523, "loss": 1.2295, "step": 121550 }, { "epoch": 3.6306093810647, "grad_norm": 5.038539409637451, "learning_rate": 0.00016525865862090643, "loss": 1.2542, "step": 121600 }, { "epoch": 3.6321022303167827, "grad_norm": 4.360227584838867, "learning_rate": 0.00016524437282498756, "loss": 1.2712, "step": 121650 }, { "epoch": 3.633595079568865, "grad_norm": 4.984081745147705, "learning_rate": 0.00016523008702906875, "loss": 1.2949, "step": 121700 }, { "epoch": 3.6350879288209477, "grad_norm": 5.2939229011535645, "learning_rate": 0.0001652158012331499, "loss": 1.2393, "step": 121750 }, { "epoch": 3.6365807780730304, "grad_norm": 5.242954730987549, "learning_rate": 0.00016520151543723108, "loss": 1.1846, "step": 121800 }, { "epoch": 3.6380736273251126, "grad_norm": 4.06920862197876, "learning_rate": 0.00016518722964131225, "loss": 1.2599, "step": 121850 }, { "epoch": 3.6395664765771953, "grad_norm": 5.329120635986328, "learning_rate": 0.0001651729438453934, "loss": 1.2407, "step": 121900 }, { "epoch": 3.6410593258292776, "grad_norm": 5.278786659240723, "learning_rate": 0.00016515865804947458, "loss": 1.2645, "step": 121950 }, { "epoch": 3.6425521750813603, "grad_norm": 3.5512640476226807, "learning_rate": 0.00016514437225355574, "loss": 1.2492, "step": 122000 }, { "epoch": 3.644045024333443, "grad_norm": 3.9851057529449463, "learning_rate": 0.0001651300864576369, "loss": 1.27, "step": 122050 }, { "epoch": 3.6455378735855253, "grad_norm": 2.720550298690796, "learning_rate": 0.00016511580066171807, "loss": 1.2387, "step": 122100 }, { "epoch": 3.647030722837608, "grad_norm": 4.045743465423584, "learning_rate": 0.00016510151486579924, "loss": 1.2768, "step": 122150 }, { "epoch": 3.6485235720896902, "grad_norm": 4.347272872924805, "learning_rate": 0.0001650872290698804, "loss": 1.2114, "step": 122200 }, { "epoch": 3.650016421341773, "grad_norm": 4.8486151695251465, "learning_rate": 0.00016507294327396157, "loss": 1.2761, "step": 122250 }, { "epoch": 3.6515092705938557, "grad_norm": 4.846822261810303, "learning_rate": 0.00016505865747804276, "loss": 1.2563, "step": 122300 }, { "epoch": 3.653002119845938, "grad_norm": 5.770145893096924, "learning_rate": 0.0001650443716821239, "loss": 1.2244, "step": 122350 }, { "epoch": 3.6544949690980206, "grad_norm": 4.940767765045166, "learning_rate": 0.0001650300858862051, "loss": 1.2658, "step": 122400 }, { "epoch": 3.655987818350103, "grad_norm": 4.317561626434326, "learning_rate": 0.00016501580009028622, "loss": 1.2454, "step": 122450 }, { "epoch": 3.6574806676021856, "grad_norm": 4.162029266357422, "learning_rate": 0.00016500151429436742, "loss": 1.2757, "step": 122500 }, { "epoch": 3.6589735168542683, "grad_norm": 3.620096445083618, "learning_rate": 0.00016498722849844855, "loss": 1.2615, "step": 122550 }, { "epoch": 3.6604663661063506, "grad_norm": 4.369839191436768, "learning_rate": 0.00016497294270252975, "loss": 1.2435, "step": 122600 }, { "epoch": 3.661959215358433, "grad_norm": 5.848557472229004, "learning_rate": 0.0001649586569066109, "loss": 1.2422, "step": 122650 }, { "epoch": 3.6634520646105155, "grad_norm": 4.413859844207764, "learning_rate": 0.00016494437111069207, "loss": 1.2505, "step": 122700 }, { "epoch": 3.6649449138625982, "grad_norm": 4.773179531097412, "learning_rate": 0.00016493008531477324, "loss": 1.2856, "step": 122750 }, { "epoch": 3.666437763114681, "grad_norm": 6.004321098327637, "learning_rate": 0.0001649157995188544, "loss": 1.2183, "step": 122800 }, { "epoch": 3.667930612366763, "grad_norm": 4.601461410522461, "learning_rate": 0.00016490151372293557, "loss": 1.2328, "step": 122850 }, { "epoch": 3.6694234616188455, "grad_norm": 4.456672668457031, "learning_rate": 0.00016488722792701673, "loss": 1.2269, "step": 122900 }, { "epoch": 3.670916310870928, "grad_norm": 4.381960391998291, "learning_rate": 0.0001648729421310979, "loss": 1.2971, "step": 122950 }, { "epoch": 3.672409160123011, "grad_norm": 4.233649730682373, "learning_rate": 0.00016485865633517906, "loss": 1.2597, "step": 123000 }, { "epoch": 3.673902009375093, "grad_norm": 3.898301601409912, "learning_rate": 0.00016484437053926023, "loss": 1.2237, "step": 123050 }, { "epoch": 3.675394858627176, "grad_norm": 4.5442585945129395, "learning_rate": 0.00016483008474334142, "loss": 1.2503, "step": 123100 }, { "epoch": 3.676887707879258, "grad_norm": 6.81040620803833, "learning_rate": 0.00016481579894742256, "loss": 1.2685, "step": 123150 }, { "epoch": 3.678380557131341, "grad_norm": 3.5733816623687744, "learning_rate": 0.00016480151315150375, "loss": 1.2621, "step": 123200 }, { "epoch": 3.6798734063834235, "grad_norm": 5.7314252853393555, "learning_rate": 0.00016478722735558489, "loss": 1.2356, "step": 123250 }, { "epoch": 3.681366255635506, "grad_norm": 3.6596696376800537, "learning_rate": 0.00016477294155966608, "loss": 1.2131, "step": 123300 }, { "epoch": 3.6828591048875885, "grad_norm": 4.004091739654541, "learning_rate": 0.00016475865576374722, "loss": 1.2796, "step": 123350 }, { "epoch": 3.6843519541396708, "grad_norm": 4.247511863708496, "learning_rate": 0.0001647443699678284, "loss": 1.2252, "step": 123400 }, { "epoch": 3.6858448033917535, "grad_norm": 4.852141380310059, "learning_rate": 0.00016473008417190957, "loss": 1.2485, "step": 123450 }, { "epoch": 3.687337652643836, "grad_norm": 4.641091346740723, "learning_rate": 0.00016471579837599074, "loss": 1.2414, "step": 123500 }, { "epoch": 3.6888305018959184, "grad_norm": 3.5827832221984863, "learning_rate": 0.0001647015125800719, "loss": 1.2304, "step": 123550 }, { "epoch": 3.690323351148001, "grad_norm": 3.877167224884033, "learning_rate": 0.00016468722678415307, "loss": 1.2609, "step": 123600 }, { "epoch": 3.6918162004000834, "grad_norm": 4.917414665222168, "learning_rate": 0.00016467294098823423, "loss": 1.3357, "step": 123650 }, { "epoch": 3.693309049652166, "grad_norm": 5.0784406661987305, "learning_rate": 0.0001646586551923154, "loss": 1.2776, "step": 123700 }, { "epoch": 3.694801898904249, "grad_norm": 5.322165489196777, "learning_rate": 0.00016464436939639656, "loss": 1.265, "step": 123750 }, { "epoch": 3.696294748156331, "grad_norm": 4.516918182373047, "learning_rate": 0.00016463008360047772, "loss": 1.2559, "step": 123800 }, { "epoch": 3.697787597408414, "grad_norm": 4.489822864532471, "learning_rate": 0.0001646157978045589, "loss": 1.2206, "step": 123850 }, { "epoch": 3.699280446660496, "grad_norm": 5.039775848388672, "learning_rate": 0.00016460151200864005, "loss": 1.2285, "step": 123900 }, { "epoch": 3.7007732959125788, "grad_norm": 3.899019241333008, "learning_rate": 0.00016458722621272122, "loss": 1.2484, "step": 123950 }, { "epoch": 3.7022661451646615, "grad_norm": 5.370189189910889, "learning_rate": 0.00016457294041680238, "loss": 1.2901, "step": 124000 }, { "epoch": 3.7037589944167437, "grad_norm": 4.206950664520264, "learning_rate": 0.00016455865462088355, "loss": 1.2384, "step": 124050 }, { "epoch": 3.7052518436688264, "grad_norm": 6.357316970825195, "learning_rate": 0.0001645443688249647, "loss": 1.2222, "step": 124100 }, { "epoch": 3.7067446929209087, "grad_norm": 5.480586051940918, "learning_rate": 0.00016453008302904588, "loss": 1.2402, "step": 124150 }, { "epoch": 3.7082375421729914, "grad_norm": 4.471677303314209, "learning_rate": 0.00016451579723312704, "loss": 1.2962, "step": 124200 }, { "epoch": 3.709730391425074, "grad_norm": 4.873519420623779, "learning_rate": 0.00016450151143720823, "loss": 1.2438, "step": 124250 }, { "epoch": 3.7112232406771564, "grad_norm": 4.237666130065918, "learning_rate": 0.00016448722564128937, "loss": 1.3002, "step": 124300 }, { "epoch": 3.712716089929239, "grad_norm": 5.313760280609131, "learning_rate": 0.00016447293984537056, "loss": 1.217, "step": 124350 }, { "epoch": 3.7142089391813213, "grad_norm": 6.441144943237305, "learning_rate": 0.0001644586540494517, "loss": 1.3176, "step": 124400 }, { "epoch": 3.715701788433404, "grad_norm": 5.12811803817749, "learning_rate": 0.0001644443682535329, "loss": 1.2937, "step": 124450 }, { "epoch": 3.7171946376854867, "grad_norm": 4.984979152679443, "learning_rate": 0.00016443008245761406, "loss": 1.1999, "step": 124500 }, { "epoch": 3.718687486937569, "grad_norm": 4.063045501708984, "learning_rate": 0.00016441579666169522, "loss": 1.235, "step": 124550 }, { "epoch": 3.7201803361896517, "grad_norm": 3.6467928886413574, "learning_rate": 0.00016440151086577639, "loss": 1.1857, "step": 124600 }, { "epoch": 3.721673185441734, "grad_norm": 4.508729934692383, "learning_rate": 0.00016438722506985755, "loss": 1.3138, "step": 124650 }, { "epoch": 3.7231660346938167, "grad_norm": 5.27504825592041, "learning_rate": 0.00016437293927393872, "loss": 1.2026, "step": 124700 }, { "epoch": 3.7246588839458994, "grad_norm": 4.412220478057861, "learning_rate": 0.00016435865347801988, "loss": 1.2699, "step": 124750 }, { "epoch": 3.7261517331979817, "grad_norm": 5.649445533752441, "learning_rate": 0.00016434436768210104, "loss": 1.2803, "step": 124800 }, { "epoch": 3.7276445824500644, "grad_norm": 3.868227005004883, "learning_rate": 0.0001643300818861822, "loss": 1.2614, "step": 124850 }, { "epoch": 3.7291374317021466, "grad_norm": 4.377996444702148, "learning_rate": 0.00016431579609026337, "loss": 1.2548, "step": 124900 }, { "epoch": 3.7306302809542293, "grad_norm": 5.023475646972656, "learning_rate": 0.00016430151029434454, "loss": 1.2775, "step": 124950 }, { "epoch": 3.732123130206312, "grad_norm": 8.013720512390137, "learning_rate": 0.0001642872244984257, "loss": 1.2716, "step": 125000 }, { "epoch": 3.7336159794583943, "grad_norm": 4.947465419769287, "learning_rate": 0.0001642729387025069, "loss": 1.2681, "step": 125050 }, { "epoch": 3.7351088287104766, "grad_norm": 3.9547464847564697, "learning_rate": 0.00016425865290658803, "loss": 1.244, "step": 125100 }, { "epoch": 3.7366016779625593, "grad_norm": 5.331160545349121, "learning_rate": 0.00016424436711066922, "loss": 1.3086, "step": 125150 }, { "epoch": 3.738094527214642, "grad_norm": 3.980027914047241, "learning_rate": 0.00016423008131475036, "loss": 1.2461, "step": 125200 }, { "epoch": 3.7395873764667242, "grad_norm": 3.4833130836486816, "learning_rate": 0.00016421579551883155, "loss": 1.2107, "step": 125250 }, { "epoch": 3.741080225718807, "grad_norm": 4.746307373046875, "learning_rate": 0.00016420150972291272, "loss": 1.2875, "step": 125300 }, { "epoch": 3.742573074970889, "grad_norm": 3.9247684478759766, "learning_rate": 0.00016418722392699388, "loss": 1.2185, "step": 125350 }, { "epoch": 3.744065924222972, "grad_norm": 5.135631084442139, "learning_rate": 0.00016417293813107505, "loss": 1.2512, "step": 125400 }, { "epoch": 3.7455587734750546, "grad_norm": 4.875301361083984, "learning_rate": 0.0001641586523351562, "loss": 1.2794, "step": 125450 }, { "epoch": 3.747051622727137, "grad_norm": 5.335410118103027, "learning_rate": 0.00016414436653923738, "loss": 1.2617, "step": 125500 }, { "epoch": 3.7485444719792196, "grad_norm": 4.500457286834717, "learning_rate": 0.00016413008074331854, "loss": 1.2482, "step": 125550 }, { "epoch": 3.750037321231302, "grad_norm": 4.4383087158203125, "learning_rate": 0.0001641157949473997, "loss": 1.2534, "step": 125600 }, { "epoch": 3.7515301704833846, "grad_norm": 4.272806644439697, "learning_rate": 0.00016410150915148087, "loss": 1.2564, "step": 125650 }, { "epoch": 3.7530230197354673, "grad_norm": 6.498635292053223, "learning_rate": 0.00016408722335556204, "loss": 1.2422, "step": 125700 }, { "epoch": 3.7545158689875495, "grad_norm": 4.720774173736572, "learning_rate": 0.00016407293755964323, "loss": 1.2543, "step": 125750 }, { "epoch": 3.7560087182396322, "grad_norm": 4.762580871582031, "learning_rate": 0.00016405865176372436, "loss": 1.2453, "step": 125800 }, { "epoch": 3.7575015674917145, "grad_norm": 3.98309326171875, "learning_rate": 0.00016404436596780556, "loss": 1.2295, "step": 125850 }, { "epoch": 3.758994416743797, "grad_norm": 5.4489336013793945, "learning_rate": 0.0001640300801718867, "loss": 1.2466, "step": 125900 }, { "epoch": 3.76048726599588, "grad_norm": 5.064475059509277, "learning_rate": 0.00016401579437596789, "loss": 1.2451, "step": 125950 }, { "epoch": 3.761980115247962, "grad_norm": 4.22551155090332, "learning_rate": 0.00016400150858004902, "loss": 1.2346, "step": 126000 }, { "epoch": 3.763472964500045, "grad_norm": 4.364709854125977, "learning_rate": 0.00016398722278413021, "loss": 1.2894, "step": 126050 }, { "epoch": 3.764965813752127, "grad_norm": 4.05735969543457, "learning_rate": 0.00016397293698821138, "loss": 1.1943, "step": 126100 }, { "epoch": 3.76645866300421, "grad_norm": 4.884429931640625, "learning_rate": 0.00016395865119229254, "loss": 1.2587, "step": 126150 }, { "epoch": 3.7679515122562925, "grad_norm": 4.451826572418213, "learning_rate": 0.0001639443653963737, "loss": 1.2856, "step": 126200 }, { "epoch": 3.769444361508375, "grad_norm": 5.6031646728515625, "learning_rate": 0.00016393007960045487, "loss": 1.2705, "step": 126250 }, { "epoch": 3.7709372107604575, "grad_norm": 4.248857498168945, "learning_rate": 0.00016391579380453604, "loss": 1.3043, "step": 126300 }, { "epoch": 3.7724300600125398, "grad_norm": 4.71324348449707, "learning_rate": 0.0001639015080086172, "loss": 1.2658, "step": 126350 }, { "epoch": 3.7739229092646225, "grad_norm": 5.27174186706543, "learning_rate": 0.00016388722221269837, "loss": 1.2325, "step": 126400 }, { "epoch": 3.775415758516705, "grad_norm": 4.696889877319336, "learning_rate": 0.00016387293641677953, "loss": 1.2819, "step": 126450 }, { "epoch": 3.7769086077687875, "grad_norm": 5.210821151733398, "learning_rate": 0.0001638586506208607, "loss": 1.2733, "step": 126500 }, { "epoch": 3.77840145702087, "grad_norm": 4.370712757110596, "learning_rate": 0.00016384436482494186, "loss": 1.2121, "step": 126550 }, { "epoch": 3.7798943062729524, "grad_norm": 3.9094676971435547, "learning_rate": 0.00016383007902902303, "loss": 1.2744, "step": 126600 }, { "epoch": 3.781387155525035, "grad_norm": 4.375615119934082, "learning_rate": 0.0001638157932331042, "loss": 1.2808, "step": 126650 }, { "epoch": 3.782880004777118, "grad_norm": 4.995912075042725, "learning_rate": 0.00016380150743718536, "loss": 1.3116, "step": 126700 }, { "epoch": 3.7843728540292, "grad_norm": 3.9363934993743896, "learning_rate": 0.00016378722164126652, "loss": 1.2554, "step": 126750 }, { "epoch": 3.785865703281283, "grad_norm": 3.885279893875122, "learning_rate": 0.00016377293584534768, "loss": 1.2099, "step": 126800 }, { "epoch": 3.787358552533365, "grad_norm": 4.745761871337891, "learning_rate": 0.00016375865004942885, "loss": 1.2336, "step": 126850 }, { "epoch": 3.7888514017854478, "grad_norm": 5.675398826599121, "learning_rate": 0.00016374436425351004, "loss": 1.2736, "step": 126900 }, { "epoch": 3.7903442510375305, "grad_norm": 5.933438301086426, "learning_rate": 0.00016373007845759118, "loss": 1.2245, "step": 126950 }, { "epoch": 3.7918371002896127, "grad_norm": 4.410390853881836, "learning_rate": 0.00016371579266167237, "loss": 1.2786, "step": 127000 }, { "epoch": 3.7933299495416954, "grad_norm": 3.837341785430908, "learning_rate": 0.0001637015068657535, "loss": 1.2154, "step": 127050 }, { "epoch": 3.7948227987937777, "grad_norm": 4.170938014984131, "learning_rate": 0.0001636872210698347, "loss": 1.1933, "step": 127100 }, { "epoch": 3.7963156480458604, "grad_norm": 5.0940985679626465, "learning_rate": 0.00016367293527391584, "loss": 1.2399, "step": 127150 }, { "epoch": 3.797808497297943, "grad_norm": 3.5157582759857178, "learning_rate": 0.00016365864947799703, "loss": 1.2217, "step": 127200 }, { "epoch": 3.7993013465500254, "grad_norm": 4.230498790740967, "learning_rate": 0.0001636443636820782, "loss": 1.2665, "step": 127250 }, { "epoch": 3.8007941958021076, "grad_norm": 4.24791955947876, "learning_rate": 0.00016363007788615936, "loss": 1.28, "step": 127300 }, { "epoch": 3.8022870450541904, "grad_norm": 4.877384185791016, "learning_rate": 0.00016361579209024052, "loss": 1.3076, "step": 127350 }, { "epoch": 3.803779894306273, "grad_norm": 6.252450942993164, "learning_rate": 0.0001636015062943217, "loss": 1.2648, "step": 127400 }, { "epoch": 3.8052727435583553, "grad_norm": 4.372878551483154, "learning_rate": 0.00016358722049840285, "loss": 1.2749, "step": 127450 }, { "epoch": 3.806765592810438, "grad_norm": 5.042996883392334, "learning_rate": 0.00016357293470248402, "loss": 1.2379, "step": 127500 }, { "epoch": 3.8082584420625203, "grad_norm": 4.833359241485596, "learning_rate": 0.00016355864890656518, "loss": 1.2456, "step": 127550 }, { "epoch": 3.809751291314603, "grad_norm": 3.7556586265563965, "learning_rate": 0.00016354436311064635, "loss": 1.2998, "step": 127600 }, { "epoch": 3.8112441405666857, "grad_norm": 4.22800874710083, "learning_rate": 0.0001635300773147275, "loss": 1.2307, "step": 127650 }, { "epoch": 3.812736989818768, "grad_norm": 3.8682031631469727, "learning_rate": 0.0001635157915188087, "loss": 1.2429, "step": 127700 }, { "epoch": 3.8142298390708507, "grad_norm": 5.100236415863037, "learning_rate": 0.00016350150572288984, "loss": 1.2725, "step": 127750 }, { "epoch": 3.815722688322933, "grad_norm": 5.069801330566406, "learning_rate": 0.00016348721992697103, "loss": 1.3199, "step": 127800 }, { "epoch": 3.8172155375750156, "grad_norm": 4.382961273193359, "learning_rate": 0.00016347293413105217, "loss": 1.2727, "step": 127850 }, { "epoch": 3.8187083868270983, "grad_norm": 4.622864246368408, "learning_rate": 0.00016345864833513336, "loss": 1.3309, "step": 127900 }, { "epoch": 3.8202012360791806, "grad_norm": 5.895843982696533, "learning_rate": 0.00016344436253921453, "loss": 1.2453, "step": 127950 }, { "epoch": 3.8216940853312633, "grad_norm": 4.68233060836792, "learning_rate": 0.0001634300767432957, "loss": 1.2948, "step": 128000 }, { "epoch": 3.8231869345833456, "grad_norm": 4.286251068115234, "learning_rate": 0.00016341579094737686, "loss": 1.2758, "step": 128050 }, { "epoch": 3.8246797838354283, "grad_norm": 3.9140446186065674, "learning_rate": 0.00016340150515145802, "loss": 1.2791, "step": 128100 }, { "epoch": 3.826172633087511, "grad_norm": 5.249776840209961, "learning_rate": 0.00016338721935553918, "loss": 1.3379, "step": 128150 }, { "epoch": 3.8276654823395933, "grad_norm": 4.369449138641357, "learning_rate": 0.00016337293355962035, "loss": 1.179, "step": 128200 }, { "epoch": 3.829158331591676, "grad_norm": 5.655234336853027, "learning_rate": 0.00016335864776370151, "loss": 1.2535, "step": 128250 }, { "epoch": 3.830651180843758, "grad_norm": 5.233351230621338, "learning_rate": 0.00016334436196778268, "loss": 1.2601, "step": 128300 }, { "epoch": 3.832144030095841, "grad_norm": 4.451437950134277, "learning_rate": 0.00016333007617186384, "loss": 1.2733, "step": 128350 }, { "epoch": 3.8336368793479236, "grad_norm": 3.807561159133911, "learning_rate": 0.000163315790375945, "loss": 1.2721, "step": 128400 }, { "epoch": 3.835129728600006, "grad_norm": 4.507883071899414, "learning_rate": 0.00016330150458002617, "loss": 1.2498, "step": 128450 }, { "epoch": 3.8366225778520886, "grad_norm": 4.246575832366943, "learning_rate": 0.00016328721878410736, "loss": 1.2299, "step": 128500 }, { "epoch": 3.838115427104171, "grad_norm": 5.045656204223633, "learning_rate": 0.0001632729329881885, "loss": 1.2536, "step": 128550 }, { "epoch": 3.8396082763562536, "grad_norm": 3.3326852321624756, "learning_rate": 0.0001632586471922697, "loss": 1.2401, "step": 128600 }, { "epoch": 3.8411011256083363, "grad_norm": 4.201671600341797, "learning_rate": 0.00016324436139635083, "loss": 1.2681, "step": 128650 }, { "epoch": 3.8425939748604185, "grad_norm": 3.979217052459717, "learning_rate": 0.00016323007560043202, "loss": 1.2901, "step": 128700 }, { "epoch": 3.8440868241125012, "grad_norm": 4.908186435699463, "learning_rate": 0.0001632157898045132, "loss": 1.2443, "step": 128750 }, { "epoch": 3.8455796733645835, "grad_norm": 7.0551252365112305, "learning_rate": 0.00016320150400859435, "loss": 1.3312, "step": 128800 }, { "epoch": 3.847072522616666, "grad_norm": 2.9734935760498047, "learning_rate": 0.00016318721821267552, "loss": 1.1726, "step": 128850 }, { "epoch": 3.848565371868749, "grad_norm": 8.724651336669922, "learning_rate": 0.00016317293241675668, "loss": 1.3131, "step": 128900 }, { "epoch": 3.850058221120831, "grad_norm": 4.633723735809326, "learning_rate": 0.00016315864662083785, "loss": 1.2309, "step": 128950 }, { "epoch": 3.851551070372914, "grad_norm": 5.132857799530029, "learning_rate": 0.000163144360824919, "loss": 1.1988, "step": 129000 }, { "epoch": 3.853043919624996, "grad_norm": 3.8106892108917236, "learning_rate": 0.00016313007502900018, "loss": 1.2284, "step": 129050 }, { "epoch": 3.854536768877079, "grad_norm": 3.9938535690307617, "learning_rate": 0.00016311578923308134, "loss": 1.2252, "step": 129100 }, { "epoch": 3.8560296181291616, "grad_norm": 4.499520301818848, "learning_rate": 0.0001631015034371625, "loss": 1.2834, "step": 129150 }, { "epoch": 3.857522467381244, "grad_norm": 3.780081033706665, "learning_rate": 0.00016308721764124367, "loss": 1.1696, "step": 129200 }, { "epoch": 3.8590153166333265, "grad_norm": 5.509545803070068, "learning_rate": 0.00016307293184532483, "loss": 1.2129, "step": 129250 }, { "epoch": 3.860508165885409, "grad_norm": 3.87497615814209, "learning_rate": 0.000163058646049406, "loss": 1.2971, "step": 129300 }, { "epoch": 3.8620010151374915, "grad_norm": 4.839779853820801, "learning_rate": 0.00016304436025348716, "loss": 1.2385, "step": 129350 }, { "epoch": 3.863493864389574, "grad_norm": 7.801334857940674, "learning_rate": 0.00016303007445756833, "loss": 1.2482, "step": 129400 }, { "epoch": 3.8649867136416565, "grad_norm": 3.884004831314087, "learning_rate": 0.0001630157886616495, "loss": 1.264, "step": 129450 }, { "epoch": 3.8664795628937387, "grad_norm": 5.332859516143799, "learning_rate": 0.00016300150286573066, "loss": 1.2263, "step": 129500 }, { "epoch": 3.8679724121458214, "grad_norm": 4.195265293121338, "learning_rate": 0.00016298721706981185, "loss": 1.2579, "step": 129550 }, { "epoch": 3.869465261397904, "grad_norm": 4.393342018127441, "learning_rate": 0.000162972931273893, "loss": 1.2513, "step": 129600 }, { "epoch": 3.8709581106499864, "grad_norm": 5.231433391571045, "learning_rate": 0.00016295864547797418, "loss": 1.2508, "step": 129650 }, { "epoch": 3.872450959902069, "grad_norm": 4.832424640655518, "learning_rate": 0.00016294435968205532, "loss": 1.2902, "step": 129700 }, { "epoch": 3.8739438091541514, "grad_norm": 3.9749419689178467, "learning_rate": 0.0001629300738861365, "loss": 1.2803, "step": 129750 }, { "epoch": 3.875436658406234, "grad_norm": 4.696380138397217, "learning_rate": 0.00016291578809021765, "loss": 1.2226, "step": 129800 }, { "epoch": 3.876929507658317, "grad_norm": 3.757120132446289, "learning_rate": 0.00016290150229429884, "loss": 1.2499, "step": 129850 }, { "epoch": 3.878422356910399, "grad_norm": 4.010384559631348, "learning_rate": 0.00016288721649838, "loss": 1.3455, "step": 129900 }, { "epoch": 3.8799152061624818, "grad_norm": 3.7654688358306885, "learning_rate": 0.00016287293070246117, "loss": 1.2689, "step": 129950 }, { "epoch": 3.881408055414564, "grad_norm": 4.330887317657471, "learning_rate": 0.00016285864490654233, "loss": 1.2988, "step": 130000 }, { "epoch": 3.8829009046666467, "grad_norm": 3.8731918334960938, "learning_rate": 0.0001628443591106235, "loss": 1.2158, "step": 130050 }, { "epoch": 3.8843937539187294, "grad_norm": 4.860089302062988, "learning_rate": 0.00016283007331470466, "loss": 1.3152, "step": 130100 }, { "epoch": 3.8858866031708117, "grad_norm": 3.8552567958831787, "learning_rate": 0.00016281578751878583, "loss": 1.2038, "step": 130150 }, { "epoch": 3.8873794524228944, "grad_norm": 3.8789255619049072, "learning_rate": 0.000162801501722867, "loss": 1.1898, "step": 130200 }, { "epoch": 3.8888723016749767, "grad_norm": 5.833735466003418, "learning_rate": 0.00016278721592694815, "loss": 1.327, "step": 130250 }, { "epoch": 3.8903651509270594, "grad_norm": 5.1781463623046875, "learning_rate": 0.00016277293013102932, "loss": 1.3002, "step": 130300 }, { "epoch": 3.891858000179142, "grad_norm": 5.102818012237549, "learning_rate": 0.0001627586443351105, "loss": 1.257, "step": 130350 }, { "epoch": 3.8933508494312243, "grad_norm": 4.642506122589111, "learning_rate": 0.00016274435853919165, "loss": 1.2387, "step": 130400 }, { "epoch": 3.894843698683307, "grad_norm": 4.327921390533447, "learning_rate": 0.00016273007274327284, "loss": 1.2563, "step": 130450 }, { "epoch": 3.8963365479353893, "grad_norm": 4.99731969833374, "learning_rate": 0.00016271578694735398, "loss": 1.2621, "step": 130500 }, { "epoch": 3.897829397187472, "grad_norm": 3.6538639068603516, "learning_rate": 0.00016270150115143517, "loss": 1.2284, "step": 130550 }, { "epoch": 3.8993222464395547, "grad_norm": 4.081072807312012, "learning_rate": 0.0001626872153555163, "loss": 1.2901, "step": 130600 }, { "epoch": 3.900815095691637, "grad_norm": 5.384579658508301, "learning_rate": 0.0001626729295595975, "loss": 1.2898, "step": 130650 }, { "epoch": 3.9023079449437197, "grad_norm": 4.1314520835876465, "learning_rate": 0.00016265864376367866, "loss": 1.2692, "step": 130700 }, { "epoch": 3.903800794195802, "grad_norm": 5.35874080657959, "learning_rate": 0.00016264435796775983, "loss": 1.2377, "step": 130750 }, { "epoch": 3.9052936434478847, "grad_norm": 4.041543006896973, "learning_rate": 0.000162630072171841, "loss": 1.2757, "step": 130800 }, { "epoch": 3.9067864926999674, "grad_norm": 4.784633636474609, "learning_rate": 0.00016261578637592216, "loss": 1.234, "step": 130850 }, { "epoch": 3.9082793419520496, "grad_norm": 4.274779319763184, "learning_rate": 0.00016260150058000332, "loss": 1.2506, "step": 130900 }, { "epoch": 3.9097721912041323, "grad_norm": 4.858269214630127, "learning_rate": 0.0001625872147840845, "loss": 1.2345, "step": 130950 }, { "epoch": 3.9112650404562146, "grad_norm": 3.8560070991516113, "learning_rate": 0.00016257292898816565, "loss": 1.2689, "step": 131000 }, { "epoch": 3.9127578897082973, "grad_norm": 4.95020866394043, "learning_rate": 0.00016255864319224682, "loss": 1.2156, "step": 131050 }, { "epoch": 3.91425073896038, "grad_norm": 4.7120795249938965, "learning_rate": 0.00016254435739632798, "loss": 1.3119, "step": 131100 }, { "epoch": 3.9157435882124623, "grad_norm": 5.613494873046875, "learning_rate": 0.00016253007160040917, "loss": 1.2806, "step": 131150 }, { "epoch": 3.917236437464545, "grad_norm": 5.0516767501831055, "learning_rate": 0.0001625157858044903, "loss": 1.2254, "step": 131200 }, { "epoch": 3.9187292867166272, "grad_norm": 4.780202388763428, "learning_rate": 0.0001625015000085715, "loss": 1.2915, "step": 131250 }, { "epoch": 3.92022213596871, "grad_norm": 5.579587459564209, "learning_rate": 0.00016248721421265264, "loss": 1.2509, "step": 131300 }, { "epoch": 3.9217149852207926, "grad_norm": 4.543426990509033, "learning_rate": 0.00016247292841673383, "loss": 1.2974, "step": 131350 }, { "epoch": 3.923207834472875, "grad_norm": 5.310088634490967, "learning_rate": 0.000162458642620815, "loss": 1.2247, "step": 131400 }, { "epoch": 3.9247006837249576, "grad_norm": 4.900899887084961, "learning_rate": 0.00016244435682489616, "loss": 1.3038, "step": 131450 }, { "epoch": 3.92619353297704, "grad_norm": 4.816722869873047, "learning_rate": 0.00016243007102897732, "loss": 1.2484, "step": 131500 }, { "epoch": 3.9276863822291226, "grad_norm": 3.618678092956543, "learning_rate": 0.0001624157852330585, "loss": 1.3475, "step": 131550 }, { "epoch": 3.9291792314812053, "grad_norm": 5.137662410736084, "learning_rate": 0.00016240149943713965, "loss": 1.2443, "step": 131600 }, { "epoch": 3.9306720807332876, "grad_norm": 4.798673152923584, "learning_rate": 0.00016238721364122082, "loss": 1.2654, "step": 131650 }, { "epoch": 3.93216492998537, "grad_norm": 4.472255229949951, "learning_rate": 0.00016237292784530198, "loss": 1.2686, "step": 131700 }, { "epoch": 3.9336577792374525, "grad_norm": 4.61024284362793, "learning_rate": 0.00016235864204938315, "loss": 1.2491, "step": 131750 }, { "epoch": 3.9351506284895352, "grad_norm": 4.1179280281066895, "learning_rate": 0.0001623443562534643, "loss": 1.2044, "step": 131800 }, { "epoch": 3.9366434777416175, "grad_norm": 6.594708442687988, "learning_rate": 0.00016233007045754548, "loss": 1.2111, "step": 131850 }, { "epoch": 3.9381363269937, "grad_norm": 8.489596366882324, "learning_rate": 0.00016231578466162664, "loss": 1.2011, "step": 131900 }, { "epoch": 3.9396291762457825, "grad_norm": 5.426854133605957, "learning_rate": 0.0001623014988657078, "loss": 1.268, "step": 131950 }, { "epoch": 3.941122025497865, "grad_norm": 4.155928611755371, "learning_rate": 0.00016228721306978897, "loss": 1.2722, "step": 132000 }, { "epoch": 3.942614874749948, "grad_norm": 4.766868591308594, "learning_rate": 0.00016227292727387014, "loss": 1.288, "step": 132050 }, { "epoch": 3.94410772400203, "grad_norm": 4.16912317276001, "learning_rate": 0.0001622586414779513, "loss": 1.2261, "step": 132100 }, { "epoch": 3.945600573254113, "grad_norm": 6.41418981552124, "learning_rate": 0.00016224435568203247, "loss": 1.2414, "step": 132150 }, { "epoch": 3.947093422506195, "grad_norm": 4.736821174621582, "learning_rate": 0.00016223006988611366, "loss": 1.2481, "step": 132200 }, { "epoch": 3.948586271758278, "grad_norm": 4.794166564941406, "learning_rate": 0.0001622157840901948, "loss": 1.2831, "step": 132250 }, { "epoch": 3.9500791210103605, "grad_norm": 6.948697090148926, "learning_rate": 0.000162201498294276, "loss": 1.2545, "step": 132300 }, { "epoch": 3.951571970262443, "grad_norm": 5.114023208618164, "learning_rate": 0.00016218721249835712, "loss": 1.2809, "step": 132350 }, { "epoch": 3.9530648195145255, "grad_norm": 4.972701072692871, "learning_rate": 0.00016217292670243832, "loss": 1.2143, "step": 132400 }, { "epoch": 3.9545576687666077, "grad_norm": 3.860616445541382, "learning_rate": 0.00016215864090651945, "loss": 1.2209, "step": 132450 }, { "epoch": 3.9560505180186905, "grad_norm": 3.6322524547576904, "learning_rate": 0.00016214435511060065, "loss": 1.2252, "step": 132500 }, { "epoch": 3.957543367270773, "grad_norm": 5.153745174407959, "learning_rate": 0.0001621300693146818, "loss": 1.2748, "step": 132550 }, { "epoch": 3.9590362165228554, "grad_norm": 4.3787617683410645, "learning_rate": 0.00016211578351876297, "loss": 1.2634, "step": 132600 }, { "epoch": 3.960529065774938, "grad_norm": 4.335618019104004, "learning_rate": 0.00016210149772284414, "loss": 1.2661, "step": 132650 }, { "epoch": 3.9620219150270204, "grad_norm": 4.789446830749512, "learning_rate": 0.0001620872119269253, "loss": 1.2877, "step": 132700 }, { "epoch": 3.963514764279103, "grad_norm": 5.508213996887207, "learning_rate": 0.00016207292613100647, "loss": 1.2759, "step": 132750 }, { "epoch": 3.965007613531186, "grad_norm": 3.3553807735443115, "learning_rate": 0.00016205864033508763, "loss": 1.2907, "step": 132800 }, { "epoch": 3.966500462783268, "grad_norm": 4.773813247680664, "learning_rate": 0.0001620443545391688, "loss": 1.3115, "step": 132850 }, { "epoch": 3.9679933120353508, "grad_norm": 4.2718939781188965, "learning_rate": 0.00016203006874324996, "loss": 1.2467, "step": 132900 }, { "epoch": 3.969486161287433, "grad_norm": 4.36405086517334, "learning_rate": 0.00016201578294733113, "loss": 1.2536, "step": 132950 }, { "epoch": 3.9709790105395157, "grad_norm": 5.21968936920166, "learning_rate": 0.00016200149715141232, "loss": 1.3317, "step": 133000 }, { "epoch": 3.9724718597915984, "grad_norm": 3.792954683303833, "learning_rate": 0.00016198721135549346, "loss": 1.2041, "step": 133050 }, { "epoch": 3.9739647090436807, "grad_norm": 4.445356369018555, "learning_rate": 0.00016197292555957465, "loss": 1.2283, "step": 133100 }, { "epoch": 3.9754575582957634, "grad_norm": 4.6043548583984375, "learning_rate": 0.00016195863976365579, "loss": 1.328, "step": 133150 }, { "epoch": 3.9769504075478457, "grad_norm": 4.0124053955078125, "learning_rate": 0.00016194435396773698, "loss": 1.1772, "step": 133200 }, { "epoch": 3.9784432567999284, "grad_norm": 4.683640480041504, "learning_rate": 0.00016193006817181811, "loss": 1.2748, "step": 133250 }, { "epoch": 3.979936106052011, "grad_norm": 5.253026008605957, "learning_rate": 0.0001619157823758993, "loss": 1.2193, "step": 133300 }, { "epoch": 3.9814289553040934, "grad_norm": 4.60040283203125, "learning_rate": 0.00016190149657998047, "loss": 1.2518, "step": 133350 }, { "epoch": 3.982921804556176, "grad_norm": 5.973727226257324, "learning_rate": 0.00016188721078406164, "loss": 1.242, "step": 133400 }, { "epoch": 3.9844146538082583, "grad_norm": 5.09537410736084, "learning_rate": 0.0001618729249881428, "loss": 1.2484, "step": 133450 }, { "epoch": 3.985907503060341, "grad_norm": 4.428155899047852, "learning_rate": 0.00016185863919222397, "loss": 1.2112, "step": 133500 }, { "epoch": 3.9874003523124237, "grad_norm": 4.2854838371276855, "learning_rate": 0.00016184435339630513, "loss": 1.2402, "step": 133550 }, { "epoch": 3.988893201564506, "grad_norm": 4.689759254455566, "learning_rate": 0.0001618300676003863, "loss": 1.2705, "step": 133600 }, { "epoch": 3.9903860508165887, "grad_norm": 4.272946357727051, "learning_rate": 0.00016181578180446746, "loss": 1.2105, "step": 133650 }, { "epoch": 3.991878900068671, "grad_norm": 4.82036018371582, "learning_rate": 0.00016180149600854862, "loss": 1.2379, "step": 133700 }, { "epoch": 3.9933717493207537, "grad_norm": 5.448759078979492, "learning_rate": 0.0001617872102126298, "loss": 1.2077, "step": 133750 }, { "epoch": 3.9948645985728364, "grad_norm": 3.519653797149658, "learning_rate": 0.00016177292441671098, "loss": 1.2057, "step": 133800 }, { "epoch": 3.9963574478249186, "grad_norm": 4.229268550872803, "learning_rate": 0.00016175863862079212, "loss": 1.3109, "step": 133850 }, { "epoch": 3.997850297077001, "grad_norm": 3.772096633911133, "learning_rate": 0.0001617443528248733, "loss": 1.3218, "step": 133900 }, { "epoch": 3.9993431463290836, "grad_norm": 3.7670326232910156, "learning_rate": 0.00016173006702895445, "loss": 1.2882, "step": 133950 }, { "epoch": 4.000835995581166, "grad_norm": 4.392240524291992, "learning_rate": 0.00016171578123303564, "loss": 1.2225, "step": 134000 }, { "epoch": 4.002328844833249, "grad_norm": 4.192978382110596, "learning_rate": 0.00016170149543711678, "loss": 1.1469, "step": 134050 }, { "epoch": 4.003821694085331, "grad_norm": 4.702342987060547, "learning_rate": 0.00016168720964119797, "loss": 1.1452, "step": 134100 }, { "epoch": 4.0053145433374135, "grad_norm": 4.7361555099487305, "learning_rate": 0.00016167292384527913, "loss": 1.1871, "step": 134150 }, { "epoch": 4.006807392589496, "grad_norm": 3.4626450538635254, "learning_rate": 0.0001616586380493603, "loss": 1.159, "step": 134200 }, { "epoch": 4.008300241841579, "grad_norm": 4.5900654792785645, "learning_rate": 0.00016164435225344146, "loss": 1.2012, "step": 134250 }, { "epoch": 4.009793091093662, "grad_norm": 6.448458194732666, "learning_rate": 0.00016163006645752263, "loss": 1.1656, "step": 134300 }, { "epoch": 4.0112859403457435, "grad_norm": 4.073904991149902, "learning_rate": 0.0001616157806616038, "loss": 1.131, "step": 134350 }, { "epoch": 4.012778789597826, "grad_norm": 5.392414569854736, "learning_rate": 0.00016160149486568496, "loss": 1.1462, "step": 134400 }, { "epoch": 4.014271638849909, "grad_norm": 4.959211349487305, "learning_rate": 0.00016158720906976612, "loss": 1.1275, "step": 134450 }, { "epoch": 4.015764488101992, "grad_norm": 4.862436771392822, "learning_rate": 0.00016157292327384729, "loss": 1.1395, "step": 134500 }, { "epoch": 4.017257337354074, "grad_norm": 4.025382995605469, "learning_rate": 0.00016155863747792845, "loss": 1.1529, "step": 134550 }, { "epoch": 4.018750186606156, "grad_norm": 5.5243449211120605, "learning_rate": 0.00016154435168200961, "loss": 1.1552, "step": 134600 }, { "epoch": 4.020243035858239, "grad_norm": 5.48097038269043, "learning_rate": 0.00016153006588609078, "loss": 1.1542, "step": 134650 }, { "epoch": 4.0217358851103215, "grad_norm": 5.017364978790283, "learning_rate": 0.00016151578009017194, "loss": 1.1294, "step": 134700 }, { "epoch": 4.023228734362404, "grad_norm": 3.903317451477051, "learning_rate": 0.0001615014942942531, "loss": 1.1743, "step": 134750 }, { "epoch": 4.024721583614487, "grad_norm": 5.144746780395508, "learning_rate": 0.00016148720849833427, "loss": 1.184, "step": 134800 }, { "epoch": 4.026214432866569, "grad_norm": 3.201718330383301, "learning_rate": 0.00016147292270241547, "loss": 1.1294, "step": 134850 }, { "epoch": 4.0277072821186515, "grad_norm": 5.051429748535156, "learning_rate": 0.0001614586369064966, "loss": 1.1625, "step": 134900 }, { "epoch": 4.029200131370734, "grad_norm": 4.083528995513916, "learning_rate": 0.0001614443511105778, "loss": 1.1865, "step": 134950 }, { "epoch": 4.030692980622817, "grad_norm": 4.694931983947754, "learning_rate": 0.00016143006531465893, "loss": 1.1607, "step": 135000 }, { "epoch": 4.0321858298749, "grad_norm": 3.7577016353607178, "learning_rate": 0.00016141577951874012, "loss": 1.2009, "step": 135050 }, { "epoch": 4.033678679126981, "grad_norm": 3.913961887359619, "learning_rate": 0.00016140149372282126, "loss": 1.148, "step": 135100 }, { "epoch": 4.035171528379064, "grad_norm": 4.2388129234313965, "learning_rate": 0.00016138720792690245, "loss": 1.1673, "step": 135150 }, { "epoch": 4.036664377631147, "grad_norm": 4.703933238983154, "learning_rate": 0.00016137292213098362, "loss": 1.1249, "step": 135200 }, { "epoch": 4.0381572268832295, "grad_norm": 5.607854843139648, "learning_rate": 0.00016135863633506478, "loss": 1.1609, "step": 135250 }, { "epoch": 4.039650076135312, "grad_norm": 4.3828840255737305, "learning_rate": 0.00016134435053914595, "loss": 1.1788, "step": 135300 }, { "epoch": 4.041142925387394, "grad_norm": 5.778615474700928, "learning_rate": 0.0001613300647432271, "loss": 1.2101, "step": 135350 }, { "epoch": 4.042635774639477, "grad_norm": 4.011424541473389, "learning_rate": 0.00016131577894730828, "loss": 1.1991, "step": 135400 }, { "epoch": 4.0441286238915595, "grad_norm": 5.844552993774414, "learning_rate": 0.00016130149315138944, "loss": 1.2172, "step": 135450 }, { "epoch": 4.045621473143642, "grad_norm": 4.851437091827393, "learning_rate": 0.0001612872073554706, "loss": 1.2379, "step": 135500 }, { "epoch": 4.047114322395725, "grad_norm": 6.846874713897705, "learning_rate": 0.00016127292155955177, "loss": 1.1666, "step": 135550 }, { "epoch": 4.048607171647807, "grad_norm": 3.9584429264068604, "learning_rate": 0.00016125863576363294, "loss": 1.191, "step": 135600 }, { "epoch": 4.050100020899889, "grad_norm": 3.547799587249756, "learning_rate": 0.00016124434996771413, "loss": 1.234, "step": 135650 }, { "epoch": 4.051592870151972, "grad_norm": 4.4900641441345215, "learning_rate": 0.00016123006417179526, "loss": 1.1759, "step": 135700 }, { "epoch": 4.053085719404055, "grad_norm": 4.560009479522705, "learning_rate": 0.00016121577837587646, "loss": 1.1759, "step": 135750 }, { "epoch": 4.0545785686561375, "grad_norm": 4.02714204788208, "learning_rate": 0.0001612014925799576, "loss": 1.1788, "step": 135800 }, { "epoch": 4.056071417908219, "grad_norm": 4.854091644287109, "learning_rate": 0.00016118720678403879, "loss": 1.1621, "step": 135850 }, { "epoch": 4.057564267160302, "grad_norm": 4.28786563873291, "learning_rate": 0.00016117292098811992, "loss": 1.1694, "step": 135900 }, { "epoch": 4.059057116412385, "grad_norm": 5.054505825042725, "learning_rate": 0.00016115863519220111, "loss": 1.2124, "step": 135950 }, { "epoch": 4.0605499656644675, "grad_norm": 4.706223011016846, "learning_rate": 0.00016114434939628228, "loss": 1.1867, "step": 136000 }, { "epoch": 4.06204281491655, "grad_norm": 3.443118095397949, "learning_rate": 0.00016113006360036344, "loss": 1.1398, "step": 136050 }, { "epoch": 4.063535664168632, "grad_norm": 5.108642578125, "learning_rate": 0.0001611157778044446, "loss": 1.1902, "step": 136100 }, { "epoch": 4.065028513420715, "grad_norm": 5.258908271789551, "learning_rate": 0.00016110149200852577, "loss": 1.2093, "step": 136150 }, { "epoch": 4.066521362672797, "grad_norm": 3.8509016036987305, "learning_rate": 0.00016108720621260694, "loss": 1.1354, "step": 136200 }, { "epoch": 4.06801421192488, "grad_norm": 5.995665073394775, "learning_rate": 0.0001610729204166881, "loss": 1.1516, "step": 136250 }, { "epoch": 4.069507061176962, "grad_norm": 4.4564738273620605, "learning_rate": 0.00016105863462076927, "loss": 1.1405, "step": 136300 }, { "epoch": 4.070999910429045, "grad_norm": 7.378493309020996, "learning_rate": 0.00016104434882485043, "loss": 1.1817, "step": 136350 }, { "epoch": 4.072492759681127, "grad_norm": 4.1566009521484375, "learning_rate": 0.0001610300630289316, "loss": 1.1988, "step": 136400 }, { "epoch": 4.07398560893321, "grad_norm": 4.735561370849609, "learning_rate": 0.0001610157772330128, "loss": 1.1847, "step": 136450 }, { "epoch": 4.075478458185293, "grad_norm": 6.040157318115234, "learning_rate": 0.00016100149143709393, "loss": 1.1762, "step": 136500 }, { "epoch": 4.076971307437375, "grad_norm": 4.695454120635986, "learning_rate": 0.00016098720564117512, "loss": 1.2151, "step": 136550 }, { "epoch": 4.078464156689457, "grad_norm": 5.111282825469971, "learning_rate": 0.00016097291984525626, "loss": 1.2581, "step": 136600 }, { "epoch": 4.07995700594154, "grad_norm": 6.601406574249268, "learning_rate": 0.00016095863404933745, "loss": 1.1902, "step": 136650 }, { "epoch": 4.081449855193623, "grad_norm": 4.173478126525879, "learning_rate": 0.00016094434825341858, "loss": 1.1809, "step": 136700 }, { "epoch": 4.082942704445705, "grad_norm": 4.189050674438477, "learning_rate": 0.00016093006245749978, "loss": 1.1837, "step": 136750 }, { "epoch": 4.084435553697787, "grad_norm": 4.181232929229736, "learning_rate": 0.00016091577666158094, "loss": 1.124, "step": 136800 }, { "epoch": 4.08592840294987, "grad_norm": 6.015085697174072, "learning_rate": 0.0001609014908656621, "loss": 1.2253, "step": 136850 }, { "epoch": 4.087421252201953, "grad_norm": 4.773750305175781, "learning_rate": 0.00016088720506974327, "loss": 1.1498, "step": 136900 }, { "epoch": 4.088914101454035, "grad_norm": 4.635052680969238, "learning_rate": 0.00016087291927382443, "loss": 1.1946, "step": 136950 }, { "epoch": 4.090406950706118, "grad_norm": 3.5755093097686768, "learning_rate": 0.0001608586334779056, "loss": 1.178, "step": 137000 }, { "epoch": 4.0918997999582, "grad_norm": 5.496055603027344, "learning_rate": 0.00016084434768198676, "loss": 1.1409, "step": 137050 }, { "epoch": 4.093392649210283, "grad_norm": 4.117304801940918, "learning_rate": 0.00016083006188606793, "loss": 1.2, "step": 137100 }, { "epoch": 4.094885498462365, "grad_norm": 5.538839817047119, "learning_rate": 0.0001608157760901491, "loss": 1.1778, "step": 137150 }, { "epoch": 4.096378347714448, "grad_norm": 4.6166558265686035, "learning_rate": 0.00016080149029423026, "loss": 1.1797, "step": 137200 }, { "epoch": 4.097871196966531, "grad_norm": 4.904277324676514, "learning_rate": 0.00016078720449831142, "loss": 1.1951, "step": 137250 }, { "epoch": 4.0993640462186125, "grad_norm": 4.139499664306641, "learning_rate": 0.0001607729187023926, "loss": 1.1909, "step": 137300 }, { "epoch": 4.100856895470695, "grad_norm": 4.454082489013672, "learning_rate": 0.00016075863290647375, "loss": 1.1944, "step": 137350 }, { "epoch": 4.102349744722778, "grad_norm": 6.1854963302612305, "learning_rate": 0.00016074434711055492, "loss": 1.1934, "step": 137400 }, { "epoch": 4.103842593974861, "grad_norm": 3.4759559631347656, "learning_rate": 0.00016073006131463608, "loss": 1.1489, "step": 137450 }, { "epoch": 4.105335443226943, "grad_norm": 5.522613525390625, "learning_rate": 0.00016071577551871727, "loss": 1.1765, "step": 137500 }, { "epoch": 4.106828292479025, "grad_norm": 4.545987606048584, "learning_rate": 0.0001607014897227984, "loss": 1.2346, "step": 137550 }, { "epoch": 4.108321141731108, "grad_norm": 3.9791605472564697, "learning_rate": 0.0001606872039268796, "loss": 1.1124, "step": 137600 }, { "epoch": 4.109813990983191, "grad_norm": 5.259634971618652, "learning_rate": 0.00016067291813096074, "loss": 1.2323, "step": 137650 }, { "epoch": 4.111306840235273, "grad_norm": 3.549207925796509, "learning_rate": 0.00016065863233504193, "loss": 1.2269, "step": 137700 }, { "epoch": 4.112799689487356, "grad_norm": 4.890096664428711, "learning_rate": 0.00016064434653912307, "loss": 1.142, "step": 137750 }, { "epoch": 4.114292538739438, "grad_norm": 3.7870566844940186, "learning_rate": 0.00016063006074320426, "loss": 1.1446, "step": 137800 }, { "epoch": 4.1157853879915205, "grad_norm": 5.0174689292907715, "learning_rate": 0.00016061577494728543, "loss": 1.2022, "step": 137850 }, { "epoch": 4.117278237243603, "grad_norm": 4.542319297790527, "learning_rate": 0.0001606014891513666, "loss": 1.1809, "step": 137900 }, { "epoch": 4.118771086495686, "grad_norm": 5.590012073516846, "learning_rate": 0.00016058720335544776, "loss": 1.1811, "step": 137950 }, { "epoch": 4.120263935747769, "grad_norm": 4.569715976715088, "learning_rate": 0.00016057291755952892, "loss": 1.1715, "step": 138000 }, { "epoch": 4.12175678499985, "grad_norm": 3.725158214569092, "learning_rate": 0.00016055863176361008, "loss": 1.1825, "step": 138050 }, { "epoch": 4.123249634251933, "grad_norm": 3.941608428955078, "learning_rate": 0.00016054434596769125, "loss": 1.1482, "step": 138100 }, { "epoch": 4.124742483504016, "grad_norm": 4.545751094818115, "learning_rate": 0.00016053006017177241, "loss": 1.2089, "step": 138150 }, { "epoch": 4.1262353327560986, "grad_norm": 6.809751033782959, "learning_rate": 0.00016051577437585358, "loss": 1.1664, "step": 138200 }, { "epoch": 4.12772818200818, "grad_norm": 5.444116592407227, "learning_rate": 0.00016050148857993474, "loss": 1.2127, "step": 138250 }, { "epoch": 4.129221031260263, "grad_norm": 4.276831150054932, "learning_rate": 0.00016048720278401593, "loss": 1.2138, "step": 138300 }, { "epoch": 4.130713880512346, "grad_norm": 3.9716556072235107, "learning_rate": 0.00016047291698809707, "loss": 1.2237, "step": 138350 }, { "epoch": 4.1322067297644285, "grad_norm": 3.9793570041656494, "learning_rate": 0.00016045863119217826, "loss": 1.1997, "step": 138400 }, { "epoch": 4.133699579016511, "grad_norm": 4.590352535247803, "learning_rate": 0.0001604443453962594, "loss": 1.1631, "step": 138450 }, { "epoch": 4.135192428268593, "grad_norm": 3.9549121856689453, "learning_rate": 0.0001604300596003406, "loss": 1.1518, "step": 138500 }, { "epoch": 4.136685277520676, "grad_norm": 5.076075077056885, "learning_rate": 0.00016041577380442173, "loss": 1.1806, "step": 138550 }, { "epoch": 4.138178126772758, "grad_norm": 6.228057384490967, "learning_rate": 0.00016040148800850292, "loss": 1.1547, "step": 138600 }, { "epoch": 4.139670976024841, "grad_norm": 4.276813507080078, "learning_rate": 0.0001603872022125841, "loss": 1.1351, "step": 138650 }, { "epoch": 4.141163825276924, "grad_norm": 5.215205669403076, "learning_rate": 0.00016037291641666525, "loss": 1.2021, "step": 138700 }, { "epoch": 4.142656674529006, "grad_norm": 4.386962413787842, "learning_rate": 0.00016035863062074642, "loss": 1.2346, "step": 138750 }, { "epoch": 4.144149523781088, "grad_norm": 4.739329814910889, "learning_rate": 0.00016034434482482758, "loss": 1.1971, "step": 138800 }, { "epoch": 4.145642373033171, "grad_norm": 3.994267225265503, "learning_rate": 0.00016033005902890875, "loss": 1.2259, "step": 138850 }, { "epoch": 4.147135222285254, "grad_norm": 4.481820106506348, "learning_rate": 0.0001603157732329899, "loss": 1.2428, "step": 138900 }, { "epoch": 4.1486280715373365, "grad_norm": 5.454226016998291, "learning_rate": 0.00016030148743707108, "loss": 1.1596, "step": 138950 }, { "epoch": 4.150120920789418, "grad_norm": 5.072449684143066, "learning_rate": 0.00016028720164115224, "loss": 1.1755, "step": 139000 }, { "epoch": 4.151613770041501, "grad_norm": 4.940500259399414, "learning_rate": 0.0001602729158452334, "loss": 1.1765, "step": 139050 }, { "epoch": 4.153106619293584, "grad_norm": 4.294040203094482, "learning_rate": 0.0001602586300493146, "loss": 1.1638, "step": 139100 }, { "epoch": 4.154599468545666, "grad_norm": 3.98103928565979, "learning_rate": 0.00016024434425339573, "loss": 1.192, "step": 139150 }, { "epoch": 4.156092317797749, "grad_norm": 5.18140983581543, "learning_rate": 0.00016023005845747693, "loss": 1.2121, "step": 139200 }, { "epoch": 4.157585167049831, "grad_norm": 4.342568874359131, "learning_rate": 0.00016021577266155806, "loss": 1.2056, "step": 139250 }, { "epoch": 4.159078016301914, "grad_norm": 4.284645080566406, "learning_rate": 0.00016020148686563926, "loss": 1.1864, "step": 139300 }, { "epoch": 4.160570865553996, "grad_norm": 5.542572498321533, "learning_rate": 0.0001601872010697204, "loss": 1.1903, "step": 139350 }, { "epoch": 4.162063714806079, "grad_norm": 4.752786159515381, "learning_rate": 0.00016017291527380158, "loss": 1.2012, "step": 139400 }, { "epoch": 4.163556564058162, "grad_norm": 4.298015117645264, "learning_rate": 0.00016015862947788275, "loss": 1.2108, "step": 139450 }, { "epoch": 4.165049413310244, "grad_norm": 4.048568248748779, "learning_rate": 0.00016014434368196391, "loss": 1.2191, "step": 139500 }, { "epoch": 4.166542262562326, "grad_norm": 4.178681373596191, "learning_rate": 0.00016013005788604508, "loss": 1.1892, "step": 139550 }, { "epoch": 4.168035111814409, "grad_norm": 6.201132297515869, "learning_rate": 0.00016011577209012622, "loss": 1.1917, "step": 139600 }, { "epoch": 4.169527961066492, "grad_norm": 4.938151836395264, "learning_rate": 0.0001601014862942074, "loss": 1.1954, "step": 139650 }, { "epoch": 4.171020810318574, "grad_norm": 6.060943126678467, "learning_rate": 0.00016008720049828855, "loss": 1.1471, "step": 139700 }, { "epoch": 4.172513659570656, "grad_norm": 5.0269880294799805, "learning_rate": 0.00016007291470236974, "loss": 1.1868, "step": 139750 }, { "epoch": 4.174006508822739, "grad_norm": 4.509561538696289, "learning_rate": 0.0001600586289064509, "loss": 1.1906, "step": 139800 }, { "epoch": 4.175499358074822, "grad_norm": 4.10978364944458, "learning_rate": 0.00016004434311053207, "loss": 1.1515, "step": 139850 }, { "epoch": 4.176992207326904, "grad_norm": 3.7452332973480225, "learning_rate": 0.00016003005731461323, "loss": 1.1807, "step": 139900 }, { "epoch": 4.178485056578987, "grad_norm": 4.911968231201172, "learning_rate": 0.0001600157715186944, "loss": 1.1568, "step": 139950 }, { "epoch": 4.179977905831069, "grad_norm": 4.606587886810303, "learning_rate": 0.00016000148572277556, "loss": 1.2099, "step": 140000 }, { "epoch": 4.181470755083152, "grad_norm": 4.736166954040527, "learning_rate": 0.00015998719992685672, "loss": 1.2101, "step": 140050 }, { "epoch": 4.182963604335234, "grad_norm": 4.431340217590332, "learning_rate": 0.0001599729141309379, "loss": 1.1885, "step": 140100 }, { "epoch": 4.184456453587317, "grad_norm": 4.86411190032959, "learning_rate": 0.00015995862833501905, "loss": 1.2, "step": 140150 }, { "epoch": 4.1859493028394, "grad_norm": 4.4166951179504395, "learning_rate": 0.00015994434253910022, "loss": 1.1717, "step": 140200 }, { "epoch": 4.1874421520914815, "grad_norm": 4.2804856300354, "learning_rate": 0.0001599300567431814, "loss": 1.1695, "step": 140250 }, { "epoch": 4.188935001343564, "grad_norm": 3.3184804916381836, "learning_rate": 0.00015991577094726255, "loss": 1.1864, "step": 140300 }, { "epoch": 4.190427850595647, "grad_norm": 3.712420701980591, "learning_rate": 0.00015990148515134374, "loss": 1.2254, "step": 140350 }, { "epoch": 4.19192069984773, "grad_norm": 4.40815544128418, "learning_rate": 0.00015988719935542488, "loss": 1.1645, "step": 140400 }, { "epoch": 4.193413549099812, "grad_norm": 4.534137725830078, "learning_rate": 0.00015987291355950607, "loss": 1.2137, "step": 140450 }, { "epoch": 4.194906398351894, "grad_norm": 3.4060471057891846, "learning_rate": 0.00015985862776358723, "loss": 1.1804, "step": 140500 }, { "epoch": 4.196399247603977, "grad_norm": 3.3433594703674316, "learning_rate": 0.0001598443419676684, "loss": 1.1801, "step": 140550 }, { "epoch": 4.19789209685606, "grad_norm": 4.448024272918701, "learning_rate": 0.00015983005617174956, "loss": 1.2327, "step": 140600 }, { "epoch": 4.199384946108142, "grad_norm": 4.073503494262695, "learning_rate": 0.00015981577037583073, "loss": 1.2058, "step": 140650 }, { "epoch": 4.200877795360224, "grad_norm": 4.200987339019775, "learning_rate": 0.0001598014845799119, "loss": 1.166, "step": 140700 }, { "epoch": 4.202370644612307, "grad_norm": 6.871157169342041, "learning_rate": 0.00015978719878399306, "loss": 1.1599, "step": 140750 }, { "epoch": 4.2038634938643895, "grad_norm": 6.4246745109558105, "learning_rate": 0.00015977291298807422, "loss": 1.2119, "step": 140800 }, { "epoch": 4.205356343116472, "grad_norm": 4.552119731903076, "learning_rate": 0.0001597586271921554, "loss": 1.2171, "step": 140850 }, { "epoch": 4.206849192368555, "grad_norm": 4.876185894012451, "learning_rate": 0.00015974434139623655, "loss": 1.2087, "step": 140900 }, { "epoch": 4.208342041620637, "grad_norm": 5.435263633728027, "learning_rate": 0.00015973005560031774, "loss": 1.1805, "step": 140950 }, { "epoch": 4.2098348908727194, "grad_norm": 6.356777191162109, "learning_rate": 0.00015971576980439888, "loss": 1.1849, "step": 141000 }, { "epoch": 4.211327740124802, "grad_norm": 5.153776168823242, "learning_rate": 0.00015970148400848007, "loss": 1.1769, "step": 141050 }, { "epoch": 4.212820589376885, "grad_norm": 3.6175692081451416, "learning_rate": 0.0001596871982125612, "loss": 1.1857, "step": 141100 }, { "epoch": 4.214313438628968, "grad_norm": 3.5825202465057373, "learning_rate": 0.0001596729124166424, "loss": 1.2036, "step": 141150 }, { "epoch": 4.215806287881049, "grad_norm": 4.836036205291748, "learning_rate": 0.00015965862662072354, "loss": 1.2118, "step": 141200 }, { "epoch": 4.217299137133132, "grad_norm": 4.852596759796143, "learning_rate": 0.00015964434082480473, "loss": 1.1825, "step": 141250 }, { "epoch": 4.218791986385215, "grad_norm": 6.009531497955322, "learning_rate": 0.0001596300550288859, "loss": 1.2429, "step": 141300 }, { "epoch": 4.2202848356372975, "grad_norm": 4.5213727951049805, "learning_rate": 0.00015961576923296706, "loss": 1.1961, "step": 141350 }, { "epoch": 4.22177768488938, "grad_norm": 4.069146633148193, "learning_rate": 0.00015960148343704822, "loss": 1.214, "step": 141400 }, { "epoch": 4.223270534141462, "grad_norm": 4.735863208770752, "learning_rate": 0.0001595871976411294, "loss": 1.2156, "step": 141450 }, { "epoch": 4.224763383393545, "grad_norm": 4.679515361785889, "learning_rate": 0.00015957291184521055, "loss": 1.1919, "step": 141500 }, { "epoch": 4.226256232645627, "grad_norm": 4.299509048461914, "learning_rate": 0.00015955862604929172, "loss": 1.198, "step": 141550 }, { "epoch": 4.22774908189771, "grad_norm": 4.518818378448486, "learning_rate": 0.00015954434025337288, "loss": 1.1918, "step": 141600 }, { "epoch": 4.229241931149793, "grad_norm": 2.8362114429473877, "learning_rate": 0.00015953005445745405, "loss": 1.1891, "step": 141650 }, { "epoch": 4.230734780401875, "grad_norm": 5.10905647277832, "learning_rate": 0.0001595157686615352, "loss": 1.2033, "step": 141700 }, { "epoch": 4.232227629653957, "grad_norm": 5.38252592086792, "learning_rate": 0.0001595014828656164, "loss": 1.2235, "step": 141750 }, { "epoch": 4.23372047890604, "grad_norm": 4.693549156188965, "learning_rate": 0.00015948719706969754, "loss": 1.2876, "step": 141800 }, { "epoch": 4.235213328158123, "grad_norm": 5.683078289031982, "learning_rate": 0.00015947291127377873, "loss": 1.175, "step": 141850 }, { "epoch": 4.2367061774102055, "grad_norm": 4.098508358001709, "learning_rate": 0.00015945862547785987, "loss": 1.2489, "step": 141900 }, { "epoch": 4.238199026662287, "grad_norm": 4.055385589599609, "learning_rate": 0.00015944433968194106, "loss": 1.1783, "step": 141950 }, { "epoch": 4.23969187591437, "grad_norm": 4.988245010375977, "learning_rate": 0.0001594300538860222, "loss": 1.1549, "step": 142000 }, { "epoch": 4.241184725166453, "grad_norm": 4.256673812866211, "learning_rate": 0.0001594157680901034, "loss": 1.2084, "step": 142050 }, { "epoch": 4.242677574418535, "grad_norm": 5.259324550628662, "learning_rate": 0.00015940148229418456, "loss": 1.1815, "step": 142100 }, { "epoch": 4.244170423670618, "grad_norm": 5.063663005828857, "learning_rate": 0.00015938719649826572, "loss": 1.1878, "step": 142150 }, { "epoch": 4.2456632729227, "grad_norm": 3.863372325897217, "learning_rate": 0.00015937291070234689, "loss": 1.1603, "step": 142200 }, { "epoch": 4.247156122174783, "grad_norm": 4.990203380584717, "learning_rate": 0.00015935862490642802, "loss": 1.2208, "step": 142250 }, { "epoch": 4.248648971426865, "grad_norm": 3.7190535068511963, "learning_rate": 0.00015934433911050922, "loss": 1.2135, "step": 142300 }, { "epoch": 4.250141820678948, "grad_norm": 6.787582874298096, "learning_rate": 0.00015933005331459035, "loss": 1.1978, "step": 142350 }, { "epoch": 4.251634669931031, "grad_norm": 4.888009548187256, "learning_rate": 0.00015931576751867155, "loss": 1.2557, "step": 142400 }, { "epoch": 4.253127519183113, "grad_norm": 4.288732528686523, "learning_rate": 0.0001593014817227527, "loss": 1.2057, "step": 142450 }, { "epoch": 4.254620368435195, "grad_norm": 4.282674789428711, "learning_rate": 0.00015928719592683387, "loss": 1.1783, "step": 142500 }, { "epoch": 4.256113217687278, "grad_norm": 4.2092719078063965, "learning_rate": 0.00015927291013091504, "loss": 1.214, "step": 142550 }, { "epoch": 4.257606066939361, "grad_norm": 3.5486562252044678, "learning_rate": 0.0001592586243349962, "loss": 1.1579, "step": 142600 }, { "epoch": 4.2590989161914425, "grad_norm": 4.74271821975708, "learning_rate": 0.00015924433853907737, "loss": 1.145, "step": 142650 }, { "epoch": 4.260591765443525, "grad_norm": 5.327475070953369, "learning_rate": 0.00015923005274315853, "loss": 1.1631, "step": 142700 }, { "epoch": 4.262084614695608, "grad_norm": 4.441630840301514, "learning_rate": 0.0001592157669472397, "loss": 1.1797, "step": 142750 }, { "epoch": 4.263577463947691, "grad_norm": 4.510640621185303, "learning_rate": 0.00015920148115132086, "loss": 1.1697, "step": 142800 }, { "epoch": 4.265070313199773, "grad_norm": 4.31403112411499, "learning_rate": 0.00015918719535540203, "loss": 1.2071, "step": 142850 }, { "epoch": 4.266563162451856, "grad_norm": 4.649517059326172, "learning_rate": 0.00015917290955948322, "loss": 1.1932, "step": 142900 }, { "epoch": 4.268056011703938, "grad_norm": 3.873218059539795, "learning_rate": 0.00015915862376356436, "loss": 1.1632, "step": 142950 }, { "epoch": 4.269548860956021, "grad_norm": 4.308788776397705, "learning_rate": 0.00015914433796764555, "loss": 1.2257, "step": 143000 }, { "epoch": 4.271041710208103, "grad_norm": 5.109141826629639, "learning_rate": 0.00015913005217172669, "loss": 1.2442, "step": 143050 }, { "epoch": 4.272534559460186, "grad_norm": 3.967876434326172, "learning_rate": 0.00015911576637580788, "loss": 1.2319, "step": 143100 }, { "epoch": 4.274027408712268, "grad_norm": 5.368978500366211, "learning_rate": 0.00015910148057988904, "loss": 1.1588, "step": 143150 }, { "epoch": 4.2755202579643505, "grad_norm": 5.190979480743408, "learning_rate": 0.0001590871947839702, "loss": 1.2443, "step": 143200 }, { "epoch": 4.277013107216433, "grad_norm": 3.6689977645874023, "learning_rate": 0.00015907290898805137, "loss": 1.2022, "step": 143250 }, { "epoch": 4.278505956468516, "grad_norm": 4.946050643920898, "learning_rate": 0.00015905862319213254, "loss": 1.2022, "step": 143300 }, { "epoch": 4.279998805720599, "grad_norm": 3.930406093597412, "learning_rate": 0.0001590443373962137, "loss": 1.2069, "step": 143350 }, { "epoch": 4.2814916549726805, "grad_norm": 4.970086574554443, "learning_rate": 0.00015903005160029487, "loss": 1.1877, "step": 143400 }, { "epoch": 4.282984504224763, "grad_norm": 4.933574199676514, "learning_rate": 0.00015901576580437603, "loss": 1.2037, "step": 143450 }, { "epoch": 4.284477353476846, "grad_norm": 5.583881378173828, "learning_rate": 0.0001590014800084572, "loss": 1.2082, "step": 143500 }, { "epoch": 4.285970202728929, "grad_norm": 3.963679552078247, "learning_rate": 0.00015898719421253836, "loss": 1.176, "step": 143550 }, { "epoch": 4.287463051981011, "grad_norm": 3.982883930206299, "learning_rate": 0.00015897290841661952, "loss": 1.2513, "step": 143600 }, { "epoch": 4.288955901233093, "grad_norm": 3.9288153648376465, "learning_rate": 0.0001589586226207007, "loss": 1.187, "step": 143650 }, { "epoch": 4.290448750485176, "grad_norm": 5.109147071838379, "learning_rate": 0.00015894433682478188, "loss": 1.2322, "step": 143700 }, { "epoch": 4.2919415997372585, "grad_norm": 7.246605396270752, "learning_rate": 0.00015893005102886302, "loss": 1.1822, "step": 143750 }, { "epoch": 4.293434448989341, "grad_norm": 4.197552680969238, "learning_rate": 0.0001589157652329442, "loss": 1.2281, "step": 143800 }, { "epoch": 4.294927298241424, "grad_norm": 6.113961696624756, "learning_rate": 0.00015890147943702535, "loss": 1.1814, "step": 143850 }, { "epoch": 4.296420147493506, "grad_norm": 6.120628356933594, "learning_rate": 0.00015888719364110654, "loss": 1.1497, "step": 143900 }, { "epoch": 4.2979129967455885, "grad_norm": 5.152296543121338, "learning_rate": 0.0001588729078451877, "loss": 1.2277, "step": 143950 }, { "epoch": 4.299405845997671, "grad_norm": 4.7058000564575195, "learning_rate": 0.00015885862204926887, "loss": 1.1586, "step": 144000 }, { "epoch": 4.300898695249754, "grad_norm": 4.757035732269287, "learning_rate": 0.00015884433625335003, "loss": 1.1701, "step": 144050 }, { "epoch": 4.302391544501837, "grad_norm": 5.381356239318848, "learning_rate": 0.0001588300504574312, "loss": 1.2242, "step": 144100 }, { "epoch": 4.303884393753918, "grad_norm": 4.9179511070251465, "learning_rate": 0.00015881576466151236, "loss": 1.2313, "step": 144150 }, { "epoch": 4.305377243006001, "grad_norm": 4.601654529571533, "learning_rate": 0.00015880147886559353, "loss": 1.2054, "step": 144200 }, { "epoch": 4.306870092258084, "grad_norm": 4.032548904418945, "learning_rate": 0.0001587871930696747, "loss": 1.2266, "step": 144250 }, { "epoch": 4.3083629415101665, "grad_norm": 4.067842960357666, "learning_rate": 0.00015877290727375586, "loss": 1.1795, "step": 144300 }, { "epoch": 4.309855790762249, "grad_norm": 4.99582576751709, "learning_rate": 0.00015875862147783702, "loss": 1.2216, "step": 144350 }, { "epoch": 4.311348640014331, "grad_norm": 6.543945789337158, "learning_rate": 0.0001587443356819182, "loss": 1.1862, "step": 144400 }, { "epoch": 4.312841489266414, "grad_norm": 4.526428699493408, "learning_rate": 0.00015873004988599935, "loss": 1.1575, "step": 144450 }, { "epoch": 4.3143343385184965, "grad_norm": 5.83212947845459, "learning_rate": 0.00015871576409008054, "loss": 1.2166, "step": 144500 }, { "epoch": 4.315827187770579, "grad_norm": 3.717582941055298, "learning_rate": 0.00015870147829416168, "loss": 1.2081, "step": 144550 }, { "epoch": 4.317320037022661, "grad_norm": 4.536131381988525, "learning_rate": 0.00015868719249824287, "loss": 1.2142, "step": 144600 }, { "epoch": 4.318812886274744, "grad_norm": 4.8904290199279785, "learning_rate": 0.000158672906702324, "loss": 1.2321, "step": 144650 }, { "epoch": 4.320305735526826, "grad_norm": 3.6146597862243652, "learning_rate": 0.0001586586209064052, "loss": 1.1443, "step": 144700 }, { "epoch": 4.321798584778909, "grad_norm": 4.939590930938721, "learning_rate": 0.00015864433511048637, "loss": 1.2164, "step": 144750 }, { "epoch": 4.323291434030992, "grad_norm": 3.37874698638916, "learning_rate": 0.00015863004931456753, "loss": 1.2104, "step": 144800 }, { "epoch": 4.3247842832830745, "grad_norm": 4.682936191558838, "learning_rate": 0.0001586157635186487, "loss": 1.2236, "step": 144850 }, { "epoch": 4.326277132535156, "grad_norm": 3.446786880493164, "learning_rate": 0.00015860147772272983, "loss": 1.1452, "step": 144900 }, { "epoch": 4.327769981787239, "grad_norm": 3.864423990249634, "learning_rate": 0.00015858719192681102, "loss": 1.1497, "step": 144950 }, { "epoch": 4.329262831039322, "grad_norm": 5.804245471954346, "learning_rate": 0.00015857290613089216, "loss": 1.2297, "step": 145000 }, { "epoch": 4.3307556802914045, "grad_norm": 6.271811485290527, "learning_rate": 0.00015855862033497335, "loss": 1.1358, "step": 145050 }, { "epoch": 4.332248529543486, "grad_norm": 5.019343852996826, "learning_rate": 0.00015854433453905452, "loss": 1.2191, "step": 145100 }, { "epoch": 4.333741378795569, "grad_norm": 7.10085916519165, "learning_rate": 0.00015853004874313568, "loss": 1.1678, "step": 145150 }, { "epoch": 4.335234228047652, "grad_norm": 4.680956840515137, "learning_rate": 0.00015851576294721685, "loss": 1.155, "step": 145200 }, { "epoch": 4.336727077299734, "grad_norm": 4.227858066558838, "learning_rate": 0.000158501477151298, "loss": 1.1563, "step": 145250 }, { "epoch": 4.338219926551817, "grad_norm": 6.304920196533203, "learning_rate": 0.00015848719135537918, "loss": 1.2083, "step": 145300 }, { "epoch": 4.339712775803899, "grad_norm": 3.7268805503845215, "learning_rate": 0.00015847290555946034, "loss": 1.2171, "step": 145350 }, { "epoch": 4.341205625055982, "grad_norm": 3.7859396934509277, "learning_rate": 0.0001584586197635415, "loss": 1.198, "step": 145400 }, { "epoch": 4.342698474308064, "grad_norm": 4.7007246017456055, "learning_rate": 0.00015844433396762267, "loss": 1.2418, "step": 145450 }, { "epoch": 4.344191323560147, "grad_norm": 4.652912616729736, "learning_rate": 0.00015843004817170383, "loss": 1.2408, "step": 145500 }, { "epoch": 4.34568417281223, "grad_norm": 5.060214519500732, "learning_rate": 0.00015841576237578503, "loss": 1.1922, "step": 145550 }, { "epoch": 4.347177022064312, "grad_norm": 4.346338272094727, "learning_rate": 0.00015840147657986616, "loss": 1.223, "step": 145600 }, { "epoch": 4.348669871316394, "grad_norm": 3.7870049476623535, "learning_rate": 0.00015838719078394736, "loss": 1.205, "step": 145650 }, { "epoch": 4.350162720568477, "grad_norm": 4.397316932678223, "learning_rate": 0.0001583729049880285, "loss": 1.1928, "step": 145700 }, { "epoch": 4.35165556982056, "grad_norm": 4.058841228485107, "learning_rate": 0.00015835861919210969, "loss": 1.2002, "step": 145750 }, { "epoch": 4.353148419072642, "grad_norm": 4.240140914916992, "learning_rate": 0.00015834433339619082, "loss": 1.1746, "step": 145800 }, { "epoch": 4.354641268324724, "grad_norm": 4.605095863342285, "learning_rate": 0.00015833004760027201, "loss": 1.1907, "step": 145850 }, { "epoch": 4.356134117576807, "grad_norm": 4.193150997161865, "learning_rate": 0.00015831576180435318, "loss": 1.2754, "step": 145900 }, { "epoch": 4.35762696682889, "grad_norm": 4.516674995422363, "learning_rate": 0.00015830147600843434, "loss": 1.2, "step": 145950 }, { "epoch": 4.359119816080972, "grad_norm": 4.528346538543701, "learning_rate": 0.0001582871902125155, "loss": 1.2143, "step": 146000 }, { "epoch": 4.360612665333055, "grad_norm": 4.620771408081055, "learning_rate": 0.00015827290441659667, "loss": 1.2008, "step": 146050 }, { "epoch": 4.362105514585137, "grad_norm": 5.53831148147583, "learning_rate": 0.00015825861862067784, "loss": 1.1736, "step": 146100 }, { "epoch": 4.3635983638372196, "grad_norm": 5.173369884490967, "learning_rate": 0.000158244332824759, "loss": 1.2242, "step": 146150 }, { "epoch": 4.365091213089302, "grad_norm": 4.372010231018066, "learning_rate": 0.00015823004702884017, "loss": 1.1915, "step": 146200 }, { "epoch": 4.366584062341385, "grad_norm": 6.224375247955322, "learning_rate": 0.00015821576123292133, "loss": 1.2133, "step": 146250 }, { "epoch": 4.368076911593468, "grad_norm": 8.175760269165039, "learning_rate": 0.0001582014754370025, "loss": 1.208, "step": 146300 }, { "epoch": 4.3695697608455495, "grad_norm": 4.286147594451904, "learning_rate": 0.0001581871896410837, "loss": 1.2283, "step": 146350 }, { "epoch": 4.371062610097632, "grad_norm": 5.579278945922852, "learning_rate": 0.00015817290384516483, "loss": 1.1817, "step": 146400 }, { "epoch": 4.372555459349715, "grad_norm": 3.485745668411255, "learning_rate": 0.00015815861804924602, "loss": 1.2079, "step": 146450 }, { "epoch": 4.374048308601798, "grad_norm": 5.126974582672119, "learning_rate": 0.00015814433225332716, "loss": 1.1943, "step": 146500 }, { "epoch": 4.37554115785388, "grad_norm": 5.43192720413208, "learning_rate": 0.00015813004645740835, "loss": 1.1759, "step": 146550 }, { "epoch": 4.377034007105962, "grad_norm": 5.335154056549072, "learning_rate": 0.0001581157606614895, "loss": 1.2406, "step": 146600 }, { "epoch": 4.378526856358045, "grad_norm": 4.4820756912231445, "learning_rate": 0.00015810147486557068, "loss": 1.2259, "step": 146650 }, { "epoch": 4.3800197056101275, "grad_norm": 4.541141986846924, "learning_rate": 0.00015808718906965184, "loss": 1.2041, "step": 146700 }, { "epoch": 4.38151255486221, "grad_norm": 5.367811679840088, "learning_rate": 0.000158072903273733, "loss": 1.1894, "step": 146750 }, { "epoch": 4.383005404114293, "grad_norm": 3.965756893157959, "learning_rate": 0.00015805861747781417, "loss": 1.2322, "step": 146800 }, { "epoch": 4.384498253366375, "grad_norm": 5.187005519866943, "learning_rate": 0.00015804433168189533, "loss": 1.2183, "step": 146850 }, { "epoch": 4.3859911026184575, "grad_norm": 4.8554205894470215, "learning_rate": 0.0001580300458859765, "loss": 1.2169, "step": 146900 }, { "epoch": 4.38748395187054, "grad_norm": 5.468644618988037, "learning_rate": 0.00015801576009005766, "loss": 1.2101, "step": 146950 }, { "epoch": 4.388976801122623, "grad_norm": 3.7732479572296143, "learning_rate": 0.00015800147429413883, "loss": 1.226, "step": 147000 }, { "epoch": 4.390469650374705, "grad_norm": 4.265769958496094, "learning_rate": 0.00015798718849822, "loss": 1.1803, "step": 147050 }, { "epoch": 4.391962499626787, "grad_norm": 5.331263065338135, "learning_rate": 0.00015797290270230116, "loss": 1.1799, "step": 147100 }, { "epoch": 4.39345534887887, "grad_norm": 4.761427879333496, "learning_rate": 0.00015795861690638235, "loss": 1.2091, "step": 147150 }, { "epoch": 4.394948198130953, "grad_norm": 4.461631774902344, "learning_rate": 0.0001579443311104635, "loss": 1.2328, "step": 147200 }, { "epoch": 4.3964410473830355, "grad_norm": 4.383672714233398, "learning_rate": 0.00015793004531454468, "loss": 1.2279, "step": 147250 }, { "epoch": 4.397933896635118, "grad_norm": 6.641529560089111, "learning_rate": 0.00015791575951862582, "loss": 1.2231, "step": 147300 }, { "epoch": 4.3994267458872, "grad_norm": 3.944716215133667, "learning_rate": 0.000157901473722707, "loss": 1.2308, "step": 147350 }, { "epoch": 4.400919595139283, "grad_norm": 3.688462495803833, "learning_rate": 0.00015788718792678817, "loss": 1.252, "step": 147400 }, { "epoch": 4.4024124443913655, "grad_norm": 5.539252281188965, "learning_rate": 0.00015787290213086934, "loss": 1.2468, "step": 147450 }, { "epoch": 4.403905293643448, "grad_norm": 5.409310340881348, "learning_rate": 0.0001578586163349505, "loss": 1.2176, "step": 147500 }, { "epoch": 4.40539814289553, "grad_norm": 4.958510875701904, "learning_rate": 0.00015784433053903164, "loss": 1.2498, "step": 147550 }, { "epoch": 4.406890992147613, "grad_norm": 5.825056076049805, "learning_rate": 0.00015783004474311283, "loss": 1.2347, "step": 147600 }, { "epoch": 4.408383841399695, "grad_norm": 6.798030853271484, "learning_rate": 0.00015781575894719397, "loss": 1.2459, "step": 147650 }, { "epoch": 4.409876690651778, "grad_norm": 4.517495632171631, "learning_rate": 0.00015780147315127516, "loss": 1.2076, "step": 147700 }, { "epoch": 4.411369539903861, "grad_norm": 4.233908653259277, "learning_rate": 0.00015778718735535633, "loss": 1.1905, "step": 147750 }, { "epoch": 4.412862389155943, "grad_norm": 4.206421375274658, "learning_rate": 0.0001577729015594375, "loss": 1.2357, "step": 147800 }, { "epoch": 4.414355238408025, "grad_norm": 4.130222797393799, "learning_rate": 0.00015775861576351866, "loss": 1.201, "step": 147850 }, { "epoch": 4.415848087660108, "grad_norm": 3.8784375190734863, "learning_rate": 0.00015774432996759982, "loss": 1.2536, "step": 147900 }, { "epoch": 4.417340936912191, "grad_norm": 5.533395290374756, "learning_rate": 0.00015773004417168098, "loss": 1.208, "step": 147950 }, { "epoch": 4.4188337861642735, "grad_norm": 5.0397419929504395, "learning_rate": 0.00015771575837576215, "loss": 1.1951, "step": 148000 }, { "epoch": 4.420326635416355, "grad_norm": 4.333613872528076, "learning_rate": 0.00015770147257984331, "loss": 1.196, "step": 148050 }, { "epoch": 4.421819484668438, "grad_norm": 5.430739879608154, "learning_rate": 0.00015768718678392448, "loss": 1.1636, "step": 148100 }, { "epoch": 4.423312333920521, "grad_norm": 4.669544219970703, "learning_rate": 0.00015767290098800564, "loss": 1.2302, "step": 148150 }, { "epoch": 4.424805183172603, "grad_norm": 4.452166557312012, "learning_rate": 0.00015765861519208683, "loss": 1.1997, "step": 148200 }, { "epoch": 4.426298032424686, "grad_norm": 3.695939779281616, "learning_rate": 0.00015764432939616797, "loss": 1.2327, "step": 148250 }, { "epoch": 4.427790881676768, "grad_norm": 5.5830535888671875, "learning_rate": 0.00015763004360024916, "loss": 1.2306, "step": 148300 }, { "epoch": 4.429283730928851, "grad_norm": 3.978583335876465, "learning_rate": 0.0001576157578043303, "loss": 1.1639, "step": 148350 }, { "epoch": 4.430776580180933, "grad_norm": 3.8745334148406982, "learning_rate": 0.0001576014720084115, "loss": 1.22, "step": 148400 }, { "epoch": 4.432269429433016, "grad_norm": 4.7659711837768555, "learning_rate": 0.00015758718621249263, "loss": 1.1871, "step": 148450 }, { "epoch": 4.433762278685099, "grad_norm": 4.240323066711426, "learning_rate": 0.00015757290041657382, "loss": 1.1782, "step": 148500 }, { "epoch": 4.435255127937181, "grad_norm": 4.188791751861572, "learning_rate": 0.000157558614620655, "loss": 1.2258, "step": 148550 }, { "epoch": 4.436747977189263, "grad_norm": 4.706462860107422, "learning_rate": 0.00015754432882473615, "loss": 1.2095, "step": 148600 }, { "epoch": 4.438240826441346, "grad_norm": 3.878901958465576, "learning_rate": 0.00015753004302881732, "loss": 1.2249, "step": 148650 }, { "epoch": 4.439733675693429, "grad_norm": 3.91372013092041, "learning_rate": 0.00015751575723289848, "loss": 1.2382, "step": 148700 }, { "epoch": 4.441226524945511, "grad_norm": 3.9671475887298584, "learning_rate": 0.00015750147143697965, "loss": 1.2729, "step": 148750 }, { "epoch": 4.442719374197593, "grad_norm": 4.4688591957092285, "learning_rate": 0.0001574871856410608, "loss": 1.2032, "step": 148800 }, { "epoch": 4.444212223449676, "grad_norm": 4.55964469909668, "learning_rate": 0.00015747289984514198, "loss": 1.2129, "step": 148850 }, { "epoch": 4.445705072701759, "grad_norm": 5.789206027984619, "learning_rate": 0.00015745861404922314, "loss": 1.1947, "step": 148900 }, { "epoch": 4.447197921953841, "grad_norm": 4.169732570648193, "learning_rate": 0.0001574443282533043, "loss": 1.2135, "step": 148950 }, { "epoch": 4.448690771205923, "grad_norm": 4.305688858032227, "learning_rate": 0.0001574300424573855, "loss": 1.1931, "step": 149000 }, { "epoch": 4.450183620458006, "grad_norm": 5.780201435089111, "learning_rate": 0.00015741575666146663, "loss": 1.2308, "step": 149050 }, { "epoch": 4.451676469710089, "grad_norm": 5.42836332321167, "learning_rate": 0.00015740147086554783, "loss": 1.1898, "step": 149100 }, { "epoch": 4.453169318962171, "grad_norm": 4.967898845672607, "learning_rate": 0.00015738718506962896, "loss": 1.1961, "step": 149150 }, { "epoch": 4.454662168214254, "grad_norm": 4.566812038421631, "learning_rate": 0.00015737289927371015, "loss": 1.2633, "step": 149200 }, { "epoch": 4.456155017466337, "grad_norm": 4.523815155029297, "learning_rate": 0.0001573586134777913, "loss": 1.2104, "step": 149250 }, { "epoch": 4.4576478667184185, "grad_norm": 4.3855180740356445, "learning_rate": 0.00015734432768187248, "loss": 1.1904, "step": 149300 }, { "epoch": 4.459140715970501, "grad_norm": 4.116260051727295, "learning_rate": 0.00015733004188595365, "loss": 1.2162, "step": 149350 }, { "epoch": 4.460633565222584, "grad_norm": 4.964864253997803, "learning_rate": 0.0001573157560900348, "loss": 1.233, "step": 149400 }, { "epoch": 4.462126414474667, "grad_norm": 4.709635257720947, "learning_rate": 0.00015730147029411598, "loss": 1.2226, "step": 149450 }, { "epoch": 4.463619263726748, "grad_norm": 5.310390949249268, "learning_rate": 0.00015728718449819714, "loss": 1.1655, "step": 149500 }, { "epoch": 4.465112112978831, "grad_norm": 5.156966209411621, "learning_rate": 0.0001572728987022783, "loss": 1.1738, "step": 149550 }, { "epoch": 4.466604962230914, "grad_norm": 4.476001739501953, "learning_rate": 0.00015725861290635947, "loss": 1.188, "step": 149600 }, { "epoch": 4.468097811482997, "grad_norm": 4.519705295562744, "learning_rate": 0.00015724432711044064, "loss": 1.1512, "step": 149650 }, { "epoch": 4.469590660735079, "grad_norm": 5.426435470581055, "learning_rate": 0.0001572300413145218, "loss": 1.2137, "step": 149700 }, { "epoch": 4.471083509987161, "grad_norm": 3.799715757369995, "learning_rate": 0.00015721575551860297, "loss": 1.2298, "step": 149750 }, { "epoch": 4.472576359239244, "grad_norm": 3.640909194946289, "learning_rate": 0.00015720146972268416, "loss": 1.1837, "step": 149800 }, { "epoch": 4.4740692084913265, "grad_norm": 3.8437955379486084, "learning_rate": 0.0001571871839267653, "loss": 1.2025, "step": 149850 }, { "epoch": 4.475562057743409, "grad_norm": 3.9769036769866943, "learning_rate": 0.0001571728981308465, "loss": 1.1918, "step": 149900 }, { "epoch": 4.477054906995492, "grad_norm": 4.485401630401611, "learning_rate": 0.00015715861233492762, "loss": 1.1588, "step": 149950 }, { "epoch": 4.478547756247574, "grad_norm": 4.401375770568848, "learning_rate": 0.00015714432653900882, "loss": 1.2089, "step": 150000 }, { "epoch": 4.480040605499656, "grad_norm": 4.181282043457031, "learning_rate": 0.00015713004074308998, "loss": 1.2139, "step": 150050 }, { "epoch": 4.481533454751739, "grad_norm": 4.993724822998047, "learning_rate": 0.00015711575494717112, "loss": 1.2247, "step": 150100 }, { "epoch": 4.483026304003822, "grad_norm": 4.870810031890869, "learning_rate": 0.0001571014691512523, "loss": 1.281, "step": 150150 }, { "epoch": 4.4845191532559046, "grad_norm": 5.145561218261719, "learning_rate": 0.00015708718335533345, "loss": 1.2263, "step": 150200 }, { "epoch": 4.486012002507986, "grad_norm": 4.650093078613281, "learning_rate": 0.00015707289755941464, "loss": 1.2627, "step": 150250 }, { "epoch": 4.487504851760069, "grad_norm": 4.286979675292969, "learning_rate": 0.00015705861176349578, "loss": 1.1671, "step": 150300 }, { "epoch": 4.488997701012152, "grad_norm": 4.116031646728516, "learning_rate": 0.00015704432596757697, "loss": 1.1746, "step": 150350 }, { "epoch": 4.4904905502642345, "grad_norm": 4.999283313751221, "learning_rate": 0.00015703004017165813, "loss": 1.1993, "step": 150400 }, { "epoch": 4.491983399516317, "grad_norm": 5.618175983428955, "learning_rate": 0.0001570157543757393, "loss": 1.2608, "step": 150450 }, { "epoch": 4.493476248768399, "grad_norm": 4.471452236175537, "learning_rate": 0.00015700146857982046, "loss": 1.1811, "step": 150500 }, { "epoch": 4.494969098020482, "grad_norm": 4.4648518562316895, "learning_rate": 0.00015698718278390163, "loss": 1.2314, "step": 150550 }, { "epoch": 4.496461947272564, "grad_norm": 5.73147439956665, "learning_rate": 0.0001569728969879828, "loss": 1.243, "step": 150600 }, { "epoch": 4.497954796524647, "grad_norm": 6.473092555999756, "learning_rate": 0.00015695861119206396, "loss": 1.2208, "step": 150650 }, { "epoch": 4.49944764577673, "grad_norm": 6.348034381866455, "learning_rate": 0.00015694432539614512, "loss": 1.214, "step": 150700 }, { "epoch": 4.500940495028812, "grad_norm": 4.37158727645874, "learning_rate": 0.00015693003960022629, "loss": 1.2777, "step": 150750 }, { "epoch": 4.502433344280894, "grad_norm": 4.451486110687256, "learning_rate": 0.00015691575380430745, "loss": 1.1995, "step": 150800 }, { "epoch": 4.503926193532977, "grad_norm": 4.328061580657959, "learning_rate": 0.00015690146800838864, "loss": 1.225, "step": 150850 }, { "epoch": 4.50541904278506, "grad_norm": 5.565524578094482, "learning_rate": 0.00015688718221246978, "loss": 1.2471, "step": 150900 }, { "epoch": 4.506911892037142, "grad_norm": 4.532073497772217, "learning_rate": 0.00015687289641655097, "loss": 1.2396, "step": 150950 }, { "epoch": 4.508404741289224, "grad_norm": 5.4925618171691895, "learning_rate": 0.0001568586106206321, "loss": 1.2135, "step": 151000 }, { "epoch": 4.509897590541307, "grad_norm": 5.885026931762695, "learning_rate": 0.0001568443248247133, "loss": 1.2067, "step": 151050 }, { "epoch": 4.51139043979339, "grad_norm": 4.559401988983154, "learning_rate": 0.00015683003902879444, "loss": 1.1973, "step": 151100 }, { "epoch": 4.512883289045472, "grad_norm": 2.85530686378479, "learning_rate": 0.00015681575323287563, "loss": 1.2574, "step": 151150 }, { "epoch": 4.514376138297555, "grad_norm": 3.6753365993499756, "learning_rate": 0.0001568014674369568, "loss": 1.2358, "step": 151200 }, { "epoch": 4.515868987549637, "grad_norm": 4.216865539550781, "learning_rate": 0.00015678718164103796, "loss": 1.2624, "step": 151250 }, { "epoch": 4.51736183680172, "grad_norm": 4.118535995483398, "learning_rate": 0.00015677289584511912, "loss": 1.2322, "step": 151300 }, { "epoch": 4.518854686053802, "grad_norm": 3.65169095993042, "learning_rate": 0.0001567586100492003, "loss": 1.1924, "step": 151350 }, { "epoch": 4.520347535305885, "grad_norm": 4.938554763793945, "learning_rate": 0.00015674432425328145, "loss": 1.2083, "step": 151400 }, { "epoch": 4.521840384557967, "grad_norm": 5.959898471832275, "learning_rate": 0.00015673003845736262, "loss": 1.3091, "step": 151450 }, { "epoch": 4.52333323381005, "grad_norm": 4.388880252838135, "learning_rate": 0.00015671575266144378, "loss": 1.2481, "step": 151500 }, { "epoch": 4.524826083062132, "grad_norm": 4.406062602996826, "learning_rate": 0.00015670146686552495, "loss": 1.2337, "step": 151550 }, { "epoch": 4.526318932314215, "grad_norm": 3.515559196472168, "learning_rate": 0.0001566871810696061, "loss": 1.1663, "step": 151600 }, { "epoch": 4.527811781566298, "grad_norm": 5.7760701179504395, "learning_rate": 0.0001566728952736873, "loss": 1.1952, "step": 151650 }, { "epoch": 4.52930463081838, "grad_norm": 3.783639907836914, "learning_rate": 0.00015665860947776844, "loss": 1.2086, "step": 151700 }, { "epoch": 4.530797480070462, "grad_norm": 4.640422344207764, "learning_rate": 0.00015664432368184963, "loss": 1.2152, "step": 151750 }, { "epoch": 4.532290329322545, "grad_norm": 5.015472412109375, "learning_rate": 0.00015663003788593077, "loss": 1.2133, "step": 151800 }, { "epoch": 4.533783178574628, "grad_norm": 5.7063117027282715, "learning_rate": 0.00015661575209001196, "loss": 1.2345, "step": 151850 }, { "epoch": 4.53527602782671, "grad_norm": 4.530743598937988, "learning_rate": 0.0001566014662940931, "loss": 1.2408, "step": 151900 }, { "epoch": 4.536768877078792, "grad_norm": 3.597856044769287, "learning_rate": 0.0001565871804981743, "loss": 1.2222, "step": 151950 }, { "epoch": 4.538261726330875, "grad_norm": 5.361748218536377, "learning_rate": 0.00015657289470225546, "loss": 1.1898, "step": 152000 }, { "epoch": 4.539754575582958, "grad_norm": 6.137198448181152, "learning_rate": 0.00015655860890633662, "loss": 1.1988, "step": 152050 }, { "epoch": 4.54124742483504, "grad_norm": 4.32033634185791, "learning_rate": 0.00015654432311041779, "loss": 1.2221, "step": 152100 }, { "epoch": 4.542740274087123, "grad_norm": 4.112492084503174, "learning_rate": 0.00015653003731449895, "loss": 1.229, "step": 152150 }, { "epoch": 4.544233123339206, "grad_norm": 4.592637062072754, "learning_rate": 0.00015651575151858012, "loss": 1.2376, "step": 152200 }, { "epoch": 4.5457259725912875, "grad_norm": 4.471153736114502, "learning_rate": 0.00015650146572266128, "loss": 1.1976, "step": 152250 }, { "epoch": 4.54721882184337, "grad_norm": 6.161034107208252, "learning_rate": 0.00015648717992674244, "loss": 1.1973, "step": 152300 }, { "epoch": 4.548711671095453, "grad_norm": 4.57336950302124, "learning_rate": 0.0001564728941308236, "loss": 1.1773, "step": 152350 }, { "epoch": 4.550204520347536, "grad_norm": 3.936795949935913, "learning_rate": 0.00015645860833490477, "loss": 1.2448, "step": 152400 }, { "epoch": 4.5516973695996175, "grad_norm": 3.8005549907684326, "learning_rate": 0.00015644432253898597, "loss": 1.2391, "step": 152450 }, { "epoch": 4.5531902188517, "grad_norm": 4.863176345825195, "learning_rate": 0.0001564300367430671, "loss": 1.2185, "step": 152500 }, { "epoch": 4.554683068103783, "grad_norm": 4.168069362640381, "learning_rate": 0.0001564157509471483, "loss": 1.1793, "step": 152550 }, { "epoch": 4.556175917355866, "grad_norm": 4.241150856018066, "learning_rate": 0.00015640146515122943, "loss": 1.2087, "step": 152600 }, { "epoch": 4.557668766607948, "grad_norm": 5.077085971832275, "learning_rate": 0.00015638717935531062, "loss": 1.1999, "step": 152650 }, { "epoch": 4.55916161586003, "grad_norm": 4.880026817321777, "learning_rate": 0.00015637289355939176, "loss": 1.1691, "step": 152700 }, { "epoch": 4.560654465112113, "grad_norm": 4.0811662673950195, "learning_rate": 0.00015635860776347293, "loss": 1.2294, "step": 152750 }, { "epoch": 4.5621473143641955, "grad_norm": 3.9246151447296143, "learning_rate": 0.00015634432196755412, "loss": 1.2704, "step": 152800 }, { "epoch": 4.563640163616278, "grad_norm": 4.361538887023926, "learning_rate": 0.00015633003617163526, "loss": 1.2227, "step": 152850 }, { "epoch": 4.565133012868361, "grad_norm": 3.964625358581543, "learning_rate": 0.00015631575037571645, "loss": 1.2269, "step": 152900 }, { "epoch": 4.566625862120443, "grad_norm": 4.382391452789307, "learning_rate": 0.00015630146457979759, "loss": 1.2066, "step": 152950 }, { "epoch": 4.5681187113725255, "grad_norm": 4.823386192321777, "learning_rate": 0.00015628717878387878, "loss": 1.2191, "step": 153000 }, { "epoch": 4.569611560624608, "grad_norm": 4.2271223068237305, "learning_rate": 0.00015627289298795994, "loss": 1.2611, "step": 153050 }, { "epoch": 4.571104409876691, "grad_norm": 5.857795238494873, "learning_rate": 0.0001562586071920411, "loss": 1.2222, "step": 153100 }, { "epoch": 4.572597259128774, "grad_norm": 3.7787981033325195, "learning_rate": 0.00015624432139612227, "loss": 1.2648, "step": 153150 }, { "epoch": 4.574090108380855, "grad_norm": 4.284102916717529, "learning_rate": 0.00015623003560020344, "loss": 1.2814, "step": 153200 }, { "epoch": 4.575582957632938, "grad_norm": 6.498587131500244, "learning_rate": 0.0001562157498042846, "loss": 1.209, "step": 153250 }, { "epoch": 4.577075806885021, "grad_norm": 3.2543442249298096, "learning_rate": 0.00015620146400836577, "loss": 1.1867, "step": 153300 }, { "epoch": 4.5785686561371035, "grad_norm": 3.8081936836242676, "learning_rate": 0.00015618717821244693, "loss": 1.2242, "step": 153350 }, { "epoch": 4.580061505389185, "grad_norm": 3.8605005741119385, "learning_rate": 0.0001561728924165281, "loss": 1.1957, "step": 153400 }, { "epoch": 4.581554354641268, "grad_norm": 6.710892677307129, "learning_rate": 0.00015615860662060926, "loss": 1.1945, "step": 153450 }, { "epoch": 4.583047203893351, "grad_norm": 4.212560176849365, "learning_rate": 0.00015614432082469045, "loss": 1.2124, "step": 153500 }, { "epoch": 4.5845400531454334, "grad_norm": 4.561640739440918, "learning_rate": 0.0001561300350287716, "loss": 1.2355, "step": 153550 }, { "epoch": 4.586032902397516, "grad_norm": 3.4632205963134766, "learning_rate": 0.00015611574923285278, "loss": 1.2606, "step": 153600 }, { "epoch": 4.587525751649599, "grad_norm": 3.846022844314575, "learning_rate": 0.00015610146343693392, "loss": 1.2386, "step": 153650 }, { "epoch": 4.589018600901681, "grad_norm": 4.297937870025635, "learning_rate": 0.0001560871776410151, "loss": 1.2084, "step": 153700 }, { "epoch": 4.590511450153763, "grad_norm": 3.9638116359710693, "learning_rate": 0.00015607289184509625, "loss": 1.2334, "step": 153750 }, { "epoch": 4.592004299405846, "grad_norm": 5.708649158477783, "learning_rate": 0.00015605860604917744, "loss": 1.1872, "step": 153800 }, { "epoch": 4.593497148657929, "grad_norm": 4.930756092071533, "learning_rate": 0.0001560443202532586, "loss": 1.2585, "step": 153850 }, { "epoch": 4.594989997910011, "grad_norm": 4.887782573699951, "learning_rate": 0.00015603003445733977, "loss": 1.2325, "step": 153900 }, { "epoch": 4.596482847162093, "grad_norm": 5.5042290687561035, "learning_rate": 0.00015601574866142093, "loss": 1.261, "step": 153950 }, { "epoch": 4.597975696414176, "grad_norm": 5.776236534118652, "learning_rate": 0.0001560014628655021, "loss": 1.205, "step": 154000 }, { "epoch": 4.599468545666259, "grad_norm": 4.465933799743652, "learning_rate": 0.00015598717706958326, "loss": 1.2886, "step": 154050 }, { "epoch": 4.600961394918341, "grad_norm": 4.536434173583984, "learning_rate": 0.00015597289127366443, "loss": 1.2107, "step": 154100 }, { "epoch": 4.602454244170424, "grad_norm": 6.132566452026367, "learning_rate": 0.0001559586054777456, "loss": 1.2365, "step": 154150 }, { "epoch": 4.603947093422506, "grad_norm": 5.581535816192627, "learning_rate": 0.00015594431968182676, "loss": 1.2163, "step": 154200 }, { "epoch": 4.605439942674589, "grad_norm": 4.754735946655273, "learning_rate": 0.00015593003388590792, "loss": 1.1939, "step": 154250 }, { "epoch": 4.606932791926671, "grad_norm": 4.529079914093018, "learning_rate": 0.0001559157480899891, "loss": 1.2434, "step": 154300 }, { "epoch": 4.608425641178754, "grad_norm": 3.8123152256011963, "learning_rate": 0.00015590146229407025, "loss": 1.1727, "step": 154350 }, { "epoch": 4.609918490430836, "grad_norm": 7.249644756317139, "learning_rate": 0.00015588717649815144, "loss": 1.2392, "step": 154400 }, { "epoch": 4.611411339682919, "grad_norm": 5.355637073516846, "learning_rate": 0.00015587289070223258, "loss": 1.1899, "step": 154450 }, { "epoch": 4.612904188935001, "grad_norm": 3.952632427215576, "learning_rate": 0.00015585860490631377, "loss": 1.248, "step": 154500 }, { "epoch": 4.614397038187084, "grad_norm": 5.123134136199951, "learning_rate": 0.0001558443191103949, "loss": 1.1783, "step": 154550 }, { "epoch": 4.615889887439167, "grad_norm": 5.032320022583008, "learning_rate": 0.0001558300333144761, "loss": 1.2189, "step": 154600 }, { "epoch": 4.6173827366912485, "grad_norm": 4.798953533172607, "learning_rate": 0.00015581574751855726, "loss": 1.2064, "step": 154650 }, { "epoch": 4.618875585943331, "grad_norm": 4.720959663391113, "learning_rate": 0.00015580146172263843, "loss": 1.2685, "step": 154700 }, { "epoch": 4.620368435195414, "grad_norm": 5.178957939147949, "learning_rate": 0.0001557871759267196, "loss": 1.2969, "step": 154750 }, { "epoch": 4.621861284447497, "grad_norm": 2.8498313426971436, "learning_rate": 0.00015577289013080076, "loss": 1.2293, "step": 154800 }, { "epoch": 4.623354133699579, "grad_norm": 4.07916784286499, "learning_rate": 0.00015575860433488192, "loss": 1.2489, "step": 154850 }, { "epoch": 4.624846982951661, "grad_norm": 4.014176845550537, "learning_rate": 0.0001557443185389631, "loss": 1.2244, "step": 154900 }, { "epoch": 4.626339832203744, "grad_norm": 6.280478477478027, "learning_rate": 0.00015573003274304425, "loss": 1.2029, "step": 154950 }, { "epoch": 4.627832681455827, "grad_norm": 4.045797348022461, "learning_rate": 0.00015571574694712542, "loss": 1.1694, "step": 155000 }, { "epoch": 4.629325530707909, "grad_norm": 4.864063262939453, "learning_rate": 0.00015570146115120658, "loss": 1.1799, "step": 155050 }, { "epoch": 4.630818379959992, "grad_norm": 3.5234014987945557, "learning_rate": 0.00015568717535528777, "loss": 1.2153, "step": 155100 }, { "epoch": 4.632311229212074, "grad_norm": 4.5075273513793945, "learning_rate": 0.0001556728895593689, "loss": 1.1483, "step": 155150 }, { "epoch": 4.6338040784641565, "grad_norm": 5.258499622344971, "learning_rate": 0.0001556586037634501, "loss": 1.2606, "step": 155200 }, { "epoch": 4.635296927716239, "grad_norm": 5.108059883117676, "learning_rate": 0.00015564431796753124, "loss": 1.2517, "step": 155250 }, { "epoch": 4.636789776968322, "grad_norm": 4.033377647399902, "learning_rate": 0.00015563003217161243, "loss": 1.1887, "step": 155300 }, { "epoch": 4.638282626220404, "grad_norm": 5.546184062957764, "learning_rate": 0.00015561574637569357, "loss": 1.1573, "step": 155350 }, { "epoch": 4.6397754754724865, "grad_norm": 5.48836088180542, "learning_rate": 0.00015560146057977473, "loss": 1.2637, "step": 155400 }, { "epoch": 4.641268324724569, "grad_norm": 5.246400833129883, "learning_rate": 0.00015558717478385593, "loss": 1.2008, "step": 155450 }, { "epoch": 4.642761173976652, "grad_norm": 4.412383556365967, "learning_rate": 0.00015557288898793706, "loss": 1.168, "step": 155500 }, { "epoch": 4.644254023228735, "grad_norm": 3.9869327545166016, "learning_rate": 0.00015555860319201826, "loss": 1.1371, "step": 155550 }, { "epoch": 4.645746872480817, "grad_norm": 3.883467197418213, "learning_rate": 0.0001555443173960994, "loss": 1.216, "step": 155600 }, { "epoch": 4.647239721732899, "grad_norm": 4.335739612579346, "learning_rate": 0.00015553003160018059, "loss": 1.1792, "step": 155650 }, { "epoch": 4.648732570984982, "grad_norm": 4.266781806945801, "learning_rate": 0.00015551574580426175, "loss": 1.2357, "step": 155700 }, { "epoch": 4.6502254202370645, "grad_norm": 4.26521635055542, "learning_rate": 0.00015550146000834291, "loss": 1.2613, "step": 155750 }, { "epoch": 4.651718269489147, "grad_norm": 4.667625427246094, "learning_rate": 0.00015548717421242408, "loss": 1.213, "step": 155800 }, { "epoch": 4.653211118741229, "grad_norm": 4.781721591949463, "learning_rate": 0.00015547288841650524, "loss": 1.2303, "step": 155850 }, { "epoch": 4.654703967993312, "grad_norm": 4.377721309661865, "learning_rate": 0.0001554586026205864, "loss": 1.2062, "step": 155900 }, { "epoch": 4.6561968172453945, "grad_norm": 3.987110137939453, "learning_rate": 0.00015544431682466757, "loss": 1.1393, "step": 155950 }, { "epoch": 4.657689666497477, "grad_norm": 5.804355144500732, "learning_rate": 0.00015543003102874874, "loss": 1.2368, "step": 156000 }, { "epoch": 4.65918251574956, "grad_norm": 4.505835056304932, "learning_rate": 0.0001554157452328299, "loss": 1.2318, "step": 156050 }, { "epoch": 4.660675365001643, "grad_norm": 5.454282760620117, "learning_rate": 0.00015540145943691107, "loss": 1.2058, "step": 156100 }, { "epoch": 4.662168214253724, "grad_norm": 4.576279163360596, "learning_rate": 0.00015538717364099223, "loss": 1.1932, "step": 156150 }, { "epoch": 4.663661063505807, "grad_norm": 4.093862533569336, "learning_rate": 0.0001553728878450734, "loss": 1.2505, "step": 156200 }, { "epoch": 4.66515391275789, "grad_norm": 4.815280437469482, "learning_rate": 0.0001553586020491546, "loss": 1.3025, "step": 156250 }, { "epoch": 4.6666467620099725, "grad_norm": 5.615932464599609, "learning_rate": 0.00015534431625323573, "loss": 1.2428, "step": 156300 }, { "epoch": 4.668139611262054, "grad_norm": 4.396317005157471, "learning_rate": 0.00015533003045731692, "loss": 1.2452, "step": 156350 }, { "epoch": 4.669632460514137, "grad_norm": 5.061831951141357, "learning_rate": 0.00015531574466139805, "loss": 1.2262, "step": 156400 }, { "epoch": 4.67112530976622, "grad_norm": 4.902180194854736, "learning_rate": 0.00015530145886547925, "loss": 1.2485, "step": 156450 }, { "epoch": 4.6726181590183025, "grad_norm": 5.020074367523193, "learning_rate": 0.0001552871730695604, "loss": 1.2692, "step": 156500 }, { "epoch": 4.674111008270385, "grad_norm": 4.696333885192871, "learning_rate": 0.00015527288727364158, "loss": 1.2462, "step": 156550 }, { "epoch": 4.675603857522468, "grad_norm": 4.284012794494629, "learning_rate": 0.00015525860147772274, "loss": 1.1821, "step": 156600 }, { "epoch": 4.67709670677455, "grad_norm": 5.056790828704834, "learning_rate": 0.0001552443156818039, "loss": 1.2571, "step": 156650 }, { "epoch": 4.678589556026632, "grad_norm": 5.424617767333984, "learning_rate": 0.00015523002988588507, "loss": 1.2369, "step": 156700 }, { "epoch": 4.680082405278715, "grad_norm": 4.2647624015808105, "learning_rate": 0.00015521574408996623, "loss": 1.1531, "step": 156750 }, { "epoch": 4.681575254530798, "grad_norm": 5.538541316986084, "learning_rate": 0.0001552014582940474, "loss": 1.1512, "step": 156800 }, { "epoch": 4.68306810378288, "grad_norm": 5.385500907897949, "learning_rate": 0.00015518717249812856, "loss": 1.1912, "step": 156850 }, { "epoch": 4.684560953034962, "grad_norm": 4.456097602844238, "learning_rate": 0.00015517288670220973, "loss": 1.266, "step": 156900 }, { "epoch": 4.686053802287045, "grad_norm": 4.176798343658447, "learning_rate": 0.00015515860090629092, "loss": 1.1689, "step": 156950 }, { "epoch": 4.687546651539128, "grad_norm": 4.8275980949401855, "learning_rate": 0.00015514431511037206, "loss": 1.2168, "step": 157000 }, { "epoch": 4.6890395007912105, "grad_norm": 4.710529327392578, "learning_rate": 0.00015513002931445325, "loss": 1.2356, "step": 157050 }, { "epoch": 4.690532350043292, "grad_norm": 3.7281603813171387, "learning_rate": 0.0001551157435185344, "loss": 1.1486, "step": 157100 }, { "epoch": 4.692025199295375, "grad_norm": 5.062285900115967, "learning_rate": 0.00015510145772261558, "loss": 1.215, "step": 157150 }, { "epoch": 4.693518048547458, "grad_norm": 3.41444993019104, "learning_rate": 0.00015508717192669672, "loss": 1.2052, "step": 157200 }, { "epoch": 4.69501089779954, "grad_norm": 3.7359328269958496, "learning_rate": 0.0001550728861307779, "loss": 1.2664, "step": 157250 }, { "epoch": 4.696503747051623, "grad_norm": 4.192359924316406, "learning_rate": 0.00015505860033485907, "loss": 1.2171, "step": 157300 }, { "epoch": 4.697996596303705, "grad_norm": 5.198591232299805, "learning_rate": 0.00015504431453894024, "loss": 1.2094, "step": 157350 }, { "epoch": 4.699489445555788, "grad_norm": 4.99566650390625, "learning_rate": 0.0001550300287430214, "loss": 1.1983, "step": 157400 }, { "epoch": 4.70098229480787, "grad_norm": 4.944712162017822, "learning_rate": 0.00015501574294710257, "loss": 1.2355, "step": 157450 }, { "epoch": 4.702475144059953, "grad_norm": 4.957249164581299, "learning_rate": 0.00015500145715118373, "loss": 1.1986, "step": 157500 }, { "epoch": 4.703967993312036, "grad_norm": 3.442450523376465, "learning_rate": 0.0001549871713552649, "loss": 1.2072, "step": 157550 }, { "epoch": 4.705460842564118, "grad_norm": 4.920107364654541, "learning_rate": 0.00015497288555934606, "loss": 1.2014, "step": 157600 }, { "epoch": 4.7069536918162, "grad_norm": 3.697744131088257, "learning_rate": 0.00015495859976342723, "loss": 1.1869, "step": 157650 }, { "epoch": 4.708446541068283, "grad_norm": 6.023653984069824, "learning_rate": 0.0001549443139675084, "loss": 1.2255, "step": 157700 }, { "epoch": 4.709939390320366, "grad_norm": 4.989515781402588, "learning_rate": 0.00015493002817158958, "loss": 1.1685, "step": 157750 }, { "epoch": 4.7114322395724475, "grad_norm": 3.7004759311676025, "learning_rate": 0.00015491574237567072, "loss": 1.1584, "step": 157800 }, { "epoch": 4.71292508882453, "grad_norm": 4.318152904510498, "learning_rate": 0.0001549014565797519, "loss": 1.2163, "step": 157850 }, { "epoch": 4.714417938076613, "grad_norm": 4.550259590148926, "learning_rate": 0.00015488717078383305, "loss": 1.2507, "step": 157900 }, { "epoch": 4.715910787328696, "grad_norm": 4.413512706756592, "learning_rate": 0.00015487288498791424, "loss": 1.2037, "step": 157950 }, { "epoch": 4.717403636580778, "grad_norm": 3.8001725673675537, "learning_rate": 0.00015485859919199538, "loss": 1.2543, "step": 158000 }, { "epoch": 4.718896485832861, "grad_norm": 3.4299263954162598, "learning_rate": 0.00015484431339607654, "loss": 1.1916, "step": 158050 }, { "epoch": 4.720389335084943, "grad_norm": 4.620875835418701, "learning_rate": 0.00015483002760015773, "loss": 1.2697, "step": 158100 }, { "epoch": 4.721882184337026, "grad_norm": 4.62977409362793, "learning_rate": 0.00015481574180423887, "loss": 1.1801, "step": 158150 }, { "epoch": 4.723375033589108, "grad_norm": 3.916433572769165, "learning_rate": 0.00015480145600832006, "loss": 1.1801, "step": 158200 }, { "epoch": 4.724867882841191, "grad_norm": 4.217404842376709, "learning_rate": 0.0001547871702124012, "loss": 1.2328, "step": 158250 }, { "epoch": 4.726360732093273, "grad_norm": 6.347287654876709, "learning_rate": 0.0001547728844164824, "loss": 1.207, "step": 158300 }, { "epoch": 4.7278535813453555, "grad_norm": 4.39561653137207, "learning_rate": 0.00015475859862056353, "loss": 1.1908, "step": 158350 }, { "epoch": 4.729346430597438, "grad_norm": 4.472975730895996, "learning_rate": 0.00015474431282464472, "loss": 1.2007, "step": 158400 }, { "epoch": 4.730839279849521, "grad_norm": 4.133266448974609, "learning_rate": 0.0001547300270287259, "loss": 1.2425, "step": 158450 }, { "epoch": 4.732332129101604, "grad_norm": 3.9597208499908447, "learning_rate": 0.00015471574123280705, "loss": 1.2095, "step": 158500 }, { "epoch": 4.733824978353686, "grad_norm": 4.040492057800293, "learning_rate": 0.00015470145543688822, "loss": 1.1701, "step": 158550 }, { "epoch": 4.735317827605768, "grad_norm": 4.925788402557373, "learning_rate": 0.00015468716964096938, "loss": 1.2441, "step": 158600 }, { "epoch": 4.736810676857851, "grad_norm": 3.6005380153656006, "learning_rate": 0.00015467288384505055, "loss": 1.2654, "step": 158650 }, { "epoch": 4.7383035261099336, "grad_norm": 4.446198463439941, "learning_rate": 0.0001546585980491317, "loss": 1.2385, "step": 158700 }, { "epoch": 4.739796375362016, "grad_norm": 4.482244491577148, "learning_rate": 0.00015464431225321288, "loss": 1.2554, "step": 158750 }, { "epoch": 4.741289224614098, "grad_norm": 3.8426311016082764, "learning_rate": 0.00015463002645729404, "loss": 1.1721, "step": 158800 }, { "epoch": 4.742782073866181, "grad_norm": 5.686476707458496, "learning_rate": 0.0001546157406613752, "loss": 1.2192, "step": 158850 }, { "epoch": 4.7442749231182635, "grad_norm": 5.65896463394165, "learning_rate": 0.0001546014548654564, "loss": 1.1794, "step": 158900 }, { "epoch": 4.745767772370346, "grad_norm": 7.737217426300049, "learning_rate": 0.00015458716906953753, "loss": 1.259, "step": 158950 }, { "epoch": 4.747260621622429, "grad_norm": 5.426517963409424, "learning_rate": 0.00015457288327361873, "loss": 1.2547, "step": 159000 }, { "epoch": 4.748753470874511, "grad_norm": 4.5140509605407715, "learning_rate": 0.00015455859747769986, "loss": 1.2253, "step": 159050 }, { "epoch": 4.750246320126593, "grad_norm": 4.854976177215576, "learning_rate": 0.00015454431168178105, "loss": 1.2273, "step": 159100 }, { "epoch": 4.751739169378676, "grad_norm": 5.603672504425049, "learning_rate": 0.00015453002588586222, "loss": 1.2483, "step": 159150 }, { "epoch": 4.753232018630759, "grad_norm": 5.602535247802734, "learning_rate": 0.00015451574008994338, "loss": 1.2304, "step": 159200 }, { "epoch": 4.7547248678828415, "grad_norm": 5.0422773361206055, "learning_rate": 0.00015450145429402455, "loss": 1.2187, "step": 159250 }, { "epoch": 4.756217717134923, "grad_norm": 4.779214859008789, "learning_rate": 0.0001544871684981057, "loss": 1.1922, "step": 159300 }, { "epoch": 4.757710566387006, "grad_norm": 3.4088544845581055, "learning_rate": 0.00015447288270218688, "loss": 1.2072, "step": 159350 }, { "epoch": 4.759203415639089, "grad_norm": 5.209439277648926, "learning_rate": 0.00015445859690626804, "loss": 1.2119, "step": 159400 }, { "epoch": 4.7606962648911715, "grad_norm": 4.001313209533691, "learning_rate": 0.0001544443111103492, "loss": 1.2626, "step": 159450 }, { "epoch": 4.762189114143254, "grad_norm": 3.787076234817505, "learning_rate": 0.00015443002531443037, "loss": 1.2197, "step": 159500 }, { "epoch": 4.763681963395336, "grad_norm": 4.906338214874268, "learning_rate": 0.00015441573951851154, "loss": 1.2964, "step": 159550 }, { "epoch": 4.765174812647419, "grad_norm": 5.104478359222412, "learning_rate": 0.00015440145372259273, "loss": 1.23, "step": 159600 }, { "epoch": 4.766667661899501, "grad_norm": 4.3545918464660645, "learning_rate": 0.00015438716792667387, "loss": 1.2015, "step": 159650 }, { "epoch": 4.768160511151584, "grad_norm": 4.524704456329346, "learning_rate": 0.00015437288213075506, "loss": 1.2358, "step": 159700 }, { "epoch": 4.769653360403666, "grad_norm": 4.5480637550354, "learning_rate": 0.0001543585963348362, "loss": 1.1828, "step": 159750 }, { "epoch": 4.771146209655749, "grad_norm": 4.825510501861572, "learning_rate": 0.0001543443105389174, "loss": 1.1657, "step": 159800 }, { "epoch": 4.772639058907831, "grad_norm": 3.6792662143707275, "learning_rate": 0.00015433002474299852, "loss": 1.2789, "step": 159850 }, { "epoch": 4.774131908159914, "grad_norm": 4.740036964416504, "learning_rate": 0.00015431573894707972, "loss": 1.2292, "step": 159900 }, { "epoch": 4.775624757411997, "grad_norm": 5.538552761077881, "learning_rate": 0.00015430145315116088, "loss": 1.1691, "step": 159950 }, { "epoch": 4.7771176066640795, "grad_norm": 4.624987602233887, "learning_rate": 0.00015428716735524205, "loss": 1.2157, "step": 160000 }, { "epoch": 4.778610455916161, "grad_norm": 6.003055572509766, "learning_rate": 0.0001542728815593232, "loss": 1.2021, "step": 160050 }, { "epoch": 4.780103305168244, "grad_norm": 3.8452155590057373, "learning_rate": 0.00015425859576340437, "loss": 1.254, "step": 160100 }, { "epoch": 4.781596154420327, "grad_norm": 4.149327278137207, "learning_rate": 0.00015424430996748554, "loss": 1.235, "step": 160150 }, { "epoch": 4.783089003672409, "grad_norm": 4.331029415130615, "learning_rate": 0.0001542300241715667, "loss": 1.2077, "step": 160200 }, { "epoch": 4.784581852924491, "grad_norm": 4.8423662185668945, "learning_rate": 0.00015421573837564787, "loss": 1.2223, "step": 160250 }, { "epoch": 4.786074702176574, "grad_norm": 4.183012008666992, "learning_rate": 0.00015420145257972903, "loss": 1.2438, "step": 160300 }, { "epoch": 4.787567551428657, "grad_norm": 3.7776644229888916, "learning_rate": 0.0001541871667838102, "loss": 1.2093, "step": 160350 }, { "epoch": 4.789060400680739, "grad_norm": 4.586690902709961, "learning_rate": 0.0001541728809878914, "loss": 1.2353, "step": 160400 }, { "epoch": 4.790553249932822, "grad_norm": 4.448139190673828, "learning_rate": 0.00015415859519197253, "loss": 1.2149, "step": 160450 }, { "epoch": 4.792046099184905, "grad_norm": 4.895113945007324, "learning_rate": 0.00015414430939605372, "loss": 1.2682, "step": 160500 }, { "epoch": 4.793538948436987, "grad_norm": 6.31041955947876, "learning_rate": 0.00015413002360013486, "loss": 1.2296, "step": 160550 }, { "epoch": 4.795031797689069, "grad_norm": 5.237212181091309, "learning_rate": 0.00015411573780421602, "loss": 1.2121, "step": 160600 }, { "epoch": 4.796524646941152, "grad_norm": 5.017521381378174, "learning_rate": 0.00015410145200829719, "loss": 1.2413, "step": 160650 }, { "epoch": 4.798017496193235, "grad_norm": 4.569025039672852, "learning_rate": 0.00015408716621237835, "loss": 1.2321, "step": 160700 }, { "epoch": 4.7995103454453165, "grad_norm": 4.4759650230407715, "learning_rate": 0.00015407288041645954, "loss": 1.2579, "step": 160750 }, { "epoch": 4.801003194697399, "grad_norm": 4.48403263092041, "learning_rate": 0.00015405859462054068, "loss": 1.2332, "step": 160800 }, { "epoch": 4.802496043949482, "grad_norm": 4.603891849517822, "learning_rate": 0.00015404430882462187, "loss": 1.2004, "step": 160850 }, { "epoch": 4.803988893201565, "grad_norm": 3.970634937286377, "learning_rate": 0.000154030023028703, "loss": 1.2121, "step": 160900 }, { "epoch": 4.805481742453647, "grad_norm": 5.254501819610596, "learning_rate": 0.0001540157372327842, "loss": 1.3071, "step": 160950 }, { "epoch": 4.80697459170573, "grad_norm": 3.6827309131622314, "learning_rate": 0.00015400145143686534, "loss": 1.2399, "step": 161000 }, { "epoch": 4.808467440957812, "grad_norm": 4.880918502807617, "learning_rate": 0.00015398716564094653, "loss": 1.1842, "step": 161050 }, { "epoch": 4.809960290209895, "grad_norm": 3.4792227745056152, "learning_rate": 0.0001539728798450277, "loss": 1.22, "step": 161100 }, { "epoch": 4.811453139461977, "grad_norm": 4.746030807495117, "learning_rate": 0.00015395859404910886, "loss": 1.2068, "step": 161150 }, { "epoch": 4.81294598871406, "grad_norm": 4.76997709274292, "learning_rate": 0.00015394430825319002, "loss": 1.2713, "step": 161200 }, { "epoch": 4.814438837966142, "grad_norm": 3.699706792831421, "learning_rate": 0.0001539300224572712, "loss": 1.2059, "step": 161250 }, { "epoch": 4.8159316872182245, "grad_norm": 5.2487664222717285, "learning_rate": 0.00015391573666135235, "loss": 1.2295, "step": 161300 }, { "epoch": 4.817424536470307, "grad_norm": 4.549964904785156, "learning_rate": 0.00015390145086543352, "loss": 1.2362, "step": 161350 }, { "epoch": 4.81891738572239, "grad_norm": 6.63731575012207, "learning_rate": 0.00015388716506951468, "loss": 1.2018, "step": 161400 }, { "epoch": 4.820410234974473, "grad_norm": 4.054471969604492, "learning_rate": 0.00015387287927359585, "loss": 1.2234, "step": 161450 }, { "epoch": 4.8219030842265544, "grad_norm": 4.350498199462891, "learning_rate": 0.000153858593477677, "loss": 1.2006, "step": 161500 }, { "epoch": 4.823395933478637, "grad_norm": 4.074748992919922, "learning_rate": 0.0001538443076817582, "loss": 1.2353, "step": 161550 }, { "epoch": 4.82488878273072, "grad_norm": 4.194019317626953, "learning_rate": 0.00015383002188583934, "loss": 1.2294, "step": 161600 }, { "epoch": 4.826381631982803, "grad_norm": 4.11326789855957, "learning_rate": 0.00015381573608992053, "loss": 1.2195, "step": 161650 }, { "epoch": 4.827874481234885, "grad_norm": 5.007305145263672, "learning_rate": 0.00015380145029400167, "loss": 1.221, "step": 161700 }, { "epoch": 4.829367330486967, "grad_norm": 5.089259624481201, "learning_rate": 0.00015378716449808286, "loss": 1.1968, "step": 161750 }, { "epoch": 4.83086017973905, "grad_norm": 5.502153396606445, "learning_rate": 0.000153772878702164, "loss": 1.2386, "step": 161800 }, { "epoch": 4.8323530289911325, "grad_norm": 4.014642238616943, "learning_rate": 0.0001537585929062452, "loss": 1.2358, "step": 161850 }, { "epoch": 4.833845878243215, "grad_norm": 4.9020915031433105, "learning_rate": 0.00015374430711032636, "loss": 1.1838, "step": 161900 }, { "epoch": 4.835338727495298, "grad_norm": 3.0818114280700684, "learning_rate": 0.00015373002131440752, "loss": 1.3071, "step": 161950 }, { "epoch": 4.83683157674738, "grad_norm": 4.447099208831787, "learning_rate": 0.00015371573551848869, "loss": 1.2656, "step": 162000 }, { "epoch": 4.838324425999462, "grad_norm": 3.805255174636841, "learning_rate": 0.00015370144972256985, "loss": 1.233, "step": 162050 }, { "epoch": 4.839817275251545, "grad_norm": 4.1598968505859375, "learning_rate": 0.00015368716392665102, "loss": 1.256, "step": 162100 }, { "epoch": 4.841310124503628, "grad_norm": 4.9868364334106445, "learning_rate": 0.00015367287813073218, "loss": 1.1727, "step": 162150 }, { "epoch": 4.84280297375571, "grad_norm": 5.927983283996582, "learning_rate": 0.00015365859233481334, "loss": 1.1832, "step": 162200 }, { "epoch": 4.844295823007792, "grad_norm": 4.542764663696289, "learning_rate": 0.0001536443065388945, "loss": 1.2064, "step": 162250 }, { "epoch": 4.845788672259875, "grad_norm": 5.132274627685547, "learning_rate": 0.00015363002074297567, "loss": 1.2295, "step": 162300 }, { "epoch": 4.847281521511958, "grad_norm": 4.797247409820557, "learning_rate": 0.00015361573494705687, "loss": 1.1985, "step": 162350 }, { "epoch": 4.8487743707640405, "grad_norm": 3.799029588699341, "learning_rate": 0.000153601449151138, "loss": 1.2029, "step": 162400 }, { "epoch": 4.850267220016123, "grad_norm": 3.758556842803955, "learning_rate": 0.0001535871633552192, "loss": 1.2401, "step": 162450 }, { "epoch": 4.851760069268205, "grad_norm": 4.282034397125244, "learning_rate": 0.00015357287755930033, "loss": 1.1842, "step": 162500 }, { "epoch": 4.853252918520288, "grad_norm": 4.154748916625977, "learning_rate": 0.00015355859176338152, "loss": 1.2201, "step": 162550 }, { "epoch": 4.85474576777237, "grad_norm": 4.305574893951416, "learning_rate": 0.0001535443059674627, "loss": 1.206, "step": 162600 }, { "epoch": 4.856238617024453, "grad_norm": 4.58249568939209, "learning_rate": 0.00015353002017154385, "loss": 1.1851, "step": 162650 }, { "epoch": 4.857731466276535, "grad_norm": 3.7399301528930664, "learning_rate": 0.00015351573437562502, "loss": 1.2155, "step": 162700 }, { "epoch": 4.859224315528618, "grad_norm": 4.139153480529785, "learning_rate": 0.00015350144857970618, "loss": 1.2233, "step": 162750 }, { "epoch": 4.8607171647807, "grad_norm": 4.756731986999512, "learning_rate": 0.00015348716278378735, "loss": 1.2671, "step": 162800 }, { "epoch": 4.862210014032783, "grad_norm": 4.127963542938232, "learning_rate": 0.0001534728769878685, "loss": 1.2087, "step": 162850 }, { "epoch": 4.863702863284866, "grad_norm": 5.56052303314209, "learning_rate": 0.00015345859119194968, "loss": 1.2257, "step": 162900 }, { "epoch": 4.8651957125369485, "grad_norm": 6.622554302215576, "learning_rate": 0.00015344430539603084, "loss": 1.2274, "step": 162950 }, { "epoch": 4.86668856178903, "grad_norm": 3.089730739593506, "learning_rate": 0.000153430019600112, "loss": 1.2615, "step": 163000 }, { "epoch": 4.868181411041113, "grad_norm": 5.314934253692627, "learning_rate": 0.0001534157338041932, "loss": 1.2035, "step": 163050 }, { "epoch": 4.869674260293196, "grad_norm": 4.352352619171143, "learning_rate": 0.00015340144800827434, "loss": 1.2113, "step": 163100 }, { "epoch": 4.871167109545278, "grad_norm": 4.171778202056885, "learning_rate": 0.00015338716221235553, "loss": 1.1977, "step": 163150 }, { "epoch": 4.87265995879736, "grad_norm": 3.6624767780303955, "learning_rate": 0.00015337287641643666, "loss": 1.1419, "step": 163200 }, { "epoch": 4.874152808049443, "grad_norm": 3.778160810470581, "learning_rate": 0.00015335859062051783, "loss": 1.257, "step": 163250 }, { "epoch": 4.875645657301526, "grad_norm": 4.410187244415283, "learning_rate": 0.000153344304824599, "loss": 1.1846, "step": 163300 }, { "epoch": 4.877138506553608, "grad_norm": 4.407317161560059, "learning_rate": 0.00015333001902868016, "loss": 1.2594, "step": 163350 }, { "epoch": 4.878631355805691, "grad_norm": 3.622882843017578, "learning_rate": 0.00015331573323276135, "loss": 1.1928, "step": 163400 }, { "epoch": 4.880124205057773, "grad_norm": 4.268970012664795, "learning_rate": 0.0001533014474368425, "loss": 1.2575, "step": 163450 }, { "epoch": 4.881617054309856, "grad_norm": 5.875527381896973, "learning_rate": 0.00015328716164092368, "loss": 1.1402, "step": 163500 }, { "epoch": 4.883109903561938, "grad_norm": 4.78771448135376, "learning_rate": 0.00015327287584500482, "loss": 1.2003, "step": 163550 }, { "epoch": 4.884602752814021, "grad_norm": 4.128746509552002, "learning_rate": 0.000153258590049086, "loss": 1.1964, "step": 163600 }, { "epoch": 4.886095602066104, "grad_norm": 2.846057176589966, "learning_rate": 0.00015324430425316715, "loss": 1.2154, "step": 163650 }, { "epoch": 4.8875884513181855, "grad_norm": 3.893822193145752, "learning_rate": 0.00015323001845724834, "loss": 1.1727, "step": 163700 }, { "epoch": 4.889081300570268, "grad_norm": 3.671567916870117, "learning_rate": 0.0001532157326613295, "loss": 1.247, "step": 163750 }, { "epoch": 4.890574149822351, "grad_norm": 4.6587066650390625, "learning_rate": 0.00015320144686541067, "loss": 1.2278, "step": 163800 }, { "epoch": 4.892066999074434, "grad_norm": 4.010086536407471, "learning_rate": 0.00015318716106949183, "loss": 1.2153, "step": 163850 }, { "epoch": 4.893559848326516, "grad_norm": 3.343904495239258, "learning_rate": 0.000153172875273573, "loss": 1.1807, "step": 163900 }, { "epoch": 4.895052697578598, "grad_norm": 4.4615702629089355, "learning_rate": 0.00015315858947765416, "loss": 1.1961, "step": 163950 }, { "epoch": 4.896545546830681, "grad_norm": 4.361425399780273, "learning_rate": 0.00015314430368173533, "loss": 1.2436, "step": 164000 }, { "epoch": 4.898038396082764, "grad_norm": 4.161865234375, "learning_rate": 0.0001531300178858165, "loss": 1.2104, "step": 164050 }, { "epoch": 4.899531245334846, "grad_norm": 5.719996929168701, "learning_rate": 0.00015311573208989766, "loss": 1.2363, "step": 164100 }, { "epoch": 4.901024094586928, "grad_norm": 4.253158092498779, "learning_rate": 0.00015310144629397882, "loss": 1.2513, "step": 164150 }, { "epoch": 4.902516943839011, "grad_norm": 2.575803518295288, "learning_rate": 0.00015308716049806, "loss": 1.1488, "step": 164200 }, { "epoch": 4.9040097930910935, "grad_norm": 6.142227649688721, "learning_rate": 0.00015307287470214115, "loss": 1.2197, "step": 164250 }, { "epoch": 4.905502642343176, "grad_norm": 5.662699222564697, "learning_rate": 0.00015305858890622234, "loss": 1.2282, "step": 164300 }, { "epoch": 4.906995491595259, "grad_norm": 4.345313549041748, "learning_rate": 0.00015304430311030348, "loss": 1.263, "step": 164350 }, { "epoch": 4.908488340847342, "grad_norm": 4.106235980987549, "learning_rate": 0.00015303001731438467, "loss": 1.2568, "step": 164400 }, { "epoch": 4.9099811900994235, "grad_norm": 5.795431613922119, "learning_rate": 0.0001530157315184658, "loss": 1.1668, "step": 164450 }, { "epoch": 4.911474039351506, "grad_norm": 4.605996608734131, "learning_rate": 0.000153001445722547, "loss": 1.2513, "step": 164500 }, { "epoch": 4.912966888603589, "grad_norm": 6.11320686340332, "learning_rate": 0.00015298715992662816, "loss": 1.1922, "step": 164550 }, { "epoch": 4.914459737855672, "grad_norm": 6.641072750091553, "learning_rate": 0.00015297287413070933, "loss": 1.2272, "step": 164600 }, { "epoch": 4.915952587107753, "grad_norm": 3.955686092376709, "learning_rate": 0.0001529585883347905, "loss": 1.2089, "step": 164650 }, { "epoch": 4.917445436359836, "grad_norm": 3.445519208908081, "learning_rate": 0.00015294430253887166, "loss": 1.2013, "step": 164700 }, { "epoch": 4.918938285611919, "grad_norm": 5.487844944000244, "learning_rate": 0.00015293001674295282, "loss": 1.2818, "step": 164750 }, { "epoch": 4.9204311348640015, "grad_norm": 5.098935127258301, "learning_rate": 0.000152915730947034, "loss": 1.2195, "step": 164800 }, { "epoch": 4.921923984116084, "grad_norm": 3.2200589179992676, "learning_rate": 0.00015290144515111515, "loss": 1.2425, "step": 164850 }, { "epoch": 4.923416833368167, "grad_norm": 5.7381205558776855, "learning_rate": 0.00015288715935519632, "loss": 1.2012, "step": 164900 }, { "epoch": 4.924909682620249, "grad_norm": 4.34859037399292, "learning_rate": 0.00015287287355927748, "loss": 1.2595, "step": 164950 }, { "epoch": 4.9264025318723315, "grad_norm": 4.303558349609375, "learning_rate": 0.00015285858776335867, "loss": 1.2802, "step": 165000 }, { "epoch": 4.927895381124414, "grad_norm": 3.9082958698272705, "learning_rate": 0.0001528443019674398, "loss": 1.2252, "step": 165050 }, { "epoch": 4.929388230376497, "grad_norm": 3.7715795040130615, "learning_rate": 0.000152830016171521, "loss": 1.2849, "step": 165100 }, { "epoch": 4.930881079628579, "grad_norm": 5.810252666473389, "learning_rate": 0.00015281573037560214, "loss": 1.1903, "step": 165150 }, { "epoch": 4.932373928880661, "grad_norm": 4.941216468811035, "learning_rate": 0.00015280144457968333, "loss": 1.199, "step": 165200 }, { "epoch": 4.933866778132744, "grad_norm": 4.764664173126221, "learning_rate": 0.0001527871587837645, "loss": 1.2358, "step": 165250 }, { "epoch": 4.935359627384827, "grad_norm": 4.884801387786865, "learning_rate": 0.00015277287298784566, "loss": 1.168, "step": 165300 }, { "epoch": 4.9368524766369095, "grad_norm": 4.915900230407715, "learning_rate": 0.00015275858719192683, "loss": 1.2395, "step": 165350 }, { "epoch": 4.938345325888992, "grad_norm": 4.347561836242676, "learning_rate": 0.000152744301396008, "loss": 1.2134, "step": 165400 }, { "epoch": 4.939838175141074, "grad_norm": 5.786562919616699, "learning_rate": 0.00015273001560008916, "loss": 1.2413, "step": 165450 }, { "epoch": 4.941331024393157, "grad_norm": 4.780857086181641, "learning_rate": 0.00015271572980417032, "loss": 1.2336, "step": 165500 }, { "epoch": 4.9428238736452395, "grad_norm": 4.805835723876953, "learning_rate": 0.00015270144400825148, "loss": 1.2464, "step": 165550 }, { "epoch": 4.944316722897322, "grad_norm": 4.181761264801025, "learning_rate": 0.00015268715821233265, "loss": 1.2486, "step": 165600 }, { "epoch": 4.945809572149404, "grad_norm": 6.043191432952881, "learning_rate": 0.00015267287241641381, "loss": 1.2485, "step": 165650 }, { "epoch": 4.947302421401487, "grad_norm": 3.7125275135040283, "learning_rate": 0.00015265858662049498, "loss": 1.2216, "step": 165700 }, { "epoch": 4.948795270653569, "grad_norm": 4.299423694610596, "learning_rate": 0.00015264430082457614, "loss": 1.1784, "step": 165750 }, { "epoch": 4.950288119905652, "grad_norm": 3.839174270629883, "learning_rate": 0.00015263001502865734, "loss": 1.2377, "step": 165800 }, { "epoch": 4.951780969157735, "grad_norm": 4.8078837394714355, "learning_rate": 0.00015261572923273847, "loss": 1.2435, "step": 165850 }, { "epoch": 4.953273818409817, "grad_norm": 4.367580413818359, "learning_rate": 0.00015260144343681964, "loss": 1.1683, "step": 165900 }, { "epoch": 4.954766667661899, "grad_norm": 4.1319756507873535, "learning_rate": 0.0001525871576409008, "loss": 1.259, "step": 165950 }, { "epoch": 4.956259516913982, "grad_norm": 4.9268293380737305, "learning_rate": 0.00015257287184498197, "loss": 1.1774, "step": 166000 }, { "epoch": 4.957752366166065, "grad_norm": 3.851490020751953, "learning_rate": 0.00015255858604906316, "loss": 1.2051, "step": 166050 }, { "epoch": 4.9592452154181474, "grad_norm": 6.583017826080322, "learning_rate": 0.0001525443002531443, "loss": 1.2744, "step": 166100 }, { "epoch": 4.960738064670229, "grad_norm": 3.7507100105285645, "learning_rate": 0.0001525300144572255, "loss": 1.1927, "step": 166150 }, { "epoch": 4.962230913922312, "grad_norm": 5.387413501739502, "learning_rate": 0.00015251572866130663, "loss": 1.203, "step": 166200 }, { "epoch": 4.963723763174395, "grad_norm": 3.7446823120117188, "learning_rate": 0.00015250144286538782, "loss": 1.2062, "step": 166250 }, { "epoch": 4.965216612426477, "grad_norm": 4.162785053253174, "learning_rate": 0.00015248715706946895, "loss": 1.1969, "step": 166300 }, { "epoch": 4.96670946167856, "grad_norm": 4.15479040145874, "learning_rate": 0.00015247287127355015, "loss": 1.2164, "step": 166350 }, { "epoch": 4.968202310930642, "grad_norm": 4.103176593780518, "learning_rate": 0.0001524585854776313, "loss": 1.2318, "step": 166400 }, { "epoch": 4.969695160182725, "grad_norm": 4.903412818908691, "learning_rate": 0.00015244429968171248, "loss": 1.1984, "step": 166450 }, { "epoch": 4.971188009434807, "grad_norm": 5.026261806488037, "learning_rate": 0.00015243001388579364, "loss": 1.2472, "step": 166500 }, { "epoch": 4.97268085868689, "grad_norm": 4.777016639709473, "learning_rate": 0.0001524157280898748, "loss": 1.2363, "step": 166550 }, { "epoch": 4.974173707938972, "grad_norm": 3.636580467224121, "learning_rate": 0.00015240144229395597, "loss": 1.2543, "step": 166600 }, { "epoch": 4.9756665571910546, "grad_norm": 3.8117880821228027, "learning_rate": 0.00015238715649803713, "loss": 1.1817, "step": 166650 }, { "epoch": 4.977159406443137, "grad_norm": 5.417634963989258, "learning_rate": 0.0001523728707021183, "loss": 1.2251, "step": 166700 }, { "epoch": 4.97865225569522, "grad_norm": 4.955143451690674, "learning_rate": 0.00015235858490619946, "loss": 1.2164, "step": 166750 }, { "epoch": 4.980145104947303, "grad_norm": 4.069868564605713, "learning_rate": 0.00015234429911028063, "loss": 1.2496, "step": 166800 }, { "epoch": 4.981637954199385, "grad_norm": 3.862034320831299, "learning_rate": 0.00015233001331436182, "loss": 1.19, "step": 166850 }, { "epoch": 4.983130803451467, "grad_norm": 5.259671211242676, "learning_rate": 0.00015231572751844296, "loss": 1.216, "step": 166900 }, { "epoch": 4.98462365270355, "grad_norm": 4.1818742752075195, "learning_rate": 0.00015230144172252415, "loss": 1.2089, "step": 166950 }, { "epoch": 4.986116501955633, "grad_norm": 4.551473617553711, "learning_rate": 0.0001522871559266053, "loss": 1.2218, "step": 167000 }, { "epoch": 4.987609351207715, "grad_norm": 4.714193820953369, "learning_rate": 0.00015227287013068648, "loss": 1.2117, "step": 167050 }, { "epoch": 4.989102200459797, "grad_norm": 5.234292030334473, "learning_rate": 0.00015225858433476762, "loss": 1.1861, "step": 167100 }, { "epoch": 4.99059504971188, "grad_norm": 3.052459239959717, "learning_rate": 0.0001522442985388488, "loss": 1.1598, "step": 167150 }, { "epoch": 4.9920878989639625, "grad_norm": 4.330234527587891, "learning_rate": 0.00015223001274292997, "loss": 1.2177, "step": 167200 }, { "epoch": 4.993580748216045, "grad_norm": 4.3281636238098145, "learning_rate": 0.00015221572694701114, "loss": 1.291, "step": 167250 }, { "epoch": 4.995073597468128, "grad_norm": 4.501579761505127, "learning_rate": 0.0001522014411510923, "loss": 1.215, "step": 167300 }, { "epoch": 4.996566446720211, "grad_norm": 3.957705497741699, "learning_rate": 0.00015218715535517347, "loss": 1.2056, "step": 167350 }, { "epoch": 4.9980592959722925, "grad_norm": 3.6151764392852783, "learning_rate": 0.00015217286955925463, "loss": 1.2531, "step": 167400 }, { "epoch": 4.999552145224375, "grad_norm": 5.759849548339844, "learning_rate": 0.0001521585837633358, "loss": 1.233, "step": 167450 }, { "epoch": 5.001044994476458, "grad_norm": 4.063636779785156, "learning_rate": 0.00015214429796741696, "loss": 1.0927, "step": 167500 }, { "epoch": 5.002537843728541, "grad_norm": 5.337904930114746, "learning_rate": 0.00015213001217149813, "loss": 1.0732, "step": 167550 }, { "epoch": 5.004030692980622, "grad_norm": 3.7725415229797363, "learning_rate": 0.0001521157263755793, "loss": 1.0745, "step": 167600 }, { "epoch": 5.005523542232705, "grad_norm": 5.607946395874023, "learning_rate": 0.00015210144057966048, "loss": 1.0961, "step": 167650 }, { "epoch": 5.007016391484788, "grad_norm": 5.750009059906006, "learning_rate": 0.00015208715478374162, "loss": 1.1483, "step": 167700 }, { "epoch": 5.0085092407368705, "grad_norm": 4.2717180252075195, "learning_rate": 0.0001520728689878228, "loss": 1.1728, "step": 167750 }, { "epoch": 5.010002089988953, "grad_norm": 4.048334121704102, "learning_rate": 0.00015205858319190395, "loss": 1.1091, "step": 167800 }, { "epoch": 5.011494939241035, "grad_norm": 4.57917594909668, "learning_rate": 0.00015204429739598514, "loss": 1.1404, "step": 167850 }, { "epoch": 5.012987788493118, "grad_norm": 5.619372367858887, "learning_rate": 0.00015203001160006628, "loss": 1.1803, "step": 167900 }, { "epoch": 5.0144806377452005, "grad_norm": 4.788344383239746, "learning_rate": 0.00015201572580414747, "loss": 1.1131, "step": 167950 }, { "epoch": 5.015973486997283, "grad_norm": 4.1209588050842285, "learning_rate": 0.00015200144000822863, "loss": 1.108, "step": 168000 }, { "epoch": 5.017466336249366, "grad_norm": 3.88391375541687, "learning_rate": 0.0001519871542123098, "loss": 1.1692, "step": 168050 }, { "epoch": 5.018959185501448, "grad_norm": 5.6544904708862305, "learning_rate": 0.00015197286841639096, "loss": 1.1457, "step": 168100 }, { "epoch": 5.02045203475353, "grad_norm": 5.817676544189453, "learning_rate": 0.00015195858262047213, "loss": 1.124, "step": 168150 }, { "epoch": 5.021944884005613, "grad_norm": 6.914028644561768, "learning_rate": 0.0001519442968245533, "loss": 1.0979, "step": 168200 }, { "epoch": 5.023437733257696, "grad_norm": 4.256136417388916, "learning_rate": 0.00015193001102863446, "loss": 1.1524, "step": 168250 }, { "epoch": 5.0249305825097785, "grad_norm": 4.776482582092285, "learning_rate": 0.00015191572523271562, "loss": 1.1005, "step": 168300 }, { "epoch": 5.02642343176186, "grad_norm": 4.402430057525635, "learning_rate": 0.0001519014394367968, "loss": 1.1295, "step": 168350 }, { "epoch": 5.027916281013943, "grad_norm": 4.203427314758301, "learning_rate": 0.00015188715364087795, "loss": 1.1377, "step": 168400 }, { "epoch": 5.029409130266026, "grad_norm": 3.8063766956329346, "learning_rate": 0.00015187286784495912, "loss": 1.0963, "step": 168450 }, { "epoch": 5.0309019795181085, "grad_norm": 4.944723129272461, "learning_rate": 0.00015185858204904028, "loss": 1.1298, "step": 168500 }, { "epoch": 5.032394828770191, "grad_norm": 4.621240615844727, "learning_rate": 0.00015184429625312145, "loss": 1.1597, "step": 168550 }, { "epoch": 5.033887678022273, "grad_norm": 5.244666576385498, "learning_rate": 0.0001518300104572026, "loss": 1.1266, "step": 168600 }, { "epoch": 5.035380527274356, "grad_norm": 3.7527828216552734, "learning_rate": 0.00015181572466128377, "loss": 1.1254, "step": 168650 }, { "epoch": 5.036873376526438, "grad_norm": 5.852657794952393, "learning_rate": 0.00015180143886536497, "loss": 1.1909, "step": 168700 }, { "epoch": 5.038366225778521, "grad_norm": 4.197393417358398, "learning_rate": 0.0001517871530694461, "loss": 1.1742, "step": 168750 }, { "epoch": 5.039859075030604, "grad_norm": 4.88790225982666, "learning_rate": 0.0001517728672735273, "loss": 1.1273, "step": 168800 }, { "epoch": 5.041351924282686, "grad_norm": 5.1781134605407715, "learning_rate": 0.00015175858147760843, "loss": 1.1011, "step": 168850 }, { "epoch": 5.042844773534768, "grad_norm": 6.221542835235596, "learning_rate": 0.00015174429568168963, "loss": 1.0775, "step": 168900 }, { "epoch": 5.044337622786851, "grad_norm": 3.557209014892578, "learning_rate": 0.00015173000988577076, "loss": 1.1295, "step": 168950 }, { "epoch": 5.045830472038934, "grad_norm": 4.908199310302734, "learning_rate": 0.00015171572408985195, "loss": 1.1428, "step": 169000 }, { "epoch": 5.0473233212910165, "grad_norm": 3.2522714138031006, "learning_rate": 0.00015170143829393312, "loss": 1.1454, "step": 169050 }, { "epoch": 5.048816170543098, "grad_norm": 4.945195198059082, "learning_rate": 0.00015168715249801428, "loss": 1.1541, "step": 169100 }, { "epoch": 5.050309019795181, "grad_norm": 3.7441771030426025, "learning_rate": 0.00015167286670209545, "loss": 1.1223, "step": 169150 }, { "epoch": 5.051801869047264, "grad_norm": 4.811500549316406, "learning_rate": 0.0001516585809061766, "loss": 1.1861, "step": 169200 }, { "epoch": 5.053294718299346, "grad_norm": 4.175551414489746, "learning_rate": 0.00015164429511025778, "loss": 1.1655, "step": 169250 }, { "epoch": 5.054787567551428, "grad_norm": 4.7304205894470215, "learning_rate": 0.00015163000931433894, "loss": 1.1238, "step": 169300 }, { "epoch": 5.056280416803511, "grad_norm": 4.252381801605225, "learning_rate": 0.0001516157235184201, "loss": 1.1683, "step": 169350 }, { "epoch": 5.057773266055594, "grad_norm": 3.368849039077759, "learning_rate": 0.00015160143772250127, "loss": 1.1584, "step": 169400 }, { "epoch": 5.059266115307676, "grad_norm": 4.560303688049316, "learning_rate": 0.00015158715192658244, "loss": 1.1668, "step": 169450 }, { "epoch": 5.060758964559759, "grad_norm": 4.084552764892578, "learning_rate": 0.00015157286613066363, "loss": 1.1385, "step": 169500 }, { "epoch": 5.062251813811841, "grad_norm": 4.2204060554504395, "learning_rate": 0.00015155858033474477, "loss": 1.1486, "step": 169550 }, { "epoch": 5.063744663063924, "grad_norm": 3.872606039047241, "learning_rate": 0.00015154429453882596, "loss": 1.1732, "step": 169600 }, { "epoch": 5.065237512316006, "grad_norm": 6.434457302093506, "learning_rate": 0.0001515300087429071, "loss": 1.1658, "step": 169650 }, { "epoch": 5.066730361568089, "grad_norm": 4.881868362426758, "learning_rate": 0.0001515157229469883, "loss": 1.1769, "step": 169700 }, { "epoch": 5.068223210820172, "grad_norm": 4.622376918792725, "learning_rate": 0.00015150143715106942, "loss": 1.0902, "step": 169750 }, { "epoch": 5.0697160600722535, "grad_norm": 3.6144649982452393, "learning_rate": 0.00015148715135515062, "loss": 1.1098, "step": 169800 }, { "epoch": 5.071208909324336, "grad_norm": 4.620079517364502, "learning_rate": 0.00015147286555923178, "loss": 1.1716, "step": 169850 }, { "epoch": 5.072701758576419, "grad_norm": 4.734360694885254, "learning_rate": 0.00015145857976331295, "loss": 1.1539, "step": 169900 }, { "epoch": 5.074194607828502, "grad_norm": 4.301705360412598, "learning_rate": 0.0001514442939673941, "loss": 1.1533, "step": 169950 }, { "epoch": 5.075687457080584, "grad_norm": 5.320747375488281, "learning_rate": 0.00015143000817147527, "loss": 1.1233, "step": 170000 }, { "epoch": 5.077180306332666, "grad_norm": 4.841482639312744, "learning_rate": 0.00015141572237555644, "loss": 1.1201, "step": 170050 }, { "epoch": 5.078673155584749, "grad_norm": 5.517151832580566, "learning_rate": 0.0001514014365796376, "loss": 1.0986, "step": 170100 }, { "epoch": 5.080166004836832, "grad_norm": 4.323915004730225, "learning_rate": 0.00015138715078371877, "loss": 1.1362, "step": 170150 }, { "epoch": 5.081658854088914, "grad_norm": 4.015235900878906, "learning_rate": 0.00015137286498779993, "loss": 1.1571, "step": 170200 }, { "epoch": 5.083151703340997, "grad_norm": 5.928144454956055, "learning_rate": 0.0001513585791918811, "loss": 1.1672, "step": 170250 }, { "epoch": 5.084644552593079, "grad_norm": 4.04873514175415, "learning_rate": 0.0001513442933959623, "loss": 1.1028, "step": 170300 }, { "epoch": 5.0861374018451615, "grad_norm": 5.569055557250977, "learning_rate": 0.00015133000760004343, "loss": 1.1479, "step": 170350 }, { "epoch": 5.087630251097244, "grad_norm": 5.128480911254883, "learning_rate": 0.00015131572180412462, "loss": 1.1567, "step": 170400 }, { "epoch": 5.089123100349327, "grad_norm": 5.323390007019043, "learning_rate": 0.00015130143600820576, "loss": 1.1072, "step": 170450 }, { "epoch": 5.09061594960141, "grad_norm": 3.7644312381744385, "learning_rate": 0.00015128715021228695, "loss": 1.1435, "step": 170500 }, { "epoch": 5.092108798853491, "grad_norm": 4.050746917724609, "learning_rate": 0.00015127286441636809, "loss": 1.2082, "step": 170550 }, { "epoch": 5.093601648105574, "grad_norm": 6.779710292816162, "learning_rate": 0.00015125857862044928, "loss": 1.1314, "step": 170600 }, { "epoch": 5.095094497357657, "grad_norm": 4.560274124145508, "learning_rate": 0.00015124429282453044, "loss": 1.2157, "step": 170650 }, { "epoch": 5.0965873466097396, "grad_norm": 5.227869987487793, "learning_rate": 0.0001512300070286116, "loss": 1.187, "step": 170700 }, { "epoch": 5.098080195861822, "grad_norm": 5.666630744934082, "learning_rate": 0.00015121572123269277, "loss": 1.2005, "step": 170750 }, { "epoch": 5.099573045113904, "grad_norm": 4.06814432144165, "learning_rate": 0.00015120143543677394, "loss": 1.1351, "step": 170800 }, { "epoch": 5.101065894365987, "grad_norm": 5.10957145690918, "learning_rate": 0.0001511871496408551, "loss": 1.193, "step": 170850 }, { "epoch": 5.1025587436180695, "grad_norm": 5.230775833129883, "learning_rate": 0.00015117286384493627, "loss": 1.2186, "step": 170900 }, { "epoch": 5.104051592870152, "grad_norm": 4.77035665512085, "learning_rate": 0.00015115857804901743, "loss": 1.165, "step": 170950 }, { "epoch": 5.105544442122235, "grad_norm": 3.762669801712036, "learning_rate": 0.0001511442922530986, "loss": 1.0899, "step": 171000 }, { "epoch": 5.107037291374317, "grad_norm": 4.551312446594238, "learning_rate": 0.00015113000645717976, "loss": 1.1306, "step": 171050 }, { "epoch": 5.108530140626399, "grad_norm": 3.9182794094085693, "learning_rate": 0.00015111572066126092, "loss": 1.1365, "step": 171100 }, { "epoch": 5.110022989878482, "grad_norm": 4.611422538757324, "learning_rate": 0.0001511014348653421, "loss": 1.2169, "step": 171150 }, { "epoch": 5.111515839130565, "grad_norm": 4.5237321853637695, "learning_rate": 0.00015108714906942325, "loss": 1.1373, "step": 171200 }, { "epoch": 5.1130086883826475, "grad_norm": 4.893795967102051, "learning_rate": 0.00015107286327350442, "loss": 1.1307, "step": 171250 }, { "epoch": 5.114501537634729, "grad_norm": 4.26010799407959, "learning_rate": 0.00015105857747758558, "loss": 1.1461, "step": 171300 }, { "epoch": 5.115994386886812, "grad_norm": 5.365528583526611, "learning_rate": 0.00015104429168166675, "loss": 1.1156, "step": 171350 }, { "epoch": 5.117487236138895, "grad_norm": 3.8632538318634033, "learning_rate": 0.0001510300058857479, "loss": 1.1354, "step": 171400 }, { "epoch": 5.1189800853909775, "grad_norm": 5.570279598236084, "learning_rate": 0.0001510157200898291, "loss": 1.1758, "step": 171450 }, { "epoch": 5.120472934643059, "grad_norm": 4.220273017883301, "learning_rate": 0.00015100143429391024, "loss": 1.149, "step": 171500 }, { "epoch": 5.121965783895142, "grad_norm": 5.220869541168213, "learning_rate": 0.00015098714849799143, "loss": 1.208, "step": 171550 }, { "epoch": 5.123458633147225, "grad_norm": 3.9469594955444336, "learning_rate": 0.00015097286270207257, "loss": 1.1431, "step": 171600 }, { "epoch": 5.124951482399307, "grad_norm": 3.932490587234497, "learning_rate": 0.00015095857690615376, "loss": 1.1621, "step": 171650 }, { "epoch": 5.12644433165139, "grad_norm": 5.017124176025391, "learning_rate": 0.00015094429111023493, "loss": 1.1341, "step": 171700 }, { "epoch": 5.127937180903472, "grad_norm": 5.0581488609313965, "learning_rate": 0.0001509300053143161, "loss": 1.1756, "step": 171750 }, { "epoch": 5.129430030155555, "grad_norm": 5.208496570587158, "learning_rate": 0.00015091571951839726, "loss": 1.1226, "step": 171800 }, { "epoch": 5.130922879407637, "grad_norm": 3.9350638389587402, "learning_rate": 0.00015090143372247842, "loss": 1.1443, "step": 171850 }, { "epoch": 5.13241572865972, "grad_norm": 3.7098276615142822, "learning_rate": 0.00015088714792655959, "loss": 1.1346, "step": 171900 }, { "epoch": 5.133908577911803, "grad_norm": 4.124345779418945, "learning_rate": 0.00015087286213064075, "loss": 1.1955, "step": 171950 }, { "epoch": 5.135401427163885, "grad_norm": 3.5282959938049316, "learning_rate": 0.00015085857633472192, "loss": 1.1457, "step": 172000 }, { "epoch": 5.136894276415967, "grad_norm": 4.17992639541626, "learning_rate": 0.00015084429053880308, "loss": 1.161, "step": 172050 }, { "epoch": 5.13838712566805, "grad_norm": 3.838498830795288, "learning_rate": 0.00015083000474288424, "loss": 1.1381, "step": 172100 }, { "epoch": 5.139879974920133, "grad_norm": 5.275324821472168, "learning_rate": 0.00015081571894696544, "loss": 1.0803, "step": 172150 }, { "epoch": 5.141372824172215, "grad_norm": 4.769461154937744, "learning_rate": 0.00015080143315104657, "loss": 1.1494, "step": 172200 }, { "epoch": 5.142865673424297, "grad_norm": 4.681296348571777, "learning_rate": 0.00015078714735512777, "loss": 1.1726, "step": 172250 }, { "epoch": 5.14435852267638, "grad_norm": 4.308079719543457, "learning_rate": 0.0001507728615592089, "loss": 1.1331, "step": 172300 }, { "epoch": 5.145851371928463, "grad_norm": 4.144331455230713, "learning_rate": 0.0001507585757632901, "loss": 1.1579, "step": 172350 }, { "epoch": 5.147344221180545, "grad_norm": 4.138561725616455, "learning_rate": 0.00015074428996737123, "loss": 1.1462, "step": 172400 }, { "epoch": 5.148837070432628, "grad_norm": 4.038719177246094, "learning_rate": 0.00015073000417145242, "loss": 1.1604, "step": 172450 }, { "epoch": 5.15032991968471, "grad_norm": 3.903709888458252, "learning_rate": 0.0001507157183755336, "loss": 1.1504, "step": 172500 }, { "epoch": 5.151822768936793, "grad_norm": 5.332316875457764, "learning_rate": 0.00015070143257961475, "loss": 1.1616, "step": 172550 }, { "epoch": 5.153315618188875, "grad_norm": 4.094432353973389, "learning_rate": 0.00015068714678369592, "loss": 1.1771, "step": 172600 }, { "epoch": 5.154808467440958, "grad_norm": 4.575977325439453, "learning_rate": 0.00015067286098777708, "loss": 1.1587, "step": 172650 }, { "epoch": 5.156301316693041, "grad_norm": 4.0832414627075195, "learning_rate": 0.00015065857519185825, "loss": 1.1432, "step": 172700 }, { "epoch": 5.1577941659451225, "grad_norm": 3.969329595565796, "learning_rate": 0.0001506442893959394, "loss": 1.1735, "step": 172750 }, { "epoch": 5.159287015197205, "grad_norm": 4.063047885894775, "learning_rate": 0.00015063000360002058, "loss": 1.1071, "step": 172800 }, { "epoch": 5.160779864449288, "grad_norm": 4.360136985778809, "learning_rate": 0.00015061571780410174, "loss": 1.154, "step": 172850 }, { "epoch": 5.162272713701371, "grad_norm": 3.850008010864258, "learning_rate": 0.0001506014320081829, "loss": 1.2022, "step": 172900 }, { "epoch": 5.163765562953453, "grad_norm": 4.514410972595215, "learning_rate": 0.0001505871462122641, "loss": 1.1265, "step": 172950 }, { "epoch": 5.165258412205535, "grad_norm": 4.641385555267334, "learning_rate": 0.00015057286041634524, "loss": 1.1373, "step": 173000 }, { "epoch": 5.166751261457618, "grad_norm": 4.439742088317871, "learning_rate": 0.00015055857462042643, "loss": 1.1792, "step": 173050 }, { "epoch": 5.168244110709701, "grad_norm": 4.819581985473633, "learning_rate": 0.00015054428882450756, "loss": 1.1055, "step": 173100 }, { "epoch": 5.169736959961783, "grad_norm": 4.719875335693359, "learning_rate": 0.00015053000302858876, "loss": 1.1097, "step": 173150 }, { "epoch": 5.171229809213866, "grad_norm": 3.7248473167419434, "learning_rate": 0.0001505157172326699, "loss": 1.1809, "step": 173200 }, { "epoch": 5.172722658465948, "grad_norm": 4.468180179595947, "learning_rate": 0.00015050143143675109, "loss": 1.1218, "step": 173250 }, { "epoch": 5.1742155077180305, "grad_norm": 5.018815994262695, "learning_rate": 0.00015048714564083225, "loss": 1.1562, "step": 173300 }, { "epoch": 5.175708356970113, "grad_norm": 5.088767051696777, "learning_rate": 0.00015047285984491342, "loss": 1.0996, "step": 173350 }, { "epoch": 5.177201206222196, "grad_norm": 5.453111171722412, "learning_rate": 0.00015045857404899458, "loss": 1.2058, "step": 173400 }, { "epoch": 5.178694055474279, "grad_norm": 5.479541778564453, "learning_rate": 0.00015044428825307574, "loss": 1.1409, "step": 173450 }, { "epoch": 5.1801869047263605, "grad_norm": 5.977447509765625, "learning_rate": 0.0001504300024571569, "loss": 1.1197, "step": 173500 }, { "epoch": 5.181679753978443, "grad_norm": 5.65791130065918, "learning_rate": 0.00015041571666123807, "loss": 1.1486, "step": 173550 }, { "epoch": 5.183172603230526, "grad_norm": 6.653776168823242, "learning_rate": 0.00015040143086531924, "loss": 1.1734, "step": 173600 }, { "epoch": 5.184665452482609, "grad_norm": 4.445573806762695, "learning_rate": 0.0001503871450694004, "loss": 1.1776, "step": 173650 }, { "epoch": 5.186158301734691, "grad_norm": 4.370992183685303, "learning_rate": 0.00015037285927348157, "loss": 1.1274, "step": 173700 }, { "epoch": 5.187651150986773, "grad_norm": 4.479329586029053, "learning_rate": 0.00015035857347756273, "loss": 1.1387, "step": 173750 }, { "epoch": 5.189144000238856, "grad_norm": 4.528141498565674, "learning_rate": 0.0001503442876816439, "loss": 1.1586, "step": 173800 }, { "epoch": 5.1906368494909385, "grad_norm": 4.396657943725586, "learning_rate": 0.00015033000188572506, "loss": 1.1918, "step": 173850 }, { "epoch": 5.192129698743021, "grad_norm": 4.700211048126221, "learning_rate": 0.00015031571608980623, "loss": 1.144, "step": 173900 }, { "epoch": 5.193622547995103, "grad_norm": 3.748830795288086, "learning_rate": 0.0001503014302938874, "loss": 1.1584, "step": 173950 }, { "epoch": 5.195115397247186, "grad_norm": 4.841403484344482, "learning_rate": 0.00015028714449796856, "loss": 1.1419, "step": 174000 }, { "epoch": 5.1966082464992684, "grad_norm": 5.811513423919678, "learning_rate": 0.00015027285870204972, "loss": 1.1155, "step": 174050 }, { "epoch": 5.198101095751351, "grad_norm": 4.072244167327881, "learning_rate": 0.0001502585729061309, "loss": 1.0956, "step": 174100 }, { "epoch": 5.199593945003434, "grad_norm": 4.929732799530029, "learning_rate": 0.00015024428711021205, "loss": 1.1761, "step": 174150 }, { "epoch": 5.201086794255516, "grad_norm": 6.669888019561768, "learning_rate": 0.00015023000131429324, "loss": 1.1291, "step": 174200 }, { "epoch": 5.202579643507598, "grad_norm": 4.264486789703369, "learning_rate": 0.00015021571551837438, "loss": 1.1596, "step": 174250 }, { "epoch": 5.204072492759681, "grad_norm": 4.881564140319824, "learning_rate": 0.00015020142972245557, "loss": 1.1884, "step": 174300 }, { "epoch": 5.205565342011764, "grad_norm": 5.406867504119873, "learning_rate": 0.00015018714392653674, "loss": 1.1987, "step": 174350 }, { "epoch": 5.2070581912638465, "grad_norm": 5.110748767852783, "learning_rate": 0.0001501728581306179, "loss": 1.1758, "step": 174400 }, { "epoch": 5.208551040515928, "grad_norm": 4.510517120361328, "learning_rate": 0.00015015857233469906, "loss": 1.1746, "step": 174450 }, { "epoch": 5.210043889768011, "grad_norm": 4.028598308563232, "learning_rate": 0.00015014428653878023, "loss": 1.1727, "step": 174500 }, { "epoch": 5.211536739020094, "grad_norm": 5.600837707519531, "learning_rate": 0.0001501300007428614, "loss": 1.2241, "step": 174550 }, { "epoch": 5.213029588272176, "grad_norm": 5.198084831237793, "learning_rate": 0.00015011571494694256, "loss": 1.1599, "step": 174600 }, { "epoch": 5.214522437524259, "grad_norm": 4.601158142089844, "learning_rate": 0.00015010142915102372, "loss": 1.1764, "step": 174650 }, { "epoch": 5.216015286776341, "grad_norm": 5.581248760223389, "learning_rate": 0.0001500871433551049, "loss": 1.174, "step": 174700 }, { "epoch": 5.217508136028424, "grad_norm": 6.802553176879883, "learning_rate": 0.00015007285755918605, "loss": 1.1922, "step": 174750 }, { "epoch": 5.219000985280506, "grad_norm": 4.5715651512146, "learning_rate": 0.00015005857176326722, "loss": 1.1835, "step": 174800 }, { "epoch": 5.220493834532589, "grad_norm": 3.920050621032715, "learning_rate": 0.00015004428596734838, "loss": 1.1529, "step": 174850 }, { "epoch": 5.221986683784672, "grad_norm": 5.379642009735107, "learning_rate": 0.00015003000017142957, "loss": 1.1016, "step": 174900 }, { "epoch": 5.223479533036754, "grad_norm": 5.268077373504639, "learning_rate": 0.0001500157143755107, "loss": 1.1792, "step": 174950 }, { "epoch": 5.224972382288836, "grad_norm": 4.864146709442139, "learning_rate": 0.0001500014285795919, "loss": 1.1511, "step": 175000 }, { "epoch": 5.226465231540919, "grad_norm": 3.050143241882324, "learning_rate": 0.00014998714278367304, "loss": 1.2307, "step": 175050 }, { "epoch": 5.227958080793002, "grad_norm": 4.5110764503479, "learning_rate": 0.00014997285698775423, "loss": 1.1558, "step": 175100 }, { "epoch": 5.229450930045084, "grad_norm": 4.925455093383789, "learning_rate": 0.0001499585711918354, "loss": 1.194, "step": 175150 }, { "epoch": 5.230943779297166, "grad_norm": 5.321854591369629, "learning_rate": 0.00014994428539591656, "loss": 1.1927, "step": 175200 }, { "epoch": 5.232436628549249, "grad_norm": 3.1668155193328857, "learning_rate": 0.00014992999959999773, "loss": 1.1243, "step": 175250 }, { "epoch": 5.233929477801332, "grad_norm": 5.259143829345703, "learning_rate": 0.0001499157138040789, "loss": 1.181, "step": 175300 }, { "epoch": 5.235422327053414, "grad_norm": 5.783114910125732, "learning_rate": 0.00014990142800816006, "loss": 1.1156, "step": 175350 }, { "epoch": 5.236915176305497, "grad_norm": 5.6330952644348145, "learning_rate": 0.00014988714221224122, "loss": 1.1513, "step": 175400 }, { "epoch": 5.238408025557579, "grad_norm": 4.543177127838135, "learning_rate": 0.00014987285641632238, "loss": 1.1898, "step": 175450 }, { "epoch": 5.239900874809662, "grad_norm": 3.058159828186035, "learning_rate": 0.00014985857062040355, "loss": 1.2119, "step": 175500 }, { "epoch": 5.241393724061744, "grad_norm": 5.873176097869873, "learning_rate": 0.00014984428482448471, "loss": 1.1351, "step": 175550 }, { "epoch": 5.242886573313827, "grad_norm": 4.126943588256836, "learning_rate": 0.0001498299990285659, "loss": 1.2393, "step": 175600 }, { "epoch": 5.24437942256591, "grad_norm": 5.249665260314941, "learning_rate": 0.00014981571323264704, "loss": 1.2033, "step": 175650 }, { "epoch": 5.2458722718179915, "grad_norm": 6.529717445373535, "learning_rate": 0.00014980142743672824, "loss": 1.1383, "step": 175700 }, { "epoch": 5.247365121070074, "grad_norm": 4.902076244354248, "learning_rate": 0.00014978714164080937, "loss": 1.1252, "step": 175750 }, { "epoch": 5.248857970322157, "grad_norm": 5.3138017654418945, "learning_rate": 0.00014977285584489056, "loss": 1.1629, "step": 175800 }, { "epoch": 5.25035081957424, "grad_norm": 4.891197204589844, "learning_rate": 0.0001497585700489717, "loss": 1.144, "step": 175850 }, { "epoch": 5.2518436688263215, "grad_norm": 5.521958827972412, "learning_rate": 0.0001497442842530529, "loss": 1.2136, "step": 175900 }, { "epoch": 5.253336518078404, "grad_norm": 3.749346971511841, "learning_rate": 0.00014972999845713406, "loss": 1.1887, "step": 175950 }, { "epoch": 5.254829367330487, "grad_norm": 3.947131633758545, "learning_rate": 0.00014971571266121522, "loss": 1.1959, "step": 176000 }, { "epoch": 5.25632221658257, "grad_norm": 3.47645902633667, "learning_rate": 0.0001497014268652964, "loss": 1.1597, "step": 176050 }, { "epoch": 5.257815065834652, "grad_norm": 3.3379621505737305, "learning_rate": 0.00014968714106937755, "loss": 1.1643, "step": 176100 }, { "epoch": 5.259307915086735, "grad_norm": 3.426164388656616, "learning_rate": 0.00014967285527345872, "loss": 1.131, "step": 176150 }, { "epoch": 5.260800764338817, "grad_norm": 4.857641696929932, "learning_rate": 0.00014965856947753988, "loss": 1.1171, "step": 176200 }, { "epoch": 5.2622936135908995, "grad_norm": 3.9440646171569824, "learning_rate": 0.00014964428368162105, "loss": 1.1529, "step": 176250 }, { "epoch": 5.263786462842982, "grad_norm": 4.392827987670898, "learning_rate": 0.0001496299978857022, "loss": 1.1946, "step": 176300 }, { "epoch": 5.265279312095065, "grad_norm": 4.131694316864014, "learning_rate": 0.00014961571208978338, "loss": 1.1702, "step": 176350 }, { "epoch": 5.266772161347147, "grad_norm": 4.955690383911133, "learning_rate": 0.00014960142629386454, "loss": 1.1059, "step": 176400 }, { "epoch": 5.2682650105992295, "grad_norm": 3.52262544631958, "learning_rate": 0.0001495871404979457, "loss": 1.1873, "step": 176450 }, { "epoch": 5.269757859851312, "grad_norm": 5.748971939086914, "learning_rate": 0.00014957285470202687, "loss": 1.1285, "step": 176500 }, { "epoch": 5.271250709103395, "grad_norm": 5.502897262573242, "learning_rate": 0.00014955856890610803, "loss": 1.1292, "step": 176550 }, { "epoch": 5.272743558355478, "grad_norm": 3.8092453479766846, "learning_rate": 0.0001495442831101892, "loss": 1.1545, "step": 176600 }, { "epoch": 5.274236407607559, "grad_norm": 4.742307186126709, "learning_rate": 0.00014952999731427036, "loss": 1.1732, "step": 176650 }, { "epoch": 5.275729256859642, "grad_norm": 5.108506202697754, "learning_rate": 0.00014951571151835153, "loss": 1.2033, "step": 176700 }, { "epoch": 5.277222106111725, "grad_norm": 3.655372381210327, "learning_rate": 0.00014950142572243272, "loss": 1.1837, "step": 176750 }, { "epoch": 5.2787149553638075, "grad_norm": 4.246975898742676, "learning_rate": 0.00014948713992651386, "loss": 1.1572, "step": 176800 }, { "epoch": 5.28020780461589, "grad_norm": 3.185184955596924, "learning_rate": 0.00014947285413059505, "loss": 1.1773, "step": 176850 }, { "epoch": 5.281700653867972, "grad_norm": 4.376567363739014, "learning_rate": 0.0001494585683346762, "loss": 1.1662, "step": 176900 }, { "epoch": 5.283193503120055, "grad_norm": 4.7851457595825195, "learning_rate": 0.00014944428253875738, "loss": 1.2018, "step": 176950 }, { "epoch": 5.2846863523721375, "grad_norm": 4.13614559173584, "learning_rate": 0.00014942999674283852, "loss": 1.1137, "step": 177000 }, { "epoch": 5.28617920162422, "grad_norm": 5.2566609382629395, "learning_rate": 0.0001494157109469197, "loss": 1.1698, "step": 177050 }, { "epoch": 5.287672050876303, "grad_norm": 4.4063825607299805, "learning_rate": 0.00014940142515100087, "loss": 1.1507, "step": 177100 }, { "epoch": 5.289164900128385, "grad_norm": 6.457488059997559, "learning_rate": 0.00014938713935508204, "loss": 1.2139, "step": 177150 }, { "epoch": 5.290657749380467, "grad_norm": 3.639181137084961, "learning_rate": 0.0001493728535591632, "loss": 1.146, "step": 177200 }, { "epoch": 5.29215059863255, "grad_norm": 4.316925525665283, "learning_rate": 0.00014935856776324437, "loss": 1.1696, "step": 177250 }, { "epoch": 5.293643447884633, "grad_norm": 3.8231780529022217, "learning_rate": 0.00014934428196732553, "loss": 1.2053, "step": 177300 }, { "epoch": 5.2951362971367155, "grad_norm": 5.597898483276367, "learning_rate": 0.0001493299961714067, "loss": 1.2097, "step": 177350 }, { "epoch": 5.296629146388797, "grad_norm": 5.123006820678711, "learning_rate": 0.00014931571037548786, "loss": 1.1213, "step": 177400 }, { "epoch": 5.29812199564088, "grad_norm": 4.827045917510986, "learning_rate": 0.00014930142457956903, "loss": 1.1784, "step": 177450 }, { "epoch": 5.299614844892963, "grad_norm": 4.859231472015381, "learning_rate": 0.0001492871387836502, "loss": 1.1418, "step": 177500 }, { "epoch": 5.3011076941450455, "grad_norm": 5.134034633636475, "learning_rate": 0.00014927285298773138, "loss": 1.1989, "step": 177550 }, { "epoch": 5.302600543397128, "grad_norm": 6.3303327560424805, "learning_rate": 0.00014925856719181252, "loss": 1.1866, "step": 177600 }, { "epoch": 5.30409339264921, "grad_norm": 4.182743549346924, "learning_rate": 0.0001492442813958937, "loss": 1.1781, "step": 177650 }, { "epoch": 5.305586241901293, "grad_norm": 4.055047512054443, "learning_rate": 0.00014922999559997485, "loss": 1.1735, "step": 177700 }, { "epoch": 5.307079091153375, "grad_norm": 5.255928993225098, "learning_rate": 0.00014921570980405604, "loss": 1.1627, "step": 177750 }, { "epoch": 5.308571940405458, "grad_norm": 4.9416584968566895, "learning_rate": 0.0001492014240081372, "loss": 1.0982, "step": 177800 }, { "epoch": 5.31006478965754, "grad_norm": 6.426959037780762, "learning_rate": 0.00014918713821221837, "loss": 1.1585, "step": 177850 }, { "epoch": 5.311557638909623, "grad_norm": 3.433701753616333, "learning_rate": 0.00014917285241629953, "loss": 1.1728, "step": 177900 }, { "epoch": 5.313050488161705, "grad_norm": 4.7097320556640625, "learning_rate": 0.0001491585666203807, "loss": 1.1183, "step": 177950 }, { "epoch": 5.314543337413788, "grad_norm": 5.154112815856934, "learning_rate": 0.00014914428082446186, "loss": 1.2097, "step": 178000 }, { "epoch": 5.316036186665871, "grad_norm": 4.5182929039001465, "learning_rate": 0.00014912999502854303, "loss": 1.2144, "step": 178050 }, { "epoch": 5.3175290359179535, "grad_norm": 4.899813175201416, "learning_rate": 0.0001491157092326242, "loss": 1.1513, "step": 178100 }, { "epoch": 5.319021885170035, "grad_norm": 4.790624141693115, "learning_rate": 0.00014910142343670536, "loss": 1.1579, "step": 178150 }, { "epoch": 5.320514734422118, "grad_norm": 4.103714466094971, "learning_rate": 0.00014908713764078652, "loss": 1.1407, "step": 178200 }, { "epoch": 5.322007583674201, "grad_norm": 3.641630172729492, "learning_rate": 0.0001490728518448677, "loss": 1.1401, "step": 178250 }, { "epoch": 5.323500432926283, "grad_norm": 4.197120666503906, "learning_rate": 0.00014905856604894885, "loss": 1.151, "step": 178300 }, { "epoch": 5.324993282178365, "grad_norm": 6.3722405433654785, "learning_rate": 0.00014904428025303004, "loss": 1.2082, "step": 178350 }, { "epoch": 5.326486131430448, "grad_norm": 6.299606800079346, "learning_rate": 0.00014902999445711118, "loss": 1.2194, "step": 178400 }, { "epoch": 5.327978980682531, "grad_norm": 4.542901992797852, "learning_rate": 0.00014901570866119237, "loss": 1.199, "step": 178450 }, { "epoch": 5.329471829934613, "grad_norm": 5.075666904449463, "learning_rate": 0.0001490014228652735, "loss": 1.1677, "step": 178500 }, { "epoch": 5.330964679186696, "grad_norm": 4.868073463439941, "learning_rate": 0.0001489871370693547, "loss": 1.1416, "step": 178550 }, { "epoch": 5.332457528438778, "grad_norm": 4.414193630218506, "learning_rate": 0.00014897285127343587, "loss": 1.1757, "step": 178600 }, { "epoch": 5.3339503776908606, "grad_norm": 4.902929306030273, "learning_rate": 0.00014895856547751703, "loss": 1.1555, "step": 178650 }, { "epoch": 5.335443226942943, "grad_norm": 3.7560372352600098, "learning_rate": 0.0001489442796815982, "loss": 1.1768, "step": 178700 }, { "epoch": 5.336936076195026, "grad_norm": 4.610017776489258, "learning_rate": 0.00014892999388567936, "loss": 1.1467, "step": 178750 }, { "epoch": 5.338428925447109, "grad_norm": 3.9438891410827637, "learning_rate": 0.00014891570808976053, "loss": 1.1995, "step": 178800 }, { "epoch": 5.3399217746991905, "grad_norm": 4.497828483581543, "learning_rate": 0.0001489014222938417, "loss": 1.1962, "step": 178850 }, { "epoch": 5.341414623951273, "grad_norm": 6.109000205993652, "learning_rate": 0.00014888713649792285, "loss": 1.1826, "step": 178900 }, { "epoch": 5.342907473203356, "grad_norm": 4.150059223175049, "learning_rate": 0.00014887285070200402, "loss": 1.1613, "step": 178950 }, { "epoch": 5.344400322455439, "grad_norm": 4.3024492263793945, "learning_rate": 0.00014885856490608518, "loss": 1.1438, "step": 179000 }, { "epoch": 5.345893171707521, "grad_norm": 4.783211708068848, "learning_rate": 0.00014884427911016635, "loss": 1.1864, "step": 179050 }, { "epoch": 5.347386020959603, "grad_norm": 5.038503646850586, "learning_rate": 0.0001488299933142475, "loss": 1.2297, "step": 179100 }, { "epoch": 5.348878870211686, "grad_norm": 5.009586334228516, "learning_rate": 0.00014881570751832868, "loss": 1.1509, "step": 179150 }, { "epoch": 5.3503717194637685, "grad_norm": 5.057339191436768, "learning_rate": 0.00014880142172240984, "loss": 1.162, "step": 179200 }, { "epoch": 5.351864568715851, "grad_norm": 7.249011993408203, "learning_rate": 0.000148787135926491, "loss": 1.1634, "step": 179250 }, { "epoch": 5.353357417967934, "grad_norm": 4.474646091461182, "learning_rate": 0.00014877285013057217, "loss": 1.1652, "step": 179300 }, { "epoch": 5.354850267220016, "grad_norm": 4.699128150939941, "learning_rate": 0.00014875856433465334, "loss": 1.1875, "step": 179350 }, { "epoch": 5.3563431164720985, "grad_norm": 3.6825220584869385, "learning_rate": 0.00014874427853873453, "loss": 1.1896, "step": 179400 }, { "epoch": 5.357835965724181, "grad_norm": 5.787204742431641, "learning_rate": 0.00014872999274281567, "loss": 1.1793, "step": 179450 }, { "epoch": 5.359328814976264, "grad_norm": 6.388000011444092, "learning_rate": 0.00014871570694689686, "loss": 1.2219, "step": 179500 }, { "epoch": 5.360821664228347, "grad_norm": 4.790463924407959, "learning_rate": 0.000148701421150978, "loss": 1.1739, "step": 179550 }, { "epoch": 5.362314513480428, "grad_norm": 2.8773367404937744, "learning_rate": 0.0001486871353550592, "loss": 1.1894, "step": 179600 }, { "epoch": 5.363807362732511, "grad_norm": 3.305040121078491, "learning_rate": 0.00014867284955914032, "loss": 1.2278, "step": 179650 }, { "epoch": 5.365300211984594, "grad_norm": 4.368061065673828, "learning_rate": 0.00014865856376322152, "loss": 1.214, "step": 179700 }, { "epoch": 5.3667930612366765, "grad_norm": 5.629176139831543, "learning_rate": 0.00014864427796730268, "loss": 1.1611, "step": 179750 }, { "epoch": 5.368285910488759, "grad_norm": 5.811650276184082, "learning_rate": 0.00014862999217138385, "loss": 1.1861, "step": 179800 }, { "epoch": 5.369778759740841, "grad_norm": 3.6000754833221436, "learning_rate": 0.000148615706375465, "loss": 1.1636, "step": 179850 }, { "epoch": 5.371271608992924, "grad_norm": 4.922330379486084, "learning_rate": 0.00014860142057954617, "loss": 1.1534, "step": 179900 }, { "epoch": 5.3727644582450065, "grad_norm": 5.780492305755615, "learning_rate": 0.00014858713478362734, "loss": 1.2066, "step": 179950 }, { "epoch": 5.374257307497089, "grad_norm": 4.670080184936523, "learning_rate": 0.0001485728489877085, "loss": 1.1812, "step": 180000 } ], "logging_steps": 50, "max_steps": 700001, "num_input_tokens_seen": 0, "num_train_epochs": 21, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.5513784407443374e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }