{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8957095512495148, "eval_steps": 1000, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.9856985041650495e-05, "grad_norm": 8.064935684204102, "learning_rate": 0.0, "loss": 3.4849, "step": 1 }, { "epoch": 0.0014928492520825246, "grad_norm": 3.3433420658111572, "learning_rate": 0.00019998742849959144, "loss": 1.9038, "step": 50 }, { "epoch": 0.0029856985041650493, "grad_norm": 2.907883405685425, "learning_rate": 0.0001999731427036726, "loss": 1.608, "step": 100 }, { "epoch": 0.004478547756247574, "grad_norm": 3.2614288330078125, "learning_rate": 0.00019995885690775376, "loss": 1.5505, "step": 150 }, { "epoch": 0.005971397008330099, "grad_norm": 3.8400654792785645, "learning_rate": 0.00019994457111183493, "loss": 1.5737, "step": 200 }, { "epoch": 0.007464246260412624, "grad_norm": 3.3358442783355713, "learning_rate": 0.00019993028531591612, "loss": 1.567, "step": 250 }, { "epoch": 0.008957095512495149, "grad_norm": 2.131911277770996, "learning_rate": 0.00019991599951999726, "loss": 1.5208, "step": 300 }, { "epoch": 0.010449944764577673, "grad_norm": 3.180992364883423, "learning_rate": 0.00019990171372407845, "loss": 1.5586, "step": 350 }, { "epoch": 0.011942794016660197, "grad_norm": 3.024989128112793, "learning_rate": 0.0001998874279281596, "loss": 1.5267, "step": 400 }, { "epoch": 0.013435643268742723, "grad_norm": 3.4935102462768555, "learning_rate": 0.00019987314213224078, "loss": 1.5609, "step": 450 }, { "epoch": 0.014928492520825247, "grad_norm": 3.565504550933838, "learning_rate": 0.00019985885633632194, "loss": 1.5112, "step": 500 }, { "epoch": 0.01642134177290777, "grad_norm": 2.0692882537841797, "learning_rate": 0.00019984457054040308, "loss": 1.5087, "step": 550 }, { "epoch": 0.017914191024990297, "grad_norm": 4.110323905944824, "learning_rate": 0.00019983028474448427, "loss": 1.5073, "step": 600 }, { "epoch": 0.01940704027707282, "grad_norm": 2.8632736206054688, "learning_rate": 0.0001998159989485654, "loss": 1.472, "step": 650 }, { "epoch": 0.020899889529155346, "grad_norm": 2.629347324371338, "learning_rate": 0.0001998017131526466, "loss": 1.5236, "step": 700 }, { "epoch": 0.022392738781237872, "grad_norm": 3.696873188018799, "learning_rate": 0.00019978742735672774, "loss": 1.5216, "step": 750 }, { "epoch": 0.023885588033320394, "grad_norm": 3.253305435180664, "learning_rate": 0.00019977314156080893, "loss": 1.5915, "step": 800 }, { "epoch": 0.02537843728540292, "grad_norm": 2.9587886333465576, "learning_rate": 0.0001997588557648901, "loss": 1.4862, "step": 850 }, { "epoch": 0.026871286537485446, "grad_norm": 3.3311073780059814, "learning_rate": 0.00019974456996897126, "loss": 1.4703, "step": 900 }, { "epoch": 0.02836413578956797, "grad_norm": 2.5960264205932617, "learning_rate": 0.00019973028417305243, "loss": 1.4818, "step": 950 }, { "epoch": 0.029856985041650495, "grad_norm": 3.3142144680023193, "learning_rate": 0.0001997159983771336, "loss": 1.4746, "step": 1000 }, { "epoch": 0.03134983429373302, "grad_norm": 3.5049827098846436, "learning_rate": 0.00019970171258121476, "loss": 1.442, "step": 1050 }, { "epoch": 0.03284268354581554, "grad_norm": 3.0218605995178223, "learning_rate": 0.00019968742678529592, "loss": 1.5265, "step": 1100 }, { "epoch": 0.03433553279789807, "grad_norm": 2.936182975769043, "learning_rate": 0.00019967314098937709, "loss": 1.5174, "step": 1150 }, { "epoch": 0.035828382049980595, "grad_norm": 2.877253293991089, "learning_rate": 0.00019965885519345825, "loss": 1.4499, "step": 1200 }, { "epoch": 0.03732123130206312, "grad_norm": 6.07016658782959, "learning_rate": 0.00019964456939753941, "loss": 1.4542, "step": 1250 }, { "epoch": 0.03881408055414564, "grad_norm": 2.1618189811706543, "learning_rate": 0.0001996302836016206, "loss": 1.4343, "step": 1300 }, { "epoch": 0.040306929806228166, "grad_norm": 2.8267719745635986, "learning_rate": 0.00019961599780570174, "loss": 1.463, "step": 1350 }, { "epoch": 0.04179977905831069, "grad_norm": 2.6036462783813477, "learning_rate": 0.00019960171200978294, "loss": 1.4557, "step": 1400 }, { "epoch": 0.04329262831039322, "grad_norm": 3.0187127590179443, "learning_rate": 0.00019958742621386407, "loss": 1.4472, "step": 1450 }, { "epoch": 0.044785477562475744, "grad_norm": 3.9822633266448975, "learning_rate": 0.00019957314041794526, "loss": 1.4384, "step": 1500 }, { "epoch": 0.04627832681455826, "grad_norm": 2.919654607772827, "learning_rate": 0.0001995588546220264, "loss": 1.4969, "step": 1550 }, { "epoch": 0.04777117606664079, "grad_norm": 2.922963857650757, "learning_rate": 0.0001995445688261076, "loss": 1.4987, "step": 1600 }, { "epoch": 0.049264025318723315, "grad_norm": 2.9638512134552, "learning_rate": 0.00019953028303018876, "loss": 1.4722, "step": 1650 }, { "epoch": 0.05075687457080584, "grad_norm": 3.408391237258911, "learning_rate": 0.00019951599723426992, "loss": 1.4723, "step": 1700 }, { "epoch": 0.052249723822888366, "grad_norm": 3.023597240447998, "learning_rate": 0.0001995017114383511, "loss": 1.4478, "step": 1750 }, { "epoch": 0.05374257307497089, "grad_norm": 2.1655213832855225, "learning_rate": 0.00019948742564243225, "loss": 1.4071, "step": 1800 }, { "epoch": 0.05523542232705341, "grad_norm": 3.796663999557495, "learning_rate": 0.00019947313984651342, "loss": 1.446, "step": 1850 }, { "epoch": 0.05672827157913594, "grad_norm": 3.0415594577789307, "learning_rate": 0.00019945885405059458, "loss": 1.4324, "step": 1900 }, { "epoch": 0.05822112083121846, "grad_norm": 2.524627685546875, "learning_rate": 0.00019944456825467575, "loss": 1.3963, "step": 1950 }, { "epoch": 0.05971397008330099, "grad_norm": 3.2881991863250732, "learning_rate": 0.0001994302824587569, "loss": 1.4522, "step": 2000 }, { "epoch": 0.061206819335383515, "grad_norm": 3.392430067062378, "learning_rate": 0.00019941599666283808, "loss": 1.4329, "step": 2050 }, { "epoch": 0.06269966858746603, "grad_norm": 3.9426393508911133, "learning_rate": 0.00019940171086691927, "loss": 1.5203, "step": 2100 }, { "epoch": 0.06419251783954856, "grad_norm": 3.3737235069274902, "learning_rate": 0.0001993874250710004, "loss": 1.3674, "step": 2150 }, { "epoch": 0.06568536709163109, "grad_norm": 3.783085346221924, "learning_rate": 0.0001993731392750816, "loss": 1.4339, "step": 2200 }, { "epoch": 0.06717821634371361, "grad_norm": 3.4819202423095703, "learning_rate": 0.00019935885347916273, "loss": 1.4436, "step": 2250 }, { "epoch": 0.06867106559579614, "grad_norm": 3.141775608062744, "learning_rate": 0.00019934456768324393, "loss": 1.4683, "step": 2300 }, { "epoch": 0.07016391484787866, "grad_norm": 3.2881035804748535, "learning_rate": 0.00019933028188732506, "loss": 1.4395, "step": 2350 }, { "epoch": 0.07165676409996119, "grad_norm": 3.718122959136963, "learning_rate": 0.00019931599609140626, "loss": 1.4396, "step": 2400 }, { "epoch": 0.07314961335204372, "grad_norm": 4.3829474449157715, "learning_rate": 0.00019930171029548742, "loss": 1.4477, "step": 2450 }, { "epoch": 0.07464246260412624, "grad_norm": 3.3698525428771973, "learning_rate": 0.00019928742449956858, "loss": 1.3529, "step": 2500 }, { "epoch": 0.07613531185620875, "grad_norm": 3.7569565773010254, "learning_rate": 0.00019927313870364975, "loss": 1.4246, "step": 2550 }, { "epoch": 0.07762816110829128, "grad_norm": 3.1486406326293945, "learning_rate": 0.00019925885290773091, "loss": 1.3813, "step": 2600 }, { "epoch": 0.0791210103603738, "grad_norm": 4.0635480880737305, "learning_rate": 0.00019924456711181208, "loss": 1.4259, "step": 2650 }, { "epoch": 0.08061385961245633, "grad_norm": 3.2710611820220947, "learning_rate": 0.00019923028131589324, "loss": 1.3747, "step": 2700 }, { "epoch": 0.08210670886453886, "grad_norm": 3.4968345165252686, "learning_rate": 0.0001992159955199744, "loss": 1.4721, "step": 2750 }, { "epoch": 0.08359955811662138, "grad_norm": 4.274214267730713, "learning_rate": 0.00019920170972405557, "loss": 1.437, "step": 2800 }, { "epoch": 0.08509240736870391, "grad_norm": 2.970602512359619, "learning_rate": 0.00019918742392813674, "loss": 1.3336, "step": 2850 }, { "epoch": 0.08658525662078644, "grad_norm": 4.143342971801758, "learning_rate": 0.00019917313813221793, "loss": 1.4264, "step": 2900 }, { "epoch": 0.08807810587286896, "grad_norm": 3.7546920776367188, "learning_rate": 0.00019915885233629907, "loss": 1.441, "step": 2950 }, { "epoch": 0.08957095512495149, "grad_norm": 3.9160516262054443, "learning_rate": 0.00019914456654038026, "loss": 1.4261, "step": 3000 }, { "epoch": 0.09106380437703401, "grad_norm": 3.842073917388916, "learning_rate": 0.0001991302807444614, "loss": 1.4076, "step": 3050 }, { "epoch": 0.09255665362911653, "grad_norm": 4.392395496368408, "learning_rate": 0.0001991159949485426, "loss": 1.3789, "step": 3100 }, { "epoch": 0.09404950288119905, "grad_norm": 3.822425603866577, "learning_rate": 0.00019910170915262373, "loss": 1.3877, "step": 3150 }, { "epoch": 0.09554235213328158, "grad_norm": 3.1348562240600586, "learning_rate": 0.0001990874233567049, "loss": 1.4081, "step": 3200 }, { "epoch": 0.0970352013853641, "grad_norm": 3.453887939453125, "learning_rate": 0.00019907313756078608, "loss": 1.4143, "step": 3250 }, { "epoch": 0.09852805063744663, "grad_norm": 3.5057384967803955, "learning_rate": 0.00019905885176486722, "loss": 1.4264, "step": 3300 }, { "epoch": 0.10002089988952916, "grad_norm": 3.145796060562134, "learning_rate": 0.0001990445659689484, "loss": 1.4368, "step": 3350 }, { "epoch": 0.10151374914161168, "grad_norm": 3.4077043533325195, "learning_rate": 0.00019903028017302955, "loss": 1.388, "step": 3400 }, { "epoch": 0.10300659839369421, "grad_norm": 3.65567946434021, "learning_rate": 0.00019901599437711074, "loss": 1.42, "step": 3450 }, { "epoch": 0.10449944764577673, "grad_norm": 4.460702419281006, "learning_rate": 0.0001990017085811919, "loss": 1.3991, "step": 3500 }, { "epoch": 0.10599229689785926, "grad_norm": 4.155653476715088, "learning_rate": 0.00019898742278527307, "loss": 1.371, "step": 3550 }, { "epoch": 0.10748514614994178, "grad_norm": 3.8904318809509277, "learning_rate": 0.00019897313698935423, "loss": 1.4378, "step": 3600 }, { "epoch": 0.1089779954020243, "grad_norm": 4.0509233474731445, "learning_rate": 0.0001989588511934354, "loss": 1.3945, "step": 3650 }, { "epoch": 0.11047084465410682, "grad_norm": 3.785123109817505, "learning_rate": 0.00019894456539751656, "loss": 1.436, "step": 3700 }, { "epoch": 0.11196369390618935, "grad_norm": 3.4556167125701904, "learning_rate": 0.00019893027960159773, "loss": 1.3794, "step": 3750 }, { "epoch": 0.11345654315827187, "grad_norm": 4.0479559898376465, "learning_rate": 0.0001989159938056789, "loss": 1.4734, "step": 3800 }, { "epoch": 0.1149493924103544, "grad_norm": 3.890805721282959, "learning_rate": 0.00019890170800976006, "loss": 1.4341, "step": 3850 }, { "epoch": 0.11644224166243693, "grad_norm": 3.8178727626800537, "learning_rate": 0.00019888742221384122, "loss": 1.4754, "step": 3900 }, { "epoch": 0.11793509091451945, "grad_norm": 2.456165075302124, "learning_rate": 0.00019887313641792241, "loss": 1.3887, "step": 3950 }, { "epoch": 0.11942794016660198, "grad_norm": 3.5763051509857178, "learning_rate": 0.00019885885062200355, "loss": 1.3901, "step": 4000 }, { "epoch": 0.1209207894186845, "grad_norm": 3.885662317276001, "learning_rate": 0.00019884456482608474, "loss": 1.3856, "step": 4050 }, { "epoch": 0.12241363867076703, "grad_norm": 3.6095409393310547, "learning_rate": 0.00019883027903016588, "loss": 1.448, "step": 4100 }, { "epoch": 0.12390648792284956, "grad_norm": 3.7112534046173096, "learning_rate": 0.00019881599323424707, "loss": 1.3537, "step": 4150 }, { "epoch": 0.12539933717493207, "grad_norm": 3.3566672801971436, "learning_rate": 0.0001988017074383282, "loss": 1.4389, "step": 4200 }, { "epoch": 0.1268921864270146, "grad_norm": 4.570401191711426, "learning_rate": 0.0001987874216424094, "loss": 1.4191, "step": 4250 }, { "epoch": 0.12838503567909712, "grad_norm": 4.455029010772705, "learning_rate": 0.00019877313584649057, "loss": 1.3677, "step": 4300 }, { "epoch": 0.12987788493117966, "grad_norm": 3.0861828327178955, "learning_rate": 0.00019875885005057173, "loss": 1.3677, "step": 4350 }, { "epoch": 0.13137073418326217, "grad_norm": 4.419896602630615, "learning_rate": 0.0001987445642546529, "loss": 1.4524, "step": 4400 }, { "epoch": 0.1328635834353447, "grad_norm": 5.187576770782471, "learning_rate": 0.00019873027845873406, "loss": 1.3868, "step": 4450 }, { "epoch": 0.13435643268742722, "grad_norm": 5.111696243286133, "learning_rate": 0.00019871599266281523, "loss": 1.4458, "step": 4500 }, { "epoch": 0.13584928193950974, "grad_norm": 3.2652997970581055, "learning_rate": 0.0001987017068668964, "loss": 1.4529, "step": 4550 }, { "epoch": 0.13734213119159228, "grad_norm": 4.190273761749268, "learning_rate": 0.00019868742107097755, "loss": 1.3991, "step": 4600 }, { "epoch": 0.1388349804436748, "grad_norm": 4.85620641708374, "learning_rate": 0.00019867313527505872, "loss": 1.3916, "step": 4650 }, { "epoch": 0.14032782969575733, "grad_norm": 3.030954360961914, "learning_rate": 0.00019865884947913988, "loss": 1.3805, "step": 4700 }, { "epoch": 0.14182067894783984, "grad_norm": 3.264406681060791, "learning_rate": 0.00019864456368322108, "loss": 1.4048, "step": 4750 }, { "epoch": 0.14331352819992238, "grad_norm": 3.2138588428497314, "learning_rate": 0.0001986302778873022, "loss": 1.4092, "step": 4800 }, { "epoch": 0.1448063774520049, "grad_norm": 3.847222328186035, "learning_rate": 0.0001986159920913834, "loss": 1.3871, "step": 4850 }, { "epoch": 0.14629922670408743, "grad_norm": 4.004987716674805, "learning_rate": 0.00019860170629546454, "loss": 1.3845, "step": 4900 }, { "epoch": 0.14779207595616994, "grad_norm": 3.5088725090026855, "learning_rate": 0.00019858742049954573, "loss": 1.379, "step": 4950 }, { "epoch": 0.14928492520825248, "grad_norm": 3.275099277496338, "learning_rate": 0.00019857313470362687, "loss": 1.3628, "step": 5000 }, { "epoch": 0.150777774460335, "grad_norm": 3.7903060913085938, "learning_rate": 0.00019855884890770806, "loss": 1.3804, "step": 5050 }, { "epoch": 0.1522706237124175, "grad_norm": 4.294798374176025, "learning_rate": 0.00019854456311178923, "loss": 1.3988, "step": 5100 }, { "epoch": 0.15376347296450005, "grad_norm": 3.2719295024871826, "learning_rate": 0.0001985302773158704, "loss": 1.387, "step": 5150 }, { "epoch": 0.15525632221658256, "grad_norm": 4.143224239349365, "learning_rate": 0.00019851599151995156, "loss": 1.393, "step": 5200 }, { "epoch": 0.1567491714686651, "grad_norm": 3.404754638671875, "learning_rate": 0.00019850170572403272, "loss": 1.4205, "step": 5250 }, { "epoch": 0.1582420207207476, "grad_norm": 3.607126474380493, "learning_rate": 0.0001984874199281139, "loss": 1.4239, "step": 5300 }, { "epoch": 0.15973486997283015, "grad_norm": 4.140823841094971, "learning_rate": 0.00019847313413219505, "loss": 1.4204, "step": 5350 }, { "epoch": 0.16122771922491266, "grad_norm": 3.893251419067383, "learning_rate": 0.00019845884833627622, "loss": 1.392, "step": 5400 }, { "epoch": 0.1627205684769952, "grad_norm": 4.304211139678955, "learning_rate": 0.00019844456254035738, "loss": 1.44, "step": 5450 }, { "epoch": 0.16421341772907772, "grad_norm": 5.273501873016357, "learning_rate": 0.00019843027674443855, "loss": 1.445, "step": 5500 }, { "epoch": 0.16570626698116026, "grad_norm": 4.787700176239014, "learning_rate": 0.00019841599094851974, "loss": 1.3668, "step": 5550 }, { "epoch": 0.16719911623324277, "grad_norm": 3.7984108924865723, "learning_rate": 0.00019840170515260087, "loss": 1.3523, "step": 5600 }, { "epoch": 0.16869196548532528, "grad_norm": 3.885608673095703, "learning_rate": 0.00019838741935668207, "loss": 1.3917, "step": 5650 }, { "epoch": 0.17018481473740782, "grad_norm": 3.459803342819214, "learning_rate": 0.0001983731335607632, "loss": 1.3833, "step": 5700 }, { "epoch": 0.17167766398949033, "grad_norm": 3.7103006839752197, "learning_rate": 0.0001983588477648444, "loss": 1.4473, "step": 5750 }, { "epoch": 0.17317051324157287, "grad_norm": 6.645928382873535, "learning_rate": 0.00019834456196892553, "loss": 1.3706, "step": 5800 }, { "epoch": 0.17466336249365538, "grad_norm": 3.7201037406921387, "learning_rate": 0.0001983302761730067, "loss": 1.3733, "step": 5850 }, { "epoch": 0.17615621174573792, "grad_norm": 4.050106048583984, "learning_rate": 0.0001983159903770879, "loss": 1.4096, "step": 5900 }, { "epoch": 0.17764906099782043, "grad_norm": 4.190842628479004, "learning_rate": 0.00019830170458116903, "loss": 1.4404, "step": 5950 }, { "epoch": 0.17914191024990297, "grad_norm": 4.393162727355957, "learning_rate": 0.00019828741878525022, "loss": 1.4443, "step": 6000 }, { "epoch": 0.1806347595019855, "grad_norm": 3.597520351409912, "learning_rate": 0.00019827313298933136, "loss": 1.4063, "step": 6050 }, { "epoch": 0.18212760875406803, "grad_norm": 3.608085870742798, "learning_rate": 0.00019825884719341255, "loss": 1.3857, "step": 6100 }, { "epoch": 0.18362045800615054, "grad_norm": 3.7055492401123047, "learning_rate": 0.0001982445613974937, "loss": 1.3997, "step": 6150 }, { "epoch": 0.18511330725823305, "grad_norm": 3.875457763671875, "learning_rate": 0.00019823027560157488, "loss": 1.4296, "step": 6200 }, { "epoch": 0.1866061565103156, "grad_norm": 5.074592590332031, "learning_rate": 0.00019821598980565604, "loss": 1.3785, "step": 6250 }, { "epoch": 0.1880990057623981, "grad_norm": 6.013392448425293, "learning_rate": 0.0001982017040097372, "loss": 1.4391, "step": 6300 }, { "epoch": 0.18959185501448064, "grad_norm": 5.679958820343018, "learning_rate": 0.00019818741821381837, "loss": 1.367, "step": 6350 }, { "epoch": 0.19108470426656315, "grad_norm": 3.6182546615600586, "learning_rate": 0.00019817313241789954, "loss": 1.4508, "step": 6400 }, { "epoch": 0.1925775535186457, "grad_norm": 5.209213733673096, "learning_rate": 0.0001981588466219807, "loss": 1.3878, "step": 6450 }, { "epoch": 0.1940704027707282, "grad_norm": 3.0043230056762695, "learning_rate": 0.00019814456082606187, "loss": 1.4248, "step": 6500 }, { "epoch": 0.19556325202281075, "grad_norm": 3.157851219177246, "learning_rate": 0.00019813027503014303, "loss": 1.3725, "step": 6550 }, { "epoch": 0.19705610127489326, "grad_norm": 3.5292418003082275, "learning_rate": 0.0001981159892342242, "loss": 1.3932, "step": 6600 }, { "epoch": 0.1985489505269758, "grad_norm": 3.2819600105285645, "learning_rate": 0.00019810170343830536, "loss": 1.3495, "step": 6650 }, { "epoch": 0.2000417997790583, "grad_norm": 3.0243399143218994, "learning_rate": 0.00019808741764238655, "loss": 1.3689, "step": 6700 }, { "epoch": 0.20153464903114082, "grad_norm": 3.4495368003845215, "learning_rate": 0.0001980731318464677, "loss": 1.3725, "step": 6750 }, { "epoch": 0.20302749828322336, "grad_norm": 3.538259744644165, "learning_rate": 0.00019805884605054888, "loss": 1.3905, "step": 6800 }, { "epoch": 0.20452034753530587, "grad_norm": 4.162181377410889, "learning_rate": 0.00019804456025463002, "loss": 1.4129, "step": 6850 }, { "epoch": 0.20601319678738841, "grad_norm": 4.592432022094727, "learning_rate": 0.0001980302744587112, "loss": 1.3634, "step": 6900 }, { "epoch": 0.20750604603947093, "grad_norm": 3.45967960357666, "learning_rate": 0.00019801598866279237, "loss": 1.416, "step": 6950 }, { "epoch": 0.20899889529155347, "grad_norm": 4.221930503845215, "learning_rate": 0.00019800170286687354, "loss": 1.4051, "step": 7000 }, { "epoch": 0.21049174454363598, "grad_norm": 4.144239902496338, "learning_rate": 0.0001979874170709547, "loss": 1.4219, "step": 7050 }, { "epoch": 0.21198459379571852, "grad_norm": 4.7492570877075195, "learning_rate": 0.00019797313127503587, "loss": 1.4028, "step": 7100 }, { "epoch": 0.21347744304780103, "grad_norm": 3.5841355323791504, "learning_rate": 0.00019795884547911703, "loss": 1.4361, "step": 7150 }, { "epoch": 0.21497029229988357, "grad_norm": 4.662593364715576, "learning_rate": 0.0001979445596831982, "loss": 1.3816, "step": 7200 }, { "epoch": 0.21646314155196608, "grad_norm": 4.700701713562012, "learning_rate": 0.00019793027388727936, "loss": 1.4226, "step": 7250 }, { "epoch": 0.2179559908040486, "grad_norm": 4.025181293487549, "learning_rate": 0.00019791598809136053, "loss": 1.4291, "step": 7300 }, { "epoch": 0.21944884005613113, "grad_norm": 3.064573049545288, "learning_rate": 0.0001979017022954417, "loss": 1.4293, "step": 7350 }, { "epoch": 0.22094168930821365, "grad_norm": 6.342152118682861, "learning_rate": 0.00019788741649952288, "loss": 1.4173, "step": 7400 }, { "epoch": 0.22243453856029619, "grad_norm": 5.89996337890625, "learning_rate": 0.00019787313070360402, "loss": 1.396, "step": 7450 }, { "epoch": 0.2239273878123787, "grad_norm": 4.462945938110352, "learning_rate": 0.0001978588449076852, "loss": 1.3868, "step": 7500 }, { "epoch": 0.22542023706446124, "grad_norm": 3.6449055671691895, "learning_rate": 0.00019784455911176635, "loss": 1.396, "step": 7550 }, { "epoch": 0.22691308631654375, "grad_norm": 4.674243927001953, "learning_rate": 0.00019783027331584754, "loss": 1.395, "step": 7600 }, { "epoch": 0.2284059355686263, "grad_norm": 3.6160385608673096, "learning_rate": 0.00019781598751992868, "loss": 1.3918, "step": 7650 }, { "epoch": 0.2298987848207088, "grad_norm": 4.326193332672119, "learning_rate": 0.00019780170172400987, "loss": 1.3947, "step": 7700 }, { "epoch": 0.23139163407279134, "grad_norm": 5.4003777503967285, "learning_rate": 0.00019778741592809104, "loss": 1.344, "step": 7750 }, { "epoch": 0.23288448332487385, "grad_norm": 4.711580753326416, "learning_rate": 0.0001977731301321722, "loss": 1.3959, "step": 7800 }, { "epoch": 0.23437733257695637, "grad_norm": 3.4752814769744873, "learning_rate": 0.00019775884433625337, "loss": 1.3722, "step": 7850 }, { "epoch": 0.2358701818290389, "grad_norm": 4.028527736663818, "learning_rate": 0.00019774455854033453, "loss": 1.3683, "step": 7900 }, { "epoch": 0.23736303108112142, "grad_norm": 4.094334602355957, "learning_rate": 0.0001977302727444157, "loss": 1.3607, "step": 7950 }, { "epoch": 0.23885588033320396, "grad_norm": 5.232580661773682, "learning_rate": 0.00019771598694849686, "loss": 1.4354, "step": 8000 }, { "epoch": 0.24034872958528647, "grad_norm": 4.269852161407471, "learning_rate": 0.00019770170115257802, "loss": 1.4372, "step": 8050 }, { "epoch": 0.241841578837369, "grad_norm": 3.312541961669922, "learning_rate": 0.0001976874153566592, "loss": 1.3826, "step": 8100 }, { "epoch": 0.24333442808945152, "grad_norm": 3.8900692462921143, "learning_rate": 0.00019767312956074035, "loss": 1.4189, "step": 8150 }, { "epoch": 0.24482727734153406, "grad_norm": 3.894512414932251, "learning_rate": 0.00019765884376482155, "loss": 1.3365, "step": 8200 }, { "epoch": 0.24632012659361657, "grad_norm": 4.644411563873291, "learning_rate": 0.00019764455796890268, "loss": 1.4311, "step": 8250 }, { "epoch": 0.2478129758456991, "grad_norm": 8.174029350280762, "learning_rate": 0.00019763027217298387, "loss": 1.361, "step": 8300 }, { "epoch": 0.24930582509778162, "grad_norm": 4.615732192993164, "learning_rate": 0.000197615986377065, "loss": 1.4552, "step": 8350 }, { "epoch": 0.25079867434986414, "grad_norm": 4.421249866485596, "learning_rate": 0.0001976017005811462, "loss": 1.3463, "step": 8400 }, { "epoch": 0.2522915236019467, "grad_norm": 2.8386716842651367, "learning_rate": 0.00019758741478522734, "loss": 1.348, "step": 8450 }, { "epoch": 0.2537843728540292, "grad_norm": 4.3141703605651855, "learning_rate": 0.0001975731289893085, "loss": 1.4306, "step": 8500 }, { "epoch": 0.2552772221061117, "grad_norm": 3.947331428527832, "learning_rate": 0.0001975588431933897, "loss": 1.3823, "step": 8550 }, { "epoch": 0.25677007135819424, "grad_norm": 3.2268636226654053, "learning_rate": 0.00019754455739747084, "loss": 1.4199, "step": 8600 }, { "epoch": 0.2582629206102768, "grad_norm": 4.0353102684021, "learning_rate": 0.00019753027160155203, "loss": 1.3927, "step": 8650 }, { "epoch": 0.2597557698623593, "grad_norm": 3.490560293197632, "learning_rate": 0.00019751598580563316, "loss": 1.401, "step": 8700 }, { "epoch": 0.2612486191144418, "grad_norm": 5.577207088470459, "learning_rate": 0.00019750170000971436, "loss": 1.3586, "step": 8750 }, { "epoch": 0.26274146836652434, "grad_norm": 4.168467998504639, "learning_rate": 0.0001974874142137955, "loss": 1.3303, "step": 8800 }, { "epoch": 0.2642343176186069, "grad_norm": 3.812627077102661, "learning_rate": 0.00019747312841787669, "loss": 1.3717, "step": 8850 }, { "epoch": 0.2657271668706894, "grad_norm": 4.875237464904785, "learning_rate": 0.00019745884262195785, "loss": 1.3873, "step": 8900 }, { "epoch": 0.2672200161227719, "grad_norm": 4.048189163208008, "learning_rate": 0.00019744455682603902, "loss": 1.3775, "step": 8950 }, { "epoch": 0.26871286537485445, "grad_norm": 3.9090261459350586, "learning_rate": 0.00019743027103012018, "loss": 1.4296, "step": 9000 }, { "epoch": 0.270205714626937, "grad_norm": 2.8476953506469727, "learning_rate": 0.00019741598523420134, "loss": 1.4175, "step": 9050 }, { "epoch": 0.2716985638790195, "grad_norm": 5.782102584838867, "learning_rate": 0.0001974016994382825, "loss": 1.3835, "step": 9100 }, { "epoch": 0.273191413131102, "grad_norm": 4.640264987945557, "learning_rate": 0.00019738741364236367, "loss": 1.4524, "step": 9150 }, { "epoch": 0.27468426238318455, "grad_norm": 4.81790828704834, "learning_rate": 0.00019737312784644484, "loss": 1.3183, "step": 9200 }, { "epoch": 0.2761771116352671, "grad_norm": 2.685009717941284, "learning_rate": 0.000197358842050526, "loss": 1.3243, "step": 9250 }, { "epoch": 0.2776699608873496, "grad_norm": 5.321321487426758, "learning_rate": 0.00019734455625460717, "loss": 1.4086, "step": 9300 }, { "epoch": 0.2791628101394321, "grad_norm": 3.065791368484497, "learning_rate": 0.00019733027045868836, "loss": 1.337, "step": 9350 }, { "epoch": 0.28065565939151466, "grad_norm": 4.3569817543029785, "learning_rate": 0.0001973159846627695, "loss": 1.4082, "step": 9400 }, { "epoch": 0.2821485086435972, "grad_norm": 4.67582368850708, "learning_rate": 0.0001973016988668507, "loss": 1.3832, "step": 9450 }, { "epoch": 0.2836413578956797, "grad_norm": 4.942144870758057, "learning_rate": 0.00019728741307093183, "loss": 1.3734, "step": 9500 }, { "epoch": 0.2851342071477622, "grad_norm": 4.853246688842773, "learning_rate": 0.00019727312727501302, "loss": 1.4111, "step": 9550 }, { "epoch": 0.28662705639984476, "grad_norm": 3.071237325668335, "learning_rate": 0.00019725884147909418, "loss": 1.3746, "step": 9600 }, { "epoch": 0.28811990565192724, "grad_norm": 4.844615459442139, "learning_rate": 0.00019724455568317535, "loss": 1.3051, "step": 9650 }, { "epoch": 0.2896127549040098, "grad_norm": 5.954223155975342, "learning_rate": 0.0001972302698872565, "loss": 1.4131, "step": 9700 }, { "epoch": 0.2911056041560923, "grad_norm": 3.6717801094055176, "learning_rate": 0.00019721598409133768, "loss": 1.4166, "step": 9750 }, { "epoch": 0.29259845340817486, "grad_norm": 3.6257095336914062, "learning_rate": 0.00019720169829541884, "loss": 1.3679, "step": 9800 }, { "epoch": 0.29409130266025735, "grad_norm": 4.245635032653809, "learning_rate": 0.0001971874124995, "loss": 1.3171, "step": 9850 }, { "epoch": 0.2955841519123399, "grad_norm": 5.362602710723877, "learning_rate": 0.00019717312670358117, "loss": 1.3932, "step": 9900 }, { "epoch": 0.2970770011644224, "grad_norm": 4.6283721923828125, "learning_rate": 0.00019715884090766234, "loss": 1.3757, "step": 9950 }, { "epoch": 0.29856985041650497, "grad_norm": 4.299574851989746, "learning_rate": 0.0001971445551117435, "loss": 1.4018, "step": 10000 }, { "epoch": 0.30006269966858745, "grad_norm": 4.7913641929626465, "learning_rate": 0.0001971302693158247, "loss": 1.4228, "step": 10050 }, { "epoch": 0.30155554892067, "grad_norm": 5.312823295593262, "learning_rate": 0.00019711598351990583, "loss": 1.3866, "step": 10100 }, { "epoch": 0.30304839817275253, "grad_norm": 4.099662780761719, "learning_rate": 0.00019710169772398702, "loss": 1.454, "step": 10150 }, { "epoch": 0.304541247424835, "grad_norm": 4.254878520965576, "learning_rate": 0.00019708741192806816, "loss": 1.3526, "step": 10200 }, { "epoch": 0.30603409667691756, "grad_norm": 4.056606292724609, "learning_rate": 0.00019707312613214935, "loss": 1.4167, "step": 10250 }, { "epoch": 0.3075269459290001, "grad_norm": 3.790809154510498, "learning_rate": 0.0001970588403362305, "loss": 1.3536, "step": 10300 }, { "epoch": 0.30901979518108263, "grad_norm": 4.46298360824585, "learning_rate": 0.00019704455454031168, "loss": 1.3613, "step": 10350 }, { "epoch": 0.3105126444331651, "grad_norm": 4.52452278137207, "learning_rate": 0.00019703026874439284, "loss": 1.4591, "step": 10400 }, { "epoch": 0.31200549368524766, "grad_norm": 4.735177040100098, "learning_rate": 0.000197015982948474, "loss": 1.4617, "step": 10450 }, { "epoch": 0.3134983429373302, "grad_norm": 4.48261833190918, "learning_rate": 0.00019700169715255517, "loss": 1.4072, "step": 10500 }, { "epoch": 0.31499119218941274, "grad_norm": 3.3441503047943115, "learning_rate": 0.00019698741135663634, "loss": 1.4082, "step": 10550 }, { "epoch": 0.3164840414414952, "grad_norm": 3.9771218299865723, "learning_rate": 0.0001969731255607175, "loss": 1.4184, "step": 10600 }, { "epoch": 0.31797689069357776, "grad_norm": 6.366194725036621, "learning_rate": 0.00019695883976479867, "loss": 1.387, "step": 10650 }, { "epoch": 0.3194697399456603, "grad_norm": 5.072678089141846, "learning_rate": 0.00019694455396887983, "loss": 1.3996, "step": 10700 }, { "epoch": 0.3209625891977428, "grad_norm": 3.7204978466033936, "learning_rate": 0.000196930268172961, "loss": 1.3774, "step": 10750 }, { "epoch": 0.3224554384498253, "grad_norm": 4.47731351852417, "learning_rate": 0.00019691598237704216, "loss": 1.3552, "step": 10800 }, { "epoch": 0.32394828770190787, "grad_norm": 3.4569220542907715, "learning_rate": 0.00019690169658112335, "loss": 1.3794, "step": 10850 }, { "epoch": 0.3254411369539904, "grad_norm": 4.344145774841309, "learning_rate": 0.0001968874107852045, "loss": 1.4419, "step": 10900 }, { "epoch": 0.3269339862060729, "grad_norm": 4.089848041534424, "learning_rate": 0.00019687312498928568, "loss": 1.4116, "step": 10950 }, { "epoch": 0.32842683545815543, "grad_norm": 3.995945930480957, "learning_rate": 0.00019685883919336682, "loss": 1.3532, "step": 11000 }, { "epoch": 0.32991968471023797, "grad_norm": 3.8309378623962402, "learning_rate": 0.00019684455339744798, "loss": 1.3957, "step": 11050 }, { "epoch": 0.3314125339623205, "grad_norm": 4.386235237121582, "learning_rate": 0.00019683026760152915, "loss": 1.3616, "step": 11100 }, { "epoch": 0.332905383214403, "grad_norm": 5.133239269256592, "learning_rate": 0.00019681598180561031, "loss": 1.3959, "step": 11150 }, { "epoch": 0.33439823246648553, "grad_norm": 4.216183662414551, "learning_rate": 0.0001968016960096915, "loss": 1.3446, "step": 11200 }, { "epoch": 0.3358910817185681, "grad_norm": 3.631131172180176, "learning_rate": 0.00019678741021377264, "loss": 1.3779, "step": 11250 }, { "epoch": 0.33738393097065056, "grad_norm": 4.603448390960693, "learning_rate": 0.00019677312441785384, "loss": 1.3962, "step": 11300 }, { "epoch": 0.3388767802227331, "grad_norm": 3.6482913494110107, "learning_rate": 0.00019675883862193497, "loss": 1.3931, "step": 11350 }, { "epoch": 0.34036962947481564, "grad_norm": 5.040388107299805, "learning_rate": 0.00019674455282601616, "loss": 1.3465, "step": 11400 }, { "epoch": 0.3418624787268982, "grad_norm": 5.762825012207031, "learning_rate": 0.0001967302670300973, "loss": 1.3577, "step": 11450 }, { "epoch": 0.34335532797898066, "grad_norm": 4.941501617431641, "learning_rate": 0.0001967159812341785, "loss": 1.3676, "step": 11500 }, { "epoch": 0.3448481772310632, "grad_norm": 5.368370532989502, "learning_rate": 0.00019670169543825966, "loss": 1.4265, "step": 11550 }, { "epoch": 0.34634102648314574, "grad_norm": 4.931522369384766, "learning_rate": 0.00019668740964234082, "loss": 1.3551, "step": 11600 }, { "epoch": 0.3478338757352283, "grad_norm": 3.9685990810394287, "learning_rate": 0.000196673123846422, "loss": 1.402, "step": 11650 }, { "epoch": 0.34932672498731077, "grad_norm": 5.771200656890869, "learning_rate": 0.00019665883805050315, "loss": 1.3596, "step": 11700 }, { "epoch": 0.3508195742393933, "grad_norm": 5.142852306365967, "learning_rate": 0.00019664455225458432, "loss": 1.385, "step": 11750 }, { "epoch": 0.35231242349147585, "grad_norm": 3.295628786087036, "learning_rate": 0.00019663026645866548, "loss": 1.3454, "step": 11800 }, { "epoch": 0.35380527274355833, "grad_norm": 4.34658145904541, "learning_rate": 0.00019661598066274665, "loss": 1.3976, "step": 11850 }, { "epoch": 0.35529812199564087, "grad_norm": 4.032591819763184, "learning_rate": 0.0001966016948668278, "loss": 1.3571, "step": 11900 }, { "epoch": 0.3567909712477234, "grad_norm": 3.9286158084869385, "learning_rate": 0.00019658740907090898, "loss": 1.398, "step": 11950 }, { "epoch": 0.35828382049980595, "grad_norm": 5.184597492218018, "learning_rate": 0.00019657312327499017, "loss": 1.3827, "step": 12000 }, { "epoch": 0.35977666975188843, "grad_norm": 4.4749226570129395, "learning_rate": 0.0001965588374790713, "loss": 1.373, "step": 12050 }, { "epoch": 0.361269519003971, "grad_norm": 3.5633764266967773, "learning_rate": 0.0001965445516831525, "loss": 1.3704, "step": 12100 }, { "epoch": 0.3627623682560535, "grad_norm": 7.570897102355957, "learning_rate": 0.00019653026588723363, "loss": 1.3515, "step": 12150 }, { "epoch": 0.36425521750813605, "grad_norm": 4.239411354064941, "learning_rate": 0.00019651598009131483, "loss": 1.3813, "step": 12200 }, { "epoch": 0.36574806676021854, "grad_norm": 3.8941049575805664, "learning_rate": 0.00019650169429539596, "loss": 1.3954, "step": 12250 }, { "epoch": 0.3672409160123011, "grad_norm": 4.8694586753845215, "learning_rate": 0.00019648740849947716, "loss": 1.3531, "step": 12300 }, { "epoch": 0.3687337652643836, "grad_norm": 3.914964437484741, "learning_rate": 0.00019647312270355832, "loss": 1.3954, "step": 12350 }, { "epoch": 0.3702266145164661, "grad_norm": 3.4050538539886475, "learning_rate": 0.00019645883690763948, "loss": 1.3498, "step": 12400 }, { "epoch": 0.37171946376854864, "grad_norm": 4.436797618865967, "learning_rate": 0.00019644455111172065, "loss": 1.3889, "step": 12450 }, { "epoch": 0.3732123130206312, "grad_norm": 2.7660670280456543, "learning_rate": 0.00019643026531580181, "loss": 1.3392, "step": 12500 }, { "epoch": 0.3747051622727137, "grad_norm": 5.364072799682617, "learning_rate": 0.00019641597951988298, "loss": 1.3157, "step": 12550 }, { "epoch": 0.3761980115247962, "grad_norm": 5.123339653015137, "learning_rate": 0.00019640169372396414, "loss": 1.3523, "step": 12600 }, { "epoch": 0.37769086077687875, "grad_norm": 3.4495439529418945, "learning_rate": 0.0001963874079280453, "loss": 1.3331, "step": 12650 }, { "epoch": 0.3791837100289613, "grad_norm": 4.613680362701416, "learning_rate": 0.00019637312213212647, "loss": 1.3707, "step": 12700 }, { "epoch": 0.3806765592810438, "grad_norm": 3.5819404125213623, "learning_rate": 0.00019635883633620764, "loss": 1.4023, "step": 12750 }, { "epoch": 0.3821694085331263, "grad_norm": 3.5075576305389404, "learning_rate": 0.00019634455054028883, "loss": 1.3815, "step": 12800 }, { "epoch": 0.38366225778520885, "grad_norm": 4.425256729125977, "learning_rate": 0.00019633026474436997, "loss": 1.3801, "step": 12850 }, { "epoch": 0.3851551070372914, "grad_norm": 3.5711112022399902, "learning_rate": 0.00019631597894845116, "loss": 1.3375, "step": 12900 }, { "epoch": 0.3866479562893739, "grad_norm": 5.728016376495361, "learning_rate": 0.0001963016931525323, "loss": 1.3914, "step": 12950 }, { "epoch": 0.3881408055414564, "grad_norm": 4.294504642486572, "learning_rate": 0.0001962874073566135, "loss": 1.4561, "step": 13000 }, { "epoch": 0.38963365479353895, "grad_norm": 5.329941749572754, "learning_rate": 0.00019627312156069465, "loss": 1.4018, "step": 13050 }, { "epoch": 0.3911265040456215, "grad_norm": 4.166362762451172, "learning_rate": 0.00019625883576477582, "loss": 1.4334, "step": 13100 }, { "epoch": 0.392619353297704, "grad_norm": 3.3922691345214844, "learning_rate": 0.00019624454996885698, "loss": 1.3956, "step": 13150 }, { "epoch": 0.3941122025497865, "grad_norm": 4.340898036956787, "learning_rate": 0.00019623026417293815, "loss": 1.3643, "step": 13200 }, { "epoch": 0.39560505180186906, "grad_norm": 6.623823165893555, "learning_rate": 0.0001962159783770193, "loss": 1.362, "step": 13250 }, { "epoch": 0.3970979010539516, "grad_norm": 4.490639686584473, "learning_rate": 0.00019620169258110048, "loss": 1.3742, "step": 13300 }, { "epoch": 0.3985907503060341, "grad_norm": 4.179808139801025, "learning_rate": 0.00019618740678518164, "loss": 1.4191, "step": 13350 }, { "epoch": 0.4000835995581166, "grad_norm": 5.623187065124512, "learning_rate": 0.0001961731209892628, "loss": 1.3793, "step": 13400 }, { "epoch": 0.40157644881019916, "grad_norm": 3.9650678634643555, "learning_rate": 0.00019615883519334397, "loss": 1.3917, "step": 13450 }, { "epoch": 0.40306929806228164, "grad_norm": 5.047702312469482, "learning_rate": 0.00019614454939742516, "loss": 1.4501, "step": 13500 }, { "epoch": 0.4045621473143642, "grad_norm": 3.46647310256958, "learning_rate": 0.0001961302636015063, "loss": 1.4091, "step": 13550 }, { "epoch": 0.4060549965664467, "grad_norm": 3.2203481197357178, "learning_rate": 0.0001961159778055875, "loss": 1.4003, "step": 13600 }, { "epoch": 0.40754784581852926, "grad_norm": 3.727679967880249, "learning_rate": 0.00019610169200966863, "loss": 1.4004, "step": 13650 }, { "epoch": 0.40904069507061175, "grad_norm": 4.469257831573486, "learning_rate": 0.0001960874062137498, "loss": 1.4061, "step": 13700 }, { "epoch": 0.4105335443226943, "grad_norm": 4.041538715362549, "learning_rate": 0.00019607312041783096, "loss": 1.411, "step": 13750 }, { "epoch": 0.41202639357477683, "grad_norm": 5.2691779136657715, "learning_rate": 0.00019605883462191212, "loss": 1.3396, "step": 13800 }, { "epoch": 0.41351924282685937, "grad_norm": 6.236726760864258, "learning_rate": 0.00019604454882599331, "loss": 1.4295, "step": 13850 }, { "epoch": 0.41501209207894185, "grad_norm": 6.703745365142822, "learning_rate": 0.00019603026303007445, "loss": 1.4105, "step": 13900 }, { "epoch": 0.4165049413310244, "grad_norm": 3.997664451599121, "learning_rate": 0.00019601597723415564, "loss": 1.347, "step": 13950 }, { "epoch": 0.41799779058310693, "grad_norm": 5.311407566070557, "learning_rate": 0.00019600169143823678, "loss": 1.4546, "step": 14000 }, { "epoch": 0.4194906398351894, "grad_norm": 4.0283098220825195, "learning_rate": 0.00019598740564231797, "loss": 1.4513, "step": 14050 }, { "epoch": 0.42098348908727196, "grad_norm": 7.345764636993408, "learning_rate": 0.0001959731198463991, "loss": 1.3832, "step": 14100 }, { "epoch": 0.4224763383393545, "grad_norm": 4.324542045593262, "learning_rate": 0.0001959588340504803, "loss": 1.3751, "step": 14150 }, { "epoch": 0.42396918759143704, "grad_norm": 3.8322675228118896, "learning_rate": 0.00019594454825456147, "loss": 1.344, "step": 14200 }, { "epoch": 0.4254620368435195, "grad_norm": 4.62548303604126, "learning_rate": 0.00019593026245864263, "loss": 1.4346, "step": 14250 }, { "epoch": 0.42695488609560206, "grad_norm": 4.585489273071289, "learning_rate": 0.0001959159766627238, "loss": 1.4145, "step": 14300 }, { "epoch": 0.4284477353476846, "grad_norm": 3.64227557182312, "learning_rate": 0.00019590169086680496, "loss": 1.367, "step": 14350 }, { "epoch": 0.42994058459976714, "grad_norm": 4.730580806732178, "learning_rate": 0.00019588740507088613, "loss": 1.3653, "step": 14400 }, { "epoch": 0.4314334338518496, "grad_norm": 4.4075398445129395, "learning_rate": 0.0001958731192749673, "loss": 1.4081, "step": 14450 }, { "epoch": 0.43292628310393216, "grad_norm": 3.0498785972595215, "learning_rate": 0.00019585883347904845, "loss": 1.343, "step": 14500 }, { "epoch": 0.4344191323560147, "grad_norm": 4.179199695587158, "learning_rate": 0.00019584454768312962, "loss": 1.3662, "step": 14550 }, { "epoch": 0.4359119816080972, "grad_norm": 4.1148786544799805, "learning_rate": 0.00019583026188721078, "loss": 1.3785, "step": 14600 }, { "epoch": 0.4374048308601797, "grad_norm": 4.012060165405273, "learning_rate": 0.00019581597609129198, "loss": 1.4056, "step": 14650 }, { "epoch": 0.43889768011226227, "grad_norm": 7.186342716217041, "learning_rate": 0.0001958016902953731, "loss": 1.3943, "step": 14700 }, { "epoch": 0.4403905293643448, "grad_norm": 3.951267957687378, "learning_rate": 0.0001957874044994543, "loss": 1.3976, "step": 14750 }, { "epoch": 0.4418833786164273, "grad_norm": 5.276801586151123, "learning_rate": 0.00019577311870353544, "loss": 1.4137, "step": 14800 }, { "epoch": 0.44337622786850983, "grad_norm": 4.107429504394531, "learning_rate": 0.00019575883290761663, "loss": 1.388, "step": 14850 }, { "epoch": 0.44486907712059237, "grad_norm": 4.274941444396973, "learning_rate": 0.00019574454711169777, "loss": 1.3446, "step": 14900 }, { "epoch": 0.4463619263726749, "grad_norm": 4.174200534820557, "learning_rate": 0.00019573026131577896, "loss": 1.3703, "step": 14950 }, { "epoch": 0.4478547756247574, "grad_norm": 3.867125988006592, "learning_rate": 0.00019571597551986013, "loss": 1.3745, "step": 15000 }, { "epoch": 0.44934762487683994, "grad_norm": 6.454402923583984, "learning_rate": 0.0001957016897239413, "loss": 1.3081, "step": 15050 }, { "epoch": 0.4508404741289225, "grad_norm": 3.8726885318756104, "learning_rate": 0.00019568740392802246, "loss": 1.3527, "step": 15100 }, { "epoch": 0.45233332338100496, "grad_norm": 3.6218361854553223, "learning_rate": 0.00019567311813210362, "loss": 1.4549, "step": 15150 }, { "epoch": 0.4538261726330875, "grad_norm": 5.2475361824035645, "learning_rate": 0.0001956588323361848, "loss": 1.361, "step": 15200 }, { "epoch": 0.45531902188517004, "grad_norm": 4.388748645782471, "learning_rate": 0.00019564454654026595, "loss": 1.3164, "step": 15250 }, { "epoch": 0.4568118711372526, "grad_norm": 4.97973108291626, "learning_rate": 0.00019563026074434712, "loss": 1.3755, "step": 15300 }, { "epoch": 0.45830472038933506, "grad_norm": 4.538138389587402, "learning_rate": 0.00019561597494842828, "loss": 1.4339, "step": 15350 }, { "epoch": 0.4597975696414176, "grad_norm": 4.389719009399414, "learning_rate": 0.00019560168915250945, "loss": 1.4163, "step": 15400 }, { "epoch": 0.46129041889350014, "grad_norm": 4.347919464111328, "learning_rate": 0.00019558740335659064, "loss": 1.3663, "step": 15450 }, { "epoch": 0.4627832681455827, "grad_norm": 4.820595741271973, "learning_rate": 0.00019557311756067177, "loss": 1.3441, "step": 15500 }, { "epoch": 0.46427611739766517, "grad_norm": 2.6965413093566895, "learning_rate": 0.00019555883176475297, "loss": 1.3657, "step": 15550 }, { "epoch": 0.4657689666497477, "grad_norm": 4.741116523742676, "learning_rate": 0.0001955445459688341, "loss": 1.4414, "step": 15600 }, { "epoch": 0.46726181590183025, "grad_norm": 3.9512829780578613, "learning_rate": 0.0001955302601729153, "loss": 1.354, "step": 15650 }, { "epoch": 0.46875466515391273, "grad_norm": 7.704863548278809, "learning_rate": 0.00019551597437699646, "loss": 1.4107, "step": 15700 }, { "epoch": 0.47024751440599527, "grad_norm": 3.502988338470459, "learning_rate": 0.00019550168858107763, "loss": 1.415, "step": 15750 }, { "epoch": 0.4717403636580778, "grad_norm": 4.246065139770508, "learning_rate": 0.0001954874027851588, "loss": 1.3414, "step": 15800 }, { "epoch": 0.47323321291016035, "grad_norm": 3.4039735794067383, "learning_rate": 0.00019547311698923995, "loss": 1.3303, "step": 15850 }, { "epoch": 0.47472606216224283, "grad_norm": 3.279521942138672, "learning_rate": 0.00019545883119332112, "loss": 1.3447, "step": 15900 }, { "epoch": 0.4762189114143254, "grad_norm": 2.9335134029388428, "learning_rate": 0.00019544454539740228, "loss": 1.3998, "step": 15950 }, { "epoch": 0.4777117606664079, "grad_norm": 3.317011833190918, "learning_rate": 0.00019543025960148345, "loss": 1.3506, "step": 16000 }, { "epoch": 0.47920460991849045, "grad_norm": 3.5980935096740723, "learning_rate": 0.0001954159738055646, "loss": 1.3622, "step": 16050 }, { "epoch": 0.48069745917057294, "grad_norm": 4.726743698120117, "learning_rate": 0.00019540168800964578, "loss": 1.3414, "step": 16100 }, { "epoch": 0.4821903084226555, "grad_norm": 5.129758358001709, "learning_rate": 0.00019538740221372694, "loss": 1.3873, "step": 16150 }, { "epoch": 0.483683157674738, "grad_norm": 5.122271537780762, "learning_rate": 0.0001953731164178081, "loss": 1.428, "step": 16200 }, { "epoch": 0.4851760069268205, "grad_norm": 3.359868049621582, "learning_rate": 0.0001953588306218893, "loss": 1.3111, "step": 16250 }, { "epoch": 0.48666885617890304, "grad_norm": 5.066514492034912, "learning_rate": 0.00019534454482597044, "loss": 1.2961, "step": 16300 }, { "epoch": 0.4881617054309856, "grad_norm": 4.902595520019531, "learning_rate": 0.0001953302590300516, "loss": 1.3682, "step": 16350 }, { "epoch": 0.4896545546830681, "grad_norm": 5.0537028312683105, "learning_rate": 0.00019531597323413277, "loss": 1.315, "step": 16400 }, { "epoch": 0.4911474039351506, "grad_norm": 3.7002792358398438, "learning_rate": 0.00019530168743821393, "loss": 1.3441, "step": 16450 }, { "epoch": 0.49264025318723315, "grad_norm": 4.845950603485107, "learning_rate": 0.00019528740164229512, "loss": 1.3887, "step": 16500 }, { "epoch": 0.4941331024393157, "grad_norm": 4.933434963226318, "learning_rate": 0.00019527311584637626, "loss": 1.3865, "step": 16550 }, { "epoch": 0.4956259516913982, "grad_norm": 3.8103625774383545, "learning_rate": 0.00019525883005045745, "loss": 1.3757, "step": 16600 }, { "epoch": 0.4971188009434807, "grad_norm": 4.501999855041504, "learning_rate": 0.0001952445442545386, "loss": 1.4482, "step": 16650 }, { "epoch": 0.49861165019556325, "grad_norm": 5.600002765655518, "learning_rate": 0.00019523025845861978, "loss": 1.4209, "step": 16700 }, { "epoch": 0.5001044994476458, "grad_norm": 5.138682842254639, "learning_rate": 0.00019521597266270092, "loss": 1.4287, "step": 16750 }, { "epoch": 0.5015973486997283, "grad_norm": 5.575449466705322, "learning_rate": 0.0001952016868667821, "loss": 1.3992, "step": 16800 }, { "epoch": 0.5030901979518109, "grad_norm": 3.6443893909454346, "learning_rate": 0.00019518740107086327, "loss": 1.3661, "step": 16850 }, { "epoch": 0.5045830472038934, "grad_norm": 3.904905319213867, "learning_rate": 0.00019517311527494444, "loss": 1.4448, "step": 16900 }, { "epoch": 0.5060758964559758, "grad_norm": 4.380904197692871, "learning_rate": 0.0001951588294790256, "loss": 1.4076, "step": 16950 }, { "epoch": 0.5075687457080584, "grad_norm": 3.5924415588378906, "learning_rate": 0.00019514454368310677, "loss": 1.3833, "step": 17000 }, { "epoch": 0.5090615949601409, "grad_norm": 3.7007193565368652, "learning_rate": 0.00019513025788718793, "loss": 1.391, "step": 17050 }, { "epoch": 0.5105544442122234, "grad_norm": 3.9879095554351807, "learning_rate": 0.0001951159720912691, "loss": 1.3419, "step": 17100 }, { "epoch": 0.512047293464306, "grad_norm": 5.663998126983643, "learning_rate": 0.00019510168629535026, "loss": 1.3481, "step": 17150 }, { "epoch": 0.5135401427163885, "grad_norm": 3.9803707599639893, "learning_rate": 0.00019508740049943143, "loss": 1.4034, "step": 17200 }, { "epoch": 0.5150329919684711, "grad_norm": 3.718477725982666, "learning_rate": 0.0001950731147035126, "loss": 1.406, "step": 17250 }, { "epoch": 0.5165258412205536, "grad_norm": 4.864751815795898, "learning_rate": 0.00019505882890759378, "loss": 1.375, "step": 17300 }, { "epoch": 0.518018690472636, "grad_norm": 3.697645664215088, "learning_rate": 0.00019504454311167492, "loss": 1.4283, "step": 17350 }, { "epoch": 0.5195115397247186, "grad_norm": 4.063074111938477, "learning_rate": 0.0001950302573157561, "loss": 1.378, "step": 17400 }, { "epoch": 0.5210043889768011, "grad_norm": 4.223004341125488, "learning_rate": 0.00019501597151983725, "loss": 1.3789, "step": 17450 }, { "epoch": 0.5224972382288836, "grad_norm": 3.329366683959961, "learning_rate": 0.00019500168572391844, "loss": 1.346, "step": 17500 }, { "epoch": 0.5239900874809662, "grad_norm": 4.774710178375244, "learning_rate": 0.00019498739992799958, "loss": 1.3895, "step": 17550 }, { "epoch": 0.5254829367330487, "grad_norm": 6.2145490646362305, "learning_rate": 0.00019497311413208077, "loss": 1.3715, "step": 17600 }, { "epoch": 0.5269757859851312, "grad_norm": 3.9069626331329346, "learning_rate": 0.00019495882833616194, "loss": 1.4572, "step": 17650 }, { "epoch": 0.5284686352372138, "grad_norm": 3.347576141357422, "learning_rate": 0.0001949445425402431, "loss": 1.359, "step": 17700 }, { "epoch": 0.5299614844892963, "grad_norm": 5.305202484130859, "learning_rate": 0.00019493025674432427, "loss": 1.4038, "step": 17750 }, { "epoch": 0.5314543337413788, "grad_norm": 3.865619659423828, "learning_rate": 0.00019491597094840543, "loss": 1.3913, "step": 17800 }, { "epoch": 0.5329471829934613, "grad_norm": 4.791336536407471, "learning_rate": 0.0001949016851524866, "loss": 1.3852, "step": 17850 }, { "epoch": 0.5344400322455438, "grad_norm": 3.7827060222625732, "learning_rate": 0.00019488739935656776, "loss": 1.3108, "step": 17900 }, { "epoch": 0.5359328814976264, "grad_norm": 4.945117473602295, "learning_rate": 0.00019487311356064892, "loss": 1.346, "step": 17950 }, { "epoch": 0.5374257307497089, "grad_norm": 4.561169147491455, "learning_rate": 0.0001948588277647301, "loss": 1.3904, "step": 18000 }, { "epoch": 0.5389185800017914, "grad_norm": 4.608798027038574, "learning_rate": 0.00019484454196881125, "loss": 1.4133, "step": 18050 }, { "epoch": 0.540411429253874, "grad_norm": 4.303143501281738, "learning_rate": 0.00019483025617289245, "loss": 1.3994, "step": 18100 }, { "epoch": 0.5419042785059565, "grad_norm": 5.815835952758789, "learning_rate": 0.00019481597037697358, "loss": 1.392, "step": 18150 }, { "epoch": 0.543397127758039, "grad_norm": 5.349491596221924, "learning_rate": 0.00019480168458105477, "loss": 1.418, "step": 18200 }, { "epoch": 0.5448899770101215, "grad_norm": 3.7407824993133545, "learning_rate": 0.0001947873987851359, "loss": 1.3747, "step": 18250 }, { "epoch": 0.546382826262204, "grad_norm": 5.2810163497924805, "learning_rate": 0.0001947731129892171, "loss": 1.4023, "step": 18300 }, { "epoch": 0.5478756755142866, "grad_norm": 4.417948246002197, "learning_rate": 0.00019475882719329824, "loss": 1.4005, "step": 18350 }, { "epoch": 0.5493685247663691, "grad_norm": 5.287749290466309, "learning_rate": 0.00019474454139737943, "loss": 1.4211, "step": 18400 }, { "epoch": 0.5508613740184516, "grad_norm": 4.0996809005737305, "learning_rate": 0.0001947302556014606, "loss": 1.3559, "step": 18450 }, { "epoch": 0.5523542232705342, "grad_norm": 5.229327201843262, "learning_rate": 0.00019471596980554176, "loss": 1.4549, "step": 18500 }, { "epoch": 0.5538470725226167, "grad_norm": 4.409546852111816, "learning_rate": 0.00019470168400962293, "loss": 1.3683, "step": 18550 }, { "epoch": 0.5553399217746992, "grad_norm": 5.4077229499816895, "learning_rate": 0.0001946873982137041, "loss": 1.398, "step": 18600 }, { "epoch": 0.5568327710267817, "grad_norm": 5.208966255187988, "learning_rate": 0.00019467311241778526, "loss": 1.3276, "step": 18650 }, { "epoch": 0.5583256202788642, "grad_norm": 4.8162617683410645, "learning_rate": 0.00019465882662186642, "loss": 1.3314, "step": 18700 }, { "epoch": 0.5598184695309467, "grad_norm": 5.04697322845459, "learning_rate": 0.00019464454082594759, "loss": 1.3883, "step": 18750 }, { "epoch": 0.5613113187830293, "grad_norm": 4.038108825683594, "learning_rate": 0.00019463025503002875, "loss": 1.4377, "step": 18800 }, { "epoch": 0.5628041680351118, "grad_norm": 4.910576820373535, "learning_rate": 0.00019461596923410992, "loss": 1.3713, "step": 18850 }, { "epoch": 0.5642970172871944, "grad_norm": 5.3433756828308105, "learning_rate": 0.00019460168343819108, "loss": 1.4359, "step": 18900 }, { "epoch": 0.5657898665392769, "grad_norm": 3.9515552520751953, "learning_rate": 0.00019458739764227224, "loss": 1.3608, "step": 18950 }, { "epoch": 0.5672827157913594, "grad_norm": 4.515705108642578, "learning_rate": 0.0001945731118463534, "loss": 1.3274, "step": 19000 }, { "epoch": 0.568775565043442, "grad_norm": 4.434077262878418, "learning_rate": 0.00019455882605043457, "loss": 1.3681, "step": 19050 }, { "epoch": 0.5702684142955244, "grad_norm": 4.534008979797363, "learning_rate": 0.00019454454025451574, "loss": 1.3863, "step": 19100 }, { "epoch": 0.5717612635476069, "grad_norm": 4.200322151184082, "learning_rate": 0.00019453025445859693, "loss": 1.3743, "step": 19150 }, { "epoch": 0.5732541127996895, "grad_norm": 5.686845779418945, "learning_rate": 0.00019451596866267807, "loss": 1.3225, "step": 19200 }, { "epoch": 0.574746962051772, "grad_norm": 7.821211814880371, "learning_rate": 0.00019450168286675926, "loss": 1.3775, "step": 19250 }, { "epoch": 0.5762398113038545, "grad_norm": 5.200834274291992, "learning_rate": 0.0001944873970708404, "loss": 1.3423, "step": 19300 }, { "epoch": 0.5777326605559371, "grad_norm": 5.26302433013916, "learning_rate": 0.0001944731112749216, "loss": 1.3813, "step": 19350 }, { "epoch": 0.5792255098080196, "grad_norm": 3.3207719326019287, "learning_rate": 0.00019445882547900273, "loss": 1.3922, "step": 19400 }, { "epoch": 0.5807183590601022, "grad_norm": 4.619020938873291, "learning_rate": 0.00019444453968308392, "loss": 1.3533, "step": 19450 }, { "epoch": 0.5822112083121846, "grad_norm": 5.780002593994141, "learning_rate": 0.00019443025388716508, "loss": 1.4035, "step": 19500 }, { "epoch": 0.5837040575642671, "grad_norm": 4.961215496063232, "learning_rate": 0.00019441596809124625, "loss": 1.3687, "step": 19550 }, { "epoch": 0.5851969068163497, "grad_norm": 4.50115442276001, "learning_rate": 0.0001944016822953274, "loss": 1.342, "step": 19600 }, { "epoch": 0.5866897560684322, "grad_norm": 3.9477944374084473, "learning_rate": 0.00019438739649940858, "loss": 1.394, "step": 19650 }, { "epoch": 0.5881826053205147, "grad_norm": 3.7466814517974854, "learning_rate": 0.00019437311070348974, "loss": 1.3414, "step": 19700 }, { "epoch": 0.5896754545725973, "grad_norm": 4.382058143615723, "learning_rate": 0.0001943588249075709, "loss": 1.3669, "step": 19750 }, { "epoch": 0.5911683038246798, "grad_norm": 3.7016665935516357, "learning_rate": 0.00019434453911165207, "loss": 1.4548, "step": 19800 }, { "epoch": 0.5926611530767623, "grad_norm": 4.4738030433654785, "learning_rate": 0.00019433025331573324, "loss": 1.4273, "step": 19850 }, { "epoch": 0.5941540023288449, "grad_norm": 5.2445454597473145, "learning_rate": 0.0001943159675198144, "loss": 1.3746, "step": 19900 }, { "epoch": 0.5956468515809273, "grad_norm": 3.766219139099121, "learning_rate": 0.0001943016817238956, "loss": 1.4391, "step": 19950 }, { "epoch": 0.5971397008330099, "grad_norm": 6.310808181762695, "learning_rate": 0.00019428739592797673, "loss": 1.3316, "step": 20000 }, { "epoch": 0.5986325500850924, "grad_norm": 4.055521488189697, "learning_rate": 0.00019427311013205792, "loss": 1.3784, "step": 20050 }, { "epoch": 0.6001253993371749, "grad_norm": 4.933177471160889, "learning_rate": 0.00019425882433613906, "loss": 1.3352, "step": 20100 }, { "epoch": 0.6016182485892575, "grad_norm": 3.8867061138153076, "learning_rate": 0.00019424453854022025, "loss": 1.4169, "step": 20150 }, { "epoch": 0.60311109784134, "grad_norm": 3.364475727081299, "learning_rate": 0.0001942302527443014, "loss": 1.3767, "step": 20200 }, { "epoch": 0.6046039470934225, "grad_norm": 3.48152232170105, "learning_rate": 0.00019421596694838258, "loss": 1.3659, "step": 20250 }, { "epoch": 0.6060967963455051, "grad_norm": 3.3658649921417236, "learning_rate": 0.00019420168115246374, "loss": 1.3745, "step": 20300 }, { "epoch": 0.6075896455975875, "grad_norm": 4.441917896270752, "learning_rate": 0.0001941873953565449, "loss": 1.3827, "step": 20350 }, { "epoch": 0.60908249484967, "grad_norm": 5.014800548553467, "learning_rate": 0.00019417310956062607, "loss": 1.3792, "step": 20400 }, { "epoch": 0.6105753441017526, "grad_norm": 5.472316265106201, "learning_rate": 0.00019415882376470724, "loss": 1.394, "step": 20450 }, { "epoch": 0.6120681933538351, "grad_norm": 5.35073184967041, "learning_rate": 0.0001941445379687884, "loss": 1.401, "step": 20500 }, { "epoch": 0.6135610426059177, "grad_norm": 4.284445762634277, "learning_rate": 0.00019413025217286957, "loss": 1.4354, "step": 20550 }, { "epoch": 0.6150538918580002, "grad_norm": 3.561774492263794, "learning_rate": 0.00019411596637695073, "loss": 1.3295, "step": 20600 }, { "epoch": 0.6165467411100827, "grad_norm": 4.479186534881592, "learning_rate": 0.0001941016805810319, "loss": 1.3629, "step": 20650 }, { "epoch": 0.6180395903621653, "grad_norm": 4.186618804931641, "learning_rate": 0.00019408739478511306, "loss": 1.3399, "step": 20700 }, { "epoch": 0.6195324396142478, "grad_norm": 3.589655637741089, "learning_rate": 0.00019407310898919425, "loss": 1.3516, "step": 20750 }, { "epoch": 0.6210252888663302, "grad_norm": 4.330646514892578, "learning_rate": 0.0001940588231932754, "loss": 1.3148, "step": 20800 }, { "epoch": 0.6225181381184128, "grad_norm": 6.324933052062988, "learning_rate": 0.00019404453739735658, "loss": 1.331, "step": 20850 }, { "epoch": 0.6240109873704953, "grad_norm": 4.652800559997559, "learning_rate": 0.00019403025160143772, "loss": 1.3604, "step": 20900 }, { "epoch": 0.6255038366225778, "grad_norm": 4.831106662750244, "learning_rate": 0.0001940159658055189, "loss": 1.3322, "step": 20950 }, { "epoch": 0.6269966858746604, "grad_norm": 6.0938920974731445, "learning_rate": 0.00019400168000960005, "loss": 1.4106, "step": 21000 }, { "epoch": 0.6284895351267429, "grad_norm": 4.424108028411865, "learning_rate": 0.00019398739421368124, "loss": 1.4001, "step": 21050 }, { "epoch": 0.6299823843788255, "grad_norm": 4.329803466796875, "learning_rate": 0.0001939731084177624, "loss": 1.4048, "step": 21100 }, { "epoch": 0.631475233630908, "grad_norm": 3.915818929672241, "learning_rate": 0.00019395882262184357, "loss": 1.3619, "step": 21150 }, { "epoch": 0.6329680828829904, "grad_norm": 3.9562571048736572, "learning_rate": 0.00019394453682592474, "loss": 1.3636, "step": 21200 }, { "epoch": 0.634460932135073, "grad_norm": 5.229249954223633, "learning_rate": 0.0001939302510300059, "loss": 1.3502, "step": 21250 }, { "epoch": 0.6359537813871555, "grad_norm": 4.149145126342773, "learning_rate": 0.00019391596523408706, "loss": 1.3603, "step": 21300 }, { "epoch": 0.637446630639238, "grad_norm": 4.068868637084961, "learning_rate": 0.00019390167943816823, "loss": 1.3945, "step": 21350 }, { "epoch": 0.6389394798913206, "grad_norm": 3.6808931827545166, "learning_rate": 0.0001938873936422494, "loss": 1.4015, "step": 21400 }, { "epoch": 0.6404323291434031, "grad_norm": 4.391795635223389, "learning_rate": 0.00019387310784633056, "loss": 1.3428, "step": 21450 }, { "epoch": 0.6419251783954856, "grad_norm": 5.109655380249023, "learning_rate": 0.00019385882205041172, "loss": 1.3874, "step": 21500 }, { "epoch": 0.6434180276475682, "grad_norm": 4.560513496398926, "learning_rate": 0.0001938445362544929, "loss": 1.3833, "step": 21550 }, { "epoch": 0.6449108768996507, "grad_norm": 3.6108620166778564, "learning_rate": 0.00019383025045857405, "loss": 1.3981, "step": 21600 }, { "epoch": 0.6464037261517332, "grad_norm": 4.100367546081543, "learning_rate": 0.00019381596466265522, "loss": 1.3139, "step": 21650 }, { "epoch": 0.6478965754038157, "grad_norm": 3.1513540744781494, "learning_rate": 0.00019380167886673638, "loss": 1.3359, "step": 21700 }, { "epoch": 0.6493894246558982, "grad_norm": 4.793807506561279, "learning_rate": 0.00019378739307081755, "loss": 1.4025, "step": 21750 }, { "epoch": 0.6508822739079808, "grad_norm": 4.403114318847656, "learning_rate": 0.0001937731072748987, "loss": 1.349, "step": 21800 }, { "epoch": 0.6523751231600633, "grad_norm": 4.445423603057861, "learning_rate": 0.00019375882147897988, "loss": 1.4641, "step": 21850 }, { "epoch": 0.6538679724121458, "grad_norm": 5.721547603607178, "learning_rate": 0.00019374453568306107, "loss": 1.3451, "step": 21900 }, { "epoch": 0.6553608216642284, "grad_norm": 4.411465644836426, "learning_rate": 0.0001937302498871422, "loss": 1.3893, "step": 21950 }, { "epoch": 0.6568536709163109, "grad_norm": 3.181412696838379, "learning_rate": 0.0001937159640912234, "loss": 1.385, "step": 22000 }, { "epoch": 0.6583465201683933, "grad_norm": 7.374556541442871, "learning_rate": 0.00019370167829530453, "loss": 1.4051, "step": 22050 }, { "epoch": 0.6598393694204759, "grad_norm": 4.452084064483643, "learning_rate": 0.00019368739249938573, "loss": 1.3389, "step": 22100 }, { "epoch": 0.6613322186725584, "grad_norm": 5.60634708404541, "learning_rate": 0.0001936731067034669, "loss": 1.3924, "step": 22150 }, { "epoch": 0.662825067924641, "grad_norm": 4.35741662979126, "learning_rate": 0.00019365882090754806, "loss": 1.3957, "step": 22200 }, { "epoch": 0.6643179171767235, "grad_norm": 4.173916816711426, "learning_rate": 0.00019364453511162922, "loss": 1.3669, "step": 22250 }, { "epoch": 0.665810766428806, "grad_norm": 3.8707377910614014, "learning_rate": 0.00019363024931571038, "loss": 1.3478, "step": 22300 }, { "epoch": 0.6673036156808886, "grad_norm": 4.765937328338623, "learning_rate": 0.00019361596351979155, "loss": 1.4007, "step": 22350 }, { "epoch": 0.6687964649329711, "grad_norm": 4.315809726715088, "learning_rate": 0.00019360167772387271, "loss": 1.3622, "step": 22400 }, { "epoch": 0.6702893141850536, "grad_norm": 5.977219104766846, "learning_rate": 0.00019358739192795388, "loss": 1.3836, "step": 22450 }, { "epoch": 0.6717821634371361, "grad_norm": 4.420149803161621, "learning_rate": 0.00019357310613203504, "loss": 1.3488, "step": 22500 }, { "epoch": 0.6732750126892186, "grad_norm": 3.9537293910980225, "learning_rate": 0.0001935588203361162, "loss": 1.3892, "step": 22550 }, { "epoch": 0.6747678619413011, "grad_norm": 5.468355655670166, "learning_rate": 0.0001935445345401974, "loss": 1.4475, "step": 22600 }, { "epoch": 0.6762607111933837, "grad_norm": 4.3148674964904785, "learning_rate": 0.00019353024874427854, "loss": 1.3866, "step": 22650 }, { "epoch": 0.6777535604454662, "grad_norm": 4.004809379577637, "learning_rate": 0.00019351596294835973, "loss": 1.3811, "step": 22700 }, { "epoch": 0.6792464096975488, "grad_norm": 4.404988765716553, "learning_rate": 0.00019350167715244087, "loss": 1.3648, "step": 22750 }, { "epoch": 0.6807392589496313, "grad_norm": 5.115052223205566, "learning_rate": 0.00019348739135652206, "loss": 1.3523, "step": 22800 }, { "epoch": 0.6822321082017138, "grad_norm": 5.083719730377197, "learning_rate": 0.0001934731055606032, "loss": 1.3978, "step": 22850 }, { "epoch": 0.6837249574537964, "grad_norm": 4.038282871246338, "learning_rate": 0.0001934588197646844, "loss": 1.2788, "step": 22900 }, { "epoch": 0.6852178067058788, "grad_norm": 5.438407897949219, "learning_rate": 0.00019344453396876555, "loss": 1.4181, "step": 22950 }, { "epoch": 0.6867106559579613, "grad_norm": 5.381191730499268, "learning_rate": 0.00019343024817284672, "loss": 1.4145, "step": 23000 }, { "epoch": 0.6882035052100439, "grad_norm": 3.183706283569336, "learning_rate": 0.00019341596237692788, "loss": 1.3271, "step": 23050 }, { "epoch": 0.6896963544621264, "grad_norm": 4.063404083251953, "learning_rate": 0.00019340167658100905, "loss": 1.3717, "step": 23100 }, { "epoch": 0.6911892037142089, "grad_norm": 5.677481651306152, "learning_rate": 0.0001933873907850902, "loss": 1.344, "step": 23150 }, { "epoch": 0.6926820529662915, "grad_norm": 5.376470565795898, "learning_rate": 0.00019337310498917138, "loss": 1.295, "step": 23200 }, { "epoch": 0.694174902218374, "grad_norm": 3.8844990730285645, "learning_rate": 0.00019335881919325254, "loss": 1.381, "step": 23250 }, { "epoch": 0.6956677514704566, "grad_norm": 5.285017013549805, "learning_rate": 0.0001933445333973337, "loss": 1.3505, "step": 23300 }, { "epoch": 0.697160600722539, "grad_norm": 4.133642196655273, "learning_rate": 0.00019333024760141487, "loss": 1.3604, "step": 23350 }, { "epoch": 0.6986534499746215, "grad_norm": 5.037447929382324, "learning_rate": 0.00019331596180549606, "loss": 1.3209, "step": 23400 }, { "epoch": 0.7001462992267041, "grad_norm": 4.916257858276367, "learning_rate": 0.0001933016760095772, "loss": 1.4084, "step": 23450 }, { "epoch": 0.7016391484787866, "grad_norm": 3.469505786895752, "learning_rate": 0.0001932873902136584, "loss": 1.3705, "step": 23500 }, { "epoch": 0.7031319977308691, "grad_norm": 3.624896287918091, "learning_rate": 0.00019327310441773953, "loss": 1.3507, "step": 23550 }, { "epoch": 0.7046248469829517, "grad_norm": 4.352174758911133, "learning_rate": 0.00019325881862182072, "loss": 1.3184, "step": 23600 }, { "epoch": 0.7061176962350342, "grad_norm": 6.4549150466918945, "learning_rate": 0.00019324453282590186, "loss": 1.4161, "step": 23650 }, { "epoch": 0.7076105454871167, "grad_norm": 6.221735000610352, "learning_rate": 0.00019323024702998305, "loss": 1.3425, "step": 23700 }, { "epoch": 0.7091033947391993, "grad_norm": 4.285811901092529, "learning_rate": 0.00019321596123406421, "loss": 1.3418, "step": 23750 }, { "epoch": 0.7105962439912817, "grad_norm": 4.533527374267578, "learning_rate": 0.00019320167543814538, "loss": 1.3723, "step": 23800 }, { "epoch": 0.7120890932433643, "grad_norm": 3.838109016418457, "learning_rate": 0.00019318738964222654, "loss": 1.3624, "step": 23850 }, { "epoch": 0.7135819424954468, "grad_norm": 3.8401126861572266, "learning_rate": 0.0001931731038463077, "loss": 1.4239, "step": 23900 }, { "epoch": 0.7150747917475293, "grad_norm": 5.9049787521362305, "learning_rate": 0.00019315881805038887, "loss": 1.3642, "step": 23950 }, { "epoch": 0.7165676409996119, "grad_norm": 5.397033214569092, "learning_rate": 0.00019314453225447004, "loss": 1.3298, "step": 24000 }, { "epoch": 0.7180604902516944, "grad_norm": 5.649387359619141, "learning_rate": 0.0001931302464585512, "loss": 1.4102, "step": 24050 }, { "epoch": 0.7195533395037769, "grad_norm": 5.697938919067383, "learning_rate": 0.00019311596066263237, "loss": 1.3765, "step": 24100 }, { "epoch": 0.7210461887558595, "grad_norm": 4.244998455047607, "learning_rate": 0.00019310167486671353, "loss": 1.3471, "step": 24150 }, { "epoch": 0.722539038007942, "grad_norm": 4.579226493835449, "learning_rate": 0.0001930873890707947, "loss": 1.3897, "step": 24200 }, { "epoch": 0.7240318872600244, "grad_norm": 8.80657958984375, "learning_rate": 0.00019307310327487586, "loss": 1.3628, "step": 24250 }, { "epoch": 0.725524736512107, "grad_norm": 5.42709493637085, "learning_rate": 0.00019305881747895703, "loss": 1.3587, "step": 24300 }, { "epoch": 0.7270175857641895, "grad_norm": 4.2680840492248535, "learning_rate": 0.0001930445316830382, "loss": 1.3414, "step": 24350 }, { "epoch": 0.7285104350162721, "grad_norm": 5.641107559204102, "learning_rate": 0.00019303024588711935, "loss": 1.4095, "step": 24400 }, { "epoch": 0.7300032842683546, "grad_norm": 5.29530143737793, "learning_rate": 0.00019301596009120052, "loss": 1.389, "step": 24450 }, { "epoch": 0.7314961335204371, "grad_norm": 4.110136032104492, "learning_rate": 0.00019300167429528168, "loss": 1.4641, "step": 24500 }, { "epoch": 0.7329889827725197, "grad_norm": 4.390302658081055, "learning_rate": 0.00019298738849936288, "loss": 1.4028, "step": 24550 }, { "epoch": 0.7344818320246022, "grad_norm": 3.717409372329712, "learning_rate": 0.000192973102703444, "loss": 1.436, "step": 24600 }, { "epoch": 0.7359746812766846, "grad_norm": 5.399808406829834, "learning_rate": 0.0001929588169075252, "loss": 1.2887, "step": 24650 }, { "epoch": 0.7374675305287672, "grad_norm": 5.027936935424805, "learning_rate": 0.00019294453111160634, "loss": 1.3591, "step": 24700 }, { "epoch": 0.7389603797808497, "grad_norm": 5.12235689163208, "learning_rate": 0.00019293024531568753, "loss": 1.4305, "step": 24750 }, { "epoch": 0.7404532290329322, "grad_norm": 4.445746898651123, "learning_rate": 0.0001929159595197687, "loss": 1.3609, "step": 24800 }, { "epoch": 0.7419460782850148, "grad_norm": 4.590288162231445, "learning_rate": 0.00019290167372384986, "loss": 1.3811, "step": 24850 }, { "epoch": 0.7434389275370973, "grad_norm": 3.841573476791382, "learning_rate": 0.00019288738792793103, "loss": 1.3632, "step": 24900 }, { "epoch": 0.7449317767891799, "grad_norm": 5.2155327796936035, "learning_rate": 0.0001928731021320122, "loss": 1.4422, "step": 24950 }, { "epoch": 0.7464246260412624, "grad_norm": 4.942720413208008, "learning_rate": 0.00019285881633609336, "loss": 1.3916, "step": 25000 }, { "epoch": 0.7479174752933448, "grad_norm": 5.292357921600342, "learning_rate": 0.00019284453054017452, "loss": 1.3048, "step": 25050 }, { "epoch": 0.7494103245454274, "grad_norm": 6.690375804901123, "learning_rate": 0.0001928302447442557, "loss": 1.3967, "step": 25100 }, { "epoch": 0.7509031737975099, "grad_norm": 4.590394496917725, "learning_rate": 0.00019281595894833685, "loss": 1.3925, "step": 25150 }, { "epoch": 0.7523960230495924, "grad_norm": 4.220889091491699, "learning_rate": 0.00019280167315241802, "loss": 1.3263, "step": 25200 }, { "epoch": 0.753888872301675, "grad_norm": 4.847371578216553, "learning_rate": 0.00019278738735649918, "loss": 1.4141, "step": 25250 }, { "epoch": 0.7553817215537575, "grad_norm": 5.392793655395508, "learning_rate": 0.00019277310156058035, "loss": 1.3147, "step": 25300 }, { "epoch": 0.75687457080584, "grad_norm": 4.261468887329102, "learning_rate": 0.00019275881576466154, "loss": 1.3102, "step": 25350 }, { "epoch": 0.7583674200579226, "grad_norm": 4.498802661895752, "learning_rate": 0.00019274452996874267, "loss": 1.3912, "step": 25400 }, { "epoch": 0.759860269310005, "grad_norm": 5.2235283851623535, "learning_rate": 0.00019273024417282387, "loss": 1.3375, "step": 25450 }, { "epoch": 0.7613531185620876, "grad_norm": 6.409016132354736, "learning_rate": 0.000192715958376905, "loss": 1.3339, "step": 25500 }, { "epoch": 0.7628459678141701, "grad_norm": 4.4392805099487305, "learning_rate": 0.0001927016725809862, "loss": 1.3599, "step": 25550 }, { "epoch": 0.7643388170662526, "grad_norm": 5.580776691436768, "learning_rate": 0.00019268738678506736, "loss": 1.3812, "step": 25600 }, { "epoch": 0.7658316663183352, "grad_norm": 5.473046779632568, "learning_rate": 0.00019267310098914852, "loss": 1.3582, "step": 25650 }, { "epoch": 0.7673245155704177, "grad_norm": 6.353076934814453, "learning_rate": 0.0001926588151932297, "loss": 1.3388, "step": 25700 }, { "epoch": 0.7688173648225002, "grad_norm": 4.247453689575195, "learning_rate": 0.00019264452939731085, "loss": 1.3745, "step": 25750 }, { "epoch": 0.7703102140745828, "grad_norm": 5.048892498016357, "learning_rate": 0.00019263024360139202, "loss": 1.405, "step": 25800 }, { "epoch": 0.7718030633266653, "grad_norm": 4.883440017700195, "learning_rate": 0.00019261595780547318, "loss": 1.4082, "step": 25850 }, { "epoch": 0.7732959125787477, "grad_norm": 4.221151828765869, "learning_rate": 0.00019260167200955435, "loss": 1.3766, "step": 25900 }, { "epoch": 0.7747887618308303, "grad_norm": 8.68738079071045, "learning_rate": 0.0001925873862136355, "loss": 1.4046, "step": 25950 }, { "epoch": 0.7762816110829128, "grad_norm": 4.479017734527588, "learning_rate": 0.00019257310041771668, "loss": 1.3452, "step": 26000 }, { "epoch": 0.7777744603349954, "grad_norm": 4.299473285675049, "learning_rate": 0.00019255881462179787, "loss": 1.3702, "step": 26050 }, { "epoch": 0.7792673095870779, "grad_norm": 5.272589683532715, "learning_rate": 0.000192544528825879, "loss": 1.3725, "step": 26100 }, { "epoch": 0.7807601588391604, "grad_norm": 4.644061088562012, "learning_rate": 0.0001925302430299602, "loss": 1.4143, "step": 26150 }, { "epoch": 0.782253008091243, "grad_norm": 4.7033185958862305, "learning_rate": 0.00019251595723404134, "loss": 1.3666, "step": 26200 }, { "epoch": 0.7837458573433255, "grad_norm": 3.5262560844421387, "learning_rate": 0.00019250167143812253, "loss": 1.4135, "step": 26250 }, { "epoch": 0.785238706595408, "grad_norm": 3.8599159717559814, "learning_rate": 0.00019248738564220367, "loss": 1.3258, "step": 26300 }, { "epoch": 0.7867315558474905, "grad_norm": 5.743364334106445, "learning_rate": 0.00019247309984628486, "loss": 1.3946, "step": 26350 }, { "epoch": 0.788224405099573, "grad_norm": 5.478078365325928, "learning_rate": 0.00019245881405036602, "loss": 1.3881, "step": 26400 }, { "epoch": 0.7897172543516555, "grad_norm": 5.912649631500244, "learning_rate": 0.0001924445282544472, "loss": 1.4139, "step": 26450 }, { "epoch": 0.7912101036037381, "grad_norm": 3.753570079803467, "learning_rate": 0.00019243024245852835, "loss": 1.3267, "step": 26500 }, { "epoch": 0.7927029528558206, "grad_norm": 4.155190944671631, "learning_rate": 0.00019241595666260952, "loss": 1.3539, "step": 26550 }, { "epoch": 0.7941958021079032, "grad_norm": 4.314638614654541, "learning_rate": 0.00019240167086669068, "loss": 1.3635, "step": 26600 }, { "epoch": 0.7956886513599857, "grad_norm": 4.177329063415527, "learning_rate": 0.00019238738507077185, "loss": 1.3642, "step": 26650 }, { "epoch": 0.7971815006120682, "grad_norm": 4.3052144050598145, "learning_rate": 0.000192373099274853, "loss": 1.3011, "step": 26700 }, { "epoch": 0.7986743498641508, "grad_norm": 5.800657272338867, "learning_rate": 0.00019235881347893417, "loss": 1.3247, "step": 26750 }, { "epoch": 0.8001671991162332, "grad_norm": 4.211179733276367, "learning_rate": 0.00019234452768301534, "loss": 1.3399, "step": 26800 }, { "epoch": 0.8016600483683157, "grad_norm": 4.8080315589904785, "learning_rate": 0.0001923302418870965, "loss": 1.3439, "step": 26850 }, { "epoch": 0.8031528976203983, "grad_norm": 5.03045129776001, "learning_rate": 0.00019231595609117767, "loss": 1.4009, "step": 26900 }, { "epoch": 0.8046457468724808, "grad_norm": 8.932162284851074, "learning_rate": 0.00019230167029525883, "loss": 1.3298, "step": 26950 }, { "epoch": 0.8061385961245633, "grad_norm": 5.651429176330566, "learning_rate": 0.00019228738449934, "loss": 1.4044, "step": 27000 }, { "epoch": 0.8076314453766459, "grad_norm": 4.428074836730957, "learning_rate": 0.00019227309870342116, "loss": 1.3988, "step": 27050 }, { "epoch": 0.8091242946287284, "grad_norm": 6.512781620025635, "learning_rate": 0.00019225881290750233, "loss": 1.3529, "step": 27100 }, { "epoch": 0.810617143880811, "grad_norm": 5.393406867980957, "learning_rate": 0.0001922445271115835, "loss": 1.3266, "step": 27150 }, { "epoch": 0.8121099931328934, "grad_norm": 3.74702525138855, "learning_rate": 0.00019223024131566468, "loss": 1.416, "step": 27200 }, { "epoch": 0.8136028423849759, "grad_norm": 3.856290817260742, "learning_rate": 0.00019221595551974582, "loss": 1.4082, "step": 27250 }, { "epoch": 0.8150956916370585, "grad_norm": 4.597848415374756, "learning_rate": 0.000192201669723827, "loss": 1.3751, "step": 27300 }, { "epoch": 0.816588540889141, "grad_norm": 3.875593662261963, "learning_rate": 0.00019218738392790815, "loss": 1.4024, "step": 27350 }, { "epoch": 0.8180813901412235, "grad_norm": 5.470495700836182, "learning_rate": 0.00019217309813198934, "loss": 1.394, "step": 27400 }, { "epoch": 0.8195742393933061, "grad_norm": 4.562834739685059, "learning_rate": 0.00019215881233607048, "loss": 1.3161, "step": 27450 }, { "epoch": 0.8210670886453886, "grad_norm": 4.183352470397949, "learning_rate": 0.00019214452654015167, "loss": 1.3427, "step": 27500 }, { "epoch": 0.8225599378974711, "grad_norm": 5.345236301422119, "learning_rate": 0.00019213024074423284, "loss": 1.3551, "step": 27550 }, { "epoch": 0.8240527871495537, "grad_norm": 5.217405319213867, "learning_rate": 0.000192115954948314, "loss": 1.3174, "step": 27600 }, { "epoch": 0.8255456364016361, "grad_norm": 4.486965179443359, "learning_rate": 0.00019210166915239517, "loss": 1.3931, "step": 27650 }, { "epoch": 0.8270384856537187, "grad_norm": 4.183075904846191, "learning_rate": 0.00019208738335647633, "loss": 1.3672, "step": 27700 }, { "epoch": 0.8285313349058012, "grad_norm": 4.916398525238037, "learning_rate": 0.0001920730975605575, "loss": 1.3993, "step": 27750 }, { "epoch": 0.8300241841578837, "grad_norm": 3.99765682220459, "learning_rate": 0.00019205881176463866, "loss": 1.362, "step": 27800 }, { "epoch": 0.8315170334099663, "grad_norm": 3.948981523513794, "learning_rate": 0.00019204452596871982, "loss": 1.3912, "step": 27850 }, { "epoch": 0.8330098826620488, "grad_norm": 6.524683475494385, "learning_rate": 0.000192030240172801, "loss": 1.3845, "step": 27900 }, { "epoch": 0.8345027319141313, "grad_norm": 4.598319053649902, "learning_rate": 0.00019201595437688215, "loss": 1.38, "step": 27950 }, { "epoch": 0.8359955811662139, "grad_norm": 4.287365436553955, "learning_rate": 0.00019200166858096335, "loss": 1.3687, "step": 28000 }, { "epoch": 0.8374884304182963, "grad_norm": 5.497170448303223, "learning_rate": 0.00019198738278504448, "loss": 1.4324, "step": 28050 }, { "epoch": 0.8389812796703788, "grad_norm": 3.9373042583465576, "learning_rate": 0.00019197309698912567, "loss": 1.3892, "step": 28100 }, { "epoch": 0.8404741289224614, "grad_norm": 5.157922267913818, "learning_rate": 0.0001919588111932068, "loss": 1.3764, "step": 28150 }, { "epoch": 0.8419669781745439, "grad_norm": 4.0024895668029785, "learning_rate": 0.000191944525397288, "loss": 1.3571, "step": 28200 }, { "epoch": 0.8434598274266265, "grad_norm": 4.894957542419434, "learning_rate": 0.00019193023960136917, "loss": 1.3997, "step": 28250 }, { "epoch": 0.844952676678709, "grad_norm": 4.829247951507568, "learning_rate": 0.00019191595380545033, "loss": 1.3751, "step": 28300 }, { "epoch": 0.8464455259307915, "grad_norm": 4.123593330383301, "learning_rate": 0.0001919016680095315, "loss": 1.4183, "step": 28350 }, { "epoch": 0.8479383751828741, "grad_norm": 4.012912273406982, "learning_rate": 0.00019188738221361266, "loss": 1.3757, "step": 28400 }, { "epoch": 0.8494312244349566, "grad_norm": 5.17979097366333, "learning_rate": 0.00019187309641769383, "loss": 1.3975, "step": 28450 }, { "epoch": 0.850924073687039, "grad_norm": 4.665642261505127, "learning_rate": 0.000191858810621775, "loss": 1.4539, "step": 28500 }, { "epoch": 0.8524169229391216, "grad_norm": 4.333798408508301, "learning_rate": 0.00019184452482585616, "loss": 1.3711, "step": 28550 }, { "epoch": 0.8539097721912041, "grad_norm": 4.485865116119385, "learning_rate": 0.00019183023902993732, "loss": 1.4299, "step": 28600 }, { "epoch": 0.8554026214432866, "grad_norm": 6.3318328857421875, "learning_rate": 0.00019181595323401849, "loss": 1.3621, "step": 28650 }, { "epoch": 0.8568954706953692, "grad_norm": 3.915356159210205, "learning_rate": 0.00019180166743809965, "loss": 1.4561, "step": 28700 }, { "epoch": 0.8583883199474517, "grad_norm": 4.613978862762451, "learning_rate": 0.00019178738164218081, "loss": 1.3267, "step": 28750 }, { "epoch": 0.8598811691995343, "grad_norm": 4.935427188873291, "learning_rate": 0.000191773095846262, "loss": 1.3962, "step": 28800 }, { "epoch": 0.8613740184516168, "grad_norm": 4.204285144805908, "learning_rate": 0.00019175881005034314, "loss": 1.3644, "step": 28850 }, { "epoch": 0.8628668677036992, "grad_norm": 3.901266098022461, "learning_rate": 0.00019174452425442434, "loss": 1.344, "step": 28900 }, { "epoch": 0.8643597169557818, "grad_norm": 5.1740922927856445, "learning_rate": 0.00019173023845850547, "loss": 1.3852, "step": 28950 }, { "epoch": 0.8658525662078643, "grad_norm": 4.984529972076416, "learning_rate": 0.00019171595266258667, "loss": 1.3853, "step": 29000 }, { "epoch": 0.8673454154599468, "grad_norm": 4.719232082366943, "learning_rate": 0.00019170166686666783, "loss": 1.3962, "step": 29050 }, { "epoch": 0.8688382647120294, "grad_norm": 6.084499359130859, "learning_rate": 0.000191687381070749, "loss": 1.3377, "step": 29100 }, { "epoch": 0.8703311139641119, "grad_norm": 5.261173248291016, "learning_rate": 0.00019167309527483016, "loss": 1.3673, "step": 29150 }, { "epoch": 0.8718239632161944, "grad_norm": 5.18539571762085, "learning_rate": 0.00019165880947891132, "loss": 1.358, "step": 29200 }, { "epoch": 0.873316812468277, "grad_norm": 4.789880275726318, "learning_rate": 0.0001916445236829925, "loss": 1.3426, "step": 29250 }, { "epoch": 0.8748096617203595, "grad_norm": 3.2945287227630615, "learning_rate": 0.00019163023788707365, "loss": 1.3308, "step": 29300 }, { "epoch": 0.876302510972442, "grad_norm": 5.060371398925781, "learning_rate": 0.00019161595209115482, "loss": 1.3497, "step": 29350 }, { "epoch": 0.8777953602245245, "grad_norm": 4.771635055541992, "learning_rate": 0.00019160166629523598, "loss": 1.3517, "step": 29400 }, { "epoch": 0.879288209476607, "grad_norm": 4.408042907714844, "learning_rate": 0.00019158738049931715, "loss": 1.3636, "step": 29450 }, { "epoch": 0.8807810587286896, "grad_norm": 5.183899879455566, "learning_rate": 0.0001915730947033983, "loss": 1.3569, "step": 29500 }, { "epoch": 0.8822739079807721, "grad_norm": 4.176016807556152, "learning_rate": 0.00019155880890747948, "loss": 1.3659, "step": 29550 }, { "epoch": 0.8837667572328546, "grad_norm": 4.783830165863037, "learning_rate": 0.00019154452311156064, "loss": 1.368, "step": 29600 }, { "epoch": 0.8852596064849372, "grad_norm": 3.7985739707946777, "learning_rate": 0.0001915302373156418, "loss": 1.4091, "step": 29650 }, { "epoch": 0.8867524557370197, "grad_norm": 4.401581287384033, "learning_rate": 0.00019151595151972297, "loss": 1.3613, "step": 29700 }, { "epoch": 0.8882453049891021, "grad_norm": 6.214754581451416, "learning_rate": 0.00019150166572380414, "loss": 1.4077, "step": 29750 }, { "epoch": 0.8897381542411847, "grad_norm": 4.197826862335205, "learning_rate": 0.0001914873799278853, "loss": 1.347, "step": 29800 }, { "epoch": 0.8912310034932672, "grad_norm": 3.1535027027130127, "learning_rate": 0.0001914730941319665, "loss": 1.3452, "step": 29850 }, { "epoch": 0.8927238527453498, "grad_norm": 4.618538856506348, "learning_rate": 0.00019145880833604763, "loss": 1.3417, "step": 29900 }, { "epoch": 0.8942167019974323, "grad_norm": 3.913506031036377, "learning_rate": 0.00019144452254012882, "loss": 1.3503, "step": 29950 }, { "epoch": 0.8957095512495148, "grad_norm": 4.013761043548584, "learning_rate": 0.00019143023674420996, "loss": 1.4109, "step": 30000 } ], "logging_steps": 50, "max_steps": 700001, "num_input_tokens_seen": 0, "num_train_epochs": 21, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.580165416503214e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }