diff --git "a/irish_llama31_8b_data_v3/checkpoint-1900/trainer_state.json" "b/irish_llama31_8b_data_v3/checkpoint-1900/trainer_state.json"
new file mode 100644--- /dev/null
+++ "b/irish_llama31_8b_data_v3/checkpoint-1900/trainer_state.json"
@@ -0,0 +1,6690 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9720534629404617,
+  "eval_steps": 500,
+  "global_step": 1900,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0005116070857581378,
+      "grad_norm": 5.084794521331787,
+      "learning_rate": 5.115089514066497e-07,
+      "loss": 2.9408,
+      "step": 1
+    },
+    {
+      "epoch": 0.0010232141715162755,
+      "grad_norm": 5.157843112945557,
+      "learning_rate": 1.0230179028132994e-06,
+      "loss": 3.0401,
+      "step": 2
+    },
+    {
+      "epoch": 0.002046428343032551,
+      "grad_norm": 5.386075973510742,
+      "learning_rate": 2.0460358056265987e-06,
+      "loss": 3.071,
+      "step": 4
+    },
+    {
+      "epoch": 0.0030696425145488263,
+      "grad_norm": 5.142333984375,
+      "learning_rate": 3.069053708439898e-06,
+      "loss": 3.1039,
+      "step": 6
+    },
+    {
+      "epoch": 0.004092856686065102,
+      "grad_norm": 3.101259231567383,
+      "learning_rate": 4.092071611253197e-06,
+      "loss": 2.8181,
+      "step": 8
+    },
+    {
+      "epoch": 0.005116070857581378,
+      "grad_norm": 2.207404375076294,
+      "learning_rate": 5.1150895140664966e-06,
+      "loss": 2.5627,
+      "step": 10
+    },
+    {
+      "epoch": 0.006139285029097653,
+      "grad_norm": 1.8845449686050415,
+      "learning_rate": 6.138107416879796e-06,
+      "loss": 2.6209,
+      "step": 12
+    },
+    {
+      "epoch": 0.007162499200613928,
+      "grad_norm": 2.1659703254699707,
+      "learning_rate": 7.161125319693095e-06,
+      "loss": 2.5467,
+      "step": 14
+    },
+    {
+      "epoch": 0.008185713372130204,
+      "grad_norm": 1.6377224922180176,
+      "learning_rate": 8.184143222506395e-06,
+      "loss": 2.5057,
+      "step": 16
+    },
+    {
+      "epoch": 0.009208927543646479,
+      "grad_norm": 1.1535893678665161,
+      "learning_rate": 9.207161125319694e-06,
+      "loss": 2.538,
+      "step": 18
+    },
+    {
+      "epoch": 0.010232141715162756,
+      "grad_norm": 1.0513185262680054,
+      "learning_rate": 1.0230179028132993e-05,
+      "loss": 2.5247,
+      "step": 20
+    },
+    {
+      "epoch": 0.01125535588667903,
+      "grad_norm": 0.8281764388084412,
+      "learning_rate": 1.1253196930946292e-05,
+      "loss": 2.4595,
+      "step": 22
+    },
+    {
+      "epoch": 0.012278570058195305,
+      "grad_norm": 0.5129208564758301,
+      "learning_rate": 1.2276214833759591e-05,
+      "loss": 2.3742,
+      "step": 24
+    },
+    {
+      "epoch": 0.013301784229711582,
+      "grad_norm": 0.46613597869873047,
+      "learning_rate": 1.3299232736572892e-05,
+      "loss": 2.4564,
+      "step": 26
+    },
+    {
+      "epoch": 0.014324998401227857,
+      "grad_norm": 0.354717493057251,
+      "learning_rate": 1.432225063938619e-05,
+      "loss": 2.3467,
+      "step": 28
+    },
+    {
+      "epoch": 0.015348212572744133,
+      "grad_norm": 0.3325178623199463,
+      "learning_rate": 1.534526854219949e-05,
+      "loss": 2.3978,
+      "step": 30
+    },
+    {
+      "epoch": 0.016371426744260408,
+      "grad_norm": 0.32920145988464355,
+      "learning_rate": 1.636828644501279e-05,
+      "loss": 2.2522,
+      "step": 32
+    },
+    {
+      "epoch": 0.017394640915776683,
+      "grad_norm": 0.25466033816337585,
+      "learning_rate": 1.739130434782609e-05,
+      "loss": 2.243,
+      "step": 34
+    },
+    {
+      "epoch": 0.018417855087292958,
+      "grad_norm": 0.35631808638572693,
+      "learning_rate": 1.8414322250639388e-05,
+      "loss": 2.2527,
+      "step": 36
+    },
+    {
+      "epoch": 0.019441069258809233,
+      "grad_norm": 0.23582319915294647,
+      "learning_rate": 1.9437340153452684e-05,
+      "loss": 2.1452,
+      "step": 38
+    },
+    {
+      "epoch": 0.02046428343032551,
+      "grad_norm": 0.2491885870695114,
+      "learning_rate": 2.0460358056265986e-05,
+      "loss": 2.1778,
+      "step": 40
+    },
+    {
+      "epoch": 0.021487497601841786,
+      "grad_norm": 0.2993784546852112,
+      "learning_rate": 2.1483375959079285e-05,
+      "loss": 2.1006,
+      "step": 42
+    },
+    {
+      "epoch": 0.02251071177335806,
+      "grad_norm": 0.21940283477306366,
+      "learning_rate": 2.2506393861892585e-05,
+      "loss": 2.1752,
+      "step": 44
+    },
+    {
+      "epoch": 0.023533925944874336,
+      "grad_norm": 0.15252649784088135,
+      "learning_rate": 2.3529411764705884e-05,
+      "loss": 2.1295,
+      "step": 46
+    },
+    {
+      "epoch": 0.02455714011639061,
+      "grad_norm": 0.19182737171649933,
+      "learning_rate": 2.4552429667519183e-05,
+      "loss": 2.1181,
+      "step": 48
+    },
+    {
+      "epoch": 0.02558035428790689,
+      "grad_norm": 0.19416701793670654,
+      "learning_rate": 2.5575447570332482e-05,
+      "loss": 2.0953,
+      "step": 50
+    },
+    {
+      "epoch": 0.026603568459423164,
+      "grad_norm": 0.12562625110149384,
+      "learning_rate": 2.6598465473145784e-05,
+      "loss": 2.0856,
+      "step": 52
+    },
+    {
+      "epoch": 0.02762678263093944,
+      "grad_norm": 0.13417182862758636,
+      "learning_rate": 2.7621483375959077e-05,
+      "loss": 2.0948,
+      "step": 54
+    },
+    {
+      "epoch": 0.028649996802455713,
+      "grad_norm": 0.10808593034744263,
+      "learning_rate": 2.864450127877238e-05,
+      "loss": 2.0541,
+      "step": 56
+    },
+    {
+      "epoch": 0.02967321097397199,
+      "grad_norm": 0.14162665605545044,
+      "learning_rate": 2.966751918158568e-05,
+      "loss": 2.0756,
+      "step": 58
+    },
+    {
+      "epoch": 0.030696425145488267,
+      "grad_norm": 0.10216689854860306,
+      "learning_rate": 3.069053708439898e-05,
+      "loss": 2.0502,
+      "step": 60
+    },
+    {
+      "epoch": 0.03171963931700454,
+      "grad_norm": 0.0772320106625557,
+      "learning_rate": 3.171355498721228e-05,
+      "loss": 2.0598,
+      "step": 62
+    },
+    {
+      "epoch": 0.032742853488520816,
+      "grad_norm": 0.07200902700424194,
+      "learning_rate": 3.273657289002558e-05,
+      "loss": 2.0416,
+      "step": 64
+    },
+    {
+      "epoch": 0.03376606766003709,
+      "grad_norm": 0.07764917612075806,
+      "learning_rate": 3.375959079283887e-05,
+      "loss": 2.04,
+      "step": 66
+    },
+    {
+      "epoch": 0.034789281831553366,
+      "grad_norm": 0.07703404128551483,
+      "learning_rate": 3.478260869565218e-05,
+      "loss": 2.0426,
+      "step": 68
+    },
+    {
+      "epoch": 0.03581249600306964,
+      "grad_norm": 0.05096273496747017,
+      "learning_rate": 3.580562659846548e-05,
+      "loss": 2.0264,
+      "step": 70
+    },
+    {
+      "epoch": 0.036835710174585916,
+      "grad_norm": 0.07172555476427078,
+      "learning_rate": 3.6828644501278776e-05,
+      "loss": 1.9799,
+      "step": 72
+    },
+    {
+      "epoch": 0.03785892434610219,
+      "grad_norm": 0.05563480406999588,
+      "learning_rate": 3.7851662404092075e-05,
+      "loss": 1.9922,
+      "step": 74
+    },
+    {
+      "epoch": 0.038882138517618466,
+      "grad_norm": 0.04726962745189667,
+      "learning_rate": 3.887468030690537e-05,
+      "loss": 1.9826,
+      "step": 76
+    },
+    {
+      "epoch": 0.03990535268913475,
+      "grad_norm": 0.040130794048309326,
+      "learning_rate": 3.989769820971867e-05,
+      "loss": 1.9693,
+      "step": 78
+    },
+    {
+      "epoch": 0.04092856686065102,
+      "grad_norm": 0.051317401230335236,
+      "learning_rate": 4.092071611253197e-05,
+      "loss": 1.9454,
+      "step": 80
+    },
+    {
+      "epoch": 0.0419517810321673,
+      "grad_norm": 0.03843973949551582,
+      "learning_rate": 4.194373401534527e-05,
+      "loss": 1.9535,
+      "step": 82
+    },
+    {
+      "epoch": 0.04297499520368357,
+      "grad_norm": 0.04338320344686508,
+      "learning_rate": 4.296675191815857e-05,
+      "loss": 1.9017,
+      "step": 84
+    },
+    {
+      "epoch": 0.04399820937519985,
+      "grad_norm": 0.0422111339867115,
+      "learning_rate": 4.398976982097187e-05,
+      "loss": 1.9806,
+      "step": 86
+    },
+    {
+      "epoch": 0.04502142354671612,
+      "grad_norm": 0.043594423681497574,
+      "learning_rate": 4.501278772378517e-05,
+      "loss": 1.9809,
+      "step": 88
+    },
+    {
+      "epoch": 0.0460446377182324,
+      "grad_norm": 0.050932493060827255,
+      "learning_rate": 4.603580562659847e-05,
+      "loss": 2.002,
+      "step": 90
+    },
+    {
+      "epoch": 0.04706785188974867,
+      "grad_norm": 0.039923008531332016,
+      "learning_rate": 4.705882352941177e-05,
+      "loss": 1.9898,
+      "step": 92
+    },
+    {
+      "epoch": 0.048091066061264946,
+      "grad_norm": 0.04199720919132233,
+      "learning_rate": 4.8081841432225067e-05,
+      "loss": 1.9375,
+      "step": 94
+    },
+    {
+      "epoch": 0.04911428023278122,
+      "grad_norm": 0.03885011374950409,
+      "learning_rate": 4.9104859335038366e-05,
+      "loss": 1.9594,
+      "step": 96
+    },
+    {
+      "epoch": 0.0501374944042975,
+      "grad_norm": 0.04459952563047409,
+      "learning_rate": 5.0127877237851665e-05,
+      "loss": 1.9327,
+      "step": 98
+    },
+    {
+      "epoch": 0.05116070857581378,
+      "grad_norm": 0.04154925048351288,
+      "learning_rate": 5.1150895140664964e-05,
+      "loss": 1.9385,
+      "step": 100
+    },
+    {
+      "epoch": 0.05218392274733005,
+      "grad_norm": 0.04149138927459717,
+      "learning_rate": 5.217391304347826e-05,
+      "loss": 1.9251,
+      "step": 102
+    },
+    {
+      "epoch": 0.05320713691884633,
+      "grad_norm": 0.05338102579116821,
+      "learning_rate": 5.319693094629157e-05,
+      "loss": 1.9211,
+      "step": 104
+    },
+    {
+      "epoch": 0.0542303510903626,
+      "grad_norm": 0.04964439943432808,
+      "learning_rate": 5.421994884910486e-05,
+      "loss": 1.8863,
+      "step": 106
+    },
+    {
+      "epoch": 0.05525356526187888,
+      "grad_norm": 0.040731314569711685,
+      "learning_rate": 5.5242966751918154e-05,
+      "loss": 1.9002,
+      "step": 108
+    },
+    {
+      "epoch": 0.05627677943339515,
+      "grad_norm": 0.05813027173280716,
+      "learning_rate": 5.626598465473146e-05,
+      "loss": 1.8944,
+      "step": 110
+    },
+    {
+      "epoch": 0.05729999360491143,
+      "grad_norm": 0.04966093972325325,
+      "learning_rate": 5.728900255754476e-05,
+      "loss": 1.898,
+      "step": 112
+    },
+    {
+      "epoch": 0.0583232077764277,
+      "grad_norm": 0.050573479384183884,
+      "learning_rate": 5.8312020460358065e-05,
+      "loss": 1.8778,
+      "step": 114
+    },
+    {
+      "epoch": 0.05934642194794398,
+      "grad_norm": 0.05025520175695419,
+      "learning_rate": 5.933503836317136e-05,
+      "loss": 1.9044,
+      "step": 116
+    },
+    {
+      "epoch": 0.06036963611946025,
+      "grad_norm": 0.05153055489063263,
+      "learning_rate": 6.035805626598465e-05,
+      "loss": 1.9045,
+      "step": 118
+    },
+    {
+      "epoch": 0.06139285029097653,
+      "grad_norm": 0.051311247050762177,
+      "learning_rate": 6.138107416879796e-05,
+      "loss": 1.9077,
+      "step": 120
+    },
+    {
+      "epoch": 0.06241606446249281,
+      "grad_norm": 0.05084897577762604,
+      "learning_rate": 6.240409207161125e-05,
+      "loss": 1.8538,
+      "step": 122
+    },
+    {
+      "epoch": 0.06343927863400908,
+      "grad_norm": 0.05961287021636963,
+      "learning_rate": 6.342710997442456e-05,
+      "loss": 1.8792,
+      "step": 124
+    },
+    {
+      "epoch": 0.06446249280552535,
+      "grad_norm": 0.05775010585784912,
+      "learning_rate": 6.445012787723786e-05,
+      "loss": 1.8587,
+      "step": 126
+    },
+    {
+      "epoch": 0.06548570697704163,
+      "grad_norm": 0.09344275295734406,
+      "learning_rate": 6.547314578005116e-05,
+      "loss": 1.8454,
+      "step": 128
+    },
+    {
+      "epoch": 0.0665089211485579,
+      "grad_norm": 0.0748172476887703,
+      "learning_rate": 6.649616368286446e-05,
+      "loss": 1.8998,
+      "step": 130
+    },
+    {
+      "epoch": 0.06753213532007418,
+      "grad_norm": 0.07188538461923599,
+      "learning_rate": 6.751918158567774e-05,
+      "loss": 1.8219,
+      "step": 132
+    },
+    {
+      "epoch": 0.06855534949159046,
+      "grad_norm": 0.05799673870205879,
+      "learning_rate": 6.854219948849106e-05,
+      "loss": 1.8549,
+      "step": 134
+    },
+    {
+      "epoch": 0.06957856366310673,
+      "grad_norm": 0.07886774092912674,
+      "learning_rate": 6.956521739130436e-05,
+      "loss": 1.8885,
+      "step": 136
+    },
+    {
+      "epoch": 0.07060177783462301,
+      "grad_norm": 0.0599171444773674,
+      "learning_rate": 7.058823529411765e-05,
+      "loss": 1.829,
+      "step": 138
+    },
+    {
+      "epoch": 0.07162499200613928,
+      "grad_norm": 0.07810111343860626,
+      "learning_rate": 7.161125319693095e-05,
+      "loss": 1.8878,
+      "step": 140
+    },
+    {
+      "epoch": 0.07264820617765556,
+      "grad_norm": 0.062123704701662064,
+      "learning_rate": 7.263427109974424e-05,
+      "loss": 1.8633,
+      "step": 142
+    },
+    {
+      "epoch": 0.07367142034917183,
+      "grad_norm": 0.08402098715305328,
+      "learning_rate": 7.365728900255755e-05,
+      "loss": 1.8377,
+      "step": 144
+    },
+    {
+      "epoch": 0.07469463452068811,
+      "grad_norm": 0.06189502775669098,
+      "learning_rate": 7.468030690537085e-05,
+      "loss": 1.8683,
+      "step": 146
+    },
+    {
+      "epoch": 0.07571784869220438,
+      "grad_norm": 0.07368986308574677,
+      "learning_rate": 7.570332480818415e-05,
+      "loss": 1.8636,
+      "step": 148
+    },
+    {
+      "epoch": 0.07674106286372066,
+      "grad_norm": 0.06430894136428833,
+      "learning_rate": 7.672634271099745e-05,
+      "loss": 1.8341,
+      "step": 150
+    },
+    {
+      "epoch": 0.07776427703523693,
+      "grad_norm": 0.05924483761191368,
+      "learning_rate": 7.774936061381073e-05,
+      "loss": 1.9151,
+      "step": 152
+    },
+    {
+      "epoch": 0.07878749120675321,
+      "grad_norm": 0.06166929751634598,
+      "learning_rate": 7.877237851662405e-05,
+      "loss": 1.8306,
+      "step": 154
+    },
+    {
+      "epoch": 0.0798107053782695,
+      "grad_norm": 0.07514499127864838,
+      "learning_rate": 7.979539641943735e-05,
+      "loss": 1.8572,
+      "step": 156
+    },
+    {
+      "epoch": 0.08083391954978576,
+      "grad_norm": 0.06925056874752045,
+      "learning_rate": 8.081841432225065e-05,
+      "loss": 1.8449,
+      "step": 158
+    },
+    {
+      "epoch": 0.08185713372130204,
+      "grad_norm": 0.08889607340097427,
+      "learning_rate": 8.184143222506395e-05,
+      "loss": 1.8217,
+      "step": 160
+    },
+    {
+      "epoch": 0.08288034789281831,
+      "grad_norm": 0.11205849796533585,
+      "learning_rate": 8.286445012787724e-05,
+      "loss": 1.7859,
+      "step": 162
+    },
+    {
+      "epoch": 0.0839035620643346,
+      "grad_norm": 0.13293609023094177,
+      "learning_rate": 8.388746803069054e-05,
+      "loss": 1.8245,
+      "step": 164
+    },
+    {
+      "epoch": 0.08492677623585086,
+      "grad_norm": 0.14082959294319153,
+      "learning_rate": 8.491048593350384e-05,
+      "loss": 1.8077,
+      "step": 166
+    },
+    {
+      "epoch": 0.08594999040736714,
+      "grad_norm": 0.0726478174328804,
+      "learning_rate": 8.593350383631714e-05,
+      "loss": 1.8081,
+      "step": 168
+    },
+    {
+      "epoch": 0.08697320457888341,
+      "grad_norm": 0.21175715327262878,
+      "learning_rate": 8.695652173913044e-05,
+      "loss": 1.8289,
+      "step": 170
+    },
+    {
+      "epoch": 0.0879964187503997,
+      "grad_norm": 0.19227363169193268,
+      "learning_rate": 8.797953964194374e-05,
+      "loss": 1.8092,
+      "step": 172
+    },
+    {
+      "epoch": 0.08901963292191598,
+      "grad_norm": 0.13788004219532013,
+      "learning_rate": 8.900255754475704e-05,
+      "loss": 1.7986,
+      "step": 174
+    },
+    {
+      "epoch": 0.09004284709343224,
+      "grad_norm": 0.09351494908332825,
+      "learning_rate": 9.002557544757034e-05,
+      "loss": 1.8077,
+      "step": 176
+    },
+    {
+      "epoch": 0.09106606126494853,
+      "grad_norm": 0.09681002050638199,
+      "learning_rate": 9.104859335038364e-05,
+      "loss": 1.794,
+      "step": 178
+    },
+    {
+      "epoch": 0.0920892754364648,
+      "grad_norm": 0.061654381453990936,
+      "learning_rate": 9.207161125319694e-05,
+      "loss": 1.7935,
+      "step": 180
+    },
+    {
+      "epoch": 0.09311248960798107,
+      "grad_norm": 0.06282493472099304,
+      "learning_rate": 9.309462915601024e-05,
+      "loss": 1.7758,
+      "step": 182
+    },
+    {
+      "epoch": 0.09413570377949734,
+      "grad_norm": 0.08118202537298203,
+      "learning_rate": 9.411764705882353e-05,
+      "loss": 1.8209,
+      "step": 184
+    },
+    {
+      "epoch": 0.09515891795101362,
+      "grad_norm": 0.0755864828824997,
+      "learning_rate": 9.514066496163683e-05,
+      "loss": 1.7672,
+      "step": 186
+    },
+    {
+      "epoch": 0.09618213212252989,
+      "grad_norm": 0.07810387760400772,
+      "learning_rate": 9.616368286445013e-05,
+      "loss": 1.7655,
+      "step": 188
+    },
+    {
+      "epoch": 0.09720534629404617,
+      "grad_norm": 0.08016899228096008,
+      "learning_rate": 9.718670076726343e-05,
+      "loss": 1.7818,
+      "step": 190
+    },
+    {
+      "epoch": 0.09822856046556244,
+      "grad_norm": 0.07527964562177658,
+      "learning_rate": 9.820971867007673e-05,
+      "loss": 1.7386,
+      "step": 192
+    },
+    {
+      "epoch": 0.09925177463707872,
+      "grad_norm": 0.08135760575532913,
+      "learning_rate": 9.923273657289003e-05,
+      "loss": 1.7678,
+      "step": 194
+    },
+    {
+      "epoch": 0.100274988808595,
+      "grad_norm": 0.06465744972229004,
+      "learning_rate": 0.00010025575447570333,
+      "loss": 1.8469,
+      "step": 196
+    },
+    {
+      "epoch": 0.10129820298011127,
+      "grad_norm": 0.0678311362862587,
+      "learning_rate": 0.00010127877237851664,
+      "loss": 1.7856,
+      "step": 198
+    },
+    {
+      "epoch": 0.10232141715162756,
+      "grad_norm": 0.06425610929727554,
+      "learning_rate": 0.00010230179028132993,
+      "loss": 1.7542,
+      "step": 200
+    },
+    {
+      "epoch": 0.10334463132314382,
+      "grad_norm": 0.06820003688335419,
+      "learning_rate": 0.00010332480818414323,
+      "loss": 1.783,
+      "step": 202
+    },
+    {
+      "epoch": 0.1043678454946601,
+      "grad_norm": 0.0690922886133194,
+      "learning_rate": 0.00010434782608695653,
+      "loss": 1.7612,
+      "step": 204
+    },
+    {
+      "epoch": 0.10539105966617637,
+      "grad_norm": 0.06488107144832611,
+      "learning_rate": 0.00010537084398976983,
+      "loss": 1.7648,
+      "step": 206
+    },
+    {
+      "epoch": 0.10641427383769266,
+      "grad_norm": 0.08278009295463562,
+      "learning_rate": 0.00010639386189258314,
+      "loss": 1.7661,
+      "step": 208
+    },
+    {
+      "epoch": 0.10743748800920892,
+      "grad_norm": 0.08722035586833954,
+      "learning_rate": 0.00010741687979539642,
+      "loss": 1.7578,
+      "step": 210
+    },
+    {
+      "epoch": 0.1084607021807252,
+      "grad_norm": 0.0737011507153511,
+      "learning_rate": 0.00010843989769820972,
+      "loss": 1.7381,
+      "step": 212
+    },
+    {
+      "epoch": 0.10948391635224147,
+      "grad_norm": 0.08060843497514725,
+      "learning_rate": 0.00010946291560102302,
+      "loss": 1.7967,
+      "step": 214
+    },
+    {
+      "epoch": 0.11050713052375775,
+      "grad_norm": 0.10279374569654465,
+      "learning_rate": 0.00011048593350383631,
+      "loss": 1.7703,
+      "step": 216
+    },
+    {
+      "epoch": 0.11153034469527404,
+      "grad_norm": 0.0777791365981102,
+      "learning_rate": 0.00011150895140664963,
+      "loss": 1.8015,
+      "step": 218
+    },
+    {
+      "epoch": 0.1125535588667903,
+      "grad_norm": 0.06883997470140457,
+      "learning_rate": 0.00011253196930946292,
+      "loss": 1.7731,
+      "step": 220
+    },
+    {
+      "epoch": 0.11357677303830659,
+      "grad_norm": 0.06231442466378212,
+      "learning_rate": 0.00011355498721227622,
+      "loss": 1.8063,
+      "step": 222
+    },
+    {
+      "epoch": 0.11459998720982285,
+      "grad_norm": 0.06607846170663834,
+      "learning_rate": 0.00011457800511508952,
+      "loss": 1.7616,
+      "step": 224
+    },
+    {
+      "epoch": 0.11562320138133914,
+      "grad_norm": 0.05903138220310211,
+      "learning_rate": 0.0001156010230179028,
+      "loss": 1.7993,
+      "step": 226
+    },
+    {
+      "epoch": 0.1166464155528554,
+      "grad_norm": 0.07282232493162155,
+      "learning_rate": 0.00011662404092071613,
+      "loss": 1.7374,
+      "step": 228
+    },
+    {
+      "epoch": 0.11766962972437169,
+      "grad_norm": 0.06793032586574554,
+      "learning_rate": 0.00011764705882352942,
+      "loss": 1.7852,
+      "step": 230
+    },
+    {
+      "epoch": 0.11869284389588795,
+      "grad_norm": 0.06404048949480057,
+      "learning_rate": 0.00011867007672634271,
+      "loss": 1.775,
+      "step": 232
+    },
+    {
+      "epoch": 0.11971605806740424,
+      "grad_norm": 0.08423135429620743,
+      "learning_rate": 0.00011969309462915601,
+      "loss": 1.779,
+      "step": 234
+    },
+    {
+      "epoch": 0.1207392722389205,
+      "grad_norm": 0.0814799889922142,
+      "learning_rate": 0.0001207161125319693,
+      "loss": 1.7082,
+      "step": 236
+    },
+    {
+      "epoch": 0.12176248641043678,
+      "grad_norm": 0.08876215666532516,
+      "learning_rate": 0.00012173913043478263,
+      "loss": 1.7767,
+      "step": 238
+    },
+    {
+      "epoch": 0.12278570058195307,
+      "grad_norm": 0.07051345705986023,
+      "learning_rate": 0.00012276214833759592,
+      "loss": 1.7181,
+      "step": 240
+    },
+    {
+      "epoch": 0.12380891475346933,
+      "grad_norm": 0.07023751735687256,
+      "learning_rate": 0.00012378516624040922,
+      "loss": 1.7308,
+      "step": 242
+    },
+    {
+      "epoch": 0.12483212892498562,
+      "grad_norm": 0.0754849910736084,
+      "learning_rate": 0.0001248081841432225,
+      "loss": 1.7782,
+      "step": 244
+    },
+    {
+      "epoch": 0.1258553430965019,
+      "grad_norm": 0.07223635166883469,
+      "learning_rate": 0.0001258312020460358,
+      "loss": 1.718,
+      "step": 246
+    },
+    {
+      "epoch": 0.12687855726801817,
+      "grad_norm": 0.07007969915866852,
+      "learning_rate": 0.00012685421994884912,
+      "loss": 1.7686,
+      "step": 248
+    },
+    {
+      "epoch": 0.12790177143953443,
+      "grad_norm": 0.06361662596464157,
+      "learning_rate": 0.00012787723785166242,
+      "loss": 1.7217,
+      "step": 250
+    },
+    {
+      "epoch": 0.1289249856110507,
+      "grad_norm": 0.08723774552345276,
+      "learning_rate": 0.00012890025575447572,
+      "loss": 1.7369,
+      "step": 252
+    },
+    {
+      "epoch": 0.129948199782567,
+      "grad_norm": 0.06651702523231506,
+      "learning_rate": 0.000129923273657289,
+      "loss": 1.7163,
+      "step": 254
+    },
+    {
+      "epoch": 0.13097141395408327,
+      "grad_norm": 0.07153377681970596,
+      "learning_rate": 0.00013094629156010232,
+      "loss": 1.7168,
+      "step": 256
+    },
+    {
+      "epoch": 0.13199462812559953,
+      "grad_norm": 0.09451760351657867,
+      "learning_rate": 0.00013196930946291562,
+      "loss": 1.7182,
+      "step": 258
+    },
+    {
+      "epoch": 0.1330178422971158,
+      "grad_norm": 0.08822207897901535,
+      "learning_rate": 0.00013299232736572892,
+      "loss": 1.7483,
+      "step": 260
+    },
+    {
+      "epoch": 0.1340410564686321,
+      "grad_norm": 0.11073771119117737,
+      "learning_rate": 0.00013401534526854221,
+      "loss": 1.7087,
+      "step": 262
+    },
+    {
+      "epoch": 0.13506427064014837,
+      "grad_norm": 0.07717689871788025,
+      "learning_rate": 0.0001350383631713555,
+      "loss": 1.6943,
+      "step": 264
+    },
+    {
+      "epoch": 0.13608748481166463,
+      "grad_norm": 0.09418254345655441,
+      "learning_rate": 0.0001360613810741688,
+      "loss": 1.7084,
+      "step": 266
+    },
+    {
+      "epoch": 0.13711069898318093,
+      "grad_norm": 0.0922132208943367,
+      "learning_rate": 0.0001370843989769821,
+      "loss": 1.7526,
+      "step": 268
+    },
+    {
+      "epoch": 0.1381339131546972,
+      "grad_norm": 0.08973314613103867,
+      "learning_rate": 0.0001381074168797954,
+      "loss": 1.7049,
+      "step": 270
+    },
+    {
+      "epoch": 0.13915712732621346,
+      "grad_norm": 0.0772908478975296,
+      "learning_rate": 0.0001391304347826087,
+      "loss": 1.7444,
+      "step": 272
+    },
+    {
+      "epoch": 0.14018034149772973,
+      "grad_norm": 0.07179255038499832,
+      "learning_rate": 0.00014015345268542198,
+      "loss": 1.7309,
+      "step": 274
+    },
+    {
+      "epoch": 0.14120355566924603,
+      "grad_norm": 0.10786614567041397,
+      "learning_rate": 0.0001411764705882353,
+      "loss": 1.7413,
+      "step": 276
+    },
+    {
+      "epoch": 0.1422267698407623,
+      "grad_norm": 0.0815059244632721,
+      "learning_rate": 0.0001421994884910486,
+      "loss": 1.6895,
+      "step": 278
+    },
+    {
+      "epoch": 0.14324998401227856,
+      "grad_norm": 0.12658405303955078,
+      "learning_rate": 0.0001432225063938619,
+      "loss": 1.7013,
+      "step": 280
+    },
+    {
+      "epoch": 0.14427319818379483,
+      "grad_norm": 0.0807737335562706,
+      "learning_rate": 0.0001442455242966752,
+      "loss": 1.7378,
+      "step": 282
+    },
+    {
+      "epoch": 0.14529641235531113,
+      "grad_norm": 0.09726593643426895,
+      "learning_rate": 0.00014526854219948848,
+      "loss": 1.7143,
+      "step": 284
+    },
+    {
+      "epoch": 0.1463196265268274,
+      "grad_norm": 0.08326689153909683,
+      "learning_rate": 0.0001462915601023018,
+      "loss": 1.7395,
+      "step": 286
+    },
+    {
+      "epoch": 0.14734284069834366,
+      "grad_norm": 0.08783421665430069,
+      "learning_rate": 0.0001473145780051151,
+      "loss": 1.7466,
+      "step": 288
+    },
+    {
+      "epoch": 0.14836605486985996,
+      "grad_norm": 0.0639604702591896,
+      "learning_rate": 0.0001483375959079284,
+      "loss": 1.7019,
+      "step": 290
+    },
+    {
+      "epoch": 0.14938926904137623,
+      "grad_norm": 0.08028368651866913,
+      "learning_rate": 0.0001493606138107417,
+      "loss": 1.7134,
+      "step": 292
+    },
+    {
+      "epoch": 0.1504124832128925,
+      "grad_norm": 0.0739947184920311,
+      "learning_rate": 0.00015038363171355497,
+      "loss": 1.702,
+      "step": 294
+    },
+    {
+      "epoch": 0.15143569738440876,
+      "grad_norm": 0.07335802167654037,
+      "learning_rate": 0.0001514066496163683,
+      "loss": 1.7321,
+      "step": 296
+    },
+    {
+      "epoch": 0.15245891155592506,
+      "grad_norm": 0.07030144333839417,
+      "learning_rate": 0.0001524296675191816,
+      "loss": 1.6654,
+      "step": 298
+    },
+    {
+      "epoch": 0.15348212572744133,
+      "grad_norm": 0.07079968601465225,
+      "learning_rate": 0.0001534526854219949,
+      "loss": 1.7129,
+      "step": 300
+    },
+    {
+      "epoch": 0.1545053398989576,
+      "grad_norm": 0.06605160236358643,
+      "learning_rate": 0.0001544757033248082,
+      "loss": 1.713,
+      "step": 302
+    },
+    {
+      "epoch": 0.15552855407047386,
+      "grad_norm": 0.08417898416519165,
+      "learning_rate": 0.00015549872122762147,
+      "loss": 1.7063,
+      "step": 304
+    },
+    {
+      "epoch": 0.15655176824199016,
+      "grad_norm": 0.07255028933286667,
+      "learning_rate": 0.0001565217391304348,
+      "loss": 1.742,
+      "step": 306
+    },
+    {
+      "epoch": 0.15757498241350643,
+      "grad_norm": 0.06561743468046188,
+      "learning_rate": 0.0001575447570332481,
+      "loss": 1.6912,
+      "step": 308
+    },
+    {
+      "epoch": 0.1585981965850227,
+      "grad_norm": 0.07030262053012848,
+      "learning_rate": 0.0001585677749360614,
+      "loss": 1.7434,
+      "step": 310
+    },
+    {
+      "epoch": 0.159621410756539,
+      "grad_norm": 0.076111800968647,
+      "learning_rate": 0.0001595907928388747,
+      "loss": 1.6783,
+      "step": 312
+    },
+    {
+      "epoch": 0.16064462492805526,
+      "grad_norm": 0.06267083436250687,
+      "learning_rate": 0.000160613810741688,
+      "loss": 1.7193,
+      "step": 314
+    },
+    {
+      "epoch": 0.16166783909957153,
+      "grad_norm": 0.07638990879058838,
+      "learning_rate": 0.0001616368286445013,
+      "loss": 1.7395,
+      "step": 316
+    },
+    {
+      "epoch": 0.1626910532710878,
+      "grad_norm": 0.07447683811187744,
+      "learning_rate": 0.0001626598465473146,
+      "loss": 1.6574,
+      "step": 318
+    },
+    {
+      "epoch": 0.1637142674426041,
+      "grad_norm": 0.07413692772388458,
+      "learning_rate": 0.0001636828644501279,
+      "loss": 1.6868,
+      "step": 320
+    },
+    {
+      "epoch": 0.16473748161412036,
+      "grad_norm": 0.07566969096660614,
+      "learning_rate": 0.0001647058823529412,
+      "loss": 1.779,
+      "step": 322
+    },
+    {
+      "epoch": 0.16576069578563662,
+      "grad_norm": 0.09093326330184937,
+      "learning_rate": 0.0001657289002557545,
+      "loss": 1.6807,
+      "step": 324
+    },
+    {
+      "epoch": 0.16678390995715292,
+      "grad_norm": 0.0930614024400711,
+      "learning_rate": 0.0001667519181585678,
+      "loss": 1.7067,
+      "step": 326
+    },
+    {
+      "epoch": 0.1678071241286692,
+      "grad_norm": 0.06676892936229706,
+      "learning_rate": 0.0001677749360613811,
+      "loss": 1.6609,
+      "step": 328
+    },
+    {
+      "epoch": 0.16883033830018546,
+      "grad_norm": 0.08882534503936768,
+      "learning_rate": 0.00016879795396419439,
+      "loss": 1.6796,
+      "step": 330
+    },
+    {
+      "epoch": 0.16985355247170172,
+      "grad_norm": 0.07226958125829697,
+      "learning_rate": 0.00016982097186700768,
+      "loss": 1.7163,
+      "step": 332
+    },
+    {
+      "epoch": 0.17087676664321802,
+      "grad_norm": 0.07271122932434082,
+      "learning_rate": 0.00017084398976982098,
+      "loss": 1.7585,
+      "step": 334
+    },
+    {
+      "epoch": 0.1718999808147343,
+      "grad_norm": 0.08161617070436478,
+      "learning_rate": 0.00017186700767263428,
+      "loss": 1.6299,
+      "step": 336
+    },
+    {
+      "epoch": 0.17292319498625056,
+      "grad_norm": 0.08419859409332275,
+      "learning_rate": 0.00017289002557544758,
+      "loss": 1.6848,
+      "step": 338
+    },
+    {
+      "epoch": 0.17394640915776682,
+      "grad_norm": 0.08996909856796265,
+      "learning_rate": 0.00017391304347826088,
+      "loss": 1.6582,
+      "step": 340
+    },
+    {
+      "epoch": 0.17496962332928312,
+      "grad_norm": 0.09278981387615204,
+      "learning_rate": 0.00017493606138107418,
+      "loss": 1.7044,
+      "step": 342
+    },
+    {
+      "epoch": 0.1759928375007994,
+      "grad_norm": 0.08387704193592072,
+      "learning_rate": 0.00017595907928388748,
+      "loss": 1.6503,
+      "step": 344
+    },
+    {
+      "epoch": 0.17701605167231566,
+      "grad_norm": 0.07442387193441391,
+      "learning_rate": 0.00017698209718670078,
+      "loss": 1.7058,
+      "step": 346
+    },
+    {
+      "epoch": 0.17803926584383195,
+      "grad_norm": 0.06898263841867447,
+      "learning_rate": 0.00017800511508951408,
+      "loss": 1.6708,
+      "step": 348
+    },
+    {
+      "epoch": 0.17906248001534822,
+      "grad_norm": 0.07982076704502106,
+      "learning_rate": 0.00017902813299232738,
+      "loss": 1.6807,
+      "step": 350
+    },
+    {
+      "epoch": 0.1800856941868645,
+      "grad_norm": 0.07170634716749191,
+      "learning_rate": 0.00018005115089514068,
+      "loss": 1.6753,
+      "step": 352
+    },
+    {
+      "epoch": 0.18110890835838075,
+      "grad_norm": 0.07484789937734604,
+      "learning_rate": 0.00018107416879795398,
+      "loss": 1.6883,
+      "step": 354
+    },
+    {
+      "epoch": 0.18213212252989705,
+      "grad_norm": 0.08390472084283829,
+      "learning_rate": 0.00018209718670076727,
+      "loss": 1.6783,
+      "step": 356
+    },
+    {
+      "epoch": 0.18315533670141332,
+      "grad_norm": 0.0833701565861702,
+      "learning_rate": 0.00018312020460358057,
+      "loss": 1.6804,
+      "step": 358
+    },
+    {
+      "epoch": 0.1841785508729296,
+      "grad_norm": 0.07489979267120361,
+      "learning_rate": 0.00018414322250639387,
+      "loss": 1.6179,
+      "step": 360
+    },
+    {
+      "epoch": 0.18520176504444585,
+      "grad_norm": 0.14307746291160583,
+      "learning_rate": 0.00018516624040920717,
+      "loss": 1.6396,
+      "step": 362
+    },
+    {
+      "epoch": 0.18622497921596215,
+      "grad_norm": 0.13637496531009674,
+      "learning_rate": 0.00018618925831202047,
+      "loss": 1.6425,
+      "step": 364
+    },
+    {
+      "epoch": 0.18724819338747842,
+      "grad_norm": 0.13586537539958954,
+      "learning_rate": 0.00018721227621483377,
+      "loss": 1.6915,
+      "step": 366
+    },
+    {
+      "epoch": 0.18827140755899469,
+      "grad_norm": 0.07892754673957825,
+      "learning_rate": 0.00018823529411764707,
+      "loss": 1.6628,
+      "step": 368
+    },
+    {
+      "epoch": 0.18929462173051098,
+      "grad_norm": 0.20291955769062042,
+      "learning_rate": 0.00018925831202046037,
+      "loss": 1.6572,
+      "step": 370
+    },
+    {
+      "epoch": 0.19031783590202725,
+      "grad_norm": 0.3548440933227539,
+      "learning_rate": 0.00019028132992327367,
+      "loss": 1.6963,
+      "step": 372
+    },
+    {
+      "epoch": 0.19134105007354352,
+      "grad_norm": 0.19051846861839294,
+      "learning_rate": 0.00019130434782608697,
+      "loss": 1.6853,
+      "step": 374
+    },
+    {
+      "epoch": 0.19236426424505979,
+      "grad_norm": 0.3201465308666229,
+      "learning_rate": 0.00019232736572890027,
+      "loss": 1.6549,
+      "step": 376
+    },
+    {
+      "epoch": 0.19338747841657608,
+      "grad_norm": 0.1700785905122757,
+      "learning_rate": 0.00019335038363171357,
+      "loss": 1.658,
+      "step": 378
+    },
+    {
+      "epoch": 0.19441069258809235,
+      "grad_norm": 0.1742287576198578,
+      "learning_rate": 0.00019437340153452686,
+      "loss": 1.6644,
+      "step": 380
+    },
+    {
+      "epoch": 0.19543390675960862,
+      "grad_norm": 0.0945478230714798,
+      "learning_rate": 0.00019539641943734016,
+      "loss": 1.65,
+      "step": 382
+    },
+    {
+      "epoch": 0.19645712093112488,
+      "grad_norm": 0.06995284557342529,
+      "learning_rate": 0.00019641943734015346,
+      "loss": 1.6608,
+      "step": 384
+    },
+    {
+      "epoch": 0.19748033510264118,
+      "grad_norm": 0.07590003311634064,
+      "learning_rate": 0.00019744245524296676,
+      "loss": 1.6367,
+      "step": 386
+    },
+    {
+      "epoch": 0.19850354927415745,
+      "grad_norm": 0.09830451011657715,
+      "learning_rate": 0.00019846547314578006,
+      "loss": 1.6638,
+      "step": 388
+    },
+    {
+      "epoch": 0.19952676344567372,
+      "grad_norm": 0.10720949620008469,
+      "learning_rate": 0.00019948849104859336,
+      "loss": 1.6571,
+      "step": 390
+    },
+    {
+      "epoch": 0.20054997761719,
+      "grad_norm": 0.06915664672851562,
+      "learning_rate": 0.0001999999910488914,
+      "loss": 1.669,
+      "step": 392
+    },
+    {
+      "epoch": 0.20157319178870628,
+      "grad_norm": 0.04960264638066292,
+      "learning_rate": 0.00019999991944003202,
+      "loss": 1.6529,
+      "step": 394
+    },
+    {
+      "epoch": 0.20259640596022255,
+      "grad_norm": 0.05139967054128647,
+      "learning_rate": 0.00019999977622236462,
+      "loss": 1.6053,
+      "step": 396
+    },
+    {
+      "epoch": 0.20361962013173882,
+      "grad_norm": 0.05288904160261154,
+      "learning_rate": 0.0001999995613959917,
+      "loss": 1.6905,
+      "step": 398
+    },
+    {
+      "epoch": 0.2046428343032551,
+      "grad_norm": 0.056239306926727295,
+      "learning_rate": 0.00019999927496106707,
+      "loss": 1.6662,
+      "step": 400
+    },
+    {
+      "epoch": 0.20566604847477138,
+      "grad_norm": 0.06484871357679367,
+      "learning_rate": 0.0001999989169177959,
+      "loss": 1.6803,
+      "step": 402
+    },
+    {
+      "epoch": 0.20668926264628765,
+      "grad_norm": 0.11631152778863907,
+      "learning_rate": 0.00019999848726643454,
+      "loss": 1.6389,
+      "step": 404
+    },
+    {
+      "epoch": 0.20771247681780391,
+      "grad_norm": 0.06311234086751938,
+      "learning_rate": 0.00019999798600729064,
+      "loss": 1.7017,
+      "step": 406
+    },
+    {
+      "epoch": 0.2087356909893202,
+      "grad_norm": 0.06155601888895035,
+      "learning_rate": 0.00019999741314072323,
+      "loss": 1.7014,
+      "step": 408
+    },
+    {
+      "epoch": 0.20975890516083648,
+      "grad_norm": 0.06340397894382477,
+      "learning_rate": 0.00019999676866714244,
+      "loss": 1.6735,
+      "step": 410
+    },
+    {
+      "epoch": 0.21078211933235275,
+      "grad_norm": 0.06068040430545807,
+      "learning_rate": 0.00019999605258700983,
+      "loss": 1.6224,
+      "step": 412
+    },
+    {
+      "epoch": 0.21180533350386904,
+      "grad_norm": 0.06651381403207779,
+      "learning_rate": 0.00019999526490083817,
+      "loss": 1.6279,
+      "step": 414
+    },
+    {
+      "epoch": 0.2128285476753853,
+      "grad_norm": 0.06273658573627472,
+      "learning_rate": 0.00019999440560919152,
+      "loss": 1.6591,
+      "step": 416
+    },
+    {
+      "epoch": 0.21385176184690158,
+      "grad_norm": 0.06989671289920807,
+      "learning_rate": 0.00019999347471268516,
+      "loss": 1.6405,
+      "step": 418
+    },
+    {
+      "epoch": 0.21487497601841785,
+      "grad_norm": 0.06204582378268242,
+      "learning_rate": 0.00019999247221198573,
+      "loss": 1.6512,
+      "step": 420
+    },
+    {
+      "epoch": 0.21589819018993414,
+      "grad_norm": 0.1728357970714569,
+      "learning_rate": 0.00019999139810781112,
+      "loss": 1.6332,
+      "step": 422
+    },
+    {
+      "epoch": 0.2169214043614504,
+      "grad_norm": 0.0696343332529068,
+      "learning_rate": 0.00019999025240093044,
+      "loss": 1.6649,
+      "step": 424
+    },
+    {
+      "epoch": 0.21794461853296668,
+      "grad_norm": 0.060923777520656586,
+      "learning_rate": 0.00019998903509216415,
+      "loss": 1.6269,
+      "step": 426
+    },
+    {
+      "epoch": 0.21896783270448295,
+      "grad_norm": 0.061977677047252655,
+      "learning_rate": 0.00019998774618238394,
+      "loss": 1.6636,
+      "step": 428
+    },
+    {
+      "epoch": 0.21999104687599924,
+      "grad_norm": 0.07241713255643845,
+      "learning_rate": 0.0001999863856725128,
+      "loss": 1.643,
+      "step": 430
+    },
+    {
+      "epoch": 0.2210142610475155,
+      "grad_norm": 0.06513350456953049,
+      "learning_rate": 0.000199984953563525,
+      "loss": 1.6184,
+      "step": 432
+    },
+    {
+      "epoch": 0.22203747521903178,
+      "grad_norm": 0.06109536439180374,
+      "learning_rate": 0.000199983449856446,
+      "loss": 1.6734,
+      "step": 434
+    },
+    {
+      "epoch": 0.22306068939054807,
+      "grad_norm": 0.09125282615423203,
+      "learning_rate": 0.0001999818745523526,
+      "loss": 1.6617,
+      "step": 436
+    },
+    {
+      "epoch": 0.22408390356206434,
+      "grad_norm": 0.05963214859366417,
+      "learning_rate": 0.00019998022765237288,
+      "loss": 1.648,
+      "step": 438
+    },
+    {
+      "epoch": 0.2251071177335806,
+      "grad_norm": 0.18775390088558197,
+      "learning_rate": 0.00019997850915768613,
+      "loss": 1.6599,
+      "step": 440
+    },
+    {
+      "epoch": 0.22613033190509688,
+      "grad_norm": 0.05968334153294563,
+      "learning_rate": 0.00019997671906952298,
+      "loss": 1.6072,
+      "step": 442
+    },
+    {
+      "epoch": 0.22715354607661317,
+      "grad_norm": 0.05431201308965683,
+      "learning_rate": 0.0001999748573891653,
+      "loss": 1.6315,
+      "step": 444
+    },
+    {
+      "epoch": 0.22817676024812944,
+      "grad_norm": 0.05960986390709877,
+      "learning_rate": 0.00019997292411794618,
+      "loss": 1.6565,
+      "step": 446
+    },
+    {
+      "epoch": 0.2291999744196457,
+      "grad_norm": 0.07451862096786499,
+      "learning_rate": 0.00019997091925725004,
+      "loss": 1.6793,
+      "step": 448
+    },
+    {
+      "epoch": 0.23022318859116198,
+      "grad_norm": 0.05454723909497261,
+      "learning_rate": 0.0001999688428085125,
+      "loss": 1.6055,
+      "step": 450
+    },
+    {
+      "epoch": 0.23124640276267827,
+      "grad_norm": 0.05422728881239891,
+      "learning_rate": 0.00019996669477322055,
+      "loss": 1.6455,
+      "step": 452
+    },
+    {
+      "epoch": 0.23226961693419454,
+      "grad_norm": 0.06064201146364212,
+      "learning_rate": 0.00019996447515291233,
+      "loss": 1.5895,
+      "step": 454
+    },
+    {
+      "epoch": 0.2332928311057108,
+      "grad_norm": 0.04667961224913597,
+      "learning_rate": 0.0001999621839491773,
+      "loss": 1.652,
+      "step": 456
+    },
+    {
+      "epoch": 0.2343160452772271,
+      "grad_norm": 0.06072809919714928,
+      "learning_rate": 0.00019995982116365616,
+      "loss": 1.6073,
+      "step": 458
+    },
+    {
+      "epoch": 0.23533925944874337,
+      "grad_norm": 0.05477429926395416,
+      "learning_rate": 0.00019995738679804085,
+      "loss": 1.6412,
+      "step": 460
+    },
+    {
+      "epoch": 0.23636247362025964,
+      "grad_norm": 0.08307594060897827,
+      "learning_rate": 0.00019995488085407462,
+      "loss": 1.6396,
+      "step": 462
+    },
+    {
+      "epoch": 0.2373856877917759,
+      "grad_norm": 0.059893883764743805,
+      "learning_rate": 0.00019995230333355192,
+      "loss": 1.6426,
+      "step": 464
+    },
+    {
+      "epoch": 0.2384089019632922,
+      "grad_norm": 0.06132538989186287,
+      "learning_rate": 0.00019994965423831854,
+      "loss": 1.6133,
+      "step": 466
+    },
+    {
+      "epoch": 0.23943211613480847,
+      "grad_norm": 0.07076270133256912,
+      "learning_rate": 0.00019994693357027138,
+      "loss": 1.576,
+      "step": 468
+    },
+    {
+      "epoch": 0.24045533030632474,
+      "grad_norm": 0.06282426416873932,
+      "learning_rate": 0.00019994414133135877,
+      "loss": 1.6373,
+      "step": 470
+    },
+    {
+      "epoch": 0.241478544477841,
+      "grad_norm": 0.058667294681072235,
+      "learning_rate": 0.00019994127752358013,
+      "loss": 1.619,
+      "step": 472
+    },
+    {
+      "epoch": 0.2425017586493573,
+      "grad_norm": 0.08359505236148834,
+      "learning_rate": 0.00019993834214898626,
+      "loss": 1.6225,
+      "step": 474
+    },
+    {
+      "epoch": 0.24352497282087357,
+      "grad_norm": 0.06758000701665878,
+      "learning_rate": 0.00019993533520967912,
+      "loss": 1.5799,
+      "step": 476
+    },
+    {
+      "epoch": 0.24454818699238984,
+      "grad_norm": 0.11436283588409424,
+      "learning_rate": 0.0001999322567078119,
+      "loss": 1.6385,
+      "step": 478
+    },
+    {
+      "epoch": 0.24557140116390613,
+      "grad_norm": 0.05773819610476494,
+      "learning_rate": 0.00019992910664558915,
+      "loss": 1.6022,
+      "step": 480
+    },
+    {
+      "epoch": 0.2465946153354224,
+      "grad_norm": 0.052521176636219025,
+      "learning_rate": 0.00019992588502526658,
+      "loss": 1.6137,
+      "step": 482
+    },
+    {
+      "epoch": 0.24761782950693867,
+      "grad_norm": 0.056573059409856796,
+      "learning_rate": 0.00019992259184915115,
+      "loss": 1.6065,
+      "step": 484
+    },
+    {
+      "epoch": 0.24864104367845494,
+      "grad_norm": 0.05170164257287979,
+      "learning_rate": 0.00019991922711960102,
+      "loss": 1.6325,
+      "step": 486
+    },
+    {
+      "epoch": 0.24966425784997123,
+      "grad_norm": 0.05951111018657684,
+      "learning_rate": 0.00019991579083902572,
+      "loss": 1.6034,
+      "step": 488
+    },
+    {
+      "epoch": 0.2506874720214875,
+      "grad_norm": 0.054325833916664124,
+      "learning_rate": 0.00019991228300988585,
+      "loss": 1.6102,
+      "step": 490
+    },
+    {
+      "epoch": 0.2517106861930038,
+      "grad_norm": 0.07080011814832687,
+      "learning_rate": 0.0001999087036346934,
+      "loss": 1.6302,
+      "step": 492
+    },
+    {
+      "epoch": 0.25273390036452004,
+      "grad_norm": 0.06116727367043495,
+      "learning_rate": 0.00019990505271601144,
+      "loss": 1.6243,
+      "step": 494
+    },
+    {
+      "epoch": 0.25375711453603633,
+      "grad_norm": 0.0602283850312233,
+      "learning_rate": 0.0001999013302564544,
+      "loss": 1.6024,
+      "step": 496
+    },
+    {
+      "epoch": 0.2547803287075526,
+      "grad_norm": 0.06313999742269516,
+      "learning_rate": 0.0001998975362586879,
+      "loss": 1.6238,
+      "step": 498
+    },
+    {
+      "epoch": 0.25580354287906887,
+      "grad_norm": 0.06217190623283386,
+      "learning_rate": 0.00019989367072542876,
+      "loss": 1.6251,
+      "step": 500
+    },
+    {
+      "epoch": 0.25682675705058516,
+      "grad_norm": 0.07256064563989639,
+      "learning_rate": 0.00019988973365944507,
+      "loss": 1.5929,
+      "step": 502
+    },
+    {
+      "epoch": 0.2578499712221014,
+      "grad_norm": 0.062201980501413345,
+      "learning_rate": 0.00019988572506355606,
+      "loss": 1.5933,
+      "step": 504
+    },
+    {
+      "epoch": 0.2588731853936177,
+      "grad_norm": 0.07168910652399063,
+      "learning_rate": 0.00019988164494063226,
+      "loss": 1.6474,
+      "step": 506
+    },
+    {
+      "epoch": 0.259896399565134,
+      "grad_norm": 0.056935928761959076,
+      "learning_rate": 0.00019987749329359548,
+      "loss": 1.5992,
+      "step": 508
+    },
+    {
+      "epoch": 0.26091961373665024,
+      "grad_norm": 0.07088612020015717,
+      "learning_rate": 0.00019987327012541855,
+      "loss": 1.5952,
+      "step": 510
+    },
+    {
+      "epoch": 0.26194282790816653,
+      "grad_norm": 0.06023348495364189,
+      "learning_rate": 0.0001998689754391257,
+      "loss": 1.6064,
+      "step": 512
+    },
+    {
+      "epoch": 0.2629660420796828,
+      "grad_norm": 0.05686601996421814,
+      "learning_rate": 0.0001998646092377923,
+      "loss": 1.5992,
+      "step": 514
+    },
+    {
+      "epoch": 0.26398925625119907,
+      "grad_norm": 0.07028970122337341,
+      "learning_rate": 0.00019986017152454495,
+      "loss": 1.5835,
+      "step": 516
+    },
+    {
+      "epoch": 0.26501247042271536,
+      "grad_norm": 0.0645250454545021,
+      "learning_rate": 0.0001998556623025614,
+      "loss": 1.6055,
+      "step": 518
+    },
+    {
+      "epoch": 0.2660356845942316,
+      "grad_norm": 0.0723612904548645,
+      "learning_rate": 0.00019985108157507067,
+      "loss": 1.6248,
+      "step": 520
+    },
+    {
+      "epoch": 0.2670588987657479,
+      "grad_norm": 0.06222670525312424,
+      "learning_rate": 0.00019984642934535297,
+      "loss": 1.6411,
+      "step": 522
+    },
+    {
+      "epoch": 0.2680821129372642,
+      "grad_norm": 0.057786975055933,
+      "learning_rate": 0.00019984170561673976,
+      "loss": 1.6313,
+      "step": 524
+    },
+    {
+      "epoch": 0.26910532710878043,
+      "grad_norm": 0.061039313673973083,
+      "learning_rate": 0.00019983691039261357,
+      "loss": 1.5896,
+      "step": 526
+    },
+    {
+      "epoch": 0.27012854128029673,
+      "grad_norm": 0.04816308245062828,
+      "learning_rate": 0.00019983204367640824,
+      "loss": 1.5986,
+      "step": 528
+    },
+    {
+      "epoch": 0.271151755451813,
+      "grad_norm": 0.06095914542675018,
+      "learning_rate": 0.0001998271054716088,
+      "loss": 1.5995,
+      "step": 530
+    },
+    {
+      "epoch": 0.27217496962332927,
+      "grad_norm": 0.05422305688261986,
+      "learning_rate": 0.00019982209578175137,
+      "loss": 1.6047,
+      "step": 532
+    },
+    {
+      "epoch": 0.27319818379484556,
+      "grad_norm": 0.05381491780281067,
+      "learning_rate": 0.0001998170146104234,
+      "loss": 1.5748,
+      "step": 534
+    },
+    {
+      "epoch": 0.27422139796636186,
+      "grad_norm": 0.08168444782495499,
+      "learning_rate": 0.0001998118619612634,
+      "loss": 1.5941,
+      "step": 536
+    },
+    {
+      "epoch": 0.2752446121378781,
+      "grad_norm": 0.05323650687932968,
+      "learning_rate": 0.00019980663783796118,
+      "loss": 1.6015,
+      "step": 538
+    },
+    {
+      "epoch": 0.2762678263093944,
+      "grad_norm": 0.08093535900115967,
+      "learning_rate": 0.0001998013422442577,
+      "loss": 1.6325,
+      "step": 540
+    },
+    {
+      "epoch": 0.27729104048091063,
+      "grad_norm": 0.05909120664000511,
+      "learning_rate": 0.00019979597518394491,
+      "loss": 1.6684,
+      "step": 542
+    },
+    {
+      "epoch": 0.27831425465242693,
+      "grad_norm": 0.0684690847992897,
+      "learning_rate": 0.00019979053666086634,
+      "loss": 1.6682,
+      "step": 544
+    },
+    {
+      "epoch": 0.2793374688239432,
+      "grad_norm": 0.05854607746005058,
+      "learning_rate": 0.00019978502667891625,
+      "loss": 1.6133,
+      "step": 546
+    },
+    {
+      "epoch": 0.28036068299545946,
+      "grad_norm": 0.05019630119204521,
+      "learning_rate": 0.00019977944524204037,
+      "loss": 1.5968,
+      "step": 548
+    },
+    {
+      "epoch": 0.28138389716697576,
+      "grad_norm": 0.0662982240319252,
+      "learning_rate": 0.00019977379235423551,
+      "loss": 1.589,
+      "step": 550
+    },
+    {
+      "epoch": 0.28240711133849206,
+      "grad_norm": 0.049058698117733,
+      "learning_rate": 0.00019976806801954964,
+      "loss": 1.5979,
+      "step": 552
+    },
+    {
+      "epoch": 0.2834303255100083,
+      "grad_norm": 0.058459024876356125,
+      "learning_rate": 0.00019976227224208183,
+      "loss": 1.5813,
+      "step": 554
+    },
+    {
+      "epoch": 0.2844535396815246,
+      "grad_norm": 0.048455361276865005,
+      "learning_rate": 0.00019975640502598244,
+      "loss": 1.5652,
+      "step": 556
+    },
+    {
+      "epoch": 0.2854767538530409,
+      "grad_norm": 0.06029395014047623,
+      "learning_rate": 0.00019975046637545288,
+      "loss": 1.6166,
+      "step": 558
+    },
+    {
+      "epoch": 0.28649996802455713,
+      "grad_norm": 0.05902372673153877,
+      "learning_rate": 0.00019974445629474574,
+      "loss": 1.5955,
+      "step": 560
+    },
+    {
+      "epoch": 0.2875231821960734,
+      "grad_norm": 0.04898110404610634,
+      "learning_rate": 0.0001997383747881648,
+      "loss": 1.5554,
+      "step": 562
+    },
+    {
+      "epoch": 0.28854639636758966,
+      "grad_norm": 0.07228821516036987,
+      "learning_rate": 0.00019973222186006498,
+      "loss": 1.6178,
+      "step": 564
+    },
+    {
+      "epoch": 0.28956961053910596,
+      "grad_norm": 0.07162781804800034,
+      "learning_rate": 0.00019972599751485226,
+      "loss": 1.6128,
+      "step": 566
+    },
+    {
+      "epoch": 0.29059282471062226,
+      "grad_norm": 0.047708939760923386,
+      "learning_rate": 0.00019971970175698385,
+      "loss": 1.5776,
+      "step": 568
+    },
+    {
+      "epoch": 0.2916160388821385,
+      "grad_norm": 0.05930710583925247,
+      "learning_rate": 0.0001997133345909681,
+      "loss": 1.6095,
+      "step": 570
+    },
+    {
+      "epoch": 0.2926392530536548,
+      "grad_norm": 0.057511184364557266,
+      "learning_rate": 0.00019970689602136438,
+      "loss": 1.564,
+      "step": 572
+    },
+    {
+      "epoch": 0.2936624672251711,
+      "grad_norm": 0.0659165233373642,
+      "learning_rate": 0.00019970038605278338,
+      "loss": 1.6057,
+      "step": 574
+    },
+    {
+      "epoch": 0.2946856813966873,
+      "grad_norm": 0.0638163760304451,
+      "learning_rate": 0.00019969380468988677,
+      "loss": 1.5684,
+      "step": 576
+    },
+    {
+      "epoch": 0.2957088955682036,
+      "grad_norm": 0.0477282889187336,
+      "learning_rate": 0.00019968715193738738,
+      "loss": 1.5596,
+      "step": 578
+    },
+    {
+      "epoch": 0.2967321097397199,
+      "grad_norm": 0.055721577256917953,
+      "learning_rate": 0.00019968042780004917,
+      "loss": 1.5854,
+      "step": 580
+    },
+    {
+      "epoch": 0.29775532391123616,
+      "grad_norm": 0.05852237716317177,
+      "learning_rate": 0.00019967363228268724,
+      "loss": 1.5952,
+      "step": 582
+    },
+    {
+      "epoch": 0.29877853808275245,
+      "grad_norm": 0.04583214595913887,
+      "learning_rate": 0.00019966676539016779,
+      "loss": 1.5835,
+      "step": 584
+    },
+    {
+      "epoch": 0.2998017522542687,
+      "grad_norm": 0.052682552486658096,
+      "learning_rate": 0.00019965982712740808,
+      "loss": 1.5932,
+      "step": 586
+    },
+    {
+      "epoch": 0.300824966425785,
+      "grad_norm": 0.06101151555776596,
+      "learning_rate": 0.00019965281749937655,
+      "loss": 1.661,
+      "step": 588
+    },
+    {
+      "epoch": 0.3018481805973013,
+      "grad_norm": 0.052221182733774185,
+      "learning_rate": 0.0001996457365110927,
+      "loss": 1.5834,
+      "step": 590
+    },
+    {
+      "epoch": 0.3028713947688175,
+      "grad_norm": 0.05288353189826012,
+      "learning_rate": 0.00019963858416762717,
+      "loss": 1.561,
+      "step": 592
+    },
+    {
+      "epoch": 0.3038946089403338,
+      "grad_norm": 0.05072011053562164,
+      "learning_rate": 0.00019963136047410166,
+      "loss": 1.5542,
+      "step": 594
+    },
+    {
+      "epoch": 0.3049178231118501,
+      "grad_norm": 0.05482899025082588,
+      "learning_rate": 0.00019962406543568898,
+      "loss": 1.6568,
+      "step": 596
+    },
+    {
+      "epoch": 0.30594103728336636,
+      "grad_norm": 0.06114513427019119,
+      "learning_rate": 0.00019961669905761302,
+      "loss": 1.5619,
+      "step": 598
+    },
+    {
+      "epoch": 0.30696425145488265,
+      "grad_norm": 0.14878755807876587,
+      "learning_rate": 0.00019960926134514873,
+      "loss": 1.6222,
+      "step": 600
+    },
+    {
+      "epoch": 0.30798746562639895,
+      "grad_norm": 0.05369825288653374,
+      "learning_rate": 0.00019960175230362222,
+      "loss": 1.574,
+      "step": 602
+    },
+    {
+      "epoch": 0.3090106797979152,
+      "grad_norm": 0.04912363365292549,
+      "learning_rate": 0.00019959417193841063,
+      "loss": 1.5644,
+      "step": 604
+    },
+    {
+      "epoch": 0.3100338939694315,
+      "grad_norm": 0.055376555770635605,
+      "learning_rate": 0.00019958652025494212,
+      "loss": 1.5978,
+      "step": 606
+    },
+    {
+      "epoch": 0.3110571081409477,
+      "grad_norm": 0.054994821548461914,
+      "learning_rate": 0.00019957879725869602,
+      "loss": 1.6327,
+      "step": 608
+    },
+    {
+      "epoch": 0.312080322312464,
+      "grad_norm": 0.05939999222755432,
+      "learning_rate": 0.00019957100295520266,
+      "loss": 1.5706,
+      "step": 610
+    },
+    {
+      "epoch": 0.3131035364839803,
+      "grad_norm": 0.05616987124085426,
+      "learning_rate": 0.00019956313735004346,
+      "loss": 1.5932,
+      "step": 612
+    },
+    {
+      "epoch": 0.31412675065549656,
+      "grad_norm": 0.10900183767080307,
+      "learning_rate": 0.00019955520044885087,
+      "loss": 1.5757,
+      "step": 614
+    },
+    {
+      "epoch": 0.31514996482701285,
+      "grad_norm": 1.115419864654541,
+      "learning_rate": 0.00019954719225730847,
+      "loss": 1.666,
+      "step": 616
+    },
+    {
+      "epoch": 0.31617317899852915,
+      "grad_norm": 0.13737702369689941,
+      "learning_rate": 0.00019953911278115078,
+      "loss": 1.6406,
+      "step": 618
+    },
+    {
+      "epoch": 0.3171963931700454,
+      "grad_norm": 0.18733379244804382,
+      "learning_rate": 0.00019953096202616344,
+      "loss": 1.6465,
+      "step": 620
+    },
+    {
+      "epoch": 0.3182196073415617,
+      "grad_norm": 0.513283371925354,
+      "learning_rate": 0.0001995227399981831,
+      "loss": 1.6477,
+      "step": 622
+    },
+    {
+      "epoch": 0.319242821513078,
+      "grad_norm": 0.30918484926223755,
+      "learning_rate": 0.0001995144467030975,
+      "loss": 1.6566,
+      "step": 624
+    },
+    {
+      "epoch": 0.3202660356845942,
+      "grad_norm": 0.0951157733798027,
+      "learning_rate": 0.00019950608214684535,
+      "loss": 1.6034,
+      "step": 626
+    },
+    {
+      "epoch": 0.3212892498561105,
+      "grad_norm": 0.05696268379688263,
+      "learning_rate": 0.00019949764633541643,
+      "loss": 1.6518,
+      "step": 628
+    },
+    {
+      "epoch": 0.32231246402762675,
+      "grad_norm": 0.06777111440896988,
+      "learning_rate": 0.00019948913927485146,
+      "loss": 1.6585,
+      "step": 630
+    },
+    {
+      "epoch": 0.32333567819914305,
+      "grad_norm": 0.055656664073467255,
+      "learning_rate": 0.00019948056097124234,
+      "loss": 1.5623,
+      "step": 632
+    },
+    {
+      "epoch": 0.32435889237065935,
+      "grad_norm": 0.05220302939414978,
+      "learning_rate": 0.00019947191143073186,
+      "loss": 1.6067,
+      "step": 634
+    },
+    {
+      "epoch": 0.3253821065421756,
+      "grad_norm": 0.05276400223374367,
+      "learning_rate": 0.00019946319065951382,
+      "loss": 1.5997,
+      "step": 636
+    },
+    {
+      "epoch": 0.3264053207136919,
+      "grad_norm": 0.06689111888408661,
+      "learning_rate": 0.00019945439866383312,
+      "loss": 1.5621,
+      "step": 638
+    },
+    {
+      "epoch": 0.3274285348852082,
+      "grad_norm": 0.07574088871479034,
+      "learning_rate": 0.00019944553544998562,
+      "loss": 1.5873,
+      "step": 640
+    },
+    {
+      "epoch": 0.3284517490567244,
+      "grad_norm": 0.1480696201324463,
+      "learning_rate": 0.0001994366010243181,
+      "loss": 1.6142,
+      "step": 642
+    },
+    {
+      "epoch": 0.3294749632282407,
+      "grad_norm": 0.2425205558538437,
+      "learning_rate": 0.00019942759539322844,
+      "loss": 1.6513,
+      "step": 644
+    },
+    {
+      "epoch": 0.330498177399757,
+      "grad_norm": 0.10395582765340805,
+      "learning_rate": 0.00019941851856316548,
+      "loss": 1.6186,
+      "step": 646
+    },
+    {
+      "epoch": 0.33152139157127325,
+      "grad_norm": 0.07959388941526413,
+      "learning_rate": 0.000199409370540629,
+      "loss": 1.5954,
+      "step": 648
+    },
+    {
+      "epoch": 0.33254460574278955,
+      "grad_norm": 0.08391022682189941,
+      "learning_rate": 0.00019940015133216985,
+      "loss": 1.6359,
+      "step": 650
+    },
+    {
+      "epoch": 0.33356781991430584,
+      "grad_norm": 0.10863954573869705,
+      "learning_rate": 0.00019939086094438975,
+      "loss": 1.5591,
+      "step": 652
+    },
+    {
+      "epoch": 0.3345910340858221,
+      "grad_norm": 0.0719527155160904,
+      "learning_rate": 0.00019938149938394145,
+      "loss": 1.5536,
+      "step": 654
+    },
+    {
+      "epoch": 0.3356142482573384,
+      "grad_norm": 0.054009951651096344,
+      "learning_rate": 0.0001993720666575287,
+      "loss": 1.5925,
+      "step": 656
+    },
+    {
+      "epoch": 0.3366374624288546,
+      "grad_norm": 0.06805548816919327,
+      "learning_rate": 0.00019936256277190608,
+      "loss": 1.6079,
+      "step": 658
+    },
+    {
+      "epoch": 0.3376606766003709,
+      "grad_norm": 0.057809535413980484,
+      "learning_rate": 0.0001993529877338793,
+      "loss": 1.5569,
+      "step": 660
+    },
+    {
+      "epoch": 0.3386838907718872,
+      "grad_norm": 0.05796423181891441,
+      "learning_rate": 0.0001993433415503049,
+      "loss": 1.6148,
+      "step": 662
+    },
+    {
+      "epoch": 0.33970710494340345,
+      "grad_norm": 0.0450466088950634,
+      "learning_rate": 0.0001993336242280904,
+      "loss": 1.6024,
+      "step": 664
+    },
+    {
+      "epoch": 0.34073031911491974,
+      "grad_norm": 0.05356905981898308,
+      "learning_rate": 0.00019932383577419432,
+      "loss": 1.5696,
+      "step": 666
+    },
+    {
+      "epoch": 0.34175353328643604,
+      "grad_norm": 0.04915151000022888,
+      "learning_rate": 0.00019931397619562597,
+      "loss": 1.601,
+      "step": 668
+    },
+    {
+      "epoch": 0.3427767474579523,
+      "grad_norm": 0.2238396257162094,
+      "learning_rate": 0.00019930404549944574,
+      "loss": 1.6144,
+      "step": 670
+    },
+    {
+      "epoch": 0.3437999616294686,
+      "grad_norm": 0.07003773748874664,
+      "learning_rate": 0.00019929404369276488,
+      "loss": 1.6132,
+      "step": 672
+    },
+    {
+      "epoch": 0.34482317580098487,
+      "grad_norm": 0.07609610259532928,
+      "learning_rate": 0.00019928397078274555,
+      "loss": 1.5351,
+      "step": 674
+    },
+    {
+      "epoch": 0.3458463899725011,
+      "grad_norm": 0.057023849338293076,
+      "learning_rate": 0.00019927382677660088,
+      "loss": 1.5643,
+      "step": 676
+    },
+    {
+      "epoch": 0.3468696041440174,
+      "grad_norm": 0.0493864081799984,
+      "learning_rate": 0.0001992636116815948,
+      "loss": 1.5837,
+      "step": 678
+    },
+    {
+      "epoch": 0.34789281831553365,
+      "grad_norm": 0.05028039962053299,
+      "learning_rate": 0.00019925332550504234,
+      "loss": 1.6003,
+      "step": 680
+    },
+    {
+      "epoch": 0.34891603248704994,
+      "grad_norm": 0.050032299011945724,
+      "learning_rate": 0.00019924296825430925,
+      "loss": 1.5583,
+      "step": 682
+    },
+    {
+      "epoch": 0.34993924665856624,
+      "grad_norm": 0.04059847444295883,
+      "learning_rate": 0.00019923253993681225,
+      "loss": 1.6101,
+      "step": 684
+    },
+    {
+      "epoch": 0.3509624608300825,
+      "grad_norm": 0.045728132128715515,
+      "learning_rate": 0.00019922204056001895,
+      "loss": 1.5973,
+      "step": 686
+    },
+    {
+      "epoch": 0.3519856750015988,
+      "grad_norm": 0.04674302786588669,
+      "learning_rate": 0.0001992114701314478,
+      "loss": 1.5785,
+      "step": 688
+    },
+    {
+      "epoch": 0.35300888917311507,
+      "grad_norm": 0.04860880225896835,
+      "learning_rate": 0.00019920082865866818,
+      "loss": 1.5761,
+      "step": 690
+    },
+    {
+      "epoch": 0.3540321033446313,
+      "grad_norm": 0.04689641669392586,
+      "learning_rate": 0.00019919011614930035,
+      "loss": 1.6015,
+      "step": 692
+    },
+    {
+      "epoch": 0.3550553175161476,
+      "grad_norm": 0.04507840797305107,
+      "learning_rate": 0.0001991793326110154,
+      "loss": 1.5762,
+      "step": 694
+    },
+    {
+      "epoch": 0.3560785316876639,
+      "grad_norm": 0.04468555748462677,
+      "learning_rate": 0.00019916847805153526,
+      "loss": 1.5615,
+      "step": 696
+    },
+    {
+      "epoch": 0.35710174585918014,
+      "grad_norm": 0.07028740644454956,
+      "learning_rate": 0.00019915755247863285,
+      "loss": 1.6001,
+      "step": 698
+    },
+    {
+      "epoch": 0.35812496003069644,
+      "grad_norm": 0.03917892277240753,
+      "learning_rate": 0.00019914655590013176,
+      "loss": 1.6153,
+      "step": 700
+    },
+    {
+      "epoch": 0.3591481742022127,
+      "grad_norm": 0.06443695724010468,
+      "learning_rate": 0.0001991354883239066,
+      "loss": 1.5588,
+      "step": 702
+    },
+    {
+      "epoch": 0.360171388373729,
+      "grad_norm": 0.04684121161699295,
+      "learning_rate": 0.00019912434975788264,
+      "loss": 1.5726,
+      "step": 704
+    },
+    {
+      "epoch": 0.36119460254524527,
+      "grad_norm": 0.04538768157362938,
+      "learning_rate": 0.00019911314021003613,
+      "loss": 1.592,
+      "step": 706
+    },
+    {
+      "epoch": 0.3622178167167615,
+      "grad_norm": 0.040085602551698685,
+      "learning_rate": 0.0001991018596883941,
+      "loss": 1.577,
+      "step": 708
+    },
+    {
+      "epoch": 0.3632410308882778,
+      "grad_norm": 0.04734279587864876,
+      "learning_rate": 0.00019909050820103442,
+      "loss": 1.6194,
+      "step": 710
+    },
+    {
+      "epoch": 0.3642642450597941,
+      "grad_norm": 0.051557011902332306,
+      "learning_rate": 0.00019907908575608573,
+      "loss": 1.5776,
+      "step": 712
+    },
+    {
+      "epoch": 0.36528745923131034,
+      "grad_norm": 0.042105671018362045,
+      "learning_rate": 0.00019906759236172752,
+      "loss": 1.562,
+      "step": 714
+    },
+    {
+      "epoch": 0.36631067340282664,
+      "grad_norm": 0.04763809219002724,
+      "learning_rate": 0.00019905602802619007,
+      "loss": 1.5727,
+      "step": 716
+    },
+    {
+      "epoch": 0.36733388757434293,
+      "grad_norm": 0.05205756798386574,
+      "learning_rate": 0.00019904439275775452,
+      "loss": 1.5595,
+      "step": 718
+    },
+    {
+      "epoch": 0.3683571017458592,
+      "grad_norm": 0.04210933670401573,
+      "learning_rate": 0.0001990326865647527,
+      "loss": 1.5812,
+      "step": 720
+    },
+    {
+      "epoch": 0.36938031591737547,
+      "grad_norm": 0.04100721701979637,
+      "learning_rate": 0.00019902090945556728,
+      "loss": 1.5492,
+      "step": 722
+    },
+    {
+      "epoch": 0.3704035300888917,
+      "grad_norm": 0.04252148047089577,
+      "learning_rate": 0.0001990090614386318,
+      "loss": 1.5397,
+      "step": 724
+    },
+    {
+      "epoch": 0.371426744260408,
+      "grad_norm": 0.040999703109264374,
+      "learning_rate": 0.00019899714252243035,
+      "loss": 1.533,
+      "step": 726
+    },
+    {
+      "epoch": 0.3724499584319243,
+      "grad_norm": 0.03823763504624367,
+      "learning_rate": 0.00019898515271549804,
+      "loss": 1.5385,
+      "step": 728
+    },
+    {
+      "epoch": 0.37347317260344054,
+      "grad_norm": 0.041486915200948715,
+      "learning_rate": 0.0001989730920264206,
+      "loss": 1.5975,
+      "step": 730
+    },
+    {
+      "epoch": 0.37449638677495684,
+      "grad_norm": 0.042897533625364304,
+      "learning_rate": 0.00019896096046383456,
+      "loss": 1.574,
+      "step": 732
+    },
+    {
+      "epoch": 0.37551960094647313,
+      "grad_norm": 0.05677172914147377,
+      "learning_rate": 0.00019894875803642715,
+      "loss": 1.5564,
+      "step": 734
+    },
+    {
+      "epoch": 0.37654281511798937,
+      "grad_norm": 0.0416000559926033,
+      "learning_rate": 0.00019893648475293648,
+      "loss": 1.5982,
+      "step": 736
+    },
+    {
+      "epoch": 0.37756602928950567,
+      "grad_norm": 0.04389720410108566,
+      "learning_rate": 0.00019892414062215122,
+      "loss": 1.5661,
+      "step": 738
+    },
+    {
+      "epoch": 0.37858924346102196,
+      "grad_norm": 0.048660341650247574,
+      "learning_rate": 0.0001989117256529109,
+      "loss": 1.5554,
+      "step": 740
+    },
+    {
+      "epoch": 0.3796124576325382,
+      "grad_norm": 0.04659014940261841,
+      "learning_rate": 0.00019889923985410576,
+      "loss": 1.5932,
+      "step": 742
+    },
+    {
+      "epoch": 0.3806356718040545,
+      "grad_norm": 0.04693235456943512,
+      "learning_rate": 0.00019888668323467669,
+      "loss": 1.5985,
+      "step": 744
+    },
+    {
+      "epoch": 0.38165888597557074,
+      "grad_norm": 0.05906931310892105,
+      "learning_rate": 0.00019887405580361537,
+      "loss": 1.592,
+      "step": 746
+    },
+    {
+      "epoch": 0.38268210014708703,
+      "grad_norm": 0.0707060918211937,
+      "learning_rate": 0.0001988613575699642,
+      "loss": 1.5491,
+      "step": 748
+    },
+    {
+      "epoch": 0.38370531431860333,
+      "grad_norm": 0.0510844886302948,
+      "learning_rate": 0.00019884858854281613,
+      "loss": 1.5433,
+      "step": 750
+    },
+    {
+      "epoch": 0.38472852849011957,
+      "grad_norm": 0.058799102902412415,
+      "learning_rate": 0.00019883574873131503,
+      "loss": 1.5467,
+      "step": 752
+    },
+    {
+      "epoch": 0.38575174266163587,
+      "grad_norm": 0.04918012022972107,
+      "learning_rate": 0.0001988228381446553,
+      "loss": 1.5685,
+      "step": 754
+    },
+    {
+      "epoch": 0.38677495683315216,
+      "grad_norm": 0.044637810438871384,
+      "learning_rate": 0.00019880985679208207,
+      "loss": 1.5767,
+      "step": 756
+    },
+    {
+      "epoch": 0.3877981710046684,
+      "grad_norm": 0.052684806287288666,
+      "learning_rate": 0.0001987968046828911,
+      "loss": 1.5457,
+      "step": 758
+    },
+    {
+      "epoch": 0.3888213851761847,
+      "grad_norm": 0.045015860348939896,
+      "learning_rate": 0.0001987836818264289,
+      "loss": 1.5136,
+      "step": 760
+    },
+    {
+      "epoch": 0.389844599347701,
+      "grad_norm": 0.0538019984960556,
+      "learning_rate": 0.0001987704882320926,
+      "loss": 1.5673,
+      "step": 762
+    },
+    {
+      "epoch": 0.39086781351921723,
+      "grad_norm": 0.04201149195432663,
+      "learning_rate": 0.00019875722390932997,
+      "loss": 1.5559,
+      "step": 764
+    },
+    {
+      "epoch": 0.39189102769073353,
+      "grad_norm": 0.04188109561800957,
+      "learning_rate": 0.00019874388886763944,
+      "loss": 1.4982,
+      "step": 766
+    },
+    {
+      "epoch": 0.39291424186224977,
+      "grad_norm": 0.0503980815410614,
+      "learning_rate": 0.00019873048311657007,
+      "loss": 1.5018,
+      "step": 768
+    },
+    {
+      "epoch": 0.39393745603376606,
+      "grad_norm": 0.04854050651192665,
+      "learning_rate": 0.0001987170066657216,
+      "loss": 1.5331,
+      "step": 770
+    },
+    {
+      "epoch": 0.39496067020528236,
+      "grad_norm": 0.04634295031428337,
+      "learning_rate": 0.00019870345952474437,
+      "loss": 1.5304,
+      "step": 772
+    },
+    {
+      "epoch": 0.3959838843767986,
+      "grad_norm": 0.04464833438396454,
+      "learning_rate": 0.0001986898417033393,
+      "loss": 1.5518,
+      "step": 774
+    },
+    {
+      "epoch": 0.3970070985483149,
+      "grad_norm": 0.04434438794851303,
+      "learning_rate": 0.00019867615321125795,
+      "loss": 1.5372,
+      "step": 776
+    },
+    {
+      "epoch": 0.3980303127198312,
+      "grad_norm": 0.04564082249999046,
+      "learning_rate": 0.00019866239405830248,
+      "loss": 1.5373,
+      "step": 778
+    },
+    {
+      "epoch": 0.39905352689134743,
+      "grad_norm": 0.042439211159944534,
+      "learning_rate": 0.00019864856425432574,
+      "loss": 1.5682,
+      "step": 780
+    },
+    {
+      "epoch": 0.4000767410628637,
+      "grad_norm": 0.051853910088539124,
+      "learning_rate": 0.00019863466380923105,
+      "loss": 1.5408,
+      "step": 782
+    },
+    {
+      "epoch": 0.40109995523438,
+      "grad_norm": 0.04109041020274162,
+      "learning_rate": 0.00019862069273297232,
+      "loss": 1.5557,
+      "step": 784
+    },
+    {
+      "epoch": 0.40212316940589626,
+      "grad_norm": 0.04249493032693863,
+      "learning_rate": 0.00019860665103555415,
+      "loss": 1.5723,
+      "step": 786
+    },
+    {
+      "epoch": 0.40314638357741256,
+      "grad_norm": 0.041393015533685684,
+      "learning_rate": 0.0001985925387270316,
+      "loss": 1.6034,
+      "step": 788
+    },
+    {
+      "epoch": 0.4041695977489288,
+      "grad_norm": 0.03967997431755066,
+      "learning_rate": 0.00019857835581751037,
+      "loss": 1.5252,
+      "step": 790
+    },
+    {
+      "epoch": 0.4051928119204451,
+      "grad_norm": 0.0383961945772171,
+      "learning_rate": 0.00019856410231714662,
+      "loss": 1.5718,
+      "step": 792
+    },
+    {
+      "epoch": 0.4062160260919614,
+      "grad_norm": 0.04732939228415489,
+      "learning_rate": 0.00019854977823614717,
+      "loss": 1.5473,
+      "step": 794
+    },
+    {
+      "epoch": 0.40723924026347763,
+      "grad_norm": 0.04425951838493347,
+      "learning_rate": 0.00019853538358476932,
+      "loss": 1.5976,
+      "step": 796
+    },
+    {
+      "epoch": 0.4082624544349939,
+      "grad_norm": 0.041833970695734024,
+      "learning_rate": 0.0001985209183733209,
+      "loss": 1.6024,
+      "step": 798
+    },
+    {
+      "epoch": 0.4092856686065102,
+      "grad_norm": 0.04387862607836723,
+      "learning_rate": 0.0001985063826121603,
+      "loss": 1.5384,
+      "step": 800
+    },
+    {
+      "epoch": 0.41030888277802646,
+      "grad_norm": 0.04852529242634773,
+      "learning_rate": 0.00019849177631169643,
+      "loss": 1.5485,
+      "step": 802
+    },
+    {
+      "epoch": 0.41133209694954276,
+      "grad_norm": 0.04267437756061554,
+      "learning_rate": 0.00019847709948238865,
+      "loss": 1.5186,
+      "step": 804
+    },
+    {
+      "epoch": 0.41235531112105905,
+      "grad_norm": 0.04403737559914589,
+      "learning_rate": 0.00019846235213474692,
+      "loss": 1.5374,
+      "step": 806
+    },
+    {
+      "epoch": 0.4133785252925753,
+      "grad_norm": 0.04668973386287689,
+      "learning_rate": 0.00019844753427933164,
+      "loss": 1.5209,
+      "step": 808
+    },
+    {
+      "epoch": 0.4144017394640916,
+      "grad_norm": 0.045447513461112976,
+      "learning_rate": 0.00019843264592675367,
+      "loss": 1.5888,
+      "step": 810
+    },
+    {
+      "epoch": 0.41542495363560783,
+      "grad_norm": 0.04239337146282196,
+      "learning_rate": 0.00019841768708767438,
+      "loss": 1.5866,
+      "step": 812
+    },
+    {
+      "epoch": 0.4164481678071241,
+      "grad_norm": 0.04571668431162834,
+      "learning_rate": 0.0001984026577728057,
+      "loss": 1.5134,
+      "step": 814
+    },
+    {
+      "epoch": 0.4174713819786404,
+      "grad_norm": 0.041478246450424194,
+      "learning_rate": 0.00019838755799290994,
+      "loss": 1.5555,
+      "step": 816
+    },
+    {
+      "epoch": 0.41849459615015666,
+      "grad_norm": 0.04084784537553787,
+      "learning_rate": 0.00019837238775879983,
+      "loss": 1.5847,
+      "step": 818
+    },
+    {
+      "epoch": 0.41951781032167296,
+      "grad_norm": 0.0393175333738327,
+      "learning_rate": 0.00019835714708133862,
+      "loss": 1.5377,
+      "step": 820
+    },
+    {
+      "epoch": 0.42054102449318925,
+      "grad_norm": 0.03987790644168854,
+      "learning_rate": 0.00019834183597143996,
+      "loss": 1.5604,
+      "step": 822
+    },
+    {
+      "epoch": 0.4215642386647055,
+      "grad_norm": 0.04945560172200203,
+      "learning_rate": 0.00019832645444006804,
+      "loss": 1.5239,
+      "step": 824
+    },
+    {
+      "epoch": 0.4225874528362218,
+      "grad_norm": 0.042219970375299454,
+      "learning_rate": 0.00019831100249823733,
+      "loss": 1.5435,
+      "step": 826
+    },
+    {
+      "epoch": 0.4236106670077381,
+      "grad_norm": 0.06793594360351562,
+      "learning_rate": 0.00019829548015701283,
+      "loss": 1.5204,
+      "step": 828
+    },
+    {
+      "epoch": 0.4246338811792543,
+      "grad_norm": 0.04633813723921776,
+      "learning_rate": 0.00019827988742750988,
+      "loss": 1.5494,
+      "step": 830
+    },
+    {
+      "epoch": 0.4256570953507706,
+      "grad_norm": 0.041469499468803406,
+      "learning_rate": 0.0001982642243208943,
+      "loss": 1.5549,
+      "step": 832
+    },
+    {
+      "epoch": 0.42668030952228686,
+      "grad_norm": 0.039512719959020615,
+      "learning_rate": 0.0001982484908483822,
+      "loss": 1.5614,
+      "step": 834
+    },
+    {
+      "epoch": 0.42770352369380316,
+      "grad_norm": 0.04240869730710983,
+      "learning_rate": 0.0001982326870212402,
+      "loss": 1.5597,
+      "step": 836
+    },
+    {
+      "epoch": 0.42872673786531945,
+      "grad_norm": 0.04469761997461319,
+      "learning_rate": 0.00019821681285078522,
+      "loss": 1.575,
+      "step": 838
+    },
+    {
+      "epoch": 0.4297499520368357,
+      "grad_norm": 0.05203311890363693,
+      "learning_rate": 0.00019820086834838456,
+      "loss": 1.5144,
+      "step": 840
+    },
+    {
+      "epoch": 0.430773166208352,
+      "grad_norm": 0.046044569462537766,
+      "learning_rate": 0.00019818485352545592,
+      "loss": 1.5328,
+      "step": 842
+    },
+    {
+      "epoch": 0.4317963803798683,
+      "grad_norm": 0.05522793158888817,
+      "learning_rate": 0.00019816876839346735,
+      "loss": 1.5266,
+      "step": 844
+    },
+    {
+      "epoch": 0.4328195945513845,
+      "grad_norm": 0.04644525796175003,
+      "learning_rate": 0.00019815261296393715,
+      "loss": 1.5682,
+      "step": 846
+    },
+    {
+      "epoch": 0.4338428087229008,
+      "grad_norm": 0.06290300190448761,
+      "learning_rate": 0.00019813638724843413,
+      "loss": 1.5643,
+      "step": 848
+    },
+    {
+      "epoch": 0.4348660228944171,
+      "grad_norm": 0.050486985594034195,
+      "learning_rate": 0.00019812009125857728,
+      "loss": 1.5491,
+      "step": 850
+    },
+    {
+      "epoch": 0.43588923706593335,
+      "grad_norm": 0.05234065279364586,
+      "learning_rate": 0.000198103725006036,
+      "loss": 1.5718,
+      "step": 852
+    },
+    {
+      "epoch": 0.43691245123744965,
+      "grad_norm": 0.05265431106090546,
+      "learning_rate": 0.00019808728850253,
+      "loss": 1.56,
+      "step": 854
+    },
+    {
+      "epoch": 0.4379356654089659,
+      "grad_norm": 0.04220706969499588,
+      "learning_rate": 0.00019807078175982924,
+      "loss": 1.551,
+      "step": 856
+    },
+    {
+      "epoch": 0.4389588795804822,
+      "grad_norm": 0.042153794318437576,
+      "learning_rate": 0.00019805420478975403,
+      "loss": 1.5793,
+      "step": 858
+    },
+    {
+      "epoch": 0.4399820937519985,
+      "grad_norm": 0.04063679277896881,
+      "learning_rate": 0.00019803755760417494,
+      "loss": 1.5404,
+      "step": 860
+    },
+    {
+      "epoch": 0.4410053079235147,
+      "grad_norm": 0.04740441218018532,
+      "learning_rate": 0.0001980208402150128,
+      "loss": 1.526,
+      "step": 862
+    },
+    {
+      "epoch": 0.442028522095031,
+      "grad_norm": 0.04050862789154053,
+      "learning_rate": 0.0001980040526342388,
+      "loss": 1.5357,
+      "step": 864
+    },
+    {
+      "epoch": 0.4430517362665473,
+      "grad_norm": 0.050952885299921036,
+      "learning_rate": 0.00019798719487387428,
+      "loss": 1.5102,
+      "step": 866
+    },
+    {
+      "epoch": 0.44407495043806355,
+      "grad_norm": 0.048501502722501755,
+      "learning_rate": 0.00019797026694599098,
+      "loss": 1.5637,
+      "step": 868
+    },
+    {
+      "epoch": 0.44509816460957985,
+      "grad_norm": 0.03910909220576286,
+      "learning_rate": 0.0001979532688627107,
+      "loss": 1.5367,
+      "step": 870
+    },
+    {
+      "epoch": 0.44612137878109615,
+      "grad_norm": 0.05638305842876434,
+      "learning_rate": 0.0001979362006362056,
+      "loss": 1.5282,
+      "step": 872
+    },
+    {
+      "epoch": 0.4471445929526124,
+      "grad_norm": 0.05307792127132416,
+      "learning_rate": 0.00019791906227869808,
+      "loss": 1.5467,
+      "step": 874
+    },
+    {
+      "epoch": 0.4481678071241287,
+      "grad_norm": 0.04324028640985489,
+      "learning_rate": 0.0001979018538024607,
+      "loss": 1.5711,
+      "step": 876
+    },
+    {
+      "epoch": 0.4491910212956449,
+      "grad_norm": 0.03858278691768646,
+      "learning_rate": 0.00019788457521981623,
+      "loss": 1.5561,
+      "step": 878
+    },
+    {
+      "epoch": 0.4502142354671612,
+      "grad_norm": 0.043761543929576874,
+      "learning_rate": 0.00019786722654313772,
+      "loss": 1.5187,
+      "step": 880
+    },
+    {
+      "epoch": 0.4512374496386775,
+      "grad_norm": 0.08969100564718246,
+      "learning_rate": 0.00019784980778484834,
+      "loss": 1.5486,
+      "step": 882
+    },
+    {
+      "epoch": 0.45226066381019375,
+      "grad_norm": 0.04808567091822624,
+      "learning_rate": 0.00019783231895742143,
+      "loss": 1.5164,
+      "step": 884
+    },
+    {
+      "epoch": 0.45328387798171005,
+      "grad_norm": 0.04110665246844292,
+      "learning_rate": 0.00019781476007338058,
+      "loss": 1.5177,
+      "step": 886
+    },
+    {
+      "epoch": 0.45430709215322634,
+      "grad_norm": 0.050568196922540665,
+      "learning_rate": 0.00019779713114529947,
+      "loss": 1.5265,
+      "step": 888
+    },
+    {
+      "epoch": 0.4553303063247426,
+      "grad_norm": 0.04753986746072769,
+      "learning_rate": 0.00019777943218580207,
+      "loss": 1.5304,
+      "step": 890
+    },
+    {
+      "epoch": 0.4563535204962589,
+      "grad_norm": 0.05155970901250839,
+      "learning_rate": 0.00019776166320756227,
+      "loss": 1.566,
+      "step": 892
+    },
+    {
+      "epoch": 0.4573767346677752,
+      "grad_norm": 0.048765815794467926,
+      "learning_rate": 0.00019774382422330433,
+      "loss": 1.5276,
+      "step": 894
+    },
+    {
+      "epoch": 0.4583999488392914,
+      "grad_norm": 0.16882531344890594,
+      "learning_rate": 0.0001977259152458025,
+      "loss": 1.5074,
+      "step": 896
+    },
+    {
+      "epoch": 0.4594231630108077,
+      "grad_norm": 0.04014374688267708,
+      "learning_rate": 0.00019770793628788122,
+      "loss": 1.5262,
+      "step": 898
+    },
+    {
+      "epoch": 0.46044637718232395,
+      "grad_norm": 0.04874645173549652,
+      "learning_rate": 0.000197689887362415,
+      "loss": 1.5158,
+      "step": 900
+    },
+    {
+      "epoch": 0.46146959135384025,
+      "grad_norm": 0.049459170550107956,
+      "learning_rate": 0.00019767176848232846,
+      "loss": 1.5449,
+      "step": 902
+    },
+    {
+      "epoch": 0.46249280552535654,
+      "grad_norm": 0.04516777768731117,
+      "learning_rate": 0.00019765357966059638,
+      "loss": 1.5722,
+      "step": 904
+    },
+    {
+      "epoch": 0.4635160196968728,
+      "grad_norm": 0.04243026673793793,
+      "learning_rate": 0.00019763532091024352,
+      "loss": 1.5562,
+      "step": 906
+    },
+    {
+      "epoch": 0.4645392338683891,
+      "grad_norm": 0.04713771492242813,
+      "learning_rate": 0.00019761699224434475,
+      "loss": 1.5425,
+      "step": 908
+    },
+    {
+      "epoch": 0.4655624480399054,
+      "grad_norm": 0.0495879128575325,
+      "learning_rate": 0.0001975985936760251,
+      "loss": 1.5517,
+      "step": 910
+    },
+    {
+      "epoch": 0.4665856622114216,
+      "grad_norm": 0.037338342517614365,
+      "learning_rate": 0.00019758012521845948,
+      "loss": 1.5923,
+      "step": 912
+    },
+    {
+      "epoch": 0.4676088763829379,
+      "grad_norm": 0.044082753360271454,
+      "learning_rate": 0.000197561586884873,
+      "loss": 1.5582,
+      "step": 914
+    },
+    {
+      "epoch": 0.4686320905544542,
+      "grad_norm": 0.045763563364744186,
+      "learning_rate": 0.00019754297868854073,
+      "loss": 1.5435,
+      "step": 916
+    },
+    {
+      "epoch": 0.46965530472597045,
+      "grad_norm": 0.04221731796860695,
+      "learning_rate": 0.00019752430064278777,
+      "loss": 1.5365,
+      "step": 918
+    },
+    {
+      "epoch": 0.47067851889748674,
+      "grad_norm": 0.04800180345773697,
+      "learning_rate": 0.0001975055527609893,
+      "loss": 1.5534,
+      "step": 920
+    },
+    {
+      "epoch": 0.471701733069003,
+      "grad_norm": 0.05618242546916008,
+      "learning_rate": 0.00019748673505657046,
+      "loss": 1.5568,
+      "step": 922
+    },
+    {
+      "epoch": 0.4727249472405193,
+      "grad_norm": 0.04696999117732048,
+      "learning_rate": 0.00019746784754300637,
+      "loss": 1.5249,
+      "step": 924
+    },
+    {
+      "epoch": 0.4737481614120356,
+      "grad_norm": 0.041852448135614395,
+      "learning_rate": 0.00019744889023382215,
+      "loss": 1.5415,
+      "step": 926
+    },
+    {
+      "epoch": 0.4747713755835518,
+      "grad_norm": 0.04743418097496033,
+      "learning_rate": 0.00019742986314259299,
+      "loss": 1.5633,
+      "step": 928
+    },
+    {
+      "epoch": 0.4757945897550681,
+      "grad_norm": 0.04543265700340271,
+      "learning_rate": 0.00019741076628294386,
+      "loss": 1.5261,
+      "step": 930
+    },
+    {
+      "epoch": 0.4768178039265844,
+      "grad_norm": 0.04992993175983429,
+      "learning_rate": 0.00019739159966854992,
+      "loss": 1.5175,
+      "step": 932
+    },
+    {
+      "epoch": 0.47784101809810064,
+      "grad_norm": 0.05793948844075203,
+      "learning_rate": 0.00019737236331313608,
+      "loss": 1.59,
+      "step": 934
+    },
+    {
+      "epoch": 0.47886423226961694,
+      "grad_norm": 0.051816169172525406,
+      "learning_rate": 0.00019735305723047732,
+      "loss": 1.5008,
+      "step": 936
+    },
+    {
+      "epoch": 0.47988744644113324,
+      "grad_norm": 0.04754515737295151,
+      "learning_rate": 0.0001973336814343985,
+      "loss": 1.4773,
+      "step": 938
+    },
+    {
+      "epoch": 0.4809106606126495,
+      "grad_norm": 0.0393076054751873,
+      "learning_rate": 0.0001973142359387744,
+      "loss": 1.5568,
+      "step": 940
+    },
+    {
+      "epoch": 0.48193387478416577,
+      "grad_norm": 0.04164562746882439,
+      "learning_rate": 0.00019729472075752974,
+      "loss": 1.5319,
+      "step": 942
+    },
+    {
+      "epoch": 0.482957088955682,
+      "grad_norm": 0.04371575266122818,
+      "learning_rate": 0.00019727513590463906,
+      "loss": 1.5571,
+      "step": 944
+    },
+    {
+      "epoch": 0.4839803031271983,
+      "grad_norm": 0.0573207251727581,
+      "learning_rate": 0.00019725548139412692,
+      "loss": 1.5372,
+      "step": 946
+    },
+    {
+      "epoch": 0.4850035172987146,
+      "grad_norm": 0.04900820180773735,
+      "learning_rate": 0.00019723575724006767,
+      "loss": 1.5327,
+      "step": 948
+    },
+    {
+      "epoch": 0.48602673147023084,
+      "grad_norm": 0.039241593331098557,
+      "learning_rate": 0.00019721596345658552,
+      "loss": 1.5438,
+      "step": 950
+    },
+    {
+      "epoch": 0.48704994564174714,
+      "grad_norm": 0.043952930718660355,
+      "learning_rate": 0.00019719610005785465,
+      "loss": 1.5577,
+      "step": 952
+    },
+    {
+      "epoch": 0.48807315981326344,
+      "grad_norm": 0.038709525018930435,
+      "learning_rate": 0.0001971761670580989,
+      "loss": 1.5527,
+      "step": 954
+    },
+    {
+      "epoch": 0.4890963739847797,
+      "grad_norm": 0.03867029398679733,
+      "learning_rate": 0.0001971561644715922,
+      "loss": 1.5329,
+      "step": 956
+    },
+    {
+      "epoch": 0.49011958815629597,
+      "grad_norm": 0.0413273349404335,
+      "learning_rate": 0.00019713609231265805,
+      "loss": 1.5415,
+      "step": 958
+    },
+    {
+      "epoch": 0.49114280232781227,
+      "grad_norm": 0.03651106357574463,
+      "learning_rate": 0.00019711595059566998,
+      "loss": 1.5596,
+      "step": 960
+    },
+    {
+      "epoch": 0.4921660164993285,
+      "grad_norm": 0.03891696035861969,
+      "learning_rate": 0.0001970957393350512,
+      "loss": 1.5452,
+      "step": 962
+    },
+    {
+      "epoch": 0.4931892306708448,
+      "grad_norm": 0.03818392753601074,
+      "learning_rate": 0.0001970754585452748,
+      "loss": 1.5821,
+      "step": 964
+    },
+    {
+      "epoch": 0.49421244484236104,
+      "grad_norm": 0.03790618106722832,
+      "learning_rate": 0.0001970551082408636,
+      "loss": 1.5456,
+      "step": 966
+    },
+    {
+      "epoch": 0.49523565901387734,
+      "grad_norm": 0.043467581272125244,
+      "learning_rate": 0.00019703468843639024,
+      "loss": 1.4916,
+      "step": 968
+    },
+    {
+      "epoch": 0.49625887318539363,
+      "grad_norm": 0.03895978257060051,
+      "learning_rate": 0.0001970141991464771,
+      "loss": 1.5529,
+      "step": 970
+    },
+    {
+      "epoch": 0.4972820873569099,
+      "grad_norm": 0.03736645728349686,
+      "learning_rate": 0.0001969936403857963,
+      "loss": 1.5243,
+      "step": 972
+    },
+    {
+      "epoch": 0.49830530152842617,
+      "grad_norm": 0.03589653596282005,
+      "learning_rate": 0.0001969730121690698,
+      "loss": 1.5418,
+      "step": 974
+    },
+    {
+      "epoch": 0.49932851569994247,
+      "grad_norm": 0.03768768534064293,
+      "learning_rate": 0.00019695231451106912,
+      "loss": 1.5114,
+      "step": 976
+    },
+    {
+      "epoch": 0.5003517298714587,
+      "grad_norm": 0.04931550845503807,
+      "learning_rate": 0.00019693154742661575,
+      "loss": 1.564,
+      "step": 978
+    },
+    {
+      "epoch": 0.501374944042975,
+      "grad_norm": 0.04325348883867264,
+      "learning_rate": 0.0001969107109305807,
+      "loss": 1.5092,
+      "step": 980
+    },
+    {
+      "epoch": 0.5023981582144913,
+      "grad_norm": 0.03987947851419449,
+      "learning_rate": 0.00019688980503788475,
+      "loss": 1.5222,
+      "step": 982
+    },
+    {
+      "epoch": 0.5034213723860076,
+      "grad_norm": 0.04482003673911095,
+      "learning_rate": 0.00019686882976349836,
+      "loss": 1.517,
+      "step": 984
+    },
+    {
+      "epoch": 0.5044445865575238,
+      "grad_norm": 0.04025088995695114,
+      "learning_rate": 0.00019684778512244172,
+      "loss": 1.5188,
+      "step": 986
+    },
+    {
+      "epoch": 0.5054678007290401,
+      "grad_norm": 0.04705490544438362,
+      "learning_rate": 0.00019682667112978463,
+      "loss": 1.5266,
+      "step": 988
+    },
+    {
+      "epoch": 0.5064910149005564,
+      "grad_norm": 0.0493633933365345,
+      "learning_rate": 0.0001968054878006466,
+      "loss": 1.5079,
+      "step": 990
+    },
+    {
+      "epoch": 0.5075142290720727,
+      "grad_norm": 0.04063592851161957,
+      "learning_rate": 0.00019678423515019674,
+      "loss": 1.5169,
+      "step": 992
+    },
+    {
+      "epoch": 0.508537443243589,
+      "grad_norm": 0.04962534457445145,
+      "learning_rate": 0.00019676291319365387,
+      "loss": 1.5219,
+      "step": 994
+    },
+    {
+      "epoch": 0.5095606574151051,
+      "grad_norm": 0.03995488956570625,
+      "learning_rate": 0.00019674152194628638,
+      "loss": 1.5397,
+      "step": 996
+    },
+    {
+      "epoch": 0.5105838715866214,
+      "grad_norm": 0.04593009501695633,
+      "learning_rate": 0.00019672006142341234,
+      "loss": 1.5616,
+      "step": 998
+    },
+    {
+      "epoch": 0.5116070857581377,
+      "grad_norm": 0.04215447977185249,
+      "learning_rate": 0.00019669853164039933,
+      "loss": 1.5425,
+      "step": 1000
+    },
+    {
+      "epoch": 0.512630299929654,
+      "grad_norm": 0.043728407472372055,
+      "learning_rate": 0.0001966769326126646,
+      "loss": 1.5044,
+      "step": 1002
+    },
+    {
+      "epoch": 0.5136535141011703,
+      "grad_norm": 0.04384353384375572,
+      "learning_rate": 0.00019665526435567497,
+      "loss": 1.5734,
+      "step": 1004
+    },
+    {
+      "epoch": 0.5146767282726866,
+      "grad_norm": 0.04542085528373718,
+      "learning_rate": 0.00019663352688494684,
+      "loss": 1.5023,
+      "step": 1006
+    },
+    {
+      "epoch": 0.5156999424442028,
+      "grad_norm": 0.05727483332157135,
+      "learning_rate": 0.0001966117202160462,
+      "loss": 1.5668,
+      "step": 1008
+    },
+    {
+      "epoch": 0.5167231566157191,
+      "grad_norm": 0.055995501577854156,
+      "learning_rate": 0.0001965898443645885,
+      "loss": 1.5533,
+      "step": 1010
+    },
+    {
+      "epoch": 0.5177463707872354,
+      "grad_norm": 0.04521145299077034,
+      "learning_rate": 0.00019656789934623881,
+      "loss": 1.5196,
+      "step": 1012
+    },
+    {
+      "epoch": 0.5187695849587517,
+      "grad_norm": 0.040051352232694626,
+      "learning_rate": 0.0001965458851767117,
+      "loss": 1.5293,
+      "step": 1014
+    },
+    {
+      "epoch": 0.519792799130268,
+      "grad_norm": 0.04483609274029732,
+      "learning_rate": 0.00019652380187177126,
+      "loss": 1.5028,
+      "step": 1016
+    },
+    {
+      "epoch": 0.5208160133017842,
+      "grad_norm": 0.04116397351026535,
+      "learning_rate": 0.00019650164944723115,
+      "loss": 1.5272,
+      "step": 1018
+    },
+    {
+      "epoch": 0.5218392274733005,
+      "grad_norm": 0.04803440347313881,
+      "learning_rate": 0.00019647942791895445,
+      "loss": 1.525,
+      "step": 1020
+    },
+    {
+      "epoch": 0.5228624416448168,
+      "grad_norm": 0.05390439182519913,
+      "learning_rate": 0.00019645713730285366,
+      "loss": 1.5446,
+      "step": 1022
+    },
+    {
+      "epoch": 0.5238856558163331,
+      "grad_norm": 0.04475432634353638,
+      "learning_rate": 0.00019643477761489096,
+      "loss": 1.5213,
+      "step": 1024
+    },
+    {
+      "epoch": 0.5249088699878494,
+      "grad_norm": 0.04424989968538284,
+      "learning_rate": 0.00019641234887107778,
+      "loss": 1.4888,
+      "step": 1026
+    },
+    {
+      "epoch": 0.5259320841593657,
+      "grad_norm": 0.049827560782432556,
+      "learning_rate": 0.00019638985108747515,
+      "loss": 1.5555,
+      "step": 1028
+    },
+    {
+      "epoch": 0.5269552983308818,
+      "grad_norm": 0.04092090204358101,
+      "learning_rate": 0.0001963672842801934,
+      "loss": 1.4815,
+      "step": 1030
+    },
+    {
+      "epoch": 0.5279785125023981,
+      "grad_norm": 0.052185434848070145,
+      "learning_rate": 0.00019634464846539246,
+      "loss": 1.5657,
+      "step": 1032
+    },
+    {
+      "epoch": 0.5290017266739144,
+      "grad_norm": 0.04300570487976074,
+      "learning_rate": 0.00019632194365928153,
+      "loss": 1.5259,
+      "step": 1034
+    },
+    {
+      "epoch": 0.5300249408454307,
+      "grad_norm": 0.04205292835831642,
+      "learning_rate": 0.00019629916987811926,
+      "loss": 1.527,
+      "step": 1036
+    },
+    {
+      "epoch": 0.531048155016947,
+      "grad_norm": 0.06136661395430565,
+      "learning_rate": 0.00019627632713821368,
+      "loss": 1.5541,
+      "step": 1038
+    },
+    {
+      "epoch": 0.5320713691884632,
+      "grad_norm": 0.03824898600578308,
+      "learning_rate": 0.00019625341545592226,
+      "loss": 1.5496,
+      "step": 1040
+    },
+    {
+      "epoch": 0.5330945833599795,
+      "grad_norm": 0.041780851781368256,
+      "learning_rate": 0.0001962304348476518,
+      "loss": 1.5283,
+      "step": 1042
+    },
+    {
+      "epoch": 0.5341177975314958,
+      "grad_norm": 0.04486005753278732,
+      "learning_rate": 0.0001962073853298584,
+      "loss": 1.5312,
+      "step": 1044
+    },
+    {
+      "epoch": 0.5351410117030121,
+      "grad_norm": 0.041384853422641754,
+      "learning_rate": 0.00019618426691904762,
+      "loss": 1.5011,
+      "step": 1046
+    },
+    {
+      "epoch": 0.5361642258745284,
+      "grad_norm": 0.0440378412604332,
+      "learning_rate": 0.00019616107963177425,
+      "loss": 1.4855,
+      "step": 1048
+    },
+    {
+      "epoch": 0.5371874400460447,
+      "grad_norm": 0.052033115178346634,
+      "learning_rate": 0.00019613782348464244,
+      "loss": 1.4811,
+      "step": 1050
+    },
+    {
+      "epoch": 0.5382106542175609,
+      "grad_norm": 0.04121650755405426,
+      "learning_rate": 0.00019611449849430565,
+      "loss": 1.5653,
+      "step": 1052
+    },
+    {
+      "epoch": 0.5392338683890772,
+      "grad_norm": 0.04445752128958702,
+      "learning_rate": 0.00019609110467746666,
+      "loss": 1.5098,
+      "step": 1054
+    },
+    {
+      "epoch": 0.5402570825605935,
+      "grad_norm": 0.06591064482927322,
+      "learning_rate": 0.00019606764205087757,
+      "loss": 1.5304,
+      "step": 1056
+    },
+    {
+      "epoch": 0.5412802967321098,
+      "grad_norm": 0.05301080271601677,
+      "learning_rate": 0.0001960441106313396,
+      "loss": 1.4871,
+      "step": 1058
+    },
+    {
+      "epoch": 0.542303510903626,
+      "grad_norm": 0.040986523032188416,
+      "learning_rate": 0.0001960205104357034,
+      "loss": 1.5195,
+      "step": 1060
+    },
+    {
+      "epoch": 0.5433267250751422,
+      "grad_norm": 0.03562408685684204,
+      "learning_rate": 0.00019599684148086878,
+      "loss": 1.5384,
+      "step": 1062
+    },
+    {
+      "epoch": 0.5443499392466585,
+      "grad_norm": 0.04383963719010353,
+      "learning_rate": 0.00019597310378378476,
+      "loss": 1.4988,
+      "step": 1064
+    },
+    {
+      "epoch": 0.5453731534181748,
+      "grad_norm": 0.06702277064323425,
+      "learning_rate": 0.00019594929736144976,
+      "loss": 1.4897,
+      "step": 1066
+    },
+    {
+      "epoch": 0.5463963675896911,
+      "grad_norm": 0.0414276085793972,
+      "learning_rate": 0.00019592542223091118,
+      "loss": 1.5049,
+      "step": 1068
+    },
+    {
+      "epoch": 0.5474195817612074,
+      "grad_norm": 0.0432027168571949,
+      "learning_rate": 0.00019590147840926577,
+      "loss": 1.4686,
+      "step": 1070
+    },
+    {
+      "epoch": 0.5484427959327237,
+      "grad_norm": 0.044036637991666794,
+      "learning_rate": 0.00019587746591365941,
+      "loss": 1.5082,
+      "step": 1072
+    },
+    {
+      "epoch": 0.5494660101042399,
+      "grad_norm": 0.04510560259222984,
+      "learning_rate": 0.0001958533847612872,
+      "loss": 1.5213,
+      "step": 1074
+    },
+    {
+      "epoch": 0.5504892242757562,
+      "grad_norm": 0.04027169942855835,
+      "learning_rate": 0.00019582923496939337,
+      "loss": 1.4952,
+      "step": 1076
+    },
+    {
+      "epoch": 0.5515124384472725,
+      "grad_norm": 0.08312036097049713,
+      "learning_rate": 0.00019580501655527133,
+      "loss": 1.512,
+      "step": 1078
+    },
+    {
+      "epoch": 0.5525356526187888,
+      "grad_norm": 0.04634568840265274,
+      "learning_rate": 0.00019578072953626357,
+      "loss": 1.5248,
+      "step": 1080
+    },
+    {
+      "epoch": 0.5535588667903051,
+      "grad_norm": 0.044149454683065414,
+      "learning_rate": 0.00019575637392976178,
+      "loss": 1.4911,
+      "step": 1082
+    },
+    {
+      "epoch": 0.5545820809618213,
+      "grad_norm": 0.04358943551778793,
+      "learning_rate": 0.00019573194975320673,
+      "loss": 1.5427,
+      "step": 1084
+    },
+    {
+      "epoch": 0.5556052951333376,
+      "grad_norm": 0.038042690604925156,
+      "learning_rate": 0.0001957074570240883,
+      "loss": 1.5032,
+      "step": 1086
+    },
+    {
+      "epoch": 0.5566285093048539,
+      "grad_norm": 0.04171706736087799,
+      "learning_rate": 0.00019568289575994544,
+      "loss": 1.493,
+      "step": 1088
+    },
+    {
+      "epoch": 0.5576517234763702,
+      "grad_norm": 0.04037075862288475,
+      "learning_rate": 0.0001956582659783662,
+      "loss": 1.5334,
+      "step": 1090
+    },
+    {
+      "epoch": 0.5586749376478864,
+      "grad_norm": 0.036902882158756256,
+      "learning_rate": 0.0001956335676969877,
+      "loss": 1.5093,
+      "step": 1092
+    },
+    {
+      "epoch": 0.5596981518194027,
+      "grad_norm": 0.04198329523205757,
+      "learning_rate": 0.00019560880093349607,
+      "loss": 1.5069,
+      "step": 1094
+    },
+    {
+      "epoch": 0.5607213659909189,
+      "grad_norm": 0.034086357802152634,
+      "learning_rate": 0.0001955839657056265,
+      "loss": 1.5101,
+      "step": 1096
+    },
+    {
+      "epoch": 0.5617445801624352,
+      "grad_norm": 0.03502487763762474,
+      "learning_rate": 0.0001955590620311633,
+      "loss": 1.5305,
+      "step": 1098
+    },
+    {
+      "epoch": 0.5627677943339515,
+      "grad_norm": 0.03580254316329956,
+      "learning_rate": 0.00019553408992793964,
+      "loss": 1.4984,
+      "step": 1100
+    },
+    {
+      "epoch": 0.5637910085054678,
+      "grad_norm": 0.0441250242292881,
+      "learning_rate": 0.00019550904941383773,
+      "loss": 1.4956,
+      "step": 1102
+    },
+    {
+      "epoch": 0.5648142226769841,
+      "grad_norm": 0.039550572633743286,
+      "learning_rate": 0.00019548394050678883,
+      "loss": 1.5041,
+      "step": 1104
+    },
+    {
+      "epoch": 0.5658374368485003,
+      "grad_norm": 0.03674033284187317,
+      "learning_rate": 0.0001954587632247732,
+      "loss": 1.4694,
+      "step": 1106
+    },
+    {
+      "epoch": 0.5668606510200166,
+      "grad_norm": 0.03579515963792801,
+      "learning_rate": 0.00019543351758581994,
+      "loss": 1.4789,
+      "step": 1108
+    },
+    {
+      "epoch": 0.5678838651915329,
+      "grad_norm": 0.04077816754579544,
+      "learning_rate": 0.0001954082036080072,
+      "loss": 1.5221,
+      "step": 1110
+    },
+    {
+      "epoch": 0.5689070793630492,
+      "grad_norm": 0.03694437816739082,
+      "learning_rate": 0.00019538282130946198,
+      "loss": 1.5273,
+      "step": 1112
+    },
+    {
+      "epoch": 0.5699302935345655,
+      "grad_norm": 0.03998146578669548,
+      "learning_rate": 0.00019535737070836028,
+      "loss": 1.5426,
+      "step": 1114
+    },
+    {
+      "epoch": 0.5709535077060818,
+      "grad_norm": 0.03823567554354668,
+      "learning_rate": 0.00019533185182292703,
+      "loss": 1.5264,
+      "step": 1116
+    },
+    {
+      "epoch": 0.571976721877598,
+      "grad_norm": 0.03891613706946373,
+      "learning_rate": 0.000195306264671436,
+      "loss": 1.5194,
+      "step": 1118
+    },
+    {
+      "epoch": 0.5729999360491143,
+      "grad_norm": 0.035352472215890884,
+      "learning_rate": 0.0001952806092722098,
+      "loss": 1.5049,
+      "step": 1120
+    },
+    {
+      "epoch": 0.5740231502206306,
+      "grad_norm": 0.03947431594133377,
+      "learning_rate": 0.00019525488564362003,
+      "loss": 1.5562,
+      "step": 1122
+    },
+    {
+      "epoch": 0.5750463643921468,
+      "grad_norm": 0.0398818701505661,
+      "learning_rate": 0.00019522909380408705,
+      "loss": 1.5216,
+      "step": 1124
+    },
+    {
+      "epoch": 0.5760695785636631,
+      "grad_norm": 0.03842191398143768,
+      "learning_rate": 0.00019520323377208017,
+      "loss": 1.5461,
+      "step": 1126
+    },
+    {
+      "epoch": 0.5770927927351793,
+      "grad_norm": 0.03299557417631149,
+      "learning_rate": 0.00019517730556611738,
+      "loss": 1.4988,
+      "step": 1128
+    },
+    {
+      "epoch": 0.5781160069066956,
+      "grad_norm": 0.032452985644340515,
+      "learning_rate": 0.00019515130920476562,
+      "loss": 1.4837,
+      "step": 1130
+    },
+    {
+      "epoch": 0.5791392210782119,
+      "grad_norm": 0.03567085042595863,
+      "learning_rate": 0.00019512524470664057,
+      "loss": 1.5081,
+      "step": 1132
+    },
+    {
+      "epoch": 0.5801624352497282,
+      "grad_norm": 0.04303791746497154,
+      "learning_rate": 0.00019509911209040676,
+      "loss": 1.517,
+      "step": 1134
+    },
+    {
+      "epoch": 0.5811856494212445,
+      "grad_norm": 0.040586575865745544,
+      "learning_rate": 0.00019507291137477742,
+      "loss": 1.5494,
+      "step": 1136
+    },
+    {
+      "epoch": 0.5822088635927608,
+      "grad_norm": 0.038383904844522476,
+      "learning_rate": 0.0001950466425785146,
+      "loss": 1.4641,
+      "step": 1138
+    },
+    {
+      "epoch": 0.583232077764277,
+      "grad_norm": 0.0484977550804615,
+      "learning_rate": 0.0001950203057204291,
+      "loss": 1.4838,
+      "step": 1140
+    },
+    {
+      "epoch": 0.5842552919357933,
+      "grad_norm": 0.03300706669688225,
+      "learning_rate": 0.00019499390081938046,
+      "loss": 1.4935,
+      "step": 1142
+    },
+    {
+      "epoch": 0.5852785061073096,
+      "grad_norm": 0.041923582553863525,
+      "learning_rate": 0.00019496742789427683,
+      "loss": 1.484,
+      "step": 1144
+    },
+    {
+      "epoch": 0.5863017202788259,
+      "grad_norm": 0.04476374387741089,
+      "learning_rate": 0.00019494088696407532,
+      "loss": 1.5222,
+      "step": 1146
+    },
+    {
+      "epoch": 0.5873249344503422,
+      "grad_norm": 0.039443958550691605,
+      "learning_rate": 0.00019491427804778147,
+      "loss": 1.4899,
+      "step": 1148
+    },
+    {
+      "epoch": 0.5883481486218584,
+      "grad_norm": 0.0458071269094944,
+      "learning_rate": 0.00019488760116444966,
+      "loss": 1.5006,
+      "step": 1150
+    },
+    {
+      "epoch": 0.5893713627933747,
+      "grad_norm": 0.04912669211626053,
+      "learning_rate": 0.00019486085633318293,
+      "loss": 1.5193,
+      "step": 1152
+    },
+    {
+      "epoch": 0.590394576964891,
+      "grad_norm": 0.05331273376941681,
+      "learning_rate": 0.00019483404357313293,
+      "loss": 1.5115,
+      "step": 1154
+    },
+    {
+      "epoch": 0.5914177911364072,
+      "grad_norm": 0.04301870986819267,
+      "learning_rate": 0.00019480716290349995,
+      "loss": 1.4997,
+      "step": 1156
+    },
+    {
+      "epoch": 0.5924410053079235,
+      "grad_norm": 0.042690206319093704,
+      "learning_rate": 0.00019478021434353297,
+      "loss": 1.5014,
+      "step": 1158
+    },
+    {
+      "epoch": 0.5934642194794398,
+      "grad_norm": 0.045416899025440216,
+      "learning_rate": 0.00019475319791252956,
+      "loss": 1.5287,
+      "step": 1160
+    },
+    {
+      "epoch": 0.594487433650956,
+      "grad_norm": 0.04627612978219986,
+      "learning_rate": 0.0001947261136298358,
+      "loss": 1.5238,
+      "step": 1162
+    },
+    {
+      "epoch": 0.5955106478224723,
+      "grad_norm": 0.0443304218351841,
+      "learning_rate": 0.00019469896151484654,
+      "loss": 1.4956,
+      "step": 1164
+    },
+    {
+      "epoch": 0.5965338619939886,
+      "grad_norm": 0.042293716222047806,
+      "learning_rate": 0.00019467174158700504,
+      "loss": 1.4962,
+      "step": 1166
+    },
+    {
+      "epoch": 0.5975570761655049,
+      "grad_norm": 0.035955190658569336,
+      "learning_rate": 0.0001946444538658032,
+      "loss": 1.4799,
+      "step": 1168
+    },
+    {
+      "epoch": 0.5985802903370212,
+      "grad_norm": 0.04025396704673767,
+      "learning_rate": 0.00019461709837078145,
+      "loss": 1.489,
+      "step": 1170
+    },
+    {
+      "epoch": 0.5996035045085374,
+      "grad_norm": 0.057371869683265686,
+      "learning_rate": 0.0001945896751215287,
+      "loss": 1.4872,
+      "step": 1172
+    },
+    {
+      "epoch": 0.6006267186800537,
+      "grad_norm": 0.05806579813361168,
+      "learning_rate": 0.0001945621841376825,
+      "loss": 1.5153,
+      "step": 1174
+    },
+    {
+      "epoch": 0.60164993285157,
+      "grad_norm": 0.03980225697159767,
+      "learning_rate": 0.00019453462543892882,
+      "loss": 1.5093,
+      "step": 1176
+    },
+    {
+      "epoch": 0.6026731470230863,
+      "grad_norm": 0.041456956416368484,
+      "learning_rate": 0.0001945069990450021,
+      "loss": 1.5115,
+      "step": 1178
+    },
+    {
+      "epoch": 0.6036963611946026,
+      "grad_norm": 0.03392681106925011,
+      "learning_rate": 0.00019447930497568528,
+      "loss": 1.4863,
+      "step": 1180
+    },
+    {
+      "epoch": 0.6047195753661189,
+      "grad_norm": 0.03312285616993904,
+      "learning_rate": 0.0001944515432508098,
+      "loss": 1.5321,
+      "step": 1182
+    },
+    {
+      "epoch": 0.605742789537635,
+      "grad_norm": 0.03741718456149101,
+      "learning_rate": 0.00019442371389025552,
+      "loss": 1.4874,
+      "step": 1184
+    },
+    {
+      "epoch": 0.6067660037091513,
+      "grad_norm": 0.03954221308231354,
+      "learning_rate": 0.00019439581691395067,
+      "loss": 1.5014,
+      "step": 1186
+    },
+    {
+      "epoch": 0.6077892178806676,
+      "grad_norm": 0.03756248950958252,
+      "learning_rate": 0.00019436785234187205,
+      "loss": 1.522,
+      "step": 1188
+    },
+    {
+      "epoch": 0.6088124320521839,
+      "grad_norm": 0.03895876556634903,
+      "learning_rate": 0.00019433982019404473,
+      "loss": 1.5546,
+      "step": 1190
+    },
+    {
+      "epoch": 0.6098356462237002,
+      "grad_norm": 0.038288913667201996,
+      "learning_rate": 0.0001943117204905422,
+      "loss": 1.4859,
+      "step": 1192
+    },
+    {
+      "epoch": 0.6108588603952164,
+      "grad_norm": 0.034622881561517715,
+      "learning_rate": 0.00019428355325148633,
+      "loss": 1.5246,
+      "step": 1194
+    },
+    {
+      "epoch": 0.6118820745667327,
+      "grad_norm": 0.04585454985499382,
+      "learning_rate": 0.0001942553184970474,
+      "loss": 1.5001,
+      "step": 1196
+    },
+    {
+      "epoch": 0.612905288738249,
+      "grad_norm": 0.03685140982270241,
+      "learning_rate": 0.00019422701624744395,
+      "loss": 1.5114,
+      "step": 1198
+    },
+    {
+      "epoch": 0.6139285029097653,
+      "grad_norm": 0.033848248422145844,
+      "learning_rate": 0.00019419864652294296,
+      "loss": 1.5047,
+      "step": 1200
+    },
+    {
+      "epoch": 0.6149517170812816,
+      "grad_norm": 0.03485368937253952,
+      "learning_rate": 0.00019417020934385962,
+      "loss": 1.5412,
+      "step": 1202
+    },
+    {
+      "epoch": 0.6159749312527979,
+      "grad_norm": 0.03737105429172516,
+      "learning_rate": 0.00019414170473055746,
+      "loss": 1.5014,
+      "step": 1204
+    },
+    {
+      "epoch": 0.6169981454243141,
+      "grad_norm": 0.0417652502655983,
+      "learning_rate": 0.00019411313270344837,
+      "loss": 1.4963,
+      "step": 1206
+    },
+    {
+      "epoch": 0.6180213595958304,
+      "grad_norm": 0.037758734077215195,
+      "learning_rate": 0.0001940844932829924,
+      "loss": 1.4935,
+      "step": 1208
+    },
+    {
+      "epoch": 0.6190445737673467,
+      "grad_norm": 0.03808191418647766,
+      "learning_rate": 0.00019405578648969796,
+      "loss": 1.5181,
+      "step": 1210
+    },
+    {
+      "epoch": 0.620067787938863,
+      "grad_norm": 0.03454340249300003,
+      "learning_rate": 0.00019402701234412162,
+      "loss": 1.493,
+      "step": 1212
+    },
+    {
+      "epoch": 0.6210910021103793,
+      "grad_norm": 0.03708413615822792,
+      "learning_rate": 0.00019399817086686826,
+      "loss": 1.4987,
+      "step": 1214
+    },
+    {
+      "epoch": 0.6221142162818954,
+      "grad_norm": 0.046957071870565414,
+      "learning_rate": 0.00019396926207859084,
+      "loss": 1.473,
+      "step": 1216
+    },
+    {
+      "epoch": 0.6231374304534117,
+      "grad_norm": 0.03893362358212471,
+      "learning_rate": 0.00019394028599999073,
+      "loss": 1.4915,
+      "step": 1218
+    },
+    {
+      "epoch": 0.624160644624928,
+      "grad_norm": 0.04247049614787102,
+      "learning_rate": 0.0001939112426518173,
+      "loss": 1.5384,
+      "step": 1220
+    },
+    {
+      "epoch": 0.6251838587964443,
+      "grad_norm": 0.036440882831811905,
+      "learning_rate": 0.00019388213205486822,
+      "loss": 1.5124,
+      "step": 1222
+    },
+    {
+      "epoch": 0.6262070729679606,
+      "grad_norm": 0.037374429404735565,
+      "learning_rate": 0.00019385295422998921,
+      "loss": 1.5244,
+      "step": 1224
+    },
+    {
+      "epoch": 0.6272302871394769,
+      "grad_norm": 0.0383899062871933,
+      "learning_rate": 0.00019382370919807419,
+      "loss": 1.5078,
+      "step": 1226
+    },
+    {
+      "epoch": 0.6282535013109931,
+      "grad_norm": 0.03726350888609886,
+      "learning_rate": 0.0001937943969800652,
+      "loss": 1.4968,
+      "step": 1228
+    },
+    {
+      "epoch": 0.6292767154825094,
+      "grad_norm": 0.037606336176395416,
+      "learning_rate": 0.0001937650175969524,
+      "loss": 1.4735,
+      "step": 1230
+    },
+    {
+      "epoch": 0.6302999296540257,
+      "grad_norm": 0.03583415970206261,
+      "learning_rate": 0.000193735571069774,
+      "loss": 1.4872,
+      "step": 1232
+    },
+    {
+      "epoch": 0.631323143825542,
+      "grad_norm": 0.029802750796079636,
+      "learning_rate": 0.00019370605741961635,
+      "loss": 1.5037,
+      "step": 1234
+    },
+    {
+      "epoch": 0.6323463579970583,
+      "grad_norm": 0.037094760686159134,
+      "learning_rate": 0.00019367647666761385,
+      "loss": 1.518,
+      "step": 1236
+    },
+    {
+      "epoch": 0.6333695721685745,
+      "grad_norm": 0.03802032023668289,
+      "learning_rate": 0.00019364682883494893,
+      "loss": 1.4997,
+      "step": 1238
+    },
+    {
+      "epoch": 0.6343927863400908,
+      "grad_norm": 0.03934174031019211,
+      "learning_rate": 0.00019361711394285202,
+      "loss": 1.5033,
+      "step": 1240
+    },
+    {
+      "epoch": 0.6354160005116071,
+      "grad_norm": 0.03484318405389786,
+      "learning_rate": 0.00019358733201260169,
+      "loss": 1.5068,
+      "step": 1242
+    },
+    {
+      "epoch": 0.6364392146831234,
+      "grad_norm": 0.03633354604244232,
+      "learning_rate": 0.00019355748306552442,
+      "loss": 1.5462,
+      "step": 1244
+    },
+    {
+      "epoch": 0.6374624288546397,
+      "grad_norm": 0.05548425391316414,
+      "learning_rate": 0.00019352756712299468,
+      "loss": 1.5036,
+      "step": 1246
+    },
+    {
+      "epoch": 0.638485643026156,
+      "grad_norm": 0.032225679606199265,
+      "learning_rate": 0.00019349758420643493,
+      "loss": 1.5026,
+      "step": 1248
+    },
+    {
+      "epoch": 0.6395088571976721,
+      "grad_norm": 0.03236972540616989,
+      "learning_rate": 0.00019346753433731564,
+      "loss": 1.5199,
+      "step": 1250
+    },
+    {
+      "epoch": 0.6405320713691884,
+      "grad_norm": 0.03576046973466873,
+      "learning_rate": 0.00019343741753715516,
+      "loss": 1.5146,
+      "step": 1252
+    },
+    {
+      "epoch": 0.6415552855407047,
+      "grad_norm": 0.04308708757162094,
+      "learning_rate": 0.00019340723382751978,
+      "loss": 1.5,
+      "step": 1254
+    },
+    {
+      "epoch": 0.642578499712221,
+      "grad_norm": 0.035895735025405884,
+      "learning_rate": 0.0001933769832300237,
+      "loss": 1.5043,
+      "step": 1256
+    },
+    {
+      "epoch": 0.6436017138837373,
+      "grad_norm": 0.03789574280381203,
+      "learning_rate": 0.00019334666576632906,
+      "loss": 1.4935,
+      "step": 1258
+    },
+    {
+      "epoch": 0.6446249280552535,
+      "grad_norm": 0.03609545901417732,
+      "learning_rate": 0.00019331628145814587,
+      "loss": 1.5296,
+      "step": 1260
+    },
+    {
+      "epoch": 0.6456481422267698,
+      "grad_norm": 0.0432671383023262,
+      "learning_rate": 0.00019328583032723193,
+      "loss": 1.5045,
+      "step": 1262
+    },
+    {
+      "epoch": 0.6466713563982861,
+      "grad_norm": 0.038937125355005264,
+      "learning_rate": 0.000193255312395393,
+      "loss": 1.4801,
+      "step": 1264
+    },
+    {
+      "epoch": 0.6476945705698024,
+      "grad_norm": 0.03925538435578346,
+      "learning_rate": 0.00019322472768448258,
+      "loss": 1.4903,
+      "step": 1266
+    },
+    {
+      "epoch": 0.6487177847413187,
+      "grad_norm": 0.03581652417778969,
+      "learning_rate": 0.00019319407621640208,
+      "loss": 1.471,
+      "step": 1268
+    },
+    {
+      "epoch": 0.649740998912835,
+      "grad_norm": 0.03643723577260971,
+      "learning_rate": 0.00019316335801310063,
+      "loss": 1.5019,
+      "step": 1270
+    },
+    {
+      "epoch": 0.6507642130843512,
+      "grad_norm": 0.03839946910738945,
+      "learning_rate": 0.0001931325730965752,
+      "loss": 1.5148,
+      "step": 1272
+    },
+    {
+      "epoch": 0.6517874272558675,
+      "grad_norm": 0.04306597262620926,
+      "learning_rate": 0.00019310172148887054,
+      "loss": 1.472,
+      "step": 1274
+    },
+    {
+      "epoch": 0.6528106414273838,
+      "grad_norm": 0.069839708507061,
+      "learning_rate": 0.00019307080321207912,
+      "loss": 1.521,
+      "step": 1276
+    },
+    {
+      "epoch": 0.6538338555989001,
+      "grad_norm": 0.05618079751729965,
+      "learning_rate": 0.00019303981828834113,
+      "loss": 1.5019,
+      "step": 1278
+    },
+    {
+      "epoch": 0.6548570697704164,
+      "grad_norm": 0.04359296336770058,
+      "learning_rate": 0.00019300876673984462,
+      "loss": 1.4676,
+      "step": 1280
+    },
+    {
+      "epoch": 0.6558802839419325,
+      "grad_norm": 0.038589805364608765,
+      "learning_rate": 0.00019297764858882514,
+      "loss": 1.4791,
+      "step": 1282
+    },
+    {
+      "epoch": 0.6569034981134488,
+      "grad_norm": 0.0316338986158371,
+      "learning_rate": 0.00019294646385756612,
+      "loss": 1.4824,
+      "step": 1284
+    },
+    {
+      "epoch": 0.6579267122849651,
+      "grad_norm": 0.03457920625805855,
+      "learning_rate": 0.00019291521256839858,
+      "loss": 1.4946,
+      "step": 1286
+    },
+    {
+      "epoch": 0.6589499264564814,
+      "grad_norm": 0.04637923464179039,
+      "learning_rate": 0.00019288389474370117,
+      "loss": 1.5049,
+      "step": 1288
+    },
+    {
+      "epoch": 0.6599731406279977,
+      "grad_norm": 0.05314064025878906,
+      "learning_rate": 0.0001928525104059003,
+      "loss": 1.5021,
+      "step": 1290
+    },
+    {
+      "epoch": 0.660996354799514,
+      "grad_norm": 0.041335079818964005,
+      "learning_rate": 0.00019282105957746986,
+      "loss": 1.4869,
+      "step": 1292
+    },
+    {
+      "epoch": 0.6620195689710302,
+      "grad_norm": 0.040912263095378876,
+      "learning_rate": 0.00019278954228093146,
+      "loss": 1.5168,
+      "step": 1294
+    },
+    {
+      "epoch": 0.6630427831425465,
+      "grad_norm": 0.037110935896635056,
+      "learning_rate": 0.00019275795853885433,
+      "loss": 1.4973,
+      "step": 1296
+    },
+    {
+      "epoch": 0.6640659973140628,
+      "grad_norm": 0.035204846411943436,
+      "learning_rate": 0.00019272630837385518,
+      "loss": 1.5062,
+      "step": 1298
+    },
+    {
+      "epoch": 0.6650892114855791,
+      "grad_norm": 0.0464470274746418,
+      "learning_rate": 0.0001926945918085983,
+      "loss": 1.5412,
+      "step": 1300
+    },
+    {
+      "epoch": 0.6661124256570954,
+      "grad_norm": 0.033444374799728394,
+      "learning_rate": 0.00019266280886579565,
+      "loss": 1.4799,
+      "step": 1302
+    },
+    {
+      "epoch": 0.6671356398286117,
+      "grad_norm": 0.036789704114198685,
+      "learning_rate": 0.0001926309595682066,
+      "loss": 1.5604,
+      "step": 1304
+    },
+    {
+      "epoch": 0.6681588540001279,
+      "grad_norm": 0.03726235032081604,
+      "learning_rate": 0.00019259904393863802,
+      "loss": 1.5054,
+      "step": 1306
+    },
+    {
+      "epoch": 0.6691820681716442,
+      "grad_norm": 0.03499661013484001,
+      "learning_rate": 0.00019256706199994442,
+      "loss": 1.5039,
+      "step": 1308
+    },
+    {
+      "epoch": 0.6702052823431605,
+      "grad_norm": 0.037414226680994034,
+      "learning_rate": 0.00019253501377502764,
+      "loss": 1.4952,
+      "step": 1310
+    },
+    {
+      "epoch": 0.6712284965146768,
+      "grad_norm": 0.041186489164829254,
+      "learning_rate": 0.00019250289928683705,
+      "loss": 1.519,
+      "step": 1312
+    },
+    {
+      "epoch": 0.672251710686193,
+      "grad_norm": 0.050159044563770294,
+      "learning_rate": 0.0001924707185583695,
+      "loss": 1.5112,
+      "step": 1314
+    },
+    {
+      "epoch": 0.6732749248577092,
+      "grad_norm": 0.05124843865633011,
+      "learning_rate": 0.0001924384716126692,
+      "loss": 1.4897,
+      "step": 1316
+    },
+    {
+      "epoch": 0.6742981390292255,
+      "grad_norm": 0.03580416738986969,
+      "learning_rate": 0.00019240615847282788,
+      "loss": 1.4739,
+      "step": 1318
+    },
+    {
+      "epoch": 0.6753213532007418,
+      "grad_norm": 0.03572642430663109,
+      "learning_rate": 0.00019237377916198458,
+      "loss": 1.4735,
+      "step": 1320
+    },
+    {
+      "epoch": 0.6763445673722581,
+      "grad_norm": 0.04381095990538597,
+      "learning_rate": 0.00019234133370332578,
+      "loss": 1.4817,
+      "step": 1322
+    },
+    {
+      "epoch": 0.6773677815437744,
+      "grad_norm": 0.03948042169213295,
+      "learning_rate": 0.00019230882212008528,
+      "loss": 1.5288,
+      "step": 1324
+    },
+    {
+      "epoch": 0.6783909957152907,
+      "grad_norm": 0.04092205688357353,
+      "learning_rate": 0.00019227624443554425,
+      "loss": 1.503,
+      "step": 1326
+    },
+    {
+      "epoch": 0.6794142098868069,
+      "grad_norm": 0.0372740812599659,
+      "learning_rate": 0.0001922436006730312,
+      "loss": 1.5186,
+      "step": 1328
+    },
+    {
+      "epoch": 0.6804374240583232,
+      "grad_norm": 0.03410439193248749,
+      "learning_rate": 0.00019221089085592202,
+      "loss": 1.5104,
+      "step": 1330
+    },
+    {
+      "epoch": 0.6814606382298395,
+      "grad_norm": 0.04406609386205673,
+      "learning_rate": 0.00019217811500763977,
+      "loss": 1.497,
+      "step": 1332
+    },
+    {
+      "epoch": 0.6824838524013558,
+      "grad_norm": 0.04020300507545471,
+      "learning_rate": 0.00019214527315165487,
+      "loss": 1.4589,
+      "step": 1334
+    },
+    {
+      "epoch": 0.6835070665728721,
+      "grad_norm": 0.03552987799048424,
+      "learning_rate": 0.000192112365311485,
+      "loss": 1.4938,
+      "step": 1336
+    },
+    {
+      "epoch": 0.6845302807443883,
+      "grad_norm": 0.035595186054706573,
+      "learning_rate": 0.00019207939151069515,
+      "loss": 1.4664,
+      "step": 1338
+    },
+    {
+      "epoch": 0.6855534949159046,
+      "grad_norm": 0.030798960477113724,
+      "learning_rate": 0.00019204635177289743,
+      "loss": 1.4786,
+      "step": 1340
+    },
+    {
+      "epoch": 0.6865767090874209,
+      "grad_norm": 0.03413120657205582,
+      "learning_rate": 0.00019201324612175123,
+      "loss": 1.5409,
+      "step": 1342
+    },
+    {
+      "epoch": 0.6875999232589372,
+      "grad_norm": 0.03786253184080124,
+      "learning_rate": 0.0001919800745809631,
+      "loss": 1.4725,
+      "step": 1344
+    },
+    {
+      "epoch": 0.6886231374304534,
+      "grad_norm": 0.0414445661008358,
+      "learning_rate": 0.00019194683717428687,
+      "loss": 1.4993,
+      "step": 1346
+    },
+    {
+      "epoch": 0.6896463516019697,
+      "grad_norm": 0.0378003790974617,
+      "learning_rate": 0.00019191353392552344,
+      "loss": 1.5225,
+      "step": 1348
+    },
+    {
+      "epoch": 0.6906695657734859,
+      "grad_norm": 0.0343095101416111,
+      "learning_rate": 0.0001918801648585209,
+      "loss": 1.4671,
+      "step": 1350
+    },
+    {
+      "epoch": 0.6916927799450022,
+      "grad_norm": 0.03458075597882271,
+      "learning_rate": 0.0001918467299971744,
+      "loss": 1.4843,
+      "step": 1352
+    },
+    {
+      "epoch": 0.6927159941165185,
+      "grad_norm": 0.03243357688188553,
+      "learning_rate": 0.00019181322936542635,
+      "loss": 1.494,
+      "step": 1354
+    },
+    {
+      "epoch": 0.6937392082880348,
+      "grad_norm": 0.03002413548529148,
+      "learning_rate": 0.00019177966298726613,
+      "loss": 1.5046,
+      "step": 1356
+    },
+    {
+      "epoch": 0.6947624224595511,
+      "grad_norm": 0.031211066991090775,
+      "learning_rate": 0.00019174603088673026,
+      "loss": 1.4664,
+      "step": 1358
+    },
+    {
+      "epoch": 0.6957856366310673,
+      "grad_norm": 0.03740109130740166,
+      "learning_rate": 0.00019171233308790225,
+      "loss": 1.4394,
+      "step": 1360
+    },
+    {
+      "epoch": 0.6968088508025836,
+      "grad_norm": 0.03566642478108406,
+      "learning_rate": 0.0001916785696149128,
+      "loss": 1.4935,
+      "step": 1362
+    },
+    {
+      "epoch": 0.6978320649740999,
+      "grad_norm": 0.033135462552309036,
+      "learning_rate": 0.00019164474049193948,
+      "loss": 1.5171,
+      "step": 1364
+    },
+    {
+      "epoch": 0.6988552791456162,
+      "grad_norm": 0.03240213543176651,
+      "learning_rate": 0.00019161084574320696,
+      "loss": 1.4644,
+      "step": 1366
+    },
+    {
+      "epoch": 0.6998784933171325,
+      "grad_norm": 0.0337255634367466,
+      "learning_rate": 0.0001915768853929869,
+      "loss": 1.4739,
+      "step": 1368
+    },
+    {
+      "epoch": 0.7009017074886488,
+      "grad_norm": 0.033216070383787155,
+      "learning_rate": 0.00019154285946559792,
+      "loss": 1.4691,
+      "step": 1370
+    },
+    {
+      "epoch": 0.701924921660165,
+      "grad_norm": 0.03151748329401016,
+      "learning_rate": 0.0001915087679854056,
+      "loss": 1.4882,
+      "step": 1372
+    },
+    {
+      "epoch": 0.7029481358316813,
+      "grad_norm": 0.03065643645823002,
+      "learning_rate": 0.00019147461097682246,
+      "loss": 1.4608,
+      "step": 1374
+    },
+    {
+      "epoch": 0.7039713500031975,
+      "grad_norm": 0.0341670848429203,
+      "learning_rate": 0.0001914403884643079,
+      "loss": 1.4714,
+      "step": 1376
+    },
+    {
+      "epoch": 0.7049945641747138,
+      "grad_norm": 0.035825930535793304,
+      "learning_rate": 0.00019140610047236833,
+      "loss": 1.4752,
+      "step": 1378
+    },
+    {
+      "epoch": 0.7060177783462301,
+      "grad_norm": 0.042743559926748276,
+      "learning_rate": 0.00019137174702555697,
+      "loss": 1.5077,
+      "step": 1380
+    },
+    {
+      "epoch": 0.7070409925177463,
+      "grad_norm": 0.03980020061135292,
+      "learning_rate": 0.00019133732814847397,
+      "loss": 1.4813,
+      "step": 1382
+    },
+    {
+      "epoch": 0.7080642066892626,
+      "grad_norm": 0.03854946047067642,
+      "learning_rate": 0.00019130284386576624,
+      "loss": 1.4623,
+      "step": 1384
+    },
+    {
+      "epoch": 0.7090874208607789,
+      "grad_norm": 0.037254948168992996,
+      "learning_rate": 0.00019126829420212764,
+      "loss": 1.5247,
+      "step": 1386
+    },
+    {
+      "epoch": 0.7101106350322952,
+      "grad_norm": 0.047802574932575226,
+      "learning_rate": 0.00019123367918229874,
+      "loss": 1.4989,
+      "step": 1388
+    },
+    {
+      "epoch": 0.7111338492038115,
+      "grad_norm": 0.039889827370643616,
+      "learning_rate": 0.000191198998831067,
+      "loss": 1.4727,
+      "step": 1390
+    },
+    {
+      "epoch": 0.7121570633753278,
+      "grad_norm": 0.03746683895587921,
+      "learning_rate": 0.0001911642531732666,
+      "loss": 1.4929,
+      "step": 1392
+    },
+    {
+      "epoch": 0.713180277546844,
+      "grad_norm": 0.04323015734553337,
+      "learning_rate": 0.00019112944223377855,
+      "loss": 1.4989,
+      "step": 1394
+    },
+    {
+      "epoch": 0.7142034917183603,
+      "grad_norm": 0.04086681455373764,
+      "learning_rate": 0.0001910945660375305,
+      "loss": 1.4884,
+      "step": 1396
+    },
+    {
+      "epoch": 0.7152267058898766,
+      "grad_norm": 0.03528650477528572,
+      "learning_rate": 0.00019105962460949698,
+      "loss": 1.4932,
+      "step": 1398
+    },
+    {
+      "epoch": 0.7162499200613929,
+      "grad_norm": 0.041061852127313614,
+      "learning_rate": 0.00019102461797469912,
+      "loss": 1.5063,
+      "step": 1400
+    },
+    {
+      "epoch": 0.7172731342329092,
+      "grad_norm": 0.033481474965810776,
+      "learning_rate": 0.00019098954615820476,
+      "loss": 1.4825,
+      "step": 1402
+    },
+    {
+      "epoch": 0.7182963484044254,
+      "grad_norm": 0.03925000876188278,
+      "learning_rate": 0.00019095440918512842,
+      "loss": 1.513,
+      "step": 1404
+    },
+    {
+      "epoch": 0.7193195625759417,
+      "grad_norm": 0.03856325149536133,
+      "learning_rate": 0.0001909192070806313,
+      "loss": 1.4907,
+      "step": 1406
+    },
+    {
+      "epoch": 0.720342776747458,
+      "grad_norm": 0.03494630753993988,
+      "learning_rate": 0.00019088393986992124,
+      "loss": 1.4604,
+      "step": 1408
+    },
+    {
+      "epoch": 0.7213659909189742,
+      "grad_norm": 0.03931909799575806,
+      "learning_rate": 0.00019084860757825268,
+      "loss": 1.4905,
+      "step": 1410
+    },
+    {
+      "epoch": 0.7223892050904905,
+      "grad_norm": 0.03644140437245369,
+      "learning_rate": 0.00019081321023092668,
+      "loss": 1.49,
+      "step": 1412
+    },
+    {
+      "epoch": 0.7234124192620068,
+      "grad_norm": 0.03480161353945732,
+      "learning_rate": 0.00019077774785329087,
+      "loss": 1.5301,
+      "step": 1414
+    },
+    {
+      "epoch": 0.724435633433523,
+      "grad_norm": 0.03516329079866409,
+      "learning_rate": 0.00019074222047073947,
+      "loss": 1.4801,
+      "step": 1416
+    },
+    {
+      "epoch": 0.7254588476050393,
+      "grad_norm": 0.03371971845626831,
+      "learning_rate": 0.00019070662810871322,
+      "loss": 1.4724,
+      "step": 1418
+    },
+    {
+      "epoch": 0.7264820617765556,
+      "grad_norm": 0.034337956458330154,
+      "learning_rate": 0.00019067097079269942,
+      "loss": 1.4726,
+      "step": 1420
+    },
+    {
+      "epoch": 0.7275052759480719,
+      "grad_norm": 0.0360429473221302,
+      "learning_rate": 0.00019063524854823186,
+      "loss": 1.4856,
+      "step": 1422
+    },
+    {
+      "epoch": 0.7285284901195882,
+      "grad_norm": 0.03850055858492851,
+      "learning_rate": 0.0001905994614008908,
+      "loss": 1.5022,
+      "step": 1424
+    },
+    {
+      "epoch": 0.7295517042911044,
+      "grad_norm": 0.03869333118200302,
+      "learning_rate": 0.0001905636093763031,
+      "loss": 1.4949,
+      "step": 1426
+    },
+    {
+      "epoch": 0.7305749184626207,
+      "grad_norm": 0.03506360575556755,
+      "learning_rate": 0.0001905276925001419,
+      "loss": 1.4617,
+      "step": 1428
+    },
+    {
+      "epoch": 0.731598132634137,
+      "grad_norm": 0.033819831907749176,
+      "learning_rate": 0.00019049171079812692,
+      "loss": 1.4698,
+      "step": 1430
+    },
+    {
+      "epoch": 0.7326213468056533,
+      "grad_norm": 0.03606401011347771,
+      "learning_rate": 0.00019045566429602424,
+      "loss": 1.5038,
+      "step": 1432
+    },
+    {
+      "epoch": 0.7336445609771696,
+      "grad_norm": 0.04196172207593918,
+      "learning_rate": 0.00019041955301964632,
+      "loss": 1.5142,
+      "step": 1434
+    },
+    {
+      "epoch": 0.7346677751486859,
+      "grad_norm": 0.03859662637114525,
+      "learning_rate": 0.00019038337699485208,
+      "loss": 1.5072,
+      "step": 1436
+    },
+    {
+      "epoch": 0.735690989320202,
+      "grad_norm": 0.036224085837602615,
+      "learning_rate": 0.00019034713624754672,
+      "loss": 1.5033,
+      "step": 1438
+    },
+    {
+      "epoch": 0.7367142034917183,
+      "grad_norm": 0.04655170813202858,
+      "learning_rate": 0.00019031083080368183,
+      "loss": 1.5255,
+      "step": 1440
+    },
+    {
+      "epoch": 0.7377374176632346,
+      "grad_norm": 0.040406614542007446,
+      "learning_rate": 0.0001902744606892554,
+      "loss": 1.5199,
+      "step": 1442
+    },
+    {
+      "epoch": 0.7387606318347509,
+      "grad_norm": 0.03488042950630188,
+      "learning_rate": 0.00019023802593031154,
+      "loss": 1.5127,
+      "step": 1444
+    },
+    {
+      "epoch": 0.7397838460062672,
+      "grad_norm": 0.031517501920461655,
+      "learning_rate": 0.00019020152655294085,
+      "loss": 1.4726,
+      "step": 1446
+    },
+    {
+      "epoch": 0.7408070601777834,
+      "grad_norm": 0.0331415981054306,
+      "learning_rate": 0.0001901649625832801,
+      "loss": 1.473,
+      "step": 1448
+    },
+    {
+      "epoch": 0.7418302743492997,
+      "grad_norm": 0.03110121190547943,
+      "learning_rate": 0.00019012833404751235,
+      "loss": 1.4693,
+      "step": 1450
+    },
+    {
+      "epoch": 0.742853488520816,
+      "grad_norm": 0.03500855341553688,
+      "learning_rate": 0.00019009164097186684,
+      "loss": 1.4962,
+      "step": 1452
+    },
+    {
+      "epoch": 0.7438767026923323,
+      "grad_norm": 0.03449893742799759,
+      "learning_rate": 0.0001900548833826191,
+      "loss": 1.4938,
+      "step": 1454
+    },
+    {
+      "epoch": 0.7448999168638486,
+      "grad_norm": 0.03199852257966995,
+      "learning_rate": 0.0001900180613060908,
+      "loss": 1.4905,
+      "step": 1456
+    },
+    {
+      "epoch": 0.7459231310353649,
+      "grad_norm": 0.03547672927379608,
+      "learning_rate": 0.00018998117476864984,
+      "loss": 1.4495,
+      "step": 1458
+    },
+    {
+      "epoch": 0.7469463452068811,
+      "grad_norm": 0.03338061273097992,
+      "learning_rate": 0.00018994422379671016,
+      "loss": 1.4895,
+      "step": 1460
+    },
+    {
+      "epoch": 0.7479695593783974,
+      "grad_norm": 0.036238085478544235,
+      "learning_rate": 0.00018990720841673207,
+      "loss": 1.5382,
+      "step": 1462
+    },
+    {
+      "epoch": 0.7489927735499137,
+      "grad_norm": 0.03941986709833145,
+      "learning_rate": 0.0001898701286552218,
+      "loss": 1.4917,
+      "step": 1464
+    },
+    {
+      "epoch": 0.75001598772143,
+      "grad_norm": 0.03612781688570976,
+      "learning_rate": 0.0001898329845387317,
+      "loss": 1.4856,
+      "step": 1466
+    },
+    {
+      "epoch": 0.7510392018929463,
+      "grad_norm": 0.035338182002305984,
+      "learning_rate": 0.00018979577609386033,
+      "loss": 1.4787,
+      "step": 1468
+    },
+    {
+      "epoch": 0.7520624160644624,
+      "grad_norm": 0.035387344658374786,
+      "learning_rate": 0.0001897585033472522,
+      "loss": 1.489,
+      "step": 1470
+    },
+    {
+      "epoch": 0.7530856302359787,
+      "grad_norm": 0.033865489065647125,
+      "learning_rate": 0.00018972116632559786,
+      "loss": 1.4958,
+      "step": 1472
+    },
+    {
+      "epoch": 0.754108844407495,
+      "grad_norm": 0.03240435943007469,
+      "learning_rate": 0.000189683765055634,
+      "loss": 1.48,
+      "step": 1474
+    },
+    {
+      "epoch": 0.7551320585790113,
+      "grad_norm": 0.0325872041285038,
+      "learning_rate": 0.0001896462995641432,
+      "loss": 1.4685,
+      "step": 1476
+    },
+    {
+      "epoch": 0.7561552727505276,
+      "grad_norm": 0.030261578038334846,
+      "learning_rate": 0.00018960876987795413,
+      "loss": 1.4985,
+      "step": 1478
+    },
+    {
+      "epoch": 0.7571784869220439,
+      "grad_norm": 0.034684158861637115,
+      "learning_rate": 0.0001895711760239413,
+      "loss": 1.4869,
+      "step": 1480
+    },
+    {
+      "epoch": 0.7582017010935601,
+      "grad_norm": 0.03360000252723694,
+      "learning_rate": 0.00018953351802902525,
+      "loss": 1.5089,
+      "step": 1482
+    },
+    {
+      "epoch": 0.7592249152650764,
+      "grad_norm": 0.03356654942035675,
+      "learning_rate": 0.0001894957959201725,
+      "loss": 1.5119,
+      "step": 1484
+    },
+    {
+      "epoch": 0.7602481294365927,
+      "grad_norm": 0.035596925765275955,
+      "learning_rate": 0.00018945800972439538,
+      "loss": 1.5242,
+      "step": 1486
+    },
+    {
+      "epoch": 0.761271343608109,
+      "grad_norm": 0.03309349715709686,
+      "learning_rate": 0.00018942015946875215,
+      "loss": 1.519,
+      "step": 1488
+    },
+    {
+      "epoch": 0.7622945577796253,
+      "grad_norm": 0.03727027401328087,
+      "learning_rate": 0.00018938224518034698,
+      "loss": 1.4651,
+      "step": 1490
+    },
+    {
+      "epoch": 0.7633177719511415,
+      "grad_norm": 0.03802427276968956,
+      "learning_rate": 0.00018934426688632986,
+      "loss": 1.4584,
+      "step": 1492
+    },
+    {
+      "epoch": 0.7643409861226578,
+      "grad_norm": 0.03257981687784195,
+      "learning_rate": 0.00018930622461389655,
+      "loss": 1.4622,
+      "step": 1494
+    },
+    {
+      "epoch": 0.7653642002941741,
+      "grad_norm": 0.03339976444840431,
+      "learning_rate": 0.00018926811839028876,
+      "loss": 1.4486,
+      "step": 1496
+    },
+    {
+      "epoch": 0.7663874144656904,
+      "grad_norm": 0.03176839277148247,
+      "learning_rate": 0.00018922994824279395,
+      "loss": 1.478,
+      "step": 1498
+    },
+    {
+      "epoch": 0.7674106286372067,
+      "grad_norm": 0.03458357974886894,
+      "learning_rate": 0.00018919171419874524,
+      "loss": 1.5167,
+      "step": 1500
+    },
+    {
+      "epoch": 0.768433842808723,
+      "grad_norm": 0.037736013531684875,
+      "learning_rate": 0.00018915341628552166,
+      "loss": 1.5323,
+      "step": 1502
+    },
+    {
+      "epoch": 0.7694570569802391,
+      "grad_norm": 0.03360259160399437,
+      "learning_rate": 0.00018911505453054786,
+      "loss": 1.469,
+      "step": 1504
+    },
+    {
+      "epoch": 0.7704802711517554,
+      "grad_norm": 0.03466862440109253,
+      "learning_rate": 0.00018907662896129433,
+      "loss": 1.5173,
+      "step": 1506
+    },
+    {
+      "epoch": 0.7715034853232717,
+      "grad_norm": 0.036147862672805786,
+      "learning_rate": 0.00018903813960527714,
+      "loss": 1.4801,
+      "step": 1508
+    },
+    {
+      "epoch": 0.772526699494788,
+      "grad_norm": 0.03919236734509468,
+      "learning_rate": 0.0001889995864900581,
+      "loss": 1.479,
+      "step": 1510
+    },
+    {
+      "epoch": 0.7735499136663043,
+      "grad_norm": 0.03543972223997116,
+      "learning_rate": 0.0001889609696432446,
+      "loss": 1.4771,
+      "step": 1512
+    },
+    {
+      "epoch": 0.7745731278378205,
+      "grad_norm": 0.04238108918070793,
+      "learning_rate": 0.00018892228909248978,
+      "loss": 1.4936,
+      "step": 1514
+    },
+    {
+      "epoch": 0.7755963420093368,
+      "grad_norm": 0.035696953535079956,
+      "learning_rate": 0.00018888354486549237,
+      "loss": 1.49,
+      "step": 1516
+    },
+    {
+      "epoch": 0.7766195561808531,
+      "grad_norm": 0.04000556096434593,
+      "learning_rate": 0.00018884473698999661,
+      "loss": 1.5206,
+      "step": 1518
+    },
+    {
+      "epoch": 0.7776427703523694,
+      "grad_norm": 0.06562638282775879,
+      "learning_rate": 0.0001888058654937924,
+      "loss": 1.4672,
+      "step": 1520
+    },
+    {
+      "epoch": 0.7786659845238857,
+      "grad_norm": 0.03467231243848801,
+      "learning_rate": 0.00018876693040471517,
+      "loss": 1.5033,
+      "step": 1522
+    },
+    {
+      "epoch": 0.779689198695402,
+      "grad_norm": 0.03708554431796074,
+      "learning_rate": 0.00018872793175064593,
+      "loss": 1.4606,
+      "step": 1524
+    },
+    {
+      "epoch": 0.7807124128669182,
+      "grad_norm": 0.039738163352012634,
+      "learning_rate": 0.00018868886955951115,
+      "loss": 1.4506,
+      "step": 1526
+    },
+    {
+      "epoch": 0.7817356270384345,
+      "grad_norm": 0.036794066429138184,
+      "learning_rate": 0.00018864974385928283,
+      "loss": 1.516,
+      "step": 1528
+    },
+    {
+      "epoch": 0.7827588412099508,
+      "grad_norm": 0.037196848541498184,
+      "learning_rate": 0.0001886105546779784,
+      "loss": 1.5051,
+      "step": 1530
+    },
+    {
+      "epoch": 0.7837820553814671,
+      "grad_norm": 0.03867275267839432,
+      "learning_rate": 0.00018857130204366084,
+      "loss": 1.5015,
+      "step": 1532
+    },
+    {
+      "epoch": 0.7848052695529834,
+      "grad_norm": 0.03784462809562683,
+      "learning_rate": 0.00018853198598443852,
+      "loss": 1.4713,
+      "step": 1534
+    },
+    {
+      "epoch": 0.7858284837244995,
+      "grad_norm": 0.04151632636785507,
+      "learning_rate": 0.00018849260652846519,
+      "loss": 1.4671,
+      "step": 1536
+    },
+    {
+      "epoch": 0.7868516978960158,
+      "grad_norm": 0.04655742272734642,
+      "learning_rate": 0.00018845316370394005,
+      "loss": 1.4751,
+      "step": 1538
+    },
+    {
+      "epoch": 0.7878749120675321,
+      "grad_norm": 0.037444863468408585,
+      "learning_rate": 0.00018841365753910765,
+      "loss": 1.5155,
+      "step": 1540
+    },
+    {
+      "epoch": 0.7888981262390484,
+      "grad_norm": 0.04184754192829132,
+      "learning_rate": 0.0001883740880622579,
+      "loss": 1.4717,
+      "step": 1542
+    },
+    {
+      "epoch": 0.7899213404105647,
+      "grad_norm": 0.042664580047130585,
+      "learning_rate": 0.00018833445530172605,
+      "loss": 1.5221,
+      "step": 1544
+    },
+    {
+      "epoch": 0.790944554582081,
+      "grad_norm": 0.05149197578430176,
+      "learning_rate": 0.00018829475928589271,
+      "loss": 1.4861,
+      "step": 1546
+    },
+    {
+      "epoch": 0.7919677687535972,
+      "grad_norm": 0.04174793139100075,
+      "learning_rate": 0.0001882550000431837,
+      "loss": 1.4887,
+      "step": 1548
+    },
+    {
+      "epoch": 0.7929909829251135,
+      "grad_norm": 0.03560099005699158,
+      "learning_rate": 0.0001882151776020702,
+      "loss": 1.5099,
+      "step": 1550
+    },
+    {
+      "epoch": 0.7940141970966298,
+      "grad_norm": 0.049874622374773026,
+      "learning_rate": 0.0001881752919910686,
+      "loss": 1.4835,
+      "step": 1552
+    },
+    {
+      "epoch": 0.7950374112681461,
+      "grad_norm": 0.04354040324687958,
+      "learning_rate": 0.0001881353432387405,
+      "loss": 1.4778,
+      "step": 1554
+    },
+    {
+      "epoch": 0.7960606254396624,
+      "grad_norm": 0.04164579510688782,
+      "learning_rate": 0.0001880953313736928,
+      "loss": 1.4968,
+      "step": 1556
+    },
+    {
+      "epoch": 0.7970838396111786,
+      "grad_norm": 0.034870538860559464,
+      "learning_rate": 0.0001880552564245775,
+      "loss": 1.4628,
+      "step": 1558
+    },
+    {
+      "epoch": 0.7981070537826949,
+      "grad_norm": 0.034135766327381134,
+      "learning_rate": 0.00018801511842009183,
+      "loss": 1.4836,
+      "step": 1560
+    },
+    {
+      "epoch": 0.7991302679542112,
+      "grad_norm": 0.03587375581264496,
+      "learning_rate": 0.00018797491738897816,
+      "loss": 1.4636,
+      "step": 1562
+    },
+    {
+      "epoch": 0.8001534821257275,
+      "grad_norm": 0.03559894114732742,
+      "learning_rate": 0.000187934653360024,
+      "loss": 1.4874,
+      "step": 1564
+    },
+    {
+      "epoch": 0.8011766962972438,
+      "grad_norm": 0.05410682037472725,
+      "learning_rate": 0.00018789432636206197,
+      "loss": 1.4701,
+      "step": 1566
+    },
+    {
+      "epoch": 0.80219991046876,
+      "grad_norm": 0.046682942658662796,
+      "learning_rate": 0.00018785393642396976,
+      "loss": 1.4993,
+      "step": 1568
+    },
+    {
+      "epoch": 0.8032231246402762,
+      "grad_norm": 0.03647172451019287,
+      "learning_rate": 0.00018781348357467013,
+      "loss": 1.5053,
+      "step": 1570
+    },
+    {
+      "epoch": 0.8042463388117925,
+      "grad_norm": 0.035208649933338165,
+      "learning_rate": 0.00018777296784313095,
+      "loss": 1.5099,
+      "step": 1572
+    },
+    {
+      "epoch": 0.8052695529833088,
+      "grad_norm": 0.03541814163327217,
+      "learning_rate": 0.00018773238925836507,
+      "loss": 1.5027,
+      "step": 1574
+    },
+    {
+      "epoch": 0.8062927671548251,
+      "grad_norm": 0.04706384614109993,
+      "learning_rate": 0.0001876917478494303,
+      "loss": 1.5111,
+      "step": 1576
+    },
+    {
+      "epoch": 0.8073159813263414,
+      "grad_norm": 0.042128194123506546,
+      "learning_rate": 0.00018765104364542955,
+      "loss": 1.4832,
+      "step": 1578
+    },
+    {
+      "epoch": 0.8083391954978576,
+      "grad_norm": 0.033496059477329254,
+      "learning_rate": 0.00018761027667551063,
+      "loss": 1.49,
+      "step": 1580
+    },
+    {
+      "epoch": 0.8093624096693739,
+      "grad_norm": 0.036655962467193604,
+      "learning_rate": 0.0001875694469688663,
+      "loss": 1.4835,
+      "step": 1582
+    },
+    {
+      "epoch": 0.8103856238408902,
+      "grad_norm": 0.036248572170734406,
+      "learning_rate": 0.0001875285545547342,
+      "loss": 1.5025,
+      "step": 1584
+    },
+    {
+      "epoch": 0.8114088380124065,
+      "grad_norm": 0.040282152593135834,
+      "learning_rate": 0.000187487599462397,
+      "loss": 1.4776,
+      "step": 1586
+    },
+    {
+      "epoch": 0.8124320521839228,
+      "grad_norm": 0.03675289452075958,
+      "learning_rate": 0.00018744658172118215,
+      "loss": 1.5036,
+      "step": 1588
+    },
+    {
+      "epoch": 0.8134552663554391,
+      "grad_norm": 0.03431113436818123,
+      "learning_rate": 0.00018740550136046196,
+      "loss": 1.4701,
+      "step": 1590
+    },
+    {
+      "epoch": 0.8144784805269553,
+      "grad_norm": 0.03184695914387703,
+      "learning_rate": 0.00018736435840965366,
+      "loss": 1.473,
+      "step": 1592
+    },
+    {
+      "epoch": 0.8155016946984716,
+      "grad_norm": 0.031748853623867035,
+      "learning_rate": 0.00018732315289821921,
+      "loss": 1.5039,
+      "step": 1594
+    },
+    {
+      "epoch": 0.8165249088699879,
+      "grad_norm": 0.034614481031894684,
+      "learning_rate": 0.00018728188485566544,
+      "loss": 1.4664,
+      "step": 1596
+    },
+    {
+      "epoch": 0.8175481230415041,
+      "grad_norm": 0.0308011993765831,
+      "learning_rate": 0.0001872405543115439,
+      "loss": 1.4719,
+      "step": 1598
+    },
+    {
+      "epoch": 0.8185713372130204,
+      "grad_norm": 0.031010661274194717,
+      "learning_rate": 0.00018719916129545093,
+      "loss": 1.4841,
+      "step": 1600
+    },
+    {
+      "epoch": 0.8195945513845366,
+      "grad_norm": 0.03110615722835064,
+      "learning_rate": 0.0001871577058370276,
+      "loss": 1.4878,
+      "step": 1602
+    },
+    {
+      "epoch": 0.8206177655560529,
+      "grad_norm": 0.030799025669693947,
+      "learning_rate": 0.00018711618796595972,
+      "loss": 1.4391,
+      "step": 1604
+    },
+    {
+      "epoch": 0.8216409797275692,
+      "grad_norm": 0.029373083263635635,
+      "learning_rate": 0.00018707460771197774,
+      "loss": 1.5265,
+      "step": 1606
+    },
+    {
+      "epoch": 0.8226641938990855,
+      "grad_norm": 0.03043638914823532,
+      "learning_rate": 0.0001870329651048568,
+      "loss": 1.5027,
+      "step": 1608
+    },
+    {
+      "epoch": 0.8236874080706018,
+      "grad_norm": 0.0337023101747036,
+      "learning_rate": 0.00018699126017441672,
+      "loss": 1.4793,
+      "step": 1610
+    },
+    {
+      "epoch": 0.8247106222421181,
+      "grad_norm": 0.03439760580658913,
+      "learning_rate": 0.0001869494929505219,
+      "loss": 1.4764,
+      "step": 1612
+    },
+    {
+      "epoch": 0.8257338364136343,
+      "grad_norm": 0.03283720836043358,
+      "learning_rate": 0.00018690766346308145,
+      "loss": 1.4829,
+      "step": 1614
+    },
+    {
+      "epoch": 0.8267570505851506,
+      "grad_norm": 0.030338643118739128,
+      "learning_rate": 0.00018686577174204885,
+      "loss": 1.4587,
+      "step": 1616
+    },
+    {
+      "epoch": 0.8277802647566669,
+      "grad_norm": 0.03556302934885025,
+      "learning_rate": 0.00018682381781742245,
+      "loss": 1.4924,
+      "step": 1618
+    },
+    {
+      "epoch": 0.8288034789281832,
+      "grad_norm": 0.032113250344991684,
+      "learning_rate": 0.00018678180171924485,
+      "loss": 1.4875,
+      "step": 1620
+    },
+    {
+      "epoch": 0.8298266930996995,
+      "grad_norm": 0.1559678167104721,
+      "learning_rate": 0.00018673972347760338,
+      "loss": 1.5009,
+      "step": 1622
+    },
+    {
+      "epoch": 0.8308499072712157,
+      "grad_norm": 0.06492070108652115,
+      "learning_rate": 0.00018669758312262976,
+      "loss": 1.4632,
+      "step": 1624
+    },
+    {
+      "epoch": 0.831873121442732,
+      "grad_norm": 0.05882725864648819,
+      "learning_rate": 0.00018665538068450023,
+      "loss": 1.472,
+      "step": 1626
+    },
+    {
+      "epoch": 0.8328963356142483,
+      "grad_norm": 0.03860605135560036,
+      "learning_rate": 0.00018661311619343546,
+      "loss": 1.4662,
+      "step": 1628
+    },
+    {
+      "epoch": 0.8339195497857645,
+      "grad_norm": 0.04597290977835655,
+      "learning_rate": 0.00018657078967970062,
+      "loss": 1.4706,
+      "step": 1630
+    },
+    {
+      "epoch": 0.8349427639572808,
+      "grad_norm": 0.04754943400621414,
+      "learning_rate": 0.00018652840117360517,
+      "loss": 1.475,
+      "step": 1632
+    },
+    {
+      "epoch": 0.8359659781287971,
+      "grad_norm": 0.03354303911328316,
+      "learning_rate": 0.0001864859507055031,
+      "loss": 1.5133,
+      "step": 1634
+    },
+    {
+      "epoch": 0.8369891923003133,
+      "grad_norm": 0.042201388627290726,
+      "learning_rate": 0.0001864434383057927,
+      "loss": 1.5125,
+      "step": 1636
+    },
+    {
+      "epoch": 0.8380124064718296,
+      "grad_norm": 0.0343627855181694,
+      "learning_rate": 0.00018640086400491658,
+      "loss": 1.4811,
+      "step": 1638
+    },
+    {
+      "epoch": 0.8390356206433459,
+      "grad_norm": 0.03558426350355148,
+      "learning_rate": 0.00018635822783336174,
+      "loss": 1.5171,
+      "step": 1640
+    },
+    {
+      "epoch": 0.8400588348148622,
+      "grad_norm": 0.03267373517155647,
+      "learning_rate": 0.00018631552982165944,
+      "loss": 1.4758,
+      "step": 1642
+    },
+    {
+      "epoch": 0.8410820489863785,
+      "grad_norm": 0.03015967085957527,
+      "learning_rate": 0.00018627277000038533,
+      "loss": 1.4501,
+      "step": 1644
+    },
+    {
+      "epoch": 0.8421052631578947,
+      "grad_norm": 0.03152506798505783,
+      "learning_rate": 0.0001862299484001591,
+      "loss": 1.4625,
+      "step": 1646
+    },
+    {
+      "epoch": 0.843128477329411,
+      "grad_norm": 0.03820090368390083,
+      "learning_rate": 0.0001861870650516449,
+      "loss": 1.5065,
+      "step": 1648
+    },
+    {
+      "epoch": 0.8441516915009273,
+      "grad_norm": 0.030817920342087746,
+      "learning_rate": 0.000186144119985551,
+      "loss": 1.4814,
+      "step": 1650
+    },
+    {
+      "epoch": 0.8451749056724436,
+      "grad_norm": 0.03546105697751045,
+      "learning_rate": 0.00018610111323262986,
+      "loss": 1.4554,
+      "step": 1652
+    },
+    {
+      "epoch": 0.8461981198439599,
+      "grad_norm": 0.033546384423971176,
+      "learning_rate": 0.00018605804482367807,
+      "loss": 1.4379,
+      "step": 1654
+    },
+    {
+      "epoch": 0.8472213340154762,
+      "grad_norm": 0.035938508808612823,
+      "learning_rate": 0.00018601491478953657,
+      "loss": 1.4931,
+      "step": 1656
+    },
+    {
+      "epoch": 0.8482445481869924,
+      "grad_norm": 0.03531987965106964,
+      "learning_rate": 0.00018597172316109015,
+      "loss": 1.4483,
+      "step": 1658
+    },
+    {
+      "epoch": 0.8492677623585086,
+      "grad_norm": 0.03041314333677292,
+      "learning_rate": 0.00018592846996926793,
+      "loss": 1.4541,
+      "step": 1660
+    },
+    {
+      "epoch": 0.850290976530025,
+      "grad_norm": 0.03549192473292351,
+      "learning_rate": 0.00018588515524504295,
+      "loss": 1.4615,
+      "step": 1662
+    },
+    {
+      "epoch": 0.8513141907015412,
+      "grad_norm": 0.03376925736665726,
+      "learning_rate": 0.0001858417790194325,
+      "loss": 1.4722,
+      "step": 1664
+    },
+    {
+      "epoch": 0.8523374048730575,
+      "grad_norm": 0.03313841298222542,
+      "learning_rate": 0.00018579834132349772,
+      "loss": 1.4791,
+      "step": 1666
+    },
+    {
+      "epoch": 0.8533606190445737,
+      "grad_norm": 0.033985435962677,
+      "learning_rate": 0.00018575484218834388,
+      "loss": 1.4443,
+      "step": 1668
+    },
+    {
+      "epoch": 0.85438383321609,
+      "grad_norm": 0.032460469752550125,
+      "learning_rate": 0.00018571128164512023,
+      "loss": 1.4988,
+      "step": 1670
+    },
+    {
+      "epoch": 0.8554070473876063,
+      "grad_norm": 0.03272455185651779,
+      "learning_rate": 0.00018566765972501993,
+      "loss": 1.4659,
+      "step": 1672
+    },
+    {
+      "epoch": 0.8564302615591226,
+      "grad_norm": 0.031708747148513794,
+      "learning_rate": 0.0001856239764592802,
+      "loss": 1.5007,
+      "step": 1674
+    },
+    {
+      "epoch": 0.8574534757306389,
+      "grad_norm": 0.034189220517873764,
+      "learning_rate": 0.0001855802318791821,
+      "loss": 1.4423,
+      "step": 1676
+    },
+    {
+      "epoch": 0.8584766899021552,
+      "grad_norm": 0.03221631050109863,
+      "learning_rate": 0.00018553642601605068,
+      "loss": 1.4701,
+      "step": 1678
+    },
+    {
+      "epoch": 0.8594999040736714,
+      "grad_norm": 0.029117561876773834,
+      "learning_rate": 0.00018549255890125475,
+      "loss": 1.4769,
+      "step": 1680
+    },
+    {
+      "epoch": 0.8605231182451877,
+      "grad_norm": 0.029596133157610893,
+      "learning_rate": 0.00018544863056620708,
+      "loss": 1.4635,
+      "step": 1682
+    },
+    {
+      "epoch": 0.861546332416704,
+      "grad_norm": 0.030032752081751823,
+      "learning_rate": 0.00018540464104236425,
+      "loss": 1.4991,
+      "step": 1684
+    },
+    {
+      "epoch": 0.8625695465882203,
+      "grad_norm": 0.03227202966809273,
+      "learning_rate": 0.00018536059036122667,
+      "loss": 1.4608,
+      "step": 1686
+    },
+    {
+      "epoch": 0.8635927607597366,
+      "grad_norm": 0.03331397473812103,
+      "learning_rate": 0.0001853164785543385,
+      "loss": 1.4958,
+      "step": 1688
+    },
+    {
+      "epoch": 0.8646159749312528,
+      "grad_norm": 0.033648762851953506,
+      "learning_rate": 0.00018527230565328778,
+      "loss": 1.4949,
+      "step": 1690
+    },
+    {
+      "epoch": 0.865639189102769,
+      "grad_norm": 0.03504339978098869,
+      "learning_rate": 0.00018522807168970616,
+      "loss": 1.439,
+      "step": 1692
+    },
+    {
+      "epoch": 0.8666624032742853,
+      "grad_norm": 0.034829430282115936,
+      "learning_rate": 0.0001851837766952691,
+      "loss": 1.5001,
+      "step": 1694
+    },
+    {
+      "epoch": 0.8676856174458016,
+      "grad_norm": 0.03803844377398491,
+      "learning_rate": 0.0001851394207016957,
+      "loss": 1.4905,
+      "step": 1696
+    },
+    {
+      "epoch": 0.8687088316173179,
+      "grad_norm": 0.0394139364361763,
+      "learning_rate": 0.00018509500374074884,
+      "loss": 1.4537,
+      "step": 1698
+    },
+    {
+      "epoch": 0.8697320457888342,
+      "grad_norm": 0.039348065853118896,
+      "learning_rate": 0.000185050525844235,
+      "loss": 1.4865,
+      "step": 1700
+    },
+    {
+      "epoch": 0.8707552599603504,
+      "grad_norm": 0.03650161996483803,
+      "learning_rate": 0.00018500598704400428,
+      "loss": 1.4658,
+      "step": 1702
+    },
+    {
+      "epoch": 0.8717784741318667,
+      "grad_norm": 0.03312232345342636,
+      "learning_rate": 0.00018496138737195036,
+      "loss": 1.477,
+      "step": 1704
+    },
+    {
+      "epoch": 0.872801688303383,
+      "grad_norm": 0.031243184581398964,
+      "learning_rate": 0.00018491672686001066,
+      "loss": 1.4983,
+      "step": 1706
+    },
+    {
+      "epoch": 0.8738249024748993,
+      "grad_norm": 0.03666044771671295,
+      "learning_rate": 0.00018487200554016602,
+      "loss": 1.4606,
+      "step": 1708
+    },
+    {
+      "epoch": 0.8748481166464156,
+      "grad_norm": 0.035856928676366806,
+      "learning_rate": 0.00018482722344444086,
+      "loss": 1.4808,
+      "step": 1710
+    },
+    {
+      "epoch": 0.8758713308179318,
+      "grad_norm": 0.03538081422448158,
+      "learning_rate": 0.00018478238060490312,
+      "loss": 1.4734,
+      "step": 1712
+    },
+    {
+      "epoch": 0.8768945449894481,
+      "grad_norm": 0.02917349338531494,
+      "learning_rate": 0.00018473747705366426,
+      "loss": 1.4947,
+      "step": 1714
+    },
+    {
+      "epoch": 0.8779177591609644,
+      "grad_norm": 0.035214658826589584,
+      "learning_rate": 0.0001846925128228792,
+      "loss": 1.4773,
+      "step": 1716
+    },
+    {
+      "epoch": 0.8789409733324807,
+      "grad_norm": 0.03703998774290085,
+      "learning_rate": 0.00018464748794474634,
+      "loss": 1.4704,
+      "step": 1718
+    },
+    {
+      "epoch": 0.879964187503997,
+      "grad_norm": 0.03480003774166107,
+      "learning_rate": 0.0001846024024515075,
+      "loss": 1.4723,
+      "step": 1720
+    },
+    {
+      "epoch": 0.8809874016755133,
+      "grad_norm": 0.04090346395969391,
+      "learning_rate": 0.00018455725637544785,
+      "loss": 1.4525,
+      "step": 1722
+    },
+    {
+      "epoch": 0.8820106158470294,
+      "grad_norm": 0.042412955313920975,
+      "learning_rate": 0.00018451204974889596,
+      "loss": 1.4418,
+      "step": 1724
+    },
+    {
+      "epoch": 0.8830338300185457,
+      "grad_norm": 0.03738129511475563,
+      "learning_rate": 0.00018446678260422385,
+      "loss": 1.4747,
+      "step": 1726
+    },
+    {
+      "epoch": 0.884057044190062,
+      "grad_norm": 0.03728758171200752,
+      "learning_rate": 0.00018442145497384673,
+      "loss": 1.5007,
+      "step": 1728
+    },
+    {
+      "epoch": 0.8850802583615783,
+      "grad_norm": 0.038157109171152115,
+      "learning_rate": 0.0001843760668902233,
+      "loss": 1.4937,
+      "step": 1730
+    },
+    {
+      "epoch": 0.8861034725330946,
+      "grad_norm": 0.03238663077354431,
+      "learning_rate": 0.00018433061838585534,
+      "loss": 1.4631,
+      "step": 1732
+    },
+    {
+      "epoch": 0.8871266867046108,
+      "grad_norm": 0.03741516172885895,
+      "learning_rate": 0.0001842851094932881,
+      "loss": 1.4887,
+      "step": 1734
+    },
+    {
+      "epoch": 0.8881499008761271,
+      "grad_norm": 0.03934532031416893,
+      "learning_rate": 0.00018423954024510996,
+      "loss": 1.4208,
+      "step": 1736
+    },
+    {
+      "epoch": 0.8891731150476434,
+      "grad_norm": 0.03238905593752861,
+      "learning_rate": 0.00018419391067395248,
+      "loss": 1.4587,
+      "step": 1738
+    },
+    {
+      "epoch": 0.8901963292191597,
+      "grad_norm": 0.039086490869522095,
+      "learning_rate": 0.00018414822081249058,
+      "loss": 1.4545,
+      "step": 1740
+    },
+    {
+      "epoch": 0.891219543390676,
+      "grad_norm": 0.0370473712682724,
+      "learning_rate": 0.00018410247069344218,
+      "loss": 1.4473,
+      "step": 1742
+    },
+    {
+      "epoch": 0.8922427575621923,
+      "grad_norm": 0.034061599522829056,
+      "learning_rate": 0.00018405666034956844,
+      "loss": 1.4831,
+      "step": 1744
+    },
+    {
+      "epoch": 0.8932659717337085,
+      "grad_norm": 0.0363328754901886,
+      "learning_rate": 0.00018401078981367363,
+      "loss": 1.4729,
+      "step": 1746
+    },
+    {
+      "epoch": 0.8942891859052248,
+      "grad_norm": 0.035310424864292145,
+      "learning_rate": 0.00018396485911860512,
+      "loss": 1.518,
+      "step": 1748
+    },
+    {
+      "epoch": 0.8953124000767411,
+      "grad_norm": 0.03476149961352348,
+      "learning_rate": 0.00018391886829725334,
+      "loss": 1.4611,
+      "step": 1750
+    },
+    {
+      "epoch": 0.8963356142482574,
+      "grad_norm": 0.03310383856296539,
+      "learning_rate": 0.00018387281738255185,
+      "loss": 1.4746,
+      "step": 1752
+    },
+    {
+      "epoch": 0.8973588284197737,
+      "grad_norm": 0.0307275652885437,
+      "learning_rate": 0.00018382670640747714,
+      "loss": 1.4697,
+      "step": 1754
+    },
+    {
+      "epoch": 0.8983820425912898,
+      "grad_norm": 0.028024040162563324,
+      "learning_rate": 0.00018378053540504873,
+      "loss": 1.4608,
+      "step": 1756
+    },
+    {
+      "epoch": 0.8994052567628061,
+      "grad_norm": 0.029499476775527,
+      "learning_rate": 0.00018373430440832923,
+      "loss": 1.4614,
+      "step": 1758
+    },
+    {
+      "epoch": 0.9004284709343224,
+      "grad_norm": 0.033067066222429276,
+      "learning_rate": 0.0001836880134504241,
+      "loss": 1.479,
+      "step": 1760
+    },
+    {
+      "epoch": 0.9014516851058387,
+      "grad_norm": 0.03787175565958023,
+      "learning_rate": 0.00018364166256448173,
+      "loss": 1.4712,
+      "step": 1762
+    },
+    {
+      "epoch": 0.902474899277355,
+      "grad_norm": 0.02690064162015915,
+      "learning_rate": 0.0001835952517836935,
+      "loss": 1.4673,
+      "step": 1764
+    },
+    {
+      "epoch": 0.9034981134488713,
+      "grad_norm": 0.026671042665839195,
+      "learning_rate": 0.00018354878114129367,
+      "loss": 1.4561,
+      "step": 1766
+    },
+    {
+      "epoch": 0.9045213276203875,
+      "grad_norm": 0.03277120366692543,
+      "learning_rate": 0.00018350225067055925,
+      "loss": 1.4879,
+      "step": 1768
+    },
+    {
+      "epoch": 0.9055445417919038,
+      "grad_norm": 0.03682045266032219,
+      "learning_rate": 0.00018345566040481028,
+      "loss": 1.467,
+      "step": 1770
+    },
+    {
+      "epoch": 0.9065677559634201,
+      "grad_norm": 0.027602965012192726,
+      "learning_rate": 0.0001834090103774095,
+      "loss": 1.4514,
+      "step": 1772
+    },
+    {
+      "epoch": 0.9075909701349364,
+      "grad_norm": 0.03043595515191555,
+      "learning_rate": 0.00018336230062176244,
+      "loss": 1.4835,
+      "step": 1774
+    },
+    {
+      "epoch": 0.9086141843064527,
+      "grad_norm": 0.030672984197735786,
+      "learning_rate": 0.0001833155311713174,
+      "loss": 1.492,
+      "step": 1776
+    },
+    {
+      "epoch": 0.9096373984779689,
+      "grad_norm": 0.032694920897483826,
+      "learning_rate": 0.00018326870205956553,
+      "loss": 1.475,
+      "step": 1778
+    },
+    {
+      "epoch": 0.9106606126494852,
+      "grad_norm": 0.031511466950178146,
+      "learning_rate": 0.00018322181332004056,
+      "loss": 1.4457,
+      "step": 1780
+    },
+    {
+      "epoch": 0.9116838268210015,
+      "grad_norm": 0.03155050054192543,
+      "learning_rate": 0.00018317486498631899,
+      "loss": 1.5165,
+      "step": 1782
+    },
+    {
+      "epoch": 0.9127070409925178,
+      "grad_norm": 0.03132548928260803,
+      "learning_rate": 0.00018312785709202002,
+      "loss": 1.5171,
+      "step": 1784
+    },
+    {
+      "epoch": 0.913730255164034,
+      "grad_norm": 0.036277156323194504,
+      "learning_rate": 0.00018308078967080546,
+      "loss": 1.4726,
+      "step": 1786
+    },
+    {
+      "epoch": 0.9147534693355504,
+      "grad_norm": 0.029615385457873344,
+      "learning_rate": 0.00018303366275637976,
+      "loss": 1.448,
+      "step": 1788
+    },
+    {
+      "epoch": 0.9157766835070665,
+      "grad_norm": 0.029571905732154846,
+      "learning_rate": 0.00018298647638248996,
+      "loss": 1.4629,
+      "step": 1790
+    },
+    {
+      "epoch": 0.9167998976785828,
+      "grad_norm": 0.028433986008167267,
+      "learning_rate": 0.0001829392305829257,
+      "loss": 1.474,
+      "step": 1792
+    },
+    {
+      "epoch": 0.9178231118500991,
+      "grad_norm": 0.034186169505119324,
+      "learning_rate": 0.0001828919253915191,
+      "loss": 1.4828,
+      "step": 1794
+    },
+    {
+      "epoch": 0.9188463260216154,
+      "grad_norm": 0.03323967382311821,
+      "learning_rate": 0.00018284456084214496,
+      "loss": 1.4883,
+      "step": 1796
+    },
+    {
+      "epoch": 0.9198695401931317,
+      "grad_norm": 0.03627438098192215,
+      "learning_rate": 0.00018279713696872047,
+      "loss": 1.4505,
+      "step": 1798
+    },
+    {
+      "epoch": 0.9208927543646479,
+      "grad_norm": 0.037414826452732086,
+      "learning_rate": 0.0001827496538052053,
+      "loss": 1.5153,
+      "step": 1800
+    },
+    {
+      "epoch": 0.9219159685361642,
+      "grad_norm": 0.036538898944854736,
+      "learning_rate": 0.00018270211138560162,
+      "loss": 1.4565,
+      "step": 1802
+    },
+    {
+      "epoch": 0.9229391827076805,
+      "grad_norm": 0.034286949783563614,
+      "learning_rate": 0.00018265450974395403,
+      "loss": 1.4596,
+      "step": 1804
+    },
+    {
+      "epoch": 0.9239623968791968,
+      "grad_norm": 0.03332148864865303,
+      "learning_rate": 0.0001826068489143495,
+      "loss": 1.4452,
+      "step": 1806
+    },
+    {
+      "epoch": 0.9249856110507131,
+      "grad_norm": 0.030349107459187508,
+      "learning_rate": 0.00018255912893091743,
+      "loss": 1.4937,
+      "step": 1808
+    },
+    {
+      "epoch": 0.9260088252222294,
+      "grad_norm": 0.030373625457286835,
+      "learning_rate": 0.00018251134982782952,
+      "loss": 1.4774,
+      "step": 1810
+    },
+    {
+      "epoch": 0.9270320393937456,
+      "grad_norm": 0.03661259636282921,
+      "learning_rate": 0.00018246351163929991,
+      "loss": 1.4694,
+      "step": 1812
+    },
+    {
+      "epoch": 0.9280552535652619,
+      "grad_norm": 0.036550264805555344,
+      "learning_rate": 0.00018241561439958495,
+      "loss": 1.4944,
+      "step": 1814
+    },
+    {
+      "epoch": 0.9290784677367782,
+      "grad_norm": 0.03492378070950508,
+      "learning_rate": 0.0001823676581429833,
+      "loss": 1.445,
+      "step": 1816
+    },
+    {
+      "epoch": 0.9301016819082945,
+      "grad_norm": 0.03306609019637108,
+      "learning_rate": 0.0001823196429038359,
+      "loss": 1.4222,
+      "step": 1818
+    },
+    {
+      "epoch": 0.9311248960798107,
+      "grad_norm": 0.03200085088610649,
+      "learning_rate": 0.0001822715687165259,
+      "loss": 1.467,
+      "step": 1820
+    },
+    {
+      "epoch": 0.9321481102513269,
+      "grad_norm": 0.036335378885269165,
+      "learning_rate": 0.00018222343561547874,
+      "loss": 1.4693,
+      "step": 1822
+    },
+    {
+      "epoch": 0.9331713244228432,
+      "grad_norm": 0.039753127843141556,
+      "learning_rate": 0.00018217524363516193,
+      "loss": 1.4594,
+      "step": 1824
+    },
+    {
+      "epoch": 0.9341945385943595,
+      "grad_norm": 0.03748109191656113,
+      "learning_rate": 0.0001821269928100852,
+      "loss": 1.5014,
+      "step": 1826
+    },
+    {
+      "epoch": 0.9352177527658758,
+      "grad_norm": 0.04106932878494263,
+      "learning_rate": 0.00018207868317480046,
+      "loss": 1.4823,
+      "step": 1828
+    },
+    {
+      "epoch": 0.9362409669373921,
+      "grad_norm": 0.032248884439468384,
+      "learning_rate": 0.00018203031476390167,
+      "loss": 1.4697,
+      "step": 1830
+    },
+    {
+      "epoch": 0.9372641811089084,
+      "grad_norm": 0.047158315777778625,
+      "learning_rate": 0.00018198188761202487,
+      "loss": 1.5449,
+      "step": 1832
+    },
+    {
+      "epoch": 0.9382873952804246,
+      "grad_norm": 0.03881628066301346,
+      "learning_rate": 0.00018193340175384824,
+      "loss": 1.5129,
+      "step": 1834
+    },
+    {
+      "epoch": 0.9393106094519409,
+      "grad_norm": 0.038932789117097855,
+      "learning_rate": 0.00018188485722409197,
+      "loss": 1.4508,
+      "step": 1836
+    },
+    {
+      "epoch": 0.9403338236234572,
+      "grad_norm": 0.042171675711870193,
+      "learning_rate": 0.00018183625405751816,
+      "loss": 1.4976,
+      "step": 1838
+    },
+    {
+      "epoch": 0.9413570377949735,
+      "grad_norm": 0.03824607655405998,
+      "learning_rate": 0.00018178759228893108,
+      "loss": 1.4759,
+      "step": 1840
+    },
+    {
+      "epoch": 0.9423802519664898,
+      "grad_norm": 0.0380014143884182,
+      "learning_rate": 0.0001817388719531768,
+      "loss": 1.4765,
+      "step": 1842
+    },
+    {
+      "epoch": 0.943403466138006,
+      "grad_norm": 0.03372355177998543,
+      "learning_rate": 0.00018169009308514344,
+      "loss": 1.4724,
+      "step": 1844
+    },
+    {
+      "epoch": 0.9444266803095223,
+      "grad_norm": 0.03503812104463577,
+      "learning_rate": 0.00018164125571976098,
+      "loss": 1.4537,
+      "step": 1846
+    },
+    {
+      "epoch": 0.9454498944810386,
+      "grad_norm": 0.03842812776565552,
+      "learning_rate": 0.00018159235989200132,
+      "loss": 1.4747,
+      "step": 1848
+    },
+    {
+      "epoch": 0.9464731086525549,
+      "grad_norm": 0.03686497360467911,
+      "learning_rate": 0.0001815434056368782,
+      "loss": 1.4433,
+      "step": 1850
+    },
+    {
+      "epoch": 0.9474963228240711,
+      "grad_norm": 0.03216801956295967,
+      "learning_rate": 0.00018149439298944717,
+      "loss": 1.4628,
+      "step": 1852
+    },
+    {
+      "epoch": 0.9485195369955874,
+      "grad_norm": 0.04245101660490036,
+      "learning_rate": 0.0001814453219848057,
+      "loss": 1.5411,
+      "step": 1854
+    },
+    {
+      "epoch": 0.9495427511671036,
+      "grad_norm": 0.041708942502737045,
+      "learning_rate": 0.0001813961926580929,
+      "loss": 1.4828,
+      "step": 1856
+    },
+    {
+      "epoch": 0.9505659653386199,
+      "grad_norm": 0.038249559700489044,
+      "learning_rate": 0.0001813470050444898,
+      "loss": 1.4633,
+      "step": 1858
+    },
+    {
+      "epoch": 0.9515891795101362,
+      "grad_norm": 0.03623546287417412,
+      "learning_rate": 0.00018129775917921905,
+      "loss": 1.4644,
+      "step": 1860
+    },
+    {
+      "epoch": 0.9526123936816525,
+      "grad_norm": 0.03886585682630539,
+      "learning_rate": 0.00018124845509754505,
+      "loss": 1.4642,
+      "step": 1862
+    },
+    {
+      "epoch": 0.9536356078531688,
+      "grad_norm": 0.03367486968636513,
+      "learning_rate": 0.00018119909283477394,
+      "loss": 1.4577,
+      "step": 1864
+    },
+    {
+      "epoch": 0.954658822024685,
+      "grad_norm": 0.034619078040122986,
+      "learning_rate": 0.00018114967242625343,
+      "loss": 1.4424,
+      "step": 1866
+    },
+    {
+      "epoch": 0.9556820361962013,
+      "grad_norm": 0.036260370165109634,
+      "learning_rate": 0.00018110019390737292,
+      "loss": 1.4749,
+      "step": 1868
+    },
+    {
+      "epoch": 0.9567052503677176,
+      "grad_norm": 0.037158943712711334,
+      "learning_rate": 0.00018105065731356343,
+      "loss": 1.4185,
+      "step": 1870
+    },
+    {
+      "epoch": 0.9577284645392339,
+      "grad_norm": 0.03858686238527298,
+      "learning_rate": 0.00018100106268029755,
+      "loss": 1.5027,
+      "step": 1872
+    },
+    {
+      "epoch": 0.9587516787107502,
+      "grad_norm": 0.03699406236410141,
+      "learning_rate": 0.00018095141004308943,
+      "loss": 1.4283,
+      "step": 1874
+    },
+    {
+      "epoch": 0.9597748928822665,
+      "grad_norm": 0.030941152945160866,
+      "learning_rate": 0.00018090169943749476,
+      "loss": 1.4729,
+      "step": 1876
+    },
+    {
+      "epoch": 0.9607981070537827,
+      "grad_norm": 0.03944398835301399,
+      "learning_rate": 0.00018085193089911075,
+      "loss": 1.4636,
+      "step": 1878
+    },
+    {
+      "epoch": 0.961821321225299,
+      "grad_norm": 0.03944871574640274,
+      "learning_rate": 0.00018080210446357606,
+      "loss": 1.4458,
+      "step": 1880
+    },
+    {
+      "epoch": 0.9628445353968152,
+      "grad_norm": 0.042511675506830215,
+      "learning_rate": 0.00018075222016657088,
+      "loss": 1.4868,
+      "step": 1882
+    },
+    {
+      "epoch": 0.9638677495683315,
+      "grad_norm": 0.036067429929971695,
+      "learning_rate": 0.00018070227804381674,
+      "loss": 1.4681,
+      "step": 1884
+    },
+    {
+      "epoch": 0.9648909637398478,
+      "grad_norm": 0.030013304203748703,
+      "learning_rate": 0.00018065227813107666,
+      "loss": 1.5088,
+      "step": 1886
+    },
+    {
+      "epoch": 0.965914177911364,
+      "grad_norm": 0.030714694410562515,
+      "learning_rate": 0.000180602220464155,
+      "loss": 1.4443,
+      "step": 1888
+    },
+    {
+      "epoch": 0.9669373920828803,
+      "grad_norm": 0.03553122654557228,
+      "learning_rate": 0.0001805521050788975,
+      "loss": 1.4667,
+      "step": 1890
+    },
+    {
+      "epoch": 0.9679606062543966,
+      "grad_norm": 0.032518330961465836,
+      "learning_rate": 0.0001805019320111912,
+      "loss": 1.4756,
+      "step": 1892
+    },
+    {
+      "epoch": 0.9689838204259129,
+      "grad_norm": 0.032445941120386124,
+      "learning_rate": 0.0001804517012969644,
+      "loss": 1.474,
+      "step": 1894
+    },
+    {
+      "epoch": 0.9700070345974292,
+      "grad_norm": 0.03390254080295563,
+      "learning_rate": 0.00018040141297218695,
+      "loss": 1.4477,
+      "step": 1896
+    },
+    {
+      "epoch": 0.9710302487689455,
+      "grad_norm": 0.02915276773273945,
+      "learning_rate": 0.00018035106707286954,
+      "loss": 1.4784,
+      "step": 1898
+    },
+    {
+      "epoch": 0.9720534629404617,
+      "grad_norm": 0.028000080958008766,
+      "learning_rate": 0.00018030066363506437,
+      "loss": 1.45,
+      "step": 1900
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 7816,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.236992921365381e+19,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}