{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.999953931911365,
  "eval_steps": 500,
  "global_step": 10853,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 9.213617727000507e-05,
      "grad_norm": 5.052331877744982,
      "learning_rate": 9.208103130755065e-09,
      "loss": 0.3834,
      "step": 1
    },
    {
      "epoch": 0.00018427235454001014,
      "grad_norm": 4.968514973300749,
      "learning_rate": 1.841620626151013e-08,
      "loss": 0.3996,
      "step": 2
    },
    {
      "epoch": 0.0002764085318100152,
      "grad_norm": 5.173905523123122,
      "learning_rate": 2.7624309392265195e-08,
      "loss": 0.4053,
      "step": 3
    },
    {
      "epoch": 0.0003685447090800203,
      "grad_norm": 4.954247638334854,
      "learning_rate": 3.683241252302026e-08,
      "loss": 0.3794,
      "step": 4
    },
    {
      "epoch": 0.0004606808863500253,
      "grad_norm": 4.9278156311152586,
      "learning_rate": 4.604051565377533e-08,
      "loss": 0.3735,
      "step": 5
    },
    {
      "epoch": 0.0005528170636200304,
      "grad_norm": 4.865883765294612,
      "learning_rate": 5.524861878453039e-08,
      "loss": 0.3882,
      "step": 6
    },
    {
      "epoch": 0.0006449532408900354,
      "grad_norm": 4.8700873305276255,
      "learning_rate": 6.445672191528546e-08,
      "loss": 0.3758,
      "step": 7
    },
    {
      "epoch": 0.0007370894181600406,
      "grad_norm": 5.087648327313099,
      "learning_rate": 7.366482504604052e-08,
      "loss": 0.3758,
      "step": 8
    },
    {
      "epoch": 0.0008292255954300456,
      "grad_norm": 5.30641963090733,
      "learning_rate": 8.287292817679558e-08,
      "loss": 0.4019,
      "step": 9
    },
    {
      "epoch": 0.0009213617727000506,
      "grad_norm": 5.070246570827928,
      "learning_rate": 9.208103130755066e-08,
      "loss": 0.3827,
      "step": 10
    },
    {
      "epoch": 0.0010134979499700557,
      "grad_norm": 5.216031896054235,
      "learning_rate": 1.0128913443830572e-07,
      "loss": 0.3846,
      "step": 11
    },
    {
      "epoch": 0.0011056341272400608,
      "grad_norm": 4.876861133902156,
      "learning_rate": 1.1049723756906078e-07,
      "loss": 0.3843,
      "step": 12
    },
    {
      "epoch": 0.001197770304510066,
      "grad_norm": 5.023561914398882,
      "learning_rate": 1.1970534069981586e-07,
      "loss": 0.3511,
      "step": 13
    },
    {
      "epoch": 0.0012899064817800709,
      "grad_norm": 5.145451738529512,
      "learning_rate": 1.2891344383057092e-07,
      "loss": 0.3672,
      "step": 14
    },
    {
      "epoch": 0.001382042659050076,
      "grad_norm": 4.792948201942286,
      "learning_rate": 1.3812154696132598e-07,
      "loss": 0.3796,
      "step": 15
    },
    {
      "epoch": 0.0014741788363200811,
      "grad_norm": 4.551919134485809,
      "learning_rate": 1.4732965009208104e-07,
      "loss": 0.3803,
      "step": 16
    },
    {
      "epoch": 0.001566315013590086,
      "grad_norm": 4.8263836273678145,
      "learning_rate": 1.5653775322283613e-07,
      "loss": 0.3716,
      "step": 17
    },
    {
      "epoch": 0.0016584511908600912,
      "grad_norm": 4.740504995965325,
      "learning_rate": 1.6574585635359117e-07,
      "loss": 0.3838,
      "step": 18
    },
    {
      "epoch": 0.0017505873681300963,
      "grad_norm": 4.3895949096154805,
      "learning_rate": 1.7495395948434625e-07,
      "loss": 0.3516,
      "step": 19
    },
    {
      "epoch": 0.0018427235454001013,
      "grad_norm": 4.730549482054788,
      "learning_rate": 1.8416206261510132e-07,
      "loss": 0.3971,
      "step": 20
    },
    {
      "epoch": 0.0019348597226701064,
      "grad_norm": 4.326518829493085,
      "learning_rate": 1.9337016574585635e-07,
      "loss": 0.3621,
      "step": 21
    },
    {
      "epoch": 0.0020269958999401113,
      "grad_norm": 4.411579317357037,
      "learning_rate": 2.0257826887661144e-07,
      "loss": 0.3671,
      "step": 22
    },
    {
      "epoch": 0.0021191320772101165,
      "grad_norm": 4.406425398983609,
      "learning_rate": 2.1178637200736653e-07,
      "loss": 0.3586,
      "step": 23
    },
    {
      "epoch": 0.0022112682544801216,
      "grad_norm": 4.250027158629804,
      "learning_rate": 2.2099447513812156e-07,
      "loss": 0.3351,
      "step": 24
    },
    {
      "epoch": 0.0023034044317501268,
      "grad_norm": 4.453036416890763,
      "learning_rate": 2.3020257826887662e-07,
      "loss": 0.3547,
      "step": 25
    },
    {
      "epoch": 0.002395540609020132,
      "grad_norm": 4.338068169465262,
      "learning_rate": 2.394106813996317e-07,
      "loss": 0.3643,
      "step": 26
    },
    {
      "epoch": 0.002487676786290137,
      "grad_norm": 4.081751082657096,
      "learning_rate": 2.486187845303868e-07,
      "loss": 0.3735,
      "step": 27
    },
    {
      "epoch": 0.0025798129635601417,
      "grad_norm": 3.759847993257449,
      "learning_rate": 2.5782688766114184e-07,
      "loss": 0.3538,
      "step": 28
    },
    {
      "epoch": 0.002671949140830147,
      "grad_norm": 3.656922967111338,
      "learning_rate": 2.670349907918969e-07,
      "loss": 0.3466,
      "step": 29
    },
    {
      "epoch": 0.002764085318100152,
      "grad_norm": 3.5621860682515782,
      "learning_rate": 2.7624309392265196e-07,
      "loss": 0.3678,
      "step": 30
    },
    {
      "epoch": 0.002856221495370157,
      "grad_norm": 3.074394037601397,
      "learning_rate": 2.85451197053407e-07,
      "loss": 0.3472,
      "step": 31
    },
    {
      "epoch": 0.0029483576726401623,
      "grad_norm": 3.0932831564716263,
      "learning_rate": 2.946593001841621e-07,
      "loss": 0.3326,
      "step": 32
    },
    {
      "epoch": 0.0030404938499101674,
      "grad_norm": 3.145618703123189,
      "learning_rate": 3.0386740331491715e-07,
      "loss": 0.3714,
      "step": 33
    },
    {
      "epoch": 0.003132630027180172,
      "grad_norm": 2.847899608744511,
      "learning_rate": 3.1307550644567226e-07,
      "loss": 0.3392,
      "step": 34
    },
    {
      "epoch": 0.0032247662044501773,
      "grad_norm": 2.9243824195879493,
      "learning_rate": 3.2228360957642727e-07,
      "loss": 0.3501,
      "step": 35
    },
    {
      "epoch": 0.0033169023817201824,
      "grad_norm": 2.7832329637023085,
      "learning_rate": 3.3149171270718233e-07,
      "loss": 0.3466,
      "step": 36
    },
    {
      "epoch": 0.0034090385589901876,
      "grad_norm": 2.572516529700843,
      "learning_rate": 3.4069981583793745e-07,
      "loss": 0.3354,
      "step": 37
    },
    {
      "epoch": 0.0035011747362601927,
      "grad_norm": 2.2478775351473783,
      "learning_rate": 3.499079189686925e-07,
      "loss": 0.3341,
      "step": 38
    },
    {
      "epoch": 0.003593310913530198,
      "grad_norm": 2.4125693100520214,
      "learning_rate": 3.591160220994475e-07,
      "loss": 0.3518,
      "step": 39
    },
    {
      "epoch": 0.0036854470908002025,
      "grad_norm": 2.161167186955303,
      "learning_rate": 3.6832412523020263e-07,
      "loss": 0.2944,
      "step": 40
    },
    {
      "epoch": 0.0037775832680702077,
      "grad_norm": 2.340176936611769,
      "learning_rate": 3.775322283609577e-07,
      "loss": 0.3516,
      "step": 41
    },
    {
      "epoch": 0.003869719445340213,
      "grad_norm": 2.0582341569455407,
      "learning_rate": 3.867403314917127e-07,
      "loss": 0.2974,
      "step": 42
    },
    {
      "epoch": 0.003961855622610218,
      "grad_norm": 2.1235508876322244,
      "learning_rate": 3.959484346224678e-07,
      "loss": 0.3482,
      "step": 43
    },
    {
      "epoch": 0.004053991799880223,
      "grad_norm": 1.810336528178075,
      "learning_rate": 4.051565377532229e-07,
      "loss": 0.2894,
      "step": 44
    },
    {
      "epoch": 0.004146127977150228,
      "grad_norm": 1.9229985402496037,
      "learning_rate": 4.1436464088397794e-07,
      "loss": 0.3062,
      "step": 45
    },
    {
      "epoch": 0.004238264154420233,
      "grad_norm": 1.8631489706081992,
      "learning_rate": 4.2357274401473305e-07,
      "loss": 0.318,
      "step": 46
    },
    {
      "epoch": 0.0043304003316902385,
      "grad_norm": 1.7237134286009292,
      "learning_rate": 4.3278084714548806e-07,
      "loss": 0.3012,
      "step": 47
    },
    {
      "epoch": 0.004422536508960243,
      "grad_norm": 1.849534050723691,
      "learning_rate": 4.419889502762431e-07,
      "loss": 0.3069,
      "step": 48
    },
    {
      "epoch": 0.004514672686230248,
      "grad_norm": 1.7076022714184271,
      "learning_rate": 4.5119705340699824e-07,
      "loss": 0.3099,
      "step": 49
    },
    {
      "epoch": 0.0046068088635002535,
      "grad_norm": 1.8201092125421363,
      "learning_rate": 4.6040515653775325e-07,
      "loss": 0.3363,
      "step": 50
    },
    {
      "epoch": 0.004698945040770258,
      "grad_norm": 1.5793947312415075,
      "learning_rate": 4.696132596685083e-07,
      "loss": 0.2906,
      "step": 51
    },
    {
      "epoch": 0.004791081218040264,
      "grad_norm": 1.7063324415343504,
      "learning_rate": 4.788213627992634e-07,
      "loss": 0.3293,
      "step": 52
    },
    {
      "epoch": 0.0048832173953102685,
      "grad_norm": 1.5334969451288736,
      "learning_rate": 4.880294659300184e-07,
      "loss": 0.2983,
      "step": 53
    },
    {
      "epoch": 0.004975353572580274,
      "grad_norm": 1.5375570913421324,
      "learning_rate": 4.972375690607735e-07,
      "loss": 0.3062,
      "step": 54
    },
    {
      "epoch": 0.005067489749850279,
      "grad_norm": 1.531795714777244,
      "learning_rate": 5.064456721915287e-07,
      "loss": 0.2957,
      "step": 55
    },
    {
      "epoch": 0.0051596259271202835,
      "grad_norm": 1.6190246422343229,
      "learning_rate": 5.156537753222837e-07,
      "loss": 0.3123,
      "step": 56
    },
    {
      "epoch": 0.005251762104390289,
      "grad_norm": 1.550801896644375,
      "learning_rate": 5.248618784530387e-07,
      "loss": 0.3136,
      "step": 57
    },
    {
      "epoch": 0.005343898281660294,
      "grad_norm": 1.4783938737129982,
      "learning_rate": 5.340699815837938e-07,
      "loss": 0.2735,
      "step": 58
    },
    {
      "epoch": 0.005436034458930299,
      "grad_norm": 1.4621021369750078,
      "learning_rate": 5.432780847145488e-07,
      "loss": 0.2884,
      "step": 59
    },
    {
      "epoch": 0.005528170636200304,
      "grad_norm": 1.3921579835622648,
      "learning_rate": 5.524861878453039e-07,
      "loss": 0.2867,
      "step": 60
    },
    {
      "epoch": 0.005620306813470309,
      "grad_norm": 1.5262968398070378,
      "learning_rate": 5.61694290976059e-07,
      "loss": 0.298,
      "step": 61
    },
    {
      "epoch": 0.005712442990740314,
      "grad_norm": 1.5915698039983843,
      "learning_rate": 5.70902394106814e-07,
      "loss": 0.3088,
      "step": 62
    },
    {
      "epoch": 0.005804579168010319,
      "grad_norm": 1.5397162188270683,
      "learning_rate": 5.80110497237569e-07,
      "loss": 0.2788,
      "step": 63
    },
    {
      "epoch": 0.005896715345280325,
      "grad_norm": 1.4822801901893587,
      "learning_rate": 5.893186003683242e-07,
      "loss": 0.3007,
      "step": 64
    },
    {
      "epoch": 0.005988851522550329,
      "grad_norm": 1.4690507666454256,
      "learning_rate": 5.985267034990793e-07,
      "loss": 0.2784,
      "step": 65
    },
    {
      "epoch": 0.006080987699820335,
      "grad_norm": 1.3696146492094494,
      "learning_rate": 6.077348066298343e-07,
      "loss": 0.2824,
      "step": 66
    },
    {
      "epoch": 0.00617312387709034,
      "grad_norm": 1.4331132908765383,
      "learning_rate": 6.169429097605894e-07,
      "loss": 0.2809,
      "step": 67
    },
    {
      "epoch": 0.006265260054360344,
      "grad_norm": 1.3225884273908008,
      "learning_rate": 6.261510128913445e-07,
      "loss": 0.2803,
      "step": 68
    },
    {
      "epoch": 0.00635739623163035,
      "grad_norm": 1.3651533674561502,
      "learning_rate": 6.353591160220995e-07,
      "loss": 0.2722,
      "step": 69
    },
    {
      "epoch": 0.0064495324089003546,
      "grad_norm": 1.3972206619657161,
      "learning_rate": 6.445672191528545e-07,
      "loss": 0.2856,
      "step": 70
    },
    {
      "epoch": 0.00654166858617036,
      "grad_norm": 1.3786952654094349,
      "learning_rate": 6.537753222836097e-07,
      "loss": 0.2821,
      "step": 71
    },
    {
      "epoch": 0.006633804763440365,
      "grad_norm": 1.3835265992223351,
      "learning_rate": 6.629834254143647e-07,
      "loss": 0.2859,
      "step": 72
    },
    {
      "epoch": 0.0067259409407103695,
      "grad_norm": 1.3167354078664357,
      "learning_rate": 6.721915285451197e-07,
      "loss": 0.2693,
      "step": 73
    },
    {
      "epoch": 0.006818077117980375,
      "grad_norm": 1.2157572639965608,
      "learning_rate": 6.813996316758749e-07,
      "loss": 0.2678,
      "step": 74
    },
    {
      "epoch": 0.00691021329525038,
      "grad_norm": 1.4145127549666732,
      "learning_rate": 6.906077348066299e-07,
      "loss": 0.2752,
      "step": 75
    },
    {
      "epoch": 0.007002349472520385,
      "grad_norm": 1.3643122772858198,
      "learning_rate": 6.99815837937385e-07,
      "loss": 0.2893,
      "step": 76
    },
    {
      "epoch": 0.00709448564979039,
      "grad_norm": 1.292319390060687,
      "learning_rate": 7.0902394106814e-07,
      "loss": 0.26,
      "step": 77
    },
    {
      "epoch": 0.007186621827060396,
      "grad_norm": 1.4121119413990915,
      "learning_rate": 7.18232044198895e-07,
      "loss": 0.2838,
      "step": 78
    },
    {
      "epoch": 0.0072787580043304,
      "grad_norm": 1.3573902039549837,
      "learning_rate": 7.274401473296501e-07,
      "loss": 0.2673,
      "step": 79
    },
    {
      "epoch": 0.007370894181600405,
      "grad_norm": 1.251616266192662,
      "learning_rate": 7.366482504604053e-07,
      "loss": 0.2795,
      "step": 80
    },
    {
      "epoch": 0.007463030358870411,
      "grad_norm": 1.2676616962987843,
      "learning_rate": 7.458563535911603e-07,
      "loss": 0.274,
      "step": 81
    },
    {
      "epoch": 0.007555166536140415,
      "grad_norm": 1.2779375532844437,
      "learning_rate": 7.550644567219154e-07,
      "loss": 0.237,
      "step": 82
    },
    {
      "epoch": 0.007647302713410421,
      "grad_norm": 1.2823725432016053,
      "learning_rate": 7.642725598526704e-07,
      "loss": 0.2535,
      "step": 83
    },
    {
      "epoch": 0.007739438890680426,
      "grad_norm": 1.2914112825622275,
      "learning_rate": 7.734806629834254e-07,
      "loss": 0.2579,
      "step": 84
    },
    {
      "epoch": 0.00783157506795043,
      "grad_norm": 1.255061266268493,
      "learning_rate": 7.826887661141805e-07,
      "loss": 0.2553,
      "step": 85
    },
    {
      "epoch": 0.007923711245220436,
      "grad_norm": 1.37482617750355,
      "learning_rate": 7.918968692449356e-07,
      "loss": 0.2552,
      "step": 86
    },
    {
      "epoch": 0.008015847422490441,
      "grad_norm": 1.1987336596664846,
      "learning_rate": 8.011049723756907e-07,
      "loss": 0.2544,
      "step": 87
    },
    {
      "epoch": 0.008107983599760445,
      "grad_norm": 1.352509072072678,
      "learning_rate": 8.103130755064458e-07,
      "loss": 0.2697,
      "step": 88
    },
    {
      "epoch": 0.008200119777030451,
      "grad_norm": 1.2902205773865623,
      "learning_rate": 8.195211786372008e-07,
      "loss": 0.2477,
      "step": 89
    },
    {
      "epoch": 0.008292255954300456,
      "grad_norm": 1.395341337522817,
      "learning_rate": 8.287292817679559e-07,
      "loss": 0.2627,
      "step": 90
    },
    {
      "epoch": 0.00838439213157046,
      "grad_norm": 1.268160686732221,
      "learning_rate": 8.379373848987109e-07,
      "loss": 0.2851,
      "step": 91
    },
    {
      "epoch": 0.008476528308840466,
      "grad_norm": 1.3489234822086935,
      "learning_rate": 8.471454880294661e-07,
      "loss": 0.2619,
      "step": 92
    },
    {
      "epoch": 0.008568664486110471,
      "grad_norm": 1.3220216831015386,
      "learning_rate": 8.563535911602211e-07,
      "loss": 0.2579,
      "step": 93
    },
    {
      "epoch": 0.008660800663380477,
      "grad_norm": 1.1760530408428194,
      "learning_rate": 8.655616942909761e-07,
      "loss": 0.2461,
      "step": 94
    },
    {
      "epoch": 0.008752936840650481,
      "grad_norm": 1.5393872750676103,
      "learning_rate": 8.747697974217311e-07,
      "loss": 0.2838,
      "step": 95
    },
    {
      "epoch": 0.008845073017920486,
      "grad_norm": 1.4358175929752721,
      "learning_rate": 8.839779005524863e-07,
      "loss": 0.255,
      "step": 96
    },
    {
      "epoch": 0.008937209195190492,
      "grad_norm": 1.3687969807695604,
      "learning_rate": 8.931860036832413e-07,
      "loss": 0.2563,
      "step": 97
    },
    {
      "epoch": 0.009029345372460496,
      "grad_norm": 1.3219226525379928,
      "learning_rate": 9.023941068139965e-07,
      "loss": 0.2482,
      "step": 98
    },
    {
      "epoch": 0.009121481549730501,
      "grad_norm": 1.3519450540596627,
      "learning_rate": 9.116022099447515e-07,
      "loss": 0.2524,
      "step": 99
    },
    {
      "epoch": 0.009213617727000507,
      "grad_norm": 1.2749396142008642,
      "learning_rate": 9.208103130755065e-07,
      "loss": 0.2371,
      "step": 100
    },
    {
      "epoch": 0.009305753904270513,
      "grad_norm": 1.1980272664855356,
      "learning_rate": 9.300184162062616e-07,
      "loss": 0.2508,
      "step": 101
    },
    {
      "epoch": 0.009397890081540516,
      "grad_norm": 1.3779116744427602,
      "learning_rate": 9.392265193370166e-07,
      "loss": 0.2719,
      "step": 102
    },
    {
      "epoch": 0.009490026258810522,
      "grad_norm": 1.2481451142639794,
      "learning_rate": 9.484346224677716e-07,
      "loss": 0.2406,
      "step": 103
    },
    {
      "epoch": 0.009582162436080528,
      "grad_norm": 1.2456086574919798,
      "learning_rate": 9.576427255985269e-07,
      "loss": 0.2594,
      "step": 104
    },
    {
      "epoch": 0.009674298613350531,
      "grad_norm": 1.3052822180290655,
      "learning_rate": 9.66850828729282e-07,
      "loss": 0.2578,
      "step": 105
    },
    {
      "epoch": 0.009766434790620537,
      "grad_norm": 1.2798135957331098,
      "learning_rate": 9.760589318600369e-07,
      "loss": 0.2449,
      "step": 106
    },
    {
      "epoch": 0.009858570967890543,
      "grad_norm": 1.2233602450508594,
      "learning_rate": 9.85267034990792e-07,
      "loss": 0.23,
      "step": 107
    },
    {
      "epoch": 0.009950707145160548,
      "grad_norm": 1.27138699960983,
      "learning_rate": 9.94475138121547e-07,
      "loss": 0.2575,
      "step": 108
    },
    {
      "epoch": 0.010042843322430552,
      "grad_norm": 1.4590705423131205,
      "learning_rate": 1.003683241252302e-06,
      "loss": 0.2414,
      "step": 109
    },
    {
      "epoch": 0.010134979499700558,
      "grad_norm": 1.3032600900132378,
      "learning_rate": 1.0128913443830573e-06,
      "loss": 0.2422,
      "step": 110
    },
    {
      "epoch": 0.010227115676970563,
      "grad_norm": 1.246423626921792,
      "learning_rate": 1.0220994475138122e-06,
      "loss": 0.2504,
      "step": 111
    },
    {
      "epoch": 0.010319251854240567,
      "grad_norm": 1.3617978945476827,
      "learning_rate": 1.0313075506445673e-06,
      "loss": 0.2625,
      "step": 112
    },
    {
      "epoch": 0.010411388031510573,
      "grad_norm": 1.3138368592325604,
      "learning_rate": 1.0405156537753222e-06,
      "loss": 0.2717,
      "step": 113
    },
    {
      "epoch": 0.010503524208780578,
      "grad_norm": 1.45931937030065,
      "learning_rate": 1.0497237569060774e-06,
      "loss": 0.2338,
      "step": 114
    },
    {
      "epoch": 0.010595660386050582,
      "grad_norm": 1.2209072353641341,
      "learning_rate": 1.0589318600368325e-06,
      "loss": 0.2571,
      "step": 115
    },
    {
      "epoch": 0.010687796563320588,
      "grad_norm": 1.2221332342582498,
      "learning_rate": 1.0681399631675876e-06,
      "loss": 0.2181,
      "step": 116
    },
    {
      "epoch": 0.010779932740590593,
      "grad_norm": 1.3055782277521266,
      "learning_rate": 1.0773480662983427e-06,
      "loss": 0.2413,
      "step": 117
    },
    {
      "epoch": 0.010872068917860599,
      "grad_norm": 1.3001013433954538,
      "learning_rate": 1.0865561694290976e-06,
      "loss": 0.2561,
      "step": 118
    },
    {
      "epoch": 0.010964205095130602,
      "grad_norm": 1.4430759553426427,
      "learning_rate": 1.0957642725598527e-06,
      "loss": 0.2393,
      "step": 119
    },
    {
      "epoch": 0.011056341272400608,
      "grad_norm": 1.4425457370059072,
      "learning_rate": 1.1049723756906078e-06,
      "loss": 0.2349,
      "step": 120
    },
    {
      "epoch": 0.011148477449670614,
      "grad_norm": 1.235681217544338,
      "learning_rate": 1.114180478821363e-06,
      "loss": 0.2315,
      "step": 121
    },
    {
      "epoch": 0.011240613626940617,
      "grad_norm": 1.291133894680049,
      "learning_rate": 1.123388581952118e-06,
      "loss": 0.2442,
      "step": 122
    },
    {
      "epoch": 0.011332749804210623,
      "grad_norm": 1.2986607434244122,
      "learning_rate": 1.132596685082873e-06,
      "loss": 0.2427,
      "step": 123
    },
    {
      "epoch": 0.011424885981480629,
      "grad_norm": 1.3600935260637073,
      "learning_rate": 1.141804788213628e-06,
      "loss": 0.2507,
      "step": 124
    },
    {
      "epoch": 0.011517022158750634,
      "grad_norm": 1.2882709655715936,
      "learning_rate": 1.1510128913443832e-06,
      "loss": 0.2491,
      "step": 125
    },
    {
      "epoch": 0.011609158336020638,
      "grad_norm": 1.236349701513875,
      "learning_rate": 1.160220994475138e-06,
      "loss": 0.2238,
      "step": 126
    },
    {
      "epoch": 0.011701294513290644,
      "grad_norm": 1.277175622784304,
      "learning_rate": 1.1694290976058934e-06,
      "loss": 0.2306,
      "step": 127
    },
    {
      "epoch": 0.01179343069056065,
      "grad_norm": 1.3466287077359933,
      "learning_rate": 1.1786372007366483e-06,
      "loss": 0.2529,
      "step": 128
    },
    {
      "epoch": 0.011885566867830653,
      "grad_norm": 1.2600725855409367,
      "learning_rate": 1.1878453038674034e-06,
      "loss": 0.2297,
      "step": 129
    },
    {
      "epoch": 0.011977703045100659,
      "grad_norm": 1.1909522608327074,
      "learning_rate": 1.1970534069981586e-06,
      "loss": 0.2428,
      "step": 130
    },
    {
      "epoch": 0.012069839222370664,
      "grad_norm": 1.3275342654407982,
      "learning_rate": 1.2062615101289135e-06,
      "loss": 0.2387,
      "step": 131
    },
    {
      "epoch": 0.01216197539964067,
      "grad_norm": 1.3832794168368345,
      "learning_rate": 1.2154696132596686e-06,
      "loss": 0.2606,
      "step": 132
    },
    {
      "epoch": 0.012254111576910674,
      "grad_norm": 1.4083734454299084,
      "learning_rate": 1.2246777163904237e-06,
      "loss": 0.2558,
      "step": 133
    },
    {
      "epoch": 0.01234624775418068,
      "grad_norm": 1.3604330663851263,
      "learning_rate": 1.2338858195211788e-06,
      "loss": 0.2131,
      "step": 134
    },
    {
      "epoch": 0.012438383931450685,
      "grad_norm": 1.368946573958846,
      "learning_rate": 1.243093922651934e-06,
      "loss": 0.2607,
      "step": 135
    },
    {
      "epoch": 0.012530520108720689,
      "grad_norm": 1.4349854840515686,
      "learning_rate": 1.252302025782689e-06,
      "loss": 0.2543,
      "step": 136
    },
    {
      "epoch": 0.012622656285990694,
      "grad_norm": 1.3053177174437076,
      "learning_rate": 1.261510128913444e-06,
      "loss": 0.2066,
      "step": 137
    },
    {
      "epoch": 0.0127147924632607,
      "grad_norm": 1.4065693991109225,
      "learning_rate": 1.270718232044199e-06,
      "loss": 0.2428,
      "step": 138
    },
    {
      "epoch": 0.012806928640530704,
      "grad_norm": 1.3060084203827886,
      "learning_rate": 1.2799263351749542e-06,
      "loss": 0.2452,
      "step": 139
    },
    {
      "epoch": 0.012899064817800709,
      "grad_norm": 1.42770860862496,
      "learning_rate": 1.289134438305709e-06,
      "loss": 0.2375,
      "step": 140
    },
    {
      "epoch": 0.012991200995070715,
      "grad_norm": 1.3712130826622553,
      "learning_rate": 1.2983425414364642e-06,
      "loss": 0.2296,
      "step": 141
    },
    {
      "epoch": 0.01308333717234072,
      "grad_norm": 1.2949739115350103,
      "learning_rate": 1.3075506445672193e-06,
      "loss": 0.2249,
      "step": 142
    },
    {
      "epoch": 0.013175473349610724,
      "grad_norm": 1.4444498310803144,
      "learning_rate": 1.3167587476979742e-06,
      "loss": 0.2336,
      "step": 143
    },
    {
      "epoch": 0.01326760952688073,
      "grad_norm": 1.327765157794959,
      "learning_rate": 1.3259668508287293e-06,
      "loss": 0.2305,
      "step": 144
    },
    {
      "epoch": 0.013359745704150735,
      "grad_norm": 1.49483024693552,
      "learning_rate": 1.3351749539594844e-06,
      "loss": 0.2524,
      "step": 145
    },
    {
      "epoch": 0.013451881881420739,
      "grad_norm": 1.4128065918962016,
      "learning_rate": 1.3443830570902393e-06,
      "loss": 0.2421,
      "step": 146
    },
    {
      "epoch": 0.013544018058690745,
      "grad_norm": 1.458256896983337,
      "learning_rate": 1.3535911602209945e-06,
      "loss": 0.256,
      "step": 147
    },
    {
      "epoch": 0.01363615423596075,
      "grad_norm": 1.5761688856396325,
      "learning_rate": 1.3627992633517498e-06,
      "loss": 0.2283,
      "step": 148
    },
    {
      "epoch": 0.013728290413230756,
      "grad_norm": 1.4268159296492195,
      "learning_rate": 1.372007366482505e-06,
      "loss": 0.231,
      "step": 149
    },
    {
      "epoch": 0.01382042659050076,
      "grad_norm": 1.240181839931121,
      "learning_rate": 1.3812154696132598e-06,
      "loss": 0.2265,
      "step": 150
    },
    {
      "epoch": 0.013912562767770765,
      "grad_norm": 1.3560921208474808,
      "learning_rate": 1.390423572744015e-06,
      "loss": 0.2347,
      "step": 151
    },
    {
      "epoch": 0.01400469894504077,
      "grad_norm": 1.440218247026957,
      "learning_rate": 1.39963167587477e-06,
      "loss": 0.2265,
      "step": 152
    },
    {
      "epoch": 0.014096835122310775,
      "grad_norm": 1.3168656248813988,
      "learning_rate": 1.408839779005525e-06,
      "loss": 0.2361,
      "step": 153
    },
    {
      "epoch": 0.01418897129958078,
      "grad_norm": 1.387358557045741,
      "learning_rate": 1.41804788213628e-06,
      "loss": 0.2538,
      "step": 154
    },
    {
      "epoch": 0.014281107476850786,
      "grad_norm": 1.4226212454591165,
      "learning_rate": 1.4272559852670352e-06,
      "loss": 0.2386,
      "step": 155
    },
    {
      "epoch": 0.014373243654120791,
      "grad_norm": 1.4868929751549826,
      "learning_rate": 1.43646408839779e-06,
      "loss": 0.2503,
      "step": 156
    },
    {
      "epoch": 0.014465379831390795,
      "grad_norm": 1.3156667636135637,
      "learning_rate": 1.4456721915285452e-06,
      "loss": 0.2439,
      "step": 157
    },
    {
      "epoch": 0.0145575160086608,
      "grad_norm": 1.3284249384355258,
      "learning_rate": 1.4548802946593003e-06,
      "loss": 0.2325,
      "step": 158
    },
    {
      "epoch": 0.014649652185930806,
      "grad_norm": 1.2878557366716903,
      "learning_rate": 1.4640883977900552e-06,
      "loss": 0.2056,
      "step": 159
    },
    {
      "epoch": 0.01474178836320081,
      "grad_norm": 1.4793729308344177,
      "learning_rate": 1.4732965009208105e-06,
      "loss": 0.2571,
      "step": 160
    },
    {
      "epoch": 0.014833924540470816,
      "grad_norm": 1.2481891533067875,
      "learning_rate": 1.4825046040515656e-06,
      "loss": 0.2195,
      "step": 161
    },
    {
      "epoch": 0.014926060717740821,
      "grad_norm": 1.3944875094813025,
      "learning_rate": 1.4917127071823205e-06,
      "loss": 0.2245,
      "step": 162
    },
    {
      "epoch": 0.015018196895010825,
      "grad_norm": 1.37854617862289,
      "learning_rate": 1.5009208103130757e-06,
      "loss": 0.2298,
      "step": 163
    },
    {
      "epoch": 0.01511033307228083,
      "grad_norm": 1.3740996859347074,
      "learning_rate": 1.5101289134438308e-06,
      "loss": 0.2343,
      "step": 164
    },
    {
      "epoch": 0.015202469249550836,
      "grad_norm": 1.379655917316226,
      "learning_rate": 1.5193370165745857e-06,
      "loss": 0.2271,
      "step": 165
    },
    {
      "epoch": 0.015294605426820842,
      "grad_norm": 1.2845573670743051,
      "learning_rate": 1.5285451197053408e-06,
      "loss": 0.221,
      "step": 166
    },
    {
      "epoch": 0.015386741604090846,
      "grad_norm": 1.3382949270875386,
      "learning_rate": 1.537753222836096e-06,
      "loss": 0.2053,
      "step": 167
    },
    {
      "epoch": 0.015478877781360851,
      "grad_norm": 1.2241039135765772,
      "learning_rate": 1.5469613259668508e-06,
      "loss": 0.2051,
      "step": 168
    },
    {
      "epoch": 0.015571013958630857,
      "grad_norm": 1.353071391505974,
      "learning_rate": 1.556169429097606e-06,
      "loss": 0.2238,
      "step": 169
    },
    {
      "epoch": 0.01566315013590086,
      "grad_norm": 1.3108612555966297,
      "learning_rate": 1.565377532228361e-06,
      "loss": 0.2422,
      "step": 170
    },
    {
      "epoch": 0.015755286313170868,
      "grad_norm": 1.3798597771479884,
      "learning_rate": 1.574585635359116e-06,
      "loss": 0.211,
      "step": 171
    },
    {
      "epoch": 0.015847422490440872,
      "grad_norm": 1.2463211759017325,
      "learning_rate": 1.5837937384898713e-06,
      "loss": 0.2124,
      "step": 172
    },
    {
      "epoch": 0.015939558667710876,
      "grad_norm": 1.2337293449366062,
      "learning_rate": 1.5930018416206264e-06,
      "loss": 0.2183,
      "step": 173
    },
    {
      "epoch": 0.016031694844980883,
      "grad_norm": 1.2103763277878807,
      "learning_rate": 1.6022099447513815e-06,
      "loss": 0.2236,
      "step": 174
    },
    {
      "epoch": 0.016123831022250887,
      "grad_norm": 1.220903675064504,
      "learning_rate": 1.6114180478821364e-06,
      "loss": 0.2141,
      "step": 175
    },
    {
      "epoch": 0.01621596719952089,
      "grad_norm": 1.358619080502357,
      "learning_rate": 1.6206261510128915e-06,
      "loss": 0.2423,
      "step": 176
    },
    {
      "epoch": 0.016308103376790898,
      "grad_norm": 1.2782364766180747,
      "learning_rate": 1.6298342541436466e-06,
      "loss": 0.2346,
      "step": 177
    },
    {
      "epoch": 0.016400239554060902,
      "grad_norm": 1.3105220268621274,
      "learning_rate": 1.6390423572744015e-06,
      "loss": 0.2506,
      "step": 178
    },
    {
      "epoch": 0.016492375731330906,
      "grad_norm": 1.2782540222227745,
      "learning_rate": 1.6482504604051566e-06,
      "loss": 0.2216,
      "step": 179
    },
    {
      "epoch": 0.016584511908600913,
      "grad_norm": 1.5337856840982391,
      "learning_rate": 1.6574585635359118e-06,
      "loss": 0.2348,
      "step": 180
    },
    {
      "epoch": 0.016676648085870917,
      "grad_norm": 1.2855938394022077,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.2321,
      "step": 181
    },
    {
      "epoch": 0.01676878426314092,
      "grad_norm": 1.3688482992570172,
      "learning_rate": 1.6758747697974218e-06,
      "loss": 0.2443,
      "step": 182
    },
    {
      "epoch": 0.016860920440410928,
      "grad_norm": 1.3343184731235973,
      "learning_rate": 1.685082872928177e-06,
      "loss": 0.2121,
      "step": 183
    },
    {
      "epoch": 0.016953056617680932,
      "grad_norm": 1.225401208028096,
      "learning_rate": 1.6942909760589322e-06,
      "loss": 0.2287,
      "step": 184
    },
    {
      "epoch": 0.01704519279495094,
      "grad_norm": 1.2179622098203036,
      "learning_rate": 1.7034990791896871e-06,
      "loss": 0.2049,
      "step": 185
    },
    {
      "epoch": 0.017137328972220943,
      "grad_norm": 1.5066030755860567,
      "learning_rate": 1.7127071823204422e-06,
      "loss": 0.2424,
      "step": 186
    },
    {
      "epoch": 0.017229465149490947,
      "grad_norm": 1.4045090484290212,
      "learning_rate": 1.7219152854511971e-06,
      "loss": 0.2227,
      "step": 187
    },
    {
      "epoch": 0.017321601326760954,
      "grad_norm": 1.272435969600215,
      "learning_rate": 1.7311233885819523e-06,
      "loss": 0.2431,
      "step": 188
    },
    {
      "epoch": 0.017413737504030958,
      "grad_norm": 1.3028523579116038,
      "learning_rate": 1.7403314917127074e-06,
      "loss": 0.2179,
      "step": 189
    },
    {
      "epoch": 0.017505873681300962,
      "grad_norm": 1.515833129596805,
      "learning_rate": 1.7495395948434623e-06,
      "loss": 0.2518,
      "step": 190
    },
    {
      "epoch": 0.01759800985857097,
      "grad_norm": 1.3640712213334758,
      "learning_rate": 1.7587476979742174e-06,
      "loss": 0.2519,
      "step": 191
    },
    {
      "epoch": 0.017690146035840973,
      "grad_norm": 1.2963567960878155,
      "learning_rate": 1.7679558011049725e-06,
      "loss": 0.2122,
      "step": 192
    },
    {
      "epoch": 0.017782282213110977,
      "grad_norm": 1.2385890307787466,
      "learning_rate": 1.7771639042357274e-06,
      "loss": 0.218,
      "step": 193
    },
    {
      "epoch": 0.017874418390380984,
      "grad_norm": 1.2918958910678935,
      "learning_rate": 1.7863720073664825e-06,
      "loss": 0.2275,
      "step": 194
    },
    {
      "epoch": 0.017966554567650988,
      "grad_norm": 1.3240547033002077,
      "learning_rate": 1.7955801104972378e-06,
      "loss": 0.2265,
      "step": 195
    },
    {
      "epoch": 0.01805869074492099,
      "grad_norm": 1.3215736623947212,
      "learning_rate": 1.804788213627993e-06,
      "loss": 0.2106,
      "step": 196
    },
    {
      "epoch": 0.018150826922191,
      "grad_norm": 1.3962463357518629,
      "learning_rate": 1.8139963167587479e-06,
      "loss": 0.2421,
      "step": 197
    },
    {
      "epoch": 0.018242963099461003,
      "grad_norm": 1.3634363454930103,
      "learning_rate": 1.823204419889503e-06,
      "loss": 0.2258,
      "step": 198
    },
    {
      "epoch": 0.018335099276731007,
      "grad_norm": 1.3838622302412065,
      "learning_rate": 1.832412523020258e-06,
      "loss": 0.2086,
      "step": 199
    },
    {
      "epoch": 0.018427235454001014,
      "grad_norm": 1.3181256821025102,
      "learning_rate": 1.841620626151013e-06,
      "loss": 0.2129,
      "step": 200
    },
    {
      "epoch": 0.018519371631271018,
      "grad_norm": 1.270539722225883,
      "learning_rate": 1.8508287292817681e-06,
      "loss": 0.2191,
      "step": 201
    },
    {
      "epoch": 0.018611507808541025,
      "grad_norm": 1.265711181176557,
      "learning_rate": 1.8600368324125232e-06,
      "loss": 0.2061,
      "step": 202
    },
    {
      "epoch": 0.01870364398581103,
      "grad_norm": 1.4039473787664178,
      "learning_rate": 1.8692449355432781e-06,
      "loss": 0.2277,
      "step": 203
    },
    {
      "epoch": 0.018795780163081033,
      "grad_norm": 1.345966851950806,
      "learning_rate": 1.8784530386740332e-06,
      "loss": 0.2065,
      "step": 204
    },
    {
      "epoch": 0.01888791634035104,
      "grad_norm": 1.3892265247643658,
      "learning_rate": 1.8876611418047884e-06,
      "loss": 0.2117,
      "step": 205
    },
    {
      "epoch": 0.018980052517621044,
      "grad_norm": 1.3391019958709516,
      "learning_rate": 1.8968692449355433e-06,
      "loss": 0.2241,
      "step": 206
    },
    {
      "epoch": 0.019072188694891048,
      "grad_norm": 1.3767301542758652,
      "learning_rate": 1.9060773480662986e-06,
      "loss": 0.2394,
      "step": 207
    },
    {
      "epoch": 0.019164324872161055,
      "grad_norm": 1.193499504261302,
      "learning_rate": 1.9152854511970537e-06,
      "loss": 0.2147,
      "step": 208
    },
    {
      "epoch": 0.01925646104943106,
      "grad_norm": 1.42744498061299,
      "learning_rate": 1.9244935543278086e-06,
      "loss": 0.2454,
      "step": 209
    },
    {
      "epoch": 0.019348597226701063,
      "grad_norm": 1.2070717468524428,
      "learning_rate": 1.933701657458564e-06,
      "loss": 0.2281,
      "step": 210
    },
    {
      "epoch": 0.01944073340397107,
      "grad_norm": 1.2184409700694656,
      "learning_rate": 1.942909760589319e-06,
      "loss": 0.2011,
      "step": 211
    },
    {
      "epoch": 0.019532869581241074,
      "grad_norm": 1.244082773508379,
      "learning_rate": 1.9521178637200737e-06,
      "loss": 0.2198,
      "step": 212
    },
    {
      "epoch": 0.019625005758511078,
      "grad_norm": 1.1946783073071228,
      "learning_rate": 1.961325966850829e-06,
      "loss": 0.2174,
      "step": 213
    },
    {
      "epoch": 0.019717141935781085,
      "grad_norm": 1.2601297485847678,
      "learning_rate": 1.970534069981584e-06,
      "loss": 0.2225,
      "step": 214
    },
    {
      "epoch": 0.01980927811305109,
      "grad_norm": 1.3124765793917974,
      "learning_rate": 1.979742173112339e-06,
      "loss": 0.2267,
      "step": 215
    },
    {
      "epoch": 0.019901414290321096,
      "grad_norm": 1.3267678443080182,
      "learning_rate": 1.988950276243094e-06,
      "loss": 0.2297,
      "step": 216
    },
    {
      "epoch": 0.0199935504675911,
      "grad_norm": 1.267293008421713,
      "learning_rate": 1.998158379373849e-06,
      "loss": 0.2181,
      "step": 217
    },
    {
      "epoch": 0.020085686644861104,
      "grad_norm": 1.2619101408630657,
      "learning_rate": 2.007366482504604e-06,
      "loss": 0.2397,
      "step": 218
    },
    {
      "epoch": 0.02017782282213111,
      "grad_norm": 1.3636244878125987,
      "learning_rate": 2.0165745856353593e-06,
      "loss": 0.2253,
      "step": 219
    },
    {
      "epoch": 0.020269958999401115,
      "grad_norm": 1.394830925894432,
      "learning_rate": 2.0257826887661147e-06,
      "loss": 0.2252,
      "step": 220
    },
    {
      "epoch": 0.02036209517667112,
      "grad_norm": 1.2983165359381221,
      "learning_rate": 2.0349907918968696e-06,
      "loss": 0.2278,
      "step": 221
    },
    {
      "epoch": 0.020454231353941126,
      "grad_norm": 1.2967437740330148,
      "learning_rate": 2.0441988950276245e-06,
      "loss": 0.2124,
      "step": 222
    },
    {
      "epoch": 0.02054636753121113,
      "grad_norm": 1.4482194246277718,
      "learning_rate": 2.0534069981583794e-06,
      "loss": 0.2216,
      "step": 223
    },
    {
      "epoch": 0.020638503708481134,
      "grad_norm": 1.310894495587751,
      "learning_rate": 2.0626151012891347e-06,
      "loss": 0.222,
      "step": 224
    },
    {
      "epoch": 0.02073063988575114,
      "grad_norm": 1.2475533975236348,
      "learning_rate": 2.0718232044198896e-06,
      "loss": 0.2043,
      "step": 225
    },
    {
      "epoch": 0.020822776063021145,
      "grad_norm": 1.4060174527930498,
      "learning_rate": 2.0810313075506445e-06,
      "loss": 0.222,
      "step": 226
    },
    {
      "epoch": 0.02091491224029115,
      "grad_norm": 1.4368485294275846,
      "learning_rate": 2.0902394106814e-06,
      "loss": 0.2425,
      "step": 227
    },
    {
      "epoch": 0.021007048417561156,
      "grad_norm": 1.259305482075362,
      "learning_rate": 2.0994475138121547e-06,
      "loss": 0.223,
      "step": 228
    },
    {
      "epoch": 0.02109918459483116,
      "grad_norm": 1.433635435091614,
      "learning_rate": 2.1086556169429096e-06,
      "loss": 0.2223,
      "step": 229
    },
    {
      "epoch": 0.021191320772101164,
      "grad_norm": 1.3258788470822962,
      "learning_rate": 2.117863720073665e-06,
      "loss": 0.223,
      "step": 230
    },
    {
      "epoch": 0.02128345694937117,
      "grad_norm": 1.3345971348097236,
      "learning_rate": 2.1270718232044203e-06,
      "loss": 0.2088,
      "step": 231
    },
    {
      "epoch": 0.021375593126641175,
      "grad_norm": 1.1506446317260917,
      "learning_rate": 2.136279926335175e-06,
      "loss": 0.2183,
      "step": 232
    },
    {
      "epoch": 0.021467729303911182,
      "grad_norm": 1.2501482254633949,
      "learning_rate": 2.14548802946593e-06,
      "loss": 0.2218,
      "step": 233
    },
    {
      "epoch": 0.021559865481181186,
      "grad_norm": 1.2715617957043448,
      "learning_rate": 2.1546961325966854e-06,
      "loss": 0.2167,
      "step": 234
    },
    {
      "epoch": 0.02165200165845119,
      "grad_norm": 1.4251050947489576,
      "learning_rate": 2.1639042357274403e-06,
      "loss": 0.2378,
      "step": 235
    },
    {
      "epoch": 0.021744137835721197,
      "grad_norm": 1.2400115125049491,
      "learning_rate": 2.1731123388581952e-06,
      "loss": 0.2294,
      "step": 236
    },
    {
      "epoch": 0.0218362740129912,
      "grad_norm": 1.3035788835712026,
      "learning_rate": 2.1823204419889505e-06,
      "loss": 0.1968,
      "step": 237
    },
    {
      "epoch": 0.021928410190261205,
      "grad_norm": 1.471849667145631,
      "learning_rate": 2.1915285451197054e-06,
      "loss": 0.2228,
      "step": 238
    },
    {
      "epoch": 0.022020546367531212,
      "grad_norm": 1.3647414373400866,
      "learning_rate": 2.2007366482504604e-06,
      "loss": 0.224,
      "step": 239
    },
    {
      "epoch": 0.022112682544801216,
      "grad_norm": 1.537121143452077,
      "learning_rate": 2.2099447513812157e-06,
      "loss": 0.2334,
      "step": 240
    },
    {
      "epoch": 0.02220481872207122,
      "grad_norm": 1.2899612307328876,
      "learning_rate": 2.2191528545119706e-06,
      "loss": 0.207,
      "step": 241
    },
    {
      "epoch": 0.022296954899341227,
      "grad_norm": 1.2217522545050996,
      "learning_rate": 2.228360957642726e-06,
      "loss": 0.2236,
      "step": 242
    },
    {
      "epoch": 0.02238909107661123,
      "grad_norm": 1.395775888810296,
      "learning_rate": 2.237569060773481e-06,
      "loss": 0.2244,
      "step": 243
    },
    {
      "epoch": 0.022481227253881235,
      "grad_norm": 1.284804900306348,
      "learning_rate": 2.246777163904236e-06,
      "loss": 0.2098,
      "step": 244
    },
    {
      "epoch": 0.022573363431151242,
      "grad_norm": 1.304346808920648,
      "learning_rate": 2.255985267034991e-06,
      "loss": 0.2038,
      "step": 245
    },
    {
      "epoch": 0.022665499608421246,
      "grad_norm": 1.3086929964965677,
      "learning_rate": 2.265193370165746e-06,
      "loss": 0.221,
      "step": 246
    },
    {
      "epoch": 0.022757635785691253,
      "grad_norm": 1.2371068797697236,
      "learning_rate": 2.2744014732965013e-06,
      "loss": 0.2059,
      "step": 247
    },
    {
      "epoch": 0.022849771962961257,
      "grad_norm": 1.237860938668767,
      "learning_rate": 2.283609576427256e-06,
      "loss": 0.2116,
      "step": 248
    },
    {
      "epoch": 0.02294190814023126,
      "grad_norm": 1.1873106876741861,
      "learning_rate": 2.292817679558011e-06,
      "loss": 0.2044,
      "step": 249
    },
    {
      "epoch": 0.02303404431750127,
      "grad_norm": 1.284075741757394,
      "learning_rate": 2.3020257826887664e-06,
      "loss": 0.2265,
      "step": 250
    },
    {
      "epoch": 0.023126180494771272,
      "grad_norm": 1.2554034425448573,
      "learning_rate": 2.3112338858195213e-06,
      "loss": 0.2098,
      "step": 251
    },
    {
      "epoch": 0.023218316672041276,
      "grad_norm": 1.3561997983957859,
      "learning_rate": 2.320441988950276e-06,
      "loss": 0.2274,
      "step": 252
    },
    {
      "epoch": 0.023310452849311283,
      "grad_norm": 1.289899655742179,
      "learning_rate": 2.3296500920810315e-06,
      "loss": 0.203,
      "step": 253
    },
    {
      "epoch": 0.023402589026581287,
      "grad_norm": 1.207952118169262,
      "learning_rate": 2.338858195211787e-06,
      "loss": 0.2124,
      "step": 254
    },
    {
      "epoch": 0.02349472520385129,
      "grad_norm": 1.3805095413072321,
      "learning_rate": 2.3480662983425418e-06,
      "loss": 0.2284,
      "step": 255
    },
    {
      "epoch": 0.0235868613811213,
      "grad_norm": 1.3972049227450618,
      "learning_rate": 2.3572744014732967e-06,
      "loss": 0.2212,
      "step": 256
    },
    {
      "epoch": 0.023678997558391302,
      "grad_norm": 1.2247515177867434,
      "learning_rate": 2.366482504604052e-06,
      "loss": 0.2158,
      "step": 257
    },
    {
      "epoch": 0.023771133735661306,
      "grad_norm": 1.1692729997781546,
      "learning_rate": 2.375690607734807e-06,
      "loss": 0.2223,
      "step": 258
    },
    {
      "epoch": 0.023863269912931313,
      "grad_norm": 1.4293157160410055,
      "learning_rate": 2.384898710865562e-06,
      "loss": 0.2355,
      "step": 259
    },
    {
      "epoch": 0.023955406090201317,
      "grad_norm": 1.2833231867557153,
      "learning_rate": 2.394106813996317e-06,
      "loss": 0.2093,
      "step": 260
    },
    {
      "epoch": 0.02404754226747132,
      "grad_norm": 1.430588872964235,
      "learning_rate": 2.403314917127072e-06,
      "loss": 0.2299,
      "step": 261
    },
    {
      "epoch": 0.02413967844474133,
      "grad_norm": 1.3955869018367655,
      "learning_rate": 2.412523020257827e-06,
      "loss": 0.2235,
      "step": 262
    },
    {
      "epoch": 0.024231814622011332,
      "grad_norm": 1.3209105842207622,
      "learning_rate": 2.4217311233885823e-06,
      "loss": 0.2314,
      "step": 263
    },
    {
      "epoch": 0.02432395079928134,
      "grad_norm": 1.3675314084283223,
      "learning_rate": 2.430939226519337e-06,
      "loss": 0.192,
      "step": 264
    },
    {
      "epoch": 0.024416086976551343,
      "grad_norm": 1.3043781570646351,
      "learning_rate": 2.440147329650092e-06,
      "loss": 0.2218,
      "step": 265
    },
    {
      "epoch": 0.024508223153821347,
      "grad_norm": 1.2993635785678224,
      "learning_rate": 2.4493554327808474e-06,
      "loss": 0.2003,
      "step": 266
    },
    {
      "epoch": 0.024600359331091354,
      "grad_norm": 1.2707418652729778,
      "learning_rate": 2.4585635359116027e-06,
      "loss": 0.2036,
      "step": 267
    },
    {
      "epoch": 0.02469249550836136,
      "grad_norm": 1.2834662882271706,
      "learning_rate": 2.4677716390423576e-06,
      "loss": 0.2168,
      "step": 268
    },
    {
      "epoch": 0.024784631685631362,
      "grad_norm": 1.3433418131656627,
      "learning_rate": 2.4769797421731125e-06,
      "loss": 0.2164,
      "step": 269
    },
    {
      "epoch": 0.02487676786290137,
      "grad_norm": 1.3675989386071368,
      "learning_rate": 2.486187845303868e-06,
      "loss": 0.2136,
      "step": 270
    },
    {
      "epoch": 0.024968904040171373,
      "grad_norm": 1.2819757220021681,
      "learning_rate": 2.4953959484346228e-06,
      "loss": 0.2217,
      "step": 271
    },
    {
      "epoch": 0.025061040217441377,
      "grad_norm": 1.4596357277210503,
      "learning_rate": 2.504604051565378e-06,
      "loss": 0.2018,
      "step": 272
    },
    {
      "epoch": 0.025153176394711384,
      "grad_norm": 1.2597067670175457,
      "learning_rate": 2.513812154696133e-06,
      "loss": 0.2315,
      "step": 273
    },
    {
      "epoch": 0.025245312571981388,
      "grad_norm": 1.2971255477113983,
      "learning_rate": 2.523020257826888e-06,
      "loss": 0.2087,
      "step": 274
    },
    {
      "epoch": 0.025337448749251392,
      "grad_norm": 1.332947894390514,
      "learning_rate": 2.5322283609576432e-06,
      "loss": 0.2127,
      "step": 275
    },
    {
      "epoch": 0.0254295849265214,
      "grad_norm": 1.1260373924980331,
      "learning_rate": 2.541436464088398e-06,
      "loss": 0.1973,
      "step": 276
    },
    {
      "epoch": 0.025521721103791403,
      "grad_norm": 1.2445947528995862,
      "learning_rate": 2.550644567219153e-06,
      "loss": 0.2099,
      "step": 277
    },
    {
      "epoch": 0.025613857281061407,
      "grad_norm": 1.3694045461593618,
      "learning_rate": 2.5598526703499083e-06,
      "loss": 0.2243,
      "step": 278
    },
    {
      "epoch": 0.025705993458331414,
      "grad_norm": 1.297962915474662,
      "learning_rate": 2.5690607734806632e-06,
      "loss": 0.2204,
      "step": 279
    },
    {
      "epoch": 0.025798129635601418,
      "grad_norm": 1.2909158781778913,
      "learning_rate": 2.578268876611418e-06,
      "loss": 0.2211,
      "step": 280
    },
    {
      "epoch": 0.025890265812871426,
      "grad_norm": 1.320578975065506,
      "learning_rate": 2.5874769797421735e-06,
      "loss": 0.1965,
      "step": 281
    },
    {
      "epoch": 0.02598240199014143,
      "grad_norm": 1.2210472413434825,
      "learning_rate": 2.5966850828729284e-06,
      "loss": 0.2134,
      "step": 282
    },
    {
      "epoch": 0.026074538167411433,
      "grad_norm": 1.1578898719596564,
      "learning_rate": 2.6058931860036833e-06,
      "loss": 0.2039,
      "step": 283
    },
    {
      "epoch": 0.02616667434468144,
      "grad_norm": 1.3398605179539043,
      "learning_rate": 2.6151012891344386e-06,
      "loss": 0.2091,
      "step": 284
    },
    {
      "epoch": 0.026258810521951444,
      "grad_norm": 1.2764074537081127,
      "learning_rate": 2.6243093922651935e-06,
      "loss": 0.2127,
      "step": 285
    },
    {
      "epoch": 0.026350946699221448,
      "grad_norm": 1.201092481830308,
      "learning_rate": 2.6335174953959484e-06,
      "loss": 0.2134,
      "step": 286
    },
    {
      "epoch": 0.026443082876491456,
      "grad_norm": 1.3808995093878993,
      "learning_rate": 2.6427255985267037e-06,
      "loss": 0.2229,
      "step": 287
    },
    {
      "epoch": 0.02653521905376146,
      "grad_norm": 1.3001384527969946,
      "learning_rate": 2.6519337016574586e-06,
      "loss": 0.2292,
      "step": 288
    },
    {
      "epoch": 0.026627355231031463,
      "grad_norm": 1.3053774770933373,
      "learning_rate": 2.6611418047882135e-06,
      "loss": 0.22,
      "step": 289
    },
    {
      "epoch": 0.02671949140830147,
      "grad_norm": 1.449134250644644,
      "learning_rate": 2.670349907918969e-06,
      "loss": 0.2194,
      "step": 290
    },
    {
      "epoch": 0.026811627585571474,
      "grad_norm": 1.437368533930005,
      "learning_rate": 2.6795580110497238e-06,
      "loss": 0.2239,
      "step": 291
    },
    {
      "epoch": 0.026903763762841478,
      "grad_norm": 1.2446885130520247,
      "learning_rate": 2.6887661141804787e-06,
      "loss": 0.2141,
      "step": 292
    },
    {
      "epoch": 0.026995899940111485,
      "grad_norm": 1.7040675956383482,
      "learning_rate": 2.697974217311234e-06,
      "loss": 0.2224,
      "step": 293
    },
    {
      "epoch": 0.02708803611738149,
      "grad_norm": 1.332041322961633,
      "learning_rate": 2.707182320441989e-06,
      "loss": 0.2214,
      "step": 294
    },
    {
      "epoch": 0.027180172294651497,
      "grad_norm": 1.2590166578500492,
      "learning_rate": 2.716390423572744e-06,
      "loss": 0.2179,
      "step": 295
    },
    {
      "epoch": 0.0272723084719215,
      "grad_norm": 1.4226861813160818,
      "learning_rate": 2.7255985267034996e-06,
      "loss": 0.2196,
      "step": 296
    },
    {
      "epoch": 0.027364444649191504,
      "grad_norm": 1.3934346019180117,
      "learning_rate": 2.7348066298342545e-06,
      "loss": 0.2201,
      "step": 297
    },
    {
      "epoch": 0.02745658082646151,
      "grad_norm": 1.2888343424352768,
      "learning_rate": 2.74401473296501e-06,
      "loss": 0.2109,
      "step": 298
    },
    {
      "epoch": 0.027548717003731515,
      "grad_norm": 1.4048452398008997,
      "learning_rate": 2.7532228360957647e-06,
      "loss": 0.22,
      "step": 299
    },
    {
      "epoch": 0.02764085318100152,
      "grad_norm": 1.2759644069246936,
      "learning_rate": 2.7624309392265196e-06,
      "loss": 0.2002,
      "step": 300
    },
    {
      "epoch": 0.027732989358271527,
      "grad_norm": 1.257513125532842,
      "learning_rate": 2.771639042357275e-06,
      "loss": 0.2016,
      "step": 301
    },
    {
      "epoch": 0.02782512553554153,
      "grad_norm": 1.1604132143904993,
      "learning_rate": 2.78084714548803e-06,
      "loss": 0.2001,
      "step": 302
    },
    {
      "epoch": 0.027917261712811534,
      "grad_norm": 1.300325855758057,
      "learning_rate": 2.7900552486187847e-06,
      "loss": 0.2034,
      "step": 303
    },
    {
      "epoch": 0.02800939789008154,
      "grad_norm": 1.3505529212765077,
      "learning_rate": 2.79926335174954e-06,
      "loss": 0.2161,
      "step": 304
    },
    {
      "epoch": 0.028101534067351545,
      "grad_norm": 1.2614356404469431,
      "learning_rate": 2.808471454880295e-06,
      "loss": 0.1892,
      "step": 305
    },
    {
      "epoch": 0.02819367024462155,
      "grad_norm": 1.461383179054911,
      "learning_rate": 2.81767955801105e-06,
      "loss": 0.2128,
      "step": 306
    },
    {
      "epoch": 0.028285806421891557,
      "grad_norm": 1.25242901824847,
      "learning_rate": 2.826887661141805e-06,
      "loss": 0.22,
      "step": 307
    },
    {
      "epoch": 0.02837794259916156,
      "grad_norm": 1.2270884655785574,
      "learning_rate": 2.83609576427256e-06,
      "loss": 0.1904,
      "step": 308
    },
    {
      "epoch": 0.028470078776431564,
      "grad_norm": 1.23710825518334,
      "learning_rate": 2.845303867403315e-06,
      "loss": 0.2057,
      "step": 309
    },
    {
      "epoch": 0.02856221495370157,
      "grad_norm": 1.2747541107670282,
      "learning_rate": 2.8545119705340703e-06,
      "loss": 0.2165,
      "step": 310
    },
    {
      "epoch": 0.028654351130971575,
      "grad_norm": 1.5134645452681845,
      "learning_rate": 2.8637200736648252e-06,
      "loss": 0.2277,
      "step": 311
    },
    {
      "epoch": 0.028746487308241583,
      "grad_norm": 1.2152059027151885,
      "learning_rate": 2.87292817679558e-06,
      "loss": 0.2047,
      "step": 312
    },
    {
      "epoch": 0.028838623485511587,
      "grad_norm": 1.3277121581950153,
      "learning_rate": 2.8821362799263355e-06,
      "loss": 0.2256,
      "step": 313
    },
    {
      "epoch": 0.02893075966278159,
      "grad_norm": 1.32730086437395,
      "learning_rate": 2.8913443830570904e-06,
      "loss": 0.2202,
      "step": 314
    },
    {
      "epoch": 0.029022895840051598,
      "grad_norm": 1.363811967060764,
      "learning_rate": 2.9005524861878453e-06,
      "loss": 0.2197,
      "step": 315
    },
    {
      "epoch": 0.0291150320173216,
      "grad_norm": 1.2590769976128895,
      "learning_rate": 2.9097605893186006e-06,
      "loss": 0.2082,
      "step": 316
    },
    {
      "epoch": 0.029207168194591605,
      "grad_norm": 1.2277995813074711,
      "learning_rate": 2.9189686924493555e-06,
      "loss": 0.2061,
      "step": 317
    },
    {
      "epoch": 0.029299304371861613,
      "grad_norm": 1.3793722738217344,
      "learning_rate": 2.9281767955801104e-06,
      "loss": 0.1971,
      "step": 318
    },
    {
      "epoch": 0.029391440549131616,
      "grad_norm": 1.361895665830025,
      "learning_rate": 2.937384898710866e-06,
      "loss": 0.2427,
      "step": 319
    },
    {
      "epoch": 0.02948357672640162,
      "grad_norm": 1.2966036850711782,
      "learning_rate": 2.946593001841621e-06,
      "loss": 0.2244,
      "step": 320
    },
    {
      "epoch": 0.029575712903671628,
      "grad_norm": 1.1971066961262655,
      "learning_rate": 2.955801104972376e-06,
      "loss": 0.2265,
      "step": 321
    },
    {
      "epoch": 0.02966784908094163,
      "grad_norm": 1.2180842224212953,
      "learning_rate": 2.9650092081031313e-06,
      "loss": 0.2238,
      "step": 322
    },
    {
      "epoch": 0.029759985258211635,
      "grad_norm": 1.1589180181774166,
      "learning_rate": 2.974217311233886e-06,
      "loss": 0.1929,
      "step": 323
    },
    {
      "epoch": 0.029852121435481643,
      "grad_norm": 1.2133350157084903,
      "learning_rate": 2.983425414364641e-06,
      "loss": 0.2134,
      "step": 324
    },
    {
      "epoch": 0.029944257612751646,
      "grad_norm": 1.1779599790119306,
      "learning_rate": 2.9926335174953964e-06,
      "loss": 0.2066,
      "step": 325
    },
    {
      "epoch": 0.03003639379002165,
      "grad_norm": 1.1847663049212864,
      "learning_rate": 3.0018416206261513e-06,
      "loss": 0.1822,
      "step": 326
    },
    {
      "epoch": 0.030128529967291658,
      "grad_norm": 1.2734701566041768,
      "learning_rate": 3.0110497237569062e-06,
      "loss": 0.2225,
      "step": 327
    },
    {
      "epoch": 0.03022066614456166,
      "grad_norm": 1.456216336880786,
      "learning_rate": 3.0202578268876615e-06,
      "loss": 0.2303,
      "step": 328
    },
    {
      "epoch": 0.03031280232183167,
      "grad_norm": 1.3359492802492923,
      "learning_rate": 3.0294659300184164e-06,
      "loss": 0.2327,
      "step": 329
    },
    {
      "epoch": 0.030404938499101673,
      "grad_norm": 1.3170633885393337,
      "learning_rate": 3.0386740331491713e-06,
      "loss": 0.2113,
      "step": 330
    },
    {
      "epoch": 0.030497074676371676,
      "grad_norm": 1.3663569659678911,
      "learning_rate": 3.0478821362799267e-06,
      "loss": 0.2292,
      "step": 331
    },
    {
      "epoch": 0.030589210853641684,
      "grad_norm": 1.3094102769500708,
      "learning_rate": 3.0570902394106816e-06,
      "loss": 0.2158,
      "step": 332
    },
    {
      "epoch": 0.030681347030911688,
      "grad_norm": 1.2497293850188491,
      "learning_rate": 3.0662983425414365e-06,
      "loss": 0.2161,
      "step": 333
    },
    {
      "epoch": 0.03077348320818169,
      "grad_norm": 1.1791717194673006,
      "learning_rate": 3.075506445672192e-06,
      "loss": 0.2042,
      "step": 334
    },
    {
      "epoch": 0.0308656193854517,
      "grad_norm": 1.2243353185304204,
      "learning_rate": 3.0847145488029467e-06,
      "loss": 0.2221,
      "step": 335
    },
    {
      "epoch": 0.030957755562721703,
      "grad_norm": 1.2144934202478015,
      "learning_rate": 3.0939226519337016e-06,
      "loss": 0.2091,
      "step": 336
    },
    {
      "epoch": 0.031049891739991706,
      "grad_norm": 1.3869201324530631,
      "learning_rate": 3.103130755064457e-06,
      "loss": 0.22,
      "step": 337
    },
    {
      "epoch": 0.031142027917261714,
      "grad_norm": 1.2522412871255026,
      "learning_rate": 3.112338858195212e-06,
      "loss": 0.2012,
      "step": 338
    },
    {
      "epoch": 0.031234164094531718,
      "grad_norm": 1.223982875197098,
      "learning_rate": 3.1215469613259667e-06,
      "loss": 0.2362,
      "step": 339
    },
    {
      "epoch": 0.03132630027180172,
      "grad_norm": 1.180490704761597,
      "learning_rate": 3.130755064456722e-06,
      "loss": 0.2006,
      "step": 340
    },
    {
      "epoch": 0.031418436449071725,
      "grad_norm": 1.334003641795415,
      "learning_rate": 3.139963167587477e-06,
      "loss": 0.217,
      "step": 341
    },
    {
      "epoch": 0.031510572626341736,
      "grad_norm": 1.2176716108720274,
      "learning_rate": 3.149171270718232e-06,
      "loss": 0.2124,
      "step": 342
    },
    {
      "epoch": 0.03160270880361174,
      "grad_norm": 1.3395536252432325,
      "learning_rate": 3.1583793738489876e-06,
      "loss": 0.2098,
      "step": 343
    },
    {
      "epoch": 0.031694844980881744,
      "grad_norm": 1.283429869492175,
      "learning_rate": 3.1675874769797425e-06,
      "loss": 0.2314,
      "step": 344
    },
    {
      "epoch": 0.03178698115815175,
      "grad_norm": 1.228230288659854,
      "learning_rate": 3.176795580110498e-06,
      "loss": 0.1965,
      "step": 345
    },
    {
      "epoch": 0.03187911733542175,
      "grad_norm": 1.2440315402589066,
      "learning_rate": 3.1860036832412528e-06,
      "loss": 0.2225,
      "step": 346
    },
    {
      "epoch": 0.031971253512691755,
      "grad_norm": 1.2499654799548787,
      "learning_rate": 3.1952117863720077e-06,
      "loss": 0.2206,
      "step": 347
    },
    {
      "epoch": 0.032063389689961766,
      "grad_norm": 1.2049177369182857,
      "learning_rate": 3.204419889502763e-06,
      "loss": 0.2035,
      "step": 348
    },
    {
      "epoch": 0.03215552586723177,
      "grad_norm": 1.2316769248478436,
      "learning_rate": 3.213627992633518e-06,
      "loss": 0.2145,
      "step": 349
    },
    {
      "epoch": 0.032247662044501774,
      "grad_norm": 1.2321172561939648,
      "learning_rate": 3.222836095764273e-06,
      "loss": 0.2112,
      "step": 350
    },
    {
      "epoch": 0.03233979822177178,
      "grad_norm": 1.1523176756649027,
      "learning_rate": 3.232044198895028e-06,
      "loss": 0.2056,
      "step": 351
    },
    {
      "epoch": 0.03243193439904178,
      "grad_norm": 1.1236159577981801,
      "learning_rate": 3.241252302025783e-06,
      "loss": 0.1955,
      "step": 352
    },
    {
      "epoch": 0.03252407057631179,
      "grad_norm": 1.2130769573681421,
      "learning_rate": 3.250460405156538e-06,
      "loss": 0.2054,
      "step": 353
    },
    {
      "epoch": 0.032616206753581796,
      "grad_norm": 1.1913432800528299,
      "learning_rate": 3.2596685082872933e-06,
      "loss": 0.1981,
      "step": 354
    },
    {
      "epoch": 0.0327083429308518,
      "grad_norm": 1.3859609731695497,
      "learning_rate": 3.268876611418048e-06,
      "loss": 0.2342,
      "step": 355
    },
    {
      "epoch": 0.032800479108121804,
      "grad_norm": 1.3072352238426874,
      "learning_rate": 3.278084714548803e-06,
      "loss": 0.2183,
      "step": 356
    },
    {
      "epoch": 0.03289261528539181,
      "grad_norm": 1.3473692917629814,
      "learning_rate": 3.2872928176795584e-06,
      "loss": 0.2029,
      "step": 357
    },
    {
      "epoch": 0.03298475146266181,
      "grad_norm": 1.2572639062134559,
      "learning_rate": 3.2965009208103133e-06,
      "loss": 0.2214,
      "step": 358
    },
    {
      "epoch": 0.03307688763993182,
      "grad_norm": 1.203052241995266,
      "learning_rate": 3.305709023941068e-06,
      "loss": 0.2026,
      "step": 359
    },
    {
      "epoch": 0.033169023817201826,
      "grad_norm": 1.2948231954080816,
      "learning_rate": 3.3149171270718235e-06,
      "loss": 0.2209,
      "step": 360
    },
    {
      "epoch": 0.03326115999447183,
      "grad_norm": 1.1961645679085242,
      "learning_rate": 3.3241252302025784e-06,
      "loss": 0.2119,
      "step": 361
    },
    {
      "epoch": 0.033353296171741834,
      "grad_norm": 1.2097174219855942,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.2142,
      "step": 362
    },
    {
      "epoch": 0.03344543234901184,
      "grad_norm": 1.242640883301389,
      "learning_rate": 3.3425414364640887e-06,
      "loss": 0.21,
      "step": 363
    },
    {
      "epoch": 0.03353756852628184,
      "grad_norm": 1.1964795201693772,
      "learning_rate": 3.3517495395948436e-06,
      "loss": 0.2179,
      "step": 364
    },
    {
      "epoch": 0.03362970470355185,
      "grad_norm": 1.2315695513233953,
      "learning_rate": 3.3609576427255985e-06,
      "loss": 0.2066,
      "step": 365
    },
    {
      "epoch": 0.033721840880821856,
      "grad_norm": 1.112363729635578,
      "learning_rate": 3.370165745856354e-06,
      "loss": 0.2008,
      "step": 366
    },
    {
      "epoch": 0.03381397705809186,
      "grad_norm": 1.2655168156807526,
      "learning_rate": 3.379373848987109e-06,
      "loss": 0.193,
      "step": 367
    },
    {
      "epoch": 0.033906113235361864,
      "grad_norm": 1.1496127915515584,
      "learning_rate": 3.3885819521178644e-06,
      "loss": 0.1982,
      "step": 368
    },
    {
      "epoch": 0.03399824941263187,
      "grad_norm": 1.174710108082871,
      "learning_rate": 3.3977900552486193e-06,
      "loss": 0.1977,
      "step": 369
    },
    {
      "epoch": 0.03409038558990188,
      "grad_norm": 1.2707966093570304,
      "learning_rate": 3.4069981583793742e-06,
      "loss": 0.2018,
      "step": 370
    },
    {
      "epoch": 0.03418252176717188,
      "grad_norm": 1.2562422642370359,
      "learning_rate": 3.416206261510129e-06,
      "loss": 0.2088,
      "step": 371
    },
    {
      "epoch": 0.034274657944441886,
      "grad_norm": 1.2280848584726436,
      "learning_rate": 3.4254143646408845e-06,
      "loss": 0.2105,
      "step": 372
    },
    {
      "epoch": 0.03436679412171189,
      "grad_norm": 1.217850057779426,
      "learning_rate": 3.4346224677716394e-06,
      "loss": 0.2063,
      "step": 373
    },
    {
      "epoch": 0.034458930298981894,
      "grad_norm": 1.2091286639825245,
      "learning_rate": 3.4438305709023943e-06,
      "loss": 0.2128,
      "step": 374
    },
    {
      "epoch": 0.0345510664762519,
      "grad_norm": 1.1715733041783976,
      "learning_rate": 3.4530386740331496e-06,
      "loss": 0.2021,
      "step": 375
    },
    {
      "epoch": 0.03464320265352191,
      "grad_norm": 1.2001707885169832,
      "learning_rate": 3.4622467771639045e-06,
      "loss": 0.2257,
      "step": 376
    },
    {
      "epoch": 0.03473533883079191,
      "grad_norm": 1.2368175910547214,
      "learning_rate": 3.4714548802946594e-06,
      "loss": 0.2145,
      "step": 377
    },
    {
      "epoch": 0.034827475008061916,
      "grad_norm": 1.166337000359825,
      "learning_rate": 3.4806629834254147e-06,
      "loss": 0.2079,
      "step": 378
    },
    {
      "epoch": 0.03491961118533192,
      "grad_norm": 1.1572816320295447,
      "learning_rate": 3.4898710865561696e-06,
      "loss": 0.1901,
      "step": 379
    },
    {
      "epoch": 0.035011747362601923,
      "grad_norm": 1.1237321439729187,
      "learning_rate": 3.4990791896869245e-06,
      "loss": 0.2007,
      "step": 380
    },
    {
      "epoch": 0.03510388353987193,
      "grad_norm": 1.215928962970433,
      "learning_rate": 3.50828729281768e-06,
      "loss": 0.2195,
      "step": 381
    },
    {
      "epoch": 0.03519601971714194,
      "grad_norm": 1.251329116724435,
      "learning_rate": 3.5174953959484348e-06,
      "loss": 0.2066,
      "step": 382
    },
    {
      "epoch": 0.03528815589441194,
      "grad_norm": 1.2234531446138686,
      "learning_rate": 3.5267034990791897e-06,
      "loss": 0.2093,
      "step": 383
    },
    {
      "epoch": 0.035380292071681946,
      "grad_norm": 1.2252920368192435,
      "learning_rate": 3.535911602209945e-06,
      "loss": 0.2202,
      "step": 384
    },
    {
      "epoch": 0.03547242824895195,
      "grad_norm": 1.2609463141051451,
      "learning_rate": 3.5451197053407e-06,
      "loss": 0.2001,
      "step": 385
    },
    {
      "epoch": 0.03556456442622195,
      "grad_norm": 1.2927472157292323,
      "learning_rate": 3.554327808471455e-06,
      "loss": 0.2256,
      "step": 386
    },
    {
      "epoch": 0.035656700603491964,
      "grad_norm": 1.155641554981568,
      "learning_rate": 3.56353591160221e-06,
      "loss": 0.199,
      "step": 387
    },
    {
      "epoch": 0.03574883678076197,
      "grad_norm": 1.2654887045193257,
      "learning_rate": 3.572744014732965e-06,
      "loss": 0.2078,
      "step": 388
    },
    {
      "epoch": 0.03584097295803197,
      "grad_norm": 1.2166982932381427,
      "learning_rate": 3.58195211786372e-06,
      "loss": 0.2103,
      "step": 389
    },
    {
      "epoch": 0.035933109135301976,
      "grad_norm": 1.37669075363763,
      "learning_rate": 3.5911602209944757e-06,
      "loss": 0.2152,
      "step": 390
    },
    {
      "epoch": 0.03602524531257198,
      "grad_norm": 1.3416512571374115,
      "learning_rate": 3.6003683241252306e-06,
      "loss": 0.1992,
      "step": 391
    },
    {
      "epoch": 0.03611738148984198,
      "grad_norm": 1.229234386328588,
      "learning_rate": 3.609576427255986e-06,
      "loss": 0.2187,
      "step": 392
    },
    {
      "epoch": 0.036209517667111994,
      "grad_norm": 1.1832506871580368,
      "learning_rate": 3.618784530386741e-06,
      "loss": 0.1978,
      "step": 393
    },
    {
      "epoch": 0.036301653844382,
      "grad_norm": 1.2383981566876872,
      "learning_rate": 3.6279926335174957e-06,
      "loss": 0.207,
      "step": 394
    },
    {
      "epoch": 0.036393790021652,
      "grad_norm": 1.1413755558262018,
      "learning_rate": 3.637200736648251e-06,
      "loss": 0.2155,
      "step": 395
    },
    {
      "epoch": 0.036485926198922006,
      "grad_norm": 1.2450403165766708,
      "learning_rate": 3.646408839779006e-06,
      "loss": 0.2142,
      "step": 396
    },
    {
      "epoch": 0.03657806237619201,
      "grad_norm": 1.2177024569527306,
      "learning_rate": 3.655616942909761e-06,
      "loss": 0.2167,
      "step": 397
    },
    {
      "epoch": 0.03667019855346201,
      "grad_norm": 1.2723658885665146,
      "learning_rate": 3.664825046040516e-06,
      "loss": 0.2194,
      "step": 398
    },
    {
      "epoch": 0.036762334730732024,
      "grad_norm": 1.174670391591689,
      "learning_rate": 3.674033149171271e-06,
      "loss": 0.2089,
      "step": 399
    },
    {
      "epoch": 0.03685447090800203,
      "grad_norm": 1.2221763371090841,
      "learning_rate": 3.683241252302026e-06,
      "loss": 0.2106,
      "step": 400
    },
    {
      "epoch": 0.03694660708527203,
      "grad_norm": 1.2965457839204568,
      "learning_rate": 3.6924493554327813e-06,
      "loss": 0.2099,
      "step": 401
    },
    {
      "epoch": 0.037038743262542036,
      "grad_norm": 1.2033987453656643,
      "learning_rate": 3.7016574585635362e-06,
      "loss": 0.2095,
      "step": 402
    },
    {
      "epoch": 0.03713087943981204,
      "grad_norm": 1.2068890988997176,
      "learning_rate": 3.710865561694291e-06,
      "loss": 0.201,
      "step": 403
    },
    {
      "epoch": 0.03722301561708205,
      "grad_norm": 1.203262066657531,
      "learning_rate": 3.7200736648250464e-06,
      "loss": 0.2073,
      "step": 404
    },
    {
      "epoch": 0.037315151794352054,
      "grad_norm": 1.2129025553528847,
      "learning_rate": 3.7292817679558014e-06,
      "loss": 0.2214,
      "step": 405
    },
    {
      "epoch": 0.03740728797162206,
      "grad_norm": 1.2850805303315755,
      "learning_rate": 3.7384898710865563e-06,
      "loss": 0.2202,
      "step": 406
    },
    {
      "epoch": 0.03749942414889206,
      "grad_norm": 1.1849304905609503,
      "learning_rate": 3.7476979742173116e-06,
      "loss": 0.1994,
      "step": 407
    },
    {
      "epoch": 0.037591560326162066,
      "grad_norm": 1.1447297755974901,
      "learning_rate": 3.7569060773480665e-06,
      "loss": 0.1877,
      "step": 408
    },
    {
      "epoch": 0.03768369650343207,
      "grad_norm": 1.3731333236915204,
      "learning_rate": 3.7661141804788214e-06,
      "loss": 0.2068,
      "step": 409
    },
    {
      "epoch": 0.03777583268070208,
      "grad_norm": 1.3507563680134274,
      "learning_rate": 3.7753222836095767e-06,
      "loss": 0.2184,
      "step": 410
    },
    {
      "epoch": 0.037867968857972084,
      "grad_norm": 1.2967176512561887,
      "learning_rate": 3.7845303867403316e-06,
      "loss": 0.1949,
      "step": 411
    },
    {
      "epoch": 0.03796010503524209,
      "grad_norm": 1.1615648678608848,
      "learning_rate": 3.7937384898710865e-06,
      "loss": 0.1906,
      "step": 412
    },
    {
      "epoch": 0.03805224121251209,
      "grad_norm": 1.1806971928220382,
      "learning_rate": 3.802946593001842e-06,
      "loss": 0.1814,
      "step": 413
    },
    {
      "epoch": 0.038144377389782096,
      "grad_norm": 1.273886277912273,
      "learning_rate": 3.812154696132597e-06,
      "loss": 0.2248,
      "step": 414
    },
    {
      "epoch": 0.038236513567052106,
      "grad_norm": 1.1335712217012766,
      "learning_rate": 3.8213627992633525e-06,
      "loss": 0.2128,
      "step": 415
    },
    {
      "epoch": 0.03832864974432211,
      "grad_norm": 1.1793652832224044,
      "learning_rate": 3.830570902394107e-06,
      "loss": 0.1874,
      "step": 416
    },
    {
      "epoch": 0.038420785921592114,
      "grad_norm": 1.2334577784320606,
      "learning_rate": 3.839779005524862e-06,
      "loss": 0.2141,
      "step": 417
    },
    {
      "epoch": 0.03851292209886212,
      "grad_norm": 1.1609662659175002,
      "learning_rate": 3.848987108655617e-06,
      "loss": 0.2094,
      "step": 418
    },
    {
      "epoch": 0.03860505827613212,
      "grad_norm": 1.1745343514754072,
      "learning_rate": 3.858195211786372e-06,
      "loss": 0.1954,
      "step": 419
    },
    {
      "epoch": 0.038697194453402126,
      "grad_norm": 1.2526388364382148,
      "learning_rate": 3.867403314917128e-06,
      "loss": 0.2275,
      "step": 420
    },
    {
      "epoch": 0.038789330630672136,
      "grad_norm": 1.4330497133104598,
      "learning_rate": 3.876611418047883e-06,
      "loss": 0.2126,
      "step": 421
    },
    {
      "epoch": 0.03888146680794214,
      "grad_norm": 1.2562815683824222,
      "learning_rate": 3.885819521178638e-06,
      "loss": 0.2024,
      "step": 422
    },
    {
      "epoch": 0.038973602985212144,
      "grad_norm": 1.4160966441130192,
      "learning_rate": 3.8950276243093926e-06,
      "loss": 0.2131,
      "step": 423
    },
    {
      "epoch": 0.03906573916248215,
      "grad_norm": 1.3816473101884428,
      "learning_rate": 3.9042357274401475e-06,
      "loss": 0.2113,
      "step": 424
    },
    {
      "epoch": 0.03915787533975215,
      "grad_norm": 1.2554377015630513,
      "learning_rate": 3.913443830570902e-06,
      "loss": 0.2132,
      "step": 425
    },
    {
      "epoch": 0.039250011517022156,
      "grad_norm": 1.3398302209339412,
      "learning_rate": 3.922651933701658e-06,
      "loss": 0.2099,
      "step": 426
    },
    {
      "epoch": 0.039342147694292166,
      "grad_norm": 1.3603810885516354,
      "learning_rate": 3.931860036832413e-06,
      "loss": 0.2125,
      "step": 427
    },
    {
      "epoch": 0.03943428387156217,
      "grad_norm": 1.258039947717146,
      "learning_rate": 3.941068139963168e-06,
      "loss": 0.2057,
      "step": 428
    },
    {
      "epoch": 0.039526420048832174,
      "grad_norm": 1.2036522078809784,
      "learning_rate": 3.950276243093923e-06,
      "loss": 0.1912,
      "step": 429
    },
    {
      "epoch": 0.03961855622610218,
      "grad_norm": 1.2675274340086102,
      "learning_rate": 3.959484346224678e-06,
      "loss": 0.2224,
      "step": 430
    },
    {
      "epoch": 0.03971069240337218,
      "grad_norm": 1.3973388775812086,
      "learning_rate": 3.968692449355433e-06,
      "loss": 0.2296,
      "step": 431
    },
    {
      "epoch": 0.03980282858064219,
      "grad_norm": 1.543316930802054,
      "learning_rate": 3.977900552486188e-06,
      "loss": 0.2062,
      "step": 432
    },
    {
      "epoch": 0.039894964757912196,
      "grad_norm": 1.1195071306132687,
      "learning_rate": 3.987108655616943e-06,
      "loss": 0.2085,
      "step": 433
    },
    {
      "epoch": 0.0399871009351822,
      "grad_norm": 1.4042138133782505,
      "learning_rate": 3.996316758747698e-06,
      "loss": 0.1929,
      "step": 434
    },
    {
      "epoch": 0.040079237112452204,
      "grad_norm": 1.458286673287262,
      "learning_rate": 4.005524861878453e-06,
      "loss": 0.2097,
      "step": 435
    },
    {
      "epoch": 0.04017137328972221,
      "grad_norm": 1.2963948315513325,
      "learning_rate": 4.014732965009208e-06,
      "loss": 0.2118,
      "step": 436
    },
    {
      "epoch": 0.04026350946699221,
      "grad_norm": 1.559050465013681,
      "learning_rate": 4.023941068139964e-06,
      "loss": 0.2149,
      "step": 437
    },
    {
      "epoch": 0.04035564564426222,
      "grad_norm": 1.2769319583081065,
      "learning_rate": 4.033149171270719e-06,
      "loss": 0.2219,
      "step": 438
    },
    {
      "epoch": 0.040447781821532226,
      "grad_norm": 1.3096848506654442,
      "learning_rate": 4.0423572744014736e-06,
      "loss": 0.1945,
      "step": 439
    },
    {
      "epoch": 0.04053991799880223,
      "grad_norm": 1.185303690120011,
      "learning_rate": 4.051565377532229e-06,
      "loss": 0.1891,
      "step": 440
    },
    {
      "epoch": 0.040632054176072234,
      "grad_norm": 1.159162633689249,
      "learning_rate": 4.060773480662984e-06,
      "loss": 0.2175,
      "step": 441
    },
    {
      "epoch": 0.04072419035334224,
      "grad_norm": 1.2662580758209543,
      "learning_rate": 4.069981583793739e-06,
      "loss": 0.2064,
      "step": 442
    },
    {
      "epoch": 0.04081632653061224,
      "grad_norm": 1.1246911765480914,
      "learning_rate": 4.079189686924494e-06,
      "loss": 0.219,
      "step": 443
    },
    {
      "epoch": 0.04090846270788225,
      "grad_norm": 1.1350391555698733,
      "learning_rate": 4.088397790055249e-06,
      "loss": 0.203,
      "step": 444
    },
    {
      "epoch": 0.041000598885152256,
      "grad_norm": 1.4680786044597403,
      "learning_rate": 4.097605893186004e-06,
      "loss": 0.2131,
      "step": 445
    },
    {
      "epoch": 0.04109273506242226,
      "grad_norm": 1.2159762672968413,
      "learning_rate": 4.106813996316759e-06,
      "loss": 0.2029,
      "step": 446
    },
    {
      "epoch": 0.041184871239692264,
      "grad_norm": 1.289465041658357,
      "learning_rate": 4.1160220994475145e-06,
      "loss": 0.2069,
      "step": 447
    },
    {
      "epoch": 0.04127700741696227,
      "grad_norm": 1.577269595539687,
      "learning_rate": 4.125230202578269e-06,
      "loss": 0.2179,
      "step": 448
    },
    {
      "epoch": 0.04136914359423228,
      "grad_norm": 1.2817783420567126,
      "learning_rate": 4.134438305709024e-06,
      "loss": 0.2043,
      "step": 449
    },
    {
      "epoch": 0.04146127977150228,
      "grad_norm": 1.1310315648514593,
      "learning_rate": 4.143646408839779e-06,
      "loss": 0.2162,
      "step": 450
    },
    {
      "epoch": 0.041553415948772286,
      "grad_norm": 1.3119107380289001,
      "learning_rate": 4.152854511970534e-06,
      "loss": 0.2197,
      "step": 451
    },
    {
      "epoch": 0.04164555212604229,
      "grad_norm": 1.3865007259353652,
      "learning_rate": 4.162062615101289e-06,
      "loss": 0.24,
      "step": 452
    },
    {
      "epoch": 0.041737688303312294,
      "grad_norm": 1.1858485712993334,
      "learning_rate": 4.171270718232045e-06,
      "loss": 0.209,
      "step": 453
    },
    {
      "epoch": 0.0418298244805823,
      "grad_norm": 1.1178283987286062,
      "learning_rate": 4.1804788213628e-06,
      "loss": 0.1983,
      "step": 454
    },
    {
      "epoch": 0.04192196065785231,
      "grad_norm": 1.201810219473216,
      "learning_rate": 4.1896869244935545e-06,
      "loss": 0.2191,
      "step": 455
    },
    {
      "epoch": 0.04201409683512231,
      "grad_norm": 1.2475953832029654,
      "learning_rate": 4.1988950276243095e-06,
      "loss": 0.2023,
      "step": 456
    },
    {
      "epoch": 0.042106233012392316,
      "grad_norm": 1.1804605088793463,
      "learning_rate": 4.208103130755064e-06,
      "loss": 0.2032,
      "step": 457
    },
    {
      "epoch": 0.04219836918966232,
      "grad_norm": 1.2262939863172024,
      "learning_rate": 4.217311233885819e-06,
      "loss": 0.2193,
      "step": 458
    },
    {
      "epoch": 0.042290505366932324,
      "grad_norm": 1.2144298896507224,
      "learning_rate": 4.226519337016575e-06,
      "loss": 0.221,
      "step": 459
    },
    {
      "epoch": 0.04238264154420233,
      "grad_norm": 1.2934211101242432,
      "learning_rate": 4.23572744014733e-06,
      "loss": 0.2258,
      "step": 460
    },
    {
      "epoch": 0.04247477772147234,
      "grad_norm": 1.1425010577356645,
      "learning_rate": 4.244935543278086e-06,
      "loss": 0.2027,
      "step": 461
    },
    {
      "epoch": 0.04256691389874234,
      "grad_norm": 1.185579804260426,
      "learning_rate": 4.2541436464088406e-06,
      "loss": 0.2351,
      "step": 462
    },
    {
      "epoch": 0.042659050076012346,
      "grad_norm": 1.1729170129773308,
      "learning_rate": 4.2633517495395955e-06,
      "loss": 0.2035,
      "step": 463
    },
    {
      "epoch": 0.04275118625328235,
      "grad_norm": 1.1888699825576086,
      "learning_rate": 4.27255985267035e-06,
      "loss": 0.2234,
      "step": 464
    },
    {
      "epoch": 0.042843322430552354,
      "grad_norm": 1.0956917428584472,
      "learning_rate": 4.281767955801105e-06,
      "loss": 0.1724,
      "step": 465
    },
    {
      "epoch": 0.042935458607822365,
      "grad_norm": 1.2428999853822662,
      "learning_rate": 4.29097605893186e-06,
      "loss": 0.2288,
      "step": 466
    },
    {
      "epoch": 0.04302759478509237,
      "grad_norm": 1.1530798568369878,
      "learning_rate": 4.300184162062616e-06,
      "loss": 0.2017,
      "step": 467
    },
    {
      "epoch": 0.04311973096236237,
      "grad_norm": 1.3439487754256692,
      "learning_rate": 4.309392265193371e-06,
      "loss": 0.2096,
      "step": 468
    },
    {
      "epoch": 0.043211867139632376,
      "grad_norm": 1.1683549358961656,
      "learning_rate": 4.318600368324126e-06,
      "loss": 0.2148,
      "step": 469
    },
    {
      "epoch": 0.04330400331690238,
      "grad_norm": 1.2522700216036091,
      "learning_rate": 4.327808471454881e-06,
      "loss": 0.2164,
      "step": 470
    },
    {
      "epoch": 0.043396139494172384,
      "grad_norm": 1.2387500928446844,
      "learning_rate": 4.3370165745856355e-06,
      "loss": 0.2054,
      "step": 471
    },
    {
      "epoch": 0.043488275671442395,
      "grad_norm": 1.1954111876349953,
      "learning_rate": 4.3462246777163904e-06,
      "loss": 0.2119,
      "step": 472
    },
    {
      "epoch": 0.0435804118487124,
      "grad_norm": 1.6231312157976416,
      "learning_rate": 4.355432780847146e-06,
      "loss": 0.2158,
      "step": 473
    },
    {
      "epoch": 0.0436725480259824,
      "grad_norm": 1.3358159604896864,
      "learning_rate": 4.364640883977901e-06,
      "loss": 0.2089,
      "step": 474
    },
    {
      "epoch": 0.043764684203252406,
      "grad_norm": 1.4873292661852786,
      "learning_rate": 4.373848987108656e-06,
      "loss": 0.2184,
      "step": 475
    },
    {
      "epoch": 0.04385682038052241,
      "grad_norm": 1.5926804326802442,
      "learning_rate": 4.383057090239411e-06,
      "loss": 0.2198,
      "step": 476
    },
    {
      "epoch": 0.043948956557792414,
      "grad_norm": 1.1544457982412275,
      "learning_rate": 4.392265193370166e-06,
      "loss": 0.2047,
      "step": 477
    },
    {
      "epoch": 0.044041092735062425,
      "grad_norm": 1.3790644801344767,
      "learning_rate": 4.401473296500921e-06,
      "loss": 0.2104,
      "step": 478
    },
    {
      "epoch": 0.04413322891233243,
      "grad_norm": 1.4144109682196038,
      "learning_rate": 4.4106813996316765e-06,
      "loss": 0.2169,
      "step": 479
    },
    {
      "epoch": 0.04422536508960243,
      "grad_norm": 1.235543716580457,
      "learning_rate": 4.419889502762431e-06,
      "loss": 0.2173,
      "step": 480
    },
    {
      "epoch": 0.044317501266872436,
      "grad_norm": 1.2283735168259708,
      "learning_rate": 4.429097605893186e-06,
      "loss": 0.1962,
      "step": 481
    },
    {
      "epoch": 0.04440963744414244,
      "grad_norm": 1.4283017557499813,
      "learning_rate": 4.438305709023941e-06,
      "loss": 0.218,
      "step": 482
    },
    {
      "epoch": 0.04450177362141245,
      "grad_norm": 1.4867664072627964,
      "learning_rate": 4.447513812154696e-06,
      "loss": 0.2183,
      "step": 483
    },
    {
      "epoch": 0.044593909798682455,
      "grad_norm": 1.4108802496691264,
      "learning_rate": 4.456721915285452e-06,
      "loss": 0.2013,
      "step": 484
    },
    {
      "epoch": 0.04468604597595246,
      "grad_norm": 1.166986570586516,
      "learning_rate": 4.465930018416207e-06,
      "loss": 0.2002,
      "step": 485
    },
    {
      "epoch": 0.04477818215322246,
      "grad_norm": 1.1648717298249538,
      "learning_rate": 4.475138121546962e-06,
      "loss": 0.2209,
      "step": 486
    },
    {
      "epoch": 0.044870318330492466,
      "grad_norm": 1.2876822750663734,
      "learning_rate": 4.484346224677717e-06,
      "loss": 0.2046,
      "step": 487
    },
    {
      "epoch": 0.04496245450776247,
      "grad_norm": 1.2801973779891336,
      "learning_rate": 4.493554327808472e-06,
      "loss": 0.2199,
      "step": 488
    },
    {
      "epoch": 0.04505459068503248,
      "grad_norm": 1.298576742843075,
      "learning_rate": 4.502762430939227e-06,
      "loss": 0.2117,
      "step": 489
    },
    {
      "epoch": 0.045146726862302484,
      "grad_norm": 1.3131277756849138,
      "learning_rate": 4.511970534069982e-06,
      "loss": 0.2126,
      "step": 490
    },
    {
      "epoch": 0.04523886303957249,
      "grad_norm": 1.163802616612496,
      "learning_rate": 4.521178637200737e-06,
      "loss": 0.2079,
      "step": 491
    },
    {
      "epoch": 0.04533099921684249,
      "grad_norm": 1.223443299674731,
      "learning_rate": 4.530386740331492e-06,
      "loss": 0.2047,
      "step": 492
    },
    {
      "epoch": 0.045423135394112496,
      "grad_norm": 1.183473280850051,
      "learning_rate": 4.539594843462248e-06,
      "loss": 0.1852,
      "step": 493
    },
    {
      "epoch": 0.04551527157138251,
      "grad_norm": 1.1651402187049773,
      "learning_rate": 4.5488029465930025e-06,
      "loss": 0.2038,
      "step": 494
    },
    {
      "epoch": 0.04560740774865251,
      "grad_norm": 1.1460140058645047,
      "learning_rate": 4.5580110497237574e-06,
      "loss": 0.196,
      "step": 495
    },
    {
      "epoch": 0.045699543925922514,
      "grad_norm": 1.1776138222171473,
      "learning_rate": 4.567219152854512e-06,
      "loss": 0.1995,
      "step": 496
    },
    {
      "epoch": 0.04579168010319252,
      "grad_norm": 1.2204020301333764,
      "learning_rate": 4.576427255985267e-06,
      "loss": 0.2143,
      "step": 497
    },
    {
      "epoch": 0.04588381628046252,
      "grad_norm": 1.205241368495962,
      "learning_rate": 4.585635359116022e-06,
      "loss": 0.1979,
      "step": 498
    },
    {
      "epoch": 0.045975952457732526,
      "grad_norm": 1.1734468213424174,
      "learning_rate": 4.594843462246777e-06,
      "loss": 0.2078,
      "step": 499
    },
    {
      "epoch": 0.04606808863500254,
      "grad_norm": 1.1082444575732158,
      "learning_rate": 4.604051565377533e-06,
      "loss": 0.1961,
      "step": 500
    },
    {
      "epoch": 0.04606808863500254,
      "eval_loss": 0.20690900087356567,
      "eval_runtime": 299.5863,
      "eval_samples_per_second": 23.422,
      "eval_steps_per_second": 2.931,
      "step": 500
    },
    {
      "epoch": 0.04616022481227254,
      "grad_norm": 1.2088449274977096,
      "learning_rate": 4.613259668508288e-06,
      "loss": 0.2047,
      "step": 501
    },
    {
      "epoch": 0.046252360989542544,
      "grad_norm": 1.142903312485517,
      "learning_rate": 4.622467771639043e-06,
      "loss": 0.2019,
      "step": 502
    },
    {
      "epoch": 0.04634449716681255,
      "grad_norm": 1.2256105445368588,
      "learning_rate": 4.6316758747697975e-06,
      "loss": 0.1987,
      "step": 503
    },
    {
      "epoch": 0.04643663334408255,
      "grad_norm": 1.1904513269582884,
      "learning_rate": 4.640883977900552e-06,
      "loss": 0.2127,
      "step": 504
    },
    {
      "epoch": 0.046528769521352556,
      "grad_norm": 1.1457301085346001,
      "learning_rate": 4.650092081031307e-06,
      "loss": 0.197,
      "step": 505
    },
    {
      "epoch": 0.04662090569862257,
      "grad_norm": 1.3077547506039735,
      "learning_rate": 4.659300184162063e-06,
      "loss": 0.1919,
      "step": 506
    },
    {
      "epoch": 0.04671304187589257,
      "grad_norm": 1.0715902844691318,
      "learning_rate": 4.668508287292818e-06,
      "loss": 0.1938,
      "step": 507
    },
    {
      "epoch": 0.046805178053162574,
      "grad_norm": 1.1850578820772626,
      "learning_rate": 4.677716390423574e-06,
      "loss": 0.2025,
      "step": 508
    },
    {
      "epoch": 0.04689731423043258,
      "grad_norm": 1.4073409971969546,
      "learning_rate": 4.686924493554329e-06,
      "loss": 0.2144,
      "step": 509
    },
    {
      "epoch": 0.04698945040770258,
      "grad_norm": 1.2622270185118862,
      "learning_rate": 4.6961325966850835e-06,
      "loss": 0.2194,
      "step": 510
    },
    {
      "epoch": 0.04708158658497259,
      "grad_norm": 1.2236474583396226,
      "learning_rate": 4.7053406998158384e-06,
      "loss": 0.2092,
      "step": 511
    },
    {
      "epoch": 0.0471737227622426,
      "grad_norm": 1.1348264980475313,
      "learning_rate": 4.714548802946593e-06,
      "loss": 0.2052,
      "step": 512
    },
    {
      "epoch": 0.0472658589395126,
      "grad_norm": 1.3406440516081668,
      "learning_rate": 4.723756906077348e-06,
      "loss": 0.207,
      "step": 513
    },
    {
      "epoch": 0.047357995116782604,
      "grad_norm": 1.446315027401559,
      "learning_rate": 4.732965009208104e-06,
      "loss": 0.2169,
      "step": 514
    },
    {
      "epoch": 0.04745013129405261,
      "grad_norm": 1.2350193328918795,
      "learning_rate": 4.742173112338859e-06,
      "loss": 0.2123,
      "step": 515
    },
    {
      "epoch": 0.04754226747132261,
      "grad_norm": 1.3453002863844115,
      "learning_rate": 4.751381215469614e-06,
      "loss": 0.2213,
      "step": 516
    },
    {
      "epoch": 0.04763440364859262,
      "grad_norm": 1.3208799605546395,
      "learning_rate": 4.760589318600369e-06,
      "loss": 0.2248,
      "step": 517
    },
    {
      "epoch": 0.04772653982586263,
      "grad_norm": 1.197417531741685,
      "learning_rate": 4.769797421731124e-06,
      "loss": 0.2067,
      "step": 518
    },
    {
      "epoch": 0.04781867600313263,
      "grad_norm": 1.166778788170299,
      "learning_rate": 4.7790055248618785e-06,
      "loss": 0.2121,
      "step": 519
    },
    {
      "epoch": 0.047910812180402634,
      "grad_norm": 1.2056681536807465,
      "learning_rate": 4.788213627992634e-06,
      "loss": 0.2048,
      "step": 520
    },
    {
      "epoch": 0.04800294835767264,
      "grad_norm": 1.1862496944445569,
      "learning_rate": 4.797421731123389e-06,
      "loss": 0.2052,
      "step": 521
    },
    {
      "epoch": 0.04809508453494264,
      "grad_norm": 1.1650915820727847,
      "learning_rate": 4.806629834254144e-06,
      "loss": 0.2037,
      "step": 522
    },
    {
      "epoch": 0.04818722071221265,
      "grad_norm": 1.0997406935439868,
      "learning_rate": 4.815837937384899e-06,
      "loss": 0.2162,
      "step": 523
    },
    {
      "epoch": 0.04827935688948266,
      "grad_norm": 1.142599490179962,
      "learning_rate": 4.825046040515654e-06,
      "loss": 0.1947,
      "step": 524
    },
    {
      "epoch": 0.04837149306675266,
      "grad_norm": 1.3182614669715278,
      "learning_rate": 4.834254143646409e-06,
      "loss": 0.2224,
      "step": 525
    },
    {
      "epoch": 0.048463629244022664,
      "grad_norm": 1.1006337099413035,
      "learning_rate": 4.8434622467771645e-06,
      "loss": 0.2104,
      "step": 526
    },
    {
      "epoch": 0.04855576542129267,
      "grad_norm": 1.2851582791992746,
      "learning_rate": 4.852670349907919e-06,
      "loss": 0.2112,
      "step": 527
    },
    {
      "epoch": 0.04864790159856268,
      "grad_norm": 1.226468392041963,
      "learning_rate": 4.861878453038674e-06,
      "loss": 0.2075,
      "step": 528
    },
    {
      "epoch": 0.04874003777583268,
      "grad_norm": 1.1605259603239653,
      "learning_rate": 4.871086556169429e-06,
      "loss": 0.2143,
      "step": 529
    },
    {
      "epoch": 0.04883217395310269,
      "grad_norm": 1.1872303036487082,
      "learning_rate": 4.880294659300184e-06,
      "loss": 0.2131,
      "step": 530
    },
    {
      "epoch": 0.04892431013037269,
      "grad_norm": 1.2324441915157167,
      "learning_rate": 4.889502762430939e-06,
      "loss": 0.2198,
      "step": 531
    },
    {
      "epoch": 0.049016446307642694,
      "grad_norm": 1.084873626256991,
      "learning_rate": 4.898710865561695e-06,
      "loss": 0.1981,
      "step": 532
    },
    {
      "epoch": 0.0491085824849127,
      "grad_norm": 1.0866645738744949,
      "learning_rate": 4.90791896869245e-06,
      "loss": 0.2105,
      "step": 533
    },
    {
      "epoch": 0.04920071866218271,
      "grad_norm": 1.2290547614755443,
      "learning_rate": 4.9171270718232054e-06,
      "loss": 0.2274,
      "step": 534
    },
    {
      "epoch": 0.04929285483945271,
      "grad_norm": 1.1524273585439213,
      "learning_rate": 4.92633517495396e-06,
      "loss": 0.2047,
      "step": 535
    },
    {
      "epoch": 0.04938499101672272,
      "grad_norm": 1.061747508568193,
      "learning_rate": 4.935543278084715e-06,
      "loss": 0.1734,
      "step": 536
    },
    {
      "epoch": 0.04947712719399272,
      "grad_norm": 1.2916494032401462,
      "learning_rate": 4.94475138121547e-06,
      "loss": 0.2168,
      "step": 537
    },
    {
      "epoch": 0.049569263371262724,
      "grad_norm": 1.269578911667256,
      "learning_rate": 4.953959484346225e-06,
      "loss": 0.2109,
      "step": 538
    },
    {
      "epoch": 0.04966139954853273,
      "grad_norm": 1.378804388434808,
      "learning_rate": 4.96316758747698e-06,
      "loss": 0.2091,
      "step": 539
    },
    {
      "epoch": 0.04975353572580274,
      "grad_norm": 1.1049708603346846,
      "learning_rate": 4.972375690607736e-06,
      "loss": 0.2072,
      "step": 540
    },
    {
      "epoch": 0.04984567190307274,
      "grad_norm": 1.2404672009645543,
      "learning_rate": 4.981583793738491e-06,
      "loss": 0.2066,
      "step": 541
    },
    {
      "epoch": 0.04993780808034275,
      "grad_norm": 1.1799110644655362,
      "learning_rate": 4.9907918968692455e-06,
      "loss": 0.2128,
      "step": 542
    },
    {
      "epoch": 0.05002994425761275,
      "grad_norm": 1.1896138346028056,
      "learning_rate": 5e-06,
      "loss": 0.2015,
      "step": 543
    },
    {
      "epoch": 0.050122080434882754,
      "grad_norm": 1.2225805533773542,
      "learning_rate": 4.999999883937366e-06,
      "loss": 0.213,
      "step": 544
    },
    {
      "epoch": 0.050214216612152765,
      "grad_norm": 1.2114568339356038,
      "learning_rate": 4.999999535749473e-06,
      "loss": 0.2064,
      "step": 545
    },
    {
      "epoch": 0.05030635278942277,
      "grad_norm": 1.1217499299052078,
      "learning_rate": 4.999998955436354e-06,
      "loss": 0.2024,
      "step": 546
    },
    {
      "epoch": 0.05039848896669277,
      "grad_norm": 1.133686745541808,
      "learning_rate": 4.999998142998064e-06,
      "loss": 0.2001,
      "step": 547
    },
    {
      "epoch": 0.050490625143962777,
      "grad_norm": 1.2440473687463243,
      "learning_rate": 4.999997098434676e-06,
      "loss": 0.2089,
      "step": 548
    },
    {
      "epoch": 0.05058276132123278,
      "grad_norm": 1.1784492874879233,
      "learning_rate": 4.999995821746289e-06,
      "loss": 0.2084,
      "step": 549
    },
    {
      "epoch": 0.050674897498502784,
      "grad_norm": 1.188597001170747,
      "learning_rate": 4.9999943129330204e-06,
      "loss": 0.2187,
      "step": 550
    },
    {
      "epoch": 0.050767033675772795,
      "grad_norm": 1.295042393574803,
      "learning_rate": 4.999992571995011e-06,
      "loss": 0.1944,
      "step": 551
    },
    {
      "epoch": 0.0508591698530428,
      "grad_norm": 1.0516405339241583,
      "learning_rate": 4.999990598932423e-06,
      "loss": 0.2001,
      "step": 552
    },
    {
      "epoch": 0.0509513060303128,
      "grad_norm": 1.0917727740604706,
      "learning_rate": 4.999988393745438e-06,
      "loss": 0.2018,
      "step": 553
    },
    {
      "epoch": 0.051043442207582806,
      "grad_norm": 1.2785884522845907,
      "learning_rate": 4.999985956434263e-06,
      "loss": 0.2369,
      "step": 554
    },
    {
      "epoch": 0.05113557838485281,
      "grad_norm": 1.1693288663566566,
      "learning_rate": 4.999983286999121e-06,
      "loss": 0.1985,
      "step": 555
    },
    {
      "epoch": 0.051227714562122814,
      "grad_norm": 1.1206519528655148,
      "learning_rate": 4.999980385440262e-06,
      "loss": 0.2056,
      "step": 556
    },
    {
      "epoch": 0.051319850739392825,
      "grad_norm": 1.1626004840751911,
      "learning_rate": 4.999977251757956e-06,
      "loss": 0.2036,
      "step": 557
    },
    {
      "epoch": 0.05141198691666283,
      "grad_norm": 1.326436529000634,
      "learning_rate": 4.999973885952492e-06,
      "loss": 0.2105,
      "step": 558
    },
    {
      "epoch": 0.05150412309393283,
      "grad_norm": 1.1994189863044933,
      "learning_rate": 4.9999702880241855e-06,
      "loss": 0.2136,
      "step": 559
    },
    {
      "epoch": 0.051596259271202836,
      "grad_norm": 1.2267888006027625,
      "learning_rate": 4.999966457973367e-06,
      "loss": 0.2173,
      "step": 560
    },
    {
      "epoch": 0.05168839544847284,
      "grad_norm": 1.1688358053384447,
      "learning_rate": 4.999962395800395e-06,
      "loss": 0.2334,
      "step": 561
    },
    {
      "epoch": 0.05178053162574285,
      "grad_norm": 1.3029073613643016,
      "learning_rate": 4.999958101505645e-06,
      "loss": 0.2071,
      "step": 562
    },
    {
      "epoch": 0.051872667803012855,
      "grad_norm": 1.1677420008318726,
      "learning_rate": 4.999953575089516e-06,
      "loss": 0.2165,
      "step": 563
    },
    {
      "epoch": 0.05196480398028286,
      "grad_norm": 1.116469178905927,
      "learning_rate": 4.999948816552429e-06,
      "loss": 0.2057,
      "step": 564
    },
    {
      "epoch": 0.05205694015755286,
      "grad_norm": 1.3097286700249924,
      "learning_rate": 4.999943825894825e-06,
      "loss": 0.2254,
      "step": 565
    },
    {
      "epoch": 0.052149076334822866,
      "grad_norm": 1.1322032799911372,
      "learning_rate": 4.999938603117167e-06,
      "loss": 0.2032,
      "step": 566
    },
    {
      "epoch": 0.05224121251209287,
      "grad_norm": 1.2103922176557846,
      "learning_rate": 4.999933148219942e-06,
      "loss": 0.2353,
      "step": 567
    },
    {
      "epoch": 0.05233334868936288,
      "grad_norm": 1.3405882785620524,
      "learning_rate": 4.999927461203654e-06,
      "loss": 0.2122,
      "step": 568
    },
    {
      "epoch": 0.052425484866632885,
      "grad_norm": 1.1357985528866983,
      "learning_rate": 4.999921542068833e-06,
      "loss": 0.2023,
      "step": 569
    },
    {
      "epoch": 0.05251762104390289,
      "grad_norm": 1.1622773497726775,
      "learning_rate": 4.9999153908160285e-06,
      "loss": 0.1914,
      "step": 570
    },
    {
      "epoch": 0.05260975722117289,
      "grad_norm": 1.2580747807039376,
      "learning_rate": 4.999909007445809e-06,
      "loss": 0.2155,
      "step": 571
    },
    {
      "epoch": 0.052701893398442896,
      "grad_norm": 1.1608877424155948,
      "learning_rate": 4.99990239195877e-06,
      "loss": 0.2156,
      "step": 572
    },
    {
      "epoch": 0.0527940295757129,
      "grad_norm": 1.1581171779291344,
      "learning_rate": 4.999895544355525e-06,
      "loss": 0.2128,
      "step": 573
    },
    {
      "epoch": 0.05288616575298291,
      "grad_norm": 1.153474177296958,
      "learning_rate": 4.9998884646367094e-06,
      "loss": 0.1973,
      "step": 574
    },
    {
      "epoch": 0.052978301930252915,
      "grad_norm": 1.1874129378072187,
      "learning_rate": 4.999881152802981e-06,
      "loss": 0.2063,
      "step": 575
    },
    {
      "epoch": 0.05307043810752292,
      "grad_norm": 1.0913852863675626,
      "learning_rate": 4.999873608855019e-06,
      "loss": 0.2013,
      "step": 576
    },
    {
      "epoch": 0.05316257428479292,
      "grad_norm": 1.124914712901831,
      "learning_rate": 4.999865832793522e-06,
      "loss": 0.2111,
      "step": 577
    },
    {
      "epoch": 0.053254710462062926,
      "grad_norm": 1.1644687308281916,
      "learning_rate": 4.9998578246192155e-06,
      "loss": 0.2059,
      "step": 578
    },
    {
      "epoch": 0.05334684663933294,
      "grad_norm": 1.2942615223555596,
      "learning_rate": 4.9998495843328385e-06,
      "loss": 0.2221,
      "step": 579
    },
    {
      "epoch": 0.05343898281660294,
      "grad_norm": 1.123179821863574,
      "learning_rate": 4.9998411119351605e-06,
      "loss": 0.2102,
      "step": 580
    },
    {
      "epoch": 0.053531118993872945,
      "grad_norm": 1.2273638033386287,
      "learning_rate": 4.999832407426966e-06,
      "loss": 0.215,
      "step": 581
    },
    {
      "epoch": 0.05362325517114295,
      "grad_norm": 1.2098380897181231,
      "learning_rate": 4.999823470809062e-06,
      "loss": 0.2148,
      "step": 582
    },
    {
      "epoch": 0.05371539134841295,
      "grad_norm": 0.9746673941052318,
      "learning_rate": 4.999814302082281e-06,
      "loss": 0.1878,
      "step": 583
    },
    {
      "epoch": 0.053807527525682956,
      "grad_norm": 1.0967724336422364,
      "learning_rate": 4.999804901247472e-06,
      "loss": 0.2021,
      "step": 584
    },
    {
      "epoch": 0.05389966370295297,
      "grad_norm": 1.2264527641340204,
      "learning_rate": 4.99979526830551e-06,
      "loss": 0.2083,
      "step": 585
    },
    {
      "epoch": 0.05399179988022297,
      "grad_norm": 1.185566661438185,
      "learning_rate": 4.999785403257288e-06,
      "loss": 0.1993,
      "step": 586
    },
    {
      "epoch": 0.054083936057492975,
      "grad_norm": 1.176578670156522,
      "learning_rate": 4.9997753061037225e-06,
      "loss": 0.1965,
      "step": 587
    },
    {
      "epoch": 0.05417607223476298,
      "grad_norm": 1.4360254532578547,
      "learning_rate": 4.9997649768457505e-06,
      "loss": 0.2219,
      "step": 588
    },
    {
      "epoch": 0.05426820841203298,
      "grad_norm": 1.144309223073869,
      "learning_rate": 4.999754415484331e-06,
      "loss": 0.2147,
      "step": 589
    },
    {
      "epoch": 0.05436034458930299,
      "grad_norm": 1.0619722443303092,
      "learning_rate": 4.9997436220204455e-06,
      "loss": 0.2046,
      "step": 590
    },
    {
      "epoch": 0.054452480766573,
      "grad_norm": 1.1799893495434046,
      "learning_rate": 4.9997325964550945e-06,
      "loss": 0.2243,
      "step": 591
    },
    {
      "epoch": 0.054544616943843,
      "grad_norm": 1.1114971013751254,
      "learning_rate": 4.999721338789304e-06,
      "loss": 0.2069,
      "step": 592
    },
    {
      "epoch": 0.054636753121113005,
      "grad_norm": 1.094133912064876,
      "learning_rate": 4.999709849024118e-06,
      "loss": 0.2018,
      "step": 593
    },
    {
      "epoch": 0.05472888929838301,
      "grad_norm": 1.0666830212123013,
      "learning_rate": 4.999698127160604e-06,
      "loss": 0.1898,
      "step": 594
    },
    {
      "epoch": 0.05482102547565301,
      "grad_norm": 1.1414568181365667,
      "learning_rate": 4.999686173199849e-06,
      "loss": 0.2035,
      "step": 595
    },
    {
      "epoch": 0.05491316165292302,
      "grad_norm": 1.1433975732381854,
      "learning_rate": 4.999673987142964e-06,
      "loss": 0.2044,
      "step": 596
    },
    {
      "epoch": 0.05500529783019303,
      "grad_norm": 1.2889576924059074,
      "learning_rate": 4.999661568991081e-06,
      "loss": 0.2042,
      "step": 597
    },
    {
      "epoch": 0.05509743400746303,
      "grad_norm": 1.2353072072103293,
      "learning_rate": 4.999648918745352e-06,
      "loss": 0.2115,
      "step": 598
    },
    {
      "epoch": 0.055189570184733035,
      "grad_norm": 1.2571958587328962,
      "learning_rate": 4.999636036406951e-06,
      "loss": 0.2169,
      "step": 599
    },
    {
      "epoch": 0.05528170636200304,
      "grad_norm": 1.46152140513451,
      "learning_rate": 4.999622921977076e-06,
      "loss": 0.2131,
      "step": 600
    },
    {
      "epoch": 0.05537384253927304,
      "grad_norm": 1.1820367409058008,
      "learning_rate": 4.999609575456944e-06,
      "loss": 0.1844,
      "step": 601
    },
    {
      "epoch": 0.05546597871654305,
      "grad_norm": 1.1737151850144656,
      "learning_rate": 4.9995959968477926e-06,
      "loss": 0.2256,
      "step": 602
    },
    {
      "epoch": 0.05555811489381306,
      "grad_norm": 1.1214004446252206,
      "learning_rate": 4.9995821861508844e-06,
      "loss": 0.1867,
      "step": 603
    },
    {
      "epoch": 0.05565025107108306,
      "grad_norm": 1.1260505778426881,
      "learning_rate": 4.999568143367501e-06,
      "loss": 0.1964,
      "step": 604
    },
    {
      "epoch": 0.055742387248353065,
      "grad_norm": 1.167666210113292,
      "learning_rate": 4.999553868498948e-06,
      "loss": 0.2115,
      "step": 605
    },
    {
      "epoch": 0.05583452342562307,
      "grad_norm": 1.1045012954183473,
      "learning_rate": 4.999539361546547e-06,
      "loss": 0.1758,
      "step": 606
    },
    {
      "epoch": 0.05592665960289308,
      "grad_norm": 1.233219811553698,
      "learning_rate": 4.999524622511649e-06,
      "loss": 0.2164,
      "step": 607
    },
    {
      "epoch": 0.05601879578016308,
      "grad_norm": 1.2706612209541799,
      "learning_rate": 4.99950965139562e-06,
      "loss": 0.2008,
      "step": 608
    },
    {
      "epoch": 0.05611093195743309,
      "grad_norm": 1.2646084508514779,
      "learning_rate": 4.999494448199851e-06,
      "loss": 0.2092,
      "step": 609
    },
    {
      "epoch": 0.05620306813470309,
      "grad_norm": 1.149501357551309,
      "learning_rate": 4.9994790129257535e-06,
      "loss": 0.1984,
      "step": 610
    },
    {
      "epoch": 0.056295204311973095,
      "grad_norm": 1.3291468516688794,
      "learning_rate": 4.999463345574761e-06,
      "loss": 0.2162,
      "step": 611
    },
    {
      "epoch": 0.0563873404892431,
      "grad_norm": 1.205429054407277,
      "learning_rate": 4.999447446148328e-06,
      "loss": 0.2137,
      "step": 612
    },
    {
      "epoch": 0.05647947666651311,
      "grad_norm": 1.2381435162688017,
      "learning_rate": 4.999431314647929e-06,
      "loss": 0.2129,
      "step": 613
    },
    {
      "epoch": 0.05657161284378311,
      "grad_norm": 1.2673565020592805,
      "learning_rate": 4.999414951075065e-06,
      "loss": 0.2007,
      "step": 614
    },
    {
      "epoch": 0.05666374902105312,
      "grad_norm": 1.1037792471970673,
      "learning_rate": 4.999398355431253e-06,
      "loss": 0.2108,
      "step": 615
    },
    {
      "epoch": 0.05675588519832312,
      "grad_norm": 1.16253375115935,
      "learning_rate": 4.999381527718036e-06,
      "loss": 0.2098,
      "step": 616
    },
    {
      "epoch": 0.056848021375593125,
      "grad_norm": 1.1401961391380055,
      "learning_rate": 4.999364467936974e-06,
      "loss": 0.2076,
      "step": 617
    },
    {
      "epoch": 0.05694015755286313,
      "grad_norm": 1.070625343707576,
      "learning_rate": 4.999347176089653e-06,
      "loss": 0.1909,
      "step": 618
    },
    {
      "epoch": 0.05703229373013314,
      "grad_norm": 1.1323161172935006,
      "learning_rate": 4.999329652177677e-06,
      "loss": 0.2081,
      "step": 619
    },
    {
      "epoch": 0.05712442990740314,
      "grad_norm": 1.1839375977802664,
      "learning_rate": 4.9993118962026735e-06,
      "loss": 0.2152,
      "step": 620
    },
    {
      "epoch": 0.05721656608467315,
      "grad_norm": 1.1346559448427023,
      "learning_rate": 4.999293908166292e-06,
      "loss": 0.1946,
      "step": 621
    },
    {
      "epoch": 0.05730870226194315,
      "grad_norm": 1.2069917410518431,
      "learning_rate": 4.999275688070202e-06,
      "loss": 0.1944,
      "step": 622
    },
    {
      "epoch": 0.057400838439213155,
      "grad_norm": 1.1793130901944762,
      "learning_rate": 4.999257235916096e-06,
      "loss": 0.2065,
      "step": 623
    },
    {
      "epoch": 0.057492974616483165,
      "grad_norm": 1.1633227618690698,
      "learning_rate": 4.999238551705686e-06,
      "loss": 0.1944,
      "step": 624
    },
    {
      "epoch": 0.05758511079375317,
      "grad_norm": 1.173343604205386,
      "learning_rate": 4.9992196354407075e-06,
      "loss": 0.2122,
      "step": 625
    },
    {
      "epoch": 0.05767724697102317,
      "grad_norm": 1.1303138827921249,
      "learning_rate": 4.999200487122917e-06,
      "loss": 0.2187,
      "step": 626
    },
    {
      "epoch": 0.05776938314829318,
      "grad_norm": 1.0398772349211105,
      "learning_rate": 4.999181106754093e-06,
      "loss": 0.1956,
      "step": 627
    },
    {
      "epoch": 0.05786151932556318,
      "grad_norm": 1.1638532808388222,
      "learning_rate": 4.999161494336033e-06,
      "loss": 0.1927,
      "step": 628
    },
    {
      "epoch": 0.057953655502833185,
      "grad_norm": 1.108151966330762,
      "learning_rate": 4.99914164987056e-06,
      "loss": 0.1986,
      "step": 629
    },
    {
      "epoch": 0.058045791680103195,
      "grad_norm": 1.183480026573518,
      "learning_rate": 4.999121573359516e-06,
      "loss": 0.2119,
      "step": 630
    },
    {
      "epoch": 0.0581379278573732,
      "grad_norm": 1.1555341737613782,
      "learning_rate": 4.999101264804765e-06,
      "loss": 0.1911,
      "step": 631
    },
    {
      "epoch": 0.0582300640346432,
      "grad_norm": 1.1980595228638002,
      "learning_rate": 4.9990807242081915e-06,
      "loss": 0.2053,
      "step": 632
    },
    {
      "epoch": 0.05832220021191321,
      "grad_norm": 1.3207559140555372,
      "learning_rate": 4.999059951571705e-06,
      "loss": 0.2119,
      "step": 633
    },
    {
      "epoch": 0.05841433638918321,
      "grad_norm": 1.2652476003839102,
      "learning_rate": 4.9990389468972336e-06,
      "loss": 0.2239,
      "step": 634
    },
    {
      "epoch": 0.058506472566453215,
      "grad_norm": 1.1645579104689652,
      "learning_rate": 4.999017710186725e-06,
      "loss": 0.2233,
      "step": 635
    },
    {
      "epoch": 0.058598608743723225,
      "grad_norm": 1.188157223333697,
      "learning_rate": 4.998996241442155e-06,
      "loss": 0.2056,
      "step": 636
    },
    {
      "epoch": 0.05869074492099323,
      "grad_norm": 1.174226549897811,
      "learning_rate": 4.998974540665514e-06,
      "loss": 0.2078,
      "step": 637
    },
    {
      "epoch": 0.05878288109826323,
      "grad_norm": 1.026345579666321,
      "learning_rate": 4.998952607858818e-06,
      "loss": 0.1947,
      "step": 638
    },
    {
      "epoch": 0.05887501727553324,
      "grad_norm": 1.3408302898393387,
      "learning_rate": 4.998930443024103e-06,
      "loss": 0.1999,
      "step": 639
    },
    {
      "epoch": 0.05896715345280324,
      "grad_norm": 1.2737103319088758,
      "learning_rate": 4.9989080461634285e-06,
      "loss": 0.2139,
      "step": 640
    },
    {
      "epoch": 0.05905928963007325,
      "grad_norm": 1.013191424805071,
      "learning_rate": 4.9988854172788725e-06,
      "loss": 0.1945,
      "step": 641
    },
    {
      "epoch": 0.059151425807343255,
      "grad_norm": 1.2422989589513913,
      "learning_rate": 4.998862556372537e-06,
      "loss": 0.2146,
      "step": 642
    },
    {
      "epoch": 0.05924356198461326,
      "grad_norm": 1.0916578038400657,
      "learning_rate": 4.998839463446543e-06,
      "loss": 0.1872,
      "step": 643
    },
    {
      "epoch": 0.05933569816188326,
      "grad_norm": 1.2140922793923727,
      "learning_rate": 4.998816138503038e-06,
      "loss": 0.2128,
      "step": 644
    },
    {
      "epoch": 0.05942783433915327,
      "grad_norm": 1.187481640612988,
      "learning_rate": 4.9987925815441835e-06,
      "loss": 0.186,
      "step": 645
    },
    {
      "epoch": 0.05951997051642327,
      "grad_norm": 1.0595639803375902,
      "learning_rate": 4.99876879257217e-06,
      "loss": 0.1814,
      "step": 646
    },
    {
      "epoch": 0.05961210669369328,
      "grad_norm": 1.1918728406860226,
      "learning_rate": 4.9987447715892046e-06,
      "loss": 0.2033,
      "step": 647
    },
    {
      "epoch": 0.059704242870963285,
      "grad_norm": 1.1277418699802502,
      "learning_rate": 4.998720518597518e-06,
      "loss": 0.1976,
      "step": 648
    },
    {
      "epoch": 0.05979637904823329,
      "grad_norm": 1.2389827145068848,
      "learning_rate": 4.998696033599363e-06,
      "loss": 0.2214,
      "step": 649
    },
    {
      "epoch": 0.05988851522550329,
      "grad_norm": 1.1715668898340321,
      "learning_rate": 4.998671316597012e-06,
      "loss": 0.1957,
      "step": 650
    },
    {
      "epoch": 0.0599806514027733,
      "grad_norm": 1.1157040510100455,
      "learning_rate": 4.998646367592761e-06,
      "loss": 0.1977,
      "step": 651
    },
    {
      "epoch": 0.0600727875800433,
      "grad_norm": 1.2964622196527131,
      "learning_rate": 4.998621186588925e-06,
      "loss": 0.2137,
      "step": 652
    },
    {
      "epoch": 0.06016492375731331,
      "grad_norm": 1.1874309992271197,
      "learning_rate": 4.998595773587844e-06,
      "loss": 0.198,
      "step": 653
    },
    {
      "epoch": 0.060257059934583315,
      "grad_norm": 1.0821447628825356,
      "learning_rate": 4.998570128591875e-06,
      "loss": 0.1983,
      "step": 654
    },
    {
      "epoch": 0.06034919611185332,
      "grad_norm": 1.1274894688033879,
      "learning_rate": 4.998544251603402e-06,
      "loss": 0.197,
      "step": 655
    },
    {
      "epoch": 0.06044133228912332,
      "grad_norm": 1.2286272649030554,
      "learning_rate": 4.998518142624826e-06,
      "loss": 0.2165,
      "step": 656
    },
    {
      "epoch": 0.06053346846639333,
      "grad_norm": 1.0008382256801454,
      "learning_rate": 4.998491801658571e-06,
      "loss": 0.1782,
      "step": 657
    },
    {
      "epoch": 0.06062560464366334,
      "grad_norm": 1.1573290350734389,
      "learning_rate": 4.998465228707084e-06,
      "loss": 0.1976,
      "step": 658
    },
    {
      "epoch": 0.06071774082093334,
      "grad_norm": 1.1294960460081294,
      "learning_rate": 4.998438423772831e-06,
      "loss": 0.2195,
      "step": 659
    },
    {
      "epoch": 0.060809876998203345,
      "grad_norm": 1.154289522109689,
      "learning_rate": 4.998411386858303e-06,
      "loss": 0.2114,
      "step": 660
    },
    {
      "epoch": 0.06090201317547335,
      "grad_norm": 1.0669414151184684,
      "learning_rate": 4.998384117966007e-06,
      "loss": 0.1967,
      "step": 661
    },
    {
      "epoch": 0.06099414935274335,
      "grad_norm": 1.1281826805262178,
      "learning_rate": 4.998356617098478e-06,
      "loss": 0.2076,
      "step": 662
    },
    {
      "epoch": 0.06108628553001336,
      "grad_norm": 1.114308852904428,
      "learning_rate": 4.9983288842582665e-06,
      "loss": 0.2047,
      "step": 663
    },
    {
      "epoch": 0.06117842170728337,
      "grad_norm": 1.2071437766523476,
      "learning_rate": 4.9983009194479505e-06,
      "loss": 0.2026,
      "step": 664
    },
    {
      "epoch": 0.06127055788455337,
      "grad_norm": 1.1827937087808504,
      "learning_rate": 4.998272722670126e-06,
      "loss": 0.2008,
      "step": 665
    },
    {
      "epoch": 0.061362694061823375,
      "grad_norm": 1.0733621691103314,
      "learning_rate": 4.998244293927409e-06,
      "loss": 0.1813,
      "step": 666
    },
    {
      "epoch": 0.06145483023909338,
      "grad_norm": 1.1788970733123387,
      "learning_rate": 4.998215633222441e-06,
      "loss": 0.219,
      "step": 667
    },
    {
      "epoch": 0.06154696641636338,
      "grad_norm": 1.2172001325394024,
      "learning_rate": 4.998186740557882e-06,
      "loss": 0.1977,
      "step": 668
    },
    {
      "epoch": 0.061639102593633394,
      "grad_norm": 1.1195473427959475,
      "learning_rate": 4.998157615936416e-06,
      "loss": 0.1914,
      "step": 669
    },
    {
      "epoch": 0.0617312387709034,
      "grad_norm": 1.1674007562772488,
      "learning_rate": 4.998128259360747e-06,
      "loss": 0.2087,
      "step": 670
    },
    {
      "epoch": 0.0618233749481734,
      "grad_norm": 1.212028870572152,
      "learning_rate": 4.998098670833599e-06,
      "loss": 0.211,
      "step": 671
    },
    {
      "epoch": 0.061915511125443405,
      "grad_norm": 1.1367268757070708,
      "learning_rate": 4.998068850357721e-06,
      "loss": 0.1912,
      "step": 672
    },
    {
      "epoch": 0.06200764730271341,
      "grad_norm": 1.08789125897739,
      "learning_rate": 4.998038797935882e-06,
      "loss": 0.1952,
      "step": 673
    },
    {
      "epoch": 0.06209978347998341,
      "grad_norm": 1.1357554005551076,
      "learning_rate": 4.9980085135708715e-06,
      "loss": 0.2017,
      "step": 674
    },
    {
      "epoch": 0.062191919657253424,
      "grad_norm": 1.14464491950595,
      "learning_rate": 4.997977997265501e-06,
      "loss": 0.2049,
      "step": 675
    },
    {
      "epoch": 0.06228405583452343,
      "grad_norm": 1.0226569388533309,
      "learning_rate": 4.997947249022605e-06,
      "loss": 0.1861,
      "step": 676
    },
    {
      "epoch": 0.06237619201179343,
      "grad_norm": 1.12500761449028,
      "learning_rate": 4.997916268845038e-06,
      "loss": 0.1935,
      "step": 677
    },
    {
      "epoch": 0.062468328189063435,
      "grad_norm": 1.0950565167941166,
      "learning_rate": 4.997885056735677e-06,
      "loss": 0.1959,
      "step": 678
    },
    {
      "epoch": 0.06256046436633345,
      "grad_norm": 1.1475008577190142,
      "learning_rate": 4.99785361269742e-06,
      "loss": 0.1987,
      "step": 679
    },
    {
      "epoch": 0.06265260054360344,
      "grad_norm": 1.153601965067529,
      "learning_rate": 4.9978219367331856e-06,
      "loss": 0.2098,
      "step": 680
    },
    {
      "epoch": 0.06274473672087345,
      "grad_norm": 1.1044287968459916,
      "learning_rate": 4.997790028845916e-06,
      "loss": 0.2078,
      "step": 681
    },
    {
      "epoch": 0.06283687289814345,
      "grad_norm": 1.087079920318585,
      "learning_rate": 4.997757889038573e-06,
      "loss": 0.2082,
      "step": 682
    },
    {
      "epoch": 0.06292900907541346,
      "grad_norm": 1.163440565892733,
      "learning_rate": 4.9977255173141405e-06,
      "loss": 0.1917,
      "step": 683
    },
    {
      "epoch": 0.06302114525268347,
      "grad_norm": 1.0799809277476804,
      "learning_rate": 4.997692913675626e-06,
      "loss": 0.2071,
      "step": 684
    },
    {
      "epoch": 0.06311328142995347,
      "grad_norm": 1.0607317616778018,
      "learning_rate": 4.997660078126055e-06,
      "loss": 0.2097,
      "step": 685
    },
    {
      "epoch": 0.06320541760722348,
      "grad_norm": 1.079311486129121,
      "learning_rate": 4.997627010668477e-06,
      "loss": 0.2119,
      "step": 686
    },
    {
      "epoch": 0.06329755378449348,
      "grad_norm": 1.0586204104270576,
      "learning_rate": 4.997593711305963e-06,
      "loss": 0.1931,
      "step": 687
    },
    {
      "epoch": 0.06338968996176349,
      "grad_norm": 1.3372536926610412,
      "learning_rate": 4.997560180041604e-06,
      "loss": 0.2016,
      "step": 688
    },
    {
      "epoch": 0.0634818261390335,
      "grad_norm": 1.026145931825911,
      "learning_rate": 4.997526416878513e-06,
      "loss": 0.1991,
      "step": 689
    },
    {
      "epoch": 0.0635739623163035,
      "grad_norm": 1.0825949947992264,
      "learning_rate": 4.997492421819825e-06,
      "loss": 0.1901,
      "step": 690
    },
    {
      "epoch": 0.0636660984935735,
      "grad_norm": 1.1104887396834857,
      "learning_rate": 4.997458194868697e-06,
      "loss": 0.1964,
      "step": 691
    },
    {
      "epoch": 0.0637582346708435,
      "grad_norm": 1.1414256643477074,
      "learning_rate": 4.997423736028308e-06,
      "loss": 0.1942,
      "step": 692
    },
    {
      "epoch": 0.06385037084811351,
      "grad_norm": 1.0428539165758621,
      "learning_rate": 4.997389045301856e-06,
      "loss": 0.1803,
      "step": 693
    },
    {
      "epoch": 0.06394250702538351,
      "grad_norm": 1.1658483507123416,
      "learning_rate": 4.997354122692561e-06,
      "loss": 0.2028,
      "step": 694
    },
    {
      "epoch": 0.06403464320265352,
      "grad_norm": 0.9640305471456437,
      "learning_rate": 4.997318968203668e-06,
      "loss": 0.1639,
      "step": 695
    },
    {
      "epoch": 0.06412677937992353,
      "grad_norm": 0.9944000405107695,
      "learning_rate": 4.99728358183844e-06,
      "loss": 0.1952,
      "step": 696
    },
    {
      "epoch": 0.06421891555719353,
      "grad_norm": 1.0588180086567232,
      "learning_rate": 4.9972479636001625e-06,
      "loss": 0.1902,
      "step": 697
    },
    {
      "epoch": 0.06431105173446354,
      "grad_norm": 1.1141307683264974,
      "learning_rate": 4.9972121134921435e-06,
      "loss": 0.1993,
      "step": 698
    },
    {
      "epoch": 0.06440318791173354,
      "grad_norm": 1.2576518697394798,
      "learning_rate": 4.99717603151771e-06,
      "loss": 0.1968,
      "step": 699
    },
    {
      "epoch": 0.06449532408900355,
      "grad_norm": 1.0725923379796012,
      "learning_rate": 4.997139717680214e-06,
      "loss": 0.1962,
      "step": 700
    },
    {
      "epoch": 0.06458746026627356,
      "grad_norm": 1.1265762810463271,
      "learning_rate": 4.9971031719830255e-06,
      "loss": 0.2103,
      "step": 701
    },
    {
      "epoch": 0.06467959644354355,
      "grad_norm": 1.0764056898213448,
      "learning_rate": 4.9970663944295396e-06,
      "loss": 0.1929,
      "step": 702
    },
    {
      "epoch": 0.06477173262081357,
      "grad_norm": 1.1443294790794694,
      "learning_rate": 4.99702938502317e-06,
      "loss": 0.2004,
      "step": 703
    },
    {
      "epoch": 0.06486386879808356,
      "grad_norm": 1.1556168002401073,
      "learning_rate": 4.996992143767353e-06,
      "loss": 0.1955,
      "step": 704
    },
    {
      "epoch": 0.06495600497535357,
      "grad_norm": 1.1330344957803058,
      "learning_rate": 4.996954670665547e-06,
      "loss": 0.1971,
      "step": 705
    },
    {
      "epoch": 0.06504814115262358,
      "grad_norm": 1.1245253883139057,
      "learning_rate": 4.996916965721232e-06,
      "loss": 0.2186,
      "step": 706
    },
    {
      "epoch": 0.06514027732989358,
      "grad_norm": 1.1056427514136586,
      "learning_rate": 4.996879028937906e-06,
      "loss": 0.2043,
      "step": 707
    },
    {
      "epoch": 0.06523241350716359,
      "grad_norm": 1.1164453535256291,
      "learning_rate": 4.996840860319094e-06,
      "loss": 0.2042,
      "step": 708
    },
    {
      "epoch": 0.06532454968443359,
      "grad_norm": 1.0858019482374586,
      "learning_rate": 4.996802459868341e-06,
      "loss": 0.1927,
      "step": 709
    },
    {
      "epoch": 0.0654166858617036,
      "grad_norm": 1.1400184015748671,
      "learning_rate": 4.996763827589209e-06,
      "loss": 0.2088,
      "step": 710
    },
    {
      "epoch": 0.0655088220389736,
      "grad_norm": 1.2412575997017532,
      "learning_rate": 4.996724963485288e-06,
      "loss": 0.2067,
      "step": 711
    },
    {
      "epoch": 0.06560095821624361,
      "grad_norm": 1.102175865083544,
      "learning_rate": 4.996685867560186e-06,
      "loss": 0.2011,
      "step": 712
    },
    {
      "epoch": 0.06569309439351362,
      "grad_norm": 1.1335302279554809,
      "learning_rate": 4.996646539817531e-06,
      "loss": 0.2104,
      "step": 713
    },
    {
      "epoch": 0.06578523057078361,
      "grad_norm": 1.0993877114695203,
      "learning_rate": 4.996606980260977e-06,
      "loss": 0.2188,
      "step": 714
    },
    {
      "epoch": 0.06587736674805363,
      "grad_norm": 1.0510165045528403,
      "learning_rate": 4.9965671888941955e-06,
      "loss": 0.1866,
      "step": 715
    },
    {
      "epoch": 0.06596950292532362,
      "grad_norm": 1.1947642401183298,
      "learning_rate": 4.996527165720882e-06,
      "loss": 0.2027,
      "step": 716
    },
    {
      "epoch": 0.06606163910259363,
      "grad_norm": 1.1376782085741068,
      "learning_rate": 4.996486910744753e-06,
      "loss": 0.184,
      "step": 717
    },
    {
      "epoch": 0.06615377527986364,
      "grad_norm": 0.9570102832377015,
      "learning_rate": 4.996446423969546e-06,
      "loss": 0.1791,
      "step": 718
    },
    {
      "epoch": 0.06624591145713364,
      "grad_norm": 1.1353836050535746,
      "learning_rate": 4.9964057053990186e-06,
      "loss": 0.2073,
      "step": 719
    },
    {
      "epoch": 0.06633804763440365,
      "grad_norm": 1.2154219117874654,
      "learning_rate": 4.996364755036954e-06,
      "loss": 0.201,
      "step": 720
    },
    {
      "epoch": 0.06643018381167365,
      "grad_norm": 1.0109850083514942,
      "learning_rate": 4.996323572887153e-06,
      "loss": 0.1882,
      "step": 721
    },
    {
      "epoch": 0.06652231998894366,
      "grad_norm": 1.101603698240089,
      "learning_rate": 4.996282158953439e-06,
      "loss": 0.1845,
      "step": 722
    },
    {
      "epoch": 0.06661445616621367,
      "grad_norm": 1.1990367891628382,
      "learning_rate": 4.996240513239658e-06,
      "loss": 0.2094,
      "step": 723
    },
    {
      "epoch": 0.06670659234348367,
      "grad_norm": 1.1531297226861148,
      "learning_rate": 4.9961986357496775e-06,
      "loss": 0.2001,
      "step": 724
    },
    {
      "epoch": 0.06679872852075368,
      "grad_norm": 1.1181593399001286,
      "learning_rate": 4.996156526487383e-06,
      "loss": 0.1898,
      "step": 725
    },
    {
      "epoch": 0.06689086469802367,
      "grad_norm": 1.1830597611536493,
      "learning_rate": 4.996114185456688e-06,
      "loss": 0.1877,
      "step": 726
    },
    {
      "epoch": 0.06698300087529369,
      "grad_norm": 1.1232584394160454,
      "learning_rate": 4.996071612661523e-06,
      "loss": 0.1953,
      "step": 727
    },
    {
      "epoch": 0.06707513705256368,
      "grad_norm": 1.0889290193640835,
      "learning_rate": 4.996028808105838e-06,
      "loss": 0.2108,
      "step": 728
    },
    {
      "epoch": 0.0671672732298337,
      "grad_norm": 1.0995182161943409,
      "learning_rate": 4.995985771793611e-06,
      "loss": 0.1885,
      "step": 729
    },
    {
      "epoch": 0.0672594094071037,
      "grad_norm": 0.991728475400526,
      "learning_rate": 4.995942503728837e-06,
      "loss": 0.1945,
      "step": 730
    },
    {
      "epoch": 0.0673515455843737,
      "grad_norm": 1.1414476841467807,
      "learning_rate": 4.995899003915532e-06,
      "loss": 0.2073,
      "step": 731
    },
    {
      "epoch": 0.06744368176164371,
      "grad_norm": 1.1816046268935128,
      "learning_rate": 4.995855272357736e-06,
      "loss": 0.2024,
      "step": 732
    },
    {
      "epoch": 0.06753581793891371,
      "grad_norm": 1.0793429170100068,
      "learning_rate": 4.995811309059509e-06,
      "loss": 0.2027,
      "step": 733
    },
    {
      "epoch": 0.06762795411618372,
      "grad_norm": 1.0463184895901732,
      "learning_rate": 4.995767114024934e-06,
      "loss": 0.1931,
      "step": 734
    },
    {
      "epoch": 0.06772009029345373,
      "grad_norm": 1.11156175838415,
      "learning_rate": 4.995722687258113e-06,
      "loss": 0.2125,
      "step": 735
    },
    {
      "epoch": 0.06781222647072373,
      "grad_norm": 1.1620522039851269,
      "learning_rate": 4.995678028763172e-06,
      "loss": 0.1767,
      "step": 736
    },
    {
      "epoch": 0.06790436264799374,
      "grad_norm": 1.1748972006716736,
      "learning_rate": 4.995633138544258e-06,
      "loss": 0.1896,
      "step": 737
    },
    {
      "epoch": 0.06799649882526373,
      "grad_norm": 1.1248616510945046,
      "learning_rate": 4.995588016605539e-06,
      "loss": 0.1975,
      "step": 738
    },
    {
      "epoch": 0.06808863500253375,
      "grad_norm": 1.1490092157815965,
      "learning_rate": 4.995542662951203e-06,
      "loss": 0.2018,
      "step": 739
    },
    {
      "epoch": 0.06818077117980376,
      "grad_norm": 1.29389132222856,
      "learning_rate": 4.9954970775854626e-06,
      "loss": 0.2105,
      "step": 740
    },
    {
      "epoch": 0.06827290735707375,
      "grad_norm": 1.0475002924733825,
      "learning_rate": 4.99545126051255e-06,
      "loss": 0.1922,
      "step": 741
    },
    {
      "epoch": 0.06836504353434376,
      "grad_norm": 1.1500051467045975,
      "learning_rate": 4.99540521173672e-06,
      "loss": 0.2023,
      "step": 742
    },
    {
      "epoch": 0.06845717971161376,
      "grad_norm": 1.15475035543741,
      "learning_rate": 4.995358931262246e-06,
      "loss": 0.1969,
      "step": 743
    },
    {
      "epoch": 0.06854931588888377,
      "grad_norm": 1.1034626957034204,
      "learning_rate": 4.995312419093427e-06,
      "loss": 0.2191,
      "step": 744
    },
    {
      "epoch": 0.06864145206615377,
      "grad_norm": 1.1053020130496054,
      "learning_rate": 4.995265675234583e-06,
      "loss": 0.1987,
      "step": 745
    },
    {
      "epoch": 0.06873358824342378,
      "grad_norm": 1.1992876381966995,
      "learning_rate": 4.995218699690051e-06,
      "loss": 0.209,
      "step": 746
    },
    {
      "epoch": 0.06882572442069379,
      "grad_norm": 1.082494388880573,
      "learning_rate": 4.995171492464195e-06,
      "loss": 0.1994,
      "step": 747
    },
    {
      "epoch": 0.06891786059796379,
      "grad_norm": 1.129990460435263,
      "learning_rate": 4.995124053561396e-06,
      "loss": 0.2027,
      "step": 748
    },
    {
      "epoch": 0.0690099967752338,
      "grad_norm": 1.0028326159846859,
      "learning_rate": 4.9950763829860615e-06,
      "loss": 0.1868,
      "step": 749
    },
    {
      "epoch": 0.0691021329525038,
      "grad_norm": 1.0755633729487084,
      "learning_rate": 4.995028480742616e-06,
      "loss": 0.1966,
      "step": 750
    },
    {
      "epoch": 0.0691942691297738,
      "grad_norm": 1.1063468461219594,
      "learning_rate": 4.9949803468355075e-06,
      "loss": 0.1893,
      "step": 751
    },
    {
      "epoch": 0.06928640530704382,
      "grad_norm": 1.048084914654042,
      "learning_rate": 4.994931981269206e-06,
      "loss": 0.2112,
      "step": 752
    },
    {
      "epoch": 0.06937854148431381,
      "grad_norm": 1.0230459409513677,
      "learning_rate": 4.9948833840482e-06,
      "loss": 0.1965,
      "step": 753
    },
    {
      "epoch": 0.06947067766158382,
      "grad_norm": 1.0632313033802272,
      "learning_rate": 4.994834555177004e-06,
      "loss": 0.2011,
      "step": 754
    },
    {
      "epoch": 0.06956281383885382,
      "grad_norm": 1.0589252280154375,
      "learning_rate": 4.994785494660151e-06,
      "loss": 0.2018,
      "step": 755
    },
    {
      "epoch": 0.06965495001612383,
      "grad_norm": 1.1755938840081506,
      "learning_rate": 4.994736202502196e-06,
      "loss": 0.2266,
      "step": 756
    },
    {
      "epoch": 0.06974708619339384,
      "grad_norm": 1.0493776400120107,
      "learning_rate": 4.994686678707716e-06,
      "loss": 0.2197,
      "step": 757
    },
    {
      "epoch": 0.06983922237066384,
      "grad_norm": 0.9856328790985965,
      "learning_rate": 4.9946369232813104e-06,
      "loss": 0.1926,
      "step": 758
    },
    {
      "epoch": 0.06993135854793385,
      "grad_norm": 0.9684741371548949,
      "learning_rate": 4.994586936227598e-06,
      "loss": 0.1856,
      "step": 759
    },
    {
      "epoch": 0.07002349472520385,
      "grad_norm": 1.0235610225225114,
      "learning_rate": 4.99453671755122e-06,
      "loss": 0.1985,
      "step": 760
    },
    {
      "epoch": 0.07011563090247386,
      "grad_norm": 1.0056452861965137,
      "learning_rate": 4.994486267256839e-06,
      "loss": 0.2043,
      "step": 761
    },
    {
      "epoch": 0.07020776707974385,
      "grad_norm": 0.9480271147994421,
      "learning_rate": 4.994435585349139e-06,
      "loss": 0.1854,
      "step": 762
    },
    {
      "epoch": 0.07029990325701387,
      "grad_norm": 1.073899838330998,
      "learning_rate": 4.994384671832827e-06,
      "loss": 0.194,
      "step": 763
    },
    {
      "epoch": 0.07039203943428388,
      "grad_norm": 1.1042378147261156,
      "learning_rate": 4.994333526712629e-06,
      "loss": 0.2038,
      "step": 764
    },
    {
      "epoch": 0.07048417561155387,
      "grad_norm": 1.0929836003453606,
      "learning_rate": 4.994282149993296e-06,
      "loss": 0.1941,
      "step": 765
    },
    {
      "epoch": 0.07057631178882388,
      "grad_norm": 1.002112688526324,
      "learning_rate": 4.994230541679596e-06,
      "loss": 0.1952,
      "step": 766
    },
    {
      "epoch": 0.07066844796609388,
      "grad_norm": 1.0405743611774914,
      "learning_rate": 4.994178701776322e-06,
      "loss": 0.1973,
      "step": 767
    },
    {
      "epoch": 0.07076058414336389,
      "grad_norm": 1.0611508046462508,
      "learning_rate": 4.994126630288287e-06,
      "loss": 0.1998,
      "step": 768
    },
    {
      "epoch": 0.0708527203206339,
      "grad_norm": 1.0044887753865614,
      "learning_rate": 4.994074327220326e-06,
      "loss": 0.1912,
      "step": 769
    },
    {
      "epoch": 0.0709448564979039,
      "grad_norm": 1.0125081975049615,
      "learning_rate": 4.994021792577296e-06,
      "loss": 0.1996,
      "step": 770
    },
    {
      "epoch": 0.07103699267517391,
      "grad_norm": 1.1171803446967403,
      "learning_rate": 4.993969026364074e-06,
      "loss": 0.2068,
      "step": 771
    },
    {
      "epoch": 0.0711291288524439,
      "grad_norm": 1.148820539107491,
      "learning_rate": 4.993916028585559e-06,
      "loss": 0.198,
      "step": 772
    },
    {
      "epoch": 0.07122126502971392,
      "grad_norm": 1.0778949515489327,
      "learning_rate": 4.993862799246672e-06,
      "loss": 0.1984,
      "step": 773
    },
    {
      "epoch": 0.07131340120698393,
      "grad_norm": 1.1483231088801946,
      "learning_rate": 4.9938093383523565e-06,
      "loss": 0.2117,
      "step": 774
    },
    {
      "epoch": 0.07140553738425393,
      "grad_norm": 1.1566007468065098,
      "learning_rate": 4.993755645907575e-06,
      "loss": 0.2116,
      "step": 775
    },
    {
      "epoch": 0.07149767356152394,
      "grad_norm": 1.0319713754984687,
      "learning_rate": 4.993701721917314e-06,
      "loss": 0.2012,
      "step": 776
    },
    {
      "epoch": 0.07158980973879393,
      "grad_norm": 1.077311245060311,
      "learning_rate": 4.993647566386579e-06,
      "loss": 0.2059,
      "step": 777
    },
    {
      "epoch": 0.07168194591606394,
      "grad_norm": 1.1405163265066733,
      "learning_rate": 4.993593179320399e-06,
      "loss": 0.1983,
      "step": 778
    },
    {
      "epoch": 0.07177408209333394,
      "grad_norm": 1.0767703872817018,
      "learning_rate": 4.993538560723824e-06,
      "loss": 0.2079,
      "step": 779
    },
    {
      "epoch": 0.07186621827060395,
      "grad_norm": 1.2227974845354599,
      "learning_rate": 4.993483710601926e-06,
      "loss": 0.2136,
      "step": 780
    },
    {
      "epoch": 0.07195835444787396,
      "grad_norm": 1.0541963353313086,
      "learning_rate": 4.993428628959796e-06,
      "loss": 0.1913,
      "step": 781
    },
    {
      "epoch": 0.07205049062514396,
      "grad_norm": 1.0513503112741112,
      "learning_rate": 4.993373315802551e-06,
      "loss": 0.201,
      "step": 782
    },
    {
      "epoch": 0.07214262680241397,
      "grad_norm": 1.0201367211302232,
      "learning_rate": 4.993317771135324e-06,
      "loss": 0.1903,
      "step": 783
    },
    {
      "epoch": 0.07223476297968397,
      "grad_norm": 1.1327814545653032,
      "learning_rate": 4.993261994963275e-06,
      "loss": 0.2212,
      "step": 784
    },
    {
      "epoch": 0.07232689915695398,
      "grad_norm": 1.0740213154007985,
      "learning_rate": 4.99320598729158e-06,
      "loss": 0.1948,
      "step": 785
    },
    {
      "epoch": 0.07241903533422399,
      "grad_norm": 0.957971255991909,
      "learning_rate": 4.993149748125441e-06,
      "loss": 0.1928,
      "step": 786
    },
    {
      "epoch": 0.07251117151149399,
      "grad_norm": 1.0166766681701447,
      "learning_rate": 4.99309327747008e-06,
      "loss": 0.1938,
      "step": 787
    },
    {
      "epoch": 0.072603307688764,
      "grad_norm": 1.0764254895760437,
      "learning_rate": 4.99303657533074e-06,
      "loss": 0.2087,
      "step": 788
    },
    {
      "epoch": 0.07269544386603399,
      "grad_norm": 1.0328362715853223,
      "learning_rate": 4.9929796417126855e-06,
      "loss": 0.2004,
      "step": 789
    },
    {
      "epoch": 0.072787580043304,
      "grad_norm": 1.0993910395671507,
      "learning_rate": 4.992922476621203e-06,
      "loss": 0.1968,
      "step": 790
    },
    {
      "epoch": 0.07287971622057401,
      "grad_norm": 1.174010366680361,
      "learning_rate": 4.992865080061599e-06,
      "loss": 0.2078,
      "step": 791
    },
    {
      "epoch": 0.07297185239784401,
      "grad_norm": 1.0829664383846667,
      "learning_rate": 4.992807452039206e-06,
      "loss": 0.2075,
      "step": 792
    },
    {
      "epoch": 0.07306398857511402,
      "grad_norm": 1.0362818708953512,
      "learning_rate": 4.992749592559372e-06,
      "loss": 0.2064,
      "step": 793
    },
    {
      "epoch": 0.07315612475238402,
      "grad_norm": 1.1051992276067801,
      "learning_rate": 4.99269150162747e-06,
      "loss": 0.2035,
      "step": 794
    },
    {
      "epoch": 0.07324826092965403,
      "grad_norm": 1.0070264686302375,
      "learning_rate": 4.9926331792488935e-06,
      "loss": 0.2013,
      "step": 795
    },
    {
      "epoch": 0.07334039710692403,
      "grad_norm": 1.0971405705997572,
      "learning_rate": 4.992574625429059e-06,
      "loss": 0.1991,
      "step": 796
    },
    {
      "epoch": 0.07343253328419404,
      "grad_norm": 1.0608316209147346,
      "learning_rate": 4.992515840173401e-06,
      "loss": 0.205,
      "step": 797
    },
    {
      "epoch": 0.07352466946146405,
      "grad_norm": 1.0833481979795825,
      "learning_rate": 4.992456823487381e-06,
      "loss": 0.2123,
      "step": 798
    },
    {
      "epoch": 0.07361680563873405,
      "grad_norm": 1.130384094246514,
      "learning_rate": 4.992397575376474e-06,
      "loss": 0.206,
      "step": 799
    },
    {
      "epoch": 0.07370894181600406,
      "grad_norm": 1.032883544230814,
      "learning_rate": 4.992338095846185e-06,
      "loss": 0.1954,
      "step": 800
    },
    {
      "epoch": 0.07380107799327405,
      "grad_norm": 1.0325683668694534,
      "learning_rate": 4.992278384902036e-06,
      "loss": 0.1823,
      "step": 801
    },
    {
      "epoch": 0.07389321417054406,
      "grad_norm": 1.0856330855658867,
      "learning_rate": 4.992218442549571e-06,
      "loss": 0.1972,
      "step": 802
    },
    {
      "epoch": 0.07398535034781407,
      "grad_norm": 0.9419401027800425,
      "learning_rate": 4.992158268794355e-06,
      "loss": 0.174,
      "step": 803
    },
    {
      "epoch": 0.07407748652508407,
      "grad_norm": 0.9955805004494609,
      "learning_rate": 4.992097863641975e-06,
      "loss": 0.1921,
      "step": 804
    },
    {
      "epoch": 0.07416962270235408,
      "grad_norm": 1.0997153728401974,
      "learning_rate": 4.992037227098041e-06,
      "loss": 0.2003,
      "step": 805
    },
    {
      "epoch": 0.07426175887962408,
      "grad_norm": 1.1910036898293976,
      "learning_rate": 4.991976359168182e-06,
      "loss": 0.2154,
      "step": 806
    },
    {
      "epoch": 0.07435389505689409,
      "grad_norm": 1.1056827367049222,
      "learning_rate": 4.99191525985805e-06,
      "loss": 0.2022,
      "step": 807
    },
    {
      "epoch": 0.0744460312341641,
      "grad_norm": 1.042144172015194,
      "learning_rate": 4.991853929173318e-06,
      "loss": 0.1988,
      "step": 808
    },
    {
      "epoch": 0.0745381674114341,
      "grad_norm": 1.01022506354823,
      "learning_rate": 4.99179236711968e-06,
      "loss": 0.1876,
      "step": 809
    },
    {
      "epoch": 0.07463030358870411,
      "grad_norm": 1.0524961526541383,
      "learning_rate": 4.991730573702852e-06,
      "loss": 0.1909,
      "step": 810
    },
    {
      "epoch": 0.0747224397659741,
      "grad_norm": 1.1702706877003066,
      "learning_rate": 4.991668548928573e-06,
      "loss": 0.2195,
      "step": 811
    },
    {
      "epoch": 0.07481457594324412,
      "grad_norm": 1.0918293776434618,
      "learning_rate": 4.991606292802601e-06,
      "loss": 0.2199,
      "step": 812
    },
    {
      "epoch": 0.07490671212051413,
      "grad_norm": 1.0721208493063734,
      "learning_rate": 4.991543805330716e-06,
      "loss": 0.2144,
      "step": 813
    },
    {
      "epoch": 0.07499884829778412,
      "grad_norm": 1.0564964966450854,
      "learning_rate": 4.991481086518721e-06,
      "loss": 0.1924,
      "step": 814
    },
    {
      "epoch": 0.07509098447505413,
      "grad_norm": 1.028522715058719,
      "learning_rate": 4.9914181363724394e-06,
      "loss": 0.1979,
      "step": 815
    },
    {
      "epoch": 0.07518312065232413,
      "grad_norm": 1.0724064172531385,
      "learning_rate": 4.991354954897715e-06,
      "loss": 0.2131,
      "step": 816
    },
    {
      "epoch": 0.07527525682959414,
      "grad_norm": 1.0614207713972115,
      "learning_rate": 4.991291542100416e-06,
      "loss": 0.207,
      "step": 817
    },
    {
      "epoch": 0.07536739300686414,
      "grad_norm": 1.0449337855654208,
      "learning_rate": 4.991227897986428e-06,
      "loss": 0.1917,
      "step": 818
    },
    {
      "epoch": 0.07545952918413415,
      "grad_norm": 1.1357051354151935,
      "learning_rate": 4.991164022561662e-06,
      "loss": 0.2016,
      "step": 819
    },
    {
      "epoch": 0.07555166536140416,
      "grad_norm": 0.9585027095704082,
      "learning_rate": 4.991099915832048e-06,
      "loss": 0.1846,
      "step": 820
    },
    {
      "epoch": 0.07564380153867416,
      "grad_norm": 1.1083315681597967,
      "learning_rate": 4.9910355778035394e-06,
      "loss": 0.2127,
      "step": 821
    },
    {
      "epoch": 0.07573593771594417,
      "grad_norm": 1.1383747660532335,
      "learning_rate": 4.990971008482109e-06,
      "loss": 0.1968,
      "step": 822
    },
    {
      "epoch": 0.07582807389321417,
      "grad_norm": 1.104749280816824,
      "learning_rate": 4.990906207873753e-06,
      "loss": 0.1904,
      "step": 823
    },
    {
      "epoch": 0.07592021007048418,
      "grad_norm": 1.0561974215226457,
      "learning_rate": 4.990841175984486e-06,
      "loss": 0.2,
      "step": 824
    },
    {
      "epoch": 0.07601234624775419,
      "grad_norm": 1.0340894305919612,
      "learning_rate": 4.9907759128203485e-06,
      "loss": 0.1958,
      "step": 825
    },
    {
      "epoch": 0.07610448242502418,
      "grad_norm": 1.020276399091769,
      "learning_rate": 4.9907104183874e-06,
      "loss": 0.1939,
      "step": 826
    },
    {
      "epoch": 0.0761966186022942,
      "grad_norm": 1.074585805099351,
      "learning_rate": 4.990644692691721e-06,
      "loss": 0.1983,
      "step": 827
    },
    {
      "epoch": 0.07628875477956419,
      "grad_norm": 0.9840992754978488,
      "learning_rate": 4.990578735739413e-06,
      "loss": 0.1936,
      "step": 828
    },
    {
      "epoch": 0.0763808909568342,
      "grad_norm": 1.0576207176961585,
      "learning_rate": 4.990512547536602e-06,
      "loss": 0.196,
      "step": 829
    },
    {
      "epoch": 0.07647302713410421,
      "grad_norm": 1.054963331859563,
      "learning_rate": 4.990446128089434e-06,
      "loss": 0.1939,
      "step": 830
    },
    {
      "epoch": 0.07656516331137421,
      "grad_norm": 1.0430434986538253,
      "learning_rate": 4.990379477404073e-06,
      "loss": 0.1973,
      "step": 831
    },
    {
      "epoch": 0.07665729948864422,
      "grad_norm": 1.0974937731254035,
      "learning_rate": 4.9903125954867114e-06,
      "loss": 0.2066,
      "step": 832
    },
    {
      "epoch": 0.07674943566591422,
      "grad_norm": 1.0481385062164033,
      "learning_rate": 4.990245482343556e-06,
      "loss": 0.1749,
      "step": 833
    },
    {
      "epoch": 0.07684157184318423,
      "grad_norm": 1.1591353967604399,
      "learning_rate": 4.990178137980841e-06,
      "loss": 0.2042,
      "step": 834
    },
    {
      "epoch": 0.07693370802045423,
      "grad_norm": 1.020946978751914,
      "learning_rate": 4.990110562404817e-06,
      "loss": 0.1887,
      "step": 835
    },
    {
      "epoch": 0.07702584419772424,
      "grad_norm": 1.0521526449822267,
      "learning_rate": 4.990042755621759e-06,
      "loss": 0.1925,
      "step": 836
    },
    {
      "epoch": 0.07711798037499425,
      "grad_norm": 1.08541448464305,
      "learning_rate": 4.989974717637963e-06,
      "loss": 0.1917,
      "step": 837
    },
    {
      "epoch": 0.07721011655226424,
      "grad_norm": 1.044048482126402,
      "learning_rate": 4.989906448459748e-06,
      "loss": 0.2083,
      "step": 838
    },
    {
      "epoch": 0.07730225272953425,
      "grad_norm": 1.0120160811235484,
      "learning_rate": 4.98983794809345e-06,
      "loss": 0.1941,
      "step": 839
    },
    {
      "epoch": 0.07739438890680425,
      "grad_norm": 1.102690087406534,
      "learning_rate": 4.989769216545431e-06,
      "loss": 0.1952,
      "step": 840
    },
    {
      "epoch": 0.07748652508407426,
      "grad_norm": 1.0589664371165017,
      "learning_rate": 4.9897002538220715e-06,
      "loss": 0.1889,
      "step": 841
    },
    {
      "epoch": 0.07757866126134427,
      "grad_norm": 1.0781668139280136,
      "learning_rate": 4.989631059929777e-06,
      "loss": 0.2183,
      "step": 842
    },
    {
      "epoch": 0.07767079743861427,
      "grad_norm": 1.0272497094730775,
      "learning_rate": 4.989561634874969e-06,
      "loss": 0.1995,
      "step": 843
    },
    {
      "epoch": 0.07776293361588428,
      "grad_norm": 1.1132914451731781,
      "learning_rate": 4.9894919786640964e-06,
      "loss": 0.2053,
      "step": 844
    },
    {
      "epoch": 0.07785506979315428,
      "grad_norm": 1.1159855286710003,
      "learning_rate": 4.989422091303625e-06,
      "loss": 0.1962,
      "step": 845
    },
    {
      "epoch": 0.07794720597042429,
      "grad_norm": 1.164205836975356,
      "learning_rate": 4.989351972800045e-06,
      "loss": 0.2039,
      "step": 846
    },
    {
      "epoch": 0.0780393421476943,
      "grad_norm": 1.0153737620570222,
      "learning_rate": 4.989281623159866e-06,
      "loss": 0.205,
      "step": 847
    },
    {
      "epoch": 0.0781314783249643,
      "grad_norm": 1.1936452816832441,
      "learning_rate": 4.98921104238962e-06,
      "loss": 0.2094,
      "step": 848
    },
    {
      "epoch": 0.0782236145022343,
      "grad_norm": 1.0872486263629397,
      "learning_rate": 4.989140230495862e-06,
      "loss": 0.2017,
      "step": 849
    },
    {
      "epoch": 0.0783157506795043,
      "grad_norm": 1.0430936509820625,
      "learning_rate": 4.989069187485165e-06,
      "loss": 0.1959,
      "step": 850
    },
    {
      "epoch": 0.07840788685677431,
      "grad_norm": 1.043073581421324,
      "learning_rate": 4.988997913364126e-06,
      "loss": 0.1943,
      "step": 851
    },
    {
      "epoch": 0.07850002303404431,
      "grad_norm": 1.142893762388807,
      "learning_rate": 4.988926408139363e-06,
      "loss": 0.2164,
      "step": 852
    },
    {
      "epoch": 0.07859215921131432,
      "grad_norm": 1.1122064536056118,
      "learning_rate": 4.988854671817516e-06,
      "loss": 0.2032,
      "step": 853
    },
    {
      "epoch": 0.07868429538858433,
      "grad_norm": 1.0929495216494962,
      "learning_rate": 4.988782704405244e-06,
      "loss": 0.1949,
      "step": 854
    },
    {
      "epoch": 0.07877643156585433,
      "grad_norm": 1.2464150623403418,
      "learning_rate": 4.98871050590923e-06,
      "loss": 0.2328,
      "step": 855
    },
    {
      "epoch": 0.07886856774312434,
      "grad_norm": 1.0842337596440934,
      "learning_rate": 4.988638076336178e-06,
      "loss": 0.2011,
      "step": 856
    },
    {
      "epoch": 0.07896070392039434,
      "grad_norm": 1.0893100625521832,
      "learning_rate": 4.988565415692812e-06,
      "loss": 0.2067,
      "step": 857
    },
    {
      "epoch": 0.07905284009766435,
      "grad_norm": 0.9557182611203138,
      "learning_rate": 4.988492523985881e-06,
      "loss": 0.1788,
      "step": 858
    },
    {
      "epoch": 0.07914497627493436,
      "grad_norm": 1.0570627168797462,
      "learning_rate": 4.9884194012221496e-06,
      "loss": 0.2019,
      "step": 859
    },
    {
      "epoch": 0.07923711245220436,
      "grad_norm": 1.0179565473648933,
      "learning_rate": 4.98834604740841e-06,
      "loss": 0.2019,
      "step": 860
    },
    {
      "epoch": 0.07932924862947437,
      "grad_norm": 1.0519805334151477,
      "learning_rate": 4.988272462551471e-06,
      "loss": 0.204,
      "step": 861
    },
    {
      "epoch": 0.07942138480674436,
      "grad_norm": 0.9369015774026637,
      "learning_rate": 4.988198646658167e-06,
      "loss": 0.1809,
      "step": 862
    },
    {
      "epoch": 0.07951352098401437,
      "grad_norm": 1.1315810175239667,
      "learning_rate": 4.988124599735351e-06,
      "loss": 0.2129,
      "step": 863
    },
    {
      "epoch": 0.07960565716128438,
      "grad_norm": 1.073675633216396,
      "learning_rate": 4.988050321789898e-06,
      "loss": 0.1985,
      "step": 864
    },
    {
      "epoch": 0.07969779333855438,
      "grad_norm": 0.9730213494505685,
      "learning_rate": 4.987975812828704e-06,
      "loss": 0.1924,
      "step": 865
    },
    {
      "epoch": 0.07978992951582439,
      "grad_norm": 1.0859711498805535,
      "learning_rate": 4.987901072858689e-06,
      "loss": 0.1946,
      "step": 866
    },
    {
      "epoch": 0.07988206569309439,
      "grad_norm": 1.10664046990388,
      "learning_rate": 4.9878261018867915e-06,
      "loss": 0.2062,
      "step": 867
    },
    {
      "epoch": 0.0799742018703644,
      "grad_norm": 1.1293880063200286,
      "learning_rate": 4.9877508999199724e-06,
      "loss": 0.215,
      "step": 868
    },
    {
      "epoch": 0.0800663380476344,
      "grad_norm": 1.0544035282612745,
      "learning_rate": 4.987675466965215e-06,
      "loss": 0.1872,
      "step": 869
    },
    {
      "epoch": 0.08015847422490441,
      "grad_norm": 1.081822317114401,
      "learning_rate": 4.987599803029522e-06,
      "loss": 0.2051,
      "step": 870
    },
    {
      "epoch": 0.08025061040217442,
      "grad_norm": 1.0983981922980988,
      "learning_rate": 4.98752390811992e-06,
      "loss": 0.1988,
      "step": 871
    },
    {
      "epoch": 0.08034274657944442,
      "grad_norm": 1.0687075549507865,
      "learning_rate": 4.987447782243456e-06,
      "loss": 0.2103,
      "step": 872
    },
    {
      "epoch": 0.08043488275671443,
      "grad_norm": 1.129069762940162,
      "learning_rate": 4.9873714254071966e-06,
      "loss": 0.2106,
      "step": 873
    },
    {
      "epoch": 0.08052701893398442,
      "grad_norm": 1.0695215954521389,
      "learning_rate": 4.987294837618233e-06,
      "loss": 0.2056,
      "step": 874
    },
    {
      "epoch": 0.08061915511125443,
      "grad_norm": 1.006571818809714,
      "learning_rate": 4.987218018883676e-06,
      "loss": 0.2022,
      "step": 875
    },
    {
      "epoch": 0.08071129128852444,
      "grad_norm": 1.1207288976979883,
      "learning_rate": 4.987140969210659e-06,
      "loss": 0.2053,
      "step": 876
    },
    {
      "epoch": 0.08080342746579444,
      "grad_norm": 1.1091698949899884,
      "learning_rate": 4.987063688606335e-06,
      "loss": 0.2054,
      "step": 877
    },
    {
      "epoch": 0.08089556364306445,
      "grad_norm": 0.9936982863486912,
      "learning_rate": 4.98698617707788e-06,
      "loss": 0.1949,
      "step": 878
    },
    {
      "epoch": 0.08098769982033445,
      "grad_norm": 1.0218531560234463,
      "learning_rate": 4.98690843463249e-06,
      "loss": 0.183,
      "step": 879
    },
    {
      "epoch": 0.08107983599760446,
      "grad_norm": 1.0482785566667265,
      "learning_rate": 4.986830461277384e-06,
      "loss": 0.1962,
      "step": 880
    },
    {
      "epoch": 0.08117197217487447,
      "grad_norm": 1.0487002736259132,
      "learning_rate": 4.986752257019804e-06,
      "loss": 0.2063,
      "step": 881
    },
    {
      "epoch": 0.08126410835214447,
      "grad_norm": 1.0225704173071417,
      "learning_rate": 4.9866738218670075e-06,
      "loss": 0.1873,
      "step": 882
    },
    {
      "epoch": 0.08135624452941448,
      "grad_norm": 0.9922761716304604,
      "learning_rate": 4.986595155826279e-06,
      "loss": 0.1932,
      "step": 883
    },
    {
      "epoch": 0.08144838070668448,
      "grad_norm": 1.1203291309724843,
      "learning_rate": 4.986516258904923e-06,
      "loss": 0.2085,
      "step": 884
    },
    {
      "epoch": 0.08154051688395449,
      "grad_norm": 1.0137824728220541,
      "learning_rate": 4.986437131110265e-06,
      "loss": 0.1957,
      "step": 885
    },
    {
      "epoch": 0.08163265306122448,
      "grad_norm": 1.078763682867198,
      "learning_rate": 4.986357772449652e-06,
      "loss": 0.2051,
      "step": 886
    },
    {
      "epoch": 0.0817247892384945,
      "grad_norm": 1.1459363420840565,
      "learning_rate": 4.986278182930452e-06,
      "loss": 0.2071,
      "step": 887
    },
    {
      "epoch": 0.0818169254157645,
      "grad_norm": 1.0489858592922743,
      "learning_rate": 4.986198362560055e-06,
      "loss": 0.2049,
      "step": 888
    },
    {
      "epoch": 0.0819090615930345,
      "grad_norm": 1.1867738602471791,
      "learning_rate": 4.986118311345873e-06,
      "loss": 0.1922,
      "step": 889
    },
    {
      "epoch": 0.08200119777030451,
      "grad_norm": 1.0816042554701828,
      "learning_rate": 4.9860380292953375e-06,
      "loss": 0.1961,
      "step": 890
    },
    {
      "epoch": 0.08209333394757451,
      "grad_norm": 1.0776867351148292,
      "learning_rate": 4.985957516415903e-06,
      "loss": 0.2077,
      "step": 891
    },
    {
      "epoch": 0.08218547012484452,
      "grad_norm": 1.0266838629289732,
      "learning_rate": 4.985876772715047e-06,
      "loss": 0.1845,
      "step": 892
    },
    {
      "epoch": 0.08227760630211453,
      "grad_norm": 1.0714186396487406,
      "learning_rate": 4.985795798200265e-06,
      "loss": 0.1991,
      "step": 893
    },
    {
      "epoch": 0.08236974247938453,
      "grad_norm": 1.1565923591963108,
      "learning_rate": 4.9857145928790745e-06,
      "loss": 0.2053,
      "step": 894
    },
    {
      "epoch": 0.08246187865665454,
      "grad_norm": 1.0961658661114462,
      "learning_rate": 4.9856331567590175e-06,
      "loss": 0.2075,
      "step": 895
    },
    {
      "epoch": 0.08255401483392454,
      "grad_norm": 0.9307410162608049,
      "learning_rate": 4.985551489847654e-06,
      "loss": 0.1784,
      "step": 896
    },
    {
      "epoch": 0.08264615101119455,
      "grad_norm": 1.0401378502092977,
      "learning_rate": 4.985469592152567e-06,
      "loss": 0.1867,
      "step": 897
    },
    {
      "epoch": 0.08273828718846456,
      "grad_norm": 1.0339494204766255,
      "learning_rate": 4.985387463681361e-06,
      "loss": 0.1986,
      "step": 898
    },
    {
      "epoch": 0.08283042336573455,
      "grad_norm": 1.1980946192748525,
      "learning_rate": 4.985305104441661e-06,
      "loss": 0.2064,
      "step": 899
    },
    {
      "epoch": 0.08292255954300456,
      "grad_norm": 1.0323921168593682,
      "learning_rate": 4.9852225144411156e-06,
      "loss": 0.2084,
      "step": 900
    },
    {
      "epoch": 0.08301469572027456,
      "grad_norm": 1.0136930453828628,
      "learning_rate": 4.985139693687392e-06,
      "loss": 0.1888,
      "step": 901
    },
    {
      "epoch": 0.08310683189754457,
      "grad_norm": 0.9971484787535447,
      "learning_rate": 4.985056642188179e-06,
      "loss": 0.2017,
      "step": 902
    },
    {
      "epoch": 0.08319896807481457,
      "grad_norm": 0.9555914001671217,
      "learning_rate": 4.984973359951192e-06,
      "loss": 0.1815,
      "step": 903
    },
    {
      "epoch": 0.08329110425208458,
      "grad_norm": 1.0364717190048256,
      "learning_rate": 4.984889846984159e-06,
      "loss": 0.188,
      "step": 904
    },
    {
      "epoch": 0.08338324042935459,
      "grad_norm": 1.005245975401244,
      "learning_rate": 4.984806103294837e-06,
      "loss": 0.1874,
      "step": 905
    },
    {
      "epoch": 0.08347537660662459,
      "grad_norm": 1.0742626967198226,
      "learning_rate": 4.9847221288910004e-06,
      "loss": 0.2091,
      "step": 906
    },
    {
      "epoch": 0.0835675127838946,
      "grad_norm": 1.0572271589829996,
      "learning_rate": 4.984637923780448e-06,
      "loss": 0.1896,
      "step": 907
    },
    {
      "epoch": 0.0836596489611646,
      "grad_norm": 1.072817626504438,
      "learning_rate": 4.984553487970995e-06,
      "loss": 0.2027,
      "step": 908
    },
    {
      "epoch": 0.0837517851384346,
      "grad_norm": 1.1697331427088027,
      "learning_rate": 4.984468821470485e-06,
      "loss": 0.2009,
      "step": 909
    },
    {
      "epoch": 0.08384392131570462,
      "grad_norm": 1.099615722951779,
      "learning_rate": 4.984383924286776e-06,
      "loss": 0.2073,
      "step": 910
    },
    {
      "epoch": 0.08393605749297461,
      "grad_norm": 1.1067206700378336,
      "learning_rate": 4.984298796427754e-06,
      "loss": 0.2053,
      "step": 911
    },
    {
      "epoch": 0.08402819367024462,
      "grad_norm": 1.1386230456096778,
      "learning_rate": 4.984213437901321e-06,
      "loss": 0.2053,
      "step": 912
    },
    {
      "epoch": 0.08412032984751462,
      "grad_norm": 1.051467856985353,
      "learning_rate": 4.984127848715402e-06,
      "loss": 0.2002,
      "step": 913
    },
    {
      "epoch": 0.08421246602478463,
      "grad_norm": 1.0159847224874938,
      "learning_rate": 4.984042028877945e-06,
      "loss": 0.1739,
      "step": 914
    },
    {
      "epoch": 0.08430460220205464,
      "grad_norm": 1.1102387510829792,
      "learning_rate": 4.983955978396919e-06,
      "loss": 0.1952,
      "step": 915
    },
    {
      "epoch": 0.08439673837932464,
      "grad_norm": 1.044485002079122,
      "learning_rate": 4.983869697280312e-06,
      "loss": 0.2054,
      "step": 916
    },
    {
      "epoch": 0.08448887455659465,
      "grad_norm": 1.1181813544003023,
      "learning_rate": 4.983783185536137e-06,
      "loss": 0.1931,
      "step": 917
    },
    {
      "epoch": 0.08458101073386465,
      "grad_norm": 1.0630599308615696,
      "learning_rate": 4.983696443172426e-06,
      "loss": 0.1876,
      "step": 918
    },
    {
      "epoch": 0.08467314691113466,
      "grad_norm": 0.9944252347497624,
      "learning_rate": 4.983609470197233e-06,
      "loss": 0.1866,
      "step": 919
    },
    {
      "epoch": 0.08476528308840466,
      "grad_norm": 1.0413027140377702,
      "learning_rate": 4.983522266618633e-06,
      "loss": 0.196,
      "step": 920
    },
    {
      "epoch": 0.08485741926567467,
      "grad_norm": 1.1229738346239124,
      "learning_rate": 4.983434832444724e-06,
      "loss": 0.1916,
      "step": 921
    },
    {
      "epoch": 0.08494955544294468,
      "grad_norm": 1.033570992977324,
      "learning_rate": 4.983347167683623e-06,
      "loss": 0.1942,
      "step": 922
    },
    {
      "epoch": 0.08504169162021467,
      "grad_norm": 1.0615629937133617,
      "learning_rate": 4.98325927234347e-06,
      "loss": 0.1998,
      "step": 923
    },
    {
      "epoch": 0.08513382779748468,
      "grad_norm": 1.0603013903796452,
      "learning_rate": 4.983171146432427e-06,
      "loss": 0.1958,
      "step": 924
    },
    {
      "epoch": 0.08522596397475468,
      "grad_norm": 1.0836485017893724,
      "learning_rate": 4.983082789958675e-06,
      "loss": 0.1969,
      "step": 925
    },
    {
      "epoch": 0.08531810015202469,
      "grad_norm": 1.1918839586269157,
      "learning_rate": 4.9829942029304194e-06,
      "loss": 0.1979,
      "step": 926
    },
    {
      "epoch": 0.0854102363292947,
      "grad_norm": 1.0560845818273896,
      "learning_rate": 4.982905385355885e-06,
      "loss": 0.1971,
      "step": 927
    },
    {
      "epoch": 0.0855023725065647,
      "grad_norm": 1.0371941917693073,
      "learning_rate": 4.982816337243318e-06,
      "loss": 0.199,
      "step": 928
    },
    {
      "epoch": 0.08559450868383471,
      "grad_norm": 1.0181772522742907,
      "learning_rate": 4.982727058600987e-06,
      "loss": 0.1991,
      "step": 929
    },
    {
      "epoch": 0.08568664486110471,
      "grad_norm": 0.9936553320104157,
      "learning_rate": 4.98263754943718e-06,
      "loss": 0.1841,
      "step": 930
    },
    {
      "epoch": 0.08577878103837472,
      "grad_norm": 1.151859083100634,
      "learning_rate": 4.9825478097602115e-06,
      "loss": 0.19,
      "step": 931
    },
    {
      "epoch": 0.08587091721564473,
      "grad_norm": 1.1087091001896687,
      "learning_rate": 4.982457839578411e-06,
      "loss": 0.1975,
      "step": 932
    },
    {
      "epoch": 0.08596305339291473,
      "grad_norm": 1.0020521314066806,
      "learning_rate": 4.982367638900132e-06,
      "loss": 0.206,
      "step": 933
    },
    {
      "epoch": 0.08605518957018474,
      "grad_norm": 1.0393467185829126,
      "learning_rate": 4.982277207733751e-06,
      "loss": 0.1917,
      "step": 934
    },
    {
      "epoch": 0.08614732574745473,
      "grad_norm": 1.3665091289700992,
      "learning_rate": 4.982186546087665e-06,
      "loss": 0.2101,
      "step": 935
    },
    {
      "epoch": 0.08623946192472474,
      "grad_norm": 0.970728944149346,
      "learning_rate": 4.98209565397029e-06,
      "loss": 0.1853,
      "step": 936
    },
    {
      "epoch": 0.08633159810199474,
      "grad_norm": 1.0188718196066726,
      "learning_rate": 4.9820045313900675e-06,
      "loss": 0.1909,
      "step": 937
    },
    {
      "epoch": 0.08642373427926475,
      "grad_norm": 1.0079807293569125,
      "learning_rate": 4.981913178355456e-06,
      "loss": 0.1798,
      "step": 938
    },
    {
      "epoch": 0.08651587045653476,
      "grad_norm": 0.9823783578124373,
      "learning_rate": 4.981821594874939e-06,
      "loss": 0.1792,
      "step": 939
    },
    {
      "epoch": 0.08660800663380476,
      "grad_norm": 1.035277129236254,
      "learning_rate": 4.981729780957021e-06,
      "loss": 0.1908,
      "step": 940
    },
    {
      "epoch": 0.08670014281107477,
      "grad_norm": 1.0201631403526028,
      "learning_rate": 4.981637736610224e-06,
      "loss": 0.182,
      "step": 941
    },
    {
      "epoch": 0.08679227898834477,
      "grad_norm": 1.0623794729008158,
      "learning_rate": 4.981545461843098e-06,
      "loss": 0.1962,
      "step": 942
    },
    {
      "epoch": 0.08688441516561478,
      "grad_norm": 1.0952542593720789,
      "learning_rate": 4.9814529566642065e-06,
      "loss": 0.1876,
      "step": 943
    },
    {
      "epoch": 0.08697655134288479,
      "grad_norm": 1.0851540110165558,
      "learning_rate": 4.981360221082143e-06,
      "loss": 0.1981,
      "step": 944
    },
    {
      "epoch": 0.08706868752015479,
      "grad_norm": 1.117127612131708,
      "learning_rate": 4.9812672551055144e-06,
      "loss": 0.2034,
      "step": 945
    },
    {
      "epoch": 0.0871608236974248,
      "grad_norm": 1.210471734588479,
      "learning_rate": 4.981174058742955e-06,
      "loss": 0.201,
      "step": 946
    },
    {
      "epoch": 0.0872529598746948,
      "grad_norm": 1.1075330267974028,
      "learning_rate": 4.981080632003117e-06,
      "loss": 0.211,
      "step": 947
    },
    {
      "epoch": 0.0873450960519648,
      "grad_norm": 1.070657470857975,
      "learning_rate": 4.980986974894676e-06,
      "loss": 0.1781,
      "step": 948
    },
    {
      "epoch": 0.08743723222923482,
      "grad_norm": 1.0137187662237797,
      "learning_rate": 4.980893087426326e-06,
      "loss": 0.1832,
      "step": 949
    },
    {
      "epoch": 0.08752936840650481,
      "grad_norm": 1.2310842945020835,
      "learning_rate": 4.980798969606787e-06,
      "loss": 0.2071,
      "step": 950
    },
    {
      "epoch": 0.08762150458377482,
      "grad_norm": 0.9978182904900013,
      "learning_rate": 4.980704621444797e-06,
      "loss": 0.1889,
      "step": 951
    },
    {
      "epoch": 0.08771364076104482,
      "grad_norm": 1.1657157637314783,
      "learning_rate": 4.980610042949115e-06,
      "loss": 0.2151,
      "step": 952
    },
    {
      "epoch": 0.08780577693831483,
      "grad_norm": 1.0615920322109136,
      "learning_rate": 4.980515234128522e-06,
      "loss": 0.1894,
      "step": 953
    },
    {
      "epoch": 0.08789791311558483,
      "grad_norm": 0.9965168434781553,
      "learning_rate": 4.980420194991826e-06,
      "loss": 0.1723,
      "step": 954
    },
    {
      "epoch": 0.08799004929285484,
      "grad_norm": 1.0223984489495057,
      "learning_rate": 4.980324925547845e-06,
      "loss": 0.2016,
      "step": 955
    },
    {
      "epoch": 0.08808218547012485,
      "grad_norm": 1.0267736631959716,
      "learning_rate": 4.980229425805429e-06,
      "loss": 0.1948,
      "step": 956
    },
    {
      "epoch": 0.08817432164739485,
      "grad_norm": 1.0487644444256707,
      "learning_rate": 4.9801336957734435e-06,
      "loss": 0.1976,
      "step": 957
    },
    {
      "epoch": 0.08826645782466486,
      "grad_norm": 0.9800925451113316,
      "learning_rate": 4.980037735460778e-06,
      "loss": 0.1884,
      "step": 958
    },
    {
      "epoch": 0.08835859400193485,
      "grad_norm": 1.0512460138994213,
      "learning_rate": 4.9799415448763414e-06,
      "loss": 0.1905,
      "step": 959
    },
    {
      "epoch": 0.08845073017920486,
      "grad_norm": 1.0438604822998108,
      "learning_rate": 4.979845124029066e-06,
      "loss": 0.1997,
      "step": 960
    },
    {
      "epoch": 0.08854286635647488,
      "grad_norm": 1.1256703426732806,
      "learning_rate": 4.979748472927903e-06,
      "loss": 0.1826,
      "step": 961
    },
    {
      "epoch": 0.08863500253374487,
      "grad_norm": 1.1642895647997922,
      "learning_rate": 4.979651591581829e-06,
      "loss": 0.1938,
      "step": 962
    },
    {
      "epoch": 0.08872713871101488,
      "grad_norm": 1.061865516308078,
      "learning_rate": 4.979554479999836e-06,
      "loss": 0.1979,
      "step": 963
    },
    {
      "epoch": 0.08881927488828488,
      "grad_norm": 1.0511995629982513,
      "learning_rate": 4.979457138190944e-06,
      "loss": 0.1991,
      "step": 964
    },
    {
      "epoch": 0.08891141106555489,
      "grad_norm": 1.0627898550832569,
      "learning_rate": 4.979359566164189e-06,
      "loss": 0.1892,
      "step": 965
    },
    {
      "epoch": 0.0890035472428249,
      "grad_norm": 1.1520194609491567,
      "learning_rate": 4.979261763928632e-06,
      "loss": 0.2088,
      "step": 966
    },
    {
      "epoch": 0.0890956834200949,
      "grad_norm": 1.0750536562889166,
      "learning_rate": 4.979163731493354e-06,
      "loss": 0.2057,
      "step": 967
    },
    {
      "epoch": 0.08918781959736491,
      "grad_norm": 1.0233899360449537,
      "learning_rate": 4.979065468867456e-06,
      "loss": 0.1966,
      "step": 968
    },
    {
      "epoch": 0.0892799557746349,
      "grad_norm": 1.0189667297065197,
      "learning_rate": 4.978966976060062e-06,
      "loss": 0.1893,
      "step": 969
    },
    {
      "epoch": 0.08937209195190492,
      "grad_norm": 0.9615841945851383,
      "learning_rate": 4.978868253080318e-06,
      "loss": 0.1795,
      "step": 970
    },
    {
      "epoch": 0.08946422812917491,
      "grad_norm": 1.0374752006939945,
      "learning_rate": 4.9787692999373895e-06,
      "loss": 0.1927,
      "step": 971
    },
    {
      "epoch": 0.08955636430644492,
      "grad_norm": 0.9781931331030623,
      "learning_rate": 4.978670116640465e-06,
      "loss": 0.1886,
      "step": 972
    },
    {
      "epoch": 0.08964850048371494,
      "grad_norm": 1.0482569650890077,
      "learning_rate": 4.978570703198754e-06,
      "loss": 0.2073,
      "step": 973
    },
    {
      "epoch": 0.08974063666098493,
      "grad_norm": 1.0307954204033831,
      "learning_rate": 4.978471059621486e-06,
      "loss": 0.2001,
      "step": 974
    },
    {
      "epoch": 0.08983277283825494,
      "grad_norm": 0.9504730932711084,
      "learning_rate": 4.978371185917913e-06,
      "loss": 0.1871,
      "step": 975
    },
    {
      "epoch": 0.08992490901552494,
      "grad_norm": 1.0212992592289591,
      "learning_rate": 4.978271082097309e-06,
      "loss": 0.1865,
      "step": 976
    },
    {
      "epoch": 0.09001704519279495,
      "grad_norm": 1.0584402344944974,
      "learning_rate": 4.978170748168968e-06,
      "loss": 0.1827,
      "step": 977
    },
    {
      "epoch": 0.09010918137006496,
      "grad_norm": 1.0265243323297528,
      "learning_rate": 4.978070184142207e-06,
      "loss": 0.1955,
      "step": 978
    },
    {
      "epoch": 0.09020131754733496,
      "grad_norm": 0.99918479647745,
      "learning_rate": 4.977969390026362e-06,
      "loss": 0.1902,
      "step": 979
    },
    {
      "epoch": 0.09029345372460497,
      "grad_norm": 1.0545620541518919,
      "learning_rate": 4.9778683658307925e-06,
      "loss": 0.1904,
      "step": 980
    },
    {
      "epoch": 0.09038558990187497,
      "grad_norm": 1.0303710328312456,
      "learning_rate": 4.977767111564879e-06,
      "loss": 0.1922,
      "step": 981
    },
    {
      "epoch": 0.09047772607914498,
      "grad_norm": 1.0446625626228556,
      "learning_rate": 4.977665627238023e-06,
      "loss": 0.1855,
      "step": 982
    },
    {
      "epoch": 0.09056986225641499,
      "grad_norm": 1.0552315927395985,
      "learning_rate": 4.977563912859645e-06,
      "loss": 0.1869,
      "step": 983
    },
    {
      "epoch": 0.09066199843368498,
      "grad_norm": 1.003974927091642,
      "learning_rate": 4.977461968439193e-06,
      "loss": 0.1923,
      "step": 984
    },
    {
      "epoch": 0.090754134610955,
      "grad_norm": 1.0230489571997252,
      "learning_rate": 4.9773597939861294e-06,
      "loss": 0.1856,
      "step": 985
    },
    {
      "epoch": 0.09084627078822499,
      "grad_norm": 1.0105592868572502,
      "learning_rate": 4.977257389509943e-06,
      "loss": 0.1929,
      "step": 986
    },
    {
      "epoch": 0.090938406965495,
      "grad_norm": 1.0725121055688818,
      "learning_rate": 4.9771547550201414e-06,
      "loss": 0.1856,
      "step": 987
    },
    {
      "epoch": 0.09103054314276501,
      "grad_norm": 1.0409324997207798,
      "learning_rate": 4.977051890526254e-06,
      "loss": 0.1922,
      "step": 988
    },
    {
      "epoch": 0.09112267932003501,
      "grad_norm": 1.0338303485068927,
      "learning_rate": 4.976948796037831e-06,
      "loss": 0.194,
      "step": 989
    },
    {
      "epoch": 0.09121481549730502,
      "grad_norm": 1.0689018179069636,
      "learning_rate": 4.976845471564447e-06,
      "loss": 0.1924,
      "step": 990
    },
    {
      "epoch": 0.09130695167457502,
      "grad_norm": 1.0394659919045186,
      "learning_rate": 4.976741917115695e-06,
      "loss": 0.1917,
      "step": 991
    },
    {
      "epoch": 0.09139908785184503,
      "grad_norm": 1.0150076898279992,
      "learning_rate": 4.976638132701188e-06,
      "loss": 0.1842,
      "step": 992
    },
    {
      "epoch": 0.09149122402911503,
      "grad_norm": 1.0841944088787114,
      "learning_rate": 4.976534118330565e-06,
      "loss": 0.1788,
      "step": 993
    },
    {
      "epoch": 0.09158336020638504,
      "grad_norm": 1.0401076161846878,
      "learning_rate": 4.9764298740134814e-06,
      "loss": 0.1901,
      "step": 994
    },
    {
      "epoch": 0.09167549638365505,
      "grad_norm": 0.9971280819093569,
      "learning_rate": 4.976325399759619e-06,
      "loss": 0.1951,
      "step": 995
    },
    {
      "epoch": 0.09176763256092504,
      "grad_norm": 0.9976774105341277,
      "learning_rate": 4.976220695578675e-06,
      "loss": 0.1741,
      "step": 996
    },
    {
      "epoch": 0.09185976873819506,
      "grad_norm": 1.0794336104421778,
      "learning_rate": 4.976115761480373e-06,
      "loss": 0.2019,
      "step": 997
    },
    {
      "epoch": 0.09195190491546505,
      "grad_norm": 1.1589141007240227,
      "learning_rate": 4.9760105974744576e-06,
      "loss": 0.2021,
      "step": 998
    },
    {
      "epoch": 0.09204404109273506,
      "grad_norm": 1.0959230462918457,
      "learning_rate": 4.97590520357069e-06,
      "loss": 0.1871,
      "step": 999
    },
    {
      "epoch": 0.09213617727000507,
      "grad_norm": 1.0170637360982258,
      "learning_rate": 4.97579957977886e-06,
      "loss": 0.192,
      "step": 1000
    },
    {
      "epoch": 0.09213617727000507,
      "eval_loss": 0.19303320348262787,
      "eval_runtime": 299.1988,
      "eval_samples_per_second": 23.453,
      "eval_steps_per_second": 2.935,
      "step": 1000
    },
    {
      "epoch": 0.09222831344727507,
      "grad_norm": 1.1568554165939138,
      "learning_rate": 4.97569372610877e-06,
      "loss": 0.1831,
      "step": 1001
    },
    {
      "epoch": 0.09232044962454508,
      "grad_norm": 1.1944389893604717,
      "learning_rate": 4.975587642570252e-06,
      "loss": 0.1828,
      "step": 1002
    },
    {
      "epoch": 0.09241258580181508,
      "grad_norm": 1.0170283022489994,
      "learning_rate": 4.975481329173156e-06,
      "loss": 0.1856,
      "step": 1003
    },
    {
      "epoch": 0.09250472197908509,
      "grad_norm": 1.0558442749265609,
      "learning_rate": 4.975374785927351e-06,
      "loss": 0.1847,
      "step": 1004
    },
    {
      "epoch": 0.0925968581563551,
      "grad_norm": 1.0094563700785,
      "learning_rate": 4.975268012842732e-06,
      "loss": 0.1876,
      "step": 1005
    },
    {
      "epoch": 0.0926889943336251,
      "grad_norm": 1.1605189632893436,
      "learning_rate": 4.97516100992921e-06,
      "loss": 0.1964,
      "step": 1006
    },
    {
      "epoch": 0.09278113051089511,
      "grad_norm": 1.0938956132780517,
      "learning_rate": 4.975053777196723e-06,
      "loss": 0.2036,
      "step": 1007
    },
    {
      "epoch": 0.0928732666881651,
      "grad_norm": 1.0767350046907365,
      "learning_rate": 4.974946314655226e-06,
      "loss": 0.2035,
      "step": 1008
    },
    {
      "epoch": 0.09296540286543511,
      "grad_norm": 0.9971359022353502,
      "learning_rate": 4.974838622314698e-06,
      "loss": 0.1969,
      "step": 1009
    },
    {
      "epoch": 0.09305753904270511,
      "grad_norm": 1.093729265610002,
      "learning_rate": 4.974730700185136e-06,
      "loss": 0.2025,
      "step": 1010
    },
    {
      "epoch": 0.09314967521997512,
      "grad_norm": 1.0774952559409026,
      "learning_rate": 4.974622548276564e-06,
      "loss": 0.2024,
      "step": 1011
    },
    {
      "epoch": 0.09324181139724513,
      "grad_norm": 1.009403051152341,
      "learning_rate": 4.974514166599021e-06,
      "loss": 0.1936,
      "step": 1012
    },
    {
      "epoch": 0.09333394757451513,
      "grad_norm": 1.0763173811540299,
      "learning_rate": 4.974405555162571e-06,
      "loss": 0.1912,
      "step": 1013
    },
    {
      "epoch": 0.09342608375178514,
      "grad_norm": 1.1012649720307428,
      "learning_rate": 4.9742967139773e-06,
      "loss": 0.2018,
      "step": 1014
    },
    {
      "epoch": 0.09351821992905514,
      "grad_norm": 0.9708640152172979,
      "learning_rate": 4.974187643053312e-06,
      "loss": 0.1711,
      "step": 1015
    },
    {
      "epoch": 0.09361035610632515,
      "grad_norm": 1.109835431173934,
      "learning_rate": 4.9740783424007355e-06,
      "loss": 0.1957,
      "step": 1016
    },
    {
      "epoch": 0.09370249228359516,
      "grad_norm": 1.1759709250141979,
      "learning_rate": 4.973968812029718e-06,
      "loss": 0.21,
      "step": 1017
    },
    {
      "epoch": 0.09379462846086516,
      "grad_norm": 0.9865822952697535,
      "learning_rate": 4.973859051950431e-06,
      "loss": 0.175,
      "step": 1018
    },
    {
      "epoch": 0.09388676463813517,
      "grad_norm": 1.1361881263400175,
      "learning_rate": 4.973749062173065e-06,
      "loss": 0.1994,
      "step": 1019
    },
    {
      "epoch": 0.09397890081540516,
      "grad_norm": 1.109853232173025,
      "learning_rate": 4.973638842707831e-06,
      "loss": 0.1969,
      "step": 1020
    },
    {
      "epoch": 0.09407103699267517,
      "grad_norm": 0.9731737615905076,
      "learning_rate": 4.973528393564965e-06,
      "loss": 0.1752,
      "step": 1021
    },
    {
      "epoch": 0.09416317316994519,
      "grad_norm": 1.074193893659733,
      "learning_rate": 4.973417714754721e-06,
      "loss": 0.194,
      "step": 1022
    },
    {
      "epoch": 0.09425530934721518,
      "grad_norm": 1.024455833636712,
      "learning_rate": 4.973306806287376e-06,
      "loss": 0.1903,
      "step": 1023
    },
    {
      "epoch": 0.0943474455244852,
      "grad_norm": 0.9573557205309052,
      "learning_rate": 4.9731956681732284e-06,
      "loss": 0.1828,
      "step": 1024
    },
    {
      "epoch": 0.09443958170175519,
      "grad_norm": 1.0124926306215474,
      "learning_rate": 4.973084300422597e-06,
      "loss": 0.1931,
      "step": 1025
    },
    {
      "epoch": 0.0945317178790252,
      "grad_norm": 1.0815661147002018,
      "learning_rate": 4.972972703045822e-06,
      "loss": 0.195,
      "step": 1026
    },
    {
      "epoch": 0.0946238540562952,
      "grad_norm": 1.110883335805394,
      "learning_rate": 4.972860876053265e-06,
      "loss": 0.1982,
      "step": 1027
    },
    {
      "epoch": 0.09471599023356521,
      "grad_norm": 1.0693046793652852,
      "learning_rate": 4.97274881945531e-06,
      "loss": 0.1811,
      "step": 1028
    },
    {
      "epoch": 0.09480812641083522,
      "grad_norm": 1.1919130340115107,
      "learning_rate": 4.97263653326236e-06,
      "loss": 0.2073,
      "step": 1029
    },
    {
      "epoch": 0.09490026258810522,
      "grad_norm": 1.0577602885838477,
      "learning_rate": 4.972524017484842e-06,
      "loss": 0.1841,
      "step": 1030
    },
    {
      "epoch": 0.09499239876537523,
      "grad_norm": 1.1057953679804238,
      "learning_rate": 4.972411272133204e-06,
      "loss": 0.1848,
      "step": 1031
    },
    {
      "epoch": 0.09508453494264522,
      "grad_norm": 1.1472304844236627,
      "learning_rate": 4.972298297217913e-06,
      "loss": 0.1851,
      "step": 1032
    },
    {
      "epoch": 0.09517667111991523,
      "grad_norm": 1.0139962897450747,
      "learning_rate": 4.972185092749458e-06,
      "loss": 0.1888,
      "step": 1033
    },
    {
      "epoch": 0.09526880729718525,
      "grad_norm": 1.0237739212232981,
      "learning_rate": 4.972071658738352e-06,
      "loss": 0.1961,
      "step": 1034
    },
    {
      "epoch": 0.09536094347445524,
      "grad_norm": 1.1226050341955258,
      "learning_rate": 4.971957995195126e-06,
      "loss": 0.1919,
      "step": 1035
    },
    {
      "epoch": 0.09545307965172525,
      "grad_norm": 1.0615783114613073,
      "learning_rate": 4.971844102130334e-06,
      "loss": 0.1929,
      "step": 1036
    },
    {
      "epoch": 0.09554521582899525,
      "grad_norm": 1.016886377753397,
      "learning_rate": 4.971729979554551e-06,
      "loss": 0.1956,
      "step": 1037
    },
    {
      "epoch": 0.09563735200626526,
      "grad_norm": 1.0576392129215906,
      "learning_rate": 4.9716156274783746e-06,
      "loss": 0.186,
      "step": 1038
    },
    {
      "epoch": 0.09572948818353527,
      "grad_norm": 1.0326102477262193,
      "learning_rate": 4.9715010459124205e-06,
      "loss": 0.2068,
      "step": 1039
    },
    {
      "epoch": 0.09582162436080527,
      "grad_norm": 0.9670626996840229,
      "learning_rate": 4.971386234867328e-06,
      "loss": 0.197,
      "step": 1040
    },
    {
      "epoch": 0.09591376053807528,
      "grad_norm": 1.0570562814072233,
      "learning_rate": 4.971271194353757e-06,
      "loss": 0.1895,
      "step": 1041
    },
    {
      "epoch": 0.09600589671534528,
      "grad_norm": 1.0598769897437053,
      "learning_rate": 4.971155924382392e-06,
      "loss": 0.1856,
      "step": 1042
    },
    {
      "epoch": 0.09609803289261529,
      "grad_norm": 0.9717398940768603,
      "learning_rate": 4.971040424963931e-06,
      "loss": 0.1694,
      "step": 1043
    },
    {
      "epoch": 0.09619016906988528,
      "grad_norm": 1.0484387235197232,
      "learning_rate": 4.970924696109102e-06,
      "loss": 0.1974,
      "step": 1044
    },
    {
      "epoch": 0.0962823052471553,
      "grad_norm": 1.0682781178742418,
      "learning_rate": 4.970808737828648e-06,
      "loss": 0.214,
      "step": 1045
    },
    {
      "epoch": 0.0963744414244253,
      "grad_norm": 1.0480298626312177,
      "learning_rate": 4.970692550133337e-06,
      "loss": 0.1959,
      "step": 1046
    },
    {
      "epoch": 0.0964665776016953,
      "grad_norm": 1.1064752815320091,
      "learning_rate": 4.970576133033958e-06,
      "loss": 0.1924,
      "step": 1047
    },
    {
      "epoch": 0.09655871377896531,
      "grad_norm": 1.0614078272511498,
      "learning_rate": 4.970459486541318e-06,
      "loss": 0.2013,
      "step": 1048
    },
    {
      "epoch": 0.09665084995623531,
      "grad_norm": 1.064795035168974,
      "learning_rate": 4.970342610666249e-06,
      "loss": 0.1947,
      "step": 1049
    },
    {
      "epoch": 0.09674298613350532,
      "grad_norm": 0.9799709467241581,
      "learning_rate": 4.970225505419602e-06,
      "loss": 0.1769,
      "step": 1050
    },
    {
      "epoch": 0.09683512231077533,
      "grad_norm": 1.043741540422472,
      "learning_rate": 4.970108170812252e-06,
      "loss": 0.1953,
      "step": 1051
    },
    {
      "epoch": 0.09692725848804533,
      "grad_norm": 1.061623477697688,
      "learning_rate": 4.969990606855093e-06,
      "loss": 0.2071,
      "step": 1052
    },
    {
      "epoch": 0.09701939466531534,
      "grad_norm": 1.1020868177462027,
      "learning_rate": 4.969872813559039e-06,
      "loss": 0.1821,
      "step": 1053
    },
    {
      "epoch": 0.09711153084258534,
      "grad_norm": 0.988163345783669,
      "learning_rate": 4.9697547909350295e-06,
      "loss": 0.1987,
      "step": 1054
    },
    {
      "epoch": 0.09720366701985535,
      "grad_norm": 1.0267991125472582,
      "learning_rate": 4.969636538994021e-06,
      "loss": 0.1918,
      "step": 1055
    },
    {
      "epoch": 0.09729580319712536,
      "grad_norm": 1.070032876028796,
      "learning_rate": 4.969518057746995e-06,
      "loss": 0.2002,
      "step": 1056
    },
    {
      "epoch": 0.09738793937439535,
      "grad_norm": 0.954895748932761,
      "learning_rate": 4.969399347204951e-06,
      "loss": 0.1829,
      "step": 1057
    },
    {
      "epoch": 0.09748007555166537,
      "grad_norm": 1.0721201059228953,
      "learning_rate": 4.969280407378912e-06,
      "loss": 0.192,
      "step": 1058
    },
    {
      "epoch": 0.09757221172893536,
      "grad_norm": 1.0107730979473246,
      "learning_rate": 4.9691612382799215e-06,
      "loss": 0.194,
      "step": 1059
    },
    {
      "epoch": 0.09766434790620537,
      "grad_norm": 0.9854433143433244,
      "learning_rate": 4.969041839919044e-06,
      "loss": 0.1909,
      "step": 1060
    },
    {
      "epoch": 0.09775648408347537,
      "grad_norm": 1.0348738031722355,
      "learning_rate": 4.968922212307367e-06,
      "loss": 0.1922,
      "step": 1061
    },
    {
      "epoch": 0.09784862026074538,
      "grad_norm": 1.0040960545465387,
      "learning_rate": 4.968802355455995e-06,
      "loss": 0.1923,
      "step": 1062
    },
    {
      "epoch": 0.09794075643801539,
      "grad_norm": 1.0094386883868456,
      "learning_rate": 4.96868226937606e-06,
      "loss": 0.1751,
      "step": 1063
    },
    {
      "epoch": 0.09803289261528539,
      "grad_norm": 1.073963995133156,
      "learning_rate": 4.96856195407871e-06,
      "loss": 0.1931,
      "step": 1064
    },
    {
      "epoch": 0.0981250287925554,
      "grad_norm": 0.9703948692708834,
      "learning_rate": 4.968441409575117e-06,
      "loss": 0.1906,
      "step": 1065
    },
    {
      "epoch": 0.0982171649698254,
      "grad_norm": 1.06228397745977,
      "learning_rate": 4.968320635876473e-06,
      "loss": 0.1857,
      "step": 1066
    },
    {
      "epoch": 0.09830930114709541,
      "grad_norm": 1.0662602435123827,
      "learning_rate": 4.968199632993994e-06,
      "loss": 0.1943,
      "step": 1067
    },
    {
      "epoch": 0.09840143732436542,
      "grad_norm": 1.030389208026544,
      "learning_rate": 4.968078400938912e-06,
      "loss": 0.1981,
      "step": 1068
    },
    {
      "epoch": 0.09849357350163541,
      "grad_norm": 1.0075981112842045,
      "learning_rate": 4.967956939722485e-06,
      "loss": 0.1969,
      "step": 1069
    },
    {
      "epoch": 0.09858570967890543,
      "grad_norm": 0.958118698153524,
      "learning_rate": 4.967835249355991e-06,
      "loss": 0.1858,
      "step": 1070
    },
    {
      "epoch": 0.09867784585617542,
      "grad_norm": 0.9474495525140502,
      "learning_rate": 4.967713329850728e-06,
      "loss": 0.1859,
      "step": 1071
    },
    {
      "epoch": 0.09876998203344543,
      "grad_norm": 1.0554983849327597,
      "learning_rate": 4.967591181218017e-06,
      "loss": 0.192,
      "step": 1072
    },
    {
      "epoch": 0.09886211821071544,
      "grad_norm": 1.0152034702976793,
      "learning_rate": 4.967468803469199e-06,
      "loss": 0.195,
      "step": 1073
    },
    {
      "epoch": 0.09895425438798544,
      "grad_norm": 1.0168864653893954,
      "learning_rate": 4.967346196615638e-06,
      "loss": 0.1767,
      "step": 1074
    },
    {
      "epoch": 0.09904639056525545,
      "grad_norm": 1.043221440739377,
      "learning_rate": 4.967223360668716e-06,
      "loss": 0.1846,
      "step": 1075
    },
    {
      "epoch": 0.09913852674252545,
      "grad_norm": 1.0048446057039784,
      "learning_rate": 4.9671002956398395e-06,
      "loss": 0.1973,
      "step": 1076
    },
    {
      "epoch": 0.09923066291979546,
      "grad_norm": 0.9969031964290073,
      "learning_rate": 4.966977001540436e-06,
      "loss": 0.1926,
      "step": 1077
    },
    {
      "epoch": 0.09932279909706546,
      "grad_norm": 1.052777627221389,
      "learning_rate": 4.966853478381951e-06,
      "loss": 0.1995,
      "step": 1078
    },
    {
      "epoch": 0.09941493527433547,
      "grad_norm": 1.068967031109921,
      "learning_rate": 4.966729726175857e-06,
      "loss": 0.1848,
      "step": 1079
    },
    {
      "epoch": 0.09950707145160548,
      "grad_norm": 1.160503081694701,
      "learning_rate": 4.96660574493364e-06,
      "loss": 0.1954,
      "step": 1080
    },
    {
      "epoch": 0.09959920762887547,
      "grad_norm": 1.0796967030397735,
      "learning_rate": 4.9664815346668165e-06,
      "loss": 0.2055,
      "step": 1081
    },
    {
      "epoch": 0.09969134380614549,
      "grad_norm": 1.005831162809125,
      "learning_rate": 4.966357095386915e-06,
      "loss": 0.1972,
      "step": 1082
    },
    {
      "epoch": 0.09978347998341548,
      "grad_norm": 1.0052790015292061,
      "learning_rate": 4.966232427105493e-06,
      "loss": 0.1825,
      "step": 1083
    },
    {
      "epoch": 0.0998756161606855,
      "grad_norm": 1.0082971570804145,
      "learning_rate": 4.9661075298341245e-06,
      "loss": 0.1679,
      "step": 1084
    },
    {
      "epoch": 0.0999677523379555,
      "grad_norm": 1.002477114385746,
      "learning_rate": 4.965982403584406e-06,
      "loss": 0.1978,
      "step": 1085
    },
    {
      "epoch": 0.1000598885152255,
      "grad_norm": 1.0561649001397835,
      "learning_rate": 4.965857048367956e-06,
      "loss": 0.2016,
      "step": 1086
    },
    {
      "epoch": 0.10015202469249551,
      "grad_norm": 1.090128521697667,
      "learning_rate": 4.965731464196415e-06,
      "loss": 0.1981,
      "step": 1087
    },
    {
      "epoch": 0.10024416086976551,
      "grad_norm": 1.1605860564434374,
      "learning_rate": 4.96560565108144e-06,
      "loss": 0.2093,
      "step": 1088
    },
    {
      "epoch": 0.10033629704703552,
      "grad_norm": 0.9908203729796794,
      "learning_rate": 4.965479609034717e-06,
      "loss": 0.1761,
      "step": 1089
    },
    {
      "epoch": 0.10042843322430553,
      "grad_norm": 1.0689295025072343,
      "learning_rate": 4.9653533380679455e-06,
      "loss": 0.2124,
      "step": 1090
    },
    {
      "epoch": 0.10052056940157553,
      "grad_norm": 0.9557530326333923,
      "learning_rate": 4.965226838192852e-06,
      "loss": 0.1835,
      "step": 1091
    },
    {
      "epoch": 0.10061270557884554,
      "grad_norm": 0.9607802521798345,
      "learning_rate": 4.965100109421182e-06,
      "loss": 0.1779,
      "step": 1092
    },
    {
      "epoch": 0.10070484175611553,
      "grad_norm": 1.0016875203479627,
      "learning_rate": 4.9649731517647e-06,
      "loss": 0.1769,
      "step": 1093
    },
    {
      "epoch": 0.10079697793338555,
      "grad_norm": 1.0440688028642757,
      "learning_rate": 4.964845965235196e-06,
      "loss": 0.1934,
      "step": 1094
    },
    {
      "epoch": 0.10088911411065554,
      "grad_norm": 1.099885377144038,
      "learning_rate": 4.964718549844479e-06,
      "loss": 0.2077,
      "step": 1095
    },
    {
      "epoch": 0.10098125028792555,
      "grad_norm": 1.0364884967207673,
      "learning_rate": 4.964590905604379e-06,
      "loss": 0.1839,
      "step": 1096
    },
    {
      "epoch": 0.10107338646519556,
      "grad_norm": 1.119047007151761,
      "learning_rate": 4.964463032526749e-06,
      "loss": 0.1921,
      "step": 1097
    },
    {
      "epoch": 0.10116552264246556,
      "grad_norm": 1.0229016109535547,
      "learning_rate": 4.9643349306234615e-06,
      "loss": 0.1873,
      "step": 1098
    },
    {
      "epoch": 0.10125765881973557,
      "grad_norm": 1.0007547076017496,
      "learning_rate": 4.96420659990641e-06,
      "loss": 0.1809,
      "step": 1099
    },
    {
      "epoch": 0.10134979499700557,
      "grad_norm": 1.1215031197209377,
      "learning_rate": 4.9640780403875095e-06,
      "loss": 0.1995,
      "step": 1100
    },
    {
      "epoch": 0.10144193117427558,
      "grad_norm": 1.0890006150478866,
      "learning_rate": 4.963949252078698e-06,
      "loss": 0.1965,
      "step": 1101
    },
    {
      "epoch": 0.10153406735154559,
      "grad_norm": 1.0198293652323223,
      "learning_rate": 4.963820234991934e-06,
      "loss": 0.2028,
      "step": 1102
    },
    {
      "epoch": 0.10162620352881559,
      "grad_norm": 0.9681566672517501,
      "learning_rate": 4.963690989139196e-06,
      "loss": 0.1775,
      "step": 1103
    },
    {
      "epoch": 0.1017183397060856,
      "grad_norm": 0.9842979036405417,
      "learning_rate": 4.963561514532485e-06,
      "loss": 0.174,
      "step": 1104
    },
    {
      "epoch": 0.1018104758833556,
      "grad_norm": 1.056487078327593,
      "learning_rate": 4.963431811183821e-06,
      "loss": 0.1923,
      "step": 1105
    },
    {
      "epoch": 0.1019026120606256,
      "grad_norm": 0.9681514651053884,
      "learning_rate": 4.963301879105249e-06,
      "loss": 0.1735,
      "step": 1106
    },
    {
      "epoch": 0.10199474823789562,
      "grad_norm": 1.0279411063186674,
      "learning_rate": 4.963171718308833e-06,
      "loss": 0.1939,
      "step": 1107
    },
    {
      "epoch": 0.10208688441516561,
      "grad_norm": 1.0199638016460721,
      "learning_rate": 4.963041328806656e-06,
      "loss": 0.184,
      "step": 1108
    },
    {
      "epoch": 0.10217902059243562,
      "grad_norm": 1.0831824990470151,
      "learning_rate": 4.962910710610827e-06,
      "loss": 0.1919,
      "step": 1109
    },
    {
      "epoch": 0.10227115676970562,
      "grad_norm": 1.0908326206048449,
      "learning_rate": 4.962779863733475e-06,
      "loss": 0.1765,
      "step": 1110
    },
    {
      "epoch": 0.10236329294697563,
      "grad_norm": 1.0045235303450732,
      "learning_rate": 4.962648788186747e-06,
      "loss": 0.1892,
      "step": 1111
    },
    {
      "epoch": 0.10245542912424563,
      "grad_norm": 0.9971996009560316,
      "learning_rate": 4.9625174839828135e-06,
      "loss": 0.1818,
      "step": 1112
    },
    {
      "epoch": 0.10254756530151564,
      "grad_norm": 1.1008529906643778,
      "learning_rate": 4.9623859511338664e-06,
      "loss": 0.1859,
      "step": 1113
    },
    {
      "epoch": 0.10263970147878565,
      "grad_norm": 1.0675327190930683,
      "learning_rate": 4.962254189652119e-06,
      "loss": 0.1938,
      "step": 1114
    },
    {
      "epoch": 0.10273183765605565,
      "grad_norm": 0.9678039471099696,
      "learning_rate": 4.962122199549806e-06,
      "loss": 0.1842,
      "step": 1115
    },
    {
      "epoch": 0.10282397383332566,
      "grad_norm": 0.9783428439299713,
      "learning_rate": 4.96198998083918e-06,
      "loss": 0.1868,
      "step": 1116
    },
    {
      "epoch": 0.10291611001059565,
      "grad_norm": 1.0795078906373854,
      "learning_rate": 4.961857533532521e-06,
      "loss": 0.2017,
      "step": 1117
    },
    {
      "epoch": 0.10300824618786567,
      "grad_norm": 0.9862093313250959,
      "learning_rate": 4.961724857642125e-06,
      "loss": 0.188,
      "step": 1118
    },
    {
      "epoch": 0.10310038236513568,
      "grad_norm": 1.023997592371275,
      "learning_rate": 4.96159195318031e-06,
      "loss": 0.1807,
      "step": 1119
    },
    {
      "epoch": 0.10319251854240567,
      "grad_norm": 1.1350867035477064,
      "learning_rate": 4.9614588201594175e-06,
      "loss": 0.1962,
      "step": 1120
    },
    {
      "epoch": 0.10328465471967568,
      "grad_norm": 1.0486867617781612,
      "learning_rate": 4.961325458591809e-06,
      "loss": 0.1967,
      "step": 1121
    },
    {
      "epoch": 0.10337679089694568,
      "grad_norm": 0.9979044530364211,
      "learning_rate": 4.961191868489866e-06,
      "loss": 0.1847,
      "step": 1122
    },
    {
      "epoch": 0.10346892707421569,
      "grad_norm": 1.0898276661037767,
      "learning_rate": 4.961058049865994e-06,
      "loss": 0.1841,
      "step": 1123
    },
    {
      "epoch": 0.1035610632514857,
      "grad_norm": 1.0539187928170928,
      "learning_rate": 4.960924002732616e-06,
      "loss": 0.2036,
      "step": 1124
    },
    {
      "epoch": 0.1036531994287557,
      "grad_norm": 0.973520042589487,
      "learning_rate": 4.9607897271021815e-06,
      "loss": 0.1765,
      "step": 1125
    },
    {
      "epoch": 0.10374533560602571,
      "grad_norm": 1.0595893348731948,
      "learning_rate": 4.960655222987155e-06,
      "loss": 0.2013,
      "step": 1126
    },
    {
      "epoch": 0.1038374717832957,
      "grad_norm": 1.1099133446922225,
      "learning_rate": 4.960520490400026e-06,
      "loss": 0.1849,
      "step": 1127
    },
    {
      "epoch": 0.10392960796056572,
      "grad_norm": 0.9847206344296402,
      "learning_rate": 4.9603855293533045e-06,
      "loss": 0.1859,
      "step": 1128
    },
    {
      "epoch": 0.10402174413783571,
      "grad_norm": 1.0085016434462313,
      "learning_rate": 4.960250339859523e-06,
      "loss": 0.1922,
      "step": 1129
    },
    {
      "epoch": 0.10411388031510573,
      "grad_norm": 1.0132928372271228,
      "learning_rate": 4.960114921931231e-06,
      "loss": 0.1816,
      "step": 1130
    },
    {
      "epoch": 0.10420601649237574,
      "grad_norm": 0.945813808487549,
      "learning_rate": 4.959979275581005e-06,
      "loss": 0.1926,
      "step": 1131
    },
    {
      "epoch": 0.10429815266964573,
      "grad_norm": 1.091489002731477,
      "learning_rate": 4.959843400821438e-06,
      "loss": 0.187,
      "step": 1132
    },
    {
      "epoch": 0.10439028884691574,
      "grad_norm": 0.9602509824379453,
      "learning_rate": 4.959707297665146e-06,
      "loss": 0.1666,
      "step": 1133
    },
    {
      "epoch": 0.10448242502418574,
      "grad_norm": 1.070831104626253,
      "learning_rate": 4.959570966124768e-06,
      "loss": 0.1983,
      "step": 1134
    },
    {
      "epoch": 0.10457456120145575,
      "grad_norm": 1.0644935824954898,
      "learning_rate": 4.959434406212959e-06,
      "loss": 0.187,
      "step": 1135
    },
    {
      "epoch": 0.10466669737872576,
      "grad_norm": 1.18188816476157,
      "learning_rate": 4.959297617942403e-06,
      "loss": 0.2021,
      "step": 1136
    },
    {
      "epoch": 0.10475883355599576,
      "grad_norm": 1.0758197586073297,
      "learning_rate": 4.959160601325797e-06,
      "loss": 0.1852,
      "step": 1137
    },
    {
      "epoch": 0.10485096973326577,
      "grad_norm": 1.0990404429156002,
      "learning_rate": 4.959023356375866e-06,
      "loss": 0.1921,
      "step": 1138
    },
    {
      "epoch": 0.10494310591053577,
      "grad_norm": 1.028267704366153,
      "learning_rate": 4.9588858831053495e-06,
      "loss": 0.1953,
      "step": 1139
    },
    {
      "epoch": 0.10503524208780578,
      "grad_norm": 0.9471378455619729,
      "learning_rate": 4.958748181527016e-06,
      "loss": 0.1834,
      "step": 1140
    },
    {
      "epoch": 0.10512737826507579,
      "grad_norm": 1.0244112736454591,
      "learning_rate": 4.958610251653649e-06,
      "loss": 0.1766,
      "step": 1141
    },
    {
      "epoch": 0.10521951444234579,
      "grad_norm": 1.0541172839830792,
      "learning_rate": 4.958472093498055e-06,
      "loss": 0.1942,
      "step": 1142
    },
    {
      "epoch": 0.1053116506196158,
      "grad_norm": 0.9910686182968134,
      "learning_rate": 4.9583337070730625e-06,
      "loss": 0.1839,
      "step": 1143
    },
    {
      "epoch": 0.10540378679688579,
      "grad_norm": 0.9317050369769572,
      "learning_rate": 4.958195092391521e-06,
      "loss": 0.1908,
      "step": 1144
    },
    {
      "epoch": 0.1054959229741558,
      "grad_norm": 1.01507009801172,
      "learning_rate": 4.958056249466301e-06,
      "loss": 0.1772,
      "step": 1145
    },
    {
      "epoch": 0.1055880591514258,
      "grad_norm": 0.9887305632286719,
      "learning_rate": 4.957917178310293e-06,
      "loss": 0.1931,
      "step": 1146
    },
    {
      "epoch": 0.10568019532869581,
      "grad_norm": 1.0393372989945,
      "learning_rate": 4.957777878936411e-06,
      "loss": 0.1966,
      "step": 1147
    },
    {
      "epoch": 0.10577233150596582,
      "grad_norm": 0.9892641791079512,
      "learning_rate": 4.957638351357587e-06,
      "loss": 0.1931,
      "step": 1148
    },
    {
      "epoch": 0.10586446768323582,
      "grad_norm": 0.9559643694345603,
      "learning_rate": 4.957498595586779e-06,
      "loss": 0.1823,
      "step": 1149
    },
    {
      "epoch": 0.10595660386050583,
      "grad_norm": 0.979023807959273,
      "learning_rate": 4.957358611636962e-06,
      "loss": 0.178,
      "step": 1150
    },
    {
      "epoch": 0.10604874003777583,
      "grad_norm": 1.0774638507040097,
      "learning_rate": 4.957218399521133e-06,
      "loss": 0.1908,
      "step": 1151
    },
    {
      "epoch": 0.10614087621504584,
      "grad_norm": 0.9904806549507087,
      "learning_rate": 4.957077959252311e-06,
      "loss": 0.1955,
      "step": 1152
    },
    {
      "epoch": 0.10623301239231585,
      "grad_norm": 1.0784191373101655,
      "learning_rate": 4.956937290843537e-06,
      "loss": 0.1904,
      "step": 1153
    },
    {
      "epoch": 0.10632514856958585,
      "grad_norm": 1.1522667775208266,
      "learning_rate": 4.95679639430787e-06,
      "loss": 0.2227,
      "step": 1154
    },
    {
      "epoch": 0.10641728474685586,
      "grad_norm": 0.9904608761960886,
      "learning_rate": 4.956655269658393e-06,
      "loss": 0.1822,
      "step": 1155
    },
    {
      "epoch": 0.10650942092412585,
      "grad_norm": 1.0503960405331592,
      "learning_rate": 4.956513916908211e-06,
      "loss": 0.1937,
      "step": 1156
    },
    {
      "epoch": 0.10660155710139586,
      "grad_norm": 1.0262513807644829,
      "learning_rate": 4.956372336070448e-06,
      "loss": 0.1808,
      "step": 1157
    },
    {
      "epoch": 0.10669369327866587,
      "grad_norm": 1.0342363298395292,
      "learning_rate": 4.956230527158248e-06,
      "loss": 0.1749,
      "step": 1158
    },
    {
      "epoch": 0.10678582945593587,
      "grad_norm": 0.9789103538040284,
      "learning_rate": 4.95608849018478e-06,
      "loss": 0.1849,
      "step": 1159
    },
    {
      "epoch": 0.10687796563320588,
      "grad_norm": 1.0614565305811643,
      "learning_rate": 4.95594622516323e-06,
      "loss": 0.2029,
      "step": 1160
    },
    {
      "epoch": 0.10697010181047588,
      "grad_norm": 1.0560558999226575,
      "learning_rate": 4.95580373210681e-06,
      "loss": 0.2107,
      "step": 1161
    },
    {
      "epoch": 0.10706223798774589,
      "grad_norm": 1.0797810477588987,
      "learning_rate": 4.955661011028748e-06,
      "loss": 0.2075,
      "step": 1162
    },
    {
      "epoch": 0.1071543741650159,
      "grad_norm": 1.0029566214938326,
      "learning_rate": 4.955518061942298e-06,
      "loss": 0.1868,
      "step": 1163
    },
    {
      "epoch": 0.1072465103422859,
      "grad_norm": 1.0588961555129832,
      "learning_rate": 4.955374884860731e-06,
      "loss": 0.2038,
      "step": 1164
    },
    {
      "epoch": 0.10733864651955591,
      "grad_norm": 1.00256844787753,
      "learning_rate": 4.9552314797973426e-06,
      "loss": 0.2008,
      "step": 1165
    },
    {
      "epoch": 0.1074307826968259,
      "grad_norm": 1.0284873277323123,
      "learning_rate": 4.955087846765446e-06,
      "loss": 0.1886,
      "step": 1166
    },
    {
      "epoch": 0.10752291887409592,
      "grad_norm": 0.9660868860677466,
      "learning_rate": 4.954943985778379e-06,
      "loss": 0.1911,
      "step": 1167
    },
    {
      "epoch": 0.10761505505136591,
      "grad_norm": 1.006615188078031,
      "learning_rate": 4.954799896849499e-06,
      "loss": 0.1988,
      "step": 1168
    },
    {
      "epoch": 0.10770719122863592,
      "grad_norm": 1.0946500653930293,
      "learning_rate": 4.954655579992184e-06,
      "loss": 0.2008,
      "step": 1169
    },
    {
      "epoch": 0.10779932740590593,
      "grad_norm": 1.0096850735445058,
      "learning_rate": 4.954511035219835e-06,
      "loss": 0.1905,
      "step": 1170
    },
    {
      "epoch": 0.10789146358317593,
      "grad_norm": 0.9542605339416056,
      "learning_rate": 4.954366262545871e-06,
      "loss": 0.1893,
      "step": 1171
    },
    {
      "epoch": 0.10798359976044594,
      "grad_norm": 0.9358990406040504,
      "learning_rate": 4.954221261983736e-06,
      "loss": 0.1841,
      "step": 1172
    },
    {
      "epoch": 0.10807573593771594,
      "grad_norm": 1.0250006987149098,
      "learning_rate": 4.954076033546892e-06,
      "loss": 0.1942,
      "step": 1173
    },
    {
      "epoch": 0.10816787211498595,
      "grad_norm": 1.02972348562299,
      "learning_rate": 4.953930577248825e-06,
      "loss": 0.1924,
      "step": 1174
    },
    {
      "epoch": 0.10826000829225596,
      "grad_norm": 0.9230680708989243,
      "learning_rate": 4.95378489310304e-06,
      "loss": 0.1795,
      "step": 1175
    },
    {
      "epoch": 0.10835214446952596,
      "grad_norm": 1.006934556434401,
      "learning_rate": 4.953638981123063e-06,
      "loss": 0.1859,
      "step": 1176
    },
    {
      "epoch": 0.10844428064679597,
      "grad_norm": 0.9479125968654736,
      "learning_rate": 4.9534928413224424e-06,
      "loss": 0.1685,
      "step": 1177
    },
    {
      "epoch": 0.10853641682406596,
      "grad_norm": 1.002904725939237,
      "learning_rate": 4.953346473714748e-06,
      "loss": 0.1972,
      "step": 1178
    },
    {
      "epoch": 0.10862855300133598,
      "grad_norm": 0.9705753499726706,
      "learning_rate": 4.953199878313569e-06,
      "loss": 0.1833,
      "step": 1179
    },
    {
      "epoch": 0.10872068917860599,
      "grad_norm": 0.9865043838278399,
      "learning_rate": 4.953053055132518e-06,
      "loss": 0.1868,
      "step": 1180
    },
    {
      "epoch": 0.10881282535587598,
      "grad_norm": 0.9364729281823607,
      "learning_rate": 4.9529060041852264e-06,
      "loss": 0.1877,
      "step": 1181
    },
    {
      "epoch": 0.108904961533146,
      "grad_norm": 0.9197551228545804,
      "learning_rate": 4.9527587254853485e-06,
      "loss": 0.1765,
      "step": 1182
    },
    {
      "epoch": 0.10899709771041599,
      "grad_norm": 0.9930734696539932,
      "learning_rate": 4.952611219046559e-06,
      "loss": 0.1975,
      "step": 1183
    },
    {
      "epoch": 0.109089233887686,
      "grad_norm": 0.9438054002784088,
      "learning_rate": 4.952463484882553e-06,
      "loss": 0.1746,
      "step": 1184
    },
    {
      "epoch": 0.109181370064956,
      "grad_norm": 1.0275596921449845,
      "learning_rate": 4.9523155230070495e-06,
      "loss": 0.1882,
      "step": 1185
    },
    {
      "epoch": 0.10927350624222601,
      "grad_norm": 0.9827711445828464,
      "learning_rate": 4.952167333433785e-06,
      "loss": 0.1813,
      "step": 1186
    },
    {
      "epoch": 0.10936564241949602,
      "grad_norm": 1.0596005745086683,
      "learning_rate": 4.952018916176521e-06,
      "loss": 0.1867,
      "step": 1187
    },
    {
      "epoch": 0.10945777859676602,
      "grad_norm": 0.9426441584350082,
      "learning_rate": 4.9518702712490355e-06,
      "loss": 0.1697,
      "step": 1188
    },
    {
      "epoch": 0.10954991477403603,
      "grad_norm": 1.0978130786232543,
      "learning_rate": 4.951721398665131e-06,
      "loss": 0.195,
      "step": 1189
    },
    {
      "epoch": 0.10964205095130602,
      "grad_norm": 1.054271925935406,
      "learning_rate": 4.951572298438632e-06,
      "loss": 0.1778,
      "step": 1190
    },
    {
      "epoch": 0.10973418712857604,
      "grad_norm": 0.9363792710466154,
      "learning_rate": 4.95142297058338e-06,
      "loss": 0.182,
      "step": 1191
    },
    {
      "epoch": 0.10982632330584605,
      "grad_norm": 0.9582824053873974,
      "learning_rate": 4.951273415113243e-06,
      "loss": 0.191,
      "step": 1192
    },
    {
      "epoch": 0.10991845948311604,
      "grad_norm": 1.04768214799217,
      "learning_rate": 4.951123632042104e-06,
      "loss": 0.1876,
      "step": 1193
    },
    {
      "epoch": 0.11001059566038605,
      "grad_norm": 0.9511803599003008,
      "learning_rate": 4.950973621383873e-06,
      "loss": 0.1682,
      "step": 1194
    },
    {
      "epoch": 0.11010273183765605,
      "grad_norm": 1.0673589766836193,
      "learning_rate": 4.950823383152478e-06,
      "loss": 0.2048,
      "step": 1195
    },
    {
      "epoch": 0.11019486801492606,
      "grad_norm": 1.0445742808478182,
      "learning_rate": 4.9506729173618675e-06,
      "loss": 0.1819,
      "step": 1196
    },
    {
      "epoch": 0.11028700419219607,
      "grad_norm": 0.8806288475750527,
      "learning_rate": 4.950522224026012e-06,
      "loss": 0.1729,
      "step": 1197
    },
    {
      "epoch": 0.11037914036946607,
      "grad_norm": 1.076709708388022,
      "learning_rate": 4.950371303158905e-06,
      "loss": 0.1789,
      "step": 1198
    },
    {
      "epoch": 0.11047127654673608,
      "grad_norm": 0.9229384393059257,
      "learning_rate": 4.950220154774559e-06,
      "loss": 0.1733,
      "step": 1199
    },
    {
      "epoch": 0.11056341272400608,
      "grad_norm": 0.9186878701388156,
      "learning_rate": 4.950068778887007e-06,
      "loss": 0.1753,
      "step": 1200
    },
    {
      "epoch": 0.11065554890127609,
      "grad_norm": 0.9700339527796721,
      "learning_rate": 4.949917175510307e-06,
      "loss": 0.1912,
      "step": 1201
    },
    {
      "epoch": 0.11074768507854608,
      "grad_norm": 0.9264578935919071,
      "learning_rate": 4.949765344658532e-06,
      "loss": 0.1807,
      "step": 1202
    },
    {
      "epoch": 0.1108398212558161,
      "grad_norm": 0.9953730707901259,
      "learning_rate": 4.949613286345781e-06,
      "loss": 0.1897,
      "step": 1203
    },
    {
      "epoch": 0.1109319574330861,
      "grad_norm": 0.9958814097903571,
      "learning_rate": 4.9494610005861745e-06,
      "loss": 0.1855,
      "step": 1204
    },
    {
      "epoch": 0.1110240936103561,
      "grad_norm": 0.9974041687614713,
      "learning_rate": 4.949308487393849e-06,
      "loss": 0.1887,
      "step": 1205
    },
    {
      "epoch": 0.11111622978762611,
      "grad_norm": 1.049073106143341,
      "learning_rate": 4.949155746782966e-06,
      "loss": 0.2123,
      "step": 1206
    },
    {
      "epoch": 0.11120836596489611,
      "grad_norm": 0.9442037997859811,
      "learning_rate": 4.94900277876771e-06,
      "loss": 0.1698,
      "step": 1207
    },
    {
      "epoch": 0.11130050214216612,
      "grad_norm": 1.0030492263525004,
      "learning_rate": 4.948849583362282e-06,
      "loss": 0.1939,
      "step": 1208
    },
    {
      "epoch": 0.11139263831943613,
      "grad_norm": 1.0918873927109156,
      "learning_rate": 4.948696160580907e-06,
      "loss": 0.2061,
      "step": 1209
    },
    {
      "epoch": 0.11148477449670613,
      "grad_norm": 0.9850049521735987,
      "learning_rate": 4.948542510437829e-06,
      "loss": 0.1791,
      "step": 1210
    },
    {
      "epoch": 0.11157691067397614,
      "grad_norm": 0.8914351041716434,
      "learning_rate": 4.948388632947316e-06,
      "loss": 0.1618,
      "step": 1211
    },
    {
      "epoch": 0.11166904685124614,
      "grad_norm": 0.9481190935623166,
      "learning_rate": 4.948234528123655e-06,
      "loss": 0.1926,
      "step": 1212
    },
    {
      "epoch": 0.11176118302851615,
      "grad_norm": 1.0067784660769212,
      "learning_rate": 4.948080195981154e-06,
      "loss": 0.1871,
      "step": 1213
    },
    {
      "epoch": 0.11185331920578616,
      "grad_norm": 0.962562766383318,
      "learning_rate": 4.947925636534144e-06,
      "loss": 0.1781,
      "step": 1214
    },
    {
      "epoch": 0.11194545538305616,
      "grad_norm": 1.0027653781996462,
      "learning_rate": 4.947770849796975e-06,
      "loss": 0.1888,
      "step": 1215
    },
    {
      "epoch": 0.11203759156032617,
      "grad_norm": 0.9736579394329236,
      "learning_rate": 4.9476158357840194e-06,
      "loss": 0.1795,
      "step": 1216
    },
    {
      "epoch": 0.11212972773759616,
      "grad_norm": 1.0500111563474652,
      "learning_rate": 4.9474605945096695e-06,
      "loss": 0.2044,
      "step": 1217
    },
    {
      "epoch": 0.11222186391486617,
      "grad_norm": 1.0505396188236562,
      "learning_rate": 4.94730512598834e-06,
      "loss": 0.1849,
      "step": 1218
    },
    {
      "epoch": 0.11231400009213617,
      "grad_norm": 0.9764803395123072,
      "learning_rate": 4.947149430234467e-06,
      "loss": 0.1906,
      "step": 1219
    },
    {
      "epoch": 0.11240613626940618,
      "grad_norm": 1.0016094631221018,
      "learning_rate": 4.946993507262505e-06,
      "loss": 0.1858,
      "step": 1220
    },
    {
      "epoch": 0.11249827244667619,
      "grad_norm": 0.9867478645758117,
      "learning_rate": 4.946837357086933e-06,
      "loss": 0.1871,
      "step": 1221
    },
    {
      "epoch": 0.11259040862394619,
      "grad_norm": 1.0536305452882067,
      "learning_rate": 4.946680979722249e-06,
      "loss": 0.2072,
      "step": 1222
    },
    {
      "epoch": 0.1126825448012162,
      "grad_norm": 1.0001369286623907,
      "learning_rate": 4.946524375182973e-06,
      "loss": 0.1849,
      "step": 1223
    },
    {
      "epoch": 0.1127746809784862,
      "grad_norm": 0.9590582772180609,
      "learning_rate": 4.946367543483645e-06,
      "loss": 0.1948,
      "step": 1224
    },
    {
      "epoch": 0.11286681715575621,
      "grad_norm": 1.0317577767091315,
      "learning_rate": 4.946210484638827e-06,
      "loss": 0.1842,
      "step": 1225
    },
    {
      "epoch": 0.11295895333302622,
      "grad_norm": 0.9817570764988467,
      "learning_rate": 4.946053198663103e-06,
      "loss": 0.1647,
      "step": 1226
    },
    {
      "epoch": 0.11305108951029622,
      "grad_norm": 1.0826573918808229,
      "learning_rate": 4.945895685571076e-06,
      "loss": 0.2078,
      "step": 1227
    },
    {
      "epoch": 0.11314322568756623,
      "grad_norm": 0.9688980456746177,
      "learning_rate": 4.945737945377372e-06,
      "loss": 0.1812,
      "step": 1228
    },
    {
      "epoch": 0.11323536186483622,
      "grad_norm": 0.992886307038925,
      "learning_rate": 4.945579978096635e-06,
      "loss": 0.1841,
      "step": 1229
    },
    {
      "epoch": 0.11332749804210623,
      "grad_norm": 1.0512449225985512,
      "learning_rate": 4.945421783743535e-06,
      "loss": 0.1818,
      "step": 1230
    },
    {
      "epoch": 0.11341963421937624,
      "grad_norm": 1.0098209511209224,
      "learning_rate": 4.945263362332759e-06,
      "loss": 0.1857,
      "step": 1231
    },
    {
      "epoch": 0.11351177039664624,
      "grad_norm": 1.1196581888871462,
      "learning_rate": 4.945104713879017e-06,
      "loss": 0.1891,
      "step": 1232
    },
    {
      "epoch": 0.11360390657391625,
      "grad_norm": 0.933560568750918,
      "learning_rate": 4.9449458383970386e-06,
      "loss": 0.1791,
      "step": 1233
    },
    {
      "epoch": 0.11369604275118625,
      "grad_norm": 1.0192174357061985,
      "learning_rate": 4.944786735901576e-06,
      "loss": 0.1794,
      "step": 1234
    },
    {
      "epoch": 0.11378817892845626,
      "grad_norm": 1.0521293689124385,
      "learning_rate": 4.944627406407401e-06,
      "loss": 0.1932,
      "step": 1235
    },
    {
      "epoch": 0.11388031510572626,
      "grad_norm": 0.9583785459922292,
      "learning_rate": 4.94446784992931e-06,
      "loss": 0.17,
      "step": 1236
    },
    {
      "epoch": 0.11397245128299627,
      "grad_norm": 1.050868615441966,
      "learning_rate": 4.9443080664821156e-06,
      "loss": 0.1939,
      "step": 1237
    },
    {
      "epoch": 0.11406458746026628,
      "grad_norm": 1.0764625733452038,
      "learning_rate": 4.944148056080654e-06,
      "loss": 0.1994,
      "step": 1238
    },
    {
      "epoch": 0.11415672363753628,
      "grad_norm": 1.0277252761190465,
      "learning_rate": 4.943987818739782e-06,
      "loss": 0.1876,
      "step": 1239
    },
    {
      "epoch": 0.11424885981480629,
      "grad_norm": 1.0350109470595392,
      "learning_rate": 4.943827354474378e-06,
      "loss": 0.1894,
      "step": 1240
    },
    {
      "epoch": 0.11434099599207628,
      "grad_norm": 1.0146682594155343,
      "learning_rate": 4.943666663299341e-06,
      "loss": 0.1729,
      "step": 1241
    },
    {
      "epoch": 0.1144331321693463,
      "grad_norm": 1.028187292870586,
      "learning_rate": 4.943505745229592e-06,
      "loss": 0.1855,
      "step": 1242
    },
    {
      "epoch": 0.1145252683466163,
      "grad_norm": 1.099924483199594,
      "learning_rate": 4.943344600280071e-06,
      "loss": 0.2035,
      "step": 1243
    },
    {
      "epoch": 0.1146174045238863,
      "grad_norm": 1.1454243464465912,
      "learning_rate": 4.943183228465742e-06,
      "loss": 0.1993,
      "step": 1244
    },
    {
      "epoch": 0.11470954070115631,
      "grad_norm": 0.9485855285325462,
      "learning_rate": 4.943021629801586e-06,
      "loss": 0.1691,
      "step": 1245
    },
    {
      "epoch": 0.11480167687842631,
      "grad_norm": 1.0626072276859084,
      "learning_rate": 4.9428598043026085e-06,
      "loss": 0.1846,
      "step": 1246
    },
    {
      "epoch": 0.11489381305569632,
      "grad_norm": 0.9269346524433504,
      "learning_rate": 4.942697751983837e-06,
      "loss": 0.1773,
      "step": 1247
    },
    {
      "epoch": 0.11498594923296633,
      "grad_norm": 0.9535100536348952,
      "learning_rate": 4.942535472860315e-06,
      "loss": 0.1787,
      "step": 1248
    },
    {
      "epoch": 0.11507808541023633,
      "grad_norm": 1.0023957002528299,
      "learning_rate": 4.942372966947112e-06,
      "loss": 0.1866,
      "step": 1249
    },
    {
      "epoch": 0.11517022158750634,
      "grad_norm": 0.994322779502017,
      "learning_rate": 4.942210234259316e-06,
      "loss": 0.1778,
      "step": 1250
    },
    {
      "epoch": 0.11526235776477634,
      "grad_norm": 0.9575100289627279,
      "learning_rate": 4.9420472748120365e-06,
      "loss": 0.1941,
      "step": 1251
    },
    {
      "epoch": 0.11535449394204635,
      "grad_norm": 0.9484403113363623,
      "learning_rate": 4.941884088620405e-06,
      "loss": 0.1731,
      "step": 1252
    },
    {
      "epoch": 0.11544663011931634,
      "grad_norm": 1.0017289164577379,
      "learning_rate": 4.941720675699573e-06,
      "loss": 0.1819,
      "step": 1253
    },
    {
      "epoch": 0.11553876629658635,
      "grad_norm": 0.9694215635786955,
      "learning_rate": 4.941557036064714e-06,
      "loss": 0.1838,
      "step": 1254
    },
    {
      "epoch": 0.11563090247385636,
      "grad_norm": 1.021592492535489,
      "learning_rate": 4.9413931697310215e-06,
      "loss": 0.197,
      "step": 1255
    },
    {
      "epoch": 0.11572303865112636,
      "grad_norm": 1.0982720789156573,
      "learning_rate": 4.941229076713709e-06,
      "loss": 0.1999,
      "step": 1256
    },
    {
      "epoch": 0.11581517482839637,
      "grad_norm": 0.9286929084767928,
      "learning_rate": 4.9410647570280156e-06,
      "loss": 0.1699,
      "step": 1257
    },
    {
      "epoch": 0.11590731100566637,
      "grad_norm": 1.0284887867762864,
      "learning_rate": 4.940900210689196e-06,
      "loss": 0.1965,
      "step": 1258
    },
    {
      "epoch": 0.11599944718293638,
      "grad_norm": 1.0434528337767535,
      "learning_rate": 4.94073543771253e-06,
      "loss": 0.1971,
      "step": 1259
    },
    {
      "epoch": 0.11609158336020639,
      "grad_norm": 0.9721395364025261,
      "learning_rate": 4.940570438113315e-06,
      "loss": 0.1916,
      "step": 1260
    },
    {
      "epoch": 0.11618371953747639,
      "grad_norm": 1.026835672769356,
      "learning_rate": 4.940405211906872e-06,
      "loss": 0.19,
      "step": 1261
    },
    {
      "epoch": 0.1162758557147464,
      "grad_norm": 0.983921489168916,
      "learning_rate": 4.9402397591085435e-06,
      "loss": 0.1901,
      "step": 1262
    },
    {
      "epoch": 0.1163679918920164,
      "grad_norm": 1.006030490851194,
      "learning_rate": 4.94007407973369e-06,
      "loss": 0.2099,
      "step": 1263
    },
    {
      "epoch": 0.1164601280692864,
      "grad_norm": 0.946916078921123,
      "learning_rate": 4.939908173797696e-06,
      "loss": 0.1796,
      "step": 1264
    },
    {
      "epoch": 0.11655226424655642,
      "grad_norm": 0.9186429153859957,
      "learning_rate": 4.939742041315964e-06,
      "loss": 0.1764,
      "step": 1265
    },
    {
      "epoch": 0.11664440042382641,
      "grad_norm": 1.0162264460794295,
      "learning_rate": 4.939575682303923e-06,
      "loss": 0.1946,
      "step": 1266
    },
    {
      "epoch": 0.11673653660109642,
      "grad_norm": 0.939647214901781,
      "learning_rate": 4.939409096777017e-06,
      "loss": 0.1816,
      "step": 1267
    },
    {
      "epoch": 0.11682867277836642,
      "grad_norm": 1.0127923569806176,
      "learning_rate": 4.939242284750712e-06,
      "loss": 0.1787,
      "step": 1268
    },
    {
      "epoch": 0.11692080895563643,
      "grad_norm": 0.9696949793271437,
      "learning_rate": 4.9390752462405e-06,
      "loss": 0.182,
      "step": 1269
    },
    {
      "epoch": 0.11701294513290643,
      "grad_norm": 1.0188460893399238,
      "learning_rate": 4.938907981261889e-06,
      "loss": 0.1925,
      "step": 1270
    },
    {
      "epoch": 0.11710508131017644,
      "grad_norm": 1.047535048711083,
      "learning_rate": 4.938740489830409e-06,
      "loss": 0.1858,
      "step": 1271
    },
    {
      "epoch": 0.11719721748744645,
      "grad_norm": 0.9839924846788682,
      "learning_rate": 4.938572771961612e-06,
      "loss": 0.193,
      "step": 1272
    },
    {
      "epoch": 0.11728935366471645,
      "grad_norm": 1.0763836514639684,
      "learning_rate": 4.93840482767107e-06,
      "loss": 0.1942,
      "step": 1273
    },
    {
      "epoch": 0.11738148984198646,
      "grad_norm": 0.9660646463485411,
      "learning_rate": 4.938236656974378e-06,
      "loss": 0.1703,
      "step": 1274
    },
    {
      "epoch": 0.11747362601925646,
      "grad_norm": 0.9776665253257636,
      "learning_rate": 4.9380682598871505e-06,
      "loss": 0.1821,
      "step": 1275
    },
    {
      "epoch": 0.11756576219652647,
      "grad_norm": 1.002267232847209,
      "learning_rate": 4.937899636425022e-06,
      "loss": 0.2,
      "step": 1276
    },
    {
      "epoch": 0.11765789837379648,
      "grad_norm": 0.9378913381320999,
      "learning_rate": 4.9377307866036506e-06,
      "loss": 0.1895,
      "step": 1277
    },
    {
      "epoch": 0.11775003455106647,
      "grad_norm": 0.9960120707110922,
      "learning_rate": 4.9375617104387124e-06,
      "loss": 0.1887,
      "step": 1278
    },
    {
      "epoch": 0.11784217072833648,
      "grad_norm": 1.0253843069745778,
      "learning_rate": 4.9373924079459076e-06,
      "loss": 0.1895,
      "step": 1279
    },
    {
      "epoch": 0.11793430690560648,
      "grad_norm": 0.9694205674452752,
      "learning_rate": 4.937222879140955e-06,
      "loss": 0.1948,
      "step": 1280
    },
    {
      "epoch": 0.11802644308287649,
      "grad_norm": 0.9340661317572511,
      "learning_rate": 4.937053124039597e-06,
      "loss": 0.1793,
      "step": 1281
    },
    {
      "epoch": 0.1181185792601465,
      "grad_norm": 1.0174735224747125,
      "learning_rate": 4.9368831426575925e-06,
      "loss": 0.182,
      "step": 1282
    },
    {
      "epoch": 0.1182107154374165,
      "grad_norm": 0.9767629316407481,
      "learning_rate": 4.9367129350107265e-06,
      "loss": 0.1837,
      "step": 1283
    },
    {
      "epoch": 0.11830285161468651,
      "grad_norm": 1.054908188076334,
      "learning_rate": 4.936542501114803e-06,
      "loss": 0.1852,
      "step": 1284
    },
    {
      "epoch": 0.11839498779195651,
      "grad_norm": 0.9595958456149299,
      "learning_rate": 4.936371840985645e-06,
      "loss": 0.17,
      "step": 1285
    },
    {
      "epoch": 0.11848712396922652,
      "grad_norm": 1.0916556011220866,
      "learning_rate": 4.9362009546391e-06,
      "loss": 0.1717,
      "step": 1286
    },
    {
      "epoch": 0.11857926014649652,
      "grad_norm": 1.0532332905680384,
      "learning_rate": 4.9360298420910335e-06,
      "loss": 0.1647,
      "step": 1287
    },
    {
      "epoch": 0.11867139632376653,
      "grad_norm": 1.069006714331314,
      "learning_rate": 4.935858503357335e-06,
      "loss": 0.1872,
      "step": 1288
    },
    {
      "epoch": 0.11876353250103654,
      "grad_norm": 1.0146730910464414,
      "learning_rate": 4.935686938453912e-06,
      "loss": 0.19,
      "step": 1289
    },
    {
      "epoch": 0.11885566867830653,
      "grad_norm": 1.1013136967926964,
      "learning_rate": 4.935515147396695e-06,
      "loss": 0.1942,
      "step": 1290
    },
    {
      "epoch": 0.11894780485557654,
      "grad_norm": 1.022490231526319,
      "learning_rate": 4.935343130201633e-06,
      "loss": 0.1719,
      "step": 1291
    },
    {
      "epoch": 0.11903994103284654,
      "grad_norm": 1.0736875280791962,
      "learning_rate": 4.935170886884701e-06,
      "loss": 0.1981,
      "step": 1292
    },
    {
      "epoch": 0.11913207721011655,
      "grad_norm": 1.0171104588512216,
      "learning_rate": 4.934998417461888e-06,
      "loss": 0.1832,
      "step": 1293
    },
    {
      "epoch": 0.11922421338738656,
      "grad_norm": 0.984348887268898,
      "learning_rate": 4.9348257219492116e-06,
      "loss": 0.1683,
      "step": 1294
    },
    {
      "epoch": 0.11931634956465656,
      "grad_norm": 1.0769453299087037,
      "learning_rate": 4.934652800362704e-06,
      "loss": 0.2061,
      "step": 1295
    },
    {
      "epoch": 0.11940848574192657,
      "grad_norm": 1.0409966331483476,
      "learning_rate": 4.934479652718422e-06,
      "loss": 0.1865,
      "step": 1296
    },
    {
      "epoch": 0.11950062191919657,
      "grad_norm": 1.046150491665991,
      "learning_rate": 4.934306279032442e-06,
      "loss": 0.1836,
      "step": 1297
    },
    {
      "epoch": 0.11959275809646658,
      "grad_norm": 1.0300935796451935,
      "learning_rate": 4.934132679320863e-06,
      "loss": 0.1818,
      "step": 1298
    },
    {
      "epoch": 0.11968489427373659,
      "grad_norm": 1.0223005476376819,
      "learning_rate": 4.933958853599803e-06,
      "loss": 0.2019,
      "step": 1299
    },
    {
      "epoch": 0.11977703045100659,
      "grad_norm": 0.9953052620305401,
      "learning_rate": 4.9337848018854005e-06,
      "loss": 0.191,
      "step": 1300
    },
    {
      "epoch": 0.1198691666282766,
      "grad_norm": 0.9167793138450512,
      "learning_rate": 4.933610524193817e-06,
      "loss": 0.1575,
      "step": 1301
    },
    {
      "epoch": 0.1199613028055466,
      "grad_norm": 0.9501468070448875,
      "learning_rate": 4.933436020541235e-06,
      "loss": 0.1935,
      "step": 1302
    },
    {
      "epoch": 0.1200534389828166,
      "grad_norm": 0.9908154738642798,
      "learning_rate": 4.933261290943856e-06,
      "loss": 0.1634,
      "step": 1303
    },
    {
      "epoch": 0.1201455751600866,
      "grad_norm": 1.0252708980694958,
      "learning_rate": 4.933086335417905e-06,
      "loss": 0.1912,
      "step": 1304
    },
    {
      "epoch": 0.12023771133735661,
      "grad_norm": 0.964216175527814,
      "learning_rate": 4.932911153979626e-06,
      "loss": 0.1788,
      "step": 1305
    },
    {
      "epoch": 0.12032984751462662,
      "grad_norm": 0.9947350149300663,
      "learning_rate": 4.932735746645284e-06,
      "loss": 0.1872,
      "step": 1306
    },
    {
      "epoch": 0.12042198369189662,
      "grad_norm": 1.0222442690970204,
      "learning_rate": 4.9325601134311665e-06,
      "loss": 0.1716,
      "step": 1307
    },
    {
      "epoch": 0.12051411986916663,
      "grad_norm": 0.9714762563929501,
      "learning_rate": 4.932384254353581e-06,
      "loss": 0.2076,
      "step": 1308
    },
    {
      "epoch": 0.12060625604643663,
      "grad_norm": 1.0023546871765083,
      "learning_rate": 4.932208169428855e-06,
      "loss": 0.1822,
      "step": 1309
    },
    {
      "epoch": 0.12069839222370664,
      "grad_norm": 0.9454582559482887,
      "learning_rate": 4.932031858673338e-06,
      "loss": 0.1814,
      "step": 1310
    },
    {
      "epoch": 0.12079052840097665,
      "grad_norm": 0.9685169957367546,
      "learning_rate": 4.931855322103403e-06,
      "loss": 0.1932,
      "step": 1311
    },
    {
      "epoch": 0.12088266457824665,
      "grad_norm": 0.9805185074459721,
      "learning_rate": 4.9316785597354385e-06,
      "loss": 0.1805,
      "step": 1312
    },
    {
      "epoch": 0.12097480075551666,
      "grad_norm": 0.9804392769335055,
      "learning_rate": 4.931501571585858e-06,
      "loss": 0.1845,
      "step": 1313
    },
    {
      "epoch": 0.12106693693278665,
      "grad_norm": 1.00322430997519,
      "learning_rate": 4.931324357671095e-06,
      "loss": 0.1851,
      "step": 1314
    },
    {
      "epoch": 0.12115907311005666,
      "grad_norm": 0.9328218448764897,
      "learning_rate": 4.931146918007604e-06,
      "loss": 0.1836,
      "step": 1315
    },
    {
      "epoch": 0.12125120928732668,
      "grad_norm": 0.9179723457446634,
      "learning_rate": 4.93096925261186e-06,
      "loss": 0.1784,
      "step": 1316
    },
    {
      "epoch": 0.12134334546459667,
      "grad_norm": 1.0730735318120903,
      "learning_rate": 4.930791361500359e-06,
      "loss": 0.1995,
      "step": 1317
    },
    {
      "epoch": 0.12143548164186668,
      "grad_norm": 1.0073445260759302,
      "learning_rate": 4.930613244689618e-06,
      "loss": 0.1876,
      "step": 1318
    },
    {
      "epoch": 0.12152761781913668,
      "grad_norm": 0.9666047869313055,
      "learning_rate": 4.930434902196177e-06,
      "loss": 0.1844,
      "step": 1319
    },
    {
      "epoch": 0.12161975399640669,
      "grad_norm": 0.9566473238654624,
      "learning_rate": 4.930256334036593e-06,
      "loss": 0.1834,
      "step": 1320
    },
    {
      "epoch": 0.12171189017367669,
      "grad_norm": 1.0697107399386463,
      "learning_rate": 4.930077540227447e-06,
      "loss": 0.1792,
      "step": 1321
    },
    {
      "epoch": 0.1218040263509467,
      "grad_norm": 0.9209255293099364,
      "learning_rate": 4.92989852078534e-06,
      "loss": 0.1636,
      "step": 1322
    },
    {
      "epoch": 0.12189616252821671,
      "grad_norm": 1.1125165716627952,
      "learning_rate": 4.929719275726893e-06,
      "loss": 0.1838,
      "step": 1323
    },
    {
      "epoch": 0.1219882987054867,
      "grad_norm": 0.9940504687221988,
      "learning_rate": 4.9295398050687505e-06,
      "loss": 0.1737,
      "step": 1324
    },
    {
      "epoch": 0.12208043488275672,
      "grad_norm": 1.0103196274970314,
      "learning_rate": 4.929360108827575e-06,
      "loss": 0.1867,
      "step": 1325
    },
    {
      "epoch": 0.12217257106002671,
      "grad_norm": 1.0723418698938951,
      "learning_rate": 4.929180187020053e-06,
      "loss": 0.1873,
      "step": 1326
    },
    {
      "epoch": 0.12226470723729672,
      "grad_norm": 0.9596676997934364,
      "learning_rate": 4.9290000396628875e-06,
      "loss": 0.1845,
      "step": 1327
    },
    {
      "epoch": 0.12235684341456673,
      "grad_norm": 0.9962848010523647,
      "learning_rate": 4.928819666772808e-06,
      "loss": 0.1789,
      "step": 1328
    },
    {
      "epoch": 0.12244897959183673,
      "grad_norm": 1.0363419257999569,
      "learning_rate": 4.9286390683665615e-06,
      "loss": 0.1886,
      "step": 1329
    },
    {
      "epoch": 0.12254111576910674,
      "grad_norm": 0.9278551228569101,
      "learning_rate": 4.9284582444609156e-06,
      "loss": 0.1816,
      "step": 1330
    },
    {
      "epoch": 0.12263325194637674,
      "grad_norm": 1.0265744616787111,
      "learning_rate": 4.9282771950726605e-06,
      "loss": 0.1864,
      "step": 1331
    },
    {
      "epoch": 0.12272538812364675,
      "grad_norm": 0.9152490785424434,
      "learning_rate": 4.928095920218606e-06,
      "loss": 0.1797,
      "step": 1332
    },
    {
      "epoch": 0.12281752430091676,
      "grad_norm": 0.8831438931297253,
      "learning_rate": 4.927914419915585e-06,
      "loss": 0.1858,
      "step": 1333
    },
    {
      "epoch": 0.12290966047818676,
      "grad_norm": 0.9599196224749852,
      "learning_rate": 4.927732694180448e-06,
      "loss": 0.1894,
      "step": 1334
    },
    {
      "epoch": 0.12300179665545677,
      "grad_norm": 0.9345601055819366,
      "learning_rate": 4.9275507430300694e-06,
      "loss": 0.19,
      "step": 1335
    },
    {
      "epoch": 0.12309393283272677,
      "grad_norm": 0.9531446635108866,
      "learning_rate": 4.927368566481343e-06,
      "loss": 0.1768,
      "step": 1336
    },
    {
      "epoch": 0.12318606900999678,
      "grad_norm": 0.8954454298056077,
      "learning_rate": 4.927186164551184e-06,
      "loss": 0.1661,
      "step": 1337
    },
    {
      "epoch": 0.12327820518726679,
      "grad_norm": 1.0406211772874898,
      "learning_rate": 4.927003537256528e-06,
      "loss": 0.1896,
      "step": 1338
    },
    {
      "epoch": 0.12337034136453678,
      "grad_norm": 0.9427072884318984,
      "learning_rate": 4.926820684614333e-06,
      "loss": 0.1783,
      "step": 1339
    },
    {
      "epoch": 0.1234624775418068,
      "grad_norm": 1.063398218693465,
      "learning_rate": 4.9266376066415764e-06,
      "loss": 0.2,
      "step": 1340
    },
    {
      "epoch": 0.12355461371907679,
      "grad_norm": 1.014467337241986,
      "learning_rate": 4.926454303355256e-06,
      "loss": 0.1778,
      "step": 1341
    },
    {
      "epoch": 0.1236467498963468,
      "grad_norm": 0.9867211771794864,
      "learning_rate": 4.926270774772392e-06,
      "loss": 0.1868,
      "step": 1342
    },
    {
      "epoch": 0.1237388860736168,
      "grad_norm": 0.9314537671161427,
      "learning_rate": 4.926087020910027e-06,
      "loss": 0.1689,
      "step": 1343
    },
    {
      "epoch": 0.12383102225088681,
      "grad_norm": 0.9945656614235373,
      "learning_rate": 4.925903041785221e-06,
      "loss": 0.1915,
      "step": 1344
    },
    {
      "epoch": 0.12392315842815682,
      "grad_norm": 1.054070147484718,
      "learning_rate": 4.925718837415055e-06,
      "loss": 0.1874,
      "step": 1345
    },
    {
      "epoch": 0.12401529460542682,
      "grad_norm": 1.003037762311901,
      "learning_rate": 4.925534407816634e-06,
      "loss": 0.1769,
      "step": 1346
    },
    {
      "epoch": 0.12410743078269683,
      "grad_norm": 0.9398314757426698,
      "learning_rate": 4.925349753007083e-06,
      "loss": 0.1737,
      "step": 1347
    },
    {
      "epoch": 0.12419956695996683,
      "grad_norm": 1.0109341517521457,
      "learning_rate": 4.925164873003546e-06,
      "loss": 0.184,
      "step": 1348
    },
    {
      "epoch": 0.12429170313723684,
      "grad_norm": 1.045989030595084,
      "learning_rate": 4.92497976782319e-06,
      "loss": 0.1924,
      "step": 1349
    },
    {
      "epoch": 0.12438383931450685,
      "grad_norm": 0.9613180803344651,
      "learning_rate": 4.924794437483202e-06,
      "loss": 0.1723,
      "step": 1350
    },
    {
      "epoch": 0.12447597549177684,
      "grad_norm": 1.0326785619529568,
      "learning_rate": 4.924608882000789e-06,
      "loss": 0.1987,
      "step": 1351
    },
    {
      "epoch": 0.12456811166904685,
      "grad_norm": 0.9158713928345703,
      "learning_rate": 4.92442310139318e-06,
      "loss": 0.1765,
      "step": 1352
    },
    {
      "epoch": 0.12466024784631685,
      "grad_norm": 0.9681906696968335,
      "learning_rate": 4.924237095677625e-06,
      "loss": 0.1783,
      "step": 1353
    },
    {
      "epoch": 0.12475238402358686,
      "grad_norm": 1.0316433171773627,
      "learning_rate": 4.924050864871396e-06,
      "loss": 0.191,
      "step": 1354
    },
    {
      "epoch": 0.12484452020085687,
      "grad_norm": 1.0732753608787855,
      "learning_rate": 4.923864408991782e-06,
      "loss": 0.1873,
      "step": 1355
    },
    {
      "epoch": 0.12493665637812687,
      "grad_norm": 1.0115088181506764,
      "learning_rate": 4.923677728056098e-06,
      "loss": 0.184,
      "step": 1356
    },
    {
      "epoch": 0.12502879255539687,
      "grad_norm": 0.9831921194489737,
      "learning_rate": 4.923490822081675e-06,
      "loss": 0.1831,
      "step": 1357
    },
    {
      "epoch": 0.1251209287326669,
      "grad_norm": 0.9368075813037634,
      "learning_rate": 4.923303691085869e-06,
      "loss": 0.1716,
      "step": 1358
    },
    {
      "epoch": 0.1252130649099369,
      "grad_norm": 0.9158976927466699,
      "learning_rate": 4.9231163350860535e-06,
      "loss": 0.1734,
      "step": 1359
    },
    {
      "epoch": 0.12530520108720689,
      "grad_norm": 0.9316616396852874,
      "learning_rate": 4.922928754099626e-06,
      "loss": 0.1706,
      "step": 1360
    },
    {
      "epoch": 0.1253973372644769,
      "grad_norm": 1.1250088621018242,
      "learning_rate": 4.9227409481440034e-06,
      "loss": 0.2038,
      "step": 1361
    },
    {
      "epoch": 0.1254894734417469,
      "grad_norm": 0.9675671851618894,
      "learning_rate": 4.922552917236622e-06,
      "loss": 0.1746,
      "step": 1362
    },
    {
      "epoch": 0.1255816096190169,
      "grad_norm": 0.9821306778029524,
      "learning_rate": 4.922364661394943e-06,
      "loss": 0.1879,
      "step": 1363
    },
    {
      "epoch": 0.1256737457962869,
      "grad_norm": 0.896728621906593,
      "learning_rate": 4.922176180636443e-06,
      "loss": 0.1632,
      "step": 1364
    },
    {
      "epoch": 0.12576588197355693,
      "grad_norm": 0.9697621684195716,
      "learning_rate": 4.921987474978626e-06,
      "loss": 0.1888,
      "step": 1365
    },
    {
      "epoch": 0.12585801815082692,
      "grad_norm": 0.9522967075080769,
      "learning_rate": 4.921798544439009e-06,
      "loss": 0.1875,
      "step": 1366
    },
    {
      "epoch": 0.12595015432809692,
      "grad_norm": 1.051166642356011,
      "learning_rate": 4.921609389035138e-06,
      "loss": 0.1997,
      "step": 1367
    },
    {
      "epoch": 0.12604229050536694,
      "grad_norm": 1.0744627934977156,
      "learning_rate": 4.921420008784573e-06,
      "loss": 0.1905,
      "step": 1368
    },
    {
      "epoch": 0.12613442668263694,
      "grad_norm": 0.9960260593456565,
      "learning_rate": 4.9212304037049015e-06,
      "loss": 0.1829,
      "step": 1369
    },
    {
      "epoch": 0.12622656285990694,
      "grad_norm": 1.0202692128949833,
      "learning_rate": 4.921040573813726e-06,
      "loss": 0.1732,
      "step": 1370
    },
    {
      "epoch": 0.12631869903717693,
      "grad_norm": 1.0365842879051643,
      "learning_rate": 4.9208505191286714e-06,
      "loss": 0.1855,
      "step": 1371
    },
    {
      "epoch": 0.12641083521444696,
      "grad_norm": 1.1591917314275566,
      "learning_rate": 4.920660239667387e-06,
      "loss": 0.209,
      "step": 1372
    },
    {
      "epoch": 0.12650297139171696,
      "grad_norm": 0.9993800116845046,
      "learning_rate": 4.920469735447538e-06,
      "loss": 0.1883,
      "step": 1373
    },
    {
      "epoch": 0.12659510756898695,
      "grad_norm": 0.9608088730057035,
      "learning_rate": 4.920279006486815e-06,
      "loss": 0.1812,
      "step": 1374
    },
    {
      "epoch": 0.12668724374625698,
      "grad_norm": 0.9592626621524494,
      "learning_rate": 4.920088052802924e-06,
      "loss": 0.1908,
      "step": 1375
    },
    {
      "epoch": 0.12677937992352697,
      "grad_norm": 0.9979910451476817,
      "learning_rate": 4.919896874413597e-06,
      "loss": 0.1756,
      "step": 1376
    },
    {
      "epoch": 0.12687151610079697,
      "grad_norm": 0.9705279536103502,
      "learning_rate": 4.919705471336585e-06,
      "loss": 0.1696,
      "step": 1377
    },
    {
      "epoch": 0.126963652278067,
      "grad_norm": 1.02529637426661,
      "learning_rate": 4.919513843589661e-06,
      "loss": 0.1979,
      "step": 1378
    },
    {
      "epoch": 0.127055788455337,
      "grad_norm": 0.9709462058488644,
      "learning_rate": 4.919321991190614e-06,
      "loss": 0.1803,
      "step": 1379
    },
    {
      "epoch": 0.127147924632607,
      "grad_norm": 1.072594825084783,
      "learning_rate": 4.919129914157261e-06,
      "loss": 0.1933,
      "step": 1380
    },
    {
      "epoch": 0.127240060809877,
      "grad_norm": 1.1100361520927327,
      "learning_rate": 4.918937612507435e-06,
      "loss": 0.1769,
      "step": 1381
    },
    {
      "epoch": 0.127332196987147,
      "grad_norm": 0.9545515898210492,
      "learning_rate": 4.918745086258992e-06,
      "loss": 0.1717,
      "step": 1382
    },
    {
      "epoch": 0.127424333164417,
      "grad_norm": 0.9027879802968097,
      "learning_rate": 4.918552335429806e-06,
      "loss": 0.1644,
      "step": 1383
    },
    {
      "epoch": 0.127516469341687,
      "grad_norm": 0.9607434821952252,
      "learning_rate": 4.918359360037776e-06,
      "loss": 0.1759,
      "step": 1384
    },
    {
      "epoch": 0.12760860551895703,
      "grad_norm": 1.0212850726584626,
      "learning_rate": 4.918166160100819e-06,
      "loss": 0.1868,
      "step": 1385
    },
    {
      "epoch": 0.12770074169622703,
      "grad_norm": 0.9851648999704009,
      "learning_rate": 4.917972735636875e-06,
      "loss": 0.1792,
      "step": 1386
    },
    {
      "epoch": 0.12779287787349702,
      "grad_norm": 0.9860306797823638,
      "learning_rate": 4.9177790866639005e-06,
      "loss": 0.1576,
      "step": 1387
    },
    {
      "epoch": 0.12788501405076702,
      "grad_norm": 0.9616676315179897,
      "learning_rate": 4.917585213199878e-06,
      "loss": 0.1748,
      "step": 1388
    },
    {
      "epoch": 0.12797715022803705,
      "grad_norm": 1.0519830739673464,
      "learning_rate": 4.9173911152628095e-06,
      "loss": 0.202,
      "step": 1389
    },
    {
      "epoch": 0.12806928640530704,
      "grad_norm": 1.0777062892635907,
      "learning_rate": 4.917196792870715e-06,
      "loss": 0.1892,
      "step": 1390
    },
    {
      "epoch": 0.12816142258257704,
      "grad_norm": 0.9991283645825584,
      "learning_rate": 4.917002246041638e-06,
      "loss": 0.2017,
      "step": 1391
    },
    {
      "epoch": 0.12825355875984706,
      "grad_norm": 1.0345531811177093,
      "learning_rate": 4.916807474793643e-06,
      "loss": 0.2083,
      "step": 1392
    },
    {
      "epoch": 0.12834569493711706,
      "grad_norm": 0.9962307459016241,
      "learning_rate": 4.916612479144812e-06,
      "loss": 0.1898,
      "step": 1393
    },
    {
      "epoch": 0.12843783111438706,
      "grad_norm": 0.9750941523903573,
      "learning_rate": 4.916417259113254e-06,
      "loss": 0.186,
      "step": 1394
    },
    {
      "epoch": 0.12852996729165708,
      "grad_norm": 1.0734279770420394,
      "learning_rate": 4.916221814717092e-06,
      "loss": 0.1941,
      "step": 1395
    },
    {
      "epoch": 0.12862210346892708,
      "grad_norm": 1.1087277465598613,
      "learning_rate": 4.916026145974476e-06,
      "loss": 0.1863,
      "step": 1396
    },
    {
      "epoch": 0.12871423964619708,
      "grad_norm": 0.970503983339328,
      "learning_rate": 4.915830252903572e-06,
      "loss": 0.1809,
      "step": 1397
    },
    {
      "epoch": 0.12880637582346707,
      "grad_norm": 1.0270549073779653,
      "learning_rate": 4.915634135522569e-06,
      "loss": 0.1737,
      "step": 1398
    },
    {
      "epoch": 0.1288985120007371,
      "grad_norm": 1.017718130375212,
      "learning_rate": 4.915437793849676e-06,
      "loss": 0.1936,
      "step": 1399
    },
    {
      "epoch": 0.1289906481780071,
      "grad_norm": 0.9258354769873366,
      "learning_rate": 4.915241227903125e-06,
      "loss": 0.176,
      "step": 1400
    },
    {
      "epoch": 0.1290827843552771,
      "grad_norm": 1.1632022523711172,
      "learning_rate": 4.915044437701165e-06,
      "loss": 0.193,
      "step": 1401
    },
    {
      "epoch": 0.12917492053254712,
      "grad_norm": 0.9909882624273187,
      "learning_rate": 4.914847423262069e-06,
      "loss": 0.1772,
      "step": 1402
    },
    {
      "epoch": 0.1292670567098171,
      "grad_norm": 1.0227967032568668,
      "learning_rate": 4.9146501846041304e-06,
      "loss": 0.1813,
      "step": 1403
    },
    {
      "epoch": 0.1293591928870871,
      "grad_norm": 1.023350428596177,
      "learning_rate": 4.914452721745662e-06,
      "loss": 0.1751,
      "step": 1404
    },
    {
      "epoch": 0.1294513290643571,
      "grad_norm": 1.0953277903910312,
      "learning_rate": 4.914255034704998e-06,
      "loss": 0.2063,
      "step": 1405
    },
    {
      "epoch": 0.12954346524162713,
      "grad_norm": 1.0076641007495757,
      "learning_rate": 4.914057123500495e-06,
      "loss": 0.1864,
      "step": 1406
    },
    {
      "epoch": 0.12963560141889713,
      "grad_norm": 1.0569146161299305,
      "learning_rate": 4.913858988150528e-06,
      "loss": 0.1817,
      "step": 1407
    },
    {
      "epoch": 0.12972773759616713,
      "grad_norm": 1.0438485163527682,
      "learning_rate": 4.9136606286734945e-06,
      "loss": 0.1638,
      "step": 1408
    },
    {
      "epoch": 0.12981987377343715,
      "grad_norm": 0.9678728108018867,
      "learning_rate": 4.913462045087811e-06,
      "loss": 0.1735,
      "step": 1409
    },
    {
      "epoch": 0.12991200995070715,
      "grad_norm": 1.0572497449704699,
      "learning_rate": 4.9132632374119185e-06,
      "loss": 0.1859,
      "step": 1410
    },
    {
      "epoch": 0.13000414612797714,
      "grad_norm": 1.1213963454815326,
      "learning_rate": 4.913064205664273e-06,
      "loss": 0.18,
      "step": 1411
    },
    {
      "epoch": 0.13009628230524717,
      "grad_norm": 1.0531509919261324,
      "learning_rate": 4.912864949863358e-06,
      "loss": 0.1701,
      "step": 1412
    },
    {
      "epoch": 0.13018841848251717,
      "grad_norm": 1.0597007287157376,
      "learning_rate": 4.912665470027671e-06,
      "loss": 0.1975,
      "step": 1413
    },
    {
      "epoch": 0.13028055465978716,
      "grad_norm": 1.0209805064275213,
      "learning_rate": 4.912465766175736e-06,
      "loss": 0.1686,
      "step": 1414
    },
    {
      "epoch": 0.13037269083705716,
      "grad_norm": 0.9354404787118893,
      "learning_rate": 4.912265838326095e-06,
      "loss": 0.1649,
      "step": 1415
    },
    {
      "epoch": 0.13046482701432718,
      "grad_norm": 0.9685000508425466,
      "learning_rate": 4.912065686497312e-06,
      "loss": 0.1792,
      "step": 1416
    },
    {
      "epoch": 0.13055696319159718,
      "grad_norm": 0.9793173080999195,
      "learning_rate": 4.91186531070797e-06,
      "loss": 0.1733,
      "step": 1417
    },
    {
      "epoch": 0.13064909936886718,
      "grad_norm": 1.0267034852588974,
      "learning_rate": 4.911664710976674e-06,
      "loss": 0.1911,
      "step": 1418
    },
    {
      "epoch": 0.1307412355461372,
      "grad_norm": 1.0679568316502386,
      "learning_rate": 4.91146388732205e-06,
      "loss": 0.1959,
      "step": 1419
    },
    {
      "epoch": 0.1308333717234072,
      "grad_norm": 1.043916463767155,
      "learning_rate": 4.911262839762745e-06,
      "loss": 0.1844,
      "step": 1420
    },
    {
      "epoch": 0.1309255079006772,
      "grad_norm": 1.1426541467661104,
      "learning_rate": 4.911061568317425e-06,
      "loss": 0.1839,
      "step": 1421
    },
    {
      "epoch": 0.1310176440779472,
      "grad_norm": 1.0670712794431116,
      "learning_rate": 4.910860073004779e-06,
      "loss": 0.1893,
      "step": 1422
    },
    {
      "epoch": 0.13110978025521722,
      "grad_norm": 1.0217657650798537,
      "learning_rate": 4.910658353843517e-06,
      "loss": 0.1757,
      "step": 1423
    },
    {
      "epoch": 0.13120191643248721,
      "grad_norm": 1.042824076262155,
      "learning_rate": 4.910456410852367e-06,
      "loss": 0.163,
      "step": 1424
    },
    {
      "epoch": 0.1312940526097572,
      "grad_norm": 0.956885833442343,
      "learning_rate": 4.91025424405008e-06,
      "loss": 0.1723,
      "step": 1425
    },
    {
      "epoch": 0.13138618878702724,
      "grad_norm": 1.1449966036167125,
      "learning_rate": 4.910051853455426e-06,
      "loss": 0.1911,
      "step": 1426
    },
    {
      "epoch": 0.13147832496429723,
      "grad_norm": 1.036287110180777,
      "learning_rate": 4.909849239087199e-06,
      "loss": 0.1988,
      "step": 1427
    },
    {
      "epoch": 0.13157046114156723,
      "grad_norm": 0.9948309049567811,
      "learning_rate": 4.90964640096421e-06,
      "loss": 0.1951,
      "step": 1428
    },
    {
      "epoch": 0.13166259731883725,
      "grad_norm": 1.0844421392628185,
      "learning_rate": 4.9094433391052935e-06,
      "loss": 0.1812,
      "step": 1429
    },
    {
      "epoch": 0.13175473349610725,
      "grad_norm": 1.0495914214075481,
      "learning_rate": 4.909240053529304e-06,
      "loss": 0.1985,
      "step": 1430
    },
    {
      "epoch": 0.13184686967337725,
      "grad_norm": 0.9651180837150052,
      "learning_rate": 4.909036544255116e-06,
      "loss": 0.1702,
      "step": 1431
    },
    {
      "epoch": 0.13193900585064725,
      "grad_norm": 0.9955966957325579,
      "learning_rate": 4.908832811301626e-06,
      "loss": 0.1781,
      "step": 1432
    },
    {
      "epoch": 0.13203114202791727,
      "grad_norm": 1.0035279832126653,
      "learning_rate": 4.90862885468775e-06,
      "loss": 0.1743,
      "step": 1433
    },
    {
      "epoch": 0.13212327820518727,
      "grad_norm": 1.0358847851511948,
      "learning_rate": 4.908424674432425e-06,
      "loss": 0.1895,
      "step": 1434
    },
    {
      "epoch": 0.13221541438245726,
      "grad_norm": 0.9875222025539104,
      "learning_rate": 4.908220270554611e-06,
      "loss": 0.1809,
      "step": 1435
    },
    {
      "epoch": 0.1323075505597273,
      "grad_norm": 1.0278508432913862,
      "learning_rate": 4.908015643073285e-06,
      "loss": 0.1833,
      "step": 1436
    },
    {
      "epoch": 0.13239968673699729,
      "grad_norm": 1.0273581077755105,
      "learning_rate": 4.907810792007447e-06,
      "loss": 0.1984,
      "step": 1437
    },
    {
      "epoch": 0.13249182291426728,
      "grad_norm": 1.0560009720633898,
      "learning_rate": 4.907605717376118e-06,
      "loss": 0.1864,
      "step": 1438
    },
    {
      "epoch": 0.13258395909153728,
      "grad_norm": 0.9626044030041234,
      "learning_rate": 4.90740041919834e-06,
      "loss": 0.1758,
      "step": 1439
    },
    {
      "epoch": 0.1326760952688073,
      "grad_norm": 0.9672689247849761,
      "learning_rate": 4.907194897493173e-06,
      "loss": 0.1771,
      "step": 1440
    },
    {
      "epoch": 0.1327682314460773,
      "grad_norm": 1.0450504004609606,
      "learning_rate": 4.906989152279701e-06,
      "loss": 0.1905,
      "step": 1441
    },
    {
      "epoch": 0.1328603676233473,
      "grad_norm": 0.9482587626895994,
      "learning_rate": 4.9067831835770275e-06,
      "loss": 0.1738,
      "step": 1442
    },
    {
      "epoch": 0.13295250380061732,
      "grad_norm": 0.9463239494076845,
      "learning_rate": 4.906576991404276e-06,
      "loss": 0.1854,
      "step": 1443
    },
    {
      "epoch": 0.13304463997788732,
      "grad_norm": 1.0061874594493472,
      "learning_rate": 4.9063705757805915e-06,
      "loss": 0.1985,
      "step": 1444
    },
    {
      "epoch": 0.13313677615515732,
      "grad_norm": 0.9068357394587006,
      "learning_rate": 4.906163936725141e-06,
      "loss": 0.1595,
      "step": 1445
    },
    {
      "epoch": 0.13322891233242734,
      "grad_norm": 1.011124096617946,
      "learning_rate": 4.905957074257109e-06,
      "loss": 0.1716,
      "step": 1446
    },
    {
      "epoch": 0.13332104850969734,
      "grad_norm": 0.9635712912469494,
      "learning_rate": 4.905749988395704e-06,
      "loss": 0.1686,
      "step": 1447
    },
    {
      "epoch": 0.13341318468696733,
      "grad_norm": 0.9726084759736766,
      "learning_rate": 4.905542679160155e-06,
      "loss": 0.1682,
      "step": 1448
    },
    {
      "epoch": 0.13350532086423733,
      "grad_norm": 0.9982505363951202,
      "learning_rate": 4.905335146569707e-06,
      "loss": 0.1972,
      "step": 1449
    },
    {
      "epoch": 0.13359745704150736,
      "grad_norm": 1.0309683229900235,
      "learning_rate": 4.9051273906436335e-06,
      "loss": 0.1786,
      "step": 1450
    },
    {
      "epoch": 0.13368959321877735,
      "grad_norm": 1.014497711913621,
      "learning_rate": 4.904919411401222e-06,
      "loss": 0.1753,
      "step": 1451
    },
    {
      "epoch": 0.13378172939604735,
      "grad_norm": 0.9494034749604296,
      "learning_rate": 4.9047112088617855e-06,
      "loss": 0.1685,
      "step": 1452
    },
    {
      "epoch": 0.13387386557331737,
      "grad_norm": 0.9533159259651824,
      "learning_rate": 4.904502783044654e-06,
      "loss": 0.183,
      "step": 1453
    },
    {
      "epoch": 0.13396600175058737,
      "grad_norm": 1.0198352992862363,
      "learning_rate": 4.90429413396918e-06,
      "loss": 0.1997,
      "step": 1454
    },
    {
      "epoch": 0.13405813792785737,
      "grad_norm": 1.028575773022091,
      "learning_rate": 4.904085261654736e-06,
      "loss": 0.1874,
      "step": 1455
    },
    {
      "epoch": 0.13415027410512737,
      "grad_norm": 0.9643745703997726,
      "learning_rate": 4.903876166120718e-06,
      "loss": 0.186,
      "step": 1456
    },
    {
      "epoch": 0.1342424102823974,
      "grad_norm": 0.9918455781983645,
      "learning_rate": 4.903666847386539e-06,
      "loss": 0.1926,
      "step": 1457
    },
    {
      "epoch": 0.1343345464596674,
      "grad_norm": 1.0310036777983294,
      "learning_rate": 4.903457305471635e-06,
      "loss": 0.1888,
      "step": 1458
    },
    {
      "epoch": 0.13442668263693738,
      "grad_norm": 0.9804790007901808,
      "learning_rate": 4.90324754039546e-06,
      "loss": 0.1899,
      "step": 1459
    },
    {
      "epoch": 0.1345188188142074,
      "grad_norm": 0.873722915764326,
      "learning_rate": 4.903037552177494e-06,
      "loss": 0.1743,
      "step": 1460
    },
    {
      "epoch": 0.1346109549914774,
      "grad_norm": 0.9519299025740255,
      "learning_rate": 4.9028273408372315e-06,
      "loss": 0.1809,
      "step": 1461
    },
    {
      "epoch": 0.1347030911687474,
      "grad_norm": 0.9612320511182931,
      "learning_rate": 4.902616906394193e-06,
      "loss": 0.1636,
      "step": 1462
    },
    {
      "epoch": 0.13479522734601743,
      "grad_norm": 0.9937787642498676,
      "learning_rate": 4.9024062488679145e-06,
      "loss": 0.1743,
      "step": 1463
    },
    {
      "epoch": 0.13488736352328742,
      "grad_norm": 0.8938554298239793,
      "learning_rate": 4.9021953682779585e-06,
      "loss": 0.1561,
      "step": 1464
    },
    {
      "epoch": 0.13497949970055742,
      "grad_norm": 1.0398410959428166,
      "learning_rate": 4.901984264643904e-06,
      "loss": 0.1925,
      "step": 1465
    },
    {
      "epoch": 0.13507163587782742,
      "grad_norm": 1.0310209853854573,
      "learning_rate": 4.9017729379853515e-06,
      "loss": 0.1992,
      "step": 1466
    },
    {
      "epoch": 0.13516377205509744,
      "grad_norm": 1.0207343294570042,
      "learning_rate": 4.901561388321923e-06,
      "loss": 0.2045,
      "step": 1467
    },
    {
      "epoch": 0.13525590823236744,
      "grad_norm": 0.87864572441413,
      "learning_rate": 4.901349615673262e-06,
      "loss": 0.1572,
      "step": 1468
    },
    {
      "epoch": 0.13534804440963744,
      "grad_norm": 0.9196847849987159,
      "learning_rate": 4.90113762005903e-06,
      "loss": 0.1802,
      "step": 1469
    },
    {
      "epoch": 0.13544018058690746,
      "grad_norm": 0.9578454364771498,
      "learning_rate": 4.900925401498912e-06,
      "loss": 0.1858,
      "step": 1470
    },
    {
      "epoch": 0.13553231676417746,
      "grad_norm": 0.9488263656936775,
      "learning_rate": 4.900712960012612e-06,
      "loss": 0.1801,
      "step": 1471
    },
    {
      "epoch": 0.13562445294144745,
      "grad_norm": 0.9320142456071285,
      "learning_rate": 4.900500295619855e-06,
      "loss": 0.1808,
      "step": 1472
    },
    {
      "epoch": 0.13571658911871745,
      "grad_norm": 0.8749696112915272,
      "learning_rate": 4.900287408340387e-06,
      "loss": 0.1707,
      "step": 1473
    },
    {
      "epoch": 0.13580872529598748,
      "grad_norm": 0.9555507551898182,
      "learning_rate": 4.900074298193976e-06,
      "loss": 0.1826,
      "step": 1474
    },
    {
      "epoch": 0.13590086147325747,
      "grad_norm": 1.0200858753072042,
      "learning_rate": 4.899860965200407e-06,
      "loss": 0.1936,
      "step": 1475
    },
    {
      "epoch": 0.13599299765052747,
      "grad_norm": 1.046105205148264,
      "learning_rate": 4.89964740937949e-06,
      "loss": 0.1949,
      "step": 1476
    },
    {
      "epoch": 0.1360851338277975,
      "grad_norm": 0.9703684004608017,
      "learning_rate": 4.899433630751052e-06,
      "loss": 0.1812,
      "step": 1477
    },
    {
      "epoch": 0.1361772700050675,
      "grad_norm": 0.9358632265946936,
      "learning_rate": 4.8992196293349435e-06,
      "loss": 0.1692,
      "step": 1478
    },
    {
      "epoch": 0.1362694061823375,
      "grad_norm": 0.9421548138334379,
      "learning_rate": 4.899005405151034e-06,
      "loss": 0.173,
      "step": 1479
    },
    {
      "epoch": 0.1363615423596075,
      "grad_norm": 0.9874060354260804,
      "learning_rate": 4.898790958219215e-06,
      "loss": 0.1917,
      "step": 1480
    },
    {
      "epoch": 0.1364536785368775,
      "grad_norm": 1.0241768866081442,
      "learning_rate": 4.898576288559396e-06,
      "loss": 0.194,
      "step": 1481
    },
    {
      "epoch": 0.1365458147141475,
      "grad_norm": 0.967676077385443,
      "learning_rate": 4.898361396191512e-06,
      "loss": 0.1869,
      "step": 1482
    },
    {
      "epoch": 0.1366379508914175,
      "grad_norm": 1.063157649624893,
      "learning_rate": 4.898146281135514e-06,
      "loss": 0.1878,
      "step": 1483
    },
    {
      "epoch": 0.13673008706868753,
      "grad_norm": 1.0213338083271282,
      "learning_rate": 4.8979309434113745e-06,
      "loss": 0.171,
      "step": 1484
    },
    {
      "epoch": 0.13682222324595752,
      "grad_norm": 0.9752201110126685,
      "learning_rate": 4.89771538303909e-06,
      "loss": 0.176,
      "step": 1485
    },
    {
      "epoch": 0.13691435942322752,
      "grad_norm": 0.9992722204337339,
      "learning_rate": 4.897499600038673e-06,
      "loss": 0.1763,
      "step": 1486
    },
    {
      "epoch": 0.13700649560049755,
      "grad_norm": 0.9673495125040701,
      "learning_rate": 4.8972835944301615e-06,
      "loss": 0.181,
      "step": 1487
    },
    {
      "epoch": 0.13709863177776754,
      "grad_norm": 0.9172227987836397,
      "learning_rate": 4.89706736623361e-06,
      "loss": 0.1687,
      "step": 1488
    },
    {
      "epoch": 0.13719076795503754,
      "grad_norm": 0.9179738386385932,
      "learning_rate": 4.896850915469095e-06,
      "loss": 0.1708,
      "step": 1489
    },
    {
      "epoch": 0.13728290413230754,
      "grad_norm": 1.0866865421153868,
      "learning_rate": 4.896634242156715e-06,
      "loss": 0.1981,
      "step": 1490
    },
    {
      "epoch": 0.13737504030957756,
      "grad_norm": 1.1181657985966733,
      "learning_rate": 4.896417346316587e-06,
      "loss": 0.2006,
      "step": 1491
    },
    {
      "epoch": 0.13746717648684756,
      "grad_norm": 0.958948999492823,
      "learning_rate": 4.8962002279688514e-06,
      "loss": 0.176,
      "step": 1492
    },
    {
      "epoch": 0.13755931266411756,
      "grad_norm": 1.0453082102751459,
      "learning_rate": 4.8959828871336665e-06,
      "loss": 0.1824,
      "step": 1493
    },
    {
      "epoch": 0.13765144884138758,
      "grad_norm": 1.0704601468240302,
      "learning_rate": 4.895765323831212e-06,
      "loss": 0.1798,
      "step": 1494
    },
    {
      "epoch": 0.13774358501865758,
      "grad_norm": 1.046104125736917,
      "learning_rate": 4.895547538081691e-06,
      "loss": 0.1865,
      "step": 1495
    },
    {
      "epoch": 0.13783572119592757,
      "grad_norm": 0.9953218833475491,
      "learning_rate": 4.895329529905322e-06,
      "loss": 0.1798,
      "step": 1496
    },
    {
      "epoch": 0.1379278573731976,
      "grad_norm": 0.9993516892546119,
      "learning_rate": 4.895111299322348e-06,
      "loss": 0.1779,
      "step": 1497
    },
    {
      "epoch": 0.1380199935504676,
      "grad_norm": 1.1037371027487326,
      "learning_rate": 4.894892846353032e-06,
      "loss": 0.1718,
      "step": 1498
    },
    {
      "epoch": 0.1381121297277376,
      "grad_norm": 1.1042776255985305,
      "learning_rate": 4.8946741710176584e-06,
      "loss": 0.1769,
      "step": 1499
    },
    {
      "epoch": 0.1382042659050076,
      "grad_norm": 1.0159415033155752,
      "learning_rate": 4.894455273336531e-06,
      "loss": 0.1963,
      "step": 1500
    },
    {
      "epoch": 0.1382042659050076,
      "eval_loss": 0.1832522302865982,
      "eval_runtime": 300.5966,
      "eval_samples_per_second": 23.344,
      "eval_steps_per_second": 2.921,
      "step": 1500
    },
    {
      "epoch": 0.13829640208227761,
      "grad_norm": 1.0590835289628433,
      "learning_rate": 4.894236153329972e-06,
      "loss": 0.1821,
      "step": 1501
    },
    {
      "epoch": 0.1383885382595476,
      "grad_norm": 1.0055100832100328,
      "learning_rate": 4.894016811018329e-06,
      "loss": 0.164,
      "step": 1502
    },
    {
      "epoch": 0.1384806744368176,
      "grad_norm": 1.031947823782199,
      "learning_rate": 4.893797246421968e-06,
      "loss": 0.1967,
      "step": 1503
    },
    {
      "epoch": 0.13857281061408763,
      "grad_norm": 0.9897293422122585,
      "learning_rate": 4.893577459561274e-06,
      "loss": 0.1844,
      "step": 1504
    },
    {
      "epoch": 0.13866494679135763,
      "grad_norm": 1.0213676397867222,
      "learning_rate": 4.893357450456657e-06,
      "loss": 0.1896,
      "step": 1505
    },
    {
      "epoch": 0.13875708296862763,
      "grad_norm": 1.0049304145032214,
      "learning_rate": 4.893137219128542e-06,
      "loss": 0.1719,
      "step": 1506
    },
    {
      "epoch": 0.13884921914589762,
      "grad_norm": 0.9605931957652998,
      "learning_rate": 4.892916765597378e-06,
      "loss": 0.1735,
      "step": 1507
    },
    {
      "epoch": 0.13894135532316765,
      "grad_norm": 1.09891001647981,
      "learning_rate": 4.892696089883636e-06,
      "loss": 0.2017,
      "step": 1508
    },
    {
      "epoch": 0.13903349150043764,
      "grad_norm": 1.0245227870445939,
      "learning_rate": 4.8924751920078045e-06,
      "loss": 0.1845,
      "step": 1509
    },
    {
      "epoch": 0.13912562767770764,
      "grad_norm": 0.9744899746698248,
      "learning_rate": 4.892254071990393e-06,
      "loss": 0.1673,
      "step": 1510
    },
    {
      "epoch": 0.13921776385497767,
      "grad_norm": 1.007033072682871,
      "learning_rate": 4.892032729851934e-06,
      "loss": 0.1638,
      "step": 1511
    },
    {
      "epoch": 0.13930990003224766,
      "grad_norm": 1.0239978553664408,
      "learning_rate": 4.891811165612979e-06,
      "loss": 0.2006,
      "step": 1512
    },
    {
      "epoch": 0.13940203620951766,
      "grad_norm": 0.9688980878806478,
      "learning_rate": 4.8915893792941e-06,
      "loss": 0.1741,
      "step": 1513
    },
    {
      "epoch": 0.13949417238678768,
      "grad_norm": 1.0124497462752957,
      "learning_rate": 4.891367370915889e-06,
      "loss": 0.1853,
      "step": 1514
    },
    {
      "epoch": 0.13958630856405768,
      "grad_norm": 0.9443507545978798,
      "learning_rate": 4.89114514049896e-06,
      "loss": 0.1853,
      "step": 1515
    },
    {
      "epoch": 0.13967844474132768,
      "grad_norm": 0.8944876191974487,
      "learning_rate": 4.890922688063949e-06,
      "loss": 0.1713,
      "step": 1516
    },
    {
      "epoch": 0.13977058091859768,
      "grad_norm": 0.9278038889701954,
      "learning_rate": 4.8907000136315075e-06,
      "loss": 0.159,
      "step": 1517
    },
    {
      "epoch": 0.1398627170958677,
      "grad_norm": 0.9544401348206267,
      "learning_rate": 4.890477117222313e-06,
      "loss": 0.1646,
      "step": 1518
    },
    {
      "epoch": 0.1399548532731377,
      "grad_norm": 0.993320836333389,
      "learning_rate": 4.890253998857061e-06,
      "loss": 0.1712,
      "step": 1519
    },
    {
      "epoch": 0.1400469894504077,
      "grad_norm": 0.9672368448981169,
      "learning_rate": 4.890030658556467e-06,
      "loss": 0.1763,
      "step": 1520
    },
    {
      "epoch": 0.14013912562767772,
      "grad_norm": 1.0007096297433211,
      "learning_rate": 4.88980709634127e-06,
      "loss": 0.1778,
      "step": 1521
    },
    {
      "epoch": 0.14023126180494772,
      "grad_norm": 1.0380589027334104,
      "learning_rate": 4.889583312232227e-06,
      "loss": 0.2014,
      "step": 1522
    },
    {
      "epoch": 0.1403233979822177,
      "grad_norm": 1.0112883370951264,
      "learning_rate": 4.889359306250117e-06,
      "loss": 0.173,
      "step": 1523
    },
    {
      "epoch": 0.1404155341594877,
      "grad_norm": 0.9158439308113651,
      "learning_rate": 4.889135078415736e-06,
      "loss": 0.1703,
      "step": 1524
    },
    {
      "epoch": 0.14050767033675773,
      "grad_norm": 0.971656273295498,
      "learning_rate": 4.888910628749908e-06,
      "loss": 0.2035,
      "step": 1525
    },
    {
      "epoch": 0.14059980651402773,
      "grad_norm": 1.028981671131926,
      "learning_rate": 4.88868595727347e-06,
      "loss": 0.1804,
      "step": 1526
    },
    {
      "epoch": 0.14069194269129773,
      "grad_norm": 0.9897401268515336,
      "learning_rate": 4.888461064007284e-06,
      "loss": 0.1767,
      "step": 1527
    },
    {
      "epoch": 0.14078407886856775,
      "grad_norm": 0.9659819889789746,
      "learning_rate": 4.888235948972232e-06,
      "loss": 0.1853,
      "step": 1528
    },
    {
      "epoch": 0.14087621504583775,
      "grad_norm": 0.9421444160040869,
      "learning_rate": 4.888010612189213e-06,
      "loss": 0.1643,
      "step": 1529
    },
    {
      "epoch": 0.14096835122310775,
      "grad_norm": 1.0162138437056278,
      "learning_rate": 4.8877850536791535e-06,
      "loss": 0.191,
      "step": 1530
    },
    {
      "epoch": 0.14106048740037777,
      "grad_norm": 1.074012467266447,
      "learning_rate": 4.887559273462994e-06,
      "loss": 0.1941,
      "step": 1531
    },
    {
      "epoch": 0.14115262357764777,
      "grad_norm": 0.9711479225310997,
      "learning_rate": 4.8873332715617e-06,
      "loss": 0.1845,
      "step": 1532
    },
    {
      "epoch": 0.14124475975491776,
      "grad_norm": 1.0096948886121502,
      "learning_rate": 4.887107047996253e-06,
      "loss": 0.1911,
      "step": 1533
    },
    {
      "epoch": 0.14133689593218776,
      "grad_norm": 0.9516594505504503,
      "learning_rate": 4.886880602787661e-06,
      "loss": 0.1763,
      "step": 1534
    },
    {
      "epoch": 0.1414290321094578,
      "grad_norm": 0.9870978246474578,
      "learning_rate": 4.886653935956949e-06,
      "loss": 0.172,
      "step": 1535
    },
    {
      "epoch": 0.14152116828672778,
      "grad_norm": 0.9766472869579116,
      "learning_rate": 4.88642704752516e-06,
      "loss": 0.1664,
      "step": 1536
    },
    {
      "epoch": 0.14161330446399778,
      "grad_norm": 0.8996626934665198,
      "learning_rate": 4.886199937513365e-06,
      "loss": 0.1725,
      "step": 1537
    },
    {
      "epoch": 0.1417054406412678,
      "grad_norm": 1.1275854369973244,
      "learning_rate": 4.885972605942647e-06,
      "loss": 0.1811,
      "step": 1538
    },
    {
      "epoch": 0.1417975768185378,
      "grad_norm": 0.9686314114387246,
      "learning_rate": 4.8857450528341166e-06,
      "loss": 0.1725,
      "step": 1539
    },
    {
      "epoch": 0.1418897129958078,
      "grad_norm": 0.9318170462863459,
      "learning_rate": 4.8855172782089015e-06,
      "loss": 0.1632,
      "step": 1540
    },
    {
      "epoch": 0.1419818491730778,
      "grad_norm": 0.9707586029191749,
      "learning_rate": 4.88528928208815e-06,
      "loss": 0.1759,
      "step": 1541
    },
    {
      "epoch": 0.14207398535034782,
      "grad_norm": 1.0456594221249906,
      "learning_rate": 4.885061064493033e-06,
      "loss": 0.1786,
      "step": 1542
    },
    {
      "epoch": 0.14216612152761782,
      "grad_norm": 0.9707787392031242,
      "learning_rate": 4.884832625444738e-06,
      "loss": 0.1732,
      "step": 1543
    },
    {
      "epoch": 0.1422582577048878,
      "grad_norm": 0.9532507142755162,
      "learning_rate": 4.8846039649644785e-06,
      "loss": 0.1662,
      "step": 1544
    },
    {
      "epoch": 0.14235039388215784,
      "grad_norm": 0.9159276555645566,
      "learning_rate": 4.884375083073483e-06,
      "loss": 0.1715,
      "step": 1545
    },
    {
      "epoch": 0.14244253005942784,
      "grad_norm": 1.0166336431813243,
      "learning_rate": 4.8841459797930045e-06,
      "loss": 0.1841,
      "step": 1546
    },
    {
      "epoch": 0.14253466623669783,
      "grad_norm": 0.9938953514211266,
      "learning_rate": 4.8839166551443165e-06,
      "loss": 0.1917,
      "step": 1547
    },
    {
      "epoch": 0.14262680241396786,
      "grad_norm": 0.9985060362734772,
      "learning_rate": 4.883687109148709e-06,
      "loss": 0.1909,
      "step": 1548
    },
    {
      "epoch": 0.14271893859123785,
      "grad_norm": 0.9480753155210944,
      "learning_rate": 4.883457341827498e-06,
      "loss": 0.1634,
      "step": 1549
    },
    {
      "epoch": 0.14281107476850785,
      "grad_norm": 1.044475869424449,
      "learning_rate": 4.883227353202016e-06,
      "loss": 0.1905,
      "step": 1550
    },
    {
      "epoch": 0.14290321094577785,
      "grad_norm": 0.9751713676071151,
      "learning_rate": 4.882997143293617e-06,
      "loss": 0.1766,
      "step": 1551
    },
    {
      "epoch": 0.14299534712304787,
      "grad_norm": 1.0616405307473622,
      "learning_rate": 4.882766712123677e-06,
      "loss": 0.177,
      "step": 1552
    },
    {
      "epoch": 0.14308748330031787,
      "grad_norm": 0.9891993285446687,
      "learning_rate": 4.882536059713592e-06,
      "loss": 0.1902,
      "step": 1553
    },
    {
      "epoch": 0.14317961947758787,
      "grad_norm": 0.9812316312540941,
      "learning_rate": 4.882305186084777e-06,
      "loss": 0.1828,
      "step": 1554
    },
    {
      "epoch": 0.1432717556548579,
      "grad_norm": 1.0198101696782014,
      "learning_rate": 4.88207409125867e-06,
      "loss": 0.1784,
      "step": 1555
    },
    {
      "epoch": 0.1433638918321279,
      "grad_norm": 0.9787051486382536,
      "learning_rate": 4.881842775256726e-06,
      "loss": 0.1802,
      "step": 1556
    },
    {
      "epoch": 0.14345602800939788,
      "grad_norm": 0.9757898719198366,
      "learning_rate": 4.8816112381004245e-06,
      "loss": 0.1722,
      "step": 1557
    },
    {
      "epoch": 0.14354816418666788,
      "grad_norm": 0.994552243999101,
      "learning_rate": 4.881379479811263e-06,
      "loss": 0.183,
      "step": 1558
    },
    {
      "epoch": 0.1436403003639379,
      "grad_norm": 0.9879186122580522,
      "learning_rate": 4.881147500410761e-06,
      "loss": 0.1727,
      "step": 1559
    },
    {
      "epoch": 0.1437324365412079,
      "grad_norm": 0.9389846554223322,
      "learning_rate": 4.880915299920457e-06,
      "loss": 0.183,
      "step": 1560
    },
    {
      "epoch": 0.1438245727184779,
      "grad_norm": 0.9176926081504023,
      "learning_rate": 4.8806828783619106e-06,
      "loss": 0.1648,
      "step": 1561
    },
    {
      "epoch": 0.14391670889574792,
      "grad_norm": 1.0177887444327505,
      "learning_rate": 4.880450235756704e-06,
      "loss": 0.1858,
      "step": 1562
    },
    {
      "epoch": 0.14400884507301792,
      "grad_norm": 0.8823529515319729,
      "learning_rate": 4.880217372126436e-06,
      "loss": 0.1709,
      "step": 1563
    },
    {
      "epoch": 0.14410098125028792,
      "grad_norm": 0.9499161520652284,
      "learning_rate": 4.8799842874927285e-06,
      "loss": 0.1833,
      "step": 1564
    },
    {
      "epoch": 0.14419311742755794,
      "grad_norm": 0.9768941581657758,
      "learning_rate": 4.879750981877224e-06,
      "loss": 0.1751,
      "step": 1565
    },
    {
      "epoch": 0.14428525360482794,
      "grad_norm": 1.0382432327832614,
      "learning_rate": 4.879517455301585e-06,
      "loss": 0.1696,
      "step": 1566
    },
    {
      "epoch": 0.14437738978209794,
      "grad_norm": 1.0226195818276123,
      "learning_rate": 4.8792837077874945e-06,
      "loss": 0.1897,
      "step": 1567
    },
    {
      "epoch": 0.14446952595936793,
      "grad_norm": 0.9536502285629268,
      "learning_rate": 4.8790497393566546e-06,
      "loss": 0.1905,
      "step": 1568
    },
    {
      "epoch": 0.14456166213663796,
      "grad_norm": 0.9933028554235266,
      "learning_rate": 4.878815550030792e-06,
      "loss": 0.1772,
      "step": 1569
    },
    {
      "epoch": 0.14465379831390796,
      "grad_norm": 0.94927888214594,
      "learning_rate": 4.878581139831649e-06,
      "loss": 0.174,
      "step": 1570
    },
    {
      "epoch": 0.14474593449117795,
      "grad_norm": 0.9734027162264917,
      "learning_rate": 4.87834650878099e-06,
      "loss": 0.1878,
      "step": 1571
    },
    {
      "epoch": 0.14483807066844798,
      "grad_norm": 0.9927226017155047,
      "learning_rate": 4.8781116569006026e-06,
      "loss": 0.1783,
      "step": 1572
    },
    {
      "epoch": 0.14493020684571797,
      "grad_norm": 0.8755924113255371,
      "learning_rate": 4.877876584212292e-06,
      "loss": 0.1789,
      "step": 1573
    },
    {
      "epoch": 0.14502234302298797,
      "grad_norm": 0.8908355930191686,
      "learning_rate": 4.8776412907378845e-06,
      "loss": 0.1659,
      "step": 1574
    },
    {
      "epoch": 0.14511447920025797,
      "grad_norm": 0.9964829044139177,
      "learning_rate": 4.8774057764992275e-06,
      "loss": 0.1903,
      "step": 1575
    },
    {
      "epoch": 0.145206615377528,
      "grad_norm": 1.0087199814403942,
      "learning_rate": 4.877170041518187e-06,
      "loss": 0.1846,
      "step": 1576
    },
    {
      "epoch": 0.145298751554798,
      "grad_norm": 0.9427168363296254,
      "learning_rate": 4.876934085816654e-06,
      "loss": 0.1805,
      "step": 1577
    },
    {
      "epoch": 0.14539088773206799,
      "grad_norm": 1.0114845794628744,
      "learning_rate": 4.8766979094165346e-06,
      "loss": 0.1767,
      "step": 1578
    },
    {
      "epoch": 0.145483023909338,
      "grad_norm": 0.9339557450945638,
      "learning_rate": 4.8764615123397584e-06,
      "loss": 0.1773,
      "step": 1579
    },
    {
      "epoch": 0.145575160086608,
      "grad_norm": 1.0022634915094149,
      "learning_rate": 4.876224894608275e-06,
      "loss": 0.1836,
      "step": 1580
    },
    {
      "epoch": 0.145667296263878,
      "grad_norm": 1.0173079709967716,
      "learning_rate": 4.875988056244055e-06,
      "loss": 0.1951,
      "step": 1581
    },
    {
      "epoch": 0.14575943244114803,
      "grad_norm": 0.9166566621649066,
      "learning_rate": 4.875750997269088e-06,
      "loss": 0.1713,
      "step": 1582
    },
    {
      "epoch": 0.14585156861841803,
      "grad_norm": 0.9612055526441895,
      "learning_rate": 4.875513717705385e-06,
      "loss": 0.1803,
      "step": 1583
    },
    {
      "epoch": 0.14594370479568802,
      "grad_norm": 0.9483501489894158,
      "learning_rate": 4.875276217574978e-06,
      "loss": 0.1804,
      "step": 1584
    },
    {
      "epoch": 0.14603584097295802,
      "grad_norm": 0.9754513204683902,
      "learning_rate": 4.875038496899919e-06,
      "loss": 0.1776,
      "step": 1585
    },
    {
      "epoch": 0.14612797715022804,
      "grad_norm": 0.9887240406962738,
      "learning_rate": 4.874800555702278e-06,
      "loss": 0.1859,
      "step": 1586
    },
    {
      "epoch": 0.14622011332749804,
      "grad_norm": 0.9269334563736277,
      "learning_rate": 4.874562394004152e-06,
      "loss": 0.1852,
      "step": 1587
    },
    {
      "epoch": 0.14631224950476804,
      "grad_norm": 0.9301846330078121,
      "learning_rate": 4.874324011827651e-06,
      "loss": 0.1743,
      "step": 1588
    },
    {
      "epoch": 0.14640438568203806,
      "grad_norm": 1.0003398362769642,
      "learning_rate": 4.874085409194911e-06,
      "loss": 0.1961,
      "step": 1589
    },
    {
      "epoch": 0.14649652185930806,
      "grad_norm": 0.9124366367357205,
      "learning_rate": 4.873846586128083e-06,
      "loss": 0.1683,
      "step": 1590
    },
    {
      "epoch": 0.14658865803657806,
      "grad_norm": 0.9337326845444719,
      "learning_rate": 4.873607542649347e-06,
      "loss": 0.1814,
      "step": 1591
    },
    {
      "epoch": 0.14668079421384805,
      "grad_norm": 0.9707004361697791,
      "learning_rate": 4.873368278780893e-06,
      "loss": 0.1835,
      "step": 1592
    },
    {
      "epoch": 0.14677293039111808,
      "grad_norm": 0.9567094098967206,
      "learning_rate": 4.87312879454494e-06,
      "loss": 0.1687,
      "step": 1593
    },
    {
      "epoch": 0.14686506656838808,
      "grad_norm": 1.0388349235769987,
      "learning_rate": 4.872889089963723e-06,
      "loss": 0.1999,
      "step": 1594
    },
    {
      "epoch": 0.14695720274565807,
      "grad_norm": 1.0055808616791115,
      "learning_rate": 4.872649165059497e-06,
      "loss": 0.196,
      "step": 1595
    },
    {
      "epoch": 0.1470493389229281,
      "grad_norm": 0.9344422199590832,
      "learning_rate": 4.872409019854543e-06,
      "loss": 0.1674,
      "step": 1596
    },
    {
      "epoch": 0.1471414751001981,
      "grad_norm": 0.9358621830893651,
      "learning_rate": 4.872168654371155e-06,
      "loss": 0.1735,
      "step": 1597
    },
    {
      "epoch": 0.1472336112774681,
      "grad_norm": 0.9787417589126662,
      "learning_rate": 4.8719280686316524e-06,
      "loss": 0.1662,
      "step": 1598
    },
    {
      "epoch": 0.14732574745473812,
      "grad_norm": 0.969455208958826,
      "learning_rate": 4.871687262658373e-06,
      "loss": 0.1887,
      "step": 1599
    },
    {
      "epoch": 0.1474178836320081,
      "grad_norm": 0.9719521093017446,
      "learning_rate": 4.871446236473676e-06,
      "loss": 0.1807,
      "step": 1600
    },
    {
      "epoch": 0.1475100198092781,
      "grad_norm": 0.967409307452388,
      "learning_rate": 4.871204990099941e-06,
      "loss": 0.1689,
      "step": 1601
    },
    {
      "epoch": 0.1476021559865481,
      "grad_norm": 1.0137520395644053,
      "learning_rate": 4.870963523559567e-06,
      "loss": 0.1768,
      "step": 1602
    },
    {
      "epoch": 0.14769429216381813,
      "grad_norm": 1.0032661099951032,
      "learning_rate": 4.8707218368749755e-06,
      "loss": 0.2019,
      "step": 1603
    },
    {
      "epoch": 0.14778642834108813,
      "grad_norm": 0.994483855416401,
      "learning_rate": 4.870479930068607e-06,
      "loss": 0.1846,
      "step": 1604
    },
    {
      "epoch": 0.14787856451835812,
      "grad_norm": 0.9323151361272326,
      "learning_rate": 4.8702378031629204e-06,
      "loss": 0.1703,
      "step": 1605
    },
    {
      "epoch": 0.14797070069562815,
      "grad_norm": 0.9392494774290172,
      "learning_rate": 4.869995456180399e-06,
      "loss": 0.1724,
      "step": 1606
    },
    {
      "epoch": 0.14806283687289815,
      "grad_norm": 0.9840474087404582,
      "learning_rate": 4.869752889143544e-06,
      "loss": 0.1831,
      "step": 1607
    },
    {
      "epoch": 0.14815497305016814,
      "grad_norm": 0.9347889910562127,
      "learning_rate": 4.8695101020748796e-06,
      "loss": 0.1707,
      "step": 1608
    },
    {
      "epoch": 0.14824710922743817,
      "grad_norm": 0.9683681475620743,
      "learning_rate": 4.869267094996946e-06,
      "loss": 0.1821,
      "step": 1609
    },
    {
      "epoch": 0.14833924540470816,
      "grad_norm": 0.951231654892894,
      "learning_rate": 4.869023867932309e-06,
      "loss": 0.173,
      "step": 1610
    },
    {
      "epoch": 0.14843138158197816,
      "grad_norm": 0.9654930888260544,
      "learning_rate": 4.868780420903549e-06,
      "loss": 0.1802,
      "step": 1611
    },
    {
      "epoch": 0.14852351775924816,
      "grad_norm": 1.0105698517481636,
      "learning_rate": 4.868536753933273e-06,
      "loss": 0.1892,
      "step": 1612
    },
    {
      "epoch": 0.14861565393651818,
      "grad_norm": 0.9740568467052928,
      "learning_rate": 4.868292867044104e-06,
      "loss": 0.1802,
      "step": 1613
    },
    {
      "epoch": 0.14870779011378818,
      "grad_norm": 1.0226350980028478,
      "learning_rate": 4.868048760258688e-06,
      "loss": 0.1893,
      "step": 1614
    },
    {
      "epoch": 0.14879992629105818,
      "grad_norm": 0.952735105485474,
      "learning_rate": 4.86780443359969e-06,
      "loss": 0.1895,
      "step": 1615
    },
    {
      "epoch": 0.1488920624683282,
      "grad_norm": 0.9784818281571142,
      "learning_rate": 4.8675598870897945e-06,
      "loss": 0.1775,
      "step": 1616
    },
    {
      "epoch": 0.1489841986455982,
      "grad_norm": 0.9473189174777866,
      "learning_rate": 4.86731512075171e-06,
      "loss": 0.1766,
      "step": 1617
    },
    {
      "epoch": 0.1490763348228682,
      "grad_norm": 1.0432332046226518,
      "learning_rate": 4.86707013460816e-06,
      "loss": 0.1785,
      "step": 1618
    },
    {
      "epoch": 0.1491684710001382,
      "grad_norm": 1.036504291535368,
      "learning_rate": 4.866824928681895e-06,
      "loss": 0.1896,
      "step": 1619
    },
    {
      "epoch": 0.14926060717740822,
      "grad_norm": 1.0094182586165745,
      "learning_rate": 4.86657950299568e-06,
      "loss": 0.1908,
      "step": 1620
    },
    {
      "epoch": 0.1493527433546782,
      "grad_norm": 0.9844012875372663,
      "learning_rate": 4.866333857572303e-06,
      "loss": 0.1854,
      "step": 1621
    },
    {
      "epoch": 0.1494448795319482,
      "grad_norm": 0.9374114123884193,
      "learning_rate": 4.866087992434573e-06,
      "loss": 0.157,
      "step": 1622
    },
    {
      "epoch": 0.14953701570921824,
      "grad_norm": 1.006479066028266,
      "learning_rate": 4.865841907605319e-06,
      "loss": 0.1824,
      "step": 1623
    },
    {
      "epoch": 0.14962915188648823,
      "grad_norm": 0.9427763406675067,
      "learning_rate": 4.865595603107388e-06,
      "loss": 0.1777,
      "step": 1624
    },
    {
      "epoch": 0.14972128806375823,
      "grad_norm": 1.0306848153573909,
      "learning_rate": 4.865349078963652e-06,
      "loss": 0.1883,
      "step": 1625
    },
    {
      "epoch": 0.14981342424102825,
      "grad_norm": 1.0514858020888211,
      "learning_rate": 4.865102335196999e-06,
      "loss": 0.1851,
      "step": 1626
    },
    {
      "epoch": 0.14990556041829825,
      "grad_norm": 0.9753039893941937,
      "learning_rate": 4.8648553718303386e-06,
      "loss": 0.1826,
      "step": 1627
    },
    {
      "epoch": 0.14999769659556825,
      "grad_norm": 1.0084449553216943,
      "learning_rate": 4.864608188886603e-06,
      "loss": 0.1822,
      "step": 1628
    },
    {
      "epoch": 0.15008983277283824,
      "grad_norm": 1.006986138104111,
      "learning_rate": 4.8643607863887435e-06,
      "loss": 0.1828,
      "step": 1629
    },
    {
      "epoch": 0.15018196895010827,
      "grad_norm": 1.0297786557131936,
      "learning_rate": 4.8641131643597294e-06,
      "loss": 0.2041,
      "step": 1630
    },
    {
      "epoch": 0.15027410512737827,
      "grad_norm": 1.0037871234057218,
      "learning_rate": 4.863865322822553e-06,
      "loss": 0.1904,
      "step": 1631
    },
    {
      "epoch": 0.15036624130464826,
      "grad_norm": 0.9211265793923229,
      "learning_rate": 4.863617261800229e-06,
      "loss": 0.1853,
      "step": 1632
    },
    {
      "epoch": 0.1504583774819183,
      "grad_norm": 0.9938628142088426,
      "learning_rate": 4.863368981315786e-06,
      "loss": 0.1869,
      "step": 1633
    },
    {
      "epoch": 0.15055051365918828,
      "grad_norm": 1.017354171445112,
      "learning_rate": 4.86312048139228e-06,
      "loss": 0.1867,
      "step": 1634
    },
    {
      "epoch": 0.15064264983645828,
      "grad_norm": 0.9333495575889209,
      "learning_rate": 4.862871762052782e-06,
      "loss": 0.1667,
      "step": 1635
    },
    {
      "epoch": 0.15073478601372828,
      "grad_norm": 0.9518180727935168,
      "learning_rate": 4.862622823320388e-06,
      "loss": 0.1788,
      "step": 1636
    },
    {
      "epoch": 0.1508269221909983,
      "grad_norm": 1.009626987099678,
      "learning_rate": 4.862373665218209e-06,
      "loss": 0.1648,
      "step": 1637
    },
    {
      "epoch": 0.1509190583682683,
      "grad_norm": 0.9908827014313232,
      "learning_rate": 4.862124287769382e-06,
      "loss": 0.1888,
      "step": 1638
    },
    {
      "epoch": 0.1510111945455383,
      "grad_norm": 0.9522038223620974,
      "learning_rate": 4.86187469099706e-06,
      "loss": 0.1736,
      "step": 1639
    },
    {
      "epoch": 0.15110333072280832,
      "grad_norm": 1.0418156159165777,
      "learning_rate": 4.861624874924419e-06,
      "loss": 0.1875,
      "step": 1640
    },
    {
      "epoch": 0.15119546690007832,
      "grad_norm": 0.9646383512472281,
      "learning_rate": 4.861374839574654e-06,
      "loss": 0.1712,
      "step": 1641
    },
    {
      "epoch": 0.15128760307734831,
      "grad_norm": 1.0021890657671846,
      "learning_rate": 4.861124584970981e-06,
      "loss": 0.193,
      "step": 1642
    },
    {
      "epoch": 0.15137973925461834,
      "grad_norm": 1.0054203275956135,
      "learning_rate": 4.860874111136637e-06,
      "loss": 0.1775,
      "step": 1643
    },
    {
      "epoch": 0.15147187543188834,
      "grad_norm": 0.9488906602949967,
      "learning_rate": 4.860623418094877e-06,
      "loss": 0.1907,
      "step": 1644
    },
    {
      "epoch": 0.15156401160915833,
      "grad_norm": 1.0123184700678247,
      "learning_rate": 4.8603725058689785e-06,
      "loss": 0.1903,
      "step": 1645
    },
    {
      "epoch": 0.15165614778642833,
      "grad_norm": 1.0996320649763636,
      "learning_rate": 4.860121374482239e-06,
      "loss": 0.1722,
      "step": 1646
    },
    {
      "epoch": 0.15174828396369835,
      "grad_norm": 0.9861341443618606,
      "learning_rate": 4.859870023957976e-06,
      "loss": 0.1835,
      "step": 1647
    },
    {
      "epoch": 0.15184042014096835,
      "grad_norm": 0.9267396471067681,
      "learning_rate": 4.8596184543195265e-06,
      "loss": 0.1555,
      "step": 1648
    },
    {
      "epoch": 0.15193255631823835,
      "grad_norm": 1.2004716263814705,
      "learning_rate": 4.859366665590251e-06,
      "loss": 0.1959,
      "step": 1649
    },
    {
      "epoch": 0.15202469249550837,
      "grad_norm": 0.968519280731306,
      "learning_rate": 4.859114657793526e-06,
      "loss": 0.1843,
      "step": 1650
    },
    {
      "epoch": 0.15211682867277837,
      "grad_norm": 0.9437339279271343,
      "learning_rate": 4.858862430952751e-06,
      "loss": 0.1683,
      "step": 1651
    },
    {
      "epoch": 0.15220896485004837,
      "grad_norm": 1.1495872644096086,
      "learning_rate": 4.858609985091345e-06,
      "loss": 0.188,
      "step": 1652
    },
    {
      "epoch": 0.15230110102731836,
      "grad_norm": 0.9283484607685915,
      "learning_rate": 4.858357320232749e-06,
      "loss": 0.1684,
      "step": 1653
    },
    {
      "epoch": 0.1523932372045884,
      "grad_norm": 0.9333600768277015,
      "learning_rate": 4.858104436400422e-06,
      "loss": 0.1672,
      "step": 1654
    },
    {
      "epoch": 0.15248537338185839,
      "grad_norm": 1.0603988968811429,
      "learning_rate": 4.857851333617844e-06,
      "loss": 0.1833,
      "step": 1655
    },
    {
      "epoch": 0.15257750955912838,
      "grad_norm": 0.8944080350738115,
      "learning_rate": 4.857598011908515e-06,
      "loss": 0.1706,
      "step": 1656
    },
    {
      "epoch": 0.1526696457363984,
      "grad_norm": 1.0031502555495742,
      "learning_rate": 4.857344471295958e-06,
      "loss": 0.186,
      "step": 1657
    },
    {
      "epoch": 0.1527617819136684,
      "grad_norm": 1.0782002303208784,
      "learning_rate": 4.857090711803713e-06,
      "loss": 0.1812,
      "step": 1658
    },
    {
      "epoch": 0.1528539180909384,
      "grad_norm": 0.9937919230014062,
      "learning_rate": 4.856836733455341e-06,
      "loss": 0.1638,
      "step": 1659
    },
    {
      "epoch": 0.15294605426820843,
      "grad_norm": 0.9962883028859182,
      "learning_rate": 4.8565825362744255e-06,
      "loss": 0.1753,
      "step": 1660
    },
    {
      "epoch": 0.15303819044547842,
      "grad_norm": 0.8589383960068907,
      "learning_rate": 4.8563281202845666e-06,
      "loss": 0.1608,
      "step": 1661
    },
    {
      "epoch": 0.15313032662274842,
      "grad_norm": 0.9811901617708566,
      "learning_rate": 4.85607348550939e-06,
      "loss": 0.1871,
      "step": 1662
    },
    {
      "epoch": 0.15322246280001842,
      "grad_norm": 0.9399582788710602,
      "learning_rate": 4.855818631972535e-06,
      "loss": 0.1832,
      "step": 1663
    },
    {
      "epoch": 0.15331459897728844,
      "grad_norm": 0.9372479897951055,
      "learning_rate": 4.855563559697668e-06,
      "loss": 0.1684,
      "step": 1664
    },
    {
      "epoch": 0.15340673515455844,
      "grad_norm": 0.9427635954759986,
      "learning_rate": 4.855308268708469e-06,
      "loss": 0.1647,
      "step": 1665
    },
    {
      "epoch": 0.15349887133182843,
      "grad_norm": 0.9627536164774635,
      "learning_rate": 4.8550527590286455e-06,
      "loss": 0.1803,
      "step": 1666
    },
    {
      "epoch": 0.15359100750909846,
      "grad_norm": 0.9739001116786681,
      "learning_rate": 4.85479703068192e-06,
      "loss": 0.168,
      "step": 1667
    },
    {
      "epoch": 0.15368314368636846,
      "grad_norm": 0.9356987155049376,
      "learning_rate": 4.854541083692036e-06,
      "loss": 0.1746,
      "step": 1668
    },
    {
      "epoch": 0.15377527986363845,
      "grad_norm": 1.0169478043112947,
      "learning_rate": 4.854284918082759e-06,
      "loss": 0.1695,
      "step": 1669
    },
    {
      "epoch": 0.15386741604090845,
      "grad_norm": 0.977947892285413,
      "learning_rate": 4.854028533877874e-06,
      "loss": 0.1782,
      "step": 1670
    },
    {
      "epoch": 0.15395955221817847,
      "grad_norm": 0.9697331096055015,
      "learning_rate": 4.8537719311011865e-06,
      "loss": 0.1767,
      "step": 1671
    },
    {
      "epoch": 0.15405168839544847,
      "grad_norm": 0.9882267562435134,
      "learning_rate": 4.853515109776522e-06,
      "loss": 0.1781,
      "step": 1672
    },
    {
      "epoch": 0.15414382457271847,
      "grad_norm": 0.9743762721224799,
      "learning_rate": 4.8532580699277256e-06,
      "loss": 0.1655,
      "step": 1673
    },
    {
      "epoch": 0.1542359607499885,
      "grad_norm": 0.8948799120507229,
      "learning_rate": 4.853000811578665e-06,
      "loss": 0.1713,
      "step": 1674
    },
    {
      "epoch": 0.1543280969272585,
      "grad_norm": 0.9685729986401584,
      "learning_rate": 4.852743334753226e-06,
      "loss": 0.1772,
      "step": 1675
    },
    {
      "epoch": 0.1544202331045285,
      "grad_norm": 1.0010420081338955,
      "learning_rate": 4.852485639475314e-06,
      "loss": 0.1827,
      "step": 1676
    },
    {
      "epoch": 0.1545123692817985,
      "grad_norm": 0.9126092389102316,
      "learning_rate": 4.852227725768857e-06,
      "loss": 0.1753,
      "step": 1677
    },
    {
      "epoch": 0.1546045054590685,
      "grad_norm": 0.9552177894021503,
      "learning_rate": 4.8519695936578045e-06,
      "loss": 0.1828,
      "step": 1678
    },
    {
      "epoch": 0.1546966416363385,
      "grad_norm": 0.9534993774757963,
      "learning_rate": 4.851711243166121e-06,
      "loss": 0.1808,
      "step": 1679
    },
    {
      "epoch": 0.1547887778136085,
      "grad_norm": 0.9956457154947068,
      "learning_rate": 4.851452674317795e-06,
      "loss": 0.1898,
      "step": 1680
    },
    {
      "epoch": 0.15488091399087853,
      "grad_norm": 0.9164339311946711,
      "learning_rate": 4.851193887136835e-06,
      "loss": 0.1635,
      "step": 1681
    },
    {
      "epoch": 0.15497305016814852,
      "grad_norm": 0.95908455887589,
      "learning_rate": 4.850934881647271e-06,
      "loss": 0.1802,
      "step": 1682
    },
    {
      "epoch": 0.15506518634541852,
      "grad_norm": 1.0414535544577295,
      "learning_rate": 4.850675657873149e-06,
      "loss": 0.2002,
      "step": 1683
    },
    {
      "epoch": 0.15515732252268855,
      "grad_norm": 0.9468141742151894,
      "learning_rate": 4.850416215838539e-06,
      "loss": 0.1796,
      "step": 1684
    },
    {
      "epoch": 0.15524945869995854,
      "grad_norm": 1.0035960741903245,
      "learning_rate": 4.850156555567531e-06,
      "loss": 0.1758,
      "step": 1685
    },
    {
      "epoch": 0.15534159487722854,
      "grad_norm": 0.9767439731982273,
      "learning_rate": 4.849896677084234e-06,
      "loss": 0.1774,
      "step": 1686
    },
    {
      "epoch": 0.15543373105449854,
      "grad_norm": 1.034221041451684,
      "learning_rate": 4.849636580412778e-06,
      "loss": 0.2034,
      "step": 1687
    },
    {
      "epoch": 0.15552586723176856,
      "grad_norm": 0.9576078272674569,
      "learning_rate": 4.849376265577312e-06,
      "loss": 0.1867,
      "step": 1688
    },
    {
      "epoch": 0.15561800340903856,
      "grad_norm": 1.0442517742292943,
      "learning_rate": 4.849115732602006e-06,
      "loss": 0.2,
      "step": 1689
    },
    {
      "epoch": 0.15571013958630855,
      "grad_norm": 0.8993671722695789,
      "learning_rate": 4.848854981511053e-06,
      "loss": 0.1634,
      "step": 1690
    },
    {
      "epoch": 0.15580227576357858,
      "grad_norm": 0.9595537498820411,
      "learning_rate": 4.848594012328661e-06,
      "loss": 0.1763,
      "step": 1691
    },
    {
      "epoch": 0.15589441194084858,
      "grad_norm": 0.9627114516540672,
      "learning_rate": 4.848332825079063e-06,
      "loss": 0.1853,
      "step": 1692
    },
    {
      "epoch": 0.15598654811811857,
      "grad_norm": 0.9229982001738194,
      "learning_rate": 4.848071419786509e-06,
      "loss": 0.1715,
      "step": 1693
    },
    {
      "epoch": 0.1560786842953886,
      "grad_norm": 0.9070563011756574,
      "learning_rate": 4.847809796475271e-06,
      "loss": 0.1743,
      "step": 1694
    },
    {
      "epoch": 0.1561708204726586,
      "grad_norm": 0.912107518148019,
      "learning_rate": 4.8475479551696405e-06,
      "loss": 0.1714,
      "step": 1695
    },
    {
      "epoch": 0.1562629566499286,
      "grad_norm": 1.031131238472556,
      "learning_rate": 4.847285895893931e-06,
      "loss": 0.1881,
      "step": 1696
    },
    {
      "epoch": 0.1563550928271986,
      "grad_norm": 0.8837742757425879,
      "learning_rate": 4.847023618672472e-06,
      "loss": 0.1628,
      "step": 1697
    },
    {
      "epoch": 0.1564472290044686,
      "grad_norm": 0.8733263411942864,
      "learning_rate": 4.846761123529618e-06,
      "loss": 0.1644,
      "step": 1698
    },
    {
      "epoch": 0.1565393651817386,
      "grad_norm": 0.9097551228910571,
      "learning_rate": 4.846498410489741e-06,
      "loss": 0.1682,
      "step": 1699
    },
    {
      "epoch": 0.1566315013590086,
      "grad_norm": 0.9915359550647295,
      "learning_rate": 4.846235479577234e-06,
      "loss": 0.1853,
      "step": 1700
    },
    {
      "epoch": 0.15672363753627863,
      "grad_norm": 1.030370760761187,
      "learning_rate": 4.845972330816511e-06,
      "loss": 0.1927,
      "step": 1701
    },
    {
      "epoch": 0.15681577371354863,
      "grad_norm": 1.0241222156942211,
      "learning_rate": 4.845708964232003e-06,
      "loss": 0.1904,
      "step": 1702
    },
    {
      "epoch": 0.15690790989081863,
      "grad_norm": 0.9367671000205519,
      "learning_rate": 4.845445379848167e-06,
      "loss": 0.1812,
      "step": 1703
    },
    {
      "epoch": 0.15700004606808862,
      "grad_norm": 0.969147645704287,
      "learning_rate": 4.845181577689474e-06,
      "loss": 0.1826,
      "step": 1704
    },
    {
      "epoch": 0.15709218224535865,
      "grad_norm": 0.914293608169453,
      "learning_rate": 4.844917557780419e-06,
      "loss": 0.1794,
      "step": 1705
    },
    {
      "epoch": 0.15718431842262864,
      "grad_norm": 1.007827722207317,
      "learning_rate": 4.844653320145517e-06,
      "loss": 0.2025,
      "step": 1706
    },
    {
      "epoch": 0.15727645459989864,
      "grad_norm": 0.9844002604666592,
      "learning_rate": 4.844388864809302e-06,
      "loss": 0.1754,
      "step": 1707
    },
    {
      "epoch": 0.15736859077716867,
      "grad_norm": 1.0507807427487061,
      "learning_rate": 4.844124191796328e-06,
      "loss": 0.1871,
      "step": 1708
    },
    {
      "epoch": 0.15746072695443866,
      "grad_norm": 1.0395668195003018,
      "learning_rate": 4.843859301131171e-06,
      "loss": 0.2118,
      "step": 1709
    },
    {
      "epoch": 0.15755286313170866,
      "grad_norm": 1.0186142609766382,
      "learning_rate": 4.843594192838425e-06,
      "loss": 0.1991,
      "step": 1710
    },
    {
      "epoch": 0.15764499930897868,
      "grad_norm": 0.9191028208060649,
      "learning_rate": 4.8433288669427055e-06,
      "loss": 0.1673,
      "step": 1711
    },
    {
      "epoch": 0.15773713548624868,
      "grad_norm": 1.0107944706696486,
      "learning_rate": 4.84306332346865e-06,
      "loss": 0.1813,
      "step": 1712
    },
    {
      "epoch": 0.15782927166351868,
      "grad_norm": 0.8938007261528311,
      "learning_rate": 4.842797562440913e-06,
      "loss": 0.1716,
      "step": 1713
    },
    {
      "epoch": 0.15792140784078867,
      "grad_norm": 0.9842512427881709,
      "learning_rate": 4.842531583884168e-06,
      "loss": 0.1797,
      "step": 1714
    },
    {
      "epoch": 0.1580135440180587,
      "grad_norm": 1.0168585547681084,
      "learning_rate": 4.842265387823115e-06,
      "loss": 0.1949,
      "step": 1715
    },
    {
      "epoch": 0.1581056801953287,
      "grad_norm": 0.9344893607216574,
      "learning_rate": 4.841998974282469e-06,
      "loss": 0.1687,
      "step": 1716
    },
    {
      "epoch": 0.1581978163725987,
      "grad_norm": 0.9569816615949233,
      "learning_rate": 4.841732343286965e-06,
      "loss": 0.1737,
      "step": 1717
    },
    {
      "epoch": 0.15828995254986872,
      "grad_norm": 0.937606627263079,
      "learning_rate": 4.841465494861362e-06,
      "loss": 0.1811,
      "step": 1718
    },
    {
      "epoch": 0.15838208872713871,
      "grad_norm": 1.0238800385610571,
      "learning_rate": 4.841198429030435e-06,
      "loss": 0.1739,
      "step": 1719
    },
    {
      "epoch": 0.1584742249044087,
      "grad_norm": 0.9619074678640244,
      "learning_rate": 4.840931145818982e-06,
      "loss": 0.1632,
      "step": 1720
    },
    {
      "epoch": 0.1585663610816787,
      "grad_norm": 0.9303762249991079,
      "learning_rate": 4.84066364525182e-06,
      "loss": 0.1721,
      "step": 1721
    },
    {
      "epoch": 0.15865849725894873,
      "grad_norm": 0.9418596387167747,
      "learning_rate": 4.8403959273537875e-06,
      "loss": 0.1739,
      "step": 1722
    },
    {
      "epoch": 0.15875063343621873,
      "grad_norm": 1.087199652929513,
      "learning_rate": 4.8401279921497405e-06,
      "loss": 0.1902,
      "step": 1723
    },
    {
      "epoch": 0.15884276961348873,
      "grad_norm": 0.9637578112894941,
      "learning_rate": 4.839859839664557e-06,
      "loss": 0.198,
      "step": 1724
    },
    {
      "epoch": 0.15893490579075875,
      "grad_norm": 0.8742090828607239,
      "learning_rate": 4.839591469923137e-06,
      "loss": 0.1676,
      "step": 1725
    },
    {
      "epoch": 0.15902704196802875,
      "grad_norm": 0.9493609600429447,
      "learning_rate": 4.8393228829503966e-06,
      "loss": 0.1838,
      "step": 1726
    },
    {
      "epoch": 0.15911917814529875,
      "grad_norm": 0.9210297680349159,
      "learning_rate": 4.839054078771275e-06,
      "loss": 0.1838,
      "step": 1727
    },
    {
      "epoch": 0.15921131432256877,
      "grad_norm": 0.9447500968465715,
      "learning_rate": 4.83878505741073e-06,
      "loss": 0.1835,
      "step": 1728
    },
    {
      "epoch": 0.15930345049983877,
      "grad_norm": 0.9540678329702622,
      "learning_rate": 4.838515818893741e-06,
      "loss": 0.172,
      "step": 1729
    },
    {
      "epoch": 0.15939558667710876,
      "grad_norm": 1.0123215650229256,
      "learning_rate": 4.838246363245306e-06,
      "loss": 0.1641,
      "step": 1730
    },
    {
      "epoch": 0.15948772285437876,
      "grad_norm": 0.9966400481188536,
      "learning_rate": 4.837976690490445e-06,
      "loss": 0.1905,
      "step": 1731
    },
    {
      "epoch": 0.15957985903164879,
      "grad_norm": 0.9095553032746777,
      "learning_rate": 4.837706800654197e-06,
      "loss": 0.1769,
      "step": 1732
    },
    {
      "epoch": 0.15967199520891878,
      "grad_norm": 1.0303333847098155,
      "learning_rate": 4.83743669376162e-06,
      "loss": 0.1803,
      "step": 1733
    },
    {
      "epoch": 0.15976413138618878,
      "grad_norm": 0.963888249747459,
      "learning_rate": 4.8371663698377955e-06,
      "loss": 0.1683,
      "step": 1734
    },
    {
      "epoch": 0.1598562675634588,
      "grad_norm": 0.9576505684732173,
      "learning_rate": 4.836895828907822e-06,
      "loss": 0.1851,
      "step": 1735
    },
    {
      "epoch": 0.1599484037407288,
      "grad_norm": 1.0554891537306332,
      "learning_rate": 4.836625070996818e-06,
      "loss": 0.188,
      "step": 1736
    },
    {
      "epoch": 0.1600405399179988,
      "grad_norm": 1.030216874614439,
      "learning_rate": 4.836354096129926e-06,
      "loss": 0.1659,
      "step": 1737
    },
    {
      "epoch": 0.1601326760952688,
      "grad_norm": 1.0900760942160297,
      "learning_rate": 4.8360829043323046e-06,
      "loss": 0.1832,
      "step": 1738
    },
    {
      "epoch": 0.16022481227253882,
      "grad_norm": 0.9371181926316801,
      "learning_rate": 4.835811495629134e-06,
      "loss": 0.175,
      "step": 1739
    },
    {
      "epoch": 0.16031694844980882,
      "grad_norm": 0.9495594727001155,
      "learning_rate": 4.835539870045613e-06,
      "loss": 0.1992,
      "step": 1740
    },
    {
      "epoch": 0.1604090846270788,
      "grad_norm": 0.917342942507763,
      "learning_rate": 4.8352680276069654e-06,
      "loss": 0.1653,
      "step": 1741
    },
    {
      "epoch": 0.16050122080434884,
      "grad_norm": 0.90525029954931,
      "learning_rate": 4.83499596833843e-06,
      "loss": 0.1795,
      "step": 1742
    },
    {
      "epoch": 0.16059335698161883,
      "grad_norm": 1.0455965458196492,
      "learning_rate": 4.834723692265268e-06,
      "loss": 0.1817,
      "step": 1743
    },
    {
      "epoch": 0.16068549315888883,
      "grad_norm": 0.959693505057054,
      "learning_rate": 4.834451199412759e-06,
      "loss": 0.1781,
      "step": 1744
    },
    {
      "epoch": 0.16077762933615886,
      "grad_norm": 0.9527369490107168,
      "learning_rate": 4.8341784898062056e-06,
      "loss": 0.1819,
      "step": 1745
    },
    {
      "epoch": 0.16086976551342885,
      "grad_norm": 0.9431052982892322,
      "learning_rate": 4.833905563470928e-06,
      "loss": 0.1706,
      "step": 1746
    },
    {
      "epoch": 0.16096190169069885,
      "grad_norm": 1.0048907347623248,
      "learning_rate": 4.833632420432267e-06,
      "loss": 0.1831,
      "step": 1747
    },
    {
      "epoch": 0.16105403786796885,
      "grad_norm": 1.0442032356650304,
      "learning_rate": 4.833359060715586e-06,
      "loss": 0.19,
      "step": 1748
    },
    {
      "epoch": 0.16114617404523887,
      "grad_norm": 1.037538480498732,
      "learning_rate": 4.8330854843462635e-06,
      "loss": 0.1804,
      "step": 1749
    },
    {
      "epoch": 0.16123831022250887,
      "grad_norm": 0.9676875781513236,
      "learning_rate": 4.832811691349703e-06,
      "loss": 0.1804,
      "step": 1750
    },
    {
      "epoch": 0.16133044639977887,
      "grad_norm": 1.0135987525084371,
      "learning_rate": 4.832537681751327e-06,
      "loss": 0.1827,
      "step": 1751
    },
    {
      "epoch": 0.1614225825770489,
      "grad_norm": 1.018385777602988,
      "learning_rate": 4.832263455576576e-06,
      "loss": 0.1972,
      "step": 1752
    },
    {
      "epoch": 0.1615147187543189,
      "grad_norm": 1.1980934939018493,
      "learning_rate": 4.8319890128509115e-06,
      "loss": 0.1822,
      "step": 1753
    },
    {
      "epoch": 0.16160685493158888,
      "grad_norm": 1.0080490206517214,
      "learning_rate": 4.831714353599817e-06,
      "loss": 0.1886,
      "step": 1754
    },
    {
      "epoch": 0.16169899110885888,
      "grad_norm": 0.941493167404903,
      "learning_rate": 4.831439477848793e-06,
      "loss": 0.1725,
      "step": 1755
    },
    {
      "epoch": 0.1617911272861289,
      "grad_norm": 0.9039272326667056,
      "learning_rate": 4.831164385623362e-06,
      "loss": 0.1589,
      "step": 1756
    },
    {
      "epoch": 0.1618832634633989,
      "grad_norm": 0.9530095888259483,
      "learning_rate": 4.830889076949069e-06,
      "loss": 0.1746,
      "step": 1757
    },
    {
      "epoch": 0.1619753996406689,
      "grad_norm": 0.905148587751177,
      "learning_rate": 4.830613551851473e-06,
      "loss": 0.1681,
      "step": 1758
    },
    {
      "epoch": 0.16206753581793892,
      "grad_norm": 0.9308074744746171,
      "learning_rate": 4.830337810356157e-06,
      "loss": 0.1776,
      "step": 1759
    },
    {
      "epoch": 0.16215967199520892,
      "grad_norm": 0.9875824921474106,
      "learning_rate": 4.830061852488726e-06,
      "loss": 0.1828,
      "step": 1760
    },
    {
      "epoch": 0.16225180817247892,
      "grad_norm": 0.9813374242149387,
      "learning_rate": 4.829785678274801e-06,
      "loss": 0.1839,
      "step": 1761
    },
    {
      "epoch": 0.16234394434974894,
      "grad_norm": 0.931234653866775,
      "learning_rate": 4.829509287740024e-06,
      "loss": 0.1788,
      "step": 1762
    },
    {
      "epoch": 0.16243608052701894,
      "grad_norm": 0.9585368043301006,
      "learning_rate": 4.82923268091006e-06,
      "loss": 0.188,
      "step": 1763
    },
    {
      "epoch": 0.16252821670428894,
      "grad_norm": 1.0074529442860354,
      "learning_rate": 4.828955857810591e-06,
      "loss": 0.191,
      "step": 1764
    },
    {
      "epoch": 0.16262035288155893,
      "grad_norm": 0.9410361170473834,
      "learning_rate": 4.828678818467319e-06,
      "loss": 0.1763,
      "step": 1765
    },
    {
      "epoch": 0.16271248905882896,
      "grad_norm": 1.0208109082183012,
      "learning_rate": 4.828401562905969e-06,
      "loss": 0.2075,
      "step": 1766
    },
    {
      "epoch": 0.16280462523609895,
      "grad_norm": 0.9269680529094783,
      "learning_rate": 4.828124091152283e-06,
      "loss": 0.1737,
      "step": 1767
    },
    {
      "epoch": 0.16289676141336895,
      "grad_norm": 1.0100855715032826,
      "learning_rate": 4.827846403232024e-06,
      "loss": 0.1928,
      "step": 1768
    },
    {
      "epoch": 0.16298889759063898,
      "grad_norm": 0.9905609107994238,
      "learning_rate": 4.827568499170977e-06,
      "loss": 0.1722,
      "step": 1769
    },
    {
      "epoch": 0.16308103376790897,
      "grad_norm": 1.0136821739673834,
      "learning_rate": 4.8272903789949435e-06,
      "loss": 0.1732,
      "step": 1770
    },
    {
      "epoch": 0.16317316994517897,
      "grad_norm": 1.014178191785531,
      "learning_rate": 4.8270120427297485e-06,
      "loss": 0.1892,
      "step": 1771
    },
    {
      "epoch": 0.16326530612244897,
      "grad_norm": 0.9882369844302558,
      "learning_rate": 4.8267334904012345e-06,
      "loss": 0.1798,
      "step": 1772
    },
    {
      "epoch": 0.163357442299719,
      "grad_norm": 0.9940919783308403,
      "learning_rate": 4.8264547220352655e-06,
      "loss": 0.1862,
      "step": 1773
    },
    {
      "epoch": 0.163449578476989,
      "grad_norm": 1.0582977739314734,
      "learning_rate": 4.826175737657725e-06,
      "loss": 0.1913,
      "step": 1774
    },
    {
      "epoch": 0.16354171465425899,
      "grad_norm": 1.0787650948609633,
      "learning_rate": 4.825896537294518e-06,
      "loss": 0.1854,
      "step": 1775
    },
    {
      "epoch": 0.163633850831529,
      "grad_norm": 1.0181516343182249,
      "learning_rate": 4.825617120971566e-06,
      "loss": 0.1747,
      "step": 1776
    },
    {
      "epoch": 0.163725987008799,
      "grad_norm": 0.9365780887109146,
      "learning_rate": 4.825337488714814e-06,
      "loss": 0.185,
      "step": 1777
    },
    {
      "epoch": 0.163818123186069,
      "grad_norm": 1.034805261001223,
      "learning_rate": 4.825057640550226e-06,
      "loss": 0.1875,
      "step": 1778
    },
    {
      "epoch": 0.16391025936333903,
      "grad_norm": 1.0004955506710502,
      "learning_rate": 4.824777576503786e-06,
      "loss": 0.1832,
      "step": 1779
    },
    {
      "epoch": 0.16400239554060903,
      "grad_norm": 1.045133789322791,
      "learning_rate": 4.824497296601499e-06,
      "loss": 0.1743,
      "step": 1780
    },
    {
      "epoch": 0.16409453171787902,
      "grad_norm": 0.929674001723364,
      "learning_rate": 4.8242168008693864e-06,
      "loss": 0.1688,
      "step": 1781
    },
    {
      "epoch": 0.16418666789514902,
      "grad_norm": 0.8909382328694191,
      "learning_rate": 4.823936089333494e-06,
      "loss": 0.1679,
      "step": 1782
    },
    {
      "epoch": 0.16427880407241904,
      "grad_norm": 0.8714078813260372,
      "learning_rate": 4.823655162019886e-06,
      "loss": 0.1616,
      "step": 1783
    },
    {
      "epoch": 0.16437094024968904,
      "grad_norm": 0.9117516027805643,
      "learning_rate": 4.823374018954646e-06,
      "loss": 0.1665,
      "step": 1784
    },
    {
      "epoch": 0.16446307642695904,
      "grad_norm": 0.8647610420636894,
      "learning_rate": 4.823092660163878e-06,
      "loss": 0.1528,
      "step": 1785
    },
    {
      "epoch": 0.16455521260422906,
      "grad_norm": 0.9530561108679019,
      "learning_rate": 4.822811085673706e-06,
      "loss": 0.1691,
      "step": 1786
    },
    {
      "epoch": 0.16464734878149906,
      "grad_norm": 0.8638732798559001,
      "learning_rate": 4.822529295510276e-06,
      "loss": 0.1568,
      "step": 1787
    },
    {
      "epoch": 0.16473948495876906,
      "grad_norm": 0.9641560836422756,
      "learning_rate": 4.82224728969975e-06,
      "loss": 0.1783,
      "step": 1788
    },
    {
      "epoch": 0.16483162113603905,
      "grad_norm": 0.924759701849841,
      "learning_rate": 4.821965068268314e-06,
      "loss": 0.1727,
      "step": 1789
    },
    {
      "epoch": 0.16492375731330908,
      "grad_norm": 0.9362299471705925,
      "learning_rate": 4.82168263124217e-06,
      "loss": 0.1741,
      "step": 1790
    },
    {
      "epoch": 0.16501589349057907,
      "grad_norm": 0.981951133307446,
      "learning_rate": 4.8213999786475455e-06,
      "loss": 0.186,
      "step": 1791
    },
    {
      "epoch": 0.16510802966784907,
      "grad_norm": 0.8704969480803476,
      "learning_rate": 4.821117110510683e-06,
      "loss": 0.1634,
      "step": 1792
    },
    {
      "epoch": 0.1652001658451191,
      "grad_norm": 0.9063406667410021,
      "learning_rate": 4.820834026857846e-06,
      "loss": 0.1793,
      "step": 1793
    },
    {
      "epoch": 0.1652923020223891,
      "grad_norm": 1.0657161087119753,
      "learning_rate": 4.820550727715321e-06,
      "loss": 0.1889,
      "step": 1794
    },
    {
      "epoch": 0.1653844381996591,
      "grad_norm": 0.9127685674712928,
      "learning_rate": 4.820267213109409e-06,
      "loss": 0.1724,
      "step": 1795
    },
    {
      "epoch": 0.16547657437692911,
      "grad_norm": 0.9727997916206534,
      "learning_rate": 4.8199834830664395e-06,
      "loss": 0.1777,
      "step": 1796
    },
    {
      "epoch": 0.1655687105541991,
      "grad_norm": 0.9452854756210244,
      "learning_rate": 4.819699537612752e-06,
      "loss": 0.1615,
      "step": 1797
    },
    {
      "epoch": 0.1656608467314691,
      "grad_norm": 0.9578005282418585,
      "learning_rate": 4.819415376774714e-06,
      "loss": 0.1827,
      "step": 1798
    },
    {
      "epoch": 0.1657529829087391,
      "grad_norm": 1.0597663881266928,
      "learning_rate": 4.819131000578707e-06,
      "loss": 0.1843,
      "step": 1799
    },
    {
      "epoch": 0.16584511908600913,
      "grad_norm": 0.9342840629471308,
      "learning_rate": 4.818846409051139e-06,
      "loss": 0.1813,
      "step": 1800
    },
    {
      "epoch": 0.16593725526327913,
      "grad_norm": 0.98252920230672,
      "learning_rate": 4.818561602218431e-06,
      "loss": 0.1838,
      "step": 1801
    },
    {
      "epoch": 0.16602939144054912,
      "grad_norm": 0.9424563329013992,
      "learning_rate": 4.818276580107029e-06,
      "loss": 0.1702,
      "step": 1802
    },
    {
      "epoch": 0.16612152761781915,
      "grad_norm": 0.9202413050049445,
      "learning_rate": 4.817991342743396e-06,
      "loss": 0.1735,
      "step": 1803
    },
    {
      "epoch": 0.16621366379508914,
      "grad_norm": 0.9549194113487504,
      "learning_rate": 4.81770589015402e-06,
      "loss": 0.1796,
      "step": 1804
    },
    {
      "epoch": 0.16630579997235914,
      "grad_norm": 0.9465694214625567,
      "learning_rate": 4.8174202223654e-06,
      "loss": 0.1843,
      "step": 1805
    },
    {
      "epoch": 0.16639793614962914,
      "grad_norm": 0.9349905458192107,
      "learning_rate": 4.8171343394040645e-06,
      "loss": 0.1745,
      "step": 1806
    },
    {
      "epoch": 0.16649007232689916,
      "grad_norm": 0.8796702727017657,
      "learning_rate": 4.816848241296556e-06,
      "loss": 0.1695,
      "step": 1807
    },
    {
      "epoch": 0.16658220850416916,
      "grad_norm": 0.9207017502379715,
      "learning_rate": 4.816561928069439e-06,
      "loss": 0.1825,
      "step": 1808
    },
    {
      "epoch": 0.16667434468143916,
      "grad_norm": 0.8798529220695297,
      "learning_rate": 4.8162753997492965e-06,
      "loss": 0.1758,
      "step": 1809
    },
    {
      "epoch": 0.16676648085870918,
      "grad_norm": 1.010096798746228,
      "learning_rate": 4.815988656362735e-06,
      "loss": 0.1908,
      "step": 1810
    },
    {
      "epoch": 0.16685861703597918,
      "grad_norm": 0.906477990452234,
      "learning_rate": 4.815701697936377e-06,
      "loss": 0.1817,
      "step": 1811
    },
    {
      "epoch": 0.16695075321324918,
      "grad_norm": 0.9584264334027195,
      "learning_rate": 4.815414524496867e-06,
      "loss": 0.1689,
      "step": 1812
    },
    {
      "epoch": 0.1670428893905192,
      "grad_norm": 1.0568204132693753,
      "learning_rate": 4.8151271360708704e-06,
      "loss": 0.1817,
      "step": 1813
    },
    {
      "epoch": 0.1671350255677892,
      "grad_norm": 1.0170922070435067,
      "learning_rate": 4.814839532685069e-06,
      "loss": 0.1794,
      "step": 1814
    },
    {
      "epoch": 0.1672271617450592,
      "grad_norm": 0.9452343091927007,
      "learning_rate": 4.814551714366168e-06,
      "loss": 0.1775,
      "step": 1815
    },
    {
      "epoch": 0.1673192979223292,
      "grad_norm": 1.0103963129300657,
      "learning_rate": 4.814263681140892e-06,
      "loss": 0.1789,
      "step": 1816
    },
    {
      "epoch": 0.16741143409959922,
      "grad_norm": 1.057193287404099,
      "learning_rate": 4.813975433035984e-06,
      "loss": 0.18,
      "step": 1817
    },
    {
      "epoch": 0.1675035702768692,
      "grad_norm": 1.0599542152721877,
      "learning_rate": 4.813686970078207e-06,
      "loss": 0.1861,
      "step": 1818
    },
    {
      "epoch": 0.1675957064541392,
      "grad_norm": 0.9410257979008102,
      "learning_rate": 4.813398292294345e-06,
      "loss": 0.1782,
      "step": 1819
    },
    {
      "epoch": 0.16768784263140923,
      "grad_norm": 0.9106844898045353,
      "learning_rate": 4.813109399711204e-06,
      "loss": 0.1605,
      "step": 1820
    },
    {
      "epoch": 0.16777997880867923,
      "grad_norm": 0.9510723876689734,
      "learning_rate": 4.812820292355607e-06,
      "loss": 0.1768,
      "step": 1821
    },
    {
      "epoch": 0.16787211498594923,
      "grad_norm": 0.9713976398794669,
      "learning_rate": 4.812530970254396e-06,
      "loss": 0.1768,
      "step": 1822
    },
    {
      "epoch": 0.16796425116321922,
      "grad_norm": 0.9726574938485535,
      "learning_rate": 4.812241433434436e-06,
      "loss": 0.188,
      "step": 1823
    },
    {
      "epoch": 0.16805638734048925,
      "grad_norm": 1.0053761517023432,
      "learning_rate": 4.81195168192261e-06,
      "loss": 0.1749,
      "step": 1824
    },
    {
      "epoch": 0.16814852351775925,
      "grad_norm": 1.0033991069529102,
      "learning_rate": 4.81166171574582e-06,
      "loss": 0.1782,
      "step": 1825
    },
    {
      "epoch": 0.16824065969502924,
      "grad_norm": 0.9968269303113185,
      "learning_rate": 4.811371534930993e-06,
      "loss": 0.1805,
      "step": 1826
    },
    {
      "epoch": 0.16833279587229927,
      "grad_norm": 0.9615227789782784,
      "learning_rate": 4.8110811395050695e-06,
      "loss": 0.1696,
      "step": 1827
    },
    {
      "epoch": 0.16842493204956926,
      "grad_norm": 0.9821799299380174,
      "learning_rate": 4.810790529495013e-06,
      "loss": 0.1791,
      "step": 1828
    },
    {
      "epoch": 0.16851706822683926,
      "grad_norm": 0.9811162841683847,
      "learning_rate": 4.810499704927808e-06,
      "loss": 0.1723,
      "step": 1829
    },
    {
      "epoch": 0.1686092044041093,
      "grad_norm": 1.1244318132152042,
      "learning_rate": 4.810208665830456e-06,
      "loss": 0.1907,
      "step": 1830
    },
    {
      "epoch": 0.16870134058137928,
      "grad_norm": 0.9817177234015824,
      "learning_rate": 4.809917412229981e-06,
      "loss": 0.1788,
      "step": 1831
    },
    {
      "epoch": 0.16879347675864928,
      "grad_norm": 0.9184917085261609,
      "learning_rate": 4.809625944153425e-06,
      "loss": 0.1617,
      "step": 1832
    },
    {
      "epoch": 0.16888561293591928,
      "grad_norm": 0.9234623678622588,
      "learning_rate": 4.8093342616278525e-06,
      "loss": 0.1809,
      "step": 1833
    },
    {
      "epoch": 0.1689777491131893,
      "grad_norm": 0.9581615643494954,
      "learning_rate": 4.809042364680345e-06,
      "loss": 0.1634,
      "step": 1834
    },
    {
      "epoch": 0.1690698852904593,
      "grad_norm": 0.9697985187154081,
      "learning_rate": 4.808750253338006e-06,
      "loss": 0.1871,
      "step": 1835
    },
    {
      "epoch": 0.1691620214677293,
      "grad_norm": 0.9221846846576924,
      "learning_rate": 4.8084579276279565e-06,
      "loss": 0.176,
      "step": 1836
    },
    {
      "epoch": 0.16925415764499932,
      "grad_norm": 0.962272543721862,
      "learning_rate": 4.80816538757734e-06,
      "loss": 0.1954,
      "step": 1837
    },
    {
      "epoch": 0.16934629382226932,
      "grad_norm": 0.8740315446603265,
      "learning_rate": 4.80787263321332e-06,
      "loss": 0.1638,
      "step": 1838
    },
    {
      "epoch": 0.16943842999953931,
      "grad_norm": 0.9401843869180659,
      "learning_rate": 4.8075796645630764e-06,
      "loss": 0.1839,
      "step": 1839
    },
    {
      "epoch": 0.1695305661768093,
      "grad_norm": 0.9919651044339668,
      "learning_rate": 4.807286481653813e-06,
      "loss": 0.1719,
      "step": 1840
    },
    {
      "epoch": 0.16962270235407934,
      "grad_norm": 0.931631600254304,
      "learning_rate": 4.806993084512752e-06,
      "loss": 0.1786,
      "step": 1841
    },
    {
      "epoch": 0.16971483853134933,
      "grad_norm": 1.0388517287914885,
      "learning_rate": 4.806699473167134e-06,
      "loss": 0.1916,
      "step": 1842
    },
    {
      "epoch": 0.16980697470861933,
      "grad_norm": 0.8913690534162603,
      "learning_rate": 4.806405647644222e-06,
      "loss": 0.1826,
      "step": 1843
    },
    {
      "epoch": 0.16989911088588935,
      "grad_norm": 1.0257086366954042,
      "learning_rate": 4.806111607971298e-06,
      "loss": 0.1751,
      "step": 1844
    },
    {
      "epoch": 0.16999124706315935,
      "grad_norm": 0.9335569005047046,
      "learning_rate": 4.805817354175663e-06,
      "loss": 0.1715,
      "step": 1845
    },
    {
      "epoch": 0.17008338324042935,
      "grad_norm": 0.9491430087260864,
      "learning_rate": 4.805522886284637e-06,
      "loss": 0.1731,
      "step": 1846
    },
    {
      "epoch": 0.17017551941769937,
      "grad_norm": 0.8936870287779184,
      "learning_rate": 4.8052282043255635e-06,
      "loss": 0.1777,
      "step": 1847
    },
    {
      "epoch": 0.17026765559496937,
      "grad_norm": 0.8902565793090995,
      "learning_rate": 4.804933308325804e-06,
      "loss": 0.1694,
      "step": 1848
    },
    {
      "epoch": 0.17035979177223937,
      "grad_norm": 0.9985363911822385,
      "learning_rate": 4.8046381983127385e-06,
      "loss": 0.2018,
      "step": 1849
    },
    {
      "epoch": 0.17045192794950936,
      "grad_norm": 0.9426301604024907,
      "learning_rate": 4.8043428743137675e-06,
      "loss": 0.1867,
      "step": 1850
    },
    {
      "epoch": 0.1705440641267794,
      "grad_norm": 0.8550027014618022,
      "learning_rate": 4.8040473363563136e-06,
      "loss": 0.1637,
      "step": 1851
    },
    {
      "epoch": 0.17063620030404938,
      "grad_norm": 0.9734804002388993,
      "learning_rate": 4.8037515844678165e-06,
      "loss": 0.1866,
      "step": 1852
    },
    {
      "epoch": 0.17072833648131938,
      "grad_norm": 0.9928693625462343,
      "learning_rate": 4.803455618675736e-06,
      "loss": 0.1784,
      "step": 1853
    },
    {
      "epoch": 0.1708204726585894,
      "grad_norm": 0.9402222477752512,
      "learning_rate": 4.803159439007554e-06,
      "loss": 0.1678,
      "step": 1854
    },
    {
      "epoch": 0.1709126088358594,
      "grad_norm": 0.9737747885881045,
      "learning_rate": 4.80286304549077e-06,
      "loss": 0.1844,
      "step": 1855
    },
    {
      "epoch": 0.1710047450131294,
      "grad_norm": 0.8979782554294682,
      "learning_rate": 4.802566438152904e-06,
      "loss": 0.1707,
      "step": 1856
    },
    {
      "epoch": 0.1710968811903994,
      "grad_norm": 1.0231193949257447,
      "learning_rate": 4.802269617021497e-06,
      "loss": 0.1965,
      "step": 1857
    },
    {
      "epoch": 0.17118901736766942,
      "grad_norm": 0.9882485601160279,
      "learning_rate": 4.801972582124108e-06,
      "loss": 0.1855,
      "step": 1858
    },
    {
      "epoch": 0.17128115354493942,
      "grad_norm": 0.9804779701977544,
      "learning_rate": 4.801675333488317e-06,
      "loss": 0.1846,
      "step": 1859
    },
    {
      "epoch": 0.17137328972220942,
      "grad_norm": 0.9887115862803393,
      "learning_rate": 4.801377871141723e-06,
      "loss": 0.1818,
      "step": 1860
    },
    {
      "epoch": 0.17146542589947944,
      "grad_norm": 0.9908343960616961,
      "learning_rate": 4.801080195111948e-06,
      "loss": 0.1728,
      "step": 1861
    },
    {
      "epoch": 0.17155756207674944,
      "grad_norm": 0.9121926413556142,
      "learning_rate": 4.800782305426628e-06,
      "loss": 0.1736,
      "step": 1862
    },
    {
      "epoch": 0.17164969825401943,
      "grad_norm": 1.0130289652715774,
      "learning_rate": 4.800484202113423e-06,
      "loss": 0.1725,
      "step": 1863
    },
    {
      "epoch": 0.17174183443128946,
      "grad_norm": 0.9997676957196446,
      "learning_rate": 4.800185885200013e-06,
      "loss": 0.1723,
      "step": 1864
    },
    {
      "epoch": 0.17183397060855946,
      "grad_norm": 0.9695560315706072,
      "learning_rate": 4.7998873547140954e-06,
      "loss": 0.1721,
      "step": 1865
    },
    {
      "epoch": 0.17192610678582945,
      "grad_norm": 0.9735293306486302,
      "learning_rate": 4.799588610683389e-06,
      "loss": 0.1845,
      "step": 1866
    },
    {
      "epoch": 0.17201824296309945,
      "grad_norm": 0.9944144850116116,
      "learning_rate": 4.799289653135633e-06,
      "loss": 0.1756,
      "step": 1867
    },
    {
      "epoch": 0.17211037914036947,
      "grad_norm": 0.9316812929228392,
      "learning_rate": 4.7989904820985854e-06,
      "loss": 0.1747,
      "step": 1868
    },
    {
      "epoch": 0.17220251531763947,
      "grad_norm": 0.9486629348319279,
      "learning_rate": 4.798691097600024e-06,
      "loss": 0.1783,
      "step": 1869
    },
    {
      "epoch": 0.17229465149490947,
      "grad_norm": 0.9921250810385875,
      "learning_rate": 4.798391499667747e-06,
      "loss": 0.1806,
      "step": 1870
    },
    {
      "epoch": 0.1723867876721795,
      "grad_norm": 0.9467281601207117,
      "learning_rate": 4.798091688329572e-06,
      "loss": 0.1792,
      "step": 1871
    },
    {
      "epoch": 0.1724789238494495,
      "grad_norm": 0.8938437016708146,
      "learning_rate": 4.7977916636133365e-06,
      "loss": 0.166,
      "step": 1872
    },
    {
      "epoch": 0.17257106002671949,
      "grad_norm": 0.9033608634661399,
      "learning_rate": 4.797491425546898e-06,
      "loss": 0.1624,
      "step": 1873
    },
    {
      "epoch": 0.17266319620398948,
      "grad_norm": 1.0093173143492151,
      "learning_rate": 4.797190974158133e-06,
      "loss": 0.1804,
      "step": 1874
    },
    {
      "epoch": 0.1727553323812595,
      "grad_norm": 0.9269759911665011,
      "learning_rate": 4.796890309474938e-06,
      "loss": 0.1924,
      "step": 1875
    },
    {
      "epoch": 0.1728474685585295,
      "grad_norm": 0.9511087442600202,
      "learning_rate": 4.796589431525232e-06,
      "loss": 0.1717,
      "step": 1876
    },
    {
      "epoch": 0.1729396047357995,
      "grad_norm": 0.9727630535856038,
      "learning_rate": 4.796288340336949e-06,
      "loss": 0.1824,
      "step": 1877
    },
    {
      "epoch": 0.17303174091306953,
      "grad_norm": 0.9098476882737225,
      "learning_rate": 4.795987035938047e-06,
      "loss": 0.1598,
      "step": 1878
    },
    {
      "epoch": 0.17312387709033952,
      "grad_norm": 0.9586208598334143,
      "learning_rate": 4.795685518356501e-06,
      "loss": 0.176,
      "step": 1879
    },
    {
      "epoch": 0.17321601326760952,
      "grad_norm": 0.9342244991941684,
      "learning_rate": 4.795383787620308e-06,
      "loss": 0.1622,
      "step": 1880
    },
    {
      "epoch": 0.17330814944487954,
      "grad_norm": 0.9062253340794538,
      "learning_rate": 4.795081843757483e-06,
      "loss": 0.1579,
      "step": 1881
    },
    {
      "epoch": 0.17340028562214954,
      "grad_norm": 0.93803547801233,
      "learning_rate": 4.794779686796062e-06,
      "loss": 0.166,
      "step": 1882
    },
    {
      "epoch": 0.17349242179941954,
      "grad_norm": 0.9996096252341287,
      "learning_rate": 4.794477316764101e-06,
      "loss": 0.1842,
      "step": 1883
    },
    {
      "epoch": 0.17358455797668954,
      "grad_norm": 0.9514554221599457,
      "learning_rate": 4.794174733689672e-06,
      "loss": 0.1672,
      "step": 1884
    },
    {
      "epoch": 0.17367669415395956,
      "grad_norm": 0.9379721576841644,
      "learning_rate": 4.793871937600874e-06,
      "loss": 0.1689,
      "step": 1885
    },
    {
      "epoch": 0.17376883033122956,
      "grad_norm": 0.9887485087873079,
      "learning_rate": 4.7935689285258195e-06,
      "loss": 0.1695,
      "step": 1886
    },
    {
      "epoch": 0.17386096650849955,
      "grad_norm": 0.9185524648048645,
      "learning_rate": 4.793265706492643e-06,
      "loss": 0.175,
      "step": 1887
    },
    {
      "epoch": 0.17395310268576958,
      "grad_norm": 0.944094137645397,
      "learning_rate": 4.792962271529499e-06,
      "loss": 0.1694,
      "step": 1888
    },
    {
      "epoch": 0.17404523886303958,
      "grad_norm": 0.9907587903074953,
      "learning_rate": 4.792658623664561e-06,
      "loss": 0.1788,
      "step": 1889
    },
    {
      "epoch": 0.17413737504030957,
      "grad_norm": 0.8874671631140197,
      "learning_rate": 4.792354762926023e-06,
      "loss": 0.1604,
      "step": 1890
    },
    {
      "epoch": 0.17422951121757957,
      "grad_norm": 0.917699450885086,
      "learning_rate": 4.792050689342098e-06,
      "loss": 0.1882,
      "step": 1891
    },
    {
      "epoch": 0.1743216473948496,
      "grad_norm": 0.8728825937831589,
      "learning_rate": 4.791746402941021e-06,
      "loss": 0.1583,
      "step": 1892
    },
    {
      "epoch": 0.1744137835721196,
      "grad_norm": 0.999026083350806,
      "learning_rate": 4.791441903751043e-06,
      "loss": 0.1835,
      "step": 1893
    },
    {
      "epoch": 0.1745059197493896,
      "grad_norm": 0.9922845901509115,
      "learning_rate": 4.791137191800438e-06,
      "loss": 0.1817,
      "step": 1894
    },
    {
      "epoch": 0.1745980559266596,
      "grad_norm": 0.9063701103169883,
      "learning_rate": 4.790832267117498e-06,
      "loss": 0.1633,
      "step": 1895
    },
    {
      "epoch": 0.1746901921039296,
      "grad_norm": 0.9812700186466582,
      "learning_rate": 4.790527129730536e-06,
      "loss": 0.1825,
      "step": 1896
    },
    {
      "epoch": 0.1747823282811996,
      "grad_norm": 0.9791016126703898,
      "learning_rate": 4.790221779667883e-06,
      "loss": 0.1834,
      "step": 1897
    },
    {
      "epoch": 0.17487446445846963,
      "grad_norm": 1.019954906964735,
      "learning_rate": 4.789916216957892e-06,
      "loss": 0.1793,
      "step": 1898
    },
    {
      "epoch": 0.17496660063573963,
      "grad_norm": 1.0658787511106087,
      "learning_rate": 4.789610441628932e-06,
      "loss": 0.1805,
      "step": 1899
    },
    {
      "epoch": 0.17505873681300962,
      "grad_norm": 0.9131631203861305,
      "learning_rate": 4.789304453709398e-06,
      "loss": 0.1692,
      "step": 1900
    },
    {
      "epoch": 0.17515087299027962,
      "grad_norm": 0.9370079179863117,
      "learning_rate": 4.788998253227698e-06,
      "loss": 0.1774,
      "step": 1901
    },
    {
      "epoch": 0.17524300916754965,
      "grad_norm": 0.9204477318927515,
      "learning_rate": 4.788691840212264e-06,
      "loss": 0.1696,
      "step": 1902
    },
    {
      "epoch": 0.17533514534481964,
      "grad_norm": 0.9702024390215709,
      "learning_rate": 4.788385214691546e-06,
      "loss": 0.1758,
      "step": 1903
    },
    {
      "epoch": 0.17542728152208964,
      "grad_norm": 1.0381178605677073,
      "learning_rate": 4.788078376694017e-06,
      "loss": 0.18,
      "step": 1904
    },
    {
      "epoch": 0.17551941769935966,
      "grad_norm": 0.9955510617085207,
      "learning_rate": 4.787771326248162e-06,
      "loss": 0.1738,
      "step": 1905
    },
    {
      "epoch": 0.17561155387662966,
      "grad_norm": 0.8831789806878049,
      "learning_rate": 4.787464063382493e-06,
      "loss": 0.157,
      "step": 1906
    },
    {
      "epoch": 0.17570369005389966,
      "grad_norm": 0.9652670549320401,
      "learning_rate": 4.787156588125541e-06,
      "loss": 0.1738,
      "step": 1907
    },
    {
      "epoch": 0.17579582623116966,
      "grad_norm": 1.016867250112242,
      "learning_rate": 4.786848900505852e-06,
      "loss": 0.197,
      "step": 1908
    },
    {
      "epoch": 0.17588796240843968,
      "grad_norm": 0.8937479093266593,
      "learning_rate": 4.786541000551997e-06,
      "loss": 0.1709,
      "step": 1909
    },
    {
      "epoch": 0.17598009858570968,
      "grad_norm": 0.9348137203113728,
      "learning_rate": 4.786232888292564e-06,
      "loss": 0.1696,
      "step": 1910
    },
    {
      "epoch": 0.17607223476297967,
      "grad_norm": 0.9606862462398517,
      "learning_rate": 4.785924563756162e-06,
      "loss": 0.1778,
      "step": 1911
    },
    {
      "epoch": 0.1761643709402497,
      "grad_norm": 0.9522177279868732,
      "learning_rate": 4.785616026971418e-06,
      "loss": 0.1802,
      "step": 1912
    },
    {
      "epoch": 0.1762565071175197,
      "grad_norm": 0.9383905997510276,
      "learning_rate": 4.78530727796698e-06,
      "loss": 0.1735,
      "step": 1913
    },
    {
      "epoch": 0.1763486432947897,
      "grad_norm": 0.9510793409227706,
      "learning_rate": 4.784998316771515e-06,
      "loss": 0.1739,
      "step": 1914
    },
    {
      "epoch": 0.17644077947205972,
      "grad_norm": 1.0062514891320191,
      "learning_rate": 4.784689143413711e-06,
      "loss": 0.1829,
      "step": 1915
    },
    {
      "epoch": 0.1765329156493297,
      "grad_norm": 0.906545128875752,
      "learning_rate": 4.784379757922273e-06,
      "loss": 0.1647,
      "step": 1916
    },
    {
      "epoch": 0.1766250518265997,
      "grad_norm": 0.9973752258860914,
      "learning_rate": 4.78407016032593e-06,
      "loss": 0.183,
      "step": 1917
    },
    {
      "epoch": 0.1767171880038697,
      "grad_norm": 0.9141803308976617,
      "learning_rate": 4.783760350653426e-06,
      "loss": 0.1575,
      "step": 1918
    },
    {
      "epoch": 0.17680932418113973,
      "grad_norm": 0.9502312399313518,
      "learning_rate": 4.783450328933527e-06,
      "loss": 0.1746,
      "step": 1919
    },
    {
      "epoch": 0.17690146035840973,
      "grad_norm": 1.0723880151854668,
      "learning_rate": 4.78314009519502e-06,
      "loss": 0.1945,
      "step": 1920
    },
    {
      "epoch": 0.17699359653567973,
      "grad_norm": 0.8964554327793061,
      "learning_rate": 4.782829649466709e-06,
      "loss": 0.1613,
      "step": 1921
    },
    {
      "epoch": 0.17708573271294975,
      "grad_norm": 0.9319472469096941,
      "learning_rate": 4.78251899177742e-06,
      "loss": 0.1661,
      "step": 1922
    },
    {
      "epoch": 0.17717786889021975,
      "grad_norm": 1.0237495405647998,
      "learning_rate": 4.7822081221559965e-06,
      "loss": 0.1866,
      "step": 1923
    },
    {
      "epoch": 0.17727000506748974,
      "grad_norm": 0.8369732203496116,
      "learning_rate": 4.781897040631304e-06,
      "loss": 0.1652,
      "step": 1924
    },
    {
      "epoch": 0.17736214124475974,
      "grad_norm": 1.0220020925106441,
      "learning_rate": 4.781585747232224e-06,
      "loss": 0.1963,
      "step": 1925
    },
    {
      "epoch": 0.17745427742202977,
      "grad_norm": 0.9377498492935152,
      "learning_rate": 4.781274241987664e-06,
      "loss": 0.1687,
      "step": 1926
    },
    {
      "epoch": 0.17754641359929976,
      "grad_norm": 0.9221667519044533,
      "learning_rate": 4.7809625249265436e-06,
      "loss": 0.1633,
      "step": 1927
    },
    {
      "epoch": 0.17763854977656976,
      "grad_norm": 1.096594760082437,
      "learning_rate": 4.780650596077808e-06,
      "loss": 0.1839,
      "step": 1928
    },
    {
      "epoch": 0.17773068595383978,
      "grad_norm": 1.0274699250823436,
      "learning_rate": 4.780338455470419e-06,
      "loss": 0.1784,
      "step": 1929
    },
    {
      "epoch": 0.17782282213110978,
      "grad_norm": 0.9173298175308037,
      "learning_rate": 4.780026103133358e-06,
      "loss": 0.1671,
      "step": 1930
    },
    {
      "epoch": 0.17791495830837978,
      "grad_norm": 1.0241515678846906,
      "learning_rate": 4.7797135390956294e-06,
      "loss": 0.1667,
      "step": 1931
    },
    {
      "epoch": 0.1780070944856498,
      "grad_norm": 1.0382108734004492,
      "learning_rate": 4.779400763386253e-06,
      "loss": 0.1912,
      "step": 1932
    },
    {
      "epoch": 0.1780992306629198,
      "grad_norm": 0.9695327271031154,
      "learning_rate": 4.77908777603427e-06,
      "loss": 0.1753,
      "step": 1933
    },
    {
      "epoch": 0.1781913668401898,
      "grad_norm": 1.038422251537585,
      "learning_rate": 4.778774577068741e-06,
      "loss": 0.1856,
      "step": 1934
    },
    {
      "epoch": 0.1782835030174598,
      "grad_norm": 0.8941524559172731,
      "learning_rate": 4.778461166518748e-06,
      "loss": 0.162,
      "step": 1935
    },
    {
      "epoch": 0.17837563919472982,
      "grad_norm": 0.9313317705606107,
      "learning_rate": 4.778147544413392e-06,
      "loss": 0.1728,
      "step": 1936
    },
    {
      "epoch": 0.17846777537199982,
      "grad_norm": 1.0519701170000495,
      "learning_rate": 4.777833710781789e-06,
      "loss": 0.1957,
      "step": 1937
    },
    {
      "epoch": 0.1785599115492698,
      "grad_norm": 1.0387981318355688,
      "learning_rate": 4.777519665653082e-06,
      "loss": 0.1878,
      "step": 1938
    },
    {
      "epoch": 0.17865204772653984,
      "grad_norm": 0.9444838158399083,
      "learning_rate": 4.777205409056429e-06,
      "loss": 0.1779,
      "step": 1939
    },
    {
      "epoch": 0.17874418390380983,
      "grad_norm": 0.9575307026752148,
      "learning_rate": 4.776890941021008e-06,
      "loss": 0.2017,
      "step": 1940
    },
    {
      "epoch": 0.17883632008107983,
      "grad_norm": 1.0234915939858733,
      "learning_rate": 4.776576261576018e-06,
      "loss": 0.1722,
      "step": 1941
    },
    {
      "epoch": 0.17892845625834983,
      "grad_norm": 0.9803379275090085,
      "learning_rate": 4.776261370750678e-06,
      "loss": 0.1706,
      "step": 1942
    },
    {
      "epoch": 0.17902059243561985,
      "grad_norm": 0.9358718176258201,
      "learning_rate": 4.775946268574224e-06,
      "loss": 0.1763,
      "step": 1943
    },
    {
      "epoch": 0.17911272861288985,
      "grad_norm": 0.9786414947095523,
      "learning_rate": 4.775630955075915e-06,
      "loss": 0.1992,
      "step": 1944
    },
    {
      "epoch": 0.17920486479015985,
      "grad_norm": 1.0318275409794424,
      "learning_rate": 4.775315430285026e-06,
      "loss": 0.1981,
      "step": 1945
    },
    {
      "epoch": 0.17929700096742987,
      "grad_norm": 0.9631276712451919,
      "learning_rate": 4.7749996942308546e-06,
      "loss": 0.1888,
      "step": 1946
    },
    {
      "epoch": 0.17938913714469987,
      "grad_norm": 1.0003266569020262,
      "learning_rate": 4.774683746942717e-06,
      "loss": 0.1842,
      "step": 1947
    },
    {
      "epoch": 0.17948127332196986,
      "grad_norm": 0.9265546188995342,
      "learning_rate": 4.774367588449948e-06,
      "loss": 0.1578,
      "step": 1948
    },
    {
      "epoch": 0.1795734094992399,
      "grad_norm": 0.9009805935055492,
      "learning_rate": 4.774051218781904e-06,
      "loss": 0.1632,
      "step": 1949
    },
    {
      "epoch": 0.17966554567650989,
      "grad_norm": 0.9809819232416425,
      "learning_rate": 4.77373463796796e-06,
      "loss": 0.1786,
      "step": 1950
    },
    {
      "epoch": 0.17975768185377988,
      "grad_norm": 1.0260467345453894,
      "learning_rate": 4.7734178460375105e-06,
      "loss": 0.1657,
      "step": 1951
    },
    {
      "epoch": 0.17984981803104988,
      "grad_norm": 0.9280186899004385,
      "learning_rate": 4.773100843019969e-06,
      "loss": 0.1662,
      "step": 1952
    },
    {
      "epoch": 0.1799419542083199,
      "grad_norm": 0.9861324889794633,
      "learning_rate": 4.7727836289447685e-06,
      "loss": 0.1815,
      "step": 1953
    },
    {
      "epoch": 0.1800340903855899,
      "grad_norm": 0.9184406579084488,
      "learning_rate": 4.7724662038413646e-06,
      "loss": 0.1805,
      "step": 1954
    },
    {
      "epoch": 0.1801262265628599,
      "grad_norm": 1.065223211061827,
      "learning_rate": 4.772148567739229e-06,
      "loss": 0.172,
      "step": 1955
    },
    {
      "epoch": 0.18021836274012992,
      "grad_norm": 0.9539061206964681,
      "learning_rate": 4.7718307206678535e-06,
      "loss": 0.1828,
      "step": 1956
    },
    {
      "epoch": 0.18031049891739992,
      "grad_norm": 0.9148835147838171,
      "learning_rate": 4.7715126626567525e-06,
      "loss": 0.1541,
      "step": 1957
    },
    {
      "epoch": 0.18040263509466992,
      "grad_norm": 0.9232041932852285,
      "learning_rate": 4.7711943937354555e-06,
      "loss": 0.1751,
      "step": 1958
    },
    {
      "epoch": 0.18049477127193994,
      "grad_norm": 0.9216980915355115,
      "learning_rate": 4.770875913933515e-06,
      "loss": 0.1851,
      "step": 1959
    },
    {
      "epoch": 0.18058690744920994,
      "grad_norm": 0.9540739614506976,
      "learning_rate": 4.770557223280501e-06,
      "loss": 0.1919,
      "step": 1960
    },
    {
      "epoch": 0.18067904362647993,
      "grad_norm": 0.9591787025956806,
      "learning_rate": 4.7702383218060044e-06,
      "loss": 0.1772,
      "step": 1961
    },
    {
      "epoch": 0.18077117980374993,
      "grad_norm": 1.059574276391285,
      "learning_rate": 4.769919209539635e-06,
      "loss": 0.2,
      "step": 1962
    },
    {
      "epoch": 0.18086331598101996,
      "grad_norm": 1.0242113087334594,
      "learning_rate": 4.769599886511024e-06,
      "loss": 0.1847,
      "step": 1963
    },
    {
      "epoch": 0.18095545215828995,
      "grad_norm": 0.8678928771391421,
      "learning_rate": 4.769280352749817e-06,
      "loss": 0.1636,
      "step": 1964
    },
    {
      "epoch": 0.18104758833555995,
      "grad_norm": 0.9217600598060104,
      "learning_rate": 4.768960608285688e-06,
      "loss": 0.1729,
      "step": 1965
    },
    {
      "epoch": 0.18113972451282997,
      "grad_norm": 0.8802794485229976,
      "learning_rate": 4.76864065314832e-06,
      "loss": 0.1748,
      "step": 1966
    },
    {
      "epoch": 0.18123186069009997,
      "grad_norm": 0.9870852789685146,
      "learning_rate": 4.768320487367424e-06,
      "loss": 0.1683,
      "step": 1967
    },
    {
      "epoch": 0.18132399686736997,
      "grad_norm": 0.9296981723017914,
      "learning_rate": 4.768000110972727e-06,
      "loss": 0.1689,
      "step": 1968
    },
    {
      "epoch": 0.18141613304463997,
      "grad_norm": 0.9677522771888776,
      "learning_rate": 4.767679523993976e-06,
      "loss": 0.1883,
      "step": 1969
    },
    {
      "epoch": 0.18150826922191,
      "grad_norm": 0.9016253481332323,
      "learning_rate": 4.767358726460936e-06,
      "loss": 0.1605,
      "step": 1970
    },
    {
      "epoch": 0.18160040539918,
      "grad_norm": 0.9058389324448571,
      "learning_rate": 4.7670377184033944e-06,
      "loss": 0.1687,
      "step": 1971
    },
    {
      "epoch": 0.18169254157644998,
      "grad_norm": 1.017821513904742,
      "learning_rate": 4.7667164998511574e-06,
      "loss": 0.1911,
      "step": 1972
    },
    {
      "epoch": 0.18178467775372,
      "grad_norm": 0.9469304159614529,
      "learning_rate": 4.766395070834049e-06,
      "loss": 0.1783,
      "step": 1973
    },
    {
      "epoch": 0.18187681393099,
      "grad_norm": 0.9290326425937189,
      "learning_rate": 4.7660734313819135e-06,
      "loss": 0.1666,
      "step": 1974
    },
    {
      "epoch": 0.18196895010826,
      "grad_norm": 0.907021416145737,
      "learning_rate": 4.765751581524617e-06,
      "loss": 0.1551,
      "step": 1975
    },
    {
      "epoch": 0.18206108628553003,
      "grad_norm": 0.9225218369066349,
      "learning_rate": 4.765429521292042e-06,
      "loss": 0.1812,
      "step": 1976
    },
    {
      "epoch": 0.18215322246280002,
      "grad_norm": 0.9316138857926441,
      "learning_rate": 4.765107250714093e-06,
      "loss": 0.17,
      "step": 1977
    },
    {
      "epoch": 0.18224535864007002,
      "grad_norm": 0.9764313721990456,
      "learning_rate": 4.764784769820691e-06,
      "loss": 0.1843,
      "step": 1978
    },
    {
      "epoch": 0.18233749481734002,
      "grad_norm": 0.9237308847508986,
      "learning_rate": 4.76446207864178e-06,
      "loss": 0.1797,
      "step": 1979
    },
    {
      "epoch": 0.18242963099461004,
      "grad_norm": 0.9644001296709762,
      "learning_rate": 4.764139177207321e-06,
      "loss": 0.187,
      "step": 1980
    },
    {
      "epoch": 0.18252176717188004,
      "grad_norm": 0.8821755436379798,
      "learning_rate": 4.763816065547295e-06,
      "loss": 0.1686,
      "step": 1981
    },
    {
      "epoch": 0.18261390334915004,
      "grad_norm": 0.9503521212580078,
      "learning_rate": 4.763492743691705e-06,
      "loss": 0.1591,
      "step": 1982
    },
    {
      "epoch": 0.18270603952642006,
      "grad_norm": 0.9082913689491118,
      "learning_rate": 4.7631692116705695e-06,
      "loss": 0.176,
      "step": 1983
    },
    {
      "epoch": 0.18279817570369006,
      "grad_norm": 0.9100042108564763,
      "learning_rate": 4.76284546951393e-06,
      "loss": 0.1638,
      "step": 1984
    },
    {
      "epoch": 0.18289031188096005,
      "grad_norm": 1.0130194245152968,
      "learning_rate": 4.762521517251844e-06,
      "loss": 0.1889,
      "step": 1985
    },
    {
      "epoch": 0.18298244805823005,
      "grad_norm": 0.9483601310990681,
      "learning_rate": 4.762197354914391e-06,
      "loss": 0.1789,
      "step": 1986
    },
    {
      "epoch": 0.18307458423550008,
      "grad_norm": 0.9978614650039507,
      "learning_rate": 4.761872982531671e-06,
      "loss": 0.188,
      "step": 1987
    },
    {
      "epoch": 0.18316672041277007,
      "grad_norm": 1.0247173947884123,
      "learning_rate": 4.761548400133801e-06,
      "loss": 0.1727,
      "step": 1988
    },
    {
      "epoch": 0.18325885659004007,
      "grad_norm": 1.043154599032093,
      "learning_rate": 4.761223607750919e-06,
      "loss": 0.1821,
      "step": 1989
    },
    {
      "epoch": 0.1833509927673101,
      "grad_norm": 1.069427452926741,
      "learning_rate": 4.760898605413182e-06,
      "loss": 0.1953,
      "step": 1990
    },
    {
      "epoch": 0.1834431289445801,
      "grad_norm": 0.9171673945079573,
      "learning_rate": 4.760573393150766e-06,
      "loss": 0.1682,
      "step": 1991
    },
    {
      "epoch": 0.1835352651218501,
      "grad_norm": 0.9707474375781835,
      "learning_rate": 4.760247970993867e-06,
      "loss": 0.187,
      "step": 1992
    },
    {
      "epoch": 0.1836274012991201,
      "grad_norm": 0.9115866782437404,
      "learning_rate": 4.7599223389727e-06,
      "loss": 0.166,
      "step": 1993
    },
    {
      "epoch": 0.1837195374763901,
      "grad_norm": 0.9093587060651217,
      "learning_rate": 4.759596497117501e-06,
      "loss": 0.1621,
      "step": 1994
    },
    {
      "epoch": 0.1838116736536601,
      "grad_norm": 0.939985698078961,
      "learning_rate": 4.759270445458524e-06,
      "loss": 0.186,
      "step": 1995
    },
    {
      "epoch": 0.1839038098309301,
      "grad_norm": 0.9930713127947033,
      "learning_rate": 4.758944184026043e-06,
      "loss": 0.1755,
      "step": 1996
    },
    {
      "epoch": 0.18399594600820013,
      "grad_norm": 0.9060670257127519,
      "learning_rate": 4.758617712850352e-06,
      "loss": 0.1656,
      "step": 1997
    },
    {
      "epoch": 0.18408808218547013,
      "grad_norm": 0.9596014968387875,
      "learning_rate": 4.758291031961763e-06,
      "loss": 0.1774,
      "step": 1998
    },
    {
      "epoch": 0.18418021836274012,
      "grad_norm": 0.9071035450847232,
      "learning_rate": 4.757964141390609e-06,
      "loss": 0.1779,
      "step": 1999
    },
    {
      "epoch": 0.18427235454001015,
      "grad_norm": 0.916038591656752,
      "learning_rate": 4.75763704116724e-06,
      "loss": 0.1701,
      "step": 2000
    },
    {
      "epoch": 0.18427235454001015,
      "eval_loss": 0.1747845858335495,
      "eval_runtime": 299.1226,
      "eval_samples_per_second": 23.459,
      "eval_steps_per_second": 2.935,
      "step": 2000
    },
    {
      "epoch": 0.18436449071728014,
      "grad_norm": 0.9620227340872637,
      "learning_rate": 4.757309731322029e-06,
      "loss": 0.1766,
      "step": 2001
    },
    {
      "epoch": 0.18445662689455014,
      "grad_norm": 0.9936552481938895,
      "learning_rate": 4.756982211885368e-06,
      "loss": 0.185,
      "step": 2002
    },
    {
      "epoch": 0.18454876307182014,
      "grad_norm": 0.9243700651653634,
      "learning_rate": 4.756654482887665e-06,
      "loss": 0.1629,
      "step": 2003
    },
    {
      "epoch": 0.18464089924909016,
      "grad_norm": 0.9388336836899591,
      "learning_rate": 4.756326544359351e-06,
      "loss": 0.1677,
      "step": 2004
    },
    {
      "epoch": 0.18473303542636016,
      "grad_norm": 0.9691896762005906,
      "learning_rate": 4.7559983963308735e-06,
      "loss": 0.159,
      "step": 2005
    },
    {
      "epoch": 0.18482517160363016,
      "grad_norm": 0.91871871300768,
      "learning_rate": 4.755670038832703e-06,
      "loss": 0.1657,
      "step": 2006
    },
    {
      "epoch": 0.18491730778090018,
      "grad_norm": 0.959152484145046,
      "learning_rate": 4.755341471895325e-06,
      "loss": 0.1813,
      "step": 2007
    },
    {
      "epoch": 0.18500944395817018,
      "grad_norm": 0.9236567392647953,
      "learning_rate": 4.75501269554925e-06,
      "loss": 0.1655,
      "step": 2008
    },
    {
      "epoch": 0.18510158013544017,
      "grad_norm": 0.917760585288594,
      "learning_rate": 4.754683709825003e-06,
      "loss": 0.1762,
      "step": 2009
    },
    {
      "epoch": 0.1851937163127102,
      "grad_norm": 0.8881508952042282,
      "learning_rate": 4.7543545147531314e-06,
      "loss": 0.1677,
      "step": 2010
    },
    {
      "epoch": 0.1852858524899802,
      "grad_norm": 0.9048943834112922,
      "learning_rate": 4.754025110364201e-06,
      "loss": 0.1648,
      "step": 2011
    },
    {
      "epoch": 0.1853779886672502,
      "grad_norm": 0.9174376650297095,
      "learning_rate": 4.753695496688795e-06,
      "loss": 0.1641,
      "step": 2012
    },
    {
      "epoch": 0.1854701248445202,
      "grad_norm": 0.9517016421197806,
      "learning_rate": 4.753365673757521e-06,
      "loss": 0.1783,
      "step": 2013
    },
    {
      "epoch": 0.18556226102179021,
      "grad_norm": 0.9098170442182718,
      "learning_rate": 4.7530356416010004e-06,
      "loss": 0.1584,
      "step": 2014
    },
    {
      "epoch": 0.1856543971990602,
      "grad_norm": 0.9253465519598166,
      "learning_rate": 4.7527054002498785e-06,
      "loss": 0.1692,
      "step": 2015
    },
    {
      "epoch": 0.1857465333763302,
      "grad_norm": 0.9304349614009866,
      "learning_rate": 4.752374949734818e-06,
      "loss": 0.1764,
      "step": 2016
    },
    {
      "epoch": 0.18583866955360023,
      "grad_norm": 0.9380871995269432,
      "learning_rate": 4.752044290086501e-06,
      "loss": 0.174,
      "step": 2017
    },
    {
      "epoch": 0.18593080573087023,
      "grad_norm": 0.9037755594294037,
      "learning_rate": 4.75171342133563e-06,
      "loss": 0.1584,
      "step": 2018
    },
    {
      "epoch": 0.18602294190814023,
      "grad_norm": 0.9530499133921239,
      "learning_rate": 4.751382343512924e-06,
      "loss": 0.1765,
      "step": 2019
    },
    {
      "epoch": 0.18611507808541022,
      "grad_norm": 0.9837126600118509,
      "learning_rate": 4.751051056649126e-06,
      "loss": 0.1754,
      "step": 2020
    },
    {
      "epoch": 0.18620721426268025,
      "grad_norm": 0.9610159521960262,
      "learning_rate": 4.750719560774994e-06,
      "loss": 0.1713,
      "step": 2021
    },
    {
      "epoch": 0.18629935043995025,
      "grad_norm": 0.9192661443054622,
      "learning_rate": 4.75038785592131e-06,
      "loss": 0.1624,
      "step": 2022
    },
    {
      "epoch": 0.18639148661722024,
      "grad_norm": 0.9674669683439728,
      "learning_rate": 4.750055942118871e-06,
      "loss": 0.1772,
      "step": 2023
    },
    {
      "epoch": 0.18648362279449027,
      "grad_norm": 0.9778177795518106,
      "learning_rate": 4.749723819398496e-06,
      "loss": 0.1693,
      "step": 2024
    },
    {
      "epoch": 0.18657575897176026,
      "grad_norm": 0.9154309692494186,
      "learning_rate": 4.749391487791021e-06,
      "loss": 0.167,
      "step": 2025
    },
    {
      "epoch": 0.18666789514903026,
      "grad_norm": 0.9903773650928431,
      "learning_rate": 4.749058947327306e-06,
      "loss": 0.1675,
      "step": 2026
    },
    {
      "epoch": 0.18676003132630029,
      "grad_norm": 0.9298684751001484,
      "learning_rate": 4.7487261980382235e-06,
      "loss": 0.1686,
      "step": 2027
    },
    {
      "epoch": 0.18685216750357028,
      "grad_norm": 0.9751804305795381,
      "learning_rate": 4.748393239954674e-06,
      "loss": 0.1811,
      "step": 2028
    },
    {
      "epoch": 0.18694430368084028,
      "grad_norm": 0.916861137524208,
      "learning_rate": 4.748060073107568e-06,
      "loss": 0.1852,
      "step": 2029
    },
    {
      "epoch": 0.18703643985811028,
      "grad_norm": 0.9351359370165698,
      "learning_rate": 4.747726697527844e-06,
      "loss": 0.1744,
      "step": 2030
    },
    {
      "epoch": 0.1871285760353803,
      "grad_norm": 0.9126688237293801,
      "learning_rate": 4.747393113246453e-06,
      "loss": 0.1643,
      "step": 2031
    },
    {
      "epoch": 0.1872207122126503,
      "grad_norm": 0.9693557147267255,
      "learning_rate": 4.74705932029437e-06,
      "loss": 0.1742,
      "step": 2032
    },
    {
      "epoch": 0.1873128483899203,
      "grad_norm": 0.9757807922463323,
      "learning_rate": 4.746725318702587e-06,
      "loss": 0.166,
      "step": 2033
    },
    {
      "epoch": 0.18740498456719032,
      "grad_norm": 1.0355600426168787,
      "learning_rate": 4.746391108502116e-06,
      "loss": 0.1829,
      "step": 2034
    },
    {
      "epoch": 0.18749712074446032,
      "grad_norm": 0.9040436676391728,
      "learning_rate": 4.7460566897239905e-06,
      "loss": 0.1662,
      "step": 2035
    },
    {
      "epoch": 0.1875892569217303,
      "grad_norm": 1.0315787072138687,
      "learning_rate": 4.745722062399258e-06,
      "loss": 0.1904,
      "step": 2036
    },
    {
      "epoch": 0.1876813930990003,
      "grad_norm": 0.8845840894873757,
      "learning_rate": 4.745387226558991e-06,
      "loss": 0.1578,
      "step": 2037
    },
    {
      "epoch": 0.18777352927627033,
      "grad_norm": 0.9747265772515474,
      "learning_rate": 4.745052182234278e-06,
      "loss": 0.1845,
      "step": 2038
    },
    {
      "epoch": 0.18786566545354033,
      "grad_norm": 0.8418652499136062,
      "learning_rate": 4.744716929456229e-06,
      "loss": 0.1648,
      "step": 2039
    },
    {
      "epoch": 0.18795780163081033,
      "grad_norm": 0.9120095457085243,
      "learning_rate": 4.744381468255971e-06,
      "loss": 0.1719,
      "step": 2040
    },
    {
      "epoch": 0.18804993780808035,
      "grad_norm": 0.9253441991466294,
      "learning_rate": 4.7440457986646525e-06,
      "loss": 0.1741,
      "step": 2041
    },
    {
      "epoch": 0.18814207398535035,
      "grad_norm": 0.892395716534565,
      "learning_rate": 4.743709920713439e-06,
      "loss": 0.1623,
      "step": 2042
    },
    {
      "epoch": 0.18823421016262035,
      "grad_norm": 0.9443349478550868,
      "learning_rate": 4.743373834433519e-06,
      "loss": 0.1722,
      "step": 2043
    },
    {
      "epoch": 0.18832634633989037,
      "grad_norm": 0.9663983479939164,
      "learning_rate": 4.743037539856097e-06,
      "loss": 0.1874,
      "step": 2044
    },
    {
      "epoch": 0.18841848251716037,
      "grad_norm": 0.8949159341792653,
      "learning_rate": 4.742701037012397e-06,
      "loss": 0.1707,
      "step": 2045
    },
    {
      "epoch": 0.18851061869443037,
      "grad_norm": 0.9418778718283833,
      "learning_rate": 4.7423643259336656e-06,
      "loss": 0.1754,
      "step": 2046
    },
    {
      "epoch": 0.18860275487170036,
      "grad_norm": 0.9752328192488944,
      "learning_rate": 4.742027406651164e-06,
      "loss": 0.1647,
      "step": 2047
    },
    {
      "epoch": 0.1886948910489704,
      "grad_norm": 1.100330463505503,
      "learning_rate": 4.741690279196178e-06,
      "loss": 0.1866,
      "step": 2048
    },
    {
      "epoch": 0.18878702722624038,
      "grad_norm": 0.9272295304956722,
      "learning_rate": 4.741352943600007e-06,
      "loss": 0.1817,
      "step": 2049
    },
    {
      "epoch": 0.18887916340351038,
      "grad_norm": 0.9627565386987668,
      "learning_rate": 4.741015399893974e-06,
      "loss": 0.176,
      "step": 2050
    },
    {
      "epoch": 0.1889712995807804,
      "grad_norm": 1.00925483113557,
      "learning_rate": 4.740677648109421e-06,
      "loss": 0.1825,
      "step": 2051
    },
    {
      "epoch": 0.1890634357580504,
      "grad_norm": 0.8733725473410048,
      "learning_rate": 4.740339688277707e-06,
      "loss": 0.1708,
      "step": 2052
    },
    {
      "epoch": 0.1891555719353204,
      "grad_norm": 0.8996489754962257,
      "learning_rate": 4.7400015204302105e-06,
      "loss": 0.1527,
      "step": 2053
    },
    {
      "epoch": 0.1892477081125904,
      "grad_norm": 1.215286111800239,
      "learning_rate": 4.739663144598333e-06,
      "loss": 0.1734,
      "step": 2054
    },
    {
      "epoch": 0.18933984428986042,
      "grad_norm": 0.9496525339998125,
      "learning_rate": 4.739324560813491e-06,
      "loss": 0.1641,
      "step": 2055
    },
    {
      "epoch": 0.18943198046713042,
      "grad_norm": 1.02856117189786,
      "learning_rate": 4.738985769107123e-06,
      "loss": 0.2055,
      "step": 2056
    },
    {
      "epoch": 0.18952411664440041,
      "grad_norm": 0.9598011558340099,
      "learning_rate": 4.738646769510685e-06,
      "loss": 0.1707,
      "step": 2057
    },
    {
      "epoch": 0.18961625282167044,
      "grad_norm": 1.006354046062315,
      "learning_rate": 4.738307562055653e-06,
      "loss": 0.1777,
      "step": 2058
    },
    {
      "epoch": 0.18970838899894044,
      "grad_norm": 0.9327924509299893,
      "learning_rate": 4.737968146773524e-06,
      "loss": 0.1703,
      "step": 2059
    },
    {
      "epoch": 0.18980052517621043,
      "grad_norm": 0.9457591674178287,
      "learning_rate": 4.737628523695811e-06,
      "loss": 0.1727,
      "step": 2060
    },
    {
      "epoch": 0.18989266135348046,
      "grad_norm": 0.9430303988478719,
      "learning_rate": 4.737288692854049e-06,
      "loss": 0.1813,
      "step": 2061
    },
    {
      "epoch": 0.18998479753075045,
      "grad_norm": 0.9261016048878836,
      "learning_rate": 4.736948654279791e-06,
      "loss": 0.1772,
      "step": 2062
    },
    {
      "epoch": 0.19007693370802045,
      "grad_norm": 0.9157174430306279,
      "learning_rate": 4.73660840800461e-06,
      "loss": 0.1589,
      "step": 2063
    },
    {
      "epoch": 0.19016906988529045,
      "grad_norm": 0.9654944454044344,
      "learning_rate": 4.736267954060097e-06,
      "loss": 0.1712,
      "step": 2064
    },
    {
      "epoch": 0.19026120606256047,
      "grad_norm": 0.9651362786809399,
      "learning_rate": 4.735927292477864e-06,
      "loss": 0.1814,
      "step": 2065
    },
    {
      "epoch": 0.19035334223983047,
      "grad_norm": 0.9044877928218676,
      "learning_rate": 4.735586423289542e-06,
      "loss": 0.168,
      "step": 2066
    },
    {
      "epoch": 0.19044547841710047,
      "grad_norm": 0.9307633438458481,
      "learning_rate": 4.735245346526779e-06,
      "loss": 0.1755,
      "step": 2067
    },
    {
      "epoch": 0.1905376145943705,
      "grad_norm": 0.9901411171327417,
      "learning_rate": 4.734904062221246e-06,
      "loss": 0.1798,
      "step": 2068
    },
    {
      "epoch": 0.1906297507716405,
      "grad_norm": 0.9888693596561464,
      "learning_rate": 4.734562570404629e-06,
      "loss": 0.1725,
      "step": 2069
    },
    {
      "epoch": 0.19072188694891049,
      "grad_norm": 0.9139611246565976,
      "learning_rate": 4.734220871108638e-06,
      "loss": 0.1639,
      "step": 2070
    },
    {
      "epoch": 0.19081402312618048,
      "grad_norm": 0.9049028935278389,
      "learning_rate": 4.733878964364998e-06,
      "loss": 0.1762,
      "step": 2071
    },
    {
      "epoch": 0.1909061593034505,
      "grad_norm": 0.931420719373214,
      "learning_rate": 4.7335368502054564e-06,
      "loss": 0.1787,
      "step": 2072
    },
    {
      "epoch": 0.1909982954807205,
      "grad_norm": 0.9522269497391289,
      "learning_rate": 4.733194528661778e-06,
      "loss": 0.1751,
      "step": 2073
    },
    {
      "epoch": 0.1910904316579905,
      "grad_norm": 0.9549558070156487,
      "learning_rate": 4.732851999765747e-06,
      "loss": 0.1684,
      "step": 2074
    },
    {
      "epoch": 0.19118256783526053,
      "grad_norm": 0.9653026251701582,
      "learning_rate": 4.732509263549167e-06,
      "loss": 0.1713,
      "step": 2075
    },
    {
      "epoch": 0.19127470401253052,
      "grad_norm": 0.9395360707584086,
      "learning_rate": 4.732166320043862e-06,
      "loss": 0.1595,
      "step": 2076
    },
    {
      "epoch": 0.19136684018980052,
      "grad_norm": 0.9011041007700445,
      "learning_rate": 4.731823169281674e-06,
      "loss": 0.1726,
      "step": 2077
    },
    {
      "epoch": 0.19145897636707054,
      "grad_norm": 0.9982033798259724,
      "learning_rate": 4.731479811294464e-06,
      "loss": 0.1802,
      "step": 2078
    },
    {
      "epoch": 0.19155111254434054,
      "grad_norm": 0.9653945847759827,
      "learning_rate": 4.731136246114114e-06,
      "loss": 0.1786,
      "step": 2079
    },
    {
      "epoch": 0.19164324872161054,
      "grad_norm": 0.9572793258606881,
      "learning_rate": 4.730792473772523e-06,
      "loss": 0.17,
      "step": 2080
    },
    {
      "epoch": 0.19173538489888053,
      "grad_norm": 1.017565135723274,
      "learning_rate": 4.730448494301612e-06,
      "loss": 0.174,
      "step": 2081
    },
    {
      "epoch": 0.19182752107615056,
      "grad_norm": 0.9507693595346857,
      "learning_rate": 4.7301043077333165e-06,
      "loss": 0.1805,
      "step": 2082
    },
    {
      "epoch": 0.19191965725342056,
      "grad_norm": 0.9061693205241929,
      "learning_rate": 4.729759914099597e-06,
      "loss": 0.1636,
      "step": 2083
    },
    {
      "epoch": 0.19201179343069055,
      "grad_norm": 0.9924987247261973,
      "learning_rate": 4.729415313432429e-06,
      "loss": 0.17,
      "step": 2084
    },
    {
      "epoch": 0.19210392960796058,
      "grad_norm": 0.9801999793423163,
      "learning_rate": 4.729070505763809e-06,
      "loss": 0.1798,
      "step": 2085
    },
    {
      "epoch": 0.19219606578523057,
      "grad_norm": 0.9452879034672409,
      "learning_rate": 4.728725491125753e-06,
      "loss": 0.1726,
      "step": 2086
    },
    {
      "epoch": 0.19228820196250057,
      "grad_norm": 0.9947615608484313,
      "learning_rate": 4.728380269550296e-06,
      "loss": 0.1876,
      "step": 2087
    },
    {
      "epoch": 0.19238033813977057,
      "grad_norm": 1.0462479131548408,
      "learning_rate": 4.7280348410694905e-06,
      "loss": 0.1865,
      "step": 2088
    },
    {
      "epoch": 0.1924724743170406,
      "grad_norm": 0.9309592228183027,
      "learning_rate": 4.72768920571541e-06,
      "loss": 0.1876,
      "step": 2089
    },
    {
      "epoch": 0.1925646104943106,
      "grad_norm": 0.9042067215573306,
      "learning_rate": 4.727343363520147e-06,
      "loss": 0.1715,
      "step": 2090
    },
    {
      "epoch": 0.1926567466715806,
      "grad_norm": 0.9590467825615356,
      "learning_rate": 4.7269973145158134e-06,
      "loss": 0.168,
      "step": 2091
    },
    {
      "epoch": 0.1927488828488506,
      "grad_norm": 0.9612689987095125,
      "learning_rate": 4.7266510587345395e-06,
      "loss": 0.1712,
      "step": 2092
    },
    {
      "epoch": 0.1928410190261206,
      "grad_norm": 0.9089807471667561,
      "learning_rate": 4.726304596208475e-06,
      "loss": 0.1624,
      "step": 2093
    },
    {
      "epoch": 0.1929331552033906,
      "grad_norm": 0.8967926180786936,
      "learning_rate": 4.725957926969789e-06,
      "loss": 0.1564,
      "step": 2094
    },
    {
      "epoch": 0.19302529138066063,
      "grad_norm": 0.9384858000388413,
      "learning_rate": 4.72561105105067e-06,
      "loss": 0.169,
      "step": 2095
    },
    {
      "epoch": 0.19311742755793063,
      "grad_norm": 0.9604763558849385,
      "learning_rate": 4.7252639684833255e-06,
      "loss": 0.1687,
      "step": 2096
    },
    {
      "epoch": 0.19320956373520062,
      "grad_norm": 0.9183646669661314,
      "learning_rate": 4.724916679299982e-06,
      "loss": 0.1664,
      "step": 2097
    },
    {
      "epoch": 0.19330169991247062,
      "grad_norm": 0.917100174808335,
      "learning_rate": 4.7245691835328855e-06,
      "loss": 0.1676,
      "step": 2098
    },
    {
      "epoch": 0.19339383608974064,
      "grad_norm": 0.9365272475470837,
      "learning_rate": 4.724221481214301e-06,
      "loss": 0.1673,
      "step": 2099
    },
    {
      "epoch": 0.19348597226701064,
      "grad_norm": 1.0141662835470497,
      "learning_rate": 4.723873572376512e-06,
      "loss": 0.185,
      "step": 2100
    },
    {
      "epoch": 0.19357810844428064,
      "grad_norm": 0.9558839300005915,
      "learning_rate": 4.723525457051823e-06,
      "loss": 0.1795,
      "step": 2101
    },
    {
      "epoch": 0.19367024462155066,
      "grad_norm": 0.9411376137788801,
      "learning_rate": 4.723177135272556e-06,
      "loss": 0.1752,
      "step": 2102
    },
    {
      "epoch": 0.19376238079882066,
      "grad_norm": 0.8955626892165673,
      "learning_rate": 4.7228286070710525e-06,
      "loss": 0.1726,
      "step": 2103
    },
    {
      "epoch": 0.19385451697609066,
      "grad_norm": 0.9072698897943006,
      "learning_rate": 4.722479872479674e-06,
      "loss": 0.1588,
      "step": 2104
    },
    {
      "epoch": 0.19394665315336065,
      "grad_norm": 0.921780763256833,
      "learning_rate": 4.7221309315308e-06,
      "loss": 0.1765,
      "step": 2105
    },
    {
      "epoch": 0.19403878933063068,
      "grad_norm": 0.8677649999621102,
      "learning_rate": 4.721781784256829e-06,
      "loss": 0.1535,
      "step": 2106
    },
    {
      "epoch": 0.19413092550790068,
      "grad_norm": 1.0361240936326264,
      "learning_rate": 4.721432430690181e-06,
      "loss": 0.18,
      "step": 2107
    },
    {
      "epoch": 0.19422306168517067,
      "grad_norm": 1.0506692873586285,
      "learning_rate": 4.721082870863293e-06,
      "loss": 0.1744,
      "step": 2108
    },
    {
      "epoch": 0.1943151978624407,
      "grad_norm": 0.9632736944905299,
      "learning_rate": 4.720733104808621e-06,
      "loss": 0.1563,
      "step": 2109
    },
    {
      "epoch": 0.1944073340397107,
      "grad_norm": 0.9477773210722018,
      "learning_rate": 4.720383132558641e-06,
      "loss": 0.174,
      "step": 2110
    },
    {
      "epoch": 0.1944994702169807,
      "grad_norm": 1.105717006171067,
      "learning_rate": 4.720032954145849e-06,
      "loss": 0.1774,
      "step": 2111
    },
    {
      "epoch": 0.19459160639425072,
      "grad_norm": 0.9737476883844909,
      "learning_rate": 4.719682569602757e-06,
      "loss": 0.1692,
      "step": 2112
    },
    {
      "epoch": 0.1946837425715207,
      "grad_norm": 1.0352161292591884,
      "learning_rate": 4.7193319789619e-06,
      "loss": 0.1729,
      "step": 2113
    },
    {
      "epoch": 0.1947758787487907,
      "grad_norm": 0.893049222834855,
      "learning_rate": 4.718981182255831e-06,
      "loss": 0.167,
      "step": 2114
    },
    {
      "epoch": 0.1948680149260607,
      "grad_norm": 0.9409597426616632,
      "learning_rate": 4.71863017951712e-06,
      "loss": 0.1851,
      "step": 2115
    },
    {
      "epoch": 0.19496015110333073,
      "grad_norm": 0.932584725623311,
      "learning_rate": 4.718278970778357e-06,
      "loss": 0.1704,
      "step": 2116
    },
    {
      "epoch": 0.19505228728060073,
      "grad_norm": 0.9747757753227256,
      "learning_rate": 4.717927556072153e-06,
      "loss": 0.175,
      "step": 2117
    },
    {
      "epoch": 0.19514442345787072,
      "grad_norm": 0.964675319367432,
      "learning_rate": 4.717575935431138e-06,
      "loss": 0.1741,
      "step": 2118
    },
    {
      "epoch": 0.19523655963514075,
      "grad_norm": 1.0099187231751314,
      "learning_rate": 4.7172241088879575e-06,
      "loss": 0.1854,
      "step": 2119
    },
    {
      "epoch": 0.19532869581241075,
      "grad_norm": 0.9059186999651861,
      "learning_rate": 4.716872076475281e-06,
      "loss": 0.1786,
      "step": 2120
    },
    {
      "epoch": 0.19542083198968074,
      "grad_norm": 0.9383878112214665,
      "learning_rate": 4.7165198382257926e-06,
      "loss": 0.1691,
      "step": 2121
    },
    {
      "epoch": 0.19551296816695074,
      "grad_norm": 0.9390251394741452,
      "learning_rate": 4.716167394172198e-06,
      "loss": 0.1747,
      "step": 2122
    },
    {
      "epoch": 0.19560510434422076,
      "grad_norm": 0.9179061143840483,
      "learning_rate": 4.715814744347224e-06,
      "loss": 0.1699,
      "step": 2123
    },
    {
      "epoch": 0.19569724052149076,
      "grad_norm": 0.8956384202715454,
      "learning_rate": 4.715461888783612e-06,
      "loss": 0.1581,
      "step": 2124
    },
    {
      "epoch": 0.19578937669876076,
      "grad_norm": 0.9922412568775244,
      "learning_rate": 4.715108827514125e-06,
      "loss": 0.1852,
      "step": 2125
    },
    {
      "epoch": 0.19588151287603078,
      "grad_norm": 0.9201841509112736,
      "learning_rate": 4.714755560571545e-06,
      "loss": 0.1806,
      "step": 2126
    },
    {
      "epoch": 0.19597364905330078,
      "grad_norm": 0.9291530212197076,
      "learning_rate": 4.7144020879886736e-06,
      "loss": 0.1678,
      "step": 2127
    },
    {
      "epoch": 0.19606578523057078,
      "grad_norm": 0.8892646385384433,
      "learning_rate": 4.714048409798328e-06,
      "loss": 0.1588,
      "step": 2128
    },
    {
      "epoch": 0.1961579214078408,
      "grad_norm": 0.9578647584230859,
      "learning_rate": 4.713694526033351e-06,
      "loss": 0.1696,
      "step": 2129
    },
    {
      "epoch": 0.1962500575851108,
      "grad_norm": 0.9303263756088295,
      "learning_rate": 4.713340436726599e-06,
      "loss": 0.1876,
      "step": 2130
    },
    {
      "epoch": 0.1963421937623808,
      "grad_norm": 1.0143043159123855,
      "learning_rate": 4.712986141910948e-06,
      "loss": 0.1833,
      "step": 2131
    },
    {
      "epoch": 0.1964343299396508,
      "grad_norm": 1.0257791241751693,
      "learning_rate": 4.712631641619297e-06,
      "loss": 0.1813,
      "step": 2132
    },
    {
      "epoch": 0.19652646611692082,
      "grad_norm": 0.9326155949771587,
      "learning_rate": 4.7122769358845595e-06,
      "loss": 0.1834,
      "step": 2133
    },
    {
      "epoch": 0.19661860229419081,
      "grad_norm": 0.9501425332398825,
      "learning_rate": 4.71192202473967e-06,
      "loss": 0.1883,
      "step": 2134
    },
    {
      "epoch": 0.1967107384714608,
      "grad_norm": 0.92519734244431,
      "learning_rate": 4.711566908217583e-06,
      "loss": 0.1733,
      "step": 2135
    },
    {
      "epoch": 0.19680287464873084,
      "grad_norm": 0.9141559769713349,
      "learning_rate": 4.71121158635127e-06,
      "loss": 0.1693,
      "step": 2136
    },
    {
      "epoch": 0.19689501082600083,
      "grad_norm": 0.8635507856697013,
      "learning_rate": 4.710856059173723e-06,
      "loss": 0.1597,
      "step": 2137
    },
    {
      "epoch": 0.19698714700327083,
      "grad_norm": 0.9169944069705963,
      "learning_rate": 4.710500326717954e-06,
      "loss": 0.1789,
      "step": 2138
    },
    {
      "epoch": 0.19707928318054083,
      "grad_norm": 0.9519571879140262,
      "learning_rate": 4.7101443890169915e-06,
      "loss": 0.1681,
      "step": 2139
    },
    {
      "epoch": 0.19717141935781085,
      "grad_norm": 0.9514507211318625,
      "learning_rate": 4.7097882461038845e-06,
      "loss": 0.1725,
      "step": 2140
    },
    {
      "epoch": 0.19726355553508085,
      "grad_norm": 0.9761404726228229,
      "learning_rate": 4.7094318980117005e-06,
      "loss": 0.1759,
      "step": 2141
    },
    {
      "epoch": 0.19735569171235084,
      "grad_norm": 0.9768124223352279,
      "learning_rate": 4.709075344773527e-06,
      "loss": 0.179,
      "step": 2142
    },
    {
      "epoch": 0.19744782788962087,
      "grad_norm": 0.9305036698633852,
      "learning_rate": 4.70871858642247e-06,
      "loss": 0.1669,
      "step": 2143
    },
    {
      "epoch": 0.19753996406689087,
      "grad_norm": 0.9332637277566961,
      "learning_rate": 4.708361622991656e-06,
      "loss": 0.164,
      "step": 2144
    },
    {
      "epoch": 0.19763210024416086,
      "grad_norm": 0.9666366370430729,
      "learning_rate": 4.708004454514226e-06,
      "loss": 0.183,
      "step": 2145
    },
    {
      "epoch": 0.1977242364214309,
      "grad_norm": 0.9087507122980967,
      "learning_rate": 4.7076470810233455e-06,
      "loss": 0.1715,
      "step": 2146
    },
    {
      "epoch": 0.19781637259870088,
      "grad_norm": 0.9415921197844276,
      "learning_rate": 4.707289502552196e-06,
      "loss": 0.1791,
      "step": 2147
    },
    {
      "epoch": 0.19790850877597088,
      "grad_norm": 0.8989464774107121,
      "learning_rate": 4.706931719133978e-06,
      "loss": 0.1655,
      "step": 2148
    },
    {
      "epoch": 0.19800064495324088,
      "grad_norm": 1.0465683898896436,
      "learning_rate": 4.706573730801913e-06,
      "loss": 0.189,
      "step": 2149
    },
    {
      "epoch": 0.1980927811305109,
      "grad_norm": 0.920378805791493,
      "learning_rate": 4.706215537589239e-06,
      "loss": 0.1758,
      "step": 2150
    },
    {
      "epoch": 0.1981849173077809,
      "grad_norm": 0.8983349357724887,
      "learning_rate": 4.705857139529215e-06,
      "loss": 0.1497,
      "step": 2151
    },
    {
      "epoch": 0.1982770534850509,
      "grad_norm": 0.9580161374361805,
      "learning_rate": 4.705498536655119e-06,
      "loss": 0.1821,
      "step": 2152
    },
    {
      "epoch": 0.19836918966232092,
      "grad_norm": 0.9231662092252367,
      "learning_rate": 4.705139729000246e-06,
      "loss": 0.1692,
      "step": 2153
    },
    {
      "epoch": 0.19846132583959092,
      "grad_norm": 0.9511535860655734,
      "learning_rate": 4.704780716597912e-06,
      "loss": 0.177,
      "step": 2154
    },
    {
      "epoch": 0.19855346201686092,
      "grad_norm": 0.9592527774843944,
      "learning_rate": 4.7044214994814505e-06,
      "loss": 0.1872,
      "step": 2155
    },
    {
      "epoch": 0.1986455981941309,
      "grad_norm": 0.941224979580375,
      "learning_rate": 4.704062077684216e-06,
      "loss": 0.1692,
      "step": 2156
    },
    {
      "epoch": 0.19873773437140094,
      "grad_norm": 0.8864677413252431,
      "learning_rate": 4.703702451239582e-06,
      "loss": 0.1711,
      "step": 2157
    },
    {
      "epoch": 0.19882987054867093,
      "grad_norm": 0.9760104907836626,
      "learning_rate": 4.703342620180936e-06,
      "loss": 0.1891,
      "step": 2158
    },
    {
      "epoch": 0.19892200672594093,
      "grad_norm": 0.925158598268592,
      "learning_rate": 4.702982584541691e-06,
      "loss": 0.1695,
      "step": 2159
    },
    {
      "epoch": 0.19901414290321096,
      "grad_norm": 0.969472216082636,
      "learning_rate": 4.702622344355276e-06,
      "loss": 0.1771,
      "step": 2160
    },
    {
      "epoch": 0.19910627908048095,
      "grad_norm": 1.0508981404224167,
      "learning_rate": 4.702261899655139e-06,
      "loss": 0.177,
      "step": 2161
    },
    {
      "epoch": 0.19919841525775095,
      "grad_norm": 0.9847930314760027,
      "learning_rate": 4.701901250474748e-06,
      "loss": 0.1827,
      "step": 2162
    },
    {
      "epoch": 0.19929055143502097,
      "grad_norm": 0.966509026140173,
      "learning_rate": 4.70154039684759e-06,
      "loss": 0.1706,
      "step": 2163
    },
    {
      "epoch": 0.19938268761229097,
      "grad_norm": 1.0693603050159732,
      "learning_rate": 4.701179338807168e-06,
      "loss": 0.1866,
      "step": 2164
    },
    {
      "epoch": 0.19947482378956097,
      "grad_norm": 0.9131224498772273,
      "learning_rate": 4.7008180763870075e-06,
      "loss": 0.1569,
      "step": 2165
    },
    {
      "epoch": 0.19956695996683096,
      "grad_norm": 0.9114405522217581,
      "learning_rate": 4.700456609620652e-06,
      "loss": 0.1687,
      "step": 2166
    },
    {
      "epoch": 0.199659096144101,
      "grad_norm": 0.9712061010659172,
      "learning_rate": 4.700094938541664e-06,
      "loss": 0.169,
      "step": 2167
    },
    {
      "epoch": 0.199751232321371,
      "grad_norm": 0.9870353066400023,
      "learning_rate": 4.6997330631836235e-06,
      "loss": 0.1736,
      "step": 2168
    },
    {
      "epoch": 0.19984336849864098,
      "grad_norm": 1.0457732545812704,
      "learning_rate": 4.699370983580132e-06,
      "loss": 0.1676,
      "step": 2169
    },
    {
      "epoch": 0.199935504675911,
      "grad_norm": 0.9582776467618426,
      "learning_rate": 4.699008699764807e-06,
      "loss": 0.1622,
      "step": 2170
    },
    {
      "epoch": 0.200027640853181,
      "grad_norm": 0.924560477144988,
      "learning_rate": 4.698646211771287e-06,
      "loss": 0.1778,
      "step": 2171
    },
    {
      "epoch": 0.200119777030451,
      "grad_norm": 0.9603418732544574,
      "learning_rate": 4.698283519633231e-06,
      "loss": 0.173,
      "step": 2172
    },
    {
      "epoch": 0.200211913207721,
      "grad_norm": 0.9318117654352273,
      "learning_rate": 4.6979206233843136e-06,
      "loss": 0.1763,
      "step": 2173
    },
    {
      "epoch": 0.20030404938499102,
      "grad_norm": 0.8500574809360554,
      "learning_rate": 4.697557523058229e-06,
      "loss": 0.1612,
      "step": 2174
    },
    {
      "epoch": 0.20039618556226102,
      "grad_norm": 0.9321952438861371,
      "learning_rate": 4.6971942186886925e-06,
      "loss": 0.1657,
      "step": 2175
    },
    {
      "epoch": 0.20048832173953102,
      "grad_norm": 0.8878808978445922,
      "learning_rate": 4.696830710309437e-06,
      "loss": 0.1669,
      "step": 2176
    },
    {
      "epoch": 0.20058045791680104,
      "grad_norm": 0.979230897783306,
      "learning_rate": 4.696466997954212e-06,
      "loss": 0.1746,
      "step": 2177
    },
    {
      "epoch": 0.20067259409407104,
      "grad_norm": 0.9049009072181234,
      "learning_rate": 4.696103081656791e-06,
      "loss": 0.1701,
      "step": 2178
    },
    {
      "epoch": 0.20076473027134104,
      "grad_norm": 0.9108225412943807,
      "learning_rate": 4.695738961450962e-06,
      "loss": 0.1588,
      "step": 2179
    },
    {
      "epoch": 0.20085686644861106,
      "grad_norm": 0.9517811181905188,
      "learning_rate": 4.695374637370534e-06,
      "loss": 0.173,
      "step": 2180
    },
    {
      "epoch": 0.20094900262588106,
      "grad_norm": 0.9484256391007594,
      "learning_rate": 4.695010109449335e-06,
      "loss": 0.174,
      "step": 2181
    },
    {
      "epoch": 0.20104113880315105,
      "grad_norm": 0.9833242975609692,
      "learning_rate": 4.694645377721211e-06,
      "loss": 0.1824,
      "step": 2182
    },
    {
      "epoch": 0.20113327498042105,
      "grad_norm": 0.9384489452108485,
      "learning_rate": 4.694280442220027e-06,
      "loss": 0.1606,
      "step": 2183
    },
    {
      "epoch": 0.20122541115769108,
      "grad_norm": 0.9458788028655759,
      "learning_rate": 4.693915302979669e-06,
      "loss": 0.1865,
      "step": 2184
    },
    {
      "epoch": 0.20131754733496107,
      "grad_norm": 0.9715149119245432,
      "learning_rate": 4.693549960034038e-06,
      "loss": 0.1758,
      "step": 2185
    },
    {
      "epoch": 0.20140968351223107,
      "grad_norm": 0.8895745071235284,
      "learning_rate": 4.693184413417058e-06,
      "loss": 0.1652,
      "step": 2186
    },
    {
      "epoch": 0.2015018196895011,
      "grad_norm": 0.9179333420101142,
      "learning_rate": 4.692818663162668e-06,
      "loss": 0.1668,
      "step": 2187
    },
    {
      "epoch": 0.2015939558667711,
      "grad_norm": 1.0524495392370385,
      "learning_rate": 4.69245270930483e-06,
      "loss": 0.1626,
      "step": 2188
    },
    {
      "epoch": 0.2016860920440411,
      "grad_norm": 0.9301977089994403,
      "learning_rate": 4.6920865518775214e-06,
      "loss": 0.161,
      "step": 2189
    },
    {
      "epoch": 0.20177822822131108,
      "grad_norm": 0.9799561176585696,
      "learning_rate": 4.6917201909147415e-06,
      "loss": 0.1838,
      "step": 2190
    },
    {
      "epoch": 0.2018703643985811,
      "grad_norm": 0.9996105143573751,
      "learning_rate": 4.691353626450505e-06,
      "loss": 0.1726,
      "step": 2191
    },
    {
      "epoch": 0.2019625005758511,
      "grad_norm": 1.025059301876579,
      "learning_rate": 4.690986858518849e-06,
      "loss": 0.1822,
      "step": 2192
    },
    {
      "epoch": 0.2020546367531211,
      "grad_norm": 0.9165762821501996,
      "learning_rate": 4.6906198871538265e-06,
      "loss": 0.1639,
      "step": 2193
    },
    {
      "epoch": 0.20214677293039113,
      "grad_norm": 1.0481638600088068,
      "learning_rate": 4.690252712389513e-06,
      "loss": 0.1855,
      "step": 2194
    },
    {
      "epoch": 0.20223890910766112,
      "grad_norm": 0.9492628877870042,
      "learning_rate": 4.6898853342599994e-06,
      "loss": 0.1567,
      "step": 2195
    },
    {
      "epoch": 0.20233104528493112,
      "grad_norm": 1.02721472974448,
      "learning_rate": 4.689517752799396e-06,
      "loss": 0.1904,
      "step": 2196
    },
    {
      "epoch": 0.20242318146220115,
      "grad_norm": 0.9749242814804707,
      "learning_rate": 4.689149968041834e-06,
      "loss": 0.1844,
      "step": 2197
    },
    {
      "epoch": 0.20251531763947114,
      "grad_norm": 1.0038104998977666,
      "learning_rate": 4.6887819800214615e-06,
      "loss": 0.1743,
      "step": 2198
    },
    {
      "epoch": 0.20260745381674114,
      "grad_norm": 1.005188089752521,
      "learning_rate": 4.688413788772447e-06,
      "loss": 0.1788,
      "step": 2199
    },
    {
      "epoch": 0.20269958999401114,
      "grad_norm": 0.9150762628236749,
      "learning_rate": 4.688045394328976e-06,
      "loss": 0.1737,
      "step": 2200
    },
    {
      "epoch": 0.20279172617128116,
      "grad_norm": 0.9398977054241704,
      "learning_rate": 4.687676796725256e-06,
      "loss": 0.1634,
      "step": 2201
    },
    {
      "epoch": 0.20288386234855116,
      "grad_norm": 0.9427101062525798,
      "learning_rate": 4.687307995995509e-06,
      "loss": 0.1661,
      "step": 2202
    },
    {
      "epoch": 0.20297599852582116,
      "grad_norm": 0.9673031921019366,
      "learning_rate": 4.68693899217398e-06,
      "loss": 0.1687,
      "step": 2203
    },
    {
      "epoch": 0.20306813470309118,
      "grad_norm": 0.9799184390341139,
      "learning_rate": 4.6865697852949285e-06,
      "loss": 0.1661,
      "step": 2204
    },
    {
      "epoch": 0.20316027088036118,
      "grad_norm": 0.9799685128302213,
      "learning_rate": 4.686200375392639e-06,
      "loss": 0.182,
      "step": 2205
    },
    {
      "epoch": 0.20325240705763117,
      "grad_norm": 0.9174324923130279,
      "learning_rate": 4.6858307625014084e-06,
      "loss": 0.1579,
      "step": 2206
    },
    {
      "epoch": 0.20334454323490117,
      "grad_norm": 0.9515024330765529,
      "learning_rate": 4.685460946655556e-06,
      "loss": 0.1784,
      "step": 2207
    },
    {
      "epoch": 0.2034366794121712,
      "grad_norm": 1.0194734725774568,
      "learning_rate": 4.68509092788942e-06,
      "loss": 0.1754,
      "step": 2208
    },
    {
      "epoch": 0.2035288155894412,
      "grad_norm": 0.9333275447322845,
      "learning_rate": 4.684720706237356e-06,
      "loss": 0.1666,
      "step": 2209
    },
    {
      "epoch": 0.2036209517667112,
      "grad_norm": 0.90507569208577,
      "learning_rate": 4.68435028173374e-06,
      "loss": 0.1643,
      "step": 2210
    },
    {
      "epoch": 0.2037130879439812,
      "grad_norm": 1.047547987412547,
      "learning_rate": 4.683979654412965e-06,
      "loss": 0.1965,
      "step": 2211
    },
    {
      "epoch": 0.2038052241212512,
      "grad_norm": 0.8708624834050619,
      "learning_rate": 4.683608824309443e-06,
      "loss": 0.1751,
      "step": 2212
    },
    {
      "epoch": 0.2038973602985212,
      "grad_norm": 0.9282192492999448,
      "learning_rate": 4.683237791457608e-06,
      "loss": 0.1772,
      "step": 2213
    },
    {
      "epoch": 0.20398949647579123,
      "grad_norm": 0.9310633455335762,
      "learning_rate": 4.682866555891908e-06,
      "loss": 0.1808,
      "step": 2214
    },
    {
      "epoch": 0.20408163265306123,
      "grad_norm": 0.8971755268118722,
      "learning_rate": 4.6824951176468134e-06,
      "loss": 0.1652,
      "step": 2215
    },
    {
      "epoch": 0.20417376883033123,
      "grad_norm": 0.9276435656284576,
      "learning_rate": 4.682123476756813e-06,
      "loss": 0.1685,
      "step": 2216
    },
    {
      "epoch": 0.20426590500760122,
      "grad_norm": 0.8795940253862768,
      "learning_rate": 4.681751633256413e-06,
      "loss": 0.1668,
      "step": 2217
    },
    {
      "epoch": 0.20435804118487125,
      "grad_norm": 0.9207697016900108,
      "learning_rate": 4.681379587180138e-06,
      "loss": 0.175,
      "step": 2218
    },
    {
      "epoch": 0.20445017736214124,
      "grad_norm": 0.9294762082235091,
      "learning_rate": 4.681007338562535e-06,
      "loss": 0.1796,
      "step": 2219
    },
    {
      "epoch": 0.20454231353941124,
      "grad_norm": 0.9734872584164359,
      "learning_rate": 4.680634887438165e-06,
      "loss": 0.1733,
      "step": 2220
    },
    {
      "epoch": 0.20463444971668127,
      "grad_norm": 0.924133430170834,
      "learning_rate": 4.6802622338416115e-06,
      "loss": 0.161,
      "step": 2221
    },
    {
      "epoch": 0.20472658589395126,
      "grad_norm": 0.8720069613724715,
      "learning_rate": 4.679889377807475e-06,
      "loss": 0.1514,
      "step": 2222
    },
    {
      "epoch": 0.20481872207122126,
      "grad_norm": 0.8737950896382805,
      "learning_rate": 4.679516319370374e-06,
      "loss": 0.1527,
      "step": 2223
    },
    {
      "epoch": 0.20491085824849126,
      "grad_norm": 0.9553532936958365,
      "learning_rate": 4.679143058564949e-06,
      "loss": 0.1844,
      "step": 2224
    },
    {
      "epoch": 0.20500299442576128,
      "grad_norm": 1.0052618454260092,
      "learning_rate": 4.678769595425856e-06,
      "loss": 0.1941,
      "step": 2225
    },
    {
      "epoch": 0.20509513060303128,
      "grad_norm": 0.9698993550808772,
      "learning_rate": 4.6783959299877725e-06,
      "loss": 0.1606,
      "step": 2226
    },
    {
      "epoch": 0.20518726678030128,
      "grad_norm": 0.9874980391024155,
      "learning_rate": 4.678022062285392e-06,
      "loss": 0.1753,
      "step": 2227
    },
    {
      "epoch": 0.2052794029575713,
      "grad_norm": 0.9071206997289588,
      "learning_rate": 4.677647992353428e-06,
      "loss": 0.1654,
      "step": 2228
    },
    {
      "epoch": 0.2053715391348413,
      "grad_norm": 0.9335882793666567,
      "learning_rate": 4.677273720226615e-06,
      "loss": 0.1701,
      "step": 2229
    },
    {
      "epoch": 0.2054636753121113,
      "grad_norm": 0.8977947704353159,
      "learning_rate": 4.6768992459397015e-06,
      "loss": 0.1505,
      "step": 2230
    },
    {
      "epoch": 0.20555581148938132,
      "grad_norm": 0.9250319869015167,
      "learning_rate": 4.67652456952746e-06,
      "loss": 0.1564,
      "step": 2231
    },
    {
      "epoch": 0.20564794766665132,
      "grad_norm": 0.9457739881833873,
      "learning_rate": 4.6761496910246766e-06,
      "loss": 0.1708,
      "step": 2232
    },
    {
      "epoch": 0.2057400838439213,
      "grad_norm": 0.8952312643772637,
      "learning_rate": 4.6757746104661606e-06,
      "loss": 0.159,
      "step": 2233
    },
    {
      "epoch": 0.2058322200211913,
      "grad_norm": 0.9126755394162147,
      "learning_rate": 4.675399327886738e-06,
      "loss": 0.1598,
      "step": 2234
    },
    {
      "epoch": 0.20592435619846133,
      "grad_norm": 0.9376971680289053,
      "learning_rate": 4.675023843321254e-06,
      "loss": 0.1645,
      "step": 2235
    },
    {
      "epoch": 0.20601649237573133,
      "grad_norm": 0.9812020676495905,
      "learning_rate": 4.674648156804571e-06,
      "loss": 0.1508,
      "step": 2236
    },
    {
      "epoch": 0.20610862855300133,
      "grad_norm": 1.0616534309734076,
      "learning_rate": 4.674272268371574e-06,
      "loss": 0.177,
      "step": 2237
    },
    {
      "epoch": 0.20620076473027135,
      "grad_norm": 0.9902707787686815,
      "learning_rate": 4.673896178057162e-06,
      "loss": 0.1725,
      "step": 2238
    },
    {
      "epoch": 0.20629290090754135,
      "grad_norm": 0.9329391926904818,
      "learning_rate": 4.673519885896256e-06,
      "loss": 0.182,
      "step": 2239
    },
    {
      "epoch": 0.20638503708481135,
      "grad_norm": 0.9761864416718115,
      "learning_rate": 4.673143391923794e-06,
      "loss": 0.1788,
      "step": 2240
    },
    {
      "epoch": 0.20647717326208134,
      "grad_norm": 0.9435942541496123,
      "learning_rate": 4.672766696174736e-06,
      "loss": 0.1664,
      "step": 2241
    },
    {
      "epoch": 0.20656930943935137,
      "grad_norm": 0.8962267290534224,
      "learning_rate": 4.672389798684055e-06,
      "loss": 0.173,
      "step": 2242
    },
    {
      "epoch": 0.20666144561662136,
      "grad_norm": 0.9259279692548622,
      "learning_rate": 4.672012699486748e-06,
      "loss": 0.1722,
      "step": 2243
    },
    {
      "epoch": 0.20675358179389136,
      "grad_norm": 0.9124221318809105,
      "learning_rate": 4.671635398617828e-06,
      "loss": 0.1616,
      "step": 2244
    },
    {
      "epoch": 0.20684571797116139,
      "grad_norm": 0.8905141403926174,
      "learning_rate": 4.671257896112327e-06,
      "loss": 0.1604,
      "step": 2245
    },
    {
      "epoch": 0.20693785414843138,
      "grad_norm": 0.9232672218716768,
      "learning_rate": 4.670880192005298e-06,
      "loss": 0.165,
      "step": 2246
    },
    {
      "epoch": 0.20702999032570138,
      "grad_norm": 0.9323052407767077,
      "learning_rate": 4.670502286331809e-06,
      "loss": 0.1805,
      "step": 2247
    },
    {
      "epoch": 0.2071221265029714,
      "grad_norm": 0.9634973629389704,
      "learning_rate": 4.670124179126948e-06,
      "loss": 0.1761,
      "step": 2248
    },
    {
      "epoch": 0.2072142626802414,
      "grad_norm": 0.9278126931365306,
      "learning_rate": 4.669745870425824e-06,
      "loss": 0.172,
      "step": 2249
    },
    {
      "epoch": 0.2073063988575114,
      "grad_norm": 0.849220352694767,
      "learning_rate": 4.669367360263563e-06,
      "loss": 0.1517,
      "step": 2250
    },
    {
      "epoch": 0.2073985350347814,
      "grad_norm": 0.9132996602721539,
      "learning_rate": 4.668988648675309e-06,
      "loss": 0.164,
      "step": 2251
    },
    {
      "epoch": 0.20749067121205142,
      "grad_norm": 0.8700626009502423,
      "learning_rate": 4.668609735696225e-06,
      "loss": 0.1628,
      "step": 2252
    },
    {
      "epoch": 0.20758280738932142,
      "grad_norm": 0.9505619550445175,
      "learning_rate": 4.668230621361494e-06,
      "loss": 0.1642,
      "step": 2253
    },
    {
      "epoch": 0.2076749435665914,
      "grad_norm": 0.9806376626717213,
      "learning_rate": 4.667851305706316e-06,
      "loss": 0.1875,
      "step": 2254
    },
    {
      "epoch": 0.20776707974386144,
      "grad_norm": 0.952808410032759,
      "learning_rate": 4.667471788765911e-06,
      "loss": 0.169,
      "step": 2255
    },
    {
      "epoch": 0.20785921592113143,
      "grad_norm": 1.0121652933247183,
      "learning_rate": 4.667092070575518e-06,
      "loss": 0.1769,
      "step": 2256
    },
    {
      "epoch": 0.20795135209840143,
      "grad_norm": 0.8796668160626014,
      "learning_rate": 4.666712151170392e-06,
      "loss": 0.151,
      "step": 2257
    },
    {
      "epoch": 0.20804348827567143,
      "grad_norm": 0.9275469318790555,
      "learning_rate": 4.6663320305858106e-06,
      "loss": 0.1674,
      "step": 2258
    },
    {
      "epoch": 0.20813562445294145,
      "grad_norm": 0.9732098865299389,
      "learning_rate": 4.665951708857066e-06,
      "loss": 0.1701,
      "step": 2259
    },
    {
      "epoch": 0.20822776063021145,
      "grad_norm": 0.9026738441040866,
      "learning_rate": 4.665571186019473e-06,
      "loss": 0.159,
      "step": 2260
    },
    {
      "epoch": 0.20831989680748145,
      "grad_norm": 0.9593697452130802,
      "learning_rate": 4.665190462108362e-06,
      "loss": 0.1625,
      "step": 2261
    },
    {
      "epoch": 0.20841203298475147,
      "grad_norm": 1.0499408869674476,
      "learning_rate": 4.664809537159084e-06,
      "loss": 0.179,
      "step": 2262
    },
    {
      "epoch": 0.20850416916202147,
      "grad_norm": 0.9468448196539467,
      "learning_rate": 4.664428411207007e-06,
      "loss": 0.1548,
      "step": 2263
    },
    {
      "epoch": 0.20859630533929147,
      "grad_norm": 0.9702256795456764,
      "learning_rate": 4.664047084287518e-06,
      "loss": 0.171,
      "step": 2264
    },
    {
      "epoch": 0.2086884415165615,
      "grad_norm": 0.98262891832084,
      "learning_rate": 4.663665556436025e-06,
      "loss": 0.1658,
      "step": 2265
    },
    {
      "epoch": 0.2087805776938315,
      "grad_norm": 0.9745838646564671,
      "learning_rate": 4.663283827687953e-06,
      "loss": 0.1773,
      "step": 2266
    },
    {
      "epoch": 0.20887271387110148,
      "grad_norm": 0.9604527581018515,
      "learning_rate": 4.662901898078746e-06,
      "loss": 0.1737,
      "step": 2267
    },
    {
      "epoch": 0.20896485004837148,
      "grad_norm": 0.9844126821220404,
      "learning_rate": 4.662519767643863e-06,
      "loss": 0.1678,
      "step": 2268
    },
    {
      "epoch": 0.2090569862256415,
      "grad_norm": 0.957428150635439,
      "learning_rate": 4.662137436418786e-06,
      "loss": 0.1804,
      "step": 2269
    },
    {
      "epoch": 0.2091491224029115,
      "grad_norm": 0.9349317076208526,
      "learning_rate": 4.661754904439018e-06,
      "loss": 0.177,
      "step": 2270
    },
    {
      "epoch": 0.2092412585801815,
      "grad_norm": 0.9298674765899402,
      "learning_rate": 4.661372171740073e-06,
      "loss": 0.1702,
      "step": 2271
    },
    {
      "epoch": 0.20933339475745152,
      "grad_norm": 0.9608961443235141,
      "learning_rate": 4.660989238357489e-06,
      "loss": 0.1725,
      "step": 2272
    },
    {
      "epoch": 0.20942553093472152,
      "grad_norm": 0.9192686971637103,
      "learning_rate": 4.660606104326822e-06,
      "loss": 0.1676,
      "step": 2273
    },
    {
      "epoch": 0.20951766711199152,
      "grad_norm": 0.9484587340929831,
      "learning_rate": 4.660222769683645e-06,
      "loss": 0.1689,
      "step": 2274
    },
    {
      "epoch": 0.20960980328926151,
      "grad_norm": 0.9166918556649847,
      "learning_rate": 4.659839234463552e-06,
      "loss": 0.1666,
      "step": 2275
    },
    {
      "epoch": 0.20970193946653154,
      "grad_norm": 0.9401238964664081,
      "learning_rate": 4.659455498702154e-06,
      "loss": 0.1676,
      "step": 2276
    },
    {
      "epoch": 0.20979407564380154,
      "grad_norm": 0.9401025662602998,
      "learning_rate": 4.65907156243508e-06,
      "loss": 0.1778,
      "step": 2277
    },
    {
      "epoch": 0.20988621182107153,
      "grad_norm": 0.9752374373591519,
      "learning_rate": 4.65868742569798e-06,
      "loss": 0.1672,
      "step": 2278
    },
    {
      "epoch": 0.20997834799834156,
      "grad_norm": 0.9809510171831634,
      "learning_rate": 4.658303088526519e-06,
      "loss": 0.184,
      "step": 2279
    },
    {
      "epoch": 0.21007048417561155,
      "grad_norm": 0.8925811679962771,
      "learning_rate": 4.657918550956384e-06,
      "loss": 0.1697,
      "step": 2280
    },
    {
      "epoch": 0.21016262035288155,
      "grad_norm": 0.9315080343767318,
      "learning_rate": 4.65753381302328e-06,
      "loss": 0.1737,
      "step": 2281
    },
    {
      "epoch": 0.21025475653015158,
      "grad_norm": 0.8436564944976962,
      "learning_rate": 4.657148874762929e-06,
      "loss": 0.1501,
      "step": 2282
    },
    {
      "epoch": 0.21034689270742157,
      "grad_norm": 0.9222611884806714,
      "learning_rate": 4.656763736211073e-06,
      "loss": 0.17,
      "step": 2283
    },
    {
      "epoch": 0.21043902888469157,
      "grad_norm": 0.9276655656851056,
      "learning_rate": 4.656378397403472e-06,
      "loss": 0.1628,
      "step": 2284
    },
    {
      "epoch": 0.21053116506196157,
      "grad_norm": 0.9146059613952714,
      "learning_rate": 4.655992858375904e-06,
      "loss": 0.17,
      "step": 2285
    },
    {
      "epoch": 0.2106233012392316,
      "grad_norm": 0.9490196294298086,
      "learning_rate": 4.655607119164168e-06,
      "loss": 0.1624,
      "step": 2286
    },
    {
      "epoch": 0.2107154374165016,
      "grad_norm": 0.9463987771147194,
      "learning_rate": 4.655221179804078e-06,
      "loss": 0.1729,
      "step": 2287
    },
    {
      "epoch": 0.21080757359377159,
      "grad_norm": 0.9384821744611376,
      "learning_rate": 4.65483504033147e-06,
      "loss": 0.1709,
      "step": 2288
    },
    {
      "epoch": 0.2108997097710416,
      "grad_norm": 0.9991342293889612,
      "learning_rate": 4.654448700782197e-06,
      "loss": 0.173,
      "step": 2289
    },
    {
      "epoch": 0.2109918459483116,
      "grad_norm": 0.9473308951074153,
      "learning_rate": 4.65406216119213e-06,
      "loss": 0.1816,
      "step": 2290
    },
    {
      "epoch": 0.2110839821255816,
      "grad_norm": 0.9682808078755373,
      "learning_rate": 4.653675421597159e-06,
      "loss": 0.1666,
      "step": 2291
    },
    {
      "epoch": 0.2111761183028516,
      "grad_norm": 0.9236695196106048,
      "learning_rate": 4.653288482033194e-06,
      "loss": 0.1725,
      "step": 2292
    },
    {
      "epoch": 0.21126825448012163,
      "grad_norm": 0.9706887089132648,
      "learning_rate": 4.652901342536162e-06,
      "loss": 0.1893,
      "step": 2293
    },
    {
      "epoch": 0.21136039065739162,
      "grad_norm": 1.0069353159222993,
      "learning_rate": 4.652514003142008e-06,
      "loss": 0.1593,
      "step": 2294
    },
    {
      "epoch": 0.21145252683466162,
      "grad_norm": 1.0053797728020337,
      "learning_rate": 4.652126463886697e-06,
      "loss": 0.1876,
      "step": 2295
    },
    {
      "epoch": 0.21154466301193164,
      "grad_norm": 0.9440259393861917,
      "learning_rate": 4.651738724806213e-06,
      "loss": 0.1547,
      "step": 2296
    },
    {
      "epoch": 0.21163679918920164,
      "grad_norm": 0.8471903564795712,
      "learning_rate": 4.651350785936556e-06,
      "loss": 0.1456,
      "step": 2297
    },
    {
      "epoch": 0.21172893536647164,
      "grad_norm": 0.9864966435131894,
      "learning_rate": 4.650962647313747e-06,
      "loss": 0.1737,
      "step": 2298
    },
    {
      "epoch": 0.21182107154374166,
      "grad_norm": 1.018468284517782,
      "learning_rate": 4.650574308973826e-06,
      "loss": 0.1832,
      "step": 2299
    },
    {
      "epoch": 0.21191320772101166,
      "grad_norm": 1.0098138817405005,
      "learning_rate": 4.6501857709528475e-06,
      "loss": 0.1904,
      "step": 2300
    },
    {
      "epoch": 0.21200534389828166,
      "grad_norm": 1.0461555531592517,
      "learning_rate": 4.649797033286889e-06,
      "loss": 0.1821,
      "step": 2301
    },
    {
      "epoch": 0.21209748007555165,
      "grad_norm": 0.9609524234503527,
      "learning_rate": 4.6494080960120444e-06,
      "loss": 0.1708,
      "step": 2302
    },
    {
      "epoch": 0.21218961625282168,
      "grad_norm": 0.8956399373758597,
      "learning_rate": 4.6490189591644274e-06,
      "loss": 0.1596,
      "step": 2303
    },
    {
      "epoch": 0.21228175243009167,
      "grad_norm": 0.9086186604621006,
      "learning_rate": 4.648629622780169e-06,
      "loss": 0.1742,
      "step": 2304
    },
    {
      "epoch": 0.21237388860736167,
      "grad_norm": 0.9054650267010966,
      "learning_rate": 4.648240086895418e-06,
      "loss": 0.1585,
      "step": 2305
    },
    {
      "epoch": 0.2124660247846317,
      "grad_norm": 0.973003929436934,
      "learning_rate": 4.647850351546345e-06,
      "loss": 0.1628,
      "step": 2306
    },
    {
      "epoch": 0.2125581609619017,
      "grad_norm": 0.920435551963916,
      "learning_rate": 4.647460416769134e-06,
      "loss": 0.1639,
      "step": 2307
    },
    {
      "epoch": 0.2126502971391717,
      "grad_norm": 1.010065351946995,
      "learning_rate": 4.647070282599994e-06,
      "loss": 0.171,
      "step": 2308
    },
    {
      "epoch": 0.2127424333164417,
      "grad_norm": 1.0122412359159743,
      "learning_rate": 4.646679949075146e-06,
      "loss": 0.1932,
      "step": 2309
    },
    {
      "epoch": 0.2128345694937117,
      "grad_norm": 0.9360254209298183,
      "learning_rate": 4.646289416230834e-06,
      "loss": 0.1694,
      "step": 2310
    },
    {
      "epoch": 0.2129267056709817,
      "grad_norm": 1.0354873116708483,
      "learning_rate": 4.645898684103318e-06,
      "loss": 0.1771,
      "step": 2311
    },
    {
      "epoch": 0.2130188418482517,
      "grad_norm": 0.9613647007080832,
      "learning_rate": 4.6455077527288795e-06,
      "loss": 0.18,
      "step": 2312
    },
    {
      "epoch": 0.21311097802552173,
      "grad_norm": 0.9226544949065173,
      "learning_rate": 4.6451166221438145e-06,
      "loss": 0.1657,
      "step": 2313
    },
    {
      "epoch": 0.21320311420279173,
      "grad_norm": 1.0212738272454622,
      "learning_rate": 4.644725292384441e-06,
      "loss": 0.1681,
      "step": 2314
    },
    {
      "epoch": 0.21329525038006172,
      "grad_norm": 0.9275529095731044,
      "learning_rate": 4.6443337634870926e-06,
      "loss": 0.1597,
      "step": 2315
    },
    {
      "epoch": 0.21338738655733175,
      "grad_norm": 0.9251203990872005,
      "learning_rate": 4.643942035488123e-06,
      "loss": 0.1665,
      "step": 2316
    },
    {
      "epoch": 0.21347952273460175,
      "grad_norm": 0.8799100754109026,
      "learning_rate": 4.643550108423905e-06,
      "loss": 0.1609,
      "step": 2317
    },
    {
      "epoch": 0.21357165891187174,
      "grad_norm": 0.9864751850142733,
      "learning_rate": 4.64315798233083e-06,
      "loss": 0.1724,
      "step": 2318
    },
    {
      "epoch": 0.21366379508914174,
      "grad_norm": 0.9305526319557794,
      "learning_rate": 4.642765657245304e-06,
      "loss": 0.1703,
      "step": 2319
    },
    {
      "epoch": 0.21375593126641176,
      "grad_norm": 0.9165264859623368,
      "learning_rate": 4.642373133203757e-06,
      "loss": 0.1597,
      "step": 2320
    },
    {
      "epoch": 0.21384806744368176,
      "grad_norm": 0.9499805101389946,
      "learning_rate": 4.641980410242634e-06,
      "loss": 0.1678,
      "step": 2321
    },
    {
      "epoch": 0.21394020362095176,
      "grad_norm": 0.9481987039539749,
      "learning_rate": 4.6415874883983995e-06,
      "loss": 0.1672,
      "step": 2322
    },
    {
      "epoch": 0.21403233979822178,
      "grad_norm": 0.8697561079038395,
      "learning_rate": 4.641194367707535e-06,
      "loss": 0.1636,
      "step": 2323
    },
    {
      "epoch": 0.21412447597549178,
      "grad_norm": 0.9583502223016455,
      "learning_rate": 4.640801048206545e-06,
      "loss": 0.1702,
      "step": 2324
    },
    {
      "epoch": 0.21421661215276178,
      "grad_norm": 0.9973479221639489,
      "learning_rate": 4.6404075299319465e-06,
      "loss": 0.1651,
      "step": 2325
    },
    {
      "epoch": 0.2143087483300318,
      "grad_norm": 0.8790869012185121,
      "learning_rate": 4.640013812920278e-06,
      "loss": 0.1584,
      "step": 2326
    },
    {
      "epoch": 0.2144008845073018,
      "grad_norm": 0.8205862861509154,
      "learning_rate": 4.639619897208097e-06,
      "loss": 0.1542,
      "step": 2327
    },
    {
      "epoch": 0.2144930206845718,
      "grad_norm": 0.970252112224293,
      "learning_rate": 4.639225782831978e-06,
      "loss": 0.1679,
      "step": 2328
    },
    {
      "epoch": 0.2145851568618418,
      "grad_norm": 0.9452446931266928,
      "learning_rate": 4.638831469828515e-06,
      "loss": 0.1701,
      "step": 2329
    },
    {
      "epoch": 0.21467729303911182,
      "grad_norm": 0.9891248909941764,
      "learning_rate": 4.638436958234321e-06,
      "loss": 0.1851,
      "step": 2330
    },
    {
      "epoch": 0.2147694292163818,
      "grad_norm": 0.8998482758735131,
      "learning_rate": 4.638042248086023e-06,
      "loss": 0.1676,
      "step": 2331
    },
    {
      "epoch": 0.2148615653936518,
      "grad_norm": 1.0211991710865993,
      "learning_rate": 4.637647339420273e-06,
      "loss": 0.172,
      "step": 2332
    },
    {
      "epoch": 0.21495370157092183,
      "grad_norm": 1.0033330908470734,
      "learning_rate": 4.637252232273738e-06,
      "loss": 0.1764,
      "step": 2333
    },
    {
      "epoch": 0.21504583774819183,
      "grad_norm": 0.8913172960585651,
      "learning_rate": 4.6368569266831035e-06,
      "loss": 0.1557,
      "step": 2334
    },
    {
      "epoch": 0.21513797392546183,
      "grad_norm": 1.043087204088054,
      "learning_rate": 4.636461422685072e-06,
      "loss": 0.1745,
      "step": 2335
    },
    {
      "epoch": 0.21523011010273183,
      "grad_norm": 0.8909211705779044,
      "learning_rate": 4.63606572031637e-06,
      "loss": 0.1649,
      "step": 2336
    },
    {
      "epoch": 0.21532224628000185,
      "grad_norm": 0.9538343349636392,
      "learning_rate": 4.635669819613734e-06,
      "loss": 0.1642,
      "step": 2337
    },
    {
      "epoch": 0.21541438245727185,
      "grad_norm": 0.9715472692776371,
      "learning_rate": 4.635273720613925e-06,
      "loss": 0.1712,
      "step": 2338
    },
    {
      "epoch": 0.21550651863454184,
      "grad_norm": 0.9349140770076221,
      "learning_rate": 4.634877423353723e-06,
      "loss": 0.1721,
      "step": 2339
    },
    {
      "epoch": 0.21559865481181187,
      "grad_norm": 0.9606087264919402,
      "learning_rate": 4.634480927869921e-06,
      "loss": 0.1668,
      "step": 2340
    },
    {
      "epoch": 0.21569079098908187,
      "grad_norm": 0.8532265613158703,
      "learning_rate": 4.634084234199335e-06,
      "loss": 0.1369,
      "step": 2341
    },
    {
      "epoch": 0.21578292716635186,
      "grad_norm": 0.8989400773969781,
      "learning_rate": 4.633687342378799e-06,
      "loss": 0.1698,
      "step": 2342
    },
    {
      "epoch": 0.2158750633436219,
      "grad_norm": 1.0121332804629664,
      "learning_rate": 4.633290252445164e-06,
      "loss": 0.1878,
      "step": 2343
    },
    {
      "epoch": 0.21596719952089188,
      "grad_norm": 1.030553980973197,
      "learning_rate": 4.632892964435299e-06,
      "loss": 0.1805,
      "step": 2344
    },
    {
      "epoch": 0.21605933569816188,
      "grad_norm": 0.9629513385761489,
      "learning_rate": 4.632495478386092e-06,
      "loss": 0.1634,
      "step": 2345
    },
    {
      "epoch": 0.21615147187543188,
      "grad_norm": 0.906324183769925,
      "learning_rate": 4.632097794334451e-06,
      "loss": 0.172,
      "step": 2346
    },
    {
      "epoch": 0.2162436080527019,
      "grad_norm": 0.9551279217162751,
      "learning_rate": 4.631699912317301e-06,
      "loss": 0.1656,
      "step": 2347
    },
    {
      "epoch": 0.2163357442299719,
      "grad_norm": 0.9365362458955379,
      "learning_rate": 4.631301832371584e-06,
      "loss": 0.1759,
      "step": 2348
    },
    {
      "epoch": 0.2164278804072419,
      "grad_norm": 0.9881321567680756,
      "learning_rate": 4.630903554534262e-06,
      "loss": 0.1685,
      "step": 2349
    },
    {
      "epoch": 0.21652001658451192,
      "grad_norm": 0.9674707035004548,
      "learning_rate": 4.630505078842317e-06,
      "loss": 0.1823,
      "step": 2350
    },
    {
      "epoch": 0.21661215276178192,
      "grad_norm": 0.9793487730071272,
      "learning_rate": 4.630106405332745e-06,
      "loss": 0.1827,
      "step": 2351
    },
    {
      "epoch": 0.21670428893905191,
      "grad_norm": 0.9409326842175851,
      "learning_rate": 4.629707534042564e-06,
      "loss": 0.1639,
      "step": 2352
    },
    {
      "epoch": 0.2167964251163219,
      "grad_norm": 1.0087420208183764,
      "learning_rate": 4.6293084650088095e-06,
      "loss": 0.1795,
      "step": 2353
    },
    {
      "epoch": 0.21688856129359194,
      "grad_norm": 1.044876791324977,
      "learning_rate": 4.628909198268534e-06,
      "loss": 0.169,
      "step": 2354
    },
    {
      "epoch": 0.21698069747086193,
      "grad_norm": 0.9389750409437341,
      "learning_rate": 4.628509733858813e-06,
      "loss": 0.1679,
      "step": 2355
    },
    {
      "epoch": 0.21707283364813193,
      "grad_norm": 0.8676673622848394,
      "learning_rate": 4.628110071816732e-06,
      "loss": 0.1624,
      "step": 2356
    },
    {
      "epoch": 0.21716496982540195,
      "grad_norm": 0.9154277930953563,
      "learning_rate": 4.6277102121794015e-06,
      "loss": 0.1751,
      "step": 2357
    },
    {
      "epoch": 0.21725710600267195,
      "grad_norm": 0.9619309892433496,
      "learning_rate": 4.62731015498395e-06,
      "loss": 0.1738,
      "step": 2358
    },
    {
      "epoch": 0.21734924217994195,
      "grad_norm": 0.926667275696638,
      "learning_rate": 4.626909900267521e-06,
      "loss": 0.1743,
      "step": 2359
    },
    {
      "epoch": 0.21744137835721197,
      "grad_norm": 0.9403910037068345,
      "learning_rate": 4.626509448067279e-06,
      "loss": 0.1655,
      "step": 2360
    },
    {
      "epoch": 0.21753351453448197,
      "grad_norm": 0.8818296140485228,
      "learning_rate": 4.626108798420406e-06,
      "loss": 0.1574,
      "step": 2361
    },
    {
      "epoch": 0.21762565071175197,
      "grad_norm": 0.9323796067026165,
      "learning_rate": 4.625707951364102e-06,
      "loss": 0.1781,
      "step": 2362
    },
    {
      "epoch": 0.21771778688902196,
      "grad_norm": 0.9243899591524739,
      "learning_rate": 4.625306906935586e-06,
      "loss": 0.1743,
      "step": 2363
    },
    {
      "epoch": 0.217809923066292,
      "grad_norm": 0.9527407527487539,
      "learning_rate": 4.624905665172095e-06,
      "loss": 0.1642,
      "step": 2364
    },
    {
      "epoch": 0.21790205924356199,
      "grad_norm": 0.8815844393739036,
      "learning_rate": 4.6245042261108845e-06,
      "loss": 0.1668,
      "step": 2365
    },
    {
      "epoch": 0.21799419542083198,
      "grad_norm": 0.9479977436616651,
      "learning_rate": 4.6241025897892275e-06,
      "loss": 0.1842,
      "step": 2366
    },
    {
      "epoch": 0.218086331598102,
      "grad_norm": 1.0207347307047026,
      "learning_rate": 4.623700756244417e-06,
      "loss": 0.1859,
      "step": 2367
    },
    {
      "epoch": 0.218178467775372,
      "grad_norm": 0.9197498111886677,
      "learning_rate": 4.6232987255137625e-06,
      "loss": 0.1515,
      "step": 2368
    },
    {
      "epoch": 0.218270603952642,
      "grad_norm": 0.8838719986716687,
      "learning_rate": 4.622896497634593e-06,
      "loss": 0.1483,
      "step": 2369
    },
    {
      "epoch": 0.218362740129912,
      "grad_norm": 0.900451505897105,
      "learning_rate": 4.622494072644255e-06,
      "loss": 0.1735,
      "step": 2370
    },
    {
      "epoch": 0.21845487630718202,
      "grad_norm": 0.8934791164057562,
      "learning_rate": 4.622091450580114e-06,
      "loss": 0.1656,
      "step": 2371
    },
    {
      "epoch": 0.21854701248445202,
      "grad_norm": 0.895686185724979,
      "learning_rate": 4.621688631479554e-06,
      "loss": 0.1653,
      "step": 2372
    },
    {
      "epoch": 0.21863914866172202,
      "grad_norm": 0.9833309677439741,
      "learning_rate": 4.621285615379976e-06,
      "loss": 0.1743,
      "step": 2373
    },
    {
      "epoch": 0.21873128483899204,
      "grad_norm": 0.9372570832200562,
      "learning_rate": 4.620882402318799e-06,
      "loss": 0.1736,
      "step": 2374
    },
    {
      "epoch": 0.21882342101626204,
      "grad_norm": 0.8761533867954667,
      "learning_rate": 4.620478992333463e-06,
      "loss": 0.1618,
      "step": 2375
    },
    {
      "epoch": 0.21891555719353203,
      "grad_norm": 0.8919996604272982,
      "learning_rate": 4.620075385461426e-06,
      "loss": 0.1608,
      "step": 2376
    },
    {
      "epoch": 0.21900769337080206,
      "grad_norm": 0.8657870181470666,
      "learning_rate": 4.61967158174016e-06,
      "loss": 0.1508,
      "step": 2377
    },
    {
      "epoch": 0.21909982954807206,
      "grad_norm": 0.923022145836738,
      "learning_rate": 4.61926758120716e-06,
      "loss": 0.1579,
      "step": 2378
    },
    {
      "epoch": 0.21919196572534205,
      "grad_norm": 0.9272554038487848,
      "learning_rate": 4.618863383899937e-06,
      "loss": 0.1696,
      "step": 2379
    },
    {
      "epoch": 0.21928410190261205,
      "grad_norm": 0.9473433534596402,
      "learning_rate": 4.618458989856021e-06,
      "loss": 0.1765,
      "step": 2380
    },
    {
      "epoch": 0.21937623807988207,
      "grad_norm": 0.923103183354492,
      "learning_rate": 4.618054399112959e-06,
      "loss": 0.1675,
      "step": 2381
    },
    {
      "epoch": 0.21946837425715207,
      "grad_norm": 0.9369984252173658,
      "learning_rate": 4.617649611708318e-06,
      "loss": 0.1711,
      "step": 2382
    },
    {
      "epoch": 0.21956051043442207,
      "grad_norm": 1.0737046620535462,
      "learning_rate": 4.617244627679684e-06,
      "loss": 0.1829,
      "step": 2383
    },
    {
      "epoch": 0.2196526466116921,
      "grad_norm": 0.9481793986325134,
      "learning_rate": 4.6168394470646575e-06,
      "loss": 0.1639,
      "step": 2384
    },
    {
      "epoch": 0.2197447827889621,
      "grad_norm": 1.0068030260607288,
      "learning_rate": 4.61643406990086e-06,
      "loss": 0.1714,
      "step": 2385
    },
    {
      "epoch": 0.2198369189662321,
      "grad_norm": 0.9234873321825561,
      "learning_rate": 4.616028496225933e-06,
      "loss": 0.165,
      "step": 2386
    },
    {
      "epoch": 0.21992905514350208,
      "grad_norm": 0.9145551409494252,
      "learning_rate": 4.6156227260775314e-06,
      "loss": 0.1637,
      "step": 2387
    },
    {
      "epoch": 0.2200211913207721,
      "grad_norm": 0.9326334048386388,
      "learning_rate": 4.615216759493332e-06,
      "loss": 0.1835,
      "step": 2388
    },
    {
      "epoch": 0.2201133274980421,
      "grad_norm": 0.8781396547544478,
      "learning_rate": 4.614810596511028e-06,
      "loss": 0.1571,
      "step": 2389
    },
    {
      "epoch": 0.2202054636753121,
      "grad_norm": 0.9321770521056157,
      "learning_rate": 4.614404237168334e-06,
      "loss": 0.1606,
      "step": 2390
    },
    {
      "epoch": 0.22029759985258213,
      "grad_norm": 0.9109442841057521,
      "learning_rate": 4.613997681502977e-06,
      "loss": 0.1523,
      "step": 2391
    },
    {
      "epoch": 0.22038973602985212,
      "grad_norm": 0.9073470892509928,
      "learning_rate": 4.61359092955271e-06,
      "loss": 0.1544,
      "step": 2392
    },
    {
      "epoch": 0.22048187220712212,
      "grad_norm": 0.9727998985444354,
      "learning_rate": 4.613183981355297e-06,
      "loss": 0.1606,
      "step": 2393
    },
    {
      "epoch": 0.22057400838439215,
      "grad_norm": 0.9071330785105896,
      "learning_rate": 4.612776836948524e-06,
      "loss": 0.1543,
      "step": 2394
    },
    {
      "epoch": 0.22066614456166214,
      "grad_norm": 0.9097721335699841,
      "learning_rate": 4.612369496370194e-06,
      "loss": 0.1574,
      "step": 2395
    },
    {
      "epoch": 0.22075828073893214,
      "grad_norm": 0.9304001670063471,
      "learning_rate": 4.611961959658129e-06,
      "loss": 0.1568,
      "step": 2396
    },
    {
      "epoch": 0.22085041691620214,
      "grad_norm": 0.9445814202450755,
      "learning_rate": 4.611554226850168e-06,
      "loss": 0.1821,
      "step": 2397
    },
    {
      "epoch": 0.22094255309347216,
      "grad_norm": 0.9995514602468918,
      "learning_rate": 4.6111462979841704e-06,
      "loss": 0.1621,
      "step": 2398
    },
    {
      "epoch": 0.22103468927074216,
      "grad_norm": 0.9832850713868987,
      "learning_rate": 4.610738173098012e-06,
      "loss": 0.1711,
      "step": 2399
    },
    {
      "epoch": 0.22112682544801215,
      "grad_norm": 0.9329304727726307,
      "learning_rate": 4.610329852229587e-06,
      "loss": 0.1566,
      "step": 2400
    },
    {
      "epoch": 0.22121896162528218,
      "grad_norm": 0.9107929098618456,
      "learning_rate": 4.6099213354168085e-06,
      "loss": 0.1735,
      "step": 2401
    },
    {
      "epoch": 0.22131109780255218,
      "grad_norm": 0.9163785528280652,
      "learning_rate": 4.609512622697606e-06,
      "loss": 0.1564,
      "step": 2402
    },
    {
      "epoch": 0.22140323397982217,
      "grad_norm": 0.9260260556809167,
      "learning_rate": 4.609103714109931e-06,
      "loss": 0.1583,
      "step": 2403
    },
    {
      "epoch": 0.22149537015709217,
      "grad_norm": 0.995726300308974,
      "learning_rate": 4.608694609691747e-06,
      "loss": 0.1728,
      "step": 2404
    },
    {
      "epoch": 0.2215875063343622,
      "grad_norm": 0.9277977487816504,
      "learning_rate": 4.608285309481043e-06,
      "loss": 0.1636,
      "step": 2405
    },
    {
      "epoch": 0.2216796425116322,
      "grad_norm": 0.9227743080386674,
      "learning_rate": 4.607875813515821e-06,
      "loss": 0.1707,
      "step": 2406
    },
    {
      "epoch": 0.2217717786889022,
      "grad_norm": 0.8860722109582515,
      "learning_rate": 4.607466121834103e-06,
      "loss": 0.1579,
      "step": 2407
    },
    {
      "epoch": 0.2218639148661722,
      "grad_norm": 0.920265036722145,
      "learning_rate": 4.607056234473928e-06,
      "loss": 0.1578,
      "step": 2408
    },
    {
      "epoch": 0.2219560510434422,
      "grad_norm": 0.8811025661892827,
      "learning_rate": 4.606646151473355e-06,
      "loss": 0.1675,
      "step": 2409
    },
    {
      "epoch": 0.2220481872207122,
      "grad_norm": 0.8916127939474685,
      "learning_rate": 4.606235872870461e-06,
      "loss": 0.1773,
      "step": 2410
    },
    {
      "epoch": 0.22214032339798223,
      "grad_norm": 0.9900333498792695,
      "learning_rate": 4.605825398703339e-06,
      "loss": 0.1751,
      "step": 2411
    },
    {
      "epoch": 0.22223245957525223,
      "grad_norm": 0.9706997511826468,
      "learning_rate": 4.605414729010102e-06,
      "loss": 0.1808,
      "step": 2412
    },
    {
      "epoch": 0.22232459575252222,
      "grad_norm": 0.9164289847295664,
      "learning_rate": 4.605003863828881e-06,
      "loss": 0.1609,
      "step": 2413
    },
    {
      "epoch": 0.22241673192979222,
      "grad_norm": 0.9547324103869823,
      "learning_rate": 4.604592803197825e-06,
      "loss": 0.1822,
      "step": 2414
    },
    {
      "epoch": 0.22250886810706225,
      "grad_norm": 0.8917918711512808,
      "learning_rate": 4.6041815471551e-06,
      "loss": 0.1579,
      "step": 2415
    },
    {
      "epoch": 0.22260100428433224,
      "grad_norm": 0.9874142973301107,
      "learning_rate": 4.603770095738892e-06,
      "loss": 0.1781,
      "step": 2416
    },
    {
      "epoch": 0.22269314046160224,
      "grad_norm": 0.9212348176350641,
      "learning_rate": 4.603358448987405e-06,
      "loss": 0.1673,
      "step": 2417
    },
    {
      "epoch": 0.22278527663887226,
      "grad_norm": 0.9412577615662902,
      "learning_rate": 4.602946606938858e-06,
      "loss": 0.1729,
      "step": 2418
    },
    {
      "epoch": 0.22287741281614226,
      "grad_norm": 0.8402429294824661,
      "learning_rate": 4.6025345696314935e-06,
      "loss": 0.1637,
      "step": 2419
    },
    {
      "epoch": 0.22296954899341226,
      "grad_norm": 1.0231878663886147,
      "learning_rate": 4.602122337103568e-06,
      "loss": 0.1861,
      "step": 2420
    },
    {
      "epoch": 0.22306168517068226,
      "grad_norm": 0.9267704717175042,
      "learning_rate": 4.601709909393357e-06,
      "loss": 0.1789,
      "step": 2421
    },
    {
      "epoch": 0.22315382134795228,
      "grad_norm": 0.9402886812499314,
      "learning_rate": 4.601297286539155e-06,
      "loss": 0.1844,
      "step": 2422
    },
    {
      "epoch": 0.22324595752522228,
      "grad_norm": 0.9232675247411815,
      "learning_rate": 4.600884468579273e-06,
      "loss": 0.1718,
      "step": 2423
    },
    {
      "epoch": 0.22333809370249227,
      "grad_norm": 0.8850107261659447,
      "learning_rate": 4.600471455552043e-06,
      "loss": 0.1545,
      "step": 2424
    },
    {
      "epoch": 0.2234302298797623,
      "grad_norm": 0.9001963815880184,
      "learning_rate": 4.600058247495812e-06,
      "loss": 0.168,
      "step": 2425
    },
    {
      "epoch": 0.2235223660570323,
      "grad_norm": 1.0125379821849876,
      "learning_rate": 4.599644844448946e-06,
      "loss": 0.175,
      "step": 2426
    },
    {
      "epoch": 0.2236145022343023,
      "grad_norm": 0.9715366729797272,
      "learning_rate": 4.599231246449831e-06,
      "loss": 0.1705,
      "step": 2427
    },
    {
      "epoch": 0.22370663841157232,
      "grad_norm": 0.983021655050111,
      "learning_rate": 4.5988174535368686e-06,
      "loss": 0.1568,
      "step": 2428
    },
    {
      "epoch": 0.22379877458884231,
      "grad_norm": 0.9512948661524263,
      "learning_rate": 4.59840346574848e-06,
      "loss": 0.1809,
      "step": 2429
    },
    {
      "epoch": 0.2238909107661123,
      "grad_norm": 0.9968867206661762,
      "learning_rate": 4.597989283123104e-06,
      "loss": 0.1843,
      "step": 2430
    },
    {
      "epoch": 0.2239830469433823,
      "grad_norm": 1.0375629522642,
      "learning_rate": 4.597574905699196e-06,
      "loss": 0.1685,
      "step": 2431
    },
    {
      "epoch": 0.22407518312065233,
      "grad_norm": 1.0263084195890293,
      "learning_rate": 4.597160333515233e-06,
      "loss": 0.1767,
      "step": 2432
    },
    {
      "epoch": 0.22416731929792233,
      "grad_norm": 0.9303738801673548,
      "learning_rate": 4.596745566609707e-06,
      "loss": 0.1598,
      "step": 2433
    },
    {
      "epoch": 0.22425945547519233,
      "grad_norm": 0.927679806741521,
      "learning_rate": 4.5963306050211296e-06,
      "loss": 0.1649,
      "step": 2434
    },
    {
      "epoch": 0.22435159165246235,
      "grad_norm": 0.9658005546537741,
      "learning_rate": 4.595915448788031e-06,
      "loss": 0.1818,
      "step": 2435
    },
    {
      "epoch": 0.22444372782973235,
      "grad_norm": 0.9499725030180026,
      "learning_rate": 4.5955000979489565e-06,
      "loss": 0.1766,
      "step": 2436
    },
    {
      "epoch": 0.22453586400700234,
      "grad_norm": 0.9462439750542999,
      "learning_rate": 4.595084552542472e-06,
      "loss": 0.1728,
      "step": 2437
    },
    {
      "epoch": 0.22462800018427234,
      "grad_norm": 0.9720158256266674,
      "learning_rate": 4.594668812607162e-06,
      "loss": 0.1667,
      "step": 2438
    },
    {
      "epoch": 0.22472013636154237,
      "grad_norm": 0.9173112612757395,
      "learning_rate": 4.594252878181627e-06,
      "loss": 0.163,
      "step": 2439
    },
    {
      "epoch": 0.22481227253881236,
      "grad_norm": 0.940867147194557,
      "learning_rate": 4.593836749304487e-06,
      "loss": 0.1511,
      "step": 2440
    },
    {
      "epoch": 0.22490440871608236,
      "grad_norm": 0.9835880434614503,
      "learning_rate": 4.59342042601438e-06,
      "loss": 0.1839,
      "step": 2441
    },
    {
      "epoch": 0.22499654489335238,
      "grad_norm": 0.9879031852310044,
      "learning_rate": 4.59300390834996e-06,
      "loss": 0.178,
      "step": 2442
    },
    {
      "epoch": 0.22508868107062238,
      "grad_norm": 0.9081333169346187,
      "learning_rate": 4.592587196349902e-06,
      "loss": 0.1798,
      "step": 2443
    },
    {
      "epoch": 0.22518081724789238,
      "grad_norm": 0.8798711443957609,
      "learning_rate": 4.592170290052898e-06,
      "loss": 0.1654,
      "step": 2444
    },
    {
      "epoch": 0.2252729534251624,
      "grad_norm": 0.9326071476847042,
      "learning_rate": 4.591753189497658e-06,
      "loss": 0.1544,
      "step": 2445
    },
    {
      "epoch": 0.2253650896024324,
      "grad_norm": 1.1534455607712155,
      "learning_rate": 4.591335894722909e-06,
      "loss": 0.1937,
      "step": 2446
    },
    {
      "epoch": 0.2254572257797024,
      "grad_norm": 0.9028689022204759,
      "learning_rate": 4.5909184057673976e-06,
      "loss": 0.1604,
      "step": 2447
    },
    {
      "epoch": 0.2255493619569724,
      "grad_norm": 0.8946199825952447,
      "learning_rate": 4.590500722669886e-06,
      "loss": 0.1697,
      "step": 2448
    },
    {
      "epoch": 0.22564149813424242,
      "grad_norm": 0.9077763847398155,
      "learning_rate": 4.590082845469158e-06,
      "loss": 0.1507,
      "step": 2449
    },
    {
      "epoch": 0.22573363431151242,
      "grad_norm": 0.9592178561443785,
      "learning_rate": 4.589664774204013e-06,
      "loss": 0.1696,
      "step": 2450
    },
    {
      "epoch": 0.2258257704887824,
      "grad_norm": 0.9613902738050768,
      "learning_rate": 4.589246508913267e-06,
      "loss": 0.1774,
      "step": 2451
    },
    {
      "epoch": 0.22591790666605244,
      "grad_norm": 0.8930021527771865,
      "learning_rate": 4.58882804963576e-06,
      "loss": 0.1676,
      "step": 2452
    },
    {
      "epoch": 0.22601004284332243,
      "grad_norm": 0.9254850657625637,
      "learning_rate": 4.588409396410342e-06,
      "loss": 0.1784,
      "step": 2453
    },
    {
      "epoch": 0.22610217902059243,
      "grad_norm": 0.8648068592964746,
      "learning_rate": 4.587990549275889e-06,
      "loss": 0.1647,
      "step": 2454
    },
    {
      "epoch": 0.22619431519786243,
      "grad_norm": 0.9218315395309605,
      "learning_rate": 4.587571508271288e-06,
      "loss": 0.1643,
      "step": 2455
    },
    {
      "epoch": 0.22628645137513245,
      "grad_norm": 0.9359752443930277,
      "learning_rate": 4.587152273435447e-06,
      "loss": 0.166,
      "step": 2456
    },
    {
      "epoch": 0.22637858755240245,
      "grad_norm": 0.9467255151495074,
      "learning_rate": 4.586732844807293e-06,
      "loss": 0.1695,
      "step": 2457
    },
    {
      "epoch": 0.22647072372967245,
      "grad_norm": 0.9775875127483362,
      "learning_rate": 4.58631322242577e-06,
      "loss": 0.1672,
      "step": 2458
    },
    {
      "epoch": 0.22656285990694247,
      "grad_norm": 1.0104603751151366,
      "learning_rate": 4.58589340632984e-06,
      "loss": 0.1704,
      "step": 2459
    },
    {
      "epoch": 0.22665499608421247,
      "grad_norm": 0.9989237155219942,
      "learning_rate": 4.585473396558482e-06,
      "loss": 0.1896,
      "step": 2460
    },
    {
      "epoch": 0.22674713226148246,
      "grad_norm": 0.9295942764578942,
      "learning_rate": 4.585053193150695e-06,
      "loss": 0.148,
      "step": 2461
    },
    {
      "epoch": 0.2268392684387525,
      "grad_norm": 0.9990328294585157,
      "learning_rate": 4.584632796145495e-06,
      "loss": 0.1617,
      "step": 2462
    },
    {
      "epoch": 0.2269314046160225,
      "grad_norm": 1.0342567607730049,
      "learning_rate": 4.584212205581915e-06,
      "loss": 0.1736,
      "step": 2463
    },
    {
      "epoch": 0.22702354079329248,
      "grad_norm": 0.9424632951659273,
      "learning_rate": 4.5837914214990085e-06,
      "loss": 0.1685,
      "step": 2464
    },
    {
      "epoch": 0.22711567697056248,
      "grad_norm": 0.8664763361776535,
      "learning_rate": 4.583370443935843e-06,
      "loss": 0.1574,
      "step": 2465
    },
    {
      "epoch": 0.2272078131478325,
      "grad_norm": 0.9720244211223175,
      "learning_rate": 4.582949272931508e-06,
      "loss": 0.1708,
      "step": 2466
    },
    {
      "epoch": 0.2272999493251025,
      "grad_norm": 0.9624471682686935,
      "learning_rate": 4.582527908525109e-06,
      "loss": 0.1701,
      "step": 2467
    },
    {
      "epoch": 0.2273920855023725,
      "grad_norm": 0.9668421091692699,
      "learning_rate": 4.5821063507557695e-06,
      "loss": 0.1771,
      "step": 2468
    },
    {
      "epoch": 0.22748422167964252,
      "grad_norm": 0.9317703673514961,
      "learning_rate": 4.581684599662632e-06,
      "loss": 0.1706,
      "step": 2469
    },
    {
      "epoch": 0.22757635785691252,
      "grad_norm": 0.9286142161019658,
      "learning_rate": 4.581262655284854e-06,
      "loss": 0.1661,
      "step": 2470
    },
    {
      "epoch": 0.22766849403418252,
      "grad_norm": 0.9792223907797291,
      "learning_rate": 4.580840517661615e-06,
      "loss": 0.1513,
      "step": 2471
    },
    {
      "epoch": 0.2277606302114525,
      "grad_norm": 0.8942474980522034,
      "learning_rate": 4.58041818683211e-06,
      "loss": 0.1607,
      "step": 2472
    },
    {
      "epoch": 0.22785276638872254,
      "grad_norm": 0.9241481692834103,
      "learning_rate": 4.579995662835552e-06,
      "loss": 0.1693,
      "step": 2473
    },
    {
      "epoch": 0.22794490256599254,
      "grad_norm": 0.9423013205176731,
      "learning_rate": 4.579572945711174e-06,
      "loss": 0.1548,
      "step": 2474
    },
    {
      "epoch": 0.22803703874326253,
      "grad_norm": 1.0187746405002311,
      "learning_rate": 4.579150035498223e-06,
      "loss": 0.1702,
      "step": 2475
    },
    {
      "epoch": 0.22812917492053256,
      "grad_norm": 1.0143035138735677,
      "learning_rate": 4.578726932235969e-06,
      "loss": 0.1638,
      "step": 2476
    },
    {
      "epoch": 0.22822131109780255,
      "grad_norm": 0.9930147973024229,
      "learning_rate": 4.5783036359636935e-06,
      "loss": 0.1677,
      "step": 2477
    },
    {
      "epoch": 0.22831344727507255,
      "grad_norm": 0.9625773296101545,
      "learning_rate": 4.5778801467207035e-06,
      "loss": 0.1699,
      "step": 2478
    },
    {
      "epoch": 0.22840558345234258,
      "grad_norm": 0.9811159556227943,
      "learning_rate": 4.577456464546317e-06,
      "loss": 0.1745,
      "step": 2479
    },
    {
      "epoch": 0.22849771962961257,
      "grad_norm": 0.9546391442575929,
      "learning_rate": 4.5770325894798754e-06,
      "loss": 0.1701,
      "step": 2480
    },
    {
      "epoch": 0.22858985580688257,
      "grad_norm": 0.8864094301933726,
      "learning_rate": 4.5766085215607335e-06,
      "loss": 0.1704,
      "step": 2481
    },
    {
      "epoch": 0.22868199198415257,
      "grad_norm": 0.9331663904936708,
      "learning_rate": 4.576184260828267e-06,
      "loss": 0.1603,
      "step": 2482
    },
    {
      "epoch": 0.2287741281614226,
      "grad_norm": 1.0563673952258763,
      "learning_rate": 4.575759807321869e-06,
      "loss": 0.179,
      "step": 2483
    },
    {
      "epoch": 0.2288662643386926,
      "grad_norm": 0.9305700277198957,
      "learning_rate": 4.575335161080948e-06,
      "loss": 0.1595,
      "step": 2484
    },
    {
      "epoch": 0.22895840051596258,
      "grad_norm": 0.9856789510726491,
      "learning_rate": 4.574910322144935e-06,
      "loss": 0.1785,
      "step": 2485
    },
    {
      "epoch": 0.2290505366932326,
      "grad_norm": 0.9650074532401066,
      "learning_rate": 4.574485290553276e-06,
      "loss": 0.1671,
      "step": 2486
    },
    {
      "epoch": 0.2291426728705026,
      "grad_norm": 0.9194999600270781,
      "learning_rate": 4.574060066345434e-06,
      "loss": 0.1794,
      "step": 2487
    },
    {
      "epoch": 0.2292348090477726,
      "grad_norm": 0.9499206998494362,
      "learning_rate": 4.573634649560891e-06,
      "loss": 0.1651,
      "step": 2488
    },
    {
      "epoch": 0.2293269452250426,
      "grad_norm": 0.9880827662100659,
      "learning_rate": 4.573209040239148e-06,
      "loss": 0.172,
      "step": 2489
    },
    {
      "epoch": 0.22941908140231262,
      "grad_norm": 0.9704437917207153,
      "learning_rate": 4.572783238419723e-06,
      "loss": 0.165,
      "step": 2490
    },
    {
      "epoch": 0.22951121757958262,
      "grad_norm": 0.9473886009713728,
      "learning_rate": 4.572357244142151e-06,
      "loss": 0.1687,
      "step": 2491
    },
    {
      "epoch": 0.22960335375685262,
      "grad_norm": 1.0115985599601431,
      "learning_rate": 4.5719310574459846e-06,
      "loss": 0.1661,
      "step": 2492
    },
    {
      "epoch": 0.22969548993412264,
      "grad_norm": 1.0523002183614862,
      "learning_rate": 4.5715046783707976e-06,
      "loss": 0.161,
      "step": 2493
    },
    {
      "epoch": 0.22978762611139264,
      "grad_norm": 0.9012474784949154,
      "learning_rate": 4.571078106956178e-06,
      "loss": 0.1588,
      "step": 2494
    },
    {
      "epoch": 0.22987976228866264,
      "grad_norm": 1.0733611855009282,
      "learning_rate": 4.570651343241733e-06,
      "loss": 0.1731,
      "step": 2495
    },
    {
      "epoch": 0.22997189846593266,
      "grad_norm": 1.0563609033763628,
      "learning_rate": 4.570224387267089e-06,
      "loss": 0.179,
      "step": 2496
    },
    {
      "epoch": 0.23006403464320266,
      "grad_norm": 0.9549484688378221,
      "learning_rate": 4.569797239071887e-06,
      "loss": 0.1831,
      "step": 2497
    },
    {
      "epoch": 0.23015617082047266,
      "grad_norm": 1.0449068654366036,
      "learning_rate": 4.569369898695789e-06,
      "loss": 0.1741,
      "step": 2498
    },
    {
      "epoch": 0.23024830699774265,
      "grad_norm": 0.9602792119218708,
      "learning_rate": 4.568942366178473e-06,
      "loss": 0.1623,
      "step": 2499
    },
    {
      "epoch": 0.23034044317501268,
      "grad_norm": 0.9618534029768782,
      "learning_rate": 4.568514641559636e-06,
      "loss": 0.1647,
      "step": 2500
    },
    {
      "epoch": 0.23034044317501268,
      "eval_loss": 0.16871164739131927,
      "eval_runtime": 300.164,
      "eval_samples_per_second": 23.377,
      "eval_steps_per_second": 2.925,
      "step": 2500
    },
    {
      "epoch": 0.23043257935228267,
      "grad_norm": 0.9533742219678426,
      "learning_rate": 4.5680867248789916e-06,
      "loss": 0.1741,
      "step": 2501
    },
    {
      "epoch": 0.23052471552955267,
      "grad_norm": 0.9298655405790685,
      "learning_rate": 4.567658616176273e-06,
      "loss": 0.1708,
      "step": 2502
    },
    {
      "epoch": 0.2306168517068227,
      "grad_norm": 0.9479029081415975,
      "learning_rate": 4.5672303154912275e-06,
      "loss": 0.1682,
      "step": 2503
    },
    {
      "epoch": 0.2307089878840927,
      "grad_norm": 0.8851274933549781,
      "learning_rate": 4.566801822863626e-06,
      "loss": 0.1666,
      "step": 2504
    },
    {
      "epoch": 0.2308011240613627,
      "grad_norm": 0.8924494397459016,
      "learning_rate": 4.566373138333253e-06,
      "loss": 0.1619,
      "step": 2505
    },
    {
      "epoch": 0.23089326023863269,
      "grad_norm": 0.9435939583116043,
      "learning_rate": 4.565944261939911e-06,
      "loss": 0.163,
      "step": 2506
    },
    {
      "epoch": 0.2309853964159027,
      "grad_norm": 0.8879874757562365,
      "learning_rate": 4.565515193723423e-06,
      "loss": 0.1756,
      "step": 2507
    },
    {
      "epoch": 0.2310775325931727,
      "grad_norm": 0.9376920867552287,
      "learning_rate": 4.5650859337236256e-06,
      "loss": 0.1614,
      "step": 2508
    },
    {
      "epoch": 0.2311696687704427,
      "grad_norm": 0.9673814720139658,
      "learning_rate": 4.564656481980378e-06,
      "loss": 0.1733,
      "step": 2509
    },
    {
      "epoch": 0.23126180494771273,
      "grad_norm": 0.9152447120818502,
      "learning_rate": 4.564226838533553e-06,
      "loss": 0.1586,
      "step": 2510
    },
    {
      "epoch": 0.23135394112498273,
      "grad_norm": 0.8820356484085558,
      "learning_rate": 4.563797003423045e-06,
      "loss": 0.1673,
      "step": 2511
    },
    {
      "epoch": 0.23144607730225272,
      "grad_norm": 0.9391183631130411,
      "learning_rate": 4.563366976688762e-06,
      "loss": 0.1761,
      "step": 2512
    },
    {
      "epoch": 0.23153821347952275,
      "grad_norm": 0.9636950958740574,
      "learning_rate": 4.562936758370634e-06,
      "loss": 0.1637,
      "step": 2513
    },
    {
      "epoch": 0.23163034965679274,
      "grad_norm": 1.037525619091228,
      "learning_rate": 4.5625063485086065e-06,
      "loss": 0.1925,
      "step": 2514
    },
    {
      "epoch": 0.23172248583406274,
      "grad_norm": 0.94368533310796,
      "learning_rate": 4.562075747142641e-06,
      "loss": 0.1921,
      "step": 2515
    },
    {
      "epoch": 0.23181462201133274,
      "grad_norm": 0.9306148789692448,
      "learning_rate": 4.561644954312721e-06,
      "loss": 0.1609,
      "step": 2516
    },
    {
      "epoch": 0.23190675818860276,
      "grad_norm": 0.8962214850100089,
      "learning_rate": 4.561213970058845e-06,
      "loss": 0.1568,
      "step": 2517
    },
    {
      "epoch": 0.23199889436587276,
      "grad_norm": 0.9376759104456283,
      "learning_rate": 4.560782794421031e-06,
      "loss": 0.1733,
      "step": 2518
    },
    {
      "epoch": 0.23209103054314276,
      "grad_norm": 0.9774780989550023,
      "learning_rate": 4.5603514274393125e-06,
      "loss": 0.1688,
      "step": 2519
    },
    {
      "epoch": 0.23218316672041278,
      "grad_norm": 0.8467491530853,
      "learning_rate": 4.559919869153742e-06,
      "loss": 0.1526,
      "step": 2520
    },
    {
      "epoch": 0.23227530289768278,
      "grad_norm": 0.9208731668470556,
      "learning_rate": 4.559488119604389e-06,
      "loss": 0.1625,
      "step": 2521
    },
    {
      "epoch": 0.23236743907495278,
      "grad_norm": 1.0214193792829334,
      "learning_rate": 4.5590561788313435e-06,
      "loss": 0.1769,
      "step": 2522
    },
    {
      "epoch": 0.23245957525222277,
      "grad_norm": 0.9530626148431194,
      "learning_rate": 4.55862404687471e-06,
      "loss": 0.1768,
      "step": 2523
    },
    {
      "epoch": 0.2325517114294928,
      "grad_norm": 0.9441286615058059,
      "learning_rate": 4.558191723774612e-06,
      "loss": 0.1662,
      "step": 2524
    },
    {
      "epoch": 0.2326438476067628,
      "grad_norm": 0.9633992603927569,
      "learning_rate": 4.557759209571191e-06,
      "loss": 0.1649,
      "step": 2525
    },
    {
      "epoch": 0.2327359837840328,
      "grad_norm": 0.8883571013742829,
      "learning_rate": 4.557326504304606e-06,
      "loss": 0.1669,
      "step": 2526
    },
    {
      "epoch": 0.23282811996130282,
      "grad_norm": 0.9452980355449644,
      "learning_rate": 4.556893608015034e-06,
      "loss": 0.1695,
      "step": 2527
    },
    {
      "epoch": 0.2329202561385728,
      "grad_norm": 0.9022590301416014,
      "learning_rate": 4.556460520742669e-06,
      "loss": 0.1571,
      "step": 2528
    },
    {
      "epoch": 0.2330123923158428,
      "grad_norm": 0.929852406290282,
      "learning_rate": 4.556027242527723e-06,
      "loss": 0.1743,
      "step": 2529
    },
    {
      "epoch": 0.23310452849311283,
      "grad_norm": 0.966516869274674,
      "learning_rate": 4.555593773410426e-06,
      "loss": 0.1702,
      "step": 2530
    },
    {
      "epoch": 0.23319666467038283,
      "grad_norm": 0.8832043346632404,
      "learning_rate": 4.555160113431027e-06,
      "loss": 0.1653,
      "step": 2531
    },
    {
      "epoch": 0.23328880084765283,
      "grad_norm": 0.9671050400227129,
      "learning_rate": 4.554726262629789e-06,
      "loss": 0.1752,
      "step": 2532
    },
    {
      "epoch": 0.23338093702492282,
      "grad_norm": 0.8813498900554017,
      "learning_rate": 4.554292221046997e-06,
      "loss": 0.1535,
      "step": 2533
    },
    {
      "epoch": 0.23347307320219285,
      "grad_norm": 0.928958558583173,
      "learning_rate": 4.553857988722951e-06,
      "loss": 0.1611,
      "step": 2534
    },
    {
      "epoch": 0.23356520937946285,
      "grad_norm": 0.9717573709876711,
      "learning_rate": 4.55342356569797e-06,
      "loss": 0.1683,
      "step": 2535
    },
    {
      "epoch": 0.23365734555673284,
      "grad_norm": 0.9072668433737212,
      "learning_rate": 4.5529889520123896e-06,
      "loss": 0.17,
      "step": 2536
    },
    {
      "epoch": 0.23374948173400287,
      "grad_norm": 0.9099867455316708,
      "learning_rate": 4.5525541477065644e-06,
      "loss": 0.1746,
      "step": 2537
    },
    {
      "epoch": 0.23384161791127286,
      "grad_norm": 0.9050044764166337,
      "learning_rate": 4.552119152820866e-06,
      "loss": 0.1606,
      "step": 2538
    },
    {
      "epoch": 0.23393375408854286,
      "grad_norm": 1.0216026559059102,
      "learning_rate": 4.551683967395683e-06,
      "loss": 0.1692,
      "step": 2539
    },
    {
      "epoch": 0.23402589026581286,
      "grad_norm": 0.9592363893092558,
      "learning_rate": 4.5512485914714225e-06,
      "loss": 0.1649,
      "step": 2540
    },
    {
      "epoch": 0.23411802644308288,
      "grad_norm": 0.9168069315815013,
      "learning_rate": 4.55081302508851e-06,
      "loss": 0.165,
      "step": 2541
    },
    {
      "epoch": 0.23421016262035288,
      "grad_norm": 0.918494290815671,
      "learning_rate": 4.550377268287387e-06,
      "loss": 0.1698,
      "step": 2542
    },
    {
      "epoch": 0.23430229879762288,
      "grad_norm": 0.9922874023864924,
      "learning_rate": 4.549941321108514e-06,
      "loss": 0.1714,
      "step": 2543
    },
    {
      "epoch": 0.2343944349748929,
      "grad_norm": 1.0043624274520886,
      "learning_rate": 4.549505183592368e-06,
      "loss": 0.1849,
      "step": 2544
    },
    {
      "epoch": 0.2344865711521629,
      "grad_norm": 0.9095985103827297,
      "learning_rate": 4.549068855779447e-06,
      "loss": 0.1526,
      "step": 2545
    },
    {
      "epoch": 0.2345787073294329,
      "grad_norm": 0.9878365543468702,
      "learning_rate": 4.5486323377102615e-06,
      "loss": 0.1765,
      "step": 2546
    },
    {
      "epoch": 0.23467084350670292,
      "grad_norm": 0.8155329876941606,
      "learning_rate": 4.548195629425343e-06,
      "loss": 0.1394,
      "step": 2547
    },
    {
      "epoch": 0.23476297968397292,
      "grad_norm": 0.9100479491854085,
      "learning_rate": 4.547758730965239e-06,
      "loss": 0.1598,
      "step": 2548
    },
    {
      "epoch": 0.2348551158612429,
      "grad_norm": 0.9992825507444053,
      "learning_rate": 4.547321642370517e-06,
      "loss": 0.1826,
      "step": 2549
    },
    {
      "epoch": 0.2349472520385129,
      "grad_norm": 0.9061188471199144,
      "learning_rate": 4.5468843636817605e-06,
      "loss": 0.1576,
      "step": 2550
    },
    {
      "epoch": 0.23503938821578294,
      "grad_norm": 0.9626546650825835,
      "learning_rate": 4.54644689493957e-06,
      "loss": 0.1722,
      "step": 2551
    },
    {
      "epoch": 0.23513152439305293,
      "grad_norm": 0.9163551094540457,
      "learning_rate": 4.546009236184565e-06,
      "loss": 0.1463,
      "step": 2552
    },
    {
      "epoch": 0.23522366057032293,
      "grad_norm": 0.9758440630401635,
      "learning_rate": 4.545571387457382e-06,
      "loss": 0.1688,
      "step": 2553
    },
    {
      "epoch": 0.23531579674759295,
      "grad_norm": 0.9592381292109499,
      "learning_rate": 4.545133348798677e-06,
      "loss": 0.1758,
      "step": 2554
    },
    {
      "epoch": 0.23540793292486295,
      "grad_norm": 1.0261072582249307,
      "learning_rate": 4.54469512024912e-06,
      "loss": 0.1713,
      "step": 2555
    },
    {
      "epoch": 0.23550006910213295,
      "grad_norm": 0.9247050273249168,
      "learning_rate": 4.5442567018494e-06,
      "loss": 0.1625,
      "step": 2556
    },
    {
      "epoch": 0.23559220527940294,
      "grad_norm": 0.8985845157903278,
      "learning_rate": 4.543818093640226e-06,
      "loss": 0.174,
      "step": 2557
    },
    {
      "epoch": 0.23568434145667297,
      "grad_norm": 0.9681388653177704,
      "learning_rate": 4.543379295662322e-06,
      "loss": 0.1713,
      "step": 2558
    },
    {
      "epoch": 0.23577647763394297,
      "grad_norm": 0.9448366597258528,
      "learning_rate": 4.54294030795643e-06,
      "loss": 0.1573,
      "step": 2559
    },
    {
      "epoch": 0.23586861381121296,
      "grad_norm": 0.9314244476538863,
      "learning_rate": 4.5425011305633106e-06,
      "loss": 0.1792,
      "step": 2560
    },
    {
      "epoch": 0.235960749988483,
      "grad_norm": 1.0225904621056787,
      "learning_rate": 4.542061763523742e-06,
      "loss": 0.1756,
      "step": 2561
    },
    {
      "epoch": 0.23605288616575298,
      "grad_norm": 0.9305300082933172,
      "learning_rate": 4.541622206878519e-06,
      "loss": 0.1614,
      "step": 2562
    },
    {
      "epoch": 0.23614502234302298,
      "grad_norm": 0.9248622005950687,
      "learning_rate": 4.541182460668453e-06,
      "loss": 0.1735,
      "step": 2563
    },
    {
      "epoch": 0.236237158520293,
      "grad_norm": 0.9311672044363726,
      "learning_rate": 4.540742524934377e-06,
      "loss": 0.1728,
      "step": 2564
    },
    {
      "epoch": 0.236329294697563,
      "grad_norm": 0.9839928437689336,
      "learning_rate": 4.540302399717138e-06,
      "loss": 0.1706,
      "step": 2565
    },
    {
      "epoch": 0.236421430874833,
      "grad_norm": 0.9426141389876795,
      "learning_rate": 4.5398620850576016e-06,
      "loss": 0.162,
      "step": 2566
    },
    {
      "epoch": 0.236513567052103,
      "grad_norm": 0.8881256101693608,
      "learning_rate": 4.53942158099665e-06,
      "loss": 0.1562,
      "step": 2567
    },
    {
      "epoch": 0.23660570322937302,
      "grad_norm": 0.9105265463755824,
      "learning_rate": 4.538980887575187e-06,
      "loss": 0.1598,
      "step": 2568
    },
    {
      "epoch": 0.23669783940664302,
      "grad_norm": 0.8733143352802549,
      "learning_rate": 4.538540004834128e-06,
      "loss": 0.155,
      "step": 2569
    },
    {
      "epoch": 0.23678997558391301,
      "grad_norm": 0.885143091633039,
      "learning_rate": 4.53809893281441e-06,
      "loss": 0.1525,
      "step": 2570
    },
    {
      "epoch": 0.23688211176118304,
      "grad_norm": 0.9585313460757532,
      "learning_rate": 4.537657671556987e-06,
      "loss": 0.1646,
      "step": 2571
    },
    {
      "epoch": 0.23697424793845304,
      "grad_norm": 0.9948560820566639,
      "learning_rate": 4.5372162211028305e-06,
      "loss": 0.1807,
      "step": 2572
    },
    {
      "epoch": 0.23706638411572303,
      "grad_norm": 0.9080172248460118,
      "learning_rate": 4.536774581492928e-06,
      "loss": 0.1494,
      "step": 2573
    },
    {
      "epoch": 0.23715852029299303,
      "grad_norm": 0.9556030654445338,
      "learning_rate": 4.5363327527682855e-06,
      "loss": 0.1593,
      "step": 2574
    },
    {
      "epoch": 0.23725065647026305,
      "grad_norm": 0.919600867467294,
      "learning_rate": 4.535890734969929e-06,
      "loss": 0.1704,
      "step": 2575
    },
    {
      "epoch": 0.23734279264753305,
      "grad_norm": 1.0317406838521088,
      "learning_rate": 4.535448528138899e-06,
      "loss": 0.176,
      "step": 2576
    },
    {
      "epoch": 0.23743492882480305,
      "grad_norm": 0.943160391590626,
      "learning_rate": 4.535006132316253e-06,
      "loss": 0.1911,
      "step": 2577
    },
    {
      "epoch": 0.23752706500207307,
      "grad_norm": 0.8917538056406878,
      "learning_rate": 4.534563547543069e-06,
      "loss": 0.1606,
      "step": 2578
    },
    {
      "epoch": 0.23761920117934307,
      "grad_norm": 0.9851079183758169,
      "learning_rate": 4.53412077386044e-06,
      "loss": 0.1817,
      "step": 2579
    },
    {
      "epoch": 0.23771133735661307,
      "grad_norm": 0.9027548501120674,
      "learning_rate": 4.533677811309479e-06,
      "loss": 0.1672,
      "step": 2580
    },
    {
      "epoch": 0.2378034735338831,
      "grad_norm": 0.945909808985898,
      "learning_rate": 4.533234659931315e-06,
      "loss": 0.1845,
      "step": 2581
    },
    {
      "epoch": 0.2378956097111531,
      "grad_norm": 0.8521446168221071,
      "learning_rate": 4.532791319767093e-06,
      "loss": 0.1434,
      "step": 2582
    },
    {
      "epoch": 0.23798774588842309,
      "grad_norm": 0.9503670223175602,
      "learning_rate": 4.532347790857978e-06,
      "loss": 0.1808,
      "step": 2583
    },
    {
      "epoch": 0.23807988206569308,
      "grad_norm": 0.9063415363527835,
      "learning_rate": 4.531904073245152e-06,
      "loss": 0.1701,
      "step": 2584
    },
    {
      "epoch": 0.2381720182429631,
      "grad_norm": 0.8789860283816323,
      "learning_rate": 4.531460166969816e-06,
      "loss": 0.1668,
      "step": 2585
    },
    {
      "epoch": 0.2382641544202331,
      "grad_norm": 0.9391306489506216,
      "learning_rate": 4.531016072073182e-06,
      "loss": 0.1697,
      "step": 2586
    },
    {
      "epoch": 0.2383562905975031,
      "grad_norm": 0.9586292371328816,
      "learning_rate": 4.530571788596489e-06,
      "loss": 0.166,
      "step": 2587
    },
    {
      "epoch": 0.23844842677477313,
      "grad_norm": 0.9708360436328097,
      "learning_rate": 4.530127316580986e-06,
      "loss": 0.1775,
      "step": 2588
    },
    {
      "epoch": 0.23854056295204312,
      "grad_norm": 0.8840283994456286,
      "learning_rate": 4.5296826560679445e-06,
      "loss": 0.1493,
      "step": 2589
    },
    {
      "epoch": 0.23863269912931312,
      "grad_norm": 0.9292858225886818,
      "learning_rate": 4.529237807098649e-06,
      "loss": 0.1625,
      "step": 2590
    },
    {
      "epoch": 0.23872483530658312,
      "grad_norm": 0.8977136056150155,
      "learning_rate": 4.528792769714404e-06,
      "loss": 0.1758,
      "step": 2591
    },
    {
      "epoch": 0.23881697148385314,
      "grad_norm": 1.0151817811358843,
      "learning_rate": 4.528347543956533e-06,
      "loss": 0.1596,
      "step": 2592
    },
    {
      "epoch": 0.23890910766112314,
      "grad_norm": 0.9335523901294361,
      "learning_rate": 4.527902129866374e-06,
      "loss": 0.1732,
      "step": 2593
    },
    {
      "epoch": 0.23900124383839313,
      "grad_norm": 0.9256864277979158,
      "learning_rate": 4.527456527485284e-06,
      "loss": 0.1683,
      "step": 2594
    },
    {
      "epoch": 0.23909338001566316,
      "grad_norm": 1.0210336286294275,
      "learning_rate": 4.527010736854637e-06,
      "loss": 0.1601,
      "step": 2595
    },
    {
      "epoch": 0.23918551619293316,
      "grad_norm": 0.960562630209394,
      "learning_rate": 4.526564758015825e-06,
      "loss": 0.1597,
      "step": 2596
    },
    {
      "epoch": 0.23927765237020315,
      "grad_norm": 0.9923091348049548,
      "learning_rate": 4.5261185910102575e-06,
      "loss": 0.1749,
      "step": 2597
    },
    {
      "epoch": 0.23936978854747318,
      "grad_norm": 0.9185996987364619,
      "learning_rate": 4.525672235879361e-06,
      "loss": 0.1757,
      "step": 2598
    },
    {
      "epoch": 0.23946192472474317,
      "grad_norm": 0.9504821525147511,
      "learning_rate": 4.5252256926645786e-06,
      "loss": 0.1733,
      "step": 2599
    },
    {
      "epoch": 0.23955406090201317,
      "grad_norm": 0.8817193190901266,
      "learning_rate": 4.5247789614073725e-06,
      "loss": 0.1518,
      "step": 2600
    },
    {
      "epoch": 0.23964619707928317,
      "grad_norm": 0.8998627255833563,
      "learning_rate": 4.524332042149223e-06,
      "loss": 0.1598,
      "step": 2601
    },
    {
      "epoch": 0.2397383332565532,
      "grad_norm": 0.8916195806919052,
      "learning_rate": 4.523884934931624e-06,
      "loss": 0.1536,
      "step": 2602
    },
    {
      "epoch": 0.2398304694338232,
      "grad_norm": 0.8969910004039,
      "learning_rate": 4.523437639796092e-06,
      "loss": 0.1558,
      "step": 2603
    },
    {
      "epoch": 0.2399226056110932,
      "grad_norm": 0.9579370144538515,
      "learning_rate": 4.522990156784157e-06,
      "loss": 0.1696,
      "step": 2604
    },
    {
      "epoch": 0.2400147417883632,
      "grad_norm": 0.9256485173070821,
      "learning_rate": 4.522542485937369e-06,
      "loss": 0.165,
      "step": 2605
    },
    {
      "epoch": 0.2401068779656332,
      "grad_norm": 0.8763097473674882,
      "learning_rate": 4.522094627297293e-06,
      "loss": 0.1518,
      "step": 2606
    },
    {
      "epoch": 0.2401990141429032,
      "grad_norm": 0.9590186614529309,
      "learning_rate": 4.521646580905513e-06,
      "loss": 0.1663,
      "step": 2607
    },
    {
      "epoch": 0.2402911503201732,
      "grad_norm": 0.8953321886812519,
      "learning_rate": 4.521198346803631e-06,
      "loss": 0.156,
      "step": 2608
    },
    {
      "epoch": 0.24038328649744323,
      "grad_norm": 0.9207768164702115,
      "learning_rate": 4.520749925033264e-06,
      "loss": 0.1607,
      "step": 2609
    },
    {
      "epoch": 0.24047542267471322,
      "grad_norm": 0.919939995459796,
      "learning_rate": 4.52030131563605e-06,
      "loss": 0.1725,
      "step": 2610
    },
    {
      "epoch": 0.24056755885198322,
      "grad_norm": 0.9646077134116254,
      "learning_rate": 4.519852518653641e-06,
      "loss": 0.167,
      "step": 2611
    },
    {
      "epoch": 0.24065969502925325,
      "grad_norm": 0.905875090197065,
      "learning_rate": 4.519403534127709e-06,
      "loss": 0.1651,
      "step": 2612
    },
    {
      "epoch": 0.24075183120652324,
      "grad_norm": 0.9733708732781958,
      "learning_rate": 4.51895436209994e-06,
      "loss": 0.1682,
      "step": 2613
    },
    {
      "epoch": 0.24084396738379324,
      "grad_norm": 0.9138941832811526,
      "learning_rate": 4.5185050026120425e-06,
      "loss": 0.1581,
      "step": 2614
    },
    {
      "epoch": 0.24093610356106326,
      "grad_norm": 0.8714836624140244,
      "learning_rate": 4.5180554557057376e-06,
      "loss": 0.1607,
      "step": 2615
    },
    {
      "epoch": 0.24102823973833326,
      "grad_norm": 0.896212162592746,
      "learning_rate": 4.5176057214227665e-06,
      "loss": 0.1557,
      "step": 2616
    },
    {
      "epoch": 0.24112037591560326,
      "grad_norm": 0.9795900304013812,
      "learning_rate": 4.517155799804888e-06,
      "loss": 0.1594,
      "step": 2617
    },
    {
      "epoch": 0.24121251209287325,
      "grad_norm": 0.9620751253748585,
      "learning_rate": 4.516705690893874e-06,
      "loss": 0.1746,
      "step": 2618
    },
    {
      "epoch": 0.24130464827014328,
      "grad_norm": 0.9766714887522009,
      "learning_rate": 4.516255394731522e-06,
      "loss": 0.1655,
      "step": 2619
    },
    {
      "epoch": 0.24139678444741328,
      "grad_norm": 0.9644944957480073,
      "learning_rate": 4.515804911359639e-06,
      "loss": 0.1604,
      "step": 2620
    },
    {
      "epoch": 0.24148892062468327,
      "grad_norm": 1.0028858940407497,
      "learning_rate": 4.5153542408200524e-06,
      "loss": 0.1666,
      "step": 2621
    },
    {
      "epoch": 0.2415810568019533,
      "grad_norm": 0.9909913147953266,
      "learning_rate": 4.514903383154608e-06,
      "loss": 0.1715,
      "step": 2622
    },
    {
      "epoch": 0.2416731929792233,
      "grad_norm": 0.9720807397909752,
      "learning_rate": 4.5144523384051675e-06,
      "loss": 0.1704,
      "step": 2623
    },
    {
      "epoch": 0.2417653291564933,
      "grad_norm": 0.9983520531859589,
      "learning_rate": 4.514001106613611e-06,
      "loss": 0.1673,
      "step": 2624
    },
    {
      "epoch": 0.2418574653337633,
      "grad_norm": 0.871023507107817,
      "learning_rate": 4.513549687821834e-06,
      "loss": 0.1598,
      "step": 2625
    },
    {
      "epoch": 0.2419496015110333,
      "grad_norm": 0.875727688350462,
      "learning_rate": 4.513098082071753e-06,
      "loss": 0.1796,
      "step": 2626
    },
    {
      "epoch": 0.2420417376883033,
      "grad_norm": 0.9984200375498948,
      "learning_rate": 4.512646289405298e-06,
      "loss": 0.1737,
      "step": 2627
    },
    {
      "epoch": 0.2421338738655733,
      "grad_norm": 0.9219824808671676,
      "learning_rate": 4.5121943098644185e-06,
      "loss": 0.1727,
      "step": 2628
    },
    {
      "epoch": 0.24222601004284333,
      "grad_norm": 0.903236560497671,
      "learning_rate": 4.5117421434910805e-06,
      "loss": 0.1563,
      "step": 2629
    },
    {
      "epoch": 0.24231814622011333,
      "grad_norm": 1.1088373746993974,
      "learning_rate": 4.511289790327268e-06,
      "loss": 0.1795,
      "step": 2630
    },
    {
      "epoch": 0.24241028239738333,
      "grad_norm": 0.9099632964748022,
      "learning_rate": 4.510837250414982e-06,
      "loss": 0.1802,
      "step": 2631
    },
    {
      "epoch": 0.24250241857465335,
      "grad_norm": 0.9625764621719629,
      "learning_rate": 4.5103845237962405e-06,
      "loss": 0.1824,
      "step": 2632
    },
    {
      "epoch": 0.24259455475192335,
      "grad_norm": 0.9045197623683646,
      "learning_rate": 4.509931610513081e-06,
      "loss": 0.1535,
      "step": 2633
    },
    {
      "epoch": 0.24268669092919334,
      "grad_norm": 0.8883996221321223,
      "learning_rate": 4.509478510607553e-06,
      "loss": 0.162,
      "step": 2634
    },
    {
      "epoch": 0.24277882710646334,
      "grad_norm": 0.8733565291886327,
      "learning_rate": 4.509025224121732e-06,
      "loss": 0.1488,
      "step": 2635
    },
    {
      "epoch": 0.24287096328373337,
      "grad_norm": 0.9323593147387864,
      "learning_rate": 4.5085717510977e-06,
      "loss": 0.1832,
      "step": 2636
    },
    {
      "epoch": 0.24296309946100336,
      "grad_norm": 0.8330195920047772,
      "learning_rate": 4.508118091577566e-06,
      "loss": 0.1578,
      "step": 2637
    },
    {
      "epoch": 0.24305523563827336,
      "grad_norm": 1.0178111918491635,
      "learning_rate": 4.507664245603451e-06,
      "loss": 0.1661,
      "step": 2638
    },
    {
      "epoch": 0.24314737181554338,
      "grad_norm": 0.9034888786182202,
      "learning_rate": 4.507210213217495e-06,
      "loss": 0.1695,
      "step": 2639
    },
    {
      "epoch": 0.24323950799281338,
      "grad_norm": 0.9159922991117291,
      "learning_rate": 4.506755994461853e-06,
      "loss": 0.1803,
      "step": 2640
    },
    {
      "epoch": 0.24333164417008338,
      "grad_norm": 0.9496166120744165,
      "learning_rate": 4.506301589378703e-06,
      "loss": 0.1754,
      "step": 2641
    },
    {
      "epoch": 0.24342378034735337,
      "grad_norm": 0.927288722206701,
      "learning_rate": 4.5058469980102336e-06,
      "loss": 0.1608,
      "step": 2642
    },
    {
      "epoch": 0.2435159165246234,
      "grad_norm": 0.9357101673472532,
      "learning_rate": 4.505392220398655e-06,
      "loss": 0.1743,
      "step": 2643
    },
    {
      "epoch": 0.2436080527018934,
      "grad_norm": 0.8967894186666624,
      "learning_rate": 4.504937256586192e-06,
      "loss": 0.1614,
      "step": 2644
    },
    {
      "epoch": 0.2437001888791634,
      "grad_norm": 1.008390819829681,
      "learning_rate": 4.50448210661509e-06,
      "loss": 0.18,
      "step": 2645
    },
    {
      "epoch": 0.24379232505643342,
      "grad_norm": 0.8959777511685124,
      "learning_rate": 4.504026770527607e-06,
      "loss": 0.1625,
      "step": 2646
    },
    {
      "epoch": 0.24388446123370341,
      "grad_norm": 0.883975734164676,
      "learning_rate": 4.503571248366024e-06,
      "loss": 0.1644,
      "step": 2647
    },
    {
      "epoch": 0.2439765974109734,
      "grad_norm": 0.9447762260150692,
      "learning_rate": 4.503115540172636e-06,
      "loss": 0.1766,
      "step": 2648
    },
    {
      "epoch": 0.24406873358824344,
      "grad_norm": 0.9023969503174291,
      "learning_rate": 4.502659645989753e-06,
      "loss": 0.1517,
      "step": 2649
    },
    {
      "epoch": 0.24416086976551343,
      "grad_norm": 0.8786423359540836,
      "learning_rate": 4.502203565859706e-06,
      "loss": 0.1548,
      "step": 2650
    },
    {
      "epoch": 0.24425300594278343,
      "grad_norm": 0.9129953675126846,
      "learning_rate": 4.501747299824843e-06,
      "loss": 0.1769,
      "step": 2651
    },
    {
      "epoch": 0.24434514212005343,
      "grad_norm": 0.8685998545587346,
      "learning_rate": 4.501290847927529e-06,
      "loss": 0.158,
      "step": 2652
    },
    {
      "epoch": 0.24443727829732345,
      "grad_norm": 0.9169127256515894,
      "learning_rate": 4.500834210210143e-06,
      "loss": 0.1536,
      "step": 2653
    },
    {
      "epoch": 0.24452941447459345,
      "grad_norm": 0.9004372536575974,
      "learning_rate": 4.500377386715086e-06,
      "loss": 0.1561,
      "step": 2654
    },
    {
      "epoch": 0.24462155065186345,
      "grad_norm": 0.9171923195813083,
      "learning_rate": 4.499920377484772e-06,
      "loss": 0.1561,
      "step": 2655
    },
    {
      "epoch": 0.24471368682913347,
      "grad_norm": 0.9970316417826179,
      "learning_rate": 4.499463182561637e-06,
      "loss": 0.1781,
      "step": 2656
    },
    {
      "epoch": 0.24480582300640347,
      "grad_norm": 0.8986690107368408,
      "learning_rate": 4.49900580198813e-06,
      "loss": 0.163,
      "step": 2657
    },
    {
      "epoch": 0.24489795918367346,
      "grad_norm": 0.8609935269764124,
      "learning_rate": 4.498548235806719e-06,
      "loss": 0.1552,
      "step": 2658
    },
    {
      "epoch": 0.24499009536094346,
      "grad_norm": 0.8736993468630774,
      "learning_rate": 4.4980904840598894e-06,
      "loss": 0.1552,
      "step": 2659
    },
    {
      "epoch": 0.24508223153821349,
      "grad_norm": 0.9081273596019058,
      "learning_rate": 4.497632546790143e-06,
      "loss": 0.1512,
      "step": 2660
    },
    {
      "epoch": 0.24517436771548348,
      "grad_norm": 0.9102031151269082,
      "learning_rate": 4.49717442404e-06,
      "loss": 0.1595,
      "step": 2661
    },
    {
      "epoch": 0.24526650389275348,
      "grad_norm": 0.9021316326942003,
      "learning_rate": 4.496716115851996e-06,
      "loss": 0.1717,
      "step": 2662
    },
    {
      "epoch": 0.2453586400700235,
      "grad_norm": 0.9719049140168042,
      "learning_rate": 4.496257622268687e-06,
      "loss": 0.1615,
      "step": 2663
    },
    {
      "epoch": 0.2454507762472935,
      "grad_norm": 0.9727092079229075,
      "learning_rate": 4.495798943332642e-06,
      "loss": 0.1579,
      "step": 2664
    },
    {
      "epoch": 0.2455429124245635,
      "grad_norm": 0.8986708563300652,
      "learning_rate": 4.495340079086451e-06,
      "loss": 0.169,
      "step": 2665
    },
    {
      "epoch": 0.24563504860183352,
      "grad_norm": 0.949471301620958,
      "learning_rate": 4.494881029572718e-06,
      "loss": 0.1704,
      "step": 2666
    },
    {
      "epoch": 0.24572718477910352,
      "grad_norm": 0.9152647484984884,
      "learning_rate": 4.494421794834068e-06,
      "loss": 0.1605,
      "step": 2667
    },
    {
      "epoch": 0.24581932095637352,
      "grad_norm": 1.0020699225934224,
      "learning_rate": 4.4939623749131385e-06,
      "loss": 0.1787,
      "step": 2668
    },
    {
      "epoch": 0.2459114571336435,
      "grad_norm": 0.9225724208528915,
      "learning_rate": 4.493502769852589e-06,
      "loss": 0.1645,
      "step": 2669
    },
    {
      "epoch": 0.24600359331091354,
      "grad_norm": 0.9282588859001433,
      "learning_rate": 4.493042979695092e-06,
      "loss": 0.1706,
      "step": 2670
    },
    {
      "epoch": 0.24609572948818353,
      "grad_norm": 0.950681578689861,
      "learning_rate": 4.4925830044833405e-06,
      "loss": 0.1632,
      "step": 2671
    },
    {
      "epoch": 0.24618786566545353,
      "grad_norm": 0.9578740886589785,
      "learning_rate": 4.492122844260042e-06,
      "loss": 0.1645,
      "step": 2672
    },
    {
      "epoch": 0.24628000184272356,
      "grad_norm": 0.9183491188538061,
      "learning_rate": 4.491662499067923e-06,
      "loss": 0.1684,
      "step": 2673
    },
    {
      "epoch": 0.24637213801999355,
      "grad_norm": 0.886920316373167,
      "learning_rate": 4.491201968949726e-06,
      "loss": 0.1425,
      "step": 2674
    },
    {
      "epoch": 0.24646427419726355,
      "grad_norm": 0.9515433294511995,
      "learning_rate": 4.490741253948213e-06,
      "loss": 0.1617,
      "step": 2675
    },
    {
      "epoch": 0.24655641037453357,
      "grad_norm": 0.9513213792540711,
      "learning_rate": 4.49028035410616e-06,
      "loss": 0.161,
      "step": 2676
    },
    {
      "epoch": 0.24664854655180357,
      "grad_norm": 0.9553852848159456,
      "learning_rate": 4.489819269466362e-06,
      "loss": 0.1646,
      "step": 2677
    },
    {
      "epoch": 0.24674068272907357,
      "grad_norm": 1.0489521515871678,
      "learning_rate": 4.489358000071631e-06,
      "loss": 0.1591,
      "step": 2678
    },
    {
      "epoch": 0.24683281890634357,
      "grad_norm": 0.9143172794273362,
      "learning_rate": 4.488896545964795e-06,
      "loss": 0.1642,
      "step": 2679
    },
    {
      "epoch": 0.2469249550836136,
      "grad_norm": 0.8760022568204034,
      "learning_rate": 4.4884349071887e-06,
      "loss": 0.1506,
      "step": 2680
    },
    {
      "epoch": 0.2470170912608836,
      "grad_norm": 0.9208733869067626,
      "learning_rate": 4.487973083786211e-06,
      "loss": 0.1668,
      "step": 2681
    },
    {
      "epoch": 0.24710922743815358,
      "grad_norm": 0.940764657196804,
      "learning_rate": 4.4875110758002076e-06,
      "loss": 0.1731,
      "step": 2682
    },
    {
      "epoch": 0.2472013636154236,
      "grad_norm": 0.975836350447163,
      "learning_rate": 4.487048883273586e-06,
      "loss": 0.162,
      "step": 2683
    },
    {
      "epoch": 0.2472934997926936,
      "grad_norm": 0.9104989847581315,
      "learning_rate": 4.486586506249262e-06,
      "loss": 0.1656,
      "step": 2684
    },
    {
      "epoch": 0.2473856359699636,
      "grad_norm": 0.8994565355652719,
      "learning_rate": 4.486123944770166e-06,
      "loss": 0.1649,
      "step": 2685
    },
    {
      "epoch": 0.2474777721472336,
      "grad_norm": 0.9958969115514426,
      "learning_rate": 4.48566119887925e-06,
      "loss": 0.1773,
      "step": 2686
    },
    {
      "epoch": 0.24756990832450362,
      "grad_norm": 0.8853726143372311,
      "learning_rate": 4.4851982686194775e-06,
      "loss": 0.1567,
      "step": 2687
    },
    {
      "epoch": 0.24766204450177362,
      "grad_norm": 0.9261982366343027,
      "learning_rate": 4.484735154033831e-06,
      "loss": 0.1593,
      "step": 2688
    },
    {
      "epoch": 0.24775418067904362,
      "grad_norm": 0.9154568975953823,
      "learning_rate": 4.484271855165312e-06,
      "loss": 0.1694,
      "step": 2689
    },
    {
      "epoch": 0.24784631685631364,
      "grad_norm": 0.9484967646834224,
      "learning_rate": 4.483808372056939e-06,
      "loss": 0.1714,
      "step": 2690
    },
    {
      "epoch": 0.24793845303358364,
      "grad_norm": 0.9634197695354942,
      "learning_rate": 4.483344704751745e-06,
      "loss": 0.1791,
      "step": 2691
    },
    {
      "epoch": 0.24803058921085364,
      "grad_norm": 0.9700386725862131,
      "learning_rate": 4.48288085329278e-06,
      "loss": 0.1642,
      "step": 2692
    },
    {
      "epoch": 0.24812272538812366,
      "grad_norm": 0.8622018190901424,
      "learning_rate": 4.482416817723115e-06,
      "loss": 0.1619,
      "step": 2693
    },
    {
      "epoch": 0.24821486156539366,
      "grad_norm": 0.9328281803476557,
      "learning_rate": 4.481952598085836e-06,
      "loss": 0.1816,
      "step": 2694
    },
    {
      "epoch": 0.24830699774266365,
      "grad_norm": 0.8818479936219914,
      "learning_rate": 4.481488194424044e-06,
      "loss": 0.1502,
      "step": 2695
    },
    {
      "epoch": 0.24839913391993365,
      "grad_norm": 0.9726880839105281,
      "learning_rate": 4.481023606780861e-06,
      "loss": 0.1681,
      "step": 2696
    },
    {
      "epoch": 0.24849127009720368,
      "grad_norm": 0.9194992416431299,
      "learning_rate": 4.480558835199422e-06,
      "loss": 0.1611,
      "step": 2697
    },
    {
      "epoch": 0.24858340627447367,
      "grad_norm": 0.8483395943971769,
      "learning_rate": 4.4800938797228825e-06,
      "loss": 0.1508,
      "step": 2698
    },
    {
      "epoch": 0.24867554245174367,
      "grad_norm": 0.863379180784911,
      "learning_rate": 4.479628740394412e-06,
      "loss": 0.1424,
      "step": 2699
    },
    {
      "epoch": 0.2487676786290137,
      "grad_norm": 0.918067359903417,
      "learning_rate": 4.4791634172572015e-06,
      "loss": 0.1557,
      "step": 2700
    },
    {
      "epoch": 0.2488598148062837,
      "grad_norm": 0.9248335136551753,
      "learning_rate": 4.478697910354455e-06,
      "loss": 0.1647,
      "step": 2701
    },
    {
      "epoch": 0.2489519509835537,
      "grad_norm": 0.9227062178559122,
      "learning_rate": 4.4782322197293935e-06,
      "loss": 0.1508,
      "step": 2702
    },
    {
      "epoch": 0.24904408716082369,
      "grad_norm": 0.9355481138879411,
      "learning_rate": 4.477766345425257e-06,
      "loss": 0.1669,
      "step": 2703
    },
    {
      "epoch": 0.2491362233380937,
      "grad_norm": 0.9262215971301258,
      "learning_rate": 4.4773002874853035e-06,
      "loss": 0.166,
      "step": 2704
    },
    {
      "epoch": 0.2492283595153637,
      "grad_norm": 0.939773739473119,
      "learning_rate": 4.476834045952805e-06,
      "loss": 0.1692,
      "step": 2705
    },
    {
      "epoch": 0.2493204956926337,
      "grad_norm": 0.9233835051003554,
      "learning_rate": 4.476367620871053e-06,
      "loss": 0.1678,
      "step": 2706
    },
    {
      "epoch": 0.24941263186990373,
      "grad_norm": 0.9959179911656888,
      "learning_rate": 4.475901012283354e-06,
      "loss": 0.1719,
      "step": 2707
    },
    {
      "epoch": 0.24950476804717373,
      "grad_norm": 0.9906403922433501,
      "learning_rate": 4.475434220233034e-06,
      "loss": 0.171,
      "step": 2708
    },
    {
      "epoch": 0.24959690422444372,
      "grad_norm": 0.9654043551251009,
      "learning_rate": 4.474967244763434e-06,
      "loss": 0.1524,
      "step": 2709
    },
    {
      "epoch": 0.24968904040171375,
      "grad_norm": 0.8859687185368176,
      "learning_rate": 4.474500085917912e-06,
      "loss": 0.1626,
      "step": 2710
    },
    {
      "epoch": 0.24978117657898374,
      "grad_norm": 0.9742371995625955,
      "learning_rate": 4.474032743739846e-06,
      "loss": 0.1681,
      "step": 2711
    },
    {
      "epoch": 0.24987331275625374,
      "grad_norm": 0.8800994362044134,
      "learning_rate": 4.4735652182726265e-06,
      "loss": 0.1534,
      "step": 2712
    },
    {
      "epoch": 0.24996544893352374,
      "grad_norm": 0.9751280188862264,
      "learning_rate": 4.473097509559664e-06,
      "loss": 0.1629,
      "step": 2713
    },
    {
      "epoch": 0.25005758511079373,
      "grad_norm": 0.9597390984328364,
      "learning_rate": 4.472629617644385e-06,
      "loss": 0.1634,
      "step": 2714
    },
    {
      "epoch": 0.25014972128806373,
      "grad_norm": 0.8782469623492604,
      "learning_rate": 4.472161542570234e-06,
      "loss": 0.1465,
      "step": 2715
    },
    {
      "epoch": 0.2502418574653338,
      "grad_norm": 1.0126355329165513,
      "learning_rate": 4.4716932843806715e-06,
      "loss": 0.1885,
      "step": 2716
    },
    {
      "epoch": 0.2503339936426038,
      "grad_norm": 0.9689860924375248,
      "learning_rate": 4.471224843119176e-06,
      "loss": 0.1622,
      "step": 2717
    },
    {
      "epoch": 0.2504261298198738,
      "grad_norm": 1.0502120884934425,
      "learning_rate": 4.470756218829241e-06,
      "loss": 0.1806,
      "step": 2718
    },
    {
      "epoch": 0.2505182659971438,
      "grad_norm": 0.8987777249287304,
      "learning_rate": 4.470287411554379e-06,
      "loss": 0.1517,
      "step": 2719
    },
    {
      "epoch": 0.25061040217441377,
      "grad_norm": 1.0238603313699401,
      "learning_rate": 4.469818421338119e-06,
      "loss": 0.1562,
      "step": 2720
    },
    {
      "epoch": 0.25070253835168377,
      "grad_norm": 1.0334880871837124,
      "learning_rate": 4.469349248224007e-06,
      "loss": 0.1943,
      "step": 2721
    },
    {
      "epoch": 0.2507946745289538,
      "grad_norm": 0.9433556577855376,
      "learning_rate": 4.468879892255604e-06,
      "loss": 0.155,
      "step": 2722
    },
    {
      "epoch": 0.2508868107062238,
      "grad_norm": 0.9291327476122262,
      "learning_rate": 4.4684103534764925e-06,
      "loss": 0.1684,
      "step": 2723
    },
    {
      "epoch": 0.2509789468834938,
      "grad_norm": 0.9294984576948387,
      "learning_rate": 4.467940631930267e-06,
      "loss": 0.1643,
      "step": 2724
    },
    {
      "epoch": 0.2510710830607638,
      "grad_norm": 0.8568728555781507,
      "learning_rate": 4.467470727660543e-06,
      "loss": 0.161,
      "step": 2725
    },
    {
      "epoch": 0.2511632192380338,
      "grad_norm": 0.9282762970010767,
      "learning_rate": 4.467000640710949e-06,
      "loss": 0.1657,
      "step": 2726
    },
    {
      "epoch": 0.2512553554153038,
      "grad_norm": 0.9137764648161802,
      "learning_rate": 4.466530371125135e-06,
      "loss": 0.1627,
      "step": 2727
    },
    {
      "epoch": 0.2513474915925738,
      "grad_norm": 0.9720124135320236,
      "learning_rate": 4.4660599189467634e-06,
      "loss": 0.1748,
      "step": 2728
    },
    {
      "epoch": 0.25143962776984385,
      "grad_norm": 1.0387321685221882,
      "learning_rate": 4.465589284219517e-06,
      "loss": 0.1823,
      "step": 2729
    },
    {
      "epoch": 0.25153176394711385,
      "grad_norm": 0.953514571101409,
      "learning_rate": 4.465118466987094e-06,
      "loss": 0.1749,
      "step": 2730
    },
    {
      "epoch": 0.25162390012438385,
      "grad_norm": 0.9653536317377608,
      "learning_rate": 4.4646474672932105e-06,
      "loss": 0.1688,
      "step": 2731
    },
    {
      "epoch": 0.25171603630165384,
      "grad_norm": 0.9214114453191518,
      "learning_rate": 4.464176285181597e-06,
      "loss": 0.1707,
      "step": 2732
    },
    {
      "epoch": 0.25180817247892384,
      "grad_norm": 0.9310991093879136,
      "learning_rate": 4.4637049206960055e-06,
      "loss": 0.1664,
      "step": 2733
    },
    {
      "epoch": 0.25190030865619384,
      "grad_norm": 0.8764637830155427,
      "learning_rate": 4.4632333738802e-06,
      "loss": 0.1636,
      "step": 2734
    },
    {
      "epoch": 0.25199244483346384,
      "grad_norm": 0.8625927177967262,
      "learning_rate": 4.462761644777964e-06,
      "loss": 0.1619,
      "step": 2735
    },
    {
      "epoch": 0.2520845810107339,
      "grad_norm": 0.90143610722121,
      "learning_rate": 4.4622897334330985e-06,
      "loss": 0.1643,
      "step": 2736
    },
    {
      "epoch": 0.2521767171880039,
      "grad_norm": 0.9695051236534774,
      "learning_rate": 4.4618176398894205e-06,
      "loss": 0.1621,
      "step": 2737
    },
    {
      "epoch": 0.2522688533652739,
      "grad_norm": 0.9623621029230007,
      "learning_rate": 4.4613453641907634e-06,
      "loss": 0.169,
      "step": 2738
    },
    {
      "epoch": 0.2523609895425439,
      "grad_norm": 0.9804724158084682,
      "learning_rate": 4.460872906380977e-06,
      "loss": 0.1667,
      "step": 2739
    },
    {
      "epoch": 0.2524531257198139,
      "grad_norm": 0.9440309923131297,
      "learning_rate": 4.460400266503932e-06,
      "loss": 0.1788,
      "step": 2740
    },
    {
      "epoch": 0.2525452618970839,
      "grad_norm": 0.8943689360394186,
      "learning_rate": 4.4599274446035104e-06,
      "loss": 0.1634,
      "step": 2741
    },
    {
      "epoch": 0.25263739807435387,
      "grad_norm": 0.9078445150328539,
      "learning_rate": 4.459454440723614e-06,
      "loss": 0.1674,
      "step": 2742
    },
    {
      "epoch": 0.2527295342516239,
      "grad_norm": 1.0362519468921287,
      "learning_rate": 4.4589812549081624e-06,
      "loss": 0.1754,
      "step": 2743
    },
    {
      "epoch": 0.2528216704288939,
      "grad_norm": 0.9085550271912034,
      "learning_rate": 4.458507887201091e-06,
      "loss": 0.1622,
      "step": 2744
    },
    {
      "epoch": 0.2529138066061639,
      "grad_norm": 0.9723393303756631,
      "learning_rate": 4.458034337646351e-06,
      "loss": 0.1717,
      "step": 2745
    },
    {
      "epoch": 0.2530059427834339,
      "grad_norm": 0.8992954032616152,
      "learning_rate": 4.4575606062879115e-06,
      "loss": 0.1591,
      "step": 2746
    },
    {
      "epoch": 0.2530980789607039,
      "grad_norm": 0.94354265969286,
      "learning_rate": 4.45708669316976e-06,
      "loss": 0.167,
      "step": 2747
    },
    {
      "epoch": 0.2531902151379739,
      "grad_norm": 0.9230334147360164,
      "learning_rate": 4.456612598335898e-06,
      "loss": 0.1684,
      "step": 2748
    },
    {
      "epoch": 0.2532823513152439,
      "grad_norm": 0.8605421993979968,
      "learning_rate": 4.4561383218303455e-06,
      "loss": 0.1448,
      "step": 2749
    },
    {
      "epoch": 0.25337448749251396,
      "grad_norm": 0.9734131686816277,
      "learning_rate": 4.45566386369714e-06,
      "loss": 0.1796,
      "step": 2750
    },
    {
      "epoch": 0.25346662366978395,
      "grad_norm": 0.9752093359656303,
      "learning_rate": 4.455189223980333e-06,
      "loss": 0.1666,
      "step": 2751
    },
    {
      "epoch": 0.25355875984705395,
      "grad_norm": 0.9671629645895793,
      "learning_rate": 4.454714402723997e-06,
      "loss": 0.1672,
      "step": 2752
    },
    {
      "epoch": 0.25365089602432395,
      "grad_norm": 0.9912354137997301,
      "learning_rate": 4.4542393999722184e-06,
      "loss": 0.1749,
      "step": 2753
    },
    {
      "epoch": 0.25374303220159394,
      "grad_norm": 0.9629544614386797,
      "learning_rate": 4.453764215769101e-06,
      "loss": 0.1617,
      "step": 2754
    },
    {
      "epoch": 0.25383516837886394,
      "grad_norm": 0.903837560697107,
      "learning_rate": 4.4532888501587655e-06,
      "loss": 0.1691,
      "step": 2755
    },
    {
      "epoch": 0.253927304556134,
      "grad_norm": 0.9369456487961056,
      "learning_rate": 4.452813303185351e-06,
      "loss": 0.1628,
      "step": 2756
    },
    {
      "epoch": 0.254019440733404,
      "grad_norm": 0.9460665893734913,
      "learning_rate": 4.452337574893011e-06,
      "loss": 0.1602,
      "step": 2757
    },
    {
      "epoch": 0.254111576910674,
      "grad_norm": 0.8699779142398438,
      "learning_rate": 4.451861665325916e-06,
      "loss": 0.1473,
      "step": 2758
    },
    {
      "epoch": 0.254203713087944,
      "grad_norm": 0.9002005591816477,
      "learning_rate": 4.451385574528256e-06,
      "loss": 0.1511,
      "step": 2759
    },
    {
      "epoch": 0.254295849265214,
      "grad_norm": 0.9945308777773249,
      "learning_rate": 4.450909302544235e-06,
      "loss": 0.1645,
      "step": 2760
    },
    {
      "epoch": 0.254387985442484,
      "grad_norm": 0.9477308554562681,
      "learning_rate": 4.450432849418076e-06,
      "loss": 0.1698,
      "step": 2761
    },
    {
      "epoch": 0.254480121619754,
      "grad_norm": 0.975609887655622,
      "learning_rate": 4.449956215194017e-06,
      "loss": 0.1733,
      "step": 2762
    },
    {
      "epoch": 0.254572257797024,
      "grad_norm": 1.045564596726802,
      "learning_rate": 4.4494793999163125e-06,
      "loss": 0.1485,
      "step": 2763
    },
    {
      "epoch": 0.254664393974294,
      "grad_norm": 0.9691806406972214,
      "learning_rate": 4.449002403629237e-06,
      "loss": 0.1715,
      "step": 2764
    },
    {
      "epoch": 0.254756530151564,
      "grad_norm": 0.8882406299274819,
      "learning_rate": 4.448525226377078e-06,
      "loss": 0.1599,
      "step": 2765
    },
    {
      "epoch": 0.254848666328834,
      "grad_norm": 1.0071041342757574,
      "learning_rate": 4.448047868204143e-06,
      "loss": 0.1663,
      "step": 2766
    },
    {
      "epoch": 0.254940802506104,
      "grad_norm": 0.9867383224082461,
      "learning_rate": 4.447570329154752e-06,
      "loss": 0.159,
      "step": 2767
    },
    {
      "epoch": 0.255032938683374,
      "grad_norm": 0.9594025105717243,
      "learning_rate": 4.447092609273248e-06,
      "loss": 0.1672,
      "step": 2768
    },
    {
      "epoch": 0.255125074860644,
      "grad_norm": 0.9711693026778082,
      "learning_rate": 4.446614708603985e-06,
      "loss": 0.175,
      "step": 2769
    },
    {
      "epoch": 0.25521721103791406,
      "grad_norm": 0.9495706212576188,
      "learning_rate": 4.446136627191337e-06,
      "loss": 0.1688,
      "step": 2770
    },
    {
      "epoch": 0.25530934721518406,
      "grad_norm": 0.8719052281328445,
      "learning_rate": 4.445658365079693e-06,
      "loss": 0.1548,
      "step": 2771
    },
    {
      "epoch": 0.25540148339245405,
      "grad_norm": 0.9026874716661581,
      "learning_rate": 4.4451799223134615e-06,
      "loss": 0.1588,
      "step": 2772
    },
    {
      "epoch": 0.25549361956972405,
      "grad_norm": 0.9205413159506892,
      "learning_rate": 4.444701298937064e-06,
      "loss": 0.1647,
      "step": 2773
    },
    {
      "epoch": 0.25558575574699405,
      "grad_norm": 0.8853201950547462,
      "learning_rate": 4.444222494994942e-06,
      "loss": 0.1607,
      "step": 2774
    },
    {
      "epoch": 0.25567789192426404,
      "grad_norm": 0.9112603810645689,
      "learning_rate": 4.443743510531552e-06,
      "loss": 0.1581,
      "step": 2775
    },
    {
      "epoch": 0.25577002810153404,
      "grad_norm": 0.9430372314730556,
      "learning_rate": 4.443264345591368e-06,
      "loss": 0.1602,
      "step": 2776
    },
    {
      "epoch": 0.2558621642788041,
      "grad_norm": 0.9940607095386159,
      "learning_rate": 4.442785000218881e-06,
      "loss": 0.1669,
      "step": 2777
    },
    {
      "epoch": 0.2559543004560741,
      "grad_norm": 0.874978128077645,
      "learning_rate": 4.442305474458596e-06,
      "loss": 0.1598,
      "step": 2778
    },
    {
      "epoch": 0.2560464366333441,
      "grad_norm": 0.870158754636262,
      "learning_rate": 4.4418257683550405e-06,
      "loss": 0.1537,
      "step": 2779
    },
    {
      "epoch": 0.2561385728106141,
      "grad_norm": 0.9790466686822947,
      "learning_rate": 4.441345881952752e-06,
      "loss": 0.169,
      "step": 2780
    },
    {
      "epoch": 0.2562307089878841,
      "grad_norm": 1.0139628897985031,
      "learning_rate": 4.44086581529629e-06,
      "loss": 0.1769,
      "step": 2781
    },
    {
      "epoch": 0.2563228451651541,
      "grad_norm": 0.9463222270409243,
      "learning_rate": 4.440385568430228e-06,
      "loss": 0.1544,
      "step": 2782
    },
    {
      "epoch": 0.2564149813424241,
      "grad_norm": 0.9747904707531557,
      "learning_rate": 4.439905141399157e-06,
      "loss": 0.1797,
      "step": 2783
    },
    {
      "epoch": 0.25650711751969413,
      "grad_norm": 0.9977583516525407,
      "learning_rate": 4.439424534247686e-06,
      "loss": 0.162,
      "step": 2784
    },
    {
      "epoch": 0.2565992536969641,
      "grad_norm": 1.009039762592302,
      "learning_rate": 4.438943747020437e-06,
      "loss": 0.1606,
      "step": 2785
    },
    {
      "epoch": 0.2566913898742341,
      "grad_norm": 0.917807551416993,
      "learning_rate": 4.438462779762052e-06,
      "loss": 0.1589,
      "step": 2786
    },
    {
      "epoch": 0.2567835260515041,
      "grad_norm": 1.0105522924525019,
      "learning_rate": 4.437981632517191e-06,
      "loss": 0.1691,
      "step": 2787
    },
    {
      "epoch": 0.2568756622287741,
      "grad_norm": 0.8741885257091299,
      "learning_rate": 4.437500305330526e-06,
      "loss": 0.1627,
      "step": 2788
    },
    {
      "epoch": 0.2569677984060441,
      "grad_norm": 0.9656983545728868,
      "learning_rate": 4.437018798246749e-06,
      "loss": 0.1533,
      "step": 2789
    },
    {
      "epoch": 0.25705993458331416,
      "grad_norm": 0.9585997069738847,
      "learning_rate": 4.436537111310568e-06,
      "loss": 0.1806,
      "step": 2790
    },
    {
      "epoch": 0.25715207076058416,
      "grad_norm": 0.891640608831986,
      "learning_rate": 4.436055244566708e-06,
      "loss": 0.1585,
      "step": 2791
    },
    {
      "epoch": 0.25724420693785416,
      "grad_norm": 0.9842157286945457,
      "learning_rate": 4.4355731980599105e-06,
      "loss": 0.1579,
      "step": 2792
    },
    {
      "epoch": 0.25733634311512416,
      "grad_norm": 1.0030576180364315,
      "learning_rate": 4.435090971834933e-06,
      "loss": 0.1709,
      "step": 2793
    },
    {
      "epoch": 0.25742847929239415,
      "grad_norm": 0.9517782001478897,
      "learning_rate": 4.43460856593655e-06,
      "loss": 0.1718,
      "step": 2794
    },
    {
      "epoch": 0.25752061546966415,
      "grad_norm": 0.8794981685031267,
      "learning_rate": 4.434125980409553e-06,
      "loss": 0.1562,
      "step": 2795
    },
    {
      "epoch": 0.25761275164693415,
      "grad_norm": 0.8812764377019595,
      "learning_rate": 4.433643215298753e-06,
      "loss": 0.1577,
      "step": 2796
    },
    {
      "epoch": 0.2577048878242042,
      "grad_norm": 0.8945685024582577,
      "learning_rate": 4.433160270648971e-06,
      "loss": 0.1613,
      "step": 2797
    },
    {
      "epoch": 0.2577970240014742,
      "grad_norm": 0.8853629308772607,
      "learning_rate": 4.432677146505049e-06,
      "loss": 0.1643,
      "step": 2798
    },
    {
      "epoch": 0.2578891601787442,
      "grad_norm": 0.9325081834935269,
      "learning_rate": 4.432193842911846e-06,
      "loss": 0.1807,
      "step": 2799
    },
    {
      "epoch": 0.2579812963560142,
      "grad_norm": 0.9434955095871754,
      "learning_rate": 4.431710359914238e-06,
      "loss": 0.1653,
      "step": 2800
    },
    {
      "epoch": 0.2580734325332842,
      "grad_norm": 0.9005501179330212,
      "learning_rate": 4.4312266975571145e-06,
      "loss": 0.1608,
      "step": 2801
    },
    {
      "epoch": 0.2581655687105542,
      "grad_norm": 0.9051611437276726,
      "learning_rate": 4.430742855885384e-06,
      "loss": 0.1595,
      "step": 2802
    },
    {
      "epoch": 0.2582577048878242,
      "grad_norm": 0.8541816832770728,
      "learning_rate": 4.430258834943973e-06,
      "loss": 0.1435,
      "step": 2803
    },
    {
      "epoch": 0.25834984106509423,
      "grad_norm": 0.8819731313821468,
      "learning_rate": 4.429774634777819e-06,
      "loss": 0.1708,
      "step": 2804
    },
    {
      "epoch": 0.25844197724236423,
      "grad_norm": 0.8660324638072526,
      "learning_rate": 4.429290255431884e-06,
      "loss": 0.1548,
      "step": 2805
    },
    {
      "epoch": 0.2585341134196342,
      "grad_norm": 0.9404639070221896,
      "learning_rate": 4.428805696951141e-06,
      "loss": 0.1691,
      "step": 2806
    },
    {
      "epoch": 0.2586262495969042,
      "grad_norm": 0.8740371398676113,
      "learning_rate": 4.428320959380581e-06,
      "loss": 0.1684,
      "step": 2807
    },
    {
      "epoch": 0.2587183857741742,
      "grad_norm": 0.8545366276085776,
      "learning_rate": 4.427836042765213e-06,
      "loss": 0.1448,
      "step": 2808
    },
    {
      "epoch": 0.2588105219514442,
      "grad_norm": 0.8662838141494884,
      "learning_rate": 4.4273509471500606e-06,
      "loss": 0.148,
      "step": 2809
    },
    {
      "epoch": 0.2589026581287142,
      "grad_norm": 0.9313287920052562,
      "learning_rate": 4.426865672580166e-06,
      "loss": 0.1506,
      "step": 2810
    },
    {
      "epoch": 0.25899479430598427,
      "grad_norm": 0.8542806501069565,
      "learning_rate": 4.426380219100585e-06,
      "loss": 0.1531,
      "step": 2811
    },
    {
      "epoch": 0.25908693048325426,
      "grad_norm": 1.0367999062687907,
      "learning_rate": 4.425894586756394e-06,
      "loss": 0.196,
      "step": 2812
    },
    {
      "epoch": 0.25917906666052426,
      "grad_norm": 0.9720196319247114,
      "learning_rate": 4.425408775592684e-06,
      "loss": 0.1574,
      "step": 2813
    },
    {
      "epoch": 0.25927120283779426,
      "grad_norm": 0.8971796959776257,
      "learning_rate": 4.424922785654561e-06,
      "loss": 0.1557,
      "step": 2814
    },
    {
      "epoch": 0.25936333901506425,
      "grad_norm": 0.8716438967042835,
      "learning_rate": 4.424436616987151e-06,
      "loss": 0.1534,
      "step": 2815
    },
    {
      "epoch": 0.25945547519233425,
      "grad_norm": 0.9366795506332969,
      "learning_rate": 4.423950269635594e-06,
      "loss": 0.1722,
      "step": 2816
    },
    {
      "epoch": 0.25954761136960425,
      "grad_norm": 0.8750053562323363,
      "learning_rate": 4.4234637436450465e-06,
      "loss": 0.1667,
      "step": 2817
    },
    {
      "epoch": 0.2596397475468743,
      "grad_norm": 0.8688375268172122,
      "learning_rate": 4.422977039060684e-06,
      "loss": 0.1551,
      "step": 2818
    },
    {
      "epoch": 0.2597318837241443,
      "grad_norm": 0.9164926259231396,
      "learning_rate": 4.422490155927696e-06,
      "loss": 0.1547,
      "step": 2819
    },
    {
      "epoch": 0.2598240199014143,
      "grad_norm": 0.8784819223059159,
      "learning_rate": 4.422003094291291e-06,
      "loss": 0.1553,
      "step": 2820
    },
    {
      "epoch": 0.2599161560786843,
      "grad_norm": 0.8480220147126285,
      "learning_rate": 4.421515854196692e-06,
      "loss": 0.1421,
      "step": 2821
    },
    {
      "epoch": 0.2600082922559543,
      "grad_norm": 0.9812963150064519,
      "learning_rate": 4.421028435689138e-06,
      "loss": 0.1739,
      "step": 2822
    },
    {
      "epoch": 0.2601004284332243,
      "grad_norm": 0.966210027789589,
      "learning_rate": 4.420540838813887e-06,
      "loss": 0.1726,
      "step": 2823
    },
    {
      "epoch": 0.26019256461049434,
      "grad_norm": 0.8640742533975352,
      "learning_rate": 4.420053063616214e-06,
      "loss": 0.1597,
      "step": 2824
    },
    {
      "epoch": 0.26028470078776433,
      "grad_norm": 0.9021044826230841,
      "learning_rate": 4.419565110141406e-06,
      "loss": 0.1669,
      "step": 2825
    },
    {
      "epoch": 0.26037683696503433,
      "grad_norm": 0.8744446068310874,
      "learning_rate": 4.419076978434771e-06,
      "loss": 0.1587,
      "step": 2826
    },
    {
      "epoch": 0.2604689731423043,
      "grad_norm": 0.9002597276951575,
      "learning_rate": 4.418588668541632e-06,
      "loss": 0.1496,
      "step": 2827
    },
    {
      "epoch": 0.2605611093195743,
      "grad_norm": 0.8705763804623955,
      "learning_rate": 4.41810018050733e-06,
      "loss": 0.1612,
      "step": 2828
    },
    {
      "epoch": 0.2606532454968443,
      "grad_norm": 0.8590574476522304,
      "learning_rate": 4.417611514377218e-06,
      "loss": 0.1541,
      "step": 2829
    },
    {
      "epoch": 0.2607453816741143,
      "grad_norm": 0.9420489301767392,
      "learning_rate": 4.417122670196672e-06,
      "loss": 0.1662,
      "step": 2830
    },
    {
      "epoch": 0.26083751785138437,
      "grad_norm": 0.9855215157889966,
      "learning_rate": 4.416633648011079e-06,
      "loss": 0.1719,
      "step": 2831
    },
    {
      "epoch": 0.26092965402865437,
      "grad_norm": 0.9015482825979495,
      "learning_rate": 4.416144447865845e-06,
      "loss": 0.1594,
      "step": 2832
    },
    {
      "epoch": 0.26102179020592436,
      "grad_norm": 0.9186528341173594,
      "learning_rate": 4.4156550698063935e-06,
      "loss": 0.1503,
      "step": 2833
    },
    {
      "epoch": 0.26111392638319436,
      "grad_norm": 0.888306584206573,
      "learning_rate": 4.4151655138781625e-06,
      "loss": 0.1477,
      "step": 2834
    },
    {
      "epoch": 0.26120606256046436,
      "grad_norm": 1.0019447633808933,
      "learning_rate": 4.414675780126607e-06,
      "loss": 0.1655,
      "step": 2835
    },
    {
      "epoch": 0.26129819873773436,
      "grad_norm": 0.9867728137209706,
      "learning_rate": 4.4141858685972e-06,
      "loss": 0.1709,
      "step": 2836
    },
    {
      "epoch": 0.26139033491500435,
      "grad_norm": 0.9522319296236781,
      "learning_rate": 4.413695779335428e-06,
      "loss": 0.1604,
      "step": 2837
    },
    {
      "epoch": 0.2614824710922744,
      "grad_norm": 0.9150332712571578,
      "learning_rate": 4.413205512386798e-06,
      "loss": 0.1683,
      "step": 2838
    },
    {
      "epoch": 0.2615746072695444,
      "grad_norm": 0.9783738508165877,
      "learning_rate": 4.41271506779683e-06,
      "loss": 0.1781,
      "step": 2839
    },
    {
      "epoch": 0.2616667434468144,
      "grad_norm": 0.8955606749537997,
      "learning_rate": 4.412224445611062e-06,
      "loss": 0.1636,
      "step": 2840
    },
    {
      "epoch": 0.2617588796240844,
      "grad_norm": 0.8984379268924941,
      "learning_rate": 4.411733645875048e-06,
      "loss": 0.1586,
      "step": 2841
    },
    {
      "epoch": 0.2618510158013544,
      "grad_norm": 0.9029965233138934,
      "learning_rate": 4.41124266863436e-06,
      "loss": 0.1666,
      "step": 2842
    },
    {
      "epoch": 0.2619431519786244,
      "grad_norm": 0.9174022488679581,
      "learning_rate": 4.410751513934585e-06,
      "loss": 0.1725,
      "step": 2843
    },
    {
      "epoch": 0.2620352881558944,
      "grad_norm": 0.9756876452431863,
      "learning_rate": 4.410260181821325e-06,
      "loss": 0.1763,
      "step": 2844
    },
    {
      "epoch": 0.26212742433316444,
      "grad_norm": 0.9244170802721333,
      "learning_rate": 4.409768672340202e-06,
      "loss": 0.1463,
      "step": 2845
    },
    {
      "epoch": 0.26221956051043444,
      "grad_norm": 0.9041971028491017,
      "learning_rate": 4.409276985536852e-06,
      "loss": 0.173,
      "step": 2846
    },
    {
      "epoch": 0.26231169668770443,
      "grad_norm": 0.9359224044219522,
      "learning_rate": 4.408785121456929e-06,
      "loss": 0.1613,
      "step": 2847
    },
    {
      "epoch": 0.26240383286497443,
      "grad_norm": 0.8730622181165766,
      "learning_rate": 4.408293080146101e-06,
      "loss": 0.1474,
      "step": 2848
    },
    {
      "epoch": 0.2624959690422444,
      "grad_norm": 0.9145537165320866,
      "learning_rate": 4.407800861650056e-06,
      "loss": 0.1586,
      "step": 2849
    },
    {
      "epoch": 0.2625881052195144,
      "grad_norm": 0.8977352205218089,
      "learning_rate": 4.407308466014496e-06,
      "loss": 0.1486,
      "step": 2850
    },
    {
      "epoch": 0.2626802413967845,
      "grad_norm": 0.9106247178852294,
      "learning_rate": 4.406815893285139e-06,
      "loss": 0.1653,
      "step": 2851
    },
    {
      "epoch": 0.26277237757405447,
      "grad_norm": 1.0115187545849416,
      "learning_rate": 4.406323143507721e-06,
      "loss": 0.1612,
      "step": 2852
    },
    {
      "epoch": 0.26286451375132447,
      "grad_norm": 0.9432111102878478,
      "learning_rate": 4.405830216727995e-06,
      "loss": 0.1501,
      "step": 2853
    },
    {
      "epoch": 0.26295664992859447,
      "grad_norm": 0.8641598652236985,
      "learning_rate": 4.405337112991728e-06,
      "loss": 0.1483,
      "step": 2854
    },
    {
      "epoch": 0.26304878610586446,
      "grad_norm": 0.8641392406260407,
      "learning_rate": 4.404843832344704e-06,
      "loss": 0.1653,
      "step": 2855
    },
    {
      "epoch": 0.26314092228313446,
      "grad_norm": 0.9564993972527854,
      "learning_rate": 4.404350374832725e-06,
      "loss": 0.1611,
      "step": 2856
    },
    {
      "epoch": 0.26323305846040446,
      "grad_norm": 0.8752998536245241,
      "learning_rate": 4.40385674050161e-06,
      "loss": 0.1537,
      "step": 2857
    },
    {
      "epoch": 0.2633251946376745,
      "grad_norm": 0.8546718345682361,
      "learning_rate": 4.403362929397191e-06,
      "loss": 0.1559,
      "step": 2858
    },
    {
      "epoch": 0.2634173308149445,
      "grad_norm": 0.8856269547457143,
      "learning_rate": 4.40286894156532e-06,
      "loss": 0.1531,
      "step": 2859
    },
    {
      "epoch": 0.2635094669922145,
      "grad_norm": 0.8966875055653404,
      "learning_rate": 4.402374777051862e-06,
      "loss": 0.1676,
      "step": 2860
    },
    {
      "epoch": 0.2636016031694845,
      "grad_norm": 0.9282672278947397,
      "learning_rate": 4.401880435902701e-06,
      "loss": 0.1676,
      "step": 2861
    },
    {
      "epoch": 0.2636937393467545,
      "grad_norm": 1.0165990167745647,
      "learning_rate": 4.401385918163737e-06,
      "loss": 0.1859,
      "step": 2862
    },
    {
      "epoch": 0.2637858755240245,
      "grad_norm": 0.8764188958015945,
      "learning_rate": 4.400891223880888e-06,
      "loss": 0.1488,
      "step": 2863
    },
    {
      "epoch": 0.2638780117012945,
      "grad_norm": 0.8798735349276867,
      "learning_rate": 4.400396353100081e-06,
      "loss": 0.1599,
      "step": 2864
    },
    {
      "epoch": 0.26397014787856454,
      "grad_norm": 0.9295437632531321,
      "learning_rate": 4.39990130586727e-06,
      "loss": 0.1597,
      "step": 2865
    },
    {
      "epoch": 0.26406228405583454,
      "grad_norm": 0.8596446043652288,
      "learning_rate": 4.399406082228418e-06,
      "loss": 0.1533,
      "step": 2866
    },
    {
      "epoch": 0.26415442023310454,
      "grad_norm": 0.9534196281125018,
      "learning_rate": 4.398910682229507e-06,
      "loss": 0.1718,
      "step": 2867
    },
    {
      "epoch": 0.26424655641037453,
      "grad_norm": 0.9279560321709486,
      "learning_rate": 4.398415105916535e-06,
      "loss": 0.1646,
      "step": 2868
    },
    {
      "epoch": 0.26433869258764453,
      "grad_norm": 0.8749195173976764,
      "learning_rate": 4.397919353335516e-06,
      "loss": 0.1513,
      "step": 2869
    },
    {
      "epoch": 0.2644308287649145,
      "grad_norm": 0.9097910090711311,
      "learning_rate": 4.3974234245324795e-06,
      "loss": 0.1705,
      "step": 2870
    },
    {
      "epoch": 0.2645229649421845,
      "grad_norm": 0.9171732739684699,
      "learning_rate": 4.396927319553476e-06,
      "loss": 0.1718,
      "step": 2871
    },
    {
      "epoch": 0.2646151011194546,
      "grad_norm": 0.8748961263126276,
      "learning_rate": 4.396431038444565e-06,
      "loss": 0.1576,
      "step": 2872
    },
    {
      "epoch": 0.2647072372967246,
      "grad_norm": 0.9224132153233574,
      "learning_rate": 4.3959345812518285e-06,
      "loss": 0.1666,
      "step": 2873
    },
    {
      "epoch": 0.26479937347399457,
      "grad_norm": 0.943886286209004,
      "learning_rate": 4.395437948021362e-06,
      "loss": 0.1623,
      "step": 2874
    },
    {
      "epoch": 0.26489150965126457,
      "grad_norm": 0.9233832263520497,
      "learning_rate": 4.394941138799278e-06,
      "loss": 0.1654,
      "step": 2875
    },
    {
      "epoch": 0.26498364582853456,
      "grad_norm": 0.8726674273082234,
      "learning_rate": 4.3944441536317055e-06,
      "loss": 0.1425,
      "step": 2876
    },
    {
      "epoch": 0.26507578200580456,
      "grad_norm": 1.01831125875492,
      "learning_rate": 4.3939469925647895e-06,
      "loss": 0.1705,
      "step": 2877
    },
    {
      "epoch": 0.26516791818307456,
      "grad_norm": 0.8873960103462912,
      "learning_rate": 4.3934496556446916e-06,
      "loss": 0.1475,
      "step": 2878
    },
    {
      "epoch": 0.2652600543603446,
      "grad_norm": 0.890705716689305,
      "learning_rate": 4.3929521429175895e-06,
      "loss": 0.1523,
      "step": 2879
    },
    {
      "epoch": 0.2653521905376146,
      "grad_norm": 0.8998449017070324,
      "learning_rate": 4.392454454429676e-06,
      "loss": 0.1703,
      "step": 2880
    },
    {
      "epoch": 0.2654443267148846,
      "grad_norm": 0.8815761058067845,
      "learning_rate": 4.391956590227164e-06,
      "loss": 0.1603,
      "step": 2881
    },
    {
      "epoch": 0.2655364628921546,
      "grad_norm": 0.9434933743462255,
      "learning_rate": 4.391458550356278e-06,
      "loss": 0.1619,
      "step": 2882
    },
    {
      "epoch": 0.2656285990694246,
      "grad_norm": 0.9342716887434068,
      "learning_rate": 4.390960334863263e-06,
      "loss": 0.1533,
      "step": 2883
    },
    {
      "epoch": 0.2657207352466946,
      "grad_norm": 0.9839182352177902,
      "learning_rate": 4.390461943794377e-06,
      "loss": 0.1775,
      "step": 2884
    },
    {
      "epoch": 0.26581287142396465,
      "grad_norm": 0.8677512839404379,
      "learning_rate": 4.389963377195896e-06,
      "loss": 0.162,
      "step": 2885
    },
    {
      "epoch": 0.26590500760123464,
      "grad_norm": 0.886479980018512,
      "learning_rate": 4.389464635114112e-06,
      "loss": 0.1563,
      "step": 2886
    },
    {
      "epoch": 0.26599714377850464,
      "grad_norm": 0.9815790472166931,
      "learning_rate": 4.388965717595334e-06,
      "loss": 0.1867,
      "step": 2887
    },
    {
      "epoch": 0.26608927995577464,
      "grad_norm": 0.8801062088344992,
      "learning_rate": 4.3884666246858846e-06,
      "loss": 0.1565,
      "step": 2888
    },
    {
      "epoch": 0.26618141613304463,
      "grad_norm": 0.9784498336027216,
      "learning_rate": 4.387967356432107e-06,
      "loss": 0.17,
      "step": 2889
    },
    {
      "epoch": 0.26627355231031463,
      "grad_norm": 0.9091918449537576,
      "learning_rate": 4.3874679128803565e-06,
      "loss": 0.1535,
      "step": 2890
    },
    {
      "epoch": 0.26636568848758463,
      "grad_norm": 0.9058091255127974,
      "learning_rate": 4.386968294077007e-06,
      "loss": 0.1599,
      "step": 2891
    },
    {
      "epoch": 0.2664578246648547,
      "grad_norm": 0.9314719924346809,
      "learning_rate": 4.386468500068449e-06,
      "loss": 0.1663,
      "step": 2892
    },
    {
      "epoch": 0.2665499608421247,
      "grad_norm": 0.9439398416546473,
      "learning_rate": 4.385968530901087e-06,
      "loss": 0.1737,
      "step": 2893
    },
    {
      "epoch": 0.2666420970193947,
      "grad_norm": 0.9627391950050143,
      "learning_rate": 4.3854683866213445e-06,
      "loss": 0.1613,
      "step": 2894
    },
    {
      "epoch": 0.26673423319666467,
      "grad_norm": 0.9709489598335683,
      "learning_rate": 4.384968067275659e-06,
      "loss": 0.1749,
      "step": 2895
    },
    {
      "epoch": 0.26682636937393467,
      "grad_norm": 0.9876304597584598,
      "learning_rate": 4.384467572910486e-06,
      "loss": 0.1744,
      "step": 2896
    },
    {
      "epoch": 0.26691850555120467,
      "grad_norm": 0.9806142595268034,
      "learning_rate": 4.383966903572295e-06,
      "loss": 0.1669,
      "step": 2897
    },
    {
      "epoch": 0.26701064172847466,
      "grad_norm": 0.8390793688951929,
      "learning_rate": 4.383466059307576e-06,
      "loss": 0.1529,
      "step": 2898
    },
    {
      "epoch": 0.2671027779057447,
      "grad_norm": 0.9378584287300362,
      "learning_rate": 4.382965040162829e-06,
      "loss": 0.1678,
      "step": 2899
    },
    {
      "epoch": 0.2671949140830147,
      "grad_norm": 0.9185936272777794,
      "learning_rate": 4.3824638461845764e-06,
      "loss": 0.1624,
      "step": 2900
    },
    {
      "epoch": 0.2672870502602847,
      "grad_norm": 0.8849129207015696,
      "learning_rate": 4.381962477419352e-06,
      "loss": 0.1621,
      "step": 2901
    },
    {
      "epoch": 0.2673791864375547,
      "grad_norm": 1.020300634639128,
      "learning_rate": 4.3814609339137105e-06,
      "loss": 0.1697,
      "step": 2902
    },
    {
      "epoch": 0.2674713226148247,
      "grad_norm": 0.9478899087466803,
      "learning_rate": 4.380959215714218e-06,
      "loss": 0.1625,
      "step": 2903
    },
    {
      "epoch": 0.2675634587920947,
      "grad_norm": 1.0037603195239564,
      "learning_rate": 4.380457322867461e-06,
      "loss": 0.1615,
      "step": 2904
    },
    {
      "epoch": 0.2676555949693647,
      "grad_norm": 0.9466541604314458,
      "learning_rate": 4.379955255420037e-06,
      "loss": 0.1636,
      "step": 2905
    },
    {
      "epoch": 0.26774773114663475,
      "grad_norm": 0.8951466466126106,
      "learning_rate": 4.379453013418567e-06,
      "loss": 0.1722,
      "step": 2906
    },
    {
      "epoch": 0.26783986732390475,
      "grad_norm": 0.9318669615219027,
      "learning_rate": 4.378950596909683e-06,
      "loss": 0.1661,
      "step": 2907
    },
    {
      "epoch": 0.26793200350117474,
      "grad_norm": 1.0399320939535317,
      "learning_rate": 4.378448005940031e-06,
      "loss": 0.1661,
      "step": 2908
    },
    {
      "epoch": 0.26802413967844474,
      "grad_norm": 1.0157712583133751,
      "learning_rate": 4.377945240556282e-06,
      "loss": 0.1779,
      "step": 2909
    },
    {
      "epoch": 0.26811627585571474,
      "grad_norm": 0.9301603867530266,
      "learning_rate": 4.3774423008051145e-06,
      "loss": 0.1634,
      "step": 2910
    },
    {
      "epoch": 0.26820841203298473,
      "grad_norm": 0.9296110650376944,
      "learning_rate": 4.376939186733227e-06,
      "loss": 0.1516,
      "step": 2911
    },
    {
      "epoch": 0.26830054821025473,
      "grad_norm": 1.0043841351727716,
      "learning_rate": 4.376435898387334e-06,
      "loss": 0.1632,
      "step": 2912
    },
    {
      "epoch": 0.2683926843875248,
      "grad_norm": 0.9963071461738237,
      "learning_rate": 4.375932435814167e-06,
      "loss": 0.1846,
      "step": 2913
    },
    {
      "epoch": 0.2684848205647948,
      "grad_norm": 0.9703723999360877,
      "learning_rate": 4.37542879906047e-06,
      "loss": 0.1675,
      "step": 2914
    },
    {
      "epoch": 0.2685769567420648,
      "grad_norm": 0.8830939679657934,
      "learning_rate": 4.374924988173008e-06,
      "loss": 0.1608,
      "step": 2915
    },
    {
      "epoch": 0.2686690929193348,
      "grad_norm": 0.9540176107967079,
      "learning_rate": 4.374421003198559e-06,
      "loss": 0.1575,
      "step": 2916
    },
    {
      "epoch": 0.26876122909660477,
      "grad_norm": 0.9928441377656393,
      "learning_rate": 4.373916844183918e-06,
      "loss": 0.1587,
      "step": 2917
    },
    {
      "epoch": 0.26885336527387477,
      "grad_norm": 0.9455759756236314,
      "learning_rate": 4.373412511175897e-06,
      "loss": 0.1575,
      "step": 2918
    },
    {
      "epoch": 0.2689455014511448,
      "grad_norm": 0.9637716298737328,
      "learning_rate": 4.372908004221322e-06,
      "loss": 0.1687,
      "step": 2919
    },
    {
      "epoch": 0.2690376376284148,
      "grad_norm": 0.9425788092717383,
      "learning_rate": 4.372403323367037e-06,
      "loss": 0.1538,
      "step": 2920
    },
    {
      "epoch": 0.2691297738056848,
      "grad_norm": 0.935187533150534,
      "learning_rate": 4.371898468659903e-06,
      "loss": 0.1701,
      "step": 2921
    },
    {
      "epoch": 0.2692219099829548,
      "grad_norm": 0.9553637112591826,
      "learning_rate": 4.371393440146794e-06,
      "loss": 0.1654,
      "step": 2922
    },
    {
      "epoch": 0.2693140461602248,
      "grad_norm": 0.9022261913400997,
      "learning_rate": 4.370888237874602e-06,
      "loss": 0.1494,
      "step": 2923
    },
    {
      "epoch": 0.2694061823374948,
      "grad_norm": 0.8851623605601451,
      "learning_rate": 4.370382861890237e-06,
      "loss": 0.1459,
      "step": 2924
    },
    {
      "epoch": 0.2694983185147648,
      "grad_norm": 0.9899899922542382,
      "learning_rate": 4.369877312240621e-06,
      "loss": 0.17,
      "step": 2925
    },
    {
      "epoch": 0.26959045469203485,
      "grad_norm": 0.8982399301119234,
      "learning_rate": 4.369371588972696e-06,
      "loss": 0.1581,
      "step": 2926
    },
    {
      "epoch": 0.26968259086930485,
      "grad_norm": 0.9199727306968912,
      "learning_rate": 4.368865692133417e-06,
      "loss": 0.1628,
      "step": 2927
    },
    {
      "epoch": 0.26977472704657485,
      "grad_norm": 0.916780158787217,
      "learning_rate": 4.3683596217697585e-06,
      "loss": 0.1578,
      "step": 2928
    },
    {
      "epoch": 0.26986686322384484,
      "grad_norm": 1.0499551006536163,
      "learning_rate": 4.367853377928707e-06,
      "loss": 0.1777,
      "step": 2929
    },
    {
      "epoch": 0.26995899940111484,
      "grad_norm": 0.9431552116102009,
      "learning_rate": 4.367346960657269e-06,
      "loss": 0.1523,
      "step": 2930
    },
    {
      "epoch": 0.27005113557838484,
      "grad_norm": 0.9014934644247001,
      "learning_rate": 4.366840370002465e-06,
      "loss": 0.1602,
      "step": 2931
    },
    {
      "epoch": 0.27014327175565483,
      "grad_norm": 1.0011043047400634,
      "learning_rate": 4.366333606011331e-06,
      "loss": 0.1674,
      "step": 2932
    },
    {
      "epoch": 0.2702354079329249,
      "grad_norm": 1.0340166384513643,
      "learning_rate": 4.365826668730921e-06,
      "loss": 0.1662,
      "step": 2933
    },
    {
      "epoch": 0.2703275441101949,
      "grad_norm": 0.9098261155448165,
      "learning_rate": 4.365319558208304e-06,
      "loss": 0.1656,
      "step": 2934
    },
    {
      "epoch": 0.2704196802874649,
      "grad_norm": 1.0370324645257154,
      "learning_rate": 4.3648122744905654e-06,
      "loss": 0.1918,
      "step": 2935
    },
    {
      "epoch": 0.2705118164647349,
      "grad_norm": 0.9814751176480726,
      "learning_rate": 4.364304817624806e-06,
      "loss": 0.1629,
      "step": 2936
    },
    {
      "epoch": 0.2706039526420049,
      "grad_norm": 0.8429499591444676,
      "learning_rate": 4.363797187658144e-06,
      "loss": 0.1524,
      "step": 2937
    },
    {
      "epoch": 0.27069608881927487,
      "grad_norm": 1.0208847925208175,
      "learning_rate": 4.363289384637713e-06,
      "loss": 0.1677,
      "step": 2938
    },
    {
      "epoch": 0.27078822499654487,
      "grad_norm": 0.9106234440617202,
      "learning_rate": 4.362781408610662e-06,
      "loss": 0.1596,
      "step": 2939
    },
    {
      "epoch": 0.2708803611738149,
      "grad_norm": 0.8962143929787811,
      "learning_rate": 4.362273259624156e-06,
      "loss": 0.1587,
      "step": 2940
    },
    {
      "epoch": 0.2709724973510849,
      "grad_norm": 0.9695209952030241,
      "learning_rate": 4.3617649377253775e-06,
      "loss": 0.1713,
      "step": 2941
    },
    {
      "epoch": 0.2710646335283549,
      "grad_norm": 0.9189259739380802,
      "learning_rate": 4.361256442961524e-06,
      "loss": 0.1703,
      "step": 2942
    },
    {
      "epoch": 0.2711567697056249,
      "grad_norm": 1.001396554375545,
      "learning_rate": 4.360747775379811e-06,
      "loss": 0.1704,
      "step": 2943
    },
    {
      "epoch": 0.2712489058828949,
      "grad_norm": 0.9123763202978219,
      "learning_rate": 4.3602389350274656e-06,
      "loss": 0.1591,
      "step": 2944
    },
    {
      "epoch": 0.2713410420601649,
      "grad_norm": 0.9308820506862105,
      "learning_rate": 4.359729921951735e-06,
      "loss": 0.1668,
      "step": 2945
    },
    {
      "epoch": 0.2714331782374349,
      "grad_norm": 0.9350597956598543,
      "learning_rate": 4.3592207361998815e-06,
      "loss": 0.1692,
      "step": 2946
    },
    {
      "epoch": 0.27152531441470495,
      "grad_norm": 0.9473956643289153,
      "learning_rate": 4.358711377819181e-06,
      "loss": 0.1562,
      "step": 2947
    },
    {
      "epoch": 0.27161745059197495,
      "grad_norm": 0.9222103923820115,
      "learning_rate": 4.358201846856931e-06,
      "loss": 0.1578,
      "step": 2948
    },
    {
      "epoch": 0.27170958676924495,
      "grad_norm": 0.8718693024219112,
      "learning_rate": 4.357692143360438e-06,
      "loss": 0.1602,
      "step": 2949
    },
    {
      "epoch": 0.27180172294651495,
      "grad_norm": 0.8969897252603595,
      "learning_rate": 4.35718226737703e-06,
      "loss": 0.1607,
      "step": 2950
    },
    {
      "epoch": 0.27189385912378494,
      "grad_norm": 0.9386779288297119,
      "learning_rate": 4.35667221895405e-06,
      "loss": 0.1724,
      "step": 2951
    },
    {
      "epoch": 0.27198599530105494,
      "grad_norm": 0.9094775211356984,
      "learning_rate": 4.356161998138853e-06,
      "loss": 0.1724,
      "step": 2952
    },
    {
      "epoch": 0.272078131478325,
      "grad_norm": 0.8950162107788308,
      "learning_rate": 4.355651604978815e-06,
      "loss": 0.1597,
      "step": 2953
    },
    {
      "epoch": 0.272170267655595,
      "grad_norm": 0.8761295316756618,
      "learning_rate": 4.355141039521325e-06,
      "loss": 0.16,
      "step": 2954
    },
    {
      "epoch": 0.272262403832865,
      "grad_norm": 0.862109488387714,
      "learning_rate": 4.3546303018137915e-06,
      "loss": 0.1512,
      "step": 2955
    },
    {
      "epoch": 0.272354540010135,
      "grad_norm": 0.9616828736913641,
      "learning_rate": 4.354119391903634e-06,
      "loss": 0.1654,
      "step": 2956
    },
    {
      "epoch": 0.272446676187405,
      "grad_norm": 0.8860353296993886,
      "learning_rate": 4.353608309838292e-06,
      "loss": 0.1403,
      "step": 2957
    },
    {
      "epoch": 0.272538812364675,
      "grad_norm": 0.9628950022603051,
      "learning_rate": 4.353097055665219e-06,
      "loss": 0.1566,
      "step": 2958
    },
    {
      "epoch": 0.272630948541945,
      "grad_norm": 0.9596948029087774,
      "learning_rate": 4.352585629431883e-06,
      "loss": 0.165,
      "step": 2959
    },
    {
      "epoch": 0.272723084719215,
      "grad_norm": 0.8557258049846965,
      "learning_rate": 4.352074031185774e-06,
      "loss": 0.1573,
      "step": 2960
    },
    {
      "epoch": 0.272815220896485,
      "grad_norm": 0.9269594433765076,
      "learning_rate": 4.351562260974391e-06,
      "loss": 0.1749,
      "step": 2961
    },
    {
      "epoch": 0.272907357073755,
      "grad_norm": 0.9173990269761251,
      "learning_rate": 4.3510503188452535e-06,
      "loss": 0.1693,
      "step": 2962
    },
    {
      "epoch": 0.272999493251025,
      "grad_norm": 0.9427467660265008,
      "learning_rate": 4.350538204845895e-06,
      "loss": 0.1641,
      "step": 2963
    },
    {
      "epoch": 0.273091629428295,
      "grad_norm": 0.9381162953466216,
      "learning_rate": 4.350025919023864e-06,
      "loss": 0.1714,
      "step": 2964
    },
    {
      "epoch": 0.273183765605565,
      "grad_norm": 0.8947538202851707,
      "learning_rate": 4.349513461426728e-06,
      "loss": 0.1777,
      "step": 2965
    },
    {
      "epoch": 0.273275901782835,
      "grad_norm": 0.9430666551395096,
      "learning_rate": 4.349000832102067e-06,
      "loss": 0.1606,
      "step": 2966
    },
    {
      "epoch": 0.27336803796010506,
      "grad_norm": 0.9077354832576249,
      "learning_rate": 4.348488031097481e-06,
      "loss": 0.1603,
      "step": 2967
    },
    {
      "epoch": 0.27346017413737506,
      "grad_norm": 0.9182053591196929,
      "learning_rate": 4.3479750584605814e-06,
      "loss": 0.16,
      "step": 2968
    },
    {
      "epoch": 0.27355231031464505,
      "grad_norm": 0.9057263697989837,
      "learning_rate": 4.347461914238999e-06,
      "loss": 0.1662,
      "step": 2969
    },
    {
      "epoch": 0.27364444649191505,
      "grad_norm": 0.9164232624325025,
      "learning_rate": 4.34694859848038e-06,
      "loss": 0.162,
      "step": 2970
    },
    {
      "epoch": 0.27373658266918505,
      "grad_norm": 0.9369492838326873,
      "learning_rate": 4.346435111232383e-06,
      "loss": 0.1673,
      "step": 2971
    },
    {
      "epoch": 0.27382871884645504,
      "grad_norm": 0.9215110313361893,
      "learning_rate": 4.345921452542689e-06,
      "loss": 0.1578,
      "step": 2972
    },
    {
      "epoch": 0.27392085502372504,
      "grad_norm": 0.9152849266863357,
      "learning_rate": 4.345407622458988e-06,
      "loss": 0.1673,
      "step": 2973
    },
    {
      "epoch": 0.2740129912009951,
      "grad_norm": 0.9041598610869491,
      "learning_rate": 4.3448936210289916e-06,
      "loss": 0.1457,
      "step": 2974
    },
    {
      "epoch": 0.2741051273782651,
      "grad_norm": 0.9426336536924195,
      "learning_rate": 4.344379448300423e-06,
      "loss": 0.1704,
      "step": 2975
    },
    {
      "epoch": 0.2741972635555351,
      "grad_norm": 0.8841261534075182,
      "learning_rate": 4.343865104321026e-06,
      "loss": 0.1546,
      "step": 2976
    },
    {
      "epoch": 0.2742893997328051,
      "grad_norm": 0.8945634429252811,
      "learning_rate": 4.3433505891385534e-06,
      "loss": 0.1668,
      "step": 2977
    },
    {
      "epoch": 0.2743815359100751,
      "grad_norm": 0.8960031810119188,
      "learning_rate": 4.342835902800782e-06,
      "loss": 0.1692,
      "step": 2978
    },
    {
      "epoch": 0.2744736720873451,
      "grad_norm": 0.9376536707630264,
      "learning_rate": 4.342321045355498e-06,
      "loss": 0.1679,
      "step": 2979
    },
    {
      "epoch": 0.2745658082646151,
      "grad_norm": 0.9040938912188998,
      "learning_rate": 4.341806016850506e-06,
      "loss": 0.1564,
      "step": 2980
    },
    {
      "epoch": 0.2746579444418851,
      "grad_norm": 0.9501565919833976,
      "learning_rate": 4.341290817333628e-06,
      "loss": 0.173,
      "step": 2981
    },
    {
      "epoch": 0.2747500806191551,
      "grad_norm": 0.8828775263021356,
      "learning_rate": 4.340775446852699e-06,
      "loss": 0.1559,
      "step": 2982
    },
    {
      "epoch": 0.2748422167964251,
      "grad_norm": 0.850067548106956,
      "learning_rate": 4.340259905455572e-06,
      "loss": 0.1516,
      "step": 2983
    },
    {
      "epoch": 0.2749343529736951,
      "grad_norm": 0.9603013308750544,
      "learning_rate": 4.339744193190114e-06,
      "loss": 0.1713,
      "step": 2984
    },
    {
      "epoch": 0.2750264891509651,
      "grad_norm": 1.0113076687699456,
      "learning_rate": 4.339228310104211e-06,
      "loss": 0.1827,
      "step": 2985
    },
    {
      "epoch": 0.2751186253282351,
      "grad_norm": 0.9572529546575894,
      "learning_rate": 4.338712256245761e-06,
      "loss": 0.1586,
      "step": 2986
    },
    {
      "epoch": 0.27521076150550516,
      "grad_norm": 0.9569799508875555,
      "learning_rate": 4.3381960316626795e-06,
      "loss": 0.1591,
      "step": 2987
    },
    {
      "epoch": 0.27530289768277516,
      "grad_norm": 0.8482125697273889,
      "learning_rate": 4.337679636402898e-06,
      "loss": 0.1595,
      "step": 2988
    },
    {
      "epoch": 0.27539503386004516,
      "grad_norm": 0.8838971352649014,
      "learning_rate": 4.3371630705143665e-06,
      "loss": 0.155,
      "step": 2989
    },
    {
      "epoch": 0.27548717003731515,
      "grad_norm": 0.9940918802102938,
      "learning_rate": 4.336646334045045e-06,
      "loss": 0.1565,
      "step": 2990
    },
    {
      "epoch": 0.27557930621458515,
      "grad_norm": 0.8826585591020982,
      "learning_rate": 4.336129427042913e-06,
      "loss": 0.1486,
      "step": 2991
    },
    {
      "epoch": 0.27567144239185515,
      "grad_norm": 0.9645792000767957,
      "learning_rate": 4.335612349555967e-06,
      "loss": 0.1706,
      "step": 2992
    },
    {
      "epoch": 0.27576357856912515,
      "grad_norm": 0.9157431148804065,
      "learning_rate": 4.335095101632217e-06,
      "loss": 0.1703,
      "step": 2993
    },
    {
      "epoch": 0.2758557147463952,
      "grad_norm": 0.9536633356430607,
      "learning_rate": 4.334577683319689e-06,
      "loss": 0.1515,
      "step": 2994
    },
    {
      "epoch": 0.2759478509236652,
      "grad_norm": 0.9246001577573807,
      "learning_rate": 4.334060094666426e-06,
      "loss": 0.1623,
      "step": 2995
    },
    {
      "epoch": 0.2760399871009352,
      "grad_norm": 0.9440481150724693,
      "learning_rate": 4.333542335720485e-06,
      "loss": 0.1736,
      "step": 2996
    },
    {
      "epoch": 0.2761321232782052,
      "grad_norm": 1.1704161241743252,
      "learning_rate": 4.3330244065299424e-06,
      "loss": 0.1663,
      "step": 2997
    },
    {
      "epoch": 0.2762242594554752,
      "grad_norm": 0.9360542946940662,
      "learning_rate": 4.332506307142885e-06,
      "loss": 0.1557,
      "step": 2998
    },
    {
      "epoch": 0.2763163956327452,
      "grad_norm": 0.8877005689033058,
      "learning_rate": 4.33198803760742e-06,
      "loss": 0.1623,
      "step": 2999
    },
    {
      "epoch": 0.2764085318100152,
      "grad_norm": 0.8917013234852214,
      "learning_rate": 4.3314695979716684e-06,
      "loss": 0.1507,
      "step": 3000
    },
    {
      "epoch": 0.2764085318100152,
      "eval_loss": 0.1629796177148819,
      "eval_runtime": 299.43,
      "eval_samples_per_second": 23.435,
      "eval_steps_per_second": 2.932,
      "step": 3000
    },
    {
      "epoch": 0.27650066798728523,
      "grad_norm": 0.9388370081009483,
      "learning_rate": 4.330950988283767e-06,
      "loss": 0.1647,
      "step": 3001
    },
    {
      "epoch": 0.27659280416455523,
      "grad_norm": 0.8715944561126756,
      "learning_rate": 4.330432208591871e-06,
      "loss": 0.1517,
      "step": 3002
    },
    {
      "epoch": 0.2766849403418252,
      "grad_norm": 0.9239641362222594,
      "learning_rate": 4.329913258944146e-06,
      "loss": 0.1629,
      "step": 3003
    },
    {
      "epoch": 0.2767770765190952,
      "grad_norm": 0.9607187053118423,
      "learning_rate": 4.329394139388779e-06,
      "loss": 0.1547,
      "step": 3004
    },
    {
      "epoch": 0.2768692126963652,
      "grad_norm": 0.9877687003566187,
      "learning_rate": 4.328874849973968e-06,
      "loss": 0.166,
      "step": 3005
    },
    {
      "epoch": 0.2769613488736352,
      "grad_norm": 0.9366916037002174,
      "learning_rate": 4.328355390747931e-06,
      "loss": 0.1685,
      "step": 3006
    },
    {
      "epoch": 0.2770534850509052,
      "grad_norm": 0.9333814868699946,
      "learning_rate": 4.3278357617589e-06,
      "loss": 0.1578,
      "step": 3007
    },
    {
      "epoch": 0.27714562122817527,
      "grad_norm": 0.967607732134371,
      "learning_rate": 4.327315963055121e-06,
      "loss": 0.1584,
      "step": 3008
    },
    {
      "epoch": 0.27723775740544526,
      "grad_norm": 0.9058729702202627,
      "learning_rate": 4.326795994684858e-06,
      "loss": 0.1629,
      "step": 3009
    },
    {
      "epoch": 0.27732989358271526,
      "grad_norm": 0.8914686442256584,
      "learning_rate": 4.326275856696391e-06,
      "loss": 0.1462,
      "step": 3010
    },
    {
      "epoch": 0.27742202975998526,
      "grad_norm": 0.9350169815870337,
      "learning_rate": 4.325755549138014e-06,
      "loss": 0.1626,
      "step": 3011
    },
    {
      "epoch": 0.27751416593725525,
      "grad_norm": 0.986767040362785,
      "learning_rate": 4.325235072058037e-06,
      "loss": 0.1682,
      "step": 3012
    },
    {
      "epoch": 0.27760630211452525,
      "grad_norm": 0.9304736612317988,
      "learning_rate": 4.324714425504788e-06,
      "loss": 0.1681,
      "step": 3013
    },
    {
      "epoch": 0.27769843829179525,
      "grad_norm": 1.0173816582289463,
      "learning_rate": 4.324193609526607e-06,
      "loss": 0.1493,
      "step": 3014
    },
    {
      "epoch": 0.2777905744690653,
      "grad_norm": 0.9479003398421398,
      "learning_rate": 4.323672624171854e-06,
      "loss": 0.1716,
      "step": 3015
    },
    {
      "epoch": 0.2778827106463353,
      "grad_norm": 0.9340243186194851,
      "learning_rate": 4.323151469488902e-06,
      "loss": 0.1596,
      "step": 3016
    },
    {
      "epoch": 0.2779748468236053,
      "grad_norm": 0.8575012092454355,
      "learning_rate": 4.322630145526139e-06,
      "loss": 0.1603,
      "step": 3017
    },
    {
      "epoch": 0.2780669830008753,
      "grad_norm": 0.8844930689599675,
      "learning_rate": 4.322108652331971e-06,
      "loss": 0.147,
      "step": 3018
    },
    {
      "epoch": 0.2781591191781453,
      "grad_norm": 0.9506160437467802,
      "learning_rate": 4.321586989954819e-06,
      "loss": 0.152,
      "step": 3019
    },
    {
      "epoch": 0.2782512553554153,
      "grad_norm": 0.9913035500170787,
      "learning_rate": 4.3210651584431186e-06,
      "loss": 0.1708,
      "step": 3020
    },
    {
      "epoch": 0.27834339153268534,
      "grad_norm": 0.9403771168318222,
      "learning_rate": 4.320543157845321e-06,
      "loss": 0.1664,
      "step": 3021
    },
    {
      "epoch": 0.27843552770995533,
      "grad_norm": 0.9888529408708953,
      "learning_rate": 4.320020988209898e-06,
      "loss": 0.1524,
      "step": 3022
    },
    {
      "epoch": 0.27852766388722533,
      "grad_norm": 1.0197212346707907,
      "learning_rate": 4.319498649585329e-06,
      "loss": 0.1837,
      "step": 3023
    },
    {
      "epoch": 0.2786198000644953,
      "grad_norm": 1.041097294154436,
      "learning_rate": 4.318976142020113e-06,
      "loss": 0.1815,
      "step": 3024
    },
    {
      "epoch": 0.2787119362417653,
      "grad_norm": 0.9489619575370245,
      "learning_rate": 4.318453465562768e-06,
      "loss": 0.1648,
      "step": 3025
    },
    {
      "epoch": 0.2788040724190353,
      "grad_norm": 0.8724471409134508,
      "learning_rate": 4.317930620261823e-06,
      "loss": 0.1541,
      "step": 3026
    },
    {
      "epoch": 0.2788962085963053,
      "grad_norm": 0.9104960457396443,
      "learning_rate": 4.317407606165825e-06,
      "loss": 0.1693,
      "step": 3027
    },
    {
      "epoch": 0.27898834477357537,
      "grad_norm": 0.9707721567063771,
      "learning_rate": 4.3168844233233345e-06,
      "loss": 0.1672,
      "step": 3028
    },
    {
      "epoch": 0.27908048095084537,
      "grad_norm": 0.9680840756993969,
      "learning_rate": 4.316361071782929e-06,
      "loss": 0.1674,
      "step": 3029
    },
    {
      "epoch": 0.27917261712811536,
      "grad_norm": 0.9071358782871782,
      "learning_rate": 4.315837551593203e-06,
      "loss": 0.1566,
      "step": 3030
    },
    {
      "epoch": 0.27926475330538536,
      "grad_norm": 0.8903474335652056,
      "learning_rate": 4.315313862802766e-06,
      "loss": 0.156,
      "step": 3031
    },
    {
      "epoch": 0.27935688948265536,
      "grad_norm": 0.9724942706639386,
      "learning_rate": 4.31479000546024e-06,
      "loss": 0.1767,
      "step": 3032
    },
    {
      "epoch": 0.27944902565992535,
      "grad_norm": 0.9528363083561447,
      "learning_rate": 4.314265979614267e-06,
      "loss": 0.1708,
      "step": 3033
    },
    {
      "epoch": 0.27954116183719535,
      "grad_norm": 0.8441521393036625,
      "learning_rate": 4.313741785313503e-06,
      "loss": 0.1554,
      "step": 3034
    },
    {
      "epoch": 0.2796332980144654,
      "grad_norm": 0.8782369763374204,
      "learning_rate": 4.313217422606618e-06,
      "loss": 0.1569,
      "step": 3035
    },
    {
      "epoch": 0.2797254341917354,
      "grad_norm": 0.9718559574608084,
      "learning_rate": 4.312692891542302e-06,
      "loss": 0.1661,
      "step": 3036
    },
    {
      "epoch": 0.2798175703690054,
      "grad_norm": 0.9446994124364056,
      "learning_rate": 4.312168192169254e-06,
      "loss": 0.1603,
      "step": 3037
    },
    {
      "epoch": 0.2799097065462754,
      "grad_norm": 0.8951970904849386,
      "learning_rate": 4.311643324536195e-06,
      "loss": 0.1624,
      "step": 3038
    },
    {
      "epoch": 0.2800018427235454,
      "grad_norm": 0.9198959941328185,
      "learning_rate": 4.311118288691859e-06,
      "loss": 0.1684,
      "step": 3039
    },
    {
      "epoch": 0.2800939789008154,
      "grad_norm": 0.9587061003460235,
      "learning_rate": 4.3105930846849945e-06,
      "loss": 0.1714,
      "step": 3040
    },
    {
      "epoch": 0.2801861150780854,
      "grad_norm": 0.8302605952313523,
      "learning_rate": 4.310067712564367e-06,
      "loss": 0.1448,
      "step": 3041
    },
    {
      "epoch": 0.28027825125535544,
      "grad_norm": 0.9283440649700089,
      "learning_rate": 4.3095421723787585e-06,
      "loss": 0.1672,
      "step": 3042
    },
    {
      "epoch": 0.28037038743262543,
      "grad_norm": 0.9372916319568307,
      "learning_rate": 4.309016464176964e-06,
      "loss": 0.1653,
      "step": 3043
    },
    {
      "epoch": 0.28046252360989543,
      "grad_norm": 0.9278315387270265,
      "learning_rate": 4.308490588007796e-06,
      "loss": 0.1634,
      "step": 3044
    },
    {
      "epoch": 0.28055465978716543,
      "grad_norm": 0.9396311360757985,
      "learning_rate": 4.307964543920083e-06,
      "loss": 0.1662,
      "step": 3045
    },
    {
      "epoch": 0.2806467959644354,
      "grad_norm": 0.9113997709540158,
      "learning_rate": 4.3074383319626655e-06,
      "loss": 0.1594,
      "step": 3046
    },
    {
      "epoch": 0.2807389321417054,
      "grad_norm": 0.8784488154165262,
      "learning_rate": 4.306911952184406e-06,
      "loss": 0.1487,
      "step": 3047
    },
    {
      "epoch": 0.2808310683189754,
      "grad_norm": 0.9661023236004905,
      "learning_rate": 4.306385404634177e-06,
      "loss": 0.1786,
      "step": 3048
    },
    {
      "epoch": 0.28092320449624547,
      "grad_norm": 0.9097267950273097,
      "learning_rate": 4.305858689360869e-06,
      "loss": 0.1492,
      "step": 3049
    },
    {
      "epoch": 0.28101534067351547,
      "grad_norm": 0.9366899670263596,
      "learning_rate": 4.3053318064133864e-06,
      "loss": 0.1637,
      "step": 3050
    },
    {
      "epoch": 0.28110747685078546,
      "grad_norm": 0.9021081218041649,
      "learning_rate": 4.3048047558406525e-06,
      "loss": 0.159,
      "step": 3051
    },
    {
      "epoch": 0.28119961302805546,
      "grad_norm": 0.9412954829333593,
      "learning_rate": 4.304277537691602e-06,
      "loss": 0.1589,
      "step": 3052
    },
    {
      "epoch": 0.28129174920532546,
      "grad_norm": 0.9059656008131649,
      "learning_rate": 4.303750152015188e-06,
      "loss": 0.1675,
      "step": 3053
    },
    {
      "epoch": 0.28138388538259546,
      "grad_norm": 0.8794175338311734,
      "learning_rate": 4.3032225988603786e-06,
      "loss": 0.1537,
      "step": 3054
    },
    {
      "epoch": 0.2814760215598655,
      "grad_norm": 0.913108805336839,
      "learning_rate": 4.302694878276157e-06,
      "loss": 0.1679,
      "step": 3055
    },
    {
      "epoch": 0.2815681577371355,
      "grad_norm": 0.9989015886002992,
      "learning_rate": 4.302166990311522e-06,
      "loss": 0.1825,
      "step": 3056
    },
    {
      "epoch": 0.2816602939144055,
      "grad_norm": 0.9152409790134869,
      "learning_rate": 4.301638935015487e-06,
      "loss": 0.1456,
      "step": 3057
    },
    {
      "epoch": 0.2817524300916755,
      "grad_norm": 0.8561207996336198,
      "learning_rate": 4.3011107124370835e-06,
      "loss": 0.1498,
      "step": 3058
    },
    {
      "epoch": 0.2818445662689455,
      "grad_norm": 0.9062836968107347,
      "learning_rate": 4.300582322625356e-06,
      "loss": 0.1524,
      "step": 3059
    },
    {
      "epoch": 0.2819367024462155,
      "grad_norm": 1.0121945627292734,
      "learning_rate": 4.300053765629367e-06,
      "loss": 0.1708,
      "step": 3060
    },
    {
      "epoch": 0.2820288386234855,
      "grad_norm": 0.8853915734328843,
      "learning_rate": 4.299525041498192e-06,
      "loss": 0.149,
      "step": 3061
    },
    {
      "epoch": 0.28212097480075554,
      "grad_norm": 0.8736829341581167,
      "learning_rate": 4.298996150280923e-06,
      "loss": 0.1581,
      "step": 3062
    },
    {
      "epoch": 0.28221311097802554,
      "grad_norm": 1.031079870711207,
      "learning_rate": 4.298467092026668e-06,
      "loss": 0.1677,
      "step": 3063
    },
    {
      "epoch": 0.28230524715529554,
      "grad_norm": 0.9303524529887373,
      "learning_rate": 4.29793786678455e-06,
      "loss": 0.1517,
      "step": 3064
    },
    {
      "epoch": 0.28239738333256553,
      "grad_norm": 1.0035979953985423,
      "learning_rate": 4.297408474603707e-06,
      "loss": 0.1738,
      "step": 3065
    },
    {
      "epoch": 0.28248951950983553,
      "grad_norm": 0.9516958507458324,
      "learning_rate": 4.296878915533294e-06,
      "loss": 0.1703,
      "step": 3066
    },
    {
      "epoch": 0.2825816556871055,
      "grad_norm": 0.8632532319262257,
      "learning_rate": 4.2963491896224806e-06,
      "loss": 0.1541,
      "step": 3067
    },
    {
      "epoch": 0.2826737918643755,
      "grad_norm": 0.9264065893787056,
      "learning_rate": 4.295819296920451e-06,
      "loss": 0.1602,
      "step": 3068
    },
    {
      "epoch": 0.2827659280416456,
      "grad_norm": 0.9289191168966096,
      "learning_rate": 4.295289237476407e-06,
      "loss": 0.1623,
      "step": 3069
    },
    {
      "epoch": 0.2828580642189156,
      "grad_norm": 0.8903370320980405,
      "learning_rate": 4.294759011339564e-06,
      "loss": 0.1574,
      "step": 3070
    },
    {
      "epoch": 0.28295020039618557,
      "grad_norm": 0.9863779141764555,
      "learning_rate": 4.294228618559153e-06,
      "loss": 0.178,
      "step": 3071
    },
    {
      "epoch": 0.28304233657345557,
      "grad_norm": 0.8660438063688297,
      "learning_rate": 4.293698059184423e-06,
      "loss": 0.1549,
      "step": 3072
    },
    {
      "epoch": 0.28313447275072556,
      "grad_norm": 0.8464393507295984,
      "learning_rate": 4.293167333264634e-06,
      "loss": 0.1516,
      "step": 3073
    },
    {
      "epoch": 0.28322660892799556,
      "grad_norm": 0.911983641200305,
      "learning_rate": 4.292636440849065e-06,
      "loss": 0.1559,
      "step": 3074
    },
    {
      "epoch": 0.28331874510526556,
      "grad_norm": 0.8761141663534089,
      "learning_rate": 4.292105381987011e-06,
      "loss": 0.1531,
      "step": 3075
    },
    {
      "epoch": 0.2834108812825356,
      "grad_norm": 0.9819629945606868,
      "learning_rate": 4.291574156727778e-06,
      "loss": 0.1823,
      "step": 3076
    },
    {
      "epoch": 0.2835030174598056,
      "grad_norm": 0.92229535525268,
      "learning_rate": 4.291042765120693e-06,
      "loss": 0.1551,
      "step": 3077
    },
    {
      "epoch": 0.2835951536370756,
      "grad_norm": 0.8734506454229878,
      "learning_rate": 4.290511207215093e-06,
      "loss": 0.1562,
      "step": 3078
    },
    {
      "epoch": 0.2836872898143456,
      "grad_norm": 0.9258218141455302,
      "learning_rate": 4.289979483060336e-06,
      "loss": 0.1643,
      "step": 3079
    },
    {
      "epoch": 0.2837794259916156,
      "grad_norm": 0.8863624060949952,
      "learning_rate": 4.289447592705791e-06,
      "loss": 0.151,
      "step": 3080
    },
    {
      "epoch": 0.2838715621688856,
      "grad_norm": 0.9360893187784172,
      "learning_rate": 4.2889155362008435e-06,
      "loss": 0.1724,
      "step": 3081
    },
    {
      "epoch": 0.2839636983461556,
      "grad_norm": 0.9114271743045955,
      "learning_rate": 4.288383313594897e-06,
      "loss": 0.1571,
      "step": 3082
    },
    {
      "epoch": 0.28405583452342564,
      "grad_norm": 0.9140713488133063,
      "learning_rate": 4.287850924937367e-06,
      "loss": 0.1625,
      "step": 3083
    },
    {
      "epoch": 0.28414797070069564,
      "grad_norm": 0.8729642931002742,
      "learning_rate": 4.287318370277686e-06,
      "loss": 0.1598,
      "step": 3084
    },
    {
      "epoch": 0.28424010687796564,
      "grad_norm": 0.8690224870912999,
      "learning_rate": 4.286785649665302e-06,
      "loss": 0.1428,
      "step": 3085
    },
    {
      "epoch": 0.28433224305523563,
      "grad_norm": 0.942851685881234,
      "learning_rate": 4.286252763149679e-06,
      "loss": 0.1726,
      "step": 3086
    },
    {
      "epoch": 0.28442437923250563,
      "grad_norm": 1.0015144979953934,
      "learning_rate": 4.2857197107802936e-06,
      "loss": 0.1628,
      "step": 3087
    },
    {
      "epoch": 0.2845165154097756,
      "grad_norm": 0.8880299301894348,
      "learning_rate": 4.285186492606641e-06,
      "loss": 0.1553,
      "step": 3088
    },
    {
      "epoch": 0.2846086515870457,
      "grad_norm": 0.8486996924762646,
      "learning_rate": 4.2846531086782315e-06,
      "loss": 0.1475,
      "step": 3089
    },
    {
      "epoch": 0.2847007877643157,
      "grad_norm": 0.9489188020398197,
      "learning_rate": 4.2841195590445875e-06,
      "loss": 0.1622,
      "step": 3090
    },
    {
      "epoch": 0.2847929239415857,
      "grad_norm": 1.0150637526631434,
      "learning_rate": 4.283585843755251e-06,
      "loss": 0.1593,
      "step": 3091
    },
    {
      "epoch": 0.28488506011885567,
      "grad_norm": 0.9502841585717497,
      "learning_rate": 4.283051962859776e-06,
      "loss": 0.1587,
      "step": 3092
    },
    {
      "epoch": 0.28497719629612567,
      "grad_norm": 0.9644289682928954,
      "learning_rate": 4.2825179164077365e-06,
      "loss": 0.1807,
      "step": 3093
    },
    {
      "epoch": 0.28506933247339566,
      "grad_norm": 0.893244872345484,
      "learning_rate": 4.281983704448715e-06,
      "loss": 0.1543,
      "step": 3094
    },
    {
      "epoch": 0.28516146865066566,
      "grad_norm": 0.8971291446766364,
      "learning_rate": 4.281449327032315e-06,
      "loss": 0.1634,
      "step": 3095
    },
    {
      "epoch": 0.2852536048279357,
      "grad_norm": 0.8839743026411959,
      "learning_rate": 4.2809147842081535e-06,
      "loss": 0.1497,
      "step": 3096
    },
    {
      "epoch": 0.2853457410052057,
      "grad_norm": 0.8814626684254067,
      "learning_rate": 4.280380076025863e-06,
      "loss": 0.1553,
      "step": 3097
    },
    {
      "epoch": 0.2854378771824757,
      "grad_norm": 0.9336844802468615,
      "learning_rate": 4.27984520253509e-06,
      "loss": 0.1746,
      "step": 3098
    },
    {
      "epoch": 0.2855300133597457,
      "grad_norm": 0.8813308242067851,
      "learning_rate": 4.279310163785499e-06,
      "loss": 0.1672,
      "step": 3099
    },
    {
      "epoch": 0.2856221495370157,
      "grad_norm": 0.92626645246821,
      "learning_rate": 4.278774959826768e-06,
      "loss": 0.154,
      "step": 3100
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 1.0123665912926623,
      "learning_rate": 4.2782395907085894e-06,
      "loss": 0.1978,
      "step": 3101
    },
    {
      "epoch": 0.2858064218915557,
      "grad_norm": 0.9242315379627385,
      "learning_rate": 4.277704056480674e-06,
      "loss": 0.1711,
      "step": 3102
    },
    {
      "epoch": 0.28589855806882575,
      "grad_norm": 0.8996561918227259,
      "learning_rate": 4.2771683571927455e-06,
      "loss": 0.1785,
      "step": 3103
    },
    {
      "epoch": 0.28599069424609574,
      "grad_norm": 0.9052116172117433,
      "learning_rate": 4.276632492894544e-06,
      "loss": 0.1484,
      "step": 3104
    },
    {
      "epoch": 0.28608283042336574,
      "grad_norm": 0.9657865678405,
      "learning_rate": 4.276096463635825e-06,
      "loss": 0.1653,
      "step": 3105
    },
    {
      "epoch": 0.28617496660063574,
      "grad_norm": 0.93706574283875,
      "learning_rate": 4.275560269466358e-06,
      "loss": 0.1673,
      "step": 3106
    },
    {
      "epoch": 0.28626710277790574,
      "grad_norm": 0.9397267517974345,
      "learning_rate": 4.275023910435928e-06,
      "loss": 0.1537,
      "step": 3107
    },
    {
      "epoch": 0.28635923895517573,
      "grad_norm": 0.9004009633111321,
      "learning_rate": 4.274487386594338e-06,
      "loss": 0.1538,
      "step": 3108
    },
    {
      "epoch": 0.28645137513244573,
      "grad_norm": 0.9613421096528934,
      "learning_rate": 4.273950697991402e-06,
      "loss": 0.162,
      "step": 3109
    },
    {
      "epoch": 0.2865435113097158,
      "grad_norm": 0.8638174797582878,
      "learning_rate": 4.273413844676953e-06,
      "loss": 0.1519,
      "step": 3110
    },
    {
      "epoch": 0.2866356474869858,
      "grad_norm": 0.9219609665483445,
      "learning_rate": 4.272876826700838e-06,
      "loss": 0.1581,
      "step": 3111
    },
    {
      "epoch": 0.2867277836642558,
      "grad_norm": 0.9123932775435808,
      "learning_rate": 4.27233964411292e-06,
      "loss": 0.1677,
      "step": 3112
    },
    {
      "epoch": 0.28681991984152577,
      "grad_norm": 0.8596332032462187,
      "learning_rate": 4.271802296963073e-06,
      "loss": 0.1476,
      "step": 3113
    },
    {
      "epoch": 0.28691205601879577,
      "grad_norm": 0.9236043959851093,
      "learning_rate": 4.271264785301194e-06,
      "loss": 0.1564,
      "step": 3114
    },
    {
      "epoch": 0.28700419219606577,
      "grad_norm": 0.9562016916199808,
      "learning_rate": 4.270727109177188e-06,
      "loss": 0.17,
      "step": 3115
    },
    {
      "epoch": 0.28709632837333576,
      "grad_norm": 0.9001979247167449,
      "learning_rate": 4.270189268640979e-06,
      "loss": 0.1626,
      "step": 3116
    },
    {
      "epoch": 0.2871884645506058,
      "grad_norm": 0.8700019211927976,
      "learning_rate": 4.269651263742507e-06,
      "loss": 0.1634,
      "step": 3117
    },
    {
      "epoch": 0.2872806007278758,
      "grad_norm": 0.9996977490972816,
      "learning_rate": 4.269113094531724e-06,
      "loss": 0.1679,
      "step": 3118
    },
    {
      "epoch": 0.2873727369051458,
      "grad_norm": 0.9392105076354225,
      "learning_rate": 4.268574761058601e-06,
      "loss": 0.1763,
      "step": 3119
    },
    {
      "epoch": 0.2874648730824158,
      "grad_norm": 0.9496915781676928,
      "learning_rate": 4.26803626337312e-06,
      "loss": 0.1546,
      "step": 3120
    },
    {
      "epoch": 0.2875570092596858,
      "grad_norm": 0.9427266315727539,
      "learning_rate": 4.267497601525281e-06,
      "loss": 0.1687,
      "step": 3121
    },
    {
      "epoch": 0.2876491454369558,
      "grad_norm": 0.8473345537965931,
      "learning_rate": 4.266958775565101e-06,
      "loss": 0.1401,
      "step": 3122
    },
    {
      "epoch": 0.28774128161422585,
      "grad_norm": 0.9460675493514902,
      "learning_rate": 4.266419785542607e-06,
      "loss": 0.1626,
      "step": 3123
    },
    {
      "epoch": 0.28783341779149585,
      "grad_norm": 0.986061711219792,
      "learning_rate": 4.265880631507847e-06,
      "loss": 0.162,
      "step": 3124
    },
    {
      "epoch": 0.28792555396876585,
      "grad_norm": 0.9533196068906457,
      "learning_rate": 4.265341313510879e-06,
      "loss": 0.163,
      "step": 3125
    },
    {
      "epoch": 0.28801769014603584,
      "grad_norm": 0.9354306059737347,
      "learning_rate": 4.264801831601781e-06,
      "loss": 0.17,
      "step": 3126
    },
    {
      "epoch": 0.28810982632330584,
      "grad_norm": 0.8667926960766736,
      "learning_rate": 4.264262185830643e-06,
      "loss": 0.1498,
      "step": 3127
    },
    {
      "epoch": 0.28820196250057584,
      "grad_norm": 1.0641447059766413,
      "learning_rate": 4.263722376247571e-06,
      "loss": 0.1683,
      "step": 3128
    },
    {
      "epoch": 0.28829409867784583,
      "grad_norm": 0.9032416434200741,
      "learning_rate": 4.263182402902687e-06,
      "loss": 0.154,
      "step": 3129
    },
    {
      "epoch": 0.2883862348551159,
      "grad_norm": 0.8697261860817109,
      "learning_rate": 4.262642265846127e-06,
      "loss": 0.1479,
      "step": 3130
    },
    {
      "epoch": 0.2884783710323859,
      "grad_norm": 0.9237533351121093,
      "learning_rate": 4.262101965128042e-06,
      "loss": 0.159,
      "step": 3131
    },
    {
      "epoch": 0.2885705072096559,
      "grad_norm": 0.9107264611849066,
      "learning_rate": 4.261561500798601e-06,
      "loss": 0.1612,
      "step": 3132
    },
    {
      "epoch": 0.2886626433869259,
      "grad_norm": 0.8700605038024919,
      "learning_rate": 4.261020872907985e-06,
      "loss": 0.1582,
      "step": 3133
    },
    {
      "epoch": 0.2887547795641959,
      "grad_norm": 0.9432745501419812,
      "learning_rate": 4.26048008150639e-06,
      "loss": 0.164,
      "step": 3134
    },
    {
      "epoch": 0.28884691574146587,
      "grad_norm": 0.9282434523299004,
      "learning_rate": 4.259939126644032e-06,
      "loss": 0.1679,
      "step": 3135
    },
    {
      "epoch": 0.28893905191873587,
      "grad_norm": 0.9423611364408566,
      "learning_rate": 4.259398008371136e-06,
      "loss": 0.1676,
      "step": 3136
    },
    {
      "epoch": 0.2890311880960059,
      "grad_norm": 0.8685127726333524,
      "learning_rate": 4.258856726737945e-06,
      "loss": 0.1492,
      "step": 3137
    },
    {
      "epoch": 0.2891233242732759,
      "grad_norm": 0.9771353056369224,
      "learning_rate": 4.258315281794718e-06,
      "loss": 0.1631,
      "step": 3138
    },
    {
      "epoch": 0.2892154604505459,
      "grad_norm": 0.9624194484481019,
      "learning_rate": 4.257773673591728e-06,
      "loss": 0.1498,
      "step": 3139
    },
    {
      "epoch": 0.2893075966278159,
      "grad_norm": 0.9092065792466625,
      "learning_rate": 4.257231902179263e-06,
      "loss": 0.1504,
      "step": 3140
    },
    {
      "epoch": 0.2893997328050859,
      "grad_norm": 0.956521824952388,
      "learning_rate": 4.256689967607627e-06,
      "loss": 0.1725,
      "step": 3141
    },
    {
      "epoch": 0.2894918689823559,
      "grad_norm": 0.9338296990138528,
      "learning_rate": 4.256147869927137e-06,
      "loss": 0.1581,
      "step": 3142
    },
    {
      "epoch": 0.2895840051596259,
      "grad_norm": 0.888802037271971,
      "learning_rate": 4.25560560918813e-06,
      "loss": 0.1602,
      "step": 3143
    },
    {
      "epoch": 0.28967614133689595,
      "grad_norm": 0.9319211913936077,
      "learning_rate": 4.255063185440953e-06,
      "loss": 0.1654,
      "step": 3144
    },
    {
      "epoch": 0.28976827751416595,
      "grad_norm": 0.9786469213747607,
      "learning_rate": 4.254520598735971e-06,
      "loss": 0.1824,
      "step": 3145
    },
    {
      "epoch": 0.28986041369143595,
      "grad_norm": 0.9157519191307902,
      "learning_rate": 4.253977849123561e-06,
      "loss": 0.1612,
      "step": 3146
    },
    {
      "epoch": 0.28995254986870594,
      "grad_norm": 0.9298000384411869,
      "learning_rate": 4.25343493665412e-06,
      "loss": 0.15,
      "step": 3147
    },
    {
      "epoch": 0.29004468604597594,
      "grad_norm": 0.9180849563596113,
      "learning_rate": 4.252891861378056e-06,
      "loss": 0.1682,
      "step": 3148
    },
    {
      "epoch": 0.29013682222324594,
      "grad_norm": 0.970214702091066,
      "learning_rate": 4.252348623345794e-06,
      "loss": 0.1724,
      "step": 3149
    },
    {
      "epoch": 0.29022895840051594,
      "grad_norm": 0.8712777245705906,
      "learning_rate": 4.2518052226077734e-06,
      "loss": 0.1471,
      "step": 3150
    },
    {
      "epoch": 0.290321094577786,
      "grad_norm": 0.8772532104660963,
      "learning_rate": 4.25126165921445e-06,
      "loss": 0.1586,
      "step": 3151
    },
    {
      "epoch": 0.290413230755056,
      "grad_norm": 0.8603518107957989,
      "learning_rate": 4.250717933216293e-06,
      "loss": 0.1485,
      "step": 3152
    },
    {
      "epoch": 0.290505366932326,
      "grad_norm": 0.9476717820121503,
      "learning_rate": 4.250174044663787e-06,
      "loss": 0.1641,
      "step": 3153
    },
    {
      "epoch": 0.290597503109596,
      "grad_norm": 0.9518921904154757,
      "learning_rate": 4.249629993607433e-06,
      "loss": 0.1602,
      "step": 3154
    },
    {
      "epoch": 0.290689639286866,
      "grad_norm": 0.8289642643179239,
      "learning_rate": 4.249085780097746e-06,
      "loss": 0.1506,
      "step": 3155
    },
    {
      "epoch": 0.29078177546413597,
      "grad_norm": 0.9148601434226283,
      "learning_rate": 4.248541404185255e-06,
      "loss": 0.1575,
      "step": 3156
    },
    {
      "epoch": 0.290873911641406,
      "grad_norm": 0.944322099578078,
      "learning_rate": 4.247996865920509e-06,
      "loss": 0.1676,
      "step": 3157
    },
    {
      "epoch": 0.290966047818676,
      "grad_norm": 1.0149429041739264,
      "learning_rate": 4.247452165354064e-06,
      "loss": 0.1757,
      "step": 3158
    },
    {
      "epoch": 0.291058183995946,
      "grad_norm": 0.8897165675585696,
      "learning_rate": 4.246907302536497e-06,
      "loss": 0.1503,
      "step": 3159
    },
    {
      "epoch": 0.291150320173216,
      "grad_norm": 0.9069588057960449,
      "learning_rate": 4.246362277518399e-06,
      "loss": 0.1633,
      "step": 3160
    },
    {
      "epoch": 0.291242456350486,
      "grad_norm": 0.8724677368674314,
      "learning_rate": 4.245817090350377e-06,
      "loss": 0.1507,
      "step": 3161
    },
    {
      "epoch": 0.291334592527756,
      "grad_norm": 0.8859929836695598,
      "learning_rate": 4.245271741083049e-06,
      "loss": 0.1669,
      "step": 3162
    },
    {
      "epoch": 0.291426728705026,
      "grad_norm": 0.990522122817011,
      "learning_rate": 4.244726229767052e-06,
      "loss": 0.1826,
      "step": 3163
    },
    {
      "epoch": 0.29151886488229606,
      "grad_norm": 0.8546328526513989,
      "learning_rate": 4.2441805564530366e-06,
      "loss": 0.1501,
      "step": 3164
    },
    {
      "epoch": 0.29161100105956606,
      "grad_norm": 0.9211953614605264,
      "learning_rate": 4.2436347211916695e-06,
      "loss": 0.1639,
      "step": 3165
    },
    {
      "epoch": 0.29170313723683605,
      "grad_norm": 1.1441021991526923,
      "learning_rate": 4.243088724033632e-06,
      "loss": 0.1586,
      "step": 3166
    },
    {
      "epoch": 0.29179527341410605,
      "grad_norm": 0.9121448781527541,
      "learning_rate": 4.242542565029617e-06,
      "loss": 0.1676,
      "step": 3167
    },
    {
      "epoch": 0.29188740959137605,
      "grad_norm": 0.8570822660987969,
      "learning_rate": 4.241996244230338e-06,
      "loss": 0.1572,
      "step": 3168
    },
    {
      "epoch": 0.29197954576864604,
      "grad_norm": 0.8798918108299641,
      "learning_rate": 4.24144976168652e-06,
      "loss": 0.1552,
      "step": 3169
    },
    {
      "epoch": 0.29207168194591604,
      "grad_norm": 0.9012805667976503,
      "learning_rate": 4.240903117448904e-06,
      "loss": 0.1608,
      "step": 3170
    },
    {
      "epoch": 0.2921638181231861,
      "grad_norm": 0.8268739629189876,
      "learning_rate": 4.240356311568247e-06,
      "loss": 0.1454,
      "step": 3171
    },
    {
      "epoch": 0.2922559543004561,
      "grad_norm": 0.9519215570676918,
      "learning_rate": 4.239809344095319e-06,
      "loss": 0.1645,
      "step": 3172
    },
    {
      "epoch": 0.2923480904777261,
      "grad_norm": 0.9360762926839049,
      "learning_rate": 4.239262215080906e-06,
      "loss": 0.1584,
      "step": 3173
    },
    {
      "epoch": 0.2924402266549961,
      "grad_norm": 0.9743162418622031,
      "learning_rate": 4.238714924575809e-06,
      "loss": 0.185,
      "step": 3174
    },
    {
      "epoch": 0.2925323628322661,
      "grad_norm": 0.8853535313691572,
      "learning_rate": 4.238167472630844e-06,
      "loss": 0.1475,
      "step": 3175
    },
    {
      "epoch": 0.2926244990095361,
      "grad_norm": 0.9429373674217792,
      "learning_rate": 4.237619859296842e-06,
      "loss": 0.1615,
      "step": 3176
    },
    {
      "epoch": 0.2927166351868061,
      "grad_norm": 0.8754838330157808,
      "learning_rate": 4.237072084624649e-06,
      "loss": 0.141,
      "step": 3177
    },
    {
      "epoch": 0.2928087713640761,
      "grad_norm": 0.9265393674777754,
      "learning_rate": 4.2365241486651275e-06,
      "loss": 0.1543,
      "step": 3178
    },
    {
      "epoch": 0.2929009075413461,
      "grad_norm": 0.892789764988484,
      "learning_rate": 4.235976051469151e-06,
      "loss": 0.1626,
      "step": 3179
    },
    {
      "epoch": 0.2929930437186161,
      "grad_norm": 0.8348631663386511,
      "learning_rate": 4.23542779308761e-06,
      "loss": 0.1429,
      "step": 3180
    },
    {
      "epoch": 0.2930851798958861,
      "grad_norm": 0.8821394531522185,
      "learning_rate": 4.234879373571413e-06,
      "loss": 0.162,
      "step": 3181
    },
    {
      "epoch": 0.2931773160731561,
      "grad_norm": 0.8806914298120246,
      "learning_rate": 4.234330792971479e-06,
      "loss": 0.1632,
      "step": 3182
    },
    {
      "epoch": 0.2932694522504261,
      "grad_norm": 0.8918627316336875,
      "learning_rate": 4.233782051338745e-06,
      "loss": 0.1617,
      "step": 3183
    },
    {
      "epoch": 0.2933615884276961,
      "grad_norm": 0.8678930306994892,
      "learning_rate": 4.23323314872416e-06,
      "loss": 0.1533,
      "step": 3184
    },
    {
      "epoch": 0.29345372460496616,
      "grad_norm": 0.9178630543984329,
      "learning_rate": 4.232684085178691e-06,
      "loss": 0.1649,
      "step": 3185
    },
    {
      "epoch": 0.29354586078223616,
      "grad_norm": 0.8753634315475634,
      "learning_rate": 4.232134860753318e-06,
      "loss": 0.1673,
      "step": 3186
    },
    {
      "epoch": 0.29363799695950615,
      "grad_norm": 0.8870097305860352,
      "learning_rate": 4.231585475499037e-06,
      "loss": 0.1448,
      "step": 3187
    },
    {
      "epoch": 0.29373013313677615,
      "grad_norm": 0.9544469402594513,
      "learning_rate": 4.231035929466858e-06,
      "loss": 0.1595,
      "step": 3188
    },
    {
      "epoch": 0.29382226931404615,
      "grad_norm": 0.9241039149441995,
      "learning_rate": 4.230486222707807e-06,
      "loss": 0.1527,
      "step": 3189
    },
    {
      "epoch": 0.29391440549131614,
      "grad_norm": 0.8843626519655292,
      "learning_rate": 4.229936355272924e-06,
      "loss": 0.1617,
      "step": 3190
    },
    {
      "epoch": 0.2940065416685862,
      "grad_norm": 0.9271307349122019,
      "learning_rate": 4.229386327213264e-06,
      "loss": 0.1611,
      "step": 3191
    },
    {
      "epoch": 0.2940986778458562,
      "grad_norm": 0.9492531025198703,
      "learning_rate": 4.228836138579897e-06,
      "loss": 0.1729,
      "step": 3192
    },
    {
      "epoch": 0.2941908140231262,
      "grad_norm": 0.9295851214577007,
      "learning_rate": 4.2282857894239085e-06,
      "loss": 0.1687,
      "step": 3193
    },
    {
      "epoch": 0.2942829502003962,
      "grad_norm": 0.977256783995922,
      "learning_rate": 4.227735279796399e-06,
      "loss": 0.1628,
      "step": 3194
    },
    {
      "epoch": 0.2943750863776662,
      "grad_norm": 0.8930681892980293,
      "learning_rate": 4.227184609748483e-06,
      "loss": 0.1693,
      "step": 3195
    },
    {
      "epoch": 0.2944672225549362,
      "grad_norm": 0.8933894768496947,
      "learning_rate": 4.226633779331289e-06,
      "loss": 0.1508,
      "step": 3196
    },
    {
      "epoch": 0.2945593587322062,
      "grad_norm": 0.8927683830142663,
      "learning_rate": 4.226082788595965e-06,
      "loss": 0.1453,
      "step": 3197
    },
    {
      "epoch": 0.29465149490947623,
      "grad_norm": 0.9104305630689763,
      "learning_rate": 4.225531637593666e-06,
      "loss": 0.1563,
      "step": 3198
    },
    {
      "epoch": 0.2947436310867462,
      "grad_norm": 0.9241605921153313,
      "learning_rate": 4.2249803263755695e-06,
      "loss": 0.1743,
      "step": 3199
    },
    {
      "epoch": 0.2948357672640162,
      "grad_norm": 0.9296113117121186,
      "learning_rate": 4.2244288549928645e-06,
      "loss": 0.1516,
      "step": 3200
    },
    {
      "epoch": 0.2949279034412862,
      "grad_norm": 0.9199239821431868,
      "learning_rate": 4.223877223496754e-06,
      "loss": 0.1663,
      "step": 3201
    },
    {
      "epoch": 0.2950200396185562,
      "grad_norm": 0.9000094471075423,
      "learning_rate": 4.223325431938459e-06,
      "loss": 0.1564,
      "step": 3202
    },
    {
      "epoch": 0.2951121757958262,
      "grad_norm": 0.9194190986912426,
      "learning_rate": 4.2227734803692115e-06,
      "loss": 0.1555,
      "step": 3203
    },
    {
      "epoch": 0.2952043119730962,
      "grad_norm": 0.8767195760502394,
      "learning_rate": 4.2222213688402605e-06,
      "loss": 0.1386,
      "step": 3204
    },
    {
      "epoch": 0.29529644815036626,
      "grad_norm": 0.8871918600848787,
      "learning_rate": 4.22166909740287e-06,
      "loss": 0.144,
      "step": 3205
    },
    {
      "epoch": 0.29538858432763626,
      "grad_norm": 1.0100555949711532,
      "learning_rate": 4.221116666108319e-06,
      "loss": 0.171,
      "step": 3206
    },
    {
      "epoch": 0.29548072050490626,
      "grad_norm": 0.907837346659232,
      "learning_rate": 4.2205640750079e-06,
      "loss": 0.1585,
      "step": 3207
    },
    {
      "epoch": 0.29557285668217625,
      "grad_norm": 0.9685782224087519,
      "learning_rate": 4.220011324152922e-06,
      "loss": 0.1694,
      "step": 3208
    },
    {
      "epoch": 0.29566499285944625,
      "grad_norm": 0.9318424056577996,
      "learning_rate": 4.219458413594707e-06,
      "loss": 0.1661,
      "step": 3209
    },
    {
      "epoch": 0.29575712903671625,
      "grad_norm": 0.975106382055604,
      "learning_rate": 4.218905343384593e-06,
      "loss": 0.1648,
      "step": 3210
    },
    {
      "epoch": 0.29584926521398625,
      "grad_norm": 0.8829421015276901,
      "learning_rate": 4.218352113573933e-06,
      "loss": 0.161,
      "step": 3211
    },
    {
      "epoch": 0.2959414013912563,
      "grad_norm": 0.9457196742302185,
      "learning_rate": 4.217798724214094e-06,
      "loss": 0.176,
      "step": 3212
    },
    {
      "epoch": 0.2960335375685263,
      "grad_norm": 0.9877734213208268,
      "learning_rate": 4.21724517535646e-06,
      "loss": 0.161,
      "step": 3213
    },
    {
      "epoch": 0.2961256737457963,
      "grad_norm": 0.8864723853789074,
      "learning_rate": 4.216691467052426e-06,
      "loss": 0.1501,
      "step": 3214
    },
    {
      "epoch": 0.2962178099230663,
      "grad_norm": 0.8725650110211445,
      "learning_rate": 4.216137599353404e-06,
      "loss": 0.149,
      "step": 3215
    },
    {
      "epoch": 0.2963099461003363,
      "grad_norm": 0.9069598790303386,
      "learning_rate": 4.215583572310821e-06,
      "loss": 0.1522,
      "step": 3216
    },
    {
      "epoch": 0.2964020822776063,
      "grad_norm": 0.9359483520411078,
      "learning_rate": 4.2150293859761196e-06,
      "loss": 0.1575,
      "step": 3217
    },
    {
      "epoch": 0.29649421845487633,
      "grad_norm": 0.9376534373883524,
      "learning_rate": 4.214475040400755e-06,
      "loss": 0.1693,
      "step": 3218
    },
    {
      "epoch": 0.29658635463214633,
      "grad_norm": 0.9605366150327874,
      "learning_rate": 4.213920535636198e-06,
      "loss": 0.1555,
      "step": 3219
    },
    {
      "epoch": 0.29667849080941633,
      "grad_norm": 0.9239353702038833,
      "learning_rate": 4.213365871733934e-06,
      "loss": 0.1589,
      "step": 3220
    },
    {
      "epoch": 0.2967706269866863,
      "grad_norm": 0.9265223393518568,
      "learning_rate": 4.212811048745467e-06,
      "loss": 0.1625,
      "step": 3221
    },
    {
      "epoch": 0.2968627631639563,
      "grad_norm": 0.9788953150847244,
      "learning_rate": 4.212256066722307e-06,
      "loss": 0.1648,
      "step": 3222
    },
    {
      "epoch": 0.2969548993412263,
      "grad_norm": 0.8587737017236943,
      "learning_rate": 4.211700925715988e-06,
      "loss": 0.1434,
      "step": 3223
    },
    {
      "epoch": 0.2970470355184963,
      "grad_norm": 0.9571360236107946,
      "learning_rate": 4.211145625778054e-06,
      "loss": 0.1718,
      "step": 3224
    },
    {
      "epoch": 0.29713917169576637,
      "grad_norm": 0.8948875668309092,
      "learning_rate": 4.2105901669600645e-06,
      "loss": 0.1493,
      "step": 3225
    },
    {
      "epoch": 0.29723130787303637,
      "grad_norm": 0.8698739984411084,
      "learning_rate": 4.210034549313594e-06,
      "loss": 0.1537,
      "step": 3226
    },
    {
      "epoch": 0.29732344405030636,
      "grad_norm": 0.9127957190567569,
      "learning_rate": 4.2094787728902305e-06,
      "loss": 0.154,
      "step": 3227
    },
    {
      "epoch": 0.29741558022757636,
      "grad_norm": 0.9743876075133446,
      "learning_rate": 4.20892283774158e-06,
      "loss": 0.1653,
      "step": 3228
    },
    {
      "epoch": 0.29750771640484636,
      "grad_norm": 0.9697707594140141,
      "learning_rate": 4.20836674391926e-06,
      "loss": 0.1628,
      "step": 3229
    },
    {
      "epoch": 0.29759985258211635,
      "grad_norm": 0.9921447822593994,
      "learning_rate": 4.207810491474904e-06,
      "loss": 0.1741,
      "step": 3230
    },
    {
      "epoch": 0.29769198875938635,
      "grad_norm": 1.0175502207785552,
      "learning_rate": 4.207254080460161e-06,
      "loss": 0.1759,
      "step": 3231
    },
    {
      "epoch": 0.2977841249366564,
      "grad_norm": 0.8958978146833146,
      "learning_rate": 4.206697510926691e-06,
      "loss": 0.1538,
      "step": 3232
    },
    {
      "epoch": 0.2978762611139264,
      "grad_norm": 0.950880039603536,
      "learning_rate": 4.206140782926174e-06,
      "loss": 0.1721,
      "step": 3233
    },
    {
      "epoch": 0.2979683972911964,
      "grad_norm": 0.925327176872321,
      "learning_rate": 4.205583896510303e-06,
      "loss": 0.1595,
      "step": 3234
    },
    {
      "epoch": 0.2980605334684664,
      "grad_norm": 0.9532665759717548,
      "learning_rate": 4.2050268517307816e-06,
      "loss": 0.1639,
      "step": 3235
    },
    {
      "epoch": 0.2981526696457364,
      "grad_norm": 0.9291134573772569,
      "learning_rate": 4.204469648639335e-06,
      "loss": 0.1715,
      "step": 3236
    },
    {
      "epoch": 0.2982448058230064,
      "grad_norm": 0.8797274289945238,
      "learning_rate": 4.203912287287697e-06,
      "loss": 0.1604,
      "step": 3237
    },
    {
      "epoch": 0.2983369420002764,
      "grad_norm": 0.9040168151293431,
      "learning_rate": 4.203354767727621e-06,
      "loss": 0.1658,
      "step": 3238
    },
    {
      "epoch": 0.29842907817754644,
      "grad_norm": 0.985469121655086,
      "learning_rate": 4.202797090010871e-06,
      "loss": 0.1692,
      "step": 3239
    },
    {
      "epoch": 0.29852121435481643,
      "grad_norm": 0.8991733097245737,
      "learning_rate": 4.202239254189228e-06,
      "loss": 0.1527,
      "step": 3240
    },
    {
      "epoch": 0.29861335053208643,
      "grad_norm": 0.9968726368094237,
      "learning_rate": 4.2016812603144865e-06,
      "loss": 0.1768,
      "step": 3241
    },
    {
      "epoch": 0.2987054867093564,
      "grad_norm": 0.9261811611053946,
      "learning_rate": 4.201123108438457e-06,
      "loss": 0.1609,
      "step": 3242
    },
    {
      "epoch": 0.2987976228866264,
      "grad_norm": 0.9805890401028196,
      "learning_rate": 4.2005647986129635e-06,
      "loss": 0.163,
      "step": 3243
    },
    {
      "epoch": 0.2988897590638964,
      "grad_norm": 0.9154281842963401,
      "learning_rate": 4.2000063308898466e-06,
      "loss": 0.154,
      "step": 3244
    },
    {
      "epoch": 0.2989818952411664,
      "grad_norm": 0.9263190062990657,
      "learning_rate": 4.199447705320958e-06,
      "loss": 0.159,
      "step": 3245
    },
    {
      "epoch": 0.29907403141843647,
      "grad_norm": 0.9684841984778471,
      "learning_rate": 4.1988889219581676e-06,
      "loss": 0.1809,
      "step": 3246
    },
    {
      "epoch": 0.29916616759570647,
      "grad_norm": 0.8439439005356527,
      "learning_rate": 4.198329980853357e-06,
      "loss": 0.1386,
      "step": 3247
    },
    {
      "epoch": 0.29925830377297646,
      "grad_norm": 0.8805133731749528,
      "learning_rate": 4.1977708820584265e-06,
      "loss": 0.1609,
      "step": 3248
    },
    {
      "epoch": 0.29935043995024646,
      "grad_norm": 0.9299348501988777,
      "learning_rate": 4.197211625625285e-06,
      "loss": 0.1614,
      "step": 3249
    },
    {
      "epoch": 0.29944257612751646,
      "grad_norm": 0.953423157440807,
      "learning_rate": 4.196652211605863e-06,
      "loss": 0.1753,
      "step": 3250
    },
    {
      "epoch": 0.29953471230478645,
      "grad_norm": 0.9650855610206487,
      "learning_rate": 4.196092640052099e-06,
      "loss": 0.1743,
      "step": 3251
    },
    {
      "epoch": 0.2996268484820565,
      "grad_norm": 0.8158109293633011,
      "learning_rate": 4.195532911015952e-06,
      "loss": 0.149,
      "step": 3252
    },
    {
      "epoch": 0.2997189846593265,
      "grad_norm": 1.0066905218223139,
      "learning_rate": 4.1949730245493915e-06,
      "loss": 0.166,
      "step": 3253
    },
    {
      "epoch": 0.2998111208365965,
      "grad_norm": 0.981891494249524,
      "learning_rate": 4.194412980704403e-06,
      "loss": 0.1683,
      "step": 3254
    },
    {
      "epoch": 0.2999032570138665,
      "grad_norm": 0.9388757636396333,
      "learning_rate": 4.1938527795329875e-06,
      "loss": 0.1695,
      "step": 3255
    },
    {
      "epoch": 0.2999953931911365,
      "grad_norm": 0.861978941124501,
      "learning_rate": 4.1932924210871585e-06,
      "loss": 0.1584,
      "step": 3256
    },
    {
      "epoch": 0.3000875293684065,
      "grad_norm": 0.9093301324713368,
      "learning_rate": 4.192731905418947e-06,
      "loss": 0.1612,
      "step": 3257
    },
    {
      "epoch": 0.3001796655456765,
      "grad_norm": 0.9074202593094116,
      "learning_rate": 4.192171232580395e-06,
      "loss": 0.1613,
      "step": 3258
    },
    {
      "epoch": 0.30027180172294654,
      "grad_norm": 0.8515153187239818,
      "learning_rate": 4.191610402623561e-06,
      "loss": 0.1501,
      "step": 3259
    },
    {
      "epoch": 0.30036393790021654,
      "grad_norm": 0.9327983127436237,
      "learning_rate": 4.191049415600521e-06,
      "loss": 0.1732,
      "step": 3260
    },
    {
      "epoch": 0.30045607407748653,
      "grad_norm": 0.96624826925591,
      "learning_rate": 4.19048827156336e-06,
      "loss": 0.1695,
      "step": 3261
    },
    {
      "epoch": 0.30054821025475653,
      "grad_norm": 0.9123411497937238,
      "learning_rate": 4.189926970564181e-06,
      "loss": 0.1404,
      "step": 3262
    },
    {
      "epoch": 0.30064034643202653,
      "grad_norm": 0.9116714759394909,
      "learning_rate": 4.189365512655101e-06,
      "loss": 0.1605,
      "step": 3263
    },
    {
      "epoch": 0.3007324826092965,
      "grad_norm": 0.9543230905818623,
      "learning_rate": 4.188803897888251e-06,
      "loss": 0.1489,
      "step": 3264
    },
    {
      "epoch": 0.3008246187865665,
      "grad_norm": 0.946421886980175,
      "learning_rate": 4.188242126315778e-06,
      "loss": 0.1636,
      "step": 3265
    },
    {
      "epoch": 0.3009167549638366,
      "grad_norm": 0.9416980687774688,
      "learning_rate": 4.187680197989841e-06,
      "loss": 0.1693,
      "step": 3266
    },
    {
      "epoch": 0.30100889114110657,
      "grad_norm": 0.8628506075375213,
      "learning_rate": 4.187118112962616e-06,
      "loss": 0.1471,
      "step": 3267
    },
    {
      "epoch": 0.30110102731837657,
      "grad_norm": 0.9110568486745181,
      "learning_rate": 4.186555871286293e-06,
      "loss": 0.1405,
      "step": 3268
    },
    {
      "epoch": 0.30119316349564657,
      "grad_norm": 0.8937401299625113,
      "learning_rate": 4.185993473013076e-06,
      "loss": 0.1494,
      "step": 3269
    },
    {
      "epoch": 0.30128529967291656,
      "grad_norm": 0.9063871070755407,
      "learning_rate": 4.185430918195184e-06,
      "loss": 0.1538,
      "step": 3270
    },
    {
      "epoch": 0.30137743585018656,
      "grad_norm": 0.912457399295779,
      "learning_rate": 4.184868206884849e-06,
      "loss": 0.1485,
      "step": 3271
    },
    {
      "epoch": 0.30146957202745656,
      "grad_norm": 0.9949777663011297,
      "learning_rate": 4.18430533913432e-06,
      "loss": 0.1771,
      "step": 3272
    },
    {
      "epoch": 0.3015617082047266,
      "grad_norm": 0.9784479373923843,
      "learning_rate": 4.183742314995859e-06,
      "loss": 0.1618,
      "step": 3273
    },
    {
      "epoch": 0.3016538443819966,
      "grad_norm": 0.9112650889954245,
      "learning_rate": 4.183179134521743e-06,
      "loss": 0.1513,
      "step": 3274
    },
    {
      "epoch": 0.3017459805592666,
      "grad_norm": 0.9355584324086121,
      "learning_rate": 4.1826157977642634e-06,
      "loss": 0.1484,
      "step": 3275
    },
    {
      "epoch": 0.3018381167365366,
      "grad_norm": 0.8931369194300064,
      "learning_rate": 4.1820523047757246e-06,
      "loss": 0.1656,
      "step": 3276
    },
    {
      "epoch": 0.3019302529138066,
      "grad_norm": 0.9956521555709148,
      "learning_rate": 4.18148865560845e-06,
      "loss": 0.1626,
      "step": 3277
    },
    {
      "epoch": 0.3020223890910766,
      "grad_norm": 0.8874050966915844,
      "learning_rate": 4.180924850314771e-06,
      "loss": 0.1575,
      "step": 3278
    },
    {
      "epoch": 0.3021145252683466,
      "grad_norm": 0.9523402862573914,
      "learning_rate": 4.180360888947041e-06,
      "loss": 0.1703,
      "step": 3279
    },
    {
      "epoch": 0.30220666144561664,
      "grad_norm": 0.9287874514545038,
      "learning_rate": 4.179796771557619e-06,
      "loss": 0.1669,
      "step": 3280
    },
    {
      "epoch": 0.30229879762288664,
      "grad_norm": 0.8673579238655271,
      "learning_rate": 4.179232498198888e-06,
      "loss": 0.1503,
      "step": 3281
    },
    {
      "epoch": 0.30239093380015664,
      "grad_norm": 0.8794126748973937,
      "learning_rate": 4.178668068923238e-06,
      "loss": 0.1578,
      "step": 3282
    },
    {
      "epoch": 0.30248306997742663,
      "grad_norm": 0.9275772205249678,
      "learning_rate": 4.178103483783077e-06,
      "loss": 0.1525,
      "step": 3283
    },
    {
      "epoch": 0.30257520615469663,
      "grad_norm": 0.8968088357348379,
      "learning_rate": 4.177538742830828e-06,
      "loss": 0.1547,
      "step": 3284
    },
    {
      "epoch": 0.3026673423319666,
      "grad_norm": 0.8928338403213077,
      "learning_rate": 4.1769738461189245e-06,
      "loss": 0.1653,
      "step": 3285
    },
    {
      "epoch": 0.3027594785092367,
      "grad_norm": 0.9542122073006188,
      "learning_rate": 4.176408793699821e-06,
      "loss": 0.1528,
      "step": 3286
    },
    {
      "epoch": 0.3028516146865067,
      "grad_norm": 0.9825041852507498,
      "learning_rate": 4.1758435856259784e-06,
      "loss": 0.1642,
      "step": 3287
    },
    {
      "epoch": 0.3029437508637767,
      "grad_norm": 0.9378090275370146,
      "learning_rate": 4.17527822194988e-06,
      "loss": 0.1532,
      "step": 3288
    },
    {
      "epoch": 0.30303588704104667,
      "grad_norm": 0.925622043346032,
      "learning_rate": 4.174712702724017e-06,
      "loss": 0.1638,
      "step": 3289
    },
    {
      "epoch": 0.30312802321831667,
      "grad_norm": 0.8605309241655659,
      "learning_rate": 4.174147028000901e-06,
      "loss": 0.1538,
      "step": 3290
    },
    {
      "epoch": 0.30322015939558666,
      "grad_norm": 0.9165943989447812,
      "learning_rate": 4.173581197833052e-06,
      "loss": 0.1482,
      "step": 3291
    },
    {
      "epoch": 0.30331229557285666,
      "grad_norm": 0.8668274344509658,
      "learning_rate": 4.173015212273009e-06,
      "loss": 0.1509,
      "step": 3292
    },
    {
      "epoch": 0.3034044317501267,
      "grad_norm": 0.9491082207852997,
      "learning_rate": 4.1724490713733246e-06,
      "loss": 0.1514,
      "step": 3293
    },
    {
      "epoch": 0.3034965679273967,
      "grad_norm": 0.9456871597563634,
      "learning_rate": 4.171882775186563e-06,
      "loss": 0.1619,
      "step": 3294
    },
    {
      "epoch": 0.3035887041046667,
      "grad_norm": 0.9315199674334298,
      "learning_rate": 4.1713163237653055e-06,
      "loss": 0.1582,
      "step": 3295
    },
    {
      "epoch": 0.3036808402819367,
      "grad_norm": 0.9989797250234418,
      "learning_rate": 4.170749717162148e-06,
      "loss": 0.1632,
      "step": 3296
    },
    {
      "epoch": 0.3037729764592067,
      "grad_norm": 0.875493273649281,
      "learning_rate": 4.170182955429699e-06,
      "loss": 0.1616,
      "step": 3297
    },
    {
      "epoch": 0.3038651126364767,
      "grad_norm": 0.9281100714565997,
      "learning_rate": 4.169616038620583e-06,
      "loss": 0.1456,
      "step": 3298
    },
    {
      "epoch": 0.3039572488137467,
      "grad_norm": 0.8493906414552408,
      "learning_rate": 4.169048966787438e-06,
      "loss": 0.1478,
      "step": 3299
    },
    {
      "epoch": 0.30404938499101675,
      "grad_norm": 0.8952927753283536,
      "learning_rate": 4.168481739982917e-06,
      "loss": 0.1636,
      "step": 3300
    },
    {
      "epoch": 0.30414152116828674,
      "grad_norm": 0.8655868115020314,
      "learning_rate": 4.167914358259687e-06,
      "loss": 0.1548,
      "step": 3301
    },
    {
      "epoch": 0.30423365734555674,
      "grad_norm": 0.9919065938123784,
      "learning_rate": 4.167346821670429e-06,
      "loss": 0.1751,
      "step": 3302
    },
    {
      "epoch": 0.30432579352282674,
      "grad_norm": 0.9493609688016562,
      "learning_rate": 4.166779130267839e-06,
      "loss": 0.1665,
      "step": 3303
    },
    {
      "epoch": 0.30441792970009673,
      "grad_norm": 0.8983868029734609,
      "learning_rate": 4.166211284104629e-06,
      "loss": 0.1572,
      "step": 3304
    },
    {
      "epoch": 0.30451006587736673,
      "grad_norm": 0.9007939752199932,
      "learning_rate": 4.16564328323352e-06,
      "loss": 0.1698,
      "step": 3305
    },
    {
      "epoch": 0.30460220205463673,
      "grad_norm": 0.8913045218728333,
      "learning_rate": 4.165075127707254e-06,
      "loss": 0.1603,
      "step": 3306
    },
    {
      "epoch": 0.3046943382319068,
      "grad_norm": 0.8821994473863105,
      "learning_rate": 4.164506817578582e-06,
      "loss": 0.1536,
      "step": 3307
    },
    {
      "epoch": 0.3047864744091768,
      "grad_norm": 0.8662864009408308,
      "learning_rate": 4.163938352900274e-06,
      "loss": 0.1631,
      "step": 3308
    },
    {
      "epoch": 0.3048786105864468,
      "grad_norm": 0.9322076782697044,
      "learning_rate": 4.16336973372511e-06,
      "loss": 0.1707,
      "step": 3309
    },
    {
      "epoch": 0.30497074676371677,
      "grad_norm": 0.8791872015663708,
      "learning_rate": 4.162800960105889e-06,
      "loss": 0.1453,
      "step": 3310
    },
    {
      "epoch": 0.30506288294098677,
      "grad_norm": 0.9335680774636453,
      "learning_rate": 4.162232032095418e-06,
      "loss": 0.1639,
      "step": 3311
    },
    {
      "epoch": 0.30515501911825677,
      "grad_norm": 0.938972469326503,
      "learning_rate": 4.1616629497465245e-06,
      "loss": 0.1542,
      "step": 3312
    },
    {
      "epoch": 0.30524715529552676,
      "grad_norm": 0.9437698259785928,
      "learning_rate": 4.1610937131120474e-06,
      "loss": 0.1788,
      "step": 3313
    },
    {
      "epoch": 0.3053392914727968,
      "grad_norm": 0.8838873717685708,
      "learning_rate": 4.16052432224484e-06,
      "loss": 0.1577,
      "step": 3314
    },
    {
      "epoch": 0.3054314276500668,
      "grad_norm": 0.9391124996158003,
      "learning_rate": 4.159954777197771e-06,
      "loss": 0.1574,
      "step": 3315
    },
    {
      "epoch": 0.3055235638273368,
      "grad_norm": 0.9675529515690404,
      "learning_rate": 4.159385078023722e-06,
      "loss": 0.1664,
      "step": 3316
    },
    {
      "epoch": 0.3056157000046068,
      "grad_norm": 0.9691361853056687,
      "learning_rate": 4.15881522477559e-06,
      "loss": 0.1581,
      "step": 3317
    },
    {
      "epoch": 0.3057078361818768,
      "grad_norm": 0.9497540299432615,
      "learning_rate": 4.1582452175062854e-06,
      "loss": 0.1766,
      "step": 3318
    },
    {
      "epoch": 0.3057999723591468,
      "grad_norm": 0.9331780873537096,
      "learning_rate": 4.157675056268735e-06,
      "loss": 0.1581,
      "step": 3319
    },
    {
      "epoch": 0.30589210853641685,
      "grad_norm": 0.9282528349029902,
      "learning_rate": 4.157104741115876e-06,
      "loss": 0.1542,
      "step": 3320
    },
    {
      "epoch": 0.30598424471368685,
      "grad_norm": 0.936802223907317,
      "learning_rate": 4.156534272100664e-06,
      "loss": 0.1827,
      "step": 3321
    },
    {
      "epoch": 0.30607638089095685,
      "grad_norm": 0.9002015504623913,
      "learning_rate": 4.155963649276066e-06,
      "loss": 0.1593,
      "step": 3322
    },
    {
      "epoch": 0.30616851706822684,
      "grad_norm": 0.8840297918677589,
      "learning_rate": 4.155392872695066e-06,
      "loss": 0.1578,
      "step": 3323
    },
    {
      "epoch": 0.30626065324549684,
      "grad_norm": 0.8926966395195797,
      "learning_rate": 4.154821942410659e-06,
      "loss": 0.1528,
      "step": 3324
    },
    {
      "epoch": 0.30635278942276684,
      "grad_norm": 0.8927711731854681,
      "learning_rate": 4.154250858475857e-06,
      "loss": 0.1653,
      "step": 3325
    },
    {
      "epoch": 0.30644492560003683,
      "grad_norm": 0.9700319830098186,
      "learning_rate": 4.1536796209436835e-06,
      "loss": 0.1659,
      "step": 3326
    },
    {
      "epoch": 0.3065370617773069,
      "grad_norm": 0.9483174494393913,
      "learning_rate": 4.153108229867181e-06,
      "loss": 0.1665,
      "step": 3327
    },
    {
      "epoch": 0.3066291979545769,
      "grad_norm": 0.8748630059230683,
      "learning_rate": 4.1525366852994e-06,
      "loss": 0.1554,
      "step": 3328
    },
    {
      "epoch": 0.3067213341318469,
      "grad_norm": 0.8582455329358799,
      "learning_rate": 4.151964987293411e-06,
      "loss": 0.143,
      "step": 3329
    },
    {
      "epoch": 0.3068134703091169,
      "grad_norm": 0.8715858478170836,
      "learning_rate": 4.151393135902294e-06,
      "loss": 0.1583,
      "step": 3330
    },
    {
      "epoch": 0.3069056064863869,
      "grad_norm": 0.9516812753474873,
      "learning_rate": 4.150821131179148e-06,
      "loss": 0.1645,
      "step": 3331
    },
    {
      "epoch": 0.30699774266365687,
      "grad_norm": 0.9084710244319857,
      "learning_rate": 4.150248973177081e-06,
      "loss": 0.1507,
      "step": 3332
    },
    {
      "epoch": 0.30708987884092687,
      "grad_norm": 0.9165140829229909,
      "learning_rate": 4.14967666194922e-06,
      "loss": 0.1578,
      "step": 3333
    },
    {
      "epoch": 0.3071820150181969,
      "grad_norm": 0.8704866370447997,
      "learning_rate": 4.149104197548703e-06,
      "loss": 0.1517,
      "step": 3334
    },
    {
      "epoch": 0.3072741511954669,
      "grad_norm": 0.8936771673676237,
      "learning_rate": 4.148531580028685e-06,
      "loss": 0.1527,
      "step": 3335
    },
    {
      "epoch": 0.3073662873727369,
      "grad_norm": 0.9689189902531499,
      "learning_rate": 4.147958809442331e-06,
      "loss": 0.1379,
      "step": 3336
    },
    {
      "epoch": 0.3074584235500069,
      "grad_norm": 0.9222655925574594,
      "learning_rate": 4.147385885842824e-06,
      "loss": 0.1536,
      "step": 3337
    },
    {
      "epoch": 0.3075505597272769,
      "grad_norm": 0.9481217418968965,
      "learning_rate": 4.146812809283361e-06,
      "loss": 0.1663,
      "step": 3338
    },
    {
      "epoch": 0.3076426959045469,
      "grad_norm": 0.9071299429191669,
      "learning_rate": 4.14623957981715e-06,
      "loss": 0.1547,
      "step": 3339
    },
    {
      "epoch": 0.3077348320818169,
      "grad_norm": 0.9272233789397594,
      "learning_rate": 4.1456661974974185e-06,
      "loss": 0.1385,
      "step": 3340
    },
    {
      "epoch": 0.30782696825908695,
      "grad_norm": 0.9741525591963978,
      "learning_rate": 4.145092662377403e-06,
      "loss": 0.1641,
      "step": 3341
    },
    {
      "epoch": 0.30791910443635695,
      "grad_norm": 0.9253540203356433,
      "learning_rate": 4.144518974510358e-06,
      "loss": 0.17,
      "step": 3342
    },
    {
      "epoch": 0.30801124061362695,
      "grad_norm": 0.8975724542139679,
      "learning_rate": 4.143945133949547e-06,
      "loss": 0.1479,
      "step": 3343
    },
    {
      "epoch": 0.30810337679089694,
      "grad_norm": 0.9632358843689789,
      "learning_rate": 4.1433711407482544e-06,
      "loss": 0.1731,
      "step": 3344
    },
    {
      "epoch": 0.30819551296816694,
      "grad_norm": 1.0076660275039135,
      "learning_rate": 4.142796994959775e-06,
      "loss": 0.1857,
      "step": 3345
    },
    {
      "epoch": 0.30828764914543694,
      "grad_norm": 0.9007248222676459,
      "learning_rate": 4.142222696637417e-06,
      "loss": 0.1653,
      "step": 3346
    },
    {
      "epoch": 0.30837978532270693,
      "grad_norm": 0.8293657995382075,
      "learning_rate": 4.141648245834505e-06,
      "loss": 0.1557,
      "step": 3347
    },
    {
      "epoch": 0.308471921499977,
      "grad_norm": 0.9018556933312604,
      "learning_rate": 4.141073642604377e-06,
      "loss": 0.1507,
      "step": 3348
    },
    {
      "epoch": 0.308564057677247,
      "grad_norm": 1.0206437869118763,
      "learning_rate": 4.140498887000385e-06,
      "loss": 0.1612,
      "step": 3349
    },
    {
      "epoch": 0.308656193854517,
      "grad_norm": 0.8628938425077515,
      "learning_rate": 4.139923979075894e-06,
      "loss": 0.1537,
      "step": 3350
    },
    {
      "epoch": 0.308748330031787,
      "grad_norm": 0.9050313968713789,
      "learning_rate": 4.139348918884285e-06,
      "loss": 0.1655,
      "step": 3351
    },
    {
      "epoch": 0.308840466209057,
      "grad_norm": 0.9103712066570067,
      "learning_rate": 4.138773706478953e-06,
      "loss": 0.151,
      "step": 3352
    },
    {
      "epoch": 0.30893260238632697,
      "grad_norm": 0.92340284255135,
      "learning_rate": 4.138198341913305e-06,
      "loss": 0.1493,
      "step": 3353
    },
    {
      "epoch": 0.309024738563597,
      "grad_norm": 0.8688924319396153,
      "learning_rate": 4.137622825240767e-06,
      "loss": 0.1574,
      "step": 3354
    },
    {
      "epoch": 0.309116874740867,
      "grad_norm": 0.8993317455096554,
      "learning_rate": 4.1370471565147715e-06,
      "loss": 0.1575,
      "step": 3355
    },
    {
      "epoch": 0.309209010918137,
      "grad_norm": 0.928602372486308,
      "learning_rate": 4.1364713357887715e-06,
      "loss": 0.1543,
      "step": 3356
    },
    {
      "epoch": 0.309301147095407,
      "grad_norm": 1.061155027790231,
      "learning_rate": 4.1358953631162314e-06,
      "loss": 0.1598,
      "step": 3357
    },
    {
      "epoch": 0.309393283272677,
      "grad_norm": 0.9084105597716158,
      "learning_rate": 4.135319238550632e-06,
      "loss": 0.1559,
      "step": 3358
    },
    {
      "epoch": 0.309485419449947,
      "grad_norm": 0.9097245918542545,
      "learning_rate": 4.1347429621454645e-06,
      "loss": 0.1491,
      "step": 3359
    },
    {
      "epoch": 0.309577555627217,
      "grad_norm": 1.0176830209153604,
      "learning_rate": 4.134166533954238e-06,
      "loss": 0.1677,
      "step": 3360
    },
    {
      "epoch": 0.30966969180448706,
      "grad_norm": 0.9599751677181546,
      "learning_rate": 4.1335899540304715e-06,
      "loss": 0.1648,
      "step": 3361
    },
    {
      "epoch": 0.30976182798175705,
      "grad_norm": 0.9575989184802846,
      "learning_rate": 4.133013222427703e-06,
      "loss": 0.1663,
      "step": 3362
    },
    {
      "epoch": 0.30985396415902705,
      "grad_norm": 0.9533677266914239,
      "learning_rate": 4.132436339199481e-06,
      "loss": 0.1527,
      "step": 3363
    },
    {
      "epoch": 0.30994610033629705,
      "grad_norm": 0.943371411328731,
      "learning_rate": 4.131859304399368e-06,
      "loss": 0.1645,
      "step": 3364
    },
    {
      "epoch": 0.31003823651356704,
      "grad_norm": 0.9108660115026544,
      "learning_rate": 4.1312821180809445e-06,
      "loss": 0.1672,
      "step": 3365
    },
    {
      "epoch": 0.31013037269083704,
      "grad_norm": 0.930016137545348,
      "learning_rate": 4.130704780297801e-06,
      "loss": 0.1498,
      "step": 3366
    },
    {
      "epoch": 0.31022250886810704,
      "grad_norm": 0.9522554481405724,
      "learning_rate": 4.130127291103542e-06,
      "loss": 0.1644,
      "step": 3367
    },
    {
      "epoch": 0.3103146450453771,
      "grad_norm": 0.8842450520060663,
      "learning_rate": 4.129549650551788e-06,
      "loss": 0.1453,
      "step": 3368
    },
    {
      "epoch": 0.3104067812226471,
      "grad_norm": 0.9676290587085921,
      "learning_rate": 4.1289718586961755e-06,
      "loss": 0.1627,
      "step": 3369
    },
    {
      "epoch": 0.3104989173999171,
      "grad_norm": 0.8760194541205237,
      "learning_rate": 4.12839391559035e-06,
      "loss": 0.1573,
      "step": 3370
    },
    {
      "epoch": 0.3105910535771871,
      "grad_norm": 1.0120308223548502,
      "learning_rate": 4.127815821287973e-06,
      "loss": 0.1691,
      "step": 3371
    },
    {
      "epoch": 0.3106831897544571,
      "grad_norm": 1.0230226420993533,
      "learning_rate": 4.127237575842723e-06,
      "loss": 0.1727,
      "step": 3372
    },
    {
      "epoch": 0.3107753259317271,
      "grad_norm": 0.9527995171561136,
      "learning_rate": 4.126659179308289e-06,
      "loss": 0.167,
      "step": 3373
    },
    {
      "epoch": 0.3108674621089971,
      "grad_norm": 0.8802401223513894,
      "learning_rate": 4.126080631738374e-06,
      "loss": 0.1577,
      "step": 3374
    },
    {
      "epoch": 0.3109595982862671,
      "grad_norm": 1.0054768017771973,
      "learning_rate": 4.125501933186699e-06,
      "loss": 0.152,
      "step": 3375
    },
    {
      "epoch": 0.3110517344635371,
      "grad_norm": 0.9339483620066793,
      "learning_rate": 4.124923083706993e-06,
      "loss": 0.169,
      "step": 3376
    },
    {
      "epoch": 0.3111438706408071,
      "grad_norm": 0.891809746192897,
      "learning_rate": 4.124344083353005e-06,
      "loss": 0.1604,
      "step": 3377
    },
    {
      "epoch": 0.3112360068180771,
      "grad_norm": 0.9405125779039759,
      "learning_rate": 4.123764932178492e-06,
      "loss": 0.1537,
      "step": 3378
    },
    {
      "epoch": 0.3113281429953471,
      "grad_norm": 0.9114646595770788,
      "learning_rate": 4.123185630237233e-06,
      "loss": 0.1631,
      "step": 3379
    },
    {
      "epoch": 0.3114202791726171,
      "grad_norm": 0.9156889815669623,
      "learning_rate": 4.122606177583012e-06,
      "loss": 0.1571,
      "step": 3380
    },
    {
      "epoch": 0.3115124153498871,
      "grad_norm": 0.9232508366180471,
      "learning_rate": 4.122026574269633e-06,
      "loss": 0.1623,
      "step": 3381
    },
    {
      "epoch": 0.31160455152715716,
      "grad_norm": 0.9173313455540943,
      "learning_rate": 4.121446820350911e-06,
      "loss": 0.153,
      "step": 3382
    },
    {
      "epoch": 0.31169668770442716,
      "grad_norm": 0.9332944353788419,
      "learning_rate": 4.12086691588068e-06,
      "loss": 0.1565,
      "step": 3383
    },
    {
      "epoch": 0.31178882388169715,
      "grad_norm": 0.867962487245611,
      "learning_rate": 4.120286860912779e-06,
      "loss": 0.1477,
      "step": 3384
    },
    {
      "epoch": 0.31188096005896715,
      "grad_norm": 0.9925637374051337,
      "learning_rate": 4.11970665550107e-06,
      "loss": 0.1599,
      "step": 3385
    },
    {
      "epoch": 0.31197309623623715,
      "grad_norm": 0.9567564373610871,
      "learning_rate": 4.119126299699422e-06,
      "loss": 0.1695,
      "step": 3386
    },
    {
      "epoch": 0.31206523241350714,
      "grad_norm": 0.8915249939767821,
      "learning_rate": 4.118545793561724e-06,
      "loss": 0.1473,
      "step": 3387
    },
    {
      "epoch": 0.3121573685907772,
      "grad_norm": 0.9671789476742289,
      "learning_rate": 4.117965137141875e-06,
      "loss": 0.1586,
      "step": 3388
    },
    {
      "epoch": 0.3122495047680472,
      "grad_norm": 0.9078171347172409,
      "learning_rate": 4.117384330493789e-06,
      "loss": 0.143,
      "step": 3389
    },
    {
      "epoch": 0.3123416409453172,
      "grad_norm": 0.9564013420176967,
      "learning_rate": 4.1168033736713934e-06,
      "loss": 0.1657,
      "step": 3390
    },
    {
      "epoch": 0.3124337771225872,
      "grad_norm": 0.9466706325040476,
      "learning_rate": 4.116222266728631e-06,
      "loss": 0.1646,
      "step": 3391
    },
    {
      "epoch": 0.3125259132998572,
      "grad_norm": 0.9132831841251625,
      "learning_rate": 4.115641009719456e-06,
      "loss": 0.1468,
      "step": 3392
    },
    {
      "epoch": 0.3126180494771272,
      "grad_norm": 0.8936981888983082,
      "learning_rate": 4.11505960269784e-06,
      "loss": 0.1555,
      "step": 3393
    },
    {
      "epoch": 0.3127101856543972,
      "grad_norm": 0.9496755272541821,
      "learning_rate": 4.114478045717767e-06,
      "loss": 0.1644,
      "step": 3394
    },
    {
      "epoch": 0.31280232183166723,
      "grad_norm": 0.9503775314773306,
      "learning_rate": 4.113896338833233e-06,
      "loss": 0.1553,
      "step": 3395
    },
    {
      "epoch": 0.3128944580089372,
      "grad_norm": 1.0006972046347853,
      "learning_rate": 4.11331448209825e-06,
      "loss": 0.1795,
      "step": 3396
    },
    {
      "epoch": 0.3129865941862072,
      "grad_norm": 0.8495640118383059,
      "learning_rate": 4.112732475566844e-06,
      "loss": 0.1525,
      "step": 3397
    },
    {
      "epoch": 0.3130787303634772,
      "grad_norm": 0.9164288932535483,
      "learning_rate": 4.112150319293055e-06,
      "loss": 0.1616,
      "step": 3398
    },
    {
      "epoch": 0.3131708665407472,
      "grad_norm": 0.9069820705502305,
      "learning_rate": 4.111568013330933e-06,
      "loss": 0.1549,
      "step": 3399
    },
    {
      "epoch": 0.3132630027180172,
      "grad_norm": 0.8560780037635382,
      "learning_rate": 4.110985557734549e-06,
      "loss": 0.1411,
      "step": 3400
    },
    {
      "epoch": 0.3133551388952872,
      "grad_norm": 0.9304730799382744,
      "learning_rate": 4.110402952557982e-06,
      "loss": 0.1589,
      "step": 3401
    },
    {
      "epoch": 0.31344727507255726,
      "grad_norm": 0.9225084800079449,
      "learning_rate": 4.109820197855329e-06,
      "loss": 0.1565,
      "step": 3402
    },
    {
      "epoch": 0.31353941124982726,
      "grad_norm": 0.9449480858494125,
      "learning_rate": 4.109237293680697e-06,
      "loss": 0.1578,
      "step": 3403
    },
    {
      "epoch": 0.31363154742709726,
      "grad_norm": 0.7922549635079016,
      "learning_rate": 4.108654240088208e-06,
      "loss": 0.1331,
      "step": 3404
    },
    {
      "epoch": 0.31372368360436725,
      "grad_norm": 0.9651881798215124,
      "learning_rate": 4.1080710371319995e-06,
      "loss": 0.1516,
      "step": 3405
    },
    {
      "epoch": 0.31381581978163725,
      "grad_norm": 0.9823047227787114,
      "learning_rate": 4.107487684866224e-06,
      "loss": 0.1681,
      "step": 3406
    },
    {
      "epoch": 0.31390795595890725,
      "grad_norm": 0.8984657664496137,
      "learning_rate": 4.106904183345042e-06,
      "loss": 0.1454,
      "step": 3407
    },
    {
      "epoch": 0.31400009213617724,
      "grad_norm": 1.0948214742805147,
      "learning_rate": 4.106320532622635e-06,
      "loss": 0.1789,
      "step": 3408
    },
    {
      "epoch": 0.3140922283134473,
      "grad_norm": 1.0380192093032585,
      "learning_rate": 4.105736732753193e-06,
      "loss": 0.176,
      "step": 3409
    },
    {
      "epoch": 0.3141843644907173,
      "grad_norm": 0.8916083716066878,
      "learning_rate": 4.1051527837909225e-06,
      "loss": 0.1372,
      "step": 3410
    },
    {
      "epoch": 0.3142765006679873,
      "grad_norm": 0.9012931626518442,
      "learning_rate": 4.104568685790043e-06,
      "loss": 0.1487,
      "step": 3411
    },
    {
      "epoch": 0.3143686368452573,
      "grad_norm": 0.9137654798095866,
      "learning_rate": 4.103984438804789e-06,
      "loss": 0.1538,
      "step": 3412
    },
    {
      "epoch": 0.3144607730225273,
      "grad_norm": 0.9481939846524343,
      "learning_rate": 4.103400042889407e-06,
      "loss": 0.1637,
      "step": 3413
    },
    {
      "epoch": 0.3145529091997973,
      "grad_norm": 0.9071124252098365,
      "learning_rate": 4.102815498098159e-06,
      "loss": 0.1578,
      "step": 3414
    },
    {
      "epoch": 0.3146450453770673,
      "grad_norm": 0.8270852439467992,
      "learning_rate": 4.102230804485318e-06,
      "loss": 0.1495,
      "step": 3415
    },
    {
      "epoch": 0.31473718155433733,
      "grad_norm": 0.9502681634882719,
      "learning_rate": 4.101645962105176e-06,
      "loss": 0.163,
      "step": 3416
    },
    {
      "epoch": 0.3148293177316073,
      "grad_norm": 0.951552464274314,
      "learning_rate": 4.101060971012033e-06,
      "loss": 0.1591,
      "step": 3417
    },
    {
      "epoch": 0.3149214539088773,
      "grad_norm": 0.8949602924889337,
      "learning_rate": 4.100475831260208e-06,
      "loss": 0.1444,
      "step": 3418
    },
    {
      "epoch": 0.3150135900861473,
      "grad_norm": 0.9491012105424428,
      "learning_rate": 4.099890542904028e-06,
      "loss": 0.1576,
      "step": 3419
    },
    {
      "epoch": 0.3151057262634173,
      "grad_norm": 0.9076824336858172,
      "learning_rate": 4.0993051059978405e-06,
      "loss": 0.1549,
      "step": 3420
    },
    {
      "epoch": 0.3151978624406873,
      "grad_norm": 0.9139002533602671,
      "learning_rate": 4.098719520596e-06,
      "loss": 0.1417,
      "step": 3421
    },
    {
      "epoch": 0.31528999861795737,
      "grad_norm": 0.9239054987071181,
      "learning_rate": 4.098133786752881e-06,
      "loss": 0.1456,
      "step": 3422
    },
    {
      "epoch": 0.31538213479522736,
      "grad_norm": 0.9467114150066259,
      "learning_rate": 4.097547904522869e-06,
      "loss": 0.1505,
      "step": 3423
    },
    {
      "epoch": 0.31547427097249736,
      "grad_norm": 0.9140901271401887,
      "learning_rate": 4.09696187396036e-06,
      "loss": 0.1493,
      "step": 3424
    },
    {
      "epoch": 0.31556640714976736,
      "grad_norm": 0.9424231683243252,
      "learning_rate": 4.0963756951197695e-06,
      "loss": 0.1606,
      "step": 3425
    },
    {
      "epoch": 0.31565854332703736,
      "grad_norm": 0.9403766923503943,
      "learning_rate": 4.095789368055525e-06,
      "loss": 0.1664,
      "step": 3426
    },
    {
      "epoch": 0.31575067950430735,
      "grad_norm": 0.9218638685999803,
      "learning_rate": 4.095202892822066e-06,
      "loss": 0.1499,
      "step": 3427
    },
    {
      "epoch": 0.31584281568157735,
      "grad_norm": 0.9485215914433966,
      "learning_rate": 4.094616269473846e-06,
      "loss": 0.1698,
      "step": 3428
    },
    {
      "epoch": 0.3159349518588474,
      "grad_norm": 0.9196371332910769,
      "learning_rate": 4.0940294980653335e-06,
      "loss": 0.1596,
      "step": 3429
    },
    {
      "epoch": 0.3160270880361174,
      "grad_norm": 0.886507100543908,
      "learning_rate": 4.093442578651011e-06,
      "loss": 0.151,
      "step": 3430
    },
    {
      "epoch": 0.3161192242133874,
      "grad_norm": 0.8484028923939285,
      "learning_rate": 4.092855511285373e-06,
      "loss": 0.1416,
      "step": 3431
    },
    {
      "epoch": 0.3162113603906574,
      "grad_norm": 0.9073737296535767,
      "learning_rate": 4.09226829602293e-06,
      "loss": 0.1493,
      "step": 3432
    },
    {
      "epoch": 0.3163034965679274,
      "grad_norm": 0.967529401555553,
      "learning_rate": 4.091680932918205e-06,
      "loss": 0.1633,
      "step": 3433
    },
    {
      "epoch": 0.3163956327451974,
      "grad_norm": 0.9725392063430839,
      "learning_rate": 4.091093422025733e-06,
      "loss": 0.1687,
      "step": 3434
    },
    {
      "epoch": 0.3164877689224674,
      "grad_norm": 0.9128546072500832,
      "learning_rate": 4.090505763400065e-06,
      "loss": 0.1624,
      "step": 3435
    },
    {
      "epoch": 0.31657990509973744,
      "grad_norm": 0.8497060725564581,
      "learning_rate": 4.089917957095767e-06,
      "loss": 0.1473,
      "step": 3436
    },
    {
      "epoch": 0.31667204127700743,
      "grad_norm": 0.936515788653949,
      "learning_rate": 4.089330003167416e-06,
      "loss": 0.1532,
      "step": 3437
    },
    {
      "epoch": 0.31676417745427743,
      "grad_norm": 0.9506803605861734,
      "learning_rate": 4.088741901669601e-06,
      "loss": 0.1418,
      "step": 3438
    },
    {
      "epoch": 0.3168563136315474,
      "grad_norm": 0.9006173689048483,
      "learning_rate": 4.088153652656932e-06,
      "loss": 0.1606,
      "step": 3439
    },
    {
      "epoch": 0.3169484498088174,
      "grad_norm": 0.9289187640854032,
      "learning_rate": 4.087565256184024e-06,
      "loss": 0.1566,
      "step": 3440
    },
    {
      "epoch": 0.3170405859860874,
      "grad_norm": 1.0591258145699214,
      "learning_rate": 4.086976712305511e-06,
      "loss": 0.1799,
      "step": 3441
    },
    {
      "epoch": 0.3171327221633574,
      "grad_norm": 0.9400847693025363,
      "learning_rate": 4.08638802107604e-06,
      "loss": 0.1616,
      "step": 3442
    },
    {
      "epoch": 0.31722485834062747,
      "grad_norm": 0.9298557372660742,
      "learning_rate": 4.0857991825502696e-06,
      "loss": 0.1676,
      "step": 3443
    },
    {
      "epoch": 0.31731699451789747,
      "grad_norm": 0.9800661183069009,
      "learning_rate": 4.085210196782875e-06,
      "loss": 0.1604,
      "step": 3444
    },
    {
      "epoch": 0.31740913069516746,
      "grad_norm": 1.0754863588778008,
      "learning_rate": 4.084621063828544e-06,
      "loss": 0.1738,
      "step": 3445
    },
    {
      "epoch": 0.31750126687243746,
      "grad_norm": 0.9468907496454307,
      "learning_rate": 4.0840317837419754e-06,
      "loss": 0.1716,
      "step": 3446
    },
    {
      "epoch": 0.31759340304970746,
      "grad_norm": 0.9500032382526354,
      "learning_rate": 4.083442356577886e-06,
      "loss": 0.1568,
      "step": 3447
    },
    {
      "epoch": 0.31768553922697745,
      "grad_norm": 1.03361821973418,
      "learning_rate": 4.082852782391003e-06,
      "loss": 0.1646,
      "step": 3448
    },
    {
      "epoch": 0.31777767540424745,
      "grad_norm": 0.9627491594944207,
      "learning_rate": 4.0822630612360685e-06,
      "loss": 0.159,
      "step": 3449
    },
    {
      "epoch": 0.3178698115815175,
      "grad_norm": 0.8662665250817492,
      "learning_rate": 4.081673193167839e-06,
      "loss": 0.1522,
      "step": 3450
    },
    {
      "epoch": 0.3179619477587875,
      "grad_norm": 0.8937920972998911,
      "learning_rate": 4.081083178241083e-06,
      "loss": 0.1493,
      "step": 3451
    },
    {
      "epoch": 0.3180540839360575,
      "grad_norm": 0.9479929284339497,
      "learning_rate": 4.080493016510583e-06,
      "loss": 0.1639,
      "step": 3452
    },
    {
      "epoch": 0.3181462201133275,
      "grad_norm": 0.9187532135497326,
      "learning_rate": 4.079902708031137e-06,
      "loss": 0.1601,
      "step": 3453
    },
    {
      "epoch": 0.3182383562905975,
      "grad_norm": 0.9144021033769467,
      "learning_rate": 4.079312252857556e-06,
      "loss": 0.1541,
      "step": 3454
    },
    {
      "epoch": 0.3183304924678675,
      "grad_norm": 0.9191213571527707,
      "learning_rate": 4.07872165104466e-06,
      "loss": 0.1623,
      "step": 3455
    },
    {
      "epoch": 0.31842262864513754,
      "grad_norm": 0.9511674669274025,
      "learning_rate": 4.07813090264729e-06,
      "loss": 0.1597,
      "step": 3456
    },
    {
      "epoch": 0.31851476482240754,
      "grad_norm": 0.9798337339733897,
      "learning_rate": 4.077540007720295e-06,
      "loss": 0.1722,
      "step": 3457
    },
    {
      "epoch": 0.31860690099967753,
      "grad_norm": 0.8879213314696275,
      "learning_rate": 4.076948966318542e-06,
      "loss": 0.1491,
      "step": 3458
    },
    {
      "epoch": 0.31869903717694753,
      "grad_norm": 0.8721348850025189,
      "learning_rate": 4.076357778496906e-06,
      "loss": 0.1519,
      "step": 3459
    },
    {
      "epoch": 0.3187911733542175,
      "grad_norm": 0.9450063837612857,
      "learning_rate": 4.075766444310282e-06,
      "loss": 0.1484,
      "step": 3460
    },
    {
      "epoch": 0.3188833095314875,
      "grad_norm": 0.9015677020824875,
      "learning_rate": 4.075174963813574e-06,
      "loss": 0.1551,
      "step": 3461
    },
    {
      "epoch": 0.3189754457087575,
      "grad_norm": 0.9398397674223026,
      "learning_rate": 4.0745833370617e-06,
      "loss": 0.1598,
      "step": 3462
    },
    {
      "epoch": 0.3190675818860276,
      "grad_norm": 0.9545054887697545,
      "learning_rate": 4.073991564109595e-06,
      "loss": 0.1535,
      "step": 3463
    },
    {
      "epoch": 0.31915971806329757,
      "grad_norm": 0.935516966266871,
      "learning_rate": 4.073399645012203e-06,
      "loss": 0.1534,
      "step": 3464
    },
    {
      "epoch": 0.31925185424056757,
      "grad_norm": 0.9004407095982404,
      "learning_rate": 4.072807579824485e-06,
      "loss": 0.1729,
      "step": 3465
    },
    {
      "epoch": 0.31934399041783756,
      "grad_norm": 0.9059623238664691,
      "learning_rate": 4.072215368601414e-06,
      "loss": 0.1619,
      "step": 3466
    },
    {
      "epoch": 0.31943612659510756,
      "grad_norm": 0.8742229100315807,
      "learning_rate": 4.0716230113979766e-06,
      "loss": 0.1502,
      "step": 3467
    },
    {
      "epoch": 0.31952826277237756,
      "grad_norm": 1.0188211123923578,
      "learning_rate": 4.071030508269173e-06,
      "loss": 0.1699,
      "step": 3468
    },
    {
      "epoch": 0.31962039894964756,
      "grad_norm": 0.8903732181525056,
      "learning_rate": 4.070437859270019e-06,
      "loss": 0.1637,
      "step": 3469
    },
    {
      "epoch": 0.3197125351269176,
      "grad_norm": 0.9856901177717559,
      "learning_rate": 4.06984506445554e-06,
      "loss": 0.1581,
      "step": 3470
    },
    {
      "epoch": 0.3198046713041876,
      "grad_norm": 0.9279233569958244,
      "learning_rate": 4.069252123880777e-06,
      "loss": 0.1619,
      "step": 3471
    },
    {
      "epoch": 0.3198968074814576,
      "grad_norm": 0.9172143149312972,
      "learning_rate": 4.068659037600786e-06,
      "loss": 0.1542,
      "step": 3472
    },
    {
      "epoch": 0.3199889436587276,
      "grad_norm": 0.8131584393868659,
      "learning_rate": 4.068065805670635e-06,
      "loss": 0.1417,
      "step": 3473
    },
    {
      "epoch": 0.3200810798359976,
      "grad_norm": 0.9034854285537024,
      "learning_rate": 4.067472428145405e-06,
      "loss": 0.1494,
      "step": 3474
    },
    {
      "epoch": 0.3201732160132676,
      "grad_norm": 0.8541692198778182,
      "learning_rate": 4.066878905080191e-06,
      "loss": 0.1396,
      "step": 3475
    },
    {
      "epoch": 0.3202653521905376,
      "grad_norm": 0.9365299991439768,
      "learning_rate": 4.066285236530103e-06,
      "loss": 0.1597,
      "step": 3476
    },
    {
      "epoch": 0.32035748836780764,
      "grad_norm": 0.9592597644111814,
      "learning_rate": 4.065691422550261e-06,
      "loss": 0.1512,
      "step": 3477
    },
    {
      "epoch": 0.32044962454507764,
      "grad_norm": 0.8863394888331791,
      "learning_rate": 4.065097463195803e-06,
      "loss": 0.1592,
      "step": 3478
    },
    {
      "epoch": 0.32054176072234764,
      "grad_norm": 0.8543865524815278,
      "learning_rate": 4.064503358521876e-06,
      "loss": 0.1459,
      "step": 3479
    },
    {
      "epoch": 0.32063389689961763,
      "grad_norm": 0.8962676770429008,
      "learning_rate": 4.063909108583644e-06,
      "loss": 0.1553,
      "step": 3480
    },
    {
      "epoch": 0.32072603307688763,
      "grad_norm": 0.871977731951131,
      "learning_rate": 4.063314713436283e-06,
      "loss": 0.1656,
      "step": 3481
    },
    {
      "epoch": 0.3208181692541576,
      "grad_norm": 1.001308123062294,
      "learning_rate": 4.062720173134983e-06,
      "loss": 0.1709,
      "step": 3482
    },
    {
      "epoch": 0.3209103054314276,
      "grad_norm": 0.8730340921167583,
      "learning_rate": 4.062125487734947e-06,
      "loss": 0.151,
      "step": 3483
    },
    {
      "epoch": 0.3210024416086977,
      "grad_norm": 0.9399620155764555,
      "learning_rate": 4.06153065729139e-06,
      "loss": 0.1477,
      "step": 3484
    },
    {
      "epoch": 0.32109457778596767,
      "grad_norm": 0.9154550750631788,
      "learning_rate": 4.060935681859545e-06,
      "loss": 0.1509,
      "step": 3485
    },
    {
      "epoch": 0.32118671396323767,
      "grad_norm": 0.9360146752265455,
      "learning_rate": 4.060340561494654e-06,
      "loss": 0.1606,
      "step": 3486
    },
    {
      "epoch": 0.32127885014050767,
      "grad_norm": 0.9870281583727557,
      "learning_rate": 4.059745296251972e-06,
      "loss": 0.1612,
      "step": 3487
    },
    {
      "epoch": 0.32137098631777766,
      "grad_norm": 0.9446020683476672,
      "learning_rate": 4.059149886186773e-06,
      "loss": 0.1586,
      "step": 3488
    },
    {
      "epoch": 0.32146312249504766,
      "grad_norm": 0.9370826914836032,
      "learning_rate": 4.058554331354339e-06,
      "loss": 0.1608,
      "step": 3489
    },
    {
      "epoch": 0.3215552586723177,
      "grad_norm": 0.9222591973137391,
      "learning_rate": 4.057958631809967e-06,
      "loss": 0.1549,
      "step": 3490
    },
    {
      "epoch": 0.3216473948495877,
      "grad_norm": 0.915092079185541,
      "learning_rate": 4.057362787608969e-06,
      "loss": 0.1546,
      "step": 3491
    },
    {
      "epoch": 0.3217395310268577,
      "grad_norm": 0.9413157010417782,
      "learning_rate": 4.056766798806668e-06,
      "loss": 0.1484,
      "step": 3492
    },
    {
      "epoch": 0.3218316672041277,
      "grad_norm": 0.9019348756364096,
      "learning_rate": 4.056170665458403e-06,
      "loss": 0.1511,
      "step": 3493
    },
    {
      "epoch": 0.3219238033813977,
      "grad_norm": 0.9600914185028901,
      "learning_rate": 4.055574387619524e-06,
      "loss": 0.1733,
      "step": 3494
    },
    {
      "epoch": 0.3220159395586677,
      "grad_norm": 0.911767587313382,
      "learning_rate": 4.054977965345396e-06,
      "loss": 0.1658,
      "step": 3495
    },
    {
      "epoch": 0.3221080757359377,
      "grad_norm": 0.9238148568525669,
      "learning_rate": 4.054381398691396e-06,
      "loss": 0.1651,
      "step": 3496
    },
    {
      "epoch": 0.32220021191320775,
      "grad_norm": 0.8606339124055112,
      "learning_rate": 4.053784687712916e-06,
      "loss": 0.1521,
      "step": 3497
    },
    {
      "epoch": 0.32229234809047774,
      "grad_norm": 0.8899508622289252,
      "learning_rate": 4.05318783246536e-06,
      "loss": 0.1452,
      "step": 3498
    },
    {
      "epoch": 0.32238448426774774,
      "grad_norm": 0.8955622026127069,
      "learning_rate": 4.052590833004147e-06,
      "loss": 0.1554,
      "step": 3499
    },
    {
      "epoch": 0.32247662044501774,
      "grad_norm": 0.824248867010417,
      "learning_rate": 4.051993689384709e-06,
      "loss": 0.1421,
      "step": 3500
    },
    {
      "epoch": 0.32247662044501774,
      "eval_loss": 0.15785863995552063,
      "eval_runtime": 300.4481,
      "eval_samples_per_second": 23.355,
      "eval_steps_per_second": 2.922,
      "step": 3500
    },
    {
      "epoch": 0.32256875662228773,
      "grad_norm": 0.8684284205940074,
      "learning_rate": 4.051396401662489e-06,
      "loss": 0.1431,
      "step": 3501
    },
    {
      "epoch": 0.32266089279955773,
      "grad_norm": 1.0345536582306027,
      "learning_rate": 4.050798969892946e-06,
      "loss": 0.1672,
      "step": 3502
    },
    {
      "epoch": 0.3227530289768277,
      "grad_norm": 0.8593899874719152,
      "learning_rate": 4.050201394131551e-06,
      "loss": 0.1409,
      "step": 3503
    },
    {
      "epoch": 0.3228451651540978,
      "grad_norm": 0.9223597678224997,
      "learning_rate": 4.049603674433791e-06,
      "loss": 0.168,
      "step": 3504
    },
    {
      "epoch": 0.3229373013313678,
      "grad_norm": 0.888252713703813,
      "learning_rate": 4.049005810855163e-06,
      "loss": 0.1679,
      "step": 3505
    },
    {
      "epoch": 0.3230294375086378,
      "grad_norm": 0.9718784879317928,
      "learning_rate": 4.048407803451178e-06,
      "loss": 0.1591,
      "step": 3506
    },
    {
      "epoch": 0.32312157368590777,
      "grad_norm": 0.8838193993880871,
      "learning_rate": 4.047809652277362e-06,
      "loss": 0.1505,
      "step": 3507
    },
    {
      "epoch": 0.32321370986317777,
      "grad_norm": 0.8915524847095565,
      "learning_rate": 4.047211357389254e-06,
      "loss": 0.1685,
      "step": 3508
    },
    {
      "epoch": 0.32330584604044776,
      "grad_norm": 0.873930483845192,
      "learning_rate": 4.046612918842405e-06,
      "loss": 0.1543,
      "step": 3509
    },
    {
      "epoch": 0.32339798221771776,
      "grad_norm": 0.8735696475644171,
      "learning_rate": 4.0460143366923785e-06,
      "loss": 0.1578,
      "step": 3510
    },
    {
      "epoch": 0.3234901183949878,
      "grad_norm": 0.8588041851689829,
      "learning_rate": 4.045415610994755e-06,
      "loss": 0.157,
      "step": 3511
    },
    {
      "epoch": 0.3235822545722578,
      "grad_norm": 0.8657789207118362,
      "learning_rate": 4.044816741805127e-06,
      "loss": 0.1471,
      "step": 3512
    },
    {
      "epoch": 0.3236743907495278,
      "grad_norm": 0.859079362850816,
      "learning_rate": 4.044217729179097e-06,
      "loss": 0.1583,
      "step": 3513
    },
    {
      "epoch": 0.3237665269267978,
      "grad_norm": 0.9071782136735049,
      "learning_rate": 4.043618573172286e-06,
      "loss": 0.1645,
      "step": 3514
    },
    {
      "epoch": 0.3238586631040678,
      "grad_norm": 0.9391842378302793,
      "learning_rate": 4.043019273840323e-06,
      "loss": 0.1599,
      "step": 3515
    },
    {
      "epoch": 0.3239507992813378,
      "grad_norm": 0.9179852158772025,
      "learning_rate": 4.042419831238855e-06,
      "loss": 0.1671,
      "step": 3516
    },
    {
      "epoch": 0.3240429354586078,
      "grad_norm": 0.936687179289209,
      "learning_rate": 4.041820245423539e-06,
      "loss": 0.1489,
      "step": 3517
    },
    {
      "epoch": 0.32413507163587785,
      "grad_norm": 0.8751747372246792,
      "learning_rate": 4.041220516450048e-06,
      "loss": 0.1429,
      "step": 3518
    },
    {
      "epoch": 0.32422720781314784,
      "grad_norm": 0.9622948464619779,
      "learning_rate": 4.040620644374066e-06,
      "loss": 0.1714,
      "step": 3519
    },
    {
      "epoch": 0.32431934399041784,
      "grad_norm": 0.8881873414992707,
      "learning_rate": 4.0400206292512914e-06,
      "loss": 0.1358,
      "step": 3520
    },
    {
      "epoch": 0.32441148016768784,
      "grad_norm": 0.9365575801168411,
      "learning_rate": 4.039420471137435e-06,
      "loss": 0.1687,
      "step": 3521
    },
    {
      "epoch": 0.32450361634495783,
      "grad_norm": 0.9525223912847359,
      "learning_rate": 4.038820170088223e-06,
      "loss": 0.162,
      "step": 3522
    },
    {
      "epoch": 0.32459575252222783,
      "grad_norm": 0.8896341990118014,
      "learning_rate": 4.0382197261593925e-06,
      "loss": 0.1672,
      "step": 3523
    },
    {
      "epoch": 0.3246878886994979,
      "grad_norm": 1.0189292647998855,
      "learning_rate": 4.037619139406695e-06,
      "loss": 0.1676,
      "step": 3524
    },
    {
      "epoch": 0.3247800248767679,
      "grad_norm": 0.902643694703534,
      "learning_rate": 4.037018409885894e-06,
      "loss": 0.1641,
      "step": 3525
    },
    {
      "epoch": 0.3248721610540379,
      "grad_norm": 0.867488131643773,
      "learning_rate": 4.036417537652769e-06,
      "loss": 0.1431,
      "step": 3526
    },
    {
      "epoch": 0.3249642972313079,
      "grad_norm": 0.9114760774953333,
      "learning_rate": 4.03581652276311e-06,
      "loss": 0.1563,
      "step": 3527
    },
    {
      "epoch": 0.32505643340857787,
      "grad_norm": 0.847393565850545,
      "learning_rate": 4.035215365272722e-06,
      "loss": 0.1403,
      "step": 3528
    },
    {
      "epoch": 0.32514856958584787,
      "grad_norm": 0.9601802498635071,
      "learning_rate": 4.034614065237421e-06,
      "loss": 0.1717,
      "step": 3529
    },
    {
      "epoch": 0.32524070576311787,
      "grad_norm": 0.8451113466394516,
      "learning_rate": 4.034012622713041e-06,
      "loss": 0.1323,
      "step": 3530
    },
    {
      "epoch": 0.3253328419403879,
      "grad_norm": 0.9540096973545658,
      "learning_rate": 4.033411037755422e-06,
      "loss": 0.163,
      "step": 3531
    },
    {
      "epoch": 0.3254249781176579,
      "grad_norm": 0.9029760011942667,
      "learning_rate": 4.032809310420424e-06,
      "loss": 0.1638,
      "step": 3532
    },
    {
      "epoch": 0.3255171142949279,
      "grad_norm": 0.8653493696897545,
      "learning_rate": 4.032207440763915e-06,
      "loss": 0.1482,
      "step": 3533
    },
    {
      "epoch": 0.3256092504721979,
      "grad_norm": 0.9002695038758083,
      "learning_rate": 4.0316054288417825e-06,
      "loss": 0.1626,
      "step": 3534
    },
    {
      "epoch": 0.3257013866494679,
      "grad_norm": 0.9292035928119098,
      "learning_rate": 4.031003274709919e-06,
      "loss": 0.1565,
      "step": 3535
    },
    {
      "epoch": 0.3257935228267379,
      "grad_norm": 0.9373344296369921,
      "learning_rate": 4.0304009784242385e-06,
      "loss": 0.1767,
      "step": 3536
    },
    {
      "epoch": 0.3258856590040079,
      "grad_norm": 0.8566406083660746,
      "learning_rate": 4.029798540040661e-06,
      "loss": 0.1393,
      "step": 3537
    },
    {
      "epoch": 0.32597779518127795,
      "grad_norm": 0.9507352638450343,
      "learning_rate": 4.029195959615125e-06,
      "loss": 0.1465,
      "step": 3538
    },
    {
      "epoch": 0.32606993135854795,
      "grad_norm": 0.907988452302607,
      "learning_rate": 4.02859323720358e-06,
      "loss": 0.1587,
      "step": 3539
    },
    {
      "epoch": 0.32616206753581795,
      "grad_norm": 0.9271425625270764,
      "learning_rate": 4.027990372861989e-06,
      "loss": 0.1569,
      "step": 3540
    },
    {
      "epoch": 0.32625420371308794,
      "grad_norm": 0.9450417681972197,
      "learning_rate": 4.027387366646326e-06,
      "loss": 0.1594,
      "step": 3541
    },
    {
      "epoch": 0.32634633989035794,
      "grad_norm": 0.9513022280524657,
      "learning_rate": 4.026784218612581e-06,
      "loss": 0.1675,
      "step": 3542
    },
    {
      "epoch": 0.32643847606762794,
      "grad_norm": 0.9200859147183892,
      "learning_rate": 4.026180928816759e-06,
      "loss": 0.1609,
      "step": 3543
    },
    {
      "epoch": 0.32653061224489793,
      "grad_norm": 1.0053375898691645,
      "learning_rate": 4.0255774973148735e-06,
      "loss": 0.1638,
      "step": 3544
    },
    {
      "epoch": 0.326622748422168,
      "grad_norm": 0.9025225354859596,
      "learning_rate": 4.024973924162952e-06,
      "loss": 0.1462,
      "step": 3545
    },
    {
      "epoch": 0.326714884599438,
      "grad_norm": 0.9576990817667488,
      "learning_rate": 4.024370209417037e-06,
      "loss": 0.1593,
      "step": 3546
    },
    {
      "epoch": 0.326807020776708,
      "grad_norm": 0.9525658664676164,
      "learning_rate": 4.0237663531331855e-06,
      "loss": 0.1598,
      "step": 3547
    },
    {
      "epoch": 0.326899156953978,
      "grad_norm": 0.8995010015032531,
      "learning_rate": 4.023162355367464e-06,
      "loss": 0.1483,
      "step": 3548
    },
    {
      "epoch": 0.326991293131248,
      "grad_norm": 0.9037322000936574,
      "learning_rate": 4.022558216175953e-06,
      "loss": 0.1507,
      "step": 3549
    },
    {
      "epoch": 0.32708342930851797,
      "grad_norm": 0.8937200904467155,
      "learning_rate": 4.021953935614748e-06,
      "loss": 0.1442,
      "step": 3550
    },
    {
      "epoch": 0.32717556548578797,
      "grad_norm": 0.9984501569219848,
      "learning_rate": 4.021349513739956e-06,
      "loss": 0.1755,
      "step": 3551
    },
    {
      "epoch": 0.327267701663058,
      "grad_norm": 0.8965440594336742,
      "learning_rate": 4.020744950607699e-06,
      "loss": 0.1573,
      "step": 3552
    },
    {
      "epoch": 0.327359837840328,
      "grad_norm": 0.9156102418861106,
      "learning_rate": 4.020140246274109e-06,
      "loss": 0.1599,
      "step": 3553
    },
    {
      "epoch": 0.327451974017598,
      "grad_norm": 0.9645827093836922,
      "learning_rate": 4.019535400795333e-06,
      "loss": 0.1746,
      "step": 3554
    },
    {
      "epoch": 0.327544110194868,
      "grad_norm": 0.8814855920188206,
      "learning_rate": 4.018930414227533e-06,
      "loss": 0.1558,
      "step": 3555
    },
    {
      "epoch": 0.327636246372138,
      "grad_norm": 0.983414207184116,
      "learning_rate": 4.018325286626879e-06,
      "loss": 0.1592,
      "step": 3556
    },
    {
      "epoch": 0.327728382549408,
      "grad_norm": 0.919976809129956,
      "learning_rate": 4.017720018049559e-06,
      "loss": 0.1532,
      "step": 3557
    },
    {
      "epoch": 0.32782051872667806,
      "grad_norm": 0.9291452608705151,
      "learning_rate": 4.017114608551772e-06,
      "loss": 0.1601,
      "step": 3558
    },
    {
      "epoch": 0.32791265490394805,
      "grad_norm": 0.8852683619080132,
      "learning_rate": 4.016509058189731e-06,
      "loss": 0.1505,
      "step": 3559
    },
    {
      "epoch": 0.32800479108121805,
      "grad_norm": 0.9186159403047259,
      "learning_rate": 4.0159033670196605e-06,
      "loss": 0.1629,
      "step": 3560
    },
    {
      "epoch": 0.32809692725848805,
      "grad_norm": 0.9844743262655814,
      "learning_rate": 4.0152975350978e-06,
      "loss": 0.1588,
      "step": 3561
    },
    {
      "epoch": 0.32818906343575804,
      "grad_norm": 0.8595561775845129,
      "learning_rate": 4.0146915624803985e-06,
      "loss": 0.1556,
      "step": 3562
    },
    {
      "epoch": 0.32828119961302804,
      "grad_norm": 0.9446841396028918,
      "learning_rate": 4.014085449223724e-06,
      "loss": 0.1581,
      "step": 3563
    },
    {
      "epoch": 0.32837333579029804,
      "grad_norm": 1.0023786637300742,
      "learning_rate": 4.013479195384051e-06,
      "loss": 0.1686,
      "step": 3564
    },
    {
      "epoch": 0.3284654719675681,
      "grad_norm": 0.9068328433386368,
      "learning_rate": 4.012872801017673e-06,
      "loss": 0.156,
      "step": 3565
    },
    {
      "epoch": 0.3285576081448381,
      "grad_norm": 0.9627651441762631,
      "learning_rate": 4.012266266180892e-06,
      "loss": 0.1477,
      "step": 3566
    },
    {
      "epoch": 0.3286497443221081,
      "grad_norm": 0.8659019527581846,
      "learning_rate": 4.011659590930026e-06,
      "loss": 0.1515,
      "step": 3567
    },
    {
      "epoch": 0.3287418804993781,
      "grad_norm": 0.8270948116690996,
      "learning_rate": 4.011052775321405e-06,
      "loss": 0.1422,
      "step": 3568
    },
    {
      "epoch": 0.3288340166766481,
      "grad_norm": 0.9782404984989339,
      "learning_rate": 4.010445819411369e-06,
      "loss": 0.1684,
      "step": 3569
    },
    {
      "epoch": 0.3289261528539181,
      "grad_norm": 0.8829362934235169,
      "learning_rate": 4.009838723256278e-06,
      "loss": 0.1522,
      "step": 3570
    },
    {
      "epoch": 0.32901828903118807,
      "grad_norm": 0.9407625480923458,
      "learning_rate": 4.009231486912498e-06,
      "loss": 0.1656,
      "step": 3571
    },
    {
      "epoch": 0.3291104252084581,
      "grad_norm": 0.8848548864415823,
      "learning_rate": 4.008624110436413e-06,
      "loss": 0.1527,
      "step": 3572
    },
    {
      "epoch": 0.3292025613857281,
      "grad_norm": 0.9301909339470641,
      "learning_rate": 4.008016593884416e-06,
      "loss": 0.1546,
      "step": 3573
    },
    {
      "epoch": 0.3292946975629981,
      "grad_norm": 0.9320406389327086,
      "learning_rate": 4.0074089373129165e-06,
      "loss": 0.1522,
      "step": 3574
    },
    {
      "epoch": 0.3293868337402681,
      "grad_norm": 0.8886539369086722,
      "learning_rate": 4.006801140778335e-06,
      "loss": 0.1472,
      "step": 3575
    },
    {
      "epoch": 0.3294789699175381,
      "grad_norm": 0.9537007448167414,
      "learning_rate": 4.006193204337106e-06,
      "loss": 0.1604,
      "step": 3576
    },
    {
      "epoch": 0.3295711060948081,
      "grad_norm": 0.8619622908907076,
      "learning_rate": 4.005585128045675e-06,
      "loss": 0.1431,
      "step": 3577
    },
    {
      "epoch": 0.3296632422720781,
      "grad_norm": 0.9035882099898597,
      "learning_rate": 4.004976911960503e-06,
      "loss": 0.1629,
      "step": 3578
    },
    {
      "epoch": 0.32975537844934816,
      "grad_norm": 0.9564187518876925,
      "learning_rate": 4.004368556138062e-06,
      "loss": 0.1551,
      "step": 3579
    },
    {
      "epoch": 0.32984751462661815,
      "grad_norm": 0.8805677607577047,
      "learning_rate": 4.003760060634839e-06,
      "loss": 0.1552,
      "step": 3580
    },
    {
      "epoch": 0.32993965080388815,
      "grad_norm": 0.8140917034555333,
      "learning_rate": 4.003151425507333e-06,
      "loss": 0.1515,
      "step": 3581
    },
    {
      "epoch": 0.33003178698115815,
      "grad_norm": 0.9419332145704289,
      "learning_rate": 4.002542650812056e-06,
      "loss": 0.1705,
      "step": 3582
    },
    {
      "epoch": 0.33012392315842815,
      "grad_norm": 0.8431186709355069,
      "learning_rate": 4.001933736605531e-06,
      "loss": 0.1577,
      "step": 3583
    },
    {
      "epoch": 0.33021605933569814,
      "grad_norm": 0.9288861117347793,
      "learning_rate": 4.001324682944297e-06,
      "loss": 0.1623,
      "step": 3584
    },
    {
      "epoch": 0.3303081955129682,
      "grad_norm": 0.9538522526825776,
      "learning_rate": 4.000715489884906e-06,
      "loss": 0.1561,
      "step": 3585
    },
    {
      "epoch": 0.3304003316902382,
      "grad_norm": 0.8871395638894523,
      "learning_rate": 4.000106157483919e-06,
      "loss": 0.1475,
      "step": 3586
    },
    {
      "epoch": 0.3304924678675082,
      "grad_norm": 0.9324362611728906,
      "learning_rate": 3.999496685797914e-06,
      "loss": 0.1643,
      "step": 3587
    },
    {
      "epoch": 0.3305846040447782,
      "grad_norm": 0.9712778225713625,
      "learning_rate": 3.998887074883481e-06,
      "loss": 0.16,
      "step": 3588
    },
    {
      "epoch": 0.3306767402220482,
      "grad_norm": 0.9202172732232993,
      "learning_rate": 3.9982773247972204e-06,
      "loss": 0.1718,
      "step": 3589
    },
    {
      "epoch": 0.3307688763993182,
      "grad_norm": 0.9304776376651178,
      "learning_rate": 3.99766743559575e-06,
      "loss": 0.1617,
      "step": 3590
    },
    {
      "epoch": 0.3308610125765882,
      "grad_norm": 0.8970303287240191,
      "learning_rate": 3.997057407335697e-06,
      "loss": 0.1537,
      "step": 3591
    },
    {
      "epoch": 0.33095314875385823,
      "grad_norm": 0.9275835584300335,
      "learning_rate": 3.996447240073702e-06,
      "loss": 0.1617,
      "step": 3592
    },
    {
      "epoch": 0.3310452849311282,
      "grad_norm": 0.917024390620923,
      "learning_rate": 3.995836933866421e-06,
      "loss": 0.1493,
      "step": 3593
    },
    {
      "epoch": 0.3311374211083982,
      "grad_norm": 0.8871456134644057,
      "learning_rate": 3.995226488770519e-06,
      "loss": 0.1603,
      "step": 3594
    },
    {
      "epoch": 0.3312295572856682,
      "grad_norm": 0.9237380887364842,
      "learning_rate": 3.994615904842676e-06,
      "loss": 0.1612,
      "step": 3595
    },
    {
      "epoch": 0.3313216934629382,
      "grad_norm": 0.990513202871709,
      "learning_rate": 3.994005182139586e-06,
      "loss": 0.1813,
      "step": 3596
    },
    {
      "epoch": 0.3314138296402082,
      "grad_norm": 0.8947980698760637,
      "learning_rate": 3.993394320717952e-06,
      "loss": 0.1597,
      "step": 3597
    },
    {
      "epoch": 0.3315059658174782,
      "grad_norm": 0.8966130531365468,
      "learning_rate": 3.992783320634498e-06,
      "loss": 0.1464,
      "step": 3598
    },
    {
      "epoch": 0.33159810199474826,
      "grad_norm": 0.9026903822003577,
      "learning_rate": 3.992172181945951e-06,
      "loss": 0.1658,
      "step": 3599
    },
    {
      "epoch": 0.33169023817201826,
      "grad_norm": 0.9037038636134237,
      "learning_rate": 3.991560904709055e-06,
      "loss": 0.166,
      "step": 3600
    },
    {
      "epoch": 0.33178237434928826,
      "grad_norm": 0.9647233382895593,
      "learning_rate": 3.990949488980569e-06,
      "loss": 0.161,
      "step": 3601
    },
    {
      "epoch": 0.33187451052655825,
      "grad_norm": 0.9559015439299133,
      "learning_rate": 3.990337934817263e-06,
      "loss": 0.1685,
      "step": 3602
    },
    {
      "epoch": 0.33196664670382825,
      "grad_norm": 0.8896214610619697,
      "learning_rate": 3.989726242275919e-06,
      "loss": 0.1489,
      "step": 3603
    },
    {
      "epoch": 0.33205878288109825,
      "grad_norm": 0.9415012706281953,
      "learning_rate": 3.989114411413333e-06,
      "loss": 0.1636,
      "step": 3604
    },
    {
      "epoch": 0.33215091905836824,
      "grad_norm": 0.9134402537640711,
      "learning_rate": 3.988502442286314e-06,
      "loss": 0.1496,
      "step": 3605
    },
    {
      "epoch": 0.3322430552356383,
      "grad_norm": 0.8943155472774135,
      "learning_rate": 3.987890334951683e-06,
      "loss": 0.1521,
      "step": 3606
    },
    {
      "epoch": 0.3323351914129083,
      "grad_norm": 0.9865044679709729,
      "learning_rate": 3.987278089466274e-06,
      "loss": 0.1754,
      "step": 3607
    },
    {
      "epoch": 0.3324273275901783,
      "grad_norm": 0.9439032580162637,
      "learning_rate": 3.986665705886934e-06,
      "loss": 0.1557,
      "step": 3608
    },
    {
      "epoch": 0.3325194637674483,
      "grad_norm": 0.8531927782870514,
      "learning_rate": 3.986053184270524e-06,
      "loss": 0.1541,
      "step": 3609
    },
    {
      "epoch": 0.3326115999447183,
      "grad_norm": 0.9242320549076447,
      "learning_rate": 3.9854405246739155e-06,
      "loss": 0.1632,
      "step": 3610
    },
    {
      "epoch": 0.3327037361219883,
      "grad_norm": 0.9688725514867598,
      "learning_rate": 3.984827727153995e-06,
      "loss": 0.1606,
      "step": 3611
    },
    {
      "epoch": 0.3327958722992583,
      "grad_norm": 0.9171240245963733,
      "learning_rate": 3.984214791767659e-06,
      "loss": 0.1548,
      "step": 3612
    },
    {
      "epoch": 0.33288800847652833,
      "grad_norm": 0.9001926653232694,
      "learning_rate": 3.983601718571821e-06,
      "loss": 0.1577,
      "step": 3613
    },
    {
      "epoch": 0.3329801446537983,
      "grad_norm": 0.8394494549911041,
      "learning_rate": 3.982988507623403e-06,
      "loss": 0.1532,
      "step": 3614
    },
    {
      "epoch": 0.3330722808310683,
      "grad_norm": 0.8980142878215236,
      "learning_rate": 3.982375158979344e-06,
      "loss": 0.1573,
      "step": 3615
    },
    {
      "epoch": 0.3331644170083383,
      "grad_norm": 0.883954929348229,
      "learning_rate": 3.98176167269659e-06,
      "loss": 0.1479,
      "step": 3616
    },
    {
      "epoch": 0.3332565531856083,
      "grad_norm": 0.9567135747589431,
      "learning_rate": 3.981148048832106e-06,
      "loss": 0.1608,
      "step": 3617
    },
    {
      "epoch": 0.3333486893628783,
      "grad_norm": 0.8848169559034494,
      "learning_rate": 3.980534287442866e-06,
      "loss": 0.1521,
      "step": 3618
    },
    {
      "epoch": 0.33344082554014837,
      "grad_norm": 0.9362944149112743,
      "learning_rate": 3.9799203885858584e-06,
      "loss": 0.1519,
      "step": 3619
    },
    {
      "epoch": 0.33353296171741836,
      "grad_norm": 0.8514694979616128,
      "learning_rate": 3.979306352318083e-06,
      "loss": 0.1425,
      "step": 3620
    },
    {
      "epoch": 0.33362509789468836,
      "grad_norm": 0.8970641312249314,
      "learning_rate": 3.978692178696555e-06,
      "loss": 0.1494,
      "step": 3621
    },
    {
      "epoch": 0.33371723407195836,
      "grad_norm": 0.9766601140568355,
      "learning_rate": 3.9780778677782974e-06,
      "loss": 0.1709,
      "step": 3622
    },
    {
      "epoch": 0.33380937024922835,
      "grad_norm": 0.9966149520552534,
      "learning_rate": 3.977463419620352e-06,
      "loss": 0.1779,
      "step": 3623
    },
    {
      "epoch": 0.33390150642649835,
      "grad_norm": 0.9568555752316749,
      "learning_rate": 3.976848834279767e-06,
      "loss": 0.1615,
      "step": 3624
    },
    {
      "epoch": 0.33399364260376835,
      "grad_norm": 0.919492432719039,
      "learning_rate": 3.976234111813611e-06,
      "loss": 0.1568,
      "step": 3625
    },
    {
      "epoch": 0.3340857787810384,
      "grad_norm": 0.9378481548085038,
      "learning_rate": 3.975619252278958e-06,
      "loss": 0.1648,
      "step": 3626
    },
    {
      "epoch": 0.3341779149583084,
      "grad_norm": 0.9358170612115819,
      "learning_rate": 3.9750042557328986e-06,
      "loss": 0.1494,
      "step": 3627
    },
    {
      "epoch": 0.3342700511355784,
      "grad_norm": 0.892577374929836,
      "learning_rate": 3.974389122232536e-06,
      "loss": 0.1505,
      "step": 3628
    },
    {
      "epoch": 0.3343621873128484,
      "grad_norm": 0.8907012935380049,
      "learning_rate": 3.973773851834983e-06,
      "loss": 0.1441,
      "step": 3629
    },
    {
      "epoch": 0.3344543234901184,
      "grad_norm": 0.9104396913688365,
      "learning_rate": 3.973158444597371e-06,
      "loss": 0.1566,
      "step": 3630
    },
    {
      "epoch": 0.3345464596673884,
      "grad_norm": 1.0018321492907019,
      "learning_rate": 3.972542900576838e-06,
      "loss": 0.1642,
      "step": 3631
    },
    {
      "epoch": 0.3346385958446584,
      "grad_norm": 1.0043463523252036,
      "learning_rate": 3.9719272198305385e-06,
      "loss": 0.1546,
      "step": 3632
    },
    {
      "epoch": 0.33473073202192843,
      "grad_norm": 0.9422176961049771,
      "learning_rate": 3.971311402415638e-06,
      "loss": 0.154,
      "step": 3633
    },
    {
      "epoch": 0.33482286819919843,
      "grad_norm": 0.9556246249227124,
      "learning_rate": 3.970695448389315e-06,
      "loss": 0.1566,
      "step": 3634
    },
    {
      "epoch": 0.33491500437646843,
      "grad_norm": 0.9922729925416031,
      "learning_rate": 3.970079357808763e-06,
      "loss": 0.1522,
      "step": 3635
    },
    {
      "epoch": 0.3350071405537384,
      "grad_norm": 0.9364461746995512,
      "learning_rate": 3.969463130731183e-06,
      "loss": 0.1658,
      "step": 3636
    },
    {
      "epoch": 0.3350992767310084,
      "grad_norm": 0.9077754495843189,
      "learning_rate": 3.968846767213794e-06,
      "loss": 0.1638,
      "step": 3637
    },
    {
      "epoch": 0.3351914129082784,
      "grad_norm": 0.9642592983646728,
      "learning_rate": 3.968230267313824e-06,
      "loss": 0.1607,
      "step": 3638
    },
    {
      "epoch": 0.3352835490855484,
      "grad_norm": 0.9523862140574059,
      "learning_rate": 3.967613631088516e-06,
      "loss": 0.1602,
      "step": 3639
    },
    {
      "epoch": 0.33537568526281847,
      "grad_norm": 0.853738339970035,
      "learning_rate": 3.966996858595123e-06,
      "loss": 0.1621,
      "step": 3640
    },
    {
      "epoch": 0.33546782144008847,
      "grad_norm": 0.9435129135901393,
      "learning_rate": 3.966379949890916e-06,
      "loss": 0.1479,
      "step": 3641
    },
    {
      "epoch": 0.33555995761735846,
      "grad_norm": 1.0237409152349508,
      "learning_rate": 3.965762905033171e-06,
      "loss": 0.1442,
      "step": 3642
    },
    {
      "epoch": 0.33565209379462846,
      "grad_norm": 1.004827274729739,
      "learning_rate": 3.965145724079184e-06,
      "loss": 0.1727,
      "step": 3643
    },
    {
      "epoch": 0.33574422997189846,
      "grad_norm": 0.9012970937284178,
      "learning_rate": 3.964528407086259e-06,
      "loss": 0.1582,
      "step": 3644
    },
    {
      "epoch": 0.33583636614916845,
      "grad_norm": 0.9084801893441767,
      "learning_rate": 3.963910954111712e-06,
      "loss": 0.1427,
      "step": 3645
    },
    {
      "epoch": 0.33592850232643845,
      "grad_norm": 1.0458487380125516,
      "learning_rate": 3.9632933652128765e-06,
      "loss": 0.1634,
      "step": 3646
    },
    {
      "epoch": 0.3360206385037085,
      "grad_norm": 0.9627427187542494,
      "learning_rate": 3.962675640447094e-06,
      "loss": 0.1768,
      "step": 3647
    },
    {
      "epoch": 0.3361127746809785,
      "grad_norm": 0.942436602646475,
      "learning_rate": 3.962057779871722e-06,
      "loss": 0.169,
      "step": 3648
    },
    {
      "epoch": 0.3362049108582485,
      "grad_norm": 1.0233983022133986,
      "learning_rate": 3.961439783544126e-06,
      "loss": 0.1548,
      "step": 3649
    },
    {
      "epoch": 0.3362970470355185,
      "grad_norm": 0.8673192470735941,
      "learning_rate": 3.960821651521691e-06,
      "loss": 0.1531,
      "step": 3650
    },
    {
      "epoch": 0.3363891832127885,
      "grad_norm": 0.9154833282149483,
      "learning_rate": 3.960203383861807e-06,
      "loss": 0.1662,
      "step": 3651
    },
    {
      "epoch": 0.3364813193900585,
      "grad_norm": 0.9145509274938834,
      "learning_rate": 3.959584980621883e-06,
      "loss": 0.1519,
      "step": 3652
    },
    {
      "epoch": 0.33657345556732854,
      "grad_norm": 0.9274327627736129,
      "learning_rate": 3.958966441859335e-06,
      "loss": 0.161,
      "step": 3653
    },
    {
      "epoch": 0.33666559174459854,
      "grad_norm": 0.93938164864867,
      "learning_rate": 3.958347767631595e-06,
      "loss": 0.1478,
      "step": 3654
    },
    {
      "epoch": 0.33675772792186853,
      "grad_norm": 1.0533654788708426,
      "learning_rate": 3.95772895799611e-06,
      "loss": 0.1577,
      "step": 3655
    },
    {
      "epoch": 0.33684986409913853,
      "grad_norm": 0.8832765620148048,
      "learning_rate": 3.957110013010333e-06,
      "loss": 0.1574,
      "step": 3656
    },
    {
      "epoch": 0.3369420002764085,
      "grad_norm": 0.9261190045148706,
      "learning_rate": 3.9564909327317355e-06,
      "loss": 0.1531,
      "step": 3657
    },
    {
      "epoch": 0.3370341364536785,
      "grad_norm": 0.9237710676896893,
      "learning_rate": 3.955871717217797e-06,
      "loss": 0.1433,
      "step": 3658
    },
    {
      "epoch": 0.3371262726309485,
      "grad_norm": 0.9487715568740481,
      "learning_rate": 3.955252366526014e-06,
      "loss": 0.1626,
      "step": 3659
    },
    {
      "epoch": 0.3372184088082186,
      "grad_norm": 1.0072562563839196,
      "learning_rate": 3.954632880713891e-06,
      "loss": 0.1569,
      "step": 3660
    },
    {
      "epoch": 0.33731054498548857,
      "grad_norm": 0.9446053903813388,
      "learning_rate": 3.954013259838949e-06,
      "loss": 0.1612,
      "step": 3661
    },
    {
      "epoch": 0.33740268116275857,
      "grad_norm": 0.8729387006885364,
      "learning_rate": 3.95339350395872e-06,
      "loss": 0.1407,
      "step": 3662
    },
    {
      "epoch": 0.33749481734002856,
      "grad_norm": 0.9221098538932074,
      "learning_rate": 3.952773613130747e-06,
      "loss": 0.177,
      "step": 3663
    },
    {
      "epoch": 0.33758695351729856,
      "grad_norm": 0.86712001263126,
      "learning_rate": 3.9521535874125875e-06,
      "loss": 0.1529,
      "step": 3664
    },
    {
      "epoch": 0.33767908969456856,
      "grad_norm": 0.8922847012391321,
      "learning_rate": 3.951533426861812e-06,
      "loss": 0.1519,
      "step": 3665
    },
    {
      "epoch": 0.33777122587183855,
      "grad_norm": 0.940770601309715,
      "learning_rate": 3.950913131536001e-06,
      "loss": 0.1431,
      "step": 3666
    },
    {
      "epoch": 0.3378633620491086,
      "grad_norm": 0.9514263978222713,
      "learning_rate": 3.950292701492749e-06,
      "loss": 0.1693,
      "step": 3667
    },
    {
      "epoch": 0.3379554982263786,
      "grad_norm": 0.8291431585281795,
      "learning_rate": 3.949672136789665e-06,
      "loss": 0.138,
      "step": 3668
    },
    {
      "epoch": 0.3380476344036486,
      "grad_norm": 0.9627031886750439,
      "learning_rate": 3.949051437484367e-06,
      "loss": 0.1486,
      "step": 3669
    },
    {
      "epoch": 0.3381397705809186,
      "grad_norm": 0.9203209058165884,
      "learning_rate": 3.948430603634486e-06,
      "loss": 0.1483,
      "step": 3670
    },
    {
      "epoch": 0.3382319067581886,
      "grad_norm": 0.8850216723638966,
      "learning_rate": 3.947809635297668e-06,
      "loss": 0.1517,
      "step": 3671
    },
    {
      "epoch": 0.3383240429354586,
      "grad_norm": 0.9675759754223425,
      "learning_rate": 3.9471885325315695e-06,
      "loss": 0.1727,
      "step": 3672
    },
    {
      "epoch": 0.3384161791127286,
      "grad_norm": 0.9573188172538333,
      "learning_rate": 3.94656729539386e-06,
      "loss": 0.1555,
      "step": 3673
    },
    {
      "epoch": 0.33850831528999864,
      "grad_norm": 0.9600208385443318,
      "learning_rate": 3.945945923942221e-06,
      "loss": 0.1635,
      "step": 3674
    },
    {
      "epoch": 0.33860045146726864,
      "grad_norm": 0.9380222087962892,
      "learning_rate": 3.945324418234349e-06,
      "loss": 0.1637,
      "step": 3675
    },
    {
      "epoch": 0.33869258764453863,
      "grad_norm": 0.9400532586305578,
      "learning_rate": 3.944702778327948e-06,
      "loss": 0.1479,
      "step": 3676
    },
    {
      "epoch": 0.33878472382180863,
      "grad_norm": 0.8849537883410138,
      "learning_rate": 3.944081004280738e-06,
      "loss": 0.1577,
      "step": 3677
    },
    {
      "epoch": 0.33887685999907863,
      "grad_norm": 0.893123818162334,
      "learning_rate": 3.943459096150452e-06,
      "loss": 0.1505,
      "step": 3678
    },
    {
      "epoch": 0.3389689961763486,
      "grad_norm": 0.902068247100491,
      "learning_rate": 3.942837053994834e-06,
      "loss": 0.1561,
      "step": 3679
    },
    {
      "epoch": 0.3390611323536186,
      "grad_norm": 0.9182592324604583,
      "learning_rate": 3.942214877871639e-06,
      "loss": 0.1631,
      "step": 3680
    },
    {
      "epoch": 0.3391532685308887,
      "grad_norm": 0.9270539702602028,
      "learning_rate": 3.941592567838638e-06,
      "loss": 0.1599,
      "step": 3681
    },
    {
      "epoch": 0.33924540470815867,
      "grad_norm": 0.869777337642264,
      "learning_rate": 3.940970123953613e-06,
      "loss": 0.1481,
      "step": 3682
    },
    {
      "epoch": 0.33933754088542867,
      "grad_norm": 0.86276183059958,
      "learning_rate": 3.940347546274355e-06,
      "loss": 0.156,
      "step": 3683
    },
    {
      "epoch": 0.33942967706269866,
      "grad_norm": 0.9137393832409828,
      "learning_rate": 3.9397248348586735e-06,
      "loss": 0.1548,
      "step": 3684
    },
    {
      "epoch": 0.33952181323996866,
      "grad_norm": 0.8612576797475836,
      "learning_rate": 3.939101989764386e-06,
      "loss": 0.1437,
      "step": 3685
    },
    {
      "epoch": 0.33961394941723866,
      "grad_norm": 0.8495987603787247,
      "learning_rate": 3.938479011049324e-06,
      "loss": 0.1368,
      "step": 3686
    },
    {
      "epoch": 0.3397060855945087,
      "grad_norm": 0.9270752934332105,
      "learning_rate": 3.937855898771331e-06,
      "loss": 0.1525,
      "step": 3687
    },
    {
      "epoch": 0.3397982217717787,
      "grad_norm": 0.9485150781196504,
      "learning_rate": 3.9372326529882635e-06,
      "loss": 0.172,
      "step": 3688
    },
    {
      "epoch": 0.3398903579490487,
      "grad_norm": 0.9116673543666898,
      "learning_rate": 3.936609273757988e-06,
      "loss": 0.1527,
      "step": 3689
    },
    {
      "epoch": 0.3399824941263187,
      "grad_norm": 0.9229394432273127,
      "learning_rate": 3.935985761138388e-06,
      "loss": 0.1564,
      "step": 3690
    },
    {
      "epoch": 0.3400746303035887,
      "grad_norm": 0.8605203140126699,
      "learning_rate": 3.935362115187356e-06,
      "loss": 0.1446,
      "step": 3691
    },
    {
      "epoch": 0.3401667664808587,
      "grad_norm": 0.9058126161505262,
      "learning_rate": 3.934738335962796e-06,
      "loss": 0.1689,
      "step": 3692
    },
    {
      "epoch": 0.3402589026581287,
      "grad_norm": 0.8737393124424059,
      "learning_rate": 3.934114423522627e-06,
      "loss": 0.1464,
      "step": 3693
    },
    {
      "epoch": 0.34035103883539874,
      "grad_norm": 0.9203947578164606,
      "learning_rate": 3.93349037792478e-06,
      "loss": 0.1603,
      "step": 3694
    },
    {
      "epoch": 0.34044317501266874,
      "grad_norm": 0.871739564957307,
      "learning_rate": 3.932866199227196e-06,
      "loss": 0.1527,
      "step": 3695
    },
    {
      "epoch": 0.34053531118993874,
      "grad_norm": 0.8450165346429883,
      "learning_rate": 3.932241887487834e-06,
      "loss": 0.1608,
      "step": 3696
    },
    {
      "epoch": 0.34062744736720874,
      "grad_norm": 0.9199114792500082,
      "learning_rate": 3.931617442764656e-06,
      "loss": 0.1627,
      "step": 3697
    },
    {
      "epoch": 0.34071958354447873,
      "grad_norm": 0.9050247845325442,
      "learning_rate": 3.930992865115645e-06,
      "loss": 0.1604,
      "step": 3698
    },
    {
      "epoch": 0.34081171972174873,
      "grad_norm": 0.9139956710219851,
      "learning_rate": 3.930368154598793e-06,
      "loss": 0.1572,
      "step": 3699
    },
    {
      "epoch": 0.3409038558990187,
      "grad_norm": 0.8631013927561422,
      "learning_rate": 3.929743311272104e-06,
      "loss": 0.1598,
      "step": 3700
    },
    {
      "epoch": 0.3409959920762888,
      "grad_norm": 0.8943273656977337,
      "learning_rate": 3.929118335193594e-06,
      "loss": 0.1538,
      "step": 3701
    },
    {
      "epoch": 0.3410881282535588,
      "grad_norm": 0.8850875675008057,
      "learning_rate": 3.9284932264212925e-06,
      "loss": 0.1595,
      "step": 3702
    },
    {
      "epoch": 0.3411802644308288,
      "grad_norm": 0.9126308299525047,
      "learning_rate": 3.927867985013242e-06,
      "loss": 0.1596,
      "step": 3703
    },
    {
      "epoch": 0.34127240060809877,
      "grad_norm": 0.9408328545751462,
      "learning_rate": 3.9272426110274955e-06,
      "loss": 0.1655,
      "step": 3704
    },
    {
      "epoch": 0.34136453678536877,
      "grad_norm": 0.9617011692840643,
      "learning_rate": 3.926617104522118e-06,
      "loss": 0.1766,
      "step": 3705
    },
    {
      "epoch": 0.34145667296263876,
      "grad_norm": 0.9237258704007577,
      "learning_rate": 3.92599146555519e-06,
      "loss": 0.1627,
      "step": 3706
    },
    {
      "epoch": 0.34154880913990876,
      "grad_norm": 0.9690704139681786,
      "learning_rate": 3.9253656941848e-06,
      "loss": 0.1639,
      "step": 3707
    },
    {
      "epoch": 0.3416409453171788,
      "grad_norm": 0.9982076700789548,
      "learning_rate": 3.9247397904690526e-06,
      "loss": 0.1669,
      "step": 3708
    },
    {
      "epoch": 0.3417330814944488,
      "grad_norm": 0.9471486226369275,
      "learning_rate": 3.924113754466062e-06,
      "loss": 0.1528,
      "step": 3709
    },
    {
      "epoch": 0.3418252176717188,
      "grad_norm": 0.9104829825095512,
      "learning_rate": 3.923487586233956e-06,
      "loss": 0.153,
      "step": 3710
    },
    {
      "epoch": 0.3419173538489888,
      "grad_norm": 0.956036911754247,
      "learning_rate": 3.922861285830874e-06,
      "loss": 0.1599,
      "step": 3711
    },
    {
      "epoch": 0.3420094900262588,
      "grad_norm": 0.8919804779273773,
      "learning_rate": 3.922234853314969e-06,
      "loss": 0.1563,
      "step": 3712
    },
    {
      "epoch": 0.3421016262035288,
      "grad_norm": 0.8444794902726142,
      "learning_rate": 3.921608288744405e-06,
      "loss": 0.1571,
      "step": 3713
    },
    {
      "epoch": 0.3421937623807988,
      "grad_norm": 0.8359544466312437,
      "learning_rate": 3.920981592177358e-06,
      "loss": 0.1327,
      "step": 3714
    },
    {
      "epoch": 0.34228589855806885,
      "grad_norm": 0.9597616019869197,
      "learning_rate": 3.920354763672017e-06,
      "loss": 0.1589,
      "step": 3715
    },
    {
      "epoch": 0.34237803473533884,
      "grad_norm": 0.838642030459525,
      "learning_rate": 3.9197278032865835e-06,
      "loss": 0.146,
      "step": 3716
    },
    {
      "epoch": 0.34247017091260884,
      "grad_norm": 0.9531398921954629,
      "learning_rate": 3.919100711079271e-06,
      "loss": 0.1544,
      "step": 3717
    },
    {
      "epoch": 0.34256230708987884,
      "grad_norm": 0.9489530931832708,
      "learning_rate": 3.918473487108305e-06,
      "loss": 0.1521,
      "step": 3718
    },
    {
      "epoch": 0.34265444326714883,
      "grad_norm": 0.9305730738295115,
      "learning_rate": 3.917846131431923e-06,
      "loss": 0.1532,
      "step": 3719
    },
    {
      "epoch": 0.34274657944441883,
      "grad_norm": 0.9741820408835091,
      "learning_rate": 3.917218644108375e-06,
      "loss": 0.1548,
      "step": 3720
    },
    {
      "epoch": 0.3428387156216889,
      "grad_norm": 1.0039496017353446,
      "learning_rate": 3.916591025195923e-06,
      "loss": 0.1618,
      "step": 3721
    },
    {
      "epoch": 0.3429308517989589,
      "grad_norm": 0.975277791866181,
      "learning_rate": 3.915963274752842e-06,
      "loss": 0.1598,
      "step": 3722
    },
    {
      "epoch": 0.3430229879762289,
      "grad_norm": 0.9510130783589238,
      "learning_rate": 3.915335392837418e-06,
      "loss": 0.1608,
      "step": 3723
    },
    {
      "epoch": 0.3431151241534989,
      "grad_norm": 0.9307546556982514,
      "learning_rate": 3.914707379507952e-06,
      "loss": 0.1429,
      "step": 3724
    },
    {
      "epoch": 0.34320726033076887,
      "grad_norm": 0.9390570411218914,
      "learning_rate": 3.914079234822752e-06,
      "loss": 0.1562,
      "step": 3725
    },
    {
      "epoch": 0.34329939650803887,
      "grad_norm": 0.8986706044646279,
      "learning_rate": 3.913450958840144e-06,
      "loss": 0.1487,
      "step": 3726
    },
    {
      "epoch": 0.34339153268530886,
      "grad_norm": 0.9437748108420285,
      "learning_rate": 3.912822551618461e-06,
      "loss": 0.1618,
      "step": 3727
    },
    {
      "epoch": 0.3434836688625789,
      "grad_norm": 0.8769176358148374,
      "learning_rate": 3.912194013216053e-06,
      "loss": 0.1582,
      "step": 3728
    },
    {
      "epoch": 0.3435758050398489,
      "grad_norm": 0.8809803011658778,
      "learning_rate": 3.911565343691279e-06,
      "loss": 0.1441,
      "step": 3729
    },
    {
      "epoch": 0.3436679412171189,
      "grad_norm": 1.0717667235658699,
      "learning_rate": 3.910936543102511e-06,
      "loss": 0.1624,
      "step": 3730
    },
    {
      "epoch": 0.3437600773943889,
      "grad_norm": 0.8995225605847589,
      "learning_rate": 3.910307611508133e-06,
      "loss": 0.1491,
      "step": 3731
    },
    {
      "epoch": 0.3438522135716589,
      "grad_norm": 0.9128137084403737,
      "learning_rate": 3.9096785489665405e-06,
      "loss": 0.146,
      "step": 3732
    },
    {
      "epoch": 0.3439443497489289,
      "grad_norm": 0.9476732140767965,
      "learning_rate": 3.9090493555361445e-06,
      "loss": 0.1623,
      "step": 3733
    },
    {
      "epoch": 0.3440364859261989,
      "grad_norm": 0.8873546249523853,
      "learning_rate": 3.908420031275363e-06,
      "loss": 0.1448,
      "step": 3734
    },
    {
      "epoch": 0.34412862210346895,
      "grad_norm": 0.9694870317753858,
      "learning_rate": 3.907790576242631e-06,
      "loss": 0.159,
      "step": 3735
    },
    {
      "epoch": 0.34422075828073895,
      "grad_norm": 0.8987721407984873,
      "learning_rate": 3.907160990496392e-06,
      "loss": 0.1518,
      "step": 3736
    },
    {
      "epoch": 0.34431289445800894,
      "grad_norm": 0.8835955419714732,
      "learning_rate": 3.9065312740951035e-06,
      "loss": 0.1551,
      "step": 3737
    },
    {
      "epoch": 0.34440503063527894,
      "grad_norm": 0.9036042800205629,
      "learning_rate": 3.905901427097235e-06,
      "loss": 0.1389,
      "step": 3738
    },
    {
      "epoch": 0.34449716681254894,
      "grad_norm": 0.9288485278514401,
      "learning_rate": 3.9052714495612675e-06,
      "loss": 0.1479,
      "step": 3739
    },
    {
      "epoch": 0.34458930298981894,
      "grad_norm": 0.832981619207698,
      "learning_rate": 3.904641341545694e-06,
      "loss": 0.143,
      "step": 3740
    },
    {
      "epoch": 0.34468143916708893,
      "grad_norm": 0.8967549193356065,
      "learning_rate": 3.904011103109022e-06,
      "loss": 0.1571,
      "step": 3741
    },
    {
      "epoch": 0.344773575344359,
      "grad_norm": 1.0107971066325179,
      "learning_rate": 3.903380734309767e-06,
      "loss": 0.1667,
      "step": 3742
    },
    {
      "epoch": 0.344865711521629,
      "grad_norm": 0.9379380617255512,
      "learning_rate": 3.90275023520646e-06,
      "loss": 0.1539,
      "step": 3743
    },
    {
      "epoch": 0.344957847698899,
      "grad_norm": 0.8811406590696291,
      "learning_rate": 3.902119605857644e-06,
      "loss": 0.1546,
      "step": 3744
    },
    {
      "epoch": 0.345049983876169,
      "grad_norm": 0.8930279758698314,
      "learning_rate": 3.90148884632187e-06,
      "loss": 0.135,
      "step": 3745
    },
    {
      "epoch": 0.34514212005343897,
      "grad_norm": 0.9641434925117012,
      "learning_rate": 3.900857956657707e-06,
      "loss": 0.1506,
      "step": 3746
    },
    {
      "epoch": 0.34523425623070897,
      "grad_norm": 0.9518783618290042,
      "learning_rate": 3.900226936923731e-06,
      "loss": 0.1406,
      "step": 3747
    },
    {
      "epoch": 0.34532639240797897,
      "grad_norm": 0.9648681621137352,
      "learning_rate": 3.899595787178534e-06,
      "loss": 0.1551,
      "step": 3748
    },
    {
      "epoch": 0.345418528585249,
      "grad_norm": 0.92389069120726,
      "learning_rate": 3.898964507480717e-06,
      "loss": 0.147,
      "step": 3749
    },
    {
      "epoch": 0.345510664762519,
      "grad_norm": 0.9264829787786033,
      "learning_rate": 3.8983330978888955e-06,
      "loss": 0.1523,
      "step": 3750
    },
    {
      "epoch": 0.345602800939789,
      "grad_norm": 0.9856484497779876,
      "learning_rate": 3.897701558461695e-06,
      "loss": 0.1709,
      "step": 3751
    },
    {
      "epoch": 0.345694937117059,
      "grad_norm": 0.9104132120981927,
      "learning_rate": 3.897069889257754e-06,
      "loss": 0.1615,
      "step": 3752
    },
    {
      "epoch": 0.345787073294329,
      "grad_norm": 0.8928954674177243,
      "learning_rate": 3.8964380903357244e-06,
      "loss": 0.1491,
      "step": 3753
    },
    {
      "epoch": 0.345879209471599,
      "grad_norm": 0.9773042752590833,
      "learning_rate": 3.895806161754267e-06,
      "loss": 0.1555,
      "step": 3754
    },
    {
      "epoch": 0.34597134564886906,
      "grad_norm": 0.9934249525249464,
      "learning_rate": 3.895174103572057e-06,
      "loss": 0.1739,
      "step": 3755
    },
    {
      "epoch": 0.34606348182613905,
      "grad_norm": 0.9725688234311249,
      "learning_rate": 3.894541915847783e-06,
      "loss": 0.1661,
      "step": 3756
    },
    {
      "epoch": 0.34615561800340905,
      "grad_norm": 0.9447443585635159,
      "learning_rate": 3.89390959864014e-06,
      "loss": 0.159,
      "step": 3757
    },
    {
      "epoch": 0.34624775418067905,
      "grad_norm": 0.8845525119997683,
      "learning_rate": 3.893277152007842e-06,
      "loss": 0.1549,
      "step": 3758
    },
    {
      "epoch": 0.34633989035794904,
      "grad_norm": 0.8523390340352258,
      "learning_rate": 3.89264457600961e-06,
      "loss": 0.1441,
      "step": 3759
    },
    {
      "epoch": 0.34643202653521904,
      "grad_norm": 0.9244438703386956,
      "learning_rate": 3.892011870704179e-06,
      "loss": 0.1581,
      "step": 3760
    },
    {
      "epoch": 0.34652416271248904,
      "grad_norm": 0.9038773135362822,
      "learning_rate": 3.891379036150297e-06,
      "loss": 0.1408,
      "step": 3761
    },
    {
      "epoch": 0.3466162988897591,
      "grad_norm": 0.8333273764873563,
      "learning_rate": 3.89074607240672e-06,
      "loss": 0.1503,
      "step": 3762
    },
    {
      "epoch": 0.3467084350670291,
      "grad_norm": 0.9480337430008207,
      "learning_rate": 3.890112979532222e-06,
      "loss": 0.1541,
      "step": 3763
    },
    {
      "epoch": 0.3468005712442991,
      "grad_norm": 0.9599410814646354,
      "learning_rate": 3.889479757585584e-06,
      "loss": 0.1665,
      "step": 3764
    },
    {
      "epoch": 0.3468927074215691,
      "grad_norm": 0.9433260473515176,
      "learning_rate": 3.888846406625601e-06,
      "loss": 0.1454,
      "step": 3765
    },
    {
      "epoch": 0.3469848435988391,
      "grad_norm": 0.9380561623033904,
      "learning_rate": 3.888212926711079e-06,
      "loss": 0.1573,
      "step": 3766
    },
    {
      "epoch": 0.3470769797761091,
      "grad_norm": 1.0103890768413186,
      "learning_rate": 3.887579317900838e-06,
      "loss": 0.1512,
      "step": 3767
    },
    {
      "epoch": 0.34716911595337907,
      "grad_norm": 0.9545274750885263,
      "learning_rate": 3.886945580253708e-06,
      "loss": 0.1507,
      "step": 3768
    },
    {
      "epoch": 0.3472612521306491,
      "grad_norm": 0.8545779025060927,
      "learning_rate": 3.886311713828531e-06,
      "loss": 0.134,
      "step": 3769
    },
    {
      "epoch": 0.3473533883079191,
      "grad_norm": 0.9256233241238948,
      "learning_rate": 3.885677718684163e-06,
      "loss": 0.1652,
      "step": 3770
    },
    {
      "epoch": 0.3474455244851891,
      "grad_norm": 0.8881952279984489,
      "learning_rate": 3.885043594879469e-06,
      "loss": 0.1515,
      "step": 3771
    },
    {
      "epoch": 0.3475376606624591,
      "grad_norm": 0.9239248201411169,
      "learning_rate": 3.884409342473329e-06,
      "loss": 0.1722,
      "step": 3772
    },
    {
      "epoch": 0.3476297968397291,
      "grad_norm": 0.9022748249991301,
      "learning_rate": 3.883774961524632e-06,
      "loss": 0.1591,
      "step": 3773
    },
    {
      "epoch": 0.3477219330169991,
      "grad_norm": 0.9727583904335542,
      "learning_rate": 3.88314045209228e-06,
      "loss": 0.1586,
      "step": 3774
    },
    {
      "epoch": 0.3478140691942691,
      "grad_norm": 0.8946050022186862,
      "learning_rate": 3.8825058142351895e-06,
      "loss": 0.1474,
      "step": 3775
    },
    {
      "epoch": 0.34790620537153916,
      "grad_norm": 0.9516275063497269,
      "learning_rate": 3.881871048012285e-06,
      "loss": 0.1534,
      "step": 3776
    },
    {
      "epoch": 0.34799834154880915,
      "grad_norm": 0.9403615984819307,
      "learning_rate": 3.881236153482505e-06,
      "loss": 0.1684,
      "step": 3777
    },
    {
      "epoch": 0.34809047772607915,
      "grad_norm": 0.9788364338685362,
      "learning_rate": 3.880601130704799e-06,
      "loss": 0.1594,
      "step": 3778
    },
    {
      "epoch": 0.34818261390334915,
      "grad_norm": 0.8769684675894291,
      "learning_rate": 3.87996597973813e-06,
      "loss": 0.1494,
      "step": 3779
    },
    {
      "epoch": 0.34827475008061914,
      "grad_norm": 0.9241894076679832,
      "learning_rate": 3.879330700641471e-06,
      "loss": 0.1516,
      "step": 3780
    },
    {
      "epoch": 0.34836688625788914,
      "grad_norm": 1.010356419388737,
      "learning_rate": 3.878695293473809e-06,
      "loss": 0.1786,
      "step": 3781
    },
    {
      "epoch": 0.34845902243515914,
      "grad_norm": 0.8481571310124365,
      "learning_rate": 3.878059758294139e-06,
      "loss": 0.1566,
      "step": 3782
    },
    {
      "epoch": 0.3485511586124292,
      "grad_norm": 0.9237936486021854,
      "learning_rate": 3.877424095161473e-06,
      "loss": 0.1643,
      "step": 3783
    },
    {
      "epoch": 0.3486432947896992,
      "grad_norm": 0.9049124285799792,
      "learning_rate": 3.8767883041348305e-06,
      "loss": 0.1524,
      "step": 3784
    },
    {
      "epoch": 0.3487354309669692,
      "grad_norm": 0.8886044487962574,
      "learning_rate": 3.8761523852732475e-06,
      "loss": 0.1486,
      "step": 3785
    },
    {
      "epoch": 0.3488275671442392,
      "grad_norm": 0.8609799128855903,
      "learning_rate": 3.875516338635766e-06,
      "loss": 0.1456,
      "step": 3786
    },
    {
      "epoch": 0.3489197033215092,
      "grad_norm": 0.8265506676375848,
      "learning_rate": 3.874880164281446e-06,
      "loss": 0.1403,
      "step": 3787
    },
    {
      "epoch": 0.3490118394987792,
      "grad_norm": 0.8677006052855488,
      "learning_rate": 3.874243862269353e-06,
      "loss": 0.1505,
      "step": 3788
    },
    {
      "epoch": 0.3491039756760492,
      "grad_norm": 0.8970711140727137,
      "learning_rate": 3.87360743265857e-06,
      "loss": 0.1628,
      "step": 3789
    },
    {
      "epoch": 0.3491961118533192,
      "grad_norm": 1.0145950719528463,
      "learning_rate": 3.87297087550819e-06,
      "loss": 0.1624,
      "step": 3790
    },
    {
      "epoch": 0.3492882480305892,
      "grad_norm": 0.888008623820799,
      "learning_rate": 3.872334190877316e-06,
      "loss": 0.1459,
      "step": 3791
    },
    {
      "epoch": 0.3493803842078592,
      "grad_norm": 0.8672174025069913,
      "learning_rate": 3.8716973788250645e-06,
      "loss": 0.1444,
      "step": 3792
    },
    {
      "epoch": 0.3494725203851292,
      "grad_norm": 0.9186929663280052,
      "learning_rate": 3.871060439410563e-06,
      "loss": 0.1463,
      "step": 3793
    },
    {
      "epoch": 0.3495646565623992,
      "grad_norm": 0.8865568286318863,
      "learning_rate": 3.870423372692953e-06,
      "loss": 0.147,
      "step": 3794
    },
    {
      "epoch": 0.3496567927396692,
      "grad_norm": 0.8920318935417838,
      "learning_rate": 3.869786178731386e-06,
      "loss": 0.1471,
      "step": 3795
    },
    {
      "epoch": 0.34974892891693926,
      "grad_norm": 1.0001696409280874,
      "learning_rate": 3.869148857585024e-06,
      "loss": 0.1567,
      "step": 3796
    },
    {
      "epoch": 0.34984106509420926,
      "grad_norm": 0.9044038444981554,
      "learning_rate": 3.8685114093130436e-06,
      "loss": 0.1605,
      "step": 3797
    },
    {
      "epoch": 0.34993320127147926,
      "grad_norm": 0.9419109233515364,
      "learning_rate": 3.867873833974631e-06,
      "loss": 0.1628,
      "step": 3798
    },
    {
      "epoch": 0.35002533744874925,
      "grad_norm": 0.9098145882514128,
      "learning_rate": 3.867236131628985e-06,
      "loss": 0.147,
      "step": 3799
    },
    {
      "epoch": 0.35011747362601925,
      "grad_norm": 0.9675498143428517,
      "learning_rate": 3.8665983023353195e-06,
      "loss": 0.1613,
      "step": 3800
    },
    {
      "epoch": 0.35020960980328925,
      "grad_norm": 0.9152259025758308,
      "learning_rate": 3.865960346152853e-06,
      "loss": 0.1575,
      "step": 3801
    },
    {
      "epoch": 0.35030174598055924,
      "grad_norm": 0.9096318752456122,
      "learning_rate": 3.865322263140821e-06,
      "loss": 0.1603,
      "step": 3802
    },
    {
      "epoch": 0.3503938821578293,
      "grad_norm": 0.9042773380336094,
      "learning_rate": 3.86468405335847e-06,
      "loss": 0.1593,
      "step": 3803
    },
    {
      "epoch": 0.3504860183350993,
      "grad_norm": 0.8810883239456601,
      "learning_rate": 3.864045716865059e-06,
      "loss": 0.1564,
      "step": 3804
    },
    {
      "epoch": 0.3505781545123693,
      "grad_norm": 0.8969771666725758,
      "learning_rate": 3.863407253719855e-06,
      "loss": 0.1658,
      "step": 3805
    },
    {
      "epoch": 0.3506702906896393,
      "grad_norm": 0.8859714505584699,
      "learning_rate": 3.8627686639821415e-06,
      "loss": 0.1524,
      "step": 3806
    },
    {
      "epoch": 0.3507624268669093,
      "grad_norm": 0.9396107844353678,
      "learning_rate": 3.8621299477112105e-06,
      "loss": 0.1594,
      "step": 3807
    },
    {
      "epoch": 0.3508545630441793,
      "grad_norm": 0.9568604332583132,
      "learning_rate": 3.861491104966368e-06,
      "loss": 0.143,
      "step": 3808
    },
    {
      "epoch": 0.3509466992214493,
      "grad_norm": 0.9548184489453653,
      "learning_rate": 3.860852135806929e-06,
      "loss": 0.1612,
      "step": 3809
    },
    {
      "epoch": 0.35103883539871933,
      "grad_norm": 0.9162268999810167,
      "learning_rate": 3.860213040292224e-06,
      "loss": 0.1516,
      "step": 3810
    },
    {
      "epoch": 0.3511309715759893,
      "grad_norm": 0.965092172228159,
      "learning_rate": 3.85957381848159e-06,
      "loss": 0.1462,
      "step": 3811
    },
    {
      "epoch": 0.3512231077532593,
      "grad_norm": 0.9578818766366897,
      "learning_rate": 3.858934470434381e-06,
      "loss": 0.1489,
      "step": 3812
    },
    {
      "epoch": 0.3513152439305293,
      "grad_norm": 0.9123089429516482,
      "learning_rate": 3.858294996209961e-06,
      "loss": 0.1563,
      "step": 3813
    },
    {
      "epoch": 0.3514073801077993,
      "grad_norm": 0.9948072100858789,
      "learning_rate": 3.857655395867704e-06,
      "loss": 0.1571,
      "step": 3814
    },
    {
      "epoch": 0.3514995162850693,
      "grad_norm": 0.9216205578818107,
      "learning_rate": 3.857015669466998e-06,
      "loss": 0.157,
      "step": 3815
    },
    {
      "epoch": 0.3515916524623393,
      "grad_norm": 0.9864385412189361,
      "learning_rate": 3.856375817067241e-06,
      "loss": 0.1627,
      "step": 3816
    },
    {
      "epoch": 0.35168378863960936,
      "grad_norm": 0.8407221015580862,
      "learning_rate": 3.855735838727842e-06,
      "loss": 0.1417,
      "step": 3817
    },
    {
      "epoch": 0.35177592481687936,
      "grad_norm": 0.8902927685530272,
      "learning_rate": 3.855095734508225e-06,
      "loss": 0.1379,
      "step": 3818
    },
    {
      "epoch": 0.35186806099414936,
      "grad_norm": 0.9232807543583181,
      "learning_rate": 3.854455504467824e-06,
      "loss": 0.1494,
      "step": 3819
    },
    {
      "epoch": 0.35196019717141935,
      "grad_norm": 0.8747114191669552,
      "learning_rate": 3.853815148666084e-06,
      "loss": 0.1392,
      "step": 3820
    },
    {
      "epoch": 0.35205233334868935,
      "grad_norm": 0.9007112346102228,
      "learning_rate": 3.85317466716246e-06,
      "loss": 0.1459,
      "step": 3821
    },
    {
      "epoch": 0.35214446952595935,
      "grad_norm": 0.9703594906332523,
      "learning_rate": 3.852534060016424e-06,
      "loss": 0.1514,
      "step": 3822
    },
    {
      "epoch": 0.3522366057032294,
      "grad_norm": 0.9876709700858648,
      "learning_rate": 3.8518933272874546e-06,
      "loss": 0.1556,
      "step": 3823
    },
    {
      "epoch": 0.3523287418804994,
      "grad_norm": 0.9085359041715336,
      "learning_rate": 3.851252469035044e-06,
      "loss": 0.1647,
      "step": 3824
    },
    {
      "epoch": 0.3524208780577694,
      "grad_norm": 0.8843618900771493,
      "learning_rate": 3.850611485318696e-06,
      "loss": 0.1526,
      "step": 3825
    },
    {
      "epoch": 0.3525130142350394,
      "grad_norm": 0.9384375911161207,
      "learning_rate": 3.8499703761979276e-06,
      "loss": 0.1504,
      "step": 3826
    },
    {
      "epoch": 0.3526051504123094,
      "grad_norm": 0.8748805623679292,
      "learning_rate": 3.849329141732263e-06,
      "loss": 0.1539,
      "step": 3827
    },
    {
      "epoch": 0.3526972865895794,
      "grad_norm": 0.8627817633209965,
      "learning_rate": 3.848687781981243e-06,
      "loss": 0.1552,
      "step": 3828
    },
    {
      "epoch": 0.3527894227668494,
      "grad_norm": 0.9059527689927666,
      "learning_rate": 3.848046297004417e-06,
      "loss": 0.1447,
      "step": 3829
    },
    {
      "epoch": 0.35288155894411943,
      "grad_norm": 0.8805324891957399,
      "learning_rate": 3.847404686861348e-06,
      "loss": 0.1439,
      "step": 3830
    },
    {
      "epoch": 0.35297369512138943,
      "grad_norm": 0.9344084024801386,
      "learning_rate": 3.846762951611608e-06,
      "loss": 0.1552,
      "step": 3831
    },
    {
      "epoch": 0.3530658312986594,
      "grad_norm": 0.9363214449601064,
      "learning_rate": 3.846121091314783e-06,
      "loss": 0.1545,
      "step": 3832
    },
    {
      "epoch": 0.3531579674759294,
      "grad_norm": 0.8539977014512892,
      "learning_rate": 3.84547910603047e-06,
      "loss": 0.1405,
      "step": 3833
    },
    {
      "epoch": 0.3532501036531994,
      "grad_norm": 0.9995946565004439,
      "learning_rate": 3.8448369958182775e-06,
      "loss": 0.1757,
      "step": 3834
    },
    {
      "epoch": 0.3533422398304694,
      "grad_norm": 0.9308525526025416,
      "learning_rate": 3.844194760737825e-06,
      "loss": 0.163,
      "step": 3835
    },
    {
      "epoch": 0.3534343760077394,
      "grad_norm": 0.8951960585707758,
      "learning_rate": 3.843552400848744e-06,
      "loss": 0.1492,
      "step": 3836
    },
    {
      "epoch": 0.35352651218500947,
      "grad_norm": 0.8647013473773321,
      "learning_rate": 3.842909916210678e-06,
      "loss": 0.143,
      "step": 3837
    },
    {
      "epoch": 0.35361864836227946,
      "grad_norm": 0.9713686917245324,
      "learning_rate": 3.842267306883283e-06,
      "loss": 0.1589,
      "step": 3838
    },
    {
      "epoch": 0.35371078453954946,
      "grad_norm": 0.8925126993308895,
      "learning_rate": 3.8416245729262225e-06,
      "loss": 0.1415,
      "step": 3839
    },
    {
      "epoch": 0.35380292071681946,
      "grad_norm": 1.0428020798287154,
      "learning_rate": 3.840981714399177e-06,
      "loss": 0.1517,
      "step": 3840
    },
    {
      "epoch": 0.35389505689408945,
      "grad_norm": 0.9157111788231413,
      "learning_rate": 3.840338731361834e-06,
      "loss": 0.1494,
      "step": 3841
    },
    {
      "epoch": 0.35398719307135945,
      "grad_norm": 0.9316243217330057,
      "learning_rate": 3.839695623873896e-06,
      "loss": 0.1558,
      "step": 3842
    },
    {
      "epoch": 0.35407932924862945,
      "grad_norm": 0.9232650712470144,
      "learning_rate": 3.839052391995076e-06,
      "loss": 0.1542,
      "step": 3843
    },
    {
      "epoch": 0.3541714654258995,
      "grad_norm": 0.8754883534686446,
      "learning_rate": 3.8384090357850964e-06,
      "loss": 0.1544,
      "step": 3844
    },
    {
      "epoch": 0.3542636016031695,
      "grad_norm": 0.9115494168855587,
      "learning_rate": 3.837765555303694e-06,
      "loss": 0.1538,
      "step": 3845
    },
    {
      "epoch": 0.3543557377804395,
      "grad_norm": 0.8999621586897121,
      "learning_rate": 3.837121950610616e-06,
      "loss": 0.1479,
      "step": 3846
    },
    {
      "epoch": 0.3544478739577095,
      "grad_norm": 0.8276305518580209,
      "learning_rate": 3.8364782217656205e-06,
      "loss": 0.1324,
      "step": 3847
    },
    {
      "epoch": 0.3545400101349795,
      "grad_norm": 0.9024826007607681,
      "learning_rate": 3.835834368828479e-06,
      "loss": 0.1557,
      "step": 3848
    },
    {
      "epoch": 0.3546321463122495,
      "grad_norm": 0.9065730873524084,
      "learning_rate": 3.835190391858972e-06,
      "loss": 0.1607,
      "step": 3849
    },
    {
      "epoch": 0.3547242824895195,
      "grad_norm": 0.8661100603350929,
      "learning_rate": 3.834546290916893e-06,
      "loss": 0.1565,
      "step": 3850
    },
    {
      "epoch": 0.35481641866678953,
      "grad_norm": 0.8664911463390288,
      "learning_rate": 3.833902066062049e-06,
      "loss": 0.14,
      "step": 3851
    },
    {
      "epoch": 0.35490855484405953,
      "grad_norm": 0.9297572513132248,
      "learning_rate": 3.833257717354253e-06,
      "loss": 0.162,
      "step": 3852
    },
    {
      "epoch": 0.35500069102132953,
      "grad_norm": 0.9334632150840317,
      "learning_rate": 3.832613244853335e-06,
      "loss": 0.1549,
      "step": 3853
    },
    {
      "epoch": 0.3550928271985995,
      "grad_norm": 0.9117192881469401,
      "learning_rate": 3.831968648619133e-06,
      "loss": 0.1643,
      "step": 3854
    },
    {
      "epoch": 0.3551849633758695,
      "grad_norm": 0.8679690659313138,
      "learning_rate": 3.8313239287115e-06,
      "loss": 0.148,
      "step": 3855
    },
    {
      "epoch": 0.3552770995531395,
      "grad_norm": 0.9554686130719982,
      "learning_rate": 3.830679085190296e-06,
      "loss": 0.1585,
      "step": 3856
    },
    {
      "epoch": 0.35536923573040957,
      "grad_norm": 0.9674925680074199,
      "learning_rate": 3.830034118115396e-06,
      "loss": 0.1582,
      "step": 3857
    },
    {
      "epoch": 0.35546137190767957,
      "grad_norm": 0.8961305393810277,
      "learning_rate": 3.829389027546685e-06,
      "loss": 0.1437,
      "step": 3858
    },
    {
      "epoch": 0.35555350808494957,
      "grad_norm": 0.8747439392004103,
      "learning_rate": 3.828743813544059e-06,
      "loss": 0.1432,
      "step": 3859
    },
    {
      "epoch": 0.35564564426221956,
      "grad_norm": 0.9511837265747041,
      "learning_rate": 3.8280984761674286e-06,
      "loss": 0.1554,
      "step": 3860
    },
    {
      "epoch": 0.35573778043948956,
      "grad_norm": 0.9271027409618041,
      "learning_rate": 3.82745301547671e-06,
      "loss": 0.1604,
      "step": 3861
    },
    {
      "epoch": 0.35582991661675956,
      "grad_norm": 0.8952219446892987,
      "learning_rate": 3.8268074315318375e-06,
      "loss": 0.1585,
      "step": 3862
    },
    {
      "epoch": 0.35592205279402955,
      "grad_norm": 0.8927757851183054,
      "learning_rate": 3.826161724392751e-06,
      "loss": 0.142,
      "step": 3863
    },
    {
      "epoch": 0.3560141889712996,
      "grad_norm": 0.8424179319437527,
      "learning_rate": 3.8255158941194066e-06,
      "loss": 0.1413,
      "step": 3864
    },
    {
      "epoch": 0.3561063251485696,
      "grad_norm": 0.8740217468806033,
      "learning_rate": 3.824869940771768e-06,
      "loss": 0.1448,
      "step": 3865
    },
    {
      "epoch": 0.3561984613258396,
      "grad_norm": 0.8540961364919858,
      "learning_rate": 3.824223864409813e-06,
      "loss": 0.1508,
      "step": 3866
    },
    {
      "epoch": 0.3562905975031096,
      "grad_norm": 0.883177257480543,
      "learning_rate": 3.823577665093529e-06,
      "loss": 0.1583,
      "step": 3867
    },
    {
      "epoch": 0.3563827336803796,
      "grad_norm": 0.8970386523940295,
      "learning_rate": 3.822931342882918e-06,
      "loss": 0.154,
      "step": 3868
    },
    {
      "epoch": 0.3564748698576496,
      "grad_norm": 0.9148582364371604,
      "learning_rate": 3.822284897837989e-06,
      "loss": 0.1574,
      "step": 3869
    },
    {
      "epoch": 0.3565670060349196,
      "grad_norm": 0.9835888081454008,
      "learning_rate": 3.821638330018764e-06,
      "loss": 0.1613,
      "step": 3870
    },
    {
      "epoch": 0.35665914221218964,
      "grad_norm": 0.9487618778033893,
      "learning_rate": 3.820991639485279e-06,
      "loss": 0.15,
      "step": 3871
    },
    {
      "epoch": 0.35675127838945964,
      "grad_norm": 0.9581745160471137,
      "learning_rate": 3.820344826297577e-06,
      "loss": 0.1482,
      "step": 3872
    },
    {
      "epoch": 0.35684341456672963,
      "grad_norm": 0.9939597183184489,
      "learning_rate": 3.819697890515717e-06,
      "loss": 0.1741,
      "step": 3873
    },
    {
      "epoch": 0.35693555074399963,
      "grad_norm": 0.9341386337680994,
      "learning_rate": 3.819050832199766e-06,
      "loss": 0.1534,
      "step": 3874
    },
    {
      "epoch": 0.3570276869212696,
      "grad_norm": 0.9627433592601693,
      "learning_rate": 3.818403651409801e-06,
      "loss": 0.166,
      "step": 3875
    },
    {
      "epoch": 0.3571198230985396,
      "grad_norm": 0.8966016704437211,
      "learning_rate": 3.817756348205917e-06,
      "loss": 0.1534,
      "step": 3876
    },
    {
      "epoch": 0.3572119592758096,
      "grad_norm": 0.831950783650535,
      "learning_rate": 3.817108922648214e-06,
      "loss": 0.1479,
      "step": 3877
    },
    {
      "epoch": 0.3573040954530797,
      "grad_norm": 0.9242438801830429,
      "learning_rate": 3.816461374796805e-06,
      "loss": 0.1581,
      "step": 3878
    },
    {
      "epoch": 0.35739623163034967,
      "grad_norm": 0.9027636442769159,
      "learning_rate": 3.815813704711816e-06,
      "loss": 0.1535,
      "step": 3879
    },
    {
      "epoch": 0.35748836780761967,
      "grad_norm": 0.796082355005034,
      "learning_rate": 3.815165912453383e-06,
      "loss": 0.1377,
      "step": 3880
    },
    {
      "epoch": 0.35758050398488966,
      "grad_norm": 0.9768643954402325,
      "learning_rate": 3.814517998081654e-06,
      "loss": 0.1672,
      "step": 3881
    },
    {
      "epoch": 0.35767264016215966,
      "grad_norm": 0.924729896863386,
      "learning_rate": 3.8138699616567875e-06,
      "loss": 0.1619,
      "step": 3882
    },
    {
      "epoch": 0.35776477633942966,
      "grad_norm": 0.8615406672854165,
      "learning_rate": 3.8132218032389524e-06,
      "loss": 0.1438,
      "step": 3883
    },
    {
      "epoch": 0.35785691251669965,
      "grad_norm": 0.8715405718747457,
      "learning_rate": 3.812573522888332e-06,
      "loss": 0.1516,
      "step": 3884
    },
    {
      "epoch": 0.3579490486939697,
      "grad_norm": 0.9561787493207595,
      "learning_rate": 3.81192512066512e-06,
      "loss": 0.1721,
      "step": 3885
    },
    {
      "epoch": 0.3580411848712397,
      "grad_norm": 0.8752491679156089,
      "learning_rate": 3.811276596629518e-06,
      "loss": 0.1502,
      "step": 3886
    },
    {
      "epoch": 0.3581333210485097,
      "grad_norm": 0.9660899512247874,
      "learning_rate": 3.810627950841743e-06,
      "loss": 0.1586,
      "step": 3887
    },
    {
      "epoch": 0.3582254572257797,
      "grad_norm": 0.9448658827634945,
      "learning_rate": 3.8099791833620214e-06,
      "loss": 0.1533,
      "step": 3888
    },
    {
      "epoch": 0.3583175934030497,
      "grad_norm": 0.9004121487474659,
      "learning_rate": 3.8093302942505935e-06,
      "loss": 0.1585,
      "step": 3889
    },
    {
      "epoch": 0.3584097295803197,
      "grad_norm": 0.906632158147621,
      "learning_rate": 3.8086812835677044e-06,
      "loss": 0.1624,
      "step": 3890
    },
    {
      "epoch": 0.35850186575758974,
      "grad_norm": 0.9183195616043317,
      "learning_rate": 3.808032151373619e-06,
      "loss": 0.1618,
      "step": 3891
    },
    {
      "epoch": 0.35859400193485974,
      "grad_norm": 0.8553552558472457,
      "learning_rate": 3.807382897728607e-06,
      "loss": 0.1444,
      "step": 3892
    },
    {
      "epoch": 0.35868613811212974,
      "grad_norm": 0.902571763244677,
      "learning_rate": 3.8067335226929523e-06,
      "loss": 0.1554,
      "step": 3893
    },
    {
      "epoch": 0.35877827428939973,
      "grad_norm": 0.8831422504068266,
      "learning_rate": 3.8060840263269494e-06,
      "loss": 0.1553,
      "step": 3894
    },
    {
      "epoch": 0.35887041046666973,
      "grad_norm": 0.8695419644636682,
      "learning_rate": 3.8054344086909043e-06,
      "loss": 0.1539,
      "step": 3895
    },
    {
      "epoch": 0.35896254664393973,
      "grad_norm": 0.8634127120614579,
      "learning_rate": 3.804784669845133e-06,
      "loss": 0.1438,
      "step": 3896
    },
    {
      "epoch": 0.3590546828212097,
      "grad_norm": 0.8608209569312256,
      "learning_rate": 3.8041348098499655e-06,
      "loss": 0.1397,
      "step": 3897
    },
    {
      "epoch": 0.3591468189984798,
      "grad_norm": 0.9012937821304821,
      "learning_rate": 3.8034848287657403e-06,
      "loss": 0.153,
      "step": 3898
    },
    {
      "epoch": 0.3592389551757498,
      "grad_norm": 0.913635814588998,
      "learning_rate": 3.802834726652809e-06,
      "loss": 0.1583,
      "step": 3899
    },
    {
      "epoch": 0.35933109135301977,
      "grad_norm": 0.9246153282899159,
      "learning_rate": 3.802184503571532e-06,
      "loss": 0.1508,
      "step": 3900
    },
    {
      "epoch": 0.35942322753028977,
      "grad_norm": 0.9563799822615789,
      "learning_rate": 3.801534159582285e-06,
      "loss": 0.1728,
      "step": 3901
    },
    {
      "epoch": 0.35951536370755977,
      "grad_norm": 0.8893729611997498,
      "learning_rate": 3.80088369474545e-06,
      "loss": 0.1571,
      "step": 3902
    },
    {
      "epoch": 0.35960749988482976,
      "grad_norm": 0.8977935109197466,
      "learning_rate": 3.800233109121425e-06,
      "loss": 0.1395,
      "step": 3903
    },
    {
      "epoch": 0.35969963606209976,
      "grad_norm": 0.9174627801219775,
      "learning_rate": 3.7995824027706152e-06,
      "loss": 0.1475,
      "step": 3904
    },
    {
      "epoch": 0.3597917722393698,
      "grad_norm": 0.9695866259137063,
      "learning_rate": 3.7989315757534397e-06,
      "loss": 0.1549,
      "step": 3905
    },
    {
      "epoch": 0.3598839084166398,
      "grad_norm": 0.9840473979899322,
      "learning_rate": 3.7982806281303276e-06,
      "loss": 0.1672,
      "step": 3906
    },
    {
      "epoch": 0.3599760445939098,
      "grad_norm": 0.8725606551036027,
      "learning_rate": 3.797629559961719e-06,
      "loss": 0.1436,
      "step": 3907
    },
    {
      "epoch": 0.3600681807711798,
      "grad_norm": 0.8950596885234003,
      "learning_rate": 3.7969783713080665e-06,
      "loss": 0.1595,
      "step": 3908
    },
    {
      "epoch": 0.3601603169484498,
      "grad_norm": 0.8905525328504984,
      "learning_rate": 3.796327062229833e-06,
      "loss": 0.1572,
      "step": 3909
    },
    {
      "epoch": 0.3602524531257198,
      "grad_norm": 0.8923575257402443,
      "learning_rate": 3.7956756327874912e-06,
      "loss": 0.1623,
      "step": 3910
    },
    {
      "epoch": 0.3603445893029898,
      "grad_norm": 0.9554499080768074,
      "learning_rate": 3.7950240830415286e-06,
      "loss": 0.1594,
      "step": 3911
    },
    {
      "epoch": 0.36043672548025985,
      "grad_norm": 0.9192802900436988,
      "learning_rate": 3.79437241305244e-06,
      "loss": 0.157,
      "step": 3912
    },
    {
      "epoch": 0.36052886165752984,
      "grad_norm": 0.8913340818064148,
      "learning_rate": 3.7937206228807333e-06,
      "loss": 0.1491,
      "step": 3913
    },
    {
      "epoch": 0.36062099783479984,
      "grad_norm": 0.8862949193565367,
      "learning_rate": 3.793068712586928e-06,
      "loss": 0.1456,
      "step": 3914
    },
    {
      "epoch": 0.36071313401206984,
      "grad_norm": 0.9419844735143882,
      "learning_rate": 3.7924166822315535e-06,
      "loss": 0.1605,
      "step": 3915
    },
    {
      "epoch": 0.36080527018933983,
      "grad_norm": 0.9360333851589002,
      "learning_rate": 3.791764531875151e-06,
      "loss": 0.1638,
      "step": 3916
    },
    {
      "epoch": 0.36089740636660983,
      "grad_norm": 0.8885011416795211,
      "learning_rate": 3.7911122615782727e-06,
      "loss": 0.1557,
      "step": 3917
    },
    {
      "epoch": 0.3609895425438799,
      "grad_norm": 0.890939735479181,
      "learning_rate": 3.790459871401482e-06,
      "loss": 0.1624,
      "step": 3918
    },
    {
      "epoch": 0.3610816787211499,
      "grad_norm": 0.8438507583767467,
      "learning_rate": 3.7898073614053527e-06,
      "loss": 0.1455,
      "step": 3919
    },
    {
      "epoch": 0.3611738148984199,
      "grad_norm": 0.9064039983475835,
      "learning_rate": 3.7891547316504716e-06,
      "loss": 0.146,
      "step": 3920
    },
    {
      "epoch": 0.3612659510756899,
      "grad_norm": 0.8475645530494015,
      "learning_rate": 3.788501982197435e-06,
      "loss": 0.1392,
      "step": 3921
    },
    {
      "epoch": 0.36135808725295987,
      "grad_norm": 0.9057176590949076,
      "learning_rate": 3.787849113106851e-06,
      "loss": 0.159,
      "step": 3922
    },
    {
      "epoch": 0.36145022343022987,
      "grad_norm": 0.9600989104067679,
      "learning_rate": 3.787196124439337e-06,
      "loss": 0.1694,
      "step": 3923
    },
    {
      "epoch": 0.36154235960749986,
      "grad_norm": 0.9044900537237868,
      "learning_rate": 3.7865430162555255e-06,
      "loss": 0.1639,
      "step": 3924
    },
    {
      "epoch": 0.3616344957847699,
      "grad_norm": 0.8508738273375758,
      "learning_rate": 3.7858897886160562e-06,
      "loss": 0.1453,
      "step": 3925
    },
    {
      "epoch": 0.3617266319620399,
      "grad_norm": 0.9620587536792758,
      "learning_rate": 3.785236441581581e-06,
      "loss": 0.1674,
      "step": 3926
    },
    {
      "epoch": 0.3618187681393099,
      "grad_norm": 0.8942864413844239,
      "learning_rate": 3.784582975212765e-06,
      "loss": 0.1456,
      "step": 3927
    },
    {
      "epoch": 0.3619109043165799,
      "grad_norm": 0.840135666464938,
      "learning_rate": 3.783929389570281e-06,
      "loss": 0.1455,
      "step": 3928
    },
    {
      "epoch": 0.3620030404938499,
      "grad_norm": 0.9114590346869866,
      "learning_rate": 3.7832756847148146e-06,
      "loss": 0.1616,
      "step": 3929
    },
    {
      "epoch": 0.3620951766711199,
      "grad_norm": 0.983234958010798,
      "learning_rate": 3.782621860707063e-06,
      "loss": 0.1725,
      "step": 3930
    },
    {
      "epoch": 0.3621873128483899,
      "grad_norm": 0.8225486926457173,
      "learning_rate": 3.781967917607734e-06,
      "loss": 0.1351,
      "step": 3931
    },
    {
      "epoch": 0.36227944902565995,
      "grad_norm": 0.8813300984579409,
      "learning_rate": 3.7813138554775454e-06,
      "loss": 0.1477,
      "step": 3932
    },
    {
      "epoch": 0.36237158520292995,
      "grad_norm": 0.8821804198238675,
      "learning_rate": 3.780659674377227e-06,
      "loss": 0.1596,
      "step": 3933
    },
    {
      "epoch": 0.36246372138019994,
      "grad_norm": 0.8722528057807815,
      "learning_rate": 3.7800053743675213e-06,
      "loss": 0.1419,
      "step": 3934
    },
    {
      "epoch": 0.36255585755746994,
      "grad_norm": 0.9010612330891158,
      "learning_rate": 3.779350955509178e-06,
      "loss": 0.1565,
      "step": 3935
    },
    {
      "epoch": 0.36264799373473994,
      "grad_norm": 0.8503750455245771,
      "learning_rate": 3.7786964178629613e-06,
      "loss": 0.1398,
      "step": 3936
    },
    {
      "epoch": 0.36274012991200993,
      "grad_norm": 0.8822053261660605,
      "learning_rate": 3.7780417614896438e-06,
      "loss": 0.1573,
      "step": 3937
    },
    {
      "epoch": 0.36283226608927993,
      "grad_norm": 0.895341082099772,
      "learning_rate": 3.777386986450012e-06,
      "loss": 0.1549,
      "step": 3938
    },
    {
      "epoch": 0.36292440226655,
      "grad_norm": 0.8502920032613639,
      "learning_rate": 3.77673209280486e-06,
      "loss": 0.1537,
      "step": 3939
    },
    {
      "epoch": 0.36301653844382,
      "grad_norm": 0.904481467548156,
      "learning_rate": 3.776077080614997e-06,
      "loss": 0.14,
      "step": 3940
    },
    {
      "epoch": 0.36310867462109,
      "grad_norm": 0.850345613001194,
      "learning_rate": 3.7754219499412393e-06,
      "loss": 0.1416,
      "step": 3941
    },
    {
      "epoch": 0.36320081079836,
      "grad_norm": 0.896357927903997,
      "learning_rate": 3.7747667008444154e-06,
      "loss": 0.1442,
      "step": 3942
    },
    {
      "epoch": 0.36329294697562997,
      "grad_norm": 0.9476496642343656,
      "learning_rate": 3.7741113333853673e-06,
      "loss": 0.1625,
      "step": 3943
    },
    {
      "epoch": 0.36338508315289997,
      "grad_norm": 0.9442071990268597,
      "learning_rate": 3.773455847624944e-06,
      "loss": 0.1602,
      "step": 3944
    },
    {
      "epoch": 0.36347721933016996,
      "grad_norm": 0.8869444492762905,
      "learning_rate": 3.7728002436240086e-06,
      "loss": 0.1449,
      "step": 3945
    },
    {
      "epoch": 0.36356935550744,
      "grad_norm": 0.8691908645382854,
      "learning_rate": 3.772144521443434e-06,
      "loss": 0.1466,
      "step": 3946
    },
    {
      "epoch": 0.36366149168471,
      "grad_norm": 0.8722709796332478,
      "learning_rate": 3.7714886811441033e-06,
      "loss": 0.1544,
      "step": 3947
    },
    {
      "epoch": 0.36375362786198,
      "grad_norm": 0.9781950813418121,
      "learning_rate": 3.7708327227869113e-06,
      "loss": 0.1619,
      "step": 3948
    },
    {
      "epoch": 0.36384576403925,
      "grad_norm": 0.9741482528206916,
      "learning_rate": 3.770176646432765e-06,
      "loss": 0.1611,
      "step": 3949
    },
    {
      "epoch": 0.36393790021652,
      "grad_norm": 0.9059945224642333,
      "learning_rate": 3.76952045214258e-06,
      "loss": 0.1474,
      "step": 3950
    },
    {
      "epoch": 0.36403003639379,
      "grad_norm": 0.8904357001863795,
      "learning_rate": 3.7688641399772842e-06,
      "loss": 0.1556,
      "step": 3951
    },
    {
      "epoch": 0.36412217257106005,
      "grad_norm": 0.9321752585797721,
      "learning_rate": 3.7682077099978163e-06,
      "loss": 0.1646,
      "step": 3952
    },
    {
      "epoch": 0.36421430874833005,
      "grad_norm": 0.9378488103126844,
      "learning_rate": 3.767551162265126e-06,
      "loss": 0.1535,
      "step": 3953
    },
    {
      "epoch": 0.36430644492560005,
      "grad_norm": 0.9011582010590111,
      "learning_rate": 3.7668944968401743e-06,
      "loss": 0.1544,
      "step": 3954
    },
    {
      "epoch": 0.36439858110287005,
      "grad_norm": 0.8790970485023327,
      "learning_rate": 3.7662377137839323e-06,
      "loss": 0.1442,
      "step": 3955
    },
    {
      "epoch": 0.36449071728014004,
      "grad_norm": 0.908621835636859,
      "learning_rate": 3.7655808131573823e-06,
      "loss": 0.1511,
      "step": 3956
    },
    {
      "epoch": 0.36458285345741004,
      "grad_norm": 0.8903756339669455,
      "learning_rate": 3.7649237950215178e-06,
      "loss": 0.1387,
      "step": 3957
    },
    {
      "epoch": 0.36467498963468004,
      "grad_norm": 1.0374094432276606,
      "learning_rate": 3.764266659437342e-06,
      "loss": 0.1752,
      "step": 3958
    },
    {
      "epoch": 0.3647671258119501,
      "grad_norm": 0.8597914758339518,
      "learning_rate": 3.763609406465872e-06,
      "loss": 0.1442,
      "step": 3959
    },
    {
      "epoch": 0.3648592619892201,
      "grad_norm": 0.9149822828048013,
      "learning_rate": 3.7629520361681317e-06,
      "loss": 0.1613,
      "step": 3960
    },
    {
      "epoch": 0.3649513981664901,
      "grad_norm": 0.9028396433291438,
      "learning_rate": 3.7622945486051585e-06,
      "loss": 0.1412,
      "step": 3961
    },
    {
      "epoch": 0.3650435343437601,
      "grad_norm": 0.9537831433092164,
      "learning_rate": 3.7616369438380014e-06,
      "loss": 0.1592,
      "step": 3962
    },
    {
      "epoch": 0.3651356705210301,
      "grad_norm": 0.9595063973112449,
      "learning_rate": 3.760979221927718e-06,
      "loss": 0.1501,
      "step": 3963
    },
    {
      "epoch": 0.3652278066983001,
      "grad_norm": 0.9440368618491344,
      "learning_rate": 3.760321382935378e-06,
      "loss": 0.1633,
      "step": 3964
    },
    {
      "epoch": 0.36531994287557007,
      "grad_norm": 0.8716304492059451,
      "learning_rate": 3.759663426922062e-06,
      "loss": 0.1406,
      "step": 3965
    },
    {
      "epoch": 0.3654120790528401,
      "grad_norm": 0.9317503191937407,
      "learning_rate": 3.7590053539488613e-06,
      "loss": 0.1573,
      "step": 3966
    },
    {
      "epoch": 0.3655042152301101,
      "grad_norm": 0.8833226489364178,
      "learning_rate": 3.758347164076879e-06,
      "loss": 0.1561,
      "step": 3967
    },
    {
      "epoch": 0.3655963514073801,
      "grad_norm": 0.9745417396103699,
      "learning_rate": 3.7576888573672254e-06,
      "loss": 0.1592,
      "step": 3968
    },
    {
      "epoch": 0.3656884875846501,
      "grad_norm": 0.9530059653507331,
      "learning_rate": 3.757030433881027e-06,
      "loss": 0.1468,
      "step": 3969
    },
    {
      "epoch": 0.3657806237619201,
      "grad_norm": 0.9366752553028688,
      "learning_rate": 3.7563718936794176e-06,
      "loss": 0.1567,
      "step": 3970
    },
    {
      "epoch": 0.3658727599391901,
      "grad_norm": 0.9622765132112847,
      "learning_rate": 3.755713236823542e-06,
      "loss": 0.1589,
      "step": 3971
    },
    {
      "epoch": 0.3659648961164601,
      "grad_norm": 0.8902165328288483,
      "learning_rate": 3.755054463374558e-06,
      "loss": 0.1525,
      "step": 3972
    },
    {
      "epoch": 0.36605703229373016,
      "grad_norm": 0.9688432772053249,
      "learning_rate": 3.754395573393631e-06,
      "loss": 0.1728,
      "step": 3973
    },
    {
      "epoch": 0.36614916847100015,
      "grad_norm": 0.9026315201949281,
      "learning_rate": 3.7537365669419413e-06,
      "loss": 0.1518,
      "step": 3974
    },
    {
      "epoch": 0.36624130464827015,
      "grad_norm": 0.8789867884729393,
      "learning_rate": 3.7530774440806757e-06,
      "loss": 0.1473,
      "step": 3975
    },
    {
      "epoch": 0.36633344082554015,
      "grad_norm": 0.8515286072012973,
      "learning_rate": 3.7524182048710343e-06,
      "loss": 0.1439,
      "step": 3976
    },
    {
      "epoch": 0.36642557700281014,
      "grad_norm": 0.8828903306684278,
      "learning_rate": 3.751758849374228e-06,
      "loss": 0.1413,
      "step": 3977
    },
    {
      "epoch": 0.36651771318008014,
      "grad_norm": 0.884530916429444,
      "learning_rate": 3.7510993776514786e-06,
      "loss": 0.1513,
      "step": 3978
    },
    {
      "epoch": 0.36660984935735014,
      "grad_norm": 0.8848921511295754,
      "learning_rate": 3.7504397897640165e-06,
      "loss": 0.1499,
      "step": 3979
    },
    {
      "epoch": 0.3667019855346202,
      "grad_norm": 0.9206901760475932,
      "learning_rate": 3.7497800857730854e-06,
      "loss": 0.1526,
      "step": 3980
    },
    {
      "epoch": 0.3667941217118902,
      "grad_norm": 0.8989347990602119,
      "learning_rate": 3.749120265739939e-06,
      "loss": 0.1416,
      "step": 3981
    },
    {
      "epoch": 0.3668862578891602,
      "grad_norm": 0.9478265392367967,
      "learning_rate": 3.7484603297258413e-06,
      "loss": 0.1526,
      "step": 3982
    },
    {
      "epoch": 0.3669783940664302,
      "grad_norm": 0.9804580570371507,
      "learning_rate": 3.747800277792068e-06,
      "loss": 0.1547,
      "step": 3983
    },
    {
      "epoch": 0.3670705302437002,
      "grad_norm": 0.9347412913063416,
      "learning_rate": 3.7471401099999044e-06,
      "loss": 0.152,
      "step": 3984
    },
    {
      "epoch": 0.3671626664209702,
      "grad_norm": 0.9714621448950228,
      "learning_rate": 3.7464798264106474e-06,
      "loss": 0.1546,
      "step": 3985
    },
    {
      "epoch": 0.3672548025982402,
      "grad_norm": 0.9361573736104657,
      "learning_rate": 3.7458194270856046e-06,
      "loss": 0.1496,
      "step": 3986
    },
    {
      "epoch": 0.3673469387755102,
      "grad_norm": 0.9435489839258917,
      "learning_rate": 3.745158912086093e-06,
      "loss": 0.166,
      "step": 3987
    },
    {
      "epoch": 0.3674390749527802,
      "grad_norm": 0.9511965958836167,
      "learning_rate": 3.744498281473443e-06,
      "loss": 0.1558,
      "step": 3988
    },
    {
      "epoch": 0.3675312111300502,
      "grad_norm": 0.8979980364324908,
      "learning_rate": 3.743837535308994e-06,
      "loss": 0.1401,
      "step": 3989
    },
    {
      "epoch": 0.3676233473073202,
      "grad_norm": 0.9236641015351937,
      "learning_rate": 3.7431766736540958e-06,
      "loss": 0.1482,
      "step": 3990
    },
    {
      "epoch": 0.3677154834845902,
      "grad_norm": 0.9291040410569116,
      "learning_rate": 3.74251569657011e-06,
      "loss": 0.1511,
      "step": 3991
    },
    {
      "epoch": 0.3678076196618602,
      "grad_norm": 1.014189124142894,
      "learning_rate": 3.7418546041184074e-06,
      "loss": 0.1467,
      "step": 3992
    },
    {
      "epoch": 0.36789975583913026,
      "grad_norm": 0.9383118379868695,
      "learning_rate": 3.7411933963603706e-06,
      "loss": 0.1655,
      "step": 3993
    },
    {
      "epoch": 0.36799189201640026,
      "grad_norm": 0.8876646385129865,
      "learning_rate": 3.7405320733573948e-06,
      "loss": 0.1433,
      "step": 3994
    },
    {
      "epoch": 0.36808402819367025,
      "grad_norm": 0.9951541805502094,
      "learning_rate": 3.739870635170881e-06,
      "loss": 0.1674,
      "step": 3995
    },
    {
      "epoch": 0.36817616437094025,
      "grad_norm": 0.9346916827824021,
      "learning_rate": 3.739209081862247e-06,
      "loss": 0.1408,
      "step": 3996
    },
    {
      "epoch": 0.36826830054821025,
      "grad_norm": 0.9003306272523681,
      "learning_rate": 3.738547413492916e-06,
      "loss": 0.1445,
      "step": 3997
    },
    {
      "epoch": 0.36836043672548024,
      "grad_norm": 0.9747753339711037,
      "learning_rate": 3.7378856301243233e-06,
      "loss": 0.1558,
      "step": 3998
    },
    {
      "epoch": 0.36845257290275024,
      "grad_norm": 0.9296953912478665,
      "learning_rate": 3.7372237318179172e-06,
      "loss": 0.1533,
      "step": 3999
    },
    {
      "epoch": 0.3685447090800203,
      "grad_norm": 0.886098729035054,
      "learning_rate": 3.7365617186351538e-06,
      "loss": 0.1403,
      "step": 4000
    },
    {
      "epoch": 0.3685447090800203,
      "eval_loss": 0.15275675058364868,
      "eval_runtime": 299.8799,
      "eval_samples_per_second": 23.399,
      "eval_steps_per_second": 2.928,
      "step": 4000
    },
    {
      "epoch": 0.3686368452572903,
      "grad_norm": 0.9026680591810774,
      "learning_rate": 3.735899590637503e-06,
      "loss": 0.1374,
      "step": 4001
    },
    {
      "epoch": 0.3687289814345603,
      "grad_norm": 0.9006265105747577,
      "learning_rate": 3.735237347886441e-06,
      "loss": 0.141,
      "step": 4002
    },
    {
      "epoch": 0.3688211176118303,
      "grad_norm": 0.9632222935393651,
      "learning_rate": 3.7345749904434593e-06,
      "loss": 0.1463,
      "step": 4003
    },
    {
      "epoch": 0.3689132537891003,
      "grad_norm": 0.8573673467809124,
      "learning_rate": 3.733912518370056e-06,
      "loss": 0.1416,
      "step": 4004
    },
    {
      "epoch": 0.3690053899663703,
      "grad_norm": 0.9167873217872394,
      "learning_rate": 3.7332499317277432e-06,
      "loss": 0.1514,
      "step": 4005
    },
    {
      "epoch": 0.3690975261436403,
      "grad_norm": 0.9053492707952349,
      "learning_rate": 3.732587230578041e-06,
      "loss": 0.1498,
      "step": 4006
    },
    {
      "epoch": 0.36918966232091033,
      "grad_norm": 0.9514143436948048,
      "learning_rate": 3.7319244149824825e-06,
      "loss": 0.1502,
      "step": 4007
    },
    {
      "epoch": 0.3692817984981803,
      "grad_norm": 0.9243371986561314,
      "learning_rate": 3.7312614850026086e-06,
      "loss": 0.1647,
      "step": 4008
    },
    {
      "epoch": 0.3693739346754503,
      "grad_norm": 0.9274365702949716,
      "learning_rate": 3.730598440699974e-06,
      "loss": 0.1508,
      "step": 4009
    },
    {
      "epoch": 0.3694660708527203,
      "grad_norm": 0.8933690616443185,
      "learning_rate": 3.729935282136142e-06,
      "loss": 0.1527,
      "step": 4010
    },
    {
      "epoch": 0.3695582070299903,
      "grad_norm": 0.9779518506144268,
      "learning_rate": 3.729272009372686e-06,
      "loss": 0.1528,
      "step": 4011
    },
    {
      "epoch": 0.3696503432072603,
      "grad_norm": 0.9370281031601337,
      "learning_rate": 3.7286086224711916e-06,
      "loss": 0.1455,
      "step": 4012
    },
    {
      "epoch": 0.3697424793845303,
      "grad_norm": 0.8777378271793664,
      "learning_rate": 3.727945121493255e-06,
      "loss": 0.1496,
      "step": 4013
    },
    {
      "epoch": 0.36983461556180036,
      "grad_norm": 0.9393712387806363,
      "learning_rate": 3.7272815065004808e-06,
      "loss": 0.1641,
      "step": 4014
    },
    {
      "epoch": 0.36992675173907036,
      "grad_norm": 1.0039688517917074,
      "learning_rate": 3.7266177775544877e-06,
      "loss": 0.1511,
      "step": 4015
    },
    {
      "epoch": 0.37001888791634036,
      "grad_norm": 0.8739356202824567,
      "learning_rate": 3.7259539347169015e-06,
      "loss": 0.1371,
      "step": 4016
    },
    {
      "epoch": 0.37011102409361035,
      "grad_norm": 0.888356334870733,
      "learning_rate": 3.72528997804936e-06,
      "loss": 0.1519,
      "step": 4017
    },
    {
      "epoch": 0.37020316027088035,
      "grad_norm": 0.993257143098972,
      "learning_rate": 3.724625907613513e-06,
      "loss": 0.1565,
      "step": 4018
    },
    {
      "epoch": 0.37029529644815035,
      "grad_norm": 0.9754754956762344,
      "learning_rate": 3.7239617234710185e-06,
      "loss": 0.1413,
      "step": 4019
    },
    {
      "epoch": 0.3703874326254204,
      "grad_norm": 0.843187808737617,
      "learning_rate": 3.7232974256835457e-06,
      "loss": 0.1433,
      "step": 4020
    },
    {
      "epoch": 0.3704795688026904,
      "grad_norm": 0.8997900277623461,
      "learning_rate": 3.7226330143127765e-06,
      "loss": 0.1468,
      "step": 4021
    },
    {
      "epoch": 0.3705717049799604,
      "grad_norm": 0.9570942103840279,
      "learning_rate": 3.721968489420399e-06,
      "loss": 0.1358,
      "step": 4022
    },
    {
      "epoch": 0.3706638411572304,
      "grad_norm": 0.9574979970822548,
      "learning_rate": 3.721303851068116e-06,
      "loss": 0.1602,
      "step": 4023
    },
    {
      "epoch": 0.3707559773345004,
      "grad_norm": 0.8884651421400183,
      "learning_rate": 3.7206390993176395e-06,
      "loss": 0.1414,
      "step": 4024
    },
    {
      "epoch": 0.3708481135117704,
      "grad_norm": 0.9861288907351328,
      "learning_rate": 3.719974234230691e-06,
      "loss": 0.1542,
      "step": 4025
    },
    {
      "epoch": 0.3709402496890404,
      "grad_norm": 1.0008506973241864,
      "learning_rate": 3.7193092558690036e-06,
      "loss": 0.1479,
      "step": 4026
    },
    {
      "epoch": 0.37103238586631043,
      "grad_norm": 0.9941006684988769,
      "learning_rate": 3.7186441642943206e-06,
      "loss": 0.1483,
      "step": 4027
    },
    {
      "epoch": 0.37112452204358043,
      "grad_norm": 0.9438400436931483,
      "learning_rate": 3.7179789595683954e-06,
      "loss": 0.1474,
      "step": 4028
    },
    {
      "epoch": 0.3712166582208504,
      "grad_norm": 0.9935959286704832,
      "learning_rate": 3.717313641752993e-06,
      "loss": 0.147,
      "step": 4029
    },
    {
      "epoch": 0.3713087943981204,
      "grad_norm": 1.011789007873907,
      "learning_rate": 3.7166482109098878e-06,
      "loss": 0.1558,
      "step": 4030
    },
    {
      "epoch": 0.3714009305753904,
      "grad_norm": 0.9405959335230354,
      "learning_rate": 3.715982667100866e-06,
      "loss": 0.1637,
      "step": 4031
    },
    {
      "epoch": 0.3714930667526604,
      "grad_norm": 0.909937263307173,
      "learning_rate": 3.7153170103877216e-06,
      "loss": 0.1431,
      "step": 4032
    },
    {
      "epoch": 0.3715852029299304,
      "grad_norm": 0.9554612213688228,
      "learning_rate": 3.7146512408322623e-06,
      "loss": 0.1652,
      "step": 4033
    },
    {
      "epoch": 0.37167733910720047,
      "grad_norm": 1.0065983816320472,
      "learning_rate": 3.7139853584963054e-06,
      "loss": 0.1552,
      "step": 4034
    },
    {
      "epoch": 0.37176947528447046,
      "grad_norm": 0.9020436912692089,
      "learning_rate": 3.7133193634416766e-06,
      "loss": 0.148,
      "step": 4035
    },
    {
      "epoch": 0.37186161146174046,
      "grad_norm": 0.9803335179141888,
      "learning_rate": 3.7126532557302144e-06,
      "loss": 0.1667,
      "step": 4036
    },
    {
      "epoch": 0.37195374763901046,
      "grad_norm": 0.9175684244894553,
      "learning_rate": 3.711987035423767e-06,
      "loss": 0.1566,
      "step": 4037
    },
    {
      "epoch": 0.37204588381628045,
      "grad_norm": 0.9262653613986682,
      "learning_rate": 3.711320702584193e-06,
      "loss": 0.154,
      "step": 4038
    },
    {
      "epoch": 0.37213801999355045,
      "grad_norm": 0.9086129584913225,
      "learning_rate": 3.710654257273361e-06,
      "loss": 0.1507,
      "step": 4039
    },
    {
      "epoch": 0.37223015617082045,
      "grad_norm": 1.0044929480816431,
      "learning_rate": 3.7099876995531515e-06,
      "loss": 0.1568,
      "step": 4040
    },
    {
      "epoch": 0.3723222923480905,
      "grad_norm": 0.9511895454298042,
      "learning_rate": 3.709321029485453e-06,
      "loss": 0.1596,
      "step": 4041
    },
    {
      "epoch": 0.3724144285253605,
      "grad_norm": 0.9457430568964129,
      "learning_rate": 3.708654247132168e-06,
      "loss": 0.1473,
      "step": 4042
    },
    {
      "epoch": 0.3725065647026305,
      "grad_norm": 0.9216559389726534,
      "learning_rate": 3.7079873525552053e-06,
      "loss": 0.1471,
      "step": 4043
    },
    {
      "epoch": 0.3725987008799005,
      "grad_norm": 0.9511420633348692,
      "learning_rate": 3.707320345816487e-06,
      "loss": 0.151,
      "step": 4044
    },
    {
      "epoch": 0.3726908370571705,
      "grad_norm": 0.9025153243738641,
      "learning_rate": 3.7066532269779444e-06,
      "loss": 0.142,
      "step": 4045
    },
    {
      "epoch": 0.3727829732344405,
      "grad_norm": 0.9011326279630237,
      "learning_rate": 3.7059859961015205e-06,
      "loss": 0.1437,
      "step": 4046
    },
    {
      "epoch": 0.3728751094117105,
      "grad_norm": 0.964381694542699,
      "learning_rate": 3.705318653249166e-06,
      "loss": 0.1624,
      "step": 4047
    },
    {
      "epoch": 0.37296724558898053,
      "grad_norm": 0.9491833954146554,
      "learning_rate": 3.704651198482846e-06,
      "loss": 0.151,
      "step": 4048
    },
    {
      "epoch": 0.37305938176625053,
      "grad_norm": 0.9783666245098148,
      "learning_rate": 3.703983631864532e-06,
      "loss": 0.1651,
      "step": 4049
    },
    {
      "epoch": 0.3731515179435205,
      "grad_norm": 0.9566763043838267,
      "learning_rate": 3.703315953456208e-06,
      "loss": 0.1457,
      "step": 4050
    },
    {
      "epoch": 0.3732436541207905,
      "grad_norm": 0.97352017386297,
      "learning_rate": 3.7026481633198687e-06,
      "loss": 0.1631,
      "step": 4051
    },
    {
      "epoch": 0.3733357902980605,
      "grad_norm": 0.9425864394639262,
      "learning_rate": 3.701980261517518e-06,
      "loss": 0.1661,
      "step": 4052
    },
    {
      "epoch": 0.3734279264753305,
      "grad_norm": 0.9059232717573347,
      "learning_rate": 3.70131224811117e-06,
      "loss": 0.1579,
      "step": 4053
    },
    {
      "epoch": 0.37352006265260057,
      "grad_norm": 0.9281068034699287,
      "learning_rate": 3.7006441231628517e-06,
      "loss": 0.1474,
      "step": 4054
    },
    {
      "epoch": 0.37361219882987057,
      "grad_norm": 0.9670493816154251,
      "learning_rate": 3.699975886734596e-06,
      "loss": 0.1638,
      "step": 4055
    },
    {
      "epoch": 0.37370433500714056,
      "grad_norm": 0.8970057217264027,
      "learning_rate": 3.6993075388884507e-06,
      "loss": 0.1477,
      "step": 4056
    },
    {
      "epoch": 0.37379647118441056,
      "grad_norm": 0.9142284746518392,
      "learning_rate": 3.698639079686471e-06,
      "loss": 0.1577,
      "step": 4057
    },
    {
      "epoch": 0.37388860736168056,
      "grad_norm": 0.9045178818037406,
      "learning_rate": 3.6979705091907244e-06,
      "loss": 0.158,
      "step": 4058
    },
    {
      "epoch": 0.37398074353895056,
      "grad_norm": 0.8711812616573087,
      "learning_rate": 3.6973018274632865e-06,
      "loss": 0.1529,
      "step": 4059
    },
    {
      "epoch": 0.37407287971622055,
      "grad_norm": 0.8729978347669862,
      "learning_rate": 3.696633034566245e-06,
      "loss": 0.157,
      "step": 4060
    },
    {
      "epoch": 0.3741650158934906,
      "grad_norm": 0.8990542116703788,
      "learning_rate": 3.6959641305616984e-06,
      "loss": 0.1503,
      "step": 4061
    },
    {
      "epoch": 0.3742571520707606,
      "grad_norm": 0.8904444740765883,
      "learning_rate": 3.695295115511752e-06,
      "loss": 0.1516,
      "step": 4062
    },
    {
      "epoch": 0.3743492882480306,
      "grad_norm": 0.9753397041967755,
      "learning_rate": 3.694625989478527e-06,
      "loss": 0.1631,
      "step": 4063
    },
    {
      "epoch": 0.3744414244253006,
      "grad_norm": 0.9049757714025393,
      "learning_rate": 3.69395675252415e-06,
      "loss": 0.1423,
      "step": 4064
    },
    {
      "epoch": 0.3745335606025706,
      "grad_norm": 0.8526472535969689,
      "learning_rate": 3.6932874047107597e-06,
      "loss": 0.1411,
      "step": 4065
    },
    {
      "epoch": 0.3746256967798406,
      "grad_norm": 0.9655334358530389,
      "learning_rate": 3.6926179461005056e-06,
      "loss": 0.1569,
      "step": 4066
    },
    {
      "epoch": 0.3747178329571106,
      "grad_norm": 0.9250687407747031,
      "learning_rate": 3.691948376755547e-06,
      "loss": 0.1572,
      "step": 4067
    },
    {
      "epoch": 0.37480996913438064,
      "grad_norm": 0.9047372516849579,
      "learning_rate": 3.6912786967380528e-06,
      "loss": 0.1562,
      "step": 4068
    },
    {
      "epoch": 0.37490210531165064,
      "grad_norm": 0.859181189210115,
      "learning_rate": 3.6906089061102043e-06,
      "loss": 0.1413,
      "step": 4069
    },
    {
      "epoch": 0.37499424148892063,
      "grad_norm": 0.9373737963751739,
      "learning_rate": 3.6899390049341893e-06,
      "loss": 0.1587,
      "step": 4070
    },
    {
      "epoch": 0.37508637766619063,
      "grad_norm": 0.9447926915063538,
      "learning_rate": 3.68926899327221e-06,
      "loss": 0.1514,
      "step": 4071
    },
    {
      "epoch": 0.3751785138434606,
      "grad_norm": 0.9193281338428294,
      "learning_rate": 3.6885988711864777e-06,
      "loss": 0.1583,
      "step": 4072
    },
    {
      "epoch": 0.3752706500207306,
      "grad_norm": 0.9827375591842401,
      "learning_rate": 3.6879286387392122e-06,
      "loss": 0.1512,
      "step": 4073
    },
    {
      "epoch": 0.3753627861980006,
      "grad_norm": 0.8550091267938758,
      "learning_rate": 3.687258295992644e-06,
      "loss": 0.1334,
      "step": 4074
    },
    {
      "epoch": 0.37545492237527067,
      "grad_norm": 0.9004154950704388,
      "learning_rate": 3.686587843009016e-06,
      "loss": 0.1479,
      "step": 4075
    },
    {
      "epoch": 0.37554705855254067,
      "grad_norm": 0.9743962905621874,
      "learning_rate": 3.685917279850578e-06,
      "loss": 0.1606,
      "step": 4076
    },
    {
      "epoch": 0.37563919472981067,
      "grad_norm": 0.9150974523990754,
      "learning_rate": 3.685246606579594e-06,
      "loss": 0.1479,
      "step": 4077
    },
    {
      "epoch": 0.37573133090708066,
      "grad_norm": 0.8927057804303226,
      "learning_rate": 3.684575823258334e-06,
      "loss": 0.1553,
      "step": 4078
    },
    {
      "epoch": 0.37582346708435066,
      "grad_norm": 0.8912269318519523,
      "learning_rate": 3.683904929949082e-06,
      "loss": 0.1606,
      "step": 4079
    },
    {
      "epoch": 0.37591560326162066,
      "grad_norm": 0.8714988941859493,
      "learning_rate": 3.68323392671413e-06,
      "loss": 0.1512,
      "step": 4080
    },
    {
      "epoch": 0.37600773943889065,
      "grad_norm": 0.9066092812200909,
      "learning_rate": 3.6825628136157805e-06,
      "loss": 0.1645,
      "step": 4081
    },
    {
      "epoch": 0.3760998756161607,
      "grad_norm": 0.9353430946436311,
      "learning_rate": 3.6818915907163456e-06,
      "loss": 0.1546,
      "step": 4082
    },
    {
      "epoch": 0.3761920117934307,
      "grad_norm": 0.824207106952847,
      "learning_rate": 3.6812202580781507e-06,
      "loss": 0.1357,
      "step": 4083
    },
    {
      "epoch": 0.3762841479707007,
      "grad_norm": 0.9127969613382955,
      "learning_rate": 3.680548815763527e-06,
      "loss": 0.1486,
      "step": 4084
    },
    {
      "epoch": 0.3763762841479707,
      "grad_norm": 0.9223650712476852,
      "learning_rate": 3.6798772638348186e-06,
      "loss": 0.1452,
      "step": 4085
    },
    {
      "epoch": 0.3764684203252407,
      "grad_norm": 0.9504996208651127,
      "learning_rate": 3.679205602354379e-06,
      "loss": 0.1593,
      "step": 4086
    },
    {
      "epoch": 0.3765605565025107,
      "grad_norm": 0.9697927009717019,
      "learning_rate": 3.6785338313845725e-06,
      "loss": 0.17,
      "step": 4087
    },
    {
      "epoch": 0.37665269267978074,
      "grad_norm": 0.9399208083149008,
      "learning_rate": 3.677861950987773e-06,
      "loss": 0.1476,
      "step": 4088
    },
    {
      "epoch": 0.37674482885705074,
      "grad_norm": 0.8869065172245069,
      "learning_rate": 3.677189961226365e-06,
      "loss": 0.1477,
      "step": 4089
    },
    {
      "epoch": 0.37683696503432074,
      "grad_norm": 0.9534644978626714,
      "learning_rate": 3.6765178621627418e-06,
      "loss": 0.1616,
      "step": 4090
    },
    {
      "epoch": 0.37692910121159073,
      "grad_norm": 0.85557009388475,
      "learning_rate": 3.675845653859309e-06,
      "loss": 0.1403,
      "step": 4091
    },
    {
      "epoch": 0.37702123738886073,
      "grad_norm": 0.9242645580752875,
      "learning_rate": 3.6751733363784804e-06,
      "loss": 0.1523,
      "step": 4092
    },
    {
      "epoch": 0.3771133735661307,
      "grad_norm": 0.8425672883949519,
      "learning_rate": 3.6745009097826813e-06,
      "loss": 0.1412,
      "step": 4093
    },
    {
      "epoch": 0.3772055097434007,
      "grad_norm": 0.8877254768400884,
      "learning_rate": 3.6738283741343463e-06,
      "loss": 0.1518,
      "step": 4094
    },
    {
      "epoch": 0.3772976459206708,
      "grad_norm": 0.9544665857522291,
      "learning_rate": 3.6731557294959196e-06,
      "loss": 0.156,
      "step": 4095
    },
    {
      "epoch": 0.3773897820979408,
      "grad_norm": 0.8985137299961812,
      "learning_rate": 3.6724829759298585e-06,
      "loss": 0.1567,
      "step": 4096
    },
    {
      "epoch": 0.37748191827521077,
      "grad_norm": 0.9207976569709534,
      "learning_rate": 3.671810113498626e-06,
      "loss": 0.1514,
      "step": 4097
    },
    {
      "epoch": 0.37757405445248077,
      "grad_norm": 0.9250476073445184,
      "learning_rate": 3.6711371422646984e-06,
      "loss": 0.1529,
      "step": 4098
    },
    {
      "epoch": 0.37766619062975076,
      "grad_norm": 0.9270432357066251,
      "learning_rate": 3.6704640622905617e-06,
      "loss": 0.1632,
      "step": 4099
    },
    {
      "epoch": 0.37775832680702076,
      "grad_norm": 0.9510100858087815,
      "learning_rate": 3.6697908736387105e-06,
      "loss": 0.1664,
      "step": 4100
    },
    {
      "epoch": 0.37785046298429076,
      "grad_norm": 0.8715401290229666,
      "learning_rate": 3.669117576371651e-06,
      "loss": 0.1355,
      "step": 4101
    },
    {
      "epoch": 0.3779425991615608,
      "grad_norm": 0.9679288812456934,
      "learning_rate": 3.668444170551898e-06,
      "loss": 0.1607,
      "step": 4102
    },
    {
      "epoch": 0.3780347353388308,
      "grad_norm": 0.9808453390014398,
      "learning_rate": 3.6677706562419784e-06,
      "loss": 0.1641,
      "step": 4103
    },
    {
      "epoch": 0.3781268715161008,
      "grad_norm": 0.9312962652606193,
      "learning_rate": 3.667097033504428e-06,
      "loss": 0.1692,
      "step": 4104
    },
    {
      "epoch": 0.3782190076933708,
      "grad_norm": 0.9276051374101398,
      "learning_rate": 3.666423302401792e-06,
      "loss": 0.1667,
      "step": 4105
    },
    {
      "epoch": 0.3783111438706408,
      "grad_norm": 0.8760055981989053,
      "learning_rate": 3.6657494629966274e-06,
      "loss": 0.144,
      "step": 4106
    },
    {
      "epoch": 0.3784032800479108,
      "grad_norm": 0.8706088602855692,
      "learning_rate": 3.6650755153514993e-06,
      "loss": 0.1451,
      "step": 4107
    },
    {
      "epoch": 0.3784954162251808,
      "grad_norm": 0.9489960109369865,
      "learning_rate": 3.664401459528984e-06,
      "loss": 0.1522,
      "step": 4108
    },
    {
      "epoch": 0.37858755240245084,
      "grad_norm": 0.9511363844631635,
      "learning_rate": 3.663727295591668e-06,
      "loss": 0.1603,
      "step": 4109
    },
    {
      "epoch": 0.37867968857972084,
      "grad_norm": 0.984138360362065,
      "learning_rate": 3.6630530236021478e-06,
      "loss": 0.1527,
      "step": 4110
    },
    {
      "epoch": 0.37877182475699084,
      "grad_norm": 0.9177359028717674,
      "learning_rate": 3.6623786436230287e-06,
      "loss": 0.1487,
      "step": 4111
    },
    {
      "epoch": 0.37886396093426083,
      "grad_norm": 0.9387063111522116,
      "learning_rate": 3.6617041557169282e-06,
      "loss": 0.1429,
      "step": 4112
    },
    {
      "epoch": 0.37895609711153083,
      "grad_norm": 0.9434138295220491,
      "learning_rate": 3.6610295599464707e-06,
      "loss": 0.1474,
      "step": 4113
    },
    {
      "epoch": 0.37904823328880083,
      "grad_norm": 1.0168889669868741,
      "learning_rate": 3.660354856374294e-06,
      "loss": 0.1462,
      "step": 4114
    },
    {
      "epoch": 0.3791403694660708,
      "grad_norm": 0.9435621753939302,
      "learning_rate": 3.6596800450630445e-06,
      "loss": 0.1488,
      "step": 4115
    },
    {
      "epoch": 0.3792325056433409,
      "grad_norm": 1.0039934347610908,
      "learning_rate": 3.659005126075377e-06,
      "loss": 0.1606,
      "step": 4116
    },
    {
      "epoch": 0.3793246418206109,
      "grad_norm": 0.9742949418469122,
      "learning_rate": 3.65833009947396e-06,
      "loss": 0.1618,
      "step": 4117
    },
    {
      "epoch": 0.37941677799788087,
      "grad_norm": 0.97062878459915,
      "learning_rate": 3.657654965321468e-06,
      "loss": 0.151,
      "step": 4118
    },
    {
      "epoch": 0.37950891417515087,
      "grad_norm": 0.9727659249762055,
      "learning_rate": 3.6569797236805877e-06,
      "loss": 0.1531,
      "step": 4119
    },
    {
      "epoch": 0.37960105035242087,
      "grad_norm": 0.9530211060763032,
      "learning_rate": 3.656304374614016e-06,
      "loss": 0.1608,
      "step": 4120
    },
    {
      "epoch": 0.37969318652969086,
      "grad_norm": 0.9892002262495758,
      "learning_rate": 3.6556289181844582e-06,
      "loss": 0.1614,
      "step": 4121
    },
    {
      "epoch": 0.3797853227069609,
      "grad_norm": 0.9853737375495374,
      "learning_rate": 3.654953354454631e-06,
      "loss": 0.1658,
      "step": 4122
    },
    {
      "epoch": 0.3798774588842309,
      "grad_norm": 0.8190640589995611,
      "learning_rate": 3.654277683487261e-06,
      "loss": 0.1318,
      "step": 4123
    },
    {
      "epoch": 0.3799695950615009,
      "grad_norm": 0.9400423177734195,
      "learning_rate": 3.6536019053450834e-06,
      "loss": 0.161,
      "step": 4124
    },
    {
      "epoch": 0.3800617312387709,
      "grad_norm": 0.9517573566439758,
      "learning_rate": 3.652926020090845e-06,
      "loss": 0.1518,
      "step": 4125
    },
    {
      "epoch": 0.3801538674160409,
      "grad_norm": 0.926377462587662,
      "learning_rate": 3.6522500277873017e-06,
      "loss": 0.1413,
      "step": 4126
    },
    {
      "epoch": 0.3802460035933109,
      "grad_norm": 0.8943962669159516,
      "learning_rate": 3.651573928497219e-06,
      "loss": 0.1545,
      "step": 4127
    },
    {
      "epoch": 0.3803381397705809,
      "grad_norm": 0.9300678578368041,
      "learning_rate": 3.6508977222833737e-06,
      "loss": 0.1546,
      "step": 4128
    },
    {
      "epoch": 0.38043027594785095,
      "grad_norm": 0.9643638642908029,
      "learning_rate": 3.6502214092085504e-06,
      "loss": 0.1529,
      "step": 4129
    },
    {
      "epoch": 0.38052241212512095,
      "grad_norm": 0.9714822619040409,
      "learning_rate": 3.649544989335545e-06,
      "loss": 0.1542,
      "step": 4130
    },
    {
      "epoch": 0.38061454830239094,
      "grad_norm": 0.9773020057604235,
      "learning_rate": 3.648868462727165e-06,
      "loss": 0.1575,
      "step": 4131
    },
    {
      "epoch": 0.38070668447966094,
      "grad_norm": 0.9255103386023259,
      "learning_rate": 3.6481918294462237e-06,
      "loss": 0.1533,
      "step": 4132
    },
    {
      "epoch": 0.38079882065693094,
      "grad_norm": 0.9723738172144644,
      "learning_rate": 3.647515089555548e-06,
      "loss": 0.1587,
      "step": 4133
    },
    {
      "epoch": 0.38089095683420093,
      "grad_norm": 0.9349434163742851,
      "learning_rate": 3.6468382431179717e-06,
      "loss": 0.1435,
      "step": 4134
    },
    {
      "epoch": 0.38098309301147093,
      "grad_norm": 0.9636701484737411,
      "learning_rate": 3.646161290196342e-06,
      "loss": 0.1529,
      "step": 4135
    },
    {
      "epoch": 0.381075229188741,
      "grad_norm": 0.9754765196777119,
      "learning_rate": 3.645484230853513e-06,
      "loss": 0.1707,
      "step": 4136
    },
    {
      "epoch": 0.381167365366011,
      "grad_norm": 0.9895673826813174,
      "learning_rate": 3.64480706515235e-06,
      "loss": 0.1673,
      "step": 4137
    },
    {
      "epoch": 0.381259501543281,
      "grad_norm": 0.9674314673920184,
      "learning_rate": 3.6441297931557274e-06,
      "loss": 0.1552,
      "step": 4138
    },
    {
      "epoch": 0.381351637720551,
      "grad_norm": 0.9225542238948127,
      "learning_rate": 3.643452414926531e-06,
      "loss": 0.1594,
      "step": 4139
    },
    {
      "epoch": 0.38144377389782097,
      "grad_norm": 0.8649506707868612,
      "learning_rate": 3.6427749305276537e-06,
      "loss": 0.1415,
      "step": 4140
    },
    {
      "epoch": 0.38153591007509097,
      "grad_norm": 0.8950199000553921,
      "learning_rate": 3.6420973400220016e-06,
      "loss": 0.16,
      "step": 4141
    },
    {
      "epoch": 0.38162804625236096,
      "grad_norm": 0.9225797065582534,
      "learning_rate": 3.641419643472489e-06,
      "loss": 0.1576,
      "step": 4142
    },
    {
      "epoch": 0.381720182429631,
      "grad_norm": 0.9392880937976605,
      "learning_rate": 3.640741840942039e-06,
      "loss": 0.1504,
      "step": 4143
    },
    {
      "epoch": 0.381812318606901,
      "grad_norm": 0.8928198640604731,
      "learning_rate": 3.640063932493588e-06,
      "loss": 0.1525,
      "step": 4144
    },
    {
      "epoch": 0.381904454784171,
      "grad_norm": 0.899053773871494,
      "learning_rate": 3.639385918190076e-06,
      "loss": 0.1527,
      "step": 4145
    },
    {
      "epoch": 0.381996590961441,
      "grad_norm": 0.9202410829435287,
      "learning_rate": 3.6387077980944595e-06,
      "loss": 0.1621,
      "step": 4146
    },
    {
      "epoch": 0.382088727138711,
      "grad_norm": 1.0028767320926162,
      "learning_rate": 3.6380295722697023e-06,
      "loss": 0.1505,
      "step": 4147
    },
    {
      "epoch": 0.382180863315981,
      "grad_norm": 0.8458601915921796,
      "learning_rate": 3.637351240778776e-06,
      "loss": 0.1354,
      "step": 4148
    },
    {
      "epoch": 0.382272999493251,
      "grad_norm": 0.8827937855379273,
      "learning_rate": 3.6366728036846647e-06,
      "loss": 0.1546,
      "step": 4149
    },
    {
      "epoch": 0.38236513567052105,
      "grad_norm": 0.8949424057254047,
      "learning_rate": 3.635994261050362e-06,
      "loss": 0.1544,
      "step": 4150
    },
    {
      "epoch": 0.38245727184779105,
      "grad_norm": 0.9282648917790803,
      "learning_rate": 3.6353156129388683e-06,
      "loss": 0.1522,
      "step": 4151
    },
    {
      "epoch": 0.38254940802506104,
      "grad_norm": 0.8870218985004921,
      "learning_rate": 3.634636859413199e-06,
      "loss": 0.1393,
      "step": 4152
    },
    {
      "epoch": 0.38264154420233104,
      "grad_norm": 0.9425406478284127,
      "learning_rate": 3.633958000536375e-06,
      "loss": 0.1567,
      "step": 4153
    },
    {
      "epoch": 0.38273368037960104,
      "grad_norm": 0.9264991414341093,
      "learning_rate": 3.633279036371429e-06,
      "loss": 0.1542,
      "step": 4154
    },
    {
      "epoch": 0.38282581655687103,
      "grad_norm": 0.9064377119313413,
      "learning_rate": 3.6325999669814014e-06,
      "loss": 0.1477,
      "step": 4155
    },
    {
      "epoch": 0.3829179527341411,
      "grad_norm": 0.9228052083944193,
      "learning_rate": 3.631920792429346e-06,
      "loss": 0.1534,
      "step": 4156
    },
    {
      "epoch": 0.3830100889114111,
      "grad_norm": 0.8711501223021223,
      "learning_rate": 3.6312415127783228e-06,
      "loss": 0.1436,
      "step": 4157
    },
    {
      "epoch": 0.3831022250886811,
      "grad_norm": 0.9402770645056095,
      "learning_rate": 3.630562128091403e-06,
      "loss": 0.1546,
      "step": 4158
    },
    {
      "epoch": 0.3831943612659511,
      "grad_norm": 0.9619983038446867,
      "learning_rate": 3.6298826384316684e-06,
      "loss": 0.1514,
      "step": 4159
    },
    {
      "epoch": 0.3832864974432211,
      "grad_norm": 0.8653963516789092,
      "learning_rate": 3.6292030438622093e-06,
      "loss": 0.1424,
      "step": 4160
    },
    {
      "epoch": 0.38337863362049107,
      "grad_norm": 0.938124421714208,
      "learning_rate": 3.6285233444461255e-06,
      "loss": 0.1644,
      "step": 4161
    },
    {
      "epoch": 0.38347076979776107,
      "grad_norm": 0.8478622536698076,
      "learning_rate": 3.6278435402465283e-06,
      "loss": 0.1357,
      "step": 4162
    },
    {
      "epoch": 0.3835629059750311,
      "grad_norm": 0.9507324930194543,
      "learning_rate": 3.6271636313265368e-06,
      "loss": 0.1697,
      "step": 4163
    },
    {
      "epoch": 0.3836550421523011,
      "grad_norm": 0.9054939912598036,
      "learning_rate": 3.6264836177492812e-06,
      "loss": 0.1469,
      "step": 4164
    },
    {
      "epoch": 0.3837471783295711,
      "grad_norm": 0.8788456687040019,
      "learning_rate": 3.6258034995778994e-06,
      "loss": 0.1342,
      "step": 4165
    },
    {
      "epoch": 0.3838393145068411,
      "grad_norm": 0.9015870187596965,
      "learning_rate": 3.6251232768755428e-06,
      "loss": 0.154,
      "step": 4166
    },
    {
      "epoch": 0.3839314506841111,
      "grad_norm": 0.8909983172995234,
      "learning_rate": 3.6244429497053678e-06,
      "loss": 0.1507,
      "step": 4167
    },
    {
      "epoch": 0.3840235868613811,
      "grad_norm": 0.8913316867597605,
      "learning_rate": 3.623762518130545e-06,
      "loss": 0.1474,
      "step": 4168
    },
    {
      "epoch": 0.3841157230386511,
      "grad_norm": 0.9361153707666292,
      "learning_rate": 3.6230819822142504e-06,
      "loss": 0.1416,
      "step": 4169
    },
    {
      "epoch": 0.38420785921592115,
      "grad_norm": 0.9455562022520799,
      "learning_rate": 3.6224013420196734e-06,
      "loss": 0.1586,
      "step": 4170
    },
    {
      "epoch": 0.38429999539319115,
      "grad_norm": 0.8593562197121624,
      "learning_rate": 3.621720597610011e-06,
      "loss": 0.1431,
      "step": 4171
    },
    {
      "epoch": 0.38439213157046115,
      "grad_norm": 0.8832913395812277,
      "learning_rate": 3.62103974904847e-06,
      "loss": 0.1383,
      "step": 4172
    },
    {
      "epoch": 0.38448426774773115,
      "grad_norm": 0.8483230097860568,
      "learning_rate": 3.620358796398268e-06,
      "loss": 0.1438,
      "step": 4173
    },
    {
      "epoch": 0.38457640392500114,
      "grad_norm": 0.9683562509779495,
      "learning_rate": 3.6196777397226314e-06,
      "loss": 0.1439,
      "step": 4174
    },
    {
      "epoch": 0.38466854010227114,
      "grad_norm": 0.9822738998325394,
      "learning_rate": 3.618996579084796e-06,
      "loss": 0.1706,
      "step": 4175
    },
    {
      "epoch": 0.38476067627954114,
      "grad_norm": 0.9318590340850513,
      "learning_rate": 3.6183153145480075e-06,
      "loss": 0.1502,
      "step": 4176
    },
    {
      "epoch": 0.3848528124568112,
      "grad_norm": 0.91227748896253,
      "learning_rate": 3.6176339461755217e-06,
      "loss": 0.1544,
      "step": 4177
    },
    {
      "epoch": 0.3849449486340812,
      "grad_norm": 0.849625877446819,
      "learning_rate": 3.6169524740306038e-06,
      "loss": 0.1269,
      "step": 4178
    },
    {
      "epoch": 0.3850370848113512,
      "grad_norm": 0.8533351529502364,
      "learning_rate": 3.6162708981765294e-06,
      "loss": 0.1392,
      "step": 4179
    },
    {
      "epoch": 0.3851292209886212,
      "grad_norm": 0.9224771053382539,
      "learning_rate": 3.6155892186765805e-06,
      "loss": 0.1477,
      "step": 4180
    },
    {
      "epoch": 0.3852213571658912,
      "grad_norm": 0.8713099758237273,
      "learning_rate": 3.6149074355940533e-06,
      "loss": 0.1398,
      "step": 4181
    },
    {
      "epoch": 0.3853134933431612,
      "grad_norm": 0.9272503453478166,
      "learning_rate": 3.614225548992251e-06,
      "loss": 0.1531,
      "step": 4182
    },
    {
      "epoch": 0.38540562952043117,
      "grad_norm": 0.9314307699112695,
      "learning_rate": 3.6135435589344857e-06,
      "loss": 0.1451,
      "step": 4183
    },
    {
      "epoch": 0.3854977656977012,
      "grad_norm": 0.9222458730386164,
      "learning_rate": 3.612861465484082e-06,
      "loss": 0.1572,
      "step": 4184
    },
    {
      "epoch": 0.3855899018749712,
      "grad_norm": 0.9132259187920421,
      "learning_rate": 3.612179268704371e-06,
      "loss": 0.1673,
      "step": 4185
    },
    {
      "epoch": 0.3856820380522412,
      "grad_norm": 0.8399624159184574,
      "learning_rate": 3.611496968658695e-06,
      "loss": 0.1397,
      "step": 4186
    },
    {
      "epoch": 0.3857741742295112,
      "grad_norm": 0.8945593972634557,
      "learning_rate": 3.6108145654104065e-06,
      "loss": 0.1539,
      "step": 4187
    },
    {
      "epoch": 0.3858663104067812,
      "grad_norm": 0.8969467662175643,
      "learning_rate": 3.610132059022865e-06,
      "loss": 0.151,
      "step": 4188
    },
    {
      "epoch": 0.3859584465840512,
      "grad_norm": 0.8663822959838602,
      "learning_rate": 3.6094494495594435e-06,
      "loss": 0.1457,
      "step": 4189
    },
    {
      "epoch": 0.38605058276132126,
      "grad_norm": 0.9056864522351286,
      "learning_rate": 3.6087667370835213e-06,
      "loss": 0.1533,
      "step": 4190
    },
    {
      "epoch": 0.38614271893859126,
      "grad_norm": 0.9201003973845128,
      "learning_rate": 3.6080839216584875e-06,
      "loss": 0.1463,
      "step": 4191
    },
    {
      "epoch": 0.38623485511586125,
      "grad_norm": 0.9124350794797036,
      "learning_rate": 3.6074010033477425e-06,
      "loss": 0.1473,
      "step": 4192
    },
    {
      "epoch": 0.38632699129313125,
      "grad_norm": 0.9708223579259208,
      "learning_rate": 3.606717982214695e-06,
      "loss": 0.1629,
      "step": 4193
    },
    {
      "epoch": 0.38641912747040125,
      "grad_norm": 0.869245957927959,
      "learning_rate": 3.6060348583227635e-06,
      "loss": 0.1282,
      "step": 4194
    },
    {
      "epoch": 0.38651126364767124,
      "grad_norm": 0.9028555853054538,
      "learning_rate": 3.6053516317353777e-06,
      "loss": 0.1499,
      "step": 4195
    },
    {
      "epoch": 0.38660339982494124,
      "grad_norm": 0.8716710099401325,
      "learning_rate": 3.6046683025159722e-06,
      "loss": 0.1315,
      "step": 4196
    },
    {
      "epoch": 0.3866955360022113,
      "grad_norm": 0.995012950602247,
      "learning_rate": 3.6039848707279965e-06,
      "loss": 0.1675,
      "step": 4197
    },
    {
      "epoch": 0.3867876721794813,
      "grad_norm": 0.8635770358105515,
      "learning_rate": 3.6033013364349074e-06,
      "loss": 0.1433,
      "step": 4198
    },
    {
      "epoch": 0.3868798083567513,
      "grad_norm": 0.9387478709393704,
      "learning_rate": 3.60261769970017e-06,
      "loss": 0.1602,
      "step": 4199
    },
    {
      "epoch": 0.3869719445340213,
      "grad_norm": 0.9106186310143041,
      "learning_rate": 3.6019339605872604e-06,
      "loss": 0.1423,
      "step": 4200
    },
    {
      "epoch": 0.3870640807112913,
      "grad_norm": 0.829738795239621,
      "learning_rate": 3.6012501191596637e-06,
      "loss": 0.129,
      "step": 4201
    },
    {
      "epoch": 0.3871562168885613,
      "grad_norm": 0.8274379621149167,
      "learning_rate": 3.6005661754808755e-06,
      "loss": 0.1306,
      "step": 4202
    },
    {
      "epoch": 0.3872483530658313,
      "grad_norm": 1.0033242130316309,
      "learning_rate": 3.5998821296143995e-06,
      "loss": 0.1647,
      "step": 4203
    },
    {
      "epoch": 0.3873404892431013,
      "grad_norm": 0.8796576048924489,
      "learning_rate": 3.5991979816237495e-06,
      "loss": 0.1398,
      "step": 4204
    },
    {
      "epoch": 0.3874326254203713,
      "grad_norm": 0.9147076443755038,
      "learning_rate": 3.5985137315724476e-06,
      "loss": 0.1467,
      "step": 4205
    },
    {
      "epoch": 0.3875247615976413,
      "grad_norm": 0.9113754827450924,
      "learning_rate": 3.597829379524029e-06,
      "loss": 0.1527,
      "step": 4206
    },
    {
      "epoch": 0.3876168977749113,
      "grad_norm": 0.8575385170167867,
      "learning_rate": 3.5971449255420334e-06,
      "loss": 0.1429,
      "step": 4207
    },
    {
      "epoch": 0.3877090339521813,
      "grad_norm": 0.971025350124052,
      "learning_rate": 3.5964603696900137e-06,
      "loss": 0.1767,
      "step": 4208
    },
    {
      "epoch": 0.3878011701294513,
      "grad_norm": 0.8814985588682502,
      "learning_rate": 3.59577571203153e-06,
      "loss": 0.1477,
      "step": 4209
    },
    {
      "epoch": 0.3878933063067213,
      "grad_norm": 0.885856410302397,
      "learning_rate": 3.5950909526301543e-06,
      "loss": 0.1435,
      "step": 4210
    },
    {
      "epoch": 0.38798544248399136,
      "grad_norm": 0.8978747474508465,
      "learning_rate": 3.5944060915494656e-06,
      "loss": 0.1496,
      "step": 4211
    },
    {
      "epoch": 0.38807757866126136,
      "grad_norm": 0.8340715669280159,
      "learning_rate": 3.5937211288530536e-06,
      "loss": 0.1414,
      "step": 4212
    },
    {
      "epoch": 0.38816971483853135,
      "grad_norm": 0.9322207725295413,
      "learning_rate": 3.5930360646045165e-06,
      "loss": 0.144,
      "step": 4213
    },
    {
      "epoch": 0.38826185101580135,
      "grad_norm": 0.9764968494745287,
      "learning_rate": 3.5923508988674643e-06,
      "loss": 0.1531,
      "step": 4214
    },
    {
      "epoch": 0.38835398719307135,
      "grad_norm": 0.929979392627363,
      "learning_rate": 3.591665631705512e-06,
      "loss": 0.1583,
      "step": 4215
    },
    {
      "epoch": 0.38844612337034135,
      "grad_norm": 0.8855391143671449,
      "learning_rate": 3.59098026318229e-06,
      "loss": 0.1392,
      "step": 4216
    },
    {
      "epoch": 0.38853825954761134,
      "grad_norm": 0.8787069684232223,
      "learning_rate": 3.5902947933614317e-06,
      "loss": 0.1394,
      "step": 4217
    },
    {
      "epoch": 0.3886303957248814,
      "grad_norm": 0.9634827487742722,
      "learning_rate": 3.5896092223065854e-06,
      "loss": 0.1541,
      "step": 4218
    },
    {
      "epoch": 0.3887225319021514,
      "grad_norm": 0.9325862623957322,
      "learning_rate": 3.5889235500814055e-06,
      "loss": 0.1542,
      "step": 4219
    },
    {
      "epoch": 0.3888146680794214,
      "grad_norm": 0.9023366000754964,
      "learning_rate": 3.588237776749557e-06,
      "loss": 0.1501,
      "step": 4220
    },
    {
      "epoch": 0.3889068042566914,
      "grad_norm": 0.8986206366257142,
      "learning_rate": 3.5875519023747125e-06,
      "loss": 0.1453,
      "step": 4221
    },
    {
      "epoch": 0.3889989404339614,
      "grad_norm": 0.917397921943575,
      "learning_rate": 3.5868659270205584e-06,
      "loss": 0.1493,
      "step": 4222
    },
    {
      "epoch": 0.3890910766112314,
      "grad_norm": 0.9143535754483454,
      "learning_rate": 3.586179850750785e-06,
      "loss": 0.1544,
      "step": 4223
    },
    {
      "epoch": 0.38918321278850143,
      "grad_norm": 0.9044475607995964,
      "learning_rate": 3.5854936736290956e-06,
      "loss": 0.1495,
      "step": 4224
    },
    {
      "epoch": 0.38927534896577143,
      "grad_norm": 0.8377084041288603,
      "learning_rate": 3.584807395719202e-06,
      "loss": 0.1316,
      "step": 4225
    },
    {
      "epoch": 0.3893674851430414,
      "grad_norm": 0.8703025793104591,
      "learning_rate": 3.584121017084825e-06,
      "loss": 0.162,
      "step": 4226
    },
    {
      "epoch": 0.3894596213203114,
      "grad_norm": 0.8891808936863865,
      "learning_rate": 3.5834345377896953e-06,
      "loss": 0.1534,
      "step": 4227
    },
    {
      "epoch": 0.3895517574975814,
      "grad_norm": 0.8490427643718715,
      "learning_rate": 3.5827479578975523e-06,
      "loss": 0.1485,
      "step": 4228
    },
    {
      "epoch": 0.3896438936748514,
      "grad_norm": 0.9253620813691368,
      "learning_rate": 3.582061277472144e-06,
      "loss": 0.142,
      "step": 4229
    },
    {
      "epoch": 0.3897360298521214,
      "grad_norm": 0.9100820857060963,
      "learning_rate": 3.5813744965772296e-06,
      "loss": 0.1496,
      "step": 4230
    },
    {
      "epoch": 0.38982816602939147,
      "grad_norm": 0.8307256013094806,
      "learning_rate": 3.580687615276577e-06,
      "loss": 0.1349,
      "step": 4231
    },
    {
      "epoch": 0.38992030220666146,
      "grad_norm": 0.9088847447095758,
      "learning_rate": 3.580000633633963e-06,
      "loss": 0.1495,
      "step": 4232
    },
    {
      "epoch": 0.39001243838393146,
      "grad_norm": 1.1234634530976173,
      "learning_rate": 3.579313551713175e-06,
      "loss": 0.1616,
      "step": 4233
    },
    {
      "epoch": 0.39010457456120146,
      "grad_norm": 0.9343623593094748,
      "learning_rate": 3.578626369578006e-06,
      "loss": 0.145,
      "step": 4234
    },
    {
      "epoch": 0.39019671073847145,
      "grad_norm": 0.8849146327309522,
      "learning_rate": 3.5779390872922637e-06,
      "loss": 0.1542,
      "step": 4235
    },
    {
      "epoch": 0.39028884691574145,
      "grad_norm": 0.9700332080648529,
      "learning_rate": 3.5772517049197602e-06,
      "loss": 0.1668,
      "step": 4236
    },
    {
      "epoch": 0.39038098309301145,
      "grad_norm": 0.9073077760008486,
      "learning_rate": 3.5765642225243204e-06,
      "loss": 0.1501,
      "step": 4237
    },
    {
      "epoch": 0.3904731192702815,
      "grad_norm": 0.8560186434769101,
      "learning_rate": 3.575876640169777e-06,
      "loss": 0.1526,
      "step": 4238
    },
    {
      "epoch": 0.3905652554475515,
      "grad_norm": 0.9149195393802323,
      "learning_rate": 3.5751889579199715e-06,
      "loss": 0.1519,
      "step": 4239
    },
    {
      "epoch": 0.3906573916248215,
      "grad_norm": 0.9313907437239937,
      "learning_rate": 3.574501175838755e-06,
      "loss": 0.147,
      "step": 4240
    },
    {
      "epoch": 0.3907495278020915,
      "grad_norm": 0.9774674402082485,
      "learning_rate": 3.5738132939899895e-06,
      "loss": 0.1593,
      "step": 4241
    },
    {
      "epoch": 0.3908416639793615,
      "grad_norm": 0.9249259871090791,
      "learning_rate": 3.573125312437544e-06,
      "loss": 0.1571,
      "step": 4242
    },
    {
      "epoch": 0.3909338001566315,
      "grad_norm": 0.8517631758062792,
      "learning_rate": 3.572437231245297e-06,
      "loss": 0.1469,
      "step": 4243
    },
    {
      "epoch": 0.3910259363339015,
      "grad_norm": 0.9075554502422322,
      "learning_rate": 3.5717490504771386e-06,
      "loss": 0.1549,
      "step": 4244
    },
    {
      "epoch": 0.39111807251117153,
      "grad_norm": 0.8860237957842017,
      "learning_rate": 3.571060770196965e-06,
      "loss": 0.1426,
      "step": 4245
    },
    {
      "epoch": 0.39121020868844153,
      "grad_norm": 0.8585811165821868,
      "learning_rate": 3.570372390468684e-06,
      "loss": 0.1373,
      "step": 4246
    },
    {
      "epoch": 0.3913023448657115,
      "grad_norm": 0.9061281656675514,
      "learning_rate": 3.569683911356211e-06,
      "loss": 0.147,
      "step": 4247
    },
    {
      "epoch": 0.3913944810429815,
      "grad_norm": 0.9101609658494008,
      "learning_rate": 3.568995332923472e-06,
      "loss": 0.1557,
      "step": 4248
    },
    {
      "epoch": 0.3914866172202515,
      "grad_norm": 1.0288124302622148,
      "learning_rate": 3.568306655234401e-06,
      "loss": 0.1653,
      "step": 4249
    },
    {
      "epoch": 0.3915787533975215,
      "grad_norm": 0.9003424685543512,
      "learning_rate": 3.567617878352942e-06,
      "loss": 0.1434,
      "step": 4250
    },
    {
      "epoch": 0.3916708895747915,
      "grad_norm": 0.8803026788568897,
      "learning_rate": 3.566929002343048e-06,
      "loss": 0.145,
      "step": 4251
    },
    {
      "epoch": 0.39176302575206157,
      "grad_norm": 0.9300702343272207,
      "learning_rate": 3.5662400272686813e-06,
      "loss": 0.1505,
      "step": 4252
    },
    {
      "epoch": 0.39185516192933156,
      "grad_norm": 0.9166481799372976,
      "learning_rate": 3.5655509531938143e-06,
      "loss": 0.1491,
      "step": 4253
    },
    {
      "epoch": 0.39194729810660156,
      "grad_norm": 0.8898318583085889,
      "learning_rate": 3.5648617801824257e-06,
      "loss": 0.1474,
      "step": 4254
    },
    {
      "epoch": 0.39203943428387156,
      "grad_norm": 0.8754326393842209,
      "learning_rate": 3.5641725082985066e-06,
      "loss": 0.1385,
      "step": 4255
    },
    {
      "epoch": 0.39213157046114155,
      "grad_norm": 0.8837064377392709,
      "learning_rate": 3.5634831376060554e-06,
      "loss": 0.1483,
      "step": 4256
    },
    {
      "epoch": 0.39222370663841155,
      "grad_norm": 0.82973390186192,
      "learning_rate": 3.5627936681690804e-06,
      "loss": 0.1385,
      "step": 4257
    },
    {
      "epoch": 0.3923158428156816,
      "grad_norm": 0.8980341352139759,
      "learning_rate": 3.562104100051599e-06,
      "loss": 0.1502,
      "step": 4258
    },
    {
      "epoch": 0.3924079789929516,
      "grad_norm": 0.8826296993941916,
      "learning_rate": 3.561414433317637e-06,
      "loss": 0.144,
      "step": 4259
    },
    {
      "epoch": 0.3925001151702216,
      "grad_norm": 0.8825113800199493,
      "learning_rate": 3.560724668031231e-06,
      "loss": 0.1535,
      "step": 4260
    },
    {
      "epoch": 0.3925922513474916,
      "grad_norm": 0.9439974689177584,
      "learning_rate": 3.560034804256426e-06,
      "loss": 0.1575,
      "step": 4261
    },
    {
      "epoch": 0.3926843875247616,
      "grad_norm": 0.8261955307735191,
      "learning_rate": 3.5593448420572753e-06,
      "loss": 0.1352,
      "step": 4262
    },
    {
      "epoch": 0.3927765237020316,
      "grad_norm": 0.9212672496243489,
      "learning_rate": 3.558654781497841e-06,
      "loss": 0.1351,
      "step": 4263
    },
    {
      "epoch": 0.3928686598793016,
      "grad_norm": 0.9109157673678041,
      "learning_rate": 3.557964622642197e-06,
      "loss": 0.1488,
      "step": 4264
    },
    {
      "epoch": 0.39296079605657164,
      "grad_norm": 0.9448156747304166,
      "learning_rate": 3.557274365554424e-06,
      "loss": 0.1651,
      "step": 4265
    },
    {
      "epoch": 0.39305293223384163,
      "grad_norm": 0.8808161080550717,
      "learning_rate": 3.5565840102986128e-06,
      "loss": 0.1377,
      "step": 4266
    },
    {
      "epoch": 0.39314506841111163,
      "grad_norm": 0.8536571695891909,
      "learning_rate": 3.555893556938862e-06,
      "loss": 0.1401,
      "step": 4267
    },
    {
      "epoch": 0.39323720458838163,
      "grad_norm": 0.8420334900764369,
      "learning_rate": 3.5552030055392805e-06,
      "loss": 0.1333,
      "step": 4268
    },
    {
      "epoch": 0.3933293407656516,
      "grad_norm": 0.9069556452168558,
      "learning_rate": 3.554512356163986e-06,
      "loss": 0.152,
      "step": 4269
    },
    {
      "epoch": 0.3934214769429216,
      "grad_norm": 0.8525624922991532,
      "learning_rate": 3.553821608877107e-06,
      "loss": 0.1355,
      "step": 4270
    },
    {
      "epoch": 0.3935136131201916,
      "grad_norm": 0.8460506248401927,
      "learning_rate": 3.5531307637427774e-06,
      "loss": 0.1461,
      "step": 4271
    },
    {
      "epoch": 0.39360574929746167,
      "grad_norm": 0.866204338149706,
      "learning_rate": 3.552439820825143e-06,
      "loss": 0.1475,
      "step": 4272
    },
    {
      "epoch": 0.39369788547473167,
      "grad_norm": 0.9409368306978433,
      "learning_rate": 3.5517487801883587e-06,
      "loss": 0.1617,
      "step": 4273
    },
    {
      "epoch": 0.39379002165200166,
      "grad_norm": 0.8316147873982872,
      "learning_rate": 3.5510576418965862e-06,
      "loss": 0.1383,
      "step": 4274
    },
    {
      "epoch": 0.39388215782927166,
      "grad_norm": 0.8920309395123601,
      "learning_rate": 3.5503664060139987e-06,
      "loss": 0.1514,
      "step": 4275
    },
    {
      "epoch": 0.39397429400654166,
      "grad_norm": 0.8663059385692813,
      "learning_rate": 3.549675072604778e-06,
      "loss": 0.1327,
      "step": 4276
    },
    {
      "epoch": 0.39406643018381166,
      "grad_norm": 0.908796183232886,
      "learning_rate": 3.548983641733113e-06,
      "loss": 0.1525,
      "step": 4277
    },
    {
      "epoch": 0.39415856636108165,
      "grad_norm": 0.9156866300447943,
      "learning_rate": 3.5482921134632043e-06,
      "loss": 0.1509,
      "step": 4278
    },
    {
      "epoch": 0.3942507025383517,
      "grad_norm": 0.885423965354652,
      "learning_rate": 3.54760048785926e-06,
      "loss": 0.1454,
      "step": 4279
    },
    {
      "epoch": 0.3943428387156217,
      "grad_norm": 0.9269220799512766,
      "learning_rate": 3.546908764985498e-06,
      "loss": 0.1604,
      "step": 4280
    },
    {
      "epoch": 0.3944349748928917,
      "grad_norm": 0.968797661863933,
      "learning_rate": 3.5462169449061445e-06,
      "loss": 0.1433,
      "step": 4281
    },
    {
      "epoch": 0.3945271110701617,
      "grad_norm": 0.9613953673270564,
      "learning_rate": 3.5455250276854348e-06,
      "loss": 0.1445,
      "step": 4282
    },
    {
      "epoch": 0.3946192472474317,
      "grad_norm": 0.8605829725128497,
      "learning_rate": 3.544833013387613e-06,
      "loss": 0.1269,
      "step": 4283
    },
    {
      "epoch": 0.3947113834247017,
      "grad_norm": 0.9087732510897352,
      "learning_rate": 3.5441409020769347e-06,
      "loss": 0.1496,
      "step": 4284
    },
    {
      "epoch": 0.39480351960197174,
      "grad_norm": 0.9422059956909788,
      "learning_rate": 3.5434486938176606e-06,
      "loss": 0.1407,
      "step": 4285
    },
    {
      "epoch": 0.39489565577924174,
      "grad_norm": 0.9356627540900108,
      "learning_rate": 3.5427563886740633e-06,
      "loss": 0.1519,
      "step": 4286
    },
    {
      "epoch": 0.39498779195651174,
      "grad_norm": 0.9822939505174967,
      "learning_rate": 3.542063986710423e-06,
      "loss": 0.1582,
      "step": 4287
    },
    {
      "epoch": 0.39507992813378173,
      "grad_norm": 0.9727146799740334,
      "learning_rate": 3.5413714879910287e-06,
      "loss": 0.1435,
      "step": 4288
    },
    {
      "epoch": 0.39517206431105173,
      "grad_norm": 0.9789445530371959,
      "learning_rate": 3.540678892580181e-06,
      "loss": 0.146,
      "step": 4289
    },
    {
      "epoch": 0.3952642004883217,
      "grad_norm": 0.9347182617511673,
      "learning_rate": 3.539986200542185e-06,
      "loss": 0.148,
      "step": 4290
    },
    {
      "epoch": 0.3953563366655917,
      "grad_norm": 0.8953433756495065,
      "learning_rate": 3.539293411941359e-06,
      "loss": 0.1486,
      "step": 4291
    },
    {
      "epoch": 0.3954484728428618,
      "grad_norm": 0.8971317416449771,
      "learning_rate": 3.5386005268420277e-06,
      "loss": 0.1507,
      "step": 4292
    },
    {
      "epoch": 0.3955406090201318,
      "grad_norm": 0.9427770172715076,
      "learning_rate": 3.5379075453085256e-06,
      "loss": 0.1572,
      "step": 4293
    },
    {
      "epoch": 0.39563274519740177,
      "grad_norm": 0.8767799263466138,
      "learning_rate": 3.5372144674051963e-06,
      "loss": 0.1366,
      "step": 4294
    },
    {
      "epoch": 0.39572488137467177,
      "grad_norm": 0.9997094521941999,
      "learning_rate": 3.536521293196392e-06,
      "loss": 0.1654,
      "step": 4295
    },
    {
      "epoch": 0.39581701755194176,
      "grad_norm": 0.9105426819568583,
      "learning_rate": 3.5358280227464735e-06,
      "loss": 0.1624,
      "step": 4296
    },
    {
      "epoch": 0.39590915372921176,
      "grad_norm": 0.8461159177852656,
      "learning_rate": 3.535134656119813e-06,
      "loss": 0.1367,
      "step": 4297
    },
    {
      "epoch": 0.39600128990648176,
      "grad_norm": 0.8914245299115292,
      "learning_rate": 3.534441193380787e-06,
      "loss": 0.142,
      "step": 4298
    },
    {
      "epoch": 0.3960934260837518,
      "grad_norm": 0.894727223944378,
      "learning_rate": 3.5337476345937853e-06,
      "loss": 0.1462,
      "step": 4299
    },
    {
      "epoch": 0.3961855622610218,
      "grad_norm": 0.9194710796710017,
      "learning_rate": 3.5330539798232044e-06,
      "loss": 0.1333,
      "step": 4300
    },
    {
      "epoch": 0.3962776984382918,
      "grad_norm": 0.9067339161473813,
      "learning_rate": 3.5323602291334508e-06,
      "loss": 0.1525,
      "step": 4301
    },
    {
      "epoch": 0.3963698346155618,
      "grad_norm": 0.9207199566595172,
      "learning_rate": 3.5316663825889384e-06,
      "loss": 0.1462,
      "step": 4302
    },
    {
      "epoch": 0.3964619707928318,
      "grad_norm": 0.893790793559201,
      "learning_rate": 3.530972440254092e-06,
      "loss": 0.1465,
      "step": 4303
    },
    {
      "epoch": 0.3965541069701018,
      "grad_norm": 0.9323229957009004,
      "learning_rate": 3.530278402193342e-06,
      "loss": 0.1501,
      "step": 4304
    },
    {
      "epoch": 0.3966462431473718,
      "grad_norm": 0.93288274935138,
      "learning_rate": 3.5295842684711334e-06,
      "loss": 0.1487,
      "step": 4305
    },
    {
      "epoch": 0.39673837932464184,
      "grad_norm": 0.8904180096398162,
      "learning_rate": 3.528890039151913e-06,
      "loss": 0.1374,
      "step": 4306
    },
    {
      "epoch": 0.39683051550191184,
      "grad_norm": 0.9609819910977235,
      "learning_rate": 3.5281957143001426e-06,
      "loss": 0.165,
      "step": 4307
    },
    {
      "epoch": 0.39692265167918184,
      "grad_norm": 0.9827893129703009,
      "learning_rate": 3.5275012939802895e-06,
      "loss": 0.1546,
      "step": 4308
    },
    {
      "epoch": 0.39701478785645183,
      "grad_norm": 0.9057704871681684,
      "learning_rate": 3.5268067782568306e-06,
      "loss": 0.1433,
      "step": 4309
    },
    {
      "epoch": 0.39710692403372183,
      "grad_norm": 0.9032061609195176,
      "learning_rate": 3.5261121671942515e-06,
      "loss": 0.1475,
      "step": 4310
    },
    {
      "epoch": 0.39719906021099183,
      "grad_norm": 0.9355446728513368,
      "learning_rate": 3.525417460857048e-06,
      "loss": 0.1468,
      "step": 4311
    },
    {
      "epoch": 0.3972911963882618,
      "grad_norm": 0.9825199046051186,
      "learning_rate": 3.524722659309722e-06,
      "loss": 0.1642,
      "step": 4312
    },
    {
      "epoch": 0.3973833325655319,
      "grad_norm": 0.915612075494406,
      "learning_rate": 3.5240277626167875e-06,
      "loss": 0.1434,
      "step": 4313
    },
    {
      "epoch": 0.3974754687428019,
      "grad_norm": 0.8754977403678704,
      "learning_rate": 3.5233327708427638e-06,
      "loss": 0.1382,
      "step": 4314
    },
    {
      "epoch": 0.39756760492007187,
      "grad_norm": 0.8479881511273041,
      "learning_rate": 3.522637684052184e-06,
      "loss": 0.1358,
      "step": 4315
    },
    {
      "epoch": 0.39765974109734187,
      "grad_norm": 0.9382658935005335,
      "learning_rate": 3.5219425023095837e-06,
      "loss": 0.1547,
      "step": 4316
    },
    {
      "epoch": 0.39775187727461186,
      "grad_norm": 0.8891053914875621,
      "learning_rate": 3.5212472256795122e-06,
      "loss": 0.151,
      "step": 4317
    },
    {
      "epoch": 0.39784401345188186,
      "grad_norm": 0.8883971554082362,
      "learning_rate": 3.5205518542265265e-06,
      "loss": 0.1536,
      "step": 4318
    },
    {
      "epoch": 0.3979361496291519,
      "grad_norm": 1.0232040373202294,
      "learning_rate": 3.5198563880151913e-06,
      "loss": 0.1609,
      "step": 4319
    },
    {
      "epoch": 0.3980282858064219,
      "grad_norm": 0.9193288491755802,
      "learning_rate": 3.519160827110081e-06,
      "loss": 0.1391,
      "step": 4320
    },
    {
      "epoch": 0.3981204219836919,
      "grad_norm": 0.9096900817977849,
      "learning_rate": 3.5184651715757772e-06,
      "loss": 0.148,
      "step": 4321
    },
    {
      "epoch": 0.3982125581609619,
      "grad_norm": 0.8684172311676668,
      "learning_rate": 3.517769421476873e-06,
      "loss": 0.1324,
      "step": 4322
    },
    {
      "epoch": 0.3983046943382319,
      "grad_norm": 0.924708159192272,
      "learning_rate": 3.5170735768779683e-06,
      "loss": 0.1576,
      "step": 4323
    },
    {
      "epoch": 0.3983968305155019,
      "grad_norm": 0.8836737240145939,
      "learning_rate": 3.5163776378436736e-06,
      "loss": 0.1509,
      "step": 4324
    },
    {
      "epoch": 0.3984889666927719,
      "grad_norm": 0.92612250459839,
      "learning_rate": 3.515681604438605e-06,
      "loss": 0.1584,
      "step": 4325
    },
    {
      "epoch": 0.39858110287004195,
      "grad_norm": 0.9261909259373187,
      "learning_rate": 3.5149854767273904e-06,
      "loss": 0.163,
      "step": 4326
    },
    {
      "epoch": 0.39867323904731194,
      "grad_norm": 0.8606719880644568,
      "learning_rate": 3.5142892547746647e-06,
      "loss": 0.1507,
      "step": 4327
    },
    {
      "epoch": 0.39876537522458194,
      "grad_norm": 0.9505649437739544,
      "learning_rate": 3.513592938645073e-06,
      "loss": 0.1598,
      "step": 4328
    },
    {
      "epoch": 0.39885751140185194,
      "grad_norm": 0.8572409396930928,
      "learning_rate": 3.5128965284032677e-06,
      "loss": 0.1378,
      "step": 4329
    },
    {
      "epoch": 0.39894964757912194,
      "grad_norm": 0.9370424084273961,
      "learning_rate": 3.512200024113911e-06,
      "loss": 0.1586,
      "step": 4330
    },
    {
      "epoch": 0.39904178375639193,
      "grad_norm": 0.9345899795920014,
      "learning_rate": 3.511503425841672e-06,
      "loss": 0.158,
      "step": 4331
    },
    {
      "epoch": 0.39913391993366193,
      "grad_norm": 0.8384552894192423,
      "learning_rate": 3.5108067336512325e-06,
      "loss": 0.1327,
      "step": 4332
    },
    {
      "epoch": 0.399226056110932,
      "grad_norm": 0.9193822845561663,
      "learning_rate": 3.5101099476072776e-06,
      "loss": 0.1561,
      "step": 4333
    },
    {
      "epoch": 0.399318192288202,
      "grad_norm": 0.9288838613780442,
      "learning_rate": 3.5094130677745065e-06,
      "loss": 0.1564,
      "step": 4334
    },
    {
      "epoch": 0.399410328465472,
      "grad_norm": 0.9365564041095639,
      "learning_rate": 3.5087160942176228e-06,
      "loss": 0.1551,
      "step": 4335
    },
    {
      "epoch": 0.399502464642742,
      "grad_norm": 0.9228486860598516,
      "learning_rate": 3.5080190270013415e-06,
      "loss": 0.149,
      "step": 4336
    },
    {
      "epoch": 0.39959460082001197,
      "grad_norm": 0.9585047222693666,
      "learning_rate": 3.5073218661903852e-06,
      "loss": 0.1491,
      "step": 4337
    },
    {
      "epoch": 0.39968673699728197,
      "grad_norm": 0.9608521329567735,
      "learning_rate": 3.5066246118494847e-06,
      "loss": 0.1651,
      "step": 4338
    },
    {
      "epoch": 0.39977887317455196,
      "grad_norm": 0.9241350643720966,
      "learning_rate": 3.5059272640433808e-06,
      "loss": 0.1455,
      "step": 4339
    },
    {
      "epoch": 0.399871009351822,
      "grad_norm": 0.9336561932328415,
      "learning_rate": 3.5052298228368227e-06,
      "loss": 0.1585,
      "step": 4340
    },
    {
      "epoch": 0.399963145529092,
      "grad_norm": 0.9287712735257532,
      "learning_rate": 3.5045322882945666e-06,
      "loss": 0.1511,
      "step": 4341
    },
    {
      "epoch": 0.400055281706362,
      "grad_norm": 0.8132984018015555,
      "learning_rate": 3.5038346604813796e-06,
      "loss": 0.1208,
      "step": 4342
    },
    {
      "epoch": 0.400147417883632,
      "grad_norm": 0.9102128798105348,
      "learning_rate": 3.5031369394620364e-06,
      "loss": 0.1461,
      "step": 4343
    },
    {
      "epoch": 0.400239554060902,
      "grad_norm": 0.8849265666537798,
      "learning_rate": 3.5024391253013206e-06,
      "loss": 0.1492,
      "step": 4344
    },
    {
      "epoch": 0.400331690238172,
      "grad_norm": 0.9810479880919115,
      "learning_rate": 3.5017412180640243e-06,
      "loss": 0.1521,
      "step": 4345
    },
    {
      "epoch": 0.400423826415442,
      "grad_norm": 0.956181986827905,
      "learning_rate": 3.5010432178149473e-06,
      "loss": 0.1609,
      "step": 4346
    },
    {
      "epoch": 0.40051596259271205,
      "grad_norm": 0.9029488074220976,
      "learning_rate": 3.5003451246189003e-06,
      "loss": 0.1482,
      "step": 4347
    },
    {
      "epoch": 0.40060809876998205,
      "grad_norm": 0.8824563585798078,
      "learning_rate": 3.499646938540701e-06,
      "loss": 0.146,
      "step": 4348
    },
    {
      "epoch": 0.40070023494725204,
      "grad_norm": 0.9269831174400178,
      "learning_rate": 3.498948659645176e-06,
      "loss": 0.1484,
      "step": 4349
    },
    {
      "epoch": 0.40079237112452204,
      "grad_norm": 0.9216669167926768,
      "learning_rate": 3.4982502879971596e-06,
      "loss": 0.1429,
      "step": 4350
    },
    {
      "epoch": 0.40088450730179204,
      "grad_norm": 0.9119424481041382,
      "learning_rate": 3.497551823661498e-06,
      "loss": 0.1438,
      "step": 4351
    },
    {
      "epoch": 0.40097664347906203,
      "grad_norm": 0.9521206551490597,
      "learning_rate": 3.4968532667030408e-06,
      "loss": 0.1684,
      "step": 4352
    },
    {
      "epoch": 0.4010687796563321,
      "grad_norm": 0.9169723849076772,
      "learning_rate": 3.496154617186651e-06,
      "loss": 0.1452,
      "step": 4353
    },
    {
      "epoch": 0.4011609158336021,
      "grad_norm": 0.9816498659771387,
      "learning_rate": 3.4954558751771976e-06,
      "loss": 0.1611,
      "step": 4354
    },
    {
      "epoch": 0.4012530520108721,
      "grad_norm": 1.0011798661122229,
      "learning_rate": 3.4947570407395593e-06,
      "loss": 0.1568,
      "step": 4355
    },
    {
      "epoch": 0.4013451881881421,
      "grad_norm": 0.8784800818378974,
      "learning_rate": 3.494058113938623e-06,
      "loss": 0.1454,
      "step": 4356
    },
    {
      "epoch": 0.4014373243654121,
      "grad_norm": 0.9091950278863203,
      "learning_rate": 3.493359094839284e-06,
      "loss": 0.1454,
      "step": 4357
    },
    {
      "epoch": 0.40152946054268207,
      "grad_norm": 1.003839121809296,
      "learning_rate": 3.4926599835064446e-06,
      "loss": 0.1527,
      "step": 4358
    },
    {
      "epoch": 0.40162159671995207,
      "grad_norm": 0.8735840233392277,
      "learning_rate": 3.491960780005021e-06,
      "loss": 0.1488,
      "step": 4359
    },
    {
      "epoch": 0.4017137328972221,
      "grad_norm": 0.9279416109721896,
      "learning_rate": 3.4912614843999304e-06,
      "loss": 0.1519,
      "step": 4360
    },
    {
      "epoch": 0.4018058690744921,
      "grad_norm": 0.9292349005371296,
      "learning_rate": 3.490562096756105e-06,
      "loss": 0.145,
      "step": 4361
    },
    {
      "epoch": 0.4018980052517621,
      "grad_norm": 0.8176160899793758,
      "learning_rate": 3.4898626171384823e-06,
      "loss": 0.1319,
      "step": 4362
    },
    {
      "epoch": 0.4019901414290321,
      "grad_norm": 0.8663661790382975,
      "learning_rate": 3.4891630456120098e-06,
      "loss": 0.1478,
      "step": 4363
    },
    {
      "epoch": 0.4020822776063021,
      "grad_norm": 0.8951011796955761,
      "learning_rate": 3.4884633822416412e-06,
      "loss": 0.1411,
      "step": 4364
    },
    {
      "epoch": 0.4021744137835721,
      "grad_norm": 0.9513840457401349,
      "learning_rate": 3.4877636270923416e-06,
      "loss": 0.1557,
      "step": 4365
    },
    {
      "epoch": 0.4022665499608421,
      "grad_norm": 0.9631571432658622,
      "learning_rate": 3.4870637802290817e-06,
      "loss": 0.1528,
      "step": 4366
    },
    {
      "epoch": 0.40235868613811215,
      "grad_norm": 0.9009246331376555,
      "learning_rate": 3.4863638417168455e-06,
      "loss": 0.1406,
      "step": 4367
    },
    {
      "epoch": 0.40245082231538215,
      "grad_norm": 0.9418153307876809,
      "learning_rate": 3.4856638116206194e-06,
      "loss": 0.1522,
      "step": 4368
    },
    {
      "epoch": 0.40254295849265215,
      "grad_norm": 0.938253466177827,
      "learning_rate": 3.4849636900054023e-06,
      "loss": 0.1414,
      "step": 4369
    },
    {
      "epoch": 0.40263509466992214,
      "grad_norm": 0.949524744273461,
      "learning_rate": 3.484263476936201e-06,
      "loss": 0.1527,
      "step": 4370
    },
    {
      "epoch": 0.40272723084719214,
      "grad_norm": 0.9037729435498303,
      "learning_rate": 3.4835631724780296e-06,
      "loss": 0.1445,
      "step": 4371
    },
    {
      "epoch": 0.40281936702446214,
      "grad_norm": 0.8961162386122397,
      "learning_rate": 3.4828627766959123e-06,
      "loss": 0.1491,
      "step": 4372
    },
    {
      "epoch": 0.40291150320173214,
      "grad_norm": 0.875416342667484,
      "learning_rate": 3.4821622896548795e-06,
      "loss": 0.1483,
      "step": 4373
    },
    {
      "epoch": 0.4030036393790022,
      "grad_norm": 0.8912047991694245,
      "learning_rate": 3.4814617114199722e-06,
      "loss": 0.1514,
      "step": 4374
    },
    {
      "epoch": 0.4030957755562722,
      "grad_norm": 0.9112833813015218,
      "learning_rate": 3.4807610420562406e-06,
      "loss": 0.1599,
      "step": 4375
    },
    {
      "epoch": 0.4031879117335422,
      "grad_norm": 0.8839605396294251,
      "learning_rate": 3.48006028162874e-06,
      "loss": 0.1532,
      "step": 4376
    },
    {
      "epoch": 0.4032800479108122,
      "grad_norm": 0.8948899894612451,
      "learning_rate": 3.4793594302025367e-06,
      "loss": 0.1465,
      "step": 4377
    },
    {
      "epoch": 0.4033721840880822,
      "grad_norm": 0.851447023415438,
      "learning_rate": 3.4786584878427056e-06,
      "loss": 0.134,
      "step": 4378
    },
    {
      "epoch": 0.40346432026535217,
      "grad_norm": 0.9114250139558066,
      "learning_rate": 3.4779574546143276e-06,
      "loss": 0.1539,
      "step": 4379
    },
    {
      "epoch": 0.40355645644262217,
      "grad_norm": 0.9361638989038511,
      "learning_rate": 3.4772563305824956e-06,
      "loss": 0.1612,
      "step": 4380
    },
    {
      "epoch": 0.4036485926198922,
      "grad_norm": 0.8195140088476734,
      "learning_rate": 3.4765551158123074e-06,
      "loss": 0.1333,
      "step": 4381
    },
    {
      "epoch": 0.4037407287971622,
      "grad_norm": 0.9008523426752787,
      "learning_rate": 3.4758538103688723e-06,
      "loss": 0.1525,
      "step": 4382
    },
    {
      "epoch": 0.4038328649744322,
      "grad_norm": 0.957477825346113,
      "learning_rate": 3.4751524143173055e-06,
      "loss": 0.1651,
      "step": 4383
    },
    {
      "epoch": 0.4039250011517022,
      "grad_norm": 0.9227344073131626,
      "learning_rate": 3.4744509277227316e-06,
      "loss": 0.1493,
      "step": 4384
    },
    {
      "epoch": 0.4040171373289722,
      "grad_norm": 0.9151830123149463,
      "learning_rate": 3.473749350650285e-06,
      "loss": 0.1554,
      "step": 4385
    },
    {
      "epoch": 0.4041092735062422,
      "grad_norm": 0.8878099053567197,
      "learning_rate": 3.473047683165106e-06,
      "loss": 0.1552,
      "step": 4386
    },
    {
      "epoch": 0.40420140968351226,
      "grad_norm": 0.9120949620840199,
      "learning_rate": 3.472345925332344e-06,
      "loss": 0.156,
      "step": 4387
    },
    {
      "epoch": 0.40429354586078226,
      "grad_norm": 0.9151073635872946,
      "learning_rate": 3.47164407721716e-06,
      "loss": 0.1601,
      "step": 4388
    },
    {
      "epoch": 0.40438568203805225,
      "grad_norm": 0.9144746721555486,
      "learning_rate": 3.4709421388847177e-06,
      "loss": 0.1442,
      "step": 4389
    },
    {
      "epoch": 0.40447781821532225,
      "grad_norm": 0.9910249337465405,
      "learning_rate": 3.4702401104001937e-06,
      "loss": 0.161,
      "step": 4390
    },
    {
      "epoch": 0.40456995439259225,
      "grad_norm": 0.905580616645535,
      "learning_rate": 3.4695379918287708e-06,
      "loss": 0.147,
      "step": 4391
    },
    {
      "epoch": 0.40466209056986224,
      "grad_norm": 0.9682851249530802,
      "learning_rate": 3.468835783235641e-06,
      "loss": 0.1664,
      "step": 4392
    },
    {
      "epoch": 0.40475422674713224,
      "grad_norm": 1.019950194661881,
      "learning_rate": 3.468133484686005e-06,
      "loss": 0.1581,
      "step": 4393
    },
    {
      "epoch": 0.4048463629244023,
      "grad_norm": 0.9054834581346294,
      "learning_rate": 3.467431096245071e-06,
      "loss": 0.1458,
      "step": 4394
    },
    {
      "epoch": 0.4049384991016723,
      "grad_norm": 0.9394518511371741,
      "learning_rate": 3.466728617978054e-06,
      "loss": 0.1465,
      "step": 4395
    },
    {
      "epoch": 0.4050306352789423,
      "grad_norm": 0.9276116238870109,
      "learning_rate": 3.466026049950182e-06,
      "loss": 0.1533,
      "step": 4396
    },
    {
      "epoch": 0.4051227714562123,
      "grad_norm": 0.8814615109668135,
      "learning_rate": 3.465323392226687e-06,
      "loss": 0.1474,
      "step": 4397
    },
    {
      "epoch": 0.4052149076334823,
      "grad_norm": 0.9276110676799734,
      "learning_rate": 3.4646206448728113e-06,
      "loss": 0.1471,
      "step": 4398
    },
    {
      "epoch": 0.4053070438107523,
      "grad_norm": 0.9740379558096653,
      "learning_rate": 3.463917807953805e-06,
      "loss": 0.1514,
      "step": 4399
    },
    {
      "epoch": 0.4053991799880223,
      "grad_norm": 0.8572241682747315,
      "learning_rate": 3.4632148815349265e-06,
      "loss": 0.1411,
      "step": 4400
    },
    {
      "epoch": 0.4054913161652923,
      "grad_norm": 0.8856261129316616,
      "learning_rate": 3.4625118656814414e-06,
      "loss": 0.1531,
      "step": 4401
    },
    {
      "epoch": 0.4055834523425623,
      "grad_norm": 0.8319574623205851,
      "learning_rate": 3.4618087604586277e-06,
      "loss": 0.1476,
      "step": 4402
    },
    {
      "epoch": 0.4056755885198323,
      "grad_norm": 0.851179009626978,
      "learning_rate": 3.4611055659317663e-06,
      "loss": 0.1403,
      "step": 4403
    },
    {
      "epoch": 0.4057677246971023,
      "grad_norm": 0.7983809793208019,
      "learning_rate": 3.4604022821661493e-06,
      "loss": 0.1217,
      "step": 4404
    },
    {
      "epoch": 0.4058598608743723,
      "grad_norm": 0.8594086174519592,
      "learning_rate": 3.459698909227078e-06,
      "loss": 0.15,
      "step": 4405
    },
    {
      "epoch": 0.4059519970516423,
      "grad_norm": 0.8403974045516667,
      "learning_rate": 3.458995447179858e-06,
      "loss": 0.1442,
      "step": 4406
    },
    {
      "epoch": 0.4060441332289123,
      "grad_norm": 0.9268270027862243,
      "learning_rate": 3.4582918960898094e-06,
      "loss": 0.1556,
      "step": 4407
    },
    {
      "epoch": 0.40613626940618236,
      "grad_norm": 0.8732579314464122,
      "learning_rate": 3.457588256022254e-06,
      "loss": 0.1425,
      "step": 4408
    },
    {
      "epoch": 0.40622840558345236,
      "grad_norm": 0.9393071006353455,
      "learning_rate": 3.4568845270425268e-06,
      "loss": 0.1402,
      "step": 4409
    },
    {
      "epoch": 0.40632054176072235,
      "grad_norm": 0.8936419289776788,
      "learning_rate": 3.456180709215968e-06,
      "loss": 0.1391,
      "step": 4410
    },
    {
      "epoch": 0.40641267793799235,
      "grad_norm": 0.9028553321289632,
      "learning_rate": 3.455476802607927e-06,
      "loss": 0.1455,
      "step": 4411
    },
    {
      "epoch": 0.40650481411526235,
      "grad_norm": 0.9629848751950774,
      "learning_rate": 3.454772807283763e-06,
      "loss": 0.1595,
      "step": 4412
    },
    {
      "epoch": 0.40659695029253234,
      "grad_norm": 0.8973627311061653,
      "learning_rate": 3.45406872330884e-06,
      "loss": 0.1451,
      "step": 4413
    },
    {
      "epoch": 0.40668908646980234,
      "grad_norm": 0.8543835552665785,
      "learning_rate": 3.453364550748533e-06,
      "loss": 0.1496,
      "step": 4414
    },
    {
      "epoch": 0.4067812226470724,
      "grad_norm": 0.8201019616645914,
      "learning_rate": 3.4526602896682267e-06,
      "loss": 0.1435,
      "step": 4415
    },
    {
      "epoch": 0.4068733588243424,
      "grad_norm": 0.8978747617563513,
      "learning_rate": 3.451955940133308e-06,
      "loss": 0.1436,
      "step": 4416
    },
    {
      "epoch": 0.4069654950016124,
      "grad_norm": 0.9475916174468934,
      "learning_rate": 3.451251502209179e-06,
      "loss": 0.1572,
      "step": 4417
    },
    {
      "epoch": 0.4070576311788824,
      "grad_norm": 0.9162427080905498,
      "learning_rate": 3.4505469759612453e-06,
      "loss": 0.1454,
      "step": 4418
    },
    {
      "epoch": 0.4071497673561524,
      "grad_norm": 0.8802358936759178,
      "learning_rate": 3.4498423614549226e-06,
      "loss": 0.1437,
      "step": 4419
    },
    {
      "epoch": 0.4072419035334224,
      "grad_norm": 0.9874591988178819,
      "learning_rate": 3.449137658755635e-06,
      "loss": 0.1558,
      "step": 4420
    },
    {
      "epoch": 0.40733403971069243,
      "grad_norm": 0.9103269014973717,
      "learning_rate": 3.4484328679288133e-06,
      "loss": 0.1512,
      "step": 4421
    },
    {
      "epoch": 0.4074261758879624,
      "grad_norm": 0.8962116901628482,
      "learning_rate": 3.4477279890398968e-06,
      "loss": 0.1472,
      "step": 4422
    },
    {
      "epoch": 0.4075183120652324,
      "grad_norm": 0.9979364839432481,
      "learning_rate": 3.4470230221543362e-06,
      "loss": 0.1431,
      "step": 4423
    },
    {
      "epoch": 0.4076104482425024,
      "grad_norm": 0.9124653449987579,
      "learning_rate": 3.4463179673375846e-06,
      "loss": 0.1426,
      "step": 4424
    },
    {
      "epoch": 0.4077025844197724,
      "grad_norm": 0.9408756063038355,
      "learning_rate": 3.445612824655108e-06,
      "loss": 0.1414,
      "step": 4425
    },
    {
      "epoch": 0.4077947205970424,
      "grad_norm": 0.9395683151940247,
      "learning_rate": 3.4449075941723797e-06,
      "loss": 0.1427,
      "step": 4426
    },
    {
      "epoch": 0.4078868567743124,
      "grad_norm": 0.9611998547934455,
      "learning_rate": 3.444202275954879e-06,
      "loss": 0.1412,
      "step": 4427
    },
    {
      "epoch": 0.40797899295158246,
      "grad_norm": 0.93129468781063,
      "learning_rate": 3.443496870068096e-06,
      "loss": 0.1483,
      "step": 4428
    },
    {
      "epoch": 0.40807112912885246,
      "grad_norm": 0.9047030358220689,
      "learning_rate": 3.442791376577527e-06,
      "loss": 0.1372,
      "step": 4429
    },
    {
      "epoch": 0.40816326530612246,
      "grad_norm": 0.9545577553610516,
      "learning_rate": 3.4420857955486756e-06,
      "loss": 0.1442,
      "step": 4430
    },
    {
      "epoch": 0.40825540148339245,
      "grad_norm": 0.8843025095837417,
      "learning_rate": 3.441380127047058e-06,
      "loss": 0.1345,
      "step": 4431
    },
    {
      "epoch": 0.40834753766066245,
      "grad_norm": 0.8466673145673613,
      "learning_rate": 3.4406743711381945e-06,
      "loss": 0.1435,
      "step": 4432
    },
    {
      "epoch": 0.40843967383793245,
      "grad_norm": 0.8921580068825665,
      "learning_rate": 3.439968527887614e-06,
      "loss": 0.1462,
      "step": 4433
    },
    {
      "epoch": 0.40853181001520245,
      "grad_norm": 0.8755904986298998,
      "learning_rate": 3.439262597360855e-06,
      "loss": 0.152,
      "step": 4434
    },
    {
      "epoch": 0.4086239461924725,
      "grad_norm": 0.9056162255854322,
      "learning_rate": 3.438556579623462e-06,
      "loss": 0.1412,
      "step": 4435
    },
    {
      "epoch": 0.4087160823697425,
      "grad_norm": 0.9685429925466471,
      "learning_rate": 3.43785047474099e-06,
      "loss": 0.1574,
      "step": 4436
    },
    {
      "epoch": 0.4088082185470125,
      "grad_norm": 0.850888377979816,
      "learning_rate": 3.437144282779e-06,
      "loss": 0.1449,
      "step": 4437
    },
    {
      "epoch": 0.4089003547242825,
      "grad_norm": 0.9337752457104304,
      "learning_rate": 3.4364380038030636e-06,
      "loss": 0.1647,
      "step": 4438
    },
    {
      "epoch": 0.4089924909015525,
      "grad_norm": 0.8772877594659604,
      "learning_rate": 3.435731637878757e-06,
      "loss": 0.146,
      "step": 4439
    },
    {
      "epoch": 0.4090846270788225,
      "grad_norm": 0.9383038547513723,
      "learning_rate": 3.435025185071668e-06,
      "loss": 0.1494,
      "step": 4440
    },
    {
      "epoch": 0.4091767632560925,
      "grad_norm": 0.8908104687401064,
      "learning_rate": 3.434318645447388e-06,
      "loss": 0.1344,
      "step": 4441
    },
    {
      "epoch": 0.40926889943336253,
      "grad_norm": 0.9462178584246064,
      "learning_rate": 3.433612019071523e-06,
      "loss": 0.1481,
      "step": 4442
    },
    {
      "epoch": 0.40936103561063253,
      "grad_norm": 1.0324931287459957,
      "learning_rate": 3.4329053060096805e-06,
      "loss": 0.1564,
      "step": 4443
    },
    {
      "epoch": 0.4094531717879025,
      "grad_norm": 0.9704275908210812,
      "learning_rate": 3.4321985063274805e-06,
      "loss": 0.1566,
      "step": 4444
    },
    {
      "epoch": 0.4095453079651725,
      "grad_norm": 0.9285469120115527,
      "learning_rate": 3.431491620090549e-06,
      "loss": 0.1529,
      "step": 4445
    },
    {
      "epoch": 0.4096374441424425,
      "grad_norm": 0.8357706208744242,
      "learning_rate": 3.43078464736452e-06,
      "loss": 0.1382,
      "step": 4446
    },
    {
      "epoch": 0.4097295803197125,
      "grad_norm": 0.979590666946963,
      "learning_rate": 3.4300775882150367e-06,
      "loss": 0.1642,
      "step": 4447
    },
    {
      "epoch": 0.4098217164969825,
      "grad_norm": 0.9152729280726183,
      "learning_rate": 3.429370442707749e-06,
      "loss": 0.1474,
      "step": 4448
    },
    {
      "epoch": 0.40991385267425257,
      "grad_norm": 0.9441617628306267,
      "learning_rate": 3.428663210908315e-06,
      "loss": 0.1565,
      "step": 4449
    },
    {
      "epoch": 0.41000598885152256,
      "grad_norm": 0.9176715113996435,
      "learning_rate": 3.427955892882403e-06,
      "loss": 0.1561,
      "step": 4450
    },
    {
      "epoch": 0.41009812502879256,
      "grad_norm": 0.9473640183674535,
      "learning_rate": 3.4272484886956856e-06,
      "loss": 0.1532,
      "step": 4451
    },
    {
      "epoch": 0.41019026120606256,
      "grad_norm": 0.9388563681988958,
      "learning_rate": 3.4265409984138463e-06,
      "loss": 0.1552,
      "step": 4452
    },
    {
      "epoch": 0.41028239738333255,
      "grad_norm": 0.8830180332465869,
      "learning_rate": 3.4258334221025763e-06,
      "loss": 0.1392,
      "step": 4453
    },
    {
      "epoch": 0.41037453356060255,
      "grad_norm": 0.8404506052238401,
      "learning_rate": 3.425125759827573e-06,
      "loss": 0.1408,
      "step": 4454
    },
    {
      "epoch": 0.4104666697378726,
      "grad_norm": 0.8937046949022973,
      "learning_rate": 3.4244180116545434e-06,
      "loss": 0.1493,
      "step": 4455
    },
    {
      "epoch": 0.4105588059151426,
      "grad_norm": 0.9179506656858187,
      "learning_rate": 3.423710177649202e-06,
      "loss": 0.1469,
      "step": 4456
    },
    {
      "epoch": 0.4106509420924126,
      "grad_norm": 0.9052018029466072,
      "learning_rate": 3.423002257877271e-06,
      "loss": 0.1524,
      "step": 4457
    },
    {
      "epoch": 0.4107430782696826,
      "grad_norm": 0.9531593496243919,
      "learning_rate": 3.4222942524044817e-06,
      "loss": 0.1635,
      "step": 4458
    },
    {
      "epoch": 0.4108352144469526,
      "grad_norm": 0.8613976632136475,
      "learning_rate": 3.4215861612965705e-06,
      "loss": 0.1364,
      "step": 4459
    },
    {
      "epoch": 0.4109273506242226,
      "grad_norm": 0.9203983911114743,
      "learning_rate": 3.4208779846192856e-06,
      "loss": 0.1471,
      "step": 4460
    },
    {
      "epoch": 0.4110194868014926,
      "grad_norm": 0.8893575267649316,
      "learning_rate": 3.420169722438381e-06,
      "loss": 0.1481,
      "step": 4461
    },
    {
      "epoch": 0.41111162297876264,
      "grad_norm": 0.9481408074859154,
      "learning_rate": 3.419461374819618e-06,
      "loss": 0.153,
      "step": 4462
    },
    {
      "epoch": 0.41120375915603263,
      "grad_norm": 0.9102576968282057,
      "learning_rate": 3.418752941828769e-06,
      "loss": 0.1479,
      "step": 4463
    },
    {
      "epoch": 0.41129589533330263,
      "grad_norm": 0.9170697810535563,
      "learning_rate": 3.418044423531609e-06,
      "loss": 0.1493,
      "step": 4464
    },
    {
      "epoch": 0.4113880315105726,
      "grad_norm": 0.9107203490066708,
      "learning_rate": 3.4173358199939253e-06,
      "loss": 0.1544,
      "step": 4465
    },
    {
      "epoch": 0.4114801676878426,
      "grad_norm": 0.8864919007877409,
      "learning_rate": 3.416627131281513e-06,
      "loss": 0.1501,
      "step": 4466
    },
    {
      "epoch": 0.4115723038651126,
      "grad_norm": 0.8923614673943928,
      "learning_rate": 3.415918357460173e-06,
      "loss": 0.1529,
      "step": 4467
    },
    {
      "epoch": 0.4116644400423826,
      "grad_norm": 0.8625947768008402,
      "learning_rate": 3.4152094985957135e-06,
      "loss": 0.1537,
      "step": 4468
    },
    {
      "epoch": 0.41175657621965267,
      "grad_norm": 0.8763271138448714,
      "learning_rate": 3.4145005547539552e-06,
      "loss": 0.1422,
      "step": 4469
    },
    {
      "epoch": 0.41184871239692267,
      "grad_norm": 0.845461883705554,
      "learning_rate": 3.413791526000721e-06,
      "loss": 0.137,
      "step": 4470
    },
    {
      "epoch": 0.41194084857419266,
      "grad_norm": 0.8670354139637978,
      "learning_rate": 3.4130824124018453e-06,
      "loss": 0.1487,
      "step": 4471
    },
    {
      "epoch": 0.41203298475146266,
      "grad_norm": 0.9156206700607552,
      "learning_rate": 3.4123732140231695e-06,
      "loss": 0.1505,
      "step": 4472
    },
    {
      "epoch": 0.41212512092873266,
      "grad_norm": 0.9246364937441267,
      "learning_rate": 3.411663930930543e-06,
      "loss": 0.1559,
      "step": 4473
    },
    {
      "epoch": 0.41221725710600265,
      "grad_norm": 0.9060481038174606,
      "learning_rate": 3.4109545631898223e-06,
      "loss": 0.1559,
      "step": 4474
    },
    {
      "epoch": 0.41230939328327265,
      "grad_norm": 0.9347223663635646,
      "learning_rate": 3.410245110866872e-06,
      "loss": 0.1598,
      "step": 4475
    },
    {
      "epoch": 0.4124015294605427,
      "grad_norm": 0.9055809419343559,
      "learning_rate": 3.409535574027565e-06,
      "loss": 0.1373,
      "step": 4476
    },
    {
      "epoch": 0.4124936656378127,
      "grad_norm": 0.8832161886506429,
      "learning_rate": 3.4088259527377826e-06,
      "loss": 0.1453,
      "step": 4477
    },
    {
      "epoch": 0.4125858018150827,
      "grad_norm": 0.9766041924808401,
      "learning_rate": 3.408116247063412e-06,
      "loss": 0.1624,
      "step": 4478
    },
    {
      "epoch": 0.4126779379923527,
      "grad_norm": 0.9355812719201331,
      "learning_rate": 3.407406457070351e-06,
      "loss": 0.1439,
      "step": 4479
    },
    {
      "epoch": 0.4127700741696227,
      "grad_norm": 0.9492137205341494,
      "learning_rate": 3.4066965828245023e-06,
      "loss": 0.1495,
      "step": 4480
    },
    {
      "epoch": 0.4128622103468927,
      "grad_norm": 0.9537352797092123,
      "learning_rate": 3.4059866243917784e-06,
      "loss": 0.1587,
      "step": 4481
    },
    {
      "epoch": 0.4129543465241627,
      "grad_norm": 0.9084473898134254,
      "learning_rate": 3.4052765818380988e-06,
      "loss": 0.1528,
      "step": 4482
    },
    {
      "epoch": 0.41304648270143274,
      "grad_norm": 0.8738894354219552,
      "learning_rate": 3.4045664552293913e-06,
      "loss": 0.1439,
      "step": 4483
    },
    {
      "epoch": 0.41313861887870273,
      "grad_norm": 0.955205886413484,
      "learning_rate": 3.4038562446315908e-06,
      "loss": 0.1621,
      "step": 4484
    },
    {
      "epoch": 0.41323075505597273,
      "grad_norm": 0.9384210771953125,
      "learning_rate": 3.4031459501106412e-06,
      "loss": 0.1402,
      "step": 4485
    },
    {
      "epoch": 0.41332289123324273,
      "grad_norm": 0.9346661196851742,
      "learning_rate": 3.4024355717324927e-06,
      "loss": 0.1599,
      "step": 4486
    },
    {
      "epoch": 0.4134150274105127,
      "grad_norm": 0.8793320839571395,
      "learning_rate": 3.4017251095631044e-06,
      "loss": 0.15,
      "step": 4487
    },
    {
      "epoch": 0.4135071635877827,
      "grad_norm": 0.976370275498756,
      "learning_rate": 3.401014563668442e-06,
      "loss": 0.1457,
      "step": 4488
    },
    {
      "epoch": 0.4135992997650528,
      "grad_norm": 0.8577892362812106,
      "learning_rate": 3.4003039341144807e-06,
      "loss": 0.1381,
      "step": 4489
    },
    {
      "epoch": 0.41369143594232277,
      "grad_norm": 0.8677820161364236,
      "learning_rate": 3.3995932209672028e-06,
      "loss": 0.1457,
      "step": 4490
    },
    {
      "epoch": 0.41378357211959277,
      "grad_norm": 0.9488229913826759,
      "learning_rate": 3.3988824242925965e-06,
      "loss": 0.1516,
      "step": 4491
    },
    {
      "epoch": 0.41387570829686277,
      "grad_norm": 0.8646831069607954,
      "learning_rate": 3.398171544156661e-06,
      "loss": 0.1294,
      "step": 4492
    },
    {
      "epoch": 0.41396784447413276,
      "grad_norm": 1.0109574950040325,
      "learning_rate": 3.3974605806254015e-06,
      "loss": 0.1455,
      "step": 4493
    },
    {
      "epoch": 0.41405998065140276,
      "grad_norm": 0.862225186168334,
      "learning_rate": 3.3967495337648297e-06,
      "loss": 0.1471,
      "step": 4494
    },
    {
      "epoch": 0.41415211682867276,
      "grad_norm": 0.9281967815655426,
      "learning_rate": 3.396038403640968e-06,
      "loss": 0.1588,
      "step": 4495
    },
    {
      "epoch": 0.4142442530059428,
      "grad_norm": 0.9404582510399649,
      "learning_rate": 3.395327190319843e-06,
      "loss": 0.1556,
      "step": 4496
    },
    {
      "epoch": 0.4143363891832128,
      "grad_norm": 0.871442207254464,
      "learning_rate": 3.394615893867492e-06,
      "loss": 0.1425,
      "step": 4497
    },
    {
      "epoch": 0.4144285253604828,
      "grad_norm": 0.9006939834301403,
      "learning_rate": 3.3939045143499604e-06,
      "loss": 0.1392,
      "step": 4498
    },
    {
      "epoch": 0.4145206615377528,
      "grad_norm": 0.8677132626890294,
      "learning_rate": 3.393193051833297e-06,
      "loss": 0.146,
      "step": 4499
    },
    {
      "epoch": 0.4146127977150228,
      "grad_norm": 0.8750036614339674,
      "learning_rate": 3.392481506383563e-06,
      "loss": 0.1557,
      "step": 4500
    },
    {
      "epoch": 0.4146127977150228,
      "eval_loss": 0.14849522709846497,
      "eval_runtime": 299.3682,
      "eval_samples_per_second": 23.439,
      "eval_steps_per_second": 2.933,
      "step": 4500
    },
    {
      "epoch": 0.4147049338922928,
      "grad_norm": 0.9083364571977722,
      "learning_rate": 3.391769878066825e-06,
      "loss": 0.1554,
      "step": 4501
    },
    {
      "epoch": 0.4147970700695628,
      "grad_norm": 0.9037418164648761,
      "learning_rate": 3.391058166949159e-06,
      "loss": 0.1483,
      "step": 4502
    },
    {
      "epoch": 0.41488920624683284,
      "grad_norm": 0.9011614056787858,
      "learning_rate": 3.390346373096645e-06,
      "loss": 0.1622,
      "step": 4503
    },
    {
      "epoch": 0.41498134242410284,
      "grad_norm": 0.935184827582457,
      "learning_rate": 3.3896344965753746e-06,
      "loss": 0.1542,
      "step": 4504
    },
    {
      "epoch": 0.41507347860137284,
      "grad_norm": 0.9172362705852074,
      "learning_rate": 3.3889225374514455e-06,
      "loss": 0.1522,
      "step": 4505
    },
    {
      "epoch": 0.41516561477864283,
      "grad_norm": 0.8595824614921564,
      "learning_rate": 3.388210495790964e-06,
      "loss": 0.1447,
      "step": 4506
    },
    {
      "epoch": 0.41525775095591283,
      "grad_norm": 0.8785044781998784,
      "learning_rate": 3.3874983716600414e-06,
      "loss": 0.1459,
      "step": 4507
    },
    {
      "epoch": 0.4153498871331828,
      "grad_norm": 0.9537125508997492,
      "learning_rate": 3.3867861651247997e-06,
      "loss": 0.1605,
      "step": 4508
    },
    {
      "epoch": 0.4154420233104528,
      "grad_norm": 0.89864935320374,
      "learning_rate": 3.3860738762513674e-06,
      "loss": 0.1539,
      "step": 4509
    },
    {
      "epoch": 0.4155341594877229,
      "grad_norm": 0.9531899881558427,
      "learning_rate": 3.3853615051058798e-06,
      "loss": 0.1476,
      "step": 4510
    },
    {
      "epoch": 0.4156262956649929,
      "grad_norm": 0.8988810611214069,
      "learning_rate": 3.384649051754481e-06,
      "loss": 0.1453,
      "step": 4511
    },
    {
      "epoch": 0.41571843184226287,
      "grad_norm": 0.9566810993912842,
      "learning_rate": 3.3839365162633237e-06,
      "loss": 0.1554,
      "step": 4512
    },
    {
      "epoch": 0.41581056801953287,
      "grad_norm": 0.9267067824355266,
      "learning_rate": 3.3832238986985643e-06,
      "loss": 0.1581,
      "step": 4513
    },
    {
      "epoch": 0.41590270419680286,
      "grad_norm": 0.9276426644752526,
      "learning_rate": 3.382511199126372e-06,
      "loss": 0.1353,
      "step": 4514
    },
    {
      "epoch": 0.41599484037407286,
      "grad_norm": 0.9891793471815831,
      "learning_rate": 3.3817984176129194e-06,
      "loss": 0.1456,
      "step": 4515
    },
    {
      "epoch": 0.41608697655134286,
      "grad_norm": 0.8778307632415944,
      "learning_rate": 3.3810855542243892e-06,
      "loss": 0.1417,
      "step": 4516
    },
    {
      "epoch": 0.4161791127286129,
      "grad_norm": 0.8922929124760213,
      "learning_rate": 3.38037260902697e-06,
      "loss": 0.146,
      "step": 4517
    },
    {
      "epoch": 0.4162712489058829,
      "grad_norm": 0.8832533852460722,
      "learning_rate": 3.3796595820868596e-06,
      "loss": 0.1403,
      "step": 4518
    },
    {
      "epoch": 0.4163633850831529,
      "grad_norm": 0.9314412639880251,
      "learning_rate": 3.378946473470262e-06,
      "loss": 0.1535,
      "step": 4519
    },
    {
      "epoch": 0.4164555212604229,
      "grad_norm": 0.8383119728511532,
      "learning_rate": 3.37823328324339e-06,
      "loss": 0.1357,
      "step": 4520
    },
    {
      "epoch": 0.4165476574376929,
      "grad_norm": 0.9162340271668836,
      "learning_rate": 3.3775200114724632e-06,
      "loss": 0.1479,
      "step": 4521
    },
    {
      "epoch": 0.4166397936149629,
      "grad_norm": 0.9381510485047666,
      "learning_rate": 3.3768066582237084e-06,
      "loss": 0.162,
      "step": 4522
    },
    {
      "epoch": 0.41673192979223295,
      "grad_norm": 0.903114085112653,
      "learning_rate": 3.3760932235633614e-06,
      "loss": 0.1555,
      "step": 4523
    },
    {
      "epoch": 0.41682406596950294,
      "grad_norm": 0.9275753356424354,
      "learning_rate": 3.3753797075576646e-06,
      "loss": 0.1515,
      "step": 4524
    },
    {
      "epoch": 0.41691620214677294,
      "grad_norm": 0.896898839228486,
      "learning_rate": 3.374666110272868e-06,
      "loss": 0.137,
      "step": 4525
    },
    {
      "epoch": 0.41700833832404294,
      "grad_norm": 0.8418394217228213,
      "learning_rate": 3.3739524317752276e-06,
      "loss": 0.1297,
      "step": 4526
    },
    {
      "epoch": 0.41710047450131293,
      "grad_norm": 0.8178077340704089,
      "learning_rate": 3.373238672131011e-06,
      "loss": 0.1383,
      "step": 4527
    },
    {
      "epoch": 0.41719261067858293,
      "grad_norm": 0.9015807010863438,
      "learning_rate": 3.372524831406489e-06,
      "loss": 0.1405,
      "step": 4528
    },
    {
      "epoch": 0.41728474685585293,
      "grad_norm": 0.9091807204242067,
      "learning_rate": 3.371810909667943e-06,
      "loss": 0.1425,
      "step": 4529
    },
    {
      "epoch": 0.417376883033123,
      "grad_norm": 0.8591056164974364,
      "learning_rate": 3.37109690698166e-06,
      "loss": 0.1298,
      "step": 4530
    },
    {
      "epoch": 0.417469019210393,
      "grad_norm": 0.8970907735401513,
      "learning_rate": 3.3703828234139357e-06,
      "loss": 0.1335,
      "step": 4531
    },
    {
      "epoch": 0.417561155387663,
      "grad_norm": 0.9376572782577746,
      "learning_rate": 3.369668659031072e-06,
      "loss": 0.1542,
      "step": 4532
    },
    {
      "epoch": 0.41765329156493297,
      "grad_norm": 0.9497750415555174,
      "learning_rate": 3.368954413899381e-06,
      "loss": 0.1557,
      "step": 4533
    },
    {
      "epoch": 0.41774542774220297,
      "grad_norm": 0.7958487365461181,
      "learning_rate": 3.368240088085177e-06,
      "loss": 0.1361,
      "step": 4534
    },
    {
      "epoch": 0.41783756391947297,
      "grad_norm": 0.9245976345181074,
      "learning_rate": 3.367525681654789e-06,
      "loss": 0.1636,
      "step": 4535
    },
    {
      "epoch": 0.41792970009674296,
      "grad_norm": 0.8430081265658356,
      "learning_rate": 3.366811194674548e-06,
      "loss": 0.1275,
      "step": 4536
    },
    {
      "epoch": 0.418021836274013,
      "grad_norm": 0.9432573855683106,
      "learning_rate": 3.3660966272107943e-06,
      "loss": 0.1401,
      "step": 4537
    },
    {
      "epoch": 0.418113972451283,
      "grad_norm": 0.878180759208677,
      "learning_rate": 3.365381979329875e-06,
      "loss": 0.1425,
      "step": 4538
    },
    {
      "epoch": 0.418206108628553,
      "grad_norm": 0.8842514952556141,
      "learning_rate": 3.3646672510981458e-06,
      "loss": 0.1518,
      "step": 4539
    },
    {
      "epoch": 0.418298244805823,
      "grad_norm": 0.890925588999716,
      "learning_rate": 3.363952442581969e-06,
      "loss": 0.1453,
      "step": 4540
    },
    {
      "epoch": 0.418390380983093,
      "grad_norm": 0.8994367794628658,
      "learning_rate": 3.3632375538477165e-06,
      "loss": 0.1379,
      "step": 4541
    },
    {
      "epoch": 0.418482517160363,
      "grad_norm": 1.0133084304207123,
      "learning_rate": 3.3625225849617625e-06,
      "loss": 0.1578,
      "step": 4542
    },
    {
      "epoch": 0.418574653337633,
      "grad_norm": 0.9206787724545995,
      "learning_rate": 3.3618075359904946e-06,
      "loss": 0.1376,
      "step": 4543
    },
    {
      "epoch": 0.41866678951490305,
      "grad_norm": 0.9561681753325468,
      "learning_rate": 3.361092407000304e-06,
      "loss": 0.1585,
      "step": 4544
    },
    {
      "epoch": 0.41875892569217305,
      "grad_norm": 0.8866968591572271,
      "learning_rate": 3.3603771980575907e-06,
      "loss": 0.1444,
      "step": 4545
    },
    {
      "epoch": 0.41885106186944304,
      "grad_norm": 0.9398479480714228,
      "learning_rate": 3.359661909228762e-06,
      "loss": 0.159,
      "step": 4546
    },
    {
      "epoch": 0.41894319804671304,
      "grad_norm": 0.9582293954402347,
      "learning_rate": 3.3589465405802324e-06,
      "loss": 0.1441,
      "step": 4547
    },
    {
      "epoch": 0.41903533422398304,
      "grad_norm": 0.9766029174637952,
      "learning_rate": 3.358231092178424e-06,
      "loss": 0.1474,
      "step": 4548
    },
    {
      "epoch": 0.41912747040125303,
      "grad_norm": 0.8689780855332151,
      "learning_rate": 3.3575155640897666e-06,
      "loss": 0.1414,
      "step": 4549
    },
    {
      "epoch": 0.41921960657852303,
      "grad_norm": 0.897722415255773,
      "learning_rate": 3.356799956380697e-06,
      "loss": 0.1402,
      "step": 4550
    },
    {
      "epoch": 0.4193117427557931,
      "grad_norm": 0.8923317845839843,
      "learning_rate": 3.3560842691176583e-06,
      "loss": 0.1449,
      "step": 4551
    },
    {
      "epoch": 0.4194038789330631,
      "grad_norm": 0.9314612085458115,
      "learning_rate": 3.355368502367104e-06,
      "loss": 0.1467,
      "step": 4552
    },
    {
      "epoch": 0.4194960151103331,
      "grad_norm": 0.8821129071299318,
      "learning_rate": 3.354652656195492e-06,
      "loss": 0.1522,
      "step": 4553
    },
    {
      "epoch": 0.4195881512876031,
      "grad_norm": 0.9219280388280229,
      "learning_rate": 3.3539367306692884e-06,
      "loss": 0.1479,
      "step": 4554
    },
    {
      "epoch": 0.41968028746487307,
      "grad_norm": 0.894711504158373,
      "learning_rate": 3.3532207258549676e-06,
      "loss": 0.1422,
      "step": 4555
    },
    {
      "epoch": 0.41977242364214307,
      "grad_norm": 0.8375395309893726,
      "learning_rate": 3.352504641819011e-06,
      "loss": 0.1351,
      "step": 4556
    },
    {
      "epoch": 0.4198645598194131,
      "grad_norm": 0.9230302453034883,
      "learning_rate": 3.3517884786279065e-06,
      "loss": 0.1455,
      "step": 4557
    },
    {
      "epoch": 0.4199566959966831,
      "grad_norm": 0.8746154702964645,
      "learning_rate": 3.3510722363481505e-06,
      "loss": 0.1318,
      "step": 4558
    },
    {
      "epoch": 0.4200488321739531,
      "grad_norm": 0.992793698546288,
      "learning_rate": 3.350355915046245e-06,
      "loss": 0.1579,
      "step": 4559
    },
    {
      "epoch": 0.4201409683512231,
      "grad_norm": 0.9214331623204591,
      "learning_rate": 3.3496395147887017e-06,
      "loss": 0.1434,
      "step": 4560
    },
    {
      "epoch": 0.4202331045284931,
      "grad_norm": 0.9457222587582186,
      "learning_rate": 3.348923035642038e-06,
      "loss": 0.1534,
      "step": 4561
    },
    {
      "epoch": 0.4203252407057631,
      "grad_norm": 0.956493474906948,
      "learning_rate": 3.3482064776727784e-06,
      "loss": 0.1621,
      "step": 4562
    },
    {
      "epoch": 0.4204173768830331,
      "grad_norm": 0.8780357825071199,
      "learning_rate": 3.3474898409474573e-06,
      "loss": 0.1429,
      "step": 4563
    },
    {
      "epoch": 0.42050951306030315,
      "grad_norm": 1.0473299857234213,
      "learning_rate": 3.3467731255326123e-06,
      "loss": 0.1698,
      "step": 4564
    },
    {
      "epoch": 0.42060164923757315,
      "grad_norm": 0.9885364033480389,
      "learning_rate": 3.346056331494792e-06,
      "loss": 0.1444,
      "step": 4565
    },
    {
      "epoch": 0.42069378541484315,
      "grad_norm": 0.8701414593375438,
      "learning_rate": 3.34533945890055e-06,
      "loss": 0.1327,
      "step": 4566
    },
    {
      "epoch": 0.42078592159211314,
      "grad_norm": 0.7992360530702873,
      "learning_rate": 3.344622507816448e-06,
      "loss": 0.1254,
      "step": 4567
    },
    {
      "epoch": 0.42087805776938314,
      "grad_norm": 0.8571525200902486,
      "learning_rate": 3.343905478309056e-06,
      "loss": 0.1392,
      "step": 4568
    },
    {
      "epoch": 0.42097019394665314,
      "grad_norm": 1.1164661745110709,
      "learning_rate": 3.3431883704449485e-06,
      "loss": 0.165,
      "step": 4569
    },
    {
      "epoch": 0.42106233012392313,
      "grad_norm": 1.0685661318077153,
      "learning_rate": 3.342471184290711e-06,
      "loss": 0.1468,
      "step": 4570
    },
    {
      "epoch": 0.4211544663011932,
      "grad_norm": 1.0295348039132994,
      "learning_rate": 3.3417539199129327e-06,
      "loss": 0.1755,
      "step": 4571
    },
    {
      "epoch": 0.4212466024784632,
      "grad_norm": 0.8852635534731436,
      "learning_rate": 3.341036577378213e-06,
      "loss": 0.141,
      "step": 4572
    },
    {
      "epoch": 0.4213387386557332,
      "grad_norm": 1.0333297539743211,
      "learning_rate": 3.3403191567531563e-06,
      "loss": 0.1612,
      "step": 4573
    },
    {
      "epoch": 0.4214308748330032,
      "grad_norm": 0.9589308822078715,
      "learning_rate": 3.3396016581043757e-06,
      "loss": 0.1416,
      "step": 4574
    },
    {
      "epoch": 0.4215230110102732,
      "grad_norm": 1.0447741475735821,
      "learning_rate": 3.3388840814984896e-06,
      "loss": 0.1674,
      "step": 4575
    },
    {
      "epoch": 0.42161514718754317,
      "grad_norm": 0.9598745446739234,
      "learning_rate": 3.3381664270021273e-06,
      "loss": 0.1526,
      "step": 4576
    },
    {
      "epoch": 0.42170728336481317,
      "grad_norm": 0.8744813212867445,
      "learning_rate": 3.337448694681922e-06,
      "loss": 0.1466,
      "step": 4577
    },
    {
      "epoch": 0.4217994195420832,
      "grad_norm": 0.8700235852673424,
      "learning_rate": 3.3367308846045155e-06,
      "loss": 0.1504,
      "step": 4578
    },
    {
      "epoch": 0.4218915557193532,
      "grad_norm": 0.9462197033773357,
      "learning_rate": 3.3360129968365556e-06,
      "loss": 0.1571,
      "step": 4579
    },
    {
      "epoch": 0.4219836918966232,
      "grad_norm": 1.016930154691944,
      "learning_rate": 3.335295031444699e-06,
      "loss": 0.1627,
      "step": 4580
    },
    {
      "epoch": 0.4220758280738932,
      "grad_norm": 1.0256579540504431,
      "learning_rate": 3.3345769884956097e-06,
      "loss": 0.1592,
      "step": 4581
    },
    {
      "epoch": 0.4221679642511632,
      "grad_norm": 0.8357570594016366,
      "learning_rate": 3.3338588680559565e-06,
      "loss": 0.1362,
      "step": 4582
    },
    {
      "epoch": 0.4222601004284332,
      "grad_norm": 0.8566130957892835,
      "learning_rate": 3.3331406701924173e-06,
      "loss": 0.1412,
      "step": 4583
    },
    {
      "epoch": 0.4223522366057032,
      "grad_norm": 1.0122900400853339,
      "learning_rate": 3.3324223949716783e-06,
      "loss": 0.1658,
      "step": 4584
    },
    {
      "epoch": 0.42244437278297325,
      "grad_norm": 1.026159322299466,
      "learning_rate": 3.3317040424604296e-06,
      "loss": 0.1389,
      "step": 4585
    },
    {
      "epoch": 0.42253650896024325,
      "grad_norm": 0.9442753705533472,
      "learning_rate": 3.330985612725371e-06,
      "loss": 0.1301,
      "step": 4586
    },
    {
      "epoch": 0.42262864513751325,
      "grad_norm": 0.9828708879208898,
      "learning_rate": 3.330267105833209e-06,
      "loss": 0.156,
      "step": 4587
    },
    {
      "epoch": 0.42272078131478324,
      "grad_norm": 0.9244268518849134,
      "learning_rate": 3.3295485218506568e-06,
      "loss": 0.1427,
      "step": 4588
    },
    {
      "epoch": 0.42281291749205324,
      "grad_norm": 0.9784915828014263,
      "learning_rate": 3.328829860844435e-06,
      "loss": 0.1543,
      "step": 4589
    },
    {
      "epoch": 0.42290505366932324,
      "grad_norm": 0.9435802145767377,
      "learning_rate": 3.328111122881272e-06,
      "loss": 0.1499,
      "step": 4590
    },
    {
      "epoch": 0.4229971898465933,
      "grad_norm": 0.9158661147876648,
      "learning_rate": 3.327392308027902e-06,
      "loss": 0.1486,
      "step": 4591
    },
    {
      "epoch": 0.4230893260238633,
      "grad_norm": 0.9137787014812229,
      "learning_rate": 3.3266734163510668e-06,
      "loss": 0.1423,
      "step": 4592
    },
    {
      "epoch": 0.4231814622011333,
      "grad_norm": 0.9229747858346737,
      "learning_rate": 3.325954447917516e-06,
      "loss": 0.145,
      "step": 4593
    },
    {
      "epoch": 0.4232735983784033,
      "grad_norm": 0.8730894128266261,
      "learning_rate": 3.3252354027940055e-06,
      "loss": 0.149,
      "step": 4594
    },
    {
      "epoch": 0.4233657345556733,
      "grad_norm": 0.9057946535086321,
      "learning_rate": 3.3245162810472998e-06,
      "loss": 0.155,
      "step": 4595
    },
    {
      "epoch": 0.4234578707329433,
      "grad_norm": 0.8590449088625499,
      "learning_rate": 3.323797082744168e-06,
      "loss": 0.1412,
      "step": 4596
    },
    {
      "epoch": 0.4235500069102133,
      "grad_norm": 0.8387324971373283,
      "learning_rate": 3.3230778079513883e-06,
      "loss": 0.1345,
      "step": 4597
    },
    {
      "epoch": 0.4236421430874833,
      "grad_norm": 0.7968624023810995,
      "learning_rate": 3.3223584567357458e-06,
      "loss": 0.1398,
      "step": 4598
    },
    {
      "epoch": 0.4237342792647533,
      "grad_norm": 0.9163589605700169,
      "learning_rate": 3.3216390291640327e-06,
      "loss": 0.1479,
      "step": 4599
    },
    {
      "epoch": 0.4238264154420233,
      "grad_norm": 0.9194471612561067,
      "learning_rate": 3.320919525303047e-06,
      "loss": 0.1504,
      "step": 4600
    },
    {
      "epoch": 0.4239185516192933,
      "grad_norm": 0.8551752072019035,
      "learning_rate": 3.3201999452195942e-06,
      "loss": 0.1497,
      "step": 4601
    },
    {
      "epoch": 0.4240106877965633,
      "grad_norm": 0.8814315296349494,
      "learning_rate": 3.3194802889804887e-06,
      "loss": 0.1511,
      "step": 4602
    },
    {
      "epoch": 0.4241028239738333,
      "grad_norm": 0.8960260768130908,
      "learning_rate": 3.318760556652551e-06,
      "loss": 0.1413,
      "step": 4603
    },
    {
      "epoch": 0.4241949601511033,
      "grad_norm": 0.9430328687544396,
      "learning_rate": 3.318040748302606e-06,
      "loss": 0.1566,
      "step": 4604
    },
    {
      "epoch": 0.42428709632837336,
      "grad_norm": 0.8505985044861099,
      "learning_rate": 3.317320863997491e-06,
      "loss": 0.1383,
      "step": 4605
    },
    {
      "epoch": 0.42437923250564336,
      "grad_norm": 0.9368280598249041,
      "learning_rate": 3.316600903804045e-06,
      "loss": 0.1509,
      "step": 4606
    },
    {
      "epoch": 0.42447136868291335,
      "grad_norm": 0.9237871325274064,
      "learning_rate": 3.3158808677891167e-06,
      "loss": 0.1571,
      "step": 4607
    },
    {
      "epoch": 0.42456350486018335,
      "grad_norm": 0.9214272756644586,
      "learning_rate": 3.315160756019563e-06,
      "loss": 0.1448,
      "step": 4608
    },
    {
      "epoch": 0.42465564103745335,
      "grad_norm": 0.9531573937406549,
      "learning_rate": 3.314440568562245e-06,
      "loss": 0.149,
      "step": 4609
    },
    {
      "epoch": 0.42474777721472334,
      "grad_norm": 0.8698615665529474,
      "learning_rate": 3.3137203054840323e-06,
      "loss": 0.1444,
      "step": 4610
    },
    {
      "epoch": 0.42483991339199334,
      "grad_norm": 0.8876415171072912,
      "learning_rate": 3.312999966851802e-06,
      "loss": 0.1431,
      "step": 4611
    },
    {
      "epoch": 0.4249320495692634,
      "grad_norm": 0.9031438952079939,
      "learning_rate": 3.3122795527324374e-06,
      "loss": 0.1436,
      "step": 4612
    },
    {
      "epoch": 0.4250241857465334,
      "grad_norm": 0.9283680531824011,
      "learning_rate": 3.3115590631928284e-06,
      "loss": 0.1478,
      "step": 4613
    },
    {
      "epoch": 0.4251163219238034,
      "grad_norm": 0.949901482292331,
      "learning_rate": 3.3108384982998736e-06,
      "loss": 0.1534,
      "step": 4614
    },
    {
      "epoch": 0.4252084581010734,
      "grad_norm": 0.8647134104622481,
      "learning_rate": 3.310117858120476e-06,
      "loss": 0.144,
      "step": 4615
    },
    {
      "epoch": 0.4253005942783434,
      "grad_norm": 0.9460010594500424,
      "learning_rate": 3.3093971427215497e-06,
      "loss": 0.1514,
      "step": 4616
    },
    {
      "epoch": 0.4253927304556134,
      "grad_norm": 0.8916111121637049,
      "learning_rate": 3.3086763521700105e-06,
      "loss": 0.1418,
      "step": 4617
    },
    {
      "epoch": 0.4254848666328834,
      "grad_norm": 0.9277173612977886,
      "learning_rate": 3.307955486532785e-06,
      "loss": 0.1511,
      "step": 4618
    },
    {
      "epoch": 0.4255770028101534,
      "grad_norm": 0.9048532366246484,
      "learning_rate": 3.3072345458768063e-06,
      "loss": 0.1489,
      "step": 4619
    },
    {
      "epoch": 0.4256691389874234,
      "grad_norm": 0.9137996003679597,
      "learning_rate": 3.306513530269012e-06,
      "loss": 0.1414,
      "step": 4620
    },
    {
      "epoch": 0.4257612751646934,
      "grad_norm": 0.890587578081525,
      "learning_rate": 3.30579243977635e-06,
      "loss": 0.147,
      "step": 4621
    },
    {
      "epoch": 0.4258534113419634,
      "grad_norm": 0.8890025500786466,
      "learning_rate": 3.305071274465774e-06,
      "loss": 0.1409,
      "step": 4622
    },
    {
      "epoch": 0.4259455475192334,
      "grad_norm": 0.9500786995375841,
      "learning_rate": 3.304350034404243e-06,
      "loss": 0.146,
      "step": 4623
    },
    {
      "epoch": 0.4260376836965034,
      "grad_norm": 0.8989819546643587,
      "learning_rate": 3.3036287196587245e-06,
      "loss": 0.1493,
      "step": 4624
    },
    {
      "epoch": 0.42612981987377346,
      "grad_norm": 0.8978782164819534,
      "learning_rate": 3.3029073302961933e-06,
      "loss": 0.1507,
      "step": 4625
    },
    {
      "epoch": 0.42622195605104346,
      "grad_norm": 0.8881037216614471,
      "learning_rate": 3.3021858663836302e-06,
      "loss": 0.1488,
      "step": 4626
    },
    {
      "epoch": 0.42631409222831346,
      "grad_norm": 0.9523384549504138,
      "learning_rate": 3.301464327988023e-06,
      "loss": 0.1527,
      "step": 4627
    },
    {
      "epoch": 0.42640622840558345,
      "grad_norm": 0.9354390642807312,
      "learning_rate": 3.300742715176366e-06,
      "loss": 0.1607,
      "step": 4628
    },
    {
      "epoch": 0.42649836458285345,
      "grad_norm": 0.9184053064450814,
      "learning_rate": 3.300021028015662e-06,
      "loss": 0.163,
      "step": 4629
    },
    {
      "epoch": 0.42659050076012345,
      "grad_norm": 1.0349141173183476,
      "learning_rate": 3.29929926657292e-06,
      "loss": 0.1473,
      "step": 4630
    },
    {
      "epoch": 0.42668263693739344,
      "grad_norm": 0.9429341556872202,
      "learning_rate": 3.298577430915155e-06,
      "loss": 0.1457,
      "step": 4631
    },
    {
      "epoch": 0.4267747731146635,
      "grad_norm": 0.9133407174487953,
      "learning_rate": 3.297855521109389e-06,
      "loss": 0.1428,
      "step": 4632
    },
    {
      "epoch": 0.4268669092919335,
      "grad_norm": 0.9300103407525209,
      "learning_rate": 3.297133537222652e-06,
      "loss": 0.1494,
      "step": 4633
    },
    {
      "epoch": 0.4269590454692035,
      "grad_norm": 1.0055639970528747,
      "learning_rate": 3.2964114793219802e-06,
      "loss": 0.1581,
      "step": 4634
    },
    {
      "epoch": 0.4270511816464735,
      "grad_norm": 0.923588698817885,
      "learning_rate": 3.2956893474744177e-06,
      "loss": 0.1634,
      "step": 4635
    },
    {
      "epoch": 0.4271433178237435,
      "grad_norm": 0.965170088385289,
      "learning_rate": 3.294967141747013e-06,
      "loss": 0.157,
      "step": 4636
    },
    {
      "epoch": 0.4272354540010135,
      "grad_norm": 0.9313997319447985,
      "learning_rate": 3.294244862206824e-06,
      "loss": 0.147,
      "step": 4637
    },
    {
      "epoch": 0.4273275901782835,
      "grad_norm": 0.9533289369081304,
      "learning_rate": 3.293522508920914e-06,
      "loss": 0.15,
      "step": 4638
    },
    {
      "epoch": 0.42741972635555353,
      "grad_norm": 0.9066220272117559,
      "learning_rate": 3.292800081956354e-06,
      "loss": 0.1534,
      "step": 4639
    },
    {
      "epoch": 0.42751186253282353,
      "grad_norm": 0.9284221041524333,
      "learning_rate": 3.29207758138022e-06,
      "loss": 0.1683,
      "step": 4640
    },
    {
      "epoch": 0.4276039987100935,
      "grad_norm": 0.9581504482495421,
      "learning_rate": 3.2913550072595986e-06,
      "loss": 0.152,
      "step": 4641
    },
    {
      "epoch": 0.4276961348873635,
      "grad_norm": 0.9272473768699457,
      "learning_rate": 3.290632359661578e-06,
      "loss": 0.1514,
      "step": 4642
    },
    {
      "epoch": 0.4277882710646335,
      "grad_norm": 0.91905411911038,
      "learning_rate": 3.289909638653259e-06,
      "loss": 0.155,
      "step": 4643
    },
    {
      "epoch": 0.4278804072419035,
      "grad_norm": 0.8779240052127671,
      "learning_rate": 3.289186844301745e-06,
      "loss": 0.1467,
      "step": 4644
    },
    {
      "epoch": 0.4279725434191735,
      "grad_norm": 0.92752988914161,
      "learning_rate": 3.2884639766741473e-06,
      "loss": 0.1503,
      "step": 4645
    },
    {
      "epoch": 0.42806467959644356,
      "grad_norm": 0.9044292622735791,
      "learning_rate": 3.2877410358375845e-06,
      "loss": 0.1484,
      "step": 4646
    },
    {
      "epoch": 0.42815681577371356,
      "grad_norm": 0.98783505304058,
      "learning_rate": 3.287018021859182e-06,
      "loss": 0.1379,
      "step": 4647
    },
    {
      "epoch": 0.42824895195098356,
      "grad_norm": 0.9153357355574386,
      "learning_rate": 3.2862949348060707e-06,
      "loss": 0.1485,
      "step": 4648
    },
    {
      "epoch": 0.42834108812825356,
      "grad_norm": 0.8891298445580663,
      "learning_rate": 3.285571774745391e-06,
      "loss": 0.145,
      "step": 4649
    },
    {
      "epoch": 0.42843322430552355,
      "grad_norm": 0.9539432888562989,
      "learning_rate": 3.2848485417442867e-06,
      "loss": 0.143,
      "step": 4650
    },
    {
      "epoch": 0.42852536048279355,
      "grad_norm": 0.9597047185512093,
      "learning_rate": 3.2841252358699115e-06,
      "loss": 0.1428,
      "step": 4651
    },
    {
      "epoch": 0.4286174966600636,
      "grad_norm": 0.9896270645503378,
      "learning_rate": 3.2834018571894233e-06,
      "loss": 0.1447,
      "step": 4652
    },
    {
      "epoch": 0.4287096328373336,
      "grad_norm": 1.0332260681499266,
      "learning_rate": 3.282678405769988e-06,
      "loss": 0.1542,
      "step": 4653
    },
    {
      "epoch": 0.4288017690146036,
      "grad_norm": 0.9539150898547636,
      "learning_rate": 3.2819548816787794e-06,
      "loss": 0.1526,
      "step": 4654
    },
    {
      "epoch": 0.4288939051918736,
      "grad_norm": 0.8810375273548426,
      "learning_rate": 3.2812312849829754e-06,
      "loss": 0.1399,
      "step": 4655
    },
    {
      "epoch": 0.4289860413691436,
      "grad_norm": 0.8775173004031491,
      "learning_rate": 3.280507615749763e-06,
      "loss": 0.1401,
      "step": 4656
    },
    {
      "epoch": 0.4290781775464136,
      "grad_norm": 0.9351979870510768,
      "learning_rate": 3.279783874046334e-06,
      "loss": 0.1516,
      "step": 4657
    },
    {
      "epoch": 0.4291703137236836,
      "grad_norm": 0.9308439149797066,
      "learning_rate": 3.2790600599398882e-06,
      "loss": 0.1416,
      "step": 4658
    },
    {
      "epoch": 0.42926244990095364,
      "grad_norm": 0.9622148973196127,
      "learning_rate": 3.2783361734976325e-06,
      "loss": 0.1585,
      "step": 4659
    },
    {
      "epoch": 0.42935458607822363,
      "grad_norm": 0.8675676981158611,
      "learning_rate": 3.2776122147867782e-06,
      "loss": 0.1388,
      "step": 4660
    },
    {
      "epoch": 0.42944672225549363,
      "grad_norm": 0.8296889413789598,
      "learning_rate": 3.276888183874547e-06,
      "loss": 0.1348,
      "step": 4661
    },
    {
      "epoch": 0.4295388584327636,
      "grad_norm": 0.8858689329829845,
      "learning_rate": 3.2761640808281647e-06,
      "loss": 0.1405,
      "step": 4662
    },
    {
      "epoch": 0.4296309946100336,
      "grad_norm": 0.9024337133238689,
      "learning_rate": 3.275439905714863e-06,
      "loss": 0.1478,
      "step": 4663
    },
    {
      "epoch": 0.4297231307873036,
      "grad_norm": 0.9342100871010554,
      "learning_rate": 3.274715658601883e-06,
      "loss": 0.1545,
      "step": 4664
    },
    {
      "epoch": 0.4298152669645736,
      "grad_norm": 0.906760939816061,
      "learning_rate": 3.273991339556471e-06,
      "loss": 0.144,
      "step": 4665
    },
    {
      "epoch": 0.42990740314184367,
      "grad_norm": 0.9287086900859022,
      "learning_rate": 3.2732669486458796e-06,
      "loss": 0.1617,
      "step": 4666
    },
    {
      "epoch": 0.42999953931911367,
      "grad_norm": 0.8811760608349966,
      "learning_rate": 3.272542485937369e-06,
      "loss": 0.1321,
      "step": 4667
    },
    {
      "epoch": 0.43009167549638366,
      "grad_norm": 0.904797164755038,
      "learning_rate": 3.271817951498205e-06,
      "loss": 0.1404,
      "step": 4668
    },
    {
      "epoch": 0.43018381167365366,
      "grad_norm": 0.8849305530273994,
      "learning_rate": 3.271093345395661e-06,
      "loss": 0.146,
      "step": 4669
    },
    {
      "epoch": 0.43027594785092366,
      "grad_norm": 0.8885015854103672,
      "learning_rate": 3.270368667697018e-06,
      "loss": 0.145,
      "step": 4670
    },
    {
      "epoch": 0.43036808402819365,
      "grad_norm": 0.8259412462687333,
      "learning_rate": 3.2696439184695606e-06,
      "loss": 0.124,
      "step": 4671
    },
    {
      "epoch": 0.43046022020546365,
      "grad_norm": 0.8918841353411899,
      "learning_rate": 3.2689190977805822e-06,
      "loss": 0.1293,
      "step": 4672
    },
    {
      "epoch": 0.4305523563827337,
      "grad_norm": 0.9323176495034798,
      "learning_rate": 3.2681942056973838e-06,
      "loss": 0.141,
      "step": 4673
    },
    {
      "epoch": 0.4306444925600037,
      "grad_norm": 0.9326715643680737,
      "learning_rate": 3.26746924228727e-06,
      "loss": 0.1574,
      "step": 4674
    },
    {
      "epoch": 0.4307366287372737,
      "grad_norm": 0.8857621090497365,
      "learning_rate": 3.2667442076175543e-06,
      "loss": 0.1416,
      "step": 4675
    },
    {
      "epoch": 0.4308287649145437,
      "grad_norm": 0.9038051971376051,
      "learning_rate": 3.2660191017555567e-06,
      "loss": 0.1414,
      "step": 4676
    },
    {
      "epoch": 0.4309209010918137,
      "grad_norm": 0.9216378597983858,
      "learning_rate": 3.2652939247686027e-06,
      "loss": 0.1366,
      "step": 4677
    },
    {
      "epoch": 0.4310130372690837,
      "grad_norm": 0.9440044237173141,
      "learning_rate": 3.2645686767240263e-06,
      "loss": 0.1478,
      "step": 4678
    },
    {
      "epoch": 0.4311051734463537,
      "grad_norm": 0.9933274011071618,
      "learning_rate": 3.2638433576891647e-06,
      "loss": 0.1602,
      "step": 4679
    },
    {
      "epoch": 0.43119730962362374,
      "grad_norm": 0.9203155714472351,
      "learning_rate": 3.263117967731366e-06,
      "loss": 0.1558,
      "step": 4680
    },
    {
      "epoch": 0.43128944580089373,
      "grad_norm": 0.8740835069784506,
      "learning_rate": 3.2623925069179817e-06,
      "loss": 0.1335,
      "step": 4681
    },
    {
      "epoch": 0.43138158197816373,
      "grad_norm": 1.0122592203806835,
      "learning_rate": 3.2616669753163717e-06,
      "loss": 0.1554,
      "step": 4682
    },
    {
      "epoch": 0.4314737181554337,
      "grad_norm": 0.9154881263246533,
      "learning_rate": 3.2609413729939005e-06,
      "loss": 0.1444,
      "step": 4683
    },
    {
      "epoch": 0.4315658543327037,
      "grad_norm": 0.892742980949353,
      "learning_rate": 3.260215700017941e-06,
      "loss": 0.1378,
      "step": 4684
    },
    {
      "epoch": 0.4316579905099737,
      "grad_norm": 0.9415049276203106,
      "learning_rate": 3.2594899564558713e-06,
      "loss": 0.1585,
      "step": 4685
    },
    {
      "epoch": 0.4317501266872438,
      "grad_norm": 0.8706508361164794,
      "learning_rate": 3.2587641423750782e-06,
      "loss": 0.1431,
      "step": 4686
    },
    {
      "epoch": 0.43184226286451377,
      "grad_norm": 0.8445511287505905,
      "learning_rate": 3.2580382578429525e-06,
      "loss": 0.1384,
      "step": 4687
    },
    {
      "epoch": 0.43193439904178377,
      "grad_norm": 0.9137170420709978,
      "learning_rate": 3.2573123029268926e-06,
      "loss": 0.1468,
      "step": 4688
    },
    {
      "epoch": 0.43202653521905376,
      "grad_norm": 0.9192552269512315,
      "learning_rate": 3.256586277694305e-06,
      "loss": 0.1407,
      "step": 4689
    },
    {
      "epoch": 0.43211867139632376,
      "grad_norm": 0.9422981334954762,
      "learning_rate": 3.255860182212599e-06,
      "loss": 0.159,
      "step": 4690
    },
    {
      "epoch": 0.43221080757359376,
      "grad_norm": 0.888256356837777,
      "learning_rate": 3.2551340165491947e-06,
      "loss": 0.148,
      "step": 4691
    },
    {
      "epoch": 0.43230294375086376,
      "grad_norm": 0.9066577043703513,
      "learning_rate": 3.254407780771515e-06,
      "loss": 0.1446,
      "step": 4692
    },
    {
      "epoch": 0.4323950799281338,
      "grad_norm": 0.8204021095990877,
      "learning_rate": 3.2536814749469915e-06,
      "loss": 0.1287,
      "step": 4693
    },
    {
      "epoch": 0.4324872161054038,
      "grad_norm": 0.8675428336875458,
      "learning_rate": 3.252955099143062e-06,
      "loss": 0.1402,
      "step": 4694
    },
    {
      "epoch": 0.4325793522826738,
      "grad_norm": 0.8491928019041444,
      "learning_rate": 3.2522286534271706e-06,
      "loss": 0.1403,
      "step": 4695
    },
    {
      "epoch": 0.4326714884599438,
      "grad_norm": 0.8968385179030194,
      "learning_rate": 3.2515021378667677e-06,
      "loss": 0.1456,
      "step": 4696
    },
    {
      "epoch": 0.4327636246372138,
      "grad_norm": 0.8221696054310007,
      "learning_rate": 3.250775552529312e-06,
      "loss": 0.1304,
      "step": 4697
    },
    {
      "epoch": 0.4328557608144838,
      "grad_norm": 0.924831124311071,
      "learning_rate": 3.250048897482263e-06,
      "loss": 0.1527,
      "step": 4698
    },
    {
      "epoch": 0.4329478969917538,
      "grad_norm": 0.9196754936163174,
      "learning_rate": 3.2493221727930947e-06,
      "loss": 0.1568,
      "step": 4699
    },
    {
      "epoch": 0.43304003316902384,
      "grad_norm": 0.9493069882745526,
      "learning_rate": 3.2485953785292813e-06,
      "loss": 0.1467,
      "step": 4700
    },
    {
      "epoch": 0.43313216934629384,
      "grad_norm": 0.8814778987108999,
      "learning_rate": 3.247868514758307e-06,
      "loss": 0.1507,
      "step": 4701
    },
    {
      "epoch": 0.43322430552356384,
      "grad_norm": 0.9240643672635029,
      "learning_rate": 3.2471415815476603e-06,
      "loss": 0.1526,
      "step": 4702
    },
    {
      "epoch": 0.43331644170083383,
      "grad_norm": 0.8567708852826385,
      "learning_rate": 3.246414578964837e-06,
      "loss": 0.1372,
      "step": 4703
    },
    {
      "epoch": 0.43340857787810383,
      "grad_norm": 0.8995443727689885,
      "learning_rate": 3.24568750707734e-06,
      "loss": 0.1518,
      "step": 4704
    },
    {
      "epoch": 0.4335007140553738,
      "grad_norm": 0.8848310081209736,
      "learning_rate": 3.2449603659526787e-06,
      "loss": 0.1414,
      "step": 4705
    },
    {
      "epoch": 0.4335928502326438,
      "grad_norm": 0.9253510253751343,
      "learning_rate": 3.244233155658365e-06,
      "loss": 0.1561,
      "step": 4706
    },
    {
      "epoch": 0.4336849864099139,
      "grad_norm": 0.8987080572532566,
      "learning_rate": 3.2435058762619243e-06,
      "loss": 0.1459,
      "step": 4707
    },
    {
      "epoch": 0.43377712258718387,
      "grad_norm": 0.8418375834879664,
      "learning_rate": 3.2427785278308832e-06,
      "loss": 0.1374,
      "step": 4708
    },
    {
      "epoch": 0.43386925876445387,
      "grad_norm": 0.8605526697968859,
      "learning_rate": 3.242051110432775e-06,
      "loss": 0.1493,
      "step": 4709
    },
    {
      "epoch": 0.43396139494172387,
      "grad_norm": 0.8897085366064001,
      "learning_rate": 3.241323624135142e-06,
      "loss": 0.1448,
      "step": 4710
    },
    {
      "epoch": 0.43405353111899386,
      "grad_norm": 0.8611729087577313,
      "learning_rate": 3.2405960690055307e-06,
      "loss": 0.137,
      "step": 4711
    },
    {
      "epoch": 0.43414566729626386,
      "grad_norm": 0.8688996045321737,
      "learning_rate": 3.2398684451114936e-06,
      "loss": 0.1367,
      "step": 4712
    },
    {
      "epoch": 0.43423780347353386,
      "grad_norm": 0.9014621771431871,
      "learning_rate": 3.2391407525205933e-06,
      "loss": 0.1479,
      "step": 4713
    },
    {
      "epoch": 0.4343299396508039,
      "grad_norm": 0.9021014682601557,
      "learning_rate": 3.2384129913003935e-06,
      "loss": 0.1499,
      "step": 4714
    },
    {
      "epoch": 0.4344220758280739,
      "grad_norm": 0.9191301374460689,
      "learning_rate": 3.237685161518468e-06,
      "loss": 0.1513,
      "step": 4715
    },
    {
      "epoch": 0.4345142120053439,
      "grad_norm": 0.9119658593871829,
      "learning_rate": 3.236957263242396e-06,
      "loss": 0.1439,
      "step": 4716
    },
    {
      "epoch": 0.4346063481826139,
      "grad_norm": 0.9280932838157593,
      "learning_rate": 3.2362292965397633e-06,
      "loss": 0.1528,
      "step": 4717
    },
    {
      "epoch": 0.4346984843598839,
      "grad_norm": 0.9312157213794583,
      "learning_rate": 3.235501261478161e-06,
      "loss": 0.146,
      "step": 4718
    },
    {
      "epoch": 0.4347906205371539,
      "grad_norm": 0.9302842434648845,
      "learning_rate": 3.2347731581251866e-06,
      "loss": 0.1393,
      "step": 4719
    },
    {
      "epoch": 0.43488275671442395,
      "grad_norm": 0.8610238509695106,
      "learning_rate": 3.2340449865484464e-06,
      "loss": 0.1412,
      "step": 4720
    },
    {
      "epoch": 0.43497489289169394,
      "grad_norm": 0.8708422464114209,
      "learning_rate": 3.23331674681555e-06,
      "loss": 0.127,
      "step": 4721
    },
    {
      "epoch": 0.43506702906896394,
      "grad_norm": 0.9170158188594641,
      "learning_rate": 3.2325884389941147e-06,
      "loss": 0.1388,
      "step": 4722
    },
    {
      "epoch": 0.43515916524623394,
      "grad_norm": 0.9055547940614805,
      "learning_rate": 3.2318600631517637e-06,
      "loss": 0.1438,
      "step": 4723
    },
    {
      "epoch": 0.43525130142350393,
      "grad_norm": 0.937925363757348,
      "learning_rate": 3.2311316193561277e-06,
      "loss": 0.155,
      "step": 4724
    },
    {
      "epoch": 0.43534343760077393,
      "grad_norm": 0.8980494734303124,
      "learning_rate": 3.230403107674841e-06,
      "loss": 0.1511,
      "step": 4725
    },
    {
      "epoch": 0.4354355737780439,
      "grad_norm": 0.9131532114439331,
      "learning_rate": 3.2296745281755485e-06,
      "loss": 0.146,
      "step": 4726
    },
    {
      "epoch": 0.435527709955314,
      "grad_norm": 0.9058295332114711,
      "learning_rate": 3.2289458809258965e-06,
      "loss": 0.1552,
      "step": 4727
    },
    {
      "epoch": 0.435619846132584,
      "grad_norm": 0.8947703410791472,
      "learning_rate": 3.2282171659935415e-06,
      "loss": 0.1388,
      "step": 4728
    },
    {
      "epoch": 0.435711982309854,
      "grad_norm": 0.8926286281907707,
      "learning_rate": 3.2274883834461444e-06,
      "loss": 0.1344,
      "step": 4729
    },
    {
      "epoch": 0.43580411848712397,
      "grad_norm": 0.9531048239256399,
      "learning_rate": 3.2267595333513724e-06,
      "loss": 0.1592,
      "step": 4730
    },
    {
      "epoch": 0.43589625466439397,
      "grad_norm": 0.9201501804582292,
      "learning_rate": 3.2260306157768994e-06,
      "loss": 0.148,
      "step": 4731
    },
    {
      "epoch": 0.43598839084166396,
      "grad_norm": 0.9174290109231636,
      "learning_rate": 3.2253016307904063e-06,
      "loss": 0.1385,
      "step": 4732
    },
    {
      "epoch": 0.43608052701893396,
      "grad_norm": 0.8375669138427223,
      "learning_rate": 3.224572578459577e-06,
      "loss": 0.1278,
      "step": 4733
    },
    {
      "epoch": 0.436172663196204,
      "grad_norm": 0.963997166950795,
      "learning_rate": 3.2238434588521078e-06,
      "loss": 0.1501,
      "step": 4734
    },
    {
      "epoch": 0.436264799373474,
      "grad_norm": 0.9336340407484712,
      "learning_rate": 3.2231142720356946e-06,
      "loss": 0.1528,
      "step": 4735
    },
    {
      "epoch": 0.436356935550744,
      "grad_norm": 0.9042393512372882,
      "learning_rate": 3.222385018078043e-06,
      "loss": 0.1368,
      "step": 4736
    },
    {
      "epoch": 0.436449071728014,
      "grad_norm": 0.9438201754765096,
      "learning_rate": 3.2216556970468656e-06,
      "loss": 0.1562,
      "step": 4737
    },
    {
      "epoch": 0.436541207905284,
      "grad_norm": 0.938414838398955,
      "learning_rate": 3.2209263090098785e-06,
      "loss": 0.1526,
      "step": 4738
    },
    {
      "epoch": 0.436633344082554,
      "grad_norm": 0.9018511969382467,
      "learning_rate": 3.220196854034806e-06,
      "loss": 0.138,
      "step": 4739
    },
    {
      "epoch": 0.436725480259824,
      "grad_norm": 0.8535484944038092,
      "learning_rate": 3.2194673321893787e-06,
      "loss": 0.1288,
      "step": 4740
    },
    {
      "epoch": 0.43681761643709405,
      "grad_norm": 0.8802381462413789,
      "learning_rate": 3.2187377435413316e-06,
      "loss": 0.1487,
      "step": 4741
    },
    {
      "epoch": 0.43690975261436404,
      "grad_norm": 0.9534514302434024,
      "learning_rate": 3.2180080881584075e-06,
      "loss": 0.1572,
      "step": 4742
    },
    {
      "epoch": 0.43700188879163404,
      "grad_norm": 0.8807997475524247,
      "learning_rate": 3.2172783661083556e-06,
      "loss": 0.1514,
      "step": 4743
    },
    {
      "epoch": 0.43709402496890404,
      "grad_norm": 0.9205359815568849,
      "learning_rate": 3.21654857745893e-06,
      "loss": 0.1507,
      "step": 4744
    },
    {
      "epoch": 0.43718616114617403,
      "grad_norm": 0.8949200471013621,
      "learning_rate": 3.2158187222778926e-06,
      "loss": 0.1548,
      "step": 4745
    },
    {
      "epoch": 0.43727829732344403,
      "grad_norm": 0.893054846565139,
      "learning_rate": 3.215088800633009e-06,
      "loss": 0.1368,
      "step": 4746
    },
    {
      "epoch": 0.43737043350071403,
      "grad_norm": 0.8943073971027454,
      "learning_rate": 3.214358812592053e-06,
      "loss": 0.135,
      "step": 4747
    },
    {
      "epoch": 0.4374625696779841,
      "grad_norm": 0.9181687265142016,
      "learning_rate": 3.2136287582228048e-06,
      "loss": 0.1328,
      "step": 4748
    },
    {
      "epoch": 0.4375547058552541,
      "grad_norm": 0.9251073922168981,
      "learning_rate": 3.2128986375930495e-06,
      "loss": 0.1437,
      "step": 4749
    },
    {
      "epoch": 0.4376468420325241,
      "grad_norm": 0.8926775990316347,
      "learning_rate": 3.212168450770579e-06,
      "loss": 0.1378,
      "step": 4750
    },
    {
      "epoch": 0.43773897820979407,
      "grad_norm": 0.853248400512252,
      "learning_rate": 3.2114381978231918e-06,
      "loss": 0.1443,
      "step": 4751
    },
    {
      "epoch": 0.43783111438706407,
      "grad_norm": 0.9583420162746287,
      "learning_rate": 3.21070787881869e-06,
      "loss": 0.1488,
      "step": 4752
    },
    {
      "epoch": 0.43792325056433407,
      "grad_norm": 0.8855991731771236,
      "learning_rate": 3.2099774938248866e-06,
      "loss": 0.1349,
      "step": 4753
    },
    {
      "epoch": 0.4380153867416041,
      "grad_norm": 0.8946899043733684,
      "learning_rate": 3.2092470429095955e-06,
      "loss": 0.1519,
      "step": 4754
    },
    {
      "epoch": 0.4381075229188741,
      "grad_norm": 0.8759680421106206,
      "learning_rate": 3.208516526140641e-06,
      "loss": 0.14,
      "step": 4755
    },
    {
      "epoch": 0.4381996590961441,
      "grad_norm": 0.9408448869394528,
      "learning_rate": 3.2077859435858503e-06,
      "loss": 0.1601,
      "step": 4756
    },
    {
      "epoch": 0.4382917952734141,
      "grad_norm": 0.9347952147345089,
      "learning_rate": 3.2070552953130586e-06,
      "loss": 0.1589,
      "step": 4757
    },
    {
      "epoch": 0.4383839314506841,
      "grad_norm": 0.9081980861793578,
      "learning_rate": 3.2063245813901068e-06,
      "loss": 0.1582,
      "step": 4758
    },
    {
      "epoch": 0.4384760676279541,
      "grad_norm": 0.8939823014855057,
      "learning_rate": 3.2055938018848417e-06,
      "loss": 0.154,
      "step": 4759
    },
    {
      "epoch": 0.4385682038052241,
      "grad_norm": 0.9056882583574686,
      "learning_rate": 3.2048629568651153e-06,
      "loss": 0.1539,
      "step": 4760
    },
    {
      "epoch": 0.43866033998249415,
      "grad_norm": 0.8749131675102494,
      "learning_rate": 3.2041320463987886e-06,
      "loss": 0.1444,
      "step": 4761
    },
    {
      "epoch": 0.43875247615976415,
      "grad_norm": 0.8808614298731526,
      "learning_rate": 3.2034010705537245e-06,
      "loss": 0.1415,
      "step": 4762
    },
    {
      "epoch": 0.43884461233703415,
      "grad_norm": 0.9542017201745561,
      "learning_rate": 3.202670029397796e-06,
      "loss": 0.1473,
      "step": 4763
    },
    {
      "epoch": 0.43893674851430414,
      "grad_norm": 0.9145181955112874,
      "learning_rate": 3.2019389229988794e-06,
      "loss": 0.1527,
      "step": 4764
    },
    {
      "epoch": 0.43902888469157414,
      "grad_norm": 0.9118897020011315,
      "learning_rate": 3.2012077514248592e-06,
      "loss": 0.1365,
      "step": 4765
    },
    {
      "epoch": 0.43912102086884414,
      "grad_norm": 0.8890964393279647,
      "learning_rate": 3.2004765147436228e-06,
      "loss": 0.1555,
      "step": 4766
    },
    {
      "epoch": 0.43921315704611413,
      "grad_norm": 0.9537052872925784,
      "learning_rate": 3.1997452130230664e-06,
      "loss": 0.1395,
      "step": 4767
    },
    {
      "epoch": 0.4393052932233842,
      "grad_norm": 0.924180348352055,
      "learning_rate": 3.1990138463310923e-06,
      "loss": 0.1405,
      "step": 4768
    },
    {
      "epoch": 0.4393974294006542,
      "grad_norm": 0.8637655086910015,
      "learning_rate": 3.1982824147356078e-06,
      "loss": 0.1447,
      "step": 4769
    },
    {
      "epoch": 0.4394895655779242,
      "grad_norm": 0.912924057763308,
      "learning_rate": 3.197550918304525e-06,
      "loss": 0.141,
      "step": 4770
    },
    {
      "epoch": 0.4395817017551942,
      "grad_norm": 0.9110661693707881,
      "learning_rate": 3.196819357105764e-06,
      "loss": 0.1416,
      "step": 4771
    },
    {
      "epoch": 0.4396738379324642,
      "grad_norm": 0.9416488247361345,
      "learning_rate": 3.196087731207252e-06,
      "loss": 0.1539,
      "step": 4772
    },
    {
      "epoch": 0.43976597410973417,
      "grad_norm": 0.9280205052202724,
      "learning_rate": 3.1953560406769184e-06,
      "loss": 0.15,
      "step": 4773
    },
    {
      "epoch": 0.43985811028700417,
      "grad_norm": 0.9515080388192296,
      "learning_rate": 3.194624285582702e-06,
      "loss": 0.1563,
      "step": 4774
    },
    {
      "epoch": 0.4399502464642742,
      "grad_norm": 0.8522352854395673,
      "learning_rate": 3.1938924659925457e-06,
      "loss": 0.1416,
      "step": 4775
    },
    {
      "epoch": 0.4400423826415442,
      "grad_norm": 0.8685099112900193,
      "learning_rate": 3.193160581974399e-06,
      "loss": 0.1275,
      "step": 4776
    },
    {
      "epoch": 0.4401345188188142,
      "grad_norm": 0.8909613526044037,
      "learning_rate": 3.1924286335962177e-06,
      "loss": 0.1475,
      "step": 4777
    },
    {
      "epoch": 0.4402266549960842,
      "grad_norm": 0.9496888743162736,
      "learning_rate": 3.1916966209259636e-06,
      "loss": 0.1506,
      "step": 4778
    },
    {
      "epoch": 0.4403187911733542,
      "grad_norm": 0.8581608533036216,
      "learning_rate": 3.1909645440316034e-06,
      "loss": 0.1322,
      "step": 4779
    },
    {
      "epoch": 0.4404109273506242,
      "grad_norm": 0.9290846371768269,
      "learning_rate": 3.1902324029811115e-06,
      "loss": 0.1433,
      "step": 4780
    },
    {
      "epoch": 0.4405030635278942,
      "grad_norm": 0.8837081497834979,
      "learning_rate": 3.1895001978424665e-06,
      "loss": 0.148,
      "step": 4781
    },
    {
      "epoch": 0.44059519970516425,
      "grad_norm": 0.8818053795949022,
      "learning_rate": 3.188767928683654e-06,
      "loss": 0.1572,
      "step": 4782
    },
    {
      "epoch": 0.44068733588243425,
      "grad_norm": 0.9566123724363703,
      "learning_rate": 3.188035595572665e-06,
      "loss": 0.1468,
      "step": 4783
    },
    {
      "epoch": 0.44077947205970425,
      "grad_norm": 0.9126945997475431,
      "learning_rate": 3.1873031985774972e-06,
      "loss": 0.1417,
      "step": 4784
    },
    {
      "epoch": 0.44087160823697424,
      "grad_norm": 0.9319946243034919,
      "learning_rate": 3.186570737766153e-06,
      "loss": 0.1524,
      "step": 4785
    },
    {
      "epoch": 0.44096374441424424,
      "grad_norm": 0.8594125708493436,
      "learning_rate": 3.1858382132066422e-06,
      "loss": 0.1428,
      "step": 4786
    },
    {
      "epoch": 0.44105588059151424,
      "grad_norm": 0.8475132117275923,
      "learning_rate": 3.1851056249669786e-06,
      "loss": 0.1396,
      "step": 4787
    },
    {
      "epoch": 0.4411480167687843,
      "grad_norm": 0.9114714437921265,
      "learning_rate": 3.1843729731151855e-06,
      "loss": 0.1506,
      "step": 4788
    },
    {
      "epoch": 0.4412401529460543,
      "grad_norm": 0.8478111369854033,
      "learning_rate": 3.183640257719287e-06,
      "loss": 0.1399,
      "step": 4789
    },
    {
      "epoch": 0.4413322891233243,
      "grad_norm": 0.9881708242449926,
      "learning_rate": 3.182907478847318e-06,
      "loss": 0.1628,
      "step": 4790
    },
    {
      "epoch": 0.4414244253005943,
      "grad_norm": 0.8973574240687511,
      "learning_rate": 3.1821746365673157e-06,
      "loss": 0.1507,
      "step": 4791
    },
    {
      "epoch": 0.4415165614778643,
      "grad_norm": 0.9318494415821815,
      "learning_rate": 3.1814417309473243e-06,
      "loss": 0.1401,
      "step": 4792
    },
    {
      "epoch": 0.4416086976551343,
      "grad_norm": 0.9456870027866897,
      "learning_rate": 3.1807087620553957e-06,
      "loss": 0.1535,
      "step": 4793
    },
    {
      "epoch": 0.44170083383240427,
      "grad_norm": 0.8681572085432423,
      "learning_rate": 3.179975729959585e-06,
      "loss": 0.1532,
      "step": 4794
    },
    {
      "epoch": 0.4417929700096743,
      "grad_norm": 0.931784470899894,
      "learning_rate": 3.1792426347279544e-06,
      "loss": 0.1557,
      "step": 4795
    },
    {
      "epoch": 0.4418851061869443,
      "grad_norm": 0.8708926614080035,
      "learning_rate": 3.178509476428573e-06,
      "loss": 0.1389,
      "step": 4796
    },
    {
      "epoch": 0.4419772423642143,
      "grad_norm": 0.9255219971385017,
      "learning_rate": 3.177776255129512e-06,
      "loss": 0.1564,
      "step": 4797
    },
    {
      "epoch": 0.4420693785414843,
      "grad_norm": 0.9397331600366731,
      "learning_rate": 3.1770429708988536e-06,
      "loss": 0.1665,
      "step": 4798
    },
    {
      "epoch": 0.4421615147187543,
      "grad_norm": 0.8823264473223045,
      "learning_rate": 3.1763096238046833e-06,
      "loss": 0.1448,
      "step": 4799
    },
    {
      "epoch": 0.4422536508960243,
      "grad_norm": 0.9225948201087502,
      "learning_rate": 3.1755762139150905e-06,
      "loss": 0.1496,
      "step": 4800
    },
    {
      "epoch": 0.4423457870732943,
      "grad_norm": 0.8726837869356154,
      "learning_rate": 3.1748427412981742e-06,
      "loss": 0.1418,
      "step": 4801
    },
    {
      "epoch": 0.44243792325056436,
      "grad_norm": 0.8557987601165721,
      "learning_rate": 3.1741092060220364e-06,
      "loss": 0.1345,
      "step": 4802
    },
    {
      "epoch": 0.44253005942783435,
      "grad_norm": 0.8580919943355668,
      "learning_rate": 3.1733756081547864e-06,
      "loss": 0.1394,
      "step": 4803
    },
    {
      "epoch": 0.44262219560510435,
      "grad_norm": 0.9176851413304004,
      "learning_rate": 3.172641947764539e-06,
      "loss": 0.1449,
      "step": 4804
    },
    {
      "epoch": 0.44271433178237435,
      "grad_norm": 0.9095773510548247,
      "learning_rate": 3.1719082249194134e-06,
      "loss": 0.1456,
      "step": 4805
    },
    {
      "epoch": 0.44280646795964435,
      "grad_norm": 0.8799083657238806,
      "learning_rate": 3.171174439687538e-06,
      "loss": 0.1377,
      "step": 4806
    },
    {
      "epoch": 0.44289860413691434,
      "grad_norm": 0.8851747834590045,
      "learning_rate": 3.1704405921370428e-06,
      "loss": 0.1313,
      "step": 4807
    },
    {
      "epoch": 0.44299074031418434,
      "grad_norm": 0.878962379862866,
      "learning_rate": 3.169706682336066e-06,
      "loss": 0.1395,
      "step": 4808
    },
    {
      "epoch": 0.4430828764914544,
      "grad_norm": 0.8508108772869166,
      "learning_rate": 3.1689727103527536e-06,
      "loss": 0.1408,
      "step": 4809
    },
    {
      "epoch": 0.4431750126687244,
      "grad_norm": 0.8878317199533106,
      "learning_rate": 3.168238676255251e-06,
      "loss": 0.1433,
      "step": 4810
    },
    {
      "epoch": 0.4432671488459944,
      "grad_norm": 0.9226903423145814,
      "learning_rate": 3.1675045801117167e-06,
      "loss": 0.1492,
      "step": 4811
    },
    {
      "epoch": 0.4433592850232644,
      "grad_norm": 0.9388424210883725,
      "learning_rate": 3.1667704219903095e-06,
      "loss": 0.1575,
      "step": 4812
    },
    {
      "epoch": 0.4434514212005344,
      "grad_norm": 0.8534203981677879,
      "learning_rate": 3.1660362019591972e-06,
      "loss": 0.1377,
      "step": 4813
    },
    {
      "epoch": 0.4435435573778044,
      "grad_norm": 0.9133209220134044,
      "learning_rate": 3.1653019200865513e-06,
      "loss": 0.1437,
      "step": 4814
    },
    {
      "epoch": 0.4436356935550744,
      "grad_norm": 0.9172019407225023,
      "learning_rate": 3.164567576440552e-06,
      "loss": 0.1507,
      "step": 4815
    },
    {
      "epoch": 0.4437278297323444,
      "grad_norm": 0.9953185567517315,
      "learning_rate": 3.1638331710893804e-06,
      "loss": 0.1562,
      "step": 4816
    },
    {
      "epoch": 0.4438199659096144,
      "grad_norm": 0.9378368946596357,
      "learning_rate": 3.163098704101228e-06,
      "loss": 0.1554,
      "step": 4817
    },
    {
      "epoch": 0.4439121020868844,
      "grad_norm": 0.9135001066867825,
      "learning_rate": 3.162364175544289e-06,
      "loss": 0.1472,
      "step": 4818
    },
    {
      "epoch": 0.4440042382641544,
      "grad_norm": 0.9971829365126571,
      "learning_rate": 3.161629585486766e-06,
      "loss": 0.1409,
      "step": 4819
    },
    {
      "epoch": 0.4440963744414244,
      "grad_norm": 0.9040841107610292,
      "learning_rate": 3.160894933996864e-06,
      "loss": 0.149,
      "step": 4820
    },
    {
      "epoch": 0.4441885106186944,
      "grad_norm": 0.9206094051297338,
      "learning_rate": 3.160160221142797e-06,
      "loss": 0.1364,
      "step": 4821
    },
    {
      "epoch": 0.44428064679596446,
      "grad_norm": 0.9379688154656526,
      "learning_rate": 3.159425446992781e-06,
      "loss": 0.143,
      "step": 4822
    },
    {
      "epoch": 0.44437278297323446,
      "grad_norm": 0.9567257382459619,
      "learning_rate": 3.1586906116150428e-06,
      "loss": 0.1482,
      "step": 4823
    },
    {
      "epoch": 0.44446491915050446,
      "grad_norm": 1.005451648614117,
      "learning_rate": 3.1579557150778094e-06,
      "loss": 0.1601,
      "step": 4824
    },
    {
      "epoch": 0.44455705532777445,
      "grad_norm": 0.8817243195267939,
      "learning_rate": 3.1572207574493174e-06,
      "loss": 0.1428,
      "step": 4825
    },
    {
      "epoch": 0.44464919150504445,
      "grad_norm": 0.849646786373053,
      "learning_rate": 3.1564857387978075e-06,
      "loss": 0.1456,
      "step": 4826
    },
    {
      "epoch": 0.44474132768231445,
      "grad_norm": 0.9519644437243897,
      "learning_rate": 3.155750659191526e-06,
      "loss": 0.1594,
      "step": 4827
    },
    {
      "epoch": 0.44483346385958444,
      "grad_norm": 0.8774384982653047,
      "learning_rate": 3.155015518698725e-06,
      "loss": 0.1443,
      "step": 4828
    },
    {
      "epoch": 0.4449256000368545,
      "grad_norm": 0.9294480749474386,
      "learning_rate": 3.154280317387663e-06,
      "loss": 0.152,
      "step": 4829
    },
    {
      "epoch": 0.4450177362141245,
      "grad_norm": 0.8812827080037829,
      "learning_rate": 3.1535450553266024e-06,
      "loss": 0.1467,
      "step": 4830
    },
    {
      "epoch": 0.4451098723913945,
      "grad_norm": 0.9149468689160323,
      "learning_rate": 3.1528097325838143e-06,
      "loss": 0.1496,
      "step": 4831
    },
    {
      "epoch": 0.4452020085686645,
      "grad_norm": 0.886095744307431,
      "learning_rate": 3.1520743492275714e-06,
      "loss": 0.1504,
      "step": 4832
    },
    {
      "epoch": 0.4452941447459345,
      "grad_norm": 0.9507047582370619,
      "learning_rate": 3.151338905326155e-06,
      "loss": 0.1536,
      "step": 4833
    },
    {
      "epoch": 0.4453862809232045,
      "grad_norm": 0.9475520516133387,
      "learning_rate": 3.1506034009478515e-06,
      "loss": 0.1513,
      "step": 4834
    },
    {
      "epoch": 0.4454784171004745,
      "grad_norm": 0.8963368746247009,
      "learning_rate": 3.1498678361609514e-06,
      "loss": 0.1379,
      "step": 4835
    },
    {
      "epoch": 0.44557055327774453,
      "grad_norm": 0.9466097663899271,
      "learning_rate": 3.149132211033754e-06,
      "loss": 0.1352,
      "step": 4836
    },
    {
      "epoch": 0.4456626894550145,
      "grad_norm": 0.9237287141376792,
      "learning_rate": 3.1483965256345596e-06,
      "loss": 0.1409,
      "step": 4837
    },
    {
      "epoch": 0.4457548256322845,
      "grad_norm": 1.0177207633781526,
      "learning_rate": 3.147660780031679e-06,
      "loss": 0.1584,
      "step": 4838
    },
    {
      "epoch": 0.4458469618095545,
      "grad_norm": 0.9370371601534141,
      "learning_rate": 3.146924974293425e-06,
      "loss": 0.1532,
      "step": 4839
    },
    {
      "epoch": 0.4459390979868245,
      "grad_norm": 0.9331012417513412,
      "learning_rate": 3.1461891084881175e-06,
      "loss": 0.1604,
      "step": 4840
    },
    {
      "epoch": 0.4460312341640945,
      "grad_norm": 0.9580514103078052,
      "learning_rate": 3.1454531826840816e-06,
      "loss": 0.1515,
      "step": 4841
    },
    {
      "epoch": 0.4461233703413645,
      "grad_norm": 0.931795200004456,
      "learning_rate": 3.1447171969496487e-06,
      "loss": 0.1515,
      "step": 4842
    },
    {
      "epoch": 0.44621550651863456,
      "grad_norm": 0.826891990289607,
      "learning_rate": 3.1439811513531537e-06,
      "loss": 0.1302,
      "step": 4843
    },
    {
      "epoch": 0.44630764269590456,
      "grad_norm": 0.8025586612837762,
      "learning_rate": 3.143245045962941e-06,
      "loss": 0.134,
      "step": 4844
    },
    {
      "epoch": 0.44639977887317456,
      "grad_norm": 0.9430651189859448,
      "learning_rate": 3.142508880847355e-06,
      "loss": 0.1572,
      "step": 4845
    },
    {
      "epoch": 0.44649191505044455,
      "grad_norm": 0.8750199948395706,
      "learning_rate": 3.1417726560747507e-06,
      "loss": 0.1414,
      "step": 4846
    },
    {
      "epoch": 0.44658405122771455,
      "grad_norm": 0.8970428891745201,
      "learning_rate": 3.1410363717134868e-06,
      "loss": 0.1362,
      "step": 4847
    },
    {
      "epoch": 0.44667618740498455,
      "grad_norm": 0.8970399368797006,
      "learning_rate": 3.140300027831927e-06,
      "loss": 0.156,
      "step": 4848
    },
    {
      "epoch": 0.44676832358225455,
      "grad_norm": 0.900467919696107,
      "learning_rate": 3.1395636244984397e-06,
      "loss": 0.1441,
      "step": 4849
    },
    {
      "epoch": 0.4468604597595246,
      "grad_norm": 0.8944755285685609,
      "learning_rate": 3.1388271617814015e-06,
      "loss": 0.1348,
      "step": 4850
    },
    {
      "epoch": 0.4469525959367946,
      "grad_norm": 0.83068628823159,
      "learning_rate": 3.1380906397491923e-06,
      "loss": 0.1314,
      "step": 4851
    },
    {
      "epoch": 0.4470447321140646,
      "grad_norm": 0.9164112054172598,
      "learning_rate": 3.1373540584701997e-06,
      "loss": 0.1537,
      "step": 4852
    },
    {
      "epoch": 0.4471368682913346,
      "grad_norm": 0.8992847662913391,
      "learning_rate": 3.1366174180128127e-06,
      "loss": 0.1364,
      "step": 4853
    },
    {
      "epoch": 0.4472290044686046,
      "grad_norm": 0.9212563327688689,
      "learning_rate": 3.1358807184454305e-06,
      "loss": 0.1493,
      "step": 4854
    },
    {
      "epoch": 0.4473211406458746,
      "grad_norm": 0.924510733072517,
      "learning_rate": 3.1351439598364554e-06,
      "loss": 0.1481,
      "step": 4855
    },
    {
      "epoch": 0.44741327682314463,
      "grad_norm": 0.8533748975648047,
      "learning_rate": 3.134407142254295e-06,
      "loss": 0.1257,
      "step": 4856
    },
    {
      "epoch": 0.44750541300041463,
      "grad_norm": 0.9120449629078842,
      "learning_rate": 3.1336702657673625e-06,
      "loss": 0.1446,
      "step": 4857
    },
    {
      "epoch": 0.44759754917768463,
      "grad_norm": 0.8955446860187469,
      "learning_rate": 3.132933330444079e-06,
      "loss": 0.1375,
      "step": 4858
    },
    {
      "epoch": 0.4476896853549546,
      "grad_norm": 0.9088059249145077,
      "learning_rate": 3.132196336352867e-06,
      "loss": 0.1317,
      "step": 4859
    },
    {
      "epoch": 0.4477818215322246,
      "grad_norm": 0.9870459351359203,
      "learning_rate": 3.131459283562157e-06,
      "loss": 0.1494,
      "step": 4860
    },
    {
      "epoch": 0.4478739577094946,
      "grad_norm": 0.8765641433359809,
      "learning_rate": 3.1307221721403846e-06,
      "loss": 0.1465,
      "step": 4861
    },
    {
      "epoch": 0.4479660938867646,
      "grad_norm": 0.8894333905339528,
      "learning_rate": 3.129985002155991e-06,
      "loss": 0.1375,
      "step": 4862
    },
    {
      "epoch": 0.44805823006403467,
      "grad_norm": 0.9046224882892272,
      "learning_rate": 3.129247773677422e-06,
      "loss": 0.1459,
      "step": 4863
    },
    {
      "epoch": 0.44815036624130467,
      "grad_norm": 0.8885653224314798,
      "learning_rate": 3.128510486773129e-06,
      "loss": 0.1435,
      "step": 4864
    },
    {
      "epoch": 0.44824250241857466,
      "grad_norm": 0.9047398877430426,
      "learning_rate": 3.1277731415115696e-06,
      "loss": 0.1521,
      "step": 4865
    },
    {
      "epoch": 0.44833463859584466,
      "grad_norm": 0.9101582197651139,
      "learning_rate": 3.127035737961207e-06,
      "loss": 0.1475,
      "step": 4866
    },
    {
      "epoch": 0.44842677477311466,
      "grad_norm": 0.9211512953806295,
      "learning_rate": 3.1262982761905084e-06,
      "loss": 0.1483,
      "step": 4867
    },
    {
      "epoch": 0.44851891095038465,
      "grad_norm": 0.9168309363122416,
      "learning_rate": 3.125560756267948e-06,
      "loss": 0.1431,
      "step": 4868
    },
    {
      "epoch": 0.44861104712765465,
      "grad_norm": 0.882017959352168,
      "learning_rate": 3.1248231782620035e-06,
      "loss": 0.1291,
      "step": 4869
    },
    {
      "epoch": 0.4487031833049247,
      "grad_norm": 0.9737260719423534,
      "learning_rate": 3.1240855422411593e-06,
      "loss": 0.1535,
      "step": 4870
    },
    {
      "epoch": 0.4487953194821947,
      "grad_norm": 0.8840045486699224,
      "learning_rate": 3.1233478482739065e-06,
      "loss": 0.1509,
      "step": 4871
    },
    {
      "epoch": 0.4488874556594647,
      "grad_norm": 0.9149749621641127,
      "learning_rate": 3.1226100964287378e-06,
      "loss": 0.1387,
      "step": 4872
    },
    {
      "epoch": 0.4489795918367347,
      "grad_norm": 0.9192452144221734,
      "learning_rate": 3.1218722867741553e-06,
      "loss": 0.1547,
      "step": 4873
    },
    {
      "epoch": 0.4490717280140047,
      "grad_norm": 0.8908377156409427,
      "learning_rate": 3.1211344193786636e-06,
      "loss": 0.1432,
      "step": 4874
    },
    {
      "epoch": 0.4491638641912747,
      "grad_norm": 0.8925399884941798,
      "learning_rate": 3.1203964943107747e-06,
      "loss": 0.1533,
      "step": 4875
    },
    {
      "epoch": 0.4492560003685447,
      "grad_norm": 0.861973775471057,
      "learning_rate": 3.1196585116390045e-06,
      "loss": 0.1282,
      "step": 4876
    },
    {
      "epoch": 0.44934813654581474,
      "grad_norm": 0.8880068164435665,
      "learning_rate": 3.1189204714318743e-06,
      "loss": 0.1355,
      "step": 4877
    },
    {
      "epoch": 0.44944027272308473,
      "grad_norm": 1.0015594851191234,
      "learning_rate": 3.1181823737579115e-06,
      "loss": 0.1649,
      "step": 4878
    },
    {
      "epoch": 0.44953240890035473,
      "grad_norm": 0.9213880892562584,
      "learning_rate": 3.11744421868565e-06,
      "loss": 0.1458,
      "step": 4879
    },
    {
      "epoch": 0.4496245450776247,
      "grad_norm": 0.865422722909716,
      "learning_rate": 3.1167060062836253e-06,
      "loss": 0.148,
      "step": 4880
    },
    {
      "epoch": 0.4497166812548947,
      "grad_norm": 0.877456464715817,
      "learning_rate": 3.1159677366203815e-06,
      "loss": 0.1443,
      "step": 4881
    },
    {
      "epoch": 0.4498088174321647,
      "grad_norm": 0.9847277495406819,
      "learning_rate": 3.1152294097644677e-06,
      "loss": 0.1631,
      "step": 4882
    },
    {
      "epoch": 0.4499009536094347,
      "grad_norm": 0.987382375783057,
      "learning_rate": 3.1144910257844367e-06,
      "loss": 0.1562,
      "step": 4883
    },
    {
      "epoch": 0.44999308978670477,
      "grad_norm": 0.8920639965187964,
      "learning_rate": 3.113752584748848e-06,
      "loss": 0.1413,
      "step": 4884
    },
    {
      "epoch": 0.45008522596397477,
      "grad_norm": 0.8848619030803502,
      "learning_rate": 3.1130140867262653e-06,
      "loss": 0.1435,
      "step": 4885
    },
    {
      "epoch": 0.45017736214124476,
      "grad_norm": 0.9631374264594577,
      "learning_rate": 3.112275531785259e-06,
      "loss": 0.1502,
      "step": 4886
    },
    {
      "epoch": 0.45026949831851476,
      "grad_norm": 0.8783560040106241,
      "learning_rate": 3.111536919994404e-06,
      "loss": 0.1384,
      "step": 4887
    },
    {
      "epoch": 0.45036163449578476,
      "grad_norm": 0.8626103552628747,
      "learning_rate": 3.110798251422279e-06,
      "loss": 0.1376,
      "step": 4888
    },
    {
      "epoch": 0.45045377067305475,
      "grad_norm": 0.9437049556206974,
      "learning_rate": 3.1100595261374718e-06,
      "loss": 0.1393,
      "step": 4889
    },
    {
      "epoch": 0.4505459068503248,
      "grad_norm": 0.9041755349011702,
      "learning_rate": 3.1093207442085716e-06,
      "loss": 0.1404,
      "step": 4890
    },
    {
      "epoch": 0.4506380430275948,
      "grad_norm": 0.8609050559264265,
      "learning_rate": 3.108581905704175e-06,
      "loss": 0.1401,
      "step": 4891
    },
    {
      "epoch": 0.4507301792048648,
      "grad_norm": 0.931689125980329,
      "learning_rate": 3.107843010692882e-06,
      "loss": 0.1522,
      "step": 4892
    },
    {
      "epoch": 0.4508223153821348,
      "grad_norm": 0.9401339014710594,
      "learning_rate": 3.1071040592433003e-06,
      "loss": 0.1625,
      "step": 4893
    },
    {
      "epoch": 0.4509144515594048,
      "grad_norm": 0.9329946448016365,
      "learning_rate": 3.1063650514240425e-06,
      "loss": 0.141,
      "step": 4894
    },
    {
      "epoch": 0.4510065877366748,
      "grad_norm": 0.8452855653517855,
      "learning_rate": 3.105625987303723e-06,
      "loss": 0.1403,
      "step": 4895
    },
    {
      "epoch": 0.4510987239139448,
      "grad_norm": 0.8710890044558494,
      "learning_rate": 3.104886866950966e-06,
      "loss": 0.1351,
      "step": 4896
    },
    {
      "epoch": 0.45119086009121484,
      "grad_norm": 0.9314641306578018,
      "learning_rate": 3.104147690434398e-06,
      "loss": 0.148,
      "step": 4897
    },
    {
      "epoch": 0.45128299626848484,
      "grad_norm": 0.972321592820565,
      "learning_rate": 3.103408457822653e-06,
      "loss": 0.1458,
      "step": 4898
    },
    {
      "epoch": 0.45137513244575483,
      "grad_norm": 0.9440018331535416,
      "learning_rate": 3.1026691691843667e-06,
      "loss": 0.1448,
      "step": 4899
    },
    {
      "epoch": 0.45146726862302483,
      "grad_norm": 0.8171835747175893,
      "learning_rate": 3.1019298245881836e-06,
      "loss": 0.1261,
      "step": 4900
    },
    {
      "epoch": 0.45155940480029483,
      "grad_norm": 0.9060060164646293,
      "learning_rate": 3.101190424102752e-06,
      "loss": 0.1444,
      "step": 4901
    },
    {
      "epoch": 0.4516515409775648,
      "grad_norm": 0.9156139073129704,
      "learning_rate": 3.100450967796724e-06,
      "loss": 0.1435,
      "step": 4902
    },
    {
      "epoch": 0.4517436771548348,
      "grad_norm": 0.9480421762707635,
      "learning_rate": 3.099711455738759e-06,
      "loss": 0.1498,
      "step": 4903
    },
    {
      "epoch": 0.4518358133321049,
      "grad_norm": 0.8572388070239273,
      "learning_rate": 3.0989718879975216e-06,
      "loss": 0.1261,
      "step": 4904
    },
    {
      "epoch": 0.45192794950937487,
      "grad_norm": 0.9440417055921769,
      "learning_rate": 3.098232264641679e-06,
      "loss": 0.1539,
      "step": 4905
    },
    {
      "epoch": 0.45202008568664487,
      "grad_norm": 0.9367708656728884,
      "learning_rate": 3.0974925857399067e-06,
      "loss": 0.1469,
      "step": 4906
    },
    {
      "epoch": 0.45211222186391486,
      "grad_norm": 0.9228480233084598,
      "learning_rate": 3.0967528513608834e-06,
      "loss": 0.1505,
      "step": 4907
    },
    {
      "epoch": 0.45220435804118486,
      "grad_norm": 0.9142943272446579,
      "learning_rate": 3.0960130615732934e-06,
      "loss": 0.1359,
      "step": 4908
    },
    {
      "epoch": 0.45229649421845486,
      "grad_norm": 0.9470937423096474,
      "learning_rate": 3.095273216445827e-06,
      "loss": 0.1545,
      "step": 4909
    },
    {
      "epoch": 0.45238863039572486,
      "grad_norm": 0.8542008317546738,
      "learning_rate": 3.0945333160471784e-06,
      "loss": 0.1419,
      "step": 4910
    },
    {
      "epoch": 0.4524807665729949,
      "grad_norm": 0.913915263073126,
      "learning_rate": 3.0937933604460475e-06,
      "loss": 0.1411,
      "step": 4911
    },
    {
      "epoch": 0.4525729027502649,
      "grad_norm": 0.8900641602854975,
      "learning_rate": 3.0930533497111385e-06,
      "loss": 0.1452,
      "step": 4912
    },
    {
      "epoch": 0.4526650389275349,
      "grad_norm": 0.8682569105387904,
      "learning_rate": 3.0923132839111623e-06,
      "loss": 0.1308,
      "step": 4913
    },
    {
      "epoch": 0.4527571751048049,
      "grad_norm": 0.9630318001076058,
      "learning_rate": 3.0915731631148347e-06,
      "loss": 0.1394,
      "step": 4914
    },
    {
      "epoch": 0.4528493112820749,
      "grad_norm": 0.9096845912213878,
      "learning_rate": 3.0908329873908744e-06,
      "loss": 0.1442,
      "step": 4915
    },
    {
      "epoch": 0.4529414474593449,
      "grad_norm": 0.898608735328125,
      "learning_rate": 3.0900927568080074e-06,
      "loss": 0.1393,
      "step": 4916
    },
    {
      "epoch": 0.4530335836366149,
      "grad_norm": 0.917045289108983,
      "learning_rate": 3.0893524714349655e-06,
      "loss": 0.1449,
      "step": 4917
    },
    {
      "epoch": 0.45312571981388494,
      "grad_norm": 0.9167944285700524,
      "learning_rate": 3.0886121313404827e-06,
      "loss": 0.1475,
      "step": 4918
    },
    {
      "epoch": 0.45321785599115494,
      "grad_norm": 0.9361875808177101,
      "learning_rate": 3.0878717365933005e-06,
      "loss": 0.1521,
      "step": 4919
    },
    {
      "epoch": 0.45330999216842494,
      "grad_norm": 0.872165934115353,
      "learning_rate": 3.087131287262163e-06,
      "loss": 0.1435,
      "step": 4920
    },
    {
      "epoch": 0.45340212834569493,
      "grad_norm": 0.9511119704159243,
      "learning_rate": 3.0863907834158236e-06,
      "loss": 0.1489,
      "step": 4921
    },
    {
      "epoch": 0.45349426452296493,
      "grad_norm": 0.878621950916722,
      "learning_rate": 3.0856502251230363e-06,
      "loss": 0.1406,
      "step": 4922
    },
    {
      "epoch": 0.4535864007002349,
      "grad_norm": 0.8888612621302028,
      "learning_rate": 3.084909612452563e-06,
      "loss": 0.14,
      "step": 4923
    },
    {
      "epoch": 0.453678536877505,
      "grad_norm": 0.9063848035951376,
      "learning_rate": 3.0841689454731686e-06,
      "loss": 0.1543,
      "step": 4924
    },
    {
      "epoch": 0.453770673054775,
      "grad_norm": 0.9055802766662286,
      "learning_rate": 3.0834282242536253e-06,
      "loss": 0.1433,
      "step": 4925
    },
    {
      "epoch": 0.453862809232045,
      "grad_norm": 0.8804890898499717,
      "learning_rate": 3.082687448862708e-06,
      "loss": 0.1415,
      "step": 4926
    },
    {
      "epoch": 0.45395494540931497,
      "grad_norm": 0.8951461422226128,
      "learning_rate": 3.0819466193691995e-06,
      "loss": 0.1366,
      "step": 4927
    },
    {
      "epoch": 0.45404708158658497,
      "grad_norm": 0.9641931901566596,
      "learning_rate": 3.0812057358418834e-06,
      "loss": 0.1516,
      "step": 4928
    },
    {
      "epoch": 0.45413921776385496,
      "grad_norm": 0.8913322240282533,
      "learning_rate": 3.0804647983495527e-06,
      "loss": 0.144,
      "step": 4929
    },
    {
      "epoch": 0.45423135394112496,
      "grad_norm": 0.8853943694518898,
      "learning_rate": 3.079723806961003e-06,
      "loss": 0.1415,
      "step": 4930
    },
    {
      "epoch": 0.454323490118395,
      "grad_norm": 0.892217812255629,
      "learning_rate": 3.078982761745036e-06,
      "loss": 0.1356,
      "step": 4931
    },
    {
      "epoch": 0.454415626295665,
      "grad_norm": 0.9055251034056351,
      "learning_rate": 3.078241662770456e-06,
      "loss": 0.1459,
      "step": 4932
    },
    {
      "epoch": 0.454507762472935,
      "grad_norm": 0.87958798551727,
      "learning_rate": 3.0775005101060766e-06,
      "loss": 0.1334,
      "step": 4933
    },
    {
      "epoch": 0.454599898650205,
      "grad_norm": 0.9294900159804047,
      "learning_rate": 3.076759303820712e-06,
      "loss": 0.1483,
      "step": 4934
    },
    {
      "epoch": 0.454692034827475,
      "grad_norm": 0.9137965516586513,
      "learning_rate": 3.0760180439831844e-06,
      "loss": 0.1461,
      "step": 4935
    },
    {
      "epoch": 0.454784171004745,
      "grad_norm": 0.9170055144005241,
      "learning_rate": 3.0752767306623193e-06,
      "loss": 0.1549,
      "step": 4936
    },
    {
      "epoch": 0.454876307182015,
      "grad_norm": 0.9215013771632808,
      "learning_rate": 3.0745353639269476e-06,
      "loss": 0.1468,
      "step": 4937
    },
    {
      "epoch": 0.45496844335928505,
      "grad_norm": 0.87370944363821,
      "learning_rate": 3.073793943845906e-06,
      "loss": 0.1312,
      "step": 4938
    },
    {
      "epoch": 0.45506057953655504,
      "grad_norm": 0.8529167887808932,
      "learning_rate": 3.073052470488035e-06,
      "loss": 0.1368,
      "step": 4939
    },
    {
      "epoch": 0.45515271571382504,
      "grad_norm": 0.8213407441730175,
      "learning_rate": 3.0723109439221794e-06,
      "loss": 0.1219,
      "step": 4940
    },
    {
      "epoch": 0.45524485189109504,
      "grad_norm": 0.9499098561290193,
      "learning_rate": 3.071569364217192e-06,
      "loss": 0.1488,
      "step": 4941
    },
    {
      "epoch": 0.45533698806836503,
      "grad_norm": 0.9412672079828817,
      "learning_rate": 3.070827731441927e-06,
      "loss": 0.145,
      "step": 4942
    },
    {
      "epoch": 0.45542912424563503,
      "grad_norm": 0.9449970015438249,
      "learning_rate": 3.0700860456652467e-06,
      "loss": 0.1527,
      "step": 4943
    },
    {
      "epoch": 0.455521260422905,
      "grad_norm": 1.023026610833486,
      "learning_rate": 3.0693443069560147e-06,
      "loss": 0.1645,
      "step": 4944
    },
    {
      "epoch": 0.4556133966001751,
      "grad_norm": 0.9322705358338801,
      "learning_rate": 3.0686025153831033e-06,
      "loss": 0.14,
      "step": 4945
    },
    {
      "epoch": 0.4557055327774451,
      "grad_norm": 0.9495432808167561,
      "learning_rate": 3.067860671015387e-06,
      "loss": 0.1393,
      "step": 4946
    },
    {
      "epoch": 0.4557976689547151,
      "grad_norm": 0.940822123502306,
      "learning_rate": 3.0671187739217455e-06,
      "loss": 0.1453,
      "step": 4947
    },
    {
      "epoch": 0.45588980513198507,
      "grad_norm": 0.9230469467919934,
      "learning_rate": 3.0663768241710653e-06,
      "loss": 0.1404,
      "step": 4948
    },
    {
      "epoch": 0.45598194130925507,
      "grad_norm": 0.8879484675632415,
      "learning_rate": 3.065634821832237e-06,
      "loss": 0.1453,
      "step": 4949
    },
    {
      "epoch": 0.45607407748652506,
      "grad_norm": 0.8834701175995728,
      "learning_rate": 3.064892766974153e-06,
      "loss": 0.1337,
      "step": 4950
    },
    {
      "epoch": 0.45616621366379506,
      "grad_norm": 0.9122572961159483,
      "learning_rate": 3.0641506596657155e-06,
      "loss": 0.1525,
      "step": 4951
    },
    {
      "epoch": 0.4562583498410651,
      "grad_norm": 0.9907773312470673,
      "learning_rate": 3.0634084999758283e-06,
      "loss": 0.1481,
      "step": 4952
    },
    {
      "epoch": 0.4563504860183351,
      "grad_norm": 1.0032234693838713,
      "learning_rate": 3.0626662879734015e-06,
      "loss": 0.1542,
      "step": 4953
    },
    {
      "epoch": 0.4564426221956051,
      "grad_norm": 0.9644592767333385,
      "learning_rate": 3.0619240237273496e-06,
      "loss": 0.1467,
      "step": 4954
    },
    {
      "epoch": 0.4565347583728751,
      "grad_norm": 0.9045737461881386,
      "learning_rate": 3.0611817073065906e-06,
      "loss": 0.1411,
      "step": 4955
    },
    {
      "epoch": 0.4566268945501451,
      "grad_norm": 0.8749374497112531,
      "learning_rate": 3.0604393387800506e-06,
      "loss": 0.133,
      "step": 4956
    },
    {
      "epoch": 0.4567190307274151,
      "grad_norm": 0.9062323098590219,
      "learning_rate": 3.059696918216658e-06,
      "loss": 0.1517,
      "step": 4957
    },
    {
      "epoch": 0.45681116690468515,
      "grad_norm": 0.8681354325932334,
      "learning_rate": 3.058954445685346e-06,
      "loss": 0.139,
      "step": 4958
    },
    {
      "epoch": 0.45690330308195515,
      "grad_norm": 0.9817471157907012,
      "learning_rate": 3.058211921255053e-06,
      "loss": 0.1503,
      "step": 4959
    },
    {
      "epoch": 0.45699543925922514,
      "grad_norm": 0.8802616285300711,
      "learning_rate": 3.0574693449947234e-06,
      "loss": 0.1369,
      "step": 4960
    },
    {
      "epoch": 0.45708757543649514,
      "grad_norm": 0.910891593640118,
      "learning_rate": 3.056726716973305e-06,
      "loss": 0.1402,
      "step": 4961
    },
    {
      "epoch": 0.45717971161376514,
      "grad_norm": 1.030761021092152,
      "learning_rate": 3.0559840372597516e-06,
      "loss": 0.158,
      "step": 4962
    },
    {
      "epoch": 0.45727184779103514,
      "grad_norm": 0.8832204519175471,
      "learning_rate": 3.0552413059230196e-06,
      "loss": 0.1497,
      "step": 4963
    },
    {
      "epoch": 0.45736398396830513,
      "grad_norm": 0.932715520563677,
      "learning_rate": 3.054498523032073e-06,
      "loss": 0.1606,
      "step": 4964
    },
    {
      "epoch": 0.4574561201455752,
      "grad_norm": 0.9295316822032308,
      "learning_rate": 3.053755688655879e-06,
      "loss": 0.1419,
      "step": 4965
    },
    {
      "epoch": 0.4575482563228452,
      "grad_norm": 0.9730620058415319,
      "learning_rate": 3.05301280286341e-06,
      "loss": 0.1379,
      "step": 4966
    },
    {
      "epoch": 0.4576403925001152,
      "grad_norm": 0.8698683882590288,
      "learning_rate": 3.0522698657236417e-06,
      "loss": 0.1364,
      "step": 4967
    },
    {
      "epoch": 0.4577325286773852,
      "grad_norm": 0.9725906398499707,
      "learning_rate": 3.0515268773055577e-06,
      "loss": 0.1574,
      "step": 4968
    },
    {
      "epoch": 0.4578246648546552,
      "grad_norm": 0.973396363810178,
      "learning_rate": 3.0507838376781433e-06,
      "loss": 0.1599,
      "step": 4969
    },
    {
      "epoch": 0.45791680103192517,
      "grad_norm": 1.007119621555577,
      "learning_rate": 3.050040746910391e-06,
      "loss": 0.1615,
      "step": 4970
    },
    {
      "epoch": 0.45800893720919517,
      "grad_norm": 0.9464800328407788,
      "learning_rate": 3.049297605071296e-06,
      "loss": 0.1472,
      "step": 4971
    },
    {
      "epoch": 0.4581010733864652,
      "grad_norm": 0.970285334538535,
      "learning_rate": 3.0485544122298586e-06,
      "loss": 0.1539,
      "step": 4972
    },
    {
      "epoch": 0.4581932095637352,
      "grad_norm": 0.8753266986448403,
      "learning_rate": 3.0478111684550855e-06,
      "loss": 0.1349,
      "step": 4973
    },
    {
      "epoch": 0.4582853457410052,
      "grad_norm": 0.8632450928210212,
      "learning_rate": 3.0470678738159865e-06,
      "loss": 0.1393,
      "step": 4974
    },
    {
      "epoch": 0.4583774819182752,
      "grad_norm": 0.9481887029950181,
      "learning_rate": 3.046324528381576e-06,
      "loss": 0.1544,
      "step": 4975
    },
    {
      "epoch": 0.4584696180955452,
      "grad_norm": 0.9528130537970406,
      "learning_rate": 3.045581132220875e-06,
      "loss": 0.144,
      "step": 4976
    },
    {
      "epoch": 0.4585617542728152,
      "grad_norm": 0.958356098741581,
      "learning_rate": 3.0448376854029067e-06,
      "loss": 0.158,
      "step": 4977
    },
    {
      "epoch": 0.4586538904500852,
      "grad_norm": 0.886430427120259,
      "learning_rate": 3.0440941879967007e-06,
      "loss": 0.1349,
      "step": 4978
    },
    {
      "epoch": 0.45874602662735525,
      "grad_norm": 0.9492295330617945,
      "learning_rate": 3.043350640071291e-06,
      "loss": 0.1498,
      "step": 4979
    },
    {
      "epoch": 0.45883816280462525,
      "grad_norm": 0.8711956527213836,
      "learning_rate": 3.0426070416957155e-06,
      "loss": 0.1367,
      "step": 4980
    },
    {
      "epoch": 0.45893029898189525,
      "grad_norm": 0.8805743996456511,
      "learning_rate": 3.0418633929390184e-06,
      "loss": 0.133,
      "step": 4981
    },
    {
      "epoch": 0.45902243515916524,
      "grad_norm": 0.939114838941981,
      "learning_rate": 3.0411196938702465e-06,
      "loss": 0.1491,
      "step": 4982
    },
    {
      "epoch": 0.45911457133643524,
      "grad_norm": 0.8942524010998183,
      "learning_rate": 3.040375944558453e-06,
      "loss": 0.1487,
      "step": 4983
    },
    {
      "epoch": 0.45920670751370524,
      "grad_norm": 0.9381437330304268,
      "learning_rate": 3.0396321450726946e-06,
      "loss": 0.1454,
      "step": 4984
    },
    {
      "epoch": 0.4592988436909753,
      "grad_norm": 0.8871246016080946,
      "learning_rate": 3.0388882954820336e-06,
      "loss": 0.1364,
      "step": 4985
    },
    {
      "epoch": 0.4593909798682453,
      "grad_norm": 0.8884750944032963,
      "learning_rate": 3.0381443958555367e-06,
      "loss": 0.139,
      "step": 4986
    },
    {
      "epoch": 0.4594831160455153,
      "grad_norm": 0.9264575790757549,
      "learning_rate": 3.037400446262274e-06,
      "loss": 0.1471,
      "step": 4987
    },
    {
      "epoch": 0.4595752522227853,
      "grad_norm": 0.9376702144868666,
      "learning_rate": 3.036656446771322e-06,
      "loss": 0.1486,
      "step": 4988
    },
    {
      "epoch": 0.4596673884000553,
      "grad_norm": 0.9416874908664606,
      "learning_rate": 3.035912397451763e-06,
      "loss": 0.1412,
      "step": 4989
    },
    {
      "epoch": 0.4597595245773253,
      "grad_norm": 0.918365816769307,
      "learning_rate": 3.035168298372678e-06,
      "loss": 0.1395,
      "step": 4990
    },
    {
      "epoch": 0.45985166075459527,
      "grad_norm": 0.9667651339344426,
      "learning_rate": 3.0344241496031602e-06,
      "loss": 0.1467,
      "step": 4991
    },
    {
      "epoch": 0.4599437969318653,
      "grad_norm": 0.9702613519979137,
      "learning_rate": 3.0336799512123017e-06,
      "loss": 0.15,
      "step": 4992
    },
    {
      "epoch": 0.4600359331091353,
      "grad_norm": 0.9617046493464246,
      "learning_rate": 3.032935703269203e-06,
      "loss": 0.145,
      "step": 4993
    },
    {
      "epoch": 0.4601280692864053,
      "grad_norm": 1.021218623960953,
      "learning_rate": 3.0321914058429668e-06,
      "loss": 0.1517,
      "step": 4994
    },
    {
      "epoch": 0.4602202054636753,
      "grad_norm": 0.9376645972483496,
      "learning_rate": 3.0314470590027012e-06,
      "loss": 0.1586,
      "step": 4995
    },
    {
      "epoch": 0.4603123416409453,
      "grad_norm": 0.9052458305949739,
      "learning_rate": 3.0307026628175183e-06,
      "loss": 0.1454,
      "step": 4996
    },
    {
      "epoch": 0.4604044778182153,
      "grad_norm": 0.8892358271557455,
      "learning_rate": 3.029958217356537e-06,
      "loss": 0.1423,
      "step": 4997
    },
    {
      "epoch": 0.4604966139954853,
      "grad_norm": 0.8721683270176317,
      "learning_rate": 3.029213722688878e-06,
      "loss": 0.1422,
      "step": 4998
    },
    {
      "epoch": 0.46058875017275536,
      "grad_norm": 0.9118092519515993,
      "learning_rate": 3.0284691788836672e-06,
      "loss": 0.1535,
      "step": 4999
    },
    {
      "epoch": 0.46068088635002535,
      "grad_norm": 0.9566666504443163,
      "learning_rate": 3.027724586010037e-06,
      "loss": 0.1536,
      "step": 5000
    },
    {
      "epoch": 0.46068088635002535,
      "eval_loss": 0.14408743381500244,
      "eval_runtime": 299.7274,
      "eval_samples_per_second": 23.411,
      "eval_steps_per_second": 2.929,
      "step": 5000
    },
    {
      "epoch": 0.46077302252729535,
      "grad_norm": 0.944085745803305,
      "learning_rate": 3.0269799441371224e-06,
      "loss": 0.1453,
      "step": 5001
    },
    {
      "epoch": 0.46086515870456535,
      "grad_norm": 0.8901400825031632,
      "learning_rate": 3.026235253334063e-06,
      "loss": 0.1445,
      "step": 5002
    },
    {
      "epoch": 0.46095729488183534,
      "grad_norm": 0.9088254286241351,
      "learning_rate": 3.0254905136700038e-06,
      "loss": 0.1415,
      "step": 5003
    },
    {
      "epoch": 0.46104943105910534,
      "grad_norm": 0.9018385475977528,
      "learning_rate": 3.024745725214093e-06,
      "loss": 0.1404,
      "step": 5004
    },
    {
      "epoch": 0.46114156723637534,
      "grad_norm": 0.8610078807583995,
      "learning_rate": 3.024000888035486e-06,
      "loss": 0.1327,
      "step": 5005
    },
    {
      "epoch": 0.4612337034136454,
      "grad_norm": 0.9052272195057318,
      "learning_rate": 3.0232560022033398e-06,
      "loss": 0.1389,
      "step": 5006
    },
    {
      "epoch": 0.4613258395909154,
      "grad_norm": 0.9070172834425693,
      "learning_rate": 3.022511067786817e-06,
      "loss": 0.1444,
      "step": 5007
    },
    {
      "epoch": 0.4614179757681854,
      "grad_norm": 1.0011454075311015,
      "learning_rate": 3.0217660848550863e-06,
      "loss": 0.1616,
      "step": 5008
    },
    {
      "epoch": 0.4615101119454554,
      "grad_norm": 0.8636917231521298,
      "learning_rate": 3.0210210534773175e-06,
      "loss": 0.1461,
      "step": 5009
    },
    {
      "epoch": 0.4616022481227254,
      "grad_norm": 0.887277083871904,
      "learning_rate": 3.020275973722688e-06,
      "loss": 0.1413,
      "step": 5010
    },
    {
      "epoch": 0.4616943842999954,
      "grad_norm": 0.904649471086909,
      "learning_rate": 3.0195308456603795e-06,
      "loss": 0.1444,
      "step": 5011
    },
    {
      "epoch": 0.46178652047726537,
      "grad_norm": 0.922525202971339,
      "learning_rate": 3.018785669359575e-06,
      "loss": 0.1349,
      "step": 5012
    },
    {
      "epoch": 0.4618786566545354,
      "grad_norm": 0.8871262781575864,
      "learning_rate": 3.018040444889466e-06,
      "loss": 0.1357,
      "step": 5013
    },
    {
      "epoch": 0.4619707928318054,
      "grad_norm": 0.8900463787551096,
      "learning_rate": 3.0172951723192456e-06,
      "loss": 0.1374,
      "step": 5014
    },
    {
      "epoch": 0.4620629290090754,
      "grad_norm": 0.9042316250161178,
      "learning_rate": 3.016549851718112e-06,
      "loss": 0.1456,
      "step": 5015
    },
    {
      "epoch": 0.4621550651863454,
      "grad_norm": 0.9786460909009905,
      "learning_rate": 3.0158044831552703e-06,
      "loss": 0.1508,
      "step": 5016
    },
    {
      "epoch": 0.4622472013636154,
      "grad_norm": 0.8686153084102173,
      "learning_rate": 3.015059066699926e-06,
      "loss": 0.1343,
      "step": 5017
    },
    {
      "epoch": 0.4623393375408854,
      "grad_norm": 0.8651676610958235,
      "learning_rate": 3.0143136024212923e-06,
      "loss": 0.1362,
      "step": 5018
    },
    {
      "epoch": 0.46243147371815546,
      "grad_norm": 0.9019007066819323,
      "learning_rate": 3.013568090388585e-06,
      "loss": 0.145,
      "step": 5019
    },
    {
      "epoch": 0.46252360989542546,
      "grad_norm": 0.9500449930037843,
      "learning_rate": 3.012822530671026e-06,
      "loss": 0.1437,
      "step": 5020
    },
    {
      "epoch": 0.46261574607269546,
      "grad_norm": 0.9361307249362852,
      "learning_rate": 3.012076923337839e-06,
      "loss": 0.1569,
      "step": 5021
    },
    {
      "epoch": 0.46270788224996545,
      "grad_norm": 0.8733335938562423,
      "learning_rate": 3.011331268458255e-06,
      "loss": 0.1424,
      "step": 5022
    },
    {
      "epoch": 0.46280001842723545,
      "grad_norm": 0.8812589568625355,
      "learning_rate": 3.010585566101507e-06,
      "loss": 0.1437,
      "step": 5023
    },
    {
      "epoch": 0.46289215460450545,
      "grad_norm": 0.8940904754428848,
      "learning_rate": 3.0098398163368353e-06,
      "loss": 0.1385,
      "step": 5024
    },
    {
      "epoch": 0.46298429078177544,
      "grad_norm": 0.8489133395575272,
      "learning_rate": 3.0090940192334805e-06,
      "loss": 0.1351,
      "step": 5025
    },
    {
      "epoch": 0.4630764269590455,
      "grad_norm": 0.905730134591134,
      "learning_rate": 3.0083481748606923e-06,
      "loss": 0.1499,
      "step": 5026
    },
    {
      "epoch": 0.4631685631363155,
      "grad_norm": 0.8927244938308796,
      "learning_rate": 3.007602283287721e-06,
      "loss": 0.1435,
      "step": 5027
    },
    {
      "epoch": 0.4632606993135855,
      "grad_norm": 0.8957615740961147,
      "learning_rate": 3.0068563445838234e-06,
      "loss": 0.1496,
      "step": 5028
    },
    {
      "epoch": 0.4633528354908555,
      "grad_norm": 0.9292440088049803,
      "learning_rate": 3.0061103588182592e-06,
      "loss": 0.1532,
      "step": 5029
    },
    {
      "epoch": 0.4634449716681255,
      "grad_norm": 0.8525916459383819,
      "learning_rate": 3.005364326060294e-06,
      "loss": 0.1267,
      "step": 5030
    },
    {
      "epoch": 0.4635371078453955,
      "grad_norm": 0.9596307408249468,
      "learning_rate": 3.0046182463791962e-06,
      "loss": 0.1395,
      "step": 5031
    },
    {
      "epoch": 0.4636292440226655,
      "grad_norm": 0.9635650608907829,
      "learning_rate": 3.0038721198442406e-06,
      "loss": 0.1495,
      "step": 5032
    },
    {
      "epoch": 0.46372138019993553,
      "grad_norm": 0.8783556628180413,
      "learning_rate": 3.003125946524704e-06,
      "loss": 0.1374,
      "step": 5033
    },
    {
      "epoch": 0.4638135163772055,
      "grad_norm": 0.942047255532235,
      "learning_rate": 3.002379726489869e-06,
      "loss": 0.1443,
      "step": 5034
    },
    {
      "epoch": 0.4639056525544755,
      "grad_norm": 1.029948109716625,
      "learning_rate": 3.001633459809023e-06,
      "loss": 0.1544,
      "step": 5035
    },
    {
      "epoch": 0.4639977887317455,
      "grad_norm": 0.9450058200355388,
      "learning_rate": 3.000887146551455e-06,
      "loss": 0.1447,
      "step": 5036
    },
    {
      "epoch": 0.4640899249090155,
      "grad_norm": 0.8932809463577716,
      "learning_rate": 3.000140786786463e-06,
      "loss": 0.1351,
      "step": 5037
    },
    {
      "epoch": 0.4641820610862855,
      "grad_norm": 0.9064007681388029,
      "learning_rate": 2.9993943805833444e-06,
      "loss": 0.1449,
      "step": 5038
    },
    {
      "epoch": 0.4642741972635555,
      "grad_norm": 0.8988360819697974,
      "learning_rate": 2.998647928011404e-06,
      "loss": 0.1423,
      "step": 5039
    },
    {
      "epoch": 0.46436633344082556,
      "grad_norm": 0.8884133863776579,
      "learning_rate": 2.9979014291399495e-06,
      "loss": 0.1381,
      "step": 5040
    },
    {
      "epoch": 0.46445846961809556,
      "grad_norm": 0.9578344478960423,
      "learning_rate": 2.997154884038294e-06,
      "loss": 0.1457,
      "step": 5041
    },
    {
      "epoch": 0.46455060579536556,
      "grad_norm": 0.8889333116943653,
      "learning_rate": 2.9964082927757537e-06,
      "loss": 0.1434,
      "step": 5042
    },
    {
      "epoch": 0.46464274197263555,
      "grad_norm": 0.8880790358302674,
      "learning_rate": 2.995661655421651e-06,
      "loss": 0.1422,
      "step": 5043
    },
    {
      "epoch": 0.46473487814990555,
      "grad_norm": 0.9310009728143278,
      "learning_rate": 2.994914972045309e-06,
      "loss": 0.1519,
      "step": 5044
    },
    {
      "epoch": 0.46482701432717555,
      "grad_norm": 0.9135511073129388,
      "learning_rate": 2.994168242716059e-06,
      "loss": 0.1439,
      "step": 5045
    },
    {
      "epoch": 0.46491915050444554,
      "grad_norm": 0.850859033376075,
      "learning_rate": 2.9934214675032346e-06,
      "loss": 0.1444,
      "step": 5046
    },
    {
      "epoch": 0.4650112866817156,
      "grad_norm": 0.9225464686632786,
      "learning_rate": 2.9926746464761743e-06,
      "loss": 0.1519,
      "step": 5047
    },
    {
      "epoch": 0.4651034228589856,
      "grad_norm": 0.9176808967329263,
      "learning_rate": 2.9919277797042196e-06,
      "loss": 0.1358,
      "step": 5048
    },
    {
      "epoch": 0.4651955590362556,
      "grad_norm": 0.8571010707868579,
      "learning_rate": 2.991180867256718e-06,
      "loss": 0.1421,
      "step": 5049
    },
    {
      "epoch": 0.4652876952135256,
      "grad_norm": 0.8862854880044667,
      "learning_rate": 2.990433909203019e-06,
      "loss": 0.1419,
      "step": 5050
    },
    {
      "epoch": 0.4653798313907956,
      "grad_norm": 0.8790782944491246,
      "learning_rate": 2.9896869056124795e-06,
      "loss": 0.1408,
      "step": 5051
    },
    {
      "epoch": 0.4654719675680656,
      "grad_norm": 0.9012749764460423,
      "learning_rate": 2.9889398565544576e-06,
      "loss": 0.1476,
      "step": 5052
    },
    {
      "epoch": 0.46556410374533563,
      "grad_norm": 0.8814257961671169,
      "learning_rate": 2.9881927620983175e-06,
      "loss": 0.1455,
      "step": 5053
    },
    {
      "epoch": 0.46565623992260563,
      "grad_norm": 0.8888919896330232,
      "learning_rate": 2.9874456223134273e-06,
      "loss": 0.1395,
      "step": 5054
    },
    {
      "epoch": 0.4657483760998756,
      "grad_norm": 0.8966843988002527,
      "learning_rate": 2.9866984372691586e-06,
      "loss": 0.1461,
      "step": 5055
    },
    {
      "epoch": 0.4658405122771456,
      "grad_norm": 0.9811961006056726,
      "learning_rate": 2.985951207034888e-06,
      "loss": 0.1566,
      "step": 5056
    },
    {
      "epoch": 0.4659326484544156,
      "grad_norm": 0.9175985358748722,
      "learning_rate": 2.985203931679995e-06,
      "loss": 0.1524,
      "step": 5057
    },
    {
      "epoch": 0.4660247846316856,
      "grad_norm": 0.9465765486071972,
      "learning_rate": 2.984456611273864e-06,
      "loss": 0.1522,
      "step": 5058
    },
    {
      "epoch": 0.4661169208089556,
      "grad_norm": 0.9159762833014369,
      "learning_rate": 2.9837092458858862e-06,
      "loss": 0.1425,
      "step": 5059
    },
    {
      "epoch": 0.46620905698622567,
      "grad_norm": 0.9303292659055917,
      "learning_rate": 2.982961835585451e-06,
      "loss": 0.1508,
      "step": 5060
    },
    {
      "epoch": 0.46630119316349566,
      "grad_norm": 0.9815182981934475,
      "learning_rate": 2.9822143804419586e-06,
      "loss": 0.1443,
      "step": 5061
    },
    {
      "epoch": 0.46639332934076566,
      "grad_norm": 0.9247239725037197,
      "learning_rate": 2.981466880524809e-06,
      "loss": 0.1619,
      "step": 5062
    },
    {
      "epoch": 0.46648546551803566,
      "grad_norm": 0.9203976339424482,
      "learning_rate": 2.9807193359034077e-06,
      "loss": 0.1437,
      "step": 5063
    },
    {
      "epoch": 0.46657760169530565,
      "grad_norm": 0.9511622917745709,
      "learning_rate": 2.979971746647164e-06,
      "loss": 0.1503,
      "step": 5064
    },
    {
      "epoch": 0.46666973787257565,
      "grad_norm": 0.9639638100811875,
      "learning_rate": 2.9792241128254916e-06,
      "loss": 0.1526,
      "step": 5065
    },
    {
      "epoch": 0.46676187404984565,
      "grad_norm": 0.8636214111011123,
      "learning_rate": 2.978476434507809e-06,
      "loss": 0.1406,
      "step": 5066
    },
    {
      "epoch": 0.4668540102271157,
      "grad_norm": 0.9786702258760724,
      "learning_rate": 2.9777287117635387e-06,
      "loss": 0.1501,
      "step": 5067
    },
    {
      "epoch": 0.4669461464043857,
      "grad_norm": 0.8728745748666031,
      "learning_rate": 2.9769809446621057e-06,
      "loss": 0.1398,
      "step": 5068
    },
    {
      "epoch": 0.4670382825816557,
      "grad_norm": 0.9069597536608185,
      "learning_rate": 2.9762331332729405e-06,
      "loss": 0.1374,
      "step": 5069
    },
    {
      "epoch": 0.4671304187589257,
      "grad_norm": 0.9322944650625257,
      "learning_rate": 2.975485277665478e-06,
      "loss": 0.1469,
      "step": 5070
    },
    {
      "epoch": 0.4672225549361957,
      "grad_norm": 0.9255137327705216,
      "learning_rate": 2.9747373779091552e-06,
      "loss": 0.1442,
      "step": 5071
    },
    {
      "epoch": 0.4673146911134657,
      "grad_norm": 0.9013228649521703,
      "learning_rate": 2.9739894340734177e-06,
      "loss": 0.1485,
      "step": 5072
    },
    {
      "epoch": 0.4674068272907357,
      "grad_norm": 0.9255808656859331,
      "learning_rate": 2.9732414462277083e-06,
      "loss": 0.1361,
      "step": 5073
    },
    {
      "epoch": 0.46749896346800573,
      "grad_norm": 0.8959121976406828,
      "learning_rate": 2.9724934144414807e-06,
      "loss": 0.1392,
      "step": 5074
    },
    {
      "epoch": 0.46759109964527573,
      "grad_norm": 0.9969189706936202,
      "learning_rate": 2.9717453387841884e-06,
      "loss": 0.1477,
      "step": 5075
    },
    {
      "epoch": 0.46768323582254573,
      "grad_norm": 0.8902239293901296,
      "learning_rate": 2.9709972193252905e-06,
      "loss": 0.1448,
      "step": 5076
    },
    {
      "epoch": 0.4677753719998157,
      "grad_norm": 0.8644170281111183,
      "learning_rate": 2.9702490561342505e-06,
      "loss": 0.1384,
      "step": 5077
    },
    {
      "epoch": 0.4678675081770857,
      "grad_norm": 0.8383712450753181,
      "learning_rate": 2.969500849280535e-06,
      "loss": 0.1306,
      "step": 5078
    },
    {
      "epoch": 0.4679596443543557,
      "grad_norm": 0.8679667354263388,
      "learning_rate": 2.9687525988336147e-06,
      "loss": 0.144,
      "step": 5079
    },
    {
      "epoch": 0.4680517805316257,
      "grad_norm": 0.9255121092467719,
      "learning_rate": 2.968004304862966e-06,
      "loss": 0.1363,
      "step": 5080
    },
    {
      "epoch": 0.46814391670889577,
      "grad_norm": 0.9135538111005072,
      "learning_rate": 2.9672559674380664e-06,
      "loss": 0.149,
      "step": 5081
    },
    {
      "epoch": 0.46823605288616577,
      "grad_norm": 0.8891994213746041,
      "learning_rate": 2.9665075866284e-06,
      "loss": 0.1455,
      "step": 5082
    },
    {
      "epoch": 0.46832818906343576,
      "grad_norm": 0.906236563353806,
      "learning_rate": 2.9657591625034543e-06,
      "loss": 0.1342,
      "step": 5083
    },
    {
      "epoch": 0.46842032524070576,
      "grad_norm": 0.8876771489888291,
      "learning_rate": 2.9650106951327202e-06,
      "loss": 0.1346,
      "step": 5084
    },
    {
      "epoch": 0.46851246141797576,
      "grad_norm": 0.9419083874001088,
      "learning_rate": 2.964262184585692e-06,
      "loss": 0.1434,
      "step": 5085
    },
    {
      "epoch": 0.46860459759524575,
      "grad_norm": 0.9289938357240787,
      "learning_rate": 2.963513630931872e-06,
      "loss": 0.1576,
      "step": 5086
    },
    {
      "epoch": 0.4686967337725158,
      "grad_norm": 0.9715748809553193,
      "learning_rate": 2.96276503424076e-06,
      "loss": 0.1542,
      "step": 5087
    },
    {
      "epoch": 0.4687888699497858,
      "grad_norm": 0.8970855643969083,
      "learning_rate": 2.9620163945818648e-06,
      "loss": 0.1441,
      "step": 5088
    },
    {
      "epoch": 0.4688810061270558,
      "grad_norm": 0.999806972740666,
      "learning_rate": 2.961267712024698e-06,
      "loss": 0.1608,
      "step": 5089
    },
    {
      "epoch": 0.4689731423043258,
      "grad_norm": 0.9615698363397197,
      "learning_rate": 2.9605189866387746e-06,
      "loss": 0.15,
      "step": 5090
    },
    {
      "epoch": 0.4690652784815958,
      "grad_norm": 0.9082739679420999,
      "learning_rate": 2.9597702184936137e-06,
      "loss": 0.1494,
      "step": 5091
    },
    {
      "epoch": 0.4691574146588658,
      "grad_norm": 0.8927931890323686,
      "learning_rate": 2.9590214076587386e-06,
      "loss": 0.1425,
      "step": 5092
    },
    {
      "epoch": 0.4692495508361358,
      "grad_norm": 0.8850697632312967,
      "learning_rate": 2.958272554203676e-06,
      "loss": 0.1422,
      "step": 5093
    },
    {
      "epoch": 0.46934168701340584,
      "grad_norm": 0.9147171490433019,
      "learning_rate": 2.9575236581979576e-06,
      "loss": 0.1467,
      "step": 5094
    },
    {
      "epoch": 0.46943382319067584,
      "grad_norm": 0.864702255400151,
      "learning_rate": 2.9567747197111186e-06,
      "loss": 0.1387,
      "step": 5095
    },
    {
      "epoch": 0.46952595936794583,
      "grad_norm": 0.8767863278849377,
      "learning_rate": 2.9560257388126973e-06,
      "loss": 0.1473,
      "step": 5096
    },
    {
      "epoch": 0.46961809554521583,
      "grad_norm": 0.9674640608974738,
      "learning_rate": 2.9552767155722375e-06,
      "loss": 0.132,
      "step": 5097
    },
    {
      "epoch": 0.4697102317224858,
      "grad_norm": 0.9065230519661844,
      "learning_rate": 2.954527650059285e-06,
      "loss": 0.1394,
      "step": 5098
    },
    {
      "epoch": 0.4698023678997558,
      "grad_norm": 0.8556130271531583,
      "learning_rate": 2.9537785423433925e-06,
      "loss": 0.1301,
      "step": 5099
    },
    {
      "epoch": 0.4698945040770258,
      "grad_norm": 0.9112839599118244,
      "learning_rate": 2.9530293924941123e-06,
      "loss": 0.1542,
      "step": 5100
    },
    {
      "epoch": 0.4699866402542959,
      "grad_norm": 0.9598181354372883,
      "learning_rate": 2.9522802005810043e-06,
      "loss": 0.1526,
      "step": 5101
    },
    {
      "epoch": 0.47007877643156587,
      "grad_norm": 0.8624365229094141,
      "learning_rate": 2.9515309666736312e-06,
      "loss": 0.1359,
      "step": 5102
    },
    {
      "epoch": 0.47017091260883587,
      "grad_norm": 0.8809972735025539,
      "learning_rate": 2.95078169084156e-06,
      "loss": 0.1363,
      "step": 5103
    },
    {
      "epoch": 0.47026304878610586,
      "grad_norm": 0.8561841482860453,
      "learning_rate": 2.9500323731543596e-06,
      "loss": 0.1252,
      "step": 5104
    },
    {
      "epoch": 0.47035518496337586,
      "grad_norm": 0.8809602128506461,
      "learning_rate": 2.9492830136816053e-06,
      "loss": 0.1407,
      "step": 5105
    },
    {
      "epoch": 0.47044732114064586,
      "grad_norm": 0.9372906064000932,
      "learning_rate": 2.948533612492874e-06,
      "loss": 0.1543,
      "step": 5106
    },
    {
      "epoch": 0.47053945731791585,
      "grad_norm": 0.8940500798202332,
      "learning_rate": 2.947784169657749e-06,
      "loss": 0.1342,
      "step": 5107
    },
    {
      "epoch": 0.4706315934951859,
      "grad_norm": 0.9499099523288487,
      "learning_rate": 2.947034685245816e-06,
      "loss": 0.1528,
      "step": 5108
    },
    {
      "epoch": 0.4707237296724559,
      "grad_norm": 0.9332993914331615,
      "learning_rate": 2.946285159326664e-06,
      "loss": 0.146,
      "step": 5109
    },
    {
      "epoch": 0.4708158658497259,
      "grad_norm": 0.9145503369594333,
      "learning_rate": 2.945535591969887e-06,
      "loss": 0.1366,
      "step": 5110
    },
    {
      "epoch": 0.4709080020269959,
      "grad_norm": 0.87163080765042,
      "learning_rate": 2.944785983245082e-06,
      "loss": 0.1327,
      "step": 5111
    },
    {
      "epoch": 0.4710001382042659,
      "grad_norm": 0.9165609316092446,
      "learning_rate": 2.944036333221851e-06,
      "loss": 0.1538,
      "step": 5112
    },
    {
      "epoch": 0.4710922743815359,
      "grad_norm": 0.8710335528196103,
      "learning_rate": 2.9432866419697993e-06,
      "loss": 0.1289,
      "step": 5113
    },
    {
      "epoch": 0.4711844105588059,
      "grad_norm": 0.8827968344332082,
      "learning_rate": 2.9425369095585337e-06,
      "loss": 0.133,
      "step": 5114
    },
    {
      "epoch": 0.47127654673607594,
      "grad_norm": 0.8897129402540811,
      "learning_rate": 2.94178713605767e-06,
      "loss": 0.1358,
      "step": 5115
    },
    {
      "epoch": 0.47136868291334594,
      "grad_norm": 0.9143135967958729,
      "learning_rate": 2.9410373215368216e-06,
      "loss": 0.1402,
      "step": 5116
    },
    {
      "epoch": 0.47146081909061593,
      "grad_norm": 0.8708685814829983,
      "learning_rate": 2.9402874660656113e-06,
      "loss": 0.141,
      "step": 5117
    },
    {
      "epoch": 0.47155295526788593,
      "grad_norm": 0.9041670120185827,
      "learning_rate": 2.9395375697136623e-06,
      "loss": 0.1388,
      "step": 5118
    },
    {
      "epoch": 0.47164509144515593,
      "grad_norm": 0.9052941120311353,
      "learning_rate": 2.9387876325506025e-06,
      "loss": 0.14,
      "step": 5119
    },
    {
      "epoch": 0.4717372276224259,
      "grad_norm": 0.8817598067857801,
      "learning_rate": 2.9380376546460643e-06,
      "loss": 0.1344,
      "step": 5120
    },
    {
      "epoch": 0.471829363799696,
      "grad_norm": 0.9163393295135425,
      "learning_rate": 2.9372876360696823e-06,
      "loss": 0.1398,
      "step": 5121
    },
    {
      "epoch": 0.471921499976966,
      "grad_norm": 0.8989709981272607,
      "learning_rate": 2.9365375768910957e-06,
      "loss": 0.1398,
      "step": 5122
    },
    {
      "epoch": 0.47201363615423597,
      "grad_norm": 0.8474850729012813,
      "learning_rate": 2.935787477179949e-06,
      "loss": 0.128,
      "step": 5123
    },
    {
      "epoch": 0.47210577233150597,
      "grad_norm": 0.9426786416617355,
      "learning_rate": 2.9350373370058882e-06,
      "loss": 0.1482,
      "step": 5124
    },
    {
      "epoch": 0.47219790850877597,
      "grad_norm": 0.9157568621182558,
      "learning_rate": 2.9342871564385627e-06,
      "loss": 0.1445,
      "step": 5125
    },
    {
      "epoch": 0.47229004468604596,
      "grad_norm": 0.9060909697019053,
      "learning_rate": 2.9335369355476295e-06,
      "loss": 0.14,
      "step": 5126
    },
    {
      "epoch": 0.47238218086331596,
      "grad_norm": 0.9206297306151924,
      "learning_rate": 2.932786674402744e-06,
      "loss": 0.1467,
      "step": 5127
    },
    {
      "epoch": 0.472474317040586,
      "grad_norm": 0.9762708271384426,
      "learning_rate": 2.9320363730735696e-06,
      "loss": 0.1528,
      "step": 5128
    },
    {
      "epoch": 0.472566453217856,
      "grad_norm": 0.9265016675178621,
      "learning_rate": 2.9312860316297716e-06,
      "loss": 0.1349,
      "step": 5129
    },
    {
      "epoch": 0.472658589395126,
      "grad_norm": 0.9251132679088837,
      "learning_rate": 2.930535650141019e-06,
      "loss": 0.147,
      "step": 5130
    },
    {
      "epoch": 0.472750725572396,
      "grad_norm": 0.9300808982858864,
      "learning_rate": 2.9297852286769852e-06,
      "loss": 0.1452,
      "step": 5131
    },
    {
      "epoch": 0.472842861749666,
      "grad_norm": 0.8853462094377297,
      "learning_rate": 2.9290347673073466e-06,
      "loss": 0.1353,
      "step": 5132
    },
    {
      "epoch": 0.472934997926936,
      "grad_norm": 0.8979270084102228,
      "learning_rate": 2.928284266101783e-06,
      "loss": 0.1379,
      "step": 5133
    },
    {
      "epoch": 0.473027134104206,
      "grad_norm": 0.858106147363866,
      "learning_rate": 2.9275337251299808e-06,
      "loss": 0.1373,
      "step": 5134
    },
    {
      "epoch": 0.47311927028147605,
      "grad_norm": 0.9349995659851857,
      "learning_rate": 2.9267831444616244e-06,
      "loss": 0.1358,
      "step": 5135
    },
    {
      "epoch": 0.47321140645874604,
      "grad_norm": 0.9034520136048325,
      "learning_rate": 2.926032524166408e-06,
      "loss": 0.1382,
      "step": 5136
    },
    {
      "epoch": 0.47330354263601604,
      "grad_norm": 0.9136257429700642,
      "learning_rate": 2.9252818643140256e-06,
      "loss": 0.143,
      "step": 5137
    },
    {
      "epoch": 0.47339567881328604,
      "grad_norm": 0.9468303486280631,
      "learning_rate": 2.9245311649741765e-06,
      "loss": 0.1354,
      "step": 5138
    },
    {
      "epoch": 0.47348781499055603,
      "grad_norm": 0.9333786953701937,
      "learning_rate": 2.9237804262165632e-06,
      "loss": 0.1494,
      "step": 5139
    },
    {
      "epoch": 0.47357995116782603,
      "grad_norm": 0.8913158148329773,
      "learning_rate": 2.9230296481108916e-06,
      "loss": 0.1491,
      "step": 5140
    },
    {
      "epoch": 0.473672087345096,
      "grad_norm": 0.9347408552372286,
      "learning_rate": 2.922278830726871e-06,
      "loss": 0.1469,
      "step": 5141
    },
    {
      "epoch": 0.4737642235223661,
      "grad_norm": 0.9046310230891329,
      "learning_rate": 2.9215279741342165e-06,
      "loss": 0.1248,
      "step": 5142
    },
    {
      "epoch": 0.4738563596996361,
      "grad_norm": 0.8910835401059762,
      "learning_rate": 2.9207770784026436e-06,
      "loss": 0.1375,
      "step": 5143
    },
    {
      "epoch": 0.4739484958769061,
      "grad_norm": 0.8892182784728998,
      "learning_rate": 2.920026143601874e-06,
      "loss": 0.1277,
      "step": 5144
    },
    {
      "epoch": 0.47404063205417607,
      "grad_norm": 0.8774140482608112,
      "learning_rate": 2.9192751698016317e-06,
      "loss": 0.1368,
      "step": 5145
    },
    {
      "epoch": 0.47413276823144607,
      "grad_norm": 0.9889873174330852,
      "learning_rate": 2.918524157071645e-06,
      "loss": 0.1509,
      "step": 5146
    },
    {
      "epoch": 0.47422490440871606,
      "grad_norm": 0.9436506649824882,
      "learning_rate": 2.917773105481645e-06,
      "loss": 0.1535,
      "step": 5147
    },
    {
      "epoch": 0.47431704058598606,
      "grad_norm": 0.8873798622275758,
      "learning_rate": 2.917022015101367e-06,
      "loss": 0.1312,
      "step": 5148
    },
    {
      "epoch": 0.4744091767632561,
      "grad_norm": 0.9843878112858804,
      "learning_rate": 2.91627088600055e-06,
      "loss": 0.1525,
      "step": 5149
    },
    {
      "epoch": 0.4745013129405261,
      "grad_norm": 0.9278713582496736,
      "learning_rate": 2.9155197182489375e-06,
      "loss": 0.133,
      "step": 5150
    },
    {
      "epoch": 0.4745934491177961,
      "grad_norm": 0.8901035694884752,
      "learning_rate": 2.9147685119162735e-06,
      "loss": 0.1194,
      "step": 5151
    },
    {
      "epoch": 0.4746855852950661,
      "grad_norm": 0.8786311039803401,
      "learning_rate": 2.9140172670723083e-06,
      "loss": 0.1364,
      "step": 5152
    },
    {
      "epoch": 0.4747777214723361,
      "grad_norm": 0.9353843520819218,
      "learning_rate": 2.913265983786796e-06,
      "loss": 0.1367,
      "step": 5153
    },
    {
      "epoch": 0.4748698576496061,
      "grad_norm": 0.9136948540094968,
      "learning_rate": 2.9125146621294915e-06,
      "loss": 0.1585,
      "step": 5154
    },
    {
      "epoch": 0.47496199382687615,
      "grad_norm": 0.9192694976349985,
      "learning_rate": 2.9117633021701574e-06,
      "loss": 0.1482,
      "step": 5155
    },
    {
      "epoch": 0.47505413000414615,
      "grad_norm": 0.9747913478550096,
      "learning_rate": 2.9110119039785554e-06,
      "loss": 0.1425,
      "step": 5156
    },
    {
      "epoch": 0.47514626618141614,
      "grad_norm": 0.9813043098360802,
      "learning_rate": 2.910260467624455e-06,
      "loss": 0.153,
      "step": 5157
    },
    {
      "epoch": 0.47523840235868614,
      "grad_norm": 0.8829325066846414,
      "learning_rate": 2.9095089931776255e-06,
      "loss": 0.1476,
      "step": 5158
    },
    {
      "epoch": 0.47533053853595614,
      "grad_norm": 0.9194266463326177,
      "learning_rate": 2.908757480707842e-06,
      "loss": 0.1553,
      "step": 5159
    },
    {
      "epoch": 0.47542267471322613,
      "grad_norm": 0.9373661605075402,
      "learning_rate": 2.908005930284882e-06,
      "loss": 0.1523,
      "step": 5160
    },
    {
      "epoch": 0.47551481089049613,
      "grad_norm": 0.9409623196439526,
      "learning_rate": 2.907254341978528e-06,
      "loss": 0.1479,
      "step": 5161
    },
    {
      "epoch": 0.4756069470677662,
      "grad_norm": 0.9489593698972468,
      "learning_rate": 2.906502715858564e-06,
      "loss": 0.1532,
      "step": 5162
    },
    {
      "epoch": 0.4756990832450362,
      "grad_norm": 0.8697876898890173,
      "learning_rate": 2.9057510519947794e-06,
      "loss": 0.1359,
      "step": 5163
    },
    {
      "epoch": 0.4757912194223062,
      "grad_norm": 0.9932677774158268,
      "learning_rate": 2.9049993504569663e-06,
      "loss": 0.1465,
      "step": 5164
    },
    {
      "epoch": 0.4758833555995762,
      "grad_norm": 0.8608562365126148,
      "learning_rate": 2.9042476113149193e-06,
      "loss": 0.136,
      "step": 5165
    },
    {
      "epoch": 0.47597549177684617,
      "grad_norm": 0.9028113269728798,
      "learning_rate": 2.9034958346384385e-06,
      "loss": 0.1507,
      "step": 5166
    },
    {
      "epoch": 0.47606762795411617,
      "grad_norm": 0.9736011569329539,
      "learning_rate": 2.9027440204973263e-06,
      "loss": 0.1503,
      "step": 5167
    },
    {
      "epoch": 0.47615976413138617,
      "grad_norm": 0.8603637223144222,
      "learning_rate": 2.9019921689613874e-06,
      "loss": 0.1339,
      "step": 5168
    },
    {
      "epoch": 0.4762519003086562,
      "grad_norm": 0.8692468134048886,
      "learning_rate": 2.9012402801004334e-06,
      "loss": 0.1389,
      "step": 5169
    },
    {
      "epoch": 0.4763440364859262,
      "grad_norm": 0.9180819372587773,
      "learning_rate": 2.9004883539842756e-06,
      "loss": 0.1456,
      "step": 5170
    },
    {
      "epoch": 0.4764361726631962,
      "grad_norm": 0.9080609642454912,
      "learning_rate": 2.8997363906827315e-06,
      "loss": 0.1365,
      "step": 5171
    },
    {
      "epoch": 0.4765283088404662,
      "grad_norm": 0.8854688032810417,
      "learning_rate": 2.8989843902656202e-06,
      "loss": 0.1291,
      "step": 5172
    },
    {
      "epoch": 0.4766204450177362,
      "grad_norm": 0.9528179450062821,
      "learning_rate": 2.898232352802765e-06,
      "loss": 0.1453,
      "step": 5173
    },
    {
      "epoch": 0.4767125811950062,
      "grad_norm": 0.8690123948765117,
      "learning_rate": 2.8974802783639934e-06,
      "loss": 0.133,
      "step": 5174
    },
    {
      "epoch": 0.4768047173722762,
      "grad_norm": 0.943895660424614,
      "learning_rate": 2.8967281670191357e-06,
      "loss": 0.146,
      "step": 5175
    },
    {
      "epoch": 0.47689685354954625,
      "grad_norm": 0.922222468563535,
      "learning_rate": 2.895976018838024e-06,
      "loss": 0.1397,
      "step": 5176
    },
    {
      "epoch": 0.47698898972681625,
      "grad_norm": 0.8783345231451642,
      "learning_rate": 2.895223833890497e-06,
      "loss": 0.1368,
      "step": 5177
    },
    {
      "epoch": 0.47708112590408625,
      "grad_norm": 0.8963037452817142,
      "learning_rate": 2.8944716122463933e-06,
      "loss": 0.127,
      "step": 5178
    },
    {
      "epoch": 0.47717326208135624,
      "grad_norm": 0.9540068837330717,
      "learning_rate": 2.8937193539755593e-06,
      "loss": 0.133,
      "step": 5179
    },
    {
      "epoch": 0.47726539825862624,
      "grad_norm": 0.926155655361845,
      "learning_rate": 2.8929670591478404e-06,
      "loss": 0.1597,
      "step": 5180
    },
    {
      "epoch": 0.47735753443589624,
      "grad_norm": 0.9621119246946411,
      "learning_rate": 2.8922147278330876e-06,
      "loss": 0.1544,
      "step": 5181
    },
    {
      "epoch": 0.47744967061316623,
      "grad_norm": 0.957134161209612,
      "learning_rate": 2.891462360101156e-06,
      "loss": 0.1517,
      "step": 5182
    },
    {
      "epoch": 0.4775418067904363,
      "grad_norm": 0.9236587620190673,
      "learning_rate": 2.890709956021901e-06,
      "loss": 0.14,
      "step": 5183
    },
    {
      "epoch": 0.4776339429677063,
      "grad_norm": 0.9000620955817382,
      "learning_rate": 2.8899575156651847e-06,
      "loss": 0.1374,
      "step": 5184
    },
    {
      "epoch": 0.4777260791449763,
      "grad_norm": 0.919864564287617,
      "learning_rate": 2.889205039100872e-06,
      "loss": 0.1415,
      "step": 5185
    },
    {
      "epoch": 0.4778182153222463,
      "grad_norm": 0.8402420054493916,
      "learning_rate": 2.8884525263988288e-06,
      "loss": 0.1265,
      "step": 5186
    },
    {
      "epoch": 0.4779103514995163,
      "grad_norm": 0.8852975503830686,
      "learning_rate": 2.887699977628927e-06,
      "loss": 0.1262,
      "step": 5187
    },
    {
      "epoch": 0.47800248767678627,
      "grad_norm": 0.8630089162205361,
      "learning_rate": 2.886947392861041e-06,
      "loss": 0.1349,
      "step": 5188
    },
    {
      "epoch": 0.4780946238540563,
      "grad_norm": 0.8956680318013008,
      "learning_rate": 2.886194772165046e-06,
      "loss": 0.1479,
      "step": 5189
    },
    {
      "epoch": 0.4781867600313263,
      "grad_norm": 0.9614757825575214,
      "learning_rate": 2.8854421156108276e-06,
      "loss": 0.15,
      "step": 5190
    },
    {
      "epoch": 0.4782788962085963,
      "grad_norm": 0.9044999706904249,
      "learning_rate": 2.8846894232682654e-06,
      "loss": 0.1476,
      "step": 5191
    },
    {
      "epoch": 0.4783710323858663,
      "grad_norm": 0.8821383917105962,
      "learning_rate": 2.883936695207249e-06,
      "loss": 0.147,
      "step": 5192
    },
    {
      "epoch": 0.4784631685631363,
      "grad_norm": 0.9243868354852021,
      "learning_rate": 2.8831839314976696e-06,
      "loss": 0.1506,
      "step": 5193
    },
    {
      "epoch": 0.4785553047404063,
      "grad_norm": 0.8698264824416307,
      "learning_rate": 2.8824311322094213e-06,
      "loss": 0.137,
      "step": 5194
    },
    {
      "epoch": 0.4786474409176763,
      "grad_norm": 0.9675043619811561,
      "learning_rate": 2.8816782974124007e-06,
      "loss": 0.1586,
      "step": 5195
    },
    {
      "epoch": 0.47873957709494636,
      "grad_norm": 0.8938915507442141,
      "learning_rate": 2.880925427176509e-06,
      "loss": 0.1361,
      "step": 5196
    },
    {
      "epoch": 0.47883171327221635,
      "grad_norm": 0.9334479510460674,
      "learning_rate": 2.8801725215716504e-06,
      "loss": 0.1474,
      "step": 5197
    },
    {
      "epoch": 0.47892384944948635,
      "grad_norm": 0.8647314780366868,
      "learning_rate": 2.879419580667733e-06,
      "loss": 0.1216,
      "step": 5198
    },
    {
      "epoch": 0.47901598562675635,
      "grad_norm": 0.8678424881431106,
      "learning_rate": 2.878666604534665e-06,
      "loss": 0.1295,
      "step": 5199
    },
    {
      "epoch": 0.47910812180402634,
      "grad_norm": 0.8752317443418055,
      "learning_rate": 2.8779135932423633e-06,
      "loss": 0.1377,
      "step": 5200
    },
    {
      "epoch": 0.47920025798129634,
      "grad_norm": 0.9457831979152571,
      "learning_rate": 2.877160546860744e-06,
      "loss": 0.1505,
      "step": 5201
    },
    {
      "epoch": 0.47929239415856634,
      "grad_norm": 0.870921425375068,
      "learning_rate": 2.8764074654597267e-06,
      "loss": 0.1312,
      "step": 5202
    },
    {
      "epoch": 0.4793845303358364,
      "grad_norm": 0.9157472068782744,
      "learning_rate": 2.8756543491092352e-06,
      "loss": 0.1472,
      "step": 5203
    },
    {
      "epoch": 0.4794766665131064,
      "grad_norm": 0.9300844720183196,
      "learning_rate": 2.8749011978791984e-06,
      "loss": 0.1528,
      "step": 5204
    },
    {
      "epoch": 0.4795688026903764,
      "grad_norm": 0.8245854441755501,
      "learning_rate": 2.8741480118395443e-06,
      "loss": 0.1167,
      "step": 5205
    },
    {
      "epoch": 0.4796609388676464,
      "grad_norm": 0.9984410331300712,
      "learning_rate": 2.873394791060207e-06,
      "loss": 0.1477,
      "step": 5206
    },
    {
      "epoch": 0.4797530750449164,
      "grad_norm": 0.9181133933506418,
      "learning_rate": 2.872641535611123e-06,
      "loss": 0.1526,
      "step": 5207
    },
    {
      "epoch": 0.4798452112221864,
      "grad_norm": 0.9508564827737102,
      "learning_rate": 2.8718882455622334e-06,
      "loss": 0.1596,
      "step": 5208
    },
    {
      "epoch": 0.47993734739945637,
      "grad_norm": 0.8843054175098252,
      "learning_rate": 2.871134920983479e-06,
      "loss": 0.1333,
      "step": 5209
    },
    {
      "epoch": 0.4800294835767264,
      "grad_norm": 0.9349077875143057,
      "learning_rate": 2.8703815619448072e-06,
      "loss": 0.1379,
      "step": 5210
    },
    {
      "epoch": 0.4801216197539964,
      "grad_norm": 0.8935820005729005,
      "learning_rate": 2.8696281685161676e-06,
      "loss": 0.1366,
      "step": 5211
    },
    {
      "epoch": 0.4802137559312664,
      "grad_norm": 0.889136554484447,
      "learning_rate": 2.868874740767513e-06,
      "loss": 0.1359,
      "step": 5212
    },
    {
      "epoch": 0.4803058921085364,
      "grad_norm": 0.9768791414763527,
      "learning_rate": 2.8681212787687997e-06,
      "loss": 0.1468,
      "step": 5213
    },
    {
      "epoch": 0.4803980282858064,
      "grad_norm": 0.9558517666869947,
      "learning_rate": 2.8673677825899852e-06,
      "loss": 0.156,
      "step": 5214
    },
    {
      "epoch": 0.4804901644630764,
      "grad_norm": 0.9018021401267595,
      "learning_rate": 2.866614252301033e-06,
      "loss": 0.1298,
      "step": 5215
    },
    {
      "epoch": 0.4805823006403464,
      "grad_norm": 0.9408168488926255,
      "learning_rate": 2.865860687971907e-06,
      "loss": 0.1504,
      "step": 5216
    },
    {
      "epoch": 0.48067443681761646,
      "grad_norm": 0.9242266840610662,
      "learning_rate": 2.8651070896725786e-06,
      "loss": 0.1438,
      "step": 5217
    },
    {
      "epoch": 0.48076657299488645,
      "grad_norm": 0.8848173378094824,
      "learning_rate": 2.864353457473016e-06,
      "loss": 0.1456,
      "step": 5218
    },
    {
      "epoch": 0.48085870917215645,
      "grad_norm": 0.9226387337834759,
      "learning_rate": 2.863599791443196e-06,
      "loss": 0.1452,
      "step": 5219
    },
    {
      "epoch": 0.48095084534942645,
      "grad_norm": 0.9215882804526133,
      "learning_rate": 2.8628460916530967e-06,
      "loss": 0.1484,
      "step": 5220
    },
    {
      "epoch": 0.48104298152669644,
      "grad_norm": 0.9025065525594986,
      "learning_rate": 2.8620923581726983e-06,
      "loss": 0.1481,
      "step": 5221
    },
    {
      "epoch": 0.48113511770396644,
      "grad_norm": 0.9097430267440212,
      "learning_rate": 2.861338591071986e-06,
      "loss": 0.1434,
      "step": 5222
    },
    {
      "epoch": 0.4812272538812365,
      "grad_norm": 0.8802740153350054,
      "learning_rate": 2.860584790420946e-06,
      "loss": 0.1362,
      "step": 5223
    },
    {
      "epoch": 0.4813193900585065,
      "grad_norm": 0.9819178496384163,
      "learning_rate": 2.85983095628957e-06,
      "loss": 0.1554,
      "step": 5224
    },
    {
      "epoch": 0.4814115262357765,
      "grad_norm": 0.8843967606324523,
      "learning_rate": 2.8590770887478507e-06,
      "loss": 0.1425,
      "step": 5225
    },
    {
      "epoch": 0.4815036624130465,
      "grad_norm": 0.8725658702095903,
      "learning_rate": 2.8583231878657847e-06,
      "loss": 0.1329,
      "step": 5226
    },
    {
      "epoch": 0.4815957985903165,
      "grad_norm": 0.8909883448928446,
      "learning_rate": 2.8575692537133726e-06,
      "loss": 0.1499,
      "step": 5227
    },
    {
      "epoch": 0.4816879347675865,
      "grad_norm": 0.9430742708330203,
      "learning_rate": 2.8568152863606167e-06,
      "loss": 0.1433,
      "step": 5228
    },
    {
      "epoch": 0.4817800709448565,
      "grad_norm": 0.8869863335741072,
      "learning_rate": 2.8560612858775233e-06,
      "loss": 0.1303,
      "step": 5229
    },
    {
      "epoch": 0.48187220712212653,
      "grad_norm": 0.9377415356064038,
      "learning_rate": 2.8553072523341008e-06,
      "loss": 0.1498,
      "step": 5230
    },
    {
      "epoch": 0.4819643432993965,
      "grad_norm": 0.8758553521749834,
      "learning_rate": 2.8545531858003623e-06,
      "loss": 0.1238,
      "step": 5231
    },
    {
      "epoch": 0.4820564794766665,
      "grad_norm": 0.8988921073626308,
      "learning_rate": 2.8537990863463212e-06,
      "loss": 0.1298,
      "step": 5232
    },
    {
      "epoch": 0.4821486156539365,
      "grad_norm": 0.8798732879738537,
      "learning_rate": 2.853044954041998e-06,
      "loss": 0.1454,
      "step": 5233
    },
    {
      "epoch": 0.4822407518312065,
      "grad_norm": 0.9246980033341549,
      "learning_rate": 2.8522907889574117e-06,
      "loss": 0.1524,
      "step": 5234
    },
    {
      "epoch": 0.4823328880084765,
      "grad_norm": 0.9174157745994747,
      "learning_rate": 2.851536591162589e-06,
      "loss": 0.1529,
      "step": 5235
    },
    {
      "epoch": 0.4824250241857465,
      "grad_norm": 0.9424499320791001,
      "learning_rate": 2.8507823607275554e-06,
      "loss": 0.1484,
      "step": 5236
    },
    {
      "epoch": 0.48251716036301656,
      "grad_norm": 0.8256974300101093,
      "learning_rate": 2.8500280977223416e-06,
      "loss": 0.1247,
      "step": 5237
    },
    {
      "epoch": 0.48260929654028656,
      "grad_norm": 0.8940530112241213,
      "learning_rate": 2.8492738022169815e-06,
      "loss": 0.1393,
      "step": 5238
    },
    {
      "epoch": 0.48270143271755656,
      "grad_norm": 0.8472711799346075,
      "learning_rate": 2.848519474281511e-06,
      "loss": 0.1298,
      "step": 5239
    },
    {
      "epoch": 0.48279356889482655,
      "grad_norm": 0.9181352849217045,
      "learning_rate": 2.84776511398597e-06,
      "loss": 0.1498,
      "step": 5240
    },
    {
      "epoch": 0.48288570507209655,
      "grad_norm": 0.9012991958361424,
      "learning_rate": 2.847010721400401e-06,
      "loss": 0.1477,
      "step": 5241
    },
    {
      "epoch": 0.48297784124936655,
      "grad_norm": 0.814508085647066,
      "learning_rate": 2.8462562965948493e-06,
      "loss": 0.1316,
      "step": 5242
    },
    {
      "epoch": 0.48306997742663654,
      "grad_norm": 0.8971639247190899,
      "learning_rate": 2.8455018396393618e-06,
      "loss": 0.1386,
      "step": 5243
    },
    {
      "epoch": 0.4831621136039066,
      "grad_norm": 0.8670252001320244,
      "learning_rate": 2.8447473506039934e-06,
      "loss": 0.1274,
      "step": 5244
    },
    {
      "epoch": 0.4832542497811766,
      "grad_norm": 0.9182328079782047,
      "learning_rate": 2.8439928295587948e-06,
      "loss": 0.1402,
      "step": 5245
    },
    {
      "epoch": 0.4833463859584466,
      "grad_norm": 0.8738254881249767,
      "learning_rate": 2.843238276573826e-06,
      "loss": 0.1395,
      "step": 5246
    },
    {
      "epoch": 0.4834385221357166,
      "grad_norm": 0.8751207383004573,
      "learning_rate": 2.8424836917191455e-06,
      "loss": 0.1397,
      "step": 5247
    },
    {
      "epoch": 0.4835306583129866,
      "grad_norm": 0.8559763699901859,
      "learning_rate": 2.841729075064818e-06,
      "loss": 0.1326,
      "step": 5248
    },
    {
      "epoch": 0.4836227944902566,
      "grad_norm": 0.9330748310081932,
      "learning_rate": 2.840974426680909e-06,
      "loss": 0.1308,
      "step": 5249
    },
    {
      "epoch": 0.4837149306675266,
      "grad_norm": 0.9467378848060349,
      "learning_rate": 2.840219746637487e-06,
      "loss": 0.1499,
      "step": 5250
    },
    {
      "epoch": 0.48380706684479663,
      "grad_norm": 0.9756656663187364,
      "learning_rate": 2.8394650350046256e-06,
      "loss": 0.1326,
      "step": 5251
    },
    {
      "epoch": 0.4838992030220666,
      "grad_norm": 0.9544148043520337,
      "learning_rate": 2.8387102918523995e-06,
      "loss": 0.1516,
      "step": 5252
    },
    {
      "epoch": 0.4839913391993366,
      "grad_norm": 0.8967080887994231,
      "learning_rate": 2.8379555172508853e-06,
      "loss": 0.125,
      "step": 5253
    },
    {
      "epoch": 0.4840834753766066,
      "grad_norm": 0.9096511642692993,
      "learning_rate": 2.8372007112701657e-06,
      "loss": 0.1415,
      "step": 5254
    },
    {
      "epoch": 0.4841756115538766,
      "grad_norm": 0.9212905745476281,
      "learning_rate": 2.8364458739803237e-06,
      "loss": 0.1394,
      "step": 5255
    },
    {
      "epoch": 0.4842677477311466,
      "grad_norm": 0.9188703117763569,
      "learning_rate": 2.835691005451446e-06,
      "loss": 0.1497,
      "step": 5256
    },
    {
      "epoch": 0.48435988390841667,
      "grad_norm": 0.9223024413019026,
      "learning_rate": 2.8349361057536223e-06,
      "loss": 0.1438,
      "step": 5257
    },
    {
      "epoch": 0.48445202008568666,
      "grad_norm": 0.9040847454967237,
      "learning_rate": 2.8341811749569452e-06,
      "loss": 0.1391,
      "step": 5258
    },
    {
      "epoch": 0.48454415626295666,
      "grad_norm": 0.9561157127435418,
      "learning_rate": 2.8334262131315094e-06,
      "loss": 0.1476,
      "step": 5259
    },
    {
      "epoch": 0.48463629244022666,
      "grad_norm": 0.8481512102632566,
      "learning_rate": 2.832671220347415e-06,
      "loss": 0.1265,
      "step": 5260
    },
    {
      "epoch": 0.48472842861749665,
      "grad_norm": 0.9162494132871907,
      "learning_rate": 2.831916196674761e-06,
      "loss": 0.1506,
      "step": 5261
    },
    {
      "epoch": 0.48482056479476665,
      "grad_norm": 0.9285013656763865,
      "learning_rate": 2.831161142183653e-06,
      "loss": 0.1442,
      "step": 5262
    },
    {
      "epoch": 0.48491270097203665,
      "grad_norm": 0.9376551897657546,
      "learning_rate": 2.830406056944197e-06,
      "loss": 0.1395,
      "step": 5263
    },
    {
      "epoch": 0.4850048371493067,
      "grad_norm": 0.9226971589582109,
      "learning_rate": 2.8296509410265032e-06,
      "loss": 0.1478,
      "step": 5264
    },
    {
      "epoch": 0.4850969733265767,
      "grad_norm": 0.9000083531197767,
      "learning_rate": 2.8288957945006845e-06,
      "loss": 0.1462,
      "step": 5265
    },
    {
      "epoch": 0.4851891095038467,
      "grad_norm": 0.89690674045041,
      "learning_rate": 2.8281406174368555e-06,
      "loss": 0.1419,
      "step": 5266
    },
    {
      "epoch": 0.4852812456811167,
      "grad_norm": 0.9345061065274617,
      "learning_rate": 2.827385409905134e-06,
      "loss": 0.1488,
      "step": 5267
    },
    {
      "epoch": 0.4853733818583867,
      "grad_norm": 0.9064937256577421,
      "learning_rate": 2.8266301719756427e-06,
      "loss": 0.1428,
      "step": 5268
    },
    {
      "epoch": 0.4854655180356567,
      "grad_norm": 0.9088525413468661,
      "learning_rate": 2.825874903718505e-06,
      "loss": 0.1478,
      "step": 5269
    },
    {
      "epoch": 0.4855576542129267,
      "grad_norm": 0.9271486145199646,
      "learning_rate": 2.8251196052038475e-06,
      "loss": 0.1551,
      "step": 5270
    },
    {
      "epoch": 0.48564979039019673,
      "grad_norm": 0.9522761407530571,
      "learning_rate": 2.8243642765017993e-06,
      "loss": 0.1507,
      "step": 5271
    },
    {
      "epoch": 0.48574192656746673,
      "grad_norm": 0.8485048602690964,
      "learning_rate": 2.8236089176824926e-06,
      "loss": 0.123,
      "step": 5272
    },
    {
      "epoch": 0.4858340627447367,
      "grad_norm": 0.8780735623699752,
      "learning_rate": 2.8228535288160647e-06,
      "loss": 0.1285,
      "step": 5273
    },
    {
      "epoch": 0.4859261989220067,
      "grad_norm": 0.9869769002349372,
      "learning_rate": 2.8220981099726503e-06,
      "loss": 0.1501,
      "step": 5274
    },
    {
      "epoch": 0.4860183350992767,
      "grad_norm": 0.9459927723090031,
      "learning_rate": 2.821342661222392e-06,
      "loss": 0.1459,
      "step": 5275
    },
    {
      "epoch": 0.4861104712765467,
      "grad_norm": 0.943149589932418,
      "learning_rate": 2.8205871826354336e-06,
      "loss": 0.1447,
      "step": 5276
    },
    {
      "epoch": 0.4862026074538167,
      "grad_norm": 0.8907007660895891,
      "learning_rate": 2.819831674281921e-06,
      "loss": 0.1372,
      "step": 5277
    },
    {
      "epoch": 0.48629474363108677,
      "grad_norm": 0.9140557800767286,
      "learning_rate": 2.819076136232002e-06,
      "loss": 0.1374,
      "step": 5278
    },
    {
      "epoch": 0.48638687980835676,
      "grad_norm": 0.963337859306478,
      "learning_rate": 2.81832056855583e-06,
      "loss": 0.1486,
      "step": 5279
    },
    {
      "epoch": 0.48647901598562676,
      "grad_norm": 0.9274489957087415,
      "learning_rate": 2.8175649713235586e-06,
      "loss": 0.1447,
      "step": 5280
    },
    {
      "epoch": 0.48657115216289676,
      "grad_norm": 0.8655898357592946,
      "learning_rate": 2.8168093446053455e-06,
      "loss": 0.1321,
      "step": 5281
    },
    {
      "epoch": 0.48666328834016676,
      "grad_norm": 0.9215423663303324,
      "learning_rate": 2.816053688471351e-06,
      "loss": 0.1351,
      "step": 5282
    },
    {
      "epoch": 0.48675542451743675,
      "grad_norm": 0.9621694893535245,
      "learning_rate": 2.815298002991738e-06,
      "loss": 0.1382,
      "step": 5283
    },
    {
      "epoch": 0.48684756069470675,
      "grad_norm": 0.8585957646539193,
      "learning_rate": 2.8145422882366707e-06,
      "loss": 0.1251,
      "step": 5284
    },
    {
      "epoch": 0.4869396968719768,
      "grad_norm": 0.9610427782236858,
      "learning_rate": 2.8137865442763186e-06,
      "loss": 0.155,
      "step": 5285
    },
    {
      "epoch": 0.4870318330492468,
      "grad_norm": 0.8803502360568106,
      "learning_rate": 2.813030771180851e-06,
      "loss": 0.1389,
      "step": 5286
    },
    {
      "epoch": 0.4871239692265168,
      "grad_norm": 0.8809740678094535,
      "learning_rate": 2.8122749690204443e-06,
      "loss": 0.1375,
      "step": 5287
    },
    {
      "epoch": 0.4872161054037868,
      "grad_norm": 0.8903435006680406,
      "learning_rate": 2.8115191378652716e-06,
      "loss": 0.1402,
      "step": 5288
    },
    {
      "epoch": 0.4873082415810568,
      "grad_norm": 0.8398996275510962,
      "learning_rate": 2.810763277785514e-06,
      "loss": 0.1345,
      "step": 5289
    },
    {
      "epoch": 0.4874003777583268,
      "grad_norm": 0.9572207221710832,
      "learning_rate": 2.810007388851353e-06,
      "loss": 0.1616,
      "step": 5290
    },
    {
      "epoch": 0.48749251393559684,
      "grad_norm": 0.913103201905886,
      "learning_rate": 2.809251471132972e-06,
      "loss": 0.1513,
      "step": 5291
    },
    {
      "epoch": 0.48758465011286684,
      "grad_norm": 0.8682324842644803,
      "learning_rate": 2.808495524700559e-06,
      "loss": 0.1291,
      "step": 5292
    },
    {
      "epoch": 0.48767678629013683,
      "grad_norm": 0.8997230016251552,
      "learning_rate": 2.807739549624303e-06,
      "loss": 0.1389,
      "step": 5293
    },
    {
      "epoch": 0.48776892246740683,
      "grad_norm": 0.8509844715151446,
      "learning_rate": 2.8069835459743965e-06,
      "loss": 0.1393,
      "step": 5294
    },
    {
      "epoch": 0.4878610586446768,
      "grad_norm": 0.865759399554568,
      "learning_rate": 2.8062275138210355e-06,
      "loss": 0.1351,
      "step": 5295
    },
    {
      "epoch": 0.4879531948219468,
      "grad_norm": 0.8590645407142179,
      "learning_rate": 2.805471453234416e-06,
      "loss": 0.1308,
      "step": 5296
    },
    {
      "epoch": 0.4880453309992168,
      "grad_norm": 0.9560127081452957,
      "learning_rate": 2.80471536428474e-06,
      "loss": 0.1431,
      "step": 5297
    },
    {
      "epoch": 0.48813746717648687,
      "grad_norm": 0.9540596366204239,
      "learning_rate": 2.8039592470422096e-06,
      "loss": 0.147,
      "step": 5298
    },
    {
      "epoch": 0.48822960335375687,
      "grad_norm": 0.9183606271614978,
      "learning_rate": 2.8032031015770296e-06,
      "loss": 0.1493,
      "step": 5299
    },
    {
      "epoch": 0.48832173953102687,
      "grad_norm": 0.9200811671231739,
      "learning_rate": 2.8024469279594102e-06,
      "loss": 0.1383,
      "step": 5300
    },
    {
      "epoch": 0.48841387570829686,
      "grad_norm": 0.8694060487879737,
      "learning_rate": 2.80169072625956e-06,
      "loss": 0.1333,
      "step": 5301
    },
    {
      "epoch": 0.48850601188556686,
      "grad_norm": 0.9202762624654115,
      "learning_rate": 2.8009344965476935e-06,
      "loss": 0.134,
      "step": 5302
    },
    {
      "epoch": 0.48859814806283686,
      "grad_norm": 0.9367392274537912,
      "learning_rate": 2.8001782388940267e-06,
      "loss": 0.1547,
      "step": 5303
    },
    {
      "epoch": 0.48869028424010685,
      "grad_norm": 0.9219003180099276,
      "learning_rate": 2.7994219533687784e-06,
      "loss": 0.1417,
      "step": 5304
    },
    {
      "epoch": 0.4887824204173769,
      "grad_norm": 0.8394096894153247,
      "learning_rate": 2.79866564004217e-06,
      "loss": 0.1372,
      "step": 5305
    },
    {
      "epoch": 0.4888745565946469,
      "grad_norm": 0.9632709174947548,
      "learning_rate": 2.797909298984424e-06,
      "loss": 0.1501,
      "step": 5306
    },
    {
      "epoch": 0.4889666927719169,
      "grad_norm": 0.8824075293556416,
      "learning_rate": 2.797152930265767e-06,
      "loss": 0.143,
      "step": 5307
    },
    {
      "epoch": 0.4890588289491869,
      "grad_norm": 0.8817934891698063,
      "learning_rate": 2.796396533956429e-06,
      "loss": 0.1418,
      "step": 5308
    },
    {
      "epoch": 0.4891509651264569,
      "grad_norm": 0.9544326229762716,
      "learning_rate": 2.7956401101266407e-06,
      "loss": 0.1504,
      "step": 5309
    },
    {
      "epoch": 0.4892431013037269,
      "grad_norm": 0.9322273063288705,
      "learning_rate": 2.7948836588466373e-06,
      "loss": 0.1456,
      "step": 5310
    },
    {
      "epoch": 0.4893352374809969,
      "grad_norm": 0.909857805157454,
      "learning_rate": 2.794127180186653e-06,
      "loss": 0.1401,
      "step": 5311
    },
    {
      "epoch": 0.48942737365826694,
      "grad_norm": 0.939370763815054,
      "learning_rate": 2.7933706742169297e-06,
      "loss": 0.1482,
      "step": 5312
    },
    {
      "epoch": 0.48951950983553694,
      "grad_norm": 0.8992145813911244,
      "learning_rate": 2.792614141007707e-06,
      "loss": 0.1437,
      "step": 5313
    },
    {
      "epoch": 0.48961164601280693,
      "grad_norm": 0.9533663660430065,
      "learning_rate": 2.7918575806292305e-06,
      "loss": 0.1432,
      "step": 5314
    },
    {
      "epoch": 0.48970378219007693,
      "grad_norm": 0.9175012649462685,
      "learning_rate": 2.791100993151745e-06,
      "loss": 0.1449,
      "step": 5315
    },
    {
      "epoch": 0.4897959183673469,
      "grad_norm": 0.9633249103576214,
      "learning_rate": 2.790344378645502e-06,
      "loss": 0.1503,
      "step": 5316
    },
    {
      "epoch": 0.4898880545446169,
      "grad_norm": 0.9468216343031558,
      "learning_rate": 2.7895877371807516e-06,
      "loss": 0.1546,
      "step": 5317
    },
    {
      "epoch": 0.4899801907218869,
      "grad_norm": 0.90993861662428,
      "learning_rate": 2.7888310688277493e-06,
      "loss": 0.1412,
      "step": 5318
    },
    {
      "epoch": 0.490072326899157,
      "grad_norm": 0.9040365876464947,
      "learning_rate": 2.7880743736567505e-06,
      "loss": 0.141,
      "step": 5319
    },
    {
      "epoch": 0.49016446307642697,
      "grad_norm": 0.9620046505695853,
      "learning_rate": 2.7873176517380157e-06,
      "loss": 0.1545,
      "step": 5320
    },
    {
      "epoch": 0.49025659925369697,
      "grad_norm": 0.9103509663251281,
      "learning_rate": 2.786560903141805e-06,
      "loss": 0.1348,
      "step": 5321
    },
    {
      "epoch": 0.49034873543096696,
      "grad_norm": 0.8917965010613744,
      "learning_rate": 2.7858041279383854e-06,
      "loss": 0.1457,
      "step": 5322
    },
    {
      "epoch": 0.49044087160823696,
      "grad_norm": 0.8894071380853192,
      "learning_rate": 2.7850473261980197e-06,
      "loss": 0.1396,
      "step": 5323
    },
    {
      "epoch": 0.49053300778550696,
      "grad_norm": 0.9274368507998124,
      "learning_rate": 2.78429049799098e-06,
      "loss": 0.1548,
      "step": 5324
    },
    {
      "epoch": 0.490625143962777,
      "grad_norm": 0.9052156307937325,
      "learning_rate": 2.783533643387537e-06,
      "loss": 0.1438,
      "step": 5325
    },
    {
      "epoch": 0.490717280140047,
      "grad_norm": 0.8956506738203168,
      "learning_rate": 2.7827767624579645e-06,
      "loss": 0.1307,
      "step": 5326
    },
    {
      "epoch": 0.490809416317317,
      "grad_norm": 0.9182979070301321,
      "learning_rate": 2.7820198552725404e-06,
      "loss": 0.1478,
      "step": 5327
    },
    {
      "epoch": 0.490901552494587,
      "grad_norm": 0.9067870769765347,
      "learning_rate": 2.781262921901541e-06,
      "loss": 0.1323,
      "step": 5328
    },
    {
      "epoch": 0.490993688671857,
      "grad_norm": 0.9321816561616926,
      "learning_rate": 2.780505962415249e-06,
      "loss": 0.1403,
      "step": 5329
    },
    {
      "epoch": 0.491085824849127,
      "grad_norm": 0.9245267858958562,
      "learning_rate": 2.779748976883949e-06,
      "loss": 0.1419,
      "step": 5330
    },
    {
      "epoch": 0.491177961026397,
      "grad_norm": 0.9301677422865269,
      "learning_rate": 2.7789919653779257e-06,
      "loss": 0.1374,
      "step": 5331
    },
    {
      "epoch": 0.49127009720366704,
      "grad_norm": 0.9786411388579338,
      "learning_rate": 2.7782349279674684e-06,
      "loss": 0.1587,
      "step": 5332
    },
    {
      "epoch": 0.49136223338093704,
      "grad_norm": 0.912963264042218,
      "learning_rate": 2.7774778647228688e-06,
      "loss": 0.1406,
      "step": 5333
    },
    {
      "epoch": 0.49145436955820704,
      "grad_norm": 0.9085498490897894,
      "learning_rate": 2.7767207757144186e-06,
      "loss": 0.1417,
      "step": 5334
    },
    {
      "epoch": 0.49154650573547704,
      "grad_norm": 0.9226144813877271,
      "learning_rate": 2.7759636610124158e-06,
      "loss": 0.142,
      "step": 5335
    },
    {
      "epoch": 0.49163864191274703,
      "grad_norm": 0.9713959796758398,
      "learning_rate": 2.7752065206871564e-06,
      "loss": 0.1375,
      "step": 5336
    },
    {
      "epoch": 0.49173077809001703,
      "grad_norm": 0.8991677667705583,
      "learning_rate": 2.7744493548089425e-06,
      "loss": 0.1304,
      "step": 5337
    },
    {
      "epoch": 0.491822914267287,
      "grad_norm": 0.8722949087858429,
      "learning_rate": 2.773692163448076e-06,
      "loss": 0.1346,
      "step": 5338
    },
    {
      "epoch": 0.4919150504445571,
      "grad_norm": 0.9623034124111783,
      "learning_rate": 2.7729349466748634e-06,
      "loss": 0.1347,
      "step": 5339
    },
    {
      "epoch": 0.4920071866218271,
      "grad_norm": 1.0356559269275247,
      "learning_rate": 2.772177704559611e-06,
      "loss": 0.1553,
      "step": 5340
    },
    {
      "epoch": 0.49209932279909707,
      "grad_norm": 0.9648096203571851,
      "learning_rate": 2.7714204371726293e-06,
      "loss": 0.148,
      "step": 5341
    },
    {
      "epoch": 0.49219145897636707,
      "grad_norm": 0.8771413421110204,
      "learning_rate": 2.770663144584231e-06,
      "loss": 0.1315,
      "step": 5342
    },
    {
      "epoch": 0.49228359515363707,
      "grad_norm": 0.9609389946251562,
      "learning_rate": 2.769905826864731e-06,
      "loss": 0.1541,
      "step": 5343
    },
    {
      "epoch": 0.49237573133090706,
      "grad_norm": 0.9202480450196757,
      "learning_rate": 2.769148484084445e-06,
      "loss": 0.1323,
      "step": 5344
    },
    {
      "epoch": 0.49246786750817706,
      "grad_norm": 0.9168884464799304,
      "learning_rate": 2.7683911163136944e-06,
      "loss": 0.1356,
      "step": 5345
    },
    {
      "epoch": 0.4925600036854471,
      "grad_norm": 0.8905102892909667,
      "learning_rate": 2.767633723622799e-06,
      "loss": 0.1327,
      "step": 5346
    },
    {
      "epoch": 0.4926521398627171,
      "grad_norm": 0.9283258835996472,
      "learning_rate": 2.7668763060820842e-06,
      "loss": 0.1397,
      "step": 5347
    },
    {
      "epoch": 0.4927442760399871,
      "grad_norm": 0.9031577778408963,
      "learning_rate": 2.7661188637618752e-06,
      "loss": 0.1432,
      "step": 5348
    },
    {
      "epoch": 0.4928364122172571,
      "grad_norm": 0.9704106362718894,
      "learning_rate": 2.7653613967325018e-06,
      "loss": 0.1452,
      "step": 5349
    },
    {
      "epoch": 0.4929285483945271,
      "grad_norm": 0.9047274748741216,
      "learning_rate": 2.7646039050642926e-06,
      "loss": 0.141,
      "step": 5350
    },
    {
      "epoch": 0.4930206845717971,
      "grad_norm": 0.9080009969739212,
      "learning_rate": 2.763846388827584e-06,
      "loss": 0.1323,
      "step": 5351
    },
    {
      "epoch": 0.49311282074906715,
      "grad_norm": 0.9190765010543278,
      "learning_rate": 2.7630888480927082e-06,
      "loss": 0.143,
      "step": 5352
    },
    {
      "epoch": 0.49320495692633715,
      "grad_norm": 0.836165641654129,
      "learning_rate": 2.7623312829300053e-06,
      "loss": 0.1317,
      "step": 5353
    },
    {
      "epoch": 0.49329709310360714,
      "grad_norm": 1.0026628826121744,
      "learning_rate": 2.7615736934098146e-06,
      "loss": 0.1598,
      "step": 5354
    },
    {
      "epoch": 0.49338922928087714,
      "grad_norm": 0.9572287593009317,
      "learning_rate": 2.760816079602478e-06,
      "loss": 0.146,
      "step": 5355
    },
    {
      "epoch": 0.49348136545814714,
      "grad_norm": 0.8912204631678248,
      "learning_rate": 2.760058441578341e-06,
      "loss": 0.1339,
      "step": 5356
    },
    {
      "epoch": 0.49357350163541713,
      "grad_norm": 0.914931483053279,
      "learning_rate": 2.7593007794077493e-06,
      "loss": 0.1279,
      "step": 5357
    },
    {
      "epoch": 0.49366563781268713,
      "grad_norm": 0.9112520500430483,
      "learning_rate": 2.7585430931610526e-06,
      "loss": 0.1336,
      "step": 5358
    },
    {
      "epoch": 0.4937577739899572,
      "grad_norm": 0.8758937123292749,
      "learning_rate": 2.7577853829086014e-06,
      "loss": 0.133,
      "step": 5359
    },
    {
      "epoch": 0.4938499101672272,
      "grad_norm": 0.9703690055488118,
      "learning_rate": 2.7570276487207504e-06,
      "loss": 0.1546,
      "step": 5360
    },
    {
      "epoch": 0.4939420463444972,
      "grad_norm": 0.9070344715122353,
      "learning_rate": 2.7562698906678537e-06,
      "loss": 0.1281,
      "step": 5361
    },
    {
      "epoch": 0.4940341825217672,
      "grad_norm": 1.0743999343067259,
      "learning_rate": 2.755512108820271e-06,
      "loss": 0.1499,
      "step": 5362
    },
    {
      "epoch": 0.49412631869903717,
      "grad_norm": 0.8768062588403824,
      "learning_rate": 2.7547543032483604e-06,
      "loss": 0.1268,
      "step": 5363
    },
    {
      "epoch": 0.49421845487630717,
      "grad_norm": 0.9074114494072715,
      "learning_rate": 2.753996474022486e-06,
      "loss": 0.1355,
      "step": 5364
    },
    {
      "epoch": 0.49431059105357716,
      "grad_norm": 0.9424744962467466,
      "learning_rate": 2.753238621213012e-06,
      "loss": 0.1412,
      "step": 5365
    },
    {
      "epoch": 0.4944027272308472,
      "grad_norm": 0.9539914855888854,
      "learning_rate": 2.752480744890304e-06,
      "loss": 0.1555,
      "step": 5366
    },
    {
      "epoch": 0.4944948634081172,
      "grad_norm": 0.8823231270895644,
      "learning_rate": 2.751722845124732e-06,
      "loss": 0.1392,
      "step": 5367
    },
    {
      "epoch": 0.4945869995853872,
      "grad_norm": 0.9403023365444029,
      "learning_rate": 2.750964921986667e-06,
      "loss": 0.1488,
      "step": 5368
    },
    {
      "epoch": 0.4946791357626572,
      "grad_norm": 0.8975289021275779,
      "learning_rate": 2.750206975546481e-06,
      "loss": 0.1361,
      "step": 5369
    },
    {
      "epoch": 0.4947712719399272,
      "grad_norm": 0.9253417573353605,
      "learning_rate": 2.7494490058745514e-06,
      "loss": 0.1425,
      "step": 5370
    },
    {
      "epoch": 0.4948634081171972,
      "grad_norm": 0.9328707319829574,
      "learning_rate": 2.7486910130412543e-06,
      "loss": 0.1505,
      "step": 5371
    },
    {
      "epoch": 0.4949555442944672,
      "grad_norm": 0.8941550491967376,
      "learning_rate": 2.74793299711697e-06,
      "loss": 0.1312,
      "step": 5372
    },
    {
      "epoch": 0.49504768047173725,
      "grad_norm": 0.9099114610645433,
      "learning_rate": 2.747174958172081e-06,
      "loss": 0.1349,
      "step": 5373
    },
    {
      "epoch": 0.49513981664900725,
      "grad_norm": 0.8779917259058921,
      "learning_rate": 2.7464168962769696e-06,
      "loss": 0.1314,
      "step": 5374
    },
    {
      "epoch": 0.49523195282627724,
      "grad_norm": 0.9125090915870446,
      "learning_rate": 2.745658811502023e-06,
      "loss": 0.1363,
      "step": 5375
    },
    {
      "epoch": 0.49532408900354724,
      "grad_norm": 0.9775013337024041,
      "learning_rate": 2.7449007039176296e-06,
      "loss": 0.1473,
      "step": 5376
    },
    {
      "epoch": 0.49541622518081724,
      "grad_norm": 0.8985672527780988,
      "learning_rate": 2.7441425735941787e-06,
      "loss": 0.1443,
      "step": 5377
    },
    {
      "epoch": 0.49550836135808723,
      "grad_norm": 0.9152198455379926,
      "learning_rate": 2.7433844206020643e-06,
      "loss": 0.1395,
      "step": 5378
    },
    {
      "epoch": 0.49560049753535723,
      "grad_norm": 0.8692587899638442,
      "learning_rate": 2.7426262450116798e-06,
      "loss": 0.1301,
      "step": 5379
    },
    {
      "epoch": 0.4956926337126273,
      "grad_norm": 0.968301271071014,
      "learning_rate": 2.7418680468934227e-06,
      "loss": 0.141,
      "step": 5380
    },
    {
      "epoch": 0.4957847698898973,
      "grad_norm": 0.9688367958386958,
      "learning_rate": 2.7411098263176917e-06,
      "loss": 0.1474,
      "step": 5381
    },
    {
      "epoch": 0.4958769060671673,
      "grad_norm": 0.9636259047294249,
      "learning_rate": 2.740351583354886e-06,
      "loss": 0.1466,
      "step": 5382
    },
    {
      "epoch": 0.4959690422444373,
      "grad_norm": 0.9710179385144946,
      "learning_rate": 2.739593318075412e-06,
      "loss": 0.1511,
      "step": 5383
    },
    {
      "epoch": 0.49606117842170727,
      "grad_norm": 0.9860533062959879,
      "learning_rate": 2.7388350305496708e-06,
      "loss": 0.1531,
      "step": 5384
    },
    {
      "epoch": 0.49615331459897727,
      "grad_norm": 0.8845136650001137,
      "learning_rate": 2.7380767208480726e-06,
      "loss": 0.1335,
      "step": 5385
    },
    {
      "epoch": 0.4962454507762473,
      "grad_norm": 0.857431418417288,
      "learning_rate": 2.7373183890410245e-06,
      "loss": 0.1266,
      "step": 5386
    },
    {
      "epoch": 0.4963375869535173,
      "grad_norm": 0.8725792617659032,
      "learning_rate": 2.7365600351989386e-06,
      "loss": 0.139,
      "step": 5387
    },
    {
      "epoch": 0.4964297231307873,
      "grad_norm": 0.9327533333797368,
      "learning_rate": 2.7358016593922283e-06,
      "loss": 0.147,
      "step": 5388
    },
    {
      "epoch": 0.4965218593080573,
      "grad_norm": 0.8981246518761035,
      "learning_rate": 2.7350432616913083e-06,
      "loss": 0.1345,
      "step": 5389
    },
    {
      "epoch": 0.4966139954853273,
      "grad_norm": 0.9240269830117442,
      "learning_rate": 2.734284842166596e-06,
      "loss": 0.1558,
      "step": 5390
    },
    {
      "epoch": 0.4967061316625973,
      "grad_norm": 0.9839833653231123,
      "learning_rate": 2.733526400888511e-06,
      "loss": 0.1562,
      "step": 5391
    },
    {
      "epoch": 0.4967982678398673,
      "grad_norm": 0.9071812673870605,
      "learning_rate": 2.732767937927474e-06,
      "loss": 0.1365,
      "step": 5392
    },
    {
      "epoch": 0.49689040401713735,
      "grad_norm": 0.869434467607522,
      "learning_rate": 2.73200945335391e-06,
      "loss": 0.1372,
      "step": 5393
    },
    {
      "epoch": 0.49698254019440735,
      "grad_norm": 0.9389875824273619,
      "learning_rate": 2.7312509472382425e-06,
      "loss": 0.151,
      "step": 5394
    },
    {
      "epoch": 0.49707467637167735,
      "grad_norm": 0.8639226483522556,
      "learning_rate": 2.7304924196509004e-06,
      "loss": 0.125,
      "step": 5395
    },
    {
      "epoch": 0.49716681254894735,
      "grad_norm": 0.9154900592463313,
      "learning_rate": 2.7297338706623113e-06,
      "loss": 0.1392,
      "step": 5396
    },
    {
      "epoch": 0.49725894872621734,
      "grad_norm": 0.9468057879828546,
      "learning_rate": 2.728975300342909e-06,
      "loss": 0.1349,
      "step": 5397
    },
    {
      "epoch": 0.49735108490348734,
      "grad_norm": 0.942572127556101,
      "learning_rate": 2.7282167087631234e-06,
      "loss": 0.1487,
      "step": 5398
    },
    {
      "epoch": 0.49744322108075734,
      "grad_norm": 0.9139656094622787,
      "learning_rate": 2.7274580959933933e-06,
      "loss": 0.1452,
      "step": 5399
    },
    {
      "epoch": 0.4975353572580274,
      "grad_norm": 0.8875985109012006,
      "learning_rate": 2.726699462104154e-06,
      "loss": 0.1327,
      "step": 5400
    },
    {
      "epoch": 0.4976274934352974,
      "grad_norm": 0.9556085304450759,
      "learning_rate": 2.7259408071658456e-06,
      "loss": 0.1439,
      "step": 5401
    },
    {
      "epoch": 0.4977196296125674,
      "grad_norm": 1.0220365803003981,
      "learning_rate": 2.725182131248909e-06,
      "loss": 0.1409,
      "step": 5402
    },
    {
      "epoch": 0.4978117657898374,
      "grad_norm": 0.9580616109925933,
      "learning_rate": 2.724423434423787e-06,
      "loss": 0.1477,
      "step": 5403
    },
    {
      "epoch": 0.4979039019671074,
      "grad_norm": 0.9150371096328926,
      "learning_rate": 2.7236647167609246e-06,
      "loss": 0.1345,
      "step": 5404
    },
    {
      "epoch": 0.4979960381443774,
      "grad_norm": 0.9365443118478644,
      "learning_rate": 2.7229059783307703e-06,
      "loss": 0.1501,
      "step": 5405
    },
    {
      "epoch": 0.49808817432164737,
      "grad_norm": 0.9815020710994572,
      "learning_rate": 2.7221472192037707e-06,
      "loss": 0.1408,
      "step": 5406
    },
    {
      "epoch": 0.4981803104989174,
      "grad_norm": 0.980382203078079,
      "learning_rate": 2.721388439450379e-06,
      "loss": 0.1447,
      "step": 5407
    },
    {
      "epoch": 0.4982724466761874,
      "grad_norm": 0.9089049802957376,
      "learning_rate": 2.7206296391410457e-06,
      "loss": 0.1448,
      "step": 5408
    },
    {
      "epoch": 0.4983645828534574,
      "grad_norm": 0.946696566658947,
      "learning_rate": 2.7198708183462275e-06,
      "loss": 0.1367,
      "step": 5409
    },
    {
      "epoch": 0.4984567190307274,
      "grad_norm": 0.9457376021726533,
      "learning_rate": 2.71911197713638e-06,
      "loss": 0.1422,
      "step": 5410
    },
    {
      "epoch": 0.4985488552079974,
      "grad_norm": 0.8824387290721871,
      "learning_rate": 2.7183531155819607e-06,
      "loss": 0.1421,
      "step": 5411
    },
    {
      "epoch": 0.4986409913852674,
      "grad_norm": 0.8761674752566213,
      "learning_rate": 2.7175942337534326e-06,
      "loss": 0.1242,
      "step": 5412
    },
    {
      "epoch": 0.4987331275625374,
      "grad_norm": 0.9330486674418532,
      "learning_rate": 2.7168353317212565e-06,
      "loss": 0.1435,
      "step": 5413
    },
    {
      "epoch": 0.49882526373980746,
      "grad_norm": 0.9235257314844412,
      "learning_rate": 2.7160764095558954e-06,
      "loss": 0.1385,
      "step": 5414
    },
    {
      "epoch": 0.49891739991707745,
      "grad_norm": 0.9117658297669242,
      "learning_rate": 2.7153174673278174e-06,
      "loss": 0.139,
      "step": 5415
    },
    {
      "epoch": 0.49900953609434745,
      "grad_norm": 0.898645968059442,
      "learning_rate": 2.7145585051074893e-06,
      "loss": 0.1388,
      "step": 5416
    },
    {
      "epoch": 0.49910167227161745,
      "grad_norm": 0.961300262470349,
      "learning_rate": 2.7137995229653803e-06,
      "loss": 0.1457,
      "step": 5417
    },
    {
      "epoch": 0.49919380844888744,
      "grad_norm": 0.9290136686056124,
      "learning_rate": 2.7130405209719637e-06,
      "loss": 0.1307,
      "step": 5418
    },
    {
      "epoch": 0.49928594462615744,
      "grad_norm": 0.9459474842848344,
      "learning_rate": 2.7122814991977104e-06,
      "loss": 0.1405,
      "step": 5419
    },
    {
      "epoch": 0.4993780808034275,
      "grad_norm": 0.9241431470133946,
      "learning_rate": 2.711522457713098e-06,
      "loss": 0.1334,
      "step": 5420
    },
    {
      "epoch": 0.4994702169806975,
      "grad_norm": 0.9390340034093618,
      "learning_rate": 2.710763396588602e-06,
      "loss": 0.1307,
      "step": 5421
    },
    {
      "epoch": 0.4995623531579675,
      "grad_norm": 0.9981828709590618,
      "learning_rate": 2.7100043158947027e-06,
      "loss": 0.1405,
      "step": 5422
    },
    {
      "epoch": 0.4996544893352375,
      "grad_norm": 1.0367977847526075,
      "learning_rate": 2.7092452157018795e-06,
      "loss": 0.1536,
      "step": 5423
    },
    {
      "epoch": 0.4997466255125075,
      "grad_norm": 0.9040763895456477,
      "learning_rate": 2.708486096080616e-06,
      "loss": 0.1362,
      "step": 5424
    },
    {
      "epoch": 0.4998387616897775,
      "grad_norm": 0.9262236757461823,
      "learning_rate": 2.7077269571013947e-06,
      "loss": 0.1311,
      "step": 5425
    },
    {
      "epoch": 0.4999308978670475,
      "grad_norm": 0.9351274103164497,
      "learning_rate": 2.7069677988347048e-06,
      "loss": 0.1466,
      "step": 5426
    },
    {
      "epoch": 0.5000230340443175,
      "grad_norm": 1.010791283106135,
      "learning_rate": 2.7062086213510315e-06,
      "loss": 0.1392,
      "step": 5427
    },
    {
      "epoch": 0.5001151702215875,
      "grad_norm": 0.9800680940213662,
      "learning_rate": 2.705449424720866e-06,
      "loss": 0.1393,
      "step": 5428
    },
    {
      "epoch": 0.5002073063988575,
      "grad_norm": 0.9090025369824177,
      "learning_rate": 2.7046902090146986e-06,
      "loss": 0.1332,
      "step": 5429
    },
    {
      "epoch": 0.5002994425761275,
      "grad_norm": 0.8947906760810447,
      "learning_rate": 2.703930974303024e-06,
      "loss": 0.1257,
      "step": 5430
    },
    {
      "epoch": 0.5003915787533976,
      "grad_norm": 0.918143860684268,
      "learning_rate": 2.703171720656336e-06,
      "loss": 0.1377,
      "step": 5431
    },
    {
      "epoch": 0.5004837149306676,
      "grad_norm": 0.8565108609127393,
      "learning_rate": 2.7024124481451323e-06,
      "loss": 0.1342,
      "step": 5432
    },
    {
      "epoch": 0.5005758511079376,
      "grad_norm": 0.9133626021685404,
      "learning_rate": 2.701653156839911e-06,
      "loss": 0.1407,
      "step": 5433
    },
    {
      "epoch": 0.5006679872852076,
      "grad_norm": 0.909655782017589,
      "learning_rate": 2.700893846811172e-06,
      "loss": 0.1474,
      "step": 5434
    },
    {
      "epoch": 0.5007601234624776,
      "grad_norm": 0.9613093139686264,
      "learning_rate": 2.700134518129418e-06,
      "loss": 0.1483,
      "step": 5435
    },
    {
      "epoch": 0.5008522596397476,
      "grad_norm": 0.922464421682086,
      "learning_rate": 2.699375170865152e-06,
      "loss": 0.1331,
      "step": 5436
    },
    {
      "epoch": 0.5009443958170176,
      "grad_norm": 0.8953135114057215,
      "learning_rate": 2.6986158050888804e-06,
      "loss": 0.1182,
      "step": 5437
    },
    {
      "epoch": 0.5010365319942875,
      "grad_norm": 0.9488693208071233,
      "learning_rate": 2.6978564208711098e-06,
      "loss": 0.138,
      "step": 5438
    },
    {
      "epoch": 0.5011286681715575,
      "grad_norm": 0.9225635631092897,
      "learning_rate": 2.697097018282349e-06,
      "loss": 0.1225,
      "step": 5439
    },
    {
      "epoch": 0.5012208043488275,
      "grad_norm": 0.9820006390170077,
      "learning_rate": 2.6963375973931095e-06,
      "loss": 0.1456,
      "step": 5440
    },
    {
      "epoch": 0.5013129405260975,
      "grad_norm": 0.9329638194227488,
      "learning_rate": 2.6955781582739028e-06,
      "loss": 0.1421,
      "step": 5441
    },
    {
      "epoch": 0.5014050767033675,
      "grad_norm": 0.9709775019424565,
      "learning_rate": 2.6948187009952426e-06,
      "loss": 0.1469,
      "step": 5442
    },
    {
      "epoch": 0.5014972128806375,
      "grad_norm": 0.8940728365805736,
      "learning_rate": 2.6940592256276455e-06,
      "loss": 0.1338,
      "step": 5443
    },
    {
      "epoch": 0.5015893490579076,
      "grad_norm": 0.9582589895606664,
      "learning_rate": 2.6932997322416276e-06,
      "loss": 0.1418,
      "step": 5444
    },
    {
      "epoch": 0.5016814852351776,
      "grad_norm": 0.9555495425316477,
      "learning_rate": 2.6925402209077096e-06,
      "loss": 0.1493,
      "step": 5445
    },
    {
      "epoch": 0.5017736214124476,
      "grad_norm": 0.8761008247398887,
      "learning_rate": 2.6917806916964107e-06,
      "loss": 0.133,
      "step": 5446
    },
    {
      "epoch": 0.5018657575897176,
      "grad_norm": 0.8917513046762765,
      "learning_rate": 2.691021144678254e-06,
      "loss": 0.139,
      "step": 5447
    },
    {
      "epoch": 0.5019578937669876,
      "grad_norm": 0.9523258198573107,
      "learning_rate": 2.690261579923764e-06,
      "loss": 0.1487,
      "step": 5448
    },
    {
      "epoch": 0.5020500299442576,
      "grad_norm": 0.8774773911026075,
      "learning_rate": 2.689501997503466e-06,
      "loss": 0.138,
      "step": 5449
    },
    {
      "epoch": 0.5021421661215276,
      "grad_norm": 0.9107629385972631,
      "learning_rate": 2.688742397487887e-06,
      "loss": 0.1475,
      "step": 5450
    },
    {
      "epoch": 0.5022343022987976,
      "grad_norm": 0.8954104101874873,
      "learning_rate": 2.6879827799475557e-06,
      "loss": 0.1432,
      "step": 5451
    },
    {
      "epoch": 0.5023264384760676,
      "grad_norm": 0.951129195820262,
      "learning_rate": 2.6872231449530027e-06,
      "loss": 0.1456,
      "step": 5452
    },
    {
      "epoch": 0.5024185746533376,
      "grad_norm": 0.9334140124428768,
      "learning_rate": 2.686463492574761e-06,
      "loss": 0.1537,
      "step": 5453
    },
    {
      "epoch": 0.5025107108306076,
      "grad_norm": 0.9532927507320598,
      "learning_rate": 2.6857038228833644e-06,
      "loss": 0.1485,
      "step": 5454
    },
    {
      "epoch": 0.5026028470078776,
      "grad_norm": 0.8330878352738564,
      "learning_rate": 2.6849441359493474e-06,
      "loss": 0.1158,
      "step": 5455
    },
    {
      "epoch": 0.5026949831851476,
      "grad_norm": 0.9220034819099294,
      "learning_rate": 2.6841844318432476e-06,
      "loss": 0.1396,
      "step": 5456
    },
    {
      "epoch": 0.5027871193624176,
      "grad_norm": 0.9118685665157755,
      "learning_rate": 2.683424710635603e-06,
      "loss": 0.1284,
      "step": 5457
    },
    {
      "epoch": 0.5028792555396877,
      "grad_norm": 0.8654389397910528,
      "learning_rate": 2.682664972396955e-06,
      "loss": 0.1283,
      "step": 5458
    },
    {
      "epoch": 0.5029713917169577,
      "grad_norm": 0.884857390795016,
      "learning_rate": 2.6819052171978443e-06,
      "loss": 0.1394,
      "step": 5459
    },
    {
      "epoch": 0.5030635278942277,
      "grad_norm": 0.894611570661682,
      "learning_rate": 2.681145445108814e-06,
      "loss": 0.1299,
      "step": 5460
    },
    {
      "epoch": 0.5031556640714977,
      "grad_norm": 0.9548521712233243,
      "learning_rate": 2.6803856562004112e-06,
      "loss": 0.1446,
      "step": 5461
    },
    {
      "epoch": 0.5032478002487677,
      "grad_norm": 0.9100534643805418,
      "learning_rate": 2.6796258505431786e-06,
      "loss": 0.1287,
      "step": 5462
    },
    {
      "epoch": 0.5033399364260377,
      "grad_norm": 0.9261093041169923,
      "learning_rate": 2.6788660282076682e-06,
      "loss": 0.1397,
      "step": 5463
    },
    {
      "epoch": 0.5034320726033077,
      "grad_norm": 0.9255465069936254,
      "learning_rate": 2.6781061892644273e-06,
      "loss": 0.1333,
      "step": 5464
    },
    {
      "epoch": 0.5035242087805777,
      "grad_norm": 0.8713776146436004,
      "learning_rate": 2.6773463337840078e-06,
      "loss": 0.1392,
      "step": 5465
    },
    {
      "epoch": 0.5036163449578477,
      "grad_norm": 0.9341165883169124,
      "learning_rate": 2.676586461836962e-06,
      "loss": 0.1554,
      "step": 5466
    },
    {
      "epoch": 0.5037084811351177,
      "grad_norm": 0.8604949495271219,
      "learning_rate": 2.6758265734938444e-06,
      "loss": 0.1443,
      "step": 5467
    },
    {
      "epoch": 0.5038006173123877,
      "grad_norm": 0.8940670948046049,
      "learning_rate": 2.67506666882521e-06,
      "loss": 0.1383,
      "step": 5468
    },
    {
      "epoch": 0.5038927534896577,
      "grad_norm": 0.9117729021006208,
      "learning_rate": 2.6743067479016166e-06,
      "loss": 0.147,
      "step": 5469
    },
    {
      "epoch": 0.5039848896669277,
      "grad_norm": 0.909075371011717,
      "learning_rate": 2.673546810793623e-06,
      "loss": 0.1298,
      "step": 5470
    },
    {
      "epoch": 0.5040770258441978,
      "grad_norm": 0.9179808077227184,
      "learning_rate": 2.6727868575717893e-06,
      "loss": 0.1472,
      "step": 5471
    },
    {
      "epoch": 0.5041691620214678,
      "grad_norm": 0.9233613284382322,
      "learning_rate": 2.6720268883066773e-06,
      "loss": 0.1291,
      "step": 5472
    },
    {
      "epoch": 0.5042612981987378,
      "grad_norm": 0.9615650138923706,
      "learning_rate": 2.6712669030688503e-06,
      "loss": 0.1502,
      "step": 5473
    },
    {
      "epoch": 0.5043534343760078,
      "grad_norm": 0.9397692153613146,
      "learning_rate": 2.6705069019288733e-06,
      "loss": 0.1463,
      "step": 5474
    },
    {
      "epoch": 0.5044455705532778,
      "grad_norm": 1.0194186132210752,
      "learning_rate": 2.6697468849573114e-06,
      "loss": 0.1598,
      "step": 5475
    },
    {
      "epoch": 0.5045377067305478,
      "grad_norm": 0.9924246077269206,
      "learning_rate": 2.6689868522247334e-06,
      "loss": 0.1521,
      "step": 5476
    },
    {
      "epoch": 0.5046298429078178,
      "grad_norm": 0.9255855941944566,
      "learning_rate": 2.668226803801708e-06,
      "loss": 0.1385,
      "step": 5477
    },
    {
      "epoch": 0.5047219790850878,
      "grad_norm": 0.8860193614954378,
      "learning_rate": 2.6674667397588056e-06,
      "loss": 0.1335,
      "step": 5478
    },
    {
      "epoch": 0.5048141152623578,
      "grad_norm": 0.954126434296226,
      "learning_rate": 2.666706660166598e-06,
      "loss": 0.1539,
      "step": 5479
    },
    {
      "epoch": 0.5049062514396278,
      "grad_norm": 0.9327823851336733,
      "learning_rate": 2.66594656509566e-06,
      "loss": 0.1436,
      "step": 5480
    },
    {
      "epoch": 0.5049983876168977,
      "grad_norm": 0.9487423920612162,
      "learning_rate": 2.665186454616565e-06,
      "loss": 0.131,
      "step": 5481
    },
    {
      "epoch": 0.5050905237941677,
      "grad_norm": 0.9131599113448318,
      "learning_rate": 2.66442632879989e-06,
      "loss": 0.1465,
      "step": 5482
    },
    {
      "epoch": 0.5051826599714377,
      "grad_norm": 0.8724986437513693,
      "learning_rate": 2.663666187716213e-06,
      "loss": 0.1257,
      "step": 5483
    },
    {
      "epoch": 0.5052747961487077,
      "grad_norm": 0.910959624134114,
      "learning_rate": 2.662906031436112e-06,
      "loss": 0.1379,
      "step": 5484
    },
    {
      "epoch": 0.5053669323259778,
      "grad_norm": 0.9769423030222774,
      "learning_rate": 2.662145860030169e-06,
      "loss": 0.1457,
      "step": 5485
    },
    {
      "epoch": 0.5054590685032478,
      "grad_norm": 0.9195321853537671,
      "learning_rate": 2.6613856735689656e-06,
      "loss": 0.1315,
      "step": 5486
    },
    {
      "epoch": 0.5055512046805178,
      "grad_norm": 0.8813823710242042,
      "learning_rate": 2.6606254721230845e-06,
      "loss": 0.1325,
      "step": 5487
    },
    {
      "epoch": 0.5056433408577878,
      "grad_norm": 0.9379783302984157,
      "learning_rate": 2.6598652557631123e-06,
      "loss": 0.1525,
      "step": 5488
    },
    {
      "epoch": 0.5057354770350578,
      "grad_norm": 0.9048060270273836,
      "learning_rate": 2.6591050245596322e-06,
      "loss": 0.1298,
      "step": 5489
    },
    {
      "epoch": 0.5058276132123278,
      "grad_norm": 0.9650276864334607,
      "learning_rate": 2.6583447785832343e-06,
      "loss": 0.1389,
      "step": 5490
    },
    {
      "epoch": 0.5059197493895978,
      "grad_norm": 0.8938180533740254,
      "learning_rate": 2.657584517904507e-06,
      "loss": 0.13,
      "step": 5491
    },
    {
      "epoch": 0.5060118855668678,
      "grad_norm": 0.9550953969484592,
      "learning_rate": 2.65682424259404e-06,
      "loss": 0.1443,
      "step": 5492
    },
    {
      "epoch": 0.5061040217441378,
      "grad_norm": 0.9048846266087659,
      "learning_rate": 2.6560639527224246e-06,
      "loss": 0.1368,
      "step": 5493
    },
    {
      "epoch": 0.5061961579214078,
      "grad_norm": 0.9388284521232664,
      "learning_rate": 2.6553036483602553e-06,
      "loss": 0.1528,
      "step": 5494
    },
    {
      "epoch": 0.5062882940986778,
      "grad_norm": 0.9094867358427684,
      "learning_rate": 2.6545433295781242e-06,
      "loss": 0.1417,
      "step": 5495
    },
    {
      "epoch": 0.5063804302759478,
      "grad_norm": 0.917149402691518,
      "learning_rate": 2.65378299644663e-06,
      "loss": 0.1406,
      "step": 5496
    },
    {
      "epoch": 0.5064725664532178,
      "grad_norm": 0.8829207817590017,
      "learning_rate": 2.653022649036367e-06,
      "loss": 0.1236,
      "step": 5497
    },
    {
      "epoch": 0.5065647026304878,
      "grad_norm": 0.8893997459270351,
      "learning_rate": 2.652262287417935e-06,
      "loss": 0.1372,
      "step": 5498
    },
    {
      "epoch": 0.5066568388077579,
      "grad_norm": 0.9035837066275488,
      "learning_rate": 2.6515019116619327e-06,
      "loss": 0.1374,
      "step": 5499
    },
    {
      "epoch": 0.5067489749850279,
      "grad_norm": 0.8749623890705591,
      "learning_rate": 2.6507415218389616e-06,
      "loss": 0.1344,
      "step": 5500
    },
    {
      "epoch": 0.5067489749850279,
      "eval_loss": 0.13985012471675873,
      "eval_runtime": 299.8724,
      "eval_samples_per_second": 23.4,
      "eval_steps_per_second": 2.928,
      "step": 5500
    },
    {
      "epoch": 0.5068411111622979,
      "grad_norm": 0.8972053443275787,
      "learning_rate": 2.649981118019625e-06,
      "loss": 0.1325,
      "step": 5501
    },
    {
      "epoch": 0.5069332473395679,
      "grad_norm": 0.9610764579191868,
      "learning_rate": 2.649220700274524e-06,
      "loss": 0.1421,
      "step": 5502
    },
    {
      "epoch": 0.5070253835168379,
      "grad_norm": 0.9421384788983665,
      "learning_rate": 2.648460268674266e-06,
      "loss": 0.1502,
      "step": 5503
    },
    {
      "epoch": 0.5071175196941079,
      "grad_norm": 0.8800435296498718,
      "learning_rate": 2.6476998232894557e-06,
      "loss": 0.1352,
      "step": 5504
    },
    {
      "epoch": 0.5072096558713779,
      "grad_norm": 0.917670590492245,
      "learning_rate": 2.646939364190701e-06,
      "loss": 0.1425,
      "step": 5505
    },
    {
      "epoch": 0.5073017920486479,
      "grad_norm": 0.9647345651378058,
      "learning_rate": 2.6461788914486098e-06,
      "loss": 0.1458,
      "step": 5506
    },
    {
      "epoch": 0.5073939282259179,
      "grad_norm": 0.9404490001911155,
      "learning_rate": 2.6454184051337946e-06,
      "loss": 0.1581,
      "step": 5507
    },
    {
      "epoch": 0.5074860644031879,
      "grad_norm": 0.8676278066012653,
      "learning_rate": 2.644657905316863e-06,
      "loss": 0.1316,
      "step": 5508
    },
    {
      "epoch": 0.5075782005804579,
      "grad_norm": 0.9111223393555133,
      "learning_rate": 2.64389739206843e-06,
      "loss": 0.1482,
      "step": 5509
    },
    {
      "epoch": 0.5076703367577279,
      "grad_norm": 0.9475807706333695,
      "learning_rate": 2.6431368654591087e-06,
      "loss": 0.1519,
      "step": 5510
    },
    {
      "epoch": 0.5077624729349979,
      "grad_norm": 0.9054563411368267,
      "learning_rate": 2.6423763255595143e-06,
      "loss": 0.146,
      "step": 5511
    },
    {
      "epoch": 0.507854609112268,
      "grad_norm": 0.9368488337401683,
      "learning_rate": 2.6416157724402626e-06,
      "loss": 0.1381,
      "step": 5512
    },
    {
      "epoch": 0.507946745289538,
      "grad_norm": 0.9040531100606465,
      "learning_rate": 2.640855206171971e-06,
      "loss": 0.1371,
      "step": 5513
    },
    {
      "epoch": 0.508038881466808,
      "grad_norm": 0.9249124192962886,
      "learning_rate": 2.640094626825258e-06,
      "loss": 0.1355,
      "step": 5514
    },
    {
      "epoch": 0.508131017644078,
      "grad_norm": 0.9403553647015074,
      "learning_rate": 2.6393340344707448e-06,
      "loss": 0.1459,
      "step": 5515
    },
    {
      "epoch": 0.508223153821348,
      "grad_norm": 0.9221283008538266,
      "learning_rate": 2.6385734291790506e-06,
      "loss": 0.1307,
      "step": 5516
    },
    {
      "epoch": 0.508315289998618,
      "grad_norm": 0.9181336545301192,
      "learning_rate": 2.637812811020799e-06,
      "loss": 0.1428,
      "step": 5517
    },
    {
      "epoch": 0.508407426175888,
      "grad_norm": 0.9060067197014477,
      "learning_rate": 2.6370521800666127e-06,
      "loss": 0.1323,
      "step": 5518
    },
    {
      "epoch": 0.508499562353158,
      "grad_norm": 0.8997448706666332,
      "learning_rate": 2.636291536387117e-06,
      "loss": 0.136,
      "step": 5519
    },
    {
      "epoch": 0.508591698530428,
      "grad_norm": 0.905826181470098,
      "learning_rate": 2.6355308800529377e-06,
      "loss": 0.1356,
      "step": 5520
    },
    {
      "epoch": 0.508683834707698,
      "grad_norm": 0.9173879086665507,
      "learning_rate": 2.6347702111347017e-06,
      "loss": 0.144,
      "step": 5521
    },
    {
      "epoch": 0.508775970884968,
      "grad_norm": 0.9378280218523944,
      "learning_rate": 2.634009529703036e-06,
      "loss": 0.1344,
      "step": 5522
    },
    {
      "epoch": 0.508868107062238,
      "grad_norm": 0.8970030378074605,
      "learning_rate": 2.633248835828573e-06,
      "loss": 0.1177,
      "step": 5523
    },
    {
      "epoch": 0.508960243239508,
      "grad_norm": 0.9873589104671638,
      "learning_rate": 2.6324881295819394e-06,
      "loss": 0.1525,
      "step": 5524
    },
    {
      "epoch": 0.509052379416778,
      "grad_norm": 0.8669878479759109,
      "learning_rate": 2.6317274110337692e-06,
      "loss": 0.1277,
      "step": 5525
    },
    {
      "epoch": 0.509144515594048,
      "grad_norm": 0.9081281065004965,
      "learning_rate": 2.6309666802546953e-06,
      "loss": 0.1294,
      "step": 5526
    },
    {
      "epoch": 0.509236651771318,
      "grad_norm": 0.9014846717124748,
      "learning_rate": 2.63020593731535e-06,
      "loss": 0.1291,
      "step": 5527
    },
    {
      "epoch": 0.509328787948588,
      "grad_norm": 0.9802281228815567,
      "learning_rate": 2.629445182286371e-06,
      "loss": 0.145,
      "step": 5528
    },
    {
      "epoch": 0.509420924125858,
      "grad_norm": 0.9134326612787356,
      "learning_rate": 2.6286844152383913e-06,
      "loss": 0.1384,
      "step": 5529
    },
    {
      "epoch": 0.509513060303128,
      "grad_norm": 0.9077152752905923,
      "learning_rate": 2.6279236362420503e-06,
      "loss": 0.1403,
      "step": 5530
    },
    {
      "epoch": 0.509605196480398,
      "grad_norm": 0.8849225997974401,
      "learning_rate": 2.6271628453679865e-06,
      "loss": 0.1391,
      "step": 5531
    },
    {
      "epoch": 0.509697332657668,
      "grad_norm": 0.9200720102028345,
      "learning_rate": 2.6264020426868382e-06,
      "loss": 0.1477,
      "step": 5532
    },
    {
      "epoch": 0.509789468834938,
      "grad_norm": 0.8476479866854897,
      "learning_rate": 2.6256412282692467e-06,
      "loss": 0.1334,
      "step": 5533
    },
    {
      "epoch": 0.509881605012208,
      "grad_norm": 0.8497457959986047,
      "learning_rate": 2.6248804021858538e-06,
      "loss": 0.1312,
      "step": 5534
    },
    {
      "epoch": 0.509973741189478,
      "grad_norm": 0.9003692531477517,
      "learning_rate": 2.624119564507302e-06,
      "loss": 0.139,
      "step": 5535
    },
    {
      "epoch": 0.510065877366748,
      "grad_norm": 0.8630292481575238,
      "learning_rate": 2.6233587153042356e-06,
      "loss": 0.1279,
      "step": 5536
    },
    {
      "epoch": 0.510158013544018,
      "grad_norm": 0.9052917352915406,
      "learning_rate": 2.6225978546472985e-06,
      "loss": 0.1267,
      "step": 5537
    },
    {
      "epoch": 0.510250149721288,
      "grad_norm": 0.8627747624985834,
      "learning_rate": 2.621836982607138e-06,
      "loss": 0.133,
      "step": 5538
    },
    {
      "epoch": 0.5103422858985581,
      "grad_norm": 0.9549906006503575,
      "learning_rate": 2.6210760992544004e-06,
      "loss": 0.1357,
      "step": 5539
    },
    {
      "epoch": 0.5104344220758281,
      "grad_norm": 0.9526451014635038,
      "learning_rate": 2.6203152046597343e-06,
      "loss": 0.1423,
      "step": 5540
    },
    {
      "epoch": 0.5105265582530981,
      "grad_norm": 0.861004252901416,
      "learning_rate": 2.6195542988937882e-06,
      "loss": 0.1395,
      "step": 5541
    },
    {
      "epoch": 0.5106186944303681,
      "grad_norm": 0.9027357450630178,
      "learning_rate": 2.6187933820272128e-06,
      "loss": 0.142,
      "step": 5542
    },
    {
      "epoch": 0.5107108306076381,
      "grad_norm": 0.891785231864971,
      "learning_rate": 2.6180324541306577e-06,
      "loss": 0.1385,
      "step": 5543
    },
    {
      "epoch": 0.5108029667849081,
      "grad_norm": 0.8647280432780181,
      "learning_rate": 2.6172715152747784e-06,
      "loss": 0.129,
      "step": 5544
    },
    {
      "epoch": 0.5108951029621781,
      "grad_norm": 0.8875513460819612,
      "learning_rate": 2.6165105655302252e-06,
      "loss": 0.1359,
      "step": 5545
    },
    {
      "epoch": 0.5109872391394481,
      "grad_norm": 0.9032613573115922,
      "learning_rate": 2.615749604967654e-06,
      "loss": 0.1406,
      "step": 5546
    },
    {
      "epoch": 0.5110793753167181,
      "grad_norm": 0.8882411960902156,
      "learning_rate": 2.614988633657719e-06,
      "loss": 0.1361,
      "step": 5547
    },
    {
      "epoch": 0.5111715114939881,
      "grad_norm": 0.867215069051396,
      "learning_rate": 2.614227651671078e-06,
      "loss": 0.1335,
      "step": 5548
    },
    {
      "epoch": 0.5112636476712581,
      "grad_norm": 0.8963000008178789,
      "learning_rate": 2.6134666590783856e-06,
      "loss": 0.1453,
      "step": 5549
    },
    {
      "epoch": 0.5113557838485281,
      "grad_norm": 0.9627517202967072,
      "learning_rate": 2.6127056559503035e-06,
      "loss": 0.1392,
      "step": 5550
    },
    {
      "epoch": 0.5114479200257981,
      "grad_norm": 0.898582511766749,
      "learning_rate": 2.611944642357488e-06,
      "loss": 0.1307,
      "step": 5551
    },
    {
      "epoch": 0.5115400562030681,
      "grad_norm": 0.8994720606560878,
      "learning_rate": 2.611183618370601e-06,
      "loss": 0.1424,
      "step": 5552
    },
    {
      "epoch": 0.5116321923803382,
      "grad_norm": 0.9189882349593465,
      "learning_rate": 2.6104225840603026e-06,
      "loss": 0.1447,
      "step": 5553
    },
    {
      "epoch": 0.5117243285576082,
      "grad_norm": 0.8508772095700861,
      "learning_rate": 2.609661539497255e-06,
      "loss": 0.1294,
      "step": 5554
    },
    {
      "epoch": 0.5118164647348782,
      "grad_norm": 0.9372166101365486,
      "learning_rate": 2.6089004847521227e-06,
      "loss": 0.1384,
      "step": 5555
    },
    {
      "epoch": 0.5119086009121482,
      "grad_norm": 0.9239599944745616,
      "learning_rate": 2.608139419895568e-06,
      "loss": 0.1349,
      "step": 5556
    },
    {
      "epoch": 0.5120007370894182,
      "grad_norm": 0.9349790401903082,
      "learning_rate": 2.6073783449982563e-06,
      "loss": 0.138,
      "step": 5557
    },
    {
      "epoch": 0.5120928732666882,
      "grad_norm": 0.8800466157716698,
      "learning_rate": 2.6066172601308544e-06,
      "loss": 0.1302,
      "step": 5558
    },
    {
      "epoch": 0.5121850094439582,
      "grad_norm": 0.876448123482639,
      "learning_rate": 2.605856165364028e-06,
      "loss": 0.1362,
      "step": 5559
    },
    {
      "epoch": 0.5122771456212282,
      "grad_norm": 0.8427611964166464,
      "learning_rate": 2.6050950607684454e-06,
      "loss": 0.1289,
      "step": 5560
    },
    {
      "epoch": 0.5123692817984982,
      "grad_norm": 0.9047573143154314,
      "learning_rate": 2.6043339464147754e-06,
      "loss": 0.1369,
      "step": 5561
    },
    {
      "epoch": 0.5124614179757682,
      "grad_norm": 0.9495280124543655,
      "learning_rate": 2.603572822373686e-06,
      "loss": 0.1531,
      "step": 5562
    },
    {
      "epoch": 0.5125535541530382,
      "grad_norm": 0.9881092237552044,
      "learning_rate": 2.6028116887158503e-06,
      "loss": 0.1504,
      "step": 5563
    },
    {
      "epoch": 0.5126456903303082,
      "grad_norm": 0.8214010445612134,
      "learning_rate": 2.6020505455119375e-06,
      "loss": 0.1276,
      "step": 5564
    },
    {
      "epoch": 0.5127378265075782,
      "grad_norm": 0.8555404360624906,
      "learning_rate": 2.601289392832621e-06,
      "loss": 0.1335,
      "step": 5565
    },
    {
      "epoch": 0.5128299626848482,
      "grad_norm": 0.8853452441226398,
      "learning_rate": 2.6005282307485735e-06,
      "loss": 0.1249,
      "step": 5566
    },
    {
      "epoch": 0.5129220988621183,
      "grad_norm": 0.8881684098601418,
      "learning_rate": 2.5997670593304688e-06,
      "loss": 0.1383,
      "step": 5567
    },
    {
      "epoch": 0.5130142350393883,
      "grad_norm": 0.9486217169656922,
      "learning_rate": 2.5990058786489818e-06,
      "loss": 0.1339,
      "step": 5568
    },
    {
      "epoch": 0.5131063712166583,
      "grad_norm": 0.8598186957614298,
      "learning_rate": 2.5982446887747885e-06,
      "loss": 0.125,
      "step": 5569
    },
    {
      "epoch": 0.5131985073939282,
      "grad_norm": 0.9382037989178497,
      "learning_rate": 2.5974834897785646e-06,
      "loss": 0.141,
      "step": 5570
    },
    {
      "epoch": 0.5132906435711982,
      "grad_norm": 0.8555476200376073,
      "learning_rate": 2.5967222817309893e-06,
      "loss": 0.1276,
      "step": 5571
    },
    {
      "epoch": 0.5133827797484682,
      "grad_norm": 0.928756828249199,
      "learning_rate": 2.5959610647027388e-06,
      "loss": 0.1456,
      "step": 5572
    },
    {
      "epoch": 0.5134749159257382,
      "grad_norm": 0.9146043332944963,
      "learning_rate": 2.595199838764493e-06,
      "loss": 0.1424,
      "step": 5573
    },
    {
      "epoch": 0.5135670521030082,
      "grad_norm": 0.8719255281170171,
      "learning_rate": 2.5944386039869328e-06,
      "loss": 0.127,
      "step": 5574
    },
    {
      "epoch": 0.5136591882802782,
      "grad_norm": 0.9891327414194487,
      "learning_rate": 2.593677360440738e-06,
      "loss": 0.1535,
      "step": 5575
    },
    {
      "epoch": 0.5137513244575482,
      "grad_norm": 0.8941008360456992,
      "learning_rate": 2.5929161081965898e-06,
      "loss": 0.1252,
      "step": 5576
    },
    {
      "epoch": 0.5138434606348182,
      "grad_norm": 0.9390926757674907,
      "learning_rate": 2.592154847325171e-06,
      "loss": 0.1411,
      "step": 5577
    },
    {
      "epoch": 0.5139355968120882,
      "grad_norm": 0.8699348061966624,
      "learning_rate": 2.5913935778971644e-06,
      "loss": 0.1363,
      "step": 5578
    },
    {
      "epoch": 0.5140277329893582,
      "grad_norm": 0.9583723014793004,
      "learning_rate": 2.590632299983255e-06,
      "loss": 0.1401,
      "step": 5579
    },
    {
      "epoch": 0.5141198691666283,
      "grad_norm": 0.9182617054191736,
      "learning_rate": 2.589871013654126e-06,
      "loss": 0.134,
      "step": 5580
    },
    {
      "epoch": 0.5142120053438983,
      "grad_norm": 0.9611058899311491,
      "learning_rate": 2.589109718980464e-06,
      "loss": 0.1469,
      "step": 5581
    },
    {
      "epoch": 0.5143041415211683,
      "grad_norm": 0.8705141047969384,
      "learning_rate": 2.5883484160329552e-06,
      "loss": 0.1255,
      "step": 5582
    },
    {
      "epoch": 0.5143962776984383,
      "grad_norm": 0.8586967447912096,
      "learning_rate": 2.587587104882286e-06,
      "loss": 0.14,
      "step": 5583
    },
    {
      "epoch": 0.5144884138757083,
      "grad_norm": 0.9372494805713448,
      "learning_rate": 2.586825785599145e-06,
      "loss": 0.1277,
      "step": 5584
    },
    {
      "epoch": 0.5145805500529783,
      "grad_norm": 0.9599321610073678,
      "learning_rate": 2.5860644582542206e-06,
      "loss": 0.1567,
      "step": 5585
    },
    {
      "epoch": 0.5146726862302483,
      "grad_norm": 0.9015636760266527,
      "learning_rate": 2.5853031229182017e-06,
      "loss": 0.1218,
      "step": 5586
    },
    {
      "epoch": 0.5147648224075183,
      "grad_norm": 0.8916433289597714,
      "learning_rate": 2.584541779661779e-06,
      "loss": 0.1325,
      "step": 5587
    },
    {
      "epoch": 0.5148569585847883,
      "grad_norm": 0.8773087815609789,
      "learning_rate": 2.583780428555643e-06,
      "loss": 0.1309,
      "step": 5588
    },
    {
      "epoch": 0.5149490947620583,
      "grad_norm": 0.8559951677078586,
      "learning_rate": 2.5830190696704843e-06,
      "loss": 0.1328,
      "step": 5589
    },
    {
      "epoch": 0.5150412309393283,
      "grad_norm": 0.919260554053468,
      "learning_rate": 2.5822577030769972e-06,
      "loss": 0.1353,
      "step": 5590
    },
    {
      "epoch": 0.5151333671165983,
      "grad_norm": 0.9129424240959548,
      "learning_rate": 2.581496328845873e-06,
      "loss": 0.1382,
      "step": 5591
    },
    {
      "epoch": 0.5152255032938683,
      "grad_norm": 0.9485496218586936,
      "learning_rate": 2.580734947047806e-06,
      "loss": 0.1294,
      "step": 5592
    },
    {
      "epoch": 0.5153176394711383,
      "grad_norm": 1.0463560758101833,
      "learning_rate": 2.57997355775349e-06,
      "loss": 0.1582,
      "step": 5593
    },
    {
      "epoch": 0.5154097756484084,
      "grad_norm": 0.9828298858039358,
      "learning_rate": 2.5792121610336215e-06,
      "loss": 0.138,
      "step": 5594
    },
    {
      "epoch": 0.5155019118256784,
      "grad_norm": 0.8865690987054157,
      "learning_rate": 2.5784507569588947e-06,
      "loss": 0.133,
      "step": 5595
    },
    {
      "epoch": 0.5155940480029484,
      "grad_norm": 0.948753054611248,
      "learning_rate": 2.577689345600007e-06,
      "loss": 0.1398,
      "step": 5596
    },
    {
      "epoch": 0.5156861841802184,
      "grad_norm": 0.8963248656506673,
      "learning_rate": 2.5769279270276544e-06,
      "loss": 0.1359,
      "step": 5597
    },
    {
      "epoch": 0.5157783203574884,
      "grad_norm": 0.8633984849791445,
      "learning_rate": 2.5761665013125364e-06,
      "loss": 0.1266,
      "step": 5598
    },
    {
      "epoch": 0.5158704565347584,
      "grad_norm": 0.9111635283757924,
      "learning_rate": 2.5754050685253503e-06,
      "loss": 0.1495,
      "step": 5599
    },
    {
      "epoch": 0.5159625927120284,
      "grad_norm": 0.8888987005242416,
      "learning_rate": 2.5746436287367956e-06,
      "loss": 0.1411,
      "step": 5600
    },
    {
      "epoch": 0.5160547288892984,
      "grad_norm": 0.9027874762523549,
      "learning_rate": 2.5738821820175713e-06,
      "loss": 0.1462,
      "step": 5601
    },
    {
      "epoch": 0.5161468650665684,
      "grad_norm": 0.8936351680734343,
      "learning_rate": 2.573120728438379e-06,
      "loss": 0.1567,
      "step": 5602
    },
    {
      "epoch": 0.5162390012438384,
      "grad_norm": 0.823163896670656,
      "learning_rate": 2.5723592680699194e-06,
      "loss": 0.1141,
      "step": 5603
    },
    {
      "epoch": 0.5163311374211084,
      "grad_norm": 0.8578357763989864,
      "learning_rate": 2.5715978009828934e-06,
      "loss": 0.1252,
      "step": 5604
    },
    {
      "epoch": 0.5164232735983784,
      "grad_norm": 0.9347199730746312,
      "learning_rate": 2.5708363272480034e-06,
      "loss": 0.1471,
      "step": 5605
    },
    {
      "epoch": 0.5165154097756484,
      "grad_norm": 0.9230560472785323,
      "learning_rate": 2.5700748469359542e-06,
      "loss": 0.1462,
      "step": 5606
    },
    {
      "epoch": 0.5166075459529185,
      "grad_norm": 0.9146428934847072,
      "learning_rate": 2.569313360117447e-06,
      "loss": 0.1435,
      "step": 5607
    },
    {
      "epoch": 0.5166996821301885,
      "grad_norm": 0.8883033669285219,
      "learning_rate": 2.568551866863187e-06,
      "loss": 0.1342,
      "step": 5608
    },
    {
      "epoch": 0.5167918183074585,
      "grad_norm": 0.8902566289498707,
      "learning_rate": 2.567790367243879e-06,
      "loss": 0.1358,
      "step": 5609
    },
    {
      "epoch": 0.5168839544847285,
      "grad_norm": 0.884923794423157,
      "learning_rate": 2.5670288613302278e-06,
      "loss": 0.1363,
      "step": 5610
    },
    {
      "epoch": 0.5169760906619985,
      "grad_norm": 0.8595484693799914,
      "learning_rate": 2.56626734919294e-06,
      "loss": 0.131,
      "step": 5611
    },
    {
      "epoch": 0.5170682268392685,
      "grad_norm": 0.8881898408502089,
      "learning_rate": 2.5655058309027216e-06,
      "loss": 0.133,
      "step": 5612
    },
    {
      "epoch": 0.5171603630165384,
      "grad_norm": 0.9055171174486542,
      "learning_rate": 2.5647443065302797e-06,
      "loss": 0.1337,
      "step": 5613
    },
    {
      "epoch": 0.5172524991938084,
      "grad_norm": 0.8982698558938885,
      "learning_rate": 2.5639827761463217e-06,
      "loss": 0.1385,
      "step": 5614
    },
    {
      "epoch": 0.5173446353710784,
      "grad_norm": 0.8439364034167672,
      "learning_rate": 2.5632212398215563e-06,
      "loss": 0.1277,
      "step": 5615
    },
    {
      "epoch": 0.5174367715483484,
      "grad_norm": 0.9053369863573947,
      "learning_rate": 2.562459697626692e-06,
      "loss": 0.1322,
      "step": 5616
    },
    {
      "epoch": 0.5175289077256184,
      "grad_norm": 0.9414286983524403,
      "learning_rate": 2.561698149632438e-06,
      "loss": 0.1425,
      "step": 5617
    },
    {
      "epoch": 0.5176210439028884,
      "grad_norm": 0.8600323617972152,
      "learning_rate": 2.560936595909504e-06,
      "loss": 0.1314,
      "step": 5618
    },
    {
      "epoch": 0.5177131800801584,
      "grad_norm": 0.9022890230620475,
      "learning_rate": 2.560175036528601e-06,
      "loss": 0.13,
      "step": 5619
    },
    {
      "epoch": 0.5178053162574284,
      "grad_norm": 0.8822823466002423,
      "learning_rate": 2.5594134715604384e-06,
      "loss": 0.136,
      "step": 5620
    },
    {
      "epoch": 0.5178974524346985,
      "grad_norm": 0.8770079594534396,
      "learning_rate": 2.5586519010757295e-06,
      "loss": 0.1416,
      "step": 5621
    },
    {
      "epoch": 0.5179895886119685,
      "grad_norm": 0.9227396154729606,
      "learning_rate": 2.557890325145185e-06,
      "loss": 0.1449,
      "step": 5622
    },
    {
      "epoch": 0.5180817247892385,
      "grad_norm": 0.8965913173018901,
      "learning_rate": 2.5571287438395175e-06,
      "loss": 0.1402,
      "step": 5623
    },
    {
      "epoch": 0.5181738609665085,
      "grad_norm": 0.8834294157700694,
      "learning_rate": 2.5563671572294396e-06,
      "loss": 0.1331,
      "step": 5624
    },
    {
      "epoch": 0.5182659971437785,
      "grad_norm": 0.898106091348643,
      "learning_rate": 2.5556055653856667e-06,
      "loss": 0.1401,
      "step": 5625
    },
    {
      "epoch": 0.5183581333210485,
      "grad_norm": 0.8886607871965505,
      "learning_rate": 2.554843968378909e-06,
      "loss": 0.1356,
      "step": 5626
    },
    {
      "epoch": 0.5184502694983185,
      "grad_norm": 0.87036466401323,
      "learning_rate": 2.5540823662798843e-06,
      "loss": 0.1291,
      "step": 5627
    },
    {
      "epoch": 0.5185424056755885,
      "grad_norm": 0.996280569292839,
      "learning_rate": 2.553320759159305e-06,
      "loss": 0.1457,
      "step": 5628
    },
    {
      "epoch": 0.5186345418528585,
      "grad_norm": 0.8837610420412856,
      "learning_rate": 2.5525591470878886e-06,
      "loss": 0.1322,
      "step": 5629
    },
    {
      "epoch": 0.5187266780301285,
      "grad_norm": 0.8816532288664364,
      "learning_rate": 2.551797530136349e-06,
      "loss": 0.1291,
      "step": 5630
    },
    {
      "epoch": 0.5188188142073985,
      "grad_norm": 1.01580389465096,
      "learning_rate": 2.5510359083754038e-06,
      "loss": 0.138,
      "step": 5631
    },
    {
      "epoch": 0.5189109503846685,
      "grad_norm": 0.9453562490091288,
      "learning_rate": 2.5502742818757683e-06,
      "loss": 0.1348,
      "step": 5632
    },
    {
      "epoch": 0.5190030865619385,
      "grad_norm": 0.9264318284476458,
      "learning_rate": 2.549512650708161e-06,
      "loss": 0.1373,
      "step": 5633
    },
    {
      "epoch": 0.5190952227392085,
      "grad_norm": 0.9397210232263981,
      "learning_rate": 2.5487510149432974e-06,
      "loss": 0.1372,
      "step": 5634
    },
    {
      "epoch": 0.5191873589164786,
      "grad_norm": 0.917269244252231,
      "learning_rate": 2.547989374651898e-06,
      "loss": 0.1362,
      "step": 5635
    },
    {
      "epoch": 0.5192794950937486,
      "grad_norm": 0.9624687169062707,
      "learning_rate": 2.54722772990468e-06,
      "loss": 0.1312,
      "step": 5636
    },
    {
      "epoch": 0.5193716312710186,
      "grad_norm": 0.8989884615305049,
      "learning_rate": 2.546466080772362e-06,
      "loss": 0.1393,
      "step": 5637
    },
    {
      "epoch": 0.5194637674482886,
      "grad_norm": 0.8504854986989404,
      "learning_rate": 2.5457044273256635e-06,
      "loss": 0.1419,
      "step": 5638
    },
    {
      "epoch": 0.5195559036255586,
      "grad_norm": 0.9181894072822718,
      "learning_rate": 2.544942769635304e-06,
      "loss": 0.1371,
      "step": 5639
    },
    {
      "epoch": 0.5196480398028286,
      "grad_norm": 0.9676061229686893,
      "learning_rate": 2.544181107772003e-06,
      "loss": 0.1416,
      "step": 5640
    },
    {
      "epoch": 0.5197401759800986,
      "grad_norm": 0.8791670761476063,
      "learning_rate": 2.543419441806482e-06,
      "loss": 0.1418,
      "step": 5641
    },
    {
      "epoch": 0.5198323121573686,
      "grad_norm": 0.8491594494899254,
      "learning_rate": 2.5426577718094607e-06,
      "loss": 0.1266,
      "step": 5642
    },
    {
      "epoch": 0.5199244483346386,
      "grad_norm": 1.0354163929555338,
      "learning_rate": 2.541896097851661e-06,
      "loss": 0.1409,
      "step": 5643
    },
    {
      "epoch": 0.5200165845119086,
      "grad_norm": 0.9635321671316454,
      "learning_rate": 2.541134420003804e-06,
      "loss": 0.1365,
      "step": 5644
    },
    {
      "epoch": 0.5201087206891786,
      "grad_norm": 1.031766808189202,
      "learning_rate": 2.5403727383366116e-06,
      "loss": 0.1383,
      "step": 5645
    },
    {
      "epoch": 0.5202008568664486,
      "grad_norm": 0.8389665943046382,
      "learning_rate": 2.5396110529208066e-06,
      "loss": 0.1141,
      "step": 5646
    },
    {
      "epoch": 0.5202929930437186,
      "grad_norm": 0.9395740207321802,
      "learning_rate": 2.53884936382711e-06,
      "loss": 0.1369,
      "step": 5647
    },
    {
      "epoch": 0.5203851292209887,
      "grad_norm": 0.9084013073081592,
      "learning_rate": 2.538087671126247e-06,
      "loss": 0.1406,
      "step": 5648
    },
    {
      "epoch": 0.5204772653982587,
      "grad_norm": 0.9266640118622809,
      "learning_rate": 2.537325974888939e-06,
      "loss": 0.1386,
      "step": 5649
    },
    {
      "epoch": 0.5205694015755287,
      "grad_norm": 1.0315770750585445,
      "learning_rate": 2.5365642751859103e-06,
      "loss": 0.1348,
      "step": 5650
    },
    {
      "epoch": 0.5206615377527987,
      "grad_norm": 0.9054680007028475,
      "learning_rate": 2.5358025720878847e-06,
      "loss": 0.135,
      "step": 5651
    },
    {
      "epoch": 0.5207536739300687,
      "grad_norm": 0.9740550288932809,
      "learning_rate": 2.535040865665587e-06,
      "loss": 0.1449,
      "step": 5652
    },
    {
      "epoch": 0.5208458101073387,
      "grad_norm": 0.8951997050564433,
      "learning_rate": 2.53427915598974e-06,
      "loss": 0.1393,
      "step": 5653
    },
    {
      "epoch": 0.5209379462846087,
      "grad_norm": 0.8907261864130697,
      "learning_rate": 2.5335174431310705e-06,
      "loss": 0.1392,
      "step": 5654
    },
    {
      "epoch": 0.5210300824618787,
      "grad_norm": 0.9414523754070911,
      "learning_rate": 2.532755727160302e-06,
      "loss": 0.1455,
      "step": 5655
    },
    {
      "epoch": 0.5211222186391486,
      "grad_norm": 0.9540517476228871,
      "learning_rate": 2.5319940081481612e-06,
      "loss": 0.1235,
      "step": 5656
    },
    {
      "epoch": 0.5212143548164186,
      "grad_norm": 0.9954834340666702,
      "learning_rate": 2.531232286165374e-06,
      "loss": 0.1438,
      "step": 5657
    },
    {
      "epoch": 0.5213064909936886,
      "grad_norm": 0.8970096073321792,
      "learning_rate": 2.530470561282665e-06,
      "loss": 0.1308,
      "step": 5658
    },
    {
      "epoch": 0.5213986271709586,
      "grad_norm": 0.9058477790180682,
      "learning_rate": 2.5297088335707607e-06,
      "loss": 0.1461,
      "step": 5659
    },
    {
      "epoch": 0.5214907633482286,
      "grad_norm": 0.9711778108276775,
      "learning_rate": 2.5289471031003894e-06,
      "loss": 0.1396,
      "step": 5660
    },
    {
      "epoch": 0.5215828995254986,
      "grad_norm": 0.8543618205002567,
      "learning_rate": 2.528185369942275e-06,
      "loss": 0.1323,
      "step": 5661
    },
    {
      "epoch": 0.5216750357027687,
      "grad_norm": 0.9505400443417231,
      "learning_rate": 2.5274236341671464e-06,
      "loss": 0.1437,
      "step": 5662
    },
    {
      "epoch": 0.5217671718800387,
      "grad_norm": 0.9705759551536418,
      "learning_rate": 2.5266618958457305e-06,
      "loss": 0.1501,
      "step": 5663
    },
    {
      "epoch": 0.5218593080573087,
      "grad_norm": 0.9286699162670864,
      "learning_rate": 2.525900155048755e-06,
      "loss": 0.1351,
      "step": 5664
    },
    {
      "epoch": 0.5219514442345787,
      "grad_norm": 0.9460951212975731,
      "learning_rate": 2.525138411846947e-06,
      "loss": 0.155,
      "step": 5665
    },
    {
      "epoch": 0.5220435804118487,
      "grad_norm": 0.9521125076330914,
      "learning_rate": 2.524376666311035e-06,
      "loss": 0.1552,
      "step": 5666
    },
    {
      "epoch": 0.5221357165891187,
      "grad_norm": 0.9052726980091685,
      "learning_rate": 2.523614918511746e-06,
      "loss": 0.1253,
      "step": 5667
    },
    {
      "epoch": 0.5222278527663887,
      "grad_norm": 0.9223530508604543,
      "learning_rate": 2.5228531685198105e-06,
      "loss": 0.1265,
      "step": 5668
    },
    {
      "epoch": 0.5223199889436587,
      "grad_norm": 0.9534027425880114,
      "learning_rate": 2.522091416405955e-06,
      "loss": 0.1339,
      "step": 5669
    },
    {
      "epoch": 0.5224121251209287,
      "grad_norm": 0.9757261885463333,
      "learning_rate": 2.5213296622409094e-06,
      "loss": 0.1418,
      "step": 5670
    },
    {
      "epoch": 0.5225042612981987,
      "grad_norm": 0.948753953135261,
      "learning_rate": 2.5205679060954025e-06,
      "loss": 0.1439,
      "step": 5671
    },
    {
      "epoch": 0.5225963974754687,
      "grad_norm": 0.9326818741941446,
      "learning_rate": 2.5198061480401623e-06,
      "loss": 0.1449,
      "step": 5672
    },
    {
      "epoch": 0.5226885336527387,
      "grad_norm": 0.9363994286992702,
      "learning_rate": 2.519044388145921e-06,
      "loss": 0.1326,
      "step": 5673
    },
    {
      "epoch": 0.5227806698300087,
      "grad_norm": 0.9856542056580295,
      "learning_rate": 2.5182826264834046e-06,
      "loss": 0.1359,
      "step": 5674
    },
    {
      "epoch": 0.5228728060072788,
      "grad_norm": 1.034901023201578,
      "learning_rate": 2.5175208631233454e-06,
      "loss": 0.145,
      "step": 5675
    },
    {
      "epoch": 0.5229649421845488,
      "grad_norm": 0.9040002234718671,
      "learning_rate": 2.516759098136472e-06,
      "loss": 0.1362,
      "step": 5676
    },
    {
      "epoch": 0.5230570783618188,
      "grad_norm": 0.9295191125870893,
      "learning_rate": 2.515997331593514e-06,
      "loss": 0.1458,
      "step": 5677
    },
    {
      "epoch": 0.5231492145390888,
      "grad_norm": 0.9049169701368106,
      "learning_rate": 2.5152355635652027e-06,
      "loss": 0.137,
      "step": 5678
    },
    {
      "epoch": 0.5232413507163588,
      "grad_norm": 0.9557392345266481,
      "learning_rate": 2.5144737941222673e-06,
      "loss": 0.1471,
      "step": 5679
    },
    {
      "epoch": 0.5233334868936288,
      "grad_norm": 0.929502828589841,
      "learning_rate": 2.513712023335438e-06,
      "loss": 0.1413,
      "step": 5680
    },
    {
      "epoch": 0.5234256230708988,
      "grad_norm": 0.9097049347259758,
      "learning_rate": 2.512950251275447e-06,
      "loss": 0.1347,
      "step": 5681
    },
    {
      "epoch": 0.5235177592481688,
      "grad_norm": 0.9202841127962849,
      "learning_rate": 2.512188478013023e-06,
      "loss": 0.1366,
      "step": 5682
    },
    {
      "epoch": 0.5236098954254388,
      "grad_norm": 0.8929983406529515,
      "learning_rate": 2.5114267036188975e-06,
      "loss": 0.13,
      "step": 5683
    },
    {
      "epoch": 0.5237020316027088,
      "grad_norm": 0.9686069240868677,
      "learning_rate": 2.510664928163802e-06,
      "loss": 0.1517,
      "step": 5684
    },
    {
      "epoch": 0.5237941677799788,
      "grad_norm": 0.8428080692138286,
      "learning_rate": 2.5099031517184665e-06,
      "loss": 0.1235,
      "step": 5685
    },
    {
      "epoch": 0.5238863039572488,
      "grad_norm": 0.8988271287258455,
      "learning_rate": 2.509141374353622e-06,
      "loss": 0.1339,
      "step": 5686
    },
    {
      "epoch": 0.5239784401345188,
      "grad_norm": 0.9611827066606157,
      "learning_rate": 2.50837959614e-06,
      "loss": 0.1489,
      "step": 5687
    },
    {
      "epoch": 0.5240705763117888,
      "grad_norm": 0.9468591677252746,
      "learning_rate": 2.5076178171483312e-06,
      "loss": 0.1415,
      "step": 5688
    },
    {
      "epoch": 0.5241627124890589,
      "grad_norm": 0.9603892814379588,
      "learning_rate": 2.506856037449348e-06,
      "loss": 0.14,
      "step": 5689
    },
    {
      "epoch": 0.5242548486663289,
      "grad_norm": 0.9746388238990209,
      "learning_rate": 2.50609425711378e-06,
      "loss": 0.1549,
      "step": 5690
    },
    {
      "epoch": 0.5243469848435989,
      "grad_norm": 0.9676253072065032,
      "learning_rate": 2.505332476212361e-06,
      "loss": 0.1547,
      "step": 5691
    },
    {
      "epoch": 0.5244391210208689,
      "grad_norm": 0.9193704129647952,
      "learning_rate": 2.50457069481582e-06,
      "loss": 0.1412,
      "step": 5692
    },
    {
      "epoch": 0.5245312571981389,
      "grad_norm": 0.8835165046226858,
      "learning_rate": 2.50380891299489e-06,
      "loss": 0.14,
      "step": 5693
    },
    {
      "epoch": 0.5246233933754089,
      "grad_norm": 0.8897949239988316,
      "learning_rate": 2.503047130820302e-06,
      "loss": 0.1305,
      "step": 5694
    },
    {
      "epoch": 0.5247155295526789,
      "grad_norm": 0.8582482232047172,
      "learning_rate": 2.5022853483627876e-06,
      "loss": 0.1291,
      "step": 5695
    },
    {
      "epoch": 0.5248076657299489,
      "grad_norm": 0.9377699199181725,
      "learning_rate": 2.5015235656930774e-06,
      "loss": 0.1468,
      "step": 5696
    },
    {
      "epoch": 0.5248998019072189,
      "grad_norm": 0.9298891282148648,
      "learning_rate": 2.500761782881905e-06,
      "loss": 0.1402,
      "step": 5697
    },
    {
      "epoch": 0.5249919380844889,
      "grad_norm": 0.8586736938601531,
      "learning_rate": 2.5e-06,
      "loss": 0.1319,
      "step": 5698
    },
    {
      "epoch": 0.5250840742617588,
      "grad_norm": 0.856179840256379,
      "learning_rate": 2.499238217118095e-06,
      "loss": 0.1341,
      "step": 5699
    },
    {
      "epoch": 0.5251762104390288,
      "grad_norm": 0.918400983382606,
      "learning_rate": 2.498476434306923e-06,
      "loss": 0.1414,
      "step": 5700
    },
    {
      "epoch": 0.5252683466162988,
      "grad_norm": 0.8617014174835537,
      "learning_rate": 2.4977146516372137e-06,
      "loss": 0.135,
      "step": 5701
    },
    {
      "epoch": 0.525360482793569,
      "grad_norm": 0.9095148426558132,
      "learning_rate": 2.496952869179699e-06,
      "loss": 0.1414,
      "step": 5702
    },
    {
      "epoch": 0.525452618970839,
      "grad_norm": 0.9003434006030184,
      "learning_rate": 2.4961910870051105e-06,
      "loss": 0.1304,
      "step": 5703
    },
    {
      "epoch": 0.5255447551481089,
      "grad_norm": 0.9417961533337451,
      "learning_rate": 2.49542930518418e-06,
      "loss": 0.1421,
      "step": 5704
    },
    {
      "epoch": 0.5256368913253789,
      "grad_norm": 0.9417914777947627,
      "learning_rate": 2.49466752378764e-06,
      "loss": 0.1425,
      "step": 5705
    },
    {
      "epoch": 0.5257290275026489,
      "grad_norm": 0.8897487906201332,
      "learning_rate": 2.4939057428862203e-06,
      "loss": 0.1284,
      "step": 5706
    },
    {
      "epoch": 0.5258211636799189,
      "grad_norm": 0.8457401884584623,
      "learning_rate": 2.4931439625506522e-06,
      "loss": 0.1276,
      "step": 5707
    },
    {
      "epoch": 0.5259132998571889,
      "grad_norm": 0.9013953147336925,
      "learning_rate": 2.4923821828516688e-06,
      "loss": 0.1426,
      "step": 5708
    },
    {
      "epoch": 0.5260054360344589,
      "grad_norm": 0.8852735984465513,
      "learning_rate": 2.491620403860001e-06,
      "loss": 0.1377,
      "step": 5709
    },
    {
      "epoch": 0.5260975722117289,
      "grad_norm": 0.9107949064957632,
      "learning_rate": 2.4908586256463788e-06,
      "loss": 0.1482,
      "step": 5710
    },
    {
      "epoch": 0.5261897083889989,
      "grad_norm": 0.8745259986185256,
      "learning_rate": 2.4900968482815344e-06,
      "loss": 0.1391,
      "step": 5711
    },
    {
      "epoch": 0.5262818445662689,
      "grad_norm": 0.8978816508895493,
      "learning_rate": 2.4893350718361984e-06,
      "loss": 0.1323,
      "step": 5712
    },
    {
      "epoch": 0.5263739807435389,
      "grad_norm": 0.9371818854858208,
      "learning_rate": 2.488573296381103e-06,
      "loss": 0.1392,
      "step": 5713
    },
    {
      "epoch": 0.5264661169208089,
      "grad_norm": 0.8768828375830369,
      "learning_rate": 2.487811521986978e-06,
      "loss": 0.1366,
      "step": 5714
    },
    {
      "epoch": 0.5265582530980789,
      "grad_norm": 0.9975475708282623,
      "learning_rate": 2.4870497487245534e-06,
      "loss": 0.1469,
      "step": 5715
    },
    {
      "epoch": 0.526650389275349,
      "grad_norm": 0.9416496571228281,
      "learning_rate": 2.486287976664562e-06,
      "loss": 0.1516,
      "step": 5716
    },
    {
      "epoch": 0.526742525452619,
      "grad_norm": 0.8976130137898136,
      "learning_rate": 2.485526205877734e-06,
      "loss": 0.1329,
      "step": 5717
    },
    {
      "epoch": 0.526834661629889,
      "grad_norm": 0.9229030102050523,
      "learning_rate": 2.484764436434798e-06,
      "loss": 0.1435,
      "step": 5718
    },
    {
      "epoch": 0.526926797807159,
      "grad_norm": 0.848994540276747,
      "learning_rate": 2.4840026684064867e-06,
      "loss": 0.1249,
      "step": 5719
    },
    {
      "epoch": 0.527018933984429,
      "grad_norm": 0.9202350631334453,
      "learning_rate": 2.4832409018635283e-06,
      "loss": 0.1428,
      "step": 5720
    },
    {
      "epoch": 0.527111070161699,
      "grad_norm": 0.938200837146183,
      "learning_rate": 2.4824791368766555e-06,
      "loss": 0.1412,
      "step": 5721
    },
    {
      "epoch": 0.527203206338969,
      "grad_norm": 0.9183623201693331,
      "learning_rate": 2.4817173735165958e-06,
      "loss": 0.1406,
      "step": 5722
    },
    {
      "epoch": 0.527295342516239,
      "grad_norm": 0.9091326879662819,
      "learning_rate": 2.4809556118540795e-06,
      "loss": 0.1333,
      "step": 5723
    },
    {
      "epoch": 0.527387478693509,
      "grad_norm": 0.8675851743483034,
      "learning_rate": 2.4801938519598372e-06,
      "loss": 0.1201,
      "step": 5724
    },
    {
      "epoch": 0.527479614870779,
      "grad_norm": 0.9000877408453869,
      "learning_rate": 2.4794320939045988e-06,
      "loss": 0.1418,
      "step": 5725
    },
    {
      "epoch": 0.527571751048049,
      "grad_norm": 0.8474851554049467,
      "learning_rate": 2.4786703377590914e-06,
      "loss": 0.1314,
      "step": 5726
    },
    {
      "epoch": 0.527663887225319,
      "grad_norm": 0.9530774213615841,
      "learning_rate": 2.4779085835940457e-06,
      "loss": 0.1501,
      "step": 5727
    },
    {
      "epoch": 0.527756023402589,
      "grad_norm": 0.8946042244381394,
      "learning_rate": 2.47714683148019e-06,
      "loss": 0.1368,
      "step": 5728
    },
    {
      "epoch": 0.527848159579859,
      "grad_norm": 0.9488184152086155,
      "learning_rate": 2.476385081488254e-06,
      "loss": 0.1416,
      "step": 5729
    },
    {
      "epoch": 0.5279402957571291,
      "grad_norm": 0.8769562906327312,
      "learning_rate": 2.4756233336889663e-06,
      "loss": 0.1331,
      "step": 5730
    },
    {
      "epoch": 0.5280324319343991,
      "grad_norm": 0.960488041104247,
      "learning_rate": 2.474861588153054e-06,
      "loss": 0.1434,
      "step": 5731
    },
    {
      "epoch": 0.5281245681116691,
      "grad_norm": 0.9442539249540435,
      "learning_rate": 2.4740998449512456e-06,
      "loss": 0.1405,
      "step": 5732
    },
    {
      "epoch": 0.5282167042889391,
      "grad_norm": 1.017322220614333,
      "learning_rate": 2.4733381041542695e-06,
      "loss": 0.1442,
      "step": 5733
    },
    {
      "epoch": 0.5283088404662091,
      "grad_norm": 0.9558560440845575,
      "learning_rate": 2.4725763658328544e-06,
      "loss": 0.128,
      "step": 5734
    },
    {
      "epoch": 0.5284009766434791,
      "grad_norm": 0.9552439366518181,
      "learning_rate": 2.471814630057726e-06,
      "loss": 0.1368,
      "step": 5735
    },
    {
      "epoch": 0.5284931128207491,
      "grad_norm": 0.9475504293116053,
      "learning_rate": 2.4710528968996114e-06,
      "loss": 0.149,
      "step": 5736
    },
    {
      "epoch": 0.5285852489980191,
      "grad_norm": 0.9358738909776354,
      "learning_rate": 2.4702911664292397e-06,
      "loss": 0.1365,
      "step": 5737
    },
    {
      "epoch": 0.5286773851752891,
      "grad_norm": 0.9334724145108257,
      "learning_rate": 2.469529438717336e-06,
      "loss": 0.1373,
      "step": 5738
    },
    {
      "epoch": 0.5287695213525591,
      "grad_norm": 0.8697872053181739,
      "learning_rate": 2.4687677138346265e-06,
      "loss": 0.137,
      "step": 5739
    },
    {
      "epoch": 0.528861657529829,
      "grad_norm": 1.0230475394744452,
      "learning_rate": 2.468005991851839e-06,
      "loss": 0.1547,
      "step": 5740
    },
    {
      "epoch": 0.528953793707099,
      "grad_norm": 0.9182793634858509,
      "learning_rate": 2.467244272839698e-06,
      "loss": 0.1402,
      "step": 5741
    },
    {
      "epoch": 0.529045929884369,
      "grad_norm": 0.8635610908929247,
      "learning_rate": 2.4664825568689303e-06,
      "loss": 0.1316,
      "step": 5742
    },
    {
      "epoch": 0.5291380660616392,
      "grad_norm": 0.9238816088041331,
      "learning_rate": 2.4657208440102607e-06,
      "loss": 0.1346,
      "step": 5743
    },
    {
      "epoch": 0.5292302022389092,
      "grad_norm": 0.9603841203290647,
      "learning_rate": 2.464959134334414e-06,
      "loss": 0.1398,
      "step": 5744
    },
    {
      "epoch": 0.5293223384161792,
      "grad_norm": 0.9691785358952262,
      "learning_rate": 2.4641974279121157e-06,
      "loss": 0.1338,
      "step": 5745
    },
    {
      "epoch": 0.5294144745934491,
      "grad_norm": 0.9811825017453772,
      "learning_rate": 2.463435724814091e-06,
      "loss": 0.1363,
      "step": 5746
    },
    {
      "epoch": 0.5295066107707191,
      "grad_norm": 0.9089320567911033,
      "learning_rate": 2.4626740251110615e-06,
      "loss": 0.1451,
      "step": 5747
    },
    {
      "epoch": 0.5295987469479891,
      "grad_norm": 0.9108691062317888,
      "learning_rate": 2.461912328873754e-06,
      "loss": 0.1357,
      "step": 5748
    },
    {
      "epoch": 0.5296908831252591,
      "grad_norm": 0.9259190835186841,
      "learning_rate": 2.46115063617289e-06,
      "loss": 0.1492,
      "step": 5749
    },
    {
      "epoch": 0.5297830193025291,
      "grad_norm": 0.9615169324748283,
      "learning_rate": 2.4603889470791946e-06,
      "loss": 0.128,
      "step": 5750
    },
    {
      "epoch": 0.5298751554797991,
      "grad_norm": 0.9546828640622951,
      "learning_rate": 2.4596272616633892e-06,
      "loss": 0.1339,
      "step": 5751
    },
    {
      "epoch": 0.5299672916570691,
      "grad_norm": 0.9448443537367963,
      "learning_rate": 2.4588655799961968e-06,
      "loss": 0.1365,
      "step": 5752
    },
    {
      "epoch": 0.5300594278343391,
      "grad_norm": 0.9490782504336478,
      "learning_rate": 2.45810390214834e-06,
      "loss": 0.1388,
      "step": 5753
    },
    {
      "epoch": 0.5301515640116091,
      "grad_norm": 0.9427353748495464,
      "learning_rate": 2.4573422281905405e-06,
      "loss": 0.1451,
      "step": 5754
    },
    {
      "epoch": 0.5302437001888791,
      "grad_norm": 0.9010459704299345,
      "learning_rate": 2.456580558193519e-06,
      "loss": 0.1378,
      "step": 5755
    },
    {
      "epoch": 0.5303358363661491,
      "grad_norm": 0.9586692997882068,
      "learning_rate": 2.4558188922279977e-06,
      "loss": 0.147,
      "step": 5756
    },
    {
      "epoch": 0.5304279725434192,
      "grad_norm": 1.0038167732871943,
      "learning_rate": 2.4550572303646965e-06,
      "loss": 0.155,
      "step": 5757
    },
    {
      "epoch": 0.5305201087206892,
      "grad_norm": 0.9211631756539981,
      "learning_rate": 2.454295572674337e-06,
      "loss": 0.14,
      "step": 5758
    },
    {
      "epoch": 0.5306122448979592,
      "grad_norm": 1.0000059466636613,
      "learning_rate": 2.453533919227639e-06,
      "loss": 0.1378,
      "step": 5759
    },
    {
      "epoch": 0.5307043810752292,
      "grad_norm": 0.8958358231178939,
      "learning_rate": 2.4527722700953205e-06,
      "loss": 0.1348,
      "step": 5760
    },
    {
      "epoch": 0.5307965172524992,
      "grad_norm": 0.9202617314939519,
      "learning_rate": 2.4520106253481025e-06,
      "loss": 0.1408,
      "step": 5761
    },
    {
      "epoch": 0.5308886534297692,
      "grad_norm": 0.9861823309901614,
      "learning_rate": 2.451248985056702e-06,
      "loss": 0.1464,
      "step": 5762
    },
    {
      "epoch": 0.5309807896070392,
      "grad_norm": 0.9958770704868112,
      "learning_rate": 2.4504873492918404e-06,
      "loss": 0.1252,
      "step": 5763
    },
    {
      "epoch": 0.5310729257843092,
      "grad_norm": 0.9492656613133602,
      "learning_rate": 2.449725718124233e-06,
      "loss": 0.1401,
      "step": 5764
    },
    {
      "epoch": 0.5311650619615792,
      "grad_norm": 0.9305337760930398,
      "learning_rate": 2.448964091624597e-06,
      "loss": 0.1383,
      "step": 5765
    },
    {
      "epoch": 0.5312571981388492,
      "grad_norm": 0.9418538276706124,
      "learning_rate": 2.4482024698636514e-06,
      "loss": 0.1334,
      "step": 5766
    },
    {
      "epoch": 0.5313493343161192,
      "grad_norm": 0.9056941479460245,
      "learning_rate": 2.4474408529121126e-06,
      "loss": 0.1379,
      "step": 5767
    },
    {
      "epoch": 0.5314414704933892,
      "grad_norm": 0.823476911768923,
      "learning_rate": 2.4466792408406953e-06,
      "loss": 0.1158,
      "step": 5768
    },
    {
      "epoch": 0.5315336066706592,
      "grad_norm": 0.8974601975603193,
      "learning_rate": 2.445917633720117e-06,
      "loss": 0.1366,
      "step": 5769
    },
    {
      "epoch": 0.5316257428479293,
      "grad_norm": 0.8852553284312088,
      "learning_rate": 2.4451560316210913e-06,
      "loss": 0.1298,
      "step": 5770
    },
    {
      "epoch": 0.5317178790251993,
      "grad_norm": 0.9577532072576866,
      "learning_rate": 2.444394434614335e-06,
      "loss": 0.1448,
      "step": 5771
    },
    {
      "epoch": 0.5318100152024693,
      "grad_norm": 0.9122078585444239,
      "learning_rate": 2.4436328427705612e-06,
      "loss": 0.1484,
      "step": 5772
    },
    {
      "epoch": 0.5319021513797393,
      "grad_norm": 0.9341072336908377,
      "learning_rate": 2.442871256160483e-06,
      "loss": 0.1463,
      "step": 5773
    },
    {
      "epoch": 0.5319942875570093,
      "grad_norm": 0.8852758499988086,
      "learning_rate": 2.442109674854815e-06,
      "loss": 0.1435,
      "step": 5774
    },
    {
      "epoch": 0.5320864237342793,
      "grad_norm": 0.8580463168445764,
      "learning_rate": 2.4413480989242718e-06,
      "loss": 0.1247,
      "step": 5775
    },
    {
      "epoch": 0.5321785599115493,
      "grad_norm": 0.9335327520464461,
      "learning_rate": 2.440586528439562e-06,
      "loss": 0.1404,
      "step": 5776
    },
    {
      "epoch": 0.5322706960888193,
      "grad_norm": 0.8745349837111808,
      "learning_rate": 2.4398249634713996e-06,
      "loss": 0.1332,
      "step": 5777
    },
    {
      "epoch": 0.5323628322660893,
      "grad_norm": 0.8252889666092439,
      "learning_rate": 2.4390634040904965e-06,
      "loss": 0.1182,
      "step": 5778
    },
    {
      "epoch": 0.5324549684433593,
      "grad_norm": 0.9157329631624775,
      "learning_rate": 2.4383018503675633e-06,
      "loss": 0.1345,
      "step": 5779
    },
    {
      "epoch": 0.5325471046206293,
      "grad_norm": 0.8603655816644842,
      "learning_rate": 2.437540302373309e-06,
      "loss": 0.125,
      "step": 5780
    },
    {
      "epoch": 0.5326392407978993,
      "grad_norm": 0.9038892694652924,
      "learning_rate": 2.4367787601784446e-06,
      "loss": 0.1371,
      "step": 5781
    },
    {
      "epoch": 0.5327313769751693,
      "grad_norm": 0.92831194618531,
      "learning_rate": 2.4360172238536787e-06,
      "loss": 0.1364,
      "step": 5782
    },
    {
      "epoch": 0.5328235131524393,
      "grad_norm": 0.9846067383086416,
      "learning_rate": 2.435255693469721e-06,
      "loss": 0.1423,
      "step": 5783
    },
    {
      "epoch": 0.5329156493297094,
      "grad_norm": 0.990724526005928,
      "learning_rate": 2.4344941690972797e-06,
      "loss": 0.1498,
      "step": 5784
    },
    {
      "epoch": 0.5330077855069794,
      "grad_norm": 0.9493770573022826,
      "learning_rate": 2.4337326508070604e-06,
      "loss": 0.139,
      "step": 5785
    },
    {
      "epoch": 0.5330999216842494,
      "grad_norm": 0.9201553217830986,
      "learning_rate": 2.4329711386697726e-06,
      "loss": 0.1298,
      "step": 5786
    },
    {
      "epoch": 0.5331920578615194,
      "grad_norm": 0.8880673033056966,
      "learning_rate": 2.432209632756121e-06,
      "loss": 0.1489,
      "step": 5787
    },
    {
      "epoch": 0.5332841940387893,
      "grad_norm": 0.9510070687868549,
      "learning_rate": 2.4314481331368133e-06,
      "loss": 0.135,
      "step": 5788
    },
    {
      "epoch": 0.5333763302160593,
      "grad_norm": 0.8694595494903921,
      "learning_rate": 2.430686639882554e-06,
      "loss": 0.132,
      "step": 5789
    },
    {
      "epoch": 0.5334684663933293,
      "grad_norm": 0.881862571605303,
      "learning_rate": 2.429925153064046e-06,
      "loss": 0.1237,
      "step": 5790
    },
    {
      "epoch": 0.5335606025705993,
      "grad_norm": 0.9202894290372889,
      "learning_rate": 2.4291636727519966e-06,
      "loss": 0.1287,
      "step": 5791
    },
    {
      "epoch": 0.5336527387478693,
      "grad_norm": 0.8795110496314976,
      "learning_rate": 2.428402199017108e-06,
      "loss": 0.1265,
      "step": 5792
    },
    {
      "epoch": 0.5337448749251393,
      "grad_norm": 0.973557854750666,
      "learning_rate": 2.4276407319300815e-06,
      "loss": 0.1515,
      "step": 5793
    },
    {
      "epoch": 0.5338370111024093,
      "grad_norm": 0.9241824971544353,
      "learning_rate": 2.4268792715616217e-06,
      "loss": 0.1386,
      "step": 5794
    },
    {
      "epoch": 0.5339291472796793,
      "grad_norm": 0.9019945526996254,
      "learning_rate": 2.4261178179824287e-06,
      "loss": 0.1312,
      "step": 5795
    },
    {
      "epoch": 0.5340212834569493,
      "grad_norm": 0.8982120470146486,
      "learning_rate": 2.4253563712632057e-06,
      "loss": 0.1329,
      "step": 5796
    },
    {
      "epoch": 0.5341134196342193,
      "grad_norm": 0.8751533441546493,
      "learning_rate": 2.4245949314746506e-06,
      "loss": 0.1367,
      "step": 5797
    },
    {
      "epoch": 0.5342055558114894,
      "grad_norm": 0.9530832980103735,
      "learning_rate": 2.423833498687464e-06,
      "loss": 0.1374,
      "step": 5798
    },
    {
      "epoch": 0.5342976919887594,
      "grad_norm": 0.9553902470626439,
      "learning_rate": 2.423072072972346e-06,
      "loss": 0.1346,
      "step": 5799
    },
    {
      "epoch": 0.5343898281660294,
      "grad_norm": 0.9146688205607726,
      "learning_rate": 2.4223106543999943e-06,
      "loss": 0.1378,
      "step": 5800
    },
    {
      "epoch": 0.5344819643432994,
      "grad_norm": 0.8880858853753428,
      "learning_rate": 2.4215492430411057e-06,
      "loss": 0.1275,
      "step": 5801
    },
    {
      "epoch": 0.5345741005205694,
      "grad_norm": 0.9299530782474574,
      "learning_rate": 2.4207878389663794e-06,
      "loss": 0.1372,
      "step": 5802
    },
    {
      "epoch": 0.5346662366978394,
      "grad_norm": 1.0145172996220377,
      "learning_rate": 2.4200264422465096e-06,
      "loss": 0.1415,
      "step": 5803
    },
    {
      "epoch": 0.5347583728751094,
      "grad_norm": 0.9577851127728939,
      "learning_rate": 2.4192650529521948e-06,
      "loss": 0.131,
      "step": 5804
    },
    {
      "epoch": 0.5348505090523794,
      "grad_norm": 0.8902962613744783,
      "learning_rate": 2.418503671154128e-06,
      "loss": 0.1416,
      "step": 5805
    },
    {
      "epoch": 0.5349426452296494,
      "grad_norm": 0.8831433355746883,
      "learning_rate": 2.417742296923003e-06,
      "loss": 0.1298,
      "step": 5806
    },
    {
      "epoch": 0.5350347814069194,
      "grad_norm": 0.9091113886860835,
      "learning_rate": 2.4169809303295157e-06,
      "loss": 0.1372,
      "step": 5807
    },
    {
      "epoch": 0.5351269175841894,
      "grad_norm": 0.920230718279897,
      "learning_rate": 2.4162195714443584e-06,
      "loss": 0.1336,
      "step": 5808
    },
    {
      "epoch": 0.5352190537614594,
      "grad_norm": 0.9225664409925763,
      "learning_rate": 2.4154582203382216e-06,
      "loss": 0.1377,
      "step": 5809
    },
    {
      "epoch": 0.5353111899387294,
      "grad_norm": 0.8214010998632829,
      "learning_rate": 2.4146968770817988e-06,
      "loss": 0.1252,
      "step": 5810
    },
    {
      "epoch": 0.5354033261159995,
      "grad_norm": 0.9037345408786586,
      "learning_rate": 2.41393554174578e-06,
      "loss": 0.1499,
      "step": 5811
    },
    {
      "epoch": 0.5354954622932695,
      "grad_norm": 0.8879279953804324,
      "learning_rate": 2.4131742144008557e-06,
      "loss": 0.1366,
      "step": 5812
    },
    {
      "epoch": 0.5355875984705395,
      "grad_norm": 0.8643469307323431,
      "learning_rate": 2.4124128951177146e-06,
      "loss": 0.119,
      "step": 5813
    },
    {
      "epoch": 0.5356797346478095,
      "grad_norm": 0.9000012611078515,
      "learning_rate": 2.4116515839670456e-06,
      "loss": 0.1358,
      "step": 5814
    },
    {
      "epoch": 0.5357718708250795,
      "grad_norm": 0.9397044907955544,
      "learning_rate": 2.4108902810195367e-06,
      "loss": 0.1468,
      "step": 5815
    },
    {
      "epoch": 0.5358640070023495,
      "grad_norm": 0.915825164357773,
      "learning_rate": 2.4101289863458744e-06,
      "loss": 0.1255,
      "step": 5816
    },
    {
      "epoch": 0.5359561431796195,
      "grad_norm": 0.8662962491574562,
      "learning_rate": 2.409367700016746e-06,
      "loss": 0.1344,
      "step": 5817
    },
    {
      "epoch": 0.5360482793568895,
      "grad_norm": 0.9247751195485957,
      "learning_rate": 2.4086064221028365e-06,
      "loss": 0.1302,
      "step": 5818
    },
    {
      "epoch": 0.5361404155341595,
      "grad_norm": 0.9472673515439451,
      "learning_rate": 2.40784515267483e-06,
      "loss": 0.1407,
      "step": 5819
    },
    {
      "epoch": 0.5362325517114295,
      "grad_norm": 0.9411408748162816,
      "learning_rate": 2.407083891803411e-06,
      "loss": 0.1394,
      "step": 5820
    },
    {
      "epoch": 0.5363246878886995,
      "grad_norm": 0.9255154027937263,
      "learning_rate": 2.4063226395592635e-06,
      "loss": 0.1358,
      "step": 5821
    },
    {
      "epoch": 0.5364168240659695,
      "grad_norm": 0.9180421838847347,
      "learning_rate": 2.4055613960130676e-06,
      "loss": 0.1376,
      "step": 5822
    },
    {
      "epoch": 0.5365089602432395,
      "grad_norm": 0.8784816828161529,
      "learning_rate": 2.4048001612355072e-06,
      "loss": 0.1375,
      "step": 5823
    },
    {
      "epoch": 0.5366010964205095,
      "grad_norm": 0.854986124528438,
      "learning_rate": 2.4040389352972616e-06,
      "loss": 0.1345,
      "step": 5824
    },
    {
      "epoch": 0.5366932325977796,
      "grad_norm": 0.8877816767972053,
      "learning_rate": 2.403277718269012e-06,
      "loss": 0.1287,
      "step": 5825
    },
    {
      "epoch": 0.5367853687750496,
      "grad_norm": 0.8756257399903816,
      "learning_rate": 2.4025165102214363e-06,
      "loss": 0.1274,
      "step": 5826
    },
    {
      "epoch": 0.5368775049523196,
      "grad_norm": 0.9513688328099061,
      "learning_rate": 2.4017553112252123e-06,
      "loss": 0.1427,
      "step": 5827
    },
    {
      "epoch": 0.5369696411295896,
      "grad_norm": 0.9443193310474314,
      "learning_rate": 2.400994121351019e-06,
      "loss": 0.1422,
      "step": 5828
    },
    {
      "epoch": 0.5370617773068596,
      "grad_norm": 0.8714929349419318,
      "learning_rate": 2.4002329406695325e-06,
      "loss": 0.135,
      "step": 5829
    },
    {
      "epoch": 0.5371539134841296,
      "grad_norm": 0.9829188571248488,
      "learning_rate": 2.3994717692514274e-06,
      "loss": 0.1479,
      "step": 5830
    },
    {
      "epoch": 0.5372460496613995,
      "grad_norm": 0.8778193951704442,
      "learning_rate": 2.3987106071673797e-06,
      "loss": 0.1339,
      "step": 5831
    },
    {
      "epoch": 0.5373381858386695,
      "grad_norm": 0.8295131686996187,
      "learning_rate": 2.3979494544880625e-06,
      "loss": 0.117,
      "step": 5832
    },
    {
      "epoch": 0.5374303220159395,
      "grad_norm": 0.9603954748107639,
      "learning_rate": 2.3971883112841505e-06,
      "loss": 0.1481,
      "step": 5833
    },
    {
      "epoch": 0.5375224581932095,
      "grad_norm": 0.9293362890021563,
      "learning_rate": 2.3964271776263146e-06,
      "loss": 0.1448,
      "step": 5834
    },
    {
      "epoch": 0.5376145943704795,
      "grad_norm": 0.9414302795244884,
      "learning_rate": 2.3956660535852254e-06,
      "loss": 0.1403,
      "step": 5835
    },
    {
      "epoch": 0.5377067305477495,
      "grad_norm": 0.9002162943398508,
      "learning_rate": 2.3949049392315555e-06,
      "loss": 0.1313,
      "step": 5836
    },
    {
      "epoch": 0.5377988667250195,
      "grad_norm": 0.8896376778803587,
      "learning_rate": 2.394143834635973e-06,
      "loss": 0.132,
      "step": 5837
    },
    {
      "epoch": 0.5378910029022896,
      "grad_norm": 0.9191022468212096,
      "learning_rate": 2.3933827398691464e-06,
      "loss": 0.1291,
      "step": 5838
    },
    {
      "epoch": 0.5379831390795596,
      "grad_norm": 0.9652932862586016,
      "learning_rate": 2.3926216550017445e-06,
      "loss": 0.1402,
      "step": 5839
    },
    {
      "epoch": 0.5380752752568296,
      "grad_norm": 0.8928366891316858,
      "learning_rate": 2.3918605801044325e-06,
      "loss": 0.1403,
      "step": 5840
    },
    {
      "epoch": 0.5381674114340996,
      "grad_norm": 0.9072163582373372,
      "learning_rate": 2.3910995152478786e-06,
      "loss": 0.1399,
      "step": 5841
    },
    {
      "epoch": 0.5382595476113696,
      "grad_norm": 1.5408775770789918,
      "learning_rate": 2.3903384605027462e-06,
      "loss": 0.1339,
      "step": 5842
    },
    {
      "epoch": 0.5383516837886396,
      "grad_norm": 0.9626327273310918,
      "learning_rate": 2.3895774159396982e-06,
      "loss": 0.1425,
      "step": 5843
    },
    {
      "epoch": 0.5384438199659096,
      "grad_norm": 0.8853401330370692,
      "learning_rate": 2.3888163816294e-06,
      "loss": 0.1345,
      "step": 5844
    },
    {
      "epoch": 0.5385359561431796,
      "grad_norm": 0.8572199289738917,
      "learning_rate": 2.3880553576425124e-06,
      "loss": 0.1313,
      "step": 5845
    },
    {
      "epoch": 0.5386280923204496,
      "grad_norm": 0.9431335769657753,
      "learning_rate": 2.3872943440496978e-06,
      "loss": 0.1472,
      "step": 5846
    },
    {
      "epoch": 0.5387202284977196,
      "grad_norm": 0.8765177735234764,
      "learning_rate": 2.386533340921615e-06,
      "loss": 0.1413,
      "step": 5847
    },
    {
      "epoch": 0.5388123646749896,
      "grad_norm": 1.0028906351410283,
      "learning_rate": 2.385772348328923e-06,
      "loss": 0.1584,
      "step": 5848
    },
    {
      "epoch": 0.5389045008522596,
      "grad_norm": 0.9002792229676244,
      "learning_rate": 2.385011366342281e-06,
      "loss": 0.1407,
      "step": 5849
    },
    {
      "epoch": 0.5389966370295296,
      "grad_norm": 0.8770503850906126,
      "learning_rate": 2.3842503950323473e-06,
      "loss": 0.136,
      "step": 5850
    },
    {
      "epoch": 0.5390887732067996,
      "grad_norm": 0.9336646528001852,
      "learning_rate": 2.383489434469775e-06,
      "loss": 0.1426,
      "step": 5851
    },
    {
      "epoch": 0.5391809093840697,
      "grad_norm": 0.9010134456089555,
      "learning_rate": 2.382728484725222e-06,
      "loss": 0.1341,
      "step": 5852
    },
    {
      "epoch": 0.5392730455613397,
      "grad_norm": 0.8942998501108559,
      "learning_rate": 2.3819675458693422e-06,
      "loss": 0.1409,
      "step": 5853
    },
    {
      "epoch": 0.5393651817386097,
      "grad_norm": 0.9340316091553998,
      "learning_rate": 2.381206617972789e-06,
      "loss": 0.1444,
      "step": 5854
    },
    {
      "epoch": 0.5394573179158797,
      "grad_norm": 0.8876144334710793,
      "learning_rate": 2.3804457011062126e-06,
      "loss": 0.1398,
      "step": 5855
    },
    {
      "epoch": 0.5395494540931497,
      "grad_norm": 0.9016840240138915,
      "learning_rate": 2.3796847953402665e-06,
      "loss": 0.1225,
      "step": 5856
    },
    {
      "epoch": 0.5396415902704197,
      "grad_norm": 0.9181794340809388,
      "learning_rate": 2.3789239007455996e-06,
      "loss": 0.1405,
      "step": 5857
    },
    {
      "epoch": 0.5397337264476897,
      "grad_norm": 0.8881973608279808,
      "learning_rate": 2.3781630173928627e-06,
      "loss": 0.1347,
      "step": 5858
    },
    {
      "epoch": 0.5398258626249597,
      "grad_norm": 0.856762225089732,
      "learning_rate": 2.3774021453527023e-06,
      "loss": 0.1193,
      "step": 5859
    },
    {
      "epoch": 0.5399179988022297,
      "grad_norm": 0.9519632501690944,
      "learning_rate": 2.3766412846957652e-06,
      "loss": 0.1485,
      "step": 5860
    },
    {
      "epoch": 0.5400101349794997,
      "grad_norm": 0.9612225735542497,
      "learning_rate": 2.3758804354926986e-06,
      "loss": 0.1391,
      "step": 5861
    },
    {
      "epoch": 0.5401022711567697,
      "grad_norm": 0.934863489073019,
      "learning_rate": 2.375119597814147e-06,
      "loss": 0.1455,
      "step": 5862
    },
    {
      "epoch": 0.5401944073340397,
      "grad_norm": 0.8937423146747298,
      "learning_rate": 2.374358771730754e-06,
      "loss": 0.1285,
      "step": 5863
    },
    {
      "epoch": 0.5402865435113097,
      "grad_norm": 0.9940701675808745,
      "learning_rate": 2.3735979573131626e-06,
      "loss": 0.147,
      "step": 5864
    },
    {
      "epoch": 0.5403786796885797,
      "grad_norm": 1.0060354786289987,
      "learning_rate": 2.372837154632014e-06,
      "loss": 0.1289,
      "step": 5865
    },
    {
      "epoch": 0.5404708158658498,
      "grad_norm": 0.9563270691350768,
      "learning_rate": 2.37207636375795e-06,
      "loss": 0.132,
      "step": 5866
    },
    {
      "epoch": 0.5405629520431198,
      "grad_norm": 0.9225113435435278,
      "learning_rate": 2.3713155847616095e-06,
      "loss": 0.1444,
      "step": 5867
    },
    {
      "epoch": 0.5406550882203898,
      "grad_norm": 0.9051134560491042,
      "learning_rate": 2.37055481771363e-06,
      "loss": 0.1257,
      "step": 5868
    },
    {
      "epoch": 0.5407472243976598,
      "grad_norm": 0.9125435043547829,
      "learning_rate": 2.3697940626846504e-06,
      "loss": 0.1366,
      "step": 5869
    },
    {
      "epoch": 0.5408393605749298,
      "grad_norm": 0.8827037590314208,
      "learning_rate": 2.369033319745306e-06,
      "loss": 0.1278,
      "step": 5870
    },
    {
      "epoch": 0.5409314967521998,
      "grad_norm": 0.9542353104247997,
      "learning_rate": 2.3682725889662316e-06,
      "loss": 0.1316,
      "step": 5871
    },
    {
      "epoch": 0.5410236329294698,
      "grad_norm": 0.9436897926471467,
      "learning_rate": 2.3675118704180614e-06,
      "loss": 0.1375,
      "step": 5872
    },
    {
      "epoch": 0.5411157691067398,
      "grad_norm": 0.9053198809341689,
      "learning_rate": 2.366751164171428e-06,
      "loss": 0.1433,
      "step": 5873
    },
    {
      "epoch": 0.5412079052840097,
      "grad_norm": 0.8602873340218734,
      "learning_rate": 2.3659904702969636e-06,
      "loss": 0.1339,
      "step": 5874
    },
    {
      "epoch": 0.5413000414612797,
      "grad_norm": 0.8998889070188997,
      "learning_rate": 2.3652297888653e-06,
      "loss": 0.1409,
      "step": 5875
    },
    {
      "epoch": 0.5413921776385497,
      "grad_norm": 0.9105525894251323,
      "learning_rate": 2.3644691199470628e-06,
      "loss": 0.1369,
      "step": 5876
    },
    {
      "epoch": 0.5414843138158197,
      "grad_norm": 0.9362291238439949,
      "learning_rate": 2.3637084636128836e-06,
      "loss": 0.1356,
      "step": 5877
    },
    {
      "epoch": 0.5415764499930897,
      "grad_norm": 0.8885859902523415,
      "learning_rate": 2.3629478199333873e-06,
      "loss": 0.1328,
      "step": 5878
    },
    {
      "epoch": 0.5416685861703598,
      "grad_norm": 0.9125123891199016,
      "learning_rate": 2.362187188979202e-06,
      "loss": 0.1368,
      "step": 5879
    },
    {
      "epoch": 0.5417607223476298,
      "grad_norm": 0.8937998412152254,
      "learning_rate": 2.3614265708209503e-06,
      "loss": 0.1324,
      "step": 5880
    },
    {
      "epoch": 0.5418528585248998,
      "grad_norm": 0.8888623370293548,
      "learning_rate": 2.360665965529256e-06,
      "loss": 0.1336,
      "step": 5881
    },
    {
      "epoch": 0.5419449947021698,
      "grad_norm": 0.9001269534353986,
      "learning_rate": 2.3599053731747424e-06,
      "loss": 0.1396,
      "step": 5882
    },
    {
      "epoch": 0.5420371308794398,
      "grad_norm": 0.9073225423258994,
      "learning_rate": 2.3591447938280304e-06,
      "loss": 0.1349,
      "step": 5883
    },
    {
      "epoch": 0.5421292670567098,
      "grad_norm": 0.9664393080376217,
      "learning_rate": 2.3583842275597382e-06,
      "loss": 0.1362,
      "step": 5884
    },
    {
      "epoch": 0.5422214032339798,
      "grad_norm": 0.9234357650375972,
      "learning_rate": 2.3576236744404866e-06,
      "loss": 0.1383,
      "step": 5885
    },
    {
      "epoch": 0.5423135394112498,
      "grad_norm": 0.8729256712212028,
      "learning_rate": 2.3568631345408912e-06,
      "loss": 0.1326,
      "step": 5886
    },
    {
      "epoch": 0.5424056755885198,
      "grad_norm": 0.9081475409204117,
      "learning_rate": 2.3561026079315707e-06,
      "loss": 0.1271,
      "step": 5887
    },
    {
      "epoch": 0.5424978117657898,
      "grad_norm": 0.9919399955510334,
      "learning_rate": 2.3553420946831377e-06,
      "loss": 0.159,
      "step": 5888
    },
    {
      "epoch": 0.5425899479430598,
      "grad_norm": 0.8465349151729697,
      "learning_rate": 2.3545815948662066e-06,
      "loss": 0.1246,
      "step": 5889
    },
    {
      "epoch": 0.5426820841203298,
      "grad_norm": 0.8814514667720759,
      "learning_rate": 2.3538211085513902e-06,
      "loss": 0.1258,
      "step": 5890
    },
    {
      "epoch": 0.5427742202975998,
      "grad_norm": 0.9320930510441189,
      "learning_rate": 2.3530606358093e-06,
      "loss": 0.1336,
      "step": 5891
    },
    {
      "epoch": 0.5428663564748698,
      "grad_norm": 0.8891625231779033,
      "learning_rate": 2.352300176710545e-06,
      "loss": 0.1367,
      "step": 5892
    },
    {
      "epoch": 0.5429584926521399,
      "grad_norm": 0.9387961274368829,
      "learning_rate": 2.351539731325735e-06,
      "loss": 0.1397,
      "step": 5893
    },
    {
      "epoch": 0.5430506288294099,
      "grad_norm": 0.8696939578101172,
      "learning_rate": 2.350779299725476e-06,
      "loss": 0.1364,
      "step": 5894
    },
    {
      "epoch": 0.5431427650066799,
      "grad_norm": 0.9477061229980223,
      "learning_rate": 2.3500188819803764e-06,
      "loss": 0.1488,
      "step": 5895
    },
    {
      "epoch": 0.5432349011839499,
      "grad_norm": 0.8300016678907538,
      "learning_rate": 2.3492584781610392e-06,
      "loss": 0.1199,
      "step": 5896
    },
    {
      "epoch": 0.5433270373612199,
      "grad_norm": 0.9676273385906484,
      "learning_rate": 2.3484980883380677e-06,
      "loss": 0.1389,
      "step": 5897
    },
    {
      "epoch": 0.5434191735384899,
      "grad_norm": 0.9146011666455297,
      "learning_rate": 2.347737712582066e-06,
      "loss": 0.1288,
      "step": 5898
    },
    {
      "epoch": 0.5435113097157599,
      "grad_norm": 0.8746026606193679,
      "learning_rate": 2.3469773509636346e-06,
      "loss": 0.1253,
      "step": 5899
    },
    {
      "epoch": 0.5436034458930299,
      "grad_norm": 0.9163120972409077,
      "learning_rate": 2.3462170035533713e-06,
      "loss": 0.1383,
      "step": 5900
    },
    {
      "epoch": 0.5436955820702999,
      "grad_norm": 0.9377392397159249,
      "learning_rate": 2.345456670421876e-06,
      "loss": 0.1444,
      "step": 5901
    },
    {
      "epoch": 0.5437877182475699,
      "grad_norm": 0.9564357005506579,
      "learning_rate": 2.3446963516397455e-06,
      "loss": 0.1432,
      "step": 5902
    },
    {
      "epoch": 0.5438798544248399,
      "grad_norm": 0.9273350082364941,
      "learning_rate": 2.3439360472775758e-06,
      "loss": 0.1398,
      "step": 5903
    },
    {
      "epoch": 0.5439719906021099,
      "grad_norm": 0.8985150989161702,
      "learning_rate": 2.3431757574059616e-06,
      "loss": 0.1324,
      "step": 5904
    },
    {
      "epoch": 0.5440641267793799,
      "grad_norm": 0.9057262190585165,
      "learning_rate": 2.342415482095494e-06,
      "loss": 0.1322,
      "step": 5905
    },
    {
      "epoch": 0.54415626295665,
      "grad_norm": 0.9487898862954096,
      "learning_rate": 2.341655221416766e-06,
      "loss": 0.1367,
      "step": 5906
    },
    {
      "epoch": 0.54424839913392,
      "grad_norm": 0.8907811006669535,
      "learning_rate": 2.3408949754403678e-06,
      "loss": 0.1257,
      "step": 5907
    },
    {
      "epoch": 0.54434053531119,
      "grad_norm": 0.8690356403332792,
      "learning_rate": 2.340134744236889e-06,
      "loss": 0.1194,
      "step": 5908
    },
    {
      "epoch": 0.54443267148846,
      "grad_norm": 0.9402466917106319,
      "learning_rate": 2.3393745278769163e-06,
      "loss": 0.1393,
      "step": 5909
    },
    {
      "epoch": 0.54452480766573,
      "grad_norm": 0.9079624440722254,
      "learning_rate": 2.3386143264310348e-06,
      "loss": 0.1338,
      "step": 5910
    },
    {
      "epoch": 0.544616943843,
      "grad_norm": 0.9249329243264749,
      "learning_rate": 2.3378541399698314e-06,
      "loss": 0.1367,
      "step": 5911
    },
    {
      "epoch": 0.54470908002027,
      "grad_norm": 0.9118627413475253,
      "learning_rate": 2.337093968563889e-06,
      "loss": 0.1267,
      "step": 5912
    },
    {
      "epoch": 0.54480121619754,
      "grad_norm": 0.8645824091337139,
      "learning_rate": 2.336333812283788e-06,
      "loss": 0.1214,
      "step": 5913
    },
    {
      "epoch": 0.54489335237481,
      "grad_norm": 0.8851023831702883,
      "learning_rate": 2.3355736712001107e-06,
      "loss": 0.1425,
      "step": 5914
    },
    {
      "epoch": 0.54498548855208,
      "grad_norm": 0.9789065324109237,
      "learning_rate": 2.3348135453834353e-06,
      "loss": 0.1477,
      "step": 5915
    },
    {
      "epoch": 0.54507762472935,
      "grad_norm": 0.8929699059776705,
      "learning_rate": 2.3340534349043407e-06,
      "loss": 0.1324,
      "step": 5916
    },
    {
      "epoch": 0.54516976090662,
      "grad_norm": 0.8525535061935612,
      "learning_rate": 2.3332933398334028e-06,
      "loss": 0.1348,
      "step": 5917
    },
    {
      "epoch": 0.54526189708389,
      "grad_norm": 0.9265971942479982,
      "learning_rate": 2.332533260241195e-06,
      "loss": 0.146,
      "step": 5918
    },
    {
      "epoch": 0.5453540332611599,
      "grad_norm": 0.8893191415358593,
      "learning_rate": 2.3317731961982926e-06,
      "loss": 0.1422,
      "step": 5919
    },
    {
      "epoch": 0.54544616943843,
      "grad_norm": 0.8715215907778809,
      "learning_rate": 2.331013147775268e-06,
      "loss": 0.1322,
      "step": 5920
    },
    {
      "epoch": 0.5455383056157,
      "grad_norm": 0.9232093592061419,
      "learning_rate": 2.3302531150426894e-06,
      "loss": 0.1514,
      "step": 5921
    },
    {
      "epoch": 0.54563044179297,
      "grad_norm": 0.9221326425086337,
      "learning_rate": 2.329493098071128e-06,
      "loss": 0.1377,
      "step": 5922
    },
    {
      "epoch": 0.54572257797024,
      "grad_norm": 0.8629100101924353,
      "learning_rate": 2.32873309693115e-06,
      "loss": 0.1292,
      "step": 5923
    },
    {
      "epoch": 0.54581471414751,
      "grad_norm": 0.8764601317643181,
      "learning_rate": 2.3279731116933235e-06,
      "loss": 0.1382,
      "step": 5924
    },
    {
      "epoch": 0.54590685032478,
      "grad_norm": 0.9527189795795429,
      "learning_rate": 2.327213142428212e-06,
      "loss": 0.1412,
      "step": 5925
    },
    {
      "epoch": 0.54599898650205,
      "grad_norm": 0.8514752428752124,
      "learning_rate": 2.326453189206378e-06,
      "loss": 0.137,
      "step": 5926
    },
    {
      "epoch": 0.54609112267932,
      "grad_norm": 0.8631535978765967,
      "learning_rate": 2.325693252098384e-06,
      "loss": 0.1302,
      "step": 5927
    },
    {
      "epoch": 0.54618325885659,
      "grad_norm": 0.9256537186435602,
      "learning_rate": 2.324933331174792e-06,
      "loss": 0.1399,
      "step": 5928
    },
    {
      "epoch": 0.54627539503386,
      "grad_norm": 0.9165258915845617,
      "learning_rate": 2.3241734265061573e-06,
      "loss": 0.134,
      "step": 5929
    },
    {
      "epoch": 0.54636753121113,
      "grad_norm": 0.9783289918459979,
      "learning_rate": 2.323413538163039e-06,
      "loss": 0.137,
      "step": 5930
    },
    {
      "epoch": 0.5464596673884,
      "grad_norm": 0.952969188227047,
      "learning_rate": 2.322653666215993e-06,
      "loss": 0.1499,
      "step": 5931
    },
    {
      "epoch": 0.54655180356567,
      "grad_norm": 0.8964130572884526,
      "learning_rate": 2.3218938107355727e-06,
      "loss": 0.1367,
      "step": 5932
    },
    {
      "epoch": 0.54664393974294,
      "grad_norm": 0.8954517651328167,
      "learning_rate": 2.3211339717923326e-06,
      "loss": 0.1226,
      "step": 5933
    },
    {
      "epoch": 0.5467360759202101,
      "grad_norm": 0.9005748837624358,
      "learning_rate": 2.320374149456822e-06,
      "loss": 0.1346,
      "step": 5934
    },
    {
      "epoch": 0.5468282120974801,
      "grad_norm": 0.9435439221997367,
      "learning_rate": 2.31961434379959e-06,
      "loss": 0.1442,
      "step": 5935
    },
    {
      "epoch": 0.5469203482747501,
      "grad_norm": 0.8486317533267757,
      "learning_rate": 2.3188545548911863e-06,
      "loss": 0.1176,
      "step": 5936
    },
    {
      "epoch": 0.5470124844520201,
      "grad_norm": 0.8973418783092466,
      "learning_rate": 2.3180947828021574e-06,
      "loss": 0.1255,
      "step": 5937
    },
    {
      "epoch": 0.5471046206292901,
      "grad_norm": 0.975856203440726,
      "learning_rate": 2.317335027603046e-06,
      "loss": 0.1382,
      "step": 5938
    },
    {
      "epoch": 0.5471967568065601,
      "grad_norm": 0.8448417831997888,
      "learning_rate": 2.3165752893643974e-06,
      "loss": 0.1218,
      "step": 5939
    },
    {
      "epoch": 0.5472888929838301,
      "grad_norm": 0.8631379001410238,
      "learning_rate": 2.315815568156753e-06,
      "loss": 0.1274,
      "step": 5940
    },
    {
      "epoch": 0.5473810291611001,
      "grad_norm": 0.8739460925822028,
      "learning_rate": 2.315055864050654e-06,
      "loss": 0.1343,
      "step": 5941
    },
    {
      "epoch": 0.5474731653383701,
      "grad_norm": 0.9056750460446128,
      "learning_rate": 2.314296177116637e-06,
      "loss": 0.1456,
      "step": 5942
    },
    {
      "epoch": 0.5475653015156401,
      "grad_norm": 0.8597999521946855,
      "learning_rate": 2.3135365074252393e-06,
      "loss": 0.1229,
      "step": 5943
    },
    {
      "epoch": 0.5476574376929101,
      "grad_norm": 0.9429380689039017,
      "learning_rate": 2.3127768550469977e-06,
      "loss": 0.1517,
      "step": 5944
    },
    {
      "epoch": 0.5477495738701801,
      "grad_norm": 0.9326983964366256,
      "learning_rate": 2.3120172200524456e-06,
      "loss": 0.1409,
      "step": 5945
    },
    {
      "epoch": 0.5478417100474501,
      "grad_norm": 0.9029183579785335,
      "learning_rate": 2.311257602512114e-06,
      "loss": 0.1375,
      "step": 5946
    },
    {
      "epoch": 0.5479338462247202,
      "grad_norm": 0.8405914319952673,
      "learning_rate": 2.310498002496535e-06,
      "loss": 0.125,
      "step": 5947
    },
    {
      "epoch": 0.5480259824019902,
      "grad_norm": 0.8963143530428416,
      "learning_rate": 2.309738420076236e-06,
      "loss": 0.1362,
      "step": 5948
    },
    {
      "epoch": 0.5481181185792602,
      "grad_norm": 0.9373725224516829,
      "learning_rate": 2.308978855321746e-06,
      "loss": 0.1381,
      "step": 5949
    },
    {
      "epoch": 0.5482102547565302,
      "grad_norm": 0.904479495310587,
      "learning_rate": 2.30821930830359e-06,
      "loss": 0.1344,
      "step": 5950
    },
    {
      "epoch": 0.5483023909338002,
      "grad_norm": 0.947211874283797,
      "learning_rate": 2.307459779092291e-06,
      "loss": 0.1391,
      "step": 5951
    },
    {
      "epoch": 0.5483945271110702,
      "grad_norm": 0.9137280527447117,
      "learning_rate": 2.306700267758373e-06,
      "loss": 0.1244,
      "step": 5952
    },
    {
      "epoch": 0.5484866632883402,
      "grad_norm": 0.887794582900808,
      "learning_rate": 2.3059407743723562e-06,
      "loss": 0.1236,
      "step": 5953
    },
    {
      "epoch": 0.5485787994656102,
      "grad_norm": 0.8957336178387945,
      "learning_rate": 2.305181299004758e-06,
      "loss": 0.1319,
      "step": 5954
    },
    {
      "epoch": 0.5486709356428802,
      "grad_norm": 0.9285491033008205,
      "learning_rate": 2.304421841726098e-06,
      "loss": 0.1375,
      "step": 5955
    },
    {
      "epoch": 0.5487630718201502,
      "grad_norm": 0.9000401889585357,
      "learning_rate": 2.303662402606891e-06,
      "loss": 0.1394,
      "step": 5956
    },
    {
      "epoch": 0.5488552079974202,
      "grad_norm": 0.8887392546394776,
      "learning_rate": 2.3029029817176513e-06,
      "loss": 0.1241,
      "step": 5957
    },
    {
      "epoch": 0.5489473441746902,
      "grad_norm": 0.9063498591456991,
      "learning_rate": 2.302143579128891e-06,
      "loss": 0.1452,
      "step": 5958
    },
    {
      "epoch": 0.5490394803519602,
      "grad_norm": 0.9179438604712449,
      "learning_rate": 2.30138419491112e-06,
      "loss": 0.1393,
      "step": 5959
    },
    {
      "epoch": 0.5491316165292301,
      "grad_norm": 0.9440096748055181,
      "learning_rate": 2.3006248291348483e-06,
      "loss": 0.1458,
      "step": 5960
    },
    {
      "epoch": 0.5492237527065003,
      "grad_norm": 0.8571329043484666,
      "learning_rate": 2.2998654818705824e-06,
      "loss": 0.1258,
      "step": 5961
    },
    {
      "epoch": 0.5493158888837703,
      "grad_norm": 0.974799513318454,
      "learning_rate": 2.2991061531888285e-06,
      "loss": 0.146,
      "step": 5962
    },
    {
      "epoch": 0.5494080250610403,
      "grad_norm": 0.9535479404809656,
      "learning_rate": 2.29834684316009e-06,
      "loss": 0.1387,
      "step": 5963
    },
    {
      "epoch": 0.5495001612383102,
      "grad_norm": 0.9528230339650521,
      "learning_rate": 2.297587551854868e-06,
      "loss": 0.1313,
      "step": 5964
    },
    {
      "epoch": 0.5495922974155802,
      "grad_norm": 0.9235402560277223,
      "learning_rate": 2.296828279343664e-06,
      "loss": 0.1291,
      "step": 5965
    },
    {
      "epoch": 0.5496844335928502,
      "grad_norm": 0.858766032670027,
      "learning_rate": 2.2960690256969774e-06,
      "loss": 0.1261,
      "step": 5966
    },
    {
      "epoch": 0.5497765697701202,
      "grad_norm": 1.0036102049390694,
      "learning_rate": 2.2953097909853018e-06,
      "loss": 0.15,
      "step": 5967
    },
    {
      "epoch": 0.5498687059473902,
      "grad_norm": 0.9197290368123798,
      "learning_rate": 2.294550575279135e-06,
      "loss": 0.1263,
      "step": 5968
    },
    {
      "epoch": 0.5499608421246602,
      "grad_norm": 0.9410944923888054,
      "learning_rate": 2.293791378648969e-06,
      "loss": 0.1267,
      "step": 5969
    },
    {
      "epoch": 0.5500529783019302,
      "grad_norm": 0.9187152074045628,
      "learning_rate": 2.2930322011652965e-06,
      "loss": 0.1332,
      "step": 5970
    },
    {
      "epoch": 0.5501451144792002,
      "grad_norm": 0.877662807921679,
      "learning_rate": 2.2922730428986057e-06,
      "loss": 0.1323,
      "step": 5971
    },
    {
      "epoch": 0.5502372506564702,
      "grad_norm": 0.9595621542293774,
      "learning_rate": 2.291513903919385e-06,
      "loss": 0.1587,
      "step": 5972
    },
    {
      "epoch": 0.5503293868337402,
      "grad_norm": 0.9391446872559864,
      "learning_rate": 2.2907547842981213e-06,
      "loss": 0.1322,
      "step": 5973
    },
    {
      "epoch": 0.5504215230110103,
      "grad_norm": 0.9539499349532112,
      "learning_rate": 2.289995684105299e-06,
      "loss": 0.1416,
      "step": 5974
    },
    {
      "epoch": 0.5505136591882803,
      "grad_norm": 0.9231926460585064,
      "learning_rate": 2.2892366034113988e-06,
      "loss": 0.1305,
      "step": 5975
    },
    {
      "epoch": 0.5506057953655503,
      "grad_norm": 0.9016548074632982,
      "learning_rate": 2.288477542286903e-06,
      "loss": 0.1392,
      "step": 5976
    },
    {
      "epoch": 0.5506979315428203,
      "grad_norm": 0.942515622309317,
      "learning_rate": 2.2877185008022896e-06,
      "loss": 0.143,
      "step": 5977
    },
    {
      "epoch": 0.5507900677200903,
      "grad_norm": 0.9349815447766366,
      "learning_rate": 2.2869594790280376e-06,
      "loss": 0.1456,
      "step": 5978
    },
    {
      "epoch": 0.5508822038973603,
      "grad_norm": 0.9109243804576193,
      "learning_rate": 2.2862004770346205e-06,
      "loss": 0.1407,
      "step": 5979
    },
    {
      "epoch": 0.5509743400746303,
      "grad_norm": 0.8404738991971081,
      "learning_rate": 2.285441494892511e-06,
      "loss": 0.1245,
      "step": 5980
    },
    {
      "epoch": 0.5510664762519003,
      "grad_norm": 0.9338626947780788,
      "learning_rate": 2.284682532672183e-06,
      "loss": 0.1445,
      "step": 5981
    },
    {
      "epoch": 0.5511586124291703,
      "grad_norm": 0.9020789497422819,
      "learning_rate": 2.2839235904441054e-06,
      "loss": 0.1344,
      "step": 5982
    },
    {
      "epoch": 0.5512507486064403,
      "grad_norm": 0.8908428591725923,
      "learning_rate": 2.2831646682787443e-06,
      "loss": 0.1343,
      "step": 5983
    },
    {
      "epoch": 0.5513428847837103,
      "grad_norm": 0.9079304182626624,
      "learning_rate": 2.282405766246568e-06,
      "loss": 0.1287,
      "step": 5984
    },
    {
      "epoch": 0.5514350209609803,
      "grad_norm": 0.8329410231373364,
      "learning_rate": 2.281646884418039e-06,
      "loss": 0.1135,
      "step": 5985
    },
    {
      "epoch": 0.5515271571382503,
      "grad_norm": 0.9381099953259999,
      "learning_rate": 2.280888022863621e-06,
      "loss": 0.1405,
      "step": 5986
    },
    {
      "epoch": 0.5516192933155203,
      "grad_norm": 0.8513722262685598,
      "learning_rate": 2.2801291816537738e-06,
      "loss": 0.1265,
      "step": 5987
    },
    {
      "epoch": 0.5517114294927904,
      "grad_norm": 0.8820089761459324,
      "learning_rate": 2.2793703608589547e-06,
      "loss": 0.1432,
      "step": 5988
    },
    {
      "epoch": 0.5518035656700604,
      "grad_norm": 0.9175146678877896,
      "learning_rate": 2.2786115605496224e-06,
      "loss": 0.1372,
      "step": 5989
    },
    {
      "epoch": 0.5518957018473304,
      "grad_norm": 0.8941107121696663,
      "learning_rate": 2.2778527807962297e-06,
      "loss": 0.1289,
      "step": 5990
    },
    {
      "epoch": 0.5519878380246004,
      "grad_norm": 0.9554274388492524,
      "learning_rate": 2.277094021669231e-06,
      "loss": 0.1451,
      "step": 5991
    },
    {
      "epoch": 0.5520799742018704,
      "grad_norm": 0.8202855336317009,
      "learning_rate": 2.2763352832390762e-06,
      "loss": 0.1209,
      "step": 5992
    },
    {
      "epoch": 0.5521721103791404,
      "grad_norm": 0.8890984039392089,
      "learning_rate": 2.2755765655762135e-06,
      "loss": 0.1273,
      "step": 5993
    },
    {
      "epoch": 0.5522642465564104,
      "grad_norm": 0.9178667132404417,
      "learning_rate": 2.2748178687510915e-06,
      "loss": 0.1426,
      "step": 5994
    },
    {
      "epoch": 0.5523563827336804,
      "grad_norm": 0.8546557060383881,
      "learning_rate": 2.2740591928341552e-06,
      "loss": 0.1335,
      "step": 5995
    },
    {
      "epoch": 0.5524485189109504,
      "grad_norm": 0.9204965701548999,
      "learning_rate": 2.2733005378958462e-06,
      "loss": 0.1343,
      "step": 5996
    },
    {
      "epoch": 0.5525406550882204,
      "grad_norm": 0.9234067877974934,
      "learning_rate": 2.2725419040066075e-06,
      "loss": 0.1346,
      "step": 5997
    },
    {
      "epoch": 0.5526327912654904,
      "grad_norm": 0.9161899474482855,
      "learning_rate": 2.2717832912368766e-06,
      "loss": 0.133,
      "step": 5998
    },
    {
      "epoch": 0.5527249274427604,
      "grad_norm": 0.8984198063815197,
      "learning_rate": 2.271024699657093e-06,
      "loss": 0.1397,
      "step": 5999
    },
    {
      "epoch": 0.5528170636200304,
      "grad_norm": 0.8464187626712723,
      "learning_rate": 2.2702661293376895e-06,
      "loss": 0.1195,
      "step": 6000
    },
    {
      "epoch": 0.5528170636200304,
      "eval_loss": 0.13545145094394684,
      "eval_runtime": 299.9711,
      "eval_samples_per_second": 23.392,
      "eval_steps_per_second": 2.927,
      "step": 6000
    },
    {
      "epoch": 0.5529091997973004,
      "grad_norm": 0.8673690915958489,
      "learning_rate": 2.269507580349101e-06,
      "loss": 0.1426,
      "step": 6001
    },
    {
      "epoch": 0.5530013359745705,
      "grad_norm": 0.8595959230817573,
      "learning_rate": 2.2687490527617575e-06,
      "loss": 0.1308,
      "step": 6002
    },
    {
      "epoch": 0.5530934721518405,
      "grad_norm": 0.8980906183910887,
      "learning_rate": 2.2679905466460917e-06,
      "loss": 0.1319,
      "step": 6003
    },
    {
      "epoch": 0.5531856083291105,
      "grad_norm": 0.9618260702341198,
      "learning_rate": 2.2672320620725265e-06,
      "loss": 0.143,
      "step": 6004
    },
    {
      "epoch": 0.5532777445063805,
      "grad_norm": 0.8827191753043445,
      "learning_rate": 2.2664735991114893e-06,
      "loss": 0.1325,
      "step": 6005
    },
    {
      "epoch": 0.5533698806836505,
      "grad_norm": 0.9068695564847284,
      "learning_rate": 2.2657151578334046e-06,
      "loss": 0.131,
      "step": 6006
    },
    {
      "epoch": 0.5534620168609204,
      "grad_norm": 0.895570377791969,
      "learning_rate": 2.264956738308693e-06,
      "loss": 0.1286,
      "step": 6007
    },
    {
      "epoch": 0.5535541530381904,
      "grad_norm": 0.8658652374176646,
      "learning_rate": 2.2641983406077726e-06,
      "loss": 0.1287,
      "step": 6008
    },
    {
      "epoch": 0.5536462892154604,
      "grad_norm": 0.9105693225657342,
      "learning_rate": 2.2634399648010623e-06,
      "loss": 0.1358,
      "step": 6009
    },
    {
      "epoch": 0.5537384253927304,
      "grad_norm": 0.9705522735493408,
      "learning_rate": 2.262681610958976e-06,
      "loss": 0.1461,
      "step": 6010
    },
    {
      "epoch": 0.5538305615700004,
      "grad_norm": 0.9386721602193085,
      "learning_rate": 2.2619232791519287e-06,
      "loss": 0.1358,
      "step": 6011
    },
    {
      "epoch": 0.5539226977472704,
      "grad_norm": 0.8948899091915336,
      "learning_rate": 2.26116496945033e-06,
      "loss": 0.1298,
      "step": 6012
    },
    {
      "epoch": 0.5540148339245404,
      "grad_norm": 0.9849000608057639,
      "learning_rate": 2.260406681924589e-06,
      "loss": 0.1398,
      "step": 6013
    },
    {
      "epoch": 0.5541069701018104,
      "grad_norm": 0.9402527148448475,
      "learning_rate": 2.2596484166451136e-06,
      "loss": 0.1393,
      "step": 6014
    },
    {
      "epoch": 0.5541991062790805,
      "grad_norm": 0.8512564356361474,
      "learning_rate": 2.2588901736823087e-06,
      "loss": 0.1195,
      "step": 6015
    },
    {
      "epoch": 0.5542912424563505,
      "grad_norm": 0.9600645116664145,
      "learning_rate": 2.2581319531065777e-06,
      "loss": 0.1411,
      "step": 6016
    },
    {
      "epoch": 0.5543833786336205,
      "grad_norm": 0.9064131823119506,
      "learning_rate": 2.257373754988321e-06,
      "loss": 0.1265,
      "step": 6017
    },
    {
      "epoch": 0.5544755148108905,
      "grad_norm": 0.9335295798432109,
      "learning_rate": 2.256615579397936e-06,
      "loss": 0.1339,
      "step": 6018
    },
    {
      "epoch": 0.5545676509881605,
      "grad_norm": 0.9618056064055769,
      "learning_rate": 2.2558574264058218e-06,
      "loss": 0.145,
      "step": 6019
    },
    {
      "epoch": 0.5546597871654305,
      "grad_norm": 0.8809660515422949,
      "learning_rate": 2.255099296082372e-06,
      "loss": 0.1253,
      "step": 6020
    },
    {
      "epoch": 0.5547519233427005,
      "grad_norm": 0.8893338573120851,
      "learning_rate": 2.2543411884979775e-06,
      "loss": 0.131,
      "step": 6021
    },
    {
      "epoch": 0.5548440595199705,
      "grad_norm": 0.8911158292533096,
      "learning_rate": 2.2535831037230313e-06,
      "loss": 0.129,
      "step": 6022
    },
    {
      "epoch": 0.5549361956972405,
      "grad_norm": 0.931882441099175,
      "learning_rate": 2.2528250418279196e-06,
      "loss": 0.1421,
      "step": 6023
    },
    {
      "epoch": 0.5550283318745105,
      "grad_norm": 0.9395929443411103,
      "learning_rate": 2.2520670028830305e-06,
      "loss": 0.1337,
      "step": 6024
    },
    {
      "epoch": 0.5551204680517805,
      "grad_norm": 0.9529079984593413,
      "learning_rate": 2.251308986958746e-06,
      "loss": 0.1342,
      "step": 6025
    },
    {
      "epoch": 0.5552126042290505,
      "grad_norm": 0.9295137492491802,
      "learning_rate": 2.250550994125449e-06,
      "loss": 0.1377,
      "step": 6026
    },
    {
      "epoch": 0.5553047404063205,
      "grad_norm": 0.9283395008536116,
      "learning_rate": 2.249793024453519e-06,
      "loss": 0.1243,
      "step": 6027
    },
    {
      "epoch": 0.5553968765835905,
      "grad_norm": 0.9643312312736612,
      "learning_rate": 2.2490350780133344e-06,
      "loss": 0.1447,
      "step": 6028
    },
    {
      "epoch": 0.5554890127608606,
      "grad_norm": 0.9532369619786419,
      "learning_rate": 2.2482771548752684e-06,
      "loss": 0.137,
      "step": 6029
    },
    {
      "epoch": 0.5555811489381306,
      "grad_norm": 0.9746110125718069,
      "learning_rate": 2.247519255109697e-06,
      "loss": 0.1369,
      "step": 6030
    },
    {
      "epoch": 0.5556732851154006,
      "grad_norm": 0.9429182322625936,
      "learning_rate": 2.2467613787869886e-06,
      "loss": 0.1387,
      "step": 6031
    },
    {
      "epoch": 0.5557654212926706,
      "grad_norm": 0.8952924660189465,
      "learning_rate": 2.2460035259775147e-06,
      "loss": 0.1373,
      "step": 6032
    },
    {
      "epoch": 0.5558575574699406,
      "grad_norm": 0.8812945184571047,
      "learning_rate": 2.2452456967516404e-06,
      "loss": 0.1349,
      "step": 6033
    },
    {
      "epoch": 0.5559496936472106,
      "grad_norm": 0.9285111333188203,
      "learning_rate": 2.2444878911797295e-06,
      "loss": 0.1378,
      "step": 6034
    },
    {
      "epoch": 0.5560418298244806,
      "grad_norm": 0.9269768243193539,
      "learning_rate": 2.2437301093321467e-06,
      "loss": 0.149,
      "step": 6035
    },
    {
      "epoch": 0.5561339660017506,
      "grad_norm": 0.9081816960352027,
      "learning_rate": 2.242972351279251e-06,
      "loss": 0.133,
      "step": 6036
    },
    {
      "epoch": 0.5562261021790206,
      "grad_norm": 0.9203965936383334,
      "learning_rate": 2.242214617091399e-06,
      "loss": 0.1269,
      "step": 6037
    },
    {
      "epoch": 0.5563182383562906,
      "grad_norm": 0.8587985077915162,
      "learning_rate": 2.241456906838948e-06,
      "loss": 0.1248,
      "step": 6038
    },
    {
      "epoch": 0.5564103745335606,
      "grad_norm": 0.9433740268698593,
      "learning_rate": 2.2406992205922506e-06,
      "loss": 0.1332,
      "step": 6039
    },
    {
      "epoch": 0.5565025107108306,
      "grad_norm": 0.8494531923033956,
      "learning_rate": 2.2399415584216595e-06,
      "loss": 0.1199,
      "step": 6040
    },
    {
      "epoch": 0.5565946468881006,
      "grad_norm": 0.8689474784825143,
      "learning_rate": 2.2391839203975225e-06,
      "loss": 0.1291,
      "step": 6041
    },
    {
      "epoch": 0.5566867830653707,
      "grad_norm": 0.9208796138126546,
      "learning_rate": 2.238426306590186e-06,
      "loss": 0.1504,
      "step": 6042
    },
    {
      "epoch": 0.5567789192426407,
      "grad_norm": 0.9175005106338083,
      "learning_rate": 2.237668717069995e-06,
      "loss": 0.1338,
      "step": 6043
    },
    {
      "epoch": 0.5568710554199107,
      "grad_norm": 0.9590343105192461,
      "learning_rate": 2.2369111519072917e-06,
      "loss": 0.1402,
      "step": 6044
    },
    {
      "epoch": 0.5569631915971807,
      "grad_norm": 0.9352154416418151,
      "learning_rate": 2.2361536111724176e-06,
      "loss": 0.1285,
      "step": 6045
    },
    {
      "epoch": 0.5570553277744507,
      "grad_norm": 0.9108642890336781,
      "learning_rate": 2.2353960949357082e-06,
      "loss": 0.1327,
      "step": 6046
    },
    {
      "epoch": 0.5571474639517207,
      "grad_norm": 0.9125190233288996,
      "learning_rate": 2.2346386032674995e-06,
      "loss": 0.137,
      "step": 6047
    },
    {
      "epoch": 0.5572396001289907,
      "grad_norm": 0.9423488359590013,
      "learning_rate": 2.2338811362381256e-06,
      "loss": 0.1324,
      "step": 6048
    },
    {
      "epoch": 0.5573317363062606,
      "grad_norm": 0.9392634101242259,
      "learning_rate": 2.233123693917917e-06,
      "loss": 0.1322,
      "step": 6049
    },
    {
      "epoch": 0.5574238724835306,
      "grad_norm": 0.908005657369545,
      "learning_rate": 2.232366276377201e-06,
      "loss": 0.1258,
      "step": 6050
    },
    {
      "epoch": 0.5575160086608006,
      "grad_norm": 0.8968615049336631,
      "learning_rate": 2.2316088836863064e-06,
      "loss": 0.1243,
      "step": 6051
    },
    {
      "epoch": 0.5576081448380706,
      "grad_norm": 0.9926119163177404,
      "learning_rate": 2.2308515159155546e-06,
      "loss": 0.1417,
      "step": 6052
    },
    {
      "epoch": 0.5577002810153406,
      "grad_norm": 0.9075124724568028,
      "learning_rate": 2.23009417313527e-06,
      "loss": 0.1259,
      "step": 6053
    },
    {
      "epoch": 0.5577924171926106,
      "grad_norm": 0.9618556843783079,
      "learning_rate": 2.2293368554157695e-06,
      "loss": 0.1311,
      "step": 6054
    },
    {
      "epoch": 0.5578845533698806,
      "grad_norm": 1.0245024369165074,
      "learning_rate": 2.228579562827371e-06,
      "loss": 0.1452,
      "step": 6055
    },
    {
      "epoch": 0.5579766895471507,
      "grad_norm": 0.9542187116021007,
      "learning_rate": 2.2278222954403895e-06,
      "loss": 0.1345,
      "step": 6056
    },
    {
      "epoch": 0.5580688257244207,
      "grad_norm": 1.026635884315615,
      "learning_rate": 2.2270650533251383e-06,
      "loss": 0.1531,
      "step": 6057
    },
    {
      "epoch": 0.5581609619016907,
      "grad_norm": 0.9103746701579992,
      "learning_rate": 2.2263078365519244e-06,
      "loss": 0.1315,
      "step": 6058
    },
    {
      "epoch": 0.5582530980789607,
      "grad_norm": 0.8750502768735856,
      "learning_rate": 2.2255506451910584e-06,
      "loss": 0.1389,
      "step": 6059
    },
    {
      "epoch": 0.5583452342562307,
      "grad_norm": 1.0348797840452786,
      "learning_rate": 2.2247934793128436e-06,
      "loss": 0.1507,
      "step": 6060
    },
    {
      "epoch": 0.5584373704335007,
      "grad_norm": 0.9387319109433172,
      "learning_rate": 2.224036338987585e-06,
      "loss": 0.1416,
      "step": 6061
    },
    {
      "epoch": 0.5585295066107707,
      "grad_norm": 0.9648857566576942,
      "learning_rate": 2.223279224285582e-06,
      "loss": 0.1465,
      "step": 6062
    },
    {
      "epoch": 0.5586216427880407,
      "grad_norm": 0.9512060091262989,
      "learning_rate": 2.2225221352771316e-06,
      "loss": 0.1333,
      "step": 6063
    },
    {
      "epoch": 0.5587137789653107,
      "grad_norm": 0.8818452555644942,
      "learning_rate": 2.221765072032532e-06,
      "loss": 0.1354,
      "step": 6064
    },
    {
      "epoch": 0.5588059151425807,
      "grad_norm": 0.9595230313609375,
      "learning_rate": 2.2210080346220755e-06,
      "loss": 0.1495,
      "step": 6065
    },
    {
      "epoch": 0.5588980513198507,
      "grad_norm": 0.9406934954772517,
      "learning_rate": 2.220251023116052e-06,
      "loss": 0.1395,
      "step": 6066
    },
    {
      "epoch": 0.5589901874971207,
      "grad_norm": 1.0184402402535877,
      "learning_rate": 2.2194940375847517e-06,
      "loss": 0.1491,
      "step": 6067
    },
    {
      "epoch": 0.5590823236743907,
      "grad_norm": 0.9912445760663547,
      "learning_rate": 2.2187370780984596e-06,
      "loss": 0.1489,
      "step": 6068
    },
    {
      "epoch": 0.5591744598516608,
      "grad_norm": 0.94668807549237,
      "learning_rate": 2.2179801447274613e-06,
      "loss": 0.1385,
      "step": 6069
    },
    {
      "epoch": 0.5592665960289308,
      "grad_norm": 0.9082934245577071,
      "learning_rate": 2.217223237542036e-06,
      "loss": 0.1235,
      "step": 6070
    },
    {
      "epoch": 0.5593587322062008,
      "grad_norm": 0.9420787600329253,
      "learning_rate": 2.2164663566124635e-06,
      "loss": 0.1376,
      "step": 6071
    },
    {
      "epoch": 0.5594508683834708,
      "grad_norm": 0.8741240273373798,
      "learning_rate": 2.2157095020090207e-06,
      "loss": 0.1316,
      "step": 6072
    },
    {
      "epoch": 0.5595430045607408,
      "grad_norm": 0.8657043268109199,
      "learning_rate": 2.2149526738019802e-06,
      "loss": 0.1233,
      "step": 6073
    },
    {
      "epoch": 0.5596351407380108,
      "grad_norm": 0.8732124884259539,
      "learning_rate": 2.2141958720616163e-06,
      "loss": 0.1279,
      "step": 6074
    },
    {
      "epoch": 0.5597272769152808,
      "grad_norm": 0.8525074108717983,
      "learning_rate": 2.2134390968581958e-06,
      "loss": 0.1243,
      "step": 6075
    },
    {
      "epoch": 0.5598194130925508,
      "grad_norm": 0.8487356309861768,
      "learning_rate": 2.212682348261985e-06,
      "loss": 0.1242,
      "step": 6076
    },
    {
      "epoch": 0.5599115492698208,
      "grad_norm": 0.9289207090322201,
      "learning_rate": 2.21192562634325e-06,
      "loss": 0.1336,
      "step": 6077
    },
    {
      "epoch": 0.5600036854470908,
      "grad_norm": 0.8841161908632112,
      "learning_rate": 2.2111689311722524e-06,
      "loss": 0.1327,
      "step": 6078
    },
    {
      "epoch": 0.5600958216243608,
      "grad_norm": 0.9007258520902498,
      "learning_rate": 2.210412262819249e-06,
      "loss": 0.1314,
      "step": 6079
    },
    {
      "epoch": 0.5601879578016308,
      "grad_norm": 0.9034049217075238,
      "learning_rate": 2.209655621354499e-06,
      "loss": 0.1288,
      "step": 6080
    },
    {
      "epoch": 0.5602800939789008,
      "grad_norm": 1.0561927888352458,
      "learning_rate": 2.2088990068482554e-06,
      "loss": 0.1638,
      "step": 6081
    },
    {
      "epoch": 0.5603722301561708,
      "grad_norm": 0.9263479169980393,
      "learning_rate": 2.208142419370771e-06,
      "loss": 0.1286,
      "step": 6082
    },
    {
      "epoch": 0.5604643663334409,
      "grad_norm": 0.876129774276463,
      "learning_rate": 2.207385858992294e-06,
      "loss": 0.1303,
      "step": 6083
    },
    {
      "epoch": 0.5605565025107109,
      "grad_norm": 0.8577719677238493,
      "learning_rate": 2.206629325783071e-06,
      "loss": 0.1278,
      "step": 6084
    },
    {
      "epoch": 0.5606486386879809,
      "grad_norm": 0.9145085290177857,
      "learning_rate": 2.2058728198133466e-06,
      "loss": 0.1449,
      "step": 6085
    },
    {
      "epoch": 0.5607407748652509,
      "grad_norm": 0.9125478368838313,
      "learning_rate": 2.2051163411533644e-06,
      "loss": 0.1308,
      "step": 6086
    },
    {
      "epoch": 0.5608329110425209,
      "grad_norm": 0.8282140216339626,
      "learning_rate": 2.2043598898733597e-06,
      "loss": 0.1243,
      "step": 6087
    },
    {
      "epoch": 0.5609250472197909,
      "grad_norm": 0.8843568534788926,
      "learning_rate": 2.2036034660435714e-06,
      "loss": 0.1404,
      "step": 6088
    },
    {
      "epoch": 0.5610171833970609,
      "grad_norm": 0.9683614071935065,
      "learning_rate": 2.2028470697342334e-06,
      "loss": 0.1459,
      "step": 6089
    },
    {
      "epoch": 0.5611093195743309,
      "grad_norm": 0.9027082869152945,
      "learning_rate": 2.2020907010155775e-06,
      "loss": 0.136,
      "step": 6090
    },
    {
      "epoch": 0.5612014557516009,
      "grad_norm": 0.9025715692301286,
      "learning_rate": 2.2013343599578314e-06,
      "loss": 0.1334,
      "step": 6091
    },
    {
      "epoch": 0.5612935919288708,
      "grad_norm": 1.0019084022146867,
      "learning_rate": 2.2005780466312224e-06,
      "loss": 0.1453,
      "step": 6092
    },
    {
      "epoch": 0.5613857281061408,
      "grad_norm": 0.8716930577679224,
      "learning_rate": 2.1998217611059733e-06,
      "loss": 0.118,
      "step": 6093
    },
    {
      "epoch": 0.5614778642834108,
      "grad_norm": 1.0009481851360602,
      "learning_rate": 2.1990655034523073e-06,
      "loss": 0.1483,
      "step": 6094
    },
    {
      "epoch": 0.5615700004606808,
      "grad_norm": 0.900153344517744,
      "learning_rate": 2.198309273740441e-06,
      "loss": 0.1184,
      "step": 6095
    },
    {
      "epoch": 0.5616621366379508,
      "grad_norm": 0.9337923240361191,
      "learning_rate": 2.1975530720405906e-06,
      "loss": 0.1316,
      "step": 6096
    },
    {
      "epoch": 0.561754272815221,
      "grad_norm": 0.8799886004445121,
      "learning_rate": 2.1967968984229704e-06,
      "loss": 0.131,
      "step": 6097
    },
    {
      "epoch": 0.5618464089924909,
      "grad_norm": 0.9528519591932912,
      "learning_rate": 2.1960407529577917e-06,
      "loss": 0.1323,
      "step": 6098
    },
    {
      "epoch": 0.5619385451697609,
      "grad_norm": 0.9216207218465383,
      "learning_rate": 2.1952846357152603e-06,
      "loss": 0.1332,
      "step": 6099
    },
    {
      "epoch": 0.5620306813470309,
      "grad_norm": 0.902665622003663,
      "learning_rate": 2.1945285467655843e-06,
      "loss": 0.1312,
      "step": 6100
    },
    {
      "epoch": 0.5621228175243009,
      "grad_norm": 0.9401051874207889,
      "learning_rate": 2.1937724861789645e-06,
      "loss": 0.1443,
      "step": 6101
    },
    {
      "epoch": 0.5622149537015709,
      "grad_norm": 0.9345134562655586,
      "learning_rate": 2.1930164540256035e-06,
      "loss": 0.1343,
      "step": 6102
    },
    {
      "epoch": 0.5623070898788409,
      "grad_norm": 0.8892763251463518,
      "learning_rate": 2.1922604503756977e-06,
      "loss": 0.1312,
      "step": 6103
    },
    {
      "epoch": 0.5623992260561109,
      "grad_norm": 0.8633253656360871,
      "learning_rate": 2.1915044752994417e-06,
      "loss": 0.1254,
      "step": 6104
    },
    {
      "epoch": 0.5624913622333809,
      "grad_norm": 0.9320587345393514,
      "learning_rate": 2.1907485288670288e-06,
      "loss": 0.1367,
      "step": 6105
    },
    {
      "epoch": 0.5625834984106509,
      "grad_norm": 0.889817249819364,
      "learning_rate": 2.1899926111486473e-06,
      "loss": 0.1374,
      "step": 6106
    },
    {
      "epoch": 0.5626756345879209,
      "grad_norm": 0.9262828568720447,
      "learning_rate": 2.1892367222144863e-06,
      "loss": 0.1449,
      "step": 6107
    },
    {
      "epoch": 0.5627677707651909,
      "grad_norm": 0.8659566791382182,
      "learning_rate": 2.1884808621347288e-06,
      "loss": 0.1249,
      "step": 6108
    },
    {
      "epoch": 0.5628599069424609,
      "grad_norm": 0.9488609398698363,
      "learning_rate": 2.1877250309795565e-06,
      "loss": 0.1458,
      "step": 6109
    },
    {
      "epoch": 0.562952043119731,
      "grad_norm": 0.9129507239437383,
      "learning_rate": 2.186969228819149e-06,
      "loss": 0.1356,
      "step": 6110
    },
    {
      "epoch": 0.563044179297001,
      "grad_norm": 0.9234955497308438,
      "learning_rate": 2.1862134557236826e-06,
      "loss": 0.1358,
      "step": 6111
    },
    {
      "epoch": 0.563136315474271,
      "grad_norm": 0.9315935822477422,
      "learning_rate": 2.1854577117633297e-06,
      "loss": 0.1459,
      "step": 6112
    },
    {
      "epoch": 0.563228451651541,
      "grad_norm": 0.961404001994605,
      "learning_rate": 2.1847019970082628e-06,
      "loss": 0.1366,
      "step": 6113
    },
    {
      "epoch": 0.563320587828811,
      "grad_norm": 0.8541689771391625,
      "learning_rate": 2.1839463115286484e-06,
      "loss": 0.1188,
      "step": 6114
    },
    {
      "epoch": 0.563412724006081,
      "grad_norm": 0.9470634734872366,
      "learning_rate": 2.183190655394655e-06,
      "loss": 0.1404,
      "step": 6115
    },
    {
      "epoch": 0.563504860183351,
      "grad_norm": 0.9911048928912876,
      "learning_rate": 2.182435028676442e-06,
      "loss": 0.1413,
      "step": 6116
    },
    {
      "epoch": 0.563596996360621,
      "grad_norm": 0.9182342845541454,
      "learning_rate": 2.1816794314441704e-06,
      "loss": 0.1234,
      "step": 6117
    },
    {
      "epoch": 0.563689132537891,
      "grad_norm": 0.9403718290802439,
      "learning_rate": 2.1809238637679984e-06,
      "loss": 0.1295,
      "step": 6118
    },
    {
      "epoch": 0.563781268715161,
      "grad_norm": 0.9045271472960527,
      "learning_rate": 2.1801683257180807e-06,
      "loss": 0.1291,
      "step": 6119
    },
    {
      "epoch": 0.563873404892431,
      "grad_norm": 0.9327840459682971,
      "learning_rate": 2.179412817364567e-06,
      "loss": 0.1305,
      "step": 6120
    },
    {
      "epoch": 0.563965541069701,
      "grad_norm": 0.9333718027134292,
      "learning_rate": 2.1786573387776085e-06,
      "loss": 0.1385,
      "step": 6121
    },
    {
      "epoch": 0.564057677246971,
      "grad_norm": 0.9100700904648433,
      "learning_rate": 2.17790189002735e-06,
      "loss": 0.1284,
      "step": 6122
    },
    {
      "epoch": 0.564149813424241,
      "grad_norm": 0.9593641910292168,
      "learning_rate": 2.177146471183937e-06,
      "loss": 0.1342,
      "step": 6123
    },
    {
      "epoch": 0.5642419496015111,
      "grad_norm": 0.968082559377105,
      "learning_rate": 2.176391082317508e-06,
      "loss": 0.141,
      "step": 6124
    },
    {
      "epoch": 0.5643340857787811,
      "grad_norm": 0.9149483271324426,
      "learning_rate": 2.175635723498201e-06,
      "loss": 0.1378,
      "step": 6125
    },
    {
      "epoch": 0.5644262219560511,
      "grad_norm": 0.8897063217512015,
      "learning_rate": 2.1748803947961533e-06,
      "loss": 0.1264,
      "step": 6126
    },
    {
      "epoch": 0.5645183581333211,
      "grad_norm": 0.8974960547750435,
      "learning_rate": 2.174125096281496e-06,
      "loss": 0.1409,
      "step": 6127
    },
    {
      "epoch": 0.5646104943105911,
      "grad_norm": 0.9182853643197407,
      "learning_rate": 2.1733698280243578e-06,
      "loss": 0.1307,
      "step": 6128
    },
    {
      "epoch": 0.5647026304878611,
      "grad_norm": 0.8913059045600129,
      "learning_rate": 2.1726145900948664e-06,
      "loss": 0.1326,
      "step": 6129
    },
    {
      "epoch": 0.5647947666651311,
      "grad_norm": 0.9202971875767432,
      "learning_rate": 2.1718593825631454e-06,
      "loss": 0.1407,
      "step": 6130
    },
    {
      "epoch": 0.5648869028424011,
      "grad_norm": 0.9154564708205672,
      "learning_rate": 2.1711042054993164e-06,
      "loss": 0.144,
      "step": 6131
    },
    {
      "epoch": 0.5649790390196711,
      "grad_norm": 0.9188105344844977,
      "learning_rate": 2.1703490589734976e-06,
      "loss": 0.1406,
      "step": 6132
    },
    {
      "epoch": 0.5650711751969411,
      "grad_norm": 0.9213698353945419,
      "learning_rate": 2.1695939430558035e-06,
      "loss": 0.1337,
      "step": 6133
    },
    {
      "epoch": 0.565163311374211,
      "grad_norm": 0.9162759583166316,
      "learning_rate": 2.1688388578163476e-06,
      "loss": 0.1334,
      "step": 6134
    },
    {
      "epoch": 0.565255447551481,
      "grad_norm": 0.941951590936026,
      "learning_rate": 2.168083803325239e-06,
      "loss": 0.1333,
      "step": 6135
    },
    {
      "epoch": 0.565347583728751,
      "grad_norm": 0.9398691081517228,
      "learning_rate": 2.167328779652586e-06,
      "loss": 0.134,
      "step": 6136
    },
    {
      "epoch": 0.5654397199060212,
      "grad_norm": 0.9567818118407774,
      "learning_rate": 2.166573786868491e-06,
      "loss": 0.1385,
      "step": 6137
    },
    {
      "epoch": 0.5655318560832912,
      "grad_norm": 0.9417036150379874,
      "learning_rate": 2.1658188250430556e-06,
      "loss": 0.1428,
      "step": 6138
    },
    {
      "epoch": 0.5656239922605611,
      "grad_norm": 0.8769247131311982,
      "learning_rate": 2.1650638942463785e-06,
      "loss": 0.131,
      "step": 6139
    },
    {
      "epoch": 0.5657161284378311,
      "grad_norm": 0.9078529735616421,
      "learning_rate": 2.1643089945485555e-06,
      "loss": 0.1335,
      "step": 6140
    },
    {
      "epoch": 0.5658082646151011,
      "grad_norm": 0.9324124879098438,
      "learning_rate": 2.163554126019677e-06,
      "loss": 0.143,
      "step": 6141
    },
    {
      "epoch": 0.5659004007923711,
      "grad_norm": 0.9048706928934882,
      "learning_rate": 2.162799288729835e-06,
      "loss": 0.1359,
      "step": 6142
    },
    {
      "epoch": 0.5659925369696411,
      "grad_norm": 0.9352246348847684,
      "learning_rate": 2.162044482749115e-06,
      "loss": 0.142,
      "step": 6143
    },
    {
      "epoch": 0.5660846731469111,
      "grad_norm": 0.911385629437897,
      "learning_rate": 2.161289708147602e-06,
      "loss": 0.1347,
      "step": 6144
    },
    {
      "epoch": 0.5661768093241811,
      "grad_norm": 1.011126666761643,
      "learning_rate": 2.1605349649953756e-06,
      "loss": 0.1568,
      "step": 6145
    },
    {
      "epoch": 0.5662689455014511,
      "grad_norm": 0.8963212952587003,
      "learning_rate": 2.1597802533625135e-06,
      "loss": 0.126,
      "step": 6146
    },
    {
      "epoch": 0.5663610816787211,
      "grad_norm": 0.8806494895413713,
      "learning_rate": 2.159025573319092e-06,
      "loss": 0.129,
      "step": 6147
    },
    {
      "epoch": 0.5664532178559911,
      "grad_norm": 0.9027048432779949,
      "learning_rate": 2.1582709249351834e-06,
      "loss": 0.1256,
      "step": 6148
    },
    {
      "epoch": 0.5665453540332611,
      "grad_norm": 0.926098276509541,
      "learning_rate": 2.157516308280855e-06,
      "loss": 0.1377,
      "step": 6149
    },
    {
      "epoch": 0.5666374902105311,
      "grad_norm": 0.8869498269399431,
      "learning_rate": 2.156761723426175e-06,
      "loss": 0.1191,
      "step": 6150
    },
    {
      "epoch": 0.5667296263878012,
      "grad_norm": 0.9599949611293014,
      "learning_rate": 2.1560071704412052e-06,
      "loss": 0.1386,
      "step": 6151
    },
    {
      "epoch": 0.5668217625650712,
      "grad_norm": 0.9464471559188455,
      "learning_rate": 2.155252649396008e-06,
      "loss": 0.124,
      "step": 6152
    },
    {
      "epoch": 0.5669138987423412,
      "grad_norm": 0.9102320016506573,
      "learning_rate": 2.1544981603606386e-06,
      "loss": 0.131,
      "step": 6153
    },
    {
      "epoch": 0.5670060349196112,
      "grad_norm": 0.90157281915478,
      "learning_rate": 2.1537437034051516e-06,
      "loss": 0.1221,
      "step": 6154
    },
    {
      "epoch": 0.5670981710968812,
      "grad_norm": 0.9285510101392009,
      "learning_rate": 2.1529892785995996e-06,
      "loss": 0.1279,
      "step": 6155
    },
    {
      "epoch": 0.5671903072741512,
      "grad_norm": 0.9514959766388984,
      "learning_rate": 2.152234886014031e-06,
      "loss": 0.1314,
      "step": 6156
    },
    {
      "epoch": 0.5672824434514212,
      "grad_norm": 0.9429262054723736,
      "learning_rate": 2.1514805257184894e-06,
      "loss": 0.1375,
      "step": 6157
    },
    {
      "epoch": 0.5673745796286912,
      "grad_norm": 0.9573607033483201,
      "learning_rate": 2.1507261977830198e-06,
      "loss": 0.1345,
      "step": 6158
    },
    {
      "epoch": 0.5674667158059612,
      "grad_norm": 0.9221981689933934,
      "learning_rate": 2.1499719022776588e-06,
      "loss": 0.1385,
      "step": 6159
    },
    {
      "epoch": 0.5675588519832312,
      "grad_norm": 0.9011655547573784,
      "learning_rate": 2.149217639272445e-06,
      "loss": 0.1306,
      "step": 6160
    },
    {
      "epoch": 0.5676509881605012,
      "grad_norm": 0.9171495889253741,
      "learning_rate": 2.1484634088374124e-06,
      "loss": 0.1327,
      "step": 6161
    },
    {
      "epoch": 0.5677431243377712,
      "grad_norm": 1.0023115963468265,
      "learning_rate": 2.1477092110425887e-06,
      "loss": 0.1435,
      "step": 6162
    },
    {
      "epoch": 0.5678352605150412,
      "grad_norm": 0.9694155127507335,
      "learning_rate": 2.1469550459580025e-06,
      "loss": 0.1493,
      "step": 6163
    },
    {
      "epoch": 0.5679273966923112,
      "grad_norm": 0.8953821703774014,
      "learning_rate": 2.1462009136536787e-06,
      "loss": 0.1302,
      "step": 6164
    },
    {
      "epoch": 0.5680195328695813,
      "grad_norm": 0.9109471052698274,
      "learning_rate": 2.145446814199639e-06,
      "loss": 0.1317,
      "step": 6165
    },
    {
      "epoch": 0.5681116690468513,
      "grad_norm": 0.9592739524875322,
      "learning_rate": 2.1446927476658996e-06,
      "loss": 0.1398,
      "step": 6166
    },
    {
      "epoch": 0.5682038052241213,
      "grad_norm": 0.8768594453644337,
      "learning_rate": 2.1439387141224775e-06,
      "loss": 0.1232,
      "step": 6167
    },
    {
      "epoch": 0.5682959414013913,
      "grad_norm": 0.894972250282082,
      "learning_rate": 2.1431847136393832e-06,
      "loss": 0.1311,
      "step": 6168
    },
    {
      "epoch": 0.5683880775786613,
      "grad_norm": 0.8918939489151908,
      "learning_rate": 2.1424307462866283e-06,
      "loss": 0.1292,
      "step": 6169
    },
    {
      "epoch": 0.5684802137559313,
      "grad_norm": 0.902243814029342,
      "learning_rate": 2.141676812134216e-06,
      "loss": 0.1217,
      "step": 6170
    },
    {
      "epoch": 0.5685723499332013,
      "grad_norm": 0.971252940336675,
      "learning_rate": 2.1409229112521498e-06,
      "loss": 0.1394,
      "step": 6171
    },
    {
      "epoch": 0.5686644861104713,
      "grad_norm": 0.9021673999338276,
      "learning_rate": 2.1401690437104306e-06,
      "loss": 0.1227,
      "step": 6172
    },
    {
      "epoch": 0.5687566222877413,
      "grad_norm": 0.9182448929575698,
      "learning_rate": 2.139415209579055e-06,
      "loss": 0.1361,
      "step": 6173
    },
    {
      "epoch": 0.5688487584650113,
      "grad_norm": 0.9163926727615889,
      "learning_rate": 2.1386614089280145e-06,
      "loss": 0.1446,
      "step": 6174
    },
    {
      "epoch": 0.5689408946422813,
      "grad_norm": 0.9611220324654529,
      "learning_rate": 2.137907641827302e-06,
      "loss": 0.1437,
      "step": 6175
    },
    {
      "epoch": 0.5690330308195513,
      "grad_norm": 0.918237967741333,
      "learning_rate": 2.1371539083469033e-06,
      "loss": 0.1397,
      "step": 6176
    },
    {
      "epoch": 0.5691251669968213,
      "grad_norm": 0.9040728280725138,
      "learning_rate": 2.1364002085568046e-06,
      "loss": 0.1355,
      "step": 6177
    },
    {
      "epoch": 0.5692173031740914,
      "grad_norm": 0.9140522866500976,
      "learning_rate": 2.135646542526985e-06,
      "loss": 0.1288,
      "step": 6178
    },
    {
      "epoch": 0.5693094393513614,
      "grad_norm": 0.9138310043305649,
      "learning_rate": 2.1348929103274223e-06,
      "loss": 0.1296,
      "step": 6179
    },
    {
      "epoch": 0.5694015755286314,
      "grad_norm": 0.8987271086082552,
      "learning_rate": 2.134139312028093e-06,
      "loss": 0.1366,
      "step": 6180
    },
    {
      "epoch": 0.5694937117059014,
      "grad_norm": 0.8915096965436777,
      "learning_rate": 2.1333857476989685e-06,
      "loss": 0.1348,
      "step": 6181
    },
    {
      "epoch": 0.5695858478831713,
      "grad_norm": 0.9142618530436438,
      "learning_rate": 2.1326322174100156e-06,
      "loss": 0.1265,
      "step": 6182
    },
    {
      "epoch": 0.5696779840604413,
      "grad_norm": 1.0020211952626925,
      "learning_rate": 2.1318787212312015e-06,
      "loss": 0.14,
      "step": 6183
    },
    {
      "epoch": 0.5697701202377113,
      "grad_norm": 0.9206368798131661,
      "learning_rate": 2.131125259232487e-06,
      "loss": 0.1319,
      "step": 6184
    },
    {
      "epoch": 0.5698622564149813,
      "grad_norm": 0.867245016991194,
      "learning_rate": 2.130371831483833e-06,
      "loss": 0.1226,
      "step": 6185
    },
    {
      "epoch": 0.5699543925922513,
      "grad_norm": 0.8823565460940279,
      "learning_rate": 2.1296184380551936e-06,
      "loss": 0.128,
      "step": 6186
    },
    {
      "epoch": 0.5700465287695213,
      "grad_norm": 0.9646875119052738,
      "learning_rate": 2.128865079016522e-06,
      "loss": 0.1402,
      "step": 6187
    },
    {
      "epoch": 0.5701386649467913,
      "grad_norm": 0.9309522537125285,
      "learning_rate": 2.128111754437768e-06,
      "loss": 0.1391,
      "step": 6188
    },
    {
      "epoch": 0.5702308011240613,
      "grad_norm": 0.895035105648232,
      "learning_rate": 2.127358464388877e-06,
      "loss": 0.1366,
      "step": 6189
    },
    {
      "epoch": 0.5703229373013313,
      "grad_norm": 0.9393248599902015,
      "learning_rate": 2.1266052089397936e-06,
      "loss": 0.1467,
      "step": 6190
    },
    {
      "epoch": 0.5704150734786013,
      "grad_norm": 0.9666709102936543,
      "learning_rate": 2.1258519881604566e-06,
      "loss": 0.1287,
      "step": 6191
    },
    {
      "epoch": 0.5705072096558714,
      "grad_norm": 0.8937479680491346,
      "learning_rate": 2.125098802120802e-06,
      "loss": 0.1304,
      "step": 6192
    },
    {
      "epoch": 0.5705993458331414,
      "grad_norm": 0.9700220829306405,
      "learning_rate": 2.1243456508907643e-06,
      "loss": 0.1394,
      "step": 6193
    },
    {
      "epoch": 0.5706914820104114,
      "grad_norm": 0.9037958372928121,
      "learning_rate": 2.1235925345402746e-06,
      "loss": 0.1232,
      "step": 6194
    },
    {
      "epoch": 0.5707836181876814,
      "grad_norm": 0.8889183496728102,
      "learning_rate": 2.122839453139257e-06,
      "loss": 0.1274,
      "step": 6195
    },
    {
      "epoch": 0.5708757543649514,
      "grad_norm": 0.8926523541631516,
      "learning_rate": 2.122086406757637e-06,
      "loss": 0.1307,
      "step": 6196
    },
    {
      "epoch": 0.5709678905422214,
      "grad_norm": 0.9152895066179517,
      "learning_rate": 2.121333395465335e-06,
      "loss": 0.1343,
      "step": 6197
    },
    {
      "epoch": 0.5710600267194914,
      "grad_norm": 0.9557095594049324,
      "learning_rate": 2.1205804193322685e-06,
      "loss": 0.138,
      "step": 6198
    },
    {
      "epoch": 0.5711521628967614,
      "grad_norm": 0.8443583063364482,
      "learning_rate": 2.119827478428351e-06,
      "loss": 0.1314,
      "step": 6199
    },
    {
      "epoch": 0.5712442990740314,
      "grad_norm": 0.9528291650337537,
      "learning_rate": 2.1190745728234916e-06,
      "loss": 0.1361,
      "step": 6200
    },
    {
      "epoch": 0.5713364352513014,
      "grad_norm": 0.895194785522947,
      "learning_rate": 2.1183217025876e-06,
      "loss": 0.1381,
      "step": 6201
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 0.9073430870330993,
      "learning_rate": 2.1175688677905804e-06,
      "loss": 0.1211,
      "step": 6202
    },
    {
      "epoch": 0.5715207076058414,
      "grad_norm": 0.9412625238043502,
      "learning_rate": 2.116816068502331e-06,
      "loss": 0.1392,
      "step": 6203
    },
    {
      "epoch": 0.5716128437831114,
      "grad_norm": 0.8922180197402308,
      "learning_rate": 2.1160633047927515e-06,
      "loss": 0.1346,
      "step": 6204
    },
    {
      "epoch": 0.5717049799603815,
      "grad_norm": 0.9184043552563133,
      "learning_rate": 2.115310576731735e-06,
      "loss": 0.132,
      "step": 6205
    },
    {
      "epoch": 0.5717971161376515,
      "grad_norm": 0.8905117266588061,
      "learning_rate": 2.114557884389174e-06,
      "loss": 0.1338,
      "step": 6206
    },
    {
      "epoch": 0.5718892523149215,
      "grad_norm": 1.0157498803550593,
      "learning_rate": 2.1138052278349543e-06,
      "loss": 0.1359,
      "step": 6207
    },
    {
      "epoch": 0.5719813884921915,
      "grad_norm": 0.91678227280953,
      "learning_rate": 2.1130526071389603e-06,
      "loss": 0.1155,
      "step": 6208
    },
    {
      "epoch": 0.5720735246694615,
      "grad_norm": 0.9086101316942187,
      "learning_rate": 2.1123000223710737e-06,
      "loss": 0.1303,
      "step": 6209
    },
    {
      "epoch": 0.5721656608467315,
      "grad_norm": 0.9277009997023078,
      "learning_rate": 2.1115474736011725e-06,
      "loss": 0.1321,
      "step": 6210
    },
    {
      "epoch": 0.5722577970240015,
      "grad_norm": 0.9127183367904651,
      "learning_rate": 2.110794960899129e-06,
      "loss": 0.1383,
      "step": 6211
    },
    {
      "epoch": 0.5723499332012715,
      "grad_norm": 0.9983831214605733,
      "learning_rate": 2.1100424843348157e-06,
      "loss": 0.1328,
      "step": 6212
    },
    {
      "epoch": 0.5724420693785415,
      "grad_norm": 0.9561191380206018,
      "learning_rate": 2.1092900439780993e-06,
      "loss": 0.1393,
      "step": 6213
    },
    {
      "epoch": 0.5725342055558115,
      "grad_norm": 0.9076953689928847,
      "learning_rate": 2.108537639898845e-06,
      "loss": 0.1343,
      "step": 6214
    },
    {
      "epoch": 0.5726263417330815,
      "grad_norm": 0.9230865489132,
      "learning_rate": 2.1077852721669132e-06,
      "loss": 0.125,
      "step": 6215
    },
    {
      "epoch": 0.5727184779103515,
      "grad_norm": 0.8781994139944165,
      "learning_rate": 2.10703294085216e-06,
      "loss": 0.1222,
      "step": 6216
    },
    {
      "epoch": 0.5728106140876215,
      "grad_norm": 0.9418352709870952,
      "learning_rate": 2.1062806460244415e-06,
      "loss": 0.1351,
      "step": 6217
    },
    {
      "epoch": 0.5729027502648915,
      "grad_norm": 0.9333115918970857,
      "learning_rate": 2.1055283877536066e-06,
      "loss": 0.1376,
      "step": 6218
    },
    {
      "epoch": 0.5729948864421616,
      "grad_norm": 0.8909079069488656,
      "learning_rate": 2.1047761661095043e-06,
      "loss": 0.134,
      "step": 6219
    },
    {
      "epoch": 0.5730870226194316,
      "grad_norm": 0.923389929285855,
      "learning_rate": 2.1040239811619774e-06,
      "loss": 0.1382,
      "step": 6220
    },
    {
      "epoch": 0.5731791587967016,
      "grad_norm": 0.900590742643775,
      "learning_rate": 2.1032718329808656e-06,
      "loss": 0.135,
      "step": 6221
    },
    {
      "epoch": 0.5732712949739716,
      "grad_norm": 0.8999258835799233,
      "learning_rate": 2.102519721636007e-06,
      "loss": 0.1269,
      "step": 6222
    },
    {
      "epoch": 0.5733634311512416,
      "grad_norm": 0.9115861566104987,
      "learning_rate": 2.1017676471972363e-06,
      "loss": 0.1194,
      "step": 6223
    },
    {
      "epoch": 0.5734555673285116,
      "grad_norm": 0.9547979195731437,
      "learning_rate": 2.101015609734381e-06,
      "loss": 0.1358,
      "step": 6224
    },
    {
      "epoch": 0.5735477035057815,
      "grad_norm": 0.8987983423045988,
      "learning_rate": 2.1002636093172694e-06,
      "loss": 0.1404,
      "step": 6225
    },
    {
      "epoch": 0.5736398396830515,
      "grad_norm": 0.9396026936959258,
      "learning_rate": 2.099511646015725e-06,
      "loss": 0.1358,
      "step": 6226
    },
    {
      "epoch": 0.5737319758603215,
      "grad_norm": 0.9380954721223328,
      "learning_rate": 2.098759719899568e-06,
      "loss": 0.1365,
      "step": 6227
    },
    {
      "epoch": 0.5738241120375915,
      "grad_norm": 0.9456480775172186,
      "learning_rate": 2.0980078310386135e-06,
      "loss": 0.1368,
      "step": 6228
    },
    {
      "epoch": 0.5739162482148615,
      "grad_norm": 0.9404251140274369,
      "learning_rate": 2.097255979502675e-06,
      "loss": 0.1374,
      "step": 6229
    },
    {
      "epoch": 0.5740083843921315,
      "grad_norm": 0.9329114486300538,
      "learning_rate": 2.096504165361562e-06,
      "loss": 0.1503,
      "step": 6230
    },
    {
      "epoch": 0.5741005205694015,
      "grad_norm": 0.9589004596645393,
      "learning_rate": 2.0957523886850815e-06,
      "loss": 0.142,
      "step": 6231
    },
    {
      "epoch": 0.5741926567466715,
      "grad_norm": 0.9469984435269726,
      "learning_rate": 2.095000649543035e-06,
      "loss": 0.1287,
      "step": 6232
    },
    {
      "epoch": 0.5742847929239416,
      "grad_norm": 0.8447650874717392,
      "learning_rate": 2.0942489480052214e-06,
      "loss": 0.1242,
      "step": 6233
    },
    {
      "epoch": 0.5743769291012116,
      "grad_norm": 0.8887316814688837,
      "learning_rate": 2.093497284141436e-06,
      "loss": 0.1393,
      "step": 6234
    },
    {
      "epoch": 0.5744690652784816,
      "grad_norm": 0.9034629564353456,
      "learning_rate": 2.0927456580214733e-06,
      "loss": 0.1421,
      "step": 6235
    },
    {
      "epoch": 0.5745612014557516,
      "grad_norm": 0.8562329900422688,
      "learning_rate": 2.091994069715119e-06,
      "loss": 0.1242,
      "step": 6236
    },
    {
      "epoch": 0.5746533376330216,
      "grad_norm": 0.9299994710734435,
      "learning_rate": 2.0912425192921588e-06,
      "loss": 0.1312,
      "step": 6237
    },
    {
      "epoch": 0.5747454738102916,
      "grad_norm": 0.9446203251305053,
      "learning_rate": 2.0904910068223745e-06,
      "loss": 0.147,
      "step": 6238
    },
    {
      "epoch": 0.5748376099875616,
      "grad_norm": 0.8745483561720717,
      "learning_rate": 2.0897395323755464e-06,
      "loss": 0.115,
      "step": 6239
    },
    {
      "epoch": 0.5749297461648316,
      "grad_norm": 0.863217780958388,
      "learning_rate": 2.088988096021445e-06,
      "loss": 0.1237,
      "step": 6240
    },
    {
      "epoch": 0.5750218823421016,
      "grad_norm": 0.9183400050395388,
      "learning_rate": 2.088236697829843e-06,
      "loss": 0.1366,
      "step": 6241
    },
    {
      "epoch": 0.5751140185193716,
      "grad_norm": 0.8436149595259076,
      "learning_rate": 2.0874853378705085e-06,
      "loss": 0.1229,
      "step": 6242
    },
    {
      "epoch": 0.5752061546966416,
      "grad_norm": 0.9275652931055757,
      "learning_rate": 2.0867340162132054e-06,
      "loss": 0.1303,
      "step": 6243
    },
    {
      "epoch": 0.5752982908739116,
      "grad_norm": 0.9302318599693131,
      "learning_rate": 2.0859827329276926e-06,
      "loss": 0.1418,
      "step": 6244
    },
    {
      "epoch": 0.5753904270511816,
      "grad_norm": 0.95815229518161,
      "learning_rate": 2.0852314880837278e-06,
      "loss": 0.1397,
      "step": 6245
    },
    {
      "epoch": 0.5754825632284517,
      "grad_norm": 0.8904343622778238,
      "learning_rate": 2.0844802817510633e-06,
      "loss": 0.1341,
      "step": 6246
    },
    {
      "epoch": 0.5755746994057217,
      "grad_norm": 0.862398919434429,
      "learning_rate": 2.08372911399945e-06,
      "loss": 0.1185,
      "step": 6247
    },
    {
      "epoch": 0.5756668355829917,
      "grad_norm": 0.9395104140482388,
      "learning_rate": 2.0829779848986337e-06,
      "loss": 0.1445,
      "step": 6248
    },
    {
      "epoch": 0.5757589717602617,
      "grad_norm": 0.8877225576451748,
      "learning_rate": 2.0822268945183555e-06,
      "loss": 0.1301,
      "step": 6249
    },
    {
      "epoch": 0.5758511079375317,
      "grad_norm": 0.8670131634340984,
      "learning_rate": 2.081475842928356e-06,
      "loss": 0.1242,
      "step": 6250
    },
    {
      "epoch": 0.5759432441148017,
      "grad_norm": 0.965400954698164,
      "learning_rate": 2.0807248301983682e-06,
      "loss": 0.1479,
      "step": 6251
    },
    {
      "epoch": 0.5760353802920717,
      "grad_norm": 0.8849794715504932,
      "learning_rate": 2.0799738563981263e-06,
      "loss": 0.1287,
      "step": 6252
    },
    {
      "epoch": 0.5761275164693417,
      "grad_norm": 0.905858445926939,
      "learning_rate": 2.079222921597357e-06,
      "loss": 0.1386,
      "step": 6253
    },
    {
      "epoch": 0.5762196526466117,
      "grad_norm": 0.9180158841915589,
      "learning_rate": 2.078472025865784e-06,
      "loss": 0.1375,
      "step": 6254
    },
    {
      "epoch": 0.5763117888238817,
      "grad_norm": 0.8919740094962243,
      "learning_rate": 2.077721169273129e-06,
      "loss": 0.1344,
      "step": 6255
    },
    {
      "epoch": 0.5764039250011517,
      "grad_norm": 0.9292425036753654,
      "learning_rate": 2.0769703518891096e-06,
      "loss": 0.1339,
      "step": 6256
    },
    {
      "epoch": 0.5764960611784217,
      "grad_norm": 0.9122724926415321,
      "learning_rate": 2.076219573783437e-06,
      "loss": 0.1295,
      "step": 6257
    },
    {
      "epoch": 0.5765881973556917,
      "grad_norm": 0.9458670285472852,
      "learning_rate": 2.075468835025824e-06,
      "loss": 0.1332,
      "step": 6258
    },
    {
      "epoch": 0.5766803335329617,
      "grad_norm": 1.0076418370512317,
      "learning_rate": 2.0747181356859743e-06,
      "loss": 0.1383,
      "step": 6259
    },
    {
      "epoch": 0.5767724697102318,
      "grad_norm": 0.8925007436761428,
      "learning_rate": 2.073967475833593e-06,
      "loss": 0.1301,
      "step": 6260
    },
    {
      "epoch": 0.5768646058875018,
      "grad_norm": 0.9879169780568002,
      "learning_rate": 2.0732168555383764e-06,
      "loss": 0.1358,
      "step": 6261
    },
    {
      "epoch": 0.5769567420647718,
      "grad_norm": 0.942015182599193,
      "learning_rate": 2.0724662748700205e-06,
      "loss": 0.1328,
      "step": 6262
    },
    {
      "epoch": 0.5770488782420418,
      "grad_norm": 0.910961493062362,
      "learning_rate": 2.0717157338982172e-06,
      "loss": 0.1328,
      "step": 6263
    },
    {
      "epoch": 0.5771410144193118,
      "grad_norm": 0.9031369142798781,
      "learning_rate": 2.0709652326926547e-06,
      "loss": 0.1309,
      "step": 6264
    },
    {
      "epoch": 0.5772331505965818,
      "grad_norm": 1.0202082284494964,
      "learning_rate": 2.070214771323015e-06,
      "loss": 0.1547,
      "step": 6265
    },
    {
      "epoch": 0.5773252867738518,
      "grad_norm": 0.8730129929135148,
      "learning_rate": 2.0694643498589816e-06,
      "loss": 0.1291,
      "step": 6266
    },
    {
      "epoch": 0.5774174229511218,
      "grad_norm": 0.8652049603519832,
      "learning_rate": 2.0687139683702284e-06,
      "loss": 0.1235,
      "step": 6267
    },
    {
      "epoch": 0.5775095591283917,
      "grad_norm": 0.9304371949873086,
      "learning_rate": 2.067963626926431e-06,
      "loss": 0.1375,
      "step": 6268
    },
    {
      "epoch": 0.5776016953056617,
      "grad_norm": 0.932762906253494,
      "learning_rate": 2.0672133255972567e-06,
      "loss": 0.1354,
      "step": 6269
    },
    {
      "epoch": 0.5776938314829317,
      "grad_norm": 0.9551092367759105,
      "learning_rate": 2.066463064452371e-06,
      "loss": 0.1378,
      "step": 6270
    },
    {
      "epoch": 0.5777859676602017,
      "grad_norm": 0.9374294366319679,
      "learning_rate": 2.0657128435614372e-06,
      "loss": 0.1426,
      "step": 6271
    },
    {
      "epoch": 0.5778781038374717,
      "grad_norm": 0.9424685529587546,
      "learning_rate": 2.0649626629941134e-06,
      "loss": 0.1383,
      "step": 6272
    },
    {
      "epoch": 0.5779702400147418,
      "grad_norm": 0.8798221517310623,
      "learning_rate": 2.0642125228200515e-06,
      "loss": 0.1231,
      "step": 6273
    },
    {
      "epoch": 0.5780623761920118,
      "grad_norm": 0.8718920010933575,
      "learning_rate": 2.0634624231089047e-06,
      "loss": 0.1317,
      "step": 6274
    },
    {
      "epoch": 0.5781545123692818,
      "grad_norm": 0.8900509963420861,
      "learning_rate": 2.062712363930318e-06,
      "loss": 0.1306,
      "step": 6275
    },
    {
      "epoch": 0.5782466485465518,
      "grad_norm": 0.9602083244915917,
      "learning_rate": 2.0619623453539365e-06,
      "loss": 0.1311,
      "step": 6276
    },
    {
      "epoch": 0.5783387847238218,
      "grad_norm": 0.9409875054246267,
      "learning_rate": 2.0612123674493983e-06,
      "loss": 0.1349,
      "step": 6277
    },
    {
      "epoch": 0.5784309209010918,
      "grad_norm": 0.954883571528099,
      "learning_rate": 2.060462430286338e-06,
      "loss": 0.1488,
      "step": 6278
    },
    {
      "epoch": 0.5785230570783618,
      "grad_norm": 0.924233430850071,
      "learning_rate": 2.059712533934389e-06,
      "loss": 0.1353,
      "step": 6279
    },
    {
      "epoch": 0.5786151932556318,
      "grad_norm": 0.9041734680736925,
      "learning_rate": 2.0589626784631784e-06,
      "loss": 0.1323,
      "step": 6280
    },
    {
      "epoch": 0.5787073294329018,
      "grad_norm": 0.9837493412140981,
      "learning_rate": 2.0582128639423316e-06,
      "loss": 0.1536,
      "step": 6281
    },
    {
      "epoch": 0.5787994656101718,
      "grad_norm": 0.974634864030492,
      "learning_rate": 2.057463090441467e-06,
      "loss": 0.1436,
      "step": 6282
    },
    {
      "epoch": 0.5788916017874418,
      "grad_norm": 0.9432227471990523,
      "learning_rate": 2.056713358030202e-06,
      "loss": 0.1441,
      "step": 6283
    },
    {
      "epoch": 0.5789837379647118,
      "grad_norm": 0.8964335561210266,
      "learning_rate": 2.0559636667781493e-06,
      "loss": 0.1318,
      "step": 6284
    },
    {
      "epoch": 0.5790758741419818,
      "grad_norm": 0.8913206385209099,
      "learning_rate": 2.055214016754919e-06,
      "loss": 0.1303,
      "step": 6285
    },
    {
      "epoch": 0.5791680103192518,
      "grad_norm": 0.9267354488882299,
      "learning_rate": 2.0544644080301138e-06,
      "loss": 0.1401,
      "step": 6286
    },
    {
      "epoch": 0.5792601464965219,
      "grad_norm": 0.9305789259939854,
      "learning_rate": 2.053714840673337e-06,
      "loss": 0.1358,
      "step": 6287
    },
    {
      "epoch": 0.5793522826737919,
      "grad_norm": 0.9084611989875776,
      "learning_rate": 2.0529653147541844e-06,
      "loss": 0.1357,
      "step": 6288
    },
    {
      "epoch": 0.5794444188510619,
      "grad_norm": 0.9851294864819644,
      "learning_rate": 2.0522158303422518e-06,
      "loss": 0.1441,
      "step": 6289
    },
    {
      "epoch": 0.5795365550283319,
      "grad_norm": 0.9148519072416933,
      "learning_rate": 2.051466387507127e-06,
      "loss": 0.1353,
      "step": 6290
    },
    {
      "epoch": 0.5796286912056019,
      "grad_norm": 0.8849124093932106,
      "learning_rate": 2.0507169863183956e-06,
      "loss": 0.1207,
      "step": 6291
    },
    {
      "epoch": 0.5797208273828719,
      "grad_norm": 0.9565706832013744,
      "learning_rate": 2.0499676268456412e-06,
      "loss": 0.1329,
      "step": 6292
    },
    {
      "epoch": 0.5798129635601419,
      "grad_norm": 0.9551972700761554,
      "learning_rate": 2.0492183091584414e-06,
      "loss": 0.1381,
      "step": 6293
    },
    {
      "epoch": 0.5799050997374119,
      "grad_norm": 0.8916816492853256,
      "learning_rate": 2.048469033326369e-06,
      "loss": 0.139,
      "step": 6294
    },
    {
      "epoch": 0.5799972359146819,
      "grad_norm": 0.9837000019154031,
      "learning_rate": 2.047719799418996e-06,
      "loss": 0.1408,
      "step": 6295
    },
    {
      "epoch": 0.5800893720919519,
      "grad_norm": 0.9296646554316655,
      "learning_rate": 2.046970607505888e-06,
      "loss": 0.1238,
      "step": 6296
    },
    {
      "epoch": 0.5801815082692219,
      "grad_norm": 0.9916491510040498,
      "learning_rate": 2.046221457656609e-06,
      "loss": 0.1467,
      "step": 6297
    },
    {
      "epoch": 0.5802736444464919,
      "grad_norm": 1.0222186289839617,
      "learning_rate": 2.0454723499407158e-06,
      "loss": 0.142,
      "step": 6298
    },
    {
      "epoch": 0.5803657806237619,
      "grad_norm": 0.9199317685695529,
      "learning_rate": 2.044723284427763e-06,
      "loss": 0.1337,
      "step": 6299
    },
    {
      "epoch": 0.5804579168010319,
      "grad_norm": 0.9431538247478978,
      "learning_rate": 2.043974261187303e-06,
      "loss": 0.1311,
      "step": 6300
    },
    {
      "epoch": 0.580550052978302,
      "grad_norm": 0.935052064731194,
      "learning_rate": 2.0432252802888827e-06,
      "loss": 0.1319,
      "step": 6301
    },
    {
      "epoch": 0.580642189155572,
      "grad_norm": 0.9305806123779443,
      "learning_rate": 2.042476341802043e-06,
      "loss": 0.1321,
      "step": 6302
    },
    {
      "epoch": 0.580734325332842,
      "grad_norm": 0.9340840745588856,
      "learning_rate": 2.0417274457963247e-06,
      "loss": 0.1351,
      "step": 6303
    },
    {
      "epoch": 0.580826461510112,
      "grad_norm": 0.8279176393267009,
      "learning_rate": 2.040978592341262e-06,
      "loss": 0.1174,
      "step": 6304
    },
    {
      "epoch": 0.580918597687382,
      "grad_norm": 0.8983819696246862,
      "learning_rate": 2.0402297815063867e-06,
      "loss": 0.1301,
      "step": 6305
    },
    {
      "epoch": 0.581010733864652,
      "grad_norm": 0.9312038343939995,
      "learning_rate": 2.0394810133612263e-06,
      "loss": 0.1365,
      "step": 6306
    },
    {
      "epoch": 0.581102870041922,
      "grad_norm": 0.8673030702969664,
      "learning_rate": 2.0387322879753025e-06,
      "loss": 0.1295,
      "step": 6307
    },
    {
      "epoch": 0.581195006219192,
      "grad_norm": 0.9257544184093895,
      "learning_rate": 2.0379836054181356e-06,
      "loss": 0.1348,
      "step": 6308
    },
    {
      "epoch": 0.581287142396462,
      "grad_norm": 0.9720242295003335,
      "learning_rate": 2.0372349657592404e-06,
      "loss": 0.1422,
      "step": 6309
    },
    {
      "epoch": 0.581379278573732,
      "grad_norm": 0.9263012800097226,
      "learning_rate": 2.0364863690681293e-06,
      "loss": 0.1291,
      "step": 6310
    },
    {
      "epoch": 0.581471414751002,
      "grad_norm": 0.8925339009739691,
      "learning_rate": 2.0357378154143083e-06,
      "loss": 0.1305,
      "step": 6311
    },
    {
      "epoch": 0.5815635509282719,
      "grad_norm": 0.8770195000598113,
      "learning_rate": 2.0349893048672806e-06,
      "loss": 0.1249,
      "step": 6312
    },
    {
      "epoch": 0.5816556871055419,
      "grad_norm": 0.9175165472111774,
      "learning_rate": 2.0342408374965457e-06,
      "loss": 0.136,
      "step": 6313
    },
    {
      "epoch": 0.581747823282812,
      "grad_norm": 0.9407699994116291,
      "learning_rate": 2.033492413371601e-06,
      "loss": 0.137,
      "step": 6314
    },
    {
      "epoch": 0.581839959460082,
      "grad_norm": 0.9116296301855412,
      "learning_rate": 2.0327440325619345e-06,
      "loss": 0.1299,
      "step": 6315
    },
    {
      "epoch": 0.581932095637352,
      "grad_norm": 0.9369925685666635,
      "learning_rate": 2.0319956951370346e-06,
      "loss": 0.1427,
      "step": 6316
    },
    {
      "epoch": 0.582024231814622,
      "grad_norm": 0.9385348869940181,
      "learning_rate": 2.0312474011663857e-06,
      "loss": 0.1479,
      "step": 6317
    },
    {
      "epoch": 0.582116367991892,
      "grad_norm": 0.9268995831678168,
      "learning_rate": 2.030499150719466e-06,
      "loss": 0.1337,
      "step": 6318
    },
    {
      "epoch": 0.582208504169162,
      "grad_norm": 0.9028226590767937,
      "learning_rate": 2.02975094386575e-06,
      "loss": 0.1309,
      "step": 6319
    },
    {
      "epoch": 0.582300640346432,
      "grad_norm": 0.9370832163666957,
      "learning_rate": 2.02900278067471e-06,
      "loss": 0.1258,
      "step": 6320
    },
    {
      "epoch": 0.582392776523702,
      "grad_norm": 0.9655358616877272,
      "learning_rate": 2.0282546612158116e-06,
      "loss": 0.1372,
      "step": 6321
    },
    {
      "epoch": 0.582484912700972,
      "grad_norm": 0.9148607737633211,
      "learning_rate": 2.02750658555852e-06,
      "loss": 0.1211,
      "step": 6322
    },
    {
      "epoch": 0.582577048878242,
      "grad_norm": 0.8864078738002699,
      "learning_rate": 2.026758553772292e-06,
      "loss": 0.1305,
      "step": 6323
    },
    {
      "epoch": 0.582669185055512,
      "grad_norm": 1.002989551156702,
      "learning_rate": 2.026010565926583e-06,
      "loss": 0.152,
      "step": 6324
    },
    {
      "epoch": 0.582761321232782,
      "grad_norm": 0.9183628328083221,
      "learning_rate": 2.0252626220908448e-06,
      "loss": 0.1376,
      "step": 6325
    },
    {
      "epoch": 0.582853457410052,
      "grad_norm": 0.8937942177656565,
      "learning_rate": 2.0245147223345235e-06,
      "loss": 0.1316,
      "step": 6326
    },
    {
      "epoch": 0.582945593587322,
      "grad_norm": 0.8875351589120594,
      "learning_rate": 2.0237668667270603e-06,
      "loss": 0.1303,
      "step": 6327
    },
    {
      "epoch": 0.5830377297645921,
      "grad_norm": 0.9263707122808156,
      "learning_rate": 2.023019055337895e-06,
      "loss": 0.1328,
      "step": 6328
    },
    {
      "epoch": 0.5831298659418621,
      "grad_norm": 0.915973919313679,
      "learning_rate": 2.0222712882364617e-06,
      "loss": 0.1386,
      "step": 6329
    },
    {
      "epoch": 0.5832220021191321,
      "grad_norm": 0.9156431422133637,
      "learning_rate": 2.0215235654921912e-06,
      "loss": 0.1362,
      "step": 6330
    },
    {
      "epoch": 0.5833141382964021,
      "grad_norm": 0.8978505787324376,
      "learning_rate": 2.0207758871745088e-06,
      "loss": 0.1229,
      "step": 6331
    },
    {
      "epoch": 0.5834062744736721,
      "grad_norm": 0.8691338078911657,
      "learning_rate": 2.0200282533528367e-06,
      "loss": 0.1373,
      "step": 6332
    },
    {
      "epoch": 0.5834984106509421,
      "grad_norm": 0.938257513392887,
      "learning_rate": 2.019280664096593e-06,
      "loss": 0.1378,
      "step": 6333
    },
    {
      "epoch": 0.5835905468282121,
      "grad_norm": 0.8559661977591175,
      "learning_rate": 2.018533119475191e-06,
      "loss": 0.1215,
      "step": 6334
    },
    {
      "epoch": 0.5836826830054821,
      "grad_norm": 0.9573360476871194,
      "learning_rate": 2.017785619558042e-06,
      "loss": 0.1355,
      "step": 6335
    },
    {
      "epoch": 0.5837748191827521,
      "grad_norm": 0.8571583060040944,
      "learning_rate": 2.0170381644145492e-06,
      "loss": 0.1224,
      "step": 6336
    },
    {
      "epoch": 0.5838669553600221,
      "grad_norm": 0.9268463083225386,
      "learning_rate": 2.0162907541141146e-06,
      "loss": 0.1348,
      "step": 6337
    },
    {
      "epoch": 0.5839590915372921,
      "grad_norm": 0.8793654369992728,
      "learning_rate": 2.0155433887261362e-06,
      "loss": 0.1289,
      "step": 6338
    },
    {
      "epoch": 0.5840512277145621,
      "grad_norm": 0.9334765040012325,
      "learning_rate": 2.0147960683200064e-06,
      "loss": 0.1351,
      "step": 6339
    },
    {
      "epoch": 0.5841433638918321,
      "grad_norm": 0.913189689611009,
      "learning_rate": 2.014048792965113e-06,
      "loss": 0.1306,
      "step": 6340
    },
    {
      "epoch": 0.5842355000691022,
      "grad_norm": 0.9441334337706997,
      "learning_rate": 2.013301562730842e-06,
      "loss": 0.1384,
      "step": 6341
    },
    {
      "epoch": 0.5843276362463722,
      "grad_norm": 0.8629368354740582,
      "learning_rate": 2.0125543776865723e-06,
      "loss": 0.1213,
      "step": 6342
    },
    {
      "epoch": 0.5844197724236422,
      "grad_norm": 0.8799947858308416,
      "learning_rate": 2.011807237901683e-06,
      "loss": 0.1168,
      "step": 6343
    },
    {
      "epoch": 0.5845119086009122,
      "grad_norm": 0.9652486866537766,
      "learning_rate": 2.011060143445543e-06,
      "loss": 0.141,
      "step": 6344
    },
    {
      "epoch": 0.5846040447781822,
      "grad_norm": 0.8773569096523852,
      "learning_rate": 2.010313094387521e-06,
      "loss": 0.1324,
      "step": 6345
    },
    {
      "epoch": 0.5846961809554522,
      "grad_norm": 0.9465727325543921,
      "learning_rate": 2.0095660907969816e-06,
      "loss": 0.1331,
      "step": 6346
    },
    {
      "epoch": 0.5847883171327222,
      "grad_norm": 0.8830422552735072,
      "learning_rate": 2.0088191327432838e-06,
      "loss": 0.1205,
      "step": 6347
    },
    {
      "epoch": 0.5848804533099922,
      "grad_norm": 0.8911291650874078,
      "learning_rate": 2.0080722202957813e-06,
      "loss": 0.1269,
      "step": 6348
    },
    {
      "epoch": 0.5849725894872622,
      "grad_norm": 0.919726354522966,
      "learning_rate": 2.0073253535238266e-06,
      "loss": 0.1348,
      "step": 6349
    },
    {
      "epoch": 0.5850647256645322,
      "grad_norm": 0.9153772381047109,
      "learning_rate": 2.0065785324967654e-06,
      "loss": 0.1231,
      "step": 6350
    },
    {
      "epoch": 0.5851568618418022,
      "grad_norm": 0.9585102905267247,
      "learning_rate": 2.0058317572839418e-06,
      "loss": 0.1361,
      "step": 6351
    },
    {
      "epoch": 0.5852489980190722,
      "grad_norm": 0.944155411460431,
      "learning_rate": 2.0050850279546918e-06,
      "loss": 0.1421,
      "step": 6352
    },
    {
      "epoch": 0.5853411341963421,
      "grad_norm": 0.9449499207179856,
      "learning_rate": 2.00433834457835e-06,
      "loss": 0.1311,
      "step": 6353
    },
    {
      "epoch": 0.5854332703736121,
      "grad_norm": 0.8963039607572094,
      "learning_rate": 2.0035917072242463e-06,
      "loss": 0.1365,
      "step": 6354
    },
    {
      "epoch": 0.5855254065508823,
      "grad_norm": 0.9372716167180863,
      "learning_rate": 2.002845115961707e-06,
      "loss": 0.1304,
      "step": 6355
    },
    {
      "epoch": 0.5856175427281523,
      "grad_norm": 0.9593342332592468,
      "learning_rate": 2.002098570860051e-06,
      "loss": 0.1341,
      "step": 6356
    },
    {
      "epoch": 0.5857096789054222,
      "grad_norm": 0.9355281745635248,
      "learning_rate": 2.001352071988597e-06,
      "loss": 0.1395,
      "step": 6357
    },
    {
      "epoch": 0.5858018150826922,
      "grad_norm": 0.9088372778534395,
      "learning_rate": 2.000605619416656e-06,
      "loss": 0.133,
      "step": 6358
    },
    {
      "epoch": 0.5858939512599622,
      "grad_norm": 0.9209424144820311,
      "learning_rate": 1.999859213213538e-06,
      "loss": 0.1437,
      "step": 6359
    },
    {
      "epoch": 0.5859860874372322,
      "grad_norm": 0.9475953953719402,
      "learning_rate": 1.9991128534485454e-06,
      "loss": 0.1315,
      "step": 6360
    },
    {
      "epoch": 0.5860782236145022,
      "grad_norm": 0.9238882849548906,
      "learning_rate": 1.998366540190978e-06,
      "loss": 0.1228,
      "step": 6361
    },
    {
      "epoch": 0.5861703597917722,
      "grad_norm": 0.937878020643912,
      "learning_rate": 1.9976202735101314e-06,
      "loss": 0.1385,
      "step": 6362
    },
    {
      "epoch": 0.5862624959690422,
      "grad_norm": 0.8840553821623729,
      "learning_rate": 1.9968740534752965e-06,
      "loss": 0.1201,
      "step": 6363
    },
    {
      "epoch": 0.5863546321463122,
      "grad_norm": 0.9642337662716879,
      "learning_rate": 1.9961278801557606e-06,
      "loss": 0.1402,
      "step": 6364
    },
    {
      "epoch": 0.5864467683235822,
      "grad_norm": 0.9253050908043012,
      "learning_rate": 1.9953817536208046e-06,
      "loss": 0.1289,
      "step": 6365
    },
    {
      "epoch": 0.5865389045008522,
      "grad_norm": 0.9555889041598452,
      "learning_rate": 1.994635673939707e-06,
      "loss": 0.1401,
      "step": 6366
    },
    {
      "epoch": 0.5866310406781222,
      "grad_norm": 0.9409992712837372,
      "learning_rate": 1.9938896411817416e-06,
      "loss": 0.1389,
      "step": 6367
    },
    {
      "epoch": 0.5867231768553922,
      "grad_norm": 0.8298665944219692,
      "learning_rate": 1.9931436554161783e-06,
      "loss": 0.1194,
      "step": 6368
    },
    {
      "epoch": 0.5868153130326623,
      "grad_norm": 0.9843872433472307,
      "learning_rate": 1.9923977167122797e-06,
      "loss": 0.1489,
      "step": 6369
    },
    {
      "epoch": 0.5869074492099323,
      "grad_norm": 0.9426206584555298,
      "learning_rate": 1.9916518251393085e-06,
      "loss": 0.1354,
      "step": 6370
    },
    {
      "epoch": 0.5869995853872023,
      "grad_norm": 0.9291998492432275,
      "learning_rate": 1.9909059807665195e-06,
      "loss": 0.1257,
      "step": 6371
    },
    {
      "epoch": 0.5870917215644723,
      "grad_norm": 0.9164376882762449,
      "learning_rate": 1.990160183663166e-06,
      "loss": 0.1348,
      "step": 6372
    },
    {
      "epoch": 0.5871838577417423,
      "grad_norm": 0.916904820338903,
      "learning_rate": 1.9894144338984937e-06,
      "loss": 0.132,
      "step": 6373
    },
    {
      "epoch": 0.5872759939190123,
      "grad_norm": 0.9084949228829207,
      "learning_rate": 1.9886687315417456e-06,
      "loss": 0.1344,
      "step": 6374
    },
    {
      "epoch": 0.5873681300962823,
      "grad_norm": 0.9304152484544277,
      "learning_rate": 1.9879230766621616e-06,
      "loss": 0.1354,
      "step": 6375
    },
    {
      "epoch": 0.5874602662735523,
      "grad_norm": 0.9262616287041675,
      "learning_rate": 1.9871774693289754e-06,
      "loss": 0.1279,
      "step": 6376
    },
    {
      "epoch": 0.5875524024508223,
      "grad_norm": 0.8867273085909185,
      "learning_rate": 1.9864319096114152e-06,
      "loss": 0.1301,
      "step": 6377
    },
    {
      "epoch": 0.5876445386280923,
      "grad_norm": 1.0611872055913358,
      "learning_rate": 1.985686397578708e-06,
      "loss": 0.1477,
      "step": 6378
    },
    {
      "epoch": 0.5877366748053623,
      "grad_norm": 0.8468401803460077,
      "learning_rate": 1.984940933300074e-06,
      "loss": 0.1174,
      "step": 6379
    },
    {
      "epoch": 0.5878288109826323,
      "grad_norm": 0.8830701936617826,
      "learning_rate": 1.984195516844731e-06,
      "loss": 0.1329,
      "step": 6380
    },
    {
      "epoch": 0.5879209471599023,
      "grad_norm": 0.9153760956552899,
      "learning_rate": 1.9834501482818885e-06,
      "loss": 0.1394,
      "step": 6381
    },
    {
      "epoch": 0.5880130833371724,
      "grad_norm": 0.884290099600892,
      "learning_rate": 1.9827048276807552e-06,
      "loss": 0.1361,
      "step": 6382
    },
    {
      "epoch": 0.5881052195144424,
      "grad_norm": 0.9630089565926064,
      "learning_rate": 1.9819595551105346e-06,
      "loss": 0.142,
      "step": 6383
    },
    {
      "epoch": 0.5881973556917124,
      "grad_norm": 0.9348061290004007,
      "learning_rate": 1.9812143306404262e-06,
      "loss": 0.1325,
      "step": 6384
    },
    {
      "epoch": 0.5882894918689824,
      "grad_norm": 0.8751611132400253,
      "learning_rate": 1.9804691543396213e-06,
      "loss": 0.1255,
      "step": 6385
    },
    {
      "epoch": 0.5883816280462524,
      "grad_norm": 0.9355019028595838,
      "learning_rate": 1.9797240262773122e-06,
      "loss": 0.1325,
      "step": 6386
    },
    {
      "epoch": 0.5884737642235224,
      "grad_norm": 0.8332639547487759,
      "learning_rate": 1.9789789465226825e-06,
      "loss": 0.1253,
      "step": 6387
    },
    {
      "epoch": 0.5885659004007924,
      "grad_norm": 0.9403565805795623,
      "learning_rate": 1.978233915144915e-06,
      "loss": 0.1401,
      "step": 6388
    },
    {
      "epoch": 0.5886580365780624,
      "grad_norm": 0.8968199420083651,
      "learning_rate": 1.977488932213184e-06,
      "loss": 0.1397,
      "step": 6389
    },
    {
      "epoch": 0.5887501727553324,
      "grad_norm": 0.9429483455249357,
      "learning_rate": 1.976743997796661e-06,
      "loss": 0.1431,
      "step": 6390
    },
    {
      "epoch": 0.5888423089326024,
      "grad_norm": 0.886059394188964,
      "learning_rate": 1.975999111964515e-06,
      "loss": 0.1311,
      "step": 6391
    },
    {
      "epoch": 0.5889344451098724,
      "grad_norm": 0.9131107533925749,
      "learning_rate": 1.9752542747859076e-06,
      "loss": 0.1332,
      "step": 6392
    },
    {
      "epoch": 0.5890265812871424,
      "grad_norm": 0.8724833844713735,
      "learning_rate": 1.974509486329998e-06,
      "loss": 0.1185,
      "step": 6393
    },
    {
      "epoch": 0.5891187174644124,
      "grad_norm": 0.8517216431460966,
      "learning_rate": 1.973764746665938e-06,
      "loss": 0.1244,
      "step": 6394
    },
    {
      "epoch": 0.5892108536416824,
      "grad_norm": 0.8853967206799523,
      "learning_rate": 1.9730200558628784e-06,
      "loss": 0.1361,
      "step": 6395
    },
    {
      "epoch": 0.5893029898189525,
      "grad_norm": 0.933921719916813,
      "learning_rate": 1.972275413989963e-06,
      "loss": 0.1276,
      "step": 6396
    },
    {
      "epoch": 0.5893951259962225,
      "grad_norm": 0.9412040556603793,
      "learning_rate": 1.971530821116333e-06,
      "loss": 0.1351,
      "step": 6397
    },
    {
      "epoch": 0.5894872621734925,
      "grad_norm": 0.9419111745680039,
      "learning_rate": 1.970786277311123e-06,
      "loss": 0.135,
      "step": 6398
    },
    {
      "epoch": 0.5895793983507625,
      "grad_norm": 0.9215559284783924,
      "learning_rate": 1.9700417826434633e-06,
      "loss": 0.1351,
      "step": 6399
    },
    {
      "epoch": 0.5896715345280324,
      "grad_norm": 0.996475566243809,
      "learning_rate": 1.969297337182482e-06,
      "loss": 0.1521,
      "step": 6400
    },
    {
      "epoch": 0.5897636707053024,
      "grad_norm": 0.9291448076226406,
      "learning_rate": 1.9685529409973e-06,
      "loss": 0.1278,
      "step": 6401
    },
    {
      "epoch": 0.5898558068825724,
      "grad_norm": 0.8924108059641798,
      "learning_rate": 1.967808594157034e-06,
      "loss": 0.1267,
      "step": 6402
    },
    {
      "epoch": 0.5899479430598424,
      "grad_norm": 0.9645722547839062,
      "learning_rate": 1.9670642967307974e-06,
      "loss": 0.1399,
      "step": 6403
    },
    {
      "epoch": 0.5900400792371124,
      "grad_norm": 0.8925650886648214,
      "learning_rate": 1.9663200487876983e-06,
      "loss": 0.1282,
      "step": 6404
    },
    {
      "epoch": 0.5901322154143824,
      "grad_norm": 0.8834690207967658,
      "learning_rate": 1.965575850396841e-06,
      "loss": 0.1313,
      "step": 6405
    },
    {
      "epoch": 0.5902243515916524,
      "grad_norm": 0.882154265370182,
      "learning_rate": 1.9648317016273227e-06,
      "loss": 0.1169,
      "step": 6406
    },
    {
      "epoch": 0.5903164877689224,
      "grad_norm": 0.9405001358124874,
      "learning_rate": 1.964087602548238e-06,
      "loss": 0.134,
      "step": 6407
    },
    {
      "epoch": 0.5904086239461924,
      "grad_norm": 0.9444373778196875,
      "learning_rate": 1.9633435532286775e-06,
      "loss": 0.1397,
      "step": 6408
    },
    {
      "epoch": 0.5905007601234625,
      "grad_norm": 0.8989319983039749,
      "learning_rate": 1.9625995537377268e-06,
      "loss": 0.1155,
      "step": 6409
    },
    {
      "epoch": 0.5905928963007325,
      "grad_norm": 0.9366810824860379,
      "learning_rate": 1.961855604144464e-06,
      "loss": 0.1409,
      "step": 6410
    },
    {
      "epoch": 0.5906850324780025,
      "grad_norm": 0.9492399799170057,
      "learning_rate": 1.961111704517967e-06,
      "loss": 0.1441,
      "step": 6411
    },
    {
      "epoch": 0.5907771686552725,
      "grad_norm": 0.9306708910687921,
      "learning_rate": 1.9603678549273054e-06,
      "loss": 0.1392,
      "step": 6412
    },
    {
      "epoch": 0.5908693048325425,
      "grad_norm": 0.9310169038231709,
      "learning_rate": 1.959624055441548e-06,
      "loss": 0.1309,
      "step": 6413
    },
    {
      "epoch": 0.5909614410098125,
      "grad_norm": 0.9509199776543236,
      "learning_rate": 1.9588803061297544e-06,
      "loss": 0.1469,
      "step": 6414
    },
    {
      "epoch": 0.5910535771870825,
      "grad_norm": 0.8560938591568295,
      "learning_rate": 1.9581366070609824e-06,
      "loss": 0.1137,
      "step": 6415
    },
    {
      "epoch": 0.5911457133643525,
      "grad_norm": 0.935421361088011,
      "learning_rate": 1.957392958304285e-06,
      "loss": 0.1302,
      "step": 6416
    },
    {
      "epoch": 0.5912378495416225,
      "grad_norm": 0.9578568430913035,
      "learning_rate": 1.9566493599287103e-06,
      "loss": 0.1378,
      "step": 6417
    },
    {
      "epoch": 0.5913299857188925,
      "grad_norm": 0.8990248820513408,
      "learning_rate": 1.9559058120032997e-06,
      "loss": 0.1172,
      "step": 6418
    },
    {
      "epoch": 0.5914221218961625,
      "grad_norm": 0.9364115670604503,
      "learning_rate": 1.955162314597094e-06,
      "loss": 0.1382,
      "step": 6419
    },
    {
      "epoch": 0.5915142580734325,
      "grad_norm": 0.9513877830712206,
      "learning_rate": 1.9544188677791253e-06,
      "loss": 0.1395,
      "step": 6420
    },
    {
      "epoch": 0.5916063942507025,
      "grad_norm": 0.9556704700985831,
      "learning_rate": 1.9536754716184244e-06,
      "loss": 0.1324,
      "step": 6421
    },
    {
      "epoch": 0.5916985304279725,
      "grad_norm": 0.9350415543063032,
      "learning_rate": 1.9529321261840148e-06,
      "loss": 0.1286,
      "step": 6422
    },
    {
      "epoch": 0.5917906666052426,
      "grad_norm": 0.9438832953394088,
      "learning_rate": 1.952188831544915e-06,
      "loss": 0.1425,
      "step": 6423
    },
    {
      "epoch": 0.5918828027825126,
      "grad_norm": 0.9351384678963927,
      "learning_rate": 1.951445587770142e-06,
      "loss": 0.1236,
      "step": 6424
    },
    {
      "epoch": 0.5919749389597826,
      "grad_norm": 0.9389889457570394,
      "learning_rate": 1.9507023949287045e-06,
      "loss": 0.135,
      "step": 6425
    },
    {
      "epoch": 0.5920670751370526,
      "grad_norm": 0.8928792043876652,
      "learning_rate": 1.94995925308961e-06,
      "loss": 0.1191,
      "step": 6426
    },
    {
      "epoch": 0.5921592113143226,
      "grad_norm": 0.907511539500844,
      "learning_rate": 1.9492161623218576e-06,
      "loss": 0.1347,
      "step": 6427
    },
    {
      "epoch": 0.5922513474915926,
      "grad_norm": 0.9123849674907245,
      "learning_rate": 1.9484731226944427e-06,
      "loss": 0.1301,
      "step": 6428
    },
    {
      "epoch": 0.5923434836688626,
      "grad_norm": 0.8943152512986418,
      "learning_rate": 1.9477301342763587e-06,
      "loss": 0.1356,
      "step": 6429
    },
    {
      "epoch": 0.5924356198461326,
      "grad_norm": 0.968239050353456,
      "learning_rate": 1.946987197136592e-06,
      "loss": 0.1356,
      "step": 6430
    },
    {
      "epoch": 0.5925277560234026,
      "grad_norm": 0.9555806245021822,
      "learning_rate": 1.946244311344122e-06,
      "loss": 0.1442,
      "step": 6431
    },
    {
      "epoch": 0.5926198922006726,
      "grad_norm": 0.9023064613208227,
      "learning_rate": 1.945501476967928e-06,
      "loss": 0.1334,
      "step": 6432
    },
    {
      "epoch": 0.5927120283779426,
      "grad_norm": 0.8772285953239736,
      "learning_rate": 1.9447586940769808e-06,
      "loss": 0.1251,
      "step": 6433
    },
    {
      "epoch": 0.5928041645552126,
      "grad_norm": 0.8673699715919908,
      "learning_rate": 1.9440159627402497e-06,
      "loss": 0.1224,
      "step": 6434
    },
    {
      "epoch": 0.5928963007324826,
      "grad_norm": 0.8963161155771975,
      "learning_rate": 1.9432732830266958e-06,
      "loss": 0.1314,
      "step": 6435
    },
    {
      "epoch": 0.5929884369097527,
      "grad_norm": 0.9320802532189435,
      "learning_rate": 1.9425306550052774e-06,
      "loss": 0.1438,
      "step": 6436
    },
    {
      "epoch": 0.5930805730870227,
      "grad_norm": 0.9223967293003285,
      "learning_rate": 1.9417880787449476e-06,
      "loss": 0.1359,
      "step": 6437
    },
    {
      "epoch": 0.5931727092642927,
      "grad_norm": 0.8711183973077686,
      "learning_rate": 1.9410455543146554e-06,
      "loss": 0.1305,
      "step": 6438
    },
    {
      "epoch": 0.5932648454415627,
      "grad_norm": 0.9242491659975414,
      "learning_rate": 1.9403030817833428e-06,
      "loss": 0.134,
      "step": 6439
    },
    {
      "epoch": 0.5933569816188327,
      "grad_norm": 0.91941387536559,
      "learning_rate": 1.93956066121995e-06,
      "loss": 0.1367,
      "step": 6440
    },
    {
      "epoch": 0.5934491177961027,
      "grad_norm": 0.948051439287453,
      "learning_rate": 1.938818292693409e-06,
      "loss": 0.1379,
      "step": 6441
    },
    {
      "epoch": 0.5935412539733727,
      "grad_norm": 0.8998731076445318,
      "learning_rate": 1.9380759762726512e-06,
      "loss": 0.1439,
      "step": 6442
    },
    {
      "epoch": 0.5936333901506426,
      "grad_norm": 0.8641438471342882,
      "learning_rate": 1.9373337120265993e-06,
      "loss": 0.1262,
      "step": 6443
    },
    {
      "epoch": 0.5937255263279126,
      "grad_norm": 0.9469896305094141,
      "learning_rate": 1.936591500024172e-06,
      "loss": 0.1448,
      "step": 6444
    },
    {
      "epoch": 0.5938176625051826,
      "grad_norm": 0.9327091962545253,
      "learning_rate": 1.935849340334285e-06,
      "loss": 0.1316,
      "step": 6445
    },
    {
      "epoch": 0.5939097986824526,
      "grad_norm": 0.9333644067409109,
      "learning_rate": 1.9351072330258483e-06,
      "loss": 0.1345,
      "step": 6446
    },
    {
      "epoch": 0.5940019348597226,
      "grad_norm": 0.9562785312219442,
      "learning_rate": 1.9343651781677648e-06,
      "loss": 0.142,
      "step": 6447
    },
    {
      "epoch": 0.5940940710369926,
      "grad_norm": 0.9451689710704071,
      "learning_rate": 1.933623175828935e-06,
      "loss": 0.1385,
      "step": 6448
    },
    {
      "epoch": 0.5941862072142626,
      "grad_norm": 0.9044973124651259,
      "learning_rate": 1.932881226078255e-06,
      "loss": 0.1287,
      "step": 6449
    },
    {
      "epoch": 0.5942783433915327,
      "grad_norm": 0.9583553094337743,
      "learning_rate": 1.932139328984614e-06,
      "loss": 0.1265,
      "step": 6450
    },
    {
      "epoch": 0.5943704795688027,
      "grad_norm": 0.9073283509605476,
      "learning_rate": 1.931397484616898e-06,
      "loss": 0.1322,
      "step": 6451
    },
    {
      "epoch": 0.5944626157460727,
      "grad_norm": 0.918641355087958,
      "learning_rate": 1.9306556930439857e-06,
      "loss": 0.1389,
      "step": 6452
    },
    {
      "epoch": 0.5945547519233427,
      "grad_norm": 0.9440290388840534,
      "learning_rate": 1.929913954334754e-06,
      "loss": 0.1404,
      "step": 6453
    },
    {
      "epoch": 0.5946468881006127,
      "grad_norm": 0.9119563904232301,
      "learning_rate": 1.929172268558073e-06,
      "loss": 0.1386,
      "step": 6454
    },
    {
      "epoch": 0.5947390242778827,
      "grad_norm": 0.8866487591480595,
      "learning_rate": 1.928430635782809e-06,
      "loss": 0.1194,
      "step": 6455
    },
    {
      "epoch": 0.5948311604551527,
      "grad_norm": 0.8549168890829434,
      "learning_rate": 1.9276890560778215e-06,
      "loss": 0.1148,
      "step": 6456
    },
    {
      "epoch": 0.5949232966324227,
      "grad_norm": 0.9034668553279024,
      "learning_rate": 1.9269475295119663e-06,
      "loss": 0.1354,
      "step": 6457
    },
    {
      "epoch": 0.5950154328096927,
      "grad_norm": 0.8658038474446961,
      "learning_rate": 1.9262060561540946e-06,
      "loss": 0.1336,
      "step": 6458
    },
    {
      "epoch": 0.5951075689869627,
      "grad_norm": 0.8666140071243187,
      "learning_rate": 1.9254646360730533e-06,
      "loss": 0.1215,
      "step": 6459
    },
    {
      "epoch": 0.5951997051642327,
      "grad_norm": 0.8869437410154752,
      "learning_rate": 1.9247232693376815e-06,
      "loss": 0.1241,
      "step": 6460
    },
    {
      "epoch": 0.5952918413415027,
      "grad_norm": 0.9344054957254118,
      "learning_rate": 1.9239819560168165e-06,
      "loss": 0.1386,
      "step": 6461
    },
    {
      "epoch": 0.5953839775187727,
      "grad_norm": 0.9095798537916769,
      "learning_rate": 1.9232406961792884e-06,
      "loss": 0.1361,
      "step": 6462
    },
    {
      "epoch": 0.5954761136960427,
      "grad_norm": 0.8624089349049318,
      "learning_rate": 1.9224994898939247e-06,
      "loss": 0.1223,
      "step": 6463
    },
    {
      "epoch": 0.5955682498733128,
      "grad_norm": 0.861278948017207,
      "learning_rate": 1.9217583372295446e-06,
      "loss": 0.1257,
      "step": 6464
    },
    {
      "epoch": 0.5956603860505828,
      "grad_norm": 0.9454217068596863,
      "learning_rate": 1.921017238254965e-06,
      "loss": 0.145,
      "step": 6465
    },
    {
      "epoch": 0.5957525222278528,
      "grad_norm": 0.9079234970512026,
      "learning_rate": 1.920276193038997e-06,
      "loss": 0.1299,
      "step": 6466
    },
    {
      "epoch": 0.5958446584051228,
      "grad_norm": 0.9043646425298468,
      "learning_rate": 1.9195352016504486e-06,
      "loss": 0.1288,
      "step": 6467
    },
    {
      "epoch": 0.5959367945823928,
      "grad_norm": 0.9208332337385433,
      "learning_rate": 1.9187942641581174e-06,
      "loss": 0.1367,
      "step": 6468
    },
    {
      "epoch": 0.5960289307596628,
      "grad_norm": 0.8611028175581442,
      "learning_rate": 1.9180533806308017e-06,
      "loss": 0.1256,
      "step": 6469
    },
    {
      "epoch": 0.5961210669369328,
      "grad_norm": 0.9491166063148969,
      "learning_rate": 1.9173125511372923e-06,
      "loss": 0.1349,
      "step": 6470
    },
    {
      "epoch": 0.5962132031142028,
      "grad_norm": 0.9318622192071003,
      "learning_rate": 1.916571775746376e-06,
      "loss": 0.1375,
      "step": 6471
    },
    {
      "epoch": 0.5963053392914728,
      "grad_norm": 0.9168563890482114,
      "learning_rate": 1.915831054526832e-06,
      "loss": 0.1412,
      "step": 6472
    },
    {
      "epoch": 0.5963974754687428,
      "grad_norm": 0.883224161497809,
      "learning_rate": 1.915090387547438e-06,
      "loss": 0.1223,
      "step": 6473
    },
    {
      "epoch": 0.5964896116460128,
      "grad_norm": 0.8794085943148341,
      "learning_rate": 1.914349774876964e-06,
      "loss": 0.1307,
      "step": 6474
    },
    {
      "epoch": 0.5965817478232828,
      "grad_norm": 0.9169286039253907,
      "learning_rate": 1.9136092165841776e-06,
      "loss": 0.1385,
      "step": 6475
    },
    {
      "epoch": 0.5966738840005528,
      "grad_norm": 0.8934819174982507,
      "learning_rate": 1.9128687127378376e-06,
      "loss": 0.1324,
      "step": 6476
    },
    {
      "epoch": 0.5967660201778229,
      "grad_norm": 0.9454788969073651,
      "learning_rate": 1.9121282634067008e-06,
      "loss": 0.1299,
      "step": 6477
    },
    {
      "epoch": 0.5968581563550929,
      "grad_norm": 0.9455512022162309,
      "learning_rate": 1.911387868659518e-06,
      "loss": 0.1314,
      "step": 6478
    },
    {
      "epoch": 0.5969502925323629,
      "grad_norm": 0.8730096720038547,
      "learning_rate": 1.9106475285650345e-06,
      "loss": 0.1144,
      "step": 6479
    },
    {
      "epoch": 0.5970424287096329,
      "grad_norm": 0.9349386390282505,
      "learning_rate": 1.909907243191993e-06,
      "loss": 0.1352,
      "step": 6480
    },
    {
      "epoch": 0.5971345648869029,
      "grad_norm": 0.9006142107363241,
      "learning_rate": 1.9091670126091264e-06,
      "loss": 0.1368,
      "step": 6481
    },
    {
      "epoch": 0.5972267010641729,
      "grad_norm": 0.9440688968299398,
      "learning_rate": 1.908426836885166e-06,
      "loss": 0.1277,
      "step": 6482
    },
    {
      "epoch": 0.5973188372414429,
      "grad_norm": 0.9115356328181138,
      "learning_rate": 1.907686716088838e-06,
      "loss": 0.13,
      "step": 6483
    },
    {
      "epoch": 0.5974109734187129,
      "grad_norm": 0.9107915908099802,
      "learning_rate": 1.9069466502888625e-06,
      "loss": 0.123,
      "step": 6484
    },
    {
      "epoch": 0.5975031095959829,
      "grad_norm": 0.9531637489514408,
      "learning_rate": 1.9062066395539535e-06,
      "loss": 0.1436,
      "step": 6485
    },
    {
      "epoch": 0.5975952457732528,
      "grad_norm": 0.9428193773075232,
      "learning_rate": 1.9054666839528225e-06,
      "loss": 0.1232,
      "step": 6486
    },
    {
      "epoch": 0.5976873819505228,
      "grad_norm": 0.9627495716809685,
      "learning_rate": 1.904726783554173e-06,
      "loss": 0.1331,
      "step": 6487
    },
    {
      "epoch": 0.5977795181277928,
      "grad_norm": 0.9517407487393909,
      "learning_rate": 1.903986938426707e-06,
      "loss": 0.1305,
      "step": 6488
    },
    {
      "epoch": 0.5978716543050628,
      "grad_norm": 0.9264042232053695,
      "learning_rate": 1.9032471486391175e-06,
      "loss": 0.1359,
      "step": 6489
    },
    {
      "epoch": 0.5979637904823328,
      "grad_norm": 0.904126855844614,
      "learning_rate": 1.9025074142600935e-06,
      "loss": 0.1244,
      "step": 6490
    },
    {
      "epoch": 0.5980559266596029,
      "grad_norm": 0.9136459862220759,
      "learning_rate": 1.9017677353583213e-06,
      "loss": 0.1312,
      "step": 6491
    },
    {
      "epoch": 0.5981480628368729,
      "grad_norm": 0.9007666304246773,
      "learning_rate": 1.90102811200248e-06,
      "loss": 0.1354,
      "step": 6492
    },
    {
      "epoch": 0.5982401990141429,
      "grad_norm": 0.9122491086990855,
      "learning_rate": 1.9002885442612413e-06,
      "loss": 0.1296,
      "step": 6493
    },
    {
      "epoch": 0.5983323351914129,
      "grad_norm": 0.8637309458864265,
      "learning_rate": 1.8995490322032767e-06,
      "loss": 0.1177,
      "step": 6494
    },
    {
      "epoch": 0.5984244713686829,
      "grad_norm": 0.9140446335579833,
      "learning_rate": 1.8988095758972485e-06,
      "loss": 0.126,
      "step": 6495
    },
    {
      "epoch": 0.5985166075459529,
      "grad_norm": 0.891049772866993,
      "learning_rate": 1.8980701754118168e-06,
      "loss": 0.1407,
      "step": 6496
    },
    {
      "epoch": 0.5986087437232229,
      "grad_norm": 0.9308146796900931,
      "learning_rate": 1.8973308308156337e-06,
      "loss": 0.146,
      "step": 6497
    },
    {
      "epoch": 0.5987008799004929,
      "grad_norm": 0.9217024801030724,
      "learning_rate": 1.8965915421773473e-06,
      "loss": 0.1385,
      "step": 6498
    },
    {
      "epoch": 0.5987930160777629,
      "grad_norm": 0.9446524923822498,
      "learning_rate": 1.8958523095656016e-06,
      "loss": 0.1264,
      "step": 6499
    },
    {
      "epoch": 0.5988851522550329,
      "grad_norm": 0.87348994025732,
      "learning_rate": 1.8951131330490347e-06,
      "loss": 0.1209,
      "step": 6500
    },
    {
      "epoch": 0.5988851522550329,
      "eval_loss": 0.1315893828868866,
      "eval_runtime": 299.1326,
      "eval_samples_per_second": 23.458,
      "eval_steps_per_second": 2.935,
      "step": 6500
    },
    {
      "epoch": 0.5989772884323029,
      "grad_norm": 0.890060441599928,
      "learning_rate": 1.8943740126962774e-06,
      "loss": 0.1333,
      "step": 6501
    },
    {
      "epoch": 0.5990694246095729,
      "grad_norm": 0.8535593592292823,
      "learning_rate": 1.8936349485759586e-06,
      "loss": 0.1216,
      "step": 6502
    },
    {
      "epoch": 0.5991615607868429,
      "grad_norm": 0.8947532542781077,
      "learning_rate": 1.8928959407566994e-06,
      "loss": 0.1378,
      "step": 6503
    },
    {
      "epoch": 0.599253696964113,
      "grad_norm": 0.8657797376728128,
      "learning_rate": 1.8921569893071187e-06,
      "loss": 0.1238,
      "step": 6504
    },
    {
      "epoch": 0.599345833141383,
      "grad_norm": 0.8597590797050176,
      "learning_rate": 1.8914180942958265e-06,
      "loss": 0.1328,
      "step": 6505
    },
    {
      "epoch": 0.599437969318653,
      "grad_norm": 0.9403836820908409,
      "learning_rate": 1.890679255791429e-06,
      "loss": 0.1387,
      "step": 6506
    },
    {
      "epoch": 0.599530105495923,
      "grad_norm": 0.92284449719563,
      "learning_rate": 1.8899404738625288e-06,
      "loss": 0.1357,
      "step": 6507
    },
    {
      "epoch": 0.599622241673193,
      "grad_norm": 0.8763065217252709,
      "learning_rate": 1.8892017485777208e-06,
      "loss": 0.1314,
      "step": 6508
    },
    {
      "epoch": 0.599714377850463,
      "grad_norm": 0.9386674388659384,
      "learning_rate": 1.8884630800055973e-06,
      "loss": 0.1223,
      "step": 6509
    },
    {
      "epoch": 0.599806514027733,
      "grad_norm": 0.9215754757081813,
      "learning_rate": 1.8877244682147419e-06,
      "loss": 0.1371,
      "step": 6510
    },
    {
      "epoch": 0.599898650205003,
      "grad_norm": 0.8830249289667409,
      "learning_rate": 1.886985913273735e-06,
      "loss": 0.1346,
      "step": 6511
    },
    {
      "epoch": 0.599990786382273,
      "grad_norm": 1.0090984121003168,
      "learning_rate": 1.8862474152511529e-06,
      "loss": 0.1506,
      "step": 6512
    },
    {
      "epoch": 0.600082922559543,
      "grad_norm": 0.8890967804641455,
      "learning_rate": 1.8855089742155647e-06,
      "loss": 0.1314,
      "step": 6513
    },
    {
      "epoch": 0.600175058736813,
      "grad_norm": 0.9269506146509996,
      "learning_rate": 1.8847705902355332e-06,
      "loss": 0.1349,
      "step": 6514
    },
    {
      "epoch": 0.600267194914083,
      "grad_norm": 0.889886502404544,
      "learning_rate": 1.8840322633796191e-06,
      "loss": 0.126,
      "step": 6515
    },
    {
      "epoch": 0.600359331091353,
      "grad_norm": 0.9002338360481987,
      "learning_rate": 1.8832939937163753e-06,
      "loss": 0.1241,
      "step": 6516
    },
    {
      "epoch": 0.600451467268623,
      "grad_norm": 0.9666422702216696,
      "learning_rate": 1.8825557813143513e-06,
      "loss": 0.1313,
      "step": 6517
    },
    {
      "epoch": 0.6005436034458931,
      "grad_norm": 0.8794516400849002,
      "learning_rate": 1.8818176262420893e-06,
      "loss": 0.1217,
      "step": 6518
    },
    {
      "epoch": 0.6006357396231631,
      "grad_norm": 0.8712755680659606,
      "learning_rate": 1.8810795285681263e-06,
      "loss": 0.1133,
      "step": 6519
    },
    {
      "epoch": 0.6007278758004331,
      "grad_norm": 0.9428832972839667,
      "learning_rate": 1.8803414883609967e-06,
      "loss": 0.1384,
      "step": 6520
    },
    {
      "epoch": 0.6008200119777031,
      "grad_norm": 0.8541956118384897,
      "learning_rate": 1.8796035056892268e-06,
      "loss": 0.1218,
      "step": 6521
    },
    {
      "epoch": 0.6009121481549731,
      "grad_norm": 0.9310338659977105,
      "learning_rate": 1.8788655806213372e-06,
      "loss": 0.1251,
      "step": 6522
    },
    {
      "epoch": 0.6010042843322431,
      "grad_norm": 0.9325252258734855,
      "learning_rate": 1.8781277132258458e-06,
      "loss": 0.1349,
      "step": 6523
    },
    {
      "epoch": 0.6010964205095131,
      "grad_norm": 0.95634726705644,
      "learning_rate": 1.8773899035712622e-06,
      "loss": 0.1349,
      "step": 6524
    },
    {
      "epoch": 0.6011885566867831,
      "grad_norm": 0.904487386830463,
      "learning_rate": 1.8766521517260946e-06,
      "loss": 0.1216,
      "step": 6525
    },
    {
      "epoch": 0.6012806928640531,
      "grad_norm": 0.9604402323364885,
      "learning_rate": 1.875914457758841e-06,
      "loss": 0.1414,
      "step": 6526
    },
    {
      "epoch": 0.601372829041323,
      "grad_norm": 0.8887907178428888,
      "learning_rate": 1.8751768217379973e-06,
      "loss": 0.1283,
      "step": 6527
    },
    {
      "epoch": 0.601464965218593,
      "grad_norm": 0.9066603462086329,
      "learning_rate": 1.874439243732053e-06,
      "loss": 0.1317,
      "step": 6528
    },
    {
      "epoch": 0.601557101395863,
      "grad_norm": 0.9088621832752597,
      "learning_rate": 1.8737017238094926e-06,
      "loss": 0.1323,
      "step": 6529
    },
    {
      "epoch": 0.601649237573133,
      "grad_norm": 0.8955444159164336,
      "learning_rate": 1.8729642620387935e-06,
      "loss": 0.1264,
      "step": 6530
    },
    {
      "epoch": 0.601741373750403,
      "grad_norm": 0.9723213160380102,
      "learning_rate": 1.8722268584884312e-06,
      "loss": 0.1385,
      "step": 6531
    },
    {
      "epoch": 0.6018335099276731,
      "grad_norm": 0.9220388614910245,
      "learning_rate": 1.8714895132268718e-06,
      "loss": 0.1387,
      "step": 6532
    },
    {
      "epoch": 0.6019256461049431,
      "grad_norm": 0.9805865380748094,
      "learning_rate": 1.8707522263225797e-06,
      "loss": 0.1388,
      "step": 6533
    },
    {
      "epoch": 0.6020177822822131,
      "grad_norm": 0.9279629753238866,
      "learning_rate": 1.8700149978440105e-06,
      "loss": 0.1287,
      "step": 6534
    },
    {
      "epoch": 0.6021099184594831,
      "grad_norm": 0.9464551237736833,
      "learning_rate": 1.8692778278596162e-06,
      "loss": 0.1321,
      "step": 6535
    },
    {
      "epoch": 0.6022020546367531,
      "grad_norm": 0.9800861251562089,
      "learning_rate": 1.868540716437844e-06,
      "loss": 0.1338,
      "step": 6536
    },
    {
      "epoch": 0.6022941908140231,
      "grad_norm": 0.9347287257761518,
      "learning_rate": 1.8678036636471336e-06,
      "loss": 0.1269,
      "step": 6537
    },
    {
      "epoch": 0.6023863269912931,
      "grad_norm": 0.9972581832144547,
      "learning_rate": 1.867066669555922e-06,
      "loss": 0.1431,
      "step": 6538
    },
    {
      "epoch": 0.6024784631685631,
      "grad_norm": 0.9908423627612108,
      "learning_rate": 1.8663297342326381e-06,
      "loss": 0.1363,
      "step": 6539
    },
    {
      "epoch": 0.6025705993458331,
      "grad_norm": 0.897036984294902,
      "learning_rate": 1.8655928577457058e-06,
      "loss": 0.1231,
      "step": 6540
    },
    {
      "epoch": 0.6026627355231031,
      "grad_norm": 0.9047862414880307,
      "learning_rate": 1.8648560401635448e-06,
      "loss": 0.134,
      "step": 6541
    },
    {
      "epoch": 0.6027548717003731,
      "grad_norm": 0.9144810403633027,
      "learning_rate": 1.8641192815545705e-06,
      "loss": 0.1301,
      "step": 6542
    },
    {
      "epoch": 0.6028470078776431,
      "grad_norm": 0.9365388153587314,
      "learning_rate": 1.8633825819871881e-06,
      "loss": 0.1347,
      "step": 6543
    },
    {
      "epoch": 0.6029391440549131,
      "grad_norm": 0.8949914004485408,
      "learning_rate": 1.8626459415298012e-06,
      "loss": 0.122,
      "step": 6544
    },
    {
      "epoch": 0.6030312802321832,
      "grad_norm": 0.8895585861402004,
      "learning_rate": 1.8619093602508075e-06,
      "loss": 0.1173,
      "step": 6545
    },
    {
      "epoch": 0.6031234164094532,
      "grad_norm": 0.9070371139758455,
      "learning_rate": 1.8611728382185995e-06,
      "loss": 0.1338,
      "step": 6546
    },
    {
      "epoch": 0.6032155525867232,
      "grad_norm": 0.9223856940001541,
      "learning_rate": 1.860436375501561e-06,
      "loss": 0.1228,
      "step": 6547
    },
    {
      "epoch": 0.6033076887639932,
      "grad_norm": 0.9695663122827732,
      "learning_rate": 1.8596999721680743e-06,
      "loss": 0.138,
      "step": 6548
    },
    {
      "epoch": 0.6033998249412632,
      "grad_norm": 0.9665305664799301,
      "learning_rate": 1.858963628286513e-06,
      "loss": 0.136,
      "step": 6549
    },
    {
      "epoch": 0.6034919611185332,
      "grad_norm": 0.8799821475459378,
      "learning_rate": 1.8582273439252497e-06,
      "loss": 0.1295,
      "step": 6550
    },
    {
      "epoch": 0.6035840972958032,
      "grad_norm": 0.9107048517759243,
      "learning_rate": 1.8574911191526456e-06,
      "loss": 0.1199,
      "step": 6551
    },
    {
      "epoch": 0.6036762334730732,
      "grad_norm": 0.9273771857397388,
      "learning_rate": 1.85675495403706e-06,
      "loss": 0.1419,
      "step": 6552
    },
    {
      "epoch": 0.6037683696503432,
      "grad_norm": 0.879572291727904,
      "learning_rate": 1.8560188486468463e-06,
      "loss": 0.1257,
      "step": 6553
    },
    {
      "epoch": 0.6038605058276132,
      "grad_norm": 0.917819635353749,
      "learning_rate": 1.8552828030503528e-06,
      "loss": 0.1362,
      "step": 6554
    },
    {
      "epoch": 0.6039526420048832,
      "grad_norm": 0.854519148192027,
      "learning_rate": 1.854546817315919e-06,
      "loss": 0.1247,
      "step": 6555
    },
    {
      "epoch": 0.6040447781821532,
      "grad_norm": 0.9358624951200376,
      "learning_rate": 1.8538108915118833e-06,
      "loss": 0.1385,
      "step": 6556
    },
    {
      "epoch": 0.6041369143594232,
      "grad_norm": 0.8987979977343791,
      "learning_rate": 1.8530750257065752e-06,
      "loss": 0.1299,
      "step": 6557
    },
    {
      "epoch": 0.6042290505366932,
      "grad_norm": 0.8655912576757261,
      "learning_rate": 1.8523392199683218e-06,
      "loss": 0.1232,
      "step": 6558
    },
    {
      "epoch": 0.6043211867139633,
      "grad_norm": 0.8844496081591054,
      "learning_rate": 1.851603474365441e-06,
      "loss": 0.1381,
      "step": 6559
    },
    {
      "epoch": 0.6044133228912333,
      "grad_norm": 0.9974044514544727,
      "learning_rate": 1.8508677889662469e-06,
      "loss": 0.1425,
      "step": 6560
    },
    {
      "epoch": 0.6045054590685033,
      "grad_norm": 0.8670146001504918,
      "learning_rate": 1.850132163839049e-06,
      "loss": 0.1305,
      "step": 6561
    },
    {
      "epoch": 0.6045975952457733,
      "grad_norm": 0.8994301785848731,
      "learning_rate": 1.849396599052149e-06,
      "loss": 0.1161,
      "step": 6562
    },
    {
      "epoch": 0.6046897314230433,
      "grad_norm": 0.9049796994871221,
      "learning_rate": 1.848661094673846e-06,
      "loss": 0.126,
      "step": 6563
    },
    {
      "epoch": 0.6047818676003133,
      "grad_norm": 0.9022404281815567,
      "learning_rate": 1.8479256507724297e-06,
      "loss": 0.1187,
      "step": 6564
    },
    {
      "epoch": 0.6048740037775833,
      "grad_norm": 0.9916709809090737,
      "learning_rate": 1.8471902674161863e-06,
      "loss": 0.1475,
      "step": 6565
    },
    {
      "epoch": 0.6049661399548533,
      "grad_norm": 0.9205985919823508,
      "learning_rate": 1.8464549446733976e-06,
      "loss": 0.1355,
      "step": 6566
    },
    {
      "epoch": 0.6050582761321233,
      "grad_norm": 0.8901635257949637,
      "learning_rate": 1.8457196826123381e-06,
      "loss": 0.1254,
      "step": 6567
    },
    {
      "epoch": 0.6051504123093933,
      "grad_norm": 0.8943163998141479,
      "learning_rate": 1.8449844813012755e-06,
      "loss": 0.1238,
      "step": 6568
    },
    {
      "epoch": 0.6052425484866633,
      "grad_norm": 0.9384468453458241,
      "learning_rate": 1.8442493408084746e-06,
      "loss": 0.135,
      "step": 6569
    },
    {
      "epoch": 0.6053346846639333,
      "grad_norm": 0.8784833757143954,
      "learning_rate": 1.8435142612021929e-06,
      "loss": 0.1356,
      "step": 6570
    },
    {
      "epoch": 0.6054268208412033,
      "grad_norm": 0.9014973212837387,
      "learning_rate": 1.8427792425506833e-06,
      "loss": 0.1255,
      "step": 6571
    },
    {
      "epoch": 0.6055189570184734,
      "grad_norm": 0.9243350184650585,
      "learning_rate": 1.8420442849221915e-06,
      "loss": 0.136,
      "step": 6572
    },
    {
      "epoch": 0.6056110931957434,
      "grad_norm": 0.8652026511605855,
      "learning_rate": 1.8413093883849579e-06,
      "loss": 0.1181,
      "step": 6573
    },
    {
      "epoch": 0.6057032293730134,
      "grad_norm": 0.9563469319601445,
      "learning_rate": 1.840574553007219e-06,
      "loss": 0.1281,
      "step": 6574
    },
    {
      "epoch": 0.6057953655502833,
      "grad_norm": 0.8706380604782731,
      "learning_rate": 1.8398397788572046e-06,
      "loss": 0.1345,
      "step": 6575
    },
    {
      "epoch": 0.6058875017275533,
      "grad_norm": 0.9457792347195052,
      "learning_rate": 1.8391050660031364e-06,
      "loss": 0.142,
      "step": 6576
    },
    {
      "epoch": 0.6059796379048233,
      "grad_norm": 0.9546506661285495,
      "learning_rate": 1.8383704145132347e-06,
      "loss": 0.1328,
      "step": 6577
    },
    {
      "epoch": 0.6060717740820933,
      "grad_norm": 0.9360510999701172,
      "learning_rate": 1.8376358244557108e-06,
      "loss": 0.1405,
      "step": 6578
    },
    {
      "epoch": 0.6061639102593633,
      "grad_norm": 0.953871600525164,
      "learning_rate": 1.8369012958987728e-06,
      "loss": 0.1294,
      "step": 6579
    },
    {
      "epoch": 0.6062560464366333,
      "grad_norm": 0.9827266384723413,
      "learning_rate": 1.8361668289106204e-06,
      "loss": 0.1404,
      "step": 6580
    },
    {
      "epoch": 0.6063481826139033,
      "grad_norm": 0.9683587334877363,
      "learning_rate": 1.8354324235594488e-06,
      "loss": 0.1409,
      "step": 6581
    },
    {
      "epoch": 0.6064403187911733,
      "grad_norm": 0.8877532566082904,
      "learning_rate": 1.8346980799134485e-06,
      "loss": 0.1232,
      "step": 6582
    },
    {
      "epoch": 0.6065324549684433,
      "grad_norm": 0.9025550006506345,
      "learning_rate": 1.8339637980408038e-06,
      "loss": 0.1153,
      "step": 6583
    },
    {
      "epoch": 0.6066245911457133,
      "grad_norm": 0.9428207407116446,
      "learning_rate": 1.833229578009691e-06,
      "loss": 0.1303,
      "step": 6584
    },
    {
      "epoch": 0.6067167273229833,
      "grad_norm": 0.9556142056967836,
      "learning_rate": 1.8324954198882843e-06,
      "loss": 0.1438,
      "step": 6585
    },
    {
      "epoch": 0.6068088635002534,
      "grad_norm": 0.8780366492422997,
      "learning_rate": 1.831761323744749e-06,
      "loss": 0.1214,
      "step": 6586
    },
    {
      "epoch": 0.6069009996775234,
      "grad_norm": 0.9234513915076087,
      "learning_rate": 1.831027289647248e-06,
      "loss": 0.1257,
      "step": 6587
    },
    {
      "epoch": 0.6069931358547934,
      "grad_norm": 0.9163158474665248,
      "learning_rate": 1.8302933176639346e-06,
      "loss": 0.1326,
      "step": 6588
    },
    {
      "epoch": 0.6070852720320634,
      "grad_norm": 0.9169583283659786,
      "learning_rate": 1.829559407862958e-06,
      "loss": 0.1313,
      "step": 6589
    },
    {
      "epoch": 0.6071774082093334,
      "grad_norm": 0.9362982236980304,
      "learning_rate": 1.8288255603124632e-06,
      "loss": 0.1338,
      "step": 6590
    },
    {
      "epoch": 0.6072695443866034,
      "grad_norm": 0.9062618567697522,
      "learning_rate": 1.8280917750805865e-06,
      "loss": 0.1259,
      "step": 6591
    },
    {
      "epoch": 0.6073616805638734,
      "grad_norm": 0.8924801799490278,
      "learning_rate": 1.8273580522354622e-06,
      "loss": 0.1189,
      "step": 6592
    },
    {
      "epoch": 0.6074538167411434,
      "grad_norm": 0.925574310988602,
      "learning_rate": 1.8266243918452149e-06,
      "loss": 0.128,
      "step": 6593
    },
    {
      "epoch": 0.6075459529184134,
      "grad_norm": 0.9140715166358384,
      "learning_rate": 1.825890793977964e-06,
      "loss": 0.128,
      "step": 6594
    },
    {
      "epoch": 0.6076380890956834,
      "grad_norm": 0.9680327082562757,
      "learning_rate": 1.8251572587018268e-06,
      "loss": 0.1515,
      "step": 6595
    },
    {
      "epoch": 0.6077302252729534,
      "grad_norm": 0.9601890925112531,
      "learning_rate": 1.8244237860849108e-06,
      "loss": 0.1322,
      "step": 6596
    },
    {
      "epoch": 0.6078223614502234,
      "grad_norm": 0.8684119145542256,
      "learning_rate": 1.8236903761953178e-06,
      "loss": 0.1176,
      "step": 6597
    },
    {
      "epoch": 0.6079144976274934,
      "grad_norm": 0.8648843664557521,
      "learning_rate": 1.8229570291011468e-06,
      "loss": 0.1168,
      "step": 6598
    },
    {
      "epoch": 0.6080066338047634,
      "grad_norm": 0.9193053247872316,
      "learning_rate": 1.8222237448704882e-06,
      "loss": 0.1301,
      "step": 6599
    },
    {
      "epoch": 0.6080987699820335,
      "grad_norm": 0.92509832524853,
      "learning_rate": 1.8214905235714286e-06,
      "loss": 0.128,
      "step": 6600
    },
    {
      "epoch": 0.6081909061593035,
      "grad_norm": 0.8970623170235373,
      "learning_rate": 1.8207573652720467e-06,
      "loss": 0.1161,
      "step": 6601
    },
    {
      "epoch": 0.6082830423365735,
      "grad_norm": 0.8532064017979026,
      "learning_rate": 1.8200242700404159e-06,
      "loss": 0.1201,
      "step": 6602
    },
    {
      "epoch": 0.6083751785138435,
      "grad_norm": 0.9100390031276187,
      "learning_rate": 1.8192912379446048e-06,
      "loss": 0.1226,
      "step": 6603
    },
    {
      "epoch": 0.6084673146911135,
      "grad_norm": 0.9301923891417945,
      "learning_rate": 1.8185582690526765e-06,
      "loss": 0.1332,
      "step": 6604
    },
    {
      "epoch": 0.6085594508683835,
      "grad_norm": 0.9588130238961836,
      "learning_rate": 1.8178253634326854e-06,
      "loss": 0.1323,
      "step": 6605
    },
    {
      "epoch": 0.6086515870456535,
      "grad_norm": 0.9464922450102702,
      "learning_rate": 1.817092521152683e-06,
      "loss": 0.1243,
      "step": 6606
    },
    {
      "epoch": 0.6087437232229235,
      "grad_norm": 0.9661556183879925,
      "learning_rate": 1.816359742280713e-06,
      "loss": 0.1309,
      "step": 6607
    },
    {
      "epoch": 0.6088358594001935,
      "grad_norm": 0.8915377822004282,
      "learning_rate": 1.8156270268848155e-06,
      "loss": 0.1183,
      "step": 6608
    },
    {
      "epoch": 0.6089279955774635,
      "grad_norm": 0.9370935343576614,
      "learning_rate": 1.8148943750330216e-06,
      "loss": 0.1296,
      "step": 6609
    },
    {
      "epoch": 0.6090201317547335,
      "grad_norm": 0.9175329703834323,
      "learning_rate": 1.8141617867933586e-06,
      "loss": 0.1215,
      "step": 6610
    },
    {
      "epoch": 0.6091122679320035,
      "grad_norm": 0.939823774501705,
      "learning_rate": 1.813429262233848e-06,
      "loss": 0.1316,
      "step": 6611
    },
    {
      "epoch": 0.6092044041092735,
      "grad_norm": 0.9438374697151484,
      "learning_rate": 1.8126968014225044e-06,
      "loss": 0.1381,
      "step": 6612
    },
    {
      "epoch": 0.6092965402865436,
      "grad_norm": 0.9026520719916683,
      "learning_rate": 1.811964404427336e-06,
      "loss": 0.1357,
      "step": 6613
    },
    {
      "epoch": 0.6093886764638136,
      "grad_norm": 0.8733218006894725,
      "learning_rate": 1.811232071316347e-06,
      "loss": 0.1118,
      "step": 6614
    },
    {
      "epoch": 0.6094808126410836,
      "grad_norm": 0.8841490921267199,
      "learning_rate": 1.8104998021575337e-06,
      "loss": 0.1295,
      "step": 6615
    },
    {
      "epoch": 0.6095729488183536,
      "grad_norm": 0.9359678194340786,
      "learning_rate": 1.8097675970188894e-06,
      "loss": 0.1327,
      "step": 6616
    },
    {
      "epoch": 0.6096650849956236,
      "grad_norm": 0.9605463356795557,
      "learning_rate": 1.8090354559683972e-06,
      "loss": 0.1378,
      "step": 6617
    },
    {
      "epoch": 0.6097572211728935,
      "grad_norm": 0.9054243649757274,
      "learning_rate": 1.8083033790740368e-06,
      "loss": 0.1289,
      "step": 6618
    },
    {
      "epoch": 0.6098493573501635,
      "grad_norm": 0.8842290378463926,
      "learning_rate": 1.8075713664037823e-06,
      "loss": 0.1289,
      "step": 6619
    },
    {
      "epoch": 0.6099414935274335,
      "grad_norm": 0.8947640414559052,
      "learning_rate": 1.806839418025601e-06,
      "loss": 0.1275,
      "step": 6620
    },
    {
      "epoch": 0.6100336297047035,
      "grad_norm": 0.921951402811935,
      "learning_rate": 1.8061075340074556e-06,
      "loss": 0.1297,
      "step": 6621
    },
    {
      "epoch": 0.6101257658819735,
      "grad_norm": 0.9294620374457182,
      "learning_rate": 1.8053757144172987e-06,
      "loss": 0.1237,
      "step": 6622
    },
    {
      "epoch": 0.6102179020592435,
      "grad_norm": 0.8834759916422078,
      "learning_rate": 1.8046439593230822e-06,
      "loss": 0.1147,
      "step": 6623
    },
    {
      "epoch": 0.6103100382365135,
      "grad_norm": 0.9385060723312659,
      "learning_rate": 1.8039122687927485e-06,
      "loss": 0.1386,
      "step": 6624
    },
    {
      "epoch": 0.6104021744137835,
      "grad_norm": 0.9310030772186891,
      "learning_rate": 1.803180642894236e-06,
      "loss": 0.1389,
      "step": 6625
    },
    {
      "epoch": 0.6104943105910535,
      "grad_norm": 0.8966613217875679,
      "learning_rate": 1.802449081695476e-06,
      "loss": 0.1269,
      "step": 6626
    },
    {
      "epoch": 0.6105864467683236,
      "grad_norm": 0.8572229985519388,
      "learning_rate": 1.801717585264393e-06,
      "loss": 0.1282,
      "step": 6627
    },
    {
      "epoch": 0.6106785829455936,
      "grad_norm": 0.930161237084897,
      "learning_rate": 1.800986153668908e-06,
      "loss": 0.125,
      "step": 6628
    },
    {
      "epoch": 0.6107707191228636,
      "grad_norm": 0.9925047377703466,
      "learning_rate": 1.8002547869769344e-06,
      "loss": 0.1297,
      "step": 6629
    },
    {
      "epoch": 0.6108628553001336,
      "grad_norm": 0.8971787402961252,
      "learning_rate": 1.7995234852563779e-06,
      "loss": 0.1228,
      "step": 6630
    },
    {
      "epoch": 0.6109549914774036,
      "grad_norm": 0.9592466914464529,
      "learning_rate": 1.7987922485751418e-06,
      "loss": 0.1383,
      "step": 6631
    },
    {
      "epoch": 0.6110471276546736,
      "grad_norm": 0.9101582801987562,
      "learning_rate": 1.7980610770011203e-06,
      "loss": 0.1217,
      "step": 6632
    },
    {
      "epoch": 0.6111392638319436,
      "grad_norm": 0.9388719801247791,
      "learning_rate": 1.7973299706022046e-06,
      "loss": 0.1289,
      "step": 6633
    },
    {
      "epoch": 0.6112314000092136,
      "grad_norm": 0.8983598124593635,
      "learning_rate": 1.796598929446276e-06,
      "loss": 0.1196,
      "step": 6634
    },
    {
      "epoch": 0.6113235361864836,
      "grad_norm": 0.9182147997516233,
      "learning_rate": 1.7958679536012118e-06,
      "loss": 0.1207,
      "step": 6635
    },
    {
      "epoch": 0.6114156723637536,
      "grad_norm": 0.8619585830126839,
      "learning_rate": 1.795137043134885e-06,
      "loss": 0.1232,
      "step": 6636
    },
    {
      "epoch": 0.6115078085410236,
      "grad_norm": 1.0263119069785722,
      "learning_rate": 1.7944061981151598e-06,
      "loss": 0.1374,
      "step": 6637
    },
    {
      "epoch": 0.6115999447182936,
      "grad_norm": 0.9085278130780867,
      "learning_rate": 1.793675418609894e-06,
      "loss": 0.124,
      "step": 6638
    },
    {
      "epoch": 0.6116920808955636,
      "grad_norm": 0.890303263816946,
      "learning_rate": 1.792944704686942e-06,
      "loss": 0.1253,
      "step": 6639
    },
    {
      "epoch": 0.6117842170728337,
      "grad_norm": 0.8922482946265187,
      "learning_rate": 1.79221405641415e-06,
      "loss": 0.1204,
      "step": 6640
    },
    {
      "epoch": 0.6118763532501037,
      "grad_norm": 0.9219285468355103,
      "learning_rate": 1.79148347385936e-06,
      "loss": 0.1339,
      "step": 6641
    },
    {
      "epoch": 0.6119684894273737,
      "grad_norm": 0.904392406416535,
      "learning_rate": 1.790752957090405e-06,
      "loss": 0.1285,
      "step": 6642
    },
    {
      "epoch": 0.6120606256046437,
      "grad_norm": 0.9040831409584187,
      "learning_rate": 1.7900225061751136e-06,
      "loss": 0.1305,
      "step": 6643
    },
    {
      "epoch": 0.6121527617819137,
      "grad_norm": 0.8813255919074837,
      "learning_rate": 1.78929212118131e-06,
      "loss": 0.1205,
      "step": 6644
    },
    {
      "epoch": 0.6122448979591837,
      "grad_norm": 0.8850407998747193,
      "learning_rate": 1.7885618021768097e-06,
      "loss": 0.1253,
      "step": 6645
    },
    {
      "epoch": 0.6123370341364537,
      "grad_norm": 0.9142336900388941,
      "learning_rate": 1.7878315492294213e-06,
      "loss": 0.1248,
      "step": 6646
    },
    {
      "epoch": 0.6124291703137237,
      "grad_norm": 0.9290446184856952,
      "learning_rate": 1.7871013624069511e-06,
      "loss": 0.1391,
      "step": 6647
    },
    {
      "epoch": 0.6125213064909937,
      "grad_norm": 0.9968257121332454,
      "learning_rate": 1.7863712417771956e-06,
      "loss": 0.1348,
      "step": 6648
    },
    {
      "epoch": 0.6126134426682637,
      "grad_norm": 0.9863639721347642,
      "learning_rate": 1.7856411874079475e-06,
      "loss": 0.1402,
      "step": 6649
    },
    {
      "epoch": 0.6127055788455337,
      "grad_norm": 0.9698492821648672,
      "learning_rate": 1.7849111993669924e-06,
      "loss": 0.1345,
      "step": 6650
    },
    {
      "epoch": 0.6127977150228037,
      "grad_norm": 0.9558952945055265,
      "learning_rate": 1.7841812777221085e-06,
      "loss": 0.1246,
      "step": 6651
    },
    {
      "epoch": 0.6128898512000737,
      "grad_norm": 0.9316749933029452,
      "learning_rate": 1.7834514225410704e-06,
      "loss": 0.1283,
      "step": 6652
    },
    {
      "epoch": 0.6129819873773437,
      "grad_norm": 0.9578132894362446,
      "learning_rate": 1.7827216338916444e-06,
      "loss": 0.1362,
      "step": 6653
    },
    {
      "epoch": 0.6130741235546138,
      "grad_norm": 0.9942396128253892,
      "learning_rate": 1.781991911841593e-06,
      "loss": 0.1384,
      "step": 6654
    },
    {
      "epoch": 0.6131662597318838,
      "grad_norm": 0.8876883802815898,
      "learning_rate": 1.7812622564586695e-06,
      "loss": 0.1197,
      "step": 6655
    },
    {
      "epoch": 0.6132583959091538,
      "grad_norm": 0.9207165138441854,
      "learning_rate": 1.7805326678106221e-06,
      "loss": 0.1306,
      "step": 6656
    },
    {
      "epoch": 0.6133505320864238,
      "grad_norm": 0.8709068748396972,
      "learning_rate": 1.7798031459651942e-06,
      "loss": 0.1205,
      "step": 6657
    },
    {
      "epoch": 0.6134426682636938,
      "grad_norm": 0.8609800545742414,
      "learning_rate": 1.7790736909901228e-06,
      "loss": 0.1251,
      "step": 6658
    },
    {
      "epoch": 0.6135348044409638,
      "grad_norm": 0.8481561432664178,
      "learning_rate": 1.7783443029531352e-06,
      "loss": 0.1252,
      "step": 6659
    },
    {
      "epoch": 0.6136269406182338,
      "grad_norm": 0.9299442690236165,
      "learning_rate": 1.7776149819219574e-06,
      "loss": 0.1355,
      "step": 6660
    },
    {
      "epoch": 0.6137190767955037,
      "grad_norm": 0.9289067001977687,
      "learning_rate": 1.776885727964306e-06,
      "loss": 0.1263,
      "step": 6661
    },
    {
      "epoch": 0.6138112129727737,
      "grad_norm": 0.8770126865790537,
      "learning_rate": 1.7761565411478935e-06,
      "loss": 0.1341,
      "step": 6662
    },
    {
      "epoch": 0.6139033491500437,
      "grad_norm": 0.8953296834576114,
      "learning_rate": 1.7754274215404234e-06,
      "loss": 0.139,
      "step": 6663
    },
    {
      "epoch": 0.6139954853273137,
      "grad_norm": 0.9011900760471201,
      "learning_rate": 1.7746983692095947e-06,
      "loss": 0.1273,
      "step": 6664
    },
    {
      "epoch": 0.6140876215045837,
      "grad_norm": 0.9071509181765444,
      "learning_rate": 1.7739693842231008e-06,
      "loss": 0.1192,
      "step": 6665
    },
    {
      "epoch": 0.6141797576818537,
      "grad_norm": 0.953660969909198,
      "learning_rate": 1.7732404666486289e-06,
      "loss": 0.1402,
      "step": 6666
    },
    {
      "epoch": 0.6142718938591237,
      "grad_norm": 0.9647631217108921,
      "learning_rate": 1.7725116165538564e-06,
      "loss": 0.1391,
      "step": 6667
    },
    {
      "epoch": 0.6143640300363938,
      "grad_norm": 0.8881822165996579,
      "learning_rate": 1.7717828340064592e-06,
      "loss": 0.1262,
      "step": 6668
    },
    {
      "epoch": 0.6144561662136638,
      "grad_norm": 0.9178819920338384,
      "learning_rate": 1.7710541190741037e-06,
      "loss": 0.1242,
      "step": 6669
    },
    {
      "epoch": 0.6145483023909338,
      "grad_norm": 0.9312720344544685,
      "learning_rate": 1.7703254718244525e-06,
      "loss": 0.1339,
      "step": 6670
    },
    {
      "epoch": 0.6146404385682038,
      "grad_norm": 0.9442221013026411,
      "learning_rate": 1.7695968923251593e-06,
      "loss": 0.1309,
      "step": 6671
    },
    {
      "epoch": 0.6147325747454738,
      "grad_norm": 0.9321429789620098,
      "learning_rate": 1.7688683806438731e-06,
      "loss": 0.1339,
      "step": 6672
    },
    {
      "epoch": 0.6148247109227438,
      "grad_norm": 0.9000161388757578,
      "learning_rate": 1.7681399368482367e-06,
      "loss": 0.1306,
      "step": 6673
    },
    {
      "epoch": 0.6149168471000138,
      "grad_norm": 0.9606303166573105,
      "learning_rate": 1.7674115610058864e-06,
      "loss": 0.133,
      "step": 6674
    },
    {
      "epoch": 0.6150089832772838,
      "grad_norm": 0.9437709741521769,
      "learning_rate": 1.7666832531844508e-06,
      "loss": 0.1258,
      "step": 6675
    },
    {
      "epoch": 0.6151011194545538,
      "grad_norm": 0.9434879871809514,
      "learning_rate": 1.765955013451554e-06,
      "loss": 0.1313,
      "step": 6676
    },
    {
      "epoch": 0.6151932556318238,
      "grad_norm": 0.9881526594306695,
      "learning_rate": 1.765226841874813e-06,
      "loss": 0.1447,
      "step": 6677
    },
    {
      "epoch": 0.6152853918090938,
      "grad_norm": 0.9328846527215926,
      "learning_rate": 1.7644987385218395e-06,
      "loss": 0.1345,
      "step": 6678
    },
    {
      "epoch": 0.6153775279863638,
      "grad_norm": 0.9727818346921414,
      "learning_rate": 1.7637707034602378e-06,
      "loss": 0.1464,
      "step": 6679
    },
    {
      "epoch": 0.6154696641636338,
      "grad_norm": 0.9213397997962777,
      "learning_rate": 1.7630427367576042e-06,
      "loss": 0.131,
      "step": 6680
    },
    {
      "epoch": 0.6155618003409039,
      "grad_norm": 0.9437472003416133,
      "learning_rate": 1.7623148384815326e-06,
      "loss": 0.1251,
      "step": 6681
    },
    {
      "epoch": 0.6156539365181739,
      "grad_norm": 0.9166929533684961,
      "learning_rate": 1.7615870086996067e-06,
      "loss": 0.1384,
      "step": 6682
    },
    {
      "epoch": 0.6157460726954439,
      "grad_norm": 0.9249383528462048,
      "learning_rate": 1.7608592474794078e-06,
      "loss": 0.1295,
      "step": 6683
    },
    {
      "epoch": 0.6158382088727139,
      "grad_norm": 0.9679979891312327,
      "learning_rate": 1.760131554888507e-06,
      "loss": 0.1295,
      "step": 6684
    },
    {
      "epoch": 0.6159303450499839,
      "grad_norm": 0.9300305618407569,
      "learning_rate": 1.7594039309944702e-06,
      "loss": 0.1392,
      "step": 6685
    },
    {
      "epoch": 0.6160224812272539,
      "grad_norm": 0.9864960844070007,
      "learning_rate": 1.7586763758648587e-06,
      "loss": 0.1485,
      "step": 6686
    },
    {
      "epoch": 0.6161146174045239,
      "grad_norm": 0.91785390219516,
      "learning_rate": 1.757948889567226e-06,
      "loss": 0.1287,
      "step": 6687
    },
    {
      "epoch": 0.6162067535817939,
      "grad_norm": 0.8841484437270067,
      "learning_rate": 1.7572214721691178e-06,
      "loss": 0.1213,
      "step": 6688
    },
    {
      "epoch": 0.6162988897590639,
      "grad_norm": 0.9395321082526199,
      "learning_rate": 1.7564941237380761e-06,
      "loss": 0.1293,
      "step": 6689
    },
    {
      "epoch": 0.6163910259363339,
      "grad_norm": 0.8857358471622105,
      "learning_rate": 1.7557668443416348e-06,
      "loss": 0.124,
      "step": 6690
    },
    {
      "epoch": 0.6164831621136039,
      "grad_norm": 0.903927286093644,
      "learning_rate": 1.755039634047323e-06,
      "loss": 0.1228,
      "step": 6691
    },
    {
      "epoch": 0.6165752982908739,
      "grad_norm": 0.9233988812330909,
      "learning_rate": 1.7543124929226608e-06,
      "loss": 0.1355,
      "step": 6692
    },
    {
      "epoch": 0.6166674344681439,
      "grad_norm": 0.9142573934713351,
      "learning_rate": 1.7535854210351635e-06,
      "loss": 0.1368,
      "step": 6693
    },
    {
      "epoch": 0.6167595706454139,
      "grad_norm": 0.8897398795942182,
      "learning_rate": 1.7528584184523407e-06,
      "loss": 0.1232,
      "step": 6694
    },
    {
      "epoch": 0.616851706822684,
      "grad_norm": 0.9684599266466104,
      "learning_rate": 1.7521314852416946e-06,
      "loss": 0.1412,
      "step": 6695
    },
    {
      "epoch": 0.616943842999954,
      "grad_norm": 0.8633355016137957,
      "learning_rate": 1.7514046214707195e-06,
      "loss": 0.1163,
      "step": 6696
    },
    {
      "epoch": 0.617035979177224,
      "grad_norm": 0.9556729220047715,
      "learning_rate": 1.7506778272069064e-06,
      "loss": 0.1381,
      "step": 6697
    },
    {
      "epoch": 0.617128115354494,
      "grad_norm": 0.8849642829869895,
      "learning_rate": 1.7499511025177373e-06,
      "loss": 0.1264,
      "step": 6698
    },
    {
      "epoch": 0.617220251531764,
      "grad_norm": 0.9186363005241283,
      "learning_rate": 1.7492244474706898e-06,
      "loss": 0.1423,
      "step": 6699
    },
    {
      "epoch": 0.617312387709034,
      "grad_norm": 0.8928578767583446,
      "learning_rate": 1.748497862133233e-06,
      "loss": 0.1265,
      "step": 6700
    },
    {
      "epoch": 0.617404523886304,
      "grad_norm": 0.8535895801436509,
      "learning_rate": 1.7477713465728296e-06,
      "loss": 0.1246,
      "step": 6701
    },
    {
      "epoch": 0.617496660063574,
      "grad_norm": 0.9062694992292313,
      "learning_rate": 1.747044900856938e-06,
      "loss": 0.1244,
      "step": 6702
    },
    {
      "epoch": 0.617588796240844,
      "grad_norm": 1.0026481306418769,
      "learning_rate": 1.7463185250530102e-06,
      "loss": 0.1309,
      "step": 6703
    },
    {
      "epoch": 0.617680932418114,
      "grad_norm": 0.9401301173996884,
      "learning_rate": 1.7455922192284864e-06,
      "loss": 0.1313,
      "step": 6704
    },
    {
      "epoch": 0.617773068595384,
      "grad_norm": 0.8910759203702324,
      "learning_rate": 1.7448659834508064e-06,
      "loss": 0.1298,
      "step": 6705
    },
    {
      "epoch": 0.6178652047726539,
      "grad_norm": 0.9252349150341446,
      "learning_rate": 1.7441398177874015e-06,
      "loss": 0.1308,
      "step": 6706
    },
    {
      "epoch": 0.6179573409499239,
      "grad_norm": 0.8962704413424406,
      "learning_rate": 1.7434137223056954e-06,
      "loss": 0.1253,
      "step": 6707
    },
    {
      "epoch": 0.618049477127194,
      "grad_norm": 0.9382417961474702,
      "learning_rate": 1.7426876970731076e-06,
      "loss": 0.1267,
      "step": 6708
    },
    {
      "epoch": 0.618141613304464,
      "grad_norm": 0.8782716906829233,
      "learning_rate": 1.7419617421570483e-06,
      "loss": 0.1298,
      "step": 6709
    },
    {
      "epoch": 0.618233749481734,
      "grad_norm": 0.8877730559324124,
      "learning_rate": 1.7412358576249222e-06,
      "loss": 0.1242,
      "step": 6710
    },
    {
      "epoch": 0.618325885659004,
      "grad_norm": 1.031141967978923,
      "learning_rate": 1.740510043544129e-06,
      "loss": 0.1433,
      "step": 6711
    },
    {
      "epoch": 0.618418021836274,
      "grad_norm": 0.8924542958205756,
      "learning_rate": 1.7397842999820605e-06,
      "loss": 0.1228,
      "step": 6712
    },
    {
      "epoch": 0.618510158013544,
      "grad_norm": 0.951354922791097,
      "learning_rate": 1.7390586270061005e-06,
      "loss": 0.1304,
      "step": 6713
    },
    {
      "epoch": 0.618602294190814,
      "grad_norm": 0.963162432927016,
      "learning_rate": 1.7383330246836294e-06,
      "loss": 0.1266,
      "step": 6714
    },
    {
      "epoch": 0.618694430368084,
      "grad_norm": 0.8984043647800838,
      "learning_rate": 1.737607493082018e-06,
      "loss": 0.1252,
      "step": 6715
    },
    {
      "epoch": 0.618786566545354,
      "grad_norm": 0.9279719290802259,
      "learning_rate": 1.7368820322686345e-06,
      "loss": 0.1294,
      "step": 6716
    },
    {
      "epoch": 0.618878702722624,
      "grad_norm": 0.952211601716075,
      "learning_rate": 1.7361566423108355e-06,
      "loss": 0.1312,
      "step": 6717
    },
    {
      "epoch": 0.618970838899894,
      "grad_norm": 0.8749680465755658,
      "learning_rate": 1.7354313232759745e-06,
      "loss": 0.1179,
      "step": 6718
    },
    {
      "epoch": 0.619062975077164,
      "grad_norm": 0.8577709338059307,
      "learning_rate": 1.7347060752313978e-06,
      "loss": 0.1235,
      "step": 6719
    },
    {
      "epoch": 0.619155111254434,
      "grad_norm": 0.955540440918245,
      "learning_rate": 1.7339808982444444e-06,
      "loss": 0.1394,
      "step": 6720
    },
    {
      "epoch": 0.619247247431704,
      "grad_norm": 0.9616325266472617,
      "learning_rate": 1.7332557923824463e-06,
      "loss": 0.1355,
      "step": 6721
    },
    {
      "epoch": 0.6193393836089741,
      "grad_norm": 0.9302492116295052,
      "learning_rate": 1.732530757712731e-06,
      "loss": 0.121,
      "step": 6722
    },
    {
      "epoch": 0.6194315197862441,
      "grad_norm": 0.8914909942723336,
      "learning_rate": 1.7318057943026169e-06,
      "loss": 0.1194,
      "step": 6723
    },
    {
      "epoch": 0.6195236559635141,
      "grad_norm": 0.8862393627090467,
      "learning_rate": 1.7310809022194184e-06,
      "loss": 0.1279,
      "step": 6724
    },
    {
      "epoch": 0.6196157921407841,
      "grad_norm": 0.8809958029695578,
      "learning_rate": 1.7303560815304404e-06,
      "loss": 0.1274,
      "step": 6725
    },
    {
      "epoch": 0.6197079283180541,
      "grad_norm": 0.9235688604500591,
      "learning_rate": 1.7296313323029825e-06,
      "loss": 0.129,
      "step": 6726
    },
    {
      "epoch": 0.6198000644953241,
      "grad_norm": 0.9150737133599645,
      "learning_rate": 1.7289066546043386e-06,
      "loss": 0.1189,
      "step": 6727
    },
    {
      "epoch": 0.6198922006725941,
      "grad_norm": 0.9212605410966181,
      "learning_rate": 1.7281820485017958e-06,
      "loss": 0.1359,
      "step": 6728
    },
    {
      "epoch": 0.6199843368498641,
      "grad_norm": 0.8655563428055735,
      "learning_rate": 1.7274575140626318e-06,
      "loss": 0.1287,
      "step": 6729
    },
    {
      "epoch": 0.6200764730271341,
      "grad_norm": 0.9011894322767224,
      "learning_rate": 1.726733051354121e-06,
      "loss": 0.1325,
      "step": 6730
    },
    {
      "epoch": 0.6201686092044041,
      "grad_norm": 0.8678920213692736,
      "learning_rate": 1.7260086604435295e-06,
      "loss": 0.1222,
      "step": 6731
    },
    {
      "epoch": 0.6202607453816741,
      "grad_norm": 0.9225506544414821,
      "learning_rate": 1.7252843413981176e-06,
      "loss": 0.1291,
      "step": 6732
    },
    {
      "epoch": 0.6203528815589441,
      "grad_norm": 0.8741405868411134,
      "learning_rate": 1.7245600942851378e-06,
      "loss": 0.1168,
      "step": 6733
    },
    {
      "epoch": 0.6204450177362141,
      "grad_norm": 0.88964218818895,
      "learning_rate": 1.7238359191718362e-06,
      "loss": 0.1158,
      "step": 6734
    },
    {
      "epoch": 0.6205371539134841,
      "grad_norm": 0.8860723797025962,
      "learning_rate": 1.7231118161254534e-06,
      "loss": 0.1201,
      "step": 6735
    },
    {
      "epoch": 0.6206292900907542,
      "grad_norm": 0.9866557709375795,
      "learning_rate": 1.7223877852132218e-06,
      "loss": 0.1349,
      "step": 6736
    },
    {
      "epoch": 0.6207214262680242,
      "grad_norm": 0.8892445642733556,
      "learning_rate": 1.721663826502369e-06,
      "loss": 0.1218,
      "step": 6737
    },
    {
      "epoch": 0.6208135624452942,
      "grad_norm": 0.9688094575884019,
      "learning_rate": 1.7209399400601128e-06,
      "loss": 0.1346,
      "step": 6738
    },
    {
      "epoch": 0.6209056986225642,
      "grad_norm": 0.926838705061562,
      "learning_rate": 1.720216125953667e-06,
      "loss": 0.1334,
      "step": 6739
    },
    {
      "epoch": 0.6209978347998342,
      "grad_norm": 0.9396059515548089,
      "learning_rate": 1.7194923842502382e-06,
      "loss": 0.1324,
      "step": 6740
    },
    {
      "epoch": 0.6210899709771042,
      "grad_norm": 0.9830328224533756,
      "learning_rate": 1.7187687150170257e-06,
      "loss": 0.1345,
      "step": 6741
    },
    {
      "epoch": 0.6211821071543742,
      "grad_norm": 0.9430353714861338,
      "learning_rate": 1.7180451183212217e-06,
      "loss": 0.1211,
      "step": 6742
    },
    {
      "epoch": 0.6212742433316442,
      "grad_norm": 0.9121922967727294,
      "learning_rate": 1.7173215942300125e-06,
      "loss": 0.1428,
      "step": 6743
    },
    {
      "epoch": 0.6213663795089142,
      "grad_norm": 0.8985273162486016,
      "learning_rate": 1.7165981428105771e-06,
      "loss": 0.14,
      "step": 6744
    },
    {
      "epoch": 0.6214585156861842,
      "grad_norm": 0.9074990241015248,
      "learning_rate": 1.71587476413009e-06,
      "loss": 0.1354,
      "step": 6745
    },
    {
      "epoch": 0.6215506518634542,
      "grad_norm": 0.8835713991487871,
      "learning_rate": 1.7151514582557144e-06,
      "loss": 0.1295,
      "step": 6746
    },
    {
      "epoch": 0.6216427880407241,
      "grad_norm": 0.9563138522145045,
      "learning_rate": 1.71442822525461e-06,
      "loss": 0.1472,
      "step": 6747
    },
    {
      "epoch": 0.6217349242179941,
      "grad_norm": 0.9397063902023289,
      "learning_rate": 1.71370506519393e-06,
      "loss": 0.1251,
      "step": 6748
    },
    {
      "epoch": 0.6218270603952643,
      "grad_norm": 0.8707621777233324,
      "learning_rate": 1.7129819781408197e-06,
      "loss": 0.1278,
      "step": 6749
    },
    {
      "epoch": 0.6219191965725342,
      "grad_norm": 0.8388729362903343,
      "learning_rate": 1.7122589641624166e-06,
      "loss": 0.1181,
      "step": 6750
    },
    {
      "epoch": 0.6220113327498042,
      "grad_norm": 0.9139806446176466,
      "learning_rate": 1.7115360233258537e-06,
      "loss": 0.1202,
      "step": 6751
    },
    {
      "epoch": 0.6221034689270742,
      "grad_norm": 0.9311401798162033,
      "learning_rate": 1.7108131556982554e-06,
      "loss": 0.1372,
      "step": 6752
    },
    {
      "epoch": 0.6221956051043442,
      "grad_norm": 0.9511568955126489,
      "learning_rate": 1.7100903613467419e-06,
      "loss": 0.1214,
      "step": 6753
    },
    {
      "epoch": 0.6222877412816142,
      "grad_norm": 0.9239186752009969,
      "learning_rate": 1.7093676403384223e-06,
      "loss": 0.1301,
      "step": 6754
    },
    {
      "epoch": 0.6223798774588842,
      "grad_norm": 0.913588956394364,
      "learning_rate": 1.7086449927404025e-06,
      "loss": 0.1243,
      "step": 6755
    },
    {
      "epoch": 0.6224720136361542,
      "grad_norm": 0.905711339806985,
      "learning_rate": 1.7079224186197804e-06,
      "loss": 0.1294,
      "step": 6756
    },
    {
      "epoch": 0.6225641498134242,
      "grad_norm": 0.9681244319244915,
      "learning_rate": 1.7071999180436477e-06,
      "loss": 0.133,
      "step": 6757
    },
    {
      "epoch": 0.6226562859906942,
      "grad_norm": 0.9155233722559435,
      "learning_rate": 1.7064774910790865e-06,
      "loss": 0.1234,
      "step": 6758
    },
    {
      "epoch": 0.6227484221679642,
      "grad_norm": 0.9435640978336151,
      "learning_rate": 1.7057551377931767e-06,
      "loss": 0.1307,
      "step": 6759
    },
    {
      "epoch": 0.6228405583452342,
      "grad_norm": 0.9545102508423661,
      "learning_rate": 1.705032858252987e-06,
      "loss": 0.1422,
      "step": 6760
    },
    {
      "epoch": 0.6229326945225042,
      "grad_norm": 0.9415499869138108,
      "learning_rate": 1.7043106525255831e-06,
      "loss": 0.1395,
      "step": 6761
    },
    {
      "epoch": 0.6230248306997742,
      "grad_norm": 0.905530428956616,
      "learning_rate": 1.70358852067802e-06,
      "loss": 0.1226,
      "step": 6762
    },
    {
      "epoch": 0.6231169668770443,
      "grad_norm": 0.9482156251625352,
      "learning_rate": 1.7028664627773483e-06,
      "loss": 0.138,
      "step": 6763
    },
    {
      "epoch": 0.6232091030543143,
      "grad_norm": 0.8947426819048082,
      "learning_rate": 1.7021444788906117e-06,
      "loss": 0.1271,
      "step": 6764
    },
    {
      "epoch": 0.6233012392315843,
      "grad_norm": 0.9270692925867124,
      "learning_rate": 1.7014225690848458e-06,
      "loss": 0.1285,
      "step": 6765
    },
    {
      "epoch": 0.6233933754088543,
      "grad_norm": 0.8786188858935942,
      "learning_rate": 1.7007007334270809e-06,
      "loss": 0.1205,
      "step": 6766
    },
    {
      "epoch": 0.6234855115861243,
      "grad_norm": 0.9163708159636691,
      "learning_rate": 1.6999789719843388e-06,
      "loss": 0.1272,
      "step": 6767
    },
    {
      "epoch": 0.6235776477633943,
      "grad_norm": 0.8979496261233568,
      "learning_rate": 1.6992572848236343e-06,
      "loss": 0.1181,
      "step": 6768
    },
    {
      "epoch": 0.6236697839406643,
      "grad_norm": 0.9156614779990734,
      "learning_rate": 1.698535672011978e-06,
      "loss": 0.1341,
      "step": 6769
    },
    {
      "epoch": 0.6237619201179343,
      "grad_norm": 0.9391056956844751,
      "learning_rate": 1.6978141336163713e-06,
      "loss": 0.1263,
      "step": 6770
    },
    {
      "epoch": 0.6238540562952043,
      "grad_norm": 0.8858195058935551,
      "learning_rate": 1.6970926697038073e-06,
      "loss": 0.1247,
      "step": 6771
    },
    {
      "epoch": 0.6239461924724743,
      "grad_norm": 0.8735010962850701,
      "learning_rate": 1.6963712803412761e-06,
      "loss": 0.126,
      "step": 6772
    },
    {
      "epoch": 0.6240383286497443,
      "grad_norm": 0.9453628701261408,
      "learning_rate": 1.6956499655957577e-06,
      "loss": 0.1434,
      "step": 6773
    },
    {
      "epoch": 0.6241304648270143,
      "grad_norm": 0.9657158049888763,
      "learning_rate": 1.694928725534227e-06,
      "loss": 0.1342,
      "step": 6774
    },
    {
      "epoch": 0.6242226010042843,
      "grad_norm": 0.9759493120509639,
      "learning_rate": 1.6942075602236507e-06,
      "loss": 0.125,
      "step": 6775
    },
    {
      "epoch": 0.6243147371815544,
      "grad_norm": 0.9688092020766974,
      "learning_rate": 1.6934864697309883e-06,
      "loss": 0.1469,
      "step": 6776
    },
    {
      "epoch": 0.6244068733588244,
      "grad_norm": 0.9382127371598054,
      "learning_rate": 1.6927654541231941e-06,
      "loss": 0.1304,
      "step": 6777
    },
    {
      "epoch": 0.6244990095360944,
      "grad_norm": 0.9522838200564424,
      "learning_rate": 1.6920445134672162e-06,
      "loss": 0.126,
      "step": 6778
    },
    {
      "epoch": 0.6245911457133644,
      "grad_norm": 0.94904956281575,
      "learning_rate": 1.6913236478299906e-06,
      "loss": 0.1314,
      "step": 6779
    },
    {
      "epoch": 0.6246832818906344,
      "grad_norm": 0.9386781279946481,
      "learning_rate": 1.6906028572784511e-06,
      "loss": 0.1298,
      "step": 6780
    },
    {
      "epoch": 0.6247754180679044,
      "grad_norm": 0.9081504529487514,
      "learning_rate": 1.6898821418795237e-06,
      "loss": 0.1349,
      "step": 6781
    },
    {
      "epoch": 0.6248675542451744,
      "grad_norm": 0.8914673156367624,
      "learning_rate": 1.6891615017001272e-06,
      "loss": 0.1137,
      "step": 6782
    },
    {
      "epoch": 0.6249596904224444,
      "grad_norm": 0.891940546485863,
      "learning_rate": 1.6884409368071718e-06,
      "loss": 0.1231,
      "step": 6783
    },
    {
      "epoch": 0.6250518265997144,
      "grad_norm": 0.914512509293828,
      "learning_rate": 1.6877204472675634e-06,
      "loss": 0.1291,
      "step": 6784
    },
    {
      "epoch": 0.6251439627769844,
      "grad_norm": 0.8841333746458065,
      "learning_rate": 1.687000033148198e-06,
      "loss": 0.1289,
      "step": 6785
    },
    {
      "epoch": 0.6252360989542544,
      "grad_norm": 0.9418903021503365,
      "learning_rate": 1.686279694515968e-06,
      "loss": 0.1313,
      "step": 6786
    },
    {
      "epoch": 0.6253282351315244,
      "grad_norm": 0.8870373175179938,
      "learning_rate": 1.685559431437756e-06,
      "loss": 0.1176,
      "step": 6787
    },
    {
      "epoch": 0.6254203713087944,
      "grad_norm": 0.8973213859270821,
      "learning_rate": 1.6848392439804374e-06,
      "loss": 0.1247,
      "step": 6788
    },
    {
      "epoch": 0.6255125074860644,
      "grad_norm": 0.9056535481430396,
      "learning_rate": 1.6841191322108835e-06,
      "loss": 0.1316,
      "step": 6789
    },
    {
      "epoch": 0.6256046436633345,
      "grad_norm": 0.8611185014978975,
      "learning_rate": 1.6833990961959562e-06,
      "loss": 0.1105,
      "step": 6790
    },
    {
      "epoch": 0.6256967798406045,
      "grad_norm": 0.8714595196924181,
      "learning_rate": 1.6826791360025103e-06,
      "loss": 0.118,
      "step": 6791
    },
    {
      "epoch": 0.6257889160178745,
      "grad_norm": 0.939216708887262,
      "learning_rate": 1.6819592516973942e-06,
      "loss": 0.1294,
      "step": 6792
    },
    {
      "epoch": 0.6258810521951444,
      "grad_norm": 0.8805849507628835,
      "learning_rate": 1.6812394433474497e-06,
      "loss": 0.1303,
      "step": 6793
    },
    {
      "epoch": 0.6259731883724144,
      "grad_norm": 0.9904622658783916,
      "learning_rate": 1.6805197110195115e-06,
      "loss": 0.1366,
      "step": 6794
    },
    {
      "epoch": 0.6260653245496844,
      "grad_norm": 0.8452414433909419,
      "learning_rate": 1.6798000547804066e-06,
      "loss": 0.1138,
      "step": 6795
    },
    {
      "epoch": 0.6261574607269544,
      "grad_norm": 0.9647793184636386,
      "learning_rate": 1.6790804746969542e-06,
      "loss": 0.1318,
      "step": 6796
    },
    {
      "epoch": 0.6262495969042244,
      "grad_norm": 0.9190751074302138,
      "learning_rate": 1.6783609708359683e-06,
      "loss": 0.1272,
      "step": 6797
    },
    {
      "epoch": 0.6263417330814944,
      "grad_norm": 1.0135874834100627,
      "learning_rate": 1.677641543264254e-06,
      "loss": 0.1356,
      "step": 6798
    },
    {
      "epoch": 0.6264338692587644,
      "grad_norm": 0.9578041665275557,
      "learning_rate": 1.6769221920486123e-06,
      "loss": 0.1333,
      "step": 6799
    },
    {
      "epoch": 0.6265260054360344,
      "grad_norm": 0.9120272599831393,
      "learning_rate": 1.676202917255833e-06,
      "loss": 0.1236,
      "step": 6800
    },
    {
      "epoch": 0.6266181416133044,
      "grad_norm": 0.9545336241142,
      "learning_rate": 1.675483718952701e-06,
      "loss": 0.1505,
      "step": 6801
    },
    {
      "epoch": 0.6267102777905744,
      "grad_norm": 0.8996118102214581,
      "learning_rate": 1.6747645972059949e-06,
      "loss": 0.1156,
      "step": 6802
    },
    {
      "epoch": 0.6268024139678445,
      "grad_norm": 0.9780721556358306,
      "learning_rate": 1.6740455520824852e-06,
      "loss": 0.1311,
      "step": 6803
    },
    {
      "epoch": 0.6268945501451145,
      "grad_norm": 0.8756953274991542,
      "learning_rate": 1.673326583648934e-06,
      "loss": 0.1223,
      "step": 6804
    },
    {
      "epoch": 0.6269866863223845,
      "grad_norm": 0.9590846378967668,
      "learning_rate": 1.672607691972099e-06,
      "loss": 0.1327,
      "step": 6805
    },
    {
      "epoch": 0.6270788224996545,
      "grad_norm": 0.9725555167066449,
      "learning_rate": 1.671888877118728e-06,
      "loss": 0.1394,
      "step": 6806
    },
    {
      "epoch": 0.6271709586769245,
      "grad_norm": 0.9680149550710828,
      "learning_rate": 1.6711701391555654e-06,
      "loss": 0.1458,
      "step": 6807
    },
    {
      "epoch": 0.6272630948541945,
      "grad_norm": 0.9266144277449202,
      "learning_rate": 1.6704514781493439e-06,
      "loss": 0.1318,
      "step": 6808
    },
    {
      "epoch": 0.6273552310314645,
      "grad_norm": 0.9044064187078539,
      "learning_rate": 1.6697328941667911e-06,
      "loss": 0.1309,
      "step": 6809
    },
    {
      "epoch": 0.6274473672087345,
      "grad_norm": 0.9161445150421038,
      "learning_rate": 1.6690143872746295e-06,
      "loss": 0.1359,
      "step": 6810
    },
    {
      "epoch": 0.6275395033860045,
      "grad_norm": 0.9039846306537077,
      "learning_rate": 1.6682959575395717e-06,
      "loss": 0.13,
      "step": 6811
    },
    {
      "epoch": 0.6276316395632745,
      "grad_norm": 0.8368274819032405,
      "learning_rate": 1.6675776050283228e-06,
      "loss": 0.1117,
      "step": 6812
    },
    {
      "epoch": 0.6277237757405445,
      "grad_norm": 0.9653527218946011,
      "learning_rate": 1.666859329807583e-06,
      "loss": 0.1362,
      "step": 6813
    },
    {
      "epoch": 0.6278159119178145,
      "grad_norm": 0.9458104946736843,
      "learning_rate": 1.666141131944044e-06,
      "loss": 0.1323,
      "step": 6814
    },
    {
      "epoch": 0.6279080480950845,
      "grad_norm": 0.8872800410857472,
      "learning_rate": 1.6654230115043915e-06,
      "loss": 0.1275,
      "step": 6815
    },
    {
      "epoch": 0.6280001842723545,
      "grad_norm": 0.915733456696928,
      "learning_rate": 1.6647049685553018e-06,
      "loss": 0.1295,
      "step": 6816
    },
    {
      "epoch": 0.6280923204496246,
      "grad_norm": 0.9299225804478112,
      "learning_rate": 1.663987003163445e-06,
      "loss": 0.1289,
      "step": 6817
    },
    {
      "epoch": 0.6281844566268946,
      "grad_norm": 0.893925699523802,
      "learning_rate": 1.6632691153954855e-06,
      "loss": 0.1281,
      "step": 6818
    },
    {
      "epoch": 0.6282765928041646,
      "grad_norm": 0.9610798463300602,
      "learning_rate": 1.6625513053180791e-06,
      "loss": 0.1255,
      "step": 6819
    },
    {
      "epoch": 0.6283687289814346,
      "grad_norm": 0.9394658550305951,
      "learning_rate": 1.6618335729978736e-06,
      "loss": 0.1263,
      "step": 6820
    },
    {
      "epoch": 0.6284608651587046,
      "grad_norm": 0.941465892891746,
      "learning_rate": 1.661115918501511e-06,
      "loss": 0.1254,
      "step": 6821
    },
    {
      "epoch": 0.6285530013359746,
      "grad_norm": 0.9393058701052077,
      "learning_rate": 1.6603983418956254e-06,
      "loss": 0.1328,
      "step": 6822
    },
    {
      "epoch": 0.6286451375132446,
      "grad_norm": 0.9625490206776947,
      "learning_rate": 1.6596808432468445e-06,
      "loss": 0.1394,
      "step": 6823
    },
    {
      "epoch": 0.6287372736905146,
      "grad_norm": 0.934085002587979,
      "learning_rate": 1.6589634226217883e-06,
      "loss": 0.1328,
      "step": 6824
    },
    {
      "epoch": 0.6288294098677846,
      "grad_norm": 0.9148072326121705,
      "learning_rate": 1.6582460800870675e-06,
      "loss": 0.1284,
      "step": 6825
    },
    {
      "epoch": 0.6289215460450546,
      "grad_norm": 0.9431341023115458,
      "learning_rate": 1.6575288157092898e-06,
      "loss": 0.1236,
      "step": 6826
    },
    {
      "epoch": 0.6290136822223246,
      "grad_norm": 1.0046117659117793,
      "learning_rate": 1.6568116295550515e-06,
      "loss": 0.1313,
      "step": 6827
    },
    {
      "epoch": 0.6291058183995946,
      "grad_norm": 0.8757042152992928,
      "learning_rate": 1.6560945216909451e-06,
      "loss": 0.1144,
      "step": 6828
    },
    {
      "epoch": 0.6291979545768646,
      "grad_norm": 0.8499443288587102,
      "learning_rate": 1.6553774921835528e-06,
      "loss": 0.1107,
      "step": 6829
    },
    {
      "epoch": 0.6292900907541346,
      "grad_norm": 0.8959817137425333,
      "learning_rate": 1.6546605410994507e-06,
      "loss": 0.1255,
      "step": 6830
    },
    {
      "epoch": 0.6293822269314047,
      "grad_norm": 0.8993665748741935,
      "learning_rate": 1.6539436685052087e-06,
      "loss": 0.1263,
      "step": 6831
    },
    {
      "epoch": 0.6294743631086747,
      "grad_norm": 0.9006040587776355,
      "learning_rate": 1.6532268744673887e-06,
      "loss": 0.1154,
      "step": 6832
    },
    {
      "epoch": 0.6295664992859447,
      "grad_norm": 0.8915285615887515,
      "learning_rate": 1.6525101590525435e-06,
      "loss": 0.1241,
      "step": 6833
    },
    {
      "epoch": 0.6296586354632147,
      "grad_norm": 0.8626544961619003,
      "learning_rate": 1.651793522327222e-06,
      "loss": 0.1275,
      "step": 6834
    },
    {
      "epoch": 0.6297507716404847,
      "grad_norm": 0.9064252376215327,
      "learning_rate": 1.6510769643579625e-06,
      "loss": 0.133,
      "step": 6835
    },
    {
      "epoch": 0.6298429078177546,
      "grad_norm": 0.893570508838155,
      "learning_rate": 1.6503604852112992e-06,
      "loss": 0.1174,
      "step": 6836
    },
    {
      "epoch": 0.6299350439950246,
      "grad_norm": 0.9209261279569575,
      "learning_rate": 1.649644084953756e-06,
      "loss": 0.1335,
      "step": 6837
    },
    {
      "epoch": 0.6300271801722946,
      "grad_norm": 0.9612542183985056,
      "learning_rate": 1.6489277636518503e-06,
      "loss": 0.1364,
      "step": 6838
    },
    {
      "epoch": 0.6301193163495646,
      "grad_norm": 0.9030891613008248,
      "learning_rate": 1.6482115213720939e-06,
      "loss": 0.128,
      "step": 6839
    },
    {
      "epoch": 0.6302114525268346,
      "grad_norm": 0.892199661038204,
      "learning_rate": 1.64749535818099e-06,
      "loss": 0.1234,
      "step": 6840
    },
    {
      "epoch": 0.6303035887041046,
      "grad_norm": 0.892341611186558,
      "learning_rate": 1.6467792741450328e-06,
      "loss": 0.1262,
      "step": 6841
    },
    {
      "epoch": 0.6303957248813746,
      "grad_norm": 0.9303003796224806,
      "learning_rate": 1.6460632693307122e-06,
      "loss": 0.1332,
      "step": 6842
    },
    {
      "epoch": 0.6304878610586446,
      "grad_norm": 0.971311474946169,
      "learning_rate": 1.6453473438045088e-06,
      "loss": 0.1282,
      "step": 6843
    },
    {
      "epoch": 0.6305799972359147,
      "grad_norm": 0.9128566574816395,
      "learning_rate": 1.644631497632897e-06,
      "loss": 0.1268,
      "step": 6844
    },
    {
      "epoch": 0.6306721334131847,
      "grad_norm": 0.9329959880146634,
      "learning_rate": 1.6439157308823425e-06,
      "loss": 0.1281,
      "step": 6845
    },
    {
      "epoch": 0.6307642695904547,
      "grad_norm": 0.9643591086782031,
      "learning_rate": 1.6432000436193042e-06,
      "loss": 0.14,
      "step": 6846
    },
    {
      "epoch": 0.6308564057677247,
      "grad_norm": 0.8949048912604123,
      "learning_rate": 1.642484435910234e-06,
      "loss": 0.1275,
      "step": 6847
    },
    {
      "epoch": 0.6309485419449947,
      "grad_norm": 0.9294314454625718,
      "learning_rate": 1.6417689078215771e-06,
      "loss": 0.1317,
      "step": 6848
    },
    {
      "epoch": 0.6310406781222647,
      "grad_norm": 0.9092264943448212,
      "learning_rate": 1.6410534594197687e-06,
      "loss": 0.1243,
      "step": 6849
    },
    {
      "epoch": 0.6311328142995347,
      "grad_norm": 0.8915342261422562,
      "learning_rate": 1.640338090771239e-06,
      "loss": 0.126,
      "step": 6850
    },
    {
      "epoch": 0.6312249504768047,
      "grad_norm": 0.9631374154156046,
      "learning_rate": 1.6396228019424099e-06,
      "loss": 0.1193,
      "step": 6851
    },
    {
      "epoch": 0.6313170866540747,
      "grad_norm": 1.028510708487749,
      "learning_rate": 1.6389075929996961e-06,
      "loss": 0.1425,
      "step": 6852
    },
    {
      "epoch": 0.6314092228313447,
      "grad_norm": 0.9457079527488705,
      "learning_rate": 1.6381924640095065e-06,
      "loss": 0.1232,
      "step": 6853
    },
    {
      "epoch": 0.6315013590086147,
      "grad_norm": 0.8672064169139518,
      "learning_rate": 1.6374774150382377e-06,
      "loss": 0.1191,
      "step": 6854
    },
    {
      "epoch": 0.6315934951858847,
      "grad_norm": 0.9792257903977444,
      "learning_rate": 1.6367624461522841e-06,
      "loss": 0.1303,
      "step": 6855
    },
    {
      "epoch": 0.6316856313631547,
      "grad_norm": 0.9661371788971164,
      "learning_rate": 1.6360475574180306e-06,
      "loss": 0.136,
      "step": 6856
    },
    {
      "epoch": 0.6317777675404247,
      "grad_norm": 0.9543594834857416,
      "learning_rate": 1.635332748901855e-06,
      "loss": 0.1294,
      "step": 6857
    },
    {
      "epoch": 0.6318699037176948,
      "grad_norm": 0.8929324029831897,
      "learning_rate": 1.6346180206701256e-06,
      "loss": 0.1237,
      "step": 6858
    },
    {
      "epoch": 0.6319620398949648,
      "grad_norm": 0.9010697000995774,
      "learning_rate": 1.6339033727892067e-06,
      "loss": 0.1271,
      "step": 6859
    },
    {
      "epoch": 0.6320541760722348,
      "grad_norm": 0.8981085366716448,
      "learning_rate": 1.6331888053254521e-06,
      "loss": 0.1287,
      "step": 6860
    },
    {
      "epoch": 0.6321463122495048,
      "grad_norm": 0.9238622764409322,
      "learning_rate": 1.6324743183452113e-06,
      "loss": 0.1316,
      "step": 6861
    },
    {
      "epoch": 0.6322384484267748,
      "grad_norm": 0.9109979518487612,
      "learning_rate": 1.631759911914823e-06,
      "loss": 0.1232,
      "step": 6862
    },
    {
      "epoch": 0.6323305846040448,
      "grad_norm": 0.9189914997104379,
      "learning_rate": 1.63104558610062e-06,
      "loss": 0.1279,
      "step": 6863
    },
    {
      "epoch": 0.6324227207813148,
      "grad_norm": 0.9492831449731975,
      "learning_rate": 1.630331340968928e-06,
      "loss": 0.139,
      "step": 6864
    },
    {
      "epoch": 0.6325148569585848,
      "grad_norm": 0.9602943007686003,
      "learning_rate": 1.6296171765860651e-06,
      "loss": 0.1392,
      "step": 6865
    },
    {
      "epoch": 0.6326069931358548,
      "grad_norm": 0.96860825635109,
      "learning_rate": 1.6289030930183403e-06,
      "loss": 0.1261,
      "step": 6866
    },
    {
      "epoch": 0.6326991293131248,
      "grad_norm": 0.9355225957084158,
      "learning_rate": 1.6281890903320574e-06,
      "loss": 0.1349,
      "step": 6867
    },
    {
      "epoch": 0.6327912654903948,
      "grad_norm": 0.8994626792406648,
      "learning_rate": 1.627475168593511e-06,
      "loss": 0.1232,
      "step": 6868
    },
    {
      "epoch": 0.6328834016676648,
      "grad_norm": 0.8585321066061194,
      "learning_rate": 1.6267613278689898e-06,
      "loss": 0.1172,
      "step": 6869
    },
    {
      "epoch": 0.6329755378449348,
      "grad_norm": 1.0178195384314648,
      "learning_rate": 1.626047568224773e-06,
      "loss": 0.1346,
      "step": 6870
    },
    {
      "epoch": 0.6330676740222049,
      "grad_norm": 0.9180606266375119,
      "learning_rate": 1.625333889727133e-06,
      "loss": 0.1305,
      "step": 6871
    },
    {
      "epoch": 0.6331598101994749,
      "grad_norm": 0.86008781184332,
      "learning_rate": 1.624620292442336e-06,
      "loss": 0.1128,
      "step": 6872
    },
    {
      "epoch": 0.6332519463767449,
      "grad_norm": 0.8953508403267575,
      "learning_rate": 1.6239067764366396e-06,
      "loss": 0.1295,
      "step": 6873
    },
    {
      "epoch": 0.6333440825540149,
      "grad_norm": 0.9701241179780662,
      "learning_rate": 1.6231933417762918e-06,
      "loss": 0.132,
      "step": 6874
    },
    {
      "epoch": 0.6334362187312849,
      "grad_norm": 0.8964266930393143,
      "learning_rate": 1.6224799885275378e-06,
      "loss": 0.131,
      "step": 6875
    },
    {
      "epoch": 0.6335283549085549,
      "grad_norm": 0.8920990399552023,
      "learning_rate": 1.6217667167566103e-06,
      "loss": 0.1214,
      "step": 6876
    },
    {
      "epoch": 0.6336204910858249,
      "grad_norm": 0.9034818516436899,
      "learning_rate": 1.6210535265297389e-06,
      "loss": 0.1195,
      "step": 6877
    },
    {
      "epoch": 0.6337126272630949,
      "grad_norm": 0.9099483703963683,
      "learning_rate": 1.6203404179131415e-06,
      "loss": 0.1269,
      "step": 6878
    },
    {
      "epoch": 0.6338047634403648,
      "grad_norm": 0.919892064573786,
      "learning_rate": 1.6196273909730303e-06,
      "loss": 0.1291,
      "step": 6879
    },
    {
      "epoch": 0.6338968996176348,
      "grad_norm": 0.9388586022519625,
      "learning_rate": 1.6189144457756118e-06,
      "loss": 0.1282,
      "step": 6880
    },
    {
      "epoch": 0.6339890357949048,
      "grad_norm": 0.9493861638877241,
      "learning_rate": 1.6182015823870805e-06,
      "loss": 0.129,
      "step": 6881
    },
    {
      "epoch": 0.6340811719721748,
      "grad_norm": 0.9149259117449804,
      "learning_rate": 1.617488800873629e-06,
      "loss": 0.1364,
      "step": 6882
    },
    {
      "epoch": 0.6341733081494448,
      "grad_norm": 0.9344518751317833,
      "learning_rate": 1.616776101301436e-06,
      "loss": 0.1343,
      "step": 6883
    },
    {
      "epoch": 0.6342654443267148,
      "grad_norm": 0.8832340238190585,
      "learning_rate": 1.6160634837366771e-06,
      "loss": 0.1162,
      "step": 6884
    },
    {
      "epoch": 0.6343575805039849,
      "grad_norm": 0.8869408664575485,
      "learning_rate": 1.615350948245519e-06,
      "loss": 0.1304,
      "step": 6885
    },
    {
      "epoch": 0.6344497166812549,
      "grad_norm": 0.9268014816586394,
      "learning_rate": 1.6146384948941213e-06,
      "loss": 0.1257,
      "step": 6886
    },
    {
      "epoch": 0.6345418528585249,
      "grad_norm": 0.9930422635091828,
      "learning_rate": 1.6139261237486337e-06,
      "loss": 0.1316,
      "step": 6887
    },
    {
      "epoch": 0.6346339890357949,
      "grad_norm": 0.9828861548408764,
      "learning_rate": 1.6132138348752013e-06,
      "loss": 0.1396,
      "step": 6888
    },
    {
      "epoch": 0.6347261252130649,
      "grad_norm": 0.9392508922508789,
      "learning_rate": 1.6125016283399592e-06,
      "loss": 0.1238,
      "step": 6889
    },
    {
      "epoch": 0.6348182613903349,
      "grad_norm": 0.9367414950444616,
      "learning_rate": 1.6117895042090374e-06,
      "loss": 0.1369,
      "step": 6890
    },
    {
      "epoch": 0.6349103975676049,
      "grad_norm": 0.8913932424175949,
      "learning_rate": 1.6110774625485554e-06,
      "loss": 0.1277,
      "step": 6891
    },
    {
      "epoch": 0.6350025337448749,
      "grad_norm": 0.9487507579836935,
      "learning_rate": 1.6103655034246256e-06,
      "loss": 0.1293,
      "step": 6892
    },
    {
      "epoch": 0.6350946699221449,
      "grad_norm": 0.9078867583893813,
      "learning_rate": 1.6096536269033557e-06,
      "loss": 0.1253,
      "step": 6893
    },
    {
      "epoch": 0.6351868060994149,
      "grad_norm": 0.9657934294617364,
      "learning_rate": 1.6089418330508427e-06,
      "loss": 0.1303,
      "step": 6894
    },
    {
      "epoch": 0.6352789422766849,
      "grad_norm": 0.9369656368147398,
      "learning_rate": 1.6082301219331754e-06,
      "loss": 0.1361,
      "step": 6895
    },
    {
      "epoch": 0.6353710784539549,
      "grad_norm": 0.9221568322665612,
      "learning_rate": 1.6075184936164377e-06,
      "loss": 0.1186,
      "step": 6896
    },
    {
      "epoch": 0.6354632146312249,
      "grad_norm": 0.9646737106447447,
      "learning_rate": 1.606806948166703e-06,
      "loss": 0.1283,
      "step": 6897
    },
    {
      "epoch": 0.6355553508084949,
      "grad_norm": 0.985452183450984,
      "learning_rate": 1.606095485650041e-06,
      "loss": 0.1385,
      "step": 6898
    },
    {
      "epoch": 0.635647486985765,
      "grad_norm": 0.9455020939527603,
      "learning_rate": 1.6053841061325086e-06,
      "loss": 0.1298,
      "step": 6899
    },
    {
      "epoch": 0.635739623163035,
      "grad_norm": 0.9890402174988893,
      "learning_rate": 1.6046728096801575e-06,
      "loss": 0.1357,
      "step": 6900
    },
    {
      "epoch": 0.635831759340305,
      "grad_norm": 0.9609982352039083,
      "learning_rate": 1.6039615963590332e-06,
      "loss": 0.126,
      "step": 6901
    },
    {
      "epoch": 0.635923895517575,
      "grad_norm": 0.956556821723389,
      "learning_rate": 1.6032504662351713e-06,
      "loss": 0.1325,
      "step": 6902
    },
    {
      "epoch": 0.636016031694845,
      "grad_norm": 0.9432174119765748,
      "learning_rate": 1.6025394193745993e-06,
      "loss": 0.1276,
      "step": 6903
    },
    {
      "epoch": 0.636108167872115,
      "grad_norm": 0.9059817128700657,
      "learning_rate": 1.6018284558433395e-06,
      "loss": 0.1302,
      "step": 6904
    },
    {
      "epoch": 0.636200304049385,
      "grad_norm": 0.9010824136241979,
      "learning_rate": 1.6011175757074035e-06,
      "loss": 0.1273,
      "step": 6905
    },
    {
      "epoch": 0.636292440226655,
      "grad_norm": 0.9038667529472869,
      "learning_rate": 1.6004067790327983e-06,
      "loss": 0.1332,
      "step": 6906
    },
    {
      "epoch": 0.636384576403925,
      "grad_norm": 0.9335549013514323,
      "learning_rate": 1.5996960658855201e-06,
      "loss": 0.1453,
      "step": 6907
    },
    {
      "epoch": 0.636476712581195,
      "grad_norm": 0.9466004752685642,
      "learning_rate": 1.5989854363315585e-06,
      "loss": 0.136,
      "step": 6908
    },
    {
      "epoch": 0.636568848758465,
      "grad_norm": 0.9078843220602426,
      "learning_rate": 1.5982748904368966e-06,
      "loss": 0.132,
      "step": 6909
    },
    {
      "epoch": 0.636660984935735,
      "grad_norm": 0.9085035676803845,
      "learning_rate": 1.5975644282675077e-06,
      "loss": 0.1229,
      "step": 6910
    },
    {
      "epoch": 0.636753121113005,
      "grad_norm": 0.9707588244468391,
      "learning_rate": 1.5968540498893598e-06,
      "loss": 0.1399,
      "step": 6911
    },
    {
      "epoch": 0.6368452572902751,
      "grad_norm": 0.903772261250293,
      "learning_rate": 1.59614375536841e-06,
      "loss": 0.1325,
      "step": 6912
    },
    {
      "epoch": 0.6369373934675451,
      "grad_norm": 0.877488888929862,
      "learning_rate": 1.5954335447706093e-06,
      "loss": 0.1202,
      "step": 6913
    },
    {
      "epoch": 0.6370295296448151,
      "grad_norm": 0.9048261590261063,
      "learning_rate": 1.5947234181619017e-06,
      "loss": 0.126,
      "step": 6914
    },
    {
      "epoch": 0.6371216658220851,
      "grad_norm": 0.8992318434307321,
      "learning_rate": 1.5940133756082226e-06,
      "loss": 0.1235,
      "step": 6915
    },
    {
      "epoch": 0.6372138019993551,
      "grad_norm": 0.8816222891695241,
      "learning_rate": 1.5933034171754985e-06,
      "loss": 0.1233,
      "step": 6916
    },
    {
      "epoch": 0.6373059381766251,
      "grad_norm": 0.9163390848974939,
      "learning_rate": 1.5925935429296499e-06,
      "loss": 0.1227,
      "step": 6917
    },
    {
      "epoch": 0.6373980743538951,
      "grad_norm": 0.9121612444035625,
      "learning_rate": 1.5918837529365884e-06,
      "loss": 0.1197,
      "step": 6918
    },
    {
      "epoch": 0.6374902105311651,
      "grad_norm": 0.9739517316471556,
      "learning_rate": 1.5911740472622184e-06,
      "loss": 0.1325,
      "step": 6919
    },
    {
      "epoch": 0.637582346708435,
      "grad_norm": 0.9782508014301649,
      "learning_rate": 1.590464425972436e-06,
      "loss": 0.1369,
      "step": 6920
    },
    {
      "epoch": 0.637674482885705,
      "grad_norm": 0.9666734281729935,
      "learning_rate": 1.5897548891331288e-06,
      "loss": 0.1424,
      "step": 6921
    },
    {
      "epoch": 0.637766619062975,
      "grad_norm": 0.928424842338119,
      "learning_rate": 1.5890454368101788e-06,
      "loss": 0.1319,
      "step": 6922
    },
    {
      "epoch": 0.637858755240245,
      "grad_norm": 0.9400505264698045,
      "learning_rate": 1.5883360690694582e-06,
      "loss": 0.1248,
      "step": 6923
    },
    {
      "epoch": 0.637950891417515,
      "grad_norm": 0.8670288793128172,
      "learning_rate": 1.587626785976831e-06,
      "loss": 0.1156,
      "step": 6924
    },
    {
      "epoch": 0.638043027594785,
      "grad_norm": 0.8702635503997724,
      "learning_rate": 1.5869175875981551e-06,
      "loss": 0.1228,
      "step": 6925
    },
    {
      "epoch": 0.6381351637720551,
      "grad_norm": 0.9992073872034697,
      "learning_rate": 1.5862084739992794e-06,
      "loss": 0.1425,
      "step": 6926
    },
    {
      "epoch": 0.6382272999493251,
      "grad_norm": 0.9676724696571829,
      "learning_rate": 1.585499445246046e-06,
      "loss": 0.1381,
      "step": 6927
    },
    {
      "epoch": 0.6383194361265951,
      "grad_norm": 0.9479667525779824,
      "learning_rate": 1.584790501404287e-06,
      "loss": 0.1414,
      "step": 6928
    },
    {
      "epoch": 0.6384115723038651,
      "grad_norm": 0.9202645800521777,
      "learning_rate": 1.5840816425398282e-06,
      "loss": 0.1329,
      "step": 6929
    },
    {
      "epoch": 0.6385037084811351,
      "grad_norm": 0.9281144590684807,
      "learning_rate": 1.5833728687184868e-06,
      "loss": 0.1288,
      "step": 6930
    },
    {
      "epoch": 0.6385958446584051,
      "grad_norm": 0.9212398432492384,
      "learning_rate": 1.5826641800060755e-06,
      "loss": 0.1235,
      "step": 6931
    },
    {
      "epoch": 0.6386879808356751,
      "grad_norm": 0.907492438307411,
      "learning_rate": 1.581955576468392e-06,
      "loss": 0.1297,
      "step": 6932
    },
    {
      "epoch": 0.6387801170129451,
      "grad_norm": 0.9343330686647542,
      "learning_rate": 1.581247058171232e-06,
      "loss": 0.1308,
      "step": 6933
    },
    {
      "epoch": 0.6388722531902151,
      "grad_norm": 0.8988949949443719,
      "learning_rate": 1.5805386251803818e-06,
      "loss": 0.1183,
      "step": 6934
    },
    {
      "epoch": 0.6389643893674851,
      "grad_norm": 0.9210865319052437,
      "learning_rate": 1.5798302775616198e-06,
      "loss": 0.1257,
      "step": 6935
    },
    {
      "epoch": 0.6390565255447551,
      "grad_norm": 0.8669787078566003,
      "learning_rate": 1.5791220153807146e-06,
      "loss": 0.1099,
      "step": 6936
    },
    {
      "epoch": 0.6391486617220251,
      "grad_norm": 0.9472952498117674,
      "learning_rate": 1.5784138387034302e-06,
      "loss": 0.1283,
      "step": 6937
    },
    {
      "epoch": 0.6392407978992951,
      "grad_norm": 0.9301216379397955,
      "learning_rate": 1.5777057475955194e-06,
      "loss": 0.1332,
      "step": 6938
    },
    {
      "epoch": 0.6393329340765652,
      "grad_norm": 0.9069566491323627,
      "learning_rate": 1.5769977421227295e-06,
      "loss": 0.1201,
      "step": 6939
    },
    {
      "epoch": 0.6394250702538352,
      "grad_norm": 0.9762702867137394,
      "learning_rate": 1.5762898223507989e-06,
      "loss": 0.1368,
      "step": 6940
    },
    {
      "epoch": 0.6395172064311052,
      "grad_norm": 0.9753252251298636,
      "learning_rate": 1.575581988345457e-06,
      "loss": 0.1377,
      "step": 6941
    },
    {
      "epoch": 0.6396093426083752,
      "grad_norm": 0.9245488069439479,
      "learning_rate": 1.5748742401724276e-06,
      "loss": 0.1277,
      "step": 6942
    },
    {
      "epoch": 0.6397014787856452,
      "grad_norm": 0.9222913829706925,
      "learning_rate": 1.5741665778974239e-06,
      "loss": 0.1288,
      "step": 6943
    },
    {
      "epoch": 0.6397936149629152,
      "grad_norm": 0.9101223878977888,
      "learning_rate": 1.5734590015861539e-06,
      "loss": 0.1309,
      "step": 6944
    },
    {
      "epoch": 0.6398857511401852,
      "grad_norm": 0.960200928451764,
      "learning_rate": 1.5727515113043152e-06,
      "loss": 0.1398,
      "step": 6945
    },
    {
      "epoch": 0.6399778873174552,
      "grad_norm": 0.8788028856828145,
      "learning_rate": 1.5720441071175976e-06,
      "loss": 0.1145,
      "step": 6946
    },
    {
      "epoch": 0.6400700234947252,
      "grad_norm": 0.8708461112250071,
      "learning_rate": 1.5713367890916852e-06,
      "loss": 0.1225,
      "step": 6947
    },
    {
      "epoch": 0.6401621596719952,
      "grad_norm": 0.8772041016771617,
      "learning_rate": 1.5706295572922524e-06,
      "loss": 0.1173,
      "step": 6948
    },
    {
      "epoch": 0.6402542958492652,
      "grad_norm": 0.897848427339447,
      "learning_rate": 1.5699224117849644e-06,
      "loss": 0.1382,
      "step": 6949
    },
    {
      "epoch": 0.6403464320265352,
      "grad_norm": 0.8624040978043598,
      "learning_rate": 1.569215352635481e-06,
      "loss": 0.1172,
      "step": 6950
    },
    {
      "epoch": 0.6404385682038052,
      "grad_norm": 0.9391168244715019,
      "learning_rate": 1.5685083799094513e-06,
      "loss": 0.1246,
      "step": 6951
    },
    {
      "epoch": 0.6405307043810752,
      "grad_norm": 0.9483088170825726,
      "learning_rate": 1.56780149367252e-06,
      "loss": 0.1306,
      "step": 6952
    },
    {
      "epoch": 0.6406228405583453,
      "grad_norm": 0.9378810721645574,
      "learning_rate": 1.5670946939903201e-06,
      "loss": 0.1317,
      "step": 6953
    },
    {
      "epoch": 0.6407149767356153,
      "grad_norm": 0.8918097080165212,
      "learning_rate": 1.5663879809284777e-06,
      "loss": 0.1223,
      "step": 6954
    },
    {
      "epoch": 0.6408071129128853,
      "grad_norm": 0.9183976546797075,
      "learning_rate": 1.565681354552612e-06,
      "loss": 0.1322,
      "step": 6955
    },
    {
      "epoch": 0.6408992490901553,
      "grad_norm": 0.8927835519177011,
      "learning_rate": 1.5649748149283339e-06,
      "loss": 0.1156,
      "step": 6956
    },
    {
      "epoch": 0.6409913852674253,
      "grad_norm": 0.9527061176749169,
      "learning_rate": 1.5642683621212435e-06,
      "loss": 0.1264,
      "step": 6957
    },
    {
      "epoch": 0.6410835214446953,
      "grad_norm": 0.9276007981920011,
      "learning_rate": 1.5635619961969372e-06,
      "loss": 0.1224,
      "step": 6958
    },
    {
      "epoch": 0.6411756576219653,
      "grad_norm": 0.9341219872288229,
      "learning_rate": 1.5628557172209997e-06,
      "loss": 0.1242,
      "step": 6959
    },
    {
      "epoch": 0.6412677937992353,
      "grad_norm": 0.9087165767479295,
      "learning_rate": 1.5621495252590108e-06,
      "loss": 0.1232,
      "step": 6960
    },
    {
      "epoch": 0.6413599299765053,
      "grad_norm": 0.9845108733921047,
      "learning_rate": 1.561443420376539e-06,
      "loss": 0.1318,
      "step": 6961
    },
    {
      "epoch": 0.6414520661537753,
      "grad_norm": 0.8579120613839665,
      "learning_rate": 1.560737402639146e-06,
      "loss": 0.1142,
      "step": 6962
    },
    {
      "epoch": 0.6415442023310453,
      "grad_norm": 0.9145911360522224,
      "learning_rate": 1.5600314721123866e-06,
      "loss": 0.1305,
      "step": 6963
    },
    {
      "epoch": 0.6416363385083153,
      "grad_norm": 1.0023486798357177,
      "learning_rate": 1.5593256288618067e-06,
      "loss": 0.1388,
      "step": 6964
    },
    {
      "epoch": 0.6417284746855852,
      "grad_norm": 0.897665746625067,
      "learning_rate": 1.5586198729529422e-06,
      "loss": 0.1224,
      "step": 6965
    },
    {
      "epoch": 0.6418206108628552,
      "grad_norm": 0.9742657750149276,
      "learning_rate": 1.5579142044513248e-06,
      "loss": 0.1296,
      "step": 6966
    },
    {
      "epoch": 0.6419127470401254,
      "grad_norm": 0.9864819104891229,
      "learning_rate": 1.5572086234224743e-06,
      "loss": 0.127,
      "step": 6967
    },
    {
      "epoch": 0.6420048832173954,
      "grad_norm": 0.9440948450795339,
      "learning_rate": 1.556503129931905e-06,
      "loss": 0.1329,
      "step": 6968
    },
    {
      "epoch": 0.6420970193946653,
      "grad_norm": 0.8435423134543755,
      "learning_rate": 1.5557977240451223e-06,
      "loss": 0.1153,
      "step": 6969
    },
    {
      "epoch": 0.6421891555719353,
      "grad_norm": 0.9570481012038368,
      "learning_rate": 1.5550924058276213e-06,
      "loss": 0.127,
      "step": 6970
    },
    {
      "epoch": 0.6422812917492053,
      "grad_norm": 0.9603904086633094,
      "learning_rate": 1.5543871753448924e-06,
      "loss": 0.1355,
      "step": 6971
    },
    {
      "epoch": 0.6423734279264753,
      "grad_norm": 0.8370803839751185,
      "learning_rate": 1.5536820326624159e-06,
      "loss": 0.1134,
      "step": 6972
    },
    {
      "epoch": 0.6424655641037453,
      "grad_norm": 0.878586123110724,
      "learning_rate": 1.5529769778456654e-06,
      "loss": 0.1255,
      "step": 6973
    },
    {
      "epoch": 0.6425577002810153,
      "grad_norm": 0.934161796637777,
      "learning_rate": 1.5522720109601039e-06,
      "loss": 0.1338,
      "step": 6974
    },
    {
      "epoch": 0.6426498364582853,
      "grad_norm": 0.9010882629753968,
      "learning_rate": 1.5515671320711877e-06,
      "loss": 0.1239,
      "step": 6975
    },
    {
      "epoch": 0.6427419726355553,
      "grad_norm": 0.8943824566421207,
      "learning_rate": 1.5508623412443657e-06,
      "loss": 0.1302,
      "step": 6976
    },
    {
      "epoch": 0.6428341088128253,
      "grad_norm": 0.9430422757119458,
      "learning_rate": 1.5501576385450785e-06,
      "loss": 0.138,
      "step": 6977
    },
    {
      "epoch": 0.6429262449900953,
      "grad_norm": 0.8608802032031488,
      "learning_rate": 1.5494530240387552e-06,
      "loss": 0.1157,
      "step": 6978
    },
    {
      "epoch": 0.6430183811673653,
      "grad_norm": 0.8931155875083723,
      "learning_rate": 1.5487484977908219e-06,
      "loss": 0.1209,
      "step": 6979
    },
    {
      "epoch": 0.6431105173446354,
      "grad_norm": 0.934058212786175,
      "learning_rate": 1.5480440598666918e-06,
      "loss": 0.1286,
      "step": 6980
    },
    {
      "epoch": 0.6432026535219054,
      "grad_norm": 0.9499195487628329,
      "learning_rate": 1.5473397103317748e-06,
      "loss": 0.1289,
      "step": 6981
    },
    {
      "epoch": 0.6432947896991754,
      "grad_norm": 0.9172407650858186,
      "learning_rate": 1.5466354492514675e-06,
      "loss": 0.1234,
      "step": 6982
    },
    {
      "epoch": 0.6433869258764454,
      "grad_norm": 0.9040807218912611,
      "learning_rate": 1.5459312766911607e-06,
      "loss": 0.1147,
      "step": 6983
    },
    {
      "epoch": 0.6434790620537154,
      "grad_norm": 0.9821552436596821,
      "learning_rate": 1.5452271927162381e-06,
      "loss": 0.1332,
      "step": 6984
    },
    {
      "epoch": 0.6435711982309854,
      "grad_norm": 0.9677205513939832,
      "learning_rate": 1.5445231973920744e-06,
      "loss": 0.1383,
      "step": 6985
    },
    {
      "epoch": 0.6436633344082554,
      "grad_norm": 0.9278700960504868,
      "learning_rate": 1.543819290784033e-06,
      "loss": 0.1203,
      "step": 6986
    },
    {
      "epoch": 0.6437554705855254,
      "grad_norm": 1.0164771249246947,
      "learning_rate": 1.5431154729574743e-06,
      "loss": 0.1382,
      "step": 6987
    },
    {
      "epoch": 0.6438476067627954,
      "grad_norm": 0.9576436472546979,
      "learning_rate": 1.5424117439777458e-06,
      "loss": 0.1241,
      "step": 6988
    },
    {
      "epoch": 0.6439397429400654,
      "grad_norm": 0.8990740819754796,
      "learning_rate": 1.5417081039101916e-06,
      "loss": 0.1356,
      "step": 6989
    },
    {
      "epoch": 0.6440318791173354,
      "grad_norm": 0.9008021527591362,
      "learning_rate": 1.5410045528201423e-06,
      "loss": 0.1152,
      "step": 6990
    },
    {
      "epoch": 0.6441240152946054,
      "grad_norm": 0.8895201725561562,
      "learning_rate": 1.5403010907729233e-06,
      "loss": 0.1225,
      "step": 6991
    },
    {
      "epoch": 0.6442161514718754,
      "grad_norm": 0.9060414956049287,
      "learning_rate": 1.5395977178338511e-06,
      "loss": 0.1231,
      "step": 6992
    },
    {
      "epoch": 0.6443082876491454,
      "grad_norm": 0.9858058843844217,
      "learning_rate": 1.5388944340682352e-06,
      "loss": 0.1293,
      "step": 6993
    },
    {
      "epoch": 0.6444004238264155,
      "grad_norm": 0.9830503873904406,
      "learning_rate": 1.5381912395413733e-06,
      "loss": 0.1328,
      "step": 6994
    },
    {
      "epoch": 0.6444925600036855,
      "grad_norm": 0.8953062308564167,
      "learning_rate": 1.5374881343185592e-06,
      "loss": 0.1175,
      "step": 6995
    },
    {
      "epoch": 0.6445846961809555,
      "grad_norm": 0.932027313269444,
      "learning_rate": 1.5367851184650745e-06,
      "loss": 0.1366,
      "step": 6996
    },
    {
      "epoch": 0.6446768323582255,
      "grad_norm": 0.9017210709791099,
      "learning_rate": 1.536082192046196e-06,
      "loss": 0.1241,
      "step": 6997
    },
    {
      "epoch": 0.6447689685354955,
      "grad_norm": 0.8736522849637017,
      "learning_rate": 1.53537935512719e-06,
      "loss": 0.124,
      "step": 6998
    },
    {
      "epoch": 0.6448611047127655,
      "grad_norm": 0.9243176302835587,
      "learning_rate": 1.5346766077733138e-06,
      "loss": 0.1266,
      "step": 6999
    },
    {
      "epoch": 0.6449532408900355,
      "grad_norm": 0.9500637805596197,
      "learning_rate": 1.5339739500498189e-06,
      "loss": 0.137,
      "step": 7000
    },
    {
      "epoch": 0.6449532408900355,
      "eval_loss": 0.1284143477678299,
      "eval_runtime": 299.5444,
      "eval_samples_per_second": 23.426,
      "eval_steps_per_second": 2.931,
      "step": 7000
    },
    {
      "epoch": 0.6450453770673055,
      "grad_norm": 0.9176312502247079,
      "learning_rate": 1.5332713820219461e-06,
      "loss": 0.124,
      "step": 7001
    },
    {
      "epoch": 0.6451375132445755,
      "grad_norm": 0.9517224833402965,
      "learning_rate": 1.5325689037549307e-06,
      "loss": 0.1278,
      "step": 7002
    },
    {
      "epoch": 0.6452296494218455,
      "grad_norm": 0.9012201702409783,
      "learning_rate": 1.531866515313996e-06,
      "loss": 0.1294,
      "step": 7003
    },
    {
      "epoch": 0.6453217855991155,
      "grad_norm": 0.870380747830478,
      "learning_rate": 1.5311642167643592e-06,
      "loss": 0.1127,
      "step": 7004
    },
    {
      "epoch": 0.6454139217763855,
      "grad_norm": 0.9019711406867783,
      "learning_rate": 1.530462008171229e-06,
      "loss": 0.1284,
      "step": 7005
    },
    {
      "epoch": 0.6455060579536555,
      "grad_norm": 0.9153502803076784,
      "learning_rate": 1.5297598895998076e-06,
      "loss": 0.128,
      "step": 7006
    },
    {
      "epoch": 0.6455981941309256,
      "grad_norm": 0.9397863736754628,
      "learning_rate": 1.529057861115283e-06,
      "loss": 0.1225,
      "step": 7007
    },
    {
      "epoch": 0.6456903303081956,
      "grad_norm": 0.9821388898224823,
      "learning_rate": 1.5283559227828404e-06,
      "loss": 0.1292,
      "step": 7008
    },
    {
      "epoch": 0.6457824664854656,
      "grad_norm": 0.9461970827854304,
      "learning_rate": 1.5276540746676558e-06,
      "loss": 0.1184,
      "step": 7009
    },
    {
      "epoch": 0.6458746026627356,
      "grad_norm": 0.9689676881269678,
      "learning_rate": 1.5269523168348954e-06,
      "loss": 0.1319,
      "step": 7010
    },
    {
      "epoch": 0.6459667388400055,
      "grad_norm": 0.9410910844995799,
      "learning_rate": 1.5262506493497159e-06,
      "loss": 0.1259,
      "step": 7011
    },
    {
      "epoch": 0.6460588750172755,
      "grad_norm": 0.983799739077306,
      "learning_rate": 1.525549072277269e-06,
      "loss": 0.1288,
      "step": 7012
    },
    {
      "epoch": 0.6461510111945455,
      "grad_norm": 0.8946262414629372,
      "learning_rate": 1.524847585682695e-06,
      "loss": 0.1265,
      "step": 7013
    },
    {
      "epoch": 0.6462431473718155,
      "grad_norm": 0.925928189408186,
      "learning_rate": 1.5241461896311288e-06,
      "loss": 0.1333,
      "step": 7014
    },
    {
      "epoch": 0.6463352835490855,
      "grad_norm": 0.9306429155305737,
      "learning_rate": 1.5234448841876935e-06,
      "loss": 0.1275,
      "step": 7015
    },
    {
      "epoch": 0.6464274197263555,
      "grad_norm": 0.9580787768584496,
      "learning_rate": 1.5227436694175052e-06,
      "loss": 0.1344,
      "step": 7016
    },
    {
      "epoch": 0.6465195559036255,
      "grad_norm": 0.917554371682307,
      "learning_rate": 1.5220425453856728e-06,
      "loss": 0.1173,
      "step": 7017
    },
    {
      "epoch": 0.6466116920808955,
      "grad_norm": 0.9025472883792388,
      "learning_rate": 1.5213415121572959e-06,
      "loss": 0.1194,
      "step": 7018
    },
    {
      "epoch": 0.6467038282581655,
      "grad_norm": 0.9103463103642951,
      "learning_rate": 1.5206405697974635e-06,
      "loss": 0.1353,
      "step": 7019
    },
    {
      "epoch": 0.6467959644354355,
      "grad_norm": 0.9179849344012468,
      "learning_rate": 1.5199397183712606e-06,
      "loss": 0.1301,
      "step": 7020
    },
    {
      "epoch": 0.6468881006127056,
      "grad_norm": 0.9046015817619245,
      "learning_rate": 1.5192389579437596e-06,
      "loss": 0.1208,
      "step": 7021
    },
    {
      "epoch": 0.6469802367899756,
      "grad_norm": 0.9338287299267279,
      "learning_rate": 1.5185382885800282e-06,
      "loss": 0.13,
      "step": 7022
    },
    {
      "epoch": 0.6470723729672456,
      "grad_norm": 0.8913972315365095,
      "learning_rate": 1.5178377103451213e-06,
      "loss": 0.1284,
      "step": 7023
    },
    {
      "epoch": 0.6471645091445156,
      "grad_norm": 0.9767424424928457,
      "learning_rate": 1.5171372233040887e-06,
      "loss": 0.1427,
      "step": 7024
    },
    {
      "epoch": 0.6472566453217856,
      "grad_norm": 0.9782173682999218,
      "learning_rate": 1.516436827521971e-06,
      "loss": 0.1308,
      "step": 7025
    },
    {
      "epoch": 0.6473487814990556,
      "grad_norm": 0.8844971219928716,
      "learning_rate": 1.5157365230637993e-06,
      "loss": 0.1233,
      "step": 7026
    },
    {
      "epoch": 0.6474409176763256,
      "grad_norm": 0.9548655466581969,
      "learning_rate": 1.5150363099945984e-06,
      "loss": 0.133,
      "step": 7027
    },
    {
      "epoch": 0.6475330538535956,
      "grad_norm": 0.9232492722707007,
      "learning_rate": 1.5143361883793814e-06,
      "loss": 0.1379,
      "step": 7028
    },
    {
      "epoch": 0.6476251900308656,
      "grad_norm": 0.9127807484244733,
      "learning_rate": 1.513636158283155e-06,
      "loss": 0.1252,
      "step": 7029
    },
    {
      "epoch": 0.6477173262081356,
      "grad_norm": 0.9288643059106027,
      "learning_rate": 1.512936219770918e-06,
      "loss": 0.1258,
      "step": 7030
    },
    {
      "epoch": 0.6478094623854056,
      "grad_norm": 0.9159865631279025,
      "learning_rate": 1.5122363729076595e-06,
      "loss": 0.1152,
      "step": 7031
    },
    {
      "epoch": 0.6479015985626756,
      "grad_norm": 0.9582038384688031,
      "learning_rate": 1.5115366177583596e-06,
      "loss": 0.1245,
      "step": 7032
    },
    {
      "epoch": 0.6479937347399456,
      "grad_norm": 0.9135516336834137,
      "learning_rate": 1.510836954387991e-06,
      "loss": 0.1258,
      "step": 7033
    },
    {
      "epoch": 0.6480858709172156,
      "grad_norm": 0.9392462355403146,
      "learning_rate": 1.5101373828615172e-06,
      "loss": 0.1286,
      "step": 7034
    },
    {
      "epoch": 0.6481780070944857,
      "grad_norm": 0.8760360479867124,
      "learning_rate": 1.5094379032438956e-06,
      "loss": 0.1309,
      "step": 7035
    },
    {
      "epoch": 0.6482701432717557,
      "grad_norm": 0.8521056439065474,
      "learning_rate": 1.50873851560007e-06,
      "loss": 0.1213,
      "step": 7036
    },
    {
      "epoch": 0.6483622794490257,
      "grad_norm": 0.9119974004512107,
      "learning_rate": 1.50803921999498e-06,
      "loss": 0.1277,
      "step": 7037
    },
    {
      "epoch": 0.6484544156262957,
      "grad_norm": 0.864820230481895,
      "learning_rate": 1.5073400164935554e-06,
      "loss": 0.1098,
      "step": 7038
    },
    {
      "epoch": 0.6485465518035657,
      "grad_norm": 0.9371074072350675,
      "learning_rate": 1.5066409051607175e-06,
      "loss": 0.1283,
      "step": 7039
    },
    {
      "epoch": 0.6486386879808357,
      "grad_norm": 0.9440464798003158,
      "learning_rate": 1.5059418860613779e-06,
      "loss": 0.1239,
      "step": 7040
    },
    {
      "epoch": 0.6487308241581057,
      "grad_norm": 1.0084876710261992,
      "learning_rate": 1.5052429592604411e-06,
      "loss": 0.1346,
      "step": 7041
    },
    {
      "epoch": 0.6488229603353757,
      "grad_norm": 0.9344953956197914,
      "learning_rate": 1.5045441248228024e-06,
      "loss": 0.1346,
      "step": 7042
    },
    {
      "epoch": 0.6489150965126457,
      "grad_norm": 0.9987049159397682,
      "learning_rate": 1.5038453828133498e-06,
      "loss": 0.1408,
      "step": 7043
    },
    {
      "epoch": 0.6490072326899157,
      "grad_norm": 0.9838768617974539,
      "learning_rate": 1.50314673329696e-06,
      "loss": 0.1397,
      "step": 7044
    },
    {
      "epoch": 0.6490993688671857,
      "grad_norm": 0.8951590164756219,
      "learning_rate": 1.502448176338503e-06,
      "loss": 0.1207,
      "step": 7045
    },
    {
      "epoch": 0.6491915050444557,
      "grad_norm": 0.8974579872474433,
      "learning_rate": 1.5017497120028404e-06,
      "loss": 0.1296,
      "step": 7046
    },
    {
      "epoch": 0.6492836412217257,
      "grad_norm": 0.8768387129540236,
      "learning_rate": 1.5010513403548253e-06,
      "loss": 0.1255,
      "step": 7047
    },
    {
      "epoch": 0.6493757773989958,
      "grad_norm": 0.8970459648430776,
      "learning_rate": 1.5003530614592995e-06,
      "loss": 0.1323,
      "step": 7048
    },
    {
      "epoch": 0.6494679135762658,
      "grad_norm": 0.9665064017272036,
      "learning_rate": 1.4996548753811001e-06,
      "loss": 0.1355,
      "step": 7049
    },
    {
      "epoch": 0.6495600497535358,
      "grad_norm": 0.8812970511013521,
      "learning_rate": 1.4989567821850527e-06,
      "loss": 0.1211,
      "step": 7050
    },
    {
      "epoch": 0.6496521859308058,
      "grad_norm": 0.9746647652708732,
      "learning_rate": 1.4982587819359767e-06,
      "loss": 0.1253,
      "step": 7051
    },
    {
      "epoch": 0.6497443221080758,
      "grad_norm": 0.932466168486249,
      "learning_rate": 1.4975608746986802e-06,
      "loss": 0.1296,
      "step": 7052
    },
    {
      "epoch": 0.6498364582853458,
      "grad_norm": 0.9063329649266557,
      "learning_rate": 1.4968630605379642e-06,
      "loss": 0.1188,
      "step": 7053
    },
    {
      "epoch": 0.6499285944626157,
      "grad_norm": 0.9583750002128922,
      "learning_rate": 1.496165339518621e-06,
      "loss": 0.1356,
      "step": 7054
    },
    {
      "epoch": 0.6500207306398857,
      "grad_norm": 0.9622381338587453,
      "learning_rate": 1.495467711705434e-06,
      "loss": 0.1427,
      "step": 7055
    },
    {
      "epoch": 0.6501128668171557,
      "grad_norm": 0.8843393452517173,
      "learning_rate": 1.4947701771631788e-06,
      "loss": 0.1214,
      "step": 7056
    },
    {
      "epoch": 0.6502050029944257,
      "grad_norm": 0.9571823770880513,
      "learning_rate": 1.4940727359566205e-06,
      "loss": 0.14,
      "step": 7057
    },
    {
      "epoch": 0.6502971391716957,
      "grad_norm": 0.8934023203022077,
      "learning_rate": 1.493375388150516e-06,
      "loss": 0.1211,
      "step": 7058
    },
    {
      "epoch": 0.6503892753489657,
      "grad_norm": 0.9452011328243307,
      "learning_rate": 1.4926781338096158e-06,
      "loss": 0.135,
      "step": 7059
    },
    {
      "epoch": 0.6504814115262357,
      "grad_norm": 0.9565891549170992,
      "learning_rate": 1.4919809729986598e-06,
      "loss": 0.1308,
      "step": 7060
    },
    {
      "epoch": 0.6505735477035057,
      "grad_norm": 0.927163730231958,
      "learning_rate": 1.491283905782378e-06,
      "loss": 0.116,
      "step": 7061
    },
    {
      "epoch": 0.6506656838807758,
      "grad_norm": 0.9764884266369946,
      "learning_rate": 1.4905869322254946e-06,
      "loss": 0.1253,
      "step": 7062
    },
    {
      "epoch": 0.6507578200580458,
      "grad_norm": 0.9093361220955308,
      "learning_rate": 1.4898900523927224e-06,
      "loss": 0.1217,
      "step": 7063
    },
    {
      "epoch": 0.6508499562353158,
      "grad_norm": 1.00659867942203,
      "learning_rate": 1.489193266348769e-06,
      "loss": 0.1323,
      "step": 7064
    },
    {
      "epoch": 0.6509420924125858,
      "grad_norm": 0.9328286355155382,
      "learning_rate": 1.4884965741583288e-06,
      "loss": 0.1242,
      "step": 7065
    },
    {
      "epoch": 0.6510342285898558,
      "grad_norm": 0.9492231265782749,
      "learning_rate": 1.48779997588609e-06,
      "loss": 0.1352,
      "step": 7066
    },
    {
      "epoch": 0.6511263647671258,
      "grad_norm": 0.9336815769927269,
      "learning_rate": 1.4871034715967331e-06,
      "loss": 0.1321,
      "step": 7067
    },
    {
      "epoch": 0.6512185009443958,
      "grad_norm": 0.8837768145065813,
      "learning_rate": 1.4864070613549284e-06,
      "loss": 0.1234,
      "step": 7068
    },
    {
      "epoch": 0.6513106371216658,
      "grad_norm": 0.8757121977193029,
      "learning_rate": 1.485710745225336e-06,
      "loss": 0.1177,
      "step": 7069
    },
    {
      "epoch": 0.6514027732989358,
      "grad_norm": 0.925605697107438,
      "learning_rate": 1.4850145232726104e-06,
      "loss": 0.1207,
      "step": 7070
    },
    {
      "epoch": 0.6514949094762058,
      "grad_norm": 0.9039591896508201,
      "learning_rate": 1.4843183955613955e-06,
      "loss": 0.1262,
      "step": 7071
    },
    {
      "epoch": 0.6515870456534758,
      "grad_norm": 0.9261306989585284,
      "learning_rate": 1.4836223621563272e-06,
      "loss": 0.1188,
      "step": 7072
    },
    {
      "epoch": 0.6516791818307458,
      "grad_norm": 0.9079997328951442,
      "learning_rate": 1.4829264231220319e-06,
      "loss": 0.1319,
      "step": 7073
    },
    {
      "epoch": 0.6517713180080158,
      "grad_norm": 0.9149117830703877,
      "learning_rate": 1.4822305785231273e-06,
      "loss": 0.1248,
      "step": 7074
    },
    {
      "epoch": 0.6518634541852859,
      "grad_norm": 0.9574311441669855,
      "learning_rate": 1.4815348284242234e-06,
      "loss": 0.1353,
      "step": 7075
    },
    {
      "epoch": 0.6519555903625559,
      "grad_norm": 0.9621595470781569,
      "learning_rate": 1.4808391728899206e-06,
      "loss": 0.1353,
      "step": 7076
    },
    {
      "epoch": 0.6520477265398259,
      "grad_norm": 0.912082741700463,
      "learning_rate": 1.4801436119848096e-06,
      "loss": 0.1279,
      "step": 7077
    },
    {
      "epoch": 0.6521398627170959,
      "grad_norm": 0.8963015605594415,
      "learning_rate": 1.4794481457734743e-06,
      "loss": 0.1232,
      "step": 7078
    },
    {
      "epoch": 0.6522319988943659,
      "grad_norm": 0.9239801719949823,
      "learning_rate": 1.478752774320488e-06,
      "loss": 0.1279,
      "step": 7079
    },
    {
      "epoch": 0.6523241350716359,
      "grad_norm": 0.8899030279037768,
      "learning_rate": 1.4780574976904174e-06,
      "loss": 0.1159,
      "step": 7080
    },
    {
      "epoch": 0.6524162712489059,
      "grad_norm": 0.8542351029355911,
      "learning_rate": 1.4773623159478178e-06,
      "loss": 0.1102,
      "step": 7081
    },
    {
      "epoch": 0.6525084074261759,
      "grad_norm": 0.844299544578612,
      "learning_rate": 1.4766672291572364e-06,
      "loss": 0.114,
      "step": 7082
    },
    {
      "epoch": 0.6526005436034459,
      "grad_norm": 0.9211202958944511,
      "learning_rate": 1.4759722373832135e-06,
      "loss": 0.1159,
      "step": 7083
    },
    {
      "epoch": 0.6526926797807159,
      "grad_norm": 0.9334540781839048,
      "learning_rate": 1.4752773406902788e-06,
      "loss": 0.1275,
      "step": 7084
    },
    {
      "epoch": 0.6527848159579859,
      "grad_norm": 0.8839225399468466,
      "learning_rate": 1.4745825391429537e-06,
      "loss": 0.1225,
      "step": 7085
    },
    {
      "epoch": 0.6528769521352559,
      "grad_norm": 0.9135405613448878,
      "learning_rate": 1.4738878328057493e-06,
      "loss": 0.1272,
      "step": 7086
    },
    {
      "epoch": 0.6529690883125259,
      "grad_norm": 0.9291388808423455,
      "learning_rate": 1.4731932217431704e-06,
      "loss": 0.1304,
      "step": 7087
    },
    {
      "epoch": 0.6530612244897959,
      "grad_norm": 0.9333671336031034,
      "learning_rate": 1.472498706019711e-06,
      "loss": 0.1282,
      "step": 7088
    },
    {
      "epoch": 0.653153360667066,
      "grad_norm": 0.9549475483412628,
      "learning_rate": 1.4718042856998582e-06,
      "loss": 0.1323,
      "step": 7089
    },
    {
      "epoch": 0.653245496844336,
      "grad_norm": 0.9327030362108195,
      "learning_rate": 1.4711099608480878e-06,
      "loss": 0.126,
      "step": 7090
    },
    {
      "epoch": 0.653337633021606,
      "grad_norm": 0.90652969378768,
      "learning_rate": 1.4704157315288676e-06,
      "loss": 0.1287,
      "step": 7091
    },
    {
      "epoch": 0.653429769198876,
      "grad_norm": 0.9976244196720754,
      "learning_rate": 1.469721597806658e-06,
      "loss": 0.1358,
      "step": 7092
    },
    {
      "epoch": 0.653521905376146,
      "grad_norm": 0.9056095269673432,
      "learning_rate": 1.4690275597459097e-06,
      "loss": 0.126,
      "step": 7093
    },
    {
      "epoch": 0.653614041553416,
      "grad_norm": 0.9103089182932546,
      "learning_rate": 1.4683336174110622e-06,
      "loss": 0.1302,
      "step": 7094
    },
    {
      "epoch": 0.653706177730686,
      "grad_norm": 0.8777755066961466,
      "learning_rate": 1.4676397708665496e-06,
      "loss": 0.1223,
      "step": 7095
    },
    {
      "epoch": 0.653798313907956,
      "grad_norm": 0.9541944228782124,
      "learning_rate": 1.4669460201767954e-06,
      "loss": 0.1439,
      "step": 7096
    },
    {
      "epoch": 0.653890450085226,
      "grad_norm": 0.8908592177465279,
      "learning_rate": 1.4662523654062153e-06,
      "loss": 0.1268,
      "step": 7097
    },
    {
      "epoch": 0.653982586262496,
      "grad_norm": 0.934638689368783,
      "learning_rate": 1.4655588066192135e-06,
      "loss": 0.132,
      "step": 7098
    },
    {
      "epoch": 0.6540747224397659,
      "grad_norm": 0.9183179883275167,
      "learning_rate": 1.4648653438801876e-06,
      "loss": 0.1213,
      "step": 7099
    },
    {
      "epoch": 0.6541668586170359,
      "grad_norm": 0.899781104969002,
      "learning_rate": 1.4641719772535265e-06,
      "loss": 0.1157,
      "step": 7100
    },
    {
      "epoch": 0.6542589947943059,
      "grad_norm": 0.9516515431961823,
      "learning_rate": 1.463478706803609e-06,
      "loss": 0.1344,
      "step": 7101
    },
    {
      "epoch": 0.6543511309715759,
      "grad_norm": 0.8622076564585328,
      "learning_rate": 1.4627855325948044e-06,
      "loss": 0.1218,
      "step": 7102
    },
    {
      "epoch": 0.654443267148846,
      "grad_norm": 0.9622965357298842,
      "learning_rate": 1.462092454691475e-06,
      "loss": 0.1363,
      "step": 7103
    },
    {
      "epoch": 0.654535403326116,
      "grad_norm": 0.9411786160752286,
      "learning_rate": 1.461399473157973e-06,
      "loss": 0.1293,
      "step": 7104
    },
    {
      "epoch": 0.654627539503386,
      "grad_norm": 0.9203989734120116,
      "learning_rate": 1.4607065880586418e-06,
      "loss": 0.1234,
      "step": 7105
    },
    {
      "epoch": 0.654719675680656,
      "grad_norm": 0.9211111609012533,
      "learning_rate": 1.4600137994578156e-06,
      "loss": 0.1248,
      "step": 7106
    },
    {
      "epoch": 0.654811811857926,
      "grad_norm": 0.9447252888204418,
      "learning_rate": 1.4593211074198202e-06,
      "loss": 0.1305,
      "step": 7107
    },
    {
      "epoch": 0.654903948035196,
      "grad_norm": 0.9322211454899155,
      "learning_rate": 1.4586285120089713e-06,
      "loss": 0.1321,
      "step": 7108
    },
    {
      "epoch": 0.654996084212466,
      "grad_norm": 0.870988539601446,
      "learning_rate": 1.457936013289578e-06,
      "loss": 0.1242,
      "step": 7109
    },
    {
      "epoch": 0.655088220389736,
      "grad_norm": 0.9532198461498228,
      "learning_rate": 1.4572436113259376e-06,
      "loss": 0.138,
      "step": 7110
    },
    {
      "epoch": 0.655180356567006,
      "grad_norm": 0.9982937398063886,
      "learning_rate": 1.4565513061823394e-06,
      "loss": 0.1395,
      "step": 7111
    },
    {
      "epoch": 0.655272492744276,
      "grad_norm": 0.9169145577965592,
      "learning_rate": 1.4558590979230663e-06,
      "loss": 0.1288,
      "step": 7112
    },
    {
      "epoch": 0.655364628921546,
      "grad_norm": 0.9951663238796098,
      "learning_rate": 1.4551669866123868e-06,
      "loss": 0.1485,
      "step": 7113
    },
    {
      "epoch": 0.655456765098816,
      "grad_norm": 0.9797069270513918,
      "learning_rate": 1.4544749723145665e-06,
      "loss": 0.1385,
      "step": 7114
    },
    {
      "epoch": 0.655548901276086,
      "grad_norm": 0.9252067413656129,
      "learning_rate": 1.4537830550938563e-06,
      "loss": 0.1301,
      "step": 7115
    },
    {
      "epoch": 0.6556410374533561,
      "grad_norm": 0.8954911117919563,
      "learning_rate": 1.453091235014502e-06,
      "loss": 0.1304,
      "step": 7116
    },
    {
      "epoch": 0.6557331736306261,
      "grad_norm": 0.8981284612320173,
      "learning_rate": 1.4523995121407402e-06,
      "loss": 0.1213,
      "step": 7117
    },
    {
      "epoch": 0.6558253098078961,
      "grad_norm": 0.9051040761470195,
      "learning_rate": 1.4517078865367968e-06,
      "loss": 0.1301,
      "step": 7118
    },
    {
      "epoch": 0.6559174459851661,
      "grad_norm": 0.9465935974796041,
      "learning_rate": 1.4510163582668876e-06,
      "loss": 0.1374,
      "step": 7119
    },
    {
      "epoch": 0.6560095821624361,
      "grad_norm": 0.9314265941965006,
      "learning_rate": 1.4503249273952224e-06,
      "loss": 0.1384,
      "step": 7120
    },
    {
      "epoch": 0.6561017183397061,
      "grad_norm": 0.9425029363283532,
      "learning_rate": 1.449633593986001e-06,
      "loss": 0.1316,
      "step": 7121
    },
    {
      "epoch": 0.6561938545169761,
      "grad_norm": 0.90970340809109,
      "learning_rate": 1.448942358103414e-06,
      "loss": 0.1249,
      "step": 7122
    },
    {
      "epoch": 0.6562859906942461,
      "grad_norm": 0.9009364371908557,
      "learning_rate": 1.4482512198116424e-06,
      "loss": 0.1281,
      "step": 7123
    },
    {
      "epoch": 0.6563781268715161,
      "grad_norm": 0.9697235851276125,
      "learning_rate": 1.4475601791748572e-06,
      "loss": 0.1255,
      "step": 7124
    },
    {
      "epoch": 0.6564702630487861,
      "grad_norm": 0.9906107648348492,
      "learning_rate": 1.4468692362572228e-06,
      "loss": 0.1391,
      "step": 7125
    },
    {
      "epoch": 0.6565623992260561,
      "grad_norm": 0.9367678289236904,
      "learning_rate": 1.4461783911228938e-06,
      "loss": 0.1134,
      "step": 7126
    },
    {
      "epoch": 0.6566545354033261,
      "grad_norm": 0.960257839404813,
      "learning_rate": 1.4454876438360138e-06,
      "loss": 0.1315,
      "step": 7127
    },
    {
      "epoch": 0.6567466715805961,
      "grad_norm": 1.0250542021519866,
      "learning_rate": 1.4447969944607207e-06,
      "loss": 0.143,
      "step": 7128
    },
    {
      "epoch": 0.6568388077578661,
      "grad_norm": 0.9299221218118151,
      "learning_rate": 1.444106443061139e-06,
      "loss": 0.1223,
      "step": 7129
    },
    {
      "epoch": 0.6569309439351362,
      "grad_norm": 0.9862847658068205,
      "learning_rate": 1.443415989701389e-06,
      "loss": 0.1296,
      "step": 7130
    },
    {
      "epoch": 0.6570230801124062,
      "grad_norm": 0.9101288069296816,
      "learning_rate": 1.4427256344455764e-06,
      "loss": 0.1304,
      "step": 7131
    },
    {
      "epoch": 0.6571152162896762,
      "grad_norm": 0.843608595661738,
      "learning_rate": 1.442035377357803e-06,
      "loss": 0.1175,
      "step": 7132
    },
    {
      "epoch": 0.6572073524669462,
      "grad_norm": 0.9150555649451192,
      "learning_rate": 1.4413452185021594e-06,
      "loss": 0.1304,
      "step": 7133
    },
    {
      "epoch": 0.6572994886442162,
      "grad_norm": 0.9201461415510122,
      "learning_rate": 1.4406551579427264e-06,
      "loss": 0.1314,
      "step": 7134
    },
    {
      "epoch": 0.6573916248214862,
      "grad_norm": 0.8949364453594446,
      "learning_rate": 1.4399651957435751e-06,
      "loss": 0.1136,
      "step": 7135
    },
    {
      "epoch": 0.6574837609987562,
      "grad_norm": 0.8541001653937904,
      "learning_rate": 1.439275331968769e-06,
      "loss": 0.1188,
      "step": 7136
    },
    {
      "epoch": 0.6575758971760262,
      "grad_norm": 0.951903553111977,
      "learning_rate": 1.4385855666823628e-06,
      "loss": 0.1298,
      "step": 7137
    },
    {
      "epoch": 0.6576680333532962,
      "grad_norm": 0.9391325848473654,
      "learning_rate": 1.4378958999484021e-06,
      "loss": 0.125,
      "step": 7138
    },
    {
      "epoch": 0.6577601695305662,
      "grad_norm": 0.9498586655518755,
      "learning_rate": 1.4372063318309213e-06,
      "loss": 0.1338,
      "step": 7139
    },
    {
      "epoch": 0.6578523057078361,
      "grad_norm": 0.9439890757247625,
      "learning_rate": 1.4365168623939458e-06,
      "loss": 0.1247,
      "step": 7140
    },
    {
      "epoch": 0.6579444418851061,
      "grad_norm": 0.9715129796688949,
      "learning_rate": 1.4358274917014942e-06,
      "loss": 0.1319,
      "step": 7141
    },
    {
      "epoch": 0.6580365780623761,
      "grad_norm": 0.952778753231662,
      "learning_rate": 1.4351382198175745e-06,
      "loss": 0.1251,
      "step": 7142
    },
    {
      "epoch": 0.6581287142396463,
      "grad_norm": 0.8580832806372259,
      "learning_rate": 1.4344490468061867e-06,
      "loss": 0.115,
      "step": 7143
    },
    {
      "epoch": 0.6582208504169162,
      "grad_norm": 0.9595668294661949,
      "learning_rate": 1.4337599727313196e-06,
      "loss": 0.1347,
      "step": 7144
    },
    {
      "epoch": 0.6583129865941862,
      "grad_norm": 0.9272250033635803,
      "learning_rate": 1.4330709976569526e-06,
      "loss": 0.1285,
      "step": 7145
    },
    {
      "epoch": 0.6584051227714562,
      "grad_norm": 0.9198354354531479,
      "learning_rate": 1.4323821216470585e-06,
      "loss": 0.1313,
      "step": 7146
    },
    {
      "epoch": 0.6584972589487262,
      "grad_norm": 0.940128449050614,
      "learning_rate": 1.4316933447656e-06,
      "loss": 0.1316,
      "step": 7147
    },
    {
      "epoch": 0.6585893951259962,
      "grad_norm": 0.9651435947834156,
      "learning_rate": 1.4310046670765288e-06,
      "loss": 0.1331,
      "step": 7148
    },
    {
      "epoch": 0.6586815313032662,
      "grad_norm": 0.926751113334902,
      "learning_rate": 1.43031608864379e-06,
      "loss": 0.1335,
      "step": 7149
    },
    {
      "epoch": 0.6587736674805362,
      "grad_norm": 0.9034975813817566,
      "learning_rate": 1.4296276095313168e-06,
      "loss": 0.1274,
      "step": 7150
    },
    {
      "epoch": 0.6588658036578062,
      "grad_norm": 0.9131412602543207,
      "learning_rate": 1.4289392298030362e-06,
      "loss": 0.119,
      "step": 7151
    },
    {
      "epoch": 0.6589579398350762,
      "grad_norm": 0.9423968798876814,
      "learning_rate": 1.4282509495228622e-06,
      "loss": 0.1281,
      "step": 7152
    },
    {
      "epoch": 0.6590500760123462,
      "grad_norm": 0.939229125385478,
      "learning_rate": 1.4275627687547027e-06,
      "loss": 0.122,
      "step": 7153
    },
    {
      "epoch": 0.6591422121896162,
      "grad_norm": 1.0008297804235626,
      "learning_rate": 1.4268746875624572e-06,
      "loss": 0.1361,
      "step": 7154
    },
    {
      "epoch": 0.6592343483668862,
      "grad_norm": 0.975070832200004,
      "learning_rate": 1.426186706010012e-06,
      "loss": 0.1279,
      "step": 7155
    },
    {
      "epoch": 0.6593264845441562,
      "grad_norm": 0.952944740044869,
      "learning_rate": 1.4254988241612456e-06,
      "loss": 0.1216,
      "step": 7156
    },
    {
      "epoch": 0.6594186207214263,
      "grad_norm": 1.0120820938152526,
      "learning_rate": 1.4248110420800293e-06,
      "loss": 0.1241,
      "step": 7157
    },
    {
      "epoch": 0.6595107568986963,
      "grad_norm": 0.9205810165081968,
      "learning_rate": 1.4241233598302233e-06,
      "loss": 0.1232,
      "step": 7158
    },
    {
      "epoch": 0.6596028930759663,
      "grad_norm": 1.0440449077545666,
      "learning_rate": 1.4234357774756802e-06,
      "loss": 0.1349,
      "step": 7159
    },
    {
      "epoch": 0.6596950292532363,
      "grad_norm": 0.9193202768365726,
      "learning_rate": 1.422748295080241e-06,
      "loss": 0.1246,
      "step": 7160
    },
    {
      "epoch": 0.6597871654305063,
      "grad_norm": 0.8943555491454667,
      "learning_rate": 1.4220609127077373e-06,
      "loss": 0.1195,
      "step": 7161
    },
    {
      "epoch": 0.6598793016077763,
      "grad_norm": 0.9391692237037247,
      "learning_rate": 1.4213736304219945e-06,
      "loss": 0.1328,
      "step": 7162
    },
    {
      "epoch": 0.6599714377850463,
      "grad_norm": 0.9963129927890189,
      "learning_rate": 1.4206864482868265e-06,
      "loss": 0.1263,
      "step": 7163
    },
    {
      "epoch": 0.6600635739623163,
      "grad_norm": 0.933541761483965,
      "learning_rate": 1.4199993663660372e-06,
      "loss": 0.1315,
      "step": 7164
    },
    {
      "epoch": 0.6601557101395863,
      "grad_norm": 0.9339266259054608,
      "learning_rate": 1.419312384723423e-06,
      "loss": 0.1255,
      "step": 7165
    },
    {
      "epoch": 0.6602478463168563,
      "grad_norm": 0.9000373393357917,
      "learning_rate": 1.4186255034227714e-06,
      "loss": 0.1114,
      "step": 7166
    },
    {
      "epoch": 0.6603399824941263,
      "grad_norm": 0.8703795566468919,
      "learning_rate": 1.4179387225278568e-06,
      "loss": 0.1116,
      "step": 7167
    },
    {
      "epoch": 0.6604321186713963,
      "grad_norm": 0.955579337973247,
      "learning_rate": 1.4172520421024493e-06,
      "loss": 0.1269,
      "step": 7168
    },
    {
      "epoch": 0.6605242548486663,
      "grad_norm": 0.9639282983172128,
      "learning_rate": 1.4165654622103054e-06,
      "loss": 0.1206,
      "step": 7169
    },
    {
      "epoch": 0.6606163910259364,
      "grad_norm": 0.8729449392995738,
      "learning_rate": 1.4158789829151747e-06,
      "loss": 0.1203,
      "step": 7170
    },
    {
      "epoch": 0.6607085272032064,
      "grad_norm": 0.9116857232463509,
      "learning_rate": 1.4151926042807985e-06,
      "loss": 0.1147,
      "step": 7171
    },
    {
      "epoch": 0.6608006633804764,
      "grad_norm": 0.9490013290219388,
      "learning_rate": 1.4145063263709056e-06,
      "loss": 0.1421,
      "step": 7172
    },
    {
      "epoch": 0.6608927995577464,
      "grad_norm": 0.9488839253969639,
      "learning_rate": 1.413820149249216e-06,
      "loss": 0.1311,
      "step": 7173
    },
    {
      "epoch": 0.6609849357350164,
      "grad_norm": 1.0096436437834344,
      "learning_rate": 1.4131340729794424e-06,
      "loss": 0.1288,
      "step": 7174
    },
    {
      "epoch": 0.6610770719122864,
      "grad_norm": 0.9178413047426945,
      "learning_rate": 1.4124480976252872e-06,
      "loss": 0.1257,
      "step": 7175
    },
    {
      "epoch": 0.6611692080895564,
      "grad_norm": 0.8598420566610736,
      "learning_rate": 1.4117622232504442e-06,
      "loss": 0.1186,
      "step": 7176
    },
    {
      "epoch": 0.6612613442668264,
      "grad_norm": 0.9438112564377011,
      "learning_rate": 1.4110764499185957e-06,
      "loss": 0.1368,
      "step": 7177
    },
    {
      "epoch": 0.6613534804440964,
      "grad_norm": 0.9398113472022501,
      "learning_rate": 1.410390777693415e-06,
      "loss": 0.1287,
      "step": 7178
    },
    {
      "epoch": 0.6614456166213664,
      "grad_norm": 0.8882446435476032,
      "learning_rate": 1.409705206638568e-06,
      "loss": 0.1223,
      "step": 7179
    },
    {
      "epoch": 0.6615377527986364,
      "grad_norm": 0.9108996799402447,
      "learning_rate": 1.409019736817711e-06,
      "loss": 0.1313,
      "step": 7180
    },
    {
      "epoch": 0.6616298889759064,
      "grad_norm": 0.9425541395961481,
      "learning_rate": 1.4083343682944878e-06,
      "loss": 0.14,
      "step": 7181
    },
    {
      "epoch": 0.6617220251531764,
      "grad_norm": 0.9257644599811672,
      "learning_rate": 1.4076491011325372e-06,
      "loss": 0.1366,
      "step": 7182
    },
    {
      "epoch": 0.6618141613304463,
      "grad_norm": 0.9321219321891313,
      "learning_rate": 1.4069639353954837e-06,
      "loss": 0.1335,
      "step": 7183
    },
    {
      "epoch": 0.6619062975077165,
      "grad_norm": 0.9094495757127697,
      "learning_rate": 1.4062788711469478e-06,
      "loss": 0.1259,
      "step": 7184
    },
    {
      "epoch": 0.6619984336849865,
      "grad_norm": 0.9848326389326956,
      "learning_rate": 1.405593908450535e-06,
      "loss": 0.1272,
      "step": 7185
    },
    {
      "epoch": 0.6620905698622565,
      "grad_norm": 0.9063900632096158,
      "learning_rate": 1.4049090473698457e-06,
      "loss": 0.1238,
      "step": 7186
    },
    {
      "epoch": 0.6621827060395264,
      "grad_norm": 0.9357963658738954,
      "learning_rate": 1.4042242879684703e-06,
      "loss": 0.1282,
      "step": 7187
    },
    {
      "epoch": 0.6622748422167964,
      "grad_norm": 0.8979794771571415,
      "learning_rate": 1.403539630309988e-06,
      "loss": 0.121,
      "step": 7188
    },
    {
      "epoch": 0.6623669783940664,
      "grad_norm": 0.892611802740368,
      "learning_rate": 1.4028550744579677e-06,
      "loss": 0.1236,
      "step": 7189
    },
    {
      "epoch": 0.6624591145713364,
      "grad_norm": 0.8850509948496229,
      "learning_rate": 1.4021706204759716e-06,
      "loss": 0.1263,
      "step": 7190
    },
    {
      "epoch": 0.6625512507486064,
      "grad_norm": 0.9465343552302885,
      "learning_rate": 1.4014862684275522e-06,
      "loss": 0.1319,
      "step": 7191
    },
    {
      "epoch": 0.6626433869258764,
      "grad_norm": 0.9608608477037041,
      "learning_rate": 1.4008020183762513e-06,
      "loss": 0.1287,
      "step": 7192
    },
    {
      "epoch": 0.6627355231031464,
      "grad_norm": 0.9568016008936874,
      "learning_rate": 1.4001178703856016e-06,
      "loss": 0.1249,
      "step": 7193
    },
    {
      "epoch": 0.6628276592804164,
      "grad_norm": 0.9681371499803646,
      "learning_rate": 1.3994338245191249e-06,
      "loss": 0.1298,
      "step": 7194
    },
    {
      "epoch": 0.6629197954576864,
      "grad_norm": 0.9707806670367282,
      "learning_rate": 1.398749880840336e-06,
      "loss": 0.1291,
      "step": 7195
    },
    {
      "epoch": 0.6630119316349564,
      "grad_norm": 1.0292775882377738,
      "learning_rate": 1.3980660394127394e-06,
      "loss": 0.128,
      "step": 7196
    },
    {
      "epoch": 0.6631040678122264,
      "grad_norm": 0.9334590025297743,
      "learning_rate": 1.3973823002998305e-06,
      "loss": 0.1248,
      "step": 7197
    },
    {
      "epoch": 0.6631962039894965,
      "grad_norm": 0.8671221737926933,
      "learning_rate": 1.3966986635650936e-06,
      "loss": 0.1019,
      "step": 7198
    },
    {
      "epoch": 0.6632883401667665,
      "grad_norm": 0.906986379921877,
      "learning_rate": 1.3960151292720039e-06,
      "loss": 0.1225,
      "step": 7199
    },
    {
      "epoch": 0.6633804763440365,
      "grad_norm": 0.9726558373988553,
      "learning_rate": 1.395331697484028e-06,
      "loss": 0.1252,
      "step": 7200
    },
    {
      "epoch": 0.6634726125213065,
      "grad_norm": 0.8928249593897785,
      "learning_rate": 1.394648368264624e-06,
      "loss": 0.1179,
      "step": 7201
    },
    {
      "epoch": 0.6635647486985765,
      "grad_norm": 0.9246855574964215,
      "learning_rate": 1.3939651416772365e-06,
      "loss": 0.1227,
      "step": 7202
    },
    {
      "epoch": 0.6636568848758465,
      "grad_norm": 0.9321724830410227,
      "learning_rate": 1.3932820177853062e-06,
      "loss": 0.1238,
      "step": 7203
    },
    {
      "epoch": 0.6637490210531165,
      "grad_norm": 0.9507285348694358,
      "learning_rate": 1.3925989966522585e-06,
      "loss": 0.1268,
      "step": 7204
    },
    {
      "epoch": 0.6638411572303865,
      "grad_norm": 0.9361993103413431,
      "learning_rate": 1.391916078341514e-06,
      "loss": 0.1215,
      "step": 7205
    },
    {
      "epoch": 0.6639332934076565,
      "grad_norm": 0.8919295341094984,
      "learning_rate": 1.3912332629164798e-06,
      "loss": 0.1179,
      "step": 7206
    },
    {
      "epoch": 0.6640254295849265,
      "grad_norm": 0.8853783172570733,
      "learning_rate": 1.3905505504405567e-06,
      "loss": 0.124,
      "step": 7207
    },
    {
      "epoch": 0.6641175657621965,
      "grad_norm": 0.9334584777070276,
      "learning_rate": 1.3898679409771355e-06,
      "loss": 0.1283,
      "step": 7208
    },
    {
      "epoch": 0.6642097019394665,
      "grad_norm": 0.9539829238214229,
      "learning_rate": 1.389185434589595e-06,
      "loss": 0.1266,
      "step": 7209
    },
    {
      "epoch": 0.6643018381167365,
      "grad_norm": 0.9241016852887277,
      "learning_rate": 1.3885030313413056e-06,
      "loss": 0.1196,
      "step": 7210
    },
    {
      "epoch": 0.6643939742940066,
      "grad_norm": 0.9586669345335255,
      "learning_rate": 1.3878207312956295e-06,
      "loss": 0.1351,
      "step": 7211
    },
    {
      "epoch": 0.6644861104712766,
      "grad_norm": 0.9450592859824014,
      "learning_rate": 1.3871385345159183e-06,
      "loss": 0.1323,
      "step": 7212
    },
    {
      "epoch": 0.6645782466485466,
      "grad_norm": 0.9612734215640861,
      "learning_rate": 1.3864564410655149e-06,
      "loss": 0.1225,
      "step": 7213
    },
    {
      "epoch": 0.6646703828258166,
      "grad_norm": 0.9395580497180398,
      "learning_rate": 1.3857744510077507e-06,
      "loss": 0.125,
      "step": 7214
    },
    {
      "epoch": 0.6647625190030866,
      "grad_norm": 0.9515079076163937,
      "learning_rate": 1.3850925644059475e-06,
      "loss": 0.1413,
      "step": 7215
    },
    {
      "epoch": 0.6648546551803566,
      "grad_norm": 0.9818483068556666,
      "learning_rate": 1.3844107813234197e-06,
      "loss": 0.1317,
      "step": 7216
    },
    {
      "epoch": 0.6649467913576266,
      "grad_norm": 0.9217060441337399,
      "learning_rate": 1.3837291018234723e-06,
      "loss": 0.1327,
      "step": 7217
    },
    {
      "epoch": 0.6650389275348966,
      "grad_norm": 0.9614026258662707,
      "learning_rate": 1.3830475259693964e-06,
      "loss": 0.1324,
      "step": 7218
    },
    {
      "epoch": 0.6651310637121666,
      "grad_norm": 0.8997301921202329,
      "learning_rate": 1.3823660538244793e-06,
      "loss": 0.114,
      "step": 7219
    },
    {
      "epoch": 0.6652231998894366,
      "grad_norm": 0.9264679973250439,
      "learning_rate": 1.3816846854519934e-06,
      "loss": 0.1366,
      "step": 7220
    },
    {
      "epoch": 0.6653153360667066,
      "grad_norm": 0.9397209333426656,
      "learning_rate": 1.3810034209152057e-06,
      "loss": 0.1224,
      "step": 7221
    },
    {
      "epoch": 0.6654074722439766,
      "grad_norm": 0.9140976327735287,
      "learning_rate": 1.3803222602773696e-06,
      "loss": 0.1209,
      "step": 7222
    },
    {
      "epoch": 0.6654996084212466,
      "grad_norm": 0.9275928232405652,
      "learning_rate": 1.379641203601732e-06,
      "loss": 0.1295,
      "step": 7223
    },
    {
      "epoch": 0.6655917445985166,
      "grad_norm": 0.8812472686313237,
      "learning_rate": 1.3789602509515306e-06,
      "loss": 0.126,
      "step": 7224
    },
    {
      "epoch": 0.6656838807757867,
      "grad_norm": 0.9507714938881616,
      "learning_rate": 1.3782794023899899e-06,
      "loss": 0.1271,
      "step": 7225
    },
    {
      "epoch": 0.6657760169530567,
      "grad_norm": 0.8546349341742145,
      "learning_rate": 1.3775986579803276e-06,
      "loss": 0.111,
      "step": 7226
    },
    {
      "epoch": 0.6658681531303267,
      "grad_norm": 0.9735538532092373,
      "learning_rate": 1.37691801778575e-06,
      "loss": 0.1267,
      "step": 7227
    },
    {
      "epoch": 0.6659602893075967,
      "grad_norm": 0.9649175198066844,
      "learning_rate": 1.3762374818694558e-06,
      "loss": 0.1273,
      "step": 7228
    },
    {
      "epoch": 0.6660524254848667,
      "grad_norm": 0.9308364929610364,
      "learning_rate": 1.3755570502946324e-06,
      "loss": 0.135,
      "step": 7229
    },
    {
      "epoch": 0.6661445616621366,
      "grad_norm": 0.9482080604533994,
      "learning_rate": 1.3748767231244587e-06,
      "loss": 0.1295,
      "step": 7230
    },
    {
      "epoch": 0.6662366978394066,
      "grad_norm": 0.9044964317809467,
      "learning_rate": 1.3741965004221012e-06,
      "loss": 0.1201,
      "step": 7231
    },
    {
      "epoch": 0.6663288340166766,
      "grad_norm": 0.9573803087194334,
      "learning_rate": 1.3735163822507196e-06,
      "loss": 0.1425,
      "step": 7232
    },
    {
      "epoch": 0.6664209701939466,
      "grad_norm": 0.879465266637758,
      "learning_rate": 1.372836368673463e-06,
      "loss": 0.1097,
      "step": 7233
    },
    {
      "epoch": 0.6665131063712166,
      "grad_norm": 1.007476252708849,
      "learning_rate": 1.3721564597534723e-06,
      "loss": 0.1303,
      "step": 7234
    },
    {
      "epoch": 0.6666052425484866,
      "grad_norm": 0.930209822547841,
      "learning_rate": 1.3714766555538755e-06,
      "loss": 0.1259,
      "step": 7235
    },
    {
      "epoch": 0.6666973787257566,
      "grad_norm": 0.9989347154200431,
      "learning_rate": 1.3707969561377915e-06,
      "loss": 0.1465,
      "step": 7236
    },
    {
      "epoch": 0.6667895149030266,
      "grad_norm": 0.854671371178768,
      "learning_rate": 1.370117361568332e-06,
      "loss": 0.1137,
      "step": 7237
    },
    {
      "epoch": 0.6668816510802967,
      "grad_norm": 0.9819684863020709,
      "learning_rate": 1.3694378719085976e-06,
      "loss": 0.1388,
      "step": 7238
    },
    {
      "epoch": 0.6669737872575667,
      "grad_norm": 0.9252396488730227,
      "learning_rate": 1.368758487221678e-06,
      "loss": 0.1285,
      "step": 7239
    },
    {
      "epoch": 0.6670659234348367,
      "grad_norm": 0.9453506499299281,
      "learning_rate": 1.3680792075706545e-06,
      "loss": 0.1397,
      "step": 7240
    },
    {
      "epoch": 0.6671580596121067,
      "grad_norm": 0.8709913858856868,
      "learning_rate": 1.367400033018599e-06,
      "loss": 0.1148,
      "step": 7241
    },
    {
      "epoch": 0.6672501957893767,
      "grad_norm": 0.9137545587799056,
      "learning_rate": 1.3667209636285727e-06,
      "loss": 0.131,
      "step": 7242
    },
    {
      "epoch": 0.6673423319666467,
      "grad_norm": 0.9159588879600266,
      "learning_rate": 1.366041999463626e-06,
      "loss": 0.13,
      "step": 7243
    },
    {
      "epoch": 0.6674344681439167,
      "grad_norm": 0.9230621320209568,
      "learning_rate": 1.3653631405868011e-06,
      "loss": 0.1232,
      "step": 7244
    },
    {
      "epoch": 0.6675266043211867,
      "grad_norm": 0.8953404755783205,
      "learning_rate": 1.3646843870611313e-06,
      "loss": 0.1277,
      "step": 7245
    },
    {
      "epoch": 0.6676187404984567,
      "grad_norm": 0.9414937946712962,
      "learning_rate": 1.3640057389496392e-06,
      "loss": 0.1319,
      "step": 7246
    },
    {
      "epoch": 0.6677108766757267,
      "grad_norm": 0.9230244067093492,
      "learning_rate": 1.3633271963153363e-06,
      "loss": 0.1258,
      "step": 7247
    },
    {
      "epoch": 0.6678030128529967,
      "grad_norm": 0.9482742954005706,
      "learning_rate": 1.3626487592212245e-06,
      "loss": 0.1285,
      "step": 7248
    },
    {
      "epoch": 0.6678951490302667,
      "grad_norm": 0.9053301065986715,
      "learning_rate": 1.361970427730298e-06,
      "loss": 0.1221,
      "step": 7249
    },
    {
      "epoch": 0.6679872852075367,
      "grad_norm": 0.9269917273591195,
      "learning_rate": 1.3612922019055409e-06,
      "loss": 0.1278,
      "step": 7250
    },
    {
      "epoch": 0.6680794213848067,
      "grad_norm": 0.9945437288140861,
      "learning_rate": 1.3606140818099243e-06,
      "loss": 0.1283,
      "step": 7251
    },
    {
      "epoch": 0.6681715575620768,
      "grad_norm": 0.8834575369543373,
      "learning_rate": 1.3599360675064139e-06,
      "loss": 0.1173,
      "step": 7252
    },
    {
      "epoch": 0.6682636937393468,
      "grad_norm": 0.9297478423868443,
      "learning_rate": 1.359258159057961e-06,
      "loss": 0.1243,
      "step": 7253
    },
    {
      "epoch": 0.6683558299166168,
      "grad_norm": 0.8803633005549261,
      "learning_rate": 1.358580356527511e-06,
      "loss": 0.1271,
      "step": 7254
    },
    {
      "epoch": 0.6684479660938868,
      "grad_norm": 0.9436775723358185,
      "learning_rate": 1.3579026599779988e-06,
      "loss": 0.1261,
      "step": 7255
    },
    {
      "epoch": 0.6685401022711568,
      "grad_norm": 0.8952694224685467,
      "learning_rate": 1.3572250694723465e-06,
      "loss": 0.1245,
      "step": 7256
    },
    {
      "epoch": 0.6686322384484268,
      "grad_norm": 0.8551919104756415,
      "learning_rate": 1.3565475850734706e-06,
      "loss": 0.1139,
      "step": 7257
    },
    {
      "epoch": 0.6687243746256968,
      "grad_norm": 0.9032991002081001,
      "learning_rate": 1.355870206844273e-06,
      "loss": 0.111,
      "step": 7258
    },
    {
      "epoch": 0.6688165108029668,
      "grad_norm": 0.9402261271430783,
      "learning_rate": 1.3551929348476512e-06,
      "loss": 0.125,
      "step": 7259
    },
    {
      "epoch": 0.6689086469802368,
      "grad_norm": 0.8590361852348019,
      "learning_rate": 1.3545157691464878e-06,
      "loss": 0.1212,
      "step": 7260
    },
    {
      "epoch": 0.6690007831575068,
      "grad_norm": 0.953889062291394,
      "learning_rate": 1.353838709803658e-06,
      "loss": 0.1246,
      "step": 7261
    },
    {
      "epoch": 0.6690929193347768,
      "grad_norm": 0.9024711962523686,
      "learning_rate": 1.3531617568820287e-06,
      "loss": 0.1256,
      "step": 7262
    },
    {
      "epoch": 0.6691850555120468,
      "grad_norm": 0.9029203771714646,
      "learning_rate": 1.3524849104444537e-06,
      "loss": 0.1259,
      "step": 7263
    },
    {
      "epoch": 0.6692771916893168,
      "grad_norm": 0.9757431844463745,
      "learning_rate": 1.3518081705537771e-06,
      "loss": 0.1329,
      "step": 7264
    },
    {
      "epoch": 0.6693693278665868,
      "grad_norm": 0.9514092658235749,
      "learning_rate": 1.3511315372728357e-06,
      "loss": 0.1181,
      "step": 7265
    },
    {
      "epoch": 0.6694614640438569,
      "grad_norm": 0.9673560639479292,
      "learning_rate": 1.3504550106644542e-06,
      "loss": 0.1253,
      "step": 7266
    },
    {
      "epoch": 0.6695536002211269,
      "grad_norm": 0.9354307139797914,
      "learning_rate": 1.34977859079145e-06,
      "loss": 0.1139,
      "step": 7267
    },
    {
      "epoch": 0.6696457363983969,
      "grad_norm": 0.8914156946294755,
      "learning_rate": 1.3491022777166276e-06,
      "loss": 0.1156,
      "step": 7268
    },
    {
      "epoch": 0.6697378725756669,
      "grad_norm": 0.9649736445005475,
      "learning_rate": 1.3484260715027813e-06,
      "loss": 0.1436,
      "step": 7269
    },
    {
      "epoch": 0.6698300087529369,
      "grad_norm": 0.9483659023489992,
      "learning_rate": 1.3477499722126985e-06,
      "loss": 0.1323,
      "step": 7270
    },
    {
      "epoch": 0.6699221449302069,
      "grad_norm": 0.9200966092532619,
      "learning_rate": 1.3470739799091555e-06,
      "loss": 0.1322,
      "step": 7271
    },
    {
      "epoch": 0.6700142811074769,
      "grad_norm": 0.9403584085916542,
      "learning_rate": 1.3463980946549166e-06,
      "loss": 0.1382,
      "step": 7272
    },
    {
      "epoch": 0.6701064172847468,
      "grad_norm": 0.8378406184781029,
      "learning_rate": 1.3457223165127397e-06,
      "loss": 0.1088,
      "step": 7273
    },
    {
      "epoch": 0.6701985534620168,
      "grad_norm": 0.9551913632098953,
      "learning_rate": 1.3450466455453693e-06,
      "loss": 0.1342,
      "step": 7274
    },
    {
      "epoch": 0.6702906896392868,
      "grad_norm": 0.8597495274518914,
      "learning_rate": 1.3443710818155428e-06,
      "loss": 0.1191,
      "step": 7275
    },
    {
      "epoch": 0.6703828258165568,
      "grad_norm": 0.9307987876921514,
      "learning_rate": 1.3436956253859851e-06,
      "loss": 0.1119,
      "step": 7276
    },
    {
      "epoch": 0.6704749619938268,
      "grad_norm": 0.913451889373323,
      "learning_rate": 1.3430202763194125e-06,
      "loss": 0.1242,
      "step": 7277
    },
    {
      "epoch": 0.6705670981710968,
      "grad_norm": 0.9174767959610509,
      "learning_rate": 1.342345034678533e-06,
      "loss": 0.1207,
      "step": 7278
    },
    {
      "epoch": 0.6706592343483669,
      "grad_norm": 0.9625576179556493,
      "learning_rate": 1.3416699005260416e-06,
      "loss": 0.131,
      "step": 7279
    },
    {
      "epoch": 0.6707513705256369,
      "grad_norm": 0.9322744623558886,
      "learning_rate": 1.3409948739246236e-06,
      "loss": 0.1193,
      "step": 7280
    },
    {
      "epoch": 0.6708435067029069,
      "grad_norm": 0.9144503231058142,
      "learning_rate": 1.3403199549369564e-06,
      "loss": 0.1288,
      "step": 7281
    },
    {
      "epoch": 0.6709356428801769,
      "grad_norm": 0.9216913139875762,
      "learning_rate": 1.3396451436257062e-06,
      "loss": 0.1307,
      "step": 7282
    },
    {
      "epoch": 0.6710277790574469,
      "grad_norm": 0.9487383042584467,
      "learning_rate": 1.3389704400535303e-06,
      "loss": 0.1294,
      "step": 7283
    },
    {
      "epoch": 0.6711199152347169,
      "grad_norm": 0.8999456824995248,
      "learning_rate": 1.3382958442830737e-06,
      "loss": 0.1239,
      "step": 7284
    },
    {
      "epoch": 0.6712120514119869,
      "grad_norm": 0.9535840771969801,
      "learning_rate": 1.337621356376972e-06,
      "loss": 0.1264,
      "step": 7285
    },
    {
      "epoch": 0.6713041875892569,
      "grad_norm": 0.9497062608622805,
      "learning_rate": 1.3369469763978527e-06,
      "loss": 0.1316,
      "step": 7286
    },
    {
      "epoch": 0.6713963237665269,
      "grad_norm": 0.9452724159743188,
      "learning_rate": 1.3362727044083318e-06,
      "loss": 0.1418,
      "step": 7287
    },
    {
      "epoch": 0.6714884599437969,
      "grad_norm": 0.91445117456331,
      "learning_rate": 1.3355985404710164e-06,
      "loss": 0.1174,
      "step": 7288
    },
    {
      "epoch": 0.6715805961210669,
      "grad_norm": 0.9449257270380875,
      "learning_rate": 1.3349244846485022e-06,
      "loss": 0.1301,
      "step": 7289
    },
    {
      "epoch": 0.6716727322983369,
      "grad_norm": 0.964496022253499,
      "learning_rate": 1.3342505370033736e-06,
      "loss": 0.1125,
      "step": 7290
    },
    {
      "epoch": 0.6717648684756069,
      "grad_norm": 0.936912152887504,
      "learning_rate": 1.3335766975982082e-06,
      "loss": 0.1243,
      "step": 7291
    },
    {
      "epoch": 0.6718570046528769,
      "grad_norm": 0.8577965724245058,
      "learning_rate": 1.3329029664955729e-06,
      "loss": 0.1002,
      "step": 7292
    },
    {
      "epoch": 0.671949140830147,
      "grad_norm": 0.990204302102415,
      "learning_rate": 1.332229343758022e-06,
      "loss": 0.1265,
      "step": 7293
    },
    {
      "epoch": 0.672041277007417,
      "grad_norm": 1.0281368081432407,
      "learning_rate": 1.331555829448103e-06,
      "loss": 0.1257,
      "step": 7294
    },
    {
      "epoch": 0.672133413184687,
      "grad_norm": 0.9288074008539412,
      "learning_rate": 1.33088242362835e-06,
      "loss": 0.1209,
      "step": 7295
    },
    {
      "epoch": 0.672225549361957,
      "grad_norm": 0.9119209022595895,
      "learning_rate": 1.3302091263612907e-06,
      "loss": 0.1174,
      "step": 7296
    },
    {
      "epoch": 0.672317685539227,
      "grad_norm": 0.9143317228925907,
      "learning_rate": 1.3295359377094392e-06,
      "loss": 0.1295,
      "step": 7297
    },
    {
      "epoch": 0.672409821716497,
      "grad_norm": 0.8944269171000773,
      "learning_rate": 1.3288628577353014e-06,
      "loss": 0.1251,
      "step": 7298
    },
    {
      "epoch": 0.672501957893767,
      "grad_norm": 0.952822769694342,
      "learning_rate": 1.3281898865013749e-06,
      "loss": 0.1422,
      "step": 7299
    },
    {
      "epoch": 0.672594094071037,
      "grad_norm": 0.9731154772916867,
      "learning_rate": 1.327517024070143e-06,
      "loss": 0.129,
      "step": 7300
    },
    {
      "epoch": 0.672686230248307,
      "grad_norm": 0.9820492852941306,
      "learning_rate": 1.3268442705040808e-06,
      "loss": 0.1319,
      "step": 7301
    },
    {
      "epoch": 0.672778366425577,
      "grad_norm": 0.8859435352688277,
      "learning_rate": 1.3261716258656543e-06,
      "loss": 0.127,
      "step": 7302
    },
    {
      "epoch": 0.672870502602847,
      "grad_norm": 0.9183943013213689,
      "learning_rate": 1.3254990902173187e-06,
      "loss": 0.1272,
      "step": 7303
    },
    {
      "epoch": 0.672962638780117,
      "grad_norm": 0.9189656727211152,
      "learning_rate": 1.3248266636215202e-06,
      "loss": 0.125,
      "step": 7304
    },
    {
      "epoch": 0.673054774957387,
      "grad_norm": 0.867060049690445,
      "learning_rate": 1.324154346140692e-06,
      "loss": 0.1094,
      "step": 7305
    },
    {
      "epoch": 0.6731469111346571,
      "grad_norm": 1.0020192646596813,
      "learning_rate": 1.3234821378372586e-06,
      "loss": 0.1299,
      "step": 7306
    },
    {
      "epoch": 0.6732390473119271,
      "grad_norm": 0.9670118862538364,
      "learning_rate": 1.3228100387736353e-06,
      "loss": 0.1377,
      "step": 7307
    },
    {
      "epoch": 0.6733311834891971,
      "grad_norm": 0.9237445429388595,
      "learning_rate": 1.3221380490122276e-06,
      "loss": 0.1286,
      "step": 7308
    },
    {
      "epoch": 0.6734233196664671,
      "grad_norm": 0.9893606523013816,
      "learning_rate": 1.321466168615428e-06,
      "loss": 0.1281,
      "step": 7309
    },
    {
      "epoch": 0.6735154558437371,
      "grad_norm": 0.9510171646860528,
      "learning_rate": 1.3207943976456223e-06,
      "loss": 0.1281,
      "step": 7310
    },
    {
      "epoch": 0.6736075920210071,
      "grad_norm": 0.9048525408219783,
      "learning_rate": 1.3201227361651824e-06,
      "loss": 0.1258,
      "step": 7311
    },
    {
      "epoch": 0.6736997281982771,
      "grad_norm": 0.9484261385740433,
      "learning_rate": 1.3194511842364738e-06,
      "loss": 0.1188,
      "step": 7312
    },
    {
      "epoch": 0.6737918643755471,
      "grad_norm": 0.9452879348278467,
      "learning_rate": 1.3187797419218506e-06,
      "loss": 0.1274,
      "step": 7313
    },
    {
      "epoch": 0.673884000552817,
      "grad_norm": 1.006014751364486,
      "learning_rate": 1.3181084092836544e-06,
      "loss": 0.1361,
      "step": 7314
    },
    {
      "epoch": 0.673976136730087,
      "grad_norm": 0.9742076329629352,
      "learning_rate": 1.31743718638422e-06,
      "loss": 0.133,
      "step": 7315
    },
    {
      "epoch": 0.674068272907357,
      "grad_norm": 0.99308011921525,
      "learning_rate": 1.3167660732858705e-06,
      "loss": 0.145,
      "step": 7316
    },
    {
      "epoch": 0.674160409084627,
      "grad_norm": 0.9007162632362339,
      "learning_rate": 1.316095070050919e-06,
      "loss": 0.1169,
      "step": 7317
    },
    {
      "epoch": 0.674252545261897,
      "grad_norm": 0.9403181481368799,
      "learning_rate": 1.3154241767416665e-06,
      "loss": 0.1266,
      "step": 7318
    },
    {
      "epoch": 0.674344681439167,
      "grad_norm": 0.8985720780848173,
      "learning_rate": 1.3147533934204065e-06,
      "loss": 0.1148,
      "step": 7319
    },
    {
      "epoch": 0.6744368176164371,
      "grad_norm": 0.8679435821138797,
      "learning_rate": 1.3140827201494215e-06,
      "loss": 0.1242,
      "step": 7320
    },
    {
      "epoch": 0.6745289537937071,
      "grad_norm": 0.9096030324469376,
      "learning_rate": 1.313412156990985e-06,
      "loss": 0.1295,
      "step": 7321
    },
    {
      "epoch": 0.6746210899709771,
      "grad_norm": 0.9524739379153031,
      "learning_rate": 1.312741704007357e-06,
      "loss": 0.136,
      "step": 7322
    },
    {
      "epoch": 0.6747132261482471,
      "grad_norm": 0.9364638633954382,
      "learning_rate": 1.3120713612607888e-06,
      "loss": 0.1259,
      "step": 7323
    },
    {
      "epoch": 0.6748053623255171,
      "grad_norm": 0.9004953190539895,
      "learning_rate": 1.3114011288135225e-06,
      "loss": 0.1264,
      "step": 7324
    },
    {
      "epoch": 0.6748974985027871,
      "grad_norm": 0.9161284739862543,
      "learning_rate": 1.31073100672779e-06,
      "loss": 0.1181,
      "step": 7325
    },
    {
      "epoch": 0.6749896346800571,
      "grad_norm": 0.9414480947386061,
      "learning_rate": 1.3100609950658109e-06,
      "loss": 0.1223,
      "step": 7326
    },
    {
      "epoch": 0.6750817708573271,
      "grad_norm": 0.904014912340006,
      "learning_rate": 1.3093910938897972e-06,
      "loss": 0.1266,
      "step": 7327
    },
    {
      "epoch": 0.6751739070345971,
      "grad_norm": 0.8955658720631855,
      "learning_rate": 1.3087213032619478e-06,
      "loss": 0.117,
      "step": 7328
    },
    {
      "epoch": 0.6752660432118671,
      "grad_norm": 0.9207589123517635,
      "learning_rate": 1.3080516232444545e-06,
      "loss": 0.1212,
      "step": 7329
    },
    {
      "epoch": 0.6753581793891371,
      "grad_norm": 0.9392613606296041,
      "learning_rate": 1.3073820538994952e-06,
      "loss": 0.1307,
      "step": 7330
    },
    {
      "epoch": 0.6754503155664071,
      "grad_norm": 0.8560776006565872,
      "learning_rate": 1.3067125952892408e-06,
      "loss": 0.1074,
      "step": 7331
    },
    {
      "epoch": 0.6755424517436771,
      "grad_norm": 0.8545937841158776,
      "learning_rate": 1.3060432474758508e-06,
      "loss": 0.1121,
      "step": 7332
    },
    {
      "epoch": 0.6756345879209471,
      "grad_norm": 0.9466866706239028,
      "learning_rate": 1.3053740105214741e-06,
      "loss": 0.1374,
      "step": 7333
    },
    {
      "epoch": 0.6757267240982172,
      "grad_norm": 0.8949652527185565,
      "learning_rate": 1.3047048844882481e-06,
      "loss": 0.1158,
      "step": 7334
    },
    {
      "epoch": 0.6758188602754872,
      "grad_norm": 0.8689164231602294,
      "learning_rate": 1.304035869438302e-06,
      "loss": 0.111,
      "step": 7335
    },
    {
      "epoch": 0.6759109964527572,
      "grad_norm": 0.9635373718004209,
      "learning_rate": 1.3033669654337544e-06,
      "loss": 0.1406,
      "step": 7336
    },
    {
      "epoch": 0.6760031326300272,
      "grad_norm": 0.9337974075065598,
      "learning_rate": 1.302698172536714e-06,
      "loss": 0.1214,
      "step": 7337
    },
    {
      "epoch": 0.6760952688072972,
      "grad_norm": 0.8946904614413712,
      "learning_rate": 1.3020294908092767e-06,
      "loss": 0.1255,
      "step": 7338
    },
    {
      "epoch": 0.6761874049845672,
      "grad_norm": 0.9691555259612942,
      "learning_rate": 1.3013609203135297e-06,
      "loss": 0.1268,
      "step": 7339
    },
    {
      "epoch": 0.6762795411618372,
      "grad_norm": 0.9120333872404491,
      "learning_rate": 1.3006924611115495e-06,
      "loss": 0.1238,
      "step": 7340
    },
    {
      "epoch": 0.6763716773391072,
      "grad_norm": 0.9405321337635649,
      "learning_rate": 1.300024113265404e-06,
      "loss": 0.1232,
      "step": 7341
    },
    {
      "epoch": 0.6764638135163772,
      "grad_norm": 0.9354071231407695,
      "learning_rate": 1.2993558768371494e-06,
      "loss": 0.1262,
      "step": 7342
    },
    {
      "epoch": 0.6765559496936472,
      "grad_norm": 0.8924751034495181,
      "learning_rate": 1.2986877518888307e-06,
      "loss": 0.1272,
      "step": 7343
    },
    {
      "epoch": 0.6766480858709172,
      "grad_norm": 0.9034305897586791,
      "learning_rate": 1.2980197384824828e-06,
      "loss": 0.1189,
      "step": 7344
    },
    {
      "epoch": 0.6767402220481872,
      "grad_norm": 0.8854680747678314,
      "learning_rate": 1.2973518366801315e-06,
      "loss": 0.1124,
      "step": 7345
    },
    {
      "epoch": 0.6768323582254572,
      "grad_norm": 0.9314473818384239,
      "learning_rate": 1.2966840465437923e-06,
      "loss": 0.1333,
      "step": 7346
    },
    {
      "epoch": 0.6769244944027273,
      "grad_norm": 0.9275619556132121,
      "learning_rate": 1.2960163681354683e-06,
      "loss": 0.1216,
      "step": 7347
    },
    {
      "epoch": 0.6770166305799973,
      "grad_norm": 0.8869121761739572,
      "learning_rate": 1.2953488015171551e-06,
      "loss": 0.1186,
      "step": 7348
    },
    {
      "epoch": 0.6771087667572673,
      "grad_norm": 0.818807364000548,
      "learning_rate": 1.294681346750834e-06,
      "loss": 0.1117,
      "step": 7349
    },
    {
      "epoch": 0.6772009029345373,
      "grad_norm": 0.9352558114757723,
      "learning_rate": 1.294014003898481e-06,
      "loss": 0.1212,
      "step": 7350
    },
    {
      "epoch": 0.6772930391118073,
      "grad_norm": 1.041324186976536,
      "learning_rate": 1.2933467730220562e-06,
      "loss": 0.1429,
      "step": 7351
    },
    {
      "epoch": 0.6773851752890773,
      "grad_norm": 0.9600592474772713,
      "learning_rate": 1.2926796541835135e-06,
      "loss": 0.1338,
      "step": 7352
    },
    {
      "epoch": 0.6774773114663473,
      "grad_norm": 0.9034424041374622,
      "learning_rate": 1.2920126474447957e-06,
      "loss": 0.1153,
      "step": 7353
    },
    {
      "epoch": 0.6775694476436173,
      "grad_norm": 1.0130396180529198,
      "learning_rate": 1.2913457528678335e-06,
      "loss": 0.1418,
      "step": 7354
    },
    {
      "epoch": 0.6776615838208873,
      "grad_norm": 0.9177987594541854,
      "learning_rate": 1.2906789705145475e-06,
      "loss": 0.1257,
      "step": 7355
    },
    {
      "epoch": 0.6777537199981573,
      "grad_norm": 0.943278223201491,
      "learning_rate": 1.2900123004468493e-06,
      "loss": 0.1156,
      "step": 7356
    },
    {
      "epoch": 0.6778458561754273,
      "grad_norm": 0.9010057141578023,
      "learning_rate": 1.289345742726639e-06,
      "loss": 0.1326,
      "step": 7357
    },
    {
      "epoch": 0.6779379923526972,
      "grad_norm": 0.9178511003467293,
      "learning_rate": 1.288679297415808e-06,
      "loss": 0.1248,
      "step": 7358
    },
    {
      "epoch": 0.6780301285299672,
      "grad_norm": 0.9031359655111605,
      "learning_rate": 1.2880129645762344e-06,
      "loss": 0.1176,
      "step": 7359
    },
    {
      "epoch": 0.6781222647072372,
      "grad_norm": 0.9436874791851032,
      "learning_rate": 1.2873467442697862e-06,
      "loss": 0.1354,
      "step": 7360
    },
    {
      "epoch": 0.6782144008845074,
      "grad_norm": 0.9241098072391299,
      "learning_rate": 1.286680636558324e-06,
      "loss": 0.1179,
      "step": 7361
    },
    {
      "epoch": 0.6783065370617773,
      "grad_norm": 0.9397598292849974,
      "learning_rate": 1.2860146415036957e-06,
      "loss": 0.1177,
      "step": 7362
    },
    {
      "epoch": 0.6783986732390473,
      "grad_norm": 0.9488656040076715,
      "learning_rate": 1.2853487591677377e-06,
      "loss": 0.1226,
      "step": 7363
    },
    {
      "epoch": 0.6784908094163173,
      "grad_norm": 0.8690902550103791,
      "learning_rate": 1.2846829896122792e-06,
      "loss": 0.1116,
      "step": 7364
    },
    {
      "epoch": 0.6785829455935873,
      "grad_norm": 0.9312076330382117,
      "learning_rate": 1.284017332899135e-06,
      "loss": 0.1259,
      "step": 7365
    },
    {
      "epoch": 0.6786750817708573,
      "grad_norm": 0.923409407118127,
      "learning_rate": 1.283351789090113e-06,
      "loss": 0.1182,
      "step": 7366
    },
    {
      "epoch": 0.6787672179481273,
      "grad_norm": 0.9441374226173443,
      "learning_rate": 1.2826863582470078e-06,
      "loss": 0.1254,
      "step": 7367
    },
    {
      "epoch": 0.6788593541253973,
      "grad_norm": 0.9020458710503054,
      "learning_rate": 1.2820210404316053e-06,
      "loss": 0.1124,
      "step": 7368
    },
    {
      "epoch": 0.6789514903026673,
      "grad_norm": 0.9364867537207302,
      "learning_rate": 1.2813558357056806e-06,
      "loss": 0.1288,
      "step": 7369
    },
    {
      "epoch": 0.6790436264799373,
      "grad_norm": 1.0236374395047134,
      "learning_rate": 1.2806907441309974e-06,
      "loss": 0.1373,
      "step": 7370
    },
    {
      "epoch": 0.6791357626572073,
      "grad_norm": 0.8698015409414299,
      "learning_rate": 1.2800257657693105e-06,
      "loss": 0.1099,
      "step": 7371
    },
    {
      "epoch": 0.6792278988344773,
      "grad_norm": 0.9205891854687096,
      "learning_rate": 1.2793609006823615e-06,
      "loss": 0.1208,
      "step": 7372
    },
    {
      "epoch": 0.6793200350117473,
      "grad_norm": 1.0175982193294322,
      "learning_rate": 1.2786961489318842e-06,
      "loss": 0.1436,
      "step": 7373
    },
    {
      "epoch": 0.6794121711890174,
      "grad_norm": 0.9473057310918341,
      "learning_rate": 1.278031510579602e-06,
      "loss": 0.1197,
      "step": 7374
    },
    {
      "epoch": 0.6795043073662874,
      "grad_norm": 0.9997006968274136,
      "learning_rate": 1.2773669856872256e-06,
      "loss": 0.1398,
      "step": 7375
    },
    {
      "epoch": 0.6795964435435574,
      "grad_norm": 0.879881394116734,
      "learning_rate": 1.2767025743164551e-06,
      "loss": 0.1183,
      "step": 7376
    },
    {
      "epoch": 0.6796885797208274,
      "grad_norm": 0.8972780003606752,
      "learning_rate": 1.2760382765289821e-06,
      "loss": 0.1192,
      "step": 7377
    },
    {
      "epoch": 0.6797807158980974,
      "grad_norm": 0.9385686665101676,
      "learning_rate": 1.275374092386487e-06,
      "loss": 0.1281,
      "step": 7378
    },
    {
      "epoch": 0.6798728520753674,
      "grad_norm": 0.8774908119041022,
      "learning_rate": 1.2747100219506404e-06,
      "loss": 0.1199,
      "step": 7379
    },
    {
      "epoch": 0.6799649882526374,
      "grad_norm": 0.9192489819603069,
      "learning_rate": 1.2740460652831e-06,
      "loss": 0.1258,
      "step": 7380
    },
    {
      "epoch": 0.6800571244299074,
      "grad_norm": 0.9323643542426164,
      "learning_rate": 1.2733822224455133e-06,
      "loss": 0.1309,
      "step": 7381
    },
    {
      "epoch": 0.6801492606071774,
      "grad_norm": 1.0334964401625448,
      "learning_rate": 1.272718493499519e-06,
      "loss": 0.1378,
      "step": 7382
    },
    {
      "epoch": 0.6802413967844474,
      "grad_norm": 0.9062491939851149,
      "learning_rate": 1.272054878506746e-06,
      "loss": 0.126,
      "step": 7383
    },
    {
      "epoch": 0.6803335329617174,
      "grad_norm": 0.8902882054217731,
      "learning_rate": 1.2713913775288086e-06,
      "loss": 0.1158,
      "step": 7384
    },
    {
      "epoch": 0.6804256691389874,
      "grad_norm": 0.8903105818667769,
      "learning_rate": 1.2707279906273152e-06,
      "loss": 0.1278,
      "step": 7385
    },
    {
      "epoch": 0.6805178053162574,
      "grad_norm": 0.9324559481614326,
      "learning_rate": 1.270064717863859e-06,
      "loss": 0.1305,
      "step": 7386
    },
    {
      "epoch": 0.6806099414935274,
      "grad_norm": 0.8959648512122598,
      "learning_rate": 1.269401559300027e-06,
      "loss": 0.1273,
      "step": 7387
    },
    {
      "epoch": 0.6807020776707975,
      "grad_norm": 0.9152200447009375,
      "learning_rate": 1.2687385149973919e-06,
      "loss": 0.1353,
      "step": 7388
    },
    {
      "epoch": 0.6807942138480675,
      "grad_norm": 0.8724993170234326,
      "learning_rate": 1.268075585017518e-06,
      "loss": 0.1169,
      "step": 7389
    },
    {
      "epoch": 0.6808863500253375,
      "grad_norm": 0.9065934723664036,
      "learning_rate": 1.2674127694219588e-06,
      "loss": 0.1258,
      "step": 7390
    },
    {
      "epoch": 0.6809784862026075,
      "grad_norm": 1.0023250137596291,
      "learning_rate": 1.2667500682722584e-06,
      "loss": 0.1295,
      "step": 7391
    },
    {
      "epoch": 0.6810706223798775,
      "grad_norm": 0.9139762424853833,
      "learning_rate": 1.266087481629945e-06,
      "loss": 0.1222,
      "step": 7392
    },
    {
      "epoch": 0.6811627585571475,
      "grad_norm": 0.986025500116549,
      "learning_rate": 1.2654250095565417e-06,
      "loss": 0.1304,
      "step": 7393
    },
    {
      "epoch": 0.6812548947344175,
      "grad_norm": 0.9687913893761001,
      "learning_rate": 1.2647626521135592e-06,
      "loss": 0.1219,
      "step": 7394
    },
    {
      "epoch": 0.6813470309116875,
      "grad_norm": 1.0229554853329992,
      "learning_rate": 1.2641004093624981e-06,
      "loss": 0.131,
      "step": 7395
    },
    {
      "epoch": 0.6814391670889575,
      "grad_norm": 0.9457520902813298,
      "learning_rate": 1.2634382813648462e-06,
      "loss": 0.13,
      "step": 7396
    },
    {
      "epoch": 0.6815313032662275,
      "grad_norm": 0.8981736708544468,
      "learning_rate": 1.262776268182084e-06,
      "loss": 0.1344,
      "step": 7397
    },
    {
      "epoch": 0.6816234394434975,
      "grad_norm": 0.9310753904940697,
      "learning_rate": 1.2621143698756778e-06,
      "loss": 0.1256,
      "step": 7398
    },
    {
      "epoch": 0.6817155756207675,
      "grad_norm": 0.9271288321729084,
      "learning_rate": 1.2614525865070848e-06,
      "loss": 0.1283,
      "step": 7399
    },
    {
      "epoch": 0.6818077117980375,
      "grad_norm": 0.9112315579438799,
      "learning_rate": 1.260790918137754e-06,
      "loss": 0.1319,
      "step": 7400
    },
    {
      "epoch": 0.6818998479753074,
      "grad_norm": 0.9539092052600017,
      "learning_rate": 1.2601293648291184e-06,
      "loss": 0.1239,
      "step": 7401
    },
    {
      "epoch": 0.6819919841525776,
      "grad_norm": 0.9703369630185199,
      "learning_rate": 1.2594679266426063e-06,
      "loss": 0.1301,
      "step": 7402
    },
    {
      "epoch": 0.6820841203298476,
      "grad_norm": 0.9137480063164818,
      "learning_rate": 1.2588066036396294e-06,
      "loss": 0.1171,
      "step": 7403
    },
    {
      "epoch": 0.6821762565071176,
      "grad_norm": 0.9058815845580901,
      "learning_rate": 1.2581453958815937e-06,
      "loss": 0.1195,
      "step": 7404
    },
    {
      "epoch": 0.6822683926843875,
      "grad_norm": 0.9045402132109076,
      "learning_rate": 1.2574843034298912e-06,
      "loss": 0.1265,
      "step": 7405
    },
    {
      "epoch": 0.6823605288616575,
      "grad_norm": 0.9698781357496576,
      "learning_rate": 1.2568233263459042e-06,
      "loss": 0.1312,
      "step": 7406
    },
    {
      "epoch": 0.6824526650389275,
      "grad_norm": 1.0278967298327988,
      "learning_rate": 1.2561624646910064e-06,
      "loss": 0.1346,
      "step": 7407
    },
    {
      "epoch": 0.6825448012161975,
      "grad_norm": 0.9620339931384371,
      "learning_rate": 1.2555017185265578e-06,
      "loss": 0.1376,
      "step": 7408
    },
    {
      "epoch": 0.6826369373934675,
      "grad_norm": 0.8746590584378339,
      "learning_rate": 1.2548410879139072e-06,
      "loss": 0.105,
      "step": 7409
    },
    {
      "epoch": 0.6827290735707375,
      "grad_norm": 0.9537799769999069,
      "learning_rate": 1.254180572914396e-06,
      "loss": 0.1309,
      "step": 7410
    },
    {
      "epoch": 0.6828212097480075,
      "grad_norm": 0.8835486213792817,
      "learning_rate": 1.2535201735893526e-06,
      "loss": 0.1168,
      "step": 7411
    },
    {
      "epoch": 0.6829133459252775,
      "grad_norm": 0.9677867977994593,
      "learning_rate": 1.252859890000096e-06,
      "loss": 0.1247,
      "step": 7412
    },
    {
      "epoch": 0.6830054821025475,
      "grad_norm": 0.9173729144330235,
      "learning_rate": 1.252199722207933e-06,
      "loss": 0.1182,
      "step": 7413
    },
    {
      "epoch": 0.6830976182798175,
      "grad_norm": 0.9489197558146778,
      "learning_rate": 1.2515396702741593e-06,
      "loss": 0.1275,
      "step": 7414
    },
    {
      "epoch": 0.6831897544570876,
      "grad_norm": 1.0231784534441966,
      "learning_rate": 1.2508797342600613e-06,
      "loss": 0.137,
      "step": 7415
    },
    {
      "epoch": 0.6832818906343576,
      "grad_norm": 0.9137554998444966,
      "learning_rate": 1.2502199142269154e-06,
      "loss": 0.1262,
      "step": 7416
    },
    {
      "epoch": 0.6833740268116276,
      "grad_norm": 0.959616464056135,
      "learning_rate": 1.2495602102359837e-06,
      "loss": 0.1159,
      "step": 7417
    },
    {
      "epoch": 0.6834661629888976,
      "grad_norm": 0.9443226724457511,
      "learning_rate": 1.2489006223485225e-06,
      "loss": 0.1318,
      "step": 7418
    },
    {
      "epoch": 0.6835582991661676,
      "grad_norm": 0.9055951065867156,
      "learning_rate": 1.2482411506257722e-06,
      "loss": 0.1217,
      "step": 7419
    },
    {
      "epoch": 0.6836504353434376,
      "grad_norm": 0.8964410503789555,
      "learning_rate": 1.2475817951289665e-06,
      "loss": 0.1219,
      "step": 7420
    },
    {
      "epoch": 0.6837425715207076,
      "grad_norm": 0.9692218760916794,
      "learning_rate": 1.2469225559193251e-06,
      "loss": 0.1351,
      "step": 7421
    },
    {
      "epoch": 0.6838347076979776,
      "grad_norm": 0.913552989477623,
      "learning_rate": 1.2462634330580593e-06,
      "loss": 0.1179,
      "step": 7422
    },
    {
      "epoch": 0.6839268438752476,
      "grad_norm": 0.9013278373038858,
      "learning_rate": 1.2456044266063694e-06,
      "loss": 0.1231,
      "step": 7423
    },
    {
      "epoch": 0.6840189800525176,
      "grad_norm": 0.9833694142508832,
      "learning_rate": 1.2449455366254434e-06,
      "loss": 0.1342,
      "step": 7424
    },
    {
      "epoch": 0.6841111162297876,
      "grad_norm": 0.8884726425002472,
      "learning_rate": 1.2442867631764588e-06,
      "loss": 0.1214,
      "step": 7425
    },
    {
      "epoch": 0.6842032524070576,
      "grad_norm": 0.9212373080375857,
      "learning_rate": 1.2436281063205833e-06,
      "loss": 0.1264,
      "step": 7426
    },
    {
      "epoch": 0.6842953885843276,
      "grad_norm": 0.961439810407205,
      "learning_rate": 1.2429695661189731e-06,
      "loss": 0.1297,
      "step": 7427
    },
    {
      "epoch": 0.6843875247615976,
      "grad_norm": 0.9165466783704851,
      "learning_rate": 1.242311142632775e-06,
      "loss": 0.133,
      "step": 7428
    },
    {
      "epoch": 0.6844796609388677,
      "grad_norm": 0.9031234366618096,
      "learning_rate": 1.2416528359231228e-06,
      "loss": 0.1201,
      "step": 7429
    },
    {
      "epoch": 0.6845717971161377,
      "grad_norm": 0.923906786213281,
      "learning_rate": 1.240994646051139e-06,
      "loss": 0.1336,
      "step": 7430
    },
    {
      "epoch": 0.6846639332934077,
      "grad_norm": 0.959066739495178,
      "learning_rate": 1.2403365730779383e-06,
      "loss": 0.1339,
      "step": 7431
    },
    {
      "epoch": 0.6847560694706777,
      "grad_norm": 0.9688337673057995,
      "learning_rate": 1.2396786170646218e-06,
      "loss": 0.128,
      "step": 7432
    },
    {
      "epoch": 0.6848482056479477,
      "grad_norm": 0.9374828794417941,
      "learning_rate": 1.2390207780722827e-06,
      "loss": 0.1191,
      "step": 7433
    },
    {
      "epoch": 0.6849403418252177,
      "grad_norm": 0.9645857123533416,
      "learning_rate": 1.238363056162e-06,
      "loss": 0.1268,
      "step": 7434
    },
    {
      "epoch": 0.6850324780024877,
      "grad_norm": 0.917977034506336,
      "learning_rate": 1.2377054513948423e-06,
      "loss": 0.1317,
      "step": 7435
    },
    {
      "epoch": 0.6851246141797577,
      "grad_norm": 0.903871262985241,
      "learning_rate": 1.2370479638318692e-06,
      "loss": 0.1263,
      "step": 7436
    },
    {
      "epoch": 0.6852167503570277,
      "grad_norm": 0.9181670238680173,
      "learning_rate": 1.2363905935341295e-06,
      "loss": 0.1237,
      "step": 7437
    },
    {
      "epoch": 0.6853088865342977,
      "grad_norm": 0.9205604948512467,
      "learning_rate": 1.235733340562658e-06,
      "loss": 0.1302,
      "step": 7438
    },
    {
      "epoch": 0.6854010227115677,
      "grad_norm": 0.8849849610348361,
      "learning_rate": 1.2350762049784835e-06,
      "loss": 0.1192,
      "step": 7439
    },
    {
      "epoch": 0.6854931588888377,
      "grad_norm": 0.9159893977573488,
      "learning_rate": 1.2344191868426181e-06,
      "loss": 0.1278,
      "step": 7440
    },
    {
      "epoch": 0.6855852950661077,
      "grad_norm": 0.9363760794106957,
      "learning_rate": 1.2337622862160687e-06,
      "loss": 0.1238,
      "step": 7441
    },
    {
      "epoch": 0.6856774312433778,
      "grad_norm": 0.8938566236978432,
      "learning_rate": 1.233105503159826e-06,
      "loss": 0.1244,
      "step": 7442
    },
    {
      "epoch": 0.6857695674206478,
      "grad_norm": 0.8527983351894046,
      "learning_rate": 1.2324488377348736e-06,
      "loss": 0.1087,
      "step": 7443
    },
    {
      "epoch": 0.6858617035979178,
      "grad_norm": 0.907390478133244,
      "learning_rate": 1.2317922900021843e-06,
      "loss": 0.1223,
      "step": 7444
    },
    {
      "epoch": 0.6859538397751878,
      "grad_norm": 0.9151103834295761,
      "learning_rate": 1.2311358600227172e-06,
      "loss": 0.118,
      "step": 7445
    },
    {
      "epoch": 0.6860459759524578,
      "grad_norm": 0.9384525478458614,
      "learning_rate": 1.2304795478574211e-06,
      "loss": 0.1201,
      "step": 7446
    },
    {
      "epoch": 0.6861381121297278,
      "grad_norm": 0.8967253804927608,
      "learning_rate": 1.2298233535672357e-06,
      "loss": 0.1254,
      "step": 7447
    },
    {
      "epoch": 0.6862302483069977,
      "grad_norm": 0.9329650086140051,
      "learning_rate": 1.2291672772130885e-06,
      "loss": 0.1379,
      "step": 7448
    },
    {
      "epoch": 0.6863223844842677,
      "grad_norm": 0.9485151638142946,
      "learning_rate": 1.2285113188558975e-06,
      "loss": 0.1324,
      "step": 7449
    },
    {
      "epoch": 0.6864145206615377,
      "grad_norm": 0.9190267891724095,
      "learning_rate": 1.2278554785565671e-06,
      "loss": 0.1328,
      "step": 7450
    },
    {
      "epoch": 0.6865066568388077,
      "grad_norm": 0.929404692376141,
      "learning_rate": 1.2271997563759918e-06,
      "loss": 0.1326,
      "step": 7451
    },
    {
      "epoch": 0.6865987930160777,
      "grad_norm": 0.9353304920500429,
      "learning_rate": 1.226544152375056e-06,
      "loss": 0.1309,
      "step": 7452
    },
    {
      "epoch": 0.6866909291933477,
      "grad_norm": 0.9166655846212474,
      "learning_rate": 1.2258886666146335e-06,
      "loss": 0.1204,
      "step": 7453
    },
    {
      "epoch": 0.6867830653706177,
      "grad_norm": 0.8790079523079889,
      "learning_rate": 1.2252332991555846e-06,
      "loss": 0.1168,
      "step": 7454
    },
    {
      "epoch": 0.6868752015478877,
      "grad_norm": 0.9637556673833563,
      "learning_rate": 1.224578050058762e-06,
      "loss": 0.1382,
      "step": 7455
    },
    {
      "epoch": 0.6869673377251578,
      "grad_norm": 0.8295194190943669,
      "learning_rate": 1.2239229193850039e-06,
      "loss": 0.1025,
      "step": 7456
    },
    {
      "epoch": 0.6870594739024278,
      "grad_norm": 0.9419495896513839,
      "learning_rate": 1.2232679071951398e-06,
      "loss": 0.129,
      "step": 7457
    },
    {
      "epoch": 0.6871516100796978,
      "grad_norm": 0.90361666710837,
      "learning_rate": 1.2226130135499891e-06,
      "loss": 0.1215,
      "step": 7458
    },
    {
      "epoch": 0.6872437462569678,
      "grad_norm": 0.8778510526809243,
      "learning_rate": 1.2219582385103564e-06,
      "loss": 0.1186,
      "step": 7459
    },
    {
      "epoch": 0.6873358824342378,
      "grad_norm": 0.9003346616886775,
      "learning_rate": 1.2213035821370401e-06,
      "loss": 0.1193,
      "step": 7460
    },
    {
      "epoch": 0.6874280186115078,
      "grad_norm": 0.9341321475259016,
      "learning_rate": 1.2206490444908226e-06,
      "loss": 0.1184,
      "step": 7461
    },
    {
      "epoch": 0.6875201547887778,
      "grad_norm": 1.029120204312684,
      "learning_rate": 1.21999462563248e-06,
      "loss": 0.139,
      "step": 7462
    },
    {
      "epoch": 0.6876122909660478,
      "grad_norm": 0.991534769651168,
      "learning_rate": 1.2193403256227731e-06,
      "loss": 0.1412,
      "step": 7463
    },
    {
      "epoch": 0.6877044271433178,
      "grad_norm": 0.9770012066753403,
      "learning_rate": 1.2186861445224548e-06,
      "loss": 0.1279,
      "step": 7464
    },
    {
      "epoch": 0.6877965633205878,
      "grad_norm": 0.9755692964671077,
      "learning_rate": 1.2180320823922662e-06,
      "loss": 0.1307,
      "step": 7465
    },
    {
      "epoch": 0.6878886994978578,
      "grad_norm": 0.9479539739269719,
      "learning_rate": 1.2173781392929383e-06,
      "loss": 0.1119,
      "step": 7466
    },
    {
      "epoch": 0.6879808356751278,
      "grad_norm": 0.9075991770684234,
      "learning_rate": 1.2167243152851862e-06,
      "loss": 0.1236,
      "step": 7467
    },
    {
      "epoch": 0.6880729718523978,
      "grad_norm": 0.9412169301247907,
      "learning_rate": 1.21607061042972e-06,
      "loss": 0.1231,
      "step": 7468
    },
    {
      "epoch": 0.6881651080296678,
      "grad_norm": 0.9310202024859918,
      "learning_rate": 1.2154170247872354e-06,
      "loss": 0.1295,
      "step": 7469
    },
    {
      "epoch": 0.6882572442069379,
      "grad_norm": 0.8808125116214799,
      "learning_rate": 1.2147635584184194e-06,
      "loss": 0.1219,
      "step": 7470
    },
    {
      "epoch": 0.6883493803842079,
      "grad_norm": 0.9574965439429187,
      "learning_rate": 1.2141102113839442e-06,
      "loss": 0.126,
      "step": 7471
    },
    {
      "epoch": 0.6884415165614779,
      "grad_norm": 0.9585891755121796,
      "learning_rate": 1.2134569837444755e-06,
      "loss": 0.1192,
      "step": 7472
    },
    {
      "epoch": 0.6885336527387479,
      "grad_norm": 0.9390435430840817,
      "learning_rate": 1.2128038755606632e-06,
      "loss": 0.1312,
      "step": 7473
    },
    {
      "epoch": 0.6886257889160179,
      "grad_norm": 0.9404765976585037,
      "learning_rate": 1.2121508868931507e-06,
      "loss": 0.1272,
      "step": 7474
    },
    {
      "epoch": 0.6887179250932879,
      "grad_norm": 0.9462706939442835,
      "learning_rate": 1.2114980178025657e-06,
      "loss": 0.1238,
      "step": 7475
    },
    {
      "epoch": 0.6888100612705579,
      "grad_norm": 0.9467478685012891,
      "learning_rate": 1.2108452683495286e-06,
      "loss": 0.1275,
      "step": 7476
    },
    {
      "epoch": 0.6889021974478279,
      "grad_norm": 0.9634104501865915,
      "learning_rate": 1.210192638594648e-06,
      "loss": 0.135,
      "step": 7477
    },
    {
      "epoch": 0.6889943336250979,
      "grad_norm": 0.9728865732091142,
      "learning_rate": 1.2095401285985197e-06,
      "loss": 0.1333,
      "step": 7478
    },
    {
      "epoch": 0.6890864698023679,
      "grad_norm": 0.933124824968402,
      "learning_rate": 1.2088877384217286e-06,
      "loss": 0.1293,
      "step": 7479
    },
    {
      "epoch": 0.6891786059796379,
      "grad_norm": 0.9059485628010226,
      "learning_rate": 1.2082354681248495e-06,
      "loss": 0.1212,
      "step": 7480
    },
    {
      "epoch": 0.6892707421569079,
      "grad_norm": 0.8854842763292889,
      "learning_rate": 1.2075833177684465e-06,
      "loss": 0.1144,
      "step": 7481
    },
    {
      "epoch": 0.6893628783341779,
      "grad_norm": 0.922192406975205,
      "learning_rate": 1.2069312874130725e-06,
      "loss": 0.1121,
      "step": 7482
    },
    {
      "epoch": 0.689455014511448,
      "grad_norm": 0.9320451445217117,
      "learning_rate": 1.2062793771192676e-06,
      "loss": 0.1246,
      "step": 7483
    },
    {
      "epoch": 0.689547150688718,
      "grad_norm": 0.8946586316246357,
      "learning_rate": 1.2056275869475606e-06,
      "loss": 0.1246,
      "step": 7484
    },
    {
      "epoch": 0.689639286865988,
      "grad_norm": 0.8953871059867827,
      "learning_rate": 1.2049759169584718e-06,
      "loss": 0.12,
      "step": 7485
    },
    {
      "epoch": 0.689731423043258,
      "grad_norm": 0.9397138554660357,
      "learning_rate": 1.2043243672125083e-06,
      "loss": 0.1231,
      "step": 7486
    },
    {
      "epoch": 0.689823559220528,
      "grad_norm": 0.870590564525269,
      "learning_rate": 1.2036729377701679e-06,
      "loss": 0.1123,
      "step": 7487
    },
    {
      "epoch": 0.689915695397798,
      "grad_norm": 0.9123543737092503,
      "learning_rate": 1.2030216286919343e-06,
      "loss": 0.1241,
      "step": 7488
    },
    {
      "epoch": 0.690007831575068,
      "grad_norm": 0.9315347547200894,
      "learning_rate": 1.2023704400382813e-06,
      "loss": 0.1266,
      "step": 7489
    },
    {
      "epoch": 0.690099967752338,
      "grad_norm": 0.9419355207806347,
      "learning_rate": 1.201719371869673e-06,
      "loss": 0.1259,
      "step": 7490
    },
    {
      "epoch": 0.690192103929608,
      "grad_norm": 1.0714501807054846,
      "learning_rate": 1.2010684242465612e-06,
      "loss": 0.1364,
      "step": 7491
    },
    {
      "epoch": 0.6902842401068779,
      "grad_norm": 0.9120663194555401,
      "learning_rate": 1.2004175972293852e-06,
      "loss": 0.1118,
      "step": 7492
    },
    {
      "epoch": 0.6903763762841479,
      "grad_norm": 0.9301664178778738,
      "learning_rate": 1.1997668908785761e-06,
      "loss": 0.1289,
      "step": 7493
    },
    {
      "epoch": 0.6904685124614179,
      "grad_norm": 0.9135039431916775,
      "learning_rate": 1.1991163052545502e-06,
      "loss": 0.1254,
      "step": 7494
    },
    {
      "epoch": 0.6905606486386879,
      "grad_norm": 0.9372952523507923,
      "learning_rate": 1.1984658404177162e-06,
      "loss": 0.1101,
      "step": 7495
    },
    {
      "epoch": 0.6906527848159579,
      "grad_norm": 0.9081435909843156,
      "learning_rate": 1.1978154964284683e-06,
      "loss": 0.1182,
      "step": 7496
    },
    {
      "epoch": 0.690744920993228,
      "grad_norm": 0.9207021186709088,
      "learning_rate": 1.1971652733471915e-06,
      "loss": 0.1251,
      "step": 7497
    },
    {
      "epoch": 0.690837057170498,
      "grad_norm": 0.9124595681631252,
      "learning_rate": 1.19651517123426e-06,
      "loss": 0.126,
      "step": 7498
    },
    {
      "epoch": 0.690929193347768,
      "grad_norm": 0.9578194252866348,
      "learning_rate": 1.1958651901500356e-06,
      "loss": 0.1233,
      "step": 7499
    },
    {
      "epoch": 0.691021329525038,
      "grad_norm": 0.8642829438776286,
      "learning_rate": 1.1952153301548674e-06,
      "loss": 0.117,
      "step": 7500
    },
    {
      "epoch": 0.691021329525038,
      "eval_loss": 0.12531189620494843,
      "eval_runtime": 299.4056,
      "eval_samples_per_second": 23.436,
      "eval_steps_per_second": 2.932,
      "step": 7500
    },
    {
      "epoch": 0.691113465702308,
      "grad_norm": 0.9056786919297217,
      "learning_rate": 1.1945655913090965e-06,
      "loss": 0.1273,
      "step": 7501
    },
    {
      "epoch": 0.691205601879578,
      "grad_norm": 0.9366350244304404,
      "learning_rate": 1.1939159736730508e-06,
      "loss": 0.1304,
      "step": 7502
    },
    {
      "epoch": 0.691297738056848,
      "grad_norm": 0.9873293906173173,
      "learning_rate": 1.1932664773070483e-06,
      "loss": 0.1305,
      "step": 7503
    },
    {
      "epoch": 0.691389874234118,
      "grad_norm": 0.879909629424238,
      "learning_rate": 1.192617102271394e-06,
      "loss": 0.1246,
      "step": 7504
    },
    {
      "epoch": 0.691482010411388,
      "grad_norm": 0.879155311662118,
      "learning_rate": 1.1919678486263817e-06,
      "loss": 0.1142,
      "step": 7505
    },
    {
      "epoch": 0.691574146588658,
      "grad_norm": 0.9510831799893671,
      "learning_rate": 1.1913187164322954e-06,
      "loss": 0.1268,
      "step": 7506
    },
    {
      "epoch": 0.691666282765928,
      "grad_norm": 0.8829687120011289,
      "learning_rate": 1.190669705749408e-06,
      "loss": 0.1266,
      "step": 7507
    },
    {
      "epoch": 0.691758418943198,
      "grad_norm": 0.8564115074734555,
      "learning_rate": 1.1900208166379784e-06,
      "loss": 0.1147,
      "step": 7508
    },
    {
      "epoch": 0.691850555120468,
      "grad_norm": 0.9176006723966627,
      "learning_rate": 1.1893720491582579e-06,
      "loss": 0.1236,
      "step": 7509
    },
    {
      "epoch": 0.6919426912977381,
      "grad_norm": 0.9154865319735849,
      "learning_rate": 1.1887234033704827e-06,
      "loss": 0.1153,
      "step": 7510
    },
    {
      "epoch": 0.6920348274750081,
      "grad_norm": 0.8846755111687101,
      "learning_rate": 1.1880748793348818e-06,
      "loss": 0.1264,
      "step": 7511
    },
    {
      "epoch": 0.6921269636522781,
      "grad_norm": 0.9063995376950635,
      "learning_rate": 1.1874264771116684e-06,
      "loss": 0.1233,
      "step": 7512
    },
    {
      "epoch": 0.6922190998295481,
      "grad_norm": 0.9119144408492244,
      "learning_rate": 1.1867781967610478e-06,
      "loss": 0.1286,
      "step": 7513
    },
    {
      "epoch": 0.6923112360068181,
      "grad_norm": 0.9227075633333177,
      "learning_rate": 1.186130038343214e-06,
      "loss": 0.1237,
      "step": 7514
    },
    {
      "epoch": 0.6924033721840881,
      "grad_norm": 0.9447847319047454,
      "learning_rate": 1.1854820019183467e-06,
      "loss": 0.1213,
      "step": 7515
    },
    {
      "epoch": 0.6924955083613581,
      "grad_norm": 0.9557400253206907,
      "learning_rate": 1.1848340875466176e-06,
      "loss": 0.1241,
      "step": 7516
    },
    {
      "epoch": 0.6925876445386281,
      "grad_norm": 0.9213338639911549,
      "learning_rate": 1.1841862952881845e-06,
      "loss": 0.1216,
      "step": 7517
    },
    {
      "epoch": 0.6926797807158981,
      "grad_norm": 0.929761471437544,
      "learning_rate": 1.183538625203195e-06,
      "loss": 0.1161,
      "step": 7518
    },
    {
      "epoch": 0.6927719168931681,
      "grad_norm": 0.956777668991843,
      "learning_rate": 1.182891077351787e-06,
      "loss": 0.1286,
      "step": 7519
    },
    {
      "epoch": 0.6928640530704381,
      "grad_norm": 0.9351053697619062,
      "learning_rate": 1.1822436517940844e-06,
      "loss": 0.1376,
      "step": 7520
    },
    {
      "epoch": 0.6929561892477081,
      "grad_norm": 0.9307576923675359,
      "learning_rate": 1.1815963485901994e-06,
      "loss": 0.1233,
      "step": 7521
    },
    {
      "epoch": 0.6930483254249781,
      "grad_norm": 0.9209797831879457,
      "learning_rate": 1.1809491678002356e-06,
      "loss": 0.1235,
      "step": 7522
    },
    {
      "epoch": 0.6931404616022481,
      "grad_norm": 0.8927925400652459,
      "learning_rate": 1.1803021094842831e-06,
      "loss": 0.1257,
      "step": 7523
    },
    {
      "epoch": 0.6932325977795182,
      "grad_norm": 0.8842257147429041,
      "learning_rate": 1.179655173702423e-06,
      "loss": 0.1131,
      "step": 7524
    },
    {
      "epoch": 0.6933247339567882,
      "grad_norm": 0.960345088928631,
      "learning_rate": 1.1790083605147221e-06,
      "loss": 0.1219,
      "step": 7525
    },
    {
      "epoch": 0.6934168701340582,
      "grad_norm": 0.9421499013450262,
      "learning_rate": 1.1783616699812362e-06,
      "loss": 0.1343,
      "step": 7526
    },
    {
      "epoch": 0.6935090063113282,
      "grad_norm": 0.9502278428418999,
      "learning_rate": 1.1777151021620113e-06,
      "loss": 0.1312,
      "step": 7527
    },
    {
      "epoch": 0.6936011424885982,
      "grad_norm": 1.001310947592207,
      "learning_rate": 1.1770686571170824e-06,
      "loss": 0.1333,
      "step": 7528
    },
    {
      "epoch": 0.6936932786658682,
      "grad_norm": 0.9471759067515962,
      "learning_rate": 1.17642233490647e-06,
      "loss": 0.1271,
      "step": 7529
    },
    {
      "epoch": 0.6937854148431382,
      "grad_norm": 0.8974159275769846,
      "learning_rate": 1.1757761355901875e-06,
      "loss": 0.1104,
      "step": 7530
    },
    {
      "epoch": 0.6938775510204082,
      "grad_norm": 0.9724327109166822,
      "learning_rate": 1.1751300592282325e-06,
      "loss": 0.1385,
      "step": 7531
    },
    {
      "epoch": 0.6939696871976782,
      "grad_norm": 0.9465696548880492,
      "learning_rate": 1.1744841058805947e-06,
      "loss": 0.1143,
      "step": 7532
    },
    {
      "epoch": 0.6940618233749482,
      "grad_norm": 0.979571877068151,
      "learning_rate": 1.1738382756072495e-06,
      "loss": 0.1469,
      "step": 7533
    },
    {
      "epoch": 0.6941539595522181,
      "grad_norm": 0.9206628125733626,
      "learning_rate": 1.1731925684681631e-06,
      "loss": 0.1203,
      "step": 7534
    },
    {
      "epoch": 0.6942460957294881,
      "grad_norm": 0.9375005379901026,
      "learning_rate": 1.1725469845232906e-06,
      "loss": 0.1331,
      "step": 7535
    },
    {
      "epoch": 0.6943382319067581,
      "grad_norm": 0.9411215714914509,
      "learning_rate": 1.1719015238325731e-06,
      "loss": 0.1205,
      "step": 7536
    },
    {
      "epoch": 0.6944303680840282,
      "grad_norm": 0.8819105340485545,
      "learning_rate": 1.1712561864559415e-06,
      "loss": 0.1173,
      "step": 7537
    },
    {
      "epoch": 0.6945225042612982,
      "grad_norm": 0.9772820854322495,
      "learning_rate": 1.1706109724533158e-06,
      "loss": 0.1277,
      "step": 7538
    },
    {
      "epoch": 0.6946146404385682,
      "grad_norm": 0.8949011843616254,
      "learning_rate": 1.1699658818846044e-06,
      "loss": 0.1225,
      "step": 7539
    },
    {
      "epoch": 0.6947067766158382,
      "grad_norm": 0.8911294765958074,
      "learning_rate": 1.1693209148097049e-06,
      "loss": 0.1134,
      "step": 7540
    },
    {
      "epoch": 0.6947989127931082,
      "grad_norm": 0.9990537451251741,
      "learning_rate": 1.1686760712885018e-06,
      "loss": 0.1421,
      "step": 7541
    },
    {
      "epoch": 0.6948910489703782,
      "grad_norm": 0.929425147367063,
      "learning_rate": 1.1680313513808677e-06,
      "loss": 0.1273,
      "step": 7542
    },
    {
      "epoch": 0.6949831851476482,
      "grad_norm": 0.9202734532577793,
      "learning_rate": 1.1673867551466658e-06,
      "loss": 0.1231,
      "step": 7543
    },
    {
      "epoch": 0.6950753213249182,
      "grad_norm": 0.8998933513621242,
      "learning_rate": 1.1667422826457475e-06,
      "loss": 0.1212,
      "step": 7544
    },
    {
      "epoch": 0.6951674575021882,
      "grad_norm": 0.9106168799928811,
      "learning_rate": 1.1660979339379524e-06,
      "loss": 0.13,
      "step": 7545
    },
    {
      "epoch": 0.6952595936794582,
      "grad_norm": 0.9458638903120887,
      "learning_rate": 1.1654537090831069e-06,
      "loss": 0.1245,
      "step": 7546
    },
    {
      "epoch": 0.6953517298567282,
      "grad_norm": 0.9451228064476396,
      "learning_rate": 1.164809608141029e-06,
      "loss": 0.1291,
      "step": 7547
    },
    {
      "epoch": 0.6954438660339982,
      "grad_norm": 0.9613647716535906,
      "learning_rate": 1.1641656311715218e-06,
      "loss": 0.1357,
      "step": 7548
    },
    {
      "epoch": 0.6955360022112682,
      "grad_norm": 0.890685272110586,
      "learning_rate": 1.1635217782343801e-06,
      "loss": 0.1119,
      "step": 7549
    },
    {
      "epoch": 0.6956281383885382,
      "grad_norm": 0.9401787383359983,
      "learning_rate": 1.1628780493893849e-06,
      "loss": 0.1146,
      "step": 7550
    },
    {
      "epoch": 0.6957202745658083,
      "grad_norm": 0.9264539393150472,
      "learning_rate": 1.162234444696306e-06,
      "loss": 0.1108,
      "step": 7551
    },
    {
      "epoch": 0.6958124107430783,
      "grad_norm": 0.966080326479011,
      "learning_rate": 1.1615909642149042e-06,
      "loss": 0.1218,
      "step": 7552
    },
    {
      "epoch": 0.6959045469203483,
      "grad_norm": 0.9639128496683719,
      "learning_rate": 1.1609476080049254e-06,
      "loss": 0.1365,
      "step": 7553
    },
    {
      "epoch": 0.6959966830976183,
      "grad_norm": 0.9204230071876955,
      "learning_rate": 1.1603043761261043e-06,
      "loss": 0.1189,
      "step": 7554
    },
    {
      "epoch": 0.6960888192748883,
      "grad_norm": 0.9068194418459751,
      "learning_rate": 1.159661268638166e-06,
      "loss": 0.119,
      "step": 7555
    },
    {
      "epoch": 0.6961809554521583,
      "grad_norm": 0.9406348953180761,
      "learning_rate": 1.1590182856008233e-06,
      "loss": 0.1251,
      "step": 7556
    },
    {
      "epoch": 0.6962730916294283,
      "grad_norm": 1.0019374287174063,
      "learning_rate": 1.158375427073778e-06,
      "loss": 0.1549,
      "step": 7557
    },
    {
      "epoch": 0.6963652278066983,
      "grad_norm": 0.9614692218032151,
      "learning_rate": 1.1577326931167184e-06,
      "loss": 0.1194,
      "step": 7558
    },
    {
      "epoch": 0.6964573639839683,
      "grad_norm": 0.9627814151373388,
      "learning_rate": 1.1570900837893223e-06,
      "loss": 0.1276,
      "step": 7559
    },
    {
      "epoch": 0.6965495001612383,
      "grad_norm": 0.9401742982964019,
      "learning_rate": 1.1564475991512562e-06,
      "loss": 0.1195,
      "step": 7560
    },
    {
      "epoch": 0.6966416363385083,
      "grad_norm": 0.9606928144175957,
      "learning_rate": 1.1558052392621758e-06,
      "loss": 0.1386,
      "step": 7561
    },
    {
      "epoch": 0.6967337725157783,
      "grad_norm": 0.8242620094607082,
      "learning_rate": 1.155163004181723e-06,
      "loss": 0.1104,
      "step": 7562
    },
    {
      "epoch": 0.6968259086930483,
      "grad_norm": 0.9661538812059097,
      "learning_rate": 1.1545208939695306e-06,
      "loss": 0.1233,
      "step": 7563
    },
    {
      "epoch": 0.6969180448703183,
      "grad_norm": 0.9185722917327637,
      "learning_rate": 1.1538789086852173e-06,
      "loss": 0.1172,
      "step": 7564
    },
    {
      "epoch": 0.6970101810475884,
      "grad_norm": 0.9559839661690173,
      "learning_rate": 1.1532370483883931e-06,
      "loss": 0.1308,
      "step": 7565
    },
    {
      "epoch": 0.6971023172248584,
      "grad_norm": 0.9926826971490361,
      "learning_rate": 1.152595313138653e-06,
      "loss": 0.1378,
      "step": 7566
    },
    {
      "epoch": 0.6971944534021284,
      "grad_norm": 0.8957768567257417,
      "learning_rate": 1.151953702995583e-06,
      "loss": 0.1221,
      "step": 7567
    },
    {
      "epoch": 0.6972865895793984,
      "grad_norm": 0.919822250860159,
      "learning_rate": 1.1513122180187577e-06,
      "loss": 0.127,
      "step": 7568
    },
    {
      "epoch": 0.6973787257566684,
      "grad_norm": 0.9537334807702881,
      "learning_rate": 1.150670858267738e-06,
      "loss": 0.1254,
      "step": 7569
    },
    {
      "epoch": 0.6974708619339384,
      "grad_norm": 0.9311165498976608,
      "learning_rate": 1.150029623802074e-06,
      "loss": 0.1345,
      "step": 7570
    },
    {
      "epoch": 0.6975629981112084,
      "grad_norm": 0.8904827529578065,
      "learning_rate": 1.1493885146813042e-06,
      "loss": 0.12,
      "step": 7571
    },
    {
      "epoch": 0.6976551342884784,
      "grad_norm": 0.9123497944185116,
      "learning_rate": 1.148747530964956e-06,
      "loss": 0.1288,
      "step": 7572
    },
    {
      "epoch": 0.6977472704657484,
      "grad_norm": 0.9531719885240827,
      "learning_rate": 1.1481066727125463e-06,
      "loss": 0.1291,
      "step": 7573
    },
    {
      "epoch": 0.6978394066430184,
      "grad_norm": 0.9819613269245462,
      "learning_rate": 1.1474659399835772e-06,
      "loss": 0.1309,
      "step": 7574
    },
    {
      "epoch": 0.6979315428202884,
      "grad_norm": 0.9657481867646279,
      "learning_rate": 1.1468253328375404e-06,
      "loss": 0.1156,
      "step": 7575
    },
    {
      "epoch": 0.6980236789975584,
      "grad_norm": 0.9068680219647643,
      "learning_rate": 1.1461848513339168e-06,
      "loss": 0.1199,
      "step": 7576
    },
    {
      "epoch": 0.6981158151748283,
      "grad_norm": 0.9583994048161718,
      "learning_rate": 1.145544495532176e-06,
      "loss": 0.1276,
      "step": 7577
    },
    {
      "epoch": 0.6982079513520985,
      "grad_norm": 0.9183051486498401,
      "learning_rate": 1.144904265491775e-06,
      "loss": 0.1166,
      "step": 7578
    },
    {
      "epoch": 0.6983000875293685,
      "grad_norm": 0.9558218358159374,
      "learning_rate": 1.1442641612721588e-06,
      "loss": 0.1264,
      "step": 7579
    },
    {
      "epoch": 0.6983922237066384,
      "grad_norm": 0.9610830469191616,
      "learning_rate": 1.1436241829327605e-06,
      "loss": 0.1265,
      "step": 7580
    },
    {
      "epoch": 0.6984843598839084,
      "grad_norm": 0.925704991359198,
      "learning_rate": 1.1429843305330027e-06,
      "loss": 0.1258,
      "step": 7581
    },
    {
      "epoch": 0.6985764960611784,
      "grad_norm": 0.9619080391790512,
      "learning_rate": 1.1423446041322967e-06,
      "loss": 0.1166,
      "step": 7582
    },
    {
      "epoch": 0.6986686322384484,
      "grad_norm": 0.9200858544359843,
      "learning_rate": 1.1417050037900393e-06,
      "loss": 0.1187,
      "step": 7583
    },
    {
      "epoch": 0.6987607684157184,
      "grad_norm": 0.9165477155141011,
      "learning_rate": 1.1410655295656196e-06,
      "loss": 0.1289,
      "step": 7584
    },
    {
      "epoch": 0.6988529045929884,
      "grad_norm": 0.9558792693314704,
      "learning_rate": 1.1404261815184105e-06,
      "loss": 0.1297,
      "step": 7585
    },
    {
      "epoch": 0.6989450407702584,
      "grad_norm": 0.9447699533790705,
      "learning_rate": 1.1397869597077783e-06,
      "loss": 0.1296,
      "step": 7586
    },
    {
      "epoch": 0.6990371769475284,
      "grad_norm": 0.8962723565665709,
      "learning_rate": 1.1391478641930716e-06,
      "loss": 0.1199,
      "step": 7587
    },
    {
      "epoch": 0.6991293131247984,
      "grad_norm": 0.9362449149914991,
      "learning_rate": 1.1385088950336329e-06,
      "loss": 0.1236,
      "step": 7588
    },
    {
      "epoch": 0.6992214493020684,
      "grad_norm": 0.9195942463818534,
      "learning_rate": 1.1378700522887903e-06,
      "loss": 0.1101,
      "step": 7589
    },
    {
      "epoch": 0.6993135854793384,
      "grad_norm": 0.9756437558705977,
      "learning_rate": 1.13723133601786e-06,
      "loss": 0.1366,
      "step": 7590
    },
    {
      "epoch": 0.6994057216566084,
      "grad_norm": 0.8944513848537959,
      "learning_rate": 1.136592746280146e-06,
      "loss": 0.1117,
      "step": 7591
    },
    {
      "epoch": 0.6994978578338785,
      "grad_norm": 0.9330843733670557,
      "learning_rate": 1.1359542831349422e-06,
      "loss": 0.1278,
      "step": 7592
    },
    {
      "epoch": 0.6995899940111485,
      "grad_norm": 0.8788149107087988,
      "learning_rate": 1.1353159466415298e-06,
      "loss": 0.1139,
      "step": 7593
    },
    {
      "epoch": 0.6996821301884185,
      "grad_norm": 0.9187660811222983,
      "learning_rate": 1.1346777368591797e-06,
      "loss": 0.1227,
      "step": 7594
    },
    {
      "epoch": 0.6997742663656885,
      "grad_norm": 0.9293501590683788,
      "learning_rate": 1.1340396538471488e-06,
      "loss": 0.127,
      "step": 7595
    },
    {
      "epoch": 0.6998664025429585,
      "grad_norm": 0.9122089651037086,
      "learning_rate": 1.133401697664682e-06,
      "loss": 0.1247,
      "step": 7596
    },
    {
      "epoch": 0.6999585387202285,
      "grad_norm": 0.9514888912155925,
      "learning_rate": 1.1327638683710146e-06,
      "loss": 0.1195,
      "step": 7597
    },
    {
      "epoch": 0.7000506748974985,
      "grad_norm": 0.9093223008533924,
      "learning_rate": 1.13212616602537e-06,
      "loss": 0.1128,
      "step": 7598
    },
    {
      "epoch": 0.7001428110747685,
      "grad_norm": 0.8769014325228602,
      "learning_rate": 1.1314885906869575e-06,
      "loss": 0.1092,
      "step": 7599
    },
    {
      "epoch": 0.7002349472520385,
      "grad_norm": 0.8726147241906409,
      "learning_rate": 1.1308511424149774e-06,
      "loss": 0.1093,
      "step": 7600
    },
    {
      "epoch": 0.7003270834293085,
      "grad_norm": 0.8949518559323022,
      "learning_rate": 1.1302138212686152e-06,
      "loss": 0.1192,
      "step": 7601
    },
    {
      "epoch": 0.7004192196065785,
      "grad_norm": 0.9241564642082613,
      "learning_rate": 1.1295766273070469e-06,
      "loss": 0.1218,
      "step": 7602
    },
    {
      "epoch": 0.7005113557838485,
      "grad_norm": 0.9788950224549492,
      "learning_rate": 1.1289395605894374e-06,
      "loss": 0.1283,
      "step": 7603
    },
    {
      "epoch": 0.7006034919611185,
      "grad_norm": 1.0243458990881766,
      "learning_rate": 1.1283026211749362e-06,
      "loss": 0.1368,
      "step": 7604
    },
    {
      "epoch": 0.7006956281383886,
      "grad_norm": 0.9374500966330062,
      "learning_rate": 1.127665809122685e-06,
      "loss": 0.1191,
      "step": 7605
    },
    {
      "epoch": 0.7007877643156586,
      "grad_norm": 0.9205880293432904,
      "learning_rate": 1.1270291244918106e-06,
      "loss": 0.1316,
      "step": 7606
    },
    {
      "epoch": 0.7008799004929286,
      "grad_norm": 0.9307359895923712,
      "learning_rate": 1.1263925673414303e-06,
      "loss": 0.1207,
      "step": 7607
    },
    {
      "epoch": 0.7009720366701986,
      "grad_norm": 0.9457635843783687,
      "learning_rate": 1.1257561377306471e-06,
      "loss": 0.1286,
      "step": 7608
    },
    {
      "epoch": 0.7010641728474686,
      "grad_norm": 0.9370338619779593,
      "learning_rate": 1.1251198357185547e-06,
      "loss": 0.1295,
      "step": 7609
    },
    {
      "epoch": 0.7011563090247386,
      "grad_norm": 0.9616310297957011,
      "learning_rate": 1.1244836613642342e-06,
      "loss": 0.1313,
      "step": 7610
    },
    {
      "epoch": 0.7012484452020086,
      "grad_norm": 0.8917892618492652,
      "learning_rate": 1.1238476147267537e-06,
      "loss": 0.1225,
      "step": 7611
    },
    {
      "epoch": 0.7013405813792786,
      "grad_norm": 0.9314012246164496,
      "learning_rate": 1.1232116958651695e-06,
      "loss": 0.1284,
      "step": 7612
    },
    {
      "epoch": 0.7014327175565486,
      "grad_norm": 0.9447613390064742,
      "learning_rate": 1.1225759048385276e-06,
      "loss": 0.124,
      "step": 7613
    },
    {
      "epoch": 0.7015248537338186,
      "grad_norm": 0.9139852735566912,
      "learning_rate": 1.1219402417058611e-06,
      "loss": 0.1252,
      "step": 7614
    },
    {
      "epoch": 0.7016169899110886,
      "grad_norm": 0.8944899755971756,
      "learning_rate": 1.1213047065261922e-06,
      "loss": 0.1209,
      "step": 7615
    },
    {
      "epoch": 0.7017091260883586,
      "grad_norm": 0.9037644191735968,
      "learning_rate": 1.12066929935853e-06,
      "loss": 0.1197,
      "step": 7616
    },
    {
      "epoch": 0.7018012622656286,
      "grad_norm": 0.9280483040196563,
      "learning_rate": 1.1200340202618706e-06,
      "loss": 0.1194,
      "step": 7617
    },
    {
      "epoch": 0.7018933984428986,
      "grad_norm": 0.9171715200445577,
      "learning_rate": 1.1193988692952012e-06,
      "loss": 0.1111,
      "step": 7618
    },
    {
      "epoch": 0.7019855346201687,
      "grad_norm": 0.9146803179316838,
      "learning_rate": 1.118763846517496e-06,
      "loss": 0.1183,
      "step": 7619
    },
    {
      "epoch": 0.7020776707974387,
      "grad_norm": 0.923647975551502,
      "learning_rate": 1.1181289519877156e-06,
      "loss": 0.1249,
      "step": 7620
    },
    {
      "epoch": 0.7021698069747087,
      "grad_norm": 0.9747801502638038,
      "learning_rate": 1.1174941857648105e-06,
      "loss": 0.126,
      "step": 7621
    },
    {
      "epoch": 0.7022619431519787,
      "grad_norm": 0.9391905801381869,
      "learning_rate": 1.11685954790772e-06,
      "loss": 0.1226,
      "step": 7622
    },
    {
      "epoch": 0.7023540793292486,
      "grad_norm": 0.9048641404195729,
      "learning_rate": 1.1162250384753697e-06,
      "loss": 0.1193,
      "step": 7623
    },
    {
      "epoch": 0.7024462155065186,
      "grad_norm": 0.960076295126353,
      "learning_rate": 1.1155906575266722e-06,
      "loss": 0.129,
      "step": 7624
    },
    {
      "epoch": 0.7025383516837886,
      "grad_norm": 0.9925166288127805,
      "learning_rate": 1.1149564051205314e-06,
      "loss": 0.1347,
      "step": 7625
    },
    {
      "epoch": 0.7026304878610586,
      "grad_norm": 0.9426529785588942,
      "learning_rate": 1.114322281315837e-06,
      "loss": 0.1283,
      "step": 7626
    },
    {
      "epoch": 0.7027226240383286,
      "grad_norm": 0.9194194860925674,
      "learning_rate": 1.1136882861714692e-06,
      "loss": 0.1162,
      "step": 7627
    },
    {
      "epoch": 0.7028147602155986,
      "grad_norm": 0.937081948310993,
      "learning_rate": 1.1130544197462933e-06,
      "loss": 0.1226,
      "step": 7628
    },
    {
      "epoch": 0.7029068963928686,
      "grad_norm": 0.9750960465487545,
      "learning_rate": 1.1124206820991628e-06,
      "loss": 0.1211,
      "step": 7629
    },
    {
      "epoch": 0.7029990325701386,
      "grad_norm": 0.9309418428024832,
      "learning_rate": 1.1117870732889214e-06,
      "loss": 0.1249,
      "step": 7630
    },
    {
      "epoch": 0.7030911687474086,
      "grad_norm": 0.9511119877832231,
      "learning_rate": 1.111153593374399e-06,
      "loss": 0.129,
      "step": 7631
    },
    {
      "epoch": 0.7031833049246786,
      "grad_norm": 0.9528627594483212,
      "learning_rate": 1.1105202424144165e-06,
      "loss": 0.1297,
      "step": 7632
    },
    {
      "epoch": 0.7032754411019487,
      "grad_norm": 0.9814987830163073,
      "learning_rate": 1.109887020467779e-06,
      "loss": 0.1245,
      "step": 7633
    },
    {
      "epoch": 0.7033675772792187,
      "grad_norm": 0.9242159264865467,
      "learning_rate": 1.10925392759328e-06,
      "loss": 0.1288,
      "step": 7634
    },
    {
      "epoch": 0.7034597134564887,
      "grad_norm": 0.9120110671753703,
      "learning_rate": 1.1086209638497038e-06,
      "loss": 0.1262,
      "step": 7635
    },
    {
      "epoch": 0.7035518496337587,
      "grad_norm": 0.9969698964166246,
      "learning_rate": 1.1079881292958217e-06,
      "loss": 0.1417,
      "step": 7636
    },
    {
      "epoch": 0.7036439858110287,
      "grad_norm": 0.9703225929392982,
      "learning_rate": 1.1073554239903905e-06,
      "loss": 0.1427,
      "step": 7637
    },
    {
      "epoch": 0.7037361219882987,
      "grad_norm": 0.880006758354232,
      "learning_rate": 1.106722847992159e-06,
      "loss": 0.1191,
      "step": 7638
    },
    {
      "epoch": 0.7038282581655687,
      "grad_norm": 0.9694607975674124,
      "learning_rate": 1.1060904013598604e-06,
      "loss": 0.1183,
      "step": 7639
    },
    {
      "epoch": 0.7039203943428387,
      "grad_norm": 0.9711437397169369,
      "learning_rate": 1.1054580841522188e-06,
      "loss": 0.1263,
      "step": 7640
    },
    {
      "epoch": 0.7040125305201087,
      "grad_norm": 0.8726884936753815,
      "learning_rate": 1.1048258964279432e-06,
      "loss": 0.1206,
      "step": 7641
    },
    {
      "epoch": 0.7041046666973787,
      "grad_norm": 0.9384163356051172,
      "learning_rate": 1.1041938382457332e-06,
      "loss": 0.1274,
      "step": 7642
    },
    {
      "epoch": 0.7041968028746487,
      "grad_norm": 0.8797955140060947,
      "learning_rate": 1.1035619096642766e-06,
      "loss": 0.125,
      "step": 7643
    },
    {
      "epoch": 0.7042889390519187,
      "grad_norm": 0.9592333120315895,
      "learning_rate": 1.102930110742247e-06,
      "loss": 0.1305,
      "step": 7644
    },
    {
      "epoch": 0.7043810752291887,
      "grad_norm": 0.9883696119753728,
      "learning_rate": 1.102298441538306e-06,
      "loss": 0.1262,
      "step": 7645
    },
    {
      "epoch": 0.7044732114064588,
      "grad_norm": 0.9316484289506399,
      "learning_rate": 1.101666902111105e-06,
      "loss": 0.1188,
      "step": 7646
    },
    {
      "epoch": 0.7045653475837288,
      "grad_norm": 0.9159885584557372,
      "learning_rate": 1.1010354925192826e-06,
      "loss": 0.1153,
      "step": 7647
    },
    {
      "epoch": 0.7046574837609988,
      "grad_norm": 0.971625755678242,
      "learning_rate": 1.1004042128214664e-06,
      "loss": 0.1325,
      "step": 7648
    },
    {
      "epoch": 0.7047496199382688,
      "grad_norm": 0.9208399755488353,
      "learning_rate": 1.0997730630762697e-06,
      "loss": 0.1183,
      "step": 7649
    },
    {
      "epoch": 0.7048417561155388,
      "grad_norm": 0.9003396206133496,
      "learning_rate": 1.0991420433422936e-06,
      "loss": 0.1078,
      "step": 7650
    },
    {
      "epoch": 0.7049338922928088,
      "grad_norm": 0.9159612364008332,
      "learning_rate": 1.0985111536781298e-06,
      "loss": 0.1178,
      "step": 7651
    },
    {
      "epoch": 0.7050260284700788,
      "grad_norm": 0.9660724779459582,
      "learning_rate": 1.0978803941423572e-06,
      "loss": 0.129,
      "step": 7652
    },
    {
      "epoch": 0.7051181646473488,
      "grad_norm": 0.9196254695327626,
      "learning_rate": 1.0972497647935396e-06,
      "loss": 0.122,
      "step": 7653
    },
    {
      "epoch": 0.7052103008246188,
      "grad_norm": 0.9473601084641282,
      "learning_rate": 1.0966192656902335e-06,
      "loss": 0.1081,
      "step": 7654
    },
    {
      "epoch": 0.7053024370018888,
      "grad_norm": 1.0074305480369767,
      "learning_rate": 1.0959888968909784e-06,
      "loss": 0.1314,
      "step": 7655
    },
    {
      "epoch": 0.7053945731791588,
      "grad_norm": 0.9923751289787686,
      "learning_rate": 1.0953586584543066e-06,
      "loss": 0.1221,
      "step": 7656
    },
    {
      "epoch": 0.7054867093564288,
      "grad_norm": 0.8843998573581785,
      "learning_rate": 1.0947285504387337e-06,
      "loss": 0.1217,
      "step": 7657
    },
    {
      "epoch": 0.7055788455336988,
      "grad_norm": 0.8849026382738153,
      "learning_rate": 1.094098572902766e-06,
      "loss": 0.114,
      "step": 7658
    },
    {
      "epoch": 0.7056709817109688,
      "grad_norm": 0.9500578248983311,
      "learning_rate": 1.0934687259048975e-06,
      "loss": 0.1178,
      "step": 7659
    },
    {
      "epoch": 0.7057631178882389,
      "grad_norm": 0.9258739494949468,
      "learning_rate": 1.092839009503609e-06,
      "loss": 0.1229,
      "step": 7660
    },
    {
      "epoch": 0.7058552540655089,
      "grad_norm": 0.9621025980170279,
      "learning_rate": 1.0922094237573706e-06,
      "loss": 0.1261,
      "step": 7661
    },
    {
      "epoch": 0.7059473902427789,
      "grad_norm": 0.9641510931839957,
      "learning_rate": 1.0915799687246376e-06,
      "loss": 0.1221,
      "step": 7662
    },
    {
      "epoch": 0.7060395264200489,
      "grad_norm": 0.9571666499830939,
      "learning_rate": 1.0909506444638563e-06,
      "loss": 0.1247,
      "step": 7663
    },
    {
      "epoch": 0.7061316625973189,
      "grad_norm": 0.9417941506505101,
      "learning_rate": 1.09032145103346e-06,
      "loss": 0.1145,
      "step": 7664
    },
    {
      "epoch": 0.7062237987745889,
      "grad_norm": 0.8922995138858749,
      "learning_rate": 1.0896923884918687e-06,
      "loss": 0.1172,
      "step": 7665
    },
    {
      "epoch": 0.7063159349518588,
      "grad_norm": 0.9455943222219495,
      "learning_rate": 1.0890634568974901e-06,
      "loss": 0.1307,
      "step": 7666
    },
    {
      "epoch": 0.7064080711291288,
      "grad_norm": 0.8963449786751144,
      "learning_rate": 1.0884346563087214e-06,
      "loss": 0.1148,
      "step": 7667
    },
    {
      "epoch": 0.7065002073063988,
      "grad_norm": 0.9238178351632784,
      "learning_rate": 1.0878059867839469e-06,
      "loss": 0.1219,
      "step": 7668
    },
    {
      "epoch": 0.7065923434836688,
      "grad_norm": 0.9717273503839575,
      "learning_rate": 1.0871774483815393e-06,
      "loss": 0.124,
      "step": 7669
    },
    {
      "epoch": 0.7066844796609388,
      "grad_norm": 0.9020615180773476,
      "learning_rate": 1.0865490411598576e-06,
      "loss": 0.1226,
      "step": 7670
    },
    {
      "epoch": 0.7067766158382088,
      "grad_norm": 0.9052631909123191,
      "learning_rate": 1.0859207651772485e-06,
      "loss": 0.1221,
      "step": 7671
    },
    {
      "epoch": 0.7068687520154788,
      "grad_norm": 0.9363740585233611,
      "learning_rate": 1.0852926204920488e-06,
      "loss": 0.1254,
      "step": 7672
    },
    {
      "epoch": 0.7069608881927489,
      "grad_norm": 0.942073473131626,
      "learning_rate": 1.084664607162582e-06,
      "loss": 0.1197,
      "step": 7673
    },
    {
      "epoch": 0.7070530243700189,
      "grad_norm": 0.9533912073107669,
      "learning_rate": 1.0840367252471583e-06,
      "loss": 0.123,
      "step": 7674
    },
    {
      "epoch": 0.7071451605472889,
      "grad_norm": 1.0371830317376796,
      "learning_rate": 1.083408974804078e-06,
      "loss": 0.1408,
      "step": 7675
    },
    {
      "epoch": 0.7072372967245589,
      "grad_norm": 0.9234521408562314,
      "learning_rate": 1.082781355891626e-06,
      "loss": 0.1181,
      "step": 7676
    },
    {
      "epoch": 0.7073294329018289,
      "grad_norm": 0.904399098255252,
      "learning_rate": 1.0821538685680783e-06,
      "loss": 0.1183,
      "step": 7677
    },
    {
      "epoch": 0.7074215690790989,
      "grad_norm": 0.9279812511551865,
      "learning_rate": 1.0815265128916955e-06,
      "loss": 0.127,
      "step": 7678
    },
    {
      "epoch": 0.7075137052563689,
      "grad_norm": 0.8684164999023688,
      "learning_rate": 1.0808992889207287e-06,
      "loss": 0.1134,
      "step": 7679
    },
    {
      "epoch": 0.7076058414336389,
      "grad_norm": 0.9084082832703506,
      "learning_rate": 1.0802721967134167e-06,
      "loss": 0.1234,
      "step": 7680
    },
    {
      "epoch": 0.7076979776109089,
      "grad_norm": 0.9542334556040545,
      "learning_rate": 1.0796452363279838e-06,
      "loss": 0.125,
      "step": 7681
    },
    {
      "epoch": 0.7077901137881789,
      "grad_norm": 0.9552003098411618,
      "learning_rate": 1.079018407822643e-06,
      "loss": 0.1178,
      "step": 7682
    },
    {
      "epoch": 0.7078822499654489,
      "grad_norm": 0.9786307431398847,
      "learning_rate": 1.0783917112555956e-06,
      "loss": 0.1213,
      "step": 7683
    },
    {
      "epoch": 0.7079743861427189,
      "grad_norm": 0.9443212053638332,
      "learning_rate": 1.0777651466850308e-06,
      "loss": 0.123,
      "step": 7684
    },
    {
      "epoch": 0.7080665223199889,
      "grad_norm": 0.906375176394638,
      "learning_rate": 1.0771387141691265e-06,
      "loss": 0.1239,
      "step": 7685
    },
    {
      "epoch": 0.7081586584972589,
      "grad_norm": 0.9974511076859406,
      "learning_rate": 1.0765124137660454e-06,
      "loss": 0.1199,
      "step": 7686
    },
    {
      "epoch": 0.708250794674529,
      "grad_norm": 0.9252319719364072,
      "learning_rate": 1.075886245533939e-06,
      "loss": 0.1322,
      "step": 7687
    },
    {
      "epoch": 0.708342930851799,
      "grad_norm": 0.9953283487894312,
      "learning_rate": 1.075260209530948e-06,
      "loss": 0.1346,
      "step": 7688
    },
    {
      "epoch": 0.708435067029069,
      "grad_norm": 0.9675526374551723,
      "learning_rate": 1.0746343058151998e-06,
      "loss": 0.1359,
      "step": 7689
    },
    {
      "epoch": 0.708527203206339,
      "grad_norm": 0.9600303445058818,
      "learning_rate": 1.074008534444811e-06,
      "loss": 0.1341,
      "step": 7690
    },
    {
      "epoch": 0.708619339383609,
      "grad_norm": 0.8930072632299815,
      "learning_rate": 1.0733828954778827e-06,
      "loss": 0.1127,
      "step": 7691
    },
    {
      "epoch": 0.708711475560879,
      "grad_norm": 0.9679509417395482,
      "learning_rate": 1.0727573889725053e-06,
      "loss": 0.125,
      "step": 7692
    },
    {
      "epoch": 0.708803611738149,
      "grad_norm": 0.8541878797863817,
      "learning_rate": 1.0721320149867582e-06,
      "loss": 0.1176,
      "step": 7693
    },
    {
      "epoch": 0.708895747915419,
      "grad_norm": 0.9332963996090269,
      "learning_rate": 1.0715067735787079e-06,
      "loss": 0.1304,
      "step": 7694
    },
    {
      "epoch": 0.708987884092689,
      "grad_norm": 0.9552468087190065,
      "learning_rate": 1.0708816648064067e-06,
      "loss": 0.1362,
      "step": 7695
    },
    {
      "epoch": 0.709080020269959,
      "grad_norm": 0.9699586270892624,
      "learning_rate": 1.0702566887278975e-06,
      "loss": 0.1363,
      "step": 7696
    },
    {
      "epoch": 0.709172156447229,
      "grad_norm": 0.9025744016802671,
      "learning_rate": 1.0696318454012074e-06,
      "loss": 0.1194,
      "step": 7697
    },
    {
      "epoch": 0.709264292624499,
      "grad_norm": 0.9032683279099893,
      "learning_rate": 1.0690071348843559e-06,
      "loss": 0.1263,
      "step": 7698
    },
    {
      "epoch": 0.709356428801769,
      "grad_norm": 0.9876043723097531,
      "learning_rate": 1.0683825572353447e-06,
      "loss": 0.1268,
      "step": 7699
    },
    {
      "epoch": 0.709448564979039,
      "grad_norm": 0.9741498985940272,
      "learning_rate": 1.0677581125121672e-06,
      "loss": 0.1292,
      "step": 7700
    },
    {
      "epoch": 0.7095407011563091,
      "grad_norm": 0.9413448494069492,
      "learning_rate": 1.067133800772803e-06,
      "loss": 0.1328,
      "step": 7701
    },
    {
      "epoch": 0.7096328373335791,
      "grad_norm": 0.9214270401858093,
      "learning_rate": 1.0665096220752214e-06,
      "loss": 0.1256,
      "step": 7702
    },
    {
      "epoch": 0.7097249735108491,
      "grad_norm": 0.9671108647251513,
      "learning_rate": 1.065885576477374e-06,
      "loss": 0.125,
      "step": 7703
    },
    {
      "epoch": 0.7098171096881191,
      "grad_norm": 0.8983338985867325,
      "learning_rate": 1.0652616640372051e-06,
      "loss": 0.1227,
      "step": 7704
    },
    {
      "epoch": 0.7099092458653891,
      "grad_norm": 0.9167380926561612,
      "learning_rate": 1.064637884812645e-06,
      "loss": 0.1225,
      "step": 7705
    },
    {
      "epoch": 0.7100013820426591,
      "grad_norm": 0.9416855433977324,
      "learning_rate": 1.0640142388616128e-06,
      "loss": 0.1216,
      "step": 7706
    },
    {
      "epoch": 0.710093518219929,
      "grad_norm": 0.9558540627602284,
      "learning_rate": 1.063390726242012e-06,
      "loss": 0.1171,
      "step": 7707
    },
    {
      "epoch": 0.710185654397199,
      "grad_norm": 0.9049250133502065,
      "learning_rate": 1.062767347011738e-06,
      "loss": 0.1179,
      "step": 7708
    },
    {
      "epoch": 0.710277790574469,
      "grad_norm": 0.9117542944386543,
      "learning_rate": 1.0621441012286696e-06,
      "loss": 0.1249,
      "step": 7709
    },
    {
      "epoch": 0.710369926751739,
      "grad_norm": 0.9522183662298654,
      "learning_rate": 1.061520988950677e-06,
      "loss": 0.1297,
      "step": 7710
    },
    {
      "epoch": 0.710462062929009,
      "grad_norm": 0.9050768681888326,
      "learning_rate": 1.0608980102356146e-06,
      "loss": 0.1155,
      "step": 7711
    },
    {
      "epoch": 0.710554199106279,
      "grad_norm": 0.9802646477753757,
      "learning_rate": 1.0602751651413264e-06,
      "loss": 0.1335,
      "step": 7712
    },
    {
      "epoch": 0.710646335283549,
      "grad_norm": 0.9303028811692908,
      "learning_rate": 1.0596524537256453e-06,
      "loss": 0.1095,
      "step": 7713
    },
    {
      "epoch": 0.7107384714608191,
      "grad_norm": 0.9190098625800486,
      "learning_rate": 1.0590298760463879e-06,
      "loss": 0.1199,
      "step": 7714
    },
    {
      "epoch": 0.7108306076380891,
      "grad_norm": 0.94415621243913,
      "learning_rate": 1.0584074321613625e-06,
      "loss": 0.1242,
      "step": 7715
    },
    {
      "epoch": 0.7109227438153591,
      "grad_norm": 0.9319898998438171,
      "learning_rate": 1.0577851221283614e-06,
      "loss": 0.1165,
      "step": 7716
    },
    {
      "epoch": 0.7110148799926291,
      "grad_norm": 0.958963020372258,
      "learning_rate": 1.0571629460051665e-06,
      "loss": 0.1329,
      "step": 7717
    },
    {
      "epoch": 0.7111070161698991,
      "grad_norm": 0.992606267592599,
      "learning_rate": 1.0565409038495486e-06,
      "loss": 0.1371,
      "step": 7718
    },
    {
      "epoch": 0.7111991523471691,
      "grad_norm": 0.9600979422255448,
      "learning_rate": 1.055918995719263e-06,
      "loss": 0.1222,
      "step": 7719
    },
    {
      "epoch": 0.7112912885244391,
      "grad_norm": 0.922753779521535,
      "learning_rate": 1.0552972216720534e-06,
      "loss": 0.121,
      "step": 7720
    },
    {
      "epoch": 0.7113834247017091,
      "grad_norm": 0.9470964491509773,
      "learning_rate": 1.054675581765652e-06,
      "loss": 0.1318,
      "step": 7721
    },
    {
      "epoch": 0.7114755608789791,
      "grad_norm": 0.9243871817173555,
      "learning_rate": 1.0540540760577785e-06,
      "loss": 0.1285,
      "step": 7722
    },
    {
      "epoch": 0.7115676970562491,
      "grad_norm": 0.9789267036795136,
      "learning_rate": 1.0534327046061404e-06,
      "loss": 0.1306,
      "step": 7723
    },
    {
      "epoch": 0.7116598332335191,
      "grad_norm": 0.9684236064786368,
      "learning_rate": 1.0528114674684318e-06,
      "loss": 0.1198,
      "step": 7724
    },
    {
      "epoch": 0.7117519694107891,
      "grad_norm": 0.9082055129941168,
      "learning_rate": 1.0521903647023327e-06,
      "loss": 0.126,
      "step": 7725
    },
    {
      "epoch": 0.7118441055880591,
      "grad_norm": 0.8603222142268356,
      "learning_rate": 1.0515693963655144e-06,
      "loss": 0.1087,
      "step": 7726
    },
    {
      "epoch": 0.7119362417653291,
      "grad_norm": 0.907707522654929,
      "learning_rate": 1.0509485625156342e-06,
      "loss": 0.1198,
      "step": 7727
    },
    {
      "epoch": 0.7120283779425992,
      "grad_norm": 0.9058331237453139,
      "learning_rate": 1.0503278632103353e-06,
      "loss": 0.1204,
      "step": 7728
    },
    {
      "epoch": 0.7121205141198692,
      "grad_norm": 0.9647120715897719,
      "learning_rate": 1.0497072985072509e-06,
      "loss": 0.138,
      "step": 7729
    },
    {
      "epoch": 0.7122126502971392,
      "grad_norm": 0.9429718054470461,
      "learning_rate": 1.0490868684639994e-06,
      "loss": 0.1377,
      "step": 7730
    },
    {
      "epoch": 0.7123047864744092,
      "grad_norm": 1.019549767972903,
      "learning_rate": 1.0484665731381892e-06,
      "loss": 0.1275,
      "step": 7731
    },
    {
      "epoch": 0.7123969226516792,
      "grad_norm": 0.8914456137526924,
      "learning_rate": 1.0478464125874126e-06,
      "loss": 0.1211,
      "step": 7732
    },
    {
      "epoch": 0.7124890588289492,
      "grad_norm": 0.8732468987409082,
      "learning_rate": 1.047226386869253e-06,
      "loss": 0.1144,
      "step": 7733
    },
    {
      "epoch": 0.7125811950062192,
      "grad_norm": 0.9361439907322878,
      "learning_rate": 1.046606496041281e-06,
      "loss": 0.1236,
      "step": 7734
    },
    {
      "epoch": 0.7126733311834892,
      "grad_norm": 0.8808363617293428,
      "learning_rate": 1.0459867401610519e-06,
      "loss": 0.1174,
      "step": 7735
    },
    {
      "epoch": 0.7127654673607592,
      "grad_norm": 0.9359884244207574,
      "learning_rate": 1.0453671192861095e-06,
      "loss": 0.1251,
      "step": 7736
    },
    {
      "epoch": 0.7128576035380292,
      "grad_norm": 0.9779404856607452,
      "learning_rate": 1.0447476334739867e-06,
      "loss": 0.1321,
      "step": 7737
    },
    {
      "epoch": 0.7129497397152992,
      "grad_norm": 0.9287380972756585,
      "learning_rate": 1.0441282827822027e-06,
      "loss": 0.1257,
      "step": 7738
    },
    {
      "epoch": 0.7130418758925692,
      "grad_norm": 0.9621747448313562,
      "learning_rate": 1.0435090672682655e-06,
      "loss": 0.1311,
      "step": 7739
    },
    {
      "epoch": 0.7131340120698392,
      "grad_norm": 0.9258715491762729,
      "learning_rate": 1.042889986989668e-06,
      "loss": 0.1309,
      "step": 7740
    },
    {
      "epoch": 0.7132261482471093,
      "grad_norm": 0.9039074847686032,
      "learning_rate": 1.0422710420038912e-06,
      "loss": 0.1259,
      "step": 7741
    },
    {
      "epoch": 0.7133182844243793,
      "grad_norm": 0.8822465315547272,
      "learning_rate": 1.0416522323684048e-06,
      "loss": 0.1116,
      "step": 7742
    },
    {
      "epoch": 0.7134104206016493,
      "grad_norm": 0.952450362988351,
      "learning_rate": 1.0410335581406657e-06,
      "loss": 0.1275,
      "step": 7743
    },
    {
      "epoch": 0.7135025567789193,
      "grad_norm": 0.9446338466948969,
      "learning_rate": 1.0404150193781187e-06,
      "loss": 0.1285,
      "step": 7744
    },
    {
      "epoch": 0.7135946929561893,
      "grad_norm": 0.886851570227021,
      "learning_rate": 1.0397966161381943e-06,
      "loss": 0.1163,
      "step": 7745
    },
    {
      "epoch": 0.7136868291334593,
      "grad_norm": 0.9493597158807928,
      "learning_rate": 1.03917834847831e-06,
      "loss": 0.1261,
      "step": 7746
    },
    {
      "epoch": 0.7137789653107293,
      "grad_norm": 0.872603891886761,
      "learning_rate": 1.0385602164558735e-06,
      "loss": 0.109,
      "step": 7747
    },
    {
      "epoch": 0.7138711014879993,
      "grad_norm": 0.9153516353588165,
      "learning_rate": 1.037942220128279e-06,
      "loss": 0.1214,
      "step": 7748
    },
    {
      "epoch": 0.7139632376652693,
      "grad_norm": 0.918045785934361,
      "learning_rate": 1.0373243595529058e-06,
      "loss": 0.1218,
      "step": 7749
    },
    {
      "epoch": 0.7140553738425393,
      "grad_norm": 0.9248276605451051,
      "learning_rate": 1.0367066347871243e-06,
      "loss": 0.118,
      "step": 7750
    },
    {
      "epoch": 0.7141475100198093,
      "grad_norm": 0.9388758721031797,
      "learning_rate": 1.0360890458882882e-06,
      "loss": 0.1277,
      "step": 7751
    },
    {
      "epoch": 0.7142396461970792,
      "grad_norm": 0.8690529484848437,
      "learning_rate": 1.0354715929137429e-06,
      "loss": 0.1118,
      "step": 7752
    },
    {
      "epoch": 0.7143317823743492,
      "grad_norm": 0.8640578234777775,
      "learning_rate": 1.0348542759208166e-06,
      "loss": 0.1122,
      "step": 7753
    },
    {
      "epoch": 0.7144239185516192,
      "grad_norm": 0.9843435962019589,
      "learning_rate": 1.0342370949668287e-06,
      "loss": 0.1282,
      "step": 7754
    },
    {
      "epoch": 0.7145160547288893,
      "grad_norm": 0.8969391758762647,
      "learning_rate": 1.0336200501090848e-06,
      "loss": 0.118,
      "step": 7755
    },
    {
      "epoch": 0.7146081909061593,
      "grad_norm": 0.9532247647644668,
      "learning_rate": 1.0330031414048775e-06,
      "loss": 0.1266,
      "step": 7756
    },
    {
      "epoch": 0.7147003270834293,
      "grad_norm": 0.9088908017504251,
      "learning_rate": 1.0323863689114851e-06,
      "loss": 0.1212,
      "step": 7757
    },
    {
      "epoch": 0.7147924632606993,
      "grad_norm": 0.906425226852019,
      "learning_rate": 1.0317697326861766e-06,
      "loss": 0.1239,
      "step": 7758
    },
    {
      "epoch": 0.7148845994379693,
      "grad_norm": 0.9227457737134785,
      "learning_rate": 1.0311532327862064e-06,
      "loss": 0.1219,
      "step": 7759
    },
    {
      "epoch": 0.7149767356152393,
      "grad_norm": 0.9987736939867745,
      "learning_rate": 1.0305368692688175e-06,
      "loss": 0.1372,
      "step": 7760
    },
    {
      "epoch": 0.7150688717925093,
      "grad_norm": 0.9714466510612396,
      "learning_rate": 1.0299206421912382e-06,
      "loss": 0.1201,
      "step": 7761
    },
    {
      "epoch": 0.7151610079697793,
      "grad_norm": 0.8732980566662378,
      "learning_rate": 1.0293045516106848e-06,
      "loss": 0.1085,
      "step": 7762
    },
    {
      "epoch": 0.7152531441470493,
      "grad_norm": 0.9074801485331541,
      "learning_rate": 1.0286885975843621e-06,
      "loss": 0.1145,
      "step": 7763
    },
    {
      "epoch": 0.7153452803243193,
      "grad_norm": 0.9109621722743166,
      "learning_rate": 1.0280727801694624e-06,
      "loss": 0.1216,
      "step": 7764
    },
    {
      "epoch": 0.7154374165015893,
      "grad_norm": 0.922807759731232,
      "learning_rate": 1.0274570994231622e-06,
      "loss": 0.1254,
      "step": 7765
    },
    {
      "epoch": 0.7155295526788593,
      "grad_norm": 0.9349066676402771,
      "learning_rate": 1.02684155540263e-06,
      "loss": 0.1273,
      "step": 7766
    },
    {
      "epoch": 0.7156216888561293,
      "grad_norm": 0.8751564876537657,
      "learning_rate": 1.026226148165017e-06,
      "loss": 0.1203,
      "step": 7767
    },
    {
      "epoch": 0.7157138250333993,
      "grad_norm": 0.917719645814074,
      "learning_rate": 1.0256108777674656e-06,
      "loss": 0.1198,
      "step": 7768
    },
    {
      "epoch": 0.7158059612106694,
      "grad_norm": 0.8858931449744197,
      "learning_rate": 1.024995744267102e-06,
      "loss": 0.1157,
      "step": 7769
    },
    {
      "epoch": 0.7158980973879394,
      "grad_norm": 0.8912225251790697,
      "learning_rate": 1.0243807477210423e-06,
      "loss": 0.13,
      "step": 7770
    },
    {
      "epoch": 0.7159902335652094,
      "grad_norm": 0.8764687308266974,
      "learning_rate": 1.0237658881863898e-06,
      "loss": 0.1114,
      "step": 7771
    },
    {
      "epoch": 0.7160823697424794,
      "grad_norm": 0.9088978751333338,
      "learning_rate": 1.0231511657202327e-06,
      "loss": 0.1203,
      "step": 7772
    },
    {
      "epoch": 0.7161745059197494,
      "grad_norm": 0.9229428912327712,
      "learning_rate": 1.0225365803796498e-06,
      "loss": 0.1236,
      "step": 7773
    },
    {
      "epoch": 0.7162666420970194,
      "grad_norm": 0.8660219223718931,
      "learning_rate": 1.0219221322217032e-06,
      "loss": 0.1101,
      "step": 7774
    },
    {
      "epoch": 0.7163587782742894,
      "grad_norm": 0.9744313899778617,
      "learning_rate": 1.0213078213034457e-06,
      "loss": 0.1358,
      "step": 7775
    },
    {
      "epoch": 0.7164509144515594,
      "grad_norm": 0.9000387318873999,
      "learning_rate": 1.0206936476819165e-06,
      "loss": 0.1147,
      "step": 7776
    },
    {
      "epoch": 0.7165430506288294,
      "grad_norm": 0.9151921247508992,
      "learning_rate": 1.0200796114141428e-06,
      "loss": 0.1173,
      "step": 7777
    },
    {
      "epoch": 0.7166351868060994,
      "grad_norm": 0.9873702603847861,
      "learning_rate": 1.0194657125571347e-06,
      "loss": 0.1267,
      "step": 7778
    },
    {
      "epoch": 0.7167273229833694,
      "grad_norm": 0.8744889522529582,
      "learning_rate": 1.0188519511678946e-06,
      "loss": 0.1134,
      "step": 7779
    },
    {
      "epoch": 0.7168194591606394,
      "grad_norm": 0.9000947869027155,
      "learning_rate": 1.0182383273034102e-06,
      "loss": 0.1138,
      "step": 7780
    },
    {
      "epoch": 0.7169115953379094,
      "grad_norm": 0.9534692999739216,
      "learning_rate": 1.0176248410206577e-06,
      "loss": 0.131,
      "step": 7781
    },
    {
      "epoch": 0.7170037315151795,
      "grad_norm": 0.9343304549611972,
      "learning_rate": 1.017011492376597e-06,
      "loss": 0.1226,
      "step": 7782
    },
    {
      "epoch": 0.7170958676924495,
      "grad_norm": 0.9079633458736804,
      "learning_rate": 1.0163982814281797e-06,
      "loss": 0.1081,
      "step": 7783
    },
    {
      "epoch": 0.7171880038697195,
      "grad_norm": 0.945362100863493,
      "learning_rate": 1.0157852082323411e-06,
      "loss": 0.117,
      "step": 7784
    },
    {
      "epoch": 0.7172801400469895,
      "grad_norm": 0.9305854058984073,
      "learning_rate": 1.0151722728460064e-06,
      "loss": 0.1232,
      "step": 7785
    },
    {
      "epoch": 0.7173722762242595,
      "grad_norm": 0.9193933839229599,
      "learning_rate": 1.0145594753260849e-06,
      "loss": 0.1162,
      "step": 7786
    },
    {
      "epoch": 0.7174644124015295,
      "grad_norm": 0.9311548944071399,
      "learning_rate": 1.0139468157294762e-06,
      "loss": 0.1218,
      "step": 7787
    },
    {
      "epoch": 0.7175565485787995,
      "grad_norm": 0.9143005687332517,
      "learning_rate": 1.0133342941130664e-06,
      "loss": 0.1112,
      "step": 7788
    },
    {
      "epoch": 0.7176486847560695,
      "grad_norm": 0.9207452007285798,
      "learning_rate": 1.0127219105337274e-06,
      "loss": 0.1259,
      "step": 7789
    },
    {
      "epoch": 0.7177408209333395,
      "grad_norm": 0.9215304391889503,
      "learning_rate": 1.0121096650483182e-06,
      "loss": 0.1169,
      "step": 7790
    },
    {
      "epoch": 0.7178329571106095,
      "grad_norm": 0.8909392126110938,
      "learning_rate": 1.0114975577136866e-06,
      "loss": 0.1194,
      "step": 7791
    },
    {
      "epoch": 0.7179250932878795,
      "grad_norm": 0.9693442968946673,
      "learning_rate": 1.010885588586667e-06,
      "loss": 0.1277,
      "step": 7792
    },
    {
      "epoch": 0.7180172294651495,
      "grad_norm": 0.9316443174949289,
      "learning_rate": 1.0102737577240818e-06,
      "loss": 0.1202,
      "step": 7793
    },
    {
      "epoch": 0.7181093656424195,
      "grad_norm": 0.9187961844110014,
      "learning_rate": 1.0096620651827382e-06,
      "loss": 0.1214,
      "step": 7794
    },
    {
      "epoch": 0.7182015018196894,
      "grad_norm": 0.8998126418946526,
      "learning_rate": 1.0090505110194315e-06,
      "loss": 0.1128,
      "step": 7795
    },
    {
      "epoch": 0.7182936379969596,
      "grad_norm": 0.9585336588680275,
      "learning_rate": 1.0084390952909456e-06,
      "loss": 0.1269,
      "step": 7796
    },
    {
      "epoch": 0.7183857741742296,
      "grad_norm": 0.9211680803712629,
      "learning_rate": 1.0078278180540507e-06,
      "loss": 0.1154,
      "step": 7797
    },
    {
      "epoch": 0.7184779103514995,
      "grad_norm": 0.8997122910133363,
      "learning_rate": 1.0072166793655027e-06,
      "loss": 0.1106,
      "step": 7798
    },
    {
      "epoch": 0.7185700465287695,
      "grad_norm": 0.9414360029244899,
      "learning_rate": 1.0066056792820478e-06,
      "loss": 0.1301,
      "step": 7799
    },
    {
      "epoch": 0.7186621827060395,
      "grad_norm": 0.8439284973205966,
      "learning_rate": 1.0059948178604154e-06,
      "loss": 0.1053,
      "step": 7800
    },
    {
      "epoch": 0.7187543188833095,
      "grad_norm": 0.8981596749183159,
      "learning_rate": 1.0053840951573247e-06,
      "loss": 0.1349,
      "step": 7801
    },
    {
      "epoch": 0.7188464550605795,
      "grad_norm": 0.8912489563990994,
      "learning_rate": 1.0047735112294827e-06,
      "loss": 0.1157,
      "step": 7802
    },
    {
      "epoch": 0.7189385912378495,
      "grad_norm": 0.9025423474052598,
      "learning_rate": 1.00416306613358e-06,
      "loss": 0.1197,
      "step": 7803
    },
    {
      "epoch": 0.7190307274151195,
      "grad_norm": 0.9318053328867276,
      "learning_rate": 1.0035527599262988e-06,
      "loss": 0.1341,
      "step": 7804
    },
    {
      "epoch": 0.7191228635923895,
      "grad_norm": 0.9344718743643173,
      "learning_rate": 1.0029425926643035e-06,
      "loss": 0.1309,
      "step": 7805
    },
    {
      "epoch": 0.7192149997696595,
      "grad_norm": 0.9345501597826967,
      "learning_rate": 1.0023325644042508e-06,
      "loss": 0.1228,
      "step": 7806
    },
    {
      "epoch": 0.7193071359469295,
      "grad_norm": 0.8698344847523616,
      "learning_rate": 1.0017226752027798e-06,
      "loss": 0.1173,
      "step": 7807
    },
    {
      "epoch": 0.7193992721241995,
      "grad_norm": 0.8908033217302351,
      "learning_rate": 1.0011129251165198e-06,
      "loss": 0.1264,
      "step": 7808
    },
    {
      "epoch": 0.7194914083014696,
      "grad_norm": 0.8889868667942962,
      "learning_rate": 1.0005033142020868e-06,
      "loss": 0.1195,
      "step": 7809
    },
    {
      "epoch": 0.7195835444787396,
      "grad_norm": 0.8608771688254551,
      "learning_rate": 9.998938425160822e-07,
      "loss": 0.1127,
      "step": 7810
    },
    {
      "epoch": 0.7196756806560096,
      "grad_norm": 0.8824505031638588,
      "learning_rate": 9.992845101150949e-07,
      "loss": 0.1177,
      "step": 7811
    },
    {
      "epoch": 0.7197678168332796,
      "grad_norm": 0.9015088142832688,
      "learning_rate": 9.986753170557026e-07,
      "loss": 0.1272,
      "step": 7812
    },
    {
      "epoch": 0.7198599530105496,
      "grad_norm": 0.9486135831290708,
      "learning_rate": 9.980662633944687e-07,
      "loss": 0.1306,
      "step": 7813
    },
    {
      "epoch": 0.7199520891878196,
      "grad_norm": 0.9331244271896649,
      "learning_rate": 9.974573491879447e-07,
      "loss": 0.1187,
      "step": 7814
    },
    {
      "epoch": 0.7200442253650896,
      "grad_norm": 1.0721474757596579,
      "learning_rate": 9.968485744926673e-07,
      "loss": 0.1422,
      "step": 7815
    },
    {
      "epoch": 0.7201363615423596,
      "grad_norm": 0.9312764151210932,
      "learning_rate": 9.962399393651608e-07,
      "loss": 0.1283,
      "step": 7816
    },
    {
      "epoch": 0.7202284977196296,
      "grad_norm": 0.9758367075704081,
      "learning_rate": 9.95631443861938e-07,
      "loss": 0.1113,
      "step": 7817
    },
    {
      "epoch": 0.7203206338968996,
      "grad_norm": 0.9924718172406113,
      "learning_rate": 9.95023088039498e-07,
      "loss": 0.1239,
      "step": 7818
    },
    {
      "epoch": 0.7204127700741696,
      "grad_norm": 0.8883754010136571,
      "learning_rate": 9.94414871954326e-07,
      "loss": 0.1211,
      "step": 7819
    },
    {
      "epoch": 0.7205049062514396,
      "grad_norm": 0.9535928744680094,
      "learning_rate": 9.938067956628955e-07,
      "loss": 0.1275,
      "step": 7820
    },
    {
      "epoch": 0.7205970424287096,
      "grad_norm": 0.9243576442784894,
      "learning_rate": 9.931988592216654e-07,
      "loss": 0.1234,
      "step": 7821
    },
    {
      "epoch": 0.7206891786059796,
      "grad_norm": 0.8921816864655784,
      "learning_rate": 9.925910626870841e-07,
      "loss": 0.1247,
      "step": 7822
    },
    {
      "epoch": 0.7207813147832497,
      "grad_norm": 1.0087296503855505,
      "learning_rate": 9.919834061155841e-07,
      "loss": 0.1403,
      "step": 7823
    },
    {
      "epoch": 0.7208734509605197,
      "grad_norm": 0.9161566983649667,
      "learning_rate": 9.913758895635872e-07,
      "loss": 0.1207,
      "step": 7824
    },
    {
      "epoch": 0.7209655871377897,
      "grad_norm": 0.9466811448920842,
      "learning_rate": 9.907685130875022e-07,
      "loss": 0.1305,
      "step": 7825
    },
    {
      "epoch": 0.7210577233150597,
      "grad_norm": 0.9427068503028089,
      "learning_rate": 9.901612767437233e-07,
      "loss": 0.1322,
      "step": 7826
    },
    {
      "epoch": 0.7211498594923297,
      "grad_norm": 0.8997398397326478,
      "learning_rate": 9.89554180588631e-07,
      "loss": 0.126,
      "step": 7827
    },
    {
      "epoch": 0.7212419956695997,
      "grad_norm": 0.9158172595074155,
      "learning_rate": 9.889472246785962e-07,
      "loss": 0.1259,
      "step": 7828
    },
    {
      "epoch": 0.7213341318468697,
      "grad_norm": 0.9590521031356755,
      "learning_rate": 9.883404090699739e-07,
      "loss": 0.123,
      "step": 7829
    },
    {
      "epoch": 0.7214262680241397,
      "grad_norm": 0.9197380638874784,
      "learning_rate": 9.877337338191081e-07,
      "loss": 0.1255,
      "step": 7830
    },
    {
      "epoch": 0.7215184042014097,
      "grad_norm": 0.9495198865360079,
      "learning_rate": 9.871271989823279e-07,
      "loss": 0.1257,
      "step": 7831
    },
    {
      "epoch": 0.7216105403786797,
      "grad_norm": 0.9618388100017592,
      "learning_rate": 9.865208046159493e-07,
      "loss": 0.1222,
      "step": 7832
    },
    {
      "epoch": 0.7217026765559497,
      "grad_norm": 0.9187912885939795,
      "learning_rate": 9.85914550776277e-07,
      "loss": 0.1315,
      "step": 7833
    },
    {
      "epoch": 0.7217948127332197,
      "grad_norm": 0.9463336421029137,
      "learning_rate": 9.853084375196013e-07,
      "loss": 0.1376,
      "step": 7834
    },
    {
      "epoch": 0.7218869489104897,
      "grad_norm": 0.8998408177725145,
      "learning_rate": 9.847024649022014e-07,
      "loss": 0.1134,
      "step": 7835
    },
    {
      "epoch": 0.7219790850877598,
      "grad_norm": 0.9016223772783057,
      "learning_rate": 9.840966329803404e-07,
      "loss": 0.1229,
      "step": 7836
    },
    {
      "epoch": 0.7220712212650298,
      "grad_norm": 0.9254109877176989,
      "learning_rate": 9.834909418102694e-07,
      "loss": 0.1241,
      "step": 7837
    },
    {
      "epoch": 0.7221633574422998,
      "grad_norm": 0.9144500299198667,
      "learning_rate": 9.828853914482276e-07,
      "loss": 0.1148,
      "step": 7838
    },
    {
      "epoch": 0.7222554936195698,
      "grad_norm": 0.8920230252999829,
      "learning_rate": 9.822799819504413e-07,
      "loss": 0.1161,
      "step": 7839
    },
    {
      "epoch": 0.7223476297968398,
      "grad_norm": 0.865650754828013,
      "learning_rate": 9.816747133731213e-07,
      "loss": 0.1116,
      "step": 7840
    },
    {
      "epoch": 0.7224397659741097,
      "grad_norm": 0.9425761369193745,
      "learning_rate": 9.810695857724685e-07,
      "loss": 0.1177,
      "step": 7841
    },
    {
      "epoch": 0.7225319021513797,
      "grad_norm": 0.9392678290824935,
      "learning_rate": 9.80464599204667e-07,
      "loss": 0.1274,
      "step": 7842
    },
    {
      "epoch": 0.7226240383286497,
      "grad_norm": 0.8952324686337915,
      "learning_rate": 9.798597537258921e-07,
      "loss": 0.118,
      "step": 7843
    },
    {
      "epoch": 0.7227161745059197,
      "grad_norm": 0.9485924421467364,
      "learning_rate": 9.79255049392302e-07,
      "loss": 0.1253,
      "step": 7844
    },
    {
      "epoch": 0.7228083106831897,
      "grad_norm": 0.9258101069064801,
      "learning_rate": 9.78650486260044e-07,
      "loss": 0.1185,
      "step": 7845
    },
    {
      "epoch": 0.7229004468604597,
      "grad_norm": 0.9892771454693859,
      "learning_rate": 9.78046064385253e-07,
      "loss": 0.1191,
      "step": 7846
    },
    {
      "epoch": 0.7229925830377297,
      "grad_norm": 0.9281111956495237,
      "learning_rate": 9.774417838240485e-07,
      "loss": 0.1203,
      "step": 7847
    },
    {
      "epoch": 0.7230847192149997,
      "grad_norm": 0.9200675350911685,
      "learning_rate": 9.768376446325376e-07,
      "loss": 0.1163,
      "step": 7848
    },
    {
      "epoch": 0.7231768553922697,
      "grad_norm": 0.8588371158345436,
      "learning_rate": 9.762336468668151e-07,
      "loss": 0.1151,
      "step": 7849
    },
    {
      "epoch": 0.7232689915695398,
      "grad_norm": 0.848370777709939,
      "learning_rate": 9.756297905829627e-07,
      "loss": 0.111,
      "step": 7850
    },
    {
      "epoch": 0.7233611277468098,
      "grad_norm": 0.8984725801587228,
      "learning_rate": 9.75026075837049e-07,
      "loss": 0.1085,
      "step": 7851
    },
    {
      "epoch": 0.7234532639240798,
      "grad_norm": 1.0168488186269962,
      "learning_rate": 9.744225026851284e-07,
      "loss": 0.125,
      "step": 7852
    },
    {
      "epoch": 0.7235454001013498,
      "grad_norm": 0.8882011781278885,
      "learning_rate": 9.738190711832415e-07,
      "loss": 0.1143,
      "step": 7853
    },
    {
      "epoch": 0.7236375362786198,
      "grad_norm": 0.9282850267501597,
      "learning_rate": 9.732157813874185e-07,
      "loss": 0.1284,
      "step": 7854
    },
    {
      "epoch": 0.7237296724558898,
      "grad_norm": 0.9204717778431273,
      "learning_rate": 9.72612633353675e-07,
      "loss": 0.1117,
      "step": 7855
    },
    {
      "epoch": 0.7238218086331598,
      "grad_norm": 0.8853711615838081,
      "learning_rate": 9.720096271380122e-07,
      "loss": 0.1122,
      "step": 7856
    },
    {
      "epoch": 0.7239139448104298,
      "grad_norm": 0.9593573703467069,
      "learning_rate": 9.714067627964199e-07,
      "loss": 0.1265,
      "step": 7857
    },
    {
      "epoch": 0.7240060809876998,
      "grad_norm": 0.8933130363258317,
      "learning_rate": 9.708040403848752e-07,
      "loss": 0.1265,
      "step": 7858
    },
    {
      "epoch": 0.7240982171649698,
      "grad_norm": 0.9035103404027475,
      "learning_rate": 9.70201459959339e-07,
      "loss": 0.1134,
      "step": 7859
    },
    {
      "epoch": 0.7241903533422398,
      "grad_norm": 0.9124284467326477,
      "learning_rate": 9.695990215757625e-07,
      "loss": 0.1183,
      "step": 7860
    },
    {
      "epoch": 0.7242824895195098,
      "grad_norm": 0.9554134809640706,
      "learning_rate": 9.689967252900809e-07,
      "loss": 0.1205,
      "step": 7861
    },
    {
      "epoch": 0.7243746256967798,
      "grad_norm": 0.947696514674594,
      "learning_rate": 9.683945711582181e-07,
      "loss": 0.1195,
      "step": 7862
    },
    {
      "epoch": 0.7244667618740498,
      "grad_norm": 0.9489888944270076,
      "learning_rate": 9.677925592360851e-07,
      "loss": 0.1353,
      "step": 7863
    },
    {
      "epoch": 0.7245588980513199,
      "grad_norm": 0.9408998478748832,
      "learning_rate": 9.671906895795779e-07,
      "loss": 0.1206,
      "step": 7864
    },
    {
      "epoch": 0.7246510342285899,
      "grad_norm": 0.9700947586152011,
      "learning_rate": 9.665889622445792e-07,
      "loss": 0.1356,
      "step": 7865
    },
    {
      "epoch": 0.7247431704058599,
      "grad_norm": 0.961635334352596,
      "learning_rate": 9.659873772869601e-07,
      "loss": 0.121,
      "step": 7866
    },
    {
      "epoch": 0.7248353065831299,
      "grad_norm": 0.9745334075339324,
      "learning_rate": 9.653859347625786e-07,
      "loss": 0.1267,
      "step": 7867
    },
    {
      "epoch": 0.7249274427603999,
      "grad_norm": 0.8762381292464243,
      "learning_rate": 9.647846347272788e-07,
      "loss": 0.1128,
      "step": 7868
    },
    {
      "epoch": 0.7250195789376699,
      "grad_norm": 0.9462395663888918,
      "learning_rate": 9.64183477236891e-07,
      "loss": 0.1276,
      "step": 7869
    },
    {
      "epoch": 0.7251117151149399,
      "grad_norm": 0.9656221813969156,
      "learning_rate": 9.635824623472317e-07,
      "loss": 0.1354,
      "step": 7870
    },
    {
      "epoch": 0.7252038512922099,
      "grad_norm": 0.9520423537311674,
      "learning_rate": 9.629815901141062e-07,
      "loss": 0.1242,
      "step": 7871
    },
    {
      "epoch": 0.7252959874694799,
      "grad_norm": 0.9210581281415772,
      "learning_rate": 9.623808605933063e-07,
      "loss": 0.1182,
      "step": 7872
    },
    {
      "epoch": 0.7253881236467499,
      "grad_norm": 0.8821609441885799,
      "learning_rate": 9.617802738406082e-07,
      "loss": 0.1125,
      "step": 7873
    },
    {
      "epoch": 0.7254802598240199,
      "grad_norm": 0.8783025714845024,
      "learning_rate": 9.611798299117778e-07,
      "loss": 0.1193,
      "step": 7874
    },
    {
      "epoch": 0.7255723960012899,
      "grad_norm": 0.9102543279448607,
      "learning_rate": 9.605795288625652e-07,
      "loss": 0.118,
      "step": 7875
    },
    {
      "epoch": 0.7256645321785599,
      "grad_norm": 0.923045665646691,
      "learning_rate": 9.599793707487098e-07,
      "loss": 0.1302,
      "step": 7876
    },
    {
      "epoch": 0.72575666835583,
      "grad_norm": 0.9074231261058356,
      "learning_rate": 9.593793556259347e-07,
      "loss": 0.1216,
      "step": 7877
    },
    {
      "epoch": 0.7258488045331,
      "grad_norm": 0.9150292738479361,
      "learning_rate": 9.587794835499523e-07,
      "loss": 0.1231,
      "step": 7878
    },
    {
      "epoch": 0.72594094071037,
      "grad_norm": 0.8998385147836466,
      "learning_rate": 9.581797545764614e-07,
      "loss": 0.1155,
      "step": 7879
    },
    {
      "epoch": 0.72603307688764,
      "grad_norm": 0.9142946009765255,
      "learning_rate": 9.575801687611464e-07,
      "loss": 0.1185,
      "step": 7880
    },
    {
      "epoch": 0.72612521306491,
      "grad_norm": 1.0152010957770305,
      "learning_rate": 9.569807261596779e-07,
      "loss": 0.1298,
      "step": 7881
    },
    {
      "epoch": 0.72621734924218,
      "grad_norm": 0.9170396942900356,
      "learning_rate": 9.56381426827715e-07,
      "loss": 0.1248,
      "step": 7882
    },
    {
      "epoch": 0.72630948541945,
      "grad_norm": 0.9811977448714191,
      "learning_rate": 9.557822708209025e-07,
      "loss": 0.1295,
      "step": 7883
    },
    {
      "epoch": 0.72640162159672,
      "grad_norm": 0.9161193726605645,
      "learning_rate": 9.551832581948733e-07,
      "loss": 0.1181,
      "step": 7884
    },
    {
      "epoch": 0.72649375777399,
      "grad_norm": 0.9462600455397618,
      "learning_rate": 9.54584389005245e-07,
      "loss": 0.1184,
      "step": 7885
    },
    {
      "epoch": 0.7265858939512599,
      "grad_norm": 0.9143213594519252,
      "learning_rate": 9.539856633076217e-07,
      "loss": 0.1144,
      "step": 7886
    },
    {
      "epoch": 0.7266780301285299,
      "grad_norm": 0.8958649242270569,
      "learning_rate": 9.533870811575957e-07,
      "loss": 0.1206,
      "step": 7887
    },
    {
      "epoch": 0.7267701663057999,
      "grad_norm": 1.0084483158787285,
      "learning_rate": 9.527886426107458e-07,
      "loss": 0.132,
      "step": 7888
    },
    {
      "epoch": 0.7268623024830699,
      "grad_norm": 0.8899448954141135,
      "learning_rate": 9.52190347722638e-07,
      "loss": 0.1217,
      "step": 7889
    },
    {
      "epoch": 0.7269544386603399,
      "grad_norm": 0.9445700892184191,
      "learning_rate": 9.515921965488226e-07,
      "loss": 0.1214,
      "step": 7890
    },
    {
      "epoch": 0.72704657483761,
      "grad_norm": 0.9425465602318627,
      "learning_rate": 9.509941891448376e-07,
      "loss": 0.113,
      "step": 7891
    },
    {
      "epoch": 0.72713871101488,
      "grad_norm": 0.9188640731856463,
      "learning_rate": 9.503963255662091e-07,
      "loss": 0.1235,
      "step": 7892
    },
    {
      "epoch": 0.72723084719215,
      "grad_norm": 0.9183542312222722,
      "learning_rate": 9.497986058684491e-07,
      "loss": 0.124,
      "step": 7893
    },
    {
      "epoch": 0.72732298336942,
      "grad_norm": 0.9609361929729627,
      "learning_rate": 9.492010301070548e-07,
      "loss": 0.123,
      "step": 7894
    },
    {
      "epoch": 0.72741511954669,
      "grad_norm": 0.920395848689612,
      "learning_rate": 9.486035983375125e-07,
      "loss": 0.1203,
      "step": 7895
    },
    {
      "epoch": 0.72750725572396,
      "grad_norm": 0.8742457567724701,
      "learning_rate": 9.48006310615292e-07,
      "loss": 0.1061,
      "step": 7896
    },
    {
      "epoch": 0.72759939190123,
      "grad_norm": 0.9205268230772412,
      "learning_rate": 9.474091669958538e-07,
      "loss": 0.1166,
      "step": 7897
    },
    {
      "epoch": 0.7276915280785,
      "grad_norm": 0.9528723866899338,
      "learning_rate": 9.468121675346406e-07,
      "loss": 0.1117,
      "step": 7898
    },
    {
      "epoch": 0.72778366425577,
      "grad_norm": 0.8977194231147801,
      "learning_rate": 9.462153122870846e-07,
      "loss": 0.1227,
      "step": 7899
    },
    {
      "epoch": 0.72787580043304,
      "grad_norm": 0.9545650384877833,
      "learning_rate": 9.456186013086049e-07,
      "loss": 0.1275,
      "step": 7900
    },
    {
      "epoch": 0.72796793661031,
      "grad_norm": 0.9970229892888035,
      "learning_rate": 9.450220346546057e-07,
      "loss": 0.1252,
      "step": 7901
    },
    {
      "epoch": 0.72806007278758,
      "grad_norm": 0.9069698030197497,
      "learning_rate": 9.444256123804768e-07,
      "loss": 0.1342,
      "step": 7902
    },
    {
      "epoch": 0.72815220896485,
      "grad_norm": 0.917366478706477,
      "learning_rate": 9.438293345415972e-07,
      "loss": 0.1233,
      "step": 7903
    },
    {
      "epoch": 0.7282443451421201,
      "grad_norm": 0.9816361473567158,
      "learning_rate": 9.432332011933315e-07,
      "loss": 0.119,
      "step": 7904
    },
    {
      "epoch": 0.7283364813193901,
      "grad_norm": 0.912167553796657,
      "learning_rate": 9.426372123910313e-07,
      "loss": 0.1209,
      "step": 7905
    },
    {
      "epoch": 0.7284286174966601,
      "grad_norm": 0.9049561632030253,
      "learning_rate": 9.420413681900337e-07,
      "loss": 0.1126,
      "step": 7906
    },
    {
      "epoch": 0.7285207536739301,
      "grad_norm": 0.9459876967547667,
      "learning_rate": 9.414456686456619e-07,
      "loss": 0.1184,
      "step": 7907
    },
    {
      "epoch": 0.7286128898512001,
      "grad_norm": 0.9084561231139721,
      "learning_rate": 9.408501138132273e-07,
      "loss": 0.1154,
      "step": 7908
    },
    {
      "epoch": 0.7287050260284701,
      "grad_norm": 0.9798235306309518,
      "learning_rate": 9.402547037480284e-07,
      "loss": 0.1241,
      "step": 7909
    },
    {
      "epoch": 0.7287971622057401,
      "grad_norm": 0.9169403190088212,
      "learning_rate": 9.396594385053473e-07,
      "loss": 0.1188,
      "step": 7910
    },
    {
      "epoch": 0.7288892983830101,
      "grad_norm": 0.8842824376641447,
      "learning_rate": 9.39064318140456e-07,
      "loss": 0.1157,
      "step": 7911
    },
    {
      "epoch": 0.7289814345602801,
      "grad_norm": 0.8815960009445793,
      "learning_rate": 9.3846934270861e-07,
      "loss": 0.1129,
      "step": 7912
    },
    {
      "epoch": 0.7290735707375501,
      "grad_norm": 0.8735058660585312,
      "learning_rate": 9.378745122650545e-07,
      "loss": 0.0989,
      "step": 7913
    },
    {
      "epoch": 0.7291657069148201,
      "grad_norm": 0.9964407796113233,
      "learning_rate": 9.372798268650177e-07,
      "loss": 0.128,
      "step": 7914
    },
    {
      "epoch": 0.7292578430920901,
      "grad_norm": 0.9077958451134759,
      "learning_rate": 9.366852865637171e-07,
      "loss": 0.1119,
      "step": 7915
    },
    {
      "epoch": 0.7293499792693601,
      "grad_norm": 0.9191567318461464,
      "learning_rate": 9.360908914163569e-07,
      "loss": 0.1165,
      "step": 7916
    },
    {
      "epoch": 0.7294421154466301,
      "grad_norm": 0.9137556085419295,
      "learning_rate": 9.354966414781247e-07,
      "loss": 0.1151,
      "step": 7917
    },
    {
      "epoch": 0.7295342516239002,
      "grad_norm": 0.9377389661455451,
      "learning_rate": 9.349025368041989e-07,
      "loss": 0.1299,
      "step": 7918
    },
    {
      "epoch": 0.7296263878011702,
      "grad_norm": 0.9697573167162219,
      "learning_rate": 9.343085774497399e-07,
      "loss": 0.1303,
      "step": 7919
    },
    {
      "epoch": 0.7297185239784402,
      "grad_norm": 0.9276544404006211,
      "learning_rate": 9.337147634698979e-07,
      "loss": 0.1114,
      "step": 7920
    },
    {
      "epoch": 0.7298106601557102,
      "grad_norm": 0.9316178448679642,
      "learning_rate": 9.331210949198097e-07,
      "loss": 0.1186,
      "step": 7921
    },
    {
      "epoch": 0.7299027963329802,
      "grad_norm": 0.9297104126646819,
      "learning_rate": 9.325275718545962e-07,
      "loss": 0.1175,
      "step": 7922
    },
    {
      "epoch": 0.7299949325102502,
      "grad_norm": 0.9459703058813975,
      "learning_rate": 9.319341943293659e-07,
      "loss": 0.1143,
      "step": 7923
    },
    {
      "epoch": 0.7300870686875202,
      "grad_norm": 0.903636093967031,
      "learning_rate": 9.31340962399214e-07,
      "loss": 0.1131,
      "step": 7924
    },
    {
      "epoch": 0.7301792048647902,
      "grad_norm": 0.9130854765962986,
      "learning_rate": 9.307478761192229e-07,
      "loss": 0.1215,
      "step": 7925
    },
    {
      "epoch": 0.7302713410420602,
      "grad_norm": 0.9250366455134187,
      "learning_rate": 9.301549355444611e-07,
      "loss": 0.1148,
      "step": 7926
    },
    {
      "epoch": 0.7303634772193301,
      "grad_norm": 0.9753728160621229,
      "learning_rate": 9.295621407299824e-07,
      "loss": 0.1234,
      "step": 7927
    },
    {
      "epoch": 0.7304556133966001,
      "grad_norm": 0.901388541959924,
      "learning_rate": 9.289694917308273e-07,
      "loss": 0.1241,
      "step": 7928
    },
    {
      "epoch": 0.7305477495738701,
      "grad_norm": 0.9071666280203143,
      "learning_rate": 9.283769886020238e-07,
      "loss": 0.1211,
      "step": 7929
    },
    {
      "epoch": 0.7306398857511401,
      "grad_norm": 0.9197162146244954,
      "learning_rate": 9.277846313985869e-07,
      "loss": 0.1112,
      "step": 7930
    },
    {
      "epoch": 0.7307320219284101,
      "grad_norm": 0.9720399464274628,
      "learning_rate": 9.271924201755153e-07,
      "loss": 0.1291,
      "step": 7931
    },
    {
      "epoch": 0.7308241581056802,
      "grad_norm": 0.9462594696606331,
      "learning_rate": 9.26600354987797e-07,
      "loss": 0.1146,
      "step": 7932
    },
    {
      "epoch": 0.7309162942829502,
      "grad_norm": 0.9549579771749659,
      "learning_rate": 9.260084358904056e-07,
      "loss": 0.1253,
      "step": 7933
    },
    {
      "epoch": 0.7310084304602202,
      "grad_norm": 0.9346643983496812,
      "learning_rate": 9.254166629383005e-07,
      "loss": 0.118,
      "step": 7934
    },
    {
      "epoch": 0.7311005666374902,
      "grad_norm": 0.9197443173940072,
      "learning_rate": 9.24825036186427e-07,
      "loss": 0.1155,
      "step": 7935
    },
    {
      "epoch": 0.7311927028147602,
      "grad_norm": 0.9111826786059956,
      "learning_rate": 9.242335556897181e-07,
      "loss": 0.125,
      "step": 7936
    },
    {
      "epoch": 0.7312848389920302,
      "grad_norm": 0.8963685992216108,
      "learning_rate": 9.236422215030932e-07,
      "loss": 0.1071,
      "step": 7937
    },
    {
      "epoch": 0.7313769751693002,
      "grad_norm": 0.9996120834457425,
      "learning_rate": 9.230510336814586e-07,
      "loss": 0.1308,
      "step": 7938
    },
    {
      "epoch": 0.7314691113465702,
      "grad_norm": 0.9100280131314815,
      "learning_rate": 9.224599922797053e-07,
      "loss": 0.1254,
      "step": 7939
    },
    {
      "epoch": 0.7315612475238402,
      "grad_norm": 0.877434720202932,
      "learning_rate": 9.218690973527106e-07,
      "loss": 0.1145,
      "step": 7940
    },
    {
      "epoch": 0.7316533837011102,
      "grad_norm": 1.0262118485516254,
      "learning_rate": 9.212783489553401e-07,
      "loss": 0.1336,
      "step": 7941
    },
    {
      "epoch": 0.7317455198783802,
      "grad_norm": 0.9555725570664751,
      "learning_rate": 9.206877471424455e-07,
      "loss": 0.1341,
      "step": 7942
    },
    {
      "epoch": 0.7318376560556502,
      "grad_norm": 0.9392503740018264,
      "learning_rate": 9.200972919688628e-07,
      "loss": 0.1216,
      "step": 7943
    },
    {
      "epoch": 0.7319297922329202,
      "grad_norm": 0.964214487072524,
      "learning_rate": 9.195069834894174e-07,
      "loss": 0.1232,
      "step": 7944
    },
    {
      "epoch": 0.7320219284101903,
      "grad_norm": 0.9505944973653115,
      "learning_rate": 9.189168217589178e-07,
      "loss": 0.1208,
      "step": 7945
    },
    {
      "epoch": 0.7321140645874603,
      "grad_norm": 0.9211807278405854,
      "learning_rate": 9.183268068321616e-07,
      "loss": 0.1156,
      "step": 7946
    },
    {
      "epoch": 0.7322062007647303,
      "grad_norm": 0.8903747447623426,
      "learning_rate": 9.177369387639323e-07,
      "loss": 0.1082,
      "step": 7947
    },
    {
      "epoch": 0.7322983369420003,
      "grad_norm": 0.8724081988043988,
      "learning_rate": 9.171472176089977e-07,
      "loss": 0.1116,
      "step": 7948
    },
    {
      "epoch": 0.7323904731192703,
      "grad_norm": 0.9842222839255699,
      "learning_rate": 9.165576434221152e-07,
      "loss": 0.1304,
      "step": 7949
    },
    {
      "epoch": 0.7324826092965403,
      "grad_norm": 0.8799032026581489,
      "learning_rate": 9.15968216258025e-07,
      "loss": 0.11,
      "step": 7950
    },
    {
      "epoch": 0.7325747454738103,
      "grad_norm": 0.9429682413677968,
      "learning_rate": 9.153789361714573e-07,
      "loss": 0.1214,
      "step": 7951
    },
    {
      "epoch": 0.7326668816510803,
      "grad_norm": 0.9521147026715193,
      "learning_rate": 9.147898032171251e-07,
      "loss": 0.1265,
      "step": 7952
    },
    {
      "epoch": 0.7327590178283503,
      "grad_norm": 0.9118057426636171,
      "learning_rate": 9.142008174497302e-07,
      "loss": 0.1164,
      "step": 7953
    },
    {
      "epoch": 0.7328511540056203,
      "grad_norm": 0.9177961028689359,
      "learning_rate": 9.136119789239612e-07,
      "loss": 0.1167,
      "step": 7954
    },
    {
      "epoch": 0.7329432901828903,
      "grad_norm": 0.9368447428001684,
      "learning_rate": 9.130232876944903e-07,
      "loss": 0.1242,
      "step": 7955
    },
    {
      "epoch": 0.7330354263601603,
      "grad_norm": 0.9623873942205943,
      "learning_rate": 9.124347438159772e-07,
      "loss": 0.1272,
      "step": 7956
    },
    {
      "epoch": 0.7331275625374303,
      "grad_norm": 0.9679388638840875,
      "learning_rate": 9.118463473430689e-07,
      "loss": 0.1247,
      "step": 7957
    },
    {
      "epoch": 0.7332196987147003,
      "grad_norm": 0.9003206232231331,
      "learning_rate": 9.112580983303984e-07,
      "loss": 0.1175,
      "step": 7958
    },
    {
      "epoch": 0.7333118348919704,
      "grad_norm": 0.9656950894298632,
      "learning_rate": 9.106699968325849e-07,
      "loss": 0.121,
      "step": 7959
    },
    {
      "epoch": 0.7334039710692404,
      "grad_norm": 0.9890202665502145,
      "learning_rate": 9.100820429042337e-07,
      "loss": 0.1317,
      "step": 7960
    },
    {
      "epoch": 0.7334961072465104,
      "grad_norm": 0.9233138280132024,
      "learning_rate": 9.094942365999349e-07,
      "loss": 0.1193,
      "step": 7961
    },
    {
      "epoch": 0.7335882434237804,
      "grad_norm": 0.9742207172348182,
      "learning_rate": 9.089065779742673e-07,
      "loss": 0.1299,
      "step": 7962
    },
    {
      "epoch": 0.7336803796010504,
      "grad_norm": 0.8990113391460285,
      "learning_rate": 9.083190670817963e-07,
      "loss": 0.1145,
      "step": 7963
    },
    {
      "epoch": 0.7337725157783204,
      "grad_norm": 0.861187500883117,
      "learning_rate": 9.0773170397707e-07,
      "loss": 0.1143,
      "step": 7964
    },
    {
      "epoch": 0.7338646519555904,
      "grad_norm": 0.9339588576002366,
      "learning_rate": 9.071444887146275e-07,
      "loss": 0.125,
      "step": 7965
    },
    {
      "epoch": 0.7339567881328604,
      "grad_norm": 0.9228986922230902,
      "learning_rate": 9.065574213489897e-07,
      "loss": 0.1177,
      "step": 7966
    },
    {
      "epoch": 0.7340489243101304,
      "grad_norm": 0.8987184447458356,
      "learning_rate": 9.059705019346676e-07,
      "loss": 0.1147,
      "step": 7967
    },
    {
      "epoch": 0.7341410604874004,
      "grad_norm": 0.9718902501296317,
      "learning_rate": 9.053837305261551e-07,
      "loss": 0.117,
      "step": 7968
    },
    {
      "epoch": 0.7342331966646704,
      "grad_norm": 0.882640269301587,
      "learning_rate": 9.047971071779349e-07,
      "loss": 0.1014,
      "step": 7969
    },
    {
      "epoch": 0.7343253328419403,
      "grad_norm": 0.9295900436726557,
      "learning_rate": 9.042106319444757e-07,
      "loss": 0.1282,
      "step": 7970
    },
    {
      "epoch": 0.7344174690192103,
      "grad_norm": 0.9431206523555368,
      "learning_rate": 9.036243048802312e-07,
      "loss": 0.1172,
      "step": 7971
    },
    {
      "epoch": 0.7345096051964805,
      "grad_norm": 0.9400908273680944,
      "learning_rate": 9.030381260396409e-07,
      "loss": 0.1143,
      "step": 7972
    },
    {
      "epoch": 0.7346017413737504,
      "grad_norm": 0.9688951073944904,
      "learning_rate": 9.024520954771326e-07,
      "loss": 0.131,
      "step": 7973
    },
    {
      "epoch": 0.7346938775510204,
      "grad_norm": 0.9851741676237068,
      "learning_rate": 9.018662132471189e-07,
      "loss": 0.1241,
      "step": 7974
    },
    {
      "epoch": 0.7347860137282904,
      "grad_norm": 0.9057714871865711,
      "learning_rate": 9.012804794040003e-07,
      "loss": 0.1179,
      "step": 7975
    },
    {
      "epoch": 0.7348781499055604,
      "grad_norm": 0.9750342427400404,
      "learning_rate": 9.006948940021612e-07,
      "loss": 0.121,
      "step": 7976
    },
    {
      "epoch": 0.7349702860828304,
      "grad_norm": 0.9246612470094258,
      "learning_rate": 9.001094570959726e-07,
      "loss": 0.1145,
      "step": 7977
    },
    {
      "epoch": 0.7350624222601004,
      "grad_norm": 0.9570937459286335,
      "learning_rate": 8.995241687397929e-07,
      "loss": 0.1219,
      "step": 7978
    },
    {
      "epoch": 0.7351545584373704,
      "grad_norm": 0.9961421625364182,
      "learning_rate": 8.989390289879665e-07,
      "loss": 0.1294,
      "step": 7979
    },
    {
      "epoch": 0.7352466946146404,
      "grad_norm": 0.9600761953924268,
      "learning_rate": 8.983540378948244e-07,
      "loss": 0.1242,
      "step": 7980
    },
    {
      "epoch": 0.7353388307919104,
      "grad_norm": 0.9304715372533625,
      "learning_rate": 8.977691955146823e-07,
      "loss": 0.1137,
      "step": 7981
    },
    {
      "epoch": 0.7354309669691804,
      "grad_norm": 0.9413815198495417,
      "learning_rate": 8.971845019018419e-07,
      "loss": 0.1279,
      "step": 7982
    },
    {
      "epoch": 0.7355231031464504,
      "grad_norm": 0.9586301278205637,
      "learning_rate": 8.965999571105929e-07,
      "loss": 0.1302,
      "step": 7983
    },
    {
      "epoch": 0.7356152393237204,
      "grad_norm": 1.0239076800420843,
      "learning_rate": 8.960155611952115e-07,
      "loss": 0.139,
      "step": 7984
    },
    {
      "epoch": 0.7357073755009904,
      "grad_norm": 0.8874691209556264,
      "learning_rate": 8.954313142099568e-07,
      "loss": 0.1162,
      "step": 7985
    },
    {
      "epoch": 0.7357995116782605,
      "grad_norm": 0.8906043874184616,
      "learning_rate": 8.948472162090782e-07,
      "loss": 0.1218,
      "step": 7986
    },
    {
      "epoch": 0.7358916478555305,
      "grad_norm": 0.8774243675542508,
      "learning_rate": 8.942632672468077e-07,
      "loss": 0.1146,
      "step": 7987
    },
    {
      "epoch": 0.7359837840328005,
      "grad_norm": 0.9447564057822249,
      "learning_rate": 8.936794673773661e-07,
      "loss": 0.1266,
      "step": 7988
    },
    {
      "epoch": 0.7360759202100705,
      "grad_norm": 0.9399500136330979,
      "learning_rate": 8.930958166549583e-07,
      "loss": 0.1266,
      "step": 7989
    },
    {
      "epoch": 0.7361680563873405,
      "grad_norm": 0.9102773778780244,
      "learning_rate": 8.925123151337767e-07,
      "loss": 0.1176,
      "step": 7990
    },
    {
      "epoch": 0.7362601925646105,
      "grad_norm": 0.9128385627600489,
      "learning_rate": 8.919289628680005e-07,
      "loss": 0.1205,
      "step": 7991
    },
    {
      "epoch": 0.7363523287418805,
      "grad_norm": 0.9342386016200168,
      "learning_rate": 8.913457599117933e-07,
      "loss": 0.1156,
      "step": 7992
    },
    {
      "epoch": 0.7364444649191505,
      "grad_norm": 0.9199680452398934,
      "learning_rate": 8.907627063193045e-07,
      "loss": 0.1165,
      "step": 7993
    },
    {
      "epoch": 0.7365366010964205,
      "grad_norm": 0.9437755517472011,
      "learning_rate": 8.901798021446714e-07,
      "loss": 0.1275,
      "step": 7994
    },
    {
      "epoch": 0.7366287372736905,
      "grad_norm": 0.8953777925202131,
      "learning_rate": 8.895970474420171e-07,
      "loss": 0.1101,
      "step": 7995
    },
    {
      "epoch": 0.7367208734509605,
      "grad_norm": 0.9170465764857951,
      "learning_rate": 8.890144422654512e-07,
      "loss": 0.1194,
      "step": 7996
    },
    {
      "epoch": 0.7368130096282305,
      "grad_norm": 0.9822108915381218,
      "learning_rate": 8.884319866690674e-07,
      "loss": 0.1174,
      "step": 7997
    },
    {
      "epoch": 0.7369051458055005,
      "grad_norm": 0.933804644233933,
      "learning_rate": 8.878496807069464e-07,
      "loss": 0.1168,
      "step": 7998
    },
    {
      "epoch": 0.7369972819827705,
      "grad_norm": 0.9261693186397323,
      "learning_rate": 8.87267524433156e-07,
      "loss": 0.1199,
      "step": 7999
    },
    {
      "epoch": 0.7370894181600406,
      "grad_norm": 0.9344138753722313,
      "learning_rate": 8.866855179017505e-07,
      "loss": 0.116,
      "step": 8000
    },
    {
      "epoch": 0.7370894181600406,
      "eval_loss": 0.12275012582540512,
      "eval_runtime": 299.0394,
      "eval_samples_per_second": 23.465,
      "eval_steps_per_second": 2.936,
      "step": 8000
    },
    {
      "epoch": 0.7371815543373106,
      "grad_norm": 0.9069037559452209,
      "learning_rate": 8.861036611667676e-07,
      "loss": 0.1074,
      "step": 8001
    },
    {
      "epoch": 0.7372736905145806,
      "grad_norm": 0.916023672944426,
      "learning_rate": 8.855219542822341e-07,
      "loss": 0.1059,
      "step": 8002
    },
    {
      "epoch": 0.7373658266918506,
      "grad_norm": 0.9141703814175853,
      "learning_rate": 8.8494039730216e-07,
      "loss": 0.1236,
      "step": 8003
    },
    {
      "epoch": 0.7374579628691206,
      "grad_norm": 0.961606399248731,
      "learning_rate": 8.843589902805438e-07,
      "loss": 0.1317,
      "step": 8004
    },
    {
      "epoch": 0.7375500990463906,
      "grad_norm": 0.934755066682781,
      "learning_rate": 8.837777332713701e-07,
      "loss": 0.1203,
      "step": 8005
    },
    {
      "epoch": 0.7376422352236606,
      "grad_norm": 0.9025835586979728,
      "learning_rate": 8.831966263286071e-07,
      "loss": 0.1187,
      "step": 8006
    },
    {
      "epoch": 0.7377343714009306,
      "grad_norm": 0.9078088364126208,
      "learning_rate": 8.826156695062113e-07,
      "loss": 0.1241,
      "step": 8007
    },
    {
      "epoch": 0.7378265075782006,
      "grad_norm": 0.9342661255740071,
      "learning_rate": 8.820348628581254e-07,
      "loss": 0.1228,
      "step": 8008
    },
    {
      "epoch": 0.7379186437554706,
      "grad_norm": 0.9935337510975628,
      "learning_rate": 8.814542064382767e-07,
      "loss": 0.1244,
      "step": 8009
    },
    {
      "epoch": 0.7380107799327406,
      "grad_norm": 0.9048942427175215,
      "learning_rate": 8.808737003005782e-07,
      "loss": 0.1214,
      "step": 8010
    },
    {
      "epoch": 0.7381029161100106,
      "grad_norm": 0.9564069497268002,
      "learning_rate": 8.802933444989308e-07,
      "loss": 0.1257,
      "step": 8011
    },
    {
      "epoch": 0.7381950522872806,
      "grad_norm": 0.8980904699977105,
      "learning_rate": 8.797131390872207e-07,
      "loss": 0.1104,
      "step": 8012
    },
    {
      "epoch": 0.7382871884645507,
      "grad_norm": 0.8601604398887479,
      "learning_rate": 8.79133084119321e-07,
      "loss": 0.1216,
      "step": 8013
    },
    {
      "epoch": 0.7383793246418207,
      "grad_norm": 0.9231462426012692,
      "learning_rate": 8.78553179649089e-07,
      "loss": 0.1287,
      "step": 8014
    },
    {
      "epoch": 0.7384714608190907,
      "grad_norm": 0.9342174944000972,
      "learning_rate": 8.779734257303677e-07,
      "loss": 0.1248,
      "step": 8015
    },
    {
      "epoch": 0.7385635969963606,
      "grad_norm": 0.9247205293120123,
      "learning_rate": 8.773938224169884e-07,
      "loss": 0.119,
      "step": 8016
    },
    {
      "epoch": 0.7386557331736306,
      "grad_norm": 0.9345834504752919,
      "learning_rate": 8.768143697627681e-07,
      "loss": 0.1301,
      "step": 8017
    },
    {
      "epoch": 0.7387478693509006,
      "grad_norm": 0.8466531869104624,
      "learning_rate": 8.762350678215076e-07,
      "loss": 0.1085,
      "step": 8018
    },
    {
      "epoch": 0.7388400055281706,
      "grad_norm": 0.948753230207356,
      "learning_rate": 8.756559166469966e-07,
      "loss": 0.1299,
      "step": 8019
    },
    {
      "epoch": 0.7389321417054406,
      "grad_norm": 0.9596076569583665,
      "learning_rate": 8.750769162930076e-07,
      "loss": 0.1221,
      "step": 8020
    },
    {
      "epoch": 0.7390242778827106,
      "grad_norm": 0.9550525329866566,
      "learning_rate": 8.744980668133026e-07,
      "loss": 0.1208,
      "step": 8021
    },
    {
      "epoch": 0.7391164140599806,
      "grad_norm": 0.9490302288135481,
      "learning_rate": 8.739193682616265e-07,
      "loss": 0.1197,
      "step": 8022
    },
    {
      "epoch": 0.7392085502372506,
      "grad_norm": 0.9200381002548552,
      "learning_rate": 8.733408206917118e-07,
      "loss": 0.1226,
      "step": 8023
    },
    {
      "epoch": 0.7393006864145206,
      "grad_norm": 0.9650781421693304,
      "learning_rate": 8.727624241572779e-07,
      "loss": 0.1292,
      "step": 8024
    },
    {
      "epoch": 0.7393928225917906,
      "grad_norm": 0.904301714517166,
      "learning_rate": 8.72184178712028e-07,
      "loss": 0.1146,
      "step": 8025
    },
    {
      "epoch": 0.7394849587690606,
      "grad_norm": 0.8895832457122699,
      "learning_rate": 8.716060844096514e-07,
      "loss": 0.1159,
      "step": 8026
    },
    {
      "epoch": 0.7395770949463307,
      "grad_norm": 0.8901681509477561,
      "learning_rate": 8.710281413038252e-07,
      "loss": 0.1127,
      "step": 8027
    },
    {
      "epoch": 0.7396692311236007,
      "grad_norm": 0.973436941777358,
      "learning_rate": 8.704503494482114e-07,
      "loss": 0.1323,
      "step": 8028
    },
    {
      "epoch": 0.7397613673008707,
      "grad_norm": 0.8806928065451151,
      "learning_rate": 8.698727088964587e-07,
      "loss": 0.1144,
      "step": 8029
    },
    {
      "epoch": 0.7398535034781407,
      "grad_norm": 0.9283017152565503,
      "learning_rate": 8.692952197022006e-07,
      "loss": 0.1126,
      "step": 8030
    },
    {
      "epoch": 0.7399456396554107,
      "grad_norm": 0.9541252081277901,
      "learning_rate": 8.687178819190558e-07,
      "loss": 0.1252,
      "step": 8031
    },
    {
      "epoch": 0.7400377758326807,
      "grad_norm": 0.9693080055760379,
      "learning_rate": 8.681406956006316e-07,
      "loss": 0.1327,
      "step": 8032
    },
    {
      "epoch": 0.7401299120099507,
      "grad_norm": 0.9120977367696028,
      "learning_rate": 8.675636608005191e-07,
      "loss": 0.1218,
      "step": 8033
    },
    {
      "epoch": 0.7402220481872207,
      "grad_norm": 0.9133310755374804,
      "learning_rate": 8.669867775722973e-07,
      "loss": 0.1308,
      "step": 8034
    },
    {
      "epoch": 0.7403141843644907,
      "grad_norm": 0.8719559549311144,
      "learning_rate": 8.66410045969529e-07,
      "loss": 0.1094,
      "step": 8035
    },
    {
      "epoch": 0.7404063205417607,
      "grad_norm": 0.9203157162386649,
      "learning_rate": 8.658334660457629e-07,
      "loss": 0.1215,
      "step": 8036
    },
    {
      "epoch": 0.7404984567190307,
      "grad_norm": 0.8475985566773456,
      "learning_rate": 8.652570378545355e-07,
      "loss": 0.1024,
      "step": 8037
    },
    {
      "epoch": 0.7405905928963007,
      "grad_norm": 0.8714394463711818,
      "learning_rate": 8.646807614493685e-07,
      "loss": 0.1074,
      "step": 8038
    },
    {
      "epoch": 0.7406827290735707,
      "grad_norm": 0.8951025111622029,
      "learning_rate": 8.641046368837682e-07,
      "loss": 0.119,
      "step": 8039
    },
    {
      "epoch": 0.7407748652508408,
      "grad_norm": 0.8940983168209601,
      "learning_rate": 8.635286642112295e-07,
      "loss": 0.1068,
      "step": 8040
    },
    {
      "epoch": 0.7408670014281108,
      "grad_norm": 0.9700185466019189,
      "learning_rate": 8.629528434852294e-07,
      "loss": 0.1261,
      "step": 8041
    },
    {
      "epoch": 0.7409591376053808,
      "grad_norm": 0.9605648103993476,
      "learning_rate": 8.623771747592347e-07,
      "loss": 0.1303,
      "step": 8042
    },
    {
      "epoch": 0.7410512737826508,
      "grad_norm": 0.8647742808321739,
      "learning_rate": 8.618016580866947e-07,
      "loss": 0.113,
      "step": 8043
    },
    {
      "epoch": 0.7411434099599208,
      "grad_norm": 0.874872810061018,
      "learning_rate": 8.612262935210472e-07,
      "loss": 0.1088,
      "step": 8044
    },
    {
      "epoch": 0.7412355461371908,
      "grad_norm": 0.959941985103914,
      "learning_rate": 8.606510811157154e-07,
      "loss": 0.1181,
      "step": 8045
    },
    {
      "epoch": 0.7413276823144608,
      "grad_norm": 0.9386067809347824,
      "learning_rate": 8.600760209241074e-07,
      "loss": 0.1277,
      "step": 8046
    },
    {
      "epoch": 0.7414198184917308,
      "grad_norm": 0.9597688620808285,
      "learning_rate": 8.595011129996164e-07,
      "loss": 0.1219,
      "step": 8047
    },
    {
      "epoch": 0.7415119546690008,
      "grad_norm": 0.9520032460057815,
      "learning_rate": 8.589263573956236e-07,
      "loss": 0.1226,
      "step": 8048
    },
    {
      "epoch": 0.7416040908462708,
      "grad_norm": 0.9018956330326757,
      "learning_rate": 8.583517541654951e-07,
      "loss": 0.1117,
      "step": 8049
    },
    {
      "epoch": 0.7416962270235408,
      "grad_norm": 0.9368276434977748,
      "learning_rate": 8.577773033625836e-07,
      "loss": 0.1226,
      "step": 8050
    },
    {
      "epoch": 0.7417883632008108,
      "grad_norm": 0.8922899110331376,
      "learning_rate": 8.572030050402264e-07,
      "loss": 0.1094,
      "step": 8051
    },
    {
      "epoch": 0.7418804993780808,
      "grad_norm": 0.9630269672440016,
      "learning_rate": 8.566288592517461e-07,
      "loss": 0.1222,
      "step": 8052
    },
    {
      "epoch": 0.7419726355553508,
      "grad_norm": 0.9077601187164204,
      "learning_rate": 8.560548660504531e-07,
      "loss": 0.1109,
      "step": 8053
    },
    {
      "epoch": 0.7420647717326209,
      "grad_norm": 0.9147053432114655,
      "learning_rate": 8.554810254896434e-07,
      "loss": 0.1222,
      "step": 8054
    },
    {
      "epoch": 0.7421569079098909,
      "grad_norm": 0.8670005809699466,
      "learning_rate": 8.54907337622597e-07,
      "loss": 0.1051,
      "step": 8055
    },
    {
      "epoch": 0.7422490440871609,
      "grad_norm": 0.9849815202966365,
      "learning_rate": 8.543338025025818e-07,
      "loss": 0.1295,
      "step": 8056
    },
    {
      "epoch": 0.7423411802644309,
      "grad_norm": 0.9317265536420508,
      "learning_rate": 8.537604201828495e-07,
      "loss": 0.1194,
      "step": 8057
    },
    {
      "epoch": 0.7424333164417009,
      "grad_norm": 0.9531648343620684,
      "learning_rate": 8.5318719071664e-07,
      "loss": 0.1215,
      "step": 8058
    },
    {
      "epoch": 0.7425254526189708,
      "grad_norm": 0.9383372475678683,
      "learning_rate": 8.526141141571764e-07,
      "loss": 0.1263,
      "step": 8059
    },
    {
      "epoch": 0.7426175887962408,
      "grad_norm": 0.8973356402044012,
      "learning_rate": 8.520411905576697e-07,
      "loss": 0.1238,
      "step": 8060
    },
    {
      "epoch": 0.7427097249735108,
      "grad_norm": 0.8774857932653227,
      "learning_rate": 8.514684199713166e-07,
      "loss": 0.1126,
      "step": 8061
    },
    {
      "epoch": 0.7428018611507808,
      "grad_norm": 0.9196376921082318,
      "learning_rate": 8.508958024512972e-07,
      "loss": 0.1186,
      "step": 8062
    },
    {
      "epoch": 0.7428939973280508,
      "grad_norm": 0.9235244197036236,
      "learning_rate": 8.503233380507808e-07,
      "loss": 0.1221,
      "step": 8063
    },
    {
      "epoch": 0.7429861335053208,
      "grad_norm": 0.8945240160141257,
      "learning_rate": 8.497510268229192e-07,
      "loss": 0.125,
      "step": 8064
    },
    {
      "epoch": 0.7430782696825908,
      "grad_norm": 0.9899562846771053,
      "learning_rate": 8.491788688208524e-07,
      "loss": 0.1272,
      "step": 8065
    },
    {
      "epoch": 0.7431704058598608,
      "grad_norm": 0.9773907890691923,
      "learning_rate": 8.486068640977063e-07,
      "loss": 0.1312,
      "step": 8066
    },
    {
      "epoch": 0.7432625420371308,
      "grad_norm": 0.9148469368667612,
      "learning_rate": 8.480350127065904e-07,
      "loss": 0.1144,
      "step": 8067
    },
    {
      "epoch": 0.7433546782144009,
      "grad_norm": 0.8556460111218671,
      "learning_rate": 8.474633147006006e-07,
      "loss": 0.116,
      "step": 8068
    },
    {
      "epoch": 0.7434468143916709,
      "grad_norm": 0.9538636763158576,
      "learning_rate": 8.468917701328197e-07,
      "loss": 0.1281,
      "step": 8069
    },
    {
      "epoch": 0.7435389505689409,
      "grad_norm": 0.9288207869960571,
      "learning_rate": 8.46320379056316e-07,
      "loss": 0.1262,
      "step": 8070
    },
    {
      "epoch": 0.7436310867462109,
      "grad_norm": 0.9360281891768948,
      "learning_rate": 8.45749141524144e-07,
      "loss": 0.1239,
      "step": 8071
    },
    {
      "epoch": 0.7437232229234809,
      "grad_norm": 0.8474422015239383,
      "learning_rate": 8.45178057589342e-07,
      "loss": 0.1068,
      "step": 8072
    },
    {
      "epoch": 0.7438153591007509,
      "grad_norm": 0.9805074107349353,
      "learning_rate": 8.446071273049347e-07,
      "loss": 0.1309,
      "step": 8073
    },
    {
      "epoch": 0.7439074952780209,
      "grad_norm": 0.9428578922557328,
      "learning_rate": 8.440363507239338e-07,
      "loss": 0.122,
      "step": 8074
    },
    {
      "epoch": 0.7439996314552909,
      "grad_norm": 0.8616051880037617,
      "learning_rate": 8.434657278993369e-07,
      "loss": 0.1103,
      "step": 8075
    },
    {
      "epoch": 0.7440917676325609,
      "grad_norm": 0.8825856553344199,
      "learning_rate": 8.428952588841247e-07,
      "loss": 0.1141,
      "step": 8076
    },
    {
      "epoch": 0.7441839038098309,
      "grad_norm": 0.945774709888264,
      "learning_rate": 8.423249437312667e-07,
      "loss": 0.1267,
      "step": 8077
    },
    {
      "epoch": 0.7442760399871009,
      "grad_norm": 0.9036109907141343,
      "learning_rate": 8.41754782493715e-07,
      "loss": 0.1132,
      "step": 8078
    },
    {
      "epoch": 0.7443681761643709,
      "grad_norm": 0.9153835514619679,
      "learning_rate": 8.411847752244115e-07,
      "loss": 0.1165,
      "step": 8079
    },
    {
      "epoch": 0.7444603123416409,
      "grad_norm": 1.0480110548918762,
      "learning_rate": 8.406149219762791e-07,
      "loss": 0.1479,
      "step": 8080
    },
    {
      "epoch": 0.744552448518911,
      "grad_norm": 0.8801512229098181,
      "learning_rate": 8.400452228022296e-07,
      "loss": 0.1087,
      "step": 8081
    },
    {
      "epoch": 0.744644584696181,
      "grad_norm": 0.9245546425567027,
      "learning_rate": 8.394756777551602e-07,
      "loss": 0.125,
      "step": 8082
    },
    {
      "epoch": 0.744736720873451,
      "grad_norm": 0.9070405580998772,
      "learning_rate": 8.389062868879541e-07,
      "loss": 0.1173,
      "step": 8083
    },
    {
      "epoch": 0.744828857050721,
      "grad_norm": 0.8722507342722666,
      "learning_rate": 8.383370502534765e-07,
      "loss": 0.1102,
      "step": 8084
    },
    {
      "epoch": 0.744920993227991,
      "grad_norm": 0.9207796493390706,
      "learning_rate": 8.377679679045828e-07,
      "loss": 0.1245,
      "step": 8085
    },
    {
      "epoch": 0.745013129405261,
      "grad_norm": 0.9025661348734192,
      "learning_rate": 8.371990398941121e-07,
      "loss": 0.1255,
      "step": 8086
    },
    {
      "epoch": 0.745105265582531,
      "grad_norm": 0.9436641765010778,
      "learning_rate": 8.366302662748901e-07,
      "loss": 0.1198,
      "step": 8087
    },
    {
      "epoch": 0.745197401759801,
      "grad_norm": 0.9440084983200447,
      "learning_rate": 8.360616470997263e-07,
      "loss": 0.1132,
      "step": 8088
    },
    {
      "epoch": 0.745289537937071,
      "grad_norm": 0.9353058639345495,
      "learning_rate": 8.354931824214185e-07,
      "loss": 0.1203,
      "step": 8089
    },
    {
      "epoch": 0.745381674114341,
      "grad_norm": 0.9620361208227816,
      "learning_rate": 8.349248722927469e-07,
      "loss": 0.1263,
      "step": 8090
    },
    {
      "epoch": 0.745473810291611,
      "grad_norm": 0.9390265581383841,
      "learning_rate": 8.343567167664801e-07,
      "loss": 0.123,
      "step": 8091
    },
    {
      "epoch": 0.745565946468881,
      "grad_norm": 0.9554238810465895,
      "learning_rate": 8.337887158953723e-07,
      "loss": 0.1203,
      "step": 8092
    },
    {
      "epoch": 0.745658082646151,
      "grad_norm": 0.9129449457442721,
      "learning_rate": 8.332208697321606e-07,
      "loss": 0.1192,
      "step": 8093
    },
    {
      "epoch": 0.745750218823421,
      "grad_norm": 0.9137176785711236,
      "learning_rate": 8.326531783295716e-07,
      "loss": 0.115,
      "step": 8094
    },
    {
      "epoch": 0.7458423550006911,
      "grad_norm": 0.9545661166830167,
      "learning_rate": 8.320856417403134e-07,
      "loss": 0.1287,
      "step": 8095
    },
    {
      "epoch": 0.7459344911779611,
      "grad_norm": 0.9471937440524988,
      "learning_rate": 8.315182600170838e-07,
      "loss": 0.1241,
      "step": 8096
    },
    {
      "epoch": 0.7460266273552311,
      "grad_norm": 0.9942778081193968,
      "learning_rate": 8.309510332125623e-07,
      "loss": 0.134,
      "step": 8097
    },
    {
      "epoch": 0.7461187635325011,
      "grad_norm": 0.8990687326017974,
      "learning_rate": 8.30383961379417e-07,
      "loss": 0.1176,
      "step": 8098
    },
    {
      "epoch": 0.7462108997097711,
      "grad_norm": 0.9198246571850542,
      "learning_rate": 8.298170445703016e-07,
      "loss": 0.1205,
      "step": 8099
    },
    {
      "epoch": 0.746303035887041,
      "grad_norm": 0.8982667907638325,
      "learning_rate": 8.292502828378534e-07,
      "loss": 0.1242,
      "step": 8100
    },
    {
      "epoch": 0.746395172064311,
      "grad_norm": 0.9183599913087285,
      "learning_rate": 8.286836762346953e-07,
      "loss": 0.1261,
      "step": 8101
    },
    {
      "epoch": 0.746487308241581,
      "grad_norm": 0.9390779289689044,
      "learning_rate": 8.281172248134376e-07,
      "loss": 0.1253,
      "step": 8102
    },
    {
      "epoch": 0.746579444418851,
      "grad_norm": 0.915484652855498,
      "learning_rate": 8.275509286266755e-07,
      "loss": 0.1213,
      "step": 8103
    },
    {
      "epoch": 0.746671580596121,
      "grad_norm": 0.9337864533144221,
      "learning_rate": 8.26984787726991e-07,
      "loss": 0.124,
      "step": 8104
    },
    {
      "epoch": 0.746763716773391,
      "grad_norm": 0.9738610570783551,
      "learning_rate": 8.264188021669483e-07,
      "loss": 0.1287,
      "step": 8105
    },
    {
      "epoch": 0.746855852950661,
      "grad_norm": 0.8942501970326633,
      "learning_rate": 8.258529719990996e-07,
      "loss": 0.1213,
      "step": 8106
    },
    {
      "epoch": 0.746947989127931,
      "grad_norm": 1.0538439434415687,
      "learning_rate": 8.252872972759826e-07,
      "loss": 0.1349,
      "step": 8107
    },
    {
      "epoch": 0.7470401253052011,
      "grad_norm": 0.9313257811750428,
      "learning_rate": 8.24721778050121e-07,
      "loss": 0.1182,
      "step": 8108
    },
    {
      "epoch": 0.7471322614824711,
      "grad_norm": 0.9095931652756736,
      "learning_rate": 8.241564143740216e-07,
      "loss": 0.1245,
      "step": 8109
    },
    {
      "epoch": 0.7472243976597411,
      "grad_norm": 0.9127789000547557,
      "learning_rate": 8.235912063001805e-07,
      "loss": 0.1196,
      "step": 8110
    },
    {
      "epoch": 0.7473165338370111,
      "grad_norm": 0.9008185338290264,
      "learning_rate": 8.230261538810755e-07,
      "loss": 0.1161,
      "step": 8111
    },
    {
      "epoch": 0.7474086700142811,
      "grad_norm": 0.9802778555969498,
      "learning_rate": 8.224612571691734e-07,
      "loss": 0.1207,
      "step": 8112
    },
    {
      "epoch": 0.7475008061915511,
      "grad_norm": 0.9268639919324714,
      "learning_rate": 8.218965162169232e-07,
      "loss": 0.1223,
      "step": 8113
    },
    {
      "epoch": 0.7475929423688211,
      "grad_norm": 0.9181135011446372,
      "learning_rate": 8.21331931076762e-07,
      "loss": 0.1238,
      "step": 8114
    },
    {
      "epoch": 0.7476850785460911,
      "grad_norm": 0.9651031360768163,
      "learning_rate": 8.207675018011127e-07,
      "loss": 0.1277,
      "step": 8115
    },
    {
      "epoch": 0.7477772147233611,
      "grad_norm": 0.9007347893819367,
      "learning_rate": 8.202032284423817e-07,
      "loss": 0.1207,
      "step": 8116
    },
    {
      "epoch": 0.7478693509006311,
      "grad_norm": 0.9716022381465899,
      "learning_rate": 8.196391110529606e-07,
      "loss": 0.1241,
      "step": 8117
    },
    {
      "epoch": 0.7479614870779011,
      "grad_norm": 0.8951591662409739,
      "learning_rate": 8.19075149685229e-07,
      "loss": 0.1097,
      "step": 8118
    },
    {
      "epoch": 0.7480536232551711,
      "grad_norm": 0.9058294291306436,
      "learning_rate": 8.185113443915504e-07,
      "loss": 0.1178,
      "step": 8119
    },
    {
      "epoch": 0.7481457594324411,
      "grad_norm": 0.9752842361448212,
      "learning_rate": 8.179476952242757e-07,
      "loss": 0.1184,
      "step": 8120
    },
    {
      "epoch": 0.7482378956097111,
      "grad_norm": 0.9086052019177948,
      "learning_rate": 8.173842022357381e-07,
      "loss": 0.1134,
      "step": 8121
    },
    {
      "epoch": 0.7483300317869812,
      "grad_norm": 0.9355291015960355,
      "learning_rate": 8.168208654782578e-07,
      "loss": 0.1188,
      "step": 8122
    },
    {
      "epoch": 0.7484221679642512,
      "grad_norm": 0.9463130206534003,
      "learning_rate": 8.162576850041415e-07,
      "loss": 0.1269,
      "step": 8123
    },
    {
      "epoch": 0.7485143041415212,
      "grad_norm": 0.9097943913503486,
      "learning_rate": 8.156946608656799e-07,
      "loss": 0.1197,
      "step": 8124
    },
    {
      "epoch": 0.7486064403187912,
      "grad_norm": 0.9261635841305669,
      "learning_rate": 8.151317931151514e-07,
      "loss": 0.1065,
      "step": 8125
    },
    {
      "epoch": 0.7486985764960612,
      "grad_norm": 0.9384266586122866,
      "learning_rate": 8.145690818048171e-07,
      "loss": 0.1379,
      "step": 8126
    },
    {
      "epoch": 0.7487907126733312,
      "grad_norm": 0.996044063579827,
      "learning_rate": 8.140065269869244e-07,
      "loss": 0.1245,
      "step": 8127
    },
    {
      "epoch": 0.7488828488506012,
      "grad_norm": 0.9224711715286494,
      "learning_rate": 8.134441287137068e-07,
      "loss": 0.1114,
      "step": 8128
    },
    {
      "epoch": 0.7489749850278712,
      "grad_norm": 0.9220396484994093,
      "learning_rate": 8.128818870373845e-07,
      "loss": 0.1084,
      "step": 8129
    },
    {
      "epoch": 0.7490671212051412,
      "grad_norm": 0.9545910800584627,
      "learning_rate": 8.123198020101594e-07,
      "loss": 0.1278,
      "step": 8130
    },
    {
      "epoch": 0.7491592573824112,
      "grad_norm": 0.9261706948520899,
      "learning_rate": 8.117578736842232e-07,
      "loss": 0.1276,
      "step": 8131
    },
    {
      "epoch": 0.7492513935596812,
      "grad_norm": 0.9033268659393892,
      "learning_rate": 8.111961021117496e-07,
      "loss": 0.12,
      "step": 8132
    },
    {
      "epoch": 0.7493435297369512,
      "grad_norm": 0.9488735125394763,
      "learning_rate": 8.106344873449001e-07,
      "loss": 0.1216,
      "step": 8133
    },
    {
      "epoch": 0.7494356659142212,
      "grad_norm": 0.9861922120436211,
      "learning_rate": 8.100730294358197e-07,
      "loss": 0.1268,
      "step": 8134
    },
    {
      "epoch": 0.7495278020914912,
      "grad_norm": 0.9421639498933942,
      "learning_rate": 8.095117284366405e-07,
      "loss": 0.1239,
      "step": 8135
    },
    {
      "epoch": 0.7496199382687613,
      "grad_norm": 0.9059836146120238,
      "learning_rate": 8.089505843994797e-07,
      "loss": 0.1165,
      "step": 8136
    },
    {
      "epoch": 0.7497120744460313,
      "grad_norm": 0.8729933498186071,
      "learning_rate": 8.083895973764394e-07,
      "loss": 0.1101,
      "step": 8137
    },
    {
      "epoch": 0.7498042106233013,
      "grad_norm": 0.9354667026261895,
      "learning_rate": 8.078287674196061e-07,
      "loss": 0.1259,
      "step": 8138
    },
    {
      "epoch": 0.7498963468005713,
      "grad_norm": 0.9393954330086781,
      "learning_rate": 8.072680945810538e-07,
      "loss": 0.1264,
      "step": 8139
    },
    {
      "epoch": 0.7499884829778413,
      "grad_norm": 0.982062774545163,
      "learning_rate": 8.067075789128412e-07,
      "loss": 0.1351,
      "step": 8140
    },
    {
      "epoch": 0.7500806191551113,
      "grad_norm": 0.8870732838171413,
      "learning_rate": 8.061472204670129e-07,
      "loss": 0.1151,
      "step": 8141
    },
    {
      "epoch": 0.7501727553323813,
      "grad_norm": 0.9705648123376437,
      "learning_rate": 8.055870192955975e-07,
      "loss": 0.1183,
      "step": 8142
    },
    {
      "epoch": 0.7502648915096513,
      "grad_norm": 0.891933701352055,
      "learning_rate": 8.050269754506091e-07,
      "loss": 0.1179,
      "step": 8143
    },
    {
      "epoch": 0.7503570276869213,
      "grad_norm": 0.974472806167114,
      "learning_rate": 8.044670889840481e-07,
      "loss": 0.1281,
      "step": 8144
    },
    {
      "epoch": 0.7504491638641912,
      "grad_norm": 0.9393965328736213,
      "learning_rate": 8.039073599479014e-07,
      "loss": 0.1243,
      "step": 8145
    },
    {
      "epoch": 0.7505413000414612,
      "grad_norm": 0.9261251014677941,
      "learning_rate": 8.033477883941379e-07,
      "loss": 0.1229,
      "step": 8146
    },
    {
      "epoch": 0.7506334362187312,
      "grad_norm": 0.9257164254893021,
      "learning_rate": 8.027883743747156e-07,
      "loss": 0.1275,
      "step": 8147
    },
    {
      "epoch": 0.7507255723960012,
      "grad_norm": 0.8801199783379451,
      "learning_rate": 8.022291179415747e-07,
      "loss": 0.1158,
      "step": 8148
    },
    {
      "epoch": 0.7508177085732713,
      "grad_norm": 0.9106903812355431,
      "learning_rate": 8.016700191466431e-07,
      "loss": 0.1265,
      "step": 8149
    },
    {
      "epoch": 0.7509098447505413,
      "grad_norm": 0.9141324302012125,
      "learning_rate": 8.011110780418335e-07,
      "loss": 0.1172,
      "step": 8150
    },
    {
      "epoch": 0.7510019809278113,
      "grad_norm": 0.9587514921406114,
      "learning_rate": 8.005522946790426e-07,
      "loss": 0.1294,
      "step": 8151
    },
    {
      "epoch": 0.7510941171050813,
      "grad_norm": 0.9030493525414592,
      "learning_rate": 7.999936691101545e-07,
      "loss": 0.1256,
      "step": 8152
    },
    {
      "epoch": 0.7511862532823513,
      "grad_norm": 0.9972586634465126,
      "learning_rate": 7.994352013870366e-07,
      "loss": 0.1276,
      "step": 8153
    },
    {
      "epoch": 0.7512783894596213,
      "grad_norm": 0.8537576474310897,
      "learning_rate": 7.988768915615441e-07,
      "loss": 0.1093,
      "step": 8154
    },
    {
      "epoch": 0.7513705256368913,
      "grad_norm": 0.9041807136029697,
      "learning_rate": 7.983187396855144e-07,
      "loss": 0.1131,
      "step": 8155
    },
    {
      "epoch": 0.7514626618141613,
      "grad_norm": 0.8912359097150397,
      "learning_rate": 7.977607458107731e-07,
      "loss": 0.1218,
      "step": 8156
    },
    {
      "epoch": 0.7515547979914313,
      "grad_norm": 0.921588521542897,
      "learning_rate": 7.972029099891293e-07,
      "loss": 0.1189,
      "step": 8157
    },
    {
      "epoch": 0.7516469341687013,
      "grad_norm": 0.931955611648895,
      "learning_rate": 7.966452322723806e-07,
      "loss": 0.1142,
      "step": 8158
    },
    {
      "epoch": 0.7517390703459713,
      "grad_norm": 0.9486463061139534,
      "learning_rate": 7.960877127123038e-07,
      "loss": 0.1199,
      "step": 8159
    },
    {
      "epoch": 0.7518312065232413,
      "grad_norm": 0.930525832367596,
      "learning_rate": 7.955303513606657e-07,
      "loss": 0.1117,
      "step": 8160
    },
    {
      "epoch": 0.7519233427005113,
      "grad_norm": 0.9429285315072875,
      "learning_rate": 7.949731482692185e-07,
      "loss": 0.1276,
      "step": 8161
    },
    {
      "epoch": 0.7520154788777813,
      "grad_norm": 0.9014224398876224,
      "learning_rate": 7.944161034896986e-07,
      "loss": 0.1145,
      "step": 8162
    },
    {
      "epoch": 0.7521076150550514,
      "grad_norm": 0.9373335528043448,
      "learning_rate": 7.93859217073826e-07,
      "loss": 0.1261,
      "step": 8163
    },
    {
      "epoch": 0.7521997512323214,
      "grad_norm": 0.9031086836900423,
      "learning_rate": 7.933024890733099e-07,
      "loss": 0.1186,
      "step": 8164
    },
    {
      "epoch": 0.7522918874095914,
      "grad_norm": 0.9747283781206498,
      "learning_rate": 7.927459195398404e-07,
      "loss": 0.1238,
      "step": 8165
    },
    {
      "epoch": 0.7523840235868614,
      "grad_norm": 1.0216403307734223,
      "learning_rate": 7.921895085250967e-07,
      "loss": 0.1281,
      "step": 8166
    },
    {
      "epoch": 0.7524761597641314,
      "grad_norm": 0.9079435690483035,
      "learning_rate": 7.916332560807402e-07,
      "loss": 0.1115,
      "step": 8167
    },
    {
      "epoch": 0.7525682959414014,
      "grad_norm": 0.9732939639285801,
      "learning_rate": 7.910771622584199e-07,
      "loss": 0.1172,
      "step": 8168
    },
    {
      "epoch": 0.7526604321186714,
      "grad_norm": 0.9380322737134049,
      "learning_rate": 7.905212271097696e-07,
      "loss": 0.1144,
      "step": 8169
    },
    {
      "epoch": 0.7527525682959414,
      "grad_norm": 0.902154398830762,
      "learning_rate": 7.899654506864074e-07,
      "loss": 0.1203,
      "step": 8170
    },
    {
      "epoch": 0.7528447044732114,
      "grad_norm": 0.9161360802401297,
      "learning_rate": 7.894098330399363e-07,
      "loss": 0.1143,
      "step": 8171
    },
    {
      "epoch": 0.7529368406504814,
      "grad_norm": 0.9832148574859536,
      "learning_rate": 7.888543742219462e-07,
      "loss": 0.1297,
      "step": 8172
    },
    {
      "epoch": 0.7530289768277514,
      "grad_norm": 0.9578861811755155,
      "learning_rate": 7.882990742840119e-07,
      "loss": 0.1205,
      "step": 8173
    },
    {
      "epoch": 0.7531211130050214,
      "grad_norm": 0.9560100009959844,
      "learning_rate": 7.877439332776934e-07,
      "loss": 0.1246,
      "step": 8174
    },
    {
      "epoch": 0.7532132491822914,
      "grad_norm": 0.8931628999965731,
      "learning_rate": 7.87188951254535e-07,
      "loss": 0.1078,
      "step": 8175
    },
    {
      "epoch": 0.7533053853595615,
      "grad_norm": 0.9894713605728191,
      "learning_rate": 7.866341282660661e-07,
      "loss": 0.1246,
      "step": 8176
    },
    {
      "epoch": 0.7533975215368315,
      "grad_norm": 0.940458859644715,
      "learning_rate": 7.860794643638026e-07,
      "loss": 0.1194,
      "step": 8177
    },
    {
      "epoch": 0.7534896577141015,
      "grad_norm": 0.919068557096056,
      "learning_rate": 7.855249595992454e-07,
      "loss": 0.1274,
      "step": 8178
    },
    {
      "epoch": 0.7535817938913715,
      "grad_norm": 0.8730537807997693,
      "learning_rate": 7.849706140238808e-07,
      "loss": 0.1141,
      "step": 8179
    },
    {
      "epoch": 0.7536739300686415,
      "grad_norm": 0.8816936498803757,
      "learning_rate": 7.844164276891794e-07,
      "loss": 0.1171,
      "step": 8180
    },
    {
      "epoch": 0.7537660662459115,
      "grad_norm": 0.9362278283930622,
      "learning_rate": 7.838624006465967e-07,
      "loss": 0.124,
      "step": 8181
    },
    {
      "epoch": 0.7538582024231815,
      "grad_norm": 0.9231753539436256,
      "learning_rate": 7.833085329475748e-07,
      "loss": 0.1174,
      "step": 8182
    },
    {
      "epoch": 0.7539503386004515,
      "grad_norm": 0.9753567791774588,
      "learning_rate": 7.82754824643541e-07,
      "loss": 0.1298,
      "step": 8183
    },
    {
      "epoch": 0.7540424747777215,
      "grad_norm": 0.934180096557911,
      "learning_rate": 7.822012757859057e-07,
      "loss": 0.1291,
      "step": 8184
    },
    {
      "epoch": 0.7541346109549915,
      "grad_norm": 0.9257184287763613,
      "learning_rate": 7.816478864260677e-07,
      "loss": 0.1147,
      "step": 8185
    },
    {
      "epoch": 0.7542267471322615,
      "grad_norm": 0.9686431756661043,
      "learning_rate": 7.810946566154076e-07,
      "loss": 0.1255,
      "step": 8186
    },
    {
      "epoch": 0.7543188833095315,
      "grad_norm": 0.9168752484252765,
      "learning_rate": 7.805415864052942e-07,
      "loss": 0.1261,
      "step": 8187
    },
    {
      "epoch": 0.7544110194868014,
      "grad_norm": 0.9408848273284718,
      "learning_rate": 7.79988675847079e-07,
      "loss": 0.123,
      "step": 8188
    },
    {
      "epoch": 0.7545031556640714,
      "grad_norm": 0.9802139431624383,
      "learning_rate": 7.794359249921004e-07,
      "loss": 0.1225,
      "step": 8189
    },
    {
      "epoch": 0.7545952918413416,
      "grad_norm": 0.888342210491898,
      "learning_rate": 7.78883333891682e-07,
      "loss": 0.1172,
      "step": 8190
    },
    {
      "epoch": 0.7546874280186116,
      "grad_norm": 0.9059491622012312,
      "learning_rate": 7.783309025971314e-07,
      "loss": 0.1158,
      "step": 8191
    },
    {
      "epoch": 0.7547795641958815,
      "grad_norm": 0.9274618100083608,
      "learning_rate": 7.777786311597408e-07,
      "loss": 0.1343,
      "step": 8192
    },
    {
      "epoch": 0.7548717003731515,
      "grad_norm": 0.9096886113886077,
      "learning_rate": 7.772265196307896e-07,
      "loss": 0.1216,
      "step": 8193
    },
    {
      "epoch": 0.7549638365504215,
      "grad_norm": 0.9364608277777419,
      "learning_rate": 7.766745680615417e-07,
      "loss": 0.1193,
      "step": 8194
    },
    {
      "epoch": 0.7550559727276915,
      "grad_norm": 0.8856154937055789,
      "learning_rate": 7.761227765032464e-07,
      "loss": 0.1125,
      "step": 8195
    },
    {
      "epoch": 0.7551481089049615,
      "grad_norm": 0.9611275417568343,
      "learning_rate": 7.755711450071365e-07,
      "loss": 0.1198,
      "step": 8196
    },
    {
      "epoch": 0.7552402450822315,
      "grad_norm": 0.9812449711132765,
      "learning_rate": 7.750196736244309e-07,
      "loss": 0.1275,
      "step": 8197
    },
    {
      "epoch": 0.7553323812595015,
      "grad_norm": 0.963089860945346,
      "learning_rate": 7.744683624063343e-07,
      "loss": 0.1175,
      "step": 8198
    },
    {
      "epoch": 0.7554245174367715,
      "grad_norm": 0.9683624391402031,
      "learning_rate": 7.739172114040366e-07,
      "loss": 0.1212,
      "step": 8199
    },
    {
      "epoch": 0.7555166536140415,
      "grad_norm": 0.9774515231953995,
      "learning_rate": 7.733662206687106e-07,
      "loss": 0.1191,
      "step": 8200
    },
    {
      "epoch": 0.7556087897913115,
      "grad_norm": 0.9521027886273846,
      "learning_rate": 7.728153902515181e-07,
      "loss": 0.13,
      "step": 8201
    },
    {
      "epoch": 0.7557009259685815,
      "grad_norm": 1.0001992095848415,
      "learning_rate": 7.722647202036012e-07,
      "loss": 0.1409,
      "step": 8202
    },
    {
      "epoch": 0.7557930621458516,
      "grad_norm": 0.9519675971196492,
      "learning_rate": 7.717142105760922e-07,
      "loss": 0.1164,
      "step": 8203
    },
    {
      "epoch": 0.7558851983231216,
      "grad_norm": 0.9638373797322568,
      "learning_rate": 7.711638614201037e-07,
      "loss": 0.132,
      "step": 8204
    },
    {
      "epoch": 0.7559773345003916,
      "grad_norm": 0.9227872124153469,
      "learning_rate": 7.706136727867366e-07,
      "loss": 0.1185,
      "step": 8205
    },
    {
      "epoch": 0.7560694706776616,
      "grad_norm": 0.9195582592993863,
      "learning_rate": 7.700636447270773e-07,
      "loss": 0.1142,
      "step": 8206
    },
    {
      "epoch": 0.7561616068549316,
      "grad_norm": 0.9565199463656906,
      "learning_rate": 7.695137772921938e-07,
      "loss": 0.125,
      "step": 8207
    },
    {
      "epoch": 0.7562537430322016,
      "grad_norm": 0.9254905477252218,
      "learning_rate": 7.68964070533143e-07,
      "loss": 0.1211,
      "step": 8208
    },
    {
      "epoch": 0.7563458792094716,
      "grad_norm": 1.000115280608466,
      "learning_rate": 7.684145245009639e-07,
      "loss": 0.1332,
      "step": 8209
    },
    {
      "epoch": 0.7564380153867416,
      "grad_norm": 0.9401494439330383,
      "learning_rate": 7.678651392466824e-07,
      "loss": 0.1177,
      "step": 8210
    },
    {
      "epoch": 0.7565301515640116,
      "grad_norm": 0.9091567374279103,
      "learning_rate": 7.6731591482131e-07,
      "loss": 0.1197,
      "step": 8211
    },
    {
      "epoch": 0.7566222877412816,
      "grad_norm": 0.9063596577785816,
      "learning_rate": 7.667668512758414e-07,
      "loss": 0.1129,
      "step": 8212
    },
    {
      "epoch": 0.7567144239185516,
      "grad_norm": 0.9020032849310663,
      "learning_rate": 7.662179486612561e-07,
      "loss": 0.1158,
      "step": 8213
    },
    {
      "epoch": 0.7568065600958216,
      "grad_norm": 0.9373934933001632,
      "learning_rate": 7.656692070285212e-07,
      "loss": 0.127,
      "step": 8214
    },
    {
      "epoch": 0.7568986962730916,
      "grad_norm": 0.9687990581181442,
      "learning_rate": 7.651206264285871e-07,
      "loss": 0.1314,
      "step": 8215
    },
    {
      "epoch": 0.7569908324503616,
      "grad_norm": 0.9003439445100029,
      "learning_rate": 7.645722069123904e-07,
      "loss": 0.118,
      "step": 8216
    },
    {
      "epoch": 0.7570829686276317,
      "grad_norm": 0.9023461782986097,
      "learning_rate": 7.640239485308506e-07,
      "loss": 0.1108,
      "step": 8217
    },
    {
      "epoch": 0.7571751048049017,
      "grad_norm": 0.9496536924462029,
      "learning_rate": 7.634758513348737e-07,
      "loss": 0.1242,
      "step": 8218
    },
    {
      "epoch": 0.7572672409821717,
      "grad_norm": 0.9003447933629831,
      "learning_rate": 7.629279153753508e-07,
      "loss": 0.1134,
      "step": 8219
    },
    {
      "epoch": 0.7573593771594417,
      "grad_norm": 0.9717119656589931,
      "learning_rate": 7.623801407031586e-07,
      "loss": 0.1257,
      "step": 8220
    },
    {
      "epoch": 0.7574515133367117,
      "grad_norm": 0.9319051148074415,
      "learning_rate": 7.618325273691565e-07,
      "loss": 0.1285,
      "step": 8221
    },
    {
      "epoch": 0.7575436495139817,
      "grad_norm": 0.898820556761226,
      "learning_rate": 7.612850754241921e-07,
      "loss": 0.1191,
      "step": 8222
    },
    {
      "epoch": 0.7576357856912517,
      "grad_norm": 0.9545287362385483,
      "learning_rate": 7.607377849190947e-07,
      "loss": 0.129,
      "step": 8223
    },
    {
      "epoch": 0.7577279218685217,
      "grad_norm": 0.9509213799133734,
      "learning_rate": 7.601906559046824e-07,
      "loss": 0.1252,
      "step": 8224
    },
    {
      "epoch": 0.7578200580457917,
      "grad_norm": 0.99532066898558,
      "learning_rate": 7.596436884317537e-07,
      "loss": 0.1418,
      "step": 8225
    },
    {
      "epoch": 0.7579121942230617,
      "grad_norm": 0.9568231647521277,
      "learning_rate": 7.590968825510958e-07,
      "loss": 0.1316,
      "step": 8226
    },
    {
      "epoch": 0.7580043304003317,
      "grad_norm": 0.9676381668975514,
      "learning_rate": 7.585502383134807e-07,
      "loss": 0.1246,
      "step": 8227
    },
    {
      "epoch": 0.7580964665776017,
      "grad_norm": 0.9686951983539297,
      "learning_rate": 7.580037557696634e-07,
      "loss": 0.1281,
      "step": 8228
    },
    {
      "epoch": 0.7581886027548717,
      "grad_norm": 0.9575475261171695,
      "learning_rate": 7.574574349703839e-07,
      "loss": 0.1277,
      "step": 8229
    },
    {
      "epoch": 0.7582807389321417,
      "grad_norm": 0.966501487465626,
      "learning_rate": 7.569112759663693e-07,
      "loss": 0.1169,
      "step": 8230
    },
    {
      "epoch": 0.7583728751094118,
      "grad_norm": 0.9650356114616608,
      "learning_rate": 7.5636527880833e-07,
      "loss": 0.1259,
      "step": 8231
    },
    {
      "epoch": 0.7584650112866818,
      "grad_norm": 0.9896181093613594,
      "learning_rate": 7.558194435469634e-07,
      "loss": 0.1325,
      "step": 8232
    },
    {
      "epoch": 0.7585571474639518,
      "grad_norm": 0.9215878238578664,
      "learning_rate": 7.55273770232949e-07,
      "loss": 0.1194,
      "step": 8233
    },
    {
      "epoch": 0.7586492836412217,
      "grad_norm": 0.8898782537530946,
      "learning_rate": 7.547282589169519e-07,
      "loss": 0.1212,
      "step": 8234
    },
    {
      "epoch": 0.7587414198184917,
      "grad_norm": 0.961252642668289,
      "learning_rate": 7.541829096496239e-07,
      "loss": 0.1148,
      "step": 8235
    },
    {
      "epoch": 0.7588335559957617,
      "grad_norm": 0.978911061379541,
      "learning_rate": 7.536377224816008e-07,
      "loss": 0.1273,
      "step": 8236
    },
    {
      "epoch": 0.7589256921730317,
      "grad_norm": 0.9326585979443505,
      "learning_rate": 7.530926974635036e-07,
      "loss": 0.1308,
      "step": 8237
    },
    {
      "epoch": 0.7590178283503017,
      "grad_norm": 0.999088474363938,
      "learning_rate": 7.525478346459369e-07,
      "loss": 0.1287,
      "step": 8238
    },
    {
      "epoch": 0.7591099645275717,
      "grad_norm": 0.9697618917331947,
      "learning_rate": 7.520031340794926e-07,
      "loss": 0.131,
      "step": 8239
    },
    {
      "epoch": 0.7592021007048417,
      "grad_norm": 0.9103230425606077,
      "learning_rate": 7.514585958147444e-07,
      "loss": 0.1231,
      "step": 8240
    },
    {
      "epoch": 0.7592942368821117,
      "grad_norm": 0.9963197904910523,
      "learning_rate": 7.509142199022545e-07,
      "loss": 0.1251,
      "step": 8241
    },
    {
      "epoch": 0.7593863730593817,
      "grad_norm": 0.9044203026817673,
      "learning_rate": 7.50370006392567e-07,
      "loss": 0.1167,
      "step": 8242
    },
    {
      "epoch": 0.7594785092366517,
      "grad_norm": 0.9102232962101179,
      "learning_rate": 7.498259553362128e-07,
      "loss": 0.1197,
      "step": 8243
    },
    {
      "epoch": 0.7595706454139218,
      "grad_norm": 0.9331151108211787,
      "learning_rate": 7.492820667837075e-07,
      "loss": 0.1209,
      "step": 8244
    },
    {
      "epoch": 0.7596627815911918,
      "grad_norm": 0.9581217043945808,
      "learning_rate": 7.487383407855508e-07,
      "loss": 0.1313,
      "step": 8245
    },
    {
      "epoch": 0.7597549177684618,
      "grad_norm": 0.859473471190459,
      "learning_rate": 7.481947773922269e-07,
      "loss": 0.1056,
      "step": 8246
    },
    {
      "epoch": 0.7598470539457318,
      "grad_norm": 0.906385166241462,
      "learning_rate": 7.476513766542065e-07,
      "loss": 0.1188,
      "step": 8247
    },
    {
      "epoch": 0.7599391901230018,
      "grad_norm": 0.9363292229597777,
      "learning_rate": 7.471081386219442e-07,
      "loss": 0.1308,
      "step": 8248
    },
    {
      "epoch": 0.7600313263002718,
      "grad_norm": 0.9867804550389615,
      "learning_rate": 7.465650633458807e-07,
      "loss": 0.139,
      "step": 8249
    },
    {
      "epoch": 0.7601234624775418,
      "grad_norm": 0.9925921139578151,
      "learning_rate": 7.460221508764398e-07,
      "loss": 0.124,
      "step": 8250
    },
    {
      "epoch": 0.7602155986548118,
      "grad_norm": 0.9089636015164811,
      "learning_rate": 7.454794012640301e-07,
      "loss": 0.1086,
      "step": 8251
    },
    {
      "epoch": 0.7603077348320818,
      "grad_norm": 0.8845980297368551,
      "learning_rate": 7.449368145590469e-07,
      "loss": 0.1212,
      "step": 8252
    },
    {
      "epoch": 0.7603998710093518,
      "grad_norm": 0.9422456456596634,
      "learning_rate": 7.443943908118703e-07,
      "loss": 0.1256,
      "step": 8253
    },
    {
      "epoch": 0.7604920071866218,
      "grad_norm": 0.9566109791919529,
      "learning_rate": 7.438521300728624e-07,
      "loss": 0.1354,
      "step": 8254
    },
    {
      "epoch": 0.7605841433638918,
      "grad_norm": 0.9351359065136909,
      "learning_rate": 7.433100323923742e-07,
      "loss": 0.1264,
      "step": 8255
    },
    {
      "epoch": 0.7606762795411618,
      "grad_norm": 0.9693764918733778,
      "learning_rate": 7.427680978207378e-07,
      "loss": 0.1195,
      "step": 8256
    },
    {
      "epoch": 0.7607684157184318,
      "grad_norm": 0.903810912945681,
      "learning_rate": 7.422263264082732e-07,
      "loss": 0.1026,
      "step": 8257
    },
    {
      "epoch": 0.7608605518957019,
      "grad_norm": 0.9003919306304144,
      "learning_rate": 7.416847182052825e-07,
      "loss": 0.1048,
      "step": 8258
    },
    {
      "epoch": 0.7609526880729719,
      "grad_norm": 0.9647571730317374,
      "learning_rate": 7.411432732620552e-07,
      "loss": 0.121,
      "step": 8259
    },
    {
      "epoch": 0.7610448242502419,
      "grad_norm": 0.9315295549134404,
      "learning_rate": 7.406019916288651e-07,
      "loss": 0.1267,
      "step": 8260
    },
    {
      "epoch": 0.7611369604275119,
      "grad_norm": 0.8841359303961115,
      "learning_rate": 7.400608733559692e-07,
      "loss": 0.1151,
      "step": 8261
    },
    {
      "epoch": 0.7612290966047819,
      "grad_norm": 0.9333795268076007,
      "learning_rate": 7.395199184936099e-07,
      "loss": 0.1235,
      "step": 8262
    },
    {
      "epoch": 0.7613212327820519,
      "grad_norm": 1.0032582232476528,
      "learning_rate": 7.389791270920158e-07,
      "loss": 0.1182,
      "step": 8263
    },
    {
      "epoch": 0.7614133689593219,
      "grad_norm": 1.008498522600006,
      "learning_rate": 7.38438499201399e-07,
      "loss": 0.1416,
      "step": 8264
    },
    {
      "epoch": 0.7615055051365919,
      "grad_norm": 0.9695756514176418,
      "learning_rate": 7.378980348719581e-07,
      "loss": 0.1188,
      "step": 8265
    },
    {
      "epoch": 0.7615976413138619,
      "grad_norm": 0.9189796715543543,
      "learning_rate": 7.373577341538742e-07,
      "loss": 0.1153,
      "step": 8266
    },
    {
      "epoch": 0.7616897774911319,
      "grad_norm": 0.9377850605107226,
      "learning_rate": 7.368175970973138e-07,
      "loss": 0.11,
      "step": 8267
    },
    {
      "epoch": 0.7617819136684019,
      "grad_norm": 0.9201398168024537,
      "learning_rate": 7.362776237524291e-07,
      "loss": 0.1197,
      "step": 8268
    },
    {
      "epoch": 0.7618740498456719,
      "grad_norm": 1.072128024202621,
      "learning_rate": 7.357378141693569e-07,
      "loss": 0.1197,
      "step": 8269
    },
    {
      "epoch": 0.7619661860229419,
      "grad_norm": 0.9460766863322817,
      "learning_rate": 7.351981683982193e-07,
      "loss": 0.1239,
      "step": 8270
    },
    {
      "epoch": 0.762058322200212,
      "grad_norm": 0.9692369089181001,
      "learning_rate": 7.346586864891217e-07,
      "loss": 0.127,
      "step": 8271
    },
    {
      "epoch": 0.762150458377482,
      "grad_norm": 0.8880843044237137,
      "learning_rate": 7.341193684921541e-07,
      "loss": 0.1118,
      "step": 8272
    },
    {
      "epoch": 0.762242594554752,
      "grad_norm": 0.926515471093357,
      "learning_rate": 7.335802144573933e-07,
      "loss": 0.1177,
      "step": 8273
    },
    {
      "epoch": 0.762334730732022,
      "grad_norm": 0.9482733992199582,
      "learning_rate": 7.330412244349005e-07,
      "loss": 0.1163,
      "step": 8274
    },
    {
      "epoch": 0.762426866909292,
      "grad_norm": 0.9582116484025304,
      "learning_rate": 7.325023984747195e-07,
      "loss": 0.1334,
      "step": 8275
    },
    {
      "epoch": 0.762519003086562,
      "grad_norm": 0.9336639018459388,
      "learning_rate": 7.319637366268817e-07,
      "loss": 0.1225,
      "step": 8276
    },
    {
      "epoch": 0.762611139263832,
      "grad_norm": 0.9118007724591288,
      "learning_rate": 7.314252389414003e-07,
      "loss": 0.1131,
      "step": 8277
    },
    {
      "epoch": 0.762703275441102,
      "grad_norm": 0.9695491078378007,
      "learning_rate": 7.308869054682769e-07,
      "loss": 0.1247,
      "step": 8278
    },
    {
      "epoch": 0.7627954116183719,
      "grad_norm": 0.9668242101632564,
      "learning_rate": 7.303487362574938e-07,
      "loss": 0.1261,
      "step": 8279
    },
    {
      "epoch": 0.7628875477956419,
      "grad_norm": 1.0067394909831462,
      "learning_rate": 7.298107313590208e-07,
      "loss": 0.1337,
      "step": 8280
    },
    {
      "epoch": 0.7629796839729119,
      "grad_norm": 0.9437521576960384,
      "learning_rate": 7.292728908228127e-07,
      "loss": 0.1247,
      "step": 8281
    },
    {
      "epoch": 0.7630718201501819,
      "grad_norm": 0.945932213840081,
      "learning_rate": 7.287352146988075e-07,
      "loss": 0.1222,
      "step": 8282
    },
    {
      "epoch": 0.7631639563274519,
      "grad_norm": 0.8918765975327134,
      "learning_rate": 7.281977030369275e-07,
      "loss": 0.1031,
      "step": 8283
    },
    {
      "epoch": 0.7632560925047219,
      "grad_norm": 0.9158648763429635,
      "learning_rate": 7.276603558870812e-07,
      "loss": 0.1208,
      "step": 8284
    },
    {
      "epoch": 0.763348228681992,
      "grad_norm": 0.9292601320925606,
      "learning_rate": 7.271231732991619e-07,
      "loss": 0.1164,
      "step": 8285
    },
    {
      "epoch": 0.763440364859262,
      "grad_norm": 0.8994955400839384,
      "learning_rate": 7.265861553230472e-07,
      "loss": 0.1222,
      "step": 8286
    },
    {
      "epoch": 0.763532501036532,
      "grad_norm": 0.951619859318042,
      "learning_rate": 7.26049302008599e-07,
      "loss": 0.1282,
      "step": 8287
    },
    {
      "epoch": 0.763624637213802,
      "grad_norm": 0.988325789476459,
      "learning_rate": 7.255126134056631e-07,
      "loss": 0.1399,
      "step": 8288
    },
    {
      "epoch": 0.763716773391072,
      "grad_norm": 1.055346800786715,
      "learning_rate": 7.249760895640723e-07,
      "loss": 0.1243,
      "step": 8289
    },
    {
      "epoch": 0.763808909568342,
      "grad_norm": 0.9044247369547711,
      "learning_rate": 7.244397305336423e-07,
      "loss": 0.1155,
      "step": 8290
    },
    {
      "epoch": 0.763901045745612,
      "grad_norm": 0.9472862151962139,
      "learning_rate": 7.239035363641752e-07,
      "loss": 0.1219,
      "step": 8291
    },
    {
      "epoch": 0.763993181922882,
      "grad_norm": 0.9502049407566469,
      "learning_rate": 7.233675071054564e-07,
      "loss": 0.1287,
      "step": 8292
    },
    {
      "epoch": 0.764085318100152,
      "grad_norm": 0.9285316003020824,
      "learning_rate": 7.228316428072546e-07,
      "loss": 0.1236,
      "step": 8293
    },
    {
      "epoch": 0.764177454277422,
      "grad_norm": 0.9390689107258569,
      "learning_rate": 7.222959435193258e-07,
      "loss": 0.1158,
      "step": 8294
    },
    {
      "epoch": 0.764269590454692,
      "grad_norm": 0.95789176362226,
      "learning_rate": 7.21760409291411e-07,
      "loss": 0.1343,
      "step": 8295
    },
    {
      "epoch": 0.764361726631962,
      "grad_norm": 0.9941924628392527,
      "learning_rate": 7.212250401732329e-07,
      "loss": 0.127,
      "step": 8296
    },
    {
      "epoch": 0.764453862809232,
      "grad_norm": 0.9252169197213751,
      "learning_rate": 7.206898362145021e-07,
      "loss": 0.1196,
      "step": 8297
    },
    {
      "epoch": 0.764545998986502,
      "grad_norm": 0.8762698700489192,
      "learning_rate": 7.201547974649104e-07,
      "loss": 0.1122,
      "step": 8298
    },
    {
      "epoch": 0.7646381351637721,
      "grad_norm": 0.9739201895395896,
      "learning_rate": 7.196199239741383e-07,
      "loss": 0.1236,
      "step": 8299
    },
    {
      "epoch": 0.7647302713410421,
      "grad_norm": 0.9127234805633828,
      "learning_rate": 7.190852157918468e-07,
      "loss": 0.1167,
      "step": 8300
    },
    {
      "epoch": 0.7648224075183121,
      "grad_norm": 0.9358931953978479,
      "learning_rate": 7.185506729676849e-07,
      "loss": 0.1136,
      "step": 8301
    },
    {
      "epoch": 0.7649145436955821,
      "grad_norm": 0.9726783433144469,
      "learning_rate": 7.180162955512856e-07,
      "loss": 0.1223,
      "step": 8302
    },
    {
      "epoch": 0.7650066798728521,
      "grad_norm": 0.9370684221290889,
      "learning_rate": 7.174820835922649e-07,
      "loss": 0.1136,
      "step": 8303
    },
    {
      "epoch": 0.7650988160501221,
      "grad_norm": 0.8790475206195711,
      "learning_rate": 7.16948037140224e-07,
      "loss": 0.1164,
      "step": 8304
    },
    {
      "epoch": 0.7651909522273921,
      "grad_norm": 1.0491204399969105,
      "learning_rate": 7.164141562447497e-07,
      "loss": 0.1411,
      "step": 8305
    },
    {
      "epoch": 0.7652830884046621,
      "grad_norm": 0.9571333133358144,
      "learning_rate": 7.158804409554126e-07,
      "loss": 0.1118,
      "step": 8306
    },
    {
      "epoch": 0.7653752245819321,
      "grad_norm": 0.964770395513596,
      "learning_rate": 7.153468913217695e-07,
      "loss": 0.1277,
      "step": 8307
    },
    {
      "epoch": 0.7654673607592021,
      "grad_norm": 0.9070299902224167,
      "learning_rate": 7.148135073933599e-07,
      "loss": 0.1164,
      "step": 8308
    },
    {
      "epoch": 0.7655594969364721,
      "grad_norm": 0.941404523811673,
      "learning_rate": 7.142802892197071e-07,
      "loss": 0.1253,
      "step": 8309
    },
    {
      "epoch": 0.7656516331137421,
      "grad_norm": 0.9066252176601356,
      "learning_rate": 7.137472368503217e-07,
      "loss": 0.1269,
      "step": 8310
    },
    {
      "epoch": 0.7657437692910121,
      "grad_norm": 0.9096259039649669,
      "learning_rate": 7.132143503346986e-07,
      "loss": 0.1159,
      "step": 8311
    },
    {
      "epoch": 0.7658359054682822,
      "grad_norm": 0.9385880965115956,
      "learning_rate": 7.126816297223147e-07,
      "loss": 0.1301,
      "step": 8312
    },
    {
      "epoch": 0.7659280416455522,
      "grad_norm": 0.9567068424142435,
      "learning_rate": 7.121490750626342e-07,
      "loss": 0.1231,
      "step": 8313
    },
    {
      "epoch": 0.7660201778228222,
      "grad_norm": 0.9173460135425626,
      "learning_rate": 7.116166864051038e-07,
      "loss": 0.1137,
      "step": 8314
    },
    {
      "epoch": 0.7661123140000922,
      "grad_norm": 0.9294880243189472,
      "learning_rate": 7.110844637991574e-07,
      "loss": 0.1232,
      "step": 8315
    },
    {
      "epoch": 0.7662044501773622,
      "grad_norm": 0.9152657770262788,
      "learning_rate": 7.105524072942105e-07,
      "loss": 0.1164,
      "step": 8316
    },
    {
      "epoch": 0.7662965863546322,
      "grad_norm": 0.9396243490752815,
      "learning_rate": 7.100205169396649e-07,
      "loss": 0.1227,
      "step": 8317
    },
    {
      "epoch": 0.7663887225319022,
      "grad_norm": 0.9206804869509456,
      "learning_rate": 7.09488792784907e-07,
      "loss": 0.1218,
      "step": 8318
    },
    {
      "epoch": 0.7664808587091722,
      "grad_norm": 0.935610637811038,
      "learning_rate": 7.089572348793081e-07,
      "loss": 0.1202,
      "step": 8319
    },
    {
      "epoch": 0.7665729948864421,
      "grad_norm": 0.9489679146040931,
      "learning_rate": 7.084258432722227e-07,
      "loss": 0.1177,
      "step": 8320
    },
    {
      "epoch": 0.7666651310637121,
      "grad_norm": 0.9163551987157174,
      "learning_rate": 7.078946180129898e-07,
      "loss": 0.1164,
      "step": 8321
    },
    {
      "epoch": 0.7667572672409821,
      "grad_norm": 0.8913692547879215,
      "learning_rate": 7.073635591509345e-07,
      "loss": 0.1064,
      "step": 8322
    },
    {
      "epoch": 0.7668494034182521,
      "grad_norm": 0.9434948927728509,
      "learning_rate": 7.068326667353659e-07,
      "loss": 0.1205,
      "step": 8323
    },
    {
      "epoch": 0.7669415395955221,
      "grad_norm": 0.9222390272621265,
      "learning_rate": 7.063019408155777e-07,
      "loss": 0.1213,
      "step": 8324
    },
    {
      "epoch": 0.7670336757727921,
      "grad_norm": 0.9288456312089602,
      "learning_rate": 7.057713814408473e-07,
      "loss": 0.1217,
      "step": 8325
    },
    {
      "epoch": 0.7671258119500622,
      "grad_norm": 0.9694355795017486,
      "learning_rate": 7.052409886604364e-07,
      "loss": 0.1292,
      "step": 8326
    },
    {
      "epoch": 0.7672179481273322,
      "grad_norm": 1.0037949780961066,
      "learning_rate": 7.04710762523593e-07,
      "loss": 0.1229,
      "step": 8327
    },
    {
      "epoch": 0.7673100843046022,
      "grad_norm": 1.0245295525799747,
      "learning_rate": 7.041807030795495e-07,
      "loss": 0.1302,
      "step": 8328
    },
    {
      "epoch": 0.7674022204818722,
      "grad_norm": 0.9915072745998507,
      "learning_rate": 7.036508103775199e-07,
      "loss": 0.1288,
      "step": 8329
    },
    {
      "epoch": 0.7674943566591422,
      "grad_norm": 0.9209649117333428,
      "learning_rate": 7.031210844667066e-07,
      "loss": 0.1177,
      "step": 8330
    },
    {
      "epoch": 0.7675864928364122,
      "grad_norm": 0.9777283812460724,
      "learning_rate": 7.025915253962934e-07,
      "loss": 0.1194,
      "step": 8331
    },
    {
      "epoch": 0.7676786290136822,
      "grad_norm": 0.9028823303836367,
      "learning_rate": 7.020621332154512e-07,
      "loss": 0.123,
      "step": 8332
    },
    {
      "epoch": 0.7677707651909522,
      "grad_norm": 0.8798941197971895,
      "learning_rate": 7.015329079733327e-07,
      "loss": 0.104,
      "step": 8333
    },
    {
      "epoch": 0.7678629013682222,
      "grad_norm": 0.9322675197448133,
      "learning_rate": 7.010038497190774e-07,
      "loss": 0.1159,
      "step": 8334
    },
    {
      "epoch": 0.7679550375454922,
      "grad_norm": 1.0339398169724536,
      "learning_rate": 7.004749585018089e-07,
      "loss": 0.1274,
      "step": 8335
    },
    {
      "epoch": 0.7680471737227622,
      "grad_norm": 0.9907312337014706,
      "learning_rate": 6.999462343706339e-07,
      "loss": 0.1261,
      "step": 8336
    },
    {
      "epoch": 0.7681393099000322,
      "grad_norm": 0.9265113005572841,
      "learning_rate": 6.994176773746445e-07,
      "loss": 0.1127,
      "step": 8337
    },
    {
      "epoch": 0.7682314460773022,
      "grad_norm": 0.8763629187072877,
      "learning_rate": 6.988892875629172e-07,
      "loss": 0.1123,
      "step": 8338
    },
    {
      "epoch": 0.7683235822545723,
      "grad_norm": 0.9473048227249817,
      "learning_rate": 6.983610649845136e-07,
      "loss": 0.1266,
      "step": 8339
    },
    {
      "epoch": 0.7684157184318423,
      "grad_norm": 0.9376358861988066,
      "learning_rate": 6.978330096884794e-07,
      "loss": 0.118,
      "step": 8340
    },
    {
      "epoch": 0.7685078546091123,
      "grad_norm": 0.9698111754881962,
      "learning_rate": 6.973051217238444e-07,
      "loss": 0.1259,
      "step": 8341
    },
    {
      "epoch": 0.7685999907863823,
      "grad_norm": 0.912876262065055,
      "learning_rate": 6.967774011396222e-07,
      "loss": 0.1077,
      "step": 8342
    },
    {
      "epoch": 0.7686921269636523,
      "grad_norm": 0.9663238447952306,
      "learning_rate": 6.962498479848124e-07,
      "loss": 0.1203,
      "step": 8343
    },
    {
      "epoch": 0.7687842631409223,
      "grad_norm": 0.8994434460789928,
      "learning_rate": 6.957224623083989e-07,
      "loss": 0.1223,
      "step": 8344
    },
    {
      "epoch": 0.7688763993181923,
      "grad_norm": 0.8717941859314399,
      "learning_rate": 6.951952441593482e-07,
      "loss": 0.1125,
      "step": 8345
    },
    {
      "epoch": 0.7689685354954623,
      "grad_norm": 0.8812348398963182,
      "learning_rate": 6.946681935866143e-07,
      "loss": 0.1082,
      "step": 8346
    },
    {
      "epoch": 0.7690606716727323,
      "grad_norm": 1.0019169701459543,
      "learning_rate": 6.941413106391321e-07,
      "loss": 0.128,
      "step": 8347
    },
    {
      "epoch": 0.7691528078500023,
      "grad_norm": 0.9169717950582553,
      "learning_rate": 6.936145953658233e-07,
      "loss": 0.1194,
      "step": 8348
    },
    {
      "epoch": 0.7692449440272723,
      "grad_norm": 0.9331417687311467,
      "learning_rate": 6.930880478155946e-07,
      "loss": 0.1198,
      "step": 8349
    },
    {
      "epoch": 0.7693370802045423,
      "grad_norm": 0.8913529397204782,
      "learning_rate": 6.925616680373346e-07,
      "loss": 0.1165,
      "step": 8350
    },
    {
      "epoch": 0.7694292163818123,
      "grad_norm": 0.889454311489956,
      "learning_rate": 6.920354560799189e-07,
      "loss": 0.1176,
      "step": 8351
    },
    {
      "epoch": 0.7695213525590823,
      "grad_norm": 0.9482997599159537,
      "learning_rate": 6.915094119922048e-07,
      "loss": 0.1333,
      "step": 8352
    },
    {
      "epoch": 0.7696134887363524,
      "grad_norm": 0.9522924476603231,
      "learning_rate": 6.909835358230372e-07,
      "loss": 0.1208,
      "step": 8353
    },
    {
      "epoch": 0.7697056249136224,
      "grad_norm": 0.9205945084224799,
      "learning_rate": 6.904578276212423e-07,
      "loss": 0.1298,
      "step": 8354
    },
    {
      "epoch": 0.7697977610908924,
      "grad_norm": 0.9486688534035198,
      "learning_rate": 6.899322874356329e-07,
      "loss": 0.1303,
      "step": 8355
    },
    {
      "epoch": 0.7698898972681624,
      "grad_norm": 0.9265740419588309,
      "learning_rate": 6.89406915315006e-07,
      "loss": 0.1253,
      "step": 8356
    },
    {
      "epoch": 0.7699820334454324,
      "grad_norm": 0.9274708447215646,
      "learning_rate": 6.888817113081419e-07,
      "loss": 0.1262,
      "step": 8357
    },
    {
      "epoch": 0.7700741696227024,
      "grad_norm": 0.9727274919868594,
      "learning_rate": 6.883566754638052e-07,
      "loss": 0.1318,
      "step": 8358
    },
    {
      "epoch": 0.7701663057999724,
      "grad_norm": 0.8822267025867091,
      "learning_rate": 6.878318078307461e-07,
      "loss": 0.1122,
      "step": 8359
    },
    {
      "epoch": 0.7702584419772424,
      "grad_norm": 0.9559628727055969,
      "learning_rate": 6.873071084576985e-07,
      "loss": 0.1169,
      "step": 8360
    },
    {
      "epoch": 0.7703505781545124,
      "grad_norm": 0.9606378148147342,
      "learning_rate": 6.86782577393382e-07,
      "loss": 0.1277,
      "step": 8361
    },
    {
      "epoch": 0.7704427143317824,
      "grad_norm": 0.9531820149960578,
      "learning_rate": 6.862582146864982e-07,
      "loss": 0.1244,
      "step": 8362
    },
    {
      "epoch": 0.7705348505090523,
      "grad_norm": 0.90167924888066,
      "learning_rate": 6.857340203857335e-07,
      "loss": 0.1117,
      "step": 8363
    },
    {
      "epoch": 0.7706269866863223,
      "grad_norm": 0.947891508516917,
      "learning_rate": 6.852099945397603e-07,
      "loss": 0.1195,
      "step": 8364
    },
    {
      "epoch": 0.7707191228635923,
      "grad_norm": 0.9378585554874798,
      "learning_rate": 6.846861371972355e-07,
      "loss": 0.1244,
      "step": 8365
    },
    {
      "epoch": 0.7708112590408623,
      "grad_norm": 0.9705047499227291,
      "learning_rate": 6.841624484067971e-07,
      "loss": 0.1309,
      "step": 8366
    },
    {
      "epoch": 0.7709033952181324,
      "grad_norm": 0.8928153670107007,
      "learning_rate": 6.836389282170716e-07,
      "loss": 0.1127,
      "step": 8367
    },
    {
      "epoch": 0.7709955313954024,
      "grad_norm": 0.9662324933740548,
      "learning_rate": 6.831155766766665e-07,
      "loss": 0.1336,
      "step": 8368
    },
    {
      "epoch": 0.7710876675726724,
      "grad_norm": 0.8815268225850014,
      "learning_rate": 6.825923938341761e-07,
      "loss": 0.108,
      "step": 8369
    },
    {
      "epoch": 0.7711798037499424,
      "grad_norm": 0.9841686059201685,
      "learning_rate": 6.820693797381769e-07,
      "loss": 0.1247,
      "step": 8370
    },
    {
      "epoch": 0.7712719399272124,
      "grad_norm": 0.9536490084886785,
      "learning_rate": 6.815465344372316e-07,
      "loss": 0.1251,
      "step": 8371
    },
    {
      "epoch": 0.7713640761044824,
      "grad_norm": 0.9289453206959042,
      "learning_rate": 6.81023857979887e-07,
      "loss": 0.1124,
      "step": 8372
    },
    {
      "epoch": 0.7714562122817524,
      "grad_norm": 0.9312998796681076,
      "learning_rate": 6.805013504146729e-07,
      "loss": 0.1187,
      "step": 8373
    },
    {
      "epoch": 0.7715483484590224,
      "grad_norm": 0.9395698669875502,
      "learning_rate": 6.799790117901034e-07,
      "loss": 0.1231,
      "step": 8374
    },
    {
      "epoch": 0.7716404846362924,
      "grad_norm": 0.9166696590685488,
      "learning_rate": 6.794568421546785e-07,
      "loss": 0.1228,
      "step": 8375
    },
    {
      "epoch": 0.7717326208135624,
      "grad_norm": 0.9550481131258569,
      "learning_rate": 6.78934841556882e-07,
      "loss": 0.1279,
      "step": 8376
    },
    {
      "epoch": 0.7718247569908324,
      "grad_norm": 0.913707817793788,
      "learning_rate": 6.784130100451819e-07,
      "loss": 0.1173,
      "step": 8377
    },
    {
      "epoch": 0.7719168931681024,
      "grad_norm": 0.9996482206708629,
      "learning_rate": 6.778913476680302e-07,
      "loss": 0.1227,
      "step": 8378
    },
    {
      "epoch": 0.7720090293453724,
      "grad_norm": 0.9288326450663615,
      "learning_rate": 6.773698544738619e-07,
      "loss": 0.1274,
      "step": 8379
    },
    {
      "epoch": 0.7721011655226425,
      "grad_norm": 0.9094294673119427,
      "learning_rate": 6.768485305110989e-07,
      "loss": 0.1158,
      "step": 8380
    },
    {
      "epoch": 0.7721933016999125,
      "grad_norm": 0.9455830954897821,
      "learning_rate": 6.763273758281458e-07,
      "loss": 0.1176,
      "step": 8381
    },
    {
      "epoch": 0.7722854378771825,
      "grad_norm": 0.9650385136922474,
      "learning_rate": 6.758063904733933e-07,
      "loss": 0.1245,
      "step": 8382
    },
    {
      "epoch": 0.7723775740544525,
      "grad_norm": 0.9541341797982005,
      "learning_rate": 6.752855744952136e-07,
      "loss": 0.1195,
      "step": 8383
    },
    {
      "epoch": 0.7724697102317225,
      "grad_norm": 0.9791283036740815,
      "learning_rate": 6.747649279419638e-07,
      "loss": 0.1189,
      "step": 8384
    },
    {
      "epoch": 0.7725618464089925,
      "grad_norm": 0.864555357722854,
      "learning_rate": 6.742444508619869e-07,
      "loss": 0.1061,
      "step": 8385
    },
    {
      "epoch": 0.7726539825862625,
      "grad_norm": 0.9755164798582352,
      "learning_rate": 6.737241433036101e-07,
      "loss": 0.1258,
      "step": 8386
    },
    {
      "epoch": 0.7727461187635325,
      "grad_norm": 0.9382934330514728,
      "learning_rate": 6.732040053151423e-07,
      "loss": 0.1226,
      "step": 8387
    },
    {
      "epoch": 0.7728382549408025,
      "grad_norm": 0.9150629750841084,
      "learning_rate": 6.7268403694488e-07,
      "loss": 0.1173,
      "step": 8388
    },
    {
      "epoch": 0.7729303911180725,
      "grad_norm": 0.9115809997858356,
      "learning_rate": 6.721642382411006e-07,
      "loss": 0.1146,
      "step": 8389
    },
    {
      "epoch": 0.7730225272953425,
      "grad_norm": 0.9670908852073896,
      "learning_rate": 6.716446092520696e-07,
      "loss": 0.1306,
      "step": 8390
    },
    {
      "epoch": 0.7731146634726125,
      "grad_norm": 0.9394456052321907,
      "learning_rate": 6.711251500260322e-07,
      "loss": 0.1175,
      "step": 8391
    },
    {
      "epoch": 0.7732067996498825,
      "grad_norm": 1.0224125514598037,
      "learning_rate": 6.706058606112217e-07,
      "loss": 0.1264,
      "step": 8392
    },
    {
      "epoch": 0.7732989358271525,
      "grad_norm": 0.9136807246539035,
      "learning_rate": 6.700867410558537e-07,
      "loss": 0.1133,
      "step": 8393
    },
    {
      "epoch": 0.7733910720044226,
      "grad_norm": 0.9579652653107388,
      "learning_rate": 6.695677914081303e-07,
      "loss": 0.1211,
      "step": 8394
    },
    {
      "epoch": 0.7734832081816926,
      "grad_norm": 0.8953201784587433,
      "learning_rate": 6.690490117162333e-07,
      "loss": 0.1177,
      "step": 8395
    },
    {
      "epoch": 0.7735753443589626,
      "grad_norm": 0.8947390913684228,
      "learning_rate": 6.68530402028332e-07,
      "loss": 0.1175,
      "step": 8396
    },
    {
      "epoch": 0.7736674805362326,
      "grad_norm": 0.930339743335267,
      "learning_rate": 6.680119623925804e-07,
      "loss": 0.1119,
      "step": 8397
    },
    {
      "epoch": 0.7737596167135026,
      "grad_norm": 0.9691742182397383,
      "learning_rate": 6.67493692857116e-07,
      "loss": 0.1232,
      "step": 8398
    },
    {
      "epoch": 0.7738517528907726,
      "grad_norm": 0.9782640805198125,
      "learning_rate": 6.669755934700586e-07,
      "loss": 0.1178,
      "step": 8399
    },
    {
      "epoch": 0.7739438890680426,
      "grad_norm": 0.946658092447457,
      "learning_rate": 6.664576642795153e-07,
      "loss": 0.1211,
      "step": 8400
    },
    {
      "epoch": 0.7740360252453126,
      "grad_norm": 0.9171463357835422,
      "learning_rate": 6.659399053335747e-07,
      "loss": 0.1158,
      "step": 8401
    },
    {
      "epoch": 0.7741281614225826,
      "grad_norm": 0.9237405035504593,
      "learning_rate": 6.654223166803117e-07,
      "loss": 0.118,
      "step": 8402
    },
    {
      "epoch": 0.7742202975998526,
      "grad_norm": 0.8654426344561063,
      "learning_rate": 6.649048983677834e-07,
      "loss": 0.1139,
      "step": 8403
    },
    {
      "epoch": 0.7743124337771226,
      "grad_norm": 0.9287901506367029,
      "learning_rate": 6.643876504440327e-07,
      "loss": 0.1165,
      "step": 8404
    },
    {
      "epoch": 0.7744045699543926,
      "grad_norm": 0.936571591755619,
      "learning_rate": 6.638705729570871e-07,
      "loss": 0.1256,
      "step": 8405
    },
    {
      "epoch": 0.7744967061316625,
      "grad_norm": 0.9037966720069851,
      "learning_rate": 6.633536659549558e-07,
      "loss": 0.1117,
      "step": 8406
    },
    {
      "epoch": 0.7745888423089327,
      "grad_norm": 0.930053506926587,
      "learning_rate": 6.628369294856349e-07,
      "loss": 0.1152,
      "step": 8407
    },
    {
      "epoch": 0.7746809784862027,
      "grad_norm": 1.0105164231758588,
      "learning_rate": 6.623203635971018e-07,
      "loss": 0.1324,
      "step": 8408
    },
    {
      "epoch": 0.7747731146634727,
      "grad_norm": 0.9280072407677732,
      "learning_rate": 6.618039683373209e-07,
      "loss": 0.1121,
      "step": 8409
    },
    {
      "epoch": 0.7748652508407426,
      "grad_norm": 0.931988772652393,
      "learning_rate": 6.612877437542403e-07,
      "loss": 0.1175,
      "step": 8410
    },
    {
      "epoch": 0.7749573870180126,
      "grad_norm": 0.9055453680389911,
      "learning_rate": 6.607716898957903e-07,
      "loss": 0.1134,
      "step": 8411
    },
    {
      "epoch": 0.7750495231952826,
      "grad_norm": 0.9570405732563926,
      "learning_rate": 6.602558068098864e-07,
      "loss": 0.126,
      "step": 8412
    },
    {
      "epoch": 0.7751416593725526,
      "grad_norm": 0.8901939988992377,
      "learning_rate": 6.597400945444285e-07,
      "loss": 0.1098,
      "step": 8413
    },
    {
      "epoch": 0.7752337955498226,
      "grad_norm": 0.9457230744765637,
      "learning_rate": 6.59224553147301e-07,
      "loss": 0.1268,
      "step": 8414
    },
    {
      "epoch": 0.7753259317270926,
      "grad_norm": 0.9090096740087552,
      "learning_rate": 6.587091826663728e-07,
      "loss": 0.1165,
      "step": 8415
    },
    {
      "epoch": 0.7754180679043626,
      "grad_norm": 0.9268957635480991,
      "learning_rate": 6.581939831494948e-07,
      "loss": 0.1165,
      "step": 8416
    },
    {
      "epoch": 0.7755102040816326,
      "grad_norm": 0.8867965628336227,
      "learning_rate": 6.576789546445031e-07,
      "loss": 0.1091,
      "step": 8417
    },
    {
      "epoch": 0.7756023402589026,
      "grad_norm": 0.9892905172706016,
      "learning_rate": 6.571640971992188e-07,
      "loss": 0.1157,
      "step": 8418
    },
    {
      "epoch": 0.7756944764361726,
      "grad_norm": 0.9380259789018441,
      "learning_rate": 6.566494108614471e-07,
      "loss": 0.1152,
      "step": 8419
    },
    {
      "epoch": 0.7757866126134426,
      "grad_norm": 0.9206879628963276,
      "learning_rate": 6.561348956789751e-07,
      "loss": 0.1111,
      "step": 8420
    },
    {
      "epoch": 0.7758787487907127,
      "grad_norm": 0.9614768658559342,
      "learning_rate": 6.556205516995772e-07,
      "loss": 0.1194,
      "step": 8421
    },
    {
      "epoch": 0.7759708849679827,
      "grad_norm": 1.0010135251033254,
      "learning_rate": 6.551063789710091e-07,
      "loss": 0.1287,
      "step": 8422
    },
    {
      "epoch": 0.7760630211452527,
      "grad_norm": 0.9381981477040692,
      "learning_rate": 6.545923775410129e-07,
      "loss": 0.1111,
      "step": 8423
    },
    {
      "epoch": 0.7761551573225227,
      "grad_norm": 0.991357248153485,
      "learning_rate": 6.540785474573121e-07,
      "loss": 0.1266,
      "step": 8424
    },
    {
      "epoch": 0.7762472934997927,
      "grad_norm": 0.9387943094150902,
      "learning_rate": 6.535648887676171e-07,
      "loss": 0.1191,
      "step": 8425
    },
    {
      "epoch": 0.7763394296770627,
      "grad_norm": 0.9860413451407051,
      "learning_rate": 6.530514015196218e-07,
      "loss": 0.1304,
      "step": 8426
    },
    {
      "epoch": 0.7764315658543327,
      "grad_norm": 0.985186213663649,
      "learning_rate": 6.525380857610022e-07,
      "loss": 0.1286,
      "step": 8427
    },
    {
      "epoch": 0.7765237020316027,
      "grad_norm": 0.9875065104492615,
      "learning_rate": 6.520249415394197e-07,
      "loss": 0.1194,
      "step": 8428
    },
    {
      "epoch": 0.7766158382088727,
      "grad_norm": 0.926501626127916,
      "learning_rate": 6.515119689025201e-07,
      "loss": 0.1156,
      "step": 8429
    },
    {
      "epoch": 0.7767079743861427,
      "grad_norm": 1.0045405628938215,
      "learning_rate": 6.509991678979333e-07,
      "loss": 0.1312,
      "step": 8430
    },
    {
      "epoch": 0.7768001105634127,
      "grad_norm": 0.9431212698157658,
      "learning_rate": 6.504865385732734e-07,
      "loss": 0.1197,
      "step": 8431
    },
    {
      "epoch": 0.7768922467406827,
      "grad_norm": 0.9141056582506759,
      "learning_rate": 6.499740809761373e-07,
      "loss": 0.1198,
      "step": 8432
    },
    {
      "epoch": 0.7769843829179527,
      "grad_norm": 0.9006244298850004,
      "learning_rate": 6.494617951541063e-07,
      "loss": 0.1112,
      "step": 8433
    },
    {
      "epoch": 0.7770765190952227,
      "grad_norm": 1.0513186465071094,
      "learning_rate": 6.489496811547468e-07,
      "loss": 0.1287,
      "step": 8434
    },
    {
      "epoch": 0.7771686552724928,
      "grad_norm": 0.9398083516416804,
      "learning_rate": 6.484377390256086e-07,
      "loss": 0.1223,
      "step": 8435
    },
    {
      "epoch": 0.7772607914497628,
      "grad_norm": 0.8917848864473544,
      "learning_rate": 6.479259688142261e-07,
      "loss": 0.1034,
      "step": 8436
    },
    {
      "epoch": 0.7773529276270328,
      "grad_norm": 0.9237163314143305,
      "learning_rate": 6.474143705681171e-07,
      "loss": 0.113,
      "step": 8437
    },
    {
      "epoch": 0.7774450638043028,
      "grad_norm": 0.8793476209899715,
      "learning_rate": 6.469029443347821e-07,
      "loss": 0.1095,
      "step": 8438
    },
    {
      "epoch": 0.7775371999815728,
      "grad_norm": 0.9637901601407782,
      "learning_rate": 6.463916901617084e-07,
      "loss": 0.1255,
      "step": 8439
    },
    {
      "epoch": 0.7776293361588428,
      "grad_norm": 0.9257448847903722,
      "learning_rate": 6.458806080963664e-07,
      "loss": 0.1217,
      "step": 8440
    },
    {
      "epoch": 0.7777214723361128,
      "grad_norm": 0.973025415024205,
      "learning_rate": 6.453696981862087e-07,
      "loss": 0.119,
      "step": 8441
    },
    {
      "epoch": 0.7778136085133828,
      "grad_norm": 0.9278105107397764,
      "learning_rate": 6.448589604786748e-07,
      "loss": 0.1176,
      "step": 8442
    },
    {
      "epoch": 0.7779057446906528,
      "grad_norm": 0.9748580678259127,
      "learning_rate": 6.443483950211854e-07,
      "loss": 0.1338,
      "step": 8443
    },
    {
      "epoch": 0.7779978808679228,
      "grad_norm": 0.9426796776933819,
      "learning_rate": 6.438380018611481e-07,
      "loss": 0.1161,
      "step": 8444
    },
    {
      "epoch": 0.7780900170451928,
      "grad_norm": 0.9374426050170451,
      "learning_rate": 6.433277810459512e-07,
      "loss": 0.126,
      "step": 8445
    },
    {
      "epoch": 0.7781821532224628,
      "grad_norm": 0.9156606080642972,
      "learning_rate": 6.428177326229698e-07,
      "loss": 0.1196,
      "step": 8446
    },
    {
      "epoch": 0.7782742893997328,
      "grad_norm": 0.9785305397305804,
      "learning_rate": 6.423078566395624e-07,
      "loss": 0.1278,
      "step": 8447
    },
    {
      "epoch": 0.7783664255770029,
      "grad_norm": 0.9472568149749723,
      "learning_rate": 6.417981531430705e-07,
      "loss": 0.1195,
      "step": 8448
    },
    {
      "epoch": 0.7784585617542729,
      "grad_norm": 0.9267730257758037,
      "learning_rate": 6.412886221808193e-07,
      "loss": 0.1217,
      "step": 8449
    },
    {
      "epoch": 0.7785506979315429,
      "grad_norm": 0.9245783333379712,
      "learning_rate": 6.407792638001195e-07,
      "loss": 0.118,
      "step": 8450
    },
    {
      "epoch": 0.7786428341088129,
      "grad_norm": 0.8951265439753134,
      "learning_rate": 6.402700780482651e-07,
      "loss": 0.1143,
      "step": 8451
    },
    {
      "epoch": 0.7787349702860829,
      "grad_norm": 0.9733184947776784,
      "learning_rate": 6.39761064972535e-07,
      "loss": 0.1192,
      "step": 8452
    },
    {
      "epoch": 0.7788271064633528,
      "grad_norm": 0.9653018548665765,
      "learning_rate": 6.392522246201902e-07,
      "loss": 0.1194,
      "step": 8453
    },
    {
      "epoch": 0.7789192426406228,
      "grad_norm": 0.9366809581149949,
      "learning_rate": 6.387435570384759e-07,
      "loss": 0.1185,
      "step": 8454
    },
    {
      "epoch": 0.7790113788178928,
      "grad_norm": 0.9412472670526044,
      "learning_rate": 6.382350622746225e-07,
      "loss": 0.1257,
      "step": 8455
    },
    {
      "epoch": 0.7791035149951628,
      "grad_norm": 0.9377271262047409,
      "learning_rate": 6.377267403758447e-07,
      "loss": 0.1122,
      "step": 8456
    },
    {
      "epoch": 0.7791956511724328,
      "grad_norm": 0.9410613069969594,
      "learning_rate": 6.372185913893389e-07,
      "loss": 0.1252,
      "step": 8457
    },
    {
      "epoch": 0.7792877873497028,
      "grad_norm": 1.0330182142744149,
      "learning_rate": 6.367106153622879e-07,
      "loss": 0.1228,
      "step": 8458
    },
    {
      "epoch": 0.7793799235269728,
      "grad_norm": 0.9656816711177176,
      "learning_rate": 6.362028123418562e-07,
      "loss": 0.1174,
      "step": 8459
    },
    {
      "epoch": 0.7794720597042428,
      "grad_norm": 0.9552540658961965,
      "learning_rate": 6.356951823751947e-07,
      "loss": 0.1094,
      "step": 8460
    },
    {
      "epoch": 0.7795641958815128,
      "grad_norm": 0.8951826862510767,
      "learning_rate": 6.351877255094352e-07,
      "loss": 0.1064,
      "step": 8461
    },
    {
      "epoch": 0.7796563320587829,
      "grad_norm": 0.8842323676196631,
      "learning_rate": 6.346804417916963e-07,
      "loss": 0.1091,
      "step": 8462
    },
    {
      "epoch": 0.7797484682360529,
      "grad_norm": 1.021858302504246,
      "learning_rate": 6.341733312690798e-07,
      "loss": 0.1224,
      "step": 8463
    },
    {
      "epoch": 0.7798406044133229,
      "grad_norm": 0.9373733957984577,
      "learning_rate": 6.336663939886695e-07,
      "loss": 0.1152,
      "step": 8464
    },
    {
      "epoch": 0.7799327405905929,
      "grad_norm": 0.9418487894872865,
      "learning_rate": 6.331596299975362e-07,
      "loss": 0.1287,
      "step": 8465
    },
    {
      "epoch": 0.7800248767678629,
      "grad_norm": 0.9485846360794888,
      "learning_rate": 6.326530393427316e-07,
      "loss": 0.1197,
      "step": 8466
    },
    {
      "epoch": 0.7801170129451329,
      "grad_norm": 0.9556435873882474,
      "learning_rate": 6.321466220712929e-07,
      "loss": 0.1156,
      "step": 8467
    },
    {
      "epoch": 0.7802091491224029,
      "grad_norm": 0.8926722100933544,
      "learning_rate": 6.316403782302416e-07,
      "loss": 0.113,
      "step": 8468
    },
    {
      "epoch": 0.7803012852996729,
      "grad_norm": 0.9739250990079734,
      "learning_rate": 6.311343078665835e-07,
      "loss": 0.1289,
      "step": 8469
    },
    {
      "epoch": 0.7803934214769429,
      "grad_norm": 0.988398143690067,
      "learning_rate": 6.306284110273047e-07,
      "loss": 0.1248,
      "step": 8470
    },
    {
      "epoch": 0.7804855576542129,
      "grad_norm": 0.9728431078034094,
      "learning_rate": 6.301226877593794e-07,
      "loss": 0.1162,
      "step": 8471
    },
    {
      "epoch": 0.7805776938314829,
      "grad_norm": 0.9367164880906897,
      "learning_rate": 6.296171381097635e-07,
      "loss": 0.1259,
      "step": 8472
    },
    {
      "epoch": 0.7806698300087529,
      "grad_norm": 0.914892332720901,
      "learning_rate": 6.291117621253984e-07,
      "loss": 0.1188,
      "step": 8473
    },
    {
      "epoch": 0.7807619661860229,
      "grad_norm": 0.9445604136684088,
      "learning_rate": 6.286065598532065e-07,
      "loss": 0.1215,
      "step": 8474
    },
    {
      "epoch": 0.780854102363293,
      "grad_norm": 0.971002437022487,
      "learning_rate": 6.281015313400981e-07,
      "loss": 0.127,
      "step": 8475
    },
    {
      "epoch": 0.780946238540563,
      "grad_norm": 0.9458980424013934,
      "learning_rate": 6.27596676632963e-07,
      "loss": 0.1233,
      "step": 8476
    },
    {
      "epoch": 0.781038374717833,
      "grad_norm": 0.9575720735157595,
      "learning_rate": 6.270919957786789e-07,
      "loss": 0.1276,
      "step": 8477
    },
    {
      "epoch": 0.781130510895103,
      "grad_norm": 0.9043773140494776,
      "learning_rate": 6.265874888241035e-07,
      "loss": 0.1148,
      "step": 8478
    },
    {
      "epoch": 0.781222647072373,
      "grad_norm": 0.9233761176859706,
      "learning_rate": 6.260831558160818e-07,
      "loss": 0.1167,
      "step": 8479
    },
    {
      "epoch": 0.781314783249643,
      "grad_norm": 0.98013498969769,
      "learning_rate": 6.255789968014411e-07,
      "loss": 0.1319,
      "step": 8480
    },
    {
      "epoch": 0.781406919426913,
      "grad_norm": 0.9404213995261698,
      "learning_rate": 6.250750118269927e-07,
      "loss": 0.1244,
      "step": 8481
    },
    {
      "epoch": 0.781499055604183,
      "grad_norm": 0.9519949447236915,
      "learning_rate": 6.245712009395303e-07,
      "loss": 0.1134,
      "step": 8482
    },
    {
      "epoch": 0.781591191781453,
      "grad_norm": 0.9317461820724098,
      "learning_rate": 6.240675641858335e-07,
      "loss": 0.1192,
      "step": 8483
    },
    {
      "epoch": 0.781683327958723,
      "grad_norm": 0.9129871386527313,
      "learning_rate": 6.235641016126653e-07,
      "loss": 0.1166,
      "step": 8484
    },
    {
      "epoch": 0.781775464135993,
      "grad_norm": 1.048428062985597,
      "learning_rate": 6.230608132667732e-07,
      "loss": 0.1334,
      "step": 8485
    },
    {
      "epoch": 0.781867600313263,
      "grad_norm": 0.9451185634087925,
      "learning_rate": 6.225576991948865e-07,
      "loss": 0.1149,
      "step": 8486
    },
    {
      "epoch": 0.781959736490533,
      "grad_norm": 0.99581861616513,
      "learning_rate": 6.220547594437188e-07,
      "loss": 0.1187,
      "step": 8487
    },
    {
      "epoch": 0.782051872667803,
      "grad_norm": 1.0278512933151998,
      "learning_rate": 6.215519940599687e-07,
      "loss": 0.1317,
      "step": 8488
    },
    {
      "epoch": 0.7821440088450731,
      "grad_norm": 0.9460848053133615,
      "learning_rate": 6.210494030903188e-07,
      "loss": 0.1257,
      "step": 8489
    },
    {
      "epoch": 0.7822361450223431,
      "grad_norm": 0.9789169783690399,
      "learning_rate": 6.205469865814334e-07,
      "loss": 0.1198,
      "step": 8490
    },
    {
      "epoch": 0.7823282811996131,
      "grad_norm": 0.9355875645829241,
      "learning_rate": 6.200447445799631e-07,
      "loss": 0.1239,
      "step": 8491
    },
    {
      "epoch": 0.7824204173768831,
      "grad_norm": 0.931025680256216,
      "learning_rate": 6.195426771325402e-07,
      "loss": 0.1197,
      "step": 8492
    },
    {
      "epoch": 0.7825125535541531,
      "grad_norm": 0.9038272461792805,
      "learning_rate": 6.190407842857818e-07,
      "loss": 0.109,
      "step": 8493
    },
    {
      "epoch": 0.782604689731423,
      "grad_norm": 0.9291283890197121,
      "learning_rate": 6.1853906608629e-07,
      "loss": 0.1144,
      "step": 8494
    },
    {
      "epoch": 0.782696825908693,
      "grad_norm": 0.9313673945332007,
      "learning_rate": 6.180375225806475e-07,
      "loss": 0.1174,
      "step": 8495
    },
    {
      "epoch": 0.782788962085963,
      "grad_norm": 0.8837655167803492,
      "learning_rate": 6.175361538154243e-07,
      "loss": 0.1078,
      "step": 8496
    },
    {
      "epoch": 0.782881098263233,
      "grad_norm": 0.9261810944905562,
      "learning_rate": 6.170349598371711e-07,
      "loss": 0.1135,
      "step": 8497
    },
    {
      "epoch": 0.782973234440503,
      "grad_norm": 0.9705748971066371,
      "learning_rate": 6.165339406924253e-07,
      "loss": 0.1284,
      "step": 8498
    },
    {
      "epoch": 0.783065370617773,
      "grad_norm": 0.931338790555389,
      "learning_rate": 6.16033096427705e-07,
      "loss": 0.1156,
      "step": 8499
    },
    {
      "epoch": 0.783157506795043,
      "grad_norm": 0.9412160383814583,
      "learning_rate": 6.155324270895144e-07,
      "loss": 0.1259,
      "step": 8500
    },
    {
      "epoch": 0.783157506795043,
      "eval_loss": 0.12055304646492004,
      "eval_runtime": 299.0742,
      "eval_samples_per_second": 23.462,
      "eval_steps_per_second": 2.936,
      "step": 8500
    },
    {
      "epoch": 0.783249642972313,
      "grad_norm": 0.9786974177274947,
      "learning_rate": 6.150319327243417e-07,
      "loss": 0.1266,
      "step": 8501
    },
    {
      "epoch": 0.783341779149583,
      "grad_norm": 0.9663666635302712,
      "learning_rate": 6.145316133786569e-07,
      "loss": 0.1236,
      "step": 8502
    },
    {
      "epoch": 0.7834339153268531,
      "grad_norm": 1.0066102428653365,
      "learning_rate": 6.140314690989138e-07,
      "loss": 0.1201,
      "step": 8503
    },
    {
      "epoch": 0.7835260515041231,
      "grad_norm": 0.9032566231861803,
      "learning_rate": 6.135314999315517e-07,
      "loss": 0.1188,
      "step": 8504
    },
    {
      "epoch": 0.7836181876813931,
      "grad_norm": 0.8936724534841473,
      "learning_rate": 6.130317059229932e-07,
      "loss": 0.1112,
      "step": 8505
    },
    {
      "epoch": 0.7837103238586631,
      "grad_norm": 0.8765532770050644,
      "learning_rate": 6.125320871196445e-07,
      "loss": 0.1215,
      "step": 8506
    },
    {
      "epoch": 0.7838024600359331,
      "grad_norm": 0.9812082328646534,
      "learning_rate": 6.120326435678945e-07,
      "loss": 0.1204,
      "step": 8507
    },
    {
      "epoch": 0.7838945962132031,
      "grad_norm": 0.9941206603675594,
      "learning_rate": 6.115333753141159e-07,
      "loss": 0.1267,
      "step": 8508
    },
    {
      "epoch": 0.7839867323904731,
      "grad_norm": 0.9400949014093748,
      "learning_rate": 6.110342824046667e-07,
      "loss": 0.1191,
      "step": 8509
    },
    {
      "epoch": 0.7840788685677431,
      "grad_norm": 0.9885242657468389,
      "learning_rate": 6.105353648858887e-07,
      "loss": 0.134,
      "step": 8510
    },
    {
      "epoch": 0.7841710047450131,
      "grad_norm": 0.9742890666878563,
      "learning_rate": 6.100366228041043e-07,
      "loss": 0.1279,
      "step": 8511
    },
    {
      "epoch": 0.7842631409222831,
      "grad_norm": 0.9855302153041672,
      "learning_rate": 6.095380562056238e-07,
      "loss": 0.128,
      "step": 8512
    },
    {
      "epoch": 0.7843552770995531,
      "grad_norm": 0.9553368441789891,
      "learning_rate": 6.090396651367375e-07,
      "loss": 0.1217,
      "step": 8513
    },
    {
      "epoch": 0.7844474132768231,
      "grad_norm": 0.9679957894036133,
      "learning_rate": 6.085414496437226e-07,
      "loss": 0.1314,
      "step": 8514
    },
    {
      "epoch": 0.7845395494540931,
      "grad_norm": 0.9478895842952598,
      "learning_rate": 6.080434097728368e-07,
      "loss": 0.1304,
      "step": 8515
    },
    {
      "epoch": 0.7846316856313632,
      "grad_norm": 0.8701797009684351,
      "learning_rate": 6.075455455703242e-07,
      "loss": 0.1086,
      "step": 8516
    },
    {
      "epoch": 0.7847238218086332,
      "grad_norm": 0.9906259373704634,
      "learning_rate": 6.070478570824118e-07,
      "loss": 0.1289,
      "step": 8517
    },
    {
      "epoch": 0.7848159579859032,
      "grad_norm": 0.8989965341827344,
      "learning_rate": 6.065503443553097e-07,
      "loss": 0.1077,
      "step": 8518
    },
    {
      "epoch": 0.7849080941631732,
      "grad_norm": 0.959007954608302,
      "learning_rate": 6.060530074352114e-07,
      "loss": 0.1263,
      "step": 8519
    },
    {
      "epoch": 0.7850002303404432,
      "grad_norm": 0.9246171708824706,
      "learning_rate": 6.055558463682948e-07,
      "loss": 0.1178,
      "step": 8520
    },
    {
      "epoch": 0.7850923665177132,
      "grad_norm": 0.9477951948238026,
      "learning_rate": 6.050588612007221e-07,
      "loss": 0.1191,
      "step": 8521
    },
    {
      "epoch": 0.7851845026949832,
      "grad_norm": 0.9658569636554907,
      "learning_rate": 6.045620519786386e-07,
      "loss": 0.1155,
      "step": 8522
    },
    {
      "epoch": 0.7852766388722532,
      "grad_norm": 0.957644890519468,
      "learning_rate": 6.040654187481726e-07,
      "loss": 0.1172,
      "step": 8523
    },
    {
      "epoch": 0.7853687750495232,
      "grad_norm": 0.9855829358535408,
      "learning_rate": 6.035689615554358e-07,
      "loss": 0.1212,
      "step": 8524
    },
    {
      "epoch": 0.7854609112267932,
      "grad_norm": 0.8942975631899651,
      "learning_rate": 6.030726804465251e-07,
      "loss": 0.1116,
      "step": 8525
    },
    {
      "epoch": 0.7855530474040632,
      "grad_norm": 0.9451557449490459,
      "learning_rate": 6.0257657546752e-07,
      "loss": 0.12,
      "step": 8526
    },
    {
      "epoch": 0.7856451835813332,
      "grad_norm": 0.9301666727799931,
      "learning_rate": 6.020806466644849e-07,
      "loss": 0.1243,
      "step": 8527
    },
    {
      "epoch": 0.7857373197586032,
      "grad_norm": 0.9620046325714793,
      "learning_rate": 6.015848940834662e-07,
      "loss": 0.1187,
      "step": 8528
    },
    {
      "epoch": 0.7858294559358732,
      "grad_norm": 0.933481477737301,
      "learning_rate": 6.010893177704935e-07,
      "loss": 0.1183,
      "step": 8529
    },
    {
      "epoch": 0.7859215921131433,
      "grad_norm": 0.9539924108362,
      "learning_rate": 6.00593917771582e-07,
      "loss": 0.128,
      "step": 8530
    },
    {
      "epoch": 0.7860137282904133,
      "grad_norm": 0.9199764287416644,
      "learning_rate": 6.000986941327303e-07,
      "loss": 0.114,
      "step": 8531
    },
    {
      "epoch": 0.7861058644676833,
      "grad_norm": 0.9631603455245744,
      "learning_rate": 5.996036468999187e-07,
      "loss": 0.1209,
      "step": 8532
    },
    {
      "epoch": 0.7861980006449533,
      "grad_norm": 0.946933611017911,
      "learning_rate": 5.991087761191136e-07,
      "loss": 0.1266,
      "step": 8533
    },
    {
      "epoch": 0.7862901368222233,
      "grad_norm": 0.9249986114077867,
      "learning_rate": 5.986140818362626e-07,
      "loss": 0.1131,
      "step": 8534
    },
    {
      "epoch": 0.7863822729994933,
      "grad_norm": 0.9484468777362128,
      "learning_rate": 5.981195640972995e-07,
      "loss": 0.1282,
      "step": 8535
    },
    {
      "epoch": 0.7864744091767633,
      "grad_norm": 0.9371057825811394,
      "learning_rate": 5.976252229481385e-07,
      "loss": 0.124,
      "step": 8536
    },
    {
      "epoch": 0.7865665453540333,
      "grad_norm": 0.9188426598072358,
      "learning_rate": 5.971310584346807e-07,
      "loss": 0.1148,
      "step": 8537
    },
    {
      "epoch": 0.7866586815313032,
      "grad_norm": 0.9073770852124188,
      "learning_rate": 5.966370706028094e-07,
      "loss": 0.1145,
      "step": 8538
    },
    {
      "epoch": 0.7867508177085732,
      "grad_norm": 0.9547670801763313,
      "learning_rate": 5.96143259498391e-07,
      "loss": 0.1266,
      "step": 8539
    },
    {
      "epoch": 0.7868429538858432,
      "grad_norm": 0.9827821775876324,
      "learning_rate": 5.956496251672752e-07,
      "loss": 0.128,
      "step": 8540
    },
    {
      "epoch": 0.7869350900631132,
      "grad_norm": 0.9456692696948865,
      "learning_rate": 5.951561676552966e-07,
      "loss": 0.1215,
      "step": 8541
    },
    {
      "epoch": 0.7870272262403832,
      "grad_norm": 0.9267593713920448,
      "learning_rate": 5.946628870082729e-07,
      "loss": 0.123,
      "step": 8542
    },
    {
      "epoch": 0.7871193624176533,
      "grad_norm": 0.9393627351534874,
      "learning_rate": 5.941697832720058e-07,
      "loss": 0.1325,
      "step": 8543
    },
    {
      "epoch": 0.7872114985949233,
      "grad_norm": 0.9550911434469457,
      "learning_rate": 5.936768564922796e-07,
      "loss": 0.1203,
      "step": 8544
    },
    {
      "epoch": 0.7873036347721933,
      "grad_norm": 0.9004499570914317,
      "learning_rate": 5.931841067148616e-07,
      "loss": 0.1176,
      "step": 8545
    },
    {
      "epoch": 0.7873957709494633,
      "grad_norm": 0.8932941203298196,
      "learning_rate": 5.926915339855044e-07,
      "loss": 0.1213,
      "step": 8546
    },
    {
      "epoch": 0.7874879071267333,
      "grad_norm": 0.8922074120043654,
      "learning_rate": 5.921991383499445e-07,
      "loss": 0.1156,
      "step": 8547
    },
    {
      "epoch": 0.7875800433040033,
      "grad_norm": 0.9050717612224399,
      "learning_rate": 5.917069198538991e-07,
      "loss": 0.1103,
      "step": 8548
    },
    {
      "epoch": 0.7876721794812733,
      "grad_norm": 0.9122458952803807,
      "learning_rate": 5.912148785430713e-07,
      "loss": 0.1023,
      "step": 8549
    },
    {
      "epoch": 0.7877643156585433,
      "grad_norm": 0.9437602990583515,
      "learning_rate": 5.907230144631485e-07,
      "loss": 0.1245,
      "step": 8550
    },
    {
      "epoch": 0.7878564518358133,
      "grad_norm": 0.8797876753243765,
      "learning_rate": 5.902313276597984e-07,
      "loss": 0.1112,
      "step": 8551
    },
    {
      "epoch": 0.7879485880130833,
      "grad_norm": 0.9023470141440695,
      "learning_rate": 5.89739818178676e-07,
      "loss": 0.1145,
      "step": 8552
    },
    {
      "epoch": 0.7880407241903533,
      "grad_norm": 1.0168757196023077,
      "learning_rate": 5.892484860654163e-07,
      "loss": 0.1374,
      "step": 8553
    },
    {
      "epoch": 0.7881328603676233,
      "grad_norm": 0.9206918918167758,
      "learning_rate": 5.8875733136564e-07,
      "loss": 0.1309,
      "step": 8554
    },
    {
      "epoch": 0.7882249965448933,
      "grad_norm": 0.9902963814438155,
      "learning_rate": 5.882663541249523e-07,
      "loss": 0.128,
      "step": 8555
    },
    {
      "epoch": 0.7883171327221633,
      "grad_norm": 0.9183091186455521,
      "learning_rate": 5.877755543889391e-07,
      "loss": 0.1202,
      "step": 8556
    },
    {
      "epoch": 0.7884092688994334,
      "grad_norm": 0.9533822683824176,
      "learning_rate": 5.872849322031706e-07,
      "loss": 0.1213,
      "step": 8557
    },
    {
      "epoch": 0.7885014050767034,
      "grad_norm": 0.9373591429059158,
      "learning_rate": 5.867944876132022e-07,
      "loss": 0.1227,
      "step": 8558
    },
    {
      "epoch": 0.7885935412539734,
      "grad_norm": 0.9103034475496797,
      "learning_rate": 5.863042206645716e-07,
      "loss": 0.1034,
      "step": 8559
    },
    {
      "epoch": 0.7886856774312434,
      "grad_norm": 0.985633430923528,
      "learning_rate": 5.858141314028007e-07,
      "loss": 0.1258,
      "step": 8560
    },
    {
      "epoch": 0.7887778136085134,
      "grad_norm": 0.8959172713941098,
      "learning_rate": 5.853242198733938e-07,
      "loss": 0.1123,
      "step": 8561
    },
    {
      "epoch": 0.7888699497857834,
      "grad_norm": 0.9396584454256341,
      "learning_rate": 5.848344861218383e-07,
      "loss": 0.1252,
      "step": 8562
    },
    {
      "epoch": 0.7889620859630534,
      "grad_norm": 0.9604002869692225,
      "learning_rate": 5.843449301936068e-07,
      "loss": 0.1263,
      "step": 8563
    },
    {
      "epoch": 0.7890542221403234,
      "grad_norm": 0.9298803788187124,
      "learning_rate": 5.838555521341558e-07,
      "loss": 0.1278,
      "step": 8564
    },
    {
      "epoch": 0.7891463583175934,
      "grad_norm": 0.9559648352234692,
      "learning_rate": 5.833663519889218e-07,
      "loss": 0.1244,
      "step": 8565
    },
    {
      "epoch": 0.7892384944948634,
      "grad_norm": 0.8911343469758822,
      "learning_rate": 5.828773298033294e-07,
      "loss": 0.1109,
      "step": 8566
    },
    {
      "epoch": 0.7893306306721334,
      "grad_norm": 0.9454641222726273,
      "learning_rate": 5.823884856227824e-07,
      "loss": 0.1232,
      "step": 8567
    },
    {
      "epoch": 0.7894227668494034,
      "grad_norm": 0.9859866096920007,
      "learning_rate": 5.818998194926714e-07,
      "loss": 0.13,
      "step": 8568
    },
    {
      "epoch": 0.7895149030266734,
      "grad_norm": 0.947179149432619,
      "learning_rate": 5.81411331458368e-07,
      "loss": 0.1206,
      "step": 8569
    },
    {
      "epoch": 0.7896070392039435,
      "grad_norm": 0.9052920558861215,
      "learning_rate": 5.809230215652292e-07,
      "loss": 0.1077,
      "step": 8570
    },
    {
      "epoch": 0.7896991753812135,
      "grad_norm": 0.9195765167110864,
      "learning_rate": 5.804348898585949e-07,
      "loss": 0.1171,
      "step": 8571
    },
    {
      "epoch": 0.7897913115584835,
      "grad_norm": 0.9482984019981444,
      "learning_rate": 5.799469363837876e-07,
      "loss": 0.1248,
      "step": 8572
    },
    {
      "epoch": 0.7898834477357535,
      "grad_norm": 0.9463055742926426,
      "learning_rate": 5.794591611861134e-07,
      "loss": 0.1208,
      "step": 8573
    },
    {
      "epoch": 0.7899755839130235,
      "grad_norm": 0.9494961706911567,
      "learning_rate": 5.789715643108623e-07,
      "loss": 0.1139,
      "step": 8574
    },
    {
      "epoch": 0.7900677200902935,
      "grad_norm": 0.919851610503083,
      "learning_rate": 5.784841458033086e-07,
      "loss": 0.1215,
      "step": 8575
    },
    {
      "epoch": 0.7901598562675635,
      "grad_norm": 0.9245264796890834,
      "learning_rate": 5.779969057087095e-07,
      "loss": 0.1176,
      "step": 8576
    },
    {
      "epoch": 0.7902519924448335,
      "grad_norm": 0.9287192440639785,
      "learning_rate": 5.775098440723042e-07,
      "loss": 0.1222,
      "step": 8577
    },
    {
      "epoch": 0.7903441286221035,
      "grad_norm": 0.9450777250855027,
      "learning_rate": 5.770229609393166e-07,
      "loss": 0.1129,
      "step": 8578
    },
    {
      "epoch": 0.7904362647993735,
      "grad_norm": 0.9492026706943572,
      "learning_rate": 5.765362563549537e-07,
      "loss": 0.1278,
      "step": 8579
    },
    {
      "epoch": 0.7905284009766435,
      "grad_norm": 0.9202309222182878,
      "learning_rate": 5.760497303644063e-07,
      "loss": 0.1103,
      "step": 8580
    },
    {
      "epoch": 0.7906205371539134,
      "grad_norm": 0.9245925883002144,
      "learning_rate": 5.755633830128493e-07,
      "loss": 0.1257,
      "step": 8581
    },
    {
      "epoch": 0.7907126733311834,
      "grad_norm": 0.9517509945193647,
      "learning_rate": 5.750772143454395e-07,
      "loss": 0.1261,
      "step": 8582
    },
    {
      "epoch": 0.7908048095084534,
      "grad_norm": 0.9080767566545325,
      "learning_rate": 5.745912244073166e-07,
      "loss": 0.1067,
      "step": 8583
    },
    {
      "epoch": 0.7908969456857236,
      "grad_norm": 0.9902988389524588,
      "learning_rate": 5.741054132436058e-07,
      "loss": 0.1311,
      "step": 8584
    },
    {
      "epoch": 0.7909890818629935,
      "grad_norm": 0.9161014687025334,
      "learning_rate": 5.736197808994151e-07,
      "loss": 0.1168,
      "step": 8585
    },
    {
      "epoch": 0.7910812180402635,
      "grad_norm": 0.9407576129491564,
      "learning_rate": 5.731343274198348e-07,
      "loss": 0.1181,
      "step": 8586
    },
    {
      "epoch": 0.7911733542175335,
      "grad_norm": 0.9222839395438274,
      "learning_rate": 5.726490528499398e-07,
      "loss": 0.1241,
      "step": 8587
    },
    {
      "epoch": 0.7912654903948035,
      "grad_norm": 0.989642538165895,
      "learning_rate": 5.721639572347873e-07,
      "loss": 0.1298,
      "step": 8588
    },
    {
      "epoch": 0.7913576265720735,
      "grad_norm": 0.910192558868216,
      "learning_rate": 5.716790406194195e-07,
      "loss": 0.1155,
      "step": 8589
    },
    {
      "epoch": 0.7914497627493435,
      "grad_norm": 0.9427897115154177,
      "learning_rate": 5.711943030488595e-07,
      "loss": 0.1208,
      "step": 8590
    },
    {
      "epoch": 0.7915418989266135,
      "grad_norm": 0.9230754902994186,
      "learning_rate": 5.707097445681162e-07,
      "loss": 0.1156,
      "step": 8591
    },
    {
      "epoch": 0.7916340351038835,
      "grad_norm": 0.9736347600748048,
      "learning_rate": 5.702253652221815e-07,
      "loss": 0.1209,
      "step": 8592
    },
    {
      "epoch": 0.7917261712811535,
      "grad_norm": 0.8885640767094255,
      "learning_rate": 5.697411650560292e-07,
      "loss": 0.1093,
      "step": 8593
    },
    {
      "epoch": 0.7918183074584235,
      "grad_norm": 0.9367737786103164,
      "learning_rate": 5.692571441146167e-07,
      "loss": 0.1137,
      "step": 8594
    },
    {
      "epoch": 0.7919104436356935,
      "grad_norm": 0.9616450487450995,
      "learning_rate": 5.68773302442886e-07,
      "loss": 0.1251,
      "step": 8595
    },
    {
      "epoch": 0.7920025798129635,
      "grad_norm": 0.994312643078694,
      "learning_rate": 5.682896400857623e-07,
      "loss": 0.1328,
      "step": 8596
    },
    {
      "epoch": 0.7920947159902335,
      "grad_norm": 0.9133848572401454,
      "learning_rate": 5.678061570881541e-07,
      "loss": 0.1175,
      "step": 8597
    },
    {
      "epoch": 0.7921868521675036,
      "grad_norm": 0.9092316720540026,
      "learning_rate": 5.673228534949521e-07,
      "loss": 0.1126,
      "step": 8598
    },
    {
      "epoch": 0.7922789883447736,
      "grad_norm": 0.9170357165667093,
      "learning_rate": 5.668397293510303e-07,
      "loss": 0.1095,
      "step": 8599
    },
    {
      "epoch": 0.7923711245220436,
      "grad_norm": 0.9195323389718666,
      "learning_rate": 5.66356784701248e-07,
      "loss": 0.1139,
      "step": 8600
    },
    {
      "epoch": 0.7924632606993136,
      "grad_norm": 0.9813287621184931,
      "learning_rate": 5.658740195904466e-07,
      "loss": 0.1224,
      "step": 8601
    },
    {
      "epoch": 0.7925553968765836,
      "grad_norm": 0.9381776235684026,
      "learning_rate": 5.653914340634504e-07,
      "loss": 0.1183,
      "step": 8602
    },
    {
      "epoch": 0.7926475330538536,
      "grad_norm": 0.9323615832783858,
      "learning_rate": 5.649090281650682e-07,
      "loss": 0.1234,
      "step": 8603
    },
    {
      "epoch": 0.7927396692311236,
      "grad_norm": 0.9313543955066301,
      "learning_rate": 5.644268019400903e-07,
      "loss": 0.1118,
      "step": 8604
    },
    {
      "epoch": 0.7928318054083936,
      "grad_norm": 0.9637787040109845,
      "learning_rate": 5.639447554332928e-07,
      "loss": 0.1296,
      "step": 8605
    },
    {
      "epoch": 0.7929239415856636,
      "grad_norm": 0.9278148056738393,
      "learning_rate": 5.634628886894324e-07,
      "loss": 0.1084,
      "step": 8606
    },
    {
      "epoch": 0.7930160777629336,
      "grad_norm": 0.9547852357619248,
      "learning_rate": 5.629812017532515e-07,
      "loss": 0.1208,
      "step": 8607
    },
    {
      "epoch": 0.7931082139402036,
      "grad_norm": 0.9578763992541558,
      "learning_rate": 5.62499694669475e-07,
      "loss": 0.1269,
      "step": 8608
    },
    {
      "epoch": 0.7932003501174736,
      "grad_norm": 0.954352297806697,
      "learning_rate": 5.620183674828094e-07,
      "loss": 0.1304,
      "step": 8609
    },
    {
      "epoch": 0.7932924862947436,
      "grad_norm": 0.9215896860684726,
      "learning_rate": 5.615372202379482e-07,
      "loss": 0.1208,
      "step": 8610
    },
    {
      "epoch": 0.7933846224720137,
      "grad_norm": 0.8858931374801061,
      "learning_rate": 5.610562529795635e-07,
      "loss": 0.1101,
      "step": 8611
    },
    {
      "epoch": 0.7934767586492837,
      "grad_norm": 0.974136599445881,
      "learning_rate": 5.605754657523147e-07,
      "loss": 0.1253,
      "step": 8612
    },
    {
      "epoch": 0.7935688948265537,
      "grad_norm": 0.9639846350892,
      "learning_rate": 5.600948586008432e-07,
      "loss": 0.1208,
      "step": 8613
    },
    {
      "epoch": 0.7936610310038237,
      "grad_norm": 0.9604929500826802,
      "learning_rate": 5.59614431569773e-07,
      "loss": 0.122,
      "step": 8614
    },
    {
      "epoch": 0.7937531671810937,
      "grad_norm": 0.9931624424857912,
      "learning_rate": 5.591341847037107e-07,
      "loss": 0.1276,
      "step": 8615
    },
    {
      "epoch": 0.7938453033583637,
      "grad_norm": 0.9549498208056124,
      "learning_rate": 5.586541180472485e-07,
      "loss": 0.1233,
      "step": 8616
    },
    {
      "epoch": 0.7939374395356337,
      "grad_norm": 0.8682677683889473,
      "learning_rate": 5.581742316449601e-07,
      "loss": 0.1024,
      "step": 8617
    },
    {
      "epoch": 0.7940295757129037,
      "grad_norm": 0.9493113700230978,
      "learning_rate": 5.57694525541404e-07,
      "loss": 0.128,
      "step": 8618
    },
    {
      "epoch": 0.7941217118901737,
      "grad_norm": 0.9031374769038998,
      "learning_rate": 5.572149997811205e-07,
      "loss": 0.1172,
      "step": 8619
    },
    {
      "epoch": 0.7942138480674437,
      "grad_norm": 0.9079305929186919,
      "learning_rate": 5.567356544086325e-07,
      "loss": 0.1236,
      "step": 8620
    },
    {
      "epoch": 0.7943059842447137,
      "grad_norm": 0.9274268890673875,
      "learning_rate": 5.56256489468448e-07,
      "loss": 0.1204,
      "step": 8621
    },
    {
      "epoch": 0.7943981204219837,
      "grad_norm": 0.9514779351411853,
      "learning_rate": 5.557775050050584e-07,
      "loss": 0.1267,
      "step": 8622
    },
    {
      "epoch": 0.7944902565992537,
      "grad_norm": 0.8546480418647092,
      "learning_rate": 5.552987010629363e-07,
      "loss": 0.1062,
      "step": 8623
    },
    {
      "epoch": 0.7945823927765236,
      "grad_norm": 0.8933129494153353,
      "learning_rate": 5.54820077686539e-07,
      "loss": 0.1191,
      "step": 8624
    },
    {
      "epoch": 0.7946745289537938,
      "grad_norm": 0.8839504359155155,
      "learning_rate": 5.543416349203071e-07,
      "loss": 0.121,
      "step": 8625
    },
    {
      "epoch": 0.7947666651310638,
      "grad_norm": 0.9199143993897311,
      "learning_rate": 5.538633728086643e-07,
      "loss": 0.1174,
      "step": 8626
    },
    {
      "epoch": 0.7948588013083338,
      "grad_norm": 0.9234319246914519,
      "learning_rate": 5.533852913960158e-07,
      "loss": 0.1123,
      "step": 8627
    },
    {
      "epoch": 0.7949509374856037,
      "grad_norm": 0.9564128250888884,
      "learning_rate": 5.529073907267526e-07,
      "loss": 0.1296,
      "step": 8628
    },
    {
      "epoch": 0.7950430736628737,
      "grad_norm": 0.9131763952358534,
      "learning_rate": 5.524296708452476e-07,
      "loss": 0.1155,
      "step": 8629
    },
    {
      "epoch": 0.7951352098401437,
      "grad_norm": 0.9744724093653233,
      "learning_rate": 5.519521317958581e-07,
      "loss": 0.1262,
      "step": 8630
    },
    {
      "epoch": 0.7952273460174137,
      "grad_norm": 0.9906485184260504,
      "learning_rate": 5.514747736229225e-07,
      "loss": 0.1203,
      "step": 8631
    },
    {
      "epoch": 0.7953194821946837,
      "grad_norm": 0.9125838552784906,
      "learning_rate": 5.509975963707636e-07,
      "loss": 0.1228,
      "step": 8632
    },
    {
      "epoch": 0.7954116183719537,
      "grad_norm": 0.9283033424610722,
      "learning_rate": 5.505206000836874e-07,
      "loss": 0.1193,
      "step": 8633
    },
    {
      "epoch": 0.7955037545492237,
      "grad_norm": 0.9203411373141805,
      "learning_rate": 5.500437848059842e-07,
      "loss": 0.122,
      "step": 8634
    },
    {
      "epoch": 0.7955958907264937,
      "grad_norm": 0.9121874766032623,
      "learning_rate": 5.495671505819244e-07,
      "loss": 0.1182,
      "step": 8635
    },
    {
      "epoch": 0.7956880269037637,
      "grad_norm": 0.9416908807262255,
      "learning_rate": 5.490906974557655e-07,
      "loss": 0.1212,
      "step": 8636
    },
    {
      "epoch": 0.7957801630810337,
      "grad_norm": 0.9425940298472445,
      "learning_rate": 5.486144254717446e-07,
      "loss": 0.1258,
      "step": 8637
    },
    {
      "epoch": 0.7958722992583038,
      "grad_norm": 0.9684145490905561,
      "learning_rate": 5.481383346740843e-07,
      "loss": 0.1163,
      "step": 8638
    },
    {
      "epoch": 0.7959644354355738,
      "grad_norm": 0.9555213332359374,
      "learning_rate": 5.476624251069904e-07,
      "loss": 0.1226,
      "step": 8639
    },
    {
      "epoch": 0.7960565716128438,
      "grad_norm": 0.9718769525251739,
      "learning_rate": 5.471866968146497e-07,
      "loss": 0.1128,
      "step": 8640
    },
    {
      "epoch": 0.7961487077901138,
      "grad_norm": 0.9887299932093059,
      "learning_rate": 5.46711149841235e-07,
      "loss": 0.1306,
      "step": 8641
    },
    {
      "epoch": 0.7962408439673838,
      "grad_norm": 0.9353887995608104,
      "learning_rate": 5.462357842308997e-07,
      "loss": 0.1194,
      "step": 8642
    },
    {
      "epoch": 0.7963329801446538,
      "grad_norm": 0.9103463409879694,
      "learning_rate": 5.457606000277826e-07,
      "loss": 0.1155,
      "step": 8643
    },
    {
      "epoch": 0.7964251163219238,
      "grad_norm": 0.8959125789177207,
      "learning_rate": 5.452855972760035e-07,
      "loss": 0.1126,
      "step": 8644
    },
    {
      "epoch": 0.7965172524991938,
      "grad_norm": 0.9415821381686513,
      "learning_rate": 5.448107760196672e-07,
      "loss": 0.1086,
      "step": 8645
    },
    {
      "epoch": 0.7966093886764638,
      "grad_norm": 0.9484822727507379,
      "learning_rate": 5.443361363028612e-07,
      "loss": 0.1196,
      "step": 8646
    },
    {
      "epoch": 0.7967015248537338,
      "grad_norm": 0.9729626443315508,
      "learning_rate": 5.438616781696557e-07,
      "loss": 0.1292,
      "step": 8647
    },
    {
      "epoch": 0.7967936610310038,
      "grad_norm": 0.9903115214641103,
      "learning_rate": 5.43387401664103e-07,
      "loss": 0.1288,
      "step": 8648
    },
    {
      "epoch": 0.7968857972082738,
      "grad_norm": 0.9035118638518556,
      "learning_rate": 5.429133068302405e-07,
      "loss": 0.1161,
      "step": 8649
    },
    {
      "epoch": 0.7969779333855438,
      "grad_norm": 0.8673452284329635,
      "learning_rate": 5.424393937120884e-07,
      "loss": 0.0994,
      "step": 8650
    },
    {
      "epoch": 0.7970700695628138,
      "grad_norm": 0.9479150604513178,
      "learning_rate": 5.419656623536498e-07,
      "loss": 0.124,
      "step": 8651
    },
    {
      "epoch": 0.7971622057400839,
      "grad_norm": 0.8588902296167942,
      "learning_rate": 5.414921127989104e-07,
      "loss": 0.1042,
      "step": 8652
    },
    {
      "epoch": 0.7972543419173539,
      "grad_norm": 0.9322158612905973,
      "learning_rate": 5.410187450918381e-07,
      "loss": 0.1294,
      "step": 8653
    },
    {
      "epoch": 0.7973464780946239,
      "grad_norm": 0.9631584611479438,
      "learning_rate": 5.405455592763864e-07,
      "loss": 0.1296,
      "step": 8654
    },
    {
      "epoch": 0.7974386142718939,
      "grad_norm": 0.919274177152333,
      "learning_rate": 5.400725553964908e-07,
      "loss": 0.1147,
      "step": 8655
    },
    {
      "epoch": 0.7975307504491639,
      "grad_norm": 0.9032358397233256,
      "learning_rate": 5.39599733496069e-07,
      "loss": 0.1142,
      "step": 8656
    },
    {
      "epoch": 0.7976228866264339,
      "grad_norm": 0.9549715100342432,
      "learning_rate": 5.391270936190232e-07,
      "loss": 0.1179,
      "step": 8657
    },
    {
      "epoch": 0.7977150228037039,
      "grad_norm": 0.8774598769374871,
      "learning_rate": 5.386546358092376e-07,
      "loss": 0.112,
      "step": 8658
    },
    {
      "epoch": 0.7978071589809739,
      "grad_norm": 0.9155784556515261,
      "learning_rate": 5.381823601105804e-07,
      "loss": 0.1172,
      "step": 8659
    },
    {
      "epoch": 0.7978992951582439,
      "grad_norm": 0.8967035583649721,
      "learning_rate": 5.377102665669018e-07,
      "loss": 0.1094,
      "step": 8660
    },
    {
      "epoch": 0.7979914313355139,
      "grad_norm": 0.9265403880030733,
      "learning_rate": 5.372383552220358e-07,
      "loss": 0.1221,
      "step": 8661
    },
    {
      "epoch": 0.7980835675127839,
      "grad_norm": 0.9252246274238533,
      "learning_rate": 5.36766626119801e-07,
      "loss": 0.1191,
      "step": 8662
    },
    {
      "epoch": 0.7981757036900539,
      "grad_norm": 0.9738521523534015,
      "learning_rate": 5.362950793039959e-07,
      "loss": 0.1222,
      "step": 8663
    },
    {
      "epoch": 0.7982678398673239,
      "grad_norm": 0.9111419202800259,
      "learning_rate": 5.358237148184034e-07,
      "loss": 0.1258,
      "step": 8664
    },
    {
      "epoch": 0.7983599760445939,
      "grad_norm": 0.8727192448465615,
      "learning_rate": 5.353525327067902e-07,
      "loss": 0.103,
      "step": 8665
    },
    {
      "epoch": 0.798452112221864,
      "grad_norm": 0.9316417754395357,
      "learning_rate": 5.348815330129059e-07,
      "loss": 0.1191,
      "step": 8666
    },
    {
      "epoch": 0.798544248399134,
      "grad_norm": 0.9191480528464787,
      "learning_rate": 5.344107157804834e-07,
      "loss": 0.1198,
      "step": 8667
    },
    {
      "epoch": 0.798636384576404,
      "grad_norm": 0.9361991190966118,
      "learning_rate": 5.339400810532375e-07,
      "loss": 0.1292,
      "step": 8668
    },
    {
      "epoch": 0.798728520753674,
      "grad_norm": 0.9428379963087531,
      "learning_rate": 5.334696288748661e-07,
      "loss": 0.1162,
      "step": 8669
    },
    {
      "epoch": 0.798820656930944,
      "grad_norm": 0.8926886143774425,
      "learning_rate": 5.329993592890512e-07,
      "loss": 0.1116,
      "step": 8670
    },
    {
      "epoch": 0.798912793108214,
      "grad_norm": 0.9405908396749681,
      "learning_rate": 5.325292723394573e-07,
      "loss": 0.1155,
      "step": 8671
    },
    {
      "epoch": 0.799004929285484,
      "grad_norm": 0.8988881879842964,
      "learning_rate": 5.320593680697331e-07,
      "loss": 0.1146,
      "step": 8672
    },
    {
      "epoch": 0.7990970654627539,
      "grad_norm": 0.9517179598930791,
      "learning_rate": 5.315896465235084e-07,
      "loss": 0.1254,
      "step": 8673
    },
    {
      "epoch": 0.7991892016400239,
      "grad_norm": 0.905955864737142,
      "learning_rate": 5.311201077443961e-07,
      "loss": 0.1063,
      "step": 8674
    },
    {
      "epoch": 0.7992813378172939,
      "grad_norm": 0.9489280018205543,
      "learning_rate": 5.306507517759937e-07,
      "loss": 0.1186,
      "step": 8675
    },
    {
      "epoch": 0.7993734739945639,
      "grad_norm": 0.9753417396799335,
      "learning_rate": 5.301815786618816e-07,
      "loss": 0.1227,
      "step": 8676
    },
    {
      "epoch": 0.7994656101718339,
      "grad_norm": 0.920701395541936,
      "learning_rate": 5.297125884456214e-07,
      "loss": 0.1161,
      "step": 8677
    },
    {
      "epoch": 0.7995577463491039,
      "grad_norm": 0.9251078334924024,
      "learning_rate": 5.292437811707599e-07,
      "loss": 0.1262,
      "step": 8678
    },
    {
      "epoch": 0.799649882526374,
      "grad_norm": 0.9327552040475929,
      "learning_rate": 5.287751568808247e-07,
      "loss": 0.1186,
      "step": 8679
    },
    {
      "epoch": 0.799742018703644,
      "grad_norm": 0.9619542917813455,
      "learning_rate": 5.283067156193292e-07,
      "loss": 0.1203,
      "step": 8680
    },
    {
      "epoch": 0.799834154880914,
      "grad_norm": 0.9289332567081827,
      "learning_rate": 5.278384574297665e-07,
      "loss": 0.1234,
      "step": 8681
    },
    {
      "epoch": 0.799926291058184,
      "grad_norm": 0.9064646891261017,
      "learning_rate": 5.273703823556153e-07,
      "loss": 0.1223,
      "step": 8682
    },
    {
      "epoch": 0.800018427235454,
      "grad_norm": 0.9316922917712442,
      "learning_rate": 5.269024904403372e-07,
      "loss": 0.1134,
      "step": 8683
    },
    {
      "epoch": 0.800110563412724,
      "grad_norm": 0.9607883705940592,
      "learning_rate": 5.264347817273752e-07,
      "loss": 0.1259,
      "step": 8684
    },
    {
      "epoch": 0.800202699589994,
      "grad_norm": 0.9179273484417532,
      "learning_rate": 5.259672562601553e-07,
      "loss": 0.1121,
      "step": 8685
    },
    {
      "epoch": 0.800294835767264,
      "grad_norm": 0.9843152416241994,
      "learning_rate": 5.25499914082088e-07,
      "loss": 0.1197,
      "step": 8686
    },
    {
      "epoch": 0.800386971944534,
      "grad_norm": 0.9651351060190287,
      "learning_rate": 5.250327552365664e-07,
      "loss": 0.1215,
      "step": 8687
    },
    {
      "epoch": 0.800479108121804,
      "grad_norm": 0.9119186870911993,
      "learning_rate": 5.245657797669665e-07,
      "loss": 0.1174,
      "step": 8688
    },
    {
      "epoch": 0.800571244299074,
      "grad_norm": 0.9238367856143697,
      "learning_rate": 5.24098987716647e-07,
      "loss": 0.1147,
      "step": 8689
    },
    {
      "epoch": 0.800663380476344,
      "grad_norm": 0.9374656049331135,
      "learning_rate": 5.236323791289479e-07,
      "loss": 0.12,
      "step": 8690
    },
    {
      "epoch": 0.800755516653614,
      "grad_norm": 0.9654942943206993,
      "learning_rate": 5.231659540471954e-07,
      "loss": 0.1316,
      "step": 8691
    },
    {
      "epoch": 0.800847652830884,
      "grad_norm": 0.9591977272622936,
      "learning_rate": 5.226997125146973e-07,
      "loss": 0.1253,
      "step": 8692
    },
    {
      "epoch": 0.8009397890081541,
      "grad_norm": 0.9494439671142985,
      "learning_rate": 5.222336545747434e-07,
      "loss": 0.1198,
      "step": 8693
    },
    {
      "epoch": 0.8010319251854241,
      "grad_norm": 0.9537456425037063,
      "learning_rate": 5.217677802706078e-07,
      "loss": 0.1193,
      "step": 8694
    },
    {
      "epoch": 0.8011240613626941,
      "grad_norm": 0.9259471935018194,
      "learning_rate": 5.213020896455462e-07,
      "loss": 0.1141,
      "step": 8695
    },
    {
      "epoch": 0.8012161975399641,
      "grad_norm": 0.9460346609610221,
      "learning_rate": 5.208365827427985e-07,
      "loss": 0.1207,
      "step": 8696
    },
    {
      "epoch": 0.8013083337172341,
      "grad_norm": 0.9684295469141999,
      "learning_rate": 5.203712596055876e-07,
      "loss": 0.127,
      "step": 8697
    },
    {
      "epoch": 0.8014004698945041,
      "grad_norm": 0.9765445354106874,
      "learning_rate": 5.19906120277118e-07,
      "loss": 0.1216,
      "step": 8698
    },
    {
      "epoch": 0.8014926060717741,
      "grad_norm": 0.8910773017327671,
      "learning_rate": 5.194411648005778e-07,
      "loss": 0.1112,
      "step": 8699
    },
    {
      "epoch": 0.8015847422490441,
      "grad_norm": 1.006495006076603,
      "learning_rate": 5.189763932191396e-07,
      "loss": 0.1264,
      "step": 8700
    },
    {
      "epoch": 0.8016768784263141,
      "grad_norm": 0.8756991823220688,
      "learning_rate": 5.185118055759564e-07,
      "loss": 0.116,
      "step": 8701
    },
    {
      "epoch": 0.8017690146035841,
      "grad_norm": 1.0276078013137269,
      "learning_rate": 5.180474019141646e-07,
      "loss": 0.1299,
      "step": 8702
    },
    {
      "epoch": 0.8018611507808541,
      "grad_norm": 1.008868504080066,
      "learning_rate": 5.175831822768848e-07,
      "loss": 0.1204,
      "step": 8703
    },
    {
      "epoch": 0.8019532869581241,
      "grad_norm": 0.9189685434248478,
      "learning_rate": 5.171191467072201e-07,
      "loss": 0.1096,
      "step": 8704
    },
    {
      "epoch": 0.8020454231353941,
      "grad_norm": 0.9502132889598285,
      "learning_rate": 5.166552952482565e-07,
      "loss": 0.1232,
      "step": 8705
    },
    {
      "epoch": 0.8021375593126642,
      "grad_norm": 0.8942341109417716,
      "learning_rate": 5.161916279430623e-07,
      "loss": 0.1178,
      "step": 8706
    },
    {
      "epoch": 0.8022296954899342,
      "grad_norm": 1.00440052137423,
      "learning_rate": 5.157281448346882e-07,
      "loss": 0.1317,
      "step": 8707
    },
    {
      "epoch": 0.8023218316672042,
      "grad_norm": 0.9128632666446306,
      "learning_rate": 5.152648459661694e-07,
      "loss": 0.1075,
      "step": 8708
    },
    {
      "epoch": 0.8024139678444742,
      "grad_norm": 0.9950914671950657,
      "learning_rate": 5.148017313805237e-07,
      "loss": 0.1369,
      "step": 8709
    },
    {
      "epoch": 0.8025061040217442,
      "grad_norm": 0.9559709503514494,
      "learning_rate": 5.143388011207506e-07,
      "loss": 0.1216,
      "step": 8710
    },
    {
      "epoch": 0.8025982401990142,
      "grad_norm": 0.9386882981548607,
      "learning_rate": 5.138760552298338e-07,
      "loss": 0.1104,
      "step": 8711
    },
    {
      "epoch": 0.8026903763762842,
      "grad_norm": 0.9275125291849861,
      "learning_rate": 5.134134937507387e-07,
      "loss": 0.1152,
      "step": 8712
    },
    {
      "epoch": 0.8027825125535542,
      "grad_norm": 0.9156843741584696,
      "learning_rate": 5.129511167264151e-07,
      "loss": 0.1127,
      "step": 8713
    },
    {
      "epoch": 0.8028746487308241,
      "grad_norm": 0.9505867161038387,
      "learning_rate": 5.124889241997935e-07,
      "loss": 0.1187,
      "step": 8714
    },
    {
      "epoch": 0.8029667849080941,
      "grad_norm": 0.99898568796701,
      "learning_rate": 5.120269162137889e-07,
      "loss": 0.1231,
      "step": 8715
    },
    {
      "epoch": 0.8030589210853641,
      "grad_norm": 0.9897462954997086,
      "learning_rate": 5.115650928113e-07,
      "loss": 0.1229,
      "step": 8716
    },
    {
      "epoch": 0.8031510572626341,
      "grad_norm": 0.945085721014716,
      "learning_rate": 5.111034540352064e-07,
      "loss": 0.1245,
      "step": 8717
    },
    {
      "epoch": 0.8032431934399041,
      "grad_norm": 0.9743912216678187,
      "learning_rate": 5.106419999283702e-07,
      "loss": 0.1279,
      "step": 8718
    },
    {
      "epoch": 0.8033353296171741,
      "grad_norm": 0.9710579013133132,
      "learning_rate": 5.101807305336385e-07,
      "loss": 0.1339,
      "step": 8719
    },
    {
      "epoch": 0.8034274657944442,
      "grad_norm": 0.9155853366901943,
      "learning_rate": 5.0971964589384e-07,
      "loss": 0.1218,
      "step": 8720
    },
    {
      "epoch": 0.8035196019717142,
      "grad_norm": 0.958206277205222,
      "learning_rate": 5.092587460517873e-07,
      "loss": 0.1239,
      "step": 8721
    },
    {
      "epoch": 0.8036117381489842,
      "grad_norm": 0.959328723295657,
      "learning_rate": 5.087980310502743e-07,
      "loss": 0.1182,
      "step": 8722
    },
    {
      "epoch": 0.8037038743262542,
      "grad_norm": 0.905234879072177,
      "learning_rate": 5.083375009320779e-07,
      "loss": 0.1129,
      "step": 8723
    },
    {
      "epoch": 0.8037960105035242,
      "grad_norm": 0.8962931301615531,
      "learning_rate": 5.078771557399586e-07,
      "loss": 0.1242,
      "step": 8724
    },
    {
      "epoch": 0.8038881466807942,
      "grad_norm": 0.9194498456557597,
      "learning_rate": 5.0741699551666e-07,
      "loss": 0.1107,
      "step": 8725
    },
    {
      "epoch": 0.8039802828580642,
      "grad_norm": 0.9100045586682919,
      "learning_rate": 5.069570203049085e-07,
      "loss": 0.1124,
      "step": 8726
    },
    {
      "epoch": 0.8040724190353342,
      "grad_norm": 0.9295523104342945,
      "learning_rate": 5.06497230147412e-07,
      "loss": 0.1179,
      "step": 8727
    },
    {
      "epoch": 0.8041645552126042,
      "grad_norm": 0.9613944140250847,
      "learning_rate": 5.060376250868615e-07,
      "loss": 0.1192,
      "step": 8728
    },
    {
      "epoch": 0.8042566913898742,
      "grad_norm": 0.9457683779137299,
      "learning_rate": 5.055782051659322e-07,
      "loss": 0.1268,
      "step": 8729
    },
    {
      "epoch": 0.8043488275671442,
      "grad_norm": 0.9653237653604977,
      "learning_rate": 5.051189704272819e-07,
      "loss": 0.1179,
      "step": 8730
    },
    {
      "epoch": 0.8044409637444142,
      "grad_norm": 0.9299770816436291,
      "learning_rate": 5.046599209135492e-07,
      "loss": 0.1243,
      "step": 8731
    },
    {
      "epoch": 0.8045330999216842,
      "grad_norm": 0.9681021548052552,
      "learning_rate": 5.042010566673583e-07,
      "loss": 0.1159,
      "step": 8732
    },
    {
      "epoch": 0.8046252360989542,
      "grad_norm": 0.8858392947078415,
      "learning_rate": 5.037423777313132e-07,
      "loss": 0.1032,
      "step": 8733
    },
    {
      "epoch": 0.8047173722762243,
      "grad_norm": 0.9198330998999283,
      "learning_rate": 5.032838841480042e-07,
      "loss": 0.105,
      "step": 8734
    },
    {
      "epoch": 0.8048095084534943,
      "grad_norm": 0.948083821447076,
      "learning_rate": 5.028255759600004e-07,
      "loss": 0.1232,
      "step": 8735
    },
    {
      "epoch": 0.8049016446307643,
      "grad_norm": 0.8914840218195179,
      "learning_rate": 5.023674532098571e-07,
      "loss": 0.1129,
      "step": 8736
    },
    {
      "epoch": 0.8049937808080343,
      "grad_norm": 0.9627700380196931,
      "learning_rate": 5.019095159401113e-07,
      "loss": 0.1217,
      "step": 8737
    },
    {
      "epoch": 0.8050859169853043,
      "grad_norm": 0.9673563842251529,
      "learning_rate": 5.01451764193282e-07,
      "loss": 0.1292,
      "step": 8738
    },
    {
      "epoch": 0.8051780531625743,
      "grad_norm": 0.9294744242883962,
      "learning_rate": 5.009941980118707e-07,
      "loss": 0.1202,
      "step": 8739
    },
    {
      "epoch": 0.8052701893398443,
      "grad_norm": 0.9123434635710035,
      "learning_rate": 5.005368174383634e-07,
      "loss": 0.1123,
      "step": 8740
    },
    {
      "epoch": 0.8053623255171143,
      "grad_norm": 0.8951060347141389,
      "learning_rate": 5.000796225152277e-07,
      "loss": 0.1152,
      "step": 8741
    },
    {
      "epoch": 0.8054544616943843,
      "grad_norm": 0.9299640822902256,
      "learning_rate": 4.996226132849149e-07,
      "loss": 0.1098,
      "step": 8742
    },
    {
      "epoch": 0.8055465978716543,
      "grad_norm": 0.9759922313677241,
      "learning_rate": 4.99165789789858e-07,
      "loss": 0.1253,
      "step": 8743
    },
    {
      "epoch": 0.8056387340489243,
      "grad_norm": 0.9719818047810473,
      "learning_rate": 4.987091520724721e-07,
      "loss": 0.1178,
      "step": 8744
    },
    {
      "epoch": 0.8057308702261943,
      "grad_norm": 0.9429694738572041,
      "learning_rate": 4.982527001751567e-07,
      "loss": 0.1197,
      "step": 8745
    },
    {
      "epoch": 0.8058230064034643,
      "grad_norm": 0.9728986864512471,
      "learning_rate": 4.977964341402941e-07,
      "loss": 0.1221,
      "step": 8746
    },
    {
      "epoch": 0.8059151425807344,
      "grad_norm": 0.8994651161363463,
      "learning_rate": 4.973403540102476e-07,
      "loss": 0.1085,
      "step": 8747
    },
    {
      "epoch": 0.8060072787580044,
      "grad_norm": 0.888762649636477,
      "learning_rate": 4.968844598273653e-07,
      "loss": 0.1055,
      "step": 8748
    },
    {
      "epoch": 0.8060994149352744,
      "grad_norm": 0.9187766828800995,
      "learning_rate": 4.964287516339758e-07,
      "loss": 0.111,
      "step": 8749
    },
    {
      "epoch": 0.8061915511125444,
      "grad_norm": 0.9374319253049951,
      "learning_rate": 4.959732294723932e-07,
      "loss": 0.1192,
      "step": 8750
    },
    {
      "epoch": 0.8062836872898144,
      "grad_norm": 0.9165076691998195,
      "learning_rate": 4.955178933849111e-07,
      "loss": 0.1178,
      "step": 8751
    },
    {
      "epoch": 0.8063758234670844,
      "grad_norm": 0.917632651699605,
      "learning_rate": 4.950627434138083e-07,
      "loss": 0.1223,
      "step": 8752
    },
    {
      "epoch": 0.8064679596443544,
      "grad_norm": 0.939684098512974,
      "learning_rate": 4.946077796013462e-07,
      "loss": 0.1138,
      "step": 8753
    },
    {
      "epoch": 0.8065600958216244,
      "grad_norm": 0.9575212037934554,
      "learning_rate": 4.941530019897669e-07,
      "loss": 0.1214,
      "step": 8754
    },
    {
      "epoch": 0.8066522319988944,
      "grad_norm": 0.9044347534310851,
      "learning_rate": 4.93698410621298e-07,
      "loss": 0.1144,
      "step": 8755
    },
    {
      "epoch": 0.8067443681761644,
      "grad_norm": 0.9558305259406638,
      "learning_rate": 4.932440055381471e-07,
      "loss": 0.1187,
      "step": 8756
    },
    {
      "epoch": 0.8068365043534343,
      "grad_norm": 0.9950767718575722,
      "learning_rate": 4.92789786782506e-07,
      "loss": 0.1384,
      "step": 8757
    },
    {
      "epoch": 0.8069286405307043,
      "grad_norm": 0.9110712406529344,
      "learning_rate": 4.923357543965498e-07,
      "loss": 0.1064,
      "step": 8758
    },
    {
      "epoch": 0.8070207767079743,
      "grad_norm": 0.9519585325823425,
      "learning_rate": 4.918819084224353e-07,
      "loss": 0.109,
      "step": 8759
    },
    {
      "epoch": 0.8071129128852443,
      "grad_norm": 0.947790510305991,
      "learning_rate": 4.914282489023006e-07,
      "loss": 0.1275,
      "step": 8760
    },
    {
      "epoch": 0.8072050490625144,
      "grad_norm": 1.0226051981887092,
      "learning_rate": 4.909747758782693e-07,
      "loss": 0.1239,
      "step": 8761
    },
    {
      "epoch": 0.8072971852397844,
      "grad_norm": 0.9217178327591092,
      "learning_rate": 4.905214893924462e-07,
      "loss": 0.1128,
      "step": 8762
    },
    {
      "epoch": 0.8073893214170544,
      "grad_norm": 0.9737955286681498,
      "learning_rate": 4.900683894869198e-07,
      "loss": 0.1307,
      "step": 8763
    },
    {
      "epoch": 0.8074814575943244,
      "grad_norm": 0.9299037243211407,
      "learning_rate": 4.8961547620376e-07,
      "loss": 0.1242,
      "step": 8764
    },
    {
      "epoch": 0.8075735937715944,
      "grad_norm": 0.9475722332886723,
      "learning_rate": 4.891627495850188e-07,
      "loss": 0.1142,
      "step": 8765
    },
    {
      "epoch": 0.8076657299488644,
      "grad_norm": 0.9662419417795653,
      "learning_rate": 4.887102096727326e-07,
      "loss": 0.1167,
      "step": 8766
    },
    {
      "epoch": 0.8077578661261344,
      "grad_norm": 0.9339979179707536,
      "learning_rate": 4.882578565089205e-07,
      "loss": 0.1185,
      "step": 8767
    },
    {
      "epoch": 0.8078500023034044,
      "grad_norm": 0.9596979033805477,
      "learning_rate": 4.878056901355823e-07,
      "loss": 0.1279,
      "step": 8768
    },
    {
      "epoch": 0.8079421384806744,
      "grad_norm": 0.9218071788920659,
      "learning_rate": 4.873537105947029e-07,
      "loss": 0.1229,
      "step": 8769
    },
    {
      "epoch": 0.8080342746579444,
      "grad_norm": 0.9480480193282101,
      "learning_rate": 4.869019179282478e-07,
      "loss": 0.1285,
      "step": 8770
    },
    {
      "epoch": 0.8081264108352144,
      "grad_norm": 0.9074065825918513,
      "learning_rate": 4.864503121781666e-07,
      "loss": 0.1232,
      "step": 8771
    },
    {
      "epoch": 0.8082185470124844,
      "grad_norm": 0.9385402031446006,
      "learning_rate": 4.859988933863898e-07,
      "loss": 0.1199,
      "step": 8772
    },
    {
      "epoch": 0.8083106831897544,
      "grad_norm": 0.9707115923915903,
      "learning_rate": 4.85547661594833e-07,
      "loss": 0.1255,
      "step": 8773
    },
    {
      "epoch": 0.8084028193670245,
      "grad_norm": 0.9897153814911818,
      "learning_rate": 4.850966168453922e-07,
      "loss": 0.1305,
      "step": 8774
    },
    {
      "epoch": 0.8084949555442945,
      "grad_norm": 0.9438068143981858,
      "learning_rate": 4.846457591799489e-07,
      "loss": 0.1166,
      "step": 8775
    },
    {
      "epoch": 0.8085870917215645,
      "grad_norm": 0.929569865631667,
      "learning_rate": 4.841950886403623e-07,
      "loss": 0.1201,
      "step": 8776
    },
    {
      "epoch": 0.8086792278988345,
      "grad_norm": 0.9544358854771392,
      "learning_rate": 4.837446052684788e-07,
      "loss": 0.1337,
      "step": 8777
    },
    {
      "epoch": 0.8087713640761045,
      "grad_norm": 0.8648015438610358,
      "learning_rate": 4.832943091061257e-07,
      "loss": 0.1097,
      "step": 8778
    },
    {
      "epoch": 0.8088635002533745,
      "grad_norm": 0.9271638424570154,
      "learning_rate": 4.828442001951136e-07,
      "loss": 0.112,
      "step": 8779
    },
    {
      "epoch": 0.8089556364306445,
      "grad_norm": 0.8825506069463267,
      "learning_rate": 4.82394278577234e-07,
      "loss": 0.1071,
      "step": 8780
    },
    {
      "epoch": 0.8090477726079145,
      "grad_norm": 0.982444580480559,
      "learning_rate": 4.819445442942633e-07,
      "loss": 0.13,
      "step": 8781
    },
    {
      "epoch": 0.8091399087851845,
      "grad_norm": 0.9338257947288606,
      "learning_rate": 4.814949973879582e-07,
      "loss": 0.1148,
      "step": 8782
    },
    {
      "epoch": 0.8092320449624545,
      "grad_norm": 0.9345277141507186,
      "learning_rate": 4.8104563790006e-07,
      "loss": 0.1189,
      "step": 8783
    },
    {
      "epoch": 0.8093241811397245,
      "grad_norm": 0.9422878793863659,
      "learning_rate": 4.805964658722922e-07,
      "loss": 0.1242,
      "step": 8784
    },
    {
      "epoch": 0.8094163173169945,
      "grad_norm": 0.9994874290723805,
      "learning_rate": 4.801474813463591e-07,
      "loss": 0.1259,
      "step": 8785
    },
    {
      "epoch": 0.8095084534942645,
      "grad_norm": 1.0136117696237055,
      "learning_rate": 4.796986843639506e-07,
      "loss": 0.1269,
      "step": 8786
    },
    {
      "epoch": 0.8096005896715345,
      "grad_norm": 0.9477824056753072,
      "learning_rate": 4.792500749667359e-07,
      "loss": 0.1225,
      "step": 8787
    },
    {
      "epoch": 0.8096927258488046,
      "grad_norm": 0.9447988320137191,
      "learning_rate": 4.788016531963699e-07,
      "loss": 0.1164,
      "step": 8788
    },
    {
      "epoch": 0.8097848620260746,
      "grad_norm": 0.9110046721363823,
      "learning_rate": 4.783534190944872e-07,
      "loss": 0.1176,
      "step": 8789
    },
    {
      "epoch": 0.8098769982033446,
      "grad_norm": 0.9650411635983291,
      "learning_rate": 4.779053727027072e-07,
      "loss": 0.118,
      "step": 8790
    },
    {
      "epoch": 0.8099691343806146,
      "grad_norm": 0.9194160816683928,
      "learning_rate": 4.774575140626317e-07,
      "loss": 0.1167,
      "step": 8791
    },
    {
      "epoch": 0.8100612705578846,
      "grad_norm": 0.9730949942743494,
      "learning_rate": 4.770098432158434e-07,
      "loss": 0.1186,
      "step": 8792
    },
    {
      "epoch": 0.8101534067351546,
      "grad_norm": 1.0026966470323755,
      "learning_rate": 4.765623602039085e-07,
      "loss": 0.1261,
      "step": 8793
    },
    {
      "epoch": 0.8102455429124246,
      "grad_norm": 0.8959270267150095,
      "learning_rate": 4.76115065068376e-07,
      "loss": 0.1143,
      "step": 8794
    },
    {
      "epoch": 0.8103376790896946,
      "grad_norm": 0.9745090604240599,
      "learning_rate": 4.756679578507778e-07,
      "loss": 0.1198,
      "step": 8795
    },
    {
      "epoch": 0.8104298152669646,
      "grad_norm": 0.9752302606638509,
      "learning_rate": 4.7522103859262813e-07,
      "loss": 0.1246,
      "step": 8796
    },
    {
      "epoch": 0.8105219514442346,
      "grad_norm": 0.9154425716815816,
      "learning_rate": 4.7477430733542273e-07,
      "loss": 0.1088,
      "step": 8797
    },
    {
      "epoch": 0.8106140876215046,
      "grad_norm": 0.9209746568913927,
      "learning_rate": 4.7432776412064034e-07,
      "loss": 0.1164,
      "step": 8798
    },
    {
      "epoch": 0.8107062237987746,
      "grad_norm": 0.8858703515048639,
      "learning_rate": 4.738814089897431e-07,
      "loss": 0.1118,
      "step": 8799
    },
    {
      "epoch": 0.8107983599760445,
      "grad_norm": 0.9071064876375858,
      "learning_rate": 4.734352419841756e-07,
      "loss": 0.1128,
      "step": 8800
    },
    {
      "epoch": 0.8108904961533145,
      "grad_norm": 0.9638341312522045,
      "learning_rate": 4.7298926314536364e-07,
      "loss": 0.1121,
      "step": 8801
    },
    {
      "epoch": 0.8109826323305847,
      "grad_norm": 0.9563234435681683,
      "learning_rate": 4.725434725147171e-07,
      "loss": 0.1154,
      "step": 8802
    },
    {
      "epoch": 0.8110747685078546,
      "grad_norm": 0.96543556775914,
      "learning_rate": 4.720978701336268e-07,
      "loss": 0.1266,
      "step": 8803
    },
    {
      "epoch": 0.8111669046851246,
      "grad_norm": 0.9933176223015399,
      "learning_rate": 4.716524560434679e-07,
      "loss": 0.1339,
      "step": 8804
    },
    {
      "epoch": 0.8112590408623946,
      "grad_norm": 0.9202425773610461,
      "learning_rate": 4.7120723028559633e-07,
      "loss": 0.1199,
      "step": 8805
    },
    {
      "epoch": 0.8113511770396646,
      "grad_norm": 0.9422875480282432,
      "learning_rate": 4.70762192901352e-07,
      "loss": 0.1225,
      "step": 8806
    },
    {
      "epoch": 0.8114433132169346,
      "grad_norm": 0.9709769506635573,
      "learning_rate": 4.7031734393205683e-07,
      "loss": 0.1239,
      "step": 8807
    },
    {
      "epoch": 0.8115354493942046,
      "grad_norm": 0.9206405274403489,
      "learning_rate": 4.6987268341901455e-07,
      "loss": 0.1179,
      "step": 8808
    },
    {
      "epoch": 0.8116275855714746,
      "grad_norm": 0.9756799458791882,
      "learning_rate": 4.6942821140351174e-07,
      "loss": 0.1174,
      "step": 8809
    },
    {
      "epoch": 0.8117197217487446,
      "grad_norm": 0.9470559153669584,
      "learning_rate": 4.6898392792681796e-07,
      "loss": 0.1187,
      "step": 8810
    },
    {
      "epoch": 0.8118118579260146,
      "grad_norm": 0.9201916935032508,
      "learning_rate": 4.6853983303018493e-07,
      "loss": 0.1206,
      "step": 8811
    },
    {
      "epoch": 0.8119039941032846,
      "grad_norm": 0.9265359599326323,
      "learning_rate": 4.680959267548479e-07,
      "loss": 0.1181,
      "step": 8812
    },
    {
      "epoch": 0.8119961302805546,
      "grad_norm": 0.9643827677740148,
      "learning_rate": 4.676522091420227e-07,
      "loss": 0.1295,
      "step": 8813
    },
    {
      "epoch": 0.8120882664578246,
      "grad_norm": 0.9221162699337527,
      "learning_rate": 4.672086802329079e-07,
      "loss": 0.1106,
      "step": 8814
    },
    {
      "epoch": 0.8121804026350947,
      "grad_norm": 0.9718760958585148,
      "learning_rate": 4.667653400686858e-07,
      "loss": 0.1226,
      "step": 8815
    },
    {
      "epoch": 0.8122725388123647,
      "grad_norm": 0.8960897169593115,
      "learning_rate": 4.6632218869052085e-07,
      "loss": 0.1046,
      "step": 8816
    },
    {
      "epoch": 0.8123646749896347,
      "grad_norm": 0.9716052904056378,
      "learning_rate": 4.6587922613956005e-07,
      "loss": 0.1274,
      "step": 8817
    },
    {
      "epoch": 0.8124568111669047,
      "grad_norm": 0.9329622015203816,
      "learning_rate": 4.6543645245693215e-07,
      "loss": 0.1234,
      "step": 8818
    },
    {
      "epoch": 0.8125489473441747,
      "grad_norm": 0.9354174342448097,
      "learning_rate": 4.649938676837479e-07,
      "loss": 0.1283,
      "step": 8819
    },
    {
      "epoch": 0.8126410835214447,
      "grad_norm": 0.9504482807872802,
      "learning_rate": 4.6455147186110217e-07,
      "loss": 0.1253,
      "step": 8820
    },
    {
      "epoch": 0.8127332196987147,
      "grad_norm": 0.8710928394218936,
      "learning_rate": 4.6410926503007187e-07,
      "loss": 0.1121,
      "step": 8821
    },
    {
      "epoch": 0.8128253558759847,
      "grad_norm": 0.9167945722916967,
      "learning_rate": 4.636672472317147e-07,
      "loss": 0.1219,
      "step": 8822
    },
    {
      "epoch": 0.8129174920532547,
      "grad_norm": 0.931250359567654,
      "learning_rate": 4.6322541850707336e-07,
      "loss": 0.1179,
      "step": 8823
    },
    {
      "epoch": 0.8130096282305247,
      "grad_norm": 0.9173628142401588,
      "learning_rate": 4.6278377889717064e-07,
      "loss": 0.1201,
      "step": 8824
    },
    {
      "epoch": 0.8131017644077947,
      "grad_norm": 0.9259153539552193,
      "learning_rate": 4.62342328443014e-07,
      "loss": 0.1073,
      "step": 8825
    },
    {
      "epoch": 0.8131939005850647,
      "grad_norm": 0.9620459880501422,
      "learning_rate": 4.6190106718559056e-07,
      "loss": 0.1228,
      "step": 8826
    },
    {
      "epoch": 0.8132860367623347,
      "grad_norm": 0.8858775284263333,
      "learning_rate": 4.614599951658727e-07,
      "loss": 0.1136,
      "step": 8827
    },
    {
      "epoch": 0.8133781729396047,
      "grad_norm": 0.8963968749457515,
      "learning_rate": 4.6101911242481396e-07,
      "loss": 0.1072,
      "step": 8828
    },
    {
      "epoch": 0.8134703091168748,
      "grad_norm": 0.9429238031032099,
      "learning_rate": 4.605784190033502e-07,
      "loss": 0.1195,
      "step": 8829
    },
    {
      "epoch": 0.8135624452941448,
      "grad_norm": 0.9081363924275805,
      "learning_rate": 4.6013791494239927e-07,
      "loss": 0.1129,
      "step": 8830
    },
    {
      "epoch": 0.8136545814714148,
      "grad_norm": 0.928463106569425,
      "learning_rate": 4.5969760028286236e-07,
      "loss": 0.1193,
      "step": 8831
    },
    {
      "epoch": 0.8137467176486848,
      "grad_norm": 0.9505794219950519,
      "learning_rate": 4.5925747506562287e-07,
      "loss": 0.1192,
      "step": 8832
    },
    {
      "epoch": 0.8138388538259548,
      "grad_norm": 0.9083831854315285,
      "learning_rate": 4.5881753933154695e-07,
      "loss": 0.1143,
      "step": 8833
    },
    {
      "epoch": 0.8139309900032248,
      "grad_norm": 1.0098806197047803,
      "learning_rate": 4.5837779312148225e-07,
      "loss": 0.1279,
      "step": 8834
    },
    {
      "epoch": 0.8140231261804948,
      "grad_norm": 0.9610060245617507,
      "learning_rate": 4.5793823647625856e-07,
      "loss": 0.129,
      "step": 8835
    },
    {
      "epoch": 0.8141152623577648,
      "grad_norm": 0.9499210345688431,
      "learning_rate": 4.574988694366894e-07,
      "loss": 0.1237,
      "step": 8836
    },
    {
      "epoch": 0.8142073985350348,
      "grad_norm": 0.916244337778968,
      "learning_rate": 4.570596920435708e-07,
      "loss": 0.1119,
      "step": 8837
    },
    {
      "epoch": 0.8142995347123048,
      "grad_norm": 0.9170844203897126,
      "learning_rate": 4.566207043376789e-07,
      "loss": 0.1114,
      "step": 8838
    },
    {
      "epoch": 0.8143916708895748,
      "grad_norm": 0.9254240160408157,
      "learning_rate": 4.56181906359775e-07,
      "loss": 0.123,
      "step": 8839
    },
    {
      "epoch": 0.8144838070668448,
      "grad_norm": 0.9655610903066298,
      "learning_rate": 4.557432981506005e-07,
      "loss": 0.1217,
      "step": 8840
    },
    {
      "epoch": 0.8145759432441148,
      "grad_norm": 0.9875316818462202,
      "learning_rate": 4.5530487975088076e-07,
      "loss": 0.1194,
      "step": 8841
    },
    {
      "epoch": 0.8146680794213849,
      "grad_norm": 0.9574896478337922,
      "learning_rate": 4.548666512013236e-07,
      "loss": 0.1219,
      "step": 8842
    },
    {
      "epoch": 0.8147602155986549,
      "grad_norm": 0.921246124798205,
      "learning_rate": 4.5442861254261753e-07,
      "loss": 0.1186,
      "step": 8843
    },
    {
      "epoch": 0.8148523517759249,
      "grad_norm": 0.9451372908762516,
      "learning_rate": 4.5399076381543536e-07,
      "loss": 0.1192,
      "step": 8844
    },
    {
      "epoch": 0.8149444879531949,
      "grad_norm": 0.9134889434581871,
      "learning_rate": 4.5355310506043053e-07,
      "loss": 0.112,
      "step": 8845
    },
    {
      "epoch": 0.8150366241304648,
      "grad_norm": 0.902262399393891,
      "learning_rate": 4.531156363182407e-07,
      "loss": 0.1112,
      "step": 8846
    },
    {
      "epoch": 0.8151287603077348,
      "grad_norm": 0.950956875411372,
      "learning_rate": 4.526783576294835e-07,
      "loss": 0.1274,
      "step": 8847
    },
    {
      "epoch": 0.8152208964850048,
      "grad_norm": 0.8980087170242911,
      "learning_rate": 4.5224126903476136e-07,
      "loss": 0.1144,
      "step": 8848
    },
    {
      "epoch": 0.8153130326622748,
      "grad_norm": 0.9102210614967016,
      "learning_rate": 4.518043705746578e-07,
      "loss": 0.121,
      "step": 8849
    },
    {
      "epoch": 0.8154051688395448,
      "grad_norm": 0.9794454379572055,
      "learning_rate": 4.5136766228974005e-07,
      "loss": 0.1259,
      "step": 8850
    },
    {
      "epoch": 0.8154973050168148,
      "grad_norm": 0.9445608794611002,
      "learning_rate": 4.509311442205538e-07,
      "loss": 0.1128,
      "step": 8851
    },
    {
      "epoch": 0.8155894411940848,
      "grad_norm": 0.9359598682098146,
      "learning_rate": 4.504948164076317e-07,
      "loss": 0.1173,
      "step": 8852
    },
    {
      "epoch": 0.8156815773713548,
      "grad_norm": 0.9334601600662357,
      "learning_rate": 4.5005867889148626e-07,
      "loss": 0.108,
      "step": 8853
    },
    {
      "epoch": 0.8157737135486248,
      "grad_norm": 0.8918500853209136,
      "learning_rate": 4.4962273171261393e-07,
      "loss": 0.1177,
      "step": 8854
    },
    {
      "epoch": 0.8158658497258948,
      "grad_norm": 0.9167522940965833,
      "learning_rate": 4.491869749114908e-07,
      "loss": 0.1016,
      "step": 8855
    },
    {
      "epoch": 0.8159579859031649,
      "grad_norm": 0.9233508244470227,
      "learning_rate": 4.4875140852857854e-07,
      "loss": 0.1213,
      "step": 8856
    },
    {
      "epoch": 0.8160501220804349,
      "grad_norm": 0.9724640957299692,
      "learning_rate": 4.4831603260431787e-07,
      "loss": 0.1307,
      "step": 8857
    },
    {
      "epoch": 0.8161422582577049,
      "grad_norm": 0.8845130255857112,
      "learning_rate": 4.478808471791354e-07,
      "loss": 0.1084,
      "step": 8858
    },
    {
      "epoch": 0.8162343944349749,
      "grad_norm": 0.9911551925112649,
      "learning_rate": 4.474458522934361e-07,
      "loss": 0.1268,
      "step": 8859
    },
    {
      "epoch": 0.8163265306122449,
      "grad_norm": 0.9575733009268137,
      "learning_rate": 4.470110479876105e-07,
      "loss": 0.1232,
      "step": 8860
    },
    {
      "epoch": 0.8164186667895149,
      "grad_norm": 0.881394940578178,
      "learning_rate": 4.4657643430203067e-07,
      "loss": 0.1086,
      "step": 8861
    },
    {
      "epoch": 0.8165108029667849,
      "grad_norm": 0.9874625512103724,
      "learning_rate": 4.461420112770501e-07,
      "loss": 0.1232,
      "step": 8862
    },
    {
      "epoch": 0.8166029391440549,
      "grad_norm": 0.9262867632138934,
      "learning_rate": 4.45707778953004e-07,
      "loss": 0.1166,
      "step": 8863
    },
    {
      "epoch": 0.8166950753213249,
      "grad_norm": 0.9675648924666996,
      "learning_rate": 4.452737373702115e-07,
      "loss": 0.1229,
      "step": 8864
    },
    {
      "epoch": 0.8167872114985949,
      "grad_norm": 0.887245591991255,
      "learning_rate": 4.44839886568974e-07,
      "loss": 0.1105,
      "step": 8865
    },
    {
      "epoch": 0.8168793476758649,
      "grad_norm": 0.9227953394306798,
      "learning_rate": 4.444062265895746e-07,
      "loss": 0.1207,
      "step": 8866
    },
    {
      "epoch": 0.8169714838531349,
      "grad_norm": 0.928324613846221,
      "learning_rate": 4.439727574722783e-07,
      "loss": 0.1159,
      "step": 8867
    },
    {
      "epoch": 0.8170636200304049,
      "grad_norm": 0.9133012167675768,
      "learning_rate": 4.435394792573322e-07,
      "loss": 0.114,
      "step": 8868
    },
    {
      "epoch": 0.8171557562076749,
      "grad_norm": 0.9615531030979356,
      "learning_rate": 4.431063919849668e-07,
      "loss": 0.1171,
      "step": 8869
    },
    {
      "epoch": 0.817247892384945,
      "grad_norm": 0.9009290620554269,
      "learning_rate": 4.4267349569539404e-07,
      "loss": 0.114,
      "step": 8870
    },
    {
      "epoch": 0.817340028562215,
      "grad_norm": 0.9012808267067186,
      "learning_rate": 4.422407904288095e-07,
      "loss": 0.1259,
      "step": 8871
    },
    {
      "epoch": 0.817432164739485,
      "grad_norm": 0.9080862780039042,
      "learning_rate": 4.418082762253889e-07,
      "loss": 0.1072,
      "step": 8872
    },
    {
      "epoch": 0.817524300916755,
      "grad_norm": 0.9247328780388934,
      "learning_rate": 4.4137595312529066e-07,
      "loss": 0.1004,
      "step": 8873
    },
    {
      "epoch": 0.817616437094025,
      "grad_norm": 0.9123210256606668,
      "learning_rate": 4.4094382116865704e-07,
      "loss": 0.1161,
      "step": 8874
    },
    {
      "epoch": 0.817708573271295,
      "grad_norm": 0.9462605452591462,
      "learning_rate": 4.4051188039561156e-07,
      "loss": 0.1294,
      "step": 8875
    },
    {
      "epoch": 0.817800709448565,
      "grad_norm": 0.9238681176353081,
      "learning_rate": 4.400801308462591e-07,
      "loss": 0.1223,
      "step": 8876
    },
    {
      "epoch": 0.817892845625835,
      "grad_norm": 0.9192630991789277,
      "learning_rate": 4.396485725606886e-07,
      "loss": 0.1133,
      "step": 8877
    },
    {
      "epoch": 0.817984981803105,
      "grad_norm": 0.9292618059590252,
      "learning_rate": 4.3921720557896953e-07,
      "loss": 0.1228,
      "step": 8878
    },
    {
      "epoch": 0.818077117980375,
      "grad_norm": 0.941305059372131,
      "learning_rate": 4.387860299411553e-07,
      "loss": 0.1203,
      "step": 8879
    },
    {
      "epoch": 0.818169254157645,
      "grad_norm": 0.9116056962903435,
      "learning_rate": 4.383550456872793e-07,
      "loss": 0.1145,
      "step": 8880
    },
    {
      "epoch": 0.818261390334915,
      "grad_norm": 0.9624185944825424,
      "learning_rate": 4.3792425285735935e-07,
      "loss": 0.1277,
      "step": 8881
    },
    {
      "epoch": 0.818353526512185,
      "grad_norm": 0.9945802215949294,
      "learning_rate": 4.3749365149139493e-07,
      "loss": 0.1229,
      "step": 8882
    },
    {
      "epoch": 0.8184456626894551,
      "grad_norm": 0.8888004929847416,
      "learning_rate": 4.3706324162936684e-07,
      "loss": 0.1073,
      "step": 8883
    },
    {
      "epoch": 0.8185377988667251,
      "grad_norm": 0.9515796576447026,
      "learning_rate": 4.3663302331123815e-07,
      "loss": 0.119,
      "step": 8884
    },
    {
      "epoch": 0.8186299350439951,
      "grad_norm": 0.9196874221149748,
      "learning_rate": 4.362029965769554e-07,
      "loss": 0.1149,
      "step": 8885
    },
    {
      "epoch": 0.8187220712212651,
      "grad_norm": 0.9462812699661997,
      "learning_rate": 4.3577316146644677e-07,
      "loss": 0.1249,
      "step": 8886
    },
    {
      "epoch": 0.818814207398535,
      "grad_norm": 0.9160239522286485,
      "learning_rate": 4.353435180196225e-07,
      "loss": 0.1158,
      "step": 8887
    },
    {
      "epoch": 0.818906343575805,
      "grad_norm": 0.9343357130198869,
      "learning_rate": 4.349140662763751e-07,
      "loss": 0.118,
      "step": 8888
    },
    {
      "epoch": 0.818998479753075,
      "grad_norm": 0.9273075309006215,
      "learning_rate": 4.3448480627657804e-07,
      "loss": 0.1262,
      "step": 8889
    },
    {
      "epoch": 0.819090615930345,
      "grad_norm": 0.9436024913907296,
      "learning_rate": 4.3405573806008905e-07,
      "loss": 0.1236,
      "step": 8890
    },
    {
      "epoch": 0.819182752107615,
      "grad_norm": 0.9298816928569952,
      "learning_rate": 4.336268616667477e-07,
      "loss": 0.1198,
      "step": 8891
    },
    {
      "epoch": 0.819274888284885,
      "grad_norm": 0.9208677732375474,
      "learning_rate": 4.3319817713637415e-07,
      "loss": 0.1159,
      "step": 8892
    },
    {
      "epoch": 0.819367024462155,
      "grad_norm": 0.9685921400717235,
      "learning_rate": 4.327696845087728e-07,
      "loss": 0.1145,
      "step": 8893
    },
    {
      "epoch": 0.819459160639425,
      "grad_norm": 0.9149193983462024,
      "learning_rate": 4.323413838237281e-07,
      "loss": 0.1148,
      "step": 8894
    },
    {
      "epoch": 0.819551296816695,
      "grad_norm": 0.8670286833052321,
      "learning_rate": 4.319132751210084e-07,
      "loss": 0.1044,
      "step": 8895
    },
    {
      "epoch": 0.819643432993965,
      "grad_norm": 0.9696750526422799,
      "learning_rate": 4.3148535844036444e-07,
      "loss": 0.1277,
      "step": 8896
    },
    {
      "epoch": 0.8197355691712351,
      "grad_norm": 0.905676179841572,
      "learning_rate": 4.310576338215269e-07,
      "loss": 0.1042,
      "step": 8897
    },
    {
      "epoch": 0.8198277053485051,
      "grad_norm": 0.9891893214431671,
      "learning_rate": 4.3063010130421133e-07,
      "loss": 0.1137,
      "step": 8898
    },
    {
      "epoch": 0.8199198415257751,
      "grad_norm": 0.9729248302550854,
      "learning_rate": 4.302027609281129e-07,
      "loss": 0.1196,
      "step": 8899
    },
    {
      "epoch": 0.8200119777030451,
      "grad_norm": 1.0186935346621,
      "learning_rate": 4.2977561273291166e-07,
      "loss": 0.1255,
      "step": 8900
    },
    {
      "epoch": 0.8201041138803151,
      "grad_norm": 0.9395849723041353,
      "learning_rate": 4.2934865675826666e-07,
      "loss": 0.1116,
      "step": 8901
    },
    {
      "epoch": 0.8201962500575851,
      "grad_norm": 0.9359575585273199,
      "learning_rate": 4.289218930438219e-07,
      "loss": 0.1194,
      "step": 8902
    },
    {
      "epoch": 0.8202883862348551,
      "grad_norm": 0.930147304393213,
      "learning_rate": 4.284953216292029e-07,
      "loss": 0.1276,
      "step": 8903
    },
    {
      "epoch": 0.8203805224121251,
      "grad_norm": 0.9492365348281026,
      "learning_rate": 4.280689425540163e-07,
      "loss": 0.1178,
      "step": 8904
    },
    {
      "epoch": 0.8204726585893951,
      "grad_norm": 0.8818809813498917,
      "learning_rate": 4.2764275585785054e-07,
      "loss": 0.1096,
      "step": 8905
    },
    {
      "epoch": 0.8205647947666651,
      "grad_norm": 0.9030730135112734,
      "learning_rate": 4.27216761580278e-07,
      "loss": 0.1183,
      "step": 8906
    },
    {
      "epoch": 0.8206569309439351,
      "grad_norm": 0.9361132602718762,
      "learning_rate": 4.2679095976085217e-07,
      "loss": 0.1202,
      "step": 8907
    },
    {
      "epoch": 0.8207490671212051,
      "grad_norm": 0.9450714386955552,
      "learning_rate": 4.2636535043910965e-07,
      "loss": 0.1181,
      "step": 8908
    },
    {
      "epoch": 0.8208412032984751,
      "grad_norm": 0.9815734649085485,
      "learning_rate": 4.2593993365456746e-07,
      "loss": 0.1126,
      "step": 8909
    },
    {
      "epoch": 0.8209333394757452,
      "grad_norm": 0.9795019317978423,
      "learning_rate": 4.255147094467249e-07,
      "loss": 0.1177,
      "step": 8910
    },
    {
      "epoch": 0.8210254756530152,
      "grad_norm": 0.9814500011535171,
      "learning_rate": 4.250896778550648e-07,
      "loss": 0.1262,
      "step": 8911
    },
    {
      "epoch": 0.8211176118302852,
      "grad_norm": 0.8741343412062761,
      "learning_rate": 4.246648389190522e-07,
      "loss": 0.1115,
      "step": 8912
    },
    {
      "epoch": 0.8212097480075552,
      "grad_norm": 1.001494769248629,
      "learning_rate": 4.24240192678132e-07,
      "loss": 0.1262,
      "step": 8913
    },
    {
      "epoch": 0.8213018841848252,
      "grad_norm": 0.9333485530166209,
      "learning_rate": 4.23815739171734e-07,
      "loss": 0.115,
      "step": 8914
    },
    {
      "epoch": 0.8213940203620952,
      "grad_norm": 0.9608338050250689,
      "learning_rate": 4.233914784392673e-07,
      "loss": 0.1298,
      "step": 8915
    },
    {
      "epoch": 0.8214861565393652,
      "grad_norm": 0.9396240080659503,
      "learning_rate": 4.229674105201259e-07,
      "loss": 0.1166,
      "step": 8916
    },
    {
      "epoch": 0.8215782927166352,
      "grad_norm": 0.9271200896495708,
      "learning_rate": 4.225435354536833e-07,
      "loss": 0.116,
      "step": 8917
    },
    {
      "epoch": 0.8216704288939052,
      "grad_norm": 0.9403145613253563,
      "learning_rate": 4.221198532792972e-07,
      "loss": 0.1141,
      "step": 8918
    },
    {
      "epoch": 0.8217625650711752,
      "grad_norm": 0.8984489683434724,
      "learning_rate": 4.2169636403630697e-07,
      "loss": 0.1184,
      "step": 8919
    },
    {
      "epoch": 0.8218547012484452,
      "grad_norm": 0.9492944717039095,
      "learning_rate": 4.212730677640328e-07,
      "loss": 0.1157,
      "step": 8920
    },
    {
      "epoch": 0.8219468374257152,
      "grad_norm": 0.9388579066857203,
      "learning_rate": 4.2084996450177744e-07,
      "loss": 0.12,
      "step": 8921
    },
    {
      "epoch": 0.8220389736029852,
      "grad_norm": 0.9925571358138545,
      "learning_rate": 4.20427054288827e-07,
      "loss": 0.1286,
      "step": 8922
    },
    {
      "epoch": 0.8221311097802552,
      "grad_norm": 0.9465299652537837,
      "learning_rate": 4.20004337164448e-07,
      "loss": 0.1252,
      "step": 8923
    },
    {
      "epoch": 0.8222232459575253,
      "grad_norm": 0.8823315904062302,
      "learning_rate": 4.1958181316789084e-07,
      "loss": 0.1116,
      "step": 8924
    },
    {
      "epoch": 0.8223153821347953,
      "grad_norm": 0.9751808798700181,
      "learning_rate": 4.1915948233838625e-07,
      "loss": 0.1131,
      "step": 8925
    },
    {
      "epoch": 0.8224075183120653,
      "grad_norm": 0.8996660151380795,
      "learning_rate": 4.1873734471514685e-07,
      "loss": 0.119,
      "step": 8926
    },
    {
      "epoch": 0.8224996544893353,
      "grad_norm": 0.9027774874768812,
      "learning_rate": 4.1831540033736935e-07,
      "loss": 0.1098,
      "step": 8927
    },
    {
      "epoch": 0.8225917906666053,
      "grad_norm": 0.9123413341572139,
      "learning_rate": 4.1789364924423067e-07,
      "loss": 0.1201,
      "step": 8928
    },
    {
      "epoch": 0.8226839268438753,
      "grad_norm": 0.9650092844823791,
      "learning_rate": 4.174720914748914e-07,
      "loss": 0.1212,
      "step": 8929
    },
    {
      "epoch": 0.8227760630211453,
      "grad_norm": 0.9097635241979137,
      "learning_rate": 4.1705072706849287e-07,
      "loss": 0.1146,
      "step": 8930
    },
    {
      "epoch": 0.8228681991984153,
      "grad_norm": 0.9642879397283781,
      "learning_rate": 4.166295560641576e-07,
      "loss": 0.1214,
      "step": 8931
    },
    {
      "epoch": 0.8229603353756852,
      "grad_norm": 0.9629087714634937,
      "learning_rate": 4.1620857850099227e-07,
      "loss": 0.1155,
      "step": 8932
    },
    {
      "epoch": 0.8230524715529552,
      "grad_norm": 0.9983273086133829,
      "learning_rate": 4.157877944180852e-07,
      "loss": 0.1293,
      "step": 8933
    },
    {
      "epoch": 0.8231446077302252,
      "grad_norm": 0.9935451639675307,
      "learning_rate": 4.153672038545054e-07,
      "loss": 0.128,
      "step": 8934
    },
    {
      "epoch": 0.8232367439074952,
      "grad_norm": 1.061281927988124,
      "learning_rate": 4.1494680684930485e-07,
      "loss": 0.1413,
      "step": 8935
    },
    {
      "epoch": 0.8233288800847652,
      "grad_norm": 0.9332443265144761,
      "learning_rate": 4.1452660344151826e-07,
      "loss": 0.1247,
      "step": 8936
    },
    {
      "epoch": 0.8234210162620353,
      "grad_norm": 0.8874742579698003,
      "learning_rate": 4.141065936701613e-07,
      "loss": 0.1045,
      "step": 8937
    },
    {
      "epoch": 0.8235131524393053,
      "grad_norm": 0.994749623574586,
      "learning_rate": 4.1368677757423064e-07,
      "loss": 0.1285,
      "step": 8938
    },
    {
      "epoch": 0.8236052886165753,
      "grad_norm": 0.954874028724842,
      "learning_rate": 4.1326715519270725e-07,
      "loss": 0.1302,
      "step": 8939
    },
    {
      "epoch": 0.8236974247938453,
      "grad_norm": 0.9643041528254239,
      "learning_rate": 4.1284772656455334e-07,
      "loss": 0.1276,
      "step": 8940
    },
    {
      "epoch": 0.8237895609711153,
      "grad_norm": 0.9820897900287507,
      "learning_rate": 4.12428491728713e-07,
      "loss": 0.1341,
      "step": 8941
    },
    {
      "epoch": 0.8238816971483853,
      "grad_norm": 0.9548532478634758,
      "learning_rate": 4.1200945072411207e-07,
      "loss": 0.121,
      "step": 8942
    },
    {
      "epoch": 0.8239738333256553,
      "grad_norm": 0.9437093357718186,
      "learning_rate": 4.1159060358965745e-07,
      "loss": 0.1326,
      "step": 8943
    },
    {
      "epoch": 0.8240659695029253,
      "grad_norm": 0.9282509901707379,
      "learning_rate": 4.111719503642403e-07,
      "loss": 0.1183,
      "step": 8944
    },
    {
      "epoch": 0.8241581056801953,
      "grad_norm": 0.971786827764824,
      "learning_rate": 4.1075349108673306e-07,
      "loss": 0.1355,
      "step": 8945
    },
    {
      "epoch": 0.8242502418574653,
      "grad_norm": 0.9875630667869919,
      "learning_rate": 4.1033522579598804e-07,
      "loss": 0.1413,
      "step": 8946
    },
    {
      "epoch": 0.8243423780347353,
      "grad_norm": 0.9528042142165237,
      "learning_rate": 4.0991715453084307e-07,
      "loss": 0.1146,
      "step": 8947
    },
    {
      "epoch": 0.8244345142120053,
      "grad_norm": 0.9010012156168963,
      "learning_rate": 4.0949927733011455e-07,
      "loss": 0.1172,
      "step": 8948
    },
    {
      "epoch": 0.8245266503892753,
      "grad_norm": 0.90929624679404,
      "learning_rate": 4.0908159423260374e-07,
      "loss": 0.1142,
      "step": 8949
    },
    {
      "epoch": 0.8246187865665453,
      "grad_norm": 0.943372601274594,
      "learning_rate": 4.086641052770915e-07,
      "loss": 0.1162,
      "step": 8950
    },
    {
      "epoch": 0.8247109227438154,
      "grad_norm": 0.9659695477866573,
      "learning_rate": 4.082468105023418e-07,
      "loss": 0.1209,
      "step": 8951
    },
    {
      "epoch": 0.8248030589210854,
      "grad_norm": 0.8942946250102998,
      "learning_rate": 4.078297099471018e-07,
      "loss": 0.1143,
      "step": 8952
    },
    {
      "epoch": 0.8248951950983554,
      "grad_norm": 0.9233344249672104,
      "learning_rate": 4.0741280365009765e-07,
      "loss": 0.1254,
      "step": 8953
    },
    {
      "epoch": 0.8249873312756254,
      "grad_norm": 0.9404317994762078,
      "learning_rate": 4.069960916500404e-07,
      "loss": 0.118,
      "step": 8954
    },
    {
      "epoch": 0.8250794674528954,
      "grad_norm": 0.9542843518189766,
      "learning_rate": 4.065795739856207e-07,
      "loss": 0.1236,
      "step": 8955
    },
    {
      "epoch": 0.8251716036301654,
      "grad_norm": 0.8768924661820494,
      "learning_rate": 4.0616325069551296e-07,
      "loss": 0.1026,
      "step": 8956
    },
    {
      "epoch": 0.8252637398074354,
      "grad_norm": 0.910496711637403,
      "learning_rate": 4.057471218183734e-07,
      "loss": 0.1185,
      "step": 8957
    },
    {
      "epoch": 0.8253558759847054,
      "grad_norm": 0.9617153732562939,
      "learning_rate": 4.0533118739283864e-07,
      "loss": 0.1287,
      "step": 8958
    },
    {
      "epoch": 0.8254480121619754,
      "grad_norm": 0.9206590523649169,
      "learning_rate": 4.049154474575284e-07,
      "loss": 0.1096,
      "step": 8959
    },
    {
      "epoch": 0.8255401483392454,
      "grad_norm": 0.9911337506691006,
      "learning_rate": 4.04499902051044e-07,
      "loss": 0.1254,
      "step": 8960
    },
    {
      "epoch": 0.8256322845165154,
      "grad_norm": 0.9393601602299094,
      "learning_rate": 4.0408455121196957e-07,
      "loss": 0.1216,
      "step": 8961
    },
    {
      "epoch": 0.8257244206937854,
      "grad_norm": 0.90987307073812,
      "learning_rate": 4.0366939497887033e-07,
      "loss": 0.1069,
      "step": 8962
    },
    {
      "epoch": 0.8258165568710554,
      "grad_norm": 0.9690470990393921,
      "learning_rate": 4.032544333902935e-07,
      "loss": 0.1267,
      "step": 8963
    },
    {
      "epoch": 0.8259086930483254,
      "grad_norm": 0.9604445922242358,
      "learning_rate": 4.028396664847678e-07,
      "loss": 0.1182,
      "step": 8964
    },
    {
      "epoch": 0.8260008292255955,
      "grad_norm": 0.9183917505789629,
      "learning_rate": 4.0242509430080456e-07,
      "loss": 0.1126,
      "step": 8965
    },
    {
      "epoch": 0.8260929654028655,
      "grad_norm": 1.0160636850060218,
      "learning_rate": 4.0201071687689746e-07,
      "loss": 0.1324,
      "step": 8966
    },
    {
      "epoch": 0.8261851015801355,
      "grad_norm": 0.9045916383232453,
      "learning_rate": 4.0159653425152074e-07,
      "loss": 0.1165,
      "step": 8967
    },
    {
      "epoch": 0.8262772377574055,
      "grad_norm": 0.9083173799399996,
      "learning_rate": 4.011825464631322e-07,
      "loss": 0.1046,
      "step": 8968
    },
    {
      "epoch": 0.8263693739346755,
      "grad_norm": 0.9147366746741465,
      "learning_rate": 4.0076875355016975e-07,
      "loss": 0.113,
      "step": 8969
    },
    {
      "epoch": 0.8264615101119455,
      "grad_norm": 0.9910503681642115,
      "learning_rate": 4.003551555510549e-07,
      "loss": 0.1175,
      "step": 8970
    },
    {
      "epoch": 0.8265536462892155,
      "grad_norm": 1.0082243762021406,
      "learning_rate": 3.99941752504189e-07,
      "loss": 0.1273,
      "step": 8971
    },
    {
      "epoch": 0.8266457824664855,
      "grad_norm": 0.9544300821805285,
      "learning_rate": 3.995285444479574e-07,
      "loss": 0.1277,
      "step": 8972
    },
    {
      "epoch": 0.8267379186437555,
      "grad_norm": 0.9873879835090933,
      "learning_rate": 3.9911553142072733e-07,
      "loss": 0.1156,
      "step": 8973
    },
    {
      "epoch": 0.8268300548210255,
      "grad_norm": 1.0143048636164989,
      "learning_rate": 3.987027134608462e-07,
      "loss": 0.1268,
      "step": 8974
    },
    {
      "epoch": 0.8269221909982954,
      "grad_norm": 0.9406111544099246,
      "learning_rate": 3.9829009060664363e-07,
      "loss": 0.1246,
      "step": 8975
    },
    {
      "epoch": 0.8270143271755654,
      "grad_norm": 0.9211034325729525,
      "learning_rate": 3.9787766289643233e-07,
      "loss": 0.1066,
      "step": 8976
    },
    {
      "epoch": 0.8271064633528354,
      "grad_norm": 0.9369902884160378,
      "learning_rate": 3.974654303685063e-07,
      "loss": 0.1135,
      "step": 8977
    },
    {
      "epoch": 0.8271985995301055,
      "grad_norm": 0.9431563970089338,
      "learning_rate": 3.97053393061142e-07,
      "loss": 0.111,
      "step": 8978
    },
    {
      "epoch": 0.8272907357073755,
      "grad_norm": 0.9293997578153168,
      "learning_rate": 3.966415510125965e-07,
      "loss": 0.1093,
      "step": 8979
    },
    {
      "epoch": 0.8273828718846455,
      "grad_norm": 0.9676726803687037,
      "learning_rate": 3.9622990426110867e-07,
      "loss": 0.1204,
      "step": 8980
    },
    {
      "epoch": 0.8274750080619155,
      "grad_norm": 0.9486005141441863,
      "learning_rate": 3.958184528449005e-07,
      "loss": 0.1204,
      "step": 8981
    },
    {
      "epoch": 0.8275671442391855,
      "grad_norm": 0.9918274525746862,
      "learning_rate": 3.954071968021755e-07,
      "loss": 0.127,
      "step": 8982
    },
    {
      "epoch": 0.8276592804164555,
      "grad_norm": 0.9000784607704694,
      "learning_rate": 3.9499613617111965e-07,
      "loss": 0.1071,
      "step": 8983
    },
    {
      "epoch": 0.8277514165937255,
      "grad_norm": 0.954022230403387,
      "learning_rate": 3.945852709898987e-07,
      "loss": 0.1233,
      "step": 8984
    },
    {
      "epoch": 0.8278435527709955,
      "grad_norm": 1.0309117530762104,
      "learning_rate": 3.941746012966616e-07,
      "loss": 0.1155,
      "step": 8985
    },
    {
      "epoch": 0.8279356889482655,
      "grad_norm": 0.9673615839155028,
      "learning_rate": 3.937641271295392e-07,
      "loss": 0.1245,
      "step": 8986
    },
    {
      "epoch": 0.8280278251255355,
      "grad_norm": 0.996234111747313,
      "learning_rate": 3.93353848526645e-07,
      "loss": 0.1329,
      "step": 8987
    },
    {
      "epoch": 0.8281199613028055,
      "grad_norm": 0.9257195359719688,
      "learning_rate": 3.9294376552607233e-07,
      "loss": 0.1149,
      "step": 8988
    },
    {
      "epoch": 0.8282120974800755,
      "grad_norm": 0.8982916316625367,
      "learning_rate": 3.92533878165898e-07,
      "loss": 0.1091,
      "step": 8989
    },
    {
      "epoch": 0.8283042336573455,
      "grad_norm": 0.9789169488412091,
      "learning_rate": 3.921241864841793e-07,
      "loss": 0.1123,
      "step": 8990
    },
    {
      "epoch": 0.8283963698346155,
      "grad_norm": 0.9511169814629293,
      "learning_rate": 3.917146905189576e-07,
      "loss": 0.1122,
      "step": 8991
    },
    {
      "epoch": 0.8284885060118856,
      "grad_norm": 0.9369304152077549,
      "learning_rate": 3.913053903082531e-07,
      "loss": 0.1155,
      "step": 8992
    },
    {
      "epoch": 0.8285806421891556,
      "grad_norm": 0.9261575552816484,
      "learning_rate": 3.9089628589007e-07,
      "loss": 0.1204,
      "step": 8993
    },
    {
      "epoch": 0.8286727783664256,
      "grad_norm": 0.9719530306024485,
      "learning_rate": 3.9048737730239427e-07,
      "loss": 0.1287,
      "step": 8994
    },
    {
      "epoch": 0.8287649145436956,
      "grad_norm": 0.9022992796256752,
      "learning_rate": 3.9007866458319275e-07,
      "loss": 0.1032,
      "step": 8995
    },
    {
      "epoch": 0.8288570507209656,
      "grad_norm": 0.9154891131247639,
      "learning_rate": 3.8967014777041344e-07,
      "loss": 0.1183,
      "step": 8996
    },
    {
      "epoch": 0.8289491868982356,
      "grad_norm": 0.9790283403390899,
      "learning_rate": 3.892618269019882e-07,
      "loss": 0.12,
      "step": 8997
    },
    {
      "epoch": 0.8290413230755056,
      "grad_norm": 0.8989171769636702,
      "learning_rate": 3.888537020158295e-07,
      "loss": 0.1162,
      "step": 8998
    },
    {
      "epoch": 0.8291334592527756,
      "grad_norm": 0.9513898800003195,
      "learning_rate": 3.8844577314983254e-07,
      "loss": 0.1206,
      "step": 8999
    },
    {
      "epoch": 0.8292255954300456,
      "grad_norm": 0.8928187592109958,
      "learning_rate": 3.8803804034187235e-07,
      "loss": 0.1147,
      "step": 9000
    },
    {
      "epoch": 0.8292255954300456,
      "eval_loss": 0.11866238713264465,
      "eval_runtime": 298.9938,
      "eval_samples_per_second": 23.469,
      "eval_steps_per_second": 2.937,
      "step": 9000
    },
    {
      "epoch": 0.8293177316073156,
      "grad_norm": 0.8963570408818765,
      "learning_rate": 3.8763050362980723e-07,
      "loss": 0.1019,
      "step": 9001
    },
    {
      "epoch": 0.8294098677845856,
      "grad_norm": 0.9809451351037467,
      "learning_rate": 3.8722316305147693e-07,
      "loss": 0.1274,
      "step": 9002
    },
    {
      "epoch": 0.8295020039618556,
      "grad_norm": 0.8795144198454591,
      "learning_rate": 3.8681601864470396e-07,
      "loss": 0.1017,
      "step": 9003
    },
    {
      "epoch": 0.8295941401391256,
      "grad_norm": 0.9827458108554684,
      "learning_rate": 3.864090704472906e-07,
      "loss": 0.1246,
      "step": 9004
    },
    {
      "epoch": 0.8296862763163957,
      "grad_norm": 0.9325877389996922,
      "learning_rate": 3.86002318497023e-07,
      "loss": 0.118,
      "step": 9005
    },
    {
      "epoch": 0.8297784124936657,
      "grad_norm": 0.9312298769357221,
      "learning_rate": 3.855957628316673e-07,
      "loss": 0.1339,
      "step": 9006
    },
    {
      "epoch": 0.8298705486709357,
      "grad_norm": 0.9946128464419297,
      "learning_rate": 3.8518940348897277e-07,
      "loss": 0.1323,
      "step": 9007
    },
    {
      "epoch": 0.8299626848482057,
      "grad_norm": 0.9313704014210649,
      "learning_rate": 3.8478324050666926e-07,
      "loss": 0.1181,
      "step": 9008
    },
    {
      "epoch": 0.8300548210254757,
      "grad_norm": 0.9198770011959588,
      "learning_rate": 3.8437727392246966e-07,
      "loss": 0.1142,
      "step": 9009
    },
    {
      "epoch": 0.8301469572027457,
      "grad_norm": 0.9180412820101661,
      "learning_rate": 3.839715037740677e-07,
      "loss": 0.1252,
      "step": 9010
    },
    {
      "epoch": 0.8302390933800157,
      "grad_norm": 0.9675721326632333,
      "learning_rate": 3.835659300991401e-07,
      "loss": 0.1252,
      "step": 9011
    },
    {
      "epoch": 0.8303312295572857,
      "grad_norm": 0.9748217354158463,
      "learning_rate": 3.8316055293534353e-07,
      "loss": 0.1179,
      "step": 9012
    },
    {
      "epoch": 0.8304233657345557,
      "grad_norm": 0.8849626621010777,
      "learning_rate": 3.82755372320317e-07,
      "loss": 0.1157,
      "step": 9013
    },
    {
      "epoch": 0.8305155019118257,
      "grad_norm": 0.9417758302521162,
      "learning_rate": 3.823503882916818e-07,
      "loss": 0.1145,
      "step": 9014
    },
    {
      "epoch": 0.8306076380890957,
      "grad_norm": 0.9352672162324204,
      "learning_rate": 3.819456008870412e-07,
      "loss": 0.1112,
      "step": 9015
    },
    {
      "epoch": 0.8306997742663657,
      "grad_norm": 0.9546735786454992,
      "learning_rate": 3.815410101439798e-07,
      "loss": 0.124,
      "step": 9016
    },
    {
      "epoch": 0.8307919104436357,
      "grad_norm": 0.8982406751220058,
      "learning_rate": 3.8113661610006375e-07,
      "loss": 0.108,
      "step": 9017
    },
    {
      "epoch": 0.8308840466209056,
      "grad_norm": 0.936872634455003,
      "learning_rate": 3.8073241879284045e-07,
      "loss": 0.125,
      "step": 9018
    },
    {
      "epoch": 0.8309761827981758,
      "grad_norm": 0.9029042550032307,
      "learning_rate": 3.803284182598399e-07,
      "loss": 0.111,
      "step": 9019
    },
    {
      "epoch": 0.8310683189754458,
      "grad_norm": 0.9279239531114798,
      "learning_rate": 3.799246145385746e-07,
      "loss": 0.1183,
      "step": 9020
    },
    {
      "epoch": 0.8311604551527157,
      "grad_norm": 0.9001601906556236,
      "learning_rate": 3.795210076665362e-07,
      "loss": 0.1062,
      "step": 9021
    },
    {
      "epoch": 0.8312525913299857,
      "grad_norm": 0.9096130257836612,
      "learning_rate": 3.791175976812014e-07,
      "loss": 0.1121,
      "step": 9022
    },
    {
      "epoch": 0.8313447275072557,
      "grad_norm": 0.9841772012761711,
      "learning_rate": 3.78714384620025e-07,
      "loss": 0.1291,
      "step": 9023
    },
    {
      "epoch": 0.8314368636845257,
      "grad_norm": 0.978523579768035,
      "learning_rate": 3.7831136852044705e-07,
      "loss": 0.1217,
      "step": 9024
    },
    {
      "epoch": 0.8315289998617957,
      "grad_norm": 0.9247283536985508,
      "learning_rate": 3.7790854941988615e-07,
      "loss": 0.1077,
      "step": 9025
    },
    {
      "epoch": 0.8316211360390657,
      "grad_norm": 0.9377240447676977,
      "learning_rate": 3.7750592735574494e-07,
      "loss": 0.1153,
      "step": 9026
    },
    {
      "epoch": 0.8317132722163357,
      "grad_norm": 0.9592075723655732,
      "learning_rate": 3.7710350236540737e-07,
      "loss": 0.1163,
      "step": 9027
    },
    {
      "epoch": 0.8318054083936057,
      "grad_norm": 0.9618403112391543,
      "learning_rate": 3.7670127448623804e-07,
      "loss": 0.1261,
      "step": 9028
    },
    {
      "epoch": 0.8318975445708757,
      "grad_norm": 0.9207309046891926,
      "learning_rate": 3.7629924375558347e-07,
      "loss": 0.1142,
      "step": 9029
    },
    {
      "epoch": 0.8319896807481457,
      "grad_norm": 0.9393609681547822,
      "learning_rate": 3.7589741021077234e-07,
      "loss": 0.1193,
      "step": 9030
    },
    {
      "epoch": 0.8320818169254157,
      "grad_norm": 0.9288187420162773,
      "learning_rate": 3.7549577388911546e-07,
      "loss": 0.1123,
      "step": 9031
    },
    {
      "epoch": 0.8321739531026857,
      "grad_norm": 0.9124189814585899,
      "learning_rate": 3.7509433482790515e-07,
      "loss": 0.11,
      "step": 9032
    },
    {
      "epoch": 0.8322660892799558,
      "grad_norm": 0.9163760756223935,
      "learning_rate": 3.7469309306441466e-07,
      "loss": 0.1089,
      "step": 9033
    },
    {
      "epoch": 0.8323582254572258,
      "grad_norm": 1.020152579275757,
      "learning_rate": 3.742920486358986e-07,
      "loss": 0.1325,
      "step": 9034
    },
    {
      "epoch": 0.8324503616344958,
      "grad_norm": 0.9058071226707416,
      "learning_rate": 3.738912015795945e-07,
      "loss": 0.1124,
      "step": 9035
    },
    {
      "epoch": 0.8325424978117658,
      "grad_norm": 0.9111574495433679,
      "learning_rate": 3.734905519327217e-07,
      "loss": 0.1197,
      "step": 9036
    },
    {
      "epoch": 0.8326346339890358,
      "grad_norm": 0.9817191595487917,
      "learning_rate": 3.7309009973247963e-07,
      "loss": 0.1235,
      "step": 9037
    },
    {
      "epoch": 0.8327267701663058,
      "grad_norm": 0.8820936392317466,
      "learning_rate": 3.72689845016051e-07,
      "loss": 0.1057,
      "step": 9038
    },
    {
      "epoch": 0.8328189063435758,
      "grad_norm": 0.9080842498257791,
      "learning_rate": 3.722897878205989e-07,
      "loss": 0.1238,
      "step": 9039
    },
    {
      "epoch": 0.8329110425208458,
      "grad_norm": 0.9689835563120064,
      "learning_rate": 3.718899281832686e-07,
      "loss": 0.1158,
      "step": 9040
    },
    {
      "epoch": 0.8330031786981158,
      "grad_norm": 0.9041628190128206,
      "learning_rate": 3.7149026614118844e-07,
      "loss": 0.1161,
      "step": 9041
    },
    {
      "epoch": 0.8330953148753858,
      "grad_norm": 0.8889635923127017,
      "learning_rate": 3.7109080173146543e-07,
      "loss": 0.1125,
      "step": 9042
    },
    {
      "epoch": 0.8331874510526558,
      "grad_norm": 0.9497760569923801,
      "learning_rate": 3.7069153499119134e-07,
      "loss": 0.1229,
      "step": 9043
    },
    {
      "epoch": 0.8332795872299258,
      "grad_norm": 0.9338812497801194,
      "learning_rate": 3.7029246595743666e-07,
      "loss": 0.1178,
      "step": 9044
    },
    {
      "epoch": 0.8333717234071958,
      "grad_norm": 0.8848813835864621,
      "learning_rate": 3.6989359466725603e-07,
      "loss": 0.109,
      "step": 9045
    },
    {
      "epoch": 0.8334638595844659,
      "grad_norm": 0.8906596859436642,
      "learning_rate": 3.6949492115768425e-07,
      "loss": 0.1136,
      "step": 9046
    },
    {
      "epoch": 0.8335559957617359,
      "grad_norm": 0.926402940035174,
      "learning_rate": 3.6909644546573806e-07,
      "loss": 0.1124,
      "step": 9047
    },
    {
      "epoch": 0.8336481319390059,
      "grad_norm": 0.9310849307554305,
      "learning_rate": 3.686981676284171e-07,
      "loss": 0.1116,
      "step": 9048
    },
    {
      "epoch": 0.8337402681162759,
      "grad_norm": 0.9872574472717625,
      "learning_rate": 3.6830008768270033e-07,
      "loss": 0.1274,
      "step": 9049
    },
    {
      "epoch": 0.8338324042935459,
      "grad_norm": 0.9384635786716498,
      "learning_rate": 3.679022056655493e-07,
      "loss": 0.115,
      "step": 9050
    },
    {
      "epoch": 0.8339245404708159,
      "grad_norm": 0.950804946256567,
      "learning_rate": 3.675045216139081e-07,
      "loss": 0.1113,
      "step": 9051
    },
    {
      "epoch": 0.8340166766480859,
      "grad_norm": 0.9885673160133664,
      "learning_rate": 3.6710703556470136e-07,
      "loss": 0.1242,
      "step": 9052
    },
    {
      "epoch": 0.8341088128253559,
      "grad_norm": 0.9689834654878836,
      "learning_rate": 3.667097475548367e-07,
      "loss": 0.1243,
      "step": 9053
    },
    {
      "epoch": 0.8342009490026259,
      "grad_norm": 0.9672726322538227,
      "learning_rate": 3.663126576212014e-07,
      "loss": 0.1226,
      "step": 9054
    },
    {
      "epoch": 0.8342930851798959,
      "grad_norm": 0.9226458262636499,
      "learning_rate": 3.659157658006651e-07,
      "loss": 0.1171,
      "step": 9055
    },
    {
      "epoch": 0.8343852213571659,
      "grad_norm": 0.9283165671065761,
      "learning_rate": 3.655190721300794e-07,
      "loss": 0.1175,
      "step": 9056
    },
    {
      "epoch": 0.8344773575344359,
      "grad_norm": 0.892800777812627,
      "learning_rate": 3.651225766462782e-07,
      "loss": 0.1133,
      "step": 9057
    },
    {
      "epoch": 0.8345694937117059,
      "grad_norm": 1.0480032457880637,
      "learning_rate": 3.647262793860751e-07,
      "loss": 0.1168,
      "step": 9058
    },
    {
      "epoch": 0.8346616298889759,
      "grad_norm": 0.9495060160798997,
      "learning_rate": 3.643301803862673e-07,
      "loss": 0.1251,
      "step": 9059
    },
    {
      "epoch": 0.834753766066246,
      "grad_norm": 0.9767279275355083,
      "learning_rate": 3.639342796836312e-07,
      "loss": 0.1249,
      "step": 9060
    },
    {
      "epoch": 0.834845902243516,
      "grad_norm": 0.956717822836148,
      "learning_rate": 3.63538577314928e-07,
      "loss": 0.1167,
      "step": 9061
    },
    {
      "epoch": 0.834938038420786,
      "grad_norm": 0.8805632595773724,
      "learning_rate": 3.6314307331689725e-07,
      "loss": 0.1089,
      "step": 9062
    },
    {
      "epoch": 0.835030174598056,
      "grad_norm": 0.9608933523035653,
      "learning_rate": 3.6274776772626197e-07,
      "loss": 0.1173,
      "step": 9063
    },
    {
      "epoch": 0.835122310775326,
      "grad_norm": 0.9490880216010513,
      "learning_rate": 3.6235266057972727e-07,
      "loss": 0.1236,
      "step": 9064
    },
    {
      "epoch": 0.835214446952596,
      "grad_norm": 0.9741016505813996,
      "learning_rate": 3.6195775191397776e-07,
      "loss": 0.1312,
      "step": 9065
    },
    {
      "epoch": 0.8353065831298659,
      "grad_norm": 1.061742094019043,
      "learning_rate": 3.615630417656807e-07,
      "loss": 0.1333,
      "step": 9066
    },
    {
      "epoch": 0.8353987193071359,
      "grad_norm": 0.9404116017239663,
      "learning_rate": 3.611685301714854e-07,
      "loss": 0.1159,
      "step": 9067
    },
    {
      "epoch": 0.8354908554844059,
      "grad_norm": 0.948652738305397,
      "learning_rate": 3.607742171680223e-07,
      "loss": 0.1156,
      "step": 9068
    },
    {
      "epoch": 0.8355829916616759,
      "grad_norm": 1.0125976280025968,
      "learning_rate": 3.6038010279190376e-07,
      "loss": 0.136,
      "step": 9069
    },
    {
      "epoch": 0.8356751278389459,
      "grad_norm": 0.8919477550099669,
      "learning_rate": 3.5998618707972303e-07,
      "loss": 0.1076,
      "step": 9070
    },
    {
      "epoch": 0.8357672640162159,
      "grad_norm": 0.9215454245045038,
      "learning_rate": 3.5959247006805446e-07,
      "loss": 0.1148,
      "step": 9071
    },
    {
      "epoch": 0.8358594001934859,
      "grad_norm": 0.9527340209068846,
      "learning_rate": 3.5919895179345546e-07,
      "loss": 0.1193,
      "step": 9072
    },
    {
      "epoch": 0.835951536370756,
      "grad_norm": 0.964974428331,
      "learning_rate": 3.5880563229246434e-07,
      "loss": 0.1248,
      "step": 9073
    },
    {
      "epoch": 0.836043672548026,
      "grad_norm": 0.9511935127041369,
      "learning_rate": 3.58412511601601e-07,
      "loss": 0.1213,
      "step": 9074
    },
    {
      "epoch": 0.836135808725296,
      "grad_norm": 0.9306185915223708,
      "learning_rate": 3.5801958975736653e-07,
      "loss": 0.1158,
      "step": 9075
    },
    {
      "epoch": 0.836227944902566,
      "grad_norm": 0.8972295969668357,
      "learning_rate": 3.576268667962432e-07,
      "loss": 0.1121,
      "step": 9076
    },
    {
      "epoch": 0.836320081079836,
      "grad_norm": 0.9471002321711262,
      "learning_rate": 3.5723434275469593e-07,
      "loss": 0.1185,
      "step": 9077
    },
    {
      "epoch": 0.836412217257106,
      "grad_norm": 0.9437730017790239,
      "learning_rate": 3.5684201766917087e-07,
      "loss": 0.1203,
      "step": 9078
    },
    {
      "epoch": 0.836504353434376,
      "grad_norm": 0.9323885982962973,
      "learning_rate": 3.564498915760947e-07,
      "loss": 0.1246,
      "step": 9079
    },
    {
      "epoch": 0.836596489611646,
      "grad_norm": 0.9202495340206708,
      "learning_rate": 3.560579645118775e-07,
      "loss": 0.1123,
      "step": 9080
    },
    {
      "epoch": 0.836688625788916,
      "grad_norm": 0.9643478363526602,
      "learning_rate": 3.556662365129082e-07,
      "loss": 0.1242,
      "step": 9081
    },
    {
      "epoch": 0.836780761966186,
      "grad_norm": 0.9643206560127056,
      "learning_rate": 3.5527470761556024e-07,
      "loss": 0.1313,
      "step": 9082
    },
    {
      "epoch": 0.836872898143456,
      "grad_norm": 0.9966865526476313,
      "learning_rate": 3.5488337785618607e-07,
      "loss": 0.1277,
      "step": 9083
    },
    {
      "epoch": 0.836965034320726,
      "grad_norm": 0.9510760700309292,
      "learning_rate": 3.5449224727112094e-07,
      "loss": 0.1239,
      "step": 9084
    },
    {
      "epoch": 0.837057170497996,
      "grad_norm": 0.937440293224199,
      "learning_rate": 3.541013158966816e-07,
      "loss": 0.1185,
      "step": 9085
    },
    {
      "epoch": 0.837149306675266,
      "grad_norm": 0.9609619870780663,
      "learning_rate": 3.5371058376916733e-07,
      "loss": 0.1136,
      "step": 9086
    },
    {
      "epoch": 0.8372414428525361,
      "grad_norm": 0.9648051774960312,
      "learning_rate": 3.5332005092485496e-07,
      "loss": 0.1196,
      "step": 9087
    },
    {
      "epoch": 0.8373335790298061,
      "grad_norm": 0.9183894976690667,
      "learning_rate": 3.529297174000071e-07,
      "loss": 0.1114,
      "step": 9088
    },
    {
      "epoch": 0.8374257152070761,
      "grad_norm": 0.9582901426501818,
      "learning_rate": 3.525395832308659e-07,
      "loss": 0.114,
      "step": 9089
    },
    {
      "epoch": 0.8375178513843461,
      "grad_norm": 0.9000415692013729,
      "learning_rate": 3.52149648453656e-07,
      "loss": 0.117,
      "step": 9090
    },
    {
      "epoch": 0.8376099875616161,
      "grad_norm": 0.9444437649158218,
      "learning_rate": 3.5175991310458207e-07,
      "loss": 0.1179,
      "step": 9091
    },
    {
      "epoch": 0.8377021237388861,
      "grad_norm": 0.941489120512911,
      "learning_rate": 3.513703772198318e-07,
      "loss": 0.1279,
      "step": 9092
    },
    {
      "epoch": 0.8377942599161561,
      "grad_norm": 0.9868011195136057,
      "learning_rate": 3.5098104083557264e-07,
      "loss": 0.1223,
      "step": 9093
    },
    {
      "epoch": 0.8378863960934261,
      "grad_norm": 0.8705477423458002,
      "learning_rate": 3.5059190398795603e-07,
      "loss": 0.1046,
      "step": 9094
    },
    {
      "epoch": 0.8379785322706961,
      "grad_norm": 0.9114211371590364,
      "learning_rate": 3.5020296671311154e-07,
      "loss": 0.1206,
      "step": 9095
    },
    {
      "epoch": 0.8380706684479661,
      "grad_norm": 0.8950724214900556,
      "learning_rate": 3.498142290471529e-07,
      "loss": 0.113,
      "step": 9096
    },
    {
      "epoch": 0.8381628046252361,
      "grad_norm": 0.9521022208521928,
      "learning_rate": 3.4942569102617534e-07,
      "loss": 0.1135,
      "step": 9097
    },
    {
      "epoch": 0.8382549408025061,
      "grad_norm": 0.8837117200628214,
      "learning_rate": 3.490373526862531e-07,
      "loss": 0.1017,
      "step": 9098
    },
    {
      "epoch": 0.8383470769797761,
      "grad_norm": 0.8962134046824323,
      "learning_rate": 3.4864921406344483e-07,
      "loss": 0.1135,
      "step": 9099
    },
    {
      "epoch": 0.8384392131570461,
      "grad_norm": 0.8887076436299933,
      "learning_rate": 3.482612751937878e-07,
      "loss": 0.1116,
      "step": 9100
    },
    {
      "epoch": 0.8385313493343162,
      "grad_norm": 0.9453219724054187,
      "learning_rate": 3.4787353611330317e-07,
      "loss": 0.1188,
      "step": 9101
    },
    {
      "epoch": 0.8386234855115862,
      "grad_norm": 0.9381143409167717,
      "learning_rate": 3.4748599685799274e-07,
      "loss": 0.1167,
      "step": 9102
    },
    {
      "epoch": 0.8387156216888562,
      "grad_norm": 0.9600988951205893,
      "learning_rate": 3.470986574638391e-07,
      "loss": 0.1087,
      "step": 9103
    },
    {
      "epoch": 0.8388077578661262,
      "grad_norm": 0.984011588082167,
      "learning_rate": 3.4671151796680654e-07,
      "loss": 0.1325,
      "step": 9104
    },
    {
      "epoch": 0.8388998940433962,
      "grad_norm": 0.9611615555873271,
      "learning_rate": 3.46324578402841e-07,
      "loss": 0.1305,
      "step": 9105
    },
    {
      "epoch": 0.8389920302206662,
      "grad_norm": 0.9586580145664007,
      "learning_rate": 3.459378388078702e-07,
      "loss": 0.1225,
      "step": 9106
    },
    {
      "epoch": 0.8390841663979361,
      "grad_norm": 0.9436411042590699,
      "learning_rate": 3.4555129921780337e-07,
      "loss": 0.117,
      "step": 9107
    },
    {
      "epoch": 0.8391763025752061,
      "grad_norm": 0.9925703690511307,
      "learning_rate": 3.4516495966853054e-07,
      "loss": 0.1177,
      "step": 9108
    },
    {
      "epoch": 0.8392684387524761,
      "grad_norm": 0.9548357208284326,
      "learning_rate": 3.447788201959223e-07,
      "loss": 0.1247,
      "step": 9109
    },
    {
      "epoch": 0.8393605749297461,
      "grad_norm": 0.9972576838402836,
      "learning_rate": 3.443928808358327e-07,
      "loss": 0.1367,
      "step": 9110
    },
    {
      "epoch": 0.8394527111070161,
      "grad_norm": 0.923211368304357,
      "learning_rate": 3.4400714162409644e-07,
      "loss": 0.1182,
      "step": 9111
    },
    {
      "epoch": 0.8395448472842861,
      "grad_norm": 0.9053249971071133,
      "learning_rate": 3.4362160259652887e-07,
      "loss": 0.109,
      "step": 9112
    },
    {
      "epoch": 0.8396369834615561,
      "grad_norm": 1.02878674745858,
      "learning_rate": 3.4323626378892775e-07,
      "loss": 0.1369,
      "step": 9113
    },
    {
      "epoch": 0.8397291196388262,
      "grad_norm": 0.8885051736781546,
      "learning_rate": 3.4285112523707143e-07,
      "loss": 0.1153,
      "step": 9114
    },
    {
      "epoch": 0.8398212558160962,
      "grad_norm": 0.9073801662881984,
      "learning_rate": 3.424661869767207e-07,
      "loss": 0.1089,
      "step": 9115
    },
    {
      "epoch": 0.8399133919933662,
      "grad_norm": 0.928370265115947,
      "learning_rate": 3.4208144904361613e-07,
      "loss": 0.1121,
      "step": 9116
    },
    {
      "epoch": 0.8400055281706362,
      "grad_norm": 0.9212881770336953,
      "learning_rate": 3.4169691147348137e-07,
      "loss": 0.1259,
      "step": 9117
    },
    {
      "epoch": 0.8400976643479062,
      "grad_norm": 0.9788684272042728,
      "learning_rate": 3.4131257430202124e-07,
      "loss": 0.1266,
      "step": 9118
    },
    {
      "epoch": 0.8401898005251762,
      "grad_norm": 0.9755763215915222,
      "learning_rate": 3.4092843756492075e-07,
      "loss": 0.1256,
      "step": 9119
    },
    {
      "epoch": 0.8402819367024462,
      "grad_norm": 0.9751484992123327,
      "learning_rate": 3.405445012978467e-07,
      "loss": 0.1343,
      "step": 9120
    },
    {
      "epoch": 0.8403740728797162,
      "grad_norm": 1.013270465324915,
      "learning_rate": 3.4016076553644806e-07,
      "loss": 0.1311,
      "step": 9121
    },
    {
      "epoch": 0.8404662090569862,
      "grad_norm": 0.9903544054884075,
      "learning_rate": 3.397772303163549e-07,
      "loss": 0.1291,
      "step": 9122
    },
    {
      "epoch": 0.8405583452342562,
      "grad_norm": 1.0099661007009835,
      "learning_rate": 3.3939389567317876e-07,
      "loss": 0.1196,
      "step": 9123
    },
    {
      "epoch": 0.8406504814115262,
      "grad_norm": 0.9572207364409202,
      "learning_rate": 3.39010761642512e-07,
      "loss": 0.1328,
      "step": 9124
    },
    {
      "epoch": 0.8407426175887962,
      "grad_norm": 0.9433063660527777,
      "learning_rate": 3.386278282599281e-07,
      "loss": 0.1244,
      "step": 9125
    },
    {
      "epoch": 0.8408347537660662,
      "grad_norm": 0.9561219271919276,
      "learning_rate": 3.3824509556098314e-07,
      "loss": 0.1117,
      "step": 9126
    },
    {
      "epoch": 0.8409268899433362,
      "grad_norm": 0.9526955884568686,
      "learning_rate": 3.3786256358121327e-07,
      "loss": 0.1171,
      "step": 9127
    },
    {
      "epoch": 0.8410190261206063,
      "grad_norm": 0.9421580524280039,
      "learning_rate": 3.37480232356138e-07,
      "loss": 0.1185,
      "step": 9128
    },
    {
      "epoch": 0.8411111622978763,
      "grad_norm": 0.9007740123953568,
      "learning_rate": 3.3709810192125576e-07,
      "loss": 0.1124,
      "step": 9129
    },
    {
      "epoch": 0.8412032984751463,
      "grad_norm": 0.9257483251297324,
      "learning_rate": 3.3671617231204683e-07,
      "loss": 0.1231,
      "step": 9130
    },
    {
      "epoch": 0.8412954346524163,
      "grad_norm": 0.9196705659888632,
      "learning_rate": 3.3633444356397425e-07,
      "loss": 0.1145,
      "step": 9131
    },
    {
      "epoch": 0.8413875708296863,
      "grad_norm": 0.9501560375264937,
      "learning_rate": 3.3595291571248216e-07,
      "loss": 0.1174,
      "step": 9132
    },
    {
      "epoch": 0.8414797070069563,
      "grad_norm": 0.9529617492932547,
      "learning_rate": 3.355715887929939e-07,
      "loss": 0.1235,
      "step": 9133
    },
    {
      "epoch": 0.8415718431842263,
      "grad_norm": 0.9237901196829995,
      "learning_rate": 3.351904628409172e-07,
      "loss": 0.1113,
      "step": 9134
    },
    {
      "epoch": 0.8416639793614963,
      "grad_norm": 0.9690381039536827,
      "learning_rate": 3.348095378916386e-07,
      "loss": 0.1195,
      "step": 9135
    },
    {
      "epoch": 0.8417561155387663,
      "grad_norm": 0.9498145803212865,
      "learning_rate": 3.34428813980528e-07,
      "loss": 0.1163,
      "step": 9136
    },
    {
      "epoch": 0.8418482517160363,
      "grad_norm": 0.8832640227036879,
      "learning_rate": 3.3404829114293437e-07,
      "loss": 0.1051,
      "step": 9137
    },
    {
      "epoch": 0.8419403878933063,
      "grad_norm": 0.8932009094651336,
      "learning_rate": 3.336679694141898e-07,
      "loss": 0.1023,
      "step": 9138
    },
    {
      "epoch": 0.8420325240705763,
      "grad_norm": 0.9251539366264352,
      "learning_rate": 3.3328784882960817e-07,
      "loss": 0.1073,
      "step": 9139
    },
    {
      "epoch": 0.8421246602478463,
      "grad_norm": 0.9107574392909054,
      "learning_rate": 3.3290792942448303e-07,
      "loss": 0.1217,
      "step": 9140
    },
    {
      "epoch": 0.8422167964251164,
      "grad_norm": 0.9267056836585295,
      "learning_rate": 3.325282112340894e-07,
      "loss": 0.1176,
      "step": 9141
    },
    {
      "epoch": 0.8423089326023864,
      "grad_norm": 0.9393315446042251,
      "learning_rate": 3.321486942936844e-07,
      "loss": 0.123,
      "step": 9142
    },
    {
      "epoch": 0.8424010687796564,
      "grad_norm": 0.9063891028187058,
      "learning_rate": 3.317693786385065e-07,
      "loss": 0.1099,
      "step": 9143
    },
    {
      "epoch": 0.8424932049569264,
      "grad_norm": 0.9198452193152991,
      "learning_rate": 3.3139026430377583e-07,
      "loss": 0.1144,
      "step": 9144
    },
    {
      "epoch": 0.8425853411341964,
      "grad_norm": 0.9107477136619364,
      "learning_rate": 3.3101135132469237e-07,
      "loss": 0.1102,
      "step": 9145
    },
    {
      "epoch": 0.8426774773114664,
      "grad_norm": 0.9865477556790888,
      "learning_rate": 3.306326397364379e-07,
      "loss": 0.1203,
      "step": 9146
    },
    {
      "epoch": 0.8427696134887364,
      "grad_norm": 0.9823050755373736,
      "learning_rate": 3.3025412957417624e-07,
      "loss": 0.1287,
      "step": 9147
    },
    {
      "epoch": 0.8428617496660064,
      "grad_norm": 0.9272156435057771,
      "learning_rate": 3.298758208730529e-07,
      "loss": 0.1219,
      "step": 9148
    },
    {
      "epoch": 0.8429538858432764,
      "grad_norm": 0.9622429969672128,
      "learning_rate": 3.294977136681923e-07,
      "loss": 0.1287,
      "step": 9149
    },
    {
      "epoch": 0.8430460220205463,
      "grad_norm": 1.012662531193091,
      "learning_rate": 3.291198079947033e-07,
      "loss": 0.1199,
      "step": 9150
    },
    {
      "epoch": 0.8431381581978163,
      "grad_norm": 0.903122594351644,
      "learning_rate": 3.2874210388767313e-07,
      "loss": 0.1208,
      "step": 9151
    },
    {
      "epoch": 0.8432302943750863,
      "grad_norm": 0.9666984134750198,
      "learning_rate": 3.2836460138217287e-07,
      "loss": 0.1169,
      "step": 9152
    },
    {
      "epoch": 0.8433224305523563,
      "grad_norm": 1.0222716317336589,
      "learning_rate": 3.279873005132525e-07,
      "loss": 0.1298,
      "step": 9153
    },
    {
      "epoch": 0.8434145667296263,
      "grad_norm": 0.9444079754719301,
      "learning_rate": 3.276102013159452e-07,
      "loss": 0.1129,
      "step": 9154
    },
    {
      "epoch": 0.8435067029068964,
      "grad_norm": 0.9178977470183698,
      "learning_rate": 3.272333038252648e-07,
      "loss": 0.1152,
      "step": 9155
    },
    {
      "epoch": 0.8435988390841664,
      "grad_norm": 0.990355918764317,
      "learning_rate": 3.2685660807620563e-07,
      "loss": 0.122,
      "step": 9156
    },
    {
      "epoch": 0.8436909752614364,
      "grad_norm": 0.994814328810813,
      "learning_rate": 3.2648011410374463e-07,
      "loss": 0.1273,
      "step": 9157
    },
    {
      "epoch": 0.8437831114387064,
      "grad_norm": 0.9642134855127644,
      "learning_rate": 3.2610382194283865e-07,
      "loss": 0.115,
      "step": 9158
    },
    {
      "epoch": 0.8438752476159764,
      "grad_norm": 0.998202330870969,
      "learning_rate": 3.257277316284266e-07,
      "loss": 0.1175,
      "step": 9159
    },
    {
      "epoch": 0.8439673837932464,
      "grad_norm": 0.9220879009717579,
      "learning_rate": 3.253518431954286e-07,
      "loss": 0.1144,
      "step": 9160
    },
    {
      "epoch": 0.8440595199705164,
      "grad_norm": 0.9456319757338142,
      "learning_rate": 3.249761566787474e-07,
      "loss": 0.1138,
      "step": 9161
    },
    {
      "epoch": 0.8441516561477864,
      "grad_norm": 0.9517620248803692,
      "learning_rate": 3.2460067211326274e-07,
      "loss": 0.1183,
      "step": 9162
    },
    {
      "epoch": 0.8442437923250564,
      "grad_norm": 0.9205843267337143,
      "learning_rate": 3.2422538953383986e-07,
      "loss": 0.1125,
      "step": 9163
    },
    {
      "epoch": 0.8443359285023264,
      "grad_norm": 0.9913122073528652,
      "learning_rate": 3.2385030897532364e-07,
      "loss": 0.1214,
      "step": 9164
    },
    {
      "epoch": 0.8444280646795964,
      "grad_norm": 0.9507436181178945,
      "learning_rate": 3.234754304725413e-07,
      "loss": 0.1172,
      "step": 9165
    },
    {
      "epoch": 0.8445202008568664,
      "grad_norm": 0.9003711742448463,
      "learning_rate": 3.2310075406029875e-07,
      "loss": 0.1084,
      "step": 9166
    },
    {
      "epoch": 0.8446123370341364,
      "grad_norm": 1.0251546343565063,
      "learning_rate": 3.22726279773386e-07,
      "loss": 0.1329,
      "step": 9167
    },
    {
      "epoch": 0.8447044732114064,
      "grad_norm": 0.9484851964975073,
      "learning_rate": 3.223520076465719e-07,
      "loss": 0.1116,
      "step": 9168
    },
    {
      "epoch": 0.8447966093886765,
      "grad_norm": 0.9361520273962691,
      "learning_rate": 3.219779377146087e-07,
      "loss": 0.1112,
      "step": 9169
    },
    {
      "epoch": 0.8448887455659465,
      "grad_norm": 0.9910150266298312,
      "learning_rate": 3.2160407001222805e-07,
      "loss": 0.1319,
      "step": 9170
    },
    {
      "epoch": 0.8449808817432165,
      "grad_norm": 0.9437007956534685,
      "learning_rate": 3.2123040457414377e-07,
      "loss": 0.1196,
      "step": 9171
    },
    {
      "epoch": 0.8450730179204865,
      "grad_norm": 0.9162880175898103,
      "learning_rate": 3.208569414350515e-07,
      "loss": 0.1205,
      "step": 9172
    },
    {
      "epoch": 0.8451651540977565,
      "grad_norm": 0.9328157543095837,
      "learning_rate": 3.204836806296269e-07,
      "loss": 0.1247,
      "step": 9173
    },
    {
      "epoch": 0.8452572902750265,
      "grad_norm": 1.0276232815812092,
      "learning_rate": 3.201106221925265e-07,
      "loss": 0.1333,
      "step": 9174
    },
    {
      "epoch": 0.8453494264522965,
      "grad_norm": 0.8934105754776828,
      "learning_rate": 3.197377661583892e-07,
      "loss": 0.1047,
      "step": 9175
    },
    {
      "epoch": 0.8454415626295665,
      "grad_norm": 0.8901250527712635,
      "learning_rate": 3.1936511256183524e-07,
      "loss": 0.108,
      "step": 9176
    },
    {
      "epoch": 0.8455336988068365,
      "grad_norm": 0.9456476762090105,
      "learning_rate": 3.1899266143746556e-07,
      "loss": 0.119,
      "step": 9177
    },
    {
      "epoch": 0.8456258349841065,
      "grad_norm": 0.9369759218485262,
      "learning_rate": 3.1862041281986224e-07,
      "loss": 0.1151,
      "step": 9178
    },
    {
      "epoch": 0.8457179711613765,
      "grad_norm": 0.9202364542455173,
      "learning_rate": 3.182483667435876e-07,
      "loss": 0.1141,
      "step": 9179
    },
    {
      "epoch": 0.8458101073386465,
      "grad_norm": 0.8767559307939479,
      "learning_rate": 3.1787652324318715e-07,
      "loss": 0.107,
      "step": 9180
    },
    {
      "epoch": 0.8459022435159165,
      "grad_norm": 0.9779144285133847,
      "learning_rate": 3.1750488235318675e-07,
      "loss": 0.1229,
      "step": 9181
    },
    {
      "epoch": 0.8459943796931866,
      "grad_norm": 1.0063964020443654,
      "learning_rate": 3.171334441080923e-07,
      "loss": 0.124,
      "step": 9182
    },
    {
      "epoch": 0.8460865158704566,
      "grad_norm": 0.9256576366855372,
      "learning_rate": 3.1676220854239326e-07,
      "loss": 0.1179,
      "step": 9183
    },
    {
      "epoch": 0.8461786520477266,
      "grad_norm": 0.9470305673494429,
      "learning_rate": 3.1639117569055744e-07,
      "loss": 0.1211,
      "step": 9184
    },
    {
      "epoch": 0.8462707882249966,
      "grad_norm": 0.9269075595700427,
      "learning_rate": 3.160203455870359e-07,
      "loss": 0.115,
      "step": 9185
    },
    {
      "epoch": 0.8463629244022666,
      "grad_norm": 0.9437783625013559,
      "learning_rate": 3.156497182662607e-07,
      "loss": 0.1096,
      "step": 9186
    },
    {
      "epoch": 0.8464550605795366,
      "grad_norm": 1.0404071839324958,
      "learning_rate": 3.1527929376264393e-07,
      "loss": 0.1346,
      "step": 9187
    },
    {
      "epoch": 0.8465471967568066,
      "grad_norm": 0.9384504047967224,
      "learning_rate": 3.149090721105805e-07,
      "loss": 0.1204,
      "step": 9188
    },
    {
      "epoch": 0.8466393329340766,
      "grad_norm": 0.9687143806916653,
      "learning_rate": 3.145390533444442e-07,
      "loss": 0.1205,
      "step": 9189
    },
    {
      "epoch": 0.8467314691113466,
      "grad_norm": 0.9811809575277504,
      "learning_rate": 3.1416923749859244e-07,
      "loss": 0.124,
      "step": 9190
    },
    {
      "epoch": 0.8468236052886166,
      "grad_norm": 0.9215113535637731,
      "learning_rate": 3.1379962460736183e-07,
      "loss": 0.118,
      "step": 9191
    },
    {
      "epoch": 0.8469157414658866,
      "grad_norm": 0.9453843360808005,
      "learning_rate": 3.1343021470507134e-07,
      "loss": 0.1262,
      "step": 9192
    },
    {
      "epoch": 0.8470078776431565,
      "grad_norm": 0.9679969128743854,
      "learning_rate": 3.1306100782602126e-07,
      "loss": 0.1217,
      "step": 9193
    },
    {
      "epoch": 0.8471000138204265,
      "grad_norm": 0.943336778635726,
      "learning_rate": 3.12692004004492e-07,
      "loss": 0.1292,
      "step": 9194
    },
    {
      "epoch": 0.8471921499976965,
      "grad_norm": 0.9273773600918996,
      "learning_rate": 3.1232320327474504e-07,
      "loss": 0.111,
      "step": 9195
    },
    {
      "epoch": 0.8472842861749666,
      "grad_norm": 0.9294729570727469,
      "learning_rate": 3.11954605671024e-07,
      "loss": 0.1091,
      "step": 9196
    },
    {
      "epoch": 0.8473764223522366,
      "grad_norm": 1.0014883154780123,
      "learning_rate": 3.1158621122755336e-07,
      "loss": 0.1209,
      "step": 9197
    },
    {
      "epoch": 0.8474685585295066,
      "grad_norm": 0.9293479352770679,
      "learning_rate": 3.112180199785389e-07,
      "loss": 0.1186,
      "step": 9198
    },
    {
      "epoch": 0.8475606947067766,
      "grad_norm": 0.925782398043939,
      "learning_rate": 3.1085003195816697e-07,
      "loss": 0.1168,
      "step": 9199
    },
    {
      "epoch": 0.8476528308840466,
      "grad_norm": 0.9633031131247594,
      "learning_rate": 3.1048224720060473e-07,
      "loss": 0.1167,
      "step": 9200
    },
    {
      "epoch": 0.8477449670613166,
      "grad_norm": 0.883630976694059,
      "learning_rate": 3.1011466574000144e-07,
      "loss": 0.1063,
      "step": 9201
    },
    {
      "epoch": 0.8478371032385866,
      "grad_norm": 0.8883849522188346,
      "learning_rate": 3.097472876104876e-07,
      "loss": 0.1141,
      "step": 9202
    },
    {
      "epoch": 0.8479292394158566,
      "grad_norm": 0.9053676263367607,
      "learning_rate": 3.093801128461735e-07,
      "loss": 0.1119,
      "step": 9203
    },
    {
      "epoch": 0.8480213755931266,
      "grad_norm": 0.8851968674906724,
      "learning_rate": 3.0901314148115203e-07,
      "loss": 0.1122,
      "step": 9204
    },
    {
      "epoch": 0.8481135117703966,
      "grad_norm": 0.9304807734264189,
      "learning_rate": 3.0864637354949545e-07,
      "loss": 0.1162,
      "step": 9205
    },
    {
      "epoch": 0.8482056479476666,
      "grad_norm": 0.956308995162976,
      "learning_rate": 3.082798090852596e-07,
      "loss": 0.1159,
      "step": 9206
    },
    {
      "epoch": 0.8482977841249366,
      "grad_norm": 0.938749830136273,
      "learning_rate": 3.079134481224788e-07,
      "loss": 0.1139,
      "step": 9207
    },
    {
      "epoch": 0.8483899203022066,
      "grad_norm": 0.9347902376284178,
      "learning_rate": 3.075472906951704e-07,
      "loss": 0.1213,
      "step": 9208
    },
    {
      "epoch": 0.8484820564794767,
      "grad_norm": 0.9532417511976031,
      "learning_rate": 3.0718133683733247e-07,
      "loss": 0.1308,
      "step": 9209
    },
    {
      "epoch": 0.8485741926567467,
      "grad_norm": 0.9022635398220523,
      "learning_rate": 3.0681558658294334e-07,
      "loss": 0.114,
      "step": 9210
    },
    {
      "epoch": 0.8486663288340167,
      "grad_norm": 0.953635552226034,
      "learning_rate": 3.0645003996596254e-07,
      "loss": 0.1167,
      "step": 9211
    },
    {
      "epoch": 0.8487584650112867,
      "grad_norm": 0.9560485444002419,
      "learning_rate": 3.0608469702033185e-07,
      "loss": 0.1199,
      "step": 9212
    },
    {
      "epoch": 0.8488506011885567,
      "grad_norm": 0.9130642873786164,
      "learning_rate": 3.057195577799729e-07,
      "loss": 0.1144,
      "step": 9213
    },
    {
      "epoch": 0.8489427373658267,
      "grad_norm": 0.9626403344881101,
      "learning_rate": 3.053546222787895e-07,
      "loss": 0.1234,
      "step": 9214
    },
    {
      "epoch": 0.8490348735430967,
      "grad_norm": 0.9327186071825609,
      "learning_rate": 3.0498989055066597e-07,
      "loss": 0.1122,
      "step": 9215
    },
    {
      "epoch": 0.8491270097203667,
      "grad_norm": 0.9708981271608061,
      "learning_rate": 3.046253626294665e-07,
      "loss": 0.1278,
      "step": 9216
    },
    {
      "epoch": 0.8492191458976367,
      "grad_norm": 0.8690144829639436,
      "learning_rate": 3.0426103854903856e-07,
      "loss": 0.1021,
      "step": 9217
    },
    {
      "epoch": 0.8493112820749067,
      "grad_norm": 0.928018364641159,
      "learning_rate": 3.0389691834320944e-07,
      "loss": 0.1133,
      "step": 9218
    },
    {
      "epoch": 0.8494034182521767,
      "grad_norm": 0.8965255066759291,
      "learning_rate": 3.0353300204578854e-07,
      "loss": 0.1102,
      "step": 9219
    },
    {
      "epoch": 0.8494955544294467,
      "grad_norm": 0.8911346876173641,
      "learning_rate": 3.031692896905644e-07,
      "loss": 0.1097,
      "step": 9220
    },
    {
      "epoch": 0.8495876906067167,
      "grad_norm": 0.96694668047903,
      "learning_rate": 3.0280578131130805e-07,
      "loss": 0.1198,
      "step": 9221
    },
    {
      "epoch": 0.8496798267839867,
      "grad_norm": 0.8842727538731018,
      "learning_rate": 3.024424769417711e-07,
      "loss": 0.1018,
      "step": 9222
    },
    {
      "epoch": 0.8497719629612568,
      "grad_norm": 0.9233797044528547,
      "learning_rate": 3.020793766156871e-07,
      "loss": 0.1181,
      "step": 9223
    },
    {
      "epoch": 0.8498640991385268,
      "grad_norm": 0.9490387802732133,
      "learning_rate": 3.017164803667691e-07,
      "loss": 0.1215,
      "step": 9224
    },
    {
      "epoch": 0.8499562353157968,
      "grad_norm": 1.0032873233600246,
      "learning_rate": 3.013537882287132e-07,
      "loss": 0.1153,
      "step": 9225
    },
    {
      "epoch": 0.8500483714930668,
      "grad_norm": 0.9287605713152437,
      "learning_rate": 3.0099130023519384e-07,
      "loss": 0.1153,
      "step": 9226
    },
    {
      "epoch": 0.8501405076703368,
      "grad_norm": 0.9260430559067087,
      "learning_rate": 3.0062901641986967e-07,
      "loss": 0.1125,
      "step": 9227
    },
    {
      "epoch": 0.8502326438476068,
      "grad_norm": 0.9559548455870349,
      "learning_rate": 3.002669368163774e-07,
      "loss": 0.1192,
      "step": 9228
    },
    {
      "epoch": 0.8503247800248768,
      "grad_norm": 0.9856506355542799,
      "learning_rate": 2.999050614583368e-07,
      "loss": 0.1319,
      "step": 9229
    },
    {
      "epoch": 0.8504169162021468,
      "grad_norm": 0.9733649361017951,
      "learning_rate": 2.995433903793485e-07,
      "loss": 0.1178,
      "step": 9230
    },
    {
      "epoch": 0.8505090523794168,
      "grad_norm": 0.9179807223176983,
      "learning_rate": 2.9918192361299335e-07,
      "loss": 0.1113,
      "step": 9231
    },
    {
      "epoch": 0.8506011885566868,
      "grad_norm": 0.9337430679762098,
      "learning_rate": 2.988206611928329e-07,
      "loss": 0.1157,
      "step": 9232
    },
    {
      "epoch": 0.8506933247339568,
      "grad_norm": 0.9471775562002914,
      "learning_rate": 2.984596031524109e-07,
      "loss": 0.1199,
      "step": 9233
    },
    {
      "epoch": 0.8507854609112268,
      "grad_norm": 0.8764880163175652,
      "learning_rate": 2.980987495252516e-07,
      "loss": 0.0979,
      "step": 9234
    },
    {
      "epoch": 0.8508775970884968,
      "grad_norm": 0.9735124287920944,
      "learning_rate": 2.9773810034486095e-07,
      "loss": 0.128,
      "step": 9235
    },
    {
      "epoch": 0.8509697332657667,
      "grad_norm": 0.9763026567613571,
      "learning_rate": 2.973776556447247e-07,
      "loss": 0.117,
      "step": 9236
    },
    {
      "epoch": 0.8510618694430369,
      "grad_norm": 0.9657511144374241,
      "learning_rate": 2.970174154583097e-07,
      "loss": 0.1349,
      "step": 9237
    },
    {
      "epoch": 0.8511540056203069,
      "grad_norm": 0.9732217261684756,
      "learning_rate": 2.9665737981906475e-07,
      "loss": 0.1157,
      "step": 9238
    },
    {
      "epoch": 0.8512461417975768,
      "grad_norm": 0.9557352112393618,
      "learning_rate": 2.962975487604197e-07,
      "loss": 0.1131,
      "step": 9239
    },
    {
      "epoch": 0.8513382779748468,
      "grad_norm": 0.879001540732117,
      "learning_rate": 2.9593792231578407e-07,
      "loss": 0.1091,
      "step": 9240
    },
    {
      "epoch": 0.8514304141521168,
      "grad_norm": 0.9464176135109578,
      "learning_rate": 2.9557850051854935e-07,
      "loss": 0.1133,
      "step": 9241
    },
    {
      "epoch": 0.8515225503293868,
      "grad_norm": 0.9042055512690569,
      "learning_rate": 2.9521928340208867e-07,
      "loss": 0.1073,
      "step": 9242
    },
    {
      "epoch": 0.8516146865066568,
      "grad_norm": 0.9290529203658289,
      "learning_rate": 2.9486027099975416e-07,
      "loss": 0.1111,
      "step": 9243
    },
    {
      "epoch": 0.8517068226839268,
      "grad_norm": 0.9999503573416042,
      "learning_rate": 2.9450146334488144e-07,
      "loss": 0.1238,
      "step": 9244
    },
    {
      "epoch": 0.8517989588611968,
      "grad_norm": 0.9769975220666243,
      "learning_rate": 2.9414286047078495e-07,
      "loss": 0.1185,
      "step": 9245
    },
    {
      "epoch": 0.8518910950384668,
      "grad_norm": 0.9805675881687931,
      "learning_rate": 2.937844624107608e-07,
      "loss": 0.1249,
      "step": 9246
    },
    {
      "epoch": 0.8519832312157368,
      "grad_norm": 0.9392593771275237,
      "learning_rate": 2.934262691980877e-07,
      "loss": 0.1271,
      "step": 9247
    },
    {
      "epoch": 0.8520753673930068,
      "grad_norm": 0.9420394121292485,
      "learning_rate": 2.930682808660226e-07,
      "loss": 0.1238,
      "step": 9248
    },
    {
      "epoch": 0.8521675035702768,
      "grad_norm": 0.9336936611055597,
      "learning_rate": 2.927104974478048e-07,
      "loss": 0.112,
      "step": 9249
    },
    {
      "epoch": 0.8522596397475469,
      "grad_norm": 0.9187592636253514,
      "learning_rate": 2.9235291897665497e-07,
      "loss": 0.1095,
      "step": 9250
    },
    {
      "epoch": 0.8523517759248169,
      "grad_norm": 0.9382325552043895,
      "learning_rate": 2.91995545485774e-07,
      "loss": 0.123,
      "step": 9251
    },
    {
      "epoch": 0.8524439121020869,
      "grad_norm": 0.9385540114569236,
      "learning_rate": 2.9163837700834473e-07,
      "loss": 0.1111,
      "step": 9252
    },
    {
      "epoch": 0.8525360482793569,
      "grad_norm": 1.0194309807321986,
      "learning_rate": 2.912814135775299e-07,
      "loss": 0.1201,
      "step": 9253
    },
    {
      "epoch": 0.8526281844566269,
      "grad_norm": 0.9422330543550835,
      "learning_rate": 2.909246552264733e-07,
      "loss": 0.1185,
      "step": 9254
    },
    {
      "epoch": 0.8527203206338969,
      "grad_norm": 0.9714237671246142,
      "learning_rate": 2.905681019882997e-07,
      "loss": 0.1233,
      "step": 9255
    },
    {
      "epoch": 0.8528124568111669,
      "grad_norm": 0.889642435524911,
      "learning_rate": 2.902117538961166e-07,
      "loss": 0.1089,
      "step": 9256
    },
    {
      "epoch": 0.8529045929884369,
      "grad_norm": 0.9627728725749669,
      "learning_rate": 2.898556109830092e-07,
      "loss": 0.1232,
      "step": 9257
    },
    {
      "epoch": 0.8529967291657069,
      "grad_norm": 0.924709180557171,
      "learning_rate": 2.894996732820468e-07,
      "loss": 0.1198,
      "step": 9258
    },
    {
      "epoch": 0.8530888653429769,
      "grad_norm": 0.995663879425923,
      "learning_rate": 2.8914394082627694e-07,
      "loss": 0.1308,
      "step": 9259
    },
    {
      "epoch": 0.8531810015202469,
      "grad_norm": 0.8870383336303369,
      "learning_rate": 2.8878841364873067e-07,
      "loss": 0.108,
      "step": 9260
    },
    {
      "epoch": 0.8532731376975169,
      "grad_norm": 0.939204703933531,
      "learning_rate": 2.8843309178241766e-07,
      "loss": 0.1229,
      "step": 9261
    },
    {
      "epoch": 0.8533652738747869,
      "grad_norm": 0.9353801215931173,
      "learning_rate": 2.880779752603302e-07,
      "loss": 0.117,
      "step": 9262
    },
    {
      "epoch": 0.8534574100520569,
      "grad_norm": 0.9319288195935074,
      "learning_rate": 2.877230641154413e-07,
      "loss": 0.1109,
      "step": 9263
    },
    {
      "epoch": 0.853549546229327,
      "grad_norm": 0.9812789745070393,
      "learning_rate": 2.873683583807038e-07,
      "loss": 0.1217,
      "step": 9264
    },
    {
      "epoch": 0.853641682406597,
      "grad_norm": 0.9535806751362134,
      "learning_rate": 2.8701385808905217e-07,
      "loss": 0.1217,
      "step": 9265
    },
    {
      "epoch": 0.853733818583867,
      "grad_norm": 1.0046894585013302,
      "learning_rate": 2.8665956327340175e-07,
      "loss": 0.1176,
      "step": 9266
    },
    {
      "epoch": 0.853825954761137,
      "grad_norm": 0.9303632824861731,
      "learning_rate": 2.8630547396664905e-07,
      "loss": 0.1179,
      "step": 9267
    },
    {
      "epoch": 0.853918090938407,
      "grad_norm": 0.9387604544667134,
      "learning_rate": 2.8595159020167186e-07,
      "loss": 0.1261,
      "step": 9268
    },
    {
      "epoch": 0.854010227115677,
      "grad_norm": 0.9702272703985197,
      "learning_rate": 2.855979120113278e-07,
      "loss": 0.1234,
      "step": 9269
    },
    {
      "epoch": 0.854102363292947,
      "grad_norm": 0.9525420190866508,
      "learning_rate": 2.8524443942845567e-07,
      "loss": 0.1259,
      "step": 9270
    },
    {
      "epoch": 0.854194499470217,
      "grad_norm": 0.9265347955081413,
      "learning_rate": 2.848911724858755e-07,
      "loss": 0.1159,
      "step": 9271
    },
    {
      "epoch": 0.854286635647487,
      "grad_norm": 0.8711773749514531,
      "learning_rate": 2.8453811121638834e-07,
      "loss": 0.1004,
      "step": 9272
    },
    {
      "epoch": 0.854378771824757,
      "grad_norm": 0.9477565096775316,
      "learning_rate": 2.841852556527763e-07,
      "loss": 0.1227,
      "step": 9273
    },
    {
      "epoch": 0.854470908002027,
      "grad_norm": 0.9126296349395749,
      "learning_rate": 2.8383260582780206e-07,
      "loss": 0.1102,
      "step": 9274
    },
    {
      "epoch": 0.854563044179297,
      "grad_norm": 0.9525387630469448,
      "learning_rate": 2.8348016177420833e-07,
      "loss": 0.1181,
      "step": 9275
    },
    {
      "epoch": 0.854655180356567,
      "grad_norm": 0.9081397804878282,
      "learning_rate": 2.8312792352472003e-07,
      "loss": 0.1146,
      "step": 9276
    },
    {
      "epoch": 0.8547473165338371,
      "grad_norm": 0.9292285804423576,
      "learning_rate": 2.8277589111204315e-07,
      "loss": 0.1052,
      "step": 9277
    },
    {
      "epoch": 0.8548394527111071,
      "grad_norm": 0.957473562369141,
      "learning_rate": 2.824240645688628e-07,
      "loss": 0.1172,
      "step": 9278
    },
    {
      "epoch": 0.8549315888883771,
      "grad_norm": 0.9231740975710168,
      "learning_rate": 2.8207244392784715e-07,
      "loss": 0.1088,
      "step": 9279
    },
    {
      "epoch": 0.8550237250656471,
      "grad_norm": 0.9829296508725345,
      "learning_rate": 2.817210292216435e-07,
      "loss": 0.1206,
      "step": 9280
    },
    {
      "epoch": 0.855115861242917,
      "grad_norm": 0.9620000672282338,
      "learning_rate": 2.813698204828816e-07,
      "loss": 0.1142,
      "step": 9281
    },
    {
      "epoch": 0.855207997420187,
      "grad_norm": 0.9896587979379299,
      "learning_rate": 2.8101881774416975e-07,
      "loss": 0.1293,
      "step": 9282
    },
    {
      "epoch": 0.855300133597457,
      "grad_norm": 0.9736012672651115,
      "learning_rate": 2.806680210380999e-07,
      "loss": 0.116,
      "step": 9283
    },
    {
      "epoch": 0.855392269774727,
      "grad_norm": 0.946172637330825,
      "learning_rate": 2.8031743039724337e-07,
      "loss": 0.1172,
      "step": 9284
    },
    {
      "epoch": 0.855484405951997,
      "grad_norm": 0.9513361916546126,
      "learning_rate": 2.7996704585415227e-07,
      "loss": 0.1127,
      "step": 9285
    },
    {
      "epoch": 0.855576542129267,
      "grad_norm": 0.881332691957772,
      "learning_rate": 2.796168674413596e-07,
      "loss": 0.1091,
      "step": 9286
    },
    {
      "epoch": 0.855668678306537,
      "grad_norm": 0.9543852740058231,
      "learning_rate": 2.7926689519137963e-07,
      "loss": 0.1069,
      "step": 9287
    },
    {
      "epoch": 0.855760814483807,
      "grad_norm": 1.0190560153967547,
      "learning_rate": 2.7891712913670765e-07,
      "loss": 0.127,
      "step": 9288
    },
    {
      "epoch": 0.855852950661077,
      "grad_norm": 0.9276584149377606,
      "learning_rate": 2.785675693098194e-07,
      "loss": 0.1174,
      "step": 9289
    },
    {
      "epoch": 0.855945086838347,
      "grad_norm": 0.9138106971403834,
      "learning_rate": 2.782182157431718e-07,
      "loss": 0.1122,
      "step": 9290
    },
    {
      "epoch": 0.8560372230156171,
      "grad_norm": 0.9154240943613224,
      "learning_rate": 2.778690684692012e-07,
      "loss": 0.1131,
      "step": 9291
    },
    {
      "epoch": 0.8561293591928871,
      "grad_norm": 1.0292627495010502,
      "learning_rate": 2.7752012752032683e-07,
      "loss": 0.1357,
      "step": 9292
    },
    {
      "epoch": 0.8562214953701571,
      "grad_norm": 0.8692529164590608,
      "learning_rate": 2.7717139292894824e-07,
      "loss": 0.1097,
      "step": 9293
    },
    {
      "epoch": 0.8563136315474271,
      "grad_norm": 0.9380278752902587,
      "learning_rate": 2.768228647274446e-07,
      "loss": 0.1174,
      "step": 9294
    },
    {
      "epoch": 0.8564057677246971,
      "grad_norm": 0.9121772464230438,
      "learning_rate": 2.7647454294817773e-07,
      "loss": 0.1146,
      "step": 9295
    },
    {
      "epoch": 0.8564979039019671,
      "grad_norm": 0.9604678113327662,
      "learning_rate": 2.7612642762348844e-07,
      "loss": 0.1086,
      "step": 9296
    },
    {
      "epoch": 0.8565900400792371,
      "grad_norm": 0.9726017733465043,
      "learning_rate": 2.757785187857001e-07,
      "loss": 0.1143,
      "step": 9297
    },
    {
      "epoch": 0.8566821762565071,
      "grad_norm": 0.9420082236664085,
      "learning_rate": 2.7543081646711487e-07,
      "loss": 0.1122,
      "step": 9298
    },
    {
      "epoch": 0.8567743124337771,
      "grad_norm": 0.9700969708005628,
      "learning_rate": 2.7508332070001807e-07,
      "loss": 0.1133,
      "step": 9299
    },
    {
      "epoch": 0.8568664486110471,
      "grad_norm": 0.9530651641631397,
      "learning_rate": 2.74736031516675e-07,
      "loss": 0.1231,
      "step": 9300
    },
    {
      "epoch": 0.8569585847883171,
      "grad_norm": 0.9404951910508061,
      "learning_rate": 2.7438894894933013e-07,
      "loss": 0.125,
      "step": 9301
    },
    {
      "epoch": 0.8570507209655871,
      "grad_norm": 0.9157359729471527,
      "learning_rate": 2.7404207303021153e-07,
      "loss": 0.1151,
      "step": 9302
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 0.9539864507426042,
      "learning_rate": 2.736954037915254e-07,
      "loss": 0.1224,
      "step": 9303
    },
    {
      "epoch": 0.8572349933201272,
      "grad_norm": 1.0345955402987674,
      "learning_rate": 2.733489412654608e-07,
      "loss": 0.1293,
      "step": 9304
    },
    {
      "epoch": 0.8573271294973972,
      "grad_norm": 0.9601556995270799,
      "learning_rate": 2.730026854841869e-07,
      "loss": 0.1279,
      "step": 9305
    },
    {
      "epoch": 0.8574192656746672,
      "grad_norm": 0.9853873195926093,
      "learning_rate": 2.7265663647985357e-07,
      "loss": 0.127,
      "step": 9306
    },
    {
      "epoch": 0.8575114018519372,
      "grad_norm": 0.9441216337976664,
      "learning_rate": 2.723107942845907e-07,
      "loss": 0.1252,
      "step": 9307
    },
    {
      "epoch": 0.8576035380292072,
      "grad_norm": 0.9547990137449248,
      "learning_rate": 2.7196515893051003e-07,
      "loss": 0.1186,
      "step": 9308
    },
    {
      "epoch": 0.8576956742064772,
      "grad_norm": 0.9812925813959527,
      "learning_rate": 2.7161973044970453e-07,
      "loss": 0.1247,
      "step": 9309
    },
    {
      "epoch": 0.8577878103837472,
      "grad_norm": 0.9238639272845356,
      "learning_rate": 2.712745088742472e-07,
      "loss": 0.1087,
      "step": 9310
    },
    {
      "epoch": 0.8578799465610172,
      "grad_norm": 0.9477555453822856,
      "learning_rate": 2.7092949423619145e-07,
      "loss": 0.1201,
      "step": 9311
    },
    {
      "epoch": 0.8579720827382872,
      "grad_norm": 0.9510317673971791,
      "learning_rate": 2.7058468656757183e-07,
      "loss": 0.1166,
      "step": 9312
    },
    {
      "epoch": 0.8580642189155572,
      "grad_norm": 0.8710369385781217,
      "learning_rate": 2.702400859004037e-07,
      "loss": 0.108,
      "step": 9313
    },
    {
      "epoch": 0.8581563550928272,
      "grad_norm": 0.9460929888604528,
      "learning_rate": 2.698956922666843e-07,
      "loss": 0.1199,
      "step": 9314
    },
    {
      "epoch": 0.8582484912700972,
      "grad_norm": 0.9849952167511389,
      "learning_rate": 2.695515056983894e-07,
      "loss": 0.116,
      "step": 9315
    },
    {
      "epoch": 0.8583406274473672,
      "grad_norm": 0.9495136032256005,
      "learning_rate": 2.692075262274771e-07,
      "loss": 0.1138,
      "step": 9316
    },
    {
      "epoch": 0.8584327636246372,
      "grad_norm": 0.970959517015624,
      "learning_rate": 2.6886375388588656e-07,
      "loss": 0.125,
      "step": 9317
    },
    {
      "epoch": 0.8585248998019073,
      "grad_norm": 0.9827648328416002,
      "learning_rate": 2.6852018870553664e-07,
      "loss": 0.1359,
      "step": 9318
    },
    {
      "epoch": 0.8586170359791773,
      "grad_norm": 0.9965665810884955,
      "learning_rate": 2.6817683071832687e-07,
      "loss": 0.1345,
      "step": 9319
    },
    {
      "epoch": 0.8587091721564473,
      "grad_norm": 0.8961725861415342,
      "learning_rate": 2.6783367995613846e-07,
      "loss": 0.1088,
      "step": 9320
    },
    {
      "epoch": 0.8588013083337173,
      "grad_norm": 0.9104434301871034,
      "learning_rate": 2.6749073645083333e-07,
      "loss": 0.1125,
      "step": 9321
    },
    {
      "epoch": 0.8588934445109873,
      "grad_norm": 0.9244596919139304,
      "learning_rate": 2.6714800023425385e-07,
      "loss": 0.1189,
      "step": 9322
    },
    {
      "epoch": 0.8589855806882573,
      "grad_norm": 0.9368277789935434,
      "learning_rate": 2.668054713382229e-07,
      "loss": 0.1195,
      "step": 9323
    },
    {
      "epoch": 0.8590777168655273,
      "grad_norm": 0.9729852872130562,
      "learning_rate": 2.6646314979454386e-07,
      "loss": 0.1161,
      "step": 9324
    },
    {
      "epoch": 0.8591698530427972,
      "grad_norm": 0.947259066999539,
      "learning_rate": 2.6612103563500165e-07,
      "loss": 0.1269,
      "step": 9325
    },
    {
      "epoch": 0.8592619892200672,
      "grad_norm": 0.9575031330131318,
      "learning_rate": 2.657791288913622e-07,
      "loss": 0.1229,
      "step": 9326
    },
    {
      "epoch": 0.8593541253973372,
      "grad_norm": 0.8933964099773779,
      "learning_rate": 2.6543742959537074e-07,
      "loss": 0.1115,
      "step": 9327
    },
    {
      "epoch": 0.8594462615746072,
      "grad_norm": 0.880212005558546,
      "learning_rate": 2.650959377787549e-07,
      "loss": 0.1084,
      "step": 9328
    },
    {
      "epoch": 0.8595383977518772,
      "grad_norm": 0.9167841432310003,
      "learning_rate": 2.647546534732209e-07,
      "loss": 0.1102,
      "step": 9329
    },
    {
      "epoch": 0.8596305339291472,
      "grad_norm": 0.9309781613665132,
      "learning_rate": 2.6441357671045833e-07,
      "loss": 0.1158,
      "step": 9330
    },
    {
      "epoch": 0.8597226701064172,
      "grad_norm": 0.923428605833851,
      "learning_rate": 2.640727075221361e-07,
      "loss": 0.1164,
      "step": 9331
    },
    {
      "epoch": 0.8598148062836873,
      "grad_norm": 0.9617325314977327,
      "learning_rate": 2.637320459399031e-07,
      "loss": 0.1201,
      "step": 9332
    },
    {
      "epoch": 0.8599069424609573,
      "grad_norm": 0.8600354204904673,
      "learning_rate": 2.6339159199539085e-07,
      "loss": 0.1084,
      "step": 9333
    },
    {
      "epoch": 0.8599990786382273,
      "grad_norm": 0.9258377390351011,
      "learning_rate": 2.6305134572020943e-07,
      "loss": 0.1143,
      "step": 9334
    },
    {
      "epoch": 0.8600912148154973,
      "grad_norm": 0.9465789010041532,
      "learning_rate": 2.6271130714595164e-07,
      "loss": 0.1144,
      "step": 9335
    },
    {
      "epoch": 0.8601833509927673,
      "grad_norm": 0.8715165159103272,
      "learning_rate": 2.623714763041896e-07,
      "loss": 0.1077,
      "step": 9336
    },
    {
      "epoch": 0.8602754871700373,
      "grad_norm": 0.9303896406648642,
      "learning_rate": 2.620318532264765e-07,
      "loss": 0.1173,
      "step": 9337
    },
    {
      "epoch": 0.8603676233473073,
      "grad_norm": 0.9786268133601739,
      "learning_rate": 2.6169243794434725e-07,
      "loss": 0.1242,
      "step": 9338
    },
    {
      "epoch": 0.8604597595245773,
      "grad_norm": 0.9589918812170614,
      "learning_rate": 2.613532304893163e-07,
      "loss": 0.1232,
      "step": 9339
    },
    {
      "epoch": 0.8605518957018473,
      "grad_norm": 0.8785947502166375,
      "learning_rate": 2.610142308928779e-07,
      "loss": 0.1135,
      "step": 9340
    },
    {
      "epoch": 0.8606440318791173,
      "grad_norm": 0.9312433513423943,
      "learning_rate": 2.6067543918650935e-07,
      "loss": 0.1242,
      "step": 9341
    },
    {
      "epoch": 0.8607361680563873,
      "grad_norm": 0.9325694372347554,
      "learning_rate": 2.603368554016672e-07,
      "loss": 0.1129,
      "step": 9342
    },
    {
      "epoch": 0.8608283042336573,
      "grad_norm": 0.9469327317443748,
      "learning_rate": 2.5999847956978963e-07,
      "loss": 0.1158,
      "step": 9343
    },
    {
      "epoch": 0.8609204404109273,
      "grad_norm": 0.90638245153095,
      "learning_rate": 2.5966031172229427e-07,
      "loss": 0.1109,
      "step": 9344
    },
    {
      "epoch": 0.8610125765881974,
      "grad_norm": 0.9413146612471566,
      "learning_rate": 2.593223518905796e-07,
      "loss": 0.1157,
      "step": 9345
    },
    {
      "epoch": 0.8611047127654674,
      "grad_norm": 0.8941304858723512,
      "learning_rate": 2.589846001060259e-07,
      "loss": 0.1084,
      "step": 9346
    },
    {
      "epoch": 0.8611968489427374,
      "grad_norm": 0.9540972957902403,
      "learning_rate": 2.586470563999935e-07,
      "loss": 0.1254,
      "step": 9347
    },
    {
      "epoch": 0.8612889851200074,
      "grad_norm": 0.9197783560409639,
      "learning_rate": 2.5830972080382265e-07,
      "loss": 0.1179,
      "step": 9348
    },
    {
      "epoch": 0.8613811212972774,
      "grad_norm": 1.0135846500017713,
      "learning_rate": 2.5797259334883613e-07,
      "loss": 0.1269,
      "step": 9349
    },
    {
      "epoch": 0.8614732574745474,
      "grad_norm": 0.9956640673249679,
      "learning_rate": 2.5763567406633496e-07,
      "loss": 0.1274,
      "step": 9350
    },
    {
      "epoch": 0.8615653936518174,
      "grad_norm": 0.9982956385527713,
      "learning_rate": 2.5729896298760325e-07,
      "loss": 0.1256,
      "step": 9351
    },
    {
      "epoch": 0.8616575298290874,
      "grad_norm": 0.9997035981643813,
      "learning_rate": 2.569624601439039e-07,
      "loss": 0.1186,
      "step": 9352
    },
    {
      "epoch": 0.8617496660063574,
      "grad_norm": 0.9473032276378903,
      "learning_rate": 2.566261655664812e-07,
      "loss": 0.113,
      "step": 9353
    },
    {
      "epoch": 0.8618418021836274,
      "grad_norm": 0.8904282708574572,
      "learning_rate": 2.562900792865611e-07,
      "loss": 0.1014,
      "step": 9354
    },
    {
      "epoch": 0.8619339383608974,
      "grad_norm": 0.9421774910383477,
      "learning_rate": 2.5595420133534887e-07,
      "loss": 0.1127,
      "step": 9355
    },
    {
      "epoch": 0.8620260745381674,
      "grad_norm": 0.9437685397025191,
      "learning_rate": 2.5561853174402964e-07,
      "loss": 0.1104,
      "step": 9356
    },
    {
      "epoch": 0.8621182107154374,
      "grad_norm": 0.9365914283115228,
      "learning_rate": 2.5528307054377145e-07,
      "loss": 0.1077,
      "step": 9357
    },
    {
      "epoch": 0.8622103468927074,
      "grad_norm": 0.9285492507636135,
      "learning_rate": 2.549478177657219e-07,
      "loss": 0.1051,
      "step": 9358
    },
    {
      "epoch": 0.8623024830699775,
      "grad_norm": 0.9765891819348158,
      "learning_rate": 2.546127734410095e-07,
      "loss": 0.1252,
      "step": 9359
    },
    {
      "epoch": 0.8623946192472475,
      "grad_norm": 0.9287397592156162,
      "learning_rate": 2.542779376007426e-07,
      "loss": 0.119,
      "step": 9360
    },
    {
      "epoch": 0.8624867554245175,
      "grad_norm": 0.9004090598160094,
      "learning_rate": 2.5394331027601056e-07,
      "loss": 0.1024,
      "step": 9361
    },
    {
      "epoch": 0.8625788916017875,
      "grad_norm": 0.9722848299604637,
      "learning_rate": 2.5360889149788375e-07,
      "loss": 0.1241,
      "step": 9362
    },
    {
      "epoch": 0.8626710277790575,
      "grad_norm": 0.906785297368422,
      "learning_rate": 2.532746812974132e-07,
      "loss": 0.114,
      "step": 9363
    },
    {
      "epoch": 0.8627631639563275,
      "grad_norm": 0.9589773361342592,
      "learning_rate": 2.529406797056305e-07,
      "loss": 0.1213,
      "step": 9364
    },
    {
      "epoch": 0.8628553001335975,
      "grad_norm": 0.9966890284084031,
      "learning_rate": 2.5260688675354806e-07,
      "loss": 0.1229,
      "step": 9365
    },
    {
      "epoch": 0.8629474363108675,
      "grad_norm": 0.9667983213030846,
      "learning_rate": 2.5227330247215716e-07,
      "loss": 0.1208,
      "step": 9366
    },
    {
      "epoch": 0.8630395724881375,
      "grad_norm": 0.9291822525151221,
      "learning_rate": 2.519399268924322e-07,
      "loss": 0.1139,
      "step": 9367
    },
    {
      "epoch": 0.8631317086654074,
      "grad_norm": 0.8994909111600728,
      "learning_rate": 2.516067600453273e-07,
      "loss": 0.1034,
      "step": 9368
    },
    {
      "epoch": 0.8632238448426774,
      "grad_norm": 1.0207921145021992,
      "learning_rate": 2.5127380196177634e-07,
      "loss": 0.1284,
      "step": 9369
    },
    {
      "epoch": 0.8633159810199474,
      "grad_norm": 0.9177180668568711,
      "learning_rate": 2.509410526726952e-07,
      "loss": 0.1095,
      "step": 9370
    },
    {
      "epoch": 0.8634081171972174,
      "grad_norm": 0.8712347431050033,
      "learning_rate": 2.5060851220897906e-07,
      "loss": 0.1001,
      "step": 9371
    },
    {
      "epoch": 0.8635002533744875,
      "grad_norm": 0.9886160970135562,
      "learning_rate": 2.5027618060150526e-07,
      "loss": 0.1185,
      "step": 9372
    },
    {
      "epoch": 0.8635923895517575,
      "grad_norm": 0.9670625304491519,
      "learning_rate": 2.4994405788112933e-07,
      "loss": 0.1218,
      "step": 9373
    },
    {
      "epoch": 0.8636845257290275,
      "grad_norm": 0.9168572699846476,
      "learning_rate": 2.4961214407869e-07,
      "loss": 0.1081,
      "step": 9374
    },
    {
      "epoch": 0.8637766619062975,
      "grad_norm": 0.9599698651657775,
      "learning_rate": 2.492804392250059e-07,
      "loss": 0.1181,
      "step": 9375
    },
    {
      "epoch": 0.8638687980835675,
      "grad_norm": 0.9619307269182482,
      "learning_rate": 2.489489433508752e-07,
      "loss": 0.1295,
      "step": 9376
    },
    {
      "epoch": 0.8639609342608375,
      "grad_norm": 0.9576970010297813,
      "learning_rate": 2.486176564870768e-07,
      "loss": 0.1197,
      "step": 9377
    },
    {
      "epoch": 0.8640530704381075,
      "grad_norm": 1.0203235684877179,
      "learning_rate": 2.4828657866437123e-07,
      "loss": 0.1371,
      "step": 9378
    },
    {
      "epoch": 0.8641452066153775,
      "grad_norm": 0.9495758122219625,
      "learning_rate": 2.479557099134991e-07,
      "loss": 0.1164,
      "step": 9379
    },
    {
      "epoch": 0.8642373427926475,
      "grad_norm": 0.9214388616761735,
      "learning_rate": 2.4762505026518224e-07,
      "loss": 0.1094,
      "step": 9380
    },
    {
      "epoch": 0.8643294789699175,
      "grad_norm": 0.9413072214892965,
      "learning_rate": 2.4729459975012194e-07,
      "loss": 0.1158,
      "step": 9381
    },
    {
      "epoch": 0.8644216151471875,
      "grad_norm": 0.9354397693146485,
      "learning_rate": 2.46964358399e-07,
      "loss": 0.1136,
      "step": 9382
    },
    {
      "epoch": 0.8645137513244575,
      "grad_norm": 0.9088302027421223,
      "learning_rate": 2.4663432624247975e-07,
      "loss": 0.1124,
      "step": 9383
    },
    {
      "epoch": 0.8646058875017275,
      "grad_norm": 0.8911370384300263,
      "learning_rate": 2.4630450331120547e-07,
      "loss": 0.1108,
      "step": 9384
    },
    {
      "epoch": 0.8646980236789975,
      "grad_norm": 0.9630587035183339,
      "learning_rate": 2.4597488963579995e-07,
      "loss": 0.1162,
      "step": 9385
    },
    {
      "epoch": 0.8647901598562676,
      "grad_norm": 0.9209292229525733,
      "learning_rate": 2.4564548524686925e-07,
      "loss": 0.1106,
      "step": 9386
    },
    {
      "epoch": 0.8648822960335376,
      "grad_norm": 0.9432471817490239,
      "learning_rate": 2.4531629017499724e-07,
      "loss": 0.1097,
      "step": 9387
    },
    {
      "epoch": 0.8649744322108076,
      "grad_norm": 0.9825332338153762,
      "learning_rate": 2.449873044507503e-07,
      "loss": 0.1241,
      "step": 9388
    },
    {
      "epoch": 0.8650665683880776,
      "grad_norm": 0.9408846109600006,
      "learning_rate": 2.446585281046751e-07,
      "loss": 0.1127,
      "step": 9389
    },
    {
      "epoch": 0.8651587045653476,
      "grad_norm": 0.9401838784688298,
      "learning_rate": 2.443299611672981e-07,
      "loss": 0.1269,
      "step": 9390
    },
    {
      "epoch": 0.8652508407426176,
      "grad_norm": 0.9111818343603258,
      "learning_rate": 2.44001603669127e-07,
      "loss": 0.119,
      "step": 9391
    },
    {
      "epoch": 0.8653429769198876,
      "grad_norm": 0.906464655426303,
      "learning_rate": 2.4367345564065003e-07,
      "loss": 0.1097,
      "step": 9392
    },
    {
      "epoch": 0.8654351130971576,
      "grad_norm": 0.875938489337497,
      "learning_rate": 2.433455171123356e-07,
      "loss": 0.1069,
      "step": 9393
    },
    {
      "epoch": 0.8655272492744276,
      "grad_norm": 0.9003402477495523,
      "learning_rate": 2.4301778811463255e-07,
      "loss": 0.1077,
      "step": 9394
    },
    {
      "epoch": 0.8656193854516976,
      "grad_norm": 0.9109081886705044,
      "learning_rate": 2.426902686779706e-07,
      "loss": 0.1186,
      "step": 9395
    },
    {
      "epoch": 0.8657115216289676,
      "grad_norm": 0.9609017270753635,
      "learning_rate": 2.4236295883276e-07,
      "loss": 0.1208,
      "step": 9396
    },
    {
      "epoch": 0.8658036578062376,
      "grad_norm": 0.898910913025197,
      "learning_rate": 2.420358586093921e-07,
      "loss": 0.1047,
      "step": 9397
    },
    {
      "epoch": 0.8658957939835076,
      "grad_norm": 0.9206843573844598,
      "learning_rate": 2.4170896803823785e-07,
      "loss": 0.1115,
      "step": 9398
    },
    {
      "epoch": 0.8659879301607776,
      "grad_norm": 0.9272830235116014,
      "learning_rate": 2.4138228714964853e-07,
      "loss": 0.1091,
      "step": 9399
    },
    {
      "epoch": 0.8660800663380477,
      "grad_norm": 0.9547090967557088,
      "learning_rate": 2.4105581597395705e-07,
      "loss": 0.1232,
      "step": 9400
    },
    {
      "epoch": 0.8661722025153177,
      "grad_norm": 0.9480953903908746,
      "learning_rate": 2.4072955454147643e-07,
      "loss": 0.1203,
      "step": 9401
    },
    {
      "epoch": 0.8662643386925877,
      "grad_norm": 0.9798031746500194,
      "learning_rate": 2.4040350288249944e-07,
      "loss": 0.1242,
      "step": 9402
    },
    {
      "epoch": 0.8663564748698577,
      "grad_norm": 0.9522213276476387,
      "learning_rate": 2.400776610273006e-07,
      "loss": 0.1155,
      "step": 9403
    },
    {
      "epoch": 0.8664486110471277,
      "grad_norm": 0.9347048484577208,
      "learning_rate": 2.397520290061339e-07,
      "loss": 0.1121,
      "step": 9404
    },
    {
      "epoch": 0.8665407472243977,
      "grad_norm": 0.9501824992765463,
      "learning_rate": 2.394266068492351e-07,
      "loss": 0.1171,
      "step": 9405
    },
    {
      "epoch": 0.8666328834016677,
      "grad_norm": 0.8502324502165575,
      "learning_rate": 2.391013945868187e-07,
      "loss": 0.0941,
      "step": 9406
    },
    {
      "epoch": 0.8667250195789377,
      "grad_norm": 0.9133438904722627,
      "learning_rate": 2.38776392249081e-07,
      "loss": 0.1126,
      "step": 9407
    },
    {
      "epoch": 0.8668171557562077,
      "grad_norm": 0.9547356127125062,
      "learning_rate": 2.38451599866199e-07,
      "loss": 0.1272,
      "step": 9408
    },
    {
      "epoch": 0.8669092919334777,
      "grad_norm": 1.0557212654833275,
      "learning_rate": 2.381270174683295e-07,
      "loss": 0.1321,
      "step": 9409
    },
    {
      "epoch": 0.8670014281107477,
      "grad_norm": 0.9335280236927755,
      "learning_rate": 2.3780264508560942e-07,
      "loss": 0.1165,
      "step": 9410
    },
    {
      "epoch": 0.8670935642880176,
      "grad_norm": 0.9290477286297832,
      "learning_rate": 2.3747848274815716e-07,
      "loss": 0.1198,
      "step": 9411
    },
    {
      "epoch": 0.8671857004652876,
      "grad_norm": 0.9789407678799273,
      "learning_rate": 2.3715453048607118e-07,
      "loss": 0.1263,
      "step": 9412
    },
    {
      "epoch": 0.8672778366425578,
      "grad_norm": 1.0063340760093704,
      "learning_rate": 2.368307883294313e-07,
      "loss": 0.1116,
      "step": 9413
    },
    {
      "epoch": 0.8673699728198278,
      "grad_norm": 0.9504096460020395,
      "learning_rate": 2.3650725630829598e-07,
      "loss": 0.1236,
      "step": 9414
    },
    {
      "epoch": 0.8674621089970977,
      "grad_norm": 0.9909104584146814,
      "learning_rate": 2.3618393445270504e-07,
      "loss": 0.1241,
      "step": 9415
    },
    {
      "epoch": 0.8675542451743677,
      "grad_norm": 0.8738058725690909,
      "learning_rate": 2.3586082279267952e-07,
      "loss": 0.1042,
      "step": 9416
    },
    {
      "epoch": 0.8676463813516377,
      "grad_norm": 0.9207349471071707,
      "learning_rate": 2.355379213582204e-07,
      "loss": 0.121,
      "step": 9417
    },
    {
      "epoch": 0.8677385175289077,
      "grad_norm": 0.9308193808957346,
      "learning_rate": 2.3521523017930954e-07,
      "loss": 0.1124,
      "step": 9418
    },
    {
      "epoch": 0.8678306537061777,
      "grad_norm": 0.9365520666702065,
      "learning_rate": 2.3489274928590795e-07,
      "loss": 0.1225,
      "step": 9419
    },
    {
      "epoch": 0.8679227898834477,
      "grad_norm": 0.9492932580673579,
      "learning_rate": 2.3457047870795808e-07,
      "loss": 0.1151,
      "step": 9420
    },
    {
      "epoch": 0.8680149260607177,
      "grad_norm": 0.9490480593481834,
      "learning_rate": 2.3424841847538292e-07,
      "loss": 0.1168,
      "step": 9421
    },
    {
      "epoch": 0.8681070622379877,
      "grad_norm": 0.9449269641389587,
      "learning_rate": 2.3392656861808666e-07,
      "loss": 0.1193,
      "step": 9422
    },
    {
      "epoch": 0.8681991984152577,
      "grad_norm": 0.9233460585881546,
      "learning_rate": 2.3360492916595174e-07,
      "loss": 0.1207,
      "step": 9423
    },
    {
      "epoch": 0.8682913345925277,
      "grad_norm": 0.9503848489495152,
      "learning_rate": 2.332835001488437e-07,
      "loss": 0.1091,
      "step": 9424
    },
    {
      "epoch": 0.8683834707697977,
      "grad_norm": 0.9610048378704811,
      "learning_rate": 2.3296228159660594e-07,
      "loss": 0.125,
      "step": 9425
    },
    {
      "epoch": 0.8684756069470677,
      "grad_norm": 0.9498156086623913,
      "learning_rate": 2.3264127353906485e-07,
      "loss": 0.1147,
      "step": 9426
    },
    {
      "epoch": 0.8685677431243378,
      "grad_norm": 0.914848634914354,
      "learning_rate": 2.323204760060252e-07,
      "loss": 0.1096,
      "step": 9427
    },
    {
      "epoch": 0.8686598793016078,
      "grad_norm": 0.8394385416081372,
      "learning_rate": 2.3199988902727317e-07,
      "loss": 0.1005,
      "step": 9428
    },
    {
      "epoch": 0.8687520154788778,
      "grad_norm": 0.9696929167719233,
      "learning_rate": 2.3167951263257633e-07,
      "loss": 0.1304,
      "step": 9429
    },
    {
      "epoch": 0.8688441516561478,
      "grad_norm": 0.9503528872420482,
      "learning_rate": 2.313593468516806e-07,
      "loss": 0.1127,
      "step": 9430
    },
    {
      "epoch": 0.8689362878334178,
      "grad_norm": 0.9408948277709294,
      "learning_rate": 2.3103939171431305e-07,
      "loss": 0.1247,
      "step": 9431
    },
    {
      "epoch": 0.8690284240106878,
      "grad_norm": 0.9920825083534364,
      "learning_rate": 2.307196472501824e-07,
      "loss": 0.1306,
      "step": 9432
    },
    {
      "epoch": 0.8691205601879578,
      "grad_norm": 0.906482830614097,
      "learning_rate": 2.3040011348897689e-07,
      "loss": 0.1105,
      "step": 9433
    },
    {
      "epoch": 0.8692126963652278,
      "grad_norm": 0.9142460311821695,
      "learning_rate": 2.3008079046036525e-07,
      "loss": 0.115,
      "step": 9434
    },
    {
      "epoch": 0.8693048325424978,
      "grad_norm": 0.9322906020030624,
      "learning_rate": 2.2976167819399652e-07,
      "loss": 0.1076,
      "step": 9435
    },
    {
      "epoch": 0.8693969687197678,
      "grad_norm": 0.8997499764924649,
      "learning_rate": 2.294427767195001e-07,
      "loss": 0.1058,
      "step": 9436
    },
    {
      "epoch": 0.8694891048970378,
      "grad_norm": 0.9470817054742046,
      "learning_rate": 2.291240860664859e-07,
      "loss": 0.1161,
      "step": 9437
    },
    {
      "epoch": 0.8695812410743078,
      "grad_norm": 0.922169310738529,
      "learning_rate": 2.288056062645455e-07,
      "loss": 0.1101,
      "step": 9438
    },
    {
      "epoch": 0.8696733772515778,
      "grad_norm": 0.8836399103375951,
      "learning_rate": 2.2848733734324835e-07,
      "loss": 0.1008,
      "step": 9439
    },
    {
      "epoch": 0.8697655134288479,
      "grad_norm": 0.9790710542189891,
      "learning_rate": 2.281692793321469e-07,
      "loss": 0.1293,
      "step": 9440
    },
    {
      "epoch": 0.8698576496061179,
      "grad_norm": 0.9605080787153812,
      "learning_rate": 2.2785143226077166e-07,
      "loss": 0.1233,
      "step": 9441
    },
    {
      "epoch": 0.8699497857833879,
      "grad_norm": 0.9728766436641451,
      "learning_rate": 2.2753379615863575e-07,
      "loss": 0.1296,
      "step": 9442
    },
    {
      "epoch": 0.8700419219606579,
      "grad_norm": 0.9012599081009991,
      "learning_rate": 2.2721637105523193e-07,
      "loss": 0.1169,
      "step": 9443
    },
    {
      "epoch": 0.8701340581379279,
      "grad_norm": 0.9449224767636791,
      "learning_rate": 2.268991569800319e-07,
      "loss": 0.1167,
      "step": 9444
    },
    {
      "epoch": 0.8702261943151979,
      "grad_norm": 0.9397622392423732,
      "learning_rate": 2.2658215396249046e-07,
      "loss": 0.1224,
      "step": 9445
    },
    {
      "epoch": 0.8703183304924679,
      "grad_norm": 1.0206942614633827,
      "learning_rate": 2.2626536203204014e-07,
      "loss": 0.1238,
      "step": 9446
    },
    {
      "epoch": 0.8704104666697379,
      "grad_norm": 0.9508061328678594,
      "learning_rate": 2.2594878121809633e-07,
      "loss": 0.1181,
      "step": 9447
    },
    {
      "epoch": 0.8705026028470079,
      "grad_norm": 0.9776800478848374,
      "learning_rate": 2.2563241155005216e-07,
      "loss": 0.1227,
      "step": 9448
    },
    {
      "epoch": 0.8705947390242779,
      "grad_norm": 0.9499755485624733,
      "learning_rate": 2.2531625305728362e-07,
      "loss": 0.1176,
      "step": 9449
    },
    {
      "epoch": 0.8706868752015479,
      "grad_norm": 0.9626981166059405,
      "learning_rate": 2.2500030576914606e-07,
      "loss": 0.1041,
      "step": 9450
    },
    {
      "epoch": 0.8707790113788179,
      "grad_norm": 0.9322085622455839,
      "learning_rate": 2.2468456971497493e-07,
      "loss": 0.1215,
      "step": 9451
    },
    {
      "epoch": 0.8708711475560879,
      "grad_norm": 1.0594714922946018,
      "learning_rate": 2.2436904492408596e-07,
      "loss": 0.1197,
      "step": 9452
    },
    {
      "epoch": 0.8709632837333579,
      "grad_norm": 0.9755136585207467,
      "learning_rate": 2.2405373142577597e-07,
      "loss": 0.1261,
      "step": 9453
    },
    {
      "epoch": 0.871055419910628,
      "grad_norm": 0.9455054395345747,
      "learning_rate": 2.237386292493221e-07,
      "loss": 0.127,
      "step": 9454
    },
    {
      "epoch": 0.871147556087898,
      "grad_norm": 0.9658714454786119,
      "learning_rate": 2.2342373842398208e-07,
      "loss": 0.122,
      "step": 9455
    },
    {
      "epoch": 0.871239692265168,
      "grad_norm": 0.9632137472514127,
      "learning_rate": 2.2310905897899275e-07,
      "loss": 0.117,
      "step": 9456
    },
    {
      "epoch": 0.871331828442438,
      "grad_norm": 0.9449426092351376,
      "learning_rate": 2.227945909435719e-07,
      "loss": 0.1128,
      "step": 9457
    },
    {
      "epoch": 0.871423964619708,
      "grad_norm": 0.9567787882080494,
      "learning_rate": 2.224803343469184e-07,
      "loss": 0.1213,
      "step": 9458
    },
    {
      "epoch": 0.8715161007969779,
      "grad_norm": 0.9823985814029087,
      "learning_rate": 2.2216628921821138e-07,
      "loss": 0.1114,
      "step": 9459
    },
    {
      "epoch": 0.8716082369742479,
      "grad_norm": 0.9140140102629726,
      "learning_rate": 2.2185245558660918e-07,
      "loss": 0.1113,
      "step": 9460
    },
    {
      "epoch": 0.8717003731515179,
      "grad_norm": 1.0126259446458135,
      "learning_rate": 2.215388334812521e-07,
      "loss": 0.1237,
      "step": 9461
    },
    {
      "epoch": 0.8717925093287879,
      "grad_norm": 0.875242477779259,
      "learning_rate": 2.2122542293125883e-07,
      "loss": 0.1051,
      "step": 9462
    },
    {
      "epoch": 0.8718846455060579,
      "grad_norm": 0.9070067464486096,
      "learning_rate": 2.2091222396573104e-07,
      "loss": 0.105,
      "step": 9463
    },
    {
      "epoch": 0.8719767816833279,
      "grad_norm": 0.8634324183365186,
      "learning_rate": 2.20599236613748e-07,
      "loss": 0.1051,
      "step": 9464
    },
    {
      "epoch": 0.8720689178605979,
      "grad_norm": 0.9097237773646777,
      "learning_rate": 2.2028646090437117e-07,
      "loss": 0.1156,
      "step": 9465
    },
    {
      "epoch": 0.8721610540378679,
      "grad_norm": 0.9388952429509481,
      "learning_rate": 2.199738968666418e-07,
      "loss": 0.1137,
      "step": 9466
    },
    {
      "epoch": 0.8722531902151379,
      "grad_norm": 0.9697976669781796,
      "learning_rate": 2.1966154452958216e-07,
      "loss": 0.1247,
      "step": 9467
    },
    {
      "epoch": 0.872345326392408,
      "grad_norm": 0.8989941935550352,
      "learning_rate": 2.1934940392219272e-07,
      "loss": 0.1066,
      "step": 9468
    },
    {
      "epoch": 0.872437462569678,
      "grad_norm": 0.94116231002316,
      "learning_rate": 2.190374750734567e-07,
      "loss": 0.1197,
      "step": 9469
    },
    {
      "epoch": 0.872529598746948,
      "grad_norm": 0.9274780247234806,
      "learning_rate": 2.187257580123367e-07,
      "loss": 0.1091,
      "step": 9470
    },
    {
      "epoch": 0.872621734924218,
      "grad_norm": 0.9605073108855209,
      "learning_rate": 2.1841425276777544e-07,
      "loss": 0.1204,
      "step": 9471
    },
    {
      "epoch": 0.872713871101488,
      "grad_norm": 0.9908749541818052,
      "learning_rate": 2.1810295936869675e-07,
      "loss": 0.1285,
      "step": 9472
    },
    {
      "epoch": 0.872806007278758,
      "grad_norm": 0.9926019750513289,
      "learning_rate": 2.1779187784400385e-07,
      "loss": 0.1288,
      "step": 9473
    },
    {
      "epoch": 0.872898143456028,
      "grad_norm": 0.9483689313827183,
      "learning_rate": 2.1748100822258034e-07,
      "loss": 0.1141,
      "step": 9474
    },
    {
      "epoch": 0.872990279633298,
      "grad_norm": 0.9137883591856961,
      "learning_rate": 2.171703505332909e-07,
      "loss": 0.108,
      "step": 9475
    },
    {
      "epoch": 0.873082415810568,
      "grad_norm": 0.9370216914924847,
      "learning_rate": 2.1685990480498048e-07,
      "loss": 0.126,
      "step": 9476
    },
    {
      "epoch": 0.873174551987838,
      "grad_norm": 0.9391599852602918,
      "learning_rate": 2.1654967106647328e-07,
      "loss": 0.1134,
      "step": 9477
    },
    {
      "epoch": 0.873266688165108,
      "grad_norm": 0.9333893143260931,
      "learning_rate": 2.1623964934657516e-07,
      "loss": 0.1207,
      "step": 9478
    },
    {
      "epoch": 0.873358824342378,
      "grad_norm": 0.921403332616667,
      "learning_rate": 2.159298396740711e-07,
      "loss": 0.1184,
      "step": 9479
    },
    {
      "epoch": 0.873450960519648,
      "grad_norm": 0.989075346325913,
      "learning_rate": 2.1562024207772758e-07,
      "loss": 0.1221,
      "step": 9480
    },
    {
      "epoch": 0.8735430966969181,
      "grad_norm": 0.8899903434445905,
      "learning_rate": 2.1531085658628992e-07,
      "loss": 0.1073,
      "step": 9481
    },
    {
      "epoch": 0.8736352328741881,
      "grad_norm": 0.9766428519858893,
      "learning_rate": 2.1500168322848515e-07,
      "loss": 0.1219,
      "step": 9482
    },
    {
      "epoch": 0.8737273690514581,
      "grad_norm": 0.9650695272721265,
      "learning_rate": 2.1469272203302055e-07,
      "loss": 0.1207,
      "step": 9483
    },
    {
      "epoch": 0.8738195052287281,
      "grad_norm": 0.9154717048048135,
      "learning_rate": 2.143839730285824e-07,
      "loss": 0.1136,
      "step": 9484
    },
    {
      "epoch": 0.8739116414059981,
      "grad_norm": 0.9660021884154858,
      "learning_rate": 2.1407543624383798e-07,
      "loss": 0.1162,
      "step": 9485
    },
    {
      "epoch": 0.8740037775832681,
      "grad_norm": 0.9456680462862941,
      "learning_rate": 2.1376711170743553e-07,
      "loss": 0.1213,
      "step": 9486
    },
    {
      "epoch": 0.8740959137605381,
      "grad_norm": 0.9118077582257975,
      "learning_rate": 2.134589994480027e-07,
      "loss": 0.1139,
      "step": 9487
    },
    {
      "epoch": 0.8741880499378081,
      "grad_norm": 0.9085493885735575,
      "learning_rate": 2.1315109949414824e-07,
      "loss": 0.1191,
      "step": 9488
    },
    {
      "epoch": 0.8742801861150781,
      "grad_norm": 0.9387869250564302,
      "learning_rate": 2.1284341187446046e-07,
      "loss": 0.1219,
      "step": 9489
    },
    {
      "epoch": 0.8743723222923481,
      "grad_norm": 0.9152316051258936,
      "learning_rate": 2.1253593661750727e-07,
      "loss": 0.1052,
      "step": 9490
    },
    {
      "epoch": 0.8744644584696181,
      "grad_norm": 0.979761804929908,
      "learning_rate": 2.1222867375183893e-07,
      "loss": 0.1236,
      "step": 9491
    },
    {
      "epoch": 0.8745565946468881,
      "grad_norm": 0.931801417869082,
      "learning_rate": 2.1192162330598453e-07,
      "loss": 0.1145,
      "step": 9492
    },
    {
      "epoch": 0.8746487308241581,
      "grad_norm": 0.8688388184702385,
      "learning_rate": 2.1161478530845353e-07,
      "loss": 0.1095,
      "step": 9493
    },
    {
      "epoch": 0.8747408670014281,
      "grad_norm": 0.9228613560998604,
      "learning_rate": 2.1130815978773616e-07,
      "loss": 0.1261,
      "step": 9494
    },
    {
      "epoch": 0.8748330031786982,
      "grad_norm": 0.9595529954449742,
      "learning_rate": 2.1100174677230217e-07,
      "loss": 0.1235,
      "step": 9495
    },
    {
      "epoch": 0.8749251393559682,
      "grad_norm": 0.8906914567887639,
      "learning_rate": 2.1069554629060297e-07,
      "loss": 0.1102,
      "step": 9496
    },
    {
      "epoch": 0.8750172755332382,
      "grad_norm": 0.9461347234134713,
      "learning_rate": 2.10389558371068e-07,
      "loss": 0.1151,
      "step": 9497
    },
    {
      "epoch": 0.8751094117105082,
      "grad_norm": 0.9429358001436208,
      "learning_rate": 2.1008378304210876e-07,
      "loss": 0.1251,
      "step": 9498
    },
    {
      "epoch": 0.8752015478877782,
      "grad_norm": 0.9420068293333895,
      "learning_rate": 2.0977822033211748e-07,
      "loss": 0.1164,
      "step": 9499
    },
    {
      "epoch": 0.8752936840650481,
      "grad_norm": 1.0217294748310057,
      "learning_rate": 2.0947287026946428e-07,
      "loss": 0.1175,
      "step": 9500
    },
    {
      "epoch": 0.8752936840650481,
      "eval_loss": 0.1174582913517952,
      "eval_runtime": 299.0187,
      "eval_samples_per_second": 23.467,
      "eval_steps_per_second": 2.936,
      "step": 9500
    },
    {
      "epoch": 0.8753858202423181,
      "grad_norm": 0.8911898224450792,
      "learning_rate": 2.091677328825023e-07,
      "loss": 0.1057,
      "step": 9501
    },
    {
      "epoch": 0.8754779564195881,
      "grad_norm": 0.9062661157706274,
      "learning_rate": 2.0886280819956223e-07,
      "loss": 0.1122,
      "step": 9502
    },
    {
      "epoch": 0.8755700925968581,
      "grad_norm": 0.9721174742130575,
      "learning_rate": 2.0855809624895694e-07,
      "loss": 0.1268,
      "step": 9503
    },
    {
      "epoch": 0.8756622287741281,
      "grad_norm": 0.9541176980173214,
      "learning_rate": 2.082535970589794e-07,
      "loss": 0.1225,
      "step": 9504
    },
    {
      "epoch": 0.8757543649513981,
      "grad_norm": 0.9238750790195943,
      "learning_rate": 2.0794931065790226e-07,
      "loss": 0.1134,
      "step": 9505
    },
    {
      "epoch": 0.8758465011286681,
      "grad_norm": 0.9486934776566898,
      "learning_rate": 2.076452370739776e-07,
      "loss": 0.1185,
      "step": 9506
    },
    {
      "epoch": 0.8759386373059381,
      "grad_norm": 1.0458887278730715,
      "learning_rate": 2.0734137633543954e-07,
      "loss": 0.1309,
      "step": 9507
    },
    {
      "epoch": 0.8760307734832082,
      "grad_norm": 0.944357664608892,
      "learning_rate": 2.0703772847050136e-07,
      "loss": 0.1102,
      "step": 9508
    },
    {
      "epoch": 0.8761229096604782,
      "grad_norm": 0.9642165230298722,
      "learning_rate": 2.0673429350735742e-07,
      "loss": 0.1214,
      "step": 9509
    },
    {
      "epoch": 0.8762150458377482,
      "grad_norm": 0.8546678704665883,
      "learning_rate": 2.06431071474181e-07,
      "loss": 0.1051,
      "step": 9510
    },
    {
      "epoch": 0.8763071820150182,
      "grad_norm": 0.9556155038340977,
      "learning_rate": 2.0612806239912602e-07,
      "loss": 0.1133,
      "step": 9511
    },
    {
      "epoch": 0.8763993181922882,
      "grad_norm": 0.940205240213181,
      "learning_rate": 2.0582526631032745e-07,
      "loss": 0.1122,
      "step": 9512
    },
    {
      "epoch": 0.8764914543695582,
      "grad_norm": 0.9471384282662257,
      "learning_rate": 2.0552268323590002e-07,
      "loss": 0.1183,
      "step": 9513
    },
    {
      "epoch": 0.8765835905468282,
      "grad_norm": 0.9996225062721664,
      "learning_rate": 2.052203132039382e-07,
      "loss": 0.1233,
      "step": 9514
    },
    {
      "epoch": 0.8766757267240982,
      "grad_norm": 0.9260611802417887,
      "learning_rate": 2.0491815624251733e-07,
      "loss": 0.1184,
      "step": 9515
    },
    {
      "epoch": 0.8767678629013682,
      "grad_norm": 0.9071613612784006,
      "learning_rate": 2.046162123796927e-07,
      "loss": 0.1145,
      "step": 9516
    },
    {
      "epoch": 0.8768599990786382,
      "grad_norm": 0.9863259108007564,
      "learning_rate": 2.043144816434997e-07,
      "loss": 0.1326,
      "step": 9517
    },
    {
      "epoch": 0.8769521352559082,
      "grad_norm": 0.8971641251120323,
      "learning_rate": 2.0401296406195426e-07,
      "loss": 0.1136,
      "step": 9518
    },
    {
      "epoch": 0.8770442714331782,
      "grad_norm": 0.897002354512559,
      "learning_rate": 2.0371165966305173e-07,
      "loss": 0.1127,
      "step": 9519
    },
    {
      "epoch": 0.8771364076104482,
      "grad_norm": 0.9075910207315132,
      "learning_rate": 2.0341056847476947e-07,
      "loss": 0.1084,
      "step": 9520
    },
    {
      "epoch": 0.8772285437877182,
      "grad_norm": 0.9592976228033577,
      "learning_rate": 2.031096905250629e-07,
      "loss": 0.1112,
      "step": 9521
    },
    {
      "epoch": 0.8773206799649883,
      "grad_norm": 0.9076761866207034,
      "learning_rate": 2.0280902584186828e-07,
      "loss": 0.1172,
      "step": 9522
    },
    {
      "epoch": 0.8774128161422583,
      "grad_norm": 1.0029980746167146,
      "learning_rate": 2.02508574453103e-07,
      "loss": 0.1247,
      "step": 9523
    },
    {
      "epoch": 0.8775049523195283,
      "grad_norm": 0.9757391160570924,
      "learning_rate": 2.0220833638666393e-07,
      "loss": 0.1203,
      "step": 9524
    },
    {
      "epoch": 0.8775970884967983,
      "grad_norm": 0.943872802307776,
      "learning_rate": 2.0190831167042846e-07,
      "loss": 0.1152,
      "step": 9525
    },
    {
      "epoch": 0.8776892246740683,
      "grad_norm": 0.9287559296126232,
      "learning_rate": 2.016085003322535e-07,
      "loss": 0.1214,
      "step": 9526
    },
    {
      "epoch": 0.8777813608513383,
      "grad_norm": 0.9162111580851442,
      "learning_rate": 2.013089023999762e-07,
      "loss": 0.1146,
      "step": 9527
    },
    {
      "epoch": 0.8778734970286083,
      "grad_norm": 0.9129520056654986,
      "learning_rate": 2.010095179014146e-07,
      "loss": 0.1116,
      "step": 9528
    },
    {
      "epoch": 0.8779656332058783,
      "grad_norm": 0.9523827132430306,
      "learning_rate": 2.00710346864367e-07,
      "loss": 0.1279,
      "step": 9529
    },
    {
      "epoch": 0.8780577693831483,
      "grad_norm": 0.9521087669536956,
      "learning_rate": 2.0041138931661124e-07,
      "loss": 0.1249,
      "step": 9530
    },
    {
      "epoch": 0.8781499055604183,
      "grad_norm": 0.9189194425795829,
      "learning_rate": 2.0011264528590562e-07,
      "loss": 0.1194,
      "step": 9531
    },
    {
      "epoch": 0.8782420417376883,
      "grad_norm": 0.9487815314113962,
      "learning_rate": 1.9981411479998798e-07,
      "loss": 0.1085,
      "step": 9532
    },
    {
      "epoch": 0.8783341779149583,
      "grad_norm": 0.963493565405593,
      "learning_rate": 1.9951579788657748e-07,
      "loss": 0.1102,
      "step": 9533
    },
    {
      "epoch": 0.8784263140922283,
      "grad_norm": 0.9495227621628857,
      "learning_rate": 1.9921769457337286e-07,
      "loss": 0.1242,
      "step": 9534
    },
    {
      "epoch": 0.8785184502694983,
      "grad_norm": 0.9305475533838578,
      "learning_rate": 1.9891980488805278e-07,
      "loss": 0.114,
      "step": 9535
    },
    {
      "epoch": 0.8786105864467684,
      "grad_norm": 0.9693885781366122,
      "learning_rate": 1.986221288582768e-07,
      "loss": 0.1306,
      "step": 9536
    },
    {
      "epoch": 0.8787027226240384,
      "grad_norm": 0.9847652103702126,
      "learning_rate": 1.9832466651168337e-07,
      "loss": 0.1214,
      "step": 9537
    },
    {
      "epoch": 0.8787948588013084,
      "grad_norm": 0.8562607646907413,
      "learning_rate": 1.9802741787589258e-07,
      "loss": 0.1056,
      "step": 9538
    },
    {
      "epoch": 0.8788869949785784,
      "grad_norm": 0.9232649256601291,
      "learning_rate": 1.977303829785035e-07,
      "loss": 0.116,
      "step": 9539
    },
    {
      "epoch": 0.8789791311558484,
      "grad_norm": 0.9829657229119272,
      "learning_rate": 1.9743356184709628e-07,
      "loss": 0.1243,
      "step": 9540
    },
    {
      "epoch": 0.8790712673331184,
      "grad_norm": 0.9827295767927928,
      "learning_rate": 1.9713695450923054e-07,
      "loss": 0.129,
      "step": 9541
    },
    {
      "epoch": 0.8791634035103884,
      "grad_norm": 0.94977481989151,
      "learning_rate": 1.968405609924473e-07,
      "loss": 0.1198,
      "step": 9542
    },
    {
      "epoch": 0.8792555396876583,
      "grad_norm": 0.9891164225609158,
      "learning_rate": 1.9654438132426485e-07,
      "loss": 0.1231,
      "step": 9543
    },
    {
      "epoch": 0.8793476758649283,
      "grad_norm": 0.9193503040315959,
      "learning_rate": 1.9624841553218476e-07,
      "loss": 0.1083,
      "step": 9544
    },
    {
      "epoch": 0.8794398120421983,
      "grad_norm": 1.0548903153761093,
      "learning_rate": 1.9595266364368705e-07,
      "loss": 0.1317,
      "step": 9545
    },
    {
      "epoch": 0.8795319482194683,
      "grad_norm": 0.8884916257850111,
      "learning_rate": 1.9565712568623274e-07,
      "loss": 0.1113,
      "step": 9546
    },
    {
      "epoch": 0.8796240843967383,
      "grad_norm": 0.9715085221952553,
      "learning_rate": 1.9536180168726214e-07,
      "loss": 0.1127,
      "step": 9547
    },
    {
      "epoch": 0.8797162205740083,
      "grad_norm": 0.9821115353932246,
      "learning_rate": 1.9506669167419667e-07,
      "loss": 0.119,
      "step": 9548
    },
    {
      "epoch": 0.8798083567512784,
      "grad_norm": 0.9843575732122866,
      "learning_rate": 1.9477179567443632e-07,
      "loss": 0.1274,
      "step": 9549
    },
    {
      "epoch": 0.8799004929285484,
      "grad_norm": 0.9642181310696486,
      "learning_rate": 1.9447711371536365e-07,
      "loss": 0.1202,
      "step": 9550
    },
    {
      "epoch": 0.8799926291058184,
      "grad_norm": 1.0037179892947732,
      "learning_rate": 1.9418264582433844e-07,
      "loss": 0.1171,
      "step": 9551
    },
    {
      "epoch": 0.8800847652830884,
      "grad_norm": 0.9337763265065536,
      "learning_rate": 1.9388839202870268e-07,
      "loss": 0.1167,
      "step": 9552
    },
    {
      "epoch": 0.8801769014603584,
      "grad_norm": 0.8987444148210977,
      "learning_rate": 1.9359435235577818e-07,
      "loss": 0.1042,
      "step": 9553
    },
    {
      "epoch": 0.8802690376376284,
      "grad_norm": 0.9502015836724473,
      "learning_rate": 1.9330052683286666e-07,
      "loss": 0.1253,
      "step": 9554
    },
    {
      "epoch": 0.8803611738148984,
      "grad_norm": 0.93018808175646,
      "learning_rate": 1.930069154872488e-07,
      "loss": 0.1134,
      "step": 9555
    },
    {
      "epoch": 0.8804533099921684,
      "grad_norm": 0.9617089178446352,
      "learning_rate": 1.92713518346187e-07,
      "loss": 0.1213,
      "step": 9556
    },
    {
      "epoch": 0.8805454461694384,
      "grad_norm": 0.9348159689266482,
      "learning_rate": 1.9242033543692362e-07,
      "loss": 0.1185,
      "step": 9557
    },
    {
      "epoch": 0.8806375823467084,
      "grad_norm": 0.9509850882088483,
      "learning_rate": 1.9212736678668075e-07,
      "loss": 0.1214,
      "step": 9558
    },
    {
      "epoch": 0.8807297185239784,
      "grad_norm": 0.9288825792030714,
      "learning_rate": 1.9183461242266027e-07,
      "loss": 0.1142,
      "step": 9559
    },
    {
      "epoch": 0.8808218547012484,
      "grad_norm": 0.9294423662297463,
      "learning_rate": 1.9154207237204403e-07,
      "loss": 0.1147,
      "step": 9560
    },
    {
      "epoch": 0.8809139908785184,
      "grad_norm": 1.0171452491428885,
      "learning_rate": 1.9124974666199476e-07,
      "loss": 0.1299,
      "step": 9561
    },
    {
      "epoch": 0.8810061270557884,
      "grad_norm": 0.9263947519637243,
      "learning_rate": 1.909576353196549e-07,
      "loss": 0.1131,
      "step": 9562
    },
    {
      "epoch": 0.8810982632330585,
      "grad_norm": 0.8906518666115324,
      "learning_rate": 1.9066573837214773e-07,
      "loss": 0.113,
      "step": 9563
    },
    {
      "epoch": 0.8811903994103285,
      "grad_norm": 0.9373704014564624,
      "learning_rate": 1.90374055846575e-07,
      "loss": 0.117,
      "step": 9564
    },
    {
      "epoch": 0.8812825355875985,
      "grad_norm": 0.9669182368686189,
      "learning_rate": 1.9008258777001963e-07,
      "loss": 0.1208,
      "step": 9565
    },
    {
      "epoch": 0.8813746717648685,
      "grad_norm": 0.9085498077985098,
      "learning_rate": 1.8979133416954453e-07,
      "loss": 0.1116,
      "step": 9566
    },
    {
      "epoch": 0.8814668079421385,
      "grad_norm": 0.9457559872850566,
      "learning_rate": 1.8950029507219302e-07,
      "loss": 0.1252,
      "step": 9567
    },
    {
      "epoch": 0.8815589441194085,
      "grad_norm": 0.9720738745786429,
      "learning_rate": 1.8920947050498711e-07,
      "loss": 0.1207,
      "step": 9568
    },
    {
      "epoch": 0.8816510802966785,
      "grad_norm": 0.9886987730813052,
      "learning_rate": 1.889188604949313e-07,
      "loss": 0.1293,
      "step": 9569
    },
    {
      "epoch": 0.8817432164739485,
      "grad_norm": 0.8695655667801883,
      "learning_rate": 1.8862846506900762e-07,
      "loss": 0.1005,
      "step": 9570
    },
    {
      "epoch": 0.8818353526512185,
      "grad_norm": 0.9139453399544554,
      "learning_rate": 1.8833828425418006e-07,
      "loss": 0.1113,
      "step": 9571
    },
    {
      "epoch": 0.8819274888284885,
      "grad_norm": 0.9206137569411968,
      "learning_rate": 1.8804831807739094e-07,
      "loss": 0.1122,
      "step": 9572
    },
    {
      "epoch": 0.8820196250057585,
      "grad_norm": 0.9661739250181918,
      "learning_rate": 1.8775856656556458e-07,
      "loss": 0.118,
      "step": 9573
    },
    {
      "epoch": 0.8821117611830285,
      "grad_norm": 0.9716508124012724,
      "learning_rate": 1.8746902974560443e-07,
      "loss": 0.1242,
      "step": 9574
    },
    {
      "epoch": 0.8822038973602985,
      "grad_norm": 0.9133529948046311,
      "learning_rate": 1.8717970764439374e-07,
      "loss": 0.123,
      "step": 9575
    },
    {
      "epoch": 0.8822960335375686,
      "grad_norm": 0.9832013176200599,
      "learning_rate": 1.8689060028879602e-07,
      "loss": 0.1278,
      "step": 9576
    },
    {
      "epoch": 0.8823881697148386,
      "grad_norm": 0.8860667781542512,
      "learning_rate": 1.866017077056545e-07,
      "loss": 0.1193,
      "step": 9577
    },
    {
      "epoch": 0.8824803058921086,
      "grad_norm": 0.9641731545183843,
      "learning_rate": 1.8631302992179383e-07,
      "loss": 0.1184,
      "step": 9578
    },
    {
      "epoch": 0.8825724420693786,
      "grad_norm": 0.8761458096650978,
      "learning_rate": 1.860245669640176e-07,
      "loss": 0.1092,
      "step": 9579
    },
    {
      "epoch": 0.8826645782466486,
      "grad_norm": 0.9386085977402518,
      "learning_rate": 1.857363188591091e-07,
      "loss": 0.1111,
      "step": 9580
    },
    {
      "epoch": 0.8827567144239186,
      "grad_norm": 0.9293784733624652,
      "learning_rate": 1.8544828563383243e-07,
      "loss": 0.1079,
      "step": 9581
    },
    {
      "epoch": 0.8828488506011886,
      "grad_norm": 0.9749859909166957,
      "learning_rate": 1.8516046731493127e-07,
      "loss": 0.1275,
      "step": 9582
    },
    {
      "epoch": 0.8829409867784586,
      "grad_norm": 0.9022892040966464,
      "learning_rate": 1.848728639291303e-07,
      "loss": 0.1096,
      "step": 9583
    },
    {
      "epoch": 0.8830331229557286,
      "grad_norm": 0.9433013663273301,
      "learning_rate": 1.8458547550313287e-07,
      "loss": 0.1191,
      "step": 9584
    },
    {
      "epoch": 0.8831252591329986,
      "grad_norm": 0.8964672985576979,
      "learning_rate": 1.8429830206362325e-07,
      "loss": 0.1137,
      "step": 9585
    },
    {
      "epoch": 0.8832173953102685,
      "grad_norm": 0.9174450175751114,
      "learning_rate": 1.8401134363726536e-07,
      "loss": 0.1138,
      "step": 9586
    },
    {
      "epoch": 0.8833095314875385,
      "grad_norm": 0.8803209048806083,
      "learning_rate": 1.8372460025070343e-07,
      "loss": 0.1152,
      "step": 9587
    },
    {
      "epoch": 0.8834016676648085,
      "grad_norm": 0.9100598997602551,
      "learning_rate": 1.8343807193056201e-07,
      "loss": 0.1113,
      "step": 9588
    },
    {
      "epoch": 0.8834938038420785,
      "grad_norm": 0.8753907155320467,
      "learning_rate": 1.8315175870344455e-07,
      "loss": 0.1149,
      "step": 9589
    },
    {
      "epoch": 0.8835859400193486,
      "grad_norm": 0.9462524201593473,
      "learning_rate": 1.8286566059593615e-07,
      "loss": 0.1154,
      "step": 9590
    },
    {
      "epoch": 0.8836780761966186,
      "grad_norm": 0.9212877418915894,
      "learning_rate": 1.825797776346e-07,
      "loss": 0.1185,
      "step": 9591
    },
    {
      "epoch": 0.8837702123738886,
      "grad_norm": 0.946329462649158,
      "learning_rate": 1.8229410984598128e-07,
      "loss": 0.1243,
      "step": 9592
    },
    {
      "epoch": 0.8838623485511586,
      "grad_norm": 0.9357474153015102,
      "learning_rate": 1.820086572566035e-07,
      "loss": 0.1225,
      "step": 9593
    },
    {
      "epoch": 0.8839544847284286,
      "grad_norm": 0.9432417654817284,
      "learning_rate": 1.8172341989297154e-07,
      "loss": 0.1082,
      "step": 9594
    },
    {
      "epoch": 0.8840466209056986,
      "grad_norm": 0.9728284426637823,
      "learning_rate": 1.814383977815698e-07,
      "loss": 0.1082,
      "step": 9595
    },
    {
      "epoch": 0.8841387570829686,
      "grad_norm": 0.9521973139587481,
      "learning_rate": 1.8115359094886238e-07,
      "loss": 0.1237,
      "step": 9596
    },
    {
      "epoch": 0.8842308932602386,
      "grad_norm": 0.9412655233696202,
      "learning_rate": 1.808689994212931e-07,
      "loss": 0.1181,
      "step": 9597
    },
    {
      "epoch": 0.8843230294375086,
      "grad_norm": 0.9095779965593743,
      "learning_rate": 1.8058462322528698e-07,
      "loss": 0.1093,
      "step": 9598
    },
    {
      "epoch": 0.8844151656147786,
      "grad_norm": 0.9580227956852666,
      "learning_rate": 1.8030046238724814e-07,
      "loss": 0.1191,
      "step": 9599
    },
    {
      "epoch": 0.8845073017920486,
      "grad_norm": 0.9635109661247991,
      "learning_rate": 1.8001651693356131e-07,
      "loss": 0.1228,
      "step": 9600
    },
    {
      "epoch": 0.8845994379693186,
      "grad_norm": 0.8568137536842804,
      "learning_rate": 1.797327868905907e-07,
      "loss": 0.0991,
      "step": 9601
    },
    {
      "epoch": 0.8846915741465886,
      "grad_norm": 0.9547474068403831,
      "learning_rate": 1.7944927228467995e-07,
      "loss": 0.1168,
      "step": 9602
    },
    {
      "epoch": 0.8847837103238586,
      "grad_norm": 0.9584155615411788,
      "learning_rate": 1.791659731421541e-07,
      "loss": 0.1077,
      "step": 9603
    },
    {
      "epoch": 0.8848758465011287,
      "grad_norm": 0.928296303372466,
      "learning_rate": 1.7888288948931799e-07,
      "loss": 0.1165,
      "step": 9604
    },
    {
      "epoch": 0.8849679826783987,
      "grad_norm": 0.9114506107172179,
      "learning_rate": 1.786000213524547e-07,
      "loss": 0.1154,
      "step": 9605
    },
    {
      "epoch": 0.8850601188556687,
      "grad_norm": 0.9032049431949399,
      "learning_rate": 1.783173687578299e-07,
      "loss": 0.1077,
      "step": 9606
    },
    {
      "epoch": 0.8851522550329387,
      "grad_norm": 1.031588892107339,
      "learning_rate": 1.7803493173168679e-07,
      "loss": 0.1249,
      "step": 9607
    },
    {
      "epoch": 0.8852443912102087,
      "grad_norm": 0.9265771709711623,
      "learning_rate": 1.777527103002505e-07,
      "loss": 0.1197,
      "step": 9608
    },
    {
      "epoch": 0.8853365273874787,
      "grad_norm": 0.9544289104713,
      "learning_rate": 1.7747070448972475e-07,
      "loss": 0.1178,
      "step": 9609
    },
    {
      "epoch": 0.8854286635647487,
      "grad_norm": 0.9971281602223147,
      "learning_rate": 1.7718891432629392e-07,
      "loss": 0.1295,
      "step": 9610
    },
    {
      "epoch": 0.8855207997420187,
      "grad_norm": 0.9433321387081206,
      "learning_rate": 1.769073398361229e-07,
      "loss": 0.1169,
      "step": 9611
    },
    {
      "epoch": 0.8856129359192887,
      "grad_norm": 0.9677629402688539,
      "learning_rate": 1.7662598104535522e-07,
      "loss": 0.1211,
      "step": 9612
    },
    {
      "epoch": 0.8857050720965587,
      "grad_norm": 0.9972427061874779,
      "learning_rate": 1.7634483798011498e-07,
      "loss": 0.1217,
      "step": 9613
    },
    {
      "epoch": 0.8857972082738287,
      "grad_norm": 0.8859263153374594,
      "learning_rate": 1.760639106665063e-07,
      "loss": 0.1138,
      "step": 9614
    },
    {
      "epoch": 0.8858893444510987,
      "grad_norm": 0.9202629453001602,
      "learning_rate": 1.7578319913061387e-07,
      "loss": 0.1135,
      "step": 9615
    },
    {
      "epoch": 0.8859814806283687,
      "grad_norm": 0.9096230922774695,
      "learning_rate": 1.7550270339850212e-07,
      "loss": 0.108,
      "step": 9616
    },
    {
      "epoch": 0.8860736168056388,
      "grad_norm": 0.9214369389617435,
      "learning_rate": 1.7522242349621438e-07,
      "loss": 0.1213,
      "step": 9617
    },
    {
      "epoch": 0.8861657529829088,
      "grad_norm": 0.9458955777475399,
      "learning_rate": 1.7494235944977427e-07,
      "loss": 0.1252,
      "step": 9618
    },
    {
      "epoch": 0.8862578891601788,
      "grad_norm": 0.9374083870077097,
      "learning_rate": 1.7466251128518629e-07,
      "loss": 0.1088,
      "step": 9619
    },
    {
      "epoch": 0.8863500253374488,
      "grad_norm": 0.9111588907855831,
      "learning_rate": 1.7438287902843465e-07,
      "loss": 0.1051,
      "step": 9620
    },
    {
      "epoch": 0.8864421615147188,
      "grad_norm": 0.9315197242699069,
      "learning_rate": 1.7410346270548328e-07,
      "loss": 0.1175,
      "step": 9621
    },
    {
      "epoch": 0.8865342976919888,
      "grad_norm": 1.0174523449968809,
      "learning_rate": 1.7382426234227562e-07,
      "loss": 0.1309,
      "step": 9622
    },
    {
      "epoch": 0.8866264338692588,
      "grad_norm": 0.9663714692842666,
      "learning_rate": 1.735452779647351e-07,
      "loss": 0.1191,
      "step": 9623
    },
    {
      "epoch": 0.8867185700465288,
      "grad_norm": 0.9881143534895991,
      "learning_rate": 1.7326650959876595e-07,
      "loss": 0.1308,
      "step": 9624
    },
    {
      "epoch": 0.8868107062237988,
      "grad_norm": 0.9652210535066099,
      "learning_rate": 1.7298795727025226e-07,
      "loss": 0.1118,
      "step": 9625
    },
    {
      "epoch": 0.8869028424010688,
      "grad_norm": 0.9190988726721089,
      "learning_rate": 1.7270962100505688e-07,
      "loss": 0.1202,
      "step": 9626
    },
    {
      "epoch": 0.8869949785783388,
      "grad_norm": 0.9356727701411983,
      "learning_rate": 1.724315008290234e-07,
      "loss": 0.1208,
      "step": 9627
    },
    {
      "epoch": 0.8870871147556088,
      "grad_norm": 0.9449533031642502,
      "learning_rate": 1.7215359676797604e-07,
      "loss": 0.1204,
      "step": 9628
    },
    {
      "epoch": 0.8871792509328787,
      "grad_norm": 0.9224941964563907,
      "learning_rate": 1.7187590884771789e-07,
      "loss": 0.1137,
      "step": 9629
    },
    {
      "epoch": 0.8872713871101487,
      "grad_norm": 0.9184212287329622,
      "learning_rate": 1.7159843709403156e-07,
      "loss": 0.1184,
      "step": 9630
    },
    {
      "epoch": 0.8873635232874189,
      "grad_norm": 1.033680961033416,
      "learning_rate": 1.7132118153268097e-07,
      "loss": 0.1182,
      "step": 9631
    },
    {
      "epoch": 0.8874556594646889,
      "grad_norm": 0.925393325283817,
      "learning_rate": 1.7104414218940934e-07,
      "loss": 0.1161,
      "step": 9632
    },
    {
      "epoch": 0.8875477956419588,
      "grad_norm": 0.9012734623981898,
      "learning_rate": 1.7076731908994032e-07,
      "loss": 0.1063,
      "step": 9633
    },
    {
      "epoch": 0.8876399318192288,
      "grad_norm": 0.9821681211632581,
      "learning_rate": 1.704907122599761e-07,
      "loss": 0.1297,
      "step": 9634
    },
    {
      "epoch": 0.8877320679964988,
      "grad_norm": 0.937963156497377,
      "learning_rate": 1.7021432172519974e-07,
      "loss": 0.1123,
      "step": 9635
    },
    {
      "epoch": 0.8878242041737688,
      "grad_norm": 0.8937454742138694,
      "learning_rate": 1.6993814751127435e-07,
      "loss": 0.115,
      "step": 9636
    },
    {
      "epoch": 0.8879163403510388,
      "grad_norm": 0.8840091538217429,
      "learning_rate": 1.69662189643843e-07,
      "loss": 0.103,
      "step": 9637
    },
    {
      "epoch": 0.8880084765283088,
      "grad_norm": 0.9022781888770508,
      "learning_rate": 1.69386448148528e-07,
      "loss": 0.1093,
      "step": 9638
    },
    {
      "epoch": 0.8881006127055788,
      "grad_norm": 0.948615735169581,
      "learning_rate": 1.691109230509322e-07,
      "loss": 0.1155,
      "step": 9639
    },
    {
      "epoch": 0.8881927488828488,
      "grad_norm": 0.9448712983100227,
      "learning_rate": 1.6883561437663788e-07,
      "loss": 0.1164,
      "step": 9640
    },
    {
      "epoch": 0.8882848850601188,
      "grad_norm": 0.8863119815353013,
      "learning_rate": 1.6856052215120794e-07,
      "loss": 0.1115,
      "step": 9641
    },
    {
      "epoch": 0.8883770212373888,
      "grad_norm": 0.9719080592411924,
      "learning_rate": 1.682856464001839e-07,
      "loss": 0.1166,
      "step": 9642
    },
    {
      "epoch": 0.8884691574146588,
      "grad_norm": 0.8809419811084558,
      "learning_rate": 1.680109871490887e-07,
      "loss": 0.1115,
      "step": 9643
    },
    {
      "epoch": 0.8885612935919289,
      "grad_norm": 0.9303992924384262,
      "learning_rate": 1.6773654442342468e-07,
      "loss": 0.1105,
      "step": 9644
    },
    {
      "epoch": 0.8886534297691989,
      "grad_norm": 1.0080826201157422,
      "learning_rate": 1.6746231824867316e-07,
      "loss": 0.1284,
      "step": 9645
    },
    {
      "epoch": 0.8887455659464689,
      "grad_norm": 0.9524532559452373,
      "learning_rate": 1.671883086502968e-07,
      "loss": 0.1139,
      "step": 9646
    },
    {
      "epoch": 0.8888377021237389,
      "grad_norm": 0.9658574906611174,
      "learning_rate": 1.669145156537366e-07,
      "loss": 0.1339,
      "step": 9647
    },
    {
      "epoch": 0.8889298383010089,
      "grad_norm": 0.9751522991921692,
      "learning_rate": 1.6664093928441456e-07,
      "loss": 0.1203,
      "step": 9648
    },
    {
      "epoch": 0.8890219744782789,
      "grad_norm": 0.951320518261064,
      "learning_rate": 1.6636757956773302e-07,
      "loss": 0.12,
      "step": 9649
    },
    {
      "epoch": 0.8891141106555489,
      "grad_norm": 0.9416676468741589,
      "learning_rate": 1.6609443652907287e-07,
      "loss": 0.1227,
      "step": 9650
    },
    {
      "epoch": 0.8892062468328189,
      "grad_norm": 1.0030344928799633,
      "learning_rate": 1.6582151019379517e-07,
      "loss": 0.1299,
      "step": 9651
    },
    {
      "epoch": 0.8892983830100889,
      "grad_norm": 0.9258602981126589,
      "learning_rate": 1.655488005872413e-07,
      "loss": 0.1126,
      "step": 9652
    },
    {
      "epoch": 0.8893905191873589,
      "grad_norm": 0.9424174799317092,
      "learning_rate": 1.6527630773473248e-07,
      "loss": 0.1178,
      "step": 9653
    },
    {
      "epoch": 0.8894826553646289,
      "grad_norm": 0.9481218238285717,
      "learning_rate": 1.650040316615703e-07,
      "loss": 0.1246,
      "step": 9654
    },
    {
      "epoch": 0.8895747915418989,
      "grad_norm": 0.9605790380659346,
      "learning_rate": 1.647319723930349e-07,
      "loss": 0.124,
      "step": 9655
    },
    {
      "epoch": 0.8896669277191689,
      "grad_norm": 0.9472833340946128,
      "learning_rate": 1.6446012995438688e-07,
      "loss": 0.1252,
      "step": 9656
    },
    {
      "epoch": 0.8897590638964389,
      "grad_norm": 0.9887156441835274,
      "learning_rate": 1.6418850437086715e-07,
      "loss": 0.1311,
      "step": 9657
    },
    {
      "epoch": 0.889851200073709,
      "grad_norm": 0.9639073024387345,
      "learning_rate": 1.6391709566769664e-07,
      "loss": 0.1232,
      "step": 9658
    },
    {
      "epoch": 0.889943336250979,
      "grad_norm": 0.8845699968934604,
      "learning_rate": 1.6364590387007468e-07,
      "loss": 0.1053,
      "step": 9659
    },
    {
      "epoch": 0.890035472428249,
      "grad_norm": 0.9467171501941805,
      "learning_rate": 1.6337492900318246e-07,
      "loss": 0.1118,
      "step": 9660
    },
    {
      "epoch": 0.890127608605519,
      "grad_norm": 0.8784549110877983,
      "learning_rate": 1.6310417109217906e-07,
      "loss": 0.11,
      "step": 9661
    },
    {
      "epoch": 0.890219744782789,
      "grad_norm": 0.9112414536641252,
      "learning_rate": 1.6283363016220548e-07,
      "loss": 0.1077,
      "step": 9662
    },
    {
      "epoch": 0.890311880960059,
      "grad_norm": 0.9555138237898598,
      "learning_rate": 1.6256330623838024e-07,
      "loss": 0.126,
      "step": 9663
    },
    {
      "epoch": 0.890404017137329,
      "grad_norm": 0.9146987594311619,
      "learning_rate": 1.6229319934580378e-07,
      "loss": 0.108,
      "step": 9664
    },
    {
      "epoch": 0.890496153314599,
      "grad_norm": 0.9870921357946368,
      "learning_rate": 1.6202330950955552e-07,
      "loss": 0.1216,
      "step": 9665
    },
    {
      "epoch": 0.890588289491869,
      "grad_norm": 0.9640885538855887,
      "learning_rate": 1.6175363675469485e-07,
      "loss": 0.1239,
      "step": 9666
    },
    {
      "epoch": 0.890680425669139,
      "grad_norm": 0.9317904692179457,
      "learning_rate": 1.6148418110626008e-07,
      "loss": 0.1171,
      "step": 9667
    },
    {
      "epoch": 0.890772561846409,
      "grad_norm": 0.9345846725693181,
      "learning_rate": 1.612149425892709e-07,
      "loss": 0.1137,
      "step": 9668
    },
    {
      "epoch": 0.890864698023679,
      "grad_norm": 0.9183218187820292,
      "learning_rate": 1.6094592122872594e-07,
      "loss": 0.1142,
      "step": 9669
    },
    {
      "epoch": 0.890956834200949,
      "grad_norm": 0.9700195116573087,
      "learning_rate": 1.6067711704960408e-07,
      "loss": 0.1146,
      "step": 9670
    },
    {
      "epoch": 0.8910489703782191,
      "grad_norm": 0.9380884900362914,
      "learning_rate": 1.60408530076864e-07,
      "loss": 0.1258,
      "step": 9671
    },
    {
      "epoch": 0.8911411065554891,
      "grad_norm": 0.9505680280600464,
      "learning_rate": 1.6014016033544329e-07,
      "loss": 0.1125,
      "step": 9672
    },
    {
      "epoch": 0.8912332427327591,
      "grad_norm": 0.9153781576364745,
      "learning_rate": 1.5987200785026024e-07,
      "loss": 0.1134,
      "step": 9673
    },
    {
      "epoch": 0.891325378910029,
      "grad_norm": 0.9722948079778472,
      "learning_rate": 1.5960407264621335e-07,
      "loss": 0.1234,
      "step": 9674
    },
    {
      "epoch": 0.891417515087299,
      "grad_norm": 0.9108011291797209,
      "learning_rate": 1.5933635474818048e-07,
      "loss": 0.1207,
      "step": 9675
    },
    {
      "epoch": 0.891509651264569,
      "grad_norm": 0.9460370912653332,
      "learning_rate": 1.5906885418101897e-07,
      "loss": 0.1169,
      "step": 9676
    },
    {
      "epoch": 0.891601787441839,
      "grad_norm": 0.9385372386749276,
      "learning_rate": 1.588015709695659e-07,
      "loss": 0.1155,
      "step": 9677
    },
    {
      "epoch": 0.891693923619109,
      "grad_norm": 0.9241971228937185,
      "learning_rate": 1.5853450513863887e-07,
      "loss": 0.112,
      "step": 9678
    },
    {
      "epoch": 0.891786059796379,
      "grad_norm": 0.8890910453397396,
      "learning_rate": 1.582676567130356e-07,
      "loss": 0.1149,
      "step": 9679
    },
    {
      "epoch": 0.891878195973649,
      "grad_norm": 0.9725011168763831,
      "learning_rate": 1.5800102571753185e-07,
      "loss": 0.1197,
      "step": 9680
    },
    {
      "epoch": 0.891970332150919,
      "grad_norm": 0.9501294692501856,
      "learning_rate": 1.5773461217688552e-07,
      "loss": 0.118,
      "step": 9681
    },
    {
      "epoch": 0.892062468328189,
      "grad_norm": 0.9353526816347267,
      "learning_rate": 1.5746841611583185e-07,
      "loss": 0.1155,
      "step": 9682
    },
    {
      "epoch": 0.892154604505459,
      "grad_norm": 0.8727846936420787,
      "learning_rate": 1.572024375590883e-07,
      "loss": 0.1078,
      "step": 9683
    },
    {
      "epoch": 0.892246740682729,
      "grad_norm": 0.9458301911588138,
      "learning_rate": 1.5693667653135043e-07,
      "loss": 0.1154,
      "step": 9684
    },
    {
      "epoch": 0.8923388768599991,
      "grad_norm": 0.9325030933389573,
      "learning_rate": 1.56671133057294e-07,
      "loss": 0.1133,
      "step": 9685
    },
    {
      "epoch": 0.8924310130372691,
      "grad_norm": 0.9620657518647695,
      "learning_rate": 1.5640580716157566e-07,
      "loss": 0.1101,
      "step": 9686
    },
    {
      "epoch": 0.8925231492145391,
      "grad_norm": 0.886768718834908,
      "learning_rate": 1.5614069886883021e-07,
      "loss": 0.1093,
      "step": 9687
    },
    {
      "epoch": 0.8926152853918091,
      "grad_norm": 1.0028608248597497,
      "learning_rate": 1.5587580820367294e-07,
      "loss": 0.1294,
      "step": 9688
    },
    {
      "epoch": 0.8927074215690791,
      "grad_norm": 0.9258946821905417,
      "learning_rate": 1.5561113519069887e-07,
      "loss": 0.1232,
      "step": 9689
    },
    {
      "epoch": 0.8927995577463491,
      "grad_norm": 0.8868177500137526,
      "learning_rate": 1.5534667985448336e-07,
      "loss": 0.1154,
      "step": 9690
    },
    {
      "epoch": 0.8928916939236191,
      "grad_norm": 0.9208808651503403,
      "learning_rate": 1.5508244221958125e-07,
      "loss": 0.1114,
      "step": 9691
    },
    {
      "epoch": 0.8929838301008891,
      "grad_norm": 0.9465660393051565,
      "learning_rate": 1.5481842231052702e-07,
      "loss": 0.1236,
      "step": 9692
    },
    {
      "epoch": 0.8930759662781591,
      "grad_norm": 0.8850488686636715,
      "learning_rate": 1.5455462015183388e-07,
      "loss": 0.0956,
      "step": 9693
    },
    {
      "epoch": 0.8931681024554291,
      "grad_norm": 0.9198730188731642,
      "learning_rate": 1.5429103576799692e-07,
      "loss": 0.1096,
      "step": 9694
    },
    {
      "epoch": 0.8932602386326991,
      "grad_norm": 0.9843260253433014,
      "learning_rate": 1.540276691834902e-07,
      "loss": 0.1274,
      "step": 9695
    },
    {
      "epoch": 0.8933523748099691,
      "grad_norm": 1.0059626536122228,
      "learning_rate": 1.537645204227664e-07,
      "loss": 0.1311,
      "step": 9696
    },
    {
      "epoch": 0.8934445109872391,
      "grad_norm": 0.980951404503035,
      "learning_rate": 1.5350158951025957e-07,
      "loss": 0.1183,
      "step": 9697
    },
    {
      "epoch": 0.8935366471645091,
      "grad_norm": 0.9806529946895199,
      "learning_rate": 1.5323887647038266e-07,
      "loss": 0.1249,
      "step": 9698
    },
    {
      "epoch": 0.8936287833417792,
      "grad_norm": 0.9610970927025647,
      "learning_rate": 1.5297638132752867e-07,
      "loss": 0.1138,
      "step": 9699
    },
    {
      "epoch": 0.8937209195190492,
      "grad_norm": 0.9565492528451189,
      "learning_rate": 1.5271410410607008e-07,
      "loss": 0.1103,
      "step": 9700
    },
    {
      "epoch": 0.8938130556963192,
      "grad_norm": 0.9128992345808833,
      "learning_rate": 1.5245204483035958e-07,
      "loss": 0.1194,
      "step": 9701
    },
    {
      "epoch": 0.8939051918735892,
      "grad_norm": 0.9813926968534401,
      "learning_rate": 1.5219020352472914e-07,
      "loss": 0.1208,
      "step": 9702
    },
    {
      "epoch": 0.8939973280508592,
      "grad_norm": 0.947509072446224,
      "learning_rate": 1.519285802134915e-07,
      "loss": 0.113,
      "step": 9703
    },
    {
      "epoch": 0.8940894642281292,
      "grad_norm": 0.9932594914438546,
      "learning_rate": 1.5166717492093808e-07,
      "loss": 0.1217,
      "step": 9704
    },
    {
      "epoch": 0.8941816004053992,
      "grad_norm": 0.892243585536645,
      "learning_rate": 1.5140598767133947e-07,
      "loss": 0.1104,
      "step": 9705
    },
    {
      "epoch": 0.8942737365826692,
      "grad_norm": 0.9090227640422955,
      "learning_rate": 1.5114501848894792e-07,
      "loss": 0.1129,
      "step": 9706
    },
    {
      "epoch": 0.8943658727599392,
      "grad_norm": 0.9667171942105707,
      "learning_rate": 1.5088426739799405e-07,
      "loss": 0.1195,
      "step": 9707
    },
    {
      "epoch": 0.8944580089372092,
      "grad_norm": 0.9372614582454015,
      "learning_rate": 1.5062373442268908e-07,
      "loss": 0.1222,
      "step": 9708
    },
    {
      "epoch": 0.8945501451144792,
      "grad_norm": 0.933453218296901,
      "learning_rate": 1.5036341958722334e-07,
      "loss": 0.1151,
      "step": 9709
    },
    {
      "epoch": 0.8946422812917492,
      "grad_norm": 0.9340782377512039,
      "learning_rate": 1.501033229157667e-07,
      "loss": 0.1205,
      "step": 9710
    },
    {
      "epoch": 0.8947344174690192,
      "grad_norm": 0.9239264734069119,
      "learning_rate": 1.4984344443246924e-07,
      "loss": 0.1126,
      "step": 9711
    },
    {
      "epoch": 0.8948265536462893,
      "grad_norm": 0.9349919081011316,
      "learning_rate": 1.495837841614614e-07,
      "loss": 0.0968,
      "step": 9712
    },
    {
      "epoch": 0.8949186898235593,
      "grad_norm": 0.9218295205011691,
      "learning_rate": 1.493243421268517e-07,
      "loss": 0.1116,
      "step": 9713
    },
    {
      "epoch": 0.8950108260008293,
      "grad_norm": 0.9371288620444782,
      "learning_rate": 1.4906511835273003e-07,
      "loss": 0.1048,
      "step": 9714
    },
    {
      "epoch": 0.8951029621780993,
      "grad_norm": 0.9444301518399142,
      "learning_rate": 1.4880611286316487e-07,
      "loss": 0.122,
      "step": 9715
    },
    {
      "epoch": 0.8951950983553693,
      "grad_norm": 0.9990493560331191,
      "learning_rate": 1.4854732568220566e-07,
      "loss": 0.1316,
      "step": 9716
    },
    {
      "epoch": 0.8952872345326393,
      "grad_norm": 0.9358320327837656,
      "learning_rate": 1.4828875683387977e-07,
      "loss": 0.1169,
      "step": 9717
    },
    {
      "epoch": 0.8953793707099093,
      "grad_norm": 0.9190529344035475,
      "learning_rate": 1.4803040634219612e-07,
      "loss": 0.1094,
      "step": 9718
    },
    {
      "epoch": 0.8954715068871792,
      "grad_norm": 0.9687233614559736,
      "learning_rate": 1.4777227423114271e-07,
      "loss": 0.12,
      "step": 9719
    },
    {
      "epoch": 0.8955636430644492,
      "grad_norm": 1.0284364178633627,
      "learning_rate": 1.4751436052468677e-07,
      "loss": 0.1258,
      "step": 9720
    },
    {
      "epoch": 0.8956557792417192,
      "grad_norm": 0.87744147112822,
      "learning_rate": 1.4725666524677496e-07,
      "loss": 0.1086,
      "step": 9721
    },
    {
      "epoch": 0.8957479154189892,
      "grad_norm": 0.9439290320132014,
      "learning_rate": 1.4699918842133536e-07,
      "loss": 0.1211,
      "step": 9722
    },
    {
      "epoch": 0.8958400515962592,
      "grad_norm": 0.8633906867334843,
      "learning_rate": 1.4674193007227416e-07,
      "loss": 0.1035,
      "step": 9723
    },
    {
      "epoch": 0.8959321877735292,
      "grad_norm": 0.9399171676121051,
      "learning_rate": 1.464848902234786e-07,
      "loss": 0.1188,
      "step": 9724
    },
    {
      "epoch": 0.8960243239507992,
      "grad_norm": 1.200021321856677,
      "learning_rate": 1.4622806889881407e-07,
      "loss": 0.1209,
      "step": 9725
    },
    {
      "epoch": 0.8961164601280693,
      "grad_norm": 0.9660581201978683,
      "learning_rate": 1.4597146612212622e-07,
      "loss": 0.1165,
      "step": 9726
    },
    {
      "epoch": 0.8962085963053393,
      "grad_norm": 0.9637032682306422,
      "learning_rate": 1.457150819172412e-07,
      "loss": 0.123,
      "step": 9727
    },
    {
      "epoch": 0.8963007324826093,
      "grad_norm": 0.9216934942341704,
      "learning_rate": 1.454589163079645e-07,
      "loss": 0.1088,
      "step": 9728
    },
    {
      "epoch": 0.8963928686598793,
      "grad_norm": 0.8845350904114487,
      "learning_rate": 1.4520296931808064e-07,
      "loss": 0.112,
      "step": 9729
    },
    {
      "epoch": 0.8964850048371493,
      "grad_norm": 0.9438486506585017,
      "learning_rate": 1.449472409713548e-07,
      "loss": 0.1236,
      "step": 9730
    },
    {
      "epoch": 0.8965771410144193,
      "grad_norm": 0.957677136788932,
      "learning_rate": 1.4469173129153052e-07,
      "loss": 0.1181,
      "step": 9731
    },
    {
      "epoch": 0.8966692771916893,
      "grad_norm": 0.9933864337772672,
      "learning_rate": 1.4443644030233268e-07,
      "loss": 0.1305,
      "step": 9732
    },
    {
      "epoch": 0.8967614133689593,
      "grad_norm": 0.8999166286380204,
      "learning_rate": 1.4418136802746507e-07,
      "loss": 0.1068,
      "step": 9733
    },
    {
      "epoch": 0.8968535495462293,
      "grad_norm": 0.9631857910045679,
      "learning_rate": 1.4392651449061075e-07,
      "loss": 0.1221,
      "step": 9734
    },
    {
      "epoch": 0.8969456857234993,
      "grad_norm": 0.8684313853698533,
      "learning_rate": 1.4367187971543352e-07,
      "loss": 0.1085,
      "step": 9735
    },
    {
      "epoch": 0.8970378219007693,
      "grad_norm": 0.8674388014206332,
      "learning_rate": 1.434174637255753e-07,
      "loss": 0.0992,
      "step": 9736
    },
    {
      "epoch": 0.8971299580780393,
      "grad_norm": 0.9874178781613823,
      "learning_rate": 1.4316326654465972e-07,
      "loss": 0.128,
      "step": 9737
    },
    {
      "epoch": 0.8972220942553093,
      "grad_norm": 0.9477060570555671,
      "learning_rate": 1.429092881962882e-07,
      "loss": 0.1228,
      "step": 9738
    },
    {
      "epoch": 0.8973142304325794,
      "grad_norm": 0.9149830735214507,
      "learning_rate": 1.4265552870404265e-07,
      "loss": 0.1189,
      "step": 9739
    },
    {
      "epoch": 0.8974063666098494,
      "grad_norm": 0.9544319608703057,
      "learning_rate": 1.4240198809148537e-07,
      "loss": 0.1261,
      "step": 9740
    },
    {
      "epoch": 0.8974985027871194,
      "grad_norm": 0.9299058821822609,
      "learning_rate": 1.421486663821575e-07,
      "loss": 0.1085,
      "step": 9741
    },
    {
      "epoch": 0.8975906389643894,
      "grad_norm": 0.9000451973033925,
      "learning_rate": 1.4189556359957917e-07,
      "loss": 0.1145,
      "step": 9742
    },
    {
      "epoch": 0.8976827751416594,
      "grad_norm": 0.9255410508309111,
      "learning_rate": 1.4164267976725154e-07,
      "loss": 0.1196,
      "step": 9743
    },
    {
      "epoch": 0.8977749113189294,
      "grad_norm": 0.9849259768868959,
      "learning_rate": 1.41390014908655e-07,
      "loss": 0.1232,
      "step": 9744
    },
    {
      "epoch": 0.8978670474961994,
      "grad_norm": 0.9322779341663346,
      "learning_rate": 1.4113756904724967e-07,
      "loss": 0.1113,
      "step": 9745
    },
    {
      "epoch": 0.8979591836734694,
      "grad_norm": 0.9525263463078988,
      "learning_rate": 1.4088534220647487e-07,
      "loss": 0.124,
      "step": 9746
    },
    {
      "epoch": 0.8980513198507394,
      "grad_norm": 0.921740201644219,
      "learning_rate": 1.4063333440974963e-07,
      "loss": 0.1108,
      "step": 9747
    },
    {
      "epoch": 0.8981434560280094,
      "grad_norm": 0.9209820144117232,
      "learning_rate": 1.403815456804733e-07,
      "loss": 0.1071,
      "step": 9748
    },
    {
      "epoch": 0.8982355922052794,
      "grad_norm": 0.9262180070109551,
      "learning_rate": 1.4012997604202466e-07,
      "loss": 0.1143,
      "step": 9749
    },
    {
      "epoch": 0.8983277283825494,
      "grad_norm": 0.8761195305273904,
      "learning_rate": 1.3987862551776143e-07,
      "loss": 0.1079,
      "step": 9750
    },
    {
      "epoch": 0.8984198645598194,
      "grad_norm": 0.9368357522890151,
      "learning_rate": 1.3962749413102216e-07,
      "loss": 0.1136,
      "step": 9751
    },
    {
      "epoch": 0.8985120007370894,
      "grad_norm": 0.9377461590430607,
      "learning_rate": 1.3937658190512377e-07,
      "loss": 0.1209,
      "step": 9752
    },
    {
      "epoch": 0.8986041369143595,
      "grad_norm": 0.8844793469481224,
      "learning_rate": 1.3912588886336397e-07,
      "loss": 0.114,
      "step": 9753
    },
    {
      "epoch": 0.8986962730916295,
      "grad_norm": 0.9335683035286864,
      "learning_rate": 1.388754150290192e-07,
      "loss": 0.1234,
      "step": 9754
    },
    {
      "epoch": 0.8987884092688995,
      "grad_norm": 0.9053742346487745,
      "learning_rate": 1.3862516042534634e-07,
      "loss": 0.1042,
      "step": 9755
    },
    {
      "epoch": 0.8988805454461695,
      "grad_norm": 0.9444620424685304,
      "learning_rate": 1.3837512507558188e-07,
      "loss": 0.1192,
      "step": 9756
    },
    {
      "epoch": 0.8989726816234395,
      "grad_norm": 0.8816034579977505,
      "learning_rate": 1.3812530900294107e-07,
      "loss": 0.1131,
      "step": 9757
    },
    {
      "epoch": 0.8990648178007095,
      "grad_norm": 0.8964634166927596,
      "learning_rate": 1.37875712230619e-07,
      "loss": 0.1141,
      "step": 9758
    },
    {
      "epoch": 0.8991569539779795,
      "grad_norm": 1.020273171079443,
      "learning_rate": 1.376263347817916e-07,
      "loss": 0.1255,
      "step": 9759
    },
    {
      "epoch": 0.8992490901552495,
      "grad_norm": 0.9267078640525189,
      "learning_rate": 1.3737717667961308e-07,
      "loss": 0.1206,
      "step": 9760
    },
    {
      "epoch": 0.8993412263325195,
      "grad_norm": 0.930151281773034,
      "learning_rate": 1.371282379472183e-07,
      "loss": 0.1116,
      "step": 9761
    },
    {
      "epoch": 0.8994333625097894,
      "grad_norm": 0.88549447679142,
      "learning_rate": 1.3687951860772098e-07,
      "loss": 0.1131,
      "step": 9762
    },
    {
      "epoch": 0.8995254986870594,
      "grad_norm": 0.913221976034471,
      "learning_rate": 1.366310186842143e-07,
      "loss": 0.1157,
      "step": 9763
    },
    {
      "epoch": 0.8996176348643294,
      "grad_norm": 0.9949130994048048,
      "learning_rate": 1.3638273819977205e-07,
      "loss": 0.1205,
      "step": 9764
    },
    {
      "epoch": 0.8997097710415994,
      "grad_norm": 0.9465781966051717,
      "learning_rate": 1.3613467717744661e-07,
      "loss": 0.1234,
      "step": 9765
    },
    {
      "epoch": 0.8998019072188694,
      "grad_norm": 0.9411758903196948,
      "learning_rate": 1.358868356402715e-07,
      "loss": 0.122,
      "step": 9766
    },
    {
      "epoch": 0.8998940433961395,
      "grad_norm": 1.0203783648280509,
      "learning_rate": 1.3563921361125804e-07,
      "loss": 0.1273,
      "step": 9767
    },
    {
      "epoch": 0.8999861795734095,
      "grad_norm": 0.9816594213978099,
      "learning_rate": 1.3539181111339754e-07,
      "loss": 0.1182,
      "step": 9768
    },
    {
      "epoch": 0.9000783157506795,
      "grad_norm": 0.889791816825393,
      "learning_rate": 1.3514462816966195e-07,
      "loss": 0.106,
      "step": 9769
    },
    {
      "epoch": 0.9001704519279495,
      "grad_norm": 0.9633466334197318,
      "learning_rate": 1.3489766480300232e-07,
      "loss": 0.1252,
      "step": 9770
    },
    {
      "epoch": 0.9002625881052195,
      "grad_norm": 0.9379316337177459,
      "learning_rate": 1.3465092103634892e-07,
      "loss": 0.1209,
      "step": 9771
    },
    {
      "epoch": 0.9003547242824895,
      "grad_norm": 0.9954431482111418,
      "learning_rate": 1.3440439689261232e-07,
      "loss": 0.1249,
      "step": 9772
    },
    {
      "epoch": 0.9004468604597595,
      "grad_norm": 0.9238096644006695,
      "learning_rate": 1.3415809239468198e-07,
      "loss": 0.1133,
      "step": 9773
    },
    {
      "epoch": 0.9005389966370295,
      "grad_norm": 0.9604790895841134,
      "learning_rate": 1.3391200756542738e-07,
      "loss": 0.1198,
      "step": 9774
    },
    {
      "epoch": 0.9006311328142995,
      "grad_norm": 0.974038640437757,
      "learning_rate": 1.336661424276972e-07,
      "loss": 0.1186,
      "step": 9775
    },
    {
      "epoch": 0.9007232689915695,
      "grad_norm": 0.9207157778378379,
      "learning_rate": 1.334204970043204e-07,
      "loss": 0.112,
      "step": 9776
    },
    {
      "epoch": 0.9008154051688395,
      "grad_norm": 0.9607697109332836,
      "learning_rate": 1.331750713181054e-07,
      "loss": 0.1186,
      "step": 9777
    },
    {
      "epoch": 0.9009075413461095,
      "grad_norm": 0.9612023119466364,
      "learning_rate": 1.3292986539184011e-07,
      "loss": 0.1217,
      "step": 9778
    },
    {
      "epoch": 0.9009996775233795,
      "grad_norm": 0.9352391442078847,
      "learning_rate": 1.32684879248291e-07,
      "loss": 0.1181,
      "step": 9779
    },
    {
      "epoch": 0.9010918137006496,
      "grad_norm": 0.938840287739617,
      "learning_rate": 1.324401129102057e-07,
      "loss": 0.113,
      "step": 9780
    },
    {
      "epoch": 0.9011839498779196,
      "grad_norm": 0.9599975054539138,
      "learning_rate": 1.321955664003105e-07,
      "loss": 0.1229,
      "step": 9781
    },
    {
      "epoch": 0.9012760860551896,
      "grad_norm": 0.9002389769621546,
      "learning_rate": 1.3195123974131252e-07,
      "loss": 0.109,
      "step": 9782
    },
    {
      "epoch": 0.9013682222324596,
      "grad_norm": 0.9532838208954857,
      "learning_rate": 1.317071329558961e-07,
      "loss": 0.1244,
      "step": 9783
    },
    {
      "epoch": 0.9014603584097296,
      "grad_norm": 0.9008130939211582,
      "learning_rate": 1.3146324606672754e-07,
      "loss": 0.1171,
      "step": 9784
    },
    {
      "epoch": 0.9015524945869996,
      "grad_norm": 0.9205065612854544,
      "learning_rate": 1.3121957909645155e-07,
      "loss": 0.1112,
      "step": 9785
    },
    {
      "epoch": 0.9016446307642696,
      "grad_norm": 0.963261859456219,
      "learning_rate": 1.309761320676925e-07,
      "loss": 0.1296,
      "step": 9786
    },
    {
      "epoch": 0.9017367669415396,
      "grad_norm": 0.9543745475845056,
      "learning_rate": 1.3073290500305452e-07,
      "loss": 0.1211,
      "step": 9787
    },
    {
      "epoch": 0.9018289031188096,
      "grad_norm": 0.939430723758642,
      "learning_rate": 1.3048989792512096e-07,
      "loss": 0.1165,
      "step": 9788
    },
    {
      "epoch": 0.9019210392960796,
      "grad_norm": 0.9899501282112528,
      "learning_rate": 1.3024711085645597e-07,
      "loss": 0.1174,
      "step": 9789
    },
    {
      "epoch": 0.9020131754733496,
      "grad_norm": 0.956721372717787,
      "learning_rate": 1.3000454381960127e-07,
      "loss": 0.1151,
      "step": 9790
    },
    {
      "epoch": 0.9021053116506196,
      "grad_norm": 0.9518141144978524,
      "learning_rate": 1.297621968370802e-07,
      "loss": 0.113,
      "step": 9791
    },
    {
      "epoch": 0.9021974478278896,
      "grad_norm": 0.998188656043015,
      "learning_rate": 1.2952006993139393e-07,
      "loss": 0.13,
      "step": 9792
    },
    {
      "epoch": 0.9022895840051596,
      "grad_norm": 0.9881266943062582,
      "learning_rate": 1.2927816312502422e-07,
      "loss": 0.1248,
      "step": 9793
    },
    {
      "epoch": 0.9023817201824297,
      "grad_norm": 0.9594850371411043,
      "learning_rate": 1.2903647644043254e-07,
      "loss": 0.121,
      "step": 9794
    },
    {
      "epoch": 0.9024738563596997,
      "grad_norm": 0.9412583884780079,
      "learning_rate": 1.2879500990005926e-07,
      "loss": 0.1118,
      "step": 9795
    },
    {
      "epoch": 0.9025659925369697,
      "grad_norm": 0.9863617128648952,
      "learning_rate": 1.2855376352632427e-07,
      "loss": 0.1199,
      "step": 9796
    },
    {
      "epoch": 0.9026581287142397,
      "grad_norm": 0.9075230786695033,
      "learning_rate": 1.2831273734162736e-07,
      "loss": 0.1116,
      "step": 9797
    },
    {
      "epoch": 0.9027502648915097,
      "grad_norm": 0.970810405883738,
      "learning_rate": 1.280719313683479e-07,
      "loss": 0.127,
      "step": 9798
    },
    {
      "epoch": 0.9028424010687797,
      "grad_norm": 0.969479796883492,
      "learning_rate": 1.2783134562884547e-07,
      "loss": 0.1309,
      "step": 9799
    },
    {
      "epoch": 0.9029345372460497,
      "grad_norm": 0.9723007625084326,
      "learning_rate": 1.275909801454578e-07,
      "loss": 0.1242,
      "step": 9800
    },
    {
      "epoch": 0.9030266734233197,
      "grad_norm": 0.9245601520664418,
      "learning_rate": 1.2735083494050255e-07,
      "loss": 0.1172,
      "step": 9801
    },
    {
      "epoch": 0.9031188096005897,
      "grad_norm": 0.939522410574133,
      "learning_rate": 1.2711091003627773e-07,
      "loss": 0.1197,
      "step": 9802
    },
    {
      "epoch": 0.9032109457778597,
      "grad_norm": 0.9359774641408325,
      "learning_rate": 1.2687120545506054e-07,
      "loss": 0.1187,
      "step": 9803
    },
    {
      "epoch": 0.9033030819551296,
      "grad_norm": 0.9609895198256817,
      "learning_rate": 1.2663172121910705e-07,
      "loss": 0.1189,
      "step": 9804
    },
    {
      "epoch": 0.9033952181323996,
      "grad_norm": 0.9098035827692788,
      "learning_rate": 1.263924573506542e-07,
      "loss": 0.1092,
      "step": 9805
    },
    {
      "epoch": 0.9034873543096696,
      "grad_norm": 0.9864415404989244,
      "learning_rate": 1.2615341387191644e-07,
      "loss": 0.1253,
      "step": 9806
    },
    {
      "epoch": 0.9035794904869398,
      "grad_norm": 0.9141531124611704,
      "learning_rate": 1.2591459080509017e-07,
      "loss": 0.1108,
      "step": 9807
    },
    {
      "epoch": 0.9036716266642097,
      "grad_norm": 0.938159819537608,
      "learning_rate": 1.2567598817234932e-07,
      "loss": 0.1146,
      "step": 9808
    },
    {
      "epoch": 0.9037637628414797,
      "grad_norm": 0.9704965870545296,
      "learning_rate": 1.2543760599584842e-07,
      "loss": 0.1208,
      "step": 9809
    },
    {
      "epoch": 0.9038558990187497,
      "grad_norm": 0.933503554525939,
      "learning_rate": 1.2519944429772168e-07,
      "loss": 0.1109,
      "step": 9810
    },
    {
      "epoch": 0.9039480351960197,
      "grad_norm": 1.0386870504682086,
      "learning_rate": 1.2496150310008226e-07,
      "loss": 0.1333,
      "step": 9811
    },
    {
      "epoch": 0.9040401713732897,
      "grad_norm": 0.9608349972329964,
      "learning_rate": 1.2472378242502247e-07,
      "loss": 0.1166,
      "step": 9812
    },
    {
      "epoch": 0.9041323075505597,
      "grad_norm": 0.9204922383585812,
      "learning_rate": 1.2448628229461522e-07,
      "loss": 0.1123,
      "step": 9813
    },
    {
      "epoch": 0.9042244437278297,
      "grad_norm": 0.8858240303231056,
      "learning_rate": 1.2424900273091206e-07,
      "loss": 0.1059,
      "step": 9814
    },
    {
      "epoch": 0.9043165799050997,
      "grad_norm": 0.93623941741783,
      "learning_rate": 1.2401194375594532e-07,
      "loss": 0.1171,
      "step": 9815
    },
    {
      "epoch": 0.9044087160823697,
      "grad_norm": 0.9047481046424645,
      "learning_rate": 1.237751053917252e-07,
      "loss": 0.1137,
      "step": 9816
    },
    {
      "epoch": 0.9045008522596397,
      "grad_norm": 0.9396357652997752,
      "learning_rate": 1.235384876602419e-07,
      "loss": 0.1166,
      "step": 9817
    },
    {
      "epoch": 0.9045929884369097,
      "grad_norm": 0.942299322546276,
      "learning_rate": 1.233020905834656e-07,
      "loss": 0.1109,
      "step": 9818
    },
    {
      "epoch": 0.9046851246141797,
      "grad_norm": 0.9482094011622528,
      "learning_rate": 1.2306591418334624e-07,
      "loss": 0.1236,
      "step": 9819
    },
    {
      "epoch": 0.9047772607914497,
      "grad_norm": 0.973923981370888,
      "learning_rate": 1.2282995848181267e-07,
      "loss": 0.1238,
      "step": 9820
    },
    {
      "epoch": 0.9048693969687198,
      "grad_norm": 0.851301869657395,
      "learning_rate": 1.2259422350077348e-07,
      "loss": 0.1026,
      "step": 9821
    },
    {
      "epoch": 0.9049615331459898,
      "grad_norm": 0.924457941406162,
      "learning_rate": 1.223587092621162e-07,
      "loss": 0.1147,
      "step": 9822
    },
    {
      "epoch": 0.9050536693232598,
      "grad_norm": 0.9255580493486694,
      "learning_rate": 1.2212341578770854e-07,
      "loss": 0.1168,
      "step": 9823
    },
    {
      "epoch": 0.9051458055005298,
      "grad_norm": 0.9023318089918937,
      "learning_rate": 1.2188834309939806e-07,
      "loss": 0.1158,
      "step": 9824
    },
    {
      "epoch": 0.9052379416777998,
      "grad_norm": 0.8721923584983146,
      "learning_rate": 1.2165349121901037e-07,
      "loss": 0.1126,
      "step": 9825
    },
    {
      "epoch": 0.9053300778550698,
      "grad_norm": 0.9105543472402409,
      "learning_rate": 1.2141886016835246e-07,
      "loss": 0.1137,
      "step": 9826
    },
    {
      "epoch": 0.9054222140323398,
      "grad_norm": 0.9505582223687297,
      "learning_rate": 1.2118444996920887e-07,
      "loss": 0.1172,
      "step": 9827
    },
    {
      "epoch": 0.9055143502096098,
      "grad_norm": 0.9050373360289897,
      "learning_rate": 1.2095026064334548e-07,
      "loss": 0.1186,
      "step": 9828
    },
    {
      "epoch": 0.9056064863868798,
      "grad_norm": 0.9383008904163106,
      "learning_rate": 1.207162922125063e-07,
      "loss": 0.1197,
      "step": 9829
    },
    {
      "epoch": 0.9056986225641498,
      "grad_norm": 0.913038696222506,
      "learning_rate": 1.2048254469841508e-07,
      "loss": 0.1071,
      "step": 9830
    },
    {
      "epoch": 0.9057907587414198,
      "grad_norm": 0.9626035864124302,
      "learning_rate": 1.2024901812277639e-07,
      "loss": 0.1223,
      "step": 9831
    },
    {
      "epoch": 0.9058828949186898,
      "grad_norm": 0.9518301451963114,
      "learning_rate": 1.2001571250727233e-07,
      "loss": 0.1132,
      "step": 9832
    },
    {
      "epoch": 0.9059750310959598,
      "grad_norm": 0.8904070068361779,
      "learning_rate": 1.1978262787356504e-07,
      "loss": 0.1131,
      "step": 9833
    },
    {
      "epoch": 0.9060671672732298,
      "grad_norm": 0.8945005093728289,
      "learning_rate": 1.1954976424329716e-07,
      "loss": 0.114,
      "step": 9834
    },
    {
      "epoch": 0.9061593034504999,
      "grad_norm": 0.9772335300116917,
      "learning_rate": 1.193171216380895e-07,
      "loss": 0.1168,
      "step": 9835
    },
    {
      "epoch": 0.9062514396277699,
      "grad_norm": 0.9413307860703609,
      "learning_rate": 1.1908470007954392e-07,
      "loss": 0.1175,
      "step": 9836
    },
    {
      "epoch": 0.9063435758050399,
      "grad_norm": 0.8985101580638164,
      "learning_rate": 1.188524995892401e-07,
      "loss": 0.1165,
      "step": 9837
    },
    {
      "epoch": 0.9064357119823099,
      "grad_norm": 0.9558883496465006,
      "learning_rate": 1.1862052018873777e-07,
      "loss": 0.1181,
      "step": 9838
    },
    {
      "epoch": 0.9065278481595799,
      "grad_norm": 0.9821413035235732,
      "learning_rate": 1.1838876189957632e-07,
      "loss": 0.1342,
      "step": 9839
    },
    {
      "epoch": 0.9066199843368499,
      "grad_norm": 0.9294065453209202,
      "learning_rate": 1.1815722474327495e-07,
      "loss": 0.1159,
      "step": 9840
    },
    {
      "epoch": 0.9067121205141199,
      "grad_norm": 0.941478958853304,
      "learning_rate": 1.1792590874133119e-07,
      "loss": 0.115,
      "step": 9841
    },
    {
      "epoch": 0.9068042566913899,
      "grad_norm": 0.9511182921769091,
      "learning_rate": 1.176948139152237e-07,
      "loss": 0.1225,
      "step": 9842
    },
    {
      "epoch": 0.9068963928686599,
      "grad_norm": 0.9305028355356414,
      "learning_rate": 1.1746394028640862e-07,
      "loss": 0.1236,
      "step": 9843
    },
    {
      "epoch": 0.9069885290459299,
      "grad_norm": 0.909927237435103,
      "learning_rate": 1.1723328787632354e-07,
      "loss": 0.119,
      "step": 9844
    },
    {
      "epoch": 0.9070806652231999,
      "grad_norm": 0.9264740890403086,
      "learning_rate": 1.1700285670638356e-07,
      "loss": 0.1105,
      "step": 9845
    },
    {
      "epoch": 0.9071728014004699,
      "grad_norm": 0.9332255111550347,
      "learning_rate": 1.1677264679798489e-07,
      "loss": 0.1135,
      "step": 9846
    },
    {
      "epoch": 0.9072649375777398,
      "grad_norm": 0.8932121363052186,
      "learning_rate": 1.1654265817250294e-07,
      "loss": 0.0993,
      "step": 9847
    },
    {
      "epoch": 0.90735707375501,
      "grad_norm": 0.984756358362903,
      "learning_rate": 1.1631289085129143e-07,
      "loss": 0.1209,
      "step": 9848
    },
    {
      "epoch": 0.90744920993228,
      "grad_norm": 0.8797646764277562,
      "learning_rate": 1.1608334485568446e-07,
      "loss": 0.1099,
      "step": 9849
    },
    {
      "epoch": 0.90754134610955,
      "grad_norm": 0.9266451524625748,
      "learning_rate": 1.1585402020699548e-07,
      "loss": 0.1128,
      "step": 9850
    },
    {
      "epoch": 0.90763348228682,
      "grad_norm": 0.9936081829017386,
      "learning_rate": 1.1562491692651723e-07,
      "loss": 0.1223,
      "step": 9851
    },
    {
      "epoch": 0.90772561846409,
      "grad_norm": 1.0064731245012568,
      "learning_rate": 1.153960350355221e-07,
      "loss": 0.124,
      "step": 9852
    },
    {
      "epoch": 0.9078177546413599,
      "grad_norm": 0.9249707712056661,
      "learning_rate": 1.1516737455526228e-07,
      "loss": 0.1159,
      "step": 9853
    },
    {
      "epoch": 0.9079098908186299,
      "grad_norm": 0.9030367953806261,
      "learning_rate": 1.14938935506968e-07,
      "loss": 0.1184,
      "step": 9854
    },
    {
      "epoch": 0.9080020269958999,
      "grad_norm": 1.007252102658938,
      "learning_rate": 1.1471071791185007e-07,
      "loss": 0.1362,
      "step": 9855
    },
    {
      "epoch": 0.9080941631731699,
      "grad_norm": 0.9551043810463008,
      "learning_rate": 1.1448272179109848e-07,
      "loss": 0.1233,
      "step": 9856
    },
    {
      "epoch": 0.9081862993504399,
      "grad_norm": 1.0072214892101452,
      "learning_rate": 1.1425494716588353e-07,
      "loss": 0.1238,
      "step": 9857
    },
    {
      "epoch": 0.9082784355277099,
      "grad_norm": 0.9042976003595324,
      "learning_rate": 1.1402739405735303e-07,
      "loss": 0.1067,
      "step": 9858
    },
    {
      "epoch": 0.9083705717049799,
      "grad_norm": 0.9074508823982074,
      "learning_rate": 1.1380006248663616e-07,
      "loss": 0.1212,
      "step": 9859
    },
    {
      "epoch": 0.9084627078822499,
      "grad_norm": 0.9749475166102289,
      "learning_rate": 1.1357295247483997e-07,
      "loss": 0.1184,
      "step": 9860
    },
    {
      "epoch": 0.9085548440595199,
      "grad_norm": 0.9722337825584535,
      "learning_rate": 1.1334606404305226e-07,
      "loss": 0.1253,
      "step": 9861
    },
    {
      "epoch": 0.90864698023679,
      "grad_norm": 0.9000281767600011,
      "learning_rate": 1.13119397212339e-07,
      "loss": 0.1063,
      "step": 9862
    },
    {
      "epoch": 0.90873911641406,
      "grad_norm": 0.8865674739066136,
      "learning_rate": 1.1289295200374667e-07,
      "loss": 0.1021,
      "step": 9863
    },
    {
      "epoch": 0.90883125259133,
      "grad_norm": 0.9338951859368928,
      "learning_rate": 1.1266672843830095e-07,
      "loss": 0.1124,
      "step": 9864
    },
    {
      "epoch": 0.9089233887686,
      "grad_norm": 0.9355079432027297,
      "learning_rate": 1.1244072653700644e-07,
      "loss": 0.1071,
      "step": 9865
    },
    {
      "epoch": 0.90901552494587,
      "grad_norm": 0.9657351672980083,
      "learning_rate": 1.122149463208469e-07,
      "loss": 0.1143,
      "step": 9866
    },
    {
      "epoch": 0.90910766112314,
      "grad_norm": 0.9550170888128628,
      "learning_rate": 1.1198938781078694e-07,
      "loss": 0.1183,
      "step": 9867
    },
    {
      "epoch": 0.90919979730041,
      "grad_norm": 0.9679298136534235,
      "learning_rate": 1.1176405102776899e-07,
      "loss": 0.1078,
      "step": 9868
    },
    {
      "epoch": 0.90929193347768,
      "grad_norm": 0.9730850211805437,
      "learning_rate": 1.1153893599271631e-07,
      "loss": 0.1149,
      "step": 9869
    },
    {
      "epoch": 0.90938406965495,
      "grad_norm": 0.9591042974754497,
      "learning_rate": 1.1131404272653051e-07,
      "loss": 0.1122,
      "step": 9870
    },
    {
      "epoch": 0.90947620583222,
      "grad_norm": 0.9421743521813682,
      "learning_rate": 1.1108937125009266e-07,
      "loss": 0.1212,
      "step": 9871
    },
    {
      "epoch": 0.90956834200949,
      "grad_norm": 0.9474137614317413,
      "learning_rate": 1.1086492158426387e-07,
      "loss": 0.1276,
      "step": 9872
    },
    {
      "epoch": 0.90966047818676,
      "grad_norm": 0.9496210187132605,
      "learning_rate": 1.106406937498844e-07,
      "loss": 0.117,
      "step": 9873
    },
    {
      "epoch": 0.90975261436403,
      "grad_norm": 0.9623210428089947,
      "learning_rate": 1.1041668776777342e-07,
      "loss": 0.1188,
      "step": 9874
    },
    {
      "epoch": 0.9098447505413001,
      "grad_norm": 0.9688398891785434,
      "learning_rate": 1.1019290365873042e-07,
      "loss": 0.1167,
      "step": 9875
    },
    {
      "epoch": 0.9099368867185701,
      "grad_norm": 0.9438973611520155,
      "learning_rate": 1.0996934144353322e-07,
      "loss": 0.1238,
      "step": 9876
    },
    {
      "epoch": 0.9100290228958401,
      "grad_norm": 1.0123754346766263,
      "learning_rate": 1.0974600114293993e-07,
      "loss": 0.131,
      "step": 9877
    },
    {
      "epoch": 0.9101211590731101,
      "grad_norm": 0.9369173621826444,
      "learning_rate": 1.0952288277768786e-07,
      "loss": 0.1243,
      "step": 9878
    },
    {
      "epoch": 0.9102132952503801,
      "grad_norm": 0.9104725907747405,
      "learning_rate": 1.0929998636849321e-07,
      "loss": 0.1114,
      "step": 9879
    },
    {
      "epoch": 0.9103054314276501,
      "grad_norm": 0.8911323682845301,
      "learning_rate": 1.090773119360522e-07,
      "loss": 0.1047,
      "step": 9880
    },
    {
      "epoch": 0.9103975676049201,
      "grad_norm": 0.9328489809284518,
      "learning_rate": 1.0885485950103997e-07,
      "loss": 0.115,
      "step": 9881
    },
    {
      "epoch": 0.9104897037821901,
      "grad_norm": 0.9185301495705797,
      "learning_rate": 1.0863262908411165e-07,
      "loss": 0.1102,
      "step": 9882
    },
    {
      "epoch": 0.9105818399594601,
      "grad_norm": 0.9338449898577366,
      "learning_rate": 1.0841062070590074e-07,
      "loss": 0.1195,
      "step": 9883
    },
    {
      "epoch": 0.9106739761367301,
      "grad_norm": 0.9028633254451033,
      "learning_rate": 1.0818883438702105e-07,
      "loss": 0.107,
      "step": 9884
    },
    {
      "epoch": 0.9107661123140001,
      "grad_norm": 0.9441449194556039,
      "learning_rate": 1.0796727014806607e-07,
      "loss": 0.1209,
      "step": 9885
    },
    {
      "epoch": 0.9108582484912701,
      "grad_norm": 0.943313665217802,
      "learning_rate": 1.0774592800960715e-07,
      "loss": 0.119,
      "step": 9886
    },
    {
      "epoch": 0.9109503846685401,
      "grad_norm": 0.9296020252630918,
      "learning_rate": 1.0752480799219616e-07,
      "loss": 0.117,
      "step": 9887
    },
    {
      "epoch": 0.91104252084581,
      "grad_norm": 0.9667448070909315,
      "learning_rate": 1.073039101163642e-07,
      "loss": 0.1242,
      "step": 9888
    },
    {
      "epoch": 0.9111346570230802,
      "grad_norm": 0.909844133598178,
      "learning_rate": 1.0708323440262153e-07,
      "loss": 0.103,
      "step": 9889
    },
    {
      "epoch": 0.9112267932003502,
      "grad_norm": 0.9273168759001507,
      "learning_rate": 1.0686278087145868e-07,
      "loss": 0.1183,
      "step": 9890
    },
    {
      "epoch": 0.9113189293776202,
      "grad_norm": 0.9491675032145784,
      "learning_rate": 1.0664254954334402e-07,
      "loss": 0.112,
      "step": 9891
    },
    {
      "epoch": 0.9114110655548902,
      "grad_norm": 0.9436010409635454,
      "learning_rate": 1.064225404387259e-07,
      "loss": 0.1163,
      "step": 9892
    },
    {
      "epoch": 0.9115032017321602,
      "grad_norm": 0.9369245507410711,
      "learning_rate": 1.0620275357803244e-07,
      "loss": 0.1219,
      "step": 9893
    },
    {
      "epoch": 0.9115953379094301,
      "grad_norm": 0.934345303014593,
      "learning_rate": 1.059831889816712e-07,
      "loss": 0.1149,
      "step": 9894
    },
    {
      "epoch": 0.9116874740867001,
      "grad_norm": 0.9164697181421179,
      "learning_rate": 1.0576384667002837e-07,
      "loss": 0.1091,
      "step": 9895
    },
    {
      "epoch": 0.9117796102639701,
      "grad_norm": 0.9185437968697807,
      "learning_rate": 1.0554472666347043e-07,
      "loss": 0.1245,
      "step": 9896
    },
    {
      "epoch": 0.9118717464412401,
      "grad_norm": 0.9367584691113693,
      "learning_rate": 1.0532582898234167e-07,
      "loss": 0.1224,
      "step": 9897
    },
    {
      "epoch": 0.9119638826185101,
      "grad_norm": 0.9166216071894971,
      "learning_rate": 1.0510715364696806e-07,
      "loss": 0.1278,
      "step": 9898
    },
    {
      "epoch": 0.9120560187957801,
      "grad_norm": 0.8699664872405362,
      "learning_rate": 1.048887006776525e-07,
      "loss": 0.1071,
      "step": 9899
    },
    {
      "epoch": 0.9121481549730501,
      "grad_norm": 0.9623063255443589,
      "learning_rate": 1.0467047009467878e-07,
      "loss": 0.1179,
      "step": 9900
    },
    {
      "epoch": 0.9122402911503201,
      "grad_norm": 0.9751253664006017,
      "learning_rate": 1.0445246191831015e-07,
      "loss": 0.1222,
      "step": 9901
    },
    {
      "epoch": 0.9123324273275901,
      "grad_norm": 0.9550208126269293,
      "learning_rate": 1.0423467616878819e-07,
      "loss": 0.1223,
      "step": 9902
    },
    {
      "epoch": 0.9124245635048602,
      "grad_norm": 0.9520464240161787,
      "learning_rate": 1.040171128663342e-07,
      "loss": 0.1113,
      "step": 9903
    },
    {
      "epoch": 0.9125166996821302,
      "grad_norm": 0.9800160176815804,
      "learning_rate": 1.03799772031149e-07,
      "loss": 0.1258,
      "step": 9904
    },
    {
      "epoch": 0.9126088358594002,
      "grad_norm": 0.9131937286932651,
      "learning_rate": 1.035826536834128e-07,
      "loss": 0.1107,
      "step": 9905
    },
    {
      "epoch": 0.9127009720366702,
      "grad_norm": 0.9198729568337226,
      "learning_rate": 1.0336575784328534e-07,
      "loss": 0.1129,
      "step": 9906
    },
    {
      "epoch": 0.9127931082139402,
      "grad_norm": 0.9354820808259293,
      "learning_rate": 1.031490845309055e-07,
      "loss": 0.1155,
      "step": 9907
    },
    {
      "epoch": 0.9128852443912102,
      "grad_norm": 0.9341493220138887,
      "learning_rate": 1.029326337663905e-07,
      "loss": 0.1136,
      "step": 9908
    },
    {
      "epoch": 0.9129773805684802,
      "grad_norm": 0.9341967204483506,
      "learning_rate": 1.0271640556983875e-07,
      "loss": 0.1117,
      "step": 9909
    },
    {
      "epoch": 0.9130695167457502,
      "grad_norm": 0.9531420223699355,
      "learning_rate": 1.0250039996132637e-07,
      "loss": 0.1191,
      "step": 9910
    },
    {
      "epoch": 0.9131616529230202,
      "grad_norm": 0.9487379938366196,
      "learning_rate": 1.0228461696091041e-07,
      "loss": 0.116,
      "step": 9911
    },
    {
      "epoch": 0.9132537891002902,
      "grad_norm": 0.9383824540449928,
      "learning_rate": 1.0206905658862592e-07,
      "loss": 0.1203,
      "step": 9912
    },
    {
      "epoch": 0.9133459252775602,
      "grad_norm": 0.8714240991607098,
      "learning_rate": 1.0185371886448719e-07,
      "loss": 0.1039,
      "step": 9913
    },
    {
      "epoch": 0.9134380614548302,
      "grad_norm": 0.9609563547823312,
      "learning_rate": 1.016386038084885e-07,
      "loss": 0.1293,
      "step": 9914
    },
    {
      "epoch": 0.9135301976321002,
      "grad_norm": 0.9029910797521731,
      "learning_rate": 1.0142371144060414e-07,
      "loss": 0.1026,
      "step": 9915
    },
    {
      "epoch": 0.9136223338093703,
      "grad_norm": 0.9201532523605698,
      "learning_rate": 1.0120904178078594e-07,
      "loss": 0.1125,
      "step": 9916
    },
    {
      "epoch": 0.9137144699866403,
      "grad_norm": 0.935929708191619,
      "learning_rate": 1.0099459484896684e-07,
      "loss": 0.1161,
      "step": 9917
    },
    {
      "epoch": 0.9138066061639103,
      "grad_norm": 0.9579121910083173,
      "learning_rate": 1.007803706650573e-07,
      "loss": 0.1201,
      "step": 9918
    },
    {
      "epoch": 0.9138987423411803,
      "grad_norm": 0.8990511111015285,
      "learning_rate": 1.0056636924894864e-07,
      "loss": 0.1082,
      "step": 9919
    },
    {
      "epoch": 0.9139908785184503,
      "grad_norm": 0.9419869327103809,
      "learning_rate": 1.0035259062051079e-07,
      "loss": 0.111,
      "step": 9920
    },
    {
      "epoch": 0.9140830146957203,
      "grad_norm": 0.9481056579845162,
      "learning_rate": 1.0013903479959313e-07,
      "loss": 0.1107,
      "step": 9921
    },
    {
      "epoch": 0.9141751508729903,
      "grad_norm": 0.9419350381453994,
      "learning_rate": 9.992570180602484e-08,
      "loss": 0.1183,
      "step": 9922
    },
    {
      "epoch": 0.9142672870502603,
      "grad_norm": 0.9768882094713995,
      "learning_rate": 9.971259165961312e-08,
      "loss": 0.1186,
      "step": 9923
    },
    {
      "epoch": 0.9143594232275303,
      "grad_norm": 0.9286412912317882,
      "learning_rate": 9.949970438014544e-08,
      "loss": 0.1103,
      "step": 9924
    },
    {
      "epoch": 0.9144515594048003,
      "grad_norm": 0.8956628077663775,
      "learning_rate": 9.928703998738853e-08,
      "loss": 0.1068,
      "step": 9925
    },
    {
      "epoch": 0.9145436955820703,
      "grad_norm": 0.9399039480242753,
      "learning_rate": 9.907459850108824e-08,
      "loss": 0.1131,
      "step": 9926
    },
    {
      "epoch": 0.9146358317593403,
      "grad_norm": 0.9283382943599822,
      "learning_rate": 9.886237994097048e-08,
      "loss": 0.1147,
      "step": 9927
    },
    {
      "epoch": 0.9147279679366103,
      "grad_norm": 0.9424001613736077,
      "learning_rate": 9.86503843267389e-08,
      "loss": 0.115,
      "step": 9928
    },
    {
      "epoch": 0.9148201041138803,
      "grad_norm": 0.9528230598198278,
      "learning_rate": 9.843861167807722e-08,
      "loss": 0.1271,
      "step": 9929
    },
    {
      "epoch": 0.9149122402911504,
      "grad_norm": 0.8861050600901383,
      "learning_rate": 9.822706201464915e-08,
      "loss": 0.1077,
      "step": 9930
    },
    {
      "epoch": 0.9150043764684204,
      "grad_norm": 0.9660146570263729,
      "learning_rate": 9.801573535609677e-08,
      "loss": 0.1187,
      "step": 9931
    },
    {
      "epoch": 0.9150965126456904,
      "grad_norm": 0.9516861737652685,
      "learning_rate": 9.780463172204186e-08,
      "loss": 0.122,
      "step": 9932
    },
    {
      "epoch": 0.9151886488229604,
      "grad_norm": 0.955635835206404,
      "learning_rate": 9.759375113208541e-08,
      "loss": 0.1159,
      "step": 9933
    },
    {
      "epoch": 0.9152807850002304,
      "grad_norm": 0.9384288101603031,
      "learning_rate": 9.738309360580789e-08,
      "loss": 0.1297,
      "step": 9934
    },
    {
      "epoch": 0.9153729211775004,
      "grad_norm": 0.9568435927717273,
      "learning_rate": 9.717265916276863e-08,
      "loss": 0.1173,
      "step": 9935
    },
    {
      "epoch": 0.9154650573547704,
      "grad_norm": 0.8958470833309566,
      "learning_rate": 9.696244782250675e-08,
      "loss": 0.1032,
      "step": 9936
    },
    {
      "epoch": 0.9155571935320403,
      "grad_norm": 0.9548627720230944,
      "learning_rate": 9.675245960453966e-08,
      "loss": 0.1208,
      "step": 9937
    },
    {
      "epoch": 0.9156493297093103,
      "grad_norm": 0.9615401456254469,
      "learning_rate": 9.654269452836567e-08,
      "loss": 0.1273,
      "step": 9938
    },
    {
      "epoch": 0.9157414658865803,
      "grad_norm": 0.9336984788547501,
      "learning_rate": 9.633315261346115e-08,
      "loss": 0.1111,
      "step": 9939
    },
    {
      "epoch": 0.9158336020638503,
      "grad_norm": 0.9676566615609804,
      "learning_rate": 9.612383387928248e-08,
      "loss": 0.1238,
      "step": 9940
    },
    {
      "epoch": 0.9159257382411203,
      "grad_norm": 0.9365548618439336,
      "learning_rate": 9.59147383452641e-08,
      "loss": 0.1171,
      "step": 9941
    },
    {
      "epoch": 0.9160178744183903,
      "grad_norm": 0.9679108953343,
      "learning_rate": 9.570586603082078e-08,
      "loss": 0.1187,
      "step": 9942
    },
    {
      "epoch": 0.9161100105956604,
      "grad_norm": 0.9693508193733982,
      "learning_rate": 9.549721695534669e-08,
      "loss": 0.1153,
      "step": 9943
    },
    {
      "epoch": 0.9162021467729304,
      "grad_norm": 0.8984541268521159,
      "learning_rate": 9.528879113821526e-08,
      "loss": 0.1151,
      "step": 9944
    },
    {
      "epoch": 0.9162942829502004,
      "grad_norm": 0.9104078134300633,
      "learning_rate": 9.508058859877794e-08,
      "loss": 0.1149,
      "step": 9945
    },
    {
      "epoch": 0.9163864191274704,
      "grad_norm": 0.9581148806404735,
      "learning_rate": 9.487260935636678e-08,
      "loss": 0.1153,
      "step": 9946
    },
    {
      "epoch": 0.9164785553047404,
      "grad_norm": 0.8834590853387024,
      "learning_rate": 9.466485343029269e-08,
      "loss": 0.109,
      "step": 9947
    },
    {
      "epoch": 0.9165706914820104,
      "grad_norm": 0.9112418757641385,
      "learning_rate": 9.44573208398461e-08,
      "loss": 0.1125,
      "step": 9948
    },
    {
      "epoch": 0.9166628276592804,
      "grad_norm": 0.9450438994219061,
      "learning_rate": 9.425001160429603e-08,
      "loss": 0.1159,
      "step": 9949
    },
    {
      "epoch": 0.9167549638365504,
      "grad_norm": 0.9472996467579019,
      "learning_rate": 9.404292574289126e-08,
      "loss": 0.1183,
      "step": 9950
    },
    {
      "epoch": 0.9168471000138204,
      "grad_norm": 0.8966195092536017,
      "learning_rate": 9.383606327485973e-08,
      "loss": 0.1044,
      "step": 9951
    },
    {
      "epoch": 0.9169392361910904,
      "grad_norm": 0.9532262581930256,
      "learning_rate": 9.362942421940885e-08,
      "loss": 0.1214,
      "step": 9952
    },
    {
      "epoch": 0.9170313723683604,
      "grad_norm": 0.9579253822700269,
      "learning_rate": 9.342300859572467e-08,
      "loss": 0.1217,
      "step": 9953
    },
    {
      "epoch": 0.9171235085456304,
      "grad_norm": 0.9434676831721229,
      "learning_rate": 9.321681642297298e-08,
      "loss": 0.1228,
      "step": 9954
    },
    {
      "epoch": 0.9172156447229004,
      "grad_norm": 0.9044628242856921,
      "learning_rate": 9.301084772029928e-08,
      "loss": 0.1061,
      "step": 9955
    },
    {
      "epoch": 0.9173077809001704,
      "grad_norm": 0.9238703964017801,
      "learning_rate": 9.280510250682745e-08,
      "loss": 0.123,
      "step": 9956
    },
    {
      "epoch": 0.9173999170774405,
      "grad_norm": 0.8527902116443628,
      "learning_rate": 9.259958080166081e-08,
      "loss": 0.1017,
      "step": 9957
    },
    {
      "epoch": 0.9174920532547105,
      "grad_norm": 0.9095947345394509,
      "learning_rate": 9.23942826238819e-08,
      "loss": 0.1098,
      "step": 9958
    },
    {
      "epoch": 0.9175841894319805,
      "grad_norm": 0.8956255963339151,
      "learning_rate": 9.218920799255293e-08,
      "loss": 0.1111,
      "step": 9959
    },
    {
      "epoch": 0.9176763256092505,
      "grad_norm": 0.971998459665577,
      "learning_rate": 9.198435692671565e-08,
      "loss": 0.1157,
      "step": 9960
    },
    {
      "epoch": 0.9177684617865205,
      "grad_norm": 0.9697718892880428,
      "learning_rate": 9.177972944538982e-08,
      "loss": 0.1131,
      "step": 9961
    },
    {
      "epoch": 0.9178605979637905,
      "grad_norm": 0.9544535119686984,
      "learning_rate": 9.157532556757526e-08,
      "loss": 0.1171,
      "step": 9962
    },
    {
      "epoch": 0.9179527341410605,
      "grad_norm": 0.9497954111135992,
      "learning_rate": 9.137114531225066e-08,
      "loss": 0.115,
      "step": 9963
    },
    {
      "epoch": 0.9180448703183305,
      "grad_norm": 0.9652083061124271,
      "learning_rate": 9.116718869837449e-08,
      "loss": 0.1291,
      "step": 9964
    },
    {
      "epoch": 0.9181370064956005,
      "grad_norm": 0.9278351761508028,
      "learning_rate": 9.096345574488435e-08,
      "loss": 0.1164,
      "step": 9965
    },
    {
      "epoch": 0.9182291426728705,
      "grad_norm": 0.9466540329969422,
      "learning_rate": 9.075994647069653e-08,
      "loss": 0.1134,
      "step": 9966
    },
    {
      "epoch": 0.9183212788501405,
      "grad_norm": 0.9546339745574965,
      "learning_rate": 9.0556660894707e-08,
      "loss": 0.1246,
      "step": 9967
    },
    {
      "epoch": 0.9184134150274105,
      "grad_norm": 0.9071641213415044,
      "learning_rate": 9.035359903579039e-08,
      "loss": 0.1085,
      "step": 9968
    },
    {
      "epoch": 0.9185055512046805,
      "grad_norm": 0.9984994115231136,
      "learning_rate": 9.015076091280189e-08,
      "loss": 0.116,
      "step": 9969
    },
    {
      "epoch": 0.9185976873819506,
      "grad_norm": 0.8703859546656373,
      "learning_rate": 8.994814654457451e-08,
      "loss": 0.1085,
      "step": 9970
    },
    {
      "epoch": 0.9186898235592206,
      "grad_norm": 1.0156798267992415,
      "learning_rate": 8.974575594992124e-08,
      "loss": 0.1122,
      "step": 9971
    },
    {
      "epoch": 0.9187819597364906,
      "grad_norm": 0.921122595652039,
      "learning_rate": 8.954358914763373e-08,
      "loss": 0.11,
      "step": 9972
    },
    {
      "epoch": 0.9188740959137606,
      "grad_norm": 0.8869429691351998,
      "learning_rate": 8.934164615648333e-08,
      "loss": 0.1037,
      "step": 9973
    },
    {
      "epoch": 0.9189662320910306,
      "grad_norm": 0.9087355779950654,
      "learning_rate": 8.913992699522062e-08,
      "loss": 0.1063,
      "step": 9974
    },
    {
      "epoch": 0.9190583682683006,
      "grad_norm": 0.9171403022519661,
      "learning_rate": 8.893843168257504e-08,
      "loss": 0.115,
      "step": 9975
    },
    {
      "epoch": 0.9191505044455706,
      "grad_norm": 0.9090886668961073,
      "learning_rate": 8.873716023725581e-08,
      "loss": 0.105,
      "step": 9976
    },
    {
      "epoch": 0.9192426406228406,
      "grad_norm": 0.9517240376938657,
      "learning_rate": 8.853611267795076e-08,
      "loss": 0.1249,
      "step": 9977
    },
    {
      "epoch": 0.9193347768001106,
      "grad_norm": 0.8765303454447209,
      "learning_rate": 8.833528902332688e-08,
      "loss": 0.0996,
      "step": 9978
    },
    {
      "epoch": 0.9194269129773806,
      "grad_norm": 0.9423252168440611,
      "learning_rate": 8.813468929203095e-08,
      "loss": 0.1141,
      "step": 9979
    },
    {
      "epoch": 0.9195190491546505,
      "grad_norm": 0.9496341928199417,
      "learning_rate": 8.793431350268861e-08,
      "loss": 0.1184,
      "step": 9980
    },
    {
      "epoch": 0.9196111853319205,
      "grad_norm": 0.9484875310380811,
      "learning_rate": 8.773416167390525e-08,
      "loss": 0.1151,
      "step": 9981
    },
    {
      "epoch": 0.9197033215091905,
      "grad_norm": 0.9375366241158559,
      "learning_rate": 8.753423382426463e-08,
      "loss": 0.1048,
      "step": 9982
    },
    {
      "epoch": 0.9197954576864605,
      "grad_norm": 0.9891837534169375,
      "learning_rate": 8.733452997232967e-08,
      "loss": 0.1165,
      "step": 9983
    },
    {
      "epoch": 0.9198875938637306,
      "grad_norm": 0.9944041684722955,
      "learning_rate": 8.713505013664303e-08,
      "loss": 0.1319,
      "step": 9984
    },
    {
      "epoch": 0.9199797300410006,
      "grad_norm": 0.9809021101391047,
      "learning_rate": 8.693579433572741e-08,
      "loss": 0.1245,
      "step": 9985
    },
    {
      "epoch": 0.9200718662182706,
      "grad_norm": 1.0109863882514742,
      "learning_rate": 8.673676258808244e-08,
      "loss": 0.1272,
      "step": 9986
    },
    {
      "epoch": 0.9201640023955406,
      "grad_norm": 0.9204852372456744,
      "learning_rate": 8.653795491218891e-08,
      "loss": 0.105,
      "step": 9987
    },
    {
      "epoch": 0.9202561385728106,
      "grad_norm": 0.9258040595618826,
      "learning_rate": 8.633937132650593e-08,
      "loss": 0.1134,
      "step": 9988
    },
    {
      "epoch": 0.9203482747500806,
      "grad_norm": 0.9293784596137714,
      "learning_rate": 8.614101184947238e-08,
      "loss": 0.1193,
      "step": 9989
    },
    {
      "epoch": 0.9204404109273506,
      "grad_norm": 0.9030214124625668,
      "learning_rate": 8.594287649950544e-08,
      "loss": 0.1097,
      "step": 9990
    },
    {
      "epoch": 0.9205325471046206,
      "grad_norm": 0.9118329350693379,
      "learning_rate": 8.574496529500209e-08,
      "loss": 0.1103,
      "step": 9991
    },
    {
      "epoch": 0.9206246832818906,
      "grad_norm": 0.9446461098329167,
      "learning_rate": 8.554727825433872e-08,
      "loss": 0.1192,
      "step": 9992
    },
    {
      "epoch": 0.9207168194591606,
      "grad_norm": 0.9733014763065767,
      "learning_rate": 8.53498153958704e-08,
      "loss": 0.1158,
      "step": 9993
    },
    {
      "epoch": 0.9208089556364306,
      "grad_norm": 0.9264514392864119,
      "learning_rate": 8.515257673793159e-08,
      "loss": 0.1144,
      "step": 9994
    },
    {
      "epoch": 0.9209010918137006,
      "grad_norm": 0.9796407782816811,
      "learning_rate": 8.4955562298836e-08,
      "loss": 0.1202,
      "step": 9995
    },
    {
      "epoch": 0.9209932279909706,
      "grad_norm": 0.9384931539095959,
      "learning_rate": 8.475877209687594e-08,
      "loss": 0.1071,
      "step": 9996
    },
    {
      "epoch": 0.9210853641682406,
      "grad_norm": 0.9675554946533331,
      "learning_rate": 8.456220615032429e-08,
      "loss": 0.1209,
      "step": 9997
    },
    {
      "epoch": 0.9211775003455107,
      "grad_norm": 0.9773182683269641,
      "learning_rate": 8.436586447743172e-08,
      "loss": 0.1226,
      "step": 9998
    },
    {
      "epoch": 0.9212696365227807,
      "grad_norm": 0.9262159911644006,
      "learning_rate": 8.416974709642839e-08,
      "loss": 0.1157,
      "step": 9999
    },
    {
      "epoch": 0.9213617727000507,
      "grad_norm": 0.9153048649416058,
      "learning_rate": 8.397385402552415e-08,
      "loss": 0.1117,
      "step": 10000
    },
    {
      "epoch": 0.9213617727000507,
      "eval_loss": 0.11681114137172699,
      "eval_runtime": 300.109,
      "eval_samples_per_second": 23.382,
      "eval_steps_per_second": 2.926,
      "step": 10000
    },
    {
      "epoch": 0.9214539088773207,
      "grad_norm": 0.8953472769400774,
      "learning_rate": 8.377818528290754e-08,
      "loss": 0.1116,
      "step": 10001
    },
    {
      "epoch": 0.9215460450545907,
      "grad_norm": 0.9605648976460276,
      "learning_rate": 8.358274088674651e-08,
      "loss": 0.1208,
      "step": 10002
    },
    {
      "epoch": 0.9216381812318607,
      "grad_norm": 0.9322621523925322,
      "learning_rate": 8.338752085518819e-08,
      "loss": 0.1114,
      "step": 10003
    },
    {
      "epoch": 0.9217303174091307,
      "grad_norm": 0.9272123065068724,
      "learning_rate": 8.31925252063584e-08,
      "loss": 0.1073,
      "step": 10004
    },
    {
      "epoch": 0.9218224535864007,
      "grad_norm": 0.9332881339966433,
      "learning_rate": 8.299775395836262e-08,
      "loss": 0.1133,
      "step": 10005
    },
    {
      "epoch": 0.9219145897636707,
      "grad_norm": 0.9102158918404231,
      "learning_rate": 8.280320712928585e-08,
      "loss": 0.1163,
      "step": 10006
    },
    {
      "epoch": 0.9220067259409407,
      "grad_norm": 0.9326940952236754,
      "learning_rate": 8.260888473719114e-08,
      "loss": 0.1158,
      "step": 10007
    },
    {
      "epoch": 0.9220988621182107,
      "grad_norm": 0.9461833110524152,
      "learning_rate": 8.241478680012183e-08,
      "loss": 0.116,
      "step": 10008
    },
    {
      "epoch": 0.9221909982954807,
      "grad_norm": 0.9551382205517904,
      "learning_rate": 8.222091333609989e-08,
      "loss": 0.1193,
      "step": 10009
    },
    {
      "epoch": 0.9222831344727507,
      "grad_norm": 0.9370064660179406,
      "learning_rate": 8.202726436312619e-08,
      "loss": 0.1092,
      "step": 10010
    },
    {
      "epoch": 0.9223752706500208,
      "grad_norm": 0.9791261039611153,
      "learning_rate": 8.183383989918109e-08,
      "loss": 0.1194,
      "step": 10011
    },
    {
      "epoch": 0.9224674068272908,
      "grad_norm": 0.9504216348772377,
      "learning_rate": 8.164063996222438e-08,
      "loss": 0.12,
      "step": 10012
    },
    {
      "epoch": 0.9225595430045608,
      "grad_norm": 0.936257019374371,
      "learning_rate": 8.14476645701942e-08,
      "loss": 0.1137,
      "step": 10013
    },
    {
      "epoch": 0.9226516791818308,
      "grad_norm": 0.9381555121278079,
      "learning_rate": 8.125491374100902e-08,
      "loss": 0.1155,
      "step": 10014
    },
    {
      "epoch": 0.9227438153591008,
      "grad_norm": 1.0079460709363284,
      "learning_rate": 8.106238749256562e-08,
      "loss": 0.1185,
      "step": 10015
    },
    {
      "epoch": 0.9228359515363708,
      "grad_norm": 0.958334489638207,
      "learning_rate": 8.087008584273942e-08,
      "loss": 0.1193,
      "step": 10016
    },
    {
      "epoch": 0.9229280877136408,
      "grad_norm": 0.9068490658601089,
      "learning_rate": 8.067800880938615e-08,
      "loss": 0.1079,
      "step": 10017
    },
    {
      "epoch": 0.9230202238909108,
      "grad_norm": 0.9435404073158213,
      "learning_rate": 8.048615641034013e-08,
      "loss": 0.1118,
      "step": 10018
    },
    {
      "epoch": 0.9231123600681808,
      "grad_norm": 0.9200330865058696,
      "learning_rate": 8.029452866341492e-08,
      "loss": 0.1182,
      "step": 10019
    },
    {
      "epoch": 0.9232044962454508,
      "grad_norm": 0.914248690361516,
      "learning_rate": 8.010312558640348e-08,
      "loss": 0.1062,
      "step": 10020
    },
    {
      "epoch": 0.9232966324227208,
      "grad_norm": 0.937085460107984,
      "learning_rate": 7.991194719707663e-08,
      "loss": 0.1207,
      "step": 10021
    },
    {
      "epoch": 0.9233887685999908,
      "grad_norm": 0.9829486329659389,
      "learning_rate": 7.972099351318624e-08,
      "loss": 0.1263,
      "step": 10022
    },
    {
      "epoch": 0.9234809047772607,
      "grad_norm": 0.9274488053766404,
      "learning_rate": 7.953026455246233e-08,
      "loss": 0.1171,
      "step": 10023
    },
    {
      "epoch": 0.9235730409545307,
      "grad_norm": 0.9512581459490765,
      "learning_rate": 7.933976033261348e-08,
      "loss": 0.1118,
      "step": 10024
    },
    {
      "epoch": 0.9236651771318009,
      "grad_norm": 0.9332557412920207,
      "learning_rate": 7.914948087132862e-08,
      "loss": 0.1221,
      "step": 10025
    },
    {
      "epoch": 0.9237573133090708,
      "grad_norm": 0.9534856497766078,
      "learning_rate": 7.895942618627472e-08,
      "loss": 0.1164,
      "step": 10026
    },
    {
      "epoch": 0.9238494494863408,
      "grad_norm": 0.9511982683101914,
      "learning_rate": 7.876959629509907e-08,
      "loss": 0.1168,
      "step": 10027
    },
    {
      "epoch": 0.9239415856636108,
      "grad_norm": 0.9863978122827085,
      "learning_rate": 7.85799912154267e-08,
      "loss": 0.1153,
      "step": 10028
    },
    {
      "epoch": 0.9240337218408808,
      "grad_norm": 0.856384935668297,
      "learning_rate": 7.839061096486273e-08,
      "loss": 0.1043,
      "step": 10029
    },
    {
      "epoch": 0.9241258580181508,
      "grad_norm": 0.9659271632688743,
      "learning_rate": 7.82014555609914e-08,
      "loss": 0.1252,
      "step": 10030
    },
    {
      "epoch": 0.9242179941954208,
      "grad_norm": 0.9124472370190727,
      "learning_rate": 7.801252502137535e-08,
      "loss": 0.1076,
      "step": 10031
    },
    {
      "epoch": 0.9243101303726908,
      "grad_norm": 0.943476757994991,
      "learning_rate": 7.782381936355693e-08,
      "loss": 0.1244,
      "step": 10032
    },
    {
      "epoch": 0.9244022665499608,
      "grad_norm": 0.9477335815226061,
      "learning_rate": 7.763533860505767e-08,
      "loss": 0.1219,
      "step": 10033
    },
    {
      "epoch": 0.9244944027272308,
      "grad_norm": 0.9474630709253589,
      "learning_rate": 7.744708276337776e-08,
      "loss": 0.1115,
      "step": 10034
    },
    {
      "epoch": 0.9245865389045008,
      "grad_norm": 0.9124267452326419,
      "learning_rate": 7.725905185599735e-08,
      "loss": 0.111,
      "step": 10035
    },
    {
      "epoch": 0.9246786750817708,
      "grad_norm": 0.9073193546423393,
      "learning_rate": 7.707124590037445e-08,
      "loss": 0.1079,
      "step": 10036
    },
    {
      "epoch": 0.9247708112590408,
      "grad_norm": 0.8735953297989576,
      "learning_rate": 7.688366491394706e-08,
      "loss": 0.1057,
      "step": 10037
    },
    {
      "epoch": 0.9248629474363109,
      "grad_norm": 0.8926319327455886,
      "learning_rate": 7.669630891413204e-08,
      "loss": 0.1076,
      "step": 10038
    },
    {
      "epoch": 0.9249550836135809,
      "grad_norm": 0.9033458944044545,
      "learning_rate": 7.650917791832608e-08,
      "loss": 0.108,
      "step": 10039
    },
    {
      "epoch": 0.9250472197908509,
      "grad_norm": 0.9280311985098185,
      "learning_rate": 7.632227194390301e-08,
      "loss": 0.1187,
      "step": 10040
    },
    {
      "epoch": 0.9251393559681209,
      "grad_norm": 0.8983958238137177,
      "learning_rate": 7.613559100821843e-08,
      "loss": 0.1087,
      "step": 10041
    },
    {
      "epoch": 0.9252314921453909,
      "grad_norm": 0.9239527817591513,
      "learning_rate": 7.594913512860485e-08,
      "loss": 0.1131,
      "step": 10042
    },
    {
      "epoch": 0.9253236283226609,
      "grad_norm": 0.920847354160995,
      "learning_rate": 7.57629043223751e-08,
      "loss": 0.1129,
      "step": 10043
    },
    {
      "epoch": 0.9254157644999309,
      "grad_norm": 0.9208440112474245,
      "learning_rate": 7.557689860682032e-08,
      "loss": 0.1123,
      "step": 10044
    },
    {
      "epoch": 0.9255079006772009,
      "grad_norm": 0.9693290819377761,
      "learning_rate": 7.539111799921145e-08,
      "loss": 0.1235,
      "step": 10045
    },
    {
      "epoch": 0.9256000368544709,
      "grad_norm": 0.9707370388186811,
      "learning_rate": 7.520556251679856e-08,
      "loss": 0.1157,
      "step": 10046
    },
    {
      "epoch": 0.9256921730317409,
      "grad_norm": 0.9373089571232343,
      "learning_rate": 7.502023217680982e-08,
      "loss": 0.114,
      "step": 10047
    },
    {
      "epoch": 0.9257843092090109,
      "grad_norm": 0.9666360388533218,
      "learning_rate": 7.483512699645368e-08,
      "loss": 0.1203,
      "step": 10048
    },
    {
      "epoch": 0.9258764453862809,
      "grad_norm": 0.9974723494362501,
      "learning_rate": 7.465024699291696e-08,
      "loss": 0.1253,
      "step": 10049
    },
    {
      "epoch": 0.9259685815635509,
      "grad_norm": 0.9684830257669516,
      "learning_rate": 7.446559218336563e-08,
      "loss": 0.1269,
      "step": 10050
    },
    {
      "epoch": 0.9260607177408209,
      "grad_norm": 0.941063326323892,
      "learning_rate": 7.428116258494545e-08,
      "loss": 0.1184,
      "step": 10051
    },
    {
      "epoch": 0.926152853918091,
      "grad_norm": 0.959037420198221,
      "learning_rate": 7.409695821478046e-08,
      "loss": 0.117,
      "step": 10052
    },
    {
      "epoch": 0.926244990095361,
      "grad_norm": 0.9716484105543598,
      "learning_rate": 7.391297908997341e-08,
      "loss": 0.117,
      "step": 10053
    },
    {
      "epoch": 0.926337126272631,
      "grad_norm": 0.9243109744426414,
      "learning_rate": 7.372922522760755e-08,
      "loss": 0.1085,
      "step": 10054
    },
    {
      "epoch": 0.926429262449901,
      "grad_norm": 0.9668232342765302,
      "learning_rate": 7.354569664474426e-08,
      "loss": 0.1202,
      "step": 10055
    },
    {
      "epoch": 0.926521398627171,
      "grad_norm": 0.9634841438555413,
      "learning_rate": 7.33623933584246e-08,
      "loss": 0.1233,
      "step": 10056
    },
    {
      "epoch": 0.926613534804441,
      "grad_norm": 0.9764871788307892,
      "learning_rate": 7.317931538566747e-08,
      "loss": 0.1249,
      "step": 10057
    },
    {
      "epoch": 0.926705670981711,
      "grad_norm": 0.9915884927220054,
      "learning_rate": 7.299646274347205e-08,
      "loss": 0.1226,
      "step": 10058
    },
    {
      "epoch": 0.926797807158981,
      "grad_norm": 0.9689646354780623,
      "learning_rate": 7.281383544881642e-08,
      "loss": 0.1245,
      "step": 10059
    },
    {
      "epoch": 0.926889943336251,
      "grad_norm": 1.0001034469069645,
      "learning_rate": 7.263143351865759e-08,
      "loss": 0.1312,
      "step": 10060
    },
    {
      "epoch": 0.926982079513521,
      "grad_norm": 0.975500103508216,
      "learning_rate": 7.244925696993088e-08,
      "loss": 0.1208,
      "step": 10061
    },
    {
      "epoch": 0.927074215690791,
      "grad_norm": 0.9932454627539781,
      "learning_rate": 7.226730581955249e-08,
      "loss": 0.1289,
      "step": 10062
    },
    {
      "epoch": 0.927166351868061,
      "grad_norm": 0.9673568247424288,
      "learning_rate": 7.208558008441557e-08,
      "loss": 0.1232,
      "step": 10063
    },
    {
      "epoch": 0.927258488045331,
      "grad_norm": 0.8870914398841915,
      "learning_rate": 7.190407978139413e-08,
      "loss": 0.1098,
      "step": 10064
    },
    {
      "epoch": 0.927350624222601,
      "grad_norm": 0.9218735423503247,
      "learning_rate": 7.172280492733996e-08,
      "loss": 0.1188,
      "step": 10065
    },
    {
      "epoch": 0.9274427603998711,
      "grad_norm": 0.9383407246695775,
      "learning_rate": 7.15417555390846e-08,
      "loss": 0.1243,
      "step": 10066
    },
    {
      "epoch": 0.927534896577141,
      "grad_norm": 0.9179094929795911,
      "learning_rate": 7.136093163343877e-08,
      "loss": 0.1197,
      "step": 10067
    },
    {
      "epoch": 0.927627032754411,
      "grad_norm": 0.9438528092701421,
      "learning_rate": 7.118033322719209e-08,
      "loss": 0.1086,
      "step": 10068
    },
    {
      "epoch": 0.927719168931681,
      "grad_norm": 0.9768979953603585,
      "learning_rate": 7.099996033711254e-08,
      "loss": 0.1215,
      "step": 10069
    },
    {
      "epoch": 0.927811305108951,
      "grad_norm": 0.92007035394007,
      "learning_rate": 7.081981297994784e-08,
      "loss": 0.1098,
      "step": 10070
    },
    {
      "epoch": 0.927903441286221,
      "grad_norm": 0.9547575066326877,
      "learning_rate": 7.063989117242514e-08,
      "loss": 0.1197,
      "step": 10071
    },
    {
      "epoch": 0.927995577463491,
      "grad_norm": 0.9142725216376145,
      "learning_rate": 7.046019493125028e-08,
      "loss": 0.1078,
      "step": 10072
    },
    {
      "epoch": 0.928087713640761,
      "grad_norm": 0.9579050206705731,
      "learning_rate": 7.028072427310767e-08,
      "loss": 0.1216,
      "step": 10073
    },
    {
      "epoch": 0.928179849818031,
      "grad_norm": 0.9297320319381187,
      "learning_rate": 7.010147921466121e-08,
      "loss": 0.1143,
      "step": 10074
    },
    {
      "epoch": 0.928271985995301,
      "grad_norm": 0.9497750407350231,
      "learning_rate": 6.992245977255369e-08,
      "loss": 0.1201,
      "step": 10075
    },
    {
      "epoch": 0.928364122172571,
      "grad_norm": 0.9907453496156512,
      "learning_rate": 6.974366596340765e-08,
      "loss": 0.1263,
      "step": 10076
    },
    {
      "epoch": 0.928456258349841,
      "grad_norm": 0.9239784611446478,
      "learning_rate": 6.95650978038237e-08,
      "loss": 0.1206,
      "step": 10077
    },
    {
      "epoch": 0.928548394527111,
      "grad_norm": 0.9308862971712853,
      "learning_rate": 6.93867553103822e-08,
      "loss": 0.1152,
      "step": 10078
    },
    {
      "epoch": 0.9286405307043811,
      "grad_norm": 0.9778397868120814,
      "learning_rate": 6.920863849964154e-08,
      "loss": 0.1242,
      "step": 10079
    },
    {
      "epoch": 0.9287326668816511,
      "grad_norm": 0.9989156628975393,
      "learning_rate": 6.903074738814047e-08,
      "loss": 0.1256,
      "step": 10080
    },
    {
      "epoch": 0.9288248030589211,
      "grad_norm": 0.9604361889555622,
      "learning_rate": 6.88530819923966e-08,
      "loss": 0.1183,
      "step": 10081
    },
    {
      "epoch": 0.9289169392361911,
      "grad_norm": 0.9902749552785988,
      "learning_rate": 6.867564232890534e-08,
      "loss": 0.1297,
      "step": 10082
    },
    {
      "epoch": 0.9290090754134611,
      "grad_norm": 0.9193712003271258,
      "learning_rate": 6.849842841414239e-08,
      "loss": 0.1128,
      "step": 10083
    },
    {
      "epoch": 0.9291012115907311,
      "grad_norm": 0.927277125839814,
      "learning_rate": 6.832144026456211e-08,
      "loss": 0.1116,
      "step": 10084
    },
    {
      "epoch": 0.9291933477680011,
      "grad_norm": 0.9509926978450719,
      "learning_rate": 6.8144677896598e-08,
      "loss": 0.118,
      "step": 10085
    },
    {
      "epoch": 0.9292854839452711,
      "grad_norm": 0.89136791200629,
      "learning_rate": 6.796814132666196e-08,
      "loss": 0.1133,
      "step": 10086
    },
    {
      "epoch": 0.9293776201225411,
      "grad_norm": 0.9316915181788125,
      "learning_rate": 6.779183057114585e-08,
      "loss": 0.1161,
      "step": 10087
    },
    {
      "epoch": 0.9294697562998111,
      "grad_norm": 0.955332942284261,
      "learning_rate": 6.761574564641993e-08,
      "loss": 0.1207,
      "step": 10088
    },
    {
      "epoch": 0.9295618924770811,
      "grad_norm": 0.9516950712819171,
      "learning_rate": 6.743988656883388e-08,
      "loss": 0.1209,
      "step": 10089
    },
    {
      "epoch": 0.9296540286543511,
      "grad_norm": 0.955229675549783,
      "learning_rate": 6.726425335471632e-08,
      "loss": 0.1173,
      "step": 10090
    },
    {
      "epoch": 0.9297461648316211,
      "grad_norm": 0.8921231535918545,
      "learning_rate": 6.708884602037446e-08,
      "loss": 0.1112,
      "step": 10091
    },
    {
      "epoch": 0.9298383010088911,
      "grad_norm": 0.9489220528435535,
      "learning_rate": 6.691366458209503e-08,
      "loss": 0.1244,
      "step": 10092
    },
    {
      "epoch": 0.9299304371861612,
      "grad_norm": 0.9299314366674031,
      "learning_rate": 6.673870905614387e-08,
      "loss": 0.1152,
      "step": 10093
    },
    {
      "epoch": 0.9300225733634312,
      "grad_norm": 0.9705811815401614,
      "learning_rate": 6.656397945876525e-08,
      "loss": 0.1192,
      "step": 10094
    },
    {
      "epoch": 0.9301147095407012,
      "grad_norm": 0.9309252837569718,
      "learning_rate": 6.638947580618338e-08,
      "loss": 0.1152,
      "step": 10095
    },
    {
      "epoch": 0.9302068457179712,
      "grad_norm": 0.9649772172337729,
      "learning_rate": 6.621519811460003e-08,
      "loss": 0.1184,
      "step": 10096
    },
    {
      "epoch": 0.9302989818952412,
      "grad_norm": 0.9312697012951024,
      "learning_rate": 6.60411464001981e-08,
      "loss": 0.1102,
      "step": 10097
    },
    {
      "epoch": 0.9303911180725112,
      "grad_norm": 0.9764272745006919,
      "learning_rate": 6.586732067913715e-08,
      "loss": 0.1119,
      "step": 10098
    },
    {
      "epoch": 0.9304832542497812,
      "grad_norm": 0.9436377619779318,
      "learning_rate": 6.56937209675576e-08,
      "loss": 0.1234,
      "step": 10099
    },
    {
      "epoch": 0.9305753904270512,
      "grad_norm": 0.9793883894185555,
      "learning_rate": 6.552034728157824e-08,
      "loss": 0.1198,
      "step": 10100
    },
    {
      "epoch": 0.9306675266043212,
      "grad_norm": 0.9409414214358416,
      "learning_rate": 6.534719963729646e-08,
      "loss": 0.1124,
      "step": 10101
    },
    {
      "epoch": 0.9307596627815912,
      "grad_norm": 0.9221372089696581,
      "learning_rate": 6.517427805078913e-08,
      "loss": 0.1198,
      "step": 10102
    },
    {
      "epoch": 0.9308517989588612,
      "grad_norm": 0.875186624270954,
      "learning_rate": 6.500158253811228e-08,
      "loss": 0.0994,
      "step": 10103
    },
    {
      "epoch": 0.9309439351361312,
      "grad_norm": 0.9267212533368236,
      "learning_rate": 6.482911311530033e-08,
      "loss": 0.1136,
      "step": 10104
    },
    {
      "epoch": 0.9310360713134012,
      "grad_norm": 0.8883942370936049,
      "learning_rate": 6.465686979836766e-08,
      "loss": 0.1036,
      "step": 10105
    },
    {
      "epoch": 0.9311282074906713,
      "grad_norm": 0.9235460645528614,
      "learning_rate": 6.44848526033065e-08,
      "loss": 0.1159,
      "step": 10106
    },
    {
      "epoch": 0.9312203436679413,
      "grad_norm": 0.9071083021798902,
      "learning_rate": 6.43130615460888e-08,
      "loss": 0.1105,
      "step": 10107
    },
    {
      "epoch": 0.9313124798452113,
      "grad_norm": 0.9968005269119061,
      "learning_rate": 6.41414966426654e-08,
      "loss": 0.1284,
      "step": 10108
    },
    {
      "epoch": 0.9314046160224813,
      "grad_norm": 0.9761930692958624,
      "learning_rate": 6.397015790896633e-08,
      "loss": 0.1205,
      "step": 10109
    },
    {
      "epoch": 0.9314967521997513,
      "grad_norm": 0.9365364534222561,
      "learning_rate": 6.379904536090053e-08,
      "loss": 0.1076,
      "step": 10110
    },
    {
      "epoch": 0.9315888883770213,
      "grad_norm": 0.8918112749411182,
      "learning_rate": 6.362815901435532e-08,
      "loss": 0.1098,
      "step": 10111
    },
    {
      "epoch": 0.9316810245542912,
      "grad_norm": 0.9663526734184886,
      "learning_rate": 6.34574988851977e-08,
      "loss": 0.1224,
      "step": 10112
    },
    {
      "epoch": 0.9317731607315612,
      "grad_norm": 0.927169379210311,
      "learning_rate": 6.328706498927361e-08,
      "loss": 0.117,
      "step": 10113
    },
    {
      "epoch": 0.9318652969088312,
      "grad_norm": 0.8784108002332701,
      "learning_rate": 6.311685734240791e-08,
      "loss": 0.1074,
      "step": 10114
    },
    {
      "epoch": 0.9319574330861012,
      "grad_norm": 0.9172721768927773,
      "learning_rate": 6.294687596040406e-08,
      "loss": 0.119,
      "step": 10115
    },
    {
      "epoch": 0.9320495692633712,
      "grad_norm": 0.903584060250854,
      "learning_rate": 6.277712085904524e-08,
      "loss": 0.1014,
      "step": 10116
    },
    {
      "epoch": 0.9321417054406412,
      "grad_norm": 0.9725426775760924,
      "learning_rate": 6.260759205409278e-08,
      "loss": 0.134,
      "step": 10117
    },
    {
      "epoch": 0.9322338416179112,
      "grad_norm": 0.8691523075785011,
      "learning_rate": 6.243828956128794e-08,
      "loss": 0.1008,
      "step": 10118
    },
    {
      "epoch": 0.9323259777951812,
      "grad_norm": 0.9136529794135902,
      "learning_rate": 6.226921339635012e-08,
      "loss": 0.1025,
      "step": 10119
    },
    {
      "epoch": 0.9324181139724513,
      "grad_norm": 1.0116626279614331,
      "learning_rate": 6.210036357497811e-08,
      "loss": 0.1259,
      "step": 10120
    },
    {
      "epoch": 0.9325102501497213,
      "grad_norm": 0.93766496587313,
      "learning_rate": 6.193174011284997e-08,
      "loss": 0.1199,
      "step": 10121
    },
    {
      "epoch": 0.9326023863269913,
      "grad_norm": 0.889965497266823,
      "learning_rate": 6.176334302562204e-08,
      "loss": 0.1029,
      "step": 10122
    },
    {
      "epoch": 0.9326945225042613,
      "grad_norm": 0.9855453499663499,
      "learning_rate": 6.159517232893014e-08,
      "loss": 0.121,
      "step": 10123
    },
    {
      "epoch": 0.9327866586815313,
      "grad_norm": 0.9743008981087332,
      "learning_rate": 6.142722803838874e-08,
      "loss": 0.124,
      "step": 10124
    },
    {
      "epoch": 0.9328787948588013,
      "grad_norm": 0.958869092253525,
      "learning_rate": 6.125951016959175e-08,
      "loss": 0.1247,
      "step": 10125
    },
    {
      "epoch": 0.9329709310360713,
      "grad_norm": 0.9640490274238452,
      "learning_rate": 6.109201873811171e-08,
      "loss": 0.1254,
      "step": 10126
    },
    {
      "epoch": 0.9330630672133413,
      "grad_norm": 0.8968523241668316,
      "learning_rate": 6.092475375950035e-08,
      "loss": 0.1058,
      "step": 10127
    },
    {
      "epoch": 0.9331552033906113,
      "grad_norm": 0.8935034810307682,
      "learning_rate": 6.075771524928804e-08,
      "loss": 0.1118,
      "step": 10128
    },
    {
      "epoch": 0.9332473395678813,
      "grad_norm": 0.9125924372741135,
      "learning_rate": 6.0590903222984e-08,
      "loss": 0.1099,
      "step": 10129
    },
    {
      "epoch": 0.9333394757451513,
      "grad_norm": 0.8980119979407162,
      "learning_rate": 6.042431769607782e-08,
      "loss": 0.0987,
      "step": 10130
    },
    {
      "epoch": 0.9334316119224213,
      "grad_norm": 0.9152540334694288,
      "learning_rate": 6.025795868403573e-08,
      "loss": 0.1221,
      "step": 10131
    },
    {
      "epoch": 0.9335237480996913,
      "grad_norm": 0.9265595110349005,
      "learning_rate": 6.009182620230508e-08,
      "loss": 0.1153,
      "step": 10132
    },
    {
      "epoch": 0.9336158842769613,
      "grad_norm": 0.9307369152940632,
      "learning_rate": 5.992592026631078e-08,
      "loss": 0.1064,
      "step": 10133
    },
    {
      "epoch": 0.9337080204542314,
      "grad_norm": 0.9215489600399154,
      "learning_rate": 5.976024089145715e-08,
      "loss": 0.1137,
      "step": 10134
    },
    {
      "epoch": 0.9338001566315014,
      "grad_norm": 0.9070052737981305,
      "learning_rate": 5.95947880931283e-08,
      "loss": 0.118,
      "step": 10135
    },
    {
      "epoch": 0.9338922928087714,
      "grad_norm": 0.9934855918706669,
      "learning_rate": 5.942956188668553e-08,
      "loss": 0.1296,
      "step": 10136
    },
    {
      "epoch": 0.9339844289860414,
      "grad_norm": 0.9732008800545134,
      "learning_rate": 5.926456228747102e-08,
      "loss": 0.1276,
      "step": 10137
    },
    {
      "epoch": 0.9340765651633114,
      "grad_norm": 0.9041660583571358,
      "learning_rate": 5.909978931080418e-08,
      "loss": 0.1115,
      "step": 10138
    },
    {
      "epoch": 0.9341687013405814,
      "grad_norm": 0.9207987007708457,
      "learning_rate": 5.8935242971984993e-08,
      "loss": 0.1173,
      "step": 10139
    },
    {
      "epoch": 0.9342608375178514,
      "grad_norm": 0.9634402298027609,
      "learning_rate": 5.877092328629097e-08,
      "loss": 0.1201,
      "step": 10140
    },
    {
      "epoch": 0.9343529736951214,
      "grad_norm": 0.9358564013054519,
      "learning_rate": 5.8606830268979344e-08,
      "loss": 0.1115,
      "step": 10141
    },
    {
      "epoch": 0.9344451098723914,
      "grad_norm": 0.9296780517476542,
      "learning_rate": 5.8442963935286535e-08,
      "loss": 0.1194,
      "step": 10142
    },
    {
      "epoch": 0.9345372460496614,
      "grad_norm": 0.8830379602856283,
      "learning_rate": 5.827932430042732e-08,
      "loss": 0.1087,
      "step": 10143
    },
    {
      "epoch": 0.9346293822269314,
      "grad_norm": 0.9437410925435715,
      "learning_rate": 5.811591137959538e-08,
      "loss": 0.1187,
      "step": 10144
    },
    {
      "epoch": 0.9347215184042014,
      "grad_norm": 0.9544053264277833,
      "learning_rate": 5.7952725187963855e-08,
      "loss": 0.1108,
      "step": 10145
    },
    {
      "epoch": 0.9348136545814714,
      "grad_norm": 0.9956439833964636,
      "learning_rate": 5.778976574068451e-08,
      "loss": 0.1212,
      "step": 10146
    },
    {
      "epoch": 0.9349057907587415,
      "grad_norm": 0.8916894305196384,
      "learning_rate": 5.762703305288858e-08,
      "loss": 0.1154,
      "step": 10147
    },
    {
      "epoch": 0.9349979269360115,
      "grad_norm": 0.9453557024716712,
      "learning_rate": 5.746452713968564e-08,
      "loss": 0.1232,
      "step": 10148
    },
    {
      "epoch": 0.9350900631132815,
      "grad_norm": 0.9616711906703019,
      "learning_rate": 5.730224801616391e-08,
      "loss": 0.1175,
      "step": 10149
    },
    {
      "epoch": 0.9351821992905515,
      "grad_norm": 0.9178001385739297,
      "learning_rate": 5.714019569739132e-08,
      "loss": 0.1115,
      "step": 10150
    },
    {
      "epoch": 0.9352743354678215,
      "grad_norm": 0.9475727631500085,
      "learning_rate": 5.697837019841446e-08,
      "loss": 0.125,
      "step": 10151
    },
    {
      "epoch": 0.9353664716450915,
      "grad_norm": 0.9648214850088555,
      "learning_rate": 5.6816771534258794e-08,
      "loss": 0.1196,
      "step": 10152
    },
    {
      "epoch": 0.9354586078223615,
      "grad_norm": 0.9432824464052948,
      "learning_rate": 5.6655399719929286e-08,
      "loss": 0.1149,
      "step": 10153
    },
    {
      "epoch": 0.9355507439996315,
      "grad_norm": 0.8934593126027819,
      "learning_rate": 5.649425477040837e-08,
      "loss": 0.1107,
      "step": 10154
    },
    {
      "epoch": 0.9356428801769014,
      "grad_norm": 0.9901492980135598,
      "learning_rate": 5.63333367006591e-08,
      "loss": 0.1259,
      "step": 10155
    },
    {
      "epoch": 0.9357350163541714,
      "grad_norm": 0.9389511635432886,
      "learning_rate": 5.617264552562229e-08,
      "loss": 0.1262,
      "step": 10156
    },
    {
      "epoch": 0.9358271525314414,
      "grad_norm": 0.9962631342478383,
      "learning_rate": 5.6012181260218514e-08,
      "loss": 0.1219,
      "step": 10157
    },
    {
      "epoch": 0.9359192887087114,
      "grad_norm": 0.9589826323723284,
      "learning_rate": 5.5851943919346394e-08,
      "loss": 0.1155,
      "step": 10158
    },
    {
      "epoch": 0.9360114248859814,
      "grad_norm": 1.0008099775740829,
      "learning_rate": 5.569193351788516e-08,
      "loss": 0.1258,
      "step": 10159
    },
    {
      "epoch": 0.9361035610632514,
      "grad_norm": 0.9701895891527986,
      "learning_rate": 5.5532150070690404e-08,
      "loss": 0.1144,
      "step": 10160
    },
    {
      "epoch": 0.9361956972405215,
      "grad_norm": 0.9887847060003988,
      "learning_rate": 5.5372593592598333e-08,
      "loss": 0.1239,
      "step": 10161
    },
    {
      "epoch": 0.9362878334177915,
      "grad_norm": 0.917151189793383,
      "learning_rate": 5.521326409842431e-08,
      "loss": 0.1072,
      "step": 10162
    },
    {
      "epoch": 0.9363799695950615,
      "grad_norm": 0.9511802364714468,
      "learning_rate": 5.5054161602961786e-08,
      "loss": 0.1128,
      "step": 10163
    },
    {
      "epoch": 0.9364721057723315,
      "grad_norm": 0.9196778717957829,
      "learning_rate": 5.489528612098366e-08,
      "loss": 0.1104,
      "step": 10164
    },
    {
      "epoch": 0.9365642419496015,
      "grad_norm": 0.9109397752453979,
      "learning_rate": 5.4736637667241465e-08,
      "loss": 0.1188,
      "step": 10165
    },
    {
      "epoch": 0.9366563781268715,
      "grad_norm": 0.9572866214282587,
      "learning_rate": 5.457821625646537e-08,
      "loss": 0.121,
      "step": 10166
    },
    {
      "epoch": 0.9367485143041415,
      "grad_norm": 0.9675168468217017,
      "learning_rate": 5.442002190336498e-08,
      "loss": 0.1255,
      "step": 10167
    },
    {
      "epoch": 0.9368406504814115,
      "grad_norm": 0.9939171972835509,
      "learning_rate": 5.426205462262884e-08,
      "loss": 0.1363,
      "step": 10168
    },
    {
      "epoch": 0.9369327866586815,
      "grad_norm": 1.00824738921418,
      "learning_rate": 5.410431442892411e-08,
      "loss": 0.1318,
      "step": 10169
    },
    {
      "epoch": 0.9370249228359515,
      "grad_norm": 0.9286004221876722,
      "learning_rate": 5.3946801336897395e-08,
      "loss": 0.1152,
      "step": 10170
    },
    {
      "epoch": 0.9371170590132215,
      "grad_norm": 0.9394035973214175,
      "learning_rate": 5.37895153611731e-08,
      "loss": 0.1161,
      "step": 10171
    },
    {
      "epoch": 0.9372091951904915,
      "grad_norm": 0.9356338375134339,
      "learning_rate": 5.363245651635568e-08,
      "loss": 0.1163,
      "step": 10172
    },
    {
      "epoch": 0.9373013313677615,
      "grad_norm": 0.9705226684470347,
      "learning_rate": 5.3475624817027614e-08,
      "loss": 0.1221,
      "step": 10173
    },
    {
      "epoch": 0.9373934675450316,
      "grad_norm": 0.9778732464822383,
      "learning_rate": 5.331902027775143e-08,
      "loss": 0.1226,
      "step": 10174
    },
    {
      "epoch": 0.9374856037223016,
      "grad_norm": 0.9344922249700046,
      "learning_rate": 5.316264291306744e-08,
      "loss": 0.1171,
      "step": 10175
    },
    {
      "epoch": 0.9375777398995716,
      "grad_norm": 0.930084190711884,
      "learning_rate": 5.300649273749542e-08,
      "loss": 0.1196,
      "step": 10176
    },
    {
      "epoch": 0.9376698760768416,
      "grad_norm": 0.9179768576305335,
      "learning_rate": 5.2850569765533766e-08,
      "loss": 0.1175,
      "step": 10177
    },
    {
      "epoch": 0.9377620122541116,
      "grad_norm": 0.92930569119096,
      "learning_rate": 5.2694874011660066e-08,
      "loss": 0.1195,
      "step": 10178
    },
    {
      "epoch": 0.9378541484313816,
      "grad_norm": 0.9196928713129051,
      "learning_rate": 5.253940549033082e-08,
      "loss": 0.1064,
      "step": 10179
    },
    {
      "epoch": 0.9379462846086516,
      "grad_norm": 0.9418730370988134,
      "learning_rate": 5.238416421598142e-08,
      "loss": 0.1188,
      "step": 10180
    },
    {
      "epoch": 0.9380384207859216,
      "grad_norm": 0.9425062455243531,
      "learning_rate": 5.2229150203025604e-08,
      "loss": 0.117,
      "step": 10181
    },
    {
      "epoch": 0.9381305569631916,
      "grad_norm": 0.9634946865174938,
      "learning_rate": 5.2074363465856316e-08,
      "loss": 0.1138,
      "step": 10182
    },
    {
      "epoch": 0.9382226931404616,
      "grad_norm": 0.9414468299208367,
      "learning_rate": 5.191980401884594e-08,
      "loss": 0.1181,
      "step": 10183
    },
    {
      "epoch": 0.9383148293177316,
      "grad_norm": 0.9099367437520887,
      "learning_rate": 5.176547187634551e-08,
      "loss": 0.1173,
      "step": 10184
    },
    {
      "epoch": 0.9384069654950016,
      "grad_norm": 0.940932781792052,
      "learning_rate": 5.161136705268438e-08,
      "loss": 0.1197,
      "step": 10185
    },
    {
      "epoch": 0.9384991016722716,
      "grad_norm": 0.9422087246290408,
      "learning_rate": 5.145748956217139e-08,
      "loss": 0.1186,
      "step": 10186
    },
    {
      "epoch": 0.9385912378495416,
      "grad_norm": 0.9339898499942781,
      "learning_rate": 5.130383941909372e-08,
      "loss": 0.1077,
      "step": 10187
    },
    {
      "epoch": 0.9386833740268117,
      "grad_norm": 0.9095970890077272,
      "learning_rate": 5.1150416637718306e-08,
      "loss": 0.1071,
      "step": 10188
    },
    {
      "epoch": 0.9387755102040817,
      "grad_norm": 0.9610721856654976,
      "learning_rate": 5.0997221232290115e-08,
      "loss": 0.1182,
      "step": 10189
    },
    {
      "epoch": 0.9388676463813517,
      "grad_norm": 0.9442119623237161,
      "learning_rate": 5.0844253217033624e-08,
      "loss": 0.1253,
      "step": 10190
    },
    {
      "epoch": 0.9389597825586217,
      "grad_norm": 0.9104684661627135,
      "learning_rate": 5.06915126061519e-08,
      "loss": 0.1107,
      "step": 10191
    },
    {
      "epoch": 0.9390519187358917,
      "grad_norm": 0.9213554783526815,
      "learning_rate": 5.0538999413826393e-08,
      "loss": 0.1178,
      "step": 10192
    },
    {
      "epoch": 0.9391440549131617,
      "grad_norm": 0.9993405206730799,
      "learning_rate": 5.0386713654218825e-08,
      "loss": 0.1308,
      "step": 10193
    },
    {
      "epoch": 0.9392361910904317,
      "grad_norm": 0.9736502010870575,
      "learning_rate": 5.023465534146843e-08,
      "loss": 0.1214,
      "step": 10194
    },
    {
      "epoch": 0.9393283272677017,
      "grad_norm": 0.8838783987621547,
      "learning_rate": 5.008282448969393e-08,
      "loss": 0.1084,
      "step": 10195
    },
    {
      "epoch": 0.9394204634449717,
      "grad_norm": 0.948623222140314,
      "learning_rate": 4.9931221112992924e-08,
      "loss": 0.1198,
      "step": 10196
    },
    {
      "epoch": 0.9395125996222417,
      "grad_norm": 0.8956293742678936,
      "learning_rate": 4.977984522544166e-08,
      "loss": 0.1091,
      "step": 10197
    },
    {
      "epoch": 0.9396047357995116,
      "grad_norm": 0.9255954230214183,
      "learning_rate": 4.962869684109528e-08,
      "loss": 0.1097,
      "step": 10198
    },
    {
      "epoch": 0.9396968719767816,
      "grad_norm": 0.9149389171061979,
      "learning_rate": 4.947777597398812e-08,
      "loss": 0.1145,
      "step": 10199
    },
    {
      "epoch": 0.9397890081540516,
      "grad_norm": 0.9200927674585153,
      "learning_rate": 4.932708263813341e-08,
      "loss": 0.1114,
      "step": 10200
    },
    {
      "epoch": 0.9398811443313216,
      "grad_norm": 0.9676136590853243,
      "learning_rate": 4.917661684752273e-08,
      "loss": 0.1214,
      "step": 10201
    },
    {
      "epoch": 0.9399732805085917,
      "grad_norm": 0.9156287716615673,
      "learning_rate": 4.9026378616127133e-08,
      "loss": 0.118,
      "step": 10202
    },
    {
      "epoch": 0.9400654166858617,
      "grad_norm": 0.889927739280015,
      "learning_rate": 4.8876367957895744e-08,
      "loss": 0.1084,
      "step": 10203
    },
    {
      "epoch": 0.9401575528631317,
      "grad_norm": 0.9299760065843369,
      "learning_rate": 4.872658488675741e-08,
      "loss": 0.1127,
      "step": 10204
    },
    {
      "epoch": 0.9402496890404017,
      "grad_norm": 0.9132737348421209,
      "learning_rate": 4.8577029416619625e-08,
      "loss": 0.1131,
      "step": 10205
    },
    {
      "epoch": 0.9403418252176717,
      "grad_norm": 0.88489448512981,
      "learning_rate": 4.84277015613685e-08,
      "loss": 0.1083,
      "step": 10206
    },
    {
      "epoch": 0.9404339613949417,
      "grad_norm": 1.0037419903582985,
      "learning_rate": 4.8278601334869056e-08,
      "loss": 0.1192,
      "step": 10207
    },
    {
      "epoch": 0.9405260975722117,
      "grad_norm": 0.9268411474644621,
      "learning_rate": 4.8129728750965224e-08,
      "loss": 0.1128,
      "step": 10208
    },
    {
      "epoch": 0.9406182337494817,
      "grad_norm": 0.9709277587211269,
      "learning_rate": 4.79810838234801e-08,
      "loss": 0.1303,
      "step": 10209
    },
    {
      "epoch": 0.9407103699267517,
      "grad_norm": 0.8769457665825694,
      "learning_rate": 4.7832666566215156e-08,
      "loss": 0.0994,
      "step": 10210
    },
    {
      "epoch": 0.9408025061040217,
      "grad_norm": 0.9298202881373417,
      "learning_rate": 4.7684476992951033e-08,
      "loss": 0.1122,
      "step": 10211
    },
    {
      "epoch": 0.9408946422812917,
      "grad_norm": 0.8818956452577879,
      "learning_rate": 4.753651511744728e-08,
      "loss": 0.1072,
      "step": 10212
    },
    {
      "epoch": 0.9409867784585617,
      "grad_norm": 0.9343822631531623,
      "learning_rate": 4.738878095344207e-08,
      "loss": 0.1167,
      "step": 10213
    },
    {
      "epoch": 0.9410789146358317,
      "grad_norm": 0.9605236137441521,
      "learning_rate": 4.7241274514652217e-08,
      "loss": 0.114,
      "step": 10214
    },
    {
      "epoch": 0.9411710508131018,
      "grad_norm": 0.9555618347520516,
      "learning_rate": 4.7093995814773975e-08,
      "loss": 0.1234,
      "step": 10215
    },
    {
      "epoch": 0.9412631869903718,
      "grad_norm": 0.9108221732663505,
      "learning_rate": 4.694694486748225e-08,
      "loss": 0.1163,
      "step": 10216
    },
    {
      "epoch": 0.9413553231676418,
      "grad_norm": 0.9386691959416757,
      "learning_rate": 4.680012168643111e-08,
      "loss": 0.1156,
      "step": 10217
    },
    {
      "epoch": 0.9414474593449118,
      "grad_norm": 0.9589183202714626,
      "learning_rate": 4.6653526285252437e-08,
      "loss": 0.1186,
      "step": 10218
    },
    {
      "epoch": 0.9415395955221818,
      "grad_norm": 0.9383960225445924,
      "learning_rate": 4.650715867755784e-08,
      "loss": 0.1124,
      "step": 10219
    },
    {
      "epoch": 0.9416317316994518,
      "grad_norm": 0.9599936149241113,
      "learning_rate": 4.636101887693756e-08,
      "loss": 0.1156,
      "step": 10220
    },
    {
      "epoch": 0.9417238678767218,
      "grad_norm": 0.9530960877295532,
      "learning_rate": 4.621510689696046e-08,
      "loss": 0.1234,
      "step": 10221
    },
    {
      "epoch": 0.9418160040539918,
      "grad_norm": 0.9144532827198258,
      "learning_rate": 4.606942275117543e-08,
      "loss": 0.1082,
      "step": 10222
    },
    {
      "epoch": 0.9419081402312618,
      "grad_norm": 0.9337094101499579,
      "learning_rate": 4.5923966453108315e-08,
      "loss": 0.1154,
      "step": 10223
    },
    {
      "epoch": 0.9420002764085318,
      "grad_norm": 0.9107710053439749,
      "learning_rate": 4.57787380162647e-08,
      "loss": 0.1105,
      "step": 10224
    },
    {
      "epoch": 0.9420924125858018,
      "grad_norm": 0.9887729337567444,
      "learning_rate": 4.5633737454129636e-08,
      "loss": 0.1222,
      "step": 10225
    },
    {
      "epoch": 0.9421845487630718,
      "grad_norm": 1.0127249337390898,
      "learning_rate": 4.548896478016651e-08,
      "loss": 0.1213,
      "step": 10226
    },
    {
      "epoch": 0.9422766849403418,
      "grad_norm": 0.8962299567936826,
      "learning_rate": 4.5344420007816526e-08,
      "loss": 0.1125,
      "step": 10227
    },
    {
      "epoch": 0.9423688211176118,
      "grad_norm": 0.8985914309284595,
      "learning_rate": 4.5200103150501996e-08,
      "loss": 0.1132,
      "step": 10228
    },
    {
      "epoch": 0.9424609572948819,
      "grad_norm": 0.8947342003365959,
      "learning_rate": 4.5056014221621645e-08,
      "loss": 0.1106,
      "step": 10229
    },
    {
      "epoch": 0.9425530934721519,
      "grad_norm": 0.9554543314067567,
      "learning_rate": 4.4912153234554777e-08,
      "loss": 0.1163,
      "step": 10230
    },
    {
      "epoch": 0.9426452296494219,
      "grad_norm": 0.8884133929587698,
      "learning_rate": 4.4768520202658484e-08,
      "loss": 0.1036,
      "step": 10231
    },
    {
      "epoch": 0.9427373658266919,
      "grad_norm": 0.9632133595908018,
      "learning_rate": 4.4625115139269314e-08,
      "loss": 0.1237,
      "step": 10232
    },
    {
      "epoch": 0.9428295020039619,
      "grad_norm": 0.9369237909664713,
      "learning_rate": 4.448193805770273e-08,
      "loss": 0.1118,
      "step": 10233
    },
    {
      "epoch": 0.9429216381812319,
      "grad_norm": 0.9721798178694152,
      "learning_rate": 4.4338988971252275e-08,
      "loss": 0.1255,
      "step": 10234
    },
    {
      "epoch": 0.9430137743585019,
      "grad_norm": 0.9855443317803402,
      "learning_rate": 4.4196267893190926e-08,
      "loss": 0.1181,
      "step": 10235
    },
    {
      "epoch": 0.9431059105357719,
      "grad_norm": 0.9723576412165987,
      "learning_rate": 4.4053774836770315e-08,
      "loss": 0.1235,
      "step": 10236
    },
    {
      "epoch": 0.9431980467130419,
      "grad_norm": 0.9171237878625916,
      "learning_rate": 4.3911509815221244e-08,
      "loss": 0.1065,
      "step": 10237
    },
    {
      "epoch": 0.9432901828903119,
      "grad_norm": 0.9440385935903521,
      "learning_rate": 4.3769472841752866e-08,
      "loss": 0.1147,
      "step": 10238
    },
    {
      "epoch": 0.9433823190675819,
      "grad_norm": 0.9451368053622421,
      "learning_rate": 4.362766392955325e-08,
      "loss": 0.1158,
      "step": 10239
    },
    {
      "epoch": 0.9434744552448519,
      "grad_norm": 0.9943988884305303,
      "learning_rate": 4.348608309178909e-08,
      "loss": 0.1229,
      "step": 10240
    },
    {
      "epoch": 0.9435665914221218,
      "grad_norm": 0.9477728019676438,
      "learning_rate": 4.33447303416068e-08,
      "loss": 0.1156,
      "step": 10241
    },
    {
      "epoch": 0.943658727599392,
      "grad_norm": 0.9560883582930023,
      "learning_rate": 4.320360569213061e-08,
      "loss": 0.1123,
      "step": 10242
    },
    {
      "epoch": 0.943750863776662,
      "grad_norm": 0.9330272969248564,
      "learning_rate": 4.3062709156463936e-08,
      "loss": 0.1157,
      "step": 10243
    },
    {
      "epoch": 0.943842999953932,
      "grad_norm": 0.9343240680576235,
      "learning_rate": 4.292204074768908e-08,
      "loss": 0.1169,
      "step": 10244
    },
    {
      "epoch": 0.943935136131202,
      "grad_norm": 0.9823045448492629,
      "learning_rate": 4.278160047886753e-08,
      "loss": 0.1266,
      "step": 10245
    },
    {
      "epoch": 0.9440272723084719,
      "grad_norm": 0.9301202110619059,
      "learning_rate": 4.264138836303861e-08,
      "loss": 0.1203,
      "step": 10246
    },
    {
      "epoch": 0.9441194084857419,
      "grad_norm": 0.9450681121818032,
      "learning_rate": 4.250140441322131e-08,
      "loss": 0.1272,
      "step": 10247
    },
    {
      "epoch": 0.9442115446630119,
      "grad_norm": 0.9737075656993364,
      "learning_rate": 4.236164864241277e-08,
      "loss": 0.1184,
      "step": 10248
    },
    {
      "epoch": 0.9443036808402819,
      "grad_norm": 0.9491703019406365,
      "learning_rate": 4.22221210635898e-08,
      "loss": 0.1153,
      "step": 10249
    },
    {
      "epoch": 0.9443958170175519,
      "grad_norm": 0.9452455622285862,
      "learning_rate": 4.208282168970762e-08,
      "loss": 0.1173,
      "step": 10250
    },
    {
      "epoch": 0.9444879531948219,
      "grad_norm": 1.0134904272797585,
      "learning_rate": 4.1943750533700036e-08,
      "loss": 0.1264,
      "step": 10251
    },
    {
      "epoch": 0.9445800893720919,
      "grad_norm": 0.9769733494426245,
      "learning_rate": 4.1804907608479494e-08,
      "loss": 0.1312,
      "step": 10252
    },
    {
      "epoch": 0.9446722255493619,
      "grad_norm": 0.9095232445090923,
      "learning_rate": 4.166629292693791e-08,
      "loss": 0.1095,
      "step": 10253
    },
    {
      "epoch": 0.9447643617266319,
      "grad_norm": 0.9253806676477634,
      "learning_rate": 4.1527906501945547e-08,
      "loss": 0.1136,
      "step": 10254
    },
    {
      "epoch": 0.9448564979039019,
      "grad_norm": 0.9756487468351606,
      "learning_rate": 4.138974834635157e-08,
      "loss": 0.123,
      "step": 10255
    },
    {
      "epoch": 0.944948634081172,
      "grad_norm": 0.9438368581540895,
      "learning_rate": 4.1251818472984315e-08,
      "loss": 0.1184,
      "step": 10256
    },
    {
      "epoch": 0.945040770258442,
      "grad_norm": 0.9009313023621,
      "learning_rate": 4.1114116894650225e-08,
      "loss": 0.1161,
      "step": 10257
    },
    {
      "epoch": 0.945132906435712,
      "grad_norm": 0.9339442841007484,
      "learning_rate": 4.0976643624134896e-08,
      "loss": 0.114,
      "step": 10258
    },
    {
      "epoch": 0.945225042612982,
      "grad_norm": 0.9994792676185544,
      "learning_rate": 4.0839398674203114e-08,
      "loss": 0.1172,
      "step": 10259
    },
    {
      "epoch": 0.945317178790252,
      "grad_norm": 0.8813178121583533,
      "learning_rate": 4.0702382057597465e-08,
      "loss": 0.1085,
      "step": 10260
    },
    {
      "epoch": 0.945409314967522,
      "grad_norm": 0.9806576557306624,
      "learning_rate": 4.0565593787040555e-08,
      "loss": 0.1229,
      "step": 10261
    },
    {
      "epoch": 0.945501451144792,
      "grad_norm": 0.9284548478345062,
      "learning_rate": 4.042903387523278e-08,
      "loss": 0.1133,
      "step": 10262
    },
    {
      "epoch": 0.945593587322062,
      "grad_norm": 0.961086375262101,
      "learning_rate": 4.029270233485427e-08,
      "loss": 0.1178,
      "step": 10263
    },
    {
      "epoch": 0.945685723499332,
      "grad_norm": 0.924194713183989,
      "learning_rate": 4.0156599178562686e-08,
      "loss": 0.1157,
      "step": 10264
    },
    {
      "epoch": 0.945777859676602,
      "grad_norm": 0.9922060533952876,
      "learning_rate": 4.002072441899568e-08,
      "loss": 0.1299,
      "step": 10265
    },
    {
      "epoch": 0.945869995853872,
      "grad_norm": 0.8994135466256734,
      "learning_rate": 3.988507806876929e-08,
      "loss": 0.1099,
      "step": 10266
    },
    {
      "epoch": 0.945962132031142,
      "grad_norm": 0.8825297317998159,
      "learning_rate": 3.974966014047815e-08,
      "loss": 0.1083,
      "step": 10267
    },
    {
      "epoch": 0.946054268208412,
      "grad_norm": 0.8886493644730223,
      "learning_rate": 3.961447064669582e-08,
      "loss": 0.1077,
      "step": 10268
    },
    {
      "epoch": 0.946146404385682,
      "grad_norm": 0.9517858277726193,
      "learning_rate": 3.9479509599974486e-08,
      "loss": 0.1205,
      "step": 10269
    },
    {
      "epoch": 0.9462385405629521,
      "grad_norm": 0.9357653566018014,
      "learning_rate": 3.9344777012845504e-08,
      "loss": 0.1142,
      "step": 10270
    },
    {
      "epoch": 0.9463306767402221,
      "grad_norm": 0.9994080080071049,
      "learning_rate": 3.921027289781915e-08,
      "loss": 0.1286,
      "step": 10271
    },
    {
      "epoch": 0.9464228129174921,
      "grad_norm": 0.9701132832326953,
      "learning_rate": 3.907599726738348e-08,
      "loss": 0.1214,
      "step": 10272
    },
    {
      "epoch": 0.9465149490947621,
      "grad_norm": 0.9475842718266658,
      "learning_rate": 3.894195013400631e-08,
      "loss": 0.1087,
      "step": 10273
    },
    {
      "epoch": 0.9466070852720321,
      "grad_norm": 0.9428359279407029,
      "learning_rate": 3.8808131510134074e-08,
      "loss": 0.1263,
      "step": 10274
    },
    {
      "epoch": 0.9466992214493021,
      "grad_norm": 0.97188586109351,
      "learning_rate": 3.8674541408191824e-08,
      "loss": 0.1213,
      "step": 10275
    },
    {
      "epoch": 0.9467913576265721,
      "grad_norm": 0.97344652490752,
      "learning_rate": 3.854117984058298e-08,
      "loss": 0.1212,
      "step": 10276
    },
    {
      "epoch": 0.9468834938038421,
      "grad_norm": 0.9359938550829311,
      "learning_rate": 3.840804681969068e-08,
      "loss": 0.1067,
      "step": 10277
    },
    {
      "epoch": 0.9469756299811121,
      "grad_norm": 0.9344899765877749,
      "learning_rate": 3.827514235787616e-08,
      "loss": 0.1217,
      "step": 10278
    },
    {
      "epoch": 0.9470677661583821,
      "grad_norm": 0.9439926374477899,
      "learning_rate": 3.8142466467479265e-08,
      "loss": 0.1105,
      "step": 10279
    },
    {
      "epoch": 0.9471599023356521,
      "grad_norm": 0.9322879507849088,
      "learning_rate": 3.801001916081987e-08,
      "loss": 0.1158,
      "step": 10280
    },
    {
      "epoch": 0.9472520385129221,
      "grad_norm": 0.935923550064033,
      "learning_rate": 3.787780045019479e-08,
      "loss": 0.119,
      "step": 10281
    },
    {
      "epoch": 0.947344174690192,
      "grad_norm": 0.9640710670440555,
      "learning_rate": 3.774581034788116e-08,
      "loss": 0.1135,
      "step": 10282
    },
    {
      "epoch": 0.9474363108674622,
      "grad_norm": 0.9311183807439256,
      "learning_rate": 3.7614048866133624e-08,
      "loss": 0.1173,
      "step": 10283
    },
    {
      "epoch": 0.9475284470447322,
      "grad_norm": 0.9739842153491166,
      "learning_rate": 3.748251601718711e-08,
      "loss": 0.1223,
      "step": 10284
    },
    {
      "epoch": 0.9476205832220022,
      "grad_norm": 0.9989638639290763,
      "learning_rate": 3.7351211813253795e-08,
      "loss": 0.1196,
      "step": 10285
    },
    {
      "epoch": 0.9477127193992722,
      "grad_norm": 0.9387016890086035,
      "learning_rate": 3.722013626652532e-08,
      "loss": 0.1178,
      "step": 10286
    },
    {
      "epoch": 0.9478048555765421,
      "grad_norm": 0.9004645700352083,
      "learning_rate": 3.70892893891725e-08,
      "loss": 0.1016,
      "step": 10287
    },
    {
      "epoch": 0.9478969917538121,
      "grad_norm": 0.9072326721273851,
      "learning_rate": 3.695867119334423e-08,
      "loss": 0.1173,
      "step": 10288
    },
    {
      "epoch": 0.9479891279310821,
      "grad_norm": 0.9778845392909062,
      "learning_rate": 3.682828169116831e-08,
      "loss": 0.1272,
      "step": 10289
    },
    {
      "epoch": 0.9480812641083521,
      "grad_norm": 0.909610617103646,
      "learning_rate": 3.669812089475144e-08,
      "loss": 0.1161,
      "step": 10290
    },
    {
      "epoch": 0.9481734002856221,
      "grad_norm": 0.9546052020757654,
      "learning_rate": 3.656818881617924e-08,
      "loss": 0.1204,
      "step": 10291
    },
    {
      "epoch": 0.9482655364628921,
      "grad_norm": 0.9205463321641423,
      "learning_rate": 3.6438485467515935e-08,
      "loss": 0.1015,
      "step": 10292
    },
    {
      "epoch": 0.9483576726401621,
      "grad_norm": 0.9848823587850997,
      "learning_rate": 3.630901086080441e-08,
      "loss": 0.1283,
      "step": 10293
    },
    {
      "epoch": 0.9484498088174321,
      "grad_norm": 0.9526839289073572,
      "learning_rate": 3.6179765008066134e-08,
      "loss": 0.117,
      "step": 10294
    },
    {
      "epoch": 0.9485419449947021,
      "grad_norm": 0.9203969240464469,
      "learning_rate": 3.605074792130181e-08,
      "loss": 0.1089,
      "step": 10295
    },
    {
      "epoch": 0.9486340811719721,
      "grad_norm": 0.9418423365790961,
      "learning_rate": 3.5921959612491006e-08,
      "loss": 0.1118,
      "step": 10296
    },
    {
      "epoch": 0.9487262173492422,
      "grad_norm": 0.9103613162527556,
      "learning_rate": 3.5793400093591394e-08,
      "loss": 0.1092,
      "step": 10297
    },
    {
      "epoch": 0.9488183535265122,
      "grad_norm": 0.9557914846405104,
      "learning_rate": 3.5665069376539796e-08,
      "loss": 0.1276,
      "step": 10298
    },
    {
      "epoch": 0.9489104897037822,
      "grad_norm": 0.9745637820171693,
      "learning_rate": 3.553696747325142e-08,
      "loss": 0.1125,
      "step": 10299
    },
    {
      "epoch": 0.9490026258810522,
      "grad_norm": 0.9457597041155954,
      "learning_rate": 3.540909439562118e-08,
      "loss": 0.1186,
      "step": 10300
    },
    {
      "epoch": 0.9490947620583222,
      "grad_norm": 1.0035411325101824,
      "learning_rate": 3.528145015552154e-08,
      "loss": 0.1309,
      "step": 10301
    },
    {
      "epoch": 0.9491868982355922,
      "grad_norm": 0.9519212745079059,
      "learning_rate": 3.515403476480439e-08,
      "loss": 0.1118,
      "step": 10302
    },
    {
      "epoch": 0.9492790344128622,
      "grad_norm": 0.9403154344488309,
      "learning_rate": 3.5026848235300834e-08,
      "loss": 0.11,
      "step": 10303
    },
    {
      "epoch": 0.9493711705901322,
      "grad_norm": 0.9136726499930174,
      "learning_rate": 3.489989057881948e-08,
      "loss": 0.1133,
      "step": 10304
    },
    {
      "epoch": 0.9494633067674022,
      "grad_norm": 0.955839488777064,
      "learning_rate": 3.47731618071484e-08,
      "loss": 0.1243,
      "step": 10305
    },
    {
      "epoch": 0.9495554429446722,
      "grad_norm": 0.964765603667002,
      "learning_rate": 3.4646661932054846e-08,
      "loss": 0.1175,
      "step": 10306
    },
    {
      "epoch": 0.9496475791219422,
      "grad_norm": 0.9055605967583641,
      "learning_rate": 3.452039096528359e-08,
      "loss": 0.1169,
      "step": 10307
    },
    {
      "epoch": 0.9497397152992122,
      "grad_norm": 0.9543169846360642,
      "learning_rate": 3.439434891855997e-08,
      "loss": 0.1117,
      "step": 10308
    },
    {
      "epoch": 0.9498318514764822,
      "grad_norm": 0.90718852701149,
      "learning_rate": 3.42685358035863e-08,
      "loss": 0.117,
      "step": 10309
    },
    {
      "epoch": 0.9499239876537523,
      "grad_norm": 0.9405814649991735,
      "learning_rate": 3.4142951632044065e-08,
      "loss": 0.1161,
      "step": 10310
    },
    {
      "epoch": 0.9500161238310223,
      "grad_norm": 1.0220816495719443,
      "learning_rate": 3.401759641559449e-08,
      "loss": 0.1268,
      "step": 10311
    },
    {
      "epoch": 0.9501082600082923,
      "grad_norm": 0.9680480102811909,
      "learning_rate": 3.3892470165876045e-08,
      "loss": 0.1143,
      "step": 10312
    },
    {
      "epoch": 0.9502003961855623,
      "grad_norm": 0.9410029223109165,
      "learning_rate": 3.376757289450777e-08,
      "loss": 0.1156,
      "step": 10313
    },
    {
      "epoch": 0.9502925323628323,
      "grad_norm": 0.8905182542520986,
      "learning_rate": 3.3642904613085393e-08,
      "loss": 0.1117,
      "step": 10314
    },
    {
      "epoch": 0.9503846685401023,
      "grad_norm": 0.9202657757695479,
      "learning_rate": 3.3518465333184925e-08,
      "loss": 0.1192,
      "step": 10315
    },
    {
      "epoch": 0.9504768047173723,
      "grad_norm": 0.914152702938313,
      "learning_rate": 3.339425506636018e-08,
      "loss": 0.1054,
      "step": 10316
    },
    {
      "epoch": 0.9505689408946423,
      "grad_norm": 0.9279714745854521,
      "learning_rate": 3.327027382414444e-08,
      "loss": 0.1131,
      "step": 10317
    },
    {
      "epoch": 0.9506610770719123,
      "grad_norm": 0.9479869182555316,
      "learning_rate": 3.314652161804932e-08,
      "loss": 0.1161,
      "step": 10318
    },
    {
      "epoch": 0.9507532132491823,
      "grad_norm": 0.954111959930808,
      "learning_rate": 3.30229984595648e-08,
      "loss": 0.1201,
      "step": 10319
    },
    {
      "epoch": 0.9508453494264523,
      "grad_norm": 0.9734374242952998,
      "learning_rate": 3.289970436016088e-08,
      "loss": 0.1221,
      "step": 10320
    },
    {
      "epoch": 0.9509374856037223,
      "grad_norm": 0.9428411208222813,
      "learning_rate": 3.2776639331284774e-08,
      "loss": 0.1144,
      "step": 10321
    },
    {
      "epoch": 0.9510296217809923,
      "grad_norm": 0.9317668049150903,
      "learning_rate": 3.2653803384362914e-08,
      "loss": 0.1181,
      "step": 10322
    },
    {
      "epoch": 0.9511217579582623,
      "grad_norm": 0.9298411302304473,
      "learning_rate": 3.253119653080117e-08,
      "loss": 0.1223,
      "step": 10323
    },
    {
      "epoch": 0.9512138941355324,
      "grad_norm": 0.9151941756140258,
      "learning_rate": 3.240881878198349e-08,
      "loss": 0.1185,
      "step": 10324
    },
    {
      "epoch": 0.9513060303128024,
      "grad_norm": 0.8593540933721838,
      "learning_rate": 3.228667014927245e-08,
      "loss": 0.1042,
      "step": 10325
    },
    {
      "epoch": 0.9513981664900724,
      "grad_norm": 0.9377788085836343,
      "learning_rate": 3.2164750644009814e-08,
      "loss": 0.1217,
      "step": 10326
    },
    {
      "epoch": 0.9514903026673424,
      "grad_norm": 0.9133361500053995,
      "learning_rate": 3.204306027751541e-08,
      "loss": 0.1116,
      "step": 10327
    },
    {
      "epoch": 0.9515824388446124,
      "grad_norm": 0.9487359846675105,
      "learning_rate": 3.1921599061088546e-08,
      "loss": 0.1249,
      "step": 10328
    },
    {
      "epoch": 0.9516745750218824,
      "grad_norm": 0.9343318497067161,
      "learning_rate": 3.180036700600686e-08,
      "loss": 0.1114,
      "step": 10329
    },
    {
      "epoch": 0.9517667111991523,
      "grad_norm": 0.9296746242117467,
      "learning_rate": 3.1679364123526625e-08,
      "loss": 0.1187,
      "step": 10330
    },
    {
      "epoch": 0.9518588473764223,
      "grad_norm": 0.9647571322981348,
      "learning_rate": 3.1558590424883294e-08,
      "loss": 0.1276,
      "step": 10331
    },
    {
      "epoch": 0.9519509835536923,
      "grad_norm": 0.9685146771017278,
      "learning_rate": 3.1438045921290404e-08,
      "loss": 0.124,
      "step": 10332
    },
    {
      "epoch": 0.9520431197309623,
      "grad_norm": 0.8476579863265571,
      "learning_rate": 3.1317730623940665e-08,
      "loss": 0.1033,
      "step": 10333
    },
    {
      "epoch": 0.9521352559082323,
      "grad_norm": 0.9532082684247339,
      "learning_rate": 3.119764454400515e-08,
      "loss": 0.1218,
      "step": 10334
    },
    {
      "epoch": 0.9522273920855023,
      "grad_norm": 0.9453757489867697,
      "learning_rate": 3.1077787692634085e-08,
      "loss": 0.121,
      "step": 10335
    },
    {
      "epoch": 0.9523195282627723,
      "grad_norm": 0.9229009382067201,
      "learning_rate": 3.095816008095637e-08,
      "loss": 0.1149,
      "step": 10336
    },
    {
      "epoch": 0.9524116644400424,
      "grad_norm": 1.0206879719212576,
      "learning_rate": 3.083876172007894e-08,
      "loss": 0.122,
      "step": 10337
    },
    {
      "epoch": 0.9525038006173124,
      "grad_norm": 0.8983665500372922,
      "learning_rate": 3.071959262108848e-08,
      "loss": 0.1063,
      "step": 10338
    },
    {
      "epoch": 0.9525959367945824,
      "grad_norm": 0.9479813529207537,
      "learning_rate": 3.0600652795049204e-08,
      "loss": 0.112,
      "step": 10339
    },
    {
      "epoch": 0.9526880729718524,
      "grad_norm": 0.9177433261172819,
      "learning_rate": 3.048194225300532e-08,
      "loss": 0.112,
      "step": 10340
    },
    {
      "epoch": 0.9527802091491224,
      "grad_norm": 0.9370376744228802,
      "learning_rate": 3.0363461005978865e-08,
      "loss": 0.1103,
      "step": 10341
    },
    {
      "epoch": 0.9528723453263924,
      "grad_norm": 0.9060338446923794,
      "learning_rate": 3.024520906497103e-08,
      "loss": 0.1088,
      "step": 10342
    },
    {
      "epoch": 0.9529644815036624,
      "grad_norm": 0.9151253682669213,
      "learning_rate": 3.012718644096107e-08,
      "loss": 0.116,
      "step": 10343
    },
    {
      "epoch": 0.9530566176809324,
      "grad_norm": 0.9662487060386523,
      "learning_rate": 3.0009393144907475e-08,
      "loss": 0.1025,
      "step": 10344
    },
    {
      "epoch": 0.9531487538582024,
      "grad_norm": 0.9507650484823077,
      "learning_rate": 2.989182918774786e-08,
      "loss": 0.1208,
      "step": 10345
    },
    {
      "epoch": 0.9532408900354724,
      "grad_norm": 1.045514593246912,
      "learning_rate": 2.977449458039766e-08,
      "loss": 0.1339,
      "step": 10346
    },
    {
      "epoch": 0.9533330262127424,
      "grad_norm": 0.9427860617410642,
      "learning_rate": 2.9657389333751784e-08,
      "loss": 0.1154,
      "step": 10347
    },
    {
      "epoch": 0.9534251623900124,
      "grad_norm": 0.9225913930801135,
      "learning_rate": 2.954051345868264e-08,
      "loss": 0.1031,
      "step": 10348
    },
    {
      "epoch": 0.9535172985672824,
      "grad_norm": 0.9038936333486661,
      "learning_rate": 2.9423866966042935e-08,
      "loss": 0.1116,
      "step": 10349
    },
    {
      "epoch": 0.9536094347445524,
      "grad_norm": 0.9543339656289974,
      "learning_rate": 2.9307449866663174e-08,
      "loss": 0.1174,
      "step": 10350
    },
    {
      "epoch": 0.9537015709218225,
      "grad_norm": 0.9056991309017217,
      "learning_rate": 2.9191262171352486e-08,
      "loss": 0.1111,
      "step": 10351
    },
    {
      "epoch": 0.9537937070990925,
      "grad_norm": 0.9861185880449297,
      "learning_rate": 2.9075303890899187e-08,
      "loss": 0.1282,
      "step": 10352
    },
    {
      "epoch": 0.9538858432763625,
      "grad_norm": 0.9621592809326136,
      "learning_rate": 2.895957503606939e-08,
      "loss": 0.1274,
      "step": 10353
    },
    {
      "epoch": 0.9539779794536325,
      "grad_norm": 0.8922039949226299,
      "learning_rate": 2.8844075617609492e-08,
      "loss": 0.1105,
      "step": 10354
    },
    {
      "epoch": 0.9540701156309025,
      "grad_norm": 0.9228402469930214,
      "learning_rate": 2.8728805646242863e-08,
      "loss": 0.1128,
      "step": 10355
    },
    {
      "epoch": 0.9541622518081725,
      "grad_norm": 0.9617168790205287,
      "learning_rate": 2.8613765132672612e-08,
      "loss": 0.1128,
      "step": 10356
    },
    {
      "epoch": 0.9542543879854425,
      "grad_norm": 0.8580565446966045,
      "learning_rate": 2.8498954087580187e-08,
      "loss": 0.0943,
      "step": 10357
    },
    {
      "epoch": 0.9543465241627125,
      "grad_norm": 0.9520942535427239,
      "learning_rate": 2.8384372521626236e-08,
      "loss": 0.1172,
      "step": 10358
    },
    {
      "epoch": 0.9544386603399825,
      "grad_norm": 0.9365343890221104,
      "learning_rate": 2.827002044544891e-08,
      "loss": 0.1148,
      "step": 10359
    },
    {
      "epoch": 0.9545307965172525,
      "grad_norm": 0.933691258317069,
      "learning_rate": 2.8155897869666105e-08,
      "loss": 0.1206,
      "step": 10360
    },
    {
      "epoch": 0.9546229326945225,
      "grad_norm": 0.9104238953866819,
      "learning_rate": 2.8042004804874346e-08,
      "loss": 0.1116,
      "step": 10361
    },
    {
      "epoch": 0.9547150688717925,
      "grad_norm": 0.8681396545723196,
      "learning_rate": 2.7928341261648507e-08,
      "loss": 0.1018,
      "step": 10362
    },
    {
      "epoch": 0.9548072050490625,
      "grad_norm": 0.9477791985994918,
      "learning_rate": 2.7814907250542368e-08,
      "loss": 0.1133,
      "step": 10363
    },
    {
      "epoch": 0.9548993412263325,
      "grad_norm": 0.9830223390489866,
      "learning_rate": 2.770170278208806e-08,
      "loss": 0.1211,
      "step": 10364
    },
    {
      "epoch": 0.9549914774036026,
      "grad_norm": 0.9651177678762349,
      "learning_rate": 2.7588727866796617e-08,
      "loss": 0.1193,
      "step": 10365
    },
    {
      "epoch": 0.9550836135808726,
      "grad_norm": 0.9231632131169901,
      "learning_rate": 2.7475982515157986e-08,
      "loss": 0.108,
      "step": 10366
    },
    {
      "epoch": 0.9551757497581426,
      "grad_norm": 0.9546876190711204,
      "learning_rate": 2.7363466737640453e-08,
      "loss": 0.1126,
      "step": 10367
    },
    {
      "epoch": 0.9552678859354126,
      "grad_norm": 0.9264536078414163,
      "learning_rate": 2.7251180544691225e-08,
      "loss": 0.1166,
      "step": 10368
    },
    {
      "epoch": 0.9553600221126826,
      "grad_norm": 0.948299270414955,
      "learning_rate": 2.7139123946735847e-08,
      "loss": 0.1229,
      "step": 10369
    },
    {
      "epoch": 0.9554521582899526,
      "grad_norm": 0.9773410646908133,
      "learning_rate": 2.7027296954178773e-08,
      "loss": 0.1169,
      "step": 10370
    },
    {
      "epoch": 0.9555442944672226,
      "grad_norm": 0.9411232679657067,
      "learning_rate": 2.6915699577403644e-08,
      "loss": 0.1119,
      "step": 10371
    },
    {
      "epoch": 0.9556364306444926,
      "grad_norm": 0.9173845585128807,
      "learning_rate": 2.680433182677189e-08,
      "loss": 0.1151,
      "step": 10372
    },
    {
      "epoch": 0.9557285668217625,
      "grad_norm": 0.9916694029075123,
      "learning_rate": 2.6693193712624133e-08,
      "loss": 0.1183,
      "step": 10373
    },
    {
      "epoch": 0.9558207029990325,
      "grad_norm": 0.8983875748435834,
      "learning_rate": 2.6582285245279338e-08,
      "loss": 0.1121,
      "step": 10374
    },
    {
      "epoch": 0.9559128391763025,
      "grad_norm": 0.916895182755261,
      "learning_rate": 2.6471606435035934e-08,
      "loss": 0.1075,
      "step": 10375
    },
    {
      "epoch": 0.9560049753535725,
      "grad_norm": 0.9347044640622088,
      "learning_rate": 2.6361157292169593e-08,
      "loss": 0.1195,
      "step": 10376
    },
    {
      "epoch": 0.9560971115308425,
      "grad_norm": 0.9614171099198148,
      "learning_rate": 2.6250937826936274e-08,
      "loss": 0.12,
      "step": 10377
    },
    {
      "epoch": 0.9561892477081126,
      "grad_norm": 0.891382660362729,
      "learning_rate": 2.6140948049569737e-08,
      "loss": 0.111,
      "step": 10378
    },
    {
      "epoch": 0.9562813838853826,
      "grad_norm": 0.9757278767219288,
      "learning_rate": 2.603118797028209e-08,
      "loss": 0.1256,
      "step": 10379
    },
    {
      "epoch": 0.9563735200626526,
      "grad_norm": 0.9466608595122338,
      "learning_rate": 2.592165759926518e-08,
      "loss": 0.1157,
      "step": 10380
    },
    {
      "epoch": 0.9564656562399226,
      "grad_norm": 0.9383107114478125,
      "learning_rate": 2.5812356946688376e-08,
      "loss": 0.1102,
      "step": 10381
    },
    {
      "epoch": 0.9565577924171926,
      "grad_norm": 0.9982385805871572,
      "learning_rate": 2.5703286022700503e-08,
      "loss": 0.1209,
      "step": 10382
    },
    {
      "epoch": 0.9566499285944626,
      "grad_norm": 0.9727363491362462,
      "learning_rate": 2.559444483742901e-08,
      "loss": 0.122,
      "step": 10383
    },
    {
      "epoch": 0.9567420647717326,
      "grad_norm": 0.9155845511468633,
      "learning_rate": 2.548583340097971e-08,
      "loss": 0.1099,
      "step": 10384
    },
    {
      "epoch": 0.9568342009490026,
      "grad_norm": 0.9366488234735828,
      "learning_rate": 2.5377451723436753e-08,
      "loss": 0.1122,
      "step": 10385
    },
    {
      "epoch": 0.9569263371262726,
      "grad_norm": 0.9856402469594859,
      "learning_rate": 2.5269299814863756e-08,
      "loss": 0.125,
      "step": 10386
    },
    {
      "epoch": 0.9570184733035426,
      "grad_norm": 0.9558864294357874,
      "learning_rate": 2.5161377685302968e-08,
      "loss": 0.1283,
      "step": 10387
    },
    {
      "epoch": 0.9571106094808126,
      "grad_norm": 0.9764377825032131,
      "learning_rate": 2.505368534477415e-08,
      "loss": 0.1199,
      "step": 10388
    },
    {
      "epoch": 0.9572027456580826,
      "grad_norm": 0.8978698039692437,
      "learning_rate": 2.4946222803277354e-08,
      "loss": 0.0986,
      "step": 10389
    },
    {
      "epoch": 0.9572948818353526,
      "grad_norm": 0.9801110295191998,
      "learning_rate": 2.483899007078988e-08,
      "loss": 0.1306,
      "step": 10390
    },
    {
      "epoch": 0.9573870180126226,
      "grad_norm": 0.9985048936130674,
      "learning_rate": 2.4731987157268768e-08,
      "loss": 0.1247,
      "step": 10391
    },
    {
      "epoch": 0.9574791541898927,
      "grad_norm": 0.9170543020639506,
      "learning_rate": 2.462521407264912e-08,
      "loss": 0.1122,
      "step": 10392
    },
    {
      "epoch": 0.9575712903671627,
      "grad_norm": 0.9342437829032304,
      "learning_rate": 2.4518670826844393e-08,
      "loss": 0.1129,
      "step": 10393
    },
    {
      "epoch": 0.9576634265444327,
      "grad_norm": 0.9286150111365903,
      "learning_rate": 2.4412357429747514e-08,
      "loss": 0.1156,
      "step": 10394
    },
    {
      "epoch": 0.9577555627217027,
      "grad_norm": 0.9298276602062225,
      "learning_rate": 2.4306273891230025e-08,
      "loss": 0.1104,
      "step": 10395
    },
    {
      "epoch": 0.9578476988989727,
      "grad_norm": 1.0298146827458847,
      "learning_rate": 2.4200420221141274e-08,
      "loss": 0.1333,
      "step": 10396
    },
    {
      "epoch": 0.9579398350762427,
      "grad_norm": 1.0308681272587117,
      "learning_rate": 2.4094796429310063e-08,
      "loss": 0.1398,
      "step": 10397
    },
    {
      "epoch": 0.9580319712535127,
      "grad_norm": 0.9327133822677386,
      "learning_rate": 2.398940252554327e-08,
      "loss": 0.1067,
      "step": 10398
    },
    {
      "epoch": 0.9581241074307827,
      "grad_norm": 0.9462850181389516,
      "learning_rate": 2.3884238519626957e-08,
      "loss": 0.1124,
      "step": 10399
    },
    {
      "epoch": 0.9582162436080527,
      "grad_norm": 0.9239641699815608,
      "learning_rate": 2.3779304421325532e-08,
      "loss": 0.1116,
      "step": 10400
    },
    {
      "epoch": 0.9583083797853227,
      "grad_norm": 0.9651337343147548,
      "learning_rate": 2.3674600240382594e-08,
      "loss": 0.1218,
      "step": 10401
    },
    {
      "epoch": 0.9584005159625927,
      "grad_norm": 0.9622370171729949,
      "learning_rate": 2.3570125986518977e-08,
      "loss": 0.1234,
      "step": 10402
    },
    {
      "epoch": 0.9584926521398627,
      "grad_norm": 0.9408894748477682,
      "learning_rate": 2.346588166943581e-08,
      "loss": 0.115,
      "step": 10403
    },
    {
      "epoch": 0.9585847883171327,
      "grad_norm": 0.9783764871100286,
      "learning_rate": 2.336186729881229e-08,
      "loss": 0.1194,
      "step": 10404
    },
    {
      "epoch": 0.9586769244944028,
      "grad_norm": 0.9756132049009393,
      "learning_rate": 2.32580828843057e-08,
      "loss": 0.1186,
      "step": 10405
    },
    {
      "epoch": 0.9587690606716728,
      "grad_norm": 0.9510515294492892,
      "learning_rate": 2.3154528435553046e-08,
      "loss": 0.1092,
      "step": 10406
    },
    {
      "epoch": 0.9588611968489428,
      "grad_norm": 0.8845538346516818,
      "learning_rate": 2.3051203962168588e-08,
      "loss": 0.0984,
      "step": 10407
    },
    {
      "epoch": 0.9589533330262128,
      "grad_norm": 0.9793169380312722,
      "learning_rate": 2.2948109473746593e-08,
      "loss": 0.1201,
      "step": 10408
    },
    {
      "epoch": 0.9590454692034828,
      "grad_norm": 0.9673626523788653,
      "learning_rate": 2.2845244979859127e-08,
      "loss": 0.1207,
      "step": 10409
    },
    {
      "epoch": 0.9591376053807528,
      "grad_norm": 0.9894712915426592,
      "learning_rate": 2.274261049005716e-08,
      "loss": 0.1243,
      "step": 10410
    },
    {
      "epoch": 0.9592297415580228,
      "grad_norm": 0.9445587376170052,
      "learning_rate": 2.264020601387057e-08,
      "loss": 0.1186,
      "step": 10411
    },
    {
      "epoch": 0.9593218777352928,
      "grad_norm": 0.9923210445189716,
      "learning_rate": 2.2538031560807584e-08,
      "loss": 0.1362,
      "step": 10412
    },
    {
      "epoch": 0.9594140139125628,
      "grad_norm": 0.9947331667454813,
      "learning_rate": 2.243608714035478e-08,
      "loss": 0.1276,
      "step": 10413
    },
    {
      "epoch": 0.9595061500898328,
      "grad_norm": 0.9192672464588488,
      "learning_rate": 2.2334372761977918e-08,
      "loss": 0.1123,
      "step": 10414
    },
    {
      "epoch": 0.9595982862671028,
      "grad_norm": 0.9176602052717612,
      "learning_rate": 2.2232888435121115e-08,
      "loss": 0.1082,
      "step": 10415
    },
    {
      "epoch": 0.9596904224443727,
      "grad_norm": 0.8490744571811496,
      "learning_rate": 2.213163416920766e-08,
      "loss": 0.1001,
      "step": 10416
    },
    {
      "epoch": 0.9597825586216427,
      "grad_norm": 1.0246092310280255,
      "learning_rate": 2.203060997363837e-08,
      "loss": 0.1357,
      "step": 10417
    },
    {
      "epoch": 0.9598746947989127,
      "grad_norm": 0.9870790553638401,
      "learning_rate": 2.1929815857793802e-08,
      "loss": 0.1232,
      "step": 10418
    },
    {
      "epoch": 0.9599668309761828,
      "grad_norm": 0.8683466030495433,
      "learning_rate": 2.1829251831032293e-08,
      "loss": 0.1048,
      "step": 10419
    },
    {
      "epoch": 0.9600589671534528,
      "grad_norm": 0.9473957011467433,
      "learning_rate": 2.172891790269166e-08,
      "loss": 0.1191,
      "step": 10420
    },
    {
      "epoch": 0.9601511033307228,
      "grad_norm": 0.9543960166674035,
      "learning_rate": 2.1628814082087503e-08,
      "loss": 0.1059,
      "step": 10421
    },
    {
      "epoch": 0.9602432395079928,
      "grad_norm": 0.9508773504440561,
      "learning_rate": 2.1528940378514885e-08,
      "loss": 0.1162,
      "step": 10422
    },
    {
      "epoch": 0.9603353756852628,
      "grad_norm": 0.9490154248407026,
      "learning_rate": 2.142929680124667e-08,
      "loss": 0.1193,
      "step": 10423
    },
    {
      "epoch": 0.9604275118625328,
      "grad_norm": 0.9539270143381033,
      "learning_rate": 2.1329883359535174e-08,
      "loss": 0.1224,
      "step": 10424
    },
    {
      "epoch": 0.9605196480398028,
      "grad_norm": 0.9445515396370072,
      "learning_rate": 2.12307000626108e-08,
      "loss": 0.1116,
      "step": 10425
    },
    {
      "epoch": 0.9606117842170728,
      "grad_norm": 0.9336698772896715,
      "learning_rate": 2.113174691968256e-08,
      "loss": 0.1222,
      "step": 10426
    },
    {
      "epoch": 0.9607039203943428,
      "grad_norm": 0.9736147739548708,
      "learning_rate": 2.103302393993867e-08,
      "loss": 0.1215,
      "step": 10427
    },
    {
      "epoch": 0.9607960565716128,
      "grad_norm": 0.9610154886959913,
      "learning_rate": 2.0934531132544845e-08,
      "loss": 0.1119,
      "step": 10428
    },
    {
      "epoch": 0.9608881927488828,
      "grad_norm": 0.9233255796310752,
      "learning_rate": 2.0836268506647108e-08,
      "loss": 0.1159,
      "step": 10429
    },
    {
      "epoch": 0.9609803289261528,
      "grad_norm": 1.0019170763885927,
      "learning_rate": 2.0738236071368157e-08,
      "loss": 0.1267,
      "step": 10430
    },
    {
      "epoch": 0.9610724651034228,
      "grad_norm": 0.9475199883723734,
      "learning_rate": 2.0640433835810992e-08,
      "loss": 0.124,
      "step": 10431
    },
    {
      "epoch": 0.9611646012806928,
      "grad_norm": 0.9505982590713935,
      "learning_rate": 2.0542861809056403e-08,
      "loss": 0.1195,
      "step": 10432
    },
    {
      "epoch": 0.9612567374579629,
      "grad_norm": 0.9229846484513212,
      "learning_rate": 2.044552000016409e-08,
      "loss": 0.1221,
      "step": 10433
    },
    {
      "epoch": 0.9613488736352329,
      "grad_norm": 1.0038863547016854,
      "learning_rate": 2.0348408418172095e-08,
      "loss": 0.1264,
      "step": 10434
    },
    {
      "epoch": 0.9614410098125029,
      "grad_norm": 0.9560645257488093,
      "learning_rate": 2.025152707209682e-08,
      "loss": 0.1254,
      "step": 10435
    },
    {
      "epoch": 0.9615331459897729,
      "grad_norm": 0.9731366034584815,
      "learning_rate": 2.0154875970934406e-08,
      "loss": 0.1203,
      "step": 10436
    },
    {
      "epoch": 0.9616252821670429,
      "grad_norm": 0.87193348832153,
      "learning_rate": 2.0058455123658783e-08,
      "loss": 0.1034,
      "step": 10437
    },
    {
      "epoch": 0.9617174183443129,
      "grad_norm": 0.9036349774099822,
      "learning_rate": 1.996226453922251e-08,
      "loss": 0.1133,
      "step": 10438
    },
    {
      "epoch": 0.9618095545215829,
      "grad_norm": 0.9620815398911813,
      "learning_rate": 1.98663042265565e-08,
      "loss": 0.122,
      "step": 10439
    },
    {
      "epoch": 0.9619016906988529,
      "grad_norm": 0.9346474041117219,
      "learning_rate": 1.97705741945714e-08,
      "loss": 0.1164,
      "step": 10440
    },
    {
      "epoch": 0.9619938268761229,
      "grad_norm": 0.9400603989396563,
      "learning_rate": 1.9675074452155385e-08,
      "loss": 0.122,
      "step": 10441
    },
    {
      "epoch": 0.9620859630533929,
      "grad_norm": 0.9234460209107835,
      "learning_rate": 1.9579805008175524e-08,
      "loss": 0.1125,
      "step": 10442
    },
    {
      "epoch": 0.9621780992306629,
      "grad_norm": 0.9414505506791186,
      "learning_rate": 1.9484765871477795e-08,
      "loss": 0.124,
      "step": 10443
    },
    {
      "epoch": 0.9622702354079329,
      "grad_norm": 1.0014286121077527,
      "learning_rate": 1.9389957050886255e-08,
      "loss": 0.1299,
      "step": 10444
    },
    {
      "epoch": 0.9623623715852029,
      "grad_norm": 0.9142956574674312,
      "learning_rate": 1.9295378555204692e-08,
      "loss": 0.1108,
      "step": 10445
    },
    {
      "epoch": 0.962454507762473,
      "grad_norm": 0.8926286890511982,
      "learning_rate": 1.920103039321386e-08,
      "loss": 0.1038,
      "step": 10446
    },
    {
      "epoch": 0.962546643939743,
      "grad_norm": 0.9334442791983129,
      "learning_rate": 1.910691257367425e-08,
      "loss": 0.1185,
      "step": 10447
    },
    {
      "epoch": 0.962638780117013,
      "grad_norm": 0.9226218209835843,
      "learning_rate": 1.9013025105324988e-08,
      "loss": 0.1156,
      "step": 10448
    },
    {
      "epoch": 0.962730916294283,
      "grad_norm": 1.0008713216068006,
      "learning_rate": 1.8919367996883263e-08,
      "loss": 0.1358,
      "step": 10449
    },
    {
      "epoch": 0.962823052471553,
      "grad_norm": 0.9420362549155533,
      "learning_rate": 1.8825941257045178e-08,
      "loss": 0.1215,
      "step": 10450
    },
    {
      "epoch": 0.962915188648823,
      "grad_norm": 0.9479544981846614,
      "learning_rate": 1.8732744894485732e-08,
      "loss": 0.1147,
      "step": 10451
    },
    {
      "epoch": 0.963007324826093,
      "grad_norm": 0.911141437496614,
      "learning_rate": 1.8639778917857732e-08,
      "loss": 0.1154,
      "step": 10452
    },
    {
      "epoch": 0.963099461003363,
      "grad_norm": 0.9618422043315887,
      "learning_rate": 1.8547043335793435e-08,
      "loss": 0.1207,
      "step": 10453
    },
    {
      "epoch": 0.963191597180633,
      "grad_norm": 0.9380728000761778,
      "learning_rate": 1.845453815690318e-08,
      "loss": 0.1165,
      "step": 10454
    },
    {
      "epoch": 0.963283733357903,
      "grad_norm": 0.9173483292287571,
      "learning_rate": 1.8362263389775926e-08,
      "loss": 0.1158,
      "step": 10455
    },
    {
      "epoch": 0.963375869535173,
      "grad_norm": 0.9578019512184618,
      "learning_rate": 1.827021904297982e-08,
      "loss": 0.1068,
      "step": 10456
    },
    {
      "epoch": 0.963468005712443,
      "grad_norm": 0.9760195511875756,
      "learning_rate": 1.8178405125060804e-08,
      "loss": 0.1254,
      "step": 10457
    },
    {
      "epoch": 0.963560141889713,
      "grad_norm": 0.9428947080982596,
      "learning_rate": 1.8086821644544283e-08,
      "loss": 0.1093,
      "step": 10458
    },
    {
      "epoch": 0.963652278066983,
      "grad_norm": 0.952402275184984,
      "learning_rate": 1.7995468609933176e-08,
      "loss": 0.1197,
      "step": 10459
    },
    {
      "epoch": 0.9637444142442531,
      "grad_norm": 0.9459210733717505,
      "learning_rate": 1.790434602971014e-08,
      "loss": 0.1174,
      "step": 10460
    },
    {
      "epoch": 0.963836550421523,
      "grad_norm": 0.9453907795866404,
      "learning_rate": 1.7813453912335354e-08,
      "loss": 0.1207,
      "step": 10461
    },
    {
      "epoch": 0.963928686598793,
      "grad_norm": 0.9770369213173757,
      "learning_rate": 1.772279226624901e-08,
      "loss": 0.1189,
      "step": 10462
    },
    {
      "epoch": 0.964020822776063,
      "grad_norm": 0.9195107982697946,
      "learning_rate": 1.7632361099867988e-08,
      "loss": 0.1148,
      "step": 10463
    },
    {
      "epoch": 0.964112958953333,
      "grad_norm": 0.9331915458684126,
      "learning_rate": 1.7542160421590017e-08,
      "loss": 0.1204,
      "step": 10464
    },
    {
      "epoch": 0.964205095130603,
      "grad_norm": 0.8800809838568525,
      "learning_rate": 1.7452190239789225e-08,
      "loss": 0.1125,
      "step": 10465
    },
    {
      "epoch": 0.964297231307873,
      "grad_norm": 0.932302653775477,
      "learning_rate": 1.7362450562819765e-08,
      "loss": 0.1112,
      "step": 10466
    },
    {
      "epoch": 0.964389367485143,
      "grad_norm": 0.9339782759306302,
      "learning_rate": 1.7272941399013865e-08,
      "loss": 0.1203,
      "step": 10467
    },
    {
      "epoch": 0.964481503662413,
      "grad_norm": 0.9185719997260997,
      "learning_rate": 1.718366275668265e-08,
      "loss": 0.1135,
      "step": 10468
    },
    {
      "epoch": 0.964573639839683,
      "grad_norm": 0.947190121034125,
      "learning_rate": 1.7094614644115605e-08,
      "loss": 0.1202,
      "step": 10469
    },
    {
      "epoch": 0.964665776016953,
      "grad_norm": 0.9272255443211805,
      "learning_rate": 1.700579706958083e-08,
      "loss": 0.1116,
      "step": 10470
    },
    {
      "epoch": 0.964757912194223,
      "grad_norm": 0.9462412750065758,
      "learning_rate": 1.6917210041325073e-08,
      "loss": 0.1239,
      "step": 10471
    },
    {
      "epoch": 0.964850048371493,
      "grad_norm": 0.8873810871132549,
      "learning_rate": 1.6828853567573413e-08,
      "loss": 0.1103,
      "step": 10472
    },
    {
      "epoch": 0.9649421845487631,
      "grad_norm": 0.9566149931244136,
      "learning_rate": 1.6740727656529844e-08,
      "loss": 0.1229,
      "step": 10473
    },
    {
      "epoch": 0.9650343207260331,
      "grad_norm": 0.8996264087378854,
      "learning_rate": 1.6652832316377264e-08,
      "loss": 0.1136,
      "step": 10474
    },
    {
      "epoch": 0.9651264569033031,
      "grad_norm": 0.9032876556833013,
      "learning_rate": 1.6565167555276373e-08,
      "loss": 0.108,
      "step": 10475
    },
    {
      "epoch": 0.9652185930805731,
      "grad_norm": 0.9747145064783178,
      "learning_rate": 1.6477733381367043e-08,
      "loss": 0.1172,
      "step": 10476
    },
    {
      "epoch": 0.9653107292578431,
      "grad_norm": 0.955471389790214,
      "learning_rate": 1.639052980276723e-08,
      "loss": 0.1171,
      "step": 10477
    },
    {
      "epoch": 0.9654028654351131,
      "grad_norm": 0.9422341858068135,
      "learning_rate": 1.6303556827574062e-08,
      "loss": 0.1103,
      "step": 10478
    },
    {
      "epoch": 0.9654950016123831,
      "grad_norm": 0.9833514913519336,
      "learning_rate": 1.6216814463863028e-08,
      "loss": 0.1112,
      "step": 10479
    },
    {
      "epoch": 0.9655871377896531,
      "grad_norm": 0.9459532688328239,
      "learning_rate": 1.6130302719687962e-08,
      "loss": 0.1148,
      "step": 10480
    },
    {
      "epoch": 0.9656792739669231,
      "grad_norm": 0.9261751377453326,
      "learning_rate": 1.6044021603081607e-08,
      "loss": 0.1188,
      "step": 10481
    },
    {
      "epoch": 0.9657714101441931,
      "grad_norm": 0.9328018965578682,
      "learning_rate": 1.5957971122055327e-08,
      "loss": 0.1224,
      "step": 10482
    },
    {
      "epoch": 0.9658635463214631,
      "grad_norm": 0.9127921231957702,
      "learning_rate": 1.5872151284598848e-08,
      "loss": 0.1106,
      "step": 10483
    },
    {
      "epoch": 0.9659556824987331,
      "grad_norm": 0.9700118414319213,
      "learning_rate": 1.5786562098680235e-08,
      "loss": 0.1217,
      "step": 10484
    },
    {
      "epoch": 0.9660478186760031,
      "grad_norm": 0.9464480098243716,
      "learning_rate": 1.570120357224647e-08,
      "loss": 0.1133,
      "step": 10485
    },
    {
      "epoch": 0.9661399548532731,
      "grad_norm": 0.9112094496345272,
      "learning_rate": 1.561607571322371e-08,
      "loss": 0.1093,
      "step": 10486
    },
    {
      "epoch": 0.9662320910305432,
      "grad_norm": 0.9105593590010037,
      "learning_rate": 1.5531178529515635e-08,
      "loss": 0.1097,
      "step": 10487
    },
    {
      "epoch": 0.9663242272078132,
      "grad_norm": 0.9055822997245453,
      "learning_rate": 1.54465120290051e-08,
      "loss": 0.1054,
      "step": 10488
    },
    {
      "epoch": 0.9664163633850832,
      "grad_norm": 0.9365050243180014,
      "learning_rate": 1.5362076219553048e-08,
      "loss": 0.1167,
      "step": 10489
    },
    {
      "epoch": 0.9665084995623532,
      "grad_norm": 0.9324783994074396,
      "learning_rate": 1.5277871108999586e-08,
      "loss": 0.1196,
      "step": 10490
    },
    {
      "epoch": 0.9666006357396232,
      "grad_norm": 0.9687391645872897,
      "learning_rate": 1.519389670516347e-08,
      "loss": 0.1264,
      "step": 10491
    },
    {
      "epoch": 0.9666927719168932,
      "grad_norm": 0.9326297055675994,
      "learning_rate": 1.511015301584151e-08,
      "loss": 0.1155,
      "step": 10492
    },
    {
      "epoch": 0.9667849080941632,
      "grad_norm": 0.9116280800410203,
      "learning_rate": 1.502664004880888e-08,
      "loss": 0.1104,
      "step": 10493
    },
    {
      "epoch": 0.9668770442714332,
      "grad_norm": 0.9768260070146615,
      "learning_rate": 1.4943357811820492e-08,
      "loss": 0.1179,
      "step": 10494
    },
    {
      "epoch": 0.9669691804487032,
      "grad_norm": 1.0156559451484888,
      "learning_rate": 1.4860306312608762e-08,
      "loss": 0.1246,
      "step": 10495
    },
    {
      "epoch": 0.9670613166259732,
      "grad_norm": 0.9438961994256015,
      "learning_rate": 1.4777485558884753e-08,
      "loss": 0.1193,
      "step": 10496
    },
    {
      "epoch": 0.9671534528032432,
      "grad_norm": 0.9865514247200415,
      "learning_rate": 1.4694895558338972e-08,
      "loss": 0.1222,
      "step": 10497
    },
    {
      "epoch": 0.9672455889805132,
      "grad_norm": 0.9609002911659604,
      "learning_rate": 1.4612536318639459e-08,
      "loss": 0.1188,
      "step": 10498
    },
    {
      "epoch": 0.9673377251577832,
      "grad_norm": 0.9682466775746156,
      "learning_rate": 1.4530407847433702e-08,
      "loss": 0.1288,
      "step": 10499
    },
    {
      "epoch": 0.9674298613350532,
      "grad_norm": 0.9339595766099066,
      "learning_rate": 1.4448510152346717e-08,
      "loss": 0.1133,
      "step": 10500
    },
    {
      "epoch": 0.9674298613350532,
      "eval_loss": 0.11658257246017456,
      "eval_runtime": 300.5591,
      "eval_samples_per_second": 23.346,
      "eval_steps_per_second": 2.921,
      "step": 10500
    },
    {
      "epoch": 0.9675219975123233,
      "grad_norm": 0.9331219890938893,
      "learning_rate": 1.4366843240982975e-08,
      "loss": 0.1149,
      "step": 10501
    },
    {
      "epoch": 0.9676141336895933,
      "grad_norm": 0.9234308420470323,
      "learning_rate": 1.4285407120925854e-08,
      "loss": 0.1203,
      "step": 10502
    },
    {
      "epoch": 0.9677062698668633,
      "grad_norm": 0.9100047418427226,
      "learning_rate": 1.4204201799735973e-08,
      "loss": 0.1048,
      "step": 10503
    },
    {
      "epoch": 0.9677984060441333,
      "grad_norm": 0.9445857702495147,
      "learning_rate": 1.412322728495341e-08,
      "loss": 0.1076,
      "step": 10504
    },
    {
      "epoch": 0.9678905422214032,
      "grad_norm": 0.9000245874971252,
      "learning_rate": 1.40424835840966e-08,
      "loss": 0.1125,
      "step": 10505
    },
    {
      "epoch": 0.9679826783986732,
      "grad_norm": 0.9193210702366165,
      "learning_rate": 1.3961970704662875e-08,
      "loss": 0.1061,
      "step": 10506
    },
    {
      "epoch": 0.9680748145759432,
      "grad_norm": 0.876365817619803,
      "learning_rate": 1.3881688654127645e-08,
      "loss": 0.1051,
      "step": 10507
    },
    {
      "epoch": 0.9681669507532132,
      "grad_norm": 0.9577008365137175,
      "learning_rate": 1.3801637439945225e-08,
      "loss": 0.1203,
      "step": 10508
    },
    {
      "epoch": 0.9682590869304832,
      "grad_norm": 0.9348907292875536,
      "learning_rate": 1.3721817069548282e-08,
      "loss": 0.1178,
      "step": 10509
    },
    {
      "epoch": 0.9683512231077532,
      "grad_norm": 0.9456956045310755,
      "learning_rate": 1.3642227550348387e-08,
      "loss": 0.1199,
      "step": 10510
    },
    {
      "epoch": 0.9684433592850232,
      "grad_norm": 0.9169617118871369,
      "learning_rate": 1.3562868889735182e-08,
      "loss": 0.104,
      "step": 10511
    },
    {
      "epoch": 0.9685354954622932,
      "grad_norm": 0.9437229497003773,
      "learning_rate": 1.348374109507694e-08,
      "loss": 0.1223,
      "step": 10512
    },
    {
      "epoch": 0.9686276316395632,
      "grad_norm": 0.980640074035574,
      "learning_rate": 1.3404844173721398e-08,
      "loss": 0.1183,
      "step": 10513
    },
    {
      "epoch": 0.9687197678168333,
      "grad_norm": 0.9759068217403593,
      "learning_rate": 1.332617813299325e-08,
      "loss": 0.1234,
      "step": 10514
    },
    {
      "epoch": 0.9688119039941033,
      "grad_norm": 0.9122055207582814,
      "learning_rate": 1.324774298019721e-08,
      "loss": 0.1115,
      "step": 10515
    },
    {
      "epoch": 0.9689040401713733,
      "grad_norm": 0.9770911885826764,
      "learning_rate": 1.316953872261606e-08,
      "loss": 0.1205,
      "step": 10516
    },
    {
      "epoch": 0.9689961763486433,
      "grad_norm": 0.9367724440014724,
      "learning_rate": 1.3091565367510661e-08,
      "loss": 0.1111,
      "step": 10517
    },
    {
      "epoch": 0.9690883125259133,
      "grad_norm": 0.9332000236767728,
      "learning_rate": 1.3013822922121332e-08,
      "loss": 0.1164,
      "step": 10518
    },
    {
      "epoch": 0.9691804487031833,
      "grad_norm": 0.9715687328064153,
      "learning_rate": 1.2936311393665912e-08,
      "loss": 0.1154,
      "step": 10519
    },
    {
      "epoch": 0.9692725848804533,
      "grad_norm": 0.9494480811734403,
      "learning_rate": 1.2859030789341698e-08,
      "loss": 0.1109,
      "step": 10520
    },
    {
      "epoch": 0.9693647210577233,
      "grad_norm": 0.8713494760993586,
      "learning_rate": 1.278198111632406e-08,
      "loss": 0.1073,
      "step": 10521
    },
    {
      "epoch": 0.9694568572349933,
      "grad_norm": 0.9593584205782064,
      "learning_rate": 1.2705162381767277e-08,
      "loss": 0.1166,
      "step": 10522
    },
    {
      "epoch": 0.9695489934122633,
      "grad_norm": 0.9071851172527483,
      "learning_rate": 1.2628574592803977e-08,
      "loss": 0.1131,
      "step": 10523
    },
    {
      "epoch": 0.9696411295895333,
      "grad_norm": 0.9053877155338074,
      "learning_rate": 1.2552217756545137e-08,
      "loss": 0.1115,
      "step": 10524
    },
    {
      "epoch": 0.9697332657668033,
      "grad_norm": 0.9666711819309619,
      "learning_rate": 1.2476091880080366e-08,
      "loss": 0.1189,
      "step": 10525
    },
    {
      "epoch": 0.9698254019440733,
      "grad_norm": 0.9263691427304236,
      "learning_rate": 1.240019697047845e-08,
      "loss": 0.1234,
      "step": 10526
    },
    {
      "epoch": 0.9699175381213433,
      "grad_norm": 0.9379347777164991,
      "learning_rate": 1.2324533034785702e-08,
      "loss": 0.1086,
      "step": 10527
    },
    {
      "epoch": 0.9700096742986134,
      "grad_norm": 0.9637020462719162,
      "learning_rate": 1.2249100080028164e-08,
      "loss": 0.1089,
      "step": 10528
    },
    {
      "epoch": 0.9701018104758834,
      "grad_norm": 0.9419892430527868,
      "learning_rate": 1.2173898113209126e-08,
      "loss": 0.1191,
      "step": 10529
    },
    {
      "epoch": 0.9701939466531534,
      "grad_norm": 0.9343313110426041,
      "learning_rate": 1.2098927141311333e-08,
      "loss": 0.1188,
      "step": 10530
    },
    {
      "epoch": 0.9702860828304234,
      "grad_norm": 0.9821001712352756,
      "learning_rate": 1.2024187171296165e-08,
      "loss": 0.1207,
      "step": 10531
    },
    {
      "epoch": 0.9703782190076934,
      "grad_norm": 0.936560662898743,
      "learning_rate": 1.1949678210102788e-08,
      "loss": 0.1141,
      "step": 10532
    },
    {
      "epoch": 0.9704703551849634,
      "grad_norm": 0.9816223567816494,
      "learning_rate": 1.1875400264649562e-08,
      "loss": 0.1137,
      "step": 10533
    },
    {
      "epoch": 0.9705624913622334,
      "grad_norm": 0.9258232675199926,
      "learning_rate": 1.1801353341833466e-08,
      "loss": 0.1106,
      "step": 10534
    },
    {
      "epoch": 0.9706546275395034,
      "grad_norm": 0.9150708660768186,
      "learning_rate": 1.1727537448529003e-08,
      "loss": 0.1079,
      "step": 10535
    },
    {
      "epoch": 0.9707467637167734,
      "grad_norm": 0.9204217315060662,
      "learning_rate": 1.1653952591590967e-08,
      "loss": 0.1169,
      "step": 10536
    },
    {
      "epoch": 0.9708388998940434,
      "grad_norm": 0.9549901951555877,
      "learning_rate": 1.1580598777850837e-08,
      "loss": 0.1115,
      "step": 10537
    },
    {
      "epoch": 0.9709310360713134,
      "grad_norm": 0.9021095506714563,
      "learning_rate": 1.1507476014120112e-08,
      "loss": 0.1081,
      "step": 10538
    },
    {
      "epoch": 0.9710231722485834,
      "grad_norm": 0.9292179876258926,
      "learning_rate": 1.143458430718808e-08,
      "loss": 0.12,
      "step": 10539
    },
    {
      "epoch": 0.9711153084258534,
      "grad_norm": 0.9377578388308517,
      "learning_rate": 1.136192366382266e-08,
      "loss": 0.1229,
      "step": 10540
    },
    {
      "epoch": 0.9712074446031235,
      "grad_norm": 0.9572734492491797,
      "learning_rate": 1.128949409077068e-08,
      "loss": 0.1144,
      "step": 10541
    },
    {
      "epoch": 0.9712995807803935,
      "grad_norm": 0.9870783763304519,
      "learning_rate": 1.121729559475676e-08,
      "loss": 0.118,
      "step": 10542
    },
    {
      "epoch": 0.9713917169576635,
      "grad_norm": 0.9116902070429109,
      "learning_rate": 1.1145328182484706e-08,
      "loss": 0.1097,
      "step": 10543
    },
    {
      "epoch": 0.9714838531349335,
      "grad_norm": 0.9063023701626302,
      "learning_rate": 1.1073591860636946e-08,
      "loss": 0.1156,
      "step": 10544
    },
    {
      "epoch": 0.9715759893122035,
      "grad_norm": 0.9457383706995881,
      "learning_rate": 1.1002086635873987e-08,
      "loss": 0.1241,
      "step": 10545
    },
    {
      "epoch": 0.9716681254894735,
      "grad_norm": 0.9838563803147564,
      "learning_rate": 1.0930812514835243e-08,
      "loss": 0.1213,
      "step": 10546
    },
    {
      "epoch": 0.9717602616667435,
      "grad_norm": 0.8800524682172473,
      "learning_rate": 1.0859769504138196e-08,
      "loss": 0.1083,
      "step": 10547
    },
    {
      "epoch": 0.9718523978440134,
      "grad_norm": 0.910856796387508,
      "learning_rate": 1.0788957610379791e-08,
      "loss": 0.1113,
      "step": 10548
    },
    {
      "epoch": 0.9719445340212834,
      "grad_norm": 0.8808796503629651,
      "learning_rate": 1.0718376840134214e-08,
      "loss": 0.1117,
      "step": 10549
    },
    {
      "epoch": 0.9720366701985534,
      "grad_norm": 0.9406849715983612,
      "learning_rate": 1.0648027199955391e-08,
      "loss": 0.1214,
      "step": 10550
    },
    {
      "epoch": 0.9721288063758234,
      "grad_norm": 0.9069532479196183,
      "learning_rate": 1.0577908696375316e-08,
      "loss": 0.1165,
      "step": 10551
    },
    {
      "epoch": 0.9722209425530934,
      "grad_norm": 0.9548171410344726,
      "learning_rate": 1.0508021335904061e-08,
      "loss": 0.1156,
      "step": 10552
    },
    {
      "epoch": 0.9723130787303634,
      "grad_norm": 0.9573038324311234,
      "learning_rate": 1.0438365125031158e-08,
      "loss": 0.1192,
      "step": 10553
    },
    {
      "epoch": 0.9724052149076334,
      "grad_norm": 0.9533383138256408,
      "learning_rate": 1.0368940070223932e-08,
      "loss": 0.1135,
      "step": 10554
    },
    {
      "epoch": 0.9724973510849035,
      "grad_norm": 0.9615848577133012,
      "learning_rate": 1.0299746177928338e-08,
      "loss": 0.1158,
      "step": 10555
    },
    {
      "epoch": 0.9725894872621735,
      "grad_norm": 0.9373204121607308,
      "learning_rate": 1.0230783454569515e-08,
      "loss": 0.1143,
      "step": 10556
    },
    {
      "epoch": 0.9726816234394435,
      "grad_norm": 0.9449065421907685,
      "learning_rate": 1.0162051906550397e-08,
      "loss": 0.1136,
      "step": 10557
    },
    {
      "epoch": 0.9727737596167135,
      "grad_norm": 0.88676617682467,
      "learning_rate": 1.0093551540252822e-08,
      "loss": 0.1071,
      "step": 10558
    },
    {
      "epoch": 0.9728658957939835,
      "grad_norm": 0.905225458618946,
      "learning_rate": 1.0025282362036704e-08,
      "loss": 0.1206,
      "step": 10559
    },
    {
      "epoch": 0.9729580319712535,
      "grad_norm": 0.9183727079728304,
      "learning_rate": 9.957244378241138e-09,
      "loss": 0.1101,
      "step": 10560
    },
    {
      "epoch": 0.9730501681485235,
      "grad_norm": 0.9845449747492825,
      "learning_rate": 9.889437595183293e-09,
      "loss": 0.1244,
      "step": 10561
    },
    {
      "epoch": 0.9731423043257935,
      "grad_norm": 0.9502006703842801,
      "learning_rate": 9.821862019159522e-09,
      "loss": 0.1231,
      "step": 10562
    },
    {
      "epoch": 0.9732344405030635,
      "grad_norm": 0.9617262844228264,
      "learning_rate": 9.754517656443697e-09,
      "loss": 0.1221,
      "step": 10563
    },
    {
      "epoch": 0.9733265766803335,
      "grad_norm": 0.9519604835311853,
      "learning_rate": 9.68740451328859e-09,
      "loss": 0.1171,
      "step": 10564
    },
    {
      "epoch": 0.9734187128576035,
      "grad_norm": 0.9637928773849125,
      "learning_rate": 9.62052259592644e-09,
      "loss": 0.1243,
      "step": 10565
    },
    {
      "epoch": 0.9735108490348735,
      "grad_norm": 0.9288945824254763,
      "learning_rate": 9.553871910566448e-09,
      "loss": 0.1239,
      "step": 10566
    },
    {
      "epoch": 0.9736029852121435,
      "grad_norm": 0.9472650891783666,
      "learning_rate": 9.487452463397828e-09,
      "loss": 0.1033,
      "step": 10567
    },
    {
      "epoch": 0.9736951213894135,
      "grad_norm": 0.9534769666335019,
      "learning_rate": 9.421264260587038e-09,
      "loss": 0.1191,
      "step": 10568
    },
    {
      "epoch": 0.9737872575666836,
      "grad_norm": 0.9897087742581929,
      "learning_rate": 9.355307308279992e-09,
      "loss": 0.1246,
      "step": 10569
    },
    {
      "epoch": 0.9738793937439536,
      "grad_norm": 0.9127514393900824,
      "learning_rate": 9.289581612600684e-09,
      "loss": 0.1194,
      "step": 10570
    },
    {
      "epoch": 0.9739715299212236,
      "grad_norm": 0.9052157433771952,
      "learning_rate": 9.224087179651731e-09,
      "loss": 0.1099,
      "step": 10571
    },
    {
      "epoch": 0.9740636660984936,
      "grad_norm": 0.9536514537796765,
      "learning_rate": 9.158824015514378e-09,
      "loss": 0.1174,
      "step": 10572
    },
    {
      "epoch": 0.9741558022757636,
      "grad_norm": 1.0070035166714224,
      "learning_rate": 9.093792126248224e-09,
      "loss": 0.1177,
      "step": 10573
    },
    {
      "epoch": 0.9742479384530336,
      "grad_norm": 0.9381398739474147,
      "learning_rate": 9.028991517891495e-09,
      "loss": 0.119,
      "step": 10574
    },
    {
      "epoch": 0.9743400746303036,
      "grad_norm": 0.9668291092324927,
      "learning_rate": 8.964422196461042e-09,
      "loss": 0.1296,
      "step": 10575
    },
    {
      "epoch": 0.9744322108075736,
      "grad_norm": 0.8874480746623137,
      "learning_rate": 8.900084167952072e-09,
      "loss": 0.1066,
      "step": 10576
    },
    {
      "epoch": 0.9745243469848436,
      "grad_norm": 0.93955289545439,
      "learning_rate": 8.835977438338417e-09,
      "loss": 0.114,
      "step": 10577
    },
    {
      "epoch": 0.9746164831621136,
      "grad_norm": 0.9040397264793893,
      "learning_rate": 8.772102013572537e-09,
      "loss": 0.1093,
      "step": 10578
    },
    {
      "epoch": 0.9747086193393836,
      "grad_norm": 0.9045961887807713,
      "learning_rate": 8.708457899584965e-09,
      "loss": 0.1128,
      "step": 10579
    },
    {
      "epoch": 0.9748007555166536,
      "grad_norm": 0.9615250420317627,
      "learning_rate": 8.645045102285143e-09,
      "loss": 0.1171,
      "step": 10580
    },
    {
      "epoch": 0.9748928916939236,
      "grad_norm": 0.9054598326002232,
      "learning_rate": 8.58186362756086e-09,
      "loss": 0.1186,
      "step": 10581
    },
    {
      "epoch": 0.9749850278711937,
      "grad_norm": 0.9889995978585244,
      "learning_rate": 8.518913481278812e-09,
      "loss": 0.1214,
      "step": 10582
    },
    {
      "epoch": 0.9750771640484637,
      "grad_norm": 0.9742523995692353,
      "learning_rate": 8.456194669284046e-09,
      "loss": 0.1277,
      "step": 10583
    },
    {
      "epoch": 0.9751693002257337,
      "grad_norm": 0.9658412586374955,
      "learning_rate": 8.393707197399404e-09,
      "loss": 0.1194,
      "step": 10584
    },
    {
      "epoch": 0.9752614364030037,
      "grad_norm": 0.8987042660004325,
      "learning_rate": 8.331451071427188e-09,
      "loss": 0.1154,
      "step": 10585
    },
    {
      "epoch": 0.9753535725802737,
      "grad_norm": 0.9238970510872365,
      "learning_rate": 8.269426297148053e-09,
      "loss": 0.1093,
      "step": 10586
    },
    {
      "epoch": 0.9754457087575437,
      "grad_norm": 0.9426614072312355,
      "learning_rate": 8.207632880320727e-09,
      "loss": 0.1089,
      "step": 10587
    },
    {
      "epoch": 0.9755378449348137,
      "grad_norm": 1.0276725978874846,
      "learning_rate": 8.146070826683116e-09,
      "loss": 0.1195,
      "step": 10588
    },
    {
      "epoch": 0.9756299811120837,
      "grad_norm": 0.9563523741611569,
      "learning_rate": 8.084740141950653e-09,
      "loss": 0.1236,
      "step": 10589
    },
    {
      "epoch": 0.9757221172893537,
      "grad_norm": 0.9156272141595668,
      "learning_rate": 8.023640831818502e-09,
      "loss": 0.1095,
      "step": 10590
    },
    {
      "epoch": 0.9758142534666236,
      "grad_norm": 0.9179595642185693,
      "learning_rate": 7.962772901959348e-09,
      "loss": 0.112,
      "step": 10591
    },
    {
      "epoch": 0.9759063896438936,
      "grad_norm": 0.9278463762785488,
      "learning_rate": 7.902136358025058e-09,
      "loss": 0.1138,
      "step": 10592
    },
    {
      "epoch": 0.9759985258211636,
      "grad_norm": 0.95021990936773,
      "learning_rate": 7.841731205645576e-09,
      "loss": 0.124,
      "step": 10593
    },
    {
      "epoch": 0.9760906619984336,
      "grad_norm": 0.8940755815437218,
      "learning_rate": 7.781557450429467e-09,
      "loss": 0.1009,
      "step": 10594
    },
    {
      "epoch": 0.9761827981757036,
      "grad_norm": 0.9717988157956657,
      "learning_rate": 7.72161509796393e-09,
      "loss": 0.1119,
      "step": 10595
    },
    {
      "epoch": 0.9762749343529737,
      "grad_norm": 0.9008600970075309,
      "learning_rate": 7.661904153814793e-09,
      "loss": 0.1133,
      "step": 10596
    },
    {
      "epoch": 0.9763670705302437,
      "grad_norm": 0.9419592224579174,
      "learning_rate": 7.60242462352595e-09,
      "loss": 0.1228,
      "step": 10597
    },
    {
      "epoch": 0.9764592067075137,
      "grad_norm": 0.9587973988727028,
      "learning_rate": 7.543176512620487e-09,
      "loss": 0.1168,
      "step": 10598
    },
    {
      "epoch": 0.9765513428847837,
      "grad_norm": 0.9321975028306119,
      "learning_rate": 7.484159826599002e-09,
      "loss": 0.1176,
      "step": 10599
    },
    {
      "epoch": 0.9766434790620537,
      "grad_norm": 0.922618089187133,
      "learning_rate": 7.425374570941557e-09,
      "loss": 0.1141,
      "step": 10600
    },
    {
      "epoch": 0.9767356152393237,
      "grad_norm": 0.8982462835378838,
      "learning_rate": 7.366820751106562e-09,
      "loss": 0.1142,
      "step": 10601
    },
    {
      "epoch": 0.9768277514165937,
      "grad_norm": 0.9843917017683375,
      "learning_rate": 7.308498372530226e-09,
      "loss": 0.1259,
      "step": 10602
    },
    {
      "epoch": 0.9769198875938637,
      "grad_norm": 0.9473204680331596,
      "learning_rate": 7.250407440628493e-09,
      "loss": 0.1234,
      "step": 10603
    },
    {
      "epoch": 0.9770120237711337,
      "grad_norm": 1.0040227923056881,
      "learning_rate": 7.192547960794549e-09,
      "loss": 0.1217,
      "step": 10604
    },
    {
      "epoch": 0.9771041599484037,
      "grad_norm": 0.947542454325738,
      "learning_rate": 7.134919938400486e-09,
      "loss": 0.1178,
      "step": 10605
    },
    {
      "epoch": 0.9771962961256737,
      "grad_norm": 0.9500522546507386,
      "learning_rate": 7.077523378797579e-09,
      "loss": 0.1136,
      "step": 10606
    },
    {
      "epoch": 0.9772884323029437,
      "grad_norm": 0.8961019047765427,
      "learning_rate": 7.0203582873151764e-09,
      "loss": 0.106,
      "step": 10607
    },
    {
      "epoch": 0.9773805684802137,
      "grad_norm": 0.9211980530137002,
      "learning_rate": 6.963424669260421e-09,
      "loss": 0.1176,
      "step": 10608
    },
    {
      "epoch": 0.9774727046574838,
      "grad_norm": 0.929269443134366,
      "learning_rate": 6.906722529920196e-09,
      "loss": 0.1233,
      "step": 10609
    },
    {
      "epoch": 0.9775648408347538,
      "grad_norm": 0.9473648799487335,
      "learning_rate": 6.850251874559177e-09,
      "loss": 0.1128,
      "step": 10610
    },
    {
      "epoch": 0.9776569770120238,
      "grad_norm": 0.9169410453945872,
      "learning_rate": 6.7940127084203945e-09,
      "loss": 0.1125,
      "step": 10611
    },
    {
      "epoch": 0.9777491131892938,
      "grad_norm": 0.9092218025061077,
      "learning_rate": 6.738005036726059e-09,
      "loss": 0.1082,
      "step": 10612
    },
    {
      "epoch": 0.9778412493665638,
      "grad_norm": 0.9487463863743852,
      "learning_rate": 6.682228864675899e-09,
      "loss": 0.117,
      "step": 10613
    },
    {
      "epoch": 0.9779333855438338,
      "grad_norm": 0.9286186855517777,
      "learning_rate": 6.626684197449384e-09,
      "loss": 0.1073,
      "step": 10614
    },
    {
      "epoch": 0.9780255217211038,
      "grad_norm": 0.9033427687803351,
      "learning_rate": 6.5713710402037775e-09,
      "loss": 0.111,
      "step": 10615
    },
    {
      "epoch": 0.9781176578983738,
      "grad_norm": 0.9705632853143739,
      "learning_rate": 6.516289398074416e-09,
      "loss": 0.1272,
      "step": 10616
    },
    {
      "epoch": 0.9782097940756438,
      "grad_norm": 0.9143294975731162,
      "learning_rate": 6.461439276176096e-09,
      "loss": 0.1156,
      "step": 10617
    },
    {
      "epoch": 0.9783019302529138,
      "grad_norm": 0.9656105337016601,
      "learning_rate": 6.406820679601411e-09,
      "loss": 0.1257,
      "step": 10618
    },
    {
      "epoch": 0.9783940664301838,
      "grad_norm": 1.006782451352813,
      "learning_rate": 6.35243361342186e-09,
      "loss": 0.1294,
      "step": 10619
    },
    {
      "epoch": 0.9784862026074538,
      "grad_norm": 0.930881618709819,
      "learning_rate": 6.298278082687015e-09,
      "loss": 0.1229,
      "step": 10620
    },
    {
      "epoch": 0.9785783387847238,
      "grad_norm": 0.9250924437201751,
      "learning_rate": 6.244354092425631e-09,
      "loss": 0.1092,
      "step": 10621
    },
    {
      "epoch": 0.9786704749619938,
      "grad_norm": 0.9004668653298935,
      "learning_rate": 6.190661647644259e-09,
      "loss": 0.0974,
      "step": 10622
    },
    {
      "epoch": 0.9787626111392639,
      "grad_norm": 0.9078763930835535,
      "learning_rate": 6.137200753328354e-09,
      "loss": 0.1078,
      "step": 10623
    },
    {
      "epoch": 0.9788547473165339,
      "grad_norm": 0.9246549887726953,
      "learning_rate": 6.083971414442003e-09,
      "loss": 0.1059,
      "step": 10624
    },
    {
      "epoch": 0.9789468834938039,
      "grad_norm": 0.9760946282787811,
      "learning_rate": 6.030973635926807e-09,
      "loss": 0.1196,
      "step": 10625
    },
    {
      "epoch": 0.9790390196710739,
      "grad_norm": 0.9148879730601133,
      "learning_rate": 5.9782074227046625e-09,
      "loss": 0.1106,
      "step": 10626
    },
    {
      "epoch": 0.9791311558483439,
      "grad_norm": 0.9830983954452395,
      "learning_rate": 5.925672779673875e-09,
      "loss": 0.13,
      "step": 10627
    },
    {
      "epoch": 0.9792232920256139,
      "grad_norm": 0.9168069775130212,
      "learning_rate": 5.87336971171304e-09,
      "loss": 0.1117,
      "step": 10628
    },
    {
      "epoch": 0.9793154282028839,
      "grad_norm": 0.927218481484028,
      "learning_rate": 5.821298223678274e-09,
      "loss": 0.1152,
      "step": 10629
    },
    {
      "epoch": 0.9794075643801539,
      "grad_norm": 0.9453897188162311,
      "learning_rate": 5.76945832040432e-09,
      "loss": 0.1227,
      "step": 10630
    },
    {
      "epoch": 0.9794997005574239,
      "grad_norm": 0.9079700350552379,
      "learning_rate": 5.717850006704551e-09,
      "loss": 0.1071,
      "step": 10631
    },
    {
      "epoch": 0.9795918367346939,
      "grad_norm": 0.9516553063862506,
      "learning_rate": 5.666473287370966e-09,
      "loss": 0.1245,
      "step": 10632
    },
    {
      "epoch": 0.9796839729119639,
      "grad_norm": 0.9634462965013579,
      "learning_rate": 5.615328167173639e-09,
      "loss": 0.1178,
      "step": 10633
    },
    {
      "epoch": 0.9797761090892338,
      "grad_norm": 0.9206987163610182,
      "learning_rate": 5.564414650861549e-09,
      "loss": 0.1154,
      "step": 10634
    },
    {
      "epoch": 0.9798682452665038,
      "grad_norm": 0.9014004204638184,
      "learning_rate": 5.513732743162303e-09,
      "loss": 0.1063,
      "step": 10635
    },
    {
      "epoch": 0.9799603814437738,
      "grad_norm": 0.8760217490984519,
      "learning_rate": 5.463282448781027e-09,
      "loss": 0.1048,
      "step": 10636
    },
    {
      "epoch": 0.980052517621044,
      "grad_norm": 0.9331572014494885,
      "learning_rate": 5.41306377240286e-09,
      "loss": 0.1111,
      "step": 10637
    },
    {
      "epoch": 0.980144653798314,
      "grad_norm": 0.9934794177717047,
      "learning_rate": 5.363076718689908e-09,
      "loss": 0.1257,
      "step": 10638
    },
    {
      "epoch": 0.9802367899755839,
      "grad_norm": 0.9255709555345145,
      "learning_rate": 5.313321292283735e-09,
      "loss": 0.1118,
      "step": 10639
    },
    {
      "epoch": 0.9803289261528539,
      "grad_norm": 0.9304134226487328,
      "learning_rate": 5.263797497804257e-09,
      "loss": 0.1202,
      "step": 10640
    },
    {
      "epoch": 0.9804210623301239,
      "grad_norm": 0.9036732654637581,
      "learning_rate": 5.2145053398494626e-09,
      "loss": 0.113,
      "step": 10641
    },
    {
      "epoch": 0.9805131985073939,
      "grad_norm": 0.884189794772743,
      "learning_rate": 5.165444822996801e-09,
      "loss": 0.1096,
      "step": 10642
    },
    {
      "epoch": 0.9806053346846639,
      "grad_norm": 0.9227208369894664,
      "learning_rate": 5.116615951800685e-09,
      "loss": 0.1138,
      "step": 10643
    },
    {
      "epoch": 0.9806974708619339,
      "grad_norm": 0.9727271841430377,
      "learning_rate": 5.068018730795543e-09,
      "loss": 0.1278,
      "step": 10644
    },
    {
      "epoch": 0.9807896070392039,
      "grad_norm": 0.8889123529269067,
      "learning_rate": 5.019653164493044e-09,
      "loss": 0.1126,
      "step": 10645
    },
    {
      "epoch": 0.9808817432164739,
      "grad_norm": 0.9502701859156512,
      "learning_rate": 4.971519257384316e-09,
      "loss": 0.1164,
      "step": 10646
    },
    {
      "epoch": 0.9809738793937439,
      "grad_norm": 0.9612182962807315,
      "learning_rate": 4.9236170139388415e-09,
      "loss": 0.1181,
      "step": 10647
    },
    {
      "epoch": 0.9810660155710139,
      "grad_norm": 0.9677109113081508,
      "learning_rate": 4.875946438603896e-09,
      "loss": 0.1192,
      "step": 10648
    },
    {
      "epoch": 0.9811581517482839,
      "grad_norm": 0.934500406425529,
      "learning_rate": 4.828507535805937e-09,
      "loss": 0.1155,
      "step": 10649
    },
    {
      "epoch": 0.981250287925554,
      "grad_norm": 0.9816515440764665,
      "learning_rate": 4.781300309949221e-09,
      "loss": 0.122,
      "step": 10650
    },
    {
      "epoch": 0.981342424102824,
      "grad_norm": 0.8982107358428756,
      "learning_rate": 4.734324765417741e-09,
      "loss": 0.1081,
      "step": 10651
    },
    {
      "epoch": 0.981434560280094,
      "grad_norm": 0.9325950183542184,
      "learning_rate": 4.687580906572453e-09,
      "loss": 0.1197,
      "step": 10652
    },
    {
      "epoch": 0.981526696457364,
      "grad_norm": 0.9549246082334452,
      "learning_rate": 4.6410687377540505e-09,
      "loss": 0.1154,
      "step": 10653
    },
    {
      "epoch": 0.981618832634634,
      "grad_norm": 0.9353084160232171,
      "learning_rate": 4.5947882632810244e-09,
      "loss": 0.1083,
      "step": 10654
    },
    {
      "epoch": 0.981710968811904,
      "grad_norm": 0.8924218274524065,
      "learning_rate": 4.5487394874502155e-09,
      "loss": 0.105,
      "step": 10655
    },
    {
      "epoch": 0.981803104989174,
      "grad_norm": 0.9371527891685825,
      "learning_rate": 4.502922414537647e-09,
      "loss": 0.1162,
      "step": 10656
    },
    {
      "epoch": 0.981895241166444,
      "grad_norm": 0.9043040642121698,
      "learning_rate": 4.457337048797139e-09,
      "loss": 0.112,
      "step": 10657
    },
    {
      "epoch": 0.981987377343714,
      "grad_norm": 0.9966084418021494,
      "learning_rate": 4.411983394461694e-09,
      "loss": 0.1213,
      "step": 10658
    },
    {
      "epoch": 0.982079513520984,
      "grad_norm": 0.9518988653583343,
      "learning_rate": 4.366861455742111e-09,
      "loss": 0.1136,
      "step": 10659
    },
    {
      "epoch": 0.982171649698254,
      "grad_norm": 0.9182319295200211,
      "learning_rate": 4.321971236827815e-09,
      "loss": 0.1124,
      "step": 10660
    },
    {
      "epoch": 0.982263785875524,
      "grad_norm": 0.9072685324613959,
      "learning_rate": 4.277312741887418e-09,
      "loss": 0.1162,
      "step": 10661
    },
    {
      "epoch": 0.982355922052794,
      "grad_norm": 0.9127397436133858,
      "learning_rate": 4.232885975066769e-09,
      "loss": 0.1073,
      "step": 10662
    },
    {
      "epoch": 0.982448058230064,
      "grad_norm": 0.9097974449201501,
      "learning_rate": 4.188690940491457e-09,
      "loss": 0.1221,
      "step": 10663
    },
    {
      "epoch": 0.9825401944073341,
      "grad_norm": 0.921889778134402,
      "learning_rate": 4.144727642264867e-09,
      "loss": 0.1033,
      "step": 10664
    },
    {
      "epoch": 0.9826323305846041,
      "grad_norm": 0.9406033364188552,
      "learning_rate": 4.100996084468734e-09,
      "loss": 0.1169,
      "step": 10665
    },
    {
      "epoch": 0.9827244667618741,
      "grad_norm": 0.948017766212838,
      "learning_rate": 4.057496271163974e-09,
      "loss": 0.1186,
      "step": 10666
    },
    {
      "epoch": 0.9828166029391441,
      "grad_norm": 0.9274946903488767,
      "learning_rate": 4.014228206389026e-09,
      "loss": 0.1189,
      "step": 10667
    },
    {
      "epoch": 0.9829087391164141,
      "grad_norm": 0.9316329448709069,
      "learning_rate": 3.971191894161785e-09,
      "loss": 0.1147,
      "step": 10668
    },
    {
      "epoch": 0.9830008752936841,
      "grad_norm": 0.9329760448425564,
      "learning_rate": 3.9283873384779455e-09,
      "loss": 0.1111,
      "step": 10669
    },
    {
      "epoch": 0.9830930114709541,
      "grad_norm": 0.9303551616716375,
      "learning_rate": 3.8858145433118275e-09,
      "loss": 0.1149,
      "step": 10670
    },
    {
      "epoch": 0.9831851476482241,
      "grad_norm": 1.0299478339943349,
      "learning_rate": 3.843473512616658e-09,
      "loss": 0.1233,
      "step": 10671
    },
    {
      "epoch": 0.9832772838254941,
      "grad_norm": 0.9347356160626646,
      "learning_rate": 3.801364250323458e-09,
      "loss": 0.1204,
      "step": 10672
    },
    {
      "epoch": 0.9833694200027641,
      "grad_norm": 0.9154680976488679,
      "learning_rate": 3.759486760342435e-09,
      "loss": 0.1131,
      "step": 10673
    },
    {
      "epoch": 0.9834615561800341,
      "grad_norm": 0.9648845681864151,
      "learning_rate": 3.7178410465615876e-09,
      "loss": 0.1074,
      "step": 10674
    },
    {
      "epoch": 0.983553692357304,
      "grad_norm": 0.9607039283534857,
      "learning_rate": 3.676427112848102e-09,
      "loss": 0.108,
      "step": 10675
    },
    {
      "epoch": 0.983645828534574,
      "grad_norm": 0.9682615771187273,
      "learning_rate": 3.63524496304668e-09,
      "loss": 0.1303,
      "step": 10676
    },
    {
      "epoch": 0.9837379647118442,
      "grad_norm": 1.004190066912663,
      "learning_rate": 3.5942946009814848e-09,
      "loss": 0.1222,
      "step": 10677
    },
    {
      "epoch": 0.9838301008891142,
      "grad_norm": 0.9169312481844852,
      "learning_rate": 3.553576030454753e-09,
      "loss": 0.1105,
      "step": 10678
    },
    {
      "epoch": 0.9839222370663842,
      "grad_norm": 0.9427475770972022,
      "learning_rate": 3.5130892552473485e-09,
      "loss": 0.122,
      "step": 10679
    },
    {
      "epoch": 0.9840143732436542,
      "grad_norm": 0.909718197191416,
      "learning_rate": 3.4728342791179313e-09,
      "loss": 0.1105,
      "step": 10680
    },
    {
      "epoch": 0.9841065094209241,
      "grad_norm": 0.958982757205956,
      "learning_rate": 3.432811105804623e-09,
      "loss": 0.118,
      "step": 10681
    },
    {
      "epoch": 0.9841986455981941,
      "grad_norm": 0.9241596455799383,
      "learning_rate": 3.3930197390236175e-09,
      "loss": 0.114,
      "step": 10682
    },
    {
      "epoch": 0.9842907817754641,
      "grad_norm": 0.9580131120057819,
      "learning_rate": 3.353460182469459e-09,
      "loss": 0.1243,
      "step": 10683
    },
    {
      "epoch": 0.9843829179527341,
      "grad_norm": 0.9537908329817629,
      "learning_rate": 3.3141324398150434e-09,
      "loss": 0.1231,
      "step": 10684
    },
    {
      "epoch": 0.9844750541300041,
      "grad_norm": 0.9846295496352669,
      "learning_rate": 3.275036514712171e-09,
      "loss": 0.1181,
      "step": 10685
    },
    {
      "epoch": 0.9845671903072741,
      "grad_norm": 1.0269472534084152,
      "learning_rate": 3.236172410790994e-09,
      "loss": 0.1243,
      "step": 10686
    },
    {
      "epoch": 0.9846593264845441,
      "grad_norm": 0.9138841791343236,
      "learning_rate": 3.1975401316597376e-09,
      "loss": 0.1114,
      "step": 10687
    },
    {
      "epoch": 0.9847514626618141,
      "grad_norm": 0.9102065333454794,
      "learning_rate": 3.1591396809055317e-09,
      "loss": 0.1126,
      "step": 10688
    },
    {
      "epoch": 0.9848435988390841,
      "grad_norm": 0.9490694422798538,
      "learning_rate": 3.120971062094136e-09,
      "loss": 0.1243,
      "step": 10689
    },
    {
      "epoch": 0.9849357350163541,
      "grad_norm": 0.9356220763269121,
      "learning_rate": 3.0830342787693814e-09,
      "loss": 0.116,
      "step": 10690
    },
    {
      "epoch": 0.9850278711936242,
      "grad_norm": 0.9780782171880228,
      "learning_rate": 3.0453293344534507e-09,
      "loss": 0.1134,
      "step": 10691
    },
    {
      "epoch": 0.9851200073708942,
      "grad_norm": 0.927888813498766,
      "learning_rate": 3.007856232647155e-09,
      "loss": 0.1128,
      "step": 10692
    },
    {
      "epoch": 0.9852121435481642,
      "grad_norm": 0.987536393409098,
      "learning_rate": 2.970614976830488e-09,
      "loss": 0.1182,
      "step": 10693
    },
    {
      "epoch": 0.9853042797254342,
      "grad_norm": 0.9263137120241127,
      "learning_rate": 2.933605570460962e-09,
      "loss": 0.1059,
      "step": 10694
    },
    {
      "epoch": 0.9853964159027042,
      "grad_norm": 0.9625610479510435,
      "learning_rate": 2.8968280169747177e-09,
      "loss": 0.1197,
      "step": 10695
    },
    {
      "epoch": 0.9854885520799742,
      "grad_norm": 0.9172728982589524,
      "learning_rate": 2.8602823197868e-09,
      "loss": 0.1093,
      "step": 10696
    },
    {
      "epoch": 0.9855806882572442,
      "grad_norm": 0.9632436012736146,
      "learning_rate": 2.823968482290329e-09,
      "loss": 0.1253,
      "step": 10697
    },
    {
      "epoch": 0.9856728244345142,
      "grad_norm": 0.9223452021204606,
      "learning_rate": 2.787886507857329e-09,
      "loss": 0.1129,
      "step": 10698
    },
    {
      "epoch": 0.9857649606117842,
      "grad_norm": 0.8890863207340997,
      "learning_rate": 2.7520363998376208e-09,
      "loss": 0.1153,
      "step": 10699
    },
    {
      "epoch": 0.9858570967890542,
      "grad_norm": 0.9555634706681613,
      "learning_rate": 2.716418161560208e-09,
      "loss": 0.1153,
      "step": 10700
    },
    {
      "epoch": 0.9859492329663242,
      "grad_norm": 0.9023523390251761,
      "learning_rate": 2.6810317963321674e-09,
      "loss": 0.116,
      "step": 10701
    },
    {
      "epoch": 0.9860413691435942,
      "grad_norm": 0.9676451487324907,
      "learning_rate": 2.6458773074389266e-09,
      "loss": 0.1128,
      "step": 10702
    },
    {
      "epoch": 0.9861335053208642,
      "grad_norm": 0.9623707144940772,
      "learning_rate": 2.610954698145096e-09,
      "loss": 0.1231,
      "step": 10703
    },
    {
      "epoch": 0.9862256414981343,
      "grad_norm": 0.9138591551329498,
      "learning_rate": 2.5762639716925274e-09,
      "loss": 0.1164,
      "step": 10704
    },
    {
      "epoch": 0.9863177776754043,
      "grad_norm": 0.9348545180273251,
      "learning_rate": 2.5418051313028102e-09,
      "loss": 0.116,
      "step": 10705
    },
    {
      "epoch": 0.9864099138526743,
      "grad_norm": 0.9045750930758,
      "learning_rate": 2.507578180175052e-09,
      "loss": 0.1059,
      "step": 10706
    },
    {
      "epoch": 0.9865020500299443,
      "grad_norm": 0.9558289080953325,
      "learning_rate": 2.473583121487544e-09,
      "loss": 0.1191,
      "step": 10707
    },
    {
      "epoch": 0.9865941862072143,
      "grad_norm": 0.9052879057162557,
      "learning_rate": 2.43981995839665e-09,
      "loss": 0.1145,
      "step": 10708
    },
    {
      "epoch": 0.9866863223844843,
      "grad_norm": 0.951275220374051,
      "learning_rate": 2.406288694037362e-09,
      "loss": 0.1206,
      "step": 10709
    },
    {
      "epoch": 0.9867784585617543,
      "grad_norm": 0.9285907818070983,
      "learning_rate": 2.3729893315230234e-09,
      "loss": 0.1151,
      "step": 10710
    },
    {
      "epoch": 0.9868705947390243,
      "grad_norm": 0.9395711360817434,
      "learning_rate": 2.339921873945328e-09,
      "loss": 0.1192,
      "step": 10711
    },
    {
      "epoch": 0.9869627309162943,
      "grad_norm": 0.90872727900148,
      "learning_rate": 2.3070863243745967e-09,
      "loss": 0.1197,
      "step": 10712
    },
    {
      "epoch": 0.9870548670935643,
      "grad_norm": 0.9485592509220756,
      "learning_rate": 2.2744826858597803e-09,
      "loss": 0.1159,
      "step": 10713
    },
    {
      "epoch": 0.9871470032708343,
      "grad_norm": 0.9328209116258498,
      "learning_rate": 2.2421109614279015e-09,
      "loss": 0.1123,
      "step": 10714
    },
    {
      "epoch": 0.9872391394481043,
      "grad_norm": 1.0114257066594805,
      "learning_rate": 2.209971154084889e-09,
      "loss": 0.129,
      "step": 10715
    },
    {
      "epoch": 0.9873312756253743,
      "grad_norm": 0.9576620679676022,
      "learning_rate": 2.1780632668150226e-09,
      "loss": 0.1192,
      "step": 10716
    },
    {
      "epoch": 0.9874234118026443,
      "grad_norm": 0.9937479557344089,
      "learning_rate": 2.1463873025806547e-09,
      "loss": 0.13,
      "step": 10717
    },
    {
      "epoch": 0.9875155479799144,
      "grad_norm": 0.8957813580668273,
      "learning_rate": 2.1149432643233213e-09,
      "loss": 0.1015,
      "step": 10718
    },
    {
      "epoch": 0.9876076841571844,
      "grad_norm": 0.9371753197462088,
      "learning_rate": 2.0837311549620763e-09,
      "loss": 0.1105,
      "step": 10719
    },
    {
      "epoch": 0.9876998203344544,
      "grad_norm": 0.9100179659422312,
      "learning_rate": 2.052750977395157e-09,
      "loss": 0.1109,
      "step": 10720
    },
    {
      "epoch": 0.9877919565117244,
      "grad_norm": 0.9000073802913596,
      "learning_rate": 2.0220027344994285e-09,
      "loss": 0.1063,
      "step": 10721
    },
    {
      "epoch": 0.9878840926889944,
      "grad_norm": 0.913903695944919,
      "learning_rate": 1.9914864291292747e-09,
      "loss": 0.1135,
      "step": 10722
    },
    {
      "epoch": 0.9879762288662643,
      "grad_norm": 0.9678585764440448,
      "learning_rate": 1.961202064118539e-09,
      "loss": 0.1222,
      "step": 10723
    },
    {
      "epoch": 0.9880683650435343,
      "grad_norm": 0.9568563012010259,
      "learning_rate": 1.9311496422791398e-09,
      "loss": 0.1183,
      "step": 10724
    },
    {
      "epoch": 0.9881605012208043,
      "grad_norm": 0.9891580511080038,
      "learning_rate": 1.9013291664013445e-09,
      "loss": 0.1247,
      "step": 10725
    },
    {
      "epoch": 0.9882526373980743,
      "grad_norm": 0.8893119723719847,
      "learning_rate": 1.8717406392537718e-09,
      "loss": 0.1032,
      "step": 10726
    },
    {
      "epoch": 0.9883447735753443,
      "grad_norm": 0.9638566658251766,
      "learning_rate": 1.8423840635842237e-09,
      "loss": 0.1255,
      "step": 10727
    },
    {
      "epoch": 0.9884369097526143,
      "grad_norm": 0.9313632687076696,
      "learning_rate": 1.8132594421180206e-09,
      "loss": 0.1166,
      "step": 10728
    },
    {
      "epoch": 0.9885290459298843,
      "grad_norm": 0.9253116813594806,
      "learning_rate": 1.7843667775593875e-09,
      "loss": 0.111,
      "step": 10729
    },
    {
      "epoch": 0.9886211821071543,
      "grad_norm": 0.8668298633727463,
      "learning_rate": 1.7557060725914566e-09,
      "loss": 0.0993,
      "step": 10730
    },
    {
      "epoch": 0.9887133182844243,
      "grad_norm": 0.9730611123571105,
      "learning_rate": 1.7272773298748769e-09,
      "loss": 0.1258,
      "step": 10731
    },
    {
      "epoch": 0.9888054544616944,
      "grad_norm": 0.9001091638779297,
      "learning_rate": 1.6990805520494813e-09,
      "loss": 0.1135,
      "step": 10732
    },
    {
      "epoch": 0.9888975906389644,
      "grad_norm": 0.9557725581836793,
      "learning_rate": 1.6711157417334533e-09,
      "loss": 0.1169,
      "step": 10733
    },
    {
      "epoch": 0.9889897268162344,
      "grad_norm": 0.9184313551073652,
      "learning_rate": 1.6433829015230497e-09,
      "loss": 0.1135,
      "step": 10734
    },
    {
      "epoch": 0.9890818629935044,
      "grad_norm": 0.8740577853942184,
      "learning_rate": 1.6158820339937098e-09,
      "loss": 0.1076,
      "step": 10735
    },
    {
      "epoch": 0.9891739991707744,
      "grad_norm": 0.9511768299527451,
      "learning_rate": 1.5886131416981144e-09,
      "loss": 0.1036,
      "step": 10736
    },
    {
      "epoch": 0.9892661353480444,
      "grad_norm": 0.9315575336147978,
      "learning_rate": 1.5615762271689593e-09,
      "loss": 0.1257,
      "step": 10737
    },
    {
      "epoch": 0.9893582715253144,
      "grad_norm": 0.9735995642076678,
      "learning_rate": 1.5347712929164594e-09,
      "loss": 0.1243,
      "step": 10738
    },
    {
      "epoch": 0.9894504077025844,
      "grad_norm": 0.9609590717734398,
      "learning_rate": 1.508198341429179e-09,
      "loss": 0.1245,
      "step": 10739
    },
    {
      "epoch": 0.9895425438798544,
      "grad_norm": 0.9803389685165724,
      "learning_rate": 1.481857375174589e-09,
      "loss": 0.1245,
      "step": 10740
    },
    {
      "epoch": 0.9896346800571244,
      "grad_norm": 0.9447187065505281,
      "learning_rate": 1.4557483965985109e-09,
      "loss": 0.1217,
      "step": 10741
    },
    {
      "epoch": 0.9897268162343944,
      "grad_norm": 0.9222073699752521,
      "learning_rate": 1.4298714081248389e-09,
      "loss": 0.1154,
      "step": 10742
    },
    {
      "epoch": 0.9898189524116644,
      "grad_norm": 0.9511417934769287,
      "learning_rate": 1.4042264121566507e-09,
      "loss": 0.1159,
      "step": 10743
    },
    {
      "epoch": 0.9899110885889344,
      "grad_norm": 0.9365644687505328,
      "learning_rate": 1.3788134110750972e-09,
      "loss": 0.1162,
      "step": 10744
    },
    {
      "epoch": 0.9900032247662045,
      "grad_norm": 0.9530663162797376,
      "learning_rate": 1.3536324072394026e-09,
      "loss": 0.1217,
      "step": 10745
    },
    {
      "epoch": 0.9900953609434745,
      "grad_norm": 0.9721468538247888,
      "learning_rate": 1.3286834029879735e-09,
      "loss": 0.1193,
      "step": 10746
    },
    {
      "epoch": 0.9901874971207445,
      "grad_norm": 0.917636144996345,
      "learning_rate": 1.303966400637291e-09,
      "loss": 0.1153,
      "step": 10747
    },
    {
      "epoch": 0.9902796332980145,
      "grad_norm": 0.8968146551784478,
      "learning_rate": 1.279481402481908e-09,
      "loss": 0.1095,
      "step": 10748
    },
    {
      "epoch": 0.9903717694752845,
      "grad_norm": 0.9204522570969634,
      "learning_rate": 1.255228410795839e-09,
      "loss": 0.1132,
      "step": 10749
    },
    {
      "epoch": 0.9904639056525545,
      "grad_norm": 0.909280443704693,
      "learning_rate": 1.2312074278308939e-09,
      "loss": 0.1053,
      "step": 10750
    },
    {
      "epoch": 0.9905560418298245,
      "grad_norm": 0.999015146759792,
      "learning_rate": 1.2074184558169554e-09,
      "loss": 0.1238,
      "step": 10751
    },
    {
      "epoch": 0.9906481780070945,
      "grad_norm": 1.0173745580641365,
      "learning_rate": 1.1838614969633678e-09,
      "loss": 0.134,
      "step": 10752
    },
    {
      "epoch": 0.9907403141843645,
      "grad_norm": 0.9423136441090145,
      "learning_rate": 1.1605365534569922e-09,
      "loss": 0.1165,
      "step": 10753
    },
    {
      "epoch": 0.9908324503616345,
      "grad_norm": 0.9484240848690269,
      "learning_rate": 1.1374436274635968e-09,
      "loss": 0.1207,
      "step": 10754
    },
    {
      "epoch": 0.9909245865389045,
      "grad_norm": 0.9701778726691909,
      "learning_rate": 1.1145827211278548e-09,
      "loss": 0.1191,
      "step": 10755
    },
    {
      "epoch": 0.9910167227161745,
      "grad_norm": 0.918169085936637,
      "learning_rate": 1.0919538365716797e-09,
      "loss": 0.1095,
      "step": 10756
    },
    {
      "epoch": 0.9911088588934445,
      "grad_norm": 0.9436039067062373,
      "learning_rate": 1.069556975896724e-09,
      "loss": 0.1164,
      "step": 10757
    },
    {
      "epoch": 0.9912009950707145,
      "grad_norm": 0.961966720663876,
      "learning_rate": 1.047392141182435e-09,
      "loss": 0.1157,
      "step": 10758
    },
    {
      "epoch": 0.9912931312479846,
      "grad_norm": 0.9031848710547364,
      "learning_rate": 1.0254593344866115e-09,
      "loss": 0.1156,
      "step": 10759
    },
    {
      "epoch": 0.9913852674252546,
      "grad_norm": 0.9289358118171895,
      "learning_rate": 1.00375855784568e-09,
      "loss": 0.1161,
      "step": 10760
    },
    {
      "epoch": 0.9914774036025246,
      "grad_norm": 0.9280764395282413,
      "learning_rate": 9.822898132749726e-10,
      "loss": 0.1127,
      "step": 10761
    },
    {
      "epoch": 0.9915695397797946,
      "grad_norm": 0.9245548787551704,
      "learning_rate": 9.610531027673398e-10,
      "loss": 0.1099,
      "step": 10762
    },
    {
      "epoch": 0.9916616759570646,
      "grad_norm": 0.9526967095042527,
      "learning_rate": 9.400484282950928e-10,
      "loss": 0.1116,
      "step": 10763
    },
    {
      "epoch": 0.9917538121343346,
      "grad_norm": 0.9584267650838287,
      "learning_rate": 9.192757918083383e-10,
      "loss": 0.1247,
      "step": 10764
    },
    {
      "epoch": 0.9918459483116046,
      "grad_norm": 0.9599737471955169,
      "learning_rate": 8.987351952355338e-10,
      "loss": 0.1188,
      "step": 10765
    },
    {
      "epoch": 0.9919380844888745,
      "grad_norm": 0.9471789520600152,
      "learning_rate": 8.7842664048432e-10,
      "loss": 0.1232,
      "step": 10766
    },
    {
      "epoch": 0.9920302206661445,
      "grad_norm": 0.9560335153255229,
      "learning_rate": 8.58350129440133e-10,
      "loss": 0.123,
      "step": 10767
    },
    {
      "epoch": 0.9921223568434145,
      "grad_norm": 0.9297203583707537,
      "learning_rate": 8.385056639670375e-10,
      "loss": 0.1104,
      "step": 10768
    },
    {
      "epoch": 0.9922144930206845,
      "grad_norm": 0.9557436945001951,
      "learning_rate": 8.188932459077259e-10,
      "loss": 0.1148,
      "step": 10769
    },
    {
      "epoch": 0.9923066291979545,
      "grad_norm": 0.9382086601075487,
      "learning_rate": 7.995128770829641e-10,
      "loss": 0.1229,
      "step": 10770
    },
    {
      "epoch": 0.9923987653752245,
      "grad_norm": 0.9776951290640383,
      "learning_rate": 7.803645592927012e-10,
      "loss": 0.1261,
      "step": 10771
    },
    {
      "epoch": 0.9924909015524946,
      "grad_norm": 0.9234222588432033,
      "learning_rate": 7.614482943144041e-10,
      "loss": 0.1078,
      "step": 10772
    },
    {
      "epoch": 0.9925830377297646,
      "grad_norm": 0.9425926563945666,
      "learning_rate": 7.427640839044458e-10,
      "loss": 0.1165,
      "step": 10773
    },
    {
      "epoch": 0.9926751739070346,
      "grad_norm": 0.9997978213398736,
      "learning_rate": 7.243119297981049e-10,
      "loss": 0.1301,
      "step": 10774
    },
    {
      "epoch": 0.9927673100843046,
      "grad_norm": 0.9622010406970339,
      "learning_rate": 7.060918337081779e-10,
      "loss": 0.1298,
      "step": 10775
    },
    {
      "epoch": 0.9928594462615746,
      "grad_norm": 0.9629025338726946,
      "learning_rate": 6.881037973266447e-10,
      "loss": 0.1085,
      "step": 10776
    },
    {
      "epoch": 0.9929515824388446,
      "grad_norm": 0.9442954815335793,
      "learning_rate": 6.703478223235582e-10,
      "loss": 0.1183,
      "step": 10777
    },
    {
      "epoch": 0.9930437186161146,
      "grad_norm": 0.9778711461998094,
      "learning_rate": 6.528239103478773e-10,
      "loss": 0.1207,
      "step": 10778
    },
    {
      "epoch": 0.9931358547933846,
      "grad_norm": 0.8971965968414244,
      "learning_rate": 6.355320630263561e-10,
      "loss": 0.108,
      "step": 10779
    },
    {
      "epoch": 0.9932279909706546,
      "grad_norm": 0.944982803475713,
      "learning_rate": 6.184722819646549e-10,
      "loss": 0.1075,
      "step": 10780
    },
    {
      "epoch": 0.9933201271479246,
      "grad_norm": 0.9722778893436133,
      "learning_rate": 6.016445687467842e-10,
      "loss": 0.1195,
      "step": 10781
    },
    {
      "epoch": 0.9934122633251946,
      "grad_norm": 0.9633531416564072,
      "learning_rate": 5.850489249351054e-10,
      "loss": 0.1316,
      "step": 10782
    },
    {
      "epoch": 0.9935043995024646,
      "grad_norm": 0.8947891332160514,
      "learning_rate": 5.686853520708857e-10,
      "loss": 0.1036,
      "step": 10783
    },
    {
      "epoch": 0.9935965356797346,
      "grad_norm": 0.8921400585380197,
      "learning_rate": 5.525538516729101e-10,
      "loss": 0.1088,
      "step": 10784
    },
    {
      "epoch": 0.9936886718570046,
      "grad_norm": 0.9192509868974265,
      "learning_rate": 5.366544252397021e-10,
      "loss": 0.1181,
      "step": 10785
    },
    {
      "epoch": 0.9937808080342747,
      "grad_norm": 0.9294284724982631,
      "learning_rate": 5.209870742467482e-10,
      "loss": 0.1192,
      "step": 10786
    },
    {
      "epoch": 0.9938729442115447,
      "grad_norm": 0.9204015586179748,
      "learning_rate": 5.055518001492731e-10,
      "loss": 0.1158,
      "step": 10787
    },
    {
      "epoch": 0.9939650803888147,
      "grad_norm": 0.9626572036280917,
      "learning_rate": 4.903486043802974e-10,
      "loss": 0.1205,
      "step": 10788
    },
    {
      "epoch": 0.9940572165660847,
      "grad_norm": 0.9249171298942248,
      "learning_rate": 4.75377488351747e-10,
      "loss": 0.1172,
      "step": 10789
    },
    {
      "epoch": 0.9941493527433547,
      "grad_norm": 0.916272268645282,
      "learning_rate": 4.6063845345306613e-10,
      "loss": 0.1096,
      "step": 10790
    },
    {
      "epoch": 0.9942414889206247,
      "grad_norm": 0.9606174055026327,
      "learning_rate": 4.4613150105315974e-10,
      "loss": 0.1159,
      "step": 10791
    },
    {
      "epoch": 0.9943336250978947,
      "grad_norm": 0.9366043316921812,
      "learning_rate": 4.3185663249900587e-10,
      "loss": 0.1151,
      "step": 10792
    },
    {
      "epoch": 0.9944257612751647,
      "grad_norm": 0.9470015355101646,
      "learning_rate": 4.1781384911593336e-10,
      "loss": 0.1106,
      "step": 10793
    },
    {
      "epoch": 0.9945178974524347,
      "grad_norm": 0.9553448889331428,
      "learning_rate": 4.040031522078991e-10,
      "loss": 0.1155,
      "step": 10794
    },
    {
      "epoch": 0.9946100336297047,
      "grad_norm": 0.943307344715156,
      "learning_rate": 3.904245430569331e-10,
      "loss": 0.1182,
      "step": 10795
    },
    {
      "epoch": 0.9947021698069747,
      "grad_norm": 0.941980364543447,
      "learning_rate": 3.7707802292424877e-10,
      "loss": 0.1068,
      "step": 10796
    },
    {
      "epoch": 0.9947943059842447,
      "grad_norm": 0.9633082238006477,
      "learning_rate": 3.639635930491325e-10,
      "loss": 0.1169,
      "step": 10797
    },
    {
      "epoch": 0.9948864421615147,
      "grad_norm": 0.9486204676908485,
      "learning_rate": 3.5108125464866636e-10,
      "loss": 0.1149,
      "step": 10798
    },
    {
      "epoch": 0.9949785783387847,
      "grad_norm": 0.9663686096850286,
      "learning_rate": 3.3843100891939316e-10,
      "loss": 0.118,
      "step": 10799
    },
    {
      "epoch": 0.9950707145160548,
      "grad_norm": 0.922095330879719,
      "learning_rate": 3.260128570359289e-10,
      "loss": 0.1164,
      "step": 10800
    },
    {
      "epoch": 0.9951628506933248,
      "grad_norm": 1.000098514142364,
      "learning_rate": 3.138268001509626e-10,
      "loss": 0.1222,
      "step": 10801
    },
    {
      "epoch": 0.9952549868705948,
      "grad_norm": 0.9719465806233039,
      "learning_rate": 3.018728393963666e-10,
      "loss": 0.1253,
      "step": 10802
    },
    {
      "epoch": 0.9953471230478648,
      "grad_norm": 0.9543309585482759,
      "learning_rate": 2.901509758820864e-10,
      "loss": 0.1206,
      "step": 10803
    },
    {
      "epoch": 0.9954392592251348,
      "grad_norm": 0.9013190921660513,
      "learning_rate": 2.786612106961406e-10,
      "loss": 0.11,
      "step": 10804
    },
    {
      "epoch": 0.9955313954024048,
      "grad_norm": 0.9654267379006883,
      "learning_rate": 2.674035449054535e-10,
      "loss": 0.114,
      "step": 10805
    },
    {
      "epoch": 0.9956235315796748,
      "grad_norm": 0.995907441347612,
      "learning_rate": 2.563779795553001e-10,
      "loss": 0.1223,
      "step": 10806
    },
    {
      "epoch": 0.9957156677569448,
      "grad_norm": 0.9689107866786532,
      "learning_rate": 2.455845156695835e-10,
      "loss": 0.1231,
      "step": 10807
    },
    {
      "epoch": 0.9958078039342148,
      "grad_norm": 0.8690683165999983,
      "learning_rate": 2.350231542502801e-10,
      "loss": 0.0981,
      "step": 10808
    },
    {
      "epoch": 0.9958999401114847,
      "grad_norm": 0.9517071622004358,
      "learning_rate": 2.2469389627827188e-10,
      "loss": 0.1242,
      "step": 10809
    },
    {
      "epoch": 0.9959920762887547,
      "grad_norm": 0.9644950560808359,
      "learning_rate": 2.1459674271251397e-10,
      "loss": 0.1179,
      "step": 10810
    },
    {
      "epoch": 0.9960842124660247,
      "grad_norm": 0.9585515695749373,
      "learning_rate": 2.0473169449031217e-10,
      "loss": 0.1181,
      "step": 10811
    },
    {
      "epoch": 0.9961763486432947,
      "grad_norm": 0.8761769302597302,
      "learning_rate": 1.9509875252787803e-10,
      "loss": 0.1071,
      "step": 10812
    },
    {
      "epoch": 0.9962684848205648,
      "grad_norm": 0.9240358594187232,
      "learning_rate": 1.856979177194962e-10,
      "loss": 0.1127,
      "step": 10813
    },
    {
      "epoch": 0.9963606209978348,
      "grad_norm": 0.90524334167604,
      "learning_rate": 1.7652919093807952e-10,
      "loss": 0.1084,
      "step": 10814
    },
    {
      "epoch": 0.9964527571751048,
      "grad_norm": 0.9512308901033537,
      "learning_rate": 1.675925730348915e-10,
      "loss": 0.1181,
      "step": 10815
    },
    {
      "epoch": 0.9965448933523748,
      "grad_norm": 0.8919879468606943,
      "learning_rate": 1.5888806484010143e-10,
      "loss": 0.1092,
      "step": 10816
    },
    {
      "epoch": 0.9966370295296448,
      "grad_norm": 0.928927024723345,
      "learning_rate": 1.5041566716139656e-10,
      "loss": 0.1152,
      "step": 10817
    },
    {
      "epoch": 0.9967291657069148,
      "grad_norm": 0.9682198116348629,
      "learning_rate": 1.4217538078536985e-10,
      "loss": 0.121,
      "step": 10818
    },
    {
      "epoch": 0.9968213018841848,
      "grad_norm": 0.9812596295403685,
      "learning_rate": 1.3416720647779768e-10,
      "loss": 0.1254,
      "step": 10819
    },
    {
      "epoch": 0.9969134380614548,
      "grad_norm": 0.9476046140101004,
      "learning_rate": 1.263911449816968e-10,
      "loss": 0.1228,
      "step": 10820
    },
    {
      "epoch": 0.9970055742387248,
      "grad_norm": 0.8940822513705196,
      "learning_rate": 1.1884719701926727e-10,
      "loss": 0.1175,
      "step": 10821
    },
    {
      "epoch": 0.9970977104159948,
      "grad_norm": 0.9495262015744932,
      "learning_rate": 1.1153536329078229e-10,
      "loss": 0.1171,
      "step": 10822
    },
    {
      "epoch": 0.9971898465932648,
      "grad_norm": 0.9466157440151927,
      "learning_rate": 1.0445564447542078e-10,
      "loss": 0.1192,
      "step": 10823
    },
    {
      "epoch": 0.9972819827705348,
      "grad_norm": 0.9413453778321897,
      "learning_rate": 9.760804123015721e-11,
      "loss": 0.1141,
      "step": 10824
    },
    {
      "epoch": 0.9973741189478048,
      "grad_norm": 0.9226586113738102,
      "learning_rate": 9.099255419114938e-11,
      "loss": 0.1177,
      "step": 10825
    },
    {
      "epoch": 0.9974662551250748,
      "grad_norm": 0.9383011446764592,
      "learning_rate": 8.460918397262818e-11,
      "loss": 0.1128,
      "step": 10826
    },
    {
      "epoch": 0.9975583913023449,
      "grad_norm": 0.9252669138882176,
      "learning_rate": 7.845793116717515e-11,
      "loss": 0.1134,
      "step": 10827
    },
    {
      "epoch": 0.9976505274796149,
      "grad_norm": 0.9388645197912326,
      "learning_rate": 7.253879634600003e-11,
      "loss": 0.1238,
      "step": 10828
    },
    {
      "epoch": 0.9977426636568849,
      "grad_norm": 0.9377529336829813,
      "learning_rate": 6.685178005838567e-11,
      "loss": 0.1114,
      "step": 10829
    },
    {
      "epoch": 0.9978347998341549,
      "grad_norm": 1.0046882927242726,
      "learning_rate": 6.139688283279821e-11,
      "loss": 0.1272,
      "step": 10830
    },
    {
      "epoch": 0.9979269360114249,
      "grad_norm": 0.9156009519149251,
      "learning_rate": 5.617410517549937e-11,
      "loss": 0.1112,
      "step": 10831
    },
    {
      "epoch": 0.9980190721886949,
      "grad_norm": 0.9902312030633188,
      "learning_rate": 5.118344757165661e-11,
      "loss": 0.1272,
      "step": 10832
    },
    {
      "epoch": 0.9981112083659649,
      "grad_norm": 0.9937864237747287,
      "learning_rate": 4.6424910484232924e-11,
      "loss": 0.1351,
      "step": 10833
    },
    {
      "epoch": 0.9982033445432349,
      "grad_norm": 0.9211831585262729,
      "learning_rate": 4.189849435565219e-11,
      "loss": 0.1033,
      "step": 10834
    },
    {
      "epoch": 0.9982954807205049,
      "grad_norm": 0.9613565174736062,
      "learning_rate": 3.7604199605578705e-11,
      "loss": 0.1195,
      "step": 10835
    },
    {
      "epoch": 0.9983876168977749,
      "grad_norm": 0.9636026205660925,
      "learning_rate": 3.35420266328601e-11,
      "loss": 0.1267,
      "step": 10836
    },
    {
      "epoch": 0.9984797530750449,
      "grad_norm": 0.9489533548789341,
      "learning_rate": 2.9711975814972205e-11,
      "loss": 0.1205,
      "step": 10837
    },
    {
      "epoch": 0.9985718892523149,
      "grad_norm": 0.9312987097224975,
      "learning_rate": 2.6114047507463936e-11,
      "loss": 0.1177,
      "step": 10838
    },
    {
      "epoch": 0.9986640254295849,
      "grad_norm": 0.9687758766239608,
      "learning_rate": 2.2748242044234868e-11,
      "loss": 0.1155,
      "step": 10839
    },
    {
      "epoch": 0.998756161606855,
      "grad_norm": 0.9124547506561508,
      "learning_rate": 1.9614559738090345e-11,
      "loss": 0.1206,
      "step": 10840
    },
    {
      "epoch": 0.998848297784125,
      "grad_norm": 0.9012464083753811,
      "learning_rate": 1.671300087935368e-11,
      "loss": 0.1137,
      "step": 10841
    },
    {
      "epoch": 0.998940433961395,
      "grad_norm": 0.9556438025043256,
      "learning_rate": 1.4043565738364184e-11,
      "loss": 0.112,
      "step": 10842
    },
    {
      "epoch": 0.999032570138665,
      "grad_norm": 0.9389183452029211,
      "learning_rate": 1.1606254562146479e-11,
      "loss": 0.1132,
      "step": 10843
    },
    {
      "epoch": 0.999124706315935,
      "grad_norm": 0.906558779678973,
      "learning_rate": 9.401067577463618e-12,
      "loss": 0.1083,
      "step": 10844
    },
    {
      "epoch": 0.999216842493205,
      "grad_norm": 0.9626475810080135,
      "learning_rate": 7.428004988874194e-12,
      "loss": 0.1201,
      "step": 10845
    },
    {
      "epoch": 0.999308978670475,
      "grad_norm": 0.9183400382526548,
      "learning_rate": 5.687066979565003e-12,
      "loss": 0.1133,
      "step": 10846
    },
    {
      "epoch": 0.999401114847745,
      "grad_norm": 0.9157534567793171,
      "learning_rate": 4.178253711351054e-12,
      "loss": 0.1123,
      "step": 10847
    },
    {
      "epoch": 0.999493251025015,
      "grad_norm": 0.9607427044331877,
      "learning_rate": 2.9015653243980034e-12,
      "loss": 0.1194,
      "step": 10848
    },
    {
      "epoch": 0.999585387202285,
      "grad_norm": 0.9133340683724209,
      "learning_rate": 1.8570019369446025e-12,
      "loss": 0.1209,
      "step": 10849
    },
    {
      "epoch": 0.999677523379555,
      "grad_norm": 0.9797017728895512,
      "learning_rate": 1.044563646135366e-12,
      "loss": 0.1213,
      "step": 10850
    },
    {
      "epoch": 0.999769659556825,
      "grad_norm": 0.950547168845273,
      "learning_rate": 4.642505274654596e-13,
      "loss": 0.1109,
      "step": 10851
    },
    {
      "epoch": 0.999861795734095,
      "grad_norm": 0.9546759905442439,
      "learning_rate": 1.1606263450314458e-13,
      "loss": 0.1163,
      "step": 10852
    },
    {
      "epoch": 0.999953931911365,
      "grad_norm": 0.99401938130207,
      "learning_rate": 0.0,
      "loss": 0.1287,
      "step": 10853
    },
    {
      "epoch": 0.999953931911365,
      "step": 10853,
      "total_flos": 1908258935930880.0,
      "train_loss": 0.14796658507510943,
      "train_runtime": 123515.0295,
      "train_samples_per_second": 5.624,
      "train_steps_per_second": 0.088
    }
  ],
  "logging_steps": 1,
  "max_steps": 10853,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1908258935930880.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}