{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 12500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 3.2635705523930243, "learning_rate": 2.6666666666666667e-08, "loss": 0.3102, "step": 1 }, { "epoch": 0.0, "grad_norm": 3.543236875321121, "learning_rate": 5.3333333333333334e-08, "loss": 0.3172, "step": 2 }, { "epoch": 0.0, "grad_norm": 3.0655688091283118, "learning_rate": 8e-08, "loss": 0.2632, "step": 3 }, { "epoch": 0.0, "grad_norm": 7.081711234511395, "learning_rate": 1.0666666666666667e-07, "loss": 0.8341, "step": 4 }, { "epoch": 0.0, "grad_norm": 4.154098344404168, "learning_rate": 1.3333333333333336e-07, "loss": 0.3314, "step": 5 }, { "epoch": 0.0, "grad_norm": 7.535508933365584, "learning_rate": 1.6e-07, "loss": 0.9176, "step": 6 }, { "epoch": 0.0, "grad_norm": 3.402369845867405, "learning_rate": 1.866666666666667e-07, "loss": 0.3436, "step": 7 }, { "epoch": 0.0, "grad_norm": 3.5114298823570063, "learning_rate": 2.1333333333333334e-07, "loss": 0.3615, "step": 8 }, { "epoch": 0.0, "grad_norm": 7.476628489258035, "learning_rate": 2.4000000000000003e-07, "loss": 0.9509, "step": 9 }, { "epoch": 0.0, "grad_norm": 3.7138745616249826, "learning_rate": 2.666666666666667e-07, "loss": 0.3222, "step": 10 }, { "epoch": 0.0, "grad_norm": 3.8609823589881858, "learning_rate": 2.9333333333333337e-07, "loss": 0.2921, "step": 11 }, { "epoch": 0.0, "grad_norm": 3.075192583236817, "learning_rate": 3.2e-07, "loss": 0.3245, "step": 12 }, { "epoch": 0.0, "grad_norm": 3.998118821182612, "learning_rate": 3.466666666666667e-07, "loss": 0.3024, "step": 13 }, { "epoch": 0.0, "grad_norm": 4.045818795124452, "learning_rate": 3.733333333333334e-07, "loss": 0.3282, "step": 14 }, { "epoch": 0.0, "grad_norm": 7.973244687568935, "learning_rate": 4.0000000000000003e-07, "loss": 0.7978, "step": 15 }, { "epoch": 0.0, "grad_norm": 3.9808464485960586, "learning_rate": 4.266666666666667e-07, "loss": 0.3279, "step": 16 }, { "epoch": 0.0, "grad_norm": 3.4364859666828984, "learning_rate": 4.533333333333334e-07, "loss": 0.2966, "step": 17 }, { "epoch": 0.0, "grad_norm": 3.430219771069506, "learning_rate": 4.800000000000001e-07, "loss": 0.3061, "step": 18 }, { "epoch": 0.0, "grad_norm": 3.1459749927364595, "learning_rate": 5.066666666666667e-07, "loss": 0.3716, "step": 19 }, { "epoch": 0.0, "grad_norm": 2.583293257964736, "learning_rate": 5.333333333333335e-07, "loss": 0.2714, "step": 20 }, { "epoch": 0.0, "grad_norm": 2.820393113611824, "learning_rate": 5.6e-07, "loss": 0.2903, "step": 21 }, { "epoch": 0.0, "grad_norm": 7.996635641133045, "learning_rate": 5.866666666666667e-07, "loss": 0.9355, "step": 22 }, { "epoch": 0.0, "grad_norm": 2.8556690346481117, "learning_rate": 6.133333333333333e-07, "loss": 0.3388, "step": 23 }, { "epoch": 0.0, "grad_norm": 2.596995774371009, "learning_rate": 6.4e-07, "loss": 0.3523, "step": 24 }, { "epoch": 0.0, "grad_norm": 2.2831190744445444, "learning_rate": 6.666666666666667e-07, "loss": 0.2908, "step": 25 }, { "epoch": 0.0, "grad_norm": 2.599652572249794, "learning_rate": 6.933333333333334e-07, "loss": 0.3018, "step": 26 }, { "epoch": 0.0, "grad_norm": 2.690583140200056, "learning_rate": 7.2e-07, "loss": 0.3214, "step": 27 }, { "epoch": 0.0, "grad_norm": 5.888226221915256, "learning_rate": 7.466666666666668e-07, "loss": 0.7488, "step": 28 }, { "epoch": 0.0, "grad_norm": 2.248420029290666, "learning_rate": 7.733333333333335e-07, "loss": 0.292, "step": 29 }, { "epoch": 0.0, "grad_norm": 5.166642960970659, "learning_rate": 8.000000000000001e-07, "loss": 0.8535, "step": 30 }, { "epoch": 0.0, "grad_norm": 2.153730429443561, "learning_rate": 8.266666666666668e-07, "loss": 0.2999, "step": 31 }, { "epoch": 0.0, "grad_norm": 2.045491499674651, "learning_rate": 8.533333333333334e-07, "loss": 0.2379, "step": 32 }, { "epoch": 0.0, "grad_norm": 2.4058724593030694, "learning_rate": 8.8e-07, "loss": 0.2759, "step": 33 }, { "epoch": 0.0, "grad_norm": 2.1177615847510913, "learning_rate": 9.066666666666668e-07, "loss": 0.2711, "step": 34 }, { "epoch": 0.0, "grad_norm": 2.3992686930361855, "learning_rate": 9.333333333333334e-07, "loss": 0.2788, "step": 35 }, { "epoch": 0.0, "grad_norm": 2.28546520521007, "learning_rate": 9.600000000000001e-07, "loss": 0.239, "step": 36 }, { "epoch": 0.0, "grad_norm": 2.3080986760989655, "learning_rate": 9.866666666666668e-07, "loss": 0.2883, "step": 37 }, { "epoch": 0.0, "grad_norm": 2.662264753934099, "learning_rate": 1.0133333333333333e-06, "loss": 0.2807, "step": 38 }, { "epoch": 0.0, "grad_norm": 2.175493921197926, "learning_rate": 1.04e-06, "loss": 0.2659, "step": 39 }, { "epoch": 0.0, "grad_norm": 2.1416533996119678, "learning_rate": 1.066666666666667e-06, "loss": 0.308, "step": 40 }, { "epoch": 0.0, "grad_norm": 1.9403011675836694, "learning_rate": 1.0933333333333334e-06, "loss": 0.2768, "step": 41 }, { "epoch": 0.0, "grad_norm": 5.662490627051228, "learning_rate": 1.12e-06, "loss": 0.7614, "step": 42 }, { "epoch": 0.0, "grad_norm": 6.7966519208521365, "learning_rate": 1.1466666666666668e-06, "loss": 0.8204, "step": 43 }, { "epoch": 0.0, "grad_norm": 1.9809096759810592, "learning_rate": 1.1733333333333335e-06, "loss": 0.2237, "step": 44 }, { "epoch": 0.0, "grad_norm": 7.47988732627334, "learning_rate": 1.2000000000000002e-06, "loss": 0.8607, "step": 45 }, { "epoch": 0.0, "grad_norm": 2.296376364024676, "learning_rate": 1.2266666666666666e-06, "loss": 0.2883, "step": 46 }, { "epoch": 0.0, "grad_norm": 2.323464926411196, "learning_rate": 1.2533333333333333e-06, "loss": 0.2503, "step": 47 }, { "epoch": 0.0, "grad_norm": 2.422466514928935, "learning_rate": 1.28e-06, "loss": 0.2873, "step": 48 }, { "epoch": 0.0, "grad_norm": 2.1864023301873354, "learning_rate": 1.3066666666666667e-06, "loss": 0.2343, "step": 49 }, { "epoch": 0.0, "grad_norm": 1.9463373428273896, "learning_rate": 1.3333333333333334e-06, "loss": 0.2338, "step": 50 }, { "epoch": 0.0, "grad_norm": 2.1405490757192798, "learning_rate": 1.3600000000000001e-06, "loss": 0.2221, "step": 51 }, { "epoch": 0.0, "grad_norm": 2.0836180918551928, "learning_rate": 1.3866666666666668e-06, "loss": 0.2491, "step": 52 }, { "epoch": 0.0, "grad_norm": 2.100488803126805, "learning_rate": 1.4133333333333335e-06, "loss": 0.2822, "step": 53 }, { "epoch": 0.0, "grad_norm": 2.032919241222733, "learning_rate": 1.44e-06, "loss": 0.2195, "step": 54 }, { "epoch": 0.0, "grad_norm": 5.815257401523501, "learning_rate": 1.4666666666666669e-06, "loss": 0.7812, "step": 55 }, { "epoch": 0.0, "grad_norm": 2.1564309626298725, "learning_rate": 1.4933333333333336e-06, "loss": 0.2469, "step": 56 }, { "epoch": 0.0, "grad_norm": 1.9480887603542227, "learning_rate": 1.52e-06, "loss": 0.231, "step": 57 }, { "epoch": 0.0, "grad_norm": 2.011965218482532, "learning_rate": 1.546666666666667e-06, "loss": 0.2705, "step": 58 }, { "epoch": 0.0, "grad_norm": 2.622723478044898, "learning_rate": 1.5733333333333334e-06, "loss": 0.2621, "step": 59 }, { "epoch": 0.0, "grad_norm": 2.14774348381317, "learning_rate": 1.6000000000000001e-06, "loss": 0.2304, "step": 60 }, { "epoch": 0.0, "grad_norm": 2.1845766304625953, "learning_rate": 1.6266666666666666e-06, "loss": 0.2012, "step": 61 }, { "epoch": 0.0, "grad_norm": 2.066296298115803, "learning_rate": 1.6533333333333335e-06, "loss": 0.2328, "step": 62 }, { "epoch": 0.01, "grad_norm": 2.148557387257148, "learning_rate": 1.6800000000000002e-06, "loss": 0.215, "step": 63 }, { "epoch": 0.01, "grad_norm": 2.2476637406213626, "learning_rate": 1.7066666666666667e-06, "loss": 0.26, "step": 64 }, { "epoch": 0.01, "grad_norm": 2.0641999839834813, "learning_rate": 1.7333333333333336e-06, "loss": 0.2399, "step": 65 }, { "epoch": 0.01, "grad_norm": 2.0376118977375186, "learning_rate": 1.76e-06, "loss": 0.2016, "step": 66 }, { "epoch": 0.01, "grad_norm": 2.0274034656824056, "learning_rate": 1.7866666666666668e-06, "loss": 0.228, "step": 67 }, { "epoch": 0.01, "grad_norm": 2.2957221670003958, "learning_rate": 1.8133333333333337e-06, "loss": 0.2641, "step": 68 }, { "epoch": 0.01, "grad_norm": 2.041701984504308, "learning_rate": 1.8400000000000002e-06, "loss": 0.1948, "step": 69 }, { "epoch": 0.01, "grad_norm": 2.037906356469153, "learning_rate": 1.8666666666666669e-06, "loss": 0.242, "step": 70 }, { "epoch": 0.01, "grad_norm": 2.0212291033145426, "learning_rate": 1.8933333333333333e-06, "loss": 0.2195, "step": 71 }, { "epoch": 0.01, "grad_norm": 2.005787621345112, "learning_rate": 1.9200000000000003e-06, "loss": 0.2436, "step": 72 }, { "epoch": 0.01, "grad_norm": 2.110313066520128, "learning_rate": 1.9466666666666665e-06, "loss": 0.2097, "step": 73 }, { "epoch": 0.01, "grad_norm": 2.1635446281024207, "learning_rate": 1.9733333333333336e-06, "loss": 0.1888, "step": 74 }, { "epoch": 0.01, "grad_norm": 5.200565820752455, "learning_rate": 2.0000000000000003e-06, "loss": 0.7992, "step": 75 }, { "epoch": 0.01, "grad_norm": 2.193104289614479, "learning_rate": 2.0266666666666666e-06, "loss": 0.2297, "step": 76 }, { "epoch": 0.01, "grad_norm": 1.9612852870447144, "learning_rate": 2.0533333333333337e-06, "loss": 0.208, "step": 77 }, { "epoch": 0.01, "grad_norm": 5.391661786594564, "learning_rate": 2.08e-06, "loss": 0.7669, "step": 78 }, { "epoch": 0.01, "grad_norm": 2.11332632538686, "learning_rate": 2.1066666666666667e-06, "loss": 0.2424, "step": 79 }, { "epoch": 0.01, "grad_norm": 7.260362688106463, "learning_rate": 2.133333333333334e-06, "loss": 0.821, "step": 80 }, { "epoch": 0.01, "grad_norm": 6.803596009402048, "learning_rate": 2.16e-06, "loss": 0.6818, "step": 81 }, { "epoch": 0.01, "grad_norm": 1.991407922507565, "learning_rate": 2.1866666666666668e-06, "loss": 0.2306, "step": 82 }, { "epoch": 0.01, "grad_norm": 7.187669567898178, "learning_rate": 2.2133333333333335e-06, "loss": 0.7662, "step": 83 }, { "epoch": 0.01, "grad_norm": 2.2267663175830386, "learning_rate": 2.24e-06, "loss": 0.2389, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.9644457331448772, "learning_rate": 2.266666666666667e-06, "loss": 0.2018, "step": 85 }, { "epoch": 0.01, "grad_norm": 2.0324819383736497, "learning_rate": 2.2933333333333335e-06, "loss": 0.2407, "step": 86 }, { "epoch": 0.01, "grad_norm": 8.84810247901649, "learning_rate": 2.3200000000000002e-06, "loss": 0.5149, "step": 87 }, { "epoch": 0.01, "grad_norm": 6.75065201318718, "learning_rate": 2.346666666666667e-06, "loss": 0.7061, "step": 88 }, { "epoch": 0.01, "grad_norm": 5.470538421286546, "learning_rate": 2.3733333333333336e-06, "loss": 0.829, "step": 89 }, { "epoch": 0.01, "grad_norm": 5.583734329121602, "learning_rate": 2.4000000000000003e-06, "loss": 0.6821, "step": 90 }, { "epoch": 0.01, "grad_norm": 5.099338441420322, "learning_rate": 2.426666666666667e-06, "loss": 0.9718, "step": 91 }, { "epoch": 0.01, "grad_norm": 2.3324966371105424, "learning_rate": 2.4533333333333333e-06, "loss": 0.2248, "step": 92 }, { "epoch": 0.01, "grad_norm": 2.016810889780768, "learning_rate": 2.4800000000000004e-06, "loss": 0.2584, "step": 93 }, { "epoch": 0.01, "grad_norm": 2.1304257948220604, "learning_rate": 2.5066666666666667e-06, "loss": 0.2196, "step": 94 }, { "epoch": 0.01, "grad_norm": 2.0180392724288096, "learning_rate": 2.5333333333333338e-06, "loss": 0.2055, "step": 95 }, { "epoch": 0.01, "grad_norm": 2.1615753822613946, "learning_rate": 2.56e-06, "loss": 0.2371, "step": 96 }, { "epoch": 0.01, "grad_norm": 2.0803764832105878, "learning_rate": 2.5866666666666667e-06, "loss": 0.2408, "step": 97 }, { "epoch": 0.01, "grad_norm": 2.2928421674371715, "learning_rate": 2.6133333333333334e-06, "loss": 0.2244, "step": 98 }, { "epoch": 0.01, "grad_norm": 2.0523804437065363, "learning_rate": 2.64e-06, "loss": 0.277, "step": 99 }, { "epoch": 0.01, "grad_norm": 2.2286435641079323, "learning_rate": 2.666666666666667e-06, "loss": 0.2225, "step": 100 }, { "epoch": 0.01, "grad_norm": 2.2962715373714495, "learning_rate": 2.6933333333333335e-06, "loss": 0.2735, "step": 101 }, { "epoch": 0.01, "grad_norm": 2.285439769931514, "learning_rate": 2.7200000000000002e-06, "loss": 0.2372, "step": 102 }, { "epoch": 0.01, "grad_norm": 1.9342386464302803, "learning_rate": 2.746666666666667e-06, "loss": 0.208, "step": 103 }, { "epoch": 0.01, "grad_norm": 2.1503197007060293, "learning_rate": 2.7733333333333336e-06, "loss": 0.2809, "step": 104 }, { "epoch": 0.01, "grad_norm": 1.7893235217469743, "learning_rate": 2.8000000000000003e-06, "loss": 0.1758, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.906007462604689, "learning_rate": 2.826666666666667e-06, "loss": 0.2484, "step": 106 }, { "epoch": 0.01, "grad_norm": 1.9786598638462265, "learning_rate": 2.8533333333333337e-06, "loss": 0.2114, "step": 107 }, { "epoch": 0.01, "grad_norm": 2.049152819107196, "learning_rate": 2.88e-06, "loss": 0.2237, "step": 108 }, { "epoch": 0.01, "grad_norm": 1.6661362139299618, "learning_rate": 2.906666666666667e-06, "loss": 0.2113, "step": 109 }, { "epoch": 0.01, "grad_norm": 2.2480093172022495, "learning_rate": 2.9333333333333338e-06, "loss": 0.2459, "step": 110 }, { "epoch": 0.01, "grad_norm": 1.927738641246031, "learning_rate": 2.96e-06, "loss": 0.2035, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.8857780825353894, "learning_rate": 2.986666666666667e-06, "loss": 0.1594, "step": 112 }, { "epoch": 0.01, "grad_norm": 1.9963991331191024, "learning_rate": 3.013333333333334e-06, "loss": 0.2328, "step": 113 }, { "epoch": 0.01, "grad_norm": 1.7325046061943692, "learning_rate": 3.04e-06, "loss": 0.2022, "step": 114 }, { "epoch": 0.01, "grad_norm": 6.970097203736795, "learning_rate": 3.066666666666667e-06, "loss": 0.677, "step": 115 }, { "epoch": 0.01, "grad_norm": 7.926852729565814, "learning_rate": 3.093333333333334e-06, "loss": 0.5464, "step": 116 }, { "epoch": 0.01, "grad_norm": 2.0605927676465607, "learning_rate": 3.12e-06, "loss": 0.198, "step": 117 }, { "epoch": 0.01, "grad_norm": 2.1104917300712245, "learning_rate": 3.146666666666667e-06, "loss": 0.2352, "step": 118 }, { "epoch": 0.01, "grad_norm": 1.8787240286872835, "learning_rate": 3.173333333333334e-06, "loss": 0.2465, "step": 119 }, { "epoch": 0.01, "grad_norm": 1.9218432020497223, "learning_rate": 3.2000000000000003e-06, "loss": 0.2031, "step": 120 }, { "epoch": 0.01, "grad_norm": 2.1772188471633362, "learning_rate": 3.226666666666667e-06, "loss": 0.2051, "step": 121 }, { "epoch": 0.01, "grad_norm": 2.328005319469429, "learning_rate": 3.2533333333333332e-06, "loss": 0.2153, "step": 122 }, { "epoch": 0.01, "grad_norm": 1.925604145136642, "learning_rate": 3.2800000000000004e-06, "loss": 0.25, "step": 123 }, { "epoch": 0.01, "grad_norm": 1.9820026322447761, "learning_rate": 3.306666666666667e-06, "loss": 0.1943, "step": 124 }, { "epoch": 0.01, "grad_norm": 2.0411928414856777, "learning_rate": 3.3333333333333333e-06, "loss": 0.1685, "step": 125 }, { "epoch": 0.01, "grad_norm": 1.8652352360105509, "learning_rate": 3.3600000000000004e-06, "loss": 0.1883, "step": 126 }, { "epoch": 0.01, "grad_norm": 2.2516584878176875, "learning_rate": 3.386666666666667e-06, "loss": 0.1727, "step": 127 }, { "epoch": 0.01, "grad_norm": 7.087264742735575, "learning_rate": 3.4133333333333334e-06, "loss": 0.793, "step": 128 }, { "epoch": 0.01, "grad_norm": 4.996973052396692, "learning_rate": 3.44e-06, "loss": 0.6537, "step": 129 }, { "epoch": 0.01, "grad_norm": 2.3405875842702155, "learning_rate": 3.4666666666666672e-06, "loss": 0.2419, "step": 130 }, { "epoch": 0.01, "grad_norm": 2.035032668627448, "learning_rate": 3.4933333333333335e-06, "loss": 0.1946, "step": 131 }, { "epoch": 0.01, "grad_norm": 2.043677962988261, "learning_rate": 3.52e-06, "loss": 0.2255, "step": 132 }, { "epoch": 0.01, "grad_norm": 2.178344339290289, "learning_rate": 3.5466666666666673e-06, "loss": 0.2287, "step": 133 }, { "epoch": 0.01, "grad_norm": 1.9821123059862547, "learning_rate": 3.5733333333333336e-06, "loss": 0.1983, "step": 134 }, { "epoch": 0.01, "grad_norm": 2.122660059246671, "learning_rate": 3.6000000000000003e-06, "loss": 0.2235, "step": 135 }, { "epoch": 0.01, "grad_norm": 1.862156009599487, "learning_rate": 3.6266666666666674e-06, "loss": 0.1862, "step": 136 }, { "epoch": 0.01, "grad_norm": 2.209332983222699, "learning_rate": 3.6533333333333336e-06, "loss": 0.2381, "step": 137 }, { "epoch": 0.01, "grad_norm": 2.3148626754621127, "learning_rate": 3.6800000000000003e-06, "loss": 0.229, "step": 138 }, { "epoch": 0.01, "grad_norm": 4.468563994514957, "learning_rate": 3.7066666666666666e-06, "loss": 0.5659, "step": 139 }, { "epoch": 0.01, "grad_norm": 2.231979066487987, "learning_rate": 3.7333333333333337e-06, "loss": 0.2475, "step": 140 }, { "epoch": 0.01, "grad_norm": 2.0539829331885677, "learning_rate": 3.7600000000000004e-06, "loss": 0.2088, "step": 141 }, { "epoch": 0.01, "grad_norm": 1.9101853584281967, "learning_rate": 3.7866666666666667e-06, "loss": 0.1787, "step": 142 }, { "epoch": 0.01, "grad_norm": 1.7549572650718648, "learning_rate": 3.813333333333334e-06, "loss": 0.1745, "step": 143 }, { "epoch": 0.01, "grad_norm": 1.9917671770254983, "learning_rate": 3.8400000000000005e-06, "loss": 0.2373, "step": 144 }, { "epoch": 0.01, "grad_norm": 1.9719928027465417, "learning_rate": 3.866666666666667e-06, "loss": 0.1964, "step": 145 }, { "epoch": 0.01, "grad_norm": 2.0200700893787116, "learning_rate": 3.893333333333333e-06, "loss": 0.2487, "step": 146 }, { "epoch": 0.01, "grad_norm": 1.9704671229709034, "learning_rate": 3.920000000000001e-06, "loss": 0.2275, "step": 147 }, { "epoch": 0.01, "grad_norm": 1.9949728915250518, "learning_rate": 3.946666666666667e-06, "loss": 0.2455, "step": 148 }, { "epoch": 0.01, "grad_norm": 5.183427935948576, "learning_rate": 3.973333333333333e-06, "loss": 0.4523, "step": 149 }, { "epoch": 0.01, "grad_norm": 1.8543991607502426, "learning_rate": 4.000000000000001e-06, "loss": 0.2223, "step": 150 }, { "epoch": 0.01, "grad_norm": 1.8723747071389218, "learning_rate": 4.026666666666667e-06, "loss": 0.1991, "step": 151 }, { "epoch": 0.01, "grad_norm": 1.9645684725537398, "learning_rate": 4.053333333333333e-06, "loss": 0.1986, "step": 152 }, { "epoch": 0.01, "grad_norm": 1.9654853771367395, "learning_rate": 4.08e-06, "loss": 0.2294, "step": 153 }, { "epoch": 0.01, "grad_norm": 2.312535389212955, "learning_rate": 4.1066666666666674e-06, "loss": 0.2392, "step": 154 }, { "epoch": 0.01, "grad_norm": 1.9576843768103063, "learning_rate": 4.133333333333333e-06, "loss": 0.236, "step": 155 }, { "epoch": 0.01, "grad_norm": 2.1319457853387407, "learning_rate": 4.16e-06, "loss": 0.2175, "step": 156 }, { "epoch": 0.01, "grad_norm": 7.444462809770049, "learning_rate": 4.1866666666666675e-06, "loss": 0.8498, "step": 157 }, { "epoch": 0.01, "grad_norm": 2.0619876961010677, "learning_rate": 4.213333333333333e-06, "loss": 0.2348, "step": 158 }, { "epoch": 0.01, "grad_norm": 2.0771362926774257, "learning_rate": 4.24e-06, "loss": 0.1975, "step": 159 }, { "epoch": 0.01, "grad_norm": 2.0444600557386154, "learning_rate": 4.266666666666668e-06, "loss": 0.2139, "step": 160 }, { "epoch": 0.01, "grad_norm": 2.130966379197011, "learning_rate": 4.2933333333333334e-06, "loss": 0.2686, "step": 161 }, { "epoch": 0.01, "grad_norm": 1.8898003382530546, "learning_rate": 4.32e-06, "loss": 0.2184, "step": 162 }, { "epoch": 0.01, "grad_norm": 2.2166378958169264, "learning_rate": 4.346666666666667e-06, "loss": 0.2192, "step": 163 }, { "epoch": 0.01, "grad_norm": 8.055870577216373, "learning_rate": 4.3733333333333335e-06, "loss": 0.6057, "step": 164 }, { "epoch": 0.01, "grad_norm": 2.127915791381282, "learning_rate": 4.4e-06, "loss": 0.2716, "step": 165 }, { "epoch": 0.01, "grad_norm": 8.367786962086758, "learning_rate": 4.426666666666667e-06, "loss": 0.7017, "step": 166 }, { "epoch": 0.01, "grad_norm": 2.1641741065311466, "learning_rate": 4.453333333333334e-06, "loss": 0.2736, "step": 167 }, { "epoch": 0.01, "grad_norm": 1.8708115322718382, "learning_rate": 4.48e-06, "loss": 0.1873, "step": 168 }, { "epoch": 0.01, "grad_norm": 2.0197250826297384, "learning_rate": 4.506666666666667e-06, "loss": 0.2603, "step": 169 }, { "epoch": 0.01, "grad_norm": 2.12178576784744, "learning_rate": 4.533333333333334e-06, "loss": 0.245, "step": 170 }, { "epoch": 0.01, "grad_norm": 2.0097943872195185, "learning_rate": 4.56e-06, "loss": 0.2111, "step": 171 }, { "epoch": 0.01, "grad_norm": 2.102553739297998, "learning_rate": 4.586666666666667e-06, "loss": 0.2364, "step": 172 }, { "epoch": 0.01, "grad_norm": 8.401209297673534, "learning_rate": 4.613333333333334e-06, "loss": 0.7021, "step": 173 }, { "epoch": 0.01, "grad_norm": 2.2145803698424196, "learning_rate": 4.6400000000000005e-06, "loss": 0.2344, "step": 174 }, { "epoch": 0.01, "grad_norm": 1.8939923366942533, "learning_rate": 4.666666666666667e-06, "loss": 0.1737, "step": 175 }, { "epoch": 0.01, "grad_norm": 1.7960576034922373, "learning_rate": 4.693333333333334e-06, "loss": 0.1825, "step": 176 }, { "epoch": 0.01, "grad_norm": 1.9298842821684317, "learning_rate": 4.7200000000000005e-06, "loss": 0.229, "step": 177 }, { "epoch": 0.01, "grad_norm": 2.1151162193087947, "learning_rate": 4.746666666666667e-06, "loss": 0.2215, "step": 178 }, { "epoch": 0.01, "grad_norm": 1.6525488005070617, "learning_rate": 4.773333333333334e-06, "loss": 0.1643, "step": 179 }, { "epoch": 0.01, "grad_norm": 2.1236553322274374, "learning_rate": 4.800000000000001e-06, "loss": 0.2302, "step": 180 }, { "epoch": 0.01, "grad_norm": 1.7230232090898925, "learning_rate": 4.826666666666667e-06, "loss": 0.1685, "step": 181 }, { "epoch": 0.01, "grad_norm": 2.143435168699805, "learning_rate": 4.853333333333334e-06, "loss": 0.1821, "step": 182 }, { "epoch": 0.01, "grad_norm": 2.024389744687161, "learning_rate": 4.880000000000001e-06, "loss": 0.2364, "step": 183 }, { "epoch": 0.01, "grad_norm": 2.1772637853759935, "learning_rate": 4.9066666666666666e-06, "loss": 0.2518, "step": 184 }, { "epoch": 0.01, "grad_norm": 1.940779281076909, "learning_rate": 4.933333333333334e-06, "loss": 0.2328, "step": 185 }, { "epoch": 0.01, "grad_norm": 1.9820568269378853, "learning_rate": 4.960000000000001e-06, "loss": 0.2611, "step": 186 }, { "epoch": 0.01, "grad_norm": 1.9456026666072652, "learning_rate": 4.986666666666667e-06, "loss": 0.1822, "step": 187 }, { "epoch": 0.02, "grad_norm": 1.9269335908419998, "learning_rate": 5.013333333333333e-06, "loss": 0.1799, "step": 188 }, { "epoch": 0.02, "grad_norm": 1.8155386201438246, "learning_rate": 5.04e-06, "loss": 0.196, "step": 189 }, { "epoch": 0.02, "grad_norm": 1.9008726008303274, "learning_rate": 5.0666666666666676e-06, "loss": 0.2435, "step": 190 }, { "epoch": 0.02, "grad_norm": 6.827807938534924, "learning_rate": 5.093333333333333e-06, "loss": 0.4753, "step": 191 }, { "epoch": 0.02, "grad_norm": 2.011283711428876, "learning_rate": 5.12e-06, "loss": 0.2263, "step": 192 }, { "epoch": 0.02, "grad_norm": 1.9527270517091515, "learning_rate": 5.146666666666668e-06, "loss": 0.2415, "step": 193 }, { "epoch": 0.02, "grad_norm": 1.871668931176077, "learning_rate": 5.1733333333333335e-06, "loss": 0.1864, "step": 194 }, { "epoch": 0.02, "grad_norm": 1.9831175773285001, "learning_rate": 5.2e-06, "loss": 0.2285, "step": 195 }, { "epoch": 0.02, "grad_norm": 1.9719109856763253, "learning_rate": 5.226666666666667e-06, "loss": 0.252, "step": 196 }, { "epoch": 0.02, "grad_norm": 2.0807459864555495, "learning_rate": 5.2533333333333336e-06, "loss": 0.2228, "step": 197 }, { "epoch": 0.02, "grad_norm": 1.8797229209078343, "learning_rate": 5.28e-06, "loss": 0.2275, "step": 198 }, { "epoch": 0.02, "grad_norm": 2.08389250069845, "learning_rate": 5.306666666666667e-06, "loss": 0.2272, "step": 199 }, { "epoch": 0.02, "grad_norm": 1.8425804995374695, "learning_rate": 5.333333333333334e-06, "loss": 0.2299, "step": 200 }, { "epoch": 0.02, "grad_norm": 2.0385319681393197, "learning_rate": 5.36e-06, "loss": 0.2434, "step": 201 }, { "epoch": 0.02, "grad_norm": 2.1102651455026393, "learning_rate": 5.386666666666667e-06, "loss": 0.2121, "step": 202 }, { "epoch": 0.02, "grad_norm": 1.9422624276384832, "learning_rate": 5.413333333333334e-06, "loss": 0.2092, "step": 203 }, { "epoch": 0.02, "grad_norm": 2.0720994845225444, "learning_rate": 5.4400000000000004e-06, "loss": 0.2313, "step": 204 }, { "epoch": 0.02, "grad_norm": 1.921416678358997, "learning_rate": 5.466666666666667e-06, "loss": 0.2433, "step": 205 }, { "epoch": 0.02, "grad_norm": 1.797567743632961, "learning_rate": 5.493333333333334e-06, "loss": 0.1871, "step": 206 }, { "epoch": 0.02, "grad_norm": 1.9465412885408135, "learning_rate": 5.5200000000000005e-06, "loss": 0.2206, "step": 207 }, { "epoch": 0.02, "grad_norm": 1.9013536989372188, "learning_rate": 5.546666666666667e-06, "loss": 0.2297, "step": 208 }, { "epoch": 0.02, "grad_norm": 1.9389930349578413, "learning_rate": 5.573333333333334e-06, "loss": 0.215, "step": 209 }, { "epoch": 0.02, "grad_norm": 1.975104964930779, "learning_rate": 5.600000000000001e-06, "loss": 0.2737, "step": 210 }, { "epoch": 0.02, "grad_norm": 2.168711960577819, "learning_rate": 5.626666666666667e-06, "loss": 0.1783, "step": 211 }, { "epoch": 0.02, "grad_norm": 1.9704932793281564, "learning_rate": 5.653333333333334e-06, "loss": 0.227, "step": 212 }, { "epoch": 0.02, "grad_norm": 2.376518860409301, "learning_rate": 5.68e-06, "loss": 0.238, "step": 213 }, { "epoch": 0.02, "grad_norm": 1.8730662945261776, "learning_rate": 5.706666666666667e-06, "loss": 0.2194, "step": 214 }, { "epoch": 0.02, "grad_norm": 2.0232295140895715, "learning_rate": 5.733333333333334e-06, "loss": 0.2156, "step": 215 }, { "epoch": 0.02, "grad_norm": 2.5439796923408347, "learning_rate": 5.76e-06, "loss": 0.2265, "step": 216 }, { "epoch": 0.02, "grad_norm": 1.8615861537301621, "learning_rate": 5.7866666666666674e-06, "loss": 0.1879, "step": 217 }, { "epoch": 0.02, "grad_norm": 2.200188694629253, "learning_rate": 5.813333333333334e-06, "loss": 0.2444, "step": 218 }, { "epoch": 0.02, "grad_norm": 1.8304717092554472, "learning_rate": 5.84e-06, "loss": 0.2002, "step": 219 }, { "epoch": 0.02, "grad_norm": 2.1521566647861547, "learning_rate": 5.8666666666666675e-06, "loss": 0.2095, "step": 220 }, { "epoch": 0.02, "grad_norm": 1.8073865127216844, "learning_rate": 5.893333333333334e-06, "loss": 0.2227, "step": 221 }, { "epoch": 0.02, "grad_norm": 2.024782578359343, "learning_rate": 5.92e-06, "loss": 0.2213, "step": 222 }, { "epoch": 0.02, "grad_norm": 1.9713612411625212, "learning_rate": 5.946666666666668e-06, "loss": 0.2345, "step": 223 }, { "epoch": 0.02, "grad_norm": 2.1359137855094446, "learning_rate": 5.973333333333334e-06, "loss": 0.1907, "step": 224 }, { "epoch": 0.02, "grad_norm": 1.9426893065862894, "learning_rate": 6e-06, "loss": 0.2361, "step": 225 }, { "epoch": 0.02, "grad_norm": 2.0072523715793786, "learning_rate": 6.026666666666668e-06, "loss": 0.2735, "step": 226 }, { "epoch": 0.02, "grad_norm": 1.8592934742515896, "learning_rate": 6.0533333333333335e-06, "loss": 0.211, "step": 227 }, { "epoch": 0.02, "grad_norm": 7.548305143394406, "learning_rate": 6.08e-06, "loss": 0.8559, "step": 228 }, { "epoch": 0.02, "grad_norm": 7.11879905460239, "learning_rate": 6.106666666666668e-06, "loss": 0.7902, "step": 229 }, { "epoch": 0.02, "grad_norm": 1.9948479210724899, "learning_rate": 6.133333333333334e-06, "loss": 0.2324, "step": 230 }, { "epoch": 0.02, "grad_norm": 2.221209470421269, "learning_rate": 6.16e-06, "loss": 0.2747, "step": 231 }, { "epoch": 0.02, "grad_norm": 1.9617241424388598, "learning_rate": 6.186666666666668e-06, "loss": 0.2064, "step": 232 }, { "epoch": 0.02, "grad_norm": 1.9546682934109598, "learning_rate": 6.213333333333334e-06, "loss": 0.2083, "step": 233 }, { "epoch": 0.02, "grad_norm": 1.8838853737186432, "learning_rate": 6.24e-06, "loss": 0.2207, "step": 234 }, { "epoch": 0.02, "grad_norm": 1.757184860971195, "learning_rate": 6.266666666666668e-06, "loss": 0.1955, "step": 235 }, { "epoch": 0.02, "grad_norm": 1.8738247110019026, "learning_rate": 6.293333333333334e-06, "loss": 0.192, "step": 236 }, { "epoch": 0.02, "grad_norm": 5.330828649974647, "learning_rate": 6.3200000000000005e-06, "loss": 0.5612, "step": 237 }, { "epoch": 0.02, "grad_norm": 1.8771857797349003, "learning_rate": 6.346666666666668e-06, "loss": 0.2176, "step": 238 }, { "epoch": 0.02, "grad_norm": 1.8658177829772218, "learning_rate": 6.373333333333334e-06, "loss": 0.2408, "step": 239 }, { "epoch": 0.02, "grad_norm": 1.862642133730281, "learning_rate": 6.4000000000000006e-06, "loss": 0.1968, "step": 240 }, { "epoch": 0.02, "grad_norm": 1.8485072293724576, "learning_rate": 6.426666666666668e-06, "loss": 0.1823, "step": 241 }, { "epoch": 0.02, "grad_norm": 2.008109455070311, "learning_rate": 6.453333333333334e-06, "loss": 0.2123, "step": 242 }, { "epoch": 0.02, "grad_norm": 1.9646557938344782, "learning_rate": 6.480000000000001e-06, "loss": 0.239, "step": 243 }, { "epoch": 0.02, "grad_norm": 1.802054143981362, "learning_rate": 6.5066666666666665e-06, "loss": 0.2153, "step": 244 }, { "epoch": 0.02, "grad_norm": 1.801969351436421, "learning_rate": 6.533333333333334e-06, "loss": 0.217, "step": 245 }, { "epoch": 0.02, "grad_norm": 6.298119907018182, "learning_rate": 6.560000000000001e-06, "loss": 0.7515, "step": 246 }, { "epoch": 0.02, "grad_norm": 1.7330655635007703, "learning_rate": 6.5866666666666666e-06, "loss": 0.1975, "step": 247 }, { "epoch": 0.02, "grad_norm": 2.0251944467161525, "learning_rate": 6.613333333333334e-06, "loss": 0.2445, "step": 248 }, { "epoch": 0.02, "grad_norm": 1.794177856475254, "learning_rate": 6.640000000000001e-06, "loss": 0.2187, "step": 249 }, { "epoch": 0.02, "grad_norm": 2.0157810651568338, "learning_rate": 6.666666666666667e-06, "loss": 0.2493, "step": 250 }, { "epoch": 0.02, "grad_norm": 1.953270411021334, "learning_rate": 6.693333333333334e-06, "loss": 0.202, "step": 251 }, { "epoch": 0.02, "grad_norm": 2.136550048049724, "learning_rate": 6.720000000000001e-06, "loss": 0.2787, "step": 252 }, { "epoch": 0.02, "grad_norm": 1.8213446521605676, "learning_rate": 6.746666666666667e-06, "loss": 0.211, "step": 253 }, { "epoch": 0.02, "grad_norm": 2.094535836932365, "learning_rate": 6.773333333333334e-06, "loss": 0.1939, "step": 254 }, { "epoch": 0.02, "grad_norm": 1.9131906863440633, "learning_rate": 6.800000000000001e-06, "loss": 0.2235, "step": 255 }, { "epoch": 0.02, "grad_norm": 5.409854210199455, "learning_rate": 6.826666666666667e-06, "loss": 0.8275, "step": 256 }, { "epoch": 0.02, "grad_norm": 1.8230651680675103, "learning_rate": 6.853333333333334e-06, "loss": 0.1901, "step": 257 }, { "epoch": 0.02, "grad_norm": 1.8762137776791508, "learning_rate": 6.88e-06, "loss": 0.2608, "step": 258 }, { "epoch": 0.02, "grad_norm": 1.9068957983795443, "learning_rate": 6.906666666666667e-06, "loss": 0.2139, "step": 259 }, { "epoch": 0.02, "grad_norm": 1.8187862767783025, "learning_rate": 6.9333333333333344e-06, "loss": 0.2404, "step": 260 }, { "epoch": 0.02, "grad_norm": 1.8098183193892265, "learning_rate": 6.96e-06, "loss": 0.2145, "step": 261 }, { "epoch": 0.02, "grad_norm": 1.8439694892995122, "learning_rate": 6.986666666666667e-06, "loss": 0.2076, "step": 262 }, { "epoch": 0.02, "grad_norm": 1.7493929656056368, "learning_rate": 7.0133333333333345e-06, "loss": 0.2226, "step": 263 }, { "epoch": 0.02, "grad_norm": 1.8055290320505561, "learning_rate": 7.04e-06, "loss": 0.2188, "step": 264 }, { "epoch": 0.02, "grad_norm": 1.780493758788641, "learning_rate": 7.066666666666667e-06, "loss": 0.2343, "step": 265 }, { "epoch": 0.02, "grad_norm": 1.9117221859319986, "learning_rate": 7.093333333333335e-06, "loss": 0.2715, "step": 266 }, { "epoch": 0.02, "grad_norm": 1.8683689548762643, "learning_rate": 7.1200000000000004e-06, "loss": 0.2264, "step": 267 }, { "epoch": 0.02, "grad_norm": 2.124299529327855, "learning_rate": 7.146666666666667e-06, "loss": 0.2838, "step": 268 }, { "epoch": 0.02, "grad_norm": 1.9898439974394204, "learning_rate": 7.173333333333335e-06, "loss": 0.2614, "step": 269 }, { "epoch": 0.02, "grad_norm": 1.8557654889796444, "learning_rate": 7.2000000000000005e-06, "loss": 0.2487, "step": 270 }, { "epoch": 0.02, "grad_norm": 1.746708978725774, "learning_rate": 7.226666666666667e-06, "loss": 0.2112, "step": 271 }, { "epoch": 0.02, "grad_norm": 4.78569883174777, "learning_rate": 7.253333333333335e-06, "loss": 0.3946, "step": 272 }, { "epoch": 0.02, "grad_norm": 7.477661416396561, "learning_rate": 7.280000000000001e-06, "loss": 0.5882, "step": 273 }, { "epoch": 0.02, "grad_norm": 1.8556111067049568, "learning_rate": 7.306666666666667e-06, "loss": 0.1944, "step": 274 }, { "epoch": 0.02, "grad_norm": 1.883219434344051, "learning_rate": 7.333333333333333e-06, "loss": 0.1615, "step": 275 }, { "epoch": 0.02, "grad_norm": 1.801070208917459, "learning_rate": 7.360000000000001e-06, "loss": 0.2263, "step": 276 }, { "epoch": 0.02, "grad_norm": 1.806011893395799, "learning_rate": 7.386666666666667e-06, "loss": 0.1902, "step": 277 }, { "epoch": 0.02, "grad_norm": 1.9652604726727818, "learning_rate": 7.413333333333333e-06, "loss": 0.2641, "step": 278 }, { "epoch": 0.02, "grad_norm": 1.9322580861382184, "learning_rate": 7.440000000000001e-06, "loss": 0.189, "step": 279 }, { "epoch": 0.02, "grad_norm": 1.9507311818604411, "learning_rate": 7.4666666666666675e-06, "loss": 0.1964, "step": 280 }, { "epoch": 0.02, "grad_norm": 9.604747701390727, "learning_rate": 7.493333333333333e-06, "loss": 0.7259, "step": 281 }, { "epoch": 0.02, "grad_norm": 6.612562328878434, "learning_rate": 7.520000000000001e-06, "loss": 0.6476, "step": 282 }, { "epoch": 0.02, "grad_norm": 5.596977838357216, "learning_rate": 7.5466666666666675e-06, "loss": 0.2662, "step": 283 }, { "epoch": 0.02, "grad_norm": 1.9001729147014241, "learning_rate": 7.573333333333333e-06, "loss": 0.2534, "step": 284 }, { "epoch": 0.02, "grad_norm": 1.844177418989628, "learning_rate": 7.600000000000001e-06, "loss": 0.2497, "step": 285 }, { "epoch": 0.02, "grad_norm": 1.5932126387816608, "learning_rate": 7.626666666666668e-06, "loss": 0.1896, "step": 286 }, { "epoch": 0.02, "grad_norm": 2.170754685838896, "learning_rate": 7.653333333333333e-06, "loss": 0.2431, "step": 287 }, { "epoch": 0.02, "grad_norm": 1.826138504100035, "learning_rate": 7.680000000000001e-06, "loss": 0.1937, "step": 288 }, { "epoch": 0.02, "grad_norm": 1.9202338734227242, "learning_rate": 7.706666666666669e-06, "loss": 0.2727, "step": 289 }, { "epoch": 0.02, "grad_norm": 10.920998957025096, "learning_rate": 7.733333333333334e-06, "loss": 0.7006, "step": 290 }, { "epoch": 0.02, "grad_norm": 1.8418945575301588, "learning_rate": 7.76e-06, "loss": 0.2167, "step": 291 }, { "epoch": 0.02, "grad_norm": 1.673045546958607, "learning_rate": 7.786666666666666e-06, "loss": 0.2321, "step": 292 }, { "epoch": 0.02, "grad_norm": 1.8366811114957107, "learning_rate": 7.813333333333334e-06, "loss": 0.1899, "step": 293 }, { "epoch": 0.02, "grad_norm": 1.8869371609251013, "learning_rate": 7.840000000000001e-06, "loss": 0.259, "step": 294 }, { "epoch": 0.02, "grad_norm": 1.8667091217240646, "learning_rate": 7.866666666666667e-06, "loss": 0.2247, "step": 295 }, { "epoch": 0.02, "grad_norm": 1.5730756464467448, "learning_rate": 7.893333333333335e-06, "loss": 0.2151, "step": 296 }, { "epoch": 0.02, "grad_norm": 2.0657362534847246, "learning_rate": 7.92e-06, "loss": 0.3143, "step": 297 }, { "epoch": 0.02, "grad_norm": 2.1407957139153857, "learning_rate": 7.946666666666666e-06, "loss": 0.2284, "step": 298 }, { "epoch": 0.02, "grad_norm": 1.549824490588346, "learning_rate": 7.973333333333334e-06, "loss": 0.1499, "step": 299 }, { "epoch": 0.02, "grad_norm": 6.6912225591495655, "learning_rate": 8.000000000000001e-06, "loss": 0.7355, "step": 300 }, { "epoch": 0.02, "grad_norm": 1.9068102090093282, "learning_rate": 8.026666666666667e-06, "loss": 0.2488, "step": 301 }, { "epoch": 0.02, "grad_norm": 1.8057644158054502, "learning_rate": 8.053333333333335e-06, "loss": 0.2296, "step": 302 }, { "epoch": 0.02, "grad_norm": 5.026373947332388, "learning_rate": 8.08e-06, "loss": 0.597, "step": 303 }, { "epoch": 0.02, "grad_norm": 1.787899072762206, "learning_rate": 8.106666666666666e-06, "loss": 0.2073, "step": 304 }, { "epoch": 0.02, "grad_norm": 1.6580375831203185, "learning_rate": 8.133333333333334e-06, "loss": 0.2289, "step": 305 }, { "epoch": 0.02, "grad_norm": 1.7894920400473349, "learning_rate": 8.16e-06, "loss": 0.1785, "step": 306 }, { "epoch": 0.02, "grad_norm": 7.198390276773755, "learning_rate": 8.186666666666667e-06, "loss": 0.6606, "step": 307 }, { "epoch": 0.02, "grad_norm": 1.6930201576413186, "learning_rate": 8.213333333333335e-06, "loss": 0.2181, "step": 308 }, { "epoch": 0.02, "grad_norm": 2.019897622083253, "learning_rate": 8.24e-06, "loss": 0.2236, "step": 309 }, { "epoch": 0.02, "grad_norm": 1.7334906676332429, "learning_rate": 8.266666666666667e-06, "loss": 0.2353, "step": 310 }, { "epoch": 0.02, "grad_norm": 1.8572114438308505, "learning_rate": 8.293333333333334e-06, "loss": 0.2772, "step": 311 }, { "epoch": 0.02, "grad_norm": 1.9469342984931026, "learning_rate": 8.32e-06, "loss": 0.2296, "step": 312 }, { "epoch": 0.03, "grad_norm": 1.9630239498130868, "learning_rate": 8.346666666666668e-06, "loss": 0.262, "step": 313 }, { "epoch": 0.03, "grad_norm": 4.27470813972205, "learning_rate": 8.373333333333335e-06, "loss": 0.5589, "step": 314 }, { "epoch": 0.03, "grad_norm": 2.0608869422345855, "learning_rate": 8.400000000000001e-06, "loss": 0.2098, "step": 315 }, { "epoch": 0.03, "grad_norm": 10.917682143653835, "learning_rate": 8.426666666666667e-06, "loss": 0.596, "step": 316 }, { "epoch": 0.03, "grad_norm": 1.7643571133847522, "learning_rate": 8.453333333333334e-06, "loss": 0.2344, "step": 317 }, { "epoch": 0.03, "grad_norm": 1.7957129154630194, "learning_rate": 8.48e-06, "loss": 0.2214, "step": 318 }, { "epoch": 0.03, "grad_norm": 2.028080745862539, "learning_rate": 8.506666666666668e-06, "loss": 0.2543, "step": 319 }, { "epoch": 0.03, "grad_norm": 1.778878662216473, "learning_rate": 8.533333333333335e-06, "loss": 0.2381, "step": 320 }, { "epoch": 0.03, "grad_norm": 1.7940192538414392, "learning_rate": 8.560000000000001e-06, "loss": 0.2437, "step": 321 }, { "epoch": 0.03, "grad_norm": 1.751415208191758, "learning_rate": 8.586666666666667e-06, "loss": 0.2373, "step": 322 }, { "epoch": 0.03, "grad_norm": 1.7286912456876244, "learning_rate": 8.613333333333333e-06, "loss": 0.1959, "step": 323 }, { "epoch": 0.03, "grad_norm": 6.4695403527963204, "learning_rate": 8.64e-06, "loss": 0.7221, "step": 324 }, { "epoch": 0.03, "grad_norm": 1.6958831748028036, "learning_rate": 8.666666666666668e-06, "loss": 0.1976, "step": 325 }, { "epoch": 0.03, "grad_norm": 1.8568383992999282, "learning_rate": 8.693333333333334e-06, "loss": 0.2119, "step": 326 }, { "epoch": 0.03, "grad_norm": 1.7775066097406733, "learning_rate": 8.720000000000001e-06, "loss": 0.2459, "step": 327 }, { "epoch": 0.03, "grad_norm": 1.9796336175492852, "learning_rate": 8.746666666666667e-06, "loss": 0.228, "step": 328 }, { "epoch": 0.03, "grad_norm": 1.725337679038237, "learning_rate": 8.773333333333333e-06, "loss": 0.2803, "step": 329 }, { "epoch": 0.03, "grad_norm": 1.8607473362530842, "learning_rate": 8.8e-06, "loss": 0.23, "step": 330 }, { "epoch": 0.03, "grad_norm": 1.7873655100663122, "learning_rate": 8.826666666666668e-06, "loss": 0.2475, "step": 331 }, { "epoch": 0.03, "grad_norm": 1.9470752103117503, "learning_rate": 8.853333333333334e-06, "loss": 0.2079, "step": 332 }, { "epoch": 0.03, "grad_norm": 1.9077336629858086, "learning_rate": 8.880000000000001e-06, "loss": 0.2496, "step": 333 }, { "epoch": 0.03, "grad_norm": 1.865253714411325, "learning_rate": 8.906666666666667e-06, "loss": 0.202, "step": 334 }, { "epoch": 0.03, "grad_norm": 1.8483104452799306, "learning_rate": 8.933333333333333e-06, "loss": 0.213, "step": 335 }, { "epoch": 0.03, "grad_norm": 6.655696928032451, "learning_rate": 8.96e-06, "loss": 0.6472, "step": 336 }, { "epoch": 0.03, "grad_norm": 1.6530692292567741, "learning_rate": 8.986666666666666e-06, "loss": 0.2225, "step": 337 }, { "epoch": 0.03, "grad_norm": 2.0226672513208204, "learning_rate": 9.013333333333334e-06, "loss": 0.227, "step": 338 }, { "epoch": 0.03, "grad_norm": 1.738470394388767, "learning_rate": 9.040000000000002e-06, "loss": 0.2146, "step": 339 }, { "epoch": 0.03, "grad_norm": 1.626623339712544, "learning_rate": 9.066666666666667e-06, "loss": 0.1957, "step": 340 }, { "epoch": 0.03, "grad_norm": 1.744696499076202, "learning_rate": 9.093333333333333e-06, "loss": 0.2002, "step": 341 }, { "epoch": 0.03, "grad_norm": 1.7051863913858039, "learning_rate": 9.12e-06, "loss": 0.21, "step": 342 }, { "epoch": 0.03, "grad_norm": 1.686387549981206, "learning_rate": 9.146666666666667e-06, "loss": 0.2006, "step": 343 }, { "epoch": 0.03, "grad_norm": 1.7793543082196661, "learning_rate": 9.173333333333334e-06, "loss": 0.2408, "step": 344 }, { "epoch": 0.03, "grad_norm": 1.7479798580717922, "learning_rate": 9.200000000000002e-06, "loss": 0.2642, "step": 345 }, { "epoch": 0.03, "grad_norm": 1.7614687271317424, "learning_rate": 9.226666666666668e-06, "loss": 0.2313, "step": 346 }, { "epoch": 0.03, "grad_norm": 1.722511361895267, "learning_rate": 9.253333333333333e-06, "loss": 0.2102, "step": 347 }, { "epoch": 0.03, "grad_norm": 2.161835584502886, "learning_rate": 9.280000000000001e-06, "loss": 0.2899, "step": 348 }, { "epoch": 0.03, "grad_norm": 15.64586602691352, "learning_rate": 9.306666666666667e-06, "loss": 0.7191, "step": 349 }, { "epoch": 0.03, "grad_norm": 17.64111257548122, "learning_rate": 9.333333333333334e-06, "loss": 0.6482, "step": 350 }, { "epoch": 0.03, "grad_norm": 9.42583444153681, "learning_rate": 9.360000000000002e-06, "loss": 0.7959, "step": 351 }, { "epoch": 0.03, "grad_norm": 1.5424720013071538, "learning_rate": 9.386666666666668e-06, "loss": 0.1429, "step": 352 }, { "epoch": 0.03, "grad_norm": 1.982575233322272, "learning_rate": 9.413333333333334e-06, "loss": 0.2918, "step": 353 }, { "epoch": 0.03, "grad_norm": 1.8336683173813957, "learning_rate": 9.440000000000001e-06, "loss": 0.2107, "step": 354 }, { "epoch": 0.03, "grad_norm": 1.9179224490424405, "learning_rate": 9.466666666666667e-06, "loss": 0.2576, "step": 355 }, { "epoch": 0.03, "grad_norm": 1.7444807373179754, "learning_rate": 9.493333333333334e-06, "loss": 0.244, "step": 356 }, { "epoch": 0.03, "grad_norm": 1.8960163999495967, "learning_rate": 9.52e-06, "loss": 0.246, "step": 357 }, { "epoch": 0.03, "grad_norm": 1.8008657169690534, "learning_rate": 9.546666666666668e-06, "loss": 0.2349, "step": 358 }, { "epoch": 0.03, "grad_norm": 1.810225823418963, "learning_rate": 9.573333333333334e-06, "loss": 0.2366, "step": 359 }, { "epoch": 0.03, "grad_norm": 2.047361957077407, "learning_rate": 9.600000000000001e-06, "loss": 0.3164, "step": 360 }, { "epoch": 0.03, "grad_norm": 1.8823083558049156, "learning_rate": 9.626666666666667e-06, "loss": 0.2385, "step": 361 }, { "epoch": 0.03, "grad_norm": 1.698176588727454, "learning_rate": 9.653333333333335e-06, "loss": 0.1787, "step": 362 }, { "epoch": 0.03, "grad_norm": 1.8160965938479063, "learning_rate": 9.68e-06, "loss": 0.2242, "step": 363 }, { "epoch": 0.03, "grad_norm": 1.9437981118956582, "learning_rate": 9.706666666666668e-06, "loss": 0.2605, "step": 364 }, { "epoch": 0.03, "grad_norm": 5.373439759697795, "learning_rate": 9.733333333333334e-06, "loss": 0.6327, "step": 365 }, { "epoch": 0.03, "grad_norm": 2.0998902475697885, "learning_rate": 9.760000000000001e-06, "loss": 0.2929, "step": 366 }, { "epoch": 0.03, "grad_norm": 1.8744366599560585, "learning_rate": 9.786666666666667e-06, "loss": 0.2381, "step": 367 }, { "epoch": 0.03, "grad_norm": 1.7793901248003852, "learning_rate": 9.813333333333333e-06, "loss": 0.246, "step": 368 }, { "epoch": 0.03, "grad_norm": 1.6586529404544306, "learning_rate": 9.84e-06, "loss": 0.206, "step": 369 }, { "epoch": 0.03, "grad_norm": 1.9018004543174982, "learning_rate": 9.866666666666668e-06, "loss": 0.2365, "step": 370 }, { "epoch": 0.03, "grad_norm": 1.7106291778939413, "learning_rate": 9.893333333333334e-06, "loss": 0.2469, "step": 371 }, { "epoch": 0.03, "grad_norm": 1.9474151303100837, "learning_rate": 9.920000000000002e-06, "loss": 0.3033, "step": 372 }, { "epoch": 0.03, "grad_norm": 1.742825501117863, "learning_rate": 9.946666666666667e-06, "loss": 0.2719, "step": 373 }, { "epoch": 0.03, "grad_norm": 1.649110246787641, "learning_rate": 9.973333333333333e-06, "loss": 0.2706, "step": 374 }, { "epoch": 0.03, "grad_norm": 6.433233054289498, "learning_rate": 1e-05, "loss": 0.7441, "step": 375 }, { "epoch": 0.03, "grad_norm": 1.7598126079007446, "learning_rate": 9.999999832167426e-06, "loss": 0.2421, "step": 376 }, { "epoch": 0.03, "grad_norm": 1.7327741927654245, "learning_rate": 9.999999328669713e-06, "loss": 0.2139, "step": 377 }, { "epoch": 0.03, "grad_norm": 1.7952826167116798, "learning_rate": 9.999998489506897e-06, "loss": 0.2542, "step": 378 }, { "epoch": 0.03, "grad_norm": 1.9112956335206404, "learning_rate": 9.999997314679031e-06, "loss": 0.2414, "step": 379 }, { "epoch": 0.03, "grad_norm": 1.8893839347164407, "learning_rate": 9.999995804186196e-06, "loss": 0.2711, "step": 380 }, { "epoch": 0.03, "grad_norm": 1.693669030375468, "learning_rate": 9.999993958028495e-06, "loss": 0.239, "step": 381 }, { "epoch": 0.03, "grad_norm": 1.6549425714155548, "learning_rate": 9.999991776206049e-06, "loss": 0.1948, "step": 382 }, { "epoch": 0.03, "grad_norm": 2.2597209046667817, "learning_rate": 9.999989258719005e-06, "loss": 0.2375, "step": 383 }, { "epoch": 0.03, "grad_norm": 8.87451040285667, "learning_rate": 9.999986405567535e-06, "loss": 0.7829, "step": 384 }, { "epoch": 0.03, "grad_norm": 2.0777043865702987, "learning_rate": 9.999983216751826e-06, "loss": 0.2943, "step": 385 }, { "epoch": 0.03, "grad_norm": 1.697246082818542, "learning_rate": 9.999979692272095e-06, "loss": 0.2378, "step": 386 }, { "epoch": 0.03, "grad_norm": 1.8307078193666357, "learning_rate": 9.999975832128578e-06, "loss": 0.2814, "step": 387 }, { "epoch": 0.03, "grad_norm": 1.8804460518214392, "learning_rate": 9.999971636321535e-06, "loss": 0.2378, "step": 388 }, { "epoch": 0.03, "grad_norm": 1.9249249089304041, "learning_rate": 9.999967104851244e-06, "loss": 0.2693, "step": 389 }, { "epoch": 0.03, "grad_norm": 1.959547312703074, "learning_rate": 9.999962237718015e-06, "loss": 0.2515, "step": 390 }, { "epoch": 0.03, "grad_norm": 1.8687320589379146, "learning_rate": 9.99995703492217e-06, "loss": 0.2584, "step": 391 }, { "epoch": 0.03, "grad_norm": 1.7066756693145695, "learning_rate": 9.999951496464062e-06, "loss": 0.1908, "step": 392 }, { "epoch": 0.03, "grad_norm": 1.7529062052157671, "learning_rate": 9.999945622344058e-06, "loss": 0.2449, "step": 393 }, { "epoch": 0.03, "grad_norm": 7.411631790011608, "learning_rate": 9.999939412562558e-06, "loss": 0.7327, "step": 394 }, { "epoch": 0.03, "grad_norm": 1.9015666314960635, "learning_rate": 9.999932867119974e-06, "loss": 0.2759, "step": 395 }, { "epoch": 0.03, "grad_norm": 1.7322147151979983, "learning_rate": 9.999925986016748e-06, "loss": 0.2217, "step": 396 }, { "epoch": 0.03, "grad_norm": 1.7866095729204259, "learning_rate": 9.99991876925334e-06, "loss": 0.2588, "step": 397 }, { "epoch": 0.03, "grad_norm": 6.174368380749566, "learning_rate": 9.999911216830239e-06, "loss": 0.4313, "step": 398 }, { "epoch": 0.03, "grad_norm": 1.5310984726495105, "learning_rate": 9.999903328747946e-06, "loss": 0.1944, "step": 399 }, { "epoch": 0.03, "grad_norm": 1.8519706513959513, "learning_rate": 9.999895105006995e-06, "loss": 0.2823, "step": 400 }, { "epoch": 0.03, "grad_norm": 1.6637054641929983, "learning_rate": 9.999886545607935e-06, "loss": 0.1717, "step": 401 }, { "epoch": 0.03, "grad_norm": 1.9952085032243967, "learning_rate": 9.999877650551344e-06, "loss": 0.2397, "step": 402 }, { "epoch": 0.03, "grad_norm": 1.8545572787682478, "learning_rate": 9.999868419837818e-06, "loss": 0.3225, "step": 403 }, { "epoch": 0.03, "grad_norm": 1.8104336215147734, "learning_rate": 9.999858853467972e-06, "loss": 0.2675, "step": 404 }, { "epoch": 0.03, "grad_norm": 1.7762845295831875, "learning_rate": 9.999848951442455e-06, "loss": 0.2207, "step": 405 }, { "epoch": 0.03, "grad_norm": 1.797172007569505, "learning_rate": 9.99983871376193e-06, "loss": 0.2024, "step": 406 }, { "epoch": 0.03, "grad_norm": 1.615293018085328, "learning_rate": 9.999828140427082e-06, "loss": 0.2355, "step": 407 }, { "epoch": 0.03, "grad_norm": 1.8968112208105452, "learning_rate": 9.99981723143862e-06, "loss": 0.2597, "step": 408 }, { "epoch": 0.03, "grad_norm": 1.754145304264831, "learning_rate": 9.99980598679728e-06, "loss": 0.2534, "step": 409 }, { "epoch": 0.03, "grad_norm": 2.007116910841986, "learning_rate": 9.999794406503816e-06, "loss": 0.2539, "step": 410 }, { "epoch": 0.03, "grad_norm": 1.9583626850834912, "learning_rate": 9.999782490559004e-06, "loss": 0.2394, "step": 411 }, { "epoch": 0.03, "grad_norm": 1.817502406871994, "learning_rate": 9.999770238963646e-06, "loss": 0.269, "step": 412 }, { "epoch": 0.03, "grad_norm": 1.796483917544564, "learning_rate": 9.999757651718562e-06, "loss": 0.276, "step": 413 }, { "epoch": 0.03, "grad_norm": 1.7443361297939857, "learning_rate": 9.999744728824599e-06, "loss": 0.2522, "step": 414 }, { "epoch": 0.03, "grad_norm": 1.9119833716967742, "learning_rate": 9.999731470282621e-06, "loss": 0.2566, "step": 415 }, { "epoch": 0.03, "grad_norm": 8.604759730691754, "learning_rate": 9.999717876093525e-06, "loss": 0.7022, "step": 416 }, { "epoch": 0.03, "grad_norm": 1.9478155159981574, "learning_rate": 9.999703946258217e-06, "loss": 0.2637, "step": 417 }, { "epoch": 0.03, "grad_norm": 1.8742409053779854, "learning_rate": 9.999689680777634e-06, "loss": 0.282, "step": 418 }, { "epoch": 0.03, "grad_norm": 1.936408504271645, "learning_rate": 9.999675079652736e-06, "loss": 0.2429, "step": 419 }, { "epoch": 0.03, "grad_norm": 1.508756672582905, "learning_rate": 9.9996601428845e-06, "loss": 0.2627, "step": 420 }, { "epoch": 0.03, "grad_norm": 1.6844374620864568, "learning_rate": 9.99964487047393e-06, "loss": 0.2613, "step": 421 }, { "epoch": 0.03, "grad_norm": 1.587888438073988, "learning_rate": 9.999629262422053e-06, "loss": 0.2277, "step": 422 }, { "epoch": 0.03, "grad_norm": 1.7669357863217947, "learning_rate": 9.999613318729915e-06, "loss": 0.2395, "step": 423 }, { "epoch": 0.03, "grad_norm": 5.272726419813807, "learning_rate": 9.999597039398586e-06, "loss": 0.5656, "step": 424 }, { "epoch": 0.03, "grad_norm": 1.5784608076294535, "learning_rate": 9.99958042442916e-06, "loss": 0.2131, "step": 425 }, { "epoch": 0.03, "grad_norm": 1.620535365064519, "learning_rate": 9.999563473822752e-06, "loss": 0.2197, "step": 426 }, { "epoch": 0.03, "grad_norm": 1.817314467358351, "learning_rate": 9.9995461875805e-06, "loss": 0.3054, "step": 427 }, { "epoch": 0.03, "grad_norm": 1.814826032540398, "learning_rate": 9.999528565703564e-06, "loss": 0.2649, "step": 428 }, { "epoch": 0.03, "grad_norm": 1.7801458756492414, "learning_rate": 9.999510608193128e-06, "loss": 0.2512, "step": 429 }, { "epoch": 0.03, "grad_norm": 1.7476056654752405, "learning_rate": 9.999492315050396e-06, "loss": 0.2738, "step": 430 }, { "epoch": 0.03, "grad_norm": 1.6010235466810252, "learning_rate": 9.999473686276598e-06, "loss": 0.2201, "step": 431 }, { "epoch": 0.03, "grad_norm": 1.8536584612725224, "learning_rate": 9.999454721872983e-06, "loss": 0.2709, "step": 432 }, { "epoch": 0.03, "grad_norm": 1.8135164102945176, "learning_rate": 9.999435421840826e-06, "loss": 0.2487, "step": 433 }, { "epoch": 0.03, "grad_norm": 1.5818417174610364, "learning_rate": 9.99941578618142e-06, "loss": 0.2141, "step": 434 }, { "epoch": 0.03, "grad_norm": 1.6005212701306177, "learning_rate": 9.999395814896086e-06, "loss": 0.1943, "step": 435 }, { "epoch": 0.03, "grad_norm": 1.5933879667459743, "learning_rate": 9.999375507986163e-06, "loss": 0.2374, "step": 436 }, { "epoch": 0.03, "grad_norm": 1.688048880829338, "learning_rate": 9.999354865453012e-06, "loss": 0.2335, "step": 437 }, { "epoch": 0.04, "grad_norm": 8.187328425483692, "learning_rate": 9.999333887298025e-06, "loss": 0.8429, "step": 438 }, { "epoch": 0.04, "grad_norm": 1.7960781284703393, "learning_rate": 9.999312573522606e-06, "loss": 0.2388, "step": 439 }, { "epoch": 0.04, "grad_norm": 1.7385905500174983, "learning_rate": 9.999290924128186e-06, "loss": 0.3019, "step": 440 }, { "epoch": 0.04, "grad_norm": 1.6675120324319856, "learning_rate": 9.999268939116218e-06, "loss": 0.2551, "step": 441 }, { "epoch": 0.04, "grad_norm": 1.7654656204783086, "learning_rate": 9.999246618488181e-06, "loss": 0.2163, "step": 442 }, { "epoch": 0.04, "grad_norm": 1.6330572866481377, "learning_rate": 9.99922396224557e-06, "loss": 0.1949, "step": 443 }, { "epoch": 0.04, "grad_norm": 1.6030756243071447, "learning_rate": 9.999200970389909e-06, "loss": 0.2331, "step": 444 }, { "epoch": 0.04, "grad_norm": 1.8210510700229596, "learning_rate": 9.999177642922736e-06, "loss": 0.295, "step": 445 }, { "epoch": 0.04, "grad_norm": 1.7002742660671717, "learning_rate": 9.999153979845625e-06, "loss": 0.2716, "step": 446 }, { "epoch": 0.04, "grad_norm": 1.807176114965877, "learning_rate": 9.999129981160159e-06, "loss": 0.2146, "step": 447 }, { "epoch": 0.04, "grad_norm": 1.5681821768850328, "learning_rate": 9.99910564686795e-06, "loss": 0.2238, "step": 448 }, { "epoch": 0.04, "grad_norm": 1.7555130945026285, "learning_rate": 9.999080976970635e-06, "loss": 0.2364, "step": 449 }, { "epoch": 0.04, "grad_norm": 1.8856066109241383, "learning_rate": 9.999055971469864e-06, "loss": 0.2595, "step": 450 }, { "epoch": 0.04, "grad_norm": 1.7074668763472476, "learning_rate": 9.99903063036732e-06, "loss": 0.2229, "step": 451 }, { "epoch": 0.04, "grad_norm": 1.7397396556855906, "learning_rate": 9.999004953664703e-06, "loss": 0.2335, "step": 452 }, { "epoch": 0.04, "grad_norm": 1.7253941594552007, "learning_rate": 9.998978941363739e-06, "loss": 0.2516, "step": 453 }, { "epoch": 0.04, "grad_norm": 1.367070400252453, "learning_rate": 9.998952593466171e-06, "loss": 0.1554, "step": 454 }, { "epoch": 0.04, "grad_norm": 7.777433265413999, "learning_rate": 9.998925909973769e-06, "loss": 0.6771, "step": 455 }, { "epoch": 0.04, "grad_norm": 1.837246019873197, "learning_rate": 9.998898890888325e-06, "loss": 0.2477, "step": 456 }, { "epoch": 0.04, "grad_norm": 1.6072135157782246, "learning_rate": 9.998871536211652e-06, "loss": 0.2286, "step": 457 }, { "epoch": 0.04, "grad_norm": 1.6051591770866929, "learning_rate": 9.998843845945587e-06, "loss": 0.2172, "step": 458 }, { "epoch": 0.04, "grad_norm": 1.6027647348744865, "learning_rate": 9.99881582009199e-06, "loss": 0.2305, "step": 459 }, { "epoch": 0.04, "grad_norm": 1.768135323642207, "learning_rate": 9.99878745865274e-06, "loss": 0.2705, "step": 460 }, { "epoch": 0.04, "grad_norm": 1.7930246213753742, "learning_rate": 9.99875876162974e-06, "loss": 0.2599, "step": 461 }, { "epoch": 0.04, "grad_norm": 1.7192238909009794, "learning_rate": 9.998729729024922e-06, "loss": 0.2626, "step": 462 }, { "epoch": 0.04, "grad_norm": 1.692627846513932, "learning_rate": 9.998700360840231e-06, "loss": 0.2517, "step": 463 }, { "epoch": 0.04, "grad_norm": 1.646238954943402, "learning_rate": 9.998670657077638e-06, "loss": 0.1984, "step": 464 }, { "epoch": 0.04, "grad_norm": 1.6968526535099162, "learning_rate": 9.99864061773914e-06, "loss": 0.2461, "step": 465 }, { "epoch": 0.04, "grad_norm": 1.5928989101111992, "learning_rate": 9.998610242826752e-06, "loss": 0.2378, "step": 466 }, { "epoch": 0.04, "grad_norm": 1.6016481726238563, "learning_rate": 9.998579532342511e-06, "loss": 0.2267, "step": 467 }, { "epoch": 0.04, "grad_norm": 1.77298901119548, "learning_rate": 9.998548486288483e-06, "loss": 0.2291, "step": 468 }, { "epoch": 0.04, "grad_norm": 1.7132993670059347, "learning_rate": 9.998517104666749e-06, "loss": 0.2264, "step": 469 }, { "epoch": 0.04, "grad_norm": 1.6817500764056423, "learning_rate": 9.998485387479418e-06, "loss": 0.2113, "step": 470 }, { "epoch": 0.04, "grad_norm": 1.8988742098042801, "learning_rate": 9.998453334728619e-06, "loss": 0.2257, "step": 471 }, { "epoch": 0.04, "grad_norm": 1.6829839684411732, "learning_rate": 9.9984209464165e-06, "loss": 0.2189, "step": 472 }, { "epoch": 0.04, "grad_norm": 1.6160207290097985, "learning_rate": 9.998388222545242e-06, "loss": 0.2233, "step": 473 }, { "epoch": 0.04, "grad_norm": 1.6062583538820858, "learning_rate": 9.998355163117035e-06, "loss": 0.206, "step": 474 }, { "epoch": 0.04, "grad_norm": 1.686815420738238, "learning_rate": 9.998321768134101e-06, "loss": 0.2472, "step": 475 }, { "epoch": 0.04, "grad_norm": 1.664740784069062, "learning_rate": 9.998288037598684e-06, "loss": 0.2344, "step": 476 }, { "epoch": 0.04, "grad_norm": 1.6842869750212988, "learning_rate": 9.998253971513048e-06, "loss": 0.2393, "step": 477 }, { "epoch": 0.04, "grad_norm": 1.6891112601576237, "learning_rate": 9.998219569879476e-06, "loss": 0.2362, "step": 478 }, { "epoch": 0.04, "grad_norm": 1.7388089742044242, "learning_rate": 9.998184832700282e-06, "loss": 0.2438, "step": 479 }, { "epoch": 0.04, "grad_norm": 1.8036799202626073, "learning_rate": 9.998149759977795e-06, "loss": 0.2179, "step": 480 }, { "epoch": 0.04, "grad_norm": 1.7263253675044903, "learning_rate": 9.998114351714373e-06, "loss": 0.216, "step": 481 }, { "epoch": 0.04, "grad_norm": 1.9501042184813449, "learning_rate": 9.99807860791239e-06, "loss": 0.2944, "step": 482 }, { "epoch": 0.04, "grad_norm": 1.501912712257805, "learning_rate": 9.998042528574246e-06, "loss": 0.1826, "step": 483 }, { "epoch": 0.04, "grad_norm": 1.744971987478523, "learning_rate": 9.998006113702363e-06, "loss": 0.2325, "step": 484 }, { "epoch": 0.04, "grad_norm": 1.7658194754401142, "learning_rate": 9.997969363299187e-06, "loss": 0.2535, "step": 485 }, { "epoch": 0.04, "grad_norm": 1.6956324374083949, "learning_rate": 9.997932277367183e-06, "loss": 0.2549, "step": 486 }, { "epoch": 0.04, "grad_norm": 1.9675776314228486, "learning_rate": 9.997894855908844e-06, "loss": 0.2411, "step": 487 }, { "epoch": 0.04, "grad_norm": 1.492592773883712, "learning_rate": 9.997857098926679e-06, "loss": 0.1714, "step": 488 }, { "epoch": 0.04, "grad_norm": 1.8597328856530386, "learning_rate": 9.997819006423227e-06, "loss": 0.2189, "step": 489 }, { "epoch": 0.04, "grad_norm": 1.6858733738383007, "learning_rate": 9.99778057840104e-06, "loss": 0.2047, "step": 490 }, { "epoch": 0.04, "grad_norm": 1.659006252231804, "learning_rate": 9.9977418148627e-06, "loss": 0.2575, "step": 491 }, { "epoch": 0.04, "grad_norm": 1.5191581617735919, "learning_rate": 9.99770271581081e-06, "loss": 0.2221, "step": 492 }, { "epoch": 0.04, "grad_norm": 1.566686974966257, "learning_rate": 9.997663281247993e-06, "loss": 0.2337, "step": 493 }, { "epoch": 0.04, "grad_norm": 1.7483886664121793, "learning_rate": 9.9976235111769e-06, "loss": 0.287, "step": 494 }, { "epoch": 0.04, "grad_norm": 1.5983677886961352, "learning_rate": 9.997583405600194e-06, "loss": 0.1951, "step": 495 }, { "epoch": 0.04, "grad_norm": 1.602791792171724, "learning_rate": 9.997542964520576e-06, "loss": 0.2432, "step": 496 }, { "epoch": 0.04, "grad_norm": 1.5582802615572773, "learning_rate": 9.997502187940757e-06, "loss": 0.2039, "step": 497 }, { "epoch": 0.04, "grad_norm": 1.6463461419806718, "learning_rate": 9.997461075863473e-06, "loss": 0.2609, "step": 498 }, { "epoch": 0.04, "grad_norm": 1.9442539106674928, "learning_rate": 9.997419628291485e-06, "loss": 0.279, "step": 499 }, { "epoch": 0.04, "grad_norm": 1.4895204015877028, "learning_rate": 9.997377845227577e-06, "loss": 0.1967, "step": 500 }, { "epoch": 0.04, "grad_norm": 1.635756892914474, "learning_rate": 9.99733572667455e-06, "loss": 0.2752, "step": 501 }, { "epoch": 0.04, "grad_norm": 1.7491036468801793, "learning_rate": 9.997293272635236e-06, "loss": 0.2402, "step": 502 }, { "epoch": 0.04, "grad_norm": 1.5469829330069942, "learning_rate": 9.997250483112483e-06, "loss": 0.1876, "step": 503 }, { "epoch": 0.04, "grad_norm": 1.6720312780346405, "learning_rate": 9.997207358109166e-06, "loss": 0.2323, "step": 504 }, { "epoch": 0.04, "grad_norm": 1.8177630203346855, "learning_rate": 9.997163897628175e-06, "loss": 0.2402, "step": 505 }, { "epoch": 0.04, "grad_norm": 1.501670140224173, "learning_rate": 9.997120101672434e-06, "loss": 0.1858, "step": 506 }, { "epoch": 0.04, "grad_norm": 1.4159744247278743, "learning_rate": 9.997075970244878e-06, "loss": 0.2067, "step": 507 }, { "epoch": 0.04, "grad_norm": 1.7360277144501992, "learning_rate": 9.997031503348473e-06, "loss": 0.258, "step": 508 }, { "epoch": 0.04, "grad_norm": 1.6441702779488527, "learning_rate": 9.996986700986201e-06, "loss": 0.1734, "step": 509 }, { "epoch": 0.04, "grad_norm": 4.728726117742014, "learning_rate": 9.996941563161071e-06, "loss": 0.6285, "step": 510 }, { "epoch": 0.04, "grad_norm": 1.6457050606364305, "learning_rate": 9.996896089876116e-06, "loss": 0.2351, "step": 511 }, { "epoch": 0.04, "grad_norm": 4.6029027785714085, "learning_rate": 9.996850281134385e-06, "loss": 0.5682, "step": 512 }, { "epoch": 0.04, "grad_norm": 1.6916565173802096, "learning_rate": 9.996804136938956e-06, "loss": 0.2322, "step": 513 }, { "epoch": 0.04, "grad_norm": 1.691599731192677, "learning_rate": 9.996757657292923e-06, "loss": 0.1974, "step": 514 }, { "epoch": 0.04, "grad_norm": 1.659840187173813, "learning_rate": 9.996710842199412e-06, "loss": 0.2344, "step": 515 }, { "epoch": 0.04, "grad_norm": 1.6493918530999607, "learning_rate": 9.99666369166156e-06, "loss": 0.2352, "step": 516 }, { "epoch": 0.04, "grad_norm": 1.3729762300075468, "learning_rate": 9.996616205682538e-06, "loss": 0.2157, "step": 517 }, { "epoch": 0.04, "grad_norm": 1.505073858359476, "learning_rate": 9.996568384265529e-06, "loss": 0.2292, "step": 518 }, { "epoch": 0.04, "grad_norm": 1.6759476137826335, "learning_rate": 9.996520227413747e-06, "loss": 0.2714, "step": 519 }, { "epoch": 0.04, "grad_norm": 1.9658492138473096, "learning_rate": 9.996471735130422e-06, "loss": 0.2911, "step": 520 }, { "epoch": 0.04, "grad_norm": 1.7163456216618151, "learning_rate": 9.99642290741881e-06, "loss": 0.236, "step": 521 }, { "epoch": 0.04, "grad_norm": 1.764867585471799, "learning_rate": 9.99637374428219e-06, "loss": 0.2337, "step": 522 }, { "epoch": 0.04, "grad_norm": 1.6346477893821554, "learning_rate": 9.996324245723863e-06, "loss": 0.2375, "step": 523 }, { "epoch": 0.04, "grad_norm": 1.558777251804133, "learning_rate": 9.99627441174715e-06, "loss": 0.2538, "step": 524 }, { "epoch": 0.04, "grad_norm": 1.6284371865490614, "learning_rate": 9.9962242423554e-06, "loss": 0.2217, "step": 525 }, { "epoch": 0.04, "grad_norm": 1.832394420792578, "learning_rate": 9.996173737551976e-06, "loss": 0.2616, "step": 526 }, { "epoch": 0.04, "grad_norm": 1.6409982686137434, "learning_rate": 9.996122897340273e-06, "loss": 0.2353, "step": 527 }, { "epoch": 0.04, "grad_norm": 1.4367469841502498, "learning_rate": 9.9960717217237e-06, "loss": 0.2171, "step": 528 }, { "epoch": 0.04, "grad_norm": 1.5379249205662373, "learning_rate": 9.996020210705697e-06, "loss": 0.2215, "step": 529 }, { "epoch": 0.04, "grad_norm": 7.658894243914053, "learning_rate": 9.995968364289719e-06, "loss": 0.5064, "step": 530 }, { "epoch": 0.04, "grad_norm": 7.240879290750167, "learning_rate": 9.995916182479248e-06, "loss": 0.5841, "step": 531 }, { "epoch": 0.04, "grad_norm": 1.6287031206021532, "learning_rate": 9.995863665277787e-06, "loss": 0.2332, "step": 532 }, { "epoch": 0.04, "grad_norm": 2.079833609184642, "learning_rate": 9.99581081268886e-06, "loss": 0.2507, "step": 533 }, { "epoch": 0.04, "grad_norm": 1.6297785420124187, "learning_rate": 9.995757624716019e-06, "loss": 0.2446, "step": 534 }, { "epoch": 0.04, "grad_norm": 1.4917721777721014, "learning_rate": 9.99570410136283e-06, "loss": 0.2636, "step": 535 }, { "epoch": 0.04, "grad_norm": 1.6260663646793445, "learning_rate": 9.995650242632887e-06, "loss": 0.2065, "step": 536 }, { "epoch": 0.04, "grad_norm": 1.6490972581637198, "learning_rate": 9.99559604852981e-06, "loss": 0.249, "step": 537 }, { "epoch": 0.04, "grad_norm": 1.568733558786324, "learning_rate": 9.995541519057231e-06, "loss": 0.2165, "step": 538 }, { "epoch": 0.04, "grad_norm": 1.59945261733457, "learning_rate": 9.995486654218815e-06, "loss": 0.2263, "step": 539 }, { "epoch": 0.04, "grad_norm": 1.526253766435983, "learning_rate": 9.995431454018246e-06, "loss": 0.2131, "step": 540 }, { "epoch": 0.04, "grad_norm": 1.5989519470593818, "learning_rate": 9.995375918459227e-06, "loss": 0.2746, "step": 541 }, { "epoch": 0.04, "grad_norm": 1.7991778962413925, "learning_rate": 9.995320047545488e-06, "loss": 0.2969, "step": 542 }, { "epoch": 0.04, "grad_norm": 1.633462776227588, "learning_rate": 9.995263841280776e-06, "loss": 0.2716, "step": 543 }, { "epoch": 0.04, "grad_norm": 1.8007509659264962, "learning_rate": 9.99520729966887e-06, "loss": 0.291, "step": 544 }, { "epoch": 0.04, "grad_norm": 1.7498919144157523, "learning_rate": 9.995150422713561e-06, "loss": 0.2152, "step": 545 }, { "epoch": 0.04, "grad_norm": 1.7115026792009813, "learning_rate": 9.995093210418672e-06, "loss": 0.2372, "step": 546 }, { "epoch": 0.04, "grad_norm": 1.920929337976908, "learning_rate": 9.995035662788039e-06, "loss": 0.2256, "step": 547 }, { "epoch": 0.04, "grad_norm": 1.6368308690184818, "learning_rate": 9.99497777982553e-06, "loss": 0.2228, "step": 548 }, { "epoch": 0.04, "grad_norm": 1.587645451987956, "learning_rate": 9.994919561535026e-06, "loss": 0.2231, "step": 549 }, { "epoch": 0.04, "grad_norm": 1.7628630883457435, "learning_rate": 9.99486100792044e-06, "loss": 0.2523, "step": 550 }, { "epoch": 0.04, "grad_norm": 1.85604783302287, "learning_rate": 9.9948021189857e-06, "loss": 0.2487, "step": 551 }, { "epoch": 0.04, "grad_norm": 1.6386254313357684, "learning_rate": 9.994742894734759e-06, "loss": 0.2116, "step": 552 }, { "epoch": 0.04, "grad_norm": 1.6351585775425137, "learning_rate": 9.994683335171594e-06, "loss": 0.2206, "step": 553 }, { "epoch": 0.04, "grad_norm": 1.6512105214169943, "learning_rate": 9.994623440300205e-06, "loss": 0.2698, "step": 554 }, { "epoch": 0.04, "grad_norm": 1.6491951674575047, "learning_rate": 9.99456321012461e-06, "loss": 0.2166, "step": 555 }, { "epoch": 0.04, "grad_norm": 1.5276431680172764, "learning_rate": 9.994502644648854e-06, "loss": 0.2473, "step": 556 }, { "epoch": 0.04, "grad_norm": 1.7477318881062665, "learning_rate": 9.994441743877003e-06, "loss": 0.2155, "step": 557 }, { "epoch": 0.04, "grad_norm": 1.7290047662759773, "learning_rate": 9.994380507813146e-06, "loss": 0.265, "step": 558 }, { "epoch": 0.04, "grad_norm": 1.5903361943985859, "learning_rate": 9.994318936461393e-06, "loss": 0.2171, "step": 559 }, { "epoch": 0.04, "grad_norm": 1.7378313280195632, "learning_rate": 9.994257029825876e-06, "loss": 0.2195, "step": 560 }, { "epoch": 0.04, "grad_norm": 6.363053125638119, "learning_rate": 9.994194787910754e-06, "loss": 0.5256, "step": 561 }, { "epoch": 0.04, "grad_norm": 1.7225026488301578, "learning_rate": 9.994132210720204e-06, "loss": 0.2306, "step": 562 }, { "epoch": 0.05, "grad_norm": 1.7367531611479357, "learning_rate": 9.994069298258427e-06, "loss": 0.2675, "step": 563 }, { "epoch": 0.05, "grad_norm": 1.8272753889086069, "learning_rate": 9.994006050529645e-06, "loss": 0.2763, "step": 564 }, { "epoch": 0.05, "grad_norm": 1.6823963619766302, "learning_rate": 9.993942467538107e-06, "loss": 0.2054, "step": 565 }, { "epoch": 0.05, "grad_norm": 1.676880147327587, "learning_rate": 9.99387854928808e-06, "loss": 0.3209, "step": 566 }, { "epoch": 0.05, "grad_norm": 1.4441124262864924, "learning_rate": 9.993814295783855e-06, "loss": 0.1592, "step": 567 }, { "epoch": 0.05, "grad_norm": 1.6419816105732825, "learning_rate": 9.993749707029746e-06, "loss": 0.2401, "step": 568 }, { "epoch": 0.05, "grad_norm": 1.8621215355585214, "learning_rate": 9.99368478303009e-06, "loss": 0.2736, "step": 569 }, { "epoch": 0.05, "grad_norm": 1.4834496089438753, "learning_rate": 9.993619523789241e-06, "loss": 0.201, "step": 570 }, { "epoch": 0.05, "grad_norm": 8.637398050899101, "learning_rate": 9.993553929311587e-06, "loss": 0.6305, "step": 571 }, { "epoch": 0.05, "grad_norm": 5.9423650310906915, "learning_rate": 9.993487999601522e-06, "loss": 0.5231, "step": 572 }, { "epoch": 0.05, "grad_norm": 1.4707362256197447, "learning_rate": 9.993421734663484e-06, "loss": 0.2245, "step": 573 }, { "epoch": 0.05, "grad_norm": 1.5734565332666623, "learning_rate": 9.993355134501914e-06, "loss": 0.2185, "step": 574 }, { "epoch": 0.05, "grad_norm": 1.8655602777043687, "learning_rate": 9.993288199121283e-06, "loss": 0.2637, "step": 575 }, { "epoch": 0.05, "grad_norm": 1.541634910203471, "learning_rate": 9.993220928526086e-06, "loss": 0.2297, "step": 576 }, { "epoch": 0.05, "grad_norm": 7.218874888642801, "learning_rate": 9.993153322720841e-06, "loss": 0.7584, "step": 577 }, { "epoch": 0.05, "grad_norm": 1.6847881833008065, "learning_rate": 9.993085381710083e-06, "loss": 0.2531, "step": 578 }, { "epoch": 0.05, "grad_norm": 1.648901477630294, "learning_rate": 9.993017105498378e-06, "loss": 0.2649, "step": 579 }, { "epoch": 0.05, "grad_norm": 1.812625406157889, "learning_rate": 9.992948494090303e-06, "loss": 0.2152, "step": 580 }, { "epoch": 0.05, "grad_norm": 1.6954501957093269, "learning_rate": 9.992879547490469e-06, "loss": 0.2617, "step": 581 }, { "epoch": 0.05, "grad_norm": 1.721341560188178, "learning_rate": 9.992810265703503e-06, "loss": 0.2604, "step": 582 }, { "epoch": 0.05, "grad_norm": 1.84219526134802, "learning_rate": 9.992740648734057e-06, "loss": 0.2507, "step": 583 }, { "epoch": 0.05, "grad_norm": 1.5922860407384904, "learning_rate": 9.992670696586802e-06, "loss": 0.21, "step": 584 }, { "epoch": 0.05, "grad_norm": 1.939742249964271, "learning_rate": 9.992600409266437e-06, "loss": 0.3023, "step": 585 }, { "epoch": 0.05, "grad_norm": 1.6440231924238256, "learning_rate": 9.99252978677768e-06, "loss": 0.2561, "step": 586 }, { "epoch": 0.05, "grad_norm": 1.510332088821589, "learning_rate": 9.992458829125271e-06, "loss": 0.2418, "step": 587 }, { "epoch": 0.05, "grad_norm": 1.615846261148202, "learning_rate": 9.992387536313975e-06, "loss": 0.2105, "step": 588 }, { "epoch": 0.05, "grad_norm": 1.6226165591908892, "learning_rate": 9.992315908348578e-06, "loss": 0.2219, "step": 589 }, { "epoch": 0.05, "grad_norm": 8.716925910548682, "learning_rate": 9.992243945233886e-06, "loss": 0.8111, "step": 590 }, { "epoch": 0.05, "grad_norm": 1.4470812954837977, "learning_rate": 9.992171646974734e-06, "loss": 0.2093, "step": 591 }, { "epoch": 0.05, "grad_norm": 1.9229297576856135, "learning_rate": 9.99209901357597e-06, "loss": 0.2341, "step": 592 }, { "epoch": 0.05, "grad_norm": 1.6953907196044202, "learning_rate": 9.992026045042478e-06, "loss": 0.2448, "step": 593 }, { "epoch": 0.05, "grad_norm": 1.5659509369156763, "learning_rate": 9.99195274137915e-06, "loss": 0.2571, "step": 594 }, { "epoch": 0.05, "grad_norm": 1.5864828262557804, "learning_rate": 9.991879102590912e-06, "loss": 0.2444, "step": 595 }, { "epoch": 0.05, "grad_norm": 1.6379102039520295, "learning_rate": 9.9918051286827e-06, "loss": 0.2355, "step": 596 }, { "epoch": 0.05, "grad_norm": 1.7257890430894567, "learning_rate": 9.99173081965949e-06, "loss": 0.2721, "step": 597 }, { "epoch": 0.05, "grad_norm": 1.7318963219098946, "learning_rate": 9.991656175526264e-06, "loss": 0.2847, "step": 598 }, { "epoch": 0.05, "grad_norm": 1.5231759153164557, "learning_rate": 9.991581196288035e-06, "loss": 0.2373, "step": 599 }, { "epoch": 0.05, "grad_norm": 1.4847070433892302, "learning_rate": 9.991505881949837e-06, "loss": 0.2098, "step": 600 }, { "epoch": 0.05, "grad_norm": 1.6001821953972448, "learning_rate": 9.991430232516725e-06, "loss": 0.2182, "step": 601 }, { "epoch": 0.05, "grad_norm": 1.802972270465234, "learning_rate": 9.991354247993776e-06, "loss": 0.2574, "step": 602 }, { "epoch": 0.05, "grad_norm": 1.74547706467968, "learning_rate": 9.991277928386095e-06, "loss": 0.2135, "step": 603 }, { "epoch": 0.05, "grad_norm": 1.6257433356625492, "learning_rate": 9.991201273698805e-06, "loss": 0.2207, "step": 604 }, { "epoch": 0.05, "grad_norm": 1.589040562884, "learning_rate": 9.991124283937049e-06, "loss": 0.2356, "step": 605 }, { "epoch": 0.05, "grad_norm": 1.5511165109105016, "learning_rate": 9.991046959105998e-06, "loss": 0.2341, "step": 606 }, { "epoch": 0.05, "grad_norm": 11.011595514303622, "learning_rate": 9.990969299210843e-06, "loss": 0.5676, "step": 607 }, { "epoch": 0.05, "grad_norm": 1.6447541800466938, "learning_rate": 9.990891304256796e-06, "loss": 0.2691, "step": 608 }, { "epoch": 0.05, "grad_norm": 1.5651861030216716, "learning_rate": 9.990812974249094e-06, "loss": 0.2191, "step": 609 }, { "epoch": 0.05, "grad_norm": 1.7103707894771236, "learning_rate": 9.990734309192995e-06, "loss": 0.2309, "step": 610 }, { "epoch": 0.05, "grad_norm": 1.395402475841665, "learning_rate": 9.99065530909378e-06, "loss": 0.2385, "step": 611 }, { "epoch": 0.05, "grad_norm": 1.5322543278660479, "learning_rate": 9.990575973956754e-06, "loss": 0.2451, "step": 612 }, { "epoch": 0.05, "grad_norm": 1.8074170798515372, "learning_rate": 9.990496303787243e-06, "loss": 0.2867, "step": 613 }, { "epoch": 0.05, "grad_norm": 1.7953475087350166, "learning_rate": 9.990416298590593e-06, "loss": 0.249, "step": 614 }, { "epoch": 0.05, "grad_norm": 1.7398426315939088, "learning_rate": 9.990335958372178e-06, "loss": 0.2566, "step": 615 }, { "epoch": 0.05, "grad_norm": 1.694536819583506, "learning_rate": 9.990255283137388e-06, "loss": 0.2401, "step": 616 }, { "epoch": 0.05, "grad_norm": 1.57965968100311, "learning_rate": 9.990174272891642e-06, "loss": 0.2128, "step": 617 }, { "epoch": 0.05, "grad_norm": 1.8619530161861824, "learning_rate": 9.990092927640378e-06, "loss": 0.2363, "step": 618 }, { "epoch": 0.05, "grad_norm": 1.7732123527437331, "learning_rate": 9.990011247389055e-06, "loss": 0.2123, "step": 619 }, { "epoch": 0.05, "grad_norm": 1.680198178950124, "learning_rate": 9.989929232143159e-06, "loss": 0.2809, "step": 620 }, { "epoch": 0.05, "grad_norm": 1.4884085087328633, "learning_rate": 9.989846881908194e-06, "loss": 0.2391, "step": 621 }, { "epoch": 0.05, "grad_norm": 1.4396744748564938, "learning_rate": 9.98976419668969e-06, "loss": 0.2096, "step": 622 }, { "epoch": 0.05, "grad_norm": 1.5968857523302402, "learning_rate": 9.989681176493197e-06, "loss": 0.2536, "step": 623 }, { "epoch": 0.05, "grad_norm": 6.615719943427046, "learning_rate": 9.989597821324288e-06, "loss": 0.7034, "step": 624 }, { "epoch": 0.05, "grad_norm": 1.614385891347682, "learning_rate": 9.98951413118856e-06, "loss": 0.2293, "step": 625 }, { "epoch": 0.05, "grad_norm": 8.191075367956811, "learning_rate": 9.989430106091629e-06, "loss": 0.612, "step": 626 }, { "epoch": 0.05, "grad_norm": 1.603659924226727, "learning_rate": 9.989345746039138e-06, "loss": 0.2174, "step": 627 }, { "epoch": 0.05, "grad_norm": 1.6804506211375603, "learning_rate": 9.989261051036752e-06, "loss": 0.2836, "step": 628 }, { "epoch": 0.05, "grad_norm": 1.6564991052749514, "learning_rate": 9.989176021090155e-06, "loss": 0.2338, "step": 629 }, { "epoch": 0.05, "grad_norm": 1.62722867667748, "learning_rate": 9.989090656205052e-06, "loss": 0.2266, "step": 630 }, { "epoch": 0.05, "grad_norm": 1.4852472228714626, "learning_rate": 9.989004956387179e-06, "loss": 0.2264, "step": 631 }, { "epoch": 0.05, "grad_norm": 1.6813906031094794, "learning_rate": 9.988918921642287e-06, "loss": 0.2327, "step": 632 }, { "epoch": 0.05, "grad_norm": 1.808867267957172, "learning_rate": 9.988832551976151e-06, "loss": 0.2772, "step": 633 }, { "epoch": 0.05, "grad_norm": 2.2115923601291123, "learning_rate": 9.988745847394572e-06, "loss": 0.2219, "step": 634 }, { "epoch": 0.05, "grad_norm": 1.6525129481359726, "learning_rate": 9.988658807903369e-06, "loss": 0.2703, "step": 635 }, { "epoch": 0.05, "grad_norm": 1.6657086181445584, "learning_rate": 9.988571433508383e-06, "loss": 0.2713, "step": 636 }, { "epoch": 0.05, "grad_norm": 1.5380327534887346, "learning_rate": 9.988483724215483e-06, "loss": 0.188, "step": 637 }, { "epoch": 0.05, "grad_norm": 1.4789487314289715, "learning_rate": 9.988395680030556e-06, "loss": 0.2108, "step": 638 }, { "epoch": 0.05, "grad_norm": 1.535315629330471, "learning_rate": 9.988307300959513e-06, "loss": 0.216, "step": 639 }, { "epoch": 0.05, "grad_norm": 1.5164162701768915, "learning_rate": 9.988218587008287e-06, "loss": 0.239, "step": 640 }, { "epoch": 0.05, "grad_norm": 1.6307974968305041, "learning_rate": 9.988129538182833e-06, "loss": 0.2109, "step": 641 }, { "epoch": 0.05, "grad_norm": 1.765092766661041, "learning_rate": 9.98804015448913e-06, "loss": 0.2883, "step": 642 }, { "epoch": 0.05, "grad_norm": 1.4852765359981503, "learning_rate": 9.987950435933179e-06, "loss": 0.2209, "step": 643 }, { "epoch": 0.05, "grad_norm": 1.3738595567429717, "learning_rate": 9.987860382521003e-06, "loss": 0.1841, "step": 644 }, { "epoch": 0.05, "grad_norm": 1.7597230144739753, "learning_rate": 9.987769994258645e-06, "loss": 0.2699, "step": 645 }, { "epoch": 0.05, "grad_norm": 1.6374916651117752, "learning_rate": 9.987679271152175e-06, "loss": 0.291, "step": 646 }, { "epoch": 0.05, "grad_norm": 1.4744783927154217, "learning_rate": 9.987588213207684e-06, "loss": 0.2349, "step": 647 }, { "epoch": 0.05, "grad_norm": 1.604462093974148, "learning_rate": 9.987496820431284e-06, "loss": 0.257, "step": 648 }, { "epoch": 0.05, "grad_norm": 11.17027017060943, "learning_rate": 9.987405092829113e-06, "loss": 0.7866, "step": 649 }, { "epoch": 0.05, "grad_norm": 1.6545224837022368, "learning_rate": 9.987313030407325e-06, "loss": 0.2556, "step": 650 }, { "epoch": 0.05, "grad_norm": 7.770167038492094, "learning_rate": 9.987220633172101e-06, "loss": 0.5711, "step": 651 }, { "epoch": 0.05, "grad_norm": 1.4945050815715162, "learning_rate": 9.987127901129647e-06, "loss": 0.1835, "step": 652 }, { "epoch": 0.05, "grad_norm": 1.5209064188816457, "learning_rate": 9.987034834286186e-06, "loss": 0.2482, "step": 653 }, { "epoch": 0.05, "grad_norm": 1.574506628240721, "learning_rate": 9.986941432647968e-06, "loss": 0.1818, "step": 654 }, { "epoch": 0.05, "grad_norm": 1.7695495539151342, "learning_rate": 9.98684769622126e-06, "loss": 0.2404, "step": 655 }, { "epoch": 0.05, "grad_norm": 1.8528949420014384, "learning_rate": 9.986753625012358e-06, "loss": 0.2419, "step": 656 }, { "epoch": 0.05, "grad_norm": 1.9018601086016529, "learning_rate": 9.986659219027575e-06, "loss": 0.2273, "step": 657 }, { "epoch": 0.05, "grad_norm": 1.4649290227741514, "learning_rate": 9.986564478273249e-06, "loss": 0.2356, "step": 658 }, { "epoch": 0.05, "grad_norm": 1.5766365636580522, "learning_rate": 9.986469402755742e-06, "loss": 0.2483, "step": 659 }, { "epoch": 0.05, "grad_norm": 1.5410862867976602, "learning_rate": 9.986373992481434e-06, "loss": 0.2093, "step": 660 }, { "epoch": 0.05, "grad_norm": 1.5896560643961246, "learning_rate": 9.986278247456735e-06, "loss": 0.2081, "step": 661 }, { "epoch": 0.05, "grad_norm": 1.5720182589287917, "learning_rate": 9.986182167688066e-06, "loss": 0.2292, "step": 662 }, { "epoch": 0.05, "grad_norm": 1.7676532617931748, "learning_rate": 9.986085753181883e-06, "loss": 0.2674, "step": 663 }, { "epoch": 0.05, "grad_norm": 6.964118047196627, "learning_rate": 9.985989003944655e-06, "loss": 0.7795, "step": 664 }, { "epoch": 0.05, "grad_norm": 1.4443487250329152, "learning_rate": 9.985891919982878e-06, "loss": 0.2276, "step": 665 }, { "epoch": 0.05, "grad_norm": 1.6024950006097898, "learning_rate": 9.98579450130307e-06, "loss": 0.2298, "step": 666 }, { "epoch": 0.05, "grad_norm": 1.499163223693087, "learning_rate": 9.98569674791177e-06, "loss": 0.2468, "step": 667 }, { "epoch": 0.05, "grad_norm": 1.4996555762851789, "learning_rate": 9.985598659815543e-06, "loss": 0.2315, "step": 668 }, { "epoch": 0.05, "grad_norm": 1.589601832900611, "learning_rate": 9.985500237020972e-06, "loss": 0.1994, "step": 669 }, { "epoch": 0.05, "grad_norm": 1.511346317111211, "learning_rate": 9.985401479534664e-06, "loss": 0.1664, "step": 670 }, { "epoch": 0.05, "grad_norm": 1.510992851798693, "learning_rate": 9.985302387363249e-06, "loss": 0.1495, "step": 671 }, { "epoch": 0.05, "grad_norm": 1.5637961680474897, "learning_rate": 9.985202960513381e-06, "loss": 0.2005, "step": 672 }, { "epoch": 0.05, "grad_norm": 6.809633882925538, "learning_rate": 9.985103198991733e-06, "loss": 0.6506, "step": 673 }, { "epoch": 0.05, "grad_norm": 1.6417080526020813, "learning_rate": 9.985003102805004e-06, "loss": 0.2702, "step": 674 }, { "epoch": 0.05, "grad_norm": 1.612573023316388, "learning_rate": 9.984902671959911e-06, "loss": 0.226, "step": 675 }, { "epoch": 0.05, "grad_norm": 1.4991800013606413, "learning_rate": 9.984801906463199e-06, "loss": 0.2415, "step": 676 }, { "epoch": 0.05, "grad_norm": 1.571089584243743, "learning_rate": 9.984700806321631e-06, "loss": 0.2078, "step": 677 }, { "epoch": 0.05, "grad_norm": 1.767553442670523, "learning_rate": 9.984599371541995e-06, "loss": 0.2548, "step": 678 }, { "epoch": 0.05, "grad_norm": 1.6380892120623323, "learning_rate": 9.984497602131101e-06, "loss": 0.2697, "step": 679 }, { "epoch": 0.05, "grad_norm": 1.6872874689547022, "learning_rate": 9.98439549809578e-06, "loss": 0.2405, "step": 680 }, { "epoch": 0.05, "grad_norm": 1.7309222951178749, "learning_rate": 9.984293059442888e-06, "loss": 0.2714, "step": 681 }, { "epoch": 0.05, "grad_norm": 1.4928730463120055, "learning_rate": 9.9841902861793e-06, "loss": 0.2357, "step": 682 }, { "epoch": 0.05, "grad_norm": 1.7283480791607235, "learning_rate": 9.984087178311917e-06, "loss": 0.2318, "step": 683 }, { "epoch": 0.05, "grad_norm": 1.5683098080694315, "learning_rate": 9.98398373584766e-06, "loss": 0.1564, "step": 684 }, { "epoch": 0.05, "grad_norm": 6.483321651287427, "learning_rate": 9.983879958793476e-06, "loss": 0.5989, "step": 685 }, { "epoch": 0.05, "grad_norm": 6.73557280843961, "learning_rate": 9.983775847156327e-06, "loss": 0.7552, "step": 686 }, { "epoch": 0.05, "grad_norm": 1.5430137528961039, "learning_rate": 9.983671400943206e-06, "loss": 0.2078, "step": 687 }, { "epoch": 0.06, "grad_norm": 1.6108688052305573, "learning_rate": 9.983566620161126e-06, "loss": 0.2167, "step": 688 }, { "epoch": 0.06, "grad_norm": 1.5866760723152404, "learning_rate": 9.983461504817119e-06, "loss": 0.2159, "step": 689 }, { "epoch": 0.06, "grad_norm": 1.6287221689970799, "learning_rate": 9.983356054918238e-06, "loss": 0.2298, "step": 690 }, { "epoch": 0.06, "grad_norm": 7.309985381252691, "learning_rate": 9.983250270471569e-06, "loss": 0.6667, "step": 691 }, { "epoch": 0.06, "grad_norm": 1.46354666321353, "learning_rate": 9.98314415148421e-06, "loss": 0.1661, "step": 692 }, { "epoch": 0.06, "grad_norm": 1.7442231640817898, "learning_rate": 9.983037697963287e-06, "loss": 0.2756, "step": 693 }, { "epoch": 0.06, "grad_norm": 1.5942400155331855, "learning_rate": 9.982930909915944e-06, "loss": 0.2374, "step": 694 }, { "epoch": 0.06, "grad_norm": 7.967596940971285, "learning_rate": 9.982823787349352e-06, "loss": 0.4513, "step": 695 }, { "epoch": 0.06, "grad_norm": 1.5748029913355504, "learning_rate": 9.982716330270701e-06, "loss": 0.2721, "step": 696 }, { "epoch": 0.06, "grad_norm": 1.7528853554171635, "learning_rate": 9.982608538687208e-06, "loss": 0.3013, "step": 697 }, { "epoch": 0.06, "grad_norm": 1.8242073167411055, "learning_rate": 9.982500412606105e-06, "loss": 0.2718, "step": 698 }, { "epoch": 0.06, "grad_norm": 1.877471434195713, "learning_rate": 9.982391952034653e-06, "loss": 0.2788, "step": 699 }, { "epoch": 0.06, "grad_norm": 1.431516573750643, "learning_rate": 9.982283156980133e-06, "loss": 0.1923, "step": 700 }, { "epoch": 0.06, "grad_norm": 1.5305076944664961, "learning_rate": 9.982174027449849e-06, "loss": 0.24, "step": 701 }, { "epoch": 0.06, "grad_norm": 1.5516135495338292, "learning_rate": 9.982064563451128e-06, "loss": 0.2038, "step": 702 }, { "epoch": 0.06, "grad_norm": 1.8808013155712298, "learning_rate": 9.981954764991318e-06, "loss": 0.2414, "step": 703 }, { "epoch": 0.06, "grad_norm": 1.9105853038326197, "learning_rate": 9.981844632077788e-06, "loss": 0.2307, "step": 704 }, { "epoch": 0.06, "grad_norm": 7.070204946011062, "learning_rate": 9.981734164717936e-06, "loss": 0.7087, "step": 705 }, { "epoch": 0.06, "grad_norm": 1.6145858457686906, "learning_rate": 9.981623362919173e-06, "loss": 0.2177, "step": 706 }, { "epoch": 0.06, "grad_norm": 1.7589534705172456, "learning_rate": 9.981512226688943e-06, "loss": 0.299, "step": 707 }, { "epoch": 0.06, "grad_norm": 1.7872652226825059, "learning_rate": 9.981400756034701e-06, "loss": 0.2177, "step": 708 }, { "epoch": 0.06, "grad_norm": 7.595467585425629, "learning_rate": 9.981288950963935e-06, "loss": 0.6067, "step": 709 }, { "epoch": 0.06, "grad_norm": 1.7039434754853582, "learning_rate": 9.981176811484148e-06, "loss": 0.2768, "step": 710 }, { "epoch": 0.06, "grad_norm": 4.936703805348352, "learning_rate": 9.981064337602869e-06, "loss": 0.7969, "step": 711 }, { "epoch": 0.06, "grad_norm": 1.5417335536639822, "learning_rate": 9.980951529327649e-06, "loss": 0.2288, "step": 712 }, { "epoch": 0.06, "grad_norm": 1.6031228450364754, "learning_rate": 9.980838386666063e-06, "loss": 0.2112, "step": 713 }, { "epoch": 0.06, "grad_norm": 1.5275420117561587, "learning_rate": 9.980724909625704e-06, "loss": 0.1867, "step": 714 }, { "epoch": 0.06, "grad_norm": 1.58178778497284, "learning_rate": 9.98061109821419e-06, "loss": 0.2188, "step": 715 }, { "epoch": 0.06, "grad_norm": 1.6153527043651708, "learning_rate": 9.980496952439162e-06, "loss": 0.2413, "step": 716 }, { "epoch": 0.06, "grad_norm": 1.7786101072483669, "learning_rate": 9.980382472308283e-06, "loss": 0.2484, "step": 717 }, { "epoch": 0.06, "grad_norm": 1.5491416040421198, "learning_rate": 9.980267657829241e-06, "loss": 0.2427, "step": 718 }, { "epoch": 0.06, "grad_norm": 11.631354661525792, "learning_rate": 9.98015250900974e-06, "loss": 0.5361, "step": 719 }, { "epoch": 0.06, "grad_norm": 1.5936177552114652, "learning_rate": 9.980037025857511e-06, "loss": 0.2743, "step": 720 }, { "epoch": 0.06, "grad_norm": 1.75208958421876, "learning_rate": 9.979921208380308e-06, "loss": 0.263, "step": 721 }, { "epoch": 0.06, "grad_norm": 1.542580678026301, "learning_rate": 9.979805056585907e-06, "loss": 0.2257, "step": 722 }, { "epoch": 0.06, "grad_norm": 1.5516303181404911, "learning_rate": 9.979688570482102e-06, "loss": 0.2321, "step": 723 }, { "epoch": 0.06, "grad_norm": 1.5122807139154193, "learning_rate": 9.979571750076717e-06, "loss": 0.2383, "step": 724 }, { "epoch": 0.06, "grad_norm": 1.7023205749186943, "learning_rate": 9.979454595377594e-06, "loss": 0.2492, "step": 725 }, { "epoch": 0.06, "grad_norm": 1.7133166051736621, "learning_rate": 9.979337106392596e-06, "loss": 0.2321, "step": 726 }, { "epoch": 0.06, "grad_norm": 1.3762061613889318, "learning_rate": 9.97921928312961e-06, "loss": 0.1812, "step": 727 }, { "epoch": 0.06, "grad_norm": 1.700459300745377, "learning_rate": 9.979101125596548e-06, "loss": 0.2862, "step": 728 }, { "epoch": 0.06, "grad_norm": 1.4414095884976699, "learning_rate": 9.978982633801342e-06, "loss": 0.2094, "step": 729 }, { "epoch": 0.06, "grad_norm": 1.682819877335394, "learning_rate": 9.978863807751944e-06, "loss": 0.2483, "step": 730 }, { "epoch": 0.06, "grad_norm": 5.9730690074405635, "learning_rate": 9.978744647456335e-06, "loss": 0.6216, "step": 731 }, { "epoch": 0.06, "grad_norm": 1.8055672341248263, "learning_rate": 9.978625152922511e-06, "loss": 0.2933, "step": 732 }, { "epoch": 0.06, "grad_norm": 1.62835706808305, "learning_rate": 9.978505324158499e-06, "loss": 0.2548, "step": 733 }, { "epoch": 0.06, "grad_norm": 1.548947469338119, "learning_rate": 9.97838516117234e-06, "loss": 0.2634, "step": 734 }, { "epoch": 0.06, "grad_norm": 1.8679351330490013, "learning_rate": 9.978264663972099e-06, "loss": 0.3044, "step": 735 }, { "epoch": 0.06, "grad_norm": 1.5976705469811023, "learning_rate": 9.978143832565868e-06, "loss": 0.2224, "step": 736 }, { "epoch": 0.06, "grad_norm": 1.5369696680646707, "learning_rate": 9.97802266696176e-06, "loss": 0.2587, "step": 737 }, { "epoch": 0.06, "grad_norm": 1.55808208457302, "learning_rate": 9.977901167167904e-06, "loss": 0.2193, "step": 738 }, { "epoch": 0.06, "grad_norm": 1.6041090402085885, "learning_rate": 9.977779333192464e-06, "loss": 0.226, "step": 739 }, { "epoch": 0.06, "grad_norm": 1.6009131481741816, "learning_rate": 9.977657165043613e-06, "loss": 0.2288, "step": 740 }, { "epoch": 0.06, "grad_norm": 1.6068170310437762, "learning_rate": 9.977534662729556e-06, "loss": 0.2091, "step": 741 }, { "epoch": 0.06, "grad_norm": 1.5875563244585098, "learning_rate": 9.977411826258516e-06, "loss": 0.2704, "step": 742 }, { "epoch": 0.06, "grad_norm": 1.5382575067972875, "learning_rate": 9.977288655638737e-06, "loss": 0.2476, "step": 743 }, { "epoch": 0.06, "grad_norm": 1.6527039859361619, "learning_rate": 9.977165150878492e-06, "loss": 0.2223, "step": 744 }, { "epoch": 0.06, "grad_norm": 1.5500088874233902, "learning_rate": 9.977041311986072e-06, "loss": 0.2236, "step": 745 }, { "epoch": 0.06, "grad_norm": 1.671063292017438, "learning_rate": 9.976917138969784e-06, "loss": 0.2244, "step": 746 }, { "epoch": 0.06, "grad_norm": 1.5894419089902876, "learning_rate": 9.976792631837973e-06, "loss": 0.2645, "step": 747 }, { "epoch": 0.06, "grad_norm": 1.748680435043252, "learning_rate": 9.976667790598991e-06, "loss": 0.2851, "step": 748 }, { "epoch": 0.06, "grad_norm": 7.086589648319484, "learning_rate": 9.976542615261223e-06, "loss": 0.7588, "step": 749 }, { "epoch": 0.06, "grad_norm": 1.5242147921331002, "learning_rate": 9.97641710583307e-06, "loss": 0.2097, "step": 750 }, { "epoch": 0.06, "grad_norm": 5.88729303714169, "learning_rate": 9.97629126232296e-06, "loss": 0.5644, "step": 751 }, { "epoch": 0.06, "grad_norm": 1.5971660429260632, "learning_rate": 9.97616508473934e-06, "loss": 0.1859, "step": 752 }, { "epoch": 0.06, "grad_norm": 1.5200953564780697, "learning_rate": 9.976038573090679e-06, "loss": 0.2198, "step": 753 }, { "epoch": 0.06, "grad_norm": 1.3229233995214702, "learning_rate": 9.975911727385473e-06, "loss": 0.1937, "step": 754 }, { "epoch": 0.06, "grad_norm": 1.7107874577662805, "learning_rate": 9.975784547632237e-06, "loss": 0.279, "step": 755 }, { "epoch": 0.06, "grad_norm": 1.590861964069376, "learning_rate": 9.975657033839506e-06, "loss": 0.2154, "step": 756 }, { "epoch": 0.06, "grad_norm": 1.5852799064118221, "learning_rate": 9.975529186015844e-06, "loss": 0.2438, "step": 757 }, { "epoch": 0.06, "grad_norm": 1.833299840709122, "learning_rate": 9.975401004169834e-06, "loss": 0.2529, "step": 758 }, { "epoch": 0.06, "grad_norm": 1.6261263969385116, "learning_rate": 9.975272488310077e-06, "loss": 0.2953, "step": 759 }, { "epoch": 0.06, "grad_norm": 8.134755177981726, "learning_rate": 9.975143638445205e-06, "loss": 0.4565, "step": 760 }, { "epoch": 0.06, "grad_norm": 1.4838292118798306, "learning_rate": 9.975014454583867e-06, "loss": 0.1897, "step": 761 }, { "epoch": 0.06, "grad_norm": 1.5086098099853702, "learning_rate": 9.974884936734734e-06, "loss": 0.2216, "step": 762 }, { "epoch": 0.06, "grad_norm": 1.5807894380856151, "learning_rate": 9.974755084906503e-06, "loss": 0.2387, "step": 763 }, { "epoch": 0.06, "grad_norm": 1.6034485373287957, "learning_rate": 9.974624899107887e-06, "loss": 0.1979, "step": 764 }, { "epoch": 0.06, "grad_norm": 1.6666110968479153, "learning_rate": 9.974494379347632e-06, "loss": 0.2971, "step": 765 }, { "epoch": 0.06, "grad_norm": 1.3425496727150754, "learning_rate": 9.974363525634496e-06, "loss": 0.1679, "step": 766 }, { "epoch": 0.06, "grad_norm": 1.6816606853728424, "learning_rate": 9.974232337977265e-06, "loss": 0.2389, "step": 767 }, { "epoch": 0.06, "grad_norm": 1.6097517949514568, "learning_rate": 9.974100816384746e-06, "loss": 0.2436, "step": 768 }, { "epoch": 0.06, "grad_norm": 4.2533419767079295, "learning_rate": 9.97396896086577e-06, "loss": 0.3795, "step": 769 }, { "epoch": 0.06, "grad_norm": 1.6474270440333585, "learning_rate": 9.973836771429185e-06, "loss": 0.2474, "step": 770 }, { "epoch": 0.06, "grad_norm": 1.5292543960921259, "learning_rate": 9.973704248083868e-06, "loss": 0.2144, "step": 771 }, { "epoch": 0.06, "grad_norm": 7.91669239814435, "learning_rate": 9.973571390838715e-06, "loss": 0.5486, "step": 772 }, { "epoch": 0.06, "grad_norm": 1.6010615356715285, "learning_rate": 9.973438199702645e-06, "loss": 0.2737, "step": 773 }, { "epoch": 0.06, "grad_norm": 1.6150235863971005, "learning_rate": 9.9733046746846e-06, "loss": 0.2341, "step": 774 }, { "epoch": 0.06, "grad_norm": 1.549145978996915, "learning_rate": 9.973170815793543e-06, "loss": 0.2201, "step": 775 }, { "epoch": 0.06, "grad_norm": 7.854928860857746, "learning_rate": 9.973036623038462e-06, "loss": 0.7801, "step": 776 }, { "epoch": 0.06, "grad_norm": 1.5339319218427738, "learning_rate": 9.972902096428365e-06, "loss": 0.2196, "step": 777 }, { "epoch": 0.06, "grad_norm": 1.3591211197368716, "learning_rate": 9.972767235972283e-06, "loss": 0.1778, "step": 778 }, { "epoch": 0.06, "grad_norm": 1.721549849234291, "learning_rate": 9.972632041679268e-06, "loss": 0.3117, "step": 779 }, { "epoch": 0.06, "grad_norm": 1.5052800770726014, "learning_rate": 9.972496513558399e-06, "loss": 0.2019, "step": 780 }, { "epoch": 0.06, "grad_norm": 1.8045207991279795, "learning_rate": 9.972360651618772e-06, "loss": 0.2673, "step": 781 }, { "epoch": 0.06, "grad_norm": 1.7356180445400264, "learning_rate": 9.972224455869508e-06, "loss": 0.2204, "step": 782 }, { "epoch": 0.06, "grad_norm": 1.5275042560215937, "learning_rate": 9.972087926319753e-06, "loss": 0.2035, "step": 783 }, { "epoch": 0.06, "grad_norm": 1.6674975865562465, "learning_rate": 9.971951062978671e-06, "loss": 0.2243, "step": 784 }, { "epoch": 0.06, "grad_norm": 1.6735786014976555, "learning_rate": 9.971813865855448e-06, "loss": 0.2393, "step": 785 }, { "epoch": 0.06, "grad_norm": 1.485054417507819, "learning_rate": 9.971676334959297e-06, "loss": 0.2449, "step": 786 }, { "epoch": 0.06, "grad_norm": 1.4598758873716757, "learning_rate": 9.971538470299452e-06, "loss": 0.2146, "step": 787 }, { "epoch": 0.06, "grad_norm": 1.4500147360744786, "learning_rate": 9.971400271885163e-06, "loss": 0.2205, "step": 788 }, { "epoch": 0.06, "grad_norm": 1.564290257911547, "learning_rate": 9.971261739725713e-06, "loss": 0.2403, "step": 789 }, { "epoch": 0.06, "grad_norm": 1.5888183187840743, "learning_rate": 9.971122873830398e-06, "loss": 0.2161, "step": 790 }, { "epoch": 0.06, "grad_norm": 1.399120216199401, "learning_rate": 9.970983674208546e-06, "loss": 0.2339, "step": 791 }, { "epoch": 0.06, "grad_norm": 1.616206822487652, "learning_rate": 9.970844140869495e-06, "loss": 0.2423, "step": 792 }, { "epoch": 0.06, "grad_norm": 1.4704532267575672, "learning_rate": 9.970704273822618e-06, "loss": 0.186, "step": 793 }, { "epoch": 0.06, "grad_norm": 1.510036611301035, "learning_rate": 9.9705640730773e-06, "loss": 0.2159, "step": 794 }, { "epoch": 0.06, "grad_norm": 1.5833938812424388, "learning_rate": 9.970423538642959e-06, "loss": 0.2063, "step": 795 }, { "epoch": 0.06, "grad_norm": 1.783645778096462, "learning_rate": 9.970282670529024e-06, "loss": 0.1971, "step": 796 }, { "epoch": 0.06, "grad_norm": 1.6602911300475403, "learning_rate": 9.970141468744953e-06, "loss": 0.2835, "step": 797 }, { "epoch": 0.06, "grad_norm": 7.4237699713685235, "learning_rate": 9.969999933300229e-06, "loss": 0.7389, "step": 798 }, { "epoch": 0.06, "grad_norm": 7.146900448883108, "learning_rate": 9.96985806420435e-06, "loss": 0.7412, "step": 799 }, { "epoch": 0.06, "grad_norm": 1.651043306863088, "learning_rate": 9.969715861466839e-06, "loss": 0.204, "step": 800 }, { "epoch": 0.06, "grad_norm": 1.5515993963549433, "learning_rate": 9.969573325097247e-06, "loss": 0.2184, "step": 801 }, { "epoch": 0.06, "grad_norm": 1.580301351811492, "learning_rate": 9.96943045510514e-06, "loss": 0.2038, "step": 802 }, { "epoch": 0.06, "grad_norm": 1.503931124932624, "learning_rate": 9.969287251500109e-06, "loss": 0.2203, "step": 803 }, { "epoch": 0.06, "grad_norm": 1.5395751313167916, "learning_rate": 9.96914371429177e-06, "loss": 0.232, "step": 804 }, { "epoch": 0.06, "grad_norm": 1.7578679879406942, "learning_rate": 9.968999843489755e-06, "loss": 0.2638, "step": 805 }, { "epoch": 0.06, "grad_norm": 1.4629681709189775, "learning_rate": 9.968855639103727e-06, "loss": 0.2363, "step": 806 }, { "epoch": 0.06, "grad_norm": 11.823305579292933, "learning_rate": 9.968711101143364e-06, "loss": 0.5712, "step": 807 }, { "epoch": 0.06, "grad_norm": 1.6129816277901692, "learning_rate": 9.96856622961837e-06, "loss": 0.2085, "step": 808 }, { "epoch": 0.06, "grad_norm": 1.675313435355806, "learning_rate": 9.968421024538473e-06, "loss": 0.2144, "step": 809 }, { "epoch": 0.06, "grad_norm": 8.643725522989435, "learning_rate": 9.968275485913417e-06, "loss": 0.5537, "step": 810 }, { "epoch": 0.06, "grad_norm": 1.6736518701220606, "learning_rate": 9.968129613752975e-06, "loss": 0.2294, "step": 811 }, { "epoch": 0.06, "grad_norm": 1.5608030907843273, "learning_rate": 9.967983408066939e-06, "loss": 0.239, "step": 812 }, { "epoch": 0.07, "grad_norm": 1.692421120277671, "learning_rate": 9.967836868865125e-06, "loss": 0.2152, "step": 813 }, { "epoch": 0.07, "grad_norm": 1.490998666464393, "learning_rate": 9.967689996157368e-06, "loss": 0.2281, "step": 814 }, { "epoch": 0.07, "grad_norm": 1.5911104377564773, "learning_rate": 9.967542789953532e-06, "loss": 0.1956, "step": 815 }, { "epoch": 0.07, "grad_norm": 1.7486329006113877, "learning_rate": 9.967395250263496e-06, "loss": 0.2615, "step": 816 }, { "epoch": 0.07, "grad_norm": 11.918347572210306, "learning_rate": 9.967247377097168e-06, "loss": 0.5843, "step": 817 }, { "epoch": 0.07, "grad_norm": 1.7712335600413032, "learning_rate": 9.967099170464473e-06, "loss": 0.222, "step": 818 }, { "epoch": 0.07, "grad_norm": 1.4287793366941868, "learning_rate": 9.966950630375361e-06, "loss": 0.1941, "step": 819 }, { "epoch": 0.07, "grad_norm": 1.6253770319429273, "learning_rate": 9.966801756839802e-06, "loss": 0.232, "step": 820 }, { "epoch": 0.07, "grad_norm": 1.620658044413487, "learning_rate": 9.966652549867795e-06, "loss": 0.2458, "step": 821 }, { "epoch": 0.07, "grad_norm": 1.5390088992562736, "learning_rate": 9.966503009469352e-06, "loss": 0.1821, "step": 822 }, { "epoch": 0.07, "grad_norm": 1.7820222092675864, "learning_rate": 9.966353135654513e-06, "loss": 0.2764, "step": 823 }, { "epoch": 0.07, "grad_norm": 1.7563027287758286, "learning_rate": 9.966202928433344e-06, "loss": 0.2619, "step": 824 }, { "epoch": 0.07, "grad_norm": 1.6412047958781044, "learning_rate": 9.966052387815923e-06, "loss": 0.2296, "step": 825 }, { "epoch": 0.07, "grad_norm": 1.8113012568420899, "learning_rate": 9.96590151381236e-06, "loss": 0.2548, "step": 826 }, { "epoch": 0.07, "grad_norm": 1.8036425669711458, "learning_rate": 9.965750306432782e-06, "loss": 0.2735, "step": 827 }, { "epoch": 0.07, "grad_norm": 1.7605930651182087, "learning_rate": 9.965598765687338e-06, "loss": 0.2326, "step": 828 }, { "epoch": 0.07, "grad_norm": 1.7939421554999757, "learning_rate": 9.965446891586208e-06, "loss": 0.2651, "step": 829 }, { "epoch": 0.07, "grad_norm": 1.5273136745259914, "learning_rate": 9.96529468413958e-06, "loss": 0.2342, "step": 830 }, { "epoch": 0.07, "grad_norm": 1.7647983712606425, "learning_rate": 9.965142143357677e-06, "loss": 0.2703, "step": 831 }, { "epoch": 0.07, "grad_norm": 1.4905150487397107, "learning_rate": 9.964989269250737e-06, "loss": 0.2231, "step": 832 }, { "epoch": 0.07, "grad_norm": 1.6319494378773265, "learning_rate": 9.964836061829026e-06, "loss": 0.2739, "step": 833 }, { "epoch": 0.07, "grad_norm": 1.545059339126701, "learning_rate": 9.964682521102827e-06, "loss": 0.2391, "step": 834 }, { "epoch": 0.07, "grad_norm": 1.4755142109745647, "learning_rate": 9.964528647082447e-06, "loss": 0.2064, "step": 835 }, { "epoch": 0.07, "grad_norm": 1.6133247402182394, "learning_rate": 9.964374439778217e-06, "loss": 0.2633, "step": 836 }, { "epoch": 0.07, "grad_norm": 10.79839837211012, "learning_rate": 9.964219899200489e-06, "loss": 0.6151, "step": 837 }, { "epoch": 0.07, "grad_norm": 1.6126352316833688, "learning_rate": 9.964065025359639e-06, "loss": 0.2248, "step": 838 }, { "epoch": 0.07, "grad_norm": 1.5443897893326974, "learning_rate": 9.963909818266063e-06, "loss": 0.223, "step": 839 }, { "epoch": 0.07, "grad_norm": 1.6891128038040328, "learning_rate": 9.96375427793018e-06, "loss": 0.2333, "step": 840 }, { "epoch": 0.07, "grad_norm": 1.5128042488233393, "learning_rate": 9.963598404362435e-06, "loss": 0.2204, "step": 841 }, { "epoch": 0.07, "grad_norm": 1.6184011045283073, "learning_rate": 9.963442197573288e-06, "loss": 0.1955, "step": 842 }, { "epoch": 0.07, "grad_norm": 1.5777606878059403, "learning_rate": 9.96328565757323e-06, "loss": 0.2442, "step": 843 }, { "epoch": 0.07, "grad_norm": 1.5594988761353863, "learning_rate": 9.963128784372765e-06, "loss": 0.2371, "step": 844 }, { "epoch": 0.07, "grad_norm": 1.5431292321025256, "learning_rate": 9.962971577982428e-06, "loss": 0.231, "step": 845 }, { "epoch": 0.07, "grad_norm": 1.3876276464270902, "learning_rate": 9.962814038412772e-06, "loss": 0.1801, "step": 846 }, { "epoch": 0.07, "grad_norm": 1.5547517341145136, "learning_rate": 9.96265616567437e-06, "loss": 0.2167, "step": 847 }, { "epoch": 0.07, "grad_norm": 1.5748061768874722, "learning_rate": 9.962497959777828e-06, "loss": 0.2515, "step": 848 }, { "epoch": 0.07, "grad_norm": 1.507620857460194, "learning_rate": 9.96233942073376e-06, "loss": 0.251, "step": 849 }, { "epoch": 0.07, "grad_norm": 1.786641707498006, "learning_rate": 9.962180548552812e-06, "loss": 0.2228, "step": 850 }, { "epoch": 0.07, "grad_norm": 1.5734565870069346, "learning_rate": 9.96202134324565e-06, "loss": 0.2255, "step": 851 }, { "epoch": 0.07, "grad_norm": 1.530993640017462, "learning_rate": 9.961861804822958e-06, "loss": 0.2184, "step": 852 }, { "epoch": 0.07, "grad_norm": 1.7994293277832776, "learning_rate": 9.961701933295451e-06, "loss": 0.2226, "step": 853 }, { "epoch": 0.07, "grad_norm": 1.5517399666080043, "learning_rate": 9.961541728673859e-06, "loss": 0.1879, "step": 854 }, { "epoch": 0.07, "grad_norm": 1.5249486387514, "learning_rate": 9.96138119096894e-06, "loss": 0.2072, "step": 855 }, { "epoch": 0.07, "grad_norm": 1.6528342596573378, "learning_rate": 9.961220320191466e-06, "loss": 0.2421, "step": 856 }, { "epoch": 0.07, "grad_norm": 1.3783025751873852, "learning_rate": 9.961059116352242e-06, "loss": 0.2071, "step": 857 }, { "epoch": 0.07, "grad_norm": 1.6393079519715634, "learning_rate": 9.960897579462088e-06, "loss": 0.2504, "step": 858 }, { "epoch": 0.07, "grad_norm": 1.5175184471151877, "learning_rate": 9.960735709531848e-06, "loss": 0.235, "step": 859 }, { "epoch": 0.07, "grad_norm": 18.517128423630176, "learning_rate": 9.960573506572391e-06, "loss": 0.7902, "step": 860 }, { "epoch": 0.07, "grad_norm": 5.652309784477271, "learning_rate": 9.960410970594603e-06, "loss": 0.5837, "step": 861 }, { "epoch": 0.07, "grad_norm": 1.6105868569186426, "learning_rate": 9.960248101609396e-06, "loss": 0.261, "step": 862 }, { "epoch": 0.07, "grad_norm": 1.733488267489593, "learning_rate": 9.960084899627707e-06, "loss": 0.2248, "step": 863 }, { "epoch": 0.07, "grad_norm": 1.5597977486668484, "learning_rate": 9.95992136466049e-06, "loss": 0.1748, "step": 864 }, { "epoch": 0.07, "grad_norm": 1.5795473051518327, "learning_rate": 9.959757496718723e-06, "loss": 0.2708, "step": 865 }, { "epoch": 0.07, "grad_norm": 1.2374724235499004, "learning_rate": 9.959593295813409e-06, "loss": 0.1666, "step": 866 }, { "epoch": 0.07, "grad_norm": 1.6591074051281063, "learning_rate": 9.959428761955569e-06, "loss": 0.2571, "step": 867 }, { "epoch": 0.07, "grad_norm": 7.903684773595323, "learning_rate": 9.95926389515625e-06, "loss": 0.6636, "step": 868 }, { "epoch": 0.07, "grad_norm": 1.4582363686787694, "learning_rate": 9.959098695426518e-06, "loss": 0.2195, "step": 869 }, { "epoch": 0.07, "grad_norm": 1.657775124388049, "learning_rate": 9.958933162777468e-06, "loss": 0.3255, "step": 870 }, { "epoch": 0.07, "grad_norm": 1.7430338508342884, "learning_rate": 9.958767297220209e-06, "loss": 0.2759, "step": 871 }, { "epoch": 0.07, "grad_norm": 1.5635963750475803, "learning_rate": 9.958601098765877e-06, "loss": 0.2379, "step": 872 }, { "epoch": 0.07, "grad_norm": 8.663777620810148, "learning_rate": 9.958434567425627e-06, "loss": 0.7178, "step": 873 }, { "epoch": 0.07, "grad_norm": 1.593180292621246, "learning_rate": 9.958267703210645e-06, "loss": 0.2592, "step": 874 }, { "epoch": 0.07, "grad_norm": 1.4938445708827173, "learning_rate": 9.958100506132127e-06, "loss": 0.1859, "step": 875 }, { "epoch": 0.07, "grad_norm": 1.4001428069220136, "learning_rate": 9.957932976201298e-06, "loss": 0.2169, "step": 876 }, { "epoch": 0.07, "grad_norm": 6.6482041393412645, "learning_rate": 9.95776511342941e-06, "loss": 0.6046, "step": 877 }, { "epoch": 0.07, "grad_norm": 1.6632024693049707, "learning_rate": 9.957596917827726e-06, "loss": 0.2463, "step": 878 }, { "epoch": 0.07, "grad_norm": 1.7549458091522492, "learning_rate": 9.95742838940754e-06, "loss": 0.1821, "step": 879 }, { "epoch": 0.07, "grad_norm": 7.336607146784815, "learning_rate": 9.957259528180166e-06, "loss": 0.6606, "step": 880 }, { "epoch": 0.07, "grad_norm": 1.596421033384349, "learning_rate": 9.95709033415694e-06, "loss": 0.2209, "step": 881 }, { "epoch": 0.07, "grad_norm": 1.7031625720340766, "learning_rate": 9.956920807349222e-06, "loss": 0.2214, "step": 882 }, { "epoch": 0.07, "grad_norm": 1.614354261023114, "learning_rate": 9.95675094776839e-06, "loss": 0.2226, "step": 883 }, { "epoch": 0.07, "grad_norm": 1.5391995482599397, "learning_rate": 9.956580755425847e-06, "loss": 0.2109, "step": 884 }, { "epoch": 0.07, "grad_norm": 1.6158173858056841, "learning_rate": 9.956410230333023e-06, "loss": 0.2377, "step": 885 }, { "epoch": 0.07, "grad_norm": 1.6555947275471679, "learning_rate": 9.956239372501361e-06, "loss": 0.2236, "step": 886 }, { "epoch": 0.07, "grad_norm": 1.6681512449709777, "learning_rate": 9.956068181942333e-06, "loss": 0.2782, "step": 887 }, { "epoch": 0.07, "grad_norm": 1.5652856598780753, "learning_rate": 9.955896658667433e-06, "loss": 0.2397, "step": 888 }, { "epoch": 0.07, "grad_norm": 1.454534919169001, "learning_rate": 9.955724802688173e-06, "loss": 0.2301, "step": 889 }, { "epoch": 0.07, "grad_norm": 1.5952741803703179, "learning_rate": 9.955552614016093e-06, "loss": 0.2475, "step": 890 }, { "epoch": 0.07, "grad_norm": 1.4927589801345362, "learning_rate": 9.955380092662751e-06, "loss": 0.2365, "step": 891 }, { "epoch": 0.07, "grad_norm": 1.685133760573064, "learning_rate": 9.955207238639729e-06, "loss": 0.2577, "step": 892 }, { "epoch": 0.07, "grad_norm": 1.5541686942396116, "learning_rate": 9.955034051958632e-06, "loss": 0.2164, "step": 893 }, { "epoch": 0.07, "grad_norm": 6.35315888932159, "learning_rate": 9.954860532631086e-06, "loss": 0.6672, "step": 894 }, { "epoch": 0.07, "grad_norm": 1.5424407308739105, "learning_rate": 9.954686680668737e-06, "loss": 0.186, "step": 895 }, { "epoch": 0.07, "grad_norm": 1.3933068540170461, "learning_rate": 9.954512496083262e-06, "loss": 0.2166, "step": 896 }, { "epoch": 0.07, "grad_norm": 1.5958090136772076, "learning_rate": 9.95433797888635e-06, "loss": 0.2378, "step": 897 }, { "epoch": 0.07, "grad_norm": 6.411236747990016, "learning_rate": 9.95416312908972e-06, "loss": 0.5844, "step": 898 }, { "epoch": 0.07, "grad_norm": 1.4064977250970212, "learning_rate": 9.953987946705108e-06, "loss": 0.186, "step": 899 }, { "epoch": 0.07, "grad_norm": 1.6459686346692761, "learning_rate": 9.953812431744274e-06, "loss": 0.2663, "step": 900 }, { "epoch": 0.07, "grad_norm": 1.6815411637250797, "learning_rate": 9.953636584219004e-06, "loss": 0.2335, "step": 901 }, { "epoch": 0.07, "grad_norm": 1.5351604977918487, "learning_rate": 9.9534604041411e-06, "loss": 0.232, "step": 902 }, { "epoch": 0.07, "grad_norm": 1.7265725215370527, "learning_rate": 9.953283891522393e-06, "loss": 0.2608, "step": 903 }, { "epoch": 0.07, "grad_norm": 8.070918595194136, "learning_rate": 9.953107046374726e-06, "loss": 0.6555, "step": 904 }, { "epoch": 0.07, "grad_norm": 1.6266271289217815, "learning_rate": 9.95292986870998e-06, "loss": 0.2466, "step": 905 }, { "epoch": 0.07, "grad_norm": 1.6221893894908572, "learning_rate": 9.95275235854004e-06, "loss": 0.264, "step": 906 }, { "epoch": 0.07, "grad_norm": 1.740166860557886, "learning_rate": 9.952574515876833e-06, "loss": 0.2098, "step": 907 }, { "epoch": 0.07, "grad_norm": 1.6429813550569097, "learning_rate": 9.952396340732292e-06, "loss": 0.2645, "step": 908 }, { "epoch": 0.07, "grad_norm": 1.3973402353889148, "learning_rate": 9.952217833118377e-06, "loss": 0.1947, "step": 909 }, { "epoch": 0.07, "grad_norm": 1.2841636408910444, "learning_rate": 9.952038993047076e-06, "loss": 0.2188, "step": 910 }, { "epoch": 0.07, "grad_norm": 1.6062633476128698, "learning_rate": 9.951859820530394e-06, "loss": 0.2003, "step": 911 }, { "epoch": 0.07, "grad_norm": 1.6050190748438495, "learning_rate": 9.951680315580356e-06, "loss": 0.2755, "step": 912 }, { "epoch": 0.07, "grad_norm": 1.8415823509569391, "learning_rate": 9.951500478209018e-06, "loss": 0.2936, "step": 913 }, { "epoch": 0.07, "grad_norm": 1.6127471457348062, "learning_rate": 9.951320308428449e-06, "loss": 0.2425, "step": 914 }, { "epoch": 0.07, "grad_norm": 1.5751057869021148, "learning_rate": 9.951139806250747e-06, "loss": 0.2193, "step": 915 }, { "epoch": 0.07, "grad_norm": 7.386367079834223, "learning_rate": 9.950958971688028e-06, "loss": 0.7378, "step": 916 }, { "epoch": 0.07, "grad_norm": 1.5491473017552104, "learning_rate": 9.950777804752432e-06, "loss": 0.2147, "step": 917 }, { "epoch": 0.07, "grad_norm": 1.6049453296205214, "learning_rate": 9.950596305456124e-06, "loss": 0.2489, "step": 918 }, { "epoch": 0.07, "grad_norm": 15.643927653364026, "learning_rate": 9.950414473811283e-06, "loss": 0.7063, "step": 919 }, { "epoch": 0.07, "grad_norm": 5.921814508291732, "learning_rate": 9.950232309830121e-06, "loss": 0.4937, "step": 920 }, { "epoch": 0.07, "grad_norm": 1.5538309090102893, "learning_rate": 9.950049813524865e-06, "loss": 0.2254, "step": 921 }, { "epoch": 0.07, "grad_norm": 1.7942758741770277, "learning_rate": 9.949866984907768e-06, "loss": 0.2549, "step": 922 }, { "epoch": 0.07, "grad_norm": 1.6934101926310348, "learning_rate": 9.9496838239911e-06, "loss": 0.2501, "step": 923 }, { "epoch": 0.07, "grad_norm": 1.5839252274937599, "learning_rate": 9.949500330787162e-06, "loss": 0.2323, "step": 924 }, { "epoch": 0.07, "grad_norm": 1.579175450399343, "learning_rate": 9.94931650530827e-06, "loss": 0.2117, "step": 925 }, { "epoch": 0.07, "grad_norm": 10.316547008170586, "learning_rate": 9.949132347566765e-06, "loss": 0.7896, "step": 926 }, { "epoch": 0.07, "grad_norm": 1.5327134758391607, "learning_rate": 9.948947857575012e-06, "loss": 0.2325, "step": 927 }, { "epoch": 0.07, "grad_norm": 6.468447725809474, "learning_rate": 9.948763035345393e-06, "loss": 0.7521, "step": 928 }, { "epoch": 0.07, "grad_norm": 1.4508076923260516, "learning_rate": 9.948577880890318e-06, "loss": 0.2227, "step": 929 }, { "epoch": 0.07, "grad_norm": 1.612063571957598, "learning_rate": 9.948392394222214e-06, "loss": 0.2197, "step": 930 }, { "epoch": 0.07, "grad_norm": 7.583841051716503, "learning_rate": 9.948206575353539e-06, "loss": 0.5356, "step": 931 }, { "epoch": 0.07, "grad_norm": 1.5291051527881614, "learning_rate": 9.948020424296762e-06, "loss": 0.2391, "step": 932 }, { "epoch": 0.07, "grad_norm": 1.6544556640373385, "learning_rate": 9.947833941064382e-06, "loss": 0.2765, "step": 933 }, { "epoch": 0.07, "grad_norm": 1.492941213120045, "learning_rate": 9.94764712566892e-06, "loss": 0.2283, "step": 934 }, { "epoch": 0.07, "grad_norm": 8.476821365229082, "learning_rate": 9.947459978122912e-06, "loss": 0.8646, "step": 935 }, { "epoch": 0.07, "grad_norm": 1.524828204377511, "learning_rate": 9.947272498438929e-06, "loss": 0.2004, "step": 936 }, { "epoch": 0.07, "grad_norm": 1.4975819681283637, "learning_rate": 9.947084686629552e-06, "loss": 0.2203, "step": 937 }, { "epoch": 0.08, "grad_norm": 1.5338876062166038, "learning_rate": 9.946896542707391e-06, "loss": 0.2312, "step": 938 }, { "epoch": 0.08, "grad_norm": 30.750702403951543, "learning_rate": 9.946708066685077e-06, "loss": 0.7122, "step": 939 }, { "epoch": 0.08, "grad_norm": 1.5084990224143429, "learning_rate": 9.946519258575263e-06, "loss": 0.2541, "step": 940 }, { "epoch": 0.08, "grad_norm": 1.6274106660844325, "learning_rate": 9.946330118390622e-06, "loss": 0.231, "step": 941 }, { "epoch": 0.08, "grad_norm": 1.7300014426029913, "learning_rate": 9.946140646143856e-06, "loss": 0.2565, "step": 942 }, { "epoch": 0.08, "grad_norm": 11.09768129921992, "learning_rate": 9.94595084184768e-06, "loss": 0.6627, "step": 943 }, { "epoch": 0.08, "grad_norm": 1.5832460693725294, "learning_rate": 9.945760705514839e-06, "loss": 0.2582, "step": 944 }, { "epoch": 0.08, "grad_norm": 1.7698903199669065, "learning_rate": 9.945570237158098e-06, "loss": 0.2576, "step": 945 }, { "epoch": 0.08, "grad_norm": 1.497249738308736, "learning_rate": 9.94537943679024e-06, "loss": 0.2269, "step": 946 }, { "epoch": 0.08, "grad_norm": 1.3900691512748142, "learning_rate": 9.945188304424078e-06, "loss": 0.2249, "step": 947 }, { "epoch": 0.08, "grad_norm": 9.926543979555571, "learning_rate": 9.944996840072442e-06, "loss": 0.8144, "step": 948 }, { "epoch": 0.08, "grad_norm": 1.5465396857631586, "learning_rate": 9.944805043748185e-06, "loss": 0.2537, "step": 949 }, { "epoch": 0.08, "grad_norm": 6.034809549310933, "learning_rate": 9.944612915464183e-06, "loss": 0.5551, "step": 950 }, { "epoch": 0.08, "grad_norm": 5.562651876822184, "learning_rate": 9.944420455233335e-06, "loss": 0.689, "step": 951 }, { "epoch": 0.08, "grad_norm": 1.6360519621442735, "learning_rate": 9.94422766306856e-06, "loss": 0.2617, "step": 952 }, { "epoch": 0.08, "grad_norm": 1.7375542378308855, "learning_rate": 9.944034538982804e-06, "loss": 0.2667, "step": 953 }, { "epoch": 0.08, "grad_norm": 1.6005154233453405, "learning_rate": 9.943841082989027e-06, "loss": 0.2065, "step": 954 }, { "epoch": 0.08, "grad_norm": 1.5790467979664349, "learning_rate": 9.943647295100219e-06, "loss": 0.2606, "step": 955 }, { "epoch": 0.08, "grad_norm": 1.5743554333429108, "learning_rate": 9.94345317532939e-06, "loss": 0.2743, "step": 956 }, { "epoch": 0.08, "grad_norm": 1.60837717216851, "learning_rate": 9.94325872368957e-06, "loss": 0.2692, "step": 957 }, { "epoch": 0.08, "grad_norm": 1.518814478187395, "learning_rate": 9.943063940193817e-06, "loss": 0.2245, "step": 958 }, { "epoch": 0.08, "grad_norm": 1.5019157165089836, "learning_rate": 9.942868824855202e-06, "loss": 0.2129, "step": 959 }, { "epoch": 0.08, "grad_norm": 1.541331492481318, "learning_rate": 9.94267337768683e-06, "loss": 0.2374, "step": 960 }, { "epoch": 0.08, "grad_norm": 1.4329875568597175, "learning_rate": 9.942477598701815e-06, "loss": 0.1988, "step": 961 }, { "epoch": 0.08, "grad_norm": 1.3895280585930996, "learning_rate": 9.942281487913306e-06, "loss": 0.2209, "step": 962 }, { "epoch": 0.08, "grad_norm": 1.4109001670295296, "learning_rate": 9.942085045334464e-06, "loss": 0.1857, "step": 963 }, { "epoch": 0.08, "grad_norm": 1.4512056507607836, "learning_rate": 9.941888270978482e-06, "loss": 0.2243, "step": 964 }, { "epoch": 0.08, "grad_norm": 1.597993735500171, "learning_rate": 9.941691164858565e-06, "loss": 0.2587, "step": 965 }, { "epoch": 0.08, "grad_norm": 1.5330797032064054, "learning_rate": 9.941493726987947e-06, "loss": 0.2375, "step": 966 }, { "epoch": 0.08, "grad_norm": 1.6283274387387057, "learning_rate": 9.941295957379884e-06, "loss": 0.2321, "step": 967 }, { "epoch": 0.08, "grad_norm": 1.6170550052541643, "learning_rate": 9.941097856047652e-06, "loss": 0.2231, "step": 968 }, { "epoch": 0.08, "grad_norm": 1.6310327950769177, "learning_rate": 9.940899423004548e-06, "loss": 0.2249, "step": 969 }, { "epoch": 0.08, "grad_norm": 1.7714358577666947, "learning_rate": 9.940700658263897e-06, "loss": 0.2927, "step": 970 }, { "epoch": 0.08, "grad_norm": 5.494587341201075, "learning_rate": 9.940501561839043e-06, "loss": 0.6137, "step": 971 }, { "epoch": 0.08, "grad_norm": 1.5307607267643362, "learning_rate": 9.940302133743347e-06, "loss": 0.1963, "step": 972 }, { "epoch": 0.08, "grad_norm": 1.41109477015087, "learning_rate": 9.940102373990202e-06, "loss": 0.2398, "step": 973 }, { "epoch": 0.08, "grad_norm": 1.6673305237552476, "learning_rate": 9.939902282593015e-06, "loss": 0.2671, "step": 974 }, { "epoch": 0.08, "grad_norm": 1.3937229651279954, "learning_rate": 9.93970185956522e-06, "loss": 0.2063, "step": 975 }, { "epoch": 0.08, "grad_norm": 11.801030692531414, "learning_rate": 9.939501104920275e-06, "loss": 0.6237, "step": 976 }, { "epoch": 0.08, "grad_norm": 1.6131397479118943, "learning_rate": 9.939300018671654e-06, "loss": 0.1828, "step": 977 }, { "epoch": 0.08, "grad_norm": 1.5320730722523492, "learning_rate": 9.939098600832857e-06, "loss": 0.2219, "step": 978 }, { "epoch": 0.08, "grad_norm": 1.6972839039006973, "learning_rate": 9.938896851417406e-06, "loss": 0.2499, "step": 979 }, { "epoch": 0.08, "grad_norm": 1.7300102797747343, "learning_rate": 9.938694770438843e-06, "loss": 0.2306, "step": 980 }, { "epoch": 0.08, "grad_norm": 1.6926207038849739, "learning_rate": 9.93849235791074e-06, "loss": 0.2472, "step": 981 }, { "epoch": 0.08, "grad_norm": 1.5464901577367662, "learning_rate": 9.93828961384668e-06, "loss": 0.2208, "step": 982 }, { "epoch": 0.08, "grad_norm": 1.4750972717226003, "learning_rate": 9.938086538260277e-06, "loss": 0.1851, "step": 983 }, { "epoch": 0.08, "grad_norm": 1.6370078355023407, "learning_rate": 9.937883131165163e-06, "loss": 0.238, "step": 984 }, { "epoch": 0.08, "grad_norm": 1.648554332459701, "learning_rate": 9.937679392574991e-06, "loss": 0.2452, "step": 985 }, { "epoch": 0.08, "grad_norm": 1.3602112441409195, "learning_rate": 9.937475322503442e-06, "loss": 0.1765, "step": 986 }, { "epoch": 0.08, "grad_norm": 4.834108864437724, "learning_rate": 9.937270920964214e-06, "loss": 0.5768, "step": 987 }, { "epoch": 0.08, "grad_norm": 1.5723169396135572, "learning_rate": 9.937066187971031e-06, "loss": 0.2409, "step": 988 }, { "epoch": 0.08, "grad_norm": 1.449766155435059, "learning_rate": 9.936861123537636e-06, "loss": 0.2064, "step": 989 }, { "epoch": 0.08, "grad_norm": 1.569226779964195, "learning_rate": 9.936655727677795e-06, "loss": 0.216, "step": 990 }, { "epoch": 0.08, "grad_norm": 1.675949214876949, "learning_rate": 9.936450000405297e-06, "loss": 0.2592, "step": 991 }, { "epoch": 0.08, "grad_norm": 1.75156627668863, "learning_rate": 9.936243941733956e-06, "loss": 0.2665, "step": 992 }, { "epoch": 0.08, "grad_norm": 1.5423023160586935, "learning_rate": 9.9360375516776e-06, "loss": 0.1817, "step": 993 }, { "epoch": 0.08, "grad_norm": 1.396136235643332, "learning_rate": 9.93583083025009e-06, "loss": 0.2003, "step": 994 }, { "epoch": 0.08, "grad_norm": 1.8085942243824484, "learning_rate": 9.9356237774653e-06, "loss": 0.2397, "step": 995 }, { "epoch": 0.08, "grad_norm": 1.4024269055644474, "learning_rate": 9.935416393337132e-06, "loss": 0.2203, "step": 996 }, { "epoch": 0.08, "grad_norm": 1.6147659812331727, "learning_rate": 9.935208677879508e-06, "loss": 0.2273, "step": 997 }, { "epoch": 0.08, "grad_norm": 1.5653745315197816, "learning_rate": 9.935000631106372e-06, "loss": 0.2583, "step": 998 }, { "epoch": 0.08, "grad_norm": 1.4414956885664476, "learning_rate": 9.93479225303169e-06, "loss": 0.2295, "step": 999 }, { "epoch": 0.08, "grad_norm": 1.513450440125113, "learning_rate": 9.934583543669454e-06, "loss": 0.2709, "step": 1000 }, { "epoch": 0.08, "grad_norm": 1.5560148017202278, "learning_rate": 9.934374503033672e-06, "loss": 0.2461, "step": 1001 }, { "epoch": 0.08, "grad_norm": 6.92353045907564, "learning_rate": 9.934165131138381e-06, "loss": 0.7145, "step": 1002 }, { "epoch": 0.08, "grad_norm": 1.527946841404391, "learning_rate": 9.933955427997634e-06, "loss": 0.2363, "step": 1003 }, { "epoch": 0.08, "grad_norm": 1.5162096594084293, "learning_rate": 9.933745393625509e-06, "loss": 0.2213, "step": 1004 }, { "epoch": 0.08, "grad_norm": 1.8043073181529599, "learning_rate": 9.933535028036108e-06, "loss": 0.3097, "step": 1005 }, { "epoch": 0.08, "grad_norm": 1.4571863545941868, "learning_rate": 9.933324331243553e-06, "loss": 0.2479, "step": 1006 }, { "epoch": 0.08, "grad_norm": 1.4390003929230886, "learning_rate": 9.933113303261987e-06, "loss": 0.2439, "step": 1007 }, { "epoch": 0.08, "grad_norm": 1.348203148223269, "learning_rate": 9.932901944105578e-06, "loss": 0.2131, "step": 1008 }, { "epoch": 0.08, "grad_norm": 1.327973066127189, "learning_rate": 9.932690253788516e-06, "loss": 0.2242, "step": 1009 }, { "epoch": 0.08, "grad_norm": 1.7254541944995574, "learning_rate": 9.932478232325013e-06, "loss": 0.2537, "step": 1010 }, { "epoch": 0.08, "grad_norm": 1.4601674357344157, "learning_rate": 9.9322658797293e-06, "loss": 0.2241, "step": 1011 }, { "epoch": 0.08, "grad_norm": 1.6868388967371297, "learning_rate": 9.932053196015634e-06, "loss": 0.2437, "step": 1012 }, { "epoch": 0.08, "grad_norm": 1.8984128773301088, "learning_rate": 9.931840181198296e-06, "loss": 0.2901, "step": 1013 }, { "epoch": 0.08, "grad_norm": 1.552741733811618, "learning_rate": 9.931626835291581e-06, "loss": 0.211, "step": 1014 }, { "epoch": 0.08, "grad_norm": 1.5708978741476807, "learning_rate": 9.931413158309816e-06, "loss": 0.2368, "step": 1015 }, { "epoch": 0.08, "grad_norm": 1.5167620261031745, "learning_rate": 9.931199150267343e-06, "loss": 0.2588, "step": 1016 }, { "epoch": 0.08, "grad_norm": 1.6334059992666325, "learning_rate": 9.93098481117853e-06, "loss": 0.2955, "step": 1017 }, { "epoch": 0.08, "grad_norm": 1.510915928012243, "learning_rate": 9.930770141057767e-06, "loss": 0.2119, "step": 1018 }, { "epoch": 0.08, "grad_norm": 1.6512566941178652, "learning_rate": 9.930555139919465e-06, "loss": 0.2926, "step": 1019 }, { "epoch": 0.08, "grad_norm": 2.0137385934719614, "learning_rate": 9.930339807778056e-06, "loss": 0.2583, "step": 1020 }, { "epoch": 0.08, "grad_norm": 1.5162046675913567, "learning_rate": 9.930124144647998e-06, "loss": 0.2114, "step": 1021 }, { "epoch": 0.08, "grad_norm": 1.4523137789073988, "learning_rate": 9.929908150543769e-06, "loss": 0.2331, "step": 1022 }, { "epoch": 0.08, "grad_norm": 9.997674020135156, "learning_rate": 9.929691825479868e-06, "loss": 0.8094, "step": 1023 }, { "epoch": 0.08, "grad_norm": 1.5994899779486356, "learning_rate": 9.929475169470819e-06, "loss": 0.2274, "step": 1024 }, { "epoch": 0.08, "grad_norm": 1.5258480728416541, "learning_rate": 9.929258182531167e-06, "loss": 0.2642, "step": 1025 }, { "epoch": 0.08, "grad_norm": 1.4987306543976613, "learning_rate": 9.929040864675477e-06, "loss": 0.2957, "step": 1026 }, { "epoch": 0.08, "grad_norm": 1.7077739460073242, "learning_rate": 9.92882321591834e-06, "loss": 0.2652, "step": 1027 }, { "epoch": 0.08, "grad_norm": 6.075228388987073, "learning_rate": 9.928605236274368e-06, "loss": 0.5457, "step": 1028 }, { "epoch": 0.08, "grad_norm": 1.4727007291628043, "learning_rate": 9.928386925758191e-06, "loss": 0.2081, "step": 1029 }, { "epoch": 0.08, "grad_norm": 1.4970191667197104, "learning_rate": 9.928168284384468e-06, "loss": 0.2044, "step": 1030 }, { "epoch": 0.08, "grad_norm": 7.859127292495222, "learning_rate": 9.927949312167876e-06, "loss": 0.6173, "step": 1031 }, { "epoch": 0.08, "grad_norm": 14.663958350712528, "learning_rate": 9.927730009123116e-06, "loss": 0.7604, "step": 1032 }, { "epoch": 0.08, "grad_norm": 1.6049887541285104, "learning_rate": 9.92751037526491e-06, "loss": 0.2041, "step": 1033 }, { "epoch": 0.08, "grad_norm": 1.408856198512203, "learning_rate": 9.927290410608003e-06, "loss": 0.2053, "step": 1034 }, { "epoch": 0.08, "grad_norm": 1.4552447095142726, "learning_rate": 9.927070115167161e-06, "loss": 0.2137, "step": 1035 }, { "epoch": 0.08, "grad_norm": 1.3913669250596103, "learning_rate": 9.926849488957176e-06, "loss": 0.2497, "step": 1036 }, { "epoch": 0.08, "grad_norm": 1.4662104773724527, "learning_rate": 9.926628531992855e-06, "loss": 0.2686, "step": 1037 }, { "epoch": 0.08, "grad_norm": 1.4533843449797672, "learning_rate": 9.926407244289033e-06, "loss": 0.1831, "step": 1038 }, { "epoch": 0.08, "grad_norm": 1.3069315357941098, "learning_rate": 9.926185625860567e-06, "loss": 0.165, "step": 1039 }, { "epoch": 0.08, "grad_norm": 1.8197806524288562, "learning_rate": 9.925963676722335e-06, "loss": 0.2582, "step": 1040 }, { "epoch": 0.08, "grad_norm": 1.636391482369491, "learning_rate": 9.925741396889235e-06, "loss": 0.2414, "step": 1041 }, { "epoch": 0.08, "grad_norm": 1.6667961366980302, "learning_rate": 9.925518786376192e-06, "loss": 0.2002, "step": 1042 }, { "epoch": 0.08, "grad_norm": 1.627358044434155, "learning_rate": 9.925295845198148e-06, "loss": 0.2518, "step": 1043 }, { "epoch": 0.08, "grad_norm": 1.5305873633266944, "learning_rate": 9.92507257337007e-06, "loss": 0.1877, "step": 1044 }, { "epoch": 0.08, "grad_norm": 10.589233215509505, "learning_rate": 9.92484897090695e-06, "loss": 0.5678, "step": 1045 }, { "epoch": 0.08, "grad_norm": 1.5282959217170176, "learning_rate": 9.924625037823797e-06, "loss": 0.2133, "step": 1046 }, { "epoch": 0.08, "grad_norm": 1.9613381692978133, "learning_rate": 9.924400774135641e-06, "loss": 0.248, "step": 1047 }, { "epoch": 0.08, "grad_norm": 1.5984806319273763, "learning_rate": 9.924176179857543e-06, "loss": 0.2563, "step": 1048 }, { "epoch": 0.08, "grad_norm": 1.4152145838407604, "learning_rate": 9.923951255004577e-06, "loss": 0.2027, "step": 1049 }, { "epoch": 0.08, "grad_norm": 11.967197837263107, "learning_rate": 9.923725999591846e-06, "loss": 0.5974, "step": 1050 }, { "epoch": 0.08, "grad_norm": 5.947466420008018, "learning_rate": 9.92350041363447e-06, "loss": 0.7772, "step": 1051 }, { "epoch": 0.08, "grad_norm": 1.4447722431281742, "learning_rate": 9.923274497147595e-06, "loss": 0.2329, "step": 1052 }, { "epoch": 0.08, "grad_norm": 1.468874504154234, "learning_rate": 9.923048250146383e-06, "loss": 0.1986, "step": 1053 }, { "epoch": 0.08, "grad_norm": 1.5855376368991068, "learning_rate": 9.922821672646028e-06, "loss": 0.2888, "step": 1054 }, { "epoch": 0.08, "grad_norm": 1.6317941171123513, "learning_rate": 9.922594764661737e-06, "loss": 0.2516, "step": 1055 }, { "epoch": 0.08, "grad_norm": 1.5453199820230867, "learning_rate": 9.922367526208746e-06, "loss": 0.221, "step": 1056 }, { "epoch": 0.08, "grad_norm": 1.5356831504505377, "learning_rate": 9.922139957302308e-06, "loss": 0.2432, "step": 1057 }, { "epoch": 0.08, "grad_norm": 1.6677105545506716, "learning_rate": 9.921912057957701e-06, "loss": 0.2428, "step": 1058 }, { "epoch": 0.08, "grad_norm": 5.697788112722738, "learning_rate": 9.921683828190225e-06, "loss": 0.4556, "step": 1059 }, { "epoch": 0.08, "grad_norm": 1.563437534868239, "learning_rate": 9.9214552680152e-06, "loss": 0.2682, "step": 1060 }, { "epoch": 0.08, "grad_norm": 1.5172546439591739, "learning_rate": 9.921226377447975e-06, "loss": 0.2367, "step": 1061 }, { "epoch": 0.08, "grad_norm": 1.586214624701855, "learning_rate": 9.920997156503912e-06, "loss": 0.2635, "step": 1062 }, { "epoch": 0.09, "grad_norm": 1.4906773460514615, "learning_rate": 9.920767605198396e-06, "loss": 0.2007, "step": 1063 }, { "epoch": 0.09, "grad_norm": 1.4065150667127742, "learning_rate": 9.920537723546843e-06, "loss": 0.2087, "step": 1064 }, { "epoch": 0.09, "grad_norm": 1.570240754506179, "learning_rate": 9.920307511564686e-06, "loss": 0.2326, "step": 1065 }, { "epoch": 0.09, "grad_norm": 1.4435832403868885, "learning_rate": 9.920076969267375e-06, "loss": 0.2256, "step": 1066 }, { "epoch": 0.09, "grad_norm": 1.599550093240707, "learning_rate": 9.919846096670393e-06, "loss": 0.2598, "step": 1067 }, { "epoch": 0.09, "grad_norm": 1.5539707437993482, "learning_rate": 9.919614893789234e-06, "loss": 0.2378, "step": 1068 }, { "epoch": 0.09, "grad_norm": 1.4931619450507125, "learning_rate": 9.919383360639423e-06, "loss": 0.2044, "step": 1069 }, { "epoch": 0.09, "grad_norm": 1.4940578508063573, "learning_rate": 9.9191514972365e-06, "loss": 0.1937, "step": 1070 }, { "epoch": 0.09, "grad_norm": 1.5171987980143786, "learning_rate": 9.918919303596034e-06, "loss": 0.2397, "step": 1071 }, { "epoch": 0.09, "grad_norm": 8.045229593322809, "learning_rate": 9.918686779733608e-06, "loss": 0.5765, "step": 1072 }, { "epoch": 0.09, "grad_norm": 13.014158078044732, "learning_rate": 9.91845392566484e-06, "loss": 0.7735, "step": 1073 }, { "epoch": 0.09, "grad_norm": 1.5891342340719457, "learning_rate": 9.918220741405356e-06, "loss": 0.28, "step": 1074 }, { "epoch": 0.09, "grad_norm": 1.6345710308036756, "learning_rate": 9.917987226970811e-06, "loss": 0.2543, "step": 1075 }, { "epoch": 0.09, "grad_norm": 1.5146539662657386, "learning_rate": 9.917753382376883e-06, "loss": 0.2567, "step": 1076 }, { "epoch": 0.09, "grad_norm": 1.5358136489579914, "learning_rate": 9.91751920763927e-06, "loss": 0.2751, "step": 1077 }, { "epoch": 0.09, "grad_norm": 1.5356927672818956, "learning_rate": 9.917284702773692e-06, "loss": 0.2322, "step": 1078 }, { "epoch": 0.09, "grad_norm": 1.386307629619825, "learning_rate": 9.917049867795896e-06, "loss": 0.2154, "step": 1079 }, { "epoch": 0.09, "grad_norm": 2.162002919949642, "learning_rate": 9.916814702721641e-06, "loss": 0.2277, "step": 1080 }, { "epoch": 0.09, "grad_norm": 1.4808403162173098, "learning_rate": 9.916579207566721e-06, "loss": 0.2316, "step": 1081 }, { "epoch": 0.09, "grad_norm": 1.7920413639979027, "learning_rate": 9.916343382346942e-06, "loss": 0.2953, "step": 1082 }, { "epoch": 0.09, "grad_norm": 1.52418176156363, "learning_rate": 9.916107227078133e-06, "loss": 0.1971, "step": 1083 }, { "epoch": 0.09, "grad_norm": 1.5789887755440322, "learning_rate": 9.915870741776153e-06, "loss": 0.2452, "step": 1084 }, { "epoch": 0.09, "grad_norm": 1.5674536441991773, "learning_rate": 9.915633926456874e-06, "loss": 0.2332, "step": 1085 }, { "epoch": 0.09, "grad_norm": 1.4787930528596724, "learning_rate": 9.915396781136197e-06, "loss": 0.305, "step": 1086 }, { "epoch": 0.09, "grad_norm": 1.6145443680635934, "learning_rate": 9.91515930583004e-06, "loss": 0.2433, "step": 1087 }, { "epoch": 0.09, "grad_norm": 1.6283076020039178, "learning_rate": 9.914921500554347e-06, "loss": 0.272, "step": 1088 }, { "epoch": 0.09, "grad_norm": 1.5844388695709666, "learning_rate": 9.914683365325083e-06, "loss": 0.2277, "step": 1089 }, { "epoch": 0.09, "grad_norm": 1.5718160599454403, "learning_rate": 9.914444900158234e-06, "loss": 0.193, "step": 1090 }, { "epoch": 0.09, "grad_norm": 1.581305145458717, "learning_rate": 9.914206105069806e-06, "loss": 0.2368, "step": 1091 }, { "epoch": 0.09, "grad_norm": 1.3677432212787979, "learning_rate": 9.913966980075834e-06, "loss": 0.1884, "step": 1092 }, { "epoch": 0.09, "grad_norm": 7.251517553439886, "learning_rate": 9.91372752519237e-06, "loss": 0.6546, "step": 1093 }, { "epoch": 0.09, "grad_norm": 1.6329209880909659, "learning_rate": 9.91348774043549e-06, "loss": 0.246, "step": 1094 }, { "epoch": 0.09, "grad_norm": 5.575897279826717, "learning_rate": 9.91324762582129e-06, "loss": 0.5309, "step": 1095 }, { "epoch": 0.09, "grad_norm": 1.8458113687611641, "learning_rate": 9.91300718136589e-06, "loss": 0.2899, "step": 1096 }, { "epoch": 0.09, "grad_norm": 1.748777461531222, "learning_rate": 9.912766407085432e-06, "loss": 0.2746, "step": 1097 }, { "epoch": 0.09, "grad_norm": 1.7240686198227673, "learning_rate": 9.912525302996081e-06, "loss": 0.2397, "step": 1098 }, { "epoch": 0.09, "grad_norm": 1.6165923715996868, "learning_rate": 9.91228386911402e-06, "loss": 0.2159, "step": 1099 }, { "epoch": 0.09, "grad_norm": 1.6999843109138466, "learning_rate": 9.912042105455462e-06, "loss": 0.2232, "step": 1100 }, { "epoch": 0.09, "grad_norm": 1.5562556534579395, "learning_rate": 9.911800012036633e-06, "loss": 0.2144, "step": 1101 }, { "epoch": 0.09, "grad_norm": 1.4283147342679852, "learning_rate": 9.911557588873787e-06, "loss": 0.2013, "step": 1102 }, { "epoch": 0.09, "grad_norm": 1.4331252199175264, "learning_rate": 9.911314835983202e-06, "loss": 0.2028, "step": 1103 }, { "epoch": 0.09, "grad_norm": 1.6814503495401072, "learning_rate": 9.911071753381168e-06, "loss": 0.225, "step": 1104 }, { "epoch": 0.09, "grad_norm": 11.502283281418432, "learning_rate": 9.910828341084006e-06, "loss": 0.5931, "step": 1105 }, { "epoch": 0.09, "grad_norm": 1.4318182870228882, "learning_rate": 9.91058459910806e-06, "loss": 0.2269, "step": 1106 }, { "epoch": 0.09, "grad_norm": 1.3852522308820963, "learning_rate": 9.910340527469692e-06, "loss": 0.2036, "step": 1107 }, { "epoch": 0.09, "grad_norm": 1.7521162616308004, "learning_rate": 9.910096126185286e-06, "loss": 0.2478, "step": 1108 }, { "epoch": 0.09, "grad_norm": 1.5242575950324693, "learning_rate": 9.90985139527125e-06, "loss": 0.2257, "step": 1109 }, { "epoch": 0.09, "grad_norm": 1.6071380500848118, "learning_rate": 9.909606334744013e-06, "loss": 0.2346, "step": 1110 }, { "epoch": 0.09, "grad_norm": 1.6690425820703312, "learning_rate": 9.909360944620027e-06, "loss": 0.2265, "step": 1111 }, { "epoch": 0.09, "grad_norm": 1.6188746078753145, "learning_rate": 9.909115224915768e-06, "loss": 0.2438, "step": 1112 }, { "epoch": 0.09, "grad_norm": 5.3935367242329555, "learning_rate": 9.90886917564773e-06, "loss": 0.481, "step": 1113 }, { "epoch": 0.09, "grad_norm": 1.5557804504612123, "learning_rate": 9.908622796832427e-06, "loss": 0.2091, "step": 1114 }, { "epoch": 0.09, "grad_norm": 1.4295384589046842, "learning_rate": 9.908376088486407e-06, "loss": 0.2112, "step": 1115 }, { "epoch": 0.09, "grad_norm": 1.5630822073770563, "learning_rate": 9.908129050626228e-06, "loss": 0.2611, "step": 1116 }, { "epoch": 0.09, "grad_norm": 1.3386470670812902, "learning_rate": 9.907881683268472e-06, "loss": 0.1919, "step": 1117 }, { "epoch": 0.09, "grad_norm": 6.122830265056606, "learning_rate": 9.90763398642975e-06, "loss": 0.5886, "step": 1118 }, { "epoch": 0.09, "grad_norm": 5.9099362011156655, "learning_rate": 9.907385960126689e-06, "loss": 0.697, "step": 1119 }, { "epoch": 0.09, "grad_norm": 1.4619422154511335, "learning_rate": 9.907137604375941e-06, "loss": 0.2362, "step": 1120 }, { "epoch": 0.09, "grad_norm": 1.4332301330619177, "learning_rate": 9.906888919194178e-06, "loss": 0.2288, "step": 1121 }, { "epoch": 0.09, "grad_norm": 1.4929799912449913, "learning_rate": 9.906639904598092e-06, "loss": 0.2188, "step": 1122 }, { "epoch": 0.09, "grad_norm": 1.4861937576996784, "learning_rate": 9.906390560604404e-06, "loss": 0.2267, "step": 1123 }, { "epoch": 0.09, "grad_norm": 1.5621958857376876, "learning_rate": 9.906140887229852e-06, "loss": 0.2273, "step": 1124 }, { "epoch": 0.09, "grad_norm": 1.449037565170767, "learning_rate": 9.905890884491196e-06, "loss": 0.2305, "step": 1125 }, { "epoch": 0.09, "grad_norm": 1.4342417119363762, "learning_rate": 9.905640552405222e-06, "loss": 0.2007, "step": 1126 }, { "epoch": 0.09, "grad_norm": 1.5815516766780038, "learning_rate": 9.905389890988734e-06, "loss": 0.2644, "step": 1127 }, { "epoch": 0.09, "grad_norm": 1.5211514049346904, "learning_rate": 9.90513890025856e-06, "loss": 0.2577, "step": 1128 }, { "epoch": 0.09, "grad_norm": 1.5769451735251425, "learning_rate": 9.904887580231548e-06, "loss": 0.2211, "step": 1129 }, { "epoch": 0.09, "grad_norm": 1.4833729839778171, "learning_rate": 9.904635930924573e-06, "loss": 0.2235, "step": 1130 }, { "epoch": 0.09, "grad_norm": 5.662068518411611, "learning_rate": 9.904383952354528e-06, "loss": 0.6652, "step": 1131 }, { "epoch": 0.09, "grad_norm": 1.7770467061461308, "learning_rate": 9.904131644538327e-06, "loss": 0.2525, "step": 1132 }, { "epoch": 0.09, "grad_norm": 1.5486244175079165, "learning_rate": 9.903879007492912e-06, "loss": 0.2054, "step": 1133 }, { "epoch": 0.09, "grad_norm": 8.643524940309224, "learning_rate": 9.90362604123524e-06, "loss": 0.6821, "step": 1134 }, { "epoch": 0.09, "grad_norm": 1.5080254709256975, "learning_rate": 9.903372745782294e-06, "loss": 0.1879, "step": 1135 }, { "epoch": 0.09, "grad_norm": 1.5889033904886767, "learning_rate": 9.903119121151079e-06, "loss": 0.2837, "step": 1136 }, { "epoch": 0.09, "grad_norm": 7.07897584118699, "learning_rate": 9.90286516735862e-06, "loss": 0.2376, "step": 1137 }, { "epoch": 0.09, "grad_norm": 1.5901485119856444, "learning_rate": 9.90261088442197e-06, "loss": 0.2386, "step": 1138 }, { "epoch": 0.09, "grad_norm": 1.6169484316909901, "learning_rate": 9.902356272358196e-06, "loss": 0.2426, "step": 1139 }, { "epoch": 0.09, "grad_norm": 1.4562920020039767, "learning_rate": 9.902101331184391e-06, "loss": 0.1921, "step": 1140 }, { "epoch": 0.09, "grad_norm": 1.6275751987498905, "learning_rate": 9.901846060917673e-06, "loss": 0.2669, "step": 1141 }, { "epoch": 0.09, "grad_norm": 1.4483419800946988, "learning_rate": 9.901590461575175e-06, "loss": 0.283, "step": 1142 }, { "epoch": 0.09, "grad_norm": 1.5581625117799525, "learning_rate": 9.901334533174058e-06, "loss": 0.2438, "step": 1143 }, { "epoch": 0.09, "grad_norm": 5.845540790063638, "learning_rate": 9.901078275731504e-06, "loss": 0.7201, "step": 1144 }, { "epoch": 0.09, "grad_norm": 1.2663777535983152, "learning_rate": 9.900821689264715e-06, "loss": 0.1933, "step": 1145 }, { "epoch": 0.09, "grad_norm": 1.4199211927022155, "learning_rate": 9.90056477379092e-06, "loss": 0.1677, "step": 1146 }, { "epoch": 0.09, "grad_norm": 1.4331803505883982, "learning_rate": 9.90030752932736e-06, "loss": 0.2008, "step": 1147 }, { "epoch": 0.09, "grad_norm": 5.140823995480581, "learning_rate": 9.90004995589131e-06, "loss": 0.6989, "step": 1148 }, { "epoch": 0.09, "grad_norm": 1.2880289710375101, "learning_rate": 9.899792053500059e-06, "loss": 0.1854, "step": 1149 }, { "epoch": 0.09, "grad_norm": 6.179045516523009, "learning_rate": 9.899533822170922e-06, "loss": 0.6106, "step": 1150 }, { "epoch": 0.09, "grad_norm": 10.248541362098338, "learning_rate": 9.899275261921236e-06, "loss": 0.7359, "step": 1151 }, { "epoch": 0.09, "grad_norm": 7.709322350555803, "learning_rate": 9.899016372768355e-06, "loss": 0.565, "step": 1152 }, { "epoch": 0.09, "grad_norm": 1.9086156275210808, "learning_rate": 9.898757154729663e-06, "loss": 0.2523, "step": 1153 }, { "epoch": 0.09, "grad_norm": 1.750415728950451, "learning_rate": 9.898497607822561e-06, "loss": 0.2634, "step": 1154 }, { "epoch": 0.09, "grad_norm": 1.6254634038989189, "learning_rate": 9.898237732064472e-06, "loss": 0.227, "step": 1155 }, { "epoch": 0.09, "grad_norm": 1.6893065904451008, "learning_rate": 9.897977527472842e-06, "loss": 0.2077, "step": 1156 }, { "epoch": 0.09, "grad_norm": 1.8202402491061556, "learning_rate": 9.89771699406514e-06, "loss": 0.2616, "step": 1157 }, { "epoch": 0.09, "grad_norm": 9.274171275975403, "learning_rate": 9.89745613185886e-06, "loss": 0.5854, "step": 1158 }, { "epoch": 0.09, "grad_norm": 1.6841254522318398, "learning_rate": 9.897194940871509e-06, "loss": 0.2797, "step": 1159 }, { "epoch": 0.09, "grad_norm": 1.520906972182786, "learning_rate": 9.896933421120623e-06, "loss": 0.233, "step": 1160 }, { "epoch": 0.09, "grad_norm": 1.663735323311016, "learning_rate": 9.89667157262376e-06, "loss": 0.2845, "step": 1161 }, { "epoch": 0.09, "grad_norm": 1.5702075679085052, "learning_rate": 9.896409395398499e-06, "loss": 0.2511, "step": 1162 }, { "epoch": 0.09, "grad_norm": 1.67545391789473, "learning_rate": 9.896146889462438e-06, "loss": 0.2065, "step": 1163 }, { "epoch": 0.09, "grad_norm": 1.5343816317798418, "learning_rate": 9.895884054833202e-06, "loss": 0.242, "step": 1164 }, { "epoch": 0.09, "grad_norm": 1.5656425485755339, "learning_rate": 9.895620891528437e-06, "loss": 0.2223, "step": 1165 }, { "epoch": 0.09, "grad_norm": 8.640067511117483, "learning_rate": 9.895357399565806e-06, "loss": 0.7804, "step": 1166 }, { "epoch": 0.09, "grad_norm": 1.4143070131597346, "learning_rate": 9.895093578963002e-06, "loss": 0.2002, "step": 1167 }, { "epoch": 0.09, "grad_norm": 1.519246262172307, "learning_rate": 9.894829429737734e-06, "loss": 0.2609, "step": 1168 }, { "epoch": 0.09, "grad_norm": 1.4767782603853812, "learning_rate": 9.894564951907737e-06, "loss": 0.1614, "step": 1169 }, { "epoch": 0.09, "grad_norm": 1.7026333192013057, "learning_rate": 9.894300145490763e-06, "loss": 0.188, "step": 1170 }, { "epoch": 0.09, "grad_norm": 1.6776679740499743, "learning_rate": 9.894035010504592e-06, "loss": 0.1955, "step": 1171 }, { "epoch": 0.09, "grad_norm": 1.4927297898718945, "learning_rate": 9.893769546967023e-06, "loss": 0.2389, "step": 1172 }, { "epoch": 0.09, "grad_norm": 1.5806633855964354, "learning_rate": 9.893503754895874e-06, "loss": 0.2034, "step": 1173 }, { "epoch": 0.09, "grad_norm": 1.6572760535927649, "learning_rate": 9.893237634308995e-06, "loss": 0.2273, "step": 1174 }, { "epoch": 0.09, "grad_norm": 1.4167153890754696, "learning_rate": 9.892971185224244e-06, "loss": 0.1986, "step": 1175 }, { "epoch": 0.09, "grad_norm": 1.7277837602673367, "learning_rate": 9.892704407659514e-06, "loss": 0.2473, "step": 1176 }, { "epoch": 0.09, "grad_norm": 1.8087353905467967, "learning_rate": 9.892437301632713e-06, "loss": 0.2522, "step": 1177 }, { "epoch": 0.09, "grad_norm": 1.3446262351005338, "learning_rate": 9.892169867161774e-06, "loss": 0.1729, "step": 1178 }, { "epoch": 0.09, "grad_norm": 1.3965088154997665, "learning_rate": 9.891902104264646e-06, "loss": 0.173, "step": 1179 }, { "epoch": 0.09, "grad_norm": 1.5968698423880505, "learning_rate": 9.891634012959311e-06, "loss": 0.2746, "step": 1180 }, { "epoch": 0.09, "grad_norm": 4.90291674074756, "learning_rate": 9.891365593263761e-06, "loss": 0.6693, "step": 1181 }, { "epoch": 0.09, "grad_norm": 1.595639180848154, "learning_rate": 9.891096845196019e-06, "loss": 0.2098, "step": 1182 }, { "epoch": 0.09, "grad_norm": 1.4603592896229445, "learning_rate": 9.890827768774127e-06, "loss": 0.2356, "step": 1183 }, { "epoch": 0.09, "grad_norm": 1.5904912869445982, "learning_rate": 9.890558364016148e-06, "loss": 0.2794, "step": 1184 }, { "epoch": 0.09, "grad_norm": 1.6741059309875053, "learning_rate": 9.890288630940168e-06, "loss": 0.3042, "step": 1185 }, { "epoch": 0.09, "grad_norm": 1.4537682558757834, "learning_rate": 9.890018569564298e-06, "loss": 0.2207, "step": 1186 }, { "epoch": 0.09, "grad_norm": 1.5335758109164082, "learning_rate": 9.889748179906661e-06, "loss": 0.2129, "step": 1187 }, { "epoch": 0.1, "grad_norm": 1.629553283848535, "learning_rate": 9.889477461985415e-06, "loss": 0.2292, "step": 1188 }, { "epoch": 0.1, "grad_norm": 1.4604753078543957, "learning_rate": 9.889206415818733e-06, "loss": 0.2264, "step": 1189 }, { "epoch": 0.1, "grad_norm": 1.4606776420321377, "learning_rate": 9.88893504142481e-06, "loss": 0.2162, "step": 1190 }, { "epoch": 0.1, "grad_norm": 2.1869958052882126, "learning_rate": 9.888663338821864e-06, "loss": 0.254, "step": 1191 }, { "epoch": 0.1, "grad_norm": 1.560041437595407, "learning_rate": 9.888391308028138e-06, "loss": 0.2399, "step": 1192 }, { "epoch": 0.1, "grad_norm": 1.4748190213329984, "learning_rate": 9.888118949061891e-06, "loss": 0.271, "step": 1193 }, { "epoch": 0.1, "grad_norm": 1.3723794852552889, "learning_rate": 9.887846261941408e-06, "loss": 0.2061, "step": 1194 }, { "epoch": 0.1, "grad_norm": 1.589319000495709, "learning_rate": 9.887573246684998e-06, "loss": 0.2008, "step": 1195 }, { "epoch": 0.1, "grad_norm": 7.748045907144685, "learning_rate": 9.887299903310985e-06, "loss": 0.5921, "step": 1196 }, { "epoch": 0.1, "grad_norm": 1.6656061947855736, "learning_rate": 9.887026231837722e-06, "loss": 0.2534, "step": 1197 }, { "epoch": 0.1, "grad_norm": 1.5509884640833587, "learning_rate": 9.886752232283582e-06, "loss": 0.1893, "step": 1198 }, { "epoch": 0.1, "grad_norm": 1.479228476949519, "learning_rate": 9.886477904666958e-06, "loss": 0.2075, "step": 1199 }, { "epoch": 0.1, "grad_norm": 1.5427920471763739, "learning_rate": 9.886203249006265e-06, "loss": 0.2431, "step": 1200 }, { "epoch": 0.1, "grad_norm": 1.4742759367376064, "learning_rate": 9.885928265319946e-06, "loss": 0.2351, "step": 1201 }, { "epoch": 0.1, "grad_norm": 1.5855799865481581, "learning_rate": 9.885652953626456e-06, "loss": 0.2062, "step": 1202 }, { "epoch": 0.1, "grad_norm": 1.6698415751519944, "learning_rate": 9.885377313944284e-06, "loss": 0.2245, "step": 1203 }, { "epoch": 0.1, "grad_norm": 1.3541746200863356, "learning_rate": 9.885101346291928e-06, "loss": 0.1951, "step": 1204 }, { "epoch": 0.1, "grad_norm": 1.4599680798670263, "learning_rate": 9.884825050687918e-06, "loss": 0.2622, "step": 1205 }, { "epoch": 0.1, "grad_norm": 1.5050691344117746, "learning_rate": 9.884548427150802e-06, "loss": 0.2155, "step": 1206 }, { "epoch": 0.1, "grad_norm": 1.6376583010376282, "learning_rate": 9.88427147569915e-06, "loss": 0.197, "step": 1207 }, { "epoch": 0.1, "grad_norm": 1.5941627822361515, "learning_rate": 9.883994196351555e-06, "loss": 0.2478, "step": 1208 }, { "epoch": 0.1, "grad_norm": 1.3789988160489755, "learning_rate": 9.883716589126633e-06, "loss": 0.1998, "step": 1209 }, { "epoch": 0.1, "grad_norm": 1.687964050380182, "learning_rate": 9.883438654043019e-06, "loss": 0.2439, "step": 1210 }, { "epoch": 0.1, "grad_norm": 9.119080356055434, "learning_rate": 9.88316039111937e-06, "loss": 0.832, "step": 1211 }, { "epoch": 0.1, "grad_norm": 1.477191608850536, "learning_rate": 9.88288180037437e-06, "loss": 0.212, "step": 1212 }, { "epoch": 0.1, "grad_norm": 1.5368308370797685, "learning_rate": 9.882602881826721e-06, "loss": 0.2404, "step": 1213 }, { "epoch": 0.1, "grad_norm": 14.876471593532495, "learning_rate": 9.882323635495145e-06, "loss": 0.6998, "step": 1214 }, { "epoch": 0.1, "grad_norm": 1.4796850485491542, "learning_rate": 9.882044061398393e-06, "loss": 0.2424, "step": 1215 }, { "epoch": 0.1, "grad_norm": 1.5376896210134923, "learning_rate": 9.88176415955523e-06, "loss": 0.2467, "step": 1216 }, { "epoch": 0.1, "grad_norm": 1.5474008286208663, "learning_rate": 9.881483929984446e-06, "loss": 0.2429, "step": 1217 }, { "epoch": 0.1, "grad_norm": 1.2319481864748325, "learning_rate": 9.881203372704857e-06, "loss": 0.1809, "step": 1218 }, { "epoch": 0.1, "grad_norm": 1.3864755229772052, "learning_rate": 9.880922487735295e-06, "loss": 0.1924, "step": 1219 }, { "epoch": 0.1, "grad_norm": 1.363777296476471, "learning_rate": 9.88064127509462e-06, "loss": 0.1499, "step": 1220 }, { "epoch": 0.1, "grad_norm": 1.4378195642987939, "learning_rate": 9.880359734801708e-06, "loss": 0.2205, "step": 1221 }, { "epoch": 0.1, "grad_norm": 1.7000927531120587, "learning_rate": 9.880077866875459e-06, "loss": 0.2608, "step": 1222 }, { "epoch": 0.1, "grad_norm": 1.58547703977809, "learning_rate": 9.879795671334798e-06, "loss": 0.2806, "step": 1223 }, { "epoch": 0.1, "grad_norm": 1.5204246237789443, "learning_rate": 9.879513148198668e-06, "loss": 0.2339, "step": 1224 }, { "epoch": 0.1, "grad_norm": 1.506551857601678, "learning_rate": 9.879230297486034e-06, "loss": 0.2114, "step": 1225 }, { "epoch": 0.1, "grad_norm": 7.296145757799375, "learning_rate": 9.878947119215889e-06, "loss": 0.7773, "step": 1226 }, { "epoch": 0.1, "grad_norm": 1.4541957464519022, "learning_rate": 9.87866361340724e-06, "loss": 0.216, "step": 1227 }, { "epoch": 0.1, "grad_norm": 1.5021421019592718, "learning_rate": 9.878379780079122e-06, "loss": 0.2256, "step": 1228 }, { "epoch": 0.1, "grad_norm": 1.5568659697802163, "learning_rate": 9.878095619250588e-06, "loss": 0.1983, "step": 1229 }, { "epoch": 0.1, "grad_norm": 1.5945908895129153, "learning_rate": 9.877811130940715e-06, "loss": 0.2526, "step": 1230 }, { "epoch": 0.1, "grad_norm": 1.6393224547703271, "learning_rate": 9.8775263151686e-06, "loss": 0.2415, "step": 1231 }, { "epoch": 0.1, "grad_norm": 1.3882238251581343, "learning_rate": 9.877241171953367e-06, "loss": 0.1852, "step": 1232 }, { "epoch": 0.1, "grad_norm": 1.5142479666155222, "learning_rate": 9.876955701314157e-06, "loss": 0.2546, "step": 1233 }, { "epoch": 0.1, "grad_norm": 5.656882863325662, "learning_rate": 9.876669903270133e-06, "loss": 0.6298, "step": 1234 }, { "epoch": 0.1, "grad_norm": 1.3810587229994047, "learning_rate": 9.876383777840484e-06, "loss": 0.1984, "step": 1235 }, { "epoch": 0.1, "grad_norm": 1.6805618016968162, "learning_rate": 9.876097325044416e-06, "loss": 0.2811, "step": 1236 }, { "epoch": 0.1, "grad_norm": 1.2961923253721568, "learning_rate": 9.87581054490116e-06, "loss": 0.2063, "step": 1237 }, { "epoch": 0.1, "grad_norm": 1.6457488650821899, "learning_rate": 9.87552343742997e-06, "loss": 0.3113, "step": 1238 }, { "epoch": 0.1, "grad_norm": 1.5313417194840961, "learning_rate": 9.875236002650119e-06, "loss": 0.2159, "step": 1239 }, { "epoch": 0.1, "grad_norm": 1.353590915230694, "learning_rate": 9.874948240580903e-06, "loss": 0.1823, "step": 1240 }, { "epoch": 0.1, "grad_norm": 1.507847020727077, "learning_rate": 9.874660151241644e-06, "loss": 0.2216, "step": 1241 }, { "epoch": 0.1, "grad_norm": 4.4961928046658075, "learning_rate": 9.874371734651678e-06, "loss": 0.3865, "step": 1242 }, { "epoch": 0.1, "grad_norm": 1.7216839037715528, "learning_rate": 9.874082990830366e-06, "loss": 0.2561, "step": 1243 }, { "epoch": 0.1, "grad_norm": 1.53656246277392, "learning_rate": 9.873793919797099e-06, "loss": 0.214, "step": 1244 }, { "epoch": 0.1, "grad_norm": 1.4365897369027967, "learning_rate": 9.873504521571278e-06, "loss": 0.2261, "step": 1245 }, { "epoch": 0.1, "grad_norm": 1.5472450575095278, "learning_rate": 9.87321479617233e-06, "loss": 0.2678, "step": 1246 }, { "epoch": 0.1, "grad_norm": 1.6981838623952037, "learning_rate": 9.87292474361971e-06, "loss": 0.2521, "step": 1247 }, { "epoch": 0.1, "grad_norm": 1.639554932401539, "learning_rate": 9.872634363932887e-06, "loss": 0.2632, "step": 1248 }, { "epoch": 0.1, "grad_norm": 1.4520043958718585, "learning_rate": 9.872343657131355e-06, "loss": 0.2183, "step": 1249 }, { "epoch": 0.1, "grad_norm": 1.3424890791931532, "learning_rate": 9.872052623234632e-06, "loss": 0.2004, "step": 1250 }, { "epoch": 0.1, "grad_norm": 1.4162601855887524, "learning_rate": 9.871761262262252e-06, "loss": 0.2306, "step": 1251 }, { "epoch": 0.1, "grad_norm": 1.5179272074142474, "learning_rate": 9.871469574233781e-06, "loss": 0.229, "step": 1252 }, { "epoch": 0.1, "grad_norm": 1.520275733372812, "learning_rate": 9.871177559168795e-06, "loss": 0.224, "step": 1253 }, { "epoch": 0.1, "grad_norm": 1.5287062683156647, "learning_rate": 9.8708852170869e-06, "loss": 0.2204, "step": 1254 }, { "epoch": 0.1, "grad_norm": 1.66286348628413, "learning_rate": 9.870592548007725e-06, "loss": 0.2682, "step": 1255 }, { "epoch": 0.1, "grad_norm": 1.5805089451503687, "learning_rate": 9.870299551950912e-06, "loss": 0.2079, "step": 1256 }, { "epoch": 0.1, "grad_norm": 1.468115576201194, "learning_rate": 9.870006228936135e-06, "loss": 0.2112, "step": 1257 }, { "epoch": 0.1, "grad_norm": 1.5114860861271562, "learning_rate": 9.869712578983085e-06, "loss": 0.2061, "step": 1258 }, { "epoch": 0.1, "grad_norm": 1.5385575720939868, "learning_rate": 9.869418602111475e-06, "loss": 0.1992, "step": 1259 }, { "epoch": 0.1, "grad_norm": 1.616794660571654, "learning_rate": 9.869124298341039e-06, "loss": 0.2449, "step": 1260 }, { "epoch": 0.1, "grad_norm": 6.179466654330429, "learning_rate": 9.868829667691538e-06, "loss": 0.8159, "step": 1261 }, { "epoch": 0.1, "grad_norm": 1.6874042904014803, "learning_rate": 9.868534710182747e-06, "loss": 0.212, "step": 1262 }, { "epoch": 0.1, "grad_norm": 1.6645803845864144, "learning_rate": 9.868239425834472e-06, "loss": 0.2672, "step": 1263 }, { "epoch": 0.1, "grad_norm": 1.627170207393232, "learning_rate": 9.867943814666533e-06, "loss": 0.257, "step": 1264 }, { "epoch": 0.1, "grad_norm": 1.5782840856787934, "learning_rate": 9.867647876698776e-06, "loss": 0.2831, "step": 1265 }, { "epoch": 0.1, "grad_norm": 8.07227715987488, "learning_rate": 9.867351611951071e-06, "loss": 0.4821, "step": 1266 }, { "epoch": 0.1, "grad_norm": 1.558220330838703, "learning_rate": 9.867055020443302e-06, "loss": 0.2328, "step": 1267 }, { "epoch": 0.1, "grad_norm": 1.6552393255471114, "learning_rate": 9.866758102195384e-06, "loss": 0.2035, "step": 1268 }, { "epoch": 0.1, "grad_norm": 1.6111335874381598, "learning_rate": 9.86646085722725e-06, "loss": 0.2478, "step": 1269 }, { "epoch": 0.1, "grad_norm": 1.568783215246882, "learning_rate": 9.866163285558851e-06, "loss": 0.2278, "step": 1270 }, { "epoch": 0.1, "grad_norm": 1.5369253250884514, "learning_rate": 9.865865387210169e-06, "loss": 0.206, "step": 1271 }, { "epoch": 0.1, "grad_norm": 1.601777102209474, "learning_rate": 9.8655671622012e-06, "loss": 0.2607, "step": 1272 }, { "epoch": 0.1, "grad_norm": 1.524083001926831, "learning_rate": 9.865268610551966e-06, "loss": 0.276, "step": 1273 }, { "epoch": 0.1, "grad_norm": 1.6077407560378083, "learning_rate": 9.864969732282507e-06, "loss": 0.2338, "step": 1274 }, { "epoch": 0.1, "grad_norm": 1.544743916225948, "learning_rate": 9.864670527412891e-06, "loss": 0.2236, "step": 1275 }, { "epoch": 0.1, "grad_norm": 1.5352981573532034, "learning_rate": 9.864370995963204e-06, "loss": 0.2372, "step": 1276 }, { "epoch": 0.1, "grad_norm": 1.6596131906689564, "learning_rate": 9.864071137953552e-06, "loss": 0.2142, "step": 1277 }, { "epoch": 0.1, "grad_norm": 6.401168154543283, "learning_rate": 9.863770953404068e-06, "loss": 0.618, "step": 1278 }, { "epoch": 0.1, "grad_norm": 1.6881806004593924, "learning_rate": 9.863470442334904e-06, "loss": 0.2489, "step": 1279 }, { "epoch": 0.1, "grad_norm": 5.270452032620281, "learning_rate": 9.863169604766231e-06, "loss": 0.5501, "step": 1280 }, { "epoch": 0.1, "grad_norm": 5.921263308335348, "learning_rate": 9.86286844071825e-06, "loss": 0.5891, "step": 1281 }, { "epoch": 0.1, "grad_norm": 1.6326002299720224, "learning_rate": 9.862566950211175e-06, "loss": 0.2087, "step": 1282 }, { "epoch": 0.1, "grad_norm": 1.1613978573095491, "learning_rate": 9.862265133265248e-06, "loss": 0.1747, "step": 1283 }, { "epoch": 0.1, "grad_norm": 1.7024187966796624, "learning_rate": 9.861962989900732e-06, "loss": 0.2747, "step": 1284 }, { "epoch": 0.1, "grad_norm": 1.5568553565742755, "learning_rate": 9.861660520137908e-06, "loss": 0.2461, "step": 1285 }, { "epoch": 0.1, "grad_norm": 1.7646407229744727, "learning_rate": 9.861357723997082e-06, "loss": 0.2798, "step": 1286 }, { "epoch": 0.1, "grad_norm": 1.5202771441452385, "learning_rate": 9.861054601498586e-06, "loss": 0.2126, "step": 1287 }, { "epoch": 0.1, "grad_norm": 1.495939184098469, "learning_rate": 9.860751152662762e-06, "loss": 0.2268, "step": 1288 }, { "epoch": 0.1, "grad_norm": 1.600047661816389, "learning_rate": 9.860447377509989e-06, "loss": 0.2031, "step": 1289 }, { "epoch": 0.1, "grad_norm": 1.3784986845475653, "learning_rate": 9.860143276060655e-06, "loss": 0.2008, "step": 1290 }, { "epoch": 0.1, "grad_norm": 1.4369934389728147, "learning_rate": 9.859838848335178e-06, "loss": 0.204, "step": 1291 }, { "epoch": 0.1, "grad_norm": 1.5247123382267085, "learning_rate": 9.859534094353994e-06, "loss": 0.2388, "step": 1292 }, { "epoch": 0.1, "grad_norm": 1.5214889060063344, "learning_rate": 9.859229014137564e-06, "loss": 0.2478, "step": 1293 }, { "epoch": 0.1, "grad_norm": 1.5837295621394827, "learning_rate": 9.858923607706366e-06, "loss": 0.233, "step": 1294 }, { "epoch": 0.1, "grad_norm": 1.4327328130112271, "learning_rate": 9.858617875080904e-06, "loss": 0.2551, "step": 1295 }, { "epoch": 0.1, "grad_norm": 1.712899783157388, "learning_rate": 9.858311816281703e-06, "loss": 0.2308, "step": 1296 }, { "epoch": 0.1, "grad_norm": 1.6467437562054286, "learning_rate": 9.858005431329309e-06, "loss": 0.2525, "step": 1297 }, { "epoch": 0.1, "grad_norm": 1.4168690711237977, "learning_rate": 9.857698720244294e-06, "loss": 0.189, "step": 1298 }, { "epoch": 0.1, "grad_norm": 1.4449466105721798, "learning_rate": 9.857391683047244e-06, "loss": 0.2031, "step": 1299 }, { "epoch": 0.1, "grad_norm": 1.554423339121032, "learning_rate": 9.857084319758772e-06, "loss": 0.2293, "step": 1300 }, { "epoch": 0.1, "grad_norm": 6.51829907476096, "learning_rate": 9.856776630399514e-06, "loss": 0.4161, "step": 1301 }, { "epoch": 0.1, "grad_norm": 1.5682420260526695, "learning_rate": 9.856468614990127e-06, "loss": 0.2804, "step": 1302 }, { "epoch": 0.1, "grad_norm": 1.4309682589456088, "learning_rate": 9.856160273551285e-06, "loss": 0.2023, "step": 1303 }, { "epoch": 0.1, "grad_norm": 1.4918386482698014, "learning_rate": 9.855851606103691e-06, "loss": 0.2125, "step": 1304 }, { "epoch": 0.1, "grad_norm": 1.3802167648011263, "learning_rate": 9.855542612668066e-06, "loss": 0.2315, "step": 1305 }, { "epoch": 0.1, "grad_norm": 1.6220486547133197, "learning_rate": 9.855233293265153e-06, "loss": 0.261, "step": 1306 }, { "epoch": 0.1, "grad_norm": 7.043414255382233, "learning_rate": 9.85492364791572e-06, "loss": 0.4871, "step": 1307 }, { "epoch": 0.1, "grad_norm": 1.503732509748684, "learning_rate": 9.854613676640551e-06, "loss": 0.2246, "step": 1308 }, { "epoch": 0.1, "grad_norm": 1.6049608124902606, "learning_rate": 9.854303379460458e-06, "loss": 0.2403, "step": 1309 }, { "epoch": 0.1, "grad_norm": 1.4643366977450483, "learning_rate": 9.853992756396272e-06, "loss": 0.2279, "step": 1310 }, { "epoch": 0.1, "grad_norm": 1.6054568792744657, "learning_rate": 9.853681807468845e-06, "loss": 0.2901, "step": 1311 }, { "epoch": 0.1, "grad_norm": 1.5145162783211317, "learning_rate": 9.853370532699052e-06, "loss": 0.2082, "step": 1312 }, { "epoch": 0.11, "grad_norm": 1.6343484845675553, "learning_rate": 9.853058932107789e-06, "loss": 0.207, "step": 1313 }, { "epoch": 0.11, "grad_norm": 1.5182533982877526, "learning_rate": 9.852747005715976e-06, "loss": 0.2309, "step": 1314 }, { "epoch": 0.11, "grad_norm": 1.6377338184254033, "learning_rate": 9.852434753544552e-06, "loss": 0.2157, "step": 1315 }, { "epoch": 0.11, "grad_norm": 1.7927427690368656, "learning_rate": 9.852122175614484e-06, "loss": 0.2108, "step": 1316 }, { "epoch": 0.11, "grad_norm": 1.5413721558837432, "learning_rate": 9.85180927194675e-06, "loss": 0.2269, "step": 1317 }, { "epoch": 0.11, "grad_norm": 4.604072206692453, "learning_rate": 9.85149604256236e-06, "loss": 0.7057, "step": 1318 }, { "epoch": 0.11, "grad_norm": 1.6518681112436822, "learning_rate": 9.851182487482342e-06, "loss": 0.2321, "step": 1319 }, { "epoch": 0.11, "grad_norm": 1.6176386535258809, "learning_rate": 9.850868606727745e-06, "loss": 0.2977, "step": 1320 }, { "epoch": 0.11, "grad_norm": 1.5163833273166403, "learning_rate": 9.85055440031964e-06, "loss": 0.2142, "step": 1321 }, { "epoch": 0.11, "grad_norm": 1.4593753709401616, "learning_rate": 9.850239868279123e-06, "loss": 0.213, "step": 1322 }, { "epoch": 0.11, "grad_norm": 1.3948648754092454, "learning_rate": 9.849925010627308e-06, "loss": 0.1798, "step": 1323 }, { "epoch": 0.11, "grad_norm": 1.6110357416701928, "learning_rate": 9.84960982738533e-06, "loss": 0.2632, "step": 1324 }, { "epoch": 0.11, "grad_norm": 5.114977292061194, "learning_rate": 9.849294318574353e-06, "loss": 0.6103, "step": 1325 }, { "epoch": 0.11, "grad_norm": 1.7754173369612274, "learning_rate": 9.848978484215554e-06, "loss": 0.2336, "step": 1326 }, { "epoch": 0.11, "grad_norm": 1.5000966746851923, "learning_rate": 9.848662324330139e-06, "loss": 0.2238, "step": 1327 }, { "epoch": 0.11, "grad_norm": 1.5459688275196888, "learning_rate": 9.848345838939329e-06, "loss": 0.2535, "step": 1328 }, { "epoch": 0.11, "grad_norm": 1.445380477310344, "learning_rate": 9.848029028064374e-06, "loss": 0.2384, "step": 1329 }, { "epoch": 0.11, "grad_norm": 1.3401213196546187, "learning_rate": 9.847711891726543e-06, "loss": 0.1965, "step": 1330 }, { "epoch": 0.11, "grad_norm": 1.544165120521194, "learning_rate": 9.847394429947124e-06, "loss": 0.2514, "step": 1331 }, { "epoch": 0.11, "grad_norm": 1.5187169729622347, "learning_rate": 9.847076642747429e-06, "loss": 0.2059, "step": 1332 }, { "epoch": 0.11, "grad_norm": 1.450421403812484, "learning_rate": 9.846758530148793e-06, "loss": 0.1944, "step": 1333 }, { "epoch": 0.11, "grad_norm": 1.378706091145846, "learning_rate": 9.84644009217257e-06, "loss": 0.1828, "step": 1334 }, { "epoch": 0.11, "grad_norm": 1.6106828071763193, "learning_rate": 9.846121328840143e-06, "loss": 0.2306, "step": 1335 }, { "epoch": 0.11, "grad_norm": 1.5249295372187086, "learning_rate": 9.845802240172908e-06, "loss": 0.2268, "step": 1336 }, { "epoch": 0.11, "grad_norm": 1.7422706892672513, "learning_rate": 9.845482826192284e-06, "loss": 0.2568, "step": 1337 }, { "epoch": 0.11, "grad_norm": 1.5630870214770054, "learning_rate": 9.845163086919718e-06, "loss": 0.2682, "step": 1338 }, { "epoch": 0.11, "grad_norm": 1.4423051259483073, "learning_rate": 9.844843022376673e-06, "loss": 0.2132, "step": 1339 }, { "epoch": 0.11, "grad_norm": 1.4945468977657284, "learning_rate": 9.844522632584636e-06, "loss": 0.2286, "step": 1340 }, { "epoch": 0.11, "grad_norm": 1.4811032687179086, "learning_rate": 9.844201917565119e-06, "loss": 0.2474, "step": 1341 }, { "epoch": 0.11, "grad_norm": 1.7351672465107948, "learning_rate": 9.843880877339648e-06, "loss": 0.2685, "step": 1342 }, { "epoch": 0.11, "grad_norm": 1.4098817650089928, "learning_rate": 9.843559511929777e-06, "loss": 0.2058, "step": 1343 }, { "epoch": 0.11, "grad_norm": 1.6030518121214643, "learning_rate": 9.843237821357082e-06, "loss": 0.2336, "step": 1344 }, { "epoch": 0.11, "grad_norm": 1.492303424991774, "learning_rate": 9.842915805643156e-06, "loss": 0.2196, "step": 1345 }, { "epoch": 0.11, "grad_norm": 1.4148967167561444, "learning_rate": 9.84259346480962e-06, "loss": 0.2448, "step": 1346 }, { "epoch": 0.11, "grad_norm": 1.4332362316202147, "learning_rate": 9.842270798878111e-06, "loss": 0.202, "step": 1347 }, { "epoch": 0.11, "grad_norm": 1.46852464059966, "learning_rate": 9.841947807870293e-06, "loss": 0.261, "step": 1348 }, { "epoch": 0.11, "grad_norm": 1.5477143630663952, "learning_rate": 9.841624491807846e-06, "loss": 0.2476, "step": 1349 }, { "epoch": 0.11, "grad_norm": 1.7733072106554304, "learning_rate": 9.841300850712479e-06, "loss": 0.2717, "step": 1350 }, { "epoch": 0.11, "grad_norm": 1.8632456287428134, "learning_rate": 9.840976884605916e-06, "loss": 0.2143, "step": 1351 }, { "epoch": 0.11, "grad_norm": 2.0008596053827428, "learning_rate": 9.840652593509909e-06, "loss": 0.2373, "step": 1352 }, { "epoch": 0.11, "grad_norm": 5.652903898881646, "learning_rate": 9.840327977446226e-06, "loss": 0.6222, "step": 1353 }, { "epoch": 0.11, "grad_norm": 4.846933979826645, "learning_rate": 9.840003036436661e-06, "loss": 0.585, "step": 1354 }, { "epoch": 0.11, "grad_norm": 1.3586585374788638, "learning_rate": 9.839677770503028e-06, "loss": 0.1728, "step": 1355 }, { "epoch": 0.11, "grad_norm": 1.6568698252455725, "learning_rate": 9.839352179667162e-06, "loss": 0.2092, "step": 1356 }, { "epoch": 0.11, "grad_norm": 1.5432193582207006, "learning_rate": 9.83902626395092e-06, "loss": 0.231, "step": 1357 }, { "epoch": 0.11, "grad_norm": 1.5448501122645313, "learning_rate": 9.838700023376184e-06, "loss": 0.2109, "step": 1358 }, { "epoch": 0.11, "grad_norm": 6.342338369806452, "learning_rate": 9.838373457964856e-06, "loss": 0.6459, "step": 1359 }, { "epoch": 0.11, "grad_norm": 5.20287931982747, "learning_rate": 9.838046567738856e-06, "loss": 0.4659, "step": 1360 }, { "epoch": 0.11, "grad_norm": 16.297105111468454, "learning_rate": 9.837719352720133e-06, "loss": 0.6487, "step": 1361 }, { "epoch": 0.11, "grad_norm": 1.619963408462713, "learning_rate": 9.83739181293065e-06, "loss": 0.2942, "step": 1362 }, { "epoch": 0.11, "grad_norm": 1.4349394650160046, "learning_rate": 9.837063948392401e-06, "loss": 0.2021, "step": 1363 }, { "epoch": 0.11, "grad_norm": 1.2944483742902744, "learning_rate": 9.836735759127391e-06, "loss": 0.1804, "step": 1364 }, { "epoch": 0.11, "grad_norm": 1.442794120773668, "learning_rate": 9.836407245157656e-06, "loss": 0.2095, "step": 1365 }, { "epoch": 0.11, "grad_norm": 1.6739206900543397, "learning_rate": 9.836078406505249e-06, "loss": 0.2272, "step": 1366 }, { "epoch": 0.11, "grad_norm": 1.4887511961771356, "learning_rate": 9.835749243192245e-06, "loss": 0.2296, "step": 1367 }, { "epoch": 0.11, "grad_norm": 4.33437153770658, "learning_rate": 9.835419755240743e-06, "loss": 0.4742, "step": 1368 }, { "epoch": 0.11, "grad_norm": 1.4765551147247555, "learning_rate": 9.835089942672862e-06, "loss": 0.2583, "step": 1369 }, { "epoch": 0.11, "grad_norm": 1.4567166693949587, "learning_rate": 9.834759805510742e-06, "loss": 0.2246, "step": 1370 }, { "epoch": 0.11, "grad_norm": 1.6546526285836909, "learning_rate": 9.834429343776551e-06, "loss": 0.2808, "step": 1371 }, { "epoch": 0.11, "grad_norm": 1.460627681075815, "learning_rate": 9.834098557492467e-06, "loss": 0.2526, "step": 1372 }, { "epoch": 0.11, "grad_norm": 1.4086192899309873, "learning_rate": 9.8337674466807e-06, "loss": 0.2243, "step": 1373 }, { "epoch": 0.11, "grad_norm": 1.5091255987601904, "learning_rate": 9.833436011363482e-06, "loss": 0.2042, "step": 1374 }, { "epoch": 0.11, "grad_norm": 1.6167079637260537, "learning_rate": 9.833104251563058e-06, "loss": 0.2604, "step": 1375 }, { "epoch": 0.11, "grad_norm": 1.4291148760164998, "learning_rate": 9.832772167301701e-06, "loss": 0.208, "step": 1376 }, { "epoch": 0.11, "grad_norm": 5.755968490273507, "learning_rate": 9.832439758601706e-06, "loss": 0.5376, "step": 1377 }, { "epoch": 0.11, "grad_norm": 6.184471085781334, "learning_rate": 9.83210702548539e-06, "loss": 0.5408, "step": 1378 }, { "epoch": 0.11, "grad_norm": 1.5989298590823278, "learning_rate": 9.831773967975085e-06, "loss": 0.2302, "step": 1379 }, { "epoch": 0.11, "grad_norm": 5.3485455377460465, "learning_rate": 9.831440586093157e-06, "loss": 0.4865, "step": 1380 }, { "epoch": 0.11, "grad_norm": 5.773040260322894, "learning_rate": 9.831106879861982e-06, "loss": 0.6059, "step": 1381 }, { "epoch": 0.11, "grad_norm": 1.4972312019509375, "learning_rate": 9.830772849303967e-06, "loss": 0.248, "step": 1382 }, { "epoch": 0.11, "grad_norm": 1.5611347541717178, "learning_rate": 9.830438494441533e-06, "loss": 0.2491, "step": 1383 }, { "epoch": 0.11, "grad_norm": 1.2940051640247117, "learning_rate": 9.830103815297126e-06, "loss": 0.1694, "step": 1384 }, { "epoch": 0.11, "grad_norm": 1.551032344180417, "learning_rate": 9.829768811893214e-06, "loss": 0.2361, "step": 1385 }, { "epoch": 0.11, "grad_norm": 1.5416562841956336, "learning_rate": 9.829433484252292e-06, "loss": 0.2104, "step": 1386 }, { "epoch": 0.11, "grad_norm": 1.585628610240876, "learning_rate": 9.829097832396864e-06, "loss": 0.2189, "step": 1387 }, { "epoch": 0.11, "grad_norm": 1.6828530326464681, "learning_rate": 9.82876185634947e-06, "loss": 0.2551, "step": 1388 }, { "epoch": 0.11, "grad_norm": 1.4639213732594658, "learning_rate": 9.828425556132659e-06, "loss": 0.2113, "step": 1389 }, { "epoch": 0.11, "grad_norm": 1.5468244789615329, "learning_rate": 9.828088931769012e-06, "loss": 0.2484, "step": 1390 }, { "epoch": 0.11, "grad_norm": 1.7052255095411852, "learning_rate": 9.827751983281126e-06, "loss": 0.2284, "step": 1391 }, { "epoch": 0.11, "grad_norm": 1.3946262694016962, "learning_rate": 9.827414710691624e-06, "loss": 0.2139, "step": 1392 }, { "epoch": 0.11, "grad_norm": 1.5706864110951968, "learning_rate": 9.827077114023145e-06, "loss": 0.2303, "step": 1393 }, { "epoch": 0.11, "grad_norm": 5.8467701436448705, "learning_rate": 9.826739193298353e-06, "loss": 0.4653, "step": 1394 }, { "epoch": 0.11, "grad_norm": 1.377439390234066, "learning_rate": 9.826400948539935e-06, "loss": 0.1939, "step": 1395 }, { "epoch": 0.11, "grad_norm": 1.3667716905354454, "learning_rate": 9.826062379770598e-06, "loss": 0.1824, "step": 1396 }, { "epoch": 0.11, "grad_norm": 5.826233858029706, "learning_rate": 9.82572348701307e-06, "loss": 0.529, "step": 1397 }, { "epoch": 0.11, "grad_norm": 1.38865560960334, "learning_rate": 9.825384270290104e-06, "loss": 0.2135, "step": 1398 }, { "epoch": 0.11, "grad_norm": 1.5217516109068419, "learning_rate": 9.825044729624472e-06, "loss": 0.2249, "step": 1399 }, { "epoch": 0.11, "grad_norm": 1.5573089448173083, "learning_rate": 9.824704865038967e-06, "loss": 0.2469, "step": 1400 }, { "epoch": 0.11, "grad_norm": 1.4939652856714627, "learning_rate": 9.824364676556406e-06, "loss": 0.1969, "step": 1401 }, { "epoch": 0.11, "grad_norm": 8.417313856655264, "learning_rate": 9.824024164199627e-06, "loss": 0.5295, "step": 1402 }, { "epoch": 0.11, "grad_norm": 1.5893032806163254, "learning_rate": 9.823683327991492e-06, "loss": 0.2426, "step": 1403 }, { "epoch": 0.11, "grad_norm": 1.530048069706559, "learning_rate": 9.82334216795488e-06, "loss": 0.2411, "step": 1404 }, { "epoch": 0.11, "grad_norm": 1.5691111762677317, "learning_rate": 9.823000684112691e-06, "loss": 0.2371, "step": 1405 }, { "epoch": 0.11, "grad_norm": 1.5740814661995475, "learning_rate": 9.822658876487854e-06, "loss": 0.2373, "step": 1406 }, { "epoch": 0.11, "grad_norm": 1.4544963366575885, "learning_rate": 9.822316745103316e-06, "loss": 0.2155, "step": 1407 }, { "epoch": 0.11, "grad_norm": 1.78182010890901, "learning_rate": 9.821974289982042e-06, "loss": 0.2717, "step": 1408 }, { "epoch": 0.11, "grad_norm": 1.3542350353714456, "learning_rate": 9.821631511147025e-06, "loss": 0.2226, "step": 1409 }, { "epoch": 0.11, "grad_norm": 1.630477332425947, "learning_rate": 9.821288408621276e-06, "loss": 0.2675, "step": 1410 }, { "epoch": 0.11, "grad_norm": 1.487475758372642, "learning_rate": 9.820944982427826e-06, "loss": 0.2359, "step": 1411 }, { "epoch": 0.11, "grad_norm": 1.471869211768212, "learning_rate": 9.820601232589735e-06, "loss": 0.189, "step": 1412 }, { "epoch": 0.11, "grad_norm": 1.3654992248220554, "learning_rate": 9.820257159130076e-06, "loss": 0.196, "step": 1413 }, { "epoch": 0.11, "grad_norm": 1.4416320816628994, "learning_rate": 9.81991276207195e-06, "loss": 0.214, "step": 1414 }, { "epoch": 0.11, "grad_norm": 1.3953485014430456, "learning_rate": 9.819568041438477e-06, "loss": 0.218, "step": 1415 }, { "epoch": 0.11, "grad_norm": 1.4661686834911387, "learning_rate": 9.819222997252798e-06, "loss": 0.1914, "step": 1416 }, { "epoch": 0.11, "grad_norm": 1.5321069518731856, "learning_rate": 9.818877629538077e-06, "loss": 0.29, "step": 1417 }, { "epoch": 0.11, "grad_norm": 9.610913915200411, "learning_rate": 9.818531938317499e-06, "loss": 0.6348, "step": 1418 }, { "epoch": 0.11, "grad_norm": 1.5830589045520829, "learning_rate": 9.818185923614274e-06, "loss": 0.2326, "step": 1419 }, { "epoch": 0.11, "grad_norm": 1.4443262685480311, "learning_rate": 9.817839585451629e-06, "loss": 0.2303, "step": 1420 }, { "epoch": 0.11, "grad_norm": 1.4140632364202714, "learning_rate": 9.817492923852817e-06, "loss": 0.2143, "step": 1421 }, { "epoch": 0.11, "grad_norm": 1.50414358071682, "learning_rate": 9.817145938841106e-06, "loss": 0.2229, "step": 1422 }, { "epoch": 0.11, "grad_norm": 1.5038105686779784, "learning_rate": 9.816798630439794e-06, "loss": 0.2289, "step": 1423 }, { "epoch": 0.11, "grad_norm": 1.5601935200054282, "learning_rate": 9.816450998672195e-06, "loss": 0.248, "step": 1424 }, { "epoch": 0.11, "grad_norm": 1.4408127622582956, "learning_rate": 9.816103043561648e-06, "loss": 0.2338, "step": 1425 }, { "epoch": 0.11, "grad_norm": 1.770503382532695, "learning_rate": 9.815754765131511e-06, "loss": 0.2582, "step": 1426 }, { "epoch": 0.11, "grad_norm": 1.5456056631965465, "learning_rate": 9.815406163405165e-06, "loss": 0.2285, "step": 1427 }, { "epoch": 0.11, "grad_norm": 1.5105420645521708, "learning_rate": 9.815057238406015e-06, "loss": 0.2499, "step": 1428 }, { "epoch": 0.11, "grad_norm": 1.2713021279102137, "learning_rate": 9.814707990157482e-06, "loss": 0.1608, "step": 1429 }, { "epoch": 0.11, "grad_norm": 1.6944330333303748, "learning_rate": 9.814358418683014e-06, "loss": 0.2508, "step": 1430 }, { "epoch": 0.11, "grad_norm": 1.4603186588033277, "learning_rate": 9.814008524006077e-06, "loss": 0.216, "step": 1431 }, { "epoch": 0.11, "grad_norm": 1.7791581265113101, "learning_rate": 9.813658306150164e-06, "loss": 0.2485, "step": 1432 }, { "epoch": 0.11, "grad_norm": 1.6145543915863054, "learning_rate": 9.813307765138784e-06, "loss": 0.2598, "step": 1433 }, { "epoch": 0.11, "grad_norm": 1.6239851367449045, "learning_rate": 9.81295690099547e-06, "loss": 0.2501, "step": 1434 }, { "epoch": 0.11, "grad_norm": 1.3407668059144913, "learning_rate": 9.812605713743775e-06, "loss": 0.2135, "step": 1435 }, { "epoch": 0.11, "grad_norm": 1.6451002778773953, "learning_rate": 9.812254203407278e-06, "loss": 0.2957, "step": 1436 }, { "epoch": 0.11, "grad_norm": 1.3738871085555746, "learning_rate": 9.811902370009576e-06, "loss": 0.2228, "step": 1437 }, { "epoch": 0.12, "grad_norm": 1.2018390035675401, "learning_rate": 9.811550213574287e-06, "loss": 0.1826, "step": 1438 }, { "epoch": 0.12, "grad_norm": 1.4340336457735143, "learning_rate": 9.811197734125055e-06, "loss": 0.2009, "step": 1439 }, { "epoch": 0.12, "grad_norm": 1.601991205647889, "learning_rate": 9.810844931685542e-06, "loss": 0.2543, "step": 1440 }, { "epoch": 0.12, "grad_norm": 1.6402376946753459, "learning_rate": 9.810491806279432e-06, "loss": 0.2725, "step": 1441 }, { "epoch": 0.12, "grad_norm": 1.7149124662662572, "learning_rate": 9.81013835793043e-06, "loss": 0.2511, "step": 1442 }, { "epoch": 0.12, "grad_norm": 1.581572668740855, "learning_rate": 9.809784586662268e-06, "loss": 0.2466, "step": 1443 }, { "epoch": 0.12, "grad_norm": 1.4928160289781764, "learning_rate": 9.809430492498693e-06, "loss": 0.21, "step": 1444 }, { "epoch": 0.12, "grad_norm": 1.7698665858693028, "learning_rate": 9.809076075463476e-06, "loss": 0.2557, "step": 1445 }, { "epoch": 0.12, "grad_norm": 1.276943382726287, "learning_rate": 9.808721335580414e-06, "loss": 0.1847, "step": 1446 }, { "epoch": 0.12, "grad_norm": 5.405350957138464, "learning_rate": 9.808366272873317e-06, "loss": 0.661, "step": 1447 }, { "epoch": 0.12, "grad_norm": 1.824441567134671, "learning_rate": 9.808010887366024e-06, "loss": 0.2549, "step": 1448 }, { "epoch": 0.12, "grad_norm": 1.3207735091632264, "learning_rate": 9.807655179082392e-06, "loss": 0.1645, "step": 1449 }, { "epoch": 0.12, "grad_norm": 1.3159725071161843, "learning_rate": 9.807299148046301e-06, "loss": 0.1507, "step": 1450 }, { "epoch": 0.12, "grad_norm": 1.5284544676265406, "learning_rate": 9.806942794281654e-06, "loss": 0.2146, "step": 1451 }, { "epoch": 0.12, "grad_norm": 1.3646302306878015, "learning_rate": 9.80658611781237e-06, "loss": 0.23, "step": 1452 }, { "epoch": 0.12, "grad_norm": 1.4778281106495794, "learning_rate": 9.806229118662398e-06, "loss": 0.2029, "step": 1453 }, { "epoch": 0.12, "grad_norm": 1.4820536704594791, "learning_rate": 9.805871796855704e-06, "loss": 0.216, "step": 1454 }, { "epoch": 0.12, "grad_norm": 1.4880666126792284, "learning_rate": 9.805514152416274e-06, "loss": 0.225, "step": 1455 }, { "epoch": 0.12, "grad_norm": 1.5381085612039622, "learning_rate": 9.80515618536812e-06, "loss": 0.2384, "step": 1456 }, { "epoch": 0.12, "grad_norm": 1.6104170851049522, "learning_rate": 9.80479789573527e-06, "loss": 0.26, "step": 1457 }, { "epoch": 0.12, "grad_norm": 4.792092933624506, "learning_rate": 9.804439283541781e-06, "loss": 0.6425, "step": 1458 }, { "epoch": 0.12, "grad_norm": 6.200474311649978, "learning_rate": 9.804080348811725e-06, "loss": 0.7019, "step": 1459 }, { "epoch": 0.12, "grad_norm": 1.7435265336733736, "learning_rate": 9.803721091569201e-06, "loss": 0.2592, "step": 1460 }, { "epoch": 0.12, "grad_norm": 11.89886811005992, "learning_rate": 9.803361511838324e-06, "loss": 0.6341, "step": 1461 }, { "epoch": 0.12, "grad_norm": 1.365379346529135, "learning_rate": 9.803001609643234e-06, "loss": 0.2099, "step": 1462 }, { "epoch": 0.12, "grad_norm": 1.3343566353898706, "learning_rate": 9.802641385008096e-06, "loss": 0.1868, "step": 1463 }, { "epoch": 0.12, "grad_norm": 1.515053796543866, "learning_rate": 9.80228083795709e-06, "loss": 0.2034, "step": 1464 }, { "epoch": 0.12, "grad_norm": 1.6269161043224634, "learning_rate": 9.80191996851442e-06, "loss": 0.211, "step": 1465 }, { "epoch": 0.12, "grad_norm": 1.4292789078983759, "learning_rate": 9.801558776704315e-06, "loss": 0.2349, "step": 1466 }, { "epoch": 0.12, "grad_norm": 1.5690316736296728, "learning_rate": 9.801197262551019e-06, "loss": 0.2026, "step": 1467 }, { "epoch": 0.12, "grad_norm": 8.7698241411753, "learning_rate": 9.800835426078804e-06, "loss": 0.614, "step": 1468 }, { "epoch": 0.12, "grad_norm": 1.367120240972899, "learning_rate": 9.800473267311962e-06, "loss": 0.2342, "step": 1469 }, { "epoch": 0.12, "grad_norm": 1.4721811650345396, "learning_rate": 9.800110786274803e-06, "loss": 0.2247, "step": 1470 }, { "epoch": 0.12, "grad_norm": 6.055228939688422, "learning_rate": 9.799747982991665e-06, "loss": 0.7436, "step": 1471 }, { "epoch": 0.12, "grad_norm": 1.4344985045654202, "learning_rate": 9.799384857486902e-06, "loss": 0.2091, "step": 1472 }, { "epoch": 0.12, "grad_norm": 1.3500369241321077, "learning_rate": 9.799021409784892e-06, "loss": 0.206, "step": 1473 }, { "epoch": 0.12, "grad_norm": 1.4754295176965464, "learning_rate": 9.798657639910033e-06, "loss": 0.2614, "step": 1474 }, { "epoch": 0.12, "grad_norm": 1.5538437626574393, "learning_rate": 9.798293547886748e-06, "loss": 0.2396, "step": 1475 }, { "epoch": 0.12, "grad_norm": 1.4867721451291298, "learning_rate": 9.79792913373948e-06, "loss": 0.2459, "step": 1476 }, { "epoch": 0.12, "grad_norm": 1.4662795235305603, "learning_rate": 9.79756439749269e-06, "loss": 0.2131, "step": 1477 }, { "epoch": 0.12, "grad_norm": 1.5728136361085496, "learning_rate": 9.797199339170866e-06, "loss": 0.2354, "step": 1478 }, { "epoch": 0.12, "grad_norm": 1.5014828919693486, "learning_rate": 9.796833958798517e-06, "loss": 0.1893, "step": 1479 }, { "epoch": 0.12, "grad_norm": 1.5643366021672795, "learning_rate": 9.79646825640017e-06, "loss": 0.154, "step": 1480 }, { "epoch": 0.12, "grad_norm": 1.3849792979232767, "learning_rate": 9.796102232000378e-06, "loss": 0.1897, "step": 1481 }, { "epoch": 0.12, "grad_norm": 1.3507529912131202, "learning_rate": 9.795735885623708e-06, "loss": 0.199, "step": 1482 }, { "epoch": 0.12, "grad_norm": 1.5222848732322185, "learning_rate": 9.795369217294759e-06, "loss": 0.2461, "step": 1483 }, { "epoch": 0.12, "grad_norm": 5.095961786416891, "learning_rate": 9.795002227038146e-06, "loss": 0.6301, "step": 1484 }, { "epoch": 0.12, "grad_norm": 1.6102115047438696, "learning_rate": 9.794634914878505e-06, "loss": 0.2131, "step": 1485 }, { "epoch": 0.12, "grad_norm": 1.384163364613551, "learning_rate": 9.794267280840494e-06, "loss": 0.202, "step": 1486 }, { "epoch": 0.12, "grad_norm": 5.980561450118831, "learning_rate": 9.793899324948795e-06, "loss": 0.5201, "step": 1487 }, { "epoch": 0.12, "grad_norm": 1.7045279102154405, "learning_rate": 9.79353104722811e-06, "loss": 0.2522, "step": 1488 }, { "epoch": 0.12, "grad_norm": 1.4721532606486294, "learning_rate": 9.793162447703161e-06, "loss": 0.2297, "step": 1489 }, { "epoch": 0.12, "grad_norm": 1.5583306675273565, "learning_rate": 9.792793526398694e-06, "loss": 0.2572, "step": 1490 }, { "epoch": 0.12, "grad_norm": 1.6535601309701211, "learning_rate": 9.792424283339477e-06, "loss": 0.2349, "step": 1491 }, { "epoch": 0.12, "grad_norm": 1.6238342068170448, "learning_rate": 9.792054718550297e-06, "loss": 0.3, "step": 1492 }, { "epoch": 0.12, "grad_norm": 1.5620780458702739, "learning_rate": 9.791684832055962e-06, "loss": 0.2115, "step": 1493 }, { "epoch": 0.12, "grad_norm": 1.4673245768704077, "learning_rate": 9.79131462388131e-06, "loss": 0.2303, "step": 1494 }, { "epoch": 0.12, "grad_norm": 1.5205843174141263, "learning_rate": 9.790944094051188e-06, "loss": 0.1741, "step": 1495 }, { "epoch": 0.12, "grad_norm": 1.617888816866139, "learning_rate": 9.790573242590473e-06, "loss": 0.2517, "step": 1496 }, { "epoch": 0.12, "grad_norm": 1.6754660394184404, "learning_rate": 9.790202069524061e-06, "loss": 0.2622, "step": 1497 }, { "epoch": 0.12, "grad_norm": 1.5456154240653932, "learning_rate": 9.789830574876873e-06, "loss": 0.2715, "step": 1498 }, { "epoch": 0.12, "grad_norm": 6.2915401357377725, "learning_rate": 9.789458758673843e-06, "loss": 0.606, "step": 1499 }, { "epoch": 0.12, "grad_norm": 1.7166910882738635, "learning_rate": 9.789086620939936e-06, "loss": 0.2637, "step": 1500 }, { "epoch": 0.12, "grad_norm": 1.5005129863346887, "learning_rate": 9.788714161700135e-06, "loss": 0.2689, "step": 1501 }, { "epoch": 0.12, "grad_norm": 1.3478767869455088, "learning_rate": 9.78834138097944e-06, "loss": 0.2587, "step": 1502 }, { "epoch": 0.12, "grad_norm": 1.3746996540686276, "learning_rate": 9.787968278802883e-06, "loss": 0.2149, "step": 1503 }, { "epoch": 0.12, "grad_norm": 1.44408894601983, "learning_rate": 9.787594855195509e-06, "loss": 0.2194, "step": 1504 }, { "epoch": 0.12, "grad_norm": 1.376583673021218, "learning_rate": 9.787221110182384e-06, "loss": 0.2454, "step": 1505 }, { "epoch": 0.12, "grad_norm": 5.397941276924308, "learning_rate": 9.786847043788601e-06, "loss": 0.6664, "step": 1506 }, { "epoch": 0.12, "grad_norm": 1.6146693771277414, "learning_rate": 9.786472656039275e-06, "loss": 0.2427, "step": 1507 }, { "epoch": 0.12, "grad_norm": 1.6544197733435781, "learning_rate": 9.786097946959534e-06, "loss": 0.2751, "step": 1508 }, { "epoch": 0.12, "grad_norm": 1.5076062634960605, "learning_rate": 9.785722916574539e-06, "loss": 0.167, "step": 1509 }, { "epoch": 0.12, "grad_norm": 1.4068663414249152, "learning_rate": 9.785347564909464e-06, "loss": 0.1855, "step": 1510 }, { "epoch": 0.12, "grad_norm": 5.927890560023354, "learning_rate": 9.784971891989508e-06, "loss": 0.6761, "step": 1511 }, { "epoch": 0.12, "grad_norm": 8.473618323153323, "learning_rate": 9.78459589783989e-06, "loss": 0.5578, "step": 1512 }, { "epoch": 0.12, "grad_norm": 1.4826105830934497, "learning_rate": 9.784219582485853e-06, "loss": 0.2144, "step": 1513 }, { "epoch": 0.12, "grad_norm": 1.4421224805808652, "learning_rate": 9.78384294595266e-06, "loss": 0.224, "step": 1514 }, { "epoch": 0.12, "grad_norm": 1.4868483772962446, "learning_rate": 9.783465988265594e-06, "loss": 0.2246, "step": 1515 }, { "epoch": 0.12, "grad_norm": 1.6342851575571882, "learning_rate": 9.783088709449967e-06, "loss": 0.2121, "step": 1516 }, { "epoch": 0.12, "grad_norm": 1.5792765679981189, "learning_rate": 9.7827111095311e-06, "loss": 0.2372, "step": 1517 }, { "epoch": 0.12, "grad_norm": 1.6149526191605383, "learning_rate": 9.782333188534345e-06, "loss": 0.2339, "step": 1518 }, { "epoch": 0.12, "grad_norm": 1.4198423148094563, "learning_rate": 9.781954946485072e-06, "loss": 0.2085, "step": 1519 }, { "epoch": 0.12, "grad_norm": 1.417247206892955, "learning_rate": 9.781576383408678e-06, "loss": 0.1626, "step": 1520 }, { "epoch": 0.12, "grad_norm": 1.4668535926425135, "learning_rate": 9.781197499330572e-06, "loss": 0.1678, "step": 1521 }, { "epoch": 0.12, "grad_norm": 1.5649086717820815, "learning_rate": 9.78081829427619e-06, "loss": 0.2426, "step": 1522 }, { "epoch": 0.12, "grad_norm": 7.472924529567563, "learning_rate": 9.780438768270992e-06, "loss": 0.6806, "step": 1523 }, { "epoch": 0.12, "grad_norm": 1.370004457147356, "learning_rate": 9.780058921340456e-06, "loss": 0.2394, "step": 1524 }, { "epoch": 0.12, "grad_norm": 6.459787777712049, "learning_rate": 9.779678753510082e-06, "loss": 0.668, "step": 1525 }, { "epoch": 0.12, "grad_norm": 1.4576166496865888, "learning_rate": 9.77929826480539e-06, "loss": 0.1966, "step": 1526 }, { "epoch": 0.12, "grad_norm": 1.4629814851551488, "learning_rate": 9.778917455251924e-06, "loss": 0.2657, "step": 1527 }, { "epoch": 0.12, "grad_norm": 1.55569845763157, "learning_rate": 9.778536324875252e-06, "loss": 0.2848, "step": 1528 }, { "epoch": 0.12, "grad_norm": 1.3684044127238737, "learning_rate": 9.778154873700956e-06, "loss": 0.2078, "step": 1529 }, { "epoch": 0.12, "grad_norm": 1.5414397128211301, "learning_rate": 9.777773101754648e-06, "loss": 0.2835, "step": 1530 }, { "epoch": 0.12, "grad_norm": 1.3758651514609745, "learning_rate": 9.777391009061954e-06, "loss": 0.208, "step": 1531 }, { "epoch": 0.12, "grad_norm": 5.9448235189303915, "learning_rate": 9.777008595648527e-06, "loss": 0.6133, "step": 1532 }, { "epoch": 0.12, "grad_norm": 1.5865145755859187, "learning_rate": 9.77662586154004e-06, "loss": 0.2833, "step": 1533 }, { "epoch": 0.12, "grad_norm": 1.5179452983165114, "learning_rate": 9.776242806762187e-06, "loss": 0.2791, "step": 1534 }, { "epoch": 0.12, "grad_norm": 1.6299484277744316, "learning_rate": 9.775859431340681e-06, "loss": 0.1934, "step": 1535 }, { "epoch": 0.12, "grad_norm": 1.4114162741359657, "learning_rate": 9.775475735301261e-06, "loss": 0.2088, "step": 1536 }, { "epoch": 0.12, "grad_norm": 1.3334542333679187, "learning_rate": 9.775091718669688e-06, "loss": 0.1901, "step": 1537 }, { "epoch": 0.12, "grad_norm": 1.5763527019236323, "learning_rate": 9.774707381471737e-06, "loss": 0.2211, "step": 1538 }, { "epoch": 0.12, "grad_norm": 1.7234397146890839, "learning_rate": 9.774322723733216e-06, "loss": 0.2823, "step": 1539 }, { "epoch": 0.12, "grad_norm": 1.6165818412590764, "learning_rate": 9.773937745479942e-06, "loss": 0.2321, "step": 1540 }, { "epoch": 0.12, "grad_norm": 1.2934389720843433, "learning_rate": 9.773552446737764e-06, "loss": 0.2037, "step": 1541 }, { "epoch": 0.12, "grad_norm": 1.4628982851816554, "learning_rate": 9.773166827532548e-06, "loss": 0.217, "step": 1542 }, { "epoch": 0.12, "grad_norm": 1.5175023983306994, "learning_rate": 9.77278088789018e-06, "loss": 0.2786, "step": 1543 }, { "epoch": 0.12, "grad_norm": 1.5691679386620183, "learning_rate": 9.77239462783657e-06, "loss": 0.2438, "step": 1544 }, { "epoch": 0.12, "grad_norm": 1.5579485036314944, "learning_rate": 9.772008047397647e-06, "loss": 0.2372, "step": 1545 }, { "epoch": 0.12, "grad_norm": 1.4464378185654634, "learning_rate": 9.771621146599369e-06, "loss": 0.1584, "step": 1546 }, { "epoch": 0.12, "grad_norm": 4.393102295164832, "learning_rate": 9.771233925467703e-06, "loss": 0.6021, "step": 1547 }, { "epoch": 0.12, "grad_norm": 1.3279935690715412, "learning_rate": 9.770846384028647e-06, "loss": 0.2526, "step": 1548 }, { "epoch": 0.12, "grad_norm": 1.3988597764851942, "learning_rate": 9.77045852230822e-06, "loss": 0.2023, "step": 1549 }, { "epoch": 0.12, "grad_norm": 1.4144040639472815, "learning_rate": 9.770070340332457e-06, "loss": 0.2144, "step": 1550 }, { "epoch": 0.12, "grad_norm": 1.861902363026179, "learning_rate": 9.769681838127421e-06, "loss": 0.2369, "step": 1551 }, { "epoch": 0.12, "grad_norm": 1.389033659250747, "learning_rate": 9.76929301571919e-06, "loss": 0.198, "step": 1552 }, { "epoch": 0.12, "grad_norm": 4.596142413144707, "learning_rate": 9.76890387313387e-06, "loss": 0.5471, "step": 1553 }, { "epoch": 0.12, "grad_norm": 5.669517148887409, "learning_rate": 9.768514410397583e-06, "loss": 0.644, "step": 1554 }, { "epoch": 0.12, "grad_norm": 1.410359637946326, "learning_rate": 9.768124627536474e-06, "loss": 0.1952, "step": 1555 }, { "epoch": 0.12, "grad_norm": 6.727014588356394, "learning_rate": 9.767734524576714e-06, "loss": 0.8061, "step": 1556 }, { "epoch": 0.12, "grad_norm": 1.4138314962122738, "learning_rate": 9.767344101544489e-06, "loss": 0.2108, "step": 1557 }, { "epoch": 0.12, "grad_norm": 1.4932571770122638, "learning_rate": 9.76695335846601e-06, "loss": 0.2282, "step": 1558 }, { "epoch": 0.12, "grad_norm": 1.6449023745881455, "learning_rate": 9.76656229536751e-06, "loss": 0.2151, "step": 1559 }, { "epoch": 0.12, "grad_norm": 1.4537766467283442, "learning_rate": 9.76617091227524e-06, "loss": 0.2191, "step": 1560 }, { "epoch": 0.12, "grad_norm": 1.571360027749933, "learning_rate": 9.765779209215474e-06, "loss": 0.1808, "step": 1561 }, { "epoch": 0.12, "grad_norm": 5.103660751294775, "learning_rate": 9.765387186214512e-06, "loss": 0.6537, "step": 1562 }, { "epoch": 0.13, "grad_norm": 1.4616768983692543, "learning_rate": 9.76499484329867e-06, "loss": 0.2342, "step": 1563 }, { "epoch": 0.13, "grad_norm": 1.3466355581617389, "learning_rate": 9.764602180494285e-06, "loss": 0.2078, "step": 1564 }, { "epoch": 0.13, "grad_norm": 1.5461759338537195, "learning_rate": 9.764209197827721e-06, "loss": 0.2502, "step": 1565 }, { "epoch": 0.13, "grad_norm": 1.714268542928138, "learning_rate": 9.763815895325357e-06, "loss": 0.2027, "step": 1566 }, { "epoch": 0.13, "grad_norm": 1.3260968907471, "learning_rate": 9.7634222730136e-06, "loss": 0.1785, "step": 1567 }, { "epoch": 0.13, "grad_norm": 5.9517203985932, "learning_rate": 9.763028330918874e-06, "loss": 0.5397, "step": 1568 }, { "epoch": 0.13, "grad_norm": 1.5405970638816762, "learning_rate": 9.762634069067622e-06, "loss": 0.2385, "step": 1569 }, { "epoch": 0.13, "grad_norm": 1.6015938995679906, "learning_rate": 9.762239487486316e-06, "loss": 0.2111, "step": 1570 }, { "epoch": 0.13, "grad_norm": 5.174633082700354, "learning_rate": 9.761844586201444e-06, "loss": 0.5955, "step": 1571 }, { "epoch": 0.13, "grad_norm": 1.5709485893560706, "learning_rate": 9.761449365239518e-06, "loss": 0.2141, "step": 1572 }, { "epoch": 0.13, "grad_norm": 3.937243962845362, "learning_rate": 9.761053824627068e-06, "loss": 0.5774, "step": 1573 }, { "epoch": 0.13, "grad_norm": 6.301274228241699, "learning_rate": 9.76065796439065e-06, "loss": 0.7388, "step": 1574 }, { "epoch": 0.13, "grad_norm": 1.382179165977071, "learning_rate": 9.76026178455684e-06, "loss": 0.1987, "step": 1575 }, { "epoch": 0.13, "grad_norm": 1.389248017784347, "learning_rate": 9.759865285152231e-06, "loss": 0.2112, "step": 1576 }, { "epoch": 0.13, "grad_norm": 1.4429928705861934, "learning_rate": 9.759468466203444e-06, "loss": 0.2001, "step": 1577 }, { "epoch": 0.13, "grad_norm": 1.5304857577376287, "learning_rate": 9.75907132773712e-06, "loss": 0.1979, "step": 1578 }, { "epoch": 0.13, "grad_norm": 1.4636827269179473, "learning_rate": 9.758673869779915e-06, "loss": 0.2474, "step": 1579 }, { "epoch": 0.13, "grad_norm": 5.679234629073408, "learning_rate": 9.758276092358518e-06, "loss": 0.6342, "step": 1580 }, { "epoch": 0.13, "grad_norm": 8.86152090252494, "learning_rate": 9.75787799549963e-06, "loss": 0.5772, "step": 1581 }, { "epoch": 0.13, "grad_norm": 1.3741318618094576, "learning_rate": 9.757479579229974e-06, "loss": 0.2698, "step": 1582 }, { "epoch": 0.13, "grad_norm": 6.8542829757183465, "learning_rate": 9.757080843576301e-06, "loss": 0.4931, "step": 1583 }, { "epoch": 0.13, "grad_norm": 1.3755555674464939, "learning_rate": 9.756681788565379e-06, "loss": 0.2309, "step": 1584 }, { "epoch": 0.13, "grad_norm": 1.4222154739746977, "learning_rate": 9.756282414223995e-06, "loss": 0.2278, "step": 1585 }, { "epoch": 0.13, "grad_norm": 1.6003354242457708, "learning_rate": 9.75588272057896e-06, "loss": 0.2788, "step": 1586 }, { "epoch": 0.13, "grad_norm": 1.6106017498133456, "learning_rate": 9.755482707657109e-06, "loss": 0.2263, "step": 1587 }, { "epoch": 0.13, "grad_norm": 1.773172038508745, "learning_rate": 9.755082375485296e-06, "loss": 0.234, "step": 1588 }, { "epoch": 0.13, "grad_norm": 1.4545373227217495, "learning_rate": 9.754681724090396e-06, "loss": 0.1929, "step": 1589 }, { "epoch": 0.13, "grad_norm": 1.609205148072724, "learning_rate": 9.754280753499306e-06, "loss": 0.2449, "step": 1590 }, { "epoch": 0.13, "grad_norm": 1.5666283507519787, "learning_rate": 9.753879463738942e-06, "loss": 0.229, "step": 1591 }, { "epoch": 0.13, "grad_norm": 1.3230971638142615, "learning_rate": 9.753477854836248e-06, "loss": 0.2137, "step": 1592 }, { "epoch": 0.13, "grad_norm": 1.5369705493136865, "learning_rate": 9.753075926818182e-06, "loss": 0.1978, "step": 1593 }, { "epoch": 0.13, "grad_norm": 1.4575961176763852, "learning_rate": 9.752673679711728e-06, "loss": 0.2461, "step": 1594 }, { "epoch": 0.13, "grad_norm": 1.6064120304746499, "learning_rate": 9.75227111354389e-06, "loss": 0.2334, "step": 1595 }, { "epoch": 0.13, "grad_norm": 1.5829676572043545, "learning_rate": 9.751868228341695e-06, "loss": 0.2469, "step": 1596 }, { "epoch": 0.13, "grad_norm": 1.543565400196061, "learning_rate": 9.751465024132184e-06, "loss": 0.2054, "step": 1597 }, { "epoch": 0.13, "grad_norm": 1.4902531190359736, "learning_rate": 9.751061500942434e-06, "loss": 0.2588, "step": 1598 }, { "epoch": 0.13, "grad_norm": 1.4464916939465176, "learning_rate": 9.750657658799528e-06, "loss": 0.2597, "step": 1599 }, { "epoch": 0.13, "grad_norm": 1.7689443638192823, "learning_rate": 9.75025349773058e-06, "loss": 0.216, "step": 1600 }, { "epoch": 0.13, "grad_norm": 1.4004095866677506, "learning_rate": 9.749849017762723e-06, "loss": 0.1793, "step": 1601 }, { "epoch": 0.13, "grad_norm": 1.546138023549135, "learning_rate": 9.749444218923108e-06, "loss": 0.233, "step": 1602 }, { "epoch": 0.13, "grad_norm": 1.456426824446879, "learning_rate": 9.749039101238914e-06, "loss": 0.1756, "step": 1603 }, { "epoch": 0.13, "grad_norm": 1.4063896715796795, "learning_rate": 9.748633664737334e-06, "loss": 0.2183, "step": 1604 }, { "epoch": 0.13, "grad_norm": 1.705206337718719, "learning_rate": 9.74822790944559e-06, "loss": 0.2464, "step": 1605 }, { "epoch": 0.13, "grad_norm": 1.4840002082776158, "learning_rate": 9.74782183539092e-06, "loss": 0.2327, "step": 1606 }, { "epoch": 0.13, "grad_norm": 1.511477359022925, "learning_rate": 9.747415442600585e-06, "loss": 0.2129, "step": 1607 }, { "epoch": 0.13, "grad_norm": 1.4895690426055241, "learning_rate": 9.747008731101865e-06, "loss": 0.2399, "step": 1608 }, { "epoch": 0.13, "grad_norm": 1.3378153449320915, "learning_rate": 9.74660170092207e-06, "loss": 0.2232, "step": 1609 }, { "epoch": 0.13, "grad_norm": 1.381222187473741, "learning_rate": 9.746194352088518e-06, "loss": 0.2227, "step": 1610 }, { "epoch": 0.13, "grad_norm": 1.660979463835364, "learning_rate": 9.74578668462856e-06, "loss": 0.2261, "step": 1611 }, { "epoch": 0.13, "grad_norm": 1.7241854923008417, "learning_rate": 9.745378698569562e-06, "loss": 0.2813, "step": 1612 }, { "epoch": 0.13, "grad_norm": 1.4115039208506461, "learning_rate": 9.744970393938915e-06, "loss": 0.2285, "step": 1613 }, { "epoch": 0.13, "grad_norm": 1.4776320597948998, "learning_rate": 9.744561770764027e-06, "loss": 0.237, "step": 1614 }, { "epoch": 0.13, "grad_norm": 1.706970959684569, "learning_rate": 9.744152829072333e-06, "loss": 0.2548, "step": 1615 }, { "epoch": 0.13, "grad_norm": 1.5499903134757889, "learning_rate": 9.743743568891287e-06, "loss": 0.2173, "step": 1616 }, { "epoch": 0.13, "grad_norm": 1.4368205494799517, "learning_rate": 9.743333990248359e-06, "loss": 0.2198, "step": 1617 }, { "epoch": 0.13, "grad_norm": 1.436406501976359, "learning_rate": 9.742924093171051e-06, "loss": 0.2456, "step": 1618 }, { "epoch": 0.13, "grad_norm": 1.3677361623959812, "learning_rate": 9.742513877686877e-06, "loss": 0.202, "step": 1619 }, { "epoch": 0.13, "grad_norm": 1.7417420260182053, "learning_rate": 9.742103343823376e-06, "loss": 0.2174, "step": 1620 }, { "epoch": 0.13, "grad_norm": 1.5239655330579545, "learning_rate": 9.741692491608112e-06, "loss": 0.2389, "step": 1621 }, { "epoch": 0.13, "grad_norm": 1.521519013274887, "learning_rate": 9.741281321068663e-06, "loss": 0.2058, "step": 1622 }, { "epoch": 0.13, "grad_norm": 1.3958180231477604, "learning_rate": 9.740869832232634e-06, "loss": 0.2353, "step": 1623 }, { "epoch": 0.13, "grad_norm": 1.4866407509368103, "learning_rate": 9.740458025127649e-06, "loss": 0.2141, "step": 1624 }, { "epoch": 0.13, "grad_norm": 1.6068765752354282, "learning_rate": 9.740045899781353e-06, "loss": 0.2858, "step": 1625 }, { "epoch": 0.13, "grad_norm": 1.601379070633747, "learning_rate": 9.739633456221415e-06, "loss": 0.2811, "step": 1626 }, { "epoch": 0.13, "grad_norm": 5.39640397491769, "learning_rate": 9.739220694475522e-06, "loss": 0.506, "step": 1627 }, { "epoch": 0.13, "grad_norm": 1.5505128105701302, "learning_rate": 9.738807614571384e-06, "loss": 0.2244, "step": 1628 }, { "epoch": 0.13, "grad_norm": 1.3832767673421447, "learning_rate": 9.738394216536733e-06, "loss": 0.2245, "step": 1629 }, { "epoch": 0.13, "grad_norm": 1.5330333737615514, "learning_rate": 9.737980500399322e-06, "loss": 0.2428, "step": 1630 }, { "epoch": 0.13, "grad_norm": 1.5955024672564626, "learning_rate": 9.737566466186922e-06, "loss": 0.2338, "step": 1631 }, { "epoch": 0.13, "grad_norm": 1.4270569297287, "learning_rate": 9.737152113927335e-06, "loss": 0.2547, "step": 1632 }, { "epoch": 0.13, "grad_norm": 1.3095426311807545, "learning_rate": 9.736737443648372e-06, "loss": 0.1955, "step": 1633 }, { "epoch": 0.13, "grad_norm": 5.528110671765845, "learning_rate": 9.73632245537787e-06, "loss": 0.5162, "step": 1634 }, { "epoch": 0.13, "grad_norm": 1.4692389088093838, "learning_rate": 9.735907149143695e-06, "loss": 0.2305, "step": 1635 }, { "epoch": 0.13, "grad_norm": 8.457093574168827, "learning_rate": 9.735491524973723e-06, "loss": 0.689, "step": 1636 }, { "epoch": 0.13, "grad_norm": 1.628693405302554, "learning_rate": 9.735075582895856e-06, "loss": 0.2378, "step": 1637 }, { "epoch": 0.13, "grad_norm": 1.4240199248006942, "learning_rate": 9.734659322938018e-06, "loss": 0.1779, "step": 1638 }, { "epoch": 0.13, "grad_norm": 1.6793055074558272, "learning_rate": 9.734242745128156e-06, "loss": 0.2467, "step": 1639 }, { "epoch": 0.13, "grad_norm": 1.432814034423324, "learning_rate": 9.733825849494232e-06, "loss": 0.2152, "step": 1640 }, { "epoch": 0.13, "grad_norm": 1.4151679170555584, "learning_rate": 9.733408636064236e-06, "loss": 0.2328, "step": 1641 }, { "epoch": 0.13, "grad_norm": 1.4883539018594545, "learning_rate": 9.732991104866179e-06, "loss": 0.1827, "step": 1642 }, { "epoch": 0.13, "grad_norm": 4.8699234101320785, "learning_rate": 9.732573255928086e-06, "loss": 0.7119, "step": 1643 }, { "epoch": 0.13, "grad_norm": 4.177567743065299, "learning_rate": 9.732155089278013e-06, "loss": 0.411, "step": 1644 }, { "epoch": 0.13, "grad_norm": 1.7471970828172905, "learning_rate": 9.731736604944031e-06, "loss": 0.245, "step": 1645 }, { "epoch": 0.13, "grad_norm": 1.7554415383245885, "learning_rate": 9.731317802954233e-06, "loss": 0.2649, "step": 1646 }, { "epoch": 0.13, "grad_norm": 1.3657823793349615, "learning_rate": 9.730898683336735e-06, "loss": 0.19, "step": 1647 }, { "epoch": 0.13, "grad_norm": 1.8227522296779513, "learning_rate": 9.730479246119677e-06, "loss": 0.2257, "step": 1648 }, { "epoch": 0.13, "grad_norm": 5.938908800728655, "learning_rate": 9.730059491331214e-06, "loss": 0.658, "step": 1649 }, { "epoch": 0.13, "grad_norm": 1.5611551436068443, "learning_rate": 9.729639418999524e-06, "loss": 0.2595, "step": 1650 }, { "epoch": 0.13, "grad_norm": 1.547685064912496, "learning_rate": 9.72921902915281e-06, "loss": 0.2508, "step": 1651 }, { "epoch": 0.13, "grad_norm": 1.5374636961496102, "learning_rate": 9.728798321819294e-06, "loss": 0.215, "step": 1652 }, { "epoch": 0.13, "grad_norm": 1.4988286417003958, "learning_rate": 9.728377297027218e-06, "loss": 0.2437, "step": 1653 }, { "epoch": 0.13, "grad_norm": 1.4196981853732336, "learning_rate": 9.727955954804848e-06, "loss": 0.2515, "step": 1654 }, { "epoch": 0.13, "grad_norm": 1.5171443314270052, "learning_rate": 9.727534295180471e-06, "loss": 0.2713, "step": 1655 }, { "epoch": 0.13, "grad_norm": 1.41126335629371, "learning_rate": 9.727112318182392e-06, "loss": 0.1994, "step": 1656 }, { "epoch": 0.13, "grad_norm": 1.4592917440724393, "learning_rate": 9.72669002383894e-06, "loss": 0.1927, "step": 1657 }, { "epoch": 0.13, "grad_norm": 1.4958176694534637, "learning_rate": 9.726267412178467e-06, "loss": 0.2435, "step": 1658 }, { "epoch": 0.13, "grad_norm": 1.3312267431402287, "learning_rate": 9.725844483229342e-06, "loss": 0.1796, "step": 1659 }, { "epoch": 0.13, "grad_norm": 5.1085467511902705, "learning_rate": 9.725421237019957e-06, "loss": 0.5534, "step": 1660 }, { "epoch": 0.13, "grad_norm": 1.5264526841901913, "learning_rate": 9.724997673578727e-06, "loss": 0.2521, "step": 1661 }, { "epoch": 0.13, "grad_norm": 1.6653291942087425, "learning_rate": 9.724573792934089e-06, "loss": 0.2472, "step": 1662 }, { "epoch": 0.13, "grad_norm": 25.64071668094937, "learning_rate": 9.724149595114496e-06, "loss": 0.5309, "step": 1663 }, { "epoch": 0.13, "grad_norm": 5.308603189700793, "learning_rate": 9.723725080148426e-06, "loss": 0.5843, "step": 1664 }, { "epoch": 0.13, "grad_norm": 1.5883693224922457, "learning_rate": 9.72330024806438e-06, "loss": 0.271, "step": 1665 }, { "epoch": 0.13, "grad_norm": 1.4579251605038241, "learning_rate": 9.722875098890878e-06, "loss": 0.1772, "step": 1666 }, { "epoch": 0.13, "grad_norm": 6.8562630052979365, "learning_rate": 9.72244963265646e-06, "loss": 0.5757, "step": 1667 }, { "epoch": 0.13, "grad_norm": 1.6744308512167032, "learning_rate": 9.72202384938969e-06, "loss": 0.2761, "step": 1668 }, { "epoch": 0.13, "grad_norm": 9.51270892421273, "learning_rate": 9.721597749119151e-06, "loss": 0.8495, "step": 1669 }, { "epoch": 0.13, "grad_norm": 1.4967390457973082, "learning_rate": 9.721171331873452e-06, "loss": 0.2468, "step": 1670 }, { "epoch": 0.13, "grad_norm": 1.2368237259695152, "learning_rate": 9.720744597681213e-06, "loss": 0.1914, "step": 1671 }, { "epoch": 0.13, "grad_norm": 1.3838599243408192, "learning_rate": 9.720317546571088e-06, "loss": 0.2095, "step": 1672 }, { "epoch": 0.13, "grad_norm": 1.6013841684581451, "learning_rate": 9.719890178571744e-06, "loss": 0.2215, "step": 1673 }, { "epoch": 0.13, "grad_norm": 1.5315634146774177, "learning_rate": 9.719462493711873e-06, "loss": 0.1962, "step": 1674 }, { "epoch": 0.13, "grad_norm": 1.5392839013199366, "learning_rate": 9.719034492020183e-06, "loss": 0.2497, "step": 1675 }, { "epoch": 0.13, "grad_norm": 1.7014341258207781, "learning_rate": 9.718606173525411e-06, "loss": 0.2738, "step": 1676 }, { "epoch": 0.13, "grad_norm": 1.3250246879392782, "learning_rate": 9.718177538256309e-06, "loss": 0.2267, "step": 1677 }, { "epoch": 0.13, "grad_norm": 1.536491524014943, "learning_rate": 9.717748586241653e-06, "loss": 0.2146, "step": 1678 }, { "epoch": 0.13, "grad_norm": 6.293486948058062, "learning_rate": 9.71731931751024e-06, "loss": 0.6151, "step": 1679 }, { "epoch": 0.13, "grad_norm": 1.556929727772488, "learning_rate": 9.71688973209089e-06, "loss": 0.2693, "step": 1680 }, { "epoch": 0.13, "grad_norm": 1.3622827255245316, "learning_rate": 9.716459830012439e-06, "loss": 0.1706, "step": 1681 }, { "epoch": 0.13, "grad_norm": 4.318877973805238, "learning_rate": 9.71602961130375e-06, "loss": 0.515, "step": 1682 }, { "epoch": 0.13, "grad_norm": 1.2336719882953078, "learning_rate": 9.715599075993705e-06, "loss": 0.2294, "step": 1683 }, { "epoch": 0.13, "grad_norm": 5.047145570888419, "learning_rate": 9.715168224111205e-06, "loss": 0.644, "step": 1684 }, { "epoch": 0.13, "grad_norm": 1.474036620858353, "learning_rate": 9.714737055685176e-06, "loss": 0.2409, "step": 1685 }, { "epoch": 0.13, "grad_norm": 1.5778056575076935, "learning_rate": 9.714305570744564e-06, "loss": 0.2329, "step": 1686 }, { "epoch": 0.13, "grad_norm": 1.2285047991412406, "learning_rate": 9.713873769318333e-06, "loss": 0.1891, "step": 1687 }, { "epoch": 0.14, "grad_norm": 1.5607622134746675, "learning_rate": 9.713441651435477e-06, "loss": 0.2421, "step": 1688 }, { "epoch": 0.14, "grad_norm": 1.4513717505441104, "learning_rate": 9.713009217125e-06, "loss": 0.2218, "step": 1689 }, { "epoch": 0.14, "grad_norm": 1.5635359649151226, "learning_rate": 9.712576466415935e-06, "loss": 0.2262, "step": 1690 }, { "epoch": 0.14, "grad_norm": 1.4433240441328847, "learning_rate": 9.712143399337333e-06, "loss": 0.2212, "step": 1691 }, { "epoch": 0.14, "grad_norm": 1.3881155947417456, "learning_rate": 9.711710015918266e-06, "loss": 0.2209, "step": 1692 }, { "epoch": 0.14, "grad_norm": 1.4800033949726952, "learning_rate": 9.71127631618783e-06, "loss": 0.1839, "step": 1693 }, { "epoch": 0.14, "grad_norm": 1.5028345570390487, "learning_rate": 9.710842300175141e-06, "loss": 0.2038, "step": 1694 }, { "epoch": 0.14, "grad_norm": 1.5009735742830224, "learning_rate": 9.710407967909336e-06, "loss": 0.2705, "step": 1695 }, { "epoch": 0.14, "grad_norm": 1.3972579493609312, "learning_rate": 9.709973319419572e-06, "loss": 0.1921, "step": 1696 }, { "epoch": 0.14, "grad_norm": 5.523936841142965, "learning_rate": 9.709538354735026e-06, "loss": 0.54, "step": 1697 }, { "epoch": 0.14, "grad_norm": 1.6991294389441396, "learning_rate": 9.709103073884905e-06, "loss": 0.2528, "step": 1698 }, { "epoch": 0.14, "grad_norm": 1.5639811472812528, "learning_rate": 9.708667476898423e-06, "loss": 0.2532, "step": 1699 }, { "epoch": 0.14, "grad_norm": 1.3540175628340287, "learning_rate": 9.708231563804828e-06, "loss": 0.1779, "step": 1700 }, { "epoch": 0.14, "grad_norm": 1.6095982351583809, "learning_rate": 9.707795334633383e-06, "loss": 0.1874, "step": 1701 }, { "epoch": 0.14, "grad_norm": 1.4729378250201564, "learning_rate": 9.707358789413373e-06, "loss": 0.216, "step": 1702 }, { "epoch": 0.14, "grad_norm": 1.5807371727896857, "learning_rate": 9.706921928174105e-06, "loss": 0.2195, "step": 1703 }, { "epoch": 0.14, "grad_norm": 1.4800662770005686, "learning_rate": 9.706484750944905e-06, "loss": 0.2513, "step": 1704 }, { "epoch": 0.14, "grad_norm": 1.6159367502238662, "learning_rate": 9.706047257755124e-06, "loss": 0.3019, "step": 1705 }, { "epoch": 0.14, "grad_norm": 1.4629890329393809, "learning_rate": 9.705609448634133e-06, "loss": 0.2201, "step": 1706 }, { "epoch": 0.14, "grad_norm": 6.225631699616758, "learning_rate": 9.705171323611322e-06, "loss": 0.5263, "step": 1707 }, { "epoch": 0.14, "grad_norm": 1.7799884269786705, "learning_rate": 9.704732882716104e-06, "loss": 0.2766, "step": 1708 }, { "epoch": 0.14, "grad_norm": 1.4597224490605745, "learning_rate": 9.704294125977912e-06, "loss": 0.2501, "step": 1709 }, { "epoch": 0.14, "grad_norm": 1.466299676143188, "learning_rate": 9.703855053426202e-06, "loss": 0.2116, "step": 1710 }, { "epoch": 0.14, "grad_norm": 1.529599025438097, "learning_rate": 9.703415665090452e-06, "loss": 0.273, "step": 1711 }, { "epoch": 0.14, "grad_norm": 1.4503870188316348, "learning_rate": 9.702975961000155e-06, "loss": 0.1992, "step": 1712 }, { "epoch": 0.14, "grad_norm": 1.4734372417685295, "learning_rate": 9.702535941184833e-06, "loss": 0.2385, "step": 1713 }, { "epoch": 0.14, "grad_norm": 1.2495034589875202, "learning_rate": 9.702095605674027e-06, "loss": 0.2228, "step": 1714 }, { "epoch": 0.14, "grad_norm": 1.2685117477059433, "learning_rate": 9.701654954497294e-06, "loss": 0.2231, "step": 1715 }, { "epoch": 0.14, "grad_norm": 1.400857054435006, "learning_rate": 9.70121398768422e-06, "loss": 0.1931, "step": 1716 }, { "epoch": 0.14, "grad_norm": 1.4933470651291618, "learning_rate": 9.700772705264405e-06, "loss": 0.2293, "step": 1717 }, { "epoch": 0.14, "grad_norm": 1.3789365329228507, "learning_rate": 9.700331107267477e-06, "loss": 0.2196, "step": 1718 }, { "epoch": 0.14, "grad_norm": 7.161258222313218, "learning_rate": 9.69988919372308e-06, "loss": 0.7293, "step": 1719 }, { "epoch": 0.14, "grad_norm": 1.5914113306653817, "learning_rate": 9.699446964660882e-06, "loss": 0.2295, "step": 1720 }, { "epoch": 0.14, "grad_norm": 3.815576091636923, "learning_rate": 9.69900442011057e-06, "loss": 0.5205, "step": 1721 }, { "epoch": 0.14, "grad_norm": 1.5653434655502219, "learning_rate": 9.698561560101853e-06, "loss": 0.2874, "step": 1722 }, { "epoch": 0.14, "grad_norm": 1.5361753284881046, "learning_rate": 9.698118384664464e-06, "loss": 0.2424, "step": 1723 }, { "epoch": 0.14, "grad_norm": 1.6057505288675846, "learning_rate": 9.69767489382815e-06, "loss": 0.2169, "step": 1724 }, { "epoch": 0.14, "grad_norm": 1.6105553864379374, "learning_rate": 9.697231087622691e-06, "loss": 0.2481, "step": 1725 }, { "epoch": 0.14, "grad_norm": 1.4405817181475413, "learning_rate": 9.696786966077875e-06, "loss": 0.2457, "step": 1726 }, { "epoch": 0.14, "grad_norm": 1.4830495415977467, "learning_rate": 9.69634252922352e-06, "loss": 0.2373, "step": 1727 }, { "epoch": 0.14, "grad_norm": 1.4006478621622342, "learning_rate": 9.69589777708946e-06, "loss": 0.2405, "step": 1728 }, { "epoch": 0.14, "grad_norm": 1.4885861658830843, "learning_rate": 9.695452709705555e-06, "loss": 0.2203, "step": 1729 }, { "epoch": 0.14, "grad_norm": 1.3404527013588188, "learning_rate": 9.695007327101685e-06, "loss": 0.2169, "step": 1730 }, { "epoch": 0.14, "grad_norm": 1.4890546172742003, "learning_rate": 9.694561629307745e-06, "loss": 0.2582, "step": 1731 }, { "epoch": 0.14, "grad_norm": 1.6100396888615591, "learning_rate": 9.694115616353662e-06, "loss": 0.2302, "step": 1732 }, { "epoch": 0.14, "grad_norm": 4.946664602535032, "learning_rate": 9.693669288269371e-06, "loss": 0.6134, "step": 1733 }, { "epoch": 0.14, "grad_norm": 1.3256763124198974, "learning_rate": 9.69322264508484e-06, "loss": 0.2222, "step": 1734 }, { "epoch": 0.14, "grad_norm": 1.61350332273475, "learning_rate": 9.692775686830057e-06, "loss": 0.2356, "step": 1735 }, { "epoch": 0.14, "grad_norm": 1.5383885625029967, "learning_rate": 9.69232841353502e-06, "loss": 0.2372, "step": 1736 }, { "epoch": 0.14, "grad_norm": 1.5613793907693219, "learning_rate": 9.69188082522976e-06, "loss": 0.2467, "step": 1737 }, { "epoch": 0.14, "grad_norm": 1.341786850136851, "learning_rate": 9.691432921944325e-06, "loss": 0.2541, "step": 1738 }, { "epoch": 0.14, "grad_norm": 1.5164759420816685, "learning_rate": 9.690984703708783e-06, "loss": 0.2234, "step": 1739 }, { "epoch": 0.14, "grad_norm": 1.3333153286507795, "learning_rate": 9.690536170553226e-06, "loss": 0.1712, "step": 1740 }, { "epoch": 0.14, "grad_norm": 1.2479864033253565, "learning_rate": 9.690087322507763e-06, "loss": 0.2337, "step": 1741 }, { "epoch": 0.14, "grad_norm": 1.4575191194921926, "learning_rate": 9.689638159602527e-06, "loss": 0.2519, "step": 1742 }, { "epoch": 0.14, "grad_norm": 1.608497112628641, "learning_rate": 9.689188681867675e-06, "loss": 0.2299, "step": 1743 }, { "epoch": 0.14, "grad_norm": 1.3864261547311492, "learning_rate": 9.688738889333376e-06, "loss": 0.2051, "step": 1744 }, { "epoch": 0.14, "grad_norm": 1.1792271922396842, "learning_rate": 9.68828878202983e-06, "loss": 0.1767, "step": 1745 }, { "epoch": 0.14, "grad_norm": 1.4219191134336626, "learning_rate": 9.687838359987254e-06, "loss": 0.2431, "step": 1746 }, { "epoch": 0.14, "grad_norm": 1.4753799795627052, "learning_rate": 9.687387623235885e-06, "loss": 0.2374, "step": 1747 }, { "epoch": 0.14, "grad_norm": 1.3470168408588994, "learning_rate": 9.686936571805982e-06, "loss": 0.1749, "step": 1748 }, { "epoch": 0.14, "grad_norm": 1.4509181163443456, "learning_rate": 9.686485205727827e-06, "loss": 0.1974, "step": 1749 }, { "epoch": 0.14, "grad_norm": 1.4787658377822652, "learning_rate": 9.68603352503172e-06, "loss": 0.2642, "step": 1750 }, { "epoch": 0.14, "grad_norm": 1.3933065630299974, "learning_rate": 9.685581529747982e-06, "loss": 0.2168, "step": 1751 }, { "epoch": 0.14, "grad_norm": 1.5075548508439822, "learning_rate": 9.685129219906964e-06, "loss": 0.2385, "step": 1752 }, { "epoch": 0.14, "grad_norm": 1.4706416043161044, "learning_rate": 9.684676595539023e-06, "loss": 0.2406, "step": 1753 }, { "epoch": 0.14, "grad_norm": 1.6457101424970801, "learning_rate": 9.684223656674548e-06, "loss": 0.2719, "step": 1754 }, { "epoch": 0.14, "grad_norm": 1.6129606462913444, "learning_rate": 9.683770403343947e-06, "loss": 0.1874, "step": 1755 }, { "epoch": 0.14, "grad_norm": 6.874525838877892, "learning_rate": 9.683316835577648e-06, "loss": 0.6454, "step": 1756 }, { "epoch": 0.14, "grad_norm": 1.533704318351654, "learning_rate": 9.6828629534061e-06, "loss": 0.2124, "step": 1757 }, { "epoch": 0.14, "grad_norm": 1.4424987723684324, "learning_rate": 9.682408756859772e-06, "loss": 0.2383, "step": 1758 }, { "epoch": 0.14, "grad_norm": 1.2855393696998263, "learning_rate": 9.681954245969158e-06, "loss": 0.2333, "step": 1759 }, { "epoch": 0.14, "grad_norm": 5.74254894544208, "learning_rate": 9.681499420764771e-06, "loss": 0.7153, "step": 1760 }, { "epoch": 0.14, "grad_norm": 1.5713553570242775, "learning_rate": 9.681044281277141e-06, "loss": 0.229, "step": 1761 }, { "epoch": 0.14, "grad_norm": 1.587260698866443, "learning_rate": 9.680588827536828e-06, "loss": 0.2201, "step": 1762 }, { "epoch": 0.14, "grad_norm": 1.5569514938301812, "learning_rate": 9.680133059574403e-06, "loss": 0.2452, "step": 1763 }, { "epoch": 0.14, "grad_norm": 1.454284750554401, "learning_rate": 9.679676977420467e-06, "loss": 0.2105, "step": 1764 }, { "epoch": 0.14, "grad_norm": 5.801300159525248, "learning_rate": 9.679220581105636e-06, "loss": 0.5018, "step": 1765 }, { "epoch": 0.14, "grad_norm": 1.364627286606356, "learning_rate": 9.67876387066055e-06, "loss": 0.2129, "step": 1766 }, { "epoch": 0.14, "grad_norm": 8.018976042950309, "learning_rate": 9.67830684611587e-06, "loss": 0.6771, "step": 1767 }, { "epoch": 0.14, "grad_norm": 1.447175655512617, "learning_rate": 9.677849507502275e-06, "loss": 0.2346, "step": 1768 }, { "epoch": 0.14, "grad_norm": 1.3158108160667845, "learning_rate": 9.67739185485047e-06, "loss": 0.2065, "step": 1769 }, { "epoch": 0.14, "grad_norm": 1.476034659343413, "learning_rate": 9.676933888191178e-06, "loss": 0.2854, "step": 1770 }, { "epoch": 0.14, "grad_norm": 1.3606615645319498, "learning_rate": 9.676475607555145e-06, "loss": 0.2547, "step": 1771 }, { "epoch": 0.14, "grad_norm": 1.3675545766153718, "learning_rate": 9.676017012973133e-06, "loss": 0.2279, "step": 1772 }, { "epoch": 0.14, "grad_norm": 5.727456697665223, "learning_rate": 9.675558104475933e-06, "loss": 0.6477, "step": 1773 }, { "epoch": 0.14, "grad_norm": 1.515171015126078, "learning_rate": 9.67509888209435e-06, "loss": 0.3094, "step": 1774 }, { "epoch": 0.14, "grad_norm": 1.5394367436950924, "learning_rate": 9.674639345859213e-06, "loss": 0.2638, "step": 1775 }, { "epoch": 0.14, "grad_norm": 1.5962039124052536, "learning_rate": 9.674179495801375e-06, "loss": 0.2064, "step": 1776 }, { "epoch": 0.14, "grad_norm": 4.240197954443099, "learning_rate": 9.673719331951706e-06, "loss": 0.4859, "step": 1777 }, { "epoch": 0.14, "grad_norm": 1.656502508423278, "learning_rate": 9.673258854341094e-06, "loss": 0.2062, "step": 1778 }, { "epoch": 0.14, "grad_norm": 1.552797027294706, "learning_rate": 9.672798063000458e-06, "loss": 0.2396, "step": 1779 }, { "epoch": 0.14, "grad_norm": 1.3931331279779118, "learning_rate": 9.67233695796073e-06, "loss": 0.2041, "step": 1780 }, { "epoch": 0.14, "grad_norm": 1.4568924059894364, "learning_rate": 9.671875539252865e-06, "loss": 0.2392, "step": 1781 }, { "epoch": 0.14, "grad_norm": 1.4802489232587634, "learning_rate": 9.67141380690784e-06, "loss": 0.1852, "step": 1782 }, { "epoch": 0.14, "grad_norm": 5.639534657927216, "learning_rate": 9.670951760956653e-06, "loss": 0.6627, "step": 1783 }, { "epoch": 0.14, "grad_norm": 1.3268191120135426, "learning_rate": 9.670489401430322e-06, "loss": 0.1699, "step": 1784 }, { "epoch": 0.14, "grad_norm": 7.282712151151691, "learning_rate": 9.670026728359884e-06, "loss": 0.6576, "step": 1785 }, { "epoch": 0.14, "grad_norm": 1.4822379096998657, "learning_rate": 9.669563741776405e-06, "loss": 0.252, "step": 1786 }, { "epoch": 0.14, "grad_norm": 1.4795322220388172, "learning_rate": 9.669100441710962e-06, "loss": 0.2574, "step": 1787 }, { "epoch": 0.14, "grad_norm": 1.6200600945979102, "learning_rate": 9.66863682819466e-06, "loss": 0.2862, "step": 1788 }, { "epoch": 0.14, "grad_norm": 1.4324867009525744, "learning_rate": 9.668172901258623e-06, "loss": 0.2259, "step": 1789 }, { "epoch": 0.14, "grad_norm": 1.5968672444934398, "learning_rate": 9.667708660933994e-06, "loss": 0.2563, "step": 1790 }, { "epoch": 0.14, "grad_norm": 1.3424385409778248, "learning_rate": 9.66724410725194e-06, "loss": 0.2113, "step": 1791 }, { "epoch": 0.14, "grad_norm": 1.5133813264514135, "learning_rate": 9.66677924024365e-06, "loss": 0.2227, "step": 1792 }, { "epoch": 0.14, "grad_norm": 4.935970297876555, "learning_rate": 9.666314059940326e-06, "loss": 0.6504, "step": 1793 }, { "epoch": 0.14, "grad_norm": 1.4143739506100628, "learning_rate": 9.665848566373204e-06, "loss": 0.2215, "step": 1794 }, { "epoch": 0.14, "grad_norm": 1.3364310700549318, "learning_rate": 9.665382759573529e-06, "loss": 0.2022, "step": 1795 }, { "epoch": 0.14, "grad_norm": 1.3541510201119917, "learning_rate": 9.664916639572574e-06, "loss": 0.2016, "step": 1796 }, { "epoch": 0.14, "grad_norm": 1.4388248897716867, "learning_rate": 9.664450206401633e-06, "loss": 0.2302, "step": 1797 }, { "epoch": 0.14, "grad_norm": 1.3651287386259812, "learning_rate": 9.663983460092015e-06, "loss": 0.2042, "step": 1798 }, { "epoch": 0.14, "grad_norm": 1.2350813275361885, "learning_rate": 9.663516400675057e-06, "loss": 0.1715, "step": 1799 }, { "epoch": 0.14, "grad_norm": 1.4219278792127619, "learning_rate": 9.663049028182112e-06, "loss": 0.1833, "step": 1800 }, { "epoch": 0.14, "grad_norm": 1.5984418880959148, "learning_rate": 9.662581342644557e-06, "loss": 0.2481, "step": 1801 }, { "epoch": 0.14, "grad_norm": 1.5474821964505603, "learning_rate": 9.662113344093791e-06, "loss": 0.2505, "step": 1802 }, { "epoch": 0.14, "grad_norm": 1.440962164400899, "learning_rate": 9.66164503256123e-06, "loss": 0.2435, "step": 1803 }, { "epoch": 0.14, "grad_norm": 1.576455989915794, "learning_rate": 9.661176408078315e-06, "loss": 0.2246, "step": 1804 }, { "epoch": 0.14, "grad_norm": 4.414412901157168, "learning_rate": 9.660707470676503e-06, "loss": 0.5895, "step": 1805 }, { "epoch": 0.14, "grad_norm": 1.5228796606428752, "learning_rate": 9.660238220387277e-06, "loss": 0.2213, "step": 1806 }, { "epoch": 0.14, "grad_norm": 1.4916439094235168, "learning_rate": 9.659768657242138e-06, "loss": 0.2333, "step": 1807 }, { "epoch": 0.14, "grad_norm": 4.273099535753963, "learning_rate": 9.659298781272615e-06, "loss": 0.6208, "step": 1808 }, { "epoch": 0.14, "grad_norm": 1.845409289409724, "learning_rate": 9.658828592510243e-06, "loss": 0.2603, "step": 1809 }, { "epoch": 0.14, "grad_norm": 1.4958124122288672, "learning_rate": 9.658358090986594e-06, "loss": 0.2757, "step": 1810 }, { "epoch": 0.14, "grad_norm": 1.4821414328281577, "learning_rate": 9.657887276733254e-06, "loss": 0.2513, "step": 1811 }, { "epoch": 0.14, "grad_norm": 1.4463573145481676, "learning_rate": 9.657416149781826e-06, "loss": 0.2355, "step": 1812 }, { "epoch": 0.15, "grad_norm": 1.3962703248241843, "learning_rate": 9.65694471016394e-06, "loss": 0.185, "step": 1813 }, { "epoch": 0.15, "grad_norm": 1.5092840818926048, "learning_rate": 9.656472957911247e-06, "loss": 0.2272, "step": 1814 }, { "epoch": 0.15, "grad_norm": 1.5675223724328375, "learning_rate": 9.656000893055416e-06, "loss": 0.2116, "step": 1815 }, { "epoch": 0.15, "grad_norm": 1.264692402207976, "learning_rate": 9.655528515628136e-06, "loss": 0.1975, "step": 1816 }, { "epoch": 0.15, "grad_norm": 25.45217738128297, "learning_rate": 9.655055825661122e-06, "loss": 0.7673, "step": 1817 }, { "epoch": 0.15, "grad_norm": 1.4165821059514272, "learning_rate": 9.654582823186107e-06, "loss": 0.238, "step": 1818 }, { "epoch": 0.15, "grad_norm": 1.5698541977874627, "learning_rate": 9.654109508234843e-06, "loss": 0.2435, "step": 1819 }, { "epoch": 0.15, "grad_norm": 1.6561874069663314, "learning_rate": 9.653635880839107e-06, "loss": 0.2296, "step": 1820 }, { "epoch": 0.15, "grad_norm": 1.7544145649718108, "learning_rate": 9.653161941030695e-06, "loss": 0.3023, "step": 1821 }, { "epoch": 0.15, "grad_norm": 1.6215254166246602, "learning_rate": 9.652687688841422e-06, "loss": 0.2399, "step": 1822 }, { "epoch": 0.15, "grad_norm": 1.529883773675546, "learning_rate": 9.652213124303126e-06, "loss": 0.2131, "step": 1823 }, { "epoch": 0.15, "grad_norm": 1.4077640443300024, "learning_rate": 9.65173824744767e-06, "loss": 0.2164, "step": 1824 }, { "epoch": 0.15, "grad_norm": 5.950152782950908, "learning_rate": 9.651263058306932e-06, "loss": 0.5494, "step": 1825 }, { "epoch": 0.15, "grad_norm": 1.4457866648370847, "learning_rate": 9.650787556912811e-06, "loss": 0.2144, "step": 1826 }, { "epoch": 0.15, "grad_norm": 1.4620074494844246, "learning_rate": 9.650311743297229e-06, "loss": 0.2424, "step": 1827 }, { "epoch": 0.15, "grad_norm": 5.382121137459981, "learning_rate": 9.64983561749213e-06, "loss": 0.6872, "step": 1828 }, { "epoch": 0.15, "grad_norm": 10.883283566820824, "learning_rate": 9.649359179529477e-06, "loss": 0.6622, "step": 1829 }, { "epoch": 0.15, "grad_norm": 1.3915846225953552, "learning_rate": 9.648882429441258e-06, "loss": 0.2305, "step": 1830 }, { "epoch": 0.15, "grad_norm": 1.3348083555934644, "learning_rate": 9.648405367259475e-06, "loss": 0.2074, "step": 1831 }, { "epoch": 0.15, "grad_norm": 1.3593364059817745, "learning_rate": 9.647927993016154e-06, "loss": 0.1948, "step": 1832 }, { "epoch": 0.15, "grad_norm": 1.283341574165253, "learning_rate": 9.647450306743345e-06, "loss": 0.1597, "step": 1833 }, { "epoch": 0.15, "grad_norm": 1.424624696304144, "learning_rate": 9.646972308473115e-06, "loss": 0.2212, "step": 1834 }, { "epoch": 0.15, "grad_norm": 1.4161912210982865, "learning_rate": 9.646493998237557e-06, "loss": 0.2152, "step": 1835 }, { "epoch": 0.15, "grad_norm": 1.528444093794733, "learning_rate": 9.646015376068776e-06, "loss": 0.2639, "step": 1836 }, { "epoch": 0.15, "grad_norm": 1.4918759334076668, "learning_rate": 9.645536441998907e-06, "loss": 0.294, "step": 1837 }, { "epoch": 0.15, "grad_norm": 1.6963649947054689, "learning_rate": 9.6450571960601e-06, "loss": 0.2598, "step": 1838 }, { "epoch": 0.15, "grad_norm": 1.358260605009517, "learning_rate": 9.64457763828453e-06, "loss": 0.2321, "step": 1839 }, { "epoch": 0.15, "grad_norm": 1.4477344401864383, "learning_rate": 9.64409776870439e-06, "loss": 0.2039, "step": 1840 }, { "epoch": 0.15, "grad_norm": 1.412542886639437, "learning_rate": 9.643617587351897e-06, "loss": 0.2201, "step": 1841 }, { "epoch": 0.15, "grad_norm": 1.4171031095198496, "learning_rate": 9.643137094259285e-06, "loss": 0.2512, "step": 1842 }, { "epoch": 0.15, "grad_norm": 1.474123267364693, "learning_rate": 9.642656289458812e-06, "loss": 0.2183, "step": 1843 }, { "epoch": 0.15, "grad_norm": 6.002951251146631, "learning_rate": 9.642175172982755e-06, "loss": 0.4185, "step": 1844 }, { "epoch": 0.15, "grad_norm": 1.4805643562694681, "learning_rate": 9.641693744863413e-06, "loss": 0.209, "step": 1845 }, { "epoch": 0.15, "grad_norm": 1.6013647707949583, "learning_rate": 9.641212005133107e-06, "loss": 0.2495, "step": 1846 }, { "epoch": 0.15, "grad_norm": 6.204465097782585, "learning_rate": 9.640729953824178e-06, "loss": 0.6387, "step": 1847 }, { "epoch": 0.15, "grad_norm": 8.469859351823903, "learning_rate": 9.640247590968985e-06, "loss": 0.602, "step": 1848 }, { "epoch": 0.15, "grad_norm": 1.4040876612257354, "learning_rate": 9.639764916599913e-06, "loss": 0.2249, "step": 1849 }, { "epoch": 0.15, "grad_norm": 1.4923479942416529, "learning_rate": 9.639281930749363e-06, "loss": 0.2602, "step": 1850 }, { "epoch": 0.15, "grad_norm": 1.3751367266363022, "learning_rate": 9.63879863344976e-06, "loss": 0.2317, "step": 1851 }, { "epoch": 0.15, "grad_norm": 1.4436553306092534, "learning_rate": 9.638315024733552e-06, "loss": 0.2361, "step": 1852 }, { "epoch": 0.15, "grad_norm": 1.5383488585043816, "learning_rate": 9.6378311046332e-06, "loss": 0.2462, "step": 1853 }, { "epoch": 0.15, "grad_norm": 1.4334003279016496, "learning_rate": 9.637346873181194e-06, "loss": 0.2143, "step": 1854 }, { "epoch": 0.15, "grad_norm": 1.1673812802913086, "learning_rate": 9.636862330410043e-06, "loss": 0.1625, "step": 1855 }, { "epoch": 0.15, "grad_norm": 1.58793702787862, "learning_rate": 9.636377476352277e-06, "loss": 0.2532, "step": 1856 }, { "epoch": 0.15, "grad_norm": 1.3925343785715028, "learning_rate": 9.63589231104044e-06, "loss": 0.2634, "step": 1857 }, { "epoch": 0.15, "grad_norm": 1.42520859198846, "learning_rate": 9.635406834507108e-06, "loss": 0.2289, "step": 1858 }, { "epoch": 0.15, "grad_norm": 1.5713187342892687, "learning_rate": 9.63492104678487e-06, "loss": 0.1898, "step": 1859 }, { "epoch": 0.15, "grad_norm": 1.5081517942789442, "learning_rate": 9.634434947906337e-06, "loss": 0.2367, "step": 1860 }, { "epoch": 0.15, "grad_norm": 1.5185103167867047, "learning_rate": 9.633948537904145e-06, "loss": 0.2268, "step": 1861 }, { "epoch": 0.15, "grad_norm": 1.8004943209709992, "learning_rate": 9.633461816810949e-06, "loss": 0.2933, "step": 1862 }, { "epoch": 0.15, "grad_norm": 1.5849704524915296, "learning_rate": 9.632974784659421e-06, "loss": 0.2445, "step": 1863 }, { "epoch": 0.15, "grad_norm": 1.4691035787116464, "learning_rate": 9.632487441482258e-06, "loss": 0.3007, "step": 1864 }, { "epoch": 0.15, "grad_norm": 1.4209399595229988, "learning_rate": 9.631999787312179e-06, "loss": 0.2427, "step": 1865 }, { "epoch": 0.15, "grad_norm": 1.4991589331775699, "learning_rate": 9.631511822181918e-06, "loss": 0.2006, "step": 1866 }, { "epoch": 0.15, "grad_norm": 1.5548685053767215, "learning_rate": 9.631023546124236e-06, "loss": 0.2207, "step": 1867 }, { "epoch": 0.15, "grad_norm": 1.5006229270384637, "learning_rate": 9.630534959171912e-06, "loss": 0.2339, "step": 1868 }, { "epoch": 0.15, "grad_norm": 1.4997565938429933, "learning_rate": 9.630046061357745e-06, "loss": 0.2284, "step": 1869 }, { "epoch": 0.15, "grad_norm": 1.4548194027415642, "learning_rate": 9.62955685271456e-06, "loss": 0.2252, "step": 1870 }, { "epoch": 0.15, "grad_norm": 1.3836536278467995, "learning_rate": 9.629067333275195e-06, "loss": 0.1946, "step": 1871 }, { "epoch": 0.15, "grad_norm": 1.3601950686464495, "learning_rate": 9.628577503072513e-06, "loss": 0.2329, "step": 1872 }, { "epoch": 0.15, "grad_norm": 1.3874831891846593, "learning_rate": 9.628087362139402e-06, "loss": 0.2025, "step": 1873 }, { "epoch": 0.15, "grad_norm": 1.4435317874573304, "learning_rate": 9.627596910508763e-06, "loss": 0.1901, "step": 1874 }, { "epoch": 0.15, "grad_norm": 1.6043322653590923, "learning_rate": 9.627106148213521e-06, "loss": 0.2372, "step": 1875 }, { "epoch": 0.15, "grad_norm": 1.439157766200496, "learning_rate": 9.626615075286626e-06, "loss": 0.1894, "step": 1876 }, { "epoch": 0.15, "grad_norm": 1.5111564304011444, "learning_rate": 9.62612369176104e-06, "loss": 0.2367, "step": 1877 }, { "epoch": 0.15, "grad_norm": 1.7662817959135402, "learning_rate": 9.625631997669757e-06, "loss": 0.2176, "step": 1878 }, { "epoch": 0.15, "grad_norm": 1.648335523316646, "learning_rate": 9.62513999304578e-06, "loss": 0.2244, "step": 1879 }, { "epoch": 0.15, "grad_norm": 1.3874137180612482, "learning_rate": 9.624647677922143e-06, "loss": 0.1973, "step": 1880 }, { "epoch": 0.15, "grad_norm": 1.567280988811608, "learning_rate": 9.624155052331896e-06, "loss": 0.2375, "step": 1881 }, { "epoch": 0.15, "grad_norm": 1.5353112251887062, "learning_rate": 9.623662116308108e-06, "loss": 0.2741, "step": 1882 }, { "epoch": 0.15, "grad_norm": 1.5896201454120966, "learning_rate": 9.623168869883874e-06, "loss": 0.2532, "step": 1883 }, { "epoch": 0.15, "grad_norm": 1.436006516667036, "learning_rate": 9.622675313092307e-06, "loss": 0.2445, "step": 1884 }, { "epoch": 0.15, "grad_norm": 1.6584035529200605, "learning_rate": 9.622181445966539e-06, "loss": 0.2372, "step": 1885 }, { "epoch": 0.15, "grad_norm": 1.3677471570809905, "learning_rate": 9.621687268539725e-06, "loss": 0.1718, "step": 1886 }, { "epoch": 0.15, "grad_norm": 1.532037447855381, "learning_rate": 9.621192780845044e-06, "loss": 0.2068, "step": 1887 }, { "epoch": 0.15, "grad_norm": 1.5706662625356325, "learning_rate": 9.620697982915688e-06, "loss": 0.238, "step": 1888 }, { "epoch": 0.15, "grad_norm": 1.584125466194905, "learning_rate": 9.620202874784878e-06, "loss": 0.2652, "step": 1889 }, { "epoch": 0.15, "grad_norm": 1.5165891746076328, "learning_rate": 9.619707456485848e-06, "loss": 0.2173, "step": 1890 }, { "epoch": 0.15, "grad_norm": 1.3596238825379194, "learning_rate": 9.61921172805186e-06, "loss": 0.1858, "step": 1891 }, { "epoch": 0.15, "grad_norm": 1.3782633084287081, "learning_rate": 9.618715689516194e-06, "loss": 0.2025, "step": 1892 }, { "epoch": 0.15, "grad_norm": 1.6404819007168323, "learning_rate": 9.61821934091215e-06, "loss": 0.2759, "step": 1893 }, { "epoch": 0.15, "grad_norm": 1.4671685267446017, "learning_rate": 9.617722682273048e-06, "loss": 0.2588, "step": 1894 }, { "epoch": 0.15, "grad_norm": 1.802647054447075, "learning_rate": 9.61722571363223e-06, "loss": 0.2539, "step": 1895 }, { "epoch": 0.15, "grad_norm": 1.3088073009265881, "learning_rate": 9.616728435023061e-06, "loss": 0.2152, "step": 1896 }, { "epoch": 0.15, "grad_norm": 1.7437712948100854, "learning_rate": 9.616230846478925e-06, "loss": 0.2605, "step": 1897 }, { "epoch": 0.15, "grad_norm": 1.552820013193392, "learning_rate": 9.615732948033225e-06, "loss": 0.2466, "step": 1898 }, { "epoch": 0.15, "grad_norm": 1.3554777950492651, "learning_rate": 9.615234739719387e-06, "loss": 0.2383, "step": 1899 }, { "epoch": 0.15, "grad_norm": 1.4223252507208866, "learning_rate": 9.61473622157086e-06, "loss": 0.2225, "step": 1900 }, { "epoch": 0.15, "grad_norm": 1.2953329567490843, "learning_rate": 9.614237393621104e-06, "loss": 0.2114, "step": 1901 }, { "epoch": 0.15, "grad_norm": 1.5240746242103045, "learning_rate": 9.613738255903613e-06, "loss": 0.2022, "step": 1902 }, { "epoch": 0.15, "grad_norm": 1.4085098899521087, "learning_rate": 9.613238808451894e-06, "loss": 0.2233, "step": 1903 }, { "epoch": 0.15, "grad_norm": 1.5415613357623623, "learning_rate": 9.612739051299477e-06, "loss": 0.224, "step": 1904 }, { "epoch": 0.15, "grad_norm": 1.3701432841120822, "learning_rate": 9.61223898447991e-06, "loss": 0.2147, "step": 1905 }, { "epoch": 0.15, "grad_norm": 4.141515132267844, "learning_rate": 9.611738608026765e-06, "loss": 0.5456, "step": 1906 }, { "epoch": 0.15, "grad_norm": 4.94252467747008, "learning_rate": 9.611237921973637e-06, "loss": 0.5873, "step": 1907 }, { "epoch": 0.15, "grad_norm": 1.5175182247666472, "learning_rate": 9.610736926354133e-06, "loss": 0.2015, "step": 1908 }, { "epoch": 0.15, "grad_norm": 1.6791296685318229, "learning_rate": 9.61023562120189e-06, "loss": 0.2576, "step": 1909 }, { "epoch": 0.15, "grad_norm": 1.540540676075431, "learning_rate": 9.609734006550562e-06, "loss": 0.2803, "step": 1910 }, { "epoch": 0.15, "grad_norm": 1.3932638652674971, "learning_rate": 9.609232082433824e-06, "loss": 0.2491, "step": 1911 }, { "epoch": 0.15, "grad_norm": 5.0673565846479125, "learning_rate": 9.608729848885369e-06, "loss": 0.5702, "step": 1912 }, { "epoch": 0.15, "grad_norm": 1.4628770278104222, "learning_rate": 9.608227305938915e-06, "loss": 0.2505, "step": 1913 }, { "epoch": 0.15, "grad_norm": 1.4556220972378124, "learning_rate": 9.6077244536282e-06, "loss": 0.2165, "step": 1914 }, { "epoch": 0.15, "grad_norm": 1.3776072477629275, "learning_rate": 9.607221291986983e-06, "loss": 0.2051, "step": 1915 }, { "epoch": 0.15, "grad_norm": 1.6109416961035818, "learning_rate": 9.606717821049042e-06, "loss": 0.2318, "step": 1916 }, { "epoch": 0.15, "grad_norm": 6.564011078240842, "learning_rate": 9.606214040848174e-06, "loss": 0.6679, "step": 1917 }, { "epoch": 0.15, "grad_norm": 1.4734363571316462, "learning_rate": 9.605709951418201e-06, "loss": 0.2191, "step": 1918 }, { "epoch": 0.15, "grad_norm": 1.415947605045889, "learning_rate": 9.605205552792964e-06, "loss": 0.2592, "step": 1919 }, { "epoch": 0.15, "grad_norm": 1.3553576209548763, "learning_rate": 9.604700845006326e-06, "loss": 0.1879, "step": 1920 }, { "epoch": 0.15, "grad_norm": 7.770096408550739, "learning_rate": 9.604195828092169e-06, "loss": 0.6087, "step": 1921 }, { "epoch": 0.15, "grad_norm": 1.436176122355819, "learning_rate": 9.603690502084396e-06, "loss": 0.2166, "step": 1922 }, { "epoch": 0.15, "grad_norm": 1.532933079910172, "learning_rate": 9.603184867016929e-06, "loss": 0.2502, "step": 1923 }, { "epoch": 0.15, "grad_norm": 1.6724162619718177, "learning_rate": 9.602678922923716e-06, "loss": 0.2565, "step": 1924 }, { "epoch": 0.15, "grad_norm": 1.4617272488796216, "learning_rate": 9.602172669838721e-06, "loss": 0.175, "step": 1925 }, { "epoch": 0.15, "grad_norm": 1.4826564657118184, "learning_rate": 9.60166610779593e-06, "loss": 0.2258, "step": 1926 }, { "epoch": 0.15, "grad_norm": 1.5029089606156945, "learning_rate": 9.601159236829353e-06, "loss": 0.2181, "step": 1927 }, { "epoch": 0.15, "grad_norm": 1.3315757135444566, "learning_rate": 9.600652056973013e-06, "loss": 0.2024, "step": 1928 }, { "epoch": 0.15, "grad_norm": 1.7744990086828667, "learning_rate": 9.600144568260962e-06, "loss": 0.205, "step": 1929 }, { "epoch": 0.15, "grad_norm": 1.464807782821732, "learning_rate": 9.59963677072727e-06, "loss": 0.2625, "step": 1930 }, { "epoch": 0.15, "grad_norm": 1.4557584757300288, "learning_rate": 9.599128664406023e-06, "loss": 0.2229, "step": 1931 }, { "epoch": 0.15, "grad_norm": 20.049400808877856, "learning_rate": 9.598620249331334e-06, "loss": 0.5854, "step": 1932 }, { "epoch": 0.15, "grad_norm": 1.5335096183442203, "learning_rate": 9.598111525537336e-06, "loss": 0.2668, "step": 1933 }, { "epoch": 0.15, "grad_norm": 1.728029813280104, "learning_rate": 9.597602493058178e-06, "loss": 0.2158, "step": 1934 }, { "epoch": 0.15, "grad_norm": 1.2578209304611148, "learning_rate": 9.597093151928035e-06, "loss": 0.1605, "step": 1935 }, { "epoch": 0.15, "grad_norm": 5.445192653172929, "learning_rate": 9.5965835021811e-06, "loss": 0.6865, "step": 1936 }, { "epoch": 0.15, "grad_norm": 5.155428312150652, "learning_rate": 9.596073543851587e-06, "loss": 0.7152, "step": 1937 }, { "epoch": 0.16, "grad_norm": 1.2743830337819548, "learning_rate": 9.595563276973732e-06, "loss": 0.166, "step": 1938 }, { "epoch": 0.16, "grad_norm": 1.4978080161236704, "learning_rate": 9.59505270158179e-06, "loss": 0.2852, "step": 1939 }, { "epoch": 0.16, "grad_norm": 1.2297690136207637, "learning_rate": 9.594541817710039e-06, "loss": 0.1969, "step": 1940 }, { "epoch": 0.16, "grad_norm": 1.5917862593006424, "learning_rate": 9.594030625392772e-06, "loss": 0.2255, "step": 1941 }, { "epoch": 0.16, "grad_norm": 1.4976831586923893, "learning_rate": 9.593519124664313e-06, "loss": 0.1637, "step": 1942 }, { "epoch": 0.16, "grad_norm": 1.4850846003247933, "learning_rate": 9.593007315558996e-06, "loss": 0.2559, "step": 1943 }, { "epoch": 0.16, "grad_norm": 1.497901466411517, "learning_rate": 9.59249519811118e-06, "loss": 0.2143, "step": 1944 }, { "epoch": 0.16, "grad_norm": 1.8133958958486953, "learning_rate": 9.591982772355248e-06, "loss": 0.2308, "step": 1945 }, { "epoch": 0.16, "grad_norm": 1.5158881167945921, "learning_rate": 9.591470038325599e-06, "loss": 0.2385, "step": 1946 }, { "epoch": 0.16, "grad_norm": 1.5931407945641844, "learning_rate": 9.590956996056656e-06, "loss": 0.2045, "step": 1947 }, { "epoch": 0.16, "grad_norm": 11.875339324321946, "learning_rate": 9.590443645582859e-06, "loss": 0.575, "step": 1948 }, { "epoch": 0.16, "grad_norm": 4.413503531708616, "learning_rate": 9.58992998693867e-06, "loss": 0.5894, "step": 1949 }, { "epoch": 0.16, "grad_norm": 1.3899183016364773, "learning_rate": 9.589416020158577e-06, "loss": 0.224, "step": 1950 }, { "epoch": 0.16, "grad_norm": 1.616174180479946, "learning_rate": 9.58890174527708e-06, "loss": 0.238, "step": 1951 }, { "epoch": 0.16, "grad_norm": 1.2557217304251171, "learning_rate": 9.588387162328705e-06, "loss": 0.1765, "step": 1952 }, { "epoch": 0.16, "grad_norm": 1.241650721238723, "learning_rate": 9.587872271347996e-06, "loss": 0.1956, "step": 1953 }, { "epoch": 0.16, "grad_norm": 5.182374014235162, "learning_rate": 9.587357072369522e-06, "loss": 0.4694, "step": 1954 }, { "epoch": 0.16, "grad_norm": 8.495638385794956, "learning_rate": 9.586841565427869e-06, "loss": 0.6953, "step": 1955 }, { "epoch": 0.16, "grad_norm": 1.5234717979887953, "learning_rate": 9.586325750557643e-06, "loss": 0.232, "step": 1956 }, { "epoch": 0.16, "grad_norm": 1.3231156939189745, "learning_rate": 9.585809627793475e-06, "loss": 0.2181, "step": 1957 }, { "epoch": 0.16, "grad_norm": 1.6643409253493608, "learning_rate": 9.58529319717001e-06, "loss": 0.2483, "step": 1958 }, { "epoch": 0.16, "grad_norm": 6.220945638880487, "learning_rate": 9.584776458721922e-06, "loss": 0.5609, "step": 1959 }, { "epoch": 0.16, "grad_norm": 1.5189513806058108, "learning_rate": 9.584259412483899e-06, "loss": 0.2371, "step": 1960 }, { "epoch": 0.16, "grad_norm": 1.3992559758248637, "learning_rate": 9.58374205849065e-06, "loss": 0.2309, "step": 1961 }, { "epoch": 0.16, "grad_norm": 5.457210197620917, "learning_rate": 9.58322439677691e-06, "loss": 0.6165, "step": 1962 }, { "epoch": 0.16, "grad_norm": 4.6775682801719665, "learning_rate": 9.58270642737743e-06, "loss": 0.5421, "step": 1963 }, { "epoch": 0.16, "grad_norm": 1.4032638645107387, "learning_rate": 9.582188150326981e-06, "loss": 0.2113, "step": 1964 }, { "epoch": 0.16, "grad_norm": 1.702253330389171, "learning_rate": 9.58166956566036e-06, "loss": 0.28, "step": 1965 }, { "epoch": 0.16, "grad_norm": 5.902093164871039, "learning_rate": 9.581150673412376e-06, "loss": 0.8045, "step": 1966 }, { "epoch": 0.16, "grad_norm": 1.4321327890486761, "learning_rate": 9.58063147361787e-06, "loss": 0.2108, "step": 1967 }, { "epoch": 0.16, "grad_norm": 1.3360277324608862, "learning_rate": 9.580111966311692e-06, "loss": 0.1979, "step": 1968 }, { "epoch": 0.16, "grad_norm": 1.5780497789719112, "learning_rate": 9.579592151528721e-06, "loss": 0.2494, "step": 1969 }, { "epoch": 0.16, "grad_norm": 1.4902731643983842, "learning_rate": 9.579072029303855e-06, "loss": 0.2485, "step": 1970 }, { "epoch": 0.16, "grad_norm": 5.843284786117048, "learning_rate": 9.578551599672008e-06, "loss": 0.649, "step": 1971 }, { "epoch": 0.16, "grad_norm": 1.4033494890839562, "learning_rate": 9.57803086266812e-06, "loss": 0.1878, "step": 1972 }, { "epoch": 0.16, "grad_norm": 1.2682486523733965, "learning_rate": 9.57750981832715e-06, "loss": 0.1982, "step": 1973 }, { "epoch": 0.16, "grad_norm": 1.4589166214005198, "learning_rate": 9.576988466684077e-06, "loss": 0.2242, "step": 1974 }, { "epoch": 0.16, "grad_norm": 1.5320888780628783, "learning_rate": 9.5764668077739e-06, "loss": 0.2255, "step": 1975 }, { "epoch": 0.16, "grad_norm": 1.44560694881027, "learning_rate": 9.575944841631636e-06, "loss": 0.2258, "step": 1976 }, { "epoch": 0.16, "grad_norm": 1.2640671370808514, "learning_rate": 9.575422568292336e-06, "loss": 0.1815, "step": 1977 }, { "epoch": 0.16, "grad_norm": 1.4448004433267245, "learning_rate": 9.574899987791054e-06, "loss": 0.1947, "step": 1978 }, { "epoch": 0.16, "grad_norm": 1.53999420291616, "learning_rate": 9.574377100162874e-06, "loss": 0.2613, "step": 1979 }, { "epoch": 0.16, "grad_norm": 21.608890307131713, "learning_rate": 9.573853905442899e-06, "loss": 0.6941, "step": 1980 }, { "epoch": 0.16, "grad_norm": 1.4569898264642869, "learning_rate": 9.573330403666254e-06, "loss": 0.1742, "step": 1981 }, { "epoch": 0.16, "grad_norm": 1.3107620253197376, "learning_rate": 9.572806594868082e-06, "loss": 0.2263, "step": 1982 }, { "epoch": 0.16, "grad_norm": 1.5460657706140506, "learning_rate": 9.572282479083548e-06, "loss": 0.2216, "step": 1983 }, { "epoch": 0.16, "grad_norm": 1.4814982356585722, "learning_rate": 9.571758056347839e-06, "loss": 0.2374, "step": 1984 }, { "epoch": 0.16, "grad_norm": 1.5328835440939412, "learning_rate": 9.571233326696159e-06, "loss": 0.207, "step": 1985 }, { "epoch": 0.16, "grad_norm": 1.8486166299418112, "learning_rate": 9.570708290163735e-06, "loss": 0.2552, "step": 1986 }, { "epoch": 0.16, "grad_norm": 1.4918509186981561, "learning_rate": 9.570182946785816e-06, "loss": 0.208, "step": 1987 }, { "epoch": 0.16, "grad_norm": 1.3786445868384574, "learning_rate": 9.569657296597668e-06, "loss": 0.2046, "step": 1988 }, { "epoch": 0.16, "grad_norm": 1.5473488657885672, "learning_rate": 9.569131339634578e-06, "loss": 0.2318, "step": 1989 }, { "epoch": 0.16, "grad_norm": 4.033039266230712, "learning_rate": 9.56860507593186e-06, "loss": 0.5917, "step": 1990 }, { "epoch": 0.16, "grad_norm": 1.369799275992181, "learning_rate": 9.56807850552484e-06, "loss": 0.2377, "step": 1991 }, { "epoch": 0.16, "grad_norm": 1.552840651691266, "learning_rate": 9.56755162844887e-06, "loss": 0.1961, "step": 1992 }, { "epoch": 0.16, "grad_norm": 1.5105102349890958, "learning_rate": 9.567024444739319e-06, "loss": 0.2288, "step": 1993 }, { "epoch": 0.16, "grad_norm": 1.3884346756602388, "learning_rate": 9.566496954431581e-06, "loss": 0.2376, "step": 1994 }, { "epoch": 0.16, "grad_norm": 1.548709650092197, "learning_rate": 9.565969157561066e-06, "loss": 0.1823, "step": 1995 }, { "epoch": 0.16, "grad_norm": 1.545572645035, "learning_rate": 9.565441054163205e-06, "loss": 0.2296, "step": 1996 }, { "epoch": 0.16, "grad_norm": 1.50076542549949, "learning_rate": 9.564912644273456e-06, "loss": 0.2596, "step": 1997 }, { "epoch": 0.16, "grad_norm": 1.4720231006663815, "learning_rate": 9.564383927927289e-06, "loss": 0.2588, "step": 1998 }, { "epoch": 0.16, "grad_norm": 1.5452056101398999, "learning_rate": 9.5638549051602e-06, "loss": 0.2464, "step": 1999 }, { "epoch": 0.16, "grad_norm": 1.5219195446778333, "learning_rate": 9.563325576007702e-06, "loss": 0.2344, "step": 2000 }, { "epoch": 0.16, "grad_norm": 1.557654921930902, "learning_rate": 9.562795940505332e-06, "loss": 0.2622, "step": 2001 }, { "epoch": 0.16, "grad_norm": 1.6739214624234389, "learning_rate": 9.562265998688648e-06, "loss": 0.2572, "step": 2002 }, { "epoch": 0.16, "grad_norm": 1.4127068911803078, "learning_rate": 9.561735750593221e-06, "loss": 0.2076, "step": 2003 }, { "epoch": 0.16, "grad_norm": 1.2916223613666138, "learning_rate": 9.561205196254652e-06, "loss": 0.188, "step": 2004 }, { "epoch": 0.16, "grad_norm": 1.5532007439461248, "learning_rate": 9.56067433570856e-06, "loss": 0.2443, "step": 2005 }, { "epoch": 0.16, "grad_norm": 1.4665407842945997, "learning_rate": 9.56014316899058e-06, "loss": 0.2112, "step": 2006 }, { "epoch": 0.16, "grad_norm": 1.3616723481387611, "learning_rate": 9.559611696136375e-06, "loss": 0.2328, "step": 2007 }, { "epoch": 0.16, "grad_norm": 1.3875531285528953, "learning_rate": 9.55907991718162e-06, "loss": 0.2432, "step": 2008 }, { "epoch": 0.16, "grad_norm": 1.3337642493496045, "learning_rate": 9.558547832162017e-06, "loss": 0.192, "step": 2009 }, { "epoch": 0.16, "grad_norm": 1.5039344008795261, "learning_rate": 9.558015441113285e-06, "loss": 0.2533, "step": 2010 }, { "epoch": 0.16, "grad_norm": 1.4983109918612854, "learning_rate": 9.557482744071166e-06, "loss": 0.2314, "step": 2011 }, { "epoch": 0.16, "grad_norm": 1.4345967592725362, "learning_rate": 9.556949741071423e-06, "loss": 0.2179, "step": 2012 }, { "epoch": 0.16, "grad_norm": 1.4163479549051405, "learning_rate": 9.556416432149838e-06, "loss": 0.2055, "step": 2013 }, { "epoch": 0.16, "grad_norm": 1.5432022347313818, "learning_rate": 9.555882817342212e-06, "loss": 0.1913, "step": 2014 }, { "epoch": 0.16, "grad_norm": 1.4667905992905719, "learning_rate": 9.555348896684366e-06, "loss": 0.2213, "step": 2015 }, { "epoch": 0.16, "grad_norm": 1.525694519663082, "learning_rate": 9.55481467021215e-06, "loss": 0.2265, "step": 2016 }, { "epoch": 0.16, "grad_norm": 1.4885313463161238, "learning_rate": 9.554280137961423e-06, "loss": 0.1936, "step": 2017 }, { "epoch": 0.16, "grad_norm": 1.7979582509944723, "learning_rate": 9.553745299968071e-06, "loss": 0.2786, "step": 2018 }, { "epoch": 0.16, "grad_norm": 6.911634735118961, "learning_rate": 9.553210156268e-06, "loss": 0.8003, "step": 2019 }, { "epoch": 0.16, "grad_norm": 1.4381219281716529, "learning_rate": 9.552674706897136e-06, "loss": 0.232, "step": 2020 }, { "epoch": 0.16, "grad_norm": 1.4646740875389157, "learning_rate": 9.552138951891425e-06, "loss": 0.189, "step": 2021 }, { "epoch": 0.16, "grad_norm": 1.4032173156466463, "learning_rate": 9.551602891286833e-06, "loss": 0.2177, "step": 2022 }, { "epoch": 0.16, "grad_norm": 1.3365001270540964, "learning_rate": 9.551066525119349e-06, "loss": 0.2423, "step": 2023 }, { "epoch": 0.16, "grad_norm": 1.6235414420461314, "learning_rate": 9.550529853424979e-06, "loss": 0.2659, "step": 2024 }, { "epoch": 0.16, "grad_norm": 1.5801991810269655, "learning_rate": 9.549992876239753e-06, "loss": 0.2185, "step": 2025 }, { "epoch": 0.16, "grad_norm": 1.4236622626202178, "learning_rate": 9.54945559359972e-06, "loss": 0.1621, "step": 2026 }, { "epoch": 0.16, "grad_norm": 1.2526788461689256, "learning_rate": 9.548918005540948e-06, "loss": 0.2026, "step": 2027 }, { "epoch": 0.16, "grad_norm": 1.359433016465041, "learning_rate": 9.548380112099527e-06, "loss": 0.2424, "step": 2028 }, { "epoch": 0.16, "grad_norm": 1.412417354573437, "learning_rate": 9.547841913311567e-06, "loss": 0.1905, "step": 2029 }, { "epoch": 0.16, "grad_norm": 1.5736360950442416, "learning_rate": 9.547303409213202e-06, "loss": 0.2728, "step": 2030 }, { "epoch": 0.16, "grad_norm": 1.3870607442195202, "learning_rate": 9.546764599840581e-06, "loss": 0.231, "step": 2031 }, { "epoch": 0.16, "grad_norm": 6.685973324326073, "learning_rate": 9.546225485229876e-06, "loss": 0.6292, "step": 2032 }, { "epoch": 0.16, "grad_norm": 1.4805895927061385, "learning_rate": 9.545686065417279e-06, "loss": 0.2318, "step": 2033 }, { "epoch": 0.16, "grad_norm": 1.4034043598475996, "learning_rate": 9.545146340439005e-06, "loss": 0.1979, "step": 2034 }, { "epoch": 0.16, "grad_norm": 6.493837060678656, "learning_rate": 9.544606310331284e-06, "loss": 0.4933, "step": 2035 }, { "epoch": 0.16, "grad_norm": 1.392240166641213, "learning_rate": 9.544065975130375e-06, "loss": 0.1941, "step": 2036 }, { "epoch": 0.16, "grad_norm": 1.4290041631367258, "learning_rate": 9.543525334872546e-06, "loss": 0.2174, "step": 2037 }, { "epoch": 0.16, "grad_norm": 1.4726713094571027, "learning_rate": 9.542984389594096e-06, "loss": 0.2417, "step": 2038 }, { "epoch": 0.16, "grad_norm": 1.5966915430033186, "learning_rate": 9.54244313933134e-06, "loss": 0.2323, "step": 2039 }, { "epoch": 0.16, "grad_norm": 1.3987276596032185, "learning_rate": 9.541901584120612e-06, "loss": 0.216, "step": 2040 }, { "epoch": 0.16, "grad_norm": 1.5736230177539783, "learning_rate": 9.541359723998268e-06, "loss": 0.275, "step": 2041 }, { "epoch": 0.16, "grad_norm": 1.159315491315751, "learning_rate": 9.540817559000688e-06, "loss": 0.2021, "step": 2042 }, { "epoch": 0.16, "grad_norm": 1.4957615294768731, "learning_rate": 9.540275089164266e-06, "loss": 0.2567, "step": 2043 }, { "epoch": 0.16, "grad_norm": 1.4694328640662935, "learning_rate": 9.539732314525421e-06, "loss": 0.2056, "step": 2044 }, { "epoch": 0.16, "grad_norm": 1.4735681707955224, "learning_rate": 9.539189235120591e-06, "loss": 0.2198, "step": 2045 }, { "epoch": 0.16, "grad_norm": 1.370865028932725, "learning_rate": 9.538645850986235e-06, "loss": 0.2182, "step": 2046 }, { "epoch": 0.16, "grad_norm": 1.4694703789697863, "learning_rate": 9.538102162158832e-06, "loss": 0.2018, "step": 2047 }, { "epoch": 0.16, "grad_norm": 1.6008991062691, "learning_rate": 9.53755816867488e-06, "loss": 0.25, "step": 2048 }, { "epoch": 0.16, "grad_norm": 1.4290205669845732, "learning_rate": 9.5370138705709e-06, "loss": 0.2134, "step": 2049 }, { "epoch": 0.16, "grad_norm": 5.524218497245832, "learning_rate": 9.536469267883432e-06, "loss": 0.5738, "step": 2050 }, { "epoch": 0.16, "grad_norm": 1.5826078574426712, "learning_rate": 9.535924360649038e-06, "loss": 0.2056, "step": 2051 }, { "epoch": 0.16, "grad_norm": 1.4485455108272356, "learning_rate": 9.535379148904297e-06, "loss": 0.2206, "step": 2052 }, { "epoch": 0.16, "grad_norm": 1.4123381063334186, "learning_rate": 9.534833632685813e-06, "loss": 0.2076, "step": 2053 }, { "epoch": 0.16, "grad_norm": 1.4042409562289215, "learning_rate": 9.534287812030207e-06, "loss": 0.2568, "step": 2054 }, { "epoch": 0.16, "grad_norm": 1.5448729375586674, "learning_rate": 9.533741686974122e-06, "loss": 0.2278, "step": 2055 }, { "epoch": 0.16, "grad_norm": 1.3972955293158567, "learning_rate": 9.53319525755422e-06, "loss": 0.1769, "step": 2056 }, { "epoch": 0.16, "grad_norm": 1.3729575846573594, "learning_rate": 9.532648523807186e-06, "loss": 0.2158, "step": 2057 }, { "epoch": 0.16, "grad_norm": 5.45973639599006, "learning_rate": 9.532101485769723e-06, "loss": 0.5855, "step": 2058 }, { "epoch": 0.16, "grad_norm": 1.3411493720748247, "learning_rate": 9.531554143478556e-06, "loss": 0.2057, "step": 2059 }, { "epoch": 0.16, "grad_norm": 1.1776009501329519, "learning_rate": 9.53100649697043e-06, "loss": 0.1607, "step": 2060 }, { "epoch": 0.16, "grad_norm": 1.5401496818725513, "learning_rate": 9.530458546282108e-06, "loss": 0.2706, "step": 2061 }, { "epoch": 0.16, "grad_norm": 2.508100732135895, "learning_rate": 9.529910291450377e-06, "loss": 0.2664, "step": 2062 }, { "epoch": 0.17, "grad_norm": 1.5642677004810706, "learning_rate": 9.529361732512044e-06, "loss": 0.232, "step": 2063 }, { "epoch": 0.17, "grad_norm": 1.495726002865652, "learning_rate": 9.528812869503934e-06, "loss": 0.1994, "step": 2064 }, { "epoch": 0.17, "grad_norm": 4.309080135006772, "learning_rate": 9.528263702462894e-06, "loss": 0.631, "step": 2065 }, { "epoch": 0.17, "grad_norm": 1.6086823594169135, "learning_rate": 9.527714231425793e-06, "loss": 0.2446, "step": 2066 }, { "epoch": 0.17, "grad_norm": 1.3925787350849426, "learning_rate": 9.527164456429517e-06, "loss": 0.2425, "step": 2067 }, { "epoch": 0.17, "grad_norm": 7.252624648772561, "learning_rate": 9.526614377510971e-06, "loss": 0.5082, "step": 2068 }, { "epoch": 0.17, "grad_norm": 1.406857417188421, "learning_rate": 9.526063994707091e-06, "loss": 0.2145, "step": 2069 }, { "epoch": 0.17, "grad_norm": 1.2559383225222025, "learning_rate": 9.525513308054818e-06, "loss": 0.1841, "step": 2070 }, { "epoch": 0.17, "grad_norm": 1.6739316440920449, "learning_rate": 9.524962317591128e-06, "loss": 0.2422, "step": 2071 }, { "epoch": 0.17, "grad_norm": 1.4333957233835004, "learning_rate": 9.524411023353007e-06, "loss": 0.2087, "step": 2072 }, { "epoch": 0.17, "grad_norm": 5.159383298981887, "learning_rate": 9.523859425377464e-06, "loss": 0.6348, "step": 2073 }, { "epoch": 0.17, "grad_norm": 1.5466566084137177, "learning_rate": 9.523307523701532e-06, "loss": 0.2704, "step": 2074 }, { "epoch": 0.17, "grad_norm": 1.433444494139256, "learning_rate": 9.52275531836226e-06, "loss": 0.1978, "step": 2075 }, { "epoch": 0.17, "grad_norm": 1.4712908303896224, "learning_rate": 9.522202809396721e-06, "loss": 0.2111, "step": 2076 }, { "epoch": 0.17, "grad_norm": 1.4193624016751663, "learning_rate": 9.521649996842006e-06, "loss": 0.2157, "step": 2077 }, { "epoch": 0.17, "grad_norm": 4.407578954997005, "learning_rate": 9.521096880735226e-06, "loss": 0.5117, "step": 2078 }, { "epoch": 0.17, "grad_norm": 1.3940715489580375, "learning_rate": 9.520543461113514e-06, "loss": 0.2256, "step": 2079 }, { "epoch": 0.17, "grad_norm": 1.5481874725980844, "learning_rate": 9.519989738014022e-06, "loss": 0.2672, "step": 2080 }, { "epoch": 0.17, "grad_norm": 1.459479265706027, "learning_rate": 9.519435711473926e-06, "loss": 0.2803, "step": 2081 }, { "epoch": 0.17, "grad_norm": 1.6033144472552365, "learning_rate": 9.518881381530415e-06, "loss": 0.1971, "step": 2082 }, { "epoch": 0.17, "grad_norm": 1.3915288260580332, "learning_rate": 9.518326748220707e-06, "loss": 0.2283, "step": 2083 }, { "epoch": 0.17, "grad_norm": 1.3302037641307136, "learning_rate": 9.517771811582033e-06, "loss": 0.2342, "step": 2084 }, { "epoch": 0.17, "grad_norm": 1.3679423428982893, "learning_rate": 9.51721657165165e-06, "loss": 0.2202, "step": 2085 }, { "epoch": 0.17, "grad_norm": 1.3855493969089079, "learning_rate": 9.51666102846683e-06, "loss": 0.1825, "step": 2086 }, { "epoch": 0.17, "grad_norm": 1.6501583709617222, "learning_rate": 9.516105182064872e-06, "loss": 0.2231, "step": 2087 }, { "epoch": 0.17, "grad_norm": 1.4201177790419752, "learning_rate": 9.515549032483091e-06, "loss": 0.2075, "step": 2088 }, { "epoch": 0.17, "grad_norm": 1.341568882933472, "learning_rate": 9.51499257975882e-06, "loss": 0.1821, "step": 2089 }, { "epoch": 0.17, "grad_norm": 5.091598968673305, "learning_rate": 9.514435823929418e-06, "loss": 0.4889, "step": 2090 }, { "epoch": 0.17, "grad_norm": 1.4453792949540785, "learning_rate": 9.51387876503226e-06, "loss": 0.2102, "step": 2091 }, { "epoch": 0.17, "grad_norm": 1.213033142639318, "learning_rate": 9.513321403104745e-06, "loss": 0.1777, "step": 2092 }, { "epoch": 0.17, "grad_norm": 1.3778318686899453, "learning_rate": 9.512763738184289e-06, "loss": 0.1802, "step": 2093 }, { "epoch": 0.17, "grad_norm": 1.3576336873725359, "learning_rate": 9.51220577030833e-06, "loss": 0.2045, "step": 2094 }, { "epoch": 0.17, "grad_norm": 1.2853360472502422, "learning_rate": 9.511647499514327e-06, "loss": 0.1939, "step": 2095 }, { "epoch": 0.17, "grad_norm": 1.3877022394839709, "learning_rate": 9.511088925839758e-06, "loss": 0.2376, "step": 2096 }, { "epoch": 0.17, "grad_norm": 1.3181303775408801, "learning_rate": 9.51053004932212e-06, "loss": 0.2082, "step": 2097 }, { "epoch": 0.17, "grad_norm": 1.4625666963559458, "learning_rate": 9.509970869998933e-06, "loss": 0.273, "step": 2098 }, { "epoch": 0.17, "grad_norm": 1.4917515500282716, "learning_rate": 9.509411387907738e-06, "loss": 0.1954, "step": 2099 }, { "epoch": 0.17, "grad_norm": 5.419326449804372, "learning_rate": 9.508851603086094e-06, "loss": 0.5762, "step": 2100 }, { "epoch": 0.17, "grad_norm": 1.4734182074506141, "learning_rate": 9.50829151557158e-06, "loss": 0.2343, "step": 2101 }, { "epoch": 0.17, "grad_norm": 1.4262186852452459, "learning_rate": 9.507731125401795e-06, "loss": 0.1929, "step": 2102 }, { "epoch": 0.17, "grad_norm": 1.310389441278163, "learning_rate": 9.507170432614364e-06, "loss": 0.1959, "step": 2103 }, { "epoch": 0.17, "grad_norm": 1.4456713785030906, "learning_rate": 9.506609437246924e-06, "loss": 0.2209, "step": 2104 }, { "epoch": 0.17, "grad_norm": 1.5400771409153984, "learning_rate": 9.506048139337142e-06, "loss": 0.2259, "step": 2105 }, { "epoch": 0.17, "grad_norm": 7.372996562194523, "learning_rate": 9.50548653892269e-06, "loss": 0.5097, "step": 2106 }, { "epoch": 0.17, "grad_norm": 1.3971378017901916, "learning_rate": 9.504924636041279e-06, "loss": 0.1831, "step": 2107 }, { "epoch": 0.17, "grad_norm": 1.5533099875500511, "learning_rate": 9.504362430730627e-06, "loss": 0.2745, "step": 2108 }, { "epoch": 0.17, "grad_norm": 1.3941673192517146, "learning_rate": 9.503799923028478e-06, "loss": 0.2061, "step": 2109 }, { "epoch": 0.17, "grad_norm": 1.5649265055545385, "learning_rate": 9.503237112972594e-06, "loss": 0.2367, "step": 2110 }, { "epoch": 0.17, "grad_norm": 1.2898029001234865, "learning_rate": 9.502674000600758e-06, "loss": 0.1878, "step": 2111 }, { "epoch": 0.17, "grad_norm": 4.109576612651166, "learning_rate": 9.502110585950773e-06, "loss": 0.6511, "step": 2112 }, { "epoch": 0.17, "grad_norm": 1.5188392150368415, "learning_rate": 9.501546869060466e-06, "loss": 0.2595, "step": 2113 }, { "epoch": 0.17, "grad_norm": 1.5330604673304256, "learning_rate": 9.500982849967674e-06, "loss": 0.1868, "step": 2114 }, { "epoch": 0.17, "grad_norm": 1.4843998264605276, "learning_rate": 9.50041852871027e-06, "loss": 0.2272, "step": 2115 }, { "epoch": 0.17, "grad_norm": 1.352627389988722, "learning_rate": 9.499853905326133e-06, "loss": 0.2411, "step": 2116 }, { "epoch": 0.17, "grad_norm": 1.4824838907866142, "learning_rate": 9.49928897985317e-06, "loss": 0.2476, "step": 2117 }, { "epoch": 0.17, "grad_norm": 1.2518045357658587, "learning_rate": 9.498723752329304e-06, "loss": 0.1926, "step": 2118 }, { "epoch": 0.17, "grad_norm": 1.2994409096323183, "learning_rate": 9.498158222792484e-06, "loss": 0.2033, "step": 2119 }, { "epoch": 0.17, "grad_norm": 1.319524117655798, "learning_rate": 9.497592391280672e-06, "loss": 0.2061, "step": 2120 }, { "epoch": 0.17, "grad_norm": 1.4988872726996796, "learning_rate": 9.497026257831856e-06, "loss": 0.2258, "step": 2121 }, { "epoch": 0.17, "grad_norm": 1.6105837276298889, "learning_rate": 9.496459822484043e-06, "loss": 0.194, "step": 2122 }, { "epoch": 0.17, "grad_norm": 4.550800217821765, "learning_rate": 9.495893085275256e-06, "loss": 0.7505, "step": 2123 }, { "epoch": 0.17, "grad_norm": 1.5692469587642102, "learning_rate": 9.495326046243547e-06, "loss": 0.2545, "step": 2124 }, { "epoch": 0.17, "grad_norm": 1.54748852508291, "learning_rate": 9.494758705426978e-06, "loss": 0.2978, "step": 2125 }, { "epoch": 0.17, "grad_norm": 1.49079564934414, "learning_rate": 9.494191062863638e-06, "loss": 0.2052, "step": 2126 }, { "epoch": 0.17, "grad_norm": 1.3271125793021075, "learning_rate": 9.493623118591638e-06, "loss": 0.2153, "step": 2127 }, { "epoch": 0.17, "grad_norm": 4.680213730970512, "learning_rate": 9.4930548726491e-06, "loss": 0.5648, "step": 2128 }, { "epoch": 0.17, "grad_norm": 1.5242805027692423, "learning_rate": 9.492486325074177e-06, "loss": 0.2018, "step": 2129 }, { "epoch": 0.17, "grad_norm": 1.4846476766560741, "learning_rate": 9.491917475905034e-06, "loss": 0.234, "step": 2130 }, { "epoch": 0.17, "grad_norm": 1.525320937546331, "learning_rate": 9.491348325179861e-06, "loss": 0.2051, "step": 2131 }, { "epoch": 0.17, "grad_norm": 1.820613291417657, "learning_rate": 9.490778872936867e-06, "loss": 0.1938, "step": 2132 }, { "epoch": 0.17, "grad_norm": 1.288141869809796, "learning_rate": 9.490209119214282e-06, "loss": 0.1612, "step": 2133 }, { "epoch": 0.17, "grad_norm": 1.5047683918927903, "learning_rate": 9.489639064050353e-06, "loss": 0.2347, "step": 2134 }, { "epoch": 0.17, "grad_norm": 6.346380564933354, "learning_rate": 9.48906870748335e-06, "loss": 0.6837, "step": 2135 }, { "epoch": 0.17, "grad_norm": 1.4627085858360793, "learning_rate": 9.488498049551563e-06, "loss": 0.2368, "step": 2136 }, { "epoch": 0.17, "grad_norm": 6.333896780750312, "learning_rate": 9.487927090293302e-06, "loss": 0.5522, "step": 2137 }, { "epoch": 0.17, "grad_norm": 1.4753513405176741, "learning_rate": 9.4873558297469e-06, "loss": 0.2504, "step": 2138 }, { "epoch": 0.17, "grad_norm": 1.4863404734720786, "learning_rate": 9.486784267950704e-06, "loss": 0.289, "step": 2139 }, { "epoch": 0.17, "grad_norm": 1.4997659279544855, "learning_rate": 9.486212404943084e-06, "loss": 0.2192, "step": 2140 }, { "epoch": 0.17, "grad_norm": 1.3558126149183962, "learning_rate": 9.485640240762434e-06, "loss": 0.2306, "step": 2141 }, { "epoch": 0.17, "grad_norm": 1.6283515566627347, "learning_rate": 9.485067775447164e-06, "loss": 0.26, "step": 2142 }, { "epoch": 0.17, "grad_norm": 1.6117645681041304, "learning_rate": 9.484495009035705e-06, "loss": 0.3015, "step": 2143 }, { "epoch": 0.17, "grad_norm": 1.5317196743038302, "learning_rate": 9.483921941566508e-06, "loss": 0.2423, "step": 2144 }, { "epoch": 0.17, "grad_norm": 1.504027963909363, "learning_rate": 9.483348573078046e-06, "loss": 0.2268, "step": 2145 }, { "epoch": 0.17, "grad_norm": 1.3338812570267844, "learning_rate": 9.48277490360881e-06, "loss": 0.2343, "step": 2146 }, { "epoch": 0.17, "grad_norm": 1.8553412194552557, "learning_rate": 9.482200933197312e-06, "loss": 0.2446, "step": 2147 }, { "epoch": 0.17, "grad_norm": 1.4586064422482663, "learning_rate": 9.481626661882084e-06, "loss": 0.211, "step": 2148 }, { "epoch": 0.17, "grad_norm": 1.5060889303335863, "learning_rate": 9.48105208970168e-06, "loss": 0.2029, "step": 2149 }, { "epoch": 0.17, "grad_norm": 1.4789353744808005, "learning_rate": 9.480477216694674e-06, "loss": 0.2446, "step": 2150 }, { "epoch": 0.17, "grad_norm": 1.536782965808731, "learning_rate": 9.479902042899655e-06, "loss": 0.2579, "step": 2151 }, { "epoch": 0.17, "grad_norm": 1.4569749023819965, "learning_rate": 9.47932656835524e-06, "loss": 0.2493, "step": 2152 }, { "epoch": 0.17, "grad_norm": 4.26424453942703, "learning_rate": 9.47875079310006e-06, "loss": 0.4321, "step": 2153 }, { "epoch": 0.17, "grad_norm": 1.4323574846328135, "learning_rate": 9.478174717172768e-06, "loss": 0.2283, "step": 2154 }, { "epoch": 0.17, "grad_norm": 1.558275622189435, "learning_rate": 9.47759834061204e-06, "loss": 0.2467, "step": 2155 }, { "epoch": 0.17, "grad_norm": 1.5004812858716223, "learning_rate": 9.477021663456569e-06, "loss": 0.2363, "step": 2156 }, { "epoch": 0.17, "grad_norm": 5.413430893216606, "learning_rate": 9.476444685745067e-06, "loss": 0.7435, "step": 2157 }, { "epoch": 0.17, "grad_norm": 1.552531712318251, "learning_rate": 9.475867407516272e-06, "loss": 0.2535, "step": 2158 }, { "epoch": 0.17, "grad_norm": 5.091096127791431, "learning_rate": 9.475289828808937e-06, "loss": 0.682, "step": 2159 }, { "epoch": 0.17, "grad_norm": 1.2992279651051872, "learning_rate": 9.474711949661835e-06, "loss": 0.1956, "step": 2160 }, { "epoch": 0.17, "grad_norm": 1.5254447756918428, "learning_rate": 9.474133770113763e-06, "loss": 0.2428, "step": 2161 }, { "epoch": 0.17, "grad_norm": 1.56535565203796, "learning_rate": 9.473555290203534e-06, "loss": 0.2003, "step": 2162 }, { "epoch": 0.17, "grad_norm": 1.4146840246637304, "learning_rate": 9.472976509969984e-06, "loss": 0.1744, "step": 2163 }, { "epoch": 0.17, "grad_norm": 4.972813395348676, "learning_rate": 9.47239742945197e-06, "loss": 0.603, "step": 2164 }, { "epoch": 0.17, "grad_norm": 7.391583479936229, "learning_rate": 9.471818048688364e-06, "loss": 0.7076, "step": 2165 }, { "epoch": 0.17, "grad_norm": 1.5273605663717216, "learning_rate": 9.471238367718064e-06, "loss": 0.2475, "step": 2166 }, { "epoch": 0.17, "grad_norm": 1.5058994448809844, "learning_rate": 9.470658386579983e-06, "loss": 0.2424, "step": 2167 }, { "epoch": 0.17, "grad_norm": 1.4124827274546083, "learning_rate": 9.470078105313062e-06, "loss": 0.194, "step": 2168 }, { "epoch": 0.17, "grad_norm": 1.294539221616167, "learning_rate": 9.469497523956253e-06, "loss": 0.1832, "step": 2169 }, { "epoch": 0.17, "grad_norm": 1.4673909426449274, "learning_rate": 9.468916642548534e-06, "loss": 0.2666, "step": 2170 }, { "epoch": 0.17, "grad_norm": 8.307828627227948, "learning_rate": 9.468335461128898e-06, "loss": 0.6339, "step": 2171 }, { "epoch": 0.17, "grad_norm": 6.096639182785343, "learning_rate": 9.467753979736365e-06, "loss": 0.7023, "step": 2172 }, { "epoch": 0.17, "grad_norm": 1.286089408773351, "learning_rate": 9.467172198409971e-06, "loss": 0.1935, "step": 2173 }, { "epoch": 0.17, "grad_norm": 1.3258447360043475, "learning_rate": 9.466590117188773e-06, "loss": 0.1917, "step": 2174 }, { "epoch": 0.17, "grad_norm": 1.6636379762354336, "learning_rate": 9.466007736111846e-06, "loss": 0.2243, "step": 2175 }, { "epoch": 0.17, "grad_norm": 1.2789750861085092, "learning_rate": 9.465425055218289e-06, "loss": 0.1868, "step": 2176 }, { "epoch": 0.17, "grad_norm": 1.4551335578557851, "learning_rate": 9.464842074547218e-06, "loss": 0.24, "step": 2177 }, { "epoch": 0.17, "grad_norm": 1.3278702168404326, "learning_rate": 9.464258794137771e-06, "loss": 0.2532, "step": 2178 }, { "epoch": 0.17, "grad_norm": 1.437916550773484, "learning_rate": 9.463675214029104e-06, "loss": 0.2822, "step": 2179 }, { "epoch": 0.17, "grad_norm": 6.129141880050032, "learning_rate": 9.463091334260397e-06, "loss": 0.5549, "step": 2180 }, { "epoch": 0.17, "grad_norm": 1.2691856699432087, "learning_rate": 9.462507154870846e-06, "loss": 0.223, "step": 2181 }, { "epoch": 0.17, "grad_norm": 1.5007735901182446, "learning_rate": 9.461922675899668e-06, "loss": 0.2059, "step": 2182 }, { "epoch": 0.17, "grad_norm": 1.4137550893693016, "learning_rate": 9.4613378973861e-06, "loss": 0.181, "step": 2183 }, { "epoch": 0.17, "grad_norm": 1.255164882730478, "learning_rate": 9.460752819369405e-06, "loss": 0.2046, "step": 2184 }, { "epoch": 0.17, "grad_norm": 1.4414349439819372, "learning_rate": 9.460167441888855e-06, "loss": 0.1944, "step": 2185 }, { "epoch": 0.17, "grad_norm": 1.3020724216845385, "learning_rate": 9.459581764983752e-06, "loss": 0.2041, "step": 2186 }, { "epoch": 0.17, "grad_norm": 1.4215039690706694, "learning_rate": 9.458995788693414e-06, "loss": 0.2284, "step": 2187 }, { "epoch": 0.18, "grad_norm": 1.3489758337410664, "learning_rate": 9.458409513057176e-06, "loss": 0.2143, "step": 2188 }, { "epoch": 0.18, "grad_norm": 1.5903309201331473, "learning_rate": 9.457822938114401e-06, "loss": 0.2578, "step": 2189 }, { "epoch": 0.18, "grad_norm": 1.295301861322975, "learning_rate": 9.457236063904465e-06, "loss": 0.2437, "step": 2190 }, { "epoch": 0.18, "grad_norm": 6.964882309439014, "learning_rate": 9.456648890466767e-06, "loss": 0.7316, "step": 2191 }, { "epoch": 0.18, "grad_norm": 6.963241638325417, "learning_rate": 9.456061417840727e-06, "loss": 0.604, "step": 2192 }, { "epoch": 0.18, "grad_norm": 1.417310052415083, "learning_rate": 9.455473646065782e-06, "loss": 0.2019, "step": 2193 }, { "epoch": 0.18, "grad_norm": 1.3665399739551956, "learning_rate": 9.454885575181391e-06, "loss": 0.1901, "step": 2194 }, { "epoch": 0.18, "grad_norm": 1.3937418077228976, "learning_rate": 9.454297205227034e-06, "loss": 0.2287, "step": 2195 }, { "epoch": 0.18, "grad_norm": 1.7255294675640807, "learning_rate": 9.45370853624221e-06, "loss": 0.2604, "step": 2196 }, { "epoch": 0.18, "grad_norm": 1.5374609973935005, "learning_rate": 9.453119568266435e-06, "loss": 0.2092, "step": 2197 }, { "epoch": 0.18, "grad_norm": 1.3604098807649905, "learning_rate": 9.452530301339254e-06, "loss": 0.1765, "step": 2198 }, { "epoch": 0.18, "grad_norm": 1.5340144135385738, "learning_rate": 9.451940735500222e-06, "loss": 0.207, "step": 2199 }, { "epoch": 0.18, "grad_norm": 1.4878568701443773, "learning_rate": 9.451350870788922e-06, "loss": 0.177, "step": 2200 }, { "epoch": 0.18, "grad_norm": 1.5957510906960977, "learning_rate": 9.450760707244948e-06, "loss": 0.2267, "step": 2201 }, { "epoch": 0.18, "grad_norm": 1.5303839463381181, "learning_rate": 9.450170244907924e-06, "loss": 0.1811, "step": 2202 }, { "epoch": 0.18, "grad_norm": 5.048135481378952, "learning_rate": 9.449579483817487e-06, "loss": 0.4253, "step": 2203 }, { "epoch": 0.18, "grad_norm": 1.3785729636903892, "learning_rate": 9.448988424013298e-06, "loss": 0.2227, "step": 2204 }, { "epoch": 0.18, "grad_norm": 1.3720288581033389, "learning_rate": 9.448397065535037e-06, "loss": 0.2146, "step": 2205 }, { "epoch": 0.18, "grad_norm": 1.32810794469314, "learning_rate": 9.4478054084224e-06, "loss": 0.2136, "step": 2206 }, { "epoch": 0.18, "grad_norm": 1.5341222755225963, "learning_rate": 9.447213452715114e-06, "loss": 0.2383, "step": 2207 }, { "epoch": 0.18, "grad_norm": 5.560362947103128, "learning_rate": 9.446621198452912e-06, "loss": 0.5618, "step": 2208 }, { "epoch": 0.18, "grad_norm": 1.343812869594025, "learning_rate": 9.446028645675556e-06, "loss": 0.2459, "step": 2209 }, { "epoch": 0.18, "grad_norm": 1.472817462672581, "learning_rate": 9.445435794422826e-06, "loss": 0.2418, "step": 2210 }, { "epoch": 0.18, "grad_norm": 1.3431266951288083, "learning_rate": 9.444842644734521e-06, "loss": 0.225, "step": 2211 }, { "epoch": 0.18, "grad_norm": 1.3341464195510009, "learning_rate": 9.444249196650465e-06, "loss": 0.1882, "step": 2212 }, { "epoch": 0.18, "grad_norm": 1.2475789038924463, "learning_rate": 9.443655450210494e-06, "loss": 0.1259, "step": 2213 }, { "epoch": 0.18, "grad_norm": 1.475017705770073, "learning_rate": 9.443061405454468e-06, "loss": 0.2344, "step": 2214 }, { "epoch": 0.18, "grad_norm": 1.3255628066564327, "learning_rate": 9.442467062422267e-06, "loss": 0.2448, "step": 2215 }, { "epoch": 0.18, "grad_norm": 1.5377187216748018, "learning_rate": 9.441872421153792e-06, "loss": 0.3013, "step": 2216 }, { "epoch": 0.18, "grad_norm": 1.3072848885932593, "learning_rate": 9.441277481688964e-06, "loss": 0.2189, "step": 2217 }, { "epoch": 0.18, "grad_norm": 1.6497872471436115, "learning_rate": 9.440682244067724e-06, "loss": 0.2129, "step": 2218 }, { "epoch": 0.18, "grad_norm": 1.3108276495834656, "learning_rate": 9.440086708330028e-06, "loss": 0.1855, "step": 2219 }, { "epoch": 0.18, "grad_norm": 1.4549609342170728, "learning_rate": 9.439490874515859e-06, "loss": 0.2126, "step": 2220 }, { "epoch": 0.18, "grad_norm": 4.697861787304837, "learning_rate": 9.438894742665217e-06, "loss": 0.6835, "step": 2221 }, { "epoch": 0.18, "grad_norm": 1.5347843136283408, "learning_rate": 9.43829831281812e-06, "loss": 0.2544, "step": 2222 }, { "epoch": 0.18, "grad_norm": 1.425218775855989, "learning_rate": 9.437701585014608e-06, "loss": 0.2594, "step": 2223 }, { "epoch": 0.18, "grad_norm": 1.3888331029979648, "learning_rate": 9.437104559294748e-06, "loss": 0.2194, "step": 2224 }, { "epoch": 0.18, "grad_norm": 1.7729432926220772, "learning_rate": 9.436507235698613e-06, "loss": 0.2157, "step": 2225 }, { "epoch": 0.18, "grad_norm": 1.5589357900648926, "learning_rate": 9.435909614266303e-06, "loss": 0.2417, "step": 2226 }, { "epoch": 0.18, "grad_norm": 9.170936968921524, "learning_rate": 9.435311695037943e-06, "loss": 0.6219, "step": 2227 }, { "epoch": 0.18, "grad_norm": 1.4257273523695209, "learning_rate": 9.43471347805367e-06, "loss": 0.2331, "step": 2228 }, { "epoch": 0.18, "grad_norm": 1.7165684337153404, "learning_rate": 9.434114963353644e-06, "loss": 0.2414, "step": 2229 }, { "epoch": 0.18, "grad_norm": 1.499368571544321, "learning_rate": 9.433516150978045e-06, "loss": 0.2321, "step": 2230 }, { "epoch": 0.18, "grad_norm": 1.4124288828783422, "learning_rate": 9.432917040967074e-06, "loss": 0.2332, "step": 2231 }, { "epoch": 0.18, "grad_norm": 1.286425194443099, "learning_rate": 9.432317633360952e-06, "loss": 0.198, "step": 2232 }, { "epoch": 0.18, "grad_norm": 1.3630367902161011, "learning_rate": 9.431717928199916e-06, "loss": 0.2211, "step": 2233 }, { "epoch": 0.18, "grad_norm": 5.047492976071003, "learning_rate": 9.43111792552423e-06, "loss": 0.5947, "step": 2234 }, { "epoch": 0.18, "grad_norm": 1.3783653260827184, "learning_rate": 9.430517625374171e-06, "loss": 0.201, "step": 2235 }, { "epoch": 0.18, "grad_norm": 6.9191294011434685, "learning_rate": 9.42991702779004e-06, "loss": 0.5277, "step": 2236 }, { "epoch": 0.18, "grad_norm": 1.5429923558207153, "learning_rate": 9.429316132812156e-06, "loss": 0.2278, "step": 2237 }, { "epoch": 0.18, "grad_norm": 1.2044915910082314, "learning_rate": 9.428714940480861e-06, "loss": 0.2013, "step": 2238 }, { "epoch": 0.18, "grad_norm": 1.424683103662734, "learning_rate": 9.428113450836514e-06, "loss": 0.2208, "step": 2239 }, { "epoch": 0.18, "grad_norm": 1.2379693625984003, "learning_rate": 9.427511663919492e-06, "loss": 0.1844, "step": 2240 }, { "epoch": 0.18, "grad_norm": 1.2824278145572752, "learning_rate": 9.426909579770197e-06, "loss": 0.1897, "step": 2241 }, { "epoch": 0.18, "grad_norm": 1.3567667380975004, "learning_rate": 9.426307198429053e-06, "loss": 0.1793, "step": 2242 }, { "epoch": 0.18, "grad_norm": 1.2598350002252732, "learning_rate": 9.425704519936492e-06, "loss": 0.1734, "step": 2243 }, { "epoch": 0.18, "grad_norm": 1.430557818807667, "learning_rate": 9.425101544332979e-06, "loss": 0.2091, "step": 2244 }, { "epoch": 0.18, "grad_norm": 1.3784187713522942, "learning_rate": 9.424498271658991e-06, "loss": 0.2168, "step": 2245 }, { "epoch": 0.18, "grad_norm": 1.4158224674815283, "learning_rate": 9.423894701955028e-06, "loss": 0.2056, "step": 2246 }, { "epoch": 0.18, "grad_norm": 1.4250845308816862, "learning_rate": 9.42329083526161e-06, "loss": 0.2617, "step": 2247 }, { "epoch": 0.18, "grad_norm": 1.556938676349873, "learning_rate": 9.422686671619277e-06, "loss": 0.2485, "step": 2248 }, { "epoch": 0.18, "grad_norm": 1.5543447924649205, "learning_rate": 9.422082211068586e-06, "loss": 0.2606, "step": 2249 }, { "epoch": 0.18, "grad_norm": 1.500565891959398, "learning_rate": 9.421477453650118e-06, "loss": 0.2185, "step": 2250 }, { "epoch": 0.18, "grad_norm": 1.431173797130157, "learning_rate": 9.420872399404473e-06, "loss": 0.2197, "step": 2251 }, { "epoch": 0.18, "grad_norm": 1.4146063595885454, "learning_rate": 9.42026704837227e-06, "loss": 0.2163, "step": 2252 }, { "epoch": 0.18, "grad_norm": 1.4833239989390457, "learning_rate": 9.419661400594145e-06, "loss": 0.2208, "step": 2253 }, { "epoch": 0.18, "grad_norm": 1.2566977666801733, "learning_rate": 9.41905545611076e-06, "loss": 0.2243, "step": 2254 }, { "epoch": 0.18, "grad_norm": 1.4782805197782598, "learning_rate": 9.418449214962793e-06, "loss": 0.2279, "step": 2255 }, { "epoch": 0.18, "grad_norm": 1.4540732160502314, "learning_rate": 9.417842677190944e-06, "loss": 0.2369, "step": 2256 }, { "epoch": 0.18, "grad_norm": 1.6929880675940345, "learning_rate": 9.417235842835929e-06, "loss": 0.2256, "step": 2257 }, { "epoch": 0.18, "grad_norm": 1.4899751865141602, "learning_rate": 9.416628711938489e-06, "loss": 0.2432, "step": 2258 }, { "epoch": 0.18, "grad_norm": 1.3691820456239192, "learning_rate": 9.41602128453938e-06, "loss": 0.2161, "step": 2259 }, { "epoch": 0.18, "grad_norm": 10.354782503148032, "learning_rate": 9.415413560679385e-06, "loss": 0.5143, "step": 2260 }, { "epoch": 0.18, "grad_norm": 1.7227183919347557, "learning_rate": 9.414805540399298e-06, "loss": 0.2459, "step": 2261 }, { "epoch": 0.18, "grad_norm": 1.6625817310054618, "learning_rate": 9.414197223739939e-06, "loss": 0.2561, "step": 2262 }, { "epoch": 0.18, "grad_norm": 1.482168686816286, "learning_rate": 9.413588610742146e-06, "loss": 0.2514, "step": 2263 }, { "epoch": 0.18, "grad_norm": 1.5591362269793154, "learning_rate": 9.412979701446776e-06, "loss": 0.2555, "step": 2264 }, { "epoch": 0.18, "grad_norm": 1.370238468176538, "learning_rate": 9.412370495894708e-06, "loss": 0.2231, "step": 2265 }, { "epoch": 0.18, "grad_norm": 1.6680891909785376, "learning_rate": 9.41176099412684e-06, "loss": 0.2612, "step": 2266 }, { "epoch": 0.18, "grad_norm": 1.6364112331692031, "learning_rate": 9.41115119618409e-06, "loss": 0.2633, "step": 2267 }, { "epoch": 0.18, "grad_norm": 1.2245051535314588, "learning_rate": 9.410541102107394e-06, "loss": 0.2207, "step": 2268 }, { "epoch": 0.18, "grad_norm": 7.169399850155293, "learning_rate": 9.40993071193771e-06, "loss": 0.6819, "step": 2269 }, { "epoch": 0.18, "grad_norm": 1.3548142848762887, "learning_rate": 9.409320025716018e-06, "loss": 0.2348, "step": 2270 }, { "epoch": 0.18, "grad_norm": 1.2966628787183774, "learning_rate": 9.40870904348331e-06, "loss": 0.2281, "step": 2271 }, { "epoch": 0.18, "grad_norm": 1.308631681397771, "learning_rate": 9.408097765280608e-06, "loss": 0.1797, "step": 2272 }, { "epoch": 0.18, "grad_norm": 1.5015021105944997, "learning_rate": 9.407486191148947e-06, "loss": 0.2369, "step": 2273 }, { "epoch": 0.18, "grad_norm": 1.2750236753729862, "learning_rate": 9.406874321129384e-06, "loss": 0.1499, "step": 2274 }, { "epoch": 0.18, "grad_norm": 1.3288722225950578, "learning_rate": 9.406262155262995e-06, "loss": 0.2315, "step": 2275 }, { "epoch": 0.18, "grad_norm": 1.4320337509544816, "learning_rate": 9.405649693590877e-06, "loss": 0.266, "step": 2276 }, { "epoch": 0.18, "grad_norm": 7.778047758296228, "learning_rate": 9.405036936154146e-06, "loss": 0.6217, "step": 2277 }, { "epoch": 0.18, "grad_norm": 1.496391342001259, "learning_rate": 9.40442388299394e-06, "loss": 0.2237, "step": 2278 }, { "epoch": 0.18, "grad_norm": 1.3519231223068302, "learning_rate": 9.403810534151411e-06, "loss": 0.1814, "step": 2279 }, { "epoch": 0.18, "grad_norm": 1.3952159726439315, "learning_rate": 9.403196889667742e-06, "loss": 0.2253, "step": 2280 }, { "epoch": 0.18, "grad_norm": 1.2837361425891198, "learning_rate": 9.402582949584122e-06, "loss": 0.1717, "step": 2281 }, { "epoch": 0.18, "grad_norm": 1.5225567177379653, "learning_rate": 9.40196871394177e-06, "loss": 0.2192, "step": 2282 }, { "epoch": 0.18, "grad_norm": 1.4204188714945731, "learning_rate": 9.401354182781921e-06, "loss": 0.2132, "step": 2283 }, { "epoch": 0.18, "grad_norm": 1.4897745662555772, "learning_rate": 9.400739356145829e-06, "loss": 0.2193, "step": 2284 }, { "epoch": 0.18, "grad_norm": 1.2669533932800552, "learning_rate": 9.400124234074772e-06, "loss": 0.1891, "step": 2285 }, { "epoch": 0.18, "grad_norm": 1.6126075346202686, "learning_rate": 9.399508816610042e-06, "loss": 0.2235, "step": 2286 }, { "epoch": 0.18, "grad_norm": 1.48447667500245, "learning_rate": 9.398893103792956e-06, "loss": 0.2237, "step": 2287 }, { "epoch": 0.18, "grad_norm": 1.4336585313324313, "learning_rate": 9.398277095664848e-06, "loss": 0.2264, "step": 2288 }, { "epoch": 0.18, "grad_norm": 8.452047312255996, "learning_rate": 9.397660792267072e-06, "loss": 0.5344, "step": 2289 }, { "epoch": 0.18, "grad_norm": 1.5354463918934596, "learning_rate": 9.397044193641e-06, "loss": 0.2436, "step": 2290 }, { "epoch": 0.18, "grad_norm": 5.89052187198876, "learning_rate": 9.396427299828033e-06, "loss": 0.6043, "step": 2291 }, { "epoch": 0.18, "grad_norm": 5.6291210718842715, "learning_rate": 9.395810110869579e-06, "loss": 0.5204, "step": 2292 }, { "epoch": 0.18, "grad_norm": 1.5019113663227364, "learning_rate": 9.395192626807072e-06, "loss": 0.2559, "step": 2293 }, { "epoch": 0.18, "grad_norm": 1.4394877217179112, "learning_rate": 9.39457484768197e-06, "loss": 0.2089, "step": 2294 }, { "epoch": 0.18, "grad_norm": 1.2876524414471524, "learning_rate": 9.393956773535742e-06, "loss": 0.189, "step": 2295 }, { "epoch": 0.18, "grad_norm": 1.3627823598710707, "learning_rate": 9.393338404409881e-06, "loss": 0.223, "step": 2296 }, { "epoch": 0.18, "grad_norm": 1.3623140768346185, "learning_rate": 9.392719740345904e-06, "loss": 0.2059, "step": 2297 }, { "epoch": 0.18, "grad_norm": 1.3030991324587635, "learning_rate": 9.39210078138534e-06, "loss": 0.2202, "step": 2298 }, { "epoch": 0.18, "grad_norm": 1.5096265512260567, "learning_rate": 9.391481527569744e-06, "loss": 0.2177, "step": 2299 }, { "epoch": 0.18, "grad_norm": 1.5496217167619466, "learning_rate": 9.390861978940687e-06, "loss": 0.2018, "step": 2300 }, { "epoch": 0.18, "grad_norm": 1.3387154039716214, "learning_rate": 9.390242135539761e-06, "loss": 0.1685, "step": 2301 }, { "epoch": 0.18, "grad_norm": 1.4027077482872332, "learning_rate": 9.38962199740858e-06, "loss": 0.2063, "step": 2302 }, { "epoch": 0.18, "grad_norm": 6.000993102823497, "learning_rate": 9.389001564588773e-06, "loss": 0.5878, "step": 2303 }, { "epoch": 0.18, "grad_norm": 1.5151366592570807, "learning_rate": 9.388380837121993e-06, "loss": 0.2121, "step": 2304 }, { "epoch": 0.18, "grad_norm": 1.580363034275209, "learning_rate": 9.387759815049911e-06, "loss": 0.24, "step": 2305 }, { "epoch": 0.18, "grad_norm": 8.106921095293227, "learning_rate": 9.38713849841422e-06, "loss": 0.6016, "step": 2306 }, { "epoch": 0.18, "grad_norm": 1.6172350327484344, "learning_rate": 9.386516887256627e-06, "loss": 0.2794, "step": 2307 }, { "epoch": 0.18, "grad_norm": 1.452730447957342, "learning_rate": 9.385894981618866e-06, "loss": 0.2421, "step": 2308 }, { "epoch": 0.18, "grad_norm": 1.3796145348848519, "learning_rate": 9.385272781542686e-06, "loss": 0.2298, "step": 2309 }, { "epoch": 0.18, "grad_norm": 1.4445220872318818, "learning_rate": 9.384650287069856e-06, "loss": 0.1849, "step": 2310 }, { "epoch": 0.18, "grad_norm": 1.4470763287639767, "learning_rate": 9.384027498242168e-06, "loss": 0.191, "step": 2311 }, { "epoch": 0.18, "grad_norm": 1.4777095847548318, "learning_rate": 9.38340441510143e-06, "loss": 0.2171, "step": 2312 }, { "epoch": 0.19, "grad_norm": 1.365387148393817, "learning_rate": 9.382781037689475e-06, "loss": 0.1972, "step": 2313 }, { "epoch": 0.19, "grad_norm": 5.848563998496717, "learning_rate": 9.382157366048146e-06, "loss": 0.5938, "step": 2314 }, { "epoch": 0.19, "grad_norm": 7.1728152841064245, "learning_rate": 9.381533400219319e-06, "loss": 0.6796, "step": 2315 }, { "epoch": 0.19, "grad_norm": 1.423788544392309, "learning_rate": 9.380909140244878e-06, "loss": 0.2499, "step": 2316 }, { "epoch": 0.19, "grad_norm": 1.5508273829340398, "learning_rate": 9.380284586166732e-06, "loss": 0.2699, "step": 2317 }, { "epoch": 0.19, "grad_norm": 1.5861860823822622, "learning_rate": 9.379659738026812e-06, "loss": 0.294, "step": 2318 }, { "epoch": 0.19, "grad_norm": 1.2863509195412839, "learning_rate": 9.379034595867062e-06, "loss": 0.2004, "step": 2319 }, { "epoch": 0.19, "grad_norm": 1.3201300150206785, "learning_rate": 9.378409159729454e-06, "loss": 0.178, "step": 2320 }, { "epoch": 0.19, "grad_norm": 1.3339717944515472, "learning_rate": 9.37778342965597e-06, "loss": 0.2096, "step": 2321 }, { "epoch": 0.19, "grad_norm": 1.530623793273264, "learning_rate": 9.377157405688622e-06, "loss": 0.2359, "step": 2322 }, { "epoch": 0.19, "grad_norm": 1.5981000538984693, "learning_rate": 9.376531087869435e-06, "loss": 0.2488, "step": 2323 }, { "epoch": 0.19, "grad_norm": 5.562309712028729, "learning_rate": 9.375904476240457e-06, "loss": 0.4018, "step": 2324 }, { "epoch": 0.19, "grad_norm": 1.4080207935052003, "learning_rate": 9.37527757084375e-06, "loss": 0.2659, "step": 2325 }, { "epoch": 0.19, "grad_norm": 1.4912483791855708, "learning_rate": 9.374650371721405e-06, "loss": 0.2034, "step": 2326 }, { "epoch": 0.19, "grad_norm": 1.3779006290994664, "learning_rate": 9.374022878915525e-06, "loss": 0.2517, "step": 2327 }, { "epoch": 0.19, "grad_norm": 1.5894219295573149, "learning_rate": 9.373395092468238e-06, "loss": 0.2478, "step": 2328 }, { "epoch": 0.19, "grad_norm": 1.3681202981992624, "learning_rate": 9.372767012421687e-06, "loss": 0.2095, "step": 2329 }, { "epoch": 0.19, "grad_norm": 1.6157463860274324, "learning_rate": 9.372138638818036e-06, "loss": 0.242, "step": 2330 }, { "epoch": 0.19, "grad_norm": 9.171450320342379, "learning_rate": 9.371509971699471e-06, "loss": 0.6706, "step": 2331 }, { "epoch": 0.19, "grad_norm": 1.3579047532050663, "learning_rate": 9.370881011108198e-06, "loss": 0.1917, "step": 2332 }, { "epoch": 0.19, "grad_norm": 1.6272202671548452, "learning_rate": 9.370251757086439e-06, "loss": 0.2427, "step": 2333 }, { "epoch": 0.19, "grad_norm": 1.43516458105959, "learning_rate": 9.36962220967644e-06, "loss": 0.1965, "step": 2334 }, { "epoch": 0.19, "grad_norm": 1.5127317609894841, "learning_rate": 9.36899236892046e-06, "loss": 0.1895, "step": 2335 }, { "epoch": 0.19, "grad_norm": 1.479339381212489, "learning_rate": 9.368362234860785e-06, "loss": 0.2413, "step": 2336 }, { "epoch": 0.19, "grad_norm": 1.3589742055044967, "learning_rate": 9.36773180753972e-06, "loss": 0.2012, "step": 2337 }, { "epoch": 0.19, "grad_norm": 1.3593057048126937, "learning_rate": 9.367101086999582e-06, "loss": 0.2046, "step": 2338 }, { "epoch": 0.19, "grad_norm": 9.781549597069835, "learning_rate": 9.366470073282718e-06, "loss": 0.6956, "step": 2339 }, { "epoch": 0.19, "grad_norm": 1.4917263355565447, "learning_rate": 9.365838766431487e-06, "loss": 0.2498, "step": 2340 }, { "epoch": 0.19, "grad_norm": 1.4735055991746548, "learning_rate": 9.365207166488273e-06, "loss": 0.2203, "step": 2341 }, { "epoch": 0.19, "grad_norm": 1.390455833402385, "learning_rate": 9.364575273495475e-06, "loss": 0.1999, "step": 2342 }, { "epoch": 0.19, "grad_norm": 1.3652222123307436, "learning_rate": 9.363943087495515e-06, "loss": 0.2049, "step": 2343 }, { "epoch": 0.19, "grad_norm": 1.5163772394091823, "learning_rate": 9.363310608530834e-06, "loss": 0.2051, "step": 2344 }, { "epoch": 0.19, "grad_norm": 1.2999708797126925, "learning_rate": 9.36267783664389e-06, "loss": 0.2099, "step": 2345 }, { "epoch": 0.19, "grad_norm": 1.3123219392463945, "learning_rate": 9.362044771877164e-06, "loss": 0.2149, "step": 2346 }, { "epoch": 0.19, "grad_norm": 1.3367676114336713, "learning_rate": 9.361411414273159e-06, "loss": 0.1979, "step": 2347 }, { "epoch": 0.19, "grad_norm": 1.3605132964306763, "learning_rate": 9.360777763874389e-06, "loss": 0.1637, "step": 2348 }, { "epoch": 0.19, "grad_norm": 1.5266572955723654, "learning_rate": 9.360143820723395e-06, "loss": 0.2209, "step": 2349 }, { "epoch": 0.19, "grad_norm": 1.5178431709728113, "learning_rate": 9.359509584862735e-06, "loss": 0.232, "step": 2350 }, { "epoch": 0.19, "grad_norm": 1.4796228238231883, "learning_rate": 9.35887505633499e-06, "loss": 0.2516, "step": 2351 }, { "epoch": 0.19, "grad_norm": 8.176693194648145, "learning_rate": 9.358240235182754e-06, "loss": 0.765, "step": 2352 }, { "epoch": 0.19, "grad_norm": 1.4913809824643796, "learning_rate": 9.357605121448648e-06, "loss": 0.2191, "step": 2353 }, { "epoch": 0.19, "grad_norm": 1.4568052241267102, "learning_rate": 9.356969715175305e-06, "loss": 0.2404, "step": 2354 }, { "epoch": 0.19, "grad_norm": 1.3235636555415984, "learning_rate": 9.356334016405383e-06, "loss": 0.2379, "step": 2355 }, { "epoch": 0.19, "grad_norm": 1.3861266394940404, "learning_rate": 9.355698025181561e-06, "loss": 0.2152, "step": 2356 }, { "epoch": 0.19, "grad_norm": 1.4010636261726943, "learning_rate": 9.355061741546533e-06, "loss": 0.2076, "step": 2357 }, { "epoch": 0.19, "grad_norm": 1.615820801241209, "learning_rate": 9.354425165543018e-06, "loss": 0.2133, "step": 2358 }, { "epoch": 0.19, "grad_norm": 1.4619290477397322, "learning_rate": 9.353788297213743e-06, "loss": 0.2533, "step": 2359 }, { "epoch": 0.19, "grad_norm": 1.5089855513715553, "learning_rate": 9.35315113660147e-06, "loss": 0.1853, "step": 2360 }, { "epoch": 0.19, "grad_norm": 1.4269663744798267, "learning_rate": 9.352513683748974e-06, "loss": 0.2541, "step": 2361 }, { "epoch": 0.19, "grad_norm": 1.4722174695678938, "learning_rate": 9.351875938699045e-06, "loss": 0.1752, "step": 2362 }, { "epoch": 0.19, "grad_norm": 1.518522418805461, "learning_rate": 9.351237901494498e-06, "loss": 0.2423, "step": 2363 }, { "epoch": 0.19, "grad_norm": 1.5362570190042253, "learning_rate": 9.35059957217817e-06, "loss": 0.2129, "step": 2364 }, { "epoch": 0.19, "grad_norm": 5.605574892206847, "learning_rate": 9.349960950792907e-06, "loss": 0.8395, "step": 2365 }, { "epoch": 0.19, "grad_norm": 1.543283674797875, "learning_rate": 9.349322037381587e-06, "loss": 0.2219, "step": 2366 }, { "epoch": 0.19, "grad_norm": 1.3981806754507369, "learning_rate": 9.348682831987101e-06, "loss": 0.25, "step": 2367 }, { "epoch": 0.19, "grad_norm": 9.909370059832323, "learning_rate": 9.348043334652362e-06, "loss": 0.7322, "step": 2368 }, { "epoch": 0.19, "grad_norm": 1.4107707031599575, "learning_rate": 9.347403545420298e-06, "loss": 0.2354, "step": 2369 }, { "epoch": 0.19, "grad_norm": 1.3231843110150259, "learning_rate": 9.346763464333862e-06, "loss": 0.1833, "step": 2370 }, { "epoch": 0.19, "grad_norm": 1.4673679706484994, "learning_rate": 9.346123091436024e-06, "loss": 0.2214, "step": 2371 }, { "epoch": 0.19, "grad_norm": 4.538997517146819, "learning_rate": 9.345482426769774e-06, "loss": 0.6032, "step": 2372 }, { "epoch": 0.19, "grad_norm": 1.265264994960151, "learning_rate": 9.344841470378125e-06, "loss": 0.1959, "step": 2373 }, { "epoch": 0.19, "grad_norm": 4.642179043237784, "learning_rate": 9.344200222304103e-06, "loss": 0.6994, "step": 2374 }, { "epoch": 0.19, "grad_norm": 5.462389335451799, "learning_rate": 9.343558682590757e-06, "loss": 0.7546, "step": 2375 }, { "epoch": 0.19, "grad_norm": 1.406716242992927, "learning_rate": 9.342916851281155e-06, "loss": 0.2443, "step": 2376 }, { "epoch": 0.19, "grad_norm": 1.6166901826026026, "learning_rate": 9.342274728418388e-06, "loss": 0.2683, "step": 2377 }, { "epoch": 0.19, "grad_norm": 1.4185659848446213, "learning_rate": 9.34163231404556e-06, "loss": 0.2835, "step": 2378 }, { "epoch": 0.19, "grad_norm": 1.5418184941791382, "learning_rate": 9.340989608205803e-06, "loss": 0.2707, "step": 2379 }, { "epoch": 0.19, "grad_norm": 1.370730856207796, "learning_rate": 9.34034661094226e-06, "loss": 0.2198, "step": 2380 }, { "epoch": 0.19, "grad_norm": 1.5013327735301913, "learning_rate": 9.339703322298098e-06, "loss": 0.2614, "step": 2381 }, { "epoch": 0.19, "grad_norm": 1.693488511571179, "learning_rate": 9.339059742316501e-06, "loss": 0.1875, "step": 2382 }, { "epoch": 0.19, "grad_norm": 1.2448175965719483, "learning_rate": 9.33841587104068e-06, "loss": 0.17, "step": 2383 }, { "epoch": 0.19, "grad_norm": 5.814135101473496, "learning_rate": 9.337771708513854e-06, "loss": 0.579, "step": 2384 }, { "epoch": 0.19, "grad_norm": 1.395920181242542, "learning_rate": 9.337127254779272e-06, "loss": 0.2448, "step": 2385 }, { "epoch": 0.19, "grad_norm": 1.4566858342891218, "learning_rate": 9.336482509880195e-06, "loss": 0.2559, "step": 2386 }, { "epoch": 0.19, "grad_norm": 1.4415964968113857, "learning_rate": 9.33583747385991e-06, "loss": 0.2558, "step": 2387 }, { "epoch": 0.19, "grad_norm": 1.2779400455329433, "learning_rate": 9.335192146761717e-06, "loss": 0.2052, "step": 2388 }, { "epoch": 0.19, "grad_norm": 1.5236088643087702, "learning_rate": 9.334546528628942e-06, "loss": 0.2316, "step": 2389 }, { "epoch": 0.19, "grad_norm": 1.4265296924486541, "learning_rate": 9.333900619504923e-06, "loss": 0.2248, "step": 2390 }, { "epoch": 0.19, "grad_norm": 1.3575410438730116, "learning_rate": 9.333254419433026e-06, "loss": 0.2378, "step": 2391 }, { "epoch": 0.19, "grad_norm": 8.392596317042315, "learning_rate": 9.332607928456629e-06, "loss": 0.5604, "step": 2392 }, { "epoch": 0.19, "grad_norm": 1.3420621854942743, "learning_rate": 9.331961146619135e-06, "loss": 0.2204, "step": 2393 }, { "epoch": 0.19, "grad_norm": 1.5032934531910074, "learning_rate": 9.331314073963964e-06, "loss": 0.2629, "step": 2394 }, { "epoch": 0.19, "grad_norm": 1.400943558165862, "learning_rate": 9.330666710534556e-06, "loss": 0.1968, "step": 2395 }, { "epoch": 0.19, "grad_norm": 1.3863295774574946, "learning_rate": 9.33001905637437e-06, "loss": 0.2261, "step": 2396 }, { "epoch": 0.19, "grad_norm": 1.379746568261968, "learning_rate": 9.329371111526887e-06, "loss": 0.2214, "step": 2397 }, { "epoch": 0.19, "grad_norm": 7.478170615036254, "learning_rate": 9.3287228760356e-06, "loss": 0.4555, "step": 2398 }, { "epoch": 0.19, "grad_norm": 1.3270583989416682, "learning_rate": 9.328074349944034e-06, "loss": 0.1948, "step": 2399 }, { "epoch": 0.19, "grad_norm": 1.583722019108332, "learning_rate": 9.327425533295725e-06, "loss": 0.2086, "step": 2400 }, { "epoch": 0.19, "grad_norm": 1.5418714702927594, "learning_rate": 9.326776426134223e-06, "loss": 0.2411, "step": 2401 }, { "epoch": 0.19, "grad_norm": 1.5368044413403081, "learning_rate": 9.326127028503114e-06, "loss": 0.2847, "step": 2402 }, { "epoch": 0.19, "grad_norm": 1.2989173478732807, "learning_rate": 9.325477340445989e-06, "loss": 0.1856, "step": 2403 }, { "epoch": 0.19, "grad_norm": 1.4757640132810568, "learning_rate": 9.324827362006464e-06, "loss": 0.2596, "step": 2404 }, { "epoch": 0.19, "grad_norm": 1.411318082042781, "learning_rate": 9.324177093228175e-06, "loss": 0.2246, "step": 2405 }, { "epoch": 0.19, "grad_norm": 1.251331714601607, "learning_rate": 9.323526534154775e-06, "loss": 0.1533, "step": 2406 }, { "epoch": 0.19, "grad_norm": 1.2692964951694725, "learning_rate": 9.32287568482994e-06, "loss": 0.163, "step": 2407 }, { "epoch": 0.19, "grad_norm": 1.3492730199877705, "learning_rate": 9.322224545297363e-06, "loss": 0.2036, "step": 2408 }, { "epoch": 0.19, "grad_norm": 7.550025497252298, "learning_rate": 9.321573115600755e-06, "loss": 0.6656, "step": 2409 }, { "epoch": 0.19, "grad_norm": 1.3919978756719757, "learning_rate": 9.32092139578385e-06, "loss": 0.2217, "step": 2410 }, { "epoch": 0.19, "grad_norm": 1.4727147054007221, "learning_rate": 9.3202693858904e-06, "loss": 0.2358, "step": 2411 }, { "epoch": 0.19, "grad_norm": 1.5640289860016316, "learning_rate": 9.319617085964177e-06, "loss": 0.2474, "step": 2412 }, { "epoch": 0.19, "grad_norm": 1.2609489212262928, "learning_rate": 9.31896449604897e-06, "loss": 0.1669, "step": 2413 }, { "epoch": 0.19, "grad_norm": 1.3274323230598555, "learning_rate": 9.318311616188592e-06, "loss": 0.2419, "step": 2414 }, { "epoch": 0.19, "grad_norm": 1.6269479979697206, "learning_rate": 9.31765844642687e-06, "loss": 0.231, "step": 2415 }, { "epoch": 0.19, "grad_norm": 1.434332059359609, "learning_rate": 9.317004986807656e-06, "loss": 0.2378, "step": 2416 }, { "epoch": 0.19, "grad_norm": 1.4898402002445745, "learning_rate": 9.316351237374816e-06, "loss": 0.2019, "step": 2417 }, { "epoch": 0.19, "grad_norm": 1.340695080988302, "learning_rate": 9.315697198172238e-06, "loss": 0.2032, "step": 2418 }, { "epoch": 0.19, "grad_norm": 1.3219477445287229, "learning_rate": 9.315042869243833e-06, "loss": 0.2007, "step": 2419 }, { "epoch": 0.19, "grad_norm": 1.4046389398510306, "learning_rate": 9.314388250633526e-06, "loss": 0.2562, "step": 2420 }, { "epoch": 0.19, "grad_norm": 6.021375975183638, "learning_rate": 9.313733342385263e-06, "loss": 0.5803, "step": 2421 }, { "epoch": 0.19, "grad_norm": 4.897434045292936, "learning_rate": 9.313078144543012e-06, "loss": 0.6076, "step": 2422 }, { "epoch": 0.19, "grad_norm": 1.4713017046220924, "learning_rate": 9.312422657150755e-06, "loss": 0.2091, "step": 2423 }, { "epoch": 0.19, "grad_norm": 1.4982149910403204, "learning_rate": 9.3117668802525e-06, "loss": 0.236, "step": 2424 }, { "epoch": 0.19, "grad_norm": 1.4617716221070527, "learning_rate": 9.31111081389227e-06, "loss": 0.2523, "step": 2425 }, { "epoch": 0.19, "grad_norm": 1.298663219149016, "learning_rate": 9.31045445811411e-06, "loss": 0.1994, "step": 2426 }, { "epoch": 0.19, "grad_norm": 1.4004326854239149, "learning_rate": 9.30979781296208e-06, "loss": 0.2307, "step": 2427 }, { "epoch": 0.19, "grad_norm": 1.3994074400141987, "learning_rate": 9.309140878480267e-06, "loss": 0.2443, "step": 2428 }, { "epoch": 0.19, "grad_norm": 1.381963114550332, "learning_rate": 9.308483654712769e-06, "loss": 0.2398, "step": 2429 }, { "epoch": 0.19, "grad_norm": 1.3301105150205204, "learning_rate": 9.30782614170371e-06, "loss": 0.2047, "step": 2430 }, { "epoch": 0.19, "grad_norm": 1.6639510373136035, "learning_rate": 9.30716833949723e-06, "loss": 0.2514, "step": 2431 }, { "epoch": 0.19, "grad_norm": 1.4682586644623121, "learning_rate": 9.306510248137488e-06, "loss": 0.2118, "step": 2432 }, { "epoch": 0.19, "grad_norm": 1.4232291572592344, "learning_rate": 9.305851867668665e-06, "loss": 0.2084, "step": 2433 }, { "epoch": 0.19, "grad_norm": 1.379806377695181, "learning_rate": 9.305193198134962e-06, "loss": 0.2139, "step": 2434 }, { "epoch": 0.19, "grad_norm": 1.5516557437166802, "learning_rate": 9.304534239580591e-06, "loss": 0.2596, "step": 2435 }, { "epoch": 0.19, "grad_norm": 7.983878102860816, "learning_rate": 9.303874992049797e-06, "loss": 0.5263, "step": 2436 }, { "epoch": 0.19, "grad_norm": 1.5214073112722155, "learning_rate": 9.303215455586835e-06, "loss": 0.2192, "step": 2437 }, { "epoch": 0.2, "grad_norm": 10.280652021015761, "learning_rate": 9.30255563023598e-06, "loss": 0.7436, "step": 2438 }, { "epoch": 0.2, "grad_norm": 6.044753879829271, "learning_rate": 9.301895516041531e-06, "loss": 0.5821, "step": 2439 }, { "epoch": 0.2, "grad_norm": 1.6366410619375933, "learning_rate": 9.301235113047801e-06, "loss": 0.2705, "step": 2440 }, { "epoch": 0.2, "grad_norm": 1.4917982027921104, "learning_rate": 9.300574421299127e-06, "loss": 0.2394, "step": 2441 }, { "epoch": 0.2, "grad_norm": 1.3845547234589197, "learning_rate": 9.299913440839859e-06, "loss": 0.2604, "step": 2442 }, { "epoch": 0.2, "grad_norm": 1.4219534833849874, "learning_rate": 9.299252171714374e-06, "loss": 0.2287, "step": 2443 }, { "epoch": 0.2, "grad_norm": 1.349023961813594, "learning_rate": 9.298590613967067e-06, "loss": 0.194, "step": 2444 }, { "epoch": 0.2, "grad_norm": 1.5584438281215978, "learning_rate": 9.297928767642346e-06, "loss": 0.2296, "step": 2445 }, { "epoch": 0.2, "grad_norm": 1.443518436770763, "learning_rate": 9.297266632784646e-06, "loss": 0.1946, "step": 2446 }, { "epoch": 0.2, "grad_norm": 1.4453125077668862, "learning_rate": 9.296604209438414e-06, "loss": 0.2371, "step": 2447 }, { "epoch": 0.2, "grad_norm": 1.481652828024523, "learning_rate": 9.295941497648125e-06, "loss": 0.2513, "step": 2448 }, { "epoch": 0.2, "grad_norm": 1.3661590584993701, "learning_rate": 9.295278497458266e-06, "loss": 0.2215, "step": 2449 }, { "epoch": 0.2, "grad_norm": 1.4871967471239442, "learning_rate": 9.29461520891335e-06, "loss": 0.2394, "step": 2450 }, { "epoch": 0.2, "grad_norm": 1.3775037658924878, "learning_rate": 9.2939516320579e-06, "loss": 0.2006, "step": 2451 }, { "epoch": 0.2, "grad_norm": 1.5973772895003122, "learning_rate": 9.293287766936469e-06, "loss": 0.2695, "step": 2452 }, { "epoch": 0.2, "grad_norm": 14.26604038601165, "learning_rate": 9.29262361359362e-06, "loss": 0.8148, "step": 2453 }, { "epoch": 0.2, "grad_norm": 1.5326213212465247, "learning_rate": 9.291959172073943e-06, "loss": 0.2379, "step": 2454 }, { "epoch": 0.2, "grad_norm": 1.5089898058918714, "learning_rate": 9.291294442422043e-06, "loss": 0.2548, "step": 2455 }, { "epoch": 0.2, "grad_norm": 1.3671669653549101, "learning_rate": 9.290629424682543e-06, "loss": 0.1762, "step": 2456 }, { "epoch": 0.2, "grad_norm": 7.5413106007252715, "learning_rate": 9.289964118900092e-06, "loss": 0.5311, "step": 2457 }, { "epoch": 0.2, "grad_norm": 1.2930332947379293, "learning_rate": 9.28929852511935e-06, "loss": 0.2378, "step": 2458 }, { "epoch": 0.2, "grad_norm": 1.5489373036725944, "learning_rate": 9.288632643385002e-06, "loss": 0.2963, "step": 2459 }, { "epoch": 0.2, "grad_norm": 1.5187989938325832, "learning_rate": 9.287966473741752e-06, "loss": 0.2195, "step": 2460 }, { "epoch": 0.2, "grad_norm": 1.4999921469494621, "learning_rate": 9.28730001623432e-06, "loss": 0.2291, "step": 2461 }, { "epoch": 0.2, "grad_norm": 1.4293275251756687, "learning_rate": 9.286633270907448e-06, "loss": 0.261, "step": 2462 }, { "epoch": 0.2, "grad_norm": 1.4513720467703264, "learning_rate": 9.285966237805895e-06, "loss": 0.2387, "step": 2463 }, { "epoch": 0.2, "grad_norm": 6.5353417128643265, "learning_rate": 9.285298916974443e-06, "loss": 0.5325, "step": 2464 }, { "epoch": 0.2, "grad_norm": 1.386332466672197, "learning_rate": 9.284631308457892e-06, "loss": 0.2425, "step": 2465 }, { "epoch": 0.2, "grad_norm": 1.296393099229109, "learning_rate": 9.283963412301058e-06, "loss": 0.2506, "step": 2466 }, { "epoch": 0.2, "grad_norm": 1.5184022975963396, "learning_rate": 9.28329522854878e-06, "loss": 0.2394, "step": 2467 }, { "epoch": 0.2, "grad_norm": 1.4006631701857264, "learning_rate": 9.282626757245918e-06, "loss": 0.2258, "step": 2468 }, { "epoch": 0.2, "grad_norm": 1.5533267007148088, "learning_rate": 9.281957998437345e-06, "loss": 0.2375, "step": 2469 }, { "epoch": 0.2, "grad_norm": 1.4336727371214546, "learning_rate": 9.281288952167957e-06, "loss": 0.2341, "step": 2470 }, { "epoch": 0.2, "grad_norm": 1.4939816028297657, "learning_rate": 9.28061961848267e-06, "loss": 0.2024, "step": 2471 }, { "epoch": 0.2, "grad_norm": 1.2816606239333097, "learning_rate": 9.279949997426417e-06, "loss": 0.1958, "step": 2472 }, { "epoch": 0.2, "grad_norm": 1.4047626002456695, "learning_rate": 9.279280089044155e-06, "loss": 0.2161, "step": 2473 }, { "epoch": 0.2, "grad_norm": 1.4218538824100064, "learning_rate": 9.278609893380855e-06, "loss": 0.2159, "step": 2474 }, { "epoch": 0.2, "grad_norm": 1.4709704124435377, "learning_rate": 9.277939410481507e-06, "loss": 0.2203, "step": 2475 }, { "epoch": 0.2, "grad_norm": 1.6435936336197325, "learning_rate": 9.277268640391126e-06, "loss": 0.2541, "step": 2476 }, { "epoch": 0.2, "grad_norm": 1.4611238593241196, "learning_rate": 9.27659758315474e-06, "loss": 0.1793, "step": 2477 }, { "epoch": 0.2, "grad_norm": 6.168397303915047, "learning_rate": 9.275926238817404e-06, "loss": 0.5371, "step": 2478 }, { "epoch": 0.2, "grad_norm": 1.3939953515914356, "learning_rate": 9.275254607424182e-06, "loss": 0.2667, "step": 2479 }, { "epoch": 0.2, "grad_norm": 1.5090912411164825, "learning_rate": 9.274582689020164e-06, "loss": 0.2199, "step": 2480 }, { "epoch": 0.2, "grad_norm": 1.470917441778964, "learning_rate": 9.273910483650461e-06, "loss": 0.2479, "step": 2481 }, { "epoch": 0.2, "grad_norm": 8.60024886734015, "learning_rate": 9.273237991360196e-06, "loss": 0.5251, "step": 2482 }, { "epoch": 0.2, "grad_norm": 1.5415147891786058, "learning_rate": 9.272565212194517e-06, "loss": 0.2728, "step": 2483 }, { "epoch": 0.2, "grad_norm": 1.2115858625178058, "learning_rate": 9.271892146198591e-06, "loss": 0.1948, "step": 2484 }, { "epoch": 0.2, "grad_norm": 1.5446657033659177, "learning_rate": 9.271218793417601e-06, "loss": 0.2336, "step": 2485 }, { "epoch": 0.2, "grad_norm": 1.2484030724617678, "learning_rate": 9.270545153896752e-06, "loss": 0.1666, "step": 2486 }, { "epoch": 0.2, "grad_norm": 1.3862724651901703, "learning_rate": 9.269871227681268e-06, "loss": 0.1854, "step": 2487 }, { "epoch": 0.2, "grad_norm": 1.4581185061074284, "learning_rate": 9.269197014816393e-06, "loss": 0.2508, "step": 2488 }, { "epoch": 0.2, "grad_norm": 1.435474486172341, "learning_rate": 9.268522515347384e-06, "loss": 0.2383, "step": 2489 }, { "epoch": 0.2, "grad_norm": 1.5731549428676292, "learning_rate": 9.267847729319528e-06, "loss": 0.2536, "step": 2490 }, { "epoch": 0.2, "grad_norm": 1.5507697834875012, "learning_rate": 9.267172656778122e-06, "loss": 0.2129, "step": 2491 }, { "epoch": 0.2, "grad_norm": 1.4830658508463754, "learning_rate": 9.266497297768487e-06, "loss": 0.2002, "step": 2492 }, { "epoch": 0.2, "grad_norm": 1.5309953148650814, "learning_rate": 9.265821652335961e-06, "loss": 0.2475, "step": 2493 }, { "epoch": 0.2, "grad_norm": 1.295659132966027, "learning_rate": 9.265145720525902e-06, "loss": 0.1987, "step": 2494 }, { "epoch": 0.2, "grad_norm": 1.3116497712159687, "learning_rate": 9.264469502383689e-06, "loss": 0.2062, "step": 2495 }, { "epoch": 0.2, "grad_norm": 1.4758729449247856, "learning_rate": 9.263792997954717e-06, "loss": 0.2136, "step": 2496 }, { "epoch": 0.2, "grad_norm": 1.6559882876214413, "learning_rate": 9.263116207284402e-06, "loss": 0.2954, "step": 2497 }, { "epoch": 0.2, "grad_norm": 1.583062641804702, "learning_rate": 9.26243913041818e-06, "loss": 0.2388, "step": 2498 }, { "epoch": 0.2, "grad_norm": 1.4827167848800258, "learning_rate": 9.261761767401503e-06, "loss": 0.2595, "step": 2499 }, { "epoch": 0.2, "grad_norm": 1.4492652267253305, "learning_rate": 9.261084118279846e-06, "loss": 0.2302, "step": 2500 }, { "epoch": 0.2, "grad_norm": 1.4481010435432558, "learning_rate": 9.260406183098704e-06, "loss": 0.2253, "step": 2501 }, { "epoch": 0.2, "grad_norm": 1.4762427947961392, "learning_rate": 9.259727961903584e-06, "loss": 0.2946, "step": 2502 }, { "epoch": 0.2, "grad_norm": 1.4258686627160804, "learning_rate": 9.25904945474002e-06, "loss": 0.2424, "step": 2503 }, { "epoch": 0.2, "grad_norm": 1.4322695557753122, "learning_rate": 9.258370661653563e-06, "loss": 0.2496, "step": 2504 }, { "epoch": 0.2, "grad_norm": 1.3557002571171304, "learning_rate": 9.25769158268978e-06, "loss": 0.2079, "step": 2505 }, { "epoch": 0.2, "grad_norm": 1.503981765288857, "learning_rate": 9.257012217894261e-06, "loss": 0.2397, "step": 2506 }, { "epoch": 0.2, "grad_norm": 7.804371997039915, "learning_rate": 9.256332567312614e-06, "loss": 0.7001, "step": 2507 }, { "epoch": 0.2, "grad_norm": 1.3999608098437997, "learning_rate": 9.255652630990464e-06, "loss": 0.2017, "step": 2508 }, { "epoch": 0.2, "grad_norm": 1.567890353838836, "learning_rate": 9.25497240897346e-06, "loss": 0.2553, "step": 2509 }, { "epoch": 0.2, "grad_norm": 8.398474103937861, "learning_rate": 9.254291901307267e-06, "loss": 0.4961, "step": 2510 }, { "epoch": 0.2, "grad_norm": 1.4145036263527753, "learning_rate": 9.253611108037566e-06, "loss": 0.2337, "step": 2511 }, { "epoch": 0.2, "grad_norm": 5.482965372595709, "learning_rate": 9.252930029210066e-06, "loss": 0.6682, "step": 2512 }, { "epoch": 0.2, "grad_norm": 1.4316685741927988, "learning_rate": 9.252248664870486e-06, "loss": 0.2423, "step": 2513 }, { "epoch": 0.2, "grad_norm": 1.5229130817369538, "learning_rate": 9.25156701506457e-06, "loss": 0.2223, "step": 2514 }, { "epoch": 0.2, "grad_norm": 1.520685788776464, "learning_rate": 9.250885079838079e-06, "loss": 0.2363, "step": 2515 }, { "epoch": 0.2, "grad_norm": 1.442688848302474, "learning_rate": 9.250202859236792e-06, "loss": 0.1794, "step": 2516 }, { "epoch": 0.2, "grad_norm": 1.465943062395698, "learning_rate": 9.249520353306509e-06, "loss": 0.225, "step": 2517 }, { "epoch": 0.2, "grad_norm": 1.6340684080426102, "learning_rate": 9.248837562093049e-06, "loss": 0.2246, "step": 2518 }, { "epoch": 0.2, "grad_norm": 1.5159266674273277, "learning_rate": 9.24815448564225e-06, "loss": 0.2088, "step": 2519 }, { "epoch": 0.2, "grad_norm": 1.4442845542231468, "learning_rate": 9.24747112399997e-06, "loss": 0.2357, "step": 2520 }, { "epoch": 0.2, "grad_norm": 1.4838230499467093, "learning_rate": 9.246787477212085e-06, "loss": 0.2464, "step": 2521 }, { "epoch": 0.2, "grad_norm": 6.652805673648436, "learning_rate": 9.246103545324488e-06, "loss": 0.6228, "step": 2522 }, { "epoch": 0.2, "grad_norm": 8.879983798196003, "learning_rate": 9.245419328383095e-06, "loss": 0.5753, "step": 2523 }, { "epoch": 0.2, "grad_norm": 1.3775244886299194, "learning_rate": 9.244734826433839e-06, "loss": 0.1972, "step": 2524 }, { "epoch": 0.2, "grad_norm": 1.381158484000899, "learning_rate": 9.244050039522673e-06, "loss": 0.2453, "step": 2525 }, { "epoch": 0.2, "grad_norm": 1.3231891360993122, "learning_rate": 9.243364967695569e-06, "loss": 0.2025, "step": 2526 }, { "epoch": 0.2, "grad_norm": 6.68637547598664, "learning_rate": 9.242679610998519e-06, "loss": 0.4897, "step": 2527 }, { "epoch": 0.2, "grad_norm": 1.413382560534626, "learning_rate": 9.241993969477531e-06, "loss": 0.2552, "step": 2528 }, { "epoch": 0.2, "grad_norm": 1.3935434394329702, "learning_rate": 9.241308043178635e-06, "loss": 0.2256, "step": 2529 }, { "epoch": 0.2, "grad_norm": 1.5074881432870122, "learning_rate": 9.24062183214788e-06, "loss": 0.2063, "step": 2530 }, { "epoch": 0.2, "grad_norm": 1.4281207441465014, "learning_rate": 9.239935336431331e-06, "loss": 0.1833, "step": 2531 }, { "epoch": 0.2, "grad_norm": 8.451110614104323, "learning_rate": 9.23924855607508e-06, "loss": 0.683, "step": 2532 }, { "epoch": 0.2, "grad_norm": 1.3517574261227974, "learning_rate": 9.238561491125225e-06, "loss": 0.1941, "step": 2533 }, { "epoch": 0.2, "grad_norm": 1.5224681150369972, "learning_rate": 9.237874141627896e-06, "loss": 0.2488, "step": 2534 }, { "epoch": 0.2, "grad_norm": 1.3467341771054333, "learning_rate": 9.237186507629236e-06, "loss": 0.2296, "step": 2535 }, { "epoch": 0.2, "grad_norm": 9.905310591288163, "learning_rate": 9.236498589175408e-06, "loss": 0.4675, "step": 2536 }, { "epoch": 0.2, "grad_norm": 1.4362406417222504, "learning_rate": 9.235810386312594e-06, "loss": 0.2257, "step": 2537 }, { "epoch": 0.2, "grad_norm": 1.3801983119520267, "learning_rate": 9.235121899086994e-06, "loss": 0.1867, "step": 2538 }, { "epoch": 0.2, "grad_norm": 1.4807110335585845, "learning_rate": 9.23443312754483e-06, "loss": 0.1957, "step": 2539 }, { "epoch": 0.2, "grad_norm": 1.4066779495411443, "learning_rate": 9.23374407173234e-06, "loss": 0.2146, "step": 2540 }, { "epoch": 0.2, "grad_norm": 7.5091301509973105, "learning_rate": 9.233054731695782e-06, "loss": 0.6844, "step": 2541 }, { "epoch": 0.2, "grad_norm": 1.4348934563408595, "learning_rate": 9.232365107481433e-06, "loss": 0.2199, "step": 2542 }, { "epoch": 0.2, "grad_norm": 1.433580207343984, "learning_rate": 9.231675199135593e-06, "loss": 0.2568, "step": 2543 }, { "epoch": 0.2, "grad_norm": 1.3547260450710197, "learning_rate": 9.230985006704575e-06, "loss": 0.2678, "step": 2544 }, { "epoch": 0.2, "grad_norm": 1.4039145897607916, "learning_rate": 9.230294530234714e-06, "loss": 0.2108, "step": 2545 }, { "epoch": 0.2, "grad_norm": 1.2697657102508415, "learning_rate": 9.229603769772364e-06, "loss": 0.1939, "step": 2546 }, { "epoch": 0.2, "grad_norm": 1.3245371471331209, "learning_rate": 9.228912725363897e-06, "loss": 0.1914, "step": 2547 }, { "epoch": 0.2, "grad_norm": 1.3372273214265753, "learning_rate": 9.228221397055705e-06, "loss": 0.2196, "step": 2548 }, { "epoch": 0.2, "grad_norm": 1.3373591056905139, "learning_rate": 9.2275297848942e-06, "loss": 0.26, "step": 2549 }, { "epoch": 0.2, "grad_norm": 1.393030613664345, "learning_rate": 9.226837888925813e-06, "loss": 0.1993, "step": 2550 }, { "epoch": 0.2, "grad_norm": 1.5892414372315793, "learning_rate": 9.226145709196991e-06, "loss": 0.2455, "step": 2551 }, { "epoch": 0.2, "grad_norm": 1.636981981972357, "learning_rate": 9.225453245754204e-06, "loss": 0.1862, "step": 2552 }, { "epoch": 0.2, "grad_norm": 1.3937873285388753, "learning_rate": 9.224760498643936e-06, "loss": 0.2497, "step": 2553 }, { "epoch": 0.2, "grad_norm": 1.2882393114518456, "learning_rate": 9.224067467912696e-06, "loss": 0.1903, "step": 2554 }, { "epoch": 0.2, "grad_norm": 1.3443596230487904, "learning_rate": 9.22337415360701e-06, "loss": 0.2054, "step": 2555 }, { "epoch": 0.2, "grad_norm": 1.3289269251060674, "learning_rate": 9.222680555773417e-06, "loss": 0.153, "step": 2556 }, { "epoch": 0.2, "grad_norm": 1.484344652326302, "learning_rate": 9.221986674458488e-06, "loss": 0.2689, "step": 2557 }, { "epoch": 0.2, "grad_norm": 6.334389007614257, "learning_rate": 9.221292509708799e-06, "loss": 0.6401, "step": 2558 }, { "epoch": 0.2, "grad_norm": 1.6878209637215262, "learning_rate": 9.220598061570956e-06, "loss": 0.2422, "step": 2559 }, { "epoch": 0.2, "grad_norm": 1.5287373601179137, "learning_rate": 9.219903330091575e-06, "loss": 0.213, "step": 2560 }, { "epoch": 0.2, "grad_norm": 1.3116513394070635, "learning_rate": 9.219208315317298e-06, "loss": 0.2148, "step": 2561 }, { "epoch": 0.2, "grad_norm": 1.378407896851258, "learning_rate": 9.218513017294784e-06, "loss": 0.192, "step": 2562 }, { "epoch": 0.21, "grad_norm": 1.3877394896723168, "learning_rate": 9.217817436070708e-06, "loss": 0.2003, "step": 2563 }, { "epoch": 0.21, "grad_norm": 1.4921635436978344, "learning_rate": 9.21712157169177e-06, "loss": 0.2569, "step": 2564 }, { "epoch": 0.21, "grad_norm": 1.4276512213316017, "learning_rate": 9.216425424204683e-06, "loss": 0.2309, "step": 2565 }, { "epoch": 0.21, "grad_norm": 1.413728043637103, "learning_rate": 9.21572899365618e-06, "loss": 0.2317, "step": 2566 }, { "epoch": 0.21, "grad_norm": 1.4703808340792581, "learning_rate": 9.21503228009302e-06, "loss": 0.2278, "step": 2567 }, { "epoch": 0.21, "grad_norm": 6.465010652761568, "learning_rate": 9.21433528356197e-06, "loss": 0.5517, "step": 2568 }, { "epoch": 0.21, "grad_norm": 1.2985605525229909, "learning_rate": 9.213638004109824e-06, "loss": 0.2071, "step": 2569 }, { "epoch": 0.21, "grad_norm": 1.3773735148111539, "learning_rate": 9.212940441783392e-06, "loss": 0.1815, "step": 2570 }, { "epoch": 0.21, "grad_norm": 1.4510572683152825, "learning_rate": 9.212242596629504e-06, "loss": 0.2322, "step": 2571 }, { "epoch": 0.21, "grad_norm": 1.4896401749045909, "learning_rate": 9.211544468695006e-06, "loss": 0.2305, "step": 2572 }, { "epoch": 0.21, "grad_norm": 1.3620070874134844, "learning_rate": 9.210846058026767e-06, "loss": 0.1904, "step": 2573 }, { "epoch": 0.21, "grad_norm": 1.5302365974957681, "learning_rate": 9.210147364671677e-06, "loss": 0.1975, "step": 2574 }, { "epoch": 0.21, "grad_norm": 1.6015145485481903, "learning_rate": 9.209448388676636e-06, "loss": 0.2202, "step": 2575 }, { "epoch": 0.21, "grad_norm": 1.6048312340272386, "learning_rate": 9.20874913008857e-06, "loss": 0.2767, "step": 2576 }, { "epoch": 0.21, "grad_norm": 1.3804191550252483, "learning_rate": 9.208049588954424e-06, "loss": 0.2175, "step": 2577 }, { "epoch": 0.21, "grad_norm": 1.2798978137197412, "learning_rate": 9.20734976532116e-06, "loss": 0.1978, "step": 2578 }, { "epoch": 0.21, "grad_norm": 1.452676177917455, "learning_rate": 9.206649659235756e-06, "loss": 0.2097, "step": 2579 }, { "epoch": 0.21, "grad_norm": 1.340129562355555, "learning_rate": 9.205949270745217e-06, "loss": 0.1935, "step": 2580 }, { "epoch": 0.21, "grad_norm": 1.4160690244536491, "learning_rate": 9.20524859989656e-06, "loss": 0.2046, "step": 2581 }, { "epoch": 0.21, "grad_norm": 1.3459733460501493, "learning_rate": 9.204547646736821e-06, "loss": 0.2059, "step": 2582 }, { "epoch": 0.21, "grad_norm": 1.3240502547597786, "learning_rate": 9.20384641131306e-06, "loss": 0.203, "step": 2583 }, { "epoch": 0.21, "grad_norm": 1.2869660305761765, "learning_rate": 9.203144893672354e-06, "loss": 0.2396, "step": 2584 }, { "epoch": 0.21, "grad_norm": 1.4802086491720519, "learning_rate": 9.202443093861796e-06, "loss": 0.2241, "step": 2585 }, { "epoch": 0.21, "grad_norm": 17.120619615915853, "learning_rate": 9.201741011928498e-06, "loss": 0.5523, "step": 2586 }, { "epoch": 0.21, "grad_norm": 1.3476667881077649, "learning_rate": 9.201038647919595e-06, "loss": 0.2597, "step": 2587 }, { "epoch": 0.21, "grad_norm": 8.367052989921326, "learning_rate": 9.20033600188224e-06, "loss": 0.8873, "step": 2588 }, { "epoch": 0.21, "grad_norm": 1.3255516252692998, "learning_rate": 9.199633073863603e-06, "loss": 0.2093, "step": 2589 }, { "epoch": 0.21, "grad_norm": 1.3447462584321312, "learning_rate": 9.198929863910874e-06, "loss": 0.1762, "step": 2590 }, { "epoch": 0.21, "grad_norm": 1.2864388853799342, "learning_rate": 9.198226372071259e-06, "loss": 0.1788, "step": 2591 }, { "epoch": 0.21, "grad_norm": 1.6518110873473089, "learning_rate": 9.19752259839199e-06, "loss": 0.2497, "step": 2592 }, { "epoch": 0.21, "grad_norm": 1.4182444108706607, "learning_rate": 9.19681854292031e-06, "loss": 0.1983, "step": 2593 }, { "epoch": 0.21, "grad_norm": 1.434158326272205, "learning_rate": 9.196114205703484e-06, "loss": 0.2311, "step": 2594 }, { "epoch": 0.21, "grad_norm": 1.4521995767763962, "learning_rate": 9.1954095867888e-06, "loss": 0.1879, "step": 2595 }, { "epoch": 0.21, "grad_norm": 1.552286919863675, "learning_rate": 9.194704686223557e-06, "loss": 0.2356, "step": 2596 }, { "epoch": 0.21, "grad_norm": 1.286665369890514, "learning_rate": 9.19399950405508e-06, "loss": 0.1801, "step": 2597 }, { "epoch": 0.21, "grad_norm": 1.3806476549559905, "learning_rate": 9.193294040330709e-06, "loss": 0.1546, "step": 2598 }, { "epoch": 0.21, "grad_norm": 1.7417330117177041, "learning_rate": 9.192588295097801e-06, "loss": 0.2417, "step": 2599 }, { "epoch": 0.21, "grad_norm": 1.3854065599438838, "learning_rate": 9.191882268403743e-06, "loss": 0.2121, "step": 2600 }, { "epoch": 0.21, "grad_norm": 1.484645674153059, "learning_rate": 9.191175960295924e-06, "loss": 0.2646, "step": 2601 }, { "epoch": 0.21, "grad_norm": 1.594624200499128, "learning_rate": 9.190469370821764e-06, "loss": 0.2721, "step": 2602 }, { "epoch": 0.21, "grad_norm": 1.4728863401872578, "learning_rate": 9.189762500028698e-06, "loss": 0.1982, "step": 2603 }, { "epoch": 0.21, "grad_norm": 1.0971855512730813, "learning_rate": 9.189055347964182e-06, "loss": 0.1681, "step": 2604 }, { "epoch": 0.21, "grad_norm": 5.082576177722389, "learning_rate": 9.188347914675689e-06, "loss": 0.6596, "step": 2605 }, { "epoch": 0.21, "grad_norm": 1.3919091198065385, "learning_rate": 9.18764020021071e-06, "loss": 0.276, "step": 2606 }, { "epoch": 0.21, "grad_norm": 1.1776139775140204, "learning_rate": 9.186932204616756e-06, "loss": 0.1789, "step": 2607 }, { "epoch": 0.21, "grad_norm": 1.439106220327655, "learning_rate": 9.186223927941356e-06, "loss": 0.2201, "step": 2608 }, { "epoch": 0.21, "grad_norm": 1.39652171188706, "learning_rate": 9.185515370232062e-06, "loss": 0.1916, "step": 2609 }, { "epoch": 0.21, "grad_norm": 1.5215548583810834, "learning_rate": 9.184806531536438e-06, "loss": 0.2205, "step": 2610 }, { "epoch": 0.21, "grad_norm": 1.7014454615516577, "learning_rate": 9.184097411902072e-06, "loss": 0.2583, "step": 2611 }, { "epoch": 0.21, "grad_norm": 1.3717443332297468, "learning_rate": 9.18338801137657e-06, "loss": 0.1937, "step": 2612 }, { "epoch": 0.21, "grad_norm": 1.2721018329044964, "learning_rate": 9.182678330007556e-06, "loss": 0.2183, "step": 2613 }, { "epoch": 0.21, "grad_norm": 1.3835540132382411, "learning_rate": 9.181968367842674e-06, "loss": 0.2103, "step": 2614 }, { "epoch": 0.21, "grad_norm": 1.511757867687406, "learning_rate": 9.181258124929582e-06, "loss": 0.2402, "step": 2615 }, { "epoch": 0.21, "grad_norm": 6.462402674354687, "learning_rate": 9.180547601315963e-06, "loss": 0.6583, "step": 2616 }, { "epoch": 0.21, "grad_norm": 1.4358484770462532, "learning_rate": 9.17983679704952e-06, "loss": 0.2219, "step": 2617 }, { "epoch": 0.21, "grad_norm": 1.4710993390172478, "learning_rate": 9.179125712177965e-06, "loss": 0.1975, "step": 2618 }, { "epoch": 0.21, "grad_norm": 4.6978926691234, "learning_rate": 9.17841434674904e-06, "loss": 0.4826, "step": 2619 }, { "epoch": 0.21, "grad_norm": 1.6587083948761059, "learning_rate": 9.1777027008105e-06, "loss": 0.2437, "step": 2620 }, { "epoch": 0.21, "grad_norm": 1.6282070275551552, "learning_rate": 9.17699077441012e-06, "loss": 0.2709, "step": 2621 }, { "epoch": 0.21, "grad_norm": 1.4686786274886592, "learning_rate": 9.176278567595696e-06, "loss": 0.2582, "step": 2622 }, { "epoch": 0.21, "grad_norm": 1.4275957389410845, "learning_rate": 9.175566080415036e-06, "loss": 0.2079, "step": 2623 }, { "epoch": 0.21, "grad_norm": 8.175196419461104, "learning_rate": 9.174853312915972e-06, "loss": 0.7449, "step": 2624 }, { "epoch": 0.21, "grad_norm": 1.459219148183228, "learning_rate": 9.174140265146356e-06, "loss": 0.2057, "step": 2625 }, { "epoch": 0.21, "grad_norm": 1.4367320719948615, "learning_rate": 9.173426937154058e-06, "loss": 0.2265, "step": 2626 }, { "epoch": 0.21, "grad_norm": 1.4388902604254572, "learning_rate": 9.172713328986965e-06, "loss": 0.2387, "step": 2627 }, { "epoch": 0.21, "grad_norm": 1.5414812851652706, "learning_rate": 9.171999440692982e-06, "loss": 0.2482, "step": 2628 }, { "epoch": 0.21, "grad_norm": 1.3528337947362474, "learning_rate": 9.17128527232004e-06, "loss": 0.1658, "step": 2629 }, { "epoch": 0.21, "grad_norm": 1.179402885857567, "learning_rate": 9.170570823916074e-06, "loss": 0.1978, "step": 2630 }, { "epoch": 0.21, "grad_norm": 1.481990131493022, "learning_rate": 9.169856095529055e-06, "loss": 0.2221, "step": 2631 }, { "epoch": 0.21, "grad_norm": 1.4284265168141148, "learning_rate": 9.16914108720696e-06, "loss": 0.2059, "step": 2632 }, { "epoch": 0.21, "grad_norm": 1.5474644091835987, "learning_rate": 9.168425798997794e-06, "loss": 0.2911, "step": 2633 }, { "epoch": 0.21, "grad_norm": 1.3544932070032691, "learning_rate": 9.167710230949573e-06, "loss": 0.2489, "step": 2634 }, { "epoch": 0.21, "grad_norm": 1.4038325987323266, "learning_rate": 9.166994383110338e-06, "loss": 0.2426, "step": 2635 }, { "epoch": 0.21, "grad_norm": 1.4927236396525574, "learning_rate": 9.166278255528143e-06, "loss": 0.2086, "step": 2636 }, { "epoch": 0.21, "grad_norm": 1.4786421653615467, "learning_rate": 9.165561848251066e-06, "loss": 0.257, "step": 2637 }, { "epoch": 0.21, "grad_norm": 1.494964907518733, "learning_rate": 9.164845161327203e-06, "loss": 0.2439, "step": 2638 }, { "epoch": 0.21, "grad_norm": 1.4914512590800875, "learning_rate": 9.164128194804663e-06, "loss": 0.2487, "step": 2639 }, { "epoch": 0.21, "grad_norm": 5.213346824093523, "learning_rate": 9.16341094873158e-06, "loss": 0.4918, "step": 2640 }, { "epoch": 0.21, "grad_norm": 5.8976864828360265, "learning_rate": 9.162693423156106e-06, "loss": 0.7302, "step": 2641 }, { "epoch": 0.21, "grad_norm": 1.293228759486824, "learning_rate": 9.161975618126411e-06, "loss": 0.1933, "step": 2642 }, { "epoch": 0.21, "grad_norm": 1.5208125629577405, "learning_rate": 9.161257533690682e-06, "loss": 0.2141, "step": 2643 }, { "epoch": 0.21, "grad_norm": 7.058882638648034, "learning_rate": 9.160539169897126e-06, "loss": 0.777, "step": 2644 }, { "epoch": 0.21, "grad_norm": 6.7070982969173425, "learning_rate": 9.15982052679397e-06, "loss": 0.7961, "step": 2645 }, { "epoch": 0.21, "grad_norm": 1.3151563738096888, "learning_rate": 9.15910160442946e-06, "loss": 0.2186, "step": 2646 }, { "epoch": 0.21, "grad_norm": 1.4053964091377618, "learning_rate": 9.158382402851854e-06, "loss": 0.2008, "step": 2647 }, { "epoch": 0.21, "grad_norm": 1.2994142241834374, "learning_rate": 9.15766292210944e-06, "loss": 0.1868, "step": 2648 }, { "epoch": 0.21, "grad_norm": 1.268345972289518, "learning_rate": 9.156943162250516e-06, "loss": 0.1615, "step": 2649 }, { "epoch": 0.21, "grad_norm": 1.5665268742002818, "learning_rate": 9.156223123323405e-06, "loss": 0.2451, "step": 2650 }, { "epoch": 0.21, "grad_norm": 1.4240915616702214, "learning_rate": 9.155502805376439e-06, "loss": 0.2301, "step": 2651 }, { "epoch": 0.21, "grad_norm": 6.917390607353714, "learning_rate": 9.154782208457981e-06, "loss": 0.4922, "step": 2652 }, { "epoch": 0.21, "grad_norm": 1.3034028824088442, "learning_rate": 9.154061332616407e-06, "loss": 0.2254, "step": 2653 }, { "epoch": 0.21, "grad_norm": 1.3376834824067334, "learning_rate": 9.153340177900108e-06, "loss": 0.2231, "step": 2654 }, { "epoch": 0.21, "grad_norm": 1.5048346088825546, "learning_rate": 9.152618744357498e-06, "loss": 0.212, "step": 2655 }, { "epoch": 0.21, "grad_norm": 1.3953346603606998, "learning_rate": 9.15189703203701e-06, "loss": 0.2528, "step": 2656 }, { "epoch": 0.21, "grad_norm": 1.5183701229182094, "learning_rate": 9.151175040987094e-06, "loss": 0.2235, "step": 2657 }, { "epoch": 0.21, "grad_norm": 1.3060666326298913, "learning_rate": 9.15045277125622e-06, "loss": 0.1975, "step": 2658 }, { "epoch": 0.21, "grad_norm": 6.246969409520784, "learning_rate": 9.149730222892876e-06, "loss": 0.668, "step": 2659 }, { "epoch": 0.21, "grad_norm": 1.3545291327540572, "learning_rate": 9.149007395945569e-06, "loss": 0.1894, "step": 2660 }, { "epoch": 0.21, "grad_norm": 1.322329299306653, "learning_rate": 9.148284290462825e-06, "loss": 0.1973, "step": 2661 }, { "epoch": 0.21, "grad_norm": 1.4586395960497665, "learning_rate": 9.147560906493189e-06, "loss": 0.2445, "step": 2662 }, { "epoch": 0.21, "grad_norm": 1.3192365454784591, "learning_rate": 9.14683724408522e-06, "loss": 0.194, "step": 2663 }, { "epoch": 0.21, "grad_norm": 1.412074552048598, "learning_rate": 9.146113303287503e-06, "loss": 0.2587, "step": 2664 }, { "epoch": 0.21, "grad_norm": 1.272525701462181, "learning_rate": 9.14538908414864e-06, "loss": 0.1981, "step": 2665 }, { "epoch": 0.21, "grad_norm": 1.390918664885163, "learning_rate": 9.144664586717246e-06, "loss": 0.2275, "step": 2666 }, { "epoch": 0.21, "grad_norm": 10.878339899546916, "learning_rate": 9.14393981104196e-06, "loss": 0.6393, "step": 2667 }, { "epoch": 0.21, "grad_norm": 1.3502878561911034, "learning_rate": 9.14321475717144e-06, "loss": 0.2382, "step": 2668 }, { "epoch": 0.21, "grad_norm": 1.301719968096471, "learning_rate": 9.14248942515436e-06, "loss": 0.1852, "step": 2669 }, { "epoch": 0.21, "grad_norm": 5.743392928059241, "learning_rate": 9.141763815039413e-06, "loss": 0.705, "step": 2670 }, { "epoch": 0.21, "grad_norm": 1.4748406355115191, "learning_rate": 9.141037926875312e-06, "loss": 0.1962, "step": 2671 }, { "epoch": 0.21, "grad_norm": 5.565445256685354, "learning_rate": 9.140311760710788e-06, "loss": 0.7296, "step": 2672 }, { "epoch": 0.21, "grad_norm": 1.3193624040827834, "learning_rate": 9.139585316594592e-06, "loss": 0.1926, "step": 2673 }, { "epoch": 0.21, "grad_norm": 1.5039408737590663, "learning_rate": 9.13885859457549e-06, "loss": 0.216, "step": 2674 }, { "epoch": 0.21, "grad_norm": 5.101394223123094, "learning_rate": 9.13813159470227e-06, "loss": 0.6383, "step": 2675 }, { "epoch": 0.21, "grad_norm": 1.5459468309760096, "learning_rate": 9.137404317023738e-06, "loss": 0.2287, "step": 2676 }, { "epoch": 0.21, "grad_norm": 1.3902110268349865, "learning_rate": 9.13667676158872e-06, "loss": 0.2405, "step": 2677 }, { "epoch": 0.21, "grad_norm": 1.373716524210969, "learning_rate": 9.135948928446057e-06, "loss": 0.2256, "step": 2678 }, { "epoch": 0.21, "grad_norm": 1.4329366195571578, "learning_rate": 9.13522081764461e-06, "loss": 0.2288, "step": 2679 }, { "epoch": 0.21, "grad_norm": 1.5049253420446151, "learning_rate": 9.134492429233262e-06, "loss": 0.2167, "step": 2680 }, { "epoch": 0.21, "grad_norm": 7.607352752390637, "learning_rate": 9.133763763260907e-06, "loss": 0.5486, "step": 2681 }, { "epoch": 0.21, "grad_norm": 1.347140340316864, "learning_rate": 9.133034819776469e-06, "loss": 0.2268, "step": 2682 }, { "epoch": 0.21, "grad_norm": 1.3720230861670513, "learning_rate": 9.13230559882888e-06, "loss": 0.2061, "step": 2683 }, { "epoch": 0.21, "grad_norm": 1.3937876334363148, "learning_rate": 9.131576100467095e-06, "loss": 0.2057, "step": 2684 }, { "epoch": 0.21, "grad_norm": 1.577816701365336, "learning_rate": 9.130846324740087e-06, "loss": 0.2294, "step": 2685 }, { "epoch": 0.21, "grad_norm": 1.5576769536011323, "learning_rate": 9.130116271696851e-06, "loss": 0.2429, "step": 2686 }, { "epoch": 0.21, "grad_norm": 1.3760401761764731, "learning_rate": 9.129385941386397e-06, "loss": 0.1705, "step": 2687 }, { "epoch": 0.22, "grad_norm": 1.421557200319374, "learning_rate": 9.128655333857751e-06, "loss": 0.2205, "step": 2688 }, { "epoch": 0.22, "grad_norm": 1.333311866529666, "learning_rate": 9.127924449159966e-06, "loss": 0.1973, "step": 2689 }, { "epoch": 0.22, "grad_norm": 1.489628096390768, "learning_rate": 9.127193287342103e-06, "loss": 0.2113, "step": 2690 }, { "epoch": 0.22, "grad_norm": 1.2733000801837795, "learning_rate": 9.12646184845325e-06, "loss": 0.1972, "step": 2691 }, { "epoch": 0.22, "grad_norm": 1.298630408900462, "learning_rate": 9.125730132542511e-06, "loss": 0.1678, "step": 2692 }, { "epoch": 0.22, "grad_norm": 1.460921832458138, "learning_rate": 9.124998139659009e-06, "loss": 0.2351, "step": 2693 }, { "epoch": 0.22, "grad_norm": 1.5015495967938832, "learning_rate": 9.124265869851882e-06, "loss": 0.2006, "step": 2694 }, { "epoch": 0.22, "grad_norm": 1.3391767970264414, "learning_rate": 9.12353332317029e-06, "loss": 0.1874, "step": 2695 }, { "epoch": 0.22, "grad_norm": 1.5178712198316444, "learning_rate": 9.122800499663414e-06, "loss": 0.2272, "step": 2696 }, { "epoch": 0.22, "grad_norm": 1.5116629528869265, "learning_rate": 9.12206739938045e-06, "loss": 0.2093, "step": 2697 }, { "epoch": 0.22, "grad_norm": 1.3410534843112707, "learning_rate": 9.12133402237061e-06, "loss": 0.2153, "step": 2698 }, { "epoch": 0.22, "grad_norm": 1.4019882394154262, "learning_rate": 9.12060036868313e-06, "loss": 0.2519, "step": 2699 }, { "epoch": 0.22, "grad_norm": 1.2002464621642435, "learning_rate": 9.119866438367263e-06, "loss": 0.1515, "step": 2700 }, { "epoch": 0.22, "grad_norm": 1.6261783024552916, "learning_rate": 9.119132231472278e-06, "loss": 0.2194, "step": 2701 }, { "epoch": 0.22, "grad_norm": 1.3336394962402387, "learning_rate": 9.118397748047467e-06, "loss": 0.1889, "step": 2702 }, { "epoch": 0.22, "grad_norm": 1.4605709263858393, "learning_rate": 9.117662988142138e-06, "loss": 0.2525, "step": 2703 }, { "epoch": 0.22, "grad_norm": 1.6723157566839806, "learning_rate": 9.116927951805615e-06, "loss": 0.2185, "step": 2704 }, { "epoch": 0.22, "grad_norm": 1.6023138177065948, "learning_rate": 9.116192639087245e-06, "loss": 0.2313, "step": 2705 }, { "epoch": 0.22, "grad_norm": 1.3621626671413896, "learning_rate": 9.115457050036393e-06, "loss": 0.197, "step": 2706 }, { "epoch": 0.22, "grad_norm": 1.3707857273152984, "learning_rate": 9.11472118470244e-06, "loss": 0.1982, "step": 2707 }, { "epoch": 0.22, "grad_norm": 7.441471835179643, "learning_rate": 9.113985043134784e-06, "loss": 0.7088, "step": 2708 }, { "epoch": 0.22, "grad_norm": 1.445595614736569, "learning_rate": 9.113248625382849e-06, "loss": 0.2784, "step": 2709 }, { "epoch": 0.22, "grad_norm": 1.51766381544587, "learning_rate": 9.11251193149607e-06, "loss": 0.2239, "step": 2710 }, { "epoch": 0.22, "grad_norm": 1.4026549068643028, "learning_rate": 9.111774961523906e-06, "loss": 0.2117, "step": 2711 }, { "epoch": 0.22, "grad_norm": 1.9154104309470954, "learning_rate": 9.11103771551583e-06, "loss": 0.2648, "step": 2712 }, { "epoch": 0.22, "grad_norm": 1.3223474030414537, "learning_rate": 9.110300193521336e-06, "loss": 0.1826, "step": 2713 }, { "epoch": 0.22, "grad_norm": 1.6005665556405795, "learning_rate": 9.109562395589937e-06, "loss": 0.1989, "step": 2714 }, { "epoch": 0.22, "grad_norm": 1.6073390073266425, "learning_rate": 9.108824321771163e-06, "loss": 0.2288, "step": 2715 }, { "epoch": 0.22, "grad_norm": 1.4154704609763002, "learning_rate": 9.108085972114563e-06, "loss": 0.2162, "step": 2716 }, { "epoch": 0.22, "grad_norm": 1.4622945902637294, "learning_rate": 9.107347346669705e-06, "loss": 0.2121, "step": 2717 }, { "epoch": 0.22, "grad_norm": 5.898084998466211, "learning_rate": 9.106608445486171e-06, "loss": 0.4668, "step": 2718 }, { "epoch": 0.22, "grad_norm": 1.4654514280282613, "learning_rate": 9.105869268613574e-06, "loss": 0.1899, "step": 2719 }, { "epoch": 0.22, "grad_norm": 5.6518450146102275, "learning_rate": 9.105129816101531e-06, "loss": 0.726, "step": 2720 }, { "epoch": 0.22, "grad_norm": 1.340745068837695, "learning_rate": 9.104390087999686e-06, "loss": 0.1607, "step": 2721 }, { "epoch": 0.22, "grad_norm": 1.4525163388831515, "learning_rate": 9.103650084357697e-06, "loss": 0.2404, "step": 2722 }, { "epoch": 0.22, "grad_norm": 1.2417255609281581, "learning_rate": 9.102909805225246e-06, "loss": 0.2193, "step": 2723 }, { "epoch": 0.22, "grad_norm": 1.3662320404428347, "learning_rate": 9.102169250652029e-06, "loss": 0.2499, "step": 2724 }, { "epoch": 0.22, "grad_norm": 1.5065714633685114, "learning_rate": 9.101428420687759e-06, "loss": 0.2605, "step": 2725 }, { "epoch": 0.22, "grad_norm": 1.4756191962821648, "learning_rate": 9.100687315382174e-06, "loss": 0.2636, "step": 2726 }, { "epoch": 0.22, "grad_norm": 1.5593777191465357, "learning_rate": 9.099945934785026e-06, "loss": 0.2616, "step": 2727 }, { "epoch": 0.22, "grad_norm": 1.5186098200696208, "learning_rate": 9.099204278946083e-06, "loss": 0.2327, "step": 2728 }, { "epoch": 0.22, "grad_norm": 1.1860620081515083, "learning_rate": 9.098462347915136e-06, "loss": 0.2039, "step": 2729 }, { "epoch": 0.22, "grad_norm": 1.346188150374557, "learning_rate": 9.097720141741994e-06, "loss": 0.2221, "step": 2730 }, { "epoch": 0.22, "grad_norm": 6.136065294341917, "learning_rate": 9.096977660476485e-06, "loss": 0.4554, "step": 2731 }, { "epoch": 0.22, "grad_norm": 1.3584808130383115, "learning_rate": 9.096234904168451e-06, "loss": 0.2206, "step": 2732 }, { "epoch": 0.22, "grad_norm": 1.393026357935776, "learning_rate": 9.095491872867757e-06, "loss": 0.2319, "step": 2733 }, { "epoch": 0.22, "grad_norm": 1.4446792389077936, "learning_rate": 9.094748566624285e-06, "loss": 0.2537, "step": 2734 }, { "epoch": 0.22, "grad_norm": 1.2915641268691058, "learning_rate": 9.094004985487935e-06, "loss": 0.2263, "step": 2735 }, { "epoch": 0.22, "grad_norm": 1.5681978616500116, "learning_rate": 9.093261129508625e-06, "loss": 0.2655, "step": 2736 }, { "epoch": 0.22, "grad_norm": 1.6978472680341348, "learning_rate": 9.092516998736296e-06, "loss": 0.2595, "step": 2737 }, { "epoch": 0.22, "grad_norm": 1.5187774255232824, "learning_rate": 9.0917725932209e-06, "loss": 0.1866, "step": 2738 }, { "epoch": 0.22, "grad_norm": 1.405692630637023, "learning_rate": 9.091027913012411e-06, "loss": 0.2198, "step": 2739 }, { "epoch": 0.22, "grad_norm": 1.2653642747439644, "learning_rate": 9.090282958160823e-06, "loss": 0.1838, "step": 2740 }, { "epoch": 0.22, "grad_norm": 5.787358460986167, "learning_rate": 9.089537728716147e-06, "loss": 0.5551, "step": 2741 }, { "epoch": 0.22, "grad_norm": 1.2631310893127194, "learning_rate": 9.088792224728413e-06, "loss": 0.2308, "step": 2742 }, { "epoch": 0.22, "grad_norm": 1.2396355531791612, "learning_rate": 9.08804644624767e-06, "loss": 0.181, "step": 2743 }, { "epoch": 0.22, "grad_norm": 1.3729885545740517, "learning_rate": 9.08730039332398e-06, "loss": 0.1992, "step": 2744 }, { "epoch": 0.22, "grad_norm": 1.3585934430800366, "learning_rate": 9.08655406600743e-06, "loss": 0.2067, "step": 2745 }, { "epoch": 0.22, "grad_norm": 1.4222544190520328, "learning_rate": 9.085807464348127e-06, "loss": 0.2706, "step": 2746 }, { "epoch": 0.22, "grad_norm": 1.6052385396727238, "learning_rate": 9.085060588396188e-06, "loss": 0.2578, "step": 2747 }, { "epoch": 0.22, "grad_norm": 9.121526491056624, "learning_rate": 9.084313438201754e-06, "loss": 0.5746, "step": 2748 }, { "epoch": 0.22, "grad_norm": 1.3475408266941615, "learning_rate": 9.083566013814985e-06, "loss": 0.2352, "step": 2749 }, { "epoch": 0.22, "grad_norm": 1.3000050120529731, "learning_rate": 9.082818315286054e-06, "loss": 0.1872, "step": 2750 }, { "epoch": 0.22, "grad_norm": 5.354369059324886, "learning_rate": 9.082070342665163e-06, "loss": 0.7369, "step": 2751 }, { "epoch": 0.22, "grad_norm": 1.374025744809407, "learning_rate": 9.08132209600252e-06, "loss": 0.2324, "step": 2752 }, { "epoch": 0.22, "grad_norm": 1.6888991493437802, "learning_rate": 9.080573575348358e-06, "loss": 0.285, "step": 2753 }, { "epoch": 0.22, "grad_norm": 1.3115675050413185, "learning_rate": 9.079824780752929e-06, "loss": 0.2233, "step": 2754 }, { "epoch": 0.22, "grad_norm": 6.935306431439466, "learning_rate": 9.079075712266501e-06, "loss": 0.4932, "step": 2755 }, { "epoch": 0.22, "grad_norm": 1.6274482135732544, "learning_rate": 9.078326369939361e-06, "loss": 0.2835, "step": 2756 }, { "epoch": 0.22, "grad_norm": 5.312514077491614, "learning_rate": 9.077576753821815e-06, "loss": 0.65, "step": 2757 }, { "epoch": 0.22, "grad_norm": 1.2132416216205233, "learning_rate": 9.076826863964188e-06, "loss": 0.1717, "step": 2758 }, { "epoch": 0.22, "grad_norm": 1.3067087425137986, "learning_rate": 9.07607670041682e-06, "loss": 0.1795, "step": 2759 }, { "epoch": 0.22, "grad_norm": 1.3659413034086476, "learning_rate": 9.075326263230073e-06, "loss": 0.208, "step": 2760 }, { "epoch": 0.22, "grad_norm": 1.432916099636624, "learning_rate": 9.074575552454325e-06, "loss": 0.2197, "step": 2761 }, { "epoch": 0.22, "grad_norm": 1.3704222847454821, "learning_rate": 9.073824568139979e-06, "loss": 0.1894, "step": 2762 }, { "epoch": 0.22, "grad_norm": 1.4754252279492717, "learning_rate": 9.073073310337443e-06, "loss": 0.2448, "step": 2763 }, { "epoch": 0.22, "grad_norm": 4.780182301212309, "learning_rate": 9.072321779097155e-06, "loss": 0.4945, "step": 2764 }, { "epoch": 0.22, "grad_norm": 1.5005067063616255, "learning_rate": 9.071569974469569e-06, "loss": 0.2177, "step": 2765 }, { "epoch": 0.22, "grad_norm": 1.5749056665739491, "learning_rate": 9.070817896505153e-06, "loss": 0.2624, "step": 2766 }, { "epoch": 0.22, "grad_norm": 1.594612179109651, "learning_rate": 9.0700655452544e-06, "loss": 0.2314, "step": 2767 }, { "epoch": 0.22, "grad_norm": 1.3433587189141731, "learning_rate": 9.06931292076781e-06, "loss": 0.2086, "step": 2768 }, { "epoch": 0.22, "grad_norm": 1.4497876571013442, "learning_rate": 9.068560023095917e-06, "loss": 0.2195, "step": 2769 }, { "epoch": 0.22, "grad_norm": 1.7234763093412258, "learning_rate": 9.067806852289262e-06, "loss": 0.2493, "step": 2770 }, { "epoch": 0.22, "grad_norm": 1.3520959895920677, "learning_rate": 9.067053408398409e-06, "loss": 0.1883, "step": 2771 }, { "epoch": 0.22, "grad_norm": 1.4488528219338068, "learning_rate": 9.06629969147394e-06, "loss": 0.2466, "step": 2772 }, { "epoch": 0.22, "grad_norm": 1.4125044326631508, "learning_rate": 9.065545701566448e-06, "loss": 0.2241, "step": 2773 }, { "epoch": 0.22, "grad_norm": 4.613366786154387, "learning_rate": 9.064791438726557e-06, "loss": 0.6188, "step": 2774 }, { "epoch": 0.22, "grad_norm": 1.5570319551335765, "learning_rate": 9.0640369030049e-06, "loss": 0.2984, "step": 2775 }, { "epoch": 0.22, "grad_norm": 1.7640627598685126, "learning_rate": 9.063282094452133e-06, "loss": 0.2718, "step": 2776 }, { "epoch": 0.22, "grad_norm": 1.5415451461254195, "learning_rate": 9.062527013118926e-06, "loss": 0.2708, "step": 2777 }, { "epoch": 0.22, "grad_norm": 1.2685295948939568, "learning_rate": 9.061771659055974e-06, "loss": 0.175, "step": 2778 }, { "epoch": 0.22, "grad_norm": 1.5650799437982696, "learning_rate": 9.061016032313984e-06, "loss": 0.216, "step": 2779 }, { "epoch": 0.22, "grad_norm": 1.3140040358820873, "learning_rate": 9.060260132943682e-06, "loss": 0.2223, "step": 2780 }, { "epoch": 0.22, "grad_norm": 1.2813241792749517, "learning_rate": 9.059503960995816e-06, "loss": 0.2133, "step": 2781 }, { "epoch": 0.22, "grad_norm": 1.3287037191082507, "learning_rate": 9.058747516521149e-06, "loss": 0.2113, "step": 2782 }, { "epoch": 0.22, "grad_norm": 6.158487620129103, "learning_rate": 9.057990799570464e-06, "loss": 0.6308, "step": 2783 }, { "epoch": 0.22, "grad_norm": 1.2535091261317577, "learning_rate": 9.05723381019456e-06, "loss": 0.185, "step": 2784 }, { "epoch": 0.22, "grad_norm": 1.4418232645320088, "learning_rate": 9.056476548444258e-06, "loss": 0.2286, "step": 2785 }, { "epoch": 0.22, "grad_norm": 1.2710842933275697, "learning_rate": 9.055719014370396e-06, "loss": 0.2066, "step": 2786 }, { "epoch": 0.22, "grad_norm": 1.3767040865775415, "learning_rate": 9.054961208023827e-06, "loss": 0.1812, "step": 2787 }, { "epoch": 0.22, "grad_norm": 1.4680325387215052, "learning_rate": 9.054203129455425e-06, "loss": 0.2061, "step": 2788 }, { "epoch": 0.22, "grad_norm": 1.4768127429690432, "learning_rate": 9.053444778716085e-06, "loss": 0.2298, "step": 2789 }, { "epoch": 0.22, "grad_norm": 1.3149869910181915, "learning_rate": 9.052686155856716e-06, "loss": 0.2091, "step": 2790 }, { "epoch": 0.22, "grad_norm": 1.5389201150829421, "learning_rate": 9.051927260928243e-06, "loss": 0.2414, "step": 2791 }, { "epoch": 0.22, "grad_norm": 1.4859793752387178, "learning_rate": 9.051168093981619e-06, "loss": 0.2539, "step": 2792 }, { "epoch": 0.22, "grad_norm": 6.491323541728569, "learning_rate": 9.050408655067806e-06, "loss": 0.6121, "step": 2793 }, { "epoch": 0.22, "grad_norm": 1.3732632355151762, "learning_rate": 9.049648944237788e-06, "loss": 0.1904, "step": 2794 }, { "epoch": 0.22, "grad_norm": 1.5624289356446526, "learning_rate": 9.048888961542565e-06, "loss": 0.2429, "step": 2795 }, { "epoch": 0.22, "grad_norm": 1.3400308202123299, "learning_rate": 9.048128707033159e-06, "loss": 0.2525, "step": 2796 }, { "epoch": 0.22, "grad_norm": 1.4765966949134453, "learning_rate": 9.04736818076061e-06, "loss": 0.2564, "step": 2797 }, { "epoch": 0.22, "grad_norm": 1.2927968756760015, "learning_rate": 9.04660738277597e-06, "loss": 0.2248, "step": 2798 }, { "epoch": 0.22, "grad_norm": 1.3771446452894096, "learning_rate": 9.045846313130313e-06, "loss": 0.164, "step": 2799 }, { "epoch": 0.22, "grad_norm": 1.2228823830858666, "learning_rate": 9.045084971874738e-06, "loss": 0.2032, "step": 2800 }, { "epoch": 0.22, "grad_norm": 1.5615435130701139, "learning_rate": 9.044323359060352e-06, "loss": 0.2282, "step": 2801 }, { "epoch": 0.22, "grad_norm": 1.2836631873300806, "learning_rate": 9.043561474738285e-06, "loss": 0.216, "step": 2802 }, { "epoch": 0.22, "grad_norm": 1.4032398860712012, "learning_rate": 9.042799318959684e-06, "loss": 0.2559, "step": 2803 }, { "epoch": 0.22, "grad_norm": 1.439838926176643, "learning_rate": 9.042036891775715e-06, "loss": 0.2166, "step": 2804 }, { "epoch": 0.22, "grad_norm": 1.3455688786043316, "learning_rate": 9.041274193237565e-06, "loss": 0.2225, "step": 2805 }, { "epoch": 0.22, "grad_norm": 1.6534876106769998, "learning_rate": 9.040511223396432e-06, "loss": 0.2757, "step": 2806 }, { "epoch": 0.22, "grad_norm": 1.52418003261114, "learning_rate": 9.039747982303539e-06, "loss": 0.2419, "step": 2807 }, { "epoch": 0.22, "grad_norm": 1.5657551020028315, "learning_rate": 9.038984470010123e-06, "loss": 0.2019, "step": 2808 }, { "epoch": 0.22, "grad_norm": 1.2971651078265394, "learning_rate": 9.038220686567443e-06, "loss": 0.2228, "step": 2809 }, { "epoch": 0.22, "grad_norm": 1.4426981816203392, "learning_rate": 9.037456632026774e-06, "loss": 0.197, "step": 2810 }, { "epoch": 0.22, "grad_norm": 1.467132715805502, "learning_rate": 9.036692306439406e-06, "loss": 0.2612, "step": 2811 }, { "epoch": 0.22, "grad_norm": 1.5428826911405988, "learning_rate": 9.035927709856654e-06, "loss": 0.2073, "step": 2812 }, { "epoch": 0.23, "grad_norm": 1.7316056738360845, "learning_rate": 9.035162842329845e-06, "loss": 0.2619, "step": 2813 }, { "epoch": 0.23, "grad_norm": 1.4074668426840706, "learning_rate": 9.034397703910328e-06, "loss": 0.1829, "step": 2814 }, { "epoch": 0.23, "grad_norm": 1.4573635293256941, "learning_rate": 9.033632294649473e-06, "loss": 0.2717, "step": 2815 }, { "epoch": 0.23, "grad_norm": 1.5978212214492764, "learning_rate": 9.032866614598658e-06, "loss": 0.2781, "step": 2816 }, { "epoch": 0.23, "grad_norm": 1.3990474932649402, "learning_rate": 9.032100663809288e-06, "loss": 0.1865, "step": 2817 }, { "epoch": 0.23, "grad_norm": 1.478225377470301, "learning_rate": 9.031334442332784e-06, "loss": 0.2209, "step": 2818 }, { "epoch": 0.23, "grad_norm": 1.324947628861508, "learning_rate": 9.030567950220586e-06, "loss": 0.1901, "step": 2819 }, { "epoch": 0.23, "grad_norm": 1.5447661822890375, "learning_rate": 9.029801187524147e-06, "loss": 0.2205, "step": 2820 }, { "epoch": 0.23, "grad_norm": 1.520956277663838, "learning_rate": 9.029034154294945e-06, "loss": 0.2232, "step": 2821 }, { "epoch": 0.23, "grad_norm": 1.6054121043114649, "learning_rate": 9.028266850584473e-06, "loss": 0.1992, "step": 2822 }, { "epoch": 0.23, "grad_norm": 1.4074866472708532, "learning_rate": 9.027499276444242e-06, "loss": 0.2425, "step": 2823 }, { "epoch": 0.23, "grad_norm": 1.5562234465102696, "learning_rate": 9.026731431925784e-06, "loss": 0.2364, "step": 2824 }, { "epoch": 0.23, "grad_norm": 1.4378675591134906, "learning_rate": 9.025963317080641e-06, "loss": 0.1817, "step": 2825 }, { "epoch": 0.23, "grad_norm": 1.3776608566050383, "learning_rate": 9.025194931960385e-06, "loss": 0.2358, "step": 2826 }, { "epoch": 0.23, "grad_norm": 1.4326468347676402, "learning_rate": 9.024426276616595e-06, "loss": 0.2475, "step": 2827 }, { "epoch": 0.23, "grad_norm": 1.5151669557882912, "learning_rate": 9.023657351100878e-06, "loss": 0.2024, "step": 2828 }, { "epoch": 0.23, "grad_norm": 1.0865485697602366, "learning_rate": 9.02288815546485e-06, "loss": 0.1421, "step": 2829 }, { "epoch": 0.23, "grad_norm": 1.4004083982157023, "learning_rate": 9.022118689760153e-06, "loss": 0.1914, "step": 2830 }, { "epoch": 0.23, "grad_norm": 1.5341325603388152, "learning_rate": 9.02134895403844e-06, "loss": 0.217, "step": 2831 }, { "epoch": 0.23, "grad_norm": 1.6584298425465849, "learning_rate": 9.020578948351389e-06, "loss": 0.2646, "step": 2832 }, { "epoch": 0.23, "grad_norm": 1.3658798680635684, "learning_rate": 9.01980867275069e-06, "loss": 0.2486, "step": 2833 }, { "epoch": 0.23, "grad_norm": 8.242840670162026, "learning_rate": 9.019038127288056e-06, "loss": 0.5635, "step": 2834 }, { "epoch": 0.23, "grad_norm": 4.2319528036005165, "learning_rate": 9.018267312015214e-06, "loss": 0.6663, "step": 2835 }, { "epoch": 0.23, "grad_norm": 1.4873446778978956, "learning_rate": 9.017496226983915e-06, "loss": 0.2237, "step": 2836 }, { "epoch": 0.23, "grad_norm": 1.34754222432654, "learning_rate": 9.01672487224592e-06, "loss": 0.2205, "step": 2837 }, { "epoch": 0.23, "grad_norm": 1.3877551988307628, "learning_rate": 9.015953247853014e-06, "loss": 0.2484, "step": 2838 }, { "epoch": 0.23, "grad_norm": 1.2750056098688927, "learning_rate": 9.015181353856998e-06, "loss": 0.1852, "step": 2839 }, { "epoch": 0.23, "grad_norm": 1.5586050695209854, "learning_rate": 9.014409190309695e-06, "loss": 0.2909, "step": 2840 }, { "epoch": 0.23, "grad_norm": 6.957771991515571, "learning_rate": 9.013636757262938e-06, "loss": 0.6443, "step": 2841 }, { "epoch": 0.23, "grad_norm": 5.978687030343337, "learning_rate": 9.012864054768584e-06, "loss": 0.694, "step": 2842 }, { "epoch": 0.23, "grad_norm": 1.68732340687833, "learning_rate": 9.01209108287851e-06, "loss": 0.2597, "step": 2843 }, { "epoch": 0.23, "grad_norm": 1.2070138246231235, "learning_rate": 9.011317841644602e-06, "loss": 0.1918, "step": 2844 }, { "epoch": 0.23, "grad_norm": 1.818747401966009, "learning_rate": 9.010544331118776e-06, "loss": 0.2171, "step": 2845 }, { "epoch": 0.23, "grad_norm": 5.909821446912314, "learning_rate": 9.009770551352957e-06, "loss": 0.6904, "step": 2846 }, { "epoch": 0.23, "grad_norm": 1.4625678309129038, "learning_rate": 9.008996502399092e-06, "loss": 0.1937, "step": 2847 }, { "epoch": 0.23, "grad_norm": 1.3403198462042074, "learning_rate": 9.008222184309145e-06, "loss": 0.2403, "step": 2848 }, { "epoch": 0.23, "grad_norm": 1.3078904259706645, "learning_rate": 9.007447597135097e-06, "loss": 0.1849, "step": 2849 }, { "epoch": 0.23, "grad_norm": 1.4700985389589813, "learning_rate": 9.006672740928952e-06, "loss": 0.2278, "step": 2850 }, { "epoch": 0.23, "grad_norm": 1.5413570894979507, "learning_rate": 9.005897615742723e-06, "loss": 0.2277, "step": 2851 }, { "epoch": 0.23, "grad_norm": 7.186934536845539, "learning_rate": 9.005122221628452e-06, "loss": 0.6427, "step": 2852 }, { "epoch": 0.23, "grad_norm": 1.4584957023587855, "learning_rate": 9.00434655863819e-06, "loss": 0.2496, "step": 2853 }, { "epoch": 0.23, "grad_norm": 4.047544675111224, "learning_rate": 9.003570626824013e-06, "loss": 0.3899, "step": 2854 }, { "epoch": 0.23, "grad_norm": 1.4842509091263645, "learning_rate": 9.002794426238009e-06, "loss": 0.2496, "step": 2855 }, { "epoch": 0.23, "grad_norm": 8.316297901270813, "learning_rate": 9.002017956932285e-06, "loss": 0.7612, "step": 2856 }, { "epoch": 0.23, "grad_norm": 1.3873258790927314, "learning_rate": 9.001241218958972e-06, "loss": 0.1843, "step": 2857 }, { "epoch": 0.23, "grad_norm": 1.3310898189891012, "learning_rate": 9.00046421237021e-06, "loss": 0.2229, "step": 2858 }, { "epoch": 0.23, "grad_norm": 1.5319725040429875, "learning_rate": 8.999686937218168e-06, "loss": 0.2215, "step": 2859 }, { "epoch": 0.23, "grad_norm": 1.5651787938288904, "learning_rate": 8.998909393555022e-06, "loss": 0.1776, "step": 2860 }, { "epoch": 0.23, "grad_norm": 1.5884858878347807, "learning_rate": 8.998131581432972e-06, "loss": 0.2042, "step": 2861 }, { "epoch": 0.23, "grad_norm": 1.2677565136802358, "learning_rate": 8.997353500904234e-06, "loss": 0.2085, "step": 2862 }, { "epoch": 0.23, "grad_norm": 4.270508258368694, "learning_rate": 8.996575152021045e-06, "loss": 0.5937, "step": 2863 }, { "epoch": 0.23, "grad_norm": 1.3566657617792557, "learning_rate": 8.995796534835656e-06, "loss": 0.2173, "step": 2864 }, { "epoch": 0.23, "grad_norm": 1.3821513087646573, "learning_rate": 8.995017649400341e-06, "loss": 0.1928, "step": 2865 }, { "epoch": 0.23, "grad_norm": 1.4789968006256975, "learning_rate": 8.994238495767385e-06, "loss": 0.2704, "step": 2866 }, { "epoch": 0.23, "grad_norm": 1.302866811475751, "learning_rate": 8.993459073989098e-06, "loss": 0.1911, "step": 2867 }, { "epoch": 0.23, "grad_norm": 1.3687427729722517, "learning_rate": 8.992679384117802e-06, "loss": 0.256, "step": 2868 }, { "epoch": 0.23, "grad_norm": 1.466415513897628, "learning_rate": 8.991899426205844e-06, "loss": 0.2006, "step": 2869 }, { "epoch": 0.23, "grad_norm": 1.3901368692979712, "learning_rate": 8.99111920030558e-06, "loss": 0.2073, "step": 2870 }, { "epoch": 0.23, "grad_norm": 1.515818563088692, "learning_rate": 8.990338706469393e-06, "loss": 0.2283, "step": 2871 }, { "epoch": 0.23, "grad_norm": 1.4387095170975521, "learning_rate": 8.989557944749677e-06, "loss": 0.2169, "step": 2872 }, { "epoch": 0.23, "grad_norm": 1.318235840370533, "learning_rate": 8.988776915198849e-06, "loss": 0.2323, "step": 2873 }, { "epoch": 0.23, "grad_norm": 13.770769206536903, "learning_rate": 8.987995617869341e-06, "loss": 0.7652, "step": 2874 }, { "epoch": 0.23, "grad_norm": 1.3194625550872465, "learning_rate": 8.987214052813605e-06, "loss": 0.2072, "step": 2875 }, { "epoch": 0.23, "grad_norm": 1.5375410732303605, "learning_rate": 8.986432220084108e-06, "loss": 0.1963, "step": 2876 }, { "epoch": 0.23, "grad_norm": 1.3580321144035887, "learning_rate": 8.985650119733338e-06, "loss": 0.2058, "step": 2877 }, { "epoch": 0.23, "grad_norm": 1.4223747414901433, "learning_rate": 8.9848677518138e-06, "loss": 0.2097, "step": 2878 }, { "epoch": 0.23, "grad_norm": 1.3684062925926486, "learning_rate": 8.984085116378015e-06, "loss": 0.2448, "step": 2879 }, { "epoch": 0.23, "grad_norm": 1.5209167131297223, "learning_rate": 8.983302213478525e-06, "loss": 0.2365, "step": 2880 }, { "epoch": 0.23, "grad_norm": 1.4976221426758953, "learning_rate": 8.982519043167888e-06, "loss": 0.2089, "step": 2881 }, { "epoch": 0.23, "grad_norm": 1.4877203565527528, "learning_rate": 8.981735605498683e-06, "loss": 0.2168, "step": 2882 }, { "epoch": 0.23, "grad_norm": 1.4102290530863708, "learning_rate": 8.9809519005235e-06, "loss": 0.1862, "step": 2883 }, { "epoch": 0.23, "grad_norm": 6.686511939581189, "learning_rate": 8.980167928294956e-06, "loss": 0.6404, "step": 2884 }, { "epoch": 0.23, "grad_norm": 1.3116354613445917, "learning_rate": 8.97938368886568e-06, "loss": 0.2243, "step": 2885 }, { "epoch": 0.23, "grad_norm": 1.3796307310451612, "learning_rate": 8.978599182288319e-06, "loss": 0.2369, "step": 2886 }, { "epoch": 0.23, "grad_norm": 1.3931589519539738, "learning_rate": 8.97781440861554e-06, "loss": 0.191, "step": 2887 }, { "epoch": 0.23, "grad_norm": 8.596134309935943, "learning_rate": 8.977029367900028e-06, "loss": 0.5147, "step": 2888 }, { "epoch": 0.23, "grad_norm": 1.6230139815261981, "learning_rate": 8.976244060194484e-06, "loss": 0.2381, "step": 2889 }, { "epoch": 0.23, "grad_norm": 1.3575497466361595, "learning_rate": 8.97545848555163e-06, "loss": 0.194, "step": 2890 }, { "epoch": 0.23, "grad_norm": 1.5174563498139526, "learning_rate": 8.9746726440242e-06, "loss": 0.211, "step": 2891 }, { "epoch": 0.23, "grad_norm": 1.451068308935107, "learning_rate": 8.973886535664954e-06, "loss": 0.2456, "step": 2892 }, { "epoch": 0.23, "grad_norm": 1.3813627297283715, "learning_rate": 8.973100160526666e-06, "loss": 0.1608, "step": 2893 }, { "epoch": 0.23, "grad_norm": 1.4438076529003137, "learning_rate": 8.972313518662125e-06, "loss": 0.2236, "step": 2894 }, { "epoch": 0.23, "grad_norm": 6.942817792351023, "learning_rate": 8.971526610124142e-06, "loss": 0.5243, "step": 2895 }, { "epoch": 0.23, "grad_norm": 1.3242244202244997, "learning_rate": 8.970739434965544e-06, "loss": 0.1937, "step": 2896 }, { "epoch": 0.23, "grad_norm": 1.2694900034886134, "learning_rate": 8.969951993239177e-06, "loss": 0.1772, "step": 2897 }, { "epoch": 0.23, "grad_norm": 1.6052848770131556, "learning_rate": 8.969164284997905e-06, "loss": 0.2432, "step": 2898 }, { "epoch": 0.23, "grad_norm": 1.4316202837576615, "learning_rate": 8.968376310294608e-06, "loss": 0.1867, "step": 2899 }, { "epoch": 0.23, "grad_norm": 1.2331308581215907, "learning_rate": 8.967588069182184e-06, "loss": 0.1828, "step": 2900 }, { "epoch": 0.23, "grad_norm": 5.450441644844448, "learning_rate": 8.966799561713556e-06, "loss": 0.3339, "step": 2901 }, { "epoch": 0.23, "grad_norm": 1.3921228475439893, "learning_rate": 8.96601078794165e-06, "loss": 0.1921, "step": 2902 }, { "epoch": 0.23, "grad_norm": 1.5342836828338007, "learning_rate": 8.965221747919424e-06, "loss": 0.2616, "step": 2903 }, { "epoch": 0.23, "grad_norm": 1.3307995401038824, "learning_rate": 8.964432441699848e-06, "loss": 0.1908, "step": 2904 }, { "epoch": 0.23, "grad_norm": 1.4488465674166047, "learning_rate": 8.963642869335913e-06, "loss": 0.2138, "step": 2905 }, { "epoch": 0.23, "grad_norm": 1.2844722805684925, "learning_rate": 8.96285303088062e-06, "loss": 0.1935, "step": 2906 }, { "epoch": 0.23, "grad_norm": 1.4756924621700949, "learning_rate": 8.962062926386998e-06, "loss": 0.2027, "step": 2907 }, { "epoch": 0.23, "grad_norm": 1.4807284157803684, "learning_rate": 8.961272555908084e-06, "loss": 0.2689, "step": 2908 }, { "epoch": 0.23, "grad_norm": 1.4348991029678073, "learning_rate": 8.960481919496944e-06, "loss": 0.2156, "step": 2909 }, { "epoch": 0.23, "grad_norm": 1.3105098253428948, "learning_rate": 8.959691017206653e-06, "loss": 0.2353, "step": 2910 }, { "epoch": 0.23, "grad_norm": 1.563290091567969, "learning_rate": 8.958899849090306e-06, "loss": 0.2219, "step": 2911 }, { "epoch": 0.23, "grad_norm": 1.534905143277963, "learning_rate": 8.958108415201017e-06, "loss": 0.2411, "step": 2912 }, { "epoch": 0.23, "grad_norm": 1.3994036775656344, "learning_rate": 8.957316715591918e-06, "loss": 0.1846, "step": 2913 }, { "epoch": 0.23, "grad_norm": 1.5292688357309916, "learning_rate": 8.956524750316158e-06, "loss": 0.2362, "step": 2914 }, { "epoch": 0.23, "grad_norm": 1.4516891801375305, "learning_rate": 8.955732519426902e-06, "loss": 0.2289, "step": 2915 }, { "epoch": 0.23, "grad_norm": 4.141907338968659, "learning_rate": 8.954940022977338e-06, "loss": 0.6361, "step": 2916 }, { "epoch": 0.23, "grad_norm": 1.412193324937404, "learning_rate": 8.954147261020667e-06, "loss": 0.2478, "step": 2917 }, { "epoch": 0.23, "grad_norm": 1.5285618242672612, "learning_rate": 8.95335423361011e-06, "loss": 0.1927, "step": 2918 }, { "epoch": 0.23, "grad_norm": 1.4654694347043942, "learning_rate": 8.952560940798905e-06, "loss": 0.212, "step": 2919 }, { "epoch": 0.23, "grad_norm": 1.4790847429447933, "learning_rate": 8.951767382640308e-06, "loss": 0.2413, "step": 2920 }, { "epoch": 0.23, "grad_norm": 1.3821051837903717, "learning_rate": 8.950973559187593e-06, "loss": 0.1719, "step": 2921 }, { "epoch": 0.23, "grad_norm": 1.236684201702628, "learning_rate": 8.950179470494051e-06, "loss": 0.1936, "step": 2922 }, { "epoch": 0.23, "grad_norm": 6.578188524537762, "learning_rate": 8.949385116612994e-06, "loss": 0.6843, "step": 2923 }, { "epoch": 0.23, "grad_norm": 4.439585453891126, "learning_rate": 8.948590497597749e-06, "loss": 0.6007, "step": 2924 }, { "epoch": 0.23, "grad_norm": 1.344027656495916, "learning_rate": 8.947795613501658e-06, "loss": 0.2326, "step": 2925 }, { "epoch": 0.23, "grad_norm": 1.4299267085226401, "learning_rate": 8.947000464378088e-06, "loss": 0.2543, "step": 2926 }, { "epoch": 0.23, "grad_norm": 1.4909179275638866, "learning_rate": 8.946205050280417e-06, "loss": 0.2421, "step": 2927 }, { "epoch": 0.23, "grad_norm": 5.179269879670114, "learning_rate": 8.945409371262044e-06, "loss": 0.5028, "step": 2928 }, { "epoch": 0.23, "grad_norm": 1.4427496160945599, "learning_rate": 8.944613427376385e-06, "loss": 0.2538, "step": 2929 }, { "epoch": 0.23, "grad_norm": 1.4328889098681798, "learning_rate": 8.943817218676877e-06, "loss": 0.2338, "step": 2930 }, { "epoch": 0.23, "grad_norm": 6.061666144203288, "learning_rate": 8.943020745216968e-06, "loss": 0.4691, "step": 2931 }, { "epoch": 0.23, "grad_norm": 1.438352709413, "learning_rate": 8.942224007050131e-06, "loss": 0.2503, "step": 2932 }, { "epoch": 0.23, "grad_norm": 1.4303951356733517, "learning_rate": 8.941427004229851e-06, "loss": 0.2265, "step": 2933 }, { "epoch": 0.23, "grad_norm": 1.4124173642321693, "learning_rate": 8.940629736809635e-06, "loss": 0.2468, "step": 2934 }, { "epoch": 0.23, "grad_norm": 1.235961248443977, "learning_rate": 8.939832204843003e-06, "loss": 0.2217, "step": 2935 }, { "epoch": 0.23, "grad_norm": 1.2548051161104445, "learning_rate": 8.939034408383502e-06, "loss": 0.1945, "step": 2936 }, { "epoch": 0.23, "grad_norm": 1.490504818831335, "learning_rate": 8.938236347484684e-06, "loss": 0.2447, "step": 2937 }, { "epoch": 0.24, "grad_norm": 1.3978936944130946, "learning_rate": 8.937438022200126e-06, "loss": 0.2327, "step": 2938 }, { "epoch": 0.24, "grad_norm": 1.3862364734631518, "learning_rate": 8.936639432583424e-06, "loss": 0.2009, "step": 2939 }, { "epoch": 0.24, "grad_norm": 1.3877759778435041, "learning_rate": 8.935840578688191e-06, "loss": 0.2141, "step": 2940 }, { "epoch": 0.24, "grad_norm": 1.4200776610196197, "learning_rate": 8.935041460568055e-06, "loss": 0.2228, "step": 2941 }, { "epoch": 0.24, "grad_norm": 1.475752988629331, "learning_rate": 8.934242078276662e-06, "loss": 0.1913, "step": 2942 }, { "epoch": 0.24, "grad_norm": 1.2676661732762509, "learning_rate": 8.933442431867678e-06, "loss": 0.1868, "step": 2943 }, { "epoch": 0.24, "grad_norm": 8.009595564956046, "learning_rate": 8.932642521394786e-06, "loss": 0.6896, "step": 2944 }, { "epoch": 0.24, "grad_norm": 8.923425990247967, "learning_rate": 8.931842346911688e-06, "loss": 0.7366, "step": 2945 }, { "epoch": 0.24, "grad_norm": 1.5496012799458294, "learning_rate": 8.931041908472098e-06, "loss": 0.1953, "step": 2946 }, { "epoch": 0.24, "grad_norm": 1.4694107537187513, "learning_rate": 8.930241206129754e-06, "loss": 0.2124, "step": 2947 }, { "epoch": 0.24, "grad_norm": 6.156646506399253, "learning_rate": 8.929440239938409e-06, "loss": 0.6602, "step": 2948 }, { "epoch": 0.24, "grad_norm": 8.913468333245076, "learning_rate": 8.928639009951837e-06, "loss": 0.6046, "step": 2949 }, { "epoch": 0.24, "grad_norm": 1.465254233573523, "learning_rate": 8.927837516223824e-06, "loss": 0.2328, "step": 2950 }, { "epoch": 0.24, "grad_norm": 1.6024147367264667, "learning_rate": 8.927035758808178e-06, "loss": 0.2904, "step": 2951 }, { "epoch": 0.24, "grad_norm": 6.097798826496697, "learning_rate": 8.926233737758722e-06, "loss": 0.6091, "step": 2952 }, { "epoch": 0.24, "grad_norm": 1.368522890277418, "learning_rate": 8.9254314531293e-06, "loss": 0.1999, "step": 2953 }, { "epoch": 0.24, "grad_norm": 1.4722783020652448, "learning_rate": 8.924628904973771e-06, "loss": 0.2357, "step": 2954 }, { "epoch": 0.24, "grad_norm": 1.4069748491539367, "learning_rate": 8.923826093346013e-06, "loss": 0.2291, "step": 2955 }, { "epoch": 0.24, "grad_norm": 1.5562678957802336, "learning_rate": 8.92302301829992e-06, "loss": 0.1873, "step": 2956 }, { "epoch": 0.24, "grad_norm": 1.28741795386435, "learning_rate": 8.922219679889406e-06, "loss": 0.1802, "step": 2957 }, { "epoch": 0.24, "grad_norm": 1.5606671655602036, "learning_rate": 8.9214160781684e-06, "loss": 0.1958, "step": 2958 }, { "epoch": 0.24, "grad_norm": 6.789161722564476, "learning_rate": 8.92061221319085e-06, "loss": 0.7222, "step": 2959 }, { "epoch": 0.24, "grad_norm": 1.3413033486236767, "learning_rate": 8.919808085010726e-06, "loss": 0.18, "step": 2960 }, { "epoch": 0.24, "grad_norm": 1.3772492899815412, "learning_rate": 8.919003693682008e-06, "loss": 0.2054, "step": 2961 }, { "epoch": 0.24, "grad_norm": 7.142316489802902, "learning_rate": 8.918199039258697e-06, "loss": 0.5801, "step": 2962 }, { "epoch": 0.24, "grad_norm": 1.2335058545110258, "learning_rate": 8.917394121794814e-06, "loss": 0.2602, "step": 2963 }, { "epoch": 0.24, "grad_norm": 1.3579528788615296, "learning_rate": 8.916588941344393e-06, "loss": 0.2342, "step": 2964 }, { "epoch": 0.24, "grad_norm": 1.3640735825962527, "learning_rate": 8.915783497961492e-06, "loss": 0.1982, "step": 2965 }, { "epoch": 0.24, "grad_norm": 1.350564116499505, "learning_rate": 8.914977791700178e-06, "loss": 0.1843, "step": 2966 }, { "epoch": 0.24, "grad_norm": 5.3814629638578175, "learning_rate": 8.914171822614543e-06, "loss": 0.6641, "step": 2967 }, { "epoch": 0.24, "grad_norm": 1.3047470511677055, "learning_rate": 8.913365590758695e-06, "loss": 0.1865, "step": 2968 }, { "epoch": 0.24, "grad_norm": 1.2583590551135229, "learning_rate": 8.912559096186759e-06, "loss": 0.1654, "step": 2969 }, { "epoch": 0.24, "grad_norm": 1.4165514591125556, "learning_rate": 8.911752338952875e-06, "loss": 0.2117, "step": 2970 }, { "epoch": 0.24, "grad_norm": 4.52258832302981, "learning_rate": 8.910945319111204e-06, "loss": 0.4941, "step": 2971 }, { "epoch": 0.24, "grad_norm": 1.4804116563106005, "learning_rate": 8.910138036715924e-06, "loss": 0.2239, "step": 2972 }, { "epoch": 0.24, "grad_norm": 1.582575054319406, "learning_rate": 8.90933049182123e-06, "loss": 0.1789, "step": 2973 }, { "epoch": 0.24, "grad_norm": 1.3345109313951915, "learning_rate": 8.908522684481336e-06, "loss": 0.1995, "step": 2974 }, { "epoch": 0.24, "grad_norm": 1.3138534937746604, "learning_rate": 8.907714614750473e-06, "loss": 0.1914, "step": 2975 }, { "epoch": 0.24, "grad_norm": 1.5972900555668406, "learning_rate": 8.906906282682886e-06, "loss": 0.2433, "step": 2976 }, { "epoch": 0.24, "grad_norm": 1.4077482613342298, "learning_rate": 8.906097688332844e-06, "loss": 0.2262, "step": 2977 }, { "epoch": 0.24, "grad_norm": 1.3154436412799022, "learning_rate": 8.905288831754628e-06, "loss": 0.1838, "step": 2978 }, { "epoch": 0.24, "grad_norm": 75.61717082444883, "learning_rate": 8.904479713002542e-06, "loss": 0.5783, "step": 2979 }, { "epoch": 0.24, "grad_norm": 1.591026012209408, "learning_rate": 8.9036703321309e-06, "loss": 0.2478, "step": 2980 }, { "epoch": 0.24, "grad_norm": 1.4679314210011754, "learning_rate": 8.902860689194044e-06, "loss": 0.2087, "step": 2981 }, { "epoch": 0.24, "grad_norm": 1.309384892215417, "learning_rate": 8.902050784246324e-06, "loss": 0.2452, "step": 2982 }, { "epoch": 0.24, "grad_norm": 1.4098651448350585, "learning_rate": 8.901240617342111e-06, "loss": 0.215, "step": 2983 }, { "epoch": 0.24, "grad_norm": 1.5564166714820489, "learning_rate": 8.900430188535796e-06, "loss": 0.2637, "step": 2984 }, { "epoch": 0.24, "grad_norm": 1.3766444097573167, "learning_rate": 8.899619497881784e-06, "loss": 0.2014, "step": 2985 }, { "epoch": 0.24, "grad_norm": 1.3376982352528928, "learning_rate": 8.8988085454345e-06, "loss": 0.1921, "step": 2986 }, { "epoch": 0.24, "grad_norm": 6.423813842254584, "learning_rate": 8.897997331248384e-06, "loss": 0.5051, "step": 2987 }, { "epoch": 0.24, "grad_norm": 6.08711338762155, "learning_rate": 8.8971858553779e-06, "loss": 0.635, "step": 2988 }, { "epoch": 0.24, "grad_norm": 1.4570796969531588, "learning_rate": 8.896374117877519e-06, "loss": 0.2543, "step": 2989 }, { "epoch": 0.24, "grad_norm": 1.439104010044396, "learning_rate": 8.895562118801739e-06, "loss": 0.2395, "step": 2990 }, { "epoch": 0.24, "grad_norm": 1.4181778375549878, "learning_rate": 8.89474985820507e-06, "loss": 0.2624, "step": 2991 }, { "epoch": 0.24, "grad_norm": 1.3671217247810508, "learning_rate": 8.893937336142043e-06, "loss": 0.2422, "step": 2992 }, { "epoch": 0.24, "grad_norm": 1.518766375842734, "learning_rate": 8.893124552667203e-06, "loss": 0.1654, "step": 2993 }, { "epoch": 0.24, "grad_norm": 1.4632857869933948, "learning_rate": 8.892311507835118e-06, "loss": 0.2439, "step": 2994 }, { "epoch": 0.24, "grad_norm": 7.9888690892946785, "learning_rate": 8.891498201700368e-06, "loss": 0.6135, "step": 2995 }, { "epoch": 0.24, "grad_norm": 1.431313920502848, "learning_rate": 8.890684634317552e-06, "loss": 0.265, "step": 2996 }, { "epoch": 0.24, "grad_norm": 1.53475140058515, "learning_rate": 8.889870805741288e-06, "loss": 0.2525, "step": 2997 }, { "epoch": 0.24, "grad_norm": 1.2841874124414565, "learning_rate": 8.889056716026213e-06, "loss": 0.2127, "step": 2998 }, { "epoch": 0.24, "grad_norm": 5.218451917714868, "learning_rate": 8.888242365226975e-06, "loss": 0.6813, "step": 2999 }, { "epoch": 0.24, "grad_norm": 1.325017544413177, "learning_rate": 8.887427753398249e-06, "loss": 0.2094, "step": 3000 }, { "epoch": 0.24, "grad_norm": 6.759196484638667, "learning_rate": 8.886612880594715e-06, "loss": 0.7537, "step": 3001 }, { "epoch": 0.24, "grad_norm": 1.2921386736848803, "learning_rate": 8.885797746871085e-06, "loss": 0.2751, "step": 3002 }, { "epoch": 0.24, "grad_norm": 1.3224681819846724, "learning_rate": 8.884982352282078e-06, "loss": 0.2141, "step": 3003 }, { "epoch": 0.24, "grad_norm": 1.359173149780651, "learning_rate": 8.884166696882436e-06, "loss": 0.1897, "step": 3004 }, { "epoch": 0.24, "grad_norm": 1.5703107608514189, "learning_rate": 8.883350780726915e-06, "loss": 0.2272, "step": 3005 }, { "epoch": 0.24, "grad_norm": 1.5044476234016868, "learning_rate": 8.88253460387029e-06, "loss": 0.2273, "step": 3006 }, { "epoch": 0.24, "grad_norm": 1.4696345431868707, "learning_rate": 8.881718166367353e-06, "loss": 0.1896, "step": 3007 }, { "epoch": 0.24, "grad_norm": 1.4119886546584697, "learning_rate": 8.880901468272913e-06, "loss": 0.2214, "step": 3008 }, { "epoch": 0.24, "grad_norm": 1.460589931004384, "learning_rate": 8.8800845096418e-06, "loss": 0.252, "step": 3009 }, { "epoch": 0.24, "grad_norm": 1.3064104514517634, "learning_rate": 8.87926729052886e-06, "loss": 0.1851, "step": 3010 }, { "epoch": 0.24, "grad_norm": 1.3861896346737865, "learning_rate": 8.87844981098895e-06, "loss": 0.2041, "step": 3011 }, { "epoch": 0.24, "grad_norm": 5.207236718994992, "learning_rate": 8.877632071076952e-06, "loss": 0.4273, "step": 3012 }, { "epoch": 0.24, "grad_norm": 1.3922746374429682, "learning_rate": 8.876814070847766e-06, "loss": 0.205, "step": 3013 }, { "epoch": 0.24, "grad_norm": 4.368476905950356, "learning_rate": 8.875995810356306e-06, "loss": 0.6352, "step": 3014 }, { "epoch": 0.24, "grad_norm": 1.627212978570021, "learning_rate": 8.875177289657502e-06, "loss": 0.299, "step": 3015 }, { "epoch": 0.24, "grad_norm": 4.688386773371714, "learning_rate": 8.874358508806306e-06, "loss": 0.4555, "step": 3016 }, { "epoch": 0.24, "grad_norm": 1.2601481112015893, "learning_rate": 8.873539467857683e-06, "loss": 0.1944, "step": 3017 }, { "epoch": 0.24, "grad_norm": 1.489229566544062, "learning_rate": 8.872720166866623e-06, "loss": 0.1905, "step": 3018 }, { "epoch": 0.24, "grad_norm": 1.3897923476764766, "learning_rate": 8.871900605888121e-06, "loss": 0.2158, "step": 3019 }, { "epoch": 0.24, "grad_norm": 1.5560663938837846, "learning_rate": 8.8710807849772e-06, "loss": 0.2506, "step": 3020 }, { "epoch": 0.24, "grad_norm": 1.4041101343418005, "learning_rate": 8.870260704188897e-06, "loss": 0.2276, "step": 3021 }, { "epoch": 0.24, "grad_norm": 4.833725070068477, "learning_rate": 8.869440363578267e-06, "loss": 0.3166, "step": 3022 }, { "epoch": 0.24, "grad_norm": 1.3935390529440868, "learning_rate": 8.868619763200384e-06, "loss": 0.2166, "step": 3023 }, { "epoch": 0.24, "grad_norm": 1.4822744324400534, "learning_rate": 8.867798903110331e-06, "loss": 0.221, "step": 3024 }, { "epoch": 0.24, "grad_norm": 1.2986363180338931, "learning_rate": 8.866977783363219e-06, "loss": 0.2096, "step": 3025 }, { "epoch": 0.24, "grad_norm": 1.3982047314246508, "learning_rate": 8.866156404014175e-06, "loss": 0.1789, "step": 3026 }, { "epoch": 0.24, "grad_norm": 1.5540256850621013, "learning_rate": 8.865334765118335e-06, "loss": 0.1916, "step": 3027 }, { "epoch": 0.24, "grad_norm": 1.5869607996291173, "learning_rate": 8.864512866730862e-06, "loss": 0.2259, "step": 3028 }, { "epoch": 0.24, "grad_norm": 1.4862388865873297, "learning_rate": 8.863690708906931e-06, "loss": 0.1961, "step": 3029 }, { "epoch": 0.24, "grad_norm": 1.4146381242860455, "learning_rate": 8.862868291701735e-06, "loss": 0.2056, "step": 3030 }, { "epoch": 0.24, "grad_norm": 1.2485742860820093, "learning_rate": 8.862045615170487e-06, "loss": 0.1799, "step": 3031 }, { "epoch": 0.24, "grad_norm": 1.4809931990909388, "learning_rate": 8.861222679368416e-06, "loss": 0.1888, "step": 3032 }, { "epoch": 0.24, "grad_norm": 1.3767173889362387, "learning_rate": 8.860399484350768e-06, "loss": 0.2001, "step": 3033 }, { "epoch": 0.24, "grad_norm": 1.43973407582441, "learning_rate": 8.859576030172804e-06, "loss": 0.2414, "step": 3034 }, { "epoch": 0.24, "grad_norm": 4.399524638135557, "learning_rate": 8.858752316889809e-06, "loss": 0.6516, "step": 3035 }, { "epoch": 0.24, "grad_norm": 1.5671356376652963, "learning_rate": 8.857928344557079e-06, "loss": 0.2693, "step": 3036 }, { "epoch": 0.24, "grad_norm": 7.866467922566804, "learning_rate": 8.857104113229929e-06, "loss": 0.6296, "step": 3037 }, { "epoch": 0.24, "grad_norm": 1.3960610482910183, "learning_rate": 8.856279622963694e-06, "loss": 0.2196, "step": 3038 }, { "epoch": 0.24, "grad_norm": 1.3575735353763143, "learning_rate": 8.855454873813724e-06, "loss": 0.2346, "step": 3039 }, { "epoch": 0.24, "grad_norm": 1.3132629173162411, "learning_rate": 8.854629865835387e-06, "loss": 0.2147, "step": 3040 }, { "epoch": 0.24, "grad_norm": 4.919591987793346, "learning_rate": 8.853804599084068e-06, "loss": 0.5371, "step": 3041 }, { "epoch": 0.24, "grad_norm": 1.2964773220704293, "learning_rate": 8.852979073615172e-06, "loss": 0.206, "step": 3042 }, { "epoch": 0.24, "grad_norm": 1.3785364084279794, "learning_rate": 8.852153289484114e-06, "loss": 0.2209, "step": 3043 }, { "epoch": 0.24, "grad_norm": 1.4406195320583162, "learning_rate": 8.851327246746334e-06, "loss": 0.2299, "step": 3044 }, { "epoch": 0.24, "grad_norm": 1.3430280094850309, "learning_rate": 8.850500945457286e-06, "loss": 0.1909, "step": 3045 }, { "epoch": 0.24, "grad_norm": 1.3193913896377703, "learning_rate": 8.849674385672444e-06, "loss": 0.2253, "step": 3046 }, { "epoch": 0.24, "grad_norm": 1.4175866522258325, "learning_rate": 8.848847567447298e-06, "loss": 0.1864, "step": 3047 }, { "epoch": 0.24, "grad_norm": 1.4087022432975331, "learning_rate": 8.848020490837352e-06, "loss": 0.2492, "step": 3048 }, { "epoch": 0.24, "grad_norm": 1.2203036704738788, "learning_rate": 8.84719315589813e-06, "loss": 0.1998, "step": 3049 }, { "epoch": 0.24, "grad_norm": 1.2001359681554475, "learning_rate": 8.846365562685178e-06, "loss": 0.171, "step": 3050 }, { "epoch": 0.24, "grad_norm": 1.5737706668660134, "learning_rate": 8.845537711254048e-06, "loss": 0.3049, "step": 3051 }, { "epoch": 0.24, "grad_norm": 9.506734592547174, "learning_rate": 8.844709601660323e-06, "loss": 0.6631, "step": 3052 }, { "epoch": 0.24, "grad_norm": 1.3029872973433998, "learning_rate": 8.843881233959592e-06, "loss": 0.2318, "step": 3053 }, { "epoch": 0.24, "grad_norm": 1.5730809171370128, "learning_rate": 8.843052608207468e-06, "loss": 0.2274, "step": 3054 }, { "epoch": 0.24, "grad_norm": 1.3845940713402958, "learning_rate": 8.842223724459578e-06, "loss": 0.1717, "step": 3055 }, { "epoch": 0.24, "grad_norm": 1.473633082083225, "learning_rate": 8.841394582771568e-06, "loss": 0.1881, "step": 3056 }, { "epoch": 0.24, "grad_norm": 5.402038116612309, "learning_rate": 8.840565183199102e-06, "loss": 0.4127, "step": 3057 }, { "epoch": 0.24, "grad_norm": 1.4322500257667174, "learning_rate": 8.839735525797857e-06, "loss": 0.2449, "step": 3058 }, { "epoch": 0.24, "grad_norm": 1.3700656809863976, "learning_rate": 8.838905610623532e-06, "loss": 0.2159, "step": 3059 }, { "epoch": 0.24, "grad_norm": 1.3712450480118383, "learning_rate": 8.838075437731844e-06, "loss": 0.214, "step": 3060 }, { "epoch": 0.24, "grad_norm": 1.4756749356418306, "learning_rate": 8.837245007178522e-06, "loss": 0.2307, "step": 3061 }, { "epoch": 0.24, "grad_norm": 1.1130716994336955, "learning_rate": 8.836414319019314e-06, "loss": 0.1668, "step": 3062 }, { "epoch": 0.25, "grad_norm": 1.3863741235710332, "learning_rate": 8.83558337330999e-06, "loss": 0.2495, "step": 3063 }, { "epoch": 0.25, "grad_norm": 1.1567591748237191, "learning_rate": 8.834752170106334e-06, "loss": 0.1738, "step": 3064 }, { "epoch": 0.25, "grad_norm": 1.4198624477188664, "learning_rate": 8.833920709464146e-06, "loss": 0.2311, "step": 3065 }, { "epoch": 0.25, "grad_norm": 1.2219532907420627, "learning_rate": 8.833088991439245e-06, "loss": 0.1676, "step": 3066 }, { "epoch": 0.25, "grad_norm": 1.3332142493078298, "learning_rate": 8.832257016087464e-06, "loss": 0.1981, "step": 3067 }, { "epoch": 0.25, "grad_norm": 1.3883019204283449, "learning_rate": 8.83142478346466e-06, "loss": 0.2167, "step": 3068 }, { "epoch": 0.25, "grad_norm": 1.4719396635417274, "learning_rate": 8.830592293626702e-06, "loss": 0.2073, "step": 3069 }, { "epoch": 0.25, "grad_norm": 1.3366231586690822, "learning_rate": 8.829759546629474e-06, "loss": 0.2236, "step": 3070 }, { "epoch": 0.25, "grad_norm": 5.251079760118595, "learning_rate": 8.828926542528888e-06, "loss": 0.685, "step": 3071 }, { "epoch": 0.25, "grad_norm": 1.6107929707361612, "learning_rate": 8.828093281380859e-06, "loss": 0.2302, "step": 3072 }, { "epoch": 0.25, "grad_norm": 1.2683237733034751, "learning_rate": 8.82725976324133e-06, "loss": 0.1966, "step": 3073 }, { "epoch": 0.25, "grad_norm": 1.2554818995645787, "learning_rate": 8.826425988166259e-06, "loss": 0.178, "step": 3074 }, { "epoch": 0.25, "grad_norm": 1.555182200381952, "learning_rate": 8.825591956211614e-06, "loss": 0.2474, "step": 3075 }, { "epoch": 0.25, "grad_norm": 1.3657277195365287, "learning_rate": 8.824757667433392e-06, "loss": 0.2156, "step": 3076 }, { "epoch": 0.25, "grad_norm": 1.3262778738154213, "learning_rate": 8.8239231218876e-06, "loss": 0.2073, "step": 3077 }, { "epoch": 0.25, "grad_norm": 1.3479177643160096, "learning_rate": 8.823088319630262e-06, "loss": 0.2461, "step": 3078 }, { "epoch": 0.25, "grad_norm": 1.1996438746816949, "learning_rate": 8.822253260717422e-06, "loss": 0.1436, "step": 3079 }, { "epoch": 0.25, "grad_norm": 1.4847628824101158, "learning_rate": 8.82141794520514e-06, "loss": 0.2517, "step": 3080 }, { "epoch": 0.25, "grad_norm": 1.4384736220957275, "learning_rate": 8.820582373149491e-06, "loss": 0.2243, "step": 3081 }, { "epoch": 0.25, "grad_norm": 1.3950071344974906, "learning_rate": 8.819746544606573e-06, "loss": 0.2355, "step": 3082 }, { "epoch": 0.25, "grad_norm": 1.3244181944903652, "learning_rate": 8.818910459632495e-06, "loss": 0.1694, "step": 3083 }, { "epoch": 0.25, "grad_norm": 1.4289850554357482, "learning_rate": 8.818074118283389e-06, "loss": 0.2537, "step": 3084 }, { "epoch": 0.25, "grad_norm": 1.5370309517445233, "learning_rate": 8.817237520615398e-06, "loss": 0.2272, "step": 3085 }, { "epoch": 0.25, "grad_norm": 1.4312273013700494, "learning_rate": 8.816400666684685e-06, "loss": 0.1803, "step": 3086 }, { "epoch": 0.25, "grad_norm": 1.6924668865681711, "learning_rate": 8.815563556547434e-06, "loss": 0.2733, "step": 3087 }, { "epoch": 0.25, "grad_norm": 1.2630884716859379, "learning_rate": 8.81472619025984e-06, "loss": 0.1854, "step": 3088 }, { "epoch": 0.25, "grad_norm": 5.838163449231494, "learning_rate": 8.81388856787812e-06, "loss": 0.7649, "step": 3089 }, { "epoch": 0.25, "grad_norm": 1.2981509880149225, "learning_rate": 8.813050689458502e-06, "loss": 0.1963, "step": 3090 }, { "epoch": 0.25, "grad_norm": 1.5487570226763947, "learning_rate": 8.81221255505724e-06, "loss": 0.2355, "step": 3091 }, { "epoch": 0.25, "grad_norm": 1.5859963702678124, "learning_rate": 8.811374164730599e-06, "loss": 0.2526, "step": 3092 }, { "epoch": 0.25, "grad_norm": 1.3696084796315757, "learning_rate": 8.810535518534862e-06, "loss": 0.1768, "step": 3093 }, { "epoch": 0.25, "grad_norm": 1.3563731310390237, "learning_rate": 8.80969661652633e-06, "loss": 0.2235, "step": 3094 }, { "epoch": 0.25, "grad_norm": 1.3636269619546166, "learning_rate": 8.80885745876132e-06, "loss": 0.2011, "step": 3095 }, { "epoch": 0.25, "grad_norm": 1.4851621407302664, "learning_rate": 8.80801804529617e-06, "loss": 0.2356, "step": 3096 }, { "epoch": 0.25, "grad_norm": 1.4372345325521587, "learning_rate": 8.80717837618723e-06, "loss": 0.2592, "step": 3097 }, { "epoch": 0.25, "grad_norm": 1.3441734166989745, "learning_rate": 8.80633845149087e-06, "loss": 0.2015, "step": 3098 }, { "epoch": 0.25, "grad_norm": 1.5511142489781486, "learning_rate": 8.805498271263477e-06, "loss": 0.225, "step": 3099 }, { "epoch": 0.25, "grad_norm": 1.3048253057726924, "learning_rate": 8.804657835561456e-06, "loss": 0.1466, "step": 3100 }, { "epoch": 0.25, "grad_norm": 1.424801274070495, "learning_rate": 8.803817144441227e-06, "loss": 0.1925, "step": 3101 }, { "epoch": 0.25, "grad_norm": 1.4577061990860265, "learning_rate": 8.802976197959228e-06, "loss": 0.2214, "step": 3102 }, { "epoch": 0.25, "grad_norm": 1.2734805013100003, "learning_rate": 8.802134996171913e-06, "loss": 0.2177, "step": 3103 }, { "epoch": 0.25, "grad_norm": 1.5259337578505816, "learning_rate": 8.801293539135755e-06, "loss": 0.2543, "step": 3104 }, { "epoch": 0.25, "grad_norm": 1.2175613357682593, "learning_rate": 8.800451826907245e-06, "loss": 0.1846, "step": 3105 }, { "epoch": 0.25, "grad_norm": 1.5235447681275573, "learning_rate": 8.79960985954289e-06, "loss": 0.1922, "step": 3106 }, { "epoch": 0.25, "grad_norm": 1.3490605782380212, "learning_rate": 8.798767637099212e-06, "loss": 0.2244, "step": 3107 }, { "epoch": 0.25, "grad_norm": 1.467369010492928, "learning_rate": 8.797925159632753e-06, "loss": 0.1963, "step": 3108 }, { "epoch": 0.25, "grad_norm": 7.017788081316822, "learning_rate": 8.79708242720007e-06, "loss": 0.7436, "step": 3109 }, { "epoch": 0.25, "grad_norm": 7.861642620861123, "learning_rate": 8.79623943985774e-06, "loss": 0.6662, "step": 3110 }, { "epoch": 0.25, "grad_norm": 1.5083832891400009, "learning_rate": 8.795396197662355e-06, "loss": 0.2602, "step": 3111 }, { "epoch": 0.25, "grad_norm": 1.418825262351778, "learning_rate": 8.794552700670522e-06, "loss": 0.2081, "step": 3112 }, { "epoch": 0.25, "grad_norm": 1.4549352327666223, "learning_rate": 8.79370894893887e-06, "loss": 0.2038, "step": 3113 }, { "epoch": 0.25, "grad_norm": 1.3306174978595904, "learning_rate": 8.792864942524042e-06, "loss": 0.1953, "step": 3114 }, { "epoch": 0.25, "grad_norm": 1.4253736787129068, "learning_rate": 8.792020681482698e-06, "loss": 0.1935, "step": 3115 }, { "epoch": 0.25, "grad_norm": 1.3800218047018245, "learning_rate": 8.791176165871515e-06, "loss": 0.2051, "step": 3116 }, { "epoch": 0.25, "grad_norm": 28.457137505952314, "learning_rate": 8.79033139574719e-06, "loss": 0.7992, "step": 3117 }, { "epoch": 0.25, "grad_norm": 1.2151133082444072, "learning_rate": 8.789486371166435e-06, "loss": 0.2043, "step": 3118 }, { "epoch": 0.25, "grad_norm": 4.9588462300076515, "learning_rate": 8.788641092185978e-06, "loss": 0.6187, "step": 3119 }, { "epoch": 0.25, "grad_norm": 1.2413584015854517, "learning_rate": 8.787795558862566e-06, "loss": 0.1588, "step": 3120 }, { "epoch": 0.25, "grad_norm": 1.281017448431733, "learning_rate": 8.786949771252961e-06, "loss": 0.2258, "step": 3121 }, { "epoch": 0.25, "grad_norm": 1.2979125104901477, "learning_rate": 8.786103729413944e-06, "loss": 0.2264, "step": 3122 }, { "epoch": 0.25, "grad_norm": 3.9464230852850775, "learning_rate": 8.785257433402311e-06, "loss": 0.5349, "step": 3123 }, { "epoch": 0.25, "grad_norm": 4.496713733707055, "learning_rate": 8.784410883274879e-06, "loss": 0.5529, "step": 3124 }, { "epoch": 0.25, "grad_norm": 1.4078929472123278, "learning_rate": 8.783564079088478e-06, "loss": 0.2158, "step": 3125 }, { "epoch": 0.25, "grad_norm": 1.3314309201145713, "learning_rate": 8.782717020899957e-06, "loss": 0.1986, "step": 3126 }, { "epoch": 0.25, "grad_norm": 1.268109254509458, "learning_rate": 8.781869708766179e-06, "loss": 0.2266, "step": 3127 }, { "epoch": 0.25, "grad_norm": 1.3703667159421709, "learning_rate": 8.781022142744028e-06, "loss": 0.2077, "step": 3128 }, { "epoch": 0.25, "grad_norm": 1.221546913753352, "learning_rate": 8.78017432289041e-06, "loss": 0.2124, "step": 3129 }, { "epoch": 0.25, "grad_norm": 1.2365090103815157, "learning_rate": 8.779326249262232e-06, "loss": 0.1499, "step": 3130 }, { "epoch": 0.25, "grad_norm": 1.2882794311642867, "learning_rate": 8.778477921916431e-06, "loss": 0.176, "step": 3131 }, { "epoch": 0.25, "grad_norm": 6.742150743840332, "learning_rate": 8.777629340909963e-06, "loss": 0.7087, "step": 3132 }, { "epoch": 0.25, "grad_norm": 1.339115561041797, "learning_rate": 8.77678050629979e-06, "loss": 0.1957, "step": 3133 }, { "epoch": 0.25, "grad_norm": 1.4837091657309796, "learning_rate": 8.775931418142895e-06, "loss": 0.2384, "step": 3134 }, { "epoch": 0.25, "grad_norm": 1.2602912463680471, "learning_rate": 8.775082076496287e-06, "loss": 0.2068, "step": 3135 }, { "epoch": 0.25, "grad_norm": 1.3588876192886485, "learning_rate": 8.77423248141698e-06, "loss": 0.2287, "step": 3136 }, { "epoch": 0.25, "grad_norm": 1.3376299926336737, "learning_rate": 8.773382632962011e-06, "loss": 0.1878, "step": 3137 }, { "epoch": 0.25, "grad_norm": 5.44436557411278, "learning_rate": 8.772532531188434e-06, "loss": 0.7156, "step": 3138 }, { "epoch": 0.25, "grad_norm": 1.5568207927876347, "learning_rate": 8.771682176153317e-06, "loss": 0.2903, "step": 3139 }, { "epoch": 0.25, "grad_norm": 1.4733707072180862, "learning_rate": 8.770831567913747e-06, "loss": 0.2201, "step": 3140 }, { "epoch": 0.25, "grad_norm": 1.3133798351375443, "learning_rate": 8.76998070652683e-06, "loss": 0.2162, "step": 3141 }, { "epoch": 0.25, "grad_norm": 1.3526197884802227, "learning_rate": 8.769129592049685e-06, "loss": 0.2099, "step": 3142 }, { "epoch": 0.25, "grad_norm": 1.4184424008701173, "learning_rate": 8.768278224539451e-06, "loss": 0.2103, "step": 3143 }, { "epoch": 0.25, "grad_norm": 1.217361278573516, "learning_rate": 8.767426604053282e-06, "loss": 0.1753, "step": 3144 }, { "epoch": 0.25, "grad_norm": 1.302061094553151, "learning_rate": 8.76657473064835e-06, "loss": 0.1923, "step": 3145 }, { "epoch": 0.25, "grad_norm": 1.365656133293616, "learning_rate": 8.765722604381843e-06, "loss": 0.242, "step": 3146 }, { "epoch": 0.25, "grad_norm": 1.2807699810964557, "learning_rate": 8.76487022531097e-06, "loss": 0.1839, "step": 3147 }, { "epoch": 0.25, "grad_norm": 1.5415908616585046, "learning_rate": 8.764017593492951e-06, "loss": 0.2195, "step": 3148 }, { "epoch": 0.25, "grad_norm": 1.5790375896224849, "learning_rate": 8.763164708985026e-06, "loss": 0.2339, "step": 3149 }, { "epoch": 0.25, "grad_norm": 1.361680637626458, "learning_rate": 8.762311571844453e-06, "loss": 0.2208, "step": 3150 }, { "epoch": 0.25, "grad_norm": 1.346827836075606, "learning_rate": 8.761458182128503e-06, "loss": 0.2217, "step": 3151 }, { "epoch": 0.25, "grad_norm": 1.3468208727814432, "learning_rate": 8.76060453989447e-06, "loss": 0.1544, "step": 3152 }, { "epoch": 0.25, "grad_norm": 1.4906687463832031, "learning_rate": 8.75975064519966e-06, "loss": 0.2555, "step": 3153 }, { "epoch": 0.25, "grad_norm": 1.5512415912968707, "learning_rate": 8.758896498101397e-06, "loss": 0.2384, "step": 3154 }, { "epoch": 0.25, "grad_norm": 1.4653316490156025, "learning_rate": 8.758042098657022e-06, "loss": 0.2101, "step": 3155 }, { "epoch": 0.25, "grad_norm": 1.0852536519977536, "learning_rate": 8.757187446923896e-06, "loss": 0.1414, "step": 3156 }, { "epoch": 0.25, "grad_norm": 1.4128764081558944, "learning_rate": 8.756332542959394e-06, "loss": 0.2217, "step": 3157 }, { "epoch": 0.25, "grad_norm": 17.88489633311662, "learning_rate": 8.755477386820906e-06, "loss": 0.7123, "step": 3158 }, { "epoch": 0.25, "grad_norm": 5.685680913869717, "learning_rate": 8.75462197856584e-06, "loss": 0.6202, "step": 3159 }, { "epoch": 0.25, "grad_norm": 1.222345531876853, "learning_rate": 8.753766318251628e-06, "loss": 0.2005, "step": 3160 }, { "epoch": 0.25, "grad_norm": 1.3017713733607912, "learning_rate": 8.752910405935708e-06, "loss": 0.1999, "step": 3161 }, { "epoch": 0.25, "grad_norm": 1.549801876056592, "learning_rate": 8.752054241675543e-06, "loss": 0.2419, "step": 3162 }, { "epoch": 0.25, "grad_norm": 1.5763183339308362, "learning_rate": 8.751197825528607e-06, "loss": 0.2578, "step": 3163 }, { "epoch": 0.25, "grad_norm": 1.3534450558695814, "learning_rate": 8.750341157552396e-06, "loss": 0.1649, "step": 3164 }, { "epoch": 0.25, "grad_norm": 1.3728107862610204, "learning_rate": 8.74948423780442e-06, "loss": 0.1753, "step": 3165 }, { "epoch": 0.25, "grad_norm": 1.4813510512455477, "learning_rate": 8.748627066342206e-06, "loss": 0.1773, "step": 3166 }, { "epoch": 0.25, "grad_norm": 1.4417247244486429, "learning_rate": 8.7477696432233e-06, "loss": 0.2012, "step": 3167 }, { "epoch": 0.25, "grad_norm": 6.0524160577338835, "learning_rate": 8.746911968505262e-06, "loss": 0.7107, "step": 3168 }, { "epoch": 0.25, "grad_norm": 1.3789545658785332, "learning_rate": 8.74605404224567e-06, "loss": 0.1825, "step": 3169 }, { "epoch": 0.25, "grad_norm": 1.5069480115976879, "learning_rate": 8.745195864502121e-06, "loss": 0.2639, "step": 3170 }, { "epoch": 0.25, "grad_norm": 1.5186721642131027, "learning_rate": 8.744337435332226e-06, "loss": 0.2471, "step": 3171 }, { "epoch": 0.25, "grad_norm": 1.25716461902238, "learning_rate": 8.743478754793616e-06, "loss": 0.1874, "step": 3172 }, { "epoch": 0.25, "grad_norm": 10.234128400739854, "learning_rate": 8.742619822943932e-06, "loss": 0.5618, "step": 3173 }, { "epoch": 0.25, "grad_norm": 1.5324637931168346, "learning_rate": 8.74176063984084e-06, "loss": 0.2442, "step": 3174 }, { "epoch": 0.25, "grad_norm": 1.3239359974531444, "learning_rate": 8.74090120554202e-06, "loss": 0.2067, "step": 3175 }, { "epoch": 0.25, "grad_norm": 1.538776837609694, "learning_rate": 8.740041520105168e-06, "loss": 0.2108, "step": 3176 }, { "epoch": 0.25, "grad_norm": 1.267553913591337, "learning_rate": 8.739181583587997e-06, "loss": 0.2147, "step": 3177 }, { "epoch": 0.25, "grad_norm": 9.305862617351721, "learning_rate": 8.738321396048235e-06, "loss": 0.6456, "step": 3178 }, { "epoch": 0.25, "grad_norm": 1.2922659852864014, "learning_rate": 8.737460957543633e-06, "loss": 0.1655, "step": 3179 }, { "epoch": 0.25, "grad_norm": 1.317768387210999, "learning_rate": 8.736600268131953e-06, "loss": 0.2115, "step": 3180 }, { "epoch": 0.25, "grad_norm": 1.2521343704787657, "learning_rate": 8.735739327870974e-06, "loss": 0.1555, "step": 3181 }, { "epoch": 0.25, "grad_norm": 1.3399821987123812, "learning_rate": 8.734878136818496e-06, "loss": 0.1957, "step": 3182 }, { "epoch": 0.25, "grad_norm": 1.179258389900639, "learning_rate": 8.734016695032333e-06, "loss": 0.1981, "step": 3183 }, { "epoch": 0.25, "grad_norm": 1.3254075134029584, "learning_rate": 8.733155002570315e-06, "loss": 0.205, "step": 3184 }, { "epoch": 0.25, "grad_norm": 1.3581819390716958, "learning_rate": 8.73229305949029e-06, "loss": 0.1856, "step": 3185 }, { "epoch": 0.25, "grad_norm": 1.3108554332551716, "learning_rate": 8.731430865850124e-06, "loss": 0.1962, "step": 3186 }, { "epoch": 0.25, "grad_norm": 1.3900741661961677, "learning_rate": 8.730568421707699e-06, "loss": 0.2199, "step": 3187 }, { "epoch": 0.26, "grad_norm": 1.5162537619511862, "learning_rate": 8.729705727120911e-06, "loss": 0.253, "step": 3188 }, { "epoch": 0.26, "grad_norm": 1.509490908177284, "learning_rate": 8.728842782147679e-06, "loss": 0.2479, "step": 3189 }, { "epoch": 0.26, "grad_norm": 1.6280437143260313, "learning_rate": 8.727979586845931e-06, "loss": 0.28, "step": 3190 }, { "epoch": 0.26, "grad_norm": 1.3007564642704696, "learning_rate": 8.727116141273619e-06, "loss": 0.1756, "step": 3191 }, { "epoch": 0.26, "grad_norm": 1.4623809912460453, "learning_rate": 8.726252445488708e-06, "loss": 0.2224, "step": 3192 }, { "epoch": 0.26, "grad_norm": 1.5595365044856657, "learning_rate": 8.725388499549182e-06, "loss": 0.2474, "step": 3193 }, { "epoch": 0.26, "grad_norm": 1.3048982681056154, "learning_rate": 8.724524303513035e-06, "loss": 0.2206, "step": 3194 }, { "epoch": 0.26, "grad_norm": 1.299657512967296, "learning_rate": 8.72365985743829e-06, "loss": 0.2047, "step": 3195 }, { "epoch": 0.26, "grad_norm": 1.2961884266114125, "learning_rate": 8.722795161382974e-06, "loss": 0.2667, "step": 3196 }, { "epoch": 0.26, "grad_norm": 1.394083392692424, "learning_rate": 8.72193021540514e-06, "loss": 0.2428, "step": 3197 }, { "epoch": 0.26, "grad_norm": 1.3633135534989351, "learning_rate": 8.721065019562854e-06, "loss": 0.235, "step": 3198 }, { "epoch": 0.26, "grad_norm": 1.359248119005776, "learning_rate": 8.720199573914196e-06, "loss": 0.2502, "step": 3199 }, { "epoch": 0.26, "grad_norm": 1.3594908019782461, "learning_rate": 8.719333878517274e-06, "loss": 0.2312, "step": 3200 }, { "epoch": 0.26, "grad_norm": 1.3976886597561842, "learning_rate": 8.718467933430195e-06, "loss": 0.2236, "step": 3201 }, { "epoch": 0.26, "grad_norm": 1.1994411275995478, "learning_rate": 8.7176017387111e-06, "loss": 0.1895, "step": 3202 }, { "epoch": 0.26, "grad_norm": 1.4821448683220442, "learning_rate": 8.716735294418136e-06, "loss": 0.2107, "step": 3203 }, { "epoch": 0.26, "grad_norm": 1.5329378979756643, "learning_rate": 8.71586860060947e-06, "loss": 0.2458, "step": 3204 }, { "epoch": 0.26, "grad_norm": 1.417391308935592, "learning_rate": 8.715001657343285e-06, "loss": 0.2458, "step": 3205 }, { "epoch": 0.26, "grad_norm": 6.3636924614899515, "learning_rate": 8.714134464677784e-06, "loss": 0.6404, "step": 3206 }, { "epoch": 0.26, "grad_norm": 1.5088706790184852, "learning_rate": 8.713267022671182e-06, "loss": 0.2895, "step": 3207 }, { "epoch": 0.26, "grad_norm": 1.4782466248225636, "learning_rate": 8.712399331381715e-06, "loss": 0.2062, "step": 3208 }, { "epoch": 0.26, "grad_norm": 1.4091674634684748, "learning_rate": 8.711531390867634e-06, "loss": 0.2489, "step": 3209 }, { "epoch": 0.26, "grad_norm": 1.3080483404843495, "learning_rate": 8.710663201187203e-06, "loss": 0.2044, "step": 3210 }, { "epoch": 0.26, "grad_norm": 1.450289452016168, "learning_rate": 8.709794762398709e-06, "loss": 0.2374, "step": 3211 }, { "epoch": 0.26, "grad_norm": 4.649676077644956, "learning_rate": 8.708926074560453e-06, "loss": 0.5833, "step": 3212 }, { "epoch": 0.26, "grad_norm": 1.397211157496579, "learning_rate": 8.708057137730752e-06, "loss": 0.1881, "step": 3213 }, { "epoch": 0.26, "grad_norm": 4.985061637005715, "learning_rate": 8.707187951967939e-06, "loss": 0.6008, "step": 3214 }, { "epoch": 0.26, "grad_norm": 1.535248458843302, "learning_rate": 8.706318517330368e-06, "loss": 0.2215, "step": 3215 }, { "epoch": 0.26, "grad_norm": 1.428511056438544, "learning_rate": 8.705448833876404e-06, "loss": 0.2083, "step": 3216 }, { "epoch": 0.26, "grad_norm": 4.839412785357798, "learning_rate": 8.704578901664434e-06, "loss": 0.698, "step": 3217 }, { "epoch": 0.26, "grad_norm": 5.92139593858278, "learning_rate": 8.703708720752857e-06, "loss": 0.6828, "step": 3218 }, { "epoch": 0.26, "grad_norm": 1.5203192792008802, "learning_rate": 8.702838291200093e-06, "loss": 0.208, "step": 3219 }, { "epoch": 0.26, "grad_norm": 6.1366176428658195, "learning_rate": 8.701967613064575e-06, "loss": 0.5873, "step": 3220 }, { "epoch": 0.26, "grad_norm": 1.4010690989462742, "learning_rate": 8.701096686404753e-06, "loss": 0.2426, "step": 3221 }, { "epoch": 0.26, "grad_norm": 1.5149715350301836, "learning_rate": 8.700225511279097e-06, "loss": 0.2155, "step": 3222 }, { "epoch": 0.26, "grad_norm": 1.4854697173396687, "learning_rate": 8.699354087746092e-06, "loss": 0.2329, "step": 3223 }, { "epoch": 0.26, "grad_norm": 1.29676813740157, "learning_rate": 8.69848241586424e-06, "loss": 0.1519, "step": 3224 }, { "epoch": 0.26, "grad_norm": 3.43750592682205, "learning_rate": 8.697610495692055e-06, "loss": 0.5887, "step": 3225 }, { "epoch": 0.26, "grad_norm": 1.2839545550256226, "learning_rate": 8.696738327288074e-06, "loss": 0.2136, "step": 3226 }, { "epoch": 0.26, "grad_norm": 1.3520771454702623, "learning_rate": 8.69586591071085e-06, "loss": 0.1927, "step": 3227 }, { "epoch": 0.26, "grad_norm": 5.190971887095076, "learning_rate": 8.694993246018949e-06, "loss": 0.6942, "step": 3228 }, { "epoch": 0.26, "grad_norm": 1.2547823970784193, "learning_rate": 8.694120333270954e-06, "loss": 0.1846, "step": 3229 }, { "epoch": 0.26, "grad_norm": 1.3824993840843107, "learning_rate": 8.693247172525472e-06, "loss": 0.2233, "step": 3230 }, { "epoch": 0.26, "grad_norm": 1.4901238254156066, "learning_rate": 8.692373763841115e-06, "loss": 0.2535, "step": 3231 }, { "epoch": 0.26, "grad_norm": 1.4575915479558135, "learning_rate": 8.691500107276521e-06, "loss": 0.2497, "step": 3232 }, { "epoch": 0.26, "grad_norm": 1.2454481386423009, "learning_rate": 8.690626202890338e-06, "loss": 0.1967, "step": 3233 }, { "epoch": 0.26, "grad_norm": 1.2732372990709095, "learning_rate": 8.689752050741237e-06, "loss": 0.1939, "step": 3234 }, { "epoch": 0.26, "grad_norm": 1.324830849361413, "learning_rate": 8.6888776508879e-06, "loss": 0.2228, "step": 3235 }, { "epoch": 0.26, "grad_norm": 1.4958712825898532, "learning_rate": 8.688003003389032e-06, "loss": 0.2566, "step": 3236 }, { "epoch": 0.26, "grad_norm": 1.3618273852074925, "learning_rate": 8.687128108303347e-06, "loss": 0.1707, "step": 3237 }, { "epoch": 0.26, "grad_norm": 1.4122647443594907, "learning_rate": 8.686252965689579e-06, "loss": 0.2377, "step": 3238 }, { "epoch": 0.26, "grad_norm": 1.27874265177543, "learning_rate": 8.685377575606481e-06, "loss": 0.219, "step": 3239 }, { "epoch": 0.26, "grad_norm": 1.4321965753032337, "learning_rate": 8.684501938112822e-06, "loss": 0.2257, "step": 3240 }, { "epoch": 0.26, "grad_norm": 1.568292891239202, "learning_rate": 8.683626053267385e-06, "loss": 0.2205, "step": 3241 }, { "epoch": 0.26, "grad_norm": 1.5098104800261514, "learning_rate": 8.682749921128968e-06, "loss": 0.2557, "step": 3242 }, { "epoch": 0.26, "grad_norm": 1.4799834535525536, "learning_rate": 8.68187354175639e-06, "loss": 0.235, "step": 3243 }, { "epoch": 0.26, "grad_norm": 1.3899808174423203, "learning_rate": 8.680996915208486e-06, "loss": 0.2312, "step": 3244 }, { "epoch": 0.26, "grad_norm": 1.2958942764425196, "learning_rate": 8.680120041544106e-06, "loss": 0.19, "step": 3245 }, { "epoch": 0.26, "grad_norm": 6.2469686513214775, "learning_rate": 8.679242920822119e-06, "loss": 0.6509, "step": 3246 }, { "epoch": 0.26, "grad_norm": 1.4472804927666902, "learning_rate": 8.678365553101406e-06, "loss": 0.2507, "step": 3247 }, { "epoch": 0.26, "grad_norm": 5.613440696545433, "learning_rate": 8.677487938440868e-06, "loss": 0.8339, "step": 3248 }, { "epoch": 0.26, "grad_norm": 1.3763756818865895, "learning_rate": 8.676610076899423e-06, "loss": 0.1891, "step": 3249 }, { "epoch": 0.26, "grad_norm": 1.250798261444277, "learning_rate": 8.675731968536004e-06, "loss": 0.2024, "step": 3250 }, { "epoch": 0.26, "grad_norm": 1.4927831683983277, "learning_rate": 8.67485361340956e-06, "loss": 0.2548, "step": 3251 }, { "epoch": 0.26, "grad_norm": 1.2870034010066054, "learning_rate": 8.673975011579058e-06, "loss": 0.1894, "step": 3252 }, { "epoch": 0.26, "grad_norm": 1.3527719379323089, "learning_rate": 8.673096163103483e-06, "loss": 0.2163, "step": 3253 }, { "epoch": 0.26, "grad_norm": 1.4974541870822855, "learning_rate": 8.672217068041833e-06, "loss": 0.2261, "step": 3254 }, { "epoch": 0.26, "grad_norm": 1.592232583908154, "learning_rate": 8.671337726453126e-06, "loss": 0.2372, "step": 3255 }, { "epoch": 0.26, "grad_norm": 1.400763616752801, "learning_rate": 8.670458138396391e-06, "loss": 0.223, "step": 3256 }, { "epoch": 0.26, "grad_norm": 1.2239321633047564, "learning_rate": 8.669578303930684e-06, "loss": 0.2193, "step": 3257 }, { "epoch": 0.26, "grad_norm": 8.634241103174643, "learning_rate": 8.668698223115063e-06, "loss": 0.551, "step": 3258 }, { "epoch": 0.26, "grad_norm": 1.3945335063867428, "learning_rate": 8.667817896008618e-06, "loss": 0.2073, "step": 3259 }, { "epoch": 0.26, "grad_norm": 1.334345418638459, "learning_rate": 8.666937322670443e-06, "loss": 0.2215, "step": 3260 }, { "epoch": 0.26, "grad_norm": 1.3719246270184962, "learning_rate": 8.666056503159654e-06, "loss": 0.2125, "step": 3261 }, { "epoch": 0.26, "grad_norm": 1.4195729416911402, "learning_rate": 8.665175437535387e-06, "loss": 0.2641, "step": 3262 }, { "epoch": 0.26, "grad_norm": 1.4270259610696958, "learning_rate": 8.664294125856786e-06, "loss": 0.2388, "step": 3263 }, { "epoch": 0.26, "grad_norm": 6.918064465575382, "learning_rate": 8.663412568183018e-06, "loss": 0.5098, "step": 3264 }, { "epoch": 0.26, "grad_norm": 1.3573804030741383, "learning_rate": 8.662530764573264e-06, "loss": 0.2267, "step": 3265 }, { "epoch": 0.26, "grad_norm": 1.43384358147485, "learning_rate": 8.661648715086725e-06, "loss": 0.213, "step": 3266 }, { "epoch": 0.26, "grad_norm": 1.4165597620522519, "learning_rate": 8.66076641978261e-06, "loss": 0.1996, "step": 3267 }, { "epoch": 0.26, "grad_norm": 1.224044680424154, "learning_rate": 8.659883878720158e-06, "loss": 0.1864, "step": 3268 }, { "epoch": 0.26, "grad_norm": 1.5335445345848624, "learning_rate": 8.65900109195861e-06, "loss": 0.2451, "step": 3269 }, { "epoch": 0.26, "grad_norm": 1.5067058038654433, "learning_rate": 8.658118059557233e-06, "loss": 0.1939, "step": 3270 }, { "epoch": 0.26, "grad_norm": 1.4186756181655202, "learning_rate": 8.657234781575305e-06, "loss": 0.245, "step": 3271 }, { "epoch": 0.26, "grad_norm": 2.12107141288473, "learning_rate": 8.656351258072126e-06, "loss": 0.1781, "step": 3272 }, { "epoch": 0.26, "grad_norm": 1.3783289562807386, "learning_rate": 8.65546748910701e-06, "loss": 0.2305, "step": 3273 }, { "epoch": 0.26, "grad_norm": 1.5891777073731126, "learning_rate": 8.654583474739284e-06, "loss": 0.24, "step": 3274 }, { "epoch": 0.26, "grad_norm": 1.3931356556078667, "learning_rate": 8.653699215028298e-06, "loss": 0.2132, "step": 3275 }, { "epoch": 0.26, "grad_norm": 1.38622832440785, "learning_rate": 8.652814710033413e-06, "loss": 0.1984, "step": 3276 }, { "epoch": 0.26, "grad_norm": 1.414963206934076, "learning_rate": 8.651929959814009e-06, "loss": 0.2438, "step": 3277 }, { "epoch": 0.26, "grad_norm": 1.227173360929713, "learning_rate": 8.65104496442948e-06, "loss": 0.202, "step": 3278 }, { "epoch": 0.26, "grad_norm": 5.695814781365407, "learning_rate": 8.650159723939241e-06, "loss": 0.6169, "step": 3279 }, { "epoch": 0.26, "grad_norm": 1.4872483855349818, "learning_rate": 8.649274238402723e-06, "loss": 0.2199, "step": 3280 }, { "epoch": 0.26, "grad_norm": 1.5501930447321255, "learning_rate": 8.648388507879367e-06, "loss": 0.2207, "step": 3281 }, { "epoch": 0.26, "grad_norm": 1.5059417901422238, "learning_rate": 8.647502532428636e-06, "loss": 0.2149, "step": 3282 }, { "epoch": 0.26, "grad_norm": 6.41956730295782, "learning_rate": 8.646616312110008e-06, "loss": 0.4973, "step": 3283 }, { "epoch": 0.26, "grad_norm": 5.163982146026823, "learning_rate": 8.64572984698298e-06, "loss": 0.4652, "step": 3284 }, { "epoch": 0.26, "grad_norm": 1.2477808648153987, "learning_rate": 8.644843137107058e-06, "loss": 0.1771, "step": 3285 }, { "epoch": 0.26, "grad_norm": 1.245524489759415, "learning_rate": 8.643956182541775e-06, "loss": 0.1906, "step": 3286 }, { "epoch": 0.26, "grad_norm": 1.423737742889783, "learning_rate": 8.643068983346672e-06, "loss": 0.2435, "step": 3287 }, { "epoch": 0.26, "grad_norm": 1.392556203965379, "learning_rate": 8.64218153958131e-06, "loss": 0.2354, "step": 3288 }, { "epoch": 0.26, "grad_norm": 1.3092858580540458, "learning_rate": 8.641293851305269e-06, "loss": 0.2235, "step": 3289 }, { "epoch": 0.26, "grad_norm": 1.3974007528559544, "learning_rate": 8.640405918578134e-06, "loss": 0.2297, "step": 3290 }, { "epoch": 0.26, "grad_norm": 1.440515656995989, "learning_rate": 8.639517741459522e-06, "loss": 0.2206, "step": 3291 }, { "epoch": 0.26, "grad_norm": 1.384342029141178, "learning_rate": 8.638629320009058e-06, "loss": 0.219, "step": 3292 }, { "epoch": 0.26, "grad_norm": 1.5994519012809414, "learning_rate": 8.637740654286382e-06, "loss": 0.2071, "step": 3293 }, { "epoch": 0.26, "grad_norm": 1.3160360288749673, "learning_rate": 8.636851744351154e-06, "loss": 0.1902, "step": 3294 }, { "epoch": 0.26, "grad_norm": 1.3152805292253702, "learning_rate": 8.635962590263047e-06, "loss": 0.2375, "step": 3295 }, { "epoch": 0.26, "grad_norm": 1.4591324320680432, "learning_rate": 8.635073192081757e-06, "loss": 0.2089, "step": 3296 }, { "epoch": 0.26, "grad_norm": 1.486571901267544, "learning_rate": 8.63418354986699e-06, "loss": 0.2268, "step": 3297 }, { "epoch": 0.26, "grad_norm": 1.502231562836528, "learning_rate": 8.63329366367847e-06, "loss": 0.2065, "step": 3298 }, { "epoch": 0.26, "grad_norm": 1.3750469036192092, "learning_rate": 8.632403533575939e-06, "loss": 0.2068, "step": 3299 }, { "epoch": 0.26, "grad_norm": 1.340091720239436, "learning_rate": 8.63151315961915e-06, "loss": 0.1986, "step": 3300 }, { "epoch": 0.26, "grad_norm": 1.4287659176814846, "learning_rate": 8.630622541867881e-06, "loss": 0.2083, "step": 3301 }, { "epoch": 0.26, "grad_norm": 1.2741602949274455, "learning_rate": 8.629731680381922e-06, "loss": 0.1877, "step": 3302 }, { "epoch": 0.26, "grad_norm": 1.392968593165019, "learning_rate": 8.628840575221076e-06, "loss": 0.2211, "step": 3303 }, { "epoch": 0.26, "grad_norm": 1.3948138917276385, "learning_rate": 8.627949226445166e-06, "loss": 0.2276, "step": 3304 }, { "epoch": 0.26, "grad_norm": 1.3767623543680214, "learning_rate": 8.627057634114036e-06, "loss": 0.2281, "step": 3305 }, { "epoch": 0.26, "grad_norm": 5.905700854127078, "learning_rate": 8.626165798287534e-06, "loss": 0.5206, "step": 3306 }, { "epoch": 0.26, "grad_norm": 1.60099476138202, "learning_rate": 8.625273719025538e-06, "loss": 0.2849, "step": 3307 }, { "epoch": 0.26, "grad_norm": 1.474792122507455, "learning_rate": 8.62438139638793e-06, "loss": 0.2413, "step": 3308 }, { "epoch": 0.26, "grad_norm": 1.603070691935612, "learning_rate": 8.623488830434619e-06, "loss": 0.2436, "step": 3309 }, { "epoch": 0.26, "grad_norm": 1.5154523071098147, "learning_rate": 8.622596021225524e-06, "loss": 0.2215, "step": 3310 }, { "epoch": 0.26, "grad_norm": 1.3714916702675344, "learning_rate": 8.621702968820582e-06, "loss": 0.2175, "step": 3311 }, { "epoch": 0.26, "grad_norm": 4.845900021469104, "learning_rate": 8.620809673279746e-06, "loss": 0.5095, "step": 3312 }, { "epoch": 0.27, "grad_norm": 1.3717316383161107, "learning_rate": 8.619916134662986e-06, "loss": 0.1835, "step": 3313 }, { "epoch": 0.27, "grad_norm": 1.3760865241075366, "learning_rate": 8.619022353030288e-06, "loss": 0.2346, "step": 3314 }, { "epoch": 0.27, "grad_norm": 1.2612030683693312, "learning_rate": 8.618128328441655e-06, "loss": 0.1943, "step": 3315 }, { "epoch": 0.27, "grad_norm": 1.4474170736837229, "learning_rate": 8.617234060957105e-06, "loss": 0.205, "step": 3316 }, { "epoch": 0.27, "grad_norm": 1.4672323025300713, "learning_rate": 8.616339550636672e-06, "loss": 0.2528, "step": 3317 }, { "epoch": 0.27, "grad_norm": 1.404668787934941, "learning_rate": 8.615444797540408e-06, "loss": 0.1926, "step": 3318 }, { "epoch": 0.27, "grad_norm": 1.2932740104839244, "learning_rate": 8.61454980172838e-06, "loss": 0.1799, "step": 3319 }, { "epoch": 0.27, "grad_norm": 1.4199019012557053, "learning_rate": 8.613654563260673e-06, "loss": 0.2321, "step": 3320 }, { "epoch": 0.27, "grad_norm": 1.4417218287588973, "learning_rate": 8.612759082197386e-06, "loss": 0.2296, "step": 3321 }, { "epoch": 0.27, "grad_norm": 1.381046292822886, "learning_rate": 8.611863358598635e-06, "loss": 0.2136, "step": 3322 }, { "epoch": 0.27, "grad_norm": 1.4630894314542768, "learning_rate": 8.610967392524554e-06, "loss": 0.221, "step": 3323 }, { "epoch": 0.27, "grad_norm": 1.4330271561023198, "learning_rate": 8.610071184035292e-06, "loss": 0.225, "step": 3324 }, { "epoch": 0.27, "grad_norm": 1.4096339550465822, "learning_rate": 8.609174733191012e-06, "loss": 0.2309, "step": 3325 }, { "epoch": 0.27, "grad_norm": 6.677255327444758, "learning_rate": 8.608278040051895e-06, "loss": 0.6186, "step": 3326 }, { "epoch": 0.27, "grad_norm": 10.725038603019051, "learning_rate": 8.607381104678142e-06, "loss": 0.6055, "step": 3327 }, { "epoch": 0.27, "grad_norm": 1.6543551893989619, "learning_rate": 8.606483927129967e-06, "loss": 0.2506, "step": 3328 }, { "epoch": 0.27, "grad_norm": 1.4641475236957175, "learning_rate": 8.605586507467597e-06, "loss": 0.2462, "step": 3329 }, { "epoch": 0.27, "grad_norm": 1.271741681012463, "learning_rate": 8.604688845751283e-06, "loss": 0.2333, "step": 3330 }, { "epoch": 0.27, "grad_norm": 1.2944789322117929, "learning_rate": 8.603790942041282e-06, "loss": 0.2062, "step": 3331 }, { "epoch": 0.27, "grad_norm": 1.2496626638288242, "learning_rate": 8.602892796397877e-06, "loss": 0.2301, "step": 3332 }, { "epoch": 0.27, "grad_norm": 1.3839262639360257, "learning_rate": 8.601994408881362e-06, "loss": 0.1944, "step": 3333 }, { "epoch": 0.27, "grad_norm": 10.960056264056906, "learning_rate": 8.60109577955205e-06, "loss": 0.5522, "step": 3334 }, { "epoch": 0.27, "grad_norm": 1.4845123372797735, "learning_rate": 8.600196908470265e-06, "loss": 0.2525, "step": 3335 }, { "epoch": 0.27, "grad_norm": 1.4904222270857068, "learning_rate": 8.599297795696355e-06, "loss": 0.2868, "step": 3336 }, { "epoch": 0.27, "grad_norm": 1.4336924661490809, "learning_rate": 8.598398441290679e-06, "loss": 0.2235, "step": 3337 }, { "epoch": 0.27, "grad_norm": 1.2862098601757985, "learning_rate": 8.597498845313612e-06, "loss": 0.205, "step": 3338 }, { "epoch": 0.27, "grad_norm": 1.3756513263966703, "learning_rate": 8.596599007825548e-06, "loss": 0.2204, "step": 3339 }, { "epoch": 0.27, "grad_norm": 1.3290166602454174, "learning_rate": 8.595698928886894e-06, "loss": 0.2541, "step": 3340 }, { "epoch": 0.27, "grad_norm": 1.3623803438445348, "learning_rate": 8.594798608558077e-06, "loss": 0.2251, "step": 3341 }, { "epoch": 0.27, "grad_norm": 1.523019867969133, "learning_rate": 8.593898046899539e-06, "loss": 0.2287, "step": 3342 }, { "epoch": 0.27, "grad_norm": 1.413000962049789, "learning_rate": 8.592997243971734e-06, "loss": 0.2034, "step": 3343 }, { "epoch": 0.27, "grad_norm": 1.4159815233966042, "learning_rate": 8.592096199835141e-06, "loss": 0.201, "step": 3344 }, { "epoch": 0.27, "grad_norm": 1.4366309363516205, "learning_rate": 8.591194914550242e-06, "loss": 0.178, "step": 3345 }, { "epoch": 0.27, "grad_norm": 8.234425552683678, "learning_rate": 8.590293388177552e-06, "loss": 0.6904, "step": 3346 }, { "epoch": 0.27, "grad_norm": 1.5826727940453578, "learning_rate": 8.589391620777586e-06, "loss": 0.2201, "step": 3347 }, { "epoch": 0.27, "grad_norm": 1.4249936449054619, "learning_rate": 8.588489612410885e-06, "loss": 0.2393, "step": 3348 }, { "epoch": 0.27, "grad_norm": 5.761381874608512, "learning_rate": 8.587587363138003e-06, "loss": 0.4948, "step": 3349 }, { "epoch": 0.27, "grad_norm": 1.355617614554123, "learning_rate": 8.586684873019513e-06, "loss": 0.2216, "step": 3350 }, { "epoch": 0.27, "grad_norm": 1.438418273720014, "learning_rate": 8.585782142116e-06, "loss": 0.255, "step": 3351 }, { "epoch": 0.27, "grad_norm": 1.419152071624994, "learning_rate": 8.584879170488068e-06, "loss": 0.2129, "step": 3352 }, { "epoch": 0.27, "grad_norm": 1.4212169669942178, "learning_rate": 8.583975958196333e-06, "loss": 0.2333, "step": 3353 }, { "epoch": 0.27, "grad_norm": 1.5804855519775207, "learning_rate": 8.583072505301436e-06, "loss": 0.2425, "step": 3354 }, { "epoch": 0.27, "grad_norm": 1.2835645409457161, "learning_rate": 8.582168811864022e-06, "loss": 0.2021, "step": 3355 }, { "epoch": 0.27, "grad_norm": 1.2681078675620716, "learning_rate": 8.581264877944764e-06, "loss": 0.1902, "step": 3356 }, { "epoch": 0.27, "grad_norm": 1.8395633418399882, "learning_rate": 8.580360703604344e-06, "loss": 0.2469, "step": 3357 }, { "epoch": 0.27, "grad_norm": 1.389459556585165, "learning_rate": 8.579456288903463e-06, "loss": 0.2617, "step": 3358 }, { "epoch": 0.27, "grad_norm": 1.420210456918626, "learning_rate": 8.578551633902836e-06, "loss": 0.1819, "step": 3359 }, { "epoch": 0.27, "grad_norm": 1.3741896065552794, "learning_rate": 8.577646738663193e-06, "loss": 0.1787, "step": 3360 }, { "epoch": 0.27, "grad_norm": 1.3429191571009946, "learning_rate": 8.576741603245285e-06, "loss": 0.2267, "step": 3361 }, { "epoch": 0.27, "grad_norm": 1.3266588368008274, "learning_rate": 8.575836227709877e-06, "loss": 0.2054, "step": 3362 }, { "epoch": 0.27, "grad_norm": 1.2665890815488017, "learning_rate": 8.574930612117748e-06, "loss": 0.1812, "step": 3363 }, { "epoch": 0.27, "grad_norm": 1.3263880385488571, "learning_rate": 8.574024756529698e-06, "loss": 0.1919, "step": 3364 }, { "epoch": 0.27, "grad_norm": 1.5457286664498076, "learning_rate": 8.573118661006535e-06, "loss": 0.2355, "step": 3365 }, { "epoch": 0.27, "grad_norm": 1.337918393069023, "learning_rate": 8.572212325609089e-06, "loss": 0.2198, "step": 3366 }, { "epoch": 0.27, "grad_norm": 1.2859309581970662, "learning_rate": 8.571305750398207e-06, "loss": 0.1786, "step": 3367 }, { "epoch": 0.27, "grad_norm": 1.1747443336171957, "learning_rate": 8.57039893543475e-06, "loss": 0.1778, "step": 3368 }, { "epoch": 0.27, "grad_norm": 5.152163072155325, "learning_rate": 8.569491880779594e-06, "loss": 0.5469, "step": 3369 }, { "epoch": 0.27, "grad_norm": 1.4790229884346278, "learning_rate": 8.568584586493635e-06, "loss": 0.2172, "step": 3370 }, { "epoch": 0.27, "grad_norm": 1.4516696240568987, "learning_rate": 8.567677052637778e-06, "loss": 0.2453, "step": 3371 }, { "epoch": 0.27, "grad_norm": 1.3448540211877822, "learning_rate": 8.566769279272952e-06, "loss": 0.2307, "step": 3372 }, { "epoch": 0.27, "grad_norm": 12.476500251821296, "learning_rate": 8.565861266460096e-06, "loss": 0.6898, "step": 3373 }, { "epoch": 0.27, "grad_norm": 1.4391748969000988, "learning_rate": 8.56495301426017e-06, "loss": 0.197, "step": 3374 }, { "epoch": 0.27, "grad_norm": 8.911363134126704, "learning_rate": 8.564044522734147e-06, "loss": 0.6137, "step": 3375 }, { "epoch": 0.27, "grad_norm": 1.500628730199422, "learning_rate": 8.563135791943015e-06, "loss": 0.2276, "step": 3376 }, { "epoch": 0.27, "grad_norm": 1.3924291241373836, "learning_rate": 8.562226821947784e-06, "loss": 0.2342, "step": 3377 }, { "epoch": 0.27, "grad_norm": 1.4233917667748524, "learning_rate": 8.561317612809473e-06, "loss": 0.2018, "step": 3378 }, { "epoch": 0.27, "grad_norm": 1.4540760032553721, "learning_rate": 8.560408164589117e-06, "loss": 0.1726, "step": 3379 }, { "epoch": 0.27, "grad_norm": 1.5823740699067297, "learning_rate": 8.559498477347777e-06, "loss": 0.2246, "step": 3380 }, { "epoch": 0.27, "grad_norm": 5.708874900183982, "learning_rate": 8.558588551146519e-06, "loss": 0.6208, "step": 3381 }, { "epoch": 0.27, "grad_norm": 1.4483844586385217, "learning_rate": 8.557678386046429e-06, "loss": 0.2343, "step": 3382 }, { "epoch": 0.27, "grad_norm": 1.3770788834879915, "learning_rate": 8.556767982108607e-06, "loss": 0.182, "step": 3383 }, { "epoch": 0.27, "grad_norm": 1.2930451226061221, "learning_rate": 8.555857339394175e-06, "loss": 0.1766, "step": 3384 }, { "epoch": 0.27, "grad_norm": 1.475730834805216, "learning_rate": 8.554946457964268e-06, "loss": 0.2194, "step": 3385 }, { "epoch": 0.27, "grad_norm": 4.843550203319319, "learning_rate": 8.554035337880034e-06, "loss": 0.605, "step": 3386 }, { "epoch": 0.27, "grad_norm": 1.4479539473973146, "learning_rate": 8.553123979202638e-06, "loss": 0.2207, "step": 3387 }, { "epoch": 0.27, "grad_norm": 1.4883021518656074, "learning_rate": 8.552212381993264e-06, "loss": 0.219, "step": 3388 }, { "epoch": 0.27, "grad_norm": 1.3753707166415994, "learning_rate": 8.55130054631311e-06, "loss": 0.207, "step": 3389 }, { "epoch": 0.27, "grad_norm": 1.564812415267844, "learning_rate": 8.550388472223391e-06, "loss": 0.2682, "step": 3390 }, { "epoch": 0.27, "grad_norm": 1.5335671407695977, "learning_rate": 8.549476159785336e-06, "loss": 0.2075, "step": 3391 }, { "epoch": 0.27, "grad_norm": 1.306855644100196, "learning_rate": 8.548563609060191e-06, "loss": 0.1748, "step": 3392 }, { "epoch": 0.27, "grad_norm": 1.377123622988194, "learning_rate": 8.547650820109222e-06, "loss": 0.2418, "step": 3393 }, { "epoch": 0.27, "grad_norm": 1.3783118718988048, "learning_rate": 8.546737792993702e-06, "loss": 0.1888, "step": 3394 }, { "epoch": 0.27, "grad_norm": 1.3861471381945925, "learning_rate": 8.54582452777493e-06, "loss": 0.1702, "step": 3395 }, { "epoch": 0.27, "grad_norm": 1.529905020367175, "learning_rate": 8.544911024514211e-06, "loss": 0.2316, "step": 3396 }, { "epoch": 0.27, "grad_norm": 1.4085107236134933, "learning_rate": 8.543997283272876e-06, "loss": 0.2133, "step": 3397 }, { "epoch": 0.27, "grad_norm": 6.293103098033083, "learning_rate": 8.543083304112265e-06, "loss": 0.7167, "step": 3398 }, { "epoch": 0.27, "grad_norm": 1.3188822304384575, "learning_rate": 8.542169087093738e-06, "loss": 0.1921, "step": 3399 }, { "epoch": 0.27, "grad_norm": 8.305343679466047, "learning_rate": 8.541254632278667e-06, "loss": 0.7709, "step": 3400 }, { "epoch": 0.27, "grad_norm": 1.4854512425162207, "learning_rate": 8.540339939728442e-06, "loss": 0.2361, "step": 3401 }, { "epoch": 0.27, "grad_norm": 1.4234052592768027, "learning_rate": 8.539425009504472e-06, "loss": 0.2235, "step": 3402 }, { "epoch": 0.27, "grad_norm": 1.479243328820142, "learning_rate": 8.538509841668175e-06, "loss": 0.203, "step": 3403 }, { "epoch": 0.27, "grad_norm": 1.4166251003820498, "learning_rate": 8.537594436280992e-06, "loss": 0.2301, "step": 3404 }, { "epoch": 0.27, "grad_norm": 1.5124227499727871, "learning_rate": 8.536678793404376e-06, "loss": 0.2501, "step": 3405 }, { "epoch": 0.27, "grad_norm": 1.4816169682267926, "learning_rate": 8.535762913099798e-06, "loss": 0.2128, "step": 3406 }, { "epoch": 0.27, "grad_norm": 1.597090458228408, "learning_rate": 8.534846795428741e-06, "loss": 0.2825, "step": 3407 }, { "epoch": 0.27, "grad_norm": 1.285115107151196, "learning_rate": 8.53393044045271e-06, "loss": 0.1923, "step": 3408 }, { "epoch": 0.27, "grad_norm": 1.3830473688227631, "learning_rate": 8.533013848233218e-06, "loss": 0.2099, "step": 3409 }, { "epoch": 0.27, "grad_norm": 1.457934509785978, "learning_rate": 8.532097018831805e-06, "loss": 0.2141, "step": 3410 }, { "epoch": 0.27, "grad_norm": 1.5704413315177805, "learning_rate": 8.531179952310016e-06, "loss": 0.2453, "step": 3411 }, { "epoch": 0.27, "grad_norm": 1.4384697804738853, "learning_rate": 8.530262648729419e-06, "loss": 0.2234, "step": 3412 }, { "epoch": 0.27, "grad_norm": 1.262343225066612, "learning_rate": 8.529345108151594e-06, "loss": 0.1781, "step": 3413 }, { "epoch": 0.27, "grad_norm": 1.2904855478551327, "learning_rate": 8.528427330638137e-06, "loss": 0.2577, "step": 3414 }, { "epoch": 0.27, "grad_norm": 1.4385361437785058, "learning_rate": 8.527509316250663e-06, "loss": 0.1713, "step": 3415 }, { "epoch": 0.27, "grad_norm": 1.5522998355833384, "learning_rate": 8.5265910650508e-06, "loss": 0.2217, "step": 3416 }, { "epoch": 0.27, "grad_norm": 1.2393137561563283, "learning_rate": 8.525672577100195e-06, "loss": 0.1701, "step": 3417 }, { "epoch": 0.27, "grad_norm": 1.3029039763986576, "learning_rate": 8.524753852460507e-06, "loss": 0.1799, "step": 3418 }, { "epoch": 0.27, "grad_norm": 1.3139639179579707, "learning_rate": 8.523834891193412e-06, "loss": 0.2001, "step": 3419 }, { "epoch": 0.27, "grad_norm": 1.2268069778300748, "learning_rate": 8.522915693360607e-06, "loss": 0.1682, "step": 3420 }, { "epoch": 0.27, "grad_norm": 1.3268490906493136, "learning_rate": 8.521996259023795e-06, "loss": 0.1576, "step": 3421 }, { "epoch": 0.27, "grad_norm": 1.1152832258383347, "learning_rate": 8.521076588244705e-06, "loss": 0.1447, "step": 3422 }, { "epoch": 0.27, "grad_norm": 1.354401385770746, "learning_rate": 8.520156681085073e-06, "loss": 0.229, "step": 3423 }, { "epoch": 0.27, "grad_norm": 1.4277168865077374, "learning_rate": 8.519236537606658e-06, "loss": 0.2054, "step": 3424 }, { "epoch": 0.27, "grad_norm": 1.6231363602590048, "learning_rate": 8.518316157871232e-06, "loss": 0.2951, "step": 3425 }, { "epoch": 0.27, "grad_norm": 1.2673811426144943, "learning_rate": 8.517395541940582e-06, "loss": 0.1918, "step": 3426 }, { "epoch": 0.27, "grad_norm": 9.56496349195988, "learning_rate": 8.516474689876512e-06, "loss": 0.4743, "step": 3427 }, { "epoch": 0.27, "grad_norm": 1.384655440165043, "learning_rate": 8.51555360174084e-06, "loss": 0.2152, "step": 3428 }, { "epoch": 0.27, "grad_norm": 1.4811281676721388, "learning_rate": 8.514632277595405e-06, "loss": 0.24, "step": 3429 }, { "epoch": 0.27, "grad_norm": 5.8618187422190005, "learning_rate": 8.513710717502057e-06, "loss": 0.6745, "step": 3430 }, { "epoch": 0.27, "grad_norm": 1.4836628821142488, "learning_rate": 8.51278892152266e-06, "loss": 0.2427, "step": 3431 }, { "epoch": 0.27, "grad_norm": 1.346042823960647, "learning_rate": 8.5118668897191e-06, "loss": 0.1746, "step": 3432 }, { "epoch": 0.27, "grad_norm": 1.3900841993862985, "learning_rate": 8.510944622153276e-06, "loss": 0.2242, "step": 3433 }, { "epoch": 0.27, "grad_norm": 1.3842055573971987, "learning_rate": 8.510022118887102e-06, "loss": 0.2107, "step": 3434 }, { "epoch": 0.27, "grad_norm": 6.521602464599794, "learning_rate": 8.509099379982509e-06, "loss": 0.4941, "step": 3435 }, { "epoch": 0.27, "grad_norm": 1.5441403836233059, "learning_rate": 8.50817640550144e-06, "loss": 0.1896, "step": 3436 }, { "epoch": 0.27, "grad_norm": 1.3526867369859892, "learning_rate": 8.50725319550586e-06, "loss": 0.2127, "step": 3437 }, { "epoch": 0.28, "grad_norm": 1.3006985648901674, "learning_rate": 8.506329750057747e-06, "loss": 0.1805, "step": 3438 }, { "epoch": 0.28, "grad_norm": 1.3444669617533247, "learning_rate": 8.505406069219095e-06, "loss": 0.2159, "step": 3439 }, { "epoch": 0.28, "grad_norm": 1.4643151722564194, "learning_rate": 8.504482153051912e-06, "loss": 0.2355, "step": 3440 }, { "epoch": 0.28, "grad_norm": 1.3136783778631522, "learning_rate": 8.503558001618224e-06, "loss": 0.2046, "step": 3441 }, { "epoch": 0.28, "grad_norm": 1.4819907086055457, "learning_rate": 8.502633614980071e-06, "loss": 0.2213, "step": 3442 }, { "epoch": 0.28, "grad_norm": 1.4795731060927866, "learning_rate": 8.501708993199511e-06, "loss": 0.2312, "step": 3443 }, { "epoch": 0.28, "grad_norm": 1.4472907899262863, "learning_rate": 8.500784136338618e-06, "loss": 0.2787, "step": 3444 }, { "epoch": 0.28, "grad_norm": 1.30392472860371, "learning_rate": 8.499859044459478e-06, "loss": 0.2431, "step": 3445 }, { "epoch": 0.28, "grad_norm": 1.3472270330454186, "learning_rate": 8.498933717624197e-06, "loss": 0.1774, "step": 3446 }, { "epoch": 0.28, "grad_norm": 1.443513622475838, "learning_rate": 8.498008155894892e-06, "loss": 0.2344, "step": 3447 }, { "epoch": 0.28, "grad_norm": 1.4144248404312476, "learning_rate": 8.497082359333703e-06, "loss": 0.2919, "step": 3448 }, { "epoch": 0.28, "grad_norm": 9.904353366374629, "learning_rate": 8.49615632800278e-06, "loss": 0.7658, "step": 3449 }, { "epoch": 0.28, "grad_norm": 1.5023091555648453, "learning_rate": 8.495230061964289e-06, "loss": 0.237, "step": 3450 }, { "epoch": 0.28, "grad_norm": 1.2514074271087992, "learning_rate": 8.494303561280412e-06, "loss": 0.2078, "step": 3451 }, { "epoch": 0.28, "grad_norm": 1.4294497086902498, "learning_rate": 8.49337682601335e-06, "loss": 0.2412, "step": 3452 }, { "epoch": 0.28, "grad_norm": 1.400063897613412, "learning_rate": 8.49244985622532e-06, "loss": 0.2715, "step": 3453 }, { "epoch": 0.28, "grad_norm": 1.4086700584117429, "learning_rate": 8.491522651978548e-06, "loss": 0.243, "step": 3454 }, { "epoch": 0.28, "grad_norm": 1.4266615227254944, "learning_rate": 8.49059521333528e-06, "loss": 0.2628, "step": 3455 }, { "epoch": 0.28, "grad_norm": 1.3772307678633362, "learning_rate": 8.489667540357781e-06, "loss": 0.2102, "step": 3456 }, { "epoch": 0.28, "grad_norm": 1.278040474668398, "learning_rate": 8.488739633108328e-06, "loss": 0.2149, "step": 3457 }, { "epoch": 0.28, "grad_norm": 1.4619945053111885, "learning_rate": 8.487811491649209e-06, "loss": 0.2228, "step": 3458 }, { "epoch": 0.28, "grad_norm": 1.3253640656608443, "learning_rate": 8.48688311604274e-06, "loss": 0.1673, "step": 3459 }, { "epoch": 0.28, "grad_norm": 17.605505927493333, "learning_rate": 8.485954506351241e-06, "loss": 0.6058, "step": 3460 }, { "epoch": 0.28, "grad_norm": 1.3320851338773465, "learning_rate": 8.485025662637055e-06, "loss": 0.2065, "step": 3461 }, { "epoch": 0.28, "grad_norm": 1.3683581897303136, "learning_rate": 8.484096584962537e-06, "loss": 0.2064, "step": 3462 }, { "epoch": 0.28, "grad_norm": 1.291190066384424, "learning_rate": 8.483167273390058e-06, "loss": 0.1986, "step": 3463 }, { "epoch": 0.28, "grad_norm": 1.206622654362899, "learning_rate": 8.482237727982007e-06, "loss": 0.2033, "step": 3464 }, { "epoch": 0.28, "grad_norm": 1.5512908714215459, "learning_rate": 8.481307948800787e-06, "loss": 0.2392, "step": 3465 }, { "epoch": 0.28, "grad_norm": 1.5434148859438048, "learning_rate": 8.480377935908817e-06, "loss": 0.2308, "step": 3466 }, { "epoch": 0.28, "grad_norm": 1.3367491214929064, "learning_rate": 8.479447689368529e-06, "loss": 0.1787, "step": 3467 }, { "epoch": 0.28, "grad_norm": 1.2605416557649196, "learning_rate": 8.478517209242376e-06, "loss": 0.185, "step": 3468 }, { "epoch": 0.28, "grad_norm": 1.4204735594366522, "learning_rate": 8.477586495592823e-06, "loss": 0.2498, "step": 3469 }, { "epoch": 0.28, "grad_norm": 1.2940385014474238, "learning_rate": 8.476655548482353e-06, "loss": 0.2088, "step": 3470 }, { "epoch": 0.28, "grad_norm": 1.3590296422013841, "learning_rate": 8.475724367973462e-06, "loss": 0.2211, "step": 3471 }, { "epoch": 0.28, "grad_norm": 6.881278190700596, "learning_rate": 8.474792954128661e-06, "loss": 0.5251, "step": 3472 }, { "epoch": 0.28, "grad_norm": 1.2956080157986372, "learning_rate": 8.473861307010484e-06, "loss": 0.1907, "step": 3473 }, { "epoch": 0.28, "grad_norm": 1.3374040318634932, "learning_rate": 8.47292942668147e-06, "loss": 0.1892, "step": 3474 }, { "epoch": 0.28, "grad_norm": 1.5164451403399195, "learning_rate": 8.471997313204183e-06, "loss": 0.2639, "step": 3475 }, { "epoch": 0.28, "grad_norm": 1.2832037661818128, "learning_rate": 8.471064966641193e-06, "loss": 0.2228, "step": 3476 }, { "epoch": 0.28, "grad_norm": 1.5984064652252163, "learning_rate": 8.470132387055098e-06, "loss": 0.2604, "step": 3477 }, { "epoch": 0.28, "grad_norm": 1.437946545289577, "learning_rate": 8.469199574508502e-06, "loss": 0.2286, "step": 3478 }, { "epoch": 0.28, "grad_norm": 1.3509247943561975, "learning_rate": 8.468266529064025e-06, "loss": 0.2189, "step": 3479 }, { "epoch": 0.28, "grad_norm": 1.507517567796907, "learning_rate": 8.467333250784309e-06, "loss": 0.2279, "step": 3480 }, { "epoch": 0.28, "grad_norm": 1.316622863669533, "learning_rate": 8.466399739732006e-06, "loss": 0.1977, "step": 3481 }, { "epoch": 0.28, "grad_norm": 8.088557709046349, "learning_rate": 8.465465995969786e-06, "loss": 0.6017, "step": 3482 }, { "epoch": 0.28, "grad_norm": 1.5641777773018952, "learning_rate": 8.464532019560335e-06, "loss": 0.2291, "step": 3483 }, { "epoch": 0.28, "grad_norm": 1.5310945209829467, "learning_rate": 8.463597810566349e-06, "loss": 0.212, "step": 3484 }, { "epoch": 0.28, "grad_norm": 9.72140498309021, "learning_rate": 8.46266336905055e-06, "loss": 0.6118, "step": 3485 }, { "epoch": 0.28, "grad_norm": 1.364825616409186, "learning_rate": 8.461728695075667e-06, "loss": 0.1985, "step": 3486 }, { "epoch": 0.28, "grad_norm": 1.4458929057102348, "learning_rate": 8.460793788704449e-06, "loss": 0.2447, "step": 3487 }, { "epoch": 0.28, "grad_norm": 1.4119090971058033, "learning_rate": 8.459858649999657e-06, "loss": 0.2056, "step": 3488 }, { "epoch": 0.28, "grad_norm": 1.4737843444468024, "learning_rate": 8.458923279024072e-06, "loss": 0.2616, "step": 3489 }, { "epoch": 0.28, "grad_norm": 1.2897049765546598, "learning_rate": 8.457987675840484e-06, "loss": 0.1942, "step": 3490 }, { "epoch": 0.28, "grad_norm": 1.32461648629269, "learning_rate": 8.45705184051171e-06, "loss": 0.2071, "step": 3491 }, { "epoch": 0.28, "grad_norm": 1.5500957466339325, "learning_rate": 8.45611577310057e-06, "loss": 0.2524, "step": 3492 }, { "epoch": 0.28, "grad_norm": 1.3673654279746756, "learning_rate": 8.455179473669905e-06, "loss": 0.2227, "step": 3493 }, { "epoch": 0.28, "grad_norm": 1.4310744505165995, "learning_rate": 8.454242942282576e-06, "loss": 0.1618, "step": 3494 }, { "epoch": 0.28, "grad_norm": 10.752270089548542, "learning_rate": 8.45330617900145e-06, "loss": 0.6575, "step": 3495 }, { "epoch": 0.28, "grad_norm": 1.4296196779878696, "learning_rate": 8.452369183889419e-06, "loss": 0.2503, "step": 3496 }, { "epoch": 0.28, "grad_norm": 1.356048869214277, "learning_rate": 8.451431957009384e-06, "loss": 0.1914, "step": 3497 }, { "epoch": 0.28, "grad_norm": 1.4793535135672018, "learning_rate": 8.450494498424263e-06, "loss": 0.2223, "step": 3498 }, { "epoch": 0.28, "grad_norm": 1.1105474716643329, "learning_rate": 8.449556808196992e-06, "loss": 0.1364, "step": 3499 }, { "epoch": 0.28, "grad_norm": 10.260996978187563, "learning_rate": 8.448618886390523e-06, "loss": 0.6377, "step": 3500 }, { "epoch": 0.28, "grad_norm": 1.4290444155480104, "learning_rate": 8.447680733067816e-06, "loss": 0.2143, "step": 3501 }, { "epoch": 0.28, "grad_norm": 1.3916454864739236, "learning_rate": 8.446742348291857e-06, "loss": 0.2415, "step": 3502 }, { "epoch": 0.28, "grad_norm": 1.377311538756095, "learning_rate": 8.445803732125641e-06, "loss": 0.1977, "step": 3503 }, { "epoch": 0.28, "grad_norm": 1.3803536348826295, "learning_rate": 8.44486488463218e-06, "loss": 0.2433, "step": 3504 }, { "epoch": 0.28, "grad_norm": 1.3594423362030763, "learning_rate": 8.443925805874502e-06, "loss": 0.216, "step": 3505 }, { "epoch": 0.28, "grad_norm": 14.601575872708146, "learning_rate": 8.44298649591565e-06, "loss": 0.6162, "step": 3506 }, { "epoch": 0.28, "grad_norm": 1.4177905703529006, "learning_rate": 8.442046954818682e-06, "loss": 0.2009, "step": 3507 }, { "epoch": 0.28, "grad_norm": 1.5388420691266567, "learning_rate": 8.441107182646674e-06, "loss": 0.257, "step": 3508 }, { "epoch": 0.28, "grad_norm": 1.4269021335765721, "learning_rate": 8.440167179462713e-06, "loss": 0.219, "step": 3509 }, { "epoch": 0.28, "grad_norm": 1.43282075620933, "learning_rate": 8.439226945329908e-06, "loss": 0.2231, "step": 3510 }, { "epoch": 0.28, "grad_norm": 1.7349687841648163, "learning_rate": 8.438286480311375e-06, "loss": 0.3251, "step": 3511 }, { "epoch": 0.28, "grad_norm": 1.4967416340105482, "learning_rate": 8.437345784470255e-06, "loss": 0.24, "step": 3512 }, { "epoch": 0.28, "grad_norm": 1.5108032644382643, "learning_rate": 8.436404857869697e-06, "loss": 0.2033, "step": 3513 }, { "epoch": 0.28, "grad_norm": 1.3491116587272378, "learning_rate": 8.43546370057287e-06, "loss": 0.2242, "step": 3514 }, { "epoch": 0.28, "grad_norm": 1.3380681770008118, "learning_rate": 8.434522312642955e-06, "loss": 0.2447, "step": 3515 }, { "epoch": 0.28, "grad_norm": 1.5057853560513264, "learning_rate": 8.433580694143152e-06, "loss": 0.248, "step": 3516 }, { "epoch": 0.28, "grad_norm": 10.872842745170944, "learning_rate": 8.432638845136671e-06, "loss": 0.65, "step": 3517 }, { "epoch": 0.28, "grad_norm": 1.3743038220856165, "learning_rate": 8.431696765686746e-06, "loss": 0.2137, "step": 3518 }, { "epoch": 0.28, "grad_norm": 1.162384675673378, "learning_rate": 8.430754455856618e-06, "loss": 0.1899, "step": 3519 }, { "epoch": 0.28, "grad_norm": 7.632299215587384, "learning_rate": 8.42981191570955e-06, "loss": 0.4724, "step": 3520 }, { "epoch": 0.28, "grad_norm": 1.3684654691444889, "learning_rate": 8.428869145308817e-06, "loss": 0.2446, "step": 3521 }, { "epoch": 0.28, "grad_norm": 1.5727901526492336, "learning_rate": 8.427926144717707e-06, "loss": 0.2583, "step": 3522 }, { "epoch": 0.28, "grad_norm": 7.9526626926514785, "learning_rate": 8.426982913999531e-06, "loss": 0.5841, "step": 3523 }, { "epoch": 0.28, "grad_norm": 1.3820512686571191, "learning_rate": 8.426039453217606e-06, "loss": 0.2331, "step": 3524 }, { "epoch": 0.28, "grad_norm": 1.3234939687259617, "learning_rate": 8.425095762435274e-06, "loss": 0.1872, "step": 3525 }, { "epoch": 0.28, "grad_norm": 1.2023558747677952, "learning_rate": 8.424151841715885e-06, "loss": 0.1341, "step": 3526 }, { "epoch": 0.28, "grad_norm": 1.3970676847709804, "learning_rate": 8.423207691122808e-06, "loss": 0.1943, "step": 3527 }, { "epoch": 0.28, "grad_norm": 1.3012580623644552, "learning_rate": 8.422263310719428e-06, "loss": 0.2298, "step": 3528 }, { "epoch": 0.28, "grad_norm": 1.390800120561444, "learning_rate": 8.421318700569142e-06, "loss": 0.1861, "step": 3529 }, { "epoch": 0.28, "grad_norm": 1.4388607884217528, "learning_rate": 8.420373860735366e-06, "loss": 0.1829, "step": 3530 }, { "epoch": 0.28, "grad_norm": 6.402792161876903, "learning_rate": 8.419428791281528e-06, "loss": 0.6133, "step": 3531 }, { "epoch": 0.28, "grad_norm": 1.5024921698480707, "learning_rate": 8.418483492271077e-06, "loss": 0.2466, "step": 3532 }, { "epoch": 0.28, "grad_norm": 1.114020113468879, "learning_rate": 8.417537963767469e-06, "loss": 0.1565, "step": 3533 }, { "epoch": 0.28, "grad_norm": 1.443472378494259, "learning_rate": 8.416592205834185e-06, "loss": 0.2148, "step": 3534 }, { "epoch": 0.28, "grad_norm": 1.2028106971309198, "learning_rate": 8.415646218534713e-06, "loss": 0.1697, "step": 3535 }, { "epoch": 0.28, "grad_norm": 1.3894622733695372, "learning_rate": 8.41470000193256e-06, "loss": 0.2133, "step": 3536 }, { "epoch": 0.28, "grad_norm": 1.5286291867068054, "learning_rate": 8.413753556091254e-06, "loss": 0.2355, "step": 3537 }, { "epoch": 0.28, "grad_norm": 1.2642081670084637, "learning_rate": 8.412806881074325e-06, "loss": 0.1874, "step": 3538 }, { "epoch": 0.28, "grad_norm": 7.147546051377303, "learning_rate": 8.41185997694533e-06, "loss": 0.5358, "step": 3539 }, { "epoch": 0.28, "grad_norm": 1.417027916612012, "learning_rate": 8.410912843767837e-06, "loss": 0.2353, "step": 3540 }, { "epoch": 0.28, "grad_norm": 1.4790168596047897, "learning_rate": 8.409965481605432e-06, "loss": 0.213, "step": 3541 }, { "epoch": 0.28, "grad_norm": 1.3235210719830932, "learning_rate": 8.409017890521711e-06, "loss": 0.1862, "step": 3542 }, { "epoch": 0.28, "grad_norm": 1.4680285760893097, "learning_rate": 8.40807007058029e-06, "loss": 0.2261, "step": 3543 }, { "epoch": 0.28, "grad_norm": 11.931338706852394, "learning_rate": 8.407122021844802e-06, "loss": 0.6239, "step": 3544 }, { "epoch": 0.28, "grad_norm": 1.458212901951154, "learning_rate": 8.406173744378887e-06, "loss": 0.1877, "step": 3545 }, { "epoch": 0.28, "grad_norm": 1.2961430426800935, "learning_rate": 8.405225238246208e-06, "loss": 0.1623, "step": 3546 }, { "epoch": 0.28, "grad_norm": 1.4483675940464704, "learning_rate": 8.404276503510443e-06, "loss": 0.227, "step": 3547 }, { "epoch": 0.28, "grad_norm": 1.3631285114069418, "learning_rate": 8.40332754023528e-06, "loss": 0.2195, "step": 3548 }, { "epoch": 0.28, "grad_norm": 1.1651284862412983, "learning_rate": 8.402378348484427e-06, "loss": 0.154, "step": 3549 }, { "epoch": 0.28, "grad_norm": 5.990265354834624, "learning_rate": 8.401428928321607e-06, "loss": 0.6829, "step": 3550 }, { "epoch": 0.28, "grad_norm": 1.4117491347143682, "learning_rate": 8.400479279810559e-06, "loss": 0.1837, "step": 3551 }, { "epoch": 0.28, "grad_norm": 1.3673114271466706, "learning_rate": 8.399529403015031e-06, "loss": 0.1915, "step": 3552 }, { "epoch": 0.28, "grad_norm": 1.3317961161774117, "learning_rate": 8.398579297998798e-06, "loss": 0.1553, "step": 3553 }, { "epoch": 0.28, "grad_norm": 1.2843120715717005, "learning_rate": 8.397628964825637e-06, "loss": 0.1837, "step": 3554 }, { "epoch": 0.28, "grad_norm": 7.345890360919074, "learning_rate": 8.396678403559348e-06, "loss": 0.6522, "step": 3555 }, { "epoch": 0.28, "grad_norm": 1.2782432092336076, "learning_rate": 8.395727614263749e-06, "loss": 0.1931, "step": 3556 }, { "epoch": 0.28, "grad_norm": 1.273421143340095, "learning_rate": 8.394776597002666e-06, "loss": 0.1833, "step": 3557 }, { "epoch": 0.28, "grad_norm": 4.991776810442344, "learning_rate": 8.393825351839942e-06, "loss": 0.5452, "step": 3558 }, { "epoch": 0.28, "grad_norm": 1.2994101098371522, "learning_rate": 8.392873878839441e-06, "loss": 0.1826, "step": 3559 }, { "epoch": 0.28, "grad_norm": 1.2754644429793767, "learning_rate": 8.391922178065037e-06, "loss": 0.1934, "step": 3560 }, { "epoch": 0.28, "grad_norm": 1.3071973979309786, "learning_rate": 8.390970249580619e-06, "loss": 0.1894, "step": 3561 }, { "epoch": 0.28, "grad_norm": 1.3061638562278641, "learning_rate": 8.390018093450094e-06, "loss": 0.1679, "step": 3562 }, { "epoch": 0.29, "grad_norm": 1.5446246473592506, "learning_rate": 8.389065709737384e-06, "loss": 0.2652, "step": 3563 }, { "epoch": 0.29, "grad_norm": 1.3650648397202592, "learning_rate": 8.388113098506422e-06, "loss": 0.2014, "step": 3564 }, { "epoch": 0.29, "grad_norm": 1.7396085664091716, "learning_rate": 8.387160259821165e-06, "loss": 0.2882, "step": 3565 }, { "epoch": 0.29, "grad_norm": 1.3332989805657023, "learning_rate": 8.386207193745577e-06, "loss": 0.203, "step": 3566 }, { "epoch": 0.29, "grad_norm": 1.370976767771929, "learning_rate": 8.385253900343638e-06, "loss": 0.2428, "step": 3567 }, { "epoch": 0.29, "grad_norm": 1.4026268781131952, "learning_rate": 8.38430037967935e-06, "loss": 0.1929, "step": 3568 }, { "epoch": 0.29, "grad_norm": 1.315817361523236, "learning_rate": 8.383346631816723e-06, "loss": 0.2324, "step": 3569 }, { "epoch": 0.29, "grad_norm": 1.2044036728363916, "learning_rate": 8.382392656819784e-06, "loss": 0.1854, "step": 3570 }, { "epoch": 0.29, "grad_norm": 1.4770274346571084, "learning_rate": 8.38143845475258e-06, "loss": 0.2469, "step": 3571 }, { "epoch": 0.29, "grad_norm": 1.6201309967754525, "learning_rate": 8.380484025679166e-06, "loss": 0.2489, "step": 3572 }, { "epoch": 0.29, "grad_norm": 1.4587136809526096, "learning_rate": 8.379529369663616e-06, "loss": 0.2317, "step": 3573 }, { "epoch": 0.29, "grad_norm": 1.4329762793107164, "learning_rate": 8.37857448677002e-06, "loss": 0.2344, "step": 3574 }, { "epoch": 0.29, "grad_norm": 4.139150836369118, "learning_rate": 8.377619377062483e-06, "loss": 0.5683, "step": 3575 }, { "epoch": 0.29, "grad_norm": 1.3209228526348853, "learning_rate": 8.376664040605122e-06, "loss": 0.2337, "step": 3576 }, { "epoch": 0.29, "grad_norm": 7.300426186948551, "learning_rate": 8.375708477462075e-06, "loss": 0.5826, "step": 3577 }, { "epoch": 0.29, "grad_norm": 1.5571356487365462, "learning_rate": 8.37475268769749e-06, "loss": 0.2294, "step": 3578 }, { "epoch": 0.29, "grad_norm": 1.2982539749910906, "learning_rate": 8.373796671375531e-06, "loss": 0.1805, "step": 3579 }, { "epoch": 0.29, "grad_norm": 1.3027740968560202, "learning_rate": 8.372840428560379e-06, "loss": 0.2109, "step": 3580 }, { "epoch": 0.29, "grad_norm": 1.5498852892032748, "learning_rate": 8.37188395931623e-06, "loss": 0.2192, "step": 3581 }, { "epoch": 0.29, "grad_norm": 5.584910515408539, "learning_rate": 8.370927263707293e-06, "loss": 0.7473, "step": 3582 }, { "epoch": 0.29, "grad_norm": 1.3808792874416347, "learning_rate": 8.369970341797797e-06, "loss": 0.2156, "step": 3583 }, { "epoch": 0.29, "grad_norm": 1.2559392269000558, "learning_rate": 8.369013193651982e-06, "loss": 0.1762, "step": 3584 }, { "epoch": 0.29, "grad_norm": 1.506684861025489, "learning_rate": 8.368055819334101e-06, "loss": 0.2397, "step": 3585 }, { "epoch": 0.29, "grad_norm": 1.3035072648123753, "learning_rate": 8.367098218908431e-06, "loss": 0.1999, "step": 3586 }, { "epoch": 0.29, "grad_norm": 4.208307351585978, "learning_rate": 8.366140392439255e-06, "loss": 0.3899, "step": 3587 }, { "epoch": 0.29, "grad_norm": 1.2769741335788356, "learning_rate": 8.365182339990875e-06, "loss": 0.1986, "step": 3588 }, { "epoch": 0.29, "grad_norm": 1.4492665330628032, "learning_rate": 8.364224061627608e-06, "loss": 0.2346, "step": 3589 }, { "epoch": 0.29, "grad_norm": 1.2593445140495894, "learning_rate": 8.363265557413786e-06, "loss": 0.2077, "step": 3590 }, { "epoch": 0.29, "grad_norm": 1.5151863512719281, "learning_rate": 8.36230682741376e-06, "loss": 0.2351, "step": 3591 }, { "epoch": 0.29, "grad_norm": 1.5491872618165108, "learning_rate": 8.361347871691885e-06, "loss": 0.2361, "step": 3592 }, { "epoch": 0.29, "grad_norm": 1.3795351336744357, "learning_rate": 8.360388690312546e-06, "loss": 0.1909, "step": 3593 }, { "epoch": 0.29, "grad_norm": 1.654255473682728, "learning_rate": 8.35942928334013e-06, "loss": 0.2544, "step": 3594 }, { "epoch": 0.29, "grad_norm": 6.503308348173627, "learning_rate": 8.358469650839049e-06, "loss": 0.6242, "step": 3595 }, { "epoch": 0.29, "grad_norm": 1.4328297608911573, "learning_rate": 8.357509792873725e-06, "loss": 0.251, "step": 3596 }, { "epoch": 0.29, "grad_norm": 1.356706270059027, "learning_rate": 8.356549709508596e-06, "loss": 0.194, "step": 3597 }, { "epoch": 0.29, "grad_norm": 1.4589583616897974, "learning_rate": 8.355589400808115e-06, "loss": 0.183, "step": 3598 }, { "epoch": 0.29, "grad_norm": 7.32716123412696, "learning_rate": 8.35462886683675e-06, "loss": 0.5073, "step": 3599 }, { "epoch": 0.29, "grad_norm": 1.2816688133159695, "learning_rate": 8.353668107658984e-06, "loss": 0.2204, "step": 3600 }, { "epoch": 0.29, "grad_norm": 1.3436927550294877, "learning_rate": 8.352707123339318e-06, "loss": 0.1832, "step": 3601 }, { "epoch": 0.29, "grad_norm": 1.4049583297428472, "learning_rate": 8.351745913942265e-06, "loss": 0.2157, "step": 3602 }, { "epoch": 0.29, "grad_norm": 7.330778287490357, "learning_rate": 8.35078447953235e-06, "loss": 0.7936, "step": 3603 }, { "epoch": 0.29, "grad_norm": 1.3753206991155558, "learning_rate": 8.349822820174125e-06, "loss": 0.2253, "step": 3604 }, { "epoch": 0.29, "grad_norm": 1.2499544592639533, "learning_rate": 8.348860935932143e-06, "loss": 0.1838, "step": 3605 }, { "epoch": 0.29, "grad_norm": 1.3981161905262895, "learning_rate": 8.34789882687098e-06, "loss": 0.199, "step": 3606 }, { "epoch": 0.29, "grad_norm": 1.4575574110933573, "learning_rate": 8.346936493055224e-06, "loss": 0.1866, "step": 3607 }, { "epoch": 0.29, "grad_norm": 1.3597440436401644, "learning_rate": 8.34597393454948e-06, "loss": 0.2013, "step": 3608 }, { "epoch": 0.29, "grad_norm": 1.3188029845421754, "learning_rate": 8.345011151418368e-06, "loss": 0.2064, "step": 3609 }, { "epoch": 0.29, "grad_norm": 1.2753653036203787, "learning_rate": 8.344048143726524e-06, "loss": 0.207, "step": 3610 }, { "epoch": 0.29, "grad_norm": 1.242345704634693, "learning_rate": 8.343084911538595e-06, "loss": 0.1924, "step": 3611 }, { "epoch": 0.29, "grad_norm": 1.260163749419984, "learning_rate": 8.342121454919246e-06, "loss": 0.2158, "step": 3612 }, { "epoch": 0.29, "grad_norm": 1.476982778057887, "learning_rate": 8.341157773933158e-06, "loss": 0.2291, "step": 3613 }, { "epoch": 0.29, "grad_norm": 1.3896105238425212, "learning_rate": 8.340193868645026e-06, "loss": 0.1854, "step": 3614 }, { "epoch": 0.29, "grad_norm": 1.3509730901125538, "learning_rate": 8.339229739119558e-06, "loss": 0.2194, "step": 3615 }, { "epoch": 0.29, "grad_norm": 1.391433033324576, "learning_rate": 8.338265385421482e-06, "loss": 0.2075, "step": 3616 }, { "epoch": 0.29, "grad_norm": 1.3746226792363845, "learning_rate": 8.337300807615536e-06, "loss": 0.2206, "step": 3617 }, { "epoch": 0.29, "grad_norm": 1.3516295263782565, "learning_rate": 8.336336005766475e-06, "loss": 0.2005, "step": 3618 }, { "epoch": 0.29, "grad_norm": 1.4282317438389422, "learning_rate": 8.335370979939067e-06, "loss": 0.2028, "step": 3619 }, { "epoch": 0.29, "grad_norm": 1.3501104463948796, "learning_rate": 8.334405730198101e-06, "loss": 0.2015, "step": 3620 }, { "epoch": 0.29, "grad_norm": 1.574640350031064, "learning_rate": 8.333440256608376e-06, "loss": 0.2372, "step": 3621 }, { "epoch": 0.29, "grad_norm": 1.4295925903521247, "learning_rate": 8.332474559234705e-06, "loss": 0.2382, "step": 3622 }, { "epoch": 0.29, "grad_norm": 1.4372995187617252, "learning_rate": 8.33150863814192e-06, "loss": 0.1975, "step": 3623 }, { "epoch": 0.29, "grad_norm": 1.3406626991679254, "learning_rate": 8.330542493394868e-06, "loss": 0.2469, "step": 3624 }, { "epoch": 0.29, "grad_norm": 1.2863909604227424, "learning_rate": 8.329576125058406e-06, "loss": 0.1674, "step": 3625 }, { "epoch": 0.29, "grad_norm": 1.4563004339514694, "learning_rate": 8.328609533197411e-06, "loss": 0.2477, "step": 3626 }, { "epoch": 0.29, "grad_norm": 1.4536198930009567, "learning_rate": 8.327642717876772e-06, "loss": 0.2213, "step": 3627 }, { "epoch": 0.29, "grad_norm": 1.4656670854878002, "learning_rate": 8.326675679161396e-06, "loss": 0.2497, "step": 3628 }, { "epoch": 0.29, "grad_norm": 1.285532404428391, "learning_rate": 8.3257084171162e-06, "loss": 0.196, "step": 3629 }, { "epoch": 0.29, "grad_norm": 1.4659319420783707, "learning_rate": 8.324740931806125e-06, "loss": 0.2025, "step": 3630 }, { "epoch": 0.29, "grad_norm": 1.6033861839326882, "learning_rate": 8.323773223296117e-06, "loss": 0.2294, "step": 3631 }, { "epoch": 0.29, "grad_norm": 1.2932123187943596, "learning_rate": 8.32280529165114e-06, "loss": 0.2279, "step": 3632 }, { "epoch": 0.29, "grad_norm": 1.3860152862379742, "learning_rate": 8.321837136936176e-06, "loss": 0.2086, "step": 3633 }, { "epoch": 0.29, "grad_norm": 1.3582526039781295, "learning_rate": 8.320868759216221e-06, "loss": 0.1841, "step": 3634 }, { "epoch": 0.29, "grad_norm": 7.317588757132336, "learning_rate": 8.319900158556285e-06, "loss": 0.5561, "step": 3635 }, { "epoch": 0.29, "grad_norm": 1.399881478807535, "learning_rate": 8.318931335021393e-06, "loss": 0.235, "step": 3636 }, { "epoch": 0.29, "grad_norm": 1.4720100751917324, "learning_rate": 8.317962288676582e-06, "loss": 0.2192, "step": 3637 }, { "epoch": 0.29, "grad_norm": 1.2736464299322818, "learning_rate": 8.31699301958691e-06, "loss": 0.2075, "step": 3638 }, { "epoch": 0.29, "grad_norm": 1.5199475689639685, "learning_rate": 8.316023527817447e-06, "loss": 0.1997, "step": 3639 }, { "epoch": 0.29, "grad_norm": 1.52945565095852, "learning_rate": 8.315053813433279e-06, "loss": 0.2421, "step": 3640 }, { "epoch": 0.29, "grad_norm": 1.5733506511274244, "learning_rate": 8.314083876499503e-06, "loss": 0.216, "step": 3641 }, { "epoch": 0.29, "grad_norm": 1.3359313514771205, "learning_rate": 8.313113717081234e-06, "loss": 0.2095, "step": 3642 }, { "epoch": 0.29, "grad_norm": 1.2796021653276421, "learning_rate": 8.312143335243605e-06, "loss": 0.2302, "step": 3643 }, { "epoch": 0.29, "grad_norm": 1.563369572541649, "learning_rate": 8.311172731051758e-06, "loss": 0.2525, "step": 3644 }, { "epoch": 0.29, "grad_norm": 1.3346987265034822, "learning_rate": 8.310201904570853e-06, "loss": 0.2063, "step": 3645 }, { "epoch": 0.29, "grad_norm": 1.420818497383828, "learning_rate": 8.309230855866065e-06, "loss": 0.2219, "step": 3646 }, { "epoch": 0.29, "grad_norm": 1.4621177291913883, "learning_rate": 8.308259585002581e-06, "loss": 0.2035, "step": 3647 }, { "epoch": 0.29, "grad_norm": 79.83605653195225, "learning_rate": 8.307288092045608e-06, "loss": 0.6522, "step": 3648 }, { "epoch": 0.29, "grad_norm": 1.4830585883208247, "learning_rate": 8.306316377060366e-06, "loss": 0.2691, "step": 3649 }, { "epoch": 0.29, "grad_norm": 1.3036038323488608, "learning_rate": 8.305344440112089e-06, "loss": 0.1788, "step": 3650 }, { "epoch": 0.29, "grad_norm": 1.4897912133373863, "learning_rate": 8.304372281266023e-06, "loss": 0.2429, "step": 3651 }, { "epoch": 0.29, "grad_norm": 1.4163330999523593, "learning_rate": 8.303399900587433e-06, "loss": 0.2476, "step": 3652 }, { "epoch": 0.29, "grad_norm": 1.4271150760742066, "learning_rate": 8.3024272981416e-06, "loss": 0.2063, "step": 3653 }, { "epoch": 0.29, "grad_norm": 1.2786276349988321, "learning_rate": 8.301454473993817e-06, "loss": 0.1857, "step": 3654 }, { "epoch": 0.29, "grad_norm": 5.489550263694434, "learning_rate": 8.300481428209391e-06, "loss": 0.5646, "step": 3655 }, { "epoch": 0.29, "grad_norm": 1.5996304194049025, "learning_rate": 8.299508160853648e-06, "loss": 0.2017, "step": 3656 }, { "epoch": 0.29, "grad_norm": 1.2146250171642388, "learning_rate": 8.298534671991925e-06, "loss": 0.1867, "step": 3657 }, { "epoch": 0.29, "grad_norm": 1.3454815762676198, "learning_rate": 8.297560961689574e-06, "loss": 0.2066, "step": 3658 }, { "epoch": 0.29, "grad_norm": 1.3649164808778247, "learning_rate": 8.296587030011965e-06, "loss": 0.2245, "step": 3659 }, { "epoch": 0.29, "grad_norm": 1.455109035751038, "learning_rate": 8.295612877024482e-06, "loss": 0.2107, "step": 3660 }, { "epoch": 0.29, "grad_norm": 1.381462131841822, "learning_rate": 8.294638502792517e-06, "loss": 0.2174, "step": 3661 }, { "epoch": 0.29, "grad_norm": 1.3088356961123524, "learning_rate": 8.29366390738149e-06, "loss": 0.2631, "step": 3662 }, { "epoch": 0.29, "grad_norm": 1.3723464164030308, "learning_rate": 8.292689090856826e-06, "loss": 0.2356, "step": 3663 }, { "epoch": 0.29, "grad_norm": 1.427429774474268, "learning_rate": 8.291714053283965e-06, "loss": 0.2122, "step": 3664 }, { "epoch": 0.29, "grad_norm": 1.3695769686363157, "learning_rate": 8.290738794728369e-06, "loss": 0.2171, "step": 3665 }, { "epoch": 0.29, "grad_norm": 1.4393291405342103, "learning_rate": 8.289763315255506e-06, "loss": 0.2141, "step": 3666 }, { "epoch": 0.29, "grad_norm": 1.681014742635568, "learning_rate": 8.288787614930862e-06, "loss": 0.2284, "step": 3667 }, { "epoch": 0.29, "grad_norm": 1.5775208107337915, "learning_rate": 8.287811693819943e-06, "loss": 0.215, "step": 3668 }, { "epoch": 0.29, "grad_norm": 1.3944643343603251, "learning_rate": 8.286835551988262e-06, "loss": 0.2025, "step": 3669 }, { "epoch": 0.29, "grad_norm": 1.3641289414013542, "learning_rate": 8.285859189501353e-06, "loss": 0.1872, "step": 3670 }, { "epoch": 0.29, "grad_norm": 1.3591705302643022, "learning_rate": 8.28488260642476e-06, "loss": 0.2247, "step": 3671 }, { "epoch": 0.29, "grad_norm": 1.4712739259040275, "learning_rate": 8.283905802824044e-06, "loss": 0.1944, "step": 3672 }, { "epoch": 0.29, "grad_norm": 1.2743427209148437, "learning_rate": 8.282928778764783e-06, "loss": 0.1663, "step": 3673 }, { "epoch": 0.29, "grad_norm": 1.302232184843105, "learning_rate": 8.281951534312566e-06, "loss": 0.2181, "step": 3674 }, { "epoch": 0.29, "grad_norm": 1.2098277345307213, "learning_rate": 8.280974069532999e-06, "loss": 0.1878, "step": 3675 }, { "epoch": 0.29, "grad_norm": 1.6433614979568003, "learning_rate": 8.2799963844917e-06, "loss": 0.219, "step": 3676 }, { "epoch": 0.29, "grad_norm": 1.6470605389946293, "learning_rate": 8.279018479254307e-06, "loss": 0.2516, "step": 3677 }, { "epoch": 0.29, "grad_norm": 1.3535455443861624, "learning_rate": 8.278040353886468e-06, "loss": 0.1803, "step": 3678 }, { "epoch": 0.29, "grad_norm": 1.388689154054311, "learning_rate": 8.277062008453848e-06, "loss": 0.2127, "step": 3679 }, { "epoch": 0.29, "grad_norm": 1.286672203501988, "learning_rate": 8.276083443022127e-06, "loss": 0.1903, "step": 3680 }, { "epoch": 0.29, "grad_norm": 1.2099200395071816, "learning_rate": 8.275104657656998e-06, "loss": 0.1932, "step": 3681 }, { "epoch": 0.29, "grad_norm": 1.34566525864894, "learning_rate": 8.274125652424169e-06, "loss": 0.2064, "step": 3682 }, { "epoch": 0.29, "grad_norm": 9.812300159171215, "learning_rate": 8.273146427389365e-06, "loss": 0.7962, "step": 3683 }, { "epoch": 0.29, "grad_norm": 1.2614664935664717, "learning_rate": 8.272166982618326e-06, "loss": 0.1853, "step": 3684 }, { "epoch": 0.29, "grad_norm": 1.4009811097030007, "learning_rate": 8.271187318176803e-06, "loss": 0.2172, "step": 3685 }, { "epoch": 0.29, "grad_norm": 1.4703533836664249, "learning_rate": 8.270207434130562e-06, "loss": 0.2143, "step": 3686 }, { "epoch": 0.29, "grad_norm": 1.3942150400953677, "learning_rate": 8.269227330545388e-06, "loss": 0.2737, "step": 3687 }, { "epoch": 0.3, "grad_norm": 1.4359373033324636, "learning_rate": 8.26824700748708e-06, "loss": 0.2042, "step": 3688 }, { "epoch": 0.3, "grad_norm": 1.4024022106210114, "learning_rate": 8.267266465021445e-06, "loss": 0.2344, "step": 3689 }, { "epoch": 0.3, "grad_norm": 1.3645021143953837, "learning_rate": 8.266285703214315e-06, "loss": 0.1971, "step": 3690 }, { "epoch": 0.3, "grad_norm": 1.2983707680147094, "learning_rate": 8.265304722131528e-06, "loss": 0.222, "step": 3691 }, { "epoch": 0.3, "grad_norm": 1.4190517889433056, "learning_rate": 8.264323521838945e-06, "loss": 0.2315, "step": 3692 }, { "epoch": 0.3, "grad_norm": 1.399609214833165, "learning_rate": 8.26334210240243e-06, "loss": 0.2265, "step": 3693 }, { "epoch": 0.3, "grad_norm": 1.4849815667250656, "learning_rate": 8.262360463887871e-06, "loss": 0.1911, "step": 3694 }, { "epoch": 0.3, "grad_norm": 1.4065921565172728, "learning_rate": 8.261378606361173e-06, "loss": 0.2306, "step": 3695 }, { "epoch": 0.3, "grad_norm": 1.3978970327573963, "learning_rate": 8.260396529888244e-06, "loss": 0.2423, "step": 3696 }, { "epoch": 0.3, "grad_norm": 1.4786677270683066, "learning_rate": 8.25941423453502e-06, "loss": 0.2306, "step": 3697 }, { "epoch": 0.3, "grad_norm": 1.2950361479029586, "learning_rate": 8.25843172036744e-06, "loss": 0.2029, "step": 3698 }, { "epoch": 0.3, "grad_norm": 1.529412594745916, "learning_rate": 8.25744898745147e-06, "loss": 0.2194, "step": 3699 }, { "epoch": 0.3, "grad_norm": 1.3973395692805393, "learning_rate": 8.256466035853077e-06, "loss": 0.2054, "step": 3700 }, { "epoch": 0.3, "grad_norm": 5.9423219727902445, "learning_rate": 8.255482865638253e-06, "loss": 0.7162, "step": 3701 }, { "epoch": 0.3, "grad_norm": 1.4680897725275448, "learning_rate": 8.254499476873001e-06, "loss": 0.1768, "step": 3702 }, { "epoch": 0.3, "grad_norm": 1.3666915719498542, "learning_rate": 8.253515869623339e-06, "loss": 0.1748, "step": 3703 }, { "epoch": 0.3, "grad_norm": 1.480952367741595, "learning_rate": 8.252532043955296e-06, "loss": 0.263, "step": 3704 }, { "epoch": 0.3, "grad_norm": 1.3093405784125278, "learning_rate": 8.251547999934924e-06, "loss": 0.182, "step": 3705 }, { "epoch": 0.3, "grad_norm": 1.3902644640422197, "learning_rate": 8.250563737628284e-06, "loss": 0.212, "step": 3706 }, { "epoch": 0.3, "grad_norm": 1.4616072349454408, "learning_rate": 8.24957925710145e-06, "loss": 0.2108, "step": 3707 }, { "epoch": 0.3, "grad_norm": 1.376501002638323, "learning_rate": 8.248594558420515e-06, "loss": 0.2016, "step": 3708 }, { "epoch": 0.3, "grad_norm": 1.5387570187128823, "learning_rate": 8.247609641651584e-06, "loss": 0.2254, "step": 3709 }, { "epoch": 0.3, "grad_norm": 1.5489918495288462, "learning_rate": 8.246624506860779e-06, "loss": 0.2697, "step": 3710 }, { "epoch": 0.3, "grad_norm": 1.3097732406823503, "learning_rate": 8.245639154114234e-06, "loss": 0.1699, "step": 3711 }, { "epoch": 0.3, "grad_norm": 1.2622903227246631, "learning_rate": 8.244653583478097e-06, "loss": 0.227, "step": 3712 }, { "epoch": 0.3, "grad_norm": 1.3749743408834962, "learning_rate": 8.243667795018534e-06, "loss": 0.2164, "step": 3713 }, { "epoch": 0.3, "grad_norm": 1.4549180829773172, "learning_rate": 8.242681788801727e-06, "loss": 0.231, "step": 3714 }, { "epoch": 0.3, "grad_norm": 1.5138124701086562, "learning_rate": 8.241695564893863e-06, "loss": 0.199, "step": 3715 }, { "epoch": 0.3, "grad_norm": 1.4729945768951331, "learning_rate": 8.240709123361157e-06, "loss": 0.1819, "step": 3716 }, { "epoch": 0.3, "grad_norm": 1.4379992296055528, "learning_rate": 8.239722464269826e-06, "loss": 0.238, "step": 3717 }, { "epoch": 0.3, "grad_norm": 1.5441518070877207, "learning_rate": 8.238735587686111e-06, "loss": 0.2034, "step": 3718 }, { "epoch": 0.3, "grad_norm": 1.2635229442489286, "learning_rate": 8.237748493676264e-06, "loss": 0.2089, "step": 3719 }, { "epoch": 0.3, "grad_norm": 1.4433181528455978, "learning_rate": 8.23676118230655e-06, "loss": 0.2225, "step": 3720 }, { "epoch": 0.3, "grad_norm": 1.3295000548166718, "learning_rate": 8.23577365364325e-06, "loss": 0.1927, "step": 3721 }, { "epoch": 0.3, "grad_norm": 1.3299863091429769, "learning_rate": 8.23478590775266e-06, "loss": 0.2058, "step": 3722 }, { "epoch": 0.3, "grad_norm": 1.32929603006992, "learning_rate": 8.233797944701093e-06, "loss": 0.1697, "step": 3723 }, { "epoch": 0.3, "grad_norm": 1.3850555964034388, "learning_rate": 8.23280976455487e-06, "loss": 0.1985, "step": 3724 }, { "epoch": 0.3, "grad_norm": 4.6459866806684795, "learning_rate": 8.231821367380335e-06, "loss": 0.5332, "step": 3725 }, { "epoch": 0.3, "grad_norm": 1.4984089277641566, "learning_rate": 8.230832753243838e-06, "loss": 0.244, "step": 3726 }, { "epoch": 0.3, "grad_norm": 1.3791108475428366, "learning_rate": 8.22984392221175e-06, "loss": 0.2206, "step": 3727 }, { "epoch": 0.3, "grad_norm": 1.3937207997634669, "learning_rate": 8.228854874350452e-06, "loss": 0.2785, "step": 3728 }, { "epoch": 0.3, "grad_norm": 1.347972058847856, "learning_rate": 8.227865609726344e-06, "loss": 0.1893, "step": 3729 }, { "epoch": 0.3, "grad_norm": 1.4610277082582304, "learning_rate": 8.226876128405838e-06, "loss": 0.2318, "step": 3730 }, { "epoch": 0.3, "grad_norm": 1.4266060858796787, "learning_rate": 8.22588643045536e-06, "loss": 0.2127, "step": 3731 }, { "epoch": 0.3, "grad_norm": 1.3381218278042828, "learning_rate": 8.224896515941352e-06, "loss": 0.2172, "step": 3732 }, { "epoch": 0.3, "grad_norm": 6.059647950561012, "learning_rate": 8.22390638493027e-06, "loss": 0.751, "step": 3733 }, { "epoch": 0.3, "grad_norm": 1.3438928045051468, "learning_rate": 8.222916037488585e-06, "loss": 0.2327, "step": 3734 }, { "epoch": 0.3, "grad_norm": 1.6371557991669559, "learning_rate": 8.22192547368278e-06, "loss": 0.2752, "step": 3735 }, { "epoch": 0.3, "grad_norm": 1.3556432457610368, "learning_rate": 8.220934693579357e-06, "loss": 0.1792, "step": 3736 }, { "epoch": 0.3, "grad_norm": 1.3950380065500485, "learning_rate": 8.219943697244828e-06, "loss": 0.2156, "step": 3737 }, { "epoch": 0.3, "grad_norm": 1.5186624761909908, "learning_rate": 8.218952484745723e-06, "loss": 0.2469, "step": 3738 }, { "epoch": 0.3, "grad_norm": 1.3430299980915081, "learning_rate": 8.217961056148584e-06, "loss": 0.1695, "step": 3739 }, { "epoch": 0.3, "grad_norm": 1.5473682006150449, "learning_rate": 8.216969411519971e-06, "loss": 0.2411, "step": 3740 }, { "epoch": 0.3, "grad_norm": 1.339528098739716, "learning_rate": 8.215977550926452e-06, "loss": 0.2373, "step": 3741 }, { "epoch": 0.3, "grad_norm": 1.2857071520583583, "learning_rate": 8.214985474434618e-06, "loss": 0.1797, "step": 3742 }, { "epoch": 0.3, "grad_norm": 1.160616836812752, "learning_rate": 8.213993182111066e-06, "loss": 0.1471, "step": 3743 }, { "epoch": 0.3, "grad_norm": 1.1978952469314283, "learning_rate": 8.213000674022415e-06, "loss": 0.1481, "step": 3744 }, { "epoch": 0.3, "grad_norm": 1.3652798963890227, "learning_rate": 8.212007950235295e-06, "loss": 0.1952, "step": 3745 }, { "epoch": 0.3, "grad_norm": 2.3333484544166128, "learning_rate": 8.211015010816347e-06, "loss": 0.2063, "step": 3746 }, { "epoch": 0.3, "grad_norm": 1.268932675839391, "learning_rate": 8.210021855832234e-06, "loss": 0.1888, "step": 3747 }, { "epoch": 0.3, "grad_norm": 1.4098247194853812, "learning_rate": 8.209028485349627e-06, "loss": 0.244, "step": 3748 }, { "epoch": 0.3, "grad_norm": 21.99483528163536, "learning_rate": 8.208034899435216e-06, "loss": 0.5939, "step": 3749 }, { "epoch": 0.3, "grad_norm": 1.28325668313735, "learning_rate": 8.207041098155701e-06, "loss": 0.1718, "step": 3750 }, { "epoch": 0.3, "grad_norm": 1.320616648218833, "learning_rate": 8.2060470815778e-06, "loss": 0.193, "step": 3751 }, { "epoch": 0.3, "grad_norm": 1.4483438505468544, "learning_rate": 8.205052849768244e-06, "loss": 0.2602, "step": 3752 }, { "epoch": 0.3, "grad_norm": 1.2273375653978682, "learning_rate": 8.204058402793782e-06, "loss": 0.2313, "step": 3753 }, { "epoch": 0.3, "grad_norm": 1.335381397708339, "learning_rate": 8.20306374072117e-06, "loss": 0.1754, "step": 3754 }, { "epoch": 0.3, "grad_norm": 1.5015469322437207, "learning_rate": 8.202068863617183e-06, "loss": 0.2072, "step": 3755 }, { "epoch": 0.3, "grad_norm": 1.5663185400489383, "learning_rate": 8.201073771548611e-06, "loss": 0.2716, "step": 3756 }, { "epoch": 0.3, "grad_norm": 1.2099649688181984, "learning_rate": 8.20007846458226e-06, "loss": 0.1854, "step": 3757 }, { "epoch": 0.3, "grad_norm": 1.2170989499886844, "learning_rate": 8.199082942784943e-06, "loss": 0.2103, "step": 3758 }, { "epoch": 0.3, "grad_norm": 1.3745667809485924, "learning_rate": 8.198087206223498e-06, "loss": 0.2073, "step": 3759 }, { "epoch": 0.3, "grad_norm": 1.509638521605373, "learning_rate": 8.19709125496477e-06, "loss": 0.2558, "step": 3760 }, { "epoch": 0.3, "grad_norm": 1.1512066954533253, "learning_rate": 8.196095089075615e-06, "loss": 0.1845, "step": 3761 }, { "epoch": 0.3, "grad_norm": 1.3621491829710406, "learning_rate": 8.195098708622917e-06, "loss": 0.2229, "step": 3762 }, { "epoch": 0.3, "grad_norm": 1.6281650486868589, "learning_rate": 8.194102113673558e-06, "loss": 0.2679, "step": 3763 }, { "epoch": 0.3, "grad_norm": 1.1191402777196033, "learning_rate": 8.193105304294452e-06, "loss": 0.1792, "step": 3764 }, { "epoch": 0.3, "grad_norm": 1.4479889098365322, "learning_rate": 8.192108280552507e-06, "loss": 0.2576, "step": 3765 }, { "epoch": 0.3, "grad_norm": 1.4873604791788069, "learning_rate": 8.191111042514663e-06, "loss": 0.2157, "step": 3766 }, { "epoch": 0.3, "grad_norm": 1.5418804154420083, "learning_rate": 8.190113590247866e-06, "loss": 0.2147, "step": 3767 }, { "epoch": 0.3, "grad_norm": 1.2470064232035385, "learning_rate": 8.189115923819078e-06, "loss": 0.2021, "step": 3768 }, { "epoch": 0.3, "grad_norm": 1.3102997450068385, "learning_rate": 8.188118043295276e-06, "loss": 0.2018, "step": 3769 }, { "epoch": 0.3, "grad_norm": 1.3357024427014244, "learning_rate": 8.18711994874345e-06, "loss": 0.1948, "step": 3770 }, { "epoch": 0.3, "grad_norm": 1.3251568560369325, "learning_rate": 8.186121640230604e-06, "loss": 0.1874, "step": 3771 }, { "epoch": 0.3, "grad_norm": 1.373837088366283, "learning_rate": 8.18512311782376e-06, "loss": 0.2203, "step": 3772 }, { "epoch": 0.3, "grad_norm": 1.2400577554799947, "learning_rate": 8.184124381589951e-06, "loss": 0.1883, "step": 3773 }, { "epoch": 0.3, "grad_norm": 1.1858150849851898, "learning_rate": 8.183125431596224e-06, "loss": 0.2094, "step": 3774 }, { "epoch": 0.3, "grad_norm": 1.3700138523272412, "learning_rate": 8.182126267909642e-06, "loss": 0.2411, "step": 3775 }, { "epoch": 0.3, "grad_norm": 1.3167487756659753, "learning_rate": 8.181126890597281e-06, "loss": 0.1921, "step": 3776 }, { "epoch": 0.3, "grad_norm": 1.443382472707996, "learning_rate": 8.180127299726236e-06, "loss": 0.2667, "step": 3777 }, { "epoch": 0.3, "grad_norm": 1.3151528661559193, "learning_rate": 8.179127495363607e-06, "loss": 0.2337, "step": 3778 }, { "epoch": 0.3, "grad_norm": 1.576240110368662, "learning_rate": 8.178127477576521e-06, "loss": 0.2624, "step": 3779 }, { "epoch": 0.3, "grad_norm": 1.3715344336381463, "learning_rate": 8.177127246432105e-06, "loss": 0.2065, "step": 3780 }, { "epoch": 0.3, "grad_norm": 1.452805563399023, "learning_rate": 8.176126801997512e-06, "loss": 0.1897, "step": 3781 }, { "epoch": 0.3, "grad_norm": 1.178345529572568, "learning_rate": 8.175126144339901e-06, "loss": 0.1943, "step": 3782 }, { "epoch": 0.3, "grad_norm": 1.3576207740697785, "learning_rate": 8.174125273526455e-06, "loss": 0.2137, "step": 3783 }, { "epoch": 0.3, "grad_norm": 1.6846658531351133, "learning_rate": 8.173124189624362e-06, "loss": 0.275, "step": 3784 }, { "epoch": 0.3, "grad_norm": 1.208878340863701, "learning_rate": 8.172122892700826e-06, "loss": 0.2088, "step": 3785 }, { "epoch": 0.3, "grad_norm": 1.2254991042253245, "learning_rate": 8.171121382823069e-06, "loss": 0.1546, "step": 3786 }, { "epoch": 0.3, "grad_norm": 1.3434993431008382, "learning_rate": 8.170119660058327e-06, "loss": 0.2129, "step": 3787 }, { "epoch": 0.3, "grad_norm": 5.034028722742464, "learning_rate": 8.169117724473847e-06, "loss": 0.5966, "step": 3788 }, { "epoch": 0.3, "grad_norm": 1.3228752110650597, "learning_rate": 8.168115576136891e-06, "loss": 0.1617, "step": 3789 }, { "epoch": 0.3, "grad_norm": 1.5694246028967147, "learning_rate": 8.167113215114738e-06, "loss": 0.2845, "step": 3790 }, { "epoch": 0.3, "grad_norm": 1.4130274092295652, "learning_rate": 8.166110641474679e-06, "loss": 0.2349, "step": 3791 }, { "epoch": 0.3, "grad_norm": 1.4367740492720582, "learning_rate": 8.165107855284019e-06, "loss": 0.2401, "step": 3792 }, { "epoch": 0.3, "grad_norm": 12.643956673191243, "learning_rate": 8.16410485661008e-06, "loss": 0.6212, "step": 3793 }, { "epoch": 0.3, "grad_norm": 1.2734615238494864, "learning_rate": 8.163101645520193e-06, "loss": 0.2043, "step": 3794 }, { "epoch": 0.3, "grad_norm": 1.4149364356908314, "learning_rate": 8.16209822208171e-06, "loss": 0.1979, "step": 3795 }, { "epoch": 0.3, "grad_norm": 1.2582820217914774, "learning_rate": 8.161094586361992e-06, "loss": 0.2225, "step": 3796 }, { "epoch": 0.3, "grad_norm": 7.478590770963363, "learning_rate": 8.160090738428418e-06, "loss": 0.604, "step": 3797 }, { "epoch": 0.3, "grad_norm": 1.391850948496934, "learning_rate": 8.159086678348378e-06, "loss": 0.1988, "step": 3798 }, { "epoch": 0.3, "grad_norm": 1.5561336336706055, "learning_rate": 8.158082406189278e-06, "loss": 0.2571, "step": 3799 }, { "epoch": 0.3, "grad_norm": 1.3930409160327242, "learning_rate": 8.157077922018537e-06, "loss": 0.2242, "step": 3800 }, { "epoch": 0.3, "grad_norm": 1.5124187551910635, "learning_rate": 8.156073225903588e-06, "loss": 0.2507, "step": 3801 }, { "epoch": 0.3, "grad_norm": 1.3272023628567406, "learning_rate": 8.155068317911882e-06, "loss": 0.2024, "step": 3802 }, { "epoch": 0.3, "grad_norm": 1.3523427424580936, "learning_rate": 8.15406319811088e-06, "loss": 0.1956, "step": 3803 }, { "epoch": 0.3, "grad_norm": 1.3571419779991183, "learning_rate": 8.153057866568059e-06, "loss": 0.1982, "step": 3804 }, { "epoch": 0.3, "grad_norm": 1.3532630537981003, "learning_rate": 8.152052323350909e-06, "loss": 0.2466, "step": 3805 }, { "epoch": 0.3, "grad_norm": 1.3399469802869355, "learning_rate": 8.151046568526938e-06, "loss": 0.178, "step": 3806 }, { "epoch": 0.3, "grad_norm": 6.268732818577074, "learning_rate": 8.150040602163665e-06, "loss": 0.6794, "step": 3807 }, { "epoch": 0.3, "grad_norm": 1.471598565280061, "learning_rate": 8.14903442432862e-06, "loss": 0.1827, "step": 3808 }, { "epoch": 0.3, "grad_norm": 1.3741470379487557, "learning_rate": 8.148028035089353e-06, "loss": 0.2025, "step": 3809 }, { "epoch": 0.3, "grad_norm": 1.320980023199665, "learning_rate": 8.147021434513425e-06, "loss": 0.1797, "step": 3810 }, { "epoch": 0.3, "grad_norm": 1.2158344135043195, "learning_rate": 8.146014622668415e-06, "loss": 0.182, "step": 3811 }, { "epoch": 0.3, "grad_norm": 1.254493537630793, "learning_rate": 8.14500759962191e-06, "loss": 0.2174, "step": 3812 }, { "epoch": 0.31, "grad_norm": 1.5143944226286659, "learning_rate": 8.144000365441515e-06, "loss": 0.1746, "step": 3813 }, { "epoch": 0.31, "grad_norm": 1.352600148430449, "learning_rate": 8.142992920194852e-06, "loss": 0.208, "step": 3814 }, { "epoch": 0.31, "grad_norm": 1.3309887233938618, "learning_rate": 8.14198526394955e-06, "loss": 0.1861, "step": 3815 }, { "epoch": 0.31, "grad_norm": 1.3395657168842534, "learning_rate": 8.140977396773257e-06, "loss": 0.2254, "step": 3816 }, { "epoch": 0.31, "grad_norm": 6.828579637503647, "learning_rate": 8.139969318733636e-06, "loss": 0.1971, "step": 3817 }, { "epoch": 0.31, "grad_norm": 1.4154001019319817, "learning_rate": 8.138961029898358e-06, "loss": 0.2162, "step": 3818 }, { "epoch": 0.31, "grad_norm": 4.722554168417846, "learning_rate": 8.137952530335119e-06, "loss": 0.4921, "step": 3819 }, { "epoch": 0.31, "grad_norm": 1.39348253248178, "learning_rate": 8.136943820111615e-06, "loss": 0.2143, "step": 3820 }, { "epoch": 0.31, "grad_norm": 1.402773358244312, "learning_rate": 8.135934899295572e-06, "loss": 0.2185, "step": 3821 }, { "epoch": 0.31, "grad_norm": 1.435000871013821, "learning_rate": 8.134925767954716e-06, "loss": 0.2292, "step": 3822 }, { "epoch": 0.31, "grad_norm": 1.3145847619822002, "learning_rate": 8.133916426156795e-06, "loss": 0.2218, "step": 3823 }, { "epoch": 0.31, "grad_norm": 1.3795694584195648, "learning_rate": 8.132906873969568e-06, "loss": 0.2014, "step": 3824 }, { "epoch": 0.31, "grad_norm": 1.426083799878832, "learning_rate": 8.13189711146081e-06, "loss": 0.1869, "step": 3825 }, { "epoch": 0.31, "grad_norm": 1.5010355448842745, "learning_rate": 8.130887138698311e-06, "loss": 0.1965, "step": 3826 }, { "epoch": 0.31, "grad_norm": 1.3447163614327744, "learning_rate": 8.129876955749871e-06, "loss": 0.2063, "step": 3827 }, { "epoch": 0.31, "grad_norm": 1.3723379728908258, "learning_rate": 8.128866562683309e-06, "loss": 0.2094, "step": 3828 }, { "epoch": 0.31, "grad_norm": 1.1235312923257483, "learning_rate": 8.127855959566452e-06, "loss": 0.1542, "step": 3829 }, { "epoch": 0.31, "grad_norm": 1.2770919731434849, "learning_rate": 8.126845146467151e-06, "loss": 0.158, "step": 3830 }, { "epoch": 0.31, "grad_norm": 1.5168597774387853, "learning_rate": 8.12583412345326e-06, "loss": 0.2203, "step": 3831 }, { "epoch": 0.31, "grad_norm": 1.5001805143384344, "learning_rate": 8.124822890592652e-06, "loss": 0.2773, "step": 3832 }, { "epoch": 0.31, "grad_norm": 1.5648243265884805, "learning_rate": 8.123811447953218e-06, "loss": 0.2417, "step": 3833 }, { "epoch": 0.31, "grad_norm": 1.3648919281105643, "learning_rate": 8.122799795602855e-06, "loss": 0.1761, "step": 3834 }, { "epoch": 0.31, "grad_norm": 1.5458571796780096, "learning_rate": 8.12178793360948e-06, "loss": 0.2449, "step": 3835 }, { "epoch": 0.31, "grad_norm": 1.271115662251034, "learning_rate": 8.120775862041023e-06, "loss": 0.1921, "step": 3836 }, { "epoch": 0.31, "grad_norm": 1.3516787335878178, "learning_rate": 8.119763580965425e-06, "loss": 0.2417, "step": 3837 }, { "epoch": 0.31, "grad_norm": 1.417710205515965, "learning_rate": 8.118751090450647e-06, "loss": 0.2281, "step": 3838 }, { "epoch": 0.31, "grad_norm": 1.372317266512435, "learning_rate": 8.117738390564658e-06, "loss": 0.1675, "step": 3839 }, { "epoch": 0.31, "grad_norm": 1.1982225719895114, "learning_rate": 8.116725481375448e-06, "loss": 0.1544, "step": 3840 }, { "epoch": 0.31, "grad_norm": 1.2010840745297853, "learning_rate": 8.11571236295101e-06, "loss": 0.1774, "step": 3841 }, { "epoch": 0.31, "grad_norm": 1.414962284996212, "learning_rate": 8.114699035359361e-06, "loss": 0.2266, "step": 3842 }, { "epoch": 0.31, "grad_norm": 1.3693482179846685, "learning_rate": 8.113685498668527e-06, "loss": 0.2407, "step": 3843 }, { "epoch": 0.31, "grad_norm": 1.2687671228040156, "learning_rate": 8.112671752946554e-06, "loss": 0.2121, "step": 3844 }, { "epoch": 0.31, "grad_norm": 1.3241716119899425, "learning_rate": 8.111657798261495e-06, "loss": 0.2235, "step": 3845 }, { "epoch": 0.31, "grad_norm": 1.2164656639018097, "learning_rate": 8.110643634681419e-06, "loss": 0.2039, "step": 3846 }, { "epoch": 0.31, "grad_norm": 1.516246851677116, "learning_rate": 8.10962926227441e-06, "loss": 0.219, "step": 3847 }, { "epoch": 0.31, "grad_norm": 1.3117082523676236, "learning_rate": 8.108614681108568e-06, "loss": 0.1849, "step": 3848 }, { "epoch": 0.31, "grad_norm": 1.509941156196788, "learning_rate": 8.107599891252005e-06, "loss": 0.2072, "step": 3849 }, { "epoch": 0.31, "grad_norm": 1.2132483981011335, "learning_rate": 8.106584892772844e-06, "loss": 0.198, "step": 3850 }, { "epoch": 0.31, "grad_norm": 1.3858259817769758, "learning_rate": 8.105569685739227e-06, "loss": 0.2519, "step": 3851 }, { "epoch": 0.31, "grad_norm": 1.512459936470633, "learning_rate": 8.104554270219307e-06, "loss": 0.2304, "step": 3852 }, { "epoch": 0.31, "grad_norm": 1.3263916266519096, "learning_rate": 8.103538646281253e-06, "loss": 0.1505, "step": 3853 }, { "epoch": 0.31, "grad_norm": 1.294783366535686, "learning_rate": 8.102522813993247e-06, "loss": 0.203, "step": 3854 }, { "epoch": 0.31, "grad_norm": 1.216788746044837, "learning_rate": 8.101506773423484e-06, "loss": 0.1856, "step": 3855 }, { "epoch": 0.31, "grad_norm": 1.4940954150927188, "learning_rate": 8.100490524640172e-06, "loss": 0.2634, "step": 3856 }, { "epoch": 0.31, "grad_norm": 1.338140377670126, "learning_rate": 8.09947406771154e-06, "loss": 0.184, "step": 3857 }, { "epoch": 0.31, "grad_norm": 5.6104205638483835, "learning_rate": 8.098457402705822e-06, "loss": 0.6341, "step": 3858 }, { "epoch": 0.31, "grad_norm": 1.274708201948737, "learning_rate": 8.09744052969127e-06, "loss": 0.1976, "step": 3859 }, { "epoch": 0.31, "grad_norm": 1.3494122180929107, "learning_rate": 8.09642344873615e-06, "loss": 0.2015, "step": 3860 }, { "epoch": 0.31, "grad_norm": 1.347851965256785, "learning_rate": 8.095406159908741e-06, "loss": 0.177, "step": 3861 }, { "epoch": 0.31, "grad_norm": 1.253097303071293, "learning_rate": 8.094388663277339e-06, "loss": 0.2085, "step": 3862 }, { "epoch": 0.31, "grad_norm": 1.4515645437875249, "learning_rate": 8.093370958910252e-06, "loss": 0.2646, "step": 3863 }, { "epoch": 0.31, "grad_norm": 1.3888438508248435, "learning_rate": 8.092353046875798e-06, "loss": 0.1876, "step": 3864 }, { "epoch": 0.31, "grad_norm": 6.72749920782667, "learning_rate": 8.091334927242315e-06, "loss": 0.4353, "step": 3865 }, { "epoch": 0.31, "grad_norm": 1.4557629869975592, "learning_rate": 8.090316600078152e-06, "loss": 0.2413, "step": 3866 }, { "epoch": 0.31, "grad_norm": 5.10867396928844, "learning_rate": 8.089298065451673e-06, "loss": 0.5667, "step": 3867 }, { "epoch": 0.31, "grad_norm": 1.3324617322697836, "learning_rate": 8.088279323431255e-06, "loss": 0.1712, "step": 3868 }, { "epoch": 0.31, "grad_norm": 1.6462473953992076, "learning_rate": 8.087260374085286e-06, "loss": 0.2406, "step": 3869 }, { "epoch": 0.31, "grad_norm": 1.667452322103438, "learning_rate": 8.086241217482177e-06, "loss": 0.2574, "step": 3870 }, { "epoch": 0.31, "grad_norm": 1.3138267296210167, "learning_rate": 8.085221853690344e-06, "loss": 0.2088, "step": 3871 }, { "epoch": 0.31, "grad_norm": 5.546490218014016, "learning_rate": 8.084202282778218e-06, "loss": 0.5648, "step": 3872 }, { "epoch": 0.31, "grad_norm": 1.2583768178123669, "learning_rate": 8.083182504814249e-06, "loss": 0.1319, "step": 3873 }, { "epoch": 0.31, "grad_norm": 1.378382834789108, "learning_rate": 8.082162519866898e-06, "loss": 0.2296, "step": 3874 }, { "epoch": 0.31, "grad_norm": 1.444753271743106, "learning_rate": 8.081142328004638e-06, "loss": 0.2023, "step": 3875 }, { "epoch": 0.31, "grad_norm": 1.559102318618348, "learning_rate": 8.080121929295957e-06, "loss": 0.2534, "step": 3876 }, { "epoch": 0.31, "grad_norm": 5.620181786986073, "learning_rate": 8.07910132380936e-06, "loss": 0.6777, "step": 3877 }, { "epoch": 0.31, "grad_norm": 1.3915794860198896, "learning_rate": 8.07808051161336e-06, "loss": 0.2318, "step": 3878 }, { "epoch": 0.31, "grad_norm": 1.150878766324051, "learning_rate": 8.07705949277649e-06, "loss": 0.1812, "step": 3879 }, { "epoch": 0.31, "grad_norm": 1.3357387381515504, "learning_rate": 8.076038267367292e-06, "loss": 0.1817, "step": 3880 }, { "epoch": 0.31, "grad_norm": 1.2328356037374417, "learning_rate": 8.075016835454327e-06, "loss": 0.1972, "step": 3881 }, { "epoch": 0.31, "grad_norm": 1.50137873260632, "learning_rate": 8.073995197106163e-06, "loss": 0.2, "step": 3882 }, { "epoch": 0.31, "grad_norm": 1.4156423292367393, "learning_rate": 8.07297335239139e-06, "loss": 0.1815, "step": 3883 }, { "epoch": 0.31, "grad_norm": 1.2383994634532183, "learning_rate": 8.071951301378604e-06, "loss": 0.187, "step": 3884 }, { "epoch": 0.31, "grad_norm": 1.2539914277041322, "learning_rate": 8.070929044136419e-06, "loss": 0.2061, "step": 3885 }, { "epoch": 0.31, "grad_norm": 1.4900851522767355, "learning_rate": 8.069906580733461e-06, "loss": 0.2451, "step": 3886 }, { "epoch": 0.31, "grad_norm": 1.2343465521259052, "learning_rate": 8.068883911238377e-06, "loss": 0.2053, "step": 3887 }, { "epoch": 0.31, "grad_norm": 1.2943545835185957, "learning_rate": 8.067861035719816e-06, "loss": 0.224, "step": 3888 }, { "epoch": 0.31, "grad_norm": 6.300324359689798, "learning_rate": 8.066837954246447e-06, "loss": 0.6481, "step": 3889 }, { "epoch": 0.31, "grad_norm": 1.3778856431987445, "learning_rate": 8.065814666886954e-06, "loss": 0.2133, "step": 3890 }, { "epoch": 0.31, "grad_norm": 1.2015676728607707, "learning_rate": 8.064791173710033e-06, "loss": 0.1927, "step": 3891 }, { "epoch": 0.31, "grad_norm": 1.231435034184277, "learning_rate": 8.063767474784397e-06, "loss": 0.1655, "step": 3892 }, { "epoch": 0.31, "grad_norm": 1.3684226485653346, "learning_rate": 8.062743570178767e-06, "loss": 0.2022, "step": 3893 }, { "epoch": 0.31, "grad_norm": 1.623355058271424, "learning_rate": 8.06171945996188e-06, "loss": 0.2597, "step": 3894 }, { "epoch": 0.31, "grad_norm": 5.900743990962824, "learning_rate": 8.06069514420249e-06, "loss": 0.4829, "step": 3895 }, { "epoch": 0.31, "grad_norm": 1.5129030600681552, "learning_rate": 8.059670622969363e-06, "loss": 0.2184, "step": 3896 }, { "epoch": 0.31, "grad_norm": 1.4583743399132925, "learning_rate": 8.058645896331274e-06, "loss": 0.1809, "step": 3897 }, { "epoch": 0.31, "grad_norm": 1.5219000938628264, "learning_rate": 8.057620964357018e-06, "loss": 0.2333, "step": 3898 }, { "epoch": 0.31, "grad_norm": 6.272767208035992, "learning_rate": 8.056595827115404e-06, "loss": 0.5816, "step": 3899 }, { "epoch": 0.31, "grad_norm": 1.3408453091881496, "learning_rate": 8.055570484675252e-06, "loss": 0.1869, "step": 3900 }, { "epoch": 0.31, "grad_norm": 1.1830891435623785, "learning_rate": 8.054544937105393e-06, "loss": 0.1992, "step": 3901 }, { "epoch": 0.31, "grad_norm": 1.5870902756894514, "learning_rate": 8.053519184474679e-06, "loss": 0.2175, "step": 3902 }, { "epoch": 0.31, "grad_norm": 1.5366996290517843, "learning_rate": 8.052493226851971e-06, "loss": 0.2005, "step": 3903 }, { "epoch": 0.31, "grad_norm": 1.3383593295558844, "learning_rate": 8.051467064306142e-06, "loss": 0.1871, "step": 3904 }, { "epoch": 0.31, "grad_norm": 1.4643097722439373, "learning_rate": 8.050440696906086e-06, "loss": 0.2025, "step": 3905 }, { "epoch": 0.31, "grad_norm": 1.3122153673218235, "learning_rate": 8.049414124720702e-06, "loss": 0.1813, "step": 3906 }, { "epoch": 0.31, "grad_norm": 1.5189376024119374, "learning_rate": 8.048387347818908e-06, "loss": 0.2198, "step": 3907 }, { "epoch": 0.31, "grad_norm": 1.2335353404978318, "learning_rate": 8.047360366269636e-06, "loss": 0.1717, "step": 3908 }, { "epoch": 0.31, "grad_norm": 1.2212660078410205, "learning_rate": 8.046333180141827e-06, "loss": 0.1876, "step": 3909 }, { "epoch": 0.31, "grad_norm": 1.5880997170269437, "learning_rate": 8.045305789504446e-06, "loss": 0.2225, "step": 3910 }, { "epoch": 0.31, "grad_norm": 1.2372582583447032, "learning_rate": 8.044278194426459e-06, "loss": 0.151, "step": 3911 }, { "epoch": 0.31, "grad_norm": 1.455127719565864, "learning_rate": 8.043250394976852e-06, "loss": 0.2678, "step": 3912 }, { "epoch": 0.31, "grad_norm": 1.3381907926194567, "learning_rate": 8.042222391224624e-06, "loss": 0.2333, "step": 3913 }, { "epoch": 0.31, "grad_norm": 4.349875936995672, "learning_rate": 8.04119418323879e-06, "loss": 0.5374, "step": 3914 }, { "epoch": 0.31, "grad_norm": 1.2824233812787074, "learning_rate": 8.040165771088377e-06, "loss": 0.226, "step": 3915 }, { "epoch": 0.31, "grad_norm": 1.4698355742895297, "learning_rate": 8.039137154842424e-06, "loss": 0.1962, "step": 3916 }, { "epoch": 0.31, "grad_norm": 1.2825614472697844, "learning_rate": 8.038108334569986e-06, "loss": 0.2204, "step": 3917 }, { "epoch": 0.31, "grad_norm": 7.583614757035065, "learning_rate": 8.03707931034013e-06, "loss": 0.6275, "step": 3918 }, { "epoch": 0.31, "grad_norm": 7.64898838428393, "learning_rate": 8.03605008222194e-06, "loss": 0.7141, "step": 3919 }, { "epoch": 0.31, "grad_norm": 1.687256771767737, "learning_rate": 8.035020650284507e-06, "loss": 0.1977, "step": 3920 }, { "epoch": 0.31, "grad_norm": 1.4359456165979352, "learning_rate": 8.03399101459694e-06, "loss": 0.1947, "step": 3921 }, { "epoch": 0.31, "grad_norm": 1.501620848749396, "learning_rate": 8.032961175228366e-06, "loss": 0.2594, "step": 3922 }, { "epoch": 0.31, "grad_norm": 1.4129752316209603, "learning_rate": 8.031931132247918e-06, "loss": 0.2337, "step": 3923 }, { "epoch": 0.31, "grad_norm": 1.173079752936573, "learning_rate": 8.030900885724748e-06, "loss": 0.1759, "step": 3924 }, { "epoch": 0.31, "grad_norm": 7.358015484779165, "learning_rate": 8.029870435728018e-06, "loss": 0.7442, "step": 3925 }, { "epoch": 0.31, "grad_norm": 1.3117396566032236, "learning_rate": 8.028839782326905e-06, "loss": 0.1646, "step": 3926 }, { "epoch": 0.31, "grad_norm": 1.3842003877459141, "learning_rate": 8.0278089255906e-06, "loss": 0.2263, "step": 3927 }, { "epoch": 0.31, "grad_norm": 1.5118005228721116, "learning_rate": 8.026777865588308e-06, "loss": 0.2495, "step": 3928 }, { "epoch": 0.31, "grad_norm": 1.591346622603105, "learning_rate": 8.025746602389248e-06, "loss": 0.2477, "step": 3929 }, { "epoch": 0.31, "grad_norm": 1.2546252596318728, "learning_rate": 8.02471513606265e-06, "loss": 0.2042, "step": 3930 }, { "epoch": 0.31, "grad_norm": 4.75292150213567, "learning_rate": 8.023683466677762e-06, "loss": 0.7022, "step": 3931 }, { "epoch": 0.31, "grad_norm": 1.2940007025010616, "learning_rate": 8.02265159430384e-06, "loss": 0.196, "step": 3932 }, { "epoch": 0.31, "grad_norm": 1.297520267530138, "learning_rate": 8.021619519010158e-06, "loss": 0.159, "step": 3933 }, { "epoch": 0.31, "grad_norm": 1.2361737642107624, "learning_rate": 8.020587240866004e-06, "loss": 0.2157, "step": 3934 }, { "epoch": 0.31, "grad_norm": 1.3442300348071081, "learning_rate": 8.019554759940675e-06, "loss": 0.188, "step": 3935 }, { "epoch": 0.31, "grad_norm": 1.4386745021893488, "learning_rate": 8.018522076303487e-06, "loss": 0.2509, "step": 3936 }, { "epoch": 0.31, "grad_norm": 1.2366331257831256, "learning_rate": 8.017489190023767e-06, "loss": 0.2058, "step": 3937 }, { "epoch": 0.32, "grad_norm": 1.4474858879275714, "learning_rate": 8.016456101170853e-06, "loss": 0.2203, "step": 3938 }, { "epoch": 0.32, "grad_norm": 1.3533307490578559, "learning_rate": 8.015422809814102e-06, "loss": 0.1949, "step": 3939 }, { "epoch": 0.32, "grad_norm": 1.5336873402792435, "learning_rate": 8.01438931602288e-06, "loss": 0.2174, "step": 3940 }, { "epoch": 0.32, "grad_norm": 1.4467875984137255, "learning_rate": 8.013355619866573e-06, "loss": 0.2155, "step": 3941 }, { "epoch": 0.32, "grad_norm": 1.3170092877132251, "learning_rate": 8.012321721414572e-06, "loss": 0.166, "step": 3942 }, { "epoch": 0.32, "grad_norm": 1.2957565469038352, "learning_rate": 8.011287620736288e-06, "loss": 0.2385, "step": 3943 }, { "epoch": 0.32, "grad_norm": 1.4190068889190592, "learning_rate": 8.01025331790114e-06, "loss": 0.1994, "step": 3944 }, { "epoch": 0.32, "grad_norm": 1.3431785937654042, "learning_rate": 8.009218812978567e-06, "loss": 0.1857, "step": 3945 }, { "epoch": 0.32, "grad_norm": 1.3630350792240267, "learning_rate": 8.008184106038017e-06, "loss": 0.2079, "step": 3946 }, { "epoch": 0.32, "grad_norm": 1.2200559031101863, "learning_rate": 8.007149197148954e-06, "loss": 0.1826, "step": 3947 }, { "epoch": 0.32, "grad_norm": 1.3701955395318743, "learning_rate": 8.006114086380855e-06, "loss": 0.1887, "step": 3948 }, { "epoch": 0.32, "grad_norm": 6.9197906091642905, "learning_rate": 8.005078773803207e-06, "loss": 0.5194, "step": 3949 }, { "epoch": 0.32, "grad_norm": 1.3318217094219627, "learning_rate": 8.004043259485519e-06, "loss": 0.2202, "step": 3950 }, { "epoch": 0.32, "grad_norm": 1.3461664106334252, "learning_rate": 8.003007543497303e-06, "loss": 0.1647, "step": 3951 }, { "epoch": 0.32, "grad_norm": 1.474177600164627, "learning_rate": 8.001971625908091e-06, "loss": 0.244, "step": 3952 }, { "epoch": 0.32, "grad_norm": 1.4031588350502715, "learning_rate": 8.000935506787431e-06, "loss": 0.1959, "step": 3953 }, { "epoch": 0.32, "grad_norm": 1.2978934812693592, "learning_rate": 7.999899186204876e-06, "loss": 0.2118, "step": 3954 }, { "epoch": 0.32, "grad_norm": 1.2610008584659997, "learning_rate": 7.99886266423e-06, "loss": 0.2133, "step": 3955 }, { "epoch": 0.32, "grad_norm": 1.4112411612048457, "learning_rate": 7.997825940932387e-06, "loss": 0.2556, "step": 3956 }, { "epoch": 0.32, "grad_norm": 1.2416601441206263, "learning_rate": 7.996789016381633e-06, "loss": 0.215, "step": 3957 }, { "epoch": 0.32, "grad_norm": 1.4814599174413965, "learning_rate": 7.995751890647356e-06, "loss": 0.261, "step": 3958 }, { "epoch": 0.32, "grad_norm": 1.3593429679006102, "learning_rate": 7.994714563799177e-06, "loss": 0.1821, "step": 3959 }, { "epoch": 0.32, "grad_norm": 9.99668364553336, "learning_rate": 7.993677035906734e-06, "loss": 0.634, "step": 3960 }, { "epoch": 0.32, "grad_norm": 1.6311457431206913, "learning_rate": 7.992639307039683e-06, "loss": 0.2369, "step": 3961 }, { "epoch": 0.32, "grad_norm": 1.3452305034566447, "learning_rate": 7.991601377267688e-06, "loss": 0.2156, "step": 3962 }, { "epoch": 0.32, "grad_norm": 1.5013586172301305, "learning_rate": 7.990563246660427e-06, "loss": 0.2081, "step": 3963 }, { "epoch": 0.32, "grad_norm": 1.4708830704824098, "learning_rate": 7.989524915287595e-06, "loss": 0.248, "step": 3964 }, { "epoch": 0.32, "grad_norm": 1.5071056098392208, "learning_rate": 7.988486383218898e-06, "loss": 0.2336, "step": 3965 }, { "epoch": 0.32, "grad_norm": 1.3028345996066666, "learning_rate": 7.987447650524054e-06, "loss": 0.2233, "step": 3966 }, { "epoch": 0.32, "grad_norm": 6.142034713267671, "learning_rate": 7.9864087172728e-06, "loss": 0.7164, "step": 3967 }, { "epoch": 0.32, "grad_norm": 1.1477877146706996, "learning_rate": 7.985369583534877e-06, "loss": 0.1793, "step": 3968 }, { "epoch": 0.32, "grad_norm": 1.2934143979115635, "learning_rate": 7.98433024938005e-06, "loss": 0.1668, "step": 3969 }, { "epoch": 0.32, "grad_norm": 1.290238257160589, "learning_rate": 7.98329071487809e-06, "loss": 0.1973, "step": 3970 }, { "epoch": 0.32, "grad_norm": 5.548102246802242, "learning_rate": 7.982250980098789e-06, "loss": 0.7221, "step": 3971 }, { "epoch": 0.32, "grad_norm": 1.4850484042084637, "learning_rate": 7.981211045111942e-06, "loss": 0.2362, "step": 3972 }, { "epoch": 0.32, "grad_norm": 1.4825843799523257, "learning_rate": 7.980170909987363e-06, "loss": 0.2269, "step": 3973 }, { "epoch": 0.32, "grad_norm": 1.5342327252589767, "learning_rate": 7.979130574794884e-06, "loss": 0.2135, "step": 3974 }, { "epoch": 0.32, "grad_norm": 1.3032135002126601, "learning_rate": 7.978090039604342e-06, "loss": 0.1955, "step": 3975 }, { "epoch": 0.32, "grad_norm": 1.3368386284837364, "learning_rate": 7.97704930448559e-06, "loss": 0.1971, "step": 3976 }, { "epoch": 0.32, "grad_norm": 1.4700572540687062, "learning_rate": 7.9760083695085e-06, "loss": 0.2252, "step": 3977 }, { "epoch": 0.32, "grad_norm": 4.918500265559613, "learning_rate": 7.974967234742954e-06, "loss": 0.5652, "step": 3978 }, { "epoch": 0.32, "grad_norm": 5.990893084110975, "learning_rate": 7.973925900258841e-06, "loss": 0.7468, "step": 3979 }, { "epoch": 0.32, "grad_norm": 1.3898168052542716, "learning_rate": 7.972884366126072e-06, "loss": 0.2566, "step": 3980 }, { "epoch": 0.32, "grad_norm": 11.600702829975893, "learning_rate": 7.971842632414569e-06, "loss": 0.5795, "step": 3981 }, { "epoch": 0.32, "grad_norm": 6.426458912575459, "learning_rate": 7.970800699194263e-06, "loss": 0.762, "step": 3982 }, { "epoch": 0.32, "grad_norm": 1.3591071123757004, "learning_rate": 7.969758566535106e-06, "loss": 0.1839, "step": 3983 }, { "epoch": 0.32, "grad_norm": 1.2299600097155978, "learning_rate": 7.96871623450706e-06, "loss": 0.2245, "step": 3984 }, { "epoch": 0.32, "grad_norm": 1.381872173161071, "learning_rate": 7.967673703180096e-06, "loss": 0.2212, "step": 3985 }, { "epoch": 0.32, "grad_norm": 1.336387902708184, "learning_rate": 7.966630972624208e-06, "loss": 0.212, "step": 3986 }, { "epoch": 0.32, "grad_norm": 1.268092437514557, "learning_rate": 7.96558804290939e-06, "loss": 0.1906, "step": 3987 }, { "epoch": 0.32, "grad_norm": 1.3688463127849748, "learning_rate": 7.964544914105665e-06, "loss": 0.2043, "step": 3988 }, { "epoch": 0.32, "grad_norm": 1.5133901464003763, "learning_rate": 7.963501586283055e-06, "loss": 0.236, "step": 3989 }, { "epoch": 0.32, "grad_norm": 1.1561157597935667, "learning_rate": 7.962458059511607e-06, "loss": 0.1973, "step": 3990 }, { "epoch": 0.32, "grad_norm": 1.400514810286149, "learning_rate": 7.961414333861373e-06, "loss": 0.2454, "step": 3991 }, { "epoch": 0.32, "grad_norm": 1.1753817993493918, "learning_rate": 7.96037040940242e-06, "loss": 0.1808, "step": 3992 }, { "epoch": 0.32, "grad_norm": 1.188775357970155, "learning_rate": 7.959326286204833e-06, "loss": 0.2349, "step": 3993 }, { "epoch": 0.32, "grad_norm": 1.4044122369734588, "learning_rate": 7.958281964338706e-06, "loss": 0.2197, "step": 3994 }, { "epoch": 0.32, "grad_norm": 1.382862706820919, "learning_rate": 7.957237443874148e-06, "loss": 0.196, "step": 3995 }, { "epoch": 0.32, "grad_norm": 1.4953246061369465, "learning_rate": 7.95619272488128e-06, "loss": 0.2021, "step": 3996 }, { "epoch": 0.32, "grad_norm": 1.4610163211226375, "learning_rate": 7.955147807430238e-06, "loss": 0.2219, "step": 3997 }, { "epoch": 0.32, "grad_norm": 1.4841565402265473, "learning_rate": 7.954102691591171e-06, "loss": 0.198, "step": 3998 }, { "epoch": 0.32, "grad_norm": 4.69635422259678, "learning_rate": 7.953057377434237e-06, "loss": 0.7829, "step": 3999 }, { "epoch": 0.32, "grad_norm": 1.281165321518233, "learning_rate": 7.952011865029614e-06, "loss": 0.171, "step": 4000 }, { "epoch": 0.32, "grad_norm": 5.168749381051066, "learning_rate": 7.950966154447492e-06, "loss": 0.6267, "step": 4001 }, { "epoch": 0.32, "grad_norm": 1.240793874016181, "learning_rate": 7.94992024575807e-06, "loss": 0.1861, "step": 4002 }, { "epoch": 0.32, "grad_norm": 1.4054228000952058, "learning_rate": 7.948874139031564e-06, "loss": 0.1995, "step": 4003 }, { "epoch": 0.32, "grad_norm": 1.3283706346551687, "learning_rate": 7.947827834338203e-06, "loss": 0.2136, "step": 4004 }, { "epoch": 0.32, "grad_norm": 1.363874966015505, "learning_rate": 7.946781331748226e-06, "loss": 0.2034, "step": 4005 }, { "epoch": 0.32, "grad_norm": 1.380403245357081, "learning_rate": 7.945734631331891e-06, "loss": 0.1808, "step": 4006 }, { "epoch": 0.32, "grad_norm": 1.4358422606141732, "learning_rate": 7.944687733159466e-06, "loss": 0.2221, "step": 4007 }, { "epoch": 0.32, "grad_norm": 1.2421013953647664, "learning_rate": 7.94364063730123e-06, "loss": 0.208, "step": 4008 }, { "epoch": 0.32, "grad_norm": 1.1639220080902029, "learning_rate": 7.94259334382748e-06, "loss": 0.2045, "step": 4009 }, { "epoch": 0.32, "grad_norm": 1.4221892213425364, "learning_rate": 7.941545852808523e-06, "loss": 0.2037, "step": 4010 }, { "epoch": 0.32, "grad_norm": 1.3656781835471623, "learning_rate": 7.94049816431468e-06, "loss": 0.1942, "step": 4011 }, { "epoch": 0.32, "grad_norm": 1.5351563075092955, "learning_rate": 7.939450278416288e-06, "loss": 0.2247, "step": 4012 }, { "epoch": 0.32, "grad_norm": 1.3095625745197526, "learning_rate": 7.93840219518369e-06, "loss": 0.1823, "step": 4013 }, { "epoch": 0.32, "grad_norm": 1.2836290807238213, "learning_rate": 7.937353914687249e-06, "loss": 0.1844, "step": 4014 }, { "epoch": 0.32, "grad_norm": 1.4352008247571864, "learning_rate": 7.936305436997343e-06, "loss": 0.2277, "step": 4015 }, { "epoch": 0.32, "grad_norm": 1.506104855577261, "learning_rate": 7.935256762184354e-06, "loss": 0.2291, "step": 4016 }, { "epoch": 0.32, "grad_norm": 1.3501947784815256, "learning_rate": 7.934207890318686e-06, "loss": 0.1985, "step": 4017 }, { "epoch": 0.32, "grad_norm": 1.4608276714556927, "learning_rate": 7.933158821470752e-06, "loss": 0.2124, "step": 4018 }, { "epoch": 0.32, "grad_norm": 1.4296851955667318, "learning_rate": 7.932109555710979e-06, "loss": 0.2213, "step": 4019 }, { "epoch": 0.32, "grad_norm": 1.5187092033792717, "learning_rate": 7.931060093109807e-06, "loss": 0.245, "step": 4020 }, { "epoch": 0.32, "grad_norm": 1.3785124563433033, "learning_rate": 7.930010433737692e-06, "loss": 0.1676, "step": 4021 }, { "epoch": 0.32, "grad_norm": 1.4544041941697485, "learning_rate": 7.928960577665096e-06, "loss": 0.2563, "step": 4022 }, { "epoch": 0.32, "grad_norm": 1.6048481141989983, "learning_rate": 7.927910524962506e-06, "loss": 0.2297, "step": 4023 }, { "epoch": 0.32, "grad_norm": 1.3572033122224483, "learning_rate": 7.926860275700408e-06, "loss": 0.1917, "step": 4024 }, { "epoch": 0.32, "grad_norm": 1.5781350418753863, "learning_rate": 7.925809829949312e-06, "loss": 0.2579, "step": 4025 }, { "epoch": 0.32, "grad_norm": 1.3552825211617143, "learning_rate": 7.924759187779737e-06, "loss": 0.1972, "step": 4026 }, { "epoch": 0.32, "grad_norm": 1.4569620142731758, "learning_rate": 7.923708349262218e-06, "loss": 0.2348, "step": 4027 }, { "epoch": 0.32, "grad_norm": 1.3094162763446615, "learning_rate": 7.922657314467297e-06, "loss": 0.2003, "step": 4028 }, { "epoch": 0.32, "grad_norm": 1.3841680587079115, "learning_rate": 7.921606083465538e-06, "loss": 0.2241, "step": 4029 }, { "epoch": 0.32, "grad_norm": 1.4067911724993933, "learning_rate": 7.920554656327509e-06, "loss": 0.2016, "step": 4030 }, { "epoch": 0.32, "grad_norm": 1.2691942745031113, "learning_rate": 7.919503033123795e-06, "loss": 0.1971, "step": 4031 }, { "epoch": 0.32, "grad_norm": 1.1884472633314014, "learning_rate": 7.918451213925e-06, "loss": 0.1776, "step": 4032 }, { "epoch": 0.32, "grad_norm": 1.276012405701036, "learning_rate": 7.91739919880173e-06, "loss": 0.1557, "step": 4033 }, { "epoch": 0.32, "grad_norm": 4.873425089766336, "learning_rate": 7.916346987824612e-06, "loss": 0.4123, "step": 4034 }, { "epoch": 0.32, "grad_norm": 1.468295867731718, "learning_rate": 7.915294581064287e-06, "loss": 0.2411, "step": 4035 }, { "epoch": 0.32, "grad_norm": 1.487222314913909, "learning_rate": 7.914241978591403e-06, "loss": 0.256, "step": 4036 }, { "epoch": 0.32, "grad_norm": 1.386196938454332, "learning_rate": 7.913189180476623e-06, "loss": 0.2289, "step": 4037 }, { "epoch": 0.32, "grad_norm": 1.366204466731206, "learning_rate": 7.912136186790628e-06, "loss": 0.2247, "step": 4038 }, { "epoch": 0.32, "grad_norm": 1.2281103362720471, "learning_rate": 7.91108299760411e-06, "loss": 0.1686, "step": 4039 }, { "epoch": 0.32, "grad_norm": 1.3332936179328003, "learning_rate": 7.910029612987766e-06, "loss": 0.1908, "step": 4040 }, { "epoch": 0.32, "grad_norm": 1.3579630446771767, "learning_rate": 7.90897603301232e-06, "loss": 0.2195, "step": 4041 }, { "epoch": 0.32, "grad_norm": 1.3742289140500026, "learning_rate": 7.907922257748498e-06, "loss": 0.2244, "step": 4042 }, { "epoch": 0.32, "grad_norm": 1.4705197476751664, "learning_rate": 7.906868287267044e-06, "loss": 0.2219, "step": 4043 }, { "epoch": 0.32, "grad_norm": 1.3978853583772395, "learning_rate": 7.905814121638715e-06, "loss": 0.2464, "step": 4044 }, { "epoch": 0.32, "grad_norm": 1.3425902814367763, "learning_rate": 7.90475976093428e-06, "loss": 0.1795, "step": 4045 }, { "epoch": 0.32, "grad_norm": 5.999084275607514, "learning_rate": 7.903705205224518e-06, "loss": 0.4826, "step": 4046 }, { "epoch": 0.32, "grad_norm": 1.3197182804567718, "learning_rate": 7.902650454580233e-06, "loss": 0.1817, "step": 4047 }, { "epoch": 0.32, "grad_norm": 1.082998522186344, "learning_rate": 7.901595509072224e-06, "loss": 0.161, "step": 4048 }, { "epoch": 0.32, "grad_norm": 1.4531715688974516, "learning_rate": 7.900540368771319e-06, "loss": 0.2281, "step": 4049 }, { "epoch": 0.32, "grad_norm": 1.3410596430698283, "learning_rate": 7.89948503374835e-06, "loss": 0.1993, "step": 4050 }, { "epoch": 0.32, "grad_norm": 1.493736311943534, "learning_rate": 7.898429504074165e-06, "loss": 0.2638, "step": 4051 }, { "epoch": 0.32, "grad_norm": 1.3549260568043828, "learning_rate": 7.897373779819627e-06, "loss": 0.1893, "step": 4052 }, { "epoch": 0.32, "grad_norm": 6.657542154263301, "learning_rate": 7.896317861055609e-06, "loss": 0.587, "step": 4053 }, { "epoch": 0.32, "grad_norm": 7.137440944391673, "learning_rate": 7.895261747852996e-06, "loss": 0.6967, "step": 4054 }, { "epoch": 0.32, "grad_norm": 1.2340759483895059, "learning_rate": 7.89420544028269e-06, "loss": 0.1923, "step": 4055 }, { "epoch": 0.32, "grad_norm": 5.045243153512769, "learning_rate": 7.893148938415602e-06, "loss": 0.7346, "step": 4056 }, { "epoch": 0.32, "grad_norm": 1.1748107632597535, "learning_rate": 7.892092242322662e-06, "loss": 0.1753, "step": 4057 }, { "epoch": 0.32, "grad_norm": 8.300679857611565, "learning_rate": 7.891035352074808e-06, "loss": 0.7109, "step": 4058 }, { "epoch": 0.32, "grad_norm": 1.277711060694452, "learning_rate": 7.88997826774299e-06, "loss": 0.1973, "step": 4059 }, { "epoch": 0.32, "grad_norm": 1.348231432666873, "learning_rate": 7.888920989398174e-06, "loss": 0.2195, "step": 4060 }, { "epoch": 0.32, "grad_norm": 1.470930614121796, "learning_rate": 7.887863517111337e-06, "loss": 0.2141, "step": 4061 }, { "epoch": 0.32, "grad_norm": 1.4371714376575915, "learning_rate": 7.886805850953476e-06, "loss": 0.2188, "step": 4062 }, { "epoch": 0.33, "grad_norm": 1.432188247191454, "learning_rate": 7.88574799099559e-06, "loss": 0.2474, "step": 4063 }, { "epoch": 0.33, "grad_norm": 1.4404755853856661, "learning_rate": 7.884689937308698e-06, "loss": 0.1924, "step": 4064 }, { "epoch": 0.33, "grad_norm": 1.4152935209153108, "learning_rate": 7.883631689963831e-06, "loss": 0.1867, "step": 4065 }, { "epoch": 0.33, "grad_norm": 1.3348794602290117, "learning_rate": 7.882573249032031e-06, "loss": 0.1985, "step": 4066 }, { "epoch": 0.33, "grad_norm": 1.3164126258258746, "learning_rate": 7.881514614584356e-06, "loss": 0.2092, "step": 4067 }, { "epoch": 0.33, "grad_norm": 1.441138815362133, "learning_rate": 7.880455786691872e-06, "loss": 0.2156, "step": 4068 }, { "epoch": 0.33, "grad_norm": 1.4826858508841896, "learning_rate": 7.879396765425665e-06, "loss": 0.2119, "step": 4069 }, { "epoch": 0.33, "grad_norm": 1.2620397541727186, "learning_rate": 7.878337550856829e-06, "loss": 0.1889, "step": 4070 }, { "epoch": 0.33, "grad_norm": 1.376404947090972, "learning_rate": 7.87727814305647e-06, "loss": 0.2096, "step": 4071 }, { "epoch": 0.33, "grad_norm": 1.2892368752426073, "learning_rate": 7.876218542095715e-06, "loss": 0.1798, "step": 4072 }, { "epoch": 0.33, "grad_norm": 1.4684951972136515, "learning_rate": 7.875158748045691e-06, "loss": 0.2332, "step": 4073 }, { "epoch": 0.33, "grad_norm": 17.73732110642301, "learning_rate": 7.874098760977552e-06, "loss": 0.7673, "step": 4074 }, { "epoch": 0.33, "grad_norm": 1.3295259617834692, "learning_rate": 7.873038580962453e-06, "loss": 0.2591, "step": 4075 }, { "epoch": 0.33, "grad_norm": 1.5145936257203483, "learning_rate": 7.871978208071572e-06, "loss": 0.1971, "step": 4076 }, { "epoch": 0.33, "grad_norm": 1.403915698666203, "learning_rate": 7.870917642376088e-06, "loss": 0.2294, "step": 4077 }, { "epoch": 0.33, "grad_norm": 1.5436568345254131, "learning_rate": 7.869856883947208e-06, "loss": 0.2057, "step": 4078 }, { "epoch": 0.33, "grad_norm": 1.4180047816894987, "learning_rate": 7.868795932856138e-06, "loss": 0.2276, "step": 4079 }, { "epoch": 0.33, "grad_norm": 1.0968950017144357, "learning_rate": 7.867734789174104e-06, "loss": 0.132, "step": 4080 }, { "epoch": 0.33, "grad_norm": 1.4881437193403455, "learning_rate": 7.866673452972346e-06, "loss": 0.1926, "step": 4081 }, { "epoch": 0.33, "grad_norm": 6.540178649001633, "learning_rate": 7.865611924322113e-06, "loss": 0.5771, "step": 4082 }, { "epoch": 0.33, "grad_norm": 1.275561595975137, "learning_rate": 7.864550203294669e-06, "loss": 0.2215, "step": 4083 }, { "epoch": 0.33, "grad_norm": 1.3102470688598118, "learning_rate": 7.863488289961291e-06, "loss": 0.1838, "step": 4084 }, { "epoch": 0.33, "grad_norm": 1.401617961777475, "learning_rate": 7.86242618439327e-06, "loss": 0.2073, "step": 4085 }, { "epoch": 0.33, "grad_norm": 1.4639585447551864, "learning_rate": 7.861363886661903e-06, "loss": 0.2268, "step": 4086 }, { "epoch": 0.33, "grad_norm": 5.492680339583702, "learning_rate": 7.86030139683851e-06, "loss": 0.6386, "step": 4087 }, { "epoch": 0.33, "grad_norm": 5.549542900902339, "learning_rate": 7.859238714994419e-06, "loss": 0.5361, "step": 4088 }, { "epoch": 0.33, "grad_norm": 1.304386027337992, "learning_rate": 7.858175841200968e-06, "loss": 0.177, "step": 4089 }, { "epoch": 0.33, "grad_norm": 1.2991924800561592, "learning_rate": 7.857112775529513e-06, "loss": 0.1952, "step": 4090 }, { "epoch": 0.33, "grad_norm": 1.348850933197621, "learning_rate": 7.856049518051423e-06, "loss": 0.2301, "step": 4091 }, { "epoch": 0.33, "grad_norm": 1.1420891247625766, "learning_rate": 7.854986068838073e-06, "loss": 0.1436, "step": 4092 }, { "epoch": 0.33, "grad_norm": 1.3347477451301466, "learning_rate": 7.85392242796086e-06, "loss": 0.231, "step": 4093 }, { "epoch": 0.33, "grad_norm": 5.394754992691177, "learning_rate": 7.852858595491186e-06, "loss": 0.7561, "step": 4094 }, { "epoch": 0.33, "grad_norm": 5.313125991920316, "learning_rate": 7.85179457150047e-06, "loss": 0.6511, "step": 4095 }, { "epoch": 0.33, "grad_norm": 1.3106698991438515, "learning_rate": 7.850730356060145e-06, "loss": 0.1593, "step": 4096 }, { "epoch": 0.33, "grad_norm": 1.363134227599685, "learning_rate": 7.849665949241653e-06, "loss": 0.2012, "step": 4097 }, { "epoch": 0.33, "grad_norm": 1.4383464626993177, "learning_rate": 7.848601351116454e-06, "loss": 0.2121, "step": 4098 }, { "epoch": 0.33, "grad_norm": 15.498277113151147, "learning_rate": 7.847536561756012e-06, "loss": 0.7387, "step": 4099 }, { "epoch": 0.33, "grad_norm": 1.3875819764255877, "learning_rate": 7.846471581231814e-06, "loss": 0.224, "step": 4100 }, { "epoch": 0.33, "grad_norm": 1.5177600431702303, "learning_rate": 7.845406409615354e-06, "loss": 0.2387, "step": 4101 }, { "epoch": 0.33, "grad_norm": 1.3216211886130689, "learning_rate": 7.844341046978144e-06, "loss": 0.1944, "step": 4102 }, { "epoch": 0.33, "grad_norm": 1.2881404785662474, "learning_rate": 7.843275493391698e-06, "loss": 0.2161, "step": 4103 }, { "epoch": 0.33, "grad_norm": 1.5227554331459012, "learning_rate": 7.842209748927554e-06, "loss": 0.2647, "step": 4104 }, { "epoch": 0.33, "grad_norm": 1.4383841940976994, "learning_rate": 7.841143813657257e-06, "loss": 0.1985, "step": 4105 }, { "epoch": 0.33, "grad_norm": 1.5166287636946287, "learning_rate": 7.840077687652368e-06, "loss": 0.2496, "step": 4106 }, { "epoch": 0.33, "grad_norm": 1.3601324961236294, "learning_rate": 7.83901137098446e-06, "loss": 0.2131, "step": 4107 }, { "epoch": 0.33, "grad_norm": 1.347209417009386, "learning_rate": 7.837944863725116e-06, "loss": 0.196, "step": 4108 }, { "epoch": 0.33, "grad_norm": 1.160420508312375, "learning_rate": 7.836878165945934e-06, "loss": 0.1973, "step": 4109 }, { "epoch": 0.33, "grad_norm": 1.3362402130212154, "learning_rate": 7.835811277718528e-06, "loss": 0.2084, "step": 4110 }, { "epoch": 0.33, "grad_norm": 1.3933196723668342, "learning_rate": 7.834744199114517e-06, "loss": 0.1796, "step": 4111 }, { "epoch": 0.33, "grad_norm": 1.3662873983944175, "learning_rate": 7.833676930205538e-06, "loss": 0.2152, "step": 4112 }, { "epoch": 0.33, "grad_norm": 1.3095816993456044, "learning_rate": 7.832609471063242e-06, "loss": 0.1969, "step": 4113 }, { "epoch": 0.33, "grad_norm": 6.130632417059272, "learning_rate": 7.831541821759288e-06, "loss": 0.7041, "step": 4114 }, { "epoch": 0.33, "grad_norm": 6.180047106402712, "learning_rate": 7.830473982365355e-06, "loss": 0.6027, "step": 4115 }, { "epoch": 0.33, "grad_norm": 1.3333731514574172, "learning_rate": 7.829405952953127e-06, "loss": 0.1815, "step": 4116 }, { "epoch": 0.33, "grad_norm": 1.3323515731898747, "learning_rate": 7.828337733594304e-06, "loss": 0.2165, "step": 4117 }, { "epoch": 0.33, "grad_norm": 1.2779810112881456, "learning_rate": 7.827269324360599e-06, "loss": 0.2039, "step": 4118 }, { "epoch": 0.33, "grad_norm": 1.4811841669676526, "learning_rate": 7.826200725323738e-06, "loss": 0.2117, "step": 4119 }, { "epoch": 0.33, "grad_norm": 1.3076168826317172, "learning_rate": 7.82513193655546e-06, "loss": 0.2223, "step": 4120 }, { "epoch": 0.33, "grad_norm": 1.3929572930277456, "learning_rate": 7.824062958127515e-06, "loss": 0.2546, "step": 4121 }, { "epoch": 0.33, "grad_norm": 1.298777907252214, "learning_rate": 7.822993790111668e-06, "loss": 0.2204, "step": 4122 }, { "epoch": 0.33, "grad_norm": 5.60516313466515, "learning_rate": 7.821924432579693e-06, "loss": 0.6862, "step": 4123 }, { "epoch": 0.33, "grad_norm": 1.308252922836962, "learning_rate": 7.820854885603381e-06, "loss": 0.1684, "step": 4124 }, { "epoch": 0.33, "grad_norm": 1.5399384307295758, "learning_rate": 7.819785149254534e-06, "loss": 0.1939, "step": 4125 }, { "epoch": 0.33, "grad_norm": 1.4965111177440602, "learning_rate": 7.818715223604966e-06, "loss": 0.2097, "step": 4126 }, { "epoch": 0.33, "grad_norm": 1.3250007089055158, "learning_rate": 7.817645108726504e-06, "loss": 0.2331, "step": 4127 }, { "epoch": 0.33, "grad_norm": 1.4254926725775519, "learning_rate": 7.816574804690991e-06, "loss": 0.2194, "step": 4128 }, { "epoch": 0.33, "grad_norm": 1.442442451184468, "learning_rate": 7.815504311570276e-06, "loss": 0.2207, "step": 4129 }, { "epoch": 0.33, "grad_norm": 1.3819579153450376, "learning_rate": 7.814433629436225e-06, "loss": 0.2405, "step": 4130 }, { "epoch": 0.33, "grad_norm": 1.5168482279016906, "learning_rate": 7.813362758360719e-06, "loss": 0.2447, "step": 4131 }, { "epoch": 0.33, "grad_norm": 1.3545507943858213, "learning_rate": 7.812291698415647e-06, "loss": 0.2258, "step": 4132 }, { "epoch": 0.33, "grad_norm": 1.4443173695540255, "learning_rate": 7.811220449672909e-06, "loss": 0.2425, "step": 4133 }, { "epoch": 0.33, "grad_norm": 1.530496486389679, "learning_rate": 7.810149012204427e-06, "loss": 0.232, "step": 4134 }, { "epoch": 0.33, "grad_norm": 1.2913255442481155, "learning_rate": 7.809077386082129e-06, "loss": 0.1883, "step": 4135 }, { "epoch": 0.33, "grad_norm": 1.2609003405147097, "learning_rate": 7.808005571377952e-06, "loss": 0.1707, "step": 4136 }, { "epoch": 0.33, "grad_norm": 6.443199968020226, "learning_rate": 7.806933568163855e-06, "loss": 0.6255, "step": 4137 }, { "epoch": 0.33, "grad_norm": 1.320971000452345, "learning_rate": 7.805861376511802e-06, "loss": 0.213, "step": 4138 }, { "epoch": 0.33, "grad_norm": 4.714154643071379, "learning_rate": 7.804788996493773e-06, "loss": 0.4573, "step": 4139 }, { "epoch": 0.33, "grad_norm": 1.3335379499933375, "learning_rate": 7.803716428181762e-06, "loss": 0.2148, "step": 4140 }, { "epoch": 0.33, "grad_norm": 1.3919067920752246, "learning_rate": 7.802643671647772e-06, "loss": 0.2381, "step": 4141 }, { "epoch": 0.33, "grad_norm": 1.3709071941724613, "learning_rate": 7.80157072696382e-06, "loss": 0.2249, "step": 4142 }, { "epoch": 0.33, "grad_norm": 1.3575278358919654, "learning_rate": 7.800497594201935e-06, "loss": 0.1865, "step": 4143 }, { "epoch": 0.33, "grad_norm": 6.40489892921925, "learning_rate": 7.799424273434164e-06, "loss": 0.6067, "step": 4144 }, { "epoch": 0.33, "grad_norm": 1.231895430554824, "learning_rate": 7.79835076473256e-06, "loss": 0.1867, "step": 4145 }, { "epoch": 0.33, "grad_norm": 1.446794622329574, "learning_rate": 7.797277068169187e-06, "loss": 0.26, "step": 4146 }, { "epoch": 0.33, "grad_norm": 1.336387924672816, "learning_rate": 7.796203183816131e-06, "loss": 0.2368, "step": 4147 }, { "epoch": 0.33, "grad_norm": 1.2615560496544245, "learning_rate": 7.795129111745484e-06, "loss": 0.1968, "step": 4148 }, { "epoch": 0.33, "grad_norm": 1.2577797643190718, "learning_rate": 7.79405485202935e-06, "loss": 0.2177, "step": 4149 }, { "epoch": 0.33, "grad_norm": 1.5176143521946903, "learning_rate": 7.792980404739849e-06, "loss": 0.2117, "step": 4150 }, { "epoch": 0.33, "grad_norm": 1.3115485988816853, "learning_rate": 7.791905769949108e-06, "loss": 0.2457, "step": 4151 }, { "epoch": 0.33, "grad_norm": 1.314607596902828, "learning_rate": 7.790830947729278e-06, "loss": 0.1873, "step": 4152 }, { "epoch": 0.33, "grad_norm": 1.447361498007139, "learning_rate": 7.789755938152508e-06, "loss": 0.2162, "step": 4153 }, { "epoch": 0.33, "grad_norm": 1.377479347802542, "learning_rate": 7.78868074129097e-06, "loss": 0.2413, "step": 4154 }, { "epoch": 0.33, "grad_norm": 1.458719204776982, "learning_rate": 7.787605357216843e-06, "loss": 0.2137, "step": 4155 }, { "epoch": 0.33, "grad_norm": 1.2802137334720627, "learning_rate": 7.786529786002324e-06, "loss": 0.202, "step": 4156 }, { "epoch": 0.33, "grad_norm": 8.9385986703183, "learning_rate": 7.785454027719617e-06, "loss": 0.686, "step": 4157 }, { "epoch": 0.33, "grad_norm": 1.2031168936930932, "learning_rate": 7.78437808244094e-06, "loss": 0.1537, "step": 4158 }, { "epoch": 0.33, "grad_norm": 1.4563418943929554, "learning_rate": 7.783301950238528e-06, "loss": 0.2734, "step": 4159 }, { "epoch": 0.33, "grad_norm": 1.4541863287729577, "learning_rate": 7.782225631184624e-06, "loss": 0.2081, "step": 4160 }, { "epoch": 0.33, "grad_norm": 1.296192840361664, "learning_rate": 7.78114912535148e-06, "loss": 0.2081, "step": 4161 }, { "epoch": 0.33, "grad_norm": 1.5302300013841614, "learning_rate": 7.780072432811371e-06, "loss": 0.2453, "step": 4162 }, { "epoch": 0.33, "grad_norm": 1.4080875155902433, "learning_rate": 7.778995553636576e-06, "loss": 0.1932, "step": 4163 }, { "epoch": 0.33, "grad_norm": 1.3765035226588807, "learning_rate": 7.77791848789939e-06, "loss": 0.2012, "step": 4164 }, { "epoch": 0.33, "grad_norm": 1.4731695362636956, "learning_rate": 7.776841235672119e-06, "loss": 0.2338, "step": 4165 }, { "epoch": 0.33, "grad_norm": 1.3910677970213865, "learning_rate": 7.775763797027081e-06, "loss": 0.1959, "step": 4166 }, { "epoch": 0.33, "grad_norm": 1.5911570004789106, "learning_rate": 7.77468617203661e-06, "loss": 0.1896, "step": 4167 }, { "epoch": 0.33, "grad_norm": 1.6529736922760596, "learning_rate": 7.77360836077305e-06, "loss": 0.3104, "step": 4168 }, { "epoch": 0.33, "grad_norm": 1.2277596731451668, "learning_rate": 7.772530363308756e-06, "loss": 0.1744, "step": 4169 }, { "epoch": 0.33, "grad_norm": 1.4497968508973773, "learning_rate": 7.771452179716099e-06, "loss": 0.2368, "step": 4170 }, { "epoch": 0.33, "grad_norm": 1.3258176159054198, "learning_rate": 7.77037381006746e-06, "loss": 0.1764, "step": 4171 }, { "epoch": 0.33, "grad_norm": 5.600599428436095, "learning_rate": 7.769295254435235e-06, "loss": 0.6881, "step": 4172 }, { "epoch": 0.33, "grad_norm": 1.2189255987784076, "learning_rate": 7.768216512891824e-06, "loss": 0.1765, "step": 4173 }, { "epoch": 0.33, "grad_norm": 5.485076744012145, "learning_rate": 7.767137585509655e-06, "loss": 0.6165, "step": 4174 }, { "epoch": 0.33, "grad_norm": 5.142160338125644, "learning_rate": 7.766058472361154e-06, "loss": 0.6272, "step": 4175 }, { "epoch": 0.33, "grad_norm": 1.5289245801088447, "learning_rate": 7.764979173518767e-06, "loss": 0.2158, "step": 4176 }, { "epoch": 0.33, "grad_norm": 6.627641625367301, "learning_rate": 7.763899689054952e-06, "loss": 0.5275, "step": 4177 }, { "epoch": 0.33, "grad_norm": 1.3863225139843434, "learning_rate": 7.762820019042176e-06, "loss": 0.2015, "step": 4178 }, { "epoch": 0.33, "grad_norm": 1.4651849976812208, "learning_rate": 7.761740163552921e-06, "loss": 0.2462, "step": 4179 }, { "epoch": 0.33, "grad_norm": 1.5377969727360974, "learning_rate": 7.760660122659682e-06, "loss": 0.2168, "step": 4180 }, { "epoch": 0.33, "grad_norm": 1.4711604137385677, "learning_rate": 7.759579896434963e-06, "loss": 0.2328, "step": 4181 }, { "epoch": 0.33, "grad_norm": 1.2744588930710363, "learning_rate": 7.758499484951285e-06, "loss": 0.1913, "step": 4182 }, { "epoch": 0.33, "grad_norm": 1.4197186979163279, "learning_rate": 7.757418888281179e-06, "loss": 0.1993, "step": 4183 }, { "epoch": 0.33, "grad_norm": 1.472656766566373, "learning_rate": 7.756338106497188e-06, "loss": 0.2157, "step": 4184 }, { "epoch": 0.33, "grad_norm": 1.3779435982915498, "learning_rate": 7.755257139671868e-06, "loss": 0.2071, "step": 4185 }, { "epoch": 0.33, "grad_norm": 1.3599978928521972, "learning_rate": 7.754175987877788e-06, "loss": 0.1999, "step": 4186 }, { "epoch": 0.33, "grad_norm": 4.985531377875093, "learning_rate": 7.75309465118753e-06, "loss": 0.4293, "step": 4187 }, { "epoch": 0.34, "grad_norm": 4.691334258919299, "learning_rate": 7.752013129673685e-06, "loss": 0.6981, "step": 4188 }, { "epoch": 0.34, "grad_norm": 1.347249148822668, "learning_rate": 7.750931423408864e-06, "loss": 0.1939, "step": 4189 }, { "epoch": 0.34, "grad_norm": 1.4037889948654199, "learning_rate": 7.749849532465677e-06, "loss": 0.2203, "step": 4190 }, { "epoch": 0.34, "grad_norm": 1.1972210802439345, "learning_rate": 7.74876745691676e-06, "loss": 0.1603, "step": 4191 }, { "epoch": 0.34, "grad_norm": 1.3691166158292847, "learning_rate": 7.747685196834757e-06, "loss": 0.2132, "step": 4192 }, { "epoch": 0.34, "grad_norm": 1.3614275795980018, "learning_rate": 7.746602752292322e-06, "loss": 0.2551, "step": 4193 }, { "epoch": 0.34, "grad_norm": 1.427300788012132, "learning_rate": 7.745520123362121e-06, "loss": 0.2217, "step": 4194 }, { "epoch": 0.34, "grad_norm": 1.4468402668620954, "learning_rate": 7.744437310116837e-06, "loss": 0.1896, "step": 4195 }, { "epoch": 0.34, "grad_norm": 1.3194101849500879, "learning_rate": 7.743354312629158e-06, "loss": 0.1857, "step": 4196 }, { "epoch": 0.34, "grad_norm": 1.5017080054448577, "learning_rate": 7.742271130971795e-06, "loss": 0.2524, "step": 4197 }, { "epoch": 0.34, "grad_norm": 1.2433922557027257, "learning_rate": 7.741187765217461e-06, "loss": 0.2219, "step": 4198 }, { "epoch": 0.34, "grad_norm": 1.34213668757596, "learning_rate": 7.740104215438888e-06, "loss": 0.2197, "step": 4199 }, { "epoch": 0.34, "grad_norm": 1.320091687636802, "learning_rate": 7.739020481708816e-06, "loss": 0.1928, "step": 4200 }, { "epoch": 0.34, "grad_norm": 1.4003029227778818, "learning_rate": 7.7379365641e-06, "loss": 0.1841, "step": 4201 }, { "epoch": 0.34, "grad_norm": 1.395411463616382, "learning_rate": 7.736852462685206e-06, "loss": 0.2353, "step": 4202 }, { "epoch": 0.34, "grad_norm": 1.4040396488127749, "learning_rate": 7.735768177537215e-06, "loss": 0.2294, "step": 4203 }, { "epoch": 0.34, "grad_norm": 1.3414786318578837, "learning_rate": 7.734683708728816e-06, "loss": 0.2331, "step": 4204 }, { "epoch": 0.34, "grad_norm": 1.4308723445125975, "learning_rate": 7.733599056332816e-06, "loss": 0.239, "step": 4205 }, { "epoch": 0.34, "grad_norm": 5.883879224848194, "learning_rate": 7.732514220422028e-06, "loss": 0.5157, "step": 4206 }, { "epoch": 0.34, "grad_norm": 1.2920345164391571, "learning_rate": 7.73142920106928e-06, "loss": 0.1917, "step": 4207 }, { "epoch": 0.34, "grad_norm": 1.4398283330785886, "learning_rate": 7.730343998347415e-06, "loss": 0.2286, "step": 4208 }, { "epoch": 0.34, "grad_norm": 1.2944976893335767, "learning_rate": 7.729258612329285e-06, "loss": 0.1818, "step": 4209 }, { "epoch": 0.34, "grad_norm": 1.2575464171243669, "learning_rate": 7.728173043087754e-06, "loss": 0.1944, "step": 4210 }, { "epoch": 0.34, "grad_norm": 1.4131519379810098, "learning_rate": 7.7270872906957e-06, "loss": 0.2345, "step": 4211 }, { "epoch": 0.34, "grad_norm": 1.2329081662113242, "learning_rate": 7.726001355226016e-06, "loss": 0.1974, "step": 4212 }, { "epoch": 0.34, "grad_norm": 4.957081903516495, "learning_rate": 7.7249152367516e-06, "loss": 0.5569, "step": 4213 }, { "epoch": 0.34, "grad_norm": 1.3522454115235596, "learning_rate": 7.723828935345368e-06, "loss": 0.199, "step": 4214 }, { "epoch": 0.34, "grad_norm": 1.4427884642224482, "learning_rate": 7.722742451080247e-06, "loss": 0.2192, "step": 4215 }, { "epoch": 0.34, "grad_norm": 1.2897373625936364, "learning_rate": 7.721655784029174e-06, "loss": 0.2135, "step": 4216 }, { "epoch": 0.34, "grad_norm": 1.5266920245615034, "learning_rate": 7.720568934265104e-06, "loss": 0.2324, "step": 4217 }, { "epoch": 0.34, "grad_norm": 1.5006556877155306, "learning_rate": 7.719481901860996e-06, "loss": 0.2212, "step": 4218 }, { "epoch": 0.34, "grad_norm": 1.317082212468535, "learning_rate": 7.718394686889831e-06, "loss": 0.2002, "step": 4219 }, { "epoch": 0.34, "grad_norm": 1.3895741235982342, "learning_rate": 7.717307289424594e-06, "loss": 0.2147, "step": 4220 }, { "epoch": 0.34, "grad_norm": 1.3944440536557305, "learning_rate": 7.716219709538285e-06, "loss": 0.2169, "step": 4221 }, { "epoch": 0.34, "grad_norm": 1.336767905467579, "learning_rate": 7.715131947303916e-06, "loss": 0.2117, "step": 4222 }, { "epoch": 0.34, "grad_norm": 1.2850502037655405, "learning_rate": 7.714044002794514e-06, "loss": 0.2228, "step": 4223 }, { "epoch": 0.34, "grad_norm": 1.2626411034111444, "learning_rate": 7.712955876083115e-06, "loss": 0.2213, "step": 4224 }, { "epoch": 0.34, "grad_norm": 7.627504520972727, "learning_rate": 7.711867567242769e-06, "loss": 0.6374, "step": 4225 }, { "epoch": 0.34, "grad_norm": 1.3911205672149252, "learning_rate": 7.710779076346534e-06, "loss": 0.1892, "step": 4226 }, { "epoch": 0.34, "grad_norm": 1.5127681709757852, "learning_rate": 7.709690403467486e-06, "loss": 0.297, "step": 4227 }, { "epoch": 0.34, "grad_norm": 1.4654532049612483, "learning_rate": 7.708601548678714e-06, "loss": 0.2177, "step": 4228 }, { "epoch": 0.34, "grad_norm": 1.5993654707404812, "learning_rate": 7.707512512053312e-06, "loss": 0.1898, "step": 4229 }, { "epoch": 0.34, "grad_norm": 1.3062088485113348, "learning_rate": 7.70642329366439e-06, "loss": 0.2015, "step": 4230 }, { "epoch": 0.34, "grad_norm": 1.386291832646128, "learning_rate": 7.705333893585076e-06, "loss": 0.2153, "step": 4231 }, { "epoch": 0.34, "grad_norm": 1.369131830370373, "learning_rate": 7.704244311888499e-06, "loss": 0.2028, "step": 4232 }, { "epoch": 0.34, "grad_norm": 1.404820069553254, "learning_rate": 7.703154548647806e-06, "loss": 0.2067, "step": 4233 }, { "epoch": 0.34, "grad_norm": 1.8180872088476219, "learning_rate": 7.702064603936162e-06, "loss": 0.2566, "step": 4234 }, { "epoch": 0.34, "grad_norm": 1.3819647206585368, "learning_rate": 7.70097447782673e-06, "loss": 0.2277, "step": 4235 }, { "epoch": 0.34, "grad_norm": 5.572663553318597, "learning_rate": 7.6998841703927e-06, "loss": 0.5376, "step": 4236 }, { "epoch": 0.34, "grad_norm": 1.3985544874096796, "learning_rate": 7.698793681707263e-06, "loss": 0.2124, "step": 4237 }, { "epoch": 0.34, "grad_norm": 6.301509388981198, "learning_rate": 7.697703011843632e-06, "loss": 0.4187, "step": 4238 }, { "epoch": 0.34, "grad_norm": 1.42064472596067, "learning_rate": 7.696612160875023e-06, "loss": 0.214, "step": 4239 }, { "epoch": 0.34, "grad_norm": 1.25355065984926, "learning_rate": 7.695521128874669e-06, "loss": 0.1846, "step": 4240 }, { "epoch": 0.34, "grad_norm": 1.549583995529812, "learning_rate": 7.694429915915816e-06, "loss": 0.2272, "step": 4241 }, { "epoch": 0.34, "grad_norm": 1.2982164778276364, "learning_rate": 7.693338522071717e-06, "loss": 0.1767, "step": 4242 }, { "epoch": 0.34, "grad_norm": 4.008084087006375, "learning_rate": 7.692246947415643e-06, "loss": 0.4301, "step": 4243 }, { "epoch": 0.34, "grad_norm": 1.5457529540847386, "learning_rate": 7.691155192020874e-06, "loss": 0.2506, "step": 4244 }, { "epoch": 0.34, "grad_norm": 1.3205918049903738, "learning_rate": 7.690063255960702e-06, "loss": 0.2392, "step": 4245 }, { "epoch": 0.34, "grad_norm": 1.4408328904197585, "learning_rate": 7.688971139308435e-06, "loss": 0.229, "step": 4246 }, { "epoch": 0.34, "grad_norm": 1.329064139277016, "learning_rate": 7.687878842137388e-06, "loss": 0.2044, "step": 4247 }, { "epoch": 0.34, "grad_norm": 1.5669282394333617, "learning_rate": 7.686786364520889e-06, "loss": 0.1937, "step": 4248 }, { "epoch": 0.34, "grad_norm": 1.4271242160587696, "learning_rate": 7.685693706532283e-06, "loss": 0.2323, "step": 4249 }, { "epoch": 0.34, "grad_norm": 1.211928811424097, "learning_rate": 7.68460086824492e-06, "loss": 0.2033, "step": 4250 }, { "epoch": 0.34, "grad_norm": 1.2953847062829094, "learning_rate": 7.683507849732166e-06, "loss": 0.2177, "step": 4251 }, { "epoch": 0.34, "grad_norm": 1.2532318780310638, "learning_rate": 7.6824146510674e-06, "loss": 0.2414, "step": 4252 }, { "epoch": 0.34, "grad_norm": 4.428110191115999, "learning_rate": 7.681321272324011e-06, "loss": 0.4797, "step": 4253 }, { "epoch": 0.34, "grad_norm": 1.3439137040781126, "learning_rate": 7.6802277135754e-06, "loss": 0.2193, "step": 4254 }, { "epoch": 0.34, "grad_norm": 1.464793473879814, "learning_rate": 7.679133974894984e-06, "loss": 0.2227, "step": 4255 }, { "epoch": 0.34, "grad_norm": 1.341722815372195, "learning_rate": 7.678040056356185e-06, "loss": 0.2224, "step": 4256 }, { "epoch": 0.34, "grad_norm": 1.3974479372533142, "learning_rate": 7.676945958032445e-06, "loss": 0.1857, "step": 4257 }, { "epoch": 0.34, "grad_norm": 5.3813999041193545, "learning_rate": 7.67585167999721e-06, "loss": 0.5687, "step": 4258 }, { "epoch": 0.34, "grad_norm": 1.3136805477651075, "learning_rate": 7.674757222323943e-06, "loss": 0.1885, "step": 4259 }, { "epoch": 0.34, "grad_norm": 1.3494551601006548, "learning_rate": 7.673662585086123e-06, "loss": 0.2292, "step": 4260 }, { "epoch": 0.34, "grad_norm": 1.4420032562388025, "learning_rate": 7.67256776835723e-06, "loss": 0.2031, "step": 4261 }, { "epoch": 0.34, "grad_norm": 1.4446056609067182, "learning_rate": 7.671472772210766e-06, "loss": 0.1752, "step": 4262 }, { "epoch": 0.34, "grad_norm": 1.2063830384558318, "learning_rate": 7.67037759672024e-06, "loss": 0.1826, "step": 4263 }, { "epoch": 0.34, "grad_norm": 1.3633372454871888, "learning_rate": 7.669282241959177e-06, "loss": 0.2134, "step": 4264 }, { "epoch": 0.34, "grad_norm": 4.908639598652913, "learning_rate": 7.668186708001106e-06, "loss": 0.7562, "step": 4265 }, { "epoch": 0.34, "grad_norm": 1.2381373928276707, "learning_rate": 7.66709099491958e-06, "loss": 0.1831, "step": 4266 }, { "epoch": 0.34, "grad_norm": 1.3637384554901681, "learning_rate": 7.665995102788153e-06, "loss": 0.2374, "step": 4267 }, { "epoch": 0.34, "grad_norm": 1.3032998469499624, "learning_rate": 7.6648990316804e-06, "loss": 0.1822, "step": 4268 }, { "epoch": 0.34, "grad_norm": 1.3981222173006953, "learning_rate": 7.663802781669898e-06, "loss": 0.2396, "step": 4269 }, { "epoch": 0.34, "grad_norm": 4.348116124783053, "learning_rate": 7.662706352830244e-06, "loss": 0.5654, "step": 4270 }, { "epoch": 0.34, "grad_norm": 4.977317041593801, "learning_rate": 7.661609745235046e-06, "loss": 0.6015, "step": 4271 }, { "epoch": 0.34, "grad_norm": 1.2858802799413296, "learning_rate": 7.66051295895792e-06, "loss": 0.2026, "step": 4272 }, { "epoch": 0.34, "grad_norm": 1.326866905988052, "learning_rate": 7.6594159940725e-06, "loss": 0.2052, "step": 4273 }, { "epoch": 0.34, "grad_norm": 1.6222980943077077, "learning_rate": 7.658318850652426e-06, "loss": 0.2466, "step": 4274 }, { "epoch": 0.34, "grad_norm": 1.53765424393116, "learning_rate": 7.657221528771352e-06, "loss": 0.2113, "step": 4275 }, { "epoch": 0.34, "grad_norm": 1.284850830434102, "learning_rate": 7.656124028502947e-06, "loss": 0.2082, "step": 4276 }, { "epoch": 0.34, "grad_norm": 1.4226945246952964, "learning_rate": 7.65502634992089e-06, "loss": 0.2425, "step": 4277 }, { "epoch": 0.34, "grad_norm": 1.182864885487913, "learning_rate": 7.653928493098866e-06, "loss": 0.1677, "step": 4278 }, { "epoch": 0.34, "grad_norm": 1.3584948278801785, "learning_rate": 7.652830458110581e-06, "loss": 0.2243, "step": 4279 }, { "epoch": 0.34, "grad_norm": 4.700720776837866, "learning_rate": 7.651732245029753e-06, "loss": 0.5615, "step": 4280 }, { "epoch": 0.34, "grad_norm": 1.466290762444148, "learning_rate": 7.650633853930102e-06, "loss": 0.2376, "step": 4281 }, { "epoch": 0.34, "grad_norm": 1.363050277550056, "learning_rate": 7.64953528488537e-06, "loss": 0.2087, "step": 4282 }, { "epoch": 0.34, "grad_norm": 1.16950897355178, "learning_rate": 7.648436537969308e-06, "loss": 0.1985, "step": 4283 }, { "epoch": 0.34, "grad_norm": 1.219778568413987, "learning_rate": 7.647337613255677e-06, "loss": 0.1911, "step": 4284 }, { "epoch": 0.34, "grad_norm": 1.2918953787679484, "learning_rate": 7.646238510818249e-06, "loss": 0.2015, "step": 4285 }, { "epoch": 0.34, "grad_norm": 1.1406604945409573, "learning_rate": 7.645139230730813e-06, "loss": 0.1739, "step": 4286 }, { "epoch": 0.34, "grad_norm": 1.414104612293941, "learning_rate": 7.644039773067166e-06, "loss": 0.2061, "step": 4287 }, { "epoch": 0.34, "grad_norm": 1.254493107967551, "learning_rate": 7.642940137901116e-06, "loss": 0.1917, "step": 4288 }, { "epoch": 0.34, "grad_norm": 1.3427715656788006, "learning_rate": 7.64184032530649e-06, "loss": 0.2202, "step": 4289 }, { "epoch": 0.34, "grad_norm": 1.3484078980661294, "learning_rate": 7.640740335357116e-06, "loss": 0.2169, "step": 4290 }, { "epoch": 0.34, "grad_norm": 1.4802304452975208, "learning_rate": 7.639640168126843e-06, "loss": 0.1956, "step": 4291 }, { "epoch": 0.34, "grad_norm": 1.3842962527860643, "learning_rate": 7.638539823689529e-06, "loss": 0.2252, "step": 4292 }, { "epoch": 0.34, "grad_norm": 1.4228930219645821, "learning_rate": 7.637439302119038e-06, "loss": 0.2402, "step": 4293 }, { "epoch": 0.34, "grad_norm": 1.2984993477898443, "learning_rate": 7.636338603489261e-06, "loss": 0.1867, "step": 4294 }, { "epoch": 0.34, "grad_norm": 1.37283854069665, "learning_rate": 7.635237727874082e-06, "loss": 0.229, "step": 4295 }, { "epoch": 0.34, "grad_norm": 4.255637379407906, "learning_rate": 7.634136675347413e-06, "loss": 0.4579, "step": 4296 }, { "epoch": 0.34, "grad_norm": 1.2760678841634427, "learning_rate": 7.633035445983164e-06, "loss": 0.203, "step": 4297 }, { "epoch": 0.34, "grad_norm": 1.3838105608371336, "learning_rate": 7.63193403985527e-06, "loss": 0.2499, "step": 4298 }, { "epoch": 0.34, "grad_norm": 1.2707668585548204, "learning_rate": 7.63083245703767e-06, "loss": 0.2023, "step": 4299 }, { "epoch": 0.34, "grad_norm": 1.386579180623586, "learning_rate": 7.629730697604314e-06, "loss": 0.2086, "step": 4300 }, { "epoch": 0.34, "grad_norm": 1.3375171439837847, "learning_rate": 7.62862876162917e-06, "loss": 0.1991, "step": 4301 }, { "epoch": 0.34, "grad_norm": 1.402240528665343, "learning_rate": 7.627526649186214e-06, "loss": 0.2041, "step": 4302 }, { "epoch": 0.34, "grad_norm": 1.3354579346682385, "learning_rate": 7.6264243603494305e-06, "loss": 0.2054, "step": 4303 }, { "epoch": 0.34, "grad_norm": 1.365193097943103, "learning_rate": 7.625321895192823e-06, "loss": 0.2223, "step": 4304 }, { "epoch": 0.34, "grad_norm": 1.4003792133502433, "learning_rate": 7.624219253790403e-06, "loss": 0.212, "step": 4305 }, { "epoch": 0.34, "grad_norm": 1.4537694196828728, "learning_rate": 7.623116436216191e-06, "loss": 0.2337, "step": 4306 }, { "epoch": 0.34, "grad_norm": 5.0904077358819, "learning_rate": 7.6220134425442274e-06, "loss": 0.5892, "step": 4307 }, { "epoch": 0.34, "grad_norm": 1.4131412816300277, "learning_rate": 7.620910272848556e-06, "loss": 0.209, "step": 4308 }, { "epoch": 0.34, "grad_norm": 1.402169315827073, "learning_rate": 7.619806927203237e-06, "loss": 0.1972, "step": 4309 }, { "epoch": 0.34, "grad_norm": 1.5688591956381426, "learning_rate": 7.618703405682341e-06, "loss": 0.2798, "step": 4310 }, { "epoch": 0.34, "grad_norm": 1.353040254018066, "learning_rate": 7.6175997083599525e-06, "loss": 0.2282, "step": 4311 }, { "epoch": 0.34, "grad_norm": 5.226449187028031, "learning_rate": 7.616495835310162e-06, "loss": 0.5595, "step": 4312 }, { "epoch": 0.35, "grad_norm": 1.5367011747437263, "learning_rate": 7.61539178660708e-06, "loss": 0.2278, "step": 4313 }, { "epoch": 0.35, "grad_norm": 1.4629270891683064, "learning_rate": 7.614287562324823e-06, "loss": 0.1602, "step": 4314 }, { "epoch": 0.35, "grad_norm": 1.3577875456828694, "learning_rate": 7.613183162537521e-06, "loss": 0.1924, "step": 4315 }, { "epoch": 0.35, "grad_norm": 7.693249268018882, "learning_rate": 7.612078587319316e-06, "loss": 0.3669, "step": 4316 }, { "epoch": 0.35, "grad_norm": 1.4713893208560636, "learning_rate": 7.61097383674436e-06, "loss": 0.231, "step": 4317 }, { "epoch": 0.35, "grad_norm": 1.3897757044193606, "learning_rate": 7.60986891088682e-06, "loss": 0.2047, "step": 4318 }, { "epoch": 0.35, "grad_norm": 1.4483935743298884, "learning_rate": 7.6087638098208726e-06, "loss": 0.1933, "step": 4319 }, { "epoch": 0.35, "grad_norm": 1.4108536513798686, "learning_rate": 7.607658533620708e-06, "loss": 0.1987, "step": 4320 }, { "epoch": 0.35, "grad_norm": 1.2787779176795473, "learning_rate": 7.606553082360523e-06, "loss": 0.1507, "step": 4321 }, { "epoch": 0.35, "grad_norm": 1.4929284022756466, "learning_rate": 7.605447456114532e-06, "loss": 0.2367, "step": 4322 }, { "epoch": 0.35, "grad_norm": 1.3028605340556978, "learning_rate": 7.604341654956959e-06, "loss": 0.168, "step": 4323 }, { "epoch": 0.35, "grad_norm": 1.5882068021196865, "learning_rate": 7.60323567896204e-06, "loss": 0.2384, "step": 4324 }, { "epoch": 0.35, "grad_norm": 1.47487033904761, "learning_rate": 7.602129528204023e-06, "loss": 0.2238, "step": 4325 }, { "epoch": 0.35, "grad_norm": 6.624881947891497, "learning_rate": 7.601023202757168e-06, "loss": 0.7792, "step": 4326 }, { "epoch": 0.35, "grad_norm": 1.3313743574496752, "learning_rate": 7.599916702695742e-06, "loss": 0.1796, "step": 4327 }, { "epoch": 0.35, "grad_norm": 1.314746299253457, "learning_rate": 7.59881002809403e-06, "loss": 0.1999, "step": 4328 }, { "epoch": 0.35, "grad_norm": 6.417733681669009, "learning_rate": 7.59770317902633e-06, "loss": 0.5444, "step": 4329 }, { "epoch": 0.35, "grad_norm": 1.5598066496222407, "learning_rate": 7.596596155566942e-06, "loss": 0.2465, "step": 4330 }, { "epoch": 0.35, "grad_norm": 1.3962840354819928, "learning_rate": 7.595488957790188e-06, "loss": 0.2156, "step": 4331 }, { "epoch": 0.35, "grad_norm": 1.4797845153123654, "learning_rate": 7.594381585770396e-06, "loss": 0.185, "step": 4332 }, { "epoch": 0.35, "grad_norm": 1.4181708315186943, "learning_rate": 7.593274039581908e-06, "loss": 0.1856, "step": 4333 }, { "epoch": 0.35, "grad_norm": 4.936770487574651, "learning_rate": 7.592166319299075e-06, "loss": 0.5457, "step": 4334 }, { "epoch": 0.35, "grad_norm": 1.5557926978635392, "learning_rate": 7.591058424996264e-06, "loss": 0.2604, "step": 4335 }, { "epoch": 0.35, "grad_norm": 1.412072310837266, "learning_rate": 7.5899503567478506e-06, "loss": 0.2563, "step": 4336 }, { "epoch": 0.35, "grad_norm": 1.387054261331683, "learning_rate": 7.5888421146282225e-06, "loss": 0.1954, "step": 4337 }, { "epoch": 0.35, "grad_norm": 1.2969096808661633, "learning_rate": 7.587733698711779e-06, "loss": 0.2059, "step": 4338 }, { "epoch": 0.35, "grad_norm": 1.429373153227334, "learning_rate": 7.586625109072931e-06, "loss": 0.1852, "step": 4339 }, { "epoch": 0.35, "grad_norm": 1.1825909849279925, "learning_rate": 7.585516345786103e-06, "loss": 0.152, "step": 4340 }, { "epoch": 0.35, "grad_norm": 1.2239825512306242, "learning_rate": 7.5844074089257295e-06, "loss": 0.1809, "step": 4341 }, { "epoch": 0.35, "grad_norm": 1.2915997510271018, "learning_rate": 7.583298298566255e-06, "loss": 0.1946, "step": 4342 }, { "epoch": 0.35, "grad_norm": 1.2181684015092504, "learning_rate": 7.582189014782139e-06, "loss": 0.2273, "step": 4343 }, { "epoch": 0.35, "grad_norm": 1.3142569522346708, "learning_rate": 7.581079557647853e-06, "loss": 0.1833, "step": 4344 }, { "epoch": 0.35, "grad_norm": 1.4453547020285573, "learning_rate": 7.5799699272378715e-06, "loss": 0.2333, "step": 4345 }, { "epoch": 0.35, "grad_norm": 1.2683677869451335, "learning_rate": 7.578860123626696e-06, "loss": 0.1546, "step": 4346 }, { "epoch": 0.35, "grad_norm": 1.5343711634776158, "learning_rate": 7.577750146888824e-06, "loss": 0.268, "step": 4347 }, { "epoch": 0.35, "grad_norm": 1.355791554163768, "learning_rate": 7.576639997098776e-06, "loss": 0.2431, "step": 4348 }, { "epoch": 0.35, "grad_norm": 5.460623424433426, "learning_rate": 7.575529674331077e-06, "loss": 0.6729, "step": 4349 }, { "epoch": 0.35, "grad_norm": 1.6035309037596033, "learning_rate": 7.574419178660269e-06, "loss": 0.2243, "step": 4350 }, { "epoch": 0.35, "grad_norm": 1.4166656432654923, "learning_rate": 7.573308510160899e-06, "loss": 0.2091, "step": 4351 }, { "epoch": 0.35, "grad_norm": 1.2395376282516506, "learning_rate": 7.572197668907533e-06, "loss": 0.1706, "step": 4352 }, { "epoch": 0.35, "grad_norm": 1.255248531298815, "learning_rate": 7.571086654974744e-06, "loss": 0.1996, "step": 4353 }, { "epoch": 0.35, "grad_norm": 1.4932687803936877, "learning_rate": 7.569975468437119e-06, "loss": 0.221, "step": 4354 }, { "epoch": 0.35, "grad_norm": 1.3402318856151096, "learning_rate": 7.568864109369252e-06, "loss": 0.1868, "step": 4355 }, { "epoch": 0.35, "grad_norm": 1.4762789770001095, "learning_rate": 7.567752577845755e-06, "loss": 0.2061, "step": 4356 }, { "epoch": 0.35, "grad_norm": 1.4009751628905103, "learning_rate": 7.566640873941248e-06, "loss": 0.2464, "step": 4357 }, { "epoch": 0.35, "grad_norm": 1.4578742350587837, "learning_rate": 7.565528997730362e-06, "loss": 0.2423, "step": 4358 }, { "epoch": 0.35, "grad_norm": 4.920170714618471, "learning_rate": 7.564416949287741e-06, "loss": 0.6145, "step": 4359 }, { "epoch": 0.35, "grad_norm": 1.5096005704088873, "learning_rate": 7.56330472868804e-06, "loss": 0.2188, "step": 4360 }, { "epoch": 0.35, "grad_norm": 1.381664120103105, "learning_rate": 7.562192336005927e-06, "loss": 0.2415, "step": 4361 }, { "epoch": 0.35, "grad_norm": 1.1842900794828364, "learning_rate": 7.561079771316078e-06, "loss": 0.1622, "step": 4362 }, { "epoch": 0.35, "grad_norm": 1.3626779383158645, "learning_rate": 7.559967034693186e-06, "loss": 0.1935, "step": 4363 }, { "epoch": 0.35, "grad_norm": 1.484710156623623, "learning_rate": 7.55885412621195e-06, "loss": 0.2022, "step": 4364 }, { "epoch": 0.35, "grad_norm": 1.1138028358137637, "learning_rate": 7.557741045947082e-06, "loss": 0.1543, "step": 4365 }, { "epoch": 0.35, "grad_norm": 1.2894063886904183, "learning_rate": 7.556627793973309e-06, "loss": 0.2424, "step": 4366 }, { "epoch": 0.35, "grad_norm": 1.5934110116631923, "learning_rate": 7.555514370365367e-06, "loss": 0.2367, "step": 4367 }, { "epoch": 0.35, "grad_norm": 1.2625960266815925, "learning_rate": 7.554400775198e-06, "loss": 0.1936, "step": 4368 }, { "epoch": 0.35, "grad_norm": 4.298927840330535, "learning_rate": 7.553287008545971e-06, "loss": 0.5638, "step": 4369 }, { "epoch": 0.35, "grad_norm": 1.323300025029743, "learning_rate": 7.552173070484048e-06, "loss": 0.2402, "step": 4370 }, { "epoch": 0.35, "grad_norm": 1.4177886523114012, "learning_rate": 7.551058961087016e-06, "loss": 0.2547, "step": 4371 }, { "epoch": 0.35, "grad_norm": 1.428894422623511, "learning_rate": 7.549944680429665e-06, "loss": 0.2318, "step": 4372 }, { "epoch": 0.35, "grad_norm": 1.4766800229153574, "learning_rate": 7.548830228586802e-06, "loss": 0.1845, "step": 4373 }, { "epoch": 0.35, "grad_norm": 1.2902147089384044, "learning_rate": 7.547715605633244e-06, "loss": 0.2143, "step": 4374 }, { "epoch": 0.35, "grad_norm": 1.3100434821869236, "learning_rate": 7.546600811643816e-06, "loss": 0.166, "step": 4375 }, { "epoch": 0.35, "grad_norm": 1.3688743154375775, "learning_rate": 7.54548584669336e-06, "loss": 0.1798, "step": 4376 }, { "epoch": 0.35, "grad_norm": 1.2147949320316396, "learning_rate": 7.544370710856728e-06, "loss": 0.1601, "step": 4377 }, { "epoch": 0.35, "grad_norm": 1.4781062551402164, "learning_rate": 7.54325540420878e-06, "loss": 0.1788, "step": 4378 }, { "epoch": 0.35, "grad_norm": 1.478746977300905, "learning_rate": 7.542139926824391e-06, "loss": 0.2496, "step": 4379 }, { "epoch": 0.35, "grad_norm": 1.4278261139750605, "learning_rate": 7.541024278778446e-06, "loss": 0.2083, "step": 4380 }, { "epoch": 0.35, "grad_norm": 1.3173428410391212, "learning_rate": 7.539908460145844e-06, "loss": 0.1957, "step": 4381 }, { "epoch": 0.35, "grad_norm": 1.5169638296411432, "learning_rate": 7.53879247100149e-06, "loss": 0.2372, "step": 4382 }, { "epoch": 0.35, "grad_norm": 1.246390859079121, "learning_rate": 7.537676311420305e-06, "loss": 0.1622, "step": 4383 }, { "epoch": 0.35, "grad_norm": 1.1803885613119514, "learning_rate": 7.536559981477221e-06, "loss": 0.1809, "step": 4384 }, { "epoch": 0.35, "grad_norm": 1.393438701477145, "learning_rate": 7.53544348124718e-06, "loss": 0.2102, "step": 4385 }, { "epoch": 0.35, "grad_norm": 1.3470319236418413, "learning_rate": 7.534326810805135e-06, "loss": 0.2042, "step": 4386 }, { "epoch": 0.35, "grad_norm": 1.2060190386125407, "learning_rate": 7.533209970226054e-06, "loss": 0.217, "step": 4387 }, { "epoch": 0.35, "grad_norm": 1.5594906111192852, "learning_rate": 7.532092959584912e-06, "loss": 0.2583, "step": 4388 }, { "epoch": 0.35, "grad_norm": 7.696617810064743, "learning_rate": 7.530975778956697e-06, "loss": 0.7041, "step": 4389 }, { "epoch": 0.35, "grad_norm": 1.2986880518500625, "learning_rate": 7.52985842841641e-06, "loss": 0.2131, "step": 4390 }, { "epoch": 0.35, "grad_norm": 1.2613738484088162, "learning_rate": 7.528740908039062e-06, "loss": 0.1975, "step": 4391 }, { "epoch": 0.35, "grad_norm": 1.3899847671131442, "learning_rate": 7.527623217899673e-06, "loss": 0.2284, "step": 4392 }, { "epoch": 0.35, "grad_norm": 1.3385992625893894, "learning_rate": 7.526505358073282e-06, "loss": 0.2255, "step": 4393 }, { "epoch": 0.35, "grad_norm": 11.70737911324868, "learning_rate": 7.525387328634929e-06, "loss": 0.5338, "step": 4394 }, { "epoch": 0.35, "grad_norm": 1.6002234435781306, "learning_rate": 7.524269129659674e-06, "loss": 0.2256, "step": 4395 }, { "epoch": 0.35, "grad_norm": 5.741025361112017, "learning_rate": 7.5231507612225814e-06, "loss": 0.5213, "step": 4396 }, { "epoch": 0.35, "grad_norm": 1.2689030575416727, "learning_rate": 7.522032223398736e-06, "loss": 0.1957, "step": 4397 }, { "epoch": 0.35, "grad_norm": 1.3010050325158051, "learning_rate": 7.520913516263229e-06, "loss": 0.1878, "step": 4398 }, { "epoch": 0.35, "grad_norm": 1.3775197801340422, "learning_rate": 7.519794639891153e-06, "loss": 0.2217, "step": 4399 }, { "epoch": 0.35, "grad_norm": 1.3418745999799693, "learning_rate": 7.5186755943576324e-06, "loss": 0.2221, "step": 4400 }, { "epoch": 0.35, "grad_norm": 5.041694730428965, "learning_rate": 7.517556379737787e-06, "loss": 0.6515, "step": 4401 }, { "epoch": 0.35, "grad_norm": 1.6790298924946974, "learning_rate": 7.5164369961067555e-06, "loss": 0.2029, "step": 4402 }, { "epoch": 0.35, "grad_norm": 1.550829115945779, "learning_rate": 7.51531744353968e-06, "loss": 0.1829, "step": 4403 }, { "epoch": 0.35, "grad_norm": 1.275127835390056, "learning_rate": 7.514197722111727e-06, "loss": 0.1653, "step": 4404 }, { "epoch": 0.35, "grad_norm": 4.4518442474388475, "learning_rate": 7.5130778318980614e-06, "loss": 0.6706, "step": 4405 }, { "epoch": 0.35, "grad_norm": 1.5544527304712086, "learning_rate": 7.511957772973868e-06, "loss": 0.2302, "step": 4406 }, { "epoch": 0.35, "grad_norm": 1.0893096500926531, "learning_rate": 7.5108375454143375e-06, "loss": 0.1694, "step": 4407 }, { "epoch": 0.35, "grad_norm": 1.2604724638638392, "learning_rate": 7.509717149294675e-06, "loss": 0.19, "step": 4408 }, { "epoch": 0.35, "grad_norm": 1.5297797021274266, "learning_rate": 7.508596584690096e-06, "loss": 0.2385, "step": 4409 }, { "epoch": 0.35, "grad_norm": 1.2810236237157906, "learning_rate": 7.5074758516758276e-06, "loss": 0.1811, "step": 4410 }, { "epoch": 0.35, "grad_norm": 1.2863407868743817, "learning_rate": 7.506354950327108e-06, "loss": 0.2402, "step": 4411 }, { "epoch": 0.35, "grad_norm": 1.3715753637275603, "learning_rate": 7.5052338807191875e-06, "loss": 0.199, "step": 4412 }, { "epoch": 0.35, "grad_norm": 1.3348736405952355, "learning_rate": 7.504112642927324e-06, "loss": 0.1711, "step": 4413 }, { "epoch": 0.35, "grad_norm": 1.632058193267838, "learning_rate": 7.502991237026794e-06, "loss": 0.1985, "step": 4414 }, { "epoch": 0.35, "grad_norm": 1.4867185206000277, "learning_rate": 7.501869663092875e-06, "loss": 0.2013, "step": 4415 }, { "epoch": 0.35, "grad_norm": 1.500792796518845, "learning_rate": 7.5007479212008685e-06, "loss": 0.1965, "step": 4416 }, { "epoch": 0.35, "grad_norm": 1.432326021842077, "learning_rate": 7.499626011426077e-06, "loss": 0.2196, "step": 4417 }, { "epoch": 0.35, "grad_norm": 1.3485624477554385, "learning_rate": 7.498503933843816e-06, "loss": 0.1963, "step": 4418 }, { "epoch": 0.35, "grad_norm": 1.5399879571735526, "learning_rate": 7.497381688529417e-06, "loss": 0.2517, "step": 4419 }, { "epoch": 0.35, "grad_norm": 1.2981810327993155, "learning_rate": 7.496259275558218e-06, "loss": 0.2044, "step": 4420 }, { "epoch": 0.35, "grad_norm": 1.3588820883825423, "learning_rate": 7.4951366950055695e-06, "loss": 0.1934, "step": 4421 }, { "epoch": 0.35, "grad_norm": 1.249244737874179, "learning_rate": 7.4940139469468364e-06, "loss": 0.1831, "step": 4422 }, { "epoch": 0.35, "grad_norm": 5.4673879008021204, "learning_rate": 7.49289103145739e-06, "loss": 0.7449, "step": 4423 }, { "epoch": 0.35, "grad_norm": 1.3034845124653658, "learning_rate": 7.491767948612616e-06, "loss": 0.1974, "step": 4424 }, { "epoch": 0.35, "grad_norm": 1.1637042439452236, "learning_rate": 7.490644698487909e-06, "loss": 0.1844, "step": 4425 }, { "epoch": 0.35, "grad_norm": 1.4068526917031556, "learning_rate": 7.489521281158677e-06, "loss": 0.1695, "step": 4426 }, { "epoch": 0.35, "grad_norm": 1.132983317815409, "learning_rate": 7.488397696700338e-06, "loss": 0.1771, "step": 4427 }, { "epoch": 0.35, "grad_norm": 1.4014519145012532, "learning_rate": 7.487273945188323e-06, "loss": 0.2177, "step": 4428 }, { "epoch": 0.35, "grad_norm": 1.2796188291075352, "learning_rate": 7.486150026698071e-06, "loss": 0.2217, "step": 4429 }, { "epoch": 0.35, "grad_norm": 1.323096300133733, "learning_rate": 7.485025941305036e-06, "loss": 0.1713, "step": 4430 }, { "epoch": 0.35, "grad_norm": 1.4254883352883738, "learning_rate": 7.48390168908468e-06, "loss": 0.2323, "step": 4431 }, { "epoch": 0.35, "grad_norm": 1.415031413768985, "learning_rate": 7.482777270112477e-06, "loss": 0.1785, "step": 4432 }, { "epoch": 0.35, "grad_norm": 1.4311722680785817, "learning_rate": 7.481652684463914e-06, "loss": 0.2344, "step": 4433 }, { "epoch": 0.35, "grad_norm": 1.3419537919599347, "learning_rate": 7.480527932214487e-06, "loss": 0.1723, "step": 4434 }, { "epoch": 0.35, "grad_norm": 1.3861655832970259, "learning_rate": 7.4794030134397055e-06, "loss": 0.19, "step": 4435 }, { "epoch": 0.35, "grad_norm": 1.5363110987871995, "learning_rate": 7.478277928215085e-06, "loss": 0.2127, "step": 4436 }, { "epoch": 0.35, "grad_norm": 1.3468114136713882, "learning_rate": 7.47715267661616e-06, "loss": 0.216, "step": 4437 }, { "epoch": 0.36, "grad_norm": 1.397027404279438, "learning_rate": 7.476027258718469e-06, "loss": 0.2355, "step": 4438 }, { "epoch": 0.36, "grad_norm": 1.3836591345656608, "learning_rate": 7.4749016745975675e-06, "loss": 0.2317, "step": 4439 }, { "epoch": 0.36, "grad_norm": 1.3216636414724485, "learning_rate": 7.473775924329018e-06, "loss": 0.1963, "step": 4440 }, { "epoch": 0.36, "grad_norm": 1.476499902791249, "learning_rate": 7.4726500079883935e-06, "loss": 0.1972, "step": 4441 }, { "epoch": 0.36, "grad_norm": 1.4891834781552717, "learning_rate": 7.471523925651285e-06, "loss": 0.2504, "step": 4442 }, { "epoch": 0.36, "grad_norm": 1.5221223543218547, "learning_rate": 7.4703976773932855e-06, "loss": 0.2362, "step": 4443 }, { "epoch": 0.36, "grad_norm": 1.3954100546000903, "learning_rate": 7.469271263290003e-06, "loss": 0.177, "step": 4444 }, { "epoch": 0.36, "grad_norm": 1.3337702311794086, "learning_rate": 7.468144683417061e-06, "loss": 0.2635, "step": 4445 }, { "epoch": 0.36, "grad_norm": 1.3478397701041323, "learning_rate": 7.467017937850088e-06, "loss": 0.21, "step": 4446 }, { "epoch": 0.36, "grad_norm": 10.042519453332044, "learning_rate": 7.465891026664726e-06, "loss": 0.5139, "step": 4447 }, { "epoch": 0.36, "grad_norm": 1.3536591577352095, "learning_rate": 7.4647639499366275e-06, "loss": 0.1852, "step": 4448 }, { "epoch": 0.36, "grad_norm": 1.467150668110183, "learning_rate": 7.463636707741458e-06, "loss": 0.2346, "step": 4449 }, { "epoch": 0.36, "grad_norm": 1.294432452525009, "learning_rate": 7.462509300154892e-06, "loss": 0.1964, "step": 4450 }, { "epoch": 0.36, "grad_norm": 1.1554747495352153, "learning_rate": 7.461381727252615e-06, "loss": 0.1698, "step": 4451 }, { "epoch": 0.36, "grad_norm": 1.4096047780387313, "learning_rate": 7.4602539891103254e-06, "loss": 0.2462, "step": 4452 }, { "epoch": 0.36, "grad_norm": 1.3405073739596516, "learning_rate": 7.459126085803731e-06, "loss": 0.219, "step": 4453 }, { "epoch": 0.36, "grad_norm": 1.221749352731346, "learning_rate": 7.4579980174085506e-06, "loss": 0.1588, "step": 4454 }, { "epoch": 0.36, "grad_norm": 1.3178386026120088, "learning_rate": 7.456869784000517e-06, "loss": 0.2234, "step": 4455 }, { "epoch": 0.36, "grad_norm": 1.3544038502193925, "learning_rate": 7.455741385655371e-06, "loss": 0.186, "step": 4456 }, { "epoch": 0.36, "grad_norm": 1.3475199779534466, "learning_rate": 7.454612822448865e-06, "loss": 0.1945, "step": 4457 }, { "epoch": 0.36, "grad_norm": 1.3011295094363582, "learning_rate": 7.453484094456761e-06, "loss": 0.1825, "step": 4458 }, { "epoch": 0.36, "grad_norm": 5.213867702758694, "learning_rate": 7.45235520175484e-06, "loss": 0.4681, "step": 4459 }, { "epoch": 0.36, "grad_norm": 1.306280923665334, "learning_rate": 7.4512261444188805e-06, "loss": 0.1911, "step": 4460 }, { "epoch": 0.36, "grad_norm": 1.3994551049174764, "learning_rate": 7.450096922524683e-06, "loss": 0.2697, "step": 4461 }, { "epoch": 0.36, "grad_norm": 5.933099316204779, "learning_rate": 7.448967536148057e-06, "loss": 0.5556, "step": 4462 }, { "epoch": 0.36, "grad_norm": 1.231002763921294, "learning_rate": 7.447837985364821e-06, "loss": 0.1646, "step": 4463 }, { "epoch": 0.36, "grad_norm": 4.8783328863749436, "learning_rate": 7.446708270250803e-06, "loss": 0.609, "step": 4464 }, { "epoch": 0.36, "grad_norm": 1.307839553343257, "learning_rate": 7.445578390881846e-06, "loss": 0.1977, "step": 4465 }, { "epoch": 0.36, "grad_norm": 1.6286465022839878, "learning_rate": 7.444448347333802e-06, "loss": 0.1956, "step": 4466 }, { "epoch": 0.36, "grad_norm": 1.3541853826039754, "learning_rate": 7.4433181396825334e-06, "loss": 0.236, "step": 4467 }, { "epoch": 0.36, "grad_norm": 1.3256808977320573, "learning_rate": 7.442187768003916e-06, "loss": 0.2146, "step": 4468 }, { "epoch": 0.36, "grad_norm": 1.6127973932410316, "learning_rate": 7.441057232373834e-06, "loss": 0.2438, "step": 4469 }, { "epoch": 0.36, "grad_norm": 1.289192808470553, "learning_rate": 7.439926532868183e-06, "loss": 0.1672, "step": 4470 }, { "epoch": 0.36, "grad_norm": 1.3835273122625376, "learning_rate": 7.438795669562873e-06, "loss": 0.2173, "step": 4471 }, { "epoch": 0.36, "grad_norm": 1.2330883130982486, "learning_rate": 7.437664642533817e-06, "loss": 0.1971, "step": 4472 }, { "epoch": 0.36, "grad_norm": 1.33202662008548, "learning_rate": 7.436533451856951e-06, "loss": 0.2005, "step": 4473 }, { "epoch": 0.36, "grad_norm": 1.3826294644597923, "learning_rate": 7.435402097608211e-06, "loss": 0.2169, "step": 4474 }, { "epoch": 0.36, "grad_norm": 1.4883545797424773, "learning_rate": 7.434270579863549e-06, "loss": 0.241, "step": 4475 }, { "epoch": 0.36, "grad_norm": 1.2649737180228018, "learning_rate": 7.433138898698927e-06, "loss": 0.1761, "step": 4476 }, { "epoch": 0.36, "grad_norm": 1.378872533239398, "learning_rate": 7.43200705419032e-06, "loss": 0.2255, "step": 4477 }, { "epoch": 0.36, "grad_norm": 1.332321703964259, "learning_rate": 7.430875046413709e-06, "loss": 0.1996, "step": 4478 }, { "epoch": 0.36, "grad_norm": 1.1082217396660838, "learning_rate": 7.429742875445092e-06, "loss": 0.1566, "step": 4479 }, { "epoch": 0.36, "grad_norm": 1.2631845816238365, "learning_rate": 7.428610541360475e-06, "loss": 0.1538, "step": 4480 }, { "epoch": 0.36, "grad_norm": 1.288802401433073, "learning_rate": 7.427478044235872e-06, "loss": 0.1967, "step": 4481 }, { "epoch": 0.36, "grad_norm": 1.37517899911686, "learning_rate": 7.426345384147313e-06, "loss": 0.2017, "step": 4482 }, { "epoch": 0.36, "grad_norm": 1.2862295081156347, "learning_rate": 7.425212561170838e-06, "loss": 0.1734, "step": 4483 }, { "epoch": 0.36, "grad_norm": 1.262494639198926, "learning_rate": 7.424079575382495e-06, "loss": 0.1633, "step": 4484 }, { "epoch": 0.36, "grad_norm": 5.60514519373858, "learning_rate": 7.422946426858346e-06, "loss": 0.6855, "step": 4485 }, { "epoch": 0.36, "grad_norm": 1.282869454889024, "learning_rate": 7.421813115674461e-06, "loss": 0.1988, "step": 4486 }, { "epoch": 0.36, "grad_norm": 1.4138422959112926, "learning_rate": 7.420679641906925e-06, "loss": 0.1994, "step": 4487 }, { "epoch": 0.36, "grad_norm": 1.405506923011502, "learning_rate": 7.4195460056318304e-06, "loss": 0.1993, "step": 4488 }, { "epoch": 0.36, "grad_norm": 1.2642286165144958, "learning_rate": 7.41841220692528e-06, "loss": 0.1658, "step": 4489 }, { "epoch": 0.36, "grad_norm": 1.3616416866587953, "learning_rate": 7.417278245863391e-06, "loss": 0.211, "step": 4490 }, { "epoch": 0.36, "grad_norm": 1.3285358714578297, "learning_rate": 7.4161441225222905e-06, "loss": 0.1575, "step": 4491 }, { "epoch": 0.36, "grad_norm": 1.4567799596116144, "learning_rate": 7.415009836978113e-06, "loss": 0.2361, "step": 4492 }, { "epoch": 0.36, "grad_norm": 1.5233089215006175, "learning_rate": 7.413875389307008e-06, "loss": 0.198, "step": 4493 }, { "epoch": 0.36, "grad_norm": 1.3885142700008979, "learning_rate": 7.412740779585137e-06, "loss": 0.2041, "step": 4494 }, { "epoch": 0.36, "grad_norm": 1.2577220357179733, "learning_rate": 7.411606007888665e-06, "loss": 0.1737, "step": 4495 }, { "epoch": 0.36, "grad_norm": 1.3335098064591036, "learning_rate": 7.410471074293774e-06, "loss": 0.2097, "step": 4496 }, { "epoch": 0.36, "grad_norm": 1.1894209347843252, "learning_rate": 7.409335978876657e-06, "loss": 0.2024, "step": 4497 }, { "epoch": 0.36, "grad_norm": 1.298540103583728, "learning_rate": 7.408200721713517e-06, "loss": 0.1924, "step": 4498 }, { "epoch": 0.36, "grad_norm": 1.3886252595589323, "learning_rate": 7.407065302880564e-06, "loss": 0.2372, "step": 4499 }, { "epoch": 0.36, "grad_norm": 1.398388547903395, "learning_rate": 7.405929722454026e-06, "loss": 0.1855, "step": 4500 }, { "epoch": 0.36, "grad_norm": 1.52892238362788, "learning_rate": 7.404793980510135e-06, "loss": 0.1994, "step": 4501 }, { "epoch": 0.36, "grad_norm": 1.3153486417070632, "learning_rate": 7.4036580771251375e-06, "loss": 0.1828, "step": 4502 }, { "epoch": 0.36, "grad_norm": 1.2497810285844588, "learning_rate": 7.402522012375292e-06, "loss": 0.2037, "step": 4503 }, { "epoch": 0.36, "grad_norm": 1.5388946422378202, "learning_rate": 7.401385786336863e-06, "loss": 0.257, "step": 4504 }, { "epoch": 0.36, "grad_norm": 1.3151955903898773, "learning_rate": 7.4002493990861314e-06, "loss": 0.209, "step": 4505 }, { "epoch": 0.36, "grad_norm": 1.412572195019964, "learning_rate": 7.399112850699384e-06, "loss": 0.1998, "step": 4506 }, { "epoch": 0.36, "grad_norm": 1.4964431110948937, "learning_rate": 7.397976141252923e-06, "loss": 0.244, "step": 4507 }, { "epoch": 0.36, "grad_norm": 1.4186408474924375, "learning_rate": 7.396839270823057e-06, "loss": 0.2129, "step": 4508 }, { "epoch": 0.36, "grad_norm": 1.5772421167161794, "learning_rate": 7.395702239486109e-06, "loss": 0.2617, "step": 4509 }, { "epoch": 0.36, "grad_norm": 1.471832027951409, "learning_rate": 7.39456504731841e-06, "loss": 0.2346, "step": 4510 }, { "epoch": 0.36, "grad_norm": 1.3487777087599147, "learning_rate": 7.3934276943963065e-06, "loss": 0.2483, "step": 4511 }, { "epoch": 0.36, "grad_norm": 1.1724074669821147, "learning_rate": 7.39229018079615e-06, "loss": 0.1791, "step": 4512 }, { "epoch": 0.36, "grad_norm": 1.4691126837807114, "learning_rate": 7.391152506594304e-06, "loss": 0.2012, "step": 4513 }, { "epoch": 0.36, "grad_norm": 1.121554013558425, "learning_rate": 7.390014671867145e-06, "loss": 0.1788, "step": 4514 }, { "epoch": 0.36, "grad_norm": 1.5676402516846408, "learning_rate": 7.3888766766910605e-06, "loss": 0.2129, "step": 4515 }, { "epoch": 0.36, "grad_norm": 1.4040702081770413, "learning_rate": 7.387738521142445e-06, "loss": 0.2457, "step": 4516 }, { "epoch": 0.36, "grad_norm": 1.4514816758232465, "learning_rate": 7.3866002052977096e-06, "loss": 0.2072, "step": 4517 }, { "epoch": 0.36, "grad_norm": 1.454291681952188, "learning_rate": 7.385461729233271e-06, "loss": 0.2487, "step": 4518 }, { "epoch": 0.36, "grad_norm": 1.3710475888999527, "learning_rate": 7.384323093025558e-06, "loss": 0.2349, "step": 4519 }, { "epoch": 0.36, "grad_norm": 1.5358186265975866, "learning_rate": 7.383184296751014e-06, "loss": 0.215, "step": 4520 }, { "epoch": 0.36, "grad_norm": 3.744461193022722, "learning_rate": 7.382045340486086e-06, "loss": 0.5168, "step": 4521 }, { "epoch": 0.36, "grad_norm": 1.2789260824127995, "learning_rate": 7.380906224307236e-06, "loss": 0.1946, "step": 4522 }, { "epoch": 0.36, "grad_norm": 1.3433354534259556, "learning_rate": 7.379766948290937e-06, "loss": 0.1858, "step": 4523 }, { "epoch": 0.36, "grad_norm": 1.279715167027864, "learning_rate": 7.3786275125136745e-06, "loss": 0.1659, "step": 4524 }, { "epoch": 0.36, "grad_norm": 1.1274172792952344, "learning_rate": 7.3774879170519386e-06, "loss": 0.1359, "step": 4525 }, { "epoch": 0.36, "grad_norm": 1.2365332901154997, "learning_rate": 7.376348161982236e-06, "loss": 0.1399, "step": 4526 }, { "epoch": 0.36, "grad_norm": 1.1202514511877528, "learning_rate": 7.37520824738108e-06, "loss": 0.1812, "step": 4527 }, { "epoch": 0.36, "grad_norm": 1.2439707639750857, "learning_rate": 7.374068173325e-06, "loss": 0.1781, "step": 4528 }, { "epoch": 0.36, "grad_norm": 1.3935584089382431, "learning_rate": 7.37292793989053e-06, "loss": 0.1785, "step": 4529 }, { "epoch": 0.36, "grad_norm": 1.2242613495163874, "learning_rate": 7.371787547154215e-06, "loss": 0.1761, "step": 4530 }, { "epoch": 0.36, "grad_norm": 1.2769778507428013, "learning_rate": 7.370646995192618e-06, "loss": 0.1771, "step": 4531 }, { "epoch": 0.36, "grad_norm": 4.816999362916801, "learning_rate": 7.369506284082306e-06, "loss": 0.5785, "step": 4532 }, { "epoch": 0.36, "grad_norm": 1.3920909405915207, "learning_rate": 7.3683654138998576e-06, "loss": 0.1887, "step": 4533 }, { "epoch": 0.36, "grad_norm": 1.289094729493865, "learning_rate": 7.367224384721861e-06, "loss": 0.1824, "step": 4534 }, { "epoch": 0.36, "grad_norm": 1.374885122074753, "learning_rate": 7.36608319662492e-06, "loss": 0.1966, "step": 4535 }, { "epoch": 0.36, "grad_norm": 1.353939476108601, "learning_rate": 7.364941849685646e-06, "loss": 0.2219, "step": 4536 }, { "epoch": 0.36, "grad_norm": 1.54695126040923, "learning_rate": 7.363800343980659e-06, "loss": 0.2192, "step": 4537 }, { "epoch": 0.36, "grad_norm": 1.4553934383941032, "learning_rate": 7.3626586795865936e-06, "loss": 0.2387, "step": 4538 }, { "epoch": 0.36, "grad_norm": 1.3398652503622337, "learning_rate": 7.361516856580093e-06, "loss": 0.1965, "step": 4539 }, { "epoch": 0.36, "grad_norm": 1.239725004914783, "learning_rate": 7.36037487503781e-06, "loss": 0.1796, "step": 4540 }, { "epoch": 0.36, "grad_norm": 1.3907444317559783, "learning_rate": 7.35923273503641e-06, "loss": 0.2056, "step": 4541 }, { "epoch": 0.36, "grad_norm": 4.790279110492094, "learning_rate": 7.358090436652568e-06, "loss": 0.7062, "step": 4542 }, { "epoch": 0.36, "grad_norm": 1.3839464487762783, "learning_rate": 7.356947979962972e-06, "loss": 0.2289, "step": 4543 }, { "epoch": 0.36, "grad_norm": 1.6018934204000703, "learning_rate": 7.355805365044314e-06, "loss": 0.2699, "step": 4544 }, { "epoch": 0.36, "grad_norm": 5.131398515008712, "learning_rate": 7.3546625919733065e-06, "loss": 0.6112, "step": 4545 }, { "epoch": 0.36, "grad_norm": 1.4232478849224517, "learning_rate": 7.353519660826665e-06, "loss": 0.2216, "step": 4546 }, { "epoch": 0.36, "grad_norm": 1.4422033385752677, "learning_rate": 7.352376571681114e-06, "loss": 0.2172, "step": 4547 }, { "epoch": 0.36, "grad_norm": 1.401986030227101, "learning_rate": 7.3512333246134e-06, "loss": 0.1858, "step": 4548 }, { "epoch": 0.36, "grad_norm": 1.2844177905785152, "learning_rate": 7.350089919700268e-06, "loss": 0.2014, "step": 4549 }, { "epoch": 0.36, "grad_norm": 1.3023427377276269, "learning_rate": 7.348946357018479e-06, "loss": 0.2172, "step": 4550 }, { "epoch": 0.36, "grad_norm": 1.2719449461518417, "learning_rate": 7.347802636644804e-06, "loss": 0.1875, "step": 4551 }, { "epoch": 0.36, "grad_norm": 1.427846722096851, "learning_rate": 7.3466587586560245e-06, "loss": 0.2561, "step": 4552 }, { "epoch": 0.36, "grad_norm": 1.380321257060329, "learning_rate": 7.345514723128932e-06, "loss": 0.2049, "step": 4553 }, { "epoch": 0.36, "grad_norm": 1.3924644565521185, "learning_rate": 7.344370530140331e-06, "loss": 0.187, "step": 4554 }, { "epoch": 0.36, "grad_norm": 1.3426902500988527, "learning_rate": 7.343226179767034e-06, "loss": 0.2185, "step": 4555 }, { "epoch": 0.36, "grad_norm": 1.4944494104212183, "learning_rate": 7.342081672085861e-06, "loss": 0.25, "step": 4556 }, { "epoch": 0.36, "grad_norm": 1.5009986963432371, "learning_rate": 7.340937007173651e-06, "loss": 0.2286, "step": 4557 }, { "epoch": 0.36, "grad_norm": 1.3147383534369999, "learning_rate": 7.339792185107245e-06, "loss": 0.2548, "step": 4558 }, { "epoch": 0.36, "grad_norm": 4.721786663233292, "learning_rate": 7.338647205963502e-06, "loss": 0.5509, "step": 4559 }, { "epoch": 0.36, "grad_norm": 1.3787574653497223, "learning_rate": 7.337502069819285e-06, "loss": 0.2197, "step": 4560 }, { "epoch": 0.36, "grad_norm": 5.66993794136623, "learning_rate": 7.336356776751472e-06, "loss": 0.6364, "step": 4561 }, { "epoch": 0.36, "grad_norm": 1.6200029166625864, "learning_rate": 7.335211326836951e-06, "loss": 0.2453, "step": 4562 }, { "epoch": 0.37, "grad_norm": 1.3473429751541997, "learning_rate": 7.334065720152617e-06, "loss": 0.2371, "step": 4563 }, { "epoch": 0.37, "grad_norm": 1.296567216967196, "learning_rate": 7.3329199567753796e-06, "loss": 0.2246, "step": 4564 }, { "epoch": 0.37, "grad_norm": 1.4202835188076974, "learning_rate": 7.331774036782158e-06, "loss": 0.2257, "step": 4565 }, { "epoch": 0.37, "grad_norm": 4.966647827600356, "learning_rate": 7.3306279602498785e-06, "loss": 0.5718, "step": 4566 }, { "epoch": 0.37, "grad_norm": 1.2209317147261978, "learning_rate": 7.329481727255484e-06, "loss": 0.2024, "step": 4567 }, { "epoch": 0.37, "grad_norm": 1.4261913915726283, "learning_rate": 7.328335337875921e-06, "loss": 0.169, "step": 4568 }, { "epoch": 0.37, "grad_norm": 4.076045592292064, "learning_rate": 7.327188792188155e-06, "loss": 0.6056, "step": 4569 }, { "epoch": 0.37, "grad_norm": 1.2241143578854154, "learning_rate": 7.326042090269152e-06, "loss": 0.209, "step": 4570 }, { "epoch": 0.37, "grad_norm": 1.3288193003278035, "learning_rate": 7.324895232195898e-06, "loss": 0.2081, "step": 4571 }, { "epoch": 0.37, "grad_norm": 1.4444692075856738, "learning_rate": 7.32374821804538e-06, "loss": 0.2201, "step": 4572 }, { "epoch": 0.37, "grad_norm": 1.3493685407625815, "learning_rate": 7.322601047894607e-06, "loss": 0.2055, "step": 4573 }, { "epoch": 0.37, "grad_norm": 1.3575542573181467, "learning_rate": 7.321453721820586e-06, "loss": 0.1771, "step": 4574 }, { "epoch": 0.37, "grad_norm": 1.301417514470607, "learning_rate": 7.320306239900343e-06, "loss": 0.1945, "step": 4575 }, { "epoch": 0.37, "grad_norm": 1.6454748602880356, "learning_rate": 7.319158602210913e-06, "loss": 0.2267, "step": 4576 }, { "epoch": 0.37, "grad_norm": 5.128095455240721, "learning_rate": 7.318010808829339e-06, "loss": 0.6972, "step": 4577 }, { "epoch": 0.37, "grad_norm": 6.781483942919324, "learning_rate": 7.316862859832675e-06, "loss": 0.5377, "step": 4578 }, { "epoch": 0.37, "grad_norm": 1.2912530661845676, "learning_rate": 7.315714755297988e-06, "loss": 0.1943, "step": 4579 }, { "epoch": 0.37, "grad_norm": 1.401066836588219, "learning_rate": 7.314566495302353e-06, "loss": 0.2702, "step": 4580 }, { "epoch": 0.37, "grad_norm": 1.2645527815254818, "learning_rate": 7.313418079922858e-06, "loss": 0.1811, "step": 4581 }, { "epoch": 0.37, "grad_norm": 1.7328561206365583, "learning_rate": 7.3122695092365955e-06, "loss": 0.2325, "step": 4582 }, { "epoch": 0.37, "grad_norm": 1.463693038641647, "learning_rate": 7.311120783320676e-06, "loss": 0.2039, "step": 4583 }, { "epoch": 0.37, "grad_norm": 1.2337237203382514, "learning_rate": 7.309971902252216e-06, "loss": 0.2036, "step": 4584 }, { "epoch": 0.37, "grad_norm": 1.4682135718410099, "learning_rate": 7.308822866108343e-06, "loss": 0.235, "step": 4585 }, { "epoch": 0.37, "grad_norm": 1.4977696648088044, "learning_rate": 7.307673674966196e-06, "loss": 0.2319, "step": 4586 }, { "epoch": 0.37, "grad_norm": 1.2840219102661012, "learning_rate": 7.306524328902923e-06, "loss": 0.1814, "step": 4587 }, { "epoch": 0.37, "grad_norm": 1.2594684362184525, "learning_rate": 7.305374827995684e-06, "loss": 0.1915, "step": 4588 }, { "epoch": 0.37, "grad_norm": 1.5247346409080598, "learning_rate": 7.304225172321646e-06, "loss": 0.2003, "step": 4589 }, { "epoch": 0.37, "grad_norm": 16.94837189189484, "learning_rate": 7.303075361957992e-06, "loss": 0.6169, "step": 4590 }, { "epoch": 0.37, "grad_norm": 1.3024394632314393, "learning_rate": 7.301925396981912e-06, "loss": 0.2076, "step": 4591 }, { "epoch": 0.37, "grad_norm": 1.4407981513835169, "learning_rate": 7.300775277470602e-06, "loss": 0.1901, "step": 4592 }, { "epoch": 0.37, "grad_norm": 4.603719147363319, "learning_rate": 7.29962500350128e-06, "loss": 0.6748, "step": 4593 }, { "epoch": 0.37, "grad_norm": 1.46696115351919, "learning_rate": 7.298474575151162e-06, "loss": 0.2155, "step": 4594 }, { "epoch": 0.37, "grad_norm": 1.3030749933965713, "learning_rate": 7.297323992497483e-06, "loss": 0.1432, "step": 4595 }, { "epoch": 0.37, "grad_norm": 1.3259812525285162, "learning_rate": 7.2961732556174826e-06, "loss": 0.1954, "step": 4596 }, { "epoch": 0.37, "grad_norm": 8.141472081338822, "learning_rate": 7.295022364588414e-06, "loss": 0.6461, "step": 4597 }, { "epoch": 0.37, "grad_norm": 1.2892807824588552, "learning_rate": 7.293871319487542e-06, "loss": 0.1816, "step": 4598 }, { "epoch": 0.37, "grad_norm": 1.4347591466152476, "learning_rate": 7.292720120392137e-06, "loss": 0.2466, "step": 4599 }, { "epoch": 0.37, "grad_norm": 5.386323116832039, "learning_rate": 7.291568767379484e-06, "loss": 0.5947, "step": 4600 }, { "epoch": 0.37, "grad_norm": 5.9477206071544195, "learning_rate": 7.290417260526877e-06, "loss": 0.6105, "step": 4601 }, { "epoch": 0.37, "grad_norm": 1.4981837006776133, "learning_rate": 7.289265599911619e-06, "loss": 0.1852, "step": 4602 }, { "epoch": 0.37, "grad_norm": 1.4307276165576726, "learning_rate": 7.288113785611025e-06, "loss": 0.2225, "step": 4603 }, { "epoch": 0.37, "grad_norm": 1.2524610704747874, "learning_rate": 7.2869618177024204e-06, "loss": 0.1617, "step": 4604 }, { "epoch": 0.37, "grad_norm": 1.468193183133097, "learning_rate": 7.2858096962631395e-06, "loss": 0.1569, "step": 4605 }, { "epoch": 0.37, "grad_norm": 1.2935021472727608, "learning_rate": 7.284657421370528e-06, "loss": 0.2136, "step": 4606 }, { "epoch": 0.37, "grad_norm": 3.8583769118854763, "learning_rate": 7.283504993101942e-06, "loss": 0.3823, "step": 4607 }, { "epoch": 0.37, "grad_norm": 1.501302182949338, "learning_rate": 7.282352411534747e-06, "loss": 0.2129, "step": 4608 }, { "epoch": 0.37, "grad_norm": 1.401695867666399, "learning_rate": 7.281199676746318e-06, "loss": 0.2456, "step": 4609 }, { "epoch": 0.37, "grad_norm": 1.3906372708228258, "learning_rate": 7.280046788814045e-06, "loss": 0.194, "step": 4610 }, { "epoch": 0.37, "grad_norm": 1.4424034482372718, "learning_rate": 7.278893747815321e-06, "loss": 0.2461, "step": 4611 }, { "epoch": 0.37, "grad_norm": 1.4343741351432249, "learning_rate": 7.277740553827556e-06, "loss": 0.2205, "step": 4612 }, { "epoch": 0.37, "grad_norm": 1.5053663128084247, "learning_rate": 7.2765872069281655e-06, "loss": 0.2181, "step": 4613 }, { "epoch": 0.37, "grad_norm": 1.3327550329491824, "learning_rate": 7.275433707194579e-06, "loss": 0.2259, "step": 4614 }, { "epoch": 0.37, "grad_norm": 6.489346800317658, "learning_rate": 7.274280054704232e-06, "loss": 0.6711, "step": 4615 }, { "epoch": 0.37, "grad_norm": 1.3095953620270868, "learning_rate": 7.273126249534576e-06, "loss": 0.2013, "step": 4616 }, { "epoch": 0.37, "grad_norm": 1.765202329043797, "learning_rate": 7.271972291763066e-06, "loss": 0.2085, "step": 4617 }, { "epoch": 0.37, "grad_norm": 1.309552408526066, "learning_rate": 7.270818181467174e-06, "loss": 0.2327, "step": 4618 }, { "epoch": 0.37, "grad_norm": 1.369767798704067, "learning_rate": 7.269663918724376e-06, "loss": 0.1949, "step": 4619 }, { "epoch": 0.37, "grad_norm": 1.2143494109955377, "learning_rate": 7.268509503612162e-06, "loss": 0.2107, "step": 4620 }, { "epoch": 0.37, "grad_norm": 1.4333956883802197, "learning_rate": 7.267354936208032e-06, "loss": 0.2305, "step": 4621 }, { "epoch": 0.37, "grad_norm": 1.5204917150270796, "learning_rate": 7.266200216589495e-06, "loss": 0.211, "step": 4622 }, { "epoch": 0.37, "grad_norm": 1.3521035931488967, "learning_rate": 7.265045344834072e-06, "loss": 0.2046, "step": 4623 }, { "epoch": 0.37, "grad_norm": 1.4821187381043048, "learning_rate": 7.263890321019291e-06, "loss": 0.2448, "step": 4624 }, { "epoch": 0.37, "grad_norm": 13.472179561909549, "learning_rate": 7.262735145222696e-06, "loss": 0.6137, "step": 4625 }, { "epoch": 0.37, "grad_norm": 1.348783700749346, "learning_rate": 7.2615798175218325e-06, "loss": 0.2154, "step": 4626 }, { "epoch": 0.37, "grad_norm": 1.4442483710464578, "learning_rate": 7.260424337994263e-06, "loss": 0.1952, "step": 4627 }, { "epoch": 0.37, "grad_norm": 1.4294802902806512, "learning_rate": 7.2592687067175605e-06, "loss": 0.2687, "step": 4628 }, { "epoch": 0.37, "grad_norm": 1.404617299354495, "learning_rate": 7.2581129237693045e-06, "loss": 0.2042, "step": 4629 }, { "epoch": 0.37, "grad_norm": 1.3925394151137753, "learning_rate": 7.256956989227084e-06, "loss": 0.2181, "step": 4630 }, { "epoch": 0.37, "grad_norm": 1.3489965299530309, "learning_rate": 7.255800903168505e-06, "loss": 0.1824, "step": 4631 }, { "epoch": 0.37, "grad_norm": 1.4067110607303004, "learning_rate": 7.254644665671176e-06, "loss": 0.2007, "step": 4632 }, { "epoch": 0.37, "grad_norm": 1.2825144348655453, "learning_rate": 7.253488276812719e-06, "loss": 0.218, "step": 4633 }, { "epoch": 0.37, "grad_norm": 1.4814934847448538, "learning_rate": 7.252331736670765e-06, "loss": 0.2164, "step": 4634 }, { "epoch": 0.37, "grad_norm": 1.3947338488963392, "learning_rate": 7.251175045322959e-06, "loss": 0.2225, "step": 4635 }, { "epoch": 0.37, "grad_norm": 1.2306781525582993, "learning_rate": 7.250018202846951e-06, "loss": 0.2002, "step": 4636 }, { "epoch": 0.37, "grad_norm": 1.2884757484626046, "learning_rate": 7.2488612093204025e-06, "loss": 0.2077, "step": 4637 }, { "epoch": 0.37, "grad_norm": 1.44123447967655, "learning_rate": 7.247704064820989e-06, "loss": 0.2385, "step": 4638 }, { "epoch": 0.37, "grad_norm": 1.413843015550043, "learning_rate": 7.24654676942639e-06, "loss": 0.2326, "step": 4639 }, { "epoch": 0.37, "grad_norm": 1.2895788492650495, "learning_rate": 7.245389323214301e-06, "loss": 0.2288, "step": 4640 }, { "epoch": 0.37, "grad_norm": 1.2367232140865478, "learning_rate": 7.244231726262422e-06, "loss": 0.2112, "step": 4641 }, { "epoch": 0.37, "grad_norm": 1.512118395217802, "learning_rate": 7.243073978648469e-06, "loss": 0.2033, "step": 4642 }, { "epoch": 0.37, "grad_norm": 1.250700487481848, "learning_rate": 7.241916080450163e-06, "loss": 0.1657, "step": 4643 }, { "epoch": 0.37, "grad_norm": 1.1311745688189312, "learning_rate": 7.240758031745237e-06, "loss": 0.1688, "step": 4644 }, { "epoch": 0.37, "grad_norm": 1.4022151033737413, "learning_rate": 7.2395998326114345e-06, "loss": 0.2099, "step": 4645 }, { "epoch": 0.37, "grad_norm": 1.3712520212644563, "learning_rate": 7.238441483126512e-06, "loss": 0.2309, "step": 4646 }, { "epoch": 0.37, "grad_norm": 1.3075430382988562, "learning_rate": 7.237282983368228e-06, "loss": 0.2121, "step": 4647 }, { "epoch": 0.37, "grad_norm": 1.300659953498008, "learning_rate": 7.236124333414359e-06, "loss": 0.183, "step": 4648 }, { "epoch": 0.37, "grad_norm": 1.4419638416751694, "learning_rate": 7.2349655333426905e-06, "loss": 0.2182, "step": 4649 }, { "epoch": 0.37, "grad_norm": 1.3356492464479306, "learning_rate": 7.233806583231012e-06, "loss": 0.2018, "step": 4650 }, { "epoch": 0.37, "grad_norm": 1.3249028716378075, "learning_rate": 7.23264748315713e-06, "loss": 0.2071, "step": 4651 }, { "epoch": 0.37, "grad_norm": 1.3828948765923208, "learning_rate": 7.231488233198858e-06, "loss": 0.2024, "step": 4652 }, { "epoch": 0.37, "grad_norm": 1.3766039057922546, "learning_rate": 7.2303288334340205e-06, "loss": 0.2122, "step": 4653 }, { "epoch": 0.37, "grad_norm": 1.5144341514830022, "learning_rate": 7.2291692839404495e-06, "loss": 0.253, "step": 4654 }, { "epoch": 0.37, "grad_norm": 1.2494676684544648, "learning_rate": 7.22800958479599e-06, "loss": 0.1987, "step": 4655 }, { "epoch": 0.37, "grad_norm": 1.2612132673659266, "learning_rate": 7.226849736078497e-06, "loss": 0.2011, "step": 4656 }, { "epoch": 0.37, "grad_norm": 1.3699986513963234, "learning_rate": 7.225689737865835e-06, "loss": 0.2314, "step": 4657 }, { "epoch": 0.37, "grad_norm": 1.558499070842386, "learning_rate": 7.224529590235876e-06, "loss": 0.2141, "step": 4658 }, { "epoch": 0.37, "grad_norm": 1.3528678116321762, "learning_rate": 7.223369293266507e-06, "loss": 0.1664, "step": 4659 }, { "epoch": 0.37, "grad_norm": 1.3962024985135608, "learning_rate": 7.222208847035621e-06, "loss": 0.2215, "step": 4660 }, { "epoch": 0.37, "grad_norm": 1.4055564085365457, "learning_rate": 7.22104825162112e-06, "loss": 0.2087, "step": 4661 }, { "epoch": 0.37, "grad_norm": 1.4351363404772675, "learning_rate": 7.219887507100921e-06, "loss": 0.2296, "step": 4662 }, { "epoch": 0.37, "grad_norm": 1.366196893853091, "learning_rate": 7.218726613552949e-06, "loss": 0.2065, "step": 4663 }, { "epoch": 0.37, "grad_norm": 1.3763920825696134, "learning_rate": 7.217565571055137e-06, "loss": 0.2253, "step": 4664 }, { "epoch": 0.37, "grad_norm": 14.146982778521922, "learning_rate": 7.216404379685427e-06, "loss": 0.5625, "step": 4665 }, { "epoch": 0.37, "grad_norm": 1.4421753066478902, "learning_rate": 7.2152430395217765e-06, "loss": 0.229, "step": 4666 }, { "epoch": 0.37, "grad_norm": 1.4464948217931586, "learning_rate": 7.214081550642149e-06, "loss": 0.2479, "step": 4667 }, { "epoch": 0.37, "grad_norm": 1.4656849620299957, "learning_rate": 7.212919913124519e-06, "loss": 0.1908, "step": 4668 }, { "epoch": 0.37, "grad_norm": 1.398624504276929, "learning_rate": 7.211758127046871e-06, "loss": 0.2151, "step": 4669 }, { "epoch": 0.37, "grad_norm": 4.856083965265127, "learning_rate": 7.210596192487198e-06, "loss": 0.6346, "step": 4670 }, { "epoch": 0.37, "grad_norm": 1.3977669365205103, "learning_rate": 7.2094341095235035e-06, "loss": 0.2578, "step": 4671 }, { "epoch": 0.37, "grad_norm": 1.3688436603958962, "learning_rate": 7.208271878233804e-06, "loss": 0.2382, "step": 4672 }, { "epoch": 0.37, "grad_norm": 1.4066932807716008, "learning_rate": 7.207109498696122e-06, "loss": 0.2074, "step": 4673 }, { "epoch": 0.37, "grad_norm": 1.3922291847099941, "learning_rate": 7.205946970988493e-06, "loss": 0.1953, "step": 4674 }, { "epoch": 0.37, "grad_norm": 1.466403899970326, "learning_rate": 7.204784295188959e-06, "loss": 0.2406, "step": 4675 }, { "epoch": 0.37, "grad_norm": 4.537515448389777, "learning_rate": 7.203621471375576e-06, "loss": 0.5576, "step": 4676 }, { "epoch": 0.37, "grad_norm": 1.3274543529146274, "learning_rate": 7.202458499626408e-06, "loss": 0.2067, "step": 4677 }, { "epoch": 0.37, "grad_norm": 1.3956618814986548, "learning_rate": 7.201295380019525e-06, "loss": 0.2263, "step": 4678 }, { "epoch": 0.37, "grad_norm": 6.377930505504033, "learning_rate": 7.200132112633015e-06, "loss": 0.6434, "step": 4679 }, { "epoch": 0.37, "grad_norm": 1.4321209132549046, "learning_rate": 7.19896869754497e-06, "loss": 0.2185, "step": 4680 }, { "epoch": 0.37, "grad_norm": 1.4922829482083375, "learning_rate": 7.197805134833493e-06, "loss": 0.2409, "step": 4681 }, { "epoch": 0.37, "grad_norm": 1.3111282877814505, "learning_rate": 7.196641424576698e-06, "loss": 0.1975, "step": 4682 }, { "epoch": 0.37, "grad_norm": 1.2440097550478715, "learning_rate": 7.19547756685271e-06, "loss": 0.2211, "step": 4683 }, { "epoch": 0.37, "grad_norm": 1.3409633376468209, "learning_rate": 7.194313561739661e-06, "loss": 0.1877, "step": 4684 }, { "epoch": 0.37, "grad_norm": 1.3690105212856414, "learning_rate": 7.193149409315694e-06, "loss": 0.2287, "step": 4685 }, { "epoch": 0.37, "grad_norm": 1.2689817910497139, "learning_rate": 7.191985109658961e-06, "loss": 0.213, "step": 4686 }, { "epoch": 0.37, "grad_norm": 1.3767022262520805, "learning_rate": 7.190820662847627e-06, "loss": 0.2426, "step": 4687 }, { "epoch": 0.38, "grad_norm": 1.4424232108778996, "learning_rate": 7.189656068959863e-06, "loss": 0.2068, "step": 4688 }, { "epoch": 0.38, "grad_norm": 1.4860548701268863, "learning_rate": 7.188491328073853e-06, "loss": 0.2362, "step": 4689 }, { "epoch": 0.38, "grad_norm": 1.5343218494259938, "learning_rate": 7.18732644026779e-06, "loss": 0.1956, "step": 4690 }, { "epoch": 0.38, "grad_norm": 1.2628542747039755, "learning_rate": 7.186161405619876e-06, "loss": 0.1619, "step": 4691 }, { "epoch": 0.38, "grad_norm": 1.5452611344458735, "learning_rate": 7.1849962242083215e-06, "loss": 0.2289, "step": 4692 }, { "epoch": 0.38, "grad_norm": 1.290376781748268, "learning_rate": 7.183830896111351e-06, "loss": 0.2129, "step": 4693 }, { "epoch": 0.38, "grad_norm": 1.1708075625985892, "learning_rate": 7.182665421407196e-06, "loss": 0.2029, "step": 4694 }, { "epoch": 0.38, "grad_norm": 1.2738470024004076, "learning_rate": 7.181499800174099e-06, "loss": 0.174, "step": 4695 }, { "epoch": 0.38, "grad_norm": 1.2877942716957311, "learning_rate": 7.1803340324903096e-06, "loss": 0.2388, "step": 4696 }, { "epoch": 0.38, "grad_norm": 1.4709086675119016, "learning_rate": 7.17916811843409e-06, "loss": 0.1986, "step": 4697 }, { "epoch": 0.38, "grad_norm": 1.3994313067924706, "learning_rate": 7.1780020580837125e-06, "loss": 0.2029, "step": 4698 }, { "epoch": 0.38, "grad_norm": 1.2911034152124485, "learning_rate": 7.176835851517458e-06, "loss": 0.189, "step": 4699 }, { "epoch": 0.38, "grad_norm": 1.3727867959853184, "learning_rate": 7.1756694988136165e-06, "loss": 0.2316, "step": 4700 }, { "epoch": 0.38, "grad_norm": 1.43053224914512, "learning_rate": 7.17450300005049e-06, "loss": 0.2163, "step": 4701 }, { "epoch": 0.38, "grad_norm": 1.110308618160761, "learning_rate": 7.17333635530639e-06, "loss": 0.1577, "step": 4702 }, { "epoch": 0.38, "grad_norm": 1.2549224998408561, "learning_rate": 7.1721695646596335e-06, "loss": 0.2215, "step": 4703 }, { "epoch": 0.38, "grad_norm": 1.3723128666652684, "learning_rate": 7.171002628188554e-06, "loss": 0.2601, "step": 4704 }, { "epoch": 0.38, "grad_norm": 1.2545082552861848, "learning_rate": 7.16983554597149e-06, "loss": 0.2007, "step": 4705 }, { "epoch": 0.38, "grad_norm": 1.5267378614042626, "learning_rate": 7.16866831808679e-06, "loss": 0.2346, "step": 4706 }, { "epoch": 0.38, "grad_norm": 1.3893049688525259, "learning_rate": 7.167500944612815e-06, "loss": 0.2244, "step": 4707 }, { "epoch": 0.38, "grad_norm": 1.256496702377986, "learning_rate": 7.166333425627936e-06, "loss": 0.1872, "step": 4708 }, { "epoch": 0.38, "grad_norm": 1.3895559819080734, "learning_rate": 7.165165761210529e-06, "loss": 0.2506, "step": 4709 }, { "epoch": 0.38, "grad_norm": 1.4527652774161193, "learning_rate": 7.163997951438986e-06, "loss": 0.2436, "step": 4710 }, { "epoch": 0.38, "grad_norm": 5.5130824959754845, "learning_rate": 7.162829996391702e-06, "loss": 0.7399, "step": 4711 }, { "epoch": 0.38, "grad_norm": 1.3606716588125523, "learning_rate": 7.161661896147088e-06, "loss": 0.2269, "step": 4712 }, { "epoch": 0.38, "grad_norm": 1.4586771135839935, "learning_rate": 7.160493650783562e-06, "loss": 0.2084, "step": 4713 }, { "epoch": 0.38, "grad_norm": 1.5036539250450593, "learning_rate": 7.159325260379551e-06, "loss": 0.2258, "step": 4714 }, { "epoch": 0.38, "grad_norm": 1.5168111329496314, "learning_rate": 7.158156725013493e-06, "loss": 0.2066, "step": 4715 }, { "epoch": 0.38, "grad_norm": 1.323394576935599, "learning_rate": 7.156988044763833e-06, "loss": 0.1945, "step": 4716 }, { "epoch": 0.38, "grad_norm": 1.3229632078055302, "learning_rate": 7.155819219709032e-06, "loss": 0.209, "step": 4717 }, { "epoch": 0.38, "grad_norm": 4.772106145246674, "learning_rate": 7.154650249927555e-06, "loss": 0.4629, "step": 4718 }, { "epoch": 0.38, "grad_norm": 1.4597883991305334, "learning_rate": 7.153481135497878e-06, "loss": 0.2366, "step": 4719 }, { "epoch": 0.38, "grad_norm": 1.3743141889587922, "learning_rate": 7.152311876498487e-06, "loss": 0.2011, "step": 4720 }, { "epoch": 0.38, "grad_norm": 1.4131271019310823, "learning_rate": 7.15114247300788e-06, "loss": 0.2255, "step": 4721 }, { "epoch": 0.38, "grad_norm": 1.5259833897546178, "learning_rate": 7.14997292510456e-06, "loss": 0.252, "step": 4722 }, { "epoch": 0.38, "grad_norm": 1.3028279028756722, "learning_rate": 7.148803232867043e-06, "loss": 0.1944, "step": 4723 }, { "epoch": 0.38, "grad_norm": 1.21179511273452, "learning_rate": 7.147633396373855e-06, "loss": 0.2178, "step": 4724 }, { "epoch": 0.38, "grad_norm": 1.390477745753487, "learning_rate": 7.14646341570353e-06, "loss": 0.2154, "step": 4725 }, { "epoch": 0.38, "grad_norm": 1.3760584100121604, "learning_rate": 7.145293290934613e-06, "loss": 0.2051, "step": 4726 }, { "epoch": 0.38, "grad_norm": 1.3856957853375935, "learning_rate": 7.144123022145655e-06, "loss": 0.1745, "step": 4727 }, { "epoch": 0.38, "grad_norm": 1.182377080155907, "learning_rate": 7.1429526094152244e-06, "loss": 0.1751, "step": 4728 }, { "epoch": 0.38, "grad_norm": 5.093060848013929, "learning_rate": 7.141782052821892e-06, "loss": 0.5751, "step": 4729 }, { "epoch": 0.38, "grad_norm": 1.3337009627534544, "learning_rate": 7.14061135244424e-06, "loss": 0.1973, "step": 4730 }, { "epoch": 0.38, "grad_norm": 4.565877356770094, "learning_rate": 7.139440508360863e-06, "loss": 0.4938, "step": 4731 }, { "epoch": 0.38, "grad_norm": 5.426887224584748, "learning_rate": 7.138269520650362e-06, "loss": 0.7291, "step": 4732 }, { "epoch": 0.38, "grad_norm": 1.3691091137941518, "learning_rate": 7.137098389391349e-06, "loss": 0.2296, "step": 4733 }, { "epoch": 0.38, "grad_norm": 1.3661731354440267, "learning_rate": 7.135927114662448e-06, "loss": 0.1818, "step": 4734 }, { "epoch": 0.38, "grad_norm": 1.258596824728972, "learning_rate": 7.134755696542286e-06, "loss": 0.1695, "step": 4735 }, { "epoch": 0.38, "grad_norm": 5.774615072980027, "learning_rate": 7.133584135109507e-06, "loss": 0.47, "step": 4736 }, { "epoch": 0.38, "grad_norm": 1.1692197188097482, "learning_rate": 7.132412430442759e-06, "loss": 0.2076, "step": 4737 }, { "epoch": 0.38, "grad_norm": 1.2800107703228183, "learning_rate": 7.131240582620706e-06, "loss": 0.2037, "step": 4738 }, { "epoch": 0.38, "grad_norm": 1.3722555093795468, "learning_rate": 7.130068591722015e-06, "loss": 0.1921, "step": 4739 }, { "epoch": 0.38, "grad_norm": 1.2223374021938294, "learning_rate": 7.128896457825364e-06, "loss": 0.1446, "step": 4740 }, { "epoch": 0.38, "grad_norm": 1.2453630118627566, "learning_rate": 7.127724181009444e-06, "loss": 0.2061, "step": 4741 }, { "epoch": 0.38, "grad_norm": 1.4541515528343412, "learning_rate": 7.126551761352953e-06, "loss": 0.2114, "step": 4742 }, { "epoch": 0.38, "grad_norm": 1.4615846206925147, "learning_rate": 7.1253791989346e-06, "loss": 0.1727, "step": 4743 }, { "epoch": 0.38, "grad_norm": 1.3042964921822728, "learning_rate": 7.1242064938331e-06, "loss": 0.164, "step": 4744 }, { "epoch": 0.38, "grad_norm": 1.4439156921952279, "learning_rate": 7.123033646127183e-06, "loss": 0.201, "step": 4745 }, { "epoch": 0.38, "grad_norm": 1.328002126416615, "learning_rate": 7.121860655895585e-06, "loss": 0.2016, "step": 4746 }, { "epoch": 0.38, "grad_norm": 1.3894458085701467, "learning_rate": 7.120687523217052e-06, "loss": 0.1808, "step": 4747 }, { "epoch": 0.38, "grad_norm": 1.369086271262368, "learning_rate": 7.11951424817034e-06, "loss": 0.2171, "step": 4748 }, { "epoch": 0.38, "grad_norm": 1.2821568227180404, "learning_rate": 7.118340830834216e-06, "loss": 0.1929, "step": 4749 }, { "epoch": 0.38, "grad_norm": 1.2088893799608302, "learning_rate": 7.117167271287453e-06, "loss": 0.181, "step": 4750 }, { "epoch": 0.38, "grad_norm": 1.4390116330684632, "learning_rate": 7.1159935696088346e-06, "loss": 0.2371, "step": 4751 }, { "epoch": 0.38, "grad_norm": 1.4307431707713578, "learning_rate": 7.114819725877158e-06, "loss": 0.1923, "step": 4752 }, { "epoch": 0.38, "grad_norm": 1.2753073620623494, "learning_rate": 7.1136457401712255e-06, "loss": 0.1893, "step": 4753 }, { "epoch": 0.38, "grad_norm": 1.549165364491072, "learning_rate": 7.112471612569849e-06, "loss": 0.219, "step": 4754 }, { "epoch": 0.38, "grad_norm": 1.3879968085204364, "learning_rate": 7.111297343151854e-06, "loss": 0.2104, "step": 4755 }, { "epoch": 0.38, "grad_norm": 1.5127874215402446, "learning_rate": 7.110122931996073e-06, "loss": 0.2197, "step": 4756 }, { "epoch": 0.38, "grad_norm": 5.764811609584851, "learning_rate": 7.108948379181343e-06, "loss": 0.5671, "step": 4757 }, { "epoch": 0.38, "grad_norm": 1.3628419901696498, "learning_rate": 7.107773684786521e-06, "loss": 0.2182, "step": 4758 }, { "epoch": 0.38, "grad_norm": 1.2471538620607514, "learning_rate": 7.106598848890464e-06, "loss": 0.1976, "step": 4759 }, { "epoch": 0.38, "grad_norm": 1.1299104828149948, "learning_rate": 7.105423871572043e-06, "loss": 0.1588, "step": 4760 }, { "epoch": 0.38, "grad_norm": 1.2555138247463418, "learning_rate": 7.104248752910139e-06, "loss": 0.1995, "step": 4761 }, { "epoch": 0.38, "grad_norm": 1.4604070456468663, "learning_rate": 7.1030734929836405e-06, "loss": 0.1835, "step": 4762 }, { "epoch": 0.38, "grad_norm": 1.3672598271617655, "learning_rate": 7.101898091871447e-06, "loss": 0.1907, "step": 4763 }, { "epoch": 0.38, "grad_norm": 1.1692413804108257, "learning_rate": 7.100722549652465e-06, "loss": 0.1414, "step": 4764 }, { "epoch": 0.38, "grad_norm": 1.421086934454808, "learning_rate": 7.0995468664056135e-06, "loss": 0.2412, "step": 4765 }, { "epoch": 0.38, "grad_norm": 6.301400006914409, "learning_rate": 7.0983710422098205e-06, "loss": 0.6559, "step": 4766 }, { "epoch": 0.38, "grad_norm": 1.2478468570576833, "learning_rate": 7.097195077144021e-06, "loss": 0.201, "step": 4767 }, { "epoch": 0.38, "grad_norm": 1.4117594439537449, "learning_rate": 7.096018971287161e-06, "loss": 0.2772, "step": 4768 }, { "epoch": 0.38, "grad_norm": 1.3213733827334329, "learning_rate": 7.094842724718197e-06, "loss": 0.1842, "step": 4769 }, { "epoch": 0.38, "grad_norm": 1.3543254643213234, "learning_rate": 7.093666337516094e-06, "loss": 0.1704, "step": 4770 }, { "epoch": 0.38, "grad_norm": 1.5336305097787688, "learning_rate": 7.0924898097598276e-06, "loss": 0.2247, "step": 4771 }, { "epoch": 0.38, "grad_norm": 1.3011424030111844, "learning_rate": 7.091313141528378e-06, "loss": 0.2299, "step": 4772 }, { "epoch": 0.38, "grad_norm": 1.3583807176306357, "learning_rate": 7.0901363329007435e-06, "loss": 0.2197, "step": 4773 }, { "epoch": 0.38, "grad_norm": 1.3458212775468819, "learning_rate": 7.0889593839559215e-06, "loss": 0.2267, "step": 4774 }, { "epoch": 0.38, "grad_norm": 1.2855340372452824, "learning_rate": 7.0877822947729265e-06, "loss": 0.227, "step": 4775 }, { "epoch": 0.38, "grad_norm": 1.2530404128884884, "learning_rate": 7.0866050654307824e-06, "loss": 0.1683, "step": 4776 }, { "epoch": 0.38, "grad_norm": 1.3471158778308971, "learning_rate": 7.085427696008517e-06, "loss": 0.189, "step": 4777 }, { "epoch": 0.38, "grad_norm": 1.162521903821187, "learning_rate": 7.084250186585172e-06, "loss": 0.172, "step": 4778 }, { "epoch": 0.38, "grad_norm": 1.2259662055855378, "learning_rate": 7.0830725372397965e-06, "loss": 0.1842, "step": 4779 }, { "epoch": 0.38, "grad_norm": 1.2164696392224137, "learning_rate": 7.081894748051451e-06, "loss": 0.1582, "step": 4780 }, { "epoch": 0.38, "grad_norm": 1.29867431099379, "learning_rate": 7.080716819099204e-06, "loss": 0.2423, "step": 4781 }, { "epoch": 0.38, "grad_norm": 1.3378007552101296, "learning_rate": 7.07953875046213e-06, "loss": 0.1966, "step": 4782 }, { "epoch": 0.38, "grad_norm": 1.6797732265410092, "learning_rate": 7.0783605422193215e-06, "loss": 0.2729, "step": 4783 }, { "epoch": 0.38, "grad_norm": 1.2926198085286513, "learning_rate": 7.077182194449872e-06, "loss": 0.1895, "step": 4784 }, { "epoch": 0.38, "grad_norm": 1.2952148211658299, "learning_rate": 7.0760037072328855e-06, "loss": 0.2561, "step": 4785 }, { "epoch": 0.38, "grad_norm": 1.6157629065690955, "learning_rate": 7.074825080647483e-06, "loss": 0.1943, "step": 4786 }, { "epoch": 0.38, "grad_norm": 1.4344836758916617, "learning_rate": 7.073646314772787e-06, "loss": 0.214, "step": 4787 }, { "epoch": 0.38, "grad_norm": 1.2526146504703244, "learning_rate": 7.072467409687928e-06, "loss": 0.1859, "step": 4788 }, { "epoch": 0.38, "grad_norm": 1.2979609127143175, "learning_rate": 7.071288365472054e-06, "loss": 0.2045, "step": 4789 }, { "epoch": 0.38, "grad_norm": 1.290462777531266, "learning_rate": 7.070109182204317e-06, "loss": 0.1904, "step": 4790 }, { "epoch": 0.38, "grad_norm": 1.3377588283983977, "learning_rate": 7.068929859963878e-06, "loss": 0.262, "step": 4791 }, { "epoch": 0.38, "grad_norm": 1.3476943852152492, "learning_rate": 7.067750398829908e-06, "loss": 0.2208, "step": 4792 }, { "epoch": 0.38, "grad_norm": 5.6480179868341045, "learning_rate": 7.066570798881589e-06, "loss": 0.69, "step": 4793 }, { "epoch": 0.38, "grad_norm": 1.2624266267338566, "learning_rate": 7.065391060198111e-06, "loss": 0.1934, "step": 4794 }, { "epoch": 0.38, "grad_norm": 1.4228929032171018, "learning_rate": 7.064211182858673e-06, "loss": 0.2257, "step": 4795 }, { "epoch": 0.38, "grad_norm": 1.2566747916226606, "learning_rate": 7.063031166942485e-06, "loss": 0.1736, "step": 4796 }, { "epoch": 0.38, "grad_norm": 11.791828552918496, "learning_rate": 7.0618510125287636e-06, "loss": 0.5563, "step": 4797 }, { "epoch": 0.38, "grad_norm": 1.3325220313677701, "learning_rate": 7.0606707196967365e-06, "loss": 0.1984, "step": 4798 }, { "epoch": 0.38, "grad_norm": 1.6180852709667057, "learning_rate": 7.05949028852564e-06, "loss": 0.2418, "step": 4799 }, { "epoch": 0.38, "grad_norm": 1.1923736574734867, "learning_rate": 7.05830971909472e-06, "loss": 0.1984, "step": 4800 }, { "epoch": 0.38, "grad_norm": 1.2093112425977857, "learning_rate": 7.057129011483233e-06, "loss": 0.194, "step": 4801 }, { "epoch": 0.38, "grad_norm": 1.1465677515140582, "learning_rate": 7.055948165770442e-06, "loss": 0.174, "step": 4802 }, { "epoch": 0.38, "grad_norm": 7.32073674114426, "learning_rate": 7.054767182035622e-06, "loss": 0.7384, "step": 4803 }, { "epoch": 0.38, "grad_norm": 1.245348169906903, "learning_rate": 7.053586060358056e-06, "loss": 0.1633, "step": 4804 }, { "epoch": 0.38, "grad_norm": 8.680001709333162, "learning_rate": 7.0524048008170345e-06, "loss": 0.6267, "step": 4805 }, { "epoch": 0.38, "grad_norm": 1.3283813129000381, "learning_rate": 7.051223403491859e-06, "loss": 0.1814, "step": 4806 }, { "epoch": 0.38, "grad_norm": 1.3457247207207927, "learning_rate": 7.050041868461843e-06, "loss": 0.1754, "step": 4807 }, { "epoch": 0.38, "grad_norm": 1.2312384006678254, "learning_rate": 7.048860195806306e-06, "loss": 0.1778, "step": 4808 }, { "epoch": 0.38, "grad_norm": 1.5873004543021405, "learning_rate": 7.047678385604575e-06, "loss": 0.2056, "step": 4809 }, { "epoch": 0.38, "grad_norm": 1.4385135595250433, "learning_rate": 7.04649643793599e-06, "loss": 0.2038, "step": 4810 }, { "epoch": 0.38, "grad_norm": 5.36835698222397, "learning_rate": 7.045314352879898e-06, "loss": 0.6407, "step": 4811 }, { "epoch": 0.38, "grad_norm": 6.355633131986204, "learning_rate": 7.044132130515657e-06, "loss": 0.5287, "step": 4812 }, { "epoch": 0.39, "grad_norm": 1.494963153879723, "learning_rate": 7.042949770922631e-06, "loss": 0.2181, "step": 4813 }, { "epoch": 0.39, "grad_norm": 1.233453748166965, "learning_rate": 7.041767274180199e-06, "loss": 0.2066, "step": 4814 }, { "epoch": 0.39, "grad_norm": 1.5486545767832707, "learning_rate": 7.040584640367744e-06, "loss": 0.235, "step": 4815 }, { "epoch": 0.39, "grad_norm": 7.406417972631745, "learning_rate": 7.039401869564657e-06, "loss": 0.7878, "step": 4816 }, { "epoch": 0.39, "grad_norm": 1.3015302768207249, "learning_rate": 7.038218961850346e-06, "loss": 0.2044, "step": 4817 }, { "epoch": 0.39, "grad_norm": 1.3787175176541557, "learning_rate": 7.037035917304219e-06, "loss": 0.2128, "step": 4818 }, { "epoch": 0.39, "grad_norm": 1.3499394036573094, "learning_rate": 7.035852736005698e-06, "loss": 0.1867, "step": 4819 }, { "epoch": 0.39, "grad_norm": 1.3702685278899334, "learning_rate": 7.034669418034217e-06, "loss": 0.2448, "step": 4820 }, { "epoch": 0.39, "grad_norm": 1.4141019983180052, "learning_rate": 7.033485963469211e-06, "loss": 0.1797, "step": 4821 }, { "epoch": 0.39, "grad_norm": 1.2591278873935836, "learning_rate": 7.032302372390132e-06, "loss": 0.167, "step": 4822 }, { "epoch": 0.39, "grad_norm": 1.3123881388421341, "learning_rate": 7.031118644876436e-06, "loss": 0.2309, "step": 4823 }, { "epoch": 0.39, "grad_norm": 4.935948876499589, "learning_rate": 7.029934781007592e-06, "loss": 0.5871, "step": 4824 }, { "epoch": 0.39, "grad_norm": 1.3543901889964152, "learning_rate": 7.028750780863078e-06, "loss": 0.2025, "step": 4825 }, { "epoch": 0.39, "grad_norm": 1.3473654162878381, "learning_rate": 7.0275666445223745e-06, "loss": 0.2106, "step": 4826 }, { "epoch": 0.39, "grad_norm": 1.1615918620167034, "learning_rate": 7.0263823720649795e-06, "loss": 0.167, "step": 4827 }, { "epoch": 0.39, "grad_norm": 1.4936536838354497, "learning_rate": 7.025197963570396e-06, "loss": 0.2358, "step": 4828 }, { "epoch": 0.39, "grad_norm": 1.4285922221885665, "learning_rate": 7.024013419118138e-06, "loss": 0.1781, "step": 4829 }, { "epoch": 0.39, "grad_norm": 1.2629307023708323, "learning_rate": 7.022828738787725e-06, "loss": 0.224, "step": 4830 }, { "epoch": 0.39, "grad_norm": 1.4305422882932475, "learning_rate": 7.021643922658691e-06, "loss": 0.2011, "step": 4831 }, { "epoch": 0.39, "grad_norm": 1.5108733104670857, "learning_rate": 7.020458970810575e-06, "loss": 0.2459, "step": 4832 }, { "epoch": 0.39, "grad_norm": 1.7227817553121723, "learning_rate": 7.0192738833229255e-06, "loss": 0.2525, "step": 4833 }, { "epoch": 0.39, "grad_norm": 1.2802231535873252, "learning_rate": 7.018088660275304e-06, "loss": 0.2019, "step": 4834 }, { "epoch": 0.39, "grad_norm": 1.2786367616500365, "learning_rate": 7.016903301747275e-06, "loss": 0.1825, "step": 4835 }, { "epoch": 0.39, "grad_norm": 1.2597425755505227, "learning_rate": 7.015717807818417e-06, "loss": 0.2016, "step": 4836 }, { "epoch": 0.39, "grad_norm": 1.4209919238692725, "learning_rate": 7.014532178568314e-06, "loss": 0.2134, "step": 4837 }, { "epoch": 0.39, "grad_norm": 1.3585054663107867, "learning_rate": 7.013346414076563e-06, "loss": 0.2049, "step": 4838 }, { "epoch": 0.39, "grad_norm": 4.069209179081909, "learning_rate": 7.012160514422767e-06, "loss": 0.5267, "step": 4839 }, { "epoch": 0.39, "grad_norm": 1.5546811833164766, "learning_rate": 7.010974479686538e-06, "loss": 0.244, "step": 4840 }, { "epoch": 0.39, "grad_norm": 1.507484296014732, "learning_rate": 7.0097883099475e-06, "loss": 0.2602, "step": 4841 }, { "epoch": 0.39, "grad_norm": 1.1623343477439738, "learning_rate": 7.008602005285283e-06, "loss": 0.1758, "step": 4842 }, { "epoch": 0.39, "grad_norm": 1.2558409497821823, "learning_rate": 7.007415565779528e-06, "loss": 0.2332, "step": 4843 }, { "epoch": 0.39, "grad_norm": 1.346866654961198, "learning_rate": 7.006228991509885e-06, "loss": 0.1975, "step": 4844 }, { "epoch": 0.39, "grad_norm": 1.4330432859433306, "learning_rate": 7.005042282556009e-06, "loss": 0.1796, "step": 4845 }, { "epoch": 0.39, "grad_norm": 1.2223240963082276, "learning_rate": 7.003855438997571e-06, "loss": 0.1671, "step": 4846 }, { "epoch": 0.39, "grad_norm": 1.3870883713136453, "learning_rate": 7.002668460914244e-06, "loss": 0.2097, "step": 4847 }, { "epoch": 0.39, "grad_norm": 1.4439850347518288, "learning_rate": 7.001481348385718e-06, "loss": 0.2389, "step": 4848 }, { "epoch": 0.39, "grad_norm": 1.287013176096537, "learning_rate": 7.000294101491683e-06, "loss": 0.206, "step": 4849 }, { "epoch": 0.39, "grad_norm": 1.3347672372789836, "learning_rate": 6.999106720311846e-06, "loss": 0.1841, "step": 4850 }, { "epoch": 0.39, "grad_norm": 1.303674612482775, "learning_rate": 6.997919204925916e-06, "loss": 0.1595, "step": 4851 }, { "epoch": 0.39, "grad_norm": 1.2929481381648444, "learning_rate": 6.9967315554136186e-06, "loss": 0.2169, "step": 4852 }, { "epoch": 0.39, "grad_norm": 1.441351601225843, "learning_rate": 6.995543771854681e-06, "loss": 0.2194, "step": 4853 }, { "epoch": 0.39, "grad_norm": 1.3851054356495633, "learning_rate": 6.994355854328842e-06, "loss": 0.1956, "step": 4854 }, { "epoch": 0.39, "grad_norm": 1.3476074353276983, "learning_rate": 6.993167802915854e-06, "loss": 0.1843, "step": 4855 }, { "epoch": 0.39, "grad_norm": 1.3048652297547558, "learning_rate": 6.99197961769547e-06, "loss": 0.2116, "step": 4856 }, { "epoch": 0.39, "grad_norm": 1.4561127090797057, "learning_rate": 6.990791298747461e-06, "loss": 0.2263, "step": 4857 }, { "epoch": 0.39, "grad_norm": 1.474774556199081, "learning_rate": 6.989602846151599e-06, "loss": 0.2593, "step": 4858 }, { "epoch": 0.39, "grad_norm": 1.4782006444187838, "learning_rate": 6.98841425998767e-06, "loss": 0.2089, "step": 4859 }, { "epoch": 0.39, "grad_norm": 1.2483069291308928, "learning_rate": 6.987225540335467e-06, "loss": 0.1546, "step": 4860 }, { "epoch": 0.39, "grad_norm": 1.301715452053872, "learning_rate": 6.986036687274793e-06, "loss": 0.1896, "step": 4861 }, { "epoch": 0.39, "grad_norm": 1.4783805081667416, "learning_rate": 6.984847700885459e-06, "loss": 0.2194, "step": 4862 }, { "epoch": 0.39, "grad_norm": 1.5851414386900278, "learning_rate": 6.983658581247285e-06, "loss": 0.251, "step": 4863 }, { "epoch": 0.39, "grad_norm": 7.04631605323419, "learning_rate": 6.982469328440098e-06, "loss": 0.5197, "step": 4864 }, { "epoch": 0.39, "grad_norm": 1.2272960603223113, "learning_rate": 6.981279942543741e-06, "loss": 0.1622, "step": 4865 }, { "epoch": 0.39, "grad_norm": 1.2746854881956649, "learning_rate": 6.980090423638057e-06, "loss": 0.209, "step": 4866 }, { "epoch": 0.39, "grad_norm": 5.206302545876208, "learning_rate": 6.978900771802903e-06, "loss": 0.6047, "step": 4867 }, { "epoch": 0.39, "grad_norm": 1.2198123179720712, "learning_rate": 6.9777109871181444e-06, "loss": 0.1895, "step": 4868 }, { "epoch": 0.39, "grad_norm": 1.2951570853463479, "learning_rate": 6.976521069663657e-06, "loss": 0.2139, "step": 4869 }, { "epoch": 0.39, "grad_norm": 1.337000024808618, "learning_rate": 6.975331019519322e-06, "loss": 0.2112, "step": 4870 }, { "epoch": 0.39, "grad_norm": 1.4741104816399193, "learning_rate": 6.974140836765028e-06, "loss": 0.2158, "step": 4871 }, { "epoch": 0.39, "grad_norm": 3.2876117875760857, "learning_rate": 6.972950521480679e-06, "loss": 0.6419, "step": 4872 }, { "epoch": 0.39, "grad_norm": 1.261432132399821, "learning_rate": 6.971760073746183e-06, "loss": 0.167, "step": 4873 }, { "epoch": 0.39, "grad_norm": 1.4445220320859973, "learning_rate": 6.9705694936414614e-06, "loss": 0.2412, "step": 4874 }, { "epoch": 0.39, "grad_norm": 4.69626174492105, "learning_rate": 6.969378781246436e-06, "loss": 0.7072, "step": 4875 }, { "epoch": 0.39, "grad_norm": 4.93499438990244, "learning_rate": 6.968187936641049e-06, "loss": 0.6157, "step": 4876 }, { "epoch": 0.39, "grad_norm": 1.3284789234946335, "learning_rate": 6.96699695990524e-06, "loss": 0.1861, "step": 4877 }, { "epoch": 0.39, "grad_norm": 1.3700035370714105, "learning_rate": 6.965805851118967e-06, "loss": 0.2203, "step": 4878 }, { "epoch": 0.39, "grad_norm": 1.292692702839124, "learning_rate": 6.96461461036219e-06, "loss": 0.1876, "step": 4879 }, { "epoch": 0.39, "grad_norm": 1.5352402568945485, "learning_rate": 6.9634232377148835e-06, "loss": 0.2081, "step": 4880 }, { "epoch": 0.39, "grad_norm": 1.3871809061173839, "learning_rate": 6.9622317332570235e-06, "loss": 0.2425, "step": 4881 }, { "epoch": 0.39, "grad_norm": 1.4876367943419888, "learning_rate": 6.961040097068603e-06, "loss": 0.2569, "step": 4882 }, { "epoch": 0.39, "grad_norm": 1.4130185792218803, "learning_rate": 6.959848329229619e-06, "loss": 0.2315, "step": 4883 }, { "epoch": 0.39, "grad_norm": 1.3432689621067724, "learning_rate": 6.95865642982008e-06, "loss": 0.2241, "step": 4884 }, { "epoch": 0.39, "grad_norm": 1.1628376790757289, "learning_rate": 6.957464398919998e-06, "loss": 0.1642, "step": 4885 }, { "epoch": 0.39, "grad_norm": 1.2912117106395649, "learning_rate": 6.956272236609402e-06, "loss": 0.2048, "step": 4886 }, { "epoch": 0.39, "grad_norm": 6.2493178095540864, "learning_rate": 6.955079942968324e-06, "loss": 0.5668, "step": 4887 }, { "epoch": 0.39, "grad_norm": 1.4202703870738962, "learning_rate": 6.9538875180768025e-06, "loss": 0.1906, "step": 4888 }, { "epoch": 0.39, "grad_norm": 6.970964089357048, "learning_rate": 6.952694962014894e-06, "loss": 0.5516, "step": 4889 }, { "epoch": 0.39, "grad_norm": 1.3803562728062753, "learning_rate": 6.951502274862656e-06, "loss": 0.2229, "step": 4890 }, { "epoch": 0.39, "grad_norm": 7.871589193090764, "learning_rate": 6.950309456700158e-06, "loss": 0.4558, "step": 4891 }, { "epoch": 0.39, "grad_norm": 1.4185740673601595, "learning_rate": 6.949116507607476e-06, "loss": 0.1956, "step": 4892 }, { "epoch": 0.39, "grad_norm": 1.4590087545002777, "learning_rate": 6.947923427664698e-06, "loss": 0.2079, "step": 4893 }, { "epoch": 0.39, "grad_norm": 5.452520864938566, "learning_rate": 6.946730216951918e-06, "loss": 0.6906, "step": 4894 }, { "epoch": 0.39, "grad_norm": 1.4225880604839358, "learning_rate": 6.945536875549241e-06, "loss": 0.233, "step": 4895 }, { "epoch": 0.39, "grad_norm": 1.3161400077408332, "learning_rate": 6.944343403536777e-06, "loss": 0.2156, "step": 4896 }, { "epoch": 0.39, "grad_norm": 1.3357418031471076, "learning_rate": 6.94314980099465e-06, "loss": 0.1828, "step": 4897 }, { "epoch": 0.39, "grad_norm": 1.3265221410054067, "learning_rate": 6.941956068002991e-06, "loss": 0.1782, "step": 4898 }, { "epoch": 0.39, "grad_norm": 1.1662216431501797, "learning_rate": 6.940762204641935e-06, "loss": 0.1939, "step": 4899 }, { "epoch": 0.39, "grad_norm": 1.395001291101406, "learning_rate": 6.939568210991633e-06, "loss": 0.2243, "step": 4900 }, { "epoch": 0.39, "grad_norm": 1.4352154910816537, "learning_rate": 6.9383740871322405e-06, "loss": 0.2332, "step": 4901 }, { "epoch": 0.39, "grad_norm": 1.4065655492432347, "learning_rate": 6.937179833143921e-06, "loss": 0.1957, "step": 4902 }, { "epoch": 0.39, "grad_norm": 1.3167961798580654, "learning_rate": 6.935985449106851e-06, "loss": 0.2061, "step": 4903 }, { "epoch": 0.39, "grad_norm": 1.2770916724380539, "learning_rate": 6.934790935101214e-06, "loss": 0.1932, "step": 4904 }, { "epoch": 0.39, "grad_norm": 1.134553579940731, "learning_rate": 6.933596291207196e-06, "loss": 0.1788, "step": 4905 }, { "epoch": 0.39, "grad_norm": 1.3224940310551938, "learning_rate": 6.932401517505003e-06, "loss": 0.1939, "step": 4906 }, { "epoch": 0.39, "grad_norm": 5.565108742040396, "learning_rate": 6.931206614074839e-06, "loss": 0.5762, "step": 4907 }, { "epoch": 0.39, "grad_norm": 1.408024383876922, "learning_rate": 6.9300115809969245e-06, "loss": 0.2374, "step": 4908 }, { "epoch": 0.39, "grad_norm": 8.199798691166958, "learning_rate": 6.928816418351485e-06, "loss": 0.5685, "step": 4909 }, { "epoch": 0.39, "grad_norm": 1.3526336714893068, "learning_rate": 6.927621126218756e-06, "loss": 0.1894, "step": 4910 }, { "epoch": 0.39, "grad_norm": 1.1935397918230348, "learning_rate": 6.9264257046789794e-06, "loss": 0.208, "step": 4911 }, { "epoch": 0.39, "grad_norm": 1.3047218043384239, "learning_rate": 6.9252301538124086e-06, "loss": 0.2082, "step": 4912 }, { "epoch": 0.39, "grad_norm": 1.4263092442658203, "learning_rate": 6.924034473699305e-06, "loss": 0.2068, "step": 4913 }, { "epoch": 0.39, "grad_norm": 1.413327240446172, "learning_rate": 6.922838664419937e-06, "loss": 0.1767, "step": 4914 }, { "epoch": 0.39, "grad_norm": 1.278508795441007, "learning_rate": 6.921642726054583e-06, "loss": 0.1972, "step": 4915 }, { "epoch": 0.39, "grad_norm": 1.4762990773159892, "learning_rate": 6.9204466586835305e-06, "loss": 0.2014, "step": 4916 }, { "epoch": 0.39, "grad_norm": 1.3690871783013314, "learning_rate": 6.919250462387075e-06, "loss": 0.2166, "step": 4917 }, { "epoch": 0.39, "grad_norm": 1.4212192863272215, "learning_rate": 6.9180541372455225e-06, "loss": 0.2302, "step": 4918 }, { "epoch": 0.39, "grad_norm": 1.4792930772918174, "learning_rate": 6.916857683339183e-06, "loss": 0.2076, "step": 4919 }, { "epoch": 0.39, "grad_norm": 1.307779964272019, "learning_rate": 6.915661100748379e-06, "loss": 0.206, "step": 4920 }, { "epoch": 0.39, "grad_norm": 4.602277155926983, "learning_rate": 6.914464389553443e-06, "loss": 0.7351, "step": 4921 }, { "epoch": 0.39, "grad_norm": 1.4759958696468491, "learning_rate": 6.91326754983471e-06, "loss": 0.1673, "step": 4922 }, { "epoch": 0.39, "grad_norm": 1.319054898036481, "learning_rate": 6.91207058167253e-06, "loss": 0.1847, "step": 4923 }, { "epoch": 0.39, "grad_norm": 1.216167784105681, "learning_rate": 6.910873485147258e-06, "loss": 0.1657, "step": 4924 }, { "epoch": 0.39, "grad_norm": 1.4408456792012903, "learning_rate": 6.9096762603392595e-06, "loss": 0.2279, "step": 4925 }, { "epoch": 0.39, "grad_norm": 1.2037805327361621, "learning_rate": 6.908478907328907e-06, "loss": 0.1711, "step": 4926 }, { "epoch": 0.39, "grad_norm": 1.5321231371973905, "learning_rate": 6.907281426196584e-06, "loss": 0.2339, "step": 4927 }, { "epoch": 0.39, "grad_norm": 1.383333258786712, "learning_rate": 6.906083817022679e-06, "loss": 0.2172, "step": 4928 }, { "epoch": 0.39, "grad_norm": 1.266122627689853, "learning_rate": 6.904886079887594e-06, "loss": 0.2348, "step": 4929 }, { "epoch": 0.39, "grad_norm": 1.5400489641294834, "learning_rate": 6.903688214871734e-06, "loss": 0.2304, "step": 4930 }, { "epoch": 0.39, "grad_norm": 1.5490376796386518, "learning_rate": 6.902490222055515e-06, "loss": 0.1923, "step": 4931 }, { "epoch": 0.39, "grad_norm": 1.375380413865992, "learning_rate": 6.901292101519365e-06, "loss": 0.1847, "step": 4932 }, { "epoch": 0.39, "grad_norm": 4.902342818607749, "learning_rate": 6.900093853343715e-06, "loss": 0.601, "step": 4933 }, { "epoch": 0.39, "grad_norm": 1.307491509380475, "learning_rate": 6.898895477609007e-06, "loss": 0.2062, "step": 4934 }, { "epoch": 0.39, "grad_norm": 1.36651356147818, "learning_rate": 6.897696974395691e-06, "loss": 0.21, "step": 4935 }, { "epoch": 0.39, "grad_norm": 1.3222386495184233, "learning_rate": 6.896498343784229e-06, "loss": 0.2319, "step": 4936 }, { "epoch": 0.39, "grad_norm": 1.5050735987420332, "learning_rate": 6.895299585855086e-06, "loss": 0.2525, "step": 4937 }, { "epoch": 0.4, "grad_norm": 6.36016554479017, "learning_rate": 6.8941007006887405e-06, "loss": 0.5755, "step": 4938 }, { "epoch": 0.4, "grad_norm": 1.3893299537750121, "learning_rate": 6.892901688365677e-06, "loss": 0.2126, "step": 4939 }, { "epoch": 0.4, "grad_norm": 1.6459441379369277, "learning_rate": 6.891702548966386e-06, "loss": 0.2268, "step": 4940 }, { "epoch": 0.4, "grad_norm": 25.113430737548498, "learning_rate": 6.890503282571371e-06, "loss": 0.4396, "step": 4941 }, { "epoch": 0.4, "grad_norm": 1.307314102051341, "learning_rate": 6.889303889261143e-06, "loss": 0.1999, "step": 4942 }, { "epoch": 0.4, "grad_norm": 1.3296348716315687, "learning_rate": 6.888104369116222e-06, "loss": 0.2017, "step": 4943 }, { "epoch": 0.4, "grad_norm": 1.3279160327902289, "learning_rate": 6.886904722217132e-06, "loss": 0.1911, "step": 4944 }, { "epoch": 0.4, "grad_norm": 5.768981047396083, "learning_rate": 6.885704948644411e-06, "loss": 0.7529, "step": 4945 }, { "epoch": 0.4, "grad_norm": 1.2629903682824621, "learning_rate": 6.8845050484786045e-06, "loss": 0.2088, "step": 4946 }, { "epoch": 0.4, "grad_norm": 1.334688790410686, "learning_rate": 6.883305021800264e-06, "loss": 0.2046, "step": 4947 }, { "epoch": 0.4, "grad_norm": 1.2984950009023064, "learning_rate": 6.8821048686899515e-06, "loss": 0.2026, "step": 4948 }, { "epoch": 0.4, "grad_norm": 1.2852530629635837, "learning_rate": 6.8809045892282365e-06, "loss": 0.1655, "step": 4949 }, { "epoch": 0.4, "grad_norm": 1.2376225065294297, "learning_rate": 6.8797041834956955e-06, "loss": 0.1771, "step": 4950 }, { "epoch": 0.4, "grad_norm": 1.116744101955967, "learning_rate": 6.87850365157292e-06, "loss": 0.1486, "step": 4951 }, { "epoch": 0.4, "grad_norm": 1.5416820976558965, "learning_rate": 6.877302993540501e-06, "loss": 0.2004, "step": 4952 }, { "epoch": 0.4, "grad_norm": 1.4467629270827924, "learning_rate": 6.876102209479045e-06, "loss": 0.224, "step": 4953 }, { "epoch": 0.4, "grad_norm": 1.5139332612600673, "learning_rate": 6.874901299469164e-06, "loss": 0.2502, "step": 4954 }, { "epoch": 0.4, "grad_norm": 1.421858902691484, "learning_rate": 6.873700263591476e-06, "loss": 0.2144, "step": 4955 }, { "epoch": 0.4, "grad_norm": 1.321054108608638, "learning_rate": 6.872499101926615e-06, "loss": 0.1884, "step": 4956 }, { "epoch": 0.4, "grad_norm": 1.3692552611674051, "learning_rate": 6.871297814555214e-06, "loss": 0.2103, "step": 4957 }, { "epoch": 0.4, "grad_norm": 1.4155867024277298, "learning_rate": 6.870096401557921e-06, "loss": 0.2164, "step": 4958 }, { "epoch": 0.4, "grad_norm": 5.797088952103575, "learning_rate": 6.86889486301539e-06, "loss": 0.7178, "step": 4959 }, { "epoch": 0.4, "grad_norm": 1.2949654574890377, "learning_rate": 6.867693199008285e-06, "loss": 0.2027, "step": 4960 }, { "epoch": 0.4, "grad_norm": 34.15486955323081, "learning_rate": 6.866491409617275e-06, "loss": 0.6208, "step": 4961 }, { "epoch": 0.4, "grad_norm": 1.3452684344912391, "learning_rate": 6.865289494923044e-06, "loss": 0.2018, "step": 4962 }, { "epoch": 0.4, "grad_norm": 1.4050764532151765, "learning_rate": 6.8640874550062765e-06, "loss": 0.1579, "step": 4963 }, { "epoch": 0.4, "grad_norm": 1.3179846356465217, "learning_rate": 6.862885289947669e-06, "loss": 0.2032, "step": 4964 }, { "epoch": 0.4, "grad_norm": 1.4399596586235432, "learning_rate": 6.8616829998279295e-06, "loss": 0.1943, "step": 4965 }, { "epoch": 0.4, "grad_norm": 3.9626913697152992, "learning_rate": 6.86048058472777e-06, "loss": 0.5126, "step": 4966 }, { "epoch": 0.4, "grad_norm": 1.3146291322706247, "learning_rate": 6.8592780447279106e-06, "loss": 0.1914, "step": 4967 }, { "epoch": 0.4, "grad_norm": 1.4635886329776477, "learning_rate": 6.8580753799090825e-06, "loss": 0.2332, "step": 4968 }, { "epoch": 0.4, "grad_norm": 1.3034745213422039, "learning_rate": 6.856872590352025e-06, "loss": 0.2004, "step": 4969 }, { "epoch": 0.4, "grad_norm": 1.3443065348836594, "learning_rate": 6.8556696761374844e-06, "loss": 0.2166, "step": 4970 }, { "epoch": 0.4, "grad_norm": 1.3766822623297141, "learning_rate": 6.854466637346215e-06, "loss": 0.2212, "step": 4971 }, { "epoch": 0.4, "grad_norm": 1.3297164025013832, "learning_rate": 6.853263474058982e-06, "loss": 0.1739, "step": 4972 }, { "epoch": 0.4, "grad_norm": 1.3695836169183748, "learning_rate": 6.852060186356557e-06, "loss": 0.2255, "step": 4973 }, { "epoch": 0.4, "grad_norm": 4.946767894543483, "learning_rate": 6.850856774319721e-06, "loss": 0.6077, "step": 4974 }, { "epoch": 0.4, "grad_norm": 1.3829320828718388, "learning_rate": 6.849653238029261e-06, "loss": 0.2182, "step": 4975 }, { "epoch": 0.4, "grad_norm": 1.4664919764851658, "learning_rate": 6.848449577565974e-06, "loss": 0.1779, "step": 4976 }, { "epoch": 0.4, "grad_norm": 7.829624030885766, "learning_rate": 6.847245793010668e-06, "loss": 0.5379, "step": 4977 }, { "epoch": 0.4, "grad_norm": 1.1648620839624397, "learning_rate": 6.846041884444154e-06, "loss": 0.1277, "step": 4978 }, { "epoch": 0.4, "grad_norm": 1.3899190230686151, "learning_rate": 6.844837851947255e-06, "loss": 0.1926, "step": 4979 }, { "epoch": 0.4, "grad_norm": 1.3245950993499847, "learning_rate": 6.843633695600802e-06, "loss": 0.2225, "step": 4980 }, { "epoch": 0.4, "grad_norm": 1.293372760045752, "learning_rate": 6.8424294154856315e-06, "loss": 0.1749, "step": 4981 }, { "epoch": 0.4, "grad_norm": 1.2931034185984664, "learning_rate": 6.841225011682594e-06, "loss": 0.1904, "step": 4982 }, { "epoch": 0.4, "grad_norm": 1.3950209648683762, "learning_rate": 6.840020484272543e-06, "loss": 0.2036, "step": 4983 }, { "epoch": 0.4, "grad_norm": 1.4741448372516144, "learning_rate": 6.83881583333634e-06, "loss": 0.1803, "step": 4984 }, { "epoch": 0.4, "grad_norm": 5.956825256850357, "learning_rate": 6.837611058954858e-06, "loss": 0.5337, "step": 4985 }, { "epoch": 0.4, "grad_norm": 1.3085637376696329, "learning_rate": 6.836406161208979e-06, "loss": 0.2358, "step": 4986 }, { "epoch": 0.4, "grad_norm": 1.3999991063350994, "learning_rate": 6.835201140179592e-06, "loss": 0.2018, "step": 4987 }, { "epoch": 0.4, "grad_norm": 1.248226332146478, "learning_rate": 6.83399599594759e-06, "loss": 0.1917, "step": 4988 }, { "epoch": 0.4, "grad_norm": 1.2962187144009307, "learning_rate": 6.83279072859388e-06, "loss": 0.1789, "step": 4989 }, { "epoch": 0.4, "grad_norm": 1.4934910549861211, "learning_rate": 6.831585338199375e-06, "loss": 0.1916, "step": 4990 }, { "epoch": 0.4, "grad_norm": 1.3457602355219391, "learning_rate": 6.830379824845e-06, "loss": 0.2226, "step": 4991 }, { "epoch": 0.4, "grad_norm": 1.3335046171770408, "learning_rate": 6.829174188611678e-06, "loss": 0.1897, "step": 4992 }, { "epoch": 0.4, "grad_norm": 1.57326488350582, "learning_rate": 6.8279684295803525e-06, "loss": 0.2287, "step": 4993 }, { "epoch": 0.4, "grad_norm": 1.2669226364021462, "learning_rate": 6.826762547831967e-06, "loss": 0.1818, "step": 4994 }, { "epoch": 0.4, "grad_norm": 1.4424108482404092, "learning_rate": 6.825556543447476e-06, "loss": 0.211, "step": 4995 }, { "epoch": 0.4, "grad_norm": 1.4019277274493673, "learning_rate": 6.8243504165078445e-06, "loss": 0.1915, "step": 4996 }, { "epoch": 0.4, "grad_norm": 1.2147315170196327, "learning_rate": 6.823144167094042e-06, "loss": 0.2079, "step": 4997 }, { "epoch": 0.4, "grad_norm": 1.2701717115152467, "learning_rate": 6.821937795287046e-06, "loss": 0.1675, "step": 4998 }, { "epoch": 0.4, "grad_norm": 1.4425369966089794, "learning_rate": 6.820731301167845e-06, "loss": 0.2093, "step": 4999 }, { "epoch": 0.4, "grad_norm": 1.4492908897367405, "learning_rate": 6.819524684817439e-06, "loss": 0.206, "step": 5000 }, { "epoch": 0.4, "grad_norm": 1.641069139104556, "learning_rate": 6.818317946316826e-06, "loss": 0.2191, "step": 5001 }, { "epoch": 0.4, "grad_norm": 1.523562717886117, "learning_rate": 6.817111085747018e-06, "loss": 0.2309, "step": 5002 }, { "epoch": 0.4, "grad_norm": 1.6006521753071992, "learning_rate": 6.8159041031890395e-06, "loss": 0.2745, "step": 5003 }, { "epoch": 0.4, "grad_norm": 1.3417446947327318, "learning_rate": 6.8146969987239155e-06, "loss": 0.1765, "step": 5004 }, { "epoch": 0.4, "grad_norm": 1.2989670579840462, "learning_rate": 6.8134897724326846e-06, "loss": 0.1845, "step": 5005 }, { "epoch": 0.4, "grad_norm": 1.4715732549671763, "learning_rate": 6.812282424396389e-06, "loss": 0.2044, "step": 5006 }, { "epoch": 0.4, "grad_norm": 1.6852456234673696, "learning_rate": 6.811074954696084e-06, "loss": 0.2536, "step": 5007 }, { "epoch": 0.4, "grad_norm": 1.3991171018594237, "learning_rate": 6.809867363412832e-06, "loss": 0.1974, "step": 5008 }, { "epoch": 0.4, "grad_norm": 1.2239878316189168, "learning_rate": 6.808659650627698e-06, "loss": 0.1806, "step": 5009 }, { "epoch": 0.4, "grad_norm": 1.2660882630922488, "learning_rate": 6.807451816421762e-06, "loss": 0.1848, "step": 5010 }, { "epoch": 0.4, "grad_norm": 1.4374387985078176, "learning_rate": 6.8062438608761095e-06, "loss": 0.196, "step": 5011 }, { "epoch": 0.4, "grad_norm": 1.3944339961623857, "learning_rate": 6.805035784071833e-06, "loss": 0.2308, "step": 5012 }, { "epoch": 0.4, "grad_norm": 1.411062033238887, "learning_rate": 6.803827586090036e-06, "loss": 0.2087, "step": 5013 }, { "epoch": 0.4, "grad_norm": 1.4193911557790597, "learning_rate": 6.802619267011828e-06, "loss": 0.2076, "step": 5014 }, { "epoch": 0.4, "grad_norm": 1.2961662675973553, "learning_rate": 6.801410826918327e-06, "loss": 0.208, "step": 5015 }, { "epoch": 0.4, "grad_norm": 1.2757708563696757, "learning_rate": 6.800202265890658e-06, "loss": 0.2044, "step": 5016 }, { "epoch": 0.4, "grad_norm": 1.4609176065044773, "learning_rate": 6.798993584009959e-06, "loss": 0.2118, "step": 5017 }, { "epoch": 0.4, "grad_norm": 1.468038699485295, "learning_rate": 6.797784781357368e-06, "loss": 0.1872, "step": 5018 }, { "epoch": 0.4, "grad_norm": 1.2988052843294424, "learning_rate": 6.796575858014036e-06, "loss": 0.2065, "step": 5019 }, { "epoch": 0.4, "grad_norm": 1.5636886441328282, "learning_rate": 6.7953668140611264e-06, "loss": 0.2272, "step": 5020 }, { "epoch": 0.4, "grad_norm": 6.806739700279936, "learning_rate": 6.794157649579801e-06, "loss": 0.5858, "step": 5021 }, { "epoch": 0.4, "grad_norm": 1.4620134666821374, "learning_rate": 6.792948364651237e-06, "loss": 0.2255, "step": 5022 }, { "epoch": 0.4, "grad_norm": 1.30615466809214, "learning_rate": 6.791738959356616e-06, "loss": 0.1975, "step": 5023 }, { "epoch": 0.4, "grad_norm": 1.5922264549549825, "learning_rate": 6.790529433777131e-06, "loss": 0.2006, "step": 5024 }, { "epoch": 0.4, "grad_norm": 1.372185829844158, "learning_rate": 6.78931978799398e-06, "loss": 0.1955, "step": 5025 }, { "epoch": 0.4, "grad_norm": 6.150212198115859, "learning_rate": 6.78811002208837e-06, "loss": 0.4465, "step": 5026 }, { "epoch": 0.4, "grad_norm": 1.6188269041012755, "learning_rate": 6.786900136141516e-06, "loss": 0.2191, "step": 5027 }, { "epoch": 0.4, "grad_norm": 1.4740464036444292, "learning_rate": 6.785690130234643e-06, "loss": 0.1739, "step": 5028 }, { "epoch": 0.4, "grad_norm": 1.1255519897147408, "learning_rate": 6.78448000444898e-06, "loss": 0.2072, "step": 5029 }, { "epoch": 0.4, "grad_norm": 1.3151225369620891, "learning_rate": 6.783269758865768e-06, "loss": 0.1863, "step": 5030 }, { "epoch": 0.4, "grad_norm": 1.428731589974488, "learning_rate": 6.782059393566254e-06, "loss": 0.2319, "step": 5031 }, { "epoch": 0.4, "grad_norm": 1.4195228746789723, "learning_rate": 6.780848908631694e-06, "loss": 0.2506, "step": 5032 }, { "epoch": 0.4, "grad_norm": 1.379849430240999, "learning_rate": 6.779638304143349e-06, "loss": 0.2652, "step": 5033 }, { "epoch": 0.4, "grad_norm": 1.2133125559139302, "learning_rate": 6.778427580182494e-06, "loss": 0.1768, "step": 5034 }, { "epoch": 0.4, "grad_norm": 1.437496250549501, "learning_rate": 6.777216736830409e-06, "loss": 0.2212, "step": 5035 }, { "epoch": 0.4, "grad_norm": 1.3274476220758487, "learning_rate": 6.7760057741683774e-06, "loss": 0.1995, "step": 5036 }, { "epoch": 0.4, "grad_norm": 1.3365914244953607, "learning_rate": 6.774794692277698e-06, "loss": 0.1629, "step": 5037 }, { "epoch": 0.4, "grad_norm": 1.373296646580278, "learning_rate": 6.773583491239672e-06, "loss": 0.2184, "step": 5038 }, { "epoch": 0.4, "grad_norm": 5.860886528026404, "learning_rate": 6.772372171135614e-06, "loss": 0.5923, "step": 5039 }, { "epoch": 0.4, "grad_norm": 1.3129858532952587, "learning_rate": 6.77116073204684e-06, "loss": 0.1918, "step": 5040 }, { "epoch": 0.4, "grad_norm": 6.748459346325971, "learning_rate": 6.769949174054682e-06, "loss": 0.6236, "step": 5041 }, { "epoch": 0.4, "grad_norm": 1.4051356650142013, "learning_rate": 6.768737497240472e-06, "loss": 0.2026, "step": 5042 }, { "epoch": 0.4, "grad_norm": 1.2825537650348895, "learning_rate": 6.767525701685555e-06, "loss": 0.173, "step": 5043 }, { "epoch": 0.4, "grad_norm": 1.3237141716077738, "learning_rate": 6.766313787471283e-06, "loss": 0.2073, "step": 5044 }, { "epoch": 0.4, "grad_norm": 1.2711946889863033, "learning_rate": 6.765101754679015e-06, "loss": 0.1483, "step": 5045 }, { "epoch": 0.4, "grad_norm": 3.72244253538842, "learning_rate": 6.7638896033901165e-06, "loss": 0.7456, "step": 5046 }, { "epoch": 0.4, "grad_norm": 1.2391592063988401, "learning_rate": 6.762677333685965e-06, "loss": 0.1533, "step": 5047 }, { "epoch": 0.4, "grad_norm": 1.444001177481423, "learning_rate": 6.761464945647944e-06, "loss": 0.233, "step": 5048 }, { "epoch": 0.4, "grad_norm": 1.3584264534273747, "learning_rate": 6.760252439357444e-06, "loss": 0.233, "step": 5049 }, { "epoch": 0.4, "grad_norm": 1.3073027697573225, "learning_rate": 6.7590398148958625e-06, "loss": 0.184, "step": 5050 }, { "epoch": 0.4, "grad_norm": 1.4308227549997163, "learning_rate": 6.757827072344612e-06, "loss": 0.179, "step": 5051 }, { "epoch": 0.4, "grad_norm": 1.2201289525801633, "learning_rate": 6.756614211785103e-06, "loss": 0.1669, "step": 5052 }, { "epoch": 0.4, "grad_norm": 6.930833694603219, "learning_rate": 6.755401233298758e-06, "loss": 0.6947, "step": 5053 }, { "epoch": 0.4, "grad_norm": 1.4120492897936616, "learning_rate": 6.754188136967011e-06, "loss": 0.2232, "step": 5054 }, { "epoch": 0.4, "grad_norm": 1.6083660082894295, "learning_rate": 6.7529749228712994e-06, "loss": 0.201, "step": 5055 }, { "epoch": 0.4, "grad_norm": 5.30257800508728, "learning_rate": 6.751761591093069e-06, "loss": 0.761, "step": 5056 }, { "epoch": 0.4, "grad_norm": 1.1149760061769185, "learning_rate": 6.750548141713775e-06, "loss": 0.1221, "step": 5057 }, { "epoch": 0.4, "grad_norm": 1.278570615557228, "learning_rate": 6.749334574814882e-06, "loss": 0.1432, "step": 5058 }, { "epoch": 0.4, "grad_norm": 1.3692256240549228, "learning_rate": 6.748120890477859e-06, "loss": 0.1613, "step": 5059 }, { "epoch": 0.4, "grad_norm": 1.4990085258271373, "learning_rate": 6.746907088784182e-06, "loss": 0.2077, "step": 5060 }, { "epoch": 0.4, "grad_norm": 1.1353209250819047, "learning_rate": 6.74569316981534e-06, "loss": 0.1389, "step": 5061 }, { "epoch": 0.4, "grad_norm": 1.4560256011956285, "learning_rate": 6.744479133652827e-06, "loss": 0.2174, "step": 5062 }, { "epoch": 0.41, "grad_norm": 5.659779778638779, "learning_rate": 6.743264980378143e-06, "loss": 0.748, "step": 5063 }, { "epoch": 0.41, "grad_norm": 1.367237654157424, "learning_rate": 6.7420507100727994e-06, "loss": 0.2146, "step": 5064 }, { "epoch": 0.41, "grad_norm": 1.1440305214043616, "learning_rate": 6.740836322818314e-06, "loss": 0.1362, "step": 5065 }, { "epoch": 0.41, "grad_norm": 1.2463456296363806, "learning_rate": 6.739621818696211e-06, "loss": 0.1552, "step": 5066 }, { "epoch": 0.41, "grad_norm": 1.2353888281433785, "learning_rate": 6.738407197788026e-06, "loss": 0.1788, "step": 5067 }, { "epoch": 0.41, "grad_norm": 1.264158487750602, "learning_rate": 6.737192460175297e-06, "loss": 0.1852, "step": 5068 }, { "epoch": 0.41, "grad_norm": 1.361099767610137, "learning_rate": 6.735977605939575e-06, "loss": 0.1891, "step": 5069 }, { "epoch": 0.41, "grad_norm": 1.4009815420599434, "learning_rate": 6.734762635162417e-06, "loss": 0.2236, "step": 5070 }, { "epoch": 0.41, "grad_norm": 1.4671571247351352, "learning_rate": 6.733547547925387e-06, "loss": 0.1999, "step": 5071 }, { "epoch": 0.41, "grad_norm": 1.366800866590126, "learning_rate": 6.732332344310058e-06, "loss": 0.2418, "step": 5072 }, { "epoch": 0.41, "grad_norm": 1.4245584301024892, "learning_rate": 6.731117024398009e-06, "loss": 0.2009, "step": 5073 }, { "epoch": 0.41, "grad_norm": 1.3185407405524865, "learning_rate": 6.729901588270829e-06, "loss": 0.2007, "step": 5074 }, { "epoch": 0.41, "grad_norm": 1.4423637331372412, "learning_rate": 6.728686036010115e-06, "loss": 0.2207, "step": 5075 }, { "epoch": 0.41, "grad_norm": 1.4494530150560185, "learning_rate": 6.727470367697468e-06, "loss": 0.1914, "step": 5076 }, { "epoch": 0.41, "grad_norm": 5.56831638037855, "learning_rate": 6.726254583414504e-06, "loss": 0.5959, "step": 5077 }, { "epoch": 0.41, "grad_norm": 1.3020232492479906, "learning_rate": 6.725038683242837e-06, "loss": 0.197, "step": 5078 }, { "epoch": 0.41, "grad_norm": 1.3582745578667978, "learning_rate": 6.723822667264098e-06, "loss": 0.2113, "step": 5079 }, { "epoch": 0.41, "grad_norm": 1.3703998711609113, "learning_rate": 6.7226065355599204e-06, "loss": 0.2328, "step": 5080 }, { "epoch": 0.41, "grad_norm": 1.4767342219225292, "learning_rate": 6.7213902882119455e-06, "loss": 0.2168, "step": 5081 }, { "epoch": 0.41, "grad_norm": 7.465831578868476, "learning_rate": 6.720173925301825e-06, "loss": 0.6009, "step": 5082 }, { "epoch": 0.41, "grad_norm": 1.4052115706334376, "learning_rate": 6.718957446911218e-06, "loss": 0.1891, "step": 5083 }, { "epoch": 0.41, "grad_norm": 1.1180965430031633, "learning_rate": 6.717740853121789e-06, "loss": 0.1785, "step": 5084 }, { "epoch": 0.41, "grad_norm": 1.2194147213173732, "learning_rate": 6.716524144015212e-06, "loss": 0.2058, "step": 5085 }, { "epoch": 0.41, "grad_norm": 9.138699990441886, "learning_rate": 6.7153073196731674e-06, "loss": 0.7238, "step": 5086 }, { "epoch": 0.41, "grad_norm": 1.4359326725586719, "learning_rate": 6.7140903801773484e-06, "loss": 0.1844, "step": 5087 }, { "epoch": 0.41, "grad_norm": 1.296712383438515, "learning_rate": 6.712873325609445e-06, "loss": 0.1975, "step": 5088 }, { "epoch": 0.41, "grad_norm": 1.2789756214718724, "learning_rate": 6.711656156051167e-06, "loss": 0.1821, "step": 5089 }, { "epoch": 0.41, "grad_norm": 1.3187854076684844, "learning_rate": 6.710438871584225e-06, "loss": 0.1869, "step": 5090 }, { "epoch": 0.41, "grad_norm": 1.2994498665757666, "learning_rate": 6.709221472290339e-06, "loss": 0.1725, "step": 5091 }, { "epoch": 0.41, "grad_norm": 1.360469534991078, "learning_rate": 6.708003958251237e-06, "loss": 0.2216, "step": 5092 }, { "epoch": 0.41, "grad_norm": 1.570249553278296, "learning_rate": 6.706786329548654e-06, "loss": 0.2211, "step": 5093 }, { "epoch": 0.41, "grad_norm": 1.4245777614144628, "learning_rate": 6.7055685862643336e-06, "loss": 0.1907, "step": 5094 }, { "epoch": 0.41, "grad_norm": 1.3735958873485237, "learning_rate": 6.704350728480026e-06, "loss": 0.2185, "step": 5095 }, { "epoch": 0.41, "grad_norm": 1.2785229780748044, "learning_rate": 6.7031327562774914e-06, "loss": 0.1917, "step": 5096 }, { "epoch": 0.41, "grad_norm": 1.278140072653446, "learning_rate": 6.701914669738494e-06, "loss": 0.2125, "step": 5097 }, { "epoch": 0.41, "grad_norm": 1.4434194683931856, "learning_rate": 6.700696468944806e-06, "loss": 0.2224, "step": 5098 }, { "epoch": 0.41, "grad_norm": 1.4729977219383583, "learning_rate": 6.699478153978214e-06, "loss": 0.2232, "step": 5099 }, { "epoch": 0.41, "grad_norm": 1.4187648921743439, "learning_rate": 6.698259724920503e-06, "loss": 0.2216, "step": 5100 }, { "epoch": 0.41, "grad_norm": 1.2021595713911357, "learning_rate": 6.697041181853472e-06, "loss": 0.1766, "step": 5101 }, { "epoch": 0.41, "grad_norm": 1.3605060827988127, "learning_rate": 6.695822524858922e-06, "loss": 0.1979, "step": 5102 }, { "epoch": 0.41, "grad_norm": 1.2135922307564213, "learning_rate": 6.69460375401867e-06, "loss": 0.1598, "step": 5103 }, { "epoch": 0.41, "grad_norm": 1.6471258015363548, "learning_rate": 6.693384869414534e-06, "loss": 0.2166, "step": 5104 }, { "epoch": 0.41, "grad_norm": 1.1291084871068817, "learning_rate": 6.69216587112834e-06, "loss": 0.1772, "step": 5105 }, { "epoch": 0.41, "grad_norm": 1.3507924587822886, "learning_rate": 6.690946759241925e-06, "loss": 0.1867, "step": 5106 }, { "epoch": 0.41, "grad_norm": 1.5264977503049746, "learning_rate": 6.689727533837129e-06, "loss": 0.1761, "step": 5107 }, { "epoch": 0.41, "grad_norm": 1.3464513142596253, "learning_rate": 6.688508194995806e-06, "loss": 0.1913, "step": 5108 }, { "epoch": 0.41, "grad_norm": 1.2005929596028337, "learning_rate": 6.68728874279981e-06, "loss": 0.2091, "step": 5109 }, { "epoch": 0.41, "grad_norm": 1.2836211981695371, "learning_rate": 6.686069177331009e-06, "loss": 0.2149, "step": 5110 }, { "epoch": 0.41, "grad_norm": 11.985673162085996, "learning_rate": 6.684849498671277e-06, "loss": 0.7009, "step": 5111 }, { "epoch": 0.41, "grad_norm": 1.3683650079021368, "learning_rate": 6.683629706902491e-06, "loss": 0.2422, "step": 5112 }, { "epoch": 0.41, "grad_norm": 1.2387814503990704, "learning_rate": 6.682409802106543e-06, "loss": 0.18, "step": 5113 }, { "epoch": 0.41, "grad_norm": 1.4213571056989656, "learning_rate": 6.681189784365327e-06, "loss": 0.1751, "step": 5114 }, { "epoch": 0.41, "grad_norm": 1.337220979241151, "learning_rate": 6.679969653760747e-06, "loss": 0.2044, "step": 5115 }, { "epoch": 0.41, "grad_norm": 1.3924072353410815, "learning_rate": 6.678749410374714e-06, "loss": 0.2325, "step": 5116 }, { "epoch": 0.41, "grad_norm": 1.2504738047540074, "learning_rate": 6.677529054289147e-06, "loss": 0.1764, "step": 5117 }, { "epoch": 0.41, "grad_norm": 5.2249976894632075, "learning_rate": 6.676308585585971e-06, "loss": 0.4245, "step": 5118 }, { "epoch": 0.41, "grad_norm": 1.5792309978259225, "learning_rate": 6.675088004347121e-06, "loss": 0.1946, "step": 5119 }, { "epoch": 0.41, "grad_norm": 1.401352906677056, "learning_rate": 6.673867310654538e-06, "loss": 0.1719, "step": 5120 }, { "epoch": 0.41, "grad_norm": 1.4070388027380107, "learning_rate": 6.672646504590172e-06, "loss": 0.212, "step": 5121 }, { "epoch": 0.41, "grad_norm": 1.510194873582569, "learning_rate": 6.671425586235978e-06, "loss": 0.258, "step": 5122 }, { "epoch": 0.41, "grad_norm": 1.4186702115404224, "learning_rate": 6.6702045556739195e-06, "loss": 0.206, "step": 5123 }, { "epoch": 0.41, "grad_norm": 1.3454907528411397, "learning_rate": 6.668983412985968e-06, "loss": 0.2013, "step": 5124 }, { "epoch": 0.41, "grad_norm": 1.409654812960609, "learning_rate": 6.667762158254104e-06, "loss": 0.1984, "step": 5125 }, { "epoch": 0.41, "grad_norm": 1.3092907006306889, "learning_rate": 6.666540791560312e-06, "loss": 0.2202, "step": 5126 }, { "epoch": 0.41, "grad_norm": 1.3680391626889064, "learning_rate": 6.665319312986589e-06, "loss": 0.2169, "step": 5127 }, { "epoch": 0.41, "grad_norm": 8.303102769323935, "learning_rate": 6.664097722614934e-06, "loss": 0.5969, "step": 5128 }, { "epoch": 0.41, "grad_norm": 1.3862753603771096, "learning_rate": 6.662876020527357e-06, "loss": 0.1817, "step": 5129 }, { "epoch": 0.41, "grad_norm": 1.2823484484426337, "learning_rate": 6.661654206805874e-06, "loss": 0.2128, "step": 5130 }, { "epoch": 0.41, "grad_norm": 1.4896645180184762, "learning_rate": 6.6604322815325105e-06, "loss": 0.169, "step": 5131 }, { "epoch": 0.41, "grad_norm": 1.4577324897450734, "learning_rate": 6.659210244789296e-06, "loss": 0.2058, "step": 5132 }, { "epoch": 0.41, "grad_norm": 1.2797823273685967, "learning_rate": 6.65798809665827e-06, "loss": 0.1528, "step": 5133 }, { "epoch": 0.41, "grad_norm": 1.4565319664506642, "learning_rate": 6.656765837221481e-06, "loss": 0.2789, "step": 5134 }, { "epoch": 0.41, "grad_norm": 1.4823194905770938, "learning_rate": 6.6555434665609806e-06, "loss": 0.1922, "step": 5135 }, { "epoch": 0.41, "grad_norm": 7.217833003250719, "learning_rate": 6.654320984758832e-06, "loss": 0.5128, "step": 5136 }, { "epoch": 0.41, "grad_norm": 1.3322279771206507, "learning_rate": 6.653098391897102e-06, "loss": 0.2068, "step": 5137 }, { "epoch": 0.41, "grad_norm": 1.3271212350446058, "learning_rate": 6.65187568805787e-06, "loss": 0.2158, "step": 5138 }, { "epoch": 0.41, "grad_norm": 1.4117990795235238, "learning_rate": 6.650652873323218e-06, "loss": 0.1928, "step": 5139 }, { "epoch": 0.41, "grad_norm": 6.380734059257349, "learning_rate": 6.6494299477752364e-06, "loss": 0.6327, "step": 5140 }, { "epoch": 0.41, "grad_norm": 1.306915458809025, "learning_rate": 6.6482069114960245e-06, "loss": 0.2187, "step": 5141 }, { "epoch": 0.41, "grad_norm": 1.408906183130293, "learning_rate": 6.646983764567689e-06, "loss": 0.1944, "step": 5142 }, { "epoch": 0.41, "grad_norm": 1.36877857293987, "learning_rate": 6.645760507072343e-06, "loss": 0.1788, "step": 5143 }, { "epoch": 0.41, "grad_norm": 1.538568546982342, "learning_rate": 6.644537139092109e-06, "loss": 0.2329, "step": 5144 }, { "epoch": 0.41, "grad_norm": 1.414009116440467, "learning_rate": 6.643313660709114e-06, "loss": 0.1987, "step": 5145 }, { "epoch": 0.41, "grad_norm": 1.3831981680385181, "learning_rate": 6.642090072005493e-06, "loss": 0.2357, "step": 5146 }, { "epoch": 0.41, "grad_norm": 1.3173789491870171, "learning_rate": 6.6408663730633895e-06, "loss": 0.1492, "step": 5147 }, { "epoch": 0.41, "grad_norm": 1.2529265130886371, "learning_rate": 6.639642563964956e-06, "loss": 0.1347, "step": 5148 }, { "epoch": 0.41, "grad_norm": 1.390799814663467, "learning_rate": 6.63841864479235e-06, "loss": 0.1955, "step": 5149 }, { "epoch": 0.41, "grad_norm": 1.3969541313429892, "learning_rate": 6.637194615627733e-06, "loss": 0.2217, "step": 5150 }, { "epoch": 0.41, "grad_norm": 4.777349784117009, "learning_rate": 6.635970476553284e-06, "loss": 0.6616, "step": 5151 }, { "epoch": 0.41, "grad_norm": 1.2839660546476244, "learning_rate": 6.63474622765118e-06, "loss": 0.2314, "step": 5152 }, { "epoch": 0.41, "grad_norm": 1.2011964471013474, "learning_rate": 6.633521869003607e-06, "loss": 0.1615, "step": 5153 }, { "epoch": 0.41, "grad_norm": 1.375710903732313, "learning_rate": 6.632297400692762e-06, "loss": 0.2183, "step": 5154 }, { "epoch": 0.41, "grad_norm": 1.5528831367273181, "learning_rate": 6.631072822800847e-06, "loss": 0.2024, "step": 5155 }, { "epoch": 0.41, "grad_norm": 1.2150661916156171, "learning_rate": 6.629848135410072e-06, "loss": 0.1668, "step": 5156 }, { "epoch": 0.41, "grad_norm": 1.2386011572181472, "learning_rate": 6.628623338602653e-06, "loss": 0.1629, "step": 5157 }, { "epoch": 0.41, "grad_norm": 1.6045531022216672, "learning_rate": 6.627398432460815e-06, "loss": 0.2398, "step": 5158 }, { "epoch": 0.41, "grad_norm": 4.5666984658424905, "learning_rate": 6.626173417066789e-06, "loss": 0.6172, "step": 5159 }, { "epoch": 0.41, "grad_norm": 1.445643071293786, "learning_rate": 6.624948292502814e-06, "loss": 0.2147, "step": 5160 }, { "epoch": 0.41, "grad_norm": 1.4017097066083113, "learning_rate": 6.623723058851137e-06, "loss": 0.2201, "step": 5161 }, { "epoch": 0.41, "grad_norm": 1.1477163864114104, "learning_rate": 6.6224977161940115e-06, "loss": 0.1419, "step": 5162 }, { "epoch": 0.41, "grad_norm": 1.284885404375715, "learning_rate": 6.6212722646137e-06, "loss": 0.2063, "step": 5163 }, { "epoch": 0.41, "grad_norm": 1.3652835683167315, "learning_rate": 6.620046704192466e-06, "loss": 0.2406, "step": 5164 }, { "epoch": 0.41, "grad_norm": 5.513823307266773, "learning_rate": 6.618821035012591e-06, "loss": 0.6655, "step": 5165 }, { "epoch": 0.41, "grad_norm": 1.306438313565492, "learning_rate": 6.617595257156355e-06, "loss": 0.1788, "step": 5166 }, { "epoch": 0.41, "grad_norm": 1.4229374764507086, "learning_rate": 6.616369370706046e-06, "loss": 0.2123, "step": 5167 }, { "epoch": 0.41, "grad_norm": 1.6782941381860743, "learning_rate": 6.615143375743965e-06, "loss": 0.2354, "step": 5168 }, { "epoch": 0.41, "grad_norm": 1.3772041300385436, "learning_rate": 6.613917272352416e-06, "loss": 0.1655, "step": 5169 }, { "epoch": 0.41, "grad_norm": 1.3191228760552132, "learning_rate": 6.61269106061371e-06, "loss": 0.224, "step": 5170 }, { "epoch": 0.41, "grad_norm": 1.365391664482848, "learning_rate": 6.611464740610165e-06, "loss": 0.206, "step": 5171 }, { "epoch": 0.41, "grad_norm": 1.4188461664109568, "learning_rate": 6.610238312424112e-06, "loss": 0.2089, "step": 5172 }, { "epoch": 0.41, "grad_norm": 1.2845131248319621, "learning_rate": 6.609011776137881e-06, "loss": 0.2282, "step": 5173 }, { "epoch": 0.41, "grad_norm": 1.501063172050814, "learning_rate": 6.607785131833816e-06, "loss": 0.245, "step": 5174 }, { "epoch": 0.41, "grad_norm": 1.181558809433394, "learning_rate": 6.6065583795942625e-06, "loss": 0.2312, "step": 5175 }, { "epoch": 0.41, "grad_norm": 1.304446208118464, "learning_rate": 6.605331519501578e-06, "loss": 0.2161, "step": 5176 }, { "epoch": 0.41, "grad_norm": 1.2041310300143628, "learning_rate": 6.604104551638124e-06, "loss": 0.1983, "step": 5177 }, { "epoch": 0.41, "grad_norm": 4.189824072740901, "learning_rate": 6.60287747608627e-06, "loss": 0.3953, "step": 5178 }, { "epoch": 0.41, "grad_norm": 1.3396686699741533, "learning_rate": 6.601650292928395e-06, "loss": 0.2344, "step": 5179 }, { "epoch": 0.41, "grad_norm": 1.3259471897185762, "learning_rate": 6.600423002246885e-06, "loss": 0.1865, "step": 5180 }, { "epoch": 0.41, "grad_norm": 1.122777008167117, "learning_rate": 6.599195604124127e-06, "loss": 0.1651, "step": 5181 }, { "epoch": 0.41, "grad_norm": 1.6060983910820983, "learning_rate": 6.597968098642524e-06, "loss": 0.1945, "step": 5182 }, { "epoch": 0.41, "grad_norm": 1.3248521311946972, "learning_rate": 6.596740485884483e-06, "loss": 0.1862, "step": 5183 }, { "epoch": 0.41, "grad_norm": 1.2266571024389004, "learning_rate": 6.595512765932412e-06, "loss": 0.2094, "step": 5184 }, { "epoch": 0.41, "grad_norm": 1.198211484396834, "learning_rate": 6.594284938868737e-06, "loss": 0.1677, "step": 5185 }, { "epoch": 0.41, "grad_norm": 1.259374019543164, "learning_rate": 6.593057004775882e-06, "loss": 0.2022, "step": 5186 }, { "epoch": 0.41, "grad_norm": 1.4254052176614622, "learning_rate": 6.591828963736285e-06, "loss": 0.2259, "step": 5187 }, { "epoch": 0.42, "grad_norm": 1.3335011203915845, "learning_rate": 6.590600815832385e-06, "loss": 0.2365, "step": 5188 }, { "epoch": 0.42, "grad_norm": 7.113398830976923, "learning_rate": 6.589372561146634e-06, "loss": 0.5034, "step": 5189 }, { "epoch": 0.42, "grad_norm": 8.465590285250945, "learning_rate": 6.588144199761487e-06, "loss": 0.6081, "step": 5190 }, { "epoch": 0.42, "grad_norm": 1.2444079727787378, "learning_rate": 6.586915731759409e-06, "loss": 0.192, "step": 5191 }, { "epoch": 0.42, "grad_norm": 1.5219131647599278, "learning_rate": 6.585687157222869e-06, "loss": 0.2074, "step": 5192 }, { "epoch": 0.42, "grad_norm": 4.235322431467677, "learning_rate": 6.584458476234345e-06, "loss": 0.4622, "step": 5193 }, { "epoch": 0.42, "grad_norm": 1.4100632439150653, "learning_rate": 6.583229688876323e-06, "loss": 0.1898, "step": 5194 }, { "epoch": 0.42, "grad_norm": 4.833682386365358, "learning_rate": 6.582000795231296e-06, "loss": 0.6271, "step": 5195 }, { "epoch": 0.42, "grad_norm": 1.279325582236379, "learning_rate": 6.58077179538176e-06, "loss": 0.1938, "step": 5196 }, { "epoch": 0.42, "grad_norm": 1.137727543879439, "learning_rate": 6.579542689410227e-06, "loss": 0.1634, "step": 5197 }, { "epoch": 0.42, "grad_norm": 1.2817231274525154, "learning_rate": 6.578313477399206e-06, "loss": 0.1798, "step": 5198 }, { "epoch": 0.42, "grad_norm": 1.4140677225061073, "learning_rate": 6.577084159431218e-06, "loss": 0.2084, "step": 5199 }, { "epoch": 0.42, "grad_norm": 1.2156221993435592, "learning_rate": 6.5758547355887944e-06, "loss": 0.1945, "step": 5200 }, { "epoch": 0.42, "grad_norm": 1.3106859794344599, "learning_rate": 6.574625205954466e-06, "loss": 0.1553, "step": 5201 }, { "epoch": 0.42, "grad_norm": 1.350128581282678, "learning_rate": 6.573395570610776e-06, "loss": 0.1935, "step": 5202 }, { "epoch": 0.42, "grad_norm": 4.077204401761722, "learning_rate": 6.5721658296402756e-06, "loss": 0.3873, "step": 5203 }, { "epoch": 0.42, "grad_norm": 1.4068098275491667, "learning_rate": 6.570935983125519e-06, "loss": 0.2128, "step": 5204 }, { "epoch": 0.42, "grad_norm": 1.1937252631217878, "learning_rate": 6.5697060311490705e-06, "loss": 0.1877, "step": 5205 }, { "epoch": 0.42, "grad_norm": 1.3607506996251284, "learning_rate": 6.5684759737935e-06, "loss": 0.2213, "step": 5206 }, { "epoch": 0.42, "grad_norm": 1.188717692893149, "learning_rate": 6.567245811141385e-06, "loss": 0.1705, "step": 5207 }, { "epoch": 0.42, "grad_norm": 1.254447698085806, "learning_rate": 6.56601554327531e-06, "loss": 0.1866, "step": 5208 }, { "epoch": 0.42, "grad_norm": 1.3673909423473616, "learning_rate": 6.564785170277868e-06, "loss": 0.2116, "step": 5209 }, { "epoch": 0.42, "grad_norm": 1.8301995695810152, "learning_rate": 6.563554692231655e-06, "loss": 0.2007, "step": 5210 }, { "epoch": 0.42, "grad_norm": 1.3866670169513238, "learning_rate": 6.562324109219278e-06, "loss": 0.2417, "step": 5211 }, { "epoch": 0.42, "grad_norm": 1.4456057399847053, "learning_rate": 6.56109342132335e-06, "loss": 0.2426, "step": 5212 }, { "epoch": 0.42, "grad_norm": 1.280624360632079, "learning_rate": 6.559862628626491e-06, "loss": 0.2202, "step": 5213 }, { "epoch": 0.42, "grad_norm": 1.1813613310739748, "learning_rate": 6.5586317312113265e-06, "loss": 0.159, "step": 5214 }, { "epoch": 0.42, "grad_norm": 1.277997990418586, "learning_rate": 6.557400729160494e-06, "loss": 0.207, "step": 5215 }, { "epoch": 0.42, "grad_norm": 1.2871234247354348, "learning_rate": 6.556169622556629e-06, "loss": 0.1941, "step": 5216 }, { "epoch": 0.42, "grad_norm": 1.5459302285212213, "learning_rate": 6.554938411482383e-06, "loss": 0.2348, "step": 5217 }, { "epoch": 0.42, "grad_norm": 1.4305835030541343, "learning_rate": 6.553707096020412e-06, "loss": 0.2282, "step": 5218 }, { "epoch": 0.42, "grad_norm": 1.3662932242711432, "learning_rate": 6.552475676253374e-06, "loss": 0.1957, "step": 5219 }, { "epoch": 0.42, "grad_norm": 1.3056266276748318, "learning_rate": 6.5512441522639415e-06, "loss": 0.1935, "step": 5220 }, { "epoch": 0.42, "grad_norm": 1.3217043575378415, "learning_rate": 6.550012524134788e-06, "loss": 0.1814, "step": 5221 }, { "epoch": 0.42, "grad_norm": 1.3004994485496875, "learning_rate": 6.548780791948597e-06, "loss": 0.1876, "step": 5222 }, { "epoch": 0.42, "grad_norm": 1.2596767006464704, "learning_rate": 6.547548955788059e-06, "loss": 0.1866, "step": 5223 }, { "epoch": 0.42, "grad_norm": 1.2231860834910764, "learning_rate": 6.5463170157358725e-06, "loss": 0.1597, "step": 5224 }, { "epoch": 0.42, "grad_norm": 1.3802075244113663, "learning_rate": 6.545084971874738e-06, "loss": 0.2276, "step": 5225 }, { "epoch": 0.42, "grad_norm": 5.539198423189714, "learning_rate": 6.543852824287369e-06, "loss": 0.615, "step": 5226 }, { "epoch": 0.42, "grad_norm": 1.169818901508934, "learning_rate": 6.542620573056481e-06, "loss": 0.18, "step": 5227 }, { "epoch": 0.42, "grad_norm": 1.4855886860484384, "learning_rate": 6.5413882182648e-06, "loss": 0.207, "step": 5228 }, { "epoch": 0.42, "grad_norm": 1.2826828077468595, "learning_rate": 6.540155759995057e-06, "loss": 0.1702, "step": 5229 }, { "epoch": 0.42, "grad_norm": 1.3039588389191847, "learning_rate": 6.538923198329993e-06, "loss": 0.2288, "step": 5230 }, { "epoch": 0.42, "grad_norm": 1.5547514510576375, "learning_rate": 6.5376905333523525e-06, "loss": 0.2192, "step": 5231 }, { "epoch": 0.42, "grad_norm": 7.629258780261147, "learning_rate": 6.536457765144886e-06, "loss": 0.3663, "step": 5232 }, { "epoch": 0.42, "grad_norm": 1.2238586296618559, "learning_rate": 6.535224893790354e-06, "loss": 0.1895, "step": 5233 }, { "epoch": 0.42, "grad_norm": 1.4729332120182608, "learning_rate": 6.533991919371524e-06, "loss": 0.2168, "step": 5234 }, { "epoch": 0.42, "grad_norm": 1.3246046912714695, "learning_rate": 6.5327588419711695e-06, "loss": 0.1917, "step": 5235 }, { "epoch": 0.42, "grad_norm": 1.5144653474749497, "learning_rate": 6.531525661672069e-06, "loss": 0.228, "step": 5236 }, { "epoch": 0.42, "grad_norm": 5.506070311080983, "learning_rate": 6.530292378557011e-06, "loss": 0.5919, "step": 5237 }, { "epoch": 0.42, "grad_norm": 1.39170617558987, "learning_rate": 6.529058992708788e-06, "loss": 0.206, "step": 5238 }, { "epoch": 0.42, "grad_norm": 1.3966972669628692, "learning_rate": 6.527825504210204e-06, "loss": 0.242, "step": 5239 }, { "epoch": 0.42, "grad_norm": 1.4078554800074747, "learning_rate": 6.526591913144062e-06, "loss": 0.1846, "step": 5240 }, { "epoch": 0.42, "grad_norm": 5.387431627423938, "learning_rate": 6.525358219593181e-06, "loss": 0.68, "step": 5241 }, { "epoch": 0.42, "grad_norm": 1.3283502240473737, "learning_rate": 6.524124423640381e-06, "loss": 0.1948, "step": 5242 }, { "epoch": 0.42, "grad_norm": 1.445215637872364, "learning_rate": 6.522890525368489e-06, "loss": 0.181, "step": 5243 }, { "epoch": 0.42, "grad_norm": 1.4414330232891739, "learning_rate": 6.521656524860345e-06, "loss": 0.2355, "step": 5244 }, { "epoch": 0.42, "grad_norm": 1.4504433642224108, "learning_rate": 6.5204224221987864e-06, "loss": 0.2079, "step": 5245 }, { "epoch": 0.42, "grad_norm": 1.6577254816797182, "learning_rate": 6.519188217466664e-06, "loss": 0.208, "step": 5246 }, { "epoch": 0.42, "grad_norm": 1.2590556838192246, "learning_rate": 6.517953910746832e-06, "loss": 0.1799, "step": 5247 }, { "epoch": 0.42, "grad_norm": 1.4374067504641521, "learning_rate": 6.516719502122158e-06, "loss": 0.2463, "step": 5248 }, { "epoch": 0.42, "grad_norm": 1.2851206548499534, "learning_rate": 6.515484991675506e-06, "loss": 0.2017, "step": 5249 }, { "epoch": 0.42, "grad_norm": 1.3290136415083715, "learning_rate": 6.514250379489754e-06, "loss": 0.2067, "step": 5250 }, { "epoch": 0.42, "grad_norm": 1.4519870786963927, "learning_rate": 6.513015665647787e-06, "loss": 0.2088, "step": 5251 }, { "epoch": 0.42, "grad_norm": 1.2461168106654088, "learning_rate": 6.511780850232495e-06, "loss": 0.1601, "step": 5252 }, { "epoch": 0.42, "grad_norm": 6.030239900414695, "learning_rate": 6.510545933326774e-06, "loss": 0.7233, "step": 5253 }, { "epoch": 0.42, "grad_norm": 1.383948713720009, "learning_rate": 6.509310915013527e-06, "loss": 0.2127, "step": 5254 }, { "epoch": 0.42, "grad_norm": 1.254816776586974, "learning_rate": 6.508075795375666e-06, "loss": 0.1672, "step": 5255 }, { "epoch": 0.42, "grad_norm": 1.4958917702463408, "learning_rate": 6.506840574496107e-06, "loss": 0.204, "step": 5256 }, { "epoch": 0.42, "grad_norm": 1.2593127748738593, "learning_rate": 6.505605252457774e-06, "loss": 0.1784, "step": 5257 }, { "epoch": 0.42, "grad_norm": 1.1576071579002136, "learning_rate": 6.504369829343599e-06, "loss": 0.184, "step": 5258 }, { "epoch": 0.42, "grad_norm": 4.31941450612502, "learning_rate": 6.50313430523652e-06, "loss": 0.5735, "step": 5259 }, { "epoch": 0.42, "grad_norm": 1.3870594752053418, "learning_rate": 6.5018986802194805e-06, "loss": 0.2062, "step": 5260 }, { "epoch": 0.42, "grad_norm": 1.3546006283955012, "learning_rate": 6.500662954375432e-06, "loss": 0.1911, "step": 5261 }, { "epoch": 0.42, "grad_norm": 1.263165480690272, "learning_rate": 6.499427127787332e-06, "loss": 0.1839, "step": 5262 }, { "epoch": 0.42, "grad_norm": 1.4917809090089957, "learning_rate": 6.498191200538147e-06, "loss": 0.1859, "step": 5263 }, { "epoch": 0.42, "grad_norm": 1.3084704236254123, "learning_rate": 6.496955172710846e-06, "loss": 0.16, "step": 5264 }, { "epoch": 0.42, "grad_norm": 1.3970049996427865, "learning_rate": 6.495719044388409e-06, "loss": 0.2243, "step": 5265 }, { "epoch": 0.42, "grad_norm": 1.592255641479745, "learning_rate": 6.494482815653822e-06, "loss": 0.2664, "step": 5266 }, { "epoch": 0.42, "grad_norm": 1.272641813071551, "learning_rate": 6.493246486590074e-06, "loss": 0.1932, "step": 5267 }, { "epoch": 0.42, "grad_norm": 1.2638436118613734, "learning_rate": 6.492010057280165e-06, "loss": 0.2076, "step": 5268 }, { "epoch": 0.42, "grad_norm": 1.3169964189654806, "learning_rate": 6.4907735278071e-06, "loss": 0.201, "step": 5269 }, { "epoch": 0.42, "grad_norm": 6.254891763412214, "learning_rate": 6.489536898253893e-06, "loss": 0.8521, "step": 5270 }, { "epoch": 0.42, "grad_norm": 1.433486420115116, "learning_rate": 6.48830016870356e-06, "loss": 0.2136, "step": 5271 }, { "epoch": 0.42, "grad_norm": 1.2690419522221899, "learning_rate": 6.487063339239127e-06, "loss": 0.1677, "step": 5272 }, { "epoch": 0.42, "grad_norm": 1.3383836535511908, "learning_rate": 6.485826409943627e-06, "loss": 0.2125, "step": 5273 }, { "epoch": 0.42, "grad_norm": 1.1445528865312937, "learning_rate": 6.484589380900097e-06, "loss": 0.166, "step": 5274 }, { "epoch": 0.42, "grad_norm": 1.3504873031467828, "learning_rate": 6.483352252191585e-06, "loss": 0.2071, "step": 5275 }, { "epoch": 0.42, "grad_norm": 5.395004743627818, "learning_rate": 6.482115023901141e-06, "loss": 0.7169, "step": 5276 }, { "epoch": 0.42, "grad_norm": 1.3680353562662346, "learning_rate": 6.480877696111826e-06, "loss": 0.1637, "step": 5277 }, { "epoch": 0.42, "grad_norm": 6.933531513868063, "learning_rate": 6.479640268906703e-06, "loss": 0.6104, "step": 5278 }, { "epoch": 0.42, "grad_norm": 1.3290806034404452, "learning_rate": 6.478402742368847e-06, "loss": 0.205, "step": 5279 }, { "epoch": 0.42, "grad_norm": 1.4015827887361438, "learning_rate": 6.4771651165813345e-06, "loss": 0.2069, "step": 5280 }, { "epoch": 0.42, "grad_norm": 1.533216254258825, "learning_rate": 6.4759273916272525e-06, "loss": 0.188, "step": 5281 }, { "epoch": 0.42, "grad_norm": 1.1712744974487044, "learning_rate": 6.4746895675896925e-06, "loss": 0.1824, "step": 5282 }, { "epoch": 0.42, "grad_norm": 1.2743347951300044, "learning_rate": 6.473451644551753e-06, "loss": 0.1988, "step": 5283 }, { "epoch": 0.42, "grad_norm": 1.3679096934570472, "learning_rate": 6.472213622596542e-06, "loss": 0.2122, "step": 5284 }, { "epoch": 0.42, "grad_norm": 1.476808405349239, "learning_rate": 6.4709755018071685e-06, "loss": 0.2162, "step": 5285 }, { "epoch": 0.42, "grad_norm": 1.5483900357120055, "learning_rate": 6.469737282266752e-06, "loss": 0.2653, "step": 5286 }, { "epoch": 0.42, "grad_norm": 1.350550264475312, "learning_rate": 6.468498964058421e-06, "loss": 0.2052, "step": 5287 }, { "epoch": 0.42, "grad_norm": 1.3848710410914897, "learning_rate": 6.4672605472653035e-06, "loss": 0.188, "step": 5288 }, { "epoch": 0.42, "grad_norm": 1.3128448240890085, "learning_rate": 6.466022031970541e-06, "loss": 0.1604, "step": 5289 }, { "epoch": 0.42, "grad_norm": 1.313543420913374, "learning_rate": 6.464783418257278e-06, "loss": 0.2222, "step": 5290 }, { "epoch": 0.42, "grad_norm": 1.3533663807416145, "learning_rate": 6.463544706208663e-06, "loss": 0.2401, "step": 5291 }, { "epoch": 0.42, "grad_norm": 1.4547640740483785, "learning_rate": 6.46230589590786e-06, "loss": 0.1783, "step": 5292 }, { "epoch": 0.42, "grad_norm": 1.3873148679714888, "learning_rate": 6.461066987438032e-06, "loss": 0.2056, "step": 5293 }, { "epoch": 0.42, "grad_norm": 1.3308554909654229, "learning_rate": 6.45982798088235e-06, "loss": 0.2225, "step": 5294 }, { "epoch": 0.42, "grad_norm": 1.161112617452865, "learning_rate": 6.45858887632399e-06, "loss": 0.2152, "step": 5295 }, { "epoch": 0.42, "grad_norm": 1.3387467898282273, "learning_rate": 6.457349673846143e-06, "loss": 0.1878, "step": 5296 }, { "epoch": 0.42, "grad_norm": 1.3113179596051425, "learning_rate": 6.4561103735319944e-06, "loss": 0.2051, "step": 5297 }, { "epoch": 0.42, "grad_norm": 1.4441212590945582, "learning_rate": 6.454870975464744e-06, "loss": 0.2193, "step": 5298 }, { "epoch": 0.42, "grad_norm": 5.443603744365554, "learning_rate": 6.453631479727599e-06, "loss": 0.6446, "step": 5299 }, { "epoch": 0.42, "grad_norm": 1.3066552009333854, "learning_rate": 6.452391886403767e-06, "loss": 0.1882, "step": 5300 }, { "epoch": 0.42, "grad_norm": 1.215473113162278, "learning_rate": 6.451152195576469e-06, "loss": 0.1817, "step": 5301 }, { "epoch": 0.42, "grad_norm": 1.5823306336168448, "learning_rate": 6.4499124073289246e-06, "loss": 0.2329, "step": 5302 }, { "epoch": 0.42, "grad_norm": 1.2454149933789684, "learning_rate": 6.448672521744369e-06, "loss": 0.2023, "step": 5303 }, { "epoch": 0.42, "grad_norm": 1.1938761025019873, "learning_rate": 6.447432538906038e-06, "loss": 0.184, "step": 5304 }, { "epoch": 0.42, "grad_norm": 1.293354731936287, "learning_rate": 6.446192458897174e-06, "loss": 0.2189, "step": 5305 }, { "epoch": 0.42, "grad_norm": 1.2467637583450004, "learning_rate": 6.444952281801029e-06, "loss": 0.1986, "step": 5306 }, { "epoch": 0.42, "grad_norm": 1.3378535854635816, "learning_rate": 6.4437120077008595e-06, "loss": 0.1927, "step": 5307 }, { "epoch": 0.42, "grad_norm": 1.392758857925226, "learning_rate": 6.4424716366799275e-06, "loss": 0.2207, "step": 5308 }, { "epoch": 0.42, "grad_norm": 1.2988957758820754, "learning_rate": 6.441231168821505e-06, "loss": 0.1931, "step": 5309 }, { "epoch": 0.42, "grad_norm": 1.3904771883192915, "learning_rate": 6.439990604208868e-06, "loss": 0.2337, "step": 5310 }, { "epoch": 0.42, "grad_norm": 6.424339908342641, "learning_rate": 6.438749942925298e-06, "loss": 0.8492, "step": 5311 }, { "epoch": 0.42, "grad_norm": 1.1462883964418502, "learning_rate": 6.4375091850540834e-06, "loss": 0.1823, "step": 5312 }, { "epoch": 0.43, "grad_norm": 1.3111800378903464, "learning_rate": 6.436268330678523e-06, "loss": 0.1872, "step": 5313 }, { "epoch": 0.43, "grad_norm": 1.3095309408689453, "learning_rate": 6.435027379881921e-06, "loss": 0.1554, "step": 5314 }, { "epoch": 0.43, "grad_norm": 1.3144509835966167, "learning_rate": 6.433786332747578e-06, "loss": 0.1808, "step": 5315 }, { "epoch": 0.43, "grad_norm": 1.3222440176458186, "learning_rate": 6.432545189358818e-06, "loss": 0.203, "step": 5316 }, { "epoch": 0.43, "grad_norm": 1.6903296094257299, "learning_rate": 6.4313039497989575e-06, "loss": 0.2606, "step": 5317 }, { "epoch": 0.43, "grad_norm": 1.2779968845586214, "learning_rate": 6.4300626141513264e-06, "loss": 0.1547, "step": 5318 }, { "epoch": 0.43, "grad_norm": 1.3535903462433458, "learning_rate": 6.4288211824992575e-06, "loss": 0.186, "step": 5319 }, { "epoch": 0.43, "grad_norm": 5.1276629223870405, "learning_rate": 6.427579654926095e-06, "loss": 0.5141, "step": 5320 }, { "epoch": 0.43, "grad_norm": 1.3643396483646841, "learning_rate": 6.426338031515184e-06, "loss": 0.2038, "step": 5321 }, { "epoch": 0.43, "grad_norm": 1.2272505186947735, "learning_rate": 6.425096312349881e-06, "loss": 0.1371, "step": 5322 }, { "epoch": 0.43, "grad_norm": 1.309281920455454, "learning_rate": 6.423854497513544e-06, "loss": 0.2387, "step": 5323 }, { "epoch": 0.43, "grad_norm": 1.2030404204318856, "learning_rate": 6.422612587089541e-06, "loss": 0.1979, "step": 5324 }, { "epoch": 0.43, "grad_norm": 1.5750933372834501, "learning_rate": 6.421370581161244e-06, "loss": 0.2489, "step": 5325 }, { "epoch": 0.43, "grad_norm": 7.568426883302416, "learning_rate": 6.420128479812032e-06, "loss": 0.756, "step": 5326 }, { "epoch": 0.43, "grad_norm": 1.2434292311130164, "learning_rate": 6.418886283125294e-06, "loss": 0.1695, "step": 5327 }, { "epoch": 0.43, "grad_norm": 1.3528966670120768, "learning_rate": 6.4176439911844205e-06, "loss": 0.1724, "step": 5328 }, { "epoch": 0.43, "grad_norm": 1.4541263609865582, "learning_rate": 6.41640160407281e-06, "loss": 0.2088, "step": 5329 }, { "epoch": 0.43, "grad_norm": 1.2989119295880054, "learning_rate": 6.415159121873868e-06, "loss": 0.2325, "step": 5330 }, { "epoch": 0.43, "grad_norm": 1.3221409393084043, "learning_rate": 6.413916544671008e-06, "loss": 0.1996, "step": 5331 }, { "epoch": 0.43, "grad_norm": 1.4618314050777363, "learning_rate": 6.4126738725476455e-06, "loss": 0.2076, "step": 5332 }, { "epoch": 0.43, "grad_norm": 1.300968721589118, "learning_rate": 6.411431105587206e-06, "loss": 0.1937, "step": 5333 }, { "epoch": 0.43, "grad_norm": 1.3659040084487077, "learning_rate": 6.410188243873119e-06, "loss": 0.2315, "step": 5334 }, { "epoch": 0.43, "grad_norm": 1.354034672312187, "learning_rate": 6.408945287488824e-06, "loss": 0.1986, "step": 5335 }, { "epoch": 0.43, "grad_norm": 1.193026326031507, "learning_rate": 6.407702236517761e-06, "loss": 0.2098, "step": 5336 }, { "epoch": 0.43, "grad_norm": 1.3642984814235528, "learning_rate": 6.406459091043382e-06, "loss": 0.2169, "step": 5337 }, { "epoch": 0.43, "grad_norm": 1.1948336191487878, "learning_rate": 6.405215851149144e-06, "loss": 0.1563, "step": 5338 }, { "epoch": 0.43, "grad_norm": 2.056444540088652, "learning_rate": 6.403972516918507e-06, "loss": 0.2201, "step": 5339 }, { "epoch": 0.43, "grad_norm": 1.3438577444630693, "learning_rate": 6.402729088434942e-06, "loss": 0.2193, "step": 5340 }, { "epoch": 0.43, "grad_norm": 1.2765018940213793, "learning_rate": 6.4014855657819246e-06, "loss": 0.2036, "step": 5341 }, { "epoch": 0.43, "grad_norm": 1.1858498448097516, "learning_rate": 6.400241949042933e-06, "loss": 0.2113, "step": 5342 }, { "epoch": 0.43, "grad_norm": 1.5088534030750504, "learning_rate": 6.398998238301456e-06, "loss": 0.2028, "step": 5343 }, { "epoch": 0.43, "grad_norm": 1.2420723623247931, "learning_rate": 6.397754433640991e-06, "loss": 0.1948, "step": 5344 }, { "epoch": 0.43, "grad_norm": 1.506780832860764, "learning_rate": 6.396510535145033e-06, "loss": 0.2077, "step": 5345 }, { "epoch": 0.43, "grad_norm": 1.309676888282968, "learning_rate": 6.395266542897093e-06, "loss": 0.189, "step": 5346 }, { "epoch": 0.43, "grad_norm": 1.2831909095321687, "learning_rate": 6.394022456980682e-06, "loss": 0.2126, "step": 5347 }, { "epoch": 0.43, "grad_norm": 1.3228370512165486, "learning_rate": 6.3927782774793214e-06, "loss": 0.1712, "step": 5348 }, { "epoch": 0.43, "grad_norm": 1.4232312872525599, "learning_rate": 6.391534004476534e-06, "loss": 0.1921, "step": 5349 }, { "epoch": 0.43, "grad_norm": 1.5566428415730347, "learning_rate": 6.390289638055851e-06, "loss": 0.2024, "step": 5350 }, { "epoch": 0.43, "grad_norm": 1.4302528070834486, "learning_rate": 6.389045178300815e-06, "loss": 0.2213, "step": 5351 }, { "epoch": 0.43, "grad_norm": 1.2036733420859314, "learning_rate": 6.387800625294966e-06, "loss": 0.2098, "step": 5352 }, { "epoch": 0.43, "grad_norm": 1.3568128290007944, "learning_rate": 6.386555979121855e-06, "loss": 0.1777, "step": 5353 }, { "epoch": 0.43, "grad_norm": 1.4208999324762037, "learning_rate": 6.385311239865042e-06, "loss": 0.2511, "step": 5354 }, { "epoch": 0.43, "grad_norm": 1.2495037713264239, "learning_rate": 6.384066407608087e-06, "loss": 0.1849, "step": 5355 }, { "epoch": 0.43, "grad_norm": 1.3389680201022816, "learning_rate": 6.382821482434562e-06, "loss": 0.1879, "step": 5356 }, { "epoch": 0.43, "grad_norm": 1.318057254232226, "learning_rate": 6.38157646442804e-06, "loss": 0.2092, "step": 5357 }, { "epoch": 0.43, "grad_norm": 1.2895159134210021, "learning_rate": 6.380331353672105e-06, "loss": 0.1769, "step": 5358 }, { "epoch": 0.43, "grad_norm": 1.5525758983728335, "learning_rate": 6.379086150250342e-06, "loss": 0.22, "step": 5359 }, { "epoch": 0.43, "grad_norm": 5.886839484155791, "learning_rate": 6.377840854246348e-06, "loss": 0.4984, "step": 5360 }, { "epoch": 0.43, "grad_norm": 1.453405324390962, "learning_rate": 6.376595465743722e-06, "loss": 0.2087, "step": 5361 }, { "epoch": 0.43, "grad_norm": 1.318995101929686, "learning_rate": 6.375349984826074e-06, "loss": 0.1759, "step": 5362 }, { "epoch": 0.43, "grad_norm": 1.4293502038668422, "learning_rate": 6.374104411577012e-06, "loss": 0.206, "step": 5363 }, { "epoch": 0.43, "grad_norm": 1.1850791397816645, "learning_rate": 6.372858746080159e-06, "loss": 0.147, "step": 5364 }, { "epoch": 0.43, "grad_norm": 1.3467871873601427, "learning_rate": 6.371612988419138e-06, "loss": 0.1741, "step": 5365 }, { "epoch": 0.43, "grad_norm": 1.3322382241356419, "learning_rate": 6.370367138677582e-06, "loss": 0.1792, "step": 5366 }, { "epoch": 0.43, "grad_norm": 1.400308125292723, "learning_rate": 6.3691211969391266e-06, "loss": 0.2148, "step": 5367 }, { "epoch": 0.43, "grad_norm": 4.821442781429495, "learning_rate": 6.367875163287418e-06, "loss": 0.5054, "step": 5368 }, { "epoch": 0.43, "grad_norm": 1.4544178573250153, "learning_rate": 6.366629037806105e-06, "loss": 0.2052, "step": 5369 }, { "epoch": 0.43, "grad_norm": 5.957213947031811, "learning_rate": 6.3653828205788445e-06, "loss": 0.5777, "step": 5370 }, { "epoch": 0.43, "grad_norm": 1.3073903040647563, "learning_rate": 6.3641365116892965e-06, "loss": 0.1755, "step": 5371 }, { "epoch": 0.43, "grad_norm": 1.253763341566155, "learning_rate": 6.362890111221133e-06, "loss": 0.1935, "step": 5372 }, { "epoch": 0.43, "grad_norm": 1.5841188123003762, "learning_rate": 6.361643619258027e-06, "loss": 0.2106, "step": 5373 }, { "epoch": 0.43, "grad_norm": 1.7548271730162313, "learning_rate": 6.3603970358836574e-06, "loss": 0.3025, "step": 5374 }, { "epoch": 0.43, "grad_norm": 1.3665032836119813, "learning_rate": 6.3591503611817155e-06, "loss": 0.176, "step": 5375 }, { "epoch": 0.43, "grad_norm": 1.47572798582419, "learning_rate": 6.35790359523589e-06, "loss": 0.2004, "step": 5376 }, { "epoch": 0.43, "grad_norm": 1.3820995546220687, "learning_rate": 6.356656738129882e-06, "loss": 0.2215, "step": 5377 }, { "epoch": 0.43, "grad_norm": 1.2976019738382976, "learning_rate": 6.355409789947398e-06, "loss": 0.2344, "step": 5378 }, { "epoch": 0.43, "grad_norm": 1.2573809721318219, "learning_rate": 6.354162750772146e-06, "loss": 0.1921, "step": 5379 }, { "epoch": 0.43, "grad_norm": 6.119552202024639, "learning_rate": 6.352915620687848e-06, "loss": 0.5219, "step": 5380 }, { "epoch": 0.43, "grad_norm": 1.218835308112296, "learning_rate": 6.3516683997782225e-06, "loss": 0.1535, "step": 5381 }, { "epoch": 0.43, "grad_norm": 6.857860334268619, "learning_rate": 6.350421088127004e-06, "loss": 0.4885, "step": 5382 }, { "epoch": 0.43, "grad_norm": 1.2227129432668795, "learning_rate": 6.349173685817927e-06, "loss": 0.1624, "step": 5383 }, { "epoch": 0.43, "grad_norm": 1.3669306171470308, "learning_rate": 6.3479261929347305e-06, "loss": 0.2026, "step": 5384 }, { "epoch": 0.43, "grad_norm": 1.6137258380058008, "learning_rate": 6.346678609561166e-06, "loss": 0.2468, "step": 5385 }, { "epoch": 0.43, "grad_norm": 1.3476505329111346, "learning_rate": 6.345430935780985e-06, "loss": 0.221, "step": 5386 }, { "epoch": 0.43, "grad_norm": 1.3344670018044502, "learning_rate": 6.344183171677949e-06, "loss": 0.2284, "step": 5387 }, { "epoch": 0.43, "grad_norm": 1.3140374570908187, "learning_rate": 6.342935317335823e-06, "loss": 0.1876, "step": 5388 }, { "epoch": 0.43, "grad_norm": 1.4001148862724935, "learning_rate": 6.341687372838382e-06, "loss": 0.1719, "step": 5389 }, { "epoch": 0.43, "grad_norm": 1.5609426715699228, "learning_rate": 6.340439338269402e-06, "loss": 0.2016, "step": 5390 }, { "epoch": 0.43, "grad_norm": 1.5743830365323122, "learning_rate": 6.339191213712668e-06, "loss": 0.2402, "step": 5391 }, { "epoch": 0.43, "grad_norm": 1.3446240618991603, "learning_rate": 6.33794299925197e-06, "loss": 0.1891, "step": 5392 }, { "epoch": 0.43, "grad_norm": 1.2907959163255103, "learning_rate": 6.336694694971106e-06, "loss": 0.2085, "step": 5393 }, { "epoch": 0.43, "grad_norm": 1.2530580264484246, "learning_rate": 6.3354463009538745e-06, "loss": 0.1805, "step": 5394 }, { "epoch": 0.43, "grad_norm": 1.1342455516422136, "learning_rate": 6.3341978172840875e-06, "loss": 0.1655, "step": 5395 }, { "epoch": 0.43, "grad_norm": 1.2450955306857623, "learning_rate": 6.33294924404556e-06, "loss": 0.2093, "step": 5396 }, { "epoch": 0.43, "grad_norm": 1.2800083447208719, "learning_rate": 6.33170058132211e-06, "loss": 0.2005, "step": 5397 }, { "epoch": 0.43, "grad_norm": 1.3700922904488833, "learning_rate": 6.330451829197564e-06, "loss": 0.191, "step": 5398 }, { "epoch": 0.43, "grad_norm": 1.3869866598564893, "learning_rate": 6.329202987755757e-06, "loss": 0.206, "step": 5399 }, { "epoch": 0.43, "grad_norm": 1.4549250310680832, "learning_rate": 6.3279540570805265e-06, "loss": 0.2407, "step": 5400 }, { "epoch": 0.43, "grad_norm": 1.4708987271829916, "learning_rate": 6.326705037255716e-06, "loss": 0.187, "step": 5401 }, { "epoch": 0.43, "grad_norm": 1.2708330988907208, "learning_rate": 6.325455928365176e-06, "loss": 0.1981, "step": 5402 }, { "epoch": 0.43, "grad_norm": 1.3278930084033125, "learning_rate": 6.324206730492765e-06, "loss": 0.1947, "step": 5403 }, { "epoch": 0.43, "grad_norm": 11.631395118333689, "learning_rate": 6.322957443722343e-06, "loss": 0.5051, "step": 5404 }, { "epoch": 0.43, "grad_norm": 1.3414880933294182, "learning_rate": 6.321708068137778e-06, "loss": 0.2147, "step": 5405 }, { "epoch": 0.43, "grad_norm": 1.273271877234986, "learning_rate": 6.320458603822949e-06, "loss": 0.1756, "step": 5406 }, { "epoch": 0.43, "grad_norm": 1.4433138667020462, "learning_rate": 6.319209050861731e-06, "loss": 0.24, "step": 5407 }, { "epoch": 0.43, "grad_norm": 1.4369750645434807, "learning_rate": 6.317959409338013e-06, "loss": 0.2386, "step": 5408 }, { "epoch": 0.43, "grad_norm": 1.3846730456639411, "learning_rate": 6.316709679335686e-06, "loss": 0.1833, "step": 5409 }, { "epoch": 0.43, "grad_norm": 1.6771928145798876, "learning_rate": 6.315459860938649e-06, "loss": 0.2428, "step": 5410 }, { "epoch": 0.43, "grad_norm": 1.2703693105782188, "learning_rate": 6.314209954230806e-06, "loss": 0.1877, "step": 5411 }, { "epoch": 0.43, "grad_norm": 1.4845559789211467, "learning_rate": 6.3129599592960665e-06, "loss": 0.24, "step": 5412 }, { "epoch": 0.43, "grad_norm": 1.4596300977817065, "learning_rate": 6.311709876218347e-06, "loss": 0.2345, "step": 5413 }, { "epoch": 0.43, "grad_norm": 1.2514090257319725, "learning_rate": 6.31045970508157e-06, "loss": 0.1602, "step": 5414 }, { "epoch": 0.43, "grad_norm": 1.4402163982994178, "learning_rate": 6.30920944596966e-06, "loss": 0.2106, "step": 5415 }, { "epoch": 0.43, "grad_norm": 1.2112359101090187, "learning_rate": 6.307959098966556e-06, "loss": 0.1752, "step": 5416 }, { "epoch": 0.43, "grad_norm": 1.4604133026948893, "learning_rate": 6.306708664156194e-06, "loss": 0.2568, "step": 5417 }, { "epoch": 0.43, "grad_norm": 1.3759924422272316, "learning_rate": 6.305458141622521e-06, "loss": 0.2077, "step": 5418 }, { "epoch": 0.43, "grad_norm": 1.2561711478936384, "learning_rate": 6.304207531449486e-06, "loss": 0.2085, "step": 5419 }, { "epoch": 0.43, "grad_norm": 1.3966676902267345, "learning_rate": 6.302956833721048e-06, "loss": 0.2205, "step": 5420 }, { "epoch": 0.43, "grad_norm": 8.223770500207937, "learning_rate": 6.30170604852117e-06, "loss": 0.7511, "step": 5421 }, { "epoch": 0.43, "grad_norm": 7.460352601838733, "learning_rate": 6.3004551759338206e-06, "loss": 0.636, "step": 5422 }, { "epoch": 0.43, "grad_norm": 1.384592431863059, "learning_rate": 6.299204216042976e-06, "loss": 0.2199, "step": 5423 }, { "epoch": 0.43, "grad_norm": 8.592492932771925, "learning_rate": 6.2979531689326155e-06, "loss": 0.5394, "step": 5424 }, { "epoch": 0.43, "grad_norm": 1.4973907236482074, "learning_rate": 6.296702034686726e-06, "loss": 0.2183, "step": 5425 }, { "epoch": 0.43, "grad_norm": 1.3011599398966487, "learning_rate": 6.2954508133893e-06, "loss": 0.1847, "step": 5426 }, { "epoch": 0.43, "grad_norm": 1.2221327505109223, "learning_rate": 6.294199505124337e-06, "loss": 0.1838, "step": 5427 }, { "epoch": 0.43, "grad_norm": 1.371463085781701, "learning_rate": 6.292948109975839e-06, "loss": 0.2079, "step": 5428 }, { "epoch": 0.43, "grad_norm": 1.537249608860019, "learning_rate": 6.291696628027816e-06, "loss": 0.2235, "step": 5429 }, { "epoch": 0.43, "grad_norm": 1.3682102613975526, "learning_rate": 6.290445059364286e-06, "loss": 0.1925, "step": 5430 }, { "epoch": 0.43, "grad_norm": 1.479780457950757, "learning_rate": 6.28919340406927e-06, "loss": 0.2284, "step": 5431 }, { "epoch": 0.43, "grad_norm": 1.3605682904442127, "learning_rate": 6.287941662226793e-06, "loss": 0.2141, "step": 5432 }, { "epoch": 0.43, "grad_norm": 1.3746007412737256, "learning_rate": 6.286689833920889e-06, "loss": 0.182, "step": 5433 }, { "epoch": 0.43, "grad_norm": 1.3005447096920353, "learning_rate": 6.2854379192356e-06, "loss": 0.1809, "step": 5434 }, { "epoch": 0.43, "grad_norm": 1.3283008000523862, "learning_rate": 6.284185918254968e-06, "loss": 0.2169, "step": 5435 }, { "epoch": 0.43, "grad_norm": 6.115410286688982, "learning_rate": 6.282933831063045e-06, "loss": 0.642, "step": 5436 }, { "epoch": 0.43, "grad_norm": 5.441000115481333, "learning_rate": 6.2816816577438866e-06, "loss": 0.569, "step": 5437 }, { "epoch": 0.44, "grad_norm": 1.273076868154803, "learning_rate": 6.280429398381555e-06, "loss": 0.1692, "step": 5438 }, { "epoch": 0.44, "grad_norm": 6.7827199444185995, "learning_rate": 6.279177053060117e-06, "loss": 0.6681, "step": 5439 }, { "epoch": 0.44, "grad_norm": 1.3271552793988675, "learning_rate": 6.277924621863649e-06, "loss": 0.1838, "step": 5440 }, { "epoch": 0.44, "grad_norm": 1.2939994277796243, "learning_rate": 6.276672104876229e-06, "loss": 0.1762, "step": 5441 }, { "epoch": 0.44, "grad_norm": 1.370757268235068, "learning_rate": 6.275419502181943e-06, "loss": 0.1866, "step": 5442 }, { "epoch": 0.44, "grad_norm": 9.400731535434586, "learning_rate": 6.2741668138648806e-06, "loss": 0.7704, "step": 5443 }, { "epoch": 0.44, "grad_norm": 1.571225792974094, "learning_rate": 6.27291404000914e-06, "loss": 0.1926, "step": 5444 }, { "epoch": 0.44, "grad_norm": 1.1794177233995102, "learning_rate": 6.271661180698824e-06, "loss": 0.1855, "step": 5445 }, { "epoch": 0.44, "grad_norm": 1.3001735561132977, "learning_rate": 6.2704082360180375e-06, "loss": 0.2326, "step": 5446 }, { "epoch": 0.44, "grad_norm": 6.349825326070215, "learning_rate": 6.269155206050899e-06, "loss": 0.5371, "step": 5447 }, { "epoch": 0.44, "grad_norm": 1.5092478634018724, "learning_rate": 6.2679020908815245e-06, "loss": 0.2429, "step": 5448 }, { "epoch": 0.44, "grad_norm": 1.320858193169152, "learning_rate": 6.266648890594042e-06, "loss": 0.2128, "step": 5449 }, { "epoch": 0.44, "grad_norm": 1.3437308437382531, "learning_rate": 6.265395605272581e-06, "loss": 0.1734, "step": 5450 }, { "epoch": 0.44, "grad_norm": 1.2217923784710367, "learning_rate": 6.264142235001281e-06, "loss": 0.1794, "step": 5451 }, { "epoch": 0.44, "grad_norm": 8.557230988065406, "learning_rate": 6.26288877986428e-06, "loss": 0.6333, "step": 5452 }, { "epoch": 0.44, "grad_norm": 1.256231249048313, "learning_rate": 6.261635239945732e-06, "loss": 0.1857, "step": 5453 }, { "epoch": 0.44, "grad_norm": 1.4754016489852595, "learning_rate": 6.260381615329785e-06, "loss": 0.1922, "step": 5454 }, { "epoch": 0.44, "grad_norm": 1.357227173467612, "learning_rate": 6.259127906100601e-06, "loss": 0.1997, "step": 5455 }, { "epoch": 0.44, "grad_norm": 1.3222266067958681, "learning_rate": 6.257874112342347e-06, "loss": 0.1818, "step": 5456 }, { "epoch": 0.44, "grad_norm": 1.2855673511165826, "learning_rate": 6.256620234139193e-06, "loss": 0.168, "step": 5457 }, { "epoch": 0.44, "grad_norm": 1.262650856266188, "learning_rate": 6.255366271575315e-06, "loss": 0.126, "step": 5458 }, { "epoch": 0.44, "grad_norm": 1.3290899737063937, "learning_rate": 6.254112224734895e-06, "loss": 0.2125, "step": 5459 }, { "epoch": 0.44, "grad_norm": 1.219599941419597, "learning_rate": 6.252858093702121e-06, "loss": 0.1952, "step": 5460 }, { "epoch": 0.44, "grad_norm": 1.4908047188101465, "learning_rate": 6.251603878561188e-06, "loss": 0.2506, "step": 5461 }, { "epoch": 0.44, "grad_norm": 1.4753254138787635, "learning_rate": 6.250349579396295e-06, "loss": 0.2436, "step": 5462 }, { "epoch": 0.44, "grad_norm": 6.217883190537527, "learning_rate": 6.249095196291646e-06, "loss": 0.5653, "step": 5463 }, { "epoch": 0.44, "grad_norm": 1.1909966359856916, "learning_rate": 6.247840729331451e-06, "loss": 0.1837, "step": 5464 }, { "epoch": 0.44, "grad_norm": 1.2949619029483872, "learning_rate": 6.246586178599928e-06, "loss": 0.1783, "step": 5465 }, { "epoch": 0.44, "grad_norm": 5.962156945258954, "learning_rate": 6.2453315441812975e-06, "loss": 0.621, "step": 5466 }, { "epoch": 0.44, "grad_norm": 7.831423156152444, "learning_rate": 6.2440768261597865e-06, "loss": 0.8104, "step": 5467 }, { "epoch": 0.44, "grad_norm": 1.3580329664368471, "learning_rate": 6.24282202461963e-06, "loss": 0.2384, "step": 5468 }, { "epoch": 0.44, "grad_norm": 5.209385288965838, "learning_rate": 6.241567139645065e-06, "loss": 0.4342, "step": 5469 }, { "epoch": 0.44, "grad_norm": 1.1649479546866952, "learning_rate": 6.240312171320336e-06, "loss": 0.1927, "step": 5470 }, { "epoch": 0.44, "grad_norm": 1.394883141638777, "learning_rate": 6.2390571197296936e-06, "loss": 0.1745, "step": 5471 }, { "epoch": 0.44, "grad_norm": 1.314870600579029, "learning_rate": 6.237801984957391e-06, "loss": 0.1933, "step": 5472 }, { "epoch": 0.44, "grad_norm": 1.3783853084859452, "learning_rate": 6.236546767087692e-06, "loss": 0.2187, "step": 5473 }, { "epoch": 0.44, "grad_norm": 1.2286470445799385, "learning_rate": 6.2352914662048604e-06, "loss": 0.1878, "step": 5474 }, { "epoch": 0.44, "grad_norm": 1.2371273765443371, "learning_rate": 6.234036082393171e-06, "loss": 0.1897, "step": 5475 }, { "epoch": 0.44, "grad_norm": 1.2505549747840392, "learning_rate": 6.232780615736901e-06, "loss": 0.1684, "step": 5476 }, { "epoch": 0.44, "grad_norm": 1.2000233428359381, "learning_rate": 6.231525066320332e-06, "loss": 0.1626, "step": 5477 }, { "epoch": 0.44, "grad_norm": 1.2480338738938548, "learning_rate": 6.230269434227755e-06, "loss": 0.1684, "step": 5478 }, { "epoch": 0.44, "grad_norm": 1.2854966498737794, "learning_rate": 6.229013719543464e-06, "loss": 0.2099, "step": 5479 }, { "epoch": 0.44, "grad_norm": 1.1592498686536488, "learning_rate": 6.227757922351756e-06, "loss": 0.1745, "step": 5480 }, { "epoch": 0.44, "grad_norm": 1.4122571271400273, "learning_rate": 6.226502042736939e-06, "loss": 0.1942, "step": 5481 }, { "epoch": 0.44, "grad_norm": 1.2314177812709297, "learning_rate": 6.225246080783325e-06, "loss": 0.1888, "step": 5482 }, { "epoch": 0.44, "grad_norm": 1.347114574653997, "learning_rate": 6.223990036575229e-06, "loss": 0.1892, "step": 5483 }, { "epoch": 0.44, "grad_norm": 1.1726567275688713, "learning_rate": 6.222733910196972e-06, "loss": 0.1747, "step": 5484 }, { "epoch": 0.44, "grad_norm": 1.553796451949075, "learning_rate": 6.221477701732884e-06, "loss": 0.2099, "step": 5485 }, { "epoch": 0.44, "grad_norm": 1.380067424922898, "learning_rate": 6.220221411267297e-06, "loss": 0.231, "step": 5486 }, { "epoch": 0.44, "grad_norm": 1.5622996637742816, "learning_rate": 6.21896503888455e-06, "loss": 0.2885, "step": 5487 }, { "epoch": 0.44, "grad_norm": 4.720143303713319, "learning_rate": 6.217708584668987e-06, "loss": 0.3691, "step": 5488 }, { "epoch": 0.44, "grad_norm": 1.2716801930960457, "learning_rate": 6.216452048704955e-06, "loss": 0.1786, "step": 5489 }, { "epoch": 0.44, "grad_norm": 1.4166012931967793, "learning_rate": 6.215195431076813e-06, "loss": 0.1664, "step": 5490 }, { "epoch": 0.44, "grad_norm": 1.4545816326470928, "learning_rate": 6.2139387318689185e-06, "loss": 0.2201, "step": 5491 }, { "epoch": 0.44, "grad_norm": 1.3578224879221072, "learning_rate": 6.21268195116564e-06, "loss": 0.1461, "step": 5492 }, { "epoch": 0.44, "grad_norm": 1.539719633864262, "learning_rate": 6.211425089051347e-06, "loss": 0.2283, "step": 5493 }, { "epoch": 0.44, "grad_norm": 10.173553867842234, "learning_rate": 6.210168145610418e-06, "loss": 0.4451, "step": 5494 }, { "epoch": 0.44, "grad_norm": 1.1312940147704096, "learning_rate": 6.208911120927233e-06, "loss": 0.1791, "step": 5495 }, { "epoch": 0.44, "grad_norm": 1.3928524463616982, "learning_rate": 6.207654015086184e-06, "loss": 0.1841, "step": 5496 }, { "epoch": 0.44, "grad_norm": 1.2744903443220463, "learning_rate": 6.206396828171663e-06, "loss": 0.176, "step": 5497 }, { "epoch": 0.44, "grad_norm": 1.4127866017101978, "learning_rate": 6.205139560268064e-06, "loss": 0.2147, "step": 5498 }, { "epoch": 0.44, "grad_norm": 1.3276172543322058, "learning_rate": 6.203882211459797e-06, "loss": 0.1681, "step": 5499 }, { "epoch": 0.44, "grad_norm": 1.3212396820311856, "learning_rate": 6.202624781831269e-06, "loss": 0.184, "step": 5500 }, { "epoch": 0.44, "grad_norm": 1.3900304031340762, "learning_rate": 6.201367271466895e-06, "loss": 0.2036, "step": 5501 }, { "epoch": 0.44, "grad_norm": 1.441158686958361, "learning_rate": 6.200109680451096e-06, "loss": 0.2241, "step": 5502 }, { "epoch": 0.44, "grad_norm": 1.3511503213529095, "learning_rate": 6.1988520088683e-06, "loss": 0.2151, "step": 5503 }, { "epoch": 0.44, "grad_norm": 1.312562500955458, "learning_rate": 6.197594256802935e-06, "loss": 0.2131, "step": 5504 }, { "epoch": 0.44, "grad_norm": 1.654466215232279, "learning_rate": 6.1963364243394386e-06, "loss": 0.2394, "step": 5505 }, { "epoch": 0.44, "grad_norm": 1.3353902310632164, "learning_rate": 6.195078511562254e-06, "loss": 0.166, "step": 5506 }, { "epoch": 0.44, "grad_norm": 4.670980632638383, "learning_rate": 6.193820518555827e-06, "loss": 0.5743, "step": 5507 }, { "epoch": 0.44, "grad_norm": 5.755538847897823, "learning_rate": 6.192562445404612e-06, "loss": 0.6666, "step": 5508 }, { "epoch": 0.44, "grad_norm": 6.393267037979869, "learning_rate": 6.191304292193068e-06, "loss": 0.5784, "step": 5509 }, { "epoch": 0.44, "grad_norm": 8.279109141399603, "learning_rate": 6.190046059005655e-06, "loss": 0.548, "step": 5510 }, { "epoch": 0.44, "grad_norm": 1.396254156480553, "learning_rate": 6.188787745926846e-06, "loss": 0.2257, "step": 5511 }, { "epoch": 0.44, "grad_norm": 1.3046528698882116, "learning_rate": 6.187529353041113e-06, "loss": 0.1685, "step": 5512 }, { "epoch": 0.44, "grad_norm": 1.3150714232156226, "learning_rate": 6.186270880432936e-06, "loss": 0.1599, "step": 5513 }, { "epoch": 0.44, "grad_norm": 1.1934054809583754, "learning_rate": 6.185012328186802e-06, "loss": 0.1737, "step": 5514 }, { "epoch": 0.44, "grad_norm": 1.4152977136108575, "learning_rate": 6.183753696387199e-06, "loss": 0.1942, "step": 5515 }, { "epoch": 0.44, "grad_norm": 1.4628005285769916, "learning_rate": 6.182494985118625e-06, "loss": 0.2282, "step": 5516 }, { "epoch": 0.44, "grad_norm": 1.4022228907273728, "learning_rate": 6.1812361944655784e-06, "loss": 0.2254, "step": 5517 }, { "epoch": 0.44, "grad_norm": 1.43762550993024, "learning_rate": 6.179977324512567e-06, "loss": 0.2311, "step": 5518 }, { "epoch": 0.44, "grad_norm": 1.3557578697331547, "learning_rate": 6.178718375344103e-06, "loss": 0.2178, "step": 5519 }, { "epoch": 0.44, "grad_norm": 1.453597665248679, "learning_rate": 6.177459347044703e-06, "loss": 0.2336, "step": 5520 }, { "epoch": 0.44, "grad_norm": 1.416425650169366, "learning_rate": 6.176200239698889e-06, "loss": 0.2397, "step": 5521 }, { "epoch": 0.44, "grad_norm": 1.3710737589143889, "learning_rate": 6.174941053391189e-06, "loss": 0.1926, "step": 5522 }, { "epoch": 0.44, "grad_norm": 1.671137593532443, "learning_rate": 6.1736817882061385e-06, "loss": 0.2503, "step": 5523 }, { "epoch": 0.44, "grad_norm": 1.3179961342657962, "learning_rate": 6.172422444228272e-06, "loss": 0.2128, "step": 5524 }, { "epoch": 0.44, "grad_norm": 1.393337914080416, "learning_rate": 6.171163021542134e-06, "loss": 0.2236, "step": 5525 }, { "epoch": 0.44, "grad_norm": 1.3255149159865447, "learning_rate": 6.169903520232274e-06, "loss": 0.1917, "step": 5526 }, { "epoch": 0.44, "grad_norm": 1.3670054755188386, "learning_rate": 6.168643940383246e-06, "loss": 0.206, "step": 5527 }, { "epoch": 0.44, "grad_norm": 1.5495651959630052, "learning_rate": 6.16738428207961e-06, "loss": 0.1928, "step": 5528 }, { "epoch": 0.44, "grad_norm": 1.2811045955224534, "learning_rate": 6.166124545405929e-06, "loss": 0.199, "step": 5529 }, { "epoch": 0.44, "grad_norm": 1.2526082792701527, "learning_rate": 6.164864730446776e-06, "loss": 0.157, "step": 5530 }, { "epoch": 0.44, "grad_norm": 1.2815294247890403, "learning_rate": 6.163604837286724e-06, "loss": 0.1915, "step": 5531 }, { "epoch": 0.44, "grad_norm": 1.2067983948364358, "learning_rate": 6.162344866010353e-06, "loss": 0.1536, "step": 5532 }, { "epoch": 0.44, "grad_norm": 1.429560085596385, "learning_rate": 6.161084816702249e-06, "loss": 0.1911, "step": 5533 }, { "epoch": 0.44, "grad_norm": 1.2802597179999669, "learning_rate": 6.159824689447003e-06, "loss": 0.2086, "step": 5534 }, { "epoch": 0.44, "grad_norm": 1.4367970778529962, "learning_rate": 6.158564484329212e-06, "loss": 0.1738, "step": 5535 }, { "epoch": 0.44, "grad_norm": 1.3509319373357598, "learning_rate": 6.157304201433476e-06, "loss": 0.2163, "step": 5536 }, { "epoch": 0.44, "grad_norm": 1.368101384719906, "learning_rate": 6.1560438408444036e-06, "loss": 0.1918, "step": 5537 }, { "epoch": 0.44, "grad_norm": 1.4244294899129617, "learning_rate": 6.154783402646604e-06, "loss": 0.1898, "step": 5538 }, { "epoch": 0.44, "grad_norm": 1.3431657336329828, "learning_rate": 6.153522886924699e-06, "loss": 0.218, "step": 5539 }, { "epoch": 0.44, "grad_norm": 1.3338828083822012, "learning_rate": 6.1522622937633044e-06, "loss": 0.1971, "step": 5540 }, { "epoch": 0.44, "grad_norm": 1.2819599058283644, "learning_rate": 6.151001623247053e-06, "loss": 0.201, "step": 5541 }, { "epoch": 0.44, "grad_norm": 1.1556123682361188, "learning_rate": 6.149740875460572e-06, "loss": 0.1393, "step": 5542 }, { "epoch": 0.44, "grad_norm": 1.298658837335167, "learning_rate": 6.148480050488506e-06, "loss": 0.1823, "step": 5543 }, { "epoch": 0.44, "grad_norm": 1.4024613622380637, "learning_rate": 6.147219148415493e-06, "loss": 0.2345, "step": 5544 }, { "epoch": 0.44, "grad_norm": 6.135464295700088, "learning_rate": 6.1459581693261825e-06, "loss": 0.4547, "step": 5545 }, { "epoch": 0.44, "grad_norm": 1.401314532990553, "learning_rate": 6.144697113305227e-06, "loss": 0.2165, "step": 5546 }, { "epoch": 0.44, "grad_norm": 1.2613489075282303, "learning_rate": 6.1434359804372875e-06, "loss": 0.199, "step": 5547 }, { "epoch": 0.44, "grad_norm": 1.2985802411800909, "learning_rate": 6.142174770807026e-06, "loss": 0.1904, "step": 5548 }, { "epoch": 0.44, "grad_norm": 1.2106002498194715, "learning_rate": 6.140913484499112e-06, "loss": 0.2399, "step": 5549 }, { "epoch": 0.44, "grad_norm": 1.1654411161583529, "learning_rate": 6.139652121598219e-06, "loss": 0.1629, "step": 5550 }, { "epoch": 0.44, "grad_norm": 1.2350272423782207, "learning_rate": 6.138390682189025e-06, "loss": 0.1917, "step": 5551 }, { "epoch": 0.44, "grad_norm": 1.2548833117756106, "learning_rate": 6.137129166356215e-06, "loss": 0.1542, "step": 5552 }, { "epoch": 0.44, "grad_norm": 2.050054719861633, "learning_rate": 6.135867574184479e-06, "loss": 0.2429, "step": 5553 }, { "epoch": 0.44, "grad_norm": 1.4161378157279216, "learning_rate": 6.1346059057585115e-06, "loss": 0.2011, "step": 5554 }, { "epoch": 0.44, "grad_norm": 1.410310996773235, "learning_rate": 6.133344161163012e-06, "loss": 0.2138, "step": 5555 }, { "epoch": 0.44, "grad_norm": 1.4564407259706964, "learning_rate": 6.132082340482684e-06, "loss": 0.1962, "step": 5556 }, { "epoch": 0.44, "grad_norm": 1.3792279705961341, "learning_rate": 6.130820443802239e-06, "loss": 0.1718, "step": 5557 }, { "epoch": 0.44, "grad_norm": 1.2004016298538722, "learning_rate": 6.1295584712063935e-06, "loss": 0.1877, "step": 5558 }, { "epoch": 0.44, "grad_norm": 1.2603421856261134, "learning_rate": 6.1282964227798634e-06, "loss": 0.1477, "step": 5559 }, { "epoch": 0.44, "grad_norm": 1.3101276987398394, "learning_rate": 6.127034298607375e-06, "loss": 0.1878, "step": 5560 }, { "epoch": 0.44, "grad_norm": 1.3121595124182959, "learning_rate": 6.125772098773661e-06, "loss": 0.1367, "step": 5561 }, { "epoch": 0.44, "grad_norm": 1.3514288366573788, "learning_rate": 6.124509823363455e-06, "loss": 0.2133, "step": 5562 }, { "epoch": 0.45, "grad_norm": 5.8074282650061395, "learning_rate": 6.123247472461495e-06, "loss": 0.4858, "step": 5563 }, { "epoch": 0.45, "grad_norm": 1.4291102593567688, "learning_rate": 6.121985046152531e-06, "loss": 0.1591, "step": 5564 }, { "epoch": 0.45, "grad_norm": 1.259740245412773, "learning_rate": 6.120722544521312e-06, "loss": 0.2019, "step": 5565 }, { "epoch": 0.45, "grad_norm": 1.283692223561614, "learning_rate": 6.119459967652592e-06, "loss": 0.2043, "step": 5566 }, { "epoch": 0.45, "grad_norm": 1.2647097728298455, "learning_rate": 6.1181973156311325e-06, "loss": 0.1936, "step": 5567 }, { "epoch": 0.45, "grad_norm": 1.329348476490451, "learning_rate": 6.1169345885417e-06, "loss": 0.1856, "step": 5568 }, { "epoch": 0.45, "grad_norm": 1.3812634944996203, "learning_rate": 6.115671786469063e-06, "loss": 0.2377, "step": 5569 }, { "epoch": 0.45, "grad_norm": 1.4205204920987597, "learning_rate": 6.114408909497999e-06, "loss": 0.2124, "step": 5570 }, { "epoch": 0.45, "grad_norm": 4.776555534942818, "learning_rate": 6.113145957713289e-06, "loss": 0.5159, "step": 5571 }, { "epoch": 0.45, "grad_norm": 5.597878293882241, "learning_rate": 6.1118829311997176e-06, "loss": 0.5918, "step": 5572 }, { "epoch": 0.45, "grad_norm": 1.3860500762253893, "learning_rate": 6.110619830042076e-06, "loss": 0.1832, "step": 5573 }, { "epoch": 0.45, "grad_norm": 5.364695393745401, "learning_rate": 6.109356654325161e-06, "loss": 0.5517, "step": 5574 }, { "epoch": 0.45, "grad_norm": 1.4641869690735012, "learning_rate": 6.108093404133772e-06, "loss": 0.2184, "step": 5575 }, { "epoch": 0.45, "grad_norm": 1.5404193350813713, "learning_rate": 6.106830079552716e-06, "loss": 0.2411, "step": 5576 }, { "epoch": 0.45, "grad_norm": 1.30869960881159, "learning_rate": 6.105566680666802e-06, "loss": 0.2159, "step": 5577 }, { "epoch": 0.45, "grad_norm": 1.2486705095388573, "learning_rate": 6.104303207560848e-06, "loss": 0.1822, "step": 5578 }, { "epoch": 0.45, "grad_norm": 1.346870614426665, "learning_rate": 6.103039660319674e-06, "loss": 0.2016, "step": 5579 }, { "epoch": 0.45, "grad_norm": 1.3585629896264042, "learning_rate": 6.101776039028104e-06, "loss": 0.2191, "step": 5580 }, { "epoch": 0.45, "grad_norm": 1.510382047584163, "learning_rate": 6.100512343770971e-06, "loss": 0.2168, "step": 5581 }, { "epoch": 0.45, "grad_norm": 1.3974176902369686, "learning_rate": 6.099248574633111e-06, "loss": 0.2057, "step": 5582 }, { "epoch": 0.45, "grad_norm": 1.5247654221894222, "learning_rate": 6.097984731699363e-06, "loss": 0.2291, "step": 5583 }, { "epoch": 0.45, "grad_norm": 1.3384639025850897, "learning_rate": 6.096720815054573e-06, "loss": 0.1921, "step": 5584 }, { "epoch": 0.45, "grad_norm": 1.4158085095343402, "learning_rate": 6.095456824783592e-06, "loss": 0.2042, "step": 5585 }, { "epoch": 0.45, "grad_norm": 1.224681070365773, "learning_rate": 6.094192760971275e-06, "loss": 0.1642, "step": 5586 }, { "epoch": 0.45, "grad_norm": 5.041993714675057, "learning_rate": 6.092928623702481e-06, "loss": 0.5225, "step": 5587 }, { "epoch": 0.45, "grad_norm": 1.3422488633802798, "learning_rate": 6.091664413062079e-06, "loss": 0.206, "step": 5588 }, { "epoch": 0.45, "grad_norm": 1.6114028105561287, "learning_rate": 6.0904001291349375e-06, "loss": 0.2429, "step": 5589 }, { "epoch": 0.45, "grad_norm": 5.4517315506449195, "learning_rate": 6.089135772005932e-06, "loss": 0.4918, "step": 5590 }, { "epoch": 0.45, "grad_norm": 1.3024740869565015, "learning_rate": 6.08787134175994e-06, "loss": 0.2013, "step": 5591 }, { "epoch": 0.45, "grad_norm": 1.432279796238654, "learning_rate": 6.086606838481851e-06, "loss": 0.176, "step": 5592 }, { "epoch": 0.45, "grad_norm": 1.355272647117586, "learning_rate": 6.085342262256552e-06, "loss": 0.1718, "step": 5593 }, { "epoch": 0.45, "grad_norm": 1.523873437848471, "learning_rate": 6.084077613168937e-06, "loss": 0.2202, "step": 5594 }, { "epoch": 0.45, "grad_norm": 5.773408011584644, "learning_rate": 6.0828128913039085e-06, "loss": 0.5324, "step": 5595 }, { "epoch": 0.45, "grad_norm": 1.2508901584474192, "learning_rate": 6.08154809674637e-06, "loss": 0.166, "step": 5596 }, { "epoch": 0.45, "grad_norm": 1.217697802148777, "learning_rate": 6.080283229581231e-06, "loss": 0.1801, "step": 5597 }, { "epoch": 0.45, "grad_norm": 1.627104879007245, "learning_rate": 6.079018289893403e-06, "loss": 0.2267, "step": 5598 }, { "epoch": 0.45, "grad_norm": 1.078833819597533, "learning_rate": 6.07775327776781e-06, "loss": 0.1552, "step": 5599 }, { "epoch": 0.45, "grad_norm": 1.3062748246185414, "learning_rate": 6.076488193289375e-06, "loss": 0.2017, "step": 5600 }, { "epoch": 0.45, "grad_norm": 1.2232917849384808, "learning_rate": 6.075223036543025e-06, "loss": 0.1602, "step": 5601 }, { "epoch": 0.45, "grad_norm": 1.2833413040414363, "learning_rate": 6.073957807613695e-06, "loss": 0.2157, "step": 5602 }, { "epoch": 0.45, "grad_norm": 1.1804234185114044, "learning_rate": 6.072692506586324e-06, "loss": 0.2038, "step": 5603 }, { "epoch": 0.45, "grad_norm": 1.4884602595204453, "learning_rate": 6.071427133545854e-06, "loss": 0.2258, "step": 5604 }, { "epoch": 0.45, "grad_norm": 1.2132038919084498, "learning_rate": 6.070161688577233e-06, "loss": 0.1806, "step": 5605 }, { "epoch": 0.45, "grad_norm": 1.4203566420277562, "learning_rate": 6.068896171765417e-06, "loss": 0.2288, "step": 5606 }, { "epoch": 0.45, "grad_norm": 3.7014609136692567, "learning_rate": 6.067630583195362e-06, "loss": 0.6135, "step": 5607 }, { "epoch": 0.45, "grad_norm": 1.4300728423441043, "learning_rate": 6.066364922952031e-06, "loss": 0.2081, "step": 5608 }, { "epoch": 0.45, "grad_norm": 1.6948475514341557, "learning_rate": 6.065099191120391e-06, "loss": 0.2098, "step": 5609 }, { "epoch": 0.45, "grad_norm": 1.5598582050049703, "learning_rate": 6.0638333877854185e-06, "loss": 0.2217, "step": 5610 }, { "epoch": 0.45, "grad_norm": 1.449739306948539, "learning_rate": 6.0625675130320835e-06, "loss": 0.2092, "step": 5611 }, { "epoch": 0.45, "grad_norm": 1.284292180522819, "learning_rate": 6.061301566945373e-06, "loss": 0.1859, "step": 5612 }, { "epoch": 0.45, "grad_norm": 1.3588099833381357, "learning_rate": 6.060035549610275e-06, "loss": 0.189, "step": 5613 }, { "epoch": 0.45, "grad_norm": 1.3395562956315887, "learning_rate": 6.0587694611117766e-06, "loss": 0.1987, "step": 5614 }, { "epoch": 0.45, "grad_norm": 1.2979559564066512, "learning_rate": 6.057503301534875e-06, "loss": 0.2057, "step": 5615 }, { "epoch": 0.45, "grad_norm": 1.4481850777496268, "learning_rate": 6.056237070964575e-06, "loss": 0.2206, "step": 5616 }, { "epoch": 0.45, "grad_norm": 5.938497053781902, "learning_rate": 6.054970769485879e-06, "loss": 0.4551, "step": 5617 }, { "epoch": 0.45, "grad_norm": 1.253788182750065, "learning_rate": 6.053704397183799e-06, "loss": 0.1832, "step": 5618 }, { "epoch": 0.45, "grad_norm": 1.2058164707408878, "learning_rate": 6.052437954143351e-06, "loss": 0.1604, "step": 5619 }, { "epoch": 0.45, "grad_norm": 8.547492703478182, "learning_rate": 6.051171440449555e-06, "loss": 0.5551, "step": 5620 }, { "epoch": 0.45, "grad_norm": 1.4518981116268568, "learning_rate": 6.049904856187434e-06, "loss": 0.1742, "step": 5621 }, { "epoch": 0.45, "grad_norm": 1.3981648420954715, "learning_rate": 6.048638201442017e-06, "loss": 0.2264, "step": 5622 }, { "epoch": 0.45, "grad_norm": 1.286440402540848, "learning_rate": 6.047371476298344e-06, "loss": 0.2318, "step": 5623 }, { "epoch": 0.45, "grad_norm": 1.3787880114042406, "learning_rate": 6.046104680841448e-06, "loss": 0.22, "step": 5624 }, { "epoch": 0.45, "grad_norm": 1.4142469301141967, "learning_rate": 6.044837815156377e-06, "loss": 0.1858, "step": 5625 }, { "epoch": 0.45, "grad_norm": 1.2901149794355038, "learning_rate": 6.043570879328175e-06, "loss": 0.2025, "step": 5626 }, { "epoch": 0.45, "grad_norm": 1.2919481080843338, "learning_rate": 6.042303873441902e-06, "loss": 0.1706, "step": 5627 }, { "epoch": 0.45, "grad_norm": 4.314075484074311, "learning_rate": 6.041036797582608e-06, "loss": 0.4891, "step": 5628 }, { "epoch": 0.45, "grad_norm": 1.4270442518471307, "learning_rate": 6.039769651835361e-06, "loss": 0.1971, "step": 5629 }, { "epoch": 0.45, "grad_norm": 1.3790628357555104, "learning_rate": 6.038502436285227e-06, "loss": 0.1523, "step": 5630 }, { "epoch": 0.45, "grad_norm": 6.636564401301105, "learning_rate": 6.037235151017279e-06, "loss": 0.5862, "step": 5631 }, { "epoch": 0.45, "grad_norm": 1.2471536399905925, "learning_rate": 6.03596779611659e-06, "loss": 0.1565, "step": 5632 }, { "epoch": 0.45, "grad_norm": 1.4267873799868975, "learning_rate": 6.034700371668246e-06, "loss": 0.2647, "step": 5633 }, { "epoch": 0.45, "grad_norm": 1.2920682812630875, "learning_rate": 6.033432877757331e-06, "loss": 0.1677, "step": 5634 }, { "epoch": 0.45, "grad_norm": 1.3201117633822932, "learning_rate": 6.032165314468935e-06, "loss": 0.1918, "step": 5635 }, { "epoch": 0.45, "grad_norm": 1.483827022879092, "learning_rate": 6.030897681888155e-06, "loss": 0.2374, "step": 5636 }, { "epoch": 0.45, "grad_norm": 1.3375296221624304, "learning_rate": 6.029629980100089e-06, "loss": 0.1888, "step": 5637 }, { "epoch": 0.45, "grad_norm": 1.3157550498962585, "learning_rate": 6.028362209189844e-06, "loss": 0.2101, "step": 5638 }, { "epoch": 0.45, "grad_norm": 5.0071345452294755, "learning_rate": 6.027094369242526e-06, "loss": 0.4409, "step": 5639 }, { "epoch": 0.45, "grad_norm": 4.750425550519562, "learning_rate": 6.025826460343252e-06, "loss": 0.5792, "step": 5640 }, { "epoch": 0.45, "grad_norm": 4.987667271162686, "learning_rate": 6.024558482577141e-06, "loss": 0.6997, "step": 5641 }, { "epoch": 0.45, "grad_norm": 1.344991530575316, "learning_rate": 6.023290436029314e-06, "loss": 0.1684, "step": 5642 }, { "epoch": 0.45, "grad_norm": 1.3584051200200764, "learning_rate": 6.022022320784899e-06, "loss": 0.1928, "step": 5643 }, { "epoch": 0.45, "grad_norm": 1.4604053560977028, "learning_rate": 6.020754136929029e-06, "loss": 0.228, "step": 5644 }, { "epoch": 0.45, "grad_norm": 1.2634227166406855, "learning_rate": 6.0194858845468425e-06, "loss": 0.1956, "step": 5645 }, { "epoch": 0.45, "grad_norm": 1.16182366887117, "learning_rate": 6.018217563723478e-06, "loss": 0.1623, "step": 5646 }, { "epoch": 0.45, "grad_norm": 1.265971671798676, "learning_rate": 6.016949174544085e-06, "loss": 0.1498, "step": 5647 }, { "epoch": 0.45, "grad_norm": 1.4929605025272874, "learning_rate": 6.015680717093811e-06, "loss": 0.1946, "step": 5648 }, { "epoch": 0.45, "grad_norm": 1.5007342859565824, "learning_rate": 6.014412191457812e-06, "loss": 0.2284, "step": 5649 }, { "epoch": 0.45, "grad_norm": 1.2035926870614377, "learning_rate": 6.013143597721252e-06, "loss": 0.1776, "step": 5650 }, { "epoch": 0.45, "grad_norm": 8.584367991225246, "learning_rate": 6.0118749359692905e-06, "loss": 0.6809, "step": 5651 }, { "epoch": 0.45, "grad_norm": 1.1988240255697657, "learning_rate": 6.0106062062871e-06, "loss": 0.1706, "step": 5652 }, { "epoch": 0.45, "grad_norm": 1.3731706831798078, "learning_rate": 6.0093374087598525e-06, "loss": 0.1947, "step": 5653 }, { "epoch": 0.45, "grad_norm": 1.3617248174587089, "learning_rate": 6.0080685434727274e-06, "loss": 0.2135, "step": 5654 }, { "epoch": 0.45, "grad_norm": 1.3413421229530602, "learning_rate": 6.006799610510905e-06, "loss": 0.1823, "step": 5655 }, { "epoch": 0.45, "grad_norm": 1.2931279087310887, "learning_rate": 6.005530609959573e-06, "loss": 0.1805, "step": 5656 }, { "epoch": 0.45, "grad_norm": 1.4282597823033496, "learning_rate": 6.004261541903928e-06, "loss": 0.2102, "step": 5657 }, { "epoch": 0.45, "grad_norm": 1.2873044151900763, "learning_rate": 6.002992406429161e-06, "loss": 0.2033, "step": 5658 }, { "epoch": 0.45, "grad_norm": 1.2912729124684637, "learning_rate": 6.001723203620475e-06, "loss": 0.1871, "step": 5659 }, { "epoch": 0.45, "grad_norm": 1.3609049903957566, "learning_rate": 6.000453933563075e-06, "loss": 0.1977, "step": 5660 }, { "epoch": 0.45, "grad_norm": 1.4618183956745632, "learning_rate": 5.999184596342171e-06, "loss": 0.1833, "step": 5661 }, { "epoch": 0.45, "grad_norm": 4.806190844357243, "learning_rate": 5.99791519204298e-06, "loss": 0.4771, "step": 5662 }, { "epoch": 0.45, "grad_norm": 1.1282979207018777, "learning_rate": 5.996645720750715e-06, "loss": 0.1703, "step": 5663 }, { "epoch": 0.45, "grad_norm": 1.347838831437371, "learning_rate": 5.995376182550607e-06, "loss": 0.1921, "step": 5664 }, { "epoch": 0.45, "grad_norm": 1.4978255033837349, "learning_rate": 5.994106577527877e-06, "loss": 0.187, "step": 5665 }, { "epoch": 0.45, "grad_norm": 1.2253919196219925, "learning_rate": 5.992836905767762e-06, "loss": 0.1425, "step": 5666 }, { "epoch": 0.45, "grad_norm": 1.3414929880676703, "learning_rate": 5.991567167355495e-06, "loss": 0.2127, "step": 5667 }, { "epoch": 0.45, "grad_norm": 1.282444006567151, "learning_rate": 5.990297362376322e-06, "loss": 0.2019, "step": 5668 }, { "epoch": 0.45, "grad_norm": 1.1778619482730026, "learning_rate": 5.989027490915485e-06, "loss": 0.1778, "step": 5669 }, { "epoch": 0.45, "grad_norm": 1.257621079603397, "learning_rate": 5.987757553058236e-06, "loss": 0.2048, "step": 5670 }, { "epoch": 0.45, "grad_norm": 1.4901630198688518, "learning_rate": 5.986487548889832e-06, "loss": 0.2166, "step": 5671 }, { "epoch": 0.45, "grad_norm": 1.3094962804900228, "learning_rate": 5.9852174784955295e-06, "loss": 0.2678, "step": 5672 }, { "epoch": 0.45, "grad_norm": 5.014818765928443, "learning_rate": 5.98394734196059e-06, "loss": 0.6236, "step": 5673 }, { "epoch": 0.45, "grad_norm": 6.6065058986841345, "learning_rate": 5.982677139370287e-06, "loss": 0.6142, "step": 5674 }, { "epoch": 0.45, "grad_norm": 1.5048809190438395, "learning_rate": 5.981406870809889e-06, "loss": 0.2041, "step": 5675 }, { "epoch": 0.45, "grad_norm": 1.4422487109970714, "learning_rate": 5.980136536364673e-06, "loss": 0.2198, "step": 5676 }, { "epoch": 0.45, "grad_norm": 1.1994914231536837, "learning_rate": 5.9788661361199225e-06, "loss": 0.1967, "step": 5677 }, { "epoch": 0.45, "grad_norm": 1.3100926971962215, "learning_rate": 5.977595670160923e-06, "loss": 0.2053, "step": 5678 }, { "epoch": 0.45, "grad_norm": 13.02904922119633, "learning_rate": 5.976325138572964e-06, "loss": 0.7019, "step": 5679 }, { "epoch": 0.45, "grad_norm": 1.2956763540161398, "learning_rate": 5.9750545414413405e-06, "loss": 0.1794, "step": 5680 }, { "epoch": 0.45, "grad_norm": 1.370151891092701, "learning_rate": 5.973783878851352e-06, "loss": 0.165, "step": 5681 }, { "epoch": 0.45, "grad_norm": 1.4211267576150197, "learning_rate": 5.972513150888299e-06, "loss": 0.231, "step": 5682 }, { "epoch": 0.45, "grad_norm": 1.325627164389678, "learning_rate": 5.971242357637493e-06, "loss": 0.1591, "step": 5683 }, { "epoch": 0.45, "grad_norm": 14.933694127378622, "learning_rate": 5.969971499184244e-06, "loss": 0.6261, "step": 5684 }, { "epoch": 0.45, "grad_norm": 1.093963592626661, "learning_rate": 5.96870057561387e-06, "loss": 0.1688, "step": 5685 }, { "epoch": 0.45, "grad_norm": 9.258413838332183, "learning_rate": 5.967429587011691e-06, "loss": 0.6146, "step": 5686 }, { "epoch": 0.45, "grad_norm": 1.2926402999629654, "learning_rate": 5.966158533463033e-06, "loss": 0.1888, "step": 5687 }, { "epoch": 0.46, "grad_norm": 1.4847246449457343, "learning_rate": 5.964887415053227e-06, "loss": 0.1921, "step": 5688 }, { "epoch": 0.46, "grad_norm": 1.2726212771325793, "learning_rate": 5.963616231867602e-06, "loss": 0.1724, "step": 5689 }, { "epoch": 0.46, "grad_norm": 1.4581039557863034, "learning_rate": 5.962344983991503e-06, "loss": 0.1874, "step": 5690 }, { "epoch": 0.46, "grad_norm": 6.645827002510589, "learning_rate": 5.9610736715102665e-06, "loss": 0.4644, "step": 5691 }, { "epoch": 0.46, "grad_norm": 1.3629035524712785, "learning_rate": 5.959802294509244e-06, "loss": 0.1968, "step": 5692 }, { "epoch": 0.46, "grad_norm": 1.2604163114797136, "learning_rate": 5.958530853073785e-06, "loss": 0.1611, "step": 5693 }, { "epoch": 0.46, "grad_norm": 8.190489500479314, "learning_rate": 5.9572593472892445e-06, "loss": 0.5745, "step": 5694 }, { "epoch": 0.46, "grad_norm": 1.294593535149013, "learning_rate": 5.955987777240985e-06, "loss": 0.2126, "step": 5695 }, { "epoch": 0.46, "grad_norm": 1.4208797127469808, "learning_rate": 5.95471614301437e-06, "loss": 0.1693, "step": 5696 }, { "epoch": 0.46, "grad_norm": 1.3067241364760294, "learning_rate": 5.953444444694767e-06, "loss": 0.2158, "step": 5697 }, { "epoch": 0.46, "grad_norm": 1.4441227441203894, "learning_rate": 5.95217268236755e-06, "loss": 0.1995, "step": 5698 }, { "epoch": 0.46, "grad_norm": 1.1814100998528794, "learning_rate": 5.950900856118096e-06, "loss": 0.1588, "step": 5699 }, { "epoch": 0.46, "grad_norm": 1.6285410725517602, "learning_rate": 5.949628966031785e-06, "loss": 0.2223, "step": 5700 }, { "epoch": 0.46, "grad_norm": 1.3199946041519168, "learning_rate": 5.948357012194005e-06, "loss": 0.1559, "step": 5701 }, { "epoch": 0.46, "grad_norm": 1.4954503441931697, "learning_rate": 5.947084994690145e-06, "loss": 0.2042, "step": 5702 }, { "epoch": 0.46, "grad_norm": 1.356241854830979, "learning_rate": 5.945812913605601e-06, "loss": 0.188, "step": 5703 }, { "epoch": 0.46, "grad_norm": 1.3174144523348514, "learning_rate": 5.94454076902577e-06, "loss": 0.2139, "step": 5704 }, { "epoch": 0.46, "grad_norm": 1.1567834956873886, "learning_rate": 5.943268561036053e-06, "loss": 0.1483, "step": 5705 }, { "epoch": 0.46, "grad_norm": 1.5628398791804923, "learning_rate": 5.941996289721863e-06, "loss": 0.2171, "step": 5706 }, { "epoch": 0.46, "grad_norm": 1.3378853522278507, "learning_rate": 5.940723955168607e-06, "loss": 0.2095, "step": 5707 }, { "epoch": 0.46, "grad_norm": 1.3210256914346519, "learning_rate": 5.9394515574617e-06, "loss": 0.1842, "step": 5708 }, { "epoch": 0.46, "grad_norm": 1.6147589943459284, "learning_rate": 5.938179096686565e-06, "loss": 0.2171, "step": 5709 }, { "epoch": 0.46, "grad_norm": 4.873899094459051, "learning_rate": 5.936906572928625e-06, "loss": 0.4317, "step": 5710 }, { "epoch": 0.46, "grad_norm": 1.2456435787960665, "learning_rate": 5.935633986273307e-06, "loss": 0.2166, "step": 5711 }, { "epoch": 0.46, "grad_norm": 5.032914263274818, "learning_rate": 5.934361336806044e-06, "loss": 0.5615, "step": 5712 }, { "epoch": 0.46, "grad_norm": 1.2757499417853844, "learning_rate": 5.933088624612275e-06, "loss": 0.1554, "step": 5713 }, { "epoch": 0.46, "grad_norm": 1.4026906978634766, "learning_rate": 5.931815849777438e-06, "loss": 0.2516, "step": 5714 }, { "epoch": 0.46, "grad_norm": 1.3715539984439826, "learning_rate": 5.930543012386981e-06, "loss": 0.2451, "step": 5715 }, { "epoch": 0.46, "grad_norm": 1.379447543188886, "learning_rate": 5.9292701125263515e-06, "loss": 0.2052, "step": 5716 }, { "epoch": 0.46, "grad_norm": 1.285892812830182, "learning_rate": 5.927997150281004e-06, "loss": 0.1758, "step": 5717 }, { "epoch": 0.46, "grad_norm": 1.3339860754384354, "learning_rate": 5.926724125736396e-06, "loss": 0.2394, "step": 5718 }, { "epoch": 0.46, "grad_norm": 1.2272098958799753, "learning_rate": 5.925451038977989e-06, "loss": 0.1563, "step": 5719 }, { "epoch": 0.46, "grad_norm": 1.3270458506433893, "learning_rate": 5.924177890091251e-06, "loss": 0.189, "step": 5720 }, { "epoch": 0.46, "grad_norm": 1.3614365340955805, "learning_rate": 5.9229046791616505e-06, "loss": 0.2221, "step": 5721 }, { "epoch": 0.46, "grad_norm": 6.092818923295385, "learning_rate": 5.921631406274661e-06, "loss": 0.4837, "step": 5722 }, { "epoch": 0.46, "grad_norm": 1.413445133560478, "learning_rate": 5.920358071515766e-06, "loss": 0.1999, "step": 5723 }, { "epoch": 0.46, "grad_norm": 1.2682108646771542, "learning_rate": 5.919084674970443e-06, "loss": 0.1951, "step": 5724 }, { "epoch": 0.46, "grad_norm": 4.420338640257742, "learning_rate": 5.9178112167241805e-06, "loss": 0.3494, "step": 5725 }, { "epoch": 0.46, "grad_norm": 4.632667086552824, "learning_rate": 5.916537696862472e-06, "loss": 0.408, "step": 5726 }, { "epoch": 0.46, "grad_norm": 1.3209886840816925, "learning_rate": 5.91526411547081e-06, "loss": 0.1838, "step": 5727 }, { "epoch": 0.46, "grad_norm": 1.7830084521710565, "learning_rate": 5.913990472634695e-06, "loss": 0.228, "step": 5728 }, { "epoch": 0.46, "grad_norm": 1.3595171001940738, "learning_rate": 5.91271676843963e-06, "loss": 0.1775, "step": 5729 }, { "epoch": 0.46, "grad_norm": 1.2953320928459553, "learning_rate": 5.911443002971122e-06, "loss": 0.2041, "step": 5730 }, { "epoch": 0.46, "grad_norm": 1.2234037329831313, "learning_rate": 5.910169176314686e-06, "loss": 0.1534, "step": 5731 }, { "epoch": 0.46, "grad_norm": 1.2125689024486903, "learning_rate": 5.908895288555833e-06, "loss": 0.1843, "step": 5732 }, { "epoch": 0.46, "grad_norm": 1.1193579859698313, "learning_rate": 5.907621339780087e-06, "loss": 0.1814, "step": 5733 }, { "epoch": 0.46, "grad_norm": 1.1590477776574561, "learning_rate": 5.906347330072971e-06, "loss": 0.1606, "step": 5734 }, { "epoch": 0.46, "grad_norm": 1.328192785851466, "learning_rate": 5.905073259520011e-06, "loss": 0.2261, "step": 5735 }, { "epoch": 0.46, "grad_norm": 1.1594818549014685, "learning_rate": 5.903799128206742e-06, "loss": 0.1606, "step": 5736 }, { "epoch": 0.46, "grad_norm": 1.486957350480733, "learning_rate": 5.9025249362186985e-06, "loss": 0.2091, "step": 5737 }, { "epoch": 0.46, "grad_norm": 1.1073424003443189, "learning_rate": 5.901250683641422e-06, "loss": 0.1425, "step": 5738 }, { "epoch": 0.46, "grad_norm": 1.3545557141082347, "learning_rate": 5.899976370560455e-06, "loss": 0.1942, "step": 5739 }, { "epoch": 0.46, "grad_norm": 1.5788262608892951, "learning_rate": 5.898701997061349e-06, "loss": 0.1949, "step": 5740 }, { "epoch": 0.46, "grad_norm": 1.3268574334232077, "learning_rate": 5.897427563229655e-06, "loss": 0.2029, "step": 5741 }, { "epoch": 0.46, "grad_norm": 1.414097255396974, "learning_rate": 5.896153069150928e-06, "loss": 0.2226, "step": 5742 }, { "epoch": 0.46, "grad_norm": 10.853680107674938, "learning_rate": 5.894878514910731e-06, "loss": 0.6264, "step": 5743 }, { "epoch": 0.46, "grad_norm": 1.3845440847702832, "learning_rate": 5.893603900594629e-06, "loss": 0.1979, "step": 5744 }, { "epoch": 0.46, "grad_norm": 1.4010949960206476, "learning_rate": 5.89232922628819e-06, "loss": 0.1982, "step": 5745 }, { "epoch": 0.46, "grad_norm": 1.2942396864942263, "learning_rate": 5.891054492076984e-06, "loss": 0.191, "step": 5746 }, { "epoch": 0.46, "grad_norm": 6.969964930270931, "learning_rate": 5.889779698046592e-06, "loss": 0.6958, "step": 5747 }, { "epoch": 0.46, "grad_norm": 1.3059372612615812, "learning_rate": 5.888504844282592e-06, "loss": 0.1786, "step": 5748 }, { "epoch": 0.46, "grad_norm": 1.180403595993147, "learning_rate": 5.88722993087057e-06, "loss": 0.2091, "step": 5749 }, { "epoch": 0.46, "grad_norm": 1.3895937458109564, "learning_rate": 5.885954957896115e-06, "loss": 0.1827, "step": 5750 }, { "epoch": 0.46, "grad_norm": 1.2088660694052178, "learning_rate": 5.884679925444818e-06, "loss": 0.1576, "step": 5751 }, { "epoch": 0.46, "grad_norm": 1.3201010823692692, "learning_rate": 5.883404833602279e-06, "loss": 0.1649, "step": 5752 }, { "epoch": 0.46, "grad_norm": 1.294232614173875, "learning_rate": 5.882129682454095e-06, "loss": 0.1718, "step": 5753 }, { "epoch": 0.46, "grad_norm": 1.340744119901396, "learning_rate": 5.880854472085874e-06, "loss": 0.1718, "step": 5754 }, { "epoch": 0.46, "grad_norm": 1.3492284195686604, "learning_rate": 5.879579202583223e-06, "loss": 0.1923, "step": 5755 }, { "epoch": 0.46, "grad_norm": 1.3599677684101235, "learning_rate": 5.878303874031755e-06, "loss": 0.1989, "step": 5756 }, { "epoch": 0.46, "grad_norm": 1.3669908663394268, "learning_rate": 5.8770284865170865e-06, "loss": 0.2295, "step": 5757 }, { "epoch": 0.46, "grad_norm": 1.4500462428166774, "learning_rate": 5.87575304012484e-06, "loss": 0.18, "step": 5758 }, { "epoch": 0.46, "grad_norm": 1.4826992627403754, "learning_rate": 5.874477534940637e-06, "loss": 0.2172, "step": 5759 }, { "epoch": 0.46, "grad_norm": 1.4292364450017767, "learning_rate": 5.8732019710501075e-06, "loss": 0.2389, "step": 5760 }, { "epoch": 0.46, "grad_norm": 4.187264464229376, "learning_rate": 5.871926348538885e-06, "loss": 0.4665, "step": 5761 }, { "epoch": 0.46, "grad_norm": 1.386842895695305, "learning_rate": 5.870650667492604e-06, "loss": 0.2178, "step": 5762 }, { "epoch": 0.46, "grad_norm": 1.4285108558467636, "learning_rate": 5.869374927996905e-06, "loss": 0.2073, "step": 5763 }, { "epoch": 0.46, "grad_norm": 1.3935972831777168, "learning_rate": 5.868099130137434e-06, "loss": 0.215, "step": 5764 }, { "epoch": 0.46, "grad_norm": 4.909830183041856, "learning_rate": 5.866823273999839e-06, "loss": 0.6254, "step": 5765 }, { "epoch": 0.46, "grad_norm": 1.3075461101427168, "learning_rate": 5.86554735966977e-06, "loss": 0.1809, "step": 5766 }, { "epoch": 0.46, "grad_norm": 1.2870174549240483, "learning_rate": 5.8642713872328845e-06, "loss": 0.1848, "step": 5767 }, { "epoch": 0.46, "grad_norm": 1.6485922931013042, "learning_rate": 5.862995356774843e-06, "loss": 0.1914, "step": 5768 }, { "epoch": 0.46, "grad_norm": 1.243615690870402, "learning_rate": 5.861719268381307e-06, "loss": 0.1704, "step": 5769 }, { "epoch": 0.46, "grad_norm": 1.3619467382787693, "learning_rate": 5.860443122137946e-06, "loss": 0.1705, "step": 5770 }, { "epoch": 0.46, "grad_norm": 1.0968087815841516, "learning_rate": 5.859166918130432e-06, "loss": 0.1478, "step": 5771 }, { "epoch": 0.46, "grad_norm": 1.3551066786591732, "learning_rate": 5.857890656444439e-06, "loss": 0.1842, "step": 5772 }, { "epoch": 0.46, "grad_norm": 1.337925809998819, "learning_rate": 5.856614337165647e-06, "loss": 0.1692, "step": 5773 }, { "epoch": 0.46, "grad_norm": 1.5104521816073047, "learning_rate": 5.8553379603797386e-06, "loss": 0.2455, "step": 5774 }, { "epoch": 0.46, "grad_norm": 1.353913514979569, "learning_rate": 5.854061526172402e-06, "loss": 0.1958, "step": 5775 }, { "epoch": 0.46, "grad_norm": 1.4517306800177738, "learning_rate": 5.852785034629329e-06, "loss": 0.2464, "step": 5776 }, { "epoch": 0.46, "grad_norm": 1.3837920810602258, "learning_rate": 5.85150848583621e-06, "loss": 0.2338, "step": 5777 }, { "epoch": 0.46, "grad_norm": 1.314685172807877, "learning_rate": 5.850231879878747e-06, "loss": 0.1781, "step": 5778 }, { "epoch": 0.46, "grad_norm": 1.2990389312399357, "learning_rate": 5.848955216842642e-06, "loss": 0.2097, "step": 5779 }, { "epoch": 0.46, "grad_norm": 1.2672165508534958, "learning_rate": 5.847678496813601e-06, "loss": 0.1729, "step": 5780 }, { "epoch": 0.46, "grad_norm": 12.705583601276164, "learning_rate": 5.846401719877335e-06, "loss": 0.5704, "step": 5781 }, { "epoch": 0.46, "grad_norm": 1.2461234946688042, "learning_rate": 5.845124886119556e-06, "loss": 0.1703, "step": 5782 }, { "epoch": 0.46, "grad_norm": 4.369370036417176, "learning_rate": 5.843847995625984e-06, "loss": 0.5121, "step": 5783 }, { "epoch": 0.46, "grad_norm": 1.2670824094483304, "learning_rate": 5.842571048482339e-06, "loss": 0.2214, "step": 5784 }, { "epoch": 0.46, "grad_norm": 7.117239106485012, "learning_rate": 5.841294044774346e-06, "loss": 0.5197, "step": 5785 }, { "epoch": 0.46, "grad_norm": 1.3293867536367963, "learning_rate": 5.840016984587736e-06, "loss": 0.1961, "step": 5786 }, { "epoch": 0.46, "grad_norm": 1.309808647074832, "learning_rate": 5.838739868008239e-06, "loss": 0.1565, "step": 5787 }, { "epoch": 0.46, "grad_norm": 1.4313639370345095, "learning_rate": 5.837462695121593e-06, "loss": 0.1925, "step": 5788 }, { "epoch": 0.46, "grad_norm": 1.3495533721692934, "learning_rate": 5.836185466013541e-06, "loss": 0.221, "step": 5789 }, { "epoch": 0.46, "grad_norm": 1.2619622877080199, "learning_rate": 5.834908180769824e-06, "loss": 0.1705, "step": 5790 }, { "epoch": 0.46, "grad_norm": 1.3666402266397282, "learning_rate": 5.83363083947619e-06, "loss": 0.228, "step": 5791 }, { "epoch": 0.46, "grad_norm": 1.35107387972115, "learning_rate": 5.832353442218394e-06, "loss": 0.1915, "step": 5792 }, { "epoch": 0.46, "grad_norm": 1.2699050647588939, "learning_rate": 5.8310759890821884e-06, "loss": 0.1785, "step": 5793 }, { "epoch": 0.46, "grad_norm": 1.236727980432842, "learning_rate": 5.829798480153333e-06, "loss": 0.1952, "step": 5794 }, { "epoch": 0.46, "grad_norm": 1.6037418210265613, "learning_rate": 5.828520915517593e-06, "loss": 0.2455, "step": 5795 }, { "epoch": 0.46, "grad_norm": 1.3022392744380422, "learning_rate": 5.827243295260733e-06, "loss": 0.1775, "step": 5796 }, { "epoch": 0.46, "grad_norm": 1.388907440391088, "learning_rate": 5.825965619468524e-06, "loss": 0.2072, "step": 5797 }, { "epoch": 0.46, "grad_norm": 1.5415974744874508, "learning_rate": 5.82468788822674e-06, "loss": 0.2371, "step": 5798 }, { "epoch": 0.46, "grad_norm": 1.242236943177887, "learning_rate": 5.82341010162116e-06, "loss": 0.1594, "step": 5799 }, { "epoch": 0.46, "grad_norm": 1.2637850756378195, "learning_rate": 5.822132259737565e-06, "loss": 0.1702, "step": 5800 }, { "epoch": 0.46, "grad_norm": 1.3314161899047547, "learning_rate": 5.8208543626617384e-06, "loss": 0.2131, "step": 5801 }, { "epoch": 0.46, "grad_norm": 6.856706037233241, "learning_rate": 5.819576410479475e-06, "loss": 0.7953, "step": 5802 }, { "epoch": 0.46, "grad_norm": 1.4039522225306276, "learning_rate": 5.818298403276562e-06, "loss": 0.1534, "step": 5803 }, { "epoch": 0.46, "grad_norm": 1.2401778434788044, "learning_rate": 5.817020341138797e-06, "loss": 0.1792, "step": 5804 }, { "epoch": 0.46, "grad_norm": 1.3521341055963747, "learning_rate": 5.815742224151982e-06, "loss": 0.2126, "step": 5805 }, { "epoch": 0.46, "grad_norm": 5.766174482820119, "learning_rate": 5.81446405240192e-06, "loss": 0.6264, "step": 5806 }, { "epoch": 0.46, "grad_norm": 1.4822014620439405, "learning_rate": 5.813185825974419e-06, "loss": 0.1925, "step": 5807 }, { "epoch": 0.46, "grad_norm": 1.2822804112503057, "learning_rate": 5.8119075449552885e-06, "loss": 0.1998, "step": 5808 }, { "epoch": 0.46, "grad_norm": 1.637119501339548, "learning_rate": 5.8106292094303455e-06, "loss": 0.2073, "step": 5809 }, { "epoch": 0.46, "grad_norm": 1.4689000906776288, "learning_rate": 5.809350819485408e-06, "loss": 0.1762, "step": 5810 }, { "epoch": 0.46, "grad_norm": 1.4548121029579029, "learning_rate": 5.8080723752062955e-06, "loss": 0.2188, "step": 5811 }, { "epoch": 0.46, "grad_norm": 1.3463070708671028, "learning_rate": 5.806793876678838e-06, "loss": 0.195, "step": 5812 }, { "epoch": 0.47, "grad_norm": 1.4188589745907383, "learning_rate": 5.805515323988861e-06, "loss": 0.2223, "step": 5813 }, { "epoch": 0.47, "grad_norm": 1.2170357974782944, "learning_rate": 5.8042367172222025e-06, "loss": 0.2197, "step": 5814 }, { "epoch": 0.47, "grad_norm": 1.4724450339418798, "learning_rate": 5.802958056464694e-06, "loss": 0.2129, "step": 5815 }, { "epoch": 0.47, "grad_norm": 1.2905244170609236, "learning_rate": 5.80167934180218e-06, "loss": 0.1791, "step": 5816 }, { "epoch": 0.47, "grad_norm": 1.7781235668903357, "learning_rate": 5.800400573320502e-06, "loss": 0.2247, "step": 5817 }, { "epoch": 0.47, "grad_norm": 1.1915795460943785, "learning_rate": 5.799121751105507e-06, "loss": 0.1451, "step": 5818 }, { "epoch": 0.47, "grad_norm": 1.1996631586277335, "learning_rate": 5.7978428752430514e-06, "loss": 0.185, "step": 5819 }, { "epoch": 0.47, "grad_norm": 1.290831565658375, "learning_rate": 5.796563945818984e-06, "loss": 0.2359, "step": 5820 }, { "epoch": 0.47, "grad_norm": 1.3212469880577664, "learning_rate": 5.795284962919164e-06, "loss": 0.1888, "step": 5821 }, { "epoch": 0.47, "grad_norm": 1.3889234367911285, "learning_rate": 5.794005926629457e-06, "loss": 0.1733, "step": 5822 }, { "epoch": 0.47, "grad_norm": 1.337997496160111, "learning_rate": 5.792726837035726e-06, "loss": 0.2107, "step": 5823 }, { "epoch": 0.47, "grad_norm": 1.3234147492119162, "learning_rate": 5.791447694223841e-06, "loss": 0.2065, "step": 5824 }, { "epoch": 0.47, "grad_norm": 1.2316160911624536, "learning_rate": 5.7901684982796716e-06, "loss": 0.202, "step": 5825 }, { "epoch": 0.47, "grad_norm": 1.3519214390539434, "learning_rate": 5.788889249289099e-06, "loss": 0.2171, "step": 5826 }, { "epoch": 0.47, "grad_norm": 1.2540029835453093, "learning_rate": 5.787609947338001e-06, "loss": 0.1871, "step": 5827 }, { "epoch": 0.47, "grad_norm": 1.4241020949385594, "learning_rate": 5.78633059251226e-06, "loss": 0.212, "step": 5828 }, { "epoch": 0.47, "grad_norm": 1.3598279855358968, "learning_rate": 5.785051184897765e-06, "loss": 0.1728, "step": 5829 }, { "epoch": 0.47, "grad_norm": 4.494814722974306, "learning_rate": 5.783771724580405e-06, "loss": 0.4617, "step": 5830 }, { "epoch": 0.47, "grad_norm": 1.1829142961754717, "learning_rate": 5.7824922116460735e-06, "loss": 0.1614, "step": 5831 }, { "epoch": 0.47, "grad_norm": 1.3843450315038315, "learning_rate": 5.78121264618067e-06, "loss": 0.1878, "step": 5832 }, { "epoch": 0.47, "grad_norm": 1.334292893401392, "learning_rate": 5.779933028270093e-06, "loss": 0.166, "step": 5833 }, { "epoch": 0.47, "grad_norm": 5.053860655002488, "learning_rate": 5.7786533580002506e-06, "loss": 0.4752, "step": 5834 }, { "epoch": 0.47, "grad_norm": 1.2691181839248344, "learning_rate": 5.777373635457049e-06, "loss": 0.1865, "step": 5835 }, { "epoch": 0.47, "grad_norm": 1.3650917512092926, "learning_rate": 5.7760938607263975e-06, "loss": 0.2186, "step": 5836 }, { "epoch": 0.47, "grad_norm": 7.236867975576756, "learning_rate": 5.774814033894216e-06, "loss": 0.4932, "step": 5837 }, { "epoch": 0.47, "grad_norm": 1.3468026191871252, "learning_rate": 5.773534155046421e-06, "loss": 0.1712, "step": 5838 }, { "epoch": 0.47, "grad_norm": 1.4395188842457471, "learning_rate": 5.772254224268932e-06, "loss": 0.2044, "step": 5839 }, { "epoch": 0.47, "grad_norm": 1.5538177638515203, "learning_rate": 5.770974241647679e-06, "loss": 0.1908, "step": 5840 }, { "epoch": 0.47, "grad_norm": 1.2333528050865397, "learning_rate": 5.769694207268589e-06, "loss": 0.1652, "step": 5841 }, { "epoch": 0.47, "grad_norm": 1.4798651122671294, "learning_rate": 5.768414121217594e-06, "loss": 0.2026, "step": 5842 }, { "epoch": 0.47, "grad_norm": 1.3405146552385974, "learning_rate": 5.767133983580631e-06, "loss": 0.195, "step": 5843 }, { "epoch": 0.47, "grad_norm": 4.361730241694057, "learning_rate": 5.765853794443641e-06, "loss": 0.6309, "step": 5844 }, { "epoch": 0.47, "grad_norm": 1.3086734966051299, "learning_rate": 5.764573553892564e-06, "loss": 0.1713, "step": 5845 }, { "epoch": 0.47, "grad_norm": 1.409697746302821, "learning_rate": 5.763293262013347e-06, "loss": 0.169, "step": 5846 }, { "epoch": 0.47, "grad_norm": 1.4562134309812649, "learning_rate": 5.762012918891942e-06, "loss": 0.265, "step": 5847 }, { "epoch": 0.47, "grad_norm": 1.4400144460057551, "learning_rate": 5.760732524614301e-06, "loss": 0.1949, "step": 5848 }, { "epoch": 0.47, "grad_norm": 1.3043056604803445, "learning_rate": 5.75945207926638e-06, "loss": 0.2008, "step": 5849 }, { "epoch": 0.47, "grad_norm": 1.4397428459139365, "learning_rate": 5.75817158293414e-06, "loss": 0.2239, "step": 5850 }, { "epoch": 0.47, "grad_norm": 5.211075218272331, "learning_rate": 5.756891035703544e-06, "loss": 0.5079, "step": 5851 }, { "epoch": 0.47, "grad_norm": 1.3321834904562107, "learning_rate": 5.755610437660561e-06, "loss": 0.1867, "step": 5852 }, { "epoch": 0.47, "grad_norm": 1.47952047368519, "learning_rate": 5.754329788891158e-06, "loss": 0.1934, "step": 5853 }, { "epoch": 0.47, "grad_norm": 1.2760948316488285, "learning_rate": 5.753049089481313e-06, "loss": 0.1737, "step": 5854 }, { "epoch": 0.47, "grad_norm": 1.310965478818646, "learning_rate": 5.751768339517e-06, "loss": 0.1757, "step": 5855 }, { "epoch": 0.47, "grad_norm": 1.3163542643126205, "learning_rate": 5.750487539084199e-06, "loss": 0.189, "step": 5856 }, { "epoch": 0.47, "grad_norm": 5.420325486405605, "learning_rate": 5.749206688268897e-06, "loss": 0.3969, "step": 5857 }, { "epoch": 0.47, "grad_norm": 1.2956025285766046, "learning_rate": 5.747925787157081e-06, "loss": 0.191, "step": 5858 }, { "epoch": 0.47, "grad_norm": 1.4050429722620523, "learning_rate": 5.746644835834739e-06, "loss": 0.2031, "step": 5859 }, { "epoch": 0.47, "grad_norm": 1.3983575554920347, "learning_rate": 5.745363834387867e-06, "loss": 0.204, "step": 5860 }, { "epoch": 0.47, "grad_norm": 9.017820838264491, "learning_rate": 5.744082782902463e-06, "loss": 0.5403, "step": 5861 }, { "epoch": 0.47, "grad_norm": 1.284492374152771, "learning_rate": 5.742801681464527e-06, "loss": 0.1742, "step": 5862 }, { "epoch": 0.47, "grad_norm": 1.2886651121809594, "learning_rate": 5.741520530160064e-06, "loss": 0.1976, "step": 5863 }, { "epoch": 0.47, "grad_norm": 1.4065906291576153, "learning_rate": 5.740239329075081e-06, "loss": 0.2076, "step": 5864 }, { "epoch": 0.47, "grad_norm": 1.5034303781768974, "learning_rate": 5.73895807829559e-06, "loss": 0.1867, "step": 5865 }, { "epoch": 0.47, "grad_norm": 1.4697077068130926, "learning_rate": 5.737676777907602e-06, "loss": 0.1817, "step": 5866 }, { "epoch": 0.47, "grad_norm": 1.2086837213418151, "learning_rate": 5.736395427997138e-06, "loss": 0.166, "step": 5867 }, { "epoch": 0.47, "grad_norm": 1.4738967135035692, "learning_rate": 5.735114028650218e-06, "loss": 0.2564, "step": 5868 }, { "epoch": 0.47, "grad_norm": 1.2838124182342827, "learning_rate": 5.733832579952866e-06, "loss": 0.2041, "step": 5869 }, { "epoch": 0.47, "grad_norm": 1.2221743945868093, "learning_rate": 5.732551081991109e-06, "loss": 0.2034, "step": 5870 }, { "epoch": 0.47, "grad_norm": 1.3185088313696427, "learning_rate": 5.73126953485098e-06, "loss": 0.1796, "step": 5871 }, { "epoch": 0.47, "grad_norm": 1.411420920784928, "learning_rate": 5.72998793861851e-06, "loss": 0.1986, "step": 5872 }, { "epoch": 0.47, "grad_norm": 1.7464490396659782, "learning_rate": 5.7287062933797365e-06, "loss": 0.2386, "step": 5873 }, { "epoch": 0.47, "grad_norm": 1.3327533738172825, "learning_rate": 5.727424599220703e-06, "loss": 0.2058, "step": 5874 }, { "epoch": 0.47, "grad_norm": 1.2560669788549168, "learning_rate": 5.726142856227453e-06, "loss": 0.1699, "step": 5875 }, { "epoch": 0.47, "grad_norm": 10.035683554512246, "learning_rate": 5.724861064486031e-06, "loss": 0.5508, "step": 5876 }, { "epoch": 0.47, "grad_norm": 1.3009862270019332, "learning_rate": 5.723579224082491e-06, "loss": 0.1817, "step": 5877 }, { "epoch": 0.47, "grad_norm": 1.257225330468414, "learning_rate": 5.7222973351028844e-06, "loss": 0.1518, "step": 5878 }, { "epoch": 0.47, "grad_norm": 1.3699903431022236, "learning_rate": 5.7210153976332696e-06, "loss": 0.1661, "step": 5879 }, { "epoch": 0.47, "grad_norm": 1.3002580818699006, "learning_rate": 5.719733411759707e-06, "loss": 0.1875, "step": 5880 }, { "epoch": 0.47, "grad_norm": 1.2086167438034907, "learning_rate": 5.7184513775682605e-06, "loss": 0.1636, "step": 5881 }, { "epoch": 0.47, "grad_norm": 1.2602920816050203, "learning_rate": 5.717169295144995e-06, "loss": 0.2114, "step": 5882 }, { "epoch": 0.47, "grad_norm": 1.3376660719533946, "learning_rate": 5.715887164575981e-06, "loss": 0.2312, "step": 5883 }, { "epoch": 0.47, "grad_norm": 1.2270800968932485, "learning_rate": 5.714604985947294e-06, "loss": 0.2333, "step": 5884 }, { "epoch": 0.47, "grad_norm": 1.4387223071593043, "learning_rate": 5.713322759345008e-06, "loss": 0.2358, "step": 5885 }, { "epoch": 0.47, "grad_norm": 1.3536225540000226, "learning_rate": 5.7120404848552056e-06, "loss": 0.1677, "step": 5886 }, { "epoch": 0.47, "grad_norm": 1.3537446329711529, "learning_rate": 5.710758162563965e-06, "loss": 0.1232, "step": 5887 }, { "epoch": 0.47, "grad_norm": 1.4819349973493916, "learning_rate": 5.709475792557379e-06, "loss": 0.2099, "step": 5888 }, { "epoch": 0.47, "grad_norm": 1.243377464387006, "learning_rate": 5.708193374921533e-06, "loss": 0.1558, "step": 5889 }, { "epoch": 0.47, "grad_norm": 1.2313001480346943, "learning_rate": 5.7069109097425176e-06, "loss": 0.1999, "step": 5890 }, { "epoch": 0.47, "grad_norm": 1.3738288377187922, "learning_rate": 5.705628397106432e-06, "loss": 0.1989, "step": 5891 }, { "epoch": 0.47, "grad_norm": 1.393464058116012, "learning_rate": 5.704345837099375e-06, "loss": 0.2253, "step": 5892 }, { "epoch": 0.47, "grad_norm": 1.3059650101949498, "learning_rate": 5.703063229807448e-06, "loss": 0.1969, "step": 5893 }, { "epoch": 0.47, "grad_norm": 1.374530431275325, "learning_rate": 5.7017805753167545e-06, "loss": 0.2187, "step": 5894 }, { "epoch": 0.47, "grad_norm": 1.2885387102987482, "learning_rate": 5.700497873713405e-06, "loss": 0.1997, "step": 5895 }, { "epoch": 0.47, "grad_norm": 1.5326136160042831, "learning_rate": 5.699215125083512e-06, "loss": 0.2194, "step": 5896 }, { "epoch": 0.47, "grad_norm": 1.4584974092145213, "learning_rate": 5.6979323295131884e-06, "loss": 0.2084, "step": 5897 }, { "epoch": 0.47, "grad_norm": 1.4232275509842094, "learning_rate": 5.696649487088553e-06, "loss": 0.1867, "step": 5898 }, { "epoch": 0.47, "grad_norm": 1.6035620190930906, "learning_rate": 5.695366597895728e-06, "loss": 0.2144, "step": 5899 }, { "epoch": 0.47, "grad_norm": 1.5175640086121123, "learning_rate": 5.694083662020835e-06, "loss": 0.2135, "step": 5900 }, { "epoch": 0.47, "grad_norm": 1.1972701145487568, "learning_rate": 5.692800679550001e-06, "loss": 0.1597, "step": 5901 }, { "epoch": 0.47, "grad_norm": 1.258488786213888, "learning_rate": 5.691517650569361e-06, "loss": 0.2291, "step": 5902 }, { "epoch": 0.47, "grad_norm": 1.0905339964775163, "learning_rate": 5.690234575165046e-06, "loss": 0.1782, "step": 5903 }, { "epoch": 0.47, "grad_norm": 5.028483657076705, "learning_rate": 5.68895145342319e-06, "loss": 0.6673, "step": 5904 }, { "epoch": 0.47, "grad_norm": 1.5262428659966436, "learning_rate": 5.6876682854299385e-06, "loss": 0.182, "step": 5905 }, { "epoch": 0.47, "grad_norm": 1.4484051930873665, "learning_rate": 5.686385071271431e-06, "loss": 0.1834, "step": 5906 }, { "epoch": 0.47, "grad_norm": 1.3945717792931787, "learning_rate": 5.685101811033812e-06, "loss": 0.1878, "step": 5907 }, { "epoch": 0.47, "grad_norm": 1.3857285946671793, "learning_rate": 5.683818504803234e-06, "loss": 0.1653, "step": 5908 }, { "epoch": 0.47, "grad_norm": 1.3715151116159963, "learning_rate": 5.682535152665848e-06, "loss": 0.1985, "step": 5909 }, { "epoch": 0.47, "grad_norm": 1.2376143862653577, "learning_rate": 5.68125175470781e-06, "loss": 0.163, "step": 5910 }, { "epoch": 0.47, "grad_norm": 1.3063222372355119, "learning_rate": 5.679968311015275e-06, "loss": 0.2132, "step": 5911 }, { "epoch": 0.47, "grad_norm": 1.4300750249723542, "learning_rate": 5.678684821674409e-06, "loss": 0.2122, "step": 5912 }, { "epoch": 0.47, "grad_norm": 5.765728847845143, "learning_rate": 5.677401286771373e-06, "loss": 0.2316, "step": 5913 }, { "epoch": 0.47, "grad_norm": 1.366396651773216, "learning_rate": 5.676117706392339e-06, "loss": 0.2013, "step": 5914 }, { "epoch": 0.47, "grad_norm": 1.3375263478720958, "learning_rate": 5.674834080623472e-06, "loss": 0.2112, "step": 5915 }, { "epoch": 0.47, "grad_norm": 1.538273385881684, "learning_rate": 5.673550409550951e-06, "loss": 0.2257, "step": 5916 }, { "epoch": 0.47, "grad_norm": 1.2713483112332928, "learning_rate": 5.67226669326095e-06, "loss": 0.1735, "step": 5917 }, { "epoch": 0.47, "grad_norm": 1.415427280468103, "learning_rate": 5.6709829318396456e-06, "loss": 0.2196, "step": 5918 }, { "epoch": 0.47, "grad_norm": 1.4678369190488758, "learning_rate": 5.669699125373227e-06, "loss": 0.2155, "step": 5919 }, { "epoch": 0.47, "grad_norm": 1.421014437684305, "learning_rate": 5.668415273947876e-06, "loss": 0.1763, "step": 5920 }, { "epoch": 0.47, "grad_norm": 1.3098766809686797, "learning_rate": 5.667131377649784e-06, "loss": 0.1829, "step": 5921 }, { "epoch": 0.47, "grad_norm": 1.297367671952655, "learning_rate": 5.665847436565138e-06, "loss": 0.1827, "step": 5922 }, { "epoch": 0.47, "grad_norm": 1.1710379004700813, "learning_rate": 5.664563450780139e-06, "loss": 0.172, "step": 5923 }, { "epoch": 0.47, "grad_norm": 1.3894756830200443, "learning_rate": 5.663279420380983e-06, "loss": 0.2088, "step": 5924 }, { "epoch": 0.47, "grad_norm": 1.3302788824046827, "learning_rate": 5.661995345453867e-06, "loss": 0.2155, "step": 5925 }, { "epoch": 0.47, "grad_norm": 1.3791295597542517, "learning_rate": 5.660711226084999e-06, "loss": 0.1899, "step": 5926 }, { "epoch": 0.47, "grad_norm": 1.4059490903014586, "learning_rate": 5.659427062360584e-06, "loss": 0.2186, "step": 5927 }, { "epoch": 0.47, "grad_norm": 1.4752211888247597, "learning_rate": 5.658142854366831e-06, "loss": 0.2037, "step": 5928 }, { "epoch": 0.47, "grad_norm": 1.3130994824645255, "learning_rate": 5.656858602189956e-06, "loss": 0.1606, "step": 5929 }, { "epoch": 0.47, "grad_norm": 1.1639218661617001, "learning_rate": 5.655574305916173e-06, "loss": 0.1405, "step": 5930 }, { "epoch": 0.47, "grad_norm": 1.3979191580013663, "learning_rate": 5.6542899656317e-06, "loss": 0.1972, "step": 5931 }, { "epoch": 0.47, "grad_norm": 1.3898160957879981, "learning_rate": 5.653005581422759e-06, "loss": 0.1771, "step": 5932 }, { "epoch": 0.47, "grad_norm": 1.1920173219901289, "learning_rate": 5.651721153375576e-06, "loss": 0.1647, "step": 5933 }, { "epoch": 0.47, "grad_norm": 1.3017918184142756, "learning_rate": 5.650436681576377e-06, "loss": 0.1946, "step": 5934 }, { "epoch": 0.47, "grad_norm": 1.4025265945770609, "learning_rate": 5.64915216611139e-06, "loss": 0.2162, "step": 5935 }, { "epoch": 0.47, "grad_norm": 1.2808322477519614, "learning_rate": 5.647867607066855e-06, "loss": 0.1863, "step": 5936 }, { "epoch": 0.47, "grad_norm": 1.5272249065075783, "learning_rate": 5.646583004529003e-06, "loss": 0.2421, "step": 5937 }, { "epoch": 0.48, "grad_norm": 1.2115062389307953, "learning_rate": 5.645298358584075e-06, "loss": 0.2119, "step": 5938 }, { "epoch": 0.48, "grad_norm": 1.6579623764689173, "learning_rate": 5.644013669318311e-06, "loss": 0.2306, "step": 5939 }, { "epoch": 0.48, "grad_norm": 1.3569769686010955, "learning_rate": 5.64272893681796e-06, "loss": 0.1833, "step": 5940 }, { "epoch": 0.48, "grad_norm": 1.4221628216846194, "learning_rate": 5.64144416116927e-06, "loss": 0.2212, "step": 5941 }, { "epoch": 0.48, "grad_norm": 1.2463620184319113, "learning_rate": 5.640159342458487e-06, "loss": 0.2008, "step": 5942 }, { "epoch": 0.48, "grad_norm": 1.575088956870121, "learning_rate": 5.638874480771869e-06, "loss": 0.2093, "step": 5943 }, { "epoch": 0.48, "grad_norm": 1.157262309479802, "learning_rate": 5.637589576195671e-06, "loss": 0.168, "step": 5944 }, { "epoch": 0.48, "grad_norm": 1.2743218871551016, "learning_rate": 5.636304628816153e-06, "loss": 0.2018, "step": 5945 }, { "epoch": 0.48, "grad_norm": 7.16500939068641, "learning_rate": 5.635019638719576e-06, "loss": 0.7054, "step": 5946 }, { "epoch": 0.48, "grad_norm": 1.1912768510895222, "learning_rate": 5.633734605992209e-06, "loss": 0.1937, "step": 5947 }, { "epoch": 0.48, "grad_norm": 1.4151370847374525, "learning_rate": 5.632449530720316e-06, "loss": 0.1904, "step": 5948 }, { "epoch": 0.48, "grad_norm": 1.279416978014094, "learning_rate": 5.63116441299017e-06, "loss": 0.1693, "step": 5949 }, { "epoch": 0.48, "grad_norm": 1.3088873934527676, "learning_rate": 5.629879252888046e-06, "loss": 0.2056, "step": 5950 }, { "epoch": 0.48, "grad_norm": 9.719504061327706, "learning_rate": 5.628594050500219e-06, "loss": 0.6353, "step": 5951 }, { "epoch": 0.48, "grad_norm": 1.3079975175667797, "learning_rate": 5.627308805912968e-06, "loss": 0.1778, "step": 5952 }, { "epoch": 0.48, "grad_norm": 1.4124124821782975, "learning_rate": 5.626023519212576e-06, "loss": 0.1655, "step": 5953 }, { "epoch": 0.48, "grad_norm": 1.2133313438798814, "learning_rate": 5.624738190485329e-06, "loss": 0.2016, "step": 5954 }, { "epoch": 0.48, "grad_norm": 1.1789270584811615, "learning_rate": 5.623452819817514e-06, "loss": 0.1295, "step": 5955 }, { "epoch": 0.48, "grad_norm": 1.3169481574395738, "learning_rate": 5.622167407295422e-06, "loss": 0.1836, "step": 5956 }, { "epoch": 0.48, "grad_norm": 1.238564384781399, "learning_rate": 5.620881953005347e-06, "loss": 0.1798, "step": 5957 }, { "epoch": 0.48, "grad_norm": 1.3320269991842364, "learning_rate": 5.619596457033586e-06, "loss": 0.2275, "step": 5958 }, { "epoch": 0.48, "grad_norm": 1.201729636212621, "learning_rate": 5.618310919466438e-06, "loss": 0.1929, "step": 5959 }, { "epoch": 0.48, "grad_norm": 1.4499074421537543, "learning_rate": 5.617025340390203e-06, "loss": 0.2656, "step": 5960 }, { "epoch": 0.48, "grad_norm": 1.469263467714341, "learning_rate": 5.615739719891189e-06, "loss": 0.2073, "step": 5961 }, { "epoch": 0.48, "grad_norm": 1.429833157384644, "learning_rate": 5.6144540580557e-06, "loss": 0.225, "step": 5962 }, { "epoch": 0.48, "grad_norm": 1.392256258350648, "learning_rate": 5.6131683549700485e-06, "loss": 0.2064, "step": 5963 }, { "epoch": 0.48, "grad_norm": 5.858762271755936, "learning_rate": 5.6118826107205496e-06, "loss": 0.6358, "step": 5964 }, { "epoch": 0.48, "grad_norm": 1.2196587850290672, "learning_rate": 5.610596825393516e-06, "loss": 0.1841, "step": 5965 }, { "epoch": 0.48, "grad_norm": 1.3803692772876153, "learning_rate": 5.609310999075268e-06, "loss": 0.1905, "step": 5966 }, { "epoch": 0.48, "grad_norm": 1.232699013594476, "learning_rate": 5.608025131852126e-06, "loss": 0.1736, "step": 5967 }, { "epoch": 0.48, "grad_norm": 1.485105631993959, "learning_rate": 5.6067392238104146e-06, "loss": 0.1898, "step": 5968 }, { "epoch": 0.48, "grad_norm": 1.862350150569696, "learning_rate": 5.60545327503646e-06, "loss": 0.2372, "step": 5969 }, { "epoch": 0.48, "grad_norm": 1.3580474404830163, "learning_rate": 5.604167285616593e-06, "loss": 0.1976, "step": 5970 }, { "epoch": 0.48, "grad_norm": 1.446761445578654, "learning_rate": 5.602881255637146e-06, "loss": 0.2183, "step": 5971 }, { "epoch": 0.48, "grad_norm": 1.1936653914287894, "learning_rate": 5.601595185184454e-06, "loss": 0.1638, "step": 5972 }, { "epoch": 0.48, "grad_norm": 1.4927953956870545, "learning_rate": 5.600309074344853e-06, "loss": 0.2181, "step": 5973 }, { "epoch": 0.48, "grad_norm": 1.360328532071457, "learning_rate": 5.599022923204686e-06, "loss": 0.1994, "step": 5974 }, { "epoch": 0.48, "grad_norm": 1.4884517014081409, "learning_rate": 5.597736731850295e-06, "loss": 0.2026, "step": 5975 }, { "epoch": 0.48, "grad_norm": 1.255441033656892, "learning_rate": 5.596450500368027e-06, "loss": 0.1966, "step": 5976 }, { "epoch": 0.48, "grad_norm": 1.3900141456820991, "learning_rate": 5.595164228844229e-06, "loss": 0.2173, "step": 5977 }, { "epoch": 0.48, "grad_norm": 1.2471511088921274, "learning_rate": 5.5938779173652535e-06, "loss": 0.1602, "step": 5978 }, { "epoch": 0.48, "grad_norm": 1.158041466196222, "learning_rate": 5.592591566017454e-06, "loss": 0.1781, "step": 5979 }, { "epoch": 0.48, "grad_norm": 1.2836872740864629, "learning_rate": 5.591305174887185e-06, "loss": 0.1909, "step": 5980 }, { "epoch": 0.48, "grad_norm": 3.946203578021819, "learning_rate": 5.59001874406081e-06, "loss": 0.4099, "step": 5981 }, { "epoch": 0.48, "grad_norm": 1.3868069436072263, "learning_rate": 5.588732273624689e-06, "loss": 0.1599, "step": 5982 }, { "epoch": 0.48, "grad_norm": 1.3702607578814197, "learning_rate": 5.587445763665187e-06, "loss": 0.2301, "step": 5983 }, { "epoch": 0.48, "grad_norm": 1.457836279442077, "learning_rate": 5.586159214268669e-06, "loss": 0.2196, "step": 5984 }, { "epoch": 0.48, "grad_norm": 1.3278533013278107, "learning_rate": 5.58487262552151e-06, "loss": 0.1875, "step": 5985 }, { "epoch": 0.48, "grad_norm": 1.516481069045735, "learning_rate": 5.583585997510078e-06, "loss": 0.2084, "step": 5986 }, { "epoch": 0.48, "grad_norm": 1.4088266528754982, "learning_rate": 5.582299330320749e-06, "loss": 0.1867, "step": 5987 }, { "epoch": 0.48, "grad_norm": 1.4743423439972172, "learning_rate": 5.581012624039903e-06, "loss": 0.249, "step": 5988 }, { "epoch": 0.48, "grad_norm": 1.3722060138197059, "learning_rate": 5.579725878753919e-06, "loss": 0.2041, "step": 5989 }, { "epoch": 0.48, "grad_norm": 1.5027671316126925, "learning_rate": 5.5784390945491784e-06, "loss": 0.2116, "step": 5990 }, { "epoch": 0.48, "grad_norm": 1.3441156452901943, "learning_rate": 5.577152271512071e-06, "loss": 0.2041, "step": 5991 }, { "epoch": 0.48, "grad_norm": 1.4045807658684673, "learning_rate": 5.5758654097289824e-06, "loss": 0.2105, "step": 5992 }, { "epoch": 0.48, "grad_norm": 1.543958337108886, "learning_rate": 5.574578509286304e-06, "loss": 0.1992, "step": 5993 }, { "epoch": 0.48, "grad_norm": 4.526558778526334, "learning_rate": 5.573291570270429e-06, "loss": 0.5413, "step": 5994 }, { "epoch": 0.48, "grad_norm": 1.5278364801345237, "learning_rate": 5.572004592767755e-06, "loss": 0.2295, "step": 5995 }, { "epoch": 0.48, "grad_norm": 1.3808539834434894, "learning_rate": 5.5707175768646784e-06, "loss": 0.174, "step": 5996 }, { "epoch": 0.48, "grad_norm": 1.445568872702685, "learning_rate": 5.569430522647601e-06, "loss": 0.2244, "step": 5997 }, { "epoch": 0.48, "grad_norm": 30.60855702119436, "learning_rate": 5.568143430202928e-06, "loss": 0.484, "step": 5998 }, { "epoch": 0.48, "grad_norm": 1.5442187412434536, "learning_rate": 5.566856299617066e-06, "loss": 0.2039, "step": 5999 }, { "epoch": 0.48, "grad_norm": 1.3797616433037831, "learning_rate": 5.5655691309764225e-06, "loss": 0.2199, "step": 6000 }, { "epoch": 0.48, "grad_norm": 1.4139187635704322, "learning_rate": 5.5642819243674085e-06, "loss": 0.1939, "step": 6001 }, { "epoch": 0.48, "grad_norm": 1.4039016937741262, "learning_rate": 5.562994679876441e-06, "loss": 0.2376, "step": 6002 }, { "epoch": 0.48, "grad_norm": 5.399714381052771, "learning_rate": 5.561707397589935e-06, "loss": 0.6284, "step": 6003 }, { "epoch": 0.48, "grad_norm": 1.4063465962241557, "learning_rate": 5.560420077594307e-06, "loss": 0.2326, "step": 6004 }, { "epoch": 0.48, "grad_norm": 1.3055414923882211, "learning_rate": 5.559132719975984e-06, "loss": 0.2075, "step": 6005 }, { "epoch": 0.48, "grad_norm": 1.2155928884225948, "learning_rate": 5.557845324821386e-06, "loss": 0.1754, "step": 6006 }, { "epoch": 0.48, "grad_norm": 1.2317597862453442, "learning_rate": 5.556557892216942e-06, "loss": 0.1806, "step": 6007 }, { "epoch": 0.48, "grad_norm": 1.349210599719671, "learning_rate": 5.555270422249078e-06, "loss": 0.2299, "step": 6008 }, { "epoch": 0.48, "grad_norm": 1.4786096939073454, "learning_rate": 5.55398291500423e-06, "loss": 0.2549, "step": 6009 }, { "epoch": 0.48, "grad_norm": 1.7833786326948462, "learning_rate": 5.55269537056883e-06, "loss": 0.2124, "step": 6010 }, { "epoch": 0.48, "grad_norm": 1.4568108102214778, "learning_rate": 5.551407789029316e-06, "loss": 0.1815, "step": 6011 }, { "epoch": 0.48, "grad_norm": 1.3239751020441697, "learning_rate": 5.5501201704721266e-06, "loss": 0.19, "step": 6012 }, { "epoch": 0.48, "grad_norm": 1.3995322905941363, "learning_rate": 5.548832514983703e-06, "loss": 0.1975, "step": 6013 }, { "epoch": 0.48, "grad_norm": 1.269746204973812, "learning_rate": 5.547544822650488e-06, "loss": 0.1727, "step": 6014 }, { "epoch": 0.48, "grad_norm": 1.4053687868927875, "learning_rate": 5.546257093558932e-06, "loss": 0.2032, "step": 6015 }, { "epoch": 0.48, "grad_norm": 1.289738644628163, "learning_rate": 5.544969327795482e-06, "loss": 0.1843, "step": 6016 }, { "epoch": 0.48, "grad_norm": 1.3345296602743995, "learning_rate": 5.543681525446589e-06, "loss": 0.1714, "step": 6017 }, { "epoch": 0.48, "grad_norm": 1.2410913009582902, "learning_rate": 5.542393686598708e-06, "loss": 0.1347, "step": 6018 }, { "epoch": 0.48, "grad_norm": 1.3634781234050608, "learning_rate": 5.541105811338297e-06, "loss": 0.2115, "step": 6019 }, { "epoch": 0.48, "grad_norm": 1.4052460359533734, "learning_rate": 5.539817899751813e-06, "loss": 0.1802, "step": 6020 }, { "epoch": 0.48, "grad_norm": 1.4927289049843615, "learning_rate": 5.538529951925715e-06, "loss": 0.2104, "step": 6021 }, { "epoch": 0.48, "grad_norm": 1.2700050933356997, "learning_rate": 5.5372419679464725e-06, "loss": 0.1818, "step": 6022 }, { "epoch": 0.48, "grad_norm": 1.4948610127561053, "learning_rate": 5.5359539479005484e-06, "loss": 0.2066, "step": 6023 }, { "epoch": 0.48, "grad_norm": 1.1395970175907957, "learning_rate": 5.534665891874412e-06, "loss": 0.1411, "step": 6024 }, { "epoch": 0.48, "grad_norm": 1.3544384489584773, "learning_rate": 5.533377799954532e-06, "loss": 0.2128, "step": 6025 }, { "epoch": 0.48, "grad_norm": 1.4352742278683133, "learning_rate": 5.5320896722273874e-06, "loss": 0.2623, "step": 6026 }, { "epoch": 0.48, "grad_norm": 1.3174046615725108, "learning_rate": 5.5308015087794495e-06, "loss": 0.2173, "step": 6027 }, { "epoch": 0.48, "grad_norm": 1.414602247754823, "learning_rate": 5.529513309697199e-06, "loss": 0.1897, "step": 6028 }, { "epoch": 0.48, "grad_norm": 7.115487324121038, "learning_rate": 5.528225075067115e-06, "loss": 0.5867, "step": 6029 }, { "epoch": 0.48, "grad_norm": 1.5257386718151094, "learning_rate": 5.526936804975681e-06, "loss": 0.2314, "step": 6030 }, { "epoch": 0.48, "grad_norm": 6.1303691661322866, "learning_rate": 5.525648499509383e-06, "loss": 0.6563, "step": 6031 }, { "epoch": 0.48, "grad_norm": 1.503807382485949, "learning_rate": 5.524360158754708e-06, "loss": 0.2346, "step": 6032 }, { "epoch": 0.48, "grad_norm": 1.3938833340305543, "learning_rate": 5.523071782798147e-06, "loss": 0.2088, "step": 6033 }, { "epoch": 0.48, "grad_norm": 1.3300527781747324, "learning_rate": 5.521783371726193e-06, "loss": 0.1978, "step": 6034 }, { "epoch": 0.48, "grad_norm": 1.4444783328031934, "learning_rate": 5.520494925625339e-06, "loss": 0.1984, "step": 6035 }, { "epoch": 0.48, "grad_norm": 1.280272864196463, "learning_rate": 5.519206444582085e-06, "loss": 0.1675, "step": 6036 }, { "epoch": 0.48, "grad_norm": 1.2930897811512372, "learning_rate": 5.51791792868293e-06, "loss": 0.2036, "step": 6037 }, { "epoch": 0.48, "grad_norm": 1.4211660092670169, "learning_rate": 5.516629378014373e-06, "loss": 0.2298, "step": 6038 }, { "epoch": 0.48, "grad_norm": 1.3377702612608382, "learning_rate": 5.515340792662922e-06, "loss": 0.2013, "step": 6039 }, { "epoch": 0.48, "grad_norm": 1.3960500478992524, "learning_rate": 5.5140521727150805e-06, "loss": 0.2534, "step": 6040 }, { "epoch": 0.48, "grad_norm": 1.4054634909028954, "learning_rate": 5.512763518257362e-06, "loss": 0.2258, "step": 6041 }, { "epoch": 0.48, "grad_norm": 1.5050815306855954, "learning_rate": 5.511474829376272e-06, "loss": 0.222, "step": 6042 }, { "epoch": 0.48, "grad_norm": 6.565493234571033, "learning_rate": 5.510186106158329e-06, "loss": 0.4654, "step": 6043 }, { "epoch": 0.48, "grad_norm": 1.4397003624647202, "learning_rate": 5.5088973486900456e-06, "loss": 0.2364, "step": 6044 }, { "epoch": 0.48, "grad_norm": 1.342030012261675, "learning_rate": 5.507608557057942e-06, "loss": 0.1906, "step": 6045 }, { "epoch": 0.48, "grad_norm": 1.390965026917466, "learning_rate": 5.506319731348539e-06, "loss": 0.1829, "step": 6046 }, { "epoch": 0.48, "grad_norm": 1.2511271304861076, "learning_rate": 5.505030871648356e-06, "loss": 0.1746, "step": 6047 }, { "epoch": 0.48, "grad_norm": 1.165958927056673, "learning_rate": 5.503741978043922e-06, "loss": 0.1879, "step": 6048 }, { "epoch": 0.48, "grad_norm": 1.4198084956443682, "learning_rate": 5.502453050621761e-06, "loss": 0.181, "step": 6049 }, { "epoch": 0.48, "grad_norm": 1.3648605611379505, "learning_rate": 5.501164089468406e-06, "loss": 0.2156, "step": 6050 }, { "epoch": 0.48, "grad_norm": 1.411973970066241, "learning_rate": 5.499875094670387e-06, "loss": 0.1811, "step": 6051 }, { "epoch": 0.48, "grad_norm": 1.3006616472067594, "learning_rate": 5.4985860663142385e-06, "loss": 0.1661, "step": 6052 }, { "epoch": 0.48, "grad_norm": 1.5537058731460205, "learning_rate": 5.497297004486496e-06, "loss": 0.2095, "step": 6053 }, { "epoch": 0.48, "grad_norm": 1.4273018204910448, "learning_rate": 5.496007909273699e-06, "loss": 0.1773, "step": 6054 }, { "epoch": 0.48, "grad_norm": 1.3600341815047596, "learning_rate": 5.494718780762388e-06, "loss": 0.1932, "step": 6055 }, { "epoch": 0.48, "grad_norm": 1.3610735010958077, "learning_rate": 5.493429619039108e-06, "loss": 0.169, "step": 6056 }, { "epoch": 0.48, "grad_norm": 1.2882254305545053, "learning_rate": 5.492140424190401e-06, "loss": 0.1918, "step": 6057 }, { "epoch": 0.48, "grad_norm": 1.1444241060391807, "learning_rate": 5.490851196302817e-06, "loss": 0.1703, "step": 6058 }, { "epoch": 0.48, "grad_norm": 1.213837916339109, "learning_rate": 5.489561935462904e-06, "loss": 0.1834, "step": 6059 }, { "epoch": 0.48, "grad_norm": 1.3141471728474285, "learning_rate": 5.488272641757215e-06, "loss": 0.1933, "step": 6060 }, { "epoch": 0.48, "grad_norm": 5.261598207816413, "learning_rate": 5.486983315272305e-06, "loss": 0.6498, "step": 6061 }, { "epoch": 0.48, "grad_norm": 1.2221484568860745, "learning_rate": 5.485693956094728e-06, "loss": 0.1865, "step": 6062 }, { "epoch": 0.49, "grad_norm": 1.539107532247561, "learning_rate": 5.484404564311047e-06, "loss": 0.2627, "step": 6063 }, { "epoch": 0.49, "grad_norm": 1.4492156246395256, "learning_rate": 5.483115140007818e-06, "loss": 0.2487, "step": 6064 }, { "epoch": 0.49, "grad_norm": 1.140482901593522, "learning_rate": 5.481825683271607e-06, "loss": 0.1545, "step": 6065 }, { "epoch": 0.49, "grad_norm": 1.4175042877607387, "learning_rate": 5.480536194188976e-06, "loss": 0.2465, "step": 6066 }, { "epoch": 0.49, "grad_norm": 1.2329482824055489, "learning_rate": 5.479246672846496e-06, "loss": 0.1732, "step": 6067 }, { "epoch": 0.49, "grad_norm": 1.3903366091057725, "learning_rate": 5.477957119330733e-06, "loss": 0.224, "step": 6068 }, { "epoch": 0.49, "grad_norm": 1.4428585946416677, "learning_rate": 5.476667533728262e-06, "loss": 0.2187, "step": 6069 }, { "epoch": 0.49, "grad_norm": 1.1899966022709014, "learning_rate": 5.475377916125655e-06, "loss": 0.1638, "step": 6070 }, { "epoch": 0.49, "grad_norm": 1.2652472675313262, "learning_rate": 5.474088266609487e-06, "loss": 0.1775, "step": 6071 }, { "epoch": 0.49, "grad_norm": 1.2609118241822819, "learning_rate": 5.47279858526634e-06, "loss": 0.1528, "step": 6072 }, { "epoch": 0.49, "grad_norm": 1.402843492300553, "learning_rate": 5.471508872182789e-06, "loss": 0.2316, "step": 6073 }, { "epoch": 0.49, "grad_norm": 11.304191971833744, "learning_rate": 5.47021912744542e-06, "loss": 0.5415, "step": 6074 }, { "epoch": 0.49, "grad_norm": 1.322918411822587, "learning_rate": 5.4689293511408155e-06, "loss": 0.1855, "step": 6075 }, { "epoch": 0.49, "grad_norm": 1.2671456809530721, "learning_rate": 5.4676395433555615e-06, "loss": 0.1922, "step": 6076 }, { "epoch": 0.49, "grad_norm": 1.5184119140894168, "learning_rate": 5.46634970417625e-06, "loss": 0.1976, "step": 6077 }, { "epoch": 0.49, "grad_norm": 17.840925550591802, "learning_rate": 5.46505983368947e-06, "loss": 0.6233, "step": 6078 }, { "epoch": 0.49, "grad_norm": 1.5639266702624965, "learning_rate": 5.463769931981815e-06, "loss": 0.1949, "step": 6079 }, { "epoch": 0.49, "grad_norm": 1.4228140682918635, "learning_rate": 5.462479999139877e-06, "loss": 0.2233, "step": 6080 }, { "epoch": 0.49, "grad_norm": 1.375151402801113, "learning_rate": 5.4611900352502574e-06, "loss": 0.1913, "step": 6081 }, { "epoch": 0.49, "grad_norm": 1.3851701151267617, "learning_rate": 5.459900040399552e-06, "loss": 0.1993, "step": 6082 }, { "epoch": 0.49, "grad_norm": 1.2932943105828254, "learning_rate": 5.458610014674363e-06, "loss": 0.1792, "step": 6083 }, { "epoch": 0.49, "grad_norm": 1.2596133349248404, "learning_rate": 5.457319958161295e-06, "loss": 0.1564, "step": 6084 }, { "epoch": 0.49, "grad_norm": 1.3313526912600322, "learning_rate": 5.456029870946954e-06, "loss": 0.1681, "step": 6085 }, { "epoch": 0.49, "grad_norm": 1.3035092700781619, "learning_rate": 5.454739753117944e-06, "loss": 0.176, "step": 6086 }, { "epoch": 0.49, "grad_norm": 1.2871509654360385, "learning_rate": 5.453449604760878e-06, "loss": 0.188, "step": 6087 }, { "epoch": 0.49, "grad_norm": 1.3753092076388826, "learning_rate": 5.4521594259623655e-06, "loss": 0.2036, "step": 6088 }, { "epoch": 0.49, "grad_norm": 1.363233269141916, "learning_rate": 5.450869216809021e-06, "loss": 0.2049, "step": 6089 }, { "epoch": 0.49, "grad_norm": 1.2825721200453803, "learning_rate": 5.44957897738746e-06, "loss": 0.1387, "step": 6090 }, { "epoch": 0.49, "grad_norm": 1.388168632814305, "learning_rate": 5.4482887077843e-06, "loss": 0.2289, "step": 6091 }, { "epoch": 0.49, "grad_norm": 1.3390687257731622, "learning_rate": 5.44699840808616e-06, "loss": 0.1927, "step": 6092 }, { "epoch": 0.49, "grad_norm": 1.4729524171846566, "learning_rate": 5.445708078379663e-06, "loss": 0.1899, "step": 6093 }, { "epoch": 0.49, "grad_norm": 1.347943526888609, "learning_rate": 5.444417718751431e-06, "loss": 0.207, "step": 6094 }, { "epoch": 0.49, "grad_norm": 1.335775746631166, "learning_rate": 5.443127329288093e-06, "loss": 0.1852, "step": 6095 }, { "epoch": 0.49, "grad_norm": 1.2708180955374653, "learning_rate": 5.441836910076275e-06, "loss": 0.197, "step": 6096 }, { "epoch": 0.49, "grad_norm": 1.2967646940082138, "learning_rate": 5.440546461202604e-06, "loss": 0.1898, "step": 6097 }, { "epoch": 0.49, "grad_norm": 1.3603966974977955, "learning_rate": 5.439255982753717e-06, "loss": 0.1728, "step": 6098 }, { "epoch": 0.49, "grad_norm": 1.3746809234063302, "learning_rate": 5.4379654748162435e-06, "loss": 0.1836, "step": 6099 }, { "epoch": 0.49, "grad_norm": 1.3264065959567641, "learning_rate": 5.43667493747682e-06, "loss": 0.1944, "step": 6100 }, { "epoch": 0.49, "grad_norm": 8.578750562526942, "learning_rate": 5.4353843708220865e-06, "loss": 0.5186, "step": 6101 }, { "epoch": 0.49, "grad_norm": 1.4646458769165451, "learning_rate": 5.43409377493868e-06, "loss": 0.2153, "step": 6102 }, { "epoch": 0.49, "grad_norm": 1.3860030342104832, "learning_rate": 5.432803149913244e-06, "loss": 0.2266, "step": 6103 }, { "epoch": 0.49, "grad_norm": 1.4788283261826667, "learning_rate": 5.4315124958324205e-06, "loss": 0.246, "step": 6104 }, { "epoch": 0.49, "grad_norm": 1.21950214440904, "learning_rate": 5.430221812782856e-06, "loss": 0.165, "step": 6105 }, { "epoch": 0.49, "grad_norm": 1.4704757530527062, "learning_rate": 5.428931100851198e-06, "loss": 0.2184, "step": 6106 }, { "epoch": 0.49, "grad_norm": 1.4050667092213878, "learning_rate": 5.427640360124095e-06, "loss": 0.2192, "step": 6107 }, { "epoch": 0.49, "grad_norm": 1.3206500519779876, "learning_rate": 5.426349590688199e-06, "loss": 0.1986, "step": 6108 }, { "epoch": 0.49, "grad_norm": 1.6990625831773556, "learning_rate": 5.425058792630163e-06, "loss": 0.1585, "step": 6109 }, { "epoch": 0.49, "grad_norm": 1.1943211238988904, "learning_rate": 5.423767966036644e-06, "loss": 0.198, "step": 6110 }, { "epoch": 0.49, "grad_norm": 1.4373194386978287, "learning_rate": 5.422477110994295e-06, "loss": 0.1889, "step": 6111 }, { "epoch": 0.49, "grad_norm": 1.3241384100961962, "learning_rate": 5.421186227589779e-06, "loss": 0.196, "step": 6112 }, { "epoch": 0.49, "grad_norm": 7.575186805440506, "learning_rate": 5.419895315909755e-06, "loss": 0.539, "step": 6113 }, { "epoch": 0.49, "grad_norm": 1.276069312805348, "learning_rate": 5.418604376040887e-06, "loss": 0.1621, "step": 6114 }, { "epoch": 0.49, "grad_norm": 8.485312551419629, "learning_rate": 5.417313408069839e-06, "loss": 0.7423, "step": 6115 }, { "epoch": 0.49, "grad_norm": 1.3610200067186389, "learning_rate": 5.416022412083276e-06, "loss": 0.1678, "step": 6116 }, { "epoch": 0.49, "grad_norm": 1.2769873974180446, "learning_rate": 5.414731388167868e-06, "loss": 0.1781, "step": 6117 }, { "epoch": 0.49, "grad_norm": 1.4663467415968185, "learning_rate": 5.413440336410286e-06, "loss": 0.191, "step": 6118 }, { "epoch": 0.49, "grad_norm": 1.2838528189271403, "learning_rate": 5.412149256897201e-06, "loss": 0.2256, "step": 6119 }, { "epoch": 0.49, "grad_norm": 1.3625456885057272, "learning_rate": 5.410858149715289e-06, "loss": 0.1975, "step": 6120 }, { "epoch": 0.49, "grad_norm": 1.2569962244624773, "learning_rate": 5.409567014951223e-06, "loss": 0.2135, "step": 6121 }, { "epoch": 0.49, "grad_norm": 1.4678340695307412, "learning_rate": 5.408275852691683e-06, "loss": 0.1664, "step": 6122 }, { "epoch": 0.49, "grad_norm": 1.3579318190859855, "learning_rate": 5.406984663023348e-06, "loss": 0.1666, "step": 6123 }, { "epoch": 0.49, "grad_norm": 1.3600183234899443, "learning_rate": 5.4056934460329e-06, "loss": 0.1842, "step": 6124 }, { "epoch": 0.49, "grad_norm": 7.208867291872801, "learning_rate": 5.404402201807022e-06, "loss": 0.7251, "step": 6125 }, { "epoch": 0.49, "grad_norm": 1.3840224244543287, "learning_rate": 5.403110930432398e-06, "loss": 0.19, "step": 6126 }, { "epoch": 0.49, "grad_norm": 1.6366906229699234, "learning_rate": 5.401819631995716e-06, "loss": 0.2274, "step": 6127 }, { "epoch": 0.49, "grad_norm": 1.369461161231918, "learning_rate": 5.400528306583664e-06, "loss": 0.1861, "step": 6128 }, { "epoch": 0.49, "grad_norm": 1.3699924571907363, "learning_rate": 5.3992369542829335e-06, "loss": 0.2263, "step": 6129 }, { "epoch": 0.49, "grad_norm": 1.28937723540913, "learning_rate": 5.3979455751802175e-06, "loss": 0.1754, "step": 6130 }, { "epoch": 0.49, "grad_norm": 1.3526611584679595, "learning_rate": 5.3966541693622095e-06, "loss": 0.1899, "step": 6131 }, { "epoch": 0.49, "grad_norm": 1.3562503652014586, "learning_rate": 5.395362736915604e-06, "loss": 0.1992, "step": 6132 }, { "epoch": 0.49, "grad_norm": 1.9131155075453725, "learning_rate": 5.394071277927102e-06, "loss": 0.201, "step": 6133 }, { "epoch": 0.49, "grad_norm": 1.2981926205240792, "learning_rate": 5.3927797924834e-06, "loss": 0.2092, "step": 6134 }, { "epoch": 0.49, "grad_norm": 1.301547721791799, "learning_rate": 5.391488280671199e-06, "loss": 0.2314, "step": 6135 }, { "epoch": 0.49, "grad_norm": 1.257158430939577, "learning_rate": 5.3901967425772054e-06, "loss": 0.1872, "step": 6136 }, { "epoch": 0.49, "grad_norm": 1.3260619773101494, "learning_rate": 5.388905178288122e-06, "loss": 0.2128, "step": 6137 }, { "epoch": 0.49, "grad_norm": 1.3568862506992065, "learning_rate": 5.387613587890655e-06, "loss": 0.2113, "step": 6138 }, { "epoch": 0.49, "grad_norm": 1.3278776492878688, "learning_rate": 5.386321971471513e-06, "loss": 0.2359, "step": 6139 }, { "epoch": 0.49, "grad_norm": 1.2872565173550587, "learning_rate": 5.3850303291174076e-06, "loss": 0.1757, "step": 6140 }, { "epoch": 0.49, "grad_norm": 7.843111840020467, "learning_rate": 5.383738660915049e-06, "loss": 0.5242, "step": 6141 }, { "epoch": 0.49, "grad_norm": 1.3557938662274296, "learning_rate": 5.382446966951153e-06, "loss": 0.1909, "step": 6142 }, { "epoch": 0.49, "grad_norm": 1.513328734571758, "learning_rate": 5.381155247312432e-06, "loss": 0.2315, "step": 6143 }, { "epoch": 0.49, "grad_norm": 1.4264195707805678, "learning_rate": 5.379863502085604e-06, "loss": 0.208, "step": 6144 }, { "epoch": 0.49, "grad_norm": 1.482948563359553, "learning_rate": 5.378571731357388e-06, "loss": 0.2074, "step": 6145 }, { "epoch": 0.49, "grad_norm": 1.3249209265795843, "learning_rate": 5.377279935214506e-06, "loss": 0.1813, "step": 6146 }, { "epoch": 0.49, "grad_norm": 1.1861088988847668, "learning_rate": 5.375988113743677e-06, "loss": 0.1519, "step": 6147 }, { "epoch": 0.49, "grad_norm": 4.92504338989327, "learning_rate": 5.374696267031628e-06, "loss": 0.4747, "step": 6148 }, { "epoch": 0.49, "grad_norm": 1.625763186027714, "learning_rate": 5.3734043951650825e-06, "loss": 0.2403, "step": 6149 }, { "epoch": 0.49, "grad_norm": 1.276998895442994, "learning_rate": 5.372112498230771e-06, "loss": 0.189, "step": 6150 }, { "epoch": 0.49, "grad_norm": 1.3248365580973975, "learning_rate": 5.3708205763154185e-06, "loss": 0.189, "step": 6151 }, { "epoch": 0.49, "grad_norm": 1.2857699143347978, "learning_rate": 5.369528629505756e-06, "loss": 0.1867, "step": 6152 }, { "epoch": 0.49, "grad_norm": 1.52676283686413, "learning_rate": 5.368236657888518e-06, "loss": 0.1901, "step": 6153 }, { "epoch": 0.49, "grad_norm": 1.1932605242514978, "learning_rate": 5.366944661550437e-06, "loss": 0.1661, "step": 6154 }, { "epoch": 0.49, "grad_norm": 1.381219198443967, "learning_rate": 5.365652640578249e-06, "loss": 0.2394, "step": 6155 }, { "epoch": 0.49, "grad_norm": 1.3485982997230657, "learning_rate": 5.364360595058692e-06, "loss": 0.2027, "step": 6156 }, { "epoch": 0.49, "grad_norm": 1.370328321348115, "learning_rate": 5.363068525078504e-06, "loss": 0.2354, "step": 6157 }, { "epoch": 0.49, "grad_norm": 1.3582041242680245, "learning_rate": 5.361776430724426e-06, "loss": 0.1951, "step": 6158 }, { "epoch": 0.49, "grad_norm": 8.751711131399174, "learning_rate": 5.3604843120832e-06, "loss": 0.6398, "step": 6159 }, { "epoch": 0.49, "grad_norm": 1.3910888823882277, "learning_rate": 5.3591921692415706e-06, "loss": 0.2199, "step": 6160 }, { "epoch": 0.49, "grad_norm": 1.3803874211146097, "learning_rate": 5.357900002286282e-06, "loss": 0.2063, "step": 6161 }, { "epoch": 0.49, "grad_norm": 1.281008238473857, "learning_rate": 5.35660781130408e-06, "loss": 0.181, "step": 6162 }, { "epoch": 0.49, "grad_norm": 1.3549325706681161, "learning_rate": 5.3553155963817175e-06, "loss": 0.1646, "step": 6163 }, { "epoch": 0.49, "grad_norm": 1.4893205686623445, "learning_rate": 5.354023357605942e-06, "loss": 0.1891, "step": 6164 }, { "epoch": 0.49, "grad_norm": 1.315072413842959, "learning_rate": 5.352731095063506e-06, "loss": 0.2207, "step": 6165 }, { "epoch": 0.49, "grad_norm": 1.1922754377449734, "learning_rate": 5.351438808841162e-06, "loss": 0.1673, "step": 6166 }, { "epoch": 0.49, "grad_norm": 7.35114937941398, "learning_rate": 5.350146499025666e-06, "loss": 0.5019, "step": 6167 }, { "epoch": 0.49, "grad_norm": 1.361136241527948, "learning_rate": 5.348854165703776e-06, "loss": 0.2094, "step": 6168 }, { "epoch": 0.49, "grad_norm": 1.174433487579136, "learning_rate": 5.347561808962247e-06, "loss": 0.1781, "step": 6169 }, { "epoch": 0.49, "grad_norm": 6.292939413961952, "learning_rate": 5.346269428887843e-06, "loss": 0.6418, "step": 6170 }, { "epoch": 0.49, "grad_norm": 1.2857080483477152, "learning_rate": 5.34497702556732e-06, "loss": 0.1833, "step": 6171 }, { "epoch": 0.49, "grad_norm": 1.5339134688011482, "learning_rate": 5.343684599087448e-06, "loss": 0.1782, "step": 6172 }, { "epoch": 0.49, "grad_norm": 1.2416299723464432, "learning_rate": 5.342392149534983e-06, "loss": 0.1996, "step": 6173 }, { "epoch": 0.49, "grad_norm": 1.1471377388798076, "learning_rate": 5.341099676996698e-06, "loss": 0.1638, "step": 6174 }, { "epoch": 0.49, "grad_norm": 1.165250223323812, "learning_rate": 5.339807181559359e-06, "loss": 0.1578, "step": 6175 }, { "epoch": 0.49, "grad_norm": 1.3456868721814605, "learning_rate": 5.338514663309733e-06, "loss": 0.1992, "step": 6176 }, { "epoch": 0.49, "grad_norm": 1.5029129229630251, "learning_rate": 5.337222122334593e-06, "loss": 0.1897, "step": 6177 }, { "epoch": 0.49, "grad_norm": 1.3350535291981411, "learning_rate": 5.33592955872071e-06, "loss": 0.2192, "step": 6178 }, { "epoch": 0.49, "grad_norm": 1.4363070923361838, "learning_rate": 5.334636972554858e-06, "loss": 0.2279, "step": 6179 }, { "epoch": 0.49, "grad_norm": 1.2691040738835486, "learning_rate": 5.33334436392381e-06, "loss": 0.1832, "step": 6180 }, { "epoch": 0.49, "grad_norm": 1.5669046984464379, "learning_rate": 5.3320517329143475e-06, "loss": 0.2177, "step": 6181 }, { "epoch": 0.49, "grad_norm": 1.2540121364576495, "learning_rate": 5.330759079613244e-06, "loss": 0.1639, "step": 6182 }, { "epoch": 0.49, "grad_norm": 1.3067742642281885, "learning_rate": 5.329466404107282e-06, "loss": 0.2078, "step": 6183 }, { "epoch": 0.49, "grad_norm": 1.367268499801735, "learning_rate": 5.328173706483243e-06, "loss": 0.2286, "step": 6184 }, { "epoch": 0.49, "grad_norm": 1.147007254075189, "learning_rate": 5.3268809868279095e-06, "loss": 0.1761, "step": 6185 }, { "epoch": 0.49, "grad_norm": 1.416699859637547, "learning_rate": 5.325588245228063e-06, "loss": 0.2066, "step": 6186 }, { "epoch": 0.49, "grad_norm": 1.335338703606333, "learning_rate": 5.324295481770492e-06, "loss": 0.2273, "step": 6187 }, { "epoch": 0.5, "grad_norm": 1.3001132257848957, "learning_rate": 5.323002696541983e-06, "loss": 0.174, "step": 6188 }, { "epoch": 0.5, "grad_norm": 1.3527813319619386, "learning_rate": 5.321709889629323e-06, "loss": 0.2322, "step": 6189 }, { "epoch": 0.5, "grad_norm": 6.458390890121769, "learning_rate": 5.320417061119303e-06, "loss": 0.5533, "step": 6190 }, { "epoch": 0.5, "grad_norm": 1.2107505972568007, "learning_rate": 5.319124211098716e-06, "loss": 0.1678, "step": 6191 }, { "epoch": 0.5, "grad_norm": 8.25474148136366, "learning_rate": 5.3178313396543546e-06, "loss": 0.7318, "step": 6192 }, { "epoch": 0.5, "grad_norm": 9.218351143906593, "learning_rate": 5.316538446873012e-06, "loss": 0.6997, "step": 6193 }, { "epoch": 0.5, "grad_norm": 1.356771075062425, "learning_rate": 5.315245532841483e-06, "loss": 0.1885, "step": 6194 }, { "epoch": 0.5, "grad_norm": 1.556763924259674, "learning_rate": 5.3139525976465675e-06, "loss": 0.2088, "step": 6195 }, { "epoch": 0.5, "grad_norm": 5.57119935181716, "learning_rate": 5.312659641375063e-06, "loss": 0.6587, "step": 6196 }, { "epoch": 0.5, "grad_norm": 1.4878667827648613, "learning_rate": 5.311366664113768e-06, "loss": 0.2061, "step": 6197 }, { "epoch": 0.5, "grad_norm": 1.4454518241148875, "learning_rate": 5.3100736659494865e-06, "loss": 0.188, "step": 6198 }, { "epoch": 0.5, "grad_norm": 1.4128697764717462, "learning_rate": 5.30878064696902e-06, "loss": 0.2365, "step": 6199 }, { "epoch": 0.5, "grad_norm": 1.464325736623637, "learning_rate": 5.307487607259175e-06, "loss": 0.1907, "step": 6200 }, { "epoch": 0.5, "grad_norm": 1.2769964390926019, "learning_rate": 5.306194546906752e-06, "loss": 0.1686, "step": 6201 }, { "epoch": 0.5, "grad_norm": 1.2938500521694163, "learning_rate": 5.304901465998563e-06, "loss": 0.181, "step": 6202 }, { "epoch": 0.5, "grad_norm": 1.2944682676645638, "learning_rate": 5.303608364621416e-06, "loss": 0.1791, "step": 6203 }, { "epoch": 0.5, "grad_norm": 1.3152122083320263, "learning_rate": 5.302315242862119e-06, "loss": 0.1948, "step": 6204 }, { "epoch": 0.5, "grad_norm": 1.326311443135585, "learning_rate": 5.301022100807482e-06, "loss": 0.1663, "step": 6205 }, { "epoch": 0.5, "grad_norm": 1.1644837629925227, "learning_rate": 5.29972893854432e-06, "loss": 0.1898, "step": 6206 }, { "epoch": 0.5, "grad_norm": 1.353841133564367, "learning_rate": 5.298435756159447e-06, "loss": 0.1395, "step": 6207 }, { "epoch": 0.5, "grad_norm": 1.3412561345227323, "learning_rate": 5.297142553739677e-06, "loss": 0.2038, "step": 6208 }, { "epoch": 0.5, "grad_norm": 1.4056916873216339, "learning_rate": 5.295849331371828e-06, "loss": 0.2388, "step": 6209 }, { "epoch": 0.5, "grad_norm": 1.2172610738102185, "learning_rate": 5.294556089142716e-06, "loss": 0.1714, "step": 6210 }, { "epoch": 0.5, "grad_norm": 6.30391090406548, "learning_rate": 5.293262827139161e-06, "loss": 0.5142, "step": 6211 }, { "epoch": 0.5, "grad_norm": 1.2855616429579746, "learning_rate": 5.291969545447985e-06, "loss": 0.1879, "step": 6212 }, { "epoch": 0.5, "grad_norm": 1.3520245848231778, "learning_rate": 5.290676244156009e-06, "loss": 0.174, "step": 6213 }, { "epoch": 0.5, "grad_norm": 1.34374424559405, "learning_rate": 5.289382923350054e-06, "loss": 0.1864, "step": 6214 }, { "epoch": 0.5, "grad_norm": 1.4250860385520456, "learning_rate": 5.2880895831169476e-06, "loss": 0.1698, "step": 6215 }, { "epoch": 0.5, "grad_norm": 6.8148650798024475, "learning_rate": 5.286796223543514e-06, "loss": 0.635, "step": 6216 }, { "epoch": 0.5, "grad_norm": 1.3711275170569162, "learning_rate": 5.2855028447165824e-06, "loss": 0.1885, "step": 6217 }, { "epoch": 0.5, "grad_norm": 5.48435436364416, "learning_rate": 5.284209446722979e-06, "loss": 0.4296, "step": 6218 }, { "epoch": 0.5, "grad_norm": 1.3686758686712193, "learning_rate": 5.282916029649534e-06, "loss": 0.231, "step": 6219 }, { "epoch": 0.5, "grad_norm": 1.5031396702649311, "learning_rate": 5.28162259358308e-06, "loss": 0.203, "step": 6220 }, { "epoch": 0.5, "grad_norm": 5.87619495820924, "learning_rate": 5.280329138610446e-06, "loss": 0.5631, "step": 6221 }, { "epoch": 0.5, "grad_norm": 1.3295513414410984, "learning_rate": 5.27903566481847e-06, "loss": 0.2018, "step": 6222 }, { "epoch": 0.5, "grad_norm": 1.2935896253719033, "learning_rate": 5.277742172293983e-06, "loss": 0.1523, "step": 6223 }, { "epoch": 0.5, "grad_norm": 1.255733188177954, "learning_rate": 5.276448661123822e-06, "loss": 0.1514, "step": 6224 }, { "epoch": 0.5, "grad_norm": 1.4982246059469662, "learning_rate": 5.275155131394825e-06, "loss": 0.1922, "step": 6225 }, { "epoch": 0.5, "grad_norm": 1.338161997524272, "learning_rate": 5.27386158319383e-06, "loss": 0.1732, "step": 6226 }, { "epoch": 0.5, "grad_norm": 13.502402498985422, "learning_rate": 5.272568016607678e-06, "loss": 0.4369, "step": 6227 }, { "epoch": 0.5, "grad_norm": 10.644401590868851, "learning_rate": 5.271274431723208e-06, "loss": 0.6347, "step": 6228 }, { "epoch": 0.5, "grad_norm": 1.4177339788861734, "learning_rate": 5.269980828627266e-06, "loss": 0.2073, "step": 6229 }, { "epoch": 0.5, "grad_norm": 1.1885238581679605, "learning_rate": 5.268687207406692e-06, "loss": 0.1907, "step": 6230 }, { "epoch": 0.5, "grad_norm": 1.3500017580095913, "learning_rate": 5.267393568148329e-06, "loss": 0.2119, "step": 6231 }, { "epoch": 0.5, "grad_norm": 1.4093337071058287, "learning_rate": 5.2660999109390285e-06, "loss": 0.1687, "step": 6232 }, { "epoch": 0.5, "grad_norm": 1.154269169118788, "learning_rate": 5.264806235865634e-06, "loss": 0.1676, "step": 6233 }, { "epoch": 0.5, "grad_norm": 1.5550816011762203, "learning_rate": 5.263512543014995e-06, "loss": 0.2188, "step": 6234 }, { "epoch": 0.5, "grad_norm": 1.3124130687453872, "learning_rate": 5.262218832473959e-06, "loss": 0.1678, "step": 6235 }, { "epoch": 0.5, "grad_norm": 1.6378629819936357, "learning_rate": 5.260925104329379e-06, "loss": 0.2204, "step": 6236 }, { "epoch": 0.5, "grad_norm": 1.2443336396054396, "learning_rate": 5.259631358668107e-06, "loss": 0.183, "step": 6237 }, { "epoch": 0.5, "grad_norm": 1.4490440991160176, "learning_rate": 5.258337595576994e-06, "loss": 0.1593, "step": 6238 }, { "epoch": 0.5, "grad_norm": 1.2619955416133604, "learning_rate": 5.257043815142897e-06, "loss": 0.1771, "step": 6239 }, { "epoch": 0.5, "grad_norm": 9.559735367267125, "learning_rate": 5.25575001745267e-06, "loss": 0.7524, "step": 6240 }, { "epoch": 0.5, "grad_norm": 1.5424281167939606, "learning_rate": 5.254456202593167e-06, "loss": 0.2393, "step": 6241 }, { "epoch": 0.5, "grad_norm": 1.4033943700063696, "learning_rate": 5.25316237065125e-06, "loss": 0.2091, "step": 6242 }, { "epoch": 0.5, "grad_norm": 1.3166563250082306, "learning_rate": 5.251868521713775e-06, "loss": 0.171, "step": 6243 }, { "epoch": 0.5, "grad_norm": 1.3677143418444035, "learning_rate": 5.250574655867604e-06, "loss": 0.2333, "step": 6244 }, { "epoch": 0.5, "grad_norm": 1.424957758269985, "learning_rate": 5.249280773199597e-06, "loss": 0.2008, "step": 6245 }, { "epoch": 0.5, "grad_norm": 6.548882291278945, "learning_rate": 5.247986873796616e-06, "loss": 0.6412, "step": 6246 }, { "epoch": 0.5, "grad_norm": 1.254563942971284, "learning_rate": 5.246692957745526e-06, "loss": 0.1997, "step": 6247 }, { "epoch": 0.5, "grad_norm": 1.31680943829475, "learning_rate": 5.245399025133189e-06, "loss": 0.1846, "step": 6248 }, { "epoch": 0.5, "grad_norm": 1.430612759942501, "learning_rate": 5.244105076046472e-06, "loss": 0.2216, "step": 6249 }, { "epoch": 0.5, "grad_norm": 1.5505943413042584, "learning_rate": 5.242811110572243e-06, "loss": 0.2069, "step": 6250 }, { "epoch": 0.5, "grad_norm": 1.4182960011606025, "learning_rate": 5.241517128797366e-06, "loss": 0.182, "step": 6251 }, { "epoch": 0.5, "grad_norm": 7.290315328036719, "learning_rate": 5.240223130808714e-06, "loss": 0.4789, "step": 6252 }, { "epoch": 0.5, "grad_norm": 1.3044802578652985, "learning_rate": 5.238929116693154e-06, "loss": 0.2195, "step": 6253 }, { "epoch": 0.5, "grad_norm": 1.2368560200831011, "learning_rate": 5.23763508653756e-06, "loss": 0.164, "step": 6254 }, { "epoch": 0.5, "grad_norm": 1.2172317642959076, "learning_rate": 5.236341040428803e-06, "loss": 0.2064, "step": 6255 }, { "epoch": 0.5, "grad_norm": 1.2327472522110754, "learning_rate": 5.235046978453755e-06, "loss": 0.1855, "step": 6256 }, { "epoch": 0.5, "grad_norm": 1.3262363626507576, "learning_rate": 5.233752900699291e-06, "loss": 0.1757, "step": 6257 }, { "epoch": 0.5, "grad_norm": 1.1879333437098638, "learning_rate": 5.2324588072522875e-06, "loss": 0.1665, "step": 6258 }, { "epoch": 0.5, "grad_norm": 1.3185553752647492, "learning_rate": 5.231164698199618e-06, "loss": 0.2106, "step": 6259 }, { "epoch": 0.5, "grad_norm": 1.3250489434983634, "learning_rate": 5.229870573628163e-06, "loss": 0.1951, "step": 6260 }, { "epoch": 0.5, "grad_norm": 1.3271923135117893, "learning_rate": 5.2285764336248e-06, "loss": 0.1911, "step": 6261 }, { "epoch": 0.5, "grad_norm": 1.3267738810883845, "learning_rate": 5.227282278276409e-06, "loss": 0.1724, "step": 6262 }, { "epoch": 0.5, "grad_norm": 1.388770080187803, "learning_rate": 5.225988107669868e-06, "loss": 0.2124, "step": 6263 }, { "epoch": 0.5, "grad_norm": 1.4358798842707865, "learning_rate": 5.224693921892064e-06, "loss": 0.2383, "step": 6264 }, { "epoch": 0.5, "grad_norm": 1.3787768392862565, "learning_rate": 5.223399721029875e-06, "loss": 0.2015, "step": 6265 }, { "epoch": 0.5, "grad_norm": 5.630419813457188, "learning_rate": 5.2221055051701845e-06, "loss": 0.4685, "step": 6266 }, { "epoch": 0.5, "grad_norm": 1.4566655880541437, "learning_rate": 5.220811274399878e-06, "loss": 0.1532, "step": 6267 }, { "epoch": 0.5, "grad_norm": 1.1617384552249408, "learning_rate": 5.2195170288058435e-06, "loss": 0.13, "step": 6268 }, { "epoch": 0.5, "grad_norm": 1.3629098046170762, "learning_rate": 5.218222768474964e-06, "loss": 0.2369, "step": 6269 }, { "epoch": 0.5, "grad_norm": 1.3486723099597446, "learning_rate": 5.21692849349413e-06, "loss": 0.2029, "step": 6270 }, { "epoch": 0.5, "grad_norm": 1.445537848619636, "learning_rate": 5.21563420395023e-06, "loss": 0.2137, "step": 6271 }, { "epoch": 0.5, "grad_norm": 1.5945006796204164, "learning_rate": 5.214339899930151e-06, "loss": 0.2251, "step": 6272 }, { "epoch": 0.5, "grad_norm": 9.712302116973072, "learning_rate": 5.2130455815207855e-06, "loss": 0.6219, "step": 6273 }, { "epoch": 0.5, "grad_norm": 1.3763313083280944, "learning_rate": 5.211751248809025e-06, "loss": 0.1615, "step": 6274 }, { "epoch": 0.5, "grad_norm": 1.446830603750063, "learning_rate": 5.210456901881761e-06, "loss": 0.2396, "step": 6275 }, { "epoch": 0.5, "grad_norm": 1.4296338724637219, "learning_rate": 5.209162540825887e-06, "loss": 0.2016, "step": 6276 }, { "epoch": 0.5, "grad_norm": 11.15501297483856, "learning_rate": 5.2078681657282994e-06, "loss": 0.5962, "step": 6277 }, { "epoch": 0.5, "grad_norm": 1.3644696065676662, "learning_rate": 5.206573776675891e-06, "loss": 0.1593, "step": 6278 }, { "epoch": 0.5, "grad_norm": 1.359840783281693, "learning_rate": 5.20527937375556e-06, "loss": 0.2155, "step": 6279 }, { "epoch": 0.5, "grad_norm": 1.1531130367251454, "learning_rate": 5.2039849570542e-06, "loss": 0.1559, "step": 6280 }, { "epoch": 0.5, "grad_norm": 1.2974861367052535, "learning_rate": 5.202690526658715e-06, "loss": 0.195, "step": 6281 }, { "epoch": 0.5, "grad_norm": 1.3096591769738086, "learning_rate": 5.201396082656001e-06, "loss": 0.2015, "step": 6282 }, { "epoch": 0.5, "grad_norm": 1.3015592076096667, "learning_rate": 5.200101625132955e-06, "loss": 0.1818, "step": 6283 }, { "epoch": 0.5, "grad_norm": 1.4243204668795564, "learning_rate": 5.1988071541764814e-06, "loss": 0.2127, "step": 6284 }, { "epoch": 0.5, "grad_norm": 1.212059500878854, "learning_rate": 5.197512669873482e-06, "loss": 0.1518, "step": 6285 }, { "epoch": 0.5, "grad_norm": 7.430545895006179, "learning_rate": 5.196218172310858e-06, "loss": 0.736, "step": 6286 }, { "epoch": 0.5, "grad_norm": 1.2254113833640334, "learning_rate": 5.194923661575514e-06, "loss": 0.1911, "step": 6287 }, { "epoch": 0.5, "grad_norm": 1.2697928522573918, "learning_rate": 5.193629137754355e-06, "loss": 0.19, "step": 6288 }, { "epoch": 0.5, "grad_norm": 1.3851992415996301, "learning_rate": 5.192334600934284e-06, "loss": 0.1757, "step": 6289 }, { "epoch": 0.5, "grad_norm": 1.3363734321951881, "learning_rate": 5.1910400512022084e-06, "loss": 0.1633, "step": 6290 }, { "epoch": 0.5, "grad_norm": 8.185884664214091, "learning_rate": 5.189745488645036e-06, "loss": 0.5321, "step": 6291 }, { "epoch": 0.5, "grad_norm": 1.2282431502504434, "learning_rate": 5.188450913349674e-06, "loss": 0.1837, "step": 6292 }, { "epoch": 0.5, "grad_norm": 1.4183703530101455, "learning_rate": 5.18715632540303e-06, "loss": 0.2132, "step": 6293 }, { "epoch": 0.5, "grad_norm": 1.3282381526127776, "learning_rate": 5.185861724892017e-06, "loss": 0.2209, "step": 6294 }, { "epoch": 0.5, "grad_norm": 1.3496458825344297, "learning_rate": 5.184567111903541e-06, "loss": 0.2595, "step": 6295 }, { "epoch": 0.5, "grad_norm": 1.4482981453686798, "learning_rate": 5.183272486524516e-06, "loss": 0.1942, "step": 6296 }, { "epoch": 0.5, "grad_norm": 1.3767640755892694, "learning_rate": 5.181977848841854e-06, "loss": 0.1838, "step": 6297 }, { "epoch": 0.5, "grad_norm": 6.661692935557831, "learning_rate": 5.180683198942468e-06, "loss": 0.478, "step": 6298 }, { "epoch": 0.5, "grad_norm": 1.2692566319290866, "learning_rate": 5.1793885369132724e-06, "loss": 0.2078, "step": 6299 }, { "epoch": 0.5, "grad_norm": 1.3128242909347663, "learning_rate": 5.1780938628411795e-06, "loss": 0.1804, "step": 6300 }, { "epoch": 0.5, "grad_norm": 1.380746640626029, "learning_rate": 5.1767991768131064e-06, "loss": 0.193, "step": 6301 }, { "epoch": 0.5, "grad_norm": 1.3376665180678404, "learning_rate": 5.175504478915968e-06, "loss": 0.2053, "step": 6302 }, { "epoch": 0.5, "grad_norm": 1.5282774562113082, "learning_rate": 5.1742097692366836e-06, "loss": 0.2521, "step": 6303 }, { "epoch": 0.5, "grad_norm": 1.4647091623871322, "learning_rate": 5.172915047862168e-06, "loss": 0.2197, "step": 6304 }, { "epoch": 0.5, "grad_norm": 1.3730826972308972, "learning_rate": 5.171620314879342e-06, "loss": 0.1796, "step": 6305 }, { "epoch": 0.5, "grad_norm": 1.4539461470657398, "learning_rate": 5.170325570375124e-06, "loss": 0.2101, "step": 6306 }, { "epoch": 0.5, "grad_norm": 7.4313619719408885, "learning_rate": 5.169030814436436e-06, "loss": 0.5814, "step": 6307 }, { "epoch": 0.5, "grad_norm": 1.2695290912945862, "learning_rate": 5.1677360471501955e-06, "loss": 0.1737, "step": 6308 }, { "epoch": 0.5, "grad_norm": 6.932141893561712, "learning_rate": 5.1664412686033276e-06, "loss": 0.6466, "step": 6309 }, { "epoch": 0.5, "grad_norm": 1.2910082903113071, "learning_rate": 5.165146478882751e-06, "loss": 0.1613, "step": 6310 }, { "epoch": 0.5, "grad_norm": 1.3272140675626496, "learning_rate": 5.163851678075391e-06, "loss": 0.1853, "step": 6311 }, { "epoch": 0.5, "grad_norm": 5.622696097831712, "learning_rate": 5.1625568662681735e-06, "loss": 0.7612, "step": 6312 }, { "epoch": 0.51, "grad_norm": 1.4497650320558, "learning_rate": 5.16126204354802e-06, "loss": 0.1977, "step": 6313 }, { "epoch": 0.51, "grad_norm": 1.3717893413295899, "learning_rate": 5.159967210001855e-06, "loss": 0.1919, "step": 6314 }, { "epoch": 0.51, "grad_norm": 1.2203759944908106, "learning_rate": 5.158672365716608e-06, "loss": 0.1722, "step": 6315 }, { "epoch": 0.51, "grad_norm": 1.4638437345441884, "learning_rate": 5.157377510779207e-06, "loss": 0.2059, "step": 6316 }, { "epoch": 0.51, "grad_norm": 1.3953522484032583, "learning_rate": 5.156082645276573e-06, "loss": 0.2096, "step": 6317 }, { "epoch": 0.51, "grad_norm": 1.35800286829192, "learning_rate": 5.154787769295639e-06, "loss": 0.1861, "step": 6318 }, { "epoch": 0.51, "grad_norm": 1.2655597434076797, "learning_rate": 5.153492882923335e-06, "loss": 0.1898, "step": 6319 }, { "epoch": 0.51, "grad_norm": 1.2904288729258366, "learning_rate": 5.152197986246586e-06, "loss": 0.1889, "step": 6320 }, { "epoch": 0.51, "grad_norm": 1.3636888587163787, "learning_rate": 5.150903079352326e-06, "loss": 0.2248, "step": 6321 }, { "epoch": 0.51, "grad_norm": 1.3673063628543014, "learning_rate": 5.149608162327486e-06, "loss": 0.2037, "step": 6322 }, { "epoch": 0.51, "grad_norm": 1.3141754862891193, "learning_rate": 5.148313235258996e-06, "loss": 0.1782, "step": 6323 }, { "epoch": 0.51, "grad_norm": 1.2190904449100082, "learning_rate": 5.1470182982337905e-06, "loss": 0.1759, "step": 6324 }, { "epoch": 0.51, "grad_norm": 1.3029187254268115, "learning_rate": 5.145723351338799e-06, "loss": 0.23, "step": 6325 }, { "epoch": 0.51, "grad_norm": 1.392547922698511, "learning_rate": 5.144428394660958e-06, "loss": 0.1616, "step": 6326 }, { "epoch": 0.51, "grad_norm": 1.26866731840062, "learning_rate": 5.143133428287202e-06, "loss": 0.1501, "step": 6327 }, { "epoch": 0.51, "grad_norm": 1.2483544462435243, "learning_rate": 5.141838452304465e-06, "loss": 0.1791, "step": 6328 }, { "epoch": 0.51, "grad_norm": 1.383080049451087, "learning_rate": 5.140543466799683e-06, "loss": 0.1979, "step": 6329 }, { "epoch": 0.51, "grad_norm": 1.3636725407881545, "learning_rate": 5.139248471859793e-06, "loss": 0.2232, "step": 6330 }, { "epoch": 0.51, "grad_norm": 9.388481945447857, "learning_rate": 5.137953467571729e-06, "loss": 0.6247, "step": 6331 }, { "epoch": 0.51, "grad_norm": 1.1772141983436382, "learning_rate": 5.136658454022432e-06, "loss": 0.148, "step": 6332 }, { "epoch": 0.51, "grad_norm": 1.3430838162519119, "learning_rate": 5.13536343129884e-06, "loss": 0.182, "step": 6333 }, { "epoch": 0.51, "grad_norm": 4.534180365224956, "learning_rate": 5.134068399487889e-06, "loss": 0.583, "step": 6334 }, { "epoch": 0.51, "grad_norm": 1.4833657678020846, "learning_rate": 5.1327733586765205e-06, "loss": 0.1895, "step": 6335 }, { "epoch": 0.51, "grad_norm": 1.3442009876980543, "learning_rate": 5.131478308951675e-06, "loss": 0.1879, "step": 6336 }, { "epoch": 0.51, "grad_norm": 1.225065962486156, "learning_rate": 5.130183250400292e-06, "loss": 0.1879, "step": 6337 }, { "epoch": 0.51, "grad_norm": 1.4489579094893268, "learning_rate": 5.1288881831093115e-06, "loss": 0.1943, "step": 6338 }, { "epoch": 0.51, "grad_norm": 1.2248707013790785, "learning_rate": 5.127593107165678e-06, "loss": 0.1734, "step": 6339 }, { "epoch": 0.51, "grad_norm": 1.4666455425776792, "learning_rate": 5.126298022656333e-06, "loss": 0.2064, "step": 6340 }, { "epoch": 0.51, "grad_norm": 1.407926718043965, "learning_rate": 5.125002929668219e-06, "loss": 0.1918, "step": 6341 }, { "epoch": 0.51, "grad_norm": 1.3436786397388014, "learning_rate": 5.123707828288279e-06, "loss": 0.2204, "step": 6342 }, { "epoch": 0.51, "grad_norm": 1.3532685411349283, "learning_rate": 5.122412718603458e-06, "loss": 0.2055, "step": 6343 }, { "epoch": 0.51, "grad_norm": 1.4988625299318015, "learning_rate": 5.1211176007007e-06, "loss": 0.1987, "step": 6344 }, { "epoch": 0.51, "grad_norm": 1.2780826241443235, "learning_rate": 5.11982247466695e-06, "loss": 0.1485, "step": 6345 }, { "epoch": 0.51, "grad_norm": 1.3495948559642899, "learning_rate": 5.118527340589155e-06, "loss": 0.185, "step": 6346 }, { "epoch": 0.51, "grad_norm": 6.4506138426105055, "learning_rate": 5.117232198554262e-06, "loss": 0.5483, "step": 6347 }, { "epoch": 0.51, "grad_norm": 6.009250781544445, "learning_rate": 5.1159370486492145e-06, "loss": 0.6166, "step": 6348 }, { "epoch": 0.51, "grad_norm": 1.2568757837198627, "learning_rate": 5.114641890960961e-06, "loss": 0.1607, "step": 6349 }, { "epoch": 0.51, "grad_norm": 1.432461779300337, "learning_rate": 5.11334672557645e-06, "loss": 0.2268, "step": 6350 }, { "epoch": 0.51, "grad_norm": 1.2308839028714296, "learning_rate": 5.1120515525826335e-06, "loss": 0.1856, "step": 6351 }, { "epoch": 0.51, "grad_norm": 1.3534130155833344, "learning_rate": 5.110756372066452e-06, "loss": 0.1636, "step": 6352 }, { "epoch": 0.51, "grad_norm": 1.4008718746468871, "learning_rate": 5.1094611841148625e-06, "loss": 0.2087, "step": 6353 }, { "epoch": 0.51, "grad_norm": 1.3810654127721416, "learning_rate": 5.108165988814811e-06, "loss": 0.1883, "step": 6354 }, { "epoch": 0.51, "grad_norm": 1.4254427437994086, "learning_rate": 5.106870786253248e-06, "loss": 0.2242, "step": 6355 }, { "epoch": 0.51, "grad_norm": 1.3495622555173048, "learning_rate": 5.1055755765171265e-06, "loss": 0.1735, "step": 6356 }, { "epoch": 0.51, "grad_norm": 1.2798924917983223, "learning_rate": 5.104280359693397e-06, "loss": 0.1706, "step": 6357 }, { "epoch": 0.51, "grad_norm": 1.491052737624853, "learning_rate": 5.102985135869011e-06, "loss": 0.245, "step": 6358 }, { "epoch": 0.51, "grad_norm": 1.1461273940637602, "learning_rate": 5.10168990513092e-06, "loss": 0.1486, "step": 6359 }, { "epoch": 0.51, "grad_norm": 7.859547458948115, "learning_rate": 5.100394667566079e-06, "loss": 0.5333, "step": 6360 }, { "epoch": 0.51, "grad_norm": 9.232011364879343, "learning_rate": 5.0990994232614386e-06, "loss": 0.5242, "step": 6361 }, { "epoch": 0.51, "grad_norm": 1.3055087151280933, "learning_rate": 5.0978041723039536e-06, "loss": 0.2017, "step": 6362 }, { "epoch": 0.51, "grad_norm": 1.4619621926700757, "learning_rate": 5.096508914780579e-06, "loss": 0.2103, "step": 6363 }, { "epoch": 0.51, "grad_norm": 1.30166151703843, "learning_rate": 5.0952136507782675e-06, "loss": 0.1621, "step": 6364 }, { "epoch": 0.51, "grad_norm": 8.434144424870652, "learning_rate": 5.093918380383977e-06, "loss": 0.6433, "step": 6365 }, { "epoch": 0.51, "grad_norm": 1.3664909365480735, "learning_rate": 5.092623103684657e-06, "loss": 0.1856, "step": 6366 }, { "epoch": 0.51, "grad_norm": 22.373376418414317, "learning_rate": 5.091327820767271e-06, "loss": 0.5479, "step": 6367 }, { "epoch": 0.51, "grad_norm": 1.3433504680403467, "learning_rate": 5.090032531718773e-06, "loss": 0.2207, "step": 6368 }, { "epoch": 0.51, "grad_norm": 1.2947627924842384, "learning_rate": 5.088737236626114e-06, "loss": 0.1707, "step": 6369 }, { "epoch": 0.51, "grad_norm": 1.308123110750913, "learning_rate": 5.0874419355762585e-06, "loss": 0.1982, "step": 6370 }, { "epoch": 0.51, "grad_norm": 1.1800048212229322, "learning_rate": 5.08614662865616e-06, "loss": 0.1784, "step": 6371 }, { "epoch": 0.51, "grad_norm": 1.1064509431006522, "learning_rate": 5.084851315952778e-06, "loss": 0.1515, "step": 6372 }, { "epoch": 0.51, "grad_norm": 1.1949298119046323, "learning_rate": 5.083555997553068e-06, "loss": 0.1432, "step": 6373 }, { "epoch": 0.51, "grad_norm": 1.4413531240848336, "learning_rate": 5.082260673543992e-06, "loss": 0.1986, "step": 6374 }, { "epoch": 0.51, "grad_norm": 1.2200435922631385, "learning_rate": 5.080965344012509e-06, "loss": 0.2025, "step": 6375 }, { "epoch": 0.51, "grad_norm": 1.222024129003145, "learning_rate": 5.079670009045574e-06, "loss": 0.1871, "step": 6376 }, { "epoch": 0.51, "grad_norm": 1.4566266278238087, "learning_rate": 5.078374668730154e-06, "loss": 0.191, "step": 6377 }, { "epoch": 0.51, "grad_norm": 1.2745843149390028, "learning_rate": 5.0770793231532025e-06, "loss": 0.184, "step": 6378 }, { "epoch": 0.51, "grad_norm": 1.3323997465696902, "learning_rate": 5.075783972401682e-06, "loss": 0.1848, "step": 6379 }, { "epoch": 0.51, "grad_norm": 1.5434798376949976, "learning_rate": 5.074488616562555e-06, "loss": 0.2132, "step": 6380 }, { "epoch": 0.51, "grad_norm": 1.1753449813561117, "learning_rate": 5.0731932557227814e-06, "loss": 0.1774, "step": 6381 }, { "epoch": 0.51, "grad_norm": 1.2471094037111388, "learning_rate": 5.0718978899693224e-06, "loss": 0.1726, "step": 6382 }, { "epoch": 0.51, "grad_norm": 1.4454714705841416, "learning_rate": 5.070602519389139e-06, "loss": 0.2298, "step": 6383 }, { "epoch": 0.51, "grad_norm": 1.2802330352406381, "learning_rate": 5.069307144069196e-06, "loss": 0.1659, "step": 6384 }, { "epoch": 0.51, "grad_norm": 1.1227732605890215, "learning_rate": 5.068011764096455e-06, "loss": 0.1498, "step": 6385 }, { "epoch": 0.51, "grad_norm": 1.3386868239234615, "learning_rate": 5.066716379557879e-06, "loss": 0.2309, "step": 6386 }, { "epoch": 0.51, "grad_norm": 1.3431133499369152, "learning_rate": 5.065420990540428e-06, "loss": 0.2133, "step": 6387 }, { "epoch": 0.51, "grad_norm": 1.3016129798515141, "learning_rate": 5.064125597131069e-06, "loss": 0.1949, "step": 6388 }, { "epoch": 0.51, "grad_norm": 7.960514266339366, "learning_rate": 5.062830199416764e-06, "loss": 0.7845, "step": 6389 }, { "epoch": 0.51, "grad_norm": 1.3866127992472959, "learning_rate": 5.061534797484476e-06, "loss": 0.2475, "step": 6390 }, { "epoch": 0.51, "grad_norm": 5.8681522122684235, "learning_rate": 5.0602393914211724e-06, "loss": 0.5361, "step": 6391 }, { "epoch": 0.51, "grad_norm": 1.2843313985431033, "learning_rate": 5.0589439813138165e-06, "loss": 0.1751, "step": 6392 }, { "epoch": 0.51, "grad_norm": 5.0784505366617285, "learning_rate": 5.057648567249372e-06, "loss": 0.4926, "step": 6393 }, { "epoch": 0.51, "grad_norm": 4.254534413096096, "learning_rate": 5.056353149314803e-06, "loss": 0.4868, "step": 6394 }, { "epoch": 0.51, "grad_norm": 1.5730514321996294, "learning_rate": 5.055057727597078e-06, "loss": 0.1815, "step": 6395 }, { "epoch": 0.51, "grad_norm": 1.366804493328478, "learning_rate": 5.0537623021831595e-06, "loss": 0.1857, "step": 6396 }, { "epoch": 0.51, "grad_norm": 1.1974273885225233, "learning_rate": 5.0524668731600154e-06, "loss": 0.1434, "step": 6397 }, { "epoch": 0.51, "grad_norm": 1.290462268700418, "learning_rate": 5.051171440614612e-06, "loss": 0.1682, "step": 6398 }, { "epoch": 0.51, "grad_norm": 1.3408080319511082, "learning_rate": 5.049876004633914e-06, "loss": 0.1726, "step": 6399 }, { "epoch": 0.51, "grad_norm": 1.3301174211156808, "learning_rate": 5.048580565304887e-06, "loss": 0.2001, "step": 6400 }, { "epoch": 0.51, "grad_norm": 1.1295720778513296, "learning_rate": 5.047285122714501e-06, "loss": 0.1356, "step": 6401 }, { "epoch": 0.51, "grad_norm": 5.307628524028128, "learning_rate": 5.045989676949722e-06, "loss": 0.6551, "step": 6402 }, { "epoch": 0.51, "grad_norm": 1.30717220964506, "learning_rate": 5.044694228097516e-06, "loss": 0.1868, "step": 6403 }, { "epoch": 0.51, "grad_norm": 1.308617542381283, "learning_rate": 5.043398776244852e-06, "loss": 0.1999, "step": 6404 }, { "epoch": 0.51, "grad_norm": 1.3594059347674798, "learning_rate": 5.0421033214786965e-06, "loss": 0.168, "step": 6405 }, { "epoch": 0.51, "grad_norm": 1.2978934715585597, "learning_rate": 5.040807863886017e-06, "loss": 0.1998, "step": 6406 }, { "epoch": 0.51, "grad_norm": 1.2274527728906517, "learning_rate": 5.039512403553782e-06, "loss": 0.1575, "step": 6407 }, { "epoch": 0.51, "grad_norm": 1.3690895326544734, "learning_rate": 5.038216940568958e-06, "loss": 0.1922, "step": 6408 }, { "epoch": 0.51, "grad_norm": 1.2294123798712022, "learning_rate": 5.036921475018517e-06, "loss": 0.166, "step": 6409 }, { "epoch": 0.51, "grad_norm": 1.3792524669883612, "learning_rate": 5.035626006989426e-06, "loss": 0.2063, "step": 6410 }, { "epoch": 0.51, "grad_norm": 1.382789409496291, "learning_rate": 5.0343305365686505e-06, "loss": 0.2011, "step": 6411 }, { "epoch": 0.51, "grad_norm": 1.3569064722631767, "learning_rate": 5.033035063843164e-06, "loss": 0.1964, "step": 6412 }, { "epoch": 0.51, "grad_norm": 1.2371414487308672, "learning_rate": 5.031739588899934e-06, "loss": 0.1674, "step": 6413 }, { "epoch": 0.51, "grad_norm": 1.245679788522383, "learning_rate": 5.030444111825928e-06, "loss": 0.2054, "step": 6414 }, { "epoch": 0.51, "grad_norm": 1.3043462745913819, "learning_rate": 5.029148632708117e-06, "loss": 0.1809, "step": 6415 }, { "epoch": 0.51, "grad_norm": 1.6185587609100252, "learning_rate": 5.02785315163347e-06, "loss": 0.2793, "step": 6416 }, { "epoch": 0.51, "grad_norm": 1.3674335989262343, "learning_rate": 5.026557668688956e-06, "loss": 0.1736, "step": 6417 }, { "epoch": 0.51, "grad_norm": 1.3205410181330746, "learning_rate": 5.025262183961546e-06, "loss": 0.2165, "step": 6418 }, { "epoch": 0.51, "grad_norm": 1.2722612485187108, "learning_rate": 5.023966697538209e-06, "loss": 0.1688, "step": 6419 }, { "epoch": 0.51, "grad_norm": 1.302922757379929, "learning_rate": 5.022671209505916e-06, "loss": 0.179, "step": 6420 }, { "epoch": 0.51, "grad_norm": 1.4126428658380186, "learning_rate": 5.021375719951634e-06, "loss": 0.2164, "step": 6421 }, { "epoch": 0.51, "grad_norm": 1.4134471536090285, "learning_rate": 5.020080228962337e-06, "loss": 0.1885, "step": 6422 }, { "epoch": 0.51, "grad_norm": 1.2672185260207796, "learning_rate": 5.018784736624993e-06, "loss": 0.1808, "step": 6423 }, { "epoch": 0.51, "grad_norm": 1.372429659648729, "learning_rate": 5.017489243026571e-06, "loss": 0.2286, "step": 6424 }, { "epoch": 0.51, "grad_norm": 1.2675801486466107, "learning_rate": 5.016193748254045e-06, "loss": 0.1684, "step": 6425 }, { "epoch": 0.51, "grad_norm": 5.531537854993101, "learning_rate": 5.014898252394382e-06, "loss": 0.5572, "step": 6426 }, { "epoch": 0.51, "grad_norm": 1.1972047922719196, "learning_rate": 5.013602755534555e-06, "loss": 0.1712, "step": 6427 }, { "epoch": 0.51, "grad_norm": 1.0860373471353395, "learning_rate": 5.012307257761534e-06, "loss": 0.1385, "step": 6428 }, { "epoch": 0.51, "grad_norm": 1.24897348644714, "learning_rate": 5.01101175916229e-06, "loss": 0.1729, "step": 6429 }, { "epoch": 0.51, "grad_norm": 1.5392819274344394, "learning_rate": 5.009716259823792e-06, "loss": 0.191, "step": 6430 }, { "epoch": 0.51, "grad_norm": 1.3156815177679642, "learning_rate": 5.008420759833013e-06, "loss": 0.1654, "step": 6431 }, { "epoch": 0.51, "grad_norm": 1.4624911622140855, "learning_rate": 5.007125259276922e-06, "loss": 0.2059, "step": 6432 }, { "epoch": 0.51, "grad_norm": 1.3450597396063462, "learning_rate": 5.005829758242492e-06, "loss": 0.1888, "step": 6433 }, { "epoch": 0.51, "grad_norm": 1.4196980970344129, "learning_rate": 5.004534256816692e-06, "loss": 0.2108, "step": 6434 }, { "epoch": 0.51, "grad_norm": 1.3615912207164587, "learning_rate": 5.003238755086492e-06, "loss": 0.1791, "step": 6435 }, { "epoch": 0.51, "grad_norm": 1.2695951334060156, "learning_rate": 5.001943253138866e-06, "loss": 0.1936, "step": 6436 }, { "epoch": 0.51, "grad_norm": 1.4216257341166583, "learning_rate": 5.000647751060784e-06, "loss": 0.2276, "step": 6437 }, { "epoch": 0.52, "grad_norm": 1.478867227948118, "learning_rate": 4.999352248939216e-06, "loss": 0.1894, "step": 6438 }, { "epoch": 0.52, "grad_norm": 1.40206226767812, "learning_rate": 4.998056746861134e-06, "loss": 0.1821, "step": 6439 }, { "epoch": 0.52, "grad_norm": 9.065017659028959, "learning_rate": 4.996761244913508e-06, "loss": 0.6221, "step": 6440 }, { "epoch": 0.52, "grad_norm": 1.267014851979614, "learning_rate": 4.99546574318331e-06, "loss": 0.171, "step": 6441 }, { "epoch": 0.52, "grad_norm": 1.3887548877704219, "learning_rate": 4.994170241757511e-06, "loss": 0.205, "step": 6442 }, { "epoch": 0.52, "grad_norm": 1.4505973721594676, "learning_rate": 4.992874740723079e-06, "loss": 0.2222, "step": 6443 }, { "epoch": 0.52, "grad_norm": 1.2742638484813196, "learning_rate": 4.991579240166989e-06, "loss": 0.1612, "step": 6444 }, { "epoch": 0.52, "grad_norm": 1.5338614536974189, "learning_rate": 4.9902837401762085e-06, "loss": 0.2202, "step": 6445 }, { "epoch": 0.52, "grad_norm": 1.307843015493231, "learning_rate": 4.988988240837713e-06, "loss": 0.2246, "step": 6446 }, { "epoch": 0.52, "grad_norm": 1.2438444874820898, "learning_rate": 4.987692742238467e-06, "loss": 0.1623, "step": 6447 }, { "epoch": 0.52, "grad_norm": 6.786782519616948, "learning_rate": 4.9863972444654454e-06, "loss": 0.6145, "step": 6448 }, { "epoch": 0.52, "grad_norm": 1.3497394214361547, "learning_rate": 4.985101747605618e-06, "loss": 0.1605, "step": 6449 }, { "epoch": 0.52, "grad_norm": 1.2277053799918165, "learning_rate": 4.983806251745958e-06, "loss": 0.1741, "step": 6450 }, { "epoch": 0.52, "grad_norm": 1.4964492040567676, "learning_rate": 4.98251075697343e-06, "loss": 0.2321, "step": 6451 }, { "epoch": 0.52, "grad_norm": 1.3098906850210155, "learning_rate": 4.9812152633750084e-06, "loss": 0.2036, "step": 6452 }, { "epoch": 0.52, "grad_norm": 1.3346761739871242, "learning_rate": 4.979919771037666e-06, "loss": 0.1904, "step": 6453 }, { "epoch": 0.52, "grad_norm": 1.354355557286669, "learning_rate": 4.978624280048367e-06, "loss": 0.205, "step": 6454 }, { "epoch": 0.52, "grad_norm": 1.1932310185295678, "learning_rate": 4.9773287904940856e-06, "loss": 0.1656, "step": 6455 }, { "epoch": 0.52, "grad_norm": 5.256375970899375, "learning_rate": 4.976033302461791e-06, "loss": 0.5707, "step": 6456 }, { "epoch": 0.52, "grad_norm": 4.954872842855911, "learning_rate": 4.974737816038454e-06, "loss": 0.6552, "step": 6457 }, { "epoch": 0.52, "grad_norm": 1.4617277256974235, "learning_rate": 4.973442331311043e-06, "loss": 0.1907, "step": 6458 }, { "epoch": 0.52, "grad_norm": 1.4977121736074688, "learning_rate": 4.9721468483665305e-06, "loss": 0.2165, "step": 6459 }, { "epoch": 0.52, "grad_norm": 1.232628270215334, "learning_rate": 4.9708513672918854e-06, "loss": 0.179, "step": 6460 }, { "epoch": 0.52, "grad_norm": 1.1183375655846886, "learning_rate": 4.969555888174073e-06, "loss": 0.186, "step": 6461 }, { "epoch": 0.52, "grad_norm": 1.387930690600105, "learning_rate": 4.968260411100068e-06, "loss": 0.176, "step": 6462 }, { "epoch": 0.52, "grad_norm": 1.339045318017827, "learning_rate": 4.9669649361568365e-06, "loss": 0.2372, "step": 6463 }, { "epoch": 0.52, "grad_norm": 1.2680504752869848, "learning_rate": 4.96566946343135e-06, "loss": 0.1885, "step": 6464 }, { "epoch": 0.52, "grad_norm": 1.2817023328445856, "learning_rate": 4.964373993010576e-06, "loss": 0.1767, "step": 6465 }, { "epoch": 0.52, "grad_norm": 1.4428992505542968, "learning_rate": 4.963078524981485e-06, "loss": 0.1899, "step": 6466 }, { "epoch": 0.52, "grad_norm": 1.236973634061518, "learning_rate": 4.961783059431042e-06, "loss": 0.1597, "step": 6467 }, { "epoch": 0.52, "grad_norm": 1.292994240421266, "learning_rate": 4.960487596446221e-06, "loss": 0.1876, "step": 6468 }, { "epoch": 0.52, "grad_norm": 1.2664576054590746, "learning_rate": 4.959192136113985e-06, "loss": 0.1722, "step": 6469 }, { "epoch": 0.52, "grad_norm": 6.781154130795651, "learning_rate": 4.957896678521305e-06, "loss": 0.736, "step": 6470 }, { "epoch": 0.52, "grad_norm": 1.2718884440393683, "learning_rate": 4.95660122375515e-06, "loss": 0.2094, "step": 6471 }, { "epoch": 0.52, "grad_norm": 1.2211291498781898, "learning_rate": 4.9553057719024845e-06, "loss": 0.1923, "step": 6472 }, { "epoch": 0.52, "grad_norm": 1.1897030103496622, "learning_rate": 4.954010323050279e-06, "loss": 0.1459, "step": 6473 }, { "epoch": 0.52, "grad_norm": 1.3364748722559496, "learning_rate": 4.952714877285499e-06, "loss": 0.1873, "step": 6474 }, { "epoch": 0.52, "grad_norm": 1.3843335320933292, "learning_rate": 4.951419434695115e-06, "loss": 0.2159, "step": 6475 }, { "epoch": 0.52, "grad_norm": 1.2825141205513542, "learning_rate": 4.950123995366088e-06, "loss": 0.1934, "step": 6476 }, { "epoch": 0.52, "grad_norm": 3.768346422506801, "learning_rate": 4.9488285593853906e-06, "loss": 0.3353, "step": 6477 }, { "epoch": 0.52, "grad_norm": 7.85330666587967, "learning_rate": 4.947533126839987e-06, "loss": 0.4567, "step": 6478 }, { "epoch": 0.52, "grad_norm": 1.473994232305882, "learning_rate": 4.946237697816842e-06, "loss": 0.2068, "step": 6479 }, { "epoch": 0.52, "grad_norm": 1.3247575744265563, "learning_rate": 4.944942272402925e-06, "loss": 0.1594, "step": 6480 }, { "epoch": 0.52, "grad_norm": 1.2617809596681895, "learning_rate": 4.943646850685198e-06, "loss": 0.1896, "step": 6481 }, { "epoch": 0.52, "grad_norm": 1.0979430708527276, "learning_rate": 4.942351432750631e-06, "loss": 0.1659, "step": 6482 }, { "epoch": 0.52, "grad_norm": 1.3092274348911537, "learning_rate": 4.941056018686184e-06, "loss": 0.1451, "step": 6483 }, { "epoch": 0.52, "grad_norm": 1.3828935306361319, "learning_rate": 4.939760608578828e-06, "loss": 0.2128, "step": 6484 }, { "epoch": 0.52, "grad_norm": 1.364895657879168, "learning_rate": 4.938465202515524e-06, "loss": 0.1507, "step": 6485 }, { "epoch": 0.52, "grad_norm": 1.1063316112840047, "learning_rate": 4.937169800583237e-06, "loss": 0.1439, "step": 6486 }, { "epoch": 0.52, "grad_norm": 4.557868565247235, "learning_rate": 4.9358744028689336e-06, "loss": 0.6502, "step": 6487 }, { "epoch": 0.52, "grad_norm": 1.2198421968036965, "learning_rate": 4.934579009459573e-06, "loss": 0.1753, "step": 6488 }, { "epoch": 0.52, "grad_norm": 1.4636363883426746, "learning_rate": 4.933283620442123e-06, "loss": 0.2249, "step": 6489 }, { "epoch": 0.52, "grad_norm": 1.3388299236756833, "learning_rate": 4.931988235903545e-06, "loss": 0.2105, "step": 6490 }, { "epoch": 0.52, "grad_norm": 1.3802305389526213, "learning_rate": 4.930692855930805e-06, "loss": 0.2114, "step": 6491 }, { "epoch": 0.52, "grad_norm": 1.320776490708098, "learning_rate": 4.929397480610861e-06, "loss": 0.1963, "step": 6492 }, { "epoch": 0.52, "grad_norm": 1.1974184321539358, "learning_rate": 4.928102110030678e-06, "loss": 0.1808, "step": 6493 }, { "epoch": 0.52, "grad_norm": 1.1702978295024231, "learning_rate": 4.9268067442772185e-06, "loss": 0.1377, "step": 6494 }, { "epoch": 0.52, "grad_norm": 4.675271783210693, "learning_rate": 4.925511383437446e-06, "loss": 0.7834, "step": 6495 }, { "epoch": 0.52, "grad_norm": 5.694876623033931, "learning_rate": 4.9242160275983195e-06, "loss": 0.729, "step": 6496 }, { "epoch": 0.52, "grad_norm": 1.4140300028028574, "learning_rate": 4.922920676846798e-06, "loss": 0.2222, "step": 6497 }, { "epoch": 0.52, "grad_norm": 1.1759189966700123, "learning_rate": 4.9216253312698496e-06, "loss": 0.2047, "step": 6498 }, { "epoch": 0.52, "grad_norm": 1.308159060446919, "learning_rate": 4.9203299909544264e-06, "loss": 0.2135, "step": 6499 }, { "epoch": 0.52, "grad_norm": 1.3963517342358278, "learning_rate": 4.919034655987493e-06, "loss": 0.1931, "step": 6500 }, { "epoch": 0.52, "grad_norm": 1.3231052685470979, "learning_rate": 4.917739326456008e-06, "loss": 0.1968, "step": 6501 }, { "epoch": 0.52, "grad_norm": 1.3380638222983487, "learning_rate": 4.916444002446934e-06, "loss": 0.2155, "step": 6502 }, { "epoch": 0.52, "grad_norm": 1.3618715141522857, "learning_rate": 4.915148684047225e-06, "loss": 0.2015, "step": 6503 }, { "epoch": 0.52, "grad_norm": 1.2871346959458516, "learning_rate": 4.913853371343841e-06, "loss": 0.1516, "step": 6504 }, { "epoch": 0.52, "grad_norm": 1.315687767609004, "learning_rate": 4.912558064423744e-06, "loss": 0.2039, "step": 6505 }, { "epoch": 0.52, "grad_norm": 1.5768749703894354, "learning_rate": 4.911262763373887e-06, "loss": 0.218, "step": 6506 }, { "epoch": 0.52, "grad_norm": 1.2817894268522665, "learning_rate": 4.909967468281231e-06, "loss": 0.1461, "step": 6507 }, { "epoch": 0.52, "grad_norm": 1.1916827267320635, "learning_rate": 4.908672179232729e-06, "loss": 0.1895, "step": 6508 }, { "epoch": 0.52, "grad_norm": 1.3134336602352426, "learning_rate": 4.9073768963153434e-06, "loss": 0.1973, "step": 6509 }, { "epoch": 0.52, "grad_norm": 1.2125077345570685, "learning_rate": 4.906081619616026e-06, "loss": 0.1838, "step": 6510 }, { "epoch": 0.52, "grad_norm": 1.5232579603192709, "learning_rate": 4.904786349221733e-06, "loss": 0.2077, "step": 6511 }, { "epoch": 0.52, "grad_norm": 6.200847275730482, "learning_rate": 4.903491085219424e-06, "loss": 0.7319, "step": 6512 }, { "epoch": 0.52, "grad_norm": 10.68495153331719, "learning_rate": 4.902195827696048e-06, "loss": 0.4831, "step": 6513 }, { "epoch": 0.52, "grad_norm": 1.704743142056712, "learning_rate": 4.900900576738563e-06, "loss": 0.2297, "step": 6514 }, { "epoch": 0.52, "grad_norm": 1.3430704691585156, "learning_rate": 4.899605332433922e-06, "loss": 0.1869, "step": 6515 }, { "epoch": 0.52, "grad_norm": 1.4240321833279908, "learning_rate": 4.898310094869082e-06, "loss": 0.2126, "step": 6516 }, { "epoch": 0.52, "grad_norm": 1.2345239307897238, "learning_rate": 4.89701486413099e-06, "loss": 0.1512, "step": 6517 }, { "epoch": 0.52, "grad_norm": 1.1919226427385405, "learning_rate": 4.8957196403066036e-06, "loss": 0.1506, "step": 6518 }, { "epoch": 0.52, "grad_norm": 6.619942926220242, "learning_rate": 4.8944244234828734e-06, "loss": 0.6776, "step": 6519 }, { "epoch": 0.52, "grad_norm": 5.9474411120294155, "learning_rate": 4.8931292137467525e-06, "loss": 0.62, "step": 6520 }, { "epoch": 0.52, "grad_norm": 1.2935708106312878, "learning_rate": 4.891834011185191e-06, "loss": 0.204, "step": 6521 }, { "epoch": 0.52, "grad_norm": 1.281559968102252, "learning_rate": 4.890538815885139e-06, "loss": 0.1876, "step": 6522 }, { "epoch": 0.52, "grad_norm": 1.32306481120636, "learning_rate": 4.88924362793355e-06, "loss": 0.1685, "step": 6523 }, { "epoch": 0.52, "grad_norm": 1.3601948506417618, "learning_rate": 4.887948447417369e-06, "loss": 0.1971, "step": 6524 }, { "epoch": 0.52, "grad_norm": 1.970631538545379, "learning_rate": 4.886653274423551e-06, "loss": 0.2331, "step": 6525 }, { "epoch": 0.52, "grad_norm": 1.286643234670694, "learning_rate": 4.88535810903904e-06, "loss": 0.1372, "step": 6526 }, { "epoch": 0.52, "grad_norm": 1.4579908451993773, "learning_rate": 4.884062951350787e-06, "loss": 0.2111, "step": 6527 }, { "epoch": 0.52, "grad_norm": 1.48490233924593, "learning_rate": 4.882767801445739e-06, "loss": 0.2039, "step": 6528 }, { "epoch": 0.52, "grad_norm": 1.3382582884559513, "learning_rate": 4.8814726594108455e-06, "loss": 0.2321, "step": 6529 }, { "epoch": 0.52, "grad_norm": 1.2096611266947055, "learning_rate": 4.880177525333051e-06, "loss": 0.148, "step": 6530 }, { "epoch": 0.52, "grad_norm": 1.330107188156349, "learning_rate": 4.878882399299301e-06, "loss": 0.2161, "step": 6531 }, { "epoch": 0.52, "grad_norm": 1.129105008008197, "learning_rate": 4.877587281396544e-06, "loss": 0.1628, "step": 6532 }, { "epoch": 0.52, "grad_norm": 1.240770920588671, "learning_rate": 4.876292171711723e-06, "loss": 0.2072, "step": 6533 }, { "epoch": 0.52, "grad_norm": 1.2656581851091837, "learning_rate": 4.8749970703317825e-06, "loss": 0.1601, "step": 6534 }, { "epoch": 0.52, "grad_norm": 1.5426382569629271, "learning_rate": 4.873701977343667e-06, "loss": 0.2213, "step": 6535 }, { "epoch": 0.52, "grad_norm": 7.319723210346589, "learning_rate": 4.872406892834323e-06, "loss": 0.6139, "step": 6536 }, { "epoch": 0.52, "grad_norm": 1.2412588837549297, "learning_rate": 4.8711118168906885e-06, "loss": 0.1932, "step": 6537 }, { "epoch": 0.52, "grad_norm": 1.1903991722047078, "learning_rate": 4.8698167495997095e-06, "loss": 0.1565, "step": 6538 }, { "epoch": 0.52, "grad_norm": 1.344746043596786, "learning_rate": 4.8685216910483275e-06, "loss": 0.213, "step": 6539 }, { "epoch": 0.52, "grad_norm": 1.2403383165534796, "learning_rate": 4.867226641323481e-06, "loss": 0.1966, "step": 6540 }, { "epoch": 0.52, "grad_norm": 4.3370077629390495, "learning_rate": 4.8659316005121125e-06, "loss": 0.5838, "step": 6541 }, { "epoch": 0.52, "grad_norm": 1.5358547601022514, "learning_rate": 4.864636568701162e-06, "loss": 0.2513, "step": 6542 }, { "epoch": 0.52, "grad_norm": 1.4304870506413077, "learning_rate": 4.86334154597757e-06, "loss": 0.1888, "step": 6543 }, { "epoch": 0.52, "grad_norm": 1.40764727334343, "learning_rate": 4.862046532428272e-06, "loss": 0.216, "step": 6544 }, { "epoch": 0.52, "grad_norm": 1.352628833835352, "learning_rate": 4.860751528140209e-06, "loss": 0.2036, "step": 6545 }, { "epoch": 0.52, "grad_norm": 1.1290700925638177, "learning_rate": 4.859456533200318e-06, "loss": 0.1305, "step": 6546 }, { "epoch": 0.52, "grad_norm": 1.164014569001093, "learning_rate": 4.858161547695537e-06, "loss": 0.1523, "step": 6547 }, { "epoch": 0.52, "grad_norm": 1.299323689716936, "learning_rate": 4.8568665717128e-06, "loss": 0.1931, "step": 6548 }, { "epoch": 0.52, "grad_norm": 1.3619883406877493, "learning_rate": 4.855571605339043e-06, "loss": 0.1787, "step": 6549 }, { "epoch": 0.52, "grad_norm": 1.4368002238089048, "learning_rate": 4.8542766486612035e-06, "loss": 0.2307, "step": 6550 }, { "epoch": 0.52, "grad_norm": 1.4073693146032153, "learning_rate": 4.852981701766212e-06, "loss": 0.1963, "step": 6551 }, { "epoch": 0.52, "grad_norm": 1.413877235349347, "learning_rate": 4.851686764741005e-06, "loss": 0.2066, "step": 6552 }, { "epoch": 0.52, "grad_norm": 1.243283087319646, "learning_rate": 4.850391837672515e-06, "loss": 0.2017, "step": 6553 }, { "epoch": 0.52, "grad_norm": 1.4244229440760505, "learning_rate": 4.849096920647675e-06, "loss": 0.2555, "step": 6554 }, { "epoch": 0.52, "grad_norm": 1.2860802610125572, "learning_rate": 4.847802013753415e-06, "loss": 0.1962, "step": 6555 }, { "epoch": 0.52, "grad_norm": 1.36055728292811, "learning_rate": 4.846507117076667e-06, "loss": 0.1921, "step": 6556 }, { "epoch": 0.52, "grad_norm": 1.326818838117966, "learning_rate": 4.845212230704363e-06, "loss": 0.2104, "step": 6557 }, { "epoch": 0.52, "grad_norm": 1.4904421650460549, "learning_rate": 4.843917354723428e-06, "loss": 0.1812, "step": 6558 }, { "epoch": 0.52, "grad_norm": 1.3284220479526134, "learning_rate": 4.842622489220796e-06, "loss": 0.2138, "step": 6559 }, { "epoch": 0.52, "grad_norm": 1.4775484909757695, "learning_rate": 4.841327634283392e-06, "loss": 0.212, "step": 6560 }, { "epoch": 0.52, "grad_norm": 1.3869806558083997, "learning_rate": 4.840032789998147e-06, "loss": 0.2169, "step": 6561 }, { "epoch": 0.52, "grad_norm": 1.4400567605583703, "learning_rate": 4.838737956451982e-06, "loss": 0.1811, "step": 6562 }, { "epoch": 0.53, "grad_norm": 1.398036744317056, "learning_rate": 4.837443133731828e-06, "loss": 0.1748, "step": 6563 }, { "epoch": 0.53, "grad_norm": 1.3478880238171476, "learning_rate": 4.836148321924611e-06, "loss": 0.1825, "step": 6564 }, { "epoch": 0.53, "grad_norm": 1.4267354229943439, "learning_rate": 4.834853521117251e-06, "loss": 0.1925, "step": 6565 }, { "epoch": 0.53, "grad_norm": 1.3057799642802572, "learning_rate": 4.833558731396675e-06, "loss": 0.1918, "step": 6566 }, { "epoch": 0.53, "grad_norm": 1.3220262574264439, "learning_rate": 4.832263952849805e-06, "loss": 0.2005, "step": 6567 }, { "epoch": 0.53, "grad_norm": 1.2677563871001374, "learning_rate": 4.830969185563566e-06, "loss": 0.1809, "step": 6568 }, { "epoch": 0.53, "grad_norm": 1.5501322312444912, "learning_rate": 4.829674429624876e-06, "loss": 0.2258, "step": 6569 }, { "epoch": 0.53, "grad_norm": 1.32090832886925, "learning_rate": 4.828379685120659e-06, "loss": 0.1399, "step": 6570 }, { "epoch": 0.53, "grad_norm": 16.768903833901742, "learning_rate": 4.827084952137833e-06, "loss": 0.4583, "step": 6571 }, { "epoch": 0.53, "grad_norm": 12.574780789212554, "learning_rate": 4.825790230763318e-06, "loss": 0.558, "step": 6572 }, { "epoch": 0.53, "grad_norm": 1.361489051072294, "learning_rate": 4.824495521084034e-06, "loss": 0.1653, "step": 6573 }, { "epoch": 0.53, "grad_norm": 1.4523552664448192, "learning_rate": 4.823200823186895e-06, "loss": 0.2397, "step": 6574 }, { "epoch": 0.53, "grad_norm": 1.3485866956157013, "learning_rate": 4.821906137158822e-06, "loss": 0.1804, "step": 6575 }, { "epoch": 0.53, "grad_norm": 1.4429614801960882, "learning_rate": 4.820611463086728e-06, "loss": 0.148, "step": 6576 }, { "epoch": 0.53, "grad_norm": 1.332727271255758, "learning_rate": 4.819316801057533e-06, "loss": 0.2024, "step": 6577 }, { "epoch": 0.53, "grad_norm": 7.350745128049199, "learning_rate": 4.818022151158146e-06, "loss": 0.5582, "step": 6578 }, { "epoch": 0.53, "grad_norm": 1.1734769646762175, "learning_rate": 4.816727513475485e-06, "loss": 0.1613, "step": 6579 }, { "epoch": 0.53, "grad_norm": 1.4472294667921561, "learning_rate": 4.815432888096459e-06, "loss": 0.2135, "step": 6580 }, { "epoch": 0.53, "grad_norm": 1.3287841250929464, "learning_rate": 4.814138275107986e-06, "loss": 0.1529, "step": 6581 }, { "epoch": 0.53, "grad_norm": 1.2545308503158996, "learning_rate": 4.812843674596972e-06, "loss": 0.2039, "step": 6582 }, { "epoch": 0.53, "grad_norm": 12.660946926334574, "learning_rate": 4.811549086650327e-06, "loss": 0.7344, "step": 6583 }, { "epoch": 0.53, "grad_norm": 1.298850196854735, "learning_rate": 4.810254511354966e-06, "loss": 0.2003, "step": 6584 }, { "epoch": 0.53, "grad_norm": 1.1489309815756354, "learning_rate": 4.808959948797793e-06, "loss": 0.1579, "step": 6585 }, { "epoch": 0.53, "grad_norm": 1.2166805966225172, "learning_rate": 4.807665399065718e-06, "loss": 0.206, "step": 6586 }, { "epoch": 0.53, "grad_norm": 1.195503116143096, "learning_rate": 4.806370862245646e-06, "loss": 0.1833, "step": 6587 }, { "epoch": 0.53, "grad_norm": 1.4104544362476874, "learning_rate": 4.8050763384244875e-06, "loss": 0.1837, "step": 6588 }, { "epoch": 0.53, "grad_norm": 1.2951669589209311, "learning_rate": 4.8037818276891425e-06, "loss": 0.2196, "step": 6589 }, { "epoch": 0.53, "grad_norm": 1.448977612935867, "learning_rate": 4.802487330126519e-06, "loss": 0.2073, "step": 6590 }, { "epoch": 0.53, "grad_norm": 1.291150336912249, "learning_rate": 4.801192845823521e-06, "loss": 0.188, "step": 6591 }, { "epoch": 0.53, "grad_norm": 1.5970412398479783, "learning_rate": 4.799898374867047e-06, "loss": 0.2277, "step": 6592 }, { "epoch": 0.53, "grad_norm": 1.5365158972020774, "learning_rate": 4.798603917344003e-06, "loss": 0.2269, "step": 6593 }, { "epoch": 0.53, "grad_norm": 1.4209885368414394, "learning_rate": 4.797309473341286e-06, "loss": 0.2294, "step": 6594 }, { "epoch": 0.53, "grad_norm": 1.4341493811388837, "learning_rate": 4.796015042945801e-06, "loss": 0.1677, "step": 6595 }, { "epoch": 0.53, "grad_norm": 1.1379282760771579, "learning_rate": 4.794720626244442e-06, "loss": 0.1542, "step": 6596 }, { "epoch": 0.53, "grad_norm": 1.448270841428079, "learning_rate": 4.79342622332411e-06, "loss": 0.1933, "step": 6597 }, { "epoch": 0.53, "grad_norm": 1.6272632472609554, "learning_rate": 4.792131834271701e-06, "loss": 0.2313, "step": 6598 }, { "epoch": 0.53, "grad_norm": 1.2302965393176406, "learning_rate": 4.790837459174114e-06, "loss": 0.1841, "step": 6599 }, { "epoch": 0.53, "grad_norm": 1.1708301466977171, "learning_rate": 4.7895430981182415e-06, "loss": 0.1622, "step": 6600 }, { "epoch": 0.53, "grad_norm": 1.3263735959669816, "learning_rate": 4.788248751190977e-06, "loss": 0.1992, "step": 6601 }, { "epoch": 0.53, "grad_norm": 1.493462081051432, "learning_rate": 4.786954418479217e-06, "loss": 0.1796, "step": 6602 }, { "epoch": 0.53, "grad_norm": 19.994128076040145, "learning_rate": 4.785660100069851e-06, "loss": 0.5566, "step": 6603 }, { "epoch": 0.53, "grad_norm": 1.42106058863406, "learning_rate": 4.784365796049772e-06, "loss": 0.2132, "step": 6604 }, { "epoch": 0.53, "grad_norm": 1.1495963360561818, "learning_rate": 4.78307150650587e-06, "loss": 0.1552, "step": 6605 }, { "epoch": 0.53, "grad_norm": 1.4525780583231667, "learning_rate": 4.781777231525037e-06, "loss": 0.2092, "step": 6606 }, { "epoch": 0.53, "grad_norm": 1.4602496248873231, "learning_rate": 4.780482971194157e-06, "loss": 0.2046, "step": 6607 }, { "epoch": 0.53, "grad_norm": 1.2674157587172645, "learning_rate": 4.779188725600123e-06, "loss": 0.1717, "step": 6608 }, { "epoch": 0.53, "grad_norm": 1.280533476688851, "learning_rate": 4.777894494829819e-06, "loss": 0.2241, "step": 6609 }, { "epoch": 0.53, "grad_norm": 1.306963050104407, "learning_rate": 4.776600278970127e-06, "loss": 0.1839, "step": 6610 }, { "epoch": 0.53, "grad_norm": 1.1984225559989583, "learning_rate": 4.775306078107938e-06, "loss": 0.1741, "step": 6611 }, { "epoch": 0.53, "grad_norm": 1.5708246597589846, "learning_rate": 4.774011892330132e-06, "loss": 0.1984, "step": 6612 }, { "epoch": 0.53, "grad_norm": 6.671244243284006, "learning_rate": 4.772717721723593e-06, "loss": 0.4876, "step": 6613 }, { "epoch": 0.53, "grad_norm": 1.3646108395698546, "learning_rate": 4.7714235663752e-06, "loss": 0.2456, "step": 6614 }, { "epoch": 0.53, "grad_norm": 1.3201079504129871, "learning_rate": 4.770129426371838e-06, "loss": 0.1929, "step": 6615 }, { "epoch": 0.53, "grad_norm": 1.4239607468807445, "learning_rate": 4.768835301800383e-06, "loss": 0.1901, "step": 6616 }, { "epoch": 0.53, "grad_norm": 1.3873728676656973, "learning_rate": 4.767541192747714e-06, "loss": 0.1796, "step": 6617 }, { "epoch": 0.53, "grad_norm": 1.3079091238882445, "learning_rate": 4.766247099300711e-06, "loss": 0.1902, "step": 6618 }, { "epoch": 0.53, "grad_norm": 1.4915154985268286, "learning_rate": 4.764953021546247e-06, "loss": 0.244, "step": 6619 }, { "epoch": 0.53, "grad_norm": 1.2341851321558228, "learning_rate": 4.763658959571199e-06, "loss": 0.146, "step": 6620 }, { "epoch": 0.53, "grad_norm": 1.2547974964996214, "learning_rate": 4.76236491346244e-06, "loss": 0.172, "step": 6621 }, { "epoch": 0.53, "grad_norm": 1.4681611132235795, "learning_rate": 4.7610708833068466e-06, "loss": 0.1947, "step": 6622 }, { "epoch": 0.53, "grad_norm": 1.2618533945433152, "learning_rate": 4.759776869191287e-06, "loss": 0.1914, "step": 6623 }, { "epoch": 0.53, "grad_norm": 1.3502634055977463, "learning_rate": 4.7584828712026345e-06, "loss": 0.1879, "step": 6624 }, { "epoch": 0.53, "grad_norm": 1.2971771694383252, "learning_rate": 4.757188889427761e-06, "loss": 0.1754, "step": 6625 }, { "epoch": 0.53, "grad_norm": 5.567244467715232, "learning_rate": 4.7558949239535295e-06, "loss": 0.6236, "step": 6626 }, { "epoch": 0.53, "grad_norm": 1.2869608603762317, "learning_rate": 4.754600974866813e-06, "loss": 0.1906, "step": 6627 }, { "epoch": 0.53, "grad_norm": 1.2692170245999832, "learning_rate": 4.753307042254476e-06, "loss": 0.2024, "step": 6628 }, { "epoch": 0.53, "grad_norm": 1.3213774859705014, "learning_rate": 4.752013126203386e-06, "loss": 0.1849, "step": 6629 }, { "epoch": 0.53, "grad_norm": 1.115208416368678, "learning_rate": 4.750719226800404e-06, "loss": 0.1663, "step": 6630 }, { "epoch": 0.53, "grad_norm": 1.392426964838036, "learning_rate": 4.749425344132397e-06, "loss": 0.2175, "step": 6631 }, { "epoch": 0.53, "grad_norm": 1.3450686686800324, "learning_rate": 4.748131478286225e-06, "loss": 0.1782, "step": 6632 }, { "epoch": 0.53, "grad_norm": 1.356508495742206, "learning_rate": 4.7468376293487515e-06, "loss": 0.216, "step": 6633 }, { "epoch": 0.53, "grad_norm": 1.4635077573282622, "learning_rate": 4.745543797406835e-06, "loss": 0.1606, "step": 6634 }, { "epoch": 0.53, "grad_norm": 1.3847681783984467, "learning_rate": 4.744249982547332e-06, "loss": 0.1944, "step": 6635 }, { "epoch": 0.53, "grad_norm": 1.5316911420192632, "learning_rate": 4.7429561848571055e-06, "loss": 0.195, "step": 6636 }, { "epoch": 0.53, "grad_norm": 6.020441401890124, "learning_rate": 4.741662404423007e-06, "loss": 0.5007, "step": 6637 }, { "epoch": 0.53, "grad_norm": 5.416073094247277, "learning_rate": 4.740368641331894e-06, "loss": 0.5204, "step": 6638 }, { "epoch": 0.53, "grad_norm": 1.1765016169106137, "learning_rate": 4.7390748956706216e-06, "loss": 0.1628, "step": 6639 }, { "epoch": 0.53, "grad_norm": 1.3786089061978328, "learning_rate": 4.737781167526043e-06, "loss": 0.1735, "step": 6640 }, { "epoch": 0.53, "grad_norm": 1.4602935960213266, "learning_rate": 4.736487456985007e-06, "loss": 0.2048, "step": 6641 }, { "epoch": 0.53, "grad_norm": 1.3581421998607388, "learning_rate": 4.735193764134367e-06, "loss": 0.1906, "step": 6642 }, { "epoch": 0.53, "grad_norm": 1.4038250599105053, "learning_rate": 4.733900089060974e-06, "loss": 0.1943, "step": 6643 }, { "epoch": 0.53, "grad_norm": 6.4694231968500855, "learning_rate": 4.732606431851672e-06, "loss": 0.6254, "step": 6644 }, { "epoch": 0.53, "grad_norm": 1.5271908450546945, "learning_rate": 4.731312792593311e-06, "loss": 0.2146, "step": 6645 }, { "epoch": 0.53, "grad_norm": 1.292783833906374, "learning_rate": 4.730019171372736e-06, "loss": 0.1778, "step": 6646 }, { "epoch": 0.53, "grad_norm": 7.150040173649783, "learning_rate": 4.728725568276793e-06, "loss": 0.5066, "step": 6647 }, { "epoch": 0.53, "grad_norm": 1.2625397301628674, "learning_rate": 4.727431983392323e-06, "loss": 0.166, "step": 6648 }, { "epoch": 0.53, "grad_norm": 1.1938061483824554, "learning_rate": 4.726138416806171e-06, "loss": 0.1538, "step": 6649 }, { "epoch": 0.53, "grad_norm": 1.3148058608095305, "learning_rate": 4.724844868605176e-06, "loss": 0.1851, "step": 6650 }, { "epoch": 0.53, "grad_norm": 1.563592166279795, "learning_rate": 4.72355133887618e-06, "loss": 0.2397, "step": 6651 }, { "epoch": 0.53, "grad_norm": 6.649686829916074, "learning_rate": 4.72225782770602e-06, "loss": 0.6216, "step": 6652 }, { "epoch": 0.53, "grad_norm": 1.28005366089802, "learning_rate": 4.720964335181532e-06, "loss": 0.2087, "step": 6653 }, { "epoch": 0.53, "grad_norm": 1.4425657997833392, "learning_rate": 4.7196708613895555e-06, "loss": 0.202, "step": 6654 }, { "epoch": 0.53, "grad_norm": 1.5114304971400478, "learning_rate": 4.7183774064169215e-06, "loss": 0.2341, "step": 6655 }, { "epoch": 0.53, "grad_norm": 1.2185997645296378, "learning_rate": 4.717083970350468e-06, "loss": 0.1609, "step": 6656 }, { "epoch": 0.53, "grad_norm": 1.2764221484305232, "learning_rate": 4.715790553277022e-06, "loss": 0.1982, "step": 6657 }, { "epoch": 0.53, "grad_norm": 1.1754715359095889, "learning_rate": 4.714497155283419e-06, "loss": 0.1671, "step": 6658 }, { "epoch": 0.53, "grad_norm": 1.2241228217885318, "learning_rate": 4.713203776456486e-06, "loss": 0.1716, "step": 6659 }, { "epoch": 0.53, "grad_norm": 1.3776858559092595, "learning_rate": 4.711910416883054e-06, "loss": 0.1867, "step": 6660 }, { "epoch": 0.53, "grad_norm": 1.2032887300114448, "learning_rate": 4.710617076649948e-06, "loss": 0.1806, "step": 6661 }, { "epoch": 0.53, "grad_norm": 1.1698300582628904, "learning_rate": 4.709323755843993e-06, "loss": 0.1826, "step": 6662 }, { "epoch": 0.53, "grad_norm": 1.4432394440958993, "learning_rate": 4.7080304545520175e-06, "loss": 0.2008, "step": 6663 }, { "epoch": 0.53, "grad_norm": 1.1829300783352419, "learning_rate": 4.7067371728608395e-06, "loss": 0.1873, "step": 6664 }, { "epoch": 0.53, "grad_norm": 1.45153836950273, "learning_rate": 4.7054439108572856e-06, "loss": 0.2458, "step": 6665 }, { "epoch": 0.53, "grad_norm": 1.3260024828157815, "learning_rate": 4.704150668628173e-06, "loss": 0.2096, "step": 6666 }, { "epoch": 0.53, "grad_norm": 1.4735533273079124, "learning_rate": 4.702857446260324e-06, "loss": 0.2051, "step": 6667 }, { "epoch": 0.53, "grad_norm": 1.4589662077916596, "learning_rate": 4.701564243840553e-06, "loss": 0.1531, "step": 6668 }, { "epoch": 0.53, "grad_norm": 1.6493672384073172, "learning_rate": 4.7002710614556805e-06, "loss": 0.2205, "step": 6669 }, { "epoch": 0.53, "grad_norm": 1.4681277141064315, "learning_rate": 4.69897789919252e-06, "loss": 0.1786, "step": 6670 }, { "epoch": 0.53, "grad_norm": 1.5257840952115231, "learning_rate": 4.697684757137883e-06, "loss": 0.2151, "step": 6671 }, { "epoch": 0.53, "grad_norm": 1.2791329601220198, "learning_rate": 4.696391635378586e-06, "loss": 0.1613, "step": 6672 }, { "epoch": 0.53, "grad_norm": 1.2818441955948678, "learning_rate": 4.695098534001437e-06, "loss": 0.1845, "step": 6673 }, { "epoch": 0.53, "grad_norm": 1.3830763697195854, "learning_rate": 4.693805453093249e-06, "loss": 0.1969, "step": 6674 }, { "epoch": 0.53, "grad_norm": 1.3291137688769834, "learning_rate": 4.6925123927408265e-06, "loss": 0.1763, "step": 6675 }, { "epoch": 0.53, "grad_norm": 1.3309494797501018, "learning_rate": 4.6912193530309805e-06, "loss": 0.1995, "step": 6676 }, { "epoch": 0.53, "grad_norm": 1.3087947815823218, "learning_rate": 4.689926334050513e-06, "loss": 0.1614, "step": 6677 }, { "epoch": 0.53, "grad_norm": 1.2510757329777877, "learning_rate": 4.6886333358862326e-06, "loss": 0.1615, "step": 6678 }, { "epoch": 0.53, "grad_norm": 1.3248074725593237, "learning_rate": 4.687340358624939e-06, "loss": 0.1776, "step": 6679 }, { "epoch": 0.53, "grad_norm": 8.051690357740911, "learning_rate": 4.686047402353433e-06, "loss": 0.5547, "step": 6680 }, { "epoch": 0.53, "grad_norm": 1.4333452627641259, "learning_rate": 4.6847544671585185e-06, "loss": 0.1705, "step": 6681 }, { "epoch": 0.53, "grad_norm": 1.3700514680388276, "learning_rate": 4.68346155312699e-06, "loss": 0.1811, "step": 6682 }, { "epoch": 0.53, "grad_norm": 1.5123805389442506, "learning_rate": 4.682168660345647e-06, "loss": 0.2417, "step": 6683 }, { "epoch": 0.53, "grad_norm": 1.237346360285923, "learning_rate": 4.680875788901284e-06, "loss": 0.1632, "step": 6684 }, { "epoch": 0.53, "grad_norm": 1.3940856177077163, "learning_rate": 4.679582938880698e-06, "loss": 0.1963, "step": 6685 }, { "epoch": 0.53, "grad_norm": 1.3460362806248896, "learning_rate": 4.67829011037068e-06, "loss": 0.1876, "step": 6686 }, { "epoch": 0.53, "grad_norm": 1.4680524905144217, "learning_rate": 4.67699730345802e-06, "loss": 0.2129, "step": 6687 }, { "epoch": 0.54, "grad_norm": 1.2990172369197281, "learning_rate": 4.675704518229512e-06, "loss": 0.1671, "step": 6688 }, { "epoch": 0.54, "grad_norm": 1.3201759114050347, "learning_rate": 4.674411754771939e-06, "loss": 0.1898, "step": 6689 }, { "epoch": 0.54, "grad_norm": 1.2850800343236037, "learning_rate": 4.673119013172093e-06, "loss": 0.1819, "step": 6690 }, { "epoch": 0.54, "grad_norm": 1.4471974965475833, "learning_rate": 4.671826293516758e-06, "loss": 0.2097, "step": 6691 }, { "epoch": 0.54, "grad_norm": 1.3200560564800181, "learning_rate": 4.67053359589272e-06, "loss": 0.1577, "step": 6692 }, { "epoch": 0.54, "grad_norm": 1.3889660411751608, "learning_rate": 4.669240920386756e-06, "loss": 0.1903, "step": 6693 }, { "epoch": 0.54, "grad_norm": 1.3565185059812563, "learning_rate": 4.667948267085655e-06, "loss": 0.2231, "step": 6694 }, { "epoch": 0.54, "grad_norm": 1.336468069301615, "learning_rate": 4.6666556360761925e-06, "loss": 0.2062, "step": 6695 }, { "epoch": 0.54, "grad_norm": 1.2524329255250914, "learning_rate": 4.665363027445144e-06, "loss": 0.1547, "step": 6696 }, { "epoch": 0.54, "grad_norm": 1.237215631747086, "learning_rate": 4.664070441279293e-06, "loss": 0.1884, "step": 6697 }, { "epoch": 0.54, "grad_norm": 1.2084125654754692, "learning_rate": 4.6627778776654085e-06, "loss": 0.1546, "step": 6698 }, { "epoch": 0.54, "grad_norm": 1.2999805282299985, "learning_rate": 4.661485336690268e-06, "loss": 0.1593, "step": 6699 }, { "epoch": 0.54, "grad_norm": 1.1575440435029185, "learning_rate": 4.660192818440642e-06, "loss": 0.1354, "step": 6700 }, { "epoch": 0.54, "grad_norm": 1.264280740098368, "learning_rate": 4.658900323003303e-06, "loss": 0.2101, "step": 6701 }, { "epoch": 0.54, "grad_norm": 1.3298447557650057, "learning_rate": 4.657607850465017e-06, "loss": 0.1855, "step": 6702 }, { "epoch": 0.54, "grad_norm": 1.3083684128468835, "learning_rate": 4.656315400912554e-06, "loss": 0.1718, "step": 6703 }, { "epoch": 0.54, "grad_norm": 1.2592558368512883, "learning_rate": 4.655022974432681e-06, "loss": 0.2094, "step": 6704 }, { "epoch": 0.54, "grad_norm": 1.4013461092096426, "learning_rate": 4.653730571112159e-06, "loss": 0.1955, "step": 6705 }, { "epoch": 0.54, "grad_norm": 5.7943585929237, "learning_rate": 4.652438191037754e-06, "loss": 0.5767, "step": 6706 }, { "epoch": 0.54, "grad_norm": 1.4174380779838232, "learning_rate": 4.6511458342962255e-06, "loss": 0.1847, "step": 6707 }, { "epoch": 0.54, "grad_norm": 1.4264623097995257, "learning_rate": 4.649853500974336e-06, "loss": 0.1986, "step": 6708 }, { "epoch": 0.54, "grad_norm": 1.3778893323818147, "learning_rate": 4.648561191158839e-06, "loss": 0.2025, "step": 6709 }, { "epoch": 0.54, "grad_norm": 1.4797639419767474, "learning_rate": 4.647268904936495e-06, "loss": 0.1947, "step": 6710 }, { "epoch": 0.54, "grad_norm": 1.2872928782716377, "learning_rate": 4.645976642394058e-06, "loss": 0.1729, "step": 6711 }, { "epoch": 0.54, "grad_norm": 1.3683633335942158, "learning_rate": 4.644684403618284e-06, "loss": 0.1759, "step": 6712 }, { "epoch": 0.54, "grad_norm": 1.3546114391091568, "learning_rate": 4.643392188695921e-06, "loss": 0.1784, "step": 6713 }, { "epoch": 0.54, "grad_norm": 1.2771567235567156, "learning_rate": 4.64209999771372e-06, "loss": 0.1771, "step": 6714 }, { "epoch": 0.54, "grad_norm": 1.3676320339308632, "learning_rate": 4.640807830758433e-06, "loss": 0.1817, "step": 6715 }, { "epoch": 0.54, "grad_norm": 1.292661147361725, "learning_rate": 4.639515687916801e-06, "loss": 0.1881, "step": 6716 }, { "epoch": 0.54, "grad_norm": 6.044023872874119, "learning_rate": 4.6382235692755756e-06, "loss": 0.5193, "step": 6717 }, { "epoch": 0.54, "grad_norm": 1.3572757170486722, "learning_rate": 4.6369314749214965e-06, "loss": 0.2144, "step": 6718 }, { "epoch": 0.54, "grad_norm": 1.3236558914891, "learning_rate": 4.63563940494131e-06, "loss": 0.2093, "step": 6719 }, { "epoch": 0.54, "grad_norm": 1.2950585456047743, "learning_rate": 4.6343473594217515e-06, "loss": 0.1617, "step": 6720 }, { "epoch": 0.54, "grad_norm": 1.3796416644826845, "learning_rate": 4.633055338449564e-06, "loss": 0.1796, "step": 6721 }, { "epoch": 0.54, "grad_norm": 16.65793525007263, "learning_rate": 4.631763342111485e-06, "loss": 0.7074, "step": 6722 }, { "epoch": 0.54, "grad_norm": 1.3174321036395324, "learning_rate": 4.630471370494246e-06, "loss": 0.1992, "step": 6723 }, { "epoch": 0.54, "grad_norm": 1.392529332795741, "learning_rate": 4.629179423684585e-06, "loss": 0.2088, "step": 6724 }, { "epoch": 0.54, "grad_norm": 1.2651433582092273, "learning_rate": 4.627887501769231e-06, "loss": 0.2112, "step": 6725 }, { "epoch": 0.54, "grad_norm": 1.5069683250385393, "learning_rate": 4.626595604834918e-06, "loss": 0.2257, "step": 6726 }, { "epoch": 0.54, "grad_norm": 1.307569893996264, "learning_rate": 4.625303732968373e-06, "loss": 0.1594, "step": 6727 }, { "epoch": 0.54, "grad_norm": 1.365099838652735, "learning_rate": 4.624011886256323e-06, "loss": 0.1871, "step": 6728 }, { "epoch": 0.54, "grad_norm": 1.3808385984584466, "learning_rate": 4.622720064785495e-06, "loss": 0.1917, "step": 6729 }, { "epoch": 0.54, "grad_norm": 1.3902979454676903, "learning_rate": 4.621428268642613e-06, "loss": 0.1733, "step": 6730 }, { "epoch": 0.54, "grad_norm": 1.3121360568472442, "learning_rate": 4.620136497914399e-06, "loss": 0.2159, "step": 6731 }, { "epoch": 0.54, "grad_norm": 1.483295275526807, "learning_rate": 4.61884475268757e-06, "loss": 0.2096, "step": 6732 }, { "epoch": 0.54, "grad_norm": 1.3756638486987285, "learning_rate": 4.617553033048851e-06, "loss": 0.2176, "step": 6733 }, { "epoch": 0.54, "grad_norm": 1.3572507463226735, "learning_rate": 4.616261339084952e-06, "loss": 0.1812, "step": 6734 }, { "epoch": 0.54, "grad_norm": 1.2292532494166757, "learning_rate": 4.614969670882594e-06, "loss": 0.1828, "step": 6735 }, { "epoch": 0.54, "grad_norm": 5.388551462670128, "learning_rate": 4.6136780285284875e-06, "loss": 0.6536, "step": 6736 }, { "epoch": 0.54, "grad_norm": 1.306036422348256, "learning_rate": 4.612386412109346e-06, "loss": 0.178, "step": 6737 }, { "epoch": 0.54, "grad_norm": 5.668020671128, "learning_rate": 4.611094821711879e-06, "loss": 0.5992, "step": 6738 }, { "epoch": 0.54, "grad_norm": 1.2274674376363142, "learning_rate": 4.609803257422796e-06, "loss": 0.178, "step": 6739 }, { "epoch": 0.54, "grad_norm": 1.39855725603969, "learning_rate": 4.608511719328803e-06, "loss": 0.1871, "step": 6740 }, { "epoch": 0.54, "grad_norm": 1.4803677707756495, "learning_rate": 4.607220207516602e-06, "loss": 0.2165, "step": 6741 }, { "epoch": 0.54, "grad_norm": 1.3838949620759733, "learning_rate": 4.605928722072901e-06, "loss": 0.1747, "step": 6742 }, { "epoch": 0.54, "grad_norm": 1.2106907767590576, "learning_rate": 4.604637263084397e-06, "loss": 0.133, "step": 6743 }, { "epoch": 0.54, "grad_norm": 6.5326695306776905, "learning_rate": 4.603345830637792e-06, "loss": 0.6715, "step": 6744 }, { "epoch": 0.54, "grad_norm": 1.2970835298083734, "learning_rate": 4.602054424819782e-06, "loss": 0.1831, "step": 6745 }, { "epoch": 0.54, "grad_norm": 1.4159173533431881, "learning_rate": 4.600763045717067e-06, "loss": 0.1396, "step": 6746 }, { "epoch": 0.54, "grad_norm": 1.458435106898632, "learning_rate": 4.599471693416337e-06, "loss": 0.2519, "step": 6747 }, { "epoch": 0.54, "grad_norm": 8.748821595226993, "learning_rate": 4.598180368004285e-06, "loss": 0.5151, "step": 6748 }, { "epoch": 0.54, "grad_norm": 1.4275414204827768, "learning_rate": 4.5968890695676044e-06, "loss": 0.2001, "step": 6749 }, { "epoch": 0.54, "grad_norm": 1.324307154241692, "learning_rate": 4.59559779819298e-06, "loss": 0.1582, "step": 6750 }, { "epoch": 0.54, "grad_norm": 1.4471224506599358, "learning_rate": 4.594306553967101e-06, "loss": 0.2059, "step": 6751 }, { "epoch": 0.54, "grad_norm": 1.3110883587353426, "learning_rate": 4.5930153369766515e-06, "loss": 0.1577, "step": 6752 }, { "epoch": 0.54, "grad_norm": 1.474741907873556, "learning_rate": 4.591724147308318e-06, "loss": 0.1817, "step": 6753 }, { "epoch": 0.54, "grad_norm": 8.250814597500748, "learning_rate": 4.590432985048777e-06, "loss": 0.6239, "step": 6754 }, { "epoch": 0.54, "grad_norm": 1.2688258885221322, "learning_rate": 4.589141850284712e-06, "loss": 0.1812, "step": 6755 }, { "epoch": 0.54, "grad_norm": 1.4355226756937578, "learning_rate": 4.5878507431028005e-06, "loss": 0.2043, "step": 6756 }, { "epoch": 0.54, "grad_norm": 1.3545989128067748, "learning_rate": 4.5865596635897155e-06, "loss": 0.151, "step": 6757 }, { "epoch": 0.54, "grad_norm": 1.3843928848876577, "learning_rate": 4.585268611832134e-06, "loss": 0.1955, "step": 6758 }, { "epoch": 0.54, "grad_norm": 1.246665766955404, "learning_rate": 4.583977587916725e-06, "loss": 0.1499, "step": 6759 }, { "epoch": 0.54, "grad_norm": 1.3654541255562844, "learning_rate": 4.5826865919301645e-06, "loss": 0.1985, "step": 6760 }, { "epoch": 0.54, "grad_norm": 1.4636712890094812, "learning_rate": 4.581395623959115e-06, "loss": 0.2374, "step": 6761 }, { "epoch": 0.54, "grad_norm": 1.8189095309876577, "learning_rate": 4.580104684090246e-06, "loss": 0.2052, "step": 6762 }, { "epoch": 0.54, "grad_norm": 1.282203251845499, "learning_rate": 4.578813772410221e-06, "loss": 0.1994, "step": 6763 }, { "epoch": 0.54, "grad_norm": 1.5291326164997934, "learning_rate": 4.577522889005706e-06, "loss": 0.2213, "step": 6764 }, { "epoch": 0.54, "grad_norm": 1.4112570720414033, "learning_rate": 4.5762320339633585e-06, "loss": 0.2364, "step": 6765 }, { "epoch": 0.54, "grad_norm": 1.4494647196970396, "learning_rate": 4.5749412073698376e-06, "loss": 0.2213, "step": 6766 }, { "epoch": 0.54, "grad_norm": 1.391002288696143, "learning_rate": 4.573650409311803e-06, "loss": 0.1769, "step": 6767 }, { "epoch": 0.54, "grad_norm": 1.4422199926436379, "learning_rate": 4.572359639875906e-06, "loss": 0.2052, "step": 6768 }, { "epoch": 0.54, "grad_norm": 1.2539810914171403, "learning_rate": 4.571068899148804e-06, "loss": 0.2116, "step": 6769 }, { "epoch": 0.54, "grad_norm": 21.730678253932666, "learning_rate": 4.569778187217144e-06, "loss": 0.5484, "step": 6770 }, { "epoch": 0.54, "grad_norm": 1.372478911989215, "learning_rate": 4.568487504167581e-06, "loss": 0.1715, "step": 6771 }, { "epoch": 0.54, "grad_norm": 1.3829501142972014, "learning_rate": 4.567196850086757e-06, "loss": 0.1625, "step": 6772 }, { "epoch": 0.54, "grad_norm": 1.4281710141016053, "learning_rate": 4.565906225061321e-06, "loss": 0.1642, "step": 6773 }, { "epoch": 0.54, "grad_norm": 1.419482849634811, "learning_rate": 4.564615629177916e-06, "loss": 0.195, "step": 6774 }, { "epoch": 0.54, "grad_norm": 1.296173511883808, "learning_rate": 4.5633250625231806e-06, "loss": 0.1844, "step": 6775 }, { "epoch": 0.54, "grad_norm": 1.4392133476221018, "learning_rate": 4.562034525183758e-06, "loss": 0.2245, "step": 6776 }, { "epoch": 0.54, "grad_norm": 1.3359981875258926, "learning_rate": 4.560744017246284e-06, "loss": 0.1947, "step": 6777 }, { "epoch": 0.54, "grad_norm": 1.9189177146640684, "learning_rate": 4.559453538797398e-06, "loss": 0.151, "step": 6778 }, { "epoch": 0.54, "grad_norm": 1.3112614638625646, "learning_rate": 4.558163089923726e-06, "loss": 0.1754, "step": 6779 }, { "epoch": 0.54, "grad_norm": 1.3247573946077167, "learning_rate": 4.556872670711908e-06, "loss": 0.1868, "step": 6780 }, { "epoch": 0.54, "grad_norm": 1.6118133676327768, "learning_rate": 4.555582281248569e-06, "loss": 0.2036, "step": 6781 }, { "epoch": 0.54, "grad_norm": 1.2877771635078612, "learning_rate": 4.554291921620338e-06, "loss": 0.1689, "step": 6782 }, { "epoch": 0.54, "grad_norm": 8.206120581386168, "learning_rate": 4.5530015919138425e-06, "loss": 0.665, "step": 6783 }, { "epoch": 0.54, "grad_norm": 1.4884691110504413, "learning_rate": 4.551711292215702e-06, "loss": 0.1476, "step": 6784 }, { "epoch": 0.54, "grad_norm": 1.3523883070191174, "learning_rate": 4.550421022612542e-06, "loss": 0.1904, "step": 6785 }, { "epoch": 0.54, "grad_norm": 1.3039106296455183, "learning_rate": 4.549130783190979e-06, "loss": 0.1995, "step": 6786 }, { "epoch": 0.54, "grad_norm": 1.2735095733703758, "learning_rate": 4.547840574037636e-06, "loss": 0.1899, "step": 6787 }, { "epoch": 0.54, "grad_norm": 1.2882889695637363, "learning_rate": 4.546550395239123e-06, "loss": 0.1637, "step": 6788 }, { "epoch": 0.54, "grad_norm": 1.476598782247935, "learning_rate": 4.545260246882056e-06, "loss": 0.2163, "step": 6789 }, { "epoch": 0.54, "grad_norm": 1.3469773257509285, "learning_rate": 4.543970129053047e-06, "loss": 0.1845, "step": 6790 }, { "epoch": 0.54, "grad_norm": 1.341372296139701, "learning_rate": 4.542680041838705e-06, "loss": 0.2192, "step": 6791 }, { "epoch": 0.54, "grad_norm": 1.5037484943636448, "learning_rate": 4.541389985325637e-06, "loss": 0.202, "step": 6792 }, { "epoch": 0.54, "grad_norm": 1.263852218207307, "learning_rate": 4.5400999596004484e-06, "loss": 0.1762, "step": 6793 }, { "epoch": 0.54, "grad_norm": 1.5119791726063663, "learning_rate": 4.538809964749745e-06, "loss": 0.2354, "step": 6794 }, { "epoch": 0.54, "grad_norm": 6.275755030366224, "learning_rate": 4.537520000860124e-06, "loss": 0.533, "step": 6795 }, { "epoch": 0.54, "grad_norm": 1.3754948908209557, "learning_rate": 4.536230068018187e-06, "loss": 0.2322, "step": 6796 }, { "epoch": 0.54, "grad_norm": 1.2910823051613778, "learning_rate": 4.53494016631053e-06, "loss": 0.1942, "step": 6797 }, { "epoch": 0.54, "grad_norm": 1.407064863655799, "learning_rate": 4.533650295823751e-06, "loss": 0.2049, "step": 6798 }, { "epoch": 0.54, "grad_norm": 1.314626772977091, "learning_rate": 4.532360456644438e-06, "loss": 0.2379, "step": 6799 }, { "epoch": 0.54, "grad_norm": 1.5363117560579744, "learning_rate": 4.531070648859186e-06, "loss": 0.1625, "step": 6800 }, { "epoch": 0.54, "grad_norm": 1.2941640012380784, "learning_rate": 4.529780872554582e-06, "loss": 0.1993, "step": 6801 }, { "epoch": 0.54, "grad_norm": 1.345887212749787, "learning_rate": 4.528491127817213e-06, "loss": 0.1971, "step": 6802 }, { "epoch": 0.54, "grad_norm": 1.199838695557625, "learning_rate": 4.5272014147336625e-06, "loss": 0.1809, "step": 6803 }, { "epoch": 0.54, "grad_norm": 1.3335644289691695, "learning_rate": 4.525911733390513e-06, "loss": 0.185, "step": 6804 }, { "epoch": 0.54, "grad_norm": 7.283270776937424, "learning_rate": 4.524622083874347e-06, "loss": 0.5506, "step": 6805 }, { "epoch": 0.54, "grad_norm": 4.819464192253964, "learning_rate": 4.523332466271739e-06, "loss": 0.5976, "step": 6806 }, { "epoch": 0.54, "grad_norm": 1.3239690413298923, "learning_rate": 4.522042880669268e-06, "loss": 0.1849, "step": 6807 }, { "epoch": 0.54, "grad_norm": 1.4368972244893568, "learning_rate": 4.5207533271535075e-06, "loss": 0.2026, "step": 6808 }, { "epoch": 0.54, "grad_norm": 1.4140409112760415, "learning_rate": 4.519463805811026e-06, "loss": 0.191, "step": 6809 }, { "epoch": 0.54, "grad_norm": 1.321285354453828, "learning_rate": 4.518174316728396e-06, "loss": 0.1913, "step": 6810 }, { "epoch": 0.54, "grad_norm": 1.4098685212310236, "learning_rate": 4.516884859992183e-06, "loss": 0.1836, "step": 6811 }, { "epoch": 0.54, "grad_norm": 1.453116455546688, "learning_rate": 4.5155954356889555e-06, "loss": 0.2032, "step": 6812 }, { "epoch": 0.55, "grad_norm": 1.076907258154822, "learning_rate": 4.5143060439052725e-06, "loss": 0.1391, "step": 6813 }, { "epoch": 0.55, "grad_norm": 8.581716515987097, "learning_rate": 4.513016684727697e-06, "loss": 0.7686, "step": 6814 }, { "epoch": 0.55, "grad_norm": 9.17948294903175, "learning_rate": 4.511727358242786e-06, "loss": 0.6994, "step": 6815 }, { "epoch": 0.55, "grad_norm": 6.97366186282323, "learning_rate": 4.5104380645370986e-06, "loss": 0.5059, "step": 6816 }, { "epoch": 0.55, "grad_norm": 1.4225888087599794, "learning_rate": 4.509148803697186e-06, "loss": 0.2253, "step": 6817 }, { "epoch": 0.55, "grad_norm": 1.4557908129285921, "learning_rate": 4.507859575809601e-06, "loss": 0.1578, "step": 6818 }, { "epoch": 0.55, "grad_norm": 1.3421951143343749, "learning_rate": 4.506570380960895e-06, "loss": 0.216, "step": 6819 }, { "epoch": 0.55, "grad_norm": 1.3360203227973844, "learning_rate": 4.505281219237613e-06, "loss": 0.1912, "step": 6820 }, { "epoch": 0.55, "grad_norm": 1.5284278674803466, "learning_rate": 4.503992090726302e-06, "loss": 0.2369, "step": 6821 }, { "epoch": 0.55, "grad_norm": 1.3458386937740916, "learning_rate": 4.5027029955135045e-06, "loss": 0.1861, "step": 6822 }, { "epoch": 0.55, "grad_norm": 1.430955980185571, "learning_rate": 4.501413933685763e-06, "loss": 0.2296, "step": 6823 }, { "epoch": 0.55, "grad_norm": 1.3130955042187646, "learning_rate": 4.500124905329613e-06, "loss": 0.1872, "step": 6824 }, { "epoch": 0.55, "grad_norm": 1.3574178417303149, "learning_rate": 4.498835910531595e-06, "loss": 0.1816, "step": 6825 }, { "epoch": 0.55, "grad_norm": 1.4886990548509318, "learning_rate": 4.4975469493782405e-06, "loss": 0.2169, "step": 6826 }, { "epoch": 0.55, "grad_norm": 1.4922240717432786, "learning_rate": 4.496258021956079e-06, "loss": 0.2192, "step": 6827 }, { "epoch": 0.55, "grad_norm": 1.2060400949021521, "learning_rate": 4.494969128351646e-06, "loss": 0.1756, "step": 6828 }, { "epoch": 0.55, "grad_norm": 1.473464837586222, "learning_rate": 4.493680268651464e-06, "loss": 0.1987, "step": 6829 }, { "epoch": 0.55, "grad_norm": 7.719087545658891, "learning_rate": 4.4923914429420595e-06, "loss": 0.6771, "step": 6830 }, { "epoch": 0.55, "grad_norm": 1.350113007890623, "learning_rate": 4.491102651309954e-06, "loss": 0.2071, "step": 6831 }, { "epoch": 0.55, "grad_norm": 1.4119349846512868, "learning_rate": 4.489813893841673e-06, "loss": 0.2046, "step": 6832 }, { "epoch": 0.55, "grad_norm": 1.3773206686993182, "learning_rate": 4.488525170623729e-06, "loss": 0.19, "step": 6833 }, { "epoch": 0.55, "grad_norm": 1.2684699818610259, "learning_rate": 4.48723648174264e-06, "loss": 0.2011, "step": 6834 }, { "epoch": 0.55, "grad_norm": 1.3613182146830667, "learning_rate": 4.485947827284921e-06, "loss": 0.216, "step": 6835 }, { "epoch": 0.55, "grad_norm": 1.344441486208463, "learning_rate": 4.48465920733708e-06, "loss": 0.1688, "step": 6836 }, { "epoch": 0.55, "grad_norm": 2.269048211686111, "learning_rate": 4.483370621985629e-06, "loss": 0.2608, "step": 6837 }, { "epoch": 0.55, "grad_norm": 1.3238833773034413, "learning_rate": 4.482082071317071e-06, "loss": 0.1863, "step": 6838 }, { "epoch": 0.55, "grad_norm": 1.4585108576039179, "learning_rate": 4.480793555417917e-06, "loss": 0.2283, "step": 6839 }, { "epoch": 0.55, "grad_norm": 1.3794390147776732, "learning_rate": 4.479505074374662e-06, "loss": 0.179, "step": 6840 }, { "epoch": 0.55, "grad_norm": 1.242688706855339, "learning_rate": 4.478216628273809e-06, "loss": 0.1832, "step": 6841 }, { "epoch": 0.55, "grad_norm": 1.4551189133191802, "learning_rate": 4.4769282172018535e-06, "loss": 0.2051, "step": 6842 }, { "epoch": 0.55, "grad_norm": 1.3252719160204203, "learning_rate": 4.475639841245294e-06, "loss": 0.188, "step": 6843 }, { "epoch": 0.55, "grad_norm": 1.321046666609145, "learning_rate": 4.47435150049062e-06, "loss": 0.144, "step": 6844 }, { "epoch": 0.55, "grad_norm": 1.2666294160922735, "learning_rate": 4.47306319502432e-06, "loss": 0.1668, "step": 6845 }, { "epoch": 0.55, "grad_norm": 1.2520355778465115, "learning_rate": 4.4717749249328876e-06, "loss": 0.1753, "step": 6846 }, { "epoch": 0.55, "grad_norm": 1.3341770310064853, "learning_rate": 4.470486690302803e-06, "loss": 0.2079, "step": 6847 }, { "epoch": 0.55, "grad_norm": 1.3143972729257676, "learning_rate": 4.469198491220551e-06, "loss": 0.2113, "step": 6848 }, { "epoch": 0.55, "grad_norm": 1.2747024485157306, "learning_rate": 4.467910327772613e-06, "loss": 0.1636, "step": 6849 }, { "epoch": 0.55, "grad_norm": 1.3692371705987472, "learning_rate": 4.4666222000454685e-06, "loss": 0.1638, "step": 6850 }, { "epoch": 0.55, "grad_norm": 5.282137349925222, "learning_rate": 4.4653341081255895e-06, "loss": 0.6384, "step": 6851 }, { "epoch": 0.55, "grad_norm": 1.277097758981777, "learning_rate": 4.464046052099453e-06, "loss": 0.2047, "step": 6852 }, { "epoch": 0.55, "grad_norm": 1.3148740244438768, "learning_rate": 4.46275803205353e-06, "loss": 0.1945, "step": 6853 }, { "epoch": 0.55, "grad_norm": 1.1592012204460431, "learning_rate": 4.461470048074286e-06, "loss": 0.1322, "step": 6854 }, { "epoch": 0.55, "grad_norm": 6.591474962860619, "learning_rate": 4.46018210024819e-06, "loss": 0.5113, "step": 6855 }, { "epoch": 0.55, "grad_norm": 11.116987230832049, "learning_rate": 4.458894188661704e-06, "loss": 0.621, "step": 6856 }, { "epoch": 0.55, "grad_norm": 1.4075600658815688, "learning_rate": 4.4576063134012935e-06, "loss": 0.1896, "step": 6857 }, { "epoch": 0.55, "grad_norm": 1.193468523185192, "learning_rate": 4.456318474553412e-06, "loss": 0.1836, "step": 6858 }, { "epoch": 0.55, "grad_norm": 1.399412090114669, "learning_rate": 4.45503067220452e-06, "loss": 0.1941, "step": 6859 }, { "epoch": 0.55, "grad_norm": 1.2848275985331417, "learning_rate": 4.4537429064410685e-06, "loss": 0.1609, "step": 6860 }, { "epoch": 0.55, "grad_norm": 1.282004781173103, "learning_rate": 4.452455177349513e-06, "loss": 0.1974, "step": 6861 }, { "epoch": 0.55, "grad_norm": 1.41569947839218, "learning_rate": 4.4511674850163e-06, "loss": 0.202, "step": 6862 }, { "epoch": 0.55, "grad_norm": 1.4823514214157687, "learning_rate": 4.449879829527875e-06, "loss": 0.2178, "step": 6863 }, { "epoch": 0.55, "grad_norm": 1.2950654121797143, "learning_rate": 4.448592210970687e-06, "loss": 0.1638, "step": 6864 }, { "epoch": 0.55, "grad_norm": 1.4244018868375872, "learning_rate": 4.44730462943117e-06, "loss": 0.2138, "step": 6865 }, { "epoch": 0.55, "grad_norm": 1.558880671815828, "learning_rate": 4.446017084995771e-06, "loss": 0.2455, "step": 6866 }, { "epoch": 0.55, "grad_norm": 1.1399705800233373, "learning_rate": 4.444729577750922e-06, "loss": 0.1811, "step": 6867 }, { "epoch": 0.55, "grad_norm": 1.410044543661609, "learning_rate": 4.44344210778306e-06, "loss": 0.1747, "step": 6868 }, { "epoch": 0.55, "grad_norm": 1.277621234477029, "learning_rate": 4.442154675178617e-06, "loss": 0.1688, "step": 6869 }, { "epoch": 0.55, "grad_norm": 3.820579125163696, "learning_rate": 4.4408672800240185e-06, "loss": 0.3982, "step": 6870 }, { "epoch": 0.55, "grad_norm": 1.2871127706426828, "learning_rate": 4.439579922405694e-06, "loss": 0.1496, "step": 6871 }, { "epoch": 0.55, "grad_norm": 1.3362104227675218, "learning_rate": 4.438292602410067e-06, "loss": 0.1793, "step": 6872 }, { "epoch": 0.55, "grad_norm": 1.291759069437098, "learning_rate": 4.437005320123561e-06, "loss": 0.17, "step": 6873 }, { "epoch": 0.55, "grad_norm": 1.5110594109568594, "learning_rate": 4.4357180756325915e-06, "loss": 0.2699, "step": 6874 }, { "epoch": 0.55, "grad_norm": 1.4701123236645992, "learning_rate": 4.434430869023579e-06, "loss": 0.2441, "step": 6875 }, { "epoch": 0.55, "grad_norm": 1.160343572115143, "learning_rate": 4.433143700382935e-06, "loss": 0.1671, "step": 6876 }, { "epoch": 0.55, "grad_norm": 12.434658475586604, "learning_rate": 4.4318565697970734e-06, "loss": 0.5358, "step": 6877 }, { "epoch": 0.55, "grad_norm": 1.625572663278618, "learning_rate": 4.4305694773524e-06, "loss": 0.2088, "step": 6878 }, { "epoch": 0.55, "grad_norm": 1.440715595174671, "learning_rate": 4.429282423135323e-06, "loss": 0.2129, "step": 6879 }, { "epoch": 0.55, "grad_norm": 1.6063175666066074, "learning_rate": 4.4279954072322486e-06, "loss": 0.2345, "step": 6880 }, { "epoch": 0.55, "grad_norm": 1.1151437160679623, "learning_rate": 4.426708429729573e-06, "loss": 0.1838, "step": 6881 }, { "epoch": 0.55, "grad_norm": 1.2969865591012761, "learning_rate": 4.425421490713698e-06, "loss": 0.1628, "step": 6882 }, { "epoch": 0.55, "grad_norm": 1.2689328201483292, "learning_rate": 4.424134590271018e-06, "loss": 0.1839, "step": 6883 }, { "epoch": 0.55, "grad_norm": 1.1777786371775967, "learning_rate": 4.4228477284879305e-06, "loss": 0.1797, "step": 6884 }, { "epoch": 0.55, "grad_norm": 1.2768149001123978, "learning_rate": 4.4215609054508215e-06, "loss": 0.2222, "step": 6885 }, { "epoch": 0.55, "grad_norm": 4.41245343953405, "learning_rate": 4.420274121246083e-06, "loss": 0.4898, "step": 6886 }, { "epoch": 0.55, "grad_norm": 1.2982857678427775, "learning_rate": 4.418987375960099e-06, "loss": 0.1878, "step": 6887 }, { "epoch": 0.55, "grad_norm": 1.3818504444036372, "learning_rate": 4.417700669679252e-06, "loss": 0.2157, "step": 6888 }, { "epoch": 0.55, "grad_norm": 1.2954598200406084, "learning_rate": 4.416414002489924e-06, "loss": 0.2078, "step": 6889 }, { "epoch": 0.55, "grad_norm": 1.3541970334816915, "learning_rate": 4.415127374478491e-06, "loss": 0.1508, "step": 6890 }, { "epoch": 0.55, "grad_norm": 1.298558014680946, "learning_rate": 4.413840785731332e-06, "loss": 0.1592, "step": 6891 }, { "epoch": 0.55, "grad_norm": 1.2252981819302706, "learning_rate": 4.412554236334815e-06, "loss": 0.2092, "step": 6892 }, { "epoch": 0.55, "grad_norm": 1.5705488227601931, "learning_rate": 4.411267726375312e-06, "loss": 0.2424, "step": 6893 }, { "epoch": 0.55, "grad_norm": 1.2593899933175228, "learning_rate": 4.4099812559391906e-06, "loss": 0.174, "step": 6894 }, { "epoch": 0.55, "grad_norm": 8.220553384140846, "learning_rate": 4.4086948251128155e-06, "loss": 0.6292, "step": 6895 }, { "epoch": 0.55, "grad_norm": 1.4870955066854525, "learning_rate": 4.407408433982549e-06, "loss": 0.1898, "step": 6896 }, { "epoch": 0.55, "grad_norm": 1.223490542937882, "learning_rate": 4.406122082634748e-06, "loss": 0.1466, "step": 6897 }, { "epoch": 0.55, "grad_norm": 16.646636375550507, "learning_rate": 4.404835771155774e-06, "loss": 0.5402, "step": 6898 }, { "epoch": 0.55, "grad_norm": 1.4078963759997407, "learning_rate": 4.403549499631974e-06, "loss": 0.1839, "step": 6899 }, { "epoch": 0.55, "grad_norm": 10.404806455728005, "learning_rate": 4.402263268149707e-06, "loss": 0.6505, "step": 6900 }, { "epoch": 0.55, "grad_norm": 1.4117806496624512, "learning_rate": 4.400977076795314e-06, "loss": 0.2174, "step": 6901 }, { "epoch": 0.55, "grad_norm": 1.3217136664515383, "learning_rate": 4.399690925655148e-06, "loss": 0.1952, "step": 6902 }, { "epoch": 0.55, "grad_norm": 1.3545036325548305, "learning_rate": 4.398404814815548e-06, "loss": 0.194, "step": 6903 }, { "epoch": 0.55, "grad_norm": 1.4617570792078414, "learning_rate": 4.397118744362855e-06, "loss": 0.1657, "step": 6904 }, { "epoch": 0.55, "grad_norm": 1.1795052885377475, "learning_rate": 4.39583271438341e-06, "loss": 0.1618, "step": 6905 }, { "epoch": 0.55, "grad_norm": 1.3258257555662984, "learning_rate": 4.394546724963542e-06, "loss": 0.1983, "step": 6906 }, { "epoch": 0.55, "grad_norm": 1.282195119884134, "learning_rate": 4.393260776189589e-06, "loss": 0.1733, "step": 6907 }, { "epoch": 0.55, "grad_norm": 1.2744893457013906, "learning_rate": 4.391974868147875e-06, "loss": 0.167, "step": 6908 }, { "epoch": 0.55, "grad_norm": 1.318358811017309, "learning_rate": 4.390689000924734e-06, "loss": 0.2041, "step": 6909 }, { "epoch": 0.55, "grad_norm": 6.746314917075147, "learning_rate": 4.389403174606484e-06, "loss": 0.6059, "step": 6910 }, { "epoch": 0.55, "grad_norm": 1.4109131453650452, "learning_rate": 4.388117389279452e-06, "loss": 0.2037, "step": 6911 }, { "epoch": 0.55, "grad_norm": 7.684213385405756, "learning_rate": 4.386831645029951e-06, "loss": 0.5197, "step": 6912 }, { "epoch": 0.55, "grad_norm": 1.4488463711988058, "learning_rate": 4.385545941944301e-06, "loss": 0.22, "step": 6913 }, { "epoch": 0.55, "grad_norm": 1.4166541641350296, "learning_rate": 4.3842602801088145e-06, "loss": 0.1896, "step": 6914 }, { "epoch": 0.55, "grad_norm": 1.2281973131274015, "learning_rate": 4.3829746596097975e-06, "loss": 0.2029, "step": 6915 }, { "epoch": 0.55, "grad_norm": 1.2917018253838513, "learning_rate": 4.381689080533564e-06, "loss": 0.1982, "step": 6916 }, { "epoch": 0.55, "grad_norm": 1.3502571550689517, "learning_rate": 4.380403542966414e-06, "loss": 0.1692, "step": 6917 }, { "epoch": 0.55, "grad_norm": 1.5262757647622938, "learning_rate": 4.379118046994654e-06, "loss": 0.2155, "step": 6918 }, { "epoch": 0.55, "grad_norm": 1.2411322665755142, "learning_rate": 4.377832592704578e-06, "loss": 0.1634, "step": 6919 }, { "epoch": 0.55, "grad_norm": 1.292136317930179, "learning_rate": 4.3765471801824865e-06, "loss": 0.1866, "step": 6920 }, { "epoch": 0.55, "grad_norm": 1.252074488100489, "learning_rate": 4.375261809514671e-06, "loss": 0.1468, "step": 6921 }, { "epoch": 0.55, "grad_norm": 8.862687447389899, "learning_rate": 4.373976480787425e-06, "loss": 0.5212, "step": 6922 }, { "epoch": 0.55, "grad_norm": 1.344089182351659, "learning_rate": 4.372691194087034e-06, "loss": 0.2248, "step": 6923 }, { "epoch": 0.55, "grad_norm": 1.467271247957386, "learning_rate": 4.371405949499782e-06, "loss": 0.2118, "step": 6924 }, { "epoch": 0.55, "grad_norm": 1.4654762448921959, "learning_rate": 4.370120747111956e-06, "loss": 0.1774, "step": 6925 }, { "epoch": 0.55, "grad_norm": 1.1342282856647932, "learning_rate": 4.36883558700983e-06, "loss": 0.1314, "step": 6926 }, { "epoch": 0.55, "grad_norm": 1.196690057521876, "learning_rate": 4.3675504692796845e-06, "loss": 0.1498, "step": 6927 }, { "epoch": 0.55, "grad_norm": 1.1382294653395975, "learning_rate": 4.3662653940077915e-06, "loss": 0.1502, "step": 6928 }, { "epoch": 0.55, "grad_norm": 1.3348484263262625, "learning_rate": 4.364980361280425e-06, "loss": 0.179, "step": 6929 }, { "epoch": 0.55, "grad_norm": 1.1444186481565346, "learning_rate": 4.363695371183849e-06, "loss": 0.1401, "step": 6930 }, { "epoch": 0.55, "grad_norm": 1.2163865394392357, "learning_rate": 4.362410423804331e-06, "loss": 0.1526, "step": 6931 }, { "epoch": 0.55, "grad_norm": 6.5688700201532395, "learning_rate": 4.3611255192281335e-06, "loss": 0.5703, "step": 6932 }, { "epoch": 0.55, "grad_norm": 1.550668132794035, "learning_rate": 4.359840657541515e-06, "loss": 0.215, "step": 6933 }, { "epoch": 0.55, "grad_norm": 17.169716410449166, "learning_rate": 4.358555838830733e-06, "loss": 0.6075, "step": 6934 }, { "epoch": 0.55, "grad_norm": 1.2975465806460014, "learning_rate": 4.35727106318204e-06, "loss": 0.1593, "step": 6935 }, { "epoch": 0.55, "grad_norm": 1.2756947230864846, "learning_rate": 4.35598633068169e-06, "loss": 0.1676, "step": 6936 }, { "epoch": 0.55, "grad_norm": 1.4927521500964291, "learning_rate": 4.354701641415927e-06, "loss": 0.2429, "step": 6937 }, { "epoch": 0.56, "grad_norm": 1.2187304943784556, "learning_rate": 4.353416995470999e-06, "loss": 0.1419, "step": 6938 }, { "epoch": 0.56, "grad_norm": 1.2939802364891733, "learning_rate": 4.352132392933148e-06, "loss": 0.1838, "step": 6939 }, { "epoch": 0.56, "grad_norm": 1.3744035356063795, "learning_rate": 4.3508478338886105e-06, "loss": 0.2148, "step": 6940 }, { "epoch": 0.56, "grad_norm": 1.4078221056672584, "learning_rate": 4.3495633184236265e-06, "loss": 0.2053, "step": 6941 }, { "epoch": 0.56, "grad_norm": 1.3336891451497794, "learning_rate": 4.348278846624426e-06, "loss": 0.1849, "step": 6942 }, { "epoch": 0.56, "grad_norm": 1.265314235694341, "learning_rate": 4.346994418577243e-06, "loss": 0.1739, "step": 6943 }, { "epoch": 0.56, "grad_norm": 5.251636127096702, "learning_rate": 4.345710034368301e-06, "loss": 0.7474, "step": 6944 }, { "epoch": 0.56, "grad_norm": 1.3544297690245544, "learning_rate": 4.344425694083829e-06, "loss": 0.2082, "step": 6945 }, { "epoch": 0.56, "grad_norm": 1.5084209740631263, "learning_rate": 4.343141397810044e-06, "loss": 0.189, "step": 6946 }, { "epoch": 0.56, "grad_norm": 1.165570371887376, "learning_rate": 4.34185714563317e-06, "loss": 0.1777, "step": 6947 }, { "epoch": 0.56, "grad_norm": 1.2745033339331835, "learning_rate": 4.340572937639419e-06, "loss": 0.205, "step": 6948 }, { "epoch": 0.56, "grad_norm": 5.312434279134441, "learning_rate": 4.339288773915003e-06, "loss": 0.5459, "step": 6949 }, { "epoch": 0.56, "grad_norm": 4.840235480049677, "learning_rate": 4.338004654546136e-06, "loss": 0.5943, "step": 6950 }, { "epoch": 0.56, "grad_norm": 1.4353667086587494, "learning_rate": 4.336720579619019e-06, "loss": 0.2088, "step": 6951 }, { "epoch": 0.56, "grad_norm": 1.5458032003734916, "learning_rate": 4.335436549219862e-06, "loss": 0.2337, "step": 6952 }, { "epoch": 0.56, "grad_norm": 1.228362293302541, "learning_rate": 4.3341525634348615e-06, "loss": 0.1868, "step": 6953 }, { "epoch": 0.56, "grad_norm": 1.2961893278710508, "learning_rate": 4.332868622350218e-06, "loss": 0.1395, "step": 6954 }, { "epoch": 0.56, "grad_norm": 1.3210471595097677, "learning_rate": 4.331584726052124e-06, "loss": 0.1764, "step": 6955 }, { "epoch": 0.56, "grad_norm": 1.2363000973947313, "learning_rate": 4.330300874626774e-06, "loss": 0.2007, "step": 6956 }, { "epoch": 0.56, "grad_norm": 1.3783389122480192, "learning_rate": 4.329017068160355e-06, "loss": 0.1761, "step": 6957 }, { "epoch": 0.56, "grad_norm": 1.2709263564353028, "learning_rate": 4.327733306739053e-06, "loss": 0.1892, "step": 6958 }, { "epoch": 0.56, "grad_norm": 1.4770586036175555, "learning_rate": 4.3264495904490514e-06, "loss": 0.1886, "step": 6959 }, { "epoch": 0.56, "grad_norm": 1.2471612185615193, "learning_rate": 4.325165919376528e-06, "loss": 0.1552, "step": 6960 }, { "epoch": 0.56, "grad_norm": 1.3455348326533412, "learning_rate": 4.323882293607663e-06, "loss": 0.1782, "step": 6961 }, { "epoch": 0.56, "grad_norm": 1.4291009403387405, "learning_rate": 4.322598713228626e-06, "loss": 0.2572, "step": 6962 }, { "epoch": 0.56, "grad_norm": 1.2909725625655353, "learning_rate": 4.321315178325593e-06, "loss": 0.1698, "step": 6963 }, { "epoch": 0.56, "grad_norm": 1.409756755830917, "learning_rate": 4.320031688984726e-06, "loss": 0.2246, "step": 6964 }, { "epoch": 0.56, "grad_norm": 1.26250424352819, "learning_rate": 4.318748245292193e-06, "loss": 0.1939, "step": 6965 }, { "epoch": 0.56, "grad_norm": 1.4627545534851207, "learning_rate": 4.317464847334154e-06, "loss": 0.2048, "step": 6966 }, { "epoch": 0.56, "grad_norm": 1.297654323781324, "learning_rate": 4.316181495196767e-06, "loss": 0.1873, "step": 6967 }, { "epoch": 0.56, "grad_norm": 1.150156674525904, "learning_rate": 4.31489818896619e-06, "loss": 0.1614, "step": 6968 }, { "epoch": 0.56, "grad_norm": 1.3586783765911885, "learning_rate": 4.313614928728571e-06, "loss": 0.2232, "step": 6969 }, { "epoch": 0.56, "grad_norm": 1.351133736023884, "learning_rate": 4.312331714570064e-06, "loss": 0.1849, "step": 6970 }, { "epoch": 0.56, "grad_norm": 8.8321359627386, "learning_rate": 4.31104854657681e-06, "loss": 0.5887, "step": 6971 }, { "epoch": 0.56, "grad_norm": 1.5096575919780169, "learning_rate": 4.3097654248349565e-06, "loss": 0.2179, "step": 6972 }, { "epoch": 0.56, "grad_norm": 5.439767616226352, "learning_rate": 4.30848234943064e-06, "loss": 0.5017, "step": 6973 }, { "epoch": 0.56, "grad_norm": 1.369051248796137, "learning_rate": 4.30719932045e-06, "loss": 0.2071, "step": 6974 }, { "epoch": 0.56, "grad_norm": 1.3652275023393845, "learning_rate": 4.3059163379791676e-06, "loss": 0.1726, "step": 6975 }, { "epoch": 0.56, "grad_norm": 1.2776663218437052, "learning_rate": 4.304633402104274e-06, "loss": 0.1833, "step": 6976 }, { "epoch": 0.56, "grad_norm": 1.4201243365422023, "learning_rate": 4.303350512911449e-06, "loss": 0.1919, "step": 6977 }, { "epoch": 0.56, "grad_norm": 1.9669754494502172, "learning_rate": 4.302067670486813e-06, "loss": 0.2488, "step": 6978 }, { "epoch": 0.56, "grad_norm": 1.2792280518222674, "learning_rate": 4.300784874916489e-06, "loss": 0.1897, "step": 6979 }, { "epoch": 0.56, "grad_norm": 1.6070159266529367, "learning_rate": 4.299502126286596e-06, "loss": 0.23, "step": 6980 }, { "epoch": 0.56, "grad_norm": 1.1430570673456661, "learning_rate": 4.298219424683247e-06, "loss": 0.1629, "step": 6981 }, { "epoch": 0.56, "grad_norm": 1.2649039861967415, "learning_rate": 4.296936770192554e-06, "loss": 0.206, "step": 6982 }, { "epoch": 0.56, "grad_norm": 1.4334455219775208, "learning_rate": 4.295654162900626e-06, "loss": 0.1932, "step": 6983 }, { "epoch": 0.56, "grad_norm": 1.174591000577947, "learning_rate": 4.29437160289357e-06, "loss": 0.1828, "step": 6984 }, { "epoch": 0.56, "grad_norm": 1.4484149257549188, "learning_rate": 4.293089090257484e-06, "loss": 0.1616, "step": 6985 }, { "epoch": 0.56, "grad_norm": 1.4359006713643834, "learning_rate": 4.29180662507847e-06, "loss": 0.1716, "step": 6986 }, { "epoch": 0.56, "grad_norm": 1.5546392124346744, "learning_rate": 4.290524207442621e-06, "loss": 0.1598, "step": 6987 }, { "epoch": 0.56, "grad_norm": 1.2801803014038105, "learning_rate": 4.289241837436036e-06, "loss": 0.1824, "step": 6988 }, { "epoch": 0.56, "grad_norm": 1.1898885197234932, "learning_rate": 4.287959515144796e-06, "loss": 0.1636, "step": 6989 }, { "epoch": 0.56, "grad_norm": 1.3472893795377605, "learning_rate": 4.286677240654993e-06, "loss": 0.1757, "step": 6990 }, { "epoch": 0.56, "grad_norm": 1.4512722993445515, "learning_rate": 4.285395014052707e-06, "loss": 0.1798, "step": 6991 }, { "epoch": 0.56, "grad_norm": 1.4854113372085438, "learning_rate": 4.28411283542402e-06, "loss": 0.2449, "step": 6992 }, { "epoch": 0.56, "grad_norm": 1.1713536800234112, "learning_rate": 4.282830704855008e-06, "loss": 0.1826, "step": 6993 }, { "epoch": 0.56, "grad_norm": 1.2877775016406923, "learning_rate": 4.281548622431741e-06, "loss": 0.167, "step": 6994 }, { "epoch": 0.56, "grad_norm": 1.2880159931247408, "learning_rate": 4.280266588240294e-06, "loss": 0.1991, "step": 6995 }, { "epoch": 0.56, "grad_norm": 1.2399922881753271, "learning_rate": 4.27898460236673e-06, "loss": 0.2155, "step": 6996 }, { "epoch": 0.56, "grad_norm": 1.3932948467073165, "learning_rate": 4.277702664897117e-06, "loss": 0.1929, "step": 6997 }, { "epoch": 0.56, "grad_norm": 1.272301501424295, "learning_rate": 4.27642077591751e-06, "loss": 0.1816, "step": 6998 }, { "epoch": 0.56, "grad_norm": 1.4366076533987884, "learning_rate": 4.27513893551397e-06, "loss": 0.1995, "step": 6999 }, { "epoch": 0.56, "grad_norm": 1.3474693807999927, "learning_rate": 4.27385714377255e-06, "loss": 0.192, "step": 7000 }, { "epoch": 0.56, "grad_norm": 1.3257592617600902, "learning_rate": 4.272575400779298e-06, "loss": 0.2066, "step": 7001 }, { "epoch": 0.56, "grad_norm": 1.1077017797789337, "learning_rate": 4.271293706620265e-06, "loss": 0.1563, "step": 7002 }, { "epoch": 0.56, "grad_norm": 5.719836949545226, "learning_rate": 4.270012061381492e-06, "loss": 0.5142, "step": 7003 }, { "epoch": 0.56, "grad_norm": 1.3454663428912361, "learning_rate": 4.268730465149024e-06, "loss": 0.1582, "step": 7004 }, { "epoch": 0.56, "grad_norm": 6.0936098196911415, "learning_rate": 4.267448918008892e-06, "loss": 0.4865, "step": 7005 }, { "epoch": 0.56, "grad_norm": 1.4557771354901219, "learning_rate": 4.266167420047136e-06, "loss": 0.202, "step": 7006 }, { "epoch": 0.56, "grad_norm": 1.3924029285186255, "learning_rate": 4.264885971349782e-06, "loss": 0.1816, "step": 7007 }, { "epoch": 0.56, "grad_norm": 1.332664448003593, "learning_rate": 4.263604572002863e-06, "loss": 0.1891, "step": 7008 }, { "epoch": 0.56, "grad_norm": 1.3502064968333063, "learning_rate": 4.262323222092399e-06, "loss": 0.1712, "step": 7009 }, { "epoch": 0.56, "grad_norm": 8.97225876758747, "learning_rate": 4.2610419217044115e-06, "loss": 0.4607, "step": 7010 }, { "epoch": 0.56, "grad_norm": 1.2914339009004334, "learning_rate": 4.25976067092492e-06, "loss": 0.1592, "step": 7011 }, { "epoch": 0.56, "grad_norm": 1.2548329272902767, "learning_rate": 4.2584794698399364e-06, "loss": 0.1751, "step": 7012 }, { "epoch": 0.56, "grad_norm": 1.4242878698417076, "learning_rate": 4.257198318535474e-06, "loss": 0.2037, "step": 7013 }, { "epoch": 0.56, "grad_norm": 1.3964214133138348, "learning_rate": 4.255917217097537e-06, "loss": 0.2058, "step": 7014 }, { "epoch": 0.56, "grad_norm": 1.2803677529839728, "learning_rate": 4.254636165612135e-06, "loss": 0.1515, "step": 7015 }, { "epoch": 0.56, "grad_norm": 1.4076245415062942, "learning_rate": 4.253355164165262e-06, "loss": 0.1716, "step": 7016 }, { "epoch": 0.56, "grad_norm": 1.2401166292813786, "learning_rate": 4.252074212842922e-06, "loss": 0.1662, "step": 7017 }, { "epoch": 0.56, "grad_norm": 1.2208282596730105, "learning_rate": 4.2507933117311055e-06, "loss": 0.1341, "step": 7018 }, { "epoch": 0.56, "grad_norm": 1.4909854122146164, "learning_rate": 4.249512460915802e-06, "loss": 0.1604, "step": 7019 }, { "epoch": 0.56, "grad_norm": 1.419998057773177, "learning_rate": 4.248231660483002e-06, "loss": 0.2213, "step": 7020 }, { "epoch": 0.56, "grad_norm": 1.3084865725366324, "learning_rate": 4.2469509105186884e-06, "loss": 0.1862, "step": 7021 }, { "epoch": 0.56, "grad_norm": 1.218506054439855, "learning_rate": 4.245670211108843e-06, "loss": 0.1776, "step": 7022 }, { "epoch": 0.56, "grad_norm": 1.2450991116548429, "learning_rate": 4.24438956233944e-06, "loss": 0.1815, "step": 7023 }, { "epoch": 0.56, "grad_norm": 1.516621515048516, "learning_rate": 4.2431089642964564e-06, "loss": 0.2309, "step": 7024 }, { "epoch": 0.56, "grad_norm": 1.2602599177120537, "learning_rate": 4.24182841706586e-06, "loss": 0.1947, "step": 7025 }, { "epoch": 0.56, "grad_norm": 1.364875354269461, "learning_rate": 4.240547920733622e-06, "loss": 0.152, "step": 7026 }, { "epoch": 0.56, "grad_norm": 1.2943959212217564, "learning_rate": 4.239267475385701e-06, "loss": 0.1955, "step": 7027 }, { "epoch": 0.56, "grad_norm": 10.375323779324813, "learning_rate": 4.2379870811080585e-06, "loss": 0.4726, "step": 7028 }, { "epoch": 0.56, "grad_norm": 1.2400978945827188, "learning_rate": 4.236706737986654e-06, "loss": 0.1652, "step": 7029 }, { "epoch": 0.56, "grad_norm": 1.419501598502113, "learning_rate": 4.235426446107437e-06, "loss": 0.1746, "step": 7030 }, { "epoch": 0.56, "grad_norm": 1.3570713962999794, "learning_rate": 4.2341462055563605e-06, "loss": 0.234, "step": 7031 }, { "epoch": 0.56, "grad_norm": 1.4746259743296237, "learning_rate": 4.2328660164193695e-06, "loss": 0.2509, "step": 7032 }, { "epoch": 0.56, "grad_norm": 1.2357181973315485, "learning_rate": 4.231585878782408e-06, "loss": 0.1859, "step": 7033 }, { "epoch": 0.56, "grad_norm": 9.006620903034838, "learning_rate": 4.230305792731411e-06, "loss": 0.4002, "step": 7034 }, { "epoch": 0.56, "grad_norm": 1.2816006095719912, "learning_rate": 4.229025758352322e-06, "loss": 0.2076, "step": 7035 }, { "epoch": 0.56, "grad_norm": 1.3689260346436294, "learning_rate": 4.227745775731071e-06, "loss": 0.2045, "step": 7036 }, { "epoch": 0.56, "grad_norm": 7.88186483104484, "learning_rate": 4.226465844953581e-06, "loss": 0.6176, "step": 7037 }, { "epoch": 0.56, "grad_norm": 1.3170562453134165, "learning_rate": 4.225185966105786e-06, "loss": 0.1458, "step": 7038 }, { "epoch": 0.56, "grad_norm": 1.4164123237210053, "learning_rate": 4.223906139273603e-06, "loss": 0.19, "step": 7039 }, { "epoch": 0.56, "grad_norm": 1.2660214247633335, "learning_rate": 4.2226263645429536e-06, "loss": 0.1776, "step": 7040 }, { "epoch": 0.56, "grad_norm": 1.3488838044225833, "learning_rate": 4.22134664199975e-06, "loss": 0.1777, "step": 7041 }, { "epoch": 0.56, "grad_norm": 1.2477248586884493, "learning_rate": 4.220066971729908e-06, "loss": 0.1672, "step": 7042 }, { "epoch": 0.56, "grad_norm": 1.210233556837474, "learning_rate": 4.218787353819331e-06, "loss": 0.1928, "step": 7043 }, { "epoch": 0.56, "grad_norm": 1.5574468566890245, "learning_rate": 4.217507788353927e-06, "loss": 0.2522, "step": 7044 }, { "epoch": 0.56, "grad_norm": 1.4767334596986261, "learning_rate": 4.216228275419598e-06, "loss": 0.1757, "step": 7045 }, { "epoch": 0.56, "grad_norm": 1.0810576555502336, "learning_rate": 4.214948815102237e-06, "loss": 0.1637, "step": 7046 }, { "epoch": 0.56, "grad_norm": 7.865566914567689, "learning_rate": 4.213669407487741e-06, "loss": 0.4762, "step": 7047 }, { "epoch": 0.56, "grad_norm": 1.2631110663073912, "learning_rate": 4.212390052661999e-06, "loss": 0.1766, "step": 7048 }, { "epoch": 0.56, "grad_norm": 1.2462824563629258, "learning_rate": 4.2111107507109025e-06, "loss": 0.1685, "step": 7049 }, { "epoch": 0.56, "grad_norm": 8.495245549531543, "learning_rate": 4.209831501720328e-06, "loss": 0.6785, "step": 7050 }, { "epoch": 0.56, "grad_norm": 1.1862416029389309, "learning_rate": 4.208552305776162e-06, "loss": 0.1693, "step": 7051 }, { "epoch": 0.56, "grad_norm": 1.410656178955726, "learning_rate": 4.207273162964274e-06, "loss": 0.1863, "step": 7052 }, { "epoch": 0.56, "grad_norm": 1.1858643107273845, "learning_rate": 4.205994073370545e-06, "loss": 0.1609, "step": 7053 }, { "epoch": 0.56, "grad_norm": 1.4130544533361011, "learning_rate": 4.2047150370808366e-06, "loss": 0.2121, "step": 7054 }, { "epoch": 0.56, "grad_norm": 1.2534465938727777, "learning_rate": 4.203436054181017e-06, "loss": 0.1495, "step": 7055 }, { "epoch": 0.56, "grad_norm": 1.4031607436177427, "learning_rate": 4.202157124756951e-06, "loss": 0.2121, "step": 7056 }, { "epoch": 0.56, "grad_norm": 1.3183160861657968, "learning_rate": 4.200878248894493e-06, "loss": 0.2265, "step": 7057 }, { "epoch": 0.56, "grad_norm": 1.4880532670727866, "learning_rate": 4.199599426679499e-06, "loss": 0.213, "step": 7058 }, { "epoch": 0.56, "grad_norm": 1.4103992586051797, "learning_rate": 4.198320658197821e-06, "loss": 0.212, "step": 7059 }, { "epoch": 0.56, "grad_norm": 1.4274076082759222, "learning_rate": 4.197041943535307e-06, "loss": 0.1871, "step": 7060 }, { "epoch": 0.56, "grad_norm": 1.4104829129971896, "learning_rate": 4.1957632827778e-06, "loss": 0.2057, "step": 7061 }, { "epoch": 0.56, "grad_norm": 1.2693657613305573, "learning_rate": 4.1944846760111395e-06, "loss": 0.2021, "step": 7062 }, { "epoch": 0.57, "grad_norm": 1.41727602523015, "learning_rate": 4.1932061233211655e-06, "loss": 0.1916, "step": 7063 }, { "epoch": 0.57, "grad_norm": 1.4407651063867084, "learning_rate": 4.191927624793705e-06, "loss": 0.2206, "step": 7064 }, { "epoch": 0.57, "grad_norm": 1.4052147815000997, "learning_rate": 4.190649180514595e-06, "loss": 0.2218, "step": 7065 }, { "epoch": 0.57, "grad_norm": 7.533223574273064, "learning_rate": 4.189370790569655e-06, "loss": 0.6424, "step": 7066 }, { "epoch": 0.57, "grad_norm": 1.2892464479098074, "learning_rate": 4.188092455044713e-06, "loss": 0.1586, "step": 7067 }, { "epoch": 0.57, "grad_norm": 1.448601912956667, "learning_rate": 4.186814174025582e-06, "loss": 0.1728, "step": 7068 }, { "epoch": 0.57, "grad_norm": 1.4520893250336793, "learning_rate": 4.185535947598081e-06, "loss": 0.1886, "step": 7069 }, { "epoch": 0.57, "grad_norm": 1.2475585712879984, "learning_rate": 4.18425777584802e-06, "loss": 0.1564, "step": 7070 }, { "epoch": 0.57, "grad_norm": 5.7682966964681, "learning_rate": 4.182979658861204e-06, "loss": 0.6638, "step": 7071 }, { "epoch": 0.57, "grad_norm": 1.4924274224976382, "learning_rate": 4.18170159672344e-06, "loss": 0.169, "step": 7072 }, { "epoch": 0.57, "grad_norm": 1.4082240650346431, "learning_rate": 4.180423589520526e-06, "loss": 0.1756, "step": 7073 }, { "epoch": 0.57, "grad_norm": 1.3158201895127204, "learning_rate": 4.179145637338262e-06, "loss": 0.1723, "step": 7074 }, { "epoch": 0.57, "grad_norm": 1.4436627337464247, "learning_rate": 4.177867740262437e-06, "loss": 0.2328, "step": 7075 }, { "epoch": 0.57, "grad_norm": 1.3763500700279003, "learning_rate": 4.176589898378843e-06, "loss": 0.1465, "step": 7076 }, { "epoch": 0.57, "grad_norm": 1.5612885961219398, "learning_rate": 4.175312111773261e-06, "loss": 0.2105, "step": 7077 }, { "epoch": 0.57, "grad_norm": 1.3852514398412912, "learning_rate": 4.1740343805314776e-06, "loss": 0.1951, "step": 7078 }, { "epoch": 0.57, "grad_norm": 1.353553475531793, "learning_rate": 4.17275670473927e-06, "loss": 0.1679, "step": 7079 }, { "epoch": 0.57, "grad_norm": 1.409360648745272, "learning_rate": 4.171479084482408e-06, "loss": 0.2005, "step": 7080 }, { "epoch": 0.57, "grad_norm": 1.1911695971613676, "learning_rate": 4.1702015198466675e-06, "loss": 0.1786, "step": 7081 }, { "epoch": 0.57, "grad_norm": 5.9542908327631725, "learning_rate": 4.168924010917812e-06, "loss": 0.571, "step": 7082 }, { "epoch": 0.57, "grad_norm": 1.1497839165226071, "learning_rate": 4.167646557781608e-06, "loss": 0.1231, "step": 7083 }, { "epoch": 0.57, "grad_norm": 1.3351486249153444, "learning_rate": 4.166369160523811e-06, "loss": 0.1958, "step": 7084 }, { "epoch": 0.57, "grad_norm": 1.3848677872586626, "learning_rate": 4.165091819230178e-06, "loss": 0.2093, "step": 7085 }, { "epoch": 0.57, "grad_norm": 1.4968563556603347, "learning_rate": 4.16381453398646e-06, "loss": 0.2019, "step": 7086 }, { "epoch": 0.57, "grad_norm": 1.4847521307670821, "learning_rate": 4.162537304878408e-06, "loss": 0.185, "step": 7087 }, { "epoch": 0.57, "grad_norm": 1.3418719908094174, "learning_rate": 4.1612601319917635e-06, "loss": 0.1816, "step": 7088 }, { "epoch": 0.57, "grad_norm": 1.2478757878901254, "learning_rate": 4.159983015412266e-06, "loss": 0.1644, "step": 7089 }, { "epoch": 0.57, "grad_norm": 1.5087309612078421, "learning_rate": 4.1587059552256566e-06, "loss": 0.1914, "step": 7090 }, { "epoch": 0.57, "grad_norm": 1.3795125220343416, "learning_rate": 4.157428951517662e-06, "loss": 0.1999, "step": 7091 }, { "epoch": 0.57, "grad_norm": 1.3783564072433927, "learning_rate": 4.156152004374018e-06, "loss": 0.163, "step": 7092 }, { "epoch": 0.57, "grad_norm": 1.3975036601694872, "learning_rate": 4.154875113880444e-06, "loss": 0.1996, "step": 7093 }, { "epoch": 0.57, "grad_norm": 5.226472394631826, "learning_rate": 4.1535982801226665e-06, "loss": 0.5928, "step": 7094 }, { "epoch": 0.57, "grad_norm": 7.390939995390869, "learning_rate": 4.152321503186399e-06, "loss": 0.4105, "step": 7095 }, { "epoch": 0.57, "grad_norm": 1.183662954790662, "learning_rate": 4.1510447831573585e-06, "loss": 0.1795, "step": 7096 }, { "epoch": 0.57, "grad_norm": 1.269219912067945, "learning_rate": 4.149768120121255e-06, "loss": 0.1534, "step": 7097 }, { "epoch": 0.57, "grad_norm": 1.244767844828059, "learning_rate": 4.148491514163791e-06, "loss": 0.1985, "step": 7098 }, { "epoch": 0.57, "grad_norm": 1.430119963557811, "learning_rate": 4.147214965370674e-06, "loss": 0.1861, "step": 7099 }, { "epoch": 0.57, "grad_norm": 1.4142096833252733, "learning_rate": 4.145938473827598e-06, "loss": 0.1983, "step": 7100 }, { "epoch": 0.57, "grad_norm": 1.2768083412801121, "learning_rate": 4.144662039620263e-06, "loss": 0.173, "step": 7101 }, { "epoch": 0.57, "grad_norm": 1.3002099585512168, "learning_rate": 4.143385662834354e-06, "loss": 0.1818, "step": 7102 }, { "epoch": 0.57, "grad_norm": 1.2994922290986646, "learning_rate": 4.142109343555562e-06, "loss": 0.1636, "step": 7103 }, { "epoch": 0.57, "grad_norm": 1.2745952938425549, "learning_rate": 4.1408330818695685e-06, "loss": 0.1895, "step": 7104 }, { "epoch": 0.57, "grad_norm": 1.5745945557139085, "learning_rate": 4.139556877862055e-06, "loss": 0.24, "step": 7105 }, { "epoch": 0.57, "grad_norm": 1.5083353817236569, "learning_rate": 4.138280731618694e-06, "loss": 0.2418, "step": 7106 }, { "epoch": 0.57, "grad_norm": 1.3244041117760628, "learning_rate": 4.137004643225158e-06, "loss": 0.1869, "step": 7107 }, { "epoch": 0.57, "grad_norm": 1.4102245231239432, "learning_rate": 4.135728612767117e-06, "loss": 0.1776, "step": 7108 }, { "epoch": 0.57, "grad_norm": 1.2312525690469984, "learning_rate": 4.134452640330231e-06, "loss": 0.1826, "step": 7109 }, { "epoch": 0.57, "grad_norm": 1.3731248695167848, "learning_rate": 4.133176726000163e-06, "loss": 0.1952, "step": 7110 }, { "epoch": 0.57, "grad_norm": 1.2711694566215608, "learning_rate": 4.131900869862566e-06, "loss": 0.1777, "step": 7111 }, { "epoch": 0.57, "grad_norm": 1.3142604154496367, "learning_rate": 4.130625072003096e-06, "loss": 0.1797, "step": 7112 }, { "epoch": 0.57, "grad_norm": 6.532066756912865, "learning_rate": 4.1293493325073975e-06, "loss": 0.6115, "step": 7113 }, { "epoch": 0.57, "grad_norm": 1.2076710368340655, "learning_rate": 4.128073651461116e-06, "loss": 0.1663, "step": 7114 }, { "epoch": 0.57, "grad_norm": 1.1908681142964177, "learning_rate": 4.126798028949894e-06, "loss": 0.1651, "step": 7115 }, { "epoch": 0.57, "grad_norm": 1.3945835314930477, "learning_rate": 4.1255224650593645e-06, "loss": 0.2007, "step": 7116 }, { "epoch": 0.57, "grad_norm": 1.4869429437352748, "learning_rate": 4.124246959875162e-06, "loss": 0.2196, "step": 7117 }, { "epoch": 0.57, "grad_norm": 1.3246297788794583, "learning_rate": 4.1229715134829135e-06, "loss": 0.1942, "step": 7118 }, { "epoch": 0.57, "grad_norm": 1.141957665213491, "learning_rate": 4.121696125968247e-06, "loss": 0.1692, "step": 7119 }, { "epoch": 0.57, "grad_norm": 1.4959309780197632, "learning_rate": 4.120420797416777e-06, "loss": 0.1967, "step": 7120 }, { "epoch": 0.57, "grad_norm": 1.1784984704479655, "learning_rate": 4.119145527914127e-06, "loss": 0.1731, "step": 7121 }, { "epoch": 0.57, "grad_norm": 1.3004888118672955, "learning_rate": 4.1178703175459074e-06, "loss": 0.1843, "step": 7122 }, { "epoch": 0.57, "grad_norm": 1.1552866776476427, "learning_rate": 4.116595166397722e-06, "loss": 0.1719, "step": 7123 }, { "epoch": 0.57, "grad_norm": 1.2806797992837116, "learning_rate": 4.1153200745551835e-06, "loss": 0.1725, "step": 7124 }, { "epoch": 0.57, "grad_norm": 1.4054384204147619, "learning_rate": 4.1140450421038865e-06, "loss": 0.219, "step": 7125 }, { "epoch": 0.57, "grad_norm": 1.2787984463052422, "learning_rate": 4.112770069129431e-06, "loss": 0.183, "step": 7126 }, { "epoch": 0.57, "grad_norm": 1.306066311441043, "learning_rate": 4.111495155717409e-06, "loss": 0.1734, "step": 7127 }, { "epoch": 0.57, "grad_norm": 1.2590250408511354, "learning_rate": 4.11022030195341e-06, "loss": 0.177, "step": 7128 }, { "epoch": 0.57, "grad_norm": 1.4417645248348994, "learning_rate": 4.108945507923017e-06, "loss": 0.1989, "step": 7129 }, { "epoch": 0.57, "grad_norm": 1.1198982616013609, "learning_rate": 4.107670773711812e-06, "loss": 0.1543, "step": 7130 }, { "epoch": 0.57, "grad_norm": 1.3014226841351553, "learning_rate": 4.106396099405373e-06, "loss": 0.1667, "step": 7131 }, { "epoch": 0.57, "grad_norm": 1.5295199405581843, "learning_rate": 4.1051214850892694e-06, "loss": 0.1713, "step": 7132 }, { "epoch": 0.57, "grad_norm": 1.494564081114908, "learning_rate": 4.103846930849073e-06, "loss": 0.1962, "step": 7133 }, { "epoch": 0.57, "grad_norm": 1.4465061374527886, "learning_rate": 4.102572436770346e-06, "loss": 0.2053, "step": 7134 }, { "epoch": 0.57, "grad_norm": 1.3721712143392322, "learning_rate": 4.101298002938653e-06, "loss": 0.2523, "step": 7135 }, { "epoch": 0.57, "grad_norm": 1.2682682117900668, "learning_rate": 4.1000236294395455e-06, "loss": 0.2146, "step": 7136 }, { "epoch": 0.57, "grad_norm": 1.270931477161842, "learning_rate": 4.0987493163585795e-06, "loss": 0.2101, "step": 7137 }, { "epoch": 0.57, "grad_norm": 1.2647219112463088, "learning_rate": 4.097475063781302e-06, "loss": 0.1659, "step": 7138 }, { "epoch": 0.57, "grad_norm": 1.706494840582202, "learning_rate": 4.09620087179326e-06, "loss": 0.1803, "step": 7139 }, { "epoch": 0.57, "grad_norm": 1.2066537738883378, "learning_rate": 4.094926740479991e-06, "loss": 0.1763, "step": 7140 }, { "epoch": 0.57, "grad_norm": 1.3781641249871743, "learning_rate": 4.09365266992703e-06, "loss": 0.1967, "step": 7141 }, { "epoch": 0.57, "grad_norm": 1.439782858536694, "learning_rate": 4.092378660219914e-06, "loss": 0.1703, "step": 7142 }, { "epoch": 0.57, "grad_norm": 6.687432764947525, "learning_rate": 4.0911047114441675e-06, "loss": 0.5043, "step": 7143 }, { "epoch": 0.57, "grad_norm": 1.3210832283737193, "learning_rate": 4.089830823685316e-06, "loss": 0.2099, "step": 7144 }, { "epoch": 0.57, "grad_norm": 1.2916430816375275, "learning_rate": 4.088556997028878e-06, "loss": 0.1538, "step": 7145 }, { "epoch": 0.57, "grad_norm": 1.343492408637802, "learning_rate": 4.087283231560371e-06, "loss": 0.1671, "step": 7146 }, { "epoch": 0.57, "grad_norm": 1.3766599620367632, "learning_rate": 4.086009527365306e-06, "loss": 0.1826, "step": 7147 }, { "epoch": 0.57, "grad_norm": 1.40180586715717, "learning_rate": 4.084735884529191e-06, "loss": 0.1984, "step": 7148 }, { "epoch": 0.57, "grad_norm": 1.3478697507644612, "learning_rate": 4.08346230313753e-06, "loss": 0.1944, "step": 7149 }, { "epoch": 0.57, "grad_norm": 1.3804928364443272, "learning_rate": 4.08218878327582e-06, "loss": 0.1809, "step": 7150 }, { "epoch": 0.57, "grad_norm": 1.2019422840099405, "learning_rate": 4.080915325029559e-06, "loss": 0.177, "step": 7151 }, { "epoch": 0.57, "grad_norm": 1.3947795096219517, "learning_rate": 4.0796419284842355e-06, "loss": 0.1914, "step": 7152 }, { "epoch": 0.57, "grad_norm": 9.759258089829613, "learning_rate": 4.078368593725339e-06, "loss": 0.574, "step": 7153 }, { "epoch": 0.57, "grad_norm": 1.5091467172058228, "learning_rate": 4.077095320838351e-06, "loss": 0.1784, "step": 7154 }, { "epoch": 0.57, "grad_norm": 1.7024569264346199, "learning_rate": 4.07582210990875e-06, "loss": 0.2042, "step": 7155 }, { "epoch": 0.57, "grad_norm": 1.5937428405681713, "learning_rate": 4.074548961022012e-06, "loss": 0.1941, "step": 7156 }, { "epoch": 0.57, "grad_norm": 1.2509277229114883, "learning_rate": 4.073275874263606e-06, "loss": 0.1531, "step": 7157 }, { "epoch": 0.57, "grad_norm": 1.3856597485476003, "learning_rate": 4.072002849718998e-06, "loss": 0.2308, "step": 7158 }, { "epoch": 0.57, "grad_norm": 1.3277721935626268, "learning_rate": 4.07072988747365e-06, "loss": 0.1697, "step": 7159 }, { "epoch": 0.57, "grad_norm": 7.9030577571300284, "learning_rate": 4.069456987613022e-06, "loss": 0.5279, "step": 7160 }, { "epoch": 0.57, "grad_norm": 1.2070186202854654, "learning_rate": 4.068184150222564e-06, "loss": 0.1834, "step": 7161 }, { "epoch": 0.57, "grad_norm": 1.3225249978663933, "learning_rate": 4.066911375387728e-06, "loss": 0.1868, "step": 7162 }, { "epoch": 0.57, "grad_norm": 1.4078303787036004, "learning_rate": 4.065638663193957e-06, "loss": 0.1971, "step": 7163 }, { "epoch": 0.57, "grad_norm": 1.472866445248734, "learning_rate": 4.064366013726695e-06, "loss": 0.1926, "step": 7164 }, { "epoch": 0.57, "grad_norm": 1.4199919415901312, "learning_rate": 4.063093427071376e-06, "loss": 0.2183, "step": 7165 }, { "epoch": 0.57, "grad_norm": 36.08454216215092, "learning_rate": 4.061820903313437e-06, "loss": 0.595, "step": 7166 }, { "epoch": 0.57, "grad_norm": 1.4274507379174581, "learning_rate": 4.060548442538301e-06, "loss": 0.2156, "step": 7167 }, { "epoch": 0.57, "grad_norm": 1.3184223209217552, "learning_rate": 4.059276044831394e-06, "loss": 0.2018, "step": 7168 }, { "epoch": 0.57, "grad_norm": 1.5204527125321803, "learning_rate": 4.0580037102781386e-06, "loss": 0.2179, "step": 7169 }, { "epoch": 0.57, "grad_norm": 1.3466677998238636, "learning_rate": 4.056731438963947e-06, "loss": 0.194, "step": 7170 }, { "epoch": 0.57, "grad_norm": 1.5423211441548683, "learning_rate": 4.055459230974232e-06, "loss": 0.2367, "step": 7171 }, { "epoch": 0.57, "grad_norm": 1.2106235090400705, "learning_rate": 4.0541870863944e-06, "loss": 0.1827, "step": 7172 }, { "epoch": 0.57, "grad_norm": 1.5400957963639583, "learning_rate": 4.0529150053098555e-06, "loss": 0.2008, "step": 7173 }, { "epoch": 0.57, "grad_norm": 1.2964735070603133, "learning_rate": 4.0516429878059955e-06, "loss": 0.2112, "step": 7174 }, { "epoch": 0.57, "grad_norm": 1.230011040644138, "learning_rate": 4.050371033968216e-06, "loss": 0.1728, "step": 7175 }, { "epoch": 0.57, "grad_norm": 1.4800379876456038, "learning_rate": 4.049099143881907e-06, "loss": 0.1993, "step": 7176 }, { "epoch": 0.57, "grad_norm": 1.1961287070366364, "learning_rate": 4.047827317632452e-06, "loss": 0.2237, "step": 7177 }, { "epoch": 0.57, "grad_norm": 1.2225442732621181, "learning_rate": 4.0465555553052344e-06, "loss": 0.1735, "step": 7178 }, { "epoch": 0.57, "grad_norm": 7.974183862850482, "learning_rate": 4.04528385698563e-06, "loss": 0.5976, "step": 7179 }, { "epoch": 0.57, "grad_norm": 1.4847414068981177, "learning_rate": 4.044012222759016e-06, "loss": 0.1917, "step": 7180 }, { "epoch": 0.57, "grad_norm": 1.2039028716992002, "learning_rate": 4.0427406527107554e-06, "loss": 0.1461, "step": 7181 }, { "epoch": 0.57, "grad_norm": 1.4164210169001747, "learning_rate": 4.041469146926216e-06, "loss": 0.1942, "step": 7182 }, { "epoch": 0.57, "grad_norm": 1.314132299011415, "learning_rate": 4.0401977054907585e-06, "loss": 0.1785, "step": 7183 }, { "epoch": 0.57, "grad_norm": 16.712991586046304, "learning_rate": 4.038926328489735e-06, "loss": 0.5571, "step": 7184 }, { "epoch": 0.57, "grad_norm": 1.2727397348308978, "learning_rate": 4.0376550160085e-06, "loss": 0.1724, "step": 7185 }, { "epoch": 0.57, "grad_norm": 1.4778044335377742, "learning_rate": 4.0363837681323984e-06, "loss": 0.1785, "step": 7186 }, { "epoch": 0.57, "grad_norm": 1.5306149605175132, "learning_rate": 4.035112584946776e-06, "loss": 0.194, "step": 7187 }, { "epoch": 0.58, "grad_norm": 1.3353237263199496, "learning_rate": 4.033841466536968e-06, "loss": 0.2045, "step": 7188 }, { "epoch": 0.58, "grad_norm": 1.4915665948339425, "learning_rate": 4.0325704129883095e-06, "loss": 0.1889, "step": 7189 }, { "epoch": 0.58, "grad_norm": 1.3231406288773528, "learning_rate": 4.0312994243861304e-06, "loss": 0.179, "step": 7190 }, { "epoch": 0.58, "grad_norm": 1.2859822603240152, "learning_rate": 4.030028500815757e-06, "loss": 0.1545, "step": 7191 }, { "epoch": 0.58, "grad_norm": 1.1822528162203252, "learning_rate": 4.0287576423625095e-06, "loss": 0.1932, "step": 7192 }, { "epoch": 0.58, "grad_norm": 1.1917582617478444, "learning_rate": 4.027486849111702e-06, "loss": 0.1699, "step": 7193 }, { "epoch": 0.58, "grad_norm": 1.5079727696296428, "learning_rate": 4.026216121148652e-06, "loss": 0.2038, "step": 7194 }, { "epoch": 0.58, "grad_norm": 1.3085550863427062, "learning_rate": 4.024945458558661e-06, "loss": 0.1808, "step": 7195 }, { "epoch": 0.58, "grad_norm": 1.470159106747156, "learning_rate": 4.023674861427038e-06, "loss": 0.2059, "step": 7196 }, { "epoch": 0.58, "grad_norm": 1.367049874024757, "learning_rate": 4.022404329839077e-06, "loss": 0.209, "step": 7197 }, { "epoch": 0.58, "grad_norm": 1.4336436737776328, "learning_rate": 4.021133863880079e-06, "loss": 0.2264, "step": 7198 }, { "epoch": 0.58, "grad_norm": 1.5652711500456202, "learning_rate": 4.019863463635328e-06, "loss": 0.226, "step": 7199 }, { "epoch": 0.58, "grad_norm": 13.266973465291466, "learning_rate": 4.018593129190113e-06, "loss": 0.4666, "step": 7200 }, { "epoch": 0.58, "grad_norm": 6.211890274683438, "learning_rate": 4.0173228606297165e-06, "loss": 0.4664, "step": 7201 }, { "epoch": 0.58, "grad_norm": 1.29203987956892, "learning_rate": 4.016052658039411e-06, "loss": 0.1973, "step": 7202 }, { "epoch": 0.58, "grad_norm": 1.412956216007654, "learning_rate": 4.014782521504473e-06, "loss": 0.172, "step": 7203 }, { "epoch": 0.58, "grad_norm": 1.1547722489529157, "learning_rate": 4.013512451110169e-06, "loss": 0.1448, "step": 7204 }, { "epoch": 0.58, "grad_norm": 1.3855161037256063, "learning_rate": 4.012242446941765e-06, "loss": 0.219, "step": 7205 }, { "epoch": 0.58, "grad_norm": 1.3583219849609947, "learning_rate": 4.010972509084514e-06, "loss": 0.1698, "step": 7206 }, { "epoch": 0.58, "grad_norm": 6.701926580429876, "learning_rate": 4.00970263762368e-06, "loss": 0.6528, "step": 7207 }, { "epoch": 0.58, "grad_norm": 7.257386964436387, "learning_rate": 4.008432832644505e-06, "loss": 0.754, "step": 7208 }, { "epoch": 0.58, "grad_norm": 1.2217689457375813, "learning_rate": 4.00716309423224e-06, "loss": 0.1953, "step": 7209 }, { "epoch": 0.58, "grad_norm": 1.3542655307330598, "learning_rate": 4.005893422472125e-06, "loss": 0.1663, "step": 7210 }, { "epoch": 0.58, "grad_norm": 1.3815808893498447, "learning_rate": 4.004623817449395e-06, "loss": 0.1818, "step": 7211 }, { "epoch": 0.58, "grad_norm": 1.351319682743735, "learning_rate": 4.0033542792492855e-06, "loss": 0.2107, "step": 7212 }, { "epoch": 0.58, "grad_norm": 1.2566727843314711, "learning_rate": 4.002084807957021e-06, "loss": 0.1937, "step": 7213 }, { "epoch": 0.58, "grad_norm": 1.2135643715306499, "learning_rate": 4.0008154036578295e-06, "loss": 0.1648, "step": 7214 }, { "epoch": 0.58, "grad_norm": 1.2706846039476762, "learning_rate": 3.9995460664369254e-06, "loss": 0.1617, "step": 7215 }, { "epoch": 0.58, "grad_norm": 1.4484983551075723, "learning_rate": 3.998276796379526e-06, "loss": 0.2387, "step": 7216 }, { "epoch": 0.58, "grad_norm": 1.3907165236871228, "learning_rate": 3.997007593570839e-06, "loss": 0.2108, "step": 7217 }, { "epoch": 0.58, "grad_norm": 5.494295581784815, "learning_rate": 3.995738458096074e-06, "loss": 0.5456, "step": 7218 }, { "epoch": 0.58, "grad_norm": 1.2814808353939966, "learning_rate": 3.994469390040428e-06, "loss": 0.2003, "step": 7219 }, { "epoch": 0.58, "grad_norm": 1.4836780479030092, "learning_rate": 3.993200389489096e-06, "loss": 0.2324, "step": 7220 }, { "epoch": 0.58, "grad_norm": 1.2133254981524315, "learning_rate": 3.991931456527276e-06, "loss": 0.1787, "step": 7221 }, { "epoch": 0.58, "grad_norm": 1.2024494787929823, "learning_rate": 3.990662591240148e-06, "loss": 0.1815, "step": 7222 }, { "epoch": 0.58, "grad_norm": 1.3462095973212893, "learning_rate": 3.989393793712901e-06, "loss": 0.2104, "step": 7223 }, { "epoch": 0.58, "grad_norm": 7.245980330866405, "learning_rate": 3.9881250640307095e-06, "loss": 0.5851, "step": 7224 }, { "epoch": 0.58, "grad_norm": 6.70998629330086, "learning_rate": 3.98685640227875e-06, "loss": 0.5983, "step": 7225 }, { "epoch": 0.58, "grad_norm": 1.3110779427085042, "learning_rate": 3.985587808542188e-06, "loss": 0.1839, "step": 7226 }, { "epoch": 0.58, "grad_norm": 1.3044678919267945, "learning_rate": 3.98431928290619e-06, "loss": 0.1689, "step": 7227 }, { "epoch": 0.58, "grad_norm": 8.115639521634886, "learning_rate": 3.983050825455919e-06, "loss": 0.45, "step": 7228 }, { "epoch": 0.58, "grad_norm": 1.2136124632940601, "learning_rate": 3.981782436276523e-06, "loss": 0.1416, "step": 7229 }, { "epoch": 0.58, "grad_norm": 1.1596483271668667, "learning_rate": 3.98051411545316e-06, "loss": 0.1614, "step": 7230 }, { "epoch": 0.58, "grad_norm": 1.2485450499440605, "learning_rate": 3.979245863070971e-06, "loss": 0.1639, "step": 7231 }, { "epoch": 0.58, "grad_norm": 1.2677789447725334, "learning_rate": 3.977977679215103e-06, "loss": 0.1812, "step": 7232 }, { "epoch": 0.58, "grad_norm": 1.2780789794305005, "learning_rate": 3.9767095639706875e-06, "loss": 0.1652, "step": 7233 }, { "epoch": 0.58, "grad_norm": 1.369279145262741, "learning_rate": 3.975441517422861e-06, "loss": 0.1715, "step": 7234 }, { "epoch": 0.58, "grad_norm": 1.314966166574355, "learning_rate": 3.974173539656747e-06, "loss": 0.1867, "step": 7235 }, { "epoch": 0.58, "grad_norm": 1.329478524343141, "learning_rate": 3.972905630757475e-06, "loss": 0.2423, "step": 7236 }, { "epoch": 0.58, "grad_norm": 1.5870198651267564, "learning_rate": 3.9716377908101585e-06, "loss": 0.2551, "step": 7237 }, { "epoch": 0.58, "grad_norm": 1.4593353090742065, "learning_rate": 3.9703700198999114e-06, "loss": 0.1982, "step": 7238 }, { "epoch": 0.58, "grad_norm": 1.1610521933801066, "learning_rate": 3.969102318111848e-06, "loss": 0.1674, "step": 7239 }, { "epoch": 0.58, "grad_norm": 1.3747922249590092, "learning_rate": 3.967834685531066e-06, "loss": 0.1909, "step": 7240 }, { "epoch": 0.58, "grad_norm": 1.2550429461544248, "learning_rate": 3.966567122242671e-06, "loss": 0.1551, "step": 7241 }, { "epoch": 0.58, "grad_norm": 1.38659900582652, "learning_rate": 3.965299628331755e-06, "loss": 0.1892, "step": 7242 }, { "epoch": 0.58, "grad_norm": 1.3723044564517897, "learning_rate": 3.964032203883411e-06, "loss": 0.2046, "step": 7243 }, { "epoch": 0.58, "grad_norm": 1.1091939809357614, "learning_rate": 3.962764848982724e-06, "loss": 0.1493, "step": 7244 }, { "epoch": 0.58, "grad_norm": 1.2813879385188385, "learning_rate": 3.961497563714774e-06, "loss": 0.2185, "step": 7245 }, { "epoch": 0.58, "grad_norm": 1.3824616647924133, "learning_rate": 3.960230348164641e-06, "loss": 0.198, "step": 7246 }, { "epoch": 0.58, "grad_norm": 1.2879046942682892, "learning_rate": 3.958963202417393e-06, "loss": 0.1956, "step": 7247 }, { "epoch": 0.58, "grad_norm": 1.211005352708064, "learning_rate": 3.9576961265581015e-06, "loss": 0.1974, "step": 7248 }, { "epoch": 0.58, "grad_norm": 1.3470318334291898, "learning_rate": 3.9564291206718256e-06, "loss": 0.1899, "step": 7249 }, { "epoch": 0.58, "grad_norm": 1.4477895981839048, "learning_rate": 3.955162184843625e-06, "loss": 0.1957, "step": 7250 }, { "epoch": 0.58, "grad_norm": 1.4987139836942087, "learning_rate": 3.953895319158552e-06, "loss": 0.1997, "step": 7251 }, { "epoch": 0.58, "grad_norm": 1.4833305313701641, "learning_rate": 3.952628523701658e-06, "loss": 0.2362, "step": 7252 }, { "epoch": 0.58, "grad_norm": 1.4129279648189654, "learning_rate": 3.9513617985579835e-06, "loss": 0.1699, "step": 7253 }, { "epoch": 0.58, "grad_norm": 1.5007501820886517, "learning_rate": 3.9500951438125675e-06, "loss": 0.2394, "step": 7254 }, { "epoch": 0.58, "grad_norm": 1.4333423660478632, "learning_rate": 3.948828559550448e-06, "loss": 0.1958, "step": 7255 }, { "epoch": 0.58, "grad_norm": 1.3374767923072521, "learning_rate": 3.94756204585665e-06, "loss": 0.1834, "step": 7256 }, { "epoch": 0.58, "grad_norm": 1.3712144727403748, "learning_rate": 3.9462956028162015e-06, "loss": 0.2029, "step": 7257 }, { "epoch": 0.58, "grad_norm": 1.3796075899174916, "learning_rate": 3.945029230514121e-06, "loss": 0.1987, "step": 7258 }, { "epoch": 0.58, "grad_norm": 1.3833419033480725, "learning_rate": 3.943762929035427e-06, "loss": 0.1609, "step": 7259 }, { "epoch": 0.58, "grad_norm": 16.861947351501957, "learning_rate": 3.942496698465125e-06, "loss": 0.4959, "step": 7260 }, { "epoch": 0.58, "grad_norm": 1.1904953967903564, "learning_rate": 3.941230538888225e-06, "loss": 0.2039, "step": 7261 }, { "epoch": 0.58, "grad_norm": 1.5791500606135864, "learning_rate": 3.939964450389728e-06, "loss": 0.1495, "step": 7262 }, { "epoch": 0.58, "grad_norm": 1.2502932460711642, "learning_rate": 3.9386984330546275e-06, "loss": 0.1989, "step": 7263 }, { "epoch": 0.58, "grad_norm": 1.3030336865069205, "learning_rate": 3.937432486967917e-06, "loss": 0.2172, "step": 7264 }, { "epoch": 0.58, "grad_norm": 1.305623944447648, "learning_rate": 3.936166612214583e-06, "loss": 0.1938, "step": 7265 }, { "epoch": 0.58, "grad_norm": 1.5583041322604052, "learning_rate": 3.93490080887961e-06, "loss": 0.1913, "step": 7266 }, { "epoch": 0.58, "grad_norm": 1.408221927955319, "learning_rate": 3.93363507704797e-06, "loss": 0.2101, "step": 7267 }, { "epoch": 0.58, "grad_norm": 5.756777031566555, "learning_rate": 3.932369416804639e-06, "loss": 0.6343, "step": 7268 }, { "epoch": 0.58, "grad_norm": 1.2643221031309184, "learning_rate": 3.9311038282345835e-06, "loss": 0.1828, "step": 7269 }, { "epoch": 0.58, "grad_norm": 1.3947445559094458, "learning_rate": 3.9298383114227675e-06, "loss": 0.1716, "step": 7270 }, { "epoch": 0.58, "grad_norm": 1.366569760486067, "learning_rate": 3.928572866454149e-06, "loss": 0.216, "step": 7271 }, { "epoch": 0.58, "grad_norm": 1.2084717731202264, "learning_rate": 3.927307493413678e-06, "loss": 0.1623, "step": 7272 }, { "epoch": 0.58, "grad_norm": 1.3309071308044624, "learning_rate": 3.926042192386307e-06, "loss": 0.1766, "step": 7273 }, { "epoch": 0.58, "grad_norm": 5.96154928890973, "learning_rate": 3.924776963456976e-06, "loss": 0.4559, "step": 7274 }, { "epoch": 0.58, "grad_norm": 1.3408256639531484, "learning_rate": 3.9235118067106255e-06, "loss": 0.2199, "step": 7275 }, { "epoch": 0.58, "grad_norm": 1.2296060003888072, "learning_rate": 3.922246722232189e-06, "loss": 0.1989, "step": 7276 }, { "epoch": 0.58, "grad_norm": 1.3739498120185585, "learning_rate": 3.9209817101065976e-06, "loss": 0.1819, "step": 7277 }, { "epoch": 0.58, "grad_norm": 1.3116633482285325, "learning_rate": 3.919716770418771e-06, "loss": 0.2217, "step": 7278 }, { "epoch": 0.58, "grad_norm": 1.264149681101779, "learning_rate": 3.918451903253632e-06, "loss": 0.1912, "step": 7279 }, { "epoch": 0.58, "grad_norm": 5.505841838659166, "learning_rate": 3.917187108696094e-06, "loss": 0.5852, "step": 7280 }, { "epoch": 0.58, "grad_norm": 1.3175286290627164, "learning_rate": 3.9159223868310644e-06, "loss": 0.1959, "step": 7281 }, { "epoch": 0.58, "grad_norm": 1.394973453483789, "learning_rate": 3.9146577377434504e-06, "loss": 0.2123, "step": 7282 }, { "epoch": 0.58, "grad_norm": 6.737236978795701, "learning_rate": 3.91339316151815e-06, "loss": 0.7347, "step": 7283 }, { "epoch": 0.58, "grad_norm": 1.3913340592525023, "learning_rate": 3.912128658240062e-06, "loss": 0.1944, "step": 7284 }, { "epoch": 0.58, "grad_norm": 1.3486856455863978, "learning_rate": 3.91086422799407e-06, "loss": 0.1855, "step": 7285 }, { "epoch": 0.58, "grad_norm": 1.332341132058153, "learning_rate": 3.909599870865064e-06, "loss": 0.2036, "step": 7286 }, { "epoch": 0.58, "grad_norm": 1.408748693035325, "learning_rate": 3.908335586937921e-06, "loss": 0.1891, "step": 7287 }, { "epoch": 0.58, "grad_norm": 1.2564039424723448, "learning_rate": 3.90707137629752e-06, "loss": 0.2182, "step": 7288 }, { "epoch": 0.58, "grad_norm": 1.4108145844456303, "learning_rate": 3.905807239028728e-06, "loss": 0.197, "step": 7289 }, { "epoch": 0.58, "grad_norm": 1.2652573889140213, "learning_rate": 3.90454317521641e-06, "loss": 0.1799, "step": 7290 }, { "epoch": 0.58, "grad_norm": 1.5325672993235506, "learning_rate": 3.90327918494543e-06, "loss": 0.2133, "step": 7291 }, { "epoch": 0.58, "grad_norm": 1.2664930846929467, "learning_rate": 3.902015268300638e-06, "loss": 0.1434, "step": 7292 }, { "epoch": 0.58, "grad_norm": 1.294479181218753, "learning_rate": 3.900751425366891e-06, "loss": 0.16, "step": 7293 }, { "epoch": 0.58, "grad_norm": 6.014476079817677, "learning_rate": 3.899487656229029e-06, "loss": 0.5761, "step": 7294 }, { "epoch": 0.58, "grad_norm": 1.190055259488617, "learning_rate": 3.8982239609718965e-06, "loss": 0.1642, "step": 7295 }, { "epoch": 0.58, "grad_norm": 1.4242098772805492, "learning_rate": 3.896960339680327e-06, "loss": 0.1639, "step": 7296 }, { "epoch": 0.58, "grad_norm": 1.2903622985436611, "learning_rate": 3.895696792439154e-06, "loss": 0.1747, "step": 7297 }, { "epoch": 0.58, "grad_norm": 1.2411493020242073, "learning_rate": 3.8944333193332e-06, "loss": 0.203, "step": 7298 }, { "epoch": 0.58, "grad_norm": 1.3927088272991854, "learning_rate": 3.893169920447285e-06, "loss": 0.1657, "step": 7299 }, { "epoch": 0.58, "grad_norm": 5.689409650056911, "learning_rate": 3.89190659586623e-06, "loss": 0.5041, "step": 7300 }, { "epoch": 0.58, "grad_norm": 1.4352267375140668, "learning_rate": 3.89064334567484e-06, "loss": 0.1475, "step": 7301 }, { "epoch": 0.58, "grad_norm": 1.44691062335425, "learning_rate": 3.889380169957925e-06, "loss": 0.2112, "step": 7302 }, { "epoch": 0.58, "grad_norm": 1.4681919586999757, "learning_rate": 3.888117068800283e-06, "loss": 0.2034, "step": 7303 }, { "epoch": 0.58, "grad_norm": 1.4238332640880818, "learning_rate": 3.886854042286713e-06, "loss": 0.2178, "step": 7304 }, { "epoch": 0.58, "grad_norm": 1.3541971856001938, "learning_rate": 3.885591090502003e-06, "loss": 0.2056, "step": 7305 }, { "epoch": 0.58, "grad_norm": 1.2478681334904405, "learning_rate": 3.884328213530938e-06, "loss": 0.1475, "step": 7306 }, { "epoch": 0.58, "grad_norm": 1.2430071161457696, "learning_rate": 3.883065411458303e-06, "loss": 0.1686, "step": 7307 }, { "epoch": 0.58, "grad_norm": 1.3380785198151155, "learning_rate": 3.881802684368869e-06, "loss": 0.1855, "step": 7308 }, { "epoch": 0.58, "grad_norm": 1.4204962082399126, "learning_rate": 3.88054003234741e-06, "loss": 0.2054, "step": 7309 }, { "epoch": 0.58, "grad_norm": 1.7171196870596477, "learning_rate": 3.879277455478689e-06, "loss": 0.1483, "step": 7310 }, { "epoch": 0.58, "grad_norm": 1.3653533886676135, "learning_rate": 3.8780149538474695e-06, "loss": 0.2143, "step": 7311 }, { "epoch": 0.58, "grad_norm": 1.2245971956851003, "learning_rate": 3.876752527538505e-06, "loss": 0.1511, "step": 7312 }, { "epoch": 0.59, "grad_norm": 1.577989930108368, "learning_rate": 3.875490176636547e-06, "loss": 0.1944, "step": 7313 }, { "epoch": 0.59, "grad_norm": 1.290433229405294, "learning_rate": 3.874227901226342e-06, "loss": 0.1689, "step": 7314 }, { "epoch": 0.59, "grad_norm": 1.2561466549410572, "learning_rate": 3.872965701392626e-06, "loss": 0.1672, "step": 7315 }, { "epoch": 0.59, "grad_norm": 1.202917230697213, "learning_rate": 3.871703577220139e-06, "loss": 0.1635, "step": 7316 }, { "epoch": 0.59, "grad_norm": 6.348856074094093, "learning_rate": 3.870441528793608e-06, "loss": 0.6248, "step": 7317 }, { "epoch": 0.59, "grad_norm": 1.5216629122170828, "learning_rate": 3.869179556197762e-06, "loss": 0.2223, "step": 7318 }, { "epoch": 0.59, "grad_norm": 1.1523428611296114, "learning_rate": 3.8679176595173165e-06, "loss": 0.1492, "step": 7319 }, { "epoch": 0.59, "grad_norm": 1.3462948490019122, "learning_rate": 3.8666558388369895e-06, "loss": 0.1923, "step": 7320 }, { "epoch": 0.59, "grad_norm": 1.5065131342707745, "learning_rate": 3.865394094241489e-06, "loss": 0.19, "step": 7321 }, { "epoch": 0.59, "grad_norm": 6.722886375756484, "learning_rate": 3.864132425815523e-06, "loss": 0.5388, "step": 7322 }, { "epoch": 0.59, "grad_norm": 1.3577759439674237, "learning_rate": 3.862870833643787e-06, "loss": 0.1642, "step": 7323 }, { "epoch": 0.59, "grad_norm": 1.3350309504315643, "learning_rate": 3.861609317810977e-06, "loss": 0.1742, "step": 7324 }, { "epoch": 0.59, "grad_norm": 1.499224511840807, "learning_rate": 3.8603478784017845e-06, "loss": 0.1968, "step": 7325 }, { "epoch": 0.59, "grad_norm": 1.2979463969605345, "learning_rate": 3.859086515500889e-06, "loss": 0.1676, "step": 7326 }, { "epoch": 0.59, "grad_norm": 1.2978126268074157, "learning_rate": 3.857825229192975e-06, "loss": 0.2114, "step": 7327 }, { "epoch": 0.59, "grad_norm": 1.2623104915215173, "learning_rate": 3.8565640195627124e-06, "loss": 0.1651, "step": 7328 }, { "epoch": 0.59, "grad_norm": 1.3179280639468434, "learning_rate": 3.8553028866947735e-06, "loss": 0.1801, "step": 7329 }, { "epoch": 0.59, "grad_norm": 1.5416835110817393, "learning_rate": 3.854041830673818e-06, "loss": 0.1971, "step": 7330 }, { "epoch": 0.59, "grad_norm": 1.369397764878767, "learning_rate": 3.8527808515845085e-06, "loss": 0.1945, "step": 7331 }, { "epoch": 0.59, "grad_norm": 1.3705099918842498, "learning_rate": 3.851519949511497e-06, "loss": 0.1758, "step": 7332 }, { "epoch": 0.59, "grad_norm": 1.20368987412326, "learning_rate": 3.8502591245394284e-06, "loss": 0.1402, "step": 7333 }, { "epoch": 0.59, "grad_norm": 6.902551554561779, "learning_rate": 3.8489983767529496e-06, "loss": 0.6011, "step": 7334 }, { "epoch": 0.59, "grad_norm": 1.3342795731762285, "learning_rate": 3.847737706236696e-06, "loss": 0.1653, "step": 7335 }, { "epoch": 0.59, "grad_norm": 1.3971889778707622, "learning_rate": 3.846477113075303e-06, "loss": 0.2057, "step": 7336 }, { "epoch": 0.59, "grad_norm": 1.5417989066358895, "learning_rate": 3.845216597353395e-06, "loss": 0.1849, "step": 7337 }, { "epoch": 0.59, "grad_norm": 1.239566196087453, "learning_rate": 3.843956159155598e-06, "loss": 0.184, "step": 7338 }, { "epoch": 0.59, "grad_norm": 6.676868403445291, "learning_rate": 3.842695798566524e-06, "loss": 0.6097, "step": 7339 }, { "epoch": 0.59, "grad_norm": 1.1238695914318746, "learning_rate": 3.8414355156707895e-06, "loss": 0.1914, "step": 7340 }, { "epoch": 0.59, "grad_norm": 1.2619884640776078, "learning_rate": 3.8401753105529995e-06, "loss": 0.1773, "step": 7341 }, { "epoch": 0.59, "grad_norm": 1.3834536670447832, "learning_rate": 3.838915183297754e-06, "loss": 0.187, "step": 7342 }, { "epoch": 0.59, "grad_norm": 1.3599772940636474, "learning_rate": 3.83765513398965e-06, "loss": 0.1738, "step": 7343 }, { "epoch": 0.59, "grad_norm": 1.3168460736640262, "learning_rate": 3.836395162713277e-06, "loss": 0.1547, "step": 7344 }, { "epoch": 0.59, "grad_norm": 1.174196725447432, "learning_rate": 3.835135269553226e-06, "loss": 0.1662, "step": 7345 }, { "epoch": 0.59, "grad_norm": 1.3547812151742071, "learning_rate": 3.833875454594071e-06, "loss": 0.2106, "step": 7346 }, { "epoch": 0.59, "grad_norm": 1.319521086767589, "learning_rate": 3.832615717920391e-06, "loss": 0.2394, "step": 7347 }, { "epoch": 0.59, "grad_norm": 1.4349408488912931, "learning_rate": 3.831356059616754e-06, "loss": 0.1837, "step": 7348 }, { "epoch": 0.59, "grad_norm": 1.3923479895558382, "learning_rate": 3.830096479767727e-06, "loss": 0.2017, "step": 7349 }, { "epoch": 0.59, "grad_norm": 1.4188967395044612, "learning_rate": 3.828836978457868e-06, "loss": 0.1957, "step": 7350 }, { "epoch": 0.59, "grad_norm": 1.3807581340849508, "learning_rate": 3.82757755577173e-06, "loss": 0.197, "step": 7351 }, { "epoch": 0.59, "grad_norm": 1.4309835614507902, "learning_rate": 3.826318211793863e-06, "loss": 0.1679, "step": 7352 }, { "epoch": 0.59, "grad_norm": 1.3533229613754219, "learning_rate": 3.825058946608811e-06, "loss": 0.1831, "step": 7353 }, { "epoch": 0.59, "grad_norm": 1.1144253943891669, "learning_rate": 3.8237997603011114e-06, "loss": 0.168, "step": 7354 }, { "epoch": 0.59, "grad_norm": 1.3355510851959091, "learning_rate": 3.822540652955298e-06, "loss": 0.2068, "step": 7355 }, { "epoch": 0.59, "grad_norm": 1.2854360436540522, "learning_rate": 3.821281624655899e-06, "loss": 0.1712, "step": 7356 }, { "epoch": 0.59, "grad_norm": 1.3075091330380737, "learning_rate": 3.820022675487434e-06, "loss": 0.167, "step": 7357 }, { "epoch": 0.59, "grad_norm": 1.438058917774604, "learning_rate": 3.818763805534423e-06, "loss": 0.2079, "step": 7358 }, { "epoch": 0.59, "grad_norm": 1.344845578199535, "learning_rate": 3.817505014881378e-06, "loss": 0.1969, "step": 7359 }, { "epoch": 0.59, "grad_norm": 10.685035028985734, "learning_rate": 3.816246303612802e-06, "loss": 0.4843, "step": 7360 }, { "epoch": 0.59, "grad_norm": 1.1494345867212967, "learning_rate": 3.8149876718132e-06, "loss": 0.1422, "step": 7361 }, { "epoch": 0.59, "grad_norm": 5.1339793517121945, "learning_rate": 3.813729119567064e-06, "loss": 0.5433, "step": 7362 }, { "epoch": 0.59, "grad_norm": 23.71054043634761, "learning_rate": 3.812470646958889e-06, "loss": 0.6882, "step": 7363 }, { "epoch": 0.59, "grad_norm": 1.1904680559619152, "learning_rate": 3.8112122540731555e-06, "loss": 0.155, "step": 7364 }, { "epoch": 0.59, "grad_norm": 1.3882303923335082, "learning_rate": 3.809953940994346e-06, "loss": 0.1964, "step": 7365 }, { "epoch": 0.59, "grad_norm": 1.3482520761208174, "learning_rate": 3.8086957078069355e-06, "loss": 0.1459, "step": 7366 }, { "epoch": 0.59, "grad_norm": 1.413423983941598, "learning_rate": 3.807437554595389e-06, "loss": 0.1841, "step": 7367 }, { "epoch": 0.59, "grad_norm": 1.407147553834495, "learning_rate": 3.8061794814441744e-06, "loss": 0.2143, "step": 7368 }, { "epoch": 0.59, "grad_norm": 1.4219859576684297, "learning_rate": 3.8049214884377473e-06, "loss": 0.2003, "step": 7369 }, { "epoch": 0.59, "grad_norm": 1.2524907926269377, "learning_rate": 3.8036635756605635e-06, "loss": 0.1714, "step": 7370 }, { "epoch": 0.59, "grad_norm": 1.5840792263172339, "learning_rate": 3.8024057431970664e-06, "loss": 0.2093, "step": 7371 }, { "epoch": 0.59, "grad_norm": 1.4949971648879672, "learning_rate": 3.801147991131702e-06, "loss": 0.2439, "step": 7372 }, { "epoch": 0.59, "grad_norm": 1.3924912339448179, "learning_rate": 3.7998903195489036e-06, "loss": 0.1723, "step": 7373 }, { "epoch": 0.59, "grad_norm": 1.2453052012600203, "learning_rate": 3.7986327285331065e-06, "loss": 0.1556, "step": 7374 }, { "epoch": 0.59, "grad_norm": 1.3140227569702692, "learning_rate": 3.7973752181687336e-06, "loss": 0.1842, "step": 7375 }, { "epoch": 0.59, "grad_norm": 1.5165240450501503, "learning_rate": 3.7961177885402047e-06, "loss": 0.2034, "step": 7376 }, { "epoch": 0.59, "grad_norm": 1.4845952230114245, "learning_rate": 3.794860439731938e-06, "loss": 0.2301, "step": 7377 }, { "epoch": 0.59, "grad_norm": 1.4541924883692308, "learning_rate": 3.793603171828339e-06, "loss": 0.2159, "step": 7378 }, { "epoch": 0.59, "grad_norm": 1.3678090368279336, "learning_rate": 3.7923459849138173e-06, "loss": 0.1569, "step": 7379 }, { "epoch": 0.59, "grad_norm": 1.344868895910062, "learning_rate": 3.791088879072766e-06, "loss": 0.1805, "step": 7380 }, { "epoch": 0.59, "grad_norm": 1.252466757603435, "learning_rate": 3.789831854389583e-06, "loss": 0.1429, "step": 7381 }, { "epoch": 0.59, "grad_norm": 1.5925025446285608, "learning_rate": 3.788574910948653e-06, "loss": 0.1623, "step": 7382 }, { "epoch": 0.59, "grad_norm": 1.1187667842454694, "learning_rate": 3.7873180488343617e-06, "loss": 0.1623, "step": 7383 }, { "epoch": 0.59, "grad_norm": 10.355446057563517, "learning_rate": 3.786061268131083e-06, "loss": 0.7082, "step": 7384 }, { "epoch": 0.59, "grad_norm": 1.208466107096428, "learning_rate": 3.784804568923188e-06, "loss": 0.1551, "step": 7385 }, { "epoch": 0.59, "grad_norm": 1.3243283590524273, "learning_rate": 3.783547951295047e-06, "loss": 0.1916, "step": 7386 }, { "epoch": 0.59, "grad_norm": 1.359905349145487, "learning_rate": 3.7822914153310155e-06, "loss": 0.1769, "step": 7387 }, { "epoch": 0.59, "grad_norm": 1.1345870616449818, "learning_rate": 3.781034961115452e-06, "loss": 0.1667, "step": 7388 }, { "epoch": 0.59, "grad_norm": 1.4260018473807656, "learning_rate": 3.779778588732703e-06, "loss": 0.2153, "step": 7389 }, { "epoch": 0.59, "grad_norm": 1.3788630135488815, "learning_rate": 3.778522298267117e-06, "loss": 0.2208, "step": 7390 }, { "epoch": 0.59, "grad_norm": 1.3096845616975608, "learning_rate": 3.777266089803028e-06, "loss": 0.1575, "step": 7391 }, { "epoch": 0.59, "grad_norm": 1.5467397213947347, "learning_rate": 3.776009963424772e-06, "loss": 0.2403, "step": 7392 }, { "epoch": 0.59, "grad_norm": 1.3938433913520791, "learning_rate": 3.774753919216677e-06, "loss": 0.1802, "step": 7393 }, { "epoch": 0.59, "grad_norm": 1.5138906224724487, "learning_rate": 3.7734979572630614e-06, "loss": 0.1883, "step": 7394 }, { "epoch": 0.59, "grad_norm": 1.3757047289707907, "learning_rate": 3.772242077648246e-06, "loss": 0.1912, "step": 7395 }, { "epoch": 0.59, "grad_norm": 1.4464650100877543, "learning_rate": 3.770986280456538e-06, "loss": 0.2076, "step": 7396 }, { "epoch": 0.59, "grad_norm": 1.2061700703240137, "learning_rate": 3.769730565772247e-06, "loss": 0.1624, "step": 7397 }, { "epoch": 0.59, "grad_norm": 7.654950339576268, "learning_rate": 3.768474933679668e-06, "loss": 0.668, "step": 7398 }, { "epoch": 0.59, "grad_norm": 1.1532735947493509, "learning_rate": 3.7672193842631e-06, "loss": 0.1382, "step": 7399 }, { "epoch": 0.59, "grad_norm": 6.074590478714253, "learning_rate": 3.7659639176068287e-06, "loss": 0.6437, "step": 7400 }, { "epoch": 0.59, "grad_norm": 1.2989898142938618, "learning_rate": 3.7647085337951404e-06, "loss": 0.1666, "step": 7401 }, { "epoch": 0.59, "grad_norm": 9.69321618047491, "learning_rate": 3.76345323291231e-06, "loss": 0.5837, "step": 7402 }, { "epoch": 0.59, "grad_norm": 1.2830949680657486, "learning_rate": 3.7621980150426103e-06, "loss": 0.1912, "step": 7403 }, { "epoch": 0.59, "grad_norm": 1.3350723415202501, "learning_rate": 3.7609428802703098e-06, "loss": 0.1945, "step": 7404 }, { "epoch": 0.59, "grad_norm": 1.4555069450911204, "learning_rate": 3.7596878286796657e-06, "loss": 0.2106, "step": 7405 }, { "epoch": 0.59, "grad_norm": 6.195476203237622, "learning_rate": 3.758432860354937e-06, "loss": 0.4393, "step": 7406 }, { "epoch": 0.59, "grad_norm": 1.3189697340255864, "learning_rate": 3.7571779753803704e-06, "loss": 0.1553, "step": 7407 }, { "epoch": 0.59, "grad_norm": 9.306025697135167, "learning_rate": 3.7559231738402148e-06, "loss": 0.5819, "step": 7408 }, { "epoch": 0.59, "grad_norm": 7.041457264706336, "learning_rate": 3.7546684558187033e-06, "loss": 0.624, "step": 7409 }, { "epoch": 0.59, "grad_norm": 6.142283640186236, "learning_rate": 3.753413821400073e-06, "loss": 0.5308, "step": 7410 }, { "epoch": 0.59, "grad_norm": 1.3305192291163936, "learning_rate": 3.752159270668551e-06, "loss": 0.1655, "step": 7411 }, { "epoch": 0.59, "grad_norm": 7.222602004316321, "learning_rate": 3.7509048037083556e-06, "loss": 0.522, "step": 7412 }, { "epoch": 0.59, "grad_norm": 1.3644494747539337, "learning_rate": 3.7496504206037066e-06, "loss": 0.2146, "step": 7413 }, { "epoch": 0.59, "grad_norm": 1.3802943046770164, "learning_rate": 3.748396121438812e-06, "loss": 0.1972, "step": 7414 }, { "epoch": 0.59, "grad_norm": 1.3198448301625334, "learning_rate": 3.74714190629788e-06, "loss": 0.1576, "step": 7415 }, { "epoch": 0.59, "grad_norm": 1.3464307843040537, "learning_rate": 3.7458877752651063e-06, "loss": 0.1763, "step": 7416 }, { "epoch": 0.59, "grad_norm": 1.5295505680178565, "learning_rate": 3.744633728424687e-06, "loss": 0.1921, "step": 7417 }, { "epoch": 0.59, "grad_norm": 1.4924802414110787, "learning_rate": 3.743379765860807e-06, "loss": 0.2009, "step": 7418 }, { "epoch": 0.59, "grad_norm": 7.711246508724797, "learning_rate": 3.7421258876576543e-06, "loss": 0.5653, "step": 7419 }, { "epoch": 0.59, "grad_norm": 1.3744690951190388, "learning_rate": 3.7408720938994003e-06, "loss": 0.1861, "step": 7420 }, { "epoch": 0.59, "grad_norm": 1.4289181650334266, "learning_rate": 3.7396183846702164e-06, "loss": 0.2224, "step": 7421 }, { "epoch": 0.59, "grad_norm": 1.2537257287751429, "learning_rate": 3.7383647600542704e-06, "loss": 0.1683, "step": 7422 }, { "epoch": 0.59, "grad_norm": 1.6447995244516012, "learning_rate": 3.7371112201357195e-06, "loss": 0.2004, "step": 7423 }, { "epoch": 0.59, "grad_norm": 1.4164785605311527, "learning_rate": 3.735857764998721e-06, "loss": 0.2419, "step": 7424 }, { "epoch": 0.59, "grad_norm": 1.3062254200506616, "learning_rate": 3.734604394727419e-06, "loss": 0.1567, "step": 7425 }, { "epoch": 0.59, "grad_norm": 1.2216825637783326, "learning_rate": 3.733351109405959e-06, "loss": 0.1904, "step": 7426 }, { "epoch": 0.59, "grad_norm": 1.4614287469241358, "learning_rate": 3.732097909118477e-06, "loss": 0.1818, "step": 7427 }, { "epoch": 0.59, "grad_norm": 1.3939627781385393, "learning_rate": 3.730844793949103e-06, "loss": 0.2065, "step": 7428 }, { "epoch": 0.59, "grad_norm": 1.3809734065067316, "learning_rate": 3.729591763981964e-06, "loss": 0.1821, "step": 7429 }, { "epoch": 0.59, "grad_norm": 1.386682358518999, "learning_rate": 3.7283388193011776e-06, "loss": 0.2017, "step": 7430 }, { "epoch": 0.59, "grad_norm": 5.650003644111188, "learning_rate": 3.727085959990862e-06, "loss": 0.572, "step": 7431 }, { "epoch": 0.59, "grad_norm": 1.439480014225976, "learning_rate": 3.7258331861351203e-06, "loss": 0.1981, "step": 7432 }, { "epoch": 0.59, "grad_norm": 1.3096835472161665, "learning_rate": 3.7245804978180582e-06, "loss": 0.146, "step": 7433 }, { "epoch": 0.59, "grad_norm": 5.238783004698527, "learning_rate": 3.7233278951237706e-06, "loss": 0.5511, "step": 7434 }, { "epoch": 0.59, "grad_norm": 1.4667865170044283, "learning_rate": 3.722075378136352e-06, "loss": 0.2056, "step": 7435 }, { "epoch": 0.59, "grad_norm": 1.2024470881132698, "learning_rate": 3.7208229469398843e-06, "loss": 0.1463, "step": 7436 }, { "epoch": 0.59, "grad_norm": 1.5640815794616751, "learning_rate": 3.719570601618446e-06, "loss": 0.1833, "step": 7437 }, { "epoch": 0.6, "grad_norm": 1.341529092575993, "learning_rate": 3.7183183422561164e-06, "loss": 0.182, "step": 7438 }, { "epoch": 0.6, "grad_norm": 1.4003543930946474, "learning_rate": 3.717066168936957e-06, "loss": 0.223, "step": 7439 }, { "epoch": 0.6, "grad_norm": 1.5641772603681234, "learning_rate": 3.7158140817450335e-06, "loss": 0.1789, "step": 7440 }, { "epoch": 0.6, "grad_norm": 1.3208516158449901, "learning_rate": 3.7145620807644007e-06, "loss": 0.2067, "step": 7441 }, { "epoch": 0.6, "grad_norm": 1.3728452818160795, "learning_rate": 3.713310166079112e-06, "loss": 0.1856, "step": 7442 }, { "epoch": 0.6, "grad_norm": 1.367049377524347, "learning_rate": 3.712058337773209e-06, "loss": 0.189, "step": 7443 }, { "epoch": 0.6, "grad_norm": 1.27522340932222, "learning_rate": 3.7108065959307325e-06, "loss": 0.1835, "step": 7444 }, { "epoch": 0.6, "grad_norm": 1.3062382342742045, "learning_rate": 3.709554940635717e-06, "loss": 0.1396, "step": 7445 }, { "epoch": 0.6, "grad_norm": 1.4265092881339705, "learning_rate": 3.7083033719721852e-06, "loss": 0.2247, "step": 7446 }, { "epoch": 0.6, "grad_norm": 1.3336420016664583, "learning_rate": 3.7070518900241635e-06, "loss": 0.1604, "step": 7447 }, { "epoch": 0.6, "grad_norm": 1.2980302367673076, "learning_rate": 3.7058004948756644e-06, "loss": 0.1848, "step": 7448 }, { "epoch": 0.6, "grad_norm": 1.471112017561321, "learning_rate": 3.704549186610702e-06, "loss": 0.1712, "step": 7449 }, { "epoch": 0.6, "grad_norm": 1.5335352648919394, "learning_rate": 3.703297965313275e-06, "loss": 0.1892, "step": 7450 }, { "epoch": 0.6, "grad_norm": 1.3888886623159546, "learning_rate": 3.702046831067386e-06, "loss": 0.1799, "step": 7451 }, { "epoch": 0.6, "grad_norm": 1.3461049545194068, "learning_rate": 3.7007957839570247e-06, "loss": 0.2082, "step": 7452 }, { "epoch": 0.6, "grad_norm": 1.3408958982430474, "learning_rate": 3.6995448240661807e-06, "loss": 0.1557, "step": 7453 }, { "epoch": 0.6, "grad_norm": 1.1371965313493715, "learning_rate": 3.698293951478832e-06, "loss": 0.1522, "step": 7454 }, { "epoch": 0.6, "grad_norm": 1.3508594150597606, "learning_rate": 3.6970431662789534e-06, "loss": 0.1603, "step": 7455 }, { "epoch": 0.6, "grad_norm": 1.316035061808714, "learning_rate": 3.695792468550517e-06, "loss": 0.1589, "step": 7456 }, { "epoch": 0.6, "grad_norm": 1.393082655964855, "learning_rate": 3.694541858377482e-06, "loss": 0.2224, "step": 7457 }, { "epoch": 0.6, "grad_norm": 1.2701291626823512, "learning_rate": 3.6932913358438073e-06, "loss": 0.1548, "step": 7458 }, { "epoch": 0.6, "grad_norm": 1.247004888546274, "learning_rate": 3.6920409010334447e-06, "loss": 0.162, "step": 7459 }, { "epoch": 0.6, "grad_norm": 1.3799562516138077, "learning_rate": 3.69079055403034e-06, "loss": 0.193, "step": 7460 }, { "epoch": 0.6, "grad_norm": 1.1650803040967792, "learning_rate": 3.68954029491843e-06, "loss": 0.1743, "step": 7461 }, { "epoch": 0.6, "grad_norm": 1.3596883535007305, "learning_rate": 3.6882901237816536e-06, "loss": 0.1979, "step": 7462 }, { "epoch": 0.6, "grad_norm": 1.3471113064670701, "learning_rate": 3.687040040703936e-06, "loss": 0.1731, "step": 7463 }, { "epoch": 0.6, "grad_norm": 5.119143016073538, "learning_rate": 3.6857900457691946e-06, "loss": 0.5809, "step": 7464 }, { "epoch": 0.6, "grad_norm": 11.528103008599109, "learning_rate": 3.6845401390613528e-06, "loss": 0.5254, "step": 7465 }, { "epoch": 0.6, "grad_norm": 1.178810788826177, "learning_rate": 3.6832903206643152e-06, "loss": 0.1495, "step": 7466 }, { "epoch": 0.6, "grad_norm": 5.438829844479788, "learning_rate": 3.682040590661988e-06, "loss": 0.5242, "step": 7467 }, { "epoch": 0.6, "grad_norm": 1.444508332531294, "learning_rate": 3.6807909491382697e-06, "loss": 0.1668, "step": 7468 }, { "epoch": 0.6, "grad_norm": 1.228405633222191, "learning_rate": 3.679541396177053e-06, "loss": 0.168, "step": 7469 }, { "epoch": 0.6, "grad_norm": 1.3985970841179658, "learning_rate": 3.678291931862221e-06, "loss": 0.2054, "step": 7470 }, { "epoch": 0.6, "grad_norm": 1.3779728182027506, "learning_rate": 3.6770425562776577e-06, "loss": 0.1752, "step": 7471 }, { "epoch": 0.6, "grad_norm": 1.2267918382770493, "learning_rate": 3.6757932695072373e-06, "loss": 0.189, "step": 7472 }, { "epoch": 0.6, "grad_norm": 1.3988149699461732, "learning_rate": 3.6745440716348245e-06, "loss": 0.1723, "step": 7473 }, { "epoch": 0.6, "grad_norm": 1.2327734782981703, "learning_rate": 3.6732949627442855e-06, "loss": 0.1426, "step": 7474 }, { "epoch": 0.6, "grad_norm": 1.2593554544320047, "learning_rate": 3.6720459429194743e-06, "loss": 0.1903, "step": 7475 }, { "epoch": 0.6, "grad_norm": 1.498535192534434, "learning_rate": 3.670797012244244e-06, "loss": 0.1702, "step": 7476 }, { "epoch": 0.6, "grad_norm": 1.5066249917509211, "learning_rate": 3.669548170802436e-06, "loss": 0.1875, "step": 7477 }, { "epoch": 0.6, "grad_norm": 1.5060216568663658, "learning_rate": 3.6682994186778914e-06, "loss": 0.2372, "step": 7478 }, { "epoch": 0.6, "grad_norm": 1.2209663220825024, "learning_rate": 3.6670507559544403e-06, "loss": 0.2133, "step": 7479 }, { "epoch": 0.6, "grad_norm": 1.225800617361338, "learning_rate": 3.665802182715913e-06, "loss": 0.1656, "step": 7480 }, { "epoch": 0.6, "grad_norm": 1.1811440149872499, "learning_rate": 3.664553699046127e-06, "loss": 0.1341, "step": 7481 }, { "epoch": 0.6, "grad_norm": 1.159204200656736, "learning_rate": 3.663305305028896e-06, "loss": 0.1612, "step": 7482 }, { "epoch": 0.6, "grad_norm": 1.1319149802103186, "learning_rate": 3.6620570007480317e-06, "loss": 0.1545, "step": 7483 }, { "epoch": 0.6, "grad_norm": 1.5053936472723846, "learning_rate": 3.660808786287333e-06, "loss": 0.1985, "step": 7484 }, { "epoch": 0.6, "grad_norm": 10.716754270459973, "learning_rate": 3.659560661730599e-06, "loss": 0.5565, "step": 7485 }, { "epoch": 0.6, "grad_norm": 1.2617823518684466, "learning_rate": 3.658312627161618e-06, "loss": 0.1621, "step": 7486 }, { "epoch": 0.6, "grad_norm": 7.3171838514049075, "learning_rate": 3.6570646826641777e-06, "loss": 0.5938, "step": 7487 }, { "epoch": 0.6, "grad_norm": 1.4781055033004342, "learning_rate": 3.6558168283220536e-06, "loss": 0.1859, "step": 7488 }, { "epoch": 0.6, "grad_norm": 1.1345368734666488, "learning_rate": 3.6545690642190166e-06, "loss": 0.1717, "step": 7489 }, { "epoch": 0.6, "grad_norm": 1.2550985424396772, "learning_rate": 3.6533213904388377e-06, "loss": 0.207, "step": 7490 }, { "epoch": 0.6, "grad_norm": 10.165873514046666, "learning_rate": 3.6520738070652716e-06, "loss": 0.5318, "step": 7491 }, { "epoch": 0.6, "grad_norm": 1.2936108174242225, "learning_rate": 3.650826314182076e-06, "loss": 0.2, "step": 7492 }, { "epoch": 0.6, "grad_norm": 1.3209209801809625, "learning_rate": 3.6495789118729966e-06, "loss": 0.1961, "step": 7493 }, { "epoch": 0.6, "grad_norm": 1.182927659862257, "learning_rate": 3.6483316002217788e-06, "loss": 0.1313, "step": 7494 }, { "epoch": 0.6, "grad_norm": 8.055882311002078, "learning_rate": 3.6470843793121536e-06, "loss": 0.544, "step": 7495 }, { "epoch": 0.6, "grad_norm": 1.24321505104082, "learning_rate": 3.645837249227855e-06, "loss": 0.1649, "step": 7496 }, { "epoch": 0.6, "grad_norm": 1.3793996212009143, "learning_rate": 3.6445902100526055e-06, "loss": 0.1942, "step": 7497 }, { "epoch": 0.6, "grad_norm": 1.5282592808581068, "learning_rate": 3.6433432618701193e-06, "loss": 0.2232, "step": 7498 }, { "epoch": 0.6, "grad_norm": 1.3766589611361668, "learning_rate": 3.6420964047641117e-06, "loss": 0.1794, "step": 7499 }, { "epoch": 0.6, "grad_norm": 6.9885020315649236, "learning_rate": 3.6408496388182857e-06, "loss": 0.5112, "step": 7500 }, { "epoch": 0.6, "grad_norm": 1.4073188779139203, "learning_rate": 3.6396029641163442e-06, "loss": 0.1957, "step": 7501 }, { "epoch": 0.6, "grad_norm": 1.3374305074074107, "learning_rate": 3.6383563807419754e-06, "loss": 0.1633, "step": 7502 }, { "epoch": 0.6, "grad_norm": 1.3508366168139432, "learning_rate": 3.6371098887788686e-06, "loss": 0.2046, "step": 7503 }, { "epoch": 0.6, "grad_norm": 1.27426675678328, "learning_rate": 3.6358634883107034e-06, "loss": 0.192, "step": 7504 }, { "epoch": 0.6, "grad_norm": 1.393696764505489, "learning_rate": 3.634617179421157e-06, "loss": 0.1785, "step": 7505 }, { "epoch": 0.6, "grad_norm": 1.4690214137560773, "learning_rate": 3.6333709621938974e-06, "loss": 0.2088, "step": 7506 }, { "epoch": 0.6, "grad_norm": 9.187533976740001, "learning_rate": 3.632124836712583e-06, "loss": 0.5973, "step": 7507 }, { "epoch": 0.6, "grad_norm": 1.4211628709665354, "learning_rate": 3.6308788030608743e-06, "loss": 0.164, "step": 7508 }, { "epoch": 0.6, "grad_norm": 1.2812885848728743, "learning_rate": 3.629632861322419e-06, "loss": 0.1788, "step": 7509 }, { "epoch": 0.6, "grad_norm": 1.3052675154983016, "learning_rate": 3.628387011580864e-06, "loss": 0.1882, "step": 7510 }, { "epoch": 0.6, "grad_norm": 1.4031165308666953, "learning_rate": 3.6271412539198426e-06, "loss": 0.2241, "step": 7511 }, { "epoch": 0.6, "grad_norm": 1.185697503275086, "learning_rate": 3.6258955884229883e-06, "loss": 0.159, "step": 7512 }, { "epoch": 0.6, "grad_norm": 1.4927268336185662, "learning_rate": 3.624650015173927e-06, "loss": 0.2578, "step": 7513 }, { "epoch": 0.6, "grad_norm": 1.5207832070740166, "learning_rate": 3.6234045342562783e-06, "loss": 0.17, "step": 7514 }, { "epoch": 0.6, "grad_norm": 4.878797628230829, "learning_rate": 3.622159145753654e-06, "loss": 0.4862, "step": 7515 }, { "epoch": 0.6, "grad_norm": 4.566785270264143, "learning_rate": 3.6209138497496594e-06, "loss": 0.556, "step": 7516 }, { "epoch": 0.6, "grad_norm": 1.3556702769874027, "learning_rate": 3.6196686463278985e-06, "loss": 0.1486, "step": 7517 }, { "epoch": 0.6, "grad_norm": 1.2834144008357922, "learning_rate": 3.6184235355719623e-06, "loss": 0.1589, "step": 7518 }, { "epoch": 0.6, "grad_norm": 11.995637246160584, "learning_rate": 3.61717851756544e-06, "loss": 0.6514, "step": 7519 }, { "epoch": 0.6, "grad_norm": 1.4698049443010799, "learning_rate": 3.615933592391913e-06, "loss": 0.1884, "step": 7520 }, { "epoch": 0.6, "grad_norm": 9.337639983835457, "learning_rate": 3.61468876013496e-06, "loss": 0.6289, "step": 7521 }, { "epoch": 0.6, "grad_norm": 1.1915264872383433, "learning_rate": 3.6134440208781453e-06, "loss": 0.1589, "step": 7522 }, { "epoch": 0.6, "grad_norm": 1.4176072280633316, "learning_rate": 3.612199374705036e-06, "loss": 0.1904, "step": 7523 }, { "epoch": 0.6, "grad_norm": 1.2780457789704345, "learning_rate": 3.6109548216991886e-06, "loss": 0.1827, "step": 7524 }, { "epoch": 0.6, "grad_norm": 1.3048692375746669, "learning_rate": 3.6097103619441505e-06, "loss": 0.1895, "step": 7525 }, { "epoch": 0.6, "grad_norm": 1.2157856552469801, "learning_rate": 3.608465995523469e-06, "loss": 0.1515, "step": 7526 }, { "epoch": 0.6, "grad_norm": 1.2854898774742747, "learning_rate": 3.6072217225206807e-06, "loss": 0.1454, "step": 7527 }, { "epoch": 0.6, "grad_norm": 1.5673726047714536, "learning_rate": 3.6059775430193202e-06, "loss": 0.2069, "step": 7528 }, { "epoch": 0.6, "grad_norm": 1.4160033912894239, "learning_rate": 3.604733457102908e-06, "loss": 0.2027, "step": 7529 }, { "epoch": 0.6, "grad_norm": 1.210170009294503, "learning_rate": 3.6034894648549677e-06, "loss": 0.1859, "step": 7530 }, { "epoch": 0.6, "grad_norm": 1.3530422447115213, "learning_rate": 3.6022455663590106e-06, "loss": 0.1748, "step": 7531 }, { "epoch": 0.6, "grad_norm": 1.3276246144575294, "learning_rate": 3.6010017616985454e-06, "loss": 0.1605, "step": 7532 }, { "epoch": 0.6, "grad_norm": 1.290852611777824, "learning_rate": 3.5997580509570696e-06, "loss": 0.1975, "step": 7533 }, { "epoch": 0.6, "grad_norm": 1.2203583241435583, "learning_rate": 3.598514434218078e-06, "loss": 0.1772, "step": 7534 }, { "epoch": 0.6, "grad_norm": 14.385052275967093, "learning_rate": 3.5972709115650594e-06, "loss": 0.6447, "step": 7535 }, { "epoch": 0.6, "grad_norm": 6.132578278136999, "learning_rate": 3.5960274830814933e-06, "loss": 0.6043, "step": 7536 }, { "epoch": 0.6, "grad_norm": 1.3429669488483478, "learning_rate": 3.5947841488508573e-06, "loss": 0.1597, "step": 7537 }, { "epoch": 0.6, "grad_norm": 1.586767056217181, "learning_rate": 3.593540908956618e-06, "loss": 0.2388, "step": 7538 }, { "epoch": 0.6, "grad_norm": 1.4365577172555777, "learning_rate": 3.5922977634822408e-06, "loss": 0.221, "step": 7539 }, { "epoch": 0.6, "grad_norm": 1.5278347668007461, "learning_rate": 3.5910547125111783e-06, "loss": 0.2072, "step": 7540 }, { "epoch": 0.6, "grad_norm": 1.4105822989562757, "learning_rate": 3.589811756126882e-06, "loss": 0.222, "step": 7541 }, { "epoch": 0.6, "grad_norm": 1.4175948042221154, "learning_rate": 3.588568894412797e-06, "loss": 0.194, "step": 7542 }, { "epoch": 0.6, "grad_norm": 6.481420494634635, "learning_rate": 3.587326127452356e-06, "loss": 0.5376, "step": 7543 }, { "epoch": 0.6, "grad_norm": 1.400464888990005, "learning_rate": 3.5860834553289936e-06, "loss": 0.1888, "step": 7544 }, { "epoch": 0.6, "grad_norm": 26.48533235470663, "learning_rate": 3.5848408781261323e-06, "loss": 0.6311, "step": 7545 }, { "epoch": 0.6, "grad_norm": 1.245566176923873, "learning_rate": 3.583598395927192e-06, "loss": 0.1525, "step": 7546 }, { "epoch": 0.6, "grad_norm": 1.5548970456730904, "learning_rate": 3.5823560088155795e-06, "loss": 0.2024, "step": 7547 }, { "epoch": 0.6, "grad_norm": 1.7874123140806166, "learning_rate": 3.5811137168747077e-06, "loss": 0.2004, "step": 7548 }, { "epoch": 0.6, "grad_norm": 1.2231374061988407, "learning_rate": 3.5798715201879704e-06, "loss": 0.2022, "step": 7549 }, { "epoch": 0.6, "grad_norm": 1.276897209985682, "learning_rate": 3.578629418838757e-06, "loss": 0.1991, "step": 7550 }, { "epoch": 0.6, "grad_norm": 15.977103607319526, "learning_rate": 3.5773874129104612e-06, "loss": 0.5714, "step": 7551 }, { "epoch": 0.6, "grad_norm": 1.3115728722161808, "learning_rate": 3.5761455024864566e-06, "loss": 0.1794, "step": 7552 }, { "epoch": 0.6, "grad_norm": 9.352057917511193, "learning_rate": 3.5749036876501196e-06, "loss": 0.4839, "step": 7553 }, { "epoch": 0.6, "grad_norm": 1.3934247326858136, "learning_rate": 3.573661968484815e-06, "loss": 0.1907, "step": 7554 }, { "epoch": 0.6, "grad_norm": 1.2854035859297976, "learning_rate": 3.572420345073906e-06, "loss": 0.2049, "step": 7555 }, { "epoch": 0.6, "grad_norm": 1.4767036692536906, "learning_rate": 3.571178817500742e-06, "loss": 0.21, "step": 7556 }, { "epoch": 0.6, "grad_norm": 1.326700707158939, "learning_rate": 3.5699373858486752e-06, "loss": 0.1847, "step": 7557 }, { "epoch": 0.6, "grad_norm": 1.3569676489826743, "learning_rate": 3.5686960502010455e-06, "loss": 0.2228, "step": 7558 }, { "epoch": 0.6, "grad_norm": 1.2272979979778909, "learning_rate": 3.5674548106411844e-06, "loss": 0.1724, "step": 7559 }, { "epoch": 0.6, "grad_norm": 1.4133170140252964, "learning_rate": 3.566213667252423e-06, "loss": 0.217, "step": 7560 }, { "epoch": 0.6, "grad_norm": 1.3659650901587805, "learning_rate": 3.564972620118081e-06, "loss": 0.1844, "step": 7561 }, { "epoch": 0.6, "grad_norm": 1.3379080507717929, "learning_rate": 3.5637316693214772e-06, "loss": 0.1761, "step": 7562 }, { "epoch": 0.61, "grad_norm": 1.2400290425687925, "learning_rate": 3.562490814945917e-06, "loss": 0.1847, "step": 7563 }, { "epoch": 0.61, "grad_norm": 1.3477721264104463, "learning_rate": 3.5612500570747037e-06, "loss": 0.1973, "step": 7564 }, { "epoch": 0.61, "grad_norm": 1.3367138246389696, "learning_rate": 3.560009395791133e-06, "loss": 0.1787, "step": 7565 }, { "epoch": 0.61, "grad_norm": 1.2800948325263146, "learning_rate": 3.5587688311784963e-06, "loss": 0.1529, "step": 7566 }, { "epoch": 0.61, "grad_norm": 1.3257861510925042, "learning_rate": 3.557528363320074e-06, "loss": 0.1444, "step": 7567 }, { "epoch": 0.61, "grad_norm": 1.4869956882389639, "learning_rate": 3.556287992299142e-06, "loss": 0.1905, "step": 7568 }, { "epoch": 0.61, "grad_norm": 1.3946942671802685, "learning_rate": 3.5550477181989735e-06, "loss": 0.167, "step": 7569 }, { "epoch": 0.61, "grad_norm": 1.5693927004355817, "learning_rate": 3.553807541102827e-06, "loss": 0.1942, "step": 7570 }, { "epoch": 0.61, "grad_norm": 1.2585721562428815, "learning_rate": 3.552567461093964e-06, "loss": 0.1708, "step": 7571 }, { "epoch": 0.61, "grad_norm": 10.649281630139912, "learning_rate": 3.5513274782556317e-06, "loss": 0.6914, "step": 7572 }, { "epoch": 0.61, "grad_norm": 1.3754912565916209, "learning_rate": 3.5500875926710767e-06, "loss": 0.1753, "step": 7573 }, { "epoch": 0.61, "grad_norm": 1.3172062692412108, "learning_rate": 3.548847804423533e-06, "loss": 0.2079, "step": 7574 }, { "epoch": 0.61, "grad_norm": 1.289101228007214, "learning_rate": 3.5476081135962335e-06, "loss": 0.1724, "step": 7575 }, { "epoch": 0.61, "grad_norm": 1.3324086064607157, "learning_rate": 3.546368520272404e-06, "loss": 0.1579, "step": 7576 }, { "epoch": 0.61, "grad_norm": 1.338356358261095, "learning_rate": 3.545129024535257e-06, "loss": 0.165, "step": 7577 }, { "epoch": 0.61, "grad_norm": 1.4686927822429694, "learning_rate": 3.5438896264680076e-06, "loss": 0.2159, "step": 7578 }, { "epoch": 0.61, "grad_norm": 1.3205479928749009, "learning_rate": 3.5426503261538593e-06, "loss": 0.179, "step": 7579 }, { "epoch": 0.61, "grad_norm": 1.2585725435464314, "learning_rate": 3.541411123676012e-06, "loss": 0.1817, "step": 7580 }, { "epoch": 0.61, "grad_norm": 1.3819962298236277, "learning_rate": 3.5401720191176527e-06, "loss": 0.1539, "step": 7581 }, { "epoch": 0.61, "grad_norm": 1.3453113246727373, "learning_rate": 3.5389330125619704e-06, "loss": 0.195, "step": 7582 }, { "epoch": 0.61, "grad_norm": 1.5013823222163911, "learning_rate": 3.5376941040921405e-06, "loss": 0.2354, "step": 7583 }, { "epoch": 0.61, "grad_norm": 7.166024823687403, "learning_rate": 3.536455293791338e-06, "loss": 0.5595, "step": 7584 }, { "epoch": 0.61, "grad_norm": 1.423666160410486, "learning_rate": 3.5352165817427255e-06, "loss": 0.1924, "step": 7585 }, { "epoch": 0.61, "grad_norm": 1.262063551825697, "learning_rate": 3.5339779680294607e-06, "loss": 0.1449, "step": 7586 }, { "epoch": 0.61, "grad_norm": 1.4286578508004075, "learning_rate": 3.532739452734698e-06, "loss": 0.1837, "step": 7587 }, { "epoch": 0.61, "grad_norm": 1.6105203598287376, "learning_rate": 3.53150103594158e-06, "loss": 0.1903, "step": 7588 }, { "epoch": 0.61, "grad_norm": 1.324396296583796, "learning_rate": 3.530262717733248e-06, "loss": 0.19, "step": 7589 }, { "epoch": 0.61, "grad_norm": 1.1603638911680982, "learning_rate": 3.5290244981928323e-06, "loss": 0.1544, "step": 7590 }, { "epoch": 0.61, "grad_norm": 1.245711260333263, "learning_rate": 3.527786377403459e-06, "loss": 0.1593, "step": 7591 }, { "epoch": 0.61, "grad_norm": 1.5162537477604785, "learning_rate": 3.5265483554482467e-06, "loss": 0.181, "step": 7592 }, { "epoch": 0.61, "grad_norm": 1.3329470001493515, "learning_rate": 3.525310432410309e-06, "loss": 0.1617, "step": 7593 }, { "epoch": 0.61, "grad_norm": 1.4462918183988256, "learning_rate": 3.5240726083727496e-06, "loss": 0.1754, "step": 7594 }, { "epoch": 0.61, "grad_norm": 1.2569823386119239, "learning_rate": 3.5228348834186663e-06, "loss": 0.1661, "step": 7595 }, { "epoch": 0.61, "grad_norm": 1.4367291315088795, "learning_rate": 3.521597257631155e-06, "loss": 0.2202, "step": 7596 }, { "epoch": 0.61, "grad_norm": 1.3726751461783724, "learning_rate": 3.520359731093298e-06, "loss": 0.1998, "step": 7597 }, { "epoch": 0.61, "grad_norm": 1.3912977858240747, "learning_rate": 3.519122303888176e-06, "loss": 0.2047, "step": 7598 }, { "epoch": 0.61, "grad_norm": 1.4015214203826476, "learning_rate": 3.5178849760988593e-06, "loss": 0.1694, "step": 7599 }, { "epoch": 0.61, "grad_norm": 1.3991201624490694, "learning_rate": 3.516647747808417e-06, "loss": 0.2459, "step": 7600 }, { "epoch": 0.61, "grad_norm": 1.2774890632509242, "learning_rate": 3.5154106190999036e-06, "loss": 0.1465, "step": 7601 }, { "epoch": 0.61, "grad_norm": 1.3324939861498035, "learning_rate": 3.5141735900563746e-06, "loss": 0.1719, "step": 7602 }, { "epoch": 0.61, "grad_norm": 1.3542774066196974, "learning_rate": 3.512936660760875e-06, "loss": 0.1955, "step": 7603 }, { "epoch": 0.61, "grad_norm": 11.307975947363177, "learning_rate": 3.5116998312964413e-06, "loss": 0.659, "step": 7604 }, { "epoch": 0.61, "grad_norm": 1.2935988722347063, "learning_rate": 3.510463101746109e-06, "loss": 0.2074, "step": 7605 }, { "epoch": 0.61, "grad_norm": 1.266181534546647, "learning_rate": 3.509226472192899e-06, "loss": 0.1909, "step": 7606 }, { "epoch": 0.61, "grad_norm": 6.2111987523200485, "learning_rate": 3.507989942719837e-06, "loss": 0.6301, "step": 7607 }, { "epoch": 0.61, "grad_norm": 6.178419033978148, "learning_rate": 3.5067535134099274e-06, "loss": 0.6075, "step": 7608 }, { "epoch": 0.61, "grad_norm": 1.3089199409041692, "learning_rate": 3.50551718434618e-06, "loss": 0.175, "step": 7609 }, { "epoch": 0.61, "grad_norm": 5.833102505061196, "learning_rate": 3.504280955611593e-06, "loss": 0.54, "step": 7610 }, { "epoch": 0.61, "grad_norm": 1.363730638232593, "learning_rate": 3.5030448272891553e-06, "loss": 0.1575, "step": 7611 }, { "epoch": 0.61, "grad_norm": 1.4370250673821596, "learning_rate": 3.5018087994618556e-06, "loss": 0.2055, "step": 7612 }, { "epoch": 0.61, "grad_norm": 1.4068002437763976, "learning_rate": 3.5005728722126686e-06, "loss": 0.1842, "step": 7613 }, { "epoch": 0.61, "grad_norm": 1.3511516319963588, "learning_rate": 3.49933704562457e-06, "loss": 0.2344, "step": 7614 }, { "epoch": 0.61, "grad_norm": 6.962411198775959, "learning_rate": 3.4981013197805208e-06, "loss": 0.3796, "step": 7615 }, { "epoch": 0.61, "grad_norm": 1.5168025411515509, "learning_rate": 3.4968656947634814e-06, "loss": 0.1945, "step": 7616 }, { "epoch": 0.61, "grad_norm": 13.672894166039077, "learning_rate": 3.495630170656401e-06, "loss": 0.6423, "step": 7617 }, { "epoch": 0.61, "grad_norm": 1.4326468776112502, "learning_rate": 3.4943947475422278e-06, "loss": 0.1993, "step": 7618 }, { "epoch": 0.61, "grad_norm": 1.3149488398469633, "learning_rate": 3.493159425503896e-06, "loss": 0.2069, "step": 7619 }, { "epoch": 0.61, "grad_norm": 1.4174205893387537, "learning_rate": 3.491924204624336e-06, "loss": 0.1712, "step": 7620 }, { "epoch": 0.61, "grad_norm": 1.3653752078130281, "learning_rate": 3.490689084986475e-06, "loss": 0.1848, "step": 7621 }, { "epoch": 0.61, "grad_norm": 5.93548743011966, "learning_rate": 3.489454066673228e-06, "loss": 0.5816, "step": 7622 }, { "epoch": 0.61, "grad_norm": 1.159499819188324, "learning_rate": 3.4882191497675065e-06, "loss": 0.1582, "step": 7623 }, { "epoch": 0.61, "grad_norm": 1.2767827020845832, "learning_rate": 3.4869843343522123e-06, "loss": 0.1686, "step": 7624 }, { "epoch": 0.61, "grad_norm": 1.37048136204285, "learning_rate": 3.4857496205102475e-06, "loss": 0.209, "step": 7625 }, { "epoch": 0.61, "grad_norm": 1.3891180972239856, "learning_rate": 3.4845150083244963e-06, "loss": 0.2104, "step": 7626 }, { "epoch": 0.61, "grad_norm": 1.2515538765774992, "learning_rate": 3.4832804978778447e-06, "loss": 0.183, "step": 7627 }, { "epoch": 0.61, "grad_norm": 1.3235646726261303, "learning_rate": 3.4820460892531706e-06, "loss": 0.1473, "step": 7628 }, { "epoch": 0.61, "grad_norm": 1.3708491025148604, "learning_rate": 3.480811782533339e-06, "loss": 0.2017, "step": 7629 }, { "epoch": 0.61, "grad_norm": 1.273397435985446, "learning_rate": 3.4795775778012165e-06, "loss": 0.1494, "step": 7630 }, { "epoch": 0.61, "grad_norm": 7.921175869396291, "learning_rate": 3.4783434751396567e-06, "loss": 0.5103, "step": 7631 }, { "epoch": 0.61, "grad_norm": 1.2883991178882805, "learning_rate": 3.4771094746315126e-06, "loss": 0.1798, "step": 7632 }, { "epoch": 0.61, "grad_norm": 1.3906837367068652, "learning_rate": 3.4758755763596197e-06, "loss": 0.1867, "step": 7633 }, { "epoch": 0.61, "grad_norm": 1.5226721301884454, "learning_rate": 3.474641780406821e-06, "loss": 0.1836, "step": 7634 }, { "epoch": 0.61, "grad_norm": 1.2205031281529022, "learning_rate": 3.473408086855939e-06, "loss": 0.1892, "step": 7635 }, { "epoch": 0.61, "grad_norm": 1.3446611788610965, "learning_rate": 3.4721744957897984e-06, "loss": 0.2019, "step": 7636 }, { "epoch": 0.61, "grad_norm": 1.3964940878261913, "learning_rate": 3.470941007291214e-06, "loss": 0.1809, "step": 7637 }, { "epoch": 0.61, "grad_norm": 1.427049912357395, "learning_rate": 3.469707621442991e-06, "loss": 0.1649, "step": 7638 }, { "epoch": 0.61, "grad_norm": 1.4225924599064432, "learning_rate": 3.468474338327933e-06, "loss": 0.2261, "step": 7639 }, { "epoch": 0.61, "grad_norm": 1.2929281936854873, "learning_rate": 3.4672411580288313e-06, "loss": 0.1748, "step": 7640 }, { "epoch": 0.61, "grad_norm": 6.66907069235532, "learning_rate": 3.466008080628478e-06, "loss": 0.6368, "step": 7641 }, { "epoch": 0.61, "grad_norm": 1.312003020561713, "learning_rate": 3.464775106209647e-06, "loss": 0.1764, "step": 7642 }, { "epoch": 0.61, "grad_norm": 5.868388268494398, "learning_rate": 3.463542234855116e-06, "loss": 0.5721, "step": 7643 }, { "epoch": 0.61, "grad_norm": 1.321817824598701, "learning_rate": 3.4623094666476487e-06, "loss": 0.1845, "step": 7644 }, { "epoch": 0.61, "grad_norm": 1.129538034585738, "learning_rate": 3.461076801670008e-06, "loss": 0.1349, "step": 7645 }, { "epoch": 0.61, "grad_norm": 1.2563461149416881, "learning_rate": 3.4598442400049436e-06, "loss": 0.2118, "step": 7646 }, { "epoch": 0.61, "grad_norm": 1.2342879626180465, "learning_rate": 3.458611781735201e-06, "loss": 0.1692, "step": 7647 }, { "epoch": 0.61, "grad_norm": 1.1630122494249018, "learning_rate": 3.4573794269435213e-06, "loss": 0.16, "step": 7648 }, { "epoch": 0.61, "grad_norm": 1.461939952654303, "learning_rate": 3.456147175712633e-06, "loss": 0.2348, "step": 7649 }, { "epoch": 0.61, "grad_norm": 1.4328513030543346, "learning_rate": 3.4549150281252635e-06, "loss": 0.193, "step": 7650 }, { "epoch": 0.61, "grad_norm": 1.3540324226204616, "learning_rate": 3.4536829842641283e-06, "loss": 0.2145, "step": 7651 }, { "epoch": 0.61, "grad_norm": 8.611858139845058, "learning_rate": 3.452451044211942e-06, "loss": 0.5789, "step": 7652 }, { "epoch": 0.61, "grad_norm": 5.235855693672821, "learning_rate": 3.4512192080514034e-06, "loss": 0.5535, "step": 7653 }, { "epoch": 0.61, "grad_norm": 1.3627137730320358, "learning_rate": 3.449987475865213e-06, "loss": 0.1726, "step": 7654 }, { "epoch": 0.61, "grad_norm": 1.3406922542141064, "learning_rate": 3.448755847736062e-06, "loss": 0.1482, "step": 7655 }, { "epoch": 0.61, "grad_norm": 1.4493548693777634, "learning_rate": 3.447524323746627e-06, "loss": 0.1662, "step": 7656 }, { "epoch": 0.61, "grad_norm": 1.3488234622088493, "learning_rate": 3.44629290397959e-06, "loss": 0.1826, "step": 7657 }, { "epoch": 0.61, "grad_norm": 1.3300626906485307, "learning_rate": 3.4450615885176166e-06, "loss": 0.2215, "step": 7658 }, { "epoch": 0.61, "grad_norm": 1.605805248608052, "learning_rate": 3.4438303774433726e-06, "loss": 0.2204, "step": 7659 }, { "epoch": 0.61, "grad_norm": 1.13786421682388, "learning_rate": 3.442599270839508e-06, "loss": 0.1601, "step": 7660 }, { "epoch": 0.61, "grad_norm": 1.4071404716846476, "learning_rate": 3.441368268788674e-06, "loss": 0.1972, "step": 7661 }, { "epoch": 0.61, "grad_norm": 1.2391649788741828, "learning_rate": 3.44013737137351e-06, "loss": 0.2007, "step": 7662 }, { "epoch": 0.61, "grad_norm": 1.2652779391049336, "learning_rate": 3.4389065786766516e-06, "loss": 0.1821, "step": 7663 }, { "epoch": 0.61, "grad_norm": 1.3730957516895934, "learning_rate": 3.437675890780724e-06, "loss": 0.176, "step": 7664 }, { "epoch": 0.61, "grad_norm": 14.907094636467663, "learning_rate": 3.436445307768347e-06, "loss": 0.5789, "step": 7665 }, { "epoch": 0.61, "grad_norm": 1.4595203884817498, "learning_rate": 3.4352148297221355e-06, "loss": 0.2077, "step": 7666 }, { "epoch": 0.61, "grad_norm": 1.3858247353886801, "learning_rate": 3.4339844567246918e-06, "loss": 0.1401, "step": 7667 }, { "epoch": 0.61, "grad_norm": 1.331750757497658, "learning_rate": 3.4327541888586168e-06, "loss": 0.1857, "step": 7668 }, { "epoch": 0.61, "grad_norm": 1.4545698433165115, "learning_rate": 3.431524026206501e-06, "loss": 0.1835, "step": 7669 }, { "epoch": 0.61, "grad_norm": 1.4580999643554522, "learning_rate": 3.430293968850931e-06, "loss": 0.1555, "step": 7670 }, { "epoch": 0.61, "grad_norm": 1.3779169160989622, "learning_rate": 3.4290640168744826e-06, "loss": 0.2026, "step": 7671 }, { "epoch": 0.61, "grad_norm": 6.938520070855153, "learning_rate": 3.4278341703597253e-06, "loss": 0.6658, "step": 7672 }, { "epoch": 0.61, "grad_norm": 1.5223876222983108, "learning_rate": 3.4266044293892254e-06, "loss": 0.2085, "step": 7673 }, { "epoch": 0.61, "grad_norm": 1.312150468697119, "learning_rate": 3.425374794045536e-06, "loss": 0.1794, "step": 7674 }, { "epoch": 0.61, "grad_norm": 1.1899787047623551, "learning_rate": 3.4241452644112085e-06, "loss": 0.1765, "step": 7675 }, { "epoch": 0.61, "grad_norm": 1.3446414265389965, "learning_rate": 3.4229158405687824e-06, "loss": 0.2066, "step": 7676 }, { "epoch": 0.61, "grad_norm": 1.4684917839439975, "learning_rate": 3.4216865226007956e-06, "loss": 0.1669, "step": 7677 }, { "epoch": 0.61, "grad_norm": 1.4924335066369387, "learning_rate": 3.420457310589774e-06, "loss": 0.2005, "step": 7678 }, { "epoch": 0.61, "grad_norm": 1.224297504529469, "learning_rate": 3.41922820461824e-06, "loss": 0.1585, "step": 7679 }, { "epoch": 0.61, "grad_norm": 1.3101762361023874, "learning_rate": 3.417999204768706e-06, "loss": 0.1978, "step": 7680 }, { "epoch": 0.61, "grad_norm": 1.4138713086705448, "learning_rate": 3.416770311123677e-06, "loss": 0.2163, "step": 7681 }, { "epoch": 0.61, "grad_norm": 1.34388490539271, "learning_rate": 3.4155415237656573e-06, "loss": 0.1603, "step": 7682 }, { "epoch": 0.61, "grad_norm": 1.33357589681583, "learning_rate": 3.4143128427771327e-06, "loss": 0.1781, "step": 7683 }, { "epoch": 0.61, "grad_norm": 1.3801021835872518, "learning_rate": 3.413084268240593e-06, "loss": 0.1795, "step": 7684 }, { "epoch": 0.61, "grad_norm": 1.5027114688289296, "learning_rate": 3.4118558002385127e-06, "loss": 0.1846, "step": 7685 }, { "epoch": 0.61, "grad_norm": 1.4440702629590831, "learning_rate": 3.4106274388533677e-06, "loss": 0.176, "step": 7686 }, { "epoch": 0.61, "grad_norm": 1.3353566814125521, "learning_rate": 3.4093991841676155e-06, "loss": 0.1743, "step": 7687 }, { "epoch": 0.62, "grad_norm": 1.2163258164733546, "learning_rate": 3.4081710362637167e-06, "loss": 0.1359, "step": 7688 }, { "epoch": 0.62, "grad_norm": 1.2251374318458983, "learning_rate": 3.4069429952241196e-06, "loss": 0.1703, "step": 7689 }, { "epoch": 0.62, "grad_norm": 1.4283081048042512, "learning_rate": 3.4057150611312644e-06, "loss": 0.182, "step": 7690 }, { "epoch": 0.62, "grad_norm": 1.4760255211680222, "learning_rate": 3.404487234067589e-06, "loss": 0.2401, "step": 7691 }, { "epoch": 0.62, "grad_norm": 1.2494232795423872, "learning_rate": 3.4032595141155183e-06, "loss": 0.175, "step": 7692 }, { "epoch": 0.62, "grad_norm": 1.3604982892370947, "learning_rate": 3.4020319013574765e-06, "loss": 0.1725, "step": 7693 }, { "epoch": 0.62, "grad_norm": 1.341118165255663, "learning_rate": 3.400804395875873e-06, "loss": 0.1509, "step": 7694 }, { "epoch": 0.62, "grad_norm": 1.492104364674769, "learning_rate": 3.399576997753117e-06, "loss": 0.2353, "step": 7695 }, { "epoch": 0.62, "grad_norm": 1.2745162264618315, "learning_rate": 3.398349707071604e-06, "loss": 0.208, "step": 7696 }, { "epoch": 0.62, "grad_norm": 1.2720977657707615, "learning_rate": 3.397122523913732e-06, "loss": 0.1508, "step": 7697 }, { "epoch": 0.62, "grad_norm": 1.3838446990349442, "learning_rate": 3.3958954483618787e-06, "loss": 0.1731, "step": 7698 }, { "epoch": 0.62, "grad_norm": 1.3683112633740533, "learning_rate": 3.394668480498424e-06, "loss": 0.1798, "step": 7699 }, { "epoch": 0.62, "grad_norm": 1.3225059137619923, "learning_rate": 3.3934416204057396e-06, "loss": 0.1735, "step": 7700 }, { "epoch": 0.62, "grad_norm": 1.4857631690953512, "learning_rate": 3.392214868166186e-06, "loss": 0.1877, "step": 7701 }, { "epoch": 0.62, "grad_norm": 1.4729268937410531, "learning_rate": 3.3909882238621194e-06, "loss": 0.1771, "step": 7702 }, { "epoch": 0.62, "grad_norm": 1.4064951237762462, "learning_rate": 3.389761687575888e-06, "loss": 0.1787, "step": 7703 }, { "epoch": 0.62, "grad_norm": 5.415383611665988, "learning_rate": 3.388535259389836e-06, "loss": 0.5764, "step": 7704 }, { "epoch": 0.62, "grad_norm": 1.352028177375627, "learning_rate": 3.387308939386291e-06, "loss": 0.1423, "step": 7705 }, { "epoch": 0.62, "grad_norm": 1.1279000827610544, "learning_rate": 3.386082727647586e-06, "loss": 0.1291, "step": 7706 }, { "epoch": 0.62, "grad_norm": 6.536461377366811, "learning_rate": 3.3848566242560375e-06, "loss": 0.6163, "step": 7707 }, { "epoch": 0.62, "grad_norm": 1.501787021073541, "learning_rate": 3.3836306292939557e-06, "loss": 0.1833, "step": 7708 }, { "epoch": 0.62, "grad_norm": 12.615404589841992, "learning_rate": 3.382404742843648e-06, "loss": 0.4738, "step": 7709 }, { "epoch": 0.62, "grad_norm": 7.192450974711765, "learning_rate": 3.38117896498741e-06, "loss": 0.6507, "step": 7710 }, { "epoch": 0.62, "grad_norm": 1.3267504834250634, "learning_rate": 3.3799532958075353e-06, "loss": 0.1815, "step": 7711 }, { "epoch": 0.62, "grad_norm": 1.3311650267560347, "learning_rate": 3.378727735386302e-06, "loss": 0.1678, "step": 7712 }, { "epoch": 0.62, "grad_norm": 1.5202942941280007, "learning_rate": 3.377502283805989e-06, "loss": 0.1754, "step": 7713 }, { "epoch": 0.62, "grad_norm": 1.2640521743395985, "learning_rate": 3.376276941148863e-06, "loss": 0.1567, "step": 7714 }, { "epoch": 0.62, "grad_norm": 11.461272207425104, "learning_rate": 3.375051707497187e-06, "loss": 0.4622, "step": 7715 }, { "epoch": 0.62, "grad_norm": 1.4444112970232195, "learning_rate": 3.373826582933213e-06, "loss": 0.1896, "step": 7716 }, { "epoch": 0.62, "grad_norm": 1.3759555178134941, "learning_rate": 3.3726015675391864e-06, "loss": 0.1558, "step": 7717 }, { "epoch": 0.62, "grad_norm": 1.6195306744055968, "learning_rate": 3.371376661397349e-06, "loss": 0.1983, "step": 7718 }, { "epoch": 0.62, "grad_norm": 1.637523838995361, "learning_rate": 3.370151864589928e-06, "loss": 0.2058, "step": 7719 }, { "epoch": 0.62, "grad_norm": 1.3481068474569262, "learning_rate": 3.368927177199154e-06, "loss": 0.1615, "step": 7720 }, { "epoch": 0.62, "grad_norm": 1.4378463529080978, "learning_rate": 3.367702599307239e-06, "loss": 0.1949, "step": 7721 }, { "epoch": 0.62, "grad_norm": 1.4169852129334608, "learning_rate": 3.366478130996394e-06, "loss": 0.2023, "step": 7722 }, { "epoch": 0.62, "grad_norm": 1.269424226763755, "learning_rate": 3.365253772348821e-06, "loss": 0.1729, "step": 7723 }, { "epoch": 0.62, "grad_norm": 1.3385725195025544, "learning_rate": 3.3640295234467167e-06, "loss": 0.182, "step": 7724 }, { "epoch": 0.62, "grad_norm": 1.242560393136787, "learning_rate": 3.3628053843722674e-06, "loss": 0.1487, "step": 7725 }, { "epoch": 0.62, "grad_norm": 1.3922206540484354, "learning_rate": 3.3615813552076513e-06, "loss": 0.1884, "step": 7726 }, { "epoch": 0.62, "grad_norm": 1.4277988751449666, "learning_rate": 3.3603574360350454e-06, "loss": 0.1825, "step": 7727 }, { "epoch": 0.62, "grad_norm": 1.3209581766965965, "learning_rate": 3.359133626936611e-06, "loss": 0.182, "step": 7728 }, { "epoch": 0.62, "grad_norm": 12.484666441966619, "learning_rate": 3.3579099279945083e-06, "loss": 0.7144, "step": 7729 }, { "epoch": 0.62, "grad_norm": 1.3261838838735356, "learning_rate": 3.3566863392908864e-06, "loss": 0.1851, "step": 7730 }, { "epoch": 0.62, "grad_norm": 1.502429668614384, "learning_rate": 3.355462860907892e-06, "loss": 0.2346, "step": 7731 }, { "epoch": 0.62, "grad_norm": 1.3985378017109573, "learning_rate": 3.354239492927658e-06, "loss": 0.1883, "step": 7732 }, { "epoch": 0.62, "grad_norm": 1.5973159535836983, "learning_rate": 3.3530162354323113e-06, "loss": 0.1749, "step": 7733 }, { "epoch": 0.62, "grad_norm": 9.3514475249712, "learning_rate": 3.351793088503977e-06, "loss": 0.4768, "step": 7734 }, { "epoch": 0.62, "grad_norm": 5.7001347967437965, "learning_rate": 3.3505700522247652e-06, "loss": 0.5179, "step": 7735 }, { "epoch": 0.62, "grad_norm": 1.3680083301584514, "learning_rate": 3.349347126676784e-06, "loss": 0.1973, "step": 7736 }, { "epoch": 0.62, "grad_norm": 1.3763767039578554, "learning_rate": 3.34812431194213e-06, "loss": 0.2082, "step": 7737 }, { "epoch": 0.62, "grad_norm": 1.2190872441625846, "learning_rate": 3.346901608102899e-06, "loss": 0.1366, "step": 7738 }, { "epoch": 0.62, "grad_norm": 1.3634348471065028, "learning_rate": 3.3456790152411687e-06, "loss": 0.2048, "step": 7739 }, { "epoch": 0.62, "grad_norm": 1.345356500562698, "learning_rate": 3.34445653343902e-06, "loss": 0.1634, "step": 7740 }, { "epoch": 0.62, "grad_norm": 1.4082196968512268, "learning_rate": 3.3432341627785216e-06, "loss": 0.2276, "step": 7741 }, { "epoch": 0.62, "grad_norm": 1.515889426489952, "learning_rate": 3.342011903341731e-06, "loss": 0.1796, "step": 7742 }, { "epoch": 0.62, "grad_norm": 1.226804421674503, "learning_rate": 3.340789755210706e-06, "loss": 0.1494, "step": 7743 }, { "epoch": 0.62, "grad_norm": 1.3031227965647956, "learning_rate": 3.339567718467491e-06, "loss": 0.1708, "step": 7744 }, { "epoch": 0.62, "grad_norm": 1.3784855369885873, "learning_rate": 3.3383457931941275e-06, "loss": 0.1652, "step": 7745 }, { "epoch": 0.62, "grad_norm": 1.384856629314098, "learning_rate": 3.337123979472644e-06, "loss": 0.1563, "step": 7746 }, { "epoch": 0.62, "grad_norm": 1.22030422619057, "learning_rate": 3.3359022773850673e-06, "loss": 0.1685, "step": 7747 }, { "epoch": 0.62, "grad_norm": 1.531771514871892, "learning_rate": 3.334680687013412e-06, "loss": 0.245, "step": 7748 }, { "epoch": 0.62, "grad_norm": 1.2510684197019721, "learning_rate": 3.3334592084396886e-06, "loss": 0.1488, "step": 7749 }, { "epoch": 0.62, "grad_norm": 1.3728412774049734, "learning_rate": 3.3322378417458985e-06, "loss": 0.1792, "step": 7750 }, { "epoch": 0.62, "grad_norm": 1.5609031167471188, "learning_rate": 3.3310165870140333e-06, "loss": 0.2277, "step": 7751 }, { "epoch": 0.62, "grad_norm": 1.46860824450199, "learning_rate": 3.329795444326084e-06, "loss": 0.1731, "step": 7752 }, { "epoch": 0.62, "grad_norm": 1.1880995044353495, "learning_rate": 3.328574413764024e-06, "loss": 0.1669, "step": 7753 }, { "epoch": 0.62, "grad_norm": 1.3035460095575666, "learning_rate": 3.3273534954098298e-06, "loss": 0.172, "step": 7754 }, { "epoch": 0.62, "grad_norm": 1.4116346485182145, "learning_rate": 3.3261326893454617e-06, "loss": 0.2273, "step": 7755 }, { "epoch": 0.62, "grad_norm": 1.323658549401525, "learning_rate": 3.3249119956528804e-06, "loss": 0.1861, "step": 7756 }, { "epoch": 0.62, "grad_norm": 1.5599893933014164, "learning_rate": 3.3236914144140297e-06, "loss": 0.2074, "step": 7757 }, { "epoch": 0.62, "grad_norm": 1.2284783631430836, "learning_rate": 3.3224709457108545e-06, "loss": 0.1788, "step": 7758 }, { "epoch": 0.62, "grad_norm": 1.2170113560535942, "learning_rate": 3.3212505896252887e-06, "loss": 0.1938, "step": 7759 }, { "epoch": 0.62, "grad_norm": 1.3725682407425117, "learning_rate": 3.3200303462392548e-06, "loss": 0.1853, "step": 7760 }, { "epoch": 0.62, "grad_norm": 1.3996863564165063, "learning_rate": 3.318810215634675e-06, "loss": 0.1887, "step": 7761 }, { "epoch": 0.62, "grad_norm": 1.4758313997933659, "learning_rate": 3.317590197893458e-06, "loss": 0.1805, "step": 7762 }, { "epoch": 0.62, "grad_norm": 22.01601098908984, "learning_rate": 3.31637029309751e-06, "loss": 0.5699, "step": 7763 }, { "epoch": 0.62, "grad_norm": 1.4469828343136375, "learning_rate": 3.3151505013287245e-06, "loss": 0.1887, "step": 7764 }, { "epoch": 0.62, "grad_norm": 1.421835163108374, "learning_rate": 3.313930822668992e-06, "loss": 0.1942, "step": 7765 }, { "epoch": 0.62, "grad_norm": 1.5270922145613641, "learning_rate": 3.312711257200191e-06, "loss": 0.1919, "step": 7766 }, { "epoch": 0.62, "grad_norm": 1.3394326876416576, "learning_rate": 3.3114918050041956e-06, "loss": 0.1793, "step": 7767 }, { "epoch": 0.62, "grad_norm": 1.3330306915356975, "learning_rate": 3.3102724661628727e-06, "loss": 0.2077, "step": 7768 }, { "epoch": 0.62, "grad_norm": 1.4173312056534677, "learning_rate": 3.3090532407580767e-06, "loss": 0.1844, "step": 7769 }, { "epoch": 0.62, "grad_norm": 1.3595523899259356, "learning_rate": 3.307834128871661e-06, "loss": 0.1966, "step": 7770 }, { "epoch": 0.62, "grad_norm": 7.124063701109471, "learning_rate": 3.3066151305854665e-06, "loss": 0.5644, "step": 7771 }, { "epoch": 0.62, "grad_norm": 1.375166845040154, "learning_rate": 3.305396245981331e-06, "loss": 0.2014, "step": 7772 }, { "epoch": 0.62, "grad_norm": 1.2475913257964744, "learning_rate": 3.304177475141078e-06, "loss": 0.1362, "step": 7773 }, { "epoch": 0.62, "grad_norm": 1.4014181548955384, "learning_rate": 3.30295881814653e-06, "loss": 0.1846, "step": 7774 }, { "epoch": 0.62, "grad_norm": 1.3482336749955173, "learning_rate": 3.3017402750794976e-06, "loss": 0.1955, "step": 7775 }, { "epoch": 0.62, "grad_norm": 1.1905663697529167, "learning_rate": 3.300521846021788e-06, "loss": 0.1731, "step": 7776 }, { "epoch": 0.62, "grad_norm": 1.4089279381462456, "learning_rate": 3.2993035310551945e-06, "loss": 0.2009, "step": 7777 }, { "epoch": 0.62, "grad_norm": 1.2138497644044441, "learning_rate": 3.298085330261508e-06, "loss": 0.1436, "step": 7778 }, { "epoch": 0.62, "grad_norm": 1.4767676287058094, "learning_rate": 3.2968672437225115e-06, "loss": 0.1858, "step": 7779 }, { "epoch": 0.62, "grad_norm": 1.3205353358612255, "learning_rate": 3.2956492715199744e-06, "loss": 0.1824, "step": 7780 }, { "epoch": 0.62, "grad_norm": 1.2895950792824262, "learning_rate": 3.294431413735667e-06, "loss": 0.1989, "step": 7781 }, { "epoch": 0.62, "grad_norm": 1.443016719972543, "learning_rate": 3.293213670451346e-06, "loss": 0.2092, "step": 7782 }, { "epoch": 0.62, "grad_norm": 1.4186463782525403, "learning_rate": 3.2919960417487643e-06, "loss": 0.2003, "step": 7783 }, { "epoch": 0.62, "grad_norm": 1.3304624205199254, "learning_rate": 3.2907785277096616e-06, "loss": 0.1587, "step": 7784 }, { "epoch": 0.62, "grad_norm": 1.1785842702036096, "learning_rate": 3.2895611284157757e-06, "loss": 0.1338, "step": 7785 }, { "epoch": 0.62, "grad_norm": 1.1763513998631734, "learning_rate": 3.288343843948835e-06, "loss": 0.1818, "step": 7786 }, { "epoch": 0.62, "grad_norm": 1.4085880984414645, "learning_rate": 3.287126674390556e-06, "loss": 0.1879, "step": 7787 }, { "epoch": 0.62, "grad_norm": 1.4839166910308192, "learning_rate": 3.285909619822655e-06, "loss": 0.1811, "step": 7788 }, { "epoch": 0.62, "grad_norm": 1.2913595765556065, "learning_rate": 3.2846926803268325e-06, "loss": 0.1584, "step": 7789 }, { "epoch": 0.62, "grad_norm": 1.3162434867545292, "learning_rate": 3.2834758559847903e-06, "loss": 0.2202, "step": 7790 }, { "epoch": 0.62, "grad_norm": 7.493748746273879, "learning_rate": 3.2822591468782115e-06, "loss": 0.6513, "step": 7791 }, { "epoch": 0.62, "grad_norm": 1.656035272661837, "learning_rate": 3.2810425530887836e-06, "loss": 0.217, "step": 7792 }, { "epoch": 0.62, "grad_norm": 1.3094612398965175, "learning_rate": 3.279826074698177e-06, "loss": 0.2148, "step": 7793 }, { "epoch": 0.62, "grad_norm": 1.4038220647668633, "learning_rate": 3.2786097117880566e-06, "loss": 0.1677, "step": 7794 }, { "epoch": 0.62, "grad_norm": 1.4030254942476899, "learning_rate": 3.2773934644400825e-06, "loss": 0.1965, "step": 7795 }, { "epoch": 0.62, "grad_norm": 1.328350866961088, "learning_rate": 3.276177332735903e-06, "loss": 0.1945, "step": 7796 }, { "epoch": 0.62, "grad_norm": 1.3654663453642728, "learning_rate": 3.2749613167571647e-06, "loss": 0.1848, "step": 7797 }, { "epoch": 0.62, "grad_norm": 1.3191245026600775, "learning_rate": 3.2737454165854976e-06, "loss": 0.1872, "step": 7798 }, { "epoch": 0.62, "grad_norm": 1.462070637370341, "learning_rate": 3.2725296323025326e-06, "loss": 0.2282, "step": 7799 }, { "epoch": 0.62, "grad_norm": 1.4968293239609527, "learning_rate": 3.271313963989886e-06, "loss": 0.2511, "step": 7800 }, { "epoch": 0.62, "grad_norm": 1.2311298454863742, "learning_rate": 3.2700984117291722e-06, "loss": 0.2021, "step": 7801 }, { "epoch": 0.62, "grad_norm": 1.3079270525733746, "learning_rate": 3.2688829756019934e-06, "loss": 0.1938, "step": 7802 }, { "epoch": 0.62, "grad_norm": 7.205557791342672, "learning_rate": 3.267667655689944e-06, "loss": 0.5393, "step": 7803 }, { "epoch": 0.62, "grad_norm": 1.416263705880692, "learning_rate": 3.2664524520746154e-06, "loss": 0.1645, "step": 7804 }, { "epoch": 0.62, "grad_norm": 1.3744361023492, "learning_rate": 3.2652373648375836e-06, "loss": 0.1935, "step": 7805 }, { "epoch": 0.62, "grad_norm": 1.319110613616149, "learning_rate": 3.2640223940604266e-06, "loss": 0.1962, "step": 7806 }, { "epoch": 0.62, "grad_norm": 1.2476514942331776, "learning_rate": 3.2628075398247037e-06, "loss": 0.1504, "step": 7807 }, { "epoch": 0.62, "grad_norm": 1.316200756428, "learning_rate": 3.2615928022119757e-06, "loss": 0.1839, "step": 7808 }, { "epoch": 0.62, "grad_norm": 1.371596941761974, "learning_rate": 3.260378181303788e-06, "loss": 0.1492, "step": 7809 }, { "epoch": 0.62, "grad_norm": 1.2817443162043318, "learning_rate": 3.259163677181687e-06, "loss": 0.1679, "step": 7810 }, { "epoch": 0.62, "grad_norm": 1.1158090261561235, "learning_rate": 3.2579492899272014e-06, "loss": 0.1249, "step": 7811 }, { "epoch": 0.62, "grad_norm": 10.583192836582663, "learning_rate": 3.256735019621857e-06, "loss": 0.454, "step": 7812 }, { "epoch": 0.63, "grad_norm": 1.3117492656924534, "learning_rate": 3.2555208663471748e-06, "loss": 0.1576, "step": 7813 }, { "epoch": 0.63, "grad_norm": 1.3093809825661626, "learning_rate": 3.2543068301846607e-06, "loss": 0.1974, "step": 7814 }, { "epoch": 0.63, "grad_norm": 13.076801601266826, "learning_rate": 3.2530929112158194e-06, "loss": 0.5858, "step": 7815 }, { "epoch": 0.63, "grad_norm": 1.4272500568790647, "learning_rate": 3.2518791095221425e-06, "loss": 0.1625, "step": 7816 }, { "epoch": 0.63, "grad_norm": 1.391157088025085, "learning_rate": 3.2506654251851194e-06, "loss": 0.2138, "step": 7817 }, { "epoch": 0.63, "grad_norm": 1.4036951424243067, "learning_rate": 3.249451858286225e-06, "loss": 0.225, "step": 7818 }, { "epoch": 0.63, "grad_norm": 1.6447430793369968, "learning_rate": 3.248238408906932e-06, "loss": 0.172, "step": 7819 }, { "epoch": 0.63, "grad_norm": 1.4464535407033532, "learning_rate": 3.247025077128704e-06, "loss": 0.1967, "step": 7820 }, { "epoch": 0.63, "grad_norm": 1.3394565615373315, "learning_rate": 3.2458118630329905e-06, "loss": 0.1889, "step": 7821 }, { "epoch": 0.63, "grad_norm": 1.3283642111956842, "learning_rate": 3.2445987667012435e-06, "loss": 0.1775, "step": 7822 }, { "epoch": 0.63, "grad_norm": 1.5320664837364364, "learning_rate": 3.243385788214899e-06, "loss": 0.2146, "step": 7823 }, { "epoch": 0.63, "grad_norm": 1.2869894925539433, "learning_rate": 3.2421729276553903e-06, "loss": 0.1629, "step": 7824 }, { "epoch": 0.63, "grad_norm": 1.6769634201741217, "learning_rate": 3.240960185104137e-06, "loss": 0.2091, "step": 7825 }, { "epoch": 0.63, "grad_norm": 1.3527731199345756, "learning_rate": 3.2397475606425577e-06, "loss": 0.1864, "step": 7826 }, { "epoch": 0.63, "grad_norm": 1.3256152073090754, "learning_rate": 3.2385350543520564e-06, "loss": 0.1744, "step": 7827 }, { "epoch": 0.63, "grad_norm": 1.4980477140988482, "learning_rate": 3.237322666314037e-06, "loss": 0.1979, "step": 7828 }, { "epoch": 0.63, "grad_norm": 1.2977104675572346, "learning_rate": 3.2361103966098856e-06, "loss": 0.1751, "step": 7829 }, { "epoch": 0.63, "grad_norm": 1.1910525961959622, "learning_rate": 3.234898245320987e-06, "loss": 0.1441, "step": 7830 }, { "epoch": 0.63, "grad_norm": 1.4361172315887394, "learning_rate": 3.2336862125287193e-06, "loss": 0.193, "step": 7831 }, { "epoch": 0.63, "grad_norm": 5.730218691043036, "learning_rate": 3.2324742983144463e-06, "loss": 0.585, "step": 7832 }, { "epoch": 0.63, "grad_norm": 1.351932024338407, "learning_rate": 3.231262502759529e-06, "loss": 0.1879, "step": 7833 }, { "epoch": 0.63, "grad_norm": 1.3642289781127743, "learning_rate": 3.230050825945319e-06, "loss": 0.2124, "step": 7834 }, { "epoch": 0.63, "grad_norm": 1.340660273633863, "learning_rate": 3.2288392679531612e-06, "loss": 0.1688, "step": 7835 }, { "epoch": 0.63, "grad_norm": 1.3850406138284692, "learning_rate": 3.227627828864388e-06, "loss": 0.1662, "step": 7836 }, { "epoch": 0.63, "grad_norm": 1.397425834733644, "learning_rate": 3.2264165087603293e-06, "loss": 0.1761, "step": 7837 }, { "epoch": 0.63, "grad_norm": 1.3937390422650353, "learning_rate": 3.2252053077223056e-06, "loss": 0.1889, "step": 7838 }, { "epoch": 0.63, "grad_norm": 1.2166233662075885, "learning_rate": 3.2239942258316247e-06, "loss": 0.1465, "step": 7839 }, { "epoch": 0.63, "grad_norm": 1.420200911465506, "learning_rate": 3.2227832631695936e-06, "loss": 0.1882, "step": 7840 }, { "epoch": 0.63, "grad_norm": 1.3424886021171207, "learning_rate": 3.2215724198175056e-06, "loss": 0.202, "step": 7841 }, { "epoch": 0.63, "grad_norm": 1.6242838269570241, "learning_rate": 3.220361695856652e-06, "loss": 0.1786, "step": 7842 }, { "epoch": 0.63, "grad_norm": 1.2401779048018964, "learning_rate": 3.219151091368308e-06, "loss": 0.1675, "step": 7843 }, { "epoch": 0.63, "grad_norm": 1.4277772454644777, "learning_rate": 3.217940606433747e-06, "loss": 0.2226, "step": 7844 }, { "epoch": 0.63, "grad_norm": 1.2406971436276348, "learning_rate": 3.2167302411342322e-06, "loss": 0.1784, "step": 7845 }, { "epoch": 0.63, "grad_norm": 1.2155890972610872, "learning_rate": 3.2155199955510214e-06, "loss": 0.1533, "step": 7846 }, { "epoch": 0.63, "grad_norm": 1.285633685865284, "learning_rate": 3.2143098697653586e-06, "loss": 0.1747, "step": 7847 }, { "epoch": 0.63, "grad_norm": 1.3749982491877086, "learning_rate": 3.2130998638584842e-06, "loss": 0.201, "step": 7848 }, { "epoch": 0.63, "grad_norm": 1.5525914416059414, "learning_rate": 3.2118899779116307e-06, "loss": 0.2492, "step": 7849 }, { "epoch": 0.63, "grad_norm": 1.2461663559532454, "learning_rate": 3.2106802120060197e-06, "loss": 0.1727, "step": 7850 }, { "epoch": 0.63, "grad_norm": 1.5308293375182658, "learning_rate": 3.2094705662228697e-06, "loss": 0.2049, "step": 7851 }, { "epoch": 0.63, "grad_norm": 1.1509735500015763, "learning_rate": 3.2082610406433838e-06, "loss": 0.1432, "step": 7852 }, { "epoch": 0.63, "grad_norm": 7.438699278371207, "learning_rate": 3.2070516353487634e-06, "loss": 0.7067, "step": 7853 }, { "epoch": 0.63, "grad_norm": 1.1952676313598005, "learning_rate": 3.2058423504202007e-06, "loss": 0.1743, "step": 7854 }, { "epoch": 0.63, "grad_norm": 7.136077856187923, "learning_rate": 3.2046331859388757e-06, "loss": 0.5843, "step": 7855 }, { "epoch": 0.63, "grad_norm": 1.2460555574057655, "learning_rate": 3.2034241419859645e-06, "loss": 0.166, "step": 7856 }, { "epoch": 0.63, "grad_norm": 4.622753746961374, "learning_rate": 3.2022152186426337e-06, "loss": 0.5816, "step": 7857 }, { "epoch": 0.63, "grad_norm": 4.3953046945051435, "learning_rate": 3.201006415990044e-06, "loss": 0.4799, "step": 7858 }, { "epoch": 0.63, "grad_norm": 1.4801755667147358, "learning_rate": 3.199797734109342e-06, "loss": 0.1765, "step": 7859 }, { "epoch": 0.63, "grad_norm": 1.508675854023696, "learning_rate": 3.198589173081674e-06, "loss": 0.1513, "step": 7860 }, { "epoch": 0.63, "grad_norm": 1.2101298017208768, "learning_rate": 3.1973807329881724e-06, "loss": 0.152, "step": 7861 }, { "epoch": 0.63, "grad_norm": 1.4796869854528971, "learning_rate": 3.196172413909965e-06, "loss": 0.179, "step": 7862 }, { "epoch": 0.63, "grad_norm": 1.3940533757989904, "learning_rate": 3.1949642159281683e-06, "loss": 0.1805, "step": 7863 }, { "epoch": 0.63, "grad_norm": 8.724050690088736, "learning_rate": 3.1937561391238918e-06, "loss": 0.4812, "step": 7864 }, { "epoch": 0.63, "grad_norm": 1.2971570030071555, "learning_rate": 3.19254818357824e-06, "loss": 0.1542, "step": 7865 }, { "epoch": 0.63, "grad_norm": 1.1366325056736128, "learning_rate": 3.191340349372304e-06, "loss": 0.1251, "step": 7866 }, { "epoch": 0.63, "grad_norm": 1.3500883088843927, "learning_rate": 3.19013263658717e-06, "loss": 0.1559, "step": 7867 }, { "epoch": 0.63, "grad_norm": 1.2692844803483487, "learning_rate": 3.1889250453039156e-06, "loss": 0.177, "step": 7868 }, { "epoch": 0.63, "grad_norm": 1.293474769002433, "learning_rate": 3.187717575603612e-06, "loss": 0.1718, "step": 7869 }, { "epoch": 0.63, "grad_norm": 1.4909055622686322, "learning_rate": 3.1865102275673167e-06, "loss": 0.1882, "step": 7870 }, { "epoch": 0.63, "grad_norm": 1.1882880789279795, "learning_rate": 3.1853030012760853e-06, "loss": 0.1579, "step": 7871 }, { "epoch": 0.63, "grad_norm": 1.228916074395396, "learning_rate": 3.184095896810963e-06, "loss": 0.1559, "step": 7872 }, { "epoch": 0.63, "grad_norm": 13.673248565033386, "learning_rate": 3.1828889142529828e-06, "loss": 0.504, "step": 7873 }, { "epoch": 0.63, "grad_norm": 1.441706467128113, "learning_rate": 3.1816820536831774e-06, "loss": 0.2082, "step": 7874 }, { "epoch": 0.63, "grad_norm": 1.3555205795658416, "learning_rate": 3.180475315182563e-06, "loss": 0.1974, "step": 7875 }, { "epoch": 0.63, "grad_norm": 1.351168669725019, "learning_rate": 3.179268698832155e-06, "loss": 0.1864, "step": 7876 }, { "epoch": 0.63, "grad_norm": 1.3693316013945411, "learning_rate": 3.178062204712955e-06, "loss": 0.1546, "step": 7877 }, { "epoch": 0.63, "grad_norm": 1.3564658330938701, "learning_rate": 3.1768558329059605e-06, "loss": 0.2183, "step": 7878 }, { "epoch": 0.63, "grad_norm": 1.3597714213269423, "learning_rate": 3.1756495834921563e-06, "loss": 0.1803, "step": 7879 }, { "epoch": 0.63, "grad_norm": 1.4989735460200675, "learning_rate": 3.1744434565525252e-06, "loss": 0.2109, "step": 7880 }, { "epoch": 0.63, "grad_norm": 1.3761517824161937, "learning_rate": 3.1732374521680355e-06, "loss": 0.1527, "step": 7881 }, { "epoch": 0.63, "grad_norm": 4.98422907051574, "learning_rate": 3.172031570419649e-06, "loss": 0.6473, "step": 7882 }, { "epoch": 0.63, "grad_norm": 1.7494956667675856, "learning_rate": 3.170825811388324e-06, "loss": 0.1552, "step": 7883 }, { "epoch": 0.63, "grad_norm": 1.536085026547384, "learning_rate": 3.169620175155002e-06, "loss": 0.2053, "step": 7884 }, { "epoch": 0.63, "grad_norm": 1.3288692813015224, "learning_rate": 3.168414661800625e-06, "loss": 0.1478, "step": 7885 }, { "epoch": 0.63, "grad_norm": 1.4485212484820296, "learning_rate": 3.16720927140612e-06, "loss": 0.2265, "step": 7886 }, { "epoch": 0.63, "grad_norm": 9.155445287583118, "learning_rate": 3.1660040040524122e-06, "loss": 0.4887, "step": 7887 }, { "epoch": 0.63, "grad_norm": 1.2289088055151425, "learning_rate": 3.164798859820408e-06, "loss": 0.184, "step": 7888 }, { "epoch": 0.63, "grad_norm": 1.3349459061874227, "learning_rate": 3.1635938387910216e-06, "loss": 0.1835, "step": 7889 }, { "epoch": 0.63, "grad_norm": 1.3655323058446471, "learning_rate": 3.1623889410451435e-06, "loss": 0.1718, "step": 7890 }, { "epoch": 0.63, "grad_norm": 1.5648082694013254, "learning_rate": 3.161184166663661e-06, "loss": 0.1794, "step": 7891 }, { "epoch": 0.63, "grad_norm": 11.219835770567885, "learning_rate": 3.1599795157274604e-06, "loss": 0.6247, "step": 7892 }, { "epoch": 0.63, "grad_norm": 1.3802950873605506, "learning_rate": 3.1587749883174074e-06, "loss": 0.2174, "step": 7893 }, { "epoch": 0.63, "grad_norm": 7.867841308632259, "learning_rate": 3.157570584514369e-06, "loss": 0.4619, "step": 7894 }, { "epoch": 0.63, "grad_norm": 1.546799712327459, "learning_rate": 3.1563663043991987e-06, "loss": 0.2003, "step": 7895 }, { "epoch": 0.63, "grad_norm": 26.439785755216093, "learning_rate": 3.1551621480527466e-06, "loss": 0.7484, "step": 7896 }, { "epoch": 0.63, "grad_norm": 1.242211273511225, "learning_rate": 3.1539581155558467e-06, "loss": 0.1469, "step": 7897 }, { "epoch": 0.63, "grad_norm": 1.5403378121062878, "learning_rate": 3.1527542069893334e-06, "loss": 0.233, "step": 7898 }, { "epoch": 0.63, "grad_norm": 1.336525950504161, "learning_rate": 3.1515504224340277e-06, "loss": 0.1391, "step": 7899 }, { "epoch": 0.63, "grad_norm": 1.558556340081549, "learning_rate": 3.1503467619707407e-06, "loss": 0.2176, "step": 7900 }, { "epoch": 0.63, "grad_norm": 1.457297069163006, "learning_rate": 3.149143225680281e-06, "loss": 0.2003, "step": 7901 }, { "epoch": 0.63, "grad_norm": 1.5429458923521044, "learning_rate": 3.1479398136434438e-06, "loss": 0.1997, "step": 7902 }, { "epoch": 0.63, "grad_norm": 1.2540375996622262, "learning_rate": 3.1467365259410194e-06, "loss": 0.2265, "step": 7903 }, { "epoch": 0.63, "grad_norm": 7.038069062383274, "learning_rate": 3.1455333626537852e-06, "loss": 0.6149, "step": 7904 }, { "epoch": 0.63, "grad_norm": 1.2581951508839013, "learning_rate": 3.1443303238625172e-06, "loss": 0.1698, "step": 7905 }, { "epoch": 0.63, "grad_norm": 1.336066610476519, "learning_rate": 3.143127409647976e-06, "loss": 0.1822, "step": 7906 }, { "epoch": 0.63, "grad_norm": 1.2870040444365258, "learning_rate": 3.141924620090919e-06, "loss": 0.149, "step": 7907 }, { "epoch": 0.63, "grad_norm": 1.626078311840451, "learning_rate": 3.1407219552720915e-06, "loss": 0.2213, "step": 7908 }, { "epoch": 0.63, "grad_norm": 1.4622793258906668, "learning_rate": 3.139519415272231e-06, "loss": 0.2009, "step": 7909 }, { "epoch": 0.63, "grad_norm": 1.4444155569498092, "learning_rate": 3.138317000172072e-06, "loss": 0.2155, "step": 7910 }, { "epoch": 0.63, "grad_norm": 1.3096067830056106, "learning_rate": 3.1371147100523308e-06, "loss": 0.199, "step": 7911 }, { "epoch": 0.63, "grad_norm": 1.464231902271683, "learning_rate": 3.1359125449937243e-06, "loss": 0.2315, "step": 7912 }, { "epoch": 0.63, "grad_norm": 6.9267884060269225, "learning_rate": 3.1347105050769565e-06, "loss": 0.6076, "step": 7913 }, { "epoch": 0.63, "grad_norm": 1.2765696927219845, "learning_rate": 3.1335085903827257e-06, "loss": 0.1666, "step": 7914 }, { "epoch": 0.63, "grad_norm": 1.3095728531874564, "learning_rate": 3.1323068009917174e-06, "loss": 0.1678, "step": 7915 }, { "epoch": 0.63, "grad_norm": 1.4075681281647845, "learning_rate": 3.1311051369846114e-06, "loss": 0.1688, "step": 7916 }, { "epoch": 0.63, "grad_norm": 7.497622412455353, "learning_rate": 3.129903598442082e-06, "loss": 0.3725, "step": 7917 }, { "epoch": 0.63, "grad_norm": 1.2897731598339204, "learning_rate": 3.1287021854447884e-06, "loss": 0.1541, "step": 7918 }, { "epoch": 0.63, "grad_norm": 5.054746550074537, "learning_rate": 3.127500898073388e-06, "loss": 0.5173, "step": 7919 }, { "epoch": 0.63, "grad_norm": 1.3866776097092954, "learning_rate": 3.1262997364085248e-06, "loss": 0.2185, "step": 7920 }, { "epoch": 0.63, "grad_norm": 1.2419760877543193, "learning_rate": 3.125098700530839e-06, "loss": 0.1753, "step": 7921 }, { "epoch": 0.63, "grad_norm": 1.3698172289136854, "learning_rate": 3.1238977905209554e-06, "loss": 0.1551, "step": 7922 }, { "epoch": 0.63, "grad_norm": 1.225018726144095, "learning_rate": 3.1226970064595e-06, "loss": 0.1676, "step": 7923 }, { "epoch": 0.63, "grad_norm": 1.2920389546834399, "learning_rate": 3.121496348427083e-06, "loss": 0.2073, "step": 7924 }, { "epoch": 0.63, "grad_norm": 1.5462429014092947, "learning_rate": 3.1202958165043053e-06, "loss": 0.1737, "step": 7925 }, { "epoch": 0.63, "grad_norm": 14.685746266884983, "learning_rate": 3.1190954107717664e-06, "loss": 0.631, "step": 7926 }, { "epoch": 0.63, "grad_norm": 1.218413056739077, "learning_rate": 3.11789513131005e-06, "loss": 0.1368, "step": 7927 }, { "epoch": 0.63, "grad_norm": 1.4745941208586935, "learning_rate": 3.116694978199738e-06, "loss": 0.2253, "step": 7928 }, { "epoch": 0.63, "grad_norm": 1.3242431806499277, "learning_rate": 3.115494951521397e-06, "loss": 0.1991, "step": 7929 }, { "epoch": 0.63, "grad_norm": 1.2534074074076318, "learning_rate": 3.1142950513555903e-06, "loss": 0.1852, "step": 7930 }, { "epoch": 0.63, "grad_norm": 1.3691204058955233, "learning_rate": 3.1130952777828694e-06, "loss": 0.207, "step": 7931 }, { "epoch": 0.63, "grad_norm": 7.058957440022268, "learning_rate": 3.11189563088378e-06, "loss": 0.6252, "step": 7932 }, { "epoch": 0.63, "grad_norm": 1.409825452184253, "learning_rate": 3.110696110738859e-06, "loss": 0.2144, "step": 7933 }, { "epoch": 0.63, "grad_norm": 1.277516986678831, "learning_rate": 3.1094967174286307e-06, "loss": 0.1613, "step": 7934 }, { "epoch": 0.63, "grad_norm": 1.4242585071223857, "learning_rate": 3.1082974510336163e-06, "loss": 0.2122, "step": 7935 }, { "epoch": 0.63, "grad_norm": 1.1411302891522008, "learning_rate": 3.1070983116343245e-06, "loss": 0.1467, "step": 7936 }, { "epoch": 0.63, "grad_norm": 1.3535875281186098, "learning_rate": 3.105899299311261e-06, "loss": 0.1639, "step": 7937 }, { "epoch": 0.64, "grad_norm": 1.356475780365062, "learning_rate": 3.1047004141449143e-06, "loss": 0.1883, "step": 7938 }, { "epoch": 0.64, "grad_norm": 1.4580495081435039, "learning_rate": 3.1035016562157716e-06, "loss": 0.1804, "step": 7939 }, { "epoch": 0.64, "grad_norm": 1.4036354169981848, "learning_rate": 3.1023030256043087e-06, "loss": 0.19, "step": 7940 }, { "epoch": 0.64, "grad_norm": 1.4519009655631587, "learning_rate": 3.1011045223909954e-06, "loss": 0.1669, "step": 7941 }, { "epoch": 0.64, "grad_norm": 1.2164440799792453, "learning_rate": 3.0999061466562873e-06, "loss": 0.1642, "step": 7942 }, { "epoch": 0.64, "grad_norm": 1.1890463668990345, "learning_rate": 3.0987078984806363e-06, "loss": 0.173, "step": 7943 }, { "epoch": 0.64, "grad_norm": 1.2026975896469725, "learning_rate": 3.0975097779444864e-06, "loss": 0.1992, "step": 7944 }, { "epoch": 0.64, "grad_norm": 1.3387900797436894, "learning_rate": 3.0963117851282677e-06, "loss": 0.19, "step": 7945 }, { "epoch": 0.64, "grad_norm": 1.2187033439473896, "learning_rate": 3.0951139201124075e-06, "loss": 0.1553, "step": 7946 }, { "epoch": 0.64, "grad_norm": 1.299014180477559, "learning_rate": 3.0939161829773208e-06, "loss": 0.1965, "step": 7947 }, { "epoch": 0.64, "grad_norm": 1.3636786086259918, "learning_rate": 3.0927185738034175e-06, "loss": 0.22, "step": 7948 }, { "epoch": 0.64, "grad_norm": 1.3841461213661053, "learning_rate": 3.091521092671094e-06, "loss": 0.1616, "step": 7949 }, { "epoch": 0.64, "grad_norm": 1.1866380015993783, "learning_rate": 3.090323739660742e-06, "loss": 0.1506, "step": 7950 }, { "epoch": 0.64, "grad_norm": 1.3102686206110632, "learning_rate": 3.089126514852745e-06, "loss": 0.1965, "step": 7951 }, { "epoch": 0.64, "grad_norm": 1.245647558296622, "learning_rate": 3.087929418327472e-06, "loss": 0.1584, "step": 7952 }, { "epoch": 0.64, "grad_norm": 1.1954272025574186, "learning_rate": 3.0867324501652923e-06, "loss": 0.16, "step": 7953 }, { "epoch": 0.64, "grad_norm": 1.2078550701523807, "learning_rate": 3.0855356104465593e-06, "loss": 0.1825, "step": 7954 }, { "epoch": 0.64, "grad_norm": 1.213482010562243, "learning_rate": 3.084338899251623e-06, "loss": 0.1552, "step": 7955 }, { "epoch": 0.64, "grad_norm": 1.4544353136242343, "learning_rate": 3.0831423166608186e-06, "loss": 0.1773, "step": 7956 }, { "epoch": 0.64, "grad_norm": 7.226658074350585, "learning_rate": 3.0819458627544796e-06, "loss": 0.5129, "step": 7957 }, { "epoch": 0.64, "grad_norm": 1.4342336246594842, "learning_rate": 3.080749537612925e-06, "loss": 0.2105, "step": 7958 }, { "epoch": 0.64, "grad_norm": 1.2493829783418786, "learning_rate": 3.079553341316471e-06, "loss": 0.161, "step": 7959 }, { "epoch": 0.64, "grad_norm": 1.2888975380250858, "learning_rate": 3.078357273945419e-06, "loss": 0.1542, "step": 7960 }, { "epoch": 0.64, "grad_norm": 1.2491285654903272, "learning_rate": 3.0771613355800647e-06, "loss": 0.1838, "step": 7961 }, { "epoch": 0.64, "grad_norm": 1.3019263927742257, "learning_rate": 3.0759655263006983e-06, "loss": 0.1773, "step": 7962 }, { "epoch": 0.64, "grad_norm": 1.4576355004291175, "learning_rate": 3.074769846187593e-06, "loss": 0.2294, "step": 7963 }, { "epoch": 0.64, "grad_norm": 1.1938537892672316, "learning_rate": 3.0735742953210222e-06, "loss": 0.1472, "step": 7964 }, { "epoch": 0.64, "grad_norm": 1.0785292728841107, "learning_rate": 3.072378873781245e-06, "loss": 0.1483, "step": 7965 }, { "epoch": 0.64, "grad_norm": 1.5566486230054253, "learning_rate": 3.0711835816485163e-06, "loss": 0.1867, "step": 7966 }, { "epoch": 0.64, "grad_norm": 1.1769989579443552, "learning_rate": 3.069988419003076e-06, "loss": 0.1665, "step": 7967 }, { "epoch": 0.64, "grad_norm": 1.6003297593823855, "learning_rate": 3.0687933859251618e-06, "loss": 0.2034, "step": 7968 }, { "epoch": 0.64, "grad_norm": 1.3034821184664414, "learning_rate": 3.067598482495e-06, "loss": 0.1403, "step": 7969 }, { "epoch": 0.64, "grad_norm": 7.17609484037814, "learning_rate": 3.066403708792805e-06, "loss": 0.5155, "step": 7970 }, { "epoch": 0.64, "grad_norm": 1.339265253243504, "learning_rate": 3.0652090648987885e-06, "loss": 0.196, "step": 7971 }, { "epoch": 0.64, "grad_norm": 1.386467679292854, "learning_rate": 3.0640145508931487e-06, "loss": 0.1867, "step": 7972 }, { "epoch": 0.64, "grad_norm": 1.3769171864786163, "learning_rate": 3.06282016685608e-06, "loss": 0.1639, "step": 7973 }, { "epoch": 0.64, "grad_norm": 1.246791308558666, "learning_rate": 3.0616259128677594e-06, "loss": 0.1659, "step": 7974 }, { "epoch": 0.64, "grad_norm": 1.2241482141651574, "learning_rate": 3.060431789008368e-06, "loss": 0.1673, "step": 7975 }, { "epoch": 0.64, "grad_norm": 1.4337045249299292, "learning_rate": 3.0592377953580652e-06, "loss": 0.2072, "step": 7976 }, { "epoch": 0.64, "grad_norm": 1.36498907173208, "learning_rate": 3.0580439319970103e-06, "loss": 0.1924, "step": 7977 }, { "epoch": 0.64, "grad_norm": 1.352640548386208, "learning_rate": 3.0568501990053513e-06, "loss": 0.1781, "step": 7978 }, { "epoch": 0.64, "grad_norm": 13.881520506931842, "learning_rate": 3.0556565964632236e-06, "loss": 0.645, "step": 7979 }, { "epoch": 0.64, "grad_norm": 1.075007046271416, "learning_rate": 3.0544631244507607e-06, "loss": 0.1296, "step": 7980 }, { "epoch": 0.64, "grad_norm": 6.851927678669725, "learning_rate": 3.0532697830480816e-06, "loss": 0.6398, "step": 7981 }, { "epoch": 0.64, "grad_norm": 1.3839623298678445, "learning_rate": 3.0520765723353036e-06, "loss": 0.1745, "step": 7982 }, { "epoch": 0.64, "grad_norm": 1.316093053741538, "learning_rate": 3.0508834923925245e-06, "loss": 0.1902, "step": 7983 }, { "epoch": 0.64, "grad_norm": 1.583808859957966, "learning_rate": 3.049690543299843e-06, "loss": 0.2251, "step": 7984 }, { "epoch": 0.64, "grad_norm": 1.260040471732798, "learning_rate": 3.0484977251373458e-06, "loss": 0.1664, "step": 7985 }, { "epoch": 0.64, "grad_norm": 1.3872466776814742, "learning_rate": 3.047305037985107e-06, "loss": 0.1842, "step": 7986 }, { "epoch": 0.64, "grad_norm": 1.472579694627104, "learning_rate": 3.0461124819231984e-06, "loss": 0.1951, "step": 7987 }, { "epoch": 0.64, "grad_norm": 1.4264483729058504, "learning_rate": 3.0449200570316783e-06, "loss": 0.2034, "step": 7988 }, { "epoch": 0.64, "grad_norm": 1.2885910413014916, "learning_rate": 3.0437277633906006e-06, "loss": 0.1921, "step": 7989 }, { "epoch": 0.64, "grad_norm": 7.147170209707625, "learning_rate": 3.0425356010800022e-06, "loss": 0.5489, "step": 7990 }, { "epoch": 0.64, "grad_norm": 1.8506478967544588, "learning_rate": 3.041343570179922e-06, "loss": 0.202, "step": 7991 }, { "epoch": 0.64, "grad_norm": 1.4062582976594764, "learning_rate": 3.0401516707703802e-06, "loss": 0.2036, "step": 7992 }, { "epoch": 0.64, "grad_norm": 5.461011709558017, "learning_rate": 3.0389599029313983e-06, "loss": 0.5216, "step": 7993 }, { "epoch": 0.64, "grad_norm": 1.365670652398302, "learning_rate": 3.037768266742978e-06, "loss": 0.1933, "step": 7994 }, { "epoch": 0.64, "grad_norm": 1.2913508927494168, "learning_rate": 3.036576762285118e-06, "loss": 0.1956, "step": 7995 }, { "epoch": 0.64, "grad_norm": 1.4455988966274207, "learning_rate": 3.0353853896378116e-06, "loss": 0.1993, "step": 7996 }, { "epoch": 0.64, "grad_norm": 1.2546018677158817, "learning_rate": 3.0341941488810346e-06, "loss": 0.178, "step": 7997 }, { "epoch": 0.64, "grad_norm": 4.897786235426149, "learning_rate": 3.0330030400947606e-06, "loss": 0.4207, "step": 7998 }, { "epoch": 0.64, "grad_norm": 1.3039938817751673, "learning_rate": 3.031812063358952e-06, "loss": 0.1504, "step": 7999 }, { "epoch": 0.64, "grad_norm": 1.4734481915874804, "learning_rate": 3.0306212187535653e-06, "loss": 0.181, "step": 8000 }, { "epoch": 0.64, "grad_norm": 1.416902362371618, "learning_rate": 3.0294305063585407e-06, "loss": 0.2144, "step": 8001 }, { "epoch": 0.64, "grad_norm": 1.4232988731374037, "learning_rate": 3.0282399262538175e-06, "loss": 0.1856, "step": 8002 }, { "epoch": 0.64, "grad_norm": 1.3555897918422068, "learning_rate": 3.027049478519324e-06, "loss": 0.2003, "step": 8003 }, { "epoch": 0.64, "grad_norm": 1.2731707083265038, "learning_rate": 3.0258591632349745e-06, "loss": 0.1585, "step": 8004 }, { "epoch": 0.64, "grad_norm": 1.3192928700010218, "learning_rate": 3.024668980480681e-06, "loss": 0.195, "step": 8005 }, { "epoch": 0.64, "grad_norm": 7.456842749092586, "learning_rate": 3.023478930336343e-06, "loss": 0.5053, "step": 8006 }, { "epoch": 0.64, "grad_norm": 1.2783613966556062, "learning_rate": 3.022289012881856e-06, "loss": 0.1729, "step": 8007 }, { "epoch": 0.64, "grad_norm": 1.3271185074953322, "learning_rate": 3.0210992281970973e-06, "loss": 0.1863, "step": 8008 }, { "epoch": 0.64, "grad_norm": 1.2119478069963066, "learning_rate": 3.0199095763619445e-06, "loss": 0.1634, "step": 8009 }, { "epoch": 0.64, "grad_norm": 1.5163075324953244, "learning_rate": 3.0187200574562605e-06, "loss": 0.2108, "step": 8010 }, { "epoch": 0.64, "grad_norm": 1.2282426128983028, "learning_rate": 3.017530671559903e-06, "loss": 0.1582, "step": 8011 }, { "epoch": 0.64, "grad_norm": 1.287490365919821, "learning_rate": 3.0163414187527174e-06, "loss": 0.183, "step": 8012 }, { "epoch": 0.64, "grad_norm": 1.4927378130290627, "learning_rate": 3.015152299114542e-06, "loss": 0.1944, "step": 8013 }, { "epoch": 0.64, "grad_norm": 1.4342657760444664, "learning_rate": 3.0139633127252086e-06, "loss": 0.2024, "step": 8014 }, { "epoch": 0.64, "grad_norm": 1.5665119269965095, "learning_rate": 3.0127744596645337e-06, "loss": 0.2572, "step": 8015 }, { "epoch": 0.64, "grad_norm": 1.2640150226807745, "learning_rate": 3.011585740012331e-06, "loss": 0.188, "step": 8016 }, { "epoch": 0.64, "grad_norm": 1.2337965305511025, "learning_rate": 3.010397153848401e-06, "loss": 0.1568, "step": 8017 }, { "epoch": 0.64, "grad_norm": 1.2444202683856274, "learning_rate": 3.00920870125254e-06, "loss": 0.1755, "step": 8018 }, { "epoch": 0.64, "grad_norm": 1.3839843708869037, "learning_rate": 3.0080203823045294e-06, "loss": 0.1605, "step": 8019 }, { "epoch": 0.64, "grad_norm": 1.0052052154318911, "learning_rate": 3.0068321970841484e-06, "loss": 0.1175, "step": 8020 }, { "epoch": 0.64, "grad_norm": 1.3013046054221236, "learning_rate": 3.0056441456711593e-06, "loss": 0.1732, "step": 8021 }, { "epoch": 0.64, "grad_norm": 1.524499557127757, "learning_rate": 3.004456228145321e-06, "loss": 0.1715, "step": 8022 }, { "epoch": 0.64, "grad_norm": 1.2546895391428723, "learning_rate": 3.003268444586384e-06, "loss": 0.1836, "step": 8023 }, { "epoch": 0.64, "grad_norm": 1.1781988702479556, "learning_rate": 3.0020807950740844e-06, "loss": 0.1254, "step": 8024 }, { "epoch": 0.64, "grad_norm": 1.5944806254334936, "learning_rate": 3.000893279688155e-06, "loss": 0.2104, "step": 8025 }, { "epoch": 0.64, "grad_norm": 1.3500123985762384, "learning_rate": 2.999705898508316e-06, "loss": 0.1877, "step": 8026 }, { "epoch": 0.64, "grad_norm": 1.2135488547060997, "learning_rate": 2.998518651614283e-06, "loss": 0.1864, "step": 8027 }, { "epoch": 0.64, "grad_norm": 1.4062597794056038, "learning_rate": 2.9973315390857547e-06, "loss": 0.1886, "step": 8028 }, { "epoch": 0.64, "grad_norm": 1.3934617473424993, "learning_rate": 2.99614456100243e-06, "loss": 0.1721, "step": 8029 }, { "epoch": 0.64, "grad_norm": 6.906684028431125, "learning_rate": 2.9949577174439926e-06, "loss": 0.5709, "step": 8030 }, { "epoch": 0.64, "grad_norm": 1.3390328777048515, "learning_rate": 2.993771008490117e-06, "loss": 0.186, "step": 8031 }, { "epoch": 0.64, "grad_norm": 1.226946132336187, "learning_rate": 2.9925844342204725e-06, "loss": 0.146, "step": 8032 }, { "epoch": 0.64, "grad_norm": 1.351549217559366, "learning_rate": 2.9913979947147166e-06, "loss": 0.1784, "step": 8033 }, { "epoch": 0.64, "grad_norm": 1.259593173882677, "learning_rate": 2.9902116900525012e-06, "loss": 0.1622, "step": 8034 }, { "epoch": 0.64, "grad_norm": 1.4140991981829438, "learning_rate": 2.9890255203134622e-06, "loss": 0.1991, "step": 8035 }, { "epoch": 0.64, "grad_norm": 1.2024614312362996, "learning_rate": 2.9878394855772343e-06, "loss": 0.1787, "step": 8036 }, { "epoch": 0.64, "grad_norm": 1.4269016367493343, "learning_rate": 2.9866535859234377e-06, "loss": 0.1427, "step": 8037 }, { "epoch": 0.64, "grad_norm": 1.3466872664107317, "learning_rate": 2.9854678214316875e-06, "loss": 0.1844, "step": 8038 }, { "epoch": 0.64, "grad_norm": 1.2845359808395693, "learning_rate": 2.984282192181586e-06, "loss": 0.1622, "step": 8039 }, { "epoch": 0.64, "grad_norm": 1.5712207498775685, "learning_rate": 2.983096698252726e-06, "loss": 0.2158, "step": 8040 }, { "epoch": 0.64, "grad_norm": 1.1640605388889678, "learning_rate": 2.9819113397246985e-06, "loss": 0.1705, "step": 8041 }, { "epoch": 0.64, "grad_norm": 1.2466452164168533, "learning_rate": 2.980726116677075e-06, "loss": 0.1823, "step": 8042 }, { "epoch": 0.64, "grad_norm": 1.376129183381196, "learning_rate": 2.9795410291894267e-06, "loss": 0.1786, "step": 8043 }, { "epoch": 0.64, "grad_norm": 1.3002618051340231, "learning_rate": 2.97835607734131e-06, "loss": 0.1786, "step": 8044 }, { "epoch": 0.64, "grad_norm": 1.3385294258781577, "learning_rate": 2.9771712612122765e-06, "loss": 0.1619, "step": 8045 }, { "epoch": 0.64, "grad_norm": 1.324770729323476, "learning_rate": 2.975986580881865e-06, "loss": 0.1803, "step": 8046 }, { "epoch": 0.64, "grad_norm": 1.1993692767461797, "learning_rate": 2.9748020364296054e-06, "loss": 0.1486, "step": 8047 }, { "epoch": 0.64, "grad_norm": 1.3557773116067577, "learning_rate": 2.973617627935023e-06, "loss": 0.1899, "step": 8048 }, { "epoch": 0.64, "grad_norm": 1.2914036560602702, "learning_rate": 2.9724333554776276e-06, "loss": 0.1857, "step": 8049 }, { "epoch": 0.64, "grad_norm": 1.3742001099535555, "learning_rate": 2.9712492191369245e-06, "loss": 0.1654, "step": 8050 }, { "epoch": 0.64, "grad_norm": 1.387449563862524, "learning_rate": 2.9700652189924075e-06, "loss": 0.1845, "step": 8051 }, { "epoch": 0.64, "grad_norm": 1.4989405709507886, "learning_rate": 2.968881355123565e-06, "loss": 0.1684, "step": 8052 }, { "epoch": 0.64, "grad_norm": 1.4434483826255395, "learning_rate": 2.967697627609869e-06, "loss": 0.2085, "step": 8053 }, { "epoch": 0.64, "grad_norm": 1.3887033350360027, "learning_rate": 2.96651403653079e-06, "loss": 0.1721, "step": 8054 }, { "epoch": 0.64, "grad_norm": 1.2300670224017218, "learning_rate": 2.965330581965786e-06, "loss": 0.1848, "step": 8055 }, { "epoch": 0.64, "grad_norm": 8.852549892498079, "learning_rate": 2.9641472639943025e-06, "loss": 0.6236, "step": 8056 }, { "epoch": 0.64, "grad_norm": 7.992211664224177, "learning_rate": 2.9629640826957827e-06, "loss": 0.6342, "step": 8057 }, { "epoch": 0.64, "grad_norm": 1.3452607159548737, "learning_rate": 2.961781038149656e-06, "loss": 0.1326, "step": 8058 }, { "epoch": 0.64, "grad_norm": 1.3716831958929907, "learning_rate": 2.9605981304353443e-06, "loss": 0.1663, "step": 8059 }, { "epoch": 0.64, "grad_norm": 1.3581235994240486, "learning_rate": 2.959415359632257e-06, "loss": 0.1741, "step": 8060 }, { "epoch": 0.64, "grad_norm": 1.1883025077721634, "learning_rate": 2.9582327258198016e-06, "loss": 0.1623, "step": 8061 }, { "epoch": 0.64, "grad_norm": 1.2389728084338965, "learning_rate": 2.957050229077368e-06, "loss": 0.1459, "step": 8062 }, { "epoch": 0.65, "grad_norm": 1.3688091674913991, "learning_rate": 2.9558678694843446e-06, "loss": 0.2218, "step": 8063 }, { "epoch": 0.65, "grad_norm": 1.3007065479293345, "learning_rate": 2.9546856471201046e-06, "loss": 0.2152, "step": 8064 }, { "epoch": 0.65, "grad_norm": 1.406598720099492, "learning_rate": 2.9535035620640117e-06, "loss": 0.2147, "step": 8065 }, { "epoch": 0.65, "grad_norm": 1.4860810740630899, "learning_rate": 2.952321614395427e-06, "loss": 0.2069, "step": 8066 }, { "epoch": 0.65, "grad_norm": 1.3962717353760248, "learning_rate": 2.9511398041936952e-06, "loss": 0.1952, "step": 8067 }, { "epoch": 0.65, "grad_norm": 1.3240819203873126, "learning_rate": 2.949958131538158e-06, "loss": 0.1792, "step": 8068 }, { "epoch": 0.65, "grad_norm": 1.3077646152931324, "learning_rate": 2.9487765965081417e-06, "loss": 0.1752, "step": 8069 }, { "epoch": 0.65, "grad_norm": 1.3380654715786018, "learning_rate": 2.9475951991829676e-06, "loss": 0.2152, "step": 8070 }, { "epoch": 0.65, "grad_norm": 1.4176775022314827, "learning_rate": 2.9464139396419457e-06, "loss": 0.1885, "step": 8071 }, { "epoch": 0.65, "grad_norm": 5.800748759644525, "learning_rate": 2.9452328179643797e-06, "loss": 0.4841, "step": 8072 }, { "epoch": 0.65, "grad_norm": 1.3836659358370496, "learning_rate": 2.9440518342295598e-06, "loss": 0.1716, "step": 8073 }, { "epoch": 0.65, "grad_norm": 1.4362636113371143, "learning_rate": 2.9428709885167685e-06, "loss": 0.1945, "step": 8074 }, { "epoch": 0.65, "grad_norm": 1.3339571131382026, "learning_rate": 2.9416902809052817e-06, "loss": 0.2269, "step": 8075 }, { "epoch": 0.65, "grad_norm": 1.2823320417334052, "learning_rate": 2.940509711474362e-06, "loss": 0.1585, "step": 8076 }, { "epoch": 0.65, "grad_norm": 1.4026145473441565, "learning_rate": 2.9393292803032656e-06, "loss": 0.1876, "step": 8077 }, { "epoch": 0.65, "grad_norm": 1.3952444802487651, "learning_rate": 2.9381489874712377e-06, "loss": 0.2413, "step": 8078 }, { "epoch": 0.65, "grad_norm": 1.4398183982341632, "learning_rate": 2.936968833057517e-06, "loss": 0.2085, "step": 8079 }, { "epoch": 0.65, "grad_norm": 1.3420214819739829, "learning_rate": 2.9357888171413273e-06, "loss": 0.1935, "step": 8080 }, { "epoch": 0.65, "grad_norm": 1.146966903763939, "learning_rate": 2.93460893980189e-06, "loss": 0.1621, "step": 8081 }, { "epoch": 0.65, "grad_norm": 1.242604526530489, "learning_rate": 2.9334292011184128e-06, "loss": 0.187, "step": 8082 }, { "epoch": 0.65, "grad_norm": 1.2556549049230001, "learning_rate": 2.932249601170094e-06, "loss": 0.1481, "step": 8083 }, { "epoch": 0.65, "grad_norm": 1.3982109169877928, "learning_rate": 2.931070140036124e-06, "loss": 0.2428, "step": 8084 }, { "epoch": 0.65, "grad_norm": 1.433095304380012, "learning_rate": 2.9298908177956843e-06, "loss": 0.1899, "step": 8085 }, { "epoch": 0.65, "grad_norm": 1.3907870019876247, "learning_rate": 2.9287116345279477e-06, "loss": 0.158, "step": 8086 }, { "epoch": 0.65, "grad_norm": 7.493594805341812, "learning_rate": 2.9275325903120734e-06, "loss": 0.6959, "step": 8087 }, { "epoch": 0.65, "grad_norm": 5.164188341112721, "learning_rate": 2.926353685227216e-06, "loss": 0.633, "step": 8088 }, { "epoch": 0.65, "grad_norm": 1.307978765465989, "learning_rate": 2.925174919352517e-06, "loss": 0.1716, "step": 8089 }, { "epoch": 0.65, "grad_norm": 1.3631015633830275, "learning_rate": 2.923996292767115e-06, "loss": 0.1801, "step": 8090 }, { "epoch": 0.65, "grad_norm": 1.2153432465393395, "learning_rate": 2.9228178055501313e-06, "loss": 0.1339, "step": 8091 }, { "epoch": 0.65, "grad_norm": 1.260359282636512, "learning_rate": 2.92163945778068e-06, "loss": 0.1572, "step": 8092 }, { "epoch": 0.65, "grad_norm": 1.218848672577065, "learning_rate": 2.9204612495378716e-06, "loss": 0.1929, "step": 8093 }, { "epoch": 0.65, "grad_norm": 1.377511796936783, "learning_rate": 2.919283180900798e-06, "loss": 0.1922, "step": 8094 }, { "epoch": 0.65, "grad_norm": 1.3042255967785155, "learning_rate": 2.9181052519485496e-06, "loss": 0.1649, "step": 8095 }, { "epoch": 0.65, "grad_norm": 1.4951667941057978, "learning_rate": 2.916927462760204e-06, "loss": 0.1738, "step": 8096 }, { "epoch": 0.65, "grad_norm": 1.370409239861674, "learning_rate": 2.9157498134148294e-06, "loss": 0.2086, "step": 8097 }, { "epoch": 0.65, "grad_norm": 8.652994563352957, "learning_rate": 2.9145723039914826e-06, "loss": 0.6323, "step": 8098 }, { "epoch": 0.65, "grad_norm": 1.3343953595466327, "learning_rate": 2.9133949345692192e-06, "loss": 0.1752, "step": 8099 }, { "epoch": 0.65, "grad_norm": 1.3668372892918503, "learning_rate": 2.912217705227075e-06, "loss": 0.1715, "step": 8100 }, { "epoch": 0.65, "grad_norm": 1.331016076162309, "learning_rate": 2.911040616044079e-06, "loss": 0.1725, "step": 8101 }, { "epoch": 0.65, "grad_norm": 1.2166038551640277, "learning_rate": 2.9098636670992603e-06, "loss": 0.1275, "step": 8102 }, { "epoch": 0.65, "grad_norm": 1.3553323170045553, "learning_rate": 2.908686858471622e-06, "loss": 0.1456, "step": 8103 }, { "epoch": 0.65, "grad_norm": 1.3762522583748829, "learning_rate": 2.9075101902401737e-06, "loss": 0.1327, "step": 8104 }, { "epoch": 0.65, "grad_norm": 1.455919710095317, "learning_rate": 2.9063336624839065e-06, "loss": 0.1996, "step": 8105 }, { "epoch": 0.65, "grad_norm": 1.559671321365754, "learning_rate": 2.9051572752818035e-06, "loss": 0.1798, "step": 8106 }, { "epoch": 0.65, "grad_norm": 1.3350050039751369, "learning_rate": 2.9039810287128408e-06, "loss": 0.1717, "step": 8107 }, { "epoch": 0.65, "grad_norm": 1.5037140018234578, "learning_rate": 2.9028049228559813e-06, "loss": 0.1948, "step": 8108 }, { "epoch": 0.65, "grad_norm": 1.3383606511889683, "learning_rate": 2.901628957790182e-06, "loss": 0.2034, "step": 8109 }, { "epoch": 0.65, "grad_norm": 1.5419290032589072, "learning_rate": 2.9004531335943865e-06, "loss": 0.1888, "step": 8110 }, { "epoch": 0.65, "grad_norm": 1.0135988291228506, "learning_rate": 2.8992774503475373e-06, "loss": 0.1167, "step": 8111 }, { "epoch": 0.65, "grad_norm": 1.22977531257775, "learning_rate": 2.8981019081285546e-06, "loss": 0.1707, "step": 8112 }, { "epoch": 0.65, "grad_norm": 1.4768854382936154, "learning_rate": 2.89692650701636e-06, "loss": 0.2437, "step": 8113 }, { "epoch": 0.65, "grad_norm": 1.2324231092387152, "learning_rate": 2.895751247089862e-06, "loss": 0.1577, "step": 8114 }, { "epoch": 0.65, "grad_norm": 1.3448048546626383, "learning_rate": 2.8945761284279583e-06, "loss": 0.2127, "step": 8115 }, { "epoch": 0.65, "grad_norm": 1.3054498520217608, "learning_rate": 2.8934011511095376e-06, "loss": 0.1833, "step": 8116 }, { "epoch": 0.65, "grad_norm": 1.1734087151844261, "learning_rate": 2.892226315213481e-06, "loss": 0.1632, "step": 8117 }, { "epoch": 0.65, "grad_norm": 1.4091496751575994, "learning_rate": 2.8910516208186578e-06, "loss": 0.2047, "step": 8118 }, { "epoch": 0.65, "grad_norm": 1.4823502947624343, "learning_rate": 2.88987706800393e-06, "loss": 0.24, "step": 8119 }, { "epoch": 0.65, "grad_norm": 8.821066701982557, "learning_rate": 2.888702656848147e-06, "loss": 0.5307, "step": 8120 }, { "epoch": 0.65, "grad_norm": 1.4653192839245515, "learning_rate": 2.88752838743015e-06, "loss": 0.2012, "step": 8121 }, { "epoch": 0.65, "grad_norm": 1.6209104197072142, "learning_rate": 2.8863542598287775e-06, "loss": 0.1911, "step": 8122 }, { "epoch": 0.65, "grad_norm": 1.4629892858105946, "learning_rate": 2.885180274122843e-06, "loss": 0.2104, "step": 8123 }, { "epoch": 0.65, "grad_norm": 1.5341898147874118, "learning_rate": 2.8840064303911662e-06, "loss": 0.2034, "step": 8124 }, { "epoch": 0.65, "grad_norm": 1.3724469970331425, "learning_rate": 2.882832728712551e-06, "loss": 0.1604, "step": 8125 }, { "epoch": 0.65, "grad_norm": 1.337702243188611, "learning_rate": 2.881659169165786e-06, "loss": 0.1573, "step": 8126 }, { "epoch": 0.65, "grad_norm": 1.26593516140244, "learning_rate": 2.88048575182966e-06, "loss": 0.134, "step": 8127 }, { "epoch": 0.65, "grad_norm": 1.1783578134638426, "learning_rate": 2.879312476782949e-06, "loss": 0.1731, "step": 8128 }, { "epoch": 0.65, "grad_norm": 1.216165217306844, "learning_rate": 2.878139344104416e-06, "loss": 0.1544, "step": 8129 }, { "epoch": 0.65, "grad_norm": 1.4986083017097345, "learning_rate": 2.8769663538728174e-06, "loss": 0.2236, "step": 8130 }, { "epoch": 0.65, "grad_norm": 1.6044003516611425, "learning_rate": 2.875793506166901e-06, "loss": 0.2208, "step": 8131 }, { "epoch": 0.65, "grad_norm": 1.266545184443366, "learning_rate": 2.8746208010654e-06, "loss": 0.1649, "step": 8132 }, { "epoch": 0.65, "grad_norm": 1.2636217835007633, "learning_rate": 2.8734482386470486e-06, "loss": 0.1585, "step": 8133 }, { "epoch": 0.65, "grad_norm": 1.3715053948714575, "learning_rate": 2.872275818990558e-06, "loss": 0.1613, "step": 8134 }, { "epoch": 0.65, "grad_norm": 1.2228003457887464, "learning_rate": 2.871103542174637e-06, "loss": 0.1515, "step": 8135 }, { "epoch": 0.65, "grad_norm": 1.612261722423232, "learning_rate": 2.8699314082779884e-06, "loss": 0.185, "step": 8136 }, { "epoch": 0.65, "grad_norm": 1.2198719025746336, "learning_rate": 2.868759417379295e-06, "loss": 0.1533, "step": 8137 }, { "epoch": 0.65, "grad_norm": 1.1465738644507792, "learning_rate": 2.867587569557241e-06, "loss": 0.1436, "step": 8138 }, { "epoch": 0.65, "grad_norm": 1.2981937212751469, "learning_rate": 2.866415864890494e-06, "loss": 0.1858, "step": 8139 }, { "epoch": 0.65, "grad_norm": 1.3482742108232029, "learning_rate": 2.865244303457715e-06, "loss": 0.1747, "step": 8140 }, { "epoch": 0.65, "grad_norm": 1.60813444855071, "learning_rate": 2.8640728853375545e-06, "loss": 0.1933, "step": 8141 }, { "epoch": 0.65, "grad_norm": 1.3532145849527302, "learning_rate": 2.8629016106086515e-06, "loss": 0.1921, "step": 8142 }, { "epoch": 0.65, "grad_norm": 1.4498115384857877, "learning_rate": 2.8617304793496393e-06, "loss": 0.1859, "step": 8143 }, { "epoch": 0.65, "grad_norm": 1.3016485857741171, "learning_rate": 2.8605594916391388e-06, "loss": 0.1865, "step": 8144 }, { "epoch": 0.65, "grad_norm": 1.1928587714221008, "learning_rate": 2.859388647555762e-06, "loss": 0.1497, "step": 8145 }, { "epoch": 0.65, "grad_norm": 9.82991062665454, "learning_rate": 2.8582179471781086e-06, "loss": 0.5546, "step": 8146 }, { "epoch": 0.65, "grad_norm": 1.2247550776926104, "learning_rate": 2.8570473905847764e-06, "loss": 0.1908, "step": 8147 }, { "epoch": 0.65, "grad_norm": 1.1224005157470753, "learning_rate": 2.855876977854345e-06, "loss": 0.1476, "step": 8148 }, { "epoch": 0.65, "grad_norm": 1.2108798789993558, "learning_rate": 2.854706709065389e-06, "loss": 0.1385, "step": 8149 }, { "epoch": 0.65, "grad_norm": 16.25821285418179, "learning_rate": 2.8535365842964713e-06, "loss": 0.4215, "step": 8150 }, { "epoch": 0.65, "grad_norm": 1.3856642732435283, "learning_rate": 2.8523666036261467e-06, "loss": 0.1813, "step": 8151 }, { "epoch": 0.65, "grad_norm": 1.3401070247680038, "learning_rate": 2.851196767132958e-06, "loss": 0.1782, "step": 8152 }, { "epoch": 0.65, "grad_norm": 1.508413431160601, "learning_rate": 2.8500270748954417e-06, "loss": 0.1966, "step": 8153 }, { "epoch": 0.65, "grad_norm": 8.72845395549302, "learning_rate": 2.8488575269921227e-06, "loss": 0.5491, "step": 8154 }, { "epoch": 0.65, "grad_norm": 1.4148164142055286, "learning_rate": 2.8476881235015126e-06, "loss": 0.1645, "step": 8155 }, { "epoch": 0.65, "grad_norm": 1.425671154122458, "learning_rate": 2.846518864502124e-06, "loss": 0.1921, "step": 8156 }, { "epoch": 0.65, "grad_norm": 1.278035632164817, "learning_rate": 2.8453497500724454e-06, "loss": 0.1646, "step": 8157 }, { "epoch": 0.65, "grad_norm": 1.3269113152611829, "learning_rate": 2.844180780290968e-06, "loss": 0.1378, "step": 8158 }, { "epoch": 0.65, "grad_norm": 1.401812305703745, "learning_rate": 2.8430119552361668e-06, "loss": 0.2111, "step": 8159 }, { "epoch": 0.65, "grad_norm": 1.38742239160579, "learning_rate": 2.841843274986509e-06, "loss": 0.1788, "step": 8160 }, { "epoch": 0.65, "grad_norm": 7.372275527640766, "learning_rate": 2.8406747396204505e-06, "loss": 0.6411, "step": 8161 }, { "epoch": 0.65, "grad_norm": 1.4888021117244161, "learning_rate": 2.8395063492164387e-06, "loss": 0.1985, "step": 8162 }, { "epoch": 0.65, "grad_norm": 1.3641315841153145, "learning_rate": 2.8383381038529125e-06, "loss": 0.1613, "step": 8163 }, { "epoch": 0.65, "grad_norm": 1.3375085842299907, "learning_rate": 2.8371700036082987e-06, "loss": 0.1795, "step": 8164 }, { "epoch": 0.65, "grad_norm": 1.312796766957599, "learning_rate": 2.8360020485610164e-06, "loss": 0.1685, "step": 8165 }, { "epoch": 0.65, "grad_norm": 1.362974841797472, "learning_rate": 2.83483423878947e-06, "loss": 0.1694, "step": 8166 }, { "epoch": 0.65, "grad_norm": 1.3134154835935457, "learning_rate": 2.8336665743720657e-06, "loss": 0.1918, "step": 8167 }, { "epoch": 0.65, "grad_norm": 1.4466690825315403, "learning_rate": 2.832499055387187e-06, "loss": 0.162, "step": 8168 }, { "epoch": 0.65, "grad_norm": 1.2437395164720058, "learning_rate": 2.8313316819132107e-06, "loss": 0.1911, "step": 8169 }, { "epoch": 0.65, "grad_norm": 1.3252796866560008, "learning_rate": 2.8301644540285137e-06, "loss": 0.1846, "step": 8170 }, { "epoch": 0.65, "grad_norm": 1.4107240030359327, "learning_rate": 2.8289973718114476e-06, "loss": 0.1698, "step": 8171 }, { "epoch": 0.65, "grad_norm": 1.471574696148342, "learning_rate": 2.8278304353403673e-06, "loss": 0.2125, "step": 8172 }, { "epoch": 0.65, "grad_norm": 1.3324876747937011, "learning_rate": 2.8266636446936126e-06, "loss": 0.2325, "step": 8173 }, { "epoch": 0.65, "grad_norm": 1.0812886995315816, "learning_rate": 2.825496999949511e-06, "loss": 0.1245, "step": 8174 }, { "epoch": 0.65, "grad_norm": 1.420777595369157, "learning_rate": 2.8243305011863843e-06, "loss": 0.1818, "step": 8175 }, { "epoch": 0.65, "grad_norm": 1.4177489202985598, "learning_rate": 2.8231641484825435e-06, "loss": 0.1661, "step": 8176 }, { "epoch": 0.65, "grad_norm": 1.5638281700479613, "learning_rate": 2.821997941916289e-06, "loss": 0.2321, "step": 8177 }, { "epoch": 0.65, "grad_norm": 1.3840143267847742, "learning_rate": 2.820831881565912e-06, "loss": 0.2133, "step": 8178 }, { "epoch": 0.65, "grad_norm": 1.255900802878477, "learning_rate": 2.8196659675096925e-06, "loss": 0.154, "step": 8179 }, { "epoch": 0.65, "grad_norm": 1.413599801881487, "learning_rate": 2.818500199825902e-06, "loss": 0.1688, "step": 8180 }, { "epoch": 0.65, "grad_norm": 1.2990797834020438, "learning_rate": 2.8173345785928057e-06, "loss": 0.1799, "step": 8181 }, { "epoch": 0.65, "grad_norm": 1.3613083596592144, "learning_rate": 2.8161691038886486e-06, "loss": 0.1862, "step": 8182 }, { "epoch": 0.65, "grad_norm": 6.307526131543236, "learning_rate": 2.815003775791679e-06, "loss": 0.567, "step": 8183 }, { "epoch": 0.65, "grad_norm": 6.836551021659636, "learning_rate": 2.8138385943801248e-06, "loss": 0.4816, "step": 8184 }, { "epoch": 0.65, "grad_norm": 1.2042362627583352, "learning_rate": 2.812673559732211e-06, "loss": 0.1902, "step": 8185 }, { "epoch": 0.65, "grad_norm": 1.4214243612726447, "learning_rate": 2.8115086719261474e-06, "loss": 0.183, "step": 8186 }, { "epoch": 0.65, "grad_norm": 1.2489295891226895, "learning_rate": 2.810343931040138e-06, "loss": 0.1757, "step": 8187 }, { "epoch": 0.66, "grad_norm": 1.219105045203035, "learning_rate": 2.8091793371523758e-06, "loss": 0.1803, "step": 8188 }, { "epoch": 0.66, "grad_norm": 8.90324681193005, "learning_rate": 2.8080148903410392e-06, "loss": 0.4731, "step": 8189 }, { "epoch": 0.66, "grad_norm": 1.3884067642615159, "learning_rate": 2.806850590684309e-06, "loss": 0.2018, "step": 8190 }, { "epoch": 0.66, "grad_norm": 5.08733715561001, "learning_rate": 2.80568643826034e-06, "loss": 0.6197, "step": 8191 }, { "epoch": 0.66, "grad_norm": 1.2197716814846384, "learning_rate": 2.8045224331472905e-06, "loss": 0.2167, "step": 8192 }, { "epoch": 0.66, "grad_norm": 1.3637476702772617, "learning_rate": 2.8033585754233016e-06, "loss": 0.1425, "step": 8193 }, { "epoch": 0.66, "grad_norm": 1.3124378630396867, "learning_rate": 2.8021948651665076e-06, "loss": 0.1562, "step": 8194 }, { "epoch": 0.66, "grad_norm": 1.4023071019221858, "learning_rate": 2.801031302455032e-06, "loss": 0.2013, "step": 8195 }, { "epoch": 0.66, "grad_norm": 1.4764818449105874, "learning_rate": 2.7998678873669865e-06, "loss": 0.2047, "step": 8196 }, { "epoch": 0.66, "grad_norm": 1.2809537032740461, "learning_rate": 2.798704619980477e-06, "loss": 0.1484, "step": 8197 }, { "epoch": 0.66, "grad_norm": 1.4262301916846456, "learning_rate": 2.797541500373595e-06, "loss": 0.1735, "step": 8198 }, { "epoch": 0.66, "grad_norm": 1.660828530205685, "learning_rate": 2.796378528624426e-06, "loss": 0.1954, "step": 8199 }, { "epoch": 0.66, "grad_norm": 1.5164652248087216, "learning_rate": 2.7952157048110406e-06, "loss": 0.1801, "step": 8200 }, { "epoch": 0.66, "grad_norm": 1.3383935882724372, "learning_rate": 2.7940530290115093e-06, "loss": 0.1811, "step": 8201 }, { "epoch": 0.66, "grad_norm": 1.4938821995861717, "learning_rate": 2.7928905013038774e-06, "loss": 0.2521, "step": 8202 }, { "epoch": 0.66, "grad_norm": 1.3692213093388257, "learning_rate": 2.791728121766196e-06, "loss": 0.1803, "step": 8203 }, { "epoch": 0.66, "grad_norm": 1.447852193673213, "learning_rate": 2.7905658904764994e-06, "loss": 0.1818, "step": 8204 }, { "epoch": 0.66, "grad_norm": 1.326692920546855, "learning_rate": 2.7894038075128038e-06, "loss": 0.1506, "step": 8205 }, { "epoch": 0.66, "grad_norm": 1.347939193197956, "learning_rate": 2.7882418729531307e-06, "loss": 0.1646, "step": 8206 }, { "epoch": 0.66, "grad_norm": 10.811364101410414, "learning_rate": 2.787080086875482e-06, "loss": 0.3927, "step": 8207 }, { "epoch": 0.66, "grad_norm": 1.3363433683173385, "learning_rate": 2.7859184493578517e-06, "loss": 0.1526, "step": 8208 }, { "epoch": 0.66, "grad_norm": 8.880797734402494, "learning_rate": 2.784756960478225e-06, "loss": 0.6027, "step": 8209 }, { "epoch": 0.66, "grad_norm": 1.4408869393439778, "learning_rate": 2.7835956203145754e-06, "loss": 0.2133, "step": 8210 }, { "epoch": 0.66, "grad_norm": 1.207624716735158, "learning_rate": 2.7824344289448646e-06, "loss": 0.1658, "step": 8211 }, { "epoch": 0.66, "grad_norm": 1.3730893811206233, "learning_rate": 2.7812733864470536e-06, "loss": 0.1733, "step": 8212 }, { "epoch": 0.66, "grad_norm": 5.743111497454227, "learning_rate": 2.780112492899081e-06, "loss": 0.4282, "step": 8213 }, { "epoch": 0.66, "grad_norm": 1.3203663741074512, "learning_rate": 2.7789517483788807e-06, "loss": 0.2028, "step": 8214 }, { "epoch": 0.66, "grad_norm": 1.2203683058740593, "learning_rate": 2.777791152964383e-06, "loss": 0.171, "step": 8215 }, { "epoch": 0.66, "grad_norm": 1.4042304145668758, "learning_rate": 2.776630706733494e-06, "loss": 0.1841, "step": 8216 }, { "epoch": 0.66, "grad_norm": 1.2677535200978691, "learning_rate": 2.7754704097641246e-06, "loss": 0.1756, "step": 8217 }, { "epoch": 0.66, "grad_norm": 1.4118838062054786, "learning_rate": 2.7743102621341666e-06, "loss": 0.2069, "step": 8218 }, { "epoch": 0.66, "grad_norm": 1.3425218361546978, "learning_rate": 2.7731502639215037e-06, "loss": 0.1897, "step": 8219 }, { "epoch": 0.66, "grad_norm": 1.272412169417104, "learning_rate": 2.7719904152040112e-06, "loss": 0.1637, "step": 8220 }, { "epoch": 0.66, "grad_norm": 1.491814432811376, "learning_rate": 2.7708307160595526e-06, "loss": 0.1903, "step": 8221 }, { "epoch": 0.66, "grad_norm": 1.3821696297160513, "learning_rate": 2.7696711665659825e-06, "loss": 0.1684, "step": 8222 }, { "epoch": 0.66, "grad_norm": 1.5353597017467464, "learning_rate": 2.768511766801144e-06, "loss": 0.1978, "step": 8223 }, { "epoch": 0.66, "grad_norm": 1.4580019435728244, "learning_rate": 2.7673525168428716e-06, "loss": 0.1835, "step": 8224 }, { "epoch": 0.66, "grad_norm": 1.2614078371469561, "learning_rate": 2.7661934167689887e-06, "loss": 0.1713, "step": 8225 }, { "epoch": 0.66, "grad_norm": 1.3411207250056796, "learning_rate": 2.7650344666573125e-06, "loss": 0.1797, "step": 8226 }, { "epoch": 0.66, "grad_norm": 1.2989201034624076, "learning_rate": 2.7638756665856402e-06, "loss": 0.1708, "step": 8227 }, { "epoch": 0.66, "grad_norm": 1.4212189933161887, "learning_rate": 2.762717016631773e-06, "loss": 0.1769, "step": 8228 }, { "epoch": 0.66, "grad_norm": 6.394863677016402, "learning_rate": 2.7615585168734915e-06, "loss": 0.5518, "step": 8229 }, { "epoch": 0.66, "grad_norm": 1.4444900450669798, "learning_rate": 2.760400167388566e-06, "loss": 0.2489, "step": 8230 }, { "epoch": 0.66, "grad_norm": 1.4244509237057872, "learning_rate": 2.759241968254765e-06, "loss": 0.1775, "step": 8231 }, { "epoch": 0.66, "grad_norm": 10.386791454415684, "learning_rate": 2.7580839195498397e-06, "loss": 0.6115, "step": 8232 }, { "epoch": 0.66, "grad_norm": 1.3149351229111497, "learning_rate": 2.7569260213515336e-06, "loss": 0.1761, "step": 8233 }, { "epoch": 0.66, "grad_norm": 1.442269624319686, "learning_rate": 2.755768273737578e-06, "loss": 0.2038, "step": 8234 }, { "epoch": 0.66, "grad_norm": 1.3286050596293064, "learning_rate": 2.754610676785702e-06, "loss": 0.1709, "step": 8235 }, { "epoch": 0.66, "grad_norm": 1.297555810338083, "learning_rate": 2.7534532305736094e-06, "loss": 0.1648, "step": 8236 }, { "epoch": 0.66, "grad_norm": 1.4306340058496405, "learning_rate": 2.752295935179011e-06, "loss": 0.2198, "step": 8237 }, { "epoch": 0.66, "grad_norm": 1.4975989650519839, "learning_rate": 2.7511387906795996e-06, "loss": 0.195, "step": 8238 }, { "epoch": 0.66, "grad_norm": 1.344218308312577, "learning_rate": 2.7499817971530502e-06, "loss": 0.1502, "step": 8239 }, { "epoch": 0.66, "grad_norm": 1.3996292929523575, "learning_rate": 2.748824954677042e-06, "loss": 0.2092, "step": 8240 }, { "epoch": 0.66, "grad_norm": 1.5792798855200623, "learning_rate": 2.747668263329235e-06, "loss": 0.2384, "step": 8241 }, { "epoch": 0.66, "grad_norm": 1.4674512480717126, "learning_rate": 2.746511723187283e-06, "loss": 0.1591, "step": 8242 }, { "epoch": 0.66, "grad_norm": 1.325400646463198, "learning_rate": 2.745355334328826e-06, "loss": 0.1643, "step": 8243 }, { "epoch": 0.66, "grad_norm": 1.2124832975218465, "learning_rate": 2.7441990968314967e-06, "loss": 0.1519, "step": 8244 }, { "epoch": 0.66, "grad_norm": 1.384503602650137, "learning_rate": 2.7430430107729144e-06, "loss": 0.1704, "step": 8245 }, { "epoch": 0.66, "grad_norm": 1.3893304988749504, "learning_rate": 2.741887076230698e-06, "loss": 0.2218, "step": 8246 }, { "epoch": 0.66, "grad_norm": 1.4415206907102687, "learning_rate": 2.7407312932824416e-06, "loss": 0.1896, "step": 8247 }, { "epoch": 0.66, "grad_norm": 1.4453486737555465, "learning_rate": 2.739575662005737e-06, "loss": 0.1939, "step": 8248 }, { "epoch": 0.66, "grad_norm": 1.5658119240728812, "learning_rate": 2.738420182478171e-06, "loss": 0.1918, "step": 8249 }, { "epoch": 0.66, "grad_norm": 1.3169004691334731, "learning_rate": 2.7372648547773063e-06, "loss": 0.1835, "step": 8250 }, { "epoch": 0.66, "grad_norm": 1.4347636850761758, "learning_rate": 2.7361096789807097e-06, "loss": 0.1774, "step": 8251 }, { "epoch": 0.66, "grad_norm": 7.453222150367343, "learning_rate": 2.734954655165929e-06, "loss": 0.6196, "step": 8252 }, { "epoch": 0.66, "grad_norm": 1.246336446218398, "learning_rate": 2.7337997834105058e-06, "loss": 0.1807, "step": 8253 }, { "epoch": 0.66, "grad_norm": 1.5502133061418724, "learning_rate": 2.7326450637919694e-06, "loss": 0.189, "step": 8254 }, { "epoch": 0.66, "grad_norm": 1.3116290106849469, "learning_rate": 2.7314904963878397e-06, "loss": 0.1722, "step": 8255 }, { "epoch": 0.66, "grad_norm": 1.3606258011633294, "learning_rate": 2.7303360812756265e-06, "loss": 0.1673, "step": 8256 }, { "epoch": 0.66, "grad_norm": 15.366059486873533, "learning_rate": 2.729181818532829e-06, "loss": 0.6163, "step": 8257 }, { "epoch": 0.66, "grad_norm": 1.2952209935395895, "learning_rate": 2.7280277082369356e-06, "loss": 0.1254, "step": 8258 }, { "epoch": 0.66, "grad_norm": 1.2057614639652108, "learning_rate": 2.7268737504654242e-06, "loss": 0.1557, "step": 8259 }, { "epoch": 0.66, "grad_norm": 1.3392405405592644, "learning_rate": 2.7257199452957693e-06, "loss": 0.1514, "step": 8260 }, { "epoch": 0.66, "grad_norm": 9.687759815561765, "learning_rate": 2.724566292805422e-06, "loss": 0.6903, "step": 8261 }, { "epoch": 0.66, "grad_norm": 8.295979724309028, "learning_rate": 2.7234127930718354e-06, "loss": 0.5088, "step": 8262 }, { "epoch": 0.66, "grad_norm": 1.4013456438350602, "learning_rate": 2.722259446172445e-06, "loss": 0.1751, "step": 8263 }, { "epoch": 0.66, "grad_norm": 10.662885107094121, "learning_rate": 2.7211062521846798e-06, "loss": 0.3866, "step": 8264 }, { "epoch": 0.66, "grad_norm": 1.4904595686156994, "learning_rate": 2.719953211185957e-06, "loss": 0.1731, "step": 8265 }, { "epoch": 0.66, "grad_norm": 1.5217263606407998, "learning_rate": 2.718800323253683e-06, "loss": 0.1701, "step": 8266 }, { "epoch": 0.66, "grad_norm": 1.3857532103780656, "learning_rate": 2.717647588465255e-06, "loss": 0.1918, "step": 8267 }, { "epoch": 0.66, "grad_norm": 1.4207325461457037, "learning_rate": 2.71649500689806e-06, "loss": 0.2283, "step": 8268 }, { "epoch": 0.66, "grad_norm": 1.4075779665781691, "learning_rate": 2.715342578629474e-06, "loss": 0.2127, "step": 8269 }, { "epoch": 0.66, "grad_norm": 1.337208250402908, "learning_rate": 2.71419030373686e-06, "loss": 0.165, "step": 8270 }, { "epoch": 0.66, "grad_norm": 1.4014631208363169, "learning_rate": 2.7130381822975816e-06, "loss": 0.182, "step": 8271 }, { "epoch": 0.66, "grad_norm": 1.3454458999667605, "learning_rate": 2.7118862143889745e-06, "loss": 0.1751, "step": 8272 }, { "epoch": 0.66, "grad_norm": 1.5390851863572763, "learning_rate": 2.7107344000883815e-06, "loss": 0.204, "step": 8273 }, { "epoch": 0.66, "grad_norm": 1.5298681987468916, "learning_rate": 2.7095827394731255e-06, "loss": 0.1912, "step": 8274 }, { "epoch": 0.66, "grad_norm": 1.2659084111637122, "learning_rate": 2.7084312326205164e-06, "loss": 0.1502, "step": 8275 }, { "epoch": 0.66, "grad_norm": 7.949398461314595, "learning_rate": 2.7072798796078637e-06, "loss": 0.5114, "step": 8276 }, { "epoch": 0.66, "grad_norm": 1.434917861837227, "learning_rate": 2.7061286805124597e-06, "loss": 0.1865, "step": 8277 }, { "epoch": 0.66, "grad_norm": 1.2556630682747942, "learning_rate": 2.704977635411587e-06, "loss": 0.1752, "step": 8278 }, { "epoch": 0.66, "grad_norm": 1.2628487014630823, "learning_rate": 2.7038267443825174e-06, "loss": 0.1558, "step": 8279 }, { "epoch": 0.66, "grad_norm": 1.388196570661171, "learning_rate": 2.7026760075025195e-06, "loss": 0.1524, "step": 8280 }, { "epoch": 0.66, "grad_norm": 1.3276699165992805, "learning_rate": 2.701525424848838e-06, "loss": 0.1766, "step": 8281 }, { "epoch": 0.66, "grad_norm": 1.2410157063968865, "learning_rate": 2.7003749964987215e-06, "loss": 0.161, "step": 8282 }, { "epoch": 0.66, "grad_norm": 1.2036379745242725, "learning_rate": 2.6992247225294e-06, "loss": 0.1738, "step": 8283 }, { "epoch": 0.66, "grad_norm": 9.663086822592764, "learning_rate": 2.6980746030180904e-06, "loss": 0.5636, "step": 8284 }, { "epoch": 0.66, "grad_norm": 0.9928667480651018, "learning_rate": 2.6969246380420088e-06, "loss": 0.1242, "step": 8285 }, { "epoch": 0.66, "grad_norm": 1.3942696023362178, "learning_rate": 2.6957748276783547e-06, "loss": 0.1748, "step": 8286 }, { "epoch": 0.66, "grad_norm": 1.534515750358792, "learning_rate": 2.694625172004318e-06, "loss": 0.1972, "step": 8287 }, { "epoch": 0.66, "grad_norm": 1.2741706467113365, "learning_rate": 2.693475671097079e-06, "loss": 0.1876, "step": 8288 }, { "epoch": 0.66, "grad_norm": 1.1634624021256645, "learning_rate": 2.6923263250338056e-06, "loss": 0.1495, "step": 8289 }, { "epoch": 0.66, "grad_norm": 6.739318096948371, "learning_rate": 2.691177133891658e-06, "loss": 0.5646, "step": 8290 }, { "epoch": 0.66, "grad_norm": 1.4530826292526324, "learning_rate": 2.6900280977477855e-06, "loss": 0.1933, "step": 8291 }, { "epoch": 0.66, "grad_norm": 1.2264394114662693, "learning_rate": 2.688879216679326e-06, "loss": 0.1251, "step": 8292 }, { "epoch": 0.66, "grad_norm": 1.4737147977735179, "learning_rate": 2.6877304907634045e-06, "loss": 0.2399, "step": 8293 }, { "epoch": 0.66, "grad_norm": 1.2986615672670123, "learning_rate": 2.6865819200771455e-06, "loss": 0.1724, "step": 8294 }, { "epoch": 0.66, "grad_norm": 1.2335084138572132, "learning_rate": 2.685433504697647e-06, "loss": 0.1578, "step": 8295 }, { "epoch": 0.66, "grad_norm": 1.3642247260447673, "learning_rate": 2.684285244702013e-06, "loss": 0.2141, "step": 8296 }, { "epoch": 0.66, "grad_norm": 1.561878618907496, "learning_rate": 2.6831371401673257e-06, "loss": 0.1974, "step": 8297 }, { "epoch": 0.66, "grad_norm": 1.6222764211564789, "learning_rate": 2.681989191170663e-06, "loss": 0.1931, "step": 8298 }, { "epoch": 0.66, "grad_norm": 1.1706686783812512, "learning_rate": 2.680841397789089e-06, "loss": 0.156, "step": 8299 }, { "epoch": 0.66, "grad_norm": 1.2836917250516335, "learning_rate": 2.6796937600996587e-06, "loss": 0.2042, "step": 8300 }, { "epoch": 0.66, "grad_norm": 1.2446628252941623, "learning_rate": 2.6785462781794158e-06, "loss": 0.1666, "step": 8301 }, { "epoch": 0.66, "grad_norm": 1.363875117129697, "learning_rate": 2.6773989521053955e-06, "loss": 0.1765, "step": 8302 }, { "epoch": 0.66, "grad_norm": 1.4197133687724275, "learning_rate": 2.676251781954621e-06, "loss": 0.2067, "step": 8303 }, { "epoch": 0.66, "grad_norm": 1.4393984741123935, "learning_rate": 2.6751047678041033e-06, "loss": 0.2207, "step": 8304 }, { "epoch": 0.66, "grad_norm": 1.2378351179754488, "learning_rate": 2.67395790973085e-06, "loss": 0.169, "step": 8305 }, { "epoch": 0.66, "grad_norm": 1.2334454725461579, "learning_rate": 2.672811207811846e-06, "loss": 0.1583, "step": 8306 }, { "epoch": 0.66, "grad_norm": 1.3576199813228453, "learning_rate": 2.6716646621240794e-06, "loss": 0.1632, "step": 8307 }, { "epoch": 0.66, "grad_norm": 1.344384291012976, "learning_rate": 2.6705182727445194e-06, "loss": 0.1721, "step": 8308 }, { "epoch": 0.66, "grad_norm": 1.4233836859669649, "learning_rate": 2.6693720397501228e-06, "loss": 0.1999, "step": 8309 }, { "epoch": 0.66, "grad_norm": 1.3743066738517795, "learning_rate": 2.668225963217844e-06, "loss": 0.1947, "step": 8310 }, { "epoch": 0.66, "grad_norm": 1.6570564427600556, "learning_rate": 2.6670800432246217e-06, "loss": 0.2118, "step": 8311 }, { "epoch": 0.66, "grad_norm": 1.3590532377170166, "learning_rate": 2.6659342798473846e-06, "loss": 0.1893, "step": 8312 }, { "epoch": 0.67, "grad_norm": 1.201603673134636, "learning_rate": 2.664788673163051e-06, "loss": 0.1878, "step": 8313 }, { "epoch": 0.67, "grad_norm": 1.6083432237818807, "learning_rate": 2.66364322324853e-06, "loss": 0.2138, "step": 8314 }, { "epoch": 0.67, "grad_norm": 1.5048847965041539, "learning_rate": 2.662497930180715e-06, "loss": 0.213, "step": 8315 }, { "epoch": 0.67, "grad_norm": 1.1058786738734272, "learning_rate": 2.661352794036501e-06, "loss": 0.1404, "step": 8316 }, { "epoch": 0.67, "grad_norm": 5.69329960554202, "learning_rate": 2.660207814892758e-06, "loss": 0.5211, "step": 8317 }, { "epoch": 0.67, "grad_norm": 1.4223825603456628, "learning_rate": 2.659062992826351e-06, "loss": 0.1947, "step": 8318 }, { "epoch": 0.67, "grad_norm": 1.7434518129438166, "learning_rate": 2.657918327914141e-06, "loss": 0.2038, "step": 8319 }, { "epoch": 0.67, "grad_norm": 1.3918002521049717, "learning_rate": 2.6567738202329684e-06, "loss": 0.1935, "step": 8320 }, { "epoch": 0.67, "grad_norm": 1.2687108962909985, "learning_rate": 2.6556294698596695e-06, "loss": 0.1636, "step": 8321 }, { "epoch": 0.67, "grad_norm": 1.2172700294627605, "learning_rate": 2.6544852768710685e-06, "loss": 0.1513, "step": 8322 }, { "epoch": 0.67, "grad_norm": 1.385810546657486, "learning_rate": 2.6533412413439763e-06, "loss": 0.1997, "step": 8323 }, { "epoch": 0.67, "grad_norm": 1.3369828981952543, "learning_rate": 2.6521973633551957e-06, "loss": 0.1925, "step": 8324 }, { "epoch": 0.67, "grad_norm": 1.3363980843922303, "learning_rate": 2.6510536429815224e-06, "loss": 0.1887, "step": 8325 }, { "epoch": 0.67, "grad_norm": 1.369907471551549, "learning_rate": 2.6499100802997336e-06, "loss": 0.1881, "step": 8326 }, { "epoch": 0.67, "grad_norm": 1.3650807653077166, "learning_rate": 2.6487666753866005e-06, "loss": 0.1633, "step": 8327 }, { "epoch": 0.67, "grad_norm": 1.507369354758025, "learning_rate": 2.6476234283188873e-06, "loss": 0.221, "step": 8328 }, { "epoch": 0.67, "grad_norm": 1.277527879111273, "learning_rate": 2.646480339173337e-06, "loss": 0.1689, "step": 8329 }, { "epoch": 0.67, "grad_norm": 1.3825803415055011, "learning_rate": 2.6453374080266947e-06, "loss": 0.2296, "step": 8330 }, { "epoch": 0.67, "grad_norm": 1.4198192025347276, "learning_rate": 2.6441946349556864e-06, "loss": 0.166, "step": 8331 }, { "epoch": 0.67, "grad_norm": 1.5289788123477608, "learning_rate": 2.6430520200370303e-06, "loss": 0.2076, "step": 8332 }, { "epoch": 0.67, "grad_norm": 1.358464394048185, "learning_rate": 2.6419095633474323e-06, "loss": 0.1848, "step": 8333 }, { "epoch": 0.67, "grad_norm": 1.437009421412895, "learning_rate": 2.640767264963591e-06, "loss": 0.1874, "step": 8334 }, { "epoch": 0.67, "grad_norm": 1.4057292957038556, "learning_rate": 2.639625124962192e-06, "loss": 0.1916, "step": 8335 }, { "epoch": 0.67, "grad_norm": 1.308352056010595, "learning_rate": 2.6384831434199087e-06, "loss": 0.1742, "step": 8336 }, { "epoch": 0.67, "grad_norm": 5.3940799343030434, "learning_rate": 2.637341320413408e-06, "loss": 0.5314, "step": 8337 }, { "epoch": 0.67, "grad_norm": 1.424853640928415, "learning_rate": 2.63619965601934e-06, "loss": 0.1846, "step": 8338 }, { "epoch": 0.67, "grad_norm": 11.397831585139151, "learning_rate": 2.6350581503143564e-06, "loss": 0.5994, "step": 8339 }, { "epoch": 0.67, "grad_norm": 1.7567645369929097, "learning_rate": 2.63391680337508e-06, "loss": 0.1903, "step": 8340 }, { "epoch": 0.67, "grad_norm": 1.3748505507214188, "learning_rate": 2.632775615278139e-06, "loss": 0.1772, "step": 8341 }, { "epoch": 0.67, "grad_norm": 1.4305998248238738, "learning_rate": 2.6316345861001445e-06, "loss": 0.226, "step": 8342 }, { "epoch": 0.67, "grad_norm": 6.862078336036875, "learning_rate": 2.6304937159176956e-06, "loss": 0.6725, "step": 8343 }, { "epoch": 0.67, "grad_norm": 1.224074036050416, "learning_rate": 2.6293530048073824e-06, "loss": 0.1759, "step": 8344 }, { "epoch": 0.67, "grad_norm": 1.2793527014775796, "learning_rate": 2.6282124528457852e-06, "loss": 0.1644, "step": 8345 }, { "epoch": 0.67, "grad_norm": 1.4620300834513775, "learning_rate": 2.6270720601094723e-06, "loss": 0.1828, "step": 8346 }, { "epoch": 0.67, "grad_norm": 1.3350169251144521, "learning_rate": 2.6259318266750024e-06, "loss": 0.1594, "step": 8347 }, { "epoch": 0.67, "grad_norm": 1.6145892461887903, "learning_rate": 2.624791752618921e-06, "loss": 0.1765, "step": 8348 }, { "epoch": 0.67, "grad_norm": 17.10406048172628, "learning_rate": 2.6236518380177644e-06, "loss": 0.6177, "step": 8349 }, { "epoch": 0.67, "grad_norm": 1.2703090655014542, "learning_rate": 2.622512082948063e-06, "loss": 0.1867, "step": 8350 }, { "epoch": 0.67, "grad_norm": 1.4865630127432794, "learning_rate": 2.6213724874863276e-06, "loss": 0.1572, "step": 8351 }, { "epoch": 0.67, "grad_norm": 1.5149217946291809, "learning_rate": 2.6202330517090624e-06, "loss": 0.2068, "step": 8352 }, { "epoch": 0.67, "grad_norm": 1.3783866237694922, "learning_rate": 2.619093775692767e-06, "loss": 0.1633, "step": 8353 }, { "epoch": 0.67, "grad_norm": 1.310155360866443, "learning_rate": 2.617954659513916e-06, "loss": 0.2063, "step": 8354 }, { "epoch": 0.67, "grad_norm": 1.3282874736984818, "learning_rate": 2.6168157032489883e-06, "loss": 0.2007, "step": 8355 }, { "epoch": 0.67, "grad_norm": 1.4036790059330233, "learning_rate": 2.6156769069744425e-06, "loss": 0.2154, "step": 8356 }, { "epoch": 0.67, "grad_norm": 1.3221212302242065, "learning_rate": 2.6145382707667307e-06, "loss": 0.1876, "step": 8357 }, { "epoch": 0.67, "grad_norm": 1.4435399543818757, "learning_rate": 2.6133997947022904e-06, "loss": 0.1774, "step": 8358 }, { "epoch": 0.67, "grad_norm": 1.635382291907966, "learning_rate": 2.6122614788575563e-06, "loss": 0.1842, "step": 8359 }, { "epoch": 0.67, "grad_norm": 1.2805976058806852, "learning_rate": 2.611123323308943e-06, "loss": 0.1928, "step": 8360 }, { "epoch": 0.67, "grad_norm": 1.3746477688795287, "learning_rate": 2.6099853281328556e-06, "loss": 0.1732, "step": 8361 }, { "epoch": 0.67, "grad_norm": 1.3703191102077759, "learning_rate": 2.6088474934056997e-06, "loss": 0.187, "step": 8362 }, { "epoch": 0.67, "grad_norm": 1.1730626745601922, "learning_rate": 2.607709819203852e-06, "loss": 0.153, "step": 8363 }, { "epoch": 0.67, "grad_norm": 6.012358728915169, "learning_rate": 2.606572305603694e-06, "loss": 0.5336, "step": 8364 }, { "epoch": 0.67, "grad_norm": 1.2827423625753318, "learning_rate": 2.605434952681589e-06, "loss": 0.1529, "step": 8365 }, { "epoch": 0.67, "grad_norm": 12.371205268757304, "learning_rate": 2.604297760513892e-06, "loss": 0.5584, "step": 8366 }, { "epoch": 0.67, "grad_norm": 1.483951994621688, "learning_rate": 2.603160729176944e-06, "loss": 0.1862, "step": 8367 }, { "epoch": 0.67, "grad_norm": 1.5264475948639284, "learning_rate": 2.6020238587470786e-06, "loss": 0.2238, "step": 8368 }, { "epoch": 0.67, "grad_norm": 1.4742128626067894, "learning_rate": 2.6008871493006173e-06, "loss": 0.2212, "step": 8369 }, { "epoch": 0.67, "grad_norm": 8.389661631413501, "learning_rate": 2.5997506009138707e-06, "loss": 0.6088, "step": 8370 }, { "epoch": 0.67, "grad_norm": 1.3923283647070297, "learning_rate": 2.598614213663139e-06, "loss": 0.2079, "step": 8371 }, { "epoch": 0.67, "grad_norm": 1.3840309042318228, "learning_rate": 2.597477987624708e-06, "loss": 0.1836, "step": 8372 }, { "epoch": 0.67, "grad_norm": 1.3775238701544763, "learning_rate": 2.596341922874864e-06, "loss": 0.1763, "step": 8373 }, { "epoch": 0.67, "grad_norm": 1.3548391936560993, "learning_rate": 2.5952060194898655e-06, "loss": 0.152, "step": 8374 }, { "epoch": 0.67, "grad_norm": 1.2664372152024646, "learning_rate": 2.594070277545975e-06, "loss": 0.1671, "step": 8375 }, { "epoch": 0.67, "grad_norm": 1.3257243520647308, "learning_rate": 2.5929346971194356e-06, "loss": 0.1401, "step": 8376 }, { "epoch": 0.67, "grad_norm": 6.643772094137073, "learning_rate": 2.5917992782864844e-06, "loss": 0.5311, "step": 8377 }, { "epoch": 0.67, "grad_norm": 1.537068809858651, "learning_rate": 2.5906640211233435e-06, "loss": 0.1946, "step": 8378 }, { "epoch": 0.67, "grad_norm": 1.2784505933322527, "learning_rate": 2.5895289257062274e-06, "loss": 0.1377, "step": 8379 }, { "epoch": 0.67, "grad_norm": 1.3036414254508844, "learning_rate": 2.5883939921113373e-06, "loss": 0.1609, "step": 8380 }, { "epoch": 0.67, "grad_norm": 5.983427887120907, "learning_rate": 2.5872592204148654e-06, "loss": 0.6134, "step": 8381 }, { "epoch": 0.67, "grad_norm": 1.300533596061534, "learning_rate": 2.5861246106929926e-06, "loss": 0.1336, "step": 8382 }, { "epoch": 0.67, "grad_norm": 1.4279479926840726, "learning_rate": 2.5849901630218867e-06, "loss": 0.1665, "step": 8383 }, { "epoch": 0.67, "grad_norm": 1.293795834205817, "learning_rate": 2.5838558774777125e-06, "loss": 0.1961, "step": 8384 }, { "epoch": 0.67, "grad_norm": 1.2929536969861828, "learning_rate": 2.582721754136609e-06, "loss": 0.1649, "step": 8385 }, { "epoch": 0.67, "grad_norm": 13.214554928506967, "learning_rate": 2.5815877930747207e-06, "loss": 0.6791, "step": 8386 }, { "epoch": 0.67, "grad_norm": 6.727042803026171, "learning_rate": 2.580453994368173e-06, "loss": 0.4667, "step": 8387 }, { "epoch": 0.67, "grad_norm": 1.270006033304784, "learning_rate": 2.5793203580930758e-06, "loss": 0.16, "step": 8388 }, { "epoch": 0.67, "grad_norm": 1.3575929154072648, "learning_rate": 2.5781868843255398e-06, "loss": 0.1784, "step": 8389 }, { "epoch": 0.67, "grad_norm": 1.3802315973198427, "learning_rate": 2.5770535731416556e-06, "loss": 0.1667, "step": 8390 }, { "epoch": 0.67, "grad_norm": 1.5928041467690033, "learning_rate": 2.5759204246175062e-06, "loss": 0.2109, "step": 8391 }, { "epoch": 0.67, "grad_norm": 1.2908215368502198, "learning_rate": 2.574787438829164e-06, "loss": 0.1889, "step": 8392 }, { "epoch": 0.67, "grad_norm": 1.3239839347610995, "learning_rate": 2.573654615852688e-06, "loss": 0.1905, "step": 8393 }, { "epoch": 0.67, "grad_norm": 7.2647769965038815, "learning_rate": 2.572521955764128e-06, "loss": 0.536, "step": 8394 }, { "epoch": 0.67, "grad_norm": 8.5582608544364, "learning_rate": 2.5713894586395282e-06, "loss": 0.5374, "step": 8395 }, { "epoch": 0.67, "grad_norm": 1.2040471192119289, "learning_rate": 2.57025712455491e-06, "loss": 0.1779, "step": 8396 }, { "epoch": 0.67, "grad_norm": 8.69235247895066, "learning_rate": 2.5691249535862907e-06, "loss": 0.4164, "step": 8397 }, { "epoch": 0.67, "grad_norm": 1.4703416391993587, "learning_rate": 2.567992945809683e-06, "loss": 0.1958, "step": 8398 }, { "epoch": 0.67, "grad_norm": 1.382781051457089, "learning_rate": 2.5668611013010734e-06, "loss": 0.1752, "step": 8399 }, { "epoch": 0.67, "grad_norm": 8.287516538348383, "learning_rate": 2.5657294201364526e-06, "loss": 0.4783, "step": 8400 }, { "epoch": 0.67, "grad_norm": 1.7374085538513262, "learning_rate": 2.5645979023917906e-06, "loss": 0.1619, "step": 8401 }, { "epoch": 0.67, "grad_norm": 1.2865812796166598, "learning_rate": 2.56346654814305e-06, "loss": 0.1843, "step": 8402 }, { "epoch": 0.67, "grad_norm": 1.3202424365720382, "learning_rate": 2.5623353574661814e-06, "loss": 0.1907, "step": 8403 }, { "epoch": 0.67, "grad_norm": 1.5169410652836222, "learning_rate": 2.5612043304371304e-06, "loss": 0.1658, "step": 8404 }, { "epoch": 0.67, "grad_norm": 1.3246159512880782, "learning_rate": 2.560073467131819e-06, "loss": 0.1641, "step": 8405 }, { "epoch": 0.67, "grad_norm": 1.450833532937457, "learning_rate": 2.558942767626167e-06, "loss": 0.1739, "step": 8406 }, { "epoch": 0.67, "grad_norm": 1.4894617836154485, "learning_rate": 2.5578122319960865e-06, "loss": 0.1637, "step": 8407 }, { "epoch": 0.67, "grad_norm": 1.3930469140727428, "learning_rate": 2.556681860317467e-06, "loss": 0.2119, "step": 8408 }, { "epoch": 0.67, "grad_norm": 1.2092638310034658, "learning_rate": 2.5555516526661996e-06, "loss": 0.1608, "step": 8409 }, { "epoch": 0.67, "grad_norm": 1.439167988330913, "learning_rate": 2.554421609118155e-06, "loss": 0.2016, "step": 8410 }, { "epoch": 0.67, "grad_norm": 1.3370190346669266, "learning_rate": 2.553291729749198e-06, "loss": 0.1882, "step": 8411 }, { "epoch": 0.67, "grad_norm": 1.3923730892049928, "learning_rate": 2.5521620146351806e-06, "loss": 0.1318, "step": 8412 }, { "epoch": 0.67, "grad_norm": 1.5155836519904236, "learning_rate": 2.5510324638519435e-06, "loss": 0.1867, "step": 8413 }, { "epoch": 0.67, "grad_norm": 1.4016559992356261, "learning_rate": 2.5499030774753176e-06, "loss": 0.1916, "step": 8414 }, { "epoch": 0.67, "grad_norm": 1.261624161534118, "learning_rate": 2.5487738555811215e-06, "loss": 0.1456, "step": 8415 }, { "epoch": 0.67, "grad_norm": 1.088674480367247, "learning_rate": 2.5476447982451636e-06, "loss": 0.1399, "step": 8416 }, { "epoch": 0.67, "grad_norm": 6.918340357854911, "learning_rate": 2.5465159055432383e-06, "loss": 0.5938, "step": 8417 }, { "epoch": 0.67, "grad_norm": 1.3824811142382467, "learning_rate": 2.545387177551138e-06, "loss": 0.222, "step": 8418 }, { "epoch": 0.67, "grad_norm": 1.3849157281916746, "learning_rate": 2.54425861434463e-06, "loss": 0.1821, "step": 8419 }, { "epoch": 0.67, "grad_norm": 1.3411100337093516, "learning_rate": 2.5431302159994835e-06, "loss": 0.2, "step": 8420 }, { "epoch": 0.67, "grad_norm": 6.7547661560311525, "learning_rate": 2.542001982591452e-06, "loss": 0.5695, "step": 8421 }, { "epoch": 0.67, "grad_norm": 1.2408585504338, "learning_rate": 2.5408739141962704e-06, "loss": 0.1459, "step": 8422 }, { "epoch": 0.67, "grad_norm": 1.338497924203863, "learning_rate": 2.539746010889676e-06, "loss": 0.1728, "step": 8423 }, { "epoch": 0.67, "grad_norm": 1.3126759660134741, "learning_rate": 2.5386182727473862e-06, "loss": 0.1942, "step": 8424 }, { "epoch": 0.67, "grad_norm": 1.2820197881024753, "learning_rate": 2.5374906998451094e-06, "loss": 0.1822, "step": 8425 }, { "epoch": 0.67, "grad_norm": 1.4291055005477529, "learning_rate": 2.536363292258543e-06, "loss": 0.1978, "step": 8426 }, { "epoch": 0.67, "grad_norm": 1.4594811843319682, "learning_rate": 2.5352360500633733e-06, "loss": 0.1982, "step": 8427 }, { "epoch": 0.67, "grad_norm": 1.4581060922940934, "learning_rate": 2.5341089733352737e-06, "loss": 0.1899, "step": 8428 }, { "epoch": 0.67, "grad_norm": 1.340334290982926, "learning_rate": 2.5329820621499135e-06, "loss": 0.1769, "step": 8429 }, { "epoch": 0.67, "grad_norm": 1.351724274707295, "learning_rate": 2.5318553165829407e-06, "loss": 0.2084, "step": 8430 }, { "epoch": 0.67, "grad_norm": 4.431960827401473, "learning_rate": 2.530728736709997e-06, "loss": 0.6228, "step": 8431 }, { "epoch": 0.67, "grad_norm": 1.3914139408565374, "learning_rate": 2.5296023226067183e-06, "loss": 0.196, "step": 8432 }, { "epoch": 0.67, "grad_norm": 1.389280058175246, "learning_rate": 2.528476074348717e-06, "loss": 0.2093, "step": 8433 }, { "epoch": 0.67, "grad_norm": 1.4076540112760858, "learning_rate": 2.5273499920116073e-06, "loss": 0.1778, "step": 8434 }, { "epoch": 0.67, "grad_norm": 1.3230740434138704, "learning_rate": 2.5262240756709838e-06, "loss": 0.1811, "step": 8435 }, { "epoch": 0.67, "grad_norm": 1.2443327889627611, "learning_rate": 2.525098325402434e-06, "loss": 0.1666, "step": 8436 }, { "epoch": 0.67, "grad_norm": 1.2163024197581889, "learning_rate": 2.5239727412815317e-06, "loss": 0.1502, "step": 8437 }, { "epoch": 0.68, "grad_norm": 9.572666608384148, "learning_rate": 2.522847323383842e-06, "loss": 0.3843, "step": 8438 }, { "epoch": 0.68, "grad_norm": 1.295502398616411, "learning_rate": 2.5217220717849167e-06, "loss": 0.1554, "step": 8439 }, { "epoch": 0.68, "grad_norm": 6.979633956432414, "learning_rate": 2.5205969865602974e-06, "loss": 0.3847, "step": 8440 }, { "epoch": 0.68, "grad_norm": 1.3255602677274645, "learning_rate": 2.519472067785515e-06, "loss": 0.1904, "step": 8441 }, { "epoch": 0.68, "grad_norm": 1.329054071426642, "learning_rate": 2.518347315536086e-06, "loss": 0.1864, "step": 8442 }, { "epoch": 0.68, "grad_norm": 12.466200607498436, "learning_rate": 2.5172227298875253e-06, "loss": 0.6387, "step": 8443 }, { "epoch": 0.68, "grad_norm": 1.3573221609395365, "learning_rate": 2.5160983109153207e-06, "loss": 0.1757, "step": 8444 }, { "epoch": 0.68, "grad_norm": 1.3763026235568596, "learning_rate": 2.514974058694965e-06, "loss": 0.1839, "step": 8445 }, { "epoch": 0.68, "grad_norm": 1.3155024991664774, "learning_rate": 2.5138499733019294e-06, "loss": 0.1522, "step": 8446 }, { "epoch": 0.68, "grad_norm": 1.2972188218378276, "learning_rate": 2.512726054811678e-06, "loss": 0.152, "step": 8447 }, { "epoch": 0.68, "grad_norm": 1.375938653985904, "learning_rate": 2.5116023032996635e-06, "loss": 0.1799, "step": 8448 }, { "epoch": 0.68, "grad_norm": 1.365649460624493, "learning_rate": 2.510478718841325e-06, "loss": 0.179, "step": 8449 }, { "epoch": 0.68, "grad_norm": 1.5045482140220925, "learning_rate": 2.5093553015120937e-06, "loss": 0.2217, "step": 8450 }, { "epoch": 0.68, "grad_norm": 1.468083324936883, "learning_rate": 2.508232051387385e-06, "loss": 0.2069, "step": 8451 }, { "epoch": 0.68, "grad_norm": 1.3331638487191784, "learning_rate": 2.5071089685426127e-06, "loss": 0.1576, "step": 8452 }, { "epoch": 0.68, "grad_norm": 1.2545180613070355, "learning_rate": 2.5059860530531644e-06, "loss": 0.1818, "step": 8453 }, { "epoch": 0.68, "grad_norm": 1.336553781113412, "learning_rate": 2.5048633049944305e-06, "loss": 0.1693, "step": 8454 }, { "epoch": 0.68, "grad_norm": 1.4923150824370275, "learning_rate": 2.5037407244417834e-06, "loss": 0.1943, "step": 8455 }, { "epoch": 0.68, "grad_norm": 1.4859046782697194, "learning_rate": 2.5026183114705844e-06, "loss": 0.1779, "step": 8456 }, { "epoch": 0.68, "grad_norm": 1.4170496787062508, "learning_rate": 2.501496066156185e-06, "loss": 0.1691, "step": 8457 }, { "epoch": 0.68, "grad_norm": 8.696476482692896, "learning_rate": 2.5003739885739253e-06, "loss": 0.587, "step": 8458 }, { "epoch": 0.68, "grad_norm": 1.5324499197982238, "learning_rate": 2.4992520787991328e-06, "loss": 0.2156, "step": 8459 }, { "epoch": 0.68, "grad_norm": 1.3942672615624903, "learning_rate": 2.498130336907125e-06, "loss": 0.1787, "step": 8460 }, { "epoch": 0.68, "grad_norm": 1.3526214712013132, "learning_rate": 2.497008762973209e-06, "loss": 0.1915, "step": 8461 }, { "epoch": 0.68, "grad_norm": 1.4260154575269086, "learning_rate": 2.495887357072676e-06, "loss": 0.1763, "step": 8462 }, { "epoch": 0.68, "grad_norm": 1.2826193221392896, "learning_rate": 2.494766119280816e-06, "loss": 0.1677, "step": 8463 }, { "epoch": 0.68, "grad_norm": 1.5360239255235244, "learning_rate": 2.4936450496728927e-06, "loss": 0.1727, "step": 8464 }, { "epoch": 0.68, "grad_norm": 1.1597837442557963, "learning_rate": 2.492524148324173e-06, "loss": 0.1325, "step": 8465 }, { "epoch": 0.68, "grad_norm": 1.293857847534176, "learning_rate": 2.491403415309907e-06, "loss": 0.1734, "step": 8466 }, { "epoch": 0.68, "grad_norm": 1.3485591114822921, "learning_rate": 2.490282850705326e-06, "loss": 0.1697, "step": 8467 }, { "epoch": 0.68, "grad_norm": 1.2351140589428389, "learning_rate": 2.489162454585664e-06, "loss": 0.1731, "step": 8468 }, { "epoch": 0.68, "grad_norm": 1.3010663457449723, "learning_rate": 2.488042227026134e-06, "loss": 0.2016, "step": 8469 }, { "epoch": 0.68, "grad_norm": 1.4550253190322415, "learning_rate": 2.4869221681019394e-06, "loss": 0.1684, "step": 8470 }, { "epoch": 0.68, "grad_norm": 7.412379849386844, "learning_rate": 2.4858022778882746e-06, "loss": 0.5663, "step": 8471 }, { "epoch": 0.68, "grad_norm": 1.4494060978272503, "learning_rate": 2.4846825564603204e-06, "loss": 0.2182, "step": 8472 }, { "epoch": 0.68, "grad_norm": 1.4083347938367576, "learning_rate": 2.4835630038932475e-06, "loss": 0.1949, "step": 8473 }, { "epoch": 0.68, "grad_norm": 1.9480023195484835, "learning_rate": 2.4824436202622147e-06, "loss": 0.1383, "step": 8474 }, { "epoch": 0.68, "grad_norm": 1.3617399392324856, "learning_rate": 2.4813244056423692e-06, "loss": 0.1653, "step": 8475 }, { "epoch": 0.68, "grad_norm": 1.8211578636303276, "learning_rate": 2.4802053601088462e-06, "loss": 0.248, "step": 8476 }, { "epoch": 0.68, "grad_norm": 5.26647954245297, "learning_rate": 2.479086483736775e-06, "loss": 0.4828, "step": 8477 }, { "epoch": 0.68, "grad_norm": 1.3162224897957153, "learning_rate": 2.477967776601263e-06, "loss": 0.1739, "step": 8478 }, { "epoch": 0.68, "grad_norm": 1.0629831172172495, "learning_rate": 2.476849238777418e-06, "loss": 0.1252, "step": 8479 }, { "epoch": 0.68, "grad_norm": 1.5453624777820747, "learning_rate": 2.4757308703403275e-06, "loss": 0.1896, "step": 8480 }, { "epoch": 0.68, "grad_norm": 1.4116068290697115, "learning_rate": 2.4746126713650724e-06, "loss": 0.1993, "step": 8481 }, { "epoch": 0.68, "grad_norm": 1.3818204282674493, "learning_rate": 2.47349464192672e-06, "loss": 0.1652, "step": 8482 }, { "epoch": 0.68, "grad_norm": 1.2422014901595066, "learning_rate": 2.472376782100328e-06, "loss": 0.1694, "step": 8483 }, { "epoch": 0.68, "grad_norm": 1.3860698153181032, "learning_rate": 2.47125909196094e-06, "loss": 0.1825, "step": 8484 }, { "epoch": 0.68, "grad_norm": 1.2190229467045905, "learning_rate": 2.4701415715835917e-06, "loss": 0.1966, "step": 8485 }, { "epoch": 0.68, "grad_norm": 1.556590601919081, "learning_rate": 2.4690242210433046e-06, "loss": 0.2251, "step": 8486 }, { "epoch": 0.68, "grad_norm": 1.2690971971154463, "learning_rate": 2.4679070404150883e-06, "loss": 0.1828, "step": 8487 }, { "epoch": 0.68, "grad_norm": 1.4563588891784556, "learning_rate": 2.4667900297739463e-06, "loss": 0.1862, "step": 8488 }, { "epoch": 0.68, "grad_norm": 1.261680432088371, "learning_rate": 2.465673189194865e-06, "loss": 0.1732, "step": 8489 }, { "epoch": 0.68, "grad_norm": 1.4187220805158927, "learning_rate": 2.464556518752821e-06, "loss": 0.155, "step": 8490 }, { "epoch": 0.68, "grad_norm": 1.4621249492381339, "learning_rate": 2.4634400185227795e-06, "loss": 0.2164, "step": 8491 }, { "epoch": 0.68, "grad_norm": 8.713201434067235, "learning_rate": 2.462323688579696e-06, "loss": 0.6061, "step": 8492 }, { "epoch": 0.68, "grad_norm": 1.3308084600874768, "learning_rate": 2.461207528998511e-06, "loss": 0.1909, "step": 8493 }, { "epoch": 0.68, "grad_norm": 1.2456786050921536, "learning_rate": 2.4600915398541576e-06, "loss": 0.1283, "step": 8494 }, { "epoch": 0.68, "grad_norm": 7.378428536750042, "learning_rate": 2.458975721221555e-06, "loss": 0.6686, "step": 8495 }, { "epoch": 0.68, "grad_norm": 5.870328674334021, "learning_rate": 2.4578600731756087e-06, "loss": 0.6198, "step": 8496 }, { "epoch": 0.68, "grad_norm": 1.4557764756101716, "learning_rate": 2.4567445957912217e-06, "loss": 0.1839, "step": 8497 }, { "epoch": 0.68, "grad_norm": 11.486948935891597, "learning_rate": 2.4556292891432728e-06, "loss": 0.5416, "step": 8498 }, { "epoch": 0.68, "grad_norm": 10.372545367371641, "learning_rate": 2.4545141533066403e-06, "loss": 0.6967, "step": 8499 }, { "epoch": 0.68, "grad_norm": 1.284953442519931, "learning_rate": 2.4533991883561868e-06, "loss": 0.1499, "step": 8500 }, { "epoch": 0.68, "grad_norm": 1.3539728771278943, "learning_rate": 2.4522843943667586e-06, "loss": 0.1767, "step": 8501 }, { "epoch": 0.68, "grad_norm": 12.150123276735659, "learning_rate": 2.4511697714131993e-06, "loss": 0.59, "step": 8502 }, { "epoch": 0.68, "grad_norm": 5.479145826437882, "learning_rate": 2.4500553195703364e-06, "loss": 0.5093, "step": 8503 }, { "epoch": 0.68, "grad_norm": 1.3562506376801557, "learning_rate": 2.448941038912986e-06, "loss": 0.1855, "step": 8504 }, { "epoch": 0.68, "grad_norm": 9.858679188655305, "learning_rate": 2.447826929515953e-06, "loss": 0.5704, "step": 8505 }, { "epoch": 0.68, "grad_norm": 1.3647684528844777, "learning_rate": 2.4467129914540304e-06, "loss": 0.2007, "step": 8506 }, { "epoch": 0.68, "grad_norm": 1.312033575876438, "learning_rate": 2.445599224802e-06, "loss": 0.1598, "step": 8507 }, { "epoch": 0.68, "grad_norm": 1.3416506917789155, "learning_rate": 2.444485629634636e-06, "loss": 0.1505, "step": 8508 }, { "epoch": 0.68, "grad_norm": 1.2490840818307274, "learning_rate": 2.4433722060266933e-06, "loss": 0.1703, "step": 8509 }, { "epoch": 0.68, "grad_norm": 1.2175259768823494, "learning_rate": 2.4422589540529187e-06, "loss": 0.1585, "step": 8510 }, { "epoch": 0.68, "grad_norm": 1.208344891110285, "learning_rate": 2.4411458737880537e-06, "loss": 0.1699, "step": 8511 }, { "epoch": 0.68, "grad_norm": 1.362061170228578, "learning_rate": 2.4400329653068156e-06, "loss": 0.1587, "step": 8512 }, { "epoch": 0.68, "grad_norm": 1.658947845851851, "learning_rate": 2.4389202286839225e-06, "loss": 0.2246, "step": 8513 }, { "epoch": 0.68, "grad_norm": 1.441581689359193, "learning_rate": 2.4378076639940746e-06, "loss": 0.1782, "step": 8514 }, { "epoch": 0.68, "grad_norm": 1.2868330244084218, "learning_rate": 2.436695271311961e-06, "loss": 0.1636, "step": 8515 }, { "epoch": 0.68, "grad_norm": 1.4014748678064373, "learning_rate": 2.435583050712261e-06, "loss": 0.1958, "step": 8516 }, { "epoch": 0.68, "grad_norm": 1.441058278933007, "learning_rate": 2.43447100226964e-06, "loss": 0.1861, "step": 8517 }, { "epoch": 0.68, "grad_norm": 1.4708312812227287, "learning_rate": 2.433359126058754e-06, "loss": 0.1832, "step": 8518 }, { "epoch": 0.68, "grad_norm": 1.3846367761401979, "learning_rate": 2.432247422154247e-06, "loss": 0.189, "step": 8519 }, { "epoch": 0.68, "grad_norm": 1.4974904045135156, "learning_rate": 2.43113589063075e-06, "loss": 0.1851, "step": 8520 }, { "epoch": 0.68, "grad_norm": 1.423384314606963, "learning_rate": 2.4300245315628824e-06, "loss": 0.1887, "step": 8521 }, { "epoch": 0.68, "grad_norm": 1.3007616293554927, "learning_rate": 2.4289133450252583e-06, "loss": 0.1507, "step": 8522 }, { "epoch": 0.68, "grad_norm": 6.459632629691166, "learning_rate": 2.4278023310924676e-06, "loss": 0.6831, "step": 8523 }, { "epoch": 0.68, "grad_norm": 1.391848402516739, "learning_rate": 2.426691489839102e-06, "loss": 0.1795, "step": 8524 }, { "epoch": 0.68, "grad_norm": 1.2901079400731768, "learning_rate": 2.425580821339733e-06, "loss": 0.1483, "step": 8525 }, { "epoch": 0.68, "grad_norm": 1.1845993696400745, "learning_rate": 2.4244703256689244e-06, "loss": 0.1346, "step": 8526 }, { "epoch": 0.68, "grad_norm": 5.908571437799625, "learning_rate": 2.423360002901226e-06, "loss": 0.5316, "step": 8527 }, { "epoch": 0.68, "grad_norm": 1.4592188552388006, "learning_rate": 2.422249853111177e-06, "loss": 0.1785, "step": 8528 }, { "epoch": 0.68, "grad_norm": 6.856562566303992, "learning_rate": 2.4211398763733063e-06, "loss": 0.5992, "step": 8529 }, { "epoch": 0.68, "grad_norm": 1.2643571241493792, "learning_rate": 2.420030072762127e-06, "loss": 0.1405, "step": 8530 }, { "epoch": 0.68, "grad_norm": 1.217700546821981, "learning_rate": 2.4189204423521506e-06, "loss": 0.1378, "step": 8531 }, { "epoch": 0.68, "grad_norm": 1.3077190110334977, "learning_rate": 2.4178109852178606e-06, "loss": 0.1731, "step": 8532 }, { "epoch": 0.68, "grad_norm": 1.3426155906656483, "learning_rate": 2.416701701433745e-06, "loss": 0.1646, "step": 8533 }, { "epoch": 0.68, "grad_norm": 1.3765645013620476, "learning_rate": 2.4155925910742735e-06, "loss": 0.2029, "step": 8534 }, { "epoch": 0.68, "grad_norm": 1.4235353698933184, "learning_rate": 2.4144836542138975e-06, "loss": 0.1924, "step": 8535 }, { "epoch": 0.68, "grad_norm": 1.3779149974869869, "learning_rate": 2.4133748909270697e-06, "loss": 0.1615, "step": 8536 }, { "epoch": 0.68, "grad_norm": 1.3497584861664549, "learning_rate": 2.4122663012882227e-06, "loss": 0.1727, "step": 8537 }, { "epoch": 0.68, "grad_norm": 1.3858682469275923, "learning_rate": 2.411157885371779e-06, "loss": 0.197, "step": 8538 }, { "epoch": 0.68, "grad_norm": 1.243929244430079, "learning_rate": 2.4100496432521507e-06, "loss": 0.1492, "step": 8539 }, { "epoch": 0.68, "grad_norm": 1.3831255144952614, "learning_rate": 2.408941575003737e-06, "loss": 0.1866, "step": 8540 }, { "epoch": 0.68, "grad_norm": 1.3449284565220094, "learning_rate": 2.4078336807009247e-06, "loss": 0.1917, "step": 8541 }, { "epoch": 0.68, "grad_norm": 1.3677498202982803, "learning_rate": 2.406725960418094e-06, "loss": 0.1763, "step": 8542 }, { "epoch": 0.68, "grad_norm": 1.3847629074884495, "learning_rate": 2.4056184142296056e-06, "loss": 0.2275, "step": 8543 }, { "epoch": 0.68, "grad_norm": 1.3418862308949862, "learning_rate": 2.4045110422098122e-06, "loss": 0.1677, "step": 8544 }, { "epoch": 0.68, "grad_norm": 1.304021088813838, "learning_rate": 2.4034038444330597e-06, "loss": 0.17, "step": 8545 }, { "epoch": 0.68, "grad_norm": 1.2660214074555907, "learning_rate": 2.4022968209736713e-06, "loss": 0.1713, "step": 8546 }, { "epoch": 0.68, "grad_norm": 1.3000719692074154, "learning_rate": 2.4011899719059696e-06, "loss": 0.1437, "step": 8547 }, { "epoch": 0.68, "grad_norm": 1.5150766519191672, "learning_rate": 2.400083297304259e-06, "loss": 0.1792, "step": 8548 }, { "epoch": 0.68, "grad_norm": 1.3977149142442993, "learning_rate": 2.398976797242834e-06, "loss": 0.2329, "step": 8549 }, { "epoch": 0.68, "grad_norm": 1.4984021145073811, "learning_rate": 2.3978704717959777e-06, "loss": 0.1754, "step": 8550 }, { "epoch": 0.68, "grad_norm": 1.1591378510486372, "learning_rate": 2.3967643210379608e-06, "loss": 0.1536, "step": 8551 }, { "epoch": 0.68, "grad_norm": 1.360829072861916, "learning_rate": 2.3956583450430426e-06, "loss": 0.1603, "step": 8552 }, { "epoch": 0.68, "grad_norm": 1.5410472497864467, "learning_rate": 2.39455254388547e-06, "loss": 0.2128, "step": 8553 }, { "epoch": 0.68, "grad_norm": 1.4776471399141238, "learning_rate": 2.39344691763948e-06, "loss": 0.1702, "step": 8554 }, { "epoch": 0.68, "grad_norm": 1.3010736893991974, "learning_rate": 2.392341466379294e-06, "loss": 0.178, "step": 8555 }, { "epoch": 0.68, "grad_norm": 1.2545142007384433, "learning_rate": 2.391236190179129e-06, "loss": 0.1782, "step": 8556 }, { "epoch": 0.68, "grad_norm": 1.3078817829096632, "learning_rate": 2.3901310891131803e-06, "loss": 0.1413, "step": 8557 }, { "epoch": 0.68, "grad_norm": 1.2566846988619564, "learning_rate": 2.3890261632556405e-06, "loss": 0.1677, "step": 8558 }, { "epoch": 0.68, "grad_norm": 1.4187241728697129, "learning_rate": 2.3879214126806856e-06, "loss": 0.1842, "step": 8559 }, { "epoch": 0.68, "grad_norm": 1.2248998847088113, "learning_rate": 2.38681683746248e-06, "loss": 0.1707, "step": 8560 }, { "epoch": 0.68, "grad_norm": 1.3837954012065063, "learning_rate": 2.385712437675178e-06, "loss": 0.1679, "step": 8561 }, { "epoch": 0.68, "grad_norm": 1.326986572992212, "learning_rate": 2.384608213392921e-06, "loss": 0.21, "step": 8562 }, { "epoch": 0.69, "grad_norm": 1.1122187033578725, "learning_rate": 2.383504164689839e-06, "loss": 0.1539, "step": 8563 }, { "epoch": 0.69, "grad_norm": 1.2486025623598727, "learning_rate": 2.38240029164005e-06, "loss": 0.1451, "step": 8564 }, { "epoch": 0.69, "grad_norm": 1.2364221172478997, "learning_rate": 2.3812965943176608e-06, "loss": 0.1398, "step": 8565 }, { "epoch": 0.69, "grad_norm": 25.063600812477958, "learning_rate": 2.380193072796763e-06, "loss": 0.5526, "step": 8566 }, { "epoch": 0.69, "grad_norm": 1.4038204898179123, "learning_rate": 2.3790897271514464e-06, "loss": 0.1913, "step": 8567 }, { "epoch": 0.69, "grad_norm": 8.996757008323058, "learning_rate": 2.377986557455774e-06, "loss": 0.4078, "step": 8568 }, { "epoch": 0.69, "grad_norm": 6.073085636818596, "learning_rate": 2.376883563783809e-06, "loss": 0.544, "step": 8569 }, { "epoch": 0.69, "grad_norm": 1.226665789625632, "learning_rate": 2.3757807462096013e-06, "loss": 0.1226, "step": 8570 }, { "epoch": 0.69, "grad_norm": 1.3547237112169002, "learning_rate": 2.374678104807178e-06, "loss": 0.15, "step": 8571 }, { "epoch": 0.69, "grad_norm": 1.5254561677903267, "learning_rate": 2.373575639650571e-06, "loss": 0.2089, "step": 8572 }, { "epoch": 0.69, "grad_norm": 1.4258364748371088, "learning_rate": 2.372473350813789e-06, "loss": 0.1585, "step": 8573 }, { "epoch": 0.69, "grad_norm": 1.4384587384880048, "learning_rate": 2.371371238370831e-06, "loss": 0.2186, "step": 8574 }, { "epoch": 0.69, "grad_norm": 1.453410433920425, "learning_rate": 2.3702693023956853e-06, "loss": 0.1898, "step": 8575 }, { "epoch": 0.69, "grad_norm": 1.401553812305528, "learning_rate": 2.3691675429623322e-06, "loss": 0.1819, "step": 8576 }, { "epoch": 0.69, "grad_norm": 9.664635466718382, "learning_rate": 2.36806596014473e-06, "loss": 0.5996, "step": 8577 }, { "epoch": 0.69, "grad_norm": 1.2046505080563843, "learning_rate": 2.3669645540168355e-06, "loss": 0.1729, "step": 8578 }, { "epoch": 0.69, "grad_norm": 1.678740454617855, "learning_rate": 2.3658633246525906e-06, "loss": 0.2152, "step": 8579 }, { "epoch": 0.69, "grad_norm": 1.2753931285971467, "learning_rate": 2.3647622721259184e-06, "loss": 0.1483, "step": 8580 }, { "epoch": 0.69, "grad_norm": 1.3614560587276388, "learning_rate": 2.3636613965107406e-06, "loss": 0.1627, "step": 8581 }, { "epoch": 0.69, "grad_norm": 1.3647480392753688, "learning_rate": 2.3625606978809614e-06, "loss": 0.1706, "step": 8582 }, { "epoch": 0.69, "grad_norm": 1.30355084569227, "learning_rate": 2.3614601763104733e-06, "loss": 0.1725, "step": 8583 }, { "epoch": 0.69, "grad_norm": 1.5795291484128728, "learning_rate": 2.3603598318731584e-06, "loss": 0.2356, "step": 8584 }, { "epoch": 0.69, "grad_norm": 1.3890008674163354, "learning_rate": 2.3592596646428855e-06, "loss": 0.1623, "step": 8585 }, { "epoch": 0.69, "grad_norm": 1.6400365006859408, "learning_rate": 2.358159674693511e-06, "loss": 0.1672, "step": 8586 }, { "epoch": 0.69, "grad_norm": 1.353642454611162, "learning_rate": 2.357059862098885e-06, "loss": 0.1956, "step": 8587 }, { "epoch": 0.69, "grad_norm": 1.4017000427419182, "learning_rate": 2.355960226932837e-06, "loss": 0.1924, "step": 8588 }, { "epoch": 0.69, "grad_norm": 1.2809217862880078, "learning_rate": 2.354860769269188e-06, "loss": 0.1455, "step": 8589 }, { "epoch": 0.69, "grad_norm": 1.2906602762927917, "learning_rate": 2.353761489181754e-06, "loss": 0.1714, "step": 8590 }, { "epoch": 0.69, "grad_norm": 1.282015781676501, "learning_rate": 2.3526623867443248e-06, "loss": 0.1726, "step": 8591 }, { "epoch": 0.69, "grad_norm": 1.3792755320530645, "learning_rate": 2.3515634620306925e-06, "loss": 0.1928, "step": 8592 }, { "epoch": 0.69, "grad_norm": 1.3595467302432158, "learning_rate": 2.35046471511463e-06, "loss": 0.1711, "step": 8593 }, { "epoch": 0.69, "grad_norm": 1.6470457636587517, "learning_rate": 2.3493661460698986e-06, "loss": 0.2254, "step": 8594 }, { "epoch": 0.69, "grad_norm": 1.3172558940297976, "learning_rate": 2.3482677549702493e-06, "loss": 0.1705, "step": 8595 }, { "epoch": 0.69, "grad_norm": 1.4354833300218077, "learning_rate": 2.34716954188942e-06, "loss": 0.1668, "step": 8596 }, { "epoch": 0.69, "grad_norm": 1.1950440706486667, "learning_rate": 2.3460715069011364e-06, "loss": 0.1774, "step": 8597 }, { "epoch": 0.69, "grad_norm": 1.3152357756919197, "learning_rate": 2.344973650079114e-06, "loss": 0.2087, "step": 8598 }, { "epoch": 0.69, "grad_norm": 1.2153941066514582, "learning_rate": 2.343875971497055e-06, "loss": 0.1571, "step": 8599 }, { "epoch": 0.69, "grad_norm": 1.357844507461579, "learning_rate": 2.342778471228648e-06, "loss": 0.179, "step": 8600 }, { "epoch": 0.69, "grad_norm": 1.4036411008739, "learning_rate": 2.3416811493475767e-06, "loss": 0.1687, "step": 8601 }, { "epoch": 0.69, "grad_norm": 1.442257632305718, "learning_rate": 2.340584005927501e-06, "loss": 0.1801, "step": 8602 }, { "epoch": 0.69, "grad_norm": 1.4020140667254148, "learning_rate": 2.33948704104208e-06, "loss": 0.2135, "step": 8603 }, { "epoch": 0.69, "grad_norm": 1.2231075488783294, "learning_rate": 2.3383902547649567e-06, "loss": 0.1546, "step": 8604 }, { "epoch": 0.69, "grad_norm": 1.3729470096320693, "learning_rate": 2.3372936471697564e-06, "loss": 0.1705, "step": 8605 }, { "epoch": 0.69, "grad_norm": 1.3213457258767163, "learning_rate": 2.336197218330104e-06, "loss": 0.1569, "step": 8606 }, { "epoch": 0.69, "grad_norm": 5.377234671918558, "learning_rate": 2.3351009683196025e-06, "loss": 0.5272, "step": 8607 }, { "epoch": 0.69, "grad_norm": 1.2533129565825858, "learning_rate": 2.3340048972118475e-06, "loss": 0.1523, "step": 8608 }, { "epoch": 0.69, "grad_norm": 1.4437289383505454, "learning_rate": 2.332909005080421e-06, "loss": 0.1751, "step": 8609 }, { "epoch": 0.69, "grad_norm": 1.1818405728761945, "learning_rate": 2.3318132919988944e-06, "loss": 0.1662, "step": 8610 }, { "epoch": 0.69, "grad_norm": 7.237474968897468, "learning_rate": 2.3307177580408243e-06, "loss": 0.5038, "step": 8611 }, { "epoch": 0.69, "grad_norm": 1.5583317725987094, "learning_rate": 2.3296224032797616e-06, "loss": 0.2323, "step": 8612 }, { "epoch": 0.69, "grad_norm": 1.6800599716369158, "learning_rate": 2.328527227789236e-06, "loss": 0.1912, "step": 8613 }, { "epoch": 0.69, "grad_norm": 1.1616636644168354, "learning_rate": 2.3274322316427706e-06, "loss": 0.174, "step": 8614 }, { "epoch": 0.69, "grad_norm": 1.3998644498786548, "learning_rate": 2.326337414913881e-06, "loss": 0.17, "step": 8615 }, { "epoch": 0.69, "grad_norm": 6.292234164920583, "learning_rate": 2.325242777676057e-06, "loss": 0.5858, "step": 8616 }, { "epoch": 0.69, "grad_norm": 5.754097612815069, "learning_rate": 2.324148320002792e-06, "loss": 0.4215, "step": 8617 }, { "epoch": 0.69, "grad_norm": 1.3801948094943512, "learning_rate": 2.323054041967557e-06, "loss": 0.141, "step": 8618 }, { "epoch": 0.69, "grad_norm": 1.1804844524325584, "learning_rate": 2.3219599436438162e-06, "loss": 0.1644, "step": 8619 }, { "epoch": 0.69, "grad_norm": 1.4615781525710068, "learning_rate": 2.320866025105016e-06, "loss": 0.2289, "step": 8620 }, { "epoch": 0.69, "grad_norm": 1.0737896593414822, "learning_rate": 2.319772286424601e-06, "loss": 0.1197, "step": 8621 }, { "epoch": 0.69, "grad_norm": 5.958260260142388, "learning_rate": 2.318678727675991e-06, "loss": 0.5602, "step": 8622 }, { "epoch": 0.69, "grad_norm": 1.309184401051319, "learning_rate": 2.3175853489326e-06, "loss": 0.1315, "step": 8623 }, { "epoch": 0.69, "grad_norm": 1.2973334380537973, "learning_rate": 2.3164921502678363e-06, "loss": 0.1802, "step": 8624 }, { "epoch": 0.69, "grad_norm": 1.2102528402280974, "learning_rate": 2.315399131755081e-06, "loss": 0.1697, "step": 8625 }, { "epoch": 0.69, "grad_norm": 1.3414110501478933, "learning_rate": 2.314306293467718e-06, "loss": 0.1557, "step": 8626 }, { "epoch": 0.69, "grad_norm": 14.041071212461024, "learning_rate": 2.313213635479111e-06, "loss": 0.6111, "step": 8627 }, { "epoch": 0.69, "grad_norm": 1.5257508939651871, "learning_rate": 2.312121157862613e-06, "loss": 0.1897, "step": 8628 }, { "epoch": 0.69, "grad_norm": 1.3856273625845739, "learning_rate": 2.311028860691566e-06, "loss": 0.1505, "step": 8629 }, { "epoch": 0.69, "grad_norm": 1.3685700282297448, "learning_rate": 2.3099367440392985e-06, "loss": 0.218, "step": 8630 }, { "epoch": 0.69, "grad_norm": 6.856678168937938, "learning_rate": 2.308844807979128e-06, "loss": 0.4455, "step": 8631 }, { "epoch": 0.69, "grad_norm": 1.2522641174410185, "learning_rate": 2.3077530525843593e-06, "loss": 0.1914, "step": 8632 }, { "epoch": 0.69, "grad_norm": 1.5594677146749716, "learning_rate": 2.306661477928286e-06, "loss": 0.1833, "step": 8633 }, { "epoch": 0.69, "grad_norm": 1.3523388746249685, "learning_rate": 2.3055700840841857e-06, "loss": 0.1917, "step": 8634 }, { "epoch": 0.69, "grad_norm": 1.494809927899121, "learning_rate": 2.3044788711253336e-06, "loss": 0.1775, "step": 8635 }, { "epoch": 0.69, "grad_norm": 1.1440631106618129, "learning_rate": 2.3033878391249775e-06, "loss": 0.1361, "step": 8636 }, { "epoch": 0.69, "grad_norm": 1.3077896538758447, "learning_rate": 2.302296988156369e-06, "loss": 0.1699, "step": 8637 }, { "epoch": 0.69, "grad_norm": 1.3165458622636665, "learning_rate": 2.301206318292737e-06, "loss": 0.1864, "step": 8638 }, { "epoch": 0.69, "grad_norm": 1.4201494771515406, "learning_rate": 2.300115829607302e-06, "loss": 0.1656, "step": 8639 }, { "epoch": 0.69, "grad_norm": 14.588831789297222, "learning_rate": 2.299025522173271e-06, "loss": 0.5851, "step": 8640 }, { "epoch": 0.69, "grad_norm": 1.4072551172991863, "learning_rate": 2.297935396063841e-06, "loss": 0.153, "step": 8641 }, { "epoch": 0.69, "grad_norm": 1.1859591460674632, "learning_rate": 2.296845451352195e-06, "loss": 0.1174, "step": 8642 }, { "epoch": 0.69, "grad_norm": 1.2535864752171626, "learning_rate": 2.2957556881115033e-06, "loss": 0.1188, "step": 8643 }, { "epoch": 0.69, "grad_norm": 1.411947794916505, "learning_rate": 2.294666106414926e-06, "loss": 0.2099, "step": 8644 }, { "epoch": 0.69, "grad_norm": 1.4879657084185662, "learning_rate": 2.2935767063356084e-06, "loss": 0.1846, "step": 8645 }, { "epoch": 0.69, "grad_norm": 1.4057771121186284, "learning_rate": 2.2924874879466903e-06, "loss": 0.1666, "step": 8646 }, { "epoch": 0.69, "grad_norm": 1.6089047311051063, "learning_rate": 2.2913984513212866e-06, "loss": 0.2122, "step": 8647 }, { "epoch": 0.69, "grad_norm": 1.3227789332390976, "learning_rate": 2.2903095965325135e-06, "loss": 0.1384, "step": 8648 }, { "epoch": 0.69, "grad_norm": 1.2453708652530837, "learning_rate": 2.289220923653469e-06, "loss": 0.1607, "step": 8649 }, { "epoch": 0.69, "grad_norm": 1.113529211518167, "learning_rate": 2.2881324327572336e-06, "loss": 0.1256, "step": 8650 }, { "epoch": 0.69, "grad_norm": 1.291072188271209, "learning_rate": 2.287044123916887e-06, "loss": 0.1633, "step": 8651 }, { "epoch": 0.69, "grad_norm": 15.141908833234195, "learning_rate": 2.2859559972054874e-06, "loss": 0.561, "step": 8652 }, { "epoch": 0.69, "grad_norm": 1.3701657754788936, "learning_rate": 2.2848680526960854e-06, "loss": 0.2, "step": 8653 }, { "epoch": 0.69, "grad_norm": 1.3821793473968764, "learning_rate": 2.283780290461717e-06, "loss": 0.1936, "step": 8654 }, { "epoch": 0.69, "grad_norm": 1.4330477840192175, "learning_rate": 2.2826927105754083e-06, "loss": 0.1665, "step": 8655 }, { "epoch": 0.69, "grad_norm": 1.59051277537362, "learning_rate": 2.2816053131101706e-06, "loss": 0.1976, "step": 8656 }, { "epoch": 0.69, "grad_norm": 1.3372352117982291, "learning_rate": 2.2805180981390047e-06, "loss": 0.1659, "step": 8657 }, { "epoch": 0.69, "grad_norm": 1.3881366115859393, "learning_rate": 2.279431065734899e-06, "loss": 0.1809, "step": 8658 }, { "epoch": 0.69, "grad_norm": 1.2384241350736567, "learning_rate": 2.2783442159708262e-06, "loss": 0.1781, "step": 8659 }, { "epoch": 0.69, "grad_norm": 5.645180923109874, "learning_rate": 2.2772575489197553e-06, "loss": 0.4778, "step": 8660 }, { "epoch": 0.69, "grad_norm": 1.4698639921262209, "learning_rate": 2.2761710646546336e-06, "loss": 0.2238, "step": 8661 }, { "epoch": 0.69, "grad_norm": 1.2257661877328063, "learning_rate": 2.275084763248402e-06, "loss": 0.1447, "step": 8662 }, { "epoch": 0.69, "grad_norm": 1.2082783756899877, "learning_rate": 2.2739986447739864e-06, "loss": 0.1382, "step": 8663 }, { "epoch": 0.69, "grad_norm": 1.3665834296831005, "learning_rate": 2.272912709304301e-06, "loss": 0.1364, "step": 8664 }, { "epoch": 0.69, "grad_norm": 1.3139959335967346, "learning_rate": 2.271826956912248e-06, "loss": 0.1757, "step": 8665 }, { "epoch": 0.69, "grad_norm": 1.283557105834495, "learning_rate": 2.2707413876707176e-06, "loss": 0.1428, "step": 8666 }, { "epoch": 0.69, "grad_norm": 1.2684148495355902, "learning_rate": 2.2696560016525872e-06, "loss": 0.1531, "step": 8667 }, { "epoch": 0.69, "grad_norm": 1.1756378583122005, "learning_rate": 2.2685707989307206e-06, "loss": 0.1514, "step": 8668 }, { "epoch": 0.69, "grad_norm": 1.5057887378935848, "learning_rate": 2.2674857795779747e-06, "loss": 0.1902, "step": 8669 }, { "epoch": 0.69, "grad_norm": 1.4528078861420395, "learning_rate": 2.266400943667185e-06, "loss": 0.1927, "step": 8670 }, { "epoch": 0.69, "grad_norm": 1.4810948757119218, "learning_rate": 2.265316291271184e-06, "loss": 0.1702, "step": 8671 }, { "epoch": 0.69, "grad_norm": 1.2615013220150761, "learning_rate": 2.2642318224627856e-06, "loss": 0.1509, "step": 8672 }, { "epoch": 0.69, "grad_norm": 1.316791946086133, "learning_rate": 2.2631475373147947e-06, "loss": 0.1695, "step": 8673 }, { "epoch": 0.69, "grad_norm": 1.2520488729890291, "learning_rate": 2.2620634359000017e-06, "loss": 0.1702, "step": 8674 }, { "epoch": 0.69, "grad_norm": 1.4645323598742592, "learning_rate": 2.260979518291186e-06, "loss": 0.2026, "step": 8675 }, { "epoch": 0.69, "grad_norm": 1.3225285487978233, "learning_rate": 2.259895784561114e-06, "loss": 0.2131, "step": 8676 }, { "epoch": 0.69, "grad_norm": 1.5041488606363822, "learning_rate": 2.2588122347825403e-06, "loss": 0.1702, "step": 8677 }, { "epoch": 0.69, "grad_norm": 1.1408835350586002, "learning_rate": 2.257728869028206e-06, "loss": 0.124, "step": 8678 }, { "epoch": 0.69, "grad_norm": 1.3526120289817962, "learning_rate": 2.2566456873708407e-06, "loss": 0.1905, "step": 8679 }, { "epoch": 0.69, "grad_norm": 1.285780156576686, "learning_rate": 2.255562689883166e-06, "loss": 0.152, "step": 8680 }, { "epoch": 0.69, "grad_norm": 8.71512727478539, "learning_rate": 2.254479876637879e-06, "loss": 0.6604, "step": 8681 }, { "epoch": 0.69, "grad_norm": 1.165899180043969, "learning_rate": 2.253397247707679e-06, "loss": 0.1529, "step": 8682 }, { "epoch": 0.69, "grad_norm": 1.3454976114941755, "learning_rate": 2.2523148031652447e-06, "loss": 0.1216, "step": 8683 }, { "epoch": 0.69, "grad_norm": 1.365999298343403, "learning_rate": 2.2512325430832393e-06, "loss": 0.1645, "step": 8684 }, { "epoch": 0.69, "grad_norm": 1.3170762927418018, "learning_rate": 2.2501504675343237e-06, "loss": 0.1943, "step": 8685 }, { "epoch": 0.69, "grad_norm": 1.4784706699991588, "learning_rate": 2.249068576591139e-06, "loss": 0.228, "step": 8686 }, { "epoch": 0.69, "grad_norm": 1.296589181648553, "learning_rate": 2.247986870326316e-06, "loss": 0.1738, "step": 8687 }, { "epoch": 0.7, "grad_norm": 1.5356612640489036, "learning_rate": 2.246905348812472e-06, "loss": 0.2028, "step": 8688 }, { "epoch": 0.7, "grad_norm": 10.393529379541585, "learning_rate": 2.245824012122214e-06, "loss": 0.7075, "step": 8689 }, { "epoch": 0.7, "grad_norm": 1.3127502337152592, "learning_rate": 2.244742860328133e-06, "loss": 0.1737, "step": 8690 }, { "epoch": 0.7, "grad_norm": 1.3317927276905537, "learning_rate": 2.2436618935028152e-06, "loss": 0.1857, "step": 8691 }, { "epoch": 0.7, "grad_norm": 1.408709377149671, "learning_rate": 2.2425811117188245e-06, "loss": 0.2097, "step": 8692 }, { "epoch": 0.7, "grad_norm": 1.5359756895497416, "learning_rate": 2.241500515048716e-06, "loss": 0.1925, "step": 8693 }, { "epoch": 0.7, "grad_norm": 8.57717298846275, "learning_rate": 2.2404201035650404e-06, "loss": 0.5537, "step": 8694 }, { "epoch": 0.7, "grad_norm": 1.20115842475598, "learning_rate": 2.2393398773403196e-06, "loss": 0.1354, "step": 8695 }, { "epoch": 0.7, "grad_norm": 1.1815064544567289, "learning_rate": 2.23825983644708e-06, "loss": 0.1499, "step": 8696 }, { "epoch": 0.7, "grad_norm": 1.4232128296982534, "learning_rate": 2.2371799809578254e-06, "loss": 0.1868, "step": 8697 }, { "epoch": 0.7, "grad_norm": 1.2135499758565902, "learning_rate": 2.236100310945049e-06, "loss": 0.1431, "step": 8698 }, { "epoch": 0.7, "grad_norm": 1.337339011347196, "learning_rate": 2.2350208264812318e-06, "loss": 0.186, "step": 8699 }, { "epoch": 0.7, "grad_norm": 1.5417855191289902, "learning_rate": 2.233941527638848e-06, "loss": 0.1835, "step": 8700 }, { "epoch": 0.7, "grad_norm": 1.5400669998025067, "learning_rate": 2.232862414490347e-06, "loss": 0.1764, "step": 8701 }, { "epoch": 0.7, "grad_norm": 6.85876936960936, "learning_rate": 2.2317834871081757e-06, "loss": 0.6021, "step": 8702 }, { "epoch": 0.7, "grad_norm": 1.2261340481247933, "learning_rate": 2.2307047455647696e-06, "loss": 0.1643, "step": 8703 }, { "epoch": 0.7, "grad_norm": 1.3717648273852179, "learning_rate": 2.2296261899325407e-06, "loss": 0.1678, "step": 8704 }, { "epoch": 0.7, "grad_norm": 1.31532119975373, "learning_rate": 2.228547820283902e-06, "loss": 0.1753, "step": 8705 }, { "epoch": 0.7, "grad_norm": 1.399501526894658, "learning_rate": 2.2274696366912448e-06, "loss": 0.1747, "step": 8706 }, { "epoch": 0.7, "grad_norm": 1.5062145517272492, "learning_rate": 2.226391639226951e-06, "loss": 0.2285, "step": 8707 }, { "epoch": 0.7, "grad_norm": 1.3491365043466192, "learning_rate": 2.2253138279633907e-06, "loss": 0.1623, "step": 8708 }, { "epoch": 0.7, "grad_norm": 1.394595576009113, "learning_rate": 2.22423620297292e-06, "loss": 0.165, "step": 8709 }, { "epoch": 0.7, "grad_norm": 1.462069929474237, "learning_rate": 2.2231587643278827e-06, "loss": 0.1799, "step": 8710 }, { "epoch": 0.7, "grad_norm": 1.4534174807138118, "learning_rate": 2.2220815121006116e-06, "loss": 0.1495, "step": 8711 }, { "epoch": 0.7, "grad_norm": 2.0298562362298234, "learning_rate": 2.2210044463634252e-06, "loss": 0.1717, "step": 8712 }, { "epoch": 0.7, "grad_norm": 1.262769019496148, "learning_rate": 2.219927567188629e-06, "loss": 0.136, "step": 8713 }, { "epoch": 0.7, "grad_norm": 1.3324393388167715, "learning_rate": 2.2188508746485214e-06, "loss": 0.1984, "step": 8714 }, { "epoch": 0.7, "grad_norm": 1.48438789746588, "learning_rate": 2.217774368815378e-06, "loss": 0.2132, "step": 8715 }, { "epoch": 0.7, "grad_norm": 1.246894482827469, "learning_rate": 2.2166980497614728e-06, "loss": 0.1403, "step": 8716 }, { "epoch": 0.7, "grad_norm": 1.4094312823907584, "learning_rate": 2.2156219175590623e-06, "loss": 0.1451, "step": 8717 }, { "epoch": 0.7, "grad_norm": 1.5048823293038702, "learning_rate": 2.214545972280385e-06, "loss": 0.2391, "step": 8718 }, { "epoch": 0.7, "grad_norm": 10.588603464127296, "learning_rate": 2.2134702139976777e-06, "loss": 0.6963, "step": 8719 }, { "epoch": 0.7, "grad_norm": 1.403886494343395, "learning_rate": 2.2123946427831582e-06, "loss": 0.2175, "step": 8720 }, { "epoch": 0.7, "grad_norm": 1.2626957641134713, "learning_rate": 2.2113192587090327e-06, "loss": 0.1679, "step": 8721 }, { "epoch": 0.7, "grad_norm": 1.6784919996542202, "learning_rate": 2.2102440618474942e-06, "loss": 0.1646, "step": 8722 }, { "epoch": 0.7, "grad_norm": 1.2948792986247344, "learning_rate": 2.2091690522707248e-06, "loss": 0.168, "step": 8723 }, { "epoch": 0.7, "grad_norm": 6.249720254991113, "learning_rate": 2.2080942300508907e-06, "loss": 0.5663, "step": 8724 }, { "epoch": 0.7, "grad_norm": 1.4229058812396012, "learning_rate": 2.207019595260154e-06, "loss": 0.2145, "step": 8725 }, { "epoch": 0.7, "grad_norm": 1.4630662476865703, "learning_rate": 2.2059451479706522e-06, "loss": 0.2301, "step": 8726 }, { "epoch": 0.7, "grad_norm": 1.3195765864235, "learning_rate": 2.2048708882545166e-06, "loss": 0.1634, "step": 8727 }, { "epoch": 0.7, "grad_norm": 1.6388587411312872, "learning_rate": 2.203796816183871e-06, "loss": 0.2031, "step": 8728 }, { "epoch": 0.7, "grad_norm": 1.3542096925167477, "learning_rate": 2.2027229318308135e-06, "loss": 0.1702, "step": 8729 }, { "epoch": 0.7, "grad_norm": 1.4778953744058572, "learning_rate": 2.2016492352674424e-06, "loss": 0.1868, "step": 8730 }, { "epoch": 0.7, "grad_norm": 1.4677367710726397, "learning_rate": 2.2005757265658375e-06, "loss": 0.1966, "step": 8731 }, { "epoch": 0.7, "grad_norm": 1.2931955724901292, "learning_rate": 2.1995024057980656e-06, "loss": 0.1652, "step": 8732 }, { "epoch": 0.7, "grad_norm": 1.2514041651540069, "learning_rate": 2.198429273036182e-06, "loss": 0.2037, "step": 8733 }, { "epoch": 0.7, "grad_norm": 1.4305431824315464, "learning_rate": 2.1973563283522305e-06, "loss": 0.1515, "step": 8734 }, { "epoch": 0.7, "grad_norm": 1.2808339942607438, "learning_rate": 2.19628357181824e-06, "loss": 0.1745, "step": 8735 }, { "epoch": 0.7, "grad_norm": 1.3083878047924136, "learning_rate": 2.1952110035062284e-06, "loss": 0.1881, "step": 8736 }, { "epoch": 0.7, "grad_norm": 1.4242874062845257, "learning_rate": 2.1941386234882e-06, "loss": 0.1868, "step": 8737 }, { "epoch": 0.7, "grad_norm": 1.3482473959845853, "learning_rate": 2.1930664318361457e-06, "loss": 0.1663, "step": 8738 }, { "epoch": 0.7, "grad_norm": 1.3831857066941897, "learning_rate": 2.19199442862205e-06, "loss": 0.2388, "step": 8739 }, { "epoch": 0.7, "grad_norm": 8.952667737017903, "learning_rate": 2.1909226139178723e-06, "loss": 0.549, "step": 8740 }, { "epoch": 0.7, "grad_norm": 1.448249893474245, "learning_rate": 2.1898509877955733e-06, "loss": 0.1887, "step": 8741 }, { "epoch": 0.7, "grad_norm": 7.448377709262811, "learning_rate": 2.1887795503270914e-06, "loss": 0.4552, "step": 8742 }, { "epoch": 0.7, "grad_norm": 8.082956309024548, "learning_rate": 2.187708301584356e-06, "loss": 0.5641, "step": 8743 }, { "epoch": 0.7, "grad_norm": 1.318625735997687, "learning_rate": 2.1866372416392827e-06, "loss": 0.1976, "step": 8744 }, { "epoch": 0.7, "grad_norm": 1.187578344779676, "learning_rate": 2.1855663705637763e-06, "loss": 0.1798, "step": 8745 }, { "epoch": 0.7, "grad_norm": 1.4578983494895568, "learning_rate": 2.1844956884297265e-06, "loss": 0.1934, "step": 8746 }, { "epoch": 0.7, "grad_norm": 1.3237372610631304, "learning_rate": 2.1834251953090096e-06, "loss": 0.2015, "step": 8747 }, { "epoch": 0.7, "grad_norm": 1.528414428453423, "learning_rate": 2.1823548912734976e-06, "loss": 0.1796, "step": 8748 }, { "epoch": 0.7, "grad_norm": 1.3324822343648361, "learning_rate": 2.181284776395035e-06, "loss": 0.1774, "step": 8749 }, { "epoch": 0.7, "grad_norm": 1.391034644324073, "learning_rate": 2.1802148507454675e-06, "loss": 0.1518, "step": 8750 }, { "epoch": 0.7, "grad_norm": 1.4596720917481374, "learning_rate": 2.17914511439662e-06, "loss": 0.1886, "step": 8751 }, { "epoch": 0.7, "grad_norm": 1.4719213776194484, "learning_rate": 2.1780755674203087e-06, "loss": 0.1463, "step": 8752 }, { "epoch": 0.7, "grad_norm": 1.2633251822427136, "learning_rate": 2.1770062098883343e-06, "loss": 0.1563, "step": 8753 }, { "epoch": 0.7, "grad_norm": 1.354272484944648, "learning_rate": 2.175937041872487e-06, "loss": 0.1637, "step": 8754 }, { "epoch": 0.7, "grad_norm": 1.3420536308280704, "learning_rate": 2.174868063444542e-06, "loss": 0.1884, "step": 8755 }, { "epoch": 0.7, "grad_norm": 1.5413568596475649, "learning_rate": 2.173799274676264e-06, "loss": 0.2399, "step": 8756 }, { "epoch": 0.7, "grad_norm": 1.4809524084303805, "learning_rate": 2.1727306756394027e-06, "loss": 0.1893, "step": 8757 }, { "epoch": 0.7, "grad_norm": 1.3050136822491063, "learning_rate": 2.171662266405697e-06, "loss": 0.1806, "step": 8758 }, { "epoch": 0.7, "grad_norm": 1.3846842094315535, "learning_rate": 2.1705940470468758e-06, "loss": 0.1807, "step": 8759 }, { "epoch": 0.7, "grad_norm": 1.4035795574727916, "learning_rate": 2.1695260176346453e-06, "loss": 0.203, "step": 8760 }, { "epoch": 0.7, "grad_norm": 1.2896832358138453, "learning_rate": 2.1684581782407117e-06, "loss": 0.1725, "step": 8761 }, { "epoch": 0.7, "grad_norm": 1.5857872105033077, "learning_rate": 2.167390528936761e-06, "loss": 0.2258, "step": 8762 }, { "epoch": 0.7, "grad_norm": 1.5072083789835178, "learning_rate": 2.166323069794463e-06, "loss": 0.2018, "step": 8763 }, { "epoch": 0.7, "grad_norm": 1.2004578291724939, "learning_rate": 2.1652558008854853e-06, "loss": 0.156, "step": 8764 }, { "epoch": 0.7, "grad_norm": 1.4177180195213406, "learning_rate": 2.164188722281474e-06, "loss": 0.1839, "step": 8765 }, { "epoch": 0.7, "grad_norm": 1.405902876673419, "learning_rate": 2.163121834054066e-06, "loss": 0.1893, "step": 8766 }, { "epoch": 0.7, "grad_norm": 1.5589266291088337, "learning_rate": 2.1620551362748854e-06, "loss": 0.1912, "step": 8767 }, { "epoch": 0.7, "grad_norm": 8.638788751173921, "learning_rate": 2.1609886290155412e-06, "loss": 0.4962, "step": 8768 }, { "epoch": 0.7, "grad_norm": 1.3779042004614497, "learning_rate": 2.159922312347631e-06, "loss": 0.2079, "step": 8769 }, { "epoch": 0.7, "grad_norm": 1.2521203725918555, "learning_rate": 2.158856186342745e-06, "loss": 0.1556, "step": 8770 }, { "epoch": 0.7, "grad_norm": 1.4078683413357185, "learning_rate": 2.1577902510724486e-06, "loss": 0.1892, "step": 8771 }, { "epoch": 0.7, "grad_norm": 1.2253088175861557, "learning_rate": 2.1567245066083033e-06, "loss": 0.1866, "step": 8772 }, { "epoch": 0.7, "grad_norm": 1.3814699745572565, "learning_rate": 2.15565895302186e-06, "loss": 0.1877, "step": 8773 }, { "epoch": 0.7, "grad_norm": 1.5342635587114835, "learning_rate": 2.1545935903846456e-06, "loss": 0.1952, "step": 8774 }, { "epoch": 0.7, "grad_norm": 1.5421558089029654, "learning_rate": 2.1535284187681866e-06, "loss": 0.221, "step": 8775 }, { "epoch": 0.7, "grad_norm": 1.3670858207147938, "learning_rate": 2.152463438243989e-06, "loss": 0.186, "step": 8776 }, { "epoch": 0.7, "grad_norm": 1.3266752703845073, "learning_rate": 2.1513986488835486e-06, "loss": 0.2035, "step": 8777 }, { "epoch": 0.7, "grad_norm": 1.1514188028614187, "learning_rate": 2.1503340507583482e-06, "loss": 0.1672, "step": 8778 }, { "epoch": 0.7, "grad_norm": 1.1590982308808748, "learning_rate": 2.1492696439398563e-06, "loss": 0.1748, "step": 8779 }, { "epoch": 0.7, "grad_norm": 1.5924349186199698, "learning_rate": 2.148205428499531e-06, "loss": 0.2184, "step": 8780 }, { "epoch": 0.7, "grad_norm": 1.7672018950883812, "learning_rate": 2.147141404508816e-06, "loss": 0.1977, "step": 8781 }, { "epoch": 0.7, "grad_norm": 1.4514485893671827, "learning_rate": 2.146077572039143e-06, "loss": 0.2069, "step": 8782 }, { "epoch": 0.7, "grad_norm": 1.4475352734861087, "learning_rate": 2.145013931161927e-06, "loss": 0.1871, "step": 8783 }, { "epoch": 0.7, "grad_norm": 17.056245322540924, "learning_rate": 2.1439504819485795e-06, "loss": 0.6315, "step": 8784 }, { "epoch": 0.7, "grad_norm": 1.5210892120096347, "learning_rate": 2.1428872244704862e-06, "loss": 0.1925, "step": 8785 }, { "epoch": 0.7, "grad_norm": 1.1989500402999689, "learning_rate": 2.1418241587990326e-06, "loss": 0.1647, "step": 8786 }, { "epoch": 0.7, "grad_norm": 7.622820220211232, "learning_rate": 2.140761285005584e-06, "loss": 0.627, "step": 8787 }, { "epoch": 0.7, "grad_norm": 47.784501888223176, "learning_rate": 2.139698603161491e-06, "loss": 0.6089, "step": 8788 }, { "epoch": 0.7, "grad_norm": 1.2652650592349, "learning_rate": 2.1386361133380985e-06, "loss": 0.1798, "step": 8789 }, { "epoch": 0.7, "grad_norm": 1.3280266661705573, "learning_rate": 2.1375738156067327e-06, "loss": 0.1981, "step": 8790 }, { "epoch": 0.7, "grad_norm": 1.2822586657578385, "learning_rate": 2.13651171003871e-06, "loss": 0.1593, "step": 8791 }, { "epoch": 0.7, "grad_norm": 1.2566453274980987, "learning_rate": 2.1354497967053304e-06, "loss": 0.1666, "step": 8792 }, { "epoch": 0.7, "grad_norm": 1.2849340099753168, "learning_rate": 2.1343880756778884e-06, "loss": 0.1712, "step": 8793 }, { "epoch": 0.7, "grad_norm": 1.3134977391382963, "learning_rate": 2.133326547027654e-06, "loss": 0.1613, "step": 8794 }, { "epoch": 0.7, "grad_norm": 1.6583229142328548, "learning_rate": 2.132265210825896e-06, "loss": 0.2101, "step": 8795 }, { "epoch": 0.7, "grad_norm": 1.2836326828938867, "learning_rate": 2.1312040671438656e-06, "loss": 0.1474, "step": 8796 }, { "epoch": 0.7, "grad_norm": 1.3714070436635597, "learning_rate": 2.130143116052794e-06, "loss": 0.1853, "step": 8797 }, { "epoch": 0.7, "grad_norm": 1.3802971835800655, "learning_rate": 2.129082357623912e-06, "loss": 0.1656, "step": 8798 }, { "epoch": 0.7, "grad_norm": 1.2090062461686482, "learning_rate": 2.128021791928431e-06, "loss": 0.1497, "step": 8799 }, { "epoch": 0.7, "grad_norm": 1.480064634597544, "learning_rate": 2.1269614190375477e-06, "loss": 0.1853, "step": 8800 }, { "epoch": 0.7, "grad_norm": 1.4062062432419404, "learning_rate": 2.12590123902245e-06, "loss": 0.1997, "step": 8801 }, { "epoch": 0.7, "grad_norm": 1.4300533792985821, "learning_rate": 2.12484125195431e-06, "loss": 0.1735, "step": 8802 }, { "epoch": 0.7, "grad_norm": 1.384543903209185, "learning_rate": 2.123781457904286e-06, "loss": 0.1824, "step": 8803 }, { "epoch": 0.7, "grad_norm": 1.5132930671969893, "learning_rate": 2.122721856943531e-06, "loss": 0.1796, "step": 8804 }, { "epoch": 0.7, "grad_norm": 1.4532391516521237, "learning_rate": 2.1216624491431744e-06, "loss": 0.1927, "step": 8805 }, { "epoch": 0.7, "grad_norm": 1.369314650979274, "learning_rate": 2.120603234574336e-06, "loss": 0.1997, "step": 8806 }, { "epoch": 0.7, "grad_norm": 1.3858502990956465, "learning_rate": 2.1195442133081307e-06, "loss": 0.1721, "step": 8807 }, { "epoch": 0.7, "grad_norm": 1.4622561652982093, "learning_rate": 2.1184853854156462e-06, "loss": 0.1745, "step": 8808 }, { "epoch": 0.7, "grad_norm": 1.4447131585698556, "learning_rate": 2.1174267509679703e-06, "loss": 0.1717, "step": 8809 }, { "epoch": 0.7, "grad_norm": 13.866334365437547, "learning_rate": 2.1163683100361702e-06, "loss": 0.5617, "step": 8810 }, { "epoch": 0.7, "grad_norm": 12.041647197855086, "learning_rate": 2.1153100626913027e-06, "loss": 0.3806, "step": 8811 }, { "epoch": 0.7, "grad_norm": 5.550844057590258, "learning_rate": 2.114252009004411e-06, "loss": 0.6173, "step": 8812 }, { "epoch": 0.71, "grad_norm": 1.2360118798995223, "learning_rate": 2.113194149046525e-06, "loss": 0.1541, "step": 8813 }, { "epoch": 0.71, "grad_norm": 1.4922707384325054, "learning_rate": 2.112136482888663e-06, "loss": 0.2145, "step": 8814 }, { "epoch": 0.71, "grad_norm": 7.71839473296996, "learning_rate": 2.1110790106018286e-06, "loss": 0.3803, "step": 8815 }, { "epoch": 0.71, "grad_norm": 1.413837554148325, "learning_rate": 2.1100217322570133e-06, "loss": 0.182, "step": 8816 }, { "epoch": 0.71, "grad_norm": 5.475626157536165, "learning_rate": 2.1089646479251936e-06, "loss": 0.5344, "step": 8817 }, { "epoch": 0.71, "grad_norm": 1.2816078715522858, "learning_rate": 2.10790775767734e-06, "loss": 0.168, "step": 8818 }, { "epoch": 0.71, "grad_norm": 1.3580897520767927, "learning_rate": 2.106851061584398e-06, "loss": 0.1941, "step": 8819 }, { "epoch": 0.71, "grad_norm": 1.5446506490868177, "learning_rate": 2.105794559717311e-06, "loss": 0.1941, "step": 8820 }, { "epoch": 0.71, "grad_norm": 1.4381451154685287, "learning_rate": 2.1047382521470057e-06, "loss": 0.2112, "step": 8821 }, { "epoch": 0.71, "grad_norm": 1.304268565476533, "learning_rate": 2.1036821389443927e-06, "loss": 0.162, "step": 8822 }, { "epoch": 0.71, "grad_norm": 1.376732000216103, "learning_rate": 2.1026262201803738e-06, "loss": 0.1981, "step": 8823 }, { "epoch": 0.71, "grad_norm": 1.4773961092945924, "learning_rate": 2.1015704959258355e-06, "loss": 0.2161, "step": 8824 }, { "epoch": 0.71, "grad_norm": 1.4320544687799417, "learning_rate": 2.1005149662516517e-06, "loss": 0.1975, "step": 8825 }, { "epoch": 0.71, "grad_norm": 1.3237543295352794, "learning_rate": 2.099459631228683e-06, "loss": 0.2086, "step": 8826 }, { "epoch": 0.71, "grad_norm": 1.3892191534369167, "learning_rate": 2.0984044909277777e-06, "loss": 0.2306, "step": 8827 }, { "epoch": 0.71, "grad_norm": 1.2142666790063947, "learning_rate": 2.0973495454197686e-06, "loss": 0.1563, "step": 8828 }, { "epoch": 0.71, "grad_norm": 1.5062731682482111, "learning_rate": 2.096294794775481e-06, "loss": 0.2151, "step": 8829 }, { "epoch": 0.71, "grad_norm": 17.263257745407792, "learning_rate": 2.0952402390657215e-06, "loss": 0.5625, "step": 8830 }, { "epoch": 0.71, "grad_norm": 1.275642617991941, "learning_rate": 2.0941858783612863e-06, "loss": 0.1386, "step": 8831 }, { "epoch": 0.71, "grad_norm": 1.3848665576330261, "learning_rate": 2.093131712732957e-06, "loss": 0.2391, "step": 8832 }, { "epoch": 0.71, "grad_norm": 1.2702835364104217, "learning_rate": 2.0920777422515033e-06, "loss": 0.1578, "step": 8833 }, { "epoch": 0.71, "grad_norm": 7.893659356488322, "learning_rate": 2.0910239669876814e-06, "loss": 0.6647, "step": 8834 }, { "epoch": 0.71, "grad_norm": 1.5017451947179192, "learning_rate": 2.0899703870122347e-06, "loss": 0.1868, "step": 8835 }, { "epoch": 0.71, "grad_norm": 9.213723079554798, "learning_rate": 2.0889170023958927e-06, "loss": 0.4584, "step": 8836 }, { "epoch": 0.71, "grad_norm": 1.4903907522906628, "learning_rate": 2.0878638132093703e-06, "loss": 0.2304, "step": 8837 }, { "epoch": 0.71, "grad_norm": 1.3420402720303217, "learning_rate": 2.0868108195233783e-06, "loss": 0.1876, "step": 8838 }, { "epoch": 0.71, "grad_norm": 1.5246173645413683, "learning_rate": 2.0857580214086e-06, "loss": 0.2007, "step": 8839 }, { "epoch": 0.71, "grad_norm": 1.3505345935443007, "learning_rate": 2.0847054189357136e-06, "loss": 0.1856, "step": 8840 }, { "epoch": 0.71, "grad_norm": 6.328069713639952, "learning_rate": 2.0836530121753895e-06, "loss": 0.4315, "step": 8841 }, { "epoch": 0.71, "grad_norm": 1.2589652011839645, "learning_rate": 2.0826008011982714e-06, "loss": 0.196, "step": 8842 }, { "epoch": 0.71, "grad_norm": 1.5765691334292593, "learning_rate": 2.0815487860750023e-06, "loss": 0.2135, "step": 8843 }, { "epoch": 0.71, "grad_norm": 1.4865032367365123, "learning_rate": 2.0804969668762052e-06, "loss": 0.1885, "step": 8844 }, { "epoch": 0.71, "grad_norm": 1.2294452274400172, "learning_rate": 2.079445343672493e-06, "loss": 0.144, "step": 8845 }, { "epoch": 0.71, "grad_norm": 1.1103684526624893, "learning_rate": 2.078393916534464e-06, "loss": 0.1069, "step": 8846 }, { "epoch": 0.71, "grad_norm": 1.3410438379703813, "learning_rate": 2.0773426855327034e-06, "loss": 0.1708, "step": 8847 }, { "epoch": 0.71, "grad_norm": 1.6506223626781895, "learning_rate": 2.076291650737784e-06, "loss": 0.251, "step": 8848 }, { "epoch": 0.71, "grad_norm": 1.2074409122689882, "learning_rate": 2.075240812220264e-06, "loss": 0.1644, "step": 8849 }, { "epoch": 0.71, "grad_norm": 1.258498636978523, "learning_rate": 2.07419017005069e-06, "loss": 0.1767, "step": 8850 }, { "epoch": 0.71, "grad_norm": 1.389742377300548, "learning_rate": 2.0731397242995932e-06, "loss": 0.172, "step": 8851 }, { "epoch": 0.71, "grad_norm": 1.243204367665582, "learning_rate": 2.0720894750374982e-06, "loss": 0.1629, "step": 8852 }, { "epoch": 0.71, "grad_norm": 1.2684072703099805, "learning_rate": 2.071039422334904e-06, "loss": 0.1891, "step": 8853 }, { "epoch": 0.71, "grad_norm": 1.208393405917905, "learning_rate": 2.0699895662623097e-06, "loss": 0.1739, "step": 8854 }, { "epoch": 0.71, "grad_norm": 1.401908456132836, "learning_rate": 2.068939906890194e-06, "loss": 0.1795, "step": 8855 }, { "epoch": 0.71, "grad_norm": 6.712359394773343, "learning_rate": 2.0678904442890222e-06, "loss": 0.6128, "step": 8856 }, { "epoch": 0.71, "grad_norm": 1.4498698601565287, "learning_rate": 2.06684117852925e-06, "loss": 0.171, "step": 8857 }, { "epoch": 0.71, "grad_norm": 1.4905052209777583, "learning_rate": 2.0657921096813154e-06, "loss": 0.205, "step": 8858 }, { "epoch": 0.71, "grad_norm": 9.37779395509775, "learning_rate": 2.0647432378156473e-06, "loss": 0.7501, "step": 8859 }, { "epoch": 0.71, "grad_norm": 5.777066165302513, "learning_rate": 2.0636945630026594e-06, "loss": 0.5364, "step": 8860 }, { "epoch": 0.71, "grad_norm": 1.3493024116593981, "learning_rate": 2.062646085312752e-06, "loss": 0.1744, "step": 8861 }, { "epoch": 0.71, "grad_norm": 1.4172313940747723, "learning_rate": 2.0615978048163106e-06, "loss": 0.1897, "step": 8862 }, { "epoch": 0.71, "grad_norm": 1.258228085163617, "learning_rate": 2.0605497215837157e-06, "loss": 0.1969, "step": 8863 }, { "epoch": 0.71, "grad_norm": 1.414241917380858, "learning_rate": 2.05950183568532e-06, "loss": 0.1947, "step": 8864 }, { "epoch": 0.71, "grad_norm": 1.3681719346891756, "learning_rate": 2.058454147191478e-06, "loss": 0.1462, "step": 8865 }, { "epoch": 0.71, "grad_norm": 1.4210413124718841, "learning_rate": 2.057406656172522e-06, "loss": 0.2069, "step": 8866 }, { "epoch": 0.71, "grad_norm": 9.129885131379842, "learning_rate": 2.0563593626987705e-06, "loss": 0.7347, "step": 8867 }, { "epoch": 0.71, "grad_norm": 1.3353185414284146, "learning_rate": 2.0553122668405352e-06, "loss": 0.188, "step": 8868 }, { "epoch": 0.71, "grad_norm": 8.478511375119496, "learning_rate": 2.0542653686681096e-06, "loss": 0.6067, "step": 8869 }, { "epoch": 0.71, "grad_norm": 1.2807392110065132, "learning_rate": 2.053218668251775e-06, "loss": 0.1913, "step": 8870 }, { "epoch": 0.71, "grad_norm": 7.820035919517078, "learning_rate": 2.0521721656617976e-06, "loss": 0.624, "step": 8871 }, { "epoch": 0.71, "grad_norm": 1.367497406936669, "learning_rate": 2.051125860968438e-06, "loss": 0.1809, "step": 8872 }, { "epoch": 0.71, "grad_norm": 6.849808753412626, "learning_rate": 2.050079754241931e-06, "loss": 0.58, "step": 8873 }, { "epoch": 0.71, "grad_norm": 1.4757352890299462, "learning_rate": 2.0490338455525095e-06, "loss": 0.2111, "step": 8874 }, { "epoch": 0.71, "grad_norm": 1.3113034833813948, "learning_rate": 2.0479881349703885e-06, "loss": 0.1857, "step": 8875 }, { "epoch": 0.71, "grad_norm": 1.4037693468083008, "learning_rate": 2.0469426225657647e-06, "loss": 0.1662, "step": 8876 }, { "epoch": 0.71, "grad_norm": 1.3466721617272521, "learning_rate": 2.0458973084088316e-06, "loss": 0.1955, "step": 8877 }, { "epoch": 0.71, "grad_norm": 1.5872954258490564, "learning_rate": 2.0448521925697634e-06, "loss": 0.1887, "step": 8878 }, { "epoch": 0.71, "grad_norm": 6.72511532302446, "learning_rate": 2.043807275118721e-06, "loss": 0.6621, "step": 8879 }, { "epoch": 0.71, "grad_norm": 15.598762438867025, "learning_rate": 2.042762556125853e-06, "loss": 0.6081, "step": 8880 }, { "epoch": 0.71, "grad_norm": 8.480833664347717, "learning_rate": 2.041718035661295e-06, "loss": 0.4832, "step": 8881 }, { "epoch": 0.71, "grad_norm": 1.3889063549569087, "learning_rate": 2.0406737137951666e-06, "loss": 0.1722, "step": 8882 }, { "epoch": 0.71, "grad_norm": 1.4433172288199752, "learning_rate": 2.0396295905975816e-06, "loss": 0.1836, "step": 8883 }, { "epoch": 0.71, "grad_norm": 1.432354872475748, "learning_rate": 2.03858566613863e-06, "loss": 0.1659, "step": 8884 }, { "epoch": 0.71, "grad_norm": 7.223139700753808, "learning_rate": 2.0375419404883938e-06, "loss": 0.6951, "step": 8885 }, { "epoch": 0.71, "grad_norm": 1.4733168650617896, "learning_rate": 2.0364984137169464e-06, "loss": 0.2128, "step": 8886 }, { "epoch": 0.71, "grad_norm": 1.2806633412295598, "learning_rate": 2.035455085894336e-06, "loss": 0.149, "step": 8887 }, { "epoch": 0.71, "grad_norm": 1.3235700119958946, "learning_rate": 2.0344119570906097e-06, "loss": 0.1682, "step": 8888 }, { "epoch": 0.71, "grad_norm": 1.2938884332148652, "learning_rate": 2.033369027375794e-06, "loss": 0.1999, "step": 8889 }, { "epoch": 0.71, "grad_norm": 1.3949791198469503, "learning_rate": 2.0323262968199043e-06, "loss": 0.1474, "step": 8890 }, { "epoch": 0.71, "grad_norm": 1.459419889213352, "learning_rate": 2.0312837654929412e-06, "loss": 0.1595, "step": 8891 }, { "epoch": 0.71, "grad_norm": 1.451162222231715, "learning_rate": 2.0302414334648944e-06, "loss": 0.2459, "step": 8892 }, { "epoch": 0.71, "grad_norm": 1.1609369611756744, "learning_rate": 2.029199300805739e-06, "loss": 0.1336, "step": 8893 }, { "epoch": 0.71, "grad_norm": 1.1017142913133107, "learning_rate": 2.0281573675854344e-06, "loss": 0.1037, "step": 8894 }, { "epoch": 0.71, "grad_norm": 73.31681932343245, "learning_rate": 2.027115633873931e-06, "loss": 0.5081, "step": 8895 }, { "epoch": 0.71, "grad_norm": 1.4475739857474033, "learning_rate": 2.02607409974116e-06, "loss": 0.1844, "step": 8896 }, { "epoch": 0.71, "grad_norm": 1.374014870378563, "learning_rate": 2.025032765257049e-06, "loss": 0.1574, "step": 8897 }, { "epoch": 0.71, "grad_norm": 1.5335452122548567, "learning_rate": 2.0239916304914984e-06, "loss": 0.2, "step": 8898 }, { "epoch": 0.71, "grad_norm": 1.3057293089429303, "learning_rate": 2.022950695514409e-06, "loss": 0.1797, "step": 8899 }, { "epoch": 0.71, "grad_norm": 1.2563082709326832, "learning_rate": 2.021909960395661e-06, "loss": 0.161, "step": 8900 }, { "epoch": 0.71, "grad_norm": 1.4537795638776398, "learning_rate": 2.0208694252051174e-06, "loss": 0.2429, "step": 8901 }, { "epoch": 0.71, "grad_norm": 1.3175652381616214, "learning_rate": 2.0198290900126373e-06, "loss": 0.1537, "step": 8902 }, { "epoch": 0.71, "grad_norm": 1.4615986199852011, "learning_rate": 2.0187889548880594e-06, "loss": 0.1703, "step": 8903 }, { "epoch": 0.71, "grad_norm": 22.194715048393576, "learning_rate": 2.017749019901213e-06, "loss": 0.4577, "step": 8904 }, { "epoch": 0.71, "grad_norm": 1.174107113559744, "learning_rate": 2.0167092851219094e-06, "loss": 0.1644, "step": 8905 }, { "epoch": 0.71, "grad_norm": 9.578276450839011, "learning_rate": 2.0156697506199512e-06, "loss": 0.545, "step": 8906 }, { "epoch": 0.71, "grad_norm": 1.1836351038222372, "learning_rate": 2.0146304164651227e-06, "loss": 0.174, "step": 8907 }, { "epoch": 0.71, "grad_norm": 1.3432759545366408, "learning_rate": 2.0135912827272036e-06, "loss": 0.1759, "step": 8908 }, { "epoch": 0.71, "grad_norm": 8.664527552885824, "learning_rate": 2.012552349475948e-06, "loss": 0.4168, "step": 8909 }, { "epoch": 0.71, "grad_norm": 1.292971414037827, "learning_rate": 2.0115136167811033e-06, "loss": 0.1603, "step": 8910 }, { "epoch": 0.71, "grad_norm": 1.2405040693404323, "learning_rate": 2.0104750847124075e-06, "loss": 0.1556, "step": 8911 }, { "epoch": 0.71, "grad_norm": 1.5065017470853874, "learning_rate": 2.009436753339574e-06, "loss": 0.1861, "step": 8912 }, { "epoch": 0.71, "grad_norm": 1.3557858429895708, "learning_rate": 2.0083986227323138e-06, "loss": 0.1824, "step": 8913 }, { "epoch": 0.71, "grad_norm": 1.3434318012983424, "learning_rate": 2.007360692960318e-06, "loss": 0.1647, "step": 8914 }, { "epoch": 0.71, "grad_norm": 1.5414041357145596, "learning_rate": 2.0063229640932664e-06, "loss": 0.2005, "step": 8915 }, { "epoch": 0.71, "grad_norm": 1.4355962043303128, "learning_rate": 2.0052854362008232e-06, "loss": 0.1981, "step": 8916 }, { "epoch": 0.71, "grad_norm": 1.5044123223932846, "learning_rate": 2.0042481093526454e-06, "loss": 0.1865, "step": 8917 }, { "epoch": 0.71, "grad_norm": 1.3832212558379213, "learning_rate": 2.003210983618368e-06, "loss": 0.1786, "step": 8918 }, { "epoch": 0.71, "grad_norm": 1.3732643237452844, "learning_rate": 2.002174059067614e-06, "loss": 0.1458, "step": 8919 }, { "epoch": 0.71, "grad_norm": 7.751721697102618, "learning_rate": 2.001137335770003e-06, "loss": 0.4929, "step": 8920 }, { "epoch": 0.71, "grad_norm": 1.3124551098178798, "learning_rate": 2.000100813795125e-06, "loss": 0.1718, "step": 8921 }, { "epoch": 0.71, "grad_norm": 1.455805796739455, "learning_rate": 1.9990644932125704e-06, "loss": 0.1802, "step": 8922 }, { "epoch": 0.71, "grad_norm": 1.3375243772516927, "learning_rate": 1.9980283740919086e-06, "loss": 0.2127, "step": 8923 }, { "epoch": 0.71, "grad_norm": 1.2237679361904445, "learning_rate": 1.9969924565026982e-06, "loss": 0.1442, "step": 8924 }, { "epoch": 0.71, "grad_norm": 1.4206898912657797, "learning_rate": 1.9959567405144825e-06, "loss": 0.1586, "step": 8925 }, { "epoch": 0.71, "grad_norm": 1.387873481284477, "learning_rate": 1.994921226196793e-06, "loss": 0.2045, "step": 8926 }, { "epoch": 0.71, "grad_norm": 1.503667368614702, "learning_rate": 1.993885913619147e-06, "loss": 0.204, "step": 8927 }, { "epoch": 0.71, "grad_norm": 1.398104153294139, "learning_rate": 1.9928508028510467e-06, "loss": 0.1778, "step": 8928 }, { "epoch": 0.71, "grad_norm": 1.280793073788728, "learning_rate": 1.9918158939619843e-06, "loss": 0.1581, "step": 8929 }, { "epoch": 0.71, "grad_norm": 1.3960895174296555, "learning_rate": 1.9907811870214334e-06, "loss": 0.1912, "step": 8930 }, { "epoch": 0.71, "grad_norm": 1.3269657227466958, "learning_rate": 1.9897466820988622e-06, "loss": 0.1712, "step": 8931 }, { "epoch": 0.71, "grad_norm": 5.076675910648911, "learning_rate": 1.988712379263713e-06, "loss": 0.5833, "step": 8932 }, { "epoch": 0.71, "grad_norm": 1.4895155877351163, "learning_rate": 1.9876782785854283e-06, "loss": 0.1869, "step": 8933 }, { "epoch": 0.71, "grad_norm": 1.3819105899604691, "learning_rate": 1.9866443801334274e-06, "loss": 0.1687, "step": 8934 }, { "epoch": 0.71, "grad_norm": 1.1871228628252093, "learning_rate": 1.9856106839771194e-06, "loss": 0.1372, "step": 8935 }, { "epoch": 0.71, "grad_norm": 1.3653868366131665, "learning_rate": 1.984577190185899e-06, "loss": 0.1708, "step": 8936 }, { "epoch": 0.71, "grad_norm": 1.1849947991323169, "learning_rate": 1.983543898829149e-06, "loss": 0.1634, "step": 8937 }, { "epoch": 0.72, "grad_norm": 1.1534516532065513, "learning_rate": 1.9825108099762358e-06, "loss": 0.1406, "step": 8938 }, { "epoch": 0.72, "grad_norm": 1.2652940800700563, "learning_rate": 1.981477923696515e-06, "loss": 0.1485, "step": 8939 }, { "epoch": 0.72, "grad_norm": 1.3976634087237663, "learning_rate": 1.9804452400593265e-06, "loss": 0.1801, "step": 8940 }, { "epoch": 0.72, "grad_norm": 1.490153783334788, "learning_rate": 1.9794127591339967e-06, "loss": 0.1817, "step": 8941 }, { "epoch": 0.72, "grad_norm": 1.3693676471876894, "learning_rate": 1.978380480989844e-06, "loss": 0.1564, "step": 8942 }, { "epoch": 0.72, "grad_norm": 1.5448776724041051, "learning_rate": 1.9773484056961605e-06, "loss": 0.2015, "step": 8943 }, { "epoch": 0.72, "grad_norm": 1.3041591000333528, "learning_rate": 1.976316533322239e-06, "loss": 0.1871, "step": 8944 }, { "epoch": 0.72, "grad_norm": 1.266478614858927, "learning_rate": 1.975284863937352e-06, "loss": 0.1654, "step": 8945 }, { "epoch": 0.72, "grad_norm": 1.4347066422425818, "learning_rate": 1.9742533976107524e-06, "loss": 0.1781, "step": 8946 }, { "epoch": 0.72, "grad_norm": 1.2832198876254115, "learning_rate": 1.973222134411692e-06, "loss": 0.1938, "step": 8947 }, { "epoch": 0.72, "grad_norm": 1.4391846452093422, "learning_rate": 1.9721910744094006e-06, "loss": 0.1963, "step": 8948 }, { "epoch": 0.72, "grad_norm": 1.2209384930910225, "learning_rate": 1.9711602176730967e-06, "loss": 0.1871, "step": 8949 }, { "epoch": 0.72, "grad_norm": 1.3893943483714364, "learning_rate": 1.9701295642719836e-06, "loss": 0.1568, "step": 8950 }, { "epoch": 0.72, "grad_norm": 1.4592579135093358, "learning_rate": 1.969099114275253e-06, "loss": 0.216, "step": 8951 }, { "epoch": 0.72, "grad_norm": 1.20634802262108, "learning_rate": 1.968068867752081e-06, "loss": 0.1709, "step": 8952 }, { "epoch": 0.72, "grad_norm": 1.2887015641935737, "learning_rate": 1.9670388247716354e-06, "loss": 0.1752, "step": 8953 }, { "epoch": 0.72, "grad_norm": 1.6014548586869528, "learning_rate": 1.9660089854030612e-06, "loss": 0.2275, "step": 8954 }, { "epoch": 0.72, "grad_norm": 1.2307201572158966, "learning_rate": 1.9649793497154953e-06, "loss": 0.1373, "step": 8955 }, { "epoch": 0.72, "grad_norm": 1.608759894031024, "learning_rate": 1.9639499177780642e-06, "loss": 0.2286, "step": 8956 }, { "epoch": 0.72, "grad_norm": 1.5872218847628707, "learning_rate": 1.9629206896598707e-06, "loss": 0.2, "step": 8957 }, { "epoch": 0.72, "grad_norm": 1.4387261174009294, "learning_rate": 1.9618916654300144e-06, "loss": 0.2176, "step": 8958 }, { "epoch": 0.72, "grad_norm": 1.3472256930456061, "learning_rate": 1.960862845157577e-06, "loss": 0.1758, "step": 8959 }, { "epoch": 0.72, "grad_norm": 1.4559821839385172, "learning_rate": 1.959834228911624e-06, "loss": 0.1455, "step": 8960 }, { "epoch": 0.72, "grad_norm": 1.4175853859582273, "learning_rate": 1.9588058167612113e-06, "loss": 0.2027, "step": 8961 }, { "epoch": 0.72, "grad_norm": 1.5182053199409054, "learning_rate": 1.957777608775378e-06, "loss": 0.2195, "step": 8962 }, { "epoch": 0.72, "grad_norm": 1.4210613805210779, "learning_rate": 1.9567496050231516e-06, "loss": 0.1708, "step": 8963 }, { "epoch": 0.72, "grad_norm": 6.992888157424812, "learning_rate": 1.955721805573543e-06, "loss": 0.5966, "step": 8964 }, { "epoch": 0.72, "grad_norm": 1.1769085169195812, "learning_rate": 1.9546942104955567e-06, "loss": 0.1625, "step": 8965 }, { "epoch": 0.72, "grad_norm": 1.2709786771199052, "learning_rate": 1.953666819858172e-06, "loss": 0.1666, "step": 8966 }, { "epoch": 0.72, "grad_norm": 11.871684728700824, "learning_rate": 1.9526396337303654e-06, "loss": 0.472, "step": 8967 }, { "epoch": 0.72, "grad_norm": 1.3024000046297861, "learning_rate": 1.951612652181093e-06, "loss": 0.1625, "step": 8968 }, { "epoch": 0.72, "grad_norm": 1.3745739019797572, "learning_rate": 1.9505858752792995e-06, "loss": 0.163, "step": 8969 }, { "epoch": 0.72, "grad_norm": 6.66430553107358, "learning_rate": 1.949559303093916e-06, "loss": 0.5885, "step": 8970 }, { "epoch": 0.72, "grad_norm": 1.0866830098427749, "learning_rate": 1.9485329356938587e-06, "loss": 0.1443, "step": 8971 }, { "epoch": 0.72, "grad_norm": 1.3714377355503682, "learning_rate": 1.9475067731480308e-06, "loss": 0.1481, "step": 8972 }, { "epoch": 0.72, "grad_norm": 1.47456644670411, "learning_rate": 1.9464808155253225e-06, "loss": 0.1648, "step": 8973 }, { "epoch": 0.72, "grad_norm": 10.17560716864444, "learning_rate": 1.945455062894608e-06, "loss": 0.5775, "step": 8974 }, { "epoch": 0.72, "grad_norm": 1.4322579342718345, "learning_rate": 1.944429515324749e-06, "loss": 0.1656, "step": 8975 }, { "epoch": 0.72, "grad_norm": 1.3149312056892075, "learning_rate": 1.943404172884598e-06, "loss": 0.1842, "step": 8976 }, { "epoch": 0.72, "grad_norm": 1.3631057634103574, "learning_rate": 1.9423790356429815e-06, "loss": 0.1854, "step": 8977 }, { "epoch": 0.72, "grad_norm": 1.2898927589982514, "learning_rate": 1.9413541036687272e-06, "loss": 0.1927, "step": 8978 }, { "epoch": 0.72, "grad_norm": 1.5952432585175471, "learning_rate": 1.9403293770306414e-06, "loss": 0.1702, "step": 8979 }, { "epoch": 0.72, "grad_norm": 6.507514038451187, "learning_rate": 1.939304855797511e-06, "loss": 0.5915, "step": 8980 }, { "epoch": 0.72, "grad_norm": 1.4655712159404735, "learning_rate": 1.9382805400381206e-06, "loss": 0.1734, "step": 8981 }, { "epoch": 0.72, "grad_norm": 1.157475564032177, "learning_rate": 1.9372564298212343e-06, "loss": 0.1617, "step": 8982 }, { "epoch": 0.72, "grad_norm": 8.707654693270186, "learning_rate": 1.936232525215604e-06, "loss": 0.657, "step": 8983 }, { "epoch": 0.72, "grad_norm": 1.2320959475797324, "learning_rate": 1.935208826289967e-06, "loss": 0.1324, "step": 8984 }, { "epoch": 0.72, "grad_norm": 1.4570816928890984, "learning_rate": 1.9341853331130472e-06, "loss": 0.1779, "step": 8985 }, { "epoch": 0.72, "grad_norm": 1.8486861670890355, "learning_rate": 1.933162045753554e-06, "loss": 0.1765, "step": 8986 }, { "epoch": 0.72, "grad_norm": 1.2213251735617248, "learning_rate": 1.9321389642801875e-06, "loss": 0.1598, "step": 8987 }, { "epoch": 0.72, "grad_norm": 9.928762655034381, "learning_rate": 1.931116088761626e-06, "loss": 0.5843, "step": 8988 }, { "epoch": 0.72, "grad_norm": 1.6031241876257238, "learning_rate": 1.9300934192665383e-06, "loss": 0.2282, "step": 8989 }, { "epoch": 0.72, "grad_norm": 1.3370554296302009, "learning_rate": 1.929070955863584e-06, "loss": 0.1954, "step": 8990 }, { "epoch": 0.72, "grad_norm": 1.2593402690332172, "learning_rate": 1.9280486986213976e-06, "loss": 0.1537, "step": 8991 }, { "epoch": 0.72, "grad_norm": 1.2433238086882437, "learning_rate": 1.9270266476086115e-06, "loss": 0.1173, "step": 8992 }, { "epoch": 0.72, "grad_norm": 1.3582152451748397, "learning_rate": 1.926004802893837e-06, "loss": 0.1753, "step": 8993 }, { "epoch": 0.72, "grad_norm": 7.13069566882756, "learning_rate": 1.9249831645456744e-06, "loss": 0.4784, "step": 8994 }, { "epoch": 0.72, "grad_norm": 6.444084739450104, "learning_rate": 1.923961732632709e-06, "loss": 0.4935, "step": 8995 }, { "epoch": 0.72, "grad_norm": 1.454621925803563, "learning_rate": 1.922940507223512e-06, "loss": 0.1627, "step": 8996 }, { "epoch": 0.72, "grad_norm": 1.3768881871308623, "learning_rate": 1.9219194883866423e-06, "loss": 0.1553, "step": 8997 }, { "epoch": 0.72, "grad_norm": 1.416366284882426, "learning_rate": 1.9208986761906434e-06, "loss": 0.1783, "step": 8998 }, { "epoch": 0.72, "grad_norm": 1.3985872053516994, "learning_rate": 1.919878070704045e-06, "loss": 0.2194, "step": 8999 }, { "epoch": 0.72, "grad_norm": 1.47871055597183, "learning_rate": 1.9188576719953635e-06, "loss": 0.236, "step": 9000 }, { "epoch": 0.72, "grad_norm": 1.4647047385880896, "learning_rate": 1.917837480133103e-06, "loss": 0.1785, "step": 9001 }, { "epoch": 0.72, "grad_norm": 1.6027860735149817, "learning_rate": 1.9168174951857514e-06, "loss": 0.1887, "step": 9002 }, { "epoch": 0.72, "grad_norm": 1.3959321590021587, "learning_rate": 1.915797717221783e-06, "loss": 0.2174, "step": 9003 }, { "epoch": 0.72, "grad_norm": 8.787307661870827, "learning_rate": 1.9147781463096583e-06, "loss": 0.5173, "step": 9004 }, { "epoch": 0.72, "grad_norm": 8.219231121041604, "learning_rate": 1.9137587825178243e-06, "loss": 0.4628, "step": 9005 }, { "epoch": 0.72, "grad_norm": 6.58597382904973, "learning_rate": 1.9127396259147147e-06, "loss": 0.5857, "step": 9006 }, { "epoch": 0.72, "grad_norm": 1.53125390728812, "learning_rate": 1.9117206765687478e-06, "loss": 0.1922, "step": 9007 }, { "epoch": 0.72, "grad_norm": 1.2925076690762765, "learning_rate": 1.910701934548329e-06, "loss": 0.1527, "step": 9008 }, { "epoch": 0.72, "grad_norm": 6.986660858718342, "learning_rate": 1.9096833999218485e-06, "loss": 0.5722, "step": 9009 }, { "epoch": 0.72, "grad_norm": 1.5049045595525858, "learning_rate": 1.908665072757687e-06, "loss": 0.1701, "step": 9010 }, { "epoch": 0.72, "grad_norm": 1.3013312296424395, "learning_rate": 1.9076469531242027e-06, "loss": 0.1739, "step": 9011 }, { "epoch": 0.72, "grad_norm": 7.3234320645364, "learning_rate": 1.9066290410897492e-06, "loss": 0.57, "step": 9012 }, { "epoch": 0.72, "grad_norm": 1.4188351085388629, "learning_rate": 1.9056113367226615e-06, "loss": 0.1867, "step": 9013 }, { "epoch": 0.72, "grad_norm": 1.5266610457055567, "learning_rate": 1.9045938400912594e-06, "loss": 0.1906, "step": 9014 }, { "epoch": 0.72, "grad_norm": 1.5998476745666153, "learning_rate": 1.903576551263852e-06, "loss": 0.2458, "step": 9015 }, { "epoch": 0.72, "grad_norm": 1.1725142056660862, "learning_rate": 1.9025594703087324e-06, "loss": 0.1578, "step": 9016 }, { "epoch": 0.72, "grad_norm": 1.346640033083446, "learning_rate": 1.9015425972941809e-06, "loss": 0.188, "step": 9017 }, { "epoch": 0.72, "grad_norm": 1.5046373584086468, "learning_rate": 1.9005259322884623e-06, "loss": 0.184, "step": 9018 }, { "epoch": 0.72, "grad_norm": 1.4630580127232968, "learning_rate": 1.8995094753598293e-06, "loss": 0.2007, "step": 9019 }, { "epoch": 0.72, "grad_norm": 1.2287556456925455, "learning_rate": 1.8984932265765172e-06, "loss": 0.1206, "step": 9020 }, { "epoch": 0.72, "grad_norm": 1.1752296313228991, "learning_rate": 1.8974771860067554e-06, "loss": 0.1728, "step": 9021 }, { "epoch": 0.72, "grad_norm": 1.2871000491879698, "learning_rate": 1.8964613537187471e-06, "loss": 0.2006, "step": 9022 }, { "epoch": 0.72, "grad_norm": 6.91631462333189, "learning_rate": 1.8954457297806934e-06, "loss": 0.5449, "step": 9023 }, { "epoch": 0.72, "grad_norm": 1.3739381624627762, "learning_rate": 1.8944303142607757e-06, "loss": 0.1614, "step": 9024 }, { "epoch": 0.72, "grad_norm": 1.3703448006553145, "learning_rate": 1.8934151072271573e-06, "loss": 0.1778, "step": 9025 }, { "epoch": 0.72, "grad_norm": 1.4427572437740512, "learning_rate": 1.8924001087479964e-06, "loss": 0.1888, "step": 9026 }, { "epoch": 0.72, "grad_norm": 1.4269691327156102, "learning_rate": 1.8913853188914328e-06, "loss": 0.2005, "step": 9027 }, { "epoch": 0.72, "grad_norm": 1.3619245569886933, "learning_rate": 1.8903707377255908e-06, "loss": 0.1646, "step": 9028 }, { "epoch": 0.72, "grad_norm": 1.3632806771743786, "learning_rate": 1.8893563653185831e-06, "loss": 0.164, "step": 9029 }, { "epoch": 0.72, "grad_norm": 5.891872794522661, "learning_rate": 1.8883422017385078e-06, "loss": 0.6036, "step": 9030 }, { "epoch": 0.72, "grad_norm": 1.2510058092993064, "learning_rate": 1.8873282470534482e-06, "loss": 0.1655, "step": 9031 }, { "epoch": 0.72, "grad_norm": 8.343872727473007, "learning_rate": 1.8863145013314743e-06, "loss": 0.537, "step": 9032 }, { "epoch": 0.72, "grad_norm": 1.3497207121481087, "learning_rate": 1.8853009646406422e-06, "loss": 0.1832, "step": 9033 }, { "epoch": 0.72, "grad_norm": 7.106506353877256, "learning_rate": 1.8842876370489916e-06, "loss": 0.61, "step": 9034 }, { "epoch": 0.72, "grad_norm": 1.5031398259556004, "learning_rate": 1.883274518624556e-06, "loss": 0.1875, "step": 9035 }, { "epoch": 0.72, "grad_norm": 1.527589239881181, "learning_rate": 1.8822616094353414e-06, "loss": 0.2137, "step": 9036 }, { "epoch": 0.72, "grad_norm": 1.2357193565509907, "learning_rate": 1.8812489095493531e-06, "loss": 0.1461, "step": 9037 }, { "epoch": 0.72, "grad_norm": 1.460796920353672, "learning_rate": 1.8802364190345751e-06, "loss": 0.1995, "step": 9038 }, { "epoch": 0.72, "grad_norm": 1.377444800361589, "learning_rate": 1.8792241379589787e-06, "loss": 0.1724, "step": 9039 }, { "epoch": 0.72, "grad_norm": 1.3899413562607357, "learning_rate": 1.8782120663905218e-06, "loss": 0.1825, "step": 9040 }, { "epoch": 0.72, "grad_norm": 1.5718998315126693, "learning_rate": 1.8772002043971472e-06, "loss": 0.2286, "step": 9041 }, { "epoch": 0.72, "grad_norm": 1.3161028412589368, "learning_rate": 1.8761885520467843e-06, "loss": 0.1617, "step": 9042 }, { "epoch": 0.72, "grad_norm": 1.4837234358946882, "learning_rate": 1.8751771094073474e-06, "loss": 0.1907, "step": 9043 }, { "epoch": 0.72, "grad_norm": 1.434352862307001, "learning_rate": 1.8741658765467425e-06, "loss": 0.1706, "step": 9044 }, { "epoch": 0.72, "grad_norm": 1.2339523146681706, "learning_rate": 1.8731548535328497e-06, "loss": 0.1349, "step": 9045 }, { "epoch": 0.72, "grad_norm": 1.2312461382027569, "learning_rate": 1.8721440404335472e-06, "loss": 0.195, "step": 9046 }, { "epoch": 0.72, "grad_norm": 1.4088767929434909, "learning_rate": 1.8711334373166923e-06, "loss": 0.1845, "step": 9047 }, { "epoch": 0.72, "grad_norm": 7.852212003456255, "learning_rate": 1.8701230442501295e-06, "loss": 0.4429, "step": 9048 }, { "epoch": 0.72, "grad_norm": 1.3596457027927031, "learning_rate": 1.86911286130169e-06, "loss": 0.1894, "step": 9049 }, { "epoch": 0.72, "grad_norm": 1.267271473852765, "learning_rate": 1.8681028885391905e-06, "loss": 0.1597, "step": 9050 }, { "epoch": 0.72, "grad_norm": 9.86910531694426, "learning_rate": 1.8670931260304336e-06, "loss": 0.5523, "step": 9051 }, { "epoch": 0.72, "grad_norm": 1.2256981376416658, "learning_rate": 1.8660835738432071e-06, "loss": 0.1831, "step": 9052 }, { "epoch": 0.72, "grad_norm": 1.346391409208626, "learning_rate": 1.8650742320452858e-06, "loss": 0.1651, "step": 9053 }, { "epoch": 0.72, "grad_norm": 1.163021428506186, "learning_rate": 1.864065100704428e-06, "loss": 0.161, "step": 9054 }, { "epoch": 0.72, "grad_norm": 1.319772788648218, "learning_rate": 1.863056179888385e-06, "loss": 0.1565, "step": 9055 }, { "epoch": 0.72, "grad_norm": 1.3685326345418296, "learning_rate": 1.8620474696648822e-06, "loss": 0.1907, "step": 9056 }, { "epoch": 0.72, "grad_norm": 1.171267534366849, "learning_rate": 1.8610389701016412e-06, "loss": 0.1356, "step": 9057 }, { "epoch": 0.72, "grad_norm": 1.4527260649860825, "learning_rate": 1.860030681266367e-06, "loss": 0.1935, "step": 9058 }, { "epoch": 0.72, "grad_norm": 1.562844654247753, "learning_rate": 1.8590226032267438e-06, "loss": 0.2084, "step": 9059 }, { "epoch": 0.72, "grad_norm": 1.243419185090541, "learning_rate": 1.8580147360504513e-06, "loss": 0.182, "step": 9060 }, { "epoch": 0.72, "grad_norm": 1.178463100952765, "learning_rate": 1.8570070798051492e-06, "loss": 0.1597, "step": 9061 }, { "epoch": 0.72, "grad_norm": 1.401016803160156, "learning_rate": 1.8559996345584853e-06, "loss": 0.2113, "step": 9062 }, { "epoch": 0.73, "grad_norm": 1.2701134727499686, "learning_rate": 1.8549924003780918e-06, "loss": 0.1782, "step": 9063 }, { "epoch": 0.73, "grad_norm": 1.8300056128021036, "learning_rate": 1.8539853773315869e-06, "loss": 0.1309, "step": 9064 }, { "epoch": 0.73, "grad_norm": 1.4755951844743371, "learning_rate": 1.8529785654865744e-06, "loss": 0.2041, "step": 9065 }, { "epoch": 0.73, "grad_norm": 1.2414245381253806, "learning_rate": 1.8519719649106493e-06, "loss": 0.1611, "step": 9066 }, { "epoch": 0.73, "grad_norm": 1.157672311174175, "learning_rate": 1.8509655756713823e-06, "loss": 0.1657, "step": 9067 }, { "epoch": 0.73, "grad_norm": 1.2777072949644641, "learning_rate": 1.8499593978363363e-06, "loss": 0.1424, "step": 9068 }, { "epoch": 0.73, "grad_norm": 1.5135583124810161, "learning_rate": 1.8489534314730634e-06, "loss": 0.2181, "step": 9069 }, { "epoch": 0.73, "grad_norm": 1.46751220400743, "learning_rate": 1.84794767664909e-06, "loss": 0.2033, "step": 9070 }, { "epoch": 0.73, "grad_norm": 9.68212170102838, "learning_rate": 1.8469421334319416e-06, "loss": 0.5123, "step": 9071 }, { "epoch": 0.73, "grad_norm": 1.3363714638406035, "learning_rate": 1.8459368018891211e-06, "loss": 0.1731, "step": 9072 }, { "epoch": 0.73, "grad_norm": 1.2602180715452724, "learning_rate": 1.8449316820881192e-06, "loss": 0.1834, "step": 9073 }, { "epoch": 0.73, "grad_norm": 1.2009255504542025, "learning_rate": 1.8439267740964135e-06, "loss": 0.1493, "step": 9074 }, { "epoch": 0.73, "grad_norm": 1.316314397381077, "learning_rate": 1.8429220779814654e-06, "loss": 0.1512, "step": 9075 }, { "epoch": 0.73, "grad_norm": 1.356786016465121, "learning_rate": 1.8419175938107241e-06, "loss": 0.204, "step": 9076 }, { "epoch": 0.73, "grad_norm": 1.3761389580037218, "learning_rate": 1.8409133216516235e-06, "loss": 0.1431, "step": 9077 }, { "epoch": 0.73, "grad_norm": 5.349315800941496, "learning_rate": 1.8399092615715831e-06, "loss": 0.3529, "step": 9078 }, { "epoch": 0.73, "grad_norm": 11.839520506734722, "learning_rate": 1.838905413638007e-06, "loss": 0.627, "step": 9079 }, { "epoch": 0.73, "grad_norm": 1.492488805802242, "learning_rate": 1.837901777918291e-06, "loss": 0.2119, "step": 9080 }, { "epoch": 0.73, "grad_norm": 1.6033427980714965, "learning_rate": 1.836898354479807e-06, "loss": 0.2181, "step": 9081 }, { "epoch": 0.73, "grad_norm": 1.2334260100206724, "learning_rate": 1.8358951433899214e-06, "loss": 0.1452, "step": 9082 }, { "epoch": 0.73, "grad_norm": 1.2876735053287762, "learning_rate": 1.8348921447159818e-06, "loss": 0.1637, "step": 9083 }, { "epoch": 0.73, "grad_norm": 1.44960032521332, "learning_rate": 1.8338893585253225e-06, "loss": 0.1969, "step": 9084 }, { "epoch": 0.73, "grad_norm": 1.217997157541277, "learning_rate": 1.8328867848852633e-06, "loss": 0.1794, "step": 9085 }, { "epoch": 0.73, "grad_norm": 1.3901485362419175, "learning_rate": 1.8318844238631106e-06, "loss": 0.1835, "step": 9086 }, { "epoch": 0.73, "grad_norm": 1.4451519436859581, "learning_rate": 1.8308822755261551e-06, "loss": 0.1957, "step": 9087 }, { "epoch": 0.73, "grad_norm": 1.2626627471763259, "learning_rate": 1.8298803399416732e-06, "loss": 0.1849, "step": 9088 }, { "epoch": 0.73, "grad_norm": 5.59624752127837, "learning_rate": 1.8288786171769323e-06, "loss": 0.6307, "step": 9089 }, { "epoch": 0.73, "grad_norm": 1.2955364080876017, "learning_rate": 1.8278771072991748e-06, "loss": 0.1623, "step": 9090 }, { "epoch": 0.73, "grad_norm": 1.381177841983565, "learning_rate": 1.8268758103756396e-06, "loss": 0.1815, "step": 9091 }, { "epoch": 0.73, "grad_norm": 1.4769305224715712, "learning_rate": 1.825874726473547e-06, "loss": 0.1741, "step": 9092 }, { "epoch": 0.73, "grad_norm": 1.2338641845515417, "learning_rate": 1.824873855660098e-06, "loss": 0.156, "step": 9093 }, { "epoch": 0.73, "grad_norm": 1.4439577592167092, "learning_rate": 1.8238731980024892e-06, "loss": 0.2064, "step": 9094 }, { "epoch": 0.73, "grad_norm": 1.4243003326926151, "learning_rate": 1.8228727535678959e-06, "loss": 0.1782, "step": 9095 }, { "epoch": 0.73, "grad_norm": 1.391349945278128, "learning_rate": 1.8218725224234806e-06, "loss": 0.1903, "step": 9096 }, { "epoch": 0.73, "grad_norm": 1.377665989122746, "learning_rate": 1.8208725046363924e-06, "loss": 0.1813, "step": 9097 }, { "epoch": 0.73, "grad_norm": 1.4098146012303618, "learning_rate": 1.8198727002737653e-06, "loss": 0.1904, "step": 9098 }, { "epoch": 0.73, "grad_norm": 7.773847920528601, "learning_rate": 1.8188731094027178e-06, "loss": 0.6139, "step": 9099 }, { "epoch": 0.73, "grad_norm": 1.1951072495858535, "learning_rate": 1.81787373209036e-06, "loss": 0.1538, "step": 9100 }, { "epoch": 0.73, "grad_norm": 1.3115695788184494, "learning_rate": 1.8168745684037787e-06, "loss": 0.1656, "step": 9101 }, { "epoch": 0.73, "grad_norm": 1.549892380551993, "learning_rate": 1.81587561841005e-06, "loss": 0.1716, "step": 9102 }, { "epoch": 0.73, "grad_norm": 4.859074154748873, "learning_rate": 1.8148768821762425e-06, "loss": 0.513, "step": 9103 }, { "epoch": 0.73, "grad_norm": 9.07762320570059, "learning_rate": 1.8138783597693965e-06, "loss": 0.397, "step": 9104 }, { "epoch": 0.73, "grad_norm": 1.666020303578491, "learning_rate": 1.8128800512565514e-06, "loss": 0.1835, "step": 9105 }, { "epoch": 0.73, "grad_norm": 5.204458757872615, "learning_rate": 1.8118819567047252e-06, "loss": 0.5086, "step": 9106 }, { "epoch": 0.73, "grad_norm": 1.300713819667264, "learning_rate": 1.8108840761809232e-06, "loss": 0.1784, "step": 9107 }, { "epoch": 0.73, "grad_norm": 1.4458389900729802, "learning_rate": 1.8098864097521358e-06, "loss": 0.145, "step": 9108 }, { "epoch": 0.73, "grad_norm": 1.4692793185726798, "learning_rate": 1.808888957485339e-06, "loss": 0.1571, "step": 9109 }, { "epoch": 0.73, "grad_norm": 7.387262291171904, "learning_rate": 1.8078917194474954e-06, "loss": 0.5367, "step": 9110 }, { "epoch": 0.73, "grad_norm": 1.2841130242550987, "learning_rate": 1.8068946957055521e-06, "loss": 0.1753, "step": 9111 }, { "epoch": 0.73, "grad_norm": 1.3658696655829443, "learning_rate": 1.8058978863264426e-06, "loss": 0.1834, "step": 9112 }, { "epoch": 0.73, "grad_norm": 1.1193693847613664, "learning_rate": 1.8049012913770842e-06, "loss": 0.1257, "step": 9113 }, { "epoch": 0.73, "grad_norm": 1.3748410745207416, "learning_rate": 1.8039049109243861e-06, "loss": 0.1362, "step": 9114 }, { "epoch": 0.73, "grad_norm": 1.3949136419365638, "learning_rate": 1.8029087450352323e-06, "loss": 0.1784, "step": 9115 }, { "epoch": 0.73, "grad_norm": 1.6000969253971802, "learning_rate": 1.8019127937765024e-06, "loss": 0.1968, "step": 9116 }, { "epoch": 0.73, "grad_norm": 1.2878760047334998, "learning_rate": 1.8009170572150563e-06, "loss": 0.1498, "step": 9117 }, { "epoch": 0.73, "grad_norm": 6.354325901892797, "learning_rate": 1.7999215354177412e-06, "loss": 0.6027, "step": 9118 }, { "epoch": 0.73, "grad_norm": 1.3800090408292782, "learning_rate": 1.7989262284513897e-06, "loss": 0.1886, "step": 9119 }, { "epoch": 0.73, "grad_norm": 1.1320182271889063, "learning_rate": 1.797931136382819e-06, "loss": 0.169, "step": 9120 }, { "epoch": 0.73, "grad_norm": 1.1833916862645706, "learning_rate": 1.7969362592788331e-06, "loss": 0.13, "step": 9121 }, { "epoch": 0.73, "grad_norm": 1.2661554648471567, "learning_rate": 1.795941597206221e-06, "loss": 0.1504, "step": 9122 }, { "epoch": 0.73, "grad_norm": 1.4274754951410569, "learning_rate": 1.7949471502317572e-06, "loss": 0.1881, "step": 9123 }, { "epoch": 0.73, "grad_norm": 1.3987655227780484, "learning_rate": 1.7939529184222004e-06, "loss": 0.2016, "step": 9124 }, { "epoch": 0.73, "grad_norm": 1.5070902294649455, "learning_rate": 1.7929589018443016e-06, "loss": 0.2186, "step": 9125 }, { "epoch": 0.73, "grad_norm": 5.673346831008698, "learning_rate": 1.7919651005647852e-06, "loss": 0.546, "step": 9126 }, { "epoch": 0.73, "grad_norm": 1.411344556144927, "learning_rate": 1.7909715146503736e-06, "loss": 0.1995, "step": 9127 }, { "epoch": 0.73, "grad_norm": 1.2296788567845378, "learning_rate": 1.7899781441677688e-06, "loss": 0.1382, "step": 9128 }, { "epoch": 0.73, "grad_norm": 4.723067033845056, "learning_rate": 1.7889849891836537e-06, "loss": 0.4328, "step": 9129 }, { "epoch": 0.73, "grad_norm": 1.2377433870944272, "learning_rate": 1.7879920497647068e-06, "loss": 0.1657, "step": 9130 }, { "epoch": 0.73, "grad_norm": 1.3664850067175895, "learning_rate": 1.786999325977586e-06, "loss": 0.1894, "step": 9131 }, { "epoch": 0.73, "grad_norm": 1.2488048557385354, "learning_rate": 1.786006817888935e-06, "loss": 0.1476, "step": 9132 }, { "epoch": 0.73, "grad_norm": 1.415161664205637, "learning_rate": 1.7850145255653828e-06, "loss": 0.1517, "step": 9133 }, { "epoch": 0.73, "grad_norm": 1.3116741753313503, "learning_rate": 1.7840224490735498e-06, "loss": 0.1601, "step": 9134 }, { "epoch": 0.73, "grad_norm": 1.2280865705221102, "learning_rate": 1.7830305884800302e-06, "loss": 0.1658, "step": 9135 }, { "epoch": 0.73, "grad_norm": 1.3332937180170799, "learning_rate": 1.782038943851417e-06, "loss": 0.1792, "step": 9136 }, { "epoch": 0.73, "grad_norm": 1.1655021043449045, "learning_rate": 1.78104751525428e-06, "loss": 0.1308, "step": 9137 }, { "epoch": 0.73, "grad_norm": 1.4056584339153708, "learning_rate": 1.7800563027551737e-06, "loss": 0.1573, "step": 9138 }, { "epoch": 0.73, "grad_norm": 1.3676531582355078, "learning_rate": 1.7790653064206454e-06, "loss": 0.173, "step": 9139 }, { "epoch": 0.73, "grad_norm": 6.914572675069034, "learning_rate": 1.7780745263172216e-06, "loss": 0.5155, "step": 9140 }, { "epoch": 0.73, "grad_norm": 1.4437519724786982, "learning_rate": 1.7770839625114173e-06, "loss": 0.2257, "step": 9141 }, { "epoch": 0.73, "grad_norm": 1.3175726896556128, "learning_rate": 1.7760936150697316e-06, "loss": 0.1661, "step": 9142 }, { "epoch": 0.73, "grad_norm": 8.8282546548058, "learning_rate": 1.7751034840586495e-06, "loss": 0.4521, "step": 9143 }, { "epoch": 0.73, "grad_norm": 1.2015361374381475, "learning_rate": 1.77411356954464e-06, "loss": 0.1311, "step": 9144 }, { "epoch": 0.73, "grad_norm": 1.3093938978987165, "learning_rate": 1.773123871594164e-06, "loss": 0.1902, "step": 9145 }, { "epoch": 0.73, "grad_norm": 1.236017886291195, "learning_rate": 1.7721343902736577e-06, "loss": 0.1671, "step": 9146 }, { "epoch": 0.73, "grad_norm": 1.4171283963075427, "learning_rate": 1.7711451256495482e-06, "loss": 0.197, "step": 9147 }, { "epoch": 0.73, "grad_norm": 1.2078737330068696, "learning_rate": 1.7701560777882531e-06, "loss": 0.1359, "step": 9148 }, { "epoch": 0.73, "grad_norm": 1.3276099376275687, "learning_rate": 1.7691672467561627e-06, "loss": 0.1679, "step": 9149 }, { "epoch": 0.73, "grad_norm": 1.1288656214939443, "learning_rate": 1.7681786326196665e-06, "loss": 0.1292, "step": 9150 }, { "epoch": 0.73, "grad_norm": 7.461758550108675, "learning_rate": 1.7671902354451298e-06, "loss": 0.5478, "step": 9151 }, { "epoch": 0.73, "grad_norm": 1.3937972455820422, "learning_rate": 1.7662020552989085e-06, "loss": 0.1785, "step": 9152 }, { "epoch": 0.73, "grad_norm": 1.2031226628548526, "learning_rate": 1.7652140922473403e-06, "loss": 0.1758, "step": 9153 }, { "epoch": 0.73, "grad_norm": 1.4401764649614757, "learning_rate": 1.7642263463567517e-06, "loss": 0.2074, "step": 9154 }, { "epoch": 0.73, "grad_norm": 1.4707547211423908, "learning_rate": 1.7632388176934523e-06, "loss": 0.1971, "step": 9155 }, { "epoch": 0.73, "grad_norm": 1.4703205804579635, "learning_rate": 1.7622515063237382e-06, "loss": 0.2004, "step": 9156 }, { "epoch": 0.73, "grad_norm": 1.6052850321146184, "learning_rate": 1.7612644123138906e-06, "loss": 0.1982, "step": 9157 }, { "epoch": 0.73, "grad_norm": 10.012456061399446, "learning_rate": 1.7602775357301738e-06, "loss": 0.5874, "step": 9158 }, { "epoch": 0.73, "grad_norm": 1.3071494090716804, "learning_rate": 1.7592908766388456e-06, "loss": 0.199, "step": 9159 }, { "epoch": 0.73, "grad_norm": 1.3281227183022555, "learning_rate": 1.7583044351061369e-06, "loss": 0.1382, "step": 9160 }, { "epoch": 0.73, "grad_norm": 1.3780077078245516, "learning_rate": 1.7573182111982745e-06, "loss": 0.1526, "step": 9161 }, { "epoch": 0.73, "grad_norm": 1.6010940253210684, "learning_rate": 1.7563322049814674e-06, "loss": 0.2028, "step": 9162 }, { "epoch": 0.73, "grad_norm": 1.3511678098730722, "learning_rate": 1.7553464165219036e-06, "loss": 0.2116, "step": 9163 }, { "epoch": 0.73, "grad_norm": 1.4354990201199087, "learning_rate": 1.754360845885768e-06, "loss": 0.1761, "step": 9164 }, { "epoch": 0.73, "grad_norm": 1.3811452135719158, "learning_rate": 1.7533754931392227e-06, "loss": 0.1645, "step": 9165 }, { "epoch": 0.73, "grad_norm": 1.3561983613114927, "learning_rate": 1.752390358348417e-06, "loss": 0.1735, "step": 9166 }, { "epoch": 0.73, "grad_norm": 1.444992386053985, "learning_rate": 1.7514054415794868e-06, "loss": 0.1877, "step": 9167 }, { "epoch": 0.73, "grad_norm": 1.6217837162963722, "learning_rate": 1.750420742898552e-06, "loss": 0.1834, "step": 9168 }, { "epoch": 0.73, "grad_norm": 11.361423833064933, "learning_rate": 1.7494362623717166e-06, "loss": 0.5676, "step": 9169 }, { "epoch": 0.73, "grad_norm": 1.3386263027220062, "learning_rate": 1.7484520000650757e-06, "loss": 0.1323, "step": 9170 }, { "epoch": 0.73, "grad_norm": 1.2904389289987186, "learning_rate": 1.7474679560447056e-06, "loss": 0.1672, "step": 9171 }, { "epoch": 0.73, "grad_norm": 6.66356773113316, "learning_rate": 1.7464841303766628e-06, "loss": 0.5819, "step": 9172 }, { "epoch": 0.73, "grad_norm": 1.3389462043080522, "learning_rate": 1.745500523127e-06, "loss": 0.1728, "step": 9173 }, { "epoch": 0.73, "grad_norm": 6.249835068796126, "learning_rate": 1.7445171343617473e-06, "loss": 0.5887, "step": 9174 }, { "epoch": 0.73, "grad_norm": 1.4742614594215442, "learning_rate": 1.743533964146924e-06, "loss": 0.192, "step": 9175 }, { "epoch": 0.73, "grad_norm": 1.6508322006594116, "learning_rate": 1.7425510125485318e-06, "loss": 0.1721, "step": 9176 }, { "epoch": 0.73, "grad_norm": 1.2952475706868518, "learning_rate": 1.7415682796325595e-06, "loss": 0.1765, "step": 9177 }, { "epoch": 0.73, "grad_norm": 1.4100633240113714, "learning_rate": 1.7405857654649798e-06, "loss": 0.1742, "step": 9178 }, { "epoch": 0.73, "grad_norm": 1.417327823333738, "learning_rate": 1.739603470111757e-06, "loss": 0.1913, "step": 9179 }, { "epoch": 0.73, "grad_norm": 1.2475244160830607, "learning_rate": 1.7386213936388303e-06, "loss": 0.1828, "step": 9180 }, { "epoch": 0.73, "grad_norm": 8.684409191312877, "learning_rate": 1.737639536112129e-06, "loss": 0.5451, "step": 9181 }, { "epoch": 0.73, "grad_norm": 1.3792925771356725, "learning_rate": 1.7366578975975734e-06, "loss": 0.2137, "step": 9182 }, { "epoch": 0.73, "grad_norm": 1.4230665283851671, "learning_rate": 1.7356764781610574e-06, "loss": 0.1814, "step": 9183 }, { "epoch": 0.73, "grad_norm": 1.4349885751563647, "learning_rate": 1.7346952778684718e-06, "loss": 0.2046, "step": 9184 }, { "epoch": 0.73, "grad_norm": 1.3007414196960718, "learning_rate": 1.7337142967856857e-06, "loss": 0.1754, "step": 9185 }, { "epoch": 0.73, "grad_norm": 1.5080921896753834, "learning_rate": 1.7327335349785552e-06, "loss": 0.1955, "step": 9186 }, { "epoch": 0.73, "grad_norm": 1.23886854470887, "learning_rate": 1.7317529925129217e-06, "loss": 0.1718, "step": 9187 }, { "epoch": 0.74, "grad_norm": 1.1889302492512888, "learning_rate": 1.7307726694546128e-06, "loss": 0.1582, "step": 9188 }, { "epoch": 0.74, "grad_norm": 1.350155097170726, "learning_rate": 1.7297925658694393e-06, "loss": 0.2031, "step": 9189 }, { "epoch": 0.74, "grad_norm": 1.5503632463867265, "learning_rate": 1.7288126818231998e-06, "loss": 0.2062, "step": 9190 }, { "epoch": 0.74, "grad_norm": 8.642558768113016, "learning_rate": 1.727833017381676e-06, "loss": 0.4599, "step": 9191 }, { "epoch": 0.74, "grad_norm": 1.4082854346195635, "learning_rate": 1.726853572610634e-06, "loss": 0.1664, "step": 9192 }, { "epoch": 0.74, "grad_norm": 1.35170824266716, "learning_rate": 1.7258743475758328e-06, "loss": 0.1956, "step": 9193 }, { "epoch": 0.74, "grad_norm": 1.4355182429581022, "learning_rate": 1.724895342343003e-06, "loss": 0.2212, "step": 9194 }, { "epoch": 0.74, "grad_norm": 1.3171630102653067, "learning_rate": 1.7239165569778738e-06, "loss": 0.1831, "step": 9195 }, { "epoch": 0.74, "grad_norm": 1.390088773667043, "learning_rate": 1.7229379915461526e-06, "loss": 0.1783, "step": 9196 }, { "epoch": 0.74, "grad_norm": 1.321707622942941, "learning_rate": 1.721959646113533e-06, "loss": 0.1883, "step": 9197 }, { "epoch": 0.74, "grad_norm": 14.605022186230592, "learning_rate": 1.7209815207456941e-06, "loss": 0.7236, "step": 9198 }, { "epoch": 0.74, "grad_norm": 1.4367012060529436, "learning_rate": 1.720003615508301e-06, "loss": 0.2162, "step": 9199 }, { "epoch": 0.74, "grad_norm": 9.480257189356589, "learning_rate": 1.7190259304670038e-06, "loss": 0.5246, "step": 9200 }, { "epoch": 0.74, "grad_norm": 1.319189640947264, "learning_rate": 1.7180484656874357e-06, "loss": 0.1557, "step": 9201 }, { "epoch": 0.74, "grad_norm": 1.2914883112552646, "learning_rate": 1.7170712212352187e-06, "loss": 0.1975, "step": 9202 }, { "epoch": 0.74, "grad_norm": 1.4158668217907435, "learning_rate": 1.7160941971759558e-06, "loss": 0.1862, "step": 9203 }, { "epoch": 0.74, "grad_norm": 1.3265497105863124, "learning_rate": 1.7151173935752425e-06, "loss": 0.1581, "step": 9204 }, { "epoch": 0.74, "grad_norm": 1.578711914848806, "learning_rate": 1.714140810498648e-06, "loss": 0.1838, "step": 9205 }, { "epoch": 0.74, "grad_norm": 1.20358599873575, "learning_rate": 1.7131644480117381e-06, "loss": 0.14, "step": 9206 }, { "epoch": 0.74, "grad_norm": 1.1304955742676857, "learning_rate": 1.7121883061800598e-06, "loss": 0.1137, "step": 9207 }, { "epoch": 0.74, "grad_norm": 1.313040639447187, "learning_rate": 1.7112123850691386e-06, "loss": 0.1767, "step": 9208 }, { "epoch": 0.74, "grad_norm": 1.411500047521928, "learning_rate": 1.7102366847444963e-06, "loss": 0.155, "step": 9209 }, { "epoch": 0.74, "grad_norm": 1.3040926799210366, "learning_rate": 1.709261205271633e-06, "loss": 0.1775, "step": 9210 }, { "epoch": 0.74, "grad_norm": 1.3588452174313836, "learning_rate": 1.7082859467160351e-06, "loss": 0.1573, "step": 9211 }, { "epoch": 0.74, "grad_norm": 1.3604806158128333, "learning_rate": 1.7073109091431734e-06, "loss": 0.1633, "step": 9212 }, { "epoch": 0.74, "grad_norm": 10.280809109485256, "learning_rate": 1.7063360926185108e-06, "loss": 0.5909, "step": 9213 }, { "epoch": 0.74, "grad_norm": 1.2762231048290287, "learning_rate": 1.7053614972074833e-06, "loss": 0.1428, "step": 9214 }, { "epoch": 0.74, "grad_norm": 1.4394083421690795, "learning_rate": 1.7043871229755198e-06, "loss": 0.1511, "step": 9215 }, { "epoch": 0.74, "grad_norm": 1.4694081397116232, "learning_rate": 1.703412969988037e-06, "loss": 0.1854, "step": 9216 }, { "epoch": 0.74, "grad_norm": 1.2221345946599484, "learning_rate": 1.7024390383104267e-06, "loss": 0.1489, "step": 9217 }, { "epoch": 0.74, "grad_norm": 1.3768626461435447, "learning_rate": 1.7014653280080768e-06, "loss": 0.1993, "step": 9218 }, { "epoch": 0.74, "grad_norm": 1.4508423976464029, "learning_rate": 1.7004918391463531e-06, "loss": 0.1832, "step": 9219 }, { "epoch": 0.74, "grad_norm": 1.586727272575284, "learning_rate": 1.6995185717906092e-06, "loss": 0.1718, "step": 9220 }, { "epoch": 0.74, "grad_norm": 1.322618490027282, "learning_rate": 1.6985455260061845e-06, "loss": 0.1733, "step": 9221 }, { "epoch": 0.74, "grad_norm": 1.147784180702518, "learning_rate": 1.697572701858401e-06, "loss": 0.1611, "step": 9222 }, { "epoch": 0.74, "grad_norm": 7.4411954352452305, "learning_rate": 1.6966000994125682e-06, "loss": 0.5393, "step": 9223 }, { "epoch": 0.74, "grad_norm": 1.243911175939272, "learning_rate": 1.6956277187339798e-06, "loss": 0.1726, "step": 9224 }, { "epoch": 0.74, "grad_norm": 1.5517853781787916, "learning_rate": 1.6946555598879138e-06, "loss": 0.2118, "step": 9225 }, { "epoch": 0.74, "grad_norm": 1.2757837179388372, "learning_rate": 1.6936836229396336e-06, "loss": 0.1813, "step": 9226 }, { "epoch": 0.74, "grad_norm": 1.355245094856216, "learning_rate": 1.692711907954393e-06, "loss": 0.1849, "step": 9227 }, { "epoch": 0.74, "grad_norm": 9.873139660448752, "learning_rate": 1.6917404149974193e-06, "loss": 0.5636, "step": 9228 }, { "epoch": 0.74, "grad_norm": 1.2488597610842453, "learning_rate": 1.690769144133937e-06, "loss": 0.153, "step": 9229 }, { "epoch": 0.74, "grad_norm": 1.4749167905824174, "learning_rate": 1.6897980954291483e-06, "loss": 0.1895, "step": 9230 }, { "epoch": 0.74, "grad_norm": 1.3212231119976772, "learning_rate": 1.6888272689482433e-06, "loss": 0.1511, "step": 9231 }, { "epoch": 0.74, "grad_norm": 6.562784536519942, "learning_rate": 1.687856664756396e-06, "loss": 0.623, "step": 9232 }, { "epoch": 0.74, "grad_norm": 1.409321025546184, "learning_rate": 1.686886282918766e-06, "loss": 0.2025, "step": 9233 }, { "epoch": 0.74, "grad_norm": 1.261353951473355, "learning_rate": 1.6859161235004984e-06, "loss": 0.1403, "step": 9234 }, { "epoch": 0.74, "grad_norm": 1.3172663320919502, "learning_rate": 1.6849461865667226e-06, "loss": 0.1504, "step": 9235 }, { "epoch": 0.74, "grad_norm": 1.473388485476268, "learning_rate": 1.6839764721825535e-06, "loss": 0.2052, "step": 9236 }, { "epoch": 0.74, "grad_norm": 1.416037488662575, "learning_rate": 1.6830069804130894e-06, "loss": 0.1912, "step": 9237 }, { "epoch": 0.74, "grad_norm": 1.1759164300042428, "learning_rate": 1.6820377113234203e-06, "loss": 0.1624, "step": 9238 }, { "epoch": 0.74, "grad_norm": 1.2422302204738789, "learning_rate": 1.681068664978609e-06, "loss": 0.1398, "step": 9239 }, { "epoch": 0.74, "grad_norm": 1.4192031276831634, "learning_rate": 1.6800998414437165e-06, "loss": 0.2122, "step": 9240 }, { "epoch": 0.74, "grad_norm": 1.305504551274611, "learning_rate": 1.6791312407837812e-06, "loss": 0.186, "step": 9241 }, { "epoch": 0.74, "grad_norm": 1.3942829190287833, "learning_rate": 1.6781628630638247e-06, "loss": 0.1688, "step": 9242 }, { "epoch": 0.74, "grad_norm": 1.4623816778595087, "learning_rate": 1.677194708348862e-06, "loss": 0.1932, "step": 9243 }, { "epoch": 0.74, "grad_norm": 1.4062078042403683, "learning_rate": 1.676226776703886e-06, "loss": 0.1986, "step": 9244 }, { "epoch": 0.74, "grad_norm": 1.3661368554371822, "learning_rate": 1.6752590681938768e-06, "loss": 0.1898, "step": 9245 }, { "epoch": 0.74, "grad_norm": 1.26341414816587, "learning_rate": 1.6742915828838003e-06, "loss": 0.1672, "step": 9246 }, { "epoch": 0.74, "grad_norm": 1.2678653255602448, "learning_rate": 1.6733243208386057e-06, "loss": 0.1477, "step": 9247 }, { "epoch": 0.74, "grad_norm": 1.2497552078537733, "learning_rate": 1.6723572821232275e-06, "loss": 0.1494, "step": 9248 }, { "epoch": 0.74, "grad_norm": 1.2592193533709861, "learning_rate": 1.671390466802591e-06, "loss": 0.1675, "step": 9249 }, { "epoch": 0.74, "grad_norm": 1.2422435095041522, "learning_rate": 1.6704238749415958e-06, "loss": 0.1604, "step": 9250 }, { "epoch": 0.74, "grad_norm": 1.3542371304434624, "learning_rate": 1.6694575066051327e-06, "loss": 0.1489, "step": 9251 }, { "epoch": 0.74, "grad_norm": 5.37494241514877, "learning_rate": 1.6684913618580811e-06, "loss": 0.3888, "step": 9252 }, { "epoch": 0.74, "grad_norm": 1.4749907282645558, "learning_rate": 1.6675254407652958e-06, "loss": 0.1498, "step": 9253 }, { "epoch": 0.74, "grad_norm": 1.3655219194332666, "learning_rate": 1.666559743391626e-06, "loss": 0.1495, "step": 9254 }, { "epoch": 0.74, "grad_norm": 1.2238659357015582, "learning_rate": 1.6655942698019001e-06, "loss": 0.158, "step": 9255 }, { "epoch": 0.74, "grad_norm": 1.2447053038839422, "learning_rate": 1.6646290200609344e-06, "loss": 0.1825, "step": 9256 }, { "epoch": 0.74, "grad_norm": 1.3157849727553947, "learning_rate": 1.6636639942335264e-06, "loss": 0.1912, "step": 9257 }, { "epoch": 0.74, "grad_norm": 1.532190861284905, "learning_rate": 1.6626991923844666e-06, "loss": 0.2227, "step": 9258 }, { "epoch": 0.74, "grad_norm": 1.3017882347859238, "learning_rate": 1.6617346145785196e-06, "loss": 0.1873, "step": 9259 }, { "epoch": 0.74, "grad_norm": 1.2399111343791365, "learning_rate": 1.6607702608804416e-06, "loss": 0.1581, "step": 9260 }, { "epoch": 0.74, "grad_norm": 1.2322812395962557, "learning_rate": 1.6598061313549763e-06, "loss": 0.1223, "step": 9261 }, { "epoch": 0.74, "grad_norm": 1.3202594536486618, "learning_rate": 1.6588422260668425e-06, "loss": 0.1901, "step": 9262 }, { "epoch": 0.74, "grad_norm": 6.887564970257272, "learning_rate": 1.6578785450807545e-06, "loss": 0.6083, "step": 9263 }, { "epoch": 0.74, "grad_norm": 1.3644309077112091, "learning_rate": 1.6569150884614067e-06, "loss": 0.1712, "step": 9264 }, { "epoch": 0.74, "grad_norm": 1.236849750581803, "learning_rate": 1.6559518562734777e-06, "loss": 0.1842, "step": 9265 }, { "epoch": 0.74, "grad_norm": 1.3231989287190236, "learning_rate": 1.654988848581633e-06, "loss": 0.2158, "step": 9266 }, { "epoch": 0.74, "grad_norm": 1.4568027585555277, "learning_rate": 1.654026065450522e-06, "loss": 0.1919, "step": 9267 }, { "epoch": 0.74, "grad_norm": 1.2997956067430405, "learning_rate": 1.6530635069447787e-06, "loss": 0.1588, "step": 9268 }, { "epoch": 0.74, "grad_norm": 1.2583025044293814, "learning_rate": 1.6521011731290232e-06, "loss": 0.137, "step": 9269 }, { "epoch": 0.74, "grad_norm": 1.4121831615704283, "learning_rate": 1.6511390640678592e-06, "loss": 0.1802, "step": 9270 }, { "epoch": 0.74, "grad_norm": 1.1690021025018584, "learning_rate": 1.6501771798258753e-06, "loss": 0.1436, "step": 9271 }, { "epoch": 0.74, "grad_norm": 1.3477639530979049, "learning_rate": 1.6492155204676503e-06, "loss": 0.1503, "step": 9272 }, { "epoch": 0.74, "grad_norm": 1.2437246983368497, "learning_rate": 1.6482540860577368e-06, "loss": 0.1789, "step": 9273 }, { "epoch": 0.74, "grad_norm": 1.3164176830262688, "learning_rate": 1.6472928766606828e-06, "loss": 0.1981, "step": 9274 }, { "epoch": 0.74, "grad_norm": 6.853506558062393, "learning_rate": 1.6463318923410183e-06, "loss": 0.5632, "step": 9275 }, { "epoch": 0.74, "grad_norm": 1.554141924999955, "learning_rate": 1.6453711331632516e-06, "loss": 0.1712, "step": 9276 }, { "epoch": 0.74, "grad_norm": 1.4165565642352327, "learning_rate": 1.6444105991918867e-06, "loss": 0.1983, "step": 9277 }, { "epoch": 0.74, "grad_norm": 1.395133690830671, "learning_rate": 1.6434502904914056e-06, "loss": 0.1803, "step": 9278 }, { "epoch": 0.74, "grad_norm": 5.765559196909282, "learning_rate": 1.642490207126276e-06, "loss": 0.4761, "step": 9279 }, { "epoch": 0.74, "grad_norm": 1.1008247602290997, "learning_rate": 1.6415303491609519e-06, "loss": 0.1566, "step": 9280 }, { "epoch": 0.74, "grad_norm": 1.4044265421306688, "learning_rate": 1.6405707166598712e-06, "loss": 0.1525, "step": 9281 }, { "epoch": 0.74, "grad_norm": 1.201148571553906, "learning_rate": 1.6396113096874549e-06, "loss": 0.1411, "step": 9282 }, { "epoch": 0.74, "grad_norm": 1.5347791889838944, "learning_rate": 1.6386521283081163e-06, "loss": 0.1762, "step": 9283 }, { "epoch": 0.74, "grad_norm": 1.397035950735275, "learning_rate": 1.6376931725862438e-06, "loss": 0.1386, "step": 9284 }, { "epoch": 0.74, "grad_norm": 1.3947534737814915, "learning_rate": 1.6367344425862136e-06, "loss": 0.1844, "step": 9285 }, { "epoch": 0.74, "grad_norm": 1.2779211471534706, "learning_rate": 1.6357759383723943e-06, "loss": 0.1584, "step": 9286 }, { "epoch": 0.74, "grad_norm": 1.2374402719010764, "learning_rate": 1.6348176600091265e-06, "loss": 0.1521, "step": 9287 }, { "epoch": 0.74, "grad_norm": 1.511379302818712, "learning_rate": 1.633859607560746e-06, "loss": 0.2106, "step": 9288 }, { "epoch": 0.74, "grad_norm": 1.363485819857421, "learning_rate": 1.6329017810915698e-06, "loss": 0.2151, "step": 9289 }, { "epoch": 0.74, "grad_norm": 1.2560928067368509, "learning_rate": 1.6319441806658987e-06, "loss": 0.1809, "step": 9290 }, { "epoch": 0.74, "grad_norm": 1.2658214573477322, "learning_rate": 1.6309868063480195e-06, "loss": 0.1526, "step": 9291 }, { "epoch": 0.74, "grad_norm": 5.460191505969931, "learning_rate": 1.630029658202204e-06, "loss": 0.4932, "step": 9292 }, { "epoch": 0.74, "grad_norm": 1.2676719893803832, "learning_rate": 1.6290727362927079e-06, "loss": 0.1395, "step": 9293 }, { "epoch": 0.74, "grad_norm": 1.3244213324953213, "learning_rate": 1.628116040683772e-06, "loss": 0.1776, "step": 9294 }, { "epoch": 0.74, "grad_norm": 1.5994883085491038, "learning_rate": 1.6271595714396233e-06, "loss": 0.2004, "step": 9295 }, { "epoch": 0.74, "grad_norm": 1.3469249171089657, "learning_rate": 1.6262033286244706e-06, "loss": 0.1822, "step": 9296 }, { "epoch": 0.74, "grad_norm": 1.489173908576333, "learning_rate": 1.6252473123025132e-06, "loss": 0.1601, "step": 9297 }, { "epoch": 0.74, "grad_norm": 1.3050814138525575, "learning_rate": 1.6242915225379259e-06, "loss": 0.1503, "step": 9298 }, { "epoch": 0.74, "grad_norm": 1.5750673609743637, "learning_rate": 1.6233359593948777e-06, "loss": 0.185, "step": 9299 }, { "epoch": 0.74, "grad_norm": 1.201749909127806, "learning_rate": 1.6223806229375182e-06, "loss": 0.1784, "step": 9300 }, { "epoch": 0.74, "grad_norm": 7.4536117859407325, "learning_rate": 1.621425513229981e-06, "loss": 0.6193, "step": 9301 }, { "epoch": 0.74, "grad_norm": 1.5494388422491012, "learning_rate": 1.620470630336386e-06, "loss": 0.2129, "step": 9302 }, { "epoch": 0.74, "grad_norm": 6.4522675276715, "learning_rate": 1.6195159743208366e-06, "loss": 0.4635, "step": 9303 }, { "epoch": 0.74, "grad_norm": 1.1955909192405902, "learning_rate": 1.618561545247423e-06, "loss": 0.1658, "step": 9304 }, { "epoch": 0.74, "grad_norm": 1.317684304811304, "learning_rate": 1.6176073431802158e-06, "loss": 0.2214, "step": 9305 }, { "epoch": 0.74, "grad_norm": 1.810840328400755, "learning_rate": 1.6166533681832797e-06, "loss": 0.1784, "step": 9306 }, { "epoch": 0.74, "grad_norm": 1.4051241753665276, "learning_rate": 1.615699620320651e-06, "loss": 0.2164, "step": 9307 }, { "epoch": 0.74, "grad_norm": 1.350655311438031, "learning_rate": 1.6147460996563618e-06, "loss": 0.1896, "step": 9308 }, { "epoch": 0.74, "grad_norm": 1.3227261744109176, "learning_rate": 1.6137928062544244e-06, "loss": 0.1685, "step": 9309 }, { "epoch": 0.74, "grad_norm": 1.3608382948963378, "learning_rate": 1.6128397401788353e-06, "loss": 0.1683, "step": 9310 }, { "epoch": 0.74, "grad_norm": 1.4523858568971029, "learning_rate": 1.6118869014935773e-06, "loss": 0.1881, "step": 9311 }, { "epoch": 0.74, "grad_norm": 1.3265709584342793, "learning_rate": 1.6109342902626175e-06, "loss": 0.1845, "step": 9312 }, { "epoch": 0.75, "grad_norm": 1.3045222093432671, "learning_rate": 1.6099819065499068e-06, "loss": 0.1985, "step": 9313 }, { "epoch": 0.75, "grad_norm": 1.4297427325254977, "learning_rate": 1.6090297504193824e-06, "loss": 0.1674, "step": 9314 }, { "epoch": 0.75, "grad_norm": 1.2857805506530455, "learning_rate": 1.6080778219349652e-06, "loss": 0.1635, "step": 9315 }, { "epoch": 0.75, "grad_norm": 1.3513704585722028, "learning_rate": 1.6071261211605587e-06, "loss": 0.1762, "step": 9316 }, { "epoch": 0.75, "grad_norm": 1.3606228279863979, "learning_rate": 1.6061746481600593e-06, "loss": 0.1976, "step": 9317 }, { "epoch": 0.75, "grad_norm": 1.1964455532862093, "learning_rate": 1.6052234029973356e-06, "loss": 0.1674, "step": 9318 }, { "epoch": 0.75, "grad_norm": 1.4047082067376941, "learning_rate": 1.604272385736252e-06, "loss": 0.1986, "step": 9319 }, { "epoch": 0.75, "grad_norm": 7.796150518058644, "learning_rate": 1.6033215964406534e-06, "loss": 0.5328, "step": 9320 }, { "epoch": 0.75, "grad_norm": 1.4014825614928708, "learning_rate": 1.6023710351743642e-06, "loss": 0.1824, "step": 9321 }, { "epoch": 0.75, "grad_norm": 1.1599802589763089, "learning_rate": 1.6014207020012034e-06, "loss": 0.1168, "step": 9322 }, { "epoch": 0.75, "grad_norm": 1.2601789289680414, "learning_rate": 1.6004705969849687e-06, "loss": 0.1912, "step": 9323 }, { "epoch": 0.75, "grad_norm": 1.5343333138502464, "learning_rate": 1.5995207201894424e-06, "loss": 0.1814, "step": 9324 }, { "epoch": 0.75, "grad_norm": 18.747394343373486, "learning_rate": 1.5985710716783936e-06, "loss": 0.627, "step": 9325 }, { "epoch": 0.75, "grad_norm": 1.300032923152989, "learning_rate": 1.5976216515155746e-06, "loss": 0.139, "step": 9326 }, { "epoch": 0.75, "grad_norm": 1.3334952657094212, "learning_rate": 1.5966724597647209e-06, "loss": 0.2097, "step": 9327 }, { "epoch": 0.75, "grad_norm": 1.2723720253733435, "learning_rate": 1.5957234964895602e-06, "loss": 0.1852, "step": 9328 }, { "epoch": 0.75, "grad_norm": 1.339249180252441, "learning_rate": 1.5947747617537945e-06, "loss": 0.1387, "step": 9329 }, { "epoch": 0.75, "grad_norm": 1.5230290255531902, "learning_rate": 1.5938262556211142e-06, "loss": 0.1761, "step": 9330 }, { "epoch": 0.75, "grad_norm": 1.35805051320388, "learning_rate": 1.5928779781552012e-06, "loss": 0.1695, "step": 9331 }, { "epoch": 0.75, "grad_norm": 1.258997654406913, "learning_rate": 1.5919299294197093e-06, "loss": 0.1689, "step": 9332 }, { "epoch": 0.75, "grad_norm": 1.3070742762293202, "learning_rate": 1.5909821094782891e-06, "loss": 0.1585, "step": 9333 }, { "epoch": 0.75, "grad_norm": 1.2073520879048403, "learning_rate": 1.5900345183945688e-06, "loss": 0.1735, "step": 9334 }, { "epoch": 0.75, "grad_norm": 1.3524267854993093, "learning_rate": 1.589087156232163e-06, "loss": 0.1709, "step": 9335 }, { "epoch": 0.75, "grad_norm": 1.208146166698898, "learning_rate": 1.5881400230546712e-06, "loss": 0.1572, "step": 9336 }, { "epoch": 0.75, "grad_norm": 8.617003209428942, "learning_rate": 1.5871931189256768e-06, "loss": 0.6768, "step": 9337 }, { "epoch": 0.75, "grad_norm": 1.2680820431940585, "learning_rate": 1.5862464439087488e-06, "loss": 0.1639, "step": 9338 }, { "epoch": 0.75, "grad_norm": 1.3076368488237644, "learning_rate": 1.5852999980674404e-06, "loss": 0.1898, "step": 9339 }, { "epoch": 0.75, "grad_norm": 1.4637877634606185, "learning_rate": 1.5843537814652894e-06, "loss": 0.2175, "step": 9340 }, { "epoch": 0.75, "grad_norm": 1.401853692267398, "learning_rate": 1.5834077941658165e-06, "loss": 0.2033, "step": 9341 }, { "epoch": 0.75, "grad_norm": 1.396712624745321, "learning_rate": 1.5824620362325315e-06, "loss": 0.2002, "step": 9342 }, { "epoch": 0.75, "grad_norm": 10.648326557893304, "learning_rate": 1.5815165077289247e-06, "loss": 0.7711, "step": 9343 }, { "epoch": 0.75, "grad_norm": 1.407157150159328, "learning_rate": 1.5805712087184727e-06, "loss": 0.1772, "step": 9344 }, { "epoch": 0.75, "grad_norm": 1.4307108385827259, "learning_rate": 1.5796261392646357e-06, "loss": 0.1886, "step": 9345 }, { "epoch": 0.75, "grad_norm": 1.206561352200287, "learning_rate": 1.5786812994308592e-06, "loss": 0.182, "step": 9346 }, { "epoch": 0.75, "grad_norm": 1.345536603614382, "learning_rate": 1.5777366892805735e-06, "loss": 0.1773, "step": 9347 }, { "epoch": 0.75, "grad_norm": 1.3621866577813369, "learning_rate": 1.576792308877193e-06, "loss": 0.2023, "step": 9348 }, { "epoch": 0.75, "grad_norm": 1.2919590550630158, "learning_rate": 1.5758481582841162e-06, "loss": 0.184, "step": 9349 }, { "epoch": 0.75, "grad_norm": 1.2355816848368182, "learning_rate": 1.5749042375647261e-06, "loss": 0.1471, "step": 9350 }, { "epoch": 0.75, "grad_norm": 1.3459914724858888, "learning_rate": 1.5739605467823953e-06, "loss": 0.1543, "step": 9351 }, { "epoch": 0.75, "grad_norm": 1.3708064910140392, "learning_rate": 1.57301708600047e-06, "loss": 0.1811, "step": 9352 }, { "epoch": 0.75, "grad_norm": 1.1994024936767052, "learning_rate": 1.5720738552822929e-06, "loss": 0.1586, "step": 9353 }, { "epoch": 0.75, "grad_norm": 1.3388958208750605, "learning_rate": 1.5711308546911859e-06, "loss": 0.1761, "step": 9354 }, { "epoch": 0.75, "grad_norm": 1.368201819488567, "learning_rate": 1.5701880842904503e-06, "loss": 0.1747, "step": 9355 }, { "epoch": 0.75, "grad_norm": 1.245913086368507, "learning_rate": 1.569245544143382e-06, "loss": 0.1692, "step": 9356 }, { "epoch": 0.75, "grad_norm": 7.874973148266775, "learning_rate": 1.5683032343132554e-06, "loss": 0.586, "step": 9357 }, { "epoch": 0.75, "grad_norm": 1.2867405194831163, "learning_rate": 1.56736115486333e-06, "loss": 0.1414, "step": 9358 }, { "epoch": 0.75, "grad_norm": 1.4718809798163417, "learning_rate": 1.5664193058568505e-06, "loss": 0.1908, "step": 9359 }, { "epoch": 0.75, "grad_norm": 1.420506295551474, "learning_rate": 1.565477687357047e-06, "loss": 0.1877, "step": 9360 }, { "epoch": 0.75, "grad_norm": 1.4261483500855614, "learning_rate": 1.5645362994271306e-06, "loss": 0.1591, "step": 9361 }, { "epoch": 0.75, "grad_norm": 1.4713040744058778, "learning_rate": 1.5635951421303047e-06, "loss": 0.1949, "step": 9362 }, { "epoch": 0.75, "grad_norm": 1.4247911789022698, "learning_rate": 1.562654215529747e-06, "loss": 0.1857, "step": 9363 }, { "epoch": 0.75, "grad_norm": 1.376379764522003, "learning_rate": 1.5617135196886251e-06, "loss": 0.1728, "step": 9364 }, { "epoch": 0.75, "grad_norm": 8.158168869944392, "learning_rate": 1.5607730546700956e-06, "loss": 0.5003, "step": 9365 }, { "epoch": 0.75, "grad_norm": 1.3831778491072337, "learning_rate": 1.5598328205372882e-06, "loss": 0.1821, "step": 9366 }, { "epoch": 0.75, "grad_norm": 1.4609672337910595, "learning_rate": 1.558892817353328e-06, "loss": 0.1994, "step": 9367 }, { "epoch": 0.75, "grad_norm": 1.2934071496884783, "learning_rate": 1.5579530451813197e-06, "loss": 0.1532, "step": 9368 }, { "epoch": 0.75, "grad_norm": 1.3797129027006125, "learning_rate": 1.557013504084352e-06, "loss": 0.1894, "step": 9369 }, { "epoch": 0.75, "grad_norm": 1.3038280298391, "learning_rate": 1.5560741941254998e-06, "loss": 0.1371, "step": 9370 }, { "epoch": 0.75, "grad_norm": 10.244811957969455, "learning_rate": 1.5551351153678219e-06, "loss": 0.6788, "step": 9371 }, { "epoch": 0.75, "grad_norm": 1.3645078033817268, "learning_rate": 1.5541962678743606e-06, "loss": 0.1672, "step": 9372 }, { "epoch": 0.75, "grad_norm": 1.2510419882515846, "learning_rate": 1.553257651708145e-06, "loss": 0.1507, "step": 9373 }, { "epoch": 0.75, "grad_norm": 1.3938383786711575, "learning_rate": 1.5523192669321858e-06, "loss": 0.1551, "step": 9374 }, { "epoch": 0.75, "grad_norm": 1.355667226661782, "learning_rate": 1.5513811136094786e-06, "loss": 0.1524, "step": 9375 }, { "epoch": 0.75, "grad_norm": 1.5274339936161818, "learning_rate": 1.55044319180301e-06, "loss": 0.2166, "step": 9376 }, { "epoch": 0.75, "grad_norm": 1.4408613814447968, "learning_rate": 1.5495055015757377e-06, "loss": 0.1901, "step": 9377 }, { "epoch": 0.75, "grad_norm": 1.3228786210345276, "learning_rate": 1.5485680429906175e-06, "loss": 0.1515, "step": 9378 }, { "epoch": 0.75, "grad_norm": 1.326616846397789, "learning_rate": 1.5476308161105825e-06, "loss": 0.1613, "step": 9379 }, { "epoch": 0.75, "grad_norm": 1.2268189457206167, "learning_rate": 1.5466938209985504e-06, "loss": 0.1622, "step": 9380 }, { "epoch": 0.75, "grad_norm": 1.4602018070776943, "learning_rate": 1.5457570577174257e-06, "loss": 0.1645, "step": 9381 }, { "epoch": 0.75, "grad_norm": 1.2905483115486185, "learning_rate": 1.5448205263300952e-06, "loss": 0.1719, "step": 9382 }, { "epoch": 0.75, "grad_norm": 1.5059262715438797, "learning_rate": 1.543884226899432e-06, "loss": 0.214, "step": 9383 }, { "epoch": 0.75, "grad_norm": 1.4258629349772773, "learning_rate": 1.5429481594882905e-06, "loss": 0.2125, "step": 9384 }, { "epoch": 0.75, "grad_norm": 1.325015343329243, "learning_rate": 1.5420123241595169e-06, "loss": 0.1846, "step": 9385 }, { "epoch": 0.75, "grad_norm": 1.518488103145518, "learning_rate": 1.5410767209759298e-06, "loss": 0.1792, "step": 9386 }, { "epoch": 0.75, "grad_norm": 1.4142280390012976, "learning_rate": 1.5401413500003443e-06, "loss": 0.1797, "step": 9387 }, { "epoch": 0.75, "grad_norm": 1.422754865308121, "learning_rate": 1.5392062112955524e-06, "loss": 0.1551, "step": 9388 }, { "epoch": 0.75, "grad_norm": 1.2653878894750799, "learning_rate": 1.5382713049243336e-06, "loss": 0.1589, "step": 9389 }, { "epoch": 0.75, "grad_norm": 1.308876400514231, "learning_rate": 1.5373366309494515e-06, "loss": 0.1953, "step": 9390 }, { "epoch": 0.75, "grad_norm": 1.3247587030491197, "learning_rate": 1.5364021894336517e-06, "loss": 0.183, "step": 9391 }, { "epoch": 0.75, "grad_norm": 1.4539961124735112, "learning_rate": 1.5354679804396678e-06, "loss": 0.1942, "step": 9392 }, { "epoch": 0.75, "grad_norm": 1.4486015459406543, "learning_rate": 1.5345340040302153e-06, "loss": 0.1938, "step": 9393 }, { "epoch": 0.75, "grad_norm": 1.5192065569005047, "learning_rate": 1.533600260267995e-06, "loss": 0.2159, "step": 9394 }, { "epoch": 0.75, "grad_norm": 9.753101693366677, "learning_rate": 1.5326667492156905e-06, "loss": 0.6111, "step": 9395 }, { "epoch": 0.75, "grad_norm": 1.4420065680208252, "learning_rate": 1.531733470935976e-06, "loss": 0.178, "step": 9396 }, { "epoch": 0.75, "grad_norm": 1.2974879604420024, "learning_rate": 1.5308004254915004e-06, "loss": 0.1673, "step": 9397 }, { "epoch": 0.75, "grad_norm": 1.2610943344961074, "learning_rate": 1.5298676129449019e-06, "loss": 0.1675, "step": 9398 }, { "epoch": 0.75, "grad_norm": 1.4652424221987772, "learning_rate": 1.5289350333588076e-06, "loss": 0.1764, "step": 9399 }, { "epoch": 0.75, "grad_norm": 1.353272512092655, "learning_rate": 1.5280026867958186e-06, "loss": 0.1754, "step": 9400 }, { "epoch": 0.75, "grad_norm": 1.2256948729947745, "learning_rate": 1.5270705733185304e-06, "loss": 0.1194, "step": 9401 }, { "epoch": 0.75, "grad_norm": 1.23078983120021, "learning_rate": 1.5261386929895173e-06, "loss": 0.1305, "step": 9402 }, { "epoch": 0.75, "grad_norm": 1.3413357405182518, "learning_rate": 1.5252070458713393e-06, "loss": 0.2305, "step": 9403 }, { "epoch": 0.75, "grad_norm": 1.5429711768944137, "learning_rate": 1.52427563202654e-06, "loss": 0.159, "step": 9404 }, { "epoch": 0.75, "grad_norm": 1.3853649450490844, "learning_rate": 1.5233444515176488e-06, "loss": 0.213, "step": 9405 }, { "epoch": 0.75, "grad_norm": 6.075035658825802, "learning_rate": 1.5224135044071782e-06, "loss": 0.7079, "step": 9406 }, { "epoch": 0.75, "grad_norm": 1.4105589901166866, "learning_rate": 1.5214827907576257e-06, "loss": 0.1658, "step": 9407 }, { "epoch": 0.75, "grad_norm": 1.2749283408560548, "learning_rate": 1.5205523106314736e-06, "loss": 0.1348, "step": 9408 }, { "epoch": 0.75, "grad_norm": 1.24389986257992, "learning_rate": 1.5196220640911846e-06, "loss": 0.1868, "step": 9409 }, { "epoch": 0.75, "grad_norm": 1.39323421657642, "learning_rate": 1.5186920511992154e-06, "loss": 0.1798, "step": 9410 }, { "epoch": 0.75, "grad_norm": 1.4008239742941895, "learning_rate": 1.5177622720179936e-06, "loss": 0.1709, "step": 9411 }, { "epoch": 0.75, "grad_norm": 1.4552369695828133, "learning_rate": 1.5168327266099424e-06, "loss": 0.1741, "step": 9412 }, { "epoch": 0.75, "grad_norm": 1.406483843143933, "learning_rate": 1.515903415037464e-06, "loss": 0.195, "step": 9413 }, { "epoch": 0.75, "grad_norm": 1.6819854428902734, "learning_rate": 1.514974337362946e-06, "loss": 0.1556, "step": 9414 }, { "epoch": 0.75, "grad_norm": 1.5179444271714662, "learning_rate": 1.5140454936487597e-06, "loss": 0.2041, "step": 9415 }, { "epoch": 0.75, "grad_norm": 7.972367202012451, "learning_rate": 1.513116883957262e-06, "loss": 0.5602, "step": 9416 }, { "epoch": 0.75, "grad_norm": 1.6198569338249735, "learning_rate": 1.512188508350792e-06, "loss": 0.2139, "step": 9417 }, { "epoch": 0.75, "grad_norm": 1.2829441028382433, "learning_rate": 1.5112603668916752e-06, "loss": 0.1969, "step": 9418 }, { "epoch": 0.75, "grad_norm": 1.459096298611839, "learning_rate": 1.5103324596422209e-06, "loss": 0.1471, "step": 9419 }, { "epoch": 0.75, "grad_norm": 1.3704958231998041, "learning_rate": 1.5094047866647194e-06, "loss": 0.1648, "step": 9420 }, { "epoch": 0.75, "grad_norm": 1.2939810664602074, "learning_rate": 1.5084773480214543e-06, "loss": 0.1577, "step": 9421 }, { "epoch": 0.75, "grad_norm": 1.1733310321007817, "learning_rate": 1.507550143774681e-06, "loss": 0.1512, "step": 9422 }, { "epoch": 0.75, "grad_norm": 1.3289051790554052, "learning_rate": 1.5066231739866494e-06, "loss": 0.1905, "step": 9423 }, { "epoch": 0.75, "grad_norm": 1.4622796242195417, "learning_rate": 1.5056964387195899e-06, "loss": 0.1979, "step": 9424 }, { "epoch": 0.75, "grad_norm": 1.5330226359248196, "learning_rate": 1.5047699380357134e-06, "loss": 0.1564, "step": 9425 }, { "epoch": 0.75, "grad_norm": 1.317815827195486, "learning_rate": 1.503843671997222e-06, "loss": 0.146, "step": 9426 }, { "epoch": 0.75, "grad_norm": 1.187941949202876, "learning_rate": 1.5029176406662982e-06, "loss": 0.1511, "step": 9427 }, { "epoch": 0.75, "grad_norm": 1.6317481333486337, "learning_rate": 1.5019918441051084e-06, "loss": 0.1874, "step": 9428 }, { "epoch": 0.75, "grad_norm": 1.4309893462426606, "learning_rate": 1.5010662823758037e-06, "loss": 0.1736, "step": 9429 }, { "epoch": 0.75, "grad_norm": 1.27630107118488, "learning_rate": 1.5001409555405238e-06, "loss": 0.1742, "step": 9430 }, { "epoch": 0.75, "grad_norm": 1.4565101545326837, "learning_rate": 1.4992158636613824e-06, "loss": 0.2006, "step": 9431 }, { "epoch": 0.75, "grad_norm": 1.442116411631485, "learning_rate": 1.4982910068004886e-06, "loss": 0.1847, "step": 9432 }, { "epoch": 0.75, "grad_norm": 1.3457790366624383, "learning_rate": 1.497366385019931e-06, "loss": 0.1788, "step": 9433 }, { "epoch": 0.75, "grad_norm": 9.234378309465253, "learning_rate": 1.4964419983817773e-06, "loss": 0.4991, "step": 9434 }, { "epoch": 0.75, "grad_norm": 1.383115835112116, "learning_rate": 1.4955178469480891e-06, "loss": 0.1761, "step": 9435 }, { "epoch": 0.75, "grad_norm": 1.5689932363071797, "learning_rate": 1.4945939307809066e-06, "loss": 0.1964, "step": 9436 }, { "epoch": 0.75, "grad_norm": 1.2459260633590892, "learning_rate": 1.4936702499422534e-06, "loss": 0.1229, "step": 9437 }, { "epoch": 0.76, "grad_norm": 1.3299548849751863, "learning_rate": 1.4927468044941407e-06, "loss": 0.1777, "step": 9438 }, { "epoch": 0.76, "grad_norm": 1.382942554996044, "learning_rate": 1.4918235944985616e-06, "loss": 0.2114, "step": 9439 }, { "epoch": 0.76, "grad_norm": 7.203411238077802, "learning_rate": 1.4909006200174924e-06, "loss": 0.5328, "step": 9440 }, { "epoch": 0.76, "grad_norm": 1.3732862362328369, "learning_rate": 1.4899778811128996e-06, "loss": 0.1793, "step": 9441 }, { "epoch": 0.76, "grad_norm": 1.454908155860332, "learning_rate": 1.4890553778467253e-06, "loss": 0.1627, "step": 9442 }, { "epoch": 0.76, "grad_norm": 1.2432888231024148, "learning_rate": 1.4881331102808994e-06, "loss": 0.1768, "step": 9443 }, { "epoch": 0.76, "grad_norm": 1.2234125004691994, "learning_rate": 1.487211078477342e-06, "loss": 0.1503, "step": 9444 }, { "epoch": 0.76, "grad_norm": 13.029984790168864, "learning_rate": 1.4862892824979448e-06, "loss": 0.6348, "step": 9445 }, { "epoch": 0.76, "grad_norm": 1.4085605201468197, "learning_rate": 1.4853677224045955e-06, "loss": 0.1938, "step": 9446 }, { "epoch": 0.76, "grad_norm": 1.3470430131286248, "learning_rate": 1.48444639825916e-06, "loss": 0.179, "step": 9447 }, { "epoch": 0.76, "grad_norm": 1.4529842541068745, "learning_rate": 1.4835253101234891e-06, "loss": 0.2109, "step": 9448 }, { "epoch": 0.76, "grad_norm": 1.3601666403547403, "learning_rate": 1.4826044580594197e-06, "loss": 0.2092, "step": 9449 }, { "epoch": 0.76, "grad_norm": 1.2523536514003595, "learning_rate": 1.4816838421287693e-06, "loss": 0.1718, "step": 9450 }, { "epoch": 0.76, "grad_norm": 1.3290787140527853, "learning_rate": 1.4807634623933431e-06, "loss": 0.1559, "step": 9451 }, { "epoch": 0.76, "grad_norm": 1.5961875526630813, "learning_rate": 1.4798433189149286e-06, "loss": 0.1615, "step": 9452 }, { "epoch": 0.76, "grad_norm": 7.320723290932673, "learning_rate": 1.4789234117552974e-06, "loss": 0.5378, "step": 9453 }, { "epoch": 0.76, "grad_norm": 14.269721066179851, "learning_rate": 1.478003740976205e-06, "loss": 0.3311, "step": 9454 }, { "epoch": 0.76, "grad_norm": 8.960558457189034, "learning_rate": 1.4770843066393954e-06, "loss": 0.5803, "step": 9455 }, { "epoch": 0.76, "grad_norm": 23.920351685010257, "learning_rate": 1.4761651088065871e-06, "loss": 0.5061, "step": 9456 }, { "epoch": 0.76, "grad_norm": 1.2857335728530066, "learning_rate": 1.4752461475394936e-06, "loss": 0.1549, "step": 9457 }, { "epoch": 0.76, "grad_norm": 1.2179194459186946, "learning_rate": 1.4743274228998073e-06, "loss": 0.156, "step": 9458 }, { "epoch": 0.76, "grad_norm": 1.4717195560907594, "learning_rate": 1.4734089349492003e-06, "loss": 0.2067, "step": 9459 }, { "epoch": 0.76, "grad_norm": 1.1810822913162073, "learning_rate": 1.4724906837493386e-06, "loss": 0.1897, "step": 9460 }, { "epoch": 0.76, "grad_norm": 1.3460237229914176, "learning_rate": 1.4715726693618648e-06, "loss": 0.1867, "step": 9461 }, { "epoch": 0.76, "grad_norm": 1.6483617258778553, "learning_rate": 1.4706548918484086e-06, "loss": 0.2299, "step": 9462 }, { "epoch": 0.76, "grad_norm": 1.467713366860506, "learning_rate": 1.4697373512705825e-06, "loss": 0.1827, "step": 9463 }, { "epoch": 0.76, "grad_norm": 1.5593949516406573, "learning_rate": 1.468820047689985e-06, "loss": 0.1881, "step": 9464 }, { "epoch": 0.76, "grad_norm": 1.2559534204729685, "learning_rate": 1.467902981168195e-06, "loss": 0.1515, "step": 9465 }, { "epoch": 0.76, "grad_norm": 1.6295212064514297, "learning_rate": 1.4669861517667827e-06, "loss": 0.2045, "step": 9466 }, { "epoch": 0.76, "grad_norm": 1.3799786900028381, "learning_rate": 1.466069559547293e-06, "loss": 0.1896, "step": 9467 }, { "epoch": 0.76, "grad_norm": 1.3109390977073188, "learning_rate": 1.4651532045712596e-06, "loss": 0.1905, "step": 9468 }, { "epoch": 0.76, "grad_norm": 1.189816932410113, "learning_rate": 1.4642370869002048e-06, "loss": 0.128, "step": 9469 }, { "epoch": 0.76, "grad_norm": 1.294885055021585, "learning_rate": 1.4633212065956248e-06, "loss": 0.1799, "step": 9470 }, { "epoch": 0.76, "grad_norm": 1.414707058226305, "learning_rate": 1.4624055637190087e-06, "loss": 0.1865, "step": 9471 }, { "epoch": 0.76, "grad_norm": 1.3791342531933861, "learning_rate": 1.4614901583318258e-06, "loss": 0.1349, "step": 9472 }, { "epoch": 0.76, "grad_norm": 1.3363566031400298, "learning_rate": 1.4605749904955301e-06, "loss": 0.1665, "step": 9473 }, { "epoch": 0.76, "grad_norm": 7.669581603404619, "learning_rate": 1.4596600602715576e-06, "loss": 0.5442, "step": 9474 }, { "epoch": 0.76, "grad_norm": 1.301003993697531, "learning_rate": 1.4587453677213348e-06, "loss": 0.1716, "step": 9475 }, { "epoch": 0.76, "grad_norm": 1.394557039397332, "learning_rate": 1.457830912906264e-06, "loss": 0.1912, "step": 9476 }, { "epoch": 0.76, "grad_norm": 1.424606987188368, "learning_rate": 1.4569166958877344e-06, "loss": 0.1671, "step": 9477 }, { "epoch": 0.76, "grad_norm": 1.6733430037287949, "learning_rate": 1.4560027167271257e-06, "loss": 0.2028, "step": 9478 }, { "epoch": 0.76, "grad_norm": 1.324640204933129, "learning_rate": 1.4550889754857895e-06, "loss": 0.1393, "step": 9479 }, { "epoch": 0.76, "grad_norm": 1.3455280875323246, "learning_rate": 1.4541754722250716e-06, "loss": 0.153, "step": 9480 }, { "epoch": 0.76, "grad_norm": 7.665598052227999, "learning_rate": 1.4532622070062985e-06, "loss": 0.4305, "step": 9481 }, { "epoch": 0.76, "grad_norm": 1.4761610330466073, "learning_rate": 1.4523491798907797e-06, "loss": 0.1839, "step": 9482 }, { "epoch": 0.76, "grad_norm": 1.4106730429956367, "learning_rate": 1.4514363909398087e-06, "loss": 0.2174, "step": 9483 }, { "epoch": 0.76, "grad_norm": 1.3602914294857062, "learning_rate": 1.4505238402146655e-06, "loss": 0.1709, "step": 9484 }, { "epoch": 0.76, "grad_norm": 1.3280141635271685, "learning_rate": 1.4496115277766105e-06, "loss": 0.1112, "step": 9485 }, { "epoch": 0.76, "grad_norm": 1.2936525507891432, "learning_rate": 1.4486994536868914e-06, "loss": 0.1666, "step": 9486 }, { "epoch": 0.76, "grad_norm": 1.4751379848237982, "learning_rate": 1.447787618006738e-06, "loss": 0.1817, "step": 9487 }, { "epoch": 0.76, "grad_norm": 1.3204226270972617, "learning_rate": 1.4468760207973626e-06, "loss": 0.1759, "step": 9488 }, { "epoch": 0.76, "grad_norm": 1.490590648041613, "learning_rate": 1.4459646621199686e-06, "loss": 0.1698, "step": 9489 }, { "epoch": 0.76, "grad_norm": 1.4824303748700929, "learning_rate": 1.4450535420357325e-06, "loss": 0.1814, "step": 9490 }, { "epoch": 0.76, "grad_norm": 1.325868276688208, "learning_rate": 1.4441426606058245e-06, "loss": 0.1578, "step": 9491 }, { "epoch": 0.76, "grad_norm": 1.4634316077127678, "learning_rate": 1.4432320178913933e-06, "loss": 0.165, "step": 9492 }, { "epoch": 0.76, "grad_norm": 7.964189516750006, "learning_rate": 1.4423216139535735e-06, "loss": 0.6755, "step": 9493 }, { "epoch": 0.76, "grad_norm": 1.3944858190029523, "learning_rate": 1.441411448853483e-06, "loss": 0.1877, "step": 9494 }, { "epoch": 0.76, "grad_norm": 6.650562732169268, "learning_rate": 1.440501522652224e-06, "loss": 0.6248, "step": 9495 }, { "epoch": 0.76, "grad_norm": 1.5755838456817048, "learning_rate": 1.4395918354108828e-06, "loss": 0.1954, "step": 9496 }, { "epoch": 0.76, "grad_norm": 1.4080230874275284, "learning_rate": 1.4386823871905293e-06, "loss": 0.1699, "step": 9497 }, { "epoch": 0.76, "grad_norm": 1.2773079929378146, "learning_rate": 1.4377731780522176e-06, "loss": 0.1933, "step": 9498 }, { "epoch": 0.76, "grad_norm": 1.342458829294653, "learning_rate": 1.4368642080569839e-06, "loss": 0.1717, "step": 9499 }, { "epoch": 0.76, "grad_norm": 1.469860056379433, "learning_rate": 1.4359554772658551e-06, "loss": 0.1864, "step": 9500 }, { "epoch": 0.76, "grad_norm": 1.3246879485208942, "learning_rate": 1.4350469857398302e-06, "loss": 0.1395, "step": 9501 }, { "epoch": 0.76, "grad_norm": 1.2934110728182642, "learning_rate": 1.4341387335399043e-06, "loss": 0.195, "step": 9502 }, { "epoch": 0.76, "grad_norm": 1.3401304518663661, "learning_rate": 1.4332307207270507e-06, "loss": 0.2016, "step": 9503 }, { "epoch": 0.76, "grad_norm": 1.1972405696058863, "learning_rate": 1.432322947362223e-06, "loss": 0.1792, "step": 9504 }, { "epoch": 0.76, "grad_norm": 1.3033608810952786, "learning_rate": 1.4314154135063668e-06, "loss": 0.1839, "step": 9505 }, { "epoch": 0.76, "grad_norm": 1.3170656636689433, "learning_rate": 1.4305081192204063e-06, "loss": 0.1501, "step": 9506 }, { "epoch": 0.76, "grad_norm": 7.331578605320142, "learning_rate": 1.4296010645652508e-06, "loss": 0.5321, "step": 9507 }, { "epoch": 0.76, "grad_norm": 1.233407614184908, "learning_rate": 1.4286942496017936e-06, "loss": 0.1604, "step": 9508 }, { "epoch": 0.76, "grad_norm": 5.241474233955057, "learning_rate": 1.4277876743909126e-06, "loss": 0.6613, "step": 9509 }, { "epoch": 0.76, "grad_norm": 1.2304216286611123, "learning_rate": 1.426881338993466e-06, "loss": 0.1756, "step": 9510 }, { "epoch": 0.76, "grad_norm": 8.644379302117223, "learning_rate": 1.425975243470305e-06, "loss": 0.4953, "step": 9511 }, { "epoch": 0.76, "grad_norm": 5.738090691030682, "learning_rate": 1.425069387882253e-06, "loss": 0.4325, "step": 9512 }, { "epoch": 0.76, "grad_norm": 1.3724100130778067, "learning_rate": 1.4241637722901231e-06, "loss": 0.1504, "step": 9513 }, { "epoch": 0.76, "grad_norm": 1.2401692803813313, "learning_rate": 1.4232583967547153e-06, "loss": 0.1648, "step": 9514 }, { "epoch": 0.76, "grad_norm": 1.4101990466290486, "learning_rate": 1.422353261336808e-06, "loss": 0.1616, "step": 9515 }, { "epoch": 0.76, "grad_norm": 9.490879904374797, "learning_rate": 1.4214483660971663e-06, "loss": 0.532, "step": 9516 }, { "epoch": 0.76, "grad_norm": 1.1882894761098333, "learning_rate": 1.420543711096538e-06, "loss": 0.1763, "step": 9517 }, { "epoch": 0.76, "grad_norm": 1.3590309846896866, "learning_rate": 1.4196392963956562e-06, "loss": 0.1633, "step": 9518 }, { "epoch": 0.76, "grad_norm": 1.6747899669887836, "learning_rate": 1.4187351220552365e-06, "loss": 0.2204, "step": 9519 }, { "epoch": 0.76, "grad_norm": 1.3801906646257749, "learning_rate": 1.4178311881359785e-06, "loss": 0.1583, "step": 9520 }, { "epoch": 0.76, "grad_norm": 1.3418794197945618, "learning_rate": 1.4169274946985666e-06, "loss": 0.1798, "step": 9521 }, { "epoch": 0.76, "grad_norm": 1.324226482857568, "learning_rate": 1.416024041803667e-06, "loss": 0.1677, "step": 9522 }, { "epoch": 0.76, "grad_norm": 10.702841458168953, "learning_rate": 1.4151208295119351e-06, "loss": 0.5708, "step": 9523 }, { "epoch": 0.76, "grad_norm": 1.4673549968322426, "learning_rate": 1.4142178578840005e-06, "loss": 0.1721, "step": 9524 }, { "epoch": 0.76, "grad_norm": 1.4989579406884164, "learning_rate": 1.4133151269804873e-06, "loss": 0.1627, "step": 9525 }, { "epoch": 0.76, "grad_norm": 1.1826466321446618, "learning_rate": 1.4124126368619972e-06, "loss": 0.1834, "step": 9526 }, { "epoch": 0.76, "grad_norm": 1.435829386157774, "learning_rate": 1.4115103875891161e-06, "loss": 0.1607, "step": 9527 }, { "epoch": 0.76, "grad_norm": 1.2813394459905327, "learning_rate": 1.4106083792224161e-06, "loss": 0.1768, "step": 9528 }, { "epoch": 0.76, "grad_norm": 1.3482329560110613, "learning_rate": 1.4097066118224505e-06, "loss": 0.2215, "step": 9529 }, { "epoch": 0.76, "grad_norm": 1.4349688413644328, "learning_rate": 1.4088050854497587e-06, "loss": 0.1605, "step": 9530 }, { "epoch": 0.76, "grad_norm": 1.3434978840786715, "learning_rate": 1.407903800164862e-06, "loss": 0.1776, "step": 9531 }, { "epoch": 0.76, "grad_norm": 6.16202447088763, "learning_rate": 1.4070027560282668e-06, "loss": 0.4032, "step": 9532 }, { "epoch": 0.76, "grad_norm": 1.1604662711088827, "learning_rate": 1.4061019531004617e-06, "loss": 0.1627, "step": 9533 }, { "epoch": 0.76, "grad_norm": 1.388689996160774, "learning_rate": 1.4052013914419244e-06, "loss": 0.1911, "step": 9534 }, { "epoch": 0.76, "grad_norm": 1.179192419260396, "learning_rate": 1.404301071113106e-06, "loss": 0.1339, "step": 9535 }, { "epoch": 0.76, "grad_norm": 7.867951917332689, "learning_rate": 1.4034009921744534e-06, "loss": 0.4695, "step": 9536 }, { "epoch": 0.76, "grad_norm": 1.654643297565091, "learning_rate": 1.402501154686391e-06, "loss": 0.2051, "step": 9537 }, { "epoch": 0.76, "grad_norm": 8.967300197272294, "learning_rate": 1.4016015587093223e-06, "loss": 0.7145, "step": 9538 }, { "epoch": 0.76, "grad_norm": 6.30620331114358, "learning_rate": 1.4007022043036462e-06, "loss": 0.675, "step": 9539 }, { "epoch": 0.76, "grad_norm": 1.3238881189565663, "learning_rate": 1.3998030915297357e-06, "loss": 0.1653, "step": 9540 }, { "epoch": 0.76, "grad_norm": 1.3828199078702719, "learning_rate": 1.3989042204479524e-06, "loss": 0.1395, "step": 9541 }, { "epoch": 0.76, "grad_norm": 1.5020418118996313, "learning_rate": 1.3980055911186397e-06, "loss": 0.1489, "step": 9542 }, { "epoch": 0.76, "grad_norm": 1.2890011781073085, "learning_rate": 1.397107203602125e-06, "loss": 0.1746, "step": 9543 }, { "epoch": 0.76, "grad_norm": 1.4484678371927784, "learning_rate": 1.3962090579587185e-06, "loss": 0.1761, "step": 9544 }, { "epoch": 0.76, "grad_norm": 1.354060512818933, "learning_rate": 1.3953111542487202e-06, "loss": 0.1693, "step": 9545 }, { "epoch": 0.76, "grad_norm": 1.504551825013934, "learning_rate": 1.3944134925324042e-06, "loss": 0.1499, "step": 9546 }, { "epoch": 0.76, "grad_norm": 1.2296212074264161, "learning_rate": 1.393516072870033e-06, "loss": 0.1563, "step": 9547 }, { "epoch": 0.76, "grad_norm": 4.796733755882371, "learning_rate": 1.3926188953218588e-06, "loss": 0.4771, "step": 9548 }, { "epoch": 0.76, "grad_norm": 1.3670592063763816, "learning_rate": 1.3917219599481052e-06, "loss": 0.1639, "step": 9549 }, { "epoch": 0.76, "grad_norm": 1.292539834765507, "learning_rate": 1.39082526680899e-06, "loss": 0.1626, "step": 9550 }, { "epoch": 0.76, "grad_norm": 1.2594938858716314, "learning_rate": 1.38992881596471e-06, "loss": 0.149, "step": 9551 }, { "epoch": 0.76, "grad_norm": 1.544498350106149, "learning_rate": 1.3890326074754474e-06, "loss": 0.1728, "step": 9552 }, { "epoch": 0.76, "grad_norm": 1.330752902030664, "learning_rate": 1.3881366414013642e-06, "loss": 0.1527, "step": 9553 }, { "epoch": 0.76, "grad_norm": 1.435867482970378, "learning_rate": 1.3872409178026153e-06, "loss": 0.1653, "step": 9554 }, { "epoch": 0.76, "grad_norm": 1.392670813415072, "learning_rate": 1.386345436739328e-06, "loss": 0.1826, "step": 9555 }, { "epoch": 0.76, "grad_norm": 1.3935418335271457, "learning_rate": 1.3854501982716196e-06, "loss": 0.1946, "step": 9556 }, { "epoch": 0.76, "grad_norm": 5.949126955465833, "learning_rate": 1.3845552024595937e-06, "loss": 0.6535, "step": 9557 }, { "epoch": 0.76, "grad_norm": 1.635963701515663, "learning_rate": 1.3836604493633282e-06, "loss": 0.2358, "step": 9558 }, { "epoch": 0.76, "grad_norm": 1.4534131931170744, "learning_rate": 1.3827659390428954e-06, "loss": 0.2097, "step": 9559 }, { "epoch": 0.76, "grad_norm": 1.236820020529011, "learning_rate": 1.3818716715583452e-06, "loss": 0.1728, "step": 9560 }, { "epoch": 0.76, "grad_norm": 9.599596893025295, "learning_rate": 1.3809776469697117e-06, "loss": 0.6319, "step": 9561 }, { "epoch": 0.76, "grad_norm": 1.4500552908205495, "learning_rate": 1.380083865337014e-06, "loss": 0.1852, "step": 9562 }, { "epoch": 0.77, "grad_norm": 1.4283826583397312, "learning_rate": 1.379190326720255e-06, "loss": 0.1835, "step": 9563 }, { "epoch": 0.77, "grad_norm": 1.296572454013597, "learning_rate": 1.378297031179419e-06, "loss": 0.1424, "step": 9564 }, { "epoch": 0.77, "grad_norm": 1.5683992555739819, "learning_rate": 1.3774039787744776e-06, "loss": 0.1953, "step": 9565 }, { "epoch": 0.77, "grad_norm": 1.305474895223133, "learning_rate": 1.3765111695653827e-06, "loss": 0.1842, "step": 9566 }, { "epoch": 0.77, "grad_norm": 1.183909120786941, "learning_rate": 1.37561860361207e-06, "loss": 0.1534, "step": 9567 }, { "epoch": 0.77, "grad_norm": 1.5633094592245167, "learning_rate": 1.374726280974465e-06, "loss": 0.183, "step": 9568 }, { "epoch": 0.77, "grad_norm": 1.3819098969708152, "learning_rate": 1.3738342017124663e-06, "loss": 0.1498, "step": 9569 }, { "epoch": 0.77, "grad_norm": 1.3013755691246405, "learning_rate": 1.3729423658859654e-06, "loss": 0.1619, "step": 9570 }, { "epoch": 0.77, "grad_norm": 1.4299547381396105, "learning_rate": 1.3720507735548333e-06, "loss": 0.1668, "step": 9571 }, { "epoch": 0.77, "grad_norm": 1.383860121985393, "learning_rate": 1.371159424778925e-06, "loss": 0.1745, "step": 9572 }, { "epoch": 0.77, "grad_norm": 1.5092454863802427, "learning_rate": 1.3702683196180793e-06, "loss": 0.1581, "step": 9573 }, { "epoch": 0.77, "grad_norm": 1.244768949441808, "learning_rate": 1.3693774581321195e-06, "loss": 0.157, "step": 9574 }, { "epoch": 0.77, "grad_norm": 2.0406806728956774, "learning_rate": 1.368486840380851e-06, "loss": 0.1824, "step": 9575 }, { "epoch": 0.77, "grad_norm": 1.3862038264360002, "learning_rate": 1.3675964664240638e-06, "loss": 0.1584, "step": 9576 }, { "epoch": 0.77, "grad_norm": 6.580882455541988, "learning_rate": 1.3667063363215317e-06, "loss": 0.6504, "step": 9577 }, { "epoch": 0.77, "grad_norm": 1.5972889989994792, "learning_rate": 1.36581645013301e-06, "loss": 0.1904, "step": 9578 }, { "epoch": 0.77, "grad_norm": 1.3081239334781531, "learning_rate": 1.3649268079182443e-06, "loss": 0.1762, "step": 9579 }, { "epoch": 0.77, "grad_norm": 12.022901863252361, "learning_rate": 1.364037409736954e-06, "loss": 0.7905, "step": 9580 }, { "epoch": 0.77, "grad_norm": 1.4132776820530764, "learning_rate": 1.363148255648848e-06, "loss": 0.1875, "step": 9581 }, { "epoch": 0.77, "grad_norm": 1.3697041909350678, "learning_rate": 1.3622593457136213e-06, "loss": 0.2101, "step": 9582 }, { "epoch": 0.77, "grad_norm": 1.338149560813029, "learning_rate": 1.3613706799909437e-06, "loss": 0.1978, "step": 9583 }, { "epoch": 0.77, "grad_norm": 1.2971856484520121, "learning_rate": 1.3604822585404788e-06, "loss": 0.1923, "step": 9584 }, { "epoch": 0.77, "grad_norm": 1.3585148024154676, "learning_rate": 1.3595940814218668e-06, "loss": 0.1986, "step": 9585 }, { "epoch": 0.77, "grad_norm": 1.7303474405411523, "learning_rate": 1.3587061486947346e-06, "loss": 0.1748, "step": 9586 }, { "epoch": 0.77, "grad_norm": 6.489074911175372, "learning_rate": 1.3578184604186905e-06, "loss": 0.5244, "step": 9587 }, { "epoch": 0.77, "grad_norm": 1.3111334344209886, "learning_rate": 1.3569310166533296e-06, "loss": 0.1661, "step": 9588 }, { "epoch": 0.77, "grad_norm": 1.5057802914941214, "learning_rate": 1.3560438174582269e-06, "loss": 0.211, "step": 9589 }, { "epoch": 0.77, "grad_norm": 1.0799185766573356, "learning_rate": 1.3551568628929434e-06, "loss": 0.0989, "step": 9590 }, { "epoch": 0.77, "grad_norm": 6.407109771371685, "learning_rate": 1.3542701530170232e-06, "loss": 0.5072, "step": 9591 }, { "epoch": 0.77, "grad_norm": 1.2780794928594057, "learning_rate": 1.3533836878899926e-06, "loss": 0.1735, "step": 9592 }, { "epoch": 0.77, "grad_norm": 1.2807341333372013, "learning_rate": 1.3524974675713665e-06, "loss": 0.2034, "step": 9593 }, { "epoch": 0.77, "grad_norm": 1.5352565879390407, "learning_rate": 1.351611492120634e-06, "loss": 0.2073, "step": 9594 }, { "epoch": 0.77, "grad_norm": 1.3406718189532885, "learning_rate": 1.3507257615972779e-06, "loss": 0.1956, "step": 9595 }, { "epoch": 0.77, "grad_norm": 1.3768010480101185, "learning_rate": 1.3498402760607583e-06, "loss": 0.1795, "step": 9596 }, { "epoch": 0.77, "grad_norm": 1.3753312696708002, "learning_rate": 1.34895503557052e-06, "loss": 0.2164, "step": 9597 }, { "epoch": 0.77, "grad_norm": 1.3285053397933568, "learning_rate": 1.348070040185993e-06, "loss": 0.1853, "step": 9598 }, { "epoch": 0.77, "grad_norm": 18.219939954146714, "learning_rate": 1.3471852899665888e-06, "loss": 0.4674, "step": 9599 }, { "epoch": 0.77, "grad_norm": 1.5750489160314116, "learning_rate": 1.3463007849717035e-06, "loss": 0.1866, "step": 9600 }, { "epoch": 0.77, "grad_norm": 1.293972076259141, "learning_rate": 1.3454165252607155e-06, "loss": 0.1728, "step": 9601 }, { "epoch": 0.77, "grad_norm": 1.326216574248781, "learning_rate": 1.3445325108929925e-06, "loss": 0.1854, "step": 9602 }, { "epoch": 0.77, "grad_norm": 1.2888354948646372, "learning_rate": 1.3436487419278738e-06, "loss": 0.1601, "step": 9603 }, { "epoch": 0.77, "grad_norm": 1.221012335123318, "learning_rate": 1.3427652184246952e-06, "loss": 0.1541, "step": 9604 }, { "epoch": 0.77, "grad_norm": 1.2524904239281136, "learning_rate": 1.341881940442769e-06, "loss": 0.1479, "step": 9605 }, { "epoch": 0.77, "grad_norm": 1.4512552393785314, "learning_rate": 1.340998908041391e-06, "loss": 0.1569, "step": 9606 }, { "epoch": 0.77, "grad_norm": 1.3239944720160173, "learning_rate": 1.340116121279843e-06, "loss": 0.1569, "step": 9607 }, { "epoch": 0.77, "grad_norm": 1.3476387513839778, "learning_rate": 1.339233580217389e-06, "loss": 0.1866, "step": 9608 }, { "epoch": 0.77, "grad_norm": 1.3471928892707412, "learning_rate": 1.3383512849132763e-06, "loss": 0.1597, "step": 9609 }, { "epoch": 0.77, "grad_norm": 5.291681048339552, "learning_rate": 1.337469235426736e-06, "loss": 0.6777, "step": 9610 }, { "epoch": 0.77, "grad_norm": 1.375597595402478, "learning_rate": 1.336587431816983e-06, "loss": 0.1757, "step": 9611 }, { "epoch": 0.77, "grad_norm": 1.4699522996503478, "learning_rate": 1.3357058741432143e-06, "loss": 0.1737, "step": 9612 }, { "epoch": 0.77, "grad_norm": 1.335584692191746, "learning_rate": 1.334824562464615e-06, "loss": 0.1521, "step": 9613 }, { "epoch": 0.77, "grad_norm": 1.4958505138259426, "learning_rate": 1.3339434968403453e-06, "loss": 0.1985, "step": 9614 }, { "epoch": 0.77, "grad_norm": 1.3676360208857195, "learning_rate": 1.3330626773295579e-06, "loss": 0.1691, "step": 9615 }, { "epoch": 0.77, "grad_norm": 1.3925730442750044, "learning_rate": 1.3321821039913846e-06, "loss": 0.1521, "step": 9616 }, { "epoch": 0.77, "grad_norm": 1.6120207165975067, "learning_rate": 1.3313017768849368e-06, "loss": 0.2055, "step": 9617 }, { "epoch": 0.77, "grad_norm": 1.419546355501113, "learning_rate": 1.3304216960693184e-06, "loss": 0.1777, "step": 9618 }, { "epoch": 0.77, "grad_norm": 1.353822303456167, "learning_rate": 1.3295418616036093e-06, "loss": 0.159, "step": 9619 }, { "epoch": 0.77, "grad_norm": 1.0496577997395307, "learning_rate": 1.3286622735468764e-06, "loss": 0.1365, "step": 9620 }, { "epoch": 0.77, "grad_norm": 8.538599804232529, "learning_rate": 1.3277829319581682e-06, "loss": 0.465, "step": 9621 }, { "epoch": 0.77, "grad_norm": 1.6189600171400274, "learning_rate": 1.3269038368965186e-06, "loss": 0.2215, "step": 9622 }, { "epoch": 0.77, "grad_norm": 1.2736620650765598, "learning_rate": 1.326024988420942e-06, "loss": 0.1729, "step": 9623 }, { "epoch": 0.77, "grad_norm": 1.3582058906548065, "learning_rate": 1.325146386590443e-06, "loss": 0.1742, "step": 9624 }, { "epoch": 0.77, "grad_norm": 1.3683837501519065, "learning_rate": 1.3242680314639995e-06, "loss": 0.1732, "step": 9625 }, { "epoch": 0.77, "grad_norm": 1.3809632047167297, "learning_rate": 1.323389923100578e-06, "loss": 0.1954, "step": 9626 }, { "epoch": 0.77, "grad_norm": 1.272012411658825, "learning_rate": 1.3225120615591348e-06, "loss": 0.145, "step": 9627 }, { "epoch": 0.77, "grad_norm": 15.67671607290729, "learning_rate": 1.3216344468985952e-06, "loss": 0.6937, "step": 9628 }, { "epoch": 0.77, "grad_norm": 7.113232189526291, "learning_rate": 1.3207570791778824e-06, "loss": 0.4933, "step": 9629 }, { "epoch": 0.77, "grad_norm": 1.189196847889087, "learning_rate": 1.319879958455894e-06, "loss": 0.1366, "step": 9630 }, { "epoch": 0.77, "grad_norm": 1.3498062707465581, "learning_rate": 1.3190030847915148e-06, "loss": 0.1858, "step": 9631 }, { "epoch": 0.77, "grad_norm": 1.4802678123197617, "learning_rate": 1.3181264582436115e-06, "loss": 0.2463, "step": 9632 }, { "epoch": 0.77, "grad_norm": 19.39872033281013, "learning_rate": 1.3172500788710347e-06, "loss": 0.6008, "step": 9633 }, { "epoch": 0.77, "grad_norm": 1.5084644539602123, "learning_rate": 1.316373946732618e-06, "loss": 0.2255, "step": 9634 }, { "epoch": 0.77, "grad_norm": 1.3364142835123023, "learning_rate": 1.3154980618871793e-06, "loss": 0.175, "step": 9635 }, { "epoch": 0.77, "grad_norm": 6.435541684885667, "learning_rate": 1.3146224243935195e-06, "loss": 0.5252, "step": 9636 }, { "epoch": 0.77, "grad_norm": 1.336266437398066, "learning_rate": 1.3137470343104208e-06, "loss": 0.1925, "step": 9637 }, { "epoch": 0.77, "grad_norm": 1.3194209815748466, "learning_rate": 1.312871891696656e-06, "loss": 0.1894, "step": 9638 }, { "epoch": 0.77, "grad_norm": 1.3433903912664904, "learning_rate": 1.311996996610969e-06, "loss": 0.1727, "step": 9639 }, { "epoch": 0.77, "grad_norm": 1.3381210493808817, "learning_rate": 1.3111223491121e-06, "loss": 0.1484, "step": 9640 }, { "epoch": 0.77, "grad_norm": 1.4304298538470888, "learning_rate": 1.3102479492587662e-06, "loss": 0.1599, "step": 9641 }, { "epoch": 0.77, "grad_norm": 1.3436631237523706, "learning_rate": 1.3093737971096637e-06, "loss": 0.1715, "step": 9642 }, { "epoch": 0.77, "grad_norm": 1.3384039013836901, "learning_rate": 1.3084998927234816e-06, "loss": 0.1922, "step": 9643 }, { "epoch": 0.77, "grad_norm": 1.7619959023130334, "learning_rate": 1.3076262361588871e-06, "loss": 0.2137, "step": 9644 }, { "epoch": 0.77, "grad_norm": 1.4271910533463508, "learning_rate": 1.30675282747453e-06, "loss": 0.1542, "step": 9645 }, { "epoch": 0.77, "grad_norm": 1.7225465626074374, "learning_rate": 1.3058796667290446e-06, "loss": 0.1976, "step": 9646 }, { "epoch": 0.77, "grad_norm": 1.1662364861741215, "learning_rate": 1.3050067539810529e-06, "loss": 0.1558, "step": 9647 }, { "epoch": 0.77, "grad_norm": 1.3803226612548904, "learning_rate": 1.3041340892891508e-06, "loss": 0.1955, "step": 9648 }, { "epoch": 0.77, "grad_norm": 1.4167938145683001, "learning_rate": 1.3032616727119263e-06, "loss": 0.1851, "step": 9649 }, { "epoch": 0.77, "grad_norm": 1.3559721378665677, "learning_rate": 1.3023895043079476e-06, "loss": 0.1606, "step": 9650 }, { "epoch": 0.77, "grad_norm": 1.4106247093248743, "learning_rate": 1.3015175841357618e-06, "loss": 0.1945, "step": 9651 }, { "epoch": 0.77, "grad_norm": 1.3915104658500859, "learning_rate": 1.3006459122539083e-06, "loss": 0.1773, "step": 9652 }, { "epoch": 0.77, "grad_norm": 1.3949293004165577, "learning_rate": 1.2997744887209034e-06, "loss": 0.1424, "step": 9653 }, { "epoch": 0.77, "grad_norm": 1.498613016747376, "learning_rate": 1.2989033135952484e-06, "loss": 0.1665, "step": 9654 }, { "epoch": 0.77, "grad_norm": 1.3817191561852455, "learning_rate": 1.2980323869354277e-06, "loss": 0.1787, "step": 9655 }, { "epoch": 0.77, "grad_norm": 1.33106650852278, "learning_rate": 1.297161708799909e-06, "loss": 0.1853, "step": 9656 }, { "epoch": 0.77, "grad_norm": 1.326393210649177, "learning_rate": 1.2962912792471426e-06, "loss": 0.1527, "step": 9657 }, { "epoch": 0.77, "grad_norm": 1.2609976685904807, "learning_rate": 1.295421098335568e-06, "loss": 0.1604, "step": 9658 }, { "epoch": 0.77, "grad_norm": 1.3997698941181176, "learning_rate": 1.2945511661235977e-06, "loss": 0.1663, "step": 9659 }, { "epoch": 0.77, "grad_norm": 1.2754168893415605, "learning_rate": 1.2936814826696326e-06, "loss": 0.1487, "step": 9660 }, { "epoch": 0.77, "grad_norm": 1.2369092001936701, "learning_rate": 1.2928120480320627e-06, "loss": 0.1597, "step": 9661 }, { "epoch": 0.77, "grad_norm": 1.252003589635802, "learning_rate": 1.2919428622692499e-06, "loss": 0.1559, "step": 9662 }, { "epoch": 0.77, "grad_norm": 10.165678739915954, "learning_rate": 1.2910739254395482e-06, "loss": 0.5545, "step": 9663 }, { "epoch": 0.77, "grad_norm": 1.4924209069746268, "learning_rate": 1.290205237601292e-06, "loss": 0.2011, "step": 9664 }, { "epoch": 0.77, "grad_norm": 1.403136813357024, "learning_rate": 1.2893367988127986e-06, "loss": 0.177, "step": 9665 }, { "epoch": 0.77, "grad_norm": 1.3795896465290092, "learning_rate": 1.2884686091323684e-06, "loss": 0.1831, "step": 9666 }, { "epoch": 0.77, "grad_norm": 1.2436644503950356, "learning_rate": 1.2876006686182863e-06, "loss": 0.1508, "step": 9667 }, { "epoch": 0.77, "grad_norm": 1.3099909187940835, "learning_rate": 1.286732977328819e-06, "loss": 0.1717, "step": 9668 }, { "epoch": 0.77, "grad_norm": 5.293489586165324, "learning_rate": 1.2858655353222182e-06, "loss": 0.4487, "step": 9669 }, { "epoch": 0.77, "grad_norm": 1.4364614103042515, "learning_rate": 1.284998342656717e-06, "loss": 0.1828, "step": 9670 }, { "epoch": 0.77, "grad_norm": 1.3990358228027717, "learning_rate": 1.284131399390532e-06, "loss": 0.1775, "step": 9671 }, { "epoch": 0.77, "grad_norm": 1.48244109101915, "learning_rate": 1.283264705581867e-06, "loss": 0.2129, "step": 9672 }, { "epoch": 0.77, "grad_norm": 1.2635632079279266, "learning_rate": 1.2823982612889013e-06, "loss": 0.1806, "step": 9673 }, { "epoch": 0.77, "grad_norm": 1.5154657192724275, "learning_rate": 1.2815320665698056e-06, "loss": 0.2039, "step": 9674 }, { "epoch": 0.77, "grad_norm": 1.432365553539627, "learning_rate": 1.2806661214827286e-06, "loss": 0.1984, "step": 9675 }, { "epoch": 0.77, "grad_norm": 1.4054911432786705, "learning_rate": 1.2798004260858039e-06, "loss": 0.1841, "step": 9676 }, { "epoch": 0.77, "grad_norm": 1.4472745361627866, "learning_rate": 1.2789349804371486e-06, "loss": 0.2116, "step": 9677 }, { "epoch": 0.77, "grad_norm": 1.3531110065039962, "learning_rate": 1.2780697845948625e-06, "loss": 0.1603, "step": 9678 }, { "epoch": 0.77, "grad_norm": 1.3939290198745053, "learning_rate": 1.2772048386170284e-06, "loss": 0.1897, "step": 9679 }, { "epoch": 0.77, "grad_norm": 17.94111117270653, "learning_rate": 1.2763401425617134e-06, "loss": 0.7047, "step": 9680 }, { "epoch": 0.77, "grad_norm": 1.209768005446248, "learning_rate": 1.275475696486967e-06, "loss": 0.164, "step": 9681 }, { "epoch": 0.77, "grad_norm": 6.491920262071602, "learning_rate": 1.2746115004508198e-06, "loss": 0.4811, "step": 9682 }, { "epoch": 0.77, "grad_norm": 1.3630257227242328, "learning_rate": 1.273747554511292e-06, "loss": 0.1761, "step": 9683 }, { "epoch": 0.77, "grad_norm": 1.277766373987406, "learning_rate": 1.272883858726381e-06, "loss": 0.1563, "step": 9684 }, { "epoch": 0.77, "grad_norm": 1.1902572638906392, "learning_rate": 1.2720204131540693e-06, "loss": 0.1559, "step": 9685 }, { "epoch": 0.77, "grad_norm": 1.342748353602773, "learning_rate": 1.2711572178523224e-06, "loss": 0.188, "step": 9686 }, { "epoch": 0.77, "grad_norm": 1.2036780390289268, "learning_rate": 1.2702942728790897e-06, "loss": 0.1509, "step": 9687 }, { "epoch": 0.78, "grad_norm": 1.3068500263494836, "learning_rate": 1.2694315782923029e-06, "loss": 0.178, "step": 9688 }, { "epoch": 0.78, "grad_norm": 1.2930488902783386, "learning_rate": 1.2685691341498775e-06, "loss": 0.1767, "step": 9689 }, { "epoch": 0.78, "grad_norm": 1.143972226164976, "learning_rate": 1.2677069405097115e-06, "loss": 0.1349, "step": 9690 }, { "epoch": 0.78, "grad_norm": 1.2186527908945055, "learning_rate": 1.266844997429686e-06, "loss": 0.1859, "step": 9691 }, { "epoch": 0.78, "grad_norm": 1.4151349414493946, "learning_rate": 1.2659833049676695e-06, "loss": 0.1692, "step": 9692 }, { "epoch": 0.78, "grad_norm": 1.381076332409303, "learning_rate": 1.2651218631815043e-06, "loss": 0.1805, "step": 9693 }, { "epoch": 0.78, "grad_norm": 1.406152524967701, "learning_rate": 1.2642606721290262e-06, "loss": 0.1578, "step": 9694 }, { "epoch": 0.78, "grad_norm": 1.2651239677311, "learning_rate": 1.2633997318680496e-06, "loss": 0.1383, "step": 9695 }, { "epoch": 0.78, "grad_norm": 1.3895068288622148, "learning_rate": 1.2625390424563677e-06, "loss": 0.1769, "step": 9696 }, { "epoch": 0.78, "grad_norm": 1.3513493743654383, "learning_rate": 1.2616786039517653e-06, "loss": 0.1734, "step": 9697 }, { "epoch": 0.78, "grad_norm": 1.2204624576270215, "learning_rate": 1.2608184164120046e-06, "loss": 0.1546, "step": 9698 }, { "epoch": 0.78, "grad_norm": 1.462617742241187, "learning_rate": 1.2599584798948334e-06, "loss": 0.2129, "step": 9699 }, { "epoch": 0.78, "grad_norm": 1.2789824326871861, "learning_rate": 1.2590987944579808e-06, "loss": 0.1517, "step": 9700 }, { "epoch": 0.78, "grad_norm": 1.3958115021693471, "learning_rate": 1.2582393601591608e-06, "loss": 0.1753, "step": 9701 }, { "epoch": 0.78, "grad_norm": 10.14100832097093, "learning_rate": 1.2573801770560696e-06, "loss": 0.4916, "step": 9702 }, { "epoch": 0.78, "grad_norm": 1.38204250596385, "learning_rate": 1.2565212452063868e-06, "loss": 0.1638, "step": 9703 }, { "epoch": 0.78, "grad_norm": 1.1958446392121997, "learning_rate": 1.255662564667775e-06, "loss": 0.1915, "step": 9704 }, { "epoch": 0.78, "grad_norm": 1.3256701764725474, "learning_rate": 1.254804135497879e-06, "loss": 0.1576, "step": 9705 }, { "epoch": 0.78, "grad_norm": 7.326200893413067, "learning_rate": 1.2539459577543317e-06, "loss": 0.6434, "step": 9706 }, { "epoch": 0.78, "grad_norm": 1.3307907023325738, "learning_rate": 1.253088031494739e-06, "loss": 0.1665, "step": 9707 }, { "epoch": 0.78, "grad_norm": 1.1027352884085972, "learning_rate": 1.2522303567767013e-06, "loss": 0.1602, "step": 9708 }, { "epoch": 0.78, "grad_norm": 7.089395155964005, "learning_rate": 1.2513729336577952e-06, "loss": 0.5007, "step": 9709 }, { "epoch": 0.78, "grad_norm": 1.3625978555690292, "learning_rate": 1.2505157621955815e-06, "loss": 0.1835, "step": 9710 }, { "epoch": 0.78, "grad_norm": 9.794887340986408, "learning_rate": 1.2496588424476053e-06, "loss": 0.4645, "step": 9711 }, { "epoch": 0.78, "grad_norm": 1.3745107318293512, "learning_rate": 1.248802174471394e-06, "loss": 0.1886, "step": 9712 }, { "epoch": 0.78, "grad_norm": 1.4450027980655469, "learning_rate": 1.2479457583244586e-06, "loss": 0.1705, "step": 9713 }, { "epoch": 0.78, "grad_norm": 1.2559512424854085, "learning_rate": 1.247089594064293e-06, "loss": 0.1635, "step": 9714 }, { "epoch": 0.78, "grad_norm": 1.2897402455614253, "learning_rate": 1.2462336817483734e-06, "loss": 0.1601, "step": 9715 }, { "epoch": 0.78, "grad_norm": 11.159294875052488, "learning_rate": 1.2453780214341588e-06, "loss": 0.6042, "step": 9716 }, { "epoch": 0.78, "grad_norm": 1.1571272965770274, "learning_rate": 1.2445226131790965e-06, "loss": 0.1301, "step": 9717 }, { "epoch": 0.78, "grad_norm": 1.2995103041134548, "learning_rate": 1.243667457040607e-06, "loss": 0.1562, "step": 9718 }, { "epoch": 0.78, "grad_norm": 1.4025446666148669, "learning_rate": 1.2428125530761043e-06, "loss": 0.1692, "step": 9719 }, { "epoch": 0.78, "grad_norm": 1.2906886628993186, "learning_rate": 1.2419579013429795e-06, "loss": 0.163, "step": 9720 }, { "epoch": 0.78, "grad_norm": 1.279015857952301, "learning_rate": 1.2411035018986045e-06, "loss": 0.1545, "step": 9721 }, { "epoch": 0.78, "grad_norm": 1.3208093120753732, "learning_rate": 1.2402493548003418e-06, "loss": 0.1467, "step": 9722 }, { "epoch": 0.78, "grad_norm": 1.3797915765546418, "learning_rate": 1.2393954601055318e-06, "loss": 0.1683, "step": 9723 }, { "epoch": 0.78, "grad_norm": 1.334191449613719, "learning_rate": 1.2385418178714987e-06, "loss": 0.181, "step": 9724 }, { "epoch": 0.78, "grad_norm": 1.1788189523618704, "learning_rate": 1.2376884281555485e-06, "loss": 0.1477, "step": 9725 }, { "epoch": 0.78, "grad_norm": 1.3170985118720009, "learning_rate": 1.2368352910149761e-06, "loss": 0.1309, "step": 9726 }, { "epoch": 0.78, "grad_norm": 8.516135494720798, "learning_rate": 1.2359824065070496e-06, "loss": 0.5503, "step": 9727 }, { "epoch": 0.78, "grad_norm": 1.374627311459824, "learning_rate": 1.2351297746890306e-06, "loss": 0.1896, "step": 9728 }, { "epoch": 0.78, "grad_norm": 1.2960278679368211, "learning_rate": 1.2342773956181585e-06, "loss": 0.1693, "step": 9729 }, { "epoch": 0.78, "grad_norm": 1.4729670515988442, "learning_rate": 1.2334252693516512e-06, "loss": 0.1745, "step": 9730 }, { "epoch": 0.78, "grad_norm": 1.535430541928961, "learning_rate": 1.2325733959467196e-06, "loss": 0.2111, "step": 9731 }, { "epoch": 0.78, "grad_norm": 7.718949537900114, "learning_rate": 1.2317217754605503e-06, "loss": 0.5044, "step": 9732 }, { "epoch": 0.78, "grad_norm": 1.3396746144986362, "learning_rate": 1.2308704079503164e-06, "loss": 0.1516, "step": 9733 }, { "epoch": 0.78, "grad_norm": 1.1854988857431092, "learning_rate": 1.2300192934731713e-06, "loss": 0.1581, "step": 9734 }, { "epoch": 0.78, "grad_norm": 1.2666466863429922, "learning_rate": 1.229168432086254e-06, "loss": 0.1492, "step": 9735 }, { "epoch": 0.78, "grad_norm": 1.4410346665142189, "learning_rate": 1.2283178238466837e-06, "loss": 0.1903, "step": 9736 }, { "epoch": 0.78, "grad_norm": 1.3333431762939871, "learning_rate": 1.2274674688115684e-06, "loss": 0.1834, "step": 9737 }, { "epoch": 0.78, "grad_norm": 1.5080363678671707, "learning_rate": 1.2266173670379905e-06, "loss": 0.1497, "step": 9738 }, { "epoch": 0.78, "grad_norm": 1.4000910908927513, "learning_rate": 1.2257675185830203e-06, "loss": 0.1783, "step": 9739 }, { "epoch": 0.78, "grad_norm": 1.1189839561871457, "learning_rate": 1.224917923503715e-06, "loss": 0.1413, "step": 9740 }, { "epoch": 0.78, "grad_norm": 1.2571164045497523, "learning_rate": 1.2240685818571047e-06, "loss": 0.1747, "step": 9741 }, { "epoch": 0.78, "grad_norm": 4.82598303779388, "learning_rate": 1.2232194937002124e-06, "loss": 0.5132, "step": 9742 }, { "epoch": 0.78, "grad_norm": 1.4111242016095809, "learning_rate": 1.222370659090039e-06, "loss": 0.1898, "step": 9743 }, { "epoch": 0.78, "grad_norm": 1.5109913798176613, "learning_rate": 1.221522078083569e-06, "loss": 0.1949, "step": 9744 }, { "epoch": 0.78, "grad_norm": 10.931788468827273, "learning_rate": 1.2206737507377698e-06, "loss": 0.5002, "step": 9745 }, { "epoch": 0.78, "grad_norm": 1.2957869521268661, "learning_rate": 1.219825677109593e-06, "loss": 0.1992, "step": 9746 }, { "epoch": 0.78, "grad_norm": 1.4035660794505376, "learning_rate": 1.2189778572559718e-06, "loss": 0.1778, "step": 9747 }, { "epoch": 0.78, "grad_norm": 1.3973899103952812, "learning_rate": 1.2181302912338234e-06, "loss": 0.1895, "step": 9748 }, { "epoch": 0.78, "grad_norm": 1.3981419509518755, "learning_rate": 1.2172829791000468e-06, "loss": 0.1673, "step": 9749 }, { "epoch": 0.78, "grad_norm": 1.1885338700672288, "learning_rate": 1.2164359209115235e-06, "loss": 0.1487, "step": 9750 }, { "epoch": 0.78, "grad_norm": 1.3879392677242115, "learning_rate": 1.215589116725124e-06, "loss": 0.1629, "step": 9751 }, { "epoch": 0.78, "grad_norm": 1.210056448384112, "learning_rate": 1.2147425665976898e-06, "loss": 0.1561, "step": 9752 }, { "epoch": 0.78, "grad_norm": 1.3404829400958798, "learning_rate": 1.2138962705860574e-06, "loss": 0.1737, "step": 9753 }, { "epoch": 0.78, "grad_norm": 1.3718911425512998, "learning_rate": 1.2130502287470402e-06, "loss": 0.1721, "step": 9754 }, { "epoch": 0.78, "grad_norm": 1.3170201905848402, "learning_rate": 1.212204441137435e-06, "loss": 0.1674, "step": 9755 }, { "epoch": 0.78, "grad_norm": 1.596004767264219, "learning_rate": 1.2113589078140225e-06, "loss": 0.1427, "step": 9756 }, { "epoch": 0.78, "grad_norm": 1.781898949815977, "learning_rate": 1.2105136288335655e-06, "loss": 0.1551, "step": 9757 }, { "epoch": 0.78, "grad_norm": 1.3831569328733846, "learning_rate": 1.20966860425281e-06, "loss": 0.1462, "step": 9758 }, { "epoch": 0.78, "grad_norm": 1.364261201260296, "learning_rate": 1.2088238341284858e-06, "loss": 0.1862, "step": 9759 }, { "epoch": 0.78, "grad_norm": 1.6029419981000008, "learning_rate": 1.2079793185173045e-06, "loss": 0.201, "step": 9760 }, { "epoch": 0.78, "grad_norm": 1.3130531112834292, "learning_rate": 1.2071350574759589e-06, "loss": 0.1738, "step": 9761 }, { "epoch": 0.78, "grad_norm": 1.3129234346522478, "learning_rate": 1.2062910510611319e-06, "loss": 0.1852, "step": 9762 }, { "epoch": 0.78, "grad_norm": 1.4665037289324505, "learning_rate": 1.2054472993294796e-06, "loss": 0.1865, "step": 9763 }, { "epoch": 0.78, "grad_norm": 1.125969839204317, "learning_rate": 1.204603802337646e-06, "loss": 0.1419, "step": 9764 }, { "epoch": 0.78, "grad_norm": 1.2998347139224293, "learning_rate": 1.2037605601422614e-06, "loss": 0.1253, "step": 9765 }, { "epoch": 0.78, "grad_norm": 11.25177259854596, "learning_rate": 1.2029175727999298e-06, "loss": 0.4844, "step": 9766 }, { "epoch": 0.78, "grad_norm": 1.4175279570114816, "learning_rate": 1.2020748403672478e-06, "loss": 0.1926, "step": 9767 }, { "epoch": 0.78, "grad_norm": 1.690616535051294, "learning_rate": 1.2012323629007887e-06, "loss": 0.1847, "step": 9768 }, { "epoch": 0.78, "grad_norm": 1.6055183078216466, "learning_rate": 1.200390140457111e-06, "loss": 0.2277, "step": 9769 }, { "epoch": 0.78, "grad_norm": 1.3128020818922936, "learning_rate": 1.1995481730927538e-06, "loss": 0.1735, "step": 9770 }, { "epoch": 0.78, "grad_norm": 1.3757135722992135, "learning_rate": 1.1987064608642463e-06, "loss": 0.21, "step": 9771 }, { "epoch": 0.78, "grad_norm": 1.4778586265268383, "learning_rate": 1.1978650038280893e-06, "loss": 0.1625, "step": 9772 }, { "epoch": 0.78, "grad_norm": 1.500562524060788, "learning_rate": 1.1970238020407737e-06, "loss": 0.1979, "step": 9773 }, { "epoch": 0.78, "grad_norm": 1.2042070084911467, "learning_rate": 1.1961828555587756e-06, "loss": 0.1746, "step": 9774 }, { "epoch": 0.78, "grad_norm": 1.3520633651449392, "learning_rate": 1.1953421644385444e-06, "loss": 0.2057, "step": 9775 }, { "epoch": 0.78, "grad_norm": 1.5652978519860317, "learning_rate": 1.194501728736523e-06, "loss": 0.1698, "step": 9776 }, { "epoch": 0.78, "grad_norm": 1.3108565773116279, "learning_rate": 1.1936615485091307e-06, "loss": 0.1494, "step": 9777 }, { "epoch": 0.78, "grad_norm": 1.6544316856254455, "learning_rate": 1.1928216238127715e-06, "loss": 0.2057, "step": 9778 }, { "epoch": 0.78, "grad_norm": 5.9969959673169155, "learning_rate": 1.191981954703832e-06, "loss": 0.5536, "step": 9779 }, { "epoch": 0.78, "grad_norm": 1.3191856511542102, "learning_rate": 1.1911425412386811e-06, "loss": 0.1903, "step": 9780 }, { "epoch": 0.78, "grad_norm": 1.364585765194263, "learning_rate": 1.1903033834736726e-06, "loss": 0.1774, "step": 9781 }, { "epoch": 0.78, "grad_norm": 1.2470652865156833, "learning_rate": 1.1894644814651407e-06, "loss": 0.1394, "step": 9782 }, { "epoch": 0.78, "grad_norm": 1.441906404573823, "learning_rate": 1.188625835269403e-06, "loss": 0.1959, "step": 9783 }, { "epoch": 0.78, "grad_norm": 1.317836894345022, "learning_rate": 1.18778744494276e-06, "loss": 0.201, "step": 9784 }, { "epoch": 0.78, "grad_norm": 5.67963698826465, "learning_rate": 1.1869493105414999e-06, "loss": 0.5676, "step": 9785 }, { "epoch": 0.78, "grad_norm": 1.3632883445348702, "learning_rate": 1.186111432121882e-06, "loss": 0.1735, "step": 9786 }, { "epoch": 0.78, "grad_norm": 8.035546708199437, "learning_rate": 1.1852738097401605e-06, "loss": 0.5192, "step": 9787 }, { "epoch": 0.78, "grad_norm": 1.245154129589304, "learning_rate": 1.1844364434525667e-06, "loss": 0.1492, "step": 9788 }, { "epoch": 0.78, "grad_norm": 1.573357832333633, "learning_rate": 1.1835993333153156e-06, "loss": 0.1968, "step": 9789 }, { "epoch": 0.78, "grad_norm": 1.320261224224351, "learning_rate": 1.1827624793846037e-06, "loss": 0.1914, "step": 9790 }, { "epoch": 0.78, "grad_norm": 1.3701948222707918, "learning_rate": 1.181925881716613e-06, "loss": 0.1699, "step": 9791 }, { "epoch": 0.78, "grad_norm": 1.3611585384831602, "learning_rate": 1.1810895403675055e-06, "loss": 0.1646, "step": 9792 }, { "epoch": 0.78, "grad_norm": 1.5001972070458662, "learning_rate": 1.1802534553934286e-06, "loss": 0.1932, "step": 9793 }, { "epoch": 0.78, "grad_norm": 1.3276122673765458, "learning_rate": 1.1794176268505103e-06, "loss": 0.1761, "step": 9794 }, { "epoch": 0.78, "grad_norm": 1.309131040558233, "learning_rate": 1.1785820547948612e-06, "loss": 0.1415, "step": 9795 }, { "epoch": 0.78, "grad_norm": 1.3833571927434074, "learning_rate": 1.1777467392825797e-06, "loss": 0.1824, "step": 9796 }, { "epoch": 0.78, "grad_norm": 1.3789760450518598, "learning_rate": 1.1769116803697383e-06, "loss": 0.1957, "step": 9797 }, { "epoch": 0.78, "grad_norm": 1.5158437345390863, "learning_rate": 1.1760768781124005e-06, "loss": 0.1886, "step": 9798 }, { "epoch": 0.78, "grad_norm": 9.005585897188372, "learning_rate": 1.1752423325666096e-06, "loss": 0.5743, "step": 9799 }, { "epoch": 0.78, "grad_norm": 1.4842342645797546, "learning_rate": 1.1744080437883859e-06, "loss": 0.168, "step": 9800 }, { "epoch": 0.78, "grad_norm": 1.5296894005019765, "learning_rate": 1.1735740118337436e-06, "loss": 0.1935, "step": 9801 }, { "epoch": 0.78, "grad_norm": 1.3541812302831, "learning_rate": 1.172740236758671e-06, "loss": 0.1644, "step": 9802 }, { "epoch": 0.78, "grad_norm": 1.440122790678042, "learning_rate": 1.1719067186191423e-06, "loss": 0.1695, "step": 9803 }, { "epoch": 0.78, "grad_norm": 11.583215023448982, "learning_rate": 1.1710734574711147e-06, "loss": 0.7161, "step": 9804 }, { "epoch": 0.78, "grad_norm": 1.367271088658042, "learning_rate": 1.1702404533705264e-06, "loss": 0.1604, "step": 9805 }, { "epoch": 0.78, "grad_norm": 1.3359624408024706, "learning_rate": 1.1694077063732995e-06, "loss": 0.1684, "step": 9806 }, { "epoch": 0.78, "grad_norm": 1.46502154979226, "learning_rate": 1.168575216535342e-06, "loss": 0.1753, "step": 9807 }, { "epoch": 0.78, "grad_norm": 8.148531340240961, "learning_rate": 1.1677429839125376e-06, "loss": 0.6221, "step": 9808 }, { "epoch": 0.78, "grad_norm": 1.2114163514730203, "learning_rate": 1.1669110085607565e-06, "loss": 0.1579, "step": 9809 }, { "epoch": 0.78, "grad_norm": 1.351153092094994, "learning_rate": 1.166079290535856e-06, "loss": 0.1849, "step": 9810 }, { "epoch": 0.78, "grad_norm": 1.2988569914155577, "learning_rate": 1.1652478298936664e-06, "loss": 0.1487, "step": 9811 }, { "epoch": 0.78, "grad_norm": 1.467092991046771, "learning_rate": 1.1644166266900098e-06, "loss": 0.1576, "step": 9812 }, { "epoch": 0.79, "grad_norm": 1.4390196816106162, "learning_rate": 1.1635856809806866e-06, "loss": 0.1637, "step": 9813 }, { "epoch": 0.79, "grad_norm": 1.30118363739276, "learning_rate": 1.162754992821481e-06, "loss": 0.1408, "step": 9814 }, { "epoch": 0.79, "grad_norm": 1.2726260879024283, "learning_rate": 1.1619245622681575e-06, "loss": 0.1799, "step": 9815 }, { "epoch": 0.79, "grad_norm": 1.3693734420967312, "learning_rate": 1.1610943893764692e-06, "loss": 0.2109, "step": 9816 }, { "epoch": 0.79, "grad_norm": 1.38509827532211, "learning_rate": 1.160264474202145e-06, "loss": 0.1703, "step": 9817 }, { "epoch": 0.79, "grad_norm": 1.4405518033055595, "learning_rate": 1.1594348168008995e-06, "loss": 0.1805, "step": 9818 }, { "epoch": 0.79, "grad_norm": 1.3693944669054656, "learning_rate": 1.1586054172284338e-06, "loss": 0.1863, "step": 9819 }, { "epoch": 0.79, "grad_norm": 1.4491888048478812, "learning_rate": 1.1577762755404227e-06, "loss": 0.1608, "step": 9820 }, { "epoch": 0.79, "grad_norm": 1.506491004958339, "learning_rate": 1.156947391792533e-06, "loss": 0.2121, "step": 9821 }, { "epoch": 0.79, "grad_norm": 1.2687628177018353, "learning_rate": 1.1561187660404083e-06, "loss": 0.1733, "step": 9822 }, { "epoch": 0.79, "grad_norm": 1.286893964367683, "learning_rate": 1.1552903983396779e-06, "loss": 0.1729, "step": 9823 }, { "epoch": 0.79, "grad_norm": 1.364489297235388, "learning_rate": 1.1544622887459522e-06, "loss": 0.1923, "step": 9824 }, { "epoch": 0.79, "grad_norm": 1.4330355666731789, "learning_rate": 1.1536344373148245e-06, "loss": 0.1677, "step": 9825 }, { "epoch": 0.79, "grad_norm": 1.6660367364474653, "learning_rate": 1.1528068441018708e-06, "loss": 0.1853, "step": 9826 }, { "epoch": 0.79, "grad_norm": 1.313874155436797, "learning_rate": 1.15197950916265e-06, "loss": 0.1395, "step": 9827 }, { "epoch": 0.79, "grad_norm": 1.4798038758361267, "learning_rate": 1.151152432552704e-06, "loss": 0.1614, "step": 9828 }, { "epoch": 0.79, "grad_norm": 1.5400662740034599, "learning_rate": 1.1503256143275559e-06, "loss": 0.1863, "step": 9829 }, { "epoch": 0.79, "grad_norm": 1.4065515040691345, "learning_rate": 1.1494990545427153e-06, "loss": 0.2081, "step": 9830 }, { "epoch": 0.79, "grad_norm": 1.4361973341200527, "learning_rate": 1.1486727532536674e-06, "loss": 0.1514, "step": 9831 }, { "epoch": 0.79, "grad_norm": 1.4330070199656062, "learning_rate": 1.147846710515888e-06, "loss": 0.2242, "step": 9832 }, { "epoch": 0.79, "grad_norm": 1.405907560108396, "learning_rate": 1.147020926384832e-06, "loss": 0.1651, "step": 9833 }, { "epoch": 0.79, "grad_norm": 6.414990176268025, "learning_rate": 1.146195400915932e-06, "loss": 0.6261, "step": 9834 }, { "epoch": 0.79, "grad_norm": 1.3914045403464677, "learning_rate": 1.1453701341646134e-06, "loss": 0.1667, "step": 9835 }, { "epoch": 0.79, "grad_norm": 1.2805940094851456, "learning_rate": 1.1445451261862761e-06, "loss": 0.1473, "step": 9836 }, { "epoch": 0.79, "grad_norm": 1.4342410011455013, "learning_rate": 1.1437203770363064e-06, "loss": 0.2031, "step": 9837 }, { "epoch": 0.79, "grad_norm": 1.4119699327648056, "learning_rate": 1.1428958867700717e-06, "loss": 0.1668, "step": 9838 }, { "epoch": 0.79, "grad_norm": 10.996793088071932, "learning_rate": 1.1420716554429229e-06, "loss": 0.5711, "step": 9839 }, { "epoch": 0.79, "grad_norm": 1.1840580020234677, "learning_rate": 1.1412476831101916e-06, "loss": 0.1478, "step": 9840 }, { "epoch": 0.79, "grad_norm": 1.2660520782487996, "learning_rate": 1.1404239698271975e-06, "loss": 0.1792, "step": 9841 }, { "epoch": 0.79, "grad_norm": 1.4008899204196315, "learning_rate": 1.1396005156492346e-06, "loss": 0.1969, "step": 9842 }, { "epoch": 0.79, "grad_norm": 1.3411630258498726, "learning_rate": 1.1387773206315845e-06, "loss": 0.1985, "step": 9843 }, { "epoch": 0.79, "grad_norm": 1.3996694457080312, "learning_rate": 1.1379543848295149e-06, "loss": 0.1776, "step": 9844 }, { "epoch": 0.79, "grad_norm": 1.3719295176852393, "learning_rate": 1.1371317082982658e-06, "loss": 0.1841, "step": 9845 }, { "epoch": 0.79, "grad_norm": 1.3841330521808748, "learning_rate": 1.1363092910930707e-06, "loss": 0.1502, "step": 9846 }, { "epoch": 0.79, "grad_norm": 6.019650446786424, "learning_rate": 1.1354871332691396e-06, "loss": 0.5841, "step": 9847 }, { "epoch": 0.79, "grad_norm": 8.10280298369289, "learning_rate": 1.1346652348816662e-06, "loss": 0.467, "step": 9848 }, { "epoch": 0.79, "grad_norm": 10.241730860964395, "learning_rate": 1.1338435959858268e-06, "loss": 0.6076, "step": 9849 }, { "epoch": 0.79, "grad_norm": 8.50279421666206, "learning_rate": 1.133022216636781e-06, "loss": 0.4449, "step": 9850 }, { "epoch": 0.79, "grad_norm": 1.535252145004815, "learning_rate": 1.1322010968896708e-06, "loss": 0.1841, "step": 9851 }, { "epoch": 0.79, "grad_norm": 1.4591776923716469, "learning_rate": 1.131380236799619e-06, "loss": 0.2065, "step": 9852 }, { "epoch": 0.79, "grad_norm": 1.2376492482486345, "learning_rate": 1.1305596364217336e-06, "loss": 0.1808, "step": 9853 }, { "epoch": 0.79, "grad_norm": 1.4192122429504153, "learning_rate": 1.1297392958111025e-06, "loss": 0.1653, "step": 9854 }, { "epoch": 0.79, "grad_norm": 1.271300616045784, "learning_rate": 1.1289192150228007e-06, "loss": 0.152, "step": 9855 }, { "epoch": 0.79, "grad_norm": 1.4472716395399354, "learning_rate": 1.1280993941118805e-06, "loss": 0.1686, "step": 9856 }, { "epoch": 0.79, "grad_norm": 1.2862849271318963, "learning_rate": 1.1272798331333789e-06, "loss": 0.1412, "step": 9857 }, { "epoch": 0.79, "grad_norm": 5.9446450623625875, "learning_rate": 1.1264605321423167e-06, "loss": 0.5053, "step": 9858 }, { "epoch": 0.79, "grad_norm": 15.141549849846838, "learning_rate": 1.1256414911936952e-06, "loss": 0.5147, "step": 9859 }, { "epoch": 0.79, "grad_norm": 1.5087631801234787, "learning_rate": 1.124822710342499e-06, "loss": 0.1856, "step": 9860 }, { "epoch": 0.79, "grad_norm": 8.1730253870854, "learning_rate": 1.124004189643696e-06, "loss": 0.644, "step": 9861 }, { "epoch": 0.79, "grad_norm": 1.4861562021172667, "learning_rate": 1.123185929152235e-06, "loss": 0.1987, "step": 9862 }, { "epoch": 0.79, "grad_norm": 1.6112319318027872, "learning_rate": 1.1223679289230478e-06, "loss": 0.2063, "step": 9863 }, { "epoch": 0.79, "grad_norm": 1.1987742143599338, "learning_rate": 1.121550189011053e-06, "loss": 0.1346, "step": 9864 }, { "epoch": 0.79, "grad_norm": 1.40178869813193, "learning_rate": 1.1207327094711423e-06, "loss": 0.1582, "step": 9865 }, { "epoch": 0.79, "grad_norm": 10.533594816996713, "learning_rate": 1.1199154903582e-06, "loss": 0.5875, "step": 9866 }, { "epoch": 0.79, "grad_norm": 1.2835925162805852, "learning_rate": 1.119098531727087e-06, "loss": 0.1587, "step": 9867 }, { "epoch": 0.79, "grad_norm": 1.2994861647513538, "learning_rate": 1.1182818336326485e-06, "loss": 0.1718, "step": 9868 }, { "epoch": 0.79, "grad_norm": 8.557713741927612, "learning_rate": 1.1174653961297116e-06, "loss": 0.5139, "step": 9869 }, { "epoch": 0.79, "grad_norm": 1.3583825787438852, "learning_rate": 1.116649219273086e-06, "loss": 0.1791, "step": 9870 }, { "epoch": 0.79, "grad_norm": 1.4391856689933962, "learning_rate": 1.1158333031175643e-06, "loss": 0.1716, "step": 9871 }, { "epoch": 0.79, "grad_norm": 1.2922499001840557, "learning_rate": 1.115017647717922e-06, "loss": 0.1787, "step": 9872 }, { "epoch": 0.79, "grad_norm": 10.027893253493273, "learning_rate": 1.114202253128916e-06, "loss": 0.7926, "step": 9873 }, { "epoch": 0.79, "grad_norm": 1.2184271608164499, "learning_rate": 1.1133871194052843e-06, "loss": 0.1335, "step": 9874 }, { "epoch": 0.79, "grad_norm": 1.4356445500207249, "learning_rate": 1.1125722466017547e-06, "loss": 0.1895, "step": 9875 }, { "epoch": 0.79, "grad_norm": 1.4031336807382615, "learning_rate": 1.1117576347730253e-06, "loss": 0.208, "step": 9876 }, { "epoch": 0.79, "grad_norm": 1.3037425473228466, "learning_rate": 1.1109432839737883e-06, "loss": 0.1657, "step": 9877 }, { "epoch": 0.79, "grad_norm": 1.51971929251121, "learning_rate": 1.1101291942587133e-06, "loss": 0.2174, "step": 9878 }, { "epoch": 0.79, "grad_norm": 1.373883784449355, "learning_rate": 1.109315365682449e-06, "loss": 0.1868, "step": 9879 }, { "epoch": 0.79, "grad_norm": 1.41286035815426, "learning_rate": 1.1085017982996337e-06, "loss": 0.1658, "step": 9880 }, { "epoch": 0.79, "grad_norm": 1.1771025471141001, "learning_rate": 1.1076884921648834e-06, "loss": 0.1359, "step": 9881 }, { "epoch": 0.79, "grad_norm": 1.327708055429888, "learning_rate": 1.1068754473327975e-06, "loss": 0.1667, "step": 9882 }, { "epoch": 0.79, "grad_norm": 1.29914385240458, "learning_rate": 1.1060626638579591e-06, "loss": 0.1487, "step": 9883 }, { "epoch": 0.79, "grad_norm": 1.4481201333699947, "learning_rate": 1.1052501417949319e-06, "loss": 0.1784, "step": 9884 }, { "epoch": 0.79, "grad_norm": 20.053654196460826, "learning_rate": 1.1044378811982631e-06, "loss": 0.52, "step": 9885 }, { "epoch": 0.79, "grad_norm": 1.6351945014187972, "learning_rate": 1.1036258821224833e-06, "loss": 0.2039, "step": 9886 }, { "epoch": 0.79, "grad_norm": 1.290663199134415, "learning_rate": 1.1028141446221024e-06, "loss": 0.1377, "step": 9887 }, { "epoch": 0.79, "grad_norm": 1.390442926306491, "learning_rate": 1.1020026687516155e-06, "loss": 0.1798, "step": 9888 }, { "epoch": 0.79, "grad_norm": 1.3756236616506705, "learning_rate": 1.1011914545655023e-06, "loss": 0.1757, "step": 9889 }, { "epoch": 0.79, "grad_norm": 1.4165927348687577, "learning_rate": 1.1003805021182169e-06, "loss": 0.1818, "step": 9890 }, { "epoch": 0.79, "grad_norm": 4.794723577350158, "learning_rate": 1.099569811464205e-06, "loss": 0.5262, "step": 9891 }, { "epoch": 0.79, "grad_norm": 10.787289761416629, "learning_rate": 1.09875938265789e-06, "loss": 0.4017, "step": 9892 }, { "epoch": 0.79, "grad_norm": 1.1931108154160246, "learning_rate": 1.0979492157536774e-06, "loss": 0.1426, "step": 9893 }, { "epoch": 0.79, "grad_norm": 1.2358440075177703, "learning_rate": 1.0971393108059569e-06, "loss": 0.1707, "step": 9894 }, { "epoch": 0.79, "grad_norm": 1.2917390844122292, "learning_rate": 1.0963296678691e-06, "loss": 0.1548, "step": 9895 }, { "epoch": 0.79, "grad_norm": 1.5793335636325065, "learning_rate": 1.0955202869974597e-06, "loss": 0.2102, "step": 9896 }, { "epoch": 0.79, "grad_norm": 1.3941711111515733, "learning_rate": 1.0947111682453714e-06, "loss": 0.1696, "step": 9897 }, { "epoch": 0.79, "grad_norm": 1.5026409551203748, "learning_rate": 1.0939023116671577e-06, "loss": 0.1793, "step": 9898 }, { "epoch": 0.79, "grad_norm": 1.2197598431153427, "learning_rate": 1.093093717317114e-06, "loss": 0.1622, "step": 9899 }, { "epoch": 0.79, "grad_norm": 1.4246894183172512, "learning_rate": 1.092285385249528e-06, "loss": 0.1996, "step": 9900 }, { "epoch": 0.79, "grad_norm": 1.3001198652226, "learning_rate": 1.0914773155186641e-06, "loss": 0.1906, "step": 9901 }, { "epoch": 0.79, "grad_norm": 1.7084001017920598, "learning_rate": 1.09066950817877e-06, "loss": 0.1689, "step": 9902 }, { "epoch": 0.79, "grad_norm": 1.4909422922896476, "learning_rate": 1.0898619632840768e-06, "loss": 0.1887, "step": 9903 }, { "epoch": 0.79, "grad_norm": 1.5946068088880412, "learning_rate": 1.0890546808887976e-06, "loss": 0.1896, "step": 9904 }, { "epoch": 0.79, "grad_norm": 1.4047885579053343, "learning_rate": 1.088247661047127e-06, "loss": 0.1434, "step": 9905 }, { "epoch": 0.79, "grad_norm": 1.4935789187329487, "learning_rate": 1.0874409038132427e-06, "loss": 0.1525, "step": 9906 }, { "epoch": 0.79, "grad_norm": 1.1826451456761589, "learning_rate": 1.0866344092413056e-06, "loss": 0.1561, "step": 9907 }, { "epoch": 0.79, "grad_norm": 1.3058903123930754, "learning_rate": 1.0858281773854562e-06, "loss": 0.1762, "step": 9908 }, { "epoch": 0.79, "grad_norm": 1.358816848690121, "learning_rate": 1.0850222082998235e-06, "loss": 0.1791, "step": 9909 }, { "epoch": 0.79, "grad_norm": 6.6416147911387515, "learning_rate": 1.0842165020385092e-06, "loss": 0.5557, "step": 9910 }, { "epoch": 0.79, "grad_norm": 1.3240333403954518, "learning_rate": 1.0834110586556062e-06, "loss": 0.1414, "step": 9911 }, { "epoch": 0.79, "grad_norm": 17.731540954490463, "learning_rate": 1.082605878205188e-06, "loss": 0.5485, "step": 9912 }, { "epoch": 0.79, "grad_norm": 1.4424079952966151, "learning_rate": 1.0818009607413027e-06, "loss": 0.1795, "step": 9913 }, { "epoch": 0.79, "grad_norm": 1.1425857698632826, "learning_rate": 1.0809963063179923e-06, "loss": 0.1335, "step": 9914 }, { "epoch": 0.79, "grad_norm": 1.2819325096079859, "learning_rate": 1.0801919149892743e-06, "loss": 0.1564, "step": 9915 }, { "epoch": 0.79, "grad_norm": 1.3553206959507982, "learning_rate": 1.0793877868091496e-06, "loss": 0.1759, "step": 9916 }, { "epoch": 0.79, "grad_norm": 1.372575918424012, "learning_rate": 1.0785839218316012e-06, "loss": 0.1819, "step": 9917 }, { "epoch": 0.79, "grad_norm": 1.2436419482167722, "learning_rate": 1.0777803201105963e-06, "loss": 0.1467, "step": 9918 }, { "epoch": 0.79, "grad_norm": 1.1845850604167574, "learning_rate": 1.0769769817000802e-06, "loss": 0.1672, "step": 9919 }, { "epoch": 0.79, "grad_norm": 1.2066982636060177, "learning_rate": 1.0761739066539888e-06, "loss": 0.1707, "step": 9920 }, { "epoch": 0.79, "grad_norm": 1.4449870183064657, "learning_rate": 1.0753710950262303e-06, "loss": 0.2513, "step": 9921 }, { "epoch": 0.79, "grad_norm": 1.527841818275548, "learning_rate": 1.0745685468707002e-06, "loss": 0.1872, "step": 9922 }, { "epoch": 0.79, "grad_norm": 1.3175448272322143, "learning_rate": 1.0737662622412792e-06, "loss": 0.1823, "step": 9923 }, { "epoch": 0.79, "grad_norm": 16.282928289820504, "learning_rate": 1.0729642411918228e-06, "loss": 0.6064, "step": 9924 }, { "epoch": 0.79, "grad_norm": 6.365857240688093, "learning_rate": 1.0721624837761768e-06, "loss": 0.634, "step": 9925 }, { "epoch": 0.79, "grad_norm": 1.3304839967444, "learning_rate": 1.0713609900481642e-06, "loss": 0.1692, "step": 9926 }, { "epoch": 0.79, "grad_norm": 1.1987163480870806, "learning_rate": 1.0705597600615913e-06, "loss": 0.1473, "step": 9927 }, { "epoch": 0.79, "grad_norm": 1.3411165268945053, "learning_rate": 1.0697587938702474e-06, "loss": 0.1735, "step": 9928 }, { "epoch": 0.79, "grad_norm": 1.1573819944727495, "learning_rate": 1.0689580915279041e-06, "loss": 0.1238, "step": 9929 }, { "epoch": 0.79, "grad_norm": 1.328414404905445, "learning_rate": 1.0681576530883148e-06, "loss": 0.147, "step": 9930 }, { "epoch": 0.79, "grad_norm": 1.4592088754315973, "learning_rate": 1.0673574786052148e-06, "loss": 0.154, "step": 9931 }, { "epoch": 0.79, "grad_norm": 1.3097856303510884, "learning_rate": 1.0665575681323232e-06, "loss": 0.1732, "step": 9932 }, { "epoch": 0.79, "grad_norm": 1.1193675688775948, "learning_rate": 1.0657579217233384e-06, "loss": 0.1403, "step": 9933 }, { "epoch": 0.79, "grad_norm": 1.3504398478891557, "learning_rate": 1.0649585394319468e-06, "loss": 0.178, "step": 9934 }, { "epoch": 0.79, "grad_norm": 1.3841293253155458, "learning_rate": 1.064159421311809e-06, "loss": 0.1711, "step": 9935 }, { "epoch": 0.79, "grad_norm": 1.1729021763892495, "learning_rate": 1.0633605674165753e-06, "loss": 0.1535, "step": 9936 }, { "epoch": 0.79, "grad_norm": 5.944858378451434, "learning_rate": 1.0625619777998746e-06, "loss": 0.509, "step": 9937 }, { "epoch": 0.8, "grad_norm": 1.3850922834110508, "learning_rate": 1.061763652515318e-06, "loss": 0.1873, "step": 9938 }, { "epoch": 0.8, "grad_norm": 8.974479260329819, "learning_rate": 1.0609655916165001e-06, "loss": 0.5892, "step": 9939 }, { "epoch": 0.8, "grad_norm": 1.4145160287732046, "learning_rate": 1.0601677951569967e-06, "loss": 0.2047, "step": 9940 }, { "epoch": 0.8, "grad_norm": 1.2707893181561614, "learning_rate": 1.0593702631903668e-06, "loss": 0.1833, "step": 9941 }, { "epoch": 0.8, "grad_norm": 7.424826215870271, "learning_rate": 1.0585729957701491e-06, "loss": 0.5179, "step": 9942 }, { "epoch": 0.8, "grad_norm": 1.4135029807660044, "learning_rate": 1.0577759929498704e-06, "loss": 0.1705, "step": 9943 }, { "epoch": 0.8, "grad_norm": 1.1061780767478582, "learning_rate": 1.0569792547830321e-06, "loss": 0.1171, "step": 9944 }, { "epoch": 0.8, "grad_norm": 1.2590441478411794, "learning_rate": 1.056182781323124e-06, "loss": 0.1276, "step": 9945 }, { "epoch": 0.8, "grad_norm": 1.300748557697865, "learning_rate": 1.0553865726236162e-06, "loss": 0.1767, "step": 9946 }, { "epoch": 0.8, "grad_norm": 1.3675357273246527, "learning_rate": 1.0545906287379577e-06, "loss": 0.1714, "step": 9947 }, { "epoch": 0.8, "grad_norm": 5.791488917962926, "learning_rate": 1.053794949719585e-06, "loss": 0.5518, "step": 9948 }, { "epoch": 0.8, "grad_norm": 1.162905316454507, "learning_rate": 1.0529995356219136e-06, "loss": 0.1368, "step": 9949 }, { "epoch": 0.8, "grad_norm": 1.2995644358985834, "learning_rate": 1.0522043864983428e-06, "loss": 0.1695, "step": 9950 }, { "epoch": 0.8, "grad_norm": 1.3957546600893522, "learning_rate": 1.051409502402253e-06, "loss": 0.1885, "step": 9951 }, { "epoch": 0.8, "grad_norm": 1.2705138969544578, "learning_rate": 1.0506148833870066e-06, "loss": 0.1648, "step": 9952 }, { "epoch": 0.8, "grad_norm": 7.603592887565018, "learning_rate": 1.0498205295059482e-06, "loss": 0.4356, "step": 9953 }, { "epoch": 0.8, "grad_norm": 1.3285689063144732, "learning_rate": 1.049026440812409e-06, "loss": 0.1791, "step": 9954 }, { "epoch": 0.8, "grad_norm": 1.2284098677397233, "learning_rate": 1.0482326173596947e-06, "loss": 0.1476, "step": 9955 }, { "epoch": 0.8, "grad_norm": 1.1601105981174051, "learning_rate": 1.0474390592010963e-06, "loss": 0.138, "step": 9956 }, { "epoch": 0.8, "grad_norm": 1.3594060912168493, "learning_rate": 1.0466457663898927e-06, "loss": 0.195, "step": 9957 }, { "epoch": 0.8, "grad_norm": 1.2448690300506786, "learning_rate": 1.0458527389793339e-06, "loss": 0.157, "step": 9958 }, { "epoch": 0.8, "grad_norm": 1.3323220869448942, "learning_rate": 1.0450599770226627e-06, "loss": 0.1833, "step": 9959 }, { "epoch": 0.8, "grad_norm": 1.415787210757145, "learning_rate": 1.0442674805730986e-06, "loss": 0.1755, "step": 9960 }, { "epoch": 0.8, "grad_norm": 1.4543179219423301, "learning_rate": 1.0434752496838435e-06, "loss": 0.1736, "step": 9961 }, { "epoch": 0.8, "grad_norm": 1.4665996593384516, "learning_rate": 1.042683284408083e-06, "loss": 0.163, "step": 9962 }, { "epoch": 0.8, "grad_norm": 5.648706940443995, "learning_rate": 1.041891584798984e-06, "loss": 0.5727, "step": 9963 }, { "epoch": 0.8, "grad_norm": 1.394710071179995, "learning_rate": 1.0411001509096952e-06, "loss": 0.1741, "step": 9964 }, { "epoch": 0.8, "grad_norm": 1.4257600857208834, "learning_rate": 1.0403089827933482e-06, "loss": 0.1802, "step": 9965 }, { "epoch": 0.8, "grad_norm": 1.5760279842274405, "learning_rate": 1.039518080503057e-06, "loss": 0.1908, "step": 9966 }, { "epoch": 0.8, "grad_norm": 1.4366350849790221, "learning_rate": 1.0387274440919149e-06, "loss": 0.1747, "step": 9967 }, { "epoch": 0.8, "grad_norm": 1.6765152122595948, "learning_rate": 1.0379370736130045e-06, "loss": 0.1922, "step": 9968 }, { "epoch": 0.8, "grad_norm": 1.3085727482768688, "learning_rate": 1.0371469691193802e-06, "loss": 0.1601, "step": 9969 }, { "epoch": 0.8, "grad_norm": 1.2297706640886659, "learning_rate": 1.0363571306640885e-06, "loss": 0.1799, "step": 9970 }, { "epoch": 0.8, "grad_norm": 1.4477931536776054, "learning_rate": 1.0355675583001518e-06, "loss": 0.2076, "step": 9971 }, { "epoch": 0.8, "grad_norm": 1.5680284072089725, "learning_rate": 1.0347782520805767e-06, "loss": 0.1653, "step": 9972 }, { "epoch": 0.8, "grad_norm": 1.2855100083732538, "learning_rate": 1.0339892120583517e-06, "loss": 0.202, "step": 9973 }, { "epoch": 0.8, "grad_norm": 1.350768838295333, "learning_rate": 1.0332004382864475e-06, "loss": 0.1596, "step": 9974 }, { "epoch": 0.8, "grad_norm": 1.5135083309328234, "learning_rate": 1.0324119308178166e-06, "loss": 0.152, "step": 9975 }, { "epoch": 0.8, "grad_norm": 1.4230355166752526, "learning_rate": 1.0316236897053939e-06, "loss": 0.1704, "step": 9976 }, { "epoch": 0.8, "grad_norm": 1.3684835133805795, "learning_rate": 1.030835715002097e-06, "loss": 0.2232, "step": 9977 }, { "epoch": 0.8, "grad_norm": 1.3194609257669547, "learning_rate": 1.0300480067608232e-06, "loss": 0.1562, "step": 9978 }, { "epoch": 0.8, "grad_norm": 7.758528060597476, "learning_rate": 1.0292605650344578e-06, "loss": 0.4557, "step": 9979 }, { "epoch": 0.8, "grad_norm": 1.194294415865535, "learning_rate": 1.0284733898758587e-06, "loss": 0.1433, "step": 9980 }, { "epoch": 0.8, "grad_norm": 1.3540332969031237, "learning_rate": 1.0276864813378756e-06, "loss": 0.147, "step": 9981 }, { "epoch": 0.8, "grad_norm": 1.39919577735848, "learning_rate": 1.0268998394733364e-06, "loss": 0.1761, "step": 9982 }, { "epoch": 0.8, "grad_norm": 1.3030668727110852, "learning_rate": 1.0261134643350457e-06, "loss": 0.1695, "step": 9983 }, { "epoch": 0.8, "grad_norm": 1.485700178341435, "learning_rate": 1.0253273559758003e-06, "loss": 0.2108, "step": 9984 }, { "epoch": 0.8, "grad_norm": 1.61418828446539, "learning_rate": 1.0245415144483722e-06, "loss": 0.2075, "step": 9985 }, { "epoch": 0.8, "grad_norm": 1.3760858704228203, "learning_rate": 1.0237559398055175e-06, "loss": 0.1632, "step": 9986 }, { "epoch": 0.8, "grad_norm": 1.520507919029659, "learning_rate": 1.022970632099972e-06, "loss": 0.1939, "step": 9987 }, { "epoch": 0.8, "grad_norm": 1.3807956913923043, "learning_rate": 1.0221855913844613e-06, "loss": 0.1679, "step": 9988 }, { "epoch": 0.8, "grad_norm": 1.3203257908824977, "learning_rate": 1.021400817711682e-06, "loss": 0.1759, "step": 9989 }, { "epoch": 0.8, "grad_norm": 1.3288954024080573, "learning_rate": 1.020616311134321e-06, "loss": 0.1715, "step": 9990 }, { "epoch": 0.8, "grad_norm": 1.2957800879574517, "learning_rate": 1.0198320717050452e-06, "loss": 0.1428, "step": 9991 }, { "epoch": 0.8, "grad_norm": 1.5339538411519418, "learning_rate": 1.0190480994764994e-06, "loss": 0.1868, "step": 9992 }, { "epoch": 0.8, "grad_norm": 1.509352197383225, "learning_rate": 1.0182643945013182e-06, "loss": 0.1709, "step": 9993 }, { "epoch": 0.8, "grad_norm": 1.4521454106620841, "learning_rate": 1.0174809568321126e-06, "loss": 0.2399, "step": 9994 }, { "epoch": 0.8, "grad_norm": 1.408399137157275, "learning_rate": 1.016697786521476e-06, "loss": 0.1406, "step": 9995 }, { "epoch": 0.8, "grad_norm": 1.3154743899784178, "learning_rate": 1.0159148836219869e-06, "loss": 0.1396, "step": 9996 }, { "epoch": 0.8, "grad_norm": 8.258736788933737, "learning_rate": 1.0151322481862019e-06, "loss": 0.5629, "step": 9997 }, { "epoch": 0.8, "grad_norm": 6.510595066973942, "learning_rate": 1.0143498802666623e-06, "loss": 0.6855, "step": 9998 }, { "epoch": 0.8, "grad_norm": 1.2668513919324005, "learning_rate": 1.0135677799158938e-06, "loss": 0.1696, "step": 9999 }, { "epoch": 0.8, "grad_norm": 1.2965564196795851, "learning_rate": 1.012785947186397e-06, "loss": 0.1463, "step": 10000 }, { "epoch": 0.8, "grad_norm": 1.5113976463799976, "learning_rate": 1.012004382130659e-06, "loss": 0.1437, "step": 10001 }, { "epoch": 0.8, "grad_norm": 1.1422974617878314, "learning_rate": 1.0112230848011528e-06, "loss": 0.1376, "step": 10002 }, { "epoch": 0.8, "grad_norm": 1.431943433380309, "learning_rate": 1.0104420552503235e-06, "loss": 0.1876, "step": 10003 }, { "epoch": 0.8, "grad_norm": 1.2763291212059635, "learning_rate": 1.009661293530609e-06, "loss": 0.1762, "step": 10004 }, { "epoch": 0.8, "grad_norm": 1.4575440803545459, "learning_rate": 1.008880799694421e-06, "loss": 0.1702, "step": 10005 }, { "epoch": 0.8, "grad_norm": 1.2582510640421622, "learning_rate": 1.0081005737941584e-06, "loss": 0.1682, "step": 10006 }, { "epoch": 0.8, "grad_norm": 1.2908233987972584, "learning_rate": 1.007320615882199e-06, "loss": 0.172, "step": 10007 }, { "epoch": 0.8, "grad_norm": 1.2629518894701661, "learning_rate": 1.0065409260109039e-06, "loss": 0.1244, "step": 10008 }, { "epoch": 0.8, "grad_norm": 1.5314323875631555, "learning_rate": 1.0057615042326163e-06, "loss": 0.171, "step": 10009 }, { "epoch": 0.8, "grad_norm": 1.2880729335275378, "learning_rate": 1.0049823505996608e-06, "loss": 0.1936, "step": 10010 }, { "epoch": 0.8, "grad_norm": 1.5795944641399233, "learning_rate": 1.0042034651643445e-06, "loss": 0.1765, "step": 10011 }, { "epoch": 0.8, "grad_norm": 10.455117094432103, "learning_rate": 1.0034248479789554e-06, "loss": 0.6211, "step": 10012 }, { "epoch": 0.8, "grad_norm": 1.3365878827926418, "learning_rate": 1.002646499095768e-06, "loss": 0.1736, "step": 10013 }, { "epoch": 0.8, "grad_norm": 1.2650396275193536, "learning_rate": 1.0018684185670296e-06, "loss": 0.1282, "step": 10014 }, { "epoch": 0.8, "grad_norm": 1.6322753331810513, "learning_rate": 1.00109060644498e-06, "loss": 0.2102, "step": 10015 }, { "epoch": 0.8, "grad_norm": 1.4607266041436615, "learning_rate": 1.0003130627818346e-06, "loss": 0.1899, "step": 10016 }, { "epoch": 0.8, "grad_norm": 6.823361724112483, "learning_rate": 9.9953578762979e-07, "loss": 0.4014, "step": 10017 }, { "epoch": 0.8, "grad_norm": 1.4584880952681074, "learning_rate": 9.987587810410298e-07, "loss": 0.1789, "step": 10018 }, { "epoch": 0.8, "grad_norm": 7.346574596436633, "learning_rate": 9.979820430677163e-07, "loss": 0.5732, "step": 10019 }, { "epoch": 0.8, "grad_norm": 1.2625369222092133, "learning_rate": 9.972055737619935e-07, "loss": 0.1906, "step": 10020 }, { "epoch": 0.8, "grad_norm": 1.4600617952469055, "learning_rate": 9.96429373175989e-07, "loss": 0.195, "step": 10021 }, { "epoch": 0.8, "grad_norm": 1.3992841356793502, "learning_rate": 9.956534413618108e-07, "loss": 0.1985, "step": 10022 }, { "epoch": 0.8, "grad_norm": 1.546058854916641, "learning_rate": 9.948777783715479e-07, "loss": 0.1801, "step": 10023 }, { "epoch": 0.8, "grad_norm": 1.5179555295351908, "learning_rate": 9.941023842572766e-07, "loss": 0.1879, "step": 10024 }, { "epoch": 0.8, "grad_norm": 1.4755108028697184, "learning_rate": 9.933272590710508e-07, "loss": 0.2336, "step": 10025 }, { "epoch": 0.8, "grad_norm": 1.201938120011718, "learning_rate": 9.925524028649036e-07, "loss": 0.1374, "step": 10026 }, { "epoch": 0.8, "grad_norm": 1.3021580616389516, "learning_rate": 9.917778156908564e-07, "loss": 0.1742, "step": 10027 }, { "epoch": 0.8, "grad_norm": 1.3863431901738839, "learning_rate": 9.91003497600909e-07, "loss": 0.1751, "step": 10028 }, { "epoch": 0.8, "grad_norm": 1.2301073836730858, "learning_rate": 9.902294486470442e-07, "loss": 0.1635, "step": 10029 }, { "epoch": 0.8, "grad_norm": 1.373950681062246, "learning_rate": 9.89455668881225e-07, "loss": 0.1491, "step": 10030 }, { "epoch": 0.8, "grad_norm": 1.4945135817804187, "learning_rate": 9.886821583553985e-07, "loss": 0.1625, "step": 10031 }, { "epoch": 0.8, "grad_norm": 5.149791924314525, "learning_rate": 9.879089171214912e-07, "loss": 0.5265, "step": 10032 }, { "epoch": 0.8, "grad_norm": 1.5076205177859383, "learning_rate": 9.871359452314167e-07, "loss": 0.186, "step": 10033 }, { "epoch": 0.8, "grad_norm": 1.6314450456455643, "learning_rate": 9.86363242737064e-07, "loss": 0.2116, "step": 10034 }, { "epoch": 0.8, "grad_norm": 1.3856036412719313, "learning_rate": 9.855908096903055e-07, "loss": 0.1793, "step": 10035 }, { "epoch": 0.8, "grad_norm": 5.312065376778226, "learning_rate": 9.848186461430026e-07, "loss": 0.4693, "step": 10036 }, { "epoch": 0.8, "grad_norm": 1.2612520084481689, "learning_rate": 9.840467521469865e-07, "loss": 0.1321, "step": 10037 }, { "epoch": 0.8, "grad_norm": 1.4247847936515623, "learning_rate": 9.832751277540809e-07, "loss": 0.1658, "step": 10038 }, { "epoch": 0.8, "grad_norm": 1.4647215319895794, "learning_rate": 9.825037730160864e-07, "loss": 0.2035, "step": 10039 }, { "epoch": 0.8, "grad_norm": 1.301271780810483, "learning_rate": 9.81732687984786e-07, "loss": 0.1704, "step": 10040 }, { "epoch": 0.8, "grad_norm": 1.354442230069663, "learning_rate": 9.809618727119451e-07, "loss": 0.1937, "step": 10041 }, { "epoch": 0.8, "grad_norm": 1.37472021069647, "learning_rate": 9.801913272493114e-07, "loss": 0.1615, "step": 10042 }, { "epoch": 0.8, "grad_norm": 1.3594527216471728, "learning_rate": 9.79421051648613e-07, "loss": 0.193, "step": 10043 }, { "epoch": 0.8, "grad_norm": 1.3056440212367473, "learning_rate": 9.786510459615611e-07, "loss": 0.1443, "step": 10044 }, { "epoch": 0.8, "grad_norm": 1.3322938711358383, "learning_rate": 9.778813102398494e-07, "loss": 0.1767, "step": 10045 }, { "epoch": 0.8, "grad_norm": 6.368881752894228, "learning_rate": 9.771118445351496e-07, "loss": 0.6692, "step": 10046 }, { "epoch": 0.8, "grad_norm": 1.2688116061534769, "learning_rate": 9.76342648899124e-07, "loss": 0.1864, "step": 10047 }, { "epoch": 0.8, "grad_norm": 1.4286197167591534, "learning_rate": 9.755737233834045e-07, "loss": 0.1648, "step": 10048 }, { "epoch": 0.8, "grad_norm": 1.3696945301502303, "learning_rate": 9.748050680396159e-07, "loss": 0.1843, "step": 10049 }, { "epoch": 0.8, "grad_norm": 1.572372266645736, "learning_rate": 9.740366829193587e-07, "loss": 0.2255, "step": 10050 }, { "epoch": 0.8, "grad_norm": 1.348929924351992, "learning_rate": 9.732685680742176e-07, "loss": 0.1616, "step": 10051 }, { "epoch": 0.8, "grad_norm": 1.2850472358091638, "learning_rate": 9.725007235557576e-07, "loss": 0.1603, "step": 10052 }, { "epoch": 0.8, "grad_norm": 1.308562313068008, "learning_rate": 9.717331494155275e-07, "loss": 0.1769, "step": 10053 }, { "epoch": 0.8, "grad_norm": 1.1715487025446583, "learning_rate": 9.709658457050558e-07, "loss": 0.1374, "step": 10054 }, { "epoch": 0.8, "grad_norm": 1.4853189358539207, "learning_rate": 9.701988124758544e-07, "loss": 0.1848, "step": 10055 }, { "epoch": 0.8, "grad_norm": 1.4012689182217613, "learning_rate": 9.694320497794163e-07, "loss": 0.1625, "step": 10056 }, { "epoch": 0.8, "grad_norm": 1.204404334360558, "learning_rate": 9.686655576672155e-07, "loss": 0.1355, "step": 10057 }, { "epoch": 0.8, "grad_norm": 8.536848817875134, "learning_rate": 9.678993361907135e-07, "loss": 0.6657, "step": 10058 }, { "epoch": 0.8, "grad_norm": 1.446758772543423, "learning_rate": 9.671333854013425e-07, "loss": 0.1859, "step": 10059 }, { "epoch": 0.8, "grad_norm": 1.2761416967895671, "learning_rate": 9.663677053505283e-07, "loss": 0.1261, "step": 10060 }, { "epoch": 0.8, "grad_norm": 1.5326020254158912, "learning_rate": 9.656022960896726e-07, "loss": 0.1909, "step": 10061 }, { "epoch": 0.8, "grad_norm": 1.4577038129245006, "learning_rate": 9.64837157670156e-07, "loss": 0.1804, "step": 10062 }, { "epoch": 0.81, "grad_norm": 1.3703161050977117, "learning_rate": 9.640722901433481e-07, "loss": 0.1931, "step": 10063 }, { "epoch": 0.81, "grad_norm": 1.3940980140958328, "learning_rate": 9.633076935605956e-07, "loss": 0.1637, "step": 10064 }, { "epoch": 0.81, "grad_norm": 1.193801389969481, "learning_rate": 9.625433679732288e-07, "loss": 0.1409, "step": 10065 }, { "epoch": 0.81, "grad_norm": 1.4505685167036253, "learning_rate": 9.617793134325571e-07, "loss": 0.137, "step": 10066 }, { "epoch": 0.81, "grad_norm": 1.229713337947451, "learning_rate": 9.610155299898782e-07, "loss": 0.1791, "step": 10067 }, { "epoch": 0.81, "grad_norm": 1.3515869156981164, "learning_rate": 9.602520176964619e-07, "loss": 0.1714, "step": 10068 }, { "epoch": 0.81, "grad_norm": 1.360944203376149, "learning_rate": 9.594887766035688e-07, "loss": 0.1686, "step": 10069 }, { "epoch": 0.81, "grad_norm": 1.5328530078835172, "learning_rate": 9.587258067624373e-07, "loss": 0.2233, "step": 10070 }, { "epoch": 0.81, "grad_norm": 1.4136293820562986, "learning_rate": 9.579631082242846e-07, "loss": 0.1607, "step": 10071 }, { "epoch": 0.81, "grad_norm": 1.3307677882007, "learning_rate": 9.572006810403168e-07, "loss": 0.1765, "step": 10072 }, { "epoch": 0.81, "grad_norm": 1.388010675280717, "learning_rate": 9.564385252617164e-07, "loss": 0.1936, "step": 10073 }, { "epoch": 0.81, "grad_norm": 1.3439623839947026, "learning_rate": 9.556766409396494e-07, "loss": 0.1417, "step": 10074 }, { "epoch": 0.81, "grad_norm": 1.5410267554431705, "learning_rate": 9.549150281252633e-07, "loss": 0.1902, "step": 10075 }, { "epoch": 0.81, "grad_norm": 6.402278580065271, "learning_rate": 9.54153686869687e-07, "loss": 0.4114, "step": 10076 }, { "epoch": 0.81, "grad_norm": 6.942463613217658, "learning_rate": 9.533926172240327e-07, "loss": 0.5871, "step": 10077 }, { "epoch": 0.81, "grad_norm": 1.3959807496627432, "learning_rate": 9.526318192393924e-07, "loss": 0.1872, "step": 10078 }, { "epoch": 0.81, "grad_norm": 1.441370761589804, "learning_rate": 9.518712929668416e-07, "loss": 0.1716, "step": 10079 }, { "epoch": 0.81, "grad_norm": 1.3495230536321061, "learning_rate": 9.511110384574345e-07, "loss": 0.1727, "step": 10080 }, { "epoch": 0.81, "grad_norm": 1.2211091018447036, "learning_rate": 9.503510557622137e-07, "loss": 0.1621, "step": 10081 }, { "epoch": 0.81, "grad_norm": 1.1767406562933385, "learning_rate": 9.495913449321942e-07, "loss": 0.1516, "step": 10082 }, { "epoch": 0.81, "grad_norm": 1.2368803547707719, "learning_rate": 9.48831906018381e-07, "loss": 0.1582, "step": 10083 }, { "epoch": 0.81, "grad_norm": 1.2957876988295747, "learning_rate": 9.480727390717565e-07, "loss": 0.189, "step": 10084 }, { "epoch": 0.81, "grad_norm": 7.5991153851941915, "learning_rate": 9.473138441432855e-07, "loss": 0.5869, "step": 10085 }, { "epoch": 0.81, "grad_norm": 1.3022780964024205, "learning_rate": 9.465552212839158e-07, "loss": 0.1621, "step": 10086 }, { "epoch": 0.81, "grad_norm": 1.3377884186304294, "learning_rate": 9.457968705445753e-07, "loss": 0.1826, "step": 10087 }, { "epoch": 0.81, "grad_norm": 1.5249628537459352, "learning_rate": 9.45038791976175e-07, "loss": 0.2333, "step": 10088 }, { "epoch": 0.81, "grad_norm": 1.2496405454674895, "learning_rate": 9.442809856296059e-07, "loss": 0.1538, "step": 10089 }, { "epoch": 0.81, "grad_norm": 1.3712781912859877, "learning_rate": 9.435234515557434e-07, "loss": 0.187, "step": 10090 }, { "epoch": 0.81, "grad_norm": 8.712126387153397, "learning_rate": 9.427661898054402e-07, "loss": 0.5242, "step": 10091 }, { "epoch": 0.81, "grad_norm": 1.2821361773532347, "learning_rate": 9.420092004295389e-07, "loss": 0.1736, "step": 10092 }, { "epoch": 0.81, "grad_norm": 1.429774165106857, "learning_rate": 9.412524834788522e-07, "loss": 0.1767, "step": 10093 }, { "epoch": 0.81, "grad_norm": 1.3546507984119363, "learning_rate": 9.40496039004185e-07, "loss": 0.1891, "step": 10094 }, { "epoch": 0.81, "grad_norm": 1.3985843222252938, "learning_rate": 9.397398670563201e-07, "loss": 0.1662, "step": 10095 }, { "epoch": 0.81, "grad_norm": 1.3557829102545365, "learning_rate": 9.389839676860174e-07, "loss": 0.1807, "step": 10096 }, { "epoch": 0.81, "grad_norm": 1.2329748714711344, "learning_rate": 9.382283409440262e-07, "loss": 0.1717, "step": 10097 }, { "epoch": 0.81, "grad_norm": 1.179315096265657, "learning_rate": 9.374729868810739e-07, "loss": 0.1241, "step": 10098 }, { "epoch": 0.81, "grad_norm": 1.1497764550843674, "learning_rate": 9.367179055478681e-07, "loss": 0.1582, "step": 10099 }, { "epoch": 0.81, "grad_norm": 6.986894478010182, "learning_rate": 9.359630969951012e-07, "loss": 0.5949, "step": 10100 }, { "epoch": 0.81, "grad_norm": 1.2197150578858686, "learning_rate": 9.352085612734452e-07, "loss": 0.1889, "step": 10101 }, { "epoch": 0.81, "grad_norm": 1.3800968861366738, "learning_rate": 9.344542984335525e-07, "loss": 0.1823, "step": 10102 }, { "epoch": 0.81, "grad_norm": 6.4281243335426685, "learning_rate": 9.337003085260638e-07, "loss": 0.4373, "step": 10103 }, { "epoch": 0.81, "grad_norm": 5.526783397251375, "learning_rate": 9.329465916015923e-07, "loss": 0.482, "step": 10104 }, { "epoch": 0.81, "grad_norm": 1.4055324414121964, "learning_rate": 9.321931477107377e-07, "loss": 0.1495, "step": 10105 }, { "epoch": 0.81, "grad_norm": 1.4401525537341084, "learning_rate": 9.314399769040844e-07, "loss": 0.186, "step": 10106 }, { "epoch": 0.81, "grad_norm": 1.4846350846867904, "learning_rate": 9.306870792321903e-07, "loss": 0.1693, "step": 10107 }, { "epoch": 0.81, "grad_norm": 1.6982012014069987, "learning_rate": 9.299344547456035e-07, "loss": 0.1918, "step": 10108 }, { "epoch": 0.81, "grad_norm": 1.4553762991102994, "learning_rate": 9.291821034948484e-07, "loss": 0.2124, "step": 10109 }, { "epoch": 0.81, "grad_norm": 1.3083853785930153, "learning_rate": 9.284300255304329e-07, "loss": 0.1375, "step": 10110 }, { "epoch": 0.81, "grad_norm": 1.1610137892233543, "learning_rate": 9.276782209028446e-07, "loss": 0.1431, "step": 10111 }, { "epoch": 0.81, "grad_norm": 1.3412238319325342, "learning_rate": 9.269266896625584e-07, "loss": 0.1798, "step": 10112 }, { "epoch": 0.81, "grad_norm": 1.363693221338907, "learning_rate": 9.261754318600235e-07, "loss": 0.185, "step": 10113 }, { "epoch": 0.81, "grad_norm": 1.2262690995616388, "learning_rate": 9.254244475456742e-07, "loss": 0.1498, "step": 10114 }, { "epoch": 0.81, "grad_norm": 1.3229844619457494, "learning_rate": 9.246737367699287e-07, "loss": 0.1555, "step": 10115 }, { "epoch": 0.81, "grad_norm": 1.3770174025288464, "learning_rate": 9.239232995831815e-07, "loss": 0.1969, "step": 10116 }, { "epoch": 0.81, "grad_norm": 1.4644121366983032, "learning_rate": 9.231731360358137e-07, "loss": 0.2008, "step": 10117 }, { "epoch": 0.81, "grad_norm": 5.9286313986876635, "learning_rate": 9.224232461781863e-07, "loss": 0.6244, "step": 10118 }, { "epoch": 0.81, "grad_norm": 1.3478104728444238, "learning_rate": 9.216736300606405e-07, "loss": 0.1453, "step": 10119 }, { "epoch": 0.81, "grad_norm": 9.062679257046323, "learning_rate": 9.209242877335006e-07, "loss": 0.5292, "step": 10120 }, { "epoch": 0.81, "grad_norm": 1.3631776764650816, "learning_rate": 9.201752192470725e-07, "loss": 0.1752, "step": 10121 }, { "epoch": 0.81, "grad_norm": 1.4125811182196126, "learning_rate": 9.194264246516438e-07, "loss": 0.1655, "step": 10122 }, { "epoch": 0.81, "grad_norm": 1.2692478886240817, "learning_rate": 9.186779039974825e-07, "loss": 0.1667, "step": 10123 }, { "epoch": 0.81, "grad_norm": 1.1785626696055125, "learning_rate": 9.179296573348389e-07, "loss": 0.1505, "step": 10124 }, { "epoch": 0.81, "grad_norm": 1.1580569792355342, "learning_rate": 9.171816847139447e-07, "loss": 0.1788, "step": 10125 }, { "epoch": 0.81, "grad_norm": 1.2877269136183882, "learning_rate": 9.164339861850175e-07, "loss": 0.1397, "step": 10126 }, { "epoch": 0.81, "grad_norm": 1.4043346446213893, "learning_rate": 9.156865617982463e-07, "loss": 0.1709, "step": 10127 }, { "epoch": 0.81, "grad_norm": 1.3162552386435677, "learning_rate": 9.14939411603813e-07, "loss": 0.1602, "step": 10128 }, { "epoch": 0.81, "grad_norm": 1.4356209036912964, "learning_rate": 9.141925356518738e-07, "loss": 0.1797, "step": 10129 }, { "epoch": 0.81, "grad_norm": 9.405150612804565, "learning_rate": 9.134459339925694e-07, "loss": 0.5443, "step": 10130 }, { "epoch": 0.81, "grad_norm": 1.1413983824023846, "learning_rate": 9.126996066760213e-07, "loss": 0.1306, "step": 10131 }, { "epoch": 0.81, "grad_norm": 1.2461612744569732, "learning_rate": 9.119535537523322e-07, "loss": 0.1513, "step": 10132 }, { "epoch": 0.81, "grad_norm": 1.2036110814627825, "learning_rate": 9.112077752715876e-07, "loss": 0.1673, "step": 10133 }, { "epoch": 0.81, "grad_norm": 1.3496340584246618, "learning_rate": 9.104622712838535e-07, "loss": 0.1901, "step": 10134 }, { "epoch": 0.81, "grad_norm": 1.2153105576874768, "learning_rate": 9.097170418391782e-07, "loss": 0.1486, "step": 10135 }, { "epoch": 0.81, "grad_norm": 1.29087728148871, "learning_rate": 9.089720869875896e-07, "loss": 0.1762, "step": 10136 }, { "epoch": 0.81, "grad_norm": 1.1990201679849206, "learning_rate": 9.082274067791025e-07, "loss": 0.1723, "step": 10137 }, { "epoch": 0.81, "grad_norm": 1.4442979123706765, "learning_rate": 9.074830012637059e-07, "loss": 0.179, "step": 10138 }, { "epoch": 0.81, "grad_norm": 1.3194447894321615, "learning_rate": 9.067388704913743e-07, "loss": 0.179, "step": 10139 }, { "epoch": 0.81, "grad_norm": 1.3310475818763872, "learning_rate": 9.059950145120666e-07, "loss": 0.151, "step": 10140 }, { "epoch": 0.81, "grad_norm": 1.4155127187803922, "learning_rate": 9.052514333757156e-07, "loss": 0.1987, "step": 10141 }, { "epoch": 0.81, "grad_norm": 1.4405919859424205, "learning_rate": 9.045081271322437e-07, "loss": 0.1662, "step": 10142 }, { "epoch": 0.81, "grad_norm": 6.9126493216228955, "learning_rate": 9.037650958315497e-07, "loss": 0.551, "step": 10143 }, { "epoch": 0.81, "grad_norm": 1.5951300933305457, "learning_rate": 9.03022339523516e-07, "loss": 0.1929, "step": 10144 }, { "epoch": 0.81, "grad_norm": 1.1274138164519414, "learning_rate": 9.022798582580067e-07, "loss": 0.1467, "step": 10145 }, { "epoch": 0.81, "grad_norm": 1.5903364046697648, "learning_rate": 9.015376520848651e-07, "loss": 0.1447, "step": 10146 }, { "epoch": 0.81, "grad_norm": 1.3020114522763622, "learning_rate": 9.007957210539197e-07, "loss": 0.188, "step": 10147 }, { "epoch": 0.81, "grad_norm": 1.565293678684902, "learning_rate": 9.00054065214977e-07, "loss": 0.1723, "step": 10148 }, { "epoch": 0.81, "grad_norm": 1.389178217662915, "learning_rate": 8.993126846178274e-07, "loss": 0.1911, "step": 10149 }, { "epoch": 0.81, "grad_norm": 1.3146572208404759, "learning_rate": 8.985715793122407e-07, "loss": 0.1579, "step": 10150 }, { "epoch": 0.81, "grad_norm": 1.3014947288220622, "learning_rate": 8.978307493479732e-07, "loss": 0.1649, "step": 10151 }, { "epoch": 0.81, "grad_norm": 1.5167848691754688, "learning_rate": 8.97090194774754e-07, "loss": 0.1892, "step": 10152 }, { "epoch": 0.81, "grad_norm": 1.3089259110386358, "learning_rate": 8.963499156423028e-07, "loss": 0.191, "step": 10153 }, { "epoch": 0.81, "grad_norm": 1.5133426054415562, "learning_rate": 8.956099120003153e-07, "loss": 0.2266, "step": 10154 }, { "epoch": 0.81, "grad_norm": 1.6346932047428013, "learning_rate": 8.948701838984702e-07, "loss": 0.1675, "step": 10155 }, { "epoch": 0.81, "grad_norm": 1.2817442343581882, "learning_rate": 8.941307313864278e-07, "loss": 0.15, "step": 10156 }, { "epoch": 0.81, "grad_norm": 1.232264931871029, "learning_rate": 8.933915545138294e-07, "loss": 0.1648, "step": 10157 }, { "epoch": 0.81, "grad_norm": 1.5466996781683333, "learning_rate": 8.926526533302987e-07, "loss": 0.2042, "step": 10158 }, { "epoch": 0.81, "grad_norm": 1.343454333888892, "learning_rate": 8.919140278854388e-07, "loss": 0.1724, "step": 10159 }, { "epoch": 0.81, "grad_norm": 9.11117693645915, "learning_rate": 8.911756782288394e-07, "loss": 0.4436, "step": 10160 }, { "epoch": 0.81, "grad_norm": 1.3681415721076284, "learning_rate": 8.904376044100638e-07, "loss": 0.1708, "step": 10161 }, { "epoch": 0.81, "grad_norm": 1.2694094600991526, "learning_rate": 8.896998064786644e-07, "loss": 0.1422, "step": 10162 }, { "epoch": 0.81, "grad_norm": 1.5185317545157733, "learning_rate": 8.889622844841711e-07, "loss": 0.19, "step": 10163 }, { "epoch": 0.81, "grad_norm": 1.4643123021737832, "learning_rate": 8.882250384760949e-07, "loss": 0.2178, "step": 10164 }, { "epoch": 0.81, "grad_norm": 1.2730700245486566, "learning_rate": 8.874880685039305e-07, "loss": 0.1494, "step": 10165 }, { "epoch": 0.81, "grad_norm": 1.3224266243528155, "learning_rate": 8.867513746171525e-07, "loss": 0.168, "step": 10166 }, { "epoch": 0.81, "grad_norm": 1.3743402053642682, "learning_rate": 8.860149568652177e-07, "loss": 0.1879, "step": 10167 }, { "epoch": 0.81, "grad_norm": 1.3521331636649152, "learning_rate": 8.852788152975627e-07, "loss": 0.1714, "step": 10168 }, { "epoch": 0.81, "grad_norm": 1.3785988865360714, "learning_rate": 8.845429499636088e-07, "loss": 0.1745, "step": 10169 }, { "epoch": 0.81, "grad_norm": 1.2955353682641584, "learning_rate": 8.838073609127546e-07, "loss": 0.1757, "step": 10170 }, { "epoch": 0.81, "grad_norm": 1.5032068541606938, "learning_rate": 8.830720481943861e-07, "loss": 0.1771, "step": 10171 }, { "epoch": 0.81, "grad_norm": 1.4066557829522546, "learning_rate": 8.823370118578628e-07, "loss": 0.1836, "step": 10172 }, { "epoch": 0.81, "grad_norm": 6.832307198834741, "learning_rate": 8.816022519525325e-07, "loss": 0.6178, "step": 10173 }, { "epoch": 0.81, "grad_norm": 1.3504139353560576, "learning_rate": 8.808677685277228e-07, "loss": 0.1527, "step": 10174 }, { "epoch": 0.81, "grad_norm": 1.4863088404845417, "learning_rate": 8.801335616327378e-07, "loss": 0.1578, "step": 10175 }, { "epoch": 0.81, "grad_norm": 1.3123930284380871, "learning_rate": 8.793996313168713e-07, "loss": 0.1767, "step": 10176 }, { "epoch": 0.81, "grad_norm": 1.4173868503259681, "learning_rate": 8.786659776293915e-07, "loss": 0.191, "step": 10177 }, { "epoch": 0.81, "grad_norm": 1.3300662074215983, "learning_rate": 8.779326006195521e-07, "loss": 0.1803, "step": 10178 }, { "epoch": 0.81, "grad_norm": 1.432763562212586, "learning_rate": 8.771995003365869e-07, "loss": 0.1822, "step": 10179 }, { "epoch": 0.81, "grad_norm": 1.581227949084694, "learning_rate": 8.764666768297108e-07, "loss": 0.1851, "step": 10180 }, { "epoch": 0.81, "grad_norm": 1.1725873961372244, "learning_rate": 8.757341301481187e-07, "loss": 0.1403, "step": 10181 }, { "epoch": 0.81, "grad_norm": 1.314622327606304, "learning_rate": 8.750018603409937e-07, "loss": 0.1239, "step": 10182 }, { "epoch": 0.81, "grad_norm": 1.3602771488132364, "learning_rate": 8.742698674574901e-07, "loss": 0.1369, "step": 10183 }, { "epoch": 0.81, "grad_norm": 1.2930471104708752, "learning_rate": 8.7353815154675e-07, "loss": 0.1685, "step": 10184 }, { "epoch": 0.81, "grad_norm": 8.320414040092015, "learning_rate": 8.728067126578988e-07, "loss": 0.5274, "step": 10185 }, { "epoch": 0.81, "grad_norm": 1.566157736565579, "learning_rate": 8.720755508400358e-07, "loss": 0.1887, "step": 10186 }, { "epoch": 0.81, "grad_norm": 1.2926427349554652, "learning_rate": 8.713446661422487e-07, "loss": 0.1705, "step": 10187 }, { "epoch": 0.82, "grad_norm": 1.3268460563198965, "learning_rate": 8.706140586136042e-07, "loss": 0.1732, "step": 10188 }, { "epoch": 0.82, "grad_norm": 1.2833558655714281, "learning_rate": 8.698837283031486e-07, "loss": 0.1883, "step": 10189 }, { "epoch": 0.82, "grad_norm": 1.4524148890269948, "learning_rate": 8.691536752599128e-07, "loss": 0.2212, "step": 10190 }, { "epoch": 0.82, "grad_norm": 1.2532004466690323, "learning_rate": 8.684238995329064e-07, "loss": 0.1541, "step": 10191 }, { "epoch": 0.82, "grad_norm": 1.4253423445528368, "learning_rate": 8.676944011711219e-07, "loss": 0.146, "step": 10192 }, { "epoch": 0.82, "grad_norm": 6.350281275742027, "learning_rate": 8.66965180223533e-07, "loss": 0.5016, "step": 10193 }, { "epoch": 0.82, "grad_norm": 1.4374510606643063, "learning_rate": 8.662362367390936e-07, "loss": 0.1757, "step": 10194 }, { "epoch": 0.82, "grad_norm": 1.380235153998684, "learning_rate": 8.655075707667399e-07, "loss": 0.179, "step": 10195 }, { "epoch": 0.82, "grad_norm": 1.4426369974105007, "learning_rate": 8.647791823553903e-07, "loss": 0.2063, "step": 10196 }, { "epoch": 0.82, "grad_norm": 1.2096274356416226, "learning_rate": 8.640510715539441e-07, "loss": 0.1143, "step": 10197 }, { "epoch": 0.82, "grad_norm": 1.3078016981872718, "learning_rate": 8.633232384112805e-07, "loss": 0.188, "step": 10198 }, { "epoch": 0.82, "grad_norm": 1.1304389196037985, "learning_rate": 8.625956829762616e-07, "loss": 0.1402, "step": 10199 }, { "epoch": 0.82, "grad_norm": 8.295331212149966, "learning_rate": 8.618684052977305e-07, "loss": 0.6101, "step": 10200 }, { "epoch": 0.82, "grad_norm": 14.217922861346684, "learning_rate": 8.611414054245115e-07, "loss": 0.8411, "step": 10201 }, { "epoch": 0.82, "grad_norm": 1.3834796284681978, "learning_rate": 8.6041468340541e-07, "loss": 0.1693, "step": 10202 }, { "epoch": 0.82, "grad_norm": 1.5542061501098994, "learning_rate": 8.596882392892131e-07, "loss": 0.1859, "step": 10203 }, { "epoch": 0.82, "grad_norm": 6.126338155061678, "learning_rate": 8.589620731246883e-07, "loss": 0.5776, "step": 10204 }, { "epoch": 0.82, "grad_norm": 1.476175690080442, "learning_rate": 8.582361849605891e-07, "loss": 0.209, "step": 10205 }, { "epoch": 0.82, "grad_norm": 1.2925080028539806, "learning_rate": 8.575105748456408e-07, "loss": 0.1711, "step": 10206 }, { "epoch": 0.82, "grad_norm": 1.4374310240949202, "learning_rate": 8.567852428285606e-07, "loss": 0.1916, "step": 10207 }, { "epoch": 0.82, "grad_norm": 1.3711472107200957, "learning_rate": 8.560601889580416e-07, "loss": 0.1379, "step": 10208 }, { "epoch": 0.82, "grad_norm": 1.4185831402537163, "learning_rate": 8.553354132827552e-07, "loss": 0.1811, "step": 10209 }, { "epoch": 0.82, "grad_norm": 1.284024720252546, "learning_rate": 8.546109158513615e-07, "loss": 0.1266, "step": 10210 }, { "epoch": 0.82, "grad_norm": 1.3846411167175223, "learning_rate": 8.53886696712497e-07, "loss": 0.1641, "step": 10211 }, { "epoch": 0.82, "grad_norm": 1.3589856101357376, "learning_rate": 8.531627559147809e-07, "loss": 0.159, "step": 10212 }, { "epoch": 0.82, "grad_norm": 1.2942546956250405, "learning_rate": 8.524390935068133e-07, "loss": 0.1649, "step": 10213 }, { "epoch": 0.82, "grad_norm": 1.4209052169030074, "learning_rate": 8.517157095371764e-07, "loss": 0.1829, "step": 10214 }, { "epoch": 0.82, "grad_norm": 1.528574988377464, "learning_rate": 8.509926040544308e-07, "loss": 0.1897, "step": 10215 }, { "epoch": 0.82, "grad_norm": 1.4277434223581493, "learning_rate": 8.502697771071255e-07, "loss": 0.1623, "step": 10216 }, { "epoch": 0.82, "grad_norm": 1.3526510814419397, "learning_rate": 8.495472287437823e-07, "loss": 0.1592, "step": 10217 }, { "epoch": 0.82, "grad_norm": 5.201078939685123, "learning_rate": 8.488249590129078e-07, "loss": 0.5186, "step": 10218 }, { "epoch": 0.82, "grad_norm": 1.5419635103865734, "learning_rate": 8.481029679629932e-07, "loss": 0.1829, "step": 10219 }, { "epoch": 0.82, "grad_norm": 1.3076809294870706, "learning_rate": 8.473812556425037e-07, "loss": 0.1656, "step": 10220 }, { "epoch": 0.82, "grad_norm": 1.4475514325305514, "learning_rate": 8.466598220998939e-07, "loss": 0.2103, "step": 10221 }, { "epoch": 0.82, "grad_norm": 1.4635748686452354, "learning_rate": 8.459386673835945e-07, "loss": 0.1576, "step": 10222 }, { "epoch": 0.82, "grad_norm": 1.5854066548269123, "learning_rate": 8.45217791542019e-07, "loss": 0.1956, "step": 10223 }, { "epoch": 0.82, "grad_norm": 1.3487467933644017, "learning_rate": 8.444971946235608e-07, "loss": 0.1756, "step": 10224 }, { "epoch": 0.82, "grad_norm": 1.2799984585236162, "learning_rate": 8.437768766765975e-07, "loss": 0.1437, "step": 10225 }, { "epoch": 0.82, "grad_norm": 1.3299686338232437, "learning_rate": 8.430568377494847e-07, "loss": 0.1995, "step": 10226 }, { "epoch": 0.82, "grad_norm": 6.780738622550268, "learning_rate": 8.423370778905615e-07, "loss": 0.4675, "step": 10227 }, { "epoch": 0.82, "grad_norm": 1.1932586788249298, "learning_rate": 8.416175971481471e-07, "loss": 0.1586, "step": 10228 }, { "epoch": 0.82, "grad_norm": 7.045581922329502, "learning_rate": 8.408983955705424e-07, "loss": 0.5504, "step": 10229 }, { "epoch": 0.82, "grad_norm": 1.177603483615978, "learning_rate": 8.40179473206032e-07, "loss": 0.1296, "step": 10230 }, { "epoch": 0.82, "grad_norm": 1.24634579885937, "learning_rate": 8.394608301028751e-07, "loss": 0.1384, "step": 10231 }, { "epoch": 0.82, "grad_norm": 34.178454368488396, "learning_rate": 8.387424663093197e-07, "loss": 0.4803, "step": 10232 }, { "epoch": 0.82, "grad_norm": 1.4796242159355029, "learning_rate": 8.380243818735901e-07, "loss": 0.213, "step": 10233 }, { "epoch": 0.82, "grad_norm": 1.182760868179684, "learning_rate": 8.373065768438948e-07, "loss": 0.1776, "step": 10234 }, { "epoch": 0.82, "grad_norm": 1.3875879422596897, "learning_rate": 8.365890512684211e-07, "loss": 0.1753, "step": 10235 }, { "epoch": 0.82, "grad_norm": 1.3055608926558224, "learning_rate": 8.35871805195339e-07, "loss": 0.1725, "step": 10236 }, { "epoch": 0.82, "grad_norm": 1.3900499999286327, "learning_rate": 8.351548386727998e-07, "loss": 0.1658, "step": 10237 }, { "epoch": 0.82, "grad_norm": 1.4287482035934234, "learning_rate": 8.344381517489336e-07, "loss": 0.1837, "step": 10238 }, { "epoch": 0.82, "grad_norm": 1.2754125935991587, "learning_rate": 8.337217444718582e-07, "loss": 0.1793, "step": 10239 }, { "epoch": 0.82, "grad_norm": 1.2165289290680796, "learning_rate": 8.330056168896628e-07, "loss": 0.1288, "step": 10240 }, { "epoch": 0.82, "grad_norm": 1.240056194016386, "learning_rate": 8.322897690504267e-07, "loss": 0.1645, "step": 10241 }, { "epoch": 0.82, "grad_norm": 1.4599209036711218, "learning_rate": 8.315742010022065e-07, "loss": 0.2121, "step": 10242 }, { "epoch": 0.82, "grad_norm": 1.3473861084448016, "learning_rate": 8.3085891279304e-07, "loss": 0.1593, "step": 10243 }, { "epoch": 0.82, "grad_norm": 1.068711740818255, "learning_rate": 8.301439044709464e-07, "loss": 0.1337, "step": 10244 }, { "epoch": 0.82, "grad_norm": 1.3978541147115797, "learning_rate": 8.294291760839268e-07, "loss": 0.1906, "step": 10245 }, { "epoch": 0.82, "grad_norm": 1.3591865112054997, "learning_rate": 8.287147276799629e-07, "loss": 0.1744, "step": 10246 }, { "epoch": 0.82, "grad_norm": 1.3809659197413373, "learning_rate": 8.280005593070184e-07, "loss": 0.1895, "step": 10247 }, { "epoch": 0.82, "grad_norm": 1.3398066610758947, "learning_rate": 8.272866710130362e-07, "loss": 0.1357, "step": 10248 }, { "epoch": 0.82, "grad_norm": 1.3317007447203062, "learning_rate": 8.265730628459417e-07, "loss": 0.167, "step": 10249 }, { "epoch": 0.82, "grad_norm": 9.943742627975102, "learning_rate": 8.258597348536452e-07, "loss": 0.6698, "step": 10250 }, { "epoch": 0.82, "grad_norm": 1.27753281613557, "learning_rate": 8.251466870840292e-07, "loss": 0.1658, "step": 10251 }, { "epoch": 0.82, "grad_norm": 1.435594771040495, "learning_rate": 8.24433919584966e-07, "loss": 0.1877, "step": 10252 }, { "epoch": 0.82, "grad_norm": 1.3489143008296707, "learning_rate": 8.237214324043069e-07, "loss": 0.1665, "step": 10253 }, { "epoch": 0.82, "grad_norm": 1.3345924308029946, "learning_rate": 8.230092255898797e-07, "loss": 0.1512, "step": 10254 }, { "epoch": 0.82, "grad_norm": 1.4335615320364972, "learning_rate": 8.222972991894995e-07, "loss": 0.1603, "step": 10255 }, { "epoch": 0.82, "grad_norm": 1.296630469785851, "learning_rate": 8.215856532509597e-07, "loss": 0.1567, "step": 10256 }, { "epoch": 0.82, "grad_norm": 1.3172950175589975, "learning_rate": 8.208742878220355e-07, "loss": 0.1599, "step": 10257 }, { "epoch": 0.82, "grad_norm": 1.4205039970725006, "learning_rate": 8.201632029504825e-07, "loss": 0.2046, "step": 10258 }, { "epoch": 0.82, "grad_norm": 5.669082247410548, "learning_rate": 8.194523986840375e-07, "loss": 0.496, "step": 10259 }, { "epoch": 0.82, "grad_norm": 1.3981452525884392, "learning_rate": 8.187418750704202e-07, "loss": 0.1513, "step": 10260 }, { "epoch": 0.82, "grad_norm": 1.4767843656938935, "learning_rate": 8.180316321573295e-07, "loss": 0.1753, "step": 10261 }, { "epoch": 0.82, "grad_norm": 7.55169400769144, "learning_rate": 8.173216699924458e-07, "loss": 0.6634, "step": 10262 }, { "epoch": 0.82, "grad_norm": 1.3578846611849653, "learning_rate": 8.166119886234302e-07, "loss": 0.145, "step": 10263 }, { "epoch": 0.82, "grad_norm": 1.2586239644009136, "learning_rate": 8.159025880979299e-07, "loss": 0.1532, "step": 10264 }, { "epoch": 0.82, "grad_norm": 1.4690218720524801, "learning_rate": 8.151934684635632e-07, "loss": 0.1487, "step": 10265 }, { "epoch": 0.82, "grad_norm": 1.3058316899303153, "learning_rate": 8.144846297679398e-07, "loss": 0.1768, "step": 10266 }, { "epoch": 0.82, "grad_norm": 1.4187275367977383, "learning_rate": 8.137760720586446e-07, "loss": 0.1879, "step": 10267 }, { "epoch": 0.82, "grad_norm": 1.174436859255855, "learning_rate": 8.13067795383246e-07, "loss": 0.1162, "step": 10268 }, { "epoch": 0.82, "grad_norm": 1.4840360799713945, "learning_rate": 8.123597997892918e-07, "loss": 0.1914, "step": 10269 }, { "epoch": 0.82, "grad_norm": 1.449240062470318, "learning_rate": 8.116520853243126e-07, "loss": 0.1648, "step": 10270 }, { "epoch": 0.82, "grad_norm": 1.441033624486293, "learning_rate": 8.109446520358188e-07, "loss": 0.1894, "step": 10271 }, { "epoch": 0.82, "grad_norm": 1.4038540803967303, "learning_rate": 8.102374999713025e-07, "loss": 0.1669, "step": 10272 }, { "epoch": 0.82, "grad_norm": 1.403092525328924, "learning_rate": 8.095306291782378e-07, "loss": 0.1981, "step": 10273 }, { "epoch": 0.82, "grad_norm": 6.997909340123656, "learning_rate": 8.088240397040776e-07, "loss": 0.4697, "step": 10274 }, { "epoch": 0.82, "grad_norm": 1.3799519560359428, "learning_rate": 8.081177315962601e-07, "loss": 0.1654, "step": 10275 }, { "epoch": 0.82, "grad_norm": 1.4284416808049756, "learning_rate": 8.074117049021985e-07, "loss": 0.1944, "step": 10276 }, { "epoch": 0.82, "grad_norm": 1.3285882879490167, "learning_rate": 8.067059596692928e-07, "loss": 0.1938, "step": 10277 }, { "epoch": 0.82, "grad_norm": 1.456921684426916, "learning_rate": 8.060004959449225e-07, "loss": 0.1929, "step": 10278 }, { "epoch": 0.82, "grad_norm": 1.2809114171272378, "learning_rate": 8.052953137764441e-07, "loss": 0.1611, "step": 10279 }, { "epoch": 0.82, "grad_norm": 1.4048132551457462, "learning_rate": 8.04590413211202e-07, "loss": 0.1646, "step": 10280 }, { "epoch": 0.82, "grad_norm": 6.896066147142604, "learning_rate": 8.038857942965167e-07, "loss": 0.6755, "step": 10281 }, { "epoch": 0.82, "grad_norm": 1.3090141934836064, "learning_rate": 8.03181457079692e-07, "loss": 0.1735, "step": 10282 }, { "epoch": 0.82, "grad_norm": 1.3708271984127252, "learning_rate": 8.024774016080105e-07, "loss": 0.1804, "step": 10283 }, { "epoch": 0.82, "grad_norm": 1.196900333388832, "learning_rate": 8.017736279287414e-07, "loss": 0.1324, "step": 10284 }, { "epoch": 0.82, "grad_norm": 1.3169728460582562, "learning_rate": 8.010701360891265e-07, "loss": 0.1667, "step": 10285 }, { "epoch": 0.82, "grad_norm": 1.637443000044592, "learning_rate": 8.003669261363972e-07, "loss": 0.1959, "step": 10286 }, { "epoch": 0.82, "grad_norm": 1.2216941627208409, "learning_rate": 7.996639981177612e-07, "loss": 0.149, "step": 10287 }, { "epoch": 0.82, "grad_norm": 1.3581970271787118, "learning_rate": 7.989613520804051e-07, "loss": 0.1927, "step": 10288 }, { "epoch": 0.82, "grad_norm": 1.2676407083427539, "learning_rate": 7.982589880715036e-07, "loss": 0.1688, "step": 10289 }, { "epoch": 0.82, "grad_norm": 1.4461391514232231, "learning_rate": 7.975569061382066e-07, "loss": 0.1895, "step": 10290 }, { "epoch": 0.82, "grad_norm": 1.3737987257525235, "learning_rate": 7.96855106327648e-07, "loss": 0.1688, "step": 10291 }, { "epoch": 0.82, "grad_norm": 7.883446170259811, "learning_rate": 7.96153588686941e-07, "loss": 0.5334, "step": 10292 }, { "epoch": 0.82, "grad_norm": 1.2515985009100468, "learning_rate": 7.954523532631802e-07, "loss": 0.1396, "step": 10293 }, { "epoch": 0.82, "grad_norm": 1.424250936452341, "learning_rate": 7.947514001034412e-07, "loss": 0.1481, "step": 10294 }, { "epoch": 0.82, "grad_norm": 1.1785945464249235, "learning_rate": 7.94050729254785e-07, "loss": 0.1648, "step": 10295 }, { "epoch": 0.82, "grad_norm": 1.1600100051842837, "learning_rate": 7.933503407642451e-07, "loss": 0.1715, "step": 10296 }, { "epoch": 0.82, "grad_norm": 1.528021105323564, "learning_rate": 7.926502346788412e-07, "loss": 0.1688, "step": 10297 }, { "epoch": 0.82, "grad_norm": 1.3333077728031326, "learning_rate": 7.919504110455778e-07, "loss": 0.1578, "step": 10298 }, { "epoch": 0.82, "grad_norm": 1.2619531300973006, "learning_rate": 7.912508699114302e-07, "loss": 0.1516, "step": 10299 }, { "epoch": 0.82, "grad_norm": 6.9652040621247835, "learning_rate": 7.905516113233652e-07, "loss": 0.555, "step": 10300 }, { "epoch": 0.82, "grad_norm": 1.2526973218103923, "learning_rate": 7.898526353283248e-07, "loss": 0.1413, "step": 10301 }, { "epoch": 0.82, "grad_norm": 1.3284422797678117, "learning_rate": 7.89153941973233e-07, "loss": 0.1441, "step": 10302 }, { "epoch": 0.82, "grad_norm": 1.5250098737111806, "learning_rate": 7.884555313049952e-07, "loss": 0.2019, "step": 10303 }, { "epoch": 0.82, "grad_norm": 1.4564496313900295, "learning_rate": 7.877574033704982e-07, "loss": 0.1629, "step": 10304 }, { "epoch": 0.82, "grad_norm": 1.587573692658112, "learning_rate": 7.870595582166096e-07, "loss": 0.1985, "step": 10305 }, { "epoch": 0.82, "grad_norm": 10.106469808086027, "learning_rate": 7.863619958901775e-07, "loss": 0.527, "step": 10306 }, { "epoch": 0.82, "grad_norm": 1.3792532837625195, "learning_rate": 7.856647164380315e-07, "loss": 0.1633, "step": 10307 }, { "epoch": 0.82, "grad_norm": 1.3403237328401127, "learning_rate": 7.849677199069805e-07, "loss": 0.1993, "step": 10308 }, { "epoch": 0.82, "grad_norm": 1.2895561568780798, "learning_rate": 7.842710063438202e-07, "loss": 0.1781, "step": 10309 }, { "epoch": 0.82, "grad_norm": 1.3278200557695121, "learning_rate": 7.835745757953178e-07, "loss": 0.192, "step": 10310 }, { "epoch": 0.82, "grad_norm": 1.4079285895297915, "learning_rate": 7.828784283082303e-07, "loss": 0.1669, "step": 10311 }, { "epoch": 0.82, "grad_norm": 1.2861926768039775, "learning_rate": 7.821825639292918e-07, "loss": 0.16, "step": 10312 }, { "epoch": 0.83, "grad_norm": 1.4617563603954875, "learning_rate": 7.814869827052168e-07, "loss": 0.1566, "step": 10313 }, { "epoch": 0.83, "grad_norm": 8.497629502190096, "learning_rate": 7.807916846827024e-07, "loss": 0.7361, "step": 10314 }, { "epoch": 0.83, "grad_norm": 1.2581980042741252, "learning_rate": 7.800966699084262e-07, "loss": 0.1838, "step": 10315 }, { "epoch": 0.83, "grad_norm": 1.391378832734386, "learning_rate": 7.794019384290464e-07, "loss": 0.1863, "step": 10316 }, { "epoch": 0.83, "grad_norm": 14.500750199627793, "learning_rate": 7.787074902912018e-07, "loss": 0.6652, "step": 10317 }, { "epoch": 0.83, "grad_norm": 1.1593065025799003, "learning_rate": 7.780133255415139e-07, "loss": 0.1048, "step": 10318 }, { "epoch": 0.83, "grad_norm": 1.4932383857417515, "learning_rate": 7.773194442265819e-07, "loss": 0.1585, "step": 10319 }, { "epoch": 0.83, "grad_norm": 1.3300243050752893, "learning_rate": 7.766258463929926e-07, "loss": 0.1692, "step": 10320 }, { "epoch": 0.83, "grad_norm": 1.293679057996431, "learning_rate": 7.759325320873051e-07, "loss": 0.1861, "step": 10321 }, { "epoch": 0.83, "grad_norm": 6.094428191613522, "learning_rate": 7.752395013560643e-07, "loss": 0.5467, "step": 10322 }, { "epoch": 0.83, "grad_norm": 1.279826462186724, "learning_rate": 7.745467542457985e-07, "loss": 0.1898, "step": 10323 }, { "epoch": 0.83, "grad_norm": 1.3608804146744042, "learning_rate": 7.738542908030095e-07, "loss": 0.1712, "step": 10324 }, { "epoch": 0.83, "grad_norm": 1.237587981488496, "learning_rate": 7.731621110741871e-07, "loss": 0.125, "step": 10325 }, { "epoch": 0.83, "grad_norm": 1.4840417345202028, "learning_rate": 7.724702151058e-07, "loss": 0.1595, "step": 10326 }, { "epoch": 0.83, "grad_norm": 1.36070465111008, "learning_rate": 7.717786029442953e-07, "loss": 0.1905, "step": 10327 }, { "epoch": 0.83, "grad_norm": 1.522324888305765, "learning_rate": 7.710872746361037e-07, "loss": 0.1705, "step": 10328 }, { "epoch": 0.83, "grad_norm": 6.595643286629416, "learning_rate": 7.70396230227638e-07, "loss": 0.5053, "step": 10329 }, { "epoch": 0.83, "grad_norm": 1.6167632138969317, "learning_rate": 7.697054697652879e-07, "loss": 0.1571, "step": 10330 }, { "epoch": 0.83, "grad_norm": 1.4735380195479784, "learning_rate": 7.690149932954255e-07, "loss": 0.2098, "step": 10331 }, { "epoch": 0.83, "grad_norm": 1.2823875882351807, "learning_rate": 7.683248008644084e-07, "loss": 0.1645, "step": 10332 }, { "epoch": 0.83, "grad_norm": 8.97413401718711, "learning_rate": 7.676348925185667e-07, "loss": 0.6059, "step": 10333 }, { "epoch": 0.83, "grad_norm": 1.3642457433622908, "learning_rate": 7.669452683042194e-07, "loss": 0.161, "step": 10334 }, { "epoch": 0.83, "grad_norm": 1.3132387397590377, "learning_rate": 7.662559282676619e-07, "loss": 0.1692, "step": 10335 }, { "epoch": 0.83, "grad_norm": 1.3607245235273113, "learning_rate": 7.655668724551713e-07, "loss": 0.1509, "step": 10336 }, { "epoch": 0.83, "grad_norm": 11.348280907923808, "learning_rate": 7.648781009130068e-07, "loss": 0.5216, "step": 10337 }, { "epoch": 0.83, "grad_norm": 1.6543635617578722, "learning_rate": 7.641896136874067e-07, "loss": 0.199, "step": 10338 }, { "epoch": 0.83, "grad_norm": 1.3340460227706135, "learning_rate": 7.635014108245925e-07, "loss": 0.1758, "step": 10339 }, { "epoch": 0.83, "grad_norm": 1.3498039307992271, "learning_rate": 7.628134923707642e-07, "loss": 0.1837, "step": 10340 }, { "epoch": 0.83, "grad_norm": 1.4413871961685307, "learning_rate": 7.621258583721047e-07, "loss": 0.2095, "step": 10341 }, { "epoch": 0.83, "grad_norm": 1.408815771379749, "learning_rate": 7.61438508874775e-07, "loss": 0.1964, "step": 10342 }, { "epoch": 0.83, "grad_norm": 1.3254240878451835, "learning_rate": 7.60751443924923e-07, "loss": 0.1799, "step": 10343 }, { "epoch": 0.83, "grad_norm": 1.4716355231314966, "learning_rate": 7.600646635686687e-07, "loss": 0.2109, "step": 10344 }, { "epoch": 0.83, "grad_norm": 1.4283905441000049, "learning_rate": 7.593781678521212e-07, "loss": 0.1834, "step": 10345 }, { "epoch": 0.83, "grad_norm": 1.2364220957612122, "learning_rate": 7.586919568213663e-07, "loss": 0.1343, "step": 10346 }, { "epoch": 0.83, "grad_norm": 1.1992997613432193, "learning_rate": 7.580060305224701e-07, "loss": 0.1309, "step": 10347 }, { "epoch": 0.83, "grad_norm": 1.5024655394058375, "learning_rate": 7.573203890014824e-07, "loss": 0.1522, "step": 10348 }, { "epoch": 0.83, "grad_norm": 4.477035873887483, "learning_rate": 7.566350323044319e-07, "loss": 0.4559, "step": 10349 }, { "epoch": 0.83, "grad_norm": 1.4865055360236739, "learning_rate": 7.55949960477328e-07, "loss": 0.1815, "step": 10350 }, { "epoch": 0.83, "grad_norm": 1.1905828713122224, "learning_rate": 7.552651735661626e-07, "loss": 0.1606, "step": 10351 }, { "epoch": 0.83, "grad_norm": 1.3156678256939212, "learning_rate": 7.545806716169074e-07, "loss": 0.1691, "step": 10352 }, { "epoch": 0.83, "grad_norm": 1.392991131927554, "learning_rate": 7.538964546755128e-07, "loss": 0.1779, "step": 10353 }, { "epoch": 0.83, "grad_norm": 1.5783029536339719, "learning_rate": 7.532125227879178e-07, "loss": 0.1874, "step": 10354 }, { "epoch": 0.83, "grad_norm": 1.204889617914643, "learning_rate": 7.525288760000304e-07, "loss": 0.1535, "step": 10355 }, { "epoch": 0.83, "grad_norm": 1.1389205384642376, "learning_rate": 7.518455143577502e-07, "loss": 0.1439, "step": 10356 }, { "epoch": 0.83, "grad_norm": 1.1364661112191676, "learning_rate": 7.511624379069532e-07, "loss": 0.1532, "step": 10357 }, { "epoch": 0.83, "grad_norm": 1.3695818613221862, "learning_rate": 7.504796466934927e-07, "loss": 0.1752, "step": 10358 }, { "epoch": 0.83, "grad_norm": 1.2894407255180065, "learning_rate": 7.497971407632105e-07, "loss": 0.1544, "step": 10359 }, { "epoch": 0.83, "grad_norm": 1.393065319647897, "learning_rate": 7.491149201619236e-07, "loss": 0.1538, "step": 10360 }, { "epoch": 0.83, "grad_norm": 1.2348200093865822, "learning_rate": 7.484329849354316e-07, "loss": 0.1568, "step": 10361 }, { "epoch": 0.83, "grad_norm": 6.689310826589822, "learning_rate": 7.477513351295152e-07, "loss": 0.6105, "step": 10362 }, { "epoch": 0.83, "grad_norm": 1.287140174619003, "learning_rate": 7.47069970789936e-07, "loss": 0.1709, "step": 10363 }, { "epoch": 0.83, "grad_norm": 1.3229519000931695, "learning_rate": 7.46388891962434e-07, "loss": 0.1567, "step": 10364 }, { "epoch": 0.83, "grad_norm": 1.4493219300391396, "learning_rate": 7.457080986927357e-07, "loss": 0.1875, "step": 10365 }, { "epoch": 0.83, "grad_norm": 8.792194172152708, "learning_rate": 7.450275910265415e-07, "loss": 0.6442, "step": 10366 }, { "epoch": 0.83, "grad_norm": 1.3707211756003133, "learning_rate": 7.443473690095365e-07, "loss": 0.1763, "step": 10367 }, { "epoch": 0.83, "grad_norm": 1.560652280672448, "learning_rate": 7.436674326873878e-07, "loss": 0.1588, "step": 10368 }, { "epoch": 0.83, "grad_norm": 6.62717761181012, "learning_rate": 7.429877821057402e-07, "loss": 0.4933, "step": 10369 }, { "epoch": 0.83, "grad_norm": 1.388677768337239, "learning_rate": 7.423084173102213e-07, "loss": 0.2074, "step": 10370 }, { "epoch": 0.83, "grad_norm": 1.3741211327882001, "learning_rate": 7.416293383464384e-07, "loss": 0.1724, "step": 10371 }, { "epoch": 0.83, "grad_norm": 1.3881543362742537, "learning_rate": 7.409505452599808e-07, "loss": 0.1982, "step": 10372 }, { "epoch": 0.83, "grad_norm": 1.4340467799022172, "learning_rate": 7.402720380964157e-07, "loss": 0.2199, "step": 10373 }, { "epoch": 0.83, "grad_norm": 1.407404142006903, "learning_rate": 7.395938169012978e-07, "loss": 0.1596, "step": 10374 }, { "epoch": 0.83, "grad_norm": 1.3696161465291723, "learning_rate": 7.389158817201541e-07, "loss": 0.1881, "step": 10375 }, { "epoch": 0.83, "grad_norm": 1.336794376240014, "learning_rate": 7.382382325984971e-07, "loss": 0.1835, "step": 10376 }, { "epoch": 0.83, "grad_norm": 1.3986486910743217, "learning_rate": 7.375608695818226e-07, "loss": 0.171, "step": 10377 }, { "epoch": 0.83, "grad_norm": 1.3028313584057856, "learning_rate": 7.368837927155986e-07, "loss": 0.1515, "step": 10378 }, { "epoch": 0.83, "grad_norm": 1.433895737476724, "learning_rate": 7.362070020452838e-07, "loss": 0.1946, "step": 10379 }, { "epoch": 0.83, "grad_norm": 1.137831613535778, "learning_rate": 7.355304976163119e-07, "loss": 0.1266, "step": 10380 }, { "epoch": 0.83, "grad_norm": 9.114095810093628, "learning_rate": 7.348542794740987e-07, "loss": 0.62, "step": 10381 }, { "epoch": 0.83, "grad_norm": 1.4945798395123349, "learning_rate": 7.341783476640402e-07, "loss": 0.1671, "step": 10382 }, { "epoch": 0.83, "grad_norm": 1.2706722717863457, "learning_rate": 7.335027022315144e-07, "loss": 0.1622, "step": 10383 }, { "epoch": 0.83, "grad_norm": 1.3211014343548666, "learning_rate": 7.328273432218791e-07, "loss": 0.1761, "step": 10384 }, { "epoch": 0.83, "grad_norm": 1.3340180668525505, "learning_rate": 7.32152270680473e-07, "loss": 0.1751, "step": 10385 }, { "epoch": 0.83, "grad_norm": 1.4613555819734572, "learning_rate": 7.314774846526163e-07, "loss": 0.2022, "step": 10386 }, { "epoch": 0.83, "grad_norm": 1.5240540572586232, "learning_rate": 7.308029851836079e-07, "loss": 0.185, "step": 10387 }, { "epoch": 0.83, "grad_norm": 1.408532372791918, "learning_rate": 7.301287723187328e-07, "loss": 0.1824, "step": 10388 }, { "epoch": 0.83, "grad_norm": 1.3939470377133603, "learning_rate": 7.294548461032475e-07, "loss": 0.2046, "step": 10389 }, { "epoch": 0.83, "grad_norm": 1.4707705338869925, "learning_rate": 7.287812065823996e-07, "loss": 0.2012, "step": 10390 }, { "epoch": 0.83, "grad_norm": 1.4205622566664506, "learning_rate": 7.281078538014108e-07, "loss": 0.1613, "step": 10391 }, { "epoch": 0.83, "grad_norm": 1.5883635998644619, "learning_rate": 7.274347878054839e-07, "loss": 0.218, "step": 10392 }, { "epoch": 0.83, "grad_norm": 1.3895197472350735, "learning_rate": 7.267620086398053e-07, "loss": 0.1505, "step": 10393 }, { "epoch": 0.83, "grad_norm": 1.3440498097504125, "learning_rate": 7.260895163495407e-07, "loss": 0.1501, "step": 10394 }, { "epoch": 0.83, "grad_norm": 1.2859237779745636, "learning_rate": 7.254173109798363e-07, "loss": 0.1747, "step": 10395 }, { "epoch": 0.83, "grad_norm": 44.79975121297489, "learning_rate": 7.247453925758197e-07, "loss": 0.5969, "step": 10396 }, { "epoch": 0.83, "grad_norm": 1.4802334572852698, "learning_rate": 7.240737611825976e-07, "loss": 0.2012, "step": 10397 }, { "epoch": 0.83, "grad_norm": 8.363300433590267, "learning_rate": 7.234024168452592e-07, "loss": 0.6022, "step": 10398 }, { "epoch": 0.83, "grad_norm": 1.2232651354608663, "learning_rate": 7.227313596088759e-07, "loss": 0.1434, "step": 10399 }, { "epoch": 0.83, "grad_norm": 1.5870548395422266, "learning_rate": 7.220605895184946e-07, "loss": 0.2197, "step": 10400 }, { "epoch": 0.83, "grad_norm": 1.141045740445732, "learning_rate": 7.21390106619147e-07, "loss": 0.1225, "step": 10401 }, { "epoch": 0.83, "grad_norm": 12.871453980027333, "learning_rate": 7.207199109558472e-07, "loss": 0.5547, "step": 10402 }, { "epoch": 0.83, "grad_norm": 2.0299418025395037, "learning_rate": 7.200500025735835e-07, "loss": 0.1956, "step": 10403 }, { "epoch": 0.83, "grad_norm": 1.284970320221558, "learning_rate": 7.193803815173317e-07, "loss": 0.1501, "step": 10404 }, { "epoch": 0.83, "grad_norm": 1.3116599532908575, "learning_rate": 7.187110478320447e-07, "loss": 0.1706, "step": 10405 }, { "epoch": 0.83, "grad_norm": 1.5792095019405397, "learning_rate": 7.180420015626571e-07, "loss": 0.203, "step": 10406 }, { "epoch": 0.83, "grad_norm": 7.088503193312967, "learning_rate": 7.173732427540824e-07, "loss": 0.6149, "step": 10407 }, { "epoch": 0.83, "grad_norm": 1.2227501452513352, "learning_rate": 7.167047714512199e-07, "loss": 0.1577, "step": 10408 }, { "epoch": 0.83, "grad_norm": 1.2543306642982393, "learning_rate": 7.160365876989428e-07, "loss": 0.1799, "step": 10409 }, { "epoch": 0.83, "grad_norm": 1.2909038878245038, "learning_rate": 7.153686915421087e-07, "loss": 0.1964, "step": 10410 }, { "epoch": 0.83, "grad_norm": 1.5155298423600947, "learning_rate": 7.147010830255585e-07, "loss": 0.2085, "step": 10411 }, { "epoch": 0.83, "grad_norm": 1.4026772316442193, "learning_rate": 7.140337621941057e-07, "loss": 0.1928, "step": 10412 }, { "epoch": 0.83, "grad_norm": 9.058285708043789, "learning_rate": 7.133667290925538e-07, "loss": 0.6879, "step": 10413 }, { "epoch": 0.83, "grad_norm": 1.4252336191440804, "learning_rate": 7.126999837656817e-07, "loss": 0.1749, "step": 10414 }, { "epoch": 0.83, "grad_norm": 1.308836359682954, "learning_rate": 7.120335262582495e-07, "loss": 0.156, "step": 10415 }, { "epoch": 0.83, "grad_norm": 1.2792537675442102, "learning_rate": 7.113673566149987e-07, "loss": 0.169, "step": 10416 }, { "epoch": 0.83, "grad_norm": 1.3615722536345924, "learning_rate": 7.107014748806512e-07, "loss": 0.1697, "step": 10417 }, { "epoch": 0.83, "grad_norm": 1.3088875539799676, "learning_rate": 7.100358810999098e-07, "loss": 0.1658, "step": 10418 }, { "epoch": 0.83, "grad_norm": 1.5571371698779042, "learning_rate": 7.093705753174579e-07, "loss": 0.196, "step": 10419 }, { "epoch": 0.83, "grad_norm": 1.3941635948160789, "learning_rate": 7.087055575779594e-07, "loss": 0.19, "step": 10420 }, { "epoch": 0.83, "grad_norm": 1.3056844359898923, "learning_rate": 7.080408279260575e-07, "loss": 0.1697, "step": 10421 }, { "epoch": 0.83, "grad_norm": 6.040287885088047, "learning_rate": 7.073763864063815e-07, "loss": 0.567, "step": 10422 }, { "epoch": 0.83, "grad_norm": 1.4141785875279016, "learning_rate": 7.067122330635323e-07, "loss": 0.1741, "step": 10423 }, { "epoch": 0.83, "grad_norm": 1.7525062077173894, "learning_rate": 7.060483679421004e-07, "loss": 0.1879, "step": 10424 }, { "epoch": 0.83, "grad_norm": 1.252254370329039, "learning_rate": 7.053847910866513e-07, "loss": 0.1472, "step": 10425 }, { "epoch": 0.83, "grad_norm": 1.4161467081144168, "learning_rate": 7.047215025417337e-07, "loss": 0.1707, "step": 10426 }, { "epoch": 0.83, "grad_norm": 1.2001568021114077, "learning_rate": 7.040585023518759e-07, "loss": 0.1288, "step": 10427 }, { "epoch": 0.83, "grad_norm": 6.264703695645213, "learning_rate": 7.033957905615867e-07, "loss": 0.4106, "step": 10428 }, { "epoch": 0.83, "grad_norm": 1.3494966062795628, "learning_rate": 7.027333672153563e-07, "loss": 0.1778, "step": 10429 }, { "epoch": 0.83, "grad_norm": 1.4139999939526524, "learning_rate": 7.020712323576556e-07, "loss": 0.1779, "step": 10430 }, { "epoch": 0.83, "grad_norm": 1.3768242262370547, "learning_rate": 7.014093860329346e-07, "loss": 0.1624, "step": 10431 }, { "epoch": 0.83, "grad_norm": 1.4684058975777654, "learning_rate": 7.00747828285625e-07, "loss": 0.1823, "step": 10432 }, { "epoch": 0.83, "grad_norm": 1.3020954810503225, "learning_rate": 7.000865591601424e-07, "loss": 0.156, "step": 10433 }, { "epoch": 0.83, "grad_norm": 7.8281829169847, "learning_rate": 6.994255787008747e-07, "loss": 0.5676, "step": 10434 }, { "epoch": 0.83, "grad_norm": 1.4333836977903172, "learning_rate": 6.987648869521996e-07, "loss": 0.1648, "step": 10435 }, { "epoch": 0.83, "grad_norm": 1.2199258430677205, "learning_rate": 6.981044839584705e-07, "loss": 0.168, "step": 10436 }, { "epoch": 0.83, "grad_norm": 1.339178820289547, "learning_rate": 6.974443697640193e-07, "loss": 0.1826, "step": 10437 }, { "epoch": 0.84, "grad_norm": 1.3367819388389524, "learning_rate": 6.967845444131654e-07, "loss": 0.1834, "step": 10438 }, { "epoch": 0.84, "grad_norm": 1.4540219629407722, "learning_rate": 6.96125007950203e-07, "loss": 0.1962, "step": 10439 }, { "epoch": 0.84, "grad_norm": 1.3467724801680916, "learning_rate": 6.954657604194093e-07, "loss": 0.1467, "step": 10440 }, { "epoch": 0.84, "grad_norm": 1.575143644477294, "learning_rate": 6.948068018650411e-07, "loss": 0.1707, "step": 10441 }, { "epoch": 0.84, "grad_norm": 1.3082776299698478, "learning_rate": 6.941481323313365e-07, "loss": 0.1782, "step": 10442 }, { "epoch": 0.84, "grad_norm": 1.1948581144121222, "learning_rate": 6.934897518625138e-07, "loss": 0.1328, "step": 10443 }, { "epoch": 0.84, "grad_norm": 1.610104160435083, "learning_rate": 6.928316605027724e-07, "loss": 0.207, "step": 10444 }, { "epoch": 0.84, "grad_norm": 1.2510590343014063, "learning_rate": 6.921738582962923e-07, "loss": 0.1578, "step": 10445 }, { "epoch": 0.84, "grad_norm": 1.4065339460194437, "learning_rate": 6.915163452872315e-07, "loss": 0.1875, "step": 10446 }, { "epoch": 0.84, "grad_norm": 1.3183170444399515, "learning_rate": 6.908591215197353e-07, "loss": 0.1481, "step": 10447 }, { "epoch": 0.84, "grad_norm": 11.281114293935818, "learning_rate": 6.902021870379199e-07, "loss": 0.5272, "step": 10448 }, { "epoch": 0.84, "grad_norm": 1.3945064991214662, "learning_rate": 6.89545541885891e-07, "loss": 0.1556, "step": 10449 }, { "epoch": 0.84, "grad_norm": 1.4610791153539384, "learning_rate": 6.888891861077301e-07, "loss": 0.2009, "step": 10450 }, { "epoch": 0.84, "grad_norm": 1.4592205866589194, "learning_rate": 6.882331197475006e-07, "loss": 0.1665, "step": 10451 }, { "epoch": 0.84, "grad_norm": 1.2465658339537615, "learning_rate": 6.875773428492455e-07, "loss": 0.1299, "step": 10452 }, { "epoch": 0.84, "grad_norm": 1.243520508754784, "learning_rate": 6.869218554569895e-07, "loss": 0.1703, "step": 10453 }, { "epoch": 0.84, "grad_norm": 1.3044428334714238, "learning_rate": 6.862666576147375e-07, "loss": 0.1697, "step": 10454 }, { "epoch": 0.84, "grad_norm": 1.3348660441537628, "learning_rate": 6.856117493664743e-07, "loss": 0.1515, "step": 10455 }, { "epoch": 0.84, "grad_norm": 1.2668288952144335, "learning_rate": 6.84957130756168e-07, "loss": 0.181, "step": 10456 }, { "epoch": 0.84, "grad_norm": 1.3595987251388104, "learning_rate": 6.843028018277614e-07, "loss": 0.1998, "step": 10457 }, { "epoch": 0.84, "grad_norm": 1.5413024697497035, "learning_rate": 6.836487626251853e-07, "loss": 0.1509, "step": 10458 }, { "epoch": 0.84, "grad_norm": 1.331531227951978, "learning_rate": 6.829950131923452e-07, "loss": 0.1348, "step": 10459 }, { "epoch": 0.84, "grad_norm": 1.3619631378209511, "learning_rate": 6.823415535731303e-07, "loss": 0.1562, "step": 10460 }, { "epoch": 0.84, "grad_norm": 1.273094679328015, "learning_rate": 6.816883838114085e-07, "loss": 0.1461, "step": 10461 }, { "epoch": 0.84, "grad_norm": 1.5072539063605697, "learning_rate": 6.810355039510302e-07, "loss": 0.2246, "step": 10462 }, { "epoch": 0.84, "grad_norm": 1.4653968230754655, "learning_rate": 6.803829140358237e-07, "loss": 0.2092, "step": 10463 }, { "epoch": 0.84, "grad_norm": 1.414135191644308, "learning_rate": 6.797306141096005e-07, "loss": 0.1677, "step": 10464 }, { "epoch": 0.84, "grad_norm": 1.448125786890402, "learning_rate": 6.790786042161507e-07, "loss": 0.1774, "step": 10465 }, { "epoch": 0.84, "grad_norm": 1.2059194481116273, "learning_rate": 6.784268843992453e-07, "loss": 0.1376, "step": 10466 }, { "epoch": 0.84, "grad_norm": 1.2271038939011716, "learning_rate": 6.777754547026393e-07, "loss": 0.1532, "step": 10467 }, { "epoch": 0.84, "grad_norm": 1.4440136151008223, "learning_rate": 6.771243151700608e-07, "loss": 0.197, "step": 10468 }, { "epoch": 0.84, "grad_norm": 7.530559934225353, "learning_rate": 6.764734658452255e-07, "loss": 0.5969, "step": 10469 }, { "epoch": 0.84, "grad_norm": 1.2924688849985508, "learning_rate": 6.758229067718269e-07, "loss": 0.1594, "step": 10470 }, { "epoch": 0.84, "grad_norm": 1.2520174244144135, "learning_rate": 6.751726379935369e-07, "loss": 0.1384, "step": 10471 }, { "epoch": 0.84, "grad_norm": 10.03350384176968, "learning_rate": 6.745226595540122e-07, "loss": 0.6921, "step": 10472 }, { "epoch": 0.84, "grad_norm": 8.408125780349087, "learning_rate": 6.738729714968872e-07, "loss": 0.5453, "step": 10473 }, { "epoch": 0.84, "grad_norm": 9.096457808805035, "learning_rate": 6.732235738657767e-07, "loss": 0.6267, "step": 10474 }, { "epoch": 0.84, "grad_norm": 1.3671381284147623, "learning_rate": 6.725744667042778e-07, "loss": 0.2228, "step": 10475 }, { "epoch": 0.84, "grad_norm": 1.2564026402349633, "learning_rate": 6.719256500559668e-07, "loss": 0.1704, "step": 10476 }, { "epoch": 0.84, "grad_norm": 1.4735287577873937, "learning_rate": 6.71277123964399e-07, "loss": 0.1943, "step": 10477 }, { "epoch": 0.84, "grad_norm": 1.2508352843864312, "learning_rate": 6.706288884731155e-07, "loss": 0.1597, "step": 10478 }, { "epoch": 0.84, "grad_norm": 1.3805284620481453, "learning_rate": 6.699809436256311e-07, "loss": 0.1336, "step": 10479 }, { "epoch": 0.84, "grad_norm": 1.5672606853586122, "learning_rate": 6.693332894654442e-07, "loss": 0.1746, "step": 10480 }, { "epoch": 0.84, "grad_norm": 1.2830717694871543, "learning_rate": 6.686859260360374e-07, "loss": 0.1516, "step": 10481 }, { "epoch": 0.84, "grad_norm": 1.4604620027618846, "learning_rate": 6.680388533808652e-07, "loss": 0.186, "step": 10482 }, { "epoch": 0.84, "grad_norm": 1.2006048237971076, "learning_rate": 6.673920715433718e-07, "loss": 0.1545, "step": 10483 }, { "epoch": 0.84, "grad_norm": 1.3034675254005228, "learning_rate": 6.667455805669753e-07, "loss": 0.1684, "step": 10484 }, { "epoch": 0.84, "grad_norm": 1.4093061470312744, "learning_rate": 6.660993804950777e-07, "loss": 0.1415, "step": 10485 }, { "epoch": 0.84, "grad_norm": 1.4586209518755469, "learning_rate": 6.654534713710597e-07, "loss": 0.1809, "step": 10486 }, { "epoch": 0.84, "grad_norm": 1.514294809139086, "learning_rate": 6.648078532382835e-07, "loss": 0.1888, "step": 10487 }, { "epoch": 0.84, "grad_norm": 1.3807195729913626, "learning_rate": 6.641625261400908e-07, "loss": 0.1832, "step": 10488 }, { "epoch": 0.84, "grad_norm": 1.3911693397736682, "learning_rate": 6.63517490119805e-07, "loss": 0.1636, "step": 10489 }, { "epoch": 0.84, "grad_norm": 1.482956961084758, "learning_rate": 6.62872745220729e-07, "loss": 0.183, "step": 10490 }, { "epoch": 0.84, "grad_norm": 1.4257052717413892, "learning_rate": 6.622282914861456e-07, "loss": 0.154, "step": 10491 }, { "epoch": 0.84, "grad_norm": 1.168065860879332, "learning_rate": 6.615841289593223e-07, "loss": 0.1366, "step": 10492 }, { "epoch": 0.84, "grad_norm": 1.5306795326379492, "learning_rate": 6.609402576834989e-07, "loss": 0.1758, "step": 10493 }, { "epoch": 0.84, "grad_norm": 8.31979508552305, "learning_rate": 6.602966777019038e-07, "loss": 0.41, "step": 10494 }, { "epoch": 0.84, "grad_norm": 1.403948128906313, "learning_rate": 6.596533890577417e-07, "loss": 0.1339, "step": 10495 }, { "epoch": 0.84, "grad_norm": 1.3866326970952532, "learning_rate": 6.590103917941982e-07, "loss": 0.1883, "step": 10496 }, { "epoch": 0.84, "grad_norm": 1.440204418410186, "learning_rate": 6.583676859544402e-07, "loss": 0.1697, "step": 10497 }, { "epoch": 0.84, "grad_norm": 1.5487438483192193, "learning_rate": 6.577252715816135e-07, "loss": 0.1611, "step": 10498 }, { "epoch": 0.84, "grad_norm": 1.3817312824363903, "learning_rate": 6.570831487188461e-07, "loss": 0.1691, "step": 10499 }, { "epoch": 0.84, "grad_norm": 1.3098629824634276, "learning_rate": 6.564413174092443e-07, "loss": 0.1547, "step": 10500 }, { "epoch": 0.84, "grad_norm": 1.4483042601938303, "learning_rate": 6.557997776958997e-07, "loss": 0.2156, "step": 10501 }, { "epoch": 0.84, "grad_norm": 1.2732973811825736, "learning_rate": 6.55158529621876e-07, "loss": 0.1255, "step": 10502 }, { "epoch": 0.84, "grad_norm": 1.3997232956852186, "learning_rate": 6.545175732302256e-07, "loss": 0.1847, "step": 10503 }, { "epoch": 0.84, "grad_norm": 1.9112349805222262, "learning_rate": 6.538769085639779e-07, "loss": 0.252, "step": 10504 }, { "epoch": 0.84, "grad_norm": 1.4570916189976346, "learning_rate": 6.532365356661397e-07, "loss": 0.1734, "step": 10505 }, { "epoch": 0.84, "grad_norm": 1.3840406879774412, "learning_rate": 6.525964545797042e-07, "loss": 0.1917, "step": 10506 }, { "epoch": 0.84, "grad_norm": 11.274855629113032, "learning_rate": 6.519566653476405e-07, "loss": 0.6124, "step": 10507 }, { "epoch": 0.84, "grad_norm": 1.3036791104548606, "learning_rate": 6.513171680128999e-07, "loss": 0.1952, "step": 10508 }, { "epoch": 0.84, "grad_norm": 1.4431391582727755, "learning_rate": 6.506779626184134e-07, "loss": 0.1663, "step": 10509 }, { "epoch": 0.84, "grad_norm": 1.528983189047326, "learning_rate": 6.500390492070941e-07, "loss": 0.14, "step": 10510 }, { "epoch": 0.84, "grad_norm": 1.3403699270904939, "learning_rate": 6.494004278218313e-07, "loss": 0.171, "step": 10511 }, { "epoch": 0.84, "grad_norm": 1.5072303195863255, "learning_rate": 6.487620985055027e-07, "loss": 0.1954, "step": 10512 }, { "epoch": 0.84, "grad_norm": 1.378166066652208, "learning_rate": 6.481240613009565e-07, "loss": 0.188, "step": 10513 }, { "epoch": 0.84, "grad_norm": 1.316787863168727, "learning_rate": 6.474863162510264e-07, "loss": 0.1485, "step": 10514 }, { "epoch": 0.84, "grad_norm": 1.3513723797005748, "learning_rate": 6.468488633985299e-07, "loss": 0.1739, "step": 10515 }, { "epoch": 0.84, "grad_norm": 1.4173785156978218, "learning_rate": 6.462117027862569e-07, "loss": 0.1672, "step": 10516 }, { "epoch": 0.84, "grad_norm": 1.486898221715606, "learning_rate": 6.45574834456984e-07, "loss": 0.1952, "step": 10517 }, { "epoch": 0.84, "grad_norm": 6.366305949854242, "learning_rate": 6.449382584534663e-07, "loss": 0.5917, "step": 10518 }, { "epoch": 0.84, "grad_norm": 9.33096666804939, "learning_rate": 6.443019748184392e-07, "loss": 0.5946, "step": 10519 }, { "epoch": 0.84, "grad_norm": 1.5176979379713582, "learning_rate": 6.436659835946168e-07, "loss": 0.1979, "step": 10520 }, { "epoch": 0.84, "grad_norm": 1.3500045910213623, "learning_rate": 6.430302848246967e-07, "loss": 0.1503, "step": 10521 }, { "epoch": 0.84, "grad_norm": 1.395576689272778, "learning_rate": 6.423948785513545e-07, "loss": 0.1524, "step": 10522 }, { "epoch": 0.84, "grad_norm": 1.5893826037061731, "learning_rate": 6.417597648172474e-07, "loss": 0.1872, "step": 10523 }, { "epoch": 0.84, "grad_norm": 1.4268238123990546, "learning_rate": 6.411249436650119e-07, "loss": 0.1868, "step": 10524 }, { "epoch": 0.84, "grad_norm": 1.3014838688902484, "learning_rate": 6.404904151372649e-07, "loss": 0.1324, "step": 10525 }, { "epoch": 0.84, "grad_norm": 1.2429703358514754, "learning_rate": 6.398561792766067e-07, "loss": 0.1406, "step": 10526 }, { "epoch": 0.84, "grad_norm": 7.605700248752528, "learning_rate": 6.392222361256123e-07, "loss": 0.4614, "step": 10527 }, { "epoch": 0.84, "grad_norm": 1.4760903581195748, "learning_rate": 6.385885857268431e-07, "loss": 0.1495, "step": 10528 }, { "epoch": 0.84, "grad_norm": 1.3130970035006684, "learning_rate": 6.379552281228363e-07, "loss": 0.1679, "step": 10529 }, { "epoch": 0.84, "grad_norm": 1.2695191090714528, "learning_rate": 6.373221633561117e-07, "loss": 0.1435, "step": 10530 }, { "epoch": 0.84, "grad_norm": 1.3669620982376145, "learning_rate": 6.366893914691685e-07, "loss": 0.19, "step": 10531 }, { "epoch": 0.84, "grad_norm": 1.3939842218772, "learning_rate": 6.360569125044869e-07, "loss": 0.1902, "step": 10532 }, { "epoch": 0.84, "grad_norm": 1.5050198271685364, "learning_rate": 6.354247265045266e-07, "loss": 0.1543, "step": 10533 }, { "epoch": 0.84, "grad_norm": 1.4002381812318612, "learning_rate": 6.347928335117287e-07, "loss": 0.1931, "step": 10534 }, { "epoch": 0.84, "grad_norm": 1.3764659487344357, "learning_rate": 6.341612335685143e-07, "loss": 0.1705, "step": 10535 }, { "epoch": 0.84, "grad_norm": 1.2294240787670405, "learning_rate": 6.335299267172828e-07, "loss": 0.1341, "step": 10536 }, { "epoch": 0.84, "grad_norm": 1.4846665612558694, "learning_rate": 6.328989130004187e-07, "loss": 0.182, "step": 10537 }, { "epoch": 0.84, "grad_norm": 1.3084862438377158, "learning_rate": 6.322681924602819e-07, "loss": 0.1699, "step": 10538 }, { "epoch": 0.84, "grad_norm": 1.3900223974804318, "learning_rate": 6.316377651392153e-07, "loss": 0.1501, "step": 10539 }, { "epoch": 0.84, "grad_norm": 1.2956409310994568, "learning_rate": 6.31007631079541e-07, "loss": 0.174, "step": 10540 }, { "epoch": 0.84, "grad_norm": 1.244418695242432, "learning_rate": 6.30377790323562e-07, "loss": 0.1663, "step": 10541 }, { "epoch": 0.84, "grad_norm": 1.401353893196756, "learning_rate": 6.297482429135615e-07, "loss": 0.1799, "step": 10542 }, { "epoch": 0.84, "grad_norm": 1.511367684092856, "learning_rate": 6.291189888918025e-07, "loss": 0.1962, "step": 10543 }, { "epoch": 0.84, "grad_norm": 1.2924466348207186, "learning_rate": 6.284900283005297e-07, "loss": 0.1592, "step": 10544 }, { "epoch": 0.84, "grad_norm": 1.0751484817266148, "learning_rate": 6.278613611819645e-07, "loss": 0.1377, "step": 10545 }, { "epoch": 0.84, "grad_norm": 1.2400031467123105, "learning_rate": 6.272329875783157e-07, "loss": 0.1617, "step": 10546 }, { "epoch": 0.84, "grad_norm": 1.444685289841513, "learning_rate": 6.266049075317631e-07, "loss": 0.2082, "step": 10547 }, { "epoch": 0.84, "grad_norm": 1.3473433970249304, "learning_rate": 6.259771210844751e-07, "loss": 0.1719, "step": 10548 }, { "epoch": 0.84, "grad_norm": 1.3282156815852242, "learning_rate": 6.25349628278597e-07, "loss": 0.154, "step": 10549 }, { "epoch": 0.84, "grad_norm": 1.4409660171190206, "learning_rate": 6.24722429156251e-07, "loss": 0.1743, "step": 10550 }, { "epoch": 0.84, "grad_norm": 1.3190231016451954, "learning_rate": 6.240955237595453e-07, "loss": 0.1451, "step": 10551 }, { "epoch": 0.84, "grad_norm": 1.16478661642939, "learning_rate": 6.234689121305659e-07, "loss": 0.105, "step": 10552 }, { "epoch": 0.84, "grad_norm": 1.2688257054702625, "learning_rate": 6.228425943113792e-07, "loss": 0.1897, "step": 10553 }, { "epoch": 0.84, "grad_norm": 1.539205650900151, "learning_rate": 6.222165703440308e-07, "loss": 0.1638, "step": 10554 }, { "epoch": 0.84, "grad_norm": 1.5565763836371498, "learning_rate": 6.215908402705484e-07, "loss": 0.227, "step": 10555 }, { "epoch": 0.84, "grad_norm": 5.999370863016364, "learning_rate": 6.209654041329377e-07, "loss": 0.5521, "step": 10556 }, { "epoch": 0.84, "grad_norm": 6.761255762303853, "learning_rate": 6.203402619731897e-07, "loss": 0.5766, "step": 10557 }, { "epoch": 0.84, "grad_norm": 1.394167469202081, "learning_rate": 6.19715413833269e-07, "loss": 0.1718, "step": 10558 }, { "epoch": 0.84, "grad_norm": 11.24601338639437, "learning_rate": 6.190908597551226e-07, "loss": 0.6815, "step": 10559 }, { "epoch": 0.84, "grad_norm": 1.425094184879317, "learning_rate": 6.184665997806832e-07, "loss": 0.1733, "step": 10560 }, { "epoch": 0.84, "grad_norm": 1.4626667873287005, "learning_rate": 6.178426339518539e-07, "loss": 0.2131, "step": 10561 }, { "epoch": 0.84, "grad_norm": 9.82423807772496, "learning_rate": 6.172189623105268e-07, "loss": 0.5531, "step": 10562 }, { "epoch": 0.85, "grad_norm": 1.3049301440550842, "learning_rate": 6.165955848985705e-07, "loss": 0.1763, "step": 10563 }, { "epoch": 0.85, "grad_norm": 1.340973208507046, "learning_rate": 6.159725017578338e-07, "loss": 0.1686, "step": 10564 }, { "epoch": 0.85, "grad_norm": 11.119707414439866, "learning_rate": 6.153497129301461e-07, "loss": 0.7096, "step": 10565 }, { "epoch": 0.85, "grad_norm": 1.3365318394036592, "learning_rate": 6.147272184573172e-07, "loss": 0.1779, "step": 10566 }, { "epoch": 0.85, "grad_norm": 1.5019321988976653, "learning_rate": 6.141050183811364e-07, "loss": 0.1671, "step": 10567 }, { "epoch": 0.85, "grad_norm": 1.3240951436671438, "learning_rate": 6.134831127433749e-07, "loss": 0.1821, "step": 10568 }, { "epoch": 0.85, "grad_norm": 1.23077180632172, "learning_rate": 6.128615015857831e-07, "loss": 0.1432, "step": 10569 }, { "epoch": 0.85, "grad_norm": 1.3886829029373806, "learning_rate": 6.122401849500892e-07, "loss": 0.1721, "step": 10570 }, { "epoch": 0.85, "grad_norm": 1.331990608184947, "learning_rate": 6.116191628780088e-07, "loss": 0.1961, "step": 10571 }, { "epoch": 0.85, "grad_norm": 1.4671516418360893, "learning_rate": 6.10998435411228e-07, "loss": 0.1741, "step": 10572 }, { "epoch": 0.85, "grad_norm": 1.224131425694233, "learning_rate": 6.103780025914213e-07, "loss": 0.1606, "step": 10573 }, { "epoch": 0.85, "grad_norm": 1.311785059706187, "learning_rate": 6.097578644602409e-07, "loss": 0.1671, "step": 10574 }, { "epoch": 0.85, "grad_norm": 1.3475492983218158, "learning_rate": 6.091380210593145e-07, "loss": 0.1629, "step": 10575 }, { "epoch": 0.85, "grad_norm": 1.4146782064920833, "learning_rate": 6.085184724302573e-07, "loss": 0.1815, "step": 10576 }, { "epoch": 0.85, "grad_norm": 1.4794577218910716, "learning_rate": 6.07899218614661e-07, "loss": 0.1718, "step": 10577 }, { "epoch": 0.85, "grad_norm": 5.373963825721224, "learning_rate": 6.072802596540978e-07, "loss": 0.5084, "step": 10578 }, { "epoch": 0.85, "grad_norm": 1.2086793299923775, "learning_rate": 6.066615955901189e-07, "loss": 0.1424, "step": 10579 }, { "epoch": 0.85, "grad_norm": 1.2952511224664978, "learning_rate": 6.060432264642601e-07, "loss": 0.1509, "step": 10580 }, { "epoch": 0.85, "grad_norm": 1.4484589906310303, "learning_rate": 6.054251523180316e-07, "loss": 0.1896, "step": 10581 }, { "epoch": 0.85, "grad_norm": 1.274939998354236, "learning_rate": 6.048073731929277e-07, "loss": 0.1508, "step": 10582 }, { "epoch": 0.85, "grad_norm": 1.3822241428217132, "learning_rate": 6.041898891304232e-07, "loss": 0.1841, "step": 10583 }, { "epoch": 0.85, "grad_norm": 1.2742229129461664, "learning_rate": 6.035727001719682e-07, "loss": 0.1957, "step": 10584 }, { "epoch": 0.85, "grad_norm": 1.493077854071611, "learning_rate": 6.029558063589996e-07, "loss": 0.213, "step": 10585 }, { "epoch": 0.85, "grad_norm": 1.3062933900875577, "learning_rate": 6.023392077329298e-07, "loss": 0.1351, "step": 10586 }, { "epoch": 0.85, "grad_norm": 9.483754633905308, "learning_rate": 6.017229043351535e-07, "loss": 0.5208, "step": 10587 }, { "epoch": 0.85, "grad_norm": 1.3166882406360825, "learning_rate": 6.011068962070454e-07, "loss": 0.1834, "step": 10588 }, { "epoch": 0.85, "grad_norm": 1.4238748839588746, "learning_rate": 6.00491183389959e-07, "loss": 0.163, "step": 10589 }, { "epoch": 0.85, "grad_norm": 1.5839068604098732, "learning_rate": 5.998757659252285e-07, "loss": 0.2023, "step": 10590 }, { "epoch": 0.85, "grad_norm": 1.2846631601546437, "learning_rate": 5.99260643854172e-07, "loss": 0.1294, "step": 10591 }, { "epoch": 0.85, "grad_norm": 1.4439416956071258, "learning_rate": 5.986458172180809e-07, "loss": 0.1641, "step": 10592 }, { "epoch": 0.85, "grad_norm": 1.46594038114184, "learning_rate": 5.980312860582305e-07, "loss": 0.1696, "step": 10593 }, { "epoch": 0.85, "grad_norm": 1.5274450308424767, "learning_rate": 5.974170504158794e-07, "loss": 0.2228, "step": 10594 }, { "epoch": 0.85, "grad_norm": 8.42685855904396, "learning_rate": 5.968031103322592e-07, "loss": 0.4682, "step": 10595 }, { "epoch": 0.85, "grad_norm": 1.192632531109942, "learning_rate": 5.961894658485884e-07, "loss": 0.149, "step": 10596 }, { "epoch": 0.85, "grad_norm": 7.378053807274557, "learning_rate": 5.955761170060614e-07, "loss": 0.5758, "step": 10597 }, { "epoch": 0.85, "grad_norm": 1.7145537328717946, "learning_rate": 5.949630638458548e-07, "loss": 0.2066, "step": 10598 }, { "epoch": 0.85, "grad_norm": 1.5403380476082977, "learning_rate": 5.943503064091243e-07, "loss": 0.2104, "step": 10599 }, { "epoch": 0.85, "grad_norm": 5.710931006526699, "learning_rate": 5.937378447370068e-07, "loss": 0.5195, "step": 10600 }, { "epoch": 0.85, "grad_norm": 1.2420568628435795, "learning_rate": 5.931256788706175e-07, "loss": 0.1753, "step": 10601 }, { "epoch": 0.85, "grad_norm": 1.2585527638284753, "learning_rate": 5.925138088510541e-07, "loss": 0.1762, "step": 10602 }, { "epoch": 0.85, "grad_norm": 1.4047215880490322, "learning_rate": 5.919022347193926e-07, "loss": 0.1579, "step": 10603 }, { "epoch": 0.85, "grad_norm": 1.3449013104744025, "learning_rate": 5.912909565166896e-07, "loss": 0.1937, "step": 10604 }, { "epoch": 0.85, "grad_norm": 1.503198894031086, "learning_rate": 5.906799742839842e-07, "loss": 0.1948, "step": 10605 }, { "epoch": 0.85, "grad_norm": 1.280119902006962, "learning_rate": 5.900692880622899e-07, "loss": 0.157, "step": 10606 }, { "epoch": 0.85, "grad_norm": 1.387600039260772, "learning_rate": 5.894588978926069e-07, "loss": 0.1813, "step": 10607 }, { "epoch": 0.85, "grad_norm": 1.4244714486925334, "learning_rate": 5.888488038159112e-07, "loss": 0.1703, "step": 10608 }, { "epoch": 0.85, "grad_norm": 1.4603517493620952, "learning_rate": 5.882390058731607e-07, "loss": 0.1891, "step": 10609 }, { "epoch": 0.85, "grad_norm": 1.4153686565982728, "learning_rate": 5.876295041052932e-07, "loss": 0.1939, "step": 10610 }, { "epoch": 0.85, "grad_norm": 1.3708018147958438, "learning_rate": 5.870202985532253e-07, "loss": 0.1764, "step": 10611 }, { "epoch": 0.85, "grad_norm": 1.4293280795843777, "learning_rate": 5.86411389257856e-07, "loss": 0.1859, "step": 10612 }, { "epoch": 0.85, "grad_norm": 1.4520204756596706, "learning_rate": 5.858027762600632e-07, "loss": 0.1685, "step": 10613 }, { "epoch": 0.85, "grad_norm": 1.399259892432451, "learning_rate": 5.851944596007036e-07, "loss": 0.1463, "step": 10614 }, { "epoch": 0.85, "grad_norm": 1.4839897916603628, "learning_rate": 5.845864393206158e-07, "loss": 0.1371, "step": 10615 }, { "epoch": 0.85, "grad_norm": 1.3670438706079497, "learning_rate": 5.839787154606208e-07, "loss": 0.1513, "step": 10616 }, { "epoch": 0.85, "grad_norm": 1.4792979586309143, "learning_rate": 5.833712880615122e-07, "loss": 0.1984, "step": 10617 }, { "epoch": 0.85, "grad_norm": 1.3932551885664783, "learning_rate": 5.827641571640718e-07, "loss": 0.1232, "step": 10618 }, { "epoch": 0.85, "grad_norm": 1.535431722590136, "learning_rate": 5.821573228090582e-07, "loss": 0.1622, "step": 10619 }, { "epoch": 0.85, "grad_norm": 6.807338066248929, "learning_rate": 5.815507850372077e-07, "loss": 0.6901, "step": 10620 }, { "epoch": 0.85, "grad_norm": 1.3897973085916457, "learning_rate": 5.809445438892408e-07, "loss": 0.1677, "step": 10621 }, { "epoch": 0.85, "grad_norm": 1.4116097718477323, "learning_rate": 5.803385994058558e-07, "loss": 0.1614, "step": 10622 }, { "epoch": 0.85, "grad_norm": 5.098898525817662, "learning_rate": 5.797329516277317e-07, "loss": 0.6058, "step": 10623 }, { "epoch": 0.85, "grad_norm": 1.4215288717268604, "learning_rate": 5.791276005955266e-07, "loss": 0.189, "step": 10624 }, { "epoch": 0.85, "grad_norm": 1.3057289802591212, "learning_rate": 5.785225463498828e-07, "loss": 0.1751, "step": 10625 }, { "epoch": 0.85, "grad_norm": 1.2724361094558154, "learning_rate": 5.779177889314153e-07, "loss": 0.1511, "step": 10626 }, { "epoch": 0.85, "grad_norm": 1.317483034883664, "learning_rate": 5.773133283807242e-07, "loss": 0.1921, "step": 10627 }, { "epoch": 0.85, "grad_norm": 1.6579604976603532, "learning_rate": 5.767091647383916e-07, "loss": 0.1638, "step": 10628 }, { "epoch": 0.85, "grad_norm": 1.361385894563717, "learning_rate": 5.761052980449732e-07, "loss": 0.1702, "step": 10629 }, { "epoch": 0.85, "grad_norm": 1.4987005539239486, "learning_rate": 5.755017283410102e-07, "loss": 0.1954, "step": 10630 }, { "epoch": 0.85, "grad_norm": 1.2134378179620855, "learning_rate": 5.748984556670229e-07, "loss": 0.1689, "step": 10631 }, { "epoch": 0.85, "grad_norm": 12.173148674523707, "learning_rate": 5.742954800635092e-07, "loss": 0.4498, "step": 10632 }, { "epoch": 0.85, "grad_norm": 1.2999402968366802, "learning_rate": 5.736928015709492e-07, "loss": 0.1632, "step": 10633 }, { "epoch": 0.85, "grad_norm": 1.730892312351319, "learning_rate": 5.730904202298026e-07, "loss": 0.1724, "step": 10634 }, { "epoch": 0.85, "grad_norm": 1.3053735325753895, "learning_rate": 5.724883360805095e-07, "loss": 0.1725, "step": 10635 }, { "epoch": 0.85, "grad_norm": 1.5242452414354464, "learning_rate": 5.71886549163489e-07, "loss": 0.2201, "step": 10636 }, { "epoch": 0.85, "grad_norm": 1.4089103139522323, "learning_rate": 5.71285059519141e-07, "loss": 0.2015, "step": 10637 }, { "epoch": 0.85, "grad_norm": 1.340136113570122, "learning_rate": 5.70683867187844e-07, "loss": 0.1539, "step": 10638 }, { "epoch": 0.85, "grad_norm": 1.2574858794164834, "learning_rate": 5.700829722099621e-07, "loss": 0.1654, "step": 10639 }, { "epoch": 0.85, "grad_norm": 1.2545192468012578, "learning_rate": 5.694823746258299e-07, "loss": 0.1714, "step": 10640 }, { "epoch": 0.85, "grad_norm": 1.5946377909347815, "learning_rate": 5.688820744757712e-07, "loss": 0.1479, "step": 10641 }, { "epoch": 0.85, "grad_norm": 1.4158074518883674, "learning_rate": 5.682820718000842e-07, "loss": 0.1772, "step": 10642 }, { "epoch": 0.85, "grad_norm": 1.2227927017458096, "learning_rate": 5.676823666390496e-07, "loss": 0.1723, "step": 10643 }, { "epoch": 0.85, "grad_norm": 1.3296695819257498, "learning_rate": 5.670829590329269e-07, "loss": 0.1726, "step": 10644 }, { "epoch": 0.85, "grad_norm": 1.0793434212961626, "learning_rate": 5.664838490219565e-07, "loss": 0.1197, "step": 10645 }, { "epoch": 0.85, "grad_norm": 8.303235710964803, "learning_rate": 5.658850366463581e-07, "loss": 0.6706, "step": 10646 }, { "epoch": 0.85, "grad_norm": 1.3196752122690611, "learning_rate": 5.652865219463321e-07, "loss": 0.1539, "step": 10647 }, { "epoch": 0.85, "grad_norm": 4.723741662394361, "learning_rate": 5.646883049620589e-07, "loss": 0.5579, "step": 10648 }, { "epoch": 0.85, "grad_norm": 1.366949607399701, "learning_rate": 5.640903857336972e-07, "loss": 0.1671, "step": 10649 }, { "epoch": 0.85, "grad_norm": 1.3584449760063195, "learning_rate": 5.634927643013899e-07, "loss": 0.1716, "step": 10650 }, { "epoch": 0.85, "grad_norm": 1.2110692632425746, "learning_rate": 5.628954407052534e-07, "loss": 0.1502, "step": 10651 }, { "epoch": 0.85, "grad_norm": 1.3576052282926303, "learning_rate": 5.622984149853906e-07, "loss": 0.1512, "step": 10652 }, { "epoch": 0.85, "grad_norm": 9.55089964573737, "learning_rate": 5.617016871818826e-07, "loss": 0.7438, "step": 10653 }, { "epoch": 0.85, "grad_norm": 1.3276497540068664, "learning_rate": 5.611052573347852e-07, "loss": 0.184, "step": 10654 }, { "epoch": 0.85, "grad_norm": 1.2540148809381224, "learning_rate": 5.605091254841427e-07, "loss": 0.1204, "step": 10655 }, { "epoch": 0.85, "grad_norm": 1.4085778618371645, "learning_rate": 5.599132916699735e-07, "loss": 0.1504, "step": 10656 }, { "epoch": 0.85, "grad_norm": 1.3583813078344065, "learning_rate": 5.593177559322776e-07, "loss": 0.1611, "step": 10657 }, { "epoch": 0.85, "grad_norm": 1.2822691300659912, "learning_rate": 5.587225183110362e-07, "loss": 0.1725, "step": 10658 }, { "epoch": 0.85, "grad_norm": 1.267253713850386, "learning_rate": 5.581275788462081e-07, "loss": 0.1907, "step": 10659 }, { "epoch": 0.85, "grad_norm": 1.5352307580405857, "learning_rate": 5.575329375777333e-07, "loss": 0.1933, "step": 10660 }, { "epoch": 0.85, "grad_norm": 11.157385536059648, "learning_rate": 5.569385945455342e-07, "loss": 0.665, "step": 10661 }, { "epoch": 0.85, "grad_norm": 6.1209071136158135, "learning_rate": 5.563445497895087e-07, "loss": 0.5758, "step": 10662 }, { "epoch": 0.85, "grad_norm": 1.5510846194446903, "learning_rate": 5.557508033495356e-07, "loss": 0.1976, "step": 10663 }, { "epoch": 0.85, "grad_norm": 1.56975586978172, "learning_rate": 5.551573552654793e-07, "loss": 0.1634, "step": 10664 }, { "epoch": 0.85, "grad_norm": 1.429251854362159, "learning_rate": 5.545642055771749e-07, "loss": 0.1736, "step": 10665 }, { "epoch": 0.85, "grad_norm": 1.4202109241285006, "learning_rate": 5.539713543244451e-07, "loss": 0.1574, "step": 10666 }, { "epoch": 0.85, "grad_norm": 10.844452706750555, "learning_rate": 5.533788015470898e-07, "loss": 0.6601, "step": 10667 }, { "epoch": 0.85, "grad_norm": 1.4023188170926022, "learning_rate": 5.527865472848881e-07, "loss": 0.1577, "step": 10668 }, { "epoch": 0.85, "grad_norm": 1.3946165271918907, "learning_rate": 5.521945915775989e-07, "loss": 0.1496, "step": 10669 }, { "epoch": 0.85, "grad_norm": 1.2168763992378715, "learning_rate": 5.516029344649649e-07, "loss": 0.1583, "step": 10670 }, { "epoch": 0.85, "grad_norm": 7.317322727025469, "learning_rate": 5.510115759867035e-07, "loss": 0.5213, "step": 10671 }, { "epoch": 0.85, "grad_norm": 1.4299673563390836, "learning_rate": 5.504205161825132e-07, "loss": 0.1527, "step": 10672 }, { "epoch": 0.85, "grad_norm": 1.3929594209464373, "learning_rate": 5.498297550920778e-07, "loss": 0.1767, "step": 10673 }, { "epoch": 0.85, "grad_norm": 1.250503706121312, "learning_rate": 5.492392927550527e-07, "loss": 0.2001, "step": 10674 }, { "epoch": 0.85, "grad_norm": 1.5179472152032525, "learning_rate": 5.486491292110796e-07, "loss": 0.234, "step": 10675 }, { "epoch": 0.85, "grad_norm": 1.5420003597440808, "learning_rate": 5.480592644997778e-07, "loss": 0.1893, "step": 10676 }, { "epoch": 0.85, "grad_norm": 1.5262011594782208, "learning_rate": 5.474696986607464e-07, "loss": 0.1441, "step": 10677 }, { "epoch": 0.85, "grad_norm": 1.1783701722251738, "learning_rate": 5.468804317335647e-07, "loss": 0.1579, "step": 10678 }, { "epoch": 0.85, "grad_norm": 1.3843769445665046, "learning_rate": 5.462914637577921e-07, "loss": 0.1807, "step": 10679 }, { "epoch": 0.85, "grad_norm": 1.4048882880311022, "learning_rate": 5.457027947729676e-07, "loss": 0.2243, "step": 10680 }, { "epoch": 0.85, "grad_norm": 1.3927134109780797, "learning_rate": 5.451144248186108e-07, "loss": 0.1618, "step": 10681 }, { "epoch": 0.85, "grad_norm": 1.3669259670055096, "learning_rate": 5.4452635393422e-07, "loss": 0.1763, "step": 10682 }, { "epoch": 0.85, "grad_norm": 1.2763733629170713, "learning_rate": 5.43938582159274e-07, "loss": 0.1548, "step": 10683 }, { "epoch": 0.85, "grad_norm": 1.3822167284937537, "learning_rate": 5.433511095332339e-07, "loss": 0.1722, "step": 10684 }, { "epoch": 0.85, "grad_norm": 7.548060214449014, "learning_rate": 5.42763936095535e-07, "loss": 0.4781, "step": 10685 }, { "epoch": 0.85, "grad_norm": 1.288812261654522, "learning_rate": 5.421770618855993e-07, "loss": 0.1371, "step": 10686 }, { "epoch": 0.85, "grad_norm": 1.3876410932880585, "learning_rate": 5.415904869428251e-07, "loss": 0.1955, "step": 10687 }, { "epoch": 0.86, "grad_norm": 8.465154381259502, "learning_rate": 5.410042113065877e-07, "loss": 0.4814, "step": 10688 }, { "epoch": 0.86, "grad_norm": 1.3162355070592981, "learning_rate": 5.40418235016249e-07, "loss": 0.1601, "step": 10689 }, { "epoch": 0.86, "grad_norm": 10.161538755306522, "learning_rate": 5.398325581111458e-07, "loss": 0.534, "step": 10690 }, { "epoch": 0.86, "grad_norm": 1.2962372775697666, "learning_rate": 5.392471806305971e-07, "loss": 0.1569, "step": 10691 }, { "epoch": 0.86, "grad_norm": 5.855524527558247, "learning_rate": 5.386621026139006e-07, "loss": 0.5027, "step": 10692 }, { "epoch": 0.86, "grad_norm": 1.2636364632270607, "learning_rate": 5.380773241003345e-07, "loss": 0.1601, "step": 10693 }, { "epoch": 0.86, "grad_norm": 1.2566763585641445, "learning_rate": 5.374928451291556e-07, "loss": 0.1677, "step": 10694 }, { "epoch": 0.86, "grad_norm": 1.497679311190612, "learning_rate": 5.36908665739605e-07, "loss": 0.2121, "step": 10695 }, { "epoch": 0.86, "grad_norm": 1.317051581535928, "learning_rate": 5.36324785970897e-07, "loss": 0.1797, "step": 10696 }, { "epoch": 0.86, "grad_norm": 1.3029451211369458, "learning_rate": 5.357412058622297e-07, "loss": 0.1532, "step": 10697 }, { "epoch": 0.86, "grad_norm": 1.4293064237889748, "learning_rate": 5.351579254527839e-07, "loss": 0.184, "step": 10698 }, { "epoch": 0.86, "grad_norm": 1.2470858045215023, "learning_rate": 5.345749447817117e-07, "loss": 0.1662, "step": 10699 }, { "epoch": 0.86, "grad_norm": 1.3635457116660699, "learning_rate": 5.339922638881545e-07, "loss": 0.1503, "step": 10700 }, { "epoch": 0.86, "grad_norm": 1.4821456405705027, "learning_rate": 5.334098828112283e-07, "loss": 0.1714, "step": 10701 }, { "epoch": 0.86, "grad_norm": 1.3707107659771045, "learning_rate": 5.328278015900296e-07, "loss": 0.1801, "step": 10702 }, { "epoch": 0.86, "grad_norm": 1.3654871727784446, "learning_rate": 5.32246020263636e-07, "loss": 0.1731, "step": 10703 }, { "epoch": 0.86, "grad_norm": 1.4284620545681295, "learning_rate": 5.316645388711033e-07, "loss": 0.1595, "step": 10704 }, { "epoch": 0.86, "grad_norm": 1.4222133609898235, "learning_rate": 5.31083357451469e-07, "loss": 0.1845, "step": 10705 }, { "epoch": 0.86, "grad_norm": 1.2589204197288464, "learning_rate": 5.305024760437489e-07, "loss": 0.1529, "step": 10706 }, { "epoch": 0.86, "grad_norm": 1.5202601878350592, "learning_rate": 5.299218946869394e-07, "loss": 0.1956, "step": 10707 }, { "epoch": 0.86, "grad_norm": 1.539906503052729, "learning_rate": 5.293416134200164e-07, "loss": 0.1833, "step": 10708 }, { "epoch": 0.86, "grad_norm": 1.4676716970709158, "learning_rate": 5.287616322819372e-07, "loss": 0.2112, "step": 10709 }, { "epoch": 0.86, "grad_norm": 1.5142322596846975, "learning_rate": 5.281819513116371e-07, "loss": 0.1984, "step": 10710 }, { "epoch": 0.86, "grad_norm": 1.2372880966812074, "learning_rate": 5.27602570548032e-07, "loss": 0.1265, "step": 10711 }, { "epoch": 0.86, "grad_norm": 1.4559158772102778, "learning_rate": 5.270234900300164e-07, "loss": 0.2008, "step": 10712 }, { "epoch": 0.86, "grad_norm": 1.2967967907460891, "learning_rate": 5.26444709796467e-07, "loss": 0.1492, "step": 10713 }, { "epoch": 0.86, "grad_norm": 1.276132377045814, "learning_rate": 5.258662298862383e-07, "loss": 0.1441, "step": 10714 }, { "epoch": 0.86, "grad_norm": 1.188062824533826, "learning_rate": 5.252880503381658e-07, "loss": 0.168, "step": 10715 }, { "epoch": 0.86, "grad_norm": 1.4688078496630812, "learning_rate": 5.247101711910646e-07, "loss": 0.1897, "step": 10716 }, { "epoch": 0.86, "grad_norm": 1.519678326359524, "learning_rate": 5.241325924837277e-07, "loss": 0.1606, "step": 10717 }, { "epoch": 0.86, "grad_norm": 1.5530452508088857, "learning_rate": 5.235553142549338e-07, "loss": 0.2072, "step": 10718 }, { "epoch": 0.86, "grad_norm": 1.3116130786746796, "learning_rate": 5.229783365434321e-07, "loss": 0.1436, "step": 10719 }, { "epoch": 0.86, "grad_norm": 1.4182911164374756, "learning_rate": 5.22401659387961e-07, "loss": 0.1673, "step": 10720 }, { "epoch": 0.86, "grad_norm": 1.3902968225946, "learning_rate": 5.21825282827233e-07, "loss": 0.1654, "step": 10721 }, { "epoch": 0.86, "grad_norm": 1.3444171053538785, "learning_rate": 5.212492068999414e-07, "loss": 0.1618, "step": 10722 }, { "epoch": 0.86, "grad_norm": 1.3255262001266808, "learning_rate": 5.206734316447615e-07, "loss": 0.1678, "step": 10723 }, { "epoch": 0.86, "grad_norm": 1.3777758351988492, "learning_rate": 5.200979571003456e-07, "loss": 0.2073, "step": 10724 }, { "epoch": 0.86, "grad_norm": 1.323300414159089, "learning_rate": 5.195227833053273e-07, "loss": 0.1373, "step": 10725 }, { "epoch": 0.86, "grad_norm": 1.58545925403752, "learning_rate": 5.189479102983203e-07, "loss": 0.1887, "step": 10726 }, { "epoch": 0.86, "grad_norm": 1.2895156186280978, "learning_rate": 5.183733381179168e-07, "loss": 0.1829, "step": 10727 }, { "epoch": 0.86, "grad_norm": 1.4082991209466291, "learning_rate": 5.177990668026889e-07, "loss": 0.2108, "step": 10728 }, { "epoch": 0.86, "grad_norm": 1.3676593754975992, "learning_rate": 5.172250963911923e-07, "loss": 0.1914, "step": 10729 }, { "epoch": 0.86, "grad_norm": 1.3257451261519173, "learning_rate": 5.166514269219546e-07, "loss": 0.1732, "step": 10730 }, { "epoch": 0.86, "grad_norm": 1.266770315363688, "learning_rate": 5.160780584334923e-07, "loss": 0.1641, "step": 10731 }, { "epoch": 0.86, "grad_norm": 1.615752223659782, "learning_rate": 5.155049909642967e-07, "loss": 0.1984, "step": 10732 }, { "epoch": 0.86, "grad_norm": 1.4719864971448822, "learning_rate": 5.149322245528365e-07, "loss": 0.146, "step": 10733 }, { "epoch": 0.86, "grad_norm": 7.396251805725455, "learning_rate": 5.14359759237566e-07, "loss": 0.4866, "step": 10734 }, { "epoch": 0.86, "grad_norm": 1.323175011068097, "learning_rate": 5.13787595056916e-07, "loss": 0.1254, "step": 10735 }, { "epoch": 0.86, "grad_norm": 1.5753289130588009, "learning_rate": 5.132157320492975e-07, "loss": 0.1731, "step": 10736 }, { "epoch": 0.86, "grad_norm": 1.4173567935064983, "learning_rate": 5.12644170253101e-07, "loss": 0.183, "step": 10737 }, { "epoch": 0.86, "grad_norm": 1.4245674365390482, "learning_rate": 5.120729097066978e-07, "loss": 0.148, "step": 10738 }, { "epoch": 0.86, "grad_norm": 1.3092688966148205, "learning_rate": 5.11501950448437e-07, "loss": 0.1524, "step": 10739 }, { "epoch": 0.86, "grad_norm": 1.3343046111134684, "learning_rate": 5.10931292516652e-07, "loss": 0.1505, "step": 10740 }, { "epoch": 0.86, "grad_norm": 1.4272431085704738, "learning_rate": 5.103609359496492e-07, "loss": 0.1678, "step": 10741 }, { "epoch": 0.86, "grad_norm": 1.38841904232489, "learning_rate": 5.097908807857193e-07, "loss": 0.1772, "step": 10742 }, { "epoch": 0.86, "grad_norm": 1.3657522748499518, "learning_rate": 5.092211270631343e-07, "loss": 0.21, "step": 10743 }, { "epoch": 0.86, "grad_norm": 6.596545760535399, "learning_rate": 5.086516748201393e-07, "loss": 0.4536, "step": 10744 }, { "epoch": 0.86, "grad_norm": 1.320915279745972, "learning_rate": 5.080825240949672e-07, "loss": 0.1655, "step": 10745 }, { "epoch": 0.86, "grad_norm": 1.4090026460298837, "learning_rate": 5.075136749258248e-07, "loss": 0.1999, "step": 10746 }, { "epoch": 0.86, "grad_norm": 6.404031127141463, "learning_rate": 5.069451273509007e-07, "loss": 0.5957, "step": 10747 }, { "epoch": 0.86, "grad_norm": 1.4299705284045514, "learning_rate": 5.063768814083641e-07, "loss": 0.1968, "step": 10748 }, { "epoch": 0.86, "grad_norm": 1.6480755634554847, "learning_rate": 5.058089371363628e-07, "loss": 0.2095, "step": 10749 }, { "epoch": 0.86, "grad_norm": 1.2079254220657192, "learning_rate": 5.05241294573024e-07, "loss": 0.1613, "step": 10750 }, { "epoch": 0.86, "grad_norm": 1.2062723463067426, "learning_rate": 5.046739537564549e-07, "loss": 0.1426, "step": 10751 }, { "epoch": 0.86, "grad_norm": 1.3430996595014446, "learning_rate": 5.041069147247452e-07, "loss": 0.1653, "step": 10752 }, { "epoch": 0.86, "grad_norm": 1.4539651543284005, "learning_rate": 5.035401775159588e-07, "loss": 0.1716, "step": 10753 }, { "epoch": 0.86, "grad_norm": 1.2267534771353652, "learning_rate": 5.029737421681446e-07, "loss": 0.1503, "step": 10754 }, { "epoch": 0.86, "grad_norm": 1.31927656464137, "learning_rate": 5.024076087193292e-07, "loss": 0.1661, "step": 10755 }, { "epoch": 0.86, "grad_norm": 1.3023593501530626, "learning_rate": 5.018417772075174e-07, "loss": 0.1449, "step": 10756 }, { "epoch": 0.86, "grad_norm": 1.3119511005733342, "learning_rate": 5.012762476706961e-07, "loss": 0.1639, "step": 10757 }, { "epoch": 0.86, "grad_norm": 10.397542959684994, "learning_rate": 5.007110201468312e-07, "loss": 0.4112, "step": 10758 }, { "epoch": 0.86, "grad_norm": 1.532501705355364, "learning_rate": 5.001460946738679e-07, "loss": 0.1848, "step": 10759 }, { "epoch": 0.86, "grad_norm": 1.450439442903786, "learning_rate": 4.995814712897312e-07, "loss": 0.1909, "step": 10760 }, { "epoch": 0.86, "grad_norm": 1.2524635849564814, "learning_rate": 4.99017150032326e-07, "loss": 0.1929, "step": 10761 }, { "epoch": 0.86, "grad_norm": 1.21217028037563, "learning_rate": 4.984531309395357e-07, "loss": 0.1267, "step": 10762 }, { "epoch": 0.86, "grad_norm": 1.4286413043166686, "learning_rate": 4.978894140492279e-07, "loss": 0.1704, "step": 10763 }, { "epoch": 0.86, "grad_norm": 1.2487583103839746, "learning_rate": 4.97325999399243e-07, "loss": 0.1367, "step": 10764 }, { "epoch": 0.86, "grad_norm": 1.302547225535744, "learning_rate": 4.967628870274071e-07, "loss": 0.148, "step": 10765 }, { "epoch": 0.86, "grad_norm": 1.3612114656572132, "learning_rate": 4.962000769715236e-07, "loss": 0.1576, "step": 10766 }, { "epoch": 0.86, "grad_norm": 1.416346413575944, "learning_rate": 4.956375692693732e-07, "loss": 0.1676, "step": 10767 }, { "epoch": 0.86, "grad_norm": 1.424282498461266, "learning_rate": 4.950753639587214e-07, "loss": 0.1827, "step": 10768 }, { "epoch": 0.86, "grad_norm": 1.3497385518522562, "learning_rate": 4.9451346107731e-07, "loss": 0.1687, "step": 10769 }, { "epoch": 0.86, "grad_norm": 8.228783537633953, "learning_rate": 4.939518606628607e-07, "loss": 0.5702, "step": 10770 }, { "epoch": 0.86, "grad_norm": 1.3061232290681442, "learning_rate": 4.933905627530761e-07, "loss": 0.1269, "step": 10771 }, { "epoch": 0.86, "grad_norm": 1.1823576704238843, "learning_rate": 4.92829567385637e-07, "loss": 0.1197, "step": 10772 }, { "epoch": 0.86, "grad_norm": 1.3525085853428738, "learning_rate": 4.922688745982046e-07, "loss": 0.1476, "step": 10773 }, { "epoch": 0.86, "grad_norm": 1.473038333807972, "learning_rate": 4.917084844284225e-07, "loss": 0.1819, "step": 10774 }, { "epoch": 0.86, "grad_norm": 1.2857561415888838, "learning_rate": 4.911483969139086e-07, "loss": 0.1668, "step": 10775 }, { "epoch": 0.86, "grad_norm": 1.4338573692401042, "learning_rate": 4.905886120922626e-07, "loss": 0.1987, "step": 10776 }, { "epoch": 0.86, "grad_norm": 1.439769701814589, "learning_rate": 4.900291300010684e-07, "loss": 0.1822, "step": 10777 }, { "epoch": 0.86, "grad_norm": 1.639885417213558, "learning_rate": 4.894699506778816e-07, "loss": 0.1923, "step": 10778 }, { "epoch": 0.86, "grad_norm": 1.2570515971613156, "learning_rate": 4.889110741602437e-07, "loss": 0.1821, "step": 10779 }, { "epoch": 0.86, "grad_norm": 1.3190793801263405, "learning_rate": 4.883525004856738e-07, "loss": 0.1588, "step": 10780 }, { "epoch": 0.86, "grad_norm": 1.2685335737114225, "learning_rate": 4.877942296916704e-07, "loss": 0.1568, "step": 10781 }, { "epoch": 0.86, "grad_norm": 1.3179934732928813, "learning_rate": 4.872362618157117e-07, "loss": 0.1878, "step": 10782 }, { "epoch": 0.86, "grad_norm": 1.2987883971390568, "learning_rate": 4.866785968952559e-07, "loss": 0.1501, "step": 10783 }, { "epoch": 0.86, "grad_norm": 1.5489665768860346, "learning_rate": 4.861212349677408e-07, "loss": 0.1947, "step": 10784 }, { "epoch": 0.86, "grad_norm": 1.4832335057118036, "learning_rate": 4.855641760705837e-07, "loss": 0.1692, "step": 10785 }, { "epoch": 0.86, "grad_norm": 1.4413926151169387, "learning_rate": 4.850074202411814e-07, "loss": 0.2114, "step": 10786 }, { "epoch": 0.86, "grad_norm": 1.373933697097091, "learning_rate": 4.844509675169101e-07, "loss": 0.1594, "step": 10787 }, { "epoch": 0.86, "grad_norm": 1.525182960616275, "learning_rate": 4.838948179351288e-07, "loss": 0.1774, "step": 10788 }, { "epoch": 0.86, "grad_norm": 1.2897896412497687, "learning_rate": 4.833389715331699e-07, "loss": 0.1445, "step": 10789 }, { "epoch": 0.86, "grad_norm": 1.37918614791825, "learning_rate": 4.827834283483513e-07, "loss": 0.1998, "step": 10790 }, { "epoch": 0.86, "grad_norm": 7.313756174988163, "learning_rate": 4.822281884179681e-07, "loss": 0.4548, "step": 10791 }, { "epoch": 0.86, "grad_norm": 1.1670251074380418, "learning_rate": 4.816732517792949e-07, "loss": 0.149, "step": 10792 }, { "epoch": 0.86, "grad_norm": 1.2564124482055126, "learning_rate": 4.81118618469586e-07, "loss": 0.1368, "step": 10793 }, { "epoch": 0.86, "grad_norm": 1.2466733723568582, "learning_rate": 4.805642885260764e-07, "loss": 0.1549, "step": 10794 }, { "epoch": 0.86, "grad_norm": 1.4223359751903752, "learning_rate": 4.800102619859792e-07, "loss": 0.1387, "step": 10795 }, { "epoch": 0.86, "grad_norm": 1.3861661837478807, "learning_rate": 4.794565388864869e-07, "loss": 0.1563, "step": 10796 }, { "epoch": 0.86, "grad_norm": 1.467508216815393, "learning_rate": 4.789031192647758e-07, "loss": 0.1932, "step": 10797 }, { "epoch": 0.86, "grad_norm": 1.4063786463582348, "learning_rate": 4.783500031579952e-07, "loss": 0.1854, "step": 10798 }, { "epoch": 0.86, "grad_norm": 1.3966460858230294, "learning_rate": 4.777971906032797e-07, "loss": 0.1785, "step": 10799 }, { "epoch": 0.86, "grad_norm": 1.5076328423209329, "learning_rate": 4.772446816377408e-07, "loss": 0.2023, "step": 10800 }, { "epoch": 0.86, "grad_norm": 9.975669748014996, "learning_rate": 4.7669247629846926e-07, "loss": 0.4406, "step": 10801 }, { "epoch": 0.86, "grad_norm": 1.270288084306112, "learning_rate": 4.761405746225373e-07, "loss": 0.1589, "step": 10802 }, { "epoch": 0.86, "grad_norm": 1.5511397758411156, "learning_rate": 4.755889766469951e-07, "loss": 0.2187, "step": 10803 }, { "epoch": 0.86, "grad_norm": 1.1754300006033265, "learning_rate": 4.7503768240887313e-07, "loss": 0.1288, "step": 10804 }, { "epoch": 0.86, "grad_norm": 1.3361724472095928, "learning_rate": 4.744866919451824e-07, "loss": 0.1438, "step": 10805 }, { "epoch": 0.86, "grad_norm": 7.382646187136116, "learning_rate": 4.7393600529291096e-07, "loss": 0.5683, "step": 10806 }, { "epoch": 0.86, "grad_norm": 1.525595422318339, "learning_rate": 4.733856224890282e-07, "loss": 0.1768, "step": 10807 }, { "epoch": 0.86, "grad_norm": 11.818702062891521, "learning_rate": 4.7283554357048566e-07, "loss": 0.629, "step": 10808 }, { "epoch": 0.86, "grad_norm": 6.557497888484463, "learning_rate": 4.7228576857420883e-07, "loss": 0.6591, "step": 10809 }, { "epoch": 0.86, "grad_norm": 1.3753639150594845, "learning_rate": 4.7173629753710595e-07, "loss": 0.1187, "step": 10810 }, { "epoch": 0.86, "grad_norm": 1.49871510291026, "learning_rate": 4.711871304960675e-07, "loss": 0.1672, "step": 10811 }, { "epoch": 0.86, "grad_norm": 1.297628754275986, "learning_rate": 4.706382674879567e-07, "loss": 0.1963, "step": 10812 }, { "epoch": 0.87, "grad_norm": 1.4704846987786728, "learning_rate": 4.7008970854962363e-07, "loss": 0.2076, "step": 10813 }, { "epoch": 0.87, "grad_norm": 1.184003159461943, "learning_rate": 4.695414537178933e-07, "loss": 0.1754, "step": 10814 }, { "epoch": 0.87, "grad_norm": 1.2868423575961738, "learning_rate": 4.689935030295717e-07, "loss": 0.1578, "step": 10815 }, { "epoch": 0.87, "grad_norm": 1.3664872436691053, "learning_rate": 4.68445856521445e-07, "loss": 0.1766, "step": 10816 }, { "epoch": 0.87, "grad_norm": 1.415791641927633, "learning_rate": 4.678985142302778e-07, "loss": 0.1794, "step": 10817 }, { "epoch": 0.87, "grad_norm": 1.4222051500911845, "learning_rate": 4.6735147619281495e-07, "loss": 0.1641, "step": 10818 }, { "epoch": 0.87, "grad_norm": 1.2977619644460066, "learning_rate": 4.668047424457811e-07, "loss": 0.1562, "step": 10819 }, { "epoch": 0.87, "grad_norm": 1.3747404818840225, "learning_rate": 4.662583130258796e-07, "loss": 0.1599, "step": 10820 }, { "epoch": 0.87, "grad_norm": 1.49567819586526, "learning_rate": 4.65712187969794e-07, "loss": 0.1815, "step": 10821 }, { "epoch": 0.87, "grad_norm": 1.259701534891147, "learning_rate": 4.6516636731418875e-07, "loss": 0.1601, "step": 10822 }, { "epoch": 0.87, "grad_norm": 1.6270672832231368, "learning_rate": 4.64620851095704e-07, "loss": 0.1758, "step": 10823 }, { "epoch": 0.87, "grad_norm": 1.2823394066127363, "learning_rate": 4.640756393509638e-07, "loss": 0.1665, "step": 10824 }, { "epoch": 0.87, "grad_norm": 1.293474583669807, "learning_rate": 4.6353073211656886e-07, "loss": 0.1854, "step": 10825 }, { "epoch": 0.87, "grad_norm": 1.5014117271963903, "learning_rate": 4.6298612942910105e-07, "loss": 0.2134, "step": 10826 }, { "epoch": 0.87, "grad_norm": 1.4169576569551785, "learning_rate": 4.624418313251211e-07, "loss": 0.1654, "step": 10827 }, { "epoch": 0.87, "grad_norm": 1.4653698053638866, "learning_rate": 4.6189783784116927e-07, "loss": 0.1809, "step": 10828 }, { "epoch": 0.87, "grad_norm": 1.2649154153665856, "learning_rate": 4.613541490137657e-07, "loss": 0.162, "step": 10829 }, { "epoch": 0.87, "grad_norm": 1.3698287618284197, "learning_rate": 4.608107648794091e-07, "loss": 0.183, "step": 10830 }, { "epoch": 0.87, "grad_norm": 14.837131370819016, "learning_rate": 4.602676854745797e-07, "loss": 0.5171, "step": 10831 }, { "epoch": 0.87, "grad_norm": 1.3555949438615669, "learning_rate": 4.597249108357338e-07, "loss": 0.172, "step": 10832 }, { "epoch": 0.87, "grad_norm": 1.3235932939104549, "learning_rate": 4.591824409993134e-07, "loss": 0.1585, "step": 10833 }, { "epoch": 0.87, "grad_norm": 1.2457131580146856, "learning_rate": 4.586402760017322e-07, "loss": 0.162, "step": 10834 }, { "epoch": 0.87, "grad_norm": 6.591793963448041, "learning_rate": 4.580984158793894e-07, "loss": 0.4505, "step": 10835 }, { "epoch": 0.87, "grad_norm": 1.229849529708539, "learning_rate": 4.5755686066866313e-07, "loss": 0.1367, "step": 10836 }, { "epoch": 0.87, "grad_norm": 15.262851854442323, "learning_rate": 4.5701561040590534e-07, "loss": 0.408, "step": 10837 }, { "epoch": 0.87, "grad_norm": 9.371548020655865, "learning_rate": 4.564746651274554e-07, "loss": 0.6073, "step": 10838 }, { "epoch": 0.87, "grad_norm": 1.3818505976355937, "learning_rate": 4.5593402486962757e-07, "loss": 0.1495, "step": 10839 }, { "epoch": 0.87, "grad_norm": 1.319194960967024, "learning_rate": 4.553936896687161e-07, "loss": 0.1602, "step": 10840 }, { "epoch": 0.87, "grad_norm": 1.5173042284823384, "learning_rate": 4.548536595609959e-07, "loss": 0.1856, "step": 10841 }, { "epoch": 0.87, "grad_norm": 1.2479264111781918, "learning_rate": 4.543139345827219e-07, "loss": 0.1477, "step": 10842 }, { "epoch": 0.87, "grad_norm": 1.4781360673786108, "learning_rate": 4.5377451477012446e-07, "loss": 0.1487, "step": 10843 }, { "epoch": 0.87, "grad_norm": 6.714765408001884, "learning_rate": 4.532354001594197e-07, "loss": 0.7003, "step": 10844 }, { "epoch": 0.87, "grad_norm": 1.5462426691874387, "learning_rate": 4.5269659078679973e-07, "loss": 0.216, "step": 10845 }, { "epoch": 0.87, "grad_norm": 1.3214280166045052, "learning_rate": 4.5215808668843284e-07, "loss": 0.1818, "step": 10846 }, { "epoch": 0.87, "grad_norm": 1.5339132592964004, "learning_rate": 4.516198879004746e-07, "loss": 0.1742, "step": 10847 }, { "epoch": 0.87, "grad_norm": 1.3870471557520077, "learning_rate": 4.5108199445905387e-07, "loss": 0.1821, "step": 10848 }, { "epoch": 0.87, "grad_norm": 1.3420032327808804, "learning_rate": 4.5054440640028173e-07, "loss": 0.1571, "step": 10849 }, { "epoch": 0.87, "grad_norm": 1.4189815382376711, "learning_rate": 4.5000712376024826e-07, "loss": 0.1713, "step": 10850 }, { "epoch": 0.87, "grad_norm": 1.5888482292040853, "learning_rate": 4.494701465750217e-07, "loss": 0.2116, "step": 10851 }, { "epoch": 0.87, "grad_norm": 1.3505876452660421, "learning_rate": 4.489334748806518e-07, "loss": 0.1467, "step": 10852 }, { "epoch": 0.87, "grad_norm": 6.350048776561533, "learning_rate": 4.4839710871316776e-07, "loss": 0.6005, "step": 10853 }, { "epoch": 0.87, "grad_norm": 1.127126537109826, "learning_rate": 4.478610481085766e-07, "loss": 0.1335, "step": 10854 }, { "epoch": 0.87, "grad_norm": 1.490094509043298, "learning_rate": 4.473252931028643e-07, "loss": 0.1862, "step": 10855 }, { "epoch": 0.87, "grad_norm": 1.4997920218893166, "learning_rate": 4.4678984373200117e-07, "loss": 0.1784, "step": 10856 }, { "epoch": 0.87, "grad_norm": 1.557147374164674, "learning_rate": 4.462547000319295e-07, "loss": 0.2157, "step": 10857 }, { "epoch": 0.87, "grad_norm": 1.3226962037582275, "learning_rate": 4.4571986203857777e-07, "loss": 0.1489, "step": 10858 }, { "epoch": 0.87, "grad_norm": 1.3237092307333724, "learning_rate": 4.451853297878511e-07, "loss": 0.1453, "step": 10859 }, { "epoch": 0.87, "grad_norm": 1.2593725537732356, "learning_rate": 4.446511033156337e-07, "loss": 0.1249, "step": 10860 }, { "epoch": 0.87, "grad_norm": 1.2740839148635874, "learning_rate": 4.4411718265778946e-07, "loss": 0.1708, "step": 10861 }, { "epoch": 0.87, "grad_norm": 1.560343060647224, "learning_rate": 4.435835678501632e-07, "loss": 0.1947, "step": 10862 }, { "epoch": 0.87, "grad_norm": 1.414233442714087, "learning_rate": 4.430502589285768e-07, "loss": 0.1787, "step": 10863 }, { "epoch": 0.87, "grad_norm": 1.240280994101839, "learning_rate": 4.4251725592883374e-07, "loss": 0.1377, "step": 10864 }, { "epoch": 0.87, "grad_norm": 1.4033701362237134, "learning_rate": 4.419845588867161e-07, "loss": 0.172, "step": 10865 }, { "epoch": 0.87, "grad_norm": 1.216382774076751, "learning_rate": 4.4145216783798404e-07, "loss": 0.1315, "step": 10866 }, { "epoch": 0.87, "grad_norm": 1.3779752815586535, "learning_rate": 4.4092008281838183e-07, "loss": 0.1866, "step": 10867 }, { "epoch": 0.87, "grad_norm": 1.4186357924391155, "learning_rate": 4.403883038636264e-07, "loss": 0.2068, "step": 10868 }, { "epoch": 0.87, "grad_norm": 1.4300458050000626, "learning_rate": 4.3985683100941976e-07, "loss": 0.189, "step": 10869 }, { "epoch": 0.87, "grad_norm": 1.4693112917705078, "learning_rate": 4.3932566429144175e-07, "loss": 0.2197, "step": 10870 }, { "epoch": 0.87, "grad_norm": 1.2472156478581446, "learning_rate": 4.387948037453482e-07, "loss": 0.1389, "step": 10871 }, { "epoch": 0.87, "grad_norm": 1.3319265849427564, "learning_rate": 4.382642494067801e-07, "loss": 0.1287, "step": 10872 }, { "epoch": 0.87, "grad_norm": 1.3022825219310923, "learning_rate": 4.377340013113551e-07, "loss": 0.1584, "step": 10873 }, { "epoch": 0.87, "grad_norm": 1.3414097197806183, "learning_rate": 4.372040594946692e-07, "loss": 0.1943, "step": 10874 }, { "epoch": 0.87, "grad_norm": 1.5054932744572576, "learning_rate": 4.3667442399229985e-07, "loss": 0.2043, "step": 10875 }, { "epoch": 0.87, "grad_norm": 1.3250410655203841, "learning_rate": 4.361450948398027e-07, "loss": 0.1562, "step": 10876 }, { "epoch": 0.87, "grad_norm": 1.2595182993460818, "learning_rate": 4.356160720727121e-07, "loss": 0.146, "step": 10877 }, { "epoch": 0.87, "grad_norm": 1.2216303329216809, "learning_rate": 4.3508735572654505e-07, "loss": 0.1702, "step": 10878 }, { "epoch": 0.87, "grad_norm": 1.302197874130371, "learning_rate": 4.3455894583679606e-07, "loss": 0.1535, "step": 10879 }, { "epoch": 0.87, "grad_norm": 1.4700381280069024, "learning_rate": 4.3403084243893556e-07, "loss": 0.1799, "step": 10880 }, { "epoch": 0.87, "grad_norm": 1.8225266585250548, "learning_rate": 4.3350304556842024e-07, "loss": 0.1535, "step": 10881 }, { "epoch": 0.87, "grad_norm": 1.3879002615969367, "learning_rate": 4.3297555526068124e-07, "loss": 0.1784, "step": 10882 }, { "epoch": 0.87, "grad_norm": 1.3632825827868034, "learning_rate": 4.3244837155113073e-07, "loss": 0.1903, "step": 10883 }, { "epoch": 0.87, "grad_norm": 1.3470137072999415, "learning_rate": 4.3192149447516043e-07, "loss": 0.1692, "step": 10884 }, { "epoch": 0.87, "grad_norm": 1.981848215763743, "learning_rate": 4.3139492406814086e-07, "loss": 0.2152, "step": 10885 }, { "epoch": 0.87, "grad_norm": 1.3341174056670826, "learning_rate": 4.308686603654211e-07, "loss": 0.1705, "step": 10886 }, { "epoch": 0.87, "grad_norm": 7.6414093777774434, "learning_rate": 4.3034270340233444e-07, "loss": 0.5674, "step": 10887 }, { "epoch": 0.87, "grad_norm": 1.8388214873263853, "learning_rate": 4.2981705321418596e-07, "loss": 0.1773, "step": 10888 }, { "epoch": 0.87, "grad_norm": 1.3487742879912015, "learning_rate": 4.292917098362653e-07, "loss": 0.1644, "step": 10889 }, { "epoch": 0.87, "grad_norm": 1.4780184186332737, "learning_rate": 4.2876667330384315e-07, "loss": 0.1746, "step": 10890 }, { "epoch": 0.87, "grad_norm": 7.473981077003753, "learning_rate": 4.282419436521618e-07, "loss": 0.4217, "step": 10891 }, { "epoch": 0.87, "grad_norm": 1.423379379268459, "learning_rate": 4.2771752091645246e-07, "loss": 0.2166, "step": 10892 }, { "epoch": 0.87, "grad_norm": 1.4533622077121109, "learning_rate": 4.271934051319188e-07, "loss": 0.1994, "step": 10893 }, { "epoch": 0.87, "grad_norm": 7.517224369234004, "learning_rate": 4.2666959633374693e-07, "loss": 0.509, "step": 10894 }, { "epoch": 0.87, "grad_norm": 6.727207902450283, "learning_rate": 4.261460945571017e-07, "loss": 0.4944, "step": 10895 }, { "epoch": 0.87, "grad_norm": 1.3708848107942913, "learning_rate": 4.256228998371276e-07, "loss": 0.1584, "step": 10896 }, { "epoch": 0.87, "grad_norm": 1.5022227487567108, "learning_rate": 4.251000122089477e-07, "loss": 0.1804, "step": 10897 }, { "epoch": 0.87, "grad_norm": 1.3010065062471772, "learning_rate": 4.2457743170766565e-07, "loss": 0.1498, "step": 10898 }, { "epoch": 0.87, "grad_norm": 1.2065311568646544, "learning_rate": 4.2405515836836384e-07, "loss": 0.1544, "step": 10899 }, { "epoch": 0.87, "grad_norm": 8.345209053360188, "learning_rate": 4.2353319222610265e-07, "loss": 0.5977, "step": 10900 }, { "epoch": 0.87, "grad_norm": 1.489633700460956, "learning_rate": 4.2301153331592627e-07, "loss": 0.1559, "step": 10901 }, { "epoch": 0.87, "grad_norm": 1.2685997682648158, "learning_rate": 4.2249018167285104e-07, "loss": 0.1727, "step": 10902 }, { "epoch": 0.87, "grad_norm": 1.3209471690839587, "learning_rate": 4.219691373318807e-07, "loss": 0.1632, "step": 10903 }, { "epoch": 0.87, "grad_norm": 1.4165597418682605, "learning_rate": 4.2144840032799283e-07, "loss": 0.1975, "step": 10904 }, { "epoch": 0.87, "grad_norm": 1.5326772159473, "learning_rate": 4.2092797069614667e-07, "loss": 0.1728, "step": 10905 }, { "epoch": 0.87, "grad_norm": 1.3957537367840924, "learning_rate": 4.2040784847127925e-07, "loss": 0.1991, "step": 10906 }, { "epoch": 0.87, "grad_norm": 1.3673420129629266, "learning_rate": 4.1988803368830934e-07, "loss": 0.1513, "step": 10907 }, { "epoch": 0.87, "grad_norm": 1.2924918648047796, "learning_rate": 4.193685263821323e-07, "loss": 0.1297, "step": 10908 }, { "epoch": 0.87, "grad_norm": 1.5949097165949266, "learning_rate": 4.1884932658762536e-07, "loss": 0.1897, "step": 10909 }, { "epoch": 0.87, "grad_norm": 1.67150128914668, "learning_rate": 4.183304343396427e-07, "loss": 0.2001, "step": 10910 }, { "epoch": 0.87, "grad_norm": 1.7539800429875156, "learning_rate": 4.1781184967301934e-07, "loss": 0.2051, "step": 10911 }, { "epoch": 0.87, "grad_norm": 1.4904017167281327, "learning_rate": 4.1729357262257185e-07, "loss": 0.1808, "step": 10912 }, { "epoch": 0.87, "grad_norm": 1.2448141904210848, "learning_rate": 4.167756032230902e-07, "loss": 0.1479, "step": 10913 }, { "epoch": 0.87, "grad_norm": 1.411363680582119, "learning_rate": 4.1625794150934986e-07, "loss": 0.1636, "step": 10914 }, { "epoch": 0.87, "grad_norm": 1.438723899241405, "learning_rate": 4.15740587516103e-07, "loss": 0.1894, "step": 10915 }, { "epoch": 0.87, "grad_norm": 1.3504406299230292, "learning_rate": 4.1522354127807807e-07, "loss": 0.1583, "step": 10916 }, { "epoch": 0.87, "grad_norm": 1.4667402732974089, "learning_rate": 4.147068028299894e-07, "loss": 0.2029, "step": 10917 }, { "epoch": 0.87, "grad_norm": 1.5541346607212239, "learning_rate": 4.14190372206526e-07, "loss": 0.2054, "step": 10918 }, { "epoch": 0.87, "grad_norm": 4.0853948355321, "learning_rate": 4.136742494423573e-07, "loss": 0.5319, "step": 10919 }, { "epoch": 0.87, "grad_norm": 1.5027997779344031, "learning_rate": 4.131584345721312e-07, "loss": 0.2093, "step": 10920 }, { "epoch": 0.87, "grad_norm": 1.3504262149292312, "learning_rate": 4.1264292763047886e-07, "loss": 0.169, "step": 10921 }, { "epoch": 0.87, "grad_norm": 1.5029131287446185, "learning_rate": 4.1212772865200365e-07, "loss": 0.168, "step": 10922 }, { "epoch": 0.87, "grad_norm": 1.379462950587084, "learning_rate": 4.1161283767129636e-07, "loss": 0.161, "step": 10923 }, { "epoch": 0.87, "grad_norm": 1.2674442395480743, "learning_rate": 4.1109825472292196e-07, "loss": 0.1572, "step": 10924 }, { "epoch": 0.87, "grad_norm": 1.4053414716144796, "learning_rate": 4.1058397984142405e-07, "loss": 0.185, "step": 10925 }, { "epoch": 0.87, "grad_norm": 1.362549981955955, "learning_rate": 4.1007001306132943e-07, "loss": 0.1858, "step": 10926 }, { "epoch": 0.87, "grad_norm": 1.437342282821647, "learning_rate": 4.0955635441714216e-07, "loss": 0.1721, "step": 10927 }, { "epoch": 0.87, "grad_norm": 1.3327197547260632, "learning_rate": 4.0904300394334515e-07, "loss": 0.1596, "step": 10928 }, { "epoch": 0.87, "grad_norm": 1.399636838110795, "learning_rate": 4.0852996167440153e-07, "loss": 0.1865, "step": 10929 }, { "epoch": 0.87, "grad_norm": 1.3132224334418692, "learning_rate": 4.0801722764475303e-07, "loss": 0.1499, "step": 10930 }, { "epoch": 0.87, "grad_norm": 1.3943536674854309, "learning_rate": 4.075048018888211e-07, "loss": 0.1754, "step": 10931 }, { "epoch": 0.87, "grad_norm": 1.3014529381268565, "learning_rate": 4.069926844410066e-07, "loss": 0.1609, "step": 10932 }, { "epoch": 0.87, "grad_norm": 1.430002171585103, "learning_rate": 4.0648087533568913e-07, "loss": 0.1678, "step": 10933 }, { "epoch": 0.87, "grad_norm": 7.85925357905726, "learning_rate": 4.0596937460722795e-07, "loss": 0.5752, "step": 10934 }, { "epoch": 0.87, "grad_norm": 1.5819819719618007, "learning_rate": 4.0545818228996336e-07, "loss": 0.225, "step": 10935 }, { "epoch": 0.87, "grad_norm": 1.5142029765709426, "learning_rate": 4.049472984182107e-07, "loss": 0.1638, "step": 10936 }, { "epoch": 0.87, "grad_norm": 1.3899194760551206, "learning_rate": 4.0443672302626857e-07, "loss": 0.1684, "step": 10937 }, { "epoch": 0.88, "grad_norm": 1.3746808389307428, "learning_rate": 4.039264561484135e-07, "loss": 0.1997, "step": 10938 }, { "epoch": 0.88, "grad_norm": 1.3135159870906599, "learning_rate": 4.0341649781890144e-07, "loss": 0.1478, "step": 10939 }, { "epoch": 0.88, "grad_norm": 1.3534165482166853, "learning_rate": 4.0290684807196667e-07, "loss": 0.1611, "step": 10940 }, { "epoch": 0.88, "grad_norm": 1.3825277015934818, "learning_rate": 4.0239750694182346e-07, "loss": 0.1515, "step": 10941 }, { "epoch": 0.88, "grad_norm": 1.4366822210585461, "learning_rate": 4.018884744626661e-07, "loss": 0.1666, "step": 10942 }, { "epoch": 0.88, "grad_norm": 1.2599282634047564, "learning_rate": 4.013797506686673e-07, "loss": 0.1571, "step": 10943 }, { "epoch": 0.88, "grad_norm": 1.3949266783620244, "learning_rate": 4.008713355939786e-07, "loss": 0.1468, "step": 10944 }, { "epoch": 0.88, "grad_norm": 1.3526273193698493, "learning_rate": 4.003632292727316e-07, "loss": 0.1822, "step": 10945 }, { "epoch": 0.88, "grad_norm": 7.417770019203867, "learning_rate": 3.998554317390385e-07, "loss": 0.6612, "step": 10946 }, { "epoch": 0.88, "grad_norm": 1.4436500361770703, "learning_rate": 3.99347943026987e-07, "loss": 0.1624, "step": 10947 }, { "epoch": 0.88, "grad_norm": 1.5919952013677723, "learning_rate": 3.9884076317064813e-07, "loss": 0.1585, "step": 10948 }, { "epoch": 0.88, "grad_norm": 1.3413592588917502, "learning_rate": 3.983338922040708e-07, "loss": 0.1771, "step": 10949 }, { "epoch": 0.88, "grad_norm": 1.3399280680857606, "learning_rate": 3.9782733016128006e-07, "loss": 0.1822, "step": 10950 }, { "epoch": 0.88, "grad_norm": 1.4227833007043245, "learning_rate": 3.973210770762853e-07, "loss": 0.1545, "step": 10951 }, { "epoch": 0.88, "grad_norm": 1.2689849886012383, "learning_rate": 3.9681513298307216e-07, "loss": 0.1341, "step": 10952 }, { "epoch": 0.88, "grad_norm": 1.353718525355096, "learning_rate": 3.963094979156068e-07, "loss": 0.1858, "step": 10953 }, { "epoch": 0.88, "grad_norm": 1.207401303646797, "learning_rate": 3.9580417190783316e-07, "loss": 0.162, "step": 10954 }, { "epoch": 0.88, "grad_norm": 1.2977639500196645, "learning_rate": 3.952991549936752e-07, "loss": 0.1709, "step": 10955 }, { "epoch": 0.88, "grad_norm": 8.622246039667715, "learning_rate": 3.947944472070364e-07, "loss": 0.5607, "step": 10956 }, { "epoch": 0.88, "grad_norm": 1.3724633708381222, "learning_rate": 3.9429004858180076e-07, "loss": 0.1379, "step": 10957 }, { "epoch": 0.88, "grad_norm": 1.342505536913543, "learning_rate": 3.9378595915182837e-07, "loss": 0.1543, "step": 10958 }, { "epoch": 0.88, "grad_norm": 1.2877394075074835, "learning_rate": 3.9328217895095997e-07, "loss": 0.1452, "step": 10959 }, { "epoch": 0.88, "grad_norm": 1.2962800859117072, "learning_rate": 3.9277870801301855e-07, "loss": 0.1679, "step": 10960 }, { "epoch": 0.88, "grad_norm": 1.4926300779522514, "learning_rate": 3.9227554637179977e-07, "loss": 0.1926, "step": 10961 }, { "epoch": 0.88, "grad_norm": 1.457895022612262, "learning_rate": 3.9177269406108553e-07, "loss": 0.19, "step": 10962 }, { "epoch": 0.88, "grad_norm": 1.2102066230924624, "learning_rate": 3.912701511146322e-07, "loss": 0.1324, "step": 10963 }, { "epoch": 0.88, "grad_norm": 1.381145396280551, "learning_rate": 3.907679175661783e-07, "loss": 0.149, "step": 10964 }, { "epoch": 0.88, "grad_norm": 1.4051858911562365, "learning_rate": 3.9026599344943794e-07, "loss": 0.1831, "step": 10965 }, { "epoch": 0.88, "grad_norm": 10.409093828637229, "learning_rate": 3.8976437879811035e-07, "loss": 0.5226, "step": 10966 }, { "epoch": 0.88, "grad_norm": 10.291491207008816, "learning_rate": 3.8926307364586745e-07, "loss": 0.5507, "step": 10967 }, { "epoch": 0.88, "grad_norm": 1.4195971334040238, "learning_rate": 3.88762078026364e-07, "loss": 0.1792, "step": 10968 }, { "epoch": 0.88, "grad_norm": 1.5042481847713542, "learning_rate": 3.882613919732353e-07, "loss": 0.2042, "step": 10969 }, { "epoch": 0.88, "grad_norm": 1.2757365404604653, "learning_rate": 3.877610155200906e-07, "loss": 0.1368, "step": 10970 }, { "epoch": 0.88, "grad_norm": 1.3993723628481347, "learning_rate": 3.8726094870052413e-07, "loss": 0.1566, "step": 10971 }, { "epoch": 0.88, "grad_norm": 1.2711664213883518, "learning_rate": 3.867611915481062e-07, "loss": 0.1828, "step": 10972 }, { "epoch": 0.88, "grad_norm": 1.3470967548281911, "learning_rate": 3.8626174409638726e-07, "loss": 0.1787, "step": 10973 }, { "epoch": 0.88, "grad_norm": 1.3822747247011236, "learning_rate": 3.8576260637889664e-07, "loss": 0.1679, "step": 10974 }, { "epoch": 0.88, "grad_norm": 1.5288873216807632, "learning_rate": 3.852637784291424e-07, "loss": 0.1669, "step": 10975 }, { "epoch": 0.88, "grad_norm": 1.469797136464258, "learning_rate": 3.847652602806129e-07, "loss": 0.2143, "step": 10976 }, { "epoch": 0.88, "grad_norm": 1.374641653582868, "learning_rate": 3.8426705196677517e-07, "loss": 0.1727, "step": 10977 }, { "epoch": 0.88, "grad_norm": 1.4824597654974832, "learning_rate": 3.837691535210758e-07, "loss": 0.188, "step": 10978 }, { "epoch": 0.88, "grad_norm": 1.301449845871071, "learning_rate": 3.832715649769381e-07, "loss": 0.1682, "step": 10979 }, { "epoch": 0.88, "grad_norm": 1.2392884380458786, "learning_rate": 3.827742863677708e-07, "loss": 0.1705, "step": 10980 }, { "epoch": 0.88, "grad_norm": 1.395701858577252, "learning_rate": 3.8227731772695344e-07, "loss": 0.16, "step": 10981 }, { "epoch": 0.88, "grad_norm": 1.3329215481184697, "learning_rate": 3.8178065908785143e-07, "loss": 0.1438, "step": 10982 }, { "epoch": 0.88, "grad_norm": 5.789037380138889, "learning_rate": 3.8128431048380653e-07, "loss": 0.5914, "step": 10983 }, { "epoch": 0.88, "grad_norm": 1.4309208445004962, "learning_rate": 3.807882719481398e-07, "loss": 0.1536, "step": 10984 }, { "epoch": 0.88, "grad_norm": 1.3788356746513588, "learning_rate": 3.802925435141525e-07, "loss": 0.1615, "step": 10985 }, { "epoch": 0.88, "grad_norm": 1.4389441480773226, "learning_rate": 3.797971252151239e-07, "loss": 0.1565, "step": 10986 }, { "epoch": 0.88, "grad_norm": 1.3997453481955369, "learning_rate": 3.793020170843126e-07, "loss": 0.2511, "step": 10987 }, { "epoch": 0.88, "grad_norm": 1.3211223075899494, "learning_rate": 3.7880721915495744e-07, "loss": 0.1345, "step": 10988 }, { "epoch": 0.88, "grad_norm": 7.8411297044490285, "learning_rate": 3.783127314602747e-07, "loss": 0.5914, "step": 10989 }, { "epoch": 0.88, "grad_norm": 1.3990802706992915, "learning_rate": 3.778185540334611e-07, "loss": 0.1615, "step": 10990 }, { "epoch": 0.88, "grad_norm": 1.5766375201593792, "learning_rate": 3.7732468690769406e-07, "loss": 0.2197, "step": 10991 }, { "epoch": 0.88, "grad_norm": 1.3719197011153383, "learning_rate": 3.7683113011612593e-07, "loss": 0.1848, "step": 10992 }, { "epoch": 0.88, "grad_norm": 1.7242142893124688, "learning_rate": 3.763378836918918e-07, "loss": 0.1884, "step": 10993 }, { "epoch": 0.88, "grad_norm": 1.3563799719243221, "learning_rate": 3.758449476681059e-07, "loss": 0.171, "step": 10994 }, { "epoch": 0.88, "grad_norm": 1.5387660926425042, "learning_rate": 3.753523220778571e-07, "loss": 0.1997, "step": 10995 }, { "epoch": 0.88, "grad_norm": 1.2170233167367548, "learning_rate": 3.748600069542202e-07, "loss": 0.1321, "step": 10996 }, { "epoch": 0.88, "grad_norm": 1.5861987192498954, "learning_rate": 3.7436800233024485e-07, "loss": 0.199, "step": 10997 }, { "epoch": 0.88, "grad_norm": 1.4837344927632734, "learning_rate": 3.7387630823896013e-07, "loss": 0.1934, "step": 10998 }, { "epoch": 0.88, "grad_norm": 1.220479886428112, "learning_rate": 3.7338492471337585e-07, "loss": 0.1544, "step": 10999 }, { "epoch": 0.88, "grad_norm": 1.3230158311643934, "learning_rate": 3.728938517864794e-07, "loss": 0.1988, "step": 11000 }, { "epoch": 0.88, "grad_norm": 1.317205359221635, "learning_rate": 3.7240308949123835e-07, "loss": 0.1909, "step": 11001 }, { "epoch": 0.88, "grad_norm": 1.36334413043963, "learning_rate": 3.7191263786059917e-07, "loss": 0.1745, "step": 11002 }, { "epoch": 0.88, "grad_norm": 1.271028503771703, "learning_rate": 3.7142249692748713e-07, "loss": 0.1783, "step": 11003 }, { "epoch": 0.88, "grad_norm": 1.4745772432840127, "learning_rate": 3.7093266672480586e-07, "loss": 0.1851, "step": 11004 }, { "epoch": 0.88, "grad_norm": 1.3262805059430038, "learning_rate": 3.7044314728544196e-07, "loss": 0.1559, "step": 11005 }, { "epoch": 0.88, "grad_norm": 1.5003210951088684, "learning_rate": 3.6995393864225457e-07, "loss": 0.1955, "step": 11006 }, { "epoch": 0.88, "grad_norm": 1.4049914377776864, "learning_rate": 3.694650408280892e-07, "loss": 0.181, "step": 11007 }, { "epoch": 0.88, "grad_norm": 1.3088945212744225, "learning_rate": 3.689764538757651e-07, "loss": 0.1968, "step": 11008 }, { "epoch": 0.88, "grad_norm": 1.5727815688238391, "learning_rate": 3.684881778180832e-07, "loss": 0.176, "step": 11009 }, { "epoch": 0.88, "grad_norm": 7.464961185025448, "learning_rate": 3.6800021268782293e-07, "loss": 0.7439, "step": 11010 }, { "epoch": 0.88, "grad_norm": 1.3276180684554897, "learning_rate": 3.67512558517743e-07, "loss": 0.1547, "step": 11011 }, { "epoch": 0.88, "grad_norm": 1.4771211394590342, "learning_rate": 3.6702521534058065e-07, "loss": 0.1764, "step": 11012 }, { "epoch": 0.88, "grad_norm": 1.1966089048599553, "learning_rate": 3.665381831890519e-07, "loss": 0.1559, "step": 11013 }, { "epoch": 0.88, "grad_norm": 44.03278836023848, "learning_rate": 3.6605146209585605e-07, "loss": 0.5717, "step": 11014 }, { "epoch": 0.88, "grad_norm": 1.2491527112479215, "learning_rate": 3.655650520936638e-07, "loss": 0.159, "step": 11015 }, { "epoch": 0.88, "grad_norm": 1.4174246236840933, "learning_rate": 3.650789532151322e-07, "loss": 0.1686, "step": 11016 }, { "epoch": 0.88, "grad_norm": 1.2616323945758376, "learning_rate": 3.645931654928936e-07, "loss": 0.1766, "step": 11017 }, { "epoch": 0.88, "grad_norm": 1.385534418225101, "learning_rate": 3.641076889595607e-07, "loss": 0.1856, "step": 11018 }, { "epoch": 0.88, "grad_norm": 8.217597760212756, "learning_rate": 3.636225236477248e-07, "loss": 0.5081, "step": 11019 }, { "epoch": 0.88, "grad_norm": 11.666725604506887, "learning_rate": 3.6313766958995635e-07, "loss": 0.5583, "step": 11020 }, { "epoch": 0.88, "grad_norm": 1.2896188538520112, "learning_rate": 3.626531268188055e-07, "loss": 0.15, "step": 11021 }, { "epoch": 0.88, "grad_norm": 1.3671465091312707, "learning_rate": 3.6216889536680125e-07, "loss": 0.1287, "step": 11022 }, { "epoch": 0.88, "grad_norm": 1.3987110119034194, "learning_rate": 3.616849752664503e-07, "loss": 0.1885, "step": 11023 }, { "epoch": 0.88, "grad_norm": 1.416102596938885, "learning_rate": 3.612013665502401e-07, "loss": 0.2125, "step": 11024 }, { "epoch": 0.88, "grad_norm": 1.3298266166149644, "learning_rate": 3.60718069250639e-07, "loss": 0.1799, "step": 11025 }, { "epoch": 0.88, "grad_norm": 1.4291490805647944, "learning_rate": 3.602350834000884e-07, "loss": 0.1688, "step": 11026 }, { "epoch": 0.88, "grad_norm": 1.2591676795184357, "learning_rate": 3.597524090310156e-07, "loss": 0.1568, "step": 11027 }, { "epoch": 0.88, "grad_norm": 1.215410419605292, "learning_rate": 3.5927004617582363e-07, "loss": 0.1536, "step": 11028 }, { "epoch": 0.88, "grad_norm": 1.3920706912398622, "learning_rate": 3.587879948668926e-07, "loss": 0.2054, "step": 11029 }, { "epoch": 0.88, "grad_norm": 1.430658811985136, "learning_rate": 3.5830625513658677e-07, "loss": 0.1725, "step": 11030 }, { "epoch": 0.88, "grad_norm": 2.0397914583044003, "learning_rate": 3.5782482701724575e-07, "loss": 0.2074, "step": 11031 }, { "epoch": 0.88, "grad_norm": 1.443383237556742, "learning_rate": 3.573437105411892e-07, "loss": 0.1318, "step": 11032 }, { "epoch": 0.88, "grad_norm": 1.222184831502122, "learning_rate": 3.5686290574071637e-07, "loss": 0.1332, "step": 11033 }, { "epoch": 0.88, "grad_norm": 1.465555494750924, "learning_rate": 3.563824126481047e-07, "loss": 0.1733, "step": 11034 }, { "epoch": 0.88, "grad_norm": 1.3676026107522443, "learning_rate": 3.5590223129561063e-07, "loss": 0.1733, "step": 11035 }, { "epoch": 0.88, "grad_norm": 1.4452617528897223, "learning_rate": 3.554223617154723e-07, "loss": 0.1753, "step": 11036 }, { "epoch": 0.88, "grad_norm": 11.544648516317134, "learning_rate": 3.549428039399022e-07, "loss": 0.4559, "step": 11037 }, { "epoch": 0.88, "grad_norm": 1.2562725218811925, "learning_rate": 3.544635580010952e-07, "loss": 0.1619, "step": 11038 }, { "epoch": 0.88, "grad_norm": 1.4287710654552075, "learning_rate": 3.539846239312267e-07, "loss": 0.1852, "step": 11039 }, { "epoch": 0.88, "grad_norm": 1.402688958356042, "learning_rate": 3.535060017624453e-07, "loss": 0.1801, "step": 11040 }, { "epoch": 0.88, "grad_norm": 1.205857472891897, "learning_rate": 3.5302769152688543e-07, "loss": 0.1681, "step": 11041 }, { "epoch": 0.88, "grad_norm": 1.2762026035082723, "learning_rate": 3.5254969325665576e-07, "loss": 0.1533, "step": 11042 }, { "epoch": 0.88, "grad_norm": 1.2679095645245506, "learning_rate": 3.520720069838474e-07, "loss": 0.1804, "step": 11043 }, { "epoch": 0.88, "grad_norm": 1.4587740545207275, "learning_rate": 3.515946327405273e-07, "loss": 0.144, "step": 11044 }, { "epoch": 0.88, "grad_norm": 1.2461141742161097, "learning_rate": 3.511175705587433e-07, "loss": 0.1489, "step": 11045 }, { "epoch": 0.88, "grad_norm": 1.4816179584005076, "learning_rate": 3.506408204705225e-07, "loss": 0.2099, "step": 11046 }, { "epoch": 0.88, "grad_norm": 1.21467246003251, "learning_rate": 3.5016438250787097e-07, "loss": 0.1103, "step": 11047 }, { "epoch": 0.88, "grad_norm": 11.039574422964098, "learning_rate": 3.496882567027726e-07, "loss": 0.6468, "step": 11048 }, { "epoch": 0.88, "grad_norm": 1.4056976353816089, "learning_rate": 3.4921244308719014e-07, "loss": 0.1383, "step": 11049 }, { "epoch": 0.88, "grad_norm": 1.2845727549568058, "learning_rate": 3.4873694169306915e-07, "loss": 0.1647, "step": 11050 }, { "epoch": 0.88, "grad_norm": 1.4997488741741116, "learning_rate": 3.482617525523296e-07, "loss": 0.2078, "step": 11051 }, { "epoch": 0.88, "grad_norm": 1.386253062336796, "learning_rate": 3.477868756968733e-07, "loss": 0.2082, "step": 11052 }, { "epoch": 0.88, "grad_norm": 1.3453813991414216, "learning_rate": 3.473123111585791e-07, "loss": 0.1863, "step": 11053 }, { "epoch": 0.88, "grad_norm": 1.5657840770247213, "learning_rate": 3.4683805896930713e-07, "loss": 0.1707, "step": 11054 }, { "epoch": 0.88, "grad_norm": 10.470699246511801, "learning_rate": 3.4636411916089465e-07, "loss": 0.5715, "step": 11055 }, { "epoch": 0.88, "grad_norm": 1.2985639075382216, "learning_rate": 3.4589049176515844e-07, "loss": 0.139, "step": 11056 }, { "epoch": 0.88, "grad_norm": 1.426930214315953, "learning_rate": 3.4541717681389475e-07, "loss": 0.1523, "step": 11057 }, { "epoch": 0.88, "grad_norm": 1.5630127519827672, "learning_rate": 3.4494417433887874e-07, "loss": 0.198, "step": 11058 }, { "epoch": 0.88, "grad_norm": 1.4551814617907926, "learning_rate": 3.444714843718655e-07, "loss": 0.1526, "step": 11059 }, { "epoch": 0.88, "grad_norm": 1.4627558181325298, "learning_rate": 3.4399910694458583e-07, "loss": 0.18, "step": 11060 }, { "epoch": 0.88, "grad_norm": 6.558874424708463, "learning_rate": 3.435270420887543e-07, "loss": 0.5888, "step": 11061 }, { "epoch": 0.88, "grad_norm": 1.3633184816651975, "learning_rate": 3.430552898360617e-07, "loss": 0.1679, "step": 11062 }, { "epoch": 0.89, "grad_norm": 10.272409880032354, "learning_rate": 3.4258385021817555e-07, "loss": 0.6198, "step": 11063 }, { "epoch": 0.89, "grad_norm": 1.2833901003105128, "learning_rate": 3.4211272326674816e-07, "loss": 0.1655, "step": 11064 }, { "epoch": 0.89, "grad_norm": 1.3870952801446848, "learning_rate": 3.41641909013406e-07, "loss": 0.1563, "step": 11065 }, { "epoch": 0.89, "grad_norm": 1.3942311187350314, "learning_rate": 3.411714074897571e-07, "loss": 0.1731, "step": 11066 }, { "epoch": 0.89, "grad_norm": 1.3201721977710539, "learning_rate": 3.4070121872738727e-07, "loss": 0.1457, "step": 11067 }, { "epoch": 0.89, "grad_norm": 1.4562341523126676, "learning_rate": 3.4023134275786184e-07, "loss": 0.2079, "step": 11068 }, { "epoch": 0.89, "grad_norm": 1.2836211048998314, "learning_rate": 3.3976177961272395e-07, "loss": 0.1639, "step": 11069 }, { "epoch": 0.89, "grad_norm": 1.4275956787815223, "learning_rate": 3.3929252932349944e-07, "loss": 0.1686, "step": 11070 }, { "epoch": 0.89, "grad_norm": 1.3213251128933132, "learning_rate": 3.388235919216876e-07, "loss": 0.1838, "step": 11071 }, { "epoch": 0.89, "grad_norm": 1.4243463840839043, "learning_rate": 3.383549674387704e-07, "loss": 0.1882, "step": 11072 }, { "epoch": 0.89, "grad_norm": 1.2774908023058051, "learning_rate": 3.378866559062105e-07, "loss": 0.1544, "step": 11073 }, { "epoch": 0.89, "grad_norm": 1.3253873546663835, "learning_rate": 3.374186573554428e-07, "loss": 0.1609, "step": 11074 }, { "epoch": 0.89, "grad_norm": 1.4527610039297811, "learning_rate": 3.369509718178887e-07, "loss": 0.2088, "step": 11075 }, { "epoch": 0.89, "grad_norm": 1.1784328852258756, "learning_rate": 3.364835993249443e-07, "loss": 0.1316, "step": 11076 }, { "epoch": 0.89, "grad_norm": 1.3477742521436449, "learning_rate": 3.3601653990798566e-07, "loss": 0.1638, "step": 11077 }, { "epoch": 0.89, "grad_norm": 1.3399956660126535, "learning_rate": 3.3554979359836816e-07, "loss": 0.1538, "step": 11078 }, { "epoch": 0.89, "grad_norm": 1.2741122550977966, "learning_rate": 3.350833604274256e-07, "loss": 0.1583, "step": 11079 }, { "epoch": 0.89, "grad_norm": 1.3418466177857096, "learning_rate": 3.3461724042647136e-07, "loss": 0.1518, "step": 11080 }, { "epoch": 0.89, "grad_norm": 1.4152361416780388, "learning_rate": 3.3415143362679704e-07, "loss": 0.1291, "step": 11081 }, { "epoch": 0.89, "grad_norm": 1.2158615842321112, "learning_rate": 3.336859400596743e-07, "loss": 0.1446, "step": 11082 }, { "epoch": 0.89, "grad_norm": 1.340204107858864, "learning_rate": 3.3322075975635194e-07, "loss": 0.1926, "step": 11083 }, { "epoch": 0.89, "grad_norm": 1.432024641806054, "learning_rate": 3.3275589274806117e-07, "loss": 0.1982, "step": 11084 }, { "epoch": 0.89, "grad_norm": 1.3892252764458355, "learning_rate": 3.3229133906600706e-07, "loss": 0.1572, "step": 11085 }, { "epoch": 0.89, "grad_norm": 1.2932078274644476, "learning_rate": 3.3182709874137854e-07, "loss": 0.1575, "step": 11086 }, { "epoch": 0.89, "grad_norm": 1.5323624056715146, "learning_rate": 3.313631718053412e-07, "loss": 0.186, "step": 11087 }, { "epoch": 0.89, "grad_norm": 1.3295573604417814, "learning_rate": 3.3089955828903895e-07, "loss": 0.1748, "step": 11088 }, { "epoch": 0.89, "grad_norm": 1.3811967715112745, "learning_rate": 3.30436258223597e-07, "loss": 0.1992, "step": 11089 }, { "epoch": 0.89, "grad_norm": 1.3233296382122004, "learning_rate": 3.299732716401166e-07, "loss": 0.1651, "step": 11090 }, { "epoch": 0.89, "grad_norm": 1.3481720028068371, "learning_rate": 3.295105985696806e-07, "loss": 0.1834, "step": 11091 }, { "epoch": 0.89, "grad_norm": 1.1764927131863134, "learning_rate": 3.2904823904334804e-07, "loss": 0.1417, "step": 11092 }, { "epoch": 0.89, "grad_norm": 1.2141261313428235, "learning_rate": 3.285861930921608e-07, "loss": 0.1517, "step": 11093 }, { "epoch": 0.89, "grad_norm": 8.25421314163318, "learning_rate": 3.2812446074713513e-07, "loss": 0.6064, "step": 11094 }, { "epoch": 0.89, "grad_norm": 1.4175774048839866, "learning_rate": 3.276630420392707e-07, "loss": 0.2164, "step": 11095 }, { "epoch": 0.89, "grad_norm": 1.3645605837282677, "learning_rate": 3.272019369995422e-07, "loss": 0.1494, "step": 11096 }, { "epoch": 0.89, "grad_norm": 1.5182162987960348, "learning_rate": 3.267411456589059e-07, "loss": 0.179, "step": 11097 }, { "epoch": 0.89, "grad_norm": 1.5123252274058185, "learning_rate": 3.2628066804829604e-07, "loss": 0.1896, "step": 11098 }, { "epoch": 0.89, "grad_norm": 1.3349536137495188, "learning_rate": 3.258205041986262e-07, "loss": 0.145, "step": 11099 }, { "epoch": 0.89, "grad_norm": 9.537807137846857, "learning_rate": 3.2536065414078724e-07, "loss": 0.6531, "step": 11100 }, { "epoch": 0.89, "grad_norm": 1.2637114981561115, "learning_rate": 3.249011179056516e-07, "loss": 0.1337, "step": 11101 }, { "epoch": 0.89, "grad_norm": 13.717426333235315, "learning_rate": 3.2444189552406914e-07, "loss": 0.6826, "step": 11102 }, { "epoch": 0.89, "grad_norm": 1.3208547401926929, "learning_rate": 3.2398298702686736e-07, "loss": 0.1561, "step": 11103 }, { "epoch": 0.89, "grad_norm": 1.4993039738045884, "learning_rate": 3.2352439244485723e-07, "loss": 0.1856, "step": 11104 }, { "epoch": 0.89, "grad_norm": 1.2196306290947323, "learning_rate": 3.230661118088219e-07, "loss": 0.1897, "step": 11105 }, { "epoch": 0.89, "grad_norm": 8.21585471262804, "learning_rate": 3.2260814514953053e-07, "loss": 0.6915, "step": 11106 }, { "epoch": 0.89, "grad_norm": 1.4078083556275283, "learning_rate": 3.2215049249772645e-07, "loss": 0.1501, "step": 11107 }, { "epoch": 0.89, "grad_norm": 1.4117956060357606, "learning_rate": 3.216931538841317e-07, "loss": 0.1462, "step": 11108 }, { "epoch": 0.89, "grad_norm": 1.3046957063643916, "learning_rate": 3.212361293394511e-07, "loss": 0.1645, "step": 11109 }, { "epoch": 0.89, "grad_norm": 1.2266206781493447, "learning_rate": 3.2077941889436525e-07, "loss": 0.156, "step": 11110 }, { "epoch": 0.89, "grad_norm": 1.2246394431719136, "learning_rate": 3.203230225795345e-07, "loss": 0.1405, "step": 11111 }, { "epoch": 0.89, "grad_norm": 1.4544179726239115, "learning_rate": 3.198669404255977e-07, "loss": 0.1564, "step": 11112 }, { "epoch": 0.89, "grad_norm": 7.318958098398374, "learning_rate": 3.1941117246317433e-07, "loss": 0.649, "step": 11113 }, { "epoch": 0.89, "grad_norm": 1.533541404980899, "learning_rate": 3.1895571872285926e-07, "loss": 0.1803, "step": 11114 }, { "epoch": 0.89, "grad_norm": 1.251346296631035, "learning_rate": 3.185005792352308e-07, "loss": 0.1422, "step": 11115 }, { "epoch": 0.89, "grad_norm": 1.5429566960988912, "learning_rate": 3.1804575403084293e-07, "loss": 0.1752, "step": 11116 }, { "epoch": 0.89, "grad_norm": 1.3996174578535134, "learning_rate": 3.1759124314022835e-07, "loss": 0.1632, "step": 11117 }, { "epoch": 0.89, "grad_norm": 1.107689015086432, "learning_rate": 3.171370465939022e-07, "loss": 0.13, "step": 11118 }, { "epoch": 0.89, "grad_norm": 1.252095183458183, "learning_rate": 3.166831644223528e-07, "loss": 0.1337, "step": 11119 }, { "epoch": 0.89, "grad_norm": 1.4629233393971817, "learning_rate": 3.162295966560536e-07, "loss": 0.1736, "step": 11120 }, { "epoch": 0.89, "grad_norm": 1.4730333068431403, "learning_rate": 3.1577634332545294e-07, "loss": 0.1974, "step": 11121 }, { "epoch": 0.89, "grad_norm": 1.2654861225452234, "learning_rate": 3.1532340446097874e-07, "loss": 0.1514, "step": 11122 }, { "epoch": 0.89, "grad_norm": 1.8531602252401644, "learning_rate": 3.148707800930384e-07, "loss": 0.18, "step": 11123 }, { "epoch": 0.89, "grad_norm": 1.3135201466264634, "learning_rate": 3.1441847025201754e-07, "loss": 0.1482, "step": 11124 }, { "epoch": 0.89, "grad_norm": 1.1459905357761362, "learning_rate": 3.1396647496828245e-07, "loss": 0.1283, "step": 11125 }, { "epoch": 0.89, "grad_norm": 1.4546993747524704, "learning_rate": 3.135147942721756e-07, "loss": 0.1962, "step": 11126 }, { "epoch": 0.89, "grad_norm": 1.2675305180466545, "learning_rate": 3.130634281940198e-07, "loss": 0.1776, "step": 11127 }, { "epoch": 0.89, "grad_norm": 1.3387171026259423, "learning_rate": 3.126123767641165e-07, "loss": 0.1613, "step": 11128 }, { "epoch": 0.89, "grad_norm": 1.5483004018780033, "learning_rate": 3.121616400127481e-07, "loss": 0.1796, "step": 11129 }, { "epoch": 0.89, "grad_norm": 1.331678741762243, "learning_rate": 3.1171121797017036e-07, "loss": 0.1943, "step": 11130 }, { "epoch": 0.89, "grad_norm": 14.226704254575495, "learning_rate": 3.1126111066662467e-07, "loss": 0.5818, "step": 11131 }, { "epoch": 0.89, "grad_norm": 1.3554157878099566, "learning_rate": 3.10811318132328e-07, "loss": 0.1667, "step": 11132 }, { "epoch": 0.89, "grad_norm": 10.789609420766846, "learning_rate": 3.103618403974734e-07, "loss": 0.7467, "step": 11133 }, { "epoch": 0.89, "grad_norm": 1.218761279217407, "learning_rate": 3.0991267749223787e-07, "loss": 0.152, "step": 11134 }, { "epoch": 0.89, "grad_norm": 1.4001744893932715, "learning_rate": 3.09463829446775e-07, "loss": 0.2006, "step": 11135 }, { "epoch": 0.89, "grad_norm": 1.4691793704122154, "learning_rate": 3.090152962912174e-07, "loss": 0.1917, "step": 11136 }, { "epoch": 0.89, "grad_norm": 1.3000315431550218, "learning_rate": 3.085670780556749e-07, "loss": 0.1803, "step": 11137 }, { "epoch": 0.89, "grad_norm": 1.2598946649620286, "learning_rate": 3.0811917477024057e-07, "loss": 0.1389, "step": 11138 }, { "epoch": 0.89, "grad_norm": 5.94081004043715, "learning_rate": 3.076715864649804e-07, "loss": 0.501, "step": 11139 }, { "epoch": 0.89, "grad_norm": 8.460900402587475, "learning_rate": 3.072243131699443e-07, "loss": 0.59, "step": 11140 }, { "epoch": 0.89, "grad_norm": 1.4067151448244295, "learning_rate": 3.067773549151598e-07, "loss": 0.1744, "step": 11141 }, { "epoch": 0.89, "grad_norm": 1.2396228480282168, "learning_rate": 3.0633071173062966e-07, "loss": 0.1495, "step": 11142 }, { "epoch": 0.89, "grad_norm": 5.89290509373479, "learning_rate": 3.05884383646341e-07, "loss": 0.5554, "step": 11143 }, { "epoch": 0.89, "grad_norm": 1.254318194169198, "learning_rate": 3.054383706922559e-07, "loss": 0.1508, "step": 11144 }, { "epoch": 0.89, "grad_norm": 11.242087523337672, "learning_rate": 3.049926728983171e-07, "loss": 0.4199, "step": 11145 }, { "epoch": 0.89, "grad_norm": 1.4413295071220684, "learning_rate": 3.045472902944452e-07, "loss": 0.1848, "step": 11146 }, { "epoch": 0.89, "grad_norm": 1.6188726010420798, "learning_rate": 3.041022229105406e-07, "loss": 0.2121, "step": 11147 }, { "epoch": 0.89, "grad_norm": 1.4643359019414128, "learning_rate": 3.036574707764811e-07, "loss": 0.2102, "step": 11148 }, { "epoch": 0.89, "grad_norm": 1.3398425913420022, "learning_rate": 3.032130339221268e-07, "loss": 0.1545, "step": 11149 }, { "epoch": 0.89, "grad_norm": 1.259954941119145, "learning_rate": 3.0276891237731085e-07, "loss": 0.1633, "step": 11150 }, { "epoch": 0.89, "grad_norm": 4.797083454564077, "learning_rate": 3.023251061718496e-07, "loss": 0.5474, "step": 11151 }, { "epoch": 0.89, "grad_norm": 1.4232150218390427, "learning_rate": 3.0188161533553864e-07, "loss": 0.1568, "step": 11152 }, { "epoch": 0.89, "grad_norm": 1.410387009940278, "learning_rate": 3.014384398981479e-07, "loss": 0.1911, "step": 11153 }, { "epoch": 0.89, "grad_norm": 1.309069093701047, "learning_rate": 3.009955798894315e-07, "loss": 0.1773, "step": 11154 }, { "epoch": 0.89, "grad_norm": 1.4405097712256496, "learning_rate": 3.005530353391195e-07, "loss": 0.1991, "step": 11155 }, { "epoch": 0.89, "grad_norm": 6.079036034296953, "learning_rate": 3.001108062769209e-07, "loss": 0.5024, "step": 11156 }, { "epoch": 0.89, "grad_norm": 1.1491660571543736, "learning_rate": 2.996688927325242e-07, "loss": 0.1409, "step": 11157 }, { "epoch": 0.89, "grad_norm": 1.4011063665403745, "learning_rate": 2.9922729473559564e-07, "loss": 0.165, "step": 11158 }, { "epoch": 0.89, "grad_norm": 1.515201636351137, "learning_rate": 2.9878601231578207e-07, "loss": 0.2126, "step": 11159 }, { "epoch": 0.89, "grad_norm": 1.41828655010213, "learning_rate": 2.9834504550270706e-07, "loss": 0.1487, "step": 11160 }, { "epoch": 0.89, "grad_norm": 1.5116500148704706, "learning_rate": 2.9790439432597517e-07, "loss": 0.1635, "step": 11161 }, { "epoch": 0.89, "grad_norm": 1.2852916893514048, "learning_rate": 2.9746405881516727e-07, "loss": 0.1609, "step": 11162 }, { "epoch": 0.89, "grad_norm": 1.2531764113769908, "learning_rate": 2.9702403899984635e-07, "loss": 0.1759, "step": 11163 }, { "epoch": 0.89, "grad_norm": 1.446296957653437, "learning_rate": 2.965843349095499e-07, "loss": 0.1819, "step": 11164 }, { "epoch": 0.89, "grad_norm": 1.423817906458392, "learning_rate": 2.9614494657379865e-07, "loss": 0.1825, "step": 11165 }, { "epoch": 0.89, "grad_norm": 1.4649724112846072, "learning_rate": 2.957058740220886e-07, "loss": 0.1837, "step": 11166 }, { "epoch": 0.89, "grad_norm": 1.4910796802988728, "learning_rate": 2.952671172838972e-07, "loss": 0.1804, "step": 11167 }, { "epoch": 0.89, "grad_norm": 1.4760678774805092, "learning_rate": 2.948286763886793e-07, "loss": 0.1746, "step": 11168 }, { "epoch": 0.89, "grad_norm": 1.418580867201188, "learning_rate": 2.943905513658679e-07, "loss": 0.1882, "step": 11169 }, { "epoch": 0.89, "grad_norm": 1.287768415634642, "learning_rate": 2.939527422448768e-07, "loss": 0.1696, "step": 11170 }, { "epoch": 0.89, "grad_norm": 1.4511559305315846, "learning_rate": 2.9351524905509634e-07, "loss": 0.1773, "step": 11171 }, { "epoch": 0.89, "grad_norm": 8.243612767275687, "learning_rate": 2.9307807182589743e-07, "loss": 0.5028, "step": 11172 }, { "epoch": 0.89, "grad_norm": 1.148265095834209, "learning_rate": 2.926412105866283e-07, "loss": 0.1495, "step": 11173 }, { "epoch": 0.89, "grad_norm": 1.296998698100775, "learning_rate": 2.9220466536661885e-07, "loss": 0.1506, "step": 11174 }, { "epoch": 0.89, "grad_norm": 1.3369539130876749, "learning_rate": 2.917684361951728e-07, "loss": 0.1791, "step": 11175 }, { "epoch": 0.89, "grad_norm": 1.4148968677225944, "learning_rate": 2.913325231015779e-07, "loss": 0.1748, "step": 11176 }, { "epoch": 0.89, "grad_norm": 1.328571471896504, "learning_rate": 2.908969261150979e-07, "loss": 0.1806, "step": 11177 }, { "epoch": 0.89, "grad_norm": 1.559644767050513, "learning_rate": 2.904616452649739e-07, "loss": 0.2056, "step": 11178 }, { "epoch": 0.89, "grad_norm": 1.3342893807781893, "learning_rate": 2.9002668058042973e-07, "loss": 0.1829, "step": 11179 }, { "epoch": 0.89, "grad_norm": 1.5132020511015358, "learning_rate": 2.8959203209066477e-07, "loss": 0.1813, "step": 11180 }, { "epoch": 0.89, "grad_norm": 1.315377947579541, "learning_rate": 2.891576998248591e-07, "loss": 0.1698, "step": 11181 }, { "epoch": 0.89, "grad_norm": 1.3975496276756696, "learning_rate": 2.8872368381216984e-07, "loss": 0.1804, "step": 11182 }, { "epoch": 0.89, "grad_norm": 1.5575378891100844, "learning_rate": 2.882899840817349e-07, "loss": 0.1828, "step": 11183 }, { "epoch": 0.89, "grad_norm": 7.215020347320121, "learning_rate": 2.878566006626693e-07, "loss": 0.4571, "step": 11184 }, { "epoch": 0.89, "grad_norm": 1.354633052374859, "learning_rate": 2.874235335840664e-07, "loss": 0.1732, "step": 11185 }, { "epoch": 0.89, "grad_norm": 1.385517589919848, "learning_rate": 2.869907828750018e-07, "loss": 0.1549, "step": 11186 }, { "epoch": 0.89, "grad_norm": 1.6360534661396309, "learning_rate": 2.8655834856452404e-07, "loss": 0.1946, "step": 11187 }, { "epoch": 0.9, "grad_norm": 1.3263504680774265, "learning_rate": 2.8612623068166646e-07, "loss": 0.1641, "step": 11188 }, { "epoch": 0.9, "grad_norm": 1.5332972515360006, "learning_rate": 2.85694429255437e-07, "loss": 0.2095, "step": 11189 }, { "epoch": 0.9, "grad_norm": 1.306270618784906, "learning_rate": 2.852629443148247e-07, "loss": 0.1625, "step": 11190 }, { "epoch": 0.9, "grad_norm": 1.323675804102063, "learning_rate": 2.848317758887958e-07, "loss": 0.165, "step": 11191 }, { "epoch": 0.9, "grad_norm": 1.391800786274174, "learning_rate": 2.8440092400629673e-07, "loss": 0.1997, "step": 11192 }, { "epoch": 0.9, "grad_norm": 1.3876283897055879, "learning_rate": 2.839703886962508e-07, "loss": 0.1549, "step": 11193 }, { "epoch": 0.9, "grad_norm": 1.0982690536142847, "learning_rate": 2.8354016998756174e-07, "loss": 0.1604, "step": 11194 }, { "epoch": 0.9, "grad_norm": 1.4975301551982976, "learning_rate": 2.831102679091113e-07, "loss": 0.1653, "step": 11195 }, { "epoch": 0.9, "grad_norm": 9.087906569363012, "learning_rate": 2.826806824897599e-07, "loss": 0.5338, "step": 11196 }, { "epoch": 0.9, "grad_norm": 1.4753092377997294, "learning_rate": 2.822514137583482e-07, "loss": 0.1852, "step": 11197 }, { "epoch": 0.9, "grad_norm": 1.3112978564947928, "learning_rate": 2.818224617436921e-07, "loss": 0.1412, "step": 11198 }, { "epoch": 0.9, "grad_norm": 1.3520446186875188, "learning_rate": 2.8139382647459025e-07, "loss": 0.1926, "step": 11199 }, { "epoch": 0.9, "grad_norm": 1.3077274941590027, "learning_rate": 2.809655079798179e-07, "loss": 0.1452, "step": 11200 }, { "epoch": 0.9, "grad_norm": 1.5967930585525536, "learning_rate": 2.805375062881288e-07, "loss": 0.1759, "step": 11201 }, { "epoch": 0.9, "grad_norm": 1.3451385067030859, "learning_rate": 2.801098214282566e-07, "loss": 0.1635, "step": 11202 }, { "epoch": 0.9, "grad_norm": 1.2862528581895054, "learning_rate": 2.796824534289122e-07, "loss": 0.1757, "step": 11203 }, { "epoch": 0.9, "grad_norm": 1.3925230983974384, "learning_rate": 2.792554023187871e-07, "loss": 0.1683, "step": 11204 }, { "epoch": 0.9, "grad_norm": 8.733450523254797, "learning_rate": 2.7882866812655006e-07, "loss": 0.6001, "step": 11205 }, { "epoch": 0.9, "grad_norm": 5.694024746934279, "learning_rate": 2.7840225088084927e-07, "loss": 0.5553, "step": 11206 }, { "epoch": 0.9, "grad_norm": 14.651615728885222, "learning_rate": 2.779761506103107e-07, "loss": 0.522, "step": 11207 }, { "epoch": 0.9, "grad_norm": 1.2901443145978497, "learning_rate": 2.775503673435415e-07, "loss": 0.1701, "step": 11208 }, { "epoch": 0.9, "grad_norm": 1.3730175919097642, "learning_rate": 2.771249011091226e-07, "loss": 0.1758, "step": 11209 }, { "epoch": 0.9, "grad_norm": 1.8400465603811433, "learning_rate": 2.7669975193562013e-07, "loss": 0.255, "step": 11210 }, { "epoch": 0.9, "grad_norm": 1.4243811786710665, "learning_rate": 2.762749198515752e-07, "loss": 0.1577, "step": 11211 }, { "epoch": 0.9, "grad_norm": 1.5835270772356915, "learning_rate": 2.758504048855054e-07, "loss": 0.1827, "step": 11212 }, { "epoch": 0.9, "grad_norm": 1.5407018723513857, "learning_rate": 2.754262070659125e-07, "loss": 0.1648, "step": 11213 }, { "epoch": 0.9, "grad_norm": 1.2361681416791075, "learning_rate": 2.750023264212731e-07, "loss": 0.1874, "step": 11214 }, { "epoch": 0.9, "grad_norm": 1.3295495383107927, "learning_rate": 2.7457876298004393e-07, "loss": 0.1657, "step": 11215 }, { "epoch": 0.9, "grad_norm": 1.4016942045911114, "learning_rate": 2.7415551677065943e-07, "loss": 0.155, "step": 11216 }, { "epoch": 0.9, "grad_norm": 1.2793645361885995, "learning_rate": 2.737325878215341e-07, "loss": 0.1602, "step": 11217 }, { "epoch": 0.9, "grad_norm": 1.5005097158778191, "learning_rate": 2.733099761610597e-07, "loss": 0.188, "step": 11218 }, { "epoch": 0.9, "grad_norm": 1.2643817258749224, "learning_rate": 2.7288768181760904e-07, "loss": 0.1613, "step": 11219 }, { "epoch": 0.9, "grad_norm": 1.4258418527834953, "learning_rate": 2.7246570481953004e-07, "loss": 0.1726, "step": 11220 }, { "epoch": 0.9, "grad_norm": 1.2752776027513757, "learning_rate": 2.720440451951517e-07, "loss": 0.1323, "step": 11221 }, { "epoch": 0.9, "grad_norm": 1.4529624644026562, "learning_rate": 2.716227029727819e-07, "loss": 0.1793, "step": 11222 }, { "epoch": 0.9, "grad_norm": 1.4750083057856656, "learning_rate": 2.7120167818070695e-07, "loss": 0.1733, "step": 11223 }, { "epoch": 0.9, "grad_norm": 1.2516795667807024, "learning_rate": 2.707809708471909e-07, "loss": 0.1593, "step": 11224 }, { "epoch": 0.9, "grad_norm": 1.5290373768374597, "learning_rate": 2.7036058100047723e-07, "loss": 0.151, "step": 11225 }, { "epoch": 0.9, "grad_norm": 1.4296213240426994, "learning_rate": 2.699405086687884e-07, "loss": 0.1781, "step": 11226 }, { "epoch": 0.9, "grad_norm": 1.4517576008756699, "learning_rate": 2.695207538803235e-07, "loss": 0.1977, "step": 11227 }, { "epoch": 0.9, "grad_norm": 1.4830281465735342, "learning_rate": 2.69101316663265e-07, "loss": 0.1933, "step": 11228 }, { "epoch": 0.9, "grad_norm": 1.242334605681596, "learning_rate": 2.686821970457681e-07, "loss": 0.1587, "step": 11229 }, { "epoch": 0.9, "grad_norm": 1.2721309371086087, "learning_rate": 2.682633950559699e-07, "loss": 0.1637, "step": 11230 }, { "epoch": 0.9, "grad_norm": 1.4717923791003538, "learning_rate": 2.6784491072198824e-07, "loss": 0.1401, "step": 11231 }, { "epoch": 0.9, "grad_norm": 1.291579186756562, "learning_rate": 2.6742674407191416e-07, "loss": 0.17, "step": 11232 }, { "epoch": 0.9, "grad_norm": 8.233129772261389, "learning_rate": 2.6700889513382233e-07, "loss": 0.5044, "step": 11233 }, { "epoch": 0.9, "grad_norm": 12.155976609685988, "learning_rate": 2.6659136393576423e-07, "loss": 0.5389, "step": 11234 }, { "epoch": 0.9, "grad_norm": 1.403403485571084, "learning_rate": 2.661741505057691e-07, "loss": 0.1962, "step": 11235 }, { "epoch": 0.9, "grad_norm": 1.3619770184361806, "learning_rate": 2.657572548718462e-07, "loss": 0.1917, "step": 11236 }, { "epoch": 0.9, "grad_norm": 1.255701772370654, "learning_rate": 2.653406770619832e-07, "loss": 0.129, "step": 11237 }, { "epoch": 0.9, "grad_norm": 1.308736207511838, "learning_rate": 2.64924417104146e-07, "loss": 0.1675, "step": 11238 }, { "epoch": 0.9, "grad_norm": 1.3199373540189938, "learning_rate": 2.6450847502627883e-07, "loss": 0.1402, "step": 11239 }, { "epoch": 0.9, "grad_norm": 9.259438699294948, "learning_rate": 2.640928508563062e-07, "loss": 0.6889, "step": 11240 }, { "epoch": 0.9, "grad_norm": 1.4930885147163255, "learning_rate": 2.63677544622129e-07, "loss": 0.1832, "step": 11241 }, { "epoch": 0.9, "grad_norm": 1.283363145290632, "learning_rate": 2.6326255635163e-07, "loss": 0.1841, "step": 11242 }, { "epoch": 0.9, "grad_norm": 2.0185935890238516, "learning_rate": 2.6284788607266575e-07, "loss": 0.1894, "step": 11243 }, { "epoch": 0.9, "grad_norm": 7.262108635097663, "learning_rate": 2.6243353381307736e-07, "loss": 0.4882, "step": 11244 }, { "epoch": 0.9, "grad_norm": 1.3874852569905642, "learning_rate": 2.620194996006803e-07, "loss": 0.2046, "step": 11245 }, { "epoch": 0.9, "grad_norm": 1.4284294551158938, "learning_rate": 2.616057834632679e-07, "loss": 0.2049, "step": 11246 }, { "epoch": 0.9, "grad_norm": 1.4427296521748576, "learning_rate": 2.6119238542861746e-07, "loss": 0.1551, "step": 11247 }, { "epoch": 0.9, "grad_norm": 1.2150311113623231, "learning_rate": 2.607793055244795e-07, "loss": 0.1572, "step": 11248 }, { "epoch": 0.9, "grad_norm": 1.3241562533462572, "learning_rate": 2.603665437785868e-07, "loss": 0.161, "step": 11249 }, { "epoch": 0.9, "grad_norm": 1.3563488547256068, "learning_rate": 2.599541002186479e-07, "loss": 0.2204, "step": 11250 }, { "epoch": 0.9, "grad_norm": 1.501295912379093, "learning_rate": 2.595419748723521e-07, "loss": 0.1834, "step": 11251 }, { "epoch": 0.9, "grad_norm": 1.5079538826457175, "learning_rate": 2.591301677673663e-07, "loss": 0.1804, "step": 11252 }, { "epoch": 0.9, "grad_norm": 1.2117070089435333, "learning_rate": 2.587186789313378e-07, "loss": 0.1619, "step": 11253 }, { "epoch": 0.9, "grad_norm": 1.2387238116416834, "learning_rate": 2.58307508391889e-07, "loss": 0.1357, "step": 11254 }, { "epoch": 0.9, "grad_norm": 6.137241958271838, "learning_rate": 2.578966561766233e-07, "loss": 0.6801, "step": 11255 }, { "epoch": 0.9, "grad_norm": 1.2455089060626816, "learning_rate": 2.5748612231312476e-07, "loss": 0.1523, "step": 11256 }, { "epoch": 0.9, "grad_norm": 1.3726570472408386, "learning_rate": 2.570759068289502e-07, "loss": 0.1467, "step": 11257 }, { "epoch": 0.9, "grad_norm": 1.304875651541526, "learning_rate": 2.566660097516416e-07, "loss": 0.1764, "step": 11258 }, { "epoch": 0.9, "grad_norm": 1.4302420917468852, "learning_rate": 2.562564311087151e-07, "loss": 0.1691, "step": 11259 }, { "epoch": 0.9, "grad_norm": 8.156362734284608, "learning_rate": 2.5584717092766774e-07, "loss": 0.5535, "step": 11260 }, { "epoch": 0.9, "grad_norm": 5.625379630242133, "learning_rate": 2.5543822923597305e-07, "loss": 0.5441, "step": 11261 }, { "epoch": 0.9, "grad_norm": 7.425631249694794, "learning_rate": 2.550296060610868e-07, "loss": 0.6162, "step": 11262 }, { "epoch": 0.9, "grad_norm": 1.373690359634014, "learning_rate": 2.546213014304394e-07, "loss": 0.1595, "step": 11263 }, { "epoch": 0.9, "grad_norm": 5.608065901132052, "learning_rate": 2.54213315371441e-07, "loss": 0.5138, "step": 11264 }, { "epoch": 0.9, "grad_norm": 12.538600530275065, "learning_rate": 2.5380564791148364e-07, "loss": 0.5417, "step": 11265 }, { "epoch": 0.9, "grad_norm": 1.3468750836968264, "learning_rate": 2.533982990779316e-07, "loss": 0.1771, "step": 11266 }, { "epoch": 0.9, "grad_norm": 1.360446351749534, "learning_rate": 2.5299126889813454e-07, "loss": 0.1904, "step": 11267 }, { "epoch": 0.9, "grad_norm": 1.5155586404153307, "learning_rate": 2.5258455739941623e-07, "loss": 0.1834, "step": 11268 }, { "epoch": 0.9, "grad_norm": 1.4738491296319747, "learning_rate": 2.5217816460908094e-07, "loss": 0.1733, "step": 11269 }, { "epoch": 0.9, "grad_norm": 4.260438200463216, "learning_rate": 2.517720905544102e-07, "loss": 0.426, "step": 11270 }, { "epoch": 0.9, "grad_norm": 1.3242362176274711, "learning_rate": 2.513663352626661e-07, "loss": 0.18, "step": 11271 }, { "epoch": 0.9, "grad_norm": 1.3125636685125515, "learning_rate": 2.5096089876108784e-07, "loss": 0.2008, "step": 11272 }, { "epoch": 0.9, "grad_norm": 1.4872015756791364, "learning_rate": 2.5055578107689326e-07, "loss": 0.1958, "step": 11273 }, { "epoch": 0.9, "grad_norm": 1.5479871507838738, "learning_rate": 2.501509822372788e-07, "loss": 0.1794, "step": 11274 }, { "epoch": 0.9, "grad_norm": 1.3542164714482559, "learning_rate": 2.497465022694207e-07, "loss": 0.164, "step": 11275 }, { "epoch": 0.9, "grad_norm": 10.575557847446362, "learning_rate": 2.4934234120047306e-07, "loss": 0.4692, "step": 11276 }, { "epoch": 0.9, "grad_norm": 1.366116668322423, "learning_rate": 2.489384990575672e-07, "loss": 0.1926, "step": 11277 }, { "epoch": 0.9, "grad_norm": 1.5124471526161982, "learning_rate": 2.485349758678152e-07, "loss": 0.1864, "step": 11278 }, { "epoch": 0.9, "grad_norm": 6.343282577393237, "learning_rate": 2.48131771658307e-07, "loss": 0.5763, "step": 11279 }, { "epoch": 0.9, "grad_norm": 1.2765130945823198, "learning_rate": 2.477288864561106e-07, "loss": 0.1541, "step": 11280 }, { "epoch": 0.9, "grad_norm": 1.3667983399347927, "learning_rate": 2.4732632028827255e-07, "loss": 0.1746, "step": 11281 }, { "epoch": 0.9, "grad_norm": 1.3074991472304878, "learning_rate": 2.46924073181819e-07, "loss": 0.1751, "step": 11282 }, { "epoch": 0.9, "grad_norm": 1.6123227552756643, "learning_rate": 2.465221451637534e-07, "loss": 0.1489, "step": 11283 }, { "epoch": 0.9, "grad_norm": 1.4770941867483163, "learning_rate": 2.461205362610591e-07, "loss": 0.1703, "step": 11284 }, { "epoch": 0.9, "grad_norm": 9.543111835060115, "learning_rate": 2.4571924650069634e-07, "loss": 0.4469, "step": 11285 }, { "epoch": 0.9, "grad_norm": 1.3415549581439412, "learning_rate": 2.453182759096051e-07, "loss": 0.1782, "step": 11286 }, { "epoch": 0.9, "grad_norm": 8.429112839847905, "learning_rate": 2.4491762451470556e-07, "loss": 0.6321, "step": 11287 }, { "epoch": 0.9, "grad_norm": 1.2451587676931866, "learning_rate": 2.445172923428912e-07, "loss": 0.1513, "step": 11288 }, { "epoch": 0.9, "grad_norm": 1.3143353382703578, "learning_rate": 2.4411727942104115e-07, "loss": 0.1789, "step": 11289 }, { "epoch": 0.9, "grad_norm": 1.412337338748157, "learning_rate": 2.437175857760077e-07, "loss": 0.1729, "step": 11290 }, { "epoch": 0.9, "grad_norm": 1.4056652859777632, "learning_rate": 2.433182114346228e-07, "loss": 0.1476, "step": 11291 }, { "epoch": 0.9, "grad_norm": 1.3626645754418352, "learning_rate": 2.4291915642369934e-07, "loss": 0.1857, "step": 11292 }, { "epoch": 0.9, "grad_norm": 1.3428383347917274, "learning_rate": 2.42520420770026e-07, "loss": 0.1678, "step": 11293 }, { "epoch": 0.9, "grad_norm": 1.4118610901162105, "learning_rate": 2.421220045003714e-07, "loss": 0.1727, "step": 11294 }, { "epoch": 0.9, "grad_norm": 1.278524901478949, "learning_rate": 2.417239076414829e-07, "loss": 0.2124, "step": 11295 }, { "epoch": 0.9, "grad_norm": 1.3389404788255934, "learning_rate": 2.413261302200848e-07, "loss": 0.1838, "step": 11296 }, { "epoch": 0.9, "grad_norm": 1.3553019883794628, "learning_rate": 2.4092867226288173e-07, "loss": 0.1792, "step": 11297 }, { "epoch": 0.9, "grad_norm": 1.3334678646912719, "learning_rate": 2.4053153379655693e-07, "loss": 0.1495, "step": 11298 }, { "epoch": 0.9, "grad_norm": 1.3868336794763116, "learning_rate": 2.4013471484777063e-07, "loss": 0.1765, "step": 11299 }, { "epoch": 0.9, "grad_norm": 1.2548148723380272, "learning_rate": 2.397382154431621e-07, "loss": 0.141, "step": 11300 }, { "epoch": 0.9, "grad_norm": 1.3415066768795039, "learning_rate": 2.3934203560935174e-07, "loss": 0.187, "step": 11301 }, { "epoch": 0.9, "grad_norm": 1.378302616017471, "learning_rate": 2.389461753729333e-07, "loss": 0.1716, "step": 11302 }, { "epoch": 0.9, "grad_norm": 1.320515498694177, "learning_rate": 2.385506347604838e-07, "loss": 0.1596, "step": 11303 }, { "epoch": 0.9, "grad_norm": 1.3852956920895878, "learning_rate": 2.38155413798557e-07, "loss": 0.172, "step": 11304 }, { "epoch": 0.9, "grad_norm": 1.3757911350499652, "learning_rate": 2.3776051251368505e-07, "loss": 0.2087, "step": 11305 }, { "epoch": 0.9, "grad_norm": 1.509730190985472, "learning_rate": 2.3736593093237948e-07, "loss": 0.2175, "step": 11306 }, { "epoch": 0.9, "grad_norm": 1.3880319628435038, "learning_rate": 2.3697166908112855e-07, "loss": 0.174, "step": 11307 }, { "epoch": 0.9, "grad_norm": 1.3601592483856686, "learning_rate": 2.3657772698640113e-07, "loss": 0.1746, "step": 11308 }, { "epoch": 0.9, "grad_norm": 7.887079865979303, "learning_rate": 2.361841046746427e-07, "loss": 0.4426, "step": 11309 }, { "epoch": 0.9, "grad_norm": 1.2900978896642583, "learning_rate": 2.3579080217228046e-07, "loss": 0.1559, "step": 11310 }, { "epoch": 0.9, "grad_norm": 1.3303123338746785, "learning_rate": 2.3539781950571494e-07, "loss": 0.1905, "step": 11311 }, { "epoch": 0.9, "grad_norm": 1.4537985118973686, "learning_rate": 2.3500515670133117e-07, "loss": 0.1813, "step": 11312 }, { "epoch": 0.91, "grad_norm": 1.2868874499746372, "learning_rate": 2.346128137854886e-07, "loss": 0.1766, "step": 11313 }, { "epoch": 0.91, "grad_norm": 1.2836052704329015, "learning_rate": 2.342207907845262e-07, "loss": 0.1629, "step": 11314 }, { "epoch": 0.91, "grad_norm": 1.1469195728899055, "learning_rate": 2.3382908772476175e-07, "loss": 0.1327, "step": 11315 }, { "epoch": 0.91, "grad_norm": 1.3133155600217268, "learning_rate": 2.3343770463249204e-07, "loss": 0.1714, "step": 11316 }, { "epoch": 0.91, "grad_norm": 1.2828180306492585, "learning_rate": 2.3304664153399048e-07, "loss": 0.1751, "step": 11317 }, { "epoch": 0.91, "grad_norm": 1.4480467815344436, "learning_rate": 2.3265589845551218e-07, "loss": 0.1842, "step": 11318 }, { "epoch": 0.91, "grad_norm": 1.5155134125418652, "learning_rate": 2.3226547542328724e-07, "loss": 0.1856, "step": 11319 }, { "epoch": 0.91, "grad_norm": 1.390828331936354, "learning_rate": 2.3187537246352587e-07, "loss": 0.1605, "step": 11320 }, { "epoch": 0.91, "grad_norm": 1.205682173666545, "learning_rate": 2.3148558960241929e-07, "loss": 0.1549, "step": 11321 }, { "epoch": 0.91, "grad_norm": 1.480276546162343, "learning_rate": 2.3109612686613158e-07, "loss": 0.1839, "step": 11322 }, { "epoch": 0.91, "grad_norm": 1.4709713576882006, "learning_rate": 2.3070698428081074e-07, "loss": 0.1926, "step": 11323 }, { "epoch": 0.91, "grad_norm": 1.1651707156886693, "learning_rate": 2.3031816187258138e-07, "loss": 0.1337, "step": 11324 }, { "epoch": 0.91, "grad_norm": 7.714345901868557, "learning_rate": 2.2992965966754378e-07, "loss": 0.6461, "step": 11325 }, { "epoch": 0.91, "grad_norm": 10.044866187826981, "learning_rate": 2.2954147769178148e-07, "loss": 0.5367, "step": 11326 }, { "epoch": 0.91, "grad_norm": 1.2210531438022616, "learning_rate": 2.2915361597135366e-07, "loss": 0.1684, "step": 11327 }, { "epoch": 0.91, "grad_norm": 1.3653871403414761, "learning_rate": 2.2876607453229893e-07, "loss": 0.1696, "step": 11328 }, { "epoch": 0.91, "grad_norm": 1.3860088379045408, "learning_rate": 2.283788534006337e-07, "loss": 0.1581, "step": 11329 }, { "epoch": 0.91, "grad_norm": 1.2186318068934878, "learning_rate": 2.279919526023533e-07, "loss": 0.1596, "step": 11330 }, { "epoch": 0.91, "grad_norm": 1.3176731839071112, "learning_rate": 2.2760537216343136e-07, "loss": 0.1824, "step": 11331 }, { "epoch": 0.91, "grad_norm": 1.3375797865317043, "learning_rate": 2.272191121098216e-07, "loss": 0.1836, "step": 11332 }, { "epoch": 0.91, "grad_norm": 1.278116095053253, "learning_rate": 2.2683317246745328e-07, "loss": 0.1424, "step": 11333 }, { "epoch": 0.91, "grad_norm": 10.014855159222185, "learning_rate": 2.2644755326223567e-07, "loss": 0.4228, "step": 11334 }, { "epoch": 0.91, "grad_norm": 1.3722299052231866, "learning_rate": 2.260622545200586e-07, "loss": 0.1475, "step": 11335 }, { "epoch": 0.91, "grad_norm": 1.228068908603046, "learning_rate": 2.2567727626678527e-07, "loss": 0.1833, "step": 11336 }, { "epoch": 0.91, "grad_norm": 1.2446707372354253, "learning_rate": 2.2529261852826279e-07, "loss": 0.1691, "step": 11337 }, { "epoch": 0.91, "grad_norm": 1.2662142530203966, "learning_rate": 2.249082813303133e-07, "loss": 0.1755, "step": 11338 }, { "epoch": 0.91, "grad_norm": 1.3954235117999418, "learning_rate": 2.245242646987389e-07, "loss": 0.1643, "step": 11339 }, { "epoch": 0.91, "grad_norm": 1.2980226473618142, "learning_rate": 2.241405686593201e-07, "loss": 0.1593, "step": 11340 }, { "epoch": 0.91, "grad_norm": 35.42730954413929, "learning_rate": 2.237571932378152e-07, "loss": 0.6564, "step": 11341 }, { "epoch": 0.91, "grad_norm": 1.2531754395004402, "learning_rate": 2.233741384599608e-07, "loss": 0.139, "step": 11342 }, { "epoch": 0.91, "grad_norm": 1.3081334325517342, "learning_rate": 2.229914043514736e-07, "loss": 0.1649, "step": 11343 }, { "epoch": 0.91, "grad_norm": 1.3914059702502306, "learning_rate": 2.226089909380469e-07, "loss": 0.1522, "step": 11344 }, { "epoch": 0.91, "grad_norm": 1.469126633590488, "learning_rate": 2.2222689824535294e-07, "loss": 0.1648, "step": 11345 }, { "epoch": 0.91, "grad_norm": 1.2423789168728636, "learning_rate": 2.2184512629904508e-07, "loss": 0.1501, "step": 11346 }, { "epoch": 0.91, "grad_norm": 1.2404927337037424, "learning_rate": 2.2146367512474955e-07, "loss": 0.1391, "step": 11347 }, { "epoch": 0.91, "grad_norm": 1.5025268019206122, "learning_rate": 2.2108254474807632e-07, "loss": 0.1675, "step": 11348 }, { "epoch": 0.91, "grad_norm": 1.366908438639587, "learning_rate": 2.207017351946117e-07, "loss": 0.1535, "step": 11349 }, { "epoch": 0.91, "grad_norm": 1.325470516823539, "learning_rate": 2.2032124648992015e-07, "loss": 0.1472, "step": 11350 }, { "epoch": 0.91, "grad_norm": 1.349874325502185, "learning_rate": 2.1994107865954516e-07, "loss": 0.1872, "step": 11351 }, { "epoch": 0.91, "grad_norm": 1.505345700678516, "learning_rate": 2.1956123172900855e-07, "loss": 0.1914, "step": 11352 }, { "epoch": 0.91, "grad_norm": 1.282501386624574, "learning_rate": 2.1918170572381048e-07, "loss": 0.2122, "step": 11353 }, { "epoch": 0.91, "grad_norm": 1.2710376643072956, "learning_rate": 2.1880250066942942e-07, "loss": 0.1468, "step": 11354 }, { "epoch": 0.91, "grad_norm": 1.4804138448394293, "learning_rate": 2.1842361659132395e-07, "loss": 0.2016, "step": 11355 }, { "epoch": 0.91, "grad_norm": 1.5057787595821464, "learning_rate": 2.1804505351492755e-07, "loss": 0.1788, "step": 11356 }, { "epoch": 0.91, "grad_norm": 1.2201136509617896, "learning_rate": 2.176668114656566e-07, "loss": 0.1524, "step": 11357 }, { "epoch": 0.91, "grad_norm": 1.5881335013817457, "learning_rate": 2.1728889046890188e-07, "loss": 0.1976, "step": 11358 }, { "epoch": 0.91, "grad_norm": 1.338732888422352, "learning_rate": 2.1691129055003535e-07, "loss": 0.1991, "step": 11359 }, { "epoch": 0.91, "grad_norm": 1.5490961424216514, "learning_rate": 2.1653401173440558e-07, "loss": 0.1662, "step": 11360 }, { "epoch": 0.91, "grad_norm": 1.364717059256129, "learning_rate": 2.1615705404734123e-07, "loss": 0.1979, "step": 11361 }, { "epoch": 0.91, "grad_norm": 1.351110415298109, "learning_rate": 2.1578041751414812e-07, "loss": 0.1949, "step": 11362 }, { "epoch": 0.91, "grad_norm": 1.2972547798748257, "learning_rate": 2.1540410216011166e-07, "loss": 0.177, "step": 11363 }, { "epoch": 0.91, "grad_norm": 1.3050948452069984, "learning_rate": 2.1502810801049435e-07, "loss": 0.1775, "step": 11364 }, { "epoch": 0.91, "grad_norm": 5.249732013254502, "learning_rate": 2.1465243509053713e-07, "loss": 0.5292, "step": 11365 }, { "epoch": 0.91, "grad_norm": 1.1293522377783152, "learning_rate": 2.142770834254626e-07, "loss": 0.139, "step": 11366 }, { "epoch": 0.91, "grad_norm": 1.5264711838401406, "learning_rate": 2.1390205304046675e-07, "loss": 0.1855, "step": 11367 }, { "epoch": 0.91, "grad_norm": 1.5470745674084725, "learning_rate": 2.135273439607266e-07, "loss": 0.1854, "step": 11368 }, { "epoch": 0.91, "grad_norm": 1.2711768183162393, "learning_rate": 2.1315295621139986e-07, "loss": 0.17, "step": 11369 }, { "epoch": 0.91, "grad_norm": 1.539818299926648, "learning_rate": 2.1277888981761753e-07, "loss": 0.1556, "step": 11370 }, { "epoch": 0.91, "grad_norm": 1.4243469221767162, "learning_rate": 2.1240514480449336e-07, "loss": 0.1561, "step": 11371 }, { "epoch": 0.91, "grad_norm": 1.2306508212648097, "learning_rate": 2.120317211971179e-07, "loss": 0.1412, "step": 11372 }, { "epoch": 0.91, "grad_norm": 1.3523333040255328, "learning_rate": 2.1165861902055995e-07, "loss": 0.1721, "step": 11373 }, { "epoch": 0.91, "grad_norm": 1.296729216152383, "learning_rate": 2.112858382998667e-07, "loss": 0.1683, "step": 11374 }, { "epoch": 0.91, "grad_norm": 1.4331784653211281, "learning_rate": 2.109133790600648e-07, "loss": 0.1479, "step": 11375 }, { "epoch": 0.91, "grad_norm": 1.3148471759233555, "learning_rate": 2.105412413261576e-07, "loss": 0.1466, "step": 11376 }, { "epoch": 0.91, "grad_norm": 1.5281822435819672, "learning_rate": 2.1016942512312898e-07, "loss": 0.2459, "step": 11377 }, { "epoch": 0.91, "grad_norm": 1.5249807713625627, "learning_rate": 2.09797930475939e-07, "loss": 0.1808, "step": 11378 }, { "epoch": 0.91, "grad_norm": 1.1298143522777115, "learning_rate": 2.0942675740952712e-07, "loss": 0.1288, "step": 11379 }, { "epoch": 0.91, "grad_norm": 7.468200947875259, "learning_rate": 2.0905590594881342e-07, "loss": 0.6689, "step": 11380 }, { "epoch": 0.91, "grad_norm": 1.2000965178762486, "learning_rate": 2.0868537611869132e-07, "loss": 0.1343, "step": 11381 }, { "epoch": 0.91, "grad_norm": 1.1935475531589255, "learning_rate": 2.0831516794403762e-07, "loss": 0.1813, "step": 11382 }, { "epoch": 0.91, "grad_norm": 9.514907941992528, "learning_rate": 2.079452814497046e-07, "loss": 0.5754, "step": 11383 }, { "epoch": 0.91, "grad_norm": 7.52306167020663, "learning_rate": 2.0757571666052466e-07, "loss": 0.5163, "step": 11384 }, { "epoch": 0.91, "grad_norm": 1.5487308059315799, "learning_rate": 2.0720647360130687e-07, "loss": 0.2222, "step": 11385 }, { "epoch": 0.91, "grad_norm": 1.3409813187302209, "learning_rate": 2.0683755229684077e-07, "loss": 0.1645, "step": 11386 }, { "epoch": 0.91, "grad_norm": 1.4443439127504245, "learning_rate": 2.0646895277189216e-07, "loss": 0.2067, "step": 11387 }, { "epoch": 0.91, "grad_norm": 1.290636357865715, "learning_rate": 2.0610067505120622e-07, "loss": 0.1513, "step": 11388 }, { "epoch": 0.91, "grad_norm": 1.1834382543129705, "learning_rate": 2.057327191595071e-07, "loss": 0.1398, "step": 11389 }, { "epoch": 0.91, "grad_norm": 1.3163940697301169, "learning_rate": 2.053650851214961e-07, "loss": 0.1576, "step": 11390 }, { "epoch": 0.91, "grad_norm": 1.2865088405975913, "learning_rate": 2.0499777296185463e-07, "loss": 0.1766, "step": 11391 }, { "epoch": 0.91, "grad_norm": 1.2781280120027936, "learning_rate": 2.0463078270524128e-07, "loss": 0.1456, "step": 11392 }, { "epoch": 0.91, "grad_norm": 10.444722876936616, "learning_rate": 2.0426411437629245e-07, "loss": 0.544, "step": 11393 }, { "epoch": 0.91, "grad_norm": 1.393964975379059, "learning_rate": 2.0389776799962402e-07, "loss": 0.1831, "step": 11394 }, { "epoch": 0.91, "grad_norm": 1.3570879425880782, "learning_rate": 2.0353174359983074e-07, "loss": 0.1717, "step": 11395 }, { "epoch": 0.91, "grad_norm": 8.406559724360969, "learning_rate": 2.0316604120148354e-07, "loss": 0.6578, "step": 11396 }, { "epoch": 0.91, "grad_norm": 1.4447430794356344, "learning_rate": 2.0280066082913385e-07, "loss": 0.1735, "step": 11397 }, { "epoch": 0.91, "grad_norm": 1.3122376447155761, "learning_rate": 2.0243560250731097e-07, "loss": 0.1891, "step": 11398 }, { "epoch": 0.91, "grad_norm": 1.2623000871543326, "learning_rate": 2.0207086626052142e-07, "loss": 0.167, "step": 11399 }, { "epoch": 0.91, "grad_norm": 1.2745929701369876, "learning_rate": 2.0170645211325335e-07, "loss": 0.1761, "step": 11400 }, { "epoch": 0.91, "grad_norm": 1.4543249833706569, "learning_rate": 2.0134236008996778e-07, "loss": 0.187, "step": 11401 }, { "epoch": 0.91, "grad_norm": 1.5456968944808578, "learning_rate": 2.0097859021510956e-07, "loss": 0.1965, "step": 11402 }, { "epoch": 0.91, "grad_norm": 1.2685604452349555, "learning_rate": 2.0061514251309978e-07, "loss": 0.1521, "step": 11403 }, { "epoch": 0.91, "grad_norm": 1.6966512967484293, "learning_rate": 2.0025201700833607e-07, "loss": 0.1601, "step": 11404 }, { "epoch": 0.91, "grad_norm": 1.4083702487034786, "learning_rate": 1.9988921372519732e-07, "loss": 0.1769, "step": 11405 }, { "epoch": 0.91, "grad_norm": 1.320405015220427, "learning_rate": 1.9952673268803956e-07, "loss": 0.1456, "step": 11406 }, { "epoch": 0.91, "grad_norm": 1.3570214671225111, "learning_rate": 1.991645739211967e-07, "loss": 0.1534, "step": 11407 }, { "epoch": 0.91, "grad_norm": 1.3936849766499813, "learning_rate": 1.988027374489826e-07, "loss": 0.1677, "step": 11408 }, { "epoch": 0.91, "grad_norm": 1.2940103954318118, "learning_rate": 1.9844122329568728e-07, "loss": 0.1466, "step": 11409 }, { "epoch": 0.91, "grad_norm": 1.4664966455165562, "learning_rate": 1.9808003148558074e-07, "loss": 0.1908, "step": 11410 }, { "epoch": 0.91, "grad_norm": 1.42781695690178, "learning_rate": 1.977191620429114e-07, "loss": 0.1783, "step": 11411 }, { "epoch": 0.91, "grad_norm": 1.3852473108245562, "learning_rate": 1.973586149919049e-07, "loss": 0.1743, "step": 11412 }, { "epoch": 0.91, "grad_norm": 5.389177646950667, "learning_rate": 1.9699839035676516e-07, "loss": 0.5123, "step": 11413 }, { "epoch": 0.91, "grad_norm": 6.278126115528506, "learning_rate": 1.9663848816167785e-07, "loss": 0.5899, "step": 11414 }, { "epoch": 0.91, "grad_norm": 1.3184167644848865, "learning_rate": 1.9627890843080034e-07, "loss": 0.1636, "step": 11415 }, { "epoch": 0.91, "grad_norm": 1.3006664715132283, "learning_rate": 1.959196511882755e-07, "loss": 0.1507, "step": 11416 }, { "epoch": 0.91, "grad_norm": 1.2776166239735658, "learning_rate": 1.9556071645822017e-07, "loss": 0.1734, "step": 11417 }, { "epoch": 0.91, "grad_norm": 1.2895393602465155, "learning_rate": 1.9520210426473063e-07, "loss": 0.1447, "step": 11418 }, { "epoch": 0.91, "grad_norm": 10.07309447915683, "learning_rate": 1.9484381463188206e-07, "loss": 0.6733, "step": 11419 }, { "epoch": 0.91, "grad_norm": 1.3122534945256383, "learning_rate": 1.9448584758372745e-07, "loss": 0.1487, "step": 11420 }, { "epoch": 0.91, "grad_norm": 1.301036612388024, "learning_rate": 1.941282031442976e-07, "loss": 0.1802, "step": 11421 }, { "epoch": 0.91, "grad_norm": 6.15144116980991, "learning_rate": 1.937708813376027e-07, "loss": 0.6986, "step": 11422 }, { "epoch": 0.91, "grad_norm": 1.2499385185905192, "learning_rate": 1.934138821876308e-07, "loss": 0.1314, "step": 11423 }, { "epoch": 0.91, "grad_norm": 1.3804036601337681, "learning_rate": 1.930572057183483e-07, "loss": 0.2122, "step": 11424 }, { "epoch": 0.91, "grad_norm": 1.534112555745727, "learning_rate": 1.9270085195370048e-07, "loss": 0.185, "step": 11425 }, { "epoch": 0.91, "grad_norm": 1.3593264776915632, "learning_rate": 1.9234482091760932e-07, "loss": 0.1553, "step": 11426 }, { "epoch": 0.91, "grad_norm": 1.3539667179312658, "learning_rate": 1.9198911263397734e-07, "loss": 0.147, "step": 11427 }, { "epoch": 0.91, "grad_norm": 7.934540251060143, "learning_rate": 1.916337271266844e-07, "loss": 0.5276, "step": 11428 }, { "epoch": 0.91, "grad_norm": 1.2632505728716807, "learning_rate": 1.9127866441958743e-07, "loss": 0.1844, "step": 11429 }, { "epoch": 0.91, "grad_norm": 1.3002017700371515, "learning_rate": 1.9092392453652352e-07, "loss": 0.1891, "step": 11430 }, { "epoch": 0.91, "grad_norm": 1.2839726055450675, "learning_rate": 1.905695075013081e-07, "loss": 0.157, "step": 11431 }, { "epoch": 0.91, "grad_norm": 1.3324023563283238, "learning_rate": 1.902154133377332e-07, "loss": 0.1567, "step": 11432 }, { "epoch": 0.91, "grad_norm": 1.3098399473628979, "learning_rate": 1.8986164206957037e-07, "loss": 0.1608, "step": 11433 }, { "epoch": 0.91, "grad_norm": 1.574573910652031, "learning_rate": 1.895081937205706e-07, "loss": 0.1883, "step": 11434 }, { "epoch": 0.91, "grad_norm": 1.4133053070200015, "learning_rate": 1.8915506831445996e-07, "loss": 0.1983, "step": 11435 }, { "epoch": 0.91, "grad_norm": 1.3655258634119631, "learning_rate": 1.888022658749461e-07, "loss": 0.1652, "step": 11436 }, { "epoch": 0.91, "grad_norm": 1.099794613797391, "learning_rate": 1.8844978642571454e-07, "loss": 0.124, "step": 11437 }, { "epoch": 0.92, "grad_norm": 1.242352889620964, "learning_rate": 1.880976299904258e-07, "loss": 0.1287, "step": 11438 }, { "epoch": 0.92, "grad_norm": 1.4569215421711281, "learning_rate": 1.8774579659272318e-07, "loss": 0.164, "step": 11439 }, { "epoch": 0.92, "grad_norm": 1.4125310961049473, "learning_rate": 1.8739428625622614e-07, "loss": 0.1685, "step": 11440 }, { "epoch": 0.92, "grad_norm": 1.1699636229474681, "learning_rate": 1.8704309900453187e-07, "loss": 0.1397, "step": 11441 }, { "epoch": 0.92, "grad_norm": 1.3321259395147382, "learning_rate": 1.866922348612171e-07, "loss": 0.1587, "step": 11442 }, { "epoch": 0.92, "grad_norm": 1.5587867890700113, "learning_rate": 1.863416938498369e-07, "loss": 0.1942, "step": 11443 }, { "epoch": 0.92, "grad_norm": 1.3528618112109925, "learning_rate": 1.8599147599392243e-07, "loss": 0.1509, "step": 11444 }, { "epoch": 0.92, "grad_norm": 1.5097232051060785, "learning_rate": 1.856415813169876e-07, "loss": 0.191, "step": 11445 }, { "epoch": 0.92, "grad_norm": 1.3753701599321564, "learning_rate": 1.8529200984251926e-07, "loss": 0.2013, "step": 11446 }, { "epoch": 0.92, "grad_norm": 1.4202737410647657, "learning_rate": 1.8494276159398582e-07, "loss": 0.2389, "step": 11447 }, { "epoch": 0.92, "grad_norm": 1.3293926683595405, "learning_rate": 1.8459383659483522e-07, "loss": 0.1915, "step": 11448 }, { "epoch": 0.92, "grad_norm": 1.4753297727027381, "learning_rate": 1.842452348684892e-07, "loss": 0.2012, "step": 11449 }, { "epoch": 0.92, "grad_norm": 1.370411036750608, "learning_rate": 1.838969564383525e-07, "loss": 0.1407, "step": 11450 }, { "epoch": 0.92, "grad_norm": 1.2886527495552438, "learning_rate": 1.8354900132780517e-07, "loss": 0.1518, "step": 11451 }, { "epoch": 0.92, "grad_norm": 17.18297221343557, "learning_rate": 1.832013695602064e-07, "loss": 0.4941, "step": 11452 }, { "epoch": 0.92, "grad_norm": 1.3034576986651727, "learning_rate": 1.8285406115889416e-07, "loss": 0.1938, "step": 11453 }, { "epoch": 0.92, "grad_norm": 1.4732539637540782, "learning_rate": 1.8250707614718422e-07, "loss": 0.2083, "step": 11454 }, { "epoch": 0.92, "grad_norm": 1.5443487948506132, "learning_rate": 1.8216041454837075e-07, "loss": 0.2012, "step": 11455 }, { "epoch": 0.92, "grad_norm": 1.2788028405246241, "learning_rate": 1.8181407638572623e-07, "loss": 0.1619, "step": 11456 }, { "epoch": 0.92, "grad_norm": 1.3971423940841963, "learning_rate": 1.8146806168250098e-07, "loss": 0.1568, "step": 11457 }, { "epoch": 0.92, "grad_norm": 1.4053743817982913, "learning_rate": 1.8112237046192417e-07, "loss": 0.1887, "step": 11458 }, { "epoch": 0.92, "grad_norm": 1.5101223467114036, "learning_rate": 1.8077700274720388e-07, "loss": 0.134, "step": 11459 }, { "epoch": 0.92, "grad_norm": 1.4153147719482428, "learning_rate": 1.804319585615244e-07, "loss": 0.1577, "step": 11460 }, { "epoch": 0.92, "grad_norm": 1.560946142223074, "learning_rate": 1.8008723792805049e-07, "loss": 0.1596, "step": 11461 }, { "epoch": 0.92, "grad_norm": 1.7437177999010351, "learning_rate": 1.7974284086992422e-07, "loss": 0.1878, "step": 11462 }, { "epoch": 0.92, "grad_norm": 1.5061473489434563, "learning_rate": 1.7939876741026597e-07, "loss": 0.1769, "step": 11463 }, { "epoch": 0.92, "grad_norm": 1.1740682537061646, "learning_rate": 1.7905501757217393e-07, "loss": 0.1226, "step": 11464 }, { "epoch": 0.92, "grad_norm": 1.2490806595606019, "learning_rate": 1.7871159137872573e-07, "loss": 0.1331, "step": 11465 }, { "epoch": 0.92, "grad_norm": 1.545424997827313, "learning_rate": 1.783684888529763e-07, "loss": 0.2009, "step": 11466 }, { "epoch": 0.92, "grad_norm": 1.3347479475507522, "learning_rate": 1.780257100179589e-07, "loss": 0.1548, "step": 11467 }, { "epoch": 0.92, "grad_norm": 1.3075272856577411, "learning_rate": 1.7768325489668558e-07, "loss": 0.1799, "step": 11468 }, { "epoch": 0.92, "grad_norm": 1.5451895703395253, "learning_rate": 1.773411235121464e-07, "loss": 0.1643, "step": 11469 }, { "epoch": 0.92, "grad_norm": 1.2483975751150365, "learning_rate": 1.7699931588731012e-07, "loss": 0.1602, "step": 11470 }, { "epoch": 0.92, "grad_norm": 1.5080102490450584, "learning_rate": 1.766578320451223e-07, "loss": 0.1695, "step": 11471 }, { "epoch": 0.92, "grad_norm": 1.3792794733884053, "learning_rate": 1.7631667200850856e-07, "loss": 0.133, "step": 11472 }, { "epoch": 0.92, "grad_norm": 1.3084587690882636, "learning_rate": 1.759758358003727e-07, "loss": 0.1838, "step": 11473 }, { "epoch": 0.92, "grad_norm": 10.322550186207593, "learning_rate": 1.7563532344359367e-07, "loss": 0.4574, "step": 11474 }, { "epoch": 0.92, "grad_norm": 1.3568265035408105, "learning_rate": 1.7529513496103322e-07, "loss": 0.1603, "step": 11475 }, { "epoch": 0.92, "grad_norm": 1.350049010280239, "learning_rate": 1.7495527037552917e-07, "loss": 0.1535, "step": 11476 }, { "epoch": 0.92, "grad_norm": 5.322810727965199, "learning_rate": 1.746157297098966e-07, "loss": 0.5451, "step": 11477 }, { "epoch": 0.92, "grad_norm": 1.5306958179578278, "learning_rate": 1.7427651298693004e-07, "loss": 0.1789, "step": 11478 }, { "epoch": 0.92, "grad_norm": 1.4535288205852983, "learning_rate": 1.7393762022940352e-07, "loss": 0.1455, "step": 11479 }, { "epoch": 0.92, "grad_norm": 1.1398275904744233, "learning_rate": 1.7359905146006607e-07, "loss": 0.1405, "step": 11480 }, { "epoch": 0.92, "grad_norm": 1.5760153073825938, "learning_rate": 1.732608067016478e-07, "loss": 0.2058, "step": 11481 }, { "epoch": 0.92, "grad_norm": 1.5230044043923348, "learning_rate": 1.7292288597685724e-07, "loss": 0.1765, "step": 11482 }, { "epoch": 0.92, "grad_norm": 7.578727590705592, "learning_rate": 1.7258528930837682e-07, "loss": 0.5327, "step": 11483 }, { "epoch": 0.92, "grad_norm": 1.3884724461109659, "learning_rate": 1.7224801671887392e-07, "loss": 0.1789, "step": 11484 }, { "epoch": 0.92, "grad_norm": 1.399420253818466, "learning_rate": 1.719110682309888e-07, "loss": 0.1761, "step": 11485 }, { "epoch": 0.92, "grad_norm": 1.2574402819903847, "learning_rate": 1.715744438673417e-07, "loss": 0.1559, "step": 11486 }, { "epoch": 0.92, "grad_norm": 1.366907934093081, "learning_rate": 1.7123814365053227e-07, "loss": 0.1676, "step": 11487 }, { "epoch": 0.92, "grad_norm": 1.3948790246563503, "learning_rate": 1.7090216760313693e-07, "loss": 0.1608, "step": 11488 }, { "epoch": 0.92, "grad_norm": 1.414944117984996, "learning_rate": 1.7056651574771043e-07, "loss": 0.2018, "step": 11489 }, { "epoch": 0.92, "grad_norm": 1.4226239270356955, "learning_rate": 1.702311881067864e-07, "loss": 0.182, "step": 11490 }, { "epoch": 0.92, "grad_norm": 1.4503277530578726, "learning_rate": 1.6989618470287628e-07, "loss": 0.1688, "step": 11491 }, { "epoch": 0.92, "grad_norm": 1.4162574904915288, "learning_rate": 1.6956150555846928e-07, "loss": 0.1519, "step": 11492 }, { "epoch": 0.92, "grad_norm": 1.3541594483226762, "learning_rate": 1.6922715069603524e-07, "loss": 0.1989, "step": 11493 }, { "epoch": 0.92, "grad_norm": 5.374428141386888, "learning_rate": 1.6889312013801784e-07, "loss": 0.3775, "step": 11494 }, { "epoch": 0.92, "grad_norm": 1.4951721292761446, "learning_rate": 1.6855941390684415e-07, "loss": 0.1856, "step": 11495 }, { "epoch": 0.92, "grad_norm": 1.373724817312194, "learning_rate": 1.6822603202491515e-07, "loss": 0.1341, "step": 11496 }, { "epoch": 0.92, "grad_norm": 1.2684039902744457, "learning_rate": 1.678929745146124e-07, "loss": 0.1576, "step": 11497 }, { "epoch": 0.92, "grad_norm": 1.2399231376470368, "learning_rate": 1.675602413982952e-07, "loss": 0.1379, "step": 11498 }, { "epoch": 0.92, "grad_norm": 1.423296366320413, "learning_rate": 1.6722783269830068e-07, "loss": 0.1577, "step": 11499 }, { "epoch": 0.92, "grad_norm": 1.165002051646205, "learning_rate": 1.6689574843694433e-07, "loss": 0.1442, "step": 11500 }, { "epoch": 0.92, "grad_norm": 1.2566012843541576, "learning_rate": 1.6656398863652003e-07, "loss": 0.1562, "step": 11501 }, { "epoch": 0.92, "grad_norm": 1.101281550273509, "learning_rate": 1.662325533192999e-07, "loss": 0.1505, "step": 11502 }, { "epoch": 0.92, "grad_norm": 1.431737475269822, "learning_rate": 1.6590144250753392e-07, "loss": 0.1769, "step": 11503 }, { "epoch": 0.92, "grad_norm": 9.6575369847766, "learning_rate": 1.655706562234516e-07, "loss": 0.5614, "step": 11504 }, { "epoch": 0.92, "grad_norm": 1.2570528718895349, "learning_rate": 1.6524019448925788e-07, "loss": 0.179, "step": 11505 }, { "epoch": 0.92, "grad_norm": 1.2674761158916967, "learning_rate": 1.6491005732713948e-07, "loss": 0.1712, "step": 11506 }, { "epoch": 0.92, "grad_norm": 1.4350581209408035, "learning_rate": 1.645802447592587e-07, "loss": 0.1674, "step": 11507 }, { "epoch": 0.92, "grad_norm": 1.3593463710779512, "learning_rate": 1.6425075680775616e-07, "loss": 0.1699, "step": 11508 }, { "epoch": 0.92, "grad_norm": 5.897923619181858, "learning_rate": 1.6392159349475246e-07, "loss": 0.4527, "step": 11509 }, { "epoch": 0.92, "grad_norm": 1.3614106361429505, "learning_rate": 1.6359275484234495e-07, "loss": 0.1399, "step": 11510 }, { "epoch": 0.92, "grad_norm": 1.3523951929676439, "learning_rate": 1.6326424087260984e-07, "loss": 0.1699, "step": 11511 }, { "epoch": 0.92, "grad_norm": 1.351064679727098, "learning_rate": 1.6293605160760063e-07, "loss": 0.1844, "step": 11512 }, { "epoch": 0.92, "grad_norm": 1.2823307393445196, "learning_rate": 1.6260818706934967e-07, "loss": 0.1967, "step": 11513 }, { "epoch": 0.92, "grad_norm": 6.432463421913745, "learning_rate": 1.622806472798677e-07, "loss": 0.6423, "step": 11514 }, { "epoch": 0.92, "grad_norm": 1.385859036779389, "learning_rate": 1.6195343226114492e-07, "loss": 0.1951, "step": 11515 }, { "epoch": 0.92, "grad_norm": 1.4221183041507257, "learning_rate": 1.6162654203514594e-07, "loss": 0.1922, "step": 11516 }, { "epoch": 0.92, "grad_norm": 1.3300417037604144, "learning_rate": 1.6129997662381657e-07, "loss": 0.1599, "step": 11517 }, { "epoch": 0.92, "grad_norm": 1.1844371598770815, "learning_rate": 1.609737360490815e-07, "loss": 0.1368, "step": 11518 }, { "epoch": 0.92, "grad_norm": 1.4048293891083952, "learning_rate": 1.6064782033283988e-07, "loss": 0.1651, "step": 11519 }, { "epoch": 0.92, "grad_norm": 1.416730853310614, "learning_rate": 1.6032222949697361e-07, "loss": 0.1787, "step": 11520 }, { "epoch": 0.92, "grad_norm": 1.3133560801310733, "learning_rate": 1.5999696356333916e-07, "loss": 0.1848, "step": 11521 }, { "epoch": 0.92, "grad_norm": 1.283556131758709, "learning_rate": 1.5967202255377344e-07, "loss": 0.1385, "step": 11522 }, { "epoch": 0.92, "grad_norm": 1.2208994065994303, "learning_rate": 1.5934740649009016e-07, "loss": 0.1802, "step": 11523 }, { "epoch": 0.92, "grad_norm": 1.2544995440690612, "learning_rate": 1.59023115394083e-07, "loss": 0.1285, "step": 11524 }, { "epoch": 0.92, "grad_norm": 6.594654569703901, "learning_rate": 1.5869914928752117e-07, "loss": 0.5808, "step": 11525 }, { "epoch": 0.92, "grad_norm": 1.3295716500152246, "learning_rate": 1.5837550819215342e-07, "loss": 0.173, "step": 11526 }, { "epoch": 0.92, "grad_norm": 1.4497989036883736, "learning_rate": 1.5805219212970846e-07, "loss": 0.1669, "step": 11527 }, { "epoch": 0.92, "grad_norm": 1.5607999730474693, "learning_rate": 1.577292011218895e-07, "loss": 0.1921, "step": 11528 }, { "epoch": 0.92, "grad_norm": 1.2797238916967677, "learning_rate": 1.5740653519038085e-07, "loss": 0.1825, "step": 11529 }, { "epoch": 0.92, "grad_norm": 1.3908101154257415, "learning_rate": 1.5708419435684463e-07, "loss": 0.1767, "step": 11530 }, { "epoch": 0.92, "grad_norm": 1.5597408224887392, "learning_rate": 1.567621786429191e-07, "loss": 0.1747, "step": 11531 }, { "epoch": 0.92, "grad_norm": 9.018537429593646, "learning_rate": 1.564404880702236e-07, "loss": 0.7068, "step": 11532 }, { "epoch": 0.92, "grad_norm": 1.2621791382933296, "learning_rate": 1.5611912266035312e-07, "loss": 0.1557, "step": 11533 }, { "epoch": 0.92, "grad_norm": 1.537506580991218, "learning_rate": 1.5579808243488258e-07, "loss": 0.1453, "step": 11534 }, { "epoch": 0.92, "grad_norm": 1.2651419077587005, "learning_rate": 1.5547736741536367e-07, "loss": 0.1483, "step": 11535 }, { "epoch": 0.92, "grad_norm": 8.264317035471171, "learning_rate": 1.5515697762332804e-07, "loss": 0.6699, "step": 11536 }, { "epoch": 0.92, "grad_norm": 1.3128643687052466, "learning_rate": 1.5483691308028292e-07, "loss": 0.18, "step": 11537 }, { "epoch": 0.92, "grad_norm": 1.552323275639246, "learning_rate": 1.5451717380771725e-07, "loss": 0.1566, "step": 11538 }, { "epoch": 0.92, "grad_norm": 1.2224464990258117, "learning_rate": 1.5419775982709383e-07, "loss": 0.1396, "step": 11539 }, { "epoch": 0.92, "grad_norm": 1.3181872238068628, "learning_rate": 1.5387867115985721e-07, "loss": 0.1732, "step": 11540 }, { "epoch": 0.92, "grad_norm": 1.3633695410635465, "learning_rate": 1.5355990782742912e-07, "loss": 0.1615, "step": 11541 }, { "epoch": 0.92, "grad_norm": 1.5594357890009398, "learning_rate": 1.5324146985120802e-07, "loss": 0.142, "step": 11542 }, { "epoch": 0.92, "grad_norm": 1.2094004395938132, "learning_rate": 1.5292335725257236e-07, "loss": 0.1743, "step": 11543 }, { "epoch": 0.92, "grad_norm": 1.432113804501824, "learning_rate": 1.526055700528778e-07, "loss": 0.2102, "step": 11544 }, { "epoch": 0.92, "grad_norm": 1.4326210845457028, "learning_rate": 1.522881082734584e-07, "loss": 0.146, "step": 11545 }, { "epoch": 0.92, "grad_norm": 1.2343444107002965, "learning_rate": 1.5197097193562603e-07, "loss": 0.1147, "step": 11546 }, { "epoch": 0.92, "grad_norm": 1.5500966415520663, "learning_rate": 1.5165416106067143e-07, "loss": 0.177, "step": 11547 }, { "epoch": 0.92, "grad_norm": 1.2435392311250306, "learning_rate": 1.5133767566986257e-07, "loss": 0.1571, "step": 11548 }, { "epoch": 0.92, "grad_norm": 1.4331472407456767, "learning_rate": 1.5102151578444746e-07, "loss": 0.1916, "step": 11549 }, { "epoch": 0.92, "grad_norm": 1.111728804777668, "learning_rate": 1.5070568142564912e-07, "loss": 0.1405, "step": 11550 }, { "epoch": 0.92, "grad_norm": 1.3167782800287304, "learning_rate": 1.5039017261467058e-07, "loss": 0.1814, "step": 11551 }, { "epoch": 0.92, "grad_norm": 1.2812087500582154, "learning_rate": 1.5007498937269438e-07, "loss": 0.1808, "step": 11552 }, { "epoch": 0.92, "grad_norm": 1.376230391418388, "learning_rate": 1.49760131720878e-07, "loss": 0.1635, "step": 11553 }, { "epoch": 0.92, "grad_norm": 1.461463040745427, "learning_rate": 1.494455996803601e-07, "loss": 0.188, "step": 11554 }, { "epoch": 0.92, "grad_norm": 8.096575580315122, "learning_rate": 1.4913139327225546e-07, "loss": 0.5001, "step": 11555 }, { "epoch": 0.92, "grad_norm": 1.28238425058102, "learning_rate": 1.4881751251765832e-07, "loss": 0.171, "step": 11556 }, { "epoch": 0.92, "grad_norm": 1.4135550828292813, "learning_rate": 1.4850395743764013e-07, "loss": 0.1713, "step": 11557 }, { "epoch": 0.92, "grad_norm": 1.2748570891991327, "learning_rate": 1.4819072805325074e-07, "loss": 0.1188, "step": 11558 }, { "epoch": 0.92, "grad_norm": 1.3664247423529206, "learning_rate": 1.478778243855178e-07, "loss": 0.161, "step": 11559 }, { "epoch": 0.92, "grad_norm": 1.4798664950350844, "learning_rate": 1.475652464554478e-07, "loss": 0.1782, "step": 11560 }, { "epoch": 0.92, "grad_norm": 1.4464052747718317, "learning_rate": 1.472529942840256e-07, "loss": 0.1801, "step": 11561 }, { "epoch": 0.92, "grad_norm": 1.2592005379057025, "learning_rate": 1.469410678922123e-07, "loss": 0.1548, "step": 11562 }, { "epoch": 0.93, "grad_norm": 1.2995115674848683, "learning_rate": 1.4662946730094997e-07, "loss": 0.1493, "step": 11563 }, { "epoch": 0.93, "grad_norm": 1.1236899007906922, "learning_rate": 1.4631819253115632e-07, "loss": 0.1062, "step": 11564 }, { "epoch": 0.93, "grad_norm": 1.3206675186667056, "learning_rate": 1.4600724360372853e-07, "loss": 0.1702, "step": 11565 }, { "epoch": 0.93, "grad_norm": 1.5256679316636497, "learning_rate": 1.456966205395416e-07, "loss": 0.1623, "step": 11566 }, { "epoch": 0.93, "grad_norm": 1.3169614037898738, "learning_rate": 1.453863233594488e-07, "loss": 0.1872, "step": 11567 }, { "epoch": 0.93, "grad_norm": 1.5806519354418795, "learning_rate": 1.4507635208428073e-07, "loss": 0.2091, "step": 11568 }, { "epoch": 0.93, "grad_norm": 1.404001972917452, "learning_rate": 1.4476670673484683e-07, "loss": 0.1575, "step": 11569 }, { "epoch": 0.93, "grad_norm": 1.4690596319965643, "learning_rate": 1.4445738733193494e-07, "loss": 0.1739, "step": 11570 }, { "epoch": 0.93, "grad_norm": 1.3435462904088367, "learning_rate": 1.4414839389630954e-07, "loss": 0.1638, "step": 11571 }, { "epoch": 0.93, "grad_norm": 1.3598554665722264, "learning_rate": 1.4383972644871624e-07, "loss": 0.1493, "step": 11572 }, { "epoch": 0.93, "grad_norm": 1.584575882644112, "learning_rate": 1.4353138500987461e-07, "loss": 0.1652, "step": 11573 }, { "epoch": 0.93, "grad_norm": 1.3508869907894938, "learning_rate": 1.4322336960048643e-07, "loss": 0.1576, "step": 11574 }, { "epoch": 0.93, "grad_norm": 8.373996453732175, "learning_rate": 1.4291568024122848e-07, "loss": 0.7397, "step": 11575 }, { "epoch": 0.93, "grad_norm": 1.4594073972742634, "learning_rate": 1.4260831695275757e-07, "loss": 0.1575, "step": 11576 }, { "epoch": 0.93, "grad_norm": 1.3152737649499724, "learning_rate": 1.4230127975570774e-07, "loss": 0.1879, "step": 11577 }, { "epoch": 0.93, "grad_norm": 1.4668314630234571, "learning_rate": 1.4199456867069084e-07, "loss": 0.1735, "step": 11578 }, { "epoch": 0.93, "grad_norm": 1.5572226036038777, "learning_rate": 1.4168818371829817e-07, "loss": 0.1497, "step": 11579 }, { "epoch": 0.93, "grad_norm": 1.4322428142764396, "learning_rate": 1.4138212491909776e-07, "loss": 0.1739, "step": 11580 }, { "epoch": 0.93, "grad_norm": 1.28411973563813, "learning_rate": 1.410763922936359e-07, "loss": 0.1444, "step": 11581 }, { "epoch": 0.93, "grad_norm": 1.7479492048656395, "learning_rate": 1.4077098586243786e-07, "loss": 0.1441, "step": 11582 }, { "epoch": 0.93, "grad_norm": 1.4321866864417385, "learning_rate": 1.404659056460067e-07, "loss": 0.1872, "step": 11583 }, { "epoch": 0.93, "grad_norm": 1.2374099928641051, "learning_rate": 1.4016115166482269e-07, "loss": 0.1576, "step": 11584 }, { "epoch": 0.93, "grad_norm": 1.3933546318095675, "learning_rate": 1.3985672393934557e-07, "loss": 0.201, "step": 11585 }, { "epoch": 0.93, "grad_norm": 1.4939498893191685, "learning_rate": 1.3955262249001289e-07, "loss": 0.1777, "step": 11586 }, { "epoch": 0.93, "grad_norm": 1.435452472503611, "learning_rate": 1.392488473372383e-07, "loss": 0.2081, "step": 11587 }, { "epoch": 0.93, "grad_norm": 1.4705166808432428, "learning_rate": 1.3894539850141607e-07, "loss": 0.1706, "step": 11588 }, { "epoch": 0.93, "grad_norm": 1.2518529620532008, "learning_rate": 1.3864227600291824e-07, "loss": 0.1467, "step": 11589 }, { "epoch": 0.93, "grad_norm": 1.3189854991732242, "learning_rate": 1.383394798620935e-07, "loss": 0.1803, "step": 11590 }, { "epoch": 0.93, "grad_norm": 1.335237866046854, "learning_rate": 1.380370100992695e-07, "loss": 0.1859, "step": 11591 }, { "epoch": 0.93, "grad_norm": 1.2714991006837475, "learning_rate": 1.3773486673475277e-07, "loss": 0.1474, "step": 11592 }, { "epoch": 0.93, "grad_norm": 1.479207314942831, "learning_rate": 1.3743304978882543e-07, "loss": 0.1363, "step": 11593 }, { "epoch": 0.93, "grad_norm": 1.5434757045523793, "learning_rate": 1.3713155928175183e-07, "loss": 0.1942, "step": 11594 }, { "epoch": 0.93, "grad_norm": 1.36410381815116, "learning_rate": 1.3683039523376962e-07, "loss": 0.1723, "step": 11595 }, { "epoch": 0.93, "grad_norm": 1.1967038207265357, "learning_rate": 1.3652955766509768e-07, "loss": 0.1015, "step": 11596 }, { "epoch": 0.93, "grad_norm": 1.5589894916195683, "learning_rate": 1.3622904659593316e-07, "loss": 0.2017, "step": 11597 }, { "epoch": 0.93, "grad_norm": 10.100457027645962, "learning_rate": 1.359288620464483e-07, "loss": 0.4993, "step": 11598 }, { "epoch": 0.93, "grad_norm": 9.594524097136254, "learning_rate": 1.356290040367969e-07, "loss": 0.6542, "step": 11599 }, { "epoch": 0.93, "grad_norm": 1.5272223316091265, "learning_rate": 1.3532947258710905e-07, "loss": 0.2064, "step": 11600 }, { "epoch": 0.93, "grad_norm": 1.2770427248546683, "learning_rate": 1.3503026771749305e-07, "loss": 0.195, "step": 11601 }, { "epoch": 0.93, "grad_norm": 1.3349588407368802, "learning_rate": 1.347313894480351e-07, "loss": 0.138, "step": 11602 }, { "epoch": 0.93, "grad_norm": 1.478920998702164, "learning_rate": 1.3443283779880134e-07, "loss": 0.1978, "step": 11603 }, { "epoch": 0.93, "grad_norm": 1.413244687397578, "learning_rate": 1.3413461278983241e-07, "loss": 0.1481, "step": 11604 }, { "epoch": 0.93, "grad_norm": 1.5458598533427121, "learning_rate": 1.3383671444114953e-07, "loss": 0.1971, "step": 11605 }, { "epoch": 0.93, "grad_norm": 1.2996549683554335, "learning_rate": 1.3353914277275282e-07, "loss": 0.1507, "step": 11606 }, { "epoch": 0.93, "grad_norm": 1.3516846603688444, "learning_rate": 1.3324189780461684e-07, "loss": 0.1942, "step": 11607 }, { "epoch": 0.93, "grad_norm": 1.3780526072681978, "learning_rate": 1.3294497955669894e-07, "loss": 0.1446, "step": 11608 }, { "epoch": 0.93, "grad_norm": 1.2647168685219725, "learning_rate": 1.32648388048931e-07, "loss": 0.1692, "step": 11609 }, { "epoch": 0.93, "grad_norm": 1.3195374089670084, "learning_rate": 1.3235212330122425e-07, "loss": 0.1661, "step": 11610 }, { "epoch": 0.93, "grad_norm": 1.5100444120355025, "learning_rate": 1.320561853334684e-07, "loss": 0.1762, "step": 11611 }, { "epoch": 0.93, "grad_norm": 1.3948257857317143, "learning_rate": 1.3176057416552924e-07, "loss": 0.1482, "step": 11612 }, { "epoch": 0.93, "grad_norm": 1.5381756774677289, "learning_rate": 1.314652898172536e-07, "loss": 0.1767, "step": 11613 }, { "epoch": 0.93, "grad_norm": 7.182517359519336, "learning_rate": 1.3117033230846398e-07, "loss": 0.5426, "step": 11614 }, { "epoch": 0.93, "grad_norm": 1.5295353551751703, "learning_rate": 1.308757016589618e-07, "loss": 0.1819, "step": 11615 }, { "epoch": 0.93, "grad_norm": 1.2889947658943508, "learning_rate": 1.3058139788852563e-07, "loss": 0.1384, "step": 11616 }, { "epoch": 0.93, "grad_norm": 1.281284381654974, "learning_rate": 1.3028742101691583e-07, "loss": 0.1463, "step": 11617 }, { "epoch": 0.93, "grad_norm": 1.4299306856534115, "learning_rate": 1.2999377106386492e-07, "loss": 0.1594, "step": 11618 }, { "epoch": 0.93, "grad_norm": 1.5363362481854856, "learning_rate": 1.2970044804908766e-07, "loss": 0.1932, "step": 11619 }, { "epoch": 0.93, "grad_norm": 1.375480237019025, "learning_rate": 1.2940745199227666e-07, "loss": 0.1743, "step": 11620 }, { "epoch": 0.93, "grad_norm": 1.2567849833924285, "learning_rate": 1.2911478291309954e-07, "loss": 0.1589, "step": 11621 }, { "epoch": 0.93, "grad_norm": 1.3560608629829285, "learning_rate": 1.288224408312061e-07, "loss": 0.1962, "step": 11622 }, { "epoch": 0.93, "grad_norm": 1.4764825798809136, "learning_rate": 1.2853042576622066e-07, "loss": 0.2005, "step": 11623 }, { "epoch": 0.93, "grad_norm": 1.5354825149451514, "learning_rate": 1.2823873773774808e-07, "loss": 0.1706, "step": 11624 }, { "epoch": 0.93, "grad_norm": 1.295288288789178, "learning_rate": 1.2794737676536993e-07, "loss": 0.1621, "step": 11625 }, { "epoch": 0.93, "grad_norm": 1.4545686386522088, "learning_rate": 1.2765634286864614e-07, "loss": 0.2197, "step": 11626 }, { "epoch": 0.93, "grad_norm": 1.4600296111379967, "learning_rate": 1.2736563606711384e-07, "loss": 0.1664, "step": 11627 }, { "epoch": 0.93, "grad_norm": 1.214750667177578, "learning_rate": 1.270752563802913e-07, "loss": 0.1521, "step": 11628 }, { "epoch": 0.93, "grad_norm": 1.318386643133485, "learning_rate": 1.2678520382767068e-07, "loss": 0.1536, "step": 11629 }, { "epoch": 0.93, "grad_norm": 1.3030253432667387, "learning_rate": 1.2649547842872367e-07, "loss": 0.1772, "step": 11630 }, { "epoch": 0.93, "grad_norm": 1.2744554417542768, "learning_rate": 1.262060802029025e-07, "loss": 0.1832, "step": 11631 }, { "epoch": 0.93, "grad_norm": 1.293691205806851, "learning_rate": 1.2591700916963323e-07, "loss": 0.1906, "step": 11632 }, { "epoch": 0.93, "grad_norm": 1.130174241335614, "learning_rate": 1.2562826534832372e-07, "loss": 0.1289, "step": 11633 }, { "epoch": 0.93, "grad_norm": 1.276443191441171, "learning_rate": 1.2533984875835737e-07, "loss": 0.1933, "step": 11634 }, { "epoch": 0.93, "grad_norm": 1.3091566107207626, "learning_rate": 1.2505175941909642e-07, "loss": 0.1717, "step": 11635 }, { "epoch": 0.93, "grad_norm": 1.3599025772734412, "learning_rate": 1.2476399734988155e-07, "loss": 0.1699, "step": 11636 }, { "epoch": 0.93, "grad_norm": 1.3039832224328505, "learning_rate": 1.244765625700306e-07, "loss": 0.1861, "step": 11637 }, { "epoch": 0.93, "grad_norm": 1.452215308739537, "learning_rate": 1.2418945509884095e-07, "loss": 0.1869, "step": 11638 }, { "epoch": 0.93, "grad_norm": 1.3969264812006772, "learning_rate": 1.239026749555855e-07, "loss": 0.1504, "step": 11639 }, { "epoch": 0.93, "grad_norm": 7.031209296605902, "learning_rate": 1.2361622215951774e-07, "loss": 0.4558, "step": 11640 }, { "epoch": 0.93, "grad_norm": 7.773914536683679, "learning_rate": 1.2333009672986784e-07, "loss": 0.6375, "step": 11641 }, { "epoch": 0.93, "grad_norm": 1.1204355130934291, "learning_rate": 1.2304429868584488e-07, "loss": 0.144, "step": 11642 }, { "epoch": 0.93, "grad_norm": 1.4784018417360747, "learning_rate": 1.2275882804663354e-07, "loss": 0.2096, "step": 11643 }, { "epoch": 0.93, "grad_norm": 1.247350831220143, "learning_rate": 1.224736848314001e-07, "loss": 0.1651, "step": 11644 }, { "epoch": 0.93, "grad_norm": 1.3814533502398265, "learning_rate": 1.2218886905928652e-07, "loss": 0.1747, "step": 11645 }, { "epoch": 0.93, "grad_norm": 1.4813408760692128, "learning_rate": 1.2190438074941357e-07, "loss": 0.1965, "step": 11646 }, { "epoch": 0.93, "grad_norm": 1.5130654882098742, "learning_rate": 1.2162021992087934e-07, "loss": 0.1648, "step": 11647 }, { "epoch": 0.93, "grad_norm": 1.3224570741428938, "learning_rate": 1.2133638659276082e-07, "loss": 0.1456, "step": 11648 }, { "epoch": 0.93, "grad_norm": 1.3349571583502873, "learning_rate": 1.2105288078411214e-07, "loss": 0.1558, "step": 11649 }, { "epoch": 0.93, "grad_norm": 1.2338567122692776, "learning_rate": 1.2076970251396593e-07, "loss": 0.144, "step": 11650 }, { "epoch": 0.93, "grad_norm": 1.3958895075070241, "learning_rate": 1.2048685180133412e-07, "loss": 0.1649, "step": 11651 }, { "epoch": 0.93, "grad_norm": 1.4010496539570263, "learning_rate": 1.2020432866520325e-07, "loss": 0.1849, "step": 11652 }, { "epoch": 0.93, "grad_norm": 1.503249879452813, "learning_rate": 1.199221331245415e-07, "loss": 0.1734, "step": 11653 }, { "epoch": 0.93, "grad_norm": 1.3575245845949964, "learning_rate": 1.1964026519829307e-07, "loss": 0.1626, "step": 11654 }, { "epoch": 0.93, "grad_norm": 11.979808973539093, "learning_rate": 1.193587249053807e-07, "loss": 0.5021, "step": 11655 }, { "epoch": 0.93, "grad_norm": 1.3907039782940525, "learning_rate": 1.1907751226470421e-07, "loss": 0.1872, "step": 11656 }, { "epoch": 0.93, "grad_norm": 1.4467321057726041, "learning_rate": 1.1879662729514352e-07, "loss": 0.1727, "step": 11657 }, { "epoch": 0.93, "grad_norm": 1.143763110836109, "learning_rate": 1.1851607001555465e-07, "loss": 0.1349, "step": 11658 }, { "epoch": 0.93, "grad_norm": 1.4173322095061018, "learning_rate": 1.1823584044477199e-07, "loss": 0.202, "step": 11659 }, { "epoch": 0.93, "grad_norm": 1.3626871066482655, "learning_rate": 1.179559386016088e-07, "loss": 0.1586, "step": 11660 }, { "epoch": 0.93, "grad_norm": 1.3310918363445101, "learning_rate": 1.1767636450485453e-07, "loss": 0.1809, "step": 11661 }, { "epoch": 0.93, "grad_norm": 1.6161479866087205, "learning_rate": 1.1739711817328025e-07, "loss": 0.1901, "step": 11662 }, { "epoch": 0.93, "grad_norm": 1.2339997782035526, "learning_rate": 1.1711819962562987e-07, "loss": 0.1466, "step": 11663 }, { "epoch": 0.93, "grad_norm": 1.327590247981248, "learning_rate": 1.1683960888063006e-07, "loss": 0.1471, "step": 11664 }, { "epoch": 0.93, "grad_norm": 1.2278666142565842, "learning_rate": 1.1656134595698309e-07, "loss": 0.1454, "step": 11665 }, { "epoch": 0.93, "grad_norm": 1.2766610652719923, "learning_rate": 1.1628341087336792e-07, "loss": 0.1124, "step": 11666 }, { "epoch": 0.93, "grad_norm": 1.2098282132851113, "learning_rate": 1.1600580364844516e-07, "loss": 0.124, "step": 11667 }, { "epoch": 0.93, "grad_norm": 1.2536296093800676, "learning_rate": 1.1572852430085103e-07, "loss": 0.1624, "step": 11668 }, { "epoch": 0.93, "grad_norm": 1.4224677729078656, "learning_rate": 1.154515728491995e-07, "loss": 0.1932, "step": 11669 }, { "epoch": 0.93, "grad_norm": 1.4758594928094455, "learning_rate": 1.151749493120835e-07, "loss": 0.1956, "step": 11670 }, { "epoch": 0.93, "grad_norm": 1.3267670904776225, "learning_rate": 1.1489865370807318e-07, "loss": 0.168, "step": 11671 }, { "epoch": 0.93, "grad_norm": 1.4064444632889166, "learning_rate": 1.1462268605571814e-07, "loss": 0.1502, "step": 11672 }, { "epoch": 0.93, "grad_norm": 1.2044490560652903, "learning_rate": 1.143470463735441e-07, "loss": 0.1525, "step": 11673 }, { "epoch": 0.93, "grad_norm": 1.3548379019308125, "learning_rate": 1.1407173468005572e-07, "loss": 0.1931, "step": 11674 }, { "epoch": 0.93, "grad_norm": 1.3123905965422187, "learning_rate": 1.1379675099373489e-07, "loss": 0.167, "step": 11675 }, { "epoch": 0.93, "grad_norm": 1.364710543413833, "learning_rate": 1.1352209533304404e-07, "loss": 0.1612, "step": 11676 }, { "epoch": 0.93, "grad_norm": 1.2119668242147925, "learning_rate": 1.1324776771641898e-07, "loss": 0.1218, "step": 11677 }, { "epoch": 0.93, "grad_norm": 1.4953614485925484, "learning_rate": 1.1297376816227834e-07, "loss": 0.185, "step": 11678 }, { "epoch": 0.93, "grad_norm": 1.3725143923522227, "learning_rate": 1.1270009668901571e-07, "loss": 0.1674, "step": 11679 }, { "epoch": 0.93, "grad_norm": 8.866969719005281, "learning_rate": 1.1242675331500363e-07, "loss": 0.4868, "step": 11680 }, { "epoch": 0.93, "grad_norm": 1.1670670071655291, "learning_rate": 1.1215373805859242e-07, "loss": 0.1109, "step": 11681 }, { "epoch": 0.93, "grad_norm": 1.3660391574464552, "learning_rate": 1.118810509381102e-07, "loss": 0.1512, "step": 11682 }, { "epoch": 0.93, "grad_norm": 1.219007255532347, "learning_rate": 1.1160869197186342e-07, "loss": 0.1616, "step": 11683 }, { "epoch": 0.93, "grad_norm": 1.4423812245767982, "learning_rate": 1.1133666117813635e-07, "loss": 0.1545, "step": 11684 }, { "epoch": 0.93, "grad_norm": 7.7599134179340705, "learning_rate": 1.1106495857519162e-07, "loss": 0.5044, "step": 11685 }, { "epoch": 0.93, "grad_norm": 1.27294324674106, "learning_rate": 1.1079358418126795e-07, "loss": 0.1245, "step": 11686 }, { "epoch": 0.93, "grad_norm": 1.400516955577557, "learning_rate": 1.1052253801458634e-07, "loss": 0.1793, "step": 11687 }, { "epoch": 0.94, "grad_norm": 1.709964790224891, "learning_rate": 1.1025182009334001e-07, "loss": 0.1771, "step": 11688 }, { "epoch": 0.94, "grad_norm": 1.476409105865249, "learning_rate": 1.0998143043570441e-07, "loss": 0.1754, "step": 11689 }, { "epoch": 0.94, "grad_norm": 1.2376053220258127, "learning_rate": 1.0971136905983282e-07, "loss": 0.1481, "step": 11690 }, { "epoch": 0.94, "grad_norm": 1.3058320902057976, "learning_rate": 1.0944163598385237e-07, "loss": 0.145, "step": 11691 }, { "epoch": 0.94, "grad_norm": 1.4384127158071447, "learning_rate": 1.0917223122587361e-07, "loss": 0.1745, "step": 11692 }, { "epoch": 0.94, "grad_norm": 14.913152952662157, "learning_rate": 1.089031548039815e-07, "loss": 0.6187, "step": 11693 }, { "epoch": 0.94, "grad_norm": 1.3470912575561913, "learning_rate": 1.0863440673623992e-07, "loss": 0.1646, "step": 11694 }, { "epoch": 0.94, "grad_norm": 1.441832855824926, "learning_rate": 1.0836598704069057e-07, "loss": 0.1645, "step": 11695 }, { "epoch": 0.94, "grad_norm": 1.5576253411749779, "learning_rate": 1.0809789573535456e-07, "loss": 0.1778, "step": 11696 }, { "epoch": 0.94, "grad_norm": 1.4154872282832505, "learning_rate": 1.0783013283822752e-07, "loss": 0.1779, "step": 11697 }, { "epoch": 0.94, "grad_norm": 1.382538972798503, "learning_rate": 1.0756269836728672e-07, "loss": 0.1805, "step": 11698 }, { "epoch": 0.94, "grad_norm": 56.269954542126946, "learning_rate": 1.0729559234048615e-07, "loss": 0.698, "step": 11699 }, { "epoch": 0.94, "grad_norm": 1.255364273374407, "learning_rate": 1.0702881477575589e-07, "loss": 0.1691, "step": 11700 }, { "epoch": 0.94, "grad_norm": 1.1659005639972897, "learning_rate": 1.0676236569100718e-07, "loss": 0.1701, "step": 11701 }, { "epoch": 0.94, "grad_norm": 1.3542944143413516, "learning_rate": 1.0649624510412625e-07, "loss": 0.1702, "step": 11702 }, { "epoch": 0.94, "grad_norm": 1.310998641833066, "learning_rate": 1.0623045303297941e-07, "loss": 0.1519, "step": 11703 }, { "epoch": 0.94, "grad_norm": 1.2372867607674312, "learning_rate": 1.0596498949540957e-07, "loss": 0.1632, "step": 11704 }, { "epoch": 0.94, "grad_norm": 1.3983616959310992, "learning_rate": 1.0569985450923803e-07, "loss": 0.1875, "step": 11705 }, { "epoch": 0.94, "grad_norm": 1.5547153997130903, "learning_rate": 1.0543504809226446e-07, "loss": 0.1753, "step": 11706 }, { "epoch": 0.94, "grad_norm": 5.6434501216870006, "learning_rate": 1.0517057026226685e-07, "loss": 0.5303, "step": 11707 }, { "epoch": 0.94, "grad_norm": 74.11884199535422, "learning_rate": 1.0490642103699933e-07, "loss": 0.5841, "step": 11708 }, { "epoch": 0.94, "grad_norm": 1.2098872102968024, "learning_rate": 1.0464260043419438e-07, "loss": 0.1374, "step": 11709 }, { "epoch": 0.94, "grad_norm": 1.3595075573672604, "learning_rate": 1.0437910847156507e-07, "loss": 0.1941, "step": 11710 }, { "epoch": 0.94, "grad_norm": 1.2432710661925528, "learning_rate": 1.0411594516679835e-07, "loss": 0.1342, "step": 11711 }, { "epoch": 0.94, "grad_norm": 1.4001168094389072, "learning_rate": 1.0385311053756287e-07, "loss": 0.2087, "step": 11712 }, { "epoch": 0.94, "grad_norm": 1.143803182788164, "learning_rate": 1.0359060460150283e-07, "loss": 0.1195, "step": 11713 }, { "epoch": 0.94, "grad_norm": 1.6966454642514566, "learning_rate": 1.0332842737624082e-07, "loss": 0.2324, "step": 11714 }, { "epoch": 0.94, "grad_norm": 1.4257000685944459, "learning_rate": 1.0306657887937833e-07, "loss": 0.1423, "step": 11715 }, { "epoch": 0.94, "grad_norm": 1.33784751745277, "learning_rate": 1.0280505912849293e-07, "loss": 0.1877, "step": 11716 }, { "epoch": 0.94, "grad_norm": 1.6299263861555227, "learning_rate": 1.0254386814114226e-07, "loss": 0.2113, "step": 11717 }, { "epoch": 0.94, "grad_norm": 1.2666545341317765, "learning_rate": 1.0228300593486007e-07, "loss": 0.1752, "step": 11718 }, { "epoch": 0.94, "grad_norm": 1.2743314990649175, "learning_rate": 1.0202247252715902e-07, "loss": 0.178, "step": 11719 }, { "epoch": 0.94, "grad_norm": 1.8066721037296976, "learning_rate": 1.0176226793552957e-07, "loss": 0.1657, "step": 11720 }, { "epoch": 0.94, "grad_norm": 1.2448332861839368, "learning_rate": 1.0150239217744106e-07, "loss": 0.152, "step": 11721 }, { "epoch": 0.94, "grad_norm": 1.4096673461971014, "learning_rate": 1.0124284527033789e-07, "loss": 0.1482, "step": 11722 }, { "epoch": 0.94, "grad_norm": 1.3754035706029113, "learning_rate": 1.0098362723164556e-07, "loss": 0.1816, "step": 11723 }, { "epoch": 0.94, "grad_norm": 1.2840843206080021, "learning_rate": 1.007247380787657e-07, "loss": 0.1273, "step": 11724 }, { "epoch": 0.94, "grad_norm": 1.2431449712110232, "learning_rate": 1.004661778290783e-07, "loss": 0.1545, "step": 11725 }, { "epoch": 0.94, "grad_norm": 1.3662559613394658, "learning_rate": 1.002079464999417e-07, "loss": 0.1664, "step": 11726 }, { "epoch": 0.94, "grad_norm": 1.2559547986400617, "learning_rate": 9.995004410869147e-08, "loss": 0.1456, "step": 11727 }, { "epoch": 0.94, "grad_norm": 11.763105304067645, "learning_rate": 9.969247067264099e-08, "loss": 0.5072, "step": 11728 }, { "epoch": 0.94, "grad_norm": 1.4274905041538866, "learning_rate": 9.943522620908253e-08, "loss": 0.1472, "step": 11729 }, { "epoch": 0.94, "grad_norm": 1.3271281774266643, "learning_rate": 9.917831073528506e-08, "loss": 0.1463, "step": 11730 }, { "epoch": 0.94, "grad_norm": 1.4211394312039303, "learning_rate": 9.892172426849644e-08, "loss": 0.203, "step": 11731 }, { "epoch": 0.94, "grad_norm": 8.455423329644393, "learning_rate": 9.866546682594236e-08, "loss": 0.5779, "step": 11732 }, { "epoch": 0.94, "grad_norm": 1.2713255801559635, "learning_rate": 9.840953842482682e-08, "loss": 0.1225, "step": 11733 }, { "epoch": 0.94, "grad_norm": 1.7715292316507498, "learning_rate": 9.815393908232885e-08, "loss": 0.1797, "step": 11734 }, { "epoch": 0.94, "grad_norm": 1.3863715798558283, "learning_rate": 9.789866881560971e-08, "loss": 0.164, "step": 11735 }, { "epoch": 0.94, "grad_norm": 1.4807437787339865, "learning_rate": 9.764372764180519e-08, "loss": 0.1618, "step": 11736 }, { "epoch": 0.94, "grad_norm": 1.4400704952228964, "learning_rate": 9.738911557803154e-08, "loss": 0.1599, "step": 11737 }, { "epoch": 0.94, "grad_norm": 1.4236330699601027, "learning_rate": 9.713483264138013e-08, "loss": 0.1871, "step": 11738 }, { "epoch": 0.94, "grad_norm": 1.3618291288221354, "learning_rate": 9.688087884892284e-08, "loss": 0.1721, "step": 11739 }, { "epoch": 0.94, "grad_norm": 1.4380555809988866, "learning_rate": 9.66272542177077e-08, "loss": 0.185, "step": 11740 }, { "epoch": 0.94, "grad_norm": 1.33692767645177, "learning_rate": 9.637395876476219e-08, "loss": 0.1662, "step": 11741 }, { "epoch": 0.94, "grad_norm": 7.414480408177421, "learning_rate": 9.612099250708995e-08, "loss": 0.5949, "step": 11742 }, { "epoch": 0.94, "grad_norm": 1.291475200489871, "learning_rate": 9.586835546167351e-08, "loss": 0.164, "step": 11743 }, { "epoch": 0.94, "grad_norm": 14.460021615551394, "learning_rate": 9.561604764547371e-08, "loss": 0.6736, "step": 11744 }, { "epoch": 0.94, "grad_norm": 1.6068235874191255, "learning_rate": 9.536406907542761e-08, "loss": 0.1803, "step": 11745 }, { "epoch": 0.94, "grad_norm": 1.4638836463804885, "learning_rate": 9.511241976845276e-08, "loss": 0.1744, "step": 11746 }, { "epoch": 0.94, "grad_norm": 1.6782197776043437, "learning_rate": 9.486109974144175e-08, "loss": 0.1939, "step": 11747 }, { "epoch": 0.94, "grad_norm": 1.2861538522534006, "learning_rate": 9.461010901126777e-08, "loss": 0.1539, "step": 11748 }, { "epoch": 0.94, "grad_norm": 1.3761552758320768, "learning_rate": 9.435944759477955e-08, "loss": 0.1536, "step": 11749 }, { "epoch": 0.94, "grad_norm": 1.2343077190634686, "learning_rate": 9.410911550880474e-08, "loss": 0.155, "step": 11750 }, { "epoch": 0.94, "grad_norm": 4.698658656326484, "learning_rate": 9.385911277014991e-08, "loss": 0.4707, "step": 11751 }, { "epoch": 0.94, "grad_norm": 1.5147066198744654, "learning_rate": 9.360943939559774e-08, "loss": 0.1757, "step": 11752 }, { "epoch": 0.94, "grad_norm": 1.4972340936540334, "learning_rate": 9.336009540190927e-08, "loss": 0.1639, "step": 11753 }, { "epoch": 0.94, "grad_norm": 1.5525489449835235, "learning_rate": 9.311108080582387e-08, "loss": 0.14, "step": 11754 }, { "epoch": 0.94, "grad_norm": 1.309520514414241, "learning_rate": 9.286239562405985e-08, "loss": 0.153, "step": 11755 }, { "epoch": 0.94, "grad_norm": 1.5327706412103699, "learning_rate": 9.261403987331052e-08, "loss": 0.193, "step": 11756 }, { "epoch": 0.94, "grad_norm": 1.37571221931503, "learning_rate": 9.236601357024977e-08, "loss": 0.1653, "step": 11757 }, { "epoch": 0.94, "grad_norm": 1.4567987096461017, "learning_rate": 9.211831673152816e-08, "loss": 0.1538, "step": 11758 }, { "epoch": 0.94, "grad_norm": 1.6904854909974252, "learning_rate": 9.187094937377406e-08, "loss": 0.2043, "step": 11759 }, { "epoch": 0.94, "grad_norm": 1.2490973624499306, "learning_rate": 9.162391151359417e-08, "loss": 0.1534, "step": 11760 }, { "epoch": 0.94, "grad_norm": 1.5479756034338594, "learning_rate": 9.137720316757303e-08, "loss": 0.1856, "step": 11761 }, { "epoch": 0.94, "grad_norm": 1.2745122005590874, "learning_rate": 9.113082435227294e-08, "loss": 0.194, "step": 11762 }, { "epoch": 0.94, "grad_norm": 1.421100319367278, "learning_rate": 9.088477508423343e-08, "loss": 0.2067, "step": 11763 }, { "epoch": 0.94, "grad_norm": 9.266449547376562, "learning_rate": 9.063905537997352e-08, "loss": 0.6652, "step": 11764 }, { "epoch": 0.94, "grad_norm": 1.3450258706195086, "learning_rate": 9.039366525598781e-08, "loss": 0.1688, "step": 11765 }, { "epoch": 0.94, "grad_norm": 1.5287419179764004, "learning_rate": 9.014860472875197e-08, "loss": 0.1925, "step": 11766 }, { "epoch": 0.94, "grad_norm": 1.4424855994443555, "learning_rate": 8.99038738147151e-08, "loss": 0.183, "step": 11767 }, { "epoch": 0.94, "grad_norm": 1.38429330711202, "learning_rate": 8.965947253030904e-08, "loss": 0.2049, "step": 11768 }, { "epoch": 0.94, "grad_norm": 1.4556569867386469, "learning_rate": 8.941540089194067e-08, "loss": 0.1747, "step": 11769 }, { "epoch": 0.94, "grad_norm": 5.860342207838784, "learning_rate": 8.917165891599467e-08, "loss": 0.5741, "step": 11770 }, { "epoch": 0.94, "grad_norm": 10.839762515804097, "learning_rate": 8.892824661883403e-08, "loss": 0.3994, "step": 11771 }, { "epoch": 0.94, "grad_norm": 1.359713427083873, "learning_rate": 8.868516401680072e-08, "loss": 0.1655, "step": 11772 }, { "epoch": 0.94, "grad_norm": 1.4144130968296706, "learning_rate": 8.844241112621277e-08, "loss": 0.1389, "step": 11773 }, { "epoch": 0.94, "grad_norm": 1.4500565876798737, "learning_rate": 8.81999879633666e-08, "loss": 0.2155, "step": 11774 }, { "epoch": 0.94, "grad_norm": 1.4172840698664086, "learning_rate": 8.795789454453862e-08, "loss": 0.2117, "step": 11775 }, { "epoch": 0.94, "grad_norm": 1.5215896066552475, "learning_rate": 8.771613088597863e-08, "loss": 0.1756, "step": 11776 }, { "epoch": 0.94, "grad_norm": 1.3608572549513422, "learning_rate": 8.74746970039192e-08, "loss": 0.1778, "step": 11777 }, { "epoch": 0.94, "grad_norm": 1.491995202689277, "learning_rate": 8.723359291456846e-08, "loss": 0.1617, "step": 11778 }, { "epoch": 0.94, "grad_norm": 1.3943551231388651, "learning_rate": 8.69928186341107e-08, "loss": 0.1574, "step": 11779 }, { "epoch": 0.94, "grad_norm": 1.278201244907948, "learning_rate": 8.675237417871075e-08, "loss": 0.1174, "step": 11780 }, { "epoch": 0.94, "grad_norm": 1.268493452103561, "learning_rate": 8.65122595645107e-08, "loss": 0.1539, "step": 11781 }, { "epoch": 0.94, "grad_norm": 1.4604603257923368, "learning_rate": 8.627247480763046e-08, "loss": 0.1778, "step": 11782 }, { "epoch": 0.94, "grad_norm": 1.3686321563197972, "learning_rate": 8.603301992416658e-08, "loss": 0.1758, "step": 11783 }, { "epoch": 0.94, "grad_norm": 1.1786353006275547, "learning_rate": 8.579389493019507e-08, "loss": 0.1223, "step": 11784 }, { "epoch": 0.94, "grad_norm": 1.4074305568440126, "learning_rate": 8.555509984176812e-08, "loss": 0.1761, "step": 11785 }, { "epoch": 0.94, "grad_norm": 1.188634840818344, "learning_rate": 8.531663467491846e-08, "loss": 0.1489, "step": 11786 }, { "epoch": 0.94, "grad_norm": 1.2663873230029539, "learning_rate": 8.507849944565327e-08, "loss": 0.1325, "step": 11787 }, { "epoch": 0.94, "grad_norm": 1.2603704199895884, "learning_rate": 8.484069416995977e-08, "loss": 0.1229, "step": 11788 }, { "epoch": 0.94, "grad_norm": 1.3655044245529577, "learning_rate": 8.460321886380407e-08, "loss": 0.1986, "step": 11789 }, { "epoch": 0.94, "grad_norm": 1.443027256509294, "learning_rate": 8.43660735431262e-08, "loss": 0.1683, "step": 11790 }, { "epoch": 0.94, "grad_norm": 1.4153268601765026, "learning_rate": 8.412925822384788e-08, "loss": 0.1626, "step": 11791 }, { "epoch": 0.94, "grad_norm": 1.6689826618293173, "learning_rate": 8.389277292186692e-08, "loss": 0.1604, "step": 11792 }, { "epoch": 0.94, "grad_norm": 1.3813344447179734, "learning_rate": 8.365661765305955e-08, "loss": 0.1955, "step": 11793 }, { "epoch": 0.94, "grad_norm": 1.317158557669469, "learning_rate": 8.342079243327972e-08, "loss": 0.1912, "step": 11794 }, { "epoch": 0.94, "grad_norm": 1.348288666630903, "learning_rate": 8.318529727835811e-08, "loss": 0.1914, "step": 11795 }, { "epoch": 0.94, "grad_norm": 1.3701256826982715, "learning_rate": 8.295013220410486e-08, "loss": 0.1792, "step": 11796 }, { "epoch": 0.94, "grad_norm": 1.7469704833515158, "learning_rate": 8.271529722630788e-08, "loss": 0.2075, "step": 11797 }, { "epoch": 0.94, "grad_norm": 1.3642710142341172, "learning_rate": 8.248079236073125e-08, "loss": 0.1616, "step": 11798 }, { "epoch": 0.94, "grad_norm": 1.2114330681994019, "learning_rate": 8.224661762311847e-08, "loss": 0.1528, "step": 11799 }, { "epoch": 0.94, "grad_norm": 1.7523648079478362, "learning_rate": 8.201277302919086e-08, "loss": 0.1645, "step": 11800 }, { "epoch": 0.94, "grad_norm": 1.2202036558095306, "learning_rate": 8.177925859464587e-08, "loss": 0.1582, "step": 11801 }, { "epoch": 0.94, "grad_norm": 1.4181966397972763, "learning_rate": 8.154607433516149e-08, "loss": 0.145, "step": 11802 }, { "epoch": 0.94, "grad_norm": 7.383059796788842, "learning_rate": 8.131322026639187e-08, "loss": 0.4503, "step": 11803 }, { "epoch": 0.94, "grad_norm": 1.25872582386875, "learning_rate": 8.108069640396843e-08, "loss": 0.1344, "step": 11804 }, { "epoch": 0.94, "grad_norm": 7.213188701177112, "learning_rate": 8.084850276350142e-08, "loss": 0.5332, "step": 11805 }, { "epoch": 0.94, "grad_norm": 1.3184609466322557, "learning_rate": 8.061663936057895e-08, "loss": 0.15, "step": 11806 }, { "epoch": 0.94, "grad_norm": 1.4008237022365766, "learning_rate": 8.038510621076689e-08, "loss": 0.192, "step": 11807 }, { "epoch": 0.94, "grad_norm": 1.302024960927299, "learning_rate": 8.015390332960782e-08, "loss": 0.158, "step": 11808 }, { "epoch": 0.94, "grad_norm": 1.412139565159276, "learning_rate": 7.992303073262431e-08, "loss": 0.1361, "step": 11809 }, { "epoch": 0.94, "grad_norm": 1.476782233975, "learning_rate": 7.969248843531452e-08, "loss": 0.1818, "step": 11810 }, { "epoch": 0.94, "grad_norm": 6.098117317268838, "learning_rate": 7.946227645315663e-08, "loss": 0.619, "step": 11811 }, { "epoch": 0.94, "grad_norm": 1.5588694507718681, "learning_rate": 7.923239480160439e-08, "loss": 0.1535, "step": 11812 }, { "epoch": 0.95, "grad_norm": 1.3787995921824425, "learning_rate": 7.900284349609044e-08, "loss": 0.1798, "step": 11813 }, { "epoch": 0.95, "grad_norm": 1.4868015954810596, "learning_rate": 7.877362255202636e-08, "loss": 0.1964, "step": 11814 }, { "epoch": 0.95, "grad_norm": 31.725333076294138, "learning_rate": 7.854473198479928e-08, "loss": 0.5089, "step": 11815 }, { "epoch": 0.95, "grad_norm": 5.594085304985128, "learning_rate": 7.83161718097758e-08, "loss": 0.5377, "step": 11816 }, { "epoch": 0.95, "grad_norm": 1.5393466634921353, "learning_rate": 7.808794204229975e-08, "loss": 0.1999, "step": 11817 }, { "epoch": 0.95, "grad_norm": 1.2690071632713675, "learning_rate": 7.786004269769331e-08, "loss": 0.1464, "step": 11818 }, { "epoch": 0.95, "grad_norm": 1.5224477129625908, "learning_rate": 7.763247379125482e-08, "loss": 0.1748, "step": 11819 }, { "epoch": 0.95, "grad_norm": 1.3937655334728871, "learning_rate": 7.740523533826372e-08, "loss": 0.1618, "step": 11820 }, { "epoch": 0.95, "grad_norm": 1.3150660386909798, "learning_rate": 7.717832735397335e-08, "loss": 0.1594, "step": 11821 }, { "epoch": 0.95, "grad_norm": 1.386233364795769, "learning_rate": 7.695174985361708e-08, "loss": 0.181, "step": 11822 }, { "epoch": 0.95, "grad_norm": 1.3181758694792953, "learning_rate": 7.672550285240721e-08, "loss": 0.1215, "step": 11823 }, { "epoch": 0.95, "grad_norm": 1.5427027537448201, "learning_rate": 7.649958636552989e-08, "loss": 0.1674, "step": 11824 }, { "epoch": 0.95, "grad_norm": 1.2628763141232395, "learning_rate": 7.627400040815414e-08, "loss": 0.1572, "step": 11825 }, { "epoch": 0.95, "grad_norm": 1.3494593233701575, "learning_rate": 7.604874499542225e-08, "loss": 0.1713, "step": 11826 }, { "epoch": 0.95, "grad_norm": 1.320751195883479, "learning_rate": 7.582382014245771e-08, "loss": 0.1328, "step": 11827 }, { "epoch": 0.95, "grad_norm": 1.372544392211871, "learning_rate": 7.559922586435953e-08, "loss": 0.1969, "step": 11828 }, { "epoch": 0.95, "grad_norm": 1.4500060332112423, "learning_rate": 7.537496217620566e-08, "loss": 0.1852, "step": 11829 }, { "epoch": 0.95, "grad_norm": 1.4611062718142405, "learning_rate": 7.515102909305128e-08, "loss": 0.1972, "step": 11830 }, { "epoch": 0.95, "grad_norm": 1.2803739504199134, "learning_rate": 7.492742662993047e-08, "loss": 0.1655, "step": 11831 }, { "epoch": 0.95, "grad_norm": 1.4299403887454627, "learning_rate": 7.470415480185345e-08, "loss": 0.1638, "step": 11832 }, { "epoch": 0.95, "grad_norm": 7.465899490176388, "learning_rate": 7.448121362380989e-08, "loss": 0.5181, "step": 11833 }, { "epoch": 0.95, "grad_norm": 1.2583204024744978, "learning_rate": 7.425860311076616e-08, "loss": 0.1286, "step": 11834 }, { "epoch": 0.95, "grad_norm": 1.4264964072415753, "learning_rate": 7.403632327766641e-08, "loss": 0.1761, "step": 11835 }, { "epoch": 0.95, "grad_norm": 6.467687939329435, "learning_rate": 7.381437413943372e-08, "loss": 0.5684, "step": 11836 }, { "epoch": 0.95, "grad_norm": 8.639114553828653, "learning_rate": 7.35927557109678e-08, "loss": 0.5775, "step": 11837 }, { "epoch": 0.95, "grad_norm": 1.3258592943825107, "learning_rate": 7.337146800714678e-08, "loss": 0.1398, "step": 11838 }, { "epoch": 0.95, "grad_norm": 1.2688494590841974, "learning_rate": 7.3150511042826e-08, "loss": 0.1622, "step": 11839 }, { "epoch": 0.95, "grad_norm": 1.4422449900031205, "learning_rate": 7.292988483283913e-08, "loss": 0.1753, "step": 11840 }, { "epoch": 0.95, "grad_norm": 1.3807472280583288, "learning_rate": 7.270958939199767e-08, "loss": 0.1508, "step": 11841 }, { "epoch": 0.95, "grad_norm": 7.683136374884187, "learning_rate": 7.248962473509091e-08, "loss": 0.4974, "step": 11842 }, { "epoch": 0.95, "grad_norm": 1.38674509247126, "learning_rate": 7.22699908768848e-08, "loss": 0.1527, "step": 11843 }, { "epoch": 0.95, "grad_norm": 1.4087857088589786, "learning_rate": 7.205068783212476e-08, "loss": 0.255, "step": 11844 }, { "epoch": 0.95, "grad_norm": 1.2762590965064187, "learning_rate": 7.183171561553349e-08, "loss": 0.1533, "step": 11845 }, { "epoch": 0.95, "grad_norm": 1.3487275156130714, "learning_rate": 7.16130742418103e-08, "loss": 0.1818, "step": 11846 }, { "epoch": 0.95, "grad_norm": 1.4676877901726253, "learning_rate": 7.139476372563403e-08, "loss": 0.1668, "step": 11847 }, { "epoch": 0.95, "grad_norm": 1.2601961299799058, "learning_rate": 7.117678408166128e-08, "loss": 0.1663, "step": 11848 }, { "epoch": 0.95, "grad_norm": 1.6415343775013025, "learning_rate": 7.09591353245237e-08, "loss": 0.1893, "step": 11849 }, { "epoch": 0.95, "grad_norm": 1.2609758007419987, "learning_rate": 7.074181746883402e-08, "loss": 0.1663, "step": 11850 }, { "epoch": 0.95, "grad_norm": 1.4582308333366587, "learning_rate": 7.052483052918113e-08, "loss": 0.1651, "step": 11851 }, { "epoch": 0.95, "grad_norm": 1.668328731260004, "learning_rate": 7.030817452013227e-08, "loss": 0.1898, "step": 11852 }, { "epoch": 0.95, "grad_norm": 1.2341059693512457, "learning_rate": 7.009184945623193e-08, "loss": 0.1391, "step": 11853 }, { "epoch": 0.95, "grad_norm": 1.2778406269058784, "learning_rate": 6.987585535200292e-08, "loss": 0.1675, "step": 11854 }, { "epoch": 0.95, "grad_norm": 6.834718743366357, "learning_rate": 6.966019222194531e-08, "loss": 0.5617, "step": 11855 }, { "epoch": 0.95, "grad_norm": 1.462109830434754, "learning_rate": 6.944486008053697e-08, "loss": 0.1685, "step": 11856 }, { "epoch": 0.95, "grad_norm": 1.4835814478188611, "learning_rate": 6.922985894223467e-08, "loss": 0.2032, "step": 11857 }, { "epoch": 0.95, "grad_norm": 1.3265635060824332, "learning_rate": 6.901518882147129e-08, "loss": 0.1888, "step": 11858 }, { "epoch": 0.95, "grad_norm": 11.46863593758475, "learning_rate": 6.880084973265866e-08, "loss": 0.6125, "step": 11859 }, { "epoch": 0.95, "grad_norm": 7.534773571685166, "learning_rate": 6.858684169018581e-08, "loss": 0.5993, "step": 11860 }, { "epoch": 0.95, "grad_norm": 1.3396505072731482, "learning_rate": 6.837316470842015e-08, "loss": 0.166, "step": 11861 }, { "epoch": 0.95, "grad_norm": 1.3750438743016877, "learning_rate": 6.815981880170575e-08, "loss": 0.1808, "step": 11862 }, { "epoch": 0.95, "grad_norm": 1.4581663242174605, "learning_rate": 6.794680398436615e-08, "loss": 0.2204, "step": 11863 }, { "epoch": 0.95, "grad_norm": 1.4268435317041939, "learning_rate": 6.773412027070104e-08, "loss": 0.2123, "step": 11864 }, { "epoch": 0.95, "grad_norm": 1.2488165644565898, "learning_rate": 6.752176767498841e-08, "loss": 0.1628, "step": 11865 }, { "epoch": 0.95, "grad_norm": 1.475039661974663, "learning_rate": 6.730974621148412e-08, "loss": 0.1433, "step": 11866 }, { "epoch": 0.95, "grad_norm": 1.4387197920466739, "learning_rate": 6.709805589442175e-08, "loss": 0.1684, "step": 11867 }, { "epoch": 0.95, "grad_norm": 7.5563961475422365, "learning_rate": 6.688669673801384e-08, "loss": 0.509, "step": 11868 }, { "epoch": 0.95, "grad_norm": 1.4809665338888722, "learning_rate": 6.667566875644849e-08, "loss": 0.1954, "step": 11869 }, { "epoch": 0.95, "grad_norm": 1.3069074443785949, "learning_rate": 6.646497196389268e-08, "loss": 0.1613, "step": 11870 }, { "epoch": 0.95, "grad_norm": 1.407974081476115, "learning_rate": 6.625460637449122e-08, "loss": 0.1916, "step": 11871 }, { "epoch": 0.95, "grad_norm": 1.2927187629684935, "learning_rate": 6.604457200236669e-08, "loss": 0.15, "step": 11872 }, { "epoch": 0.95, "grad_norm": 1.3222100886207386, "learning_rate": 6.58348688616195e-08, "loss": 0.1577, "step": 11873 }, { "epoch": 0.95, "grad_norm": 1.4637226745812413, "learning_rate": 6.56254969663278e-08, "loss": 0.1477, "step": 11874 }, { "epoch": 0.95, "grad_norm": 1.5566512797458902, "learning_rate": 6.54164563305465e-08, "loss": 0.1891, "step": 11875 }, { "epoch": 0.95, "grad_norm": 1.5186953975690822, "learning_rate": 6.52077469683099e-08, "loss": 0.1634, "step": 11876 }, { "epoch": 0.95, "grad_norm": 1.4582468980484613, "learning_rate": 6.499936889362956e-08, "loss": 0.1905, "step": 11877 }, { "epoch": 0.95, "grad_norm": 1.2695336439914797, "learning_rate": 6.479132212049322e-08, "loss": 0.1661, "step": 11878 }, { "epoch": 0.95, "grad_norm": 1.311169568961537, "learning_rate": 6.458360666286912e-08, "loss": 0.1775, "step": 11879 }, { "epoch": 0.95, "grad_norm": 1.1074794040218978, "learning_rate": 6.437622253470055e-08, "loss": 0.1329, "step": 11880 }, { "epoch": 0.95, "grad_norm": 1.2269036820881958, "learning_rate": 6.416916974991083e-08, "loss": 0.1665, "step": 11881 }, { "epoch": 0.95, "grad_norm": 1.2298001006015544, "learning_rate": 6.396244832240051e-08, "loss": 0.1564, "step": 11882 }, { "epoch": 0.95, "grad_norm": 1.4068667335196685, "learning_rate": 6.375605826604514e-08, "loss": 0.1706, "step": 11883 }, { "epoch": 0.95, "grad_norm": 8.585625138109625, "learning_rate": 6.354999959470254e-08, "loss": 0.5553, "step": 11884 }, { "epoch": 0.95, "grad_norm": 1.564265137659678, "learning_rate": 6.334427232220552e-08, "loss": 0.1959, "step": 11885 }, { "epoch": 0.95, "grad_norm": 1.4280273058211257, "learning_rate": 6.313887646236472e-08, "loss": 0.1612, "step": 11886 }, { "epoch": 0.95, "grad_norm": 1.6124953048659518, "learning_rate": 6.293381202896964e-08, "loss": 0.1739, "step": 11887 }, { "epoch": 0.95, "grad_norm": 1.3286016519205572, "learning_rate": 6.272907903578595e-08, "loss": 0.1898, "step": 11888 }, { "epoch": 0.95, "grad_norm": 1.3640079049012501, "learning_rate": 6.252467749655878e-08, "loss": 0.1786, "step": 11889 }, { "epoch": 0.95, "grad_norm": 1.39774889598795, "learning_rate": 6.23206074250099e-08, "loss": 0.1613, "step": 11890 }, { "epoch": 0.95, "grad_norm": 1.3539897184371086, "learning_rate": 6.211686883483947e-08, "loss": 0.1862, "step": 11891 }, { "epoch": 0.95, "grad_norm": 1.274897312091227, "learning_rate": 6.191346173972434e-08, "loss": 0.147, "step": 11892 }, { "epoch": 0.95, "grad_norm": 1.352203457863679, "learning_rate": 6.171038615332081e-08, "loss": 0.1443, "step": 11893 }, { "epoch": 0.95, "grad_norm": 1.4077795221133333, "learning_rate": 6.150764208926074e-08, "loss": 0.1855, "step": 11894 }, { "epoch": 0.95, "grad_norm": 1.3117932688014544, "learning_rate": 6.130522956115659e-08, "loss": 0.1545, "step": 11895 }, { "epoch": 0.95, "grad_norm": 1.412363925100236, "learning_rate": 6.110314858259581e-08, "loss": 0.1709, "step": 11896 }, { "epoch": 0.95, "grad_norm": 8.713954540133612, "learning_rate": 6.090139916714478e-08, "loss": 0.5904, "step": 11897 }, { "epoch": 0.95, "grad_norm": 1.356462327846721, "learning_rate": 6.069998132834764e-08, "loss": 0.1846, "step": 11898 }, { "epoch": 0.95, "grad_norm": 1.5278864187242538, "learning_rate": 6.049889507972585e-08, "loss": 0.2077, "step": 11899 }, { "epoch": 0.95, "grad_norm": 1.5705808268346064, "learning_rate": 6.029814043478022e-08, "loss": 0.2153, "step": 11900 }, { "epoch": 0.95, "grad_norm": 1.3169297207329678, "learning_rate": 6.009771740698667e-08, "loss": 0.1655, "step": 11901 }, { "epoch": 0.95, "grad_norm": 1.3927743587432218, "learning_rate": 5.989762600980053e-08, "loss": 0.1671, "step": 11902 }, { "epoch": 0.95, "grad_norm": 1.2645494613998252, "learning_rate": 5.969786625665441e-08, "loss": 0.1747, "step": 11903 }, { "epoch": 0.95, "grad_norm": 1.317143416391292, "learning_rate": 5.949843816095924e-08, "loss": 0.129, "step": 11904 }, { "epoch": 0.95, "grad_norm": 1.4721098787005424, "learning_rate": 5.9299341736103746e-08, "loss": 0.1591, "step": 11905 }, { "epoch": 0.95, "grad_norm": 1.4451669317684392, "learning_rate": 5.910057699545224e-08, "loss": 0.1729, "step": 11906 }, { "epoch": 0.95, "grad_norm": 1.324290096574141, "learning_rate": 5.890214395235017e-08, "loss": 0.1881, "step": 11907 }, { "epoch": 0.95, "grad_norm": 1.246805943837617, "learning_rate": 5.870404262011742e-08, "loss": 0.147, "step": 11908 }, { "epoch": 0.95, "grad_norm": 1.4247928029368102, "learning_rate": 5.850627301205391e-08, "loss": 0.1654, "step": 11909 }, { "epoch": 0.95, "grad_norm": 1.3842602430613966, "learning_rate": 5.8308835141436814e-08, "loss": 0.1807, "step": 11910 }, { "epoch": 0.95, "grad_norm": 1.4656827671721269, "learning_rate": 5.811172902151996e-08, "loss": 0.1831, "step": 11911 }, { "epoch": 0.95, "grad_norm": 1.5149523393238529, "learning_rate": 5.791495466553609e-08, "loss": 0.1899, "step": 11912 }, { "epoch": 0.95, "grad_norm": 11.8023149756254, "learning_rate": 5.7718512086695767e-08, "loss": 0.5914, "step": 11913 }, { "epoch": 0.95, "grad_norm": 1.430295278233407, "learning_rate": 5.752240129818565e-08, "loss": 0.1888, "step": 11914 }, { "epoch": 0.95, "grad_norm": 1.2904451642914454, "learning_rate": 5.7326622313171877e-08, "loss": 0.1821, "step": 11915 }, { "epoch": 0.95, "grad_norm": 1.2898162873948038, "learning_rate": 5.713117514479838e-08, "loss": 0.1549, "step": 11916 }, { "epoch": 0.95, "grad_norm": 1.3696932179407268, "learning_rate": 5.6936059806184105e-08, "loss": 0.1694, "step": 11917 }, { "epoch": 0.95, "grad_norm": 1.2702318614737542, "learning_rate": 5.674127631043025e-08, "loss": 0.1674, "step": 11918 }, { "epoch": 0.95, "grad_norm": 1.3067137746286897, "learning_rate": 5.6546824670611344e-08, "loss": 0.1505, "step": 11919 }, { "epoch": 0.95, "grad_norm": 8.238389726238456, "learning_rate": 5.6352704899782506e-08, "loss": 0.6668, "step": 11920 }, { "epoch": 0.95, "grad_norm": 1.3543000249610455, "learning_rate": 5.6158917010974976e-08, "loss": 0.1525, "step": 11921 }, { "epoch": 0.95, "grad_norm": 1.4162895508431155, "learning_rate": 5.596546101719835e-08, "loss": 0.1469, "step": 11922 }, { "epoch": 0.95, "grad_norm": 1.1779053378458708, "learning_rate": 5.5772336931440574e-08, "loss": 0.1284, "step": 11923 }, { "epoch": 0.95, "grad_norm": 1.3600622654393317, "learning_rate": 5.557954476666627e-08, "loss": 0.1888, "step": 11924 }, { "epoch": 0.95, "grad_norm": 1.42920131246672, "learning_rate": 5.538708453581787e-08, "loss": 0.1851, "step": 11925 }, { "epoch": 0.95, "grad_norm": 1.3929477475156726, "learning_rate": 5.5194956251816144e-08, "loss": 0.1701, "step": 11926 }, { "epoch": 0.95, "grad_norm": 8.608129123392848, "learning_rate": 5.500315992755911e-08, "loss": 0.7568, "step": 11927 }, { "epoch": 0.95, "grad_norm": 1.4447726585552805, "learning_rate": 5.481169557592258e-08, "loss": 0.2044, "step": 11928 }, { "epoch": 0.95, "grad_norm": 6.127699067645563, "learning_rate": 5.462056320976072e-08, "loss": 0.4706, "step": 11929 }, { "epoch": 0.95, "grad_norm": 1.4406931273244223, "learning_rate": 5.442976284190382e-08, "loss": 0.1695, "step": 11930 }, { "epoch": 0.95, "grad_norm": 1.3103668551675502, "learning_rate": 5.423929448516218e-08, "loss": 0.1159, "step": 11931 }, { "epoch": 0.95, "grad_norm": 6.1211141393635735, "learning_rate": 5.404915815232115e-08, "loss": 0.5285, "step": 11932 }, { "epoch": 0.95, "grad_norm": 1.4288727479488013, "learning_rate": 5.3859353856146045e-08, "loss": 0.1791, "step": 11933 }, { "epoch": 0.95, "grad_norm": 1.6192437324781481, "learning_rate": 5.366988160937836e-08, "loss": 0.1886, "step": 11934 }, { "epoch": 0.95, "grad_norm": 1.3116857609879293, "learning_rate": 5.348074142473847e-08, "loss": 0.174, "step": 11935 }, { "epoch": 0.95, "grad_norm": 1.2317871056191212, "learning_rate": 5.329193331492399e-08, "loss": 0.1352, "step": 11936 }, { "epoch": 0.95, "grad_norm": 1.3377019156556629, "learning_rate": 5.310345729260924e-08, "loss": 0.1618, "step": 11937 }, { "epoch": 0.96, "grad_norm": 1.4222184493637573, "learning_rate": 5.2915313370449083e-08, "loss": 0.19, "step": 11938 }, { "epoch": 0.96, "grad_norm": 11.945399768370425, "learning_rate": 5.2727501561071756e-08, "loss": 0.6056, "step": 11939 }, { "epoch": 0.96, "grad_norm": 1.5389058375136386, "learning_rate": 5.254002187708773e-08, "loss": 0.1759, "step": 11940 }, { "epoch": 0.96, "grad_norm": 1.5082862149830094, "learning_rate": 5.235287433108194e-08, "loss": 0.1739, "step": 11941 }, { "epoch": 0.96, "grad_norm": 1.5273411009924767, "learning_rate": 5.216605893561877e-08, "loss": 0.1672, "step": 11942 }, { "epoch": 0.96, "grad_norm": 1.3497536718479062, "learning_rate": 5.1979575703239304e-08, "loss": 0.1888, "step": 11943 }, { "epoch": 0.96, "grad_norm": 1.467161216185006, "learning_rate": 5.179342464646242e-08, "loss": 0.199, "step": 11944 }, { "epoch": 0.96, "grad_norm": 9.984529937390512, "learning_rate": 5.16076057777859e-08, "loss": 0.6603, "step": 11945 }, { "epoch": 0.96, "grad_norm": 6.388086282541771, "learning_rate": 5.142211910968309e-08, "loss": 0.4956, "step": 11946 }, { "epoch": 0.96, "grad_norm": 1.3552435997528884, "learning_rate": 5.1236964654607926e-08, "loss": 0.1267, "step": 11947 }, { "epoch": 0.96, "grad_norm": 1.2360965020268753, "learning_rate": 5.1052142424988794e-08, "loss": 0.1692, "step": 11948 }, { "epoch": 0.96, "grad_norm": 1.4755246890534195, "learning_rate": 5.086765243323466e-08, "loss": 0.1734, "step": 11949 }, { "epoch": 0.96, "grad_norm": 7.64536004619589, "learning_rate": 5.068349469173006e-08, "loss": 0.5911, "step": 11950 }, { "epoch": 0.96, "grad_norm": 1.2712322513977827, "learning_rate": 5.0499669212837885e-08, "loss": 0.1573, "step": 11951 }, { "epoch": 0.96, "grad_norm": 1.4606907262031674, "learning_rate": 5.031617600889993e-08, "loss": 0.1559, "step": 11952 }, { "epoch": 0.96, "grad_norm": 7.273862432883727, "learning_rate": 5.013301509223356e-08, "loss": 0.5911, "step": 11953 }, { "epoch": 0.96, "grad_norm": 1.2352277630418924, "learning_rate": 4.995018647513561e-08, "loss": 0.1494, "step": 11954 }, { "epoch": 0.96, "grad_norm": 1.3406570443905086, "learning_rate": 4.976769016987959e-08, "loss": 0.14, "step": 11955 }, { "epoch": 0.96, "grad_norm": 1.2840225368469602, "learning_rate": 4.958552618871737e-08, "loss": 0.1693, "step": 11956 }, { "epoch": 0.96, "grad_norm": 1.5847924636894588, "learning_rate": 4.94036945438775e-08, "loss": 0.195, "step": 11957 }, { "epoch": 0.96, "grad_norm": 1.450570288825283, "learning_rate": 4.922219524756799e-08, "loss": 0.1965, "step": 11958 }, { "epoch": 0.96, "grad_norm": 1.5363409185710943, "learning_rate": 4.904102831197188e-08, "loss": 0.1951, "step": 11959 }, { "epoch": 0.96, "grad_norm": 1.2329498282943006, "learning_rate": 4.8860193749253324e-08, "loss": 0.1456, "step": 11960 }, { "epoch": 0.96, "grad_norm": 1.438794220940196, "learning_rate": 4.867969157155095e-08, "loss": 0.1674, "step": 11961 }, { "epoch": 0.96, "grad_norm": 1.3859866160844427, "learning_rate": 4.8499521790982276e-08, "loss": 0.1747, "step": 11962 }, { "epoch": 0.96, "grad_norm": 1.3750328183286025, "learning_rate": 4.8319684419643743e-08, "loss": 0.1647, "step": 11963 }, { "epoch": 0.96, "grad_norm": 1.4195078026030556, "learning_rate": 4.814017946960792e-08, "loss": 0.1419, "step": 11964 }, { "epoch": 0.96, "grad_norm": 1.315211731286358, "learning_rate": 4.79610069529246e-08, "loss": 0.1652, "step": 11965 }, { "epoch": 0.96, "grad_norm": 1.3367000069064292, "learning_rate": 4.7782166881623626e-08, "loss": 0.1553, "step": 11966 }, { "epoch": 0.96, "grad_norm": 1.3159252485269777, "learning_rate": 4.7603659267710376e-08, "loss": 0.157, "step": 11967 }, { "epoch": 0.96, "grad_norm": 11.789139154827396, "learning_rate": 4.742548412316805e-08, "loss": 0.5509, "step": 11968 }, { "epoch": 0.96, "grad_norm": 1.2437922974328492, "learning_rate": 4.72476414599593e-08, "loss": 0.1419, "step": 11969 }, { "epoch": 0.96, "grad_norm": 1.6004520603837116, "learning_rate": 4.707013129002291e-08, "loss": 0.1565, "step": 11970 }, { "epoch": 0.96, "grad_norm": 1.134726028223145, "learning_rate": 4.689295362527435e-08, "loss": 0.1429, "step": 11971 }, { "epoch": 0.96, "grad_norm": 1.3213673349821737, "learning_rate": 4.671610847761021e-08, "loss": 0.159, "step": 11972 }, { "epoch": 0.96, "grad_norm": 1.3062637064638105, "learning_rate": 4.6539595858900446e-08, "loss": 0.1371, "step": 11973 }, { "epoch": 0.96, "grad_norm": 1.3642930246811105, "learning_rate": 4.636341578099668e-08, "loss": 0.2032, "step": 11974 }, { "epoch": 0.96, "grad_norm": 1.4505218549250118, "learning_rate": 4.618756825572612e-08, "loss": 0.2039, "step": 11975 }, { "epoch": 0.96, "grad_norm": 1.3423592004613445, "learning_rate": 4.601205329489267e-08, "loss": 0.1705, "step": 11976 }, { "epoch": 0.96, "grad_norm": 1.5010634829434208, "learning_rate": 4.583687091028077e-08, "loss": 0.1986, "step": 11977 }, { "epoch": 0.96, "grad_norm": 1.2958664340074666, "learning_rate": 4.5662021113649923e-08, "loss": 0.1823, "step": 11978 }, { "epoch": 0.96, "grad_norm": 1.043083292926528, "learning_rate": 4.5487503916738505e-08, "loss": 0.1252, "step": 11979 }, { "epoch": 0.96, "grad_norm": 1.625813694686941, "learning_rate": 4.5313319331262703e-08, "loss": 0.2111, "step": 11980 }, { "epoch": 0.96, "grad_norm": 1.3601050371379158, "learning_rate": 4.513946736891595e-08, "loss": 0.1305, "step": 11981 }, { "epoch": 0.96, "grad_norm": 1.3581968669282554, "learning_rate": 4.4965948041369444e-08, "loss": 0.1687, "step": 11982 }, { "epoch": 0.96, "grad_norm": 1.2303874547425697, "learning_rate": 4.479276136027222e-08, "loss": 0.1481, "step": 11983 }, { "epoch": 0.96, "grad_norm": 1.3018819918256086, "learning_rate": 4.4619907337249965e-08, "loss": 0.1666, "step": 11984 }, { "epoch": 0.96, "grad_norm": 1.3012908546933881, "learning_rate": 4.4447385983907855e-08, "loss": 0.1699, "step": 11985 }, { "epoch": 0.96, "grad_norm": 1.3858201731481983, "learning_rate": 4.427519731182772e-08, "loss": 0.1389, "step": 11986 }, { "epoch": 0.96, "grad_norm": 1.1107173646406219, "learning_rate": 4.4103341332568105e-08, "loss": 0.1305, "step": 11987 }, { "epoch": 0.96, "grad_norm": 8.37185997790486, "learning_rate": 4.393181805766755e-08, "loss": 0.5384, "step": 11988 }, { "epoch": 0.96, "grad_norm": 7.904637254738123, "learning_rate": 4.3760627498640184e-08, "loss": 0.6962, "step": 11989 }, { "epoch": 0.96, "grad_norm": 1.2649148302185147, "learning_rate": 4.3589769666978476e-08, "loss": 0.163, "step": 11990 }, { "epoch": 0.96, "grad_norm": 1.4412159237437532, "learning_rate": 4.3419244574153255e-08, "loss": 0.1953, "step": 11991 }, { "epoch": 0.96, "grad_norm": 1.1417599908904636, "learning_rate": 4.324905223161202e-08, "loss": 0.1458, "step": 11992 }, { "epoch": 0.96, "grad_norm": 1.2401118467312509, "learning_rate": 4.3079192650779536e-08, "loss": 0.1541, "step": 11993 }, { "epoch": 0.96, "grad_norm": 1.5466035441811181, "learning_rate": 4.2909665843060555e-08, "loss": 0.2007, "step": 11994 }, { "epoch": 0.96, "grad_norm": 1.2994211061589582, "learning_rate": 4.274047181983487e-08, "loss": 0.158, "step": 11995 }, { "epoch": 0.96, "grad_norm": 1.6688625719378687, "learning_rate": 4.257161059246118e-08, "loss": 0.193, "step": 11996 }, { "epoch": 0.96, "grad_norm": 1.2440328455094596, "learning_rate": 4.24030821722754e-08, "loss": 0.125, "step": 11997 }, { "epoch": 0.96, "grad_norm": 1.3409149787992523, "learning_rate": 4.2234886570591824e-08, "loss": 0.1744, "step": 11998 }, { "epoch": 0.96, "grad_norm": 1.3023544062343446, "learning_rate": 4.206702379870198e-08, "loss": 0.1592, "step": 11999 }, { "epoch": 0.96, "grad_norm": 1.349200978983085, "learning_rate": 4.189949386787462e-08, "loss": 0.1539, "step": 12000 }, { "epoch": 0.96, "grad_norm": 12.966447204672004, "learning_rate": 4.173229678935631e-08, "loss": 0.4401, "step": 12001 }, { "epoch": 0.96, "grad_norm": 1.3377347632907357, "learning_rate": 4.156543257437196e-08, "loss": 0.1816, "step": 12002 }, { "epoch": 0.96, "grad_norm": 1.3720423091730143, "learning_rate": 4.1398901234124265e-08, "loss": 0.1776, "step": 12003 }, { "epoch": 0.96, "grad_norm": 8.437915317907944, "learning_rate": 4.123270277979208e-08, "loss": 0.5207, "step": 12004 }, { "epoch": 0.96, "grad_norm": 1.481365682065454, "learning_rate": 4.106683722253257e-08, "loss": 0.1809, "step": 12005 }, { "epoch": 0.96, "grad_norm": 1.3562969908639704, "learning_rate": 4.090130457348185e-08, "loss": 0.1841, "step": 12006 }, { "epoch": 0.96, "grad_norm": 5.879694441676661, "learning_rate": 4.0736104843751014e-08, "loss": 0.4557, "step": 12007 }, { "epoch": 0.96, "grad_norm": 1.354566403703353, "learning_rate": 4.057123804443231e-08, "loss": 0.1804, "step": 12008 }, { "epoch": 0.96, "grad_norm": 1.1399744381798957, "learning_rate": 4.040670418659243e-08, "loss": 0.1276, "step": 12009 }, { "epoch": 0.96, "grad_norm": 1.3858615599850823, "learning_rate": 4.024250328127755e-08, "loss": 0.2334, "step": 12010 }, { "epoch": 0.96, "grad_norm": 24.288813121532584, "learning_rate": 4.0078635339511065e-08, "loss": 0.5138, "step": 12011 }, { "epoch": 0.96, "grad_norm": 1.3163628676677386, "learning_rate": 3.991510037229363e-08, "loss": 0.1449, "step": 12012 }, { "epoch": 0.96, "grad_norm": 1.3322016445372824, "learning_rate": 3.9751898390604226e-08, "loss": 0.1725, "step": 12013 }, { "epoch": 0.96, "grad_norm": 1.4141994433896983, "learning_rate": 3.958902940539855e-08, "loss": 0.1782, "step": 12014 }, { "epoch": 0.96, "grad_norm": 1.4621149743937452, "learning_rate": 3.9426493427611177e-08, "loss": 0.1683, "step": 12015 }, { "epoch": 0.96, "grad_norm": 1.4422256309450874, "learning_rate": 3.926429046815228e-08, "loss": 0.1863, "step": 12016 }, { "epoch": 0.96, "grad_norm": 4.329864987008123, "learning_rate": 3.9102420537913134e-08, "loss": 0.7039, "step": 12017 }, { "epoch": 0.96, "grad_norm": 1.207100151076323, "learning_rate": 3.894088364775894e-08, "loss": 0.1506, "step": 12018 }, { "epoch": 0.96, "grad_norm": 1.4950051982668435, "learning_rate": 3.877967980853437e-08, "loss": 0.1861, "step": 12019 }, { "epoch": 0.96, "grad_norm": 11.238555975896437, "learning_rate": 3.8618809031061855e-08, "loss": 0.5917, "step": 12020 }, { "epoch": 0.96, "grad_norm": 1.2985831890474746, "learning_rate": 3.845827132614166e-08, "loss": 0.151, "step": 12021 }, { "epoch": 0.96, "grad_norm": 1.2807882841052944, "learning_rate": 3.8298066704550165e-08, "loss": 0.1598, "step": 12022 }, { "epoch": 0.96, "grad_norm": 1.508349957569687, "learning_rate": 3.8138195177042644e-08, "loss": 0.1848, "step": 12023 }, { "epoch": 0.96, "grad_norm": 1.370408601266554, "learning_rate": 3.797865675435219e-08, "loss": 0.17, "step": 12024 }, { "epoch": 0.96, "grad_norm": 1.1601162191682797, "learning_rate": 3.781945144718912e-08, "loss": 0.1503, "step": 12025 }, { "epoch": 0.96, "grad_norm": 1.3392262274521287, "learning_rate": 3.766057926624045e-08, "loss": 0.1871, "step": 12026 }, { "epoch": 0.96, "grad_norm": 1.4056785724764818, "learning_rate": 3.750204022217263e-08, "loss": 0.1737, "step": 12027 }, { "epoch": 0.96, "grad_norm": 1.1851661030201108, "learning_rate": 3.7343834325628827e-08, "loss": 0.12, "step": 12028 }, { "epoch": 0.96, "grad_norm": 1.2453294102838228, "learning_rate": 3.7185961587229424e-08, "loss": 0.1512, "step": 12029 }, { "epoch": 0.96, "grad_norm": 1.3343456041599573, "learning_rate": 3.7028422017573175e-08, "loss": 0.1589, "step": 12030 }, { "epoch": 0.96, "grad_norm": 1.2237028374117966, "learning_rate": 3.687121562723661e-08, "loss": 0.1098, "step": 12031 }, { "epoch": 0.96, "grad_norm": 1.3551363731750161, "learning_rate": 3.6714342426771856e-08, "loss": 0.1693, "step": 12032 }, { "epoch": 0.96, "grad_norm": 1.333018165195099, "learning_rate": 3.65578024267127e-08, "loss": 0.1629, "step": 12033 }, { "epoch": 0.96, "grad_norm": 1.3904831247379053, "learning_rate": 3.640159563756629e-08, "loss": 0.1666, "step": 12034 }, { "epoch": 0.96, "grad_norm": 1.4007722579059874, "learning_rate": 3.62457220698198e-08, "loss": 0.1784, "step": 12035 }, { "epoch": 0.96, "grad_norm": 1.251428677603877, "learning_rate": 3.609018173393763e-08, "loss": 0.1436, "step": 12036 }, { "epoch": 0.96, "grad_norm": 1.4125431535547044, "learning_rate": 3.5934974640362e-08, "loss": 0.2091, "step": 12037 }, { "epoch": 0.96, "grad_norm": 6.976626649929335, "learning_rate": 3.578010079951178e-08, "loss": 0.5252, "step": 12038 }, { "epoch": 0.96, "grad_norm": 1.2178594571534171, "learning_rate": 3.5625560221784205e-08, "loss": 0.1561, "step": 12039 }, { "epoch": 0.96, "grad_norm": 1.322820552515946, "learning_rate": 3.547135291755488e-08, "loss": 0.1639, "step": 12040 }, { "epoch": 0.96, "grad_norm": 1.4706527220410934, "learning_rate": 3.531747889717496e-08, "loss": 0.1998, "step": 12041 }, { "epoch": 0.96, "grad_norm": 1.396940556118862, "learning_rate": 3.516393817097508e-08, "loss": 0.1989, "step": 12042 }, { "epoch": 0.96, "grad_norm": 1.3234492517612735, "learning_rate": 3.5010730749263086e-08, "loss": 0.1279, "step": 12043 }, { "epoch": 0.96, "grad_norm": 1.3079839344993724, "learning_rate": 3.4857856642323554e-08, "loss": 0.1758, "step": 12044 }, { "epoch": 0.96, "grad_norm": 1.2881367357471751, "learning_rate": 3.470531586042047e-08, "loss": 0.167, "step": 12045 }, { "epoch": 0.96, "grad_norm": 8.155113237486573, "learning_rate": 3.455310841379345e-08, "loss": 0.5205, "step": 12046 }, { "epoch": 0.96, "grad_norm": 1.2546464502133254, "learning_rate": 3.440123431266151e-08, "loss": 0.1473, "step": 12047 }, { "epoch": 0.96, "grad_norm": 1.350399063246769, "learning_rate": 3.42496935672193e-08, "loss": 0.1657, "step": 12048 }, { "epoch": 0.96, "grad_norm": 1.2606406846876248, "learning_rate": 3.409848618764089e-08, "loss": 0.2075, "step": 12049 }, { "epoch": 0.96, "grad_norm": 1.1886334889528178, "learning_rate": 3.394761218407705e-08, "loss": 0.135, "step": 12050 }, { "epoch": 0.96, "grad_norm": 1.5817456614390903, "learning_rate": 3.379707156665746e-08, "loss": 0.232, "step": 12051 }, { "epoch": 0.96, "grad_norm": 1.4404575633818477, "learning_rate": 3.364686434548625e-08, "loss": 0.1554, "step": 12052 }, { "epoch": 0.96, "grad_norm": 1.3618906418744414, "learning_rate": 3.3496990530649256e-08, "loss": 0.1834, "step": 12053 }, { "epoch": 0.96, "grad_norm": 1.194238485918517, "learning_rate": 3.334745013220675e-08, "loss": 0.1276, "step": 12054 }, { "epoch": 0.96, "grad_norm": 1.1872329717793155, "learning_rate": 3.3198243160198486e-08, "loss": 0.1554, "step": 12055 }, { "epoch": 0.96, "grad_norm": 1.3792562535418866, "learning_rate": 3.3049369624640896e-08, "loss": 0.1916, "step": 12056 }, { "epoch": 0.96, "grad_norm": 1.251789081141179, "learning_rate": 3.290082953552876e-08, "loss": 0.1438, "step": 12057 }, { "epoch": 0.96, "grad_norm": 1.3313032966704588, "learning_rate": 3.2752622902832984e-08, "loss": 0.1535, "step": 12058 }, { "epoch": 0.96, "grad_norm": 1.349054406544141, "learning_rate": 3.2604749736504514e-08, "loss": 0.2289, "step": 12059 }, { "epoch": 0.96, "grad_norm": 1.3629849066459452, "learning_rate": 3.245721004646929e-08, "loss": 0.1766, "step": 12060 }, { "epoch": 0.96, "grad_norm": 1.444491168269748, "learning_rate": 3.231000384263272e-08, "loss": 0.1783, "step": 12061 }, { "epoch": 0.96, "grad_norm": 1.361216997056909, "learning_rate": 3.21631311348769e-08, "loss": 0.1794, "step": 12062 }, { "epoch": 0.97, "grad_norm": 1.5301662417876827, "learning_rate": 3.201659193306228e-08, "loss": 0.2261, "step": 12063 }, { "epoch": 0.97, "grad_norm": 1.5299945825349242, "learning_rate": 3.1870386247025986e-08, "loss": 0.172, "step": 12064 }, { "epoch": 0.97, "grad_norm": 1.4737109521963345, "learning_rate": 3.172451408658406e-08, "loss": 0.1795, "step": 12065 }, { "epoch": 0.97, "grad_norm": 1.195948475801584, "learning_rate": 3.1578975461528106e-08, "loss": 0.1508, "step": 12066 }, { "epoch": 0.97, "grad_norm": 1.6151968153174672, "learning_rate": 3.1433770381629756e-08, "loss": 0.1991, "step": 12067 }, { "epoch": 0.97, "grad_norm": 1.3002722602315484, "learning_rate": 3.128889885663622e-08, "loss": 0.1788, "step": 12068 }, { "epoch": 0.97, "grad_norm": 1.5033514642767625, "learning_rate": 3.114436089627415e-08, "loss": 0.1792, "step": 12069 }, { "epoch": 0.97, "grad_norm": 1.2977307387035604, "learning_rate": 3.100015651024524e-08, "loss": 0.1443, "step": 12070 }, { "epoch": 0.97, "grad_norm": 7.201616295875197, "learning_rate": 3.085628570823174e-08, "loss": 0.3964, "step": 12071 }, { "epoch": 0.97, "grad_norm": 1.3622989725437762, "learning_rate": 3.071274849989147e-08, "loss": 0.2083, "step": 12072 }, { "epoch": 0.97, "grad_norm": 1.2723276387382116, "learning_rate": 3.0569544894861194e-08, "loss": 0.1731, "step": 12073 }, { "epoch": 0.97, "grad_norm": 1.562506940744663, "learning_rate": 3.04266749027543e-08, "loss": 0.1778, "step": 12074 }, { "epoch": 0.97, "grad_norm": 1.3578661234455802, "learning_rate": 3.0284138533160924e-08, "loss": 0.1821, "step": 12075 }, { "epoch": 0.97, "grad_norm": 1.4358501300507704, "learning_rate": 3.0141935795651725e-08, "loss": 0.1604, "step": 12076 }, { "epoch": 0.97, "grad_norm": 5.9823365425292865, "learning_rate": 3.00000666997724e-08, "loss": 0.568, "step": 12077 }, { "epoch": 0.97, "grad_norm": 1.4925221508350677, "learning_rate": 2.985853125504701e-08, "loss": 0.1543, "step": 12078 }, { "epoch": 0.97, "grad_norm": 1.2836759499464627, "learning_rate": 2.9717329470977384e-08, "loss": 0.1332, "step": 12079 }, { "epoch": 0.97, "grad_norm": 1.2673374611178578, "learning_rate": 2.957646135704262e-08, "loss": 0.1588, "step": 12080 }, { "epoch": 0.97, "grad_norm": 1.3686982005707646, "learning_rate": 2.9435926922699588e-08, "loss": 0.1921, "step": 12081 }, { "epoch": 0.97, "grad_norm": 10.379521332314932, "learning_rate": 2.929572617738352e-08, "loss": 0.6916, "step": 12082 }, { "epoch": 0.97, "grad_norm": 1.3617758046017843, "learning_rate": 2.9155859130505782e-08, "loss": 0.1722, "step": 12083 }, { "epoch": 0.97, "grad_norm": 1.2711545968125395, "learning_rate": 2.901632579145608e-08, "loss": 0.1709, "step": 12084 }, { "epoch": 0.97, "grad_norm": 1.5732754287839423, "learning_rate": 2.8877126169602477e-08, "loss": 0.1702, "step": 12085 }, { "epoch": 0.97, "grad_norm": 1.409748390686803, "learning_rate": 2.873826027428861e-08, "loss": 0.157, "step": 12086 }, { "epoch": 0.97, "grad_norm": 1.464016065618886, "learning_rate": 2.8599728114838134e-08, "loss": 0.1565, "step": 12087 }, { "epoch": 0.97, "grad_norm": 1.4756818170544415, "learning_rate": 2.846152970055027e-08, "loss": 0.1858, "step": 12088 }, { "epoch": 0.97, "grad_norm": 1.5188673246293447, "learning_rate": 2.8323665040703717e-08, "loss": 0.1746, "step": 12089 }, { "epoch": 0.97, "grad_norm": 1.361258431014825, "learning_rate": 2.818613414455218e-08, "loss": 0.1703, "step": 12090 }, { "epoch": 0.97, "grad_norm": 1.4520797451366454, "learning_rate": 2.804893702133049e-08, "loss": 0.1699, "step": 12091 }, { "epoch": 0.97, "grad_norm": 1.3891759310034593, "learning_rate": 2.7912073680247398e-08, "loss": 0.1726, "step": 12092 }, { "epoch": 0.97, "grad_norm": 1.321580333725119, "learning_rate": 2.777554413049166e-08, "loss": 0.1567, "step": 12093 }, { "epoch": 0.97, "grad_norm": 1.1762527955721975, "learning_rate": 2.7639348381229282e-08, "loss": 0.1426, "step": 12094 }, { "epoch": 0.97, "grad_norm": 1.364128231709815, "learning_rate": 2.7503486441602388e-08, "loss": 0.1836, "step": 12095 }, { "epoch": 0.97, "grad_norm": 1.4035172312033404, "learning_rate": 2.7367958320733135e-08, "loss": 0.1853, "step": 12096 }, { "epoch": 0.97, "grad_norm": 1.4078442344238051, "learning_rate": 2.7232764027718684e-08, "loss": 0.1976, "step": 12097 }, { "epoch": 0.97, "grad_norm": 1.2707720743362028, "learning_rate": 2.709790357163622e-08, "loss": 0.1304, "step": 12098 }, { "epoch": 0.97, "grad_norm": 1.4421887111905762, "learning_rate": 2.69633769615385e-08, "loss": 0.1684, "step": 12099 }, { "epoch": 0.97, "grad_norm": 1.505569400520923, "learning_rate": 2.6829184206457194e-08, "loss": 0.2069, "step": 12100 }, { "epoch": 0.97, "grad_norm": 1.2181578783457396, "learning_rate": 2.6695325315401198e-08, "loss": 0.1575, "step": 12101 }, { "epoch": 0.97, "grad_norm": 1.2886042226557122, "learning_rate": 2.656180029735611e-08, "loss": 0.1699, "step": 12102 }, { "epoch": 0.97, "grad_norm": 1.3962941558613893, "learning_rate": 2.642860916128642e-08, "loss": 0.1814, "step": 12103 }, { "epoch": 0.97, "grad_norm": 11.913118761696666, "learning_rate": 2.6295751916133315e-08, "loss": 0.6563, "step": 12104 }, { "epoch": 0.97, "grad_norm": 1.3406454155451237, "learning_rate": 2.6163228570816324e-08, "loss": 0.1729, "step": 12105 }, { "epoch": 0.97, "grad_norm": 1.4182273240135925, "learning_rate": 2.6031039134231663e-08, "loss": 0.182, "step": 12106 }, { "epoch": 0.97, "grad_norm": 1.4333338765856458, "learning_rate": 2.5899183615253897e-08, "loss": 0.1762, "step": 12107 }, { "epoch": 0.97, "grad_norm": 1.4923643455037134, "learning_rate": 2.5767662022735394e-08, "loss": 0.1924, "step": 12108 }, { "epoch": 0.97, "grad_norm": 1.3441307105179607, "learning_rate": 2.5636474365504095e-08, "loss": 0.1884, "step": 12109 }, { "epoch": 0.97, "grad_norm": 1.2881119185703722, "learning_rate": 2.5505620652369058e-08, "loss": 0.1564, "step": 12110 }, { "epoch": 0.97, "grad_norm": 1.3008360361380846, "learning_rate": 2.5375100892113258e-08, "loss": 0.1546, "step": 12111 }, { "epoch": 0.97, "grad_norm": 1.532645808645719, "learning_rate": 2.5244915093499134e-08, "loss": 0.2131, "step": 12112 }, { "epoch": 0.97, "grad_norm": 1.3872226656204951, "learning_rate": 2.511506326526747e-08, "loss": 0.1474, "step": 12113 }, { "epoch": 0.97, "grad_norm": 1.2666883093797752, "learning_rate": 2.4985545416134628e-08, "loss": 0.1765, "step": 12114 }, { "epoch": 0.97, "grad_norm": 1.1640033121557665, "learning_rate": 2.4856361554795318e-08, "loss": 0.1244, "step": 12115 }, { "epoch": 0.97, "grad_norm": 1.3539787683073303, "learning_rate": 2.4727511689923156e-08, "loss": 0.1647, "step": 12116 }, { "epoch": 0.97, "grad_norm": 1.259777909976236, "learning_rate": 2.459899583016734e-08, "loss": 0.1523, "step": 12117 }, { "epoch": 0.97, "grad_norm": 1.526114999174613, "learning_rate": 2.4470813984155962e-08, "loss": 0.1798, "step": 12118 }, { "epoch": 0.97, "grad_norm": 1.5669402329431252, "learning_rate": 2.4342966160494364e-08, "loss": 0.1919, "step": 12119 }, { "epoch": 0.97, "grad_norm": 1.3069705197312311, "learning_rate": 2.421545236776457e-08, "loss": 0.1979, "step": 12120 }, { "epoch": 0.97, "grad_norm": 1.2768006460369934, "learning_rate": 2.408827261452751e-08, "loss": 0.1609, "step": 12121 }, { "epoch": 0.97, "grad_norm": 1.374996924499818, "learning_rate": 2.396142690932135e-08, "loss": 0.177, "step": 12122 }, { "epoch": 0.97, "grad_norm": 1.3627517008156633, "learning_rate": 2.3834915260661506e-08, "loss": 0.1713, "step": 12123 }, { "epoch": 0.97, "grad_norm": 6.967998693571285, "learning_rate": 2.3708737677040628e-08, "loss": 0.5587, "step": 12124 }, { "epoch": 0.97, "grad_norm": 1.2928547266145998, "learning_rate": 2.358289416693027e-08, "loss": 0.1567, "step": 12125 }, { "epoch": 0.97, "grad_norm": 1.3860491595305744, "learning_rate": 2.3457384738777567e-08, "loss": 0.1698, "step": 12126 }, { "epoch": 0.97, "grad_norm": 1.3301884738101102, "learning_rate": 2.3332209401009664e-08, "loss": 0.1462, "step": 12127 }, { "epoch": 0.97, "grad_norm": 1.3183929770279312, "learning_rate": 2.3207368162028733e-08, "loss": 0.1705, "step": 12128 }, { "epoch": 0.97, "grad_norm": 1.0769143902764093, "learning_rate": 2.3082861030215843e-08, "loss": 0.1296, "step": 12129 }, { "epoch": 0.97, "grad_norm": 1.2567878788932627, "learning_rate": 2.2958688013930973e-08, "loss": 0.1765, "step": 12130 }, { "epoch": 0.97, "grad_norm": 1.3403712707072726, "learning_rate": 2.2834849121508574e-08, "loss": 0.1806, "step": 12131 }, { "epoch": 0.97, "grad_norm": 1.3600016957594245, "learning_rate": 2.27113443612631e-08, "loss": 0.1315, "step": 12132 }, { "epoch": 0.97, "grad_norm": 1.4083011512643924, "learning_rate": 2.2588173741485144e-08, "loss": 0.1875, "step": 12133 }, { "epoch": 0.97, "grad_norm": 1.2210303716329927, "learning_rate": 2.2465337270444754e-08, "loss": 0.1711, "step": 12134 }, { "epoch": 0.97, "grad_norm": 1.4334874075537845, "learning_rate": 2.234283495638756e-08, "loss": 0.1854, "step": 12135 }, { "epoch": 0.97, "grad_norm": 1.495359517216171, "learning_rate": 2.2220666807537538e-08, "loss": 0.2015, "step": 12136 }, { "epoch": 0.97, "grad_norm": 1.2088796558583776, "learning_rate": 2.2098832832095906e-08, "loss": 0.1612, "step": 12137 }, { "epoch": 0.97, "grad_norm": 1.115386132069599, "learning_rate": 2.1977333038242233e-08, "loss": 0.1262, "step": 12138 }, { "epoch": 0.97, "grad_norm": 1.4154215861804995, "learning_rate": 2.185616743413277e-08, "loss": 0.1524, "step": 12139 }, { "epoch": 0.97, "grad_norm": 1.392322841923491, "learning_rate": 2.173533602790212e-08, "loss": 0.1628, "step": 12140 }, { "epoch": 0.97, "grad_norm": 1.3750747118610367, "learning_rate": 2.161483882766213e-08, "loss": 0.2158, "step": 12141 }, { "epoch": 0.97, "grad_norm": 1.5932392113946434, "learning_rate": 2.149467584150189e-08, "loss": 0.19, "step": 12142 }, { "epoch": 0.97, "grad_norm": 1.2262813156384613, "learning_rate": 2.137484707748827e-08, "loss": 0.1377, "step": 12143 }, { "epoch": 0.97, "grad_norm": 1.5471823532578715, "learning_rate": 2.1255352543665954e-08, "loss": 0.2242, "step": 12144 }, { "epoch": 0.97, "grad_norm": 1.8684449764849067, "learning_rate": 2.1136192248056298e-08, "loss": 0.2044, "step": 12145 }, { "epoch": 0.97, "grad_norm": 1.3234845516522307, "learning_rate": 2.1017366198659573e-08, "loss": 0.1678, "step": 12146 }, { "epoch": 0.97, "grad_norm": 1.373305621190505, "learning_rate": 2.0898874403453284e-08, "loss": 0.1716, "step": 12147 }, { "epoch": 0.97, "grad_norm": 1.573837829976918, "learning_rate": 2.078071687039107e-08, "loss": 0.1762, "step": 12148 }, { "epoch": 0.97, "grad_norm": 1.6337536955590073, "learning_rate": 2.0662893607406033e-08, "loss": 0.199, "step": 12149 }, { "epoch": 0.97, "grad_norm": 1.5128868896716055, "learning_rate": 2.0545404622407396e-08, "loss": 0.2215, "step": 12150 }, { "epoch": 0.97, "grad_norm": 1.553462953056607, "learning_rate": 2.0428249923283296e-08, "loss": 0.1718, "step": 12151 }, { "epoch": 0.97, "grad_norm": 7.939391718539825, "learning_rate": 2.0311429517897997e-08, "loss": 0.4344, "step": 12152 }, { "epoch": 0.97, "grad_norm": 1.5829037585418475, "learning_rate": 2.019494341409467e-08, "loss": 0.1815, "step": 12153 }, { "epoch": 0.97, "grad_norm": 7.848698248388663, "learning_rate": 2.0078791619692616e-08, "loss": 0.3825, "step": 12154 }, { "epoch": 0.97, "grad_norm": 5.91867027264245, "learning_rate": 1.9962974142490043e-08, "loss": 0.4767, "step": 12155 }, { "epoch": 0.97, "grad_norm": 1.461525713350055, "learning_rate": 1.984749099026184e-08, "loss": 0.1633, "step": 12156 }, { "epoch": 0.97, "grad_norm": 1.4647316525356164, "learning_rate": 1.9732342170760698e-08, "loss": 0.1687, "step": 12157 }, { "epoch": 0.97, "grad_norm": 1.3837270126836565, "learning_rate": 1.9617527691717653e-08, "loss": 0.1553, "step": 12158 }, { "epoch": 0.97, "grad_norm": 1.270716247017522, "learning_rate": 1.9503047560839316e-08, "loss": 0.134, "step": 12159 }, { "epoch": 0.97, "grad_norm": 1.4217707303042713, "learning_rate": 1.9388901785811766e-08, "loss": 0.2105, "step": 12160 }, { "epoch": 0.97, "grad_norm": 1.3559998798507675, "learning_rate": 1.9275090374298312e-08, "loss": 0.1594, "step": 12161 }, { "epoch": 0.97, "grad_norm": 1.2515511588089945, "learning_rate": 1.91616133339384e-08, "loss": 0.1349, "step": 12162 }, { "epoch": 0.97, "grad_norm": 1.3755356718032212, "learning_rate": 1.9048470672350938e-08, "loss": 0.1773, "step": 12163 }, { "epoch": 0.97, "grad_norm": 1.3256021393822173, "learning_rate": 1.8935662397131516e-08, "loss": 0.1424, "step": 12164 }, { "epoch": 0.97, "grad_norm": 1.2528755099029445, "learning_rate": 1.8823188515852964e-08, "loss": 0.1486, "step": 12165 }, { "epoch": 0.97, "grad_norm": 1.5578253581860955, "learning_rate": 1.8711049036066465e-08, "loss": 0.2048, "step": 12166 }, { "epoch": 0.97, "grad_norm": 1.4950695414615793, "learning_rate": 1.8599243965299328e-08, "loss": 0.1894, "step": 12167 }, { "epoch": 0.97, "grad_norm": 1.2226207761711811, "learning_rate": 1.848777331105833e-08, "loss": 0.1266, "step": 12168 }, { "epoch": 0.97, "grad_norm": 1.341830133092988, "learning_rate": 1.837663708082693e-08, "loss": 0.1707, "step": 12169 }, { "epoch": 0.97, "grad_norm": 1.2053602885680637, "learning_rate": 1.826583528206527e-08, "loss": 0.1355, "step": 12170 }, { "epoch": 0.97, "grad_norm": 1.2104290718913862, "learning_rate": 1.815536792221184e-08, "loss": 0.1289, "step": 12171 }, { "epoch": 0.97, "grad_norm": 10.27395936487803, "learning_rate": 1.8045235008682937e-08, "loss": 0.6108, "step": 12172 }, { "epoch": 0.97, "grad_norm": 1.299393859648016, "learning_rate": 1.7935436548872643e-08, "loss": 0.1776, "step": 12173 }, { "epoch": 0.97, "grad_norm": 6.103057425911352, "learning_rate": 1.7825972550151173e-08, "loss": 0.6399, "step": 12174 }, { "epoch": 0.97, "grad_norm": 1.4472560694868717, "learning_rate": 1.7716843019867646e-08, "loss": 0.1523, "step": 12175 }, { "epoch": 0.97, "grad_norm": 1.2498497336712182, "learning_rate": 1.7608047965347876e-08, "loss": 0.1727, "step": 12176 }, { "epoch": 0.97, "grad_norm": 1.4063233194479239, "learning_rate": 1.749958739389601e-08, "loss": 0.1552, "step": 12177 }, { "epoch": 0.97, "grad_norm": 1.3362612585591054, "learning_rate": 1.7391461312794012e-08, "loss": 0.1534, "step": 12178 }, { "epoch": 0.97, "grad_norm": 1.286995834326484, "learning_rate": 1.7283669729298847e-08, "loss": 0.1637, "step": 12179 }, { "epoch": 0.97, "grad_norm": 1.3039593072071103, "learning_rate": 1.7176212650648616e-08, "loss": 0.1426, "step": 12180 }, { "epoch": 0.97, "grad_norm": 1.3119264852066315, "learning_rate": 1.7069090084056437e-08, "loss": 0.1706, "step": 12181 }, { "epoch": 0.97, "grad_norm": 1.3018367517572076, "learning_rate": 1.6962302036713783e-08, "loss": 0.1521, "step": 12182 }, { "epoch": 0.97, "grad_norm": 1.3977147282360436, "learning_rate": 1.685584851578992e-08, "loss": 0.1855, "step": 12183 }, { "epoch": 0.97, "grad_norm": 1.461932026973442, "learning_rate": 1.6749729528431345e-08, "loss": 0.1452, "step": 12184 }, { "epoch": 0.97, "grad_norm": 1.4717675995866952, "learning_rate": 1.6643945081761813e-08, "loss": 0.1601, "step": 12185 }, { "epoch": 0.97, "grad_norm": 1.5521248266603107, "learning_rate": 1.6538495182883418e-08, "loss": 0.193, "step": 12186 }, { "epoch": 0.97, "grad_norm": 1.25264307954479, "learning_rate": 1.6433379838874942e-08, "loss": 0.1916, "step": 12187 }, { "epoch": 0.98, "grad_norm": 1.2547476828899171, "learning_rate": 1.6328599056792958e-08, "loss": 0.1354, "step": 12188 }, { "epoch": 0.98, "grad_norm": 1.4894967671136325, "learning_rate": 1.622415284367296e-08, "loss": 0.1788, "step": 12189 }, { "epoch": 0.98, "grad_norm": 1.341557243562213, "learning_rate": 1.6120041206524885e-08, "loss": 0.1619, "step": 12190 }, { "epoch": 0.98, "grad_norm": 1.2447323989861585, "learning_rate": 1.6016264152339813e-08, "loss": 0.1386, "step": 12191 }, { "epoch": 0.98, "grad_norm": 1.3762815605662289, "learning_rate": 1.5912821688083278e-08, "loss": 0.1193, "step": 12192 }, { "epoch": 0.98, "grad_norm": 1.5732982880052122, "learning_rate": 1.580971382070029e-08, "loss": 0.2075, "step": 12193 }, { "epoch": 0.98, "grad_norm": 1.5262706883971546, "learning_rate": 1.570694055711308e-08, "loss": 0.2018, "step": 12194 }, { "epoch": 0.98, "grad_norm": 1.3828249323637616, "learning_rate": 1.560450190422058e-08, "loss": 0.1328, "step": 12195 }, { "epoch": 0.98, "grad_norm": 1.2874134876775765, "learning_rate": 1.550239786889951e-08, "loss": 0.1758, "step": 12196 }, { "epoch": 0.98, "grad_norm": 24.55686252076405, "learning_rate": 1.5400628458005495e-08, "loss": 0.4715, "step": 12197 }, { "epoch": 0.98, "grad_norm": 1.4217719934969204, "learning_rate": 1.529919367836974e-08, "loss": 0.178, "step": 12198 }, { "epoch": 0.98, "grad_norm": 1.2759049850375173, "learning_rate": 1.519809353680235e-08, "loss": 0.1199, "step": 12199 }, { "epoch": 0.98, "grad_norm": 1.4830562542830823, "learning_rate": 1.509732804009012e-08, "loss": 0.1692, "step": 12200 }, { "epoch": 0.98, "grad_norm": 1.2995127586027977, "learning_rate": 1.499689719499764e-08, "loss": 0.1531, "step": 12201 }, { "epoch": 0.98, "grad_norm": 1.5185508529231253, "learning_rate": 1.4896801008267848e-08, "loss": 0.1656, "step": 12202 }, { "epoch": 0.98, "grad_norm": 1.2641075517430078, "learning_rate": 1.4797039486619814e-08, "loss": 0.1849, "step": 12203 }, { "epoch": 0.98, "grad_norm": 1.7144137044918875, "learning_rate": 1.4697612636751513e-08, "loss": 0.1885, "step": 12204 }, { "epoch": 0.98, "grad_norm": 1.3426685854295706, "learning_rate": 1.4598520465337051e-08, "loss": 0.1967, "step": 12205 }, { "epoch": 0.98, "grad_norm": 1.4591341307756867, "learning_rate": 1.4499762979028887e-08, "loss": 0.1809, "step": 12206 }, { "epoch": 0.98, "grad_norm": 1.3242376509794311, "learning_rate": 1.4401340184457268e-08, "loss": 0.152, "step": 12207 }, { "epoch": 0.98, "grad_norm": 1.3009264294852863, "learning_rate": 1.4303252088229136e-08, "loss": 0.1386, "step": 12208 }, { "epoch": 0.98, "grad_norm": 1.2450471758564503, "learning_rate": 1.4205498696930332e-08, "loss": 0.2055, "step": 12209 }, { "epoch": 0.98, "grad_norm": 1.4451286899028526, "learning_rate": 1.4108080017122272e-08, "loss": 0.146, "step": 12210 }, { "epoch": 0.98, "grad_norm": 13.892704659211985, "learning_rate": 1.4010996055345838e-08, "loss": 0.5407, "step": 12211 }, { "epoch": 0.98, "grad_norm": 1.352273748660829, "learning_rate": 1.3914246818118039e-08, "loss": 0.1449, "step": 12212 }, { "epoch": 0.98, "grad_norm": 7.2156737052469255, "learning_rate": 1.3817832311934232e-08, "loss": 0.6321, "step": 12213 }, { "epoch": 0.98, "grad_norm": 1.3658827378126372, "learning_rate": 1.3721752543266464e-08, "loss": 0.1639, "step": 12214 }, { "epoch": 0.98, "grad_norm": 1.1383944717417929, "learning_rate": 1.3626007518565686e-08, "loss": 0.13, "step": 12215 }, { "epoch": 0.98, "grad_norm": 1.4276798186314943, "learning_rate": 1.353059724425898e-08, "loss": 0.1775, "step": 12216 }, { "epoch": 0.98, "grad_norm": 1.2914492159618929, "learning_rate": 1.3435521726751777e-08, "loss": 0.1388, "step": 12217 }, { "epoch": 0.98, "grad_norm": 1.441087741275333, "learning_rate": 1.3340780972426192e-08, "loss": 0.2028, "step": 12218 }, { "epoch": 0.98, "grad_norm": 1.4539996835838316, "learning_rate": 1.324637498764325e-08, "loss": 0.1597, "step": 12219 }, { "epoch": 0.98, "grad_norm": 1.129070178719021, "learning_rate": 1.3152303778740661e-08, "loss": 0.1221, "step": 12220 }, { "epoch": 0.98, "grad_norm": 1.4354945699889285, "learning_rate": 1.3058567352033369e-08, "loss": 0.1842, "step": 12221 }, { "epoch": 0.98, "grad_norm": 1.2541763511749886, "learning_rate": 1.296516571381412e-08, "loss": 0.1579, "step": 12222 }, { "epoch": 0.98, "grad_norm": 1.4075754669774874, "learning_rate": 1.287209887035401e-08, "loss": 0.1585, "step": 12223 }, { "epoch": 0.98, "grad_norm": 1.4577081755652435, "learning_rate": 1.2779366827899708e-08, "loss": 0.1905, "step": 12224 }, { "epoch": 0.98, "grad_norm": 1.2628100249784184, "learning_rate": 1.268696959267679e-08, "loss": 0.1453, "step": 12225 }, { "epoch": 0.98, "grad_norm": 1.331820262349359, "learning_rate": 1.2594907170889181e-08, "loss": 0.1525, "step": 12226 }, { "epoch": 0.98, "grad_norm": 1.3350976274236408, "learning_rate": 1.2503179568716938e-08, "loss": 0.15, "step": 12227 }, { "epoch": 0.98, "grad_norm": 1.3573646794474215, "learning_rate": 1.241178679231736e-08, "loss": 0.1817, "step": 12228 }, { "epoch": 0.98, "grad_norm": 1.274578300393972, "learning_rate": 1.232072884782609e-08, "loss": 0.1283, "step": 12229 }, { "epoch": 0.98, "grad_norm": 10.316245290009325, "learning_rate": 1.2230005741356577e-08, "loss": 0.443, "step": 12230 }, { "epoch": 0.98, "grad_norm": 1.2483599025012198, "learning_rate": 1.21396174789995e-08, "loss": 0.1662, "step": 12231 }, { "epoch": 0.98, "grad_norm": 1.4160588071124862, "learning_rate": 1.2049564066822228e-08, "loss": 0.1945, "step": 12232 }, { "epoch": 0.98, "grad_norm": 1.5538771749297238, "learning_rate": 1.1959845510870483e-08, "loss": 0.185, "step": 12233 }, { "epoch": 0.98, "grad_norm": 1.5526792575936301, "learning_rate": 1.1870461817167222e-08, "loss": 0.1711, "step": 12234 }, { "epoch": 0.98, "grad_norm": 1.4968994634360262, "learning_rate": 1.1781412991713759e-08, "loss": 0.1873, "step": 12235 }, { "epoch": 0.98, "grad_norm": 6.92480709905564, "learning_rate": 1.1692699040487532e-08, "loss": 0.5893, "step": 12236 }, { "epoch": 0.98, "grad_norm": 1.4863686775880538, "learning_rate": 1.1604319969444334e-08, "loss": 0.2052, "step": 12237 }, { "epoch": 0.98, "grad_norm": 8.601728728951114, "learning_rate": 1.151627578451775e-08, "loss": 0.5932, "step": 12238 }, { "epoch": 0.98, "grad_norm": 1.3597971860915512, "learning_rate": 1.1428566491618053e-08, "loss": 0.2024, "step": 12239 }, { "epoch": 0.98, "grad_norm": 1.301203309283687, "learning_rate": 1.1341192096633313e-08, "loss": 0.1897, "step": 12240 }, { "epoch": 0.98, "grad_norm": 1.415794142201371, "learning_rate": 1.1254152605428836e-08, "loss": 0.1415, "step": 12241 }, { "epoch": 0.98, "grad_norm": 9.4361603980693, "learning_rate": 1.1167448023848837e-08, "loss": 0.4815, "step": 12242 }, { "epoch": 0.98, "grad_norm": 1.577964480547651, "learning_rate": 1.1081078357713659e-08, "loss": 0.1894, "step": 12243 }, { "epoch": 0.98, "grad_norm": 8.372290978759702, "learning_rate": 1.0995043612821443e-08, "loss": 0.5653, "step": 12244 }, { "epoch": 0.98, "grad_norm": 1.548587649850906, "learning_rate": 1.0909343794948124e-08, "loss": 0.1703, "step": 12245 }, { "epoch": 0.98, "grad_norm": 1.4707505482090233, "learning_rate": 1.0823978909846877e-08, "loss": 0.1678, "step": 12246 }, { "epoch": 0.98, "grad_norm": 1.4679607928467995, "learning_rate": 1.0738948963248674e-08, "loss": 0.2064, "step": 12247 }, { "epoch": 0.98, "grad_norm": 1.3945284839322962, "learning_rate": 1.0654253960861727e-08, "loss": 0.1581, "step": 12248 }, { "epoch": 0.98, "grad_norm": 1.3792259788850962, "learning_rate": 1.0569893908371487e-08, "loss": 0.1448, "step": 12249 }, { "epoch": 0.98, "grad_norm": 8.995862125893316, "learning_rate": 1.0485868811441757e-08, "loss": 0.6902, "step": 12250 }, { "epoch": 0.98, "grad_norm": 1.3767203136396222, "learning_rate": 1.0402178675713026e-08, "loss": 0.2001, "step": 12251 }, { "epoch": 0.98, "grad_norm": 1.2402256006405865, "learning_rate": 1.0318823506804687e-08, "loss": 0.1483, "step": 12252 }, { "epoch": 0.98, "grad_norm": 1.2703544834799045, "learning_rate": 1.0235803310311154e-08, "loss": 0.1698, "step": 12253 }, { "epoch": 0.98, "grad_norm": 1.4279018193731392, "learning_rate": 1.0153118091806857e-08, "loss": 0.1754, "step": 12254 }, { "epoch": 0.98, "grad_norm": 1.1595784870017645, "learning_rate": 1.00707678568418e-08, "loss": 0.1433, "step": 12255 }, { "epoch": 0.98, "grad_norm": 1.2705992922837306, "learning_rate": 9.98875261094545e-09, "loss": 0.1492, "step": 12256 }, { "epoch": 0.98, "grad_norm": 1.4978084942537777, "learning_rate": 9.907072359623404e-09, "loss": 0.1583, "step": 12257 }, { "epoch": 0.98, "grad_norm": 1.404079408824503, "learning_rate": 9.825727108359051e-09, "loss": 0.2114, "step": 12258 }, { "epoch": 0.98, "grad_norm": 1.3029560689472313, "learning_rate": 9.744716862613024e-09, "loss": 0.171, "step": 12259 }, { "epoch": 0.98, "grad_norm": 1.3693483338590915, "learning_rate": 9.66404162782375e-09, "loss": 0.1587, "step": 12260 }, { "epoch": 0.98, "grad_norm": 1.4032130563104992, "learning_rate": 9.583701409408009e-09, "loss": 0.1694, "step": 12261 }, { "epoch": 0.98, "grad_norm": 1.2531816839980177, "learning_rate": 9.503696212758706e-09, "loss": 0.1708, "step": 12262 }, { "epoch": 0.98, "grad_norm": 1.4984307093106302, "learning_rate": 9.424026043246548e-09, "loss": 0.1646, "step": 12263 }, { "epoch": 0.98, "grad_norm": 1.3965366120538856, "learning_rate": 9.344690906220588e-09, "loss": 0.1884, "step": 12264 }, { "epoch": 0.98, "grad_norm": 1.5029559073182108, "learning_rate": 9.265690807006566e-09, "loss": 0.1996, "step": 12265 }, { "epoch": 0.98, "grad_norm": 1.4743878636787504, "learning_rate": 9.187025750908018e-09, "loss": 0.188, "step": 12266 }, { "epoch": 0.98, "grad_norm": 1.29589460222466, "learning_rate": 9.10869574320572e-09, "loss": 0.1434, "step": 12267 }, { "epoch": 0.98, "grad_norm": 1.5041501804371316, "learning_rate": 9.030700789158798e-09, "loss": 0.1839, "step": 12268 }, { "epoch": 0.98, "grad_norm": 1.3793725258579925, "learning_rate": 8.953040894003062e-09, "loss": 0.1754, "step": 12269 }, { "epoch": 0.98, "grad_norm": 1.2571181271275564, "learning_rate": 8.875716062951566e-09, "loss": 0.1268, "step": 12270 }, { "epoch": 0.98, "grad_norm": 8.816754062772981, "learning_rate": 8.79872630119627e-09, "loss": 0.5238, "step": 12271 }, { "epoch": 0.98, "grad_norm": 17.58607576034883, "learning_rate": 8.722071613904704e-09, "loss": 0.6891, "step": 12272 }, { "epoch": 0.98, "grad_norm": 1.2452210552376264, "learning_rate": 8.645752006223861e-09, "loss": 0.1432, "step": 12273 }, { "epoch": 0.98, "grad_norm": 1.345283979362492, "learning_rate": 8.56976748327687e-09, "loss": 0.182, "step": 12274 }, { "epoch": 0.98, "grad_norm": 1.4479199907936136, "learning_rate": 8.494118050164646e-09, "loss": 0.1912, "step": 12275 }, { "epoch": 0.98, "grad_norm": 1.2887791332407421, "learning_rate": 8.418803711965907e-09, "loss": 0.1418, "step": 12276 }, { "epoch": 0.98, "grad_norm": 1.2822601742930948, "learning_rate": 8.343824473736606e-09, "loss": 0.1732, "step": 12277 }, { "epoch": 0.98, "grad_norm": 1.2564170035108555, "learning_rate": 8.269180340510496e-09, "loss": 0.1869, "step": 12278 }, { "epoch": 0.98, "grad_norm": 7.9862912713262535, "learning_rate": 8.194871317299125e-09, "loss": 0.8268, "step": 12279 }, { "epoch": 0.98, "grad_norm": 1.2768001077003917, "learning_rate": 8.120897409090166e-09, "loss": 0.1743, "step": 12280 }, { "epoch": 0.98, "grad_norm": 9.810197117912155, "learning_rate": 8.047258620850206e-09, "loss": 0.5019, "step": 12281 }, { "epoch": 0.98, "grad_norm": 6.990512480880503, "learning_rate": 7.973954957523068e-09, "loss": 0.5697, "step": 12282 }, { "epoch": 0.98, "grad_norm": 1.4695344571953228, "learning_rate": 7.900986424029256e-09, "loss": 0.1927, "step": 12283 }, { "epoch": 0.98, "grad_norm": 1.2664276407804347, "learning_rate": 7.828353025268188e-09, "loss": 0.1424, "step": 12284 }, { "epoch": 0.98, "grad_norm": 7.331401252710619, "learning_rate": 7.756054766114852e-09, "loss": 0.5963, "step": 12285 }, { "epoch": 0.98, "grad_norm": 1.4112852802567288, "learning_rate": 7.684091651423697e-09, "loss": 0.165, "step": 12286 }, { "epoch": 0.98, "grad_norm": 1.3731390589457115, "learning_rate": 7.612463686025306e-09, "loss": 0.1452, "step": 12287 }, { "epoch": 0.98, "grad_norm": 1.1929103023388103, "learning_rate": 7.541170874729165e-09, "loss": 0.1343, "step": 12288 }, { "epoch": 0.98, "grad_norm": 1.4233021652881748, "learning_rate": 7.470213222320332e-09, "loss": 0.1595, "step": 12289 }, { "epoch": 0.98, "grad_norm": 1.225446325597461, "learning_rate": 7.399590733562778e-09, "loss": 0.1362, "step": 12290 }, { "epoch": 0.98, "grad_norm": 1.2730192885612466, "learning_rate": 7.329303413198263e-09, "loss": 0.1465, "step": 12291 }, { "epoch": 0.98, "grad_norm": 8.017879972028917, "learning_rate": 7.259351265944126e-09, "loss": 0.7116, "step": 12292 }, { "epoch": 0.98, "grad_norm": 1.2975253074124933, "learning_rate": 7.189734296497719e-09, "loss": 0.117, "step": 12293 }, { "epoch": 0.98, "grad_norm": 1.2638526734027022, "learning_rate": 7.120452509531417e-09, "loss": 0.1541, "step": 12294 }, { "epoch": 0.98, "grad_norm": 1.4948836129436398, "learning_rate": 7.051505909697609e-09, "loss": 0.212, "step": 12295 }, { "epoch": 0.98, "grad_norm": 1.1712536099176125, "learning_rate": 6.982894501623705e-09, "loss": 0.1612, "step": 12296 }, { "epoch": 0.98, "grad_norm": 1.3713071960391352, "learning_rate": 6.914618289916575e-09, "loss": 0.1712, "step": 12297 }, { "epoch": 0.98, "grad_norm": 6.879105410249399, "learning_rate": 6.846677279159775e-09, "loss": 0.709, "step": 12298 }, { "epoch": 0.98, "grad_norm": 1.429659359760385, "learning_rate": 6.7790714739141e-09, "loss": 0.1861, "step": 12299 }, { "epoch": 0.98, "grad_norm": 1.3749823825771499, "learning_rate": 6.711800878718144e-09, "loss": 0.196, "step": 12300 }, { "epoch": 0.98, "grad_norm": 1.314832207426685, "learning_rate": 6.644865498087738e-09, "loss": 0.1619, "step": 12301 }, { "epoch": 0.98, "grad_norm": 1.5540704623641748, "learning_rate": 6.578265336517065e-09, "loss": 0.1755, "step": 12302 }, { "epoch": 0.98, "grad_norm": 1.5245910919725998, "learning_rate": 6.512000398476992e-09, "loss": 0.1824, "step": 12303 }, { "epoch": 0.98, "grad_norm": 1.257500997300339, "learning_rate": 6.446070688415629e-09, "loss": 0.1668, "step": 12304 }, { "epoch": 0.98, "grad_norm": 1.3401438360576166, "learning_rate": 6.38047621075999e-09, "loss": 0.1645, "step": 12305 }, { "epoch": 0.98, "grad_norm": 21.119012663067124, "learning_rate": 6.315216969912663e-09, "loss": 0.5201, "step": 12306 }, { "epoch": 0.98, "grad_norm": 1.448409313037296, "learning_rate": 6.250292970255145e-09, "loss": 0.1748, "step": 12307 }, { "epoch": 0.98, "grad_norm": 1.3553161486965755, "learning_rate": 6.185704216145616e-09, "loss": 0.1431, "step": 12308 }, { "epoch": 0.98, "grad_norm": 6.678348894200219, "learning_rate": 6.121450711921162e-09, "loss": 0.4574, "step": 12309 }, { "epoch": 0.98, "grad_norm": 1.4669329433026022, "learning_rate": 6.057532461893889e-09, "loss": 0.191, "step": 12310 }, { "epoch": 0.98, "grad_norm": 1.393941975253871, "learning_rate": 5.99394947035592e-09, "loss": 0.188, "step": 12311 }, { "epoch": 0.98, "grad_norm": 1.5439729910392685, "learning_rate": 5.9307017415749516e-09, "loss": 0.1863, "step": 12312 }, { "epoch": 0.99, "grad_norm": 1.50180898577787, "learning_rate": 5.8677892797975866e-09, "loss": 0.1875, "step": 12313 }, { "epoch": 0.99, "grad_norm": 1.5607833765045886, "learning_rate": 5.805212089247114e-09, "loss": 0.1987, "step": 12314 }, { "epoch": 0.99, "grad_norm": 1.352077229514529, "learning_rate": 5.742970174124618e-09, "loss": 0.1663, "step": 12315 }, { "epoch": 0.99, "grad_norm": 1.199383473404824, "learning_rate": 5.681063538608422e-09, "loss": 0.1519, "step": 12316 }, { "epoch": 0.99, "grad_norm": 1.2819460329864152, "learning_rate": 5.619492186855202e-09, "loss": 0.161, "step": 12317 }, { "epoch": 0.99, "grad_norm": 1.264466058052309, "learning_rate": 5.558256122997207e-09, "loss": 0.1576, "step": 12318 }, { "epoch": 0.99, "grad_norm": 1.4920802902798858, "learning_rate": 5.49735535114615e-09, "loss": 0.2044, "step": 12319 }, { "epoch": 0.99, "grad_norm": 1.3614897920733158, "learning_rate": 5.436789875390425e-09, "loss": 0.1687, "step": 12320 }, { "epoch": 0.99, "grad_norm": 1.296904684012285, "learning_rate": 5.376559699796224e-09, "loss": 0.1722, "step": 12321 }, { "epoch": 0.99, "grad_norm": 1.3481339893970439, "learning_rate": 5.316664828406426e-09, "loss": 0.1473, "step": 12322 }, { "epoch": 0.99, "grad_norm": 1.576515547067488, "learning_rate": 5.257105265241702e-09, "loss": 0.1966, "step": 12323 }, { "epoch": 0.99, "grad_norm": 19.66069683916925, "learning_rate": 5.197881014301631e-09, "loss": 0.4991, "step": 12324 }, { "epoch": 0.99, "grad_norm": 1.4723995186236816, "learning_rate": 5.138992079561367e-09, "loss": 0.2246, "step": 12325 }, { "epoch": 0.99, "grad_norm": 1.5279680531601956, "learning_rate": 5.080438464974413e-09, "loss": 0.2005, "step": 12326 }, { "epoch": 0.99, "grad_norm": 1.3006601194454075, "learning_rate": 5.022220174471515e-09, "loss": 0.1537, "step": 12327 }, { "epoch": 0.99, "grad_norm": 1.618676123596008, "learning_rate": 4.964337211960657e-09, "loss": 0.1419, "step": 12328 }, { "epoch": 0.99, "grad_norm": 1.427657177786239, "learning_rate": 4.90678958132873e-09, "loss": 0.1752, "step": 12329 }, { "epoch": 0.99, "grad_norm": 1.391970293093847, "learning_rate": 4.849577286438756e-09, "loss": 0.1562, "step": 12330 }, { "epoch": 0.99, "grad_norm": 1.726878325539933, "learning_rate": 4.792700331130995e-09, "loss": 0.1889, "step": 12331 }, { "epoch": 0.99, "grad_norm": 1.3897014568421922, "learning_rate": 4.73615871922406e-09, "loss": 0.1501, "step": 12332 }, { "epoch": 0.99, "grad_norm": 1.4757281658750148, "learning_rate": 4.679952454514358e-09, "loss": 0.187, "step": 12333 }, { "epoch": 0.99, "grad_norm": 1.5806145387314678, "learning_rate": 4.6240815407744274e-09, "loss": 0.185, "step": 12334 }, { "epoch": 0.99, "grad_norm": 1.3732692878523967, "learning_rate": 4.568545981755157e-09, "loss": 0.1801, "step": 12335 }, { "epoch": 0.99, "grad_norm": 1.4647804333632826, "learning_rate": 4.513345781184675e-09, "loss": 0.1715, "step": 12336 }, { "epoch": 0.99, "grad_norm": 1.3065198763656092, "learning_rate": 4.458480942769461e-09, "loss": 0.1267, "step": 12337 }, { "epoch": 0.99, "grad_norm": 1.498248120338687, "learning_rate": 4.403951470192125e-09, "loss": 0.1445, "step": 12338 }, { "epoch": 0.99, "grad_norm": 1.2871319985551446, "learning_rate": 4.349757367113627e-09, "loss": 0.1441, "step": 12339 }, { "epoch": 0.99, "grad_norm": 1.4926997197484004, "learning_rate": 4.295898637172169e-09, "loss": 0.2172, "step": 12340 }, { "epoch": 0.99, "grad_norm": 7.54686938084507, "learning_rate": 4.242375283983191e-09, "loss": 0.5971, "step": 12341 }, { "epoch": 0.99, "grad_norm": 1.560666477147416, "learning_rate": 4.1891873111404855e-09, "loss": 0.2095, "step": 12342 }, { "epoch": 0.99, "grad_norm": 1.467135754940844, "learning_rate": 4.136334722213975e-09, "loss": 0.1857, "step": 12343 }, { "epoch": 0.99, "grad_norm": 1.4942887830872245, "learning_rate": 4.083817520752487e-09, "loss": 0.1969, "step": 12344 }, { "epoch": 0.99, "grad_norm": 1.2894693195372344, "learning_rate": 4.031635710281534e-09, "loss": 0.1976, "step": 12345 }, { "epoch": 0.99, "grad_norm": 1.4526431439360832, "learning_rate": 3.979789294303315e-09, "loss": 0.1514, "step": 12346 }, { "epoch": 0.99, "grad_norm": 1.3223293873940583, "learning_rate": 3.928278276300046e-09, "loss": 0.1666, "step": 12347 }, { "epoch": 0.99, "grad_norm": 1.2099540498933572, "learning_rate": 3.877102659728404e-09, "loss": 0.1364, "step": 12348 }, { "epoch": 0.99, "grad_norm": 1.3267124091172107, "learning_rate": 3.826262448024531e-09, "loss": 0.13, "step": 12349 }, { "epoch": 0.99, "grad_norm": 1.4496510481714342, "learning_rate": 3.775757644601808e-09, "loss": 0.173, "step": 12350 }, { "epoch": 0.99, "grad_norm": 1.4600101639645677, "learning_rate": 3.725588252849743e-09, "loss": 0.1782, "step": 12351 }, { "epoch": 0.99, "grad_norm": 1.5136698008893839, "learning_rate": 3.6757542761378663e-09, "loss": 0.2242, "step": 12352 }, { "epoch": 0.99, "grad_norm": 1.346901645513817, "learning_rate": 3.6262557178101678e-09, "loss": 0.1741, "step": 12353 }, { "epoch": 0.99, "grad_norm": 1.2687115238432485, "learning_rate": 3.5770925811906554e-09, "loss": 0.1489, "step": 12354 }, { "epoch": 0.99, "grad_norm": 1.4988878498216378, "learning_rate": 3.5282648695794675e-09, "loss": 0.1726, "step": 12355 }, { "epoch": 0.99, "grad_norm": 1.4155570823141062, "learning_rate": 3.4797725862545374e-09, "loss": 0.1684, "step": 12356 }, { "epoch": 0.99, "grad_norm": 1.3345094877212889, "learning_rate": 3.431615734471594e-09, "loss": 0.1596, "step": 12357 }, { "epoch": 0.99, "grad_norm": 1.5817264727196219, "learning_rate": 3.3837943174630517e-09, "loss": 0.1787, "step": 12358 }, { "epoch": 0.99, "grad_norm": 1.4127084729906447, "learning_rate": 3.3363083384396756e-09, "loss": 0.1847, "step": 12359 }, { "epoch": 0.99, "grad_norm": 1.1734884922316862, "learning_rate": 3.2891578005889158e-09, "loss": 0.15, "step": 12360 }, { "epoch": 0.99, "grad_norm": 11.560635991850749, "learning_rate": 3.2423427070765734e-09, "loss": 0.553, "step": 12361 }, { "epoch": 0.99, "grad_norm": 1.3816335815303793, "learning_rate": 3.1958630610451346e-09, "loss": 0.1853, "step": 12362 }, { "epoch": 0.99, "grad_norm": 1.6024541699705677, "learning_rate": 3.1497188656154364e-09, "loss": 0.2055, "step": 12363 }, { "epoch": 0.99, "grad_norm": 5.502855844873582, "learning_rate": 3.103910123885001e-09, "loss": 0.4776, "step": 12364 }, { "epoch": 0.99, "grad_norm": 1.140915778075582, "learning_rate": 3.0584368389291465e-09, "loss": 0.1278, "step": 12365 }, { "epoch": 0.99, "grad_norm": 1.3163209347215794, "learning_rate": 3.0132990138004303e-09, "loss": 0.1317, "step": 12366 }, { "epoch": 0.99, "grad_norm": 7.153940413798579, "learning_rate": 2.9684966515286517e-09, "loss": 0.5226, "step": 12367 }, { "epoch": 0.99, "grad_norm": 1.3417864872004317, "learning_rate": 2.924029755122515e-09, "loss": 0.1588, "step": 12368 }, { "epoch": 0.99, "grad_norm": 1.3600769353574524, "learning_rate": 2.8798983275674096e-09, "loss": 0.1533, "step": 12369 }, { "epoch": 0.99, "grad_norm": 1.3940917293233759, "learning_rate": 2.8361023718248557e-09, "loss": 0.1603, "step": 12370 }, { "epoch": 0.99, "grad_norm": 1.4202603775114153, "learning_rate": 2.7926418908352795e-09, "loss": 0.196, "step": 12371 }, { "epoch": 0.99, "grad_norm": 10.462933731978273, "learning_rate": 2.749516887516901e-09, "loss": 0.587, "step": 12372 }, { "epoch": 0.99, "grad_norm": 1.2236738063619397, "learning_rate": 2.7067273647646274e-09, "loss": 0.1773, "step": 12373 }, { "epoch": 0.99, "grad_norm": 1.4349459706290002, "learning_rate": 2.664273325450606e-09, "loss": 0.1823, "step": 12374 }, { "epoch": 0.99, "grad_norm": 1.3487796835289365, "learning_rate": 2.6221547724253337e-09, "loss": 0.2131, "step": 12375 }, { "epoch": 0.99, "grad_norm": 1.1979761253788213, "learning_rate": 2.580371708516549e-09, "loss": 0.1128, "step": 12376 }, { "epoch": 0.99, "grad_norm": 1.5371049958341534, "learning_rate": 2.5389241365286753e-09, "loss": 0.1851, "step": 12377 }, { "epoch": 0.99, "grad_norm": 1.4489277692090052, "learning_rate": 2.4978120592450415e-09, "loss": 0.1966, "step": 12378 }, { "epoch": 0.99, "grad_norm": 8.68366510113351, "learning_rate": 2.457035479424552e-09, "loss": 0.6126, "step": 12379 }, { "epoch": 0.99, "grad_norm": 1.3466698401367339, "learning_rate": 2.4165943998050166e-09, "loss": 0.1715, "step": 12380 }, { "epoch": 0.99, "grad_norm": 1.610756926846695, "learning_rate": 2.376488823102041e-09, "loss": 0.2028, "step": 12381 }, { "epoch": 0.99, "grad_norm": 1.4663812471311686, "learning_rate": 2.3367187520079158e-09, "loss": 0.1654, "step": 12382 }, { "epoch": 0.99, "grad_norm": 1.6868530767921344, "learning_rate": 2.2972841891916175e-09, "loss": 0.193, "step": 12383 }, { "epoch": 0.99, "grad_norm": 1.4160130622822396, "learning_rate": 2.258185137301028e-09, "loss": 0.1856, "step": 12384 }, { "epoch": 0.99, "grad_norm": 1.2848517940312105, "learning_rate": 2.219421598961269e-09, "loss": 0.1735, "step": 12385 }, { "epoch": 0.99, "grad_norm": 1.5012106866844217, "learning_rate": 2.1809935767741484e-09, "loss": 0.2074, "step": 12386 }, { "epoch": 0.99, "grad_norm": 1.589497274494362, "learning_rate": 2.1429010733203804e-09, "loss": 0.1916, "step": 12387 }, { "epoch": 0.99, "grad_norm": 1.1476062738983304, "learning_rate": 2.1051440911556973e-09, "loss": 0.1436, "step": 12388 }, { "epoch": 0.99, "grad_norm": 1.3746764929364983, "learning_rate": 2.0677226328164047e-09, "loss": 0.1958, "step": 12389 }, { "epoch": 0.99, "grad_norm": 1.2920370035860693, "learning_rate": 2.0306367008138263e-09, "loss": 0.1615, "step": 12390 }, { "epoch": 0.99, "grad_norm": 10.757896195405598, "learning_rate": 1.993886297637637e-09, "loss": 0.5433, "step": 12391 }, { "epoch": 0.99, "grad_norm": 1.4190559361498127, "learning_rate": 1.9574714257553084e-09, "loss": 0.185, "step": 12392 }, { "epoch": 0.99, "grad_norm": 1.3618281376289567, "learning_rate": 1.921392087611551e-09, "loss": 0.1626, "step": 12393 }, { "epoch": 0.99, "grad_norm": 1.459251675745753, "learning_rate": 1.8856482856277615e-09, "loss": 0.164, "step": 12394 }, { "epoch": 0.99, "grad_norm": 1.2817898197403625, "learning_rate": 1.8502400222047967e-09, "loss": 0.1691, "step": 12395 }, { "epoch": 0.99, "grad_norm": 1.8164649396142765, "learning_rate": 1.8151672997185342e-09, "loss": 0.1875, "step": 12396 }, { "epoch": 0.99, "grad_norm": 1.6218945840895487, "learning_rate": 1.7804301205243125e-09, "loss": 0.2111, "step": 12397 }, { "epoch": 0.99, "grad_norm": 1.3480529726296482, "learning_rate": 1.7460284869535992e-09, "loss": 0.1438, "step": 12398 }, { "epoch": 0.99, "grad_norm": 1.368658752033586, "learning_rate": 1.711962401316214e-09, "loss": 0.2167, "step": 12399 }, { "epoch": 0.99, "grad_norm": 1.348689541208689, "learning_rate": 1.6782318658992159e-09, "loss": 0.1454, "step": 12400 }, { "epoch": 0.99, "grad_norm": 1.3600608221739365, "learning_rate": 1.6448368829663497e-09, "loss": 0.1789, "step": 12401 }, { "epoch": 0.99, "grad_norm": 1.3864266789405137, "learning_rate": 1.611777454760266e-09, "loss": 0.1777, "step": 12402 }, { "epoch": 0.99, "grad_norm": 1.4377286808726921, "learning_rate": 1.5790535835003006e-09, "loss": 0.1721, "step": 12403 }, { "epoch": 0.99, "grad_norm": 1.4011933271877726, "learning_rate": 1.5466652713824748e-09, "loss": 0.1601, "step": 12404 }, { "epoch": 0.99, "grad_norm": 1.4537048304126532, "learning_rate": 1.5146125205822703e-09, "loss": 0.1769, "step": 12405 }, { "epoch": 0.99, "grad_norm": 1.2751143686417106, "learning_rate": 1.4828953332507445e-09, "loss": 0.118, "step": 12406 }, { "epoch": 0.99, "grad_norm": 7.360199711863573, "learning_rate": 1.4515137115173051e-09, "loss": 0.5382, "step": 12407 }, { "epoch": 0.99, "grad_norm": 1.4751042143119324, "learning_rate": 1.4204676574886001e-09, "loss": 0.231, "step": 12408 }, { "epoch": 0.99, "grad_norm": 1.4660952568821204, "learning_rate": 1.3897571732496284e-09, "loss": 0.165, "step": 12409 }, { "epoch": 0.99, "grad_norm": 1.5423802821682449, "learning_rate": 1.3593822608609642e-09, "loss": 0.2109, "step": 12410 }, { "epoch": 0.99, "grad_norm": 1.3128584140898922, "learning_rate": 1.3293429223620869e-09, "loss": 0.1765, "step": 12411 }, { "epoch": 0.99, "grad_norm": 10.11882325642994, "learning_rate": 1.2996391597697167e-09, "loss": 0.5815, "step": 12412 }, { "epoch": 0.99, "grad_norm": 1.3228958382020615, "learning_rate": 1.2702709750783693e-09, "loss": 0.1612, "step": 12413 }, { "epoch": 0.99, "grad_norm": 1.3176762011998675, "learning_rate": 1.2412383702592456e-09, "loss": 0.1538, "step": 12414 }, { "epoch": 0.99, "grad_norm": 1.473617421344286, "learning_rate": 1.2125413472613424e-09, "loss": 0.1866, "step": 12415 }, { "epoch": 0.99, "grad_norm": 1.2728618120494672, "learning_rate": 1.1841799080114513e-09, "loss": 0.2002, "step": 12416 }, { "epoch": 0.99, "grad_norm": 1.5430915116808106, "learning_rate": 1.1561540544130502e-09, "loss": 0.1467, "step": 12417 }, { "epoch": 0.99, "grad_norm": 1.450949560863337, "learning_rate": 1.1284637883485217e-09, "loss": 0.1547, "step": 12418 }, { "epoch": 0.99, "grad_norm": 1.5430801091108468, "learning_rate": 1.1011091116758244e-09, "loss": 0.1603, "step": 12419 }, { "epoch": 0.99, "grad_norm": 1.6751472628821946, "learning_rate": 1.074090026231267e-09, "loss": 0.1708, "step": 12420 }, { "epoch": 0.99, "grad_norm": 1.303974764425101, "learning_rate": 1.0474065338300643e-09, "loss": 0.1727, "step": 12421 }, { "epoch": 0.99, "grad_norm": 1.278845192915408, "learning_rate": 1.0210586362618957e-09, "loss": 0.1472, "step": 12422 }, { "epoch": 0.99, "grad_norm": 1.3259345134072038, "learning_rate": 9.95046335297012e-10, "loss": 0.1558, "step": 12423 }, { "epoch": 0.99, "grad_norm": 1.3629431828669294, "learning_rate": 9.693696326806834e-10, "loss": 0.1625, "step": 12424 }, { "epoch": 0.99, "grad_norm": 1.448764721383848, "learning_rate": 9.440285301370865e-10, "loss": 0.1952, "step": 12425 }, { "epoch": 0.99, "grad_norm": 1.2563578990533695, "learning_rate": 9.190230293670832e-10, "loss": 0.1517, "step": 12426 }, { "epoch": 0.99, "grad_norm": 1.3348301019367887, "learning_rate": 8.943531320504406e-10, "loss": 0.1427, "step": 12427 }, { "epoch": 0.99, "grad_norm": 1.221874588182704, "learning_rate": 8.700188398419462e-10, "loss": 0.1711, "step": 12428 }, { "epoch": 0.99, "grad_norm": 1.2782950854156685, "learning_rate": 8.460201543758484e-10, "loss": 0.1559, "step": 12429 }, { "epoch": 0.99, "grad_norm": 1.4578008185900555, "learning_rate": 8.223570772636358e-10, "loss": 0.1712, "step": 12430 }, { "epoch": 0.99, "grad_norm": 1.54978810808045, "learning_rate": 7.990296100929273e-10, "loss": 0.2191, "step": 12431 }, { "epoch": 0.99, "grad_norm": 1.4407032747913722, "learning_rate": 7.760377544308028e-10, "loss": 0.1854, "step": 12432 }, { "epoch": 0.99, "grad_norm": 1.39391860974014, "learning_rate": 7.533815118204724e-10, "loss": 0.1841, "step": 12433 }, { "epoch": 0.99, "grad_norm": 1.4293782580132575, "learning_rate": 7.310608837823863e-10, "loss": 0.164, "step": 12434 }, { "epoch": 0.99, "grad_norm": 1.4080986413261523, "learning_rate": 7.090758718153457e-10, "loss": 0.2149, "step": 12435 }, { "epoch": 0.99, "grad_norm": 1.4417525270832874, "learning_rate": 6.87426477395392e-10, "loss": 0.186, "step": 12436 }, { "epoch": 0.99, "grad_norm": 1.3546528250782155, "learning_rate": 6.661127019758074e-10, "loss": 0.1715, "step": 12437 }, { "epoch": 1.0, "grad_norm": 1.3576999772131655, "learning_rate": 6.451345469876691e-10, "loss": 0.1437, "step": 12438 }, { "epoch": 1.0, "grad_norm": 10.346874579005064, "learning_rate": 6.2449201383874e-10, "loss": 0.5354, "step": 12439 }, { "epoch": 1.0, "grad_norm": 7.844415334165052, "learning_rate": 6.041851039151337e-10, "loss": 0.5341, "step": 12440 }, { "epoch": 1.0, "grad_norm": 1.340737348657837, "learning_rate": 5.842138185807589e-10, "loss": 0.185, "step": 12441 }, { "epoch": 1.0, "grad_norm": 1.5325089484802543, "learning_rate": 5.645781591750998e-10, "loss": 0.1831, "step": 12442 }, { "epoch": 1.0, "grad_norm": 1.4041455854715992, "learning_rate": 5.452781270171015e-10, "loss": 0.1762, "step": 12443 }, { "epoch": 1.0, "grad_norm": 1.278495733628024, "learning_rate": 5.263137234023941e-10, "loss": 0.1619, "step": 12444 }, { "epoch": 1.0, "grad_norm": 1.5047500870787491, "learning_rate": 5.076849496044034e-10, "loss": 0.175, "step": 12445 }, { "epoch": 1.0, "grad_norm": 9.757268320782321, "learning_rate": 4.893918068726855e-10, "loss": 0.5709, "step": 12446 }, { "epoch": 1.0, "grad_norm": 1.2482668570882471, "learning_rate": 4.714342964368124e-10, "loss": 0.1614, "step": 12447 }, { "epoch": 1.0, "grad_norm": 1.2113480552390334, "learning_rate": 4.538124195008209e-10, "loss": 0.1384, "step": 12448 }, { "epoch": 1.0, "grad_norm": 1.3194012389859553, "learning_rate": 4.3652617724876387e-10, "loss": 0.1433, "step": 12449 }, { "epoch": 1.0, "grad_norm": 1.2482354084598875, "learning_rate": 4.1957557084082447e-10, "loss": 0.153, "step": 12450 }, { "epoch": 1.0, "grad_norm": 1.500151594637948, "learning_rate": 4.0296060141498116e-10, "loss": 0.208, "step": 12451 }, { "epoch": 1.0, "grad_norm": 7.904553600045467, "learning_rate": 3.86681270085898e-10, "loss": 0.5328, "step": 12452 }, { "epoch": 1.0, "grad_norm": 1.4399258407950049, "learning_rate": 3.707375779476996e-10, "loss": 0.1371, "step": 12453 }, { "epoch": 1.0, "grad_norm": 1.2919240224978263, "learning_rate": 3.5512952607008597e-10, "loss": 0.1548, "step": 12454 }, { "epoch": 1.0, "grad_norm": 1.3644810223640418, "learning_rate": 3.398571155011077e-10, "loss": 0.1568, "step": 12455 }, { "epoch": 1.0, "grad_norm": 1.5294373605825142, "learning_rate": 3.249203472655005e-10, "loss": 0.1793, "step": 12456 }, { "epoch": 1.0, "grad_norm": 1.4704870900241869, "learning_rate": 3.1031922236690604e-10, "loss": 0.1279, "step": 12457 }, { "epoch": 1.0, "grad_norm": 1.2845473309482875, "learning_rate": 2.9605374178454104e-10, "loss": 0.1521, "step": 12458 }, { "epoch": 1.0, "grad_norm": 1.3298594368596983, "learning_rate": 2.821239064770831e-10, "loss": 0.2038, "step": 12459 }, { "epoch": 1.0, "grad_norm": 1.502680772724333, "learning_rate": 2.6852971737878483e-10, "loss": 0.1841, "step": 12460 }, { "epoch": 1.0, "grad_norm": 1.3207531587764756, "learning_rate": 2.5527117540280475e-10, "loss": 0.1786, "step": 12461 }, { "epoch": 1.0, "grad_norm": 1.3703484223685662, "learning_rate": 2.423482814389866e-10, "loss": 0.1898, "step": 12462 }, { "epoch": 1.0, "grad_norm": 1.4995343979937201, "learning_rate": 2.297610363549696e-10, "loss": 0.1911, "step": 12463 }, { "epoch": 1.0, "grad_norm": 5.5257805567300124, "learning_rate": 2.1750944099618865e-10, "loss": 0.5361, "step": 12464 }, { "epoch": 1.0, "grad_norm": 1.4570660963391613, "learning_rate": 2.0559349618420877e-10, "loss": 0.1944, "step": 12465 }, { "epoch": 1.0, "grad_norm": 7.038698655981214, "learning_rate": 1.9401320272005585e-10, "loss": 0.5719, "step": 12466 }, { "epoch": 1.0, "grad_norm": 1.3652835620030952, "learning_rate": 1.8276856138033095e-10, "loss": 0.2062, "step": 12467 }, { "epoch": 1.0, "grad_norm": 1.298275905013123, "learning_rate": 1.7185957291998568e-10, "loss": 0.1709, "step": 12468 }, { "epoch": 1.0, "grad_norm": 6.676511801464607, "learning_rate": 1.6128623807176726e-10, "loss": 0.5762, "step": 12469 }, { "epoch": 1.0, "grad_norm": 12.137816656550177, "learning_rate": 1.5104855754566329e-10, "loss": 0.5806, "step": 12470 }, { "epoch": 1.0, "grad_norm": 1.4239731564966485, "learning_rate": 1.4114653202834672e-10, "loss": 0.1686, "step": 12471 }, { "epoch": 1.0, "grad_norm": 1.3775099348967232, "learning_rate": 1.3158016218428605e-10, "loss": 0.176, "step": 12472 }, { "epoch": 1.0, "grad_norm": 1.388389116201817, "learning_rate": 1.223494486568555e-10, "loss": 0.186, "step": 12473 }, { "epoch": 1.0, "grad_norm": 1.277211543108001, "learning_rate": 1.1345439206500441e-10, "loss": 0.1286, "step": 12474 }, { "epoch": 1.0, "grad_norm": 1.2543232351115798, "learning_rate": 1.0489499300603279e-10, "loss": 0.1465, "step": 12475 }, { "epoch": 1.0, "grad_norm": 1.6695659682758446, "learning_rate": 9.667125205448102e-11, "loss": 0.1687, "step": 12476 }, { "epoch": 1.0, "grad_norm": 1.4460695622520532, "learning_rate": 8.878316976268508e-11, "loss": 0.1777, "step": 12477 }, { "epoch": 1.0, "grad_norm": 1.4673696087842043, "learning_rate": 8.123074665966624e-11, "loss": 0.1799, "step": 12478 }, { "epoch": 1.0, "grad_norm": 1.3636871146700074, "learning_rate": 7.401398325335152e-11, "loss": 0.1799, "step": 12479 }, { "epoch": 1.0, "grad_norm": 1.297010273055021, "learning_rate": 6.713288002724305e-11, "loss": 0.1489, "step": 12480 }, { "epoch": 1.0, "grad_norm": 1.3352641986509013, "learning_rate": 6.058743744374874e-11, "loss": 0.1751, "step": 12481 }, { "epoch": 1.0, "grad_norm": 5.034262881864984, "learning_rate": 5.437765594196176e-11, "loss": 0.3937, "step": 12482 }, { "epoch": 1.0, "grad_norm": 1.4119534379433494, "learning_rate": 4.8503535939326e-11, "loss": 0.1569, "step": 12483 }, { "epoch": 1.0, "grad_norm": 1.6655655272604932, "learning_rate": 4.296507782997061e-11, "loss": 0.1851, "step": 12484 }, { "epoch": 1.0, "grad_norm": 1.4135804673982388, "learning_rate": 3.776228198526521e-11, "loss": 0.1764, "step": 12485 }, { "epoch": 1.0, "grad_norm": 8.326214058789395, "learning_rate": 3.289514875548516e-11, "loss": 0.5402, "step": 12486 }, { "epoch": 1.0, "grad_norm": 1.3324158441519371, "learning_rate": 2.8363678465925804e-11, "loss": 0.1696, "step": 12487 }, { "epoch": 1.0, "grad_norm": 1.3566381054246832, "learning_rate": 2.416787142245358e-11, "loss": 0.182, "step": 12488 }, { "epoch": 1.0, "grad_norm": 1.4884231772623906, "learning_rate": 2.030772790539981e-11, "loss": 0.1793, "step": 12489 }, { "epoch": 1.0, "grad_norm": 1.277548275918522, "learning_rate": 1.6783248174556677e-11, "loss": 0.1348, "step": 12490 }, { "epoch": 1.0, "grad_norm": 1.310317184661321, "learning_rate": 1.3594432466401686e-11, "loss": 0.1589, "step": 12491 }, { "epoch": 1.0, "grad_norm": 1.4874888057435114, "learning_rate": 1.074128099520788e-11, "loss": 0.1793, "step": 12492 }, { "epoch": 1.0, "grad_norm": 1.3331354664989092, "learning_rate": 8.223793951933623e-12, "loss": 0.1697, "step": 12493 }, { "epoch": 1.0, "grad_norm": 1.1352728993213603, "learning_rate": 6.0419715058879225e-12, "loss": 0.1307, "step": 12494 }, { "epoch": 1.0, "grad_norm": 1.3584496311398142, "learning_rate": 4.19581380417533e-12, "loss": 0.1526, "step": 12495 }, { "epoch": 1.0, "grad_norm": 1.5578867726168715, "learning_rate": 2.6853209700306025e-12, "loss": 0.1933, "step": 12496 }, { "epoch": 1.0, "grad_norm": 1.2706422910499398, "learning_rate": 1.5104931044840344e-12, "loss": 0.1463, "step": 12497 }, { "epoch": 1.0, "grad_norm": 1.1398165796291209, "learning_rate": 6.713302874716832e-13, "loss": 0.1258, "step": 12498 }, { "epoch": 1.0, "grad_norm": 1.3057438696186767, "learning_rate": 1.6783257450470047e-13, "loss": 0.163, "step": 12499 }, { "epoch": 1.0, "grad_norm": 6.928702036545608, "learning_rate": 0.0, "loss": 0.36, "step": 12500 }, { "epoch": 1.0, "step": 12500, "total_flos": 5919025980409856.0, "train_loss": 0.246136455129385, "train_runtime": 40622.9472, "train_samples_per_second": 4.923, "train_steps_per_second": 0.308 } ], "logging_steps": 1.0, "max_steps": 12500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 5919025980409856.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }