{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 13980, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 4.056429511946979, "learning_rate": 2.3809523809523813e-08, "loss": 0.3022, "step": 1 }, { "epoch": 0.0, "grad_norm": 4.254068828741834, "learning_rate": 4.7619047619047627e-08, "loss": 0.3284, "step": 2 }, { "epoch": 0.0, "grad_norm": 3.7081250197058817, "learning_rate": 7.142857142857144e-08, "loss": 0.3585, "step": 3 }, { "epoch": 0.0, "grad_norm": 4.03035489196616, "learning_rate": 9.523809523809525e-08, "loss": 0.3019, "step": 4 }, { "epoch": 0.0, "grad_norm": 3.8736544756581472, "learning_rate": 1.1904761904761906e-07, "loss": 0.3255, "step": 5 }, { "epoch": 0.0, "grad_norm": 3.3222832104609203, "learning_rate": 1.4285714285714287e-07, "loss": 0.3255, "step": 6 }, { "epoch": 0.0, "grad_norm": 3.4210795474931173, "learning_rate": 1.6666666666666668e-07, "loss": 0.3276, "step": 7 }, { "epoch": 0.0, "grad_norm": 3.5634613739343233, "learning_rate": 1.904761904761905e-07, "loss": 0.2726, "step": 8 }, { "epoch": 0.0, "grad_norm": 3.6637944779237985, "learning_rate": 2.142857142857143e-07, "loss": 0.3569, "step": 9 }, { "epoch": 0.0, "grad_norm": 3.5471335549365794, "learning_rate": 2.3809523809523811e-07, "loss": 0.2951, "step": 10 }, { "epoch": 0.0, "grad_norm": 3.977026469672543, "learning_rate": 2.6190476190476194e-07, "loss": 0.3717, "step": 11 }, { "epoch": 0.0, "grad_norm": 3.2639479730612186, "learning_rate": 2.8571428571428575e-07, "loss": 0.331, "step": 12 }, { "epoch": 0.0, "grad_norm": 3.5942630021409734, "learning_rate": 3.0952380952380955e-07, "loss": 0.3077, "step": 13 }, { "epoch": 0.0, "grad_norm": 3.4934757744517793, "learning_rate": 3.3333333333333335e-07, "loss": 0.3157, "step": 14 }, { "epoch": 0.0, "grad_norm": 3.3732383934025227, "learning_rate": 3.5714285714285716e-07, "loss": 0.2967, "step": 15 }, { "epoch": 0.0, "grad_norm": 3.6149605542926895, "learning_rate": 3.80952380952381e-07, "loss": 0.3424, "step": 16 }, { "epoch": 0.0, "grad_norm": 3.5210734613974997, "learning_rate": 4.047619047619048e-07, "loss": 0.276, "step": 17 }, { "epoch": 0.0, "grad_norm": 3.061422767153501, "learning_rate": 4.285714285714286e-07, "loss": 0.3026, "step": 18 }, { "epoch": 0.0, "grad_norm": 3.160475698653178, "learning_rate": 4.523809523809524e-07, "loss": 0.3297, "step": 19 }, { "epoch": 0.0, "grad_norm": 2.5495914807491262, "learning_rate": 4.7619047619047623e-07, "loss": 0.2812, "step": 20 }, { "epoch": 0.0, "grad_norm": 2.6927500779564375, "learning_rate": 5.000000000000001e-07, "loss": 0.3275, "step": 21 }, { "epoch": 0.0, "grad_norm": 2.5205250700987505, "learning_rate": 5.238095238095239e-07, "loss": 0.3052, "step": 22 }, { "epoch": 0.0, "grad_norm": 2.742367553400832, "learning_rate": 5.476190476190477e-07, "loss": 0.2851, "step": 23 }, { "epoch": 0.0, "grad_norm": 2.9650903686604932, "learning_rate": 5.714285714285715e-07, "loss": 0.3266, "step": 24 }, { "epoch": 0.0, "grad_norm": 2.4156996750251345, "learning_rate": 5.952380952380953e-07, "loss": 0.3231, "step": 25 }, { "epoch": 0.0, "grad_norm": 2.3692584103692957, "learning_rate": 6.190476190476191e-07, "loss": 0.2806, "step": 26 }, { "epoch": 0.0, "grad_norm": 2.6932628521462156, "learning_rate": 6.428571428571428e-07, "loss": 0.3239, "step": 27 }, { "epoch": 0.0, "grad_norm": 2.3137713650870695, "learning_rate": 6.666666666666667e-07, "loss": 0.2365, "step": 28 }, { "epoch": 0.0, "grad_norm": 2.829652477374651, "learning_rate": 6.904761904761906e-07, "loss": 0.3574, "step": 29 }, { "epoch": 0.0, "grad_norm": 2.6553668991478743, "learning_rate": 7.142857142857143e-07, "loss": 0.2845, "step": 30 }, { "epoch": 0.0, "grad_norm": 11.495345310085186, "learning_rate": 7.380952380952381e-07, "loss": 0.7697, "step": 31 }, { "epoch": 0.0, "grad_norm": 2.389002003838203, "learning_rate": 7.61904761904762e-07, "loss": 0.2958, "step": 32 }, { "epoch": 0.0, "grad_norm": 2.2865110242169084, "learning_rate": 7.857142857142857e-07, "loss": 0.3121, "step": 33 }, { "epoch": 0.0, "grad_norm": 2.4238725380166968, "learning_rate": 8.095238095238096e-07, "loss": 0.258, "step": 34 }, { "epoch": 0.0, "grad_norm": 2.355608078103788, "learning_rate": 8.333333333333333e-07, "loss": 0.2702, "step": 35 }, { "epoch": 0.0, "grad_norm": 10.427643609197508, "learning_rate": 8.571428571428572e-07, "loss": 0.9491, "step": 36 }, { "epoch": 0.0, "grad_norm": 1.9928679067193384, "learning_rate": 8.80952380952381e-07, "loss": 0.2227, "step": 37 }, { "epoch": 0.0, "grad_norm": 2.2340918480144483, "learning_rate": 9.047619047619048e-07, "loss": 0.2885, "step": 38 }, { "epoch": 0.0, "grad_norm": 2.193974440968795, "learning_rate": 9.285714285714287e-07, "loss": 0.3291, "step": 39 }, { "epoch": 0.0, "grad_norm": 2.1196758770255992, "learning_rate": 9.523809523809525e-07, "loss": 0.2951, "step": 40 }, { "epoch": 0.0, "grad_norm": 2.1490244515586836, "learning_rate": 9.761904761904764e-07, "loss": 0.2745, "step": 41 }, { "epoch": 0.0, "grad_norm": 2.0259501280518046, "learning_rate": 1.0000000000000002e-06, "loss": 0.2335, "step": 42 }, { "epoch": 0.0, "grad_norm": 2.386078433373896, "learning_rate": 1.023809523809524e-06, "loss": 0.2754, "step": 43 }, { "epoch": 0.0, "grad_norm": 2.3222612620184395, "learning_rate": 1.0476190476190478e-06, "loss": 0.268, "step": 44 }, { "epoch": 0.0, "grad_norm": 1.9842027523787453, "learning_rate": 1.0714285714285714e-06, "loss": 0.2357, "step": 45 }, { "epoch": 0.0, "grad_norm": 1.9404390155680744, "learning_rate": 1.0952380952380954e-06, "loss": 0.2369, "step": 46 }, { "epoch": 0.0, "grad_norm": 2.1724755446218986, "learning_rate": 1.1190476190476192e-06, "loss": 0.2839, "step": 47 }, { "epoch": 0.0, "grad_norm": 2.147205523844692, "learning_rate": 1.142857142857143e-06, "loss": 0.2807, "step": 48 }, { "epoch": 0.0, "grad_norm": 2.1451415010931316, "learning_rate": 1.1666666666666668e-06, "loss": 0.2399, "step": 49 }, { "epoch": 0.0, "grad_norm": 2.2977228806377155, "learning_rate": 1.1904761904761906e-06, "loss": 0.2535, "step": 50 }, { "epoch": 0.0, "grad_norm": 6.1811249474428, "learning_rate": 1.2142857142857144e-06, "loss": 0.6552, "step": 51 }, { "epoch": 0.0, "grad_norm": 2.1343221115649977, "learning_rate": 1.2380952380952382e-06, "loss": 0.2532, "step": 52 }, { "epoch": 0.0, "grad_norm": 2.2639059478688472, "learning_rate": 1.261904761904762e-06, "loss": 0.2586, "step": 53 }, { "epoch": 0.0, "grad_norm": 2.0089478289042684, "learning_rate": 1.2857142857142856e-06, "loss": 0.2402, "step": 54 }, { "epoch": 0.0, "grad_norm": 2.24336603317913, "learning_rate": 1.3095238095238096e-06, "loss": 0.2846, "step": 55 }, { "epoch": 0.0, "grad_norm": 1.9587757608266414, "learning_rate": 1.3333333333333334e-06, "loss": 0.2065, "step": 56 }, { "epoch": 0.0, "grad_norm": 1.930728490515616, "learning_rate": 1.3571428571428572e-06, "loss": 0.2133, "step": 57 }, { "epoch": 0.0, "grad_norm": 2.183490676647861, "learning_rate": 1.3809523809523812e-06, "loss": 0.2805, "step": 58 }, { "epoch": 0.0, "grad_norm": 2.1172476465345884, "learning_rate": 1.4047619047619048e-06, "loss": 0.237, "step": 59 }, { "epoch": 0.0, "grad_norm": 6.5514224885111245, "learning_rate": 1.4285714285714286e-06, "loss": 0.7453, "step": 60 }, { "epoch": 0.0, "grad_norm": 2.1124935644479677, "learning_rate": 1.4523809523809526e-06, "loss": 0.2218, "step": 61 }, { "epoch": 0.0, "grad_norm": 2.170312311251715, "learning_rate": 1.4761904761904762e-06, "loss": 0.2409, "step": 62 }, { "epoch": 0.0, "grad_norm": 2.0894495745829857, "learning_rate": 1.5e-06, "loss": 0.1823, "step": 63 }, { "epoch": 0.0, "grad_norm": 1.8889374506086778, "learning_rate": 1.523809523809524e-06, "loss": 0.2266, "step": 64 }, { "epoch": 0.0, "grad_norm": 1.984488536257302, "learning_rate": 1.5476190476190479e-06, "loss": 0.211, "step": 65 }, { "epoch": 0.0, "grad_norm": 7.145016408706811, "learning_rate": 1.5714285714285714e-06, "loss": 0.7319, "step": 66 }, { "epoch": 0.0, "grad_norm": 2.1654520463013207, "learning_rate": 1.5952380952380953e-06, "loss": 0.2173, "step": 67 }, { "epoch": 0.0, "grad_norm": 2.244806015594005, "learning_rate": 1.6190476190476193e-06, "loss": 0.2628, "step": 68 }, { "epoch": 0.0, "grad_norm": 1.8942822683828673, "learning_rate": 1.642857142857143e-06, "loss": 0.1977, "step": 69 }, { "epoch": 0.01, "grad_norm": 1.8954494215887965, "learning_rate": 1.6666666666666667e-06, "loss": 0.1887, "step": 70 }, { "epoch": 0.01, "grad_norm": 2.0499942197816883, "learning_rate": 1.6904761904761907e-06, "loss": 0.2128, "step": 71 }, { "epoch": 0.01, "grad_norm": 1.9144317231484271, "learning_rate": 1.7142857142857145e-06, "loss": 0.1934, "step": 72 }, { "epoch": 0.01, "grad_norm": 2.104567411561711, "learning_rate": 1.738095238095238e-06, "loss": 0.2257, "step": 73 }, { "epoch": 0.01, "grad_norm": 1.9643248646333762, "learning_rate": 1.761904761904762e-06, "loss": 0.1816, "step": 74 }, { "epoch": 0.01, "grad_norm": 2.2148682540820457, "learning_rate": 1.7857142857142859e-06, "loss": 0.2361, "step": 75 }, { "epoch": 0.01, "grad_norm": 2.2483964630906637, "learning_rate": 1.8095238095238097e-06, "loss": 0.2452, "step": 76 }, { "epoch": 0.01, "grad_norm": 2.1479332406203637, "learning_rate": 1.8333333333333333e-06, "loss": 0.2634, "step": 77 }, { "epoch": 0.01, "grad_norm": 2.0963707561854488, "learning_rate": 1.8571428571428573e-06, "loss": 0.2121, "step": 78 }, { "epoch": 0.01, "grad_norm": 2.173608815933427, "learning_rate": 1.880952380952381e-06, "loss": 0.2404, "step": 79 }, { "epoch": 0.01, "grad_norm": 8.885364430718631, "learning_rate": 1.904761904761905e-06, "loss": 0.7518, "step": 80 }, { "epoch": 0.01, "grad_norm": 2.017850365944102, "learning_rate": 1.928571428571429e-06, "loss": 0.2323, "step": 81 }, { "epoch": 0.01, "grad_norm": 2.0934319220645303, "learning_rate": 1.9523809523809527e-06, "loss": 0.2107, "step": 82 }, { "epoch": 0.01, "grad_norm": 2.0157776224705963, "learning_rate": 1.976190476190476e-06, "loss": 0.2161, "step": 83 }, { "epoch": 0.01, "grad_norm": 2.0963356587727358, "learning_rate": 2.0000000000000003e-06, "loss": 0.2073, "step": 84 }, { "epoch": 0.01, "grad_norm": 2.1453558286787144, "learning_rate": 2.023809523809524e-06, "loss": 0.2714, "step": 85 }, { "epoch": 0.01, "grad_norm": 2.105244076484882, "learning_rate": 2.047619047619048e-06, "loss": 0.1831, "step": 86 }, { "epoch": 0.01, "grad_norm": 2.0469816531104628, "learning_rate": 2.0714285714285717e-06, "loss": 0.1842, "step": 87 }, { "epoch": 0.01, "grad_norm": 2.1131678021217355, "learning_rate": 2.0952380952380955e-06, "loss": 0.2319, "step": 88 }, { "epoch": 0.01, "grad_norm": 2.2234116155027306, "learning_rate": 2.1190476190476194e-06, "loss": 0.2364, "step": 89 }, { "epoch": 0.01, "grad_norm": 2.2898981677302066, "learning_rate": 2.1428571428571427e-06, "loss": 0.2098, "step": 90 }, { "epoch": 0.01, "grad_norm": 1.865692206738114, "learning_rate": 2.166666666666667e-06, "loss": 0.1888, "step": 91 }, { "epoch": 0.01, "grad_norm": 2.2130298092942216, "learning_rate": 2.1904761904761908e-06, "loss": 0.2547, "step": 92 }, { "epoch": 0.01, "grad_norm": 6.72766461879791, "learning_rate": 2.2142857142857146e-06, "loss": 0.7045, "step": 93 }, { "epoch": 0.01, "grad_norm": 1.977836159104701, "learning_rate": 2.2380952380952384e-06, "loss": 0.2262, "step": 94 }, { "epoch": 0.01, "grad_norm": 2.1714672727232327, "learning_rate": 2.261904761904762e-06, "loss": 0.1557, "step": 95 }, { "epoch": 0.01, "grad_norm": 1.9299618073119207, "learning_rate": 2.285714285714286e-06, "loss": 0.224, "step": 96 }, { "epoch": 0.01, "grad_norm": 2.046524239303243, "learning_rate": 2.3095238095238098e-06, "loss": 0.1843, "step": 97 }, { "epoch": 0.01, "grad_norm": 2.0839290248878695, "learning_rate": 2.3333333333333336e-06, "loss": 0.2689, "step": 98 }, { "epoch": 0.01, "grad_norm": 2.120963721697617, "learning_rate": 2.3571428571428574e-06, "loss": 0.1751, "step": 99 }, { "epoch": 0.01, "grad_norm": 1.9036105564820034, "learning_rate": 2.380952380952381e-06, "loss": 0.2208, "step": 100 }, { "epoch": 0.01, "grad_norm": 2.0485115765607276, "learning_rate": 2.404761904761905e-06, "loss": 0.2307, "step": 101 }, { "epoch": 0.01, "grad_norm": 5.60262679923109, "learning_rate": 2.428571428571429e-06, "loss": 0.7797, "step": 102 }, { "epoch": 0.01, "grad_norm": 1.837861455828818, "learning_rate": 2.4523809523809526e-06, "loss": 0.2522, "step": 103 }, { "epoch": 0.01, "grad_norm": 2.1194774167042114, "learning_rate": 2.4761904761904764e-06, "loss": 0.194, "step": 104 }, { "epoch": 0.01, "grad_norm": 2.022736300741432, "learning_rate": 2.5e-06, "loss": 0.2121, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.9780281422325785, "learning_rate": 2.523809523809524e-06, "loss": 0.2147, "step": 106 }, { "epoch": 0.01, "grad_norm": 1.8383683180089418, "learning_rate": 2.547619047619048e-06, "loss": 0.2183, "step": 107 }, { "epoch": 0.01, "grad_norm": 1.9918997649579477, "learning_rate": 2.571428571428571e-06, "loss": 0.2279, "step": 108 }, { "epoch": 0.01, "grad_norm": 1.9099623977560096, "learning_rate": 2.595238095238096e-06, "loss": 0.229, "step": 109 }, { "epoch": 0.01, "grad_norm": 2.1413284891047724, "learning_rate": 2.6190476190476192e-06, "loss": 0.2221, "step": 110 }, { "epoch": 0.01, "grad_norm": 2.0913816480066716, "learning_rate": 2.642857142857143e-06, "loss": 0.2277, "step": 111 }, { "epoch": 0.01, "grad_norm": 2.3357881099876483, "learning_rate": 2.666666666666667e-06, "loss": 0.2501, "step": 112 }, { "epoch": 0.01, "grad_norm": 4.909182560515798, "learning_rate": 2.6904761904761906e-06, "loss": 0.5005, "step": 113 }, { "epoch": 0.01, "grad_norm": 1.9189708493261897, "learning_rate": 2.7142857142857144e-06, "loss": 0.2104, "step": 114 }, { "epoch": 0.01, "grad_norm": 2.09162065524797, "learning_rate": 2.7380952380952387e-06, "loss": 0.2177, "step": 115 }, { "epoch": 0.01, "grad_norm": 2.0144457732401135, "learning_rate": 2.7619047619047625e-06, "loss": 0.263, "step": 116 }, { "epoch": 0.01, "grad_norm": 1.8133612519967228, "learning_rate": 2.785714285714286e-06, "loss": 0.2118, "step": 117 }, { "epoch": 0.01, "grad_norm": 1.8419847635786342, "learning_rate": 2.8095238095238096e-06, "loss": 0.2098, "step": 118 }, { "epoch": 0.01, "grad_norm": 2.1396979749275866, "learning_rate": 2.8333333333333335e-06, "loss": 0.1928, "step": 119 }, { "epoch": 0.01, "grad_norm": 2.0846192623694746, "learning_rate": 2.8571428571428573e-06, "loss": 0.2996, "step": 120 }, { "epoch": 0.01, "grad_norm": 2.0703470958859502, "learning_rate": 2.880952380952381e-06, "loss": 0.2084, "step": 121 }, { "epoch": 0.01, "grad_norm": 2.159704343366871, "learning_rate": 2.9047619047619053e-06, "loss": 0.2049, "step": 122 }, { "epoch": 0.01, "grad_norm": 2.0614930122275426, "learning_rate": 2.928571428571429e-06, "loss": 0.2517, "step": 123 }, { "epoch": 0.01, "grad_norm": 2.262587549707648, "learning_rate": 2.9523809523809525e-06, "loss": 0.2032, "step": 124 }, { "epoch": 0.01, "grad_norm": 2.262565782524033, "learning_rate": 2.9761904761904763e-06, "loss": 0.2264, "step": 125 }, { "epoch": 0.01, "grad_norm": 2.056499286779882, "learning_rate": 3e-06, "loss": 0.2335, "step": 126 }, { "epoch": 0.01, "grad_norm": 2.0216787118216217, "learning_rate": 3.023809523809524e-06, "loss": 0.2281, "step": 127 }, { "epoch": 0.01, "grad_norm": 2.5201661765670345, "learning_rate": 3.047619047619048e-06, "loss": 0.2485, "step": 128 }, { "epoch": 0.01, "grad_norm": 2.2942754671277363, "learning_rate": 3.071428571428572e-06, "loss": 0.2233, "step": 129 }, { "epoch": 0.01, "grad_norm": 1.977533385058842, "learning_rate": 3.0952380952380957e-06, "loss": 0.2035, "step": 130 }, { "epoch": 0.01, "grad_norm": 2.1238003805874146, "learning_rate": 3.1190476190476195e-06, "loss": 0.1839, "step": 131 }, { "epoch": 0.01, "grad_norm": 1.9678074238799244, "learning_rate": 3.142857142857143e-06, "loss": 0.204, "step": 132 }, { "epoch": 0.01, "grad_norm": 1.9627148032828434, "learning_rate": 3.1666666666666667e-06, "loss": 0.2337, "step": 133 }, { "epoch": 0.01, "grad_norm": 2.1960777229916686, "learning_rate": 3.1904761904761905e-06, "loss": 0.215, "step": 134 }, { "epoch": 0.01, "grad_norm": 1.7854201466840234, "learning_rate": 3.2142857142857147e-06, "loss": 0.1838, "step": 135 }, { "epoch": 0.01, "grad_norm": 1.8595115145218097, "learning_rate": 3.2380952380952385e-06, "loss": 0.2047, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.9915969651414422, "learning_rate": 3.2619047619047623e-06, "loss": 0.2611, "step": 137 }, { "epoch": 0.01, "grad_norm": 2.1160146592796214, "learning_rate": 3.285714285714286e-06, "loss": 0.2288, "step": 138 }, { "epoch": 0.01, "grad_norm": 2.1093082399019334, "learning_rate": 3.3095238095238095e-06, "loss": 0.223, "step": 139 }, { "epoch": 0.01, "grad_norm": 1.9661795407430658, "learning_rate": 3.3333333333333333e-06, "loss": 0.2658, "step": 140 }, { "epoch": 0.01, "grad_norm": 1.8064197669404958, "learning_rate": 3.357142857142857e-06, "loss": 0.1927, "step": 141 }, { "epoch": 0.01, "grad_norm": 1.978841467097062, "learning_rate": 3.3809523809523814e-06, "loss": 0.2037, "step": 142 }, { "epoch": 0.01, "grad_norm": 2.204542478982373, "learning_rate": 3.404761904761905e-06, "loss": 0.2557, "step": 143 }, { "epoch": 0.01, "grad_norm": 1.7884326802506234, "learning_rate": 3.428571428571429e-06, "loss": 0.1888, "step": 144 }, { "epoch": 0.01, "grad_norm": 1.8491485952298592, "learning_rate": 3.4523809523809528e-06, "loss": 0.1853, "step": 145 }, { "epoch": 0.01, "grad_norm": 4.189118700654106, "learning_rate": 3.476190476190476e-06, "loss": 0.4302, "step": 146 }, { "epoch": 0.01, "grad_norm": 2.0070526993070854, "learning_rate": 3.5e-06, "loss": 0.1866, "step": 147 }, { "epoch": 0.01, "grad_norm": 1.9238891857286395, "learning_rate": 3.523809523809524e-06, "loss": 0.1902, "step": 148 }, { "epoch": 0.01, "grad_norm": 2.3488600553246397, "learning_rate": 3.547619047619048e-06, "loss": 0.2199, "step": 149 }, { "epoch": 0.01, "grad_norm": 2.2937836740323254, "learning_rate": 3.5714285714285718e-06, "loss": 0.2366, "step": 150 }, { "epoch": 0.01, "grad_norm": 1.955484153435743, "learning_rate": 3.5952380952380956e-06, "loss": 0.2138, "step": 151 }, { "epoch": 0.01, "grad_norm": 1.9696059878227126, "learning_rate": 3.6190476190476194e-06, "loss": 0.222, "step": 152 }, { "epoch": 0.01, "grad_norm": 2.036668231835895, "learning_rate": 3.642857142857143e-06, "loss": 0.2076, "step": 153 }, { "epoch": 0.01, "grad_norm": 2.2371897387757533, "learning_rate": 3.6666666666666666e-06, "loss": 0.2002, "step": 154 }, { "epoch": 0.01, "grad_norm": 2.1150825877853476, "learning_rate": 3.690476190476191e-06, "loss": 0.2398, "step": 155 }, { "epoch": 0.01, "grad_norm": 2.051675139066703, "learning_rate": 3.7142857142857146e-06, "loss": 0.2697, "step": 156 }, { "epoch": 0.01, "grad_norm": 1.9987071450358536, "learning_rate": 3.7380952380952384e-06, "loss": 0.2921, "step": 157 }, { "epoch": 0.01, "grad_norm": 1.852216888343916, "learning_rate": 3.761904761904762e-06, "loss": 0.2074, "step": 158 }, { "epoch": 0.01, "grad_norm": 1.9109294703981274, "learning_rate": 3.785714285714286e-06, "loss": 0.2229, "step": 159 }, { "epoch": 0.01, "grad_norm": 1.9827822056153939, "learning_rate": 3.80952380952381e-06, "loss": 0.2044, "step": 160 }, { "epoch": 0.01, "grad_norm": 1.8958194523778151, "learning_rate": 3.833333333333334e-06, "loss": 0.2281, "step": 161 }, { "epoch": 0.01, "grad_norm": 2.0352702433879086, "learning_rate": 3.857142857142858e-06, "loss": 0.1967, "step": 162 }, { "epoch": 0.01, "grad_norm": 2.193590453573038, "learning_rate": 3.880952380952381e-06, "loss": 0.2713, "step": 163 }, { "epoch": 0.01, "grad_norm": 2.087677938534779, "learning_rate": 3.9047619047619055e-06, "loss": 0.2218, "step": 164 }, { "epoch": 0.01, "grad_norm": 8.925880552360889, "learning_rate": 3.928571428571429e-06, "loss": 0.5797, "step": 165 }, { "epoch": 0.01, "grad_norm": 2.3067196692705023, "learning_rate": 3.952380952380952e-06, "loss": 0.2556, "step": 166 }, { "epoch": 0.01, "grad_norm": 2.481123179032876, "learning_rate": 3.9761904761904764e-06, "loss": 0.2198, "step": 167 }, { "epoch": 0.01, "grad_norm": 1.8364567899800972, "learning_rate": 4.000000000000001e-06, "loss": 0.2337, "step": 168 }, { "epoch": 0.01, "grad_norm": 1.9577327048730095, "learning_rate": 4.023809523809524e-06, "loss": 0.2237, "step": 169 }, { "epoch": 0.01, "grad_norm": 2.3386135226840246, "learning_rate": 4.047619047619048e-06, "loss": 0.2615, "step": 170 }, { "epoch": 0.01, "grad_norm": 2.1206970562637233, "learning_rate": 4.071428571428572e-06, "loss": 0.2118, "step": 171 }, { "epoch": 0.01, "grad_norm": 1.9515176070657816, "learning_rate": 4.095238095238096e-06, "loss": 0.1839, "step": 172 }, { "epoch": 0.01, "grad_norm": 1.9422732216718517, "learning_rate": 4.119047619047619e-06, "loss": 0.2069, "step": 173 }, { "epoch": 0.01, "grad_norm": 1.8946655266765382, "learning_rate": 4.1428571428571435e-06, "loss": 0.1613, "step": 174 }, { "epoch": 0.01, "grad_norm": 1.7844880333011413, "learning_rate": 4.166666666666667e-06, "loss": 0.1955, "step": 175 }, { "epoch": 0.01, "grad_norm": 2.2916838035958893, "learning_rate": 4.190476190476191e-06, "loss": 0.245, "step": 176 }, { "epoch": 0.01, "grad_norm": 1.9348749130091802, "learning_rate": 4.2142857142857145e-06, "loss": 0.24, "step": 177 }, { "epoch": 0.01, "grad_norm": 1.9060851987544007, "learning_rate": 4.238095238095239e-06, "loss": 0.1964, "step": 178 }, { "epoch": 0.01, "grad_norm": 2.1361682290003974, "learning_rate": 4.261904761904762e-06, "loss": 0.2268, "step": 179 }, { "epoch": 0.01, "grad_norm": 2.0883582444581195, "learning_rate": 4.2857142857142855e-06, "loss": 0.2392, "step": 180 }, { "epoch": 0.01, "grad_norm": 1.7727517307423128, "learning_rate": 4.30952380952381e-06, "loss": 0.1869, "step": 181 }, { "epoch": 0.01, "grad_norm": 2.010109935693985, "learning_rate": 4.333333333333334e-06, "loss": 0.2169, "step": 182 }, { "epoch": 0.01, "grad_norm": 2.1480669786310616, "learning_rate": 4.357142857142857e-06, "loss": 0.286, "step": 183 }, { "epoch": 0.01, "grad_norm": 2.0812727334033108, "learning_rate": 4.3809523809523815e-06, "loss": 0.2386, "step": 184 }, { "epoch": 0.01, "grad_norm": 1.841835771092403, "learning_rate": 4.404761904761905e-06, "loss": 0.2043, "step": 185 }, { "epoch": 0.01, "grad_norm": 1.7745460577151568, "learning_rate": 4.428571428571429e-06, "loss": 0.1829, "step": 186 }, { "epoch": 0.01, "grad_norm": 2.0917374449433015, "learning_rate": 4.4523809523809525e-06, "loss": 0.284, "step": 187 }, { "epoch": 0.01, "grad_norm": 1.9414563378563814, "learning_rate": 4.476190476190477e-06, "loss": 0.2107, "step": 188 }, { "epoch": 0.01, "grad_norm": 1.9333551533085978, "learning_rate": 4.5e-06, "loss": 0.2124, "step": 189 }, { "epoch": 0.01, "grad_norm": 2.080567276770741, "learning_rate": 4.523809523809524e-06, "loss": 0.2392, "step": 190 }, { "epoch": 0.01, "grad_norm": 2.012896259654425, "learning_rate": 4.547619047619048e-06, "loss": 0.2048, "step": 191 }, { "epoch": 0.01, "grad_norm": 2.105208492002958, "learning_rate": 4.571428571428572e-06, "loss": 0.2606, "step": 192 }, { "epoch": 0.01, "grad_norm": 2.027416340272993, "learning_rate": 4.595238095238095e-06, "loss": 0.2133, "step": 193 }, { "epoch": 0.01, "grad_norm": 1.87599091564929, "learning_rate": 4.6190476190476196e-06, "loss": 0.1843, "step": 194 }, { "epoch": 0.01, "grad_norm": 1.9495957404961308, "learning_rate": 4.642857142857144e-06, "loss": 0.2022, "step": 195 }, { "epoch": 0.01, "grad_norm": 2.148344534365086, "learning_rate": 4.666666666666667e-06, "loss": 0.2678, "step": 196 }, { "epoch": 0.01, "grad_norm": 1.8479919100385038, "learning_rate": 4.6904761904761905e-06, "loss": 0.183, "step": 197 }, { "epoch": 0.01, "grad_norm": 2.277673980381613, "learning_rate": 4.714285714285715e-06, "loss": 0.3075, "step": 198 }, { "epoch": 0.01, "grad_norm": 1.9395407154116269, "learning_rate": 4.738095238095238e-06, "loss": 0.2314, "step": 199 }, { "epoch": 0.01, "grad_norm": 3.9375029931816763, "learning_rate": 4.761904761904762e-06, "loss": 0.4656, "step": 200 }, { "epoch": 0.01, "grad_norm": 1.9769617439412623, "learning_rate": 4.785714285714287e-06, "loss": 0.2318, "step": 201 }, { "epoch": 0.01, "grad_norm": 2.0826605803702134, "learning_rate": 4.80952380952381e-06, "loss": 0.2225, "step": 202 }, { "epoch": 0.01, "grad_norm": 1.9190098513987295, "learning_rate": 4.833333333333333e-06, "loss": 0.2028, "step": 203 }, { "epoch": 0.01, "grad_norm": 6.19378248589066, "learning_rate": 4.857142857142858e-06, "loss": 0.6534, "step": 204 }, { "epoch": 0.01, "grad_norm": 1.8999833167107292, "learning_rate": 4.880952380952381e-06, "loss": 0.1908, "step": 205 }, { "epoch": 0.01, "grad_norm": 1.6608307540657135, "learning_rate": 4.904761904761905e-06, "loss": 0.1698, "step": 206 }, { "epoch": 0.01, "grad_norm": 2.050021744810209, "learning_rate": 4.928571428571429e-06, "loss": 0.2399, "step": 207 }, { "epoch": 0.01, "grad_norm": 1.9810315170285808, "learning_rate": 4.952380952380953e-06, "loss": 0.2454, "step": 208 }, { "epoch": 0.01, "grad_norm": 1.8959122311030974, "learning_rate": 4.976190476190477e-06, "loss": 0.1822, "step": 209 }, { "epoch": 0.02, "grad_norm": 1.9266170929302338, "learning_rate": 5e-06, "loss": 0.2214, "step": 210 }, { "epoch": 0.02, "grad_norm": 1.8576383563935714, "learning_rate": 5.023809523809524e-06, "loss": 0.1747, "step": 211 }, { "epoch": 0.02, "grad_norm": 2.420005028304979, "learning_rate": 5.047619047619048e-06, "loss": 0.2087, "step": 212 }, { "epoch": 0.02, "grad_norm": 2.1053757678492335, "learning_rate": 5.071428571428571e-06, "loss": 0.2518, "step": 213 }, { "epoch": 0.02, "grad_norm": 2.1075845919874414, "learning_rate": 5.095238095238096e-06, "loss": 0.2524, "step": 214 }, { "epoch": 0.02, "grad_norm": 2.0010763461542953, "learning_rate": 5.119047619047619e-06, "loss": 0.1987, "step": 215 }, { "epoch": 0.02, "grad_norm": 2.1973253566392272, "learning_rate": 5.142857142857142e-06, "loss": 0.24, "step": 216 }, { "epoch": 0.02, "grad_norm": 2.0411437423876895, "learning_rate": 5.1666666666666675e-06, "loss": 0.1894, "step": 217 }, { "epoch": 0.02, "grad_norm": 1.8602203050490493, "learning_rate": 5.190476190476192e-06, "loss": 0.216, "step": 218 }, { "epoch": 0.02, "grad_norm": 1.9745010237795677, "learning_rate": 5.214285714285715e-06, "loss": 0.2325, "step": 219 }, { "epoch": 0.02, "grad_norm": 2.1845890495625513, "learning_rate": 5.2380952380952384e-06, "loss": 0.2343, "step": 220 }, { "epoch": 0.02, "grad_norm": 2.159794967578708, "learning_rate": 5.261904761904763e-06, "loss": 0.2306, "step": 221 }, { "epoch": 0.02, "grad_norm": 1.8229650941306363, "learning_rate": 5.285714285714286e-06, "loss": 0.2058, "step": 222 }, { "epoch": 0.02, "grad_norm": 2.0138766720966954, "learning_rate": 5.30952380952381e-06, "loss": 0.2167, "step": 223 }, { "epoch": 0.02, "grad_norm": 1.83581223570138, "learning_rate": 5.333333333333334e-06, "loss": 0.1959, "step": 224 }, { "epoch": 0.02, "grad_norm": 10.009385116397086, "learning_rate": 5.357142857142857e-06, "loss": 0.8172, "step": 225 }, { "epoch": 0.02, "grad_norm": 2.01239880463524, "learning_rate": 5.380952380952381e-06, "loss": 0.2237, "step": 226 }, { "epoch": 0.02, "grad_norm": 1.9691087214986362, "learning_rate": 5.404761904761905e-06, "loss": 0.2073, "step": 227 }, { "epoch": 0.02, "grad_norm": 1.906422073689916, "learning_rate": 5.428571428571429e-06, "loss": 0.2032, "step": 228 }, { "epoch": 0.02, "grad_norm": 2.080456628303908, "learning_rate": 5.452380952380952e-06, "loss": 0.2177, "step": 229 }, { "epoch": 0.02, "grad_norm": 2.1691464980549333, "learning_rate": 5.476190476190477e-06, "loss": 0.2563, "step": 230 }, { "epoch": 0.02, "grad_norm": 1.9609442970542161, "learning_rate": 5.500000000000001e-06, "loss": 0.2382, "step": 231 }, { "epoch": 0.02, "grad_norm": 2.063500837006452, "learning_rate": 5.523809523809525e-06, "loss": 0.2757, "step": 232 }, { "epoch": 0.02, "grad_norm": 1.9990118444146079, "learning_rate": 5.547619047619048e-06, "loss": 0.2847, "step": 233 }, { "epoch": 0.02, "grad_norm": 5.040029602463559, "learning_rate": 5.571428571428572e-06, "loss": 0.7314, "step": 234 }, { "epoch": 0.02, "grad_norm": 1.9620435655273838, "learning_rate": 5.595238095238096e-06, "loss": 0.2261, "step": 235 }, { "epoch": 0.02, "grad_norm": 2.274342496679555, "learning_rate": 5.619047619047619e-06, "loss": 0.2366, "step": 236 }, { "epoch": 0.02, "grad_norm": 1.9279924210380983, "learning_rate": 5.6428571428571435e-06, "loss": 0.2102, "step": 237 }, { "epoch": 0.02, "grad_norm": 2.020322634043729, "learning_rate": 5.666666666666667e-06, "loss": 0.2393, "step": 238 }, { "epoch": 0.02, "grad_norm": 1.9948923549138908, "learning_rate": 5.690476190476191e-06, "loss": 0.2242, "step": 239 }, { "epoch": 0.02, "grad_norm": 2.004488668018897, "learning_rate": 5.7142857142857145e-06, "loss": 0.1864, "step": 240 }, { "epoch": 0.02, "grad_norm": 1.9465692076813665, "learning_rate": 5.738095238095238e-06, "loss": 0.281, "step": 241 }, { "epoch": 0.02, "grad_norm": 1.9236574326408011, "learning_rate": 5.761904761904762e-06, "loss": 0.2273, "step": 242 }, { "epoch": 0.02, "grad_norm": 1.943027503615834, "learning_rate": 5.785714285714286e-06, "loss": 0.2182, "step": 243 }, { "epoch": 0.02, "grad_norm": 1.8202266039934019, "learning_rate": 5.8095238095238106e-06, "loss": 0.2681, "step": 244 }, { "epoch": 0.02, "grad_norm": 1.8943427872082612, "learning_rate": 5.833333333333334e-06, "loss": 0.1831, "step": 245 }, { "epoch": 0.02, "grad_norm": 1.9163628908792816, "learning_rate": 5.857142857142858e-06, "loss": 0.2219, "step": 246 }, { "epoch": 0.02, "grad_norm": 2.02467163952195, "learning_rate": 5.8809523809523816e-06, "loss": 0.1822, "step": 247 }, { "epoch": 0.02, "grad_norm": 1.9948307918894201, "learning_rate": 5.904761904761905e-06, "loss": 0.2588, "step": 248 }, { "epoch": 0.02, "grad_norm": 1.8784218583060284, "learning_rate": 5.928571428571429e-06, "loss": 0.1999, "step": 249 }, { "epoch": 0.02, "grad_norm": 1.9053908951342011, "learning_rate": 5.9523809523809525e-06, "loss": 0.1974, "step": 250 }, { "epoch": 0.02, "grad_norm": 1.8344519715845773, "learning_rate": 5.976190476190477e-06, "loss": 0.1896, "step": 251 }, { "epoch": 0.02, "grad_norm": 2.061751098664898, "learning_rate": 6e-06, "loss": 0.2566, "step": 252 }, { "epoch": 0.02, "grad_norm": 6.457791234632552, "learning_rate": 6.023809523809524e-06, "loss": 0.7121, "step": 253 }, { "epoch": 0.02, "grad_norm": 5.290405650754611, "learning_rate": 6.047619047619048e-06, "loss": 0.6195, "step": 254 }, { "epoch": 0.02, "grad_norm": 1.915236806669019, "learning_rate": 6.071428571428571e-06, "loss": 0.2534, "step": 255 }, { "epoch": 0.02, "grad_norm": 1.9135861660087692, "learning_rate": 6.095238095238096e-06, "loss": 0.2378, "step": 256 }, { "epoch": 0.02, "grad_norm": 8.77449740006332, "learning_rate": 6.11904761904762e-06, "loss": 0.9165, "step": 257 }, { "epoch": 0.02, "grad_norm": 1.9314814238403286, "learning_rate": 6.142857142857144e-06, "loss": 0.225, "step": 258 }, { "epoch": 0.02, "grad_norm": 1.8597824236462694, "learning_rate": 6.166666666666667e-06, "loss": 0.2243, "step": 259 }, { "epoch": 0.02, "grad_norm": 1.9440300785829745, "learning_rate": 6.1904761904761914e-06, "loss": 0.1999, "step": 260 }, { "epoch": 0.02, "grad_norm": 1.8683095191981387, "learning_rate": 6.214285714285715e-06, "loss": 0.2386, "step": 261 }, { "epoch": 0.02, "grad_norm": 2.0570951335021994, "learning_rate": 6.238095238095239e-06, "loss": 0.2346, "step": 262 }, { "epoch": 0.02, "grad_norm": 1.9807046290463075, "learning_rate": 6.261904761904762e-06, "loss": 0.2873, "step": 263 }, { "epoch": 0.02, "grad_norm": 1.9933947520265711, "learning_rate": 6.285714285714286e-06, "loss": 0.216, "step": 264 }, { "epoch": 0.02, "grad_norm": 1.9983609559808428, "learning_rate": 6.30952380952381e-06, "loss": 0.2141, "step": 265 }, { "epoch": 0.02, "grad_norm": 2.0267206624022767, "learning_rate": 6.333333333333333e-06, "loss": 0.2014, "step": 266 }, { "epoch": 0.02, "grad_norm": 1.9637057491999363, "learning_rate": 6.357142857142858e-06, "loss": 0.2377, "step": 267 }, { "epoch": 0.02, "grad_norm": 1.912161557180021, "learning_rate": 6.380952380952381e-06, "loss": 0.2117, "step": 268 }, { "epoch": 0.02, "grad_norm": 1.9922123192772643, "learning_rate": 6.404761904761904e-06, "loss": 0.2168, "step": 269 }, { "epoch": 0.02, "grad_norm": 2.0524865993741486, "learning_rate": 6.4285714285714295e-06, "loss": 0.3244, "step": 270 }, { "epoch": 0.02, "grad_norm": 1.9520151412476405, "learning_rate": 6.452380952380954e-06, "loss": 0.2487, "step": 271 }, { "epoch": 0.02, "grad_norm": 1.8674499915952019, "learning_rate": 6.476190476190477e-06, "loss": 0.2079, "step": 272 }, { "epoch": 0.02, "grad_norm": 1.701869163842095, "learning_rate": 6.5000000000000004e-06, "loss": 0.227, "step": 273 }, { "epoch": 0.02, "grad_norm": 1.8613229362056498, "learning_rate": 6.523809523809525e-06, "loss": 0.2185, "step": 274 }, { "epoch": 0.02, "grad_norm": 1.8961958538148296, "learning_rate": 6.547619047619048e-06, "loss": 0.2477, "step": 275 }, { "epoch": 0.02, "grad_norm": 2.048977212661111, "learning_rate": 6.571428571428572e-06, "loss": 0.2186, "step": 276 }, { "epoch": 0.02, "grad_norm": 1.8252064171242572, "learning_rate": 6.595238095238096e-06, "loss": 0.2038, "step": 277 }, { "epoch": 0.02, "grad_norm": 2.020613005565305, "learning_rate": 6.619047619047619e-06, "loss": 0.2012, "step": 278 }, { "epoch": 0.02, "grad_norm": 6.267345323856519, "learning_rate": 6.642857142857143e-06, "loss": 0.8847, "step": 279 }, { "epoch": 0.02, "grad_norm": 9.566668027407411, "learning_rate": 6.666666666666667e-06, "loss": 0.5696, "step": 280 }, { "epoch": 0.02, "grad_norm": 1.857475019185172, "learning_rate": 6.690476190476191e-06, "loss": 0.2412, "step": 281 }, { "epoch": 0.02, "grad_norm": 1.9371231880100228, "learning_rate": 6.714285714285714e-06, "loss": 0.2066, "step": 282 }, { "epoch": 0.02, "grad_norm": 1.750408234656862, "learning_rate": 6.738095238095239e-06, "loss": 0.1983, "step": 283 }, { "epoch": 0.02, "grad_norm": 2.0621317569036024, "learning_rate": 6.761904761904763e-06, "loss": 0.2746, "step": 284 }, { "epoch": 0.02, "grad_norm": 1.8704400263261065, "learning_rate": 6.785714285714287e-06, "loss": 0.1551, "step": 285 }, { "epoch": 0.02, "grad_norm": 2.2745572060712784, "learning_rate": 6.80952380952381e-06, "loss": 0.2485, "step": 286 }, { "epoch": 0.02, "grad_norm": 2.0296284764475665, "learning_rate": 6.833333333333334e-06, "loss": 0.2083, "step": 287 }, { "epoch": 0.02, "grad_norm": 6.663487325561779, "learning_rate": 6.857142857142858e-06, "loss": 0.6986, "step": 288 }, { "epoch": 0.02, "grad_norm": 1.9826412010368553, "learning_rate": 6.880952380952381e-06, "loss": 0.2323, "step": 289 }, { "epoch": 0.02, "grad_norm": 1.890359106406398, "learning_rate": 6.9047619047619055e-06, "loss": 0.2125, "step": 290 }, { "epoch": 0.02, "grad_norm": 6.159655777152774, "learning_rate": 6.928571428571429e-06, "loss": 0.5949, "step": 291 }, { "epoch": 0.02, "grad_norm": 2.0622253517354467, "learning_rate": 6.952380952380952e-06, "loss": 0.2269, "step": 292 }, { "epoch": 0.02, "grad_norm": 1.8794407809613203, "learning_rate": 6.9761904761904765e-06, "loss": 0.2496, "step": 293 }, { "epoch": 0.02, "grad_norm": 1.8273633787133798, "learning_rate": 7e-06, "loss": 0.2123, "step": 294 }, { "epoch": 0.02, "grad_norm": 2.2604015118849006, "learning_rate": 7.023809523809524e-06, "loss": 0.2249, "step": 295 }, { "epoch": 0.02, "grad_norm": 1.9223540799930627, "learning_rate": 7.047619047619048e-06, "loss": 0.2504, "step": 296 }, { "epoch": 0.02, "grad_norm": 2.033197568809861, "learning_rate": 7.0714285714285726e-06, "loss": 0.2549, "step": 297 }, { "epoch": 0.02, "grad_norm": 1.9447513722776977, "learning_rate": 7.095238095238096e-06, "loss": 0.2465, "step": 298 }, { "epoch": 0.02, "grad_norm": 1.896405654316611, "learning_rate": 7.11904761904762e-06, "loss": 0.2532, "step": 299 }, { "epoch": 0.02, "grad_norm": 1.9867067163972432, "learning_rate": 7.1428571428571436e-06, "loss": 0.2049, "step": 300 }, { "epoch": 0.02, "grad_norm": 5.3317875248763205, "learning_rate": 7.166666666666667e-06, "loss": 0.5459, "step": 301 }, { "epoch": 0.02, "grad_norm": 1.9372191264235612, "learning_rate": 7.190476190476191e-06, "loss": 0.2256, "step": 302 }, { "epoch": 0.02, "grad_norm": 1.6900627862221096, "learning_rate": 7.2142857142857145e-06, "loss": 0.1987, "step": 303 }, { "epoch": 0.02, "grad_norm": 1.9262794348517647, "learning_rate": 7.238095238095239e-06, "loss": 0.2168, "step": 304 }, { "epoch": 0.02, "grad_norm": 1.7828153071973336, "learning_rate": 7.261904761904762e-06, "loss": 0.2578, "step": 305 }, { "epoch": 0.02, "grad_norm": 1.7914881338820046, "learning_rate": 7.285714285714286e-06, "loss": 0.2193, "step": 306 }, { "epoch": 0.02, "grad_norm": 2.0251201826842347, "learning_rate": 7.30952380952381e-06, "loss": 0.2115, "step": 307 }, { "epoch": 0.02, "grad_norm": 1.8815559512328361, "learning_rate": 7.333333333333333e-06, "loss": 0.2182, "step": 308 }, { "epoch": 0.02, "grad_norm": 1.9393022963495439, "learning_rate": 7.357142857142858e-06, "loss": 0.2492, "step": 309 }, { "epoch": 0.02, "grad_norm": 1.8774064763511955, "learning_rate": 7.380952380952382e-06, "loss": 0.2701, "step": 310 }, { "epoch": 0.02, "grad_norm": 1.7410433659647047, "learning_rate": 7.404761904761906e-06, "loss": 0.228, "step": 311 }, { "epoch": 0.02, "grad_norm": 1.8218936762193778, "learning_rate": 7.428571428571429e-06, "loss": 0.2339, "step": 312 }, { "epoch": 0.02, "grad_norm": 1.7743420188901728, "learning_rate": 7.4523809523809534e-06, "loss": 0.2082, "step": 313 }, { "epoch": 0.02, "grad_norm": 1.9498876741358502, "learning_rate": 7.476190476190477e-06, "loss": 0.2275, "step": 314 }, { "epoch": 0.02, "grad_norm": 1.8340673029230719, "learning_rate": 7.500000000000001e-06, "loss": 0.1732, "step": 315 }, { "epoch": 0.02, "grad_norm": 1.7993362037784466, "learning_rate": 7.523809523809524e-06, "loss": 0.2544, "step": 316 }, { "epoch": 0.02, "grad_norm": 7.589264421487482, "learning_rate": 7.547619047619048e-06, "loss": 0.6711, "step": 317 }, { "epoch": 0.02, "grad_norm": 1.918163110200814, "learning_rate": 7.571428571428572e-06, "loss": 0.253, "step": 318 }, { "epoch": 0.02, "grad_norm": 1.794946759088491, "learning_rate": 7.595238095238095e-06, "loss": 0.2078, "step": 319 }, { "epoch": 0.02, "grad_norm": 1.8650702974456617, "learning_rate": 7.61904761904762e-06, "loss": 0.2106, "step": 320 }, { "epoch": 0.02, "grad_norm": 2.074954415910446, "learning_rate": 7.642857142857143e-06, "loss": 0.2561, "step": 321 }, { "epoch": 0.02, "grad_norm": 1.7041227480686176, "learning_rate": 7.666666666666667e-06, "loss": 0.2088, "step": 322 }, { "epoch": 0.02, "grad_norm": 1.7710638920386534, "learning_rate": 7.690476190476191e-06, "loss": 0.2193, "step": 323 }, { "epoch": 0.02, "grad_norm": 1.776195977604689, "learning_rate": 7.714285714285716e-06, "loss": 0.2069, "step": 324 }, { "epoch": 0.02, "grad_norm": 6.333406501579369, "learning_rate": 7.738095238095238e-06, "loss": 0.6174, "step": 325 }, { "epoch": 0.02, "grad_norm": 1.7606158122625184, "learning_rate": 7.761904761904762e-06, "loss": 0.1681, "step": 326 }, { "epoch": 0.02, "grad_norm": 1.8052289128026775, "learning_rate": 7.785714285714287e-06, "loss": 0.2403, "step": 327 }, { "epoch": 0.02, "grad_norm": 1.7741816428620443, "learning_rate": 7.809523809523811e-06, "loss": 0.2368, "step": 328 }, { "epoch": 0.02, "grad_norm": 1.8610983125290597, "learning_rate": 7.833333333333333e-06, "loss": 0.2245, "step": 329 }, { "epoch": 0.02, "grad_norm": 2.04955463527288, "learning_rate": 7.857142857142858e-06, "loss": 0.2186, "step": 330 }, { "epoch": 0.02, "grad_norm": 2.0167244128934745, "learning_rate": 7.880952380952382e-06, "loss": 0.297, "step": 331 }, { "epoch": 0.02, "grad_norm": 1.8044653628407887, "learning_rate": 7.904761904761904e-06, "loss": 0.2556, "step": 332 }, { "epoch": 0.02, "grad_norm": 5.928061261332684, "learning_rate": 7.928571428571429e-06, "loss": 0.5937, "step": 333 }, { "epoch": 0.02, "grad_norm": 1.9778530731030248, "learning_rate": 7.952380952380953e-06, "loss": 0.2515, "step": 334 }, { "epoch": 0.02, "grad_norm": 2.32571490301034, "learning_rate": 7.976190476190477e-06, "loss": 0.2326, "step": 335 }, { "epoch": 0.02, "grad_norm": 2.001011449389012, "learning_rate": 8.000000000000001e-06, "loss": 0.2387, "step": 336 }, { "epoch": 0.02, "grad_norm": 1.8572135430582049, "learning_rate": 8.023809523809526e-06, "loss": 0.2426, "step": 337 }, { "epoch": 0.02, "grad_norm": 1.9183159668971745, "learning_rate": 8.047619047619048e-06, "loss": 0.2463, "step": 338 }, { "epoch": 0.02, "grad_norm": 1.786747172402393, "learning_rate": 8.071428571428572e-06, "loss": 0.2279, "step": 339 }, { "epoch": 0.02, "grad_norm": 1.803676540185769, "learning_rate": 8.095238095238097e-06, "loss": 0.2308, "step": 340 }, { "epoch": 0.02, "grad_norm": 1.7753751618746538, "learning_rate": 8.119047619047619e-06, "loss": 0.2113, "step": 341 }, { "epoch": 0.02, "grad_norm": 1.8864489687496189, "learning_rate": 8.142857142857143e-06, "loss": 0.2336, "step": 342 }, { "epoch": 0.02, "grad_norm": 1.9525613516439526, "learning_rate": 8.166666666666668e-06, "loss": 0.2354, "step": 343 }, { "epoch": 0.02, "grad_norm": 1.584175122823716, "learning_rate": 8.190476190476192e-06, "loss": 0.2305, "step": 344 }, { "epoch": 0.02, "grad_norm": 1.5747505364884826, "learning_rate": 8.214285714285714e-06, "loss": 0.1818, "step": 345 }, { "epoch": 0.02, "grad_norm": 1.6505214720130668, "learning_rate": 8.238095238095239e-06, "loss": 0.1847, "step": 346 }, { "epoch": 0.02, "grad_norm": 1.7565908218840163, "learning_rate": 8.261904761904763e-06, "loss": 0.2054, "step": 347 }, { "epoch": 0.02, "grad_norm": 1.9977989317234695, "learning_rate": 8.285714285714287e-06, "loss": 0.2662, "step": 348 }, { "epoch": 0.02, "grad_norm": 2.0304709951751736, "learning_rate": 8.309523809523811e-06, "loss": 0.2218, "step": 349 }, { "epoch": 0.03, "grad_norm": 2.003160810603304, "learning_rate": 8.333333333333334e-06, "loss": 0.2745, "step": 350 }, { "epoch": 0.03, "grad_norm": 1.7425848846524117, "learning_rate": 8.357142857142858e-06, "loss": 0.1893, "step": 351 }, { "epoch": 0.03, "grad_norm": 1.838533165666302, "learning_rate": 8.380952380952382e-06, "loss": 0.2607, "step": 352 }, { "epoch": 0.03, "grad_norm": 1.7917766172163547, "learning_rate": 8.404761904761905e-06, "loss": 0.1962, "step": 353 }, { "epoch": 0.03, "grad_norm": 1.7148094981523583, "learning_rate": 8.428571428571429e-06, "loss": 0.1825, "step": 354 }, { "epoch": 0.03, "grad_norm": 1.891884751231864, "learning_rate": 8.452380952380953e-06, "loss": 0.1998, "step": 355 }, { "epoch": 0.03, "grad_norm": 1.8160875367492477, "learning_rate": 8.476190476190477e-06, "loss": 0.1936, "step": 356 }, { "epoch": 0.03, "grad_norm": 1.770843235882012, "learning_rate": 8.5e-06, "loss": 0.2507, "step": 357 }, { "epoch": 0.03, "grad_norm": 1.9045797791842085, "learning_rate": 8.523809523809524e-06, "loss": 0.2504, "step": 358 }, { "epoch": 0.03, "grad_norm": 1.9437448543812206, "learning_rate": 8.547619047619048e-06, "loss": 0.2439, "step": 359 }, { "epoch": 0.03, "grad_norm": 1.7558706974958636, "learning_rate": 8.571428571428571e-06, "loss": 0.2112, "step": 360 }, { "epoch": 0.03, "grad_norm": 1.8816222506716003, "learning_rate": 8.595238095238097e-06, "loss": 0.219, "step": 361 }, { "epoch": 0.03, "grad_norm": 1.8973290498259552, "learning_rate": 8.61904761904762e-06, "loss": 0.2357, "step": 362 }, { "epoch": 0.03, "grad_norm": 1.8179949121328882, "learning_rate": 8.642857142857144e-06, "loss": 0.2593, "step": 363 }, { "epoch": 0.03, "grad_norm": 1.9645533382791447, "learning_rate": 8.666666666666668e-06, "loss": 0.2791, "step": 364 }, { "epoch": 0.03, "grad_norm": 32.72426687805572, "learning_rate": 8.690476190476192e-06, "loss": 0.2297, "step": 365 }, { "epoch": 0.03, "grad_norm": 4.745976022432055, "learning_rate": 8.714285714285715e-06, "loss": 0.6184, "step": 366 }, { "epoch": 0.03, "grad_norm": 6.63107336379878, "learning_rate": 8.738095238095239e-06, "loss": 0.6297, "step": 367 }, { "epoch": 0.03, "grad_norm": 1.7141587525428186, "learning_rate": 8.761904761904763e-06, "loss": 0.2092, "step": 368 }, { "epoch": 0.03, "grad_norm": 1.7550000113092061, "learning_rate": 8.785714285714286e-06, "loss": 0.2486, "step": 369 }, { "epoch": 0.03, "grad_norm": 1.737687192164475, "learning_rate": 8.80952380952381e-06, "loss": 0.2032, "step": 370 }, { "epoch": 0.03, "grad_norm": 1.7101112507842706, "learning_rate": 8.833333333333334e-06, "loss": 0.2235, "step": 371 }, { "epoch": 0.03, "grad_norm": 5.7737299917561105, "learning_rate": 8.857142857142858e-06, "loss": 0.8163, "step": 372 }, { "epoch": 0.03, "grad_norm": 1.846462983341253, "learning_rate": 8.88095238095238e-06, "loss": 0.2444, "step": 373 }, { "epoch": 0.03, "grad_norm": 1.8109692510286255, "learning_rate": 8.904761904761905e-06, "loss": 0.2336, "step": 374 }, { "epoch": 0.03, "grad_norm": 1.9405563274944506, "learning_rate": 8.92857142857143e-06, "loss": 0.2432, "step": 375 }, { "epoch": 0.03, "grad_norm": 1.9179483007281288, "learning_rate": 8.952380952380953e-06, "loss": 0.233, "step": 376 }, { "epoch": 0.03, "grad_norm": 1.857294871080733, "learning_rate": 8.976190476190478e-06, "loss": 0.239, "step": 377 }, { "epoch": 0.03, "grad_norm": 1.647348351590959, "learning_rate": 9e-06, "loss": 0.2399, "step": 378 }, { "epoch": 0.03, "grad_norm": 2.094151279704169, "learning_rate": 9.023809523809524e-06, "loss": 0.2148, "step": 379 }, { "epoch": 0.03, "grad_norm": 6.372210893251924, "learning_rate": 9.047619047619049e-06, "loss": 0.6163, "step": 380 }, { "epoch": 0.03, "grad_norm": 1.775630431961017, "learning_rate": 9.071428571428573e-06, "loss": 0.2868, "step": 381 }, { "epoch": 0.03, "grad_norm": 1.9282720686813273, "learning_rate": 9.095238095238095e-06, "loss": 0.2492, "step": 382 }, { "epoch": 0.03, "grad_norm": 1.7765017512034897, "learning_rate": 9.11904761904762e-06, "loss": 0.2432, "step": 383 }, { "epoch": 0.03, "grad_norm": 1.839053609571583, "learning_rate": 9.142857142857144e-06, "loss": 0.2602, "step": 384 }, { "epoch": 0.03, "grad_norm": 1.630754028221605, "learning_rate": 9.166666666666666e-06, "loss": 0.1926, "step": 385 }, { "epoch": 0.03, "grad_norm": 1.9099510546191294, "learning_rate": 9.19047619047619e-06, "loss": 0.1862, "step": 386 }, { "epoch": 0.03, "grad_norm": 1.9361974835701143, "learning_rate": 9.214285714285715e-06, "loss": 0.2253, "step": 387 }, { "epoch": 0.03, "grad_norm": 1.9048612694275633, "learning_rate": 9.238095238095239e-06, "loss": 0.2587, "step": 388 }, { "epoch": 0.03, "grad_norm": 1.9553163714996091, "learning_rate": 9.261904761904763e-06, "loss": 0.2598, "step": 389 }, { "epoch": 0.03, "grad_norm": 1.933166728189562, "learning_rate": 9.285714285714288e-06, "loss": 0.2263, "step": 390 }, { "epoch": 0.03, "grad_norm": 2.031038888645553, "learning_rate": 9.30952380952381e-06, "loss": 0.2761, "step": 391 }, { "epoch": 0.03, "grad_norm": 1.9692502082507126, "learning_rate": 9.333333333333334e-06, "loss": 0.2266, "step": 392 }, { "epoch": 0.03, "grad_norm": 1.9290804306393872, "learning_rate": 9.357142857142859e-06, "loss": 0.1872, "step": 393 }, { "epoch": 0.03, "grad_norm": 1.691436975844405, "learning_rate": 9.380952380952381e-06, "loss": 0.2216, "step": 394 }, { "epoch": 0.03, "grad_norm": 1.9862039722299842, "learning_rate": 9.404761904761905e-06, "loss": 0.2296, "step": 395 }, { "epoch": 0.03, "grad_norm": 1.6838464536412863, "learning_rate": 9.42857142857143e-06, "loss": 0.2287, "step": 396 }, { "epoch": 0.03, "grad_norm": 1.664115811248073, "learning_rate": 9.452380952380952e-06, "loss": 0.2046, "step": 397 }, { "epoch": 0.03, "grad_norm": 1.7851447950299109, "learning_rate": 9.476190476190476e-06, "loss": 0.1923, "step": 398 }, { "epoch": 0.03, "grad_norm": 6.267739813716185, "learning_rate": 9.5e-06, "loss": 0.6433, "step": 399 }, { "epoch": 0.03, "grad_norm": 1.7788691572222957, "learning_rate": 9.523809523809525e-06, "loss": 0.2051, "step": 400 }, { "epoch": 0.03, "grad_norm": 1.9296463534345818, "learning_rate": 9.547619047619049e-06, "loss": 0.2333, "step": 401 }, { "epoch": 0.03, "grad_norm": 1.8142126861811512, "learning_rate": 9.571428571428573e-06, "loss": 0.2505, "step": 402 }, { "epoch": 0.03, "grad_norm": 1.9692530937233412, "learning_rate": 9.595238095238096e-06, "loss": 0.2093, "step": 403 }, { "epoch": 0.03, "grad_norm": 1.856043587537392, "learning_rate": 9.61904761904762e-06, "loss": 0.2635, "step": 404 }, { "epoch": 0.03, "grad_norm": 5.94443034052922, "learning_rate": 9.642857142857144e-06, "loss": 0.861, "step": 405 }, { "epoch": 0.03, "grad_norm": 1.7327783967262806, "learning_rate": 9.666666666666667e-06, "loss": 0.2482, "step": 406 }, { "epoch": 0.03, "grad_norm": 1.8315765464465676, "learning_rate": 9.690476190476191e-06, "loss": 0.2289, "step": 407 }, { "epoch": 0.03, "grad_norm": 2.112221522236389, "learning_rate": 9.714285714285715e-06, "loss": 0.2705, "step": 408 }, { "epoch": 0.03, "grad_norm": 1.9799258573051932, "learning_rate": 9.73809523809524e-06, "loss": 0.231, "step": 409 }, { "epoch": 0.03, "grad_norm": 1.7311808292751452, "learning_rate": 9.761904761904762e-06, "loss": 0.2559, "step": 410 }, { "epoch": 0.03, "grad_norm": 1.8502205815431971, "learning_rate": 9.785714285714286e-06, "loss": 0.212, "step": 411 }, { "epoch": 0.03, "grad_norm": 1.738527229864594, "learning_rate": 9.80952380952381e-06, "loss": 0.2662, "step": 412 }, { "epoch": 0.03, "grad_norm": 1.6655245824942482, "learning_rate": 9.833333333333333e-06, "loss": 0.1926, "step": 413 }, { "epoch": 0.03, "grad_norm": 1.552187474251555, "learning_rate": 9.857142857142859e-06, "loss": 0.209, "step": 414 }, { "epoch": 0.03, "grad_norm": 1.5768048771715486, "learning_rate": 9.880952380952381e-06, "loss": 0.2348, "step": 415 }, { "epoch": 0.03, "grad_norm": 1.6964878116119368, "learning_rate": 9.904761904761906e-06, "loss": 0.2021, "step": 416 }, { "epoch": 0.03, "grad_norm": 2.0526863748044817, "learning_rate": 9.92857142857143e-06, "loss": 0.2305, "step": 417 }, { "epoch": 0.03, "grad_norm": 1.787547142632327, "learning_rate": 9.952380952380954e-06, "loss": 0.2533, "step": 418 }, { "epoch": 0.03, "grad_norm": 1.9419923735530755, "learning_rate": 9.976190476190477e-06, "loss": 0.2618, "step": 419 }, { "epoch": 0.03, "grad_norm": 2.1373191265477978, "learning_rate": 1e-05, "loss": 0.2711, "step": 420 }, { "epoch": 0.03, "grad_norm": 1.9310276169897496, "learning_rate": 9.999999865809933e-06, "loss": 0.2946, "step": 421 }, { "epoch": 0.03, "grad_norm": 1.7379387372744968, "learning_rate": 9.999999463239735e-06, "loss": 0.198, "step": 422 }, { "epoch": 0.03, "grad_norm": 1.8245445172166241, "learning_rate": 9.999998792289432e-06, "loss": 0.2517, "step": 423 }, { "epoch": 0.03, "grad_norm": 1.822001276248104, "learning_rate": 9.999997852959055e-06, "loss": 0.2808, "step": 424 }, { "epoch": 0.03, "grad_norm": 1.9205509692763958, "learning_rate": 9.99999664524866e-06, "loss": 0.3423, "step": 425 }, { "epoch": 0.03, "grad_norm": 1.7888240575558378, "learning_rate": 9.999995169158308e-06, "loss": 0.2541, "step": 426 }, { "epoch": 0.03, "grad_norm": 1.7286963649374087, "learning_rate": 9.99999342468808e-06, "loss": 0.2232, "step": 427 }, { "epoch": 0.03, "grad_norm": 1.780722086503442, "learning_rate": 9.999991411838067e-06, "loss": 0.1884, "step": 428 }, { "epoch": 0.03, "grad_norm": 1.498913104958229, "learning_rate": 9.999989130608382e-06, "loss": 0.2023, "step": 429 }, { "epoch": 0.03, "grad_norm": 1.9628150553675676, "learning_rate": 9.99998658099914e-06, "loss": 0.213, "step": 430 }, { "epoch": 0.03, "grad_norm": 3.94943086884824, "learning_rate": 9.999983763010485e-06, "loss": 0.6611, "step": 431 }, { "epoch": 0.03, "grad_norm": 1.8853688764627523, "learning_rate": 9.999980676642564e-06, "loss": 0.2968, "step": 432 }, { "epoch": 0.03, "grad_norm": 8.231855994331173, "learning_rate": 9.999977321895547e-06, "loss": 0.5226, "step": 433 }, { "epoch": 0.03, "grad_norm": 1.540912893938073, "learning_rate": 9.999973698769608e-06, "loss": 0.1505, "step": 434 }, { "epoch": 0.03, "grad_norm": 5.971543961049707, "learning_rate": 9.999969807264946e-06, "loss": 0.758, "step": 435 }, { "epoch": 0.03, "grad_norm": 2.0221156004050185, "learning_rate": 9.99996564738177e-06, "loss": 0.2295, "step": 436 }, { "epoch": 0.03, "grad_norm": 1.8469466566406818, "learning_rate": 9.9999612191203e-06, "loss": 0.2348, "step": 437 }, { "epoch": 0.03, "grad_norm": 1.7898127914561228, "learning_rate": 9.999956522480776e-06, "loss": 0.2135, "step": 438 }, { "epoch": 0.03, "grad_norm": 1.795620819692547, "learning_rate": 9.99995155746345e-06, "loss": 0.233, "step": 439 }, { "epoch": 0.03, "grad_norm": 1.7248384283309917, "learning_rate": 9.999946324068588e-06, "loss": 0.2332, "step": 440 }, { "epoch": 0.03, "grad_norm": 1.6883591048498006, "learning_rate": 9.99994082229647e-06, "loss": 0.2564, "step": 441 }, { "epoch": 0.03, "grad_norm": 1.7302591438345385, "learning_rate": 9.999935052147393e-06, "loss": 0.2302, "step": 442 }, { "epoch": 0.03, "grad_norm": 8.544737991200359, "learning_rate": 9.999929013621667e-06, "loss": 0.6525, "step": 443 }, { "epoch": 0.03, "grad_norm": 5.71751612570369, "learning_rate": 9.999922706719614e-06, "loss": 0.5309, "step": 444 }, { "epoch": 0.03, "grad_norm": 1.6018585595110737, "learning_rate": 9.999916131441574e-06, "loss": 0.1775, "step": 445 }, { "epoch": 0.03, "grad_norm": 1.6255304380107483, "learning_rate": 9.9999092877879e-06, "loss": 0.2079, "step": 446 }, { "epoch": 0.03, "grad_norm": 1.8395138268286515, "learning_rate": 9.99990217575896e-06, "loss": 0.2247, "step": 447 }, { "epoch": 0.03, "grad_norm": 8.203220014861547, "learning_rate": 9.999894795355133e-06, "loss": 0.9359, "step": 448 }, { "epoch": 0.03, "grad_norm": 6.091457747147549, "learning_rate": 9.999887146576817e-06, "loss": 0.5447, "step": 449 }, { "epoch": 0.03, "grad_norm": 1.9360642516162143, "learning_rate": 9.999879229424423e-06, "loss": 0.2391, "step": 450 }, { "epoch": 0.03, "grad_norm": 1.6517621913320264, "learning_rate": 9.999871043898377e-06, "loss": 0.2192, "step": 451 }, { "epoch": 0.03, "grad_norm": 1.7488625226819485, "learning_rate": 9.999862589999114e-06, "loss": 0.2259, "step": 452 }, { "epoch": 0.03, "grad_norm": 1.7034760495762693, "learning_rate": 9.999853867727092e-06, "loss": 0.3037, "step": 453 }, { "epoch": 0.03, "grad_norm": 6.996622442113789, "learning_rate": 9.999844877082778e-06, "loss": 0.6067, "step": 454 }, { "epoch": 0.03, "grad_norm": 1.845993104016002, "learning_rate": 9.999835618066654e-06, "loss": 0.2983, "step": 455 }, { "epoch": 0.03, "grad_norm": 1.4696482241127788, "learning_rate": 9.999826090679217e-06, "loss": 0.1791, "step": 456 }, { "epoch": 0.03, "grad_norm": 1.6490012695861709, "learning_rate": 9.99981629492098e-06, "loss": 0.2605, "step": 457 }, { "epoch": 0.03, "grad_norm": 8.147175952765007, "learning_rate": 9.999806230792467e-06, "loss": 0.6311, "step": 458 }, { "epoch": 0.03, "grad_norm": 1.5997205298571935, "learning_rate": 9.999795898294217e-06, "loss": 0.2484, "step": 459 }, { "epoch": 0.03, "grad_norm": 1.6563332814372878, "learning_rate": 9.999785297426788e-06, "loss": 0.2163, "step": 460 }, { "epoch": 0.03, "grad_norm": 1.604871893843957, "learning_rate": 9.999774428190748e-06, "loss": 0.2192, "step": 461 }, { "epoch": 0.03, "grad_norm": 1.8108955797125175, "learning_rate": 9.999763290586678e-06, "loss": 0.2634, "step": 462 }, { "epoch": 0.03, "grad_norm": 1.8090156663520274, "learning_rate": 9.999751884615179e-06, "loss": 0.2843, "step": 463 }, { "epoch": 0.03, "grad_norm": 1.712980821493254, "learning_rate": 9.99974021027686e-06, "loss": 0.2689, "step": 464 }, { "epoch": 0.03, "grad_norm": 1.802867706232354, "learning_rate": 9.99972826757235e-06, "loss": 0.2138, "step": 465 }, { "epoch": 0.03, "grad_norm": 1.7272535778282685, "learning_rate": 9.99971605650229e-06, "loss": 0.1555, "step": 466 }, { "epoch": 0.03, "grad_norm": 1.809783852560339, "learning_rate": 9.999703577067335e-06, "loss": 0.2269, "step": 467 }, { "epoch": 0.03, "grad_norm": 2.375224264584951, "learning_rate": 9.999690829268154e-06, "loss": 0.2804, "step": 468 }, { "epoch": 0.03, "grad_norm": 1.741397257334788, "learning_rate": 9.999677813105433e-06, "loss": 0.2364, "step": 469 }, { "epoch": 0.03, "grad_norm": 1.7484815009682, "learning_rate": 9.999664528579869e-06, "loss": 0.203, "step": 470 }, { "epoch": 0.03, "grad_norm": 1.7966171894175609, "learning_rate": 9.999650975692177e-06, "loss": 0.2711, "step": 471 }, { "epoch": 0.03, "grad_norm": 1.8129529815924725, "learning_rate": 9.999637154443081e-06, "loss": 0.2353, "step": 472 }, { "epoch": 0.03, "grad_norm": 1.758591110114017, "learning_rate": 9.999623064833326e-06, "loss": 0.2811, "step": 473 }, { "epoch": 0.03, "grad_norm": 1.5950412617261114, "learning_rate": 9.999608706863669e-06, "loss": 0.2259, "step": 474 }, { "epoch": 0.03, "grad_norm": 1.7918947169648147, "learning_rate": 9.999594080534876e-06, "loss": 0.2721, "step": 475 }, { "epoch": 0.03, "grad_norm": 1.5977561127411257, "learning_rate": 9.999579185847737e-06, "loss": 0.1673, "step": 476 }, { "epoch": 0.03, "grad_norm": 1.9241096196453091, "learning_rate": 9.999564022803048e-06, "loss": 0.3209, "step": 477 }, { "epoch": 0.03, "grad_norm": 1.5925113761025154, "learning_rate": 9.999548591401627e-06, "loss": 0.2161, "step": 478 }, { "epoch": 0.03, "grad_norm": 1.6444947157558782, "learning_rate": 9.999532891644298e-06, "loss": 0.2278, "step": 479 }, { "epoch": 0.03, "grad_norm": 1.6299073963546644, "learning_rate": 9.999516923531906e-06, "loss": 0.2053, "step": 480 }, { "epoch": 0.03, "grad_norm": 1.766912757144763, "learning_rate": 9.999500687065306e-06, "loss": 0.2867, "step": 481 }, { "epoch": 0.03, "grad_norm": 1.6028071422876382, "learning_rate": 9.999484182245374e-06, "loss": 0.2154, "step": 482 }, { "epoch": 0.03, "grad_norm": 7.341953397901124, "learning_rate": 9.999467409072991e-06, "loss": 0.5632, "step": 483 }, { "epoch": 0.03, "grad_norm": 1.5975418883221923, "learning_rate": 9.99945036754906e-06, "loss": 0.2249, "step": 484 }, { "epoch": 0.03, "grad_norm": 1.725171549076565, "learning_rate": 9.999433057674495e-06, "loss": 0.2238, "step": 485 }, { "epoch": 0.03, "grad_norm": 1.7095499925260276, "learning_rate": 9.999415479450225e-06, "loss": 0.2416, "step": 486 }, { "epoch": 0.03, "grad_norm": 1.6737099249111227, "learning_rate": 9.999397632877193e-06, "loss": 0.1997, "step": 487 }, { "epoch": 0.03, "grad_norm": 1.828428651517357, "learning_rate": 9.999379517956358e-06, "loss": 0.2025, "step": 488 }, { "epoch": 0.03, "grad_norm": 1.672292412747221, "learning_rate": 9.999361134688694e-06, "loss": 0.2372, "step": 489 }, { "epoch": 0.04, "grad_norm": 1.8226958423104032, "learning_rate": 9.999342483075183e-06, "loss": 0.2134, "step": 490 }, { "epoch": 0.04, "grad_norm": 1.6337115996049818, "learning_rate": 9.99932356311683e-06, "loss": 0.2278, "step": 491 }, { "epoch": 0.04, "grad_norm": 1.8136645780795486, "learning_rate": 9.99930437481465e-06, "loss": 0.231, "step": 492 }, { "epoch": 0.04, "grad_norm": 1.6860539122480611, "learning_rate": 9.999284918169672e-06, "loss": 0.2384, "step": 493 }, { "epoch": 0.04, "grad_norm": 1.9109072690344266, "learning_rate": 9.999265193182938e-06, "loss": 0.2441, "step": 494 }, { "epoch": 0.04, "grad_norm": 1.6642202134097952, "learning_rate": 9.999245199855514e-06, "loss": 0.1918, "step": 495 }, { "epoch": 0.04, "grad_norm": 1.7254583544187543, "learning_rate": 9.999224938188466e-06, "loss": 0.2548, "step": 496 }, { "epoch": 0.04, "grad_norm": 1.825079509601027, "learning_rate": 9.999204408182884e-06, "loss": 0.1956, "step": 497 }, { "epoch": 0.04, "grad_norm": 1.6797037764194405, "learning_rate": 9.99918360983987e-06, "loss": 0.1966, "step": 498 }, { "epoch": 0.04, "grad_norm": 1.8948119709599023, "learning_rate": 9.999162543160542e-06, "loss": 0.2585, "step": 499 }, { "epoch": 0.04, "grad_norm": 1.6886961037869472, "learning_rate": 9.999141208146029e-06, "loss": 0.2594, "step": 500 }, { "epoch": 0.04, "grad_norm": 1.7531131698865352, "learning_rate": 9.999119604797475e-06, "loss": 0.2305, "step": 501 }, { "epoch": 0.04, "grad_norm": 1.785064566488959, "learning_rate": 9.999097733116042e-06, "loss": 0.1817, "step": 502 }, { "epoch": 0.04, "grad_norm": 1.7117784327730716, "learning_rate": 9.999075593102902e-06, "loss": 0.2505, "step": 503 }, { "epoch": 0.04, "grad_norm": 1.5942958873251942, "learning_rate": 9.999053184759247e-06, "loss": 0.2217, "step": 504 }, { "epoch": 0.04, "grad_norm": 1.6393439115054835, "learning_rate": 9.999030508086277e-06, "loss": 0.2063, "step": 505 }, { "epoch": 0.04, "grad_norm": 1.8830344378648498, "learning_rate": 9.999007563085205e-06, "loss": 0.2238, "step": 506 }, { "epoch": 0.04, "grad_norm": 1.6108881327048417, "learning_rate": 9.998984349757273e-06, "loss": 0.217, "step": 507 }, { "epoch": 0.04, "grad_norm": 1.801066230572164, "learning_rate": 9.998960868103718e-06, "loss": 0.2626, "step": 508 }, { "epoch": 0.04, "grad_norm": 6.199414809428491, "learning_rate": 9.998937118125802e-06, "loss": 0.5115, "step": 509 }, { "epoch": 0.04, "grad_norm": 1.5682770747611556, "learning_rate": 9.998913099824804e-06, "loss": 0.2126, "step": 510 }, { "epoch": 0.04, "grad_norm": 1.875160179744944, "learning_rate": 9.998888813202008e-06, "loss": 0.2203, "step": 511 }, { "epoch": 0.04, "grad_norm": 6.035463424478484, "learning_rate": 9.998864258258723e-06, "loss": 0.8033, "step": 512 }, { "epoch": 0.04, "grad_norm": 1.568685121215722, "learning_rate": 9.998839434996262e-06, "loss": 0.2246, "step": 513 }, { "epoch": 0.04, "grad_norm": 1.5569014534674326, "learning_rate": 9.99881434341596e-06, "loss": 0.19, "step": 514 }, { "epoch": 0.04, "grad_norm": 1.9197100667105194, "learning_rate": 9.998788983519163e-06, "loss": 0.2458, "step": 515 }, { "epoch": 0.04, "grad_norm": 1.5768354593340916, "learning_rate": 9.998763355307232e-06, "loss": 0.2166, "step": 516 }, { "epoch": 0.04, "grad_norm": 1.6643234251109573, "learning_rate": 9.998737458781543e-06, "loss": 0.2073, "step": 517 }, { "epoch": 0.04, "grad_norm": 6.685017082410479, "learning_rate": 9.998711293943487e-06, "loss": 0.5965, "step": 518 }, { "epoch": 0.04, "grad_norm": 1.7205352288266864, "learning_rate": 9.998684860794467e-06, "loss": 0.2376, "step": 519 }, { "epoch": 0.04, "grad_norm": 1.6077792074408113, "learning_rate": 9.998658159335903e-06, "loss": 0.2293, "step": 520 }, { "epoch": 0.04, "grad_norm": 1.8361190668365253, "learning_rate": 9.998631189569227e-06, "loss": 0.2702, "step": 521 }, { "epoch": 0.04, "grad_norm": 1.7618060906306372, "learning_rate": 9.998603951495889e-06, "loss": 0.2485, "step": 522 }, { "epoch": 0.04, "grad_norm": 1.4370266035532995, "learning_rate": 9.998576445117347e-06, "loss": 0.2043, "step": 523 }, { "epoch": 0.04, "grad_norm": 5.729653395178178, "learning_rate": 9.998548670435083e-06, "loss": 0.7933, "step": 524 }, { "epoch": 0.04, "grad_norm": 1.735281723583391, "learning_rate": 9.998520627450581e-06, "loss": 0.2206, "step": 525 }, { "epoch": 0.04, "grad_norm": 5.4277089511375, "learning_rate": 9.998492316165352e-06, "loss": 0.7456, "step": 526 }, { "epoch": 0.04, "grad_norm": 1.7131942904930606, "learning_rate": 9.998463736580911e-06, "loss": 0.2047, "step": 527 }, { "epoch": 0.04, "grad_norm": 1.5834872257368824, "learning_rate": 9.998434888698796e-06, "loss": 0.2193, "step": 528 }, { "epoch": 0.04, "grad_norm": 1.9159050691937483, "learning_rate": 9.998405772520554e-06, "loss": 0.2162, "step": 529 }, { "epoch": 0.04, "grad_norm": 1.6620470306872097, "learning_rate": 9.998376388047748e-06, "loss": 0.2658, "step": 530 }, { "epoch": 0.04, "grad_norm": 1.8166883799793274, "learning_rate": 9.998346735281954e-06, "loss": 0.2654, "step": 531 }, { "epoch": 0.04, "grad_norm": 8.765199078359851, "learning_rate": 9.998316814224765e-06, "loss": 0.6544, "step": 532 }, { "epoch": 0.04, "grad_norm": 1.7793716730033806, "learning_rate": 9.998286624877786e-06, "loss": 0.2085, "step": 533 }, { "epoch": 0.04, "grad_norm": 1.7968453636194168, "learning_rate": 9.99825616724264e-06, "loss": 0.2744, "step": 534 }, { "epoch": 0.04, "grad_norm": 1.601402270695304, "learning_rate": 9.998225441320959e-06, "loss": 0.276, "step": 535 }, { "epoch": 0.04, "grad_norm": 1.7769052610542935, "learning_rate": 9.998194447114394e-06, "loss": 0.2315, "step": 536 }, { "epoch": 0.04, "grad_norm": 1.635524613892197, "learning_rate": 9.998163184624606e-06, "loss": 0.2379, "step": 537 }, { "epoch": 0.04, "grad_norm": 1.6589381231401419, "learning_rate": 9.998131653853275e-06, "loss": 0.3002, "step": 538 }, { "epoch": 0.04, "grad_norm": 1.6155176067494363, "learning_rate": 9.998099854802095e-06, "loss": 0.2093, "step": 539 }, { "epoch": 0.04, "grad_norm": 1.7763240732316072, "learning_rate": 9.998067787472772e-06, "loss": 0.241, "step": 540 }, { "epoch": 0.04, "grad_norm": 1.8068331023253885, "learning_rate": 9.998035451867023e-06, "loss": 0.2371, "step": 541 }, { "epoch": 0.04, "grad_norm": 1.4999739203668576, "learning_rate": 9.99800284798659e-06, "loss": 0.2645, "step": 542 }, { "epoch": 0.04, "grad_norm": 1.7684112199228414, "learning_rate": 9.99796997583322e-06, "loss": 0.2248, "step": 543 }, { "epoch": 0.04, "grad_norm": 1.7109024922463631, "learning_rate": 9.997936835408675e-06, "loss": 0.2392, "step": 544 }, { "epoch": 0.04, "grad_norm": 1.5467644837186367, "learning_rate": 9.997903426714739e-06, "loss": 0.1684, "step": 545 }, { "epoch": 0.04, "grad_norm": 1.545073681962836, "learning_rate": 9.997869749753202e-06, "loss": 0.2637, "step": 546 }, { "epoch": 0.04, "grad_norm": 1.730639439800308, "learning_rate": 9.997835804525871e-06, "loss": 0.2764, "step": 547 }, { "epoch": 0.04, "grad_norm": 1.620402046346925, "learning_rate": 9.99780159103457e-06, "loss": 0.2087, "step": 548 }, { "epoch": 0.04, "grad_norm": 1.7907960508042755, "learning_rate": 9.997767109281136e-06, "loss": 0.2523, "step": 549 }, { "epoch": 0.04, "grad_norm": 1.8005056652493097, "learning_rate": 9.997732359267419e-06, "loss": 0.3039, "step": 550 }, { "epoch": 0.04, "grad_norm": 1.6639212189483445, "learning_rate": 9.99769734099528e-06, "loss": 0.2458, "step": 551 }, { "epoch": 0.04, "grad_norm": 1.5738592148956088, "learning_rate": 9.997662054466607e-06, "loss": 0.2707, "step": 552 }, { "epoch": 0.04, "grad_norm": 1.6967595197664154, "learning_rate": 9.997626499683289e-06, "loss": 0.2427, "step": 553 }, { "epoch": 0.04, "grad_norm": 1.7570547044433409, "learning_rate": 9.997590676647233e-06, "loss": 0.2612, "step": 554 }, { "epoch": 0.04, "grad_norm": 1.889790503370837, "learning_rate": 9.997554585360365e-06, "loss": 0.2475, "step": 555 }, { "epoch": 0.04, "grad_norm": 1.6774943962490119, "learning_rate": 9.997518225824621e-06, "loss": 0.2154, "step": 556 }, { "epoch": 0.04, "grad_norm": 1.8423758891610968, "learning_rate": 9.997481598041951e-06, "loss": 0.2919, "step": 557 }, { "epoch": 0.04, "grad_norm": 8.377229010501095, "learning_rate": 9.997444702014326e-06, "loss": 0.6379, "step": 558 }, { "epoch": 0.04, "grad_norm": 1.6616479182827921, "learning_rate": 9.997407537743721e-06, "loss": 0.2243, "step": 559 }, { "epoch": 0.04, "grad_norm": 1.9841749170287015, "learning_rate": 9.997370105232134e-06, "loss": 0.2444, "step": 560 }, { "epoch": 0.04, "grad_norm": 1.9656193443421748, "learning_rate": 9.997332404481572e-06, "loss": 0.2968, "step": 561 }, { "epoch": 0.04, "grad_norm": 1.471127362443591, "learning_rate": 9.99729443549406e-06, "loss": 0.2206, "step": 562 }, { "epoch": 0.04, "grad_norm": 1.5650716830935516, "learning_rate": 9.997256198271637e-06, "loss": 0.2013, "step": 563 }, { "epoch": 0.04, "grad_norm": 1.8484446399470826, "learning_rate": 9.997217692816355e-06, "loss": 0.2353, "step": 564 }, { "epoch": 0.04, "grad_norm": 1.5426262363806842, "learning_rate": 9.997178919130279e-06, "loss": 0.2006, "step": 565 }, { "epoch": 0.04, "grad_norm": 1.5623041578803294, "learning_rate": 9.997139877215492e-06, "loss": 0.2223, "step": 566 }, { "epoch": 0.04, "grad_norm": 1.803744305102567, "learning_rate": 9.997100567074087e-06, "loss": 0.2532, "step": 567 }, { "epoch": 0.04, "grad_norm": 1.9311619338752148, "learning_rate": 9.997060988708178e-06, "loss": 0.2448, "step": 568 }, { "epoch": 0.04, "grad_norm": 1.8669588569798805, "learning_rate": 9.997021142119886e-06, "loss": 0.2512, "step": 569 }, { "epoch": 0.04, "grad_norm": 1.7416672147329488, "learning_rate": 9.996981027311352e-06, "loss": 0.2265, "step": 570 }, { "epoch": 0.04, "grad_norm": 1.4749202237317114, "learning_rate": 9.996940644284729e-06, "loss": 0.1873, "step": 571 }, { "epoch": 0.04, "grad_norm": 1.5578129264883946, "learning_rate": 9.996899993042182e-06, "loss": 0.2042, "step": 572 }, { "epoch": 0.04, "grad_norm": 1.6221564193193698, "learning_rate": 9.996859073585898e-06, "loss": 0.2117, "step": 573 }, { "epoch": 0.04, "grad_norm": 1.678190107536808, "learning_rate": 9.996817885918069e-06, "loss": 0.2142, "step": 574 }, { "epoch": 0.04, "grad_norm": 11.29968171973042, "learning_rate": 9.996776430040906e-06, "loss": 0.6952, "step": 575 }, { "epoch": 0.04, "grad_norm": 1.4937877717830532, "learning_rate": 9.996734705956637e-06, "loss": 0.2218, "step": 576 }, { "epoch": 0.04, "grad_norm": 1.5677510808090842, "learning_rate": 9.996692713667498e-06, "loss": 0.2404, "step": 577 }, { "epoch": 0.04, "grad_norm": 1.6960950245606041, "learning_rate": 9.996650453175748e-06, "loss": 0.2073, "step": 578 }, { "epoch": 0.04, "grad_norm": 1.6029173822650022, "learning_rate": 9.99660792448365e-06, "loss": 0.2541, "step": 579 }, { "epoch": 0.04, "grad_norm": 1.6487313257896015, "learning_rate": 9.99656512759349e-06, "loss": 0.2577, "step": 580 }, { "epoch": 0.04, "grad_norm": 1.5680722873315158, "learning_rate": 9.996522062507564e-06, "loss": 0.204, "step": 581 }, { "epoch": 0.04, "grad_norm": 1.4884603105729501, "learning_rate": 9.996478729228182e-06, "loss": 0.228, "step": 582 }, { "epoch": 0.04, "grad_norm": 1.585192741789178, "learning_rate": 9.996435127757673e-06, "loss": 0.1913, "step": 583 }, { "epoch": 0.04, "grad_norm": 1.6580058137613347, "learning_rate": 9.996391258098376e-06, "loss": 0.2408, "step": 584 }, { "epoch": 0.04, "grad_norm": 6.2374068141697725, "learning_rate": 9.996347120252647e-06, "loss": 0.5011, "step": 585 }, { "epoch": 0.04, "grad_norm": 1.5613579859542268, "learning_rate": 9.996302714222853e-06, "loss": 0.2147, "step": 586 }, { "epoch": 0.04, "grad_norm": 1.6917879179396091, "learning_rate": 9.996258040011381e-06, "loss": 0.2214, "step": 587 }, { "epoch": 0.04, "grad_norm": 1.74731716420905, "learning_rate": 9.996213097620623e-06, "loss": 0.2718, "step": 588 }, { "epoch": 0.04, "grad_norm": 1.5626750637003448, "learning_rate": 9.996167887052997e-06, "loss": 0.23, "step": 589 }, { "epoch": 0.04, "grad_norm": 1.6178635707855193, "learning_rate": 9.996122408310927e-06, "loss": 0.2523, "step": 590 }, { "epoch": 0.04, "grad_norm": 2.0111752792353403, "learning_rate": 9.996076661396854e-06, "loss": 0.2675, "step": 591 }, { "epoch": 0.04, "grad_norm": 6.892157813613588, "learning_rate": 9.996030646313235e-06, "loss": 0.5959, "step": 592 }, { "epoch": 0.04, "grad_norm": 1.80468775181741, "learning_rate": 9.995984363062538e-06, "loss": 0.2857, "step": 593 }, { "epoch": 0.04, "grad_norm": 1.7772367829113869, "learning_rate": 9.995937811647251e-06, "loss": 0.2688, "step": 594 }, { "epoch": 0.04, "grad_norm": 1.581110070462405, "learning_rate": 9.995890992069868e-06, "loss": 0.2298, "step": 595 }, { "epoch": 0.04, "grad_norm": 1.5681972353664881, "learning_rate": 9.995843904332904e-06, "loss": 0.221, "step": 596 }, { "epoch": 0.04, "grad_norm": 1.6695363309663607, "learning_rate": 9.995796548438887e-06, "loss": 0.2676, "step": 597 }, { "epoch": 0.04, "grad_norm": 1.665844879938811, "learning_rate": 9.995748924390358e-06, "loss": 0.2166, "step": 598 }, { "epoch": 0.04, "grad_norm": 1.460510946143721, "learning_rate": 9.995701032189875e-06, "loss": 0.1801, "step": 599 }, { "epoch": 0.04, "grad_norm": 1.6493714929143417, "learning_rate": 9.995652871840006e-06, "loss": 0.2535, "step": 600 }, { "epoch": 0.04, "grad_norm": 1.7755574796736042, "learning_rate": 9.99560444334334e-06, "loss": 0.232, "step": 601 }, { "epoch": 0.04, "grad_norm": 7.124709629697863, "learning_rate": 9.995555746702472e-06, "loss": 0.5456, "step": 602 }, { "epoch": 0.04, "grad_norm": 1.8831853454619656, "learning_rate": 9.995506781920018e-06, "loss": 0.2046, "step": 603 }, { "epoch": 0.04, "grad_norm": 1.7654722033428032, "learning_rate": 9.995457548998606e-06, "loss": 0.1914, "step": 604 }, { "epoch": 0.04, "grad_norm": 1.4581466027526737, "learning_rate": 9.99540804794088e-06, "loss": 0.233, "step": 605 }, { "epoch": 0.04, "grad_norm": 1.6869813631526638, "learning_rate": 9.995358278749494e-06, "loss": 0.243, "step": 606 }, { "epoch": 0.04, "grad_norm": 1.5826420172912294, "learning_rate": 9.995308241427122e-06, "loss": 0.267, "step": 607 }, { "epoch": 0.04, "grad_norm": 1.4567906792683805, "learning_rate": 9.995257935976448e-06, "loss": 0.1924, "step": 608 }, { "epoch": 0.04, "grad_norm": 1.8330309990573586, "learning_rate": 9.995207362400175e-06, "loss": 0.2692, "step": 609 }, { "epoch": 0.04, "grad_norm": 1.5430968381143486, "learning_rate": 9.995156520701012e-06, "loss": 0.1932, "step": 610 }, { "epoch": 0.04, "grad_norm": 1.9428066567116653, "learning_rate": 9.995105410881695e-06, "loss": 0.2585, "step": 611 }, { "epoch": 0.04, "grad_norm": 1.6630153503418232, "learning_rate": 9.995054032944963e-06, "loss": 0.2232, "step": 612 }, { "epoch": 0.04, "grad_norm": 5.334326065352627, "learning_rate": 9.995002386893575e-06, "loss": 0.533, "step": 613 }, { "epoch": 0.04, "grad_norm": 1.796800976591682, "learning_rate": 9.994950472730302e-06, "loss": 0.2185, "step": 614 }, { "epoch": 0.04, "grad_norm": 1.6366769835867134, "learning_rate": 9.994898290457932e-06, "loss": 0.243, "step": 615 }, { "epoch": 0.04, "grad_norm": 1.828949240894937, "learning_rate": 9.994845840079267e-06, "loss": 0.2471, "step": 616 }, { "epoch": 0.04, "grad_norm": 1.6972468594596486, "learning_rate": 9.99479312159712e-06, "loss": 0.2647, "step": 617 }, { "epoch": 0.04, "grad_norm": 7.490134572545603, "learning_rate": 9.99474013501432e-06, "loss": 0.6792, "step": 618 }, { "epoch": 0.04, "grad_norm": 1.5263151793306773, "learning_rate": 9.994686880333715e-06, "loss": 0.2045, "step": 619 }, { "epoch": 0.04, "grad_norm": 1.6258968809448344, "learning_rate": 9.994633357558158e-06, "loss": 0.2969, "step": 620 }, { "epoch": 0.04, "grad_norm": 1.6345802243864846, "learning_rate": 9.994579566690527e-06, "loss": 0.3127, "step": 621 }, { "epoch": 0.04, "grad_norm": 1.617938165595077, "learning_rate": 9.994525507733708e-06, "loss": 0.2179, "step": 622 }, { "epoch": 0.04, "grad_norm": 1.5204451581647924, "learning_rate": 9.994471180690601e-06, "loss": 0.216, "step": 623 }, { "epoch": 0.04, "grad_norm": 1.5893303891887063, "learning_rate": 9.994416585564123e-06, "loss": 0.2505, "step": 624 }, { "epoch": 0.04, "grad_norm": 1.665748344611308, "learning_rate": 9.994361722357204e-06, "loss": 0.2014, "step": 625 }, { "epoch": 0.04, "grad_norm": 5.701428904090509, "learning_rate": 9.99430659107279e-06, "loss": 0.8376, "step": 626 }, { "epoch": 0.04, "grad_norm": 4.569425936531288, "learning_rate": 9.99425119171384e-06, "loss": 0.6867, "step": 627 }, { "epoch": 0.04, "grad_norm": 1.7785769642846632, "learning_rate": 9.994195524283326e-06, "loss": 0.2215, "step": 628 }, { "epoch": 0.04, "grad_norm": 1.7001487048566615, "learning_rate": 9.994139588784238e-06, "loss": 0.2186, "step": 629 }, { "epoch": 0.05, "grad_norm": 1.8228283775986551, "learning_rate": 9.994083385219578e-06, "loss": 0.2557, "step": 630 }, { "epoch": 0.05, "grad_norm": 1.6706857871472391, "learning_rate": 9.99402691359236e-06, "loss": 0.2328, "step": 631 }, { "epoch": 0.05, "grad_norm": 1.5287312505615744, "learning_rate": 9.993970173905618e-06, "loss": 0.1659, "step": 632 }, { "epoch": 0.05, "grad_norm": 1.8023120839277955, "learning_rate": 9.993913166162399e-06, "loss": 0.2495, "step": 633 }, { "epoch": 0.05, "grad_norm": 1.8599250552416529, "learning_rate": 9.99385589036576e-06, "loss": 0.2374, "step": 634 }, { "epoch": 0.05, "grad_norm": 1.5766608007380987, "learning_rate": 9.993798346518777e-06, "loss": 0.2547, "step": 635 }, { "epoch": 0.05, "grad_norm": 7.447632684960474, "learning_rate": 9.993740534624536e-06, "loss": 0.6147, "step": 636 }, { "epoch": 0.05, "grad_norm": 1.6379772903849688, "learning_rate": 9.993682454686144e-06, "loss": 0.2003, "step": 637 }, { "epoch": 0.05, "grad_norm": 1.9764683541501522, "learning_rate": 9.993624106706716e-06, "loss": 0.286, "step": 638 }, { "epoch": 0.05, "grad_norm": 1.6008619888219222, "learning_rate": 9.993565490689386e-06, "loss": 0.2571, "step": 639 }, { "epoch": 0.05, "grad_norm": 1.8045849937206637, "learning_rate": 9.993506606637297e-06, "loss": 0.2271, "step": 640 }, { "epoch": 0.05, "grad_norm": 1.648400479548625, "learning_rate": 9.993447454553611e-06, "loss": 0.2765, "step": 641 }, { "epoch": 0.05, "grad_norm": 1.5451627165196342, "learning_rate": 9.993388034441505e-06, "loss": 0.2282, "step": 642 }, { "epoch": 0.05, "grad_norm": 1.6308379584056347, "learning_rate": 9.993328346304167e-06, "loss": 0.1996, "step": 643 }, { "epoch": 0.05, "grad_norm": 1.8580692733407562, "learning_rate": 9.9932683901448e-06, "loss": 0.2853, "step": 644 }, { "epoch": 0.05, "grad_norm": 1.7290502849348282, "learning_rate": 9.993208165966624e-06, "loss": 0.21, "step": 645 }, { "epoch": 0.05, "grad_norm": 1.4724880569233778, "learning_rate": 9.993147673772869e-06, "loss": 0.1773, "step": 646 }, { "epoch": 0.05, "grad_norm": 1.6977919819596643, "learning_rate": 9.993086913566785e-06, "loss": 0.1963, "step": 647 }, { "epoch": 0.05, "grad_norm": 1.6005949693978905, "learning_rate": 9.993025885351633e-06, "loss": 0.2643, "step": 648 }, { "epoch": 0.05, "grad_norm": 1.6977853432026133, "learning_rate": 9.992964589130685e-06, "loss": 0.2279, "step": 649 }, { "epoch": 0.05, "grad_norm": 1.6921275344161306, "learning_rate": 9.992903024907236e-06, "loss": 0.2101, "step": 650 }, { "epoch": 0.05, "grad_norm": 1.7501982248183987, "learning_rate": 9.992841192684589e-06, "loss": 0.253, "step": 651 }, { "epoch": 0.05, "grad_norm": 5.517808439251331, "learning_rate": 9.99277909246606e-06, "loss": 0.594, "step": 652 }, { "epoch": 0.05, "grad_norm": 1.7139765120713024, "learning_rate": 9.992716724254985e-06, "loss": 0.2521, "step": 653 }, { "epoch": 0.05, "grad_norm": 4.962550950443222, "learning_rate": 9.992654088054712e-06, "loss": 0.7427, "step": 654 }, { "epoch": 0.05, "grad_norm": 1.773620290910626, "learning_rate": 9.992591183868602e-06, "loss": 0.2527, "step": 655 }, { "epoch": 0.05, "grad_norm": 1.6745668095849577, "learning_rate": 9.992528011700032e-06, "loss": 0.2472, "step": 656 }, { "epoch": 0.05, "grad_norm": 1.7151151980724444, "learning_rate": 9.99246457155239e-06, "loss": 0.2186, "step": 657 }, { "epoch": 0.05, "grad_norm": 1.7838451348370998, "learning_rate": 9.992400863429089e-06, "loss": 0.1938, "step": 658 }, { "epoch": 0.05, "grad_norm": 1.7456572374752544, "learning_rate": 9.992336887333541e-06, "loss": 0.2365, "step": 659 }, { "epoch": 0.05, "grad_norm": 1.8222414322056386, "learning_rate": 9.992272643269181e-06, "loss": 0.2185, "step": 660 }, { "epoch": 0.05, "grad_norm": 1.4609742655907418, "learning_rate": 9.99220813123946e-06, "loss": 0.176, "step": 661 }, { "epoch": 0.05, "grad_norm": 1.4997844934791071, "learning_rate": 9.992143351247839e-06, "loss": 0.2035, "step": 662 }, { "epoch": 0.05, "grad_norm": 1.407079689251482, "learning_rate": 9.992078303297796e-06, "loss": 0.179, "step": 663 }, { "epoch": 0.05, "grad_norm": 1.6303614474181178, "learning_rate": 9.992012987392823e-06, "loss": 0.2523, "step": 664 }, { "epoch": 0.05, "grad_norm": 1.7768269793314064, "learning_rate": 9.991947403536424e-06, "loss": 0.2765, "step": 665 }, { "epoch": 0.05, "grad_norm": 1.7857421304152366, "learning_rate": 9.991881551732118e-06, "loss": 0.2966, "step": 666 }, { "epoch": 0.05, "grad_norm": 9.79458873884545, "learning_rate": 9.991815431983446e-06, "loss": 0.6927, "step": 667 }, { "epoch": 0.05, "grad_norm": 1.536120814698744, "learning_rate": 9.991749044293952e-06, "loss": 0.2026, "step": 668 }, { "epoch": 0.05, "grad_norm": 1.5791806604568266, "learning_rate": 9.991682388667199e-06, "loss": 0.2257, "step": 669 }, { "epoch": 0.05, "grad_norm": 1.573932588629379, "learning_rate": 9.991615465106768e-06, "loss": 0.2142, "step": 670 }, { "epoch": 0.05, "grad_norm": 1.5744528996608034, "learning_rate": 9.991548273616249e-06, "loss": 0.2764, "step": 671 }, { "epoch": 0.05, "grad_norm": 1.6251326334329739, "learning_rate": 9.991480814199248e-06, "loss": 0.2144, "step": 672 }, { "epoch": 0.05, "grad_norm": 1.6384848773967653, "learning_rate": 9.991413086859388e-06, "loss": 0.212, "step": 673 }, { "epoch": 0.05, "grad_norm": 1.707350982974174, "learning_rate": 9.991345091600303e-06, "loss": 0.2168, "step": 674 }, { "epoch": 0.05, "grad_norm": 1.6588311096027324, "learning_rate": 9.991276828425642e-06, "loss": 0.248, "step": 675 }, { "epoch": 0.05, "grad_norm": 17.06273966083748, "learning_rate": 9.991208297339072e-06, "loss": 0.6826, "step": 676 }, { "epoch": 0.05, "grad_norm": 1.6172737021030636, "learning_rate": 9.991139498344269e-06, "loss": 0.2693, "step": 677 }, { "epoch": 0.05, "grad_norm": 1.6406289137232704, "learning_rate": 9.991070431444927e-06, "loss": 0.2541, "step": 678 }, { "epoch": 0.05, "grad_norm": 1.4472254286293658, "learning_rate": 9.991001096644752e-06, "loss": 0.1982, "step": 679 }, { "epoch": 0.05, "grad_norm": 1.7086115832780626, "learning_rate": 9.990931493947467e-06, "loss": 0.2742, "step": 680 }, { "epoch": 0.05, "grad_norm": 1.637502307686957, "learning_rate": 9.990861623356806e-06, "loss": 0.2779, "step": 681 }, { "epoch": 0.05, "grad_norm": 1.935092125493366, "learning_rate": 9.990791484876521e-06, "loss": 0.2713, "step": 682 }, { "epoch": 0.05, "grad_norm": 1.5825049620749547, "learning_rate": 9.990721078510378e-06, "loss": 0.1946, "step": 683 }, { "epoch": 0.05, "grad_norm": 1.8895524878943704, "learning_rate": 9.990650404262152e-06, "loss": 0.2882, "step": 684 }, { "epoch": 0.05, "grad_norm": 1.7099982114694339, "learning_rate": 9.990579462135641e-06, "loss": 0.2186, "step": 685 }, { "epoch": 0.05, "grad_norm": 1.766183409918529, "learning_rate": 9.990508252134651e-06, "loss": 0.1799, "step": 686 }, { "epoch": 0.05, "grad_norm": 1.5133854499847073, "learning_rate": 9.990436774263004e-06, "loss": 0.2567, "step": 687 }, { "epoch": 0.05, "grad_norm": 1.7683432324267765, "learning_rate": 9.990365028524535e-06, "loss": 0.2489, "step": 688 }, { "epoch": 0.05, "grad_norm": 1.4618691871535132, "learning_rate": 9.9902930149231e-06, "loss": 0.1862, "step": 689 }, { "epoch": 0.05, "grad_norm": 1.5048617126919146, "learning_rate": 9.990220733462559e-06, "loss": 0.2025, "step": 690 }, { "epoch": 0.05, "grad_norm": 1.6063300636267117, "learning_rate": 9.990148184146795e-06, "loss": 0.1922, "step": 691 }, { "epoch": 0.05, "grad_norm": 1.5346977645706932, "learning_rate": 9.990075366979702e-06, "loss": 0.2207, "step": 692 }, { "epoch": 0.05, "grad_norm": 6.570164765297416, "learning_rate": 9.990002281965187e-06, "loss": 0.7797, "step": 693 }, { "epoch": 0.05, "grad_norm": 1.6102851763440187, "learning_rate": 9.989928929107174e-06, "loss": 0.2384, "step": 694 }, { "epoch": 0.05, "grad_norm": 1.5115832870752945, "learning_rate": 9.989855308409601e-06, "loss": 0.197, "step": 695 }, { "epoch": 0.05, "grad_norm": 5.3942936973553355, "learning_rate": 9.989781419876416e-06, "loss": 0.6406, "step": 696 }, { "epoch": 0.05, "grad_norm": 1.3234101586603415, "learning_rate": 9.98970726351159e-06, "loss": 0.1828, "step": 697 }, { "epoch": 0.05, "grad_norm": 1.6838225515556275, "learning_rate": 9.9896328393191e-06, "loss": 0.2348, "step": 698 }, { "epoch": 0.05, "grad_norm": 1.7439114767871668, "learning_rate": 9.989558147302943e-06, "loss": 0.2174, "step": 699 }, { "epoch": 0.05, "grad_norm": 1.54608858355113, "learning_rate": 9.989483187467128e-06, "loss": 0.2556, "step": 700 }, { "epoch": 0.05, "grad_norm": 1.6389923853748325, "learning_rate": 9.989407959815676e-06, "loss": 0.2099, "step": 701 }, { "epoch": 0.05, "grad_norm": 1.6648925661720466, "learning_rate": 9.989332464352625e-06, "loss": 0.2089, "step": 702 }, { "epoch": 0.05, "grad_norm": 5.438464988623418, "learning_rate": 9.989256701082032e-06, "loss": 0.6251, "step": 703 }, { "epoch": 0.05, "grad_norm": 1.7124364911399221, "learning_rate": 9.989180670007958e-06, "loss": 0.2474, "step": 704 }, { "epoch": 0.05, "grad_norm": 1.777433049900891, "learning_rate": 9.989104371134489e-06, "loss": 0.2134, "step": 705 }, { "epoch": 0.05, "grad_norm": 1.571749507479074, "learning_rate": 9.989027804465716e-06, "loss": 0.2628, "step": 706 }, { "epoch": 0.05, "grad_norm": 1.7634914746136903, "learning_rate": 9.988950970005753e-06, "loss": 0.2337, "step": 707 }, { "epoch": 0.05, "grad_norm": 7.88274237620404, "learning_rate": 9.98887386775872e-06, "loss": 0.6121, "step": 708 }, { "epoch": 0.05, "grad_norm": 1.5456658966513586, "learning_rate": 9.98879649772876e-06, "loss": 0.193, "step": 709 }, { "epoch": 0.05, "grad_norm": 1.5696296058011945, "learning_rate": 9.98871885992002e-06, "loss": 0.224, "step": 710 }, { "epoch": 0.05, "grad_norm": 1.8070220959594163, "learning_rate": 9.988640954336672e-06, "loss": 0.249, "step": 711 }, { "epoch": 0.05, "grad_norm": 2.223432795464063, "learning_rate": 9.988562780982898e-06, "loss": 0.2749, "step": 712 }, { "epoch": 0.05, "grad_norm": 1.6042503033623325, "learning_rate": 9.98848433986289e-06, "loss": 0.2133, "step": 713 }, { "epoch": 0.05, "grad_norm": 1.9881422444036783, "learning_rate": 9.988405630980863e-06, "loss": 0.2854, "step": 714 }, { "epoch": 0.05, "grad_norm": 1.6171597287041666, "learning_rate": 9.98832665434104e-06, "loss": 0.2663, "step": 715 }, { "epoch": 0.05, "grad_norm": 1.5171023362890572, "learning_rate": 9.988247409947657e-06, "loss": 0.2471, "step": 716 }, { "epoch": 0.05, "grad_norm": 1.6010564818425659, "learning_rate": 9.988167897804971e-06, "loss": 0.23, "step": 717 }, { "epoch": 0.05, "grad_norm": 1.6810768730751677, "learning_rate": 9.98808811791725e-06, "loss": 0.2866, "step": 718 }, { "epoch": 0.05, "grad_norm": 1.4668506914852013, "learning_rate": 9.988008070288774e-06, "loss": 0.2071, "step": 719 }, { "epoch": 0.05, "grad_norm": 1.7820393576147726, "learning_rate": 9.987927754923844e-06, "loss": 0.2329, "step": 720 }, { "epoch": 0.05, "grad_norm": 1.6135470769069782, "learning_rate": 9.987847171826767e-06, "loss": 0.2276, "step": 721 }, { "epoch": 0.05, "grad_norm": 1.5251872921699774, "learning_rate": 9.987766321001868e-06, "loss": 0.211, "step": 722 }, { "epoch": 0.05, "grad_norm": 1.5780578362093696, "learning_rate": 9.98768520245349e-06, "loss": 0.2466, "step": 723 }, { "epoch": 0.05, "grad_norm": 1.4645401624370016, "learning_rate": 9.987603816185985e-06, "loss": 0.2459, "step": 724 }, { "epoch": 0.05, "grad_norm": 1.6685975345119328, "learning_rate": 9.98752216220372e-06, "loss": 0.2286, "step": 725 }, { "epoch": 0.05, "grad_norm": 1.4899719769996609, "learning_rate": 9.987440240511082e-06, "loss": 0.2323, "step": 726 }, { "epoch": 0.05, "grad_norm": 1.5610374154455227, "learning_rate": 9.987358051112465e-06, "loss": 0.2087, "step": 727 }, { "epoch": 0.05, "grad_norm": 1.7222604084697974, "learning_rate": 9.987275594012283e-06, "loss": 0.2576, "step": 728 }, { "epoch": 0.05, "grad_norm": 1.5177727573154718, "learning_rate": 9.987192869214958e-06, "loss": 0.1773, "step": 729 }, { "epoch": 0.05, "grad_norm": 1.6254157786608658, "learning_rate": 9.987109876724934e-06, "loss": 0.2268, "step": 730 }, { "epoch": 0.05, "grad_norm": 1.8479037625509187, "learning_rate": 9.987026616546665e-06, "loss": 0.2641, "step": 731 }, { "epoch": 0.05, "grad_norm": 1.5998119854604769, "learning_rate": 9.986943088684619e-06, "loss": 0.2244, "step": 732 }, { "epoch": 0.05, "grad_norm": 1.7630506131543133, "learning_rate": 9.986859293143279e-06, "loss": 0.2645, "step": 733 }, { "epoch": 0.05, "grad_norm": 13.486017086475076, "learning_rate": 9.986775229927147e-06, "loss": 0.5149, "step": 734 }, { "epoch": 0.05, "grad_norm": 1.6643683239221327, "learning_rate": 9.98669089904073e-06, "loss": 0.2519, "step": 735 }, { "epoch": 0.05, "grad_norm": 1.6954390164325874, "learning_rate": 9.986606300488555e-06, "loss": 0.2187, "step": 736 }, { "epoch": 0.05, "grad_norm": 1.6818453456134628, "learning_rate": 9.986521434275167e-06, "loss": 0.2722, "step": 737 }, { "epoch": 0.05, "grad_norm": 1.6359009320457223, "learning_rate": 9.986436300405119e-06, "loss": 0.2121, "step": 738 }, { "epoch": 0.05, "grad_norm": 1.5059629148004405, "learning_rate": 9.98635089888298e-06, "loss": 0.2181, "step": 739 }, { "epoch": 0.05, "grad_norm": 21.529067341605003, "learning_rate": 9.986265229713332e-06, "loss": 0.7089, "step": 740 }, { "epoch": 0.05, "grad_norm": 1.49801488114809, "learning_rate": 9.98617929290078e-06, "loss": 0.2113, "step": 741 }, { "epoch": 0.05, "grad_norm": 1.6178266851261276, "learning_rate": 9.986093088449928e-06, "loss": 0.1937, "step": 742 }, { "epoch": 0.05, "grad_norm": 1.5300996288740902, "learning_rate": 9.986006616365412e-06, "loss": 0.2046, "step": 743 }, { "epoch": 0.05, "grad_norm": 1.793540317769164, "learning_rate": 9.985919876651866e-06, "loss": 0.2715, "step": 744 }, { "epoch": 0.05, "grad_norm": 1.490481328209008, "learning_rate": 9.985832869313952e-06, "loss": 0.202, "step": 745 }, { "epoch": 0.05, "grad_norm": 1.8506943491030012, "learning_rate": 9.985745594356336e-06, "loss": 0.2809, "step": 746 }, { "epoch": 0.05, "grad_norm": 1.6763576622228822, "learning_rate": 9.985658051783701e-06, "loss": 0.1876, "step": 747 }, { "epoch": 0.05, "grad_norm": 1.7361491326028007, "learning_rate": 9.985570241600754e-06, "loss": 0.2731, "step": 748 }, { "epoch": 0.05, "grad_norm": 1.6595152747163822, "learning_rate": 9.9854821638122e-06, "loss": 0.2224, "step": 749 }, { "epoch": 0.05, "grad_norm": 1.8806129727918472, "learning_rate": 9.985393818422768e-06, "loss": 0.2431, "step": 750 }, { "epoch": 0.05, "grad_norm": 1.6534792721881997, "learning_rate": 9.985305205437205e-06, "loss": 0.2484, "step": 751 }, { "epoch": 0.05, "grad_norm": 6.600172218807455, "learning_rate": 9.985216324860264e-06, "loss": 0.7512, "step": 752 }, { "epoch": 0.05, "grad_norm": 1.768667856574418, "learning_rate": 9.985127176696713e-06, "loss": 0.2521, "step": 753 }, { "epoch": 0.05, "grad_norm": 1.590104236346727, "learning_rate": 9.985037760951342e-06, "loss": 0.1812, "step": 754 }, { "epoch": 0.05, "grad_norm": 1.5420818366462943, "learning_rate": 9.984948077628948e-06, "loss": 0.2207, "step": 755 }, { "epoch": 0.05, "grad_norm": 1.5362526188179724, "learning_rate": 9.984858126734345e-06, "loss": 0.2865, "step": 756 }, { "epoch": 0.05, "grad_norm": 1.5819938234825788, "learning_rate": 9.984767908272363e-06, "loss": 0.1727, "step": 757 }, { "epoch": 0.05, "grad_norm": 1.5092683006036836, "learning_rate": 9.984677422247842e-06, "loss": 0.2446, "step": 758 }, { "epoch": 0.05, "grad_norm": 1.5961083910100657, "learning_rate": 9.984586668665641e-06, "loss": 0.2195, "step": 759 }, { "epoch": 0.05, "grad_norm": 1.731370683117459, "learning_rate": 9.98449564753063e-06, "loss": 0.2654, "step": 760 }, { "epoch": 0.05, "grad_norm": 1.5923246566796907, "learning_rate": 9.984404358847695e-06, "loss": 0.2687, "step": 761 }, { "epoch": 0.05, "grad_norm": 1.6208198861905956, "learning_rate": 9.984312802621734e-06, "loss": 0.2096, "step": 762 }, { "epoch": 0.05, "grad_norm": 1.62726720911841, "learning_rate": 9.984220978857665e-06, "loss": 0.2667, "step": 763 }, { "epoch": 0.05, "grad_norm": 1.6641816454125784, "learning_rate": 9.984128887560416e-06, "loss": 0.2185, "step": 764 }, { "epoch": 0.05, "grad_norm": 4.498015379718007, "learning_rate": 9.984036528734928e-06, "loss": 0.6812, "step": 765 }, { "epoch": 0.05, "grad_norm": 1.641383835517193, "learning_rate": 9.98394390238616e-06, "loss": 0.2267, "step": 766 }, { "epoch": 0.05, "grad_norm": 1.7406510567344653, "learning_rate": 9.983851008519083e-06, "loss": 0.2502, "step": 767 }, { "epoch": 0.05, "grad_norm": 1.6869258010557757, "learning_rate": 9.983757847138684e-06, "loss": 0.2197, "step": 768 }, { "epoch": 0.06, "grad_norm": 1.6790302875768113, "learning_rate": 9.983664418249964e-06, "loss": 0.2105, "step": 769 }, { "epoch": 0.06, "grad_norm": 1.523423356938257, "learning_rate": 9.983570721857937e-06, "loss": 0.2151, "step": 770 }, { "epoch": 0.06, "grad_norm": 1.6781003019700231, "learning_rate": 9.983476757967632e-06, "loss": 0.2467, "step": 771 }, { "epoch": 0.06, "grad_norm": 1.5297832789332957, "learning_rate": 9.983382526584092e-06, "loss": 0.2244, "step": 772 }, { "epoch": 0.06, "grad_norm": 1.4803313421752438, "learning_rate": 9.983288027712377e-06, "loss": 0.2111, "step": 773 }, { "epoch": 0.06, "grad_norm": 1.5239553020266614, "learning_rate": 9.983193261357558e-06, "loss": 0.218, "step": 774 }, { "epoch": 0.06, "grad_norm": 1.5803720717550547, "learning_rate": 9.98309822752472e-06, "loss": 0.2012, "step": 775 }, { "epoch": 0.06, "grad_norm": 1.5977916552001226, "learning_rate": 9.983002926218969e-06, "loss": 0.2225, "step": 776 }, { "epoch": 0.06, "grad_norm": 6.015422428924549, "learning_rate": 9.982907357445417e-06, "loss": 0.8359, "step": 777 }, { "epoch": 0.06, "grad_norm": 1.6134212217568487, "learning_rate": 9.982811521209192e-06, "loss": 0.2542, "step": 778 }, { "epoch": 0.06, "grad_norm": 1.4308179305800175, "learning_rate": 9.982715417515441e-06, "loss": 0.1969, "step": 779 }, { "epoch": 0.06, "grad_norm": 1.9238535652058442, "learning_rate": 9.982619046369321e-06, "loss": 0.2356, "step": 780 }, { "epoch": 0.06, "grad_norm": 1.4308037964444096, "learning_rate": 9.982522407776008e-06, "loss": 0.2318, "step": 781 }, { "epoch": 0.06, "grad_norm": 1.9275258585130022, "learning_rate": 9.982425501740684e-06, "loss": 0.209, "step": 782 }, { "epoch": 0.06, "grad_norm": 7.090426309102259, "learning_rate": 9.982328328268556e-06, "loss": 0.7767, "step": 783 }, { "epoch": 0.06, "grad_norm": 1.7494064359877008, "learning_rate": 9.982230887364834e-06, "loss": 0.2402, "step": 784 }, { "epoch": 0.06, "grad_norm": 1.5801026043613933, "learning_rate": 9.982133179034753e-06, "loss": 0.2233, "step": 785 }, { "epoch": 0.06, "grad_norm": 1.7092706711367565, "learning_rate": 9.982035203283555e-06, "loss": 0.2323, "step": 786 }, { "epoch": 0.06, "grad_norm": 1.6849510886361898, "learning_rate": 9.981936960116501e-06, "loss": 0.2675, "step": 787 }, { "epoch": 0.06, "grad_norm": 1.4579634476147931, "learning_rate": 9.981838449538863e-06, "loss": 0.2056, "step": 788 }, { "epoch": 0.06, "grad_norm": 1.5941772188675964, "learning_rate": 9.981739671555928e-06, "loss": 0.221, "step": 789 }, { "epoch": 0.06, "grad_norm": 1.4654145850205578, "learning_rate": 9.981640626173e-06, "loss": 0.2026, "step": 790 }, { "epoch": 0.06, "grad_norm": 5.5623872044185765, "learning_rate": 9.981541313395394e-06, "loss": 0.5875, "step": 791 }, { "epoch": 0.06, "grad_norm": 1.8404440212328006, "learning_rate": 9.98144173322844e-06, "loss": 0.2459, "step": 792 }, { "epoch": 0.06, "grad_norm": 1.533952107562485, "learning_rate": 9.981341885677485e-06, "loss": 0.2468, "step": 793 }, { "epoch": 0.06, "grad_norm": 1.754967270623259, "learning_rate": 9.981241770747885e-06, "loss": 0.3142, "step": 794 }, { "epoch": 0.06, "grad_norm": 6.947918992862247, "learning_rate": 9.981141388445019e-06, "loss": 0.6977, "step": 795 }, { "epoch": 0.06, "grad_norm": 6.115271351973183, "learning_rate": 9.981040738774272e-06, "loss": 0.8959, "step": 796 }, { "epoch": 0.06, "grad_norm": 1.5278633938690163, "learning_rate": 9.980939821741045e-06, "loss": 0.2186, "step": 797 }, { "epoch": 0.06, "grad_norm": 1.4439536237648316, "learning_rate": 9.980838637350758e-06, "loss": 0.227, "step": 798 }, { "epoch": 0.06, "grad_norm": 1.6124461572495545, "learning_rate": 9.98073718560884e-06, "loss": 0.2297, "step": 799 }, { "epoch": 0.06, "grad_norm": 1.5324257672166732, "learning_rate": 9.980635466520738e-06, "loss": 0.2657, "step": 800 }, { "epoch": 0.06, "grad_norm": 1.655572465187251, "learning_rate": 9.98053348009191e-06, "loss": 0.2349, "step": 801 }, { "epoch": 0.06, "grad_norm": 1.505102462473888, "learning_rate": 9.980431226327834e-06, "loss": 0.2116, "step": 802 }, { "epoch": 0.06, "grad_norm": 1.5851032602851012, "learning_rate": 9.980328705233992e-06, "loss": 0.2418, "step": 803 }, { "epoch": 0.06, "grad_norm": 1.5383959820494633, "learning_rate": 9.980225916815894e-06, "loss": 0.2207, "step": 804 }, { "epoch": 0.06, "grad_norm": 1.66980817554449, "learning_rate": 9.980122861079054e-06, "loss": 0.2316, "step": 805 }, { "epoch": 0.06, "grad_norm": 1.7250264187671456, "learning_rate": 9.980019538029002e-06, "loss": 0.2288, "step": 806 }, { "epoch": 0.06, "grad_norm": 1.5515955076857662, "learning_rate": 9.979915947671288e-06, "loss": 0.2665, "step": 807 }, { "epoch": 0.06, "grad_norm": 1.807998162350851, "learning_rate": 9.979812090011469e-06, "loss": 0.2638, "step": 808 }, { "epoch": 0.06, "grad_norm": 1.7707263240485176, "learning_rate": 9.979707965055121e-06, "loss": 0.2766, "step": 809 }, { "epoch": 0.06, "grad_norm": 1.7275708639041143, "learning_rate": 9.979603572807831e-06, "loss": 0.2208, "step": 810 }, { "epoch": 0.06, "grad_norm": 2.0457549708009677, "learning_rate": 9.979498913275206e-06, "loss": 0.2354, "step": 811 }, { "epoch": 0.06, "grad_norm": 1.7686453384057466, "learning_rate": 9.979393986462862e-06, "loss": 0.2213, "step": 812 }, { "epoch": 0.06, "grad_norm": 1.9739061175518136, "learning_rate": 9.979288792376429e-06, "loss": 0.2766, "step": 813 }, { "epoch": 0.06, "grad_norm": 1.4667542400930431, "learning_rate": 9.979183331021558e-06, "loss": 0.2769, "step": 814 }, { "epoch": 0.06, "grad_norm": 1.6148132786265412, "learning_rate": 9.979077602403906e-06, "loss": 0.2285, "step": 815 }, { "epoch": 0.06, "grad_norm": 1.748139672996976, "learning_rate": 9.97897160652915e-06, "loss": 0.2853, "step": 816 }, { "epoch": 0.06, "grad_norm": 1.5530650282822267, "learning_rate": 9.978865343402976e-06, "loss": 0.235, "step": 817 }, { "epoch": 0.06, "grad_norm": 1.501294170254973, "learning_rate": 9.978758813031092e-06, "loss": 0.2096, "step": 818 }, { "epoch": 0.06, "grad_norm": 1.4880783019869301, "learning_rate": 9.978652015419212e-06, "loss": 0.2337, "step": 819 }, { "epoch": 0.06, "grad_norm": 1.5775724956647952, "learning_rate": 9.978544950573075e-06, "loss": 0.2087, "step": 820 }, { "epoch": 0.06, "grad_norm": 1.7167820050080607, "learning_rate": 9.97843761849842e-06, "loss": 0.2853, "step": 821 }, { "epoch": 0.06, "grad_norm": 35.2705906826178, "learning_rate": 9.978330019201015e-06, "loss": 0.5936, "step": 822 }, { "epoch": 0.06, "grad_norm": 1.4571735422392338, "learning_rate": 9.978222152686632e-06, "loss": 0.1913, "step": 823 }, { "epoch": 0.06, "grad_norm": 1.602937585278564, "learning_rate": 9.97811401896106e-06, "loss": 0.2362, "step": 824 }, { "epoch": 0.06, "grad_norm": 1.603369690247898, "learning_rate": 9.978005618030104e-06, "loss": 0.2277, "step": 825 }, { "epoch": 0.06, "grad_norm": 1.7138086039905924, "learning_rate": 9.977896949899585e-06, "loss": 0.2277, "step": 826 }, { "epoch": 0.06, "grad_norm": 1.650828522769254, "learning_rate": 9.977788014575333e-06, "loss": 0.2831, "step": 827 }, { "epoch": 0.06, "grad_norm": 1.5335644767490908, "learning_rate": 9.977678812063195e-06, "loss": 0.2538, "step": 828 }, { "epoch": 0.06, "grad_norm": 1.8352130086396239, "learning_rate": 9.977569342369034e-06, "loss": 0.2622, "step": 829 }, { "epoch": 0.06, "grad_norm": 1.690886564264814, "learning_rate": 9.977459605498728e-06, "loss": 0.2822, "step": 830 }, { "epoch": 0.06, "grad_norm": 1.4422358251298906, "learning_rate": 9.977349601458161e-06, "loss": 0.1812, "step": 831 }, { "epoch": 0.06, "grad_norm": 2.0856979401922504, "learning_rate": 9.977239330253243e-06, "loss": 0.2573, "step": 832 }, { "epoch": 0.06, "grad_norm": 1.5934180670082112, "learning_rate": 9.977128791889892e-06, "loss": 0.1998, "step": 833 }, { "epoch": 0.06, "grad_norm": 1.542659392723648, "learning_rate": 9.97701798637404e-06, "loss": 0.178, "step": 834 }, { "epoch": 0.06, "grad_norm": 1.5389935197646, "learning_rate": 9.976906913711634e-06, "loss": 0.2522, "step": 835 }, { "epoch": 0.06, "grad_norm": 1.4725179304387765, "learning_rate": 9.97679557390864e-06, "loss": 0.2277, "step": 836 }, { "epoch": 0.06, "grad_norm": 1.5704049070910084, "learning_rate": 9.97668396697103e-06, "loss": 0.2438, "step": 837 }, { "epoch": 0.06, "grad_norm": 1.678906995398683, "learning_rate": 9.976572092904795e-06, "loss": 0.2614, "step": 838 }, { "epoch": 0.06, "grad_norm": 1.7149728778845337, "learning_rate": 9.976459951715941e-06, "loss": 0.2191, "step": 839 }, { "epoch": 0.06, "grad_norm": 1.6278354665289996, "learning_rate": 9.976347543410487e-06, "loss": 0.2412, "step": 840 }, { "epoch": 0.06, "grad_norm": 1.7365054099376105, "learning_rate": 9.976234867994467e-06, "loss": 0.241, "step": 841 }, { "epoch": 0.06, "grad_norm": 1.427659833130422, "learning_rate": 9.976121925473931e-06, "loss": 0.2037, "step": 842 }, { "epoch": 0.06, "grad_norm": 1.5940463531031388, "learning_rate": 9.976008715854936e-06, "loss": 0.207, "step": 843 }, { "epoch": 0.06, "grad_norm": 1.5354101692087971, "learning_rate": 9.975895239143564e-06, "loss": 0.2284, "step": 844 }, { "epoch": 0.06, "grad_norm": 1.6081952704100781, "learning_rate": 9.975781495345903e-06, "loss": 0.2188, "step": 845 }, { "epoch": 0.06, "grad_norm": 1.5790256490995627, "learning_rate": 9.97566748446806e-06, "loss": 0.1857, "step": 846 }, { "epoch": 0.06, "grad_norm": 1.5844230644950563, "learning_rate": 9.975553206516153e-06, "loss": 0.2174, "step": 847 }, { "epoch": 0.06, "grad_norm": 1.6421126751219843, "learning_rate": 9.975438661496314e-06, "loss": 0.1999, "step": 848 }, { "epoch": 0.06, "grad_norm": 1.6206840404127683, "learning_rate": 9.975323849414698e-06, "loss": 0.2054, "step": 849 }, { "epoch": 0.06, "grad_norm": 1.2904874287566863, "learning_rate": 9.975208770277462e-06, "loss": 0.1837, "step": 850 }, { "epoch": 0.06, "grad_norm": 1.70264692972742, "learning_rate": 9.975093424090785e-06, "loss": 0.2282, "step": 851 }, { "epoch": 0.06, "grad_norm": 1.5271252620187525, "learning_rate": 9.974977810860858e-06, "loss": 0.195, "step": 852 }, { "epoch": 0.06, "grad_norm": 1.6219489092879902, "learning_rate": 9.974861930593886e-06, "loss": 0.2042, "step": 853 }, { "epoch": 0.06, "grad_norm": 1.6191719672569294, "learning_rate": 9.97474578329609e-06, "loss": 0.2469, "step": 854 }, { "epoch": 0.06, "grad_norm": 1.4206726258229492, "learning_rate": 9.974629368973705e-06, "loss": 0.1726, "step": 855 }, { "epoch": 0.06, "grad_norm": 5.584993069062993, "learning_rate": 9.974512687632978e-06, "loss": 0.6527, "step": 856 }, { "epoch": 0.06, "grad_norm": 2.8990817437797625, "learning_rate": 9.974395739280172e-06, "loss": 0.2153, "step": 857 }, { "epoch": 0.06, "grad_norm": 1.4712668098786195, "learning_rate": 9.974278523921564e-06, "loss": 0.2145, "step": 858 }, { "epoch": 0.06, "grad_norm": 1.8510417067707465, "learning_rate": 9.974161041563448e-06, "loss": 0.2717, "step": 859 }, { "epoch": 0.06, "grad_norm": 1.5179264655749278, "learning_rate": 9.974043292212129e-06, "loss": 0.1958, "step": 860 }, { "epoch": 0.06, "grad_norm": 1.6123037604890762, "learning_rate": 9.973925275873926e-06, "loss": 0.2787, "step": 861 }, { "epoch": 0.06, "grad_norm": 1.721121822032707, "learning_rate": 9.973806992555175e-06, "loss": 0.2423, "step": 862 }, { "epoch": 0.06, "grad_norm": 1.6309141528509064, "learning_rate": 9.973688442262224e-06, "loss": 0.1932, "step": 863 }, { "epoch": 0.06, "grad_norm": 1.8142396829400371, "learning_rate": 9.973569625001438e-06, "loss": 0.2959, "step": 864 }, { "epoch": 0.06, "grad_norm": 1.5920608255860877, "learning_rate": 9.973450540779193e-06, "loss": 0.2274, "step": 865 }, { "epoch": 0.06, "grad_norm": 1.5123799268103486, "learning_rate": 9.97333118960188e-06, "loss": 0.2155, "step": 866 }, { "epoch": 0.06, "grad_norm": 1.553935628963694, "learning_rate": 9.973211571475909e-06, "loss": 0.2587, "step": 867 }, { "epoch": 0.06, "grad_norm": 1.5455077890832842, "learning_rate": 9.973091686407697e-06, "loss": 0.2001, "step": 868 }, { "epoch": 0.06, "grad_norm": 1.5358338304583472, "learning_rate": 9.972971534403681e-06, "loss": 0.2575, "step": 869 }, { "epoch": 0.06, "grad_norm": 1.5166559131058521, "learning_rate": 9.972851115470308e-06, "loss": 0.2039, "step": 870 }, { "epoch": 0.06, "grad_norm": 1.4559514271139282, "learning_rate": 9.972730429614044e-06, "loss": 0.1968, "step": 871 }, { "epoch": 0.06, "grad_norm": 1.700957756057884, "learning_rate": 9.972609476841368e-06, "loss": 0.2518, "step": 872 }, { "epoch": 0.06, "grad_norm": 9.901233465877464, "learning_rate": 9.972488257158768e-06, "loss": 0.7071, "step": 873 }, { "epoch": 0.06, "grad_norm": 1.7669273349950874, "learning_rate": 9.972366770572755e-06, "loss": 0.2409, "step": 874 }, { "epoch": 0.06, "grad_norm": 1.3752067980995801, "learning_rate": 9.972245017089846e-06, "loss": 0.2118, "step": 875 }, { "epoch": 0.06, "grad_norm": 1.6180475222911945, "learning_rate": 9.97212299671658e-06, "loss": 0.2294, "step": 876 }, { "epoch": 0.06, "grad_norm": 1.592938504733884, "learning_rate": 9.972000709459503e-06, "loss": 0.1946, "step": 877 }, { "epoch": 0.06, "grad_norm": 1.6366563942855288, "learning_rate": 9.971878155325182e-06, "loss": 0.2414, "step": 878 }, { "epoch": 0.06, "grad_norm": 1.733104902323832, "learning_rate": 9.971755334320193e-06, "loss": 0.2162, "step": 879 }, { "epoch": 0.06, "grad_norm": 1.5688544180670083, "learning_rate": 9.97163224645113e-06, "loss": 0.2221, "step": 880 }, { "epoch": 0.06, "grad_norm": 1.6534573236758896, "learning_rate": 9.971508891724599e-06, "loss": 0.3192, "step": 881 }, { "epoch": 0.06, "grad_norm": 1.6426065087780641, "learning_rate": 9.97138527014722e-06, "loss": 0.2384, "step": 882 }, { "epoch": 0.06, "grad_norm": 1.613366042446708, "learning_rate": 9.971261381725632e-06, "loss": 0.2227, "step": 883 }, { "epoch": 0.06, "grad_norm": 1.6749161156738825, "learning_rate": 9.971137226466482e-06, "loss": 0.2383, "step": 884 }, { "epoch": 0.06, "grad_norm": 1.7151584706333372, "learning_rate": 9.971012804376433e-06, "loss": 0.2421, "step": 885 }, { "epoch": 0.06, "grad_norm": 1.5387726601970686, "learning_rate": 9.97088811546217e-06, "loss": 0.2294, "step": 886 }, { "epoch": 0.06, "grad_norm": 1.559544878333592, "learning_rate": 9.970763159730378e-06, "loss": 0.246, "step": 887 }, { "epoch": 0.06, "grad_norm": 1.6184225893374276, "learning_rate": 9.970637937187768e-06, "loss": 0.2472, "step": 888 }, { "epoch": 0.06, "grad_norm": 1.661850326024661, "learning_rate": 9.970512447841061e-06, "loss": 0.2128, "step": 889 }, { "epoch": 0.06, "grad_norm": 1.5605615897481977, "learning_rate": 9.970386691696993e-06, "loss": 0.2223, "step": 890 }, { "epoch": 0.06, "grad_norm": 1.6576224210433015, "learning_rate": 9.970260668762313e-06, "loss": 0.2524, "step": 891 }, { "epoch": 0.06, "grad_norm": 1.6005518622094375, "learning_rate": 9.970134379043787e-06, "loss": 0.2471, "step": 892 }, { "epoch": 0.06, "grad_norm": 1.532801506372282, "learning_rate": 9.970007822548191e-06, "loss": 0.2304, "step": 893 }, { "epoch": 0.06, "grad_norm": 1.5284133665120598, "learning_rate": 9.969880999282321e-06, "loss": 0.2142, "step": 894 }, { "epoch": 0.06, "grad_norm": 1.421626529821479, "learning_rate": 9.969753909252984e-06, "loss": 0.2215, "step": 895 }, { "epoch": 0.06, "grad_norm": 1.6157719282384164, "learning_rate": 9.969626552467e-06, "loss": 0.2975, "step": 896 }, { "epoch": 0.06, "grad_norm": 6.781303909704055, "learning_rate": 9.969498928931207e-06, "loss": 0.8017, "step": 897 }, { "epoch": 0.06, "grad_norm": 1.5426468687715291, "learning_rate": 9.969371038652453e-06, "loss": 0.2634, "step": 898 }, { "epoch": 0.06, "grad_norm": 1.463209584547742, "learning_rate": 9.969242881637603e-06, "loss": 0.1993, "step": 899 }, { "epoch": 0.06, "grad_norm": 1.6062066599823315, "learning_rate": 9.96911445789354e-06, "loss": 0.2116, "step": 900 }, { "epoch": 0.06, "grad_norm": 1.54423099328606, "learning_rate": 9.968985767427152e-06, "loss": 0.179, "step": 901 }, { "epoch": 0.06, "grad_norm": 1.8176125815329367, "learning_rate": 9.96885681024535e-06, "loss": 0.3119, "step": 902 }, { "epoch": 0.06, "grad_norm": 1.5222956348615937, "learning_rate": 9.968727586355053e-06, "loss": 0.2296, "step": 903 }, { "epoch": 0.06, "grad_norm": 1.6368882644654417, "learning_rate": 9.9685980957632e-06, "loss": 0.2471, "step": 904 }, { "epoch": 0.06, "grad_norm": 1.5935706288550355, "learning_rate": 9.96846833847674e-06, "loss": 0.1829, "step": 905 }, { "epoch": 0.06, "grad_norm": 1.4885986494261274, "learning_rate": 9.968338314502638e-06, "loss": 0.1977, "step": 906 }, { "epoch": 0.06, "grad_norm": 1.4588710763738337, "learning_rate": 9.968208023847875e-06, "loss": 0.2238, "step": 907 }, { "epoch": 0.06, "grad_norm": 5.329017768769023, "learning_rate": 9.968077466519441e-06, "loss": 0.7373, "step": 908 }, { "epoch": 0.07, "grad_norm": 1.6607736213558415, "learning_rate": 9.967946642524346e-06, "loss": 0.252, "step": 909 }, { "epoch": 0.07, "grad_norm": 1.6420196087204497, "learning_rate": 9.967815551869613e-06, "loss": 0.2392, "step": 910 }, { "epoch": 0.07, "grad_norm": 1.6506488118836737, "learning_rate": 9.967684194562276e-06, "loss": 0.2504, "step": 911 }, { "epoch": 0.07, "grad_norm": 1.5358795510934171, "learning_rate": 9.967552570609387e-06, "loss": 0.2398, "step": 912 }, { "epoch": 0.07, "grad_norm": 1.4994411465727906, "learning_rate": 9.967420680018011e-06, "loss": 0.2305, "step": 913 }, { "epoch": 0.07, "grad_norm": 4.358979407106109, "learning_rate": 9.96728852279523e-06, "loss": 0.6195, "step": 914 }, { "epoch": 0.07, "grad_norm": 1.6041590173560905, "learning_rate": 9.967156098948132e-06, "loss": 0.1652, "step": 915 }, { "epoch": 0.07, "grad_norm": 1.6934992184388324, "learning_rate": 9.967023408483828e-06, "loss": 0.3271, "step": 916 }, { "epoch": 0.07, "grad_norm": 1.5223110269152327, "learning_rate": 9.966890451409442e-06, "loss": 0.227, "step": 917 }, { "epoch": 0.07, "grad_norm": 1.5352648963577378, "learning_rate": 9.96675722773211e-06, "loss": 0.2099, "step": 918 }, { "epoch": 0.07, "grad_norm": 1.524685601829434, "learning_rate": 9.96662373745898e-06, "loss": 0.238, "step": 919 }, { "epoch": 0.07, "grad_norm": 1.832712105040142, "learning_rate": 9.966489980597217e-06, "loss": 0.2435, "step": 920 }, { "epoch": 0.07, "grad_norm": 1.651158576144617, "learning_rate": 9.966355957154007e-06, "loss": 0.262, "step": 921 }, { "epoch": 0.07, "grad_norm": 1.5475918496449772, "learning_rate": 9.966221667136538e-06, "loss": 0.2493, "step": 922 }, { "epoch": 0.07, "grad_norm": 1.55299882685511, "learning_rate": 9.966087110552018e-06, "loss": 0.1902, "step": 923 }, { "epoch": 0.07, "grad_norm": 5.235272797053675, "learning_rate": 9.965952287407674e-06, "loss": 0.7592, "step": 924 }, { "epoch": 0.07, "grad_norm": 1.3831406711799605, "learning_rate": 9.965817197710738e-06, "loss": 0.2513, "step": 925 }, { "epoch": 0.07, "grad_norm": 1.4741567312307593, "learning_rate": 9.965681841468462e-06, "loss": 0.2555, "step": 926 }, { "epoch": 0.07, "grad_norm": 1.4048510468745647, "learning_rate": 9.965546218688115e-06, "loss": 0.1918, "step": 927 }, { "epoch": 0.07, "grad_norm": 1.46812150736536, "learning_rate": 9.965410329376973e-06, "loss": 0.2391, "step": 928 }, { "epoch": 0.07, "grad_norm": 1.755434499237654, "learning_rate": 9.96527417354233e-06, "loss": 0.3131, "step": 929 }, { "epoch": 0.07, "grad_norm": 1.6937683571286128, "learning_rate": 9.965137751191496e-06, "loss": 0.2236, "step": 930 }, { "epoch": 0.07, "grad_norm": 1.7512725369067013, "learning_rate": 9.965001062331794e-06, "loss": 0.2345, "step": 931 }, { "epoch": 0.07, "grad_norm": 1.7423063881768497, "learning_rate": 9.964864106970558e-06, "loss": 0.2368, "step": 932 }, { "epoch": 0.07, "grad_norm": 1.5079932650361105, "learning_rate": 9.964726885115143e-06, "loss": 0.1771, "step": 933 }, { "epoch": 0.07, "grad_norm": 1.728006394200748, "learning_rate": 9.96458939677291e-06, "loss": 0.2762, "step": 934 }, { "epoch": 0.07, "grad_norm": 1.6514740941051578, "learning_rate": 9.964451641951245e-06, "loss": 0.2104, "step": 935 }, { "epoch": 0.07, "grad_norm": 4.691657934625792, "learning_rate": 9.964313620657535e-06, "loss": 0.5863, "step": 936 }, { "epoch": 0.07, "grad_norm": 6.226453049608885, "learning_rate": 9.964175332899195e-06, "loss": 0.6499, "step": 937 }, { "epoch": 0.07, "grad_norm": 4.630677747583257, "learning_rate": 9.964036778683643e-06, "loss": 0.4104, "step": 938 }, { "epoch": 0.07, "grad_norm": 1.5788880678360995, "learning_rate": 9.963897958018319e-06, "loss": 0.2342, "step": 939 }, { "epoch": 0.07, "grad_norm": 1.8291733111366622, "learning_rate": 9.963758870910672e-06, "loss": 0.287, "step": 940 }, { "epoch": 0.07, "grad_norm": 1.5325230123014268, "learning_rate": 9.963619517368169e-06, "loss": 0.2369, "step": 941 }, { "epoch": 0.07, "grad_norm": 1.6504989571594537, "learning_rate": 9.96347989739829e-06, "loss": 0.2524, "step": 942 }, { "epoch": 0.07, "grad_norm": 1.61217167711618, "learning_rate": 9.963340011008529e-06, "loss": 0.2426, "step": 943 }, { "epoch": 0.07, "grad_norm": 1.4962188746734713, "learning_rate": 9.963199858206393e-06, "loss": 0.2309, "step": 944 }, { "epoch": 0.07, "grad_norm": 1.48074521140785, "learning_rate": 9.963059438999408e-06, "loss": 0.2741, "step": 945 }, { "epoch": 0.07, "grad_norm": 4.449598767367919, "learning_rate": 9.96291875339511e-06, "loss": 0.6946, "step": 946 }, { "epoch": 0.07, "grad_norm": 1.795464179187031, "learning_rate": 9.962777801401049e-06, "loss": 0.2927, "step": 947 }, { "epoch": 0.07, "grad_norm": 1.4815926665375998, "learning_rate": 9.962636583024792e-06, "loss": 0.2389, "step": 948 }, { "epoch": 0.07, "grad_norm": 1.614062984684148, "learning_rate": 9.962495098273919e-06, "loss": 0.2371, "step": 949 }, { "epoch": 0.07, "grad_norm": 1.4228904690752817, "learning_rate": 9.962353347156023e-06, "loss": 0.1964, "step": 950 }, { "epoch": 0.07, "grad_norm": 1.63019977509266, "learning_rate": 9.962211329678716e-06, "loss": 0.2447, "step": 951 }, { "epoch": 0.07, "grad_norm": 1.6166119929566223, "learning_rate": 9.962069045849617e-06, "loss": 0.2843, "step": 952 }, { "epoch": 0.07, "grad_norm": 1.3885502152335687, "learning_rate": 9.961926495676364e-06, "loss": 0.1813, "step": 953 }, { "epoch": 0.07, "grad_norm": 12.94825947513103, "learning_rate": 9.96178367916661e-06, "loss": 0.7012, "step": 954 }, { "epoch": 0.07, "grad_norm": 1.6483847798299498, "learning_rate": 9.96164059632802e-06, "loss": 0.2631, "step": 955 }, { "epoch": 0.07, "grad_norm": 1.638739558848523, "learning_rate": 9.961497247168274e-06, "loss": 0.2459, "step": 956 }, { "epoch": 0.07, "grad_norm": 1.6003613799432537, "learning_rate": 9.961353631695069e-06, "loss": 0.2088, "step": 957 }, { "epoch": 0.07, "grad_norm": 1.623685768222847, "learning_rate": 9.96120974991611e-06, "loss": 0.2662, "step": 958 }, { "epoch": 0.07, "grad_norm": 5.930013283164487, "learning_rate": 9.961065601839121e-06, "loss": 0.632, "step": 959 }, { "epoch": 0.07, "grad_norm": 5.381462313226097, "learning_rate": 9.960921187471841e-06, "loss": 0.5662, "step": 960 }, { "epoch": 0.07, "grad_norm": 1.7552007258908067, "learning_rate": 9.960776506822019e-06, "loss": 0.2474, "step": 961 }, { "epoch": 0.07, "grad_norm": 1.487902375137377, "learning_rate": 9.960631559897422e-06, "loss": 0.2098, "step": 962 }, { "epoch": 0.07, "grad_norm": 1.5128555437128612, "learning_rate": 9.960486346705833e-06, "loss": 0.2478, "step": 963 }, { "epoch": 0.07, "grad_norm": 1.6140132617818768, "learning_rate": 9.960340867255042e-06, "loss": 0.2555, "step": 964 }, { "epoch": 0.07, "grad_norm": 1.5401110662117066, "learning_rate": 9.96019512155286e-06, "loss": 0.2711, "step": 965 }, { "epoch": 0.07, "grad_norm": 1.6425389288891483, "learning_rate": 9.960049109607109e-06, "loss": 0.2617, "step": 966 }, { "epoch": 0.07, "grad_norm": 6.319944009309532, "learning_rate": 9.959902831425628e-06, "loss": 0.6344, "step": 967 }, { "epoch": 0.07, "grad_norm": 1.6161669054356949, "learning_rate": 9.959756287016268e-06, "loss": 0.2004, "step": 968 }, { "epoch": 0.07, "grad_norm": 5.602691630624776, "learning_rate": 9.959609476386893e-06, "loss": 0.689, "step": 969 }, { "epoch": 0.07, "grad_norm": 1.5953130640977635, "learning_rate": 9.959462399545385e-06, "loss": 0.213, "step": 970 }, { "epoch": 0.07, "grad_norm": 1.5036475372123885, "learning_rate": 9.95931505649964e-06, "loss": 0.2317, "step": 971 }, { "epoch": 0.07, "grad_norm": 1.7071886112631878, "learning_rate": 9.959167447257563e-06, "loss": 0.2716, "step": 972 }, { "epoch": 0.07, "grad_norm": 1.6212299792475897, "learning_rate": 9.95901957182708e-06, "loss": 0.2366, "step": 973 }, { "epoch": 0.07, "grad_norm": 1.6200587720853887, "learning_rate": 9.958871430216128e-06, "loss": 0.2662, "step": 974 }, { "epoch": 0.07, "grad_norm": 1.841970417836384, "learning_rate": 9.958723022432658e-06, "loss": 0.252, "step": 975 }, { "epoch": 0.07, "grad_norm": 1.7644818196634862, "learning_rate": 9.958574348484635e-06, "loss": 0.2734, "step": 976 }, { "epoch": 0.07, "grad_norm": 1.5990626523788047, "learning_rate": 9.958425408380042e-06, "loss": 0.2069, "step": 977 }, { "epoch": 0.07, "grad_norm": 1.4292401560271797, "learning_rate": 9.958276202126871e-06, "loss": 0.2159, "step": 978 }, { "epoch": 0.07, "grad_norm": 1.3679376732221504, "learning_rate": 9.958126729733131e-06, "loss": 0.1647, "step": 979 }, { "epoch": 0.07, "grad_norm": 1.5217900723874238, "learning_rate": 9.957976991206847e-06, "loss": 0.2181, "step": 980 }, { "epoch": 0.07, "grad_norm": 1.6747889880308333, "learning_rate": 9.957826986556054e-06, "loss": 0.2234, "step": 981 }, { "epoch": 0.07, "grad_norm": 1.9021529860399558, "learning_rate": 9.957676715788806e-06, "loss": 0.2603, "step": 982 }, { "epoch": 0.07, "grad_norm": 1.6240680076233796, "learning_rate": 9.957526178913166e-06, "loss": 0.2359, "step": 983 }, { "epoch": 0.07, "grad_norm": 1.6213087323842932, "learning_rate": 9.957375375937217e-06, "loss": 0.2686, "step": 984 }, { "epoch": 0.07, "grad_norm": 1.530914533968772, "learning_rate": 9.957224306869053e-06, "loss": 0.2526, "step": 985 }, { "epoch": 0.07, "grad_norm": 6.962147982402997, "learning_rate": 9.957072971716782e-06, "loss": 0.8784, "step": 986 }, { "epoch": 0.07, "grad_norm": 1.7154213061160235, "learning_rate": 9.956921370488525e-06, "loss": 0.2762, "step": 987 }, { "epoch": 0.07, "grad_norm": 1.5005354505371913, "learning_rate": 9.956769503192426e-06, "loss": 0.2111, "step": 988 }, { "epoch": 0.07, "grad_norm": 1.452198476061592, "learning_rate": 9.956617369836628e-06, "loss": 0.1901, "step": 989 }, { "epoch": 0.07, "grad_norm": 1.6766577721974762, "learning_rate": 9.956464970429303e-06, "loss": 0.2254, "step": 990 }, { "epoch": 0.07, "grad_norm": 1.594729048086781, "learning_rate": 9.95631230497863e-06, "loss": 0.2235, "step": 991 }, { "epoch": 0.07, "grad_norm": 1.592530275426905, "learning_rate": 9.956159373492802e-06, "loss": 0.2261, "step": 992 }, { "epoch": 0.07, "grad_norm": 1.4149798986491637, "learning_rate": 9.956006175980028e-06, "loss": 0.253, "step": 993 }, { "epoch": 0.07, "grad_norm": 1.5927402174211944, "learning_rate": 9.955852712448534e-06, "loss": 0.2576, "step": 994 }, { "epoch": 0.07, "grad_norm": 1.457494320290799, "learning_rate": 9.955698982906552e-06, "loss": 0.2489, "step": 995 }, { "epoch": 0.07, "grad_norm": 1.4527550820359607, "learning_rate": 9.955544987362339e-06, "loss": 0.294, "step": 996 }, { "epoch": 0.07, "grad_norm": 1.5491164128132342, "learning_rate": 9.955390725824158e-06, "loss": 0.2554, "step": 997 }, { "epoch": 0.07, "grad_norm": 1.5021718477638957, "learning_rate": 9.955236198300289e-06, "loss": 0.246, "step": 998 }, { "epoch": 0.07, "grad_norm": 1.6303270064818196, "learning_rate": 9.955081404799026e-06, "loss": 0.2078, "step": 999 }, { "epoch": 0.07, "grad_norm": 1.7228813204533746, "learning_rate": 9.95492634532868e-06, "loss": 0.2227, "step": 1000 }, { "epoch": 0.07, "grad_norm": 1.7308317923845082, "learning_rate": 9.954771019897572e-06, "loss": 0.2805, "step": 1001 }, { "epoch": 0.07, "grad_norm": 1.56778598008499, "learning_rate": 9.954615428514041e-06, "loss": 0.2276, "step": 1002 }, { "epoch": 0.07, "grad_norm": 1.6151375579070484, "learning_rate": 9.954459571186436e-06, "loss": 0.2747, "step": 1003 }, { "epoch": 0.07, "grad_norm": 1.5868432427891044, "learning_rate": 9.954303447923125e-06, "loss": 0.2059, "step": 1004 }, { "epoch": 0.07, "grad_norm": 1.4889428936652886, "learning_rate": 9.954147058732487e-06, "loss": 0.2156, "step": 1005 }, { "epoch": 0.07, "grad_norm": 1.4971643014065947, "learning_rate": 9.953990403622918e-06, "loss": 0.2236, "step": 1006 }, { "epoch": 0.07, "grad_norm": 1.6076050935851762, "learning_rate": 9.953833482602824e-06, "loss": 0.238, "step": 1007 }, { "epoch": 0.07, "grad_norm": 4.22936853818382, "learning_rate": 9.953676295680628e-06, "loss": 0.6448, "step": 1008 }, { "epoch": 0.07, "grad_norm": 1.439165206743901, "learning_rate": 9.95351884286477e-06, "loss": 0.2322, "step": 1009 }, { "epoch": 0.07, "grad_norm": 2.073318128977774, "learning_rate": 9.9533611241637e-06, "loss": 0.2042, "step": 1010 }, { "epoch": 0.07, "grad_norm": 1.6915242133156365, "learning_rate": 9.953203139585883e-06, "loss": 0.284, "step": 1011 }, { "epoch": 0.07, "grad_norm": 1.4657506249351238, "learning_rate": 9.9530448891398e-06, "loss": 0.1963, "step": 1012 }, { "epoch": 0.07, "grad_norm": 7.345719020870333, "learning_rate": 9.952886372833943e-06, "loss": 0.672, "step": 1013 }, { "epoch": 0.07, "grad_norm": 6.602264922686953, "learning_rate": 9.952727590676824e-06, "loss": 0.6901, "step": 1014 }, { "epoch": 0.07, "grad_norm": 1.685921053439328, "learning_rate": 9.952568542676963e-06, "loss": 0.2481, "step": 1015 }, { "epoch": 0.07, "grad_norm": 1.6795568931682097, "learning_rate": 9.9524092288429e-06, "loss": 0.2044, "step": 1016 }, { "epoch": 0.07, "grad_norm": 1.3857739558708633, "learning_rate": 9.952249649183183e-06, "loss": 0.2124, "step": 1017 }, { "epoch": 0.07, "grad_norm": 1.3608516768501657, "learning_rate": 9.95208980370638e-06, "loss": 0.2583, "step": 1018 }, { "epoch": 0.07, "grad_norm": 1.4809319433741186, "learning_rate": 9.951929692421069e-06, "loss": 0.2468, "step": 1019 }, { "epoch": 0.07, "grad_norm": 1.4419709553370685, "learning_rate": 9.951769315335843e-06, "loss": 0.2132, "step": 1020 }, { "epoch": 0.07, "grad_norm": 1.650704989013261, "learning_rate": 9.951608672459316e-06, "loss": 0.2676, "step": 1021 }, { "epoch": 0.07, "grad_norm": 1.5398038052938297, "learning_rate": 9.951447763800106e-06, "loss": 0.2401, "step": 1022 }, { "epoch": 0.07, "grad_norm": 1.4932313993994064, "learning_rate": 9.951286589366852e-06, "loss": 0.2363, "step": 1023 }, { "epoch": 0.07, "grad_norm": 1.449952888601099, "learning_rate": 9.951125149168203e-06, "loss": 0.2131, "step": 1024 }, { "epoch": 0.07, "grad_norm": 1.6093305970158536, "learning_rate": 9.950963443212826e-06, "loss": 0.2342, "step": 1025 }, { "epoch": 0.07, "grad_norm": 1.546821521047033, "learning_rate": 9.950801471509402e-06, "loss": 0.264, "step": 1026 }, { "epoch": 0.07, "grad_norm": 1.401297575007682, "learning_rate": 9.95063923406662e-06, "loss": 0.2469, "step": 1027 }, { "epoch": 0.07, "grad_norm": 1.5341210641132266, "learning_rate": 9.950476730893195e-06, "loss": 0.2182, "step": 1028 }, { "epoch": 0.07, "grad_norm": 1.6354599001862569, "learning_rate": 9.950313961997845e-06, "loss": 0.253, "step": 1029 }, { "epoch": 0.07, "grad_norm": 1.6302879145004092, "learning_rate": 9.950150927389308e-06, "loss": 0.2092, "step": 1030 }, { "epoch": 0.07, "grad_norm": 1.5392831763325525, "learning_rate": 9.949987627076337e-06, "loss": 0.2205, "step": 1031 }, { "epoch": 0.07, "grad_norm": 1.7121577887554154, "learning_rate": 9.949824061067692e-06, "loss": 0.2983, "step": 1032 }, { "epoch": 0.07, "grad_norm": 1.7152318786831497, "learning_rate": 9.94966022937216e-06, "loss": 0.2419, "step": 1033 }, { "epoch": 0.07, "grad_norm": 1.6075780577184156, "learning_rate": 9.949496131998527e-06, "loss": 0.2481, "step": 1034 }, { "epoch": 0.07, "grad_norm": 1.5406417578218947, "learning_rate": 9.949331768955606e-06, "loss": 0.192, "step": 1035 }, { "epoch": 0.07, "grad_norm": 1.5666289529594795, "learning_rate": 9.949167140252221e-06, "loss": 0.2438, "step": 1036 }, { "epoch": 0.07, "grad_norm": 12.537364271359149, "learning_rate": 9.949002245897203e-06, "loss": 0.6747, "step": 1037 }, { "epoch": 0.07, "grad_norm": 1.4485754810341074, "learning_rate": 9.948837085899407e-06, "loss": 0.2323, "step": 1038 }, { "epoch": 0.07, "grad_norm": 1.8458906881058217, "learning_rate": 9.948671660267696e-06, "loss": 0.2931, "step": 1039 }, { "epoch": 0.07, "grad_norm": 1.6363330382828778, "learning_rate": 9.94850596901095e-06, "loss": 0.2517, "step": 1040 }, { "epoch": 0.07, "grad_norm": 5.929636429784305, "learning_rate": 9.948340012138064e-06, "loss": 0.5907, "step": 1041 }, { "epoch": 0.07, "grad_norm": 1.736192737380444, "learning_rate": 9.948173789657946e-06, "loss": 0.2119, "step": 1042 }, { "epoch": 0.07, "grad_norm": 1.7634488955715786, "learning_rate": 9.948007301579516e-06, "loss": 0.1989, "step": 1043 }, { "epoch": 0.07, "grad_norm": 1.3714381198643377, "learning_rate": 9.94784054791171e-06, "loss": 0.21, "step": 1044 }, { "epoch": 0.07, "grad_norm": 1.8850691204885872, "learning_rate": 9.947673528663482e-06, "loss": 0.1957, "step": 1045 }, { "epoch": 0.07, "grad_norm": 1.5800922825057861, "learning_rate": 9.947506243843795e-06, "loss": 0.2511, "step": 1046 }, { "epoch": 0.07, "grad_norm": 1.5351815574859178, "learning_rate": 9.947338693461626e-06, "loss": 0.254, "step": 1047 }, { "epoch": 0.07, "grad_norm": 1.5414123343060968, "learning_rate": 9.947170877525972e-06, "loss": 0.1957, "step": 1048 }, { "epoch": 0.08, "grad_norm": 1.5314756562783467, "learning_rate": 9.94700279604584e-06, "loss": 0.2126, "step": 1049 }, { "epoch": 0.08, "grad_norm": 5.15792517419071, "learning_rate": 9.946834449030252e-06, "loss": 0.2546, "step": 1050 }, { "epoch": 0.08, "grad_norm": 1.4474075591114994, "learning_rate": 9.946665836488241e-06, "loss": 0.2088, "step": 1051 }, { "epoch": 0.08, "grad_norm": 1.6796718151856187, "learning_rate": 9.946496958428862e-06, "loss": 0.2568, "step": 1052 }, { "epoch": 0.08, "grad_norm": 1.4911973841778483, "learning_rate": 9.946327814861176e-06, "loss": 0.2314, "step": 1053 }, { "epoch": 0.08, "grad_norm": 1.4264998384092136, "learning_rate": 9.946158405794266e-06, "loss": 0.2221, "step": 1054 }, { "epoch": 0.08, "grad_norm": 5.205927728205507, "learning_rate": 9.945988731237221e-06, "loss": 0.4087, "step": 1055 }, { "epoch": 0.08, "grad_norm": 1.650392206599657, "learning_rate": 9.94581879119915e-06, "loss": 0.2368, "step": 1056 }, { "epoch": 0.08, "grad_norm": 6.302826269422322, "learning_rate": 9.945648585689176e-06, "loss": 0.7123, "step": 1057 }, { "epoch": 0.08, "grad_norm": 1.533800732905702, "learning_rate": 9.945478114716433e-06, "loss": 0.2274, "step": 1058 }, { "epoch": 0.08, "grad_norm": 1.608307598501455, "learning_rate": 9.945307378290073e-06, "loss": 0.2735, "step": 1059 }, { "epoch": 0.08, "grad_norm": 1.7982908724076274, "learning_rate": 9.94513637641926e-06, "loss": 0.2438, "step": 1060 }, { "epoch": 0.08, "grad_norm": 1.6234499350765255, "learning_rate": 9.944965109113171e-06, "loss": 0.1964, "step": 1061 }, { "epoch": 0.08, "grad_norm": 1.6008586635131068, "learning_rate": 9.944793576381002e-06, "loss": 0.2253, "step": 1062 }, { "epoch": 0.08, "grad_norm": 6.294362153837577, "learning_rate": 9.944621778231956e-06, "loss": 0.6718, "step": 1063 }, { "epoch": 0.08, "grad_norm": 1.6364230117572207, "learning_rate": 9.944449714675259e-06, "loss": 0.1984, "step": 1064 }, { "epoch": 0.08, "grad_norm": 1.618832407664547, "learning_rate": 9.944277385720143e-06, "loss": 0.2123, "step": 1065 }, { "epoch": 0.08, "grad_norm": 1.4838019647981555, "learning_rate": 9.944104791375862e-06, "loss": 0.212, "step": 1066 }, { "epoch": 0.08, "grad_norm": 1.3781034204679117, "learning_rate": 9.943931931651675e-06, "loss": 0.2266, "step": 1067 }, { "epoch": 0.08, "grad_norm": 1.4310720361956148, "learning_rate": 9.943758806556864e-06, "loss": 0.2203, "step": 1068 }, { "epoch": 0.08, "grad_norm": 1.362197559705591, "learning_rate": 9.94358541610072e-06, "loss": 0.2338, "step": 1069 }, { "epoch": 0.08, "grad_norm": 1.5611010576678022, "learning_rate": 9.943411760292552e-06, "loss": 0.2457, "step": 1070 }, { "epoch": 0.08, "grad_norm": 1.518704795287663, "learning_rate": 9.94323783914168e-06, "loss": 0.2159, "step": 1071 }, { "epoch": 0.08, "grad_norm": 1.5303631008057625, "learning_rate": 9.94306365265744e-06, "loss": 0.2508, "step": 1072 }, { "epoch": 0.08, "grad_norm": 1.4062407195004938, "learning_rate": 9.942889200849178e-06, "loss": 0.2226, "step": 1073 }, { "epoch": 0.08, "grad_norm": 1.6175547562475912, "learning_rate": 9.942714483726263e-06, "loss": 0.2283, "step": 1074 }, { "epoch": 0.08, "grad_norm": 1.3525813966884446, "learning_rate": 9.94253950129807e-06, "loss": 0.2262, "step": 1075 }, { "epoch": 0.08, "grad_norm": 1.4714837178548161, "learning_rate": 9.942364253573993e-06, "loss": 0.1844, "step": 1076 }, { "epoch": 0.08, "grad_norm": 5.010952712421839, "learning_rate": 9.942188740563437e-06, "loss": 0.437, "step": 1077 }, { "epoch": 0.08, "grad_norm": 1.473048192083719, "learning_rate": 9.942012962275824e-06, "loss": 0.1895, "step": 1078 }, { "epoch": 0.08, "grad_norm": 1.4909759589197886, "learning_rate": 9.941836918720588e-06, "loss": 0.2048, "step": 1079 }, { "epoch": 0.08, "grad_norm": 1.604877836246275, "learning_rate": 9.94166060990718e-06, "loss": 0.2546, "step": 1080 }, { "epoch": 0.08, "grad_norm": 1.6048371961017147, "learning_rate": 9.941484035845061e-06, "loss": 0.2848, "step": 1081 }, { "epoch": 0.08, "grad_norm": 1.5320663252210094, "learning_rate": 9.941307196543712e-06, "loss": 0.2647, "step": 1082 }, { "epoch": 0.08, "grad_norm": 1.5490125158760615, "learning_rate": 9.941130092012624e-06, "loss": 0.2358, "step": 1083 }, { "epoch": 0.08, "grad_norm": 1.5042218924932877, "learning_rate": 9.940952722261302e-06, "loss": 0.2078, "step": 1084 }, { "epoch": 0.08, "grad_norm": 2.0980929109302493, "learning_rate": 9.940775087299267e-06, "loss": 0.2855, "step": 1085 }, { "epoch": 0.08, "grad_norm": 1.533576415958193, "learning_rate": 9.940597187136054e-06, "loss": 0.2005, "step": 1086 }, { "epoch": 0.08, "grad_norm": 1.476112180082261, "learning_rate": 9.940419021781214e-06, "loss": 0.2065, "step": 1087 }, { "epoch": 0.08, "grad_norm": 1.4267578886258836, "learning_rate": 9.940240591244306e-06, "loss": 0.2217, "step": 1088 }, { "epoch": 0.08, "grad_norm": 1.6035014287713436, "learning_rate": 9.94006189553491e-06, "loss": 0.2159, "step": 1089 }, { "epoch": 0.08, "grad_norm": 1.5143518062808157, "learning_rate": 9.939882934662617e-06, "loss": 0.2006, "step": 1090 }, { "epoch": 0.08, "grad_norm": 1.5330403345386265, "learning_rate": 9.939703708637033e-06, "loss": 0.2251, "step": 1091 }, { "epoch": 0.08, "grad_norm": 1.8029975676302326, "learning_rate": 9.939524217467779e-06, "loss": 0.3352, "step": 1092 }, { "epoch": 0.08, "grad_norm": 1.6536029638125382, "learning_rate": 9.939344461164488e-06, "loss": 0.2141, "step": 1093 }, { "epoch": 0.08, "grad_norm": 1.8072385104890933, "learning_rate": 9.93916443973681e-06, "loss": 0.2432, "step": 1094 }, { "epoch": 0.08, "grad_norm": 1.5992702637251062, "learning_rate": 9.938984153194406e-06, "loss": 0.2186, "step": 1095 }, { "epoch": 0.08, "grad_norm": 1.6006705193402297, "learning_rate": 9.938803601546957e-06, "loss": 0.2712, "step": 1096 }, { "epoch": 0.08, "grad_norm": 1.443343958671591, "learning_rate": 9.938622784804149e-06, "loss": 0.2242, "step": 1097 }, { "epoch": 0.08, "grad_norm": 1.6050528431399598, "learning_rate": 9.938441702975689e-06, "loss": 0.1861, "step": 1098 }, { "epoch": 0.08, "grad_norm": 5.103841139360225, "learning_rate": 9.938260356071299e-06, "loss": 0.7147, "step": 1099 }, { "epoch": 0.08, "grad_norm": 1.7027121410950583, "learning_rate": 9.938078744100713e-06, "loss": 0.2155, "step": 1100 }, { "epoch": 0.08, "grad_norm": 1.6848330906460747, "learning_rate": 9.937896867073678e-06, "loss": 0.2064, "step": 1101 }, { "epoch": 0.08, "grad_norm": 1.4678293835639407, "learning_rate": 9.937714724999953e-06, "loss": 0.2026, "step": 1102 }, { "epoch": 0.08, "grad_norm": 1.2260575484814509, "learning_rate": 9.937532317889322e-06, "loss": 0.2115, "step": 1103 }, { "epoch": 0.08, "grad_norm": 1.3860522733887144, "learning_rate": 9.93734964575157e-06, "loss": 0.2065, "step": 1104 }, { "epoch": 0.08, "grad_norm": 5.631746959209183, "learning_rate": 9.937166708596504e-06, "loss": 0.7735, "step": 1105 }, { "epoch": 0.08, "grad_norm": 1.5163915010165294, "learning_rate": 9.936983506433945e-06, "loss": 0.2593, "step": 1106 }, { "epoch": 0.08, "grad_norm": 4.090629076623357, "learning_rate": 9.936800039273724e-06, "loss": 0.6032, "step": 1107 }, { "epoch": 0.08, "grad_norm": 6.12088079571952, "learning_rate": 9.93661630712569e-06, "loss": 0.6263, "step": 1108 }, { "epoch": 0.08, "grad_norm": 1.5926099572905967, "learning_rate": 9.936432309999704e-06, "loss": 0.2661, "step": 1109 }, { "epoch": 0.08, "grad_norm": 1.3855609528300077, "learning_rate": 9.936248047905644e-06, "loss": 0.1831, "step": 1110 }, { "epoch": 0.08, "grad_norm": 1.5507617689660618, "learning_rate": 9.936063520853399e-06, "loss": 0.2646, "step": 1111 }, { "epoch": 0.08, "grad_norm": 1.5036292226530938, "learning_rate": 9.935878728852873e-06, "loss": 0.244, "step": 1112 }, { "epoch": 0.08, "grad_norm": 1.5683808728634534, "learning_rate": 9.935693671913986e-06, "loss": 0.2226, "step": 1113 }, { "epoch": 0.08, "grad_norm": 1.5142295296673947, "learning_rate": 9.935508350046672e-06, "loss": 0.2167, "step": 1114 }, { "epoch": 0.08, "grad_norm": 1.7190014522576007, "learning_rate": 9.93532276326088e-06, "loss": 0.2242, "step": 1115 }, { "epoch": 0.08, "grad_norm": 1.6279209691401062, "learning_rate": 9.935136911566566e-06, "loss": 0.2144, "step": 1116 }, { "epoch": 0.08, "grad_norm": 1.5955580635135345, "learning_rate": 9.934950794973708e-06, "loss": 0.2377, "step": 1117 }, { "epoch": 0.08, "grad_norm": 5.42747071263188, "learning_rate": 9.9347644134923e-06, "loss": 0.7366, "step": 1118 }, { "epoch": 0.08, "grad_norm": 1.5054239783526073, "learning_rate": 9.934577767132341e-06, "loss": 0.2454, "step": 1119 }, { "epoch": 0.08, "grad_norm": 1.706862813092047, "learning_rate": 9.934390855903852e-06, "loss": 0.2594, "step": 1120 }, { "epoch": 0.08, "grad_norm": 1.515741794429253, "learning_rate": 9.934203679816865e-06, "loss": 0.2021, "step": 1121 }, { "epoch": 0.08, "grad_norm": 1.4469333636053692, "learning_rate": 9.934016238881428e-06, "loss": 0.2444, "step": 1122 }, { "epoch": 0.08, "grad_norm": 1.3676366169245366, "learning_rate": 9.9338285331076e-06, "loss": 0.2038, "step": 1123 }, { "epoch": 0.08, "grad_norm": 5.100441246083172, "learning_rate": 9.933640562505458e-06, "loss": 0.7188, "step": 1124 }, { "epoch": 0.08, "grad_norm": 1.5527255683800822, "learning_rate": 9.93345232708509e-06, "loss": 0.2976, "step": 1125 }, { "epoch": 0.08, "grad_norm": 1.5974038948754141, "learning_rate": 9.933263826856601e-06, "loss": 0.2052, "step": 1126 }, { "epoch": 0.08, "grad_norm": 1.7007809308034525, "learning_rate": 9.93307506183011e-06, "loss": 0.21, "step": 1127 }, { "epoch": 0.08, "grad_norm": 1.6665260086696043, "learning_rate": 9.932886032015746e-06, "loss": 0.2197, "step": 1128 }, { "epoch": 0.08, "grad_norm": 1.7421161507008942, "learning_rate": 9.932696737423656e-06, "loss": 0.2739, "step": 1129 }, { "epoch": 0.08, "grad_norm": 1.67225587235492, "learning_rate": 9.932507178064006e-06, "loss": 0.2428, "step": 1130 }, { "epoch": 0.08, "grad_norm": 1.4742943367921002, "learning_rate": 9.932317353946964e-06, "loss": 0.2378, "step": 1131 }, { "epoch": 0.08, "grad_norm": 5.390701134572608, "learning_rate": 9.93212726508272e-06, "loss": 0.6425, "step": 1132 }, { "epoch": 0.08, "grad_norm": 1.6743390097737465, "learning_rate": 9.931936911481481e-06, "loss": 0.228, "step": 1133 }, { "epoch": 0.08, "grad_norm": 1.5685427760985085, "learning_rate": 9.93174629315346e-06, "loss": 0.2662, "step": 1134 }, { "epoch": 0.08, "grad_norm": 1.4979379074089965, "learning_rate": 9.931555410108893e-06, "loss": 0.27, "step": 1135 }, { "epoch": 0.08, "grad_norm": 6.1305112799585135, "learning_rate": 9.931364262358023e-06, "loss": 0.6212, "step": 1136 }, { "epoch": 0.08, "grad_norm": 1.4919989447400548, "learning_rate": 9.93117284991111e-06, "loss": 0.242, "step": 1137 }, { "epoch": 0.08, "grad_norm": 1.6726226969839952, "learning_rate": 9.93098117277843e-06, "loss": 0.2618, "step": 1138 }, { "epoch": 0.08, "grad_norm": 1.7796480013875138, "learning_rate": 9.930789230970269e-06, "loss": 0.3036, "step": 1139 }, { "epoch": 0.08, "grad_norm": 1.6059924919551873, "learning_rate": 9.930597024496933e-06, "loss": 0.2369, "step": 1140 }, { "epoch": 0.08, "grad_norm": 1.4118458684547648, "learning_rate": 9.930404553368733e-06, "loss": 0.2756, "step": 1141 }, { "epoch": 0.08, "grad_norm": 1.4364800363949266, "learning_rate": 9.930211817596008e-06, "loss": 0.227, "step": 1142 }, { "epoch": 0.08, "grad_norm": 1.4990861665393787, "learning_rate": 9.930018817189097e-06, "loss": 0.2508, "step": 1143 }, { "epoch": 0.08, "grad_norm": 1.4195372228809104, "learning_rate": 9.929825552158364e-06, "loss": 0.2081, "step": 1144 }, { "epoch": 0.08, "grad_norm": 1.5782544500786064, "learning_rate": 9.92963202251418e-06, "loss": 0.25, "step": 1145 }, { "epoch": 0.08, "grad_norm": 1.384373391912052, "learning_rate": 9.929438228266932e-06, "loss": 0.193, "step": 1146 }, { "epoch": 0.08, "grad_norm": 1.649296242057296, "learning_rate": 9.929244169427023e-06, "loss": 0.2512, "step": 1147 }, { "epoch": 0.08, "grad_norm": 1.541559488811707, "learning_rate": 9.929049846004873e-06, "loss": 0.2051, "step": 1148 }, { "epoch": 0.08, "grad_norm": 1.3813331356300795, "learning_rate": 9.928855258010907e-06, "loss": 0.1937, "step": 1149 }, { "epoch": 0.08, "grad_norm": 1.4689029334594381, "learning_rate": 9.928660405455572e-06, "loss": 0.1947, "step": 1150 }, { "epoch": 0.08, "grad_norm": 1.4539609865114502, "learning_rate": 9.928465288349328e-06, "loss": 0.1889, "step": 1151 }, { "epoch": 0.08, "grad_norm": 1.5608175162923896, "learning_rate": 9.928269906702647e-06, "loss": 0.2299, "step": 1152 }, { "epoch": 0.08, "grad_norm": 1.550219914255769, "learning_rate": 9.928074260526014e-06, "loss": 0.2385, "step": 1153 }, { "epoch": 0.08, "grad_norm": 1.4124121742478943, "learning_rate": 9.927878349829935e-06, "loss": 0.2203, "step": 1154 }, { "epoch": 0.08, "grad_norm": 1.6128076753656464, "learning_rate": 9.927682174624925e-06, "loss": 0.2533, "step": 1155 }, { "epoch": 0.08, "grad_norm": 1.5212727843845337, "learning_rate": 9.927485734921511e-06, "loss": 0.2397, "step": 1156 }, { "epoch": 0.08, "grad_norm": 1.5044143232968135, "learning_rate": 9.927289030730239e-06, "loss": 0.2378, "step": 1157 }, { "epoch": 0.08, "grad_norm": 1.5732720262508046, "learning_rate": 9.927092062061667e-06, "loss": 0.2392, "step": 1158 }, { "epoch": 0.08, "grad_norm": 1.41769202569861, "learning_rate": 9.926894828926366e-06, "loss": 0.1861, "step": 1159 }, { "epoch": 0.08, "grad_norm": 1.4346003944295718, "learning_rate": 9.926697331334924e-06, "loss": 0.2158, "step": 1160 }, { "epoch": 0.08, "grad_norm": 1.576747012069802, "learning_rate": 9.926499569297943e-06, "loss": 0.207, "step": 1161 }, { "epoch": 0.08, "grad_norm": 1.656663878987908, "learning_rate": 9.926301542826038e-06, "loss": 0.2502, "step": 1162 }, { "epoch": 0.08, "grad_norm": 1.541954005611327, "learning_rate": 9.926103251929836e-06, "loss": 0.2507, "step": 1163 }, { "epoch": 0.08, "grad_norm": 1.530091662294955, "learning_rate": 9.925904696619983e-06, "loss": 0.2362, "step": 1164 }, { "epoch": 0.08, "grad_norm": 1.7139948753263308, "learning_rate": 9.925705876907134e-06, "loss": 0.2616, "step": 1165 }, { "epoch": 0.08, "grad_norm": 1.4635097188487525, "learning_rate": 9.925506792801962e-06, "loss": 0.2328, "step": 1166 }, { "epoch": 0.08, "grad_norm": 1.3361479247209194, "learning_rate": 9.925307444315155e-06, "loss": 0.1916, "step": 1167 }, { "epoch": 0.08, "grad_norm": 6.399460259709606, "learning_rate": 9.92510783145741e-06, "loss": 0.5664, "step": 1168 }, { "epoch": 0.08, "grad_norm": 2.1518466693233513, "learning_rate": 9.924907954239446e-06, "loss": 0.2602, "step": 1169 }, { "epoch": 0.08, "grad_norm": 1.5263425809631828, "learning_rate": 9.924707812671986e-06, "loss": 0.2519, "step": 1170 }, { "epoch": 0.08, "grad_norm": 1.6937103464510361, "learning_rate": 9.924507406765776e-06, "loss": 0.2889, "step": 1171 }, { "epoch": 0.08, "grad_norm": 1.6133497529694538, "learning_rate": 9.924306736531572e-06, "loss": 0.2677, "step": 1172 }, { "epoch": 0.08, "grad_norm": 1.6704593822957057, "learning_rate": 9.924105801980146e-06, "loss": 0.2491, "step": 1173 }, { "epoch": 0.08, "grad_norm": 1.614246328017673, "learning_rate": 9.923904603122283e-06, "loss": 0.2925, "step": 1174 }, { "epoch": 0.08, "grad_norm": 1.4067812460657783, "learning_rate": 9.923703139968784e-06, "loss": 0.2025, "step": 1175 }, { "epoch": 0.08, "grad_norm": 1.6920495664016721, "learning_rate": 9.923501412530458e-06, "loss": 0.2604, "step": 1176 }, { "epoch": 0.08, "grad_norm": 1.4881384936536035, "learning_rate": 9.92329942081814e-06, "loss": 0.2449, "step": 1177 }, { "epoch": 0.08, "grad_norm": 1.4971607178529065, "learning_rate": 9.923097164842666e-06, "loss": 0.2001, "step": 1178 }, { "epoch": 0.08, "grad_norm": 1.4818095392721269, "learning_rate": 9.922894644614895e-06, "loss": 0.2229, "step": 1179 }, { "epoch": 0.08, "grad_norm": 1.7579447390164427, "learning_rate": 9.922691860145696e-06, "loss": 0.2918, "step": 1180 }, { "epoch": 0.08, "grad_norm": 1.6318483684604876, "learning_rate": 9.922488811445957e-06, "loss": 0.2466, "step": 1181 }, { "epoch": 0.08, "grad_norm": 1.397677123564768, "learning_rate": 9.922285498526575e-06, "loss": 0.206, "step": 1182 }, { "epoch": 0.08, "grad_norm": 1.3998255162937556, "learning_rate": 9.922081921398458e-06, "loss": 0.2154, "step": 1183 }, { "epoch": 0.08, "grad_norm": 1.5175236094470617, "learning_rate": 9.921878080072542e-06, "loss": 0.2611, "step": 1184 }, { "epoch": 0.08, "grad_norm": 1.6542587296439337, "learning_rate": 9.921673974559763e-06, "loss": 0.2527, "step": 1185 }, { "epoch": 0.08, "grad_norm": 1.6569505658546817, "learning_rate": 9.92146960487108e-06, "loss": 0.2777, "step": 1186 }, { "epoch": 0.08, "grad_norm": 1.5769165582412397, "learning_rate": 9.921264971017457e-06, "loss": 0.2121, "step": 1187 }, { "epoch": 0.08, "grad_norm": 1.5301618160383677, "learning_rate": 9.921060073009884e-06, "loss": 0.2063, "step": 1188 }, { "epoch": 0.09, "grad_norm": 1.4483243795898726, "learning_rate": 9.920854910859356e-06, "loss": 0.1978, "step": 1189 }, { "epoch": 0.09, "grad_norm": 1.5470267121396963, "learning_rate": 9.920649484576885e-06, "loss": 0.2588, "step": 1190 }, { "epoch": 0.09, "grad_norm": 1.6008871512704161, "learning_rate": 9.9204437941735e-06, "loss": 0.2213, "step": 1191 }, { "epoch": 0.09, "grad_norm": 1.6873839191317468, "learning_rate": 9.920237839660239e-06, "loss": 0.2338, "step": 1192 }, { "epoch": 0.09, "grad_norm": 1.6556225732153522, "learning_rate": 9.920031621048157e-06, "loss": 0.2025, "step": 1193 }, { "epoch": 0.09, "grad_norm": 1.4842168332252705, "learning_rate": 9.919825138348325e-06, "loss": 0.212, "step": 1194 }, { "epoch": 0.09, "grad_norm": 1.470374524714982, "learning_rate": 9.919618391571825e-06, "loss": 0.19, "step": 1195 }, { "epoch": 0.09, "grad_norm": 1.582239383420492, "learning_rate": 9.919411380729753e-06, "loss": 0.2331, "step": 1196 }, { "epoch": 0.09, "grad_norm": 1.7227821288652811, "learning_rate": 9.919204105833224e-06, "loss": 0.3193, "step": 1197 }, { "epoch": 0.09, "grad_norm": 4.889018099274441, "learning_rate": 9.91899656689336e-06, "loss": 0.5393, "step": 1198 }, { "epoch": 0.09, "grad_norm": 1.6496164398115643, "learning_rate": 9.918788763921303e-06, "loss": 0.2534, "step": 1199 }, { "epoch": 0.09, "grad_norm": 1.6618716608891957, "learning_rate": 9.918580696928206e-06, "loss": 0.2561, "step": 1200 }, { "epoch": 0.09, "grad_norm": 1.6148497683512029, "learning_rate": 9.918372365925238e-06, "loss": 0.2274, "step": 1201 }, { "epoch": 0.09, "grad_norm": 1.394778092833336, "learning_rate": 9.91816377092358e-06, "loss": 0.1879, "step": 1202 }, { "epoch": 0.09, "grad_norm": 1.5494693348575201, "learning_rate": 9.917954911934433e-06, "loss": 0.2691, "step": 1203 }, { "epoch": 0.09, "grad_norm": 1.8110328118956778, "learning_rate": 9.917745788969003e-06, "loss": 0.2544, "step": 1204 }, { "epoch": 0.09, "grad_norm": 6.1062923464027055, "learning_rate": 9.917536402038513e-06, "loss": 0.709, "step": 1205 }, { "epoch": 0.09, "grad_norm": 1.3807016297769146, "learning_rate": 9.917326751154208e-06, "loss": 0.2091, "step": 1206 }, { "epoch": 0.09, "grad_norm": 1.5357985916483932, "learning_rate": 9.91711683632734e-06, "loss": 0.2486, "step": 1207 }, { "epoch": 0.09, "grad_norm": 1.5483645553279626, "learning_rate": 9.916906657569171e-06, "loss": 0.2655, "step": 1208 }, { "epoch": 0.09, "grad_norm": 5.961588269399571, "learning_rate": 9.916696214890988e-06, "loss": 0.6549, "step": 1209 }, { "epoch": 0.09, "grad_norm": 1.3710636795800009, "learning_rate": 9.916485508304085e-06, "loss": 0.1995, "step": 1210 }, { "epoch": 0.09, "grad_norm": 1.3715187586041144, "learning_rate": 9.916274537819774e-06, "loss": 0.2305, "step": 1211 }, { "epoch": 0.09, "grad_norm": 1.5184926761534008, "learning_rate": 9.916063303449374e-06, "loss": 0.2377, "step": 1212 }, { "epoch": 0.09, "grad_norm": 1.4819978853399634, "learning_rate": 9.91585180520423e-06, "loss": 0.2488, "step": 1213 }, { "epoch": 0.09, "grad_norm": 1.5176154221603166, "learning_rate": 9.91564004309569e-06, "loss": 0.225, "step": 1214 }, { "epoch": 0.09, "grad_norm": 1.3686378811279407, "learning_rate": 9.915428017135117e-06, "loss": 0.2255, "step": 1215 }, { "epoch": 0.09, "grad_norm": 1.5532582572507878, "learning_rate": 9.9152157273339e-06, "loss": 0.1976, "step": 1216 }, { "epoch": 0.09, "grad_norm": 1.5731653038139306, "learning_rate": 9.91500317370343e-06, "loss": 0.2305, "step": 1217 }, { "epoch": 0.09, "grad_norm": 1.4695741876759432, "learning_rate": 9.914790356255115e-06, "loss": 0.2105, "step": 1218 }, { "epoch": 0.09, "grad_norm": 1.6214055641875886, "learning_rate": 9.914577275000378e-06, "loss": 0.2508, "step": 1219 }, { "epoch": 0.09, "grad_norm": 1.342472617228648, "learning_rate": 9.91436392995066e-06, "loss": 0.1919, "step": 1220 }, { "epoch": 0.09, "grad_norm": 1.422136636302543, "learning_rate": 9.914150321117408e-06, "loss": 0.2324, "step": 1221 }, { "epoch": 0.09, "grad_norm": 1.8441553307439726, "learning_rate": 9.91393644851209e-06, "loss": 0.2887, "step": 1222 }, { "epoch": 0.09, "grad_norm": 1.3825142476097159, "learning_rate": 9.913722312146186e-06, "loss": 0.1851, "step": 1223 }, { "epoch": 0.09, "grad_norm": 1.5899096399160513, "learning_rate": 9.913507912031188e-06, "loss": 0.3012, "step": 1224 }, { "epoch": 0.09, "grad_norm": 1.469712012626539, "learning_rate": 9.913293248178608e-06, "loss": 0.1877, "step": 1225 }, { "epoch": 0.09, "grad_norm": 1.7902861632241287, "learning_rate": 9.913078320599963e-06, "loss": 0.2418, "step": 1226 }, { "epoch": 0.09, "grad_norm": 1.647543320833529, "learning_rate": 9.912863129306796e-06, "loss": 0.2806, "step": 1227 }, { "epoch": 0.09, "grad_norm": 8.11614893267168, "learning_rate": 9.912647674310652e-06, "loss": 0.5523, "step": 1228 }, { "epoch": 0.09, "grad_norm": 1.62136549114214, "learning_rate": 9.912431955623096e-06, "loss": 0.227, "step": 1229 }, { "epoch": 0.09, "grad_norm": 12.017234390944381, "learning_rate": 9.91221597325571e-06, "loss": 0.511, "step": 1230 }, { "epoch": 0.09, "grad_norm": 1.762855647248683, "learning_rate": 9.911999727220088e-06, "loss": 0.2332, "step": 1231 }, { "epoch": 0.09, "grad_norm": 1.427478503346927, "learning_rate": 9.911783217527833e-06, "loss": 0.2173, "step": 1232 }, { "epoch": 0.09, "grad_norm": 1.3197486313148674, "learning_rate": 9.911566444190568e-06, "loss": 0.1993, "step": 1233 }, { "epoch": 0.09, "grad_norm": 1.4720266891056886, "learning_rate": 9.91134940721993e-06, "loss": 0.1967, "step": 1234 }, { "epoch": 0.09, "grad_norm": 1.5855416613511666, "learning_rate": 9.911132106627565e-06, "loss": 0.2397, "step": 1235 }, { "epoch": 0.09, "grad_norm": 1.5034429604427793, "learning_rate": 9.910914542425143e-06, "loss": 0.2126, "step": 1236 }, { "epoch": 0.09, "grad_norm": 1.7639865151303904, "learning_rate": 9.910696714624336e-06, "loss": 0.2739, "step": 1237 }, { "epoch": 0.09, "grad_norm": 1.4596367840619353, "learning_rate": 9.910478623236838e-06, "loss": 0.2565, "step": 1238 }, { "epoch": 0.09, "grad_norm": 1.5653254401290901, "learning_rate": 9.910260268274358e-06, "loss": 0.2415, "step": 1239 }, { "epoch": 0.09, "grad_norm": 1.6400025579810504, "learning_rate": 9.910041649748613e-06, "loss": 0.2033, "step": 1240 }, { "epoch": 0.09, "grad_norm": 1.5388289233467425, "learning_rate": 9.90982276767134e-06, "loss": 0.2312, "step": 1241 }, { "epoch": 0.09, "grad_norm": 1.7193470979175998, "learning_rate": 9.909603622054285e-06, "loss": 0.2522, "step": 1242 }, { "epoch": 0.09, "grad_norm": 1.6148638184867419, "learning_rate": 9.909384212909214e-06, "loss": 0.2485, "step": 1243 }, { "epoch": 0.09, "grad_norm": 1.36300648709624, "learning_rate": 9.909164540247902e-06, "loss": 0.1926, "step": 1244 }, { "epoch": 0.09, "grad_norm": 1.4856544469731123, "learning_rate": 9.908944604082139e-06, "loss": 0.226, "step": 1245 }, { "epoch": 0.09, "grad_norm": 1.3508106269052123, "learning_rate": 9.908724404423732e-06, "loss": 0.2034, "step": 1246 }, { "epoch": 0.09, "grad_norm": 1.538132258303328, "learning_rate": 9.908503941284502e-06, "loss": 0.2195, "step": 1247 }, { "epoch": 0.09, "grad_norm": 1.6295258370561332, "learning_rate": 9.90828321467628e-06, "loss": 0.2147, "step": 1248 }, { "epoch": 0.09, "grad_norm": 1.726687679437476, "learning_rate": 9.908062224610915e-06, "loss": 0.3342, "step": 1249 }, { "epoch": 0.09, "grad_norm": 1.5460182083651854, "learning_rate": 9.907840971100268e-06, "loss": 0.236, "step": 1250 }, { "epoch": 0.09, "grad_norm": 1.5197901052458545, "learning_rate": 9.907619454156215e-06, "loss": 0.2125, "step": 1251 }, { "epoch": 0.09, "grad_norm": 1.518320084139661, "learning_rate": 9.907397673790648e-06, "loss": 0.2032, "step": 1252 }, { "epoch": 0.09, "grad_norm": 1.6049641941003987, "learning_rate": 9.907175630015468e-06, "loss": 0.1844, "step": 1253 }, { "epoch": 0.09, "grad_norm": 1.546122120994793, "learning_rate": 9.906953322842598e-06, "loss": 0.1985, "step": 1254 }, { "epoch": 0.09, "grad_norm": 1.3832051092119177, "learning_rate": 9.906730752283965e-06, "loss": 0.2494, "step": 1255 }, { "epoch": 0.09, "grad_norm": 1.640238718358799, "learning_rate": 9.90650791835152e-06, "loss": 0.2726, "step": 1256 }, { "epoch": 0.09, "grad_norm": 1.6405238650307712, "learning_rate": 9.906284821057223e-06, "loss": 0.2289, "step": 1257 }, { "epoch": 0.09, "grad_norm": 1.563050941351312, "learning_rate": 9.906061460413047e-06, "loss": 0.2771, "step": 1258 }, { "epoch": 0.09, "grad_norm": 4.756122356907297, "learning_rate": 9.905837836430985e-06, "loss": 0.425, "step": 1259 }, { "epoch": 0.09, "grad_norm": 1.5651730236004022, "learning_rate": 9.905613949123036e-06, "loss": 0.2445, "step": 1260 }, { "epoch": 0.09, "grad_norm": 1.5033551117000115, "learning_rate": 9.905389798501219e-06, "loss": 0.243, "step": 1261 }, { "epoch": 0.09, "grad_norm": 5.264898963025065, "learning_rate": 9.905165384577567e-06, "loss": 0.6549, "step": 1262 }, { "epoch": 0.09, "grad_norm": 1.3901725308933657, "learning_rate": 9.904940707364123e-06, "loss": 0.2207, "step": 1263 }, { "epoch": 0.09, "grad_norm": 1.5141064897345597, "learning_rate": 9.90471576687295e-06, "loss": 0.2277, "step": 1264 }, { "epoch": 0.09, "grad_norm": 1.3814064661575949, "learning_rate": 9.904490563116118e-06, "loss": 0.1814, "step": 1265 }, { "epoch": 0.09, "grad_norm": 1.6340115808253446, "learning_rate": 9.904265096105719e-06, "loss": 0.2645, "step": 1266 }, { "epoch": 0.09, "grad_norm": 1.3998514593752909, "learning_rate": 9.90403936585385e-06, "loss": 0.2031, "step": 1267 }, { "epoch": 0.09, "grad_norm": 10.146258705481285, "learning_rate": 9.903813372372633e-06, "loss": 0.8096, "step": 1268 }, { "epoch": 0.09, "grad_norm": 1.70104468800173, "learning_rate": 9.903587115674196e-06, "loss": 0.3099, "step": 1269 }, { "epoch": 0.09, "grad_norm": 1.6901948751544316, "learning_rate": 9.903360595770684e-06, "loss": 0.2236, "step": 1270 }, { "epoch": 0.09, "grad_norm": 1.4854755572626794, "learning_rate": 9.903133812674252e-06, "loss": 0.2529, "step": 1271 }, { "epoch": 0.09, "grad_norm": 1.478866278812421, "learning_rate": 9.90290676639708e-06, "loss": 0.2555, "step": 1272 }, { "epoch": 0.09, "grad_norm": 1.4216083076053283, "learning_rate": 9.90267945695135e-06, "loss": 0.2227, "step": 1273 }, { "epoch": 0.09, "grad_norm": 1.5423743169132416, "learning_rate": 9.902451884349263e-06, "loss": 0.2002, "step": 1274 }, { "epoch": 0.09, "grad_norm": 1.3479660981400268, "learning_rate": 9.902224048603036e-06, "loss": 0.1932, "step": 1275 }, { "epoch": 0.09, "grad_norm": 1.3940957398424716, "learning_rate": 9.901995949724897e-06, "loss": 0.2187, "step": 1276 }, { "epoch": 0.09, "grad_norm": 1.5846144536629947, "learning_rate": 9.90176758772709e-06, "loss": 0.2196, "step": 1277 }, { "epoch": 0.09, "grad_norm": 1.5294697189331377, "learning_rate": 9.901538962621873e-06, "loss": 0.2626, "step": 1278 }, { "epoch": 0.09, "grad_norm": 1.550598250091117, "learning_rate": 9.901310074421518e-06, "loss": 0.2681, "step": 1279 }, { "epoch": 0.09, "grad_norm": 8.967554515038206, "learning_rate": 9.901080923138308e-06, "loss": 0.6241, "step": 1280 }, { "epoch": 0.09, "grad_norm": 1.490726447050626, "learning_rate": 9.900851508784548e-06, "loss": 0.2159, "step": 1281 }, { "epoch": 0.09, "grad_norm": 1.6237615341273124, "learning_rate": 9.900621831372545e-06, "loss": 0.2517, "step": 1282 }, { "epoch": 0.09, "grad_norm": 1.6117545928584855, "learning_rate": 9.900391890914634e-06, "loss": 0.2551, "step": 1283 }, { "epoch": 0.09, "grad_norm": 1.2333949243894047, "learning_rate": 9.900161687423155e-06, "loss": 0.1823, "step": 1284 }, { "epoch": 0.09, "grad_norm": 1.4832588808142355, "learning_rate": 9.899931220910463e-06, "loss": 0.2169, "step": 1285 }, { "epoch": 0.09, "grad_norm": 1.3608787648012268, "learning_rate": 9.89970049138893e-06, "loss": 0.2012, "step": 1286 }, { "epoch": 0.09, "grad_norm": 1.4953636941675037, "learning_rate": 9.89946949887094e-06, "loss": 0.2311, "step": 1287 }, { "epoch": 0.09, "grad_norm": 1.4113748872192358, "learning_rate": 9.899238243368889e-06, "loss": 0.2047, "step": 1288 }, { "epoch": 0.09, "grad_norm": 1.466324016834527, "learning_rate": 9.899006724895197e-06, "loss": 0.217, "step": 1289 }, { "epoch": 0.09, "grad_norm": 1.5682297216273149, "learning_rate": 9.898774943462285e-06, "loss": 0.2479, "step": 1290 }, { "epoch": 0.09, "grad_norm": 1.5234740176823738, "learning_rate": 9.898542899082595e-06, "loss": 0.2564, "step": 1291 }, { "epoch": 0.09, "grad_norm": 1.4887896434159156, "learning_rate": 9.898310591768583e-06, "loss": 0.241, "step": 1292 }, { "epoch": 0.09, "grad_norm": 1.5827242336135758, "learning_rate": 9.89807802153272e-06, "loss": 0.256, "step": 1293 }, { "epoch": 0.09, "grad_norm": 1.4631264116159273, "learning_rate": 9.897845188387487e-06, "loss": 0.1931, "step": 1294 }, { "epoch": 0.09, "grad_norm": 1.5003118170591392, "learning_rate": 9.897612092345383e-06, "loss": 0.2029, "step": 1295 }, { "epoch": 0.09, "grad_norm": 1.7666410515699988, "learning_rate": 9.897378733418918e-06, "loss": 0.2885, "step": 1296 }, { "epoch": 0.09, "grad_norm": 1.4839784911516873, "learning_rate": 9.897145111620618e-06, "loss": 0.2389, "step": 1297 }, { "epoch": 0.09, "grad_norm": 1.6910140011172734, "learning_rate": 9.896911226963026e-06, "loss": 0.2213, "step": 1298 }, { "epoch": 0.09, "grad_norm": 1.3231164481107967, "learning_rate": 9.896677079458692e-06, "loss": 0.205, "step": 1299 }, { "epoch": 0.09, "grad_norm": 1.5126383231950964, "learning_rate": 9.896442669120188e-06, "loss": 0.2261, "step": 1300 }, { "epoch": 0.09, "grad_norm": 1.4454015904020094, "learning_rate": 9.896207995960092e-06, "loss": 0.2145, "step": 1301 }, { "epoch": 0.09, "grad_norm": 1.5877457980205394, "learning_rate": 9.895973059991003e-06, "loss": 0.2209, "step": 1302 }, { "epoch": 0.09, "grad_norm": 1.6136708411679674, "learning_rate": 9.89573786122553e-06, "loss": 0.2335, "step": 1303 }, { "epoch": 0.09, "grad_norm": 1.4076390352518964, "learning_rate": 9.8955023996763e-06, "loss": 0.2312, "step": 1304 }, { "epoch": 0.09, "grad_norm": 1.863147246066385, "learning_rate": 9.895266675355949e-06, "loss": 0.3263, "step": 1305 }, { "epoch": 0.09, "grad_norm": 1.4040915020753781, "learning_rate": 9.895030688277131e-06, "loss": 0.1963, "step": 1306 }, { "epoch": 0.09, "grad_norm": 5.870829493177399, "learning_rate": 9.894794438452513e-06, "loss": 0.6985, "step": 1307 }, { "epoch": 0.09, "grad_norm": 1.6166373408190315, "learning_rate": 9.894557925894775e-06, "loss": 0.2609, "step": 1308 }, { "epoch": 0.09, "grad_norm": 1.5293210424360062, "learning_rate": 9.894321150616612e-06, "loss": 0.2113, "step": 1309 }, { "epoch": 0.09, "grad_norm": 5.536872763552488, "learning_rate": 9.894084112630735e-06, "loss": 0.7996, "step": 1310 }, { "epoch": 0.09, "grad_norm": 1.4916100794669982, "learning_rate": 9.893846811949865e-06, "loss": 0.2438, "step": 1311 }, { "epoch": 0.09, "grad_norm": 1.394152381409212, "learning_rate": 9.89360924858674e-06, "loss": 0.2331, "step": 1312 }, { "epoch": 0.09, "grad_norm": 1.4745935563236365, "learning_rate": 9.893371422554114e-06, "loss": 0.2359, "step": 1313 }, { "epoch": 0.09, "grad_norm": 1.6634350163496008, "learning_rate": 9.893133333864749e-06, "loss": 0.2474, "step": 1314 }, { "epoch": 0.09, "grad_norm": 1.6554119110665917, "learning_rate": 9.892894982531425e-06, "loss": 0.2473, "step": 1315 }, { "epoch": 0.09, "grad_norm": 1.5294445931095408, "learning_rate": 9.892656368566938e-06, "loss": 0.2349, "step": 1316 }, { "epoch": 0.09, "grad_norm": 1.4601902084755956, "learning_rate": 9.892417491984095e-06, "loss": 0.2039, "step": 1317 }, { "epoch": 0.09, "grad_norm": 1.597125024458567, "learning_rate": 9.892178352795716e-06, "loss": 0.278, "step": 1318 }, { "epoch": 0.09, "grad_norm": 1.511022778737241, "learning_rate": 9.891938951014639e-06, "loss": 0.2567, "step": 1319 }, { "epoch": 0.09, "grad_norm": 1.5964420404067765, "learning_rate": 9.891699286653714e-06, "loss": 0.2598, "step": 1320 }, { "epoch": 0.09, "grad_norm": 1.44226348445606, "learning_rate": 9.891459359725805e-06, "loss": 0.2188, "step": 1321 }, { "epoch": 0.09, "grad_norm": 1.5032803246630735, "learning_rate": 9.891219170243791e-06, "loss": 0.2266, "step": 1322 }, { "epoch": 0.09, "grad_norm": 1.588228800107201, "learning_rate": 9.890978718220563e-06, "loss": 0.2722, "step": 1323 }, { "epoch": 0.09, "grad_norm": 5.422475275426151, "learning_rate": 9.890738003669029e-06, "loss": 0.6185, "step": 1324 }, { "epoch": 0.09, "grad_norm": 1.4254569142519447, "learning_rate": 9.890497026602108e-06, "loss": 0.2258, "step": 1325 }, { "epoch": 0.09, "grad_norm": 1.392841461002796, "learning_rate": 9.890255787032736e-06, "loss": 0.2142, "step": 1326 }, { "epoch": 0.09, "grad_norm": 1.368277498764144, "learning_rate": 9.89001428497386e-06, "loss": 0.2266, "step": 1327 }, { "epoch": 0.09, "grad_norm": 1.4457252361224269, "learning_rate": 9.889772520438447e-06, "loss": 0.1947, "step": 1328 }, { "epoch": 0.1, "grad_norm": 6.586152564556905, "learning_rate": 9.88953049343947e-06, "loss": 0.7718, "step": 1329 }, { "epoch": 0.1, "grad_norm": 1.3388920790034404, "learning_rate": 9.889288203989919e-06, "loss": 0.2241, "step": 1330 }, { "epoch": 0.1, "grad_norm": 1.478649561798433, "learning_rate": 9.889045652102805e-06, "loss": 0.2186, "step": 1331 }, { "epoch": 0.1, "grad_norm": 1.4237016509765614, "learning_rate": 9.88880283779114e-06, "loss": 0.2196, "step": 1332 }, { "epoch": 0.1, "grad_norm": 4.7298903836119, "learning_rate": 9.888559761067963e-06, "loss": 0.6355, "step": 1333 }, { "epoch": 0.1, "grad_norm": 1.6566928382950308, "learning_rate": 9.888316421946317e-06, "loss": 0.1872, "step": 1334 }, { "epoch": 0.1, "grad_norm": 1.826340890527177, "learning_rate": 9.88807282043927e-06, "loss": 0.2396, "step": 1335 }, { "epoch": 0.1, "grad_norm": 1.6141911071296187, "learning_rate": 9.88782895655989e-06, "loss": 0.2201, "step": 1336 }, { "epoch": 0.1, "grad_norm": 1.4698602469849074, "learning_rate": 9.887584830321271e-06, "loss": 0.1795, "step": 1337 }, { "epoch": 0.1, "grad_norm": 1.6341767551365083, "learning_rate": 9.887340441736515e-06, "loss": 0.2546, "step": 1338 }, { "epoch": 0.1, "grad_norm": 1.4045203198486547, "learning_rate": 9.887095790818739e-06, "loss": 0.1918, "step": 1339 }, { "epoch": 0.1, "grad_norm": 1.4944385918927947, "learning_rate": 9.886850877581079e-06, "loss": 0.2423, "step": 1340 }, { "epoch": 0.1, "grad_norm": 1.4789529967825175, "learning_rate": 9.886605702036678e-06, "loss": 0.2613, "step": 1341 }, { "epoch": 0.1, "grad_norm": 1.388234486678817, "learning_rate": 9.886360264198695e-06, "loss": 0.2352, "step": 1342 }, { "epoch": 0.1, "grad_norm": 1.3978771715379719, "learning_rate": 9.886114564080307e-06, "loss": 0.1892, "step": 1343 }, { "epoch": 0.1, "grad_norm": 1.7467107363801577, "learning_rate": 9.8858686016947e-06, "loss": 0.2613, "step": 1344 }, { "epoch": 0.1, "grad_norm": 1.4715879407005288, "learning_rate": 9.885622377055076e-06, "loss": 0.2097, "step": 1345 }, { "epoch": 0.1, "grad_norm": 1.4092776507249072, "learning_rate": 9.885375890174653e-06, "loss": 0.2013, "step": 1346 }, { "epoch": 0.1, "grad_norm": 1.5703790976069962, "learning_rate": 9.88512914106666e-06, "loss": 0.209, "step": 1347 }, { "epoch": 0.1, "grad_norm": 1.7074739303008952, "learning_rate": 9.884882129744344e-06, "loss": 0.2355, "step": 1348 }, { "epoch": 0.1, "grad_norm": 9.050644198503285, "learning_rate": 9.88463485622096e-06, "loss": 0.6451, "step": 1349 }, { "epoch": 0.1, "grad_norm": 1.6793638226009862, "learning_rate": 9.884387320509785e-06, "loss": 0.2722, "step": 1350 }, { "epoch": 0.1, "grad_norm": 4.661645961215535, "learning_rate": 9.884139522624103e-06, "loss": 0.7158, "step": 1351 }, { "epoch": 0.1, "grad_norm": 1.4789662739595457, "learning_rate": 9.883891462577212e-06, "loss": 0.2387, "step": 1352 }, { "epoch": 0.1, "grad_norm": 1.3949955057286314, "learning_rate": 9.883643140382434e-06, "loss": 0.2468, "step": 1353 }, { "epoch": 0.1, "grad_norm": 1.3808342402643738, "learning_rate": 9.883394556053092e-06, "loss": 0.2098, "step": 1354 }, { "epoch": 0.1, "grad_norm": 1.538101242370762, "learning_rate": 9.883145709602531e-06, "loss": 0.2584, "step": 1355 }, { "epoch": 0.1, "grad_norm": 1.86290822172693, "learning_rate": 9.882896601044107e-06, "loss": 0.316, "step": 1356 }, { "epoch": 0.1, "grad_norm": 5.713126617537648, "learning_rate": 9.882647230391194e-06, "loss": 0.6937, "step": 1357 }, { "epoch": 0.1, "grad_norm": 1.635197733997727, "learning_rate": 9.882397597657173e-06, "loss": 0.1901, "step": 1358 }, { "epoch": 0.1, "grad_norm": 1.6553254263807922, "learning_rate": 9.88214770285545e-06, "loss": 0.2587, "step": 1359 }, { "epoch": 0.1, "grad_norm": 1.5840421647850982, "learning_rate": 9.88189754599943e-06, "loss": 0.2402, "step": 1360 }, { "epoch": 0.1, "grad_norm": 1.314996874350391, "learning_rate": 9.881647127102544e-06, "loss": 0.1854, "step": 1361 }, { "epoch": 0.1, "grad_norm": 1.7767371321777545, "learning_rate": 9.881396446178238e-06, "loss": 0.2666, "step": 1362 }, { "epoch": 0.1, "grad_norm": 1.5712443977101482, "learning_rate": 9.881145503239959e-06, "loss": 0.2217, "step": 1363 }, { "epoch": 0.1, "grad_norm": 1.5370653074684109, "learning_rate": 9.880894298301185e-06, "loss": 0.2662, "step": 1364 }, { "epoch": 0.1, "grad_norm": 1.782971856972029, "learning_rate": 9.880642831375394e-06, "loss": 0.2392, "step": 1365 }, { "epoch": 0.1, "grad_norm": 1.5590543363252756, "learning_rate": 9.880391102476085e-06, "loss": 0.2323, "step": 1366 }, { "epoch": 0.1, "grad_norm": 1.4294490476660207, "learning_rate": 9.880139111616771e-06, "loss": 0.2432, "step": 1367 }, { "epoch": 0.1, "grad_norm": 1.5086601115971956, "learning_rate": 9.87988685881098e-06, "loss": 0.2377, "step": 1368 }, { "epoch": 0.1, "grad_norm": 6.188232703548946, "learning_rate": 9.879634344072245e-06, "loss": 0.5783, "step": 1369 }, { "epoch": 0.1, "grad_norm": 1.5366564476829037, "learning_rate": 9.879381567414126e-06, "loss": 0.2209, "step": 1370 }, { "epoch": 0.1, "grad_norm": 1.810069839209789, "learning_rate": 9.879128528850187e-06, "loss": 0.2811, "step": 1371 }, { "epoch": 0.1, "grad_norm": 1.445279820439034, "learning_rate": 9.878875228394016e-06, "loss": 0.2288, "step": 1372 }, { "epoch": 0.1, "grad_norm": 1.5009254399163685, "learning_rate": 9.878621666059204e-06, "loss": 0.2398, "step": 1373 }, { "epoch": 0.1, "grad_norm": 1.5208096591918494, "learning_rate": 9.878367841859361e-06, "loss": 0.267, "step": 1374 }, { "epoch": 0.1, "grad_norm": 1.5387355246904635, "learning_rate": 9.878113755808116e-06, "loss": 0.2193, "step": 1375 }, { "epoch": 0.1, "grad_norm": 1.4428283899541279, "learning_rate": 9.877859407919102e-06, "loss": 0.2227, "step": 1376 }, { "epoch": 0.1, "grad_norm": 1.375032257385307, "learning_rate": 9.877604798205975e-06, "loss": 0.1773, "step": 1377 }, { "epoch": 0.1, "grad_norm": 1.4172625933970604, "learning_rate": 9.8773499266824e-06, "loss": 0.2059, "step": 1378 }, { "epoch": 0.1, "grad_norm": 1.598015126783416, "learning_rate": 9.877094793362056e-06, "loss": 0.2244, "step": 1379 }, { "epoch": 0.1, "grad_norm": 1.5089052939344718, "learning_rate": 9.87683939825864e-06, "loss": 0.2684, "step": 1380 }, { "epoch": 0.1, "grad_norm": 1.4463561033010983, "learning_rate": 9.876583741385861e-06, "loss": 0.2302, "step": 1381 }, { "epoch": 0.1, "grad_norm": 1.378942209157623, "learning_rate": 9.87632782275744e-06, "loss": 0.2216, "step": 1382 }, { "epoch": 0.1, "grad_norm": 1.5367213748371558, "learning_rate": 9.876071642387113e-06, "loss": 0.2238, "step": 1383 }, { "epoch": 0.1, "grad_norm": 1.3655811912055094, "learning_rate": 9.875815200288632e-06, "loss": 0.2173, "step": 1384 }, { "epoch": 0.1, "grad_norm": 1.4302463571759334, "learning_rate": 9.875558496475764e-06, "loss": 0.2303, "step": 1385 }, { "epoch": 0.1, "grad_norm": 1.4661578307794918, "learning_rate": 9.875301530962282e-06, "loss": 0.2152, "step": 1386 }, { "epoch": 0.1, "grad_norm": 1.4123416509132967, "learning_rate": 9.875044303761985e-06, "loss": 0.2466, "step": 1387 }, { "epoch": 0.1, "grad_norm": 1.5824875667225462, "learning_rate": 9.874786814888674e-06, "loss": 0.2409, "step": 1388 }, { "epoch": 0.1, "grad_norm": 1.6213635274561233, "learning_rate": 9.874529064356177e-06, "loss": 0.2641, "step": 1389 }, { "epoch": 0.1, "grad_norm": 1.344568638193726, "learning_rate": 9.874271052178324e-06, "loss": 0.1976, "step": 1390 }, { "epoch": 0.1, "grad_norm": 1.3848614477717307, "learning_rate": 9.874012778368965e-06, "loss": 0.21, "step": 1391 }, { "epoch": 0.1, "grad_norm": 1.4648344774919115, "learning_rate": 9.873754242941964e-06, "loss": 0.2489, "step": 1392 }, { "epoch": 0.1, "grad_norm": 1.5944472896231816, "learning_rate": 9.873495445911197e-06, "loss": 0.2464, "step": 1393 }, { "epoch": 0.1, "grad_norm": 1.4393610623013384, "learning_rate": 9.873236387290557e-06, "loss": 0.2328, "step": 1394 }, { "epoch": 0.1, "grad_norm": 1.468170863362504, "learning_rate": 9.872977067093948e-06, "loss": 0.2172, "step": 1395 }, { "epoch": 0.1, "grad_norm": 1.4834676281640755, "learning_rate": 9.872717485335287e-06, "loss": 0.2533, "step": 1396 }, { "epoch": 0.1, "grad_norm": 1.4175652988328173, "learning_rate": 9.87245764202851e-06, "loss": 0.2021, "step": 1397 }, { "epoch": 0.1, "grad_norm": 1.7699221445436606, "learning_rate": 9.872197537187567e-06, "loss": 0.2551, "step": 1398 }, { "epoch": 0.1, "grad_norm": 6.739787879716992, "learning_rate": 9.871937170826414e-06, "loss": 0.6319, "step": 1399 }, { "epoch": 0.1, "grad_norm": 1.454609823514992, "learning_rate": 9.87167654295903e-06, "loss": 0.229, "step": 1400 }, { "epoch": 0.1, "grad_norm": 1.3478402265288982, "learning_rate": 9.871415653599404e-06, "loss": 0.2381, "step": 1401 }, { "epoch": 0.1, "grad_norm": 5.801904732038938, "learning_rate": 9.871154502761536e-06, "loss": 0.5994, "step": 1402 }, { "epoch": 0.1, "grad_norm": 1.3981770010013133, "learning_rate": 9.870893090459448e-06, "loss": 0.2066, "step": 1403 }, { "epoch": 0.1, "grad_norm": 1.359653918232644, "learning_rate": 9.87063141670717e-06, "loss": 0.1611, "step": 1404 }, { "epoch": 0.1, "grad_norm": 1.4374560294241465, "learning_rate": 9.870369481518749e-06, "loss": 0.2492, "step": 1405 }, { "epoch": 0.1, "grad_norm": 1.5193541537658246, "learning_rate": 9.87010728490824e-06, "loss": 0.214, "step": 1406 }, { "epoch": 0.1, "grad_norm": 5.809640693434221, "learning_rate": 9.869844826889723e-06, "loss": 0.8256, "step": 1407 }, { "epoch": 0.1, "grad_norm": 1.3111602627172836, "learning_rate": 9.86958210747728e-06, "loss": 0.1878, "step": 1408 }, { "epoch": 0.1, "grad_norm": 1.5615759105051201, "learning_rate": 9.869319126685017e-06, "loss": 0.2577, "step": 1409 }, { "epoch": 0.1, "grad_norm": 1.5287341708843745, "learning_rate": 9.869055884527047e-06, "loss": 0.2202, "step": 1410 }, { "epoch": 0.1, "grad_norm": 1.6369503175082545, "learning_rate": 9.868792381017501e-06, "loss": 0.2813, "step": 1411 }, { "epoch": 0.1, "grad_norm": 1.5117116157951451, "learning_rate": 9.868528616170523e-06, "loss": 0.2508, "step": 1412 }, { "epoch": 0.1, "grad_norm": 1.639172446193039, "learning_rate": 9.868264590000272e-06, "loss": 0.2187, "step": 1413 }, { "epoch": 0.1, "grad_norm": 1.7574758403881272, "learning_rate": 9.868000302520917e-06, "loss": 0.2415, "step": 1414 }, { "epoch": 0.1, "grad_norm": 1.4144429675892014, "learning_rate": 9.867735753746643e-06, "loss": 0.1933, "step": 1415 }, { "epoch": 0.1, "grad_norm": 1.4666066165361122, "learning_rate": 9.867470943691654e-06, "loss": 0.1935, "step": 1416 }, { "epoch": 0.1, "grad_norm": 1.6444728998872147, "learning_rate": 9.867205872370166e-06, "loss": 0.2166, "step": 1417 }, { "epoch": 0.1, "grad_norm": 1.5836241537079765, "learning_rate": 9.866940539796397e-06, "loss": 0.2153, "step": 1418 }, { "epoch": 0.1, "grad_norm": 1.577980713045395, "learning_rate": 9.866674945984599e-06, "loss": 0.2176, "step": 1419 }, { "epoch": 0.1, "grad_norm": 1.5466560768089, "learning_rate": 9.866409090949023e-06, "loss": 0.2418, "step": 1420 }, { "epoch": 0.1, "grad_norm": 1.59109360630513, "learning_rate": 9.86614297470394e-06, "loss": 0.2586, "step": 1421 }, { "epoch": 0.1, "grad_norm": 1.497974793847826, "learning_rate": 9.865876597263635e-06, "loss": 0.2182, "step": 1422 }, { "epoch": 0.1, "grad_norm": 1.6881123943416616, "learning_rate": 9.865609958642405e-06, "loss": 0.2316, "step": 1423 }, { "epoch": 0.1, "grad_norm": 1.4818849219579886, "learning_rate": 9.865343058854563e-06, "loss": 0.2174, "step": 1424 }, { "epoch": 0.1, "grad_norm": 1.5061861040664406, "learning_rate": 9.865075897914433e-06, "loss": 0.194, "step": 1425 }, { "epoch": 0.1, "grad_norm": 1.4974686259819203, "learning_rate": 9.864808475836358e-06, "loss": 0.2447, "step": 1426 }, { "epoch": 0.1, "grad_norm": 1.5112938625335517, "learning_rate": 9.86454079263469e-06, "loss": 0.2066, "step": 1427 }, { "epoch": 0.1, "grad_norm": 1.9084662965686543, "learning_rate": 9.8642728483238e-06, "loss": 0.2695, "step": 1428 }, { "epoch": 0.1, "grad_norm": 1.4207834195531535, "learning_rate": 9.864004642918066e-06, "loss": 0.2163, "step": 1429 }, { "epoch": 0.1, "grad_norm": 6.3265899699538, "learning_rate": 9.863736176431888e-06, "loss": 0.6267, "step": 1430 }, { "epoch": 0.1, "grad_norm": 7.317240798229465, "learning_rate": 9.863467448879674e-06, "loss": 0.8054, "step": 1431 }, { "epoch": 0.1, "grad_norm": 1.4032825742106048, "learning_rate": 9.863198460275849e-06, "loss": 0.2002, "step": 1432 }, { "epoch": 0.1, "grad_norm": 1.5301223322063986, "learning_rate": 9.86292921063485e-06, "loss": 0.2366, "step": 1433 }, { "epoch": 0.1, "grad_norm": 1.5369791845645449, "learning_rate": 9.862659699971132e-06, "loss": 0.234, "step": 1434 }, { "epoch": 0.1, "grad_norm": 1.5355685919818973, "learning_rate": 9.862389928299159e-06, "loss": 0.2725, "step": 1435 }, { "epoch": 0.1, "grad_norm": 1.4940004911644824, "learning_rate": 9.862119895633411e-06, "loss": 0.2084, "step": 1436 }, { "epoch": 0.1, "grad_norm": 1.644860330097462, "learning_rate": 9.861849601988384e-06, "loss": 0.2277, "step": 1437 }, { "epoch": 0.1, "grad_norm": 1.6286894781024046, "learning_rate": 9.861579047378586e-06, "loss": 0.2452, "step": 1438 }, { "epoch": 0.1, "grad_norm": 1.454955867530021, "learning_rate": 9.861308231818539e-06, "loss": 0.2156, "step": 1439 }, { "epoch": 0.1, "grad_norm": 1.690383889166342, "learning_rate": 9.861037155322777e-06, "loss": 0.2032, "step": 1440 }, { "epoch": 0.1, "grad_norm": 7.819750828463083, "learning_rate": 9.860765817905852e-06, "loss": 0.7353, "step": 1441 }, { "epoch": 0.1, "grad_norm": 1.421978227146228, "learning_rate": 9.860494219582331e-06, "loss": 0.2123, "step": 1442 }, { "epoch": 0.1, "grad_norm": 1.3351254143219158, "learning_rate": 9.86022236036679e-06, "loss": 0.1875, "step": 1443 }, { "epoch": 0.1, "grad_norm": 1.7989610752793848, "learning_rate": 9.859950240273822e-06, "loss": 0.2518, "step": 1444 }, { "epoch": 0.1, "grad_norm": 1.3956003870777745, "learning_rate": 9.85967785931803e-06, "loss": 0.2649, "step": 1445 }, { "epoch": 0.1, "grad_norm": 1.6201214330558396, "learning_rate": 9.859405217514039e-06, "loss": 0.2396, "step": 1446 }, { "epoch": 0.1, "grad_norm": 1.4973721987809194, "learning_rate": 9.859132314876481e-06, "loss": 0.2138, "step": 1447 }, { "epoch": 0.1, "grad_norm": 1.5078236735255501, "learning_rate": 9.858859151420003e-06, "loss": 0.2748, "step": 1448 }, { "epoch": 0.1, "grad_norm": 1.6282415931031449, "learning_rate": 9.85858572715927e-06, "loss": 0.2332, "step": 1449 }, { "epoch": 0.1, "grad_norm": 1.454121187430639, "learning_rate": 9.85831204210896e-06, "loss": 0.2139, "step": 1450 }, { "epoch": 0.1, "grad_norm": 1.6401966087079514, "learning_rate": 9.858038096283758e-06, "loss": 0.2529, "step": 1451 }, { "epoch": 0.1, "grad_norm": 1.3924547392692403, "learning_rate": 9.85776388969837e-06, "loss": 0.2163, "step": 1452 }, { "epoch": 0.1, "grad_norm": 1.530921255774927, "learning_rate": 9.857489422367516e-06, "loss": 0.2236, "step": 1453 }, { "epoch": 0.1, "grad_norm": 1.4774001958307623, "learning_rate": 9.857214694305928e-06, "loss": 0.2093, "step": 1454 }, { "epoch": 0.1, "grad_norm": 1.5306239890154436, "learning_rate": 9.856939705528352e-06, "loss": 0.2222, "step": 1455 }, { "epoch": 0.1, "grad_norm": 1.4937244683186444, "learning_rate": 9.856664456049549e-06, "loss": 0.2622, "step": 1456 }, { "epoch": 0.1, "grad_norm": 1.5147168265999564, "learning_rate": 9.856388945884292e-06, "loss": 0.2377, "step": 1457 }, { "epoch": 0.1, "grad_norm": 1.5064246101230958, "learning_rate": 9.856113175047369e-06, "loss": 0.2114, "step": 1458 }, { "epoch": 0.1, "grad_norm": 1.410979062646319, "learning_rate": 9.855837143553585e-06, "loss": 0.2166, "step": 1459 }, { "epoch": 0.1, "grad_norm": 1.7487139270100134, "learning_rate": 9.855560851417752e-06, "loss": 0.2657, "step": 1460 }, { "epoch": 0.1, "grad_norm": 1.5749822438510075, "learning_rate": 9.855284298654703e-06, "loss": 0.254, "step": 1461 }, { "epoch": 0.1, "grad_norm": 1.3581809626850272, "learning_rate": 9.855007485279282e-06, "loss": 0.1948, "step": 1462 }, { "epoch": 0.1, "grad_norm": 1.6604290188908999, "learning_rate": 9.854730411306347e-06, "loss": 0.2504, "step": 1463 }, { "epoch": 0.1, "grad_norm": 1.4272296751612261, "learning_rate": 9.854453076750772e-06, "loss": 0.2377, "step": 1464 }, { "epoch": 0.1, "grad_norm": 9.351974537197615, "learning_rate": 9.85417548162744e-06, "loss": 0.6479, "step": 1465 }, { "epoch": 0.1, "grad_norm": 1.3645267990815164, "learning_rate": 9.853897625951254e-06, "loss": 0.19, "step": 1466 }, { "epoch": 0.1, "grad_norm": 1.5737788045126675, "learning_rate": 9.853619509737125e-06, "loss": 0.2397, "step": 1467 }, { "epoch": 0.11, "grad_norm": 1.378486674344173, "learning_rate": 9.853341132999985e-06, "loss": 0.1968, "step": 1468 }, { "epoch": 0.11, "grad_norm": 6.346890235286319, "learning_rate": 9.853062495754772e-06, "loss": 0.7932, "step": 1469 }, { "epoch": 0.11, "grad_norm": 1.408982958566723, "learning_rate": 9.852783598016447e-06, "loss": 0.2296, "step": 1470 }, { "epoch": 0.11, "grad_norm": 6.488275838404948, "learning_rate": 9.852504439799978e-06, "loss": 0.6584, "step": 1471 }, { "epoch": 0.11, "grad_norm": 1.5125708137449658, "learning_rate": 9.852225021120345e-06, "loss": 0.2032, "step": 1472 }, { "epoch": 0.11, "grad_norm": 1.7434891700545965, "learning_rate": 9.851945341992552e-06, "loss": 0.2442, "step": 1473 }, { "epoch": 0.11, "grad_norm": 1.576583034324645, "learning_rate": 9.85166540243161e-06, "loss": 0.2309, "step": 1474 }, { "epoch": 0.11, "grad_norm": 1.3906884559484975, "learning_rate": 9.85138520245254e-06, "loss": 0.2111, "step": 1475 }, { "epoch": 0.11, "grad_norm": 1.526006313461169, "learning_rate": 9.85110474207039e-06, "loss": 0.1961, "step": 1476 }, { "epoch": 0.11, "grad_norm": 1.551682966981367, "learning_rate": 9.850824021300208e-06, "loss": 0.1879, "step": 1477 }, { "epoch": 0.11, "grad_norm": 1.4366966787017894, "learning_rate": 9.850543040157064e-06, "loss": 0.2292, "step": 1478 }, { "epoch": 0.11, "grad_norm": 1.482174174932269, "learning_rate": 9.85026179865604e-06, "loss": 0.2234, "step": 1479 }, { "epoch": 0.11, "grad_norm": 1.3565803059264792, "learning_rate": 9.849980296812231e-06, "loss": 0.1874, "step": 1480 }, { "epoch": 0.11, "grad_norm": 1.547091982246654, "learning_rate": 9.849698534640748e-06, "loss": 0.2555, "step": 1481 }, { "epoch": 0.11, "grad_norm": 1.468160034672372, "learning_rate": 9.849416512156716e-06, "loss": 0.2528, "step": 1482 }, { "epoch": 0.11, "grad_norm": 1.4671801075807596, "learning_rate": 9.84913422937527e-06, "loss": 0.213, "step": 1483 }, { "epoch": 0.11, "grad_norm": 4.719235216958124, "learning_rate": 9.848851686311562e-06, "loss": 0.4794, "step": 1484 }, { "epoch": 0.11, "grad_norm": 1.3899382537601066, "learning_rate": 9.84856888298076e-06, "loss": 0.1926, "step": 1485 }, { "epoch": 0.11, "grad_norm": 1.4726749722108121, "learning_rate": 9.848285819398044e-06, "loss": 0.2437, "step": 1486 }, { "epoch": 0.11, "grad_norm": 1.4163095491147437, "learning_rate": 9.848002495578606e-06, "loss": 0.197, "step": 1487 }, { "epoch": 0.11, "grad_norm": 1.6454671654988682, "learning_rate": 9.847718911537652e-06, "loss": 0.2429, "step": 1488 }, { "epoch": 0.11, "grad_norm": 30.914938053365645, "learning_rate": 9.847435067290407e-06, "loss": 0.6408, "step": 1489 }, { "epoch": 0.11, "grad_norm": 1.4760425009869969, "learning_rate": 9.847150962852105e-06, "loss": 0.2325, "step": 1490 }, { "epoch": 0.11, "grad_norm": 1.4737565855294696, "learning_rate": 9.846866598238e-06, "loss": 0.2088, "step": 1491 }, { "epoch": 0.11, "grad_norm": 1.493551648435666, "learning_rate": 9.846581973463346e-06, "loss": 0.2349, "step": 1492 }, { "epoch": 0.11, "grad_norm": 1.534414687676963, "learning_rate": 9.846297088543428e-06, "loss": 0.2201, "step": 1493 }, { "epoch": 0.11, "grad_norm": 1.4602197898074398, "learning_rate": 9.846011943493536e-06, "loss": 0.2247, "step": 1494 }, { "epoch": 0.11, "grad_norm": 1.5233278091529827, "learning_rate": 9.845726538328975e-06, "loss": 0.2489, "step": 1495 }, { "epoch": 0.11, "grad_norm": 1.482065377638207, "learning_rate": 9.845440873065063e-06, "loss": 0.2263, "step": 1496 }, { "epoch": 0.11, "grad_norm": 1.3919579904472246, "learning_rate": 9.845154947717137e-06, "loss": 0.1904, "step": 1497 }, { "epoch": 0.11, "grad_norm": 1.4439690021659912, "learning_rate": 9.844868762300542e-06, "loss": 0.2261, "step": 1498 }, { "epoch": 0.11, "grad_norm": 1.5338843773573032, "learning_rate": 9.844582316830639e-06, "loss": 0.2335, "step": 1499 }, { "epoch": 0.11, "grad_norm": 1.416272077405108, "learning_rate": 9.844295611322804e-06, "loss": 0.2265, "step": 1500 }, { "epoch": 0.11, "grad_norm": 1.3908467277223857, "learning_rate": 9.844008645792425e-06, "loss": 0.2211, "step": 1501 }, { "epoch": 0.11, "grad_norm": 1.545979264940256, "learning_rate": 9.843721420254908e-06, "loss": 0.2421, "step": 1502 }, { "epoch": 0.11, "grad_norm": 1.6487964807360322, "learning_rate": 9.843433934725665e-06, "loss": 0.2122, "step": 1503 }, { "epoch": 0.11, "grad_norm": 1.3254313451364343, "learning_rate": 9.843146189220133e-06, "loss": 0.2237, "step": 1504 }, { "epoch": 0.11, "grad_norm": 1.6098553246456513, "learning_rate": 9.842858183753754e-06, "loss": 0.2389, "step": 1505 }, { "epoch": 0.11, "grad_norm": 1.484733036630676, "learning_rate": 9.842569918341988e-06, "loss": 0.2302, "step": 1506 }, { "epoch": 0.11, "grad_norm": 1.6277556106239954, "learning_rate": 9.842281393000306e-06, "loss": 0.2237, "step": 1507 }, { "epoch": 0.11, "grad_norm": 6.63480555826644, "learning_rate": 9.841992607744198e-06, "loss": 0.5224, "step": 1508 }, { "epoch": 0.11, "grad_norm": 1.6298117657719016, "learning_rate": 9.84170356258916e-06, "loss": 0.3044, "step": 1509 }, { "epoch": 0.11, "grad_norm": 1.475429486196651, "learning_rate": 9.841414257550712e-06, "loss": 0.2142, "step": 1510 }, { "epoch": 0.11, "grad_norm": 1.5859147463782726, "learning_rate": 9.84112469264438e-06, "loss": 0.2087, "step": 1511 }, { "epoch": 0.11, "grad_norm": 1.4264245655539785, "learning_rate": 9.840834867885706e-06, "loss": 0.1594, "step": 1512 }, { "epoch": 0.11, "grad_norm": 1.6378256740072248, "learning_rate": 9.84054478329025e-06, "loss": 0.2605, "step": 1513 }, { "epoch": 0.11, "grad_norm": 1.438318811935071, "learning_rate": 9.84025443887358e-06, "loss": 0.171, "step": 1514 }, { "epoch": 0.11, "grad_norm": 1.5217683214874616, "learning_rate": 9.83996383465128e-06, "loss": 0.2561, "step": 1515 }, { "epoch": 0.11, "grad_norm": 1.532853255708979, "learning_rate": 9.83967297063895e-06, "loss": 0.2118, "step": 1516 }, { "epoch": 0.11, "grad_norm": 1.5956952054895646, "learning_rate": 9.839381846852203e-06, "loss": 0.2602, "step": 1517 }, { "epoch": 0.11, "grad_norm": 1.4898384063066419, "learning_rate": 9.839090463306663e-06, "loss": 0.1995, "step": 1518 }, { "epoch": 0.11, "grad_norm": 1.6585037479244904, "learning_rate": 9.838798820017972e-06, "loss": 0.2107, "step": 1519 }, { "epoch": 0.11, "grad_norm": 1.5929778340254797, "learning_rate": 9.838506917001784e-06, "loss": 0.2116, "step": 1520 }, { "epoch": 0.11, "grad_norm": 1.4636014751855189, "learning_rate": 9.838214754273768e-06, "loss": 0.2512, "step": 1521 }, { "epoch": 0.11, "grad_norm": 1.5910332107395206, "learning_rate": 9.837922331849604e-06, "loss": 0.2659, "step": 1522 }, { "epoch": 0.11, "grad_norm": 4.626336220017116, "learning_rate": 9.837629649744989e-06, "loss": 0.6054, "step": 1523 }, { "epoch": 0.11, "grad_norm": 1.4785843323445005, "learning_rate": 9.837336707975633e-06, "loss": 0.2705, "step": 1524 }, { "epoch": 0.11, "grad_norm": 1.4834704203724265, "learning_rate": 9.837043506557261e-06, "loss": 0.2256, "step": 1525 }, { "epoch": 0.11, "grad_norm": 5.489207975590961, "learning_rate": 9.83675004550561e-06, "loss": 0.7058, "step": 1526 }, { "epoch": 0.11, "grad_norm": 1.6301629330971392, "learning_rate": 9.836456324836432e-06, "loss": 0.166, "step": 1527 }, { "epoch": 0.11, "grad_norm": 7.345588468704059, "learning_rate": 9.836162344565493e-06, "loss": 0.6935, "step": 1528 }, { "epoch": 0.11, "grad_norm": 1.669816651205717, "learning_rate": 9.835868104708572e-06, "loss": 0.231, "step": 1529 }, { "epoch": 0.11, "grad_norm": 1.4432564199624374, "learning_rate": 9.835573605281462e-06, "loss": 0.2332, "step": 1530 }, { "epoch": 0.11, "grad_norm": 5.145547683477056, "learning_rate": 9.835278846299974e-06, "loss": 0.5424, "step": 1531 }, { "epoch": 0.11, "grad_norm": 1.39277031606631, "learning_rate": 9.834983827779926e-06, "loss": 0.2107, "step": 1532 }, { "epoch": 0.11, "grad_norm": 1.4512375794572607, "learning_rate": 9.834688549737154e-06, "loss": 0.2153, "step": 1533 }, { "epoch": 0.11, "grad_norm": 1.347347554996215, "learning_rate": 9.83439301218751e-06, "loss": 0.2253, "step": 1534 }, { "epoch": 0.11, "grad_norm": 1.3406302593803636, "learning_rate": 9.834097215146851e-06, "loss": 0.2351, "step": 1535 }, { "epoch": 0.11, "grad_norm": 1.4771306942863593, "learning_rate": 9.833801158631062e-06, "loss": 0.1979, "step": 1536 }, { "epoch": 0.11, "grad_norm": 1.5263956381612158, "learning_rate": 9.833504842656029e-06, "loss": 0.2476, "step": 1537 }, { "epoch": 0.11, "grad_norm": 1.3429885859612332, "learning_rate": 9.833208267237658e-06, "loss": 0.2288, "step": 1538 }, { "epoch": 0.11, "grad_norm": 1.8363183773471008, "learning_rate": 9.83291143239187e-06, "loss": 0.295, "step": 1539 }, { "epoch": 0.11, "grad_norm": 1.4662700259047743, "learning_rate": 9.832614338134595e-06, "loss": 0.2282, "step": 1540 }, { "epoch": 0.11, "grad_norm": 1.585094546474449, "learning_rate": 9.832316984481783e-06, "loss": 0.2283, "step": 1541 }, { "epoch": 0.11, "grad_norm": 12.286584560215076, "learning_rate": 9.832019371449391e-06, "loss": 0.6859, "step": 1542 }, { "epoch": 0.11, "grad_norm": 1.7799698311643788, "learning_rate": 9.831721499053397e-06, "loss": 0.2964, "step": 1543 }, { "epoch": 0.11, "grad_norm": 1.431979054902501, "learning_rate": 9.831423367309788e-06, "loss": 0.2644, "step": 1544 }, { "epoch": 0.11, "grad_norm": 1.424940258021316, "learning_rate": 9.831124976234568e-06, "loss": 0.2053, "step": 1545 }, { "epoch": 0.11, "grad_norm": 1.5666413112239748, "learning_rate": 9.83082632584375e-06, "loss": 0.2366, "step": 1546 }, { "epoch": 0.11, "grad_norm": 1.4622931146485236, "learning_rate": 9.830527416153368e-06, "loss": 0.2482, "step": 1547 }, { "epoch": 0.11, "grad_norm": 1.6568808251615852, "learning_rate": 9.830228247179465e-06, "loss": 0.2481, "step": 1548 }, { "epoch": 0.11, "grad_norm": 1.5658953099891781, "learning_rate": 9.8299288189381e-06, "loss": 0.2155, "step": 1549 }, { "epoch": 0.11, "grad_norm": 1.4056570897983416, "learning_rate": 9.829629131445342e-06, "loss": 0.2109, "step": 1550 }, { "epoch": 0.11, "grad_norm": 1.5857742306507916, "learning_rate": 9.829329184717281e-06, "loss": 0.2299, "step": 1551 }, { "epoch": 0.11, "grad_norm": 1.676225632467976, "learning_rate": 9.829028978770015e-06, "loss": 0.2943, "step": 1552 }, { "epoch": 0.11, "grad_norm": 1.5838331474659668, "learning_rate": 9.828728513619659e-06, "loss": 0.2818, "step": 1553 }, { "epoch": 0.11, "grad_norm": 1.6099143879898696, "learning_rate": 9.828427789282337e-06, "loss": 0.2546, "step": 1554 }, { "epoch": 0.11, "grad_norm": 1.5127258143455455, "learning_rate": 9.828126805774197e-06, "loss": 0.2379, "step": 1555 }, { "epoch": 0.11, "grad_norm": 1.4977762823766838, "learning_rate": 9.827825563111389e-06, "loss": 0.2343, "step": 1556 }, { "epoch": 0.11, "grad_norm": 1.466637582859912, "learning_rate": 9.827524061310085e-06, "loss": 0.2714, "step": 1557 }, { "epoch": 0.11, "grad_norm": 1.3445130615270007, "learning_rate": 9.82722230038647e-06, "loss": 0.1946, "step": 1558 }, { "epoch": 0.11, "grad_norm": 1.4335509391352084, "learning_rate": 9.826920280356737e-06, "loss": 0.2348, "step": 1559 }, { "epoch": 0.11, "grad_norm": 5.73324219308033, "learning_rate": 9.826618001237101e-06, "loss": 0.653, "step": 1560 }, { "epoch": 0.11, "grad_norm": 1.4494109190769389, "learning_rate": 9.826315463043785e-06, "loss": 0.2031, "step": 1561 }, { "epoch": 0.11, "grad_norm": 1.5854440510035475, "learning_rate": 9.82601266579303e-06, "loss": 0.2499, "step": 1562 }, { "epoch": 0.11, "grad_norm": 7.7349652784427265, "learning_rate": 9.825709609501088e-06, "loss": 0.64, "step": 1563 }, { "epoch": 0.11, "grad_norm": 1.3863898822031804, "learning_rate": 9.825406294184225e-06, "loss": 0.2296, "step": 1564 }, { "epoch": 0.11, "grad_norm": 1.5376796384950608, "learning_rate": 9.825102719858722e-06, "loss": 0.2541, "step": 1565 }, { "epoch": 0.11, "grad_norm": 1.511888999228989, "learning_rate": 9.824798886540874e-06, "loss": 0.2072, "step": 1566 }, { "epoch": 0.11, "grad_norm": 5.371233568847546, "learning_rate": 9.82449479424699e-06, "loss": 0.7218, "step": 1567 }, { "epoch": 0.11, "grad_norm": 1.6560407943327873, "learning_rate": 9.824190442993392e-06, "loss": 0.2115, "step": 1568 }, { "epoch": 0.11, "grad_norm": 1.4589841234716807, "learning_rate": 9.823885832796419e-06, "loss": 0.2473, "step": 1569 }, { "epoch": 0.11, "grad_norm": 1.4226470373261548, "learning_rate": 9.823580963672415e-06, "loss": 0.2578, "step": 1570 }, { "epoch": 0.11, "grad_norm": 1.6943826821235448, "learning_rate": 9.82327583563775e-06, "loss": 0.2338, "step": 1571 }, { "epoch": 0.11, "grad_norm": 1.369131334580333, "learning_rate": 9.822970448708799e-06, "loss": 0.1939, "step": 1572 }, { "epoch": 0.11, "grad_norm": 1.5648149620275735, "learning_rate": 9.822664802901956e-06, "loss": 0.2377, "step": 1573 }, { "epoch": 0.11, "grad_norm": 1.2842799330598096, "learning_rate": 9.822358898233625e-06, "loss": 0.2021, "step": 1574 }, { "epoch": 0.11, "grad_norm": 1.4681206066600627, "learning_rate": 9.822052734720228e-06, "loss": 0.2358, "step": 1575 }, { "epoch": 0.11, "grad_norm": 1.3934825890155422, "learning_rate": 9.821746312378196e-06, "loss": 0.2305, "step": 1576 }, { "epoch": 0.11, "grad_norm": 1.5973548165869802, "learning_rate": 9.821439631223978e-06, "loss": 0.2542, "step": 1577 }, { "epoch": 0.11, "grad_norm": 5.5638008040552105, "learning_rate": 9.821132691274035e-06, "loss": 0.5038, "step": 1578 }, { "epoch": 0.11, "grad_norm": 1.3709727651556627, "learning_rate": 9.820825492544842e-06, "loss": 0.1994, "step": 1579 }, { "epoch": 0.11, "grad_norm": 1.5316623562308376, "learning_rate": 9.82051803505289e-06, "loss": 0.2622, "step": 1580 }, { "epoch": 0.11, "grad_norm": 1.5586460884475626, "learning_rate": 9.820210318814679e-06, "loss": 0.2422, "step": 1581 }, { "epoch": 0.11, "grad_norm": 1.4980698420031409, "learning_rate": 9.81990234384673e-06, "loss": 0.2199, "step": 1582 }, { "epoch": 0.11, "grad_norm": 5.471700998427309, "learning_rate": 9.81959411016557e-06, "loss": 0.5387, "step": 1583 }, { "epoch": 0.11, "grad_norm": 1.7200792458866005, "learning_rate": 9.819285617787746e-06, "loss": 0.236, "step": 1584 }, { "epoch": 0.11, "grad_norm": 1.5252434679303168, "learning_rate": 9.818976866729814e-06, "loss": 0.1757, "step": 1585 }, { "epoch": 0.11, "grad_norm": 1.3117876732376015, "learning_rate": 9.818667857008352e-06, "loss": 0.2159, "step": 1586 }, { "epoch": 0.11, "grad_norm": 1.6017841907018093, "learning_rate": 9.81835858863994e-06, "loss": 0.2623, "step": 1587 }, { "epoch": 0.11, "grad_norm": 1.623047665308878, "learning_rate": 9.818049061641183e-06, "loss": 0.2483, "step": 1588 }, { "epoch": 0.11, "grad_norm": 1.5876520427908178, "learning_rate": 9.817739276028694e-06, "loss": 0.253, "step": 1589 }, { "epoch": 0.11, "grad_norm": 1.5858352347151505, "learning_rate": 9.8174292318191e-06, "loss": 0.2391, "step": 1590 }, { "epoch": 0.11, "grad_norm": 1.441814385223122, "learning_rate": 9.817118929029042e-06, "loss": 0.2205, "step": 1591 }, { "epoch": 0.11, "grad_norm": 10.696672970972553, "learning_rate": 9.816808367675179e-06, "loss": 0.6145, "step": 1592 }, { "epoch": 0.11, "grad_norm": 1.4618408635629998, "learning_rate": 9.816497547774178e-06, "loss": 0.2008, "step": 1593 }, { "epoch": 0.11, "grad_norm": 1.4357087994743714, "learning_rate": 9.816186469342724e-06, "loss": 0.2358, "step": 1594 }, { "epoch": 0.11, "grad_norm": 2.3465233183511685, "learning_rate": 9.815875132397515e-06, "loss": 0.2013, "step": 1595 }, { "epoch": 0.11, "grad_norm": 1.8198660558733726, "learning_rate": 9.81556353695526e-06, "loss": 0.2367, "step": 1596 }, { "epoch": 0.11, "grad_norm": 1.568972485691452, "learning_rate": 9.815251683032684e-06, "loss": 0.2194, "step": 1597 }, { "epoch": 0.11, "grad_norm": 1.5877705447355996, "learning_rate": 9.814939570646532e-06, "loss": 0.218, "step": 1598 }, { "epoch": 0.11, "grad_norm": 1.4760635293787112, "learning_rate": 9.814627199813549e-06, "loss": 0.2302, "step": 1599 }, { "epoch": 0.11, "grad_norm": 1.4058410956326552, "learning_rate": 9.814314570550506e-06, "loss": 0.2242, "step": 1600 }, { "epoch": 0.11, "grad_norm": 1.374485548166533, "learning_rate": 9.814001682874183e-06, "loss": 0.1985, "step": 1601 }, { "epoch": 0.11, "grad_norm": 1.2933922310576191, "learning_rate": 9.813688536801375e-06, "loss": 0.1892, "step": 1602 }, { "epoch": 0.11, "grad_norm": 1.425693142424464, "learning_rate": 9.813375132348893e-06, "loss": 0.189, "step": 1603 }, { "epoch": 0.11, "grad_norm": 1.5217491709222601, "learning_rate": 9.813061469533552e-06, "loss": 0.2037, "step": 1604 }, { "epoch": 0.11, "grad_norm": 7.403142426244644, "learning_rate": 9.812747548372195e-06, "loss": 0.7243, "step": 1605 }, { "epoch": 0.11, "grad_norm": 1.3099323250937103, "learning_rate": 9.81243336888167e-06, "loss": 0.1915, "step": 1606 }, { "epoch": 0.11, "grad_norm": 1.6004250497925583, "learning_rate": 9.812118931078837e-06, "loss": 0.2336, "step": 1607 }, { "epoch": 0.12, "grad_norm": 5.208773259980145, "learning_rate": 9.811804234980583e-06, "loss": 0.6985, "step": 1608 }, { "epoch": 0.12, "grad_norm": 1.5875724798666768, "learning_rate": 9.81148928060379e-06, "loss": 0.2079, "step": 1609 }, { "epoch": 0.12, "grad_norm": 1.5450388873752503, "learning_rate": 9.811174067965367e-06, "loss": 0.2655, "step": 1610 }, { "epoch": 0.12, "grad_norm": 1.700370463813713, "learning_rate": 9.810858597082235e-06, "loss": 0.2399, "step": 1611 }, { "epoch": 0.12, "grad_norm": 1.44645809705951, "learning_rate": 9.810542867971326e-06, "loss": 0.2399, "step": 1612 }, { "epoch": 0.12, "grad_norm": 4.620041395369137, "learning_rate": 9.810226880649589e-06, "loss": 0.7007, "step": 1613 }, { "epoch": 0.12, "grad_norm": 1.4216453626938492, "learning_rate": 9.80991063513398e-06, "loss": 0.1686, "step": 1614 }, { "epoch": 0.12, "grad_norm": 2.112719232669365, "learning_rate": 9.809594131441479e-06, "loss": 0.2593, "step": 1615 }, { "epoch": 0.12, "grad_norm": 1.5865792604852162, "learning_rate": 9.809277369589072e-06, "loss": 0.2665, "step": 1616 }, { "epoch": 0.12, "grad_norm": 1.438636177630681, "learning_rate": 9.808960349593761e-06, "loss": 0.2438, "step": 1617 }, { "epoch": 0.12, "grad_norm": 1.4194285898096355, "learning_rate": 9.808643071472564e-06, "loss": 0.2208, "step": 1618 }, { "epoch": 0.12, "grad_norm": 1.365753865598493, "learning_rate": 9.80832553524251e-06, "loss": 0.1963, "step": 1619 }, { "epoch": 0.12, "grad_norm": 1.7057091545871734, "learning_rate": 9.808007740920647e-06, "loss": 0.2424, "step": 1620 }, { "epoch": 0.12, "grad_norm": 1.4806090639600953, "learning_rate": 9.807689688524027e-06, "loss": 0.2119, "step": 1621 }, { "epoch": 0.12, "grad_norm": 1.3747608366564992, "learning_rate": 9.807371378069723e-06, "loss": 0.1633, "step": 1622 }, { "epoch": 0.12, "grad_norm": 1.5289310003563972, "learning_rate": 9.807052809574824e-06, "loss": 0.2821, "step": 1623 }, { "epoch": 0.12, "grad_norm": 4.94711764436591, "learning_rate": 9.806733983056429e-06, "loss": 0.4993, "step": 1624 }, { "epoch": 0.12, "grad_norm": 1.458270531451846, "learning_rate": 9.806414898531647e-06, "loss": 0.247, "step": 1625 }, { "epoch": 0.12, "grad_norm": 7.4652526167248725, "learning_rate": 9.80609555601761e-06, "loss": 0.7849, "step": 1626 }, { "epoch": 0.12, "grad_norm": 1.4756719514712213, "learning_rate": 9.805775955531456e-06, "loss": 0.2282, "step": 1627 }, { "epoch": 0.12, "grad_norm": 1.396563422642613, "learning_rate": 9.805456097090343e-06, "loss": 0.213, "step": 1628 }, { "epoch": 0.12, "grad_norm": 1.488272823795172, "learning_rate": 9.805135980711436e-06, "loss": 0.2653, "step": 1629 }, { "epoch": 0.12, "grad_norm": 6.999818599273434, "learning_rate": 9.80481560641192e-06, "loss": 0.5572, "step": 1630 }, { "epoch": 0.12, "grad_norm": 1.4526772851541578, "learning_rate": 9.80449497420899e-06, "loss": 0.1932, "step": 1631 }, { "epoch": 0.12, "grad_norm": 1.4891417212197875, "learning_rate": 9.804174084119857e-06, "loss": 0.222, "step": 1632 }, { "epoch": 0.12, "grad_norm": 38.74248963793999, "learning_rate": 9.803852936161747e-06, "loss": 0.6564, "step": 1633 }, { "epoch": 0.12, "grad_norm": 1.5068881197059105, "learning_rate": 9.803531530351894e-06, "loss": 0.2604, "step": 1634 }, { "epoch": 0.12, "grad_norm": 1.521933240760097, "learning_rate": 9.803209866707553e-06, "loss": 0.2484, "step": 1635 }, { "epoch": 0.12, "grad_norm": 1.3377806412565296, "learning_rate": 9.802887945245987e-06, "loss": 0.1848, "step": 1636 }, { "epoch": 0.12, "grad_norm": 1.569442325202885, "learning_rate": 9.80256576598448e-06, "loss": 0.2314, "step": 1637 }, { "epoch": 0.12, "grad_norm": 1.5517290957281664, "learning_rate": 9.802243328940319e-06, "loss": 0.2716, "step": 1638 }, { "epoch": 0.12, "grad_norm": 1.3700517009054844, "learning_rate": 9.801920634130816e-06, "loss": 0.1853, "step": 1639 }, { "epoch": 0.12, "grad_norm": 1.5748658447719495, "learning_rate": 9.80159768157329e-06, "loss": 0.2476, "step": 1640 }, { "epoch": 0.12, "grad_norm": 1.4859273474333772, "learning_rate": 9.801274471285076e-06, "loss": 0.1984, "step": 1641 }, { "epoch": 0.12, "grad_norm": 1.6768396259375484, "learning_rate": 9.800951003283522e-06, "loss": 0.2931, "step": 1642 }, { "epoch": 0.12, "grad_norm": 1.3055000353275865, "learning_rate": 9.800627277585995e-06, "loss": 0.2, "step": 1643 }, { "epoch": 0.12, "grad_norm": 1.5995618887871297, "learning_rate": 9.800303294209865e-06, "loss": 0.2768, "step": 1644 }, { "epoch": 0.12, "grad_norm": 1.4494311430660118, "learning_rate": 9.799979053172524e-06, "loss": 0.2049, "step": 1645 }, { "epoch": 0.12, "grad_norm": 1.371483479323944, "learning_rate": 9.799654554491379e-06, "loss": 0.2096, "step": 1646 }, { "epoch": 0.12, "grad_norm": 1.4368264268889892, "learning_rate": 9.799329798183843e-06, "loss": 0.2214, "step": 1647 }, { "epoch": 0.12, "grad_norm": 1.3510505877201295, "learning_rate": 9.799004784267352e-06, "loss": 0.2271, "step": 1648 }, { "epoch": 0.12, "grad_norm": 1.4399895824051956, "learning_rate": 9.798679512759347e-06, "loss": 0.1996, "step": 1649 }, { "epoch": 0.12, "grad_norm": 1.3674449587836637, "learning_rate": 9.798353983677294e-06, "loss": 0.207, "step": 1650 }, { "epoch": 0.12, "grad_norm": 1.537947248078371, "learning_rate": 9.798028197038657e-06, "loss": 0.2456, "step": 1651 }, { "epoch": 0.12, "grad_norm": 1.57858660425834, "learning_rate": 9.797702152860932e-06, "loss": 0.2545, "step": 1652 }, { "epoch": 0.12, "grad_norm": 1.3461232070921907, "learning_rate": 9.797375851161615e-06, "loss": 0.2155, "step": 1653 }, { "epoch": 0.12, "grad_norm": 1.3409813556752181, "learning_rate": 9.79704929195822e-06, "loss": 0.1998, "step": 1654 }, { "epoch": 0.12, "grad_norm": 1.3936042232648944, "learning_rate": 9.796722475268278e-06, "loss": 0.1869, "step": 1655 }, { "epoch": 0.12, "grad_norm": 1.4930864115753177, "learning_rate": 9.79639540110933e-06, "loss": 0.1855, "step": 1656 }, { "epoch": 0.12, "grad_norm": 1.618483161995016, "learning_rate": 9.796068069498931e-06, "loss": 0.2222, "step": 1657 }, { "epoch": 0.12, "grad_norm": 1.552176239190835, "learning_rate": 9.795740480454651e-06, "loss": 0.205, "step": 1658 }, { "epoch": 0.12, "grad_norm": 1.5505038348518316, "learning_rate": 9.795412633994077e-06, "loss": 0.2486, "step": 1659 }, { "epoch": 0.12, "grad_norm": 1.326956273328569, "learning_rate": 9.795084530134801e-06, "loss": 0.1742, "step": 1660 }, { "epoch": 0.12, "grad_norm": 1.2964390822908187, "learning_rate": 9.794756168894439e-06, "loss": 0.1951, "step": 1661 }, { "epoch": 0.12, "grad_norm": 1.495084984643208, "learning_rate": 9.794427550290615e-06, "loss": 0.2617, "step": 1662 }, { "epoch": 0.12, "grad_norm": 1.3584011835260723, "learning_rate": 9.794098674340966e-06, "loss": 0.1905, "step": 1663 }, { "epoch": 0.12, "grad_norm": 1.3101838433835598, "learning_rate": 9.793769541063147e-06, "loss": 0.1766, "step": 1664 }, { "epoch": 0.12, "grad_norm": 1.4821449750998308, "learning_rate": 9.793440150474822e-06, "loss": 0.1994, "step": 1665 }, { "epoch": 0.12, "grad_norm": 1.5526190114019054, "learning_rate": 9.793110502593674e-06, "loss": 0.255, "step": 1666 }, { "epoch": 0.12, "grad_norm": 1.2332512322109075, "learning_rate": 9.792780597437397e-06, "loss": 0.2156, "step": 1667 }, { "epoch": 0.12, "grad_norm": 1.5703369628987611, "learning_rate": 9.792450435023699e-06, "loss": 0.2306, "step": 1668 }, { "epoch": 0.12, "grad_norm": 1.4000080992387918, "learning_rate": 9.7921200153703e-06, "loss": 0.2222, "step": 1669 }, { "epoch": 0.12, "grad_norm": 1.4235960362370275, "learning_rate": 9.791789338494937e-06, "loss": 0.2029, "step": 1670 }, { "epoch": 0.12, "grad_norm": 5.562190153953838, "learning_rate": 9.791458404415357e-06, "loss": 0.6406, "step": 1671 }, { "epoch": 0.12, "grad_norm": 1.4883614268854848, "learning_rate": 9.791127213149328e-06, "loss": 0.2228, "step": 1672 }, { "epoch": 0.12, "grad_norm": 1.3960957996252932, "learning_rate": 9.790795764714622e-06, "loss": 0.1991, "step": 1673 }, { "epoch": 0.12, "grad_norm": 6.653576076591192, "learning_rate": 9.790464059129034e-06, "loss": 0.6018, "step": 1674 }, { "epoch": 0.12, "grad_norm": 1.537778401628917, "learning_rate": 9.790132096410366e-06, "loss": 0.2336, "step": 1675 }, { "epoch": 0.12, "grad_norm": 1.3946258826829696, "learning_rate": 9.789799876576438e-06, "loss": 0.2562, "step": 1676 }, { "epoch": 0.12, "grad_norm": 8.293227697243076, "learning_rate": 9.789467399645081e-06, "loss": 0.6905, "step": 1677 }, { "epoch": 0.12, "grad_norm": 1.673971303387585, "learning_rate": 9.789134665634142e-06, "loss": 0.2008, "step": 1678 }, { "epoch": 0.12, "grad_norm": 1.5293938763424837, "learning_rate": 9.788801674561479e-06, "loss": 0.1998, "step": 1679 }, { "epoch": 0.12, "grad_norm": 1.5169792737748706, "learning_rate": 9.788468426444968e-06, "loss": 0.2187, "step": 1680 }, { "epoch": 0.12, "grad_norm": 1.4733994479348527, "learning_rate": 9.788134921302495e-06, "loss": 0.2478, "step": 1681 }, { "epoch": 0.12, "grad_norm": 1.7553052184536297, "learning_rate": 9.787801159151962e-06, "loss": 0.2492, "step": 1682 }, { "epoch": 0.12, "grad_norm": 1.3746876644012884, "learning_rate": 9.787467140011284e-06, "loss": 0.2197, "step": 1683 }, { "epoch": 0.12, "grad_norm": 1.605912095347559, "learning_rate": 9.78713286389839e-06, "loss": 0.2589, "step": 1684 }, { "epoch": 0.12, "grad_norm": 1.3788070834718391, "learning_rate": 9.78679833083122e-06, "loss": 0.2414, "step": 1685 }, { "epoch": 0.12, "grad_norm": 1.420012774090065, "learning_rate": 9.786463540827734e-06, "loss": 0.1933, "step": 1686 }, { "epoch": 0.12, "grad_norm": 1.5492844611671754, "learning_rate": 9.7861284939059e-06, "loss": 0.2465, "step": 1687 }, { "epoch": 0.12, "grad_norm": 1.5474295324375043, "learning_rate": 9.785793190083705e-06, "loss": 0.2368, "step": 1688 }, { "epoch": 0.12, "grad_norm": 1.5570560163743425, "learning_rate": 9.785457629379143e-06, "loss": 0.2175, "step": 1689 }, { "epoch": 0.12, "grad_norm": 1.3188894238129203, "learning_rate": 9.785121811810225e-06, "loss": 0.1809, "step": 1690 }, { "epoch": 0.12, "grad_norm": 1.5685988685450034, "learning_rate": 9.784785737394982e-06, "loss": 0.2384, "step": 1691 }, { "epoch": 0.12, "grad_norm": 4.277813732749272, "learning_rate": 9.784449406151448e-06, "loss": 0.5431, "step": 1692 }, { "epoch": 0.12, "grad_norm": 1.5102070798738463, "learning_rate": 9.784112818097678e-06, "loss": 0.2111, "step": 1693 }, { "epoch": 0.12, "grad_norm": 1.5958130357216775, "learning_rate": 9.783775973251737e-06, "loss": 0.226, "step": 1694 }, { "epoch": 0.12, "grad_norm": 1.5472157649871132, "learning_rate": 9.783438871631708e-06, "loss": 0.2413, "step": 1695 }, { "epoch": 0.12, "grad_norm": 1.574898315776022, "learning_rate": 9.783101513255681e-06, "loss": 0.2564, "step": 1696 }, { "epoch": 0.12, "grad_norm": 1.6031253781044434, "learning_rate": 9.78276389814177e-06, "loss": 0.2559, "step": 1697 }, { "epoch": 0.12, "grad_norm": 4.331173793948848, "learning_rate": 9.782426026308094e-06, "loss": 0.4703, "step": 1698 }, { "epoch": 0.12, "grad_norm": 1.506905336500835, "learning_rate": 9.782087897772787e-06, "loss": 0.2385, "step": 1699 }, { "epoch": 0.12, "grad_norm": 1.6377424364641004, "learning_rate": 9.781749512554e-06, "loss": 0.2384, "step": 1700 }, { "epoch": 0.12, "grad_norm": 1.5610477285967144, "learning_rate": 9.781410870669896e-06, "loss": 0.2117, "step": 1701 }, { "epoch": 0.12, "grad_norm": 1.530807762671212, "learning_rate": 9.781071972138654e-06, "loss": 0.2559, "step": 1702 }, { "epoch": 0.12, "grad_norm": 1.5709572317569678, "learning_rate": 9.780732816978462e-06, "loss": 0.2066, "step": 1703 }, { "epoch": 0.12, "grad_norm": 1.6710030826487932, "learning_rate": 9.780393405207524e-06, "loss": 0.2208, "step": 1704 }, { "epoch": 0.12, "grad_norm": 1.5839199379682274, "learning_rate": 9.78005373684406e-06, "loss": 0.2244, "step": 1705 }, { "epoch": 0.12, "grad_norm": 1.6121870410905914, "learning_rate": 9.7797138119063e-06, "loss": 0.2607, "step": 1706 }, { "epoch": 0.12, "grad_norm": 1.3095778207377542, "learning_rate": 9.779373630412493e-06, "loss": 0.1767, "step": 1707 }, { "epoch": 0.12, "grad_norm": 1.5897047503971178, "learning_rate": 9.779033192380897e-06, "loss": 0.2321, "step": 1708 }, { "epoch": 0.12, "grad_norm": 4.180408482820496, "learning_rate": 9.778692497829786e-06, "loss": 0.5975, "step": 1709 }, { "epoch": 0.12, "grad_norm": 1.5784143261414403, "learning_rate": 9.778351546777445e-06, "loss": 0.2675, "step": 1710 }, { "epoch": 0.12, "grad_norm": 1.463473005063729, "learning_rate": 9.778010339242178e-06, "loss": 0.2393, "step": 1711 }, { "epoch": 0.12, "grad_norm": 1.5795846447636275, "learning_rate": 9.777668875242296e-06, "loss": 0.2475, "step": 1712 }, { "epoch": 0.12, "grad_norm": 5.160087427735706, "learning_rate": 9.777327154796131e-06, "loss": 0.599, "step": 1713 }, { "epoch": 0.12, "grad_norm": 1.7361205806389284, "learning_rate": 9.776985177922022e-06, "loss": 0.2808, "step": 1714 }, { "epoch": 0.12, "grad_norm": 1.6872275639199157, "learning_rate": 9.776642944638328e-06, "loss": 0.2479, "step": 1715 }, { "epoch": 0.12, "grad_norm": 1.398505760704376, "learning_rate": 9.776300454963417e-06, "loss": 0.2149, "step": 1716 }, { "epoch": 0.12, "grad_norm": 1.4211355219914745, "learning_rate": 9.775957708915674e-06, "loss": 0.1917, "step": 1717 }, { "epoch": 0.12, "grad_norm": 4.763358612769277, "learning_rate": 9.775614706513495e-06, "loss": 0.5881, "step": 1718 }, { "epoch": 0.12, "grad_norm": 1.4396613813075212, "learning_rate": 9.775271447775289e-06, "loss": 0.2025, "step": 1719 }, { "epoch": 0.12, "grad_norm": 6.0827953130407915, "learning_rate": 9.774927932719484e-06, "loss": 0.7363, "step": 1720 }, { "epoch": 0.12, "grad_norm": 1.5086118597374356, "learning_rate": 9.774584161364517e-06, "loss": 0.2313, "step": 1721 }, { "epoch": 0.12, "grad_norm": 1.6470142925591267, "learning_rate": 9.77424013372884e-06, "loss": 0.2387, "step": 1722 }, { "epoch": 0.12, "grad_norm": 1.2810149240468711, "learning_rate": 9.773895849830922e-06, "loss": 0.1739, "step": 1723 }, { "epoch": 0.12, "grad_norm": 1.5627294806312633, "learning_rate": 9.773551309689237e-06, "loss": 0.2443, "step": 1724 }, { "epoch": 0.12, "grad_norm": 6.211473621619358, "learning_rate": 9.773206513322284e-06, "loss": 0.5538, "step": 1725 }, { "epoch": 0.12, "grad_norm": 1.7302147571694546, "learning_rate": 9.772861460748568e-06, "loss": 0.2359, "step": 1726 }, { "epoch": 0.12, "grad_norm": 1.515940136314209, "learning_rate": 9.772516151986612e-06, "loss": 0.2424, "step": 1727 }, { "epoch": 0.12, "grad_norm": 1.4052992604018248, "learning_rate": 9.772170587054948e-06, "loss": 0.1985, "step": 1728 }, { "epoch": 0.12, "grad_norm": 1.4020074968576497, "learning_rate": 9.771824765972124e-06, "loss": 0.1953, "step": 1729 }, { "epoch": 0.12, "grad_norm": 1.2730540696667596, "learning_rate": 9.771478688756706e-06, "loss": 0.211, "step": 1730 }, { "epoch": 0.12, "grad_norm": 1.6253760509498694, "learning_rate": 9.771132355427269e-06, "loss": 0.2608, "step": 1731 }, { "epoch": 0.12, "grad_norm": 1.5770357867311013, "learning_rate": 9.7707857660024e-06, "loss": 0.2575, "step": 1732 }, { "epoch": 0.12, "grad_norm": 1.5051016396234373, "learning_rate": 9.770438920500705e-06, "loss": 0.21, "step": 1733 }, { "epoch": 0.12, "grad_norm": 1.3931984271292237, "learning_rate": 9.7700918189408e-06, "loss": 0.2131, "step": 1734 }, { "epoch": 0.12, "grad_norm": 1.5452160295887971, "learning_rate": 9.769744461341317e-06, "loss": 0.2604, "step": 1735 }, { "epoch": 0.12, "grad_norm": 8.711680271083882, "learning_rate": 9.769396847720901e-06, "loss": 0.5922, "step": 1736 }, { "epoch": 0.12, "grad_norm": 1.6180297131926675, "learning_rate": 9.76904897809821e-06, "loss": 0.2631, "step": 1737 }, { "epoch": 0.12, "grad_norm": 1.5166668938850616, "learning_rate": 9.768700852491916e-06, "loss": 0.2239, "step": 1738 }, { "epoch": 0.12, "grad_norm": 1.3382487003321437, "learning_rate": 9.768352470920706e-06, "loss": 0.2356, "step": 1739 }, { "epoch": 0.12, "grad_norm": 1.7296301780916024, "learning_rate": 9.768003833403278e-06, "loss": 0.2786, "step": 1740 }, { "epoch": 0.12, "grad_norm": 1.6428518041136655, "learning_rate": 9.767654939958347e-06, "loss": 0.2622, "step": 1741 }, { "epoch": 0.12, "grad_norm": 1.807508333347318, "learning_rate": 9.76730579060464e-06, "loss": 0.2384, "step": 1742 }, { "epoch": 0.12, "grad_norm": 1.5213725388838946, "learning_rate": 9.766956385360897e-06, "loss": 0.221, "step": 1743 }, { "epoch": 0.12, "grad_norm": 1.4421635541538649, "learning_rate": 9.766606724245873e-06, "loss": 0.2172, "step": 1744 }, { "epoch": 0.12, "grad_norm": 1.7554754573381086, "learning_rate": 9.766256807278336e-06, "loss": 0.255, "step": 1745 }, { "epoch": 0.12, "grad_norm": 1.5144631931216825, "learning_rate": 9.765906634477072e-06, "loss": 0.2123, "step": 1746 }, { "epoch": 0.12, "grad_norm": 1.4189427324171482, "learning_rate": 9.765556205860871e-06, "loss": 0.2559, "step": 1747 }, { "epoch": 0.13, "grad_norm": 1.312330677864779, "learning_rate": 9.765205521448546e-06, "loss": 0.1968, "step": 1748 }, { "epoch": 0.13, "grad_norm": 1.4840485751257777, "learning_rate": 9.76485458125892e-06, "loss": 0.2316, "step": 1749 }, { "epoch": 0.13, "grad_norm": 1.4689633521815892, "learning_rate": 9.764503385310832e-06, "loss": 0.2059, "step": 1750 }, { "epoch": 0.13, "grad_norm": 1.5109896077523253, "learning_rate": 9.764151933623128e-06, "loss": 0.2694, "step": 1751 }, { "epoch": 0.13, "grad_norm": 1.3950757427748874, "learning_rate": 9.763800226214676e-06, "loss": 0.1811, "step": 1752 }, { "epoch": 0.13, "grad_norm": 1.5540312326284627, "learning_rate": 9.763448263104354e-06, "loss": 0.2009, "step": 1753 }, { "epoch": 0.13, "grad_norm": 1.7146612018399212, "learning_rate": 9.763096044311052e-06, "loss": 0.2365, "step": 1754 }, { "epoch": 0.13, "grad_norm": 1.5064354944989968, "learning_rate": 9.762743569853678e-06, "loss": 0.2076, "step": 1755 }, { "epoch": 0.13, "grad_norm": 1.425400050179596, "learning_rate": 9.76239083975115e-06, "loss": 0.2521, "step": 1756 }, { "epoch": 0.13, "grad_norm": 1.558954480252376, "learning_rate": 9.762037854022404e-06, "loss": 0.2807, "step": 1757 }, { "epoch": 0.13, "grad_norm": 1.5583352573535572, "learning_rate": 9.761684612686381e-06, "loss": 0.2358, "step": 1758 }, { "epoch": 0.13, "grad_norm": 4.014469495657771, "learning_rate": 9.761331115762047e-06, "loss": 0.6238, "step": 1759 }, { "epoch": 0.13, "grad_norm": 1.508560217232071, "learning_rate": 9.760977363268374e-06, "loss": 0.2143, "step": 1760 }, { "epoch": 0.13, "grad_norm": 1.3482809216255482, "learning_rate": 9.760623355224353e-06, "loss": 0.1849, "step": 1761 }, { "epoch": 0.13, "grad_norm": 1.6786324406044018, "learning_rate": 9.760269091648979e-06, "loss": 0.2445, "step": 1762 }, { "epoch": 0.13, "grad_norm": 4.830279810178743, "learning_rate": 9.759914572561275e-06, "loss": 0.7206, "step": 1763 }, { "epoch": 0.13, "grad_norm": 1.550231933125463, "learning_rate": 9.759559797980265e-06, "loss": 0.2323, "step": 1764 }, { "epoch": 0.13, "grad_norm": 1.5302449602768893, "learning_rate": 9.759204767924995e-06, "loss": 0.181, "step": 1765 }, { "epoch": 0.13, "grad_norm": 1.6621683153472693, "learning_rate": 9.75884948241452e-06, "loss": 0.2191, "step": 1766 }, { "epoch": 0.13, "grad_norm": 5.551382313097393, "learning_rate": 9.758493941467911e-06, "loss": 0.5484, "step": 1767 }, { "epoch": 0.13, "grad_norm": 1.4561481588967966, "learning_rate": 9.75813814510425e-06, "loss": 0.2128, "step": 1768 }, { "epoch": 0.13, "grad_norm": 5.394196728667399, "learning_rate": 9.757782093342639e-06, "loss": 0.6855, "step": 1769 }, { "epoch": 0.13, "grad_norm": 1.588281939706975, "learning_rate": 9.757425786202184e-06, "loss": 0.2358, "step": 1770 }, { "epoch": 0.13, "grad_norm": 1.3867498330884303, "learning_rate": 9.757069223702017e-06, "loss": 0.2448, "step": 1771 }, { "epoch": 0.13, "grad_norm": 1.7777796306460585, "learning_rate": 9.756712405861268e-06, "loss": 0.1957, "step": 1772 }, { "epoch": 0.13, "grad_norm": 1.5269974671345954, "learning_rate": 9.756355332699097e-06, "loss": 0.2596, "step": 1773 }, { "epoch": 0.13, "grad_norm": 1.320295742760158, "learning_rate": 9.755998004234667e-06, "loss": 0.1685, "step": 1774 }, { "epoch": 0.13, "grad_norm": 1.418358661214108, "learning_rate": 9.75564042048716e-06, "loss": 0.2043, "step": 1775 }, { "epoch": 0.13, "grad_norm": 1.587408036171723, "learning_rate": 9.755282581475769e-06, "loss": 0.2129, "step": 1776 }, { "epoch": 0.13, "grad_norm": 1.3738416867046819, "learning_rate": 9.754924487219698e-06, "loss": 0.2174, "step": 1777 }, { "epoch": 0.13, "grad_norm": 1.4149776247821395, "learning_rate": 9.754566137738174e-06, "loss": 0.2301, "step": 1778 }, { "epoch": 0.13, "grad_norm": 1.5187928825303296, "learning_rate": 9.754207533050425e-06, "loss": 0.2258, "step": 1779 }, { "epoch": 0.13, "grad_norm": 1.3851972909800576, "learning_rate": 9.753848673175707e-06, "loss": 0.2059, "step": 1780 }, { "epoch": 0.13, "grad_norm": 1.5088714909427967, "learning_rate": 9.753489558133276e-06, "loss": 0.2103, "step": 1781 }, { "epoch": 0.13, "grad_norm": 1.4958459168108815, "learning_rate": 9.75313018794241e-06, "loss": 0.2323, "step": 1782 }, { "epoch": 0.13, "grad_norm": 1.5086159100190941, "learning_rate": 9.7527705626224e-06, "loss": 0.2025, "step": 1783 }, { "epoch": 0.13, "grad_norm": 1.3492698858552774, "learning_rate": 9.752410682192547e-06, "loss": 0.1612, "step": 1784 }, { "epoch": 0.13, "grad_norm": 1.3816693681545629, "learning_rate": 9.75205054667217e-06, "loss": 0.1852, "step": 1785 }, { "epoch": 0.13, "grad_norm": 1.6267463663412634, "learning_rate": 9.751690156080597e-06, "loss": 0.2008, "step": 1786 }, { "epoch": 0.13, "grad_norm": 1.3506938109702742, "learning_rate": 9.751329510437176e-06, "loss": 0.2189, "step": 1787 }, { "epoch": 0.13, "grad_norm": 1.5270215059072123, "learning_rate": 9.75096860976126e-06, "loss": 0.2276, "step": 1788 }, { "epoch": 0.13, "grad_norm": 1.4478679000628312, "learning_rate": 9.750607454072225e-06, "loss": 0.2643, "step": 1789 }, { "epoch": 0.13, "grad_norm": 1.3239777373607575, "learning_rate": 9.750246043389455e-06, "loss": 0.1986, "step": 1790 }, { "epoch": 0.13, "grad_norm": 1.5057977821757051, "learning_rate": 9.74988437773235e-06, "loss": 0.1868, "step": 1791 }, { "epoch": 0.13, "grad_norm": 1.578865105189045, "learning_rate": 9.74952245712032e-06, "loss": 0.2054, "step": 1792 }, { "epoch": 0.13, "grad_norm": 1.6020558265755471, "learning_rate": 9.749160281572795e-06, "loss": 0.2263, "step": 1793 }, { "epoch": 0.13, "grad_norm": 1.4617795501102437, "learning_rate": 9.748797851109213e-06, "loss": 0.178, "step": 1794 }, { "epoch": 0.13, "grad_norm": 1.3554280430765748, "learning_rate": 9.748435165749029e-06, "loss": 0.2187, "step": 1795 }, { "epoch": 0.13, "grad_norm": 1.4977533848883124, "learning_rate": 9.748072225511708e-06, "loss": 0.2151, "step": 1796 }, { "epoch": 0.13, "grad_norm": 1.4777549976267563, "learning_rate": 9.747709030416734e-06, "loss": 0.2201, "step": 1797 }, { "epoch": 0.13, "grad_norm": 1.5003483822933346, "learning_rate": 9.747345580483602e-06, "loss": 0.2658, "step": 1798 }, { "epoch": 0.13, "grad_norm": 1.5124434011049583, "learning_rate": 9.746981875731817e-06, "loss": 0.2185, "step": 1799 }, { "epoch": 0.13, "grad_norm": 1.4908411378947168, "learning_rate": 9.746617916180906e-06, "loss": 0.2204, "step": 1800 }, { "epoch": 0.13, "grad_norm": 1.5142043770873934, "learning_rate": 9.7462537018504e-06, "loss": 0.2221, "step": 1801 }, { "epoch": 0.13, "grad_norm": 1.554203407901688, "learning_rate": 9.745889232759854e-06, "loss": 0.25, "step": 1802 }, { "epoch": 0.13, "grad_norm": 1.4186244329769964, "learning_rate": 9.745524508928827e-06, "loss": 0.2357, "step": 1803 }, { "epoch": 0.13, "grad_norm": 1.470528685888625, "learning_rate": 9.745159530376897e-06, "loss": 0.2284, "step": 1804 }, { "epoch": 0.13, "grad_norm": 1.538893527051276, "learning_rate": 9.744794297123656e-06, "loss": 0.1968, "step": 1805 }, { "epoch": 0.13, "grad_norm": 1.3570080127073203, "learning_rate": 9.744428809188707e-06, "loss": 0.1709, "step": 1806 }, { "epoch": 0.13, "grad_norm": 1.716985202567522, "learning_rate": 9.74406306659167e-06, "loss": 0.227, "step": 1807 }, { "epoch": 0.13, "grad_norm": 1.5930294146323556, "learning_rate": 9.74369706935217e-06, "loss": 0.2538, "step": 1808 }, { "epoch": 0.13, "grad_norm": 1.477987442929875, "learning_rate": 9.74333081748986e-06, "loss": 0.2277, "step": 1809 }, { "epoch": 0.13, "grad_norm": 1.4883461524651147, "learning_rate": 9.742964311024396e-06, "loss": 0.2335, "step": 1810 }, { "epoch": 0.13, "grad_norm": 1.3784403434462695, "learning_rate": 9.74259754997545e-06, "loss": 0.1979, "step": 1811 }, { "epoch": 0.13, "grad_norm": 1.43399893158558, "learning_rate": 9.742230534362708e-06, "loss": 0.2105, "step": 1812 }, { "epoch": 0.13, "grad_norm": 1.5747044306847444, "learning_rate": 9.741863264205873e-06, "loss": 0.2587, "step": 1813 }, { "epoch": 0.13, "grad_norm": 1.6369310886332042, "learning_rate": 9.741495739524655e-06, "loss": 0.2665, "step": 1814 }, { "epoch": 0.13, "grad_norm": 1.4148010433064986, "learning_rate": 9.741127960338784e-06, "loss": 0.2588, "step": 1815 }, { "epoch": 0.13, "grad_norm": 5.383135311959032, "learning_rate": 9.740759926667998e-06, "loss": 0.7326, "step": 1816 }, { "epoch": 0.13, "grad_norm": 6.215234690982681, "learning_rate": 9.740391638532055e-06, "loss": 0.7683, "step": 1817 }, { "epoch": 0.13, "grad_norm": 5.521689426252949, "learning_rate": 9.74002309595072e-06, "loss": 0.6337, "step": 1818 }, { "epoch": 0.13, "grad_norm": 9.219021824095078, "learning_rate": 9.739654298943775e-06, "loss": 0.6336, "step": 1819 }, { "epoch": 0.13, "grad_norm": 1.4671528860405594, "learning_rate": 9.739285247531019e-06, "loss": 0.2943, "step": 1820 }, { "epoch": 0.13, "grad_norm": 3.950679909288861, "learning_rate": 9.738915941732258e-06, "loss": 0.5464, "step": 1821 }, { "epoch": 0.13, "grad_norm": 1.263604446988797, "learning_rate": 9.738546381567315e-06, "loss": 0.1694, "step": 1822 }, { "epoch": 0.13, "grad_norm": 1.735655068451781, "learning_rate": 9.73817656705603e-06, "loss": 0.2754, "step": 1823 }, { "epoch": 0.13, "grad_norm": 1.3394602050267703, "learning_rate": 9.737806498218249e-06, "loss": 0.1876, "step": 1824 }, { "epoch": 0.13, "grad_norm": 1.3135586864657471, "learning_rate": 9.737436175073837e-06, "loss": 0.2064, "step": 1825 }, { "epoch": 0.13, "grad_norm": 1.3710058616784464, "learning_rate": 9.737065597642674e-06, "loss": 0.2041, "step": 1826 }, { "epoch": 0.13, "grad_norm": 1.3735647173601169, "learning_rate": 9.736694765944648e-06, "loss": 0.213, "step": 1827 }, { "epoch": 0.13, "grad_norm": 1.575970707511317, "learning_rate": 9.736323679999666e-06, "loss": 0.1764, "step": 1828 }, { "epoch": 0.13, "grad_norm": 1.4874345342814497, "learning_rate": 9.735952339827643e-06, "loss": 0.2594, "step": 1829 }, { "epoch": 0.13, "grad_norm": 1.4487034296171157, "learning_rate": 9.735580745448514e-06, "loss": 0.2587, "step": 1830 }, { "epoch": 0.13, "grad_norm": 1.6928386745538273, "learning_rate": 9.735208896882225e-06, "loss": 0.2471, "step": 1831 }, { "epoch": 0.13, "grad_norm": 1.6533601239995719, "learning_rate": 9.734836794148734e-06, "loss": 0.2599, "step": 1832 }, { "epoch": 0.13, "grad_norm": 3.793279344838839, "learning_rate": 9.734464437268014e-06, "loss": 0.7486, "step": 1833 }, { "epoch": 0.13, "grad_norm": 23.526035949817015, "learning_rate": 9.734091826260053e-06, "loss": 0.6702, "step": 1834 }, { "epoch": 0.13, "grad_norm": 1.5635677219351236, "learning_rate": 9.733718961144849e-06, "loss": 0.2625, "step": 1835 }, { "epoch": 0.13, "grad_norm": 1.5729201459566307, "learning_rate": 9.733345841942418e-06, "loss": 0.2621, "step": 1836 }, { "epoch": 0.13, "grad_norm": 1.4512507834508095, "learning_rate": 9.732972468672788e-06, "loss": 0.2568, "step": 1837 }, { "epoch": 0.13, "grad_norm": 4.81825127612839, "learning_rate": 9.732598841355997e-06, "loss": 0.7093, "step": 1838 }, { "epoch": 0.13, "grad_norm": 3.935854508103519, "learning_rate": 9.732224960012103e-06, "loss": 0.448, "step": 1839 }, { "epoch": 0.13, "grad_norm": 1.2361647924980592, "learning_rate": 9.731850824661171e-06, "loss": 0.1815, "step": 1840 }, { "epoch": 0.13, "grad_norm": 1.510432719016519, "learning_rate": 9.731476435323289e-06, "loss": 0.2244, "step": 1841 }, { "epoch": 0.13, "grad_norm": 1.5664921794746007, "learning_rate": 9.731101792018547e-06, "loss": 0.2267, "step": 1842 }, { "epoch": 0.13, "grad_norm": 1.4119985931063965, "learning_rate": 9.730726894767056e-06, "loss": 0.2467, "step": 1843 }, { "epoch": 0.13, "grad_norm": 1.4895625353438624, "learning_rate": 9.73035174358894e-06, "loss": 0.2377, "step": 1844 }, { "epoch": 0.13, "grad_norm": 1.415260683378231, "learning_rate": 9.729976338504336e-06, "loss": 0.255, "step": 1845 }, { "epoch": 0.13, "grad_norm": 1.6206785462449451, "learning_rate": 9.729600679533392e-06, "loss": 0.236, "step": 1846 }, { "epoch": 0.13, "grad_norm": 1.4862274100663593, "learning_rate": 9.729224766696274e-06, "loss": 0.2049, "step": 1847 }, { "epoch": 0.13, "grad_norm": 1.3233284851217337, "learning_rate": 9.72884860001316e-06, "loss": 0.2223, "step": 1848 }, { "epoch": 0.13, "grad_norm": 1.507432629772718, "learning_rate": 9.728472179504239e-06, "loss": 0.1975, "step": 1849 }, { "epoch": 0.13, "grad_norm": 1.664902547746061, "learning_rate": 9.728095505189714e-06, "loss": 0.246, "step": 1850 }, { "epoch": 0.13, "grad_norm": 1.4211299756869158, "learning_rate": 9.72771857708981e-06, "loss": 0.2365, "step": 1851 }, { "epoch": 0.13, "grad_norm": 10.154853910062455, "learning_rate": 9.727341395224751e-06, "loss": 0.7025, "step": 1852 }, { "epoch": 0.13, "grad_norm": 1.537067176261166, "learning_rate": 9.72696395961479e-06, "loss": 0.2221, "step": 1853 }, { "epoch": 0.13, "grad_norm": 1.3677475424495202, "learning_rate": 9.726586270280181e-06, "loss": 0.2353, "step": 1854 }, { "epoch": 0.13, "grad_norm": 1.7353022976423134, "learning_rate": 9.7262083272412e-06, "loss": 0.3053, "step": 1855 }, { "epoch": 0.13, "grad_norm": 1.3710181128537797, "learning_rate": 9.72583013051813e-06, "loss": 0.1966, "step": 1856 }, { "epoch": 0.13, "grad_norm": 1.9894691453887685, "learning_rate": 9.725451680131278e-06, "loss": 0.2328, "step": 1857 }, { "epoch": 0.13, "grad_norm": 1.431340194383833, "learning_rate": 9.725072976100949e-06, "loss": 0.2569, "step": 1858 }, { "epoch": 0.13, "grad_norm": 1.4015519374624992, "learning_rate": 9.724694018447475e-06, "loss": 0.2183, "step": 1859 }, { "epoch": 0.13, "grad_norm": 1.4059921260587471, "learning_rate": 9.724314807191197e-06, "loss": 0.1968, "step": 1860 }, { "epoch": 0.13, "grad_norm": 1.464275152148016, "learning_rate": 9.723935342352468e-06, "loss": 0.1826, "step": 1861 }, { "epoch": 0.13, "grad_norm": 1.449170754043741, "learning_rate": 9.723555623951657e-06, "loss": 0.2015, "step": 1862 }, { "epoch": 0.13, "grad_norm": 1.4958189850488863, "learning_rate": 9.723175652009148e-06, "loss": 0.2439, "step": 1863 }, { "epoch": 0.13, "grad_norm": 1.5739353607165942, "learning_rate": 9.72279542654533e-06, "loss": 0.2216, "step": 1864 }, { "epoch": 0.13, "grad_norm": 1.545987404161147, "learning_rate": 9.72241494758062e-06, "loss": 0.2347, "step": 1865 }, { "epoch": 0.13, "grad_norm": 4.67002781279628, "learning_rate": 9.722034215135436e-06, "loss": 0.7898, "step": 1866 }, { "epoch": 0.13, "grad_norm": 1.490860802093944, "learning_rate": 9.721653229230214e-06, "loss": 0.2081, "step": 1867 }, { "epoch": 0.13, "grad_norm": 1.4295548205115338, "learning_rate": 9.721271989885405e-06, "loss": 0.1908, "step": 1868 }, { "epoch": 0.13, "grad_norm": 1.405048012704226, "learning_rate": 9.720890497121473e-06, "loss": 0.2098, "step": 1869 }, { "epoch": 0.13, "grad_norm": 1.2924687409460553, "learning_rate": 9.720508750958892e-06, "loss": 0.218, "step": 1870 }, { "epoch": 0.13, "grad_norm": 1.4178717374792467, "learning_rate": 9.720126751418157e-06, "loss": 0.244, "step": 1871 }, { "epoch": 0.13, "grad_norm": 1.519848553701147, "learning_rate": 9.71974449851977e-06, "loss": 0.2592, "step": 1872 }, { "epoch": 0.13, "grad_norm": 1.4704563787455887, "learning_rate": 9.719361992284248e-06, "loss": 0.2538, "step": 1873 }, { "epoch": 0.13, "grad_norm": 1.6823052719706368, "learning_rate": 9.718979232732125e-06, "loss": 0.2319, "step": 1874 }, { "epoch": 0.13, "grad_norm": 1.5352781424097124, "learning_rate": 9.718596219883942e-06, "loss": 0.2258, "step": 1875 }, { "epoch": 0.13, "grad_norm": 1.4898589395872692, "learning_rate": 9.718212953760261e-06, "loss": 0.2002, "step": 1876 }, { "epoch": 0.13, "grad_norm": 1.7217510529734357, "learning_rate": 9.717829434381653e-06, "loss": 0.2771, "step": 1877 }, { "epoch": 0.13, "grad_norm": 1.5160913837771142, "learning_rate": 9.717445661768703e-06, "loss": 0.2289, "step": 1878 }, { "epoch": 0.13, "grad_norm": 1.3547828002842095, "learning_rate": 9.717061635942014e-06, "loss": 0.189, "step": 1879 }, { "epoch": 0.13, "grad_norm": 8.254461770583257, "learning_rate": 9.716677356922193e-06, "loss": 0.6469, "step": 1880 }, { "epoch": 0.13, "grad_norm": 1.5028762322461113, "learning_rate": 9.716292824729872e-06, "loss": 0.2349, "step": 1881 }, { "epoch": 0.13, "grad_norm": 1.6669656601377145, "learning_rate": 9.715908039385688e-06, "loss": 0.2003, "step": 1882 }, { "epoch": 0.13, "grad_norm": 1.3608523474912946, "learning_rate": 9.715523000910296e-06, "loss": 0.2579, "step": 1883 }, { "epoch": 0.13, "grad_norm": 1.4953629827375976, "learning_rate": 9.715137709324363e-06, "loss": 0.2258, "step": 1884 }, { "epoch": 0.13, "grad_norm": 1.590519300516705, "learning_rate": 9.714752164648571e-06, "loss": 0.302, "step": 1885 }, { "epoch": 0.13, "grad_norm": 1.4235103783129495, "learning_rate": 9.714366366903612e-06, "loss": 0.2176, "step": 1886 }, { "epoch": 0.13, "grad_norm": 1.4928235981142612, "learning_rate": 9.713980316110196e-06, "loss": 0.213, "step": 1887 }, { "epoch": 0.14, "grad_norm": 1.7248317156192252, "learning_rate": 9.713594012289045e-06, "loss": 0.246, "step": 1888 }, { "epoch": 0.14, "grad_norm": 1.4034855318944837, "learning_rate": 9.713207455460893e-06, "loss": 0.2058, "step": 1889 }, { "epoch": 0.14, "grad_norm": 1.4278403851695654, "learning_rate": 9.71282064564649e-06, "loss": 0.2199, "step": 1890 }, { "epoch": 0.14, "grad_norm": 1.469329707855562, "learning_rate": 9.712433582866596e-06, "loss": 0.2497, "step": 1891 }, { "epoch": 0.14, "grad_norm": 1.3010679363763809, "learning_rate": 9.71204626714199e-06, "loss": 0.2031, "step": 1892 }, { "epoch": 0.14, "grad_norm": 1.4156084758475338, "learning_rate": 9.71165869849346e-06, "loss": 0.2283, "step": 1893 }, { "epoch": 0.14, "grad_norm": 1.7838289407957961, "learning_rate": 9.71127087694181e-06, "loss": 0.2436, "step": 1894 }, { "epoch": 0.14, "grad_norm": 1.5887673362043404, "learning_rate": 9.710882802507857e-06, "loss": 0.261, "step": 1895 }, { "epoch": 0.14, "grad_norm": 1.409210802499972, "learning_rate": 9.710494475212429e-06, "loss": 0.193, "step": 1896 }, { "epoch": 0.14, "grad_norm": 1.4623938364806417, "learning_rate": 9.710105895076372e-06, "loss": 0.1991, "step": 1897 }, { "epoch": 0.14, "grad_norm": 1.7049493576296055, "learning_rate": 9.709717062120542e-06, "loss": 0.2438, "step": 1898 }, { "epoch": 0.14, "grad_norm": 1.4520075655164966, "learning_rate": 9.709327976365813e-06, "loss": 0.2494, "step": 1899 }, { "epoch": 0.14, "grad_norm": 4.165840308429936, "learning_rate": 9.708938637833065e-06, "loss": 0.5862, "step": 1900 }, { "epoch": 0.14, "grad_norm": 5.924872560397852, "learning_rate": 9.708549046543201e-06, "loss": 0.7531, "step": 1901 }, { "epoch": 0.14, "grad_norm": 1.411684373014523, "learning_rate": 9.708159202517129e-06, "loss": 0.2362, "step": 1902 }, { "epoch": 0.14, "grad_norm": 1.6666750908200851, "learning_rate": 9.707769105775774e-06, "loss": 0.2554, "step": 1903 }, { "epoch": 0.14, "grad_norm": 1.4960331275406946, "learning_rate": 9.707378756340079e-06, "loss": 0.2552, "step": 1904 }, { "epoch": 0.14, "grad_norm": 1.4987551615369414, "learning_rate": 9.706988154230992e-06, "loss": 0.2537, "step": 1905 }, { "epoch": 0.14, "grad_norm": 1.3951431210224616, "learning_rate": 9.70659729946948e-06, "loss": 0.2613, "step": 1906 }, { "epoch": 0.14, "grad_norm": 1.5313122108829853, "learning_rate": 9.706206192076527e-06, "loss": 0.2024, "step": 1907 }, { "epoch": 0.14, "grad_norm": 1.4271304891750085, "learning_rate": 9.705814832073118e-06, "loss": 0.2184, "step": 1908 }, { "epoch": 0.14, "grad_norm": 1.3742564968571074, "learning_rate": 9.705423219480265e-06, "loss": 0.2041, "step": 1909 }, { "epoch": 0.14, "grad_norm": 1.369953197872659, "learning_rate": 9.70503135431899e-06, "loss": 0.2157, "step": 1910 }, { "epoch": 0.14, "grad_norm": 1.4623807136452425, "learning_rate": 9.704639236610321e-06, "loss": 0.2142, "step": 1911 }, { "epoch": 0.14, "grad_norm": 1.3650250852724641, "learning_rate": 9.704246866375309e-06, "loss": 0.2139, "step": 1912 }, { "epoch": 0.14, "grad_norm": 1.3946951198554776, "learning_rate": 9.703854243635015e-06, "loss": 0.2611, "step": 1913 }, { "epoch": 0.14, "grad_norm": 1.3217007436002144, "learning_rate": 9.703461368410512e-06, "loss": 0.1748, "step": 1914 }, { "epoch": 0.14, "grad_norm": 1.420273891053106, "learning_rate": 9.70306824072289e-06, "loss": 0.2193, "step": 1915 }, { "epoch": 0.14, "grad_norm": 1.465617760705863, "learning_rate": 9.702674860593248e-06, "loss": 0.2026, "step": 1916 }, { "epoch": 0.14, "grad_norm": 1.4119543226623383, "learning_rate": 9.702281228042703e-06, "loss": 0.2331, "step": 1917 }, { "epoch": 0.14, "grad_norm": 1.6226099691583857, "learning_rate": 9.701887343092381e-06, "loss": 0.2662, "step": 1918 }, { "epoch": 0.14, "grad_norm": 6.351811391037114, "learning_rate": 9.701493205763427e-06, "loss": 0.7424, "step": 1919 }, { "epoch": 0.14, "grad_norm": 1.543343158235312, "learning_rate": 9.701098816076995e-06, "loss": 0.2553, "step": 1920 }, { "epoch": 0.14, "grad_norm": 1.6108706193671603, "learning_rate": 9.700704174054257e-06, "loss": 0.1923, "step": 1921 }, { "epoch": 0.14, "grad_norm": 1.6448967344527, "learning_rate": 9.700309279716394e-06, "loss": 0.2349, "step": 1922 }, { "epoch": 0.14, "grad_norm": 3.95404167783207, "learning_rate": 9.6999141330846e-06, "loss": 0.4977, "step": 1923 }, { "epoch": 0.14, "grad_norm": 1.7987358515060412, "learning_rate": 9.699518734180088e-06, "loss": 0.2859, "step": 1924 }, { "epoch": 0.14, "grad_norm": 1.4880639584777857, "learning_rate": 9.69912308302408e-06, "loss": 0.2307, "step": 1925 }, { "epoch": 0.14, "grad_norm": 1.5366616380761902, "learning_rate": 9.698727179637815e-06, "loss": 0.2411, "step": 1926 }, { "epoch": 0.14, "grad_norm": 1.599479460646312, "learning_rate": 9.69833102404254e-06, "loss": 0.2604, "step": 1927 }, { "epoch": 0.14, "grad_norm": 1.3877101222843358, "learning_rate": 9.697934616259523e-06, "loss": 0.239, "step": 1928 }, { "epoch": 0.14, "grad_norm": 1.4709492509117583, "learning_rate": 9.69753795631004e-06, "loss": 0.2611, "step": 1929 }, { "epoch": 0.14, "grad_norm": 2.029819809309242, "learning_rate": 9.697141044215379e-06, "loss": 0.2449, "step": 1930 }, { "epoch": 0.14, "grad_norm": 1.6259809200983182, "learning_rate": 9.696743879996851e-06, "loss": 0.2308, "step": 1931 }, { "epoch": 0.14, "grad_norm": 1.3304292583048278, "learning_rate": 9.696346463675767e-06, "loss": 0.2612, "step": 1932 }, { "epoch": 0.14, "grad_norm": 1.4982271274022423, "learning_rate": 9.695948795273465e-06, "loss": 0.2718, "step": 1933 }, { "epoch": 0.14, "grad_norm": 6.811316470748526, "learning_rate": 9.695550874811288e-06, "loss": 0.65, "step": 1934 }, { "epoch": 0.14, "grad_norm": 1.3713588643410768, "learning_rate": 9.695152702310592e-06, "loss": 0.2332, "step": 1935 }, { "epoch": 0.14, "grad_norm": 15.045125078879652, "learning_rate": 9.694754277792754e-06, "loss": 0.5881, "step": 1936 }, { "epoch": 0.14, "grad_norm": 1.5304467441851728, "learning_rate": 9.694355601279157e-06, "loss": 0.2416, "step": 1937 }, { "epoch": 0.14, "grad_norm": 1.3919126923257714, "learning_rate": 9.693956672791201e-06, "loss": 0.2425, "step": 1938 }, { "epoch": 0.14, "grad_norm": 1.4548562351366578, "learning_rate": 9.6935574923503e-06, "loss": 0.2542, "step": 1939 }, { "epoch": 0.14, "grad_norm": 4.070726763963831, "learning_rate": 9.693158059977879e-06, "loss": 0.5373, "step": 1940 }, { "epoch": 0.14, "grad_norm": 1.4969554025609875, "learning_rate": 9.692758375695377e-06, "loss": 0.2646, "step": 1941 }, { "epoch": 0.14, "grad_norm": 1.4569167815753479, "learning_rate": 9.692358439524248e-06, "loss": 0.2615, "step": 1942 }, { "epoch": 0.14, "grad_norm": 1.3328553208445544, "learning_rate": 9.691958251485962e-06, "loss": 0.2341, "step": 1943 }, { "epoch": 0.14, "grad_norm": 1.5713704154765922, "learning_rate": 9.691557811601996e-06, "loss": 0.2424, "step": 1944 }, { "epoch": 0.14, "grad_norm": 1.39665779910806, "learning_rate": 9.691157119893846e-06, "loss": 0.2273, "step": 1945 }, { "epoch": 0.14, "grad_norm": 1.529280500863797, "learning_rate": 9.690756176383019e-06, "loss": 0.2533, "step": 1946 }, { "epoch": 0.14, "grad_norm": 1.8566822154673406, "learning_rate": 9.690354981091033e-06, "loss": 0.1869, "step": 1947 }, { "epoch": 0.14, "grad_norm": 1.4771582292509915, "learning_rate": 9.689953534039428e-06, "loss": 0.2655, "step": 1948 }, { "epoch": 0.14, "grad_norm": 5.368097707546508, "learning_rate": 9.689551835249749e-06, "loss": 0.6712, "step": 1949 }, { "epoch": 0.14, "grad_norm": 1.4129571833399497, "learning_rate": 9.689149884743558e-06, "loss": 0.233, "step": 1950 }, { "epoch": 0.14, "grad_norm": 1.4836749349537037, "learning_rate": 9.68874768254243e-06, "loss": 0.2901, "step": 1951 }, { "epoch": 0.14, "grad_norm": 1.3570903349848955, "learning_rate": 9.688345228667955e-06, "loss": 0.2312, "step": 1952 }, { "epoch": 0.14, "grad_norm": 1.3881651633940988, "learning_rate": 9.687942523141732e-06, "loss": 0.2316, "step": 1953 }, { "epoch": 0.14, "grad_norm": 1.5997011592852977, "learning_rate": 9.687539565985379e-06, "loss": 0.228, "step": 1954 }, { "epoch": 0.14, "grad_norm": 1.5082993978627361, "learning_rate": 9.687136357220525e-06, "loss": 0.2123, "step": 1955 }, { "epoch": 0.14, "grad_norm": 4.698329092523647, "learning_rate": 9.686732896868814e-06, "loss": 0.6586, "step": 1956 }, { "epoch": 0.14, "grad_norm": 1.405525969744624, "learning_rate": 9.686329184951899e-06, "loss": 0.2509, "step": 1957 }, { "epoch": 0.14, "grad_norm": 1.5297734761278114, "learning_rate": 9.685925221491452e-06, "loss": 0.2187, "step": 1958 }, { "epoch": 0.14, "grad_norm": 1.3260628762739894, "learning_rate": 9.685521006509153e-06, "loss": 0.1754, "step": 1959 }, { "epoch": 0.14, "grad_norm": 1.3450722509545328, "learning_rate": 9.685116540026703e-06, "loss": 0.2111, "step": 1960 }, { "epoch": 0.14, "grad_norm": 1.332705349393018, "learning_rate": 9.68471182206581e-06, "loss": 0.1755, "step": 1961 }, { "epoch": 0.14, "grad_norm": 1.4101069743023689, "learning_rate": 9.684306852648198e-06, "loss": 0.1847, "step": 1962 }, { "epoch": 0.14, "grad_norm": 1.416647702595143, "learning_rate": 9.683901631795602e-06, "loss": 0.233, "step": 1963 }, { "epoch": 0.14, "grad_norm": 1.366678062343066, "learning_rate": 9.683496159529777e-06, "loss": 0.2035, "step": 1964 }, { "epoch": 0.14, "grad_norm": 1.6153968253300508, "learning_rate": 9.683090435872484e-06, "loss": 0.2598, "step": 1965 }, { "epoch": 0.14, "grad_norm": 1.3860988775998824, "learning_rate": 9.6826844608455e-06, "loss": 0.2118, "step": 1966 }, { "epoch": 0.14, "grad_norm": 1.4716406762575418, "learning_rate": 9.68227823447062e-06, "loss": 0.1927, "step": 1967 }, { "epoch": 0.14, "grad_norm": 1.4200399721936707, "learning_rate": 9.681871756769642e-06, "loss": 0.208, "step": 1968 }, { "epoch": 0.14, "grad_norm": 1.7434795267719347, "learning_rate": 9.681465027764391e-06, "loss": 0.2438, "step": 1969 }, { "epoch": 0.14, "grad_norm": 1.2958351514517805, "learning_rate": 9.681058047476696e-06, "loss": 0.186, "step": 1970 }, { "epoch": 0.14, "grad_norm": 1.3496401609989983, "learning_rate": 9.6806508159284e-06, "loss": 0.2105, "step": 1971 }, { "epoch": 0.14, "grad_norm": 1.4094482272930087, "learning_rate": 9.680243333141365e-06, "loss": 0.1977, "step": 1972 }, { "epoch": 0.14, "grad_norm": 1.4193954446257815, "learning_rate": 9.679835599137462e-06, "loss": 0.2458, "step": 1973 }, { "epoch": 0.14, "grad_norm": 1.5013428454716475, "learning_rate": 9.679427613938575e-06, "loss": 0.2398, "step": 1974 }, { "epoch": 0.14, "grad_norm": 1.5175489094371841, "learning_rate": 9.679019377566605e-06, "loss": 0.2572, "step": 1975 }, { "epoch": 0.14, "grad_norm": 1.545944330114262, "learning_rate": 9.678610890043462e-06, "loss": 0.2081, "step": 1976 }, { "epoch": 0.14, "grad_norm": 1.5315826750207684, "learning_rate": 9.678202151391075e-06, "loss": 0.218, "step": 1977 }, { "epoch": 0.14, "grad_norm": 1.3493208140573416, "learning_rate": 9.677793161631381e-06, "loss": 0.2126, "step": 1978 }, { "epoch": 0.14, "grad_norm": 1.3913479211246853, "learning_rate": 9.677383920786335e-06, "loss": 0.245, "step": 1979 }, { "epoch": 0.14, "grad_norm": 1.3725360521474659, "learning_rate": 9.6769744288779e-06, "loss": 0.2038, "step": 1980 }, { "epoch": 0.14, "grad_norm": 1.1460072122737435, "learning_rate": 9.676564685928063e-06, "loss": 0.1857, "step": 1981 }, { "epoch": 0.14, "grad_norm": 1.4165996092298085, "learning_rate": 9.676154691958807e-06, "loss": 0.2542, "step": 1982 }, { "epoch": 0.14, "grad_norm": 1.3913252584818068, "learning_rate": 9.67574444699215e-06, "loss": 0.2647, "step": 1983 }, { "epoch": 0.14, "grad_norm": 1.3721701264030706, "learning_rate": 9.675333951050103e-06, "loss": 0.2327, "step": 1984 }, { "epoch": 0.14, "grad_norm": 1.2781860681468193, "learning_rate": 9.674923204154704e-06, "loss": 0.2235, "step": 1985 }, { "epoch": 0.14, "grad_norm": 1.4157902387139727, "learning_rate": 9.674512206328002e-06, "loss": 0.2058, "step": 1986 }, { "epoch": 0.14, "grad_norm": 1.5593694121486974, "learning_rate": 9.674100957592054e-06, "loss": 0.2096, "step": 1987 }, { "epoch": 0.14, "grad_norm": 1.255870521935859, "learning_rate": 9.673689457968936e-06, "loss": 0.2097, "step": 1988 }, { "epoch": 0.14, "grad_norm": 6.506996381722429, "learning_rate": 9.673277707480735e-06, "loss": 0.7056, "step": 1989 }, { "epoch": 0.14, "grad_norm": 8.450737558060842, "learning_rate": 9.672865706149553e-06, "loss": 0.588, "step": 1990 }, { "epoch": 0.14, "grad_norm": 1.3361370022074133, "learning_rate": 9.672453453997506e-06, "loss": 0.2201, "step": 1991 }, { "epoch": 0.14, "grad_norm": 1.25532987521443, "learning_rate": 9.672040951046717e-06, "loss": 0.2024, "step": 1992 }, { "epoch": 0.14, "grad_norm": 1.4253200274503304, "learning_rate": 9.671628197319333e-06, "loss": 0.2343, "step": 1993 }, { "epoch": 0.14, "grad_norm": 1.5189963947102203, "learning_rate": 9.671215192837507e-06, "loss": 0.2203, "step": 1994 }, { "epoch": 0.14, "grad_norm": 1.6863477956428774, "learning_rate": 9.670801937623406e-06, "loss": 0.2358, "step": 1995 }, { "epoch": 0.14, "grad_norm": 1.6022919721304347, "learning_rate": 9.670388431699214e-06, "loss": 0.2825, "step": 1996 }, { "epoch": 0.14, "grad_norm": 1.3422471162623222, "learning_rate": 9.669974675087124e-06, "loss": 0.2141, "step": 1997 }, { "epoch": 0.14, "grad_norm": 1.3506876497482363, "learning_rate": 9.669560667809348e-06, "loss": 0.2183, "step": 1998 }, { "epoch": 0.14, "grad_norm": 5.841728687217585, "learning_rate": 9.669146409888104e-06, "loss": 0.5604, "step": 1999 }, { "epoch": 0.14, "grad_norm": 1.3005855706238183, "learning_rate": 9.668731901345632e-06, "loss": 0.1854, "step": 2000 }, { "epoch": 0.14, "grad_norm": 1.4462792549244696, "learning_rate": 9.66831714220418e-06, "loss": 0.2029, "step": 2001 }, { "epoch": 0.14, "grad_norm": 1.385734966334063, "learning_rate": 9.667902132486009e-06, "loss": 0.1894, "step": 2002 }, { "epoch": 0.14, "grad_norm": 1.4070308823112274, "learning_rate": 9.667486872213396e-06, "loss": 0.2589, "step": 2003 }, { "epoch": 0.14, "grad_norm": 1.554737030225706, "learning_rate": 9.667071361408633e-06, "loss": 0.2924, "step": 2004 }, { "epoch": 0.14, "grad_norm": 1.4899583573738209, "learning_rate": 9.666655600094019e-06, "loss": 0.2363, "step": 2005 }, { "epoch": 0.14, "grad_norm": 1.390947278730927, "learning_rate": 9.66623958829187e-06, "loss": 0.2298, "step": 2006 }, { "epoch": 0.14, "grad_norm": 1.4999926233597811, "learning_rate": 9.66582332602452e-06, "loss": 0.2058, "step": 2007 }, { "epoch": 0.14, "grad_norm": 1.3877316040988428, "learning_rate": 9.665406813314311e-06, "loss": 0.254, "step": 2008 }, { "epoch": 0.14, "grad_norm": 1.4413783819472263, "learning_rate": 9.664990050183598e-06, "loss": 0.2224, "step": 2009 }, { "epoch": 0.14, "grad_norm": 1.434013906942006, "learning_rate": 9.664573036654751e-06, "loss": 0.2018, "step": 2010 }, { "epoch": 0.14, "grad_norm": 1.4753947352626866, "learning_rate": 9.664155772750157e-06, "loss": 0.2852, "step": 2011 }, { "epoch": 0.14, "grad_norm": 9.176507368366487, "learning_rate": 9.663738258492208e-06, "loss": 0.6371, "step": 2012 }, { "epoch": 0.14, "grad_norm": 1.5175244668684245, "learning_rate": 9.66332049390332e-06, "loss": 0.2448, "step": 2013 }, { "epoch": 0.14, "grad_norm": 1.2154787302290333, "learning_rate": 9.662902479005913e-06, "loss": 0.167, "step": 2014 }, { "epoch": 0.14, "grad_norm": 1.5608394424198524, "learning_rate": 9.662484213822425e-06, "loss": 0.2736, "step": 2015 }, { "epoch": 0.14, "grad_norm": 1.3993343109913665, "learning_rate": 9.662065698375309e-06, "loss": 0.2153, "step": 2016 }, { "epoch": 0.14, "grad_norm": 1.4328077159315211, "learning_rate": 9.661646932687028e-06, "loss": 0.1873, "step": 2017 }, { "epoch": 0.14, "grad_norm": 1.4972597308547662, "learning_rate": 9.661227916780058e-06, "loss": 0.2281, "step": 2018 }, { "epoch": 0.14, "grad_norm": 1.3933492041177549, "learning_rate": 9.660808650676891e-06, "loss": 0.2377, "step": 2019 }, { "epoch": 0.14, "grad_norm": 1.948451495998756, "learning_rate": 9.660389134400034e-06, "loss": 0.2419, "step": 2020 }, { "epoch": 0.14, "grad_norm": 8.076089257238804, "learning_rate": 9.659969367972003e-06, "loss": 0.696, "step": 2021 }, { "epoch": 0.14, "grad_norm": 1.3860670402533746, "learning_rate": 9.659549351415327e-06, "loss": 0.2409, "step": 2022 }, { "epoch": 0.14, "grad_norm": 1.3658511006484433, "learning_rate": 9.659129084752554e-06, "loss": 0.2203, "step": 2023 }, { "epoch": 0.14, "grad_norm": 1.9841676260377752, "learning_rate": 9.658708568006244e-06, "loss": 0.2502, "step": 2024 }, { "epoch": 0.14, "grad_norm": 1.402650641094453, "learning_rate": 9.658287801198963e-06, "loss": 0.2201, "step": 2025 }, { "epoch": 0.14, "grad_norm": 1.5170963103634403, "learning_rate": 9.6578667843533e-06, "loss": 0.2155, "step": 2026 }, { "epoch": 0.14, "grad_norm": 1.6663798867931658, "learning_rate": 9.657445517491852e-06, "loss": 0.275, "step": 2027 }, { "epoch": 0.15, "grad_norm": 1.524758584013468, "learning_rate": 9.657024000637235e-06, "loss": 0.2107, "step": 2028 }, { "epoch": 0.15, "grad_norm": 1.4588984892968944, "learning_rate": 9.656602233812068e-06, "loss": 0.2032, "step": 2029 }, { "epoch": 0.15, "grad_norm": 1.229226181325648, "learning_rate": 9.656180217038994e-06, "loss": 0.1808, "step": 2030 }, { "epoch": 0.15, "grad_norm": 1.5829505974249733, "learning_rate": 9.655757950340662e-06, "loss": 0.3236, "step": 2031 }, { "epoch": 0.15, "grad_norm": 4.961639738709765, "learning_rate": 9.655335433739742e-06, "loss": 0.6644, "step": 2032 }, { "epoch": 0.15, "grad_norm": 1.5655971095139682, "learning_rate": 9.65491266725891e-06, "loss": 0.232, "step": 2033 }, { "epoch": 0.15, "grad_norm": 1.4092100892962622, "learning_rate": 9.65448965092086e-06, "loss": 0.2122, "step": 2034 }, { "epoch": 0.15, "grad_norm": 1.6776571274439038, "learning_rate": 9.654066384748296e-06, "loss": 0.2329, "step": 2035 }, { "epoch": 0.15, "grad_norm": 7.365196581000431, "learning_rate": 9.653642868763937e-06, "loss": 0.6796, "step": 2036 }, { "epoch": 0.15, "grad_norm": 1.6792110434028609, "learning_rate": 9.653219102990517e-06, "loss": 0.2155, "step": 2037 }, { "epoch": 0.15, "grad_norm": 1.479195429892465, "learning_rate": 9.652795087450781e-06, "loss": 0.1914, "step": 2038 }, { "epoch": 0.15, "grad_norm": 1.4301722559567636, "learning_rate": 9.652370822167491e-06, "loss": 0.2001, "step": 2039 }, { "epoch": 0.15, "grad_norm": 1.3291237840731893, "learning_rate": 9.651946307163417e-06, "loss": 0.2065, "step": 2040 }, { "epoch": 0.15, "grad_norm": 1.6486920412271882, "learning_rate": 9.651521542461348e-06, "loss": 0.2274, "step": 2041 }, { "epoch": 0.15, "grad_norm": 5.71291968455386, "learning_rate": 9.651096528084082e-06, "loss": 0.6185, "step": 2042 }, { "epoch": 0.15, "grad_norm": 1.613592932321471, "learning_rate": 9.650671264054432e-06, "loss": 0.1721, "step": 2043 }, { "epoch": 0.15, "grad_norm": 1.7377470959957462, "learning_rate": 9.650245750395224e-06, "loss": 0.2261, "step": 2044 }, { "epoch": 0.15, "grad_norm": 4.917527601020814, "learning_rate": 9.649819987129299e-06, "loss": 0.6601, "step": 2045 }, { "epoch": 0.15, "grad_norm": 1.6884395572607929, "learning_rate": 9.649393974279509e-06, "loss": 0.2185, "step": 2046 }, { "epoch": 0.15, "grad_norm": 4.09780037709546, "learning_rate": 9.648967711868722e-06, "loss": 0.5147, "step": 2047 }, { "epoch": 0.15, "grad_norm": 1.364056002031816, "learning_rate": 9.648541199919818e-06, "loss": 0.2168, "step": 2048 }, { "epoch": 0.15, "grad_norm": 1.4875827416470355, "learning_rate": 9.648114438455689e-06, "loss": 0.2507, "step": 2049 }, { "epoch": 0.15, "grad_norm": 1.3779775470439601, "learning_rate": 9.647687427499243e-06, "loss": 0.2555, "step": 2050 }, { "epoch": 0.15, "grad_norm": 5.59606789347987, "learning_rate": 9.647260167073402e-06, "loss": 0.6518, "step": 2051 }, { "epoch": 0.15, "grad_norm": 1.5265644765208641, "learning_rate": 9.646832657201097e-06, "loss": 0.2329, "step": 2052 }, { "epoch": 0.15, "grad_norm": 1.556613570163405, "learning_rate": 9.646404897905276e-06, "loss": 0.2526, "step": 2053 }, { "epoch": 0.15, "grad_norm": 1.4951310932493223, "learning_rate": 9.645976889208896e-06, "loss": 0.2319, "step": 2054 }, { "epoch": 0.15, "grad_norm": 1.514169998107707, "learning_rate": 9.645548631134938e-06, "loss": 0.223, "step": 2055 }, { "epoch": 0.15, "grad_norm": 1.378561275734485, "learning_rate": 9.645120123706384e-06, "loss": 0.1908, "step": 2056 }, { "epoch": 0.15, "grad_norm": 1.529367858055682, "learning_rate": 9.644691366946238e-06, "loss": 0.2378, "step": 2057 }, { "epoch": 0.15, "grad_norm": 4.749999058452197, "learning_rate": 9.644262360877507e-06, "loss": 0.4636, "step": 2058 }, { "epoch": 0.15, "grad_norm": 1.6691840195675718, "learning_rate": 9.643833105523227e-06, "loss": 0.234, "step": 2059 }, { "epoch": 0.15, "grad_norm": 1.9044867492531274, "learning_rate": 9.643403600906433e-06, "loss": 0.2205, "step": 2060 }, { "epoch": 0.15, "grad_norm": 1.3131553159943004, "learning_rate": 9.642973847050183e-06, "loss": 0.1943, "step": 2061 }, { "epoch": 0.15, "grad_norm": 1.5074633831157127, "learning_rate": 9.64254384397754e-06, "loss": 0.231, "step": 2062 }, { "epoch": 0.15, "grad_norm": 1.3843823957738763, "learning_rate": 9.64211359171159e-06, "loss": 0.2478, "step": 2063 }, { "epoch": 0.15, "grad_norm": 1.3891638643362163, "learning_rate": 9.641683090275423e-06, "loss": 0.2428, "step": 2064 }, { "epoch": 0.15, "grad_norm": 1.559516381594344, "learning_rate": 9.641252339692146e-06, "loss": 0.2612, "step": 2065 }, { "epoch": 0.15, "grad_norm": 1.515013793356278, "learning_rate": 9.640821339984885e-06, "loss": 0.1976, "step": 2066 }, { "epoch": 0.15, "grad_norm": 3.866356190073929, "learning_rate": 9.640390091176772e-06, "loss": 0.46, "step": 2067 }, { "epoch": 0.15, "grad_norm": 1.5610310042587374, "learning_rate": 9.639958593290952e-06, "loss": 0.2414, "step": 2068 }, { "epoch": 0.15, "grad_norm": 1.4735061792704163, "learning_rate": 9.63952684635059e-06, "loss": 0.2261, "step": 2069 }, { "epoch": 0.15, "grad_norm": 1.3647505077613498, "learning_rate": 9.639094850378859e-06, "loss": 0.2069, "step": 2070 }, { "epoch": 0.15, "grad_norm": 1.4755872866428412, "learning_rate": 9.638662605398944e-06, "loss": 0.2249, "step": 2071 }, { "epoch": 0.15, "grad_norm": 1.3241105884689077, "learning_rate": 9.638230111434051e-06, "loss": 0.1963, "step": 2072 }, { "epoch": 0.15, "grad_norm": 1.3181839956390904, "learning_rate": 9.63779736850739e-06, "loss": 0.2152, "step": 2073 }, { "epoch": 0.15, "grad_norm": 1.4283543373259668, "learning_rate": 9.637364376642194e-06, "loss": 0.2011, "step": 2074 }, { "epoch": 0.15, "grad_norm": 1.3601027013785114, "learning_rate": 9.6369311358617e-06, "loss": 0.2161, "step": 2075 }, { "epoch": 0.15, "grad_norm": 1.503669911642347, "learning_rate": 9.636497646189165e-06, "loss": 0.2513, "step": 2076 }, { "epoch": 0.15, "grad_norm": 1.570466148709927, "learning_rate": 9.636063907647856e-06, "loss": 0.2315, "step": 2077 }, { "epoch": 0.15, "grad_norm": 1.3434407784692561, "learning_rate": 9.635629920261054e-06, "loss": 0.1897, "step": 2078 }, { "epoch": 0.15, "grad_norm": 1.229893335694586, "learning_rate": 9.635195684052055e-06, "loss": 0.2284, "step": 2079 }, { "epoch": 0.15, "grad_norm": 1.5305370019155835, "learning_rate": 9.634761199044165e-06, "loss": 0.2025, "step": 2080 }, { "epoch": 0.15, "grad_norm": 1.3285144875002792, "learning_rate": 9.634326465260708e-06, "loss": 0.1952, "step": 2081 }, { "epoch": 0.15, "grad_norm": 1.2965342054696503, "learning_rate": 9.633891482725017e-06, "loss": 0.1583, "step": 2082 }, { "epoch": 0.15, "grad_norm": 5.305288213156061, "learning_rate": 9.633456251460443e-06, "loss": 0.7332, "step": 2083 }, { "epoch": 0.15, "grad_norm": 1.6089621017595082, "learning_rate": 9.633020771490343e-06, "loss": 0.2435, "step": 2084 }, { "epoch": 0.15, "grad_norm": 1.691289866192461, "learning_rate": 9.632585042838092e-06, "loss": 0.2419, "step": 2085 }, { "epoch": 0.15, "grad_norm": 1.6023878068408524, "learning_rate": 9.632149065527083e-06, "loss": 0.2501, "step": 2086 }, { "epoch": 0.15, "grad_norm": 1.3745010211116593, "learning_rate": 9.631712839580713e-06, "loss": 0.2383, "step": 2087 }, { "epoch": 0.15, "grad_norm": 1.3786220071975555, "learning_rate": 9.631276365022402e-06, "loss": 0.2281, "step": 2088 }, { "epoch": 0.15, "grad_norm": 1.3901687473012596, "learning_rate": 9.630839641875571e-06, "loss": 0.2288, "step": 2089 }, { "epoch": 0.15, "grad_norm": 9.42254037784379, "learning_rate": 9.630402670163668e-06, "loss": 0.6686, "step": 2090 }, { "epoch": 0.15, "grad_norm": 1.2932780366574195, "learning_rate": 9.629965449910144e-06, "loss": 0.1824, "step": 2091 }, { "epoch": 0.15, "grad_norm": 1.4673187630162479, "learning_rate": 9.629527981138468e-06, "loss": 0.2451, "step": 2092 }, { "epoch": 0.15, "grad_norm": 1.8261084867685116, "learning_rate": 9.629090263872124e-06, "loss": 0.241, "step": 2093 }, { "epoch": 0.15, "grad_norm": 1.3730726716899075, "learning_rate": 9.628652298134601e-06, "loss": 0.2222, "step": 2094 }, { "epoch": 0.15, "grad_norm": 5.145929770886053, "learning_rate": 9.628214083949415e-06, "loss": 0.5604, "step": 2095 }, { "epoch": 0.15, "grad_norm": 1.4894372629415926, "learning_rate": 9.627775621340083e-06, "loss": 0.2757, "step": 2096 }, { "epoch": 0.15, "grad_norm": 1.373150962284338, "learning_rate": 9.62733691033014e-06, "loss": 0.2243, "step": 2097 }, { "epoch": 0.15, "grad_norm": 1.3972287139307273, "learning_rate": 9.626897950943137e-06, "loss": 0.2046, "step": 2098 }, { "epoch": 0.15, "grad_norm": 1.6202438846569491, "learning_rate": 9.626458743202632e-06, "loss": 0.2971, "step": 2099 }, { "epoch": 0.15, "grad_norm": 1.4444902930510832, "learning_rate": 9.626019287132202e-06, "loss": 0.225, "step": 2100 }, { "epoch": 0.15, "grad_norm": 1.3667574563987765, "learning_rate": 9.625579582755436e-06, "loss": 0.2324, "step": 2101 }, { "epoch": 0.15, "grad_norm": 1.7630522812761482, "learning_rate": 9.625139630095933e-06, "loss": 0.2863, "step": 2102 }, { "epoch": 0.15, "grad_norm": 5.046378017571318, "learning_rate": 9.624699429177308e-06, "loss": 0.7104, "step": 2103 }, { "epoch": 0.15, "grad_norm": 1.5391312427463832, "learning_rate": 9.624258980023194e-06, "loss": 0.257, "step": 2104 }, { "epoch": 0.15, "grad_norm": 1.4103236786796092, "learning_rate": 9.623818282657227e-06, "loss": 0.2571, "step": 2105 }, { "epoch": 0.15, "grad_norm": 1.325287397409188, "learning_rate": 9.623377337103066e-06, "loss": 0.2191, "step": 2106 }, { "epoch": 0.15, "grad_norm": 1.5456451763862133, "learning_rate": 9.622936143384374e-06, "loss": 0.2527, "step": 2107 }, { "epoch": 0.15, "grad_norm": 1.421445026338252, "learning_rate": 9.622494701524838e-06, "loss": 0.262, "step": 2108 }, { "epoch": 0.15, "grad_norm": 1.3826173054696445, "learning_rate": 9.622053011548151e-06, "loss": 0.2099, "step": 2109 }, { "epoch": 0.15, "grad_norm": 1.3288567571280328, "learning_rate": 9.62161107347802e-06, "loss": 0.2145, "step": 2110 }, { "epoch": 0.15, "grad_norm": 1.3770629542494632, "learning_rate": 9.621168887338168e-06, "loss": 0.2492, "step": 2111 }, { "epoch": 0.15, "grad_norm": 4.929837125462641, "learning_rate": 9.620726453152327e-06, "loss": 0.6346, "step": 2112 }, { "epoch": 0.15, "grad_norm": 1.3735618465396864, "learning_rate": 9.62028377094425e-06, "loss": 0.2055, "step": 2113 }, { "epoch": 0.15, "grad_norm": 1.9083863886016605, "learning_rate": 9.619840840737693e-06, "loss": 0.2497, "step": 2114 }, { "epoch": 0.15, "grad_norm": 1.4099356283735647, "learning_rate": 9.619397662556434e-06, "loss": 0.2478, "step": 2115 }, { "epoch": 0.15, "grad_norm": 1.3228810757601803, "learning_rate": 9.618954236424261e-06, "loss": 0.2158, "step": 2116 }, { "epoch": 0.15, "grad_norm": 1.5925570223070498, "learning_rate": 9.618510562364976e-06, "loss": 0.2082, "step": 2117 }, { "epoch": 0.15, "grad_norm": 1.388580412686704, "learning_rate": 9.618066640402391e-06, "loss": 0.2177, "step": 2118 }, { "epoch": 0.15, "grad_norm": 1.3804031371423213, "learning_rate": 9.617622470560334e-06, "loss": 0.1707, "step": 2119 }, { "epoch": 0.15, "grad_norm": 1.4422215330788848, "learning_rate": 9.617178052862648e-06, "loss": 0.1889, "step": 2120 }, { "epoch": 0.15, "grad_norm": 7.392170256819738, "learning_rate": 9.616733387333189e-06, "loss": 0.7937, "step": 2121 }, { "epoch": 0.15, "grad_norm": 1.3966604831005234, "learning_rate": 9.616288473995822e-06, "loss": 0.1766, "step": 2122 }, { "epoch": 0.15, "grad_norm": 1.466240918188498, "learning_rate": 9.61584331287443e-06, "loss": 0.1906, "step": 2123 }, { "epoch": 0.15, "grad_norm": 1.4200473256976338, "learning_rate": 9.615397903992906e-06, "loss": 0.1935, "step": 2124 }, { "epoch": 0.15, "grad_norm": 1.6893118449916187, "learning_rate": 9.614952247375158e-06, "loss": 0.2318, "step": 2125 }, { "epoch": 0.15, "grad_norm": 5.4697299949041565, "learning_rate": 9.614506343045109e-06, "loss": 0.7798, "step": 2126 }, { "epoch": 0.15, "grad_norm": 1.484481472000296, "learning_rate": 9.614060191026691e-06, "loss": 0.1986, "step": 2127 }, { "epoch": 0.15, "grad_norm": 1.4329206635683895, "learning_rate": 9.613613791343852e-06, "loss": 0.2473, "step": 2128 }, { "epoch": 0.15, "grad_norm": 1.3978220860278685, "learning_rate": 9.613167144020554e-06, "loss": 0.2318, "step": 2129 }, { "epoch": 0.15, "grad_norm": 1.385632726926145, "learning_rate": 9.612720249080772e-06, "loss": 0.1472, "step": 2130 }, { "epoch": 0.15, "grad_norm": 1.4644422568869802, "learning_rate": 9.612273106548492e-06, "loss": 0.2431, "step": 2131 }, { "epoch": 0.15, "grad_norm": 5.921743720429038, "learning_rate": 9.611825716447716e-06, "loss": 0.7197, "step": 2132 }, { "epoch": 0.15, "grad_norm": 1.3735798927026437, "learning_rate": 9.611378078802456e-06, "loss": 0.1952, "step": 2133 }, { "epoch": 0.15, "grad_norm": 1.3420663158371056, "learning_rate": 9.610930193636741e-06, "loss": 0.22, "step": 2134 }, { "epoch": 0.15, "grad_norm": 1.3740066697000843, "learning_rate": 9.61048206097461e-06, "loss": 0.2451, "step": 2135 }, { "epoch": 0.15, "grad_norm": 6.5179349080410685, "learning_rate": 9.61003368084012e-06, "loss": 0.5987, "step": 2136 }, { "epoch": 0.15, "grad_norm": 1.2803752877068484, "learning_rate": 9.609585053257338e-06, "loss": 0.1842, "step": 2137 }, { "epoch": 0.15, "grad_norm": 1.2988091439402814, "learning_rate": 9.609136178250341e-06, "loss": 0.19, "step": 2138 }, { "epoch": 0.15, "grad_norm": 1.5157033116022467, "learning_rate": 9.608687055843227e-06, "loss": 0.2495, "step": 2139 }, { "epoch": 0.15, "grad_norm": 1.4391736210882573, "learning_rate": 9.608237686060099e-06, "loss": 0.2192, "step": 2140 }, { "epoch": 0.15, "grad_norm": 1.3465171887198473, "learning_rate": 9.607788068925082e-06, "loss": 0.1989, "step": 2141 }, { "epoch": 0.15, "grad_norm": 1.6297583298179183, "learning_rate": 9.607338204462304e-06, "loss": 0.2385, "step": 2142 }, { "epoch": 0.15, "grad_norm": 1.4628956113042402, "learning_rate": 9.606888092695918e-06, "loss": 0.2199, "step": 2143 }, { "epoch": 0.15, "grad_norm": 1.4508188429794666, "learning_rate": 9.60643773365008e-06, "loss": 0.2295, "step": 2144 }, { "epoch": 0.15, "grad_norm": 5.1619909242988555, "learning_rate": 9.605987127348966e-06, "loss": 0.6645, "step": 2145 }, { "epoch": 0.15, "grad_norm": 1.5262800427538348, "learning_rate": 9.605536273816761e-06, "loss": 0.2347, "step": 2146 }, { "epoch": 0.15, "grad_norm": 1.403326403440822, "learning_rate": 9.605085173077665e-06, "loss": 0.1822, "step": 2147 }, { "epoch": 0.15, "grad_norm": 1.6236197888154558, "learning_rate": 9.604633825155894e-06, "loss": 0.2784, "step": 2148 }, { "epoch": 0.15, "grad_norm": 1.5668373164226215, "learning_rate": 9.604182230075671e-06, "loss": 0.2576, "step": 2149 }, { "epoch": 0.15, "grad_norm": 1.5103560692643814, "learning_rate": 9.603730387861236e-06, "loss": 0.2306, "step": 2150 }, { "epoch": 0.15, "grad_norm": 1.446110275513765, "learning_rate": 9.603278298536845e-06, "loss": 0.2435, "step": 2151 }, { "epoch": 0.15, "grad_norm": 1.7267610687185202, "learning_rate": 9.602825962126763e-06, "loss": 0.3105, "step": 2152 }, { "epoch": 0.15, "grad_norm": 1.4063152208624514, "learning_rate": 9.60237337865527e-06, "loss": 0.1869, "step": 2153 }, { "epoch": 0.15, "grad_norm": 1.467851398019143, "learning_rate": 9.601920548146657e-06, "loss": 0.2376, "step": 2154 }, { "epoch": 0.15, "grad_norm": 3.9872533928032836, "learning_rate": 9.601467470625232e-06, "loss": 0.4816, "step": 2155 }, { "epoch": 0.15, "grad_norm": 1.555080988945281, "learning_rate": 9.601014146115313e-06, "loss": 0.2054, "step": 2156 }, { "epoch": 0.15, "grad_norm": 1.5961190183763232, "learning_rate": 9.600560574641234e-06, "loss": 0.2371, "step": 2157 }, { "epoch": 0.15, "grad_norm": 1.4573350717067362, "learning_rate": 9.60010675622734e-06, "loss": 0.2573, "step": 2158 }, { "epoch": 0.15, "grad_norm": 1.5400737107352143, "learning_rate": 9.599652690897991e-06, "loss": 0.2676, "step": 2159 }, { "epoch": 0.15, "grad_norm": 1.4364573214618583, "learning_rate": 9.599198378677559e-06, "loss": 0.2496, "step": 2160 }, { "epoch": 0.15, "grad_norm": 1.4147367597953493, "learning_rate": 9.598743819590431e-06, "loss": 0.2285, "step": 2161 }, { "epoch": 0.15, "grad_norm": 1.7288629502779715, "learning_rate": 9.598289013661002e-06, "loss": 0.2787, "step": 2162 }, { "epoch": 0.15, "grad_norm": 1.510985342804304, "learning_rate": 9.59783396091369e-06, "loss": 0.2115, "step": 2163 }, { "epoch": 0.15, "grad_norm": 1.5818735681933305, "learning_rate": 9.597378661372914e-06, "loss": 0.2377, "step": 2164 }, { "epoch": 0.15, "grad_norm": 1.4556521929735387, "learning_rate": 9.596923115063116e-06, "loss": 0.201, "step": 2165 }, { "epoch": 0.15, "grad_norm": 8.947415319075834, "learning_rate": 9.596467322008752e-06, "loss": 0.7998, "step": 2166 }, { "epoch": 0.16, "grad_norm": 1.377206368544645, "learning_rate": 9.59601128223428e-06, "loss": 0.1867, "step": 2167 }, { "epoch": 0.16, "grad_norm": 1.3843544245966157, "learning_rate": 9.59555499576418e-06, "loss": 0.2023, "step": 2168 }, { "epoch": 0.16, "grad_norm": 1.2793364459015888, "learning_rate": 9.595098462622947e-06, "loss": 0.2068, "step": 2169 }, { "epoch": 0.16, "grad_norm": 1.334474245759089, "learning_rate": 9.594641682835082e-06, "loss": 0.194, "step": 2170 }, { "epoch": 0.16, "grad_norm": 1.4099435673567542, "learning_rate": 9.594184656425108e-06, "loss": 0.2998, "step": 2171 }, { "epoch": 0.16, "grad_norm": 1.4805270547796043, "learning_rate": 9.593727383417551e-06, "loss": 0.2253, "step": 2172 }, { "epoch": 0.16, "grad_norm": 1.4647741759507256, "learning_rate": 9.593269863836958e-06, "loss": 0.2541, "step": 2173 }, { "epoch": 0.16, "grad_norm": 1.471328968626989, "learning_rate": 9.592812097707887e-06, "loss": 0.2403, "step": 2174 }, { "epoch": 0.16, "grad_norm": 1.5119445369724385, "learning_rate": 9.592354085054909e-06, "loss": 0.2601, "step": 2175 }, { "epoch": 0.16, "grad_norm": 1.620294134942793, "learning_rate": 9.591895825902608e-06, "loss": 0.2765, "step": 2176 }, { "epoch": 0.16, "grad_norm": 1.4184184102522184, "learning_rate": 9.591437320275582e-06, "loss": 0.2414, "step": 2177 }, { "epoch": 0.16, "grad_norm": 1.3041445292109501, "learning_rate": 9.590978568198442e-06, "loss": 0.1913, "step": 2178 }, { "epoch": 0.16, "grad_norm": 23.208818809455042, "learning_rate": 9.590519569695811e-06, "loss": 0.591, "step": 2179 }, { "epoch": 0.16, "grad_norm": 1.4886497907249971, "learning_rate": 9.590060324792328e-06, "loss": 0.2725, "step": 2180 }, { "epoch": 0.16, "grad_norm": 1.3647301572498627, "learning_rate": 9.58960083351264e-06, "loss": 0.2245, "step": 2181 }, { "epoch": 0.16, "grad_norm": 1.4477646794058292, "learning_rate": 9.589141095881414e-06, "loss": 0.2022, "step": 2182 }, { "epoch": 0.16, "grad_norm": 1.6160086935823856, "learning_rate": 9.588681111923325e-06, "loss": 0.2522, "step": 2183 }, { "epoch": 0.16, "grad_norm": 1.4444482884581678, "learning_rate": 9.588220881663065e-06, "loss": 0.2176, "step": 2184 }, { "epoch": 0.16, "grad_norm": 1.3620014487010388, "learning_rate": 9.587760405125334e-06, "loss": 0.1712, "step": 2185 }, { "epoch": 0.16, "grad_norm": 1.359346968575899, "learning_rate": 9.587299682334852e-06, "loss": 0.206, "step": 2186 }, { "epoch": 0.16, "grad_norm": 1.4371545126014544, "learning_rate": 9.586838713316347e-06, "loss": 0.238, "step": 2187 }, { "epoch": 0.16, "grad_norm": 1.354100736177541, "learning_rate": 9.586377498094562e-06, "loss": 0.1835, "step": 2188 }, { "epoch": 0.16, "grad_norm": 5.780242886017275, "learning_rate": 9.585916036694255e-06, "loss": 0.8067, "step": 2189 }, { "epoch": 0.16, "grad_norm": 1.4978420944151323, "learning_rate": 9.585454329140194e-06, "loss": 0.2111, "step": 2190 }, { "epoch": 0.16, "grad_norm": 1.393404039702627, "learning_rate": 9.58499237545716e-06, "loss": 0.2367, "step": 2191 }, { "epoch": 0.16, "grad_norm": 1.399419646336928, "learning_rate": 9.58453017566995e-06, "loss": 0.2605, "step": 2192 }, { "epoch": 0.16, "grad_norm": 1.4901533352427132, "learning_rate": 9.584067729803376e-06, "loss": 0.2039, "step": 2193 }, { "epoch": 0.16, "grad_norm": 1.7904135191761406, "learning_rate": 9.583605037882257e-06, "loss": 0.261, "step": 2194 }, { "epoch": 0.16, "grad_norm": 1.4080158745645563, "learning_rate": 9.583142099931429e-06, "loss": 0.2319, "step": 2195 }, { "epoch": 0.16, "grad_norm": 4.299051816783134, "learning_rate": 9.582678915975741e-06, "loss": 0.4772, "step": 2196 }, { "epoch": 0.16, "grad_norm": 1.2750240411130134, "learning_rate": 9.582215486040054e-06, "loss": 0.179, "step": 2197 }, { "epoch": 0.16, "grad_norm": 1.2674226676161087, "learning_rate": 9.581751810149244e-06, "loss": 0.1828, "step": 2198 }, { "epoch": 0.16, "grad_norm": 1.5105202069001844, "learning_rate": 9.581287888328201e-06, "loss": 0.2369, "step": 2199 }, { "epoch": 0.16, "grad_norm": 1.4065515902343977, "learning_rate": 9.580823720601824e-06, "loss": 0.2012, "step": 2200 }, { "epoch": 0.16, "grad_norm": 6.308827828233978, "learning_rate": 9.580359306995027e-06, "loss": 0.5322, "step": 2201 }, { "epoch": 0.16, "grad_norm": 1.4106749700153844, "learning_rate": 9.579894647532739e-06, "loss": 0.2052, "step": 2202 }, { "epoch": 0.16, "grad_norm": 1.339725912282564, "learning_rate": 9.579429742239902e-06, "loss": 0.207, "step": 2203 }, { "epoch": 0.16, "grad_norm": 10.805895094178684, "learning_rate": 9.57896459114147e-06, "loss": 0.4639, "step": 2204 }, { "epoch": 0.16, "grad_norm": 5.3076711726276855, "learning_rate": 9.578499194262411e-06, "loss": 0.6018, "step": 2205 }, { "epoch": 0.16, "grad_norm": 1.400498012085324, "learning_rate": 9.578033551627704e-06, "loss": 0.2205, "step": 2206 }, { "epoch": 0.16, "grad_norm": 4.771131356279034, "learning_rate": 9.577567663262342e-06, "loss": 0.689, "step": 2207 }, { "epoch": 0.16, "grad_norm": 1.3879651581087789, "learning_rate": 9.577101529191335e-06, "loss": 0.1982, "step": 2208 }, { "epoch": 0.16, "grad_norm": 1.3198941720292972, "learning_rate": 9.576635149439702e-06, "loss": 0.213, "step": 2209 }, { "epoch": 0.16, "grad_norm": 1.4885534014756785, "learning_rate": 9.576168524032475e-06, "loss": 0.2252, "step": 2210 }, { "epoch": 0.16, "grad_norm": 1.7797932984174016, "learning_rate": 9.575701652994702e-06, "loss": 0.2704, "step": 2211 }, { "epoch": 0.16, "grad_norm": 1.3779436149619189, "learning_rate": 9.575234536351443e-06, "loss": 0.2143, "step": 2212 }, { "epoch": 0.16, "grad_norm": 1.3806700320484169, "learning_rate": 9.57476717412777e-06, "loss": 0.1828, "step": 2213 }, { "epoch": 0.16, "grad_norm": 1.4395511285494844, "learning_rate": 9.57429956634877e-06, "loss": 0.1982, "step": 2214 }, { "epoch": 0.16, "grad_norm": 1.4340096249209484, "learning_rate": 9.573831713039543e-06, "loss": 0.2749, "step": 2215 }, { "epoch": 0.16, "grad_norm": 1.3586778787873939, "learning_rate": 9.573363614225199e-06, "loss": 0.2008, "step": 2216 }, { "epoch": 0.16, "grad_norm": 1.4606402869453705, "learning_rate": 9.572895269930865e-06, "loss": 0.2245, "step": 2217 }, { "epoch": 0.16, "grad_norm": 1.3884084579078353, "learning_rate": 9.572426680181681e-06, "loss": 0.2175, "step": 2218 }, { "epoch": 0.16, "grad_norm": 1.312240645966458, "learning_rate": 9.571957845002797e-06, "loss": 0.1707, "step": 2219 }, { "epoch": 0.16, "grad_norm": 1.4394167967969698, "learning_rate": 9.571488764419381e-06, "loss": 0.2166, "step": 2220 }, { "epoch": 0.16, "grad_norm": 1.4679816795649474, "learning_rate": 9.571019438456609e-06, "loss": 0.2219, "step": 2221 }, { "epoch": 0.16, "grad_norm": 1.4034172922758295, "learning_rate": 9.570549867139673e-06, "loss": 0.1969, "step": 2222 }, { "epoch": 0.16, "grad_norm": 1.6579497121557165, "learning_rate": 9.570080050493779e-06, "loss": 0.2022, "step": 2223 }, { "epoch": 0.16, "grad_norm": 1.3932417901644483, "learning_rate": 9.569609988544142e-06, "loss": 0.2058, "step": 2224 }, { "epoch": 0.16, "grad_norm": 3.8150973154133854, "learning_rate": 9.569139681315996e-06, "loss": 0.5729, "step": 2225 }, { "epoch": 0.16, "grad_norm": 1.6828550068840442, "learning_rate": 9.568669128834584e-06, "loss": 0.2496, "step": 2226 }, { "epoch": 0.16, "grad_norm": 1.4118980168705837, "learning_rate": 9.568198331125163e-06, "loss": 0.2049, "step": 2227 }, { "epoch": 0.16, "grad_norm": 1.4423502717163474, "learning_rate": 9.567727288213005e-06, "loss": 0.2388, "step": 2228 }, { "epoch": 0.16, "grad_norm": 1.5837169968597165, "learning_rate": 9.567256000123393e-06, "loss": 0.2616, "step": 2229 }, { "epoch": 0.16, "grad_norm": 1.422403736377754, "learning_rate": 9.566784466881622e-06, "loss": 0.2369, "step": 2230 }, { "epoch": 0.16, "grad_norm": 1.39328323126279, "learning_rate": 9.566312688513006e-06, "loss": 0.2045, "step": 2231 }, { "epoch": 0.16, "grad_norm": 1.265500887789139, "learning_rate": 9.565840665042865e-06, "loss": 0.2155, "step": 2232 }, { "epoch": 0.16, "grad_norm": 1.6337787653184104, "learning_rate": 9.565368396496535e-06, "loss": 0.2264, "step": 2233 }, { "epoch": 0.16, "grad_norm": 1.4370524952815555, "learning_rate": 9.564895882899368e-06, "loss": 0.2311, "step": 2234 }, { "epoch": 0.16, "grad_norm": 1.4402081614968072, "learning_rate": 9.564423124276726e-06, "loss": 0.2275, "step": 2235 }, { "epoch": 0.16, "grad_norm": 1.581527249382296, "learning_rate": 9.563950120653982e-06, "loss": 0.2116, "step": 2236 }, { "epoch": 0.16, "grad_norm": 1.4870635290804242, "learning_rate": 9.563476872056527e-06, "loss": 0.2483, "step": 2237 }, { "epoch": 0.16, "grad_norm": 1.4491257163556845, "learning_rate": 9.563003378509766e-06, "loss": 0.2012, "step": 2238 }, { "epoch": 0.16, "grad_norm": 1.4315753392964596, "learning_rate": 9.56252964003911e-06, "loss": 0.2164, "step": 2239 }, { "epoch": 0.16, "grad_norm": 10.663032144229193, "learning_rate": 9.562055656669988e-06, "loss": 0.4086, "step": 2240 }, { "epoch": 0.16, "grad_norm": 1.49489835845098, "learning_rate": 9.561581428427845e-06, "loss": 0.2615, "step": 2241 }, { "epoch": 0.16, "grad_norm": 1.5743286836998294, "learning_rate": 9.56110695533813e-06, "loss": 0.2424, "step": 2242 }, { "epoch": 0.16, "grad_norm": 1.4463748757917514, "learning_rate": 9.560632237426315e-06, "loss": 0.2166, "step": 2243 }, { "epoch": 0.16, "grad_norm": 1.4105702113471734, "learning_rate": 9.56015727471788e-06, "loss": 0.2005, "step": 2244 }, { "epoch": 0.16, "grad_norm": 1.3193286373739412, "learning_rate": 9.55968206723832e-06, "loss": 0.19, "step": 2245 }, { "epoch": 0.16, "grad_norm": 1.5854581037097646, "learning_rate": 9.559206615013142e-06, "loss": 0.2252, "step": 2246 }, { "epoch": 0.16, "grad_norm": 1.3729059019083576, "learning_rate": 9.558730918067862e-06, "loss": 0.2139, "step": 2247 }, { "epoch": 0.16, "grad_norm": 1.4088530824548053, "learning_rate": 9.558254976428022e-06, "loss": 0.2354, "step": 2248 }, { "epoch": 0.16, "grad_norm": 1.6816387302356723, "learning_rate": 9.55777879011916e-06, "loss": 0.2182, "step": 2249 }, { "epoch": 0.16, "grad_norm": 10.606334913339843, "learning_rate": 9.55730235916684e-06, "loss": 0.8392, "step": 2250 }, { "epoch": 0.16, "grad_norm": 1.3951464900498858, "learning_rate": 9.556825683596635e-06, "loss": 0.1977, "step": 2251 }, { "epoch": 0.16, "grad_norm": 1.3480952096502246, "learning_rate": 9.556348763434133e-06, "loss": 0.2074, "step": 2252 }, { "epoch": 0.16, "grad_norm": 1.589784112360952, "learning_rate": 9.555871598704929e-06, "loss": 0.2202, "step": 2253 }, { "epoch": 0.16, "grad_norm": 1.3191573243599104, "learning_rate": 9.555394189434636e-06, "loss": 0.1743, "step": 2254 }, { "epoch": 0.16, "grad_norm": 1.3393415736325, "learning_rate": 9.554916535648882e-06, "loss": 0.2058, "step": 2255 }, { "epoch": 0.16, "grad_norm": 1.5263805213583772, "learning_rate": 9.554438637373304e-06, "loss": 0.2283, "step": 2256 }, { "epoch": 0.16, "grad_norm": 1.5427083119975071, "learning_rate": 9.553960494633552e-06, "loss": 0.2608, "step": 2257 }, { "epoch": 0.16, "grad_norm": 1.473010941163431, "learning_rate": 9.553482107455296e-06, "loss": 0.2608, "step": 2258 }, { "epoch": 0.16, "grad_norm": 1.3096066081261102, "learning_rate": 9.553003475864207e-06, "loss": 0.2059, "step": 2259 }, { "epoch": 0.16, "grad_norm": 1.5464659159062626, "learning_rate": 9.552524599885982e-06, "loss": 0.2202, "step": 2260 }, { "epoch": 0.16, "grad_norm": 1.4793049959418219, "learning_rate": 9.552045479546322e-06, "loss": 0.2254, "step": 2261 }, { "epoch": 0.16, "grad_norm": 1.4201878945966728, "learning_rate": 9.551566114870945e-06, "loss": 0.237, "step": 2262 }, { "epoch": 0.16, "grad_norm": 1.6063762767942602, "learning_rate": 9.55108650588558e-06, "loss": 0.2365, "step": 2263 }, { "epoch": 0.16, "grad_norm": 4.6444395662999645, "learning_rate": 9.550606652615973e-06, "loss": 0.585, "step": 2264 }, { "epoch": 0.16, "grad_norm": 5.603321336434924, "learning_rate": 9.55012655508788e-06, "loss": 0.6692, "step": 2265 }, { "epoch": 0.16, "grad_norm": 1.3341201529460056, "learning_rate": 9.549646213327069e-06, "loss": 0.2236, "step": 2266 }, { "epoch": 0.16, "grad_norm": 1.3508297180273938, "learning_rate": 9.549165627359324e-06, "loss": 0.2057, "step": 2267 }, { "epoch": 0.16, "grad_norm": 5.098366150594217, "learning_rate": 9.548684797210444e-06, "loss": 0.6355, "step": 2268 }, { "epoch": 0.16, "grad_norm": 1.4796161663181557, "learning_rate": 9.548203722906231e-06, "loss": 0.2184, "step": 2269 }, { "epoch": 0.16, "grad_norm": 1.5883090232935988, "learning_rate": 9.547722404472514e-06, "loss": 0.2564, "step": 2270 }, { "epoch": 0.16, "grad_norm": 1.4319785113897798, "learning_rate": 9.547240841935124e-06, "loss": 0.2121, "step": 2271 }, { "epoch": 0.16, "grad_norm": 1.3735242154725285, "learning_rate": 9.546759035319911e-06, "loss": 0.2086, "step": 2272 }, { "epoch": 0.16, "grad_norm": 1.300715134219384, "learning_rate": 9.546276984652736e-06, "loss": 0.2026, "step": 2273 }, { "epoch": 0.16, "grad_norm": 1.3207040995008266, "learning_rate": 9.545794689959476e-06, "loss": 0.2287, "step": 2274 }, { "epoch": 0.16, "grad_norm": 1.3267397631179043, "learning_rate": 9.545312151266015e-06, "loss": 0.193, "step": 2275 }, { "epoch": 0.16, "grad_norm": 5.193934742994709, "learning_rate": 9.544829368598254e-06, "loss": 0.522, "step": 2276 }, { "epoch": 0.16, "grad_norm": 1.4319616589425652, "learning_rate": 9.54434634198211e-06, "loss": 0.2054, "step": 2277 }, { "epoch": 0.16, "grad_norm": 1.3046697439284143, "learning_rate": 9.543863071443508e-06, "loss": 0.1814, "step": 2278 }, { "epoch": 0.16, "grad_norm": 1.5200117496686427, "learning_rate": 9.543379557008388e-06, "loss": 0.2269, "step": 2279 }, { "epoch": 0.16, "grad_norm": 6.791963277922103, "learning_rate": 9.542895798702702e-06, "loss": 0.5867, "step": 2280 }, { "epoch": 0.16, "grad_norm": 1.451737868958123, "learning_rate": 9.542411796552418e-06, "loss": 0.2409, "step": 2281 }, { "epoch": 0.16, "grad_norm": 1.451508274389207, "learning_rate": 9.541927550583516e-06, "loss": 0.22, "step": 2282 }, { "epoch": 0.16, "grad_norm": 1.311063570759236, "learning_rate": 9.541443060821986e-06, "loss": 0.1757, "step": 2283 }, { "epoch": 0.16, "grad_norm": 1.439030798929904, "learning_rate": 9.540958327293835e-06, "loss": 0.2265, "step": 2284 }, { "epoch": 0.16, "grad_norm": 1.5174749622001882, "learning_rate": 9.540473350025082e-06, "loss": 0.2811, "step": 2285 }, { "epoch": 0.16, "grad_norm": 1.3029114176835457, "learning_rate": 9.539988129041757e-06, "loss": 0.2066, "step": 2286 }, { "epoch": 0.16, "grad_norm": 1.460803829409455, "learning_rate": 9.539502664369905e-06, "loss": 0.2088, "step": 2287 }, { "epoch": 0.16, "grad_norm": 6.607682359154701, "learning_rate": 9.539016956035588e-06, "loss": 0.7175, "step": 2288 }, { "epoch": 0.16, "grad_norm": 1.5433623722890235, "learning_rate": 9.538531004064869e-06, "loss": 0.2437, "step": 2289 }, { "epoch": 0.16, "grad_norm": 1.649202903570783, "learning_rate": 9.538044808483839e-06, "loss": 0.2503, "step": 2290 }, { "epoch": 0.16, "grad_norm": 1.3853485820054525, "learning_rate": 9.537558369318591e-06, "loss": 0.2041, "step": 2291 }, { "epoch": 0.16, "grad_norm": 1.3820991853368472, "learning_rate": 9.537071686595237e-06, "loss": 0.1705, "step": 2292 }, { "epoch": 0.16, "grad_norm": 1.6489403547185333, "learning_rate": 9.5365847603399e-06, "loss": 0.2372, "step": 2293 }, { "epoch": 0.16, "grad_norm": 1.5512020947576006, "learning_rate": 9.536097590578715e-06, "loss": 0.2594, "step": 2294 }, { "epoch": 0.16, "grad_norm": 1.391941919949618, "learning_rate": 9.535610177337833e-06, "loss": 0.2189, "step": 2295 }, { "epoch": 0.16, "grad_norm": 1.656919616128035, "learning_rate": 9.535122520643415e-06, "loss": 0.2066, "step": 2296 }, { "epoch": 0.16, "grad_norm": 1.2612389920459641, "learning_rate": 9.53463462052164e-06, "loss": 0.2261, "step": 2297 }, { "epoch": 0.16, "grad_norm": 1.5060811977949595, "learning_rate": 9.53414647699869e-06, "loss": 0.2145, "step": 2298 }, { "epoch": 0.16, "grad_norm": 1.475487046889574, "learning_rate": 9.533658090100772e-06, "loss": 0.2033, "step": 2299 }, { "epoch": 0.16, "grad_norm": 1.298480149515627, "learning_rate": 9.5331694598541e-06, "loss": 0.2327, "step": 2300 }, { "epoch": 0.16, "grad_norm": 1.4230620937762193, "learning_rate": 9.532680586284898e-06, "loss": 0.2116, "step": 2301 }, { "epoch": 0.16, "grad_norm": 1.614627635963967, "learning_rate": 9.532191469419412e-06, "loss": 0.2549, "step": 2302 }, { "epoch": 0.16, "grad_norm": 6.157347237314944, "learning_rate": 9.531702109283892e-06, "loss": 0.6041, "step": 2303 }, { "epoch": 0.16, "grad_norm": 1.331892603121996, "learning_rate": 9.531212505904606e-06, "loss": 0.233, "step": 2304 }, { "epoch": 0.16, "grad_norm": 1.4304896330775478, "learning_rate": 9.530722659307836e-06, "loss": 0.1883, "step": 2305 }, { "epoch": 0.16, "grad_norm": 1.3229709592839418, "learning_rate": 9.530232569519871e-06, "loss": 0.2369, "step": 2306 }, { "epoch": 0.17, "grad_norm": 1.5418363645160602, "learning_rate": 9.529742236567022e-06, "loss": 0.2712, "step": 2307 }, { "epoch": 0.17, "grad_norm": 1.319075302257437, "learning_rate": 9.529251660475602e-06, "loss": 0.2089, "step": 2308 }, { "epoch": 0.17, "grad_norm": 1.5588370866192451, "learning_rate": 9.528760841271949e-06, "loss": 0.2331, "step": 2309 }, { "epoch": 0.17, "grad_norm": 1.4886050158019533, "learning_rate": 9.528269778982407e-06, "loss": 0.2104, "step": 2310 }, { "epoch": 0.17, "grad_norm": 1.450455333423547, "learning_rate": 9.527778473633331e-06, "loss": 0.2342, "step": 2311 }, { "epoch": 0.17, "grad_norm": 1.4106105093077694, "learning_rate": 9.527286925251095e-06, "loss": 0.2478, "step": 2312 }, { "epoch": 0.17, "grad_norm": 1.5206971409482772, "learning_rate": 9.526795133862083e-06, "loss": 0.2183, "step": 2313 }, { "epoch": 0.17, "grad_norm": 1.4685942438699602, "learning_rate": 9.52630309949269e-06, "loss": 0.1986, "step": 2314 }, { "epoch": 0.17, "grad_norm": 4.640275774890013, "learning_rate": 9.525810822169332e-06, "loss": 0.5119, "step": 2315 }, { "epoch": 0.17, "grad_norm": 1.480412117304701, "learning_rate": 9.525318301918427e-06, "loss": 0.2569, "step": 2316 }, { "epoch": 0.17, "grad_norm": 1.4772556125522698, "learning_rate": 9.524825538766417e-06, "loss": 0.2553, "step": 2317 }, { "epoch": 0.17, "grad_norm": 5.670378961226955, "learning_rate": 9.524332532739745e-06, "loss": 0.689, "step": 2318 }, { "epoch": 0.17, "grad_norm": 1.420136427022906, "learning_rate": 9.523839283864878e-06, "loss": 0.2035, "step": 2319 }, { "epoch": 0.17, "grad_norm": 1.4263542873993964, "learning_rate": 9.52334579216829e-06, "loss": 0.2283, "step": 2320 }, { "epoch": 0.17, "grad_norm": 1.421509811738969, "learning_rate": 9.52285205767647e-06, "loss": 0.2099, "step": 2321 }, { "epoch": 0.17, "grad_norm": 1.626621512711122, "learning_rate": 9.52235808041592e-06, "loss": 0.2682, "step": 2322 }, { "epoch": 0.17, "grad_norm": 1.3813766297591432, "learning_rate": 9.521863860413154e-06, "loss": 0.2186, "step": 2323 }, { "epoch": 0.17, "grad_norm": 4.695258738944951, "learning_rate": 9.521369397694702e-06, "loss": 0.5931, "step": 2324 }, { "epoch": 0.17, "grad_norm": 1.5706953274986557, "learning_rate": 9.520874692287102e-06, "loss": 0.2432, "step": 2325 }, { "epoch": 0.17, "grad_norm": 1.4804008829469815, "learning_rate": 9.52037974421691e-06, "loss": 0.2149, "step": 2326 }, { "epoch": 0.17, "grad_norm": 1.3059048351611338, "learning_rate": 9.519884553510691e-06, "loss": 0.2414, "step": 2327 }, { "epoch": 0.17, "grad_norm": 1.5082721546336773, "learning_rate": 9.519389120195026e-06, "loss": 0.1941, "step": 2328 }, { "epoch": 0.17, "grad_norm": 1.3995581653430642, "learning_rate": 9.518893444296508e-06, "loss": 0.2479, "step": 2329 }, { "epoch": 0.17, "grad_norm": 1.4276169521955404, "learning_rate": 9.518397525841742e-06, "loss": 0.1997, "step": 2330 }, { "epoch": 0.17, "grad_norm": 1.4850931045413314, "learning_rate": 9.517901364857347e-06, "loss": 0.2036, "step": 2331 }, { "epoch": 0.17, "grad_norm": 1.4840980805794286, "learning_rate": 9.517404961369956e-06, "loss": 0.2858, "step": 2332 }, { "epoch": 0.17, "grad_norm": 1.5050591665709097, "learning_rate": 9.516908315406212e-06, "loss": 0.2082, "step": 2333 }, { "epoch": 0.17, "grad_norm": 5.994365483778152, "learning_rate": 9.516411426992777e-06, "loss": 0.5814, "step": 2334 }, { "epoch": 0.17, "grad_norm": 1.5322474640885728, "learning_rate": 9.515914296156318e-06, "loss": 0.2359, "step": 2335 }, { "epoch": 0.17, "grad_norm": 1.1119811688905632, "learning_rate": 9.51541692292352e-06, "loss": 0.1584, "step": 2336 }, { "epoch": 0.17, "grad_norm": 1.683876452595023, "learning_rate": 9.514919307321081e-06, "loss": 0.2507, "step": 2337 }, { "epoch": 0.17, "grad_norm": 1.4362818724730777, "learning_rate": 9.514421449375711e-06, "loss": 0.2034, "step": 2338 }, { "epoch": 0.17, "grad_norm": 1.38403440517796, "learning_rate": 9.513923349114131e-06, "loss": 0.2209, "step": 2339 }, { "epoch": 0.17, "grad_norm": 1.4170858126054215, "learning_rate": 9.51342500656308e-06, "loss": 0.2178, "step": 2340 }, { "epoch": 0.17, "grad_norm": 1.2949232073157406, "learning_rate": 9.512926421749305e-06, "loss": 0.2384, "step": 2341 }, { "epoch": 0.17, "grad_norm": 1.4943835263499077, "learning_rate": 9.512427594699567e-06, "loss": 0.2751, "step": 2342 }, { "epoch": 0.17, "grad_norm": 1.3302230009239429, "learning_rate": 9.511928525440644e-06, "loss": 0.1798, "step": 2343 }, { "epoch": 0.17, "grad_norm": 1.4805598699583216, "learning_rate": 9.511429213999323e-06, "loss": 0.222, "step": 2344 }, { "epoch": 0.17, "grad_norm": 1.3386830201887079, "learning_rate": 9.510929660402404e-06, "loss": 0.1468, "step": 2345 }, { "epoch": 0.17, "grad_norm": 1.5417678324419908, "learning_rate": 9.510429864676702e-06, "loss": 0.3038, "step": 2346 }, { "epoch": 0.17, "grad_norm": 6.158266688800714, "learning_rate": 9.509929826849044e-06, "loss": 0.9248, "step": 2347 }, { "epoch": 0.17, "grad_norm": 5.5280252168687865, "learning_rate": 9.509429546946269e-06, "loss": 0.7129, "step": 2348 }, { "epoch": 0.17, "grad_norm": 1.4210087367325064, "learning_rate": 9.508929024995233e-06, "loss": 0.2514, "step": 2349 }, { "epoch": 0.17, "grad_norm": 1.414547666014133, "learning_rate": 9.508428261022798e-06, "loss": 0.2425, "step": 2350 }, { "epoch": 0.17, "grad_norm": 1.4292607312526646, "learning_rate": 9.507927255055847e-06, "loss": 0.2218, "step": 2351 }, { "epoch": 0.17, "grad_norm": 1.6571863115922023, "learning_rate": 9.507426007121268e-06, "loss": 0.262, "step": 2352 }, { "epoch": 0.17, "grad_norm": 1.3796186401412138, "learning_rate": 9.506924517245968e-06, "loss": 0.2312, "step": 2353 }, { "epoch": 0.17, "grad_norm": 1.411856797638386, "learning_rate": 9.506422785456865e-06, "loss": 0.2176, "step": 2354 }, { "epoch": 0.17, "grad_norm": 1.3839233728749893, "learning_rate": 9.505920811780892e-06, "loss": 0.2041, "step": 2355 }, { "epoch": 0.17, "grad_norm": 1.2814223901795698, "learning_rate": 9.505418596244989e-06, "loss": 0.205, "step": 2356 }, { "epoch": 0.17, "grad_norm": 1.4325012609939118, "learning_rate": 9.504916138876115e-06, "loss": 0.2397, "step": 2357 }, { "epoch": 0.17, "grad_norm": 5.969135896429005, "learning_rate": 9.504413439701241e-06, "loss": 0.5681, "step": 2358 }, { "epoch": 0.17, "grad_norm": 1.5145294294113256, "learning_rate": 9.503910498747348e-06, "loss": 0.2047, "step": 2359 }, { "epoch": 0.17, "grad_norm": 1.525319043785778, "learning_rate": 9.503407316041432e-06, "loss": 0.2468, "step": 2360 }, { "epoch": 0.17, "grad_norm": 4.389277161363658, "learning_rate": 9.502903891610502e-06, "loss": 0.6212, "step": 2361 }, { "epoch": 0.17, "grad_norm": 12.682134048053712, "learning_rate": 9.502400225481582e-06, "loss": 0.4795, "step": 2362 }, { "epoch": 0.17, "grad_norm": 1.5882725996764977, "learning_rate": 9.501896317681703e-06, "loss": 0.2165, "step": 2363 }, { "epoch": 0.17, "grad_norm": 1.501100114827806, "learning_rate": 9.501392168237914e-06, "loss": 0.1984, "step": 2364 }, { "epoch": 0.17, "grad_norm": 1.7440042880270596, "learning_rate": 9.50088777717728e-06, "loss": 0.2512, "step": 2365 }, { "epoch": 0.17, "grad_norm": 1.427499099272823, "learning_rate": 9.500383144526868e-06, "loss": 0.2257, "step": 2366 }, { "epoch": 0.17, "grad_norm": 8.127610435126591, "learning_rate": 9.49987827031377e-06, "loss": 0.5424, "step": 2367 }, { "epoch": 0.17, "grad_norm": 1.4848357718694933, "learning_rate": 9.499373154565083e-06, "loss": 0.1879, "step": 2368 }, { "epoch": 0.17, "grad_norm": 1.4921283913114312, "learning_rate": 9.49886779730792e-06, "loss": 0.2291, "step": 2369 }, { "epoch": 0.17, "grad_norm": 1.4147138680821634, "learning_rate": 9.498362198569407e-06, "loss": 0.2286, "step": 2370 }, { "epoch": 0.17, "grad_norm": 1.4997754989648011, "learning_rate": 9.497856358376681e-06, "loss": 0.2574, "step": 2371 }, { "epoch": 0.17, "grad_norm": 1.4167895328550235, "learning_rate": 9.497350276756895e-06, "loss": 0.206, "step": 2372 }, { "epoch": 0.17, "grad_norm": 1.3337212853600846, "learning_rate": 9.496843953737214e-06, "loss": 0.1751, "step": 2373 }, { "epoch": 0.17, "grad_norm": 1.5150807321607511, "learning_rate": 9.496337389344814e-06, "loss": 0.2264, "step": 2374 }, { "epoch": 0.17, "grad_norm": 1.3414188971494687, "learning_rate": 9.495830583606887e-06, "loss": 0.2495, "step": 2375 }, { "epoch": 0.17, "grad_norm": 13.046149377746191, "learning_rate": 9.495323536550633e-06, "loss": 0.7607, "step": 2376 }, { "epoch": 0.17, "grad_norm": 1.5449413778195826, "learning_rate": 9.494816248203272e-06, "loss": 0.2557, "step": 2377 }, { "epoch": 0.17, "grad_norm": 1.38756764183356, "learning_rate": 9.494308718592032e-06, "loss": 0.2076, "step": 2378 }, { "epoch": 0.17, "grad_norm": 1.64804574743153, "learning_rate": 9.493800947744154e-06, "loss": 0.2555, "step": 2379 }, { "epoch": 0.17, "grad_norm": 1.4306783394738056, "learning_rate": 9.493292935686896e-06, "loss": 0.2287, "step": 2380 }, { "epoch": 0.17, "grad_norm": 1.5434781545497618, "learning_rate": 9.492784682447521e-06, "loss": 0.1985, "step": 2381 }, { "epoch": 0.17, "grad_norm": 1.429361794605198, "learning_rate": 9.492276188053315e-06, "loss": 0.2095, "step": 2382 }, { "epoch": 0.17, "grad_norm": 1.4119075397388192, "learning_rate": 9.491767452531571e-06, "loss": 0.1956, "step": 2383 }, { "epoch": 0.17, "grad_norm": 1.5556630616540017, "learning_rate": 9.491258475909593e-06, "loss": 0.2357, "step": 2384 }, { "epoch": 0.17, "grad_norm": 1.290056654928571, "learning_rate": 9.490749258214704e-06, "loss": 0.1729, "step": 2385 }, { "epoch": 0.17, "grad_norm": 1.2675972315226016, "learning_rate": 9.490239799474237e-06, "loss": 0.1736, "step": 2386 }, { "epoch": 0.17, "grad_norm": 1.5234740070937312, "learning_rate": 9.489730099715534e-06, "loss": 0.2445, "step": 2387 }, { "epoch": 0.17, "grad_norm": 1.3683308225226825, "learning_rate": 9.489220158965957e-06, "loss": 0.2021, "step": 2388 }, { "epoch": 0.17, "grad_norm": 1.3673711689337822, "learning_rate": 9.488709977252876e-06, "loss": 0.1986, "step": 2389 }, { "epoch": 0.17, "grad_norm": 1.6042250249500396, "learning_rate": 9.488199554603676e-06, "loss": 0.2391, "step": 2390 }, { "epoch": 0.17, "grad_norm": 1.6592866741284222, "learning_rate": 9.487688891045756e-06, "loss": 0.2754, "step": 2391 }, { "epoch": 0.17, "grad_norm": 1.4318310830364822, "learning_rate": 9.487177986606525e-06, "loss": 0.2279, "step": 2392 }, { "epoch": 0.17, "grad_norm": 1.4519554997433146, "learning_rate": 9.486666841313404e-06, "loss": 0.2068, "step": 2393 }, { "epoch": 0.17, "grad_norm": 1.4027138201387752, "learning_rate": 9.486155455193833e-06, "loss": 0.2508, "step": 2394 }, { "epoch": 0.17, "grad_norm": 13.617430246264899, "learning_rate": 9.485643828275259e-06, "loss": 0.625, "step": 2395 }, { "epoch": 0.17, "grad_norm": 1.5400345259920187, "learning_rate": 9.485131960585146e-06, "loss": 0.2669, "step": 2396 }, { "epoch": 0.17, "grad_norm": 1.3174053728272423, "learning_rate": 9.484619852150967e-06, "loss": 0.19, "step": 2397 }, { "epoch": 0.17, "grad_norm": 1.4571704252550244, "learning_rate": 9.484107503000211e-06, "loss": 0.2282, "step": 2398 }, { "epoch": 0.17, "grad_norm": 1.3275148469213154, "learning_rate": 9.48359491316038e-06, "loss": 0.2118, "step": 2399 }, { "epoch": 0.17, "grad_norm": 1.5524317178502713, "learning_rate": 9.483082082658984e-06, "loss": 0.2523, "step": 2400 }, { "epoch": 0.17, "grad_norm": 1.278269845411248, "learning_rate": 9.482569011523553e-06, "loss": 0.1922, "step": 2401 }, { "epoch": 0.17, "grad_norm": 19.28521233933641, "learning_rate": 9.482055699781625e-06, "loss": 0.5399, "step": 2402 }, { "epoch": 0.17, "grad_norm": 1.5315347033034197, "learning_rate": 9.481542147460756e-06, "loss": 0.2118, "step": 2403 }, { "epoch": 0.17, "grad_norm": 1.3555767356865305, "learning_rate": 9.481028354588506e-06, "loss": 0.1871, "step": 2404 }, { "epoch": 0.17, "grad_norm": 1.4678777874055562, "learning_rate": 9.480514321192458e-06, "loss": 0.2247, "step": 2405 }, { "epoch": 0.17, "grad_norm": 4.159926088978146, "learning_rate": 9.4800000473002e-06, "loss": 0.6156, "step": 2406 }, { "epoch": 0.17, "grad_norm": 1.3424263045263272, "learning_rate": 9.479485532939338e-06, "loss": 0.2224, "step": 2407 }, { "epoch": 0.17, "grad_norm": 1.3877855771842444, "learning_rate": 9.478970778137488e-06, "loss": 0.2031, "step": 2408 }, { "epoch": 0.17, "grad_norm": 1.3809372649674194, "learning_rate": 9.478455782922282e-06, "loss": 0.228, "step": 2409 }, { "epoch": 0.17, "grad_norm": 1.4611839123048342, "learning_rate": 9.477940547321361e-06, "loss": 0.2212, "step": 2410 }, { "epoch": 0.17, "grad_norm": 1.4558949819790852, "learning_rate": 9.47742507136238e-06, "loss": 0.2385, "step": 2411 }, { "epoch": 0.17, "grad_norm": 1.565317403500844, "learning_rate": 9.476909355073012e-06, "loss": 0.2503, "step": 2412 }, { "epoch": 0.17, "grad_norm": 5.645823851278893, "learning_rate": 9.476393398480933e-06, "loss": 0.5941, "step": 2413 }, { "epoch": 0.17, "grad_norm": 1.5182986320540979, "learning_rate": 9.47587720161384e-06, "loss": 0.2642, "step": 2414 }, { "epoch": 0.17, "grad_norm": 1.234095356823936, "learning_rate": 9.475360764499442e-06, "loss": 0.2427, "step": 2415 }, { "epoch": 0.17, "grad_norm": 1.3656075962229723, "learning_rate": 9.474844087165458e-06, "loss": 0.1975, "step": 2416 }, { "epoch": 0.17, "grad_norm": 5.232820518586282, "learning_rate": 9.47432716963962e-06, "loss": 0.6512, "step": 2417 }, { "epoch": 0.17, "grad_norm": 1.4423249203985848, "learning_rate": 9.473810011949676e-06, "loss": 0.2517, "step": 2418 }, { "epoch": 0.17, "grad_norm": 1.5993249880140628, "learning_rate": 9.473292614123384e-06, "loss": 0.1969, "step": 2419 }, { "epoch": 0.17, "grad_norm": 1.3806214622715274, "learning_rate": 9.472774976188515e-06, "loss": 0.2072, "step": 2420 }, { "epoch": 0.17, "grad_norm": 1.3504150106556574, "learning_rate": 9.472257098172856e-06, "loss": 0.2504, "step": 2421 }, { "epoch": 0.17, "grad_norm": 1.3256305389782437, "learning_rate": 9.471738980104202e-06, "loss": 0.2501, "step": 2422 }, { "epoch": 0.17, "grad_norm": 1.893047176166037, "learning_rate": 9.471220622010366e-06, "loss": 0.2438, "step": 2423 }, { "epoch": 0.17, "grad_norm": 1.276670629899183, "learning_rate": 9.47070202391917e-06, "loss": 0.162, "step": 2424 }, { "epoch": 0.17, "grad_norm": 1.5753352557350828, "learning_rate": 9.470183185858447e-06, "loss": 0.2216, "step": 2425 }, { "epoch": 0.17, "grad_norm": 1.394180061701923, "learning_rate": 9.469664107856054e-06, "loss": 0.2301, "step": 2426 }, { "epoch": 0.17, "grad_norm": 1.3917635561478825, "learning_rate": 9.469144789939848e-06, "loss": 0.2055, "step": 2427 }, { "epoch": 0.17, "grad_norm": 1.5224918506057952, "learning_rate": 9.468625232137703e-06, "loss": 0.2842, "step": 2428 }, { "epoch": 0.17, "grad_norm": 1.4027595752722253, "learning_rate": 9.46810543447751e-06, "loss": 0.1792, "step": 2429 }, { "epoch": 0.17, "grad_norm": 1.5610562148275493, "learning_rate": 9.467585396987168e-06, "loss": 0.2396, "step": 2430 }, { "epoch": 0.17, "grad_norm": 1.5809477351904575, "learning_rate": 9.467065119694589e-06, "loss": 0.2505, "step": 2431 }, { "epoch": 0.17, "grad_norm": 1.4558217715441395, "learning_rate": 9.466544602627703e-06, "loss": 0.189, "step": 2432 }, { "epoch": 0.17, "grad_norm": 1.7046440626205586, "learning_rate": 9.466023845814446e-06, "loss": 0.2188, "step": 2433 }, { "epoch": 0.17, "grad_norm": 1.3646207248336062, "learning_rate": 9.465502849282773e-06, "loss": 0.1969, "step": 2434 }, { "epoch": 0.17, "grad_norm": 1.3955047251744517, "learning_rate": 9.464981613060648e-06, "loss": 0.1948, "step": 2435 }, { "epoch": 0.17, "grad_norm": 1.394756048873777, "learning_rate": 9.464460137176047e-06, "loss": 0.1959, "step": 2436 }, { "epoch": 0.17, "grad_norm": 1.5448350162022106, "learning_rate": 9.463938421656962e-06, "loss": 0.2363, "step": 2437 }, { "epoch": 0.17, "grad_norm": 1.4074420286439586, "learning_rate": 9.463416466531397e-06, "loss": 0.1976, "step": 2438 }, { "epoch": 0.17, "grad_norm": 1.397981760304576, "learning_rate": 9.46289427182737e-06, "loss": 0.2439, "step": 2439 }, { "epoch": 0.17, "grad_norm": 5.5910820732962385, "learning_rate": 9.462371837572907e-06, "loss": 0.8099, "step": 2440 }, { "epoch": 0.17, "grad_norm": 1.3424003112067515, "learning_rate": 9.461849163796052e-06, "loss": 0.2006, "step": 2441 }, { "epoch": 0.17, "grad_norm": 1.3212945336196555, "learning_rate": 9.461326250524861e-06, "loss": 0.2031, "step": 2442 }, { "epoch": 0.17, "grad_norm": 1.4978260459150183, "learning_rate": 9.460803097787398e-06, "loss": 0.2161, "step": 2443 }, { "epoch": 0.17, "grad_norm": 1.386586616899618, "learning_rate": 9.460279705611748e-06, "loss": 0.2189, "step": 2444 }, { "epoch": 0.17, "grad_norm": 5.765708090831872, "learning_rate": 9.459756074026002e-06, "loss": 0.5152, "step": 2445 }, { "epoch": 0.17, "grad_norm": 1.4784525288847616, "learning_rate": 9.45923220305827e-06, "loss": 0.2042, "step": 2446 }, { "epoch": 0.18, "grad_norm": 1.4685467977243203, "learning_rate": 9.458708092736669e-06, "loss": 0.2203, "step": 2447 }, { "epoch": 0.18, "grad_norm": 2.210446188253907, "learning_rate": 9.45818374308933e-06, "loss": 0.2261, "step": 2448 }, { "epoch": 0.18, "grad_norm": 1.4903827313726299, "learning_rate": 9.457659154144398e-06, "loss": 0.1725, "step": 2449 }, { "epoch": 0.18, "grad_norm": 1.729920456377542, "learning_rate": 9.457134325930034e-06, "loss": 0.2202, "step": 2450 }, { "epoch": 0.18, "grad_norm": 1.3130165283240416, "learning_rate": 9.456609258474406e-06, "loss": 0.2471, "step": 2451 }, { "epoch": 0.18, "grad_norm": 1.4556616212347169, "learning_rate": 9.456083951805698e-06, "loss": 0.2284, "step": 2452 }, { "epoch": 0.18, "grad_norm": 1.4520219612286838, "learning_rate": 9.455558405952105e-06, "loss": 0.2317, "step": 2453 }, { "epoch": 0.18, "grad_norm": 1.4547885933196265, "learning_rate": 9.45503262094184e-06, "loss": 0.2229, "step": 2454 }, { "epoch": 0.18, "grad_norm": 1.3152056381543786, "learning_rate": 9.454506596803122e-06, "loss": 0.2263, "step": 2455 }, { "epoch": 0.18, "grad_norm": 1.3395566946378628, "learning_rate": 9.453980333564187e-06, "loss": 0.191, "step": 2456 }, { "epoch": 0.18, "grad_norm": 1.3764276354089426, "learning_rate": 9.453453831253282e-06, "loss": 0.214, "step": 2457 }, { "epoch": 0.18, "grad_norm": 1.47030020912995, "learning_rate": 9.452927089898667e-06, "loss": 0.1772, "step": 2458 }, { "epoch": 0.18, "grad_norm": 1.3515915272953936, "learning_rate": 9.452400109528619e-06, "loss": 0.2668, "step": 2459 }, { "epoch": 0.18, "grad_norm": 1.4709508758961785, "learning_rate": 9.451872890171419e-06, "loss": 0.202, "step": 2460 }, { "epoch": 0.18, "grad_norm": 1.5699363811627245, "learning_rate": 9.45134543185537e-06, "loss": 0.2342, "step": 2461 }, { "epoch": 0.18, "grad_norm": 1.4367649720743676, "learning_rate": 9.45081773460878e-06, "loss": 0.2186, "step": 2462 }, { "epoch": 0.18, "grad_norm": 1.5257438842110773, "learning_rate": 9.45028979845998e-06, "loss": 0.2453, "step": 2463 }, { "epoch": 0.18, "grad_norm": 1.554377577091035, "learning_rate": 9.449761623437302e-06, "loss": 0.2319, "step": 2464 }, { "epoch": 0.18, "grad_norm": 1.334766203130036, "learning_rate": 9.449233209569098e-06, "loss": 0.2022, "step": 2465 }, { "epoch": 0.18, "grad_norm": 1.6710315748597055, "learning_rate": 9.448704556883731e-06, "loss": 0.2428, "step": 2466 }, { "epoch": 0.18, "grad_norm": 5.93872037486805, "learning_rate": 9.448175665409578e-06, "loss": 0.5796, "step": 2467 }, { "epoch": 0.18, "grad_norm": 1.4304675392597739, "learning_rate": 9.447646535175027e-06, "loss": 0.1764, "step": 2468 }, { "epoch": 0.18, "grad_norm": 1.5910803662028452, "learning_rate": 9.447117166208477e-06, "loss": 0.2214, "step": 2469 }, { "epoch": 0.18, "grad_norm": 1.475957015041202, "learning_rate": 9.446587558538346e-06, "loss": 0.2208, "step": 2470 }, { "epoch": 0.18, "grad_norm": 1.4375360789950442, "learning_rate": 9.446057712193063e-06, "loss": 0.2124, "step": 2471 }, { "epoch": 0.18, "grad_norm": 1.3816179743264985, "learning_rate": 9.445527627201064e-06, "loss": 0.2246, "step": 2472 }, { "epoch": 0.18, "grad_norm": 1.4172040387055247, "learning_rate": 9.444997303590802e-06, "loss": 0.2267, "step": 2473 }, { "epoch": 0.18, "grad_norm": 1.55519095085811, "learning_rate": 9.444466741390743e-06, "loss": 0.2109, "step": 2474 }, { "epoch": 0.18, "grad_norm": 1.417476806387846, "learning_rate": 9.443935940629368e-06, "loss": 0.2168, "step": 2475 }, { "epoch": 0.18, "grad_norm": 1.458826150167142, "learning_rate": 9.443404901335167e-06, "loss": 0.2439, "step": 2476 }, { "epoch": 0.18, "grad_norm": 1.4846456027069463, "learning_rate": 9.442873623536642e-06, "loss": 0.2677, "step": 2477 }, { "epoch": 0.18, "grad_norm": 1.2692713615137534, "learning_rate": 9.44234210726231e-06, "loss": 0.1849, "step": 2478 }, { "epoch": 0.18, "grad_norm": 1.3879901088283517, "learning_rate": 9.441810352540705e-06, "loss": 0.168, "step": 2479 }, { "epoch": 0.18, "grad_norm": 1.8379916711988598, "learning_rate": 9.441278359400366e-06, "loss": 0.2587, "step": 2480 }, { "epoch": 0.18, "grad_norm": 1.2768327501037542, "learning_rate": 9.440746127869849e-06, "loss": 0.194, "step": 2481 }, { "epoch": 0.18, "grad_norm": 1.6528708633444582, "learning_rate": 9.440213657977721e-06, "loss": 0.2457, "step": 2482 }, { "epoch": 0.18, "grad_norm": 5.582112741107105, "learning_rate": 9.439680949752566e-06, "loss": 0.7004, "step": 2483 }, { "epoch": 0.18, "grad_norm": 1.5742574477787226, "learning_rate": 9.439148003222973e-06, "loss": 0.2496, "step": 2484 }, { "epoch": 0.18, "grad_norm": 1.3700644895289067, "learning_rate": 9.438614818417553e-06, "loss": 0.225, "step": 2485 }, { "epoch": 0.18, "grad_norm": 1.3939017135069647, "learning_rate": 9.438081395364922e-06, "loss": 0.224, "step": 2486 }, { "epoch": 0.18, "grad_norm": 1.2183299732774795, "learning_rate": 9.437547734093714e-06, "loss": 0.1613, "step": 2487 }, { "epoch": 0.18, "grad_norm": 1.4507348011498014, "learning_rate": 9.437013834632573e-06, "loss": 0.2167, "step": 2488 }, { "epoch": 0.18, "grad_norm": 1.3593412855339093, "learning_rate": 9.436479697010156e-06, "loss": 0.2003, "step": 2489 }, { "epoch": 0.18, "grad_norm": 1.3522448468440682, "learning_rate": 9.435945321255135e-06, "loss": 0.1857, "step": 2490 }, { "epoch": 0.18, "grad_norm": 1.446281182995164, "learning_rate": 9.435410707396192e-06, "loss": 0.1955, "step": 2491 }, { "epoch": 0.18, "grad_norm": 1.3487644674149917, "learning_rate": 9.434875855462023e-06, "loss": 0.2415, "step": 2492 }, { "epoch": 0.18, "grad_norm": 1.477806549360147, "learning_rate": 9.434340765481335e-06, "loss": 0.1968, "step": 2493 }, { "epoch": 0.18, "grad_norm": 4.950906532631057, "learning_rate": 9.433805437482854e-06, "loss": 0.7637, "step": 2494 }, { "epoch": 0.18, "grad_norm": 1.309629615185627, "learning_rate": 9.433269871495311e-06, "loss": 0.2364, "step": 2495 }, { "epoch": 0.18, "grad_norm": 1.3308815833565133, "learning_rate": 9.432734067547454e-06, "loss": 0.1978, "step": 2496 }, { "epoch": 0.18, "grad_norm": 1.5893063479965046, "learning_rate": 9.432198025668043e-06, "loss": 0.2799, "step": 2497 }, { "epoch": 0.18, "grad_norm": 1.3364069514862345, "learning_rate": 9.431661745885848e-06, "loss": 0.2023, "step": 2498 }, { "epoch": 0.18, "grad_norm": 1.394018826942934, "learning_rate": 9.431125228229658e-06, "loss": 0.218, "step": 2499 }, { "epoch": 0.18, "grad_norm": 1.502472282375056, "learning_rate": 9.430588472728271e-06, "loss": 0.2273, "step": 2500 }, { "epoch": 0.18, "grad_norm": 1.353080094648928, "learning_rate": 9.430051479410493e-06, "loss": 0.2206, "step": 2501 }, { "epoch": 0.18, "grad_norm": 1.450405257071622, "learning_rate": 9.429514248305154e-06, "loss": 0.2033, "step": 2502 }, { "epoch": 0.18, "grad_norm": 1.4796355156151553, "learning_rate": 9.428976779441089e-06, "loss": 0.2095, "step": 2503 }, { "epoch": 0.18, "grad_norm": 1.4731564633678975, "learning_rate": 9.428439072847142e-06, "loss": 0.1713, "step": 2504 }, { "epoch": 0.18, "grad_norm": 1.3858268422265942, "learning_rate": 9.427901128552182e-06, "loss": 0.224, "step": 2505 }, { "epoch": 0.18, "grad_norm": 1.4999664683552298, "learning_rate": 9.427362946585078e-06, "loss": 0.2538, "step": 2506 }, { "epoch": 0.18, "grad_norm": 1.443221153338652, "learning_rate": 9.426824526974721e-06, "loss": 0.232, "step": 2507 }, { "epoch": 0.18, "grad_norm": 1.5154799523932865, "learning_rate": 9.426285869750012e-06, "loss": 0.2653, "step": 2508 }, { "epoch": 0.18, "grad_norm": 1.7375101278234002, "learning_rate": 9.42574697493986e-06, "loss": 0.2563, "step": 2509 }, { "epoch": 0.18, "grad_norm": 1.7278190930598922, "learning_rate": 9.425207842573193e-06, "loss": 0.2373, "step": 2510 }, { "epoch": 0.18, "grad_norm": 6.112438188896315, "learning_rate": 9.42466847267895e-06, "loss": 0.7665, "step": 2511 }, { "epoch": 0.18, "grad_norm": 1.3670854493249174, "learning_rate": 9.424128865286082e-06, "loss": 0.2108, "step": 2512 }, { "epoch": 0.18, "grad_norm": 1.386900899115097, "learning_rate": 9.423589020423554e-06, "loss": 0.2055, "step": 2513 }, { "epoch": 0.18, "grad_norm": 1.3364404562962722, "learning_rate": 9.423048938120338e-06, "loss": 0.2303, "step": 2514 }, { "epoch": 0.18, "grad_norm": 1.5204239142157632, "learning_rate": 9.422508618405431e-06, "loss": 0.2188, "step": 2515 }, { "epoch": 0.18, "grad_norm": 1.447814845796347, "learning_rate": 9.421968061307826e-06, "loss": 0.24, "step": 2516 }, { "epoch": 0.18, "grad_norm": 1.345980152762442, "learning_rate": 9.421427266856546e-06, "loss": 0.2072, "step": 2517 }, { "epoch": 0.18, "grad_norm": 1.4858305049235843, "learning_rate": 9.420886235080616e-06, "loss": 0.2051, "step": 2518 }, { "epoch": 0.18, "grad_norm": 4.953166338104311, "learning_rate": 9.420344966009076e-06, "loss": 0.6718, "step": 2519 }, { "epoch": 0.18, "grad_norm": 1.4302932235919987, "learning_rate": 9.41980345967098e-06, "loss": 0.2218, "step": 2520 }, { "epoch": 0.18, "grad_norm": 1.5175740000448021, "learning_rate": 9.419261716095392e-06, "loss": 0.2535, "step": 2521 }, { "epoch": 0.18, "grad_norm": 1.3118175933393332, "learning_rate": 9.41871973531139e-06, "loss": 0.1868, "step": 2522 }, { "epoch": 0.18, "grad_norm": 1.5676814743103566, "learning_rate": 9.41817751734807e-06, "loss": 0.2724, "step": 2523 }, { "epoch": 0.18, "grad_norm": 6.482636525205323, "learning_rate": 9.417635062234533e-06, "loss": 0.8307, "step": 2524 }, { "epoch": 0.18, "grad_norm": 1.297961969341316, "learning_rate": 9.417092369999897e-06, "loss": 0.1962, "step": 2525 }, { "epoch": 0.18, "grad_norm": 1.4125358528613452, "learning_rate": 9.416549440673288e-06, "loss": 0.1973, "step": 2526 }, { "epoch": 0.18, "grad_norm": 1.296176583011786, "learning_rate": 9.416006274283853e-06, "loss": 0.1854, "step": 2527 }, { "epoch": 0.18, "grad_norm": 1.3313935994406978, "learning_rate": 9.415462870860746e-06, "loss": 0.2461, "step": 2528 }, { "epoch": 0.18, "grad_norm": 1.6780353280629303, "learning_rate": 9.41491923043313e-06, "loss": 0.2574, "step": 2529 }, { "epoch": 0.18, "grad_norm": 5.499101368110134, "learning_rate": 9.414375353030192e-06, "loss": 0.6757, "step": 2530 }, { "epoch": 0.18, "grad_norm": 1.4853714488403271, "learning_rate": 9.413831238681124e-06, "loss": 0.2355, "step": 2531 }, { "epoch": 0.18, "grad_norm": 1.569123284488001, "learning_rate": 9.413286887415128e-06, "loss": 0.197, "step": 2532 }, { "epoch": 0.18, "grad_norm": 1.4169325458493869, "learning_rate": 9.412742299261426e-06, "loss": 0.251, "step": 2533 }, { "epoch": 0.18, "grad_norm": 1.510651441098901, "learning_rate": 9.412197474249247e-06, "loss": 0.2637, "step": 2534 }, { "epoch": 0.18, "grad_norm": 1.2067362394542676, "learning_rate": 9.411652412407837e-06, "loss": 0.2162, "step": 2535 }, { "epoch": 0.18, "grad_norm": 1.2477273083006541, "learning_rate": 9.411107113766452e-06, "loss": 0.1857, "step": 2536 }, { "epoch": 0.18, "grad_norm": 1.4250817536231168, "learning_rate": 9.410561578354362e-06, "loss": 0.2028, "step": 2537 }, { "epoch": 0.18, "grad_norm": 1.1801288799728686, "learning_rate": 9.410015806200849e-06, "loss": 0.2395, "step": 2538 }, { "epoch": 0.18, "grad_norm": 1.4909444786860953, "learning_rate": 9.409469797335206e-06, "loss": 0.2295, "step": 2539 }, { "epoch": 0.18, "grad_norm": 1.516145414170348, "learning_rate": 9.408923551786742e-06, "loss": 0.2365, "step": 2540 }, { "epoch": 0.18, "grad_norm": 1.4758053953734014, "learning_rate": 9.40837706958478e-06, "loss": 0.2026, "step": 2541 }, { "epoch": 0.18, "grad_norm": 1.4286178469646953, "learning_rate": 9.40783035075865e-06, "loss": 0.2184, "step": 2542 }, { "epoch": 0.18, "grad_norm": 1.4604527487138599, "learning_rate": 9.407283395337696e-06, "loss": 0.2179, "step": 2543 }, { "epoch": 0.18, "grad_norm": 1.7185156550509677, "learning_rate": 9.406736203351278e-06, "loss": 0.2346, "step": 2544 }, { "epoch": 0.18, "grad_norm": 1.512872717059322, "learning_rate": 9.406188774828771e-06, "loss": 0.1901, "step": 2545 }, { "epoch": 0.18, "grad_norm": 1.4936408127657042, "learning_rate": 9.405641109799554e-06, "loss": 0.2307, "step": 2546 }, { "epoch": 0.18, "grad_norm": 1.667225573679939, "learning_rate": 9.405093208293023e-06, "loss": 0.2691, "step": 2547 }, { "epoch": 0.18, "grad_norm": 1.2487556753117102, "learning_rate": 9.404545070338593e-06, "loss": 0.2199, "step": 2548 }, { "epoch": 0.18, "grad_norm": 1.5825009255579274, "learning_rate": 9.403996695965677e-06, "loss": 0.2049, "step": 2549 }, { "epoch": 0.18, "grad_norm": 1.5170348902208557, "learning_rate": 9.403448085203718e-06, "loss": 0.2035, "step": 2550 }, { "epoch": 0.18, "grad_norm": 1.5473668278518455, "learning_rate": 9.402899238082159e-06, "loss": 0.215, "step": 2551 }, { "epoch": 0.18, "grad_norm": 1.275240360202992, "learning_rate": 9.402350154630462e-06, "loss": 0.1691, "step": 2552 }, { "epoch": 0.18, "grad_norm": 1.3418197112154044, "learning_rate": 9.401800834878096e-06, "loss": 0.2019, "step": 2553 }, { "epoch": 0.18, "grad_norm": 1.4542036000172531, "learning_rate": 9.401251278854551e-06, "loss": 0.2382, "step": 2554 }, { "epoch": 0.18, "grad_norm": 1.2648990973797447, "learning_rate": 9.40070148658932e-06, "loss": 0.2167, "step": 2555 }, { "epoch": 0.18, "grad_norm": 1.3670632272309051, "learning_rate": 9.400151458111918e-06, "loss": 0.2268, "step": 2556 }, { "epoch": 0.18, "grad_norm": 11.337470764442655, "learning_rate": 9.399601193451866e-06, "loss": 0.4662, "step": 2557 }, { "epoch": 0.18, "grad_norm": 1.4892041476995646, "learning_rate": 9.3990506926387e-06, "loss": 0.2519, "step": 2558 }, { "epoch": 0.18, "grad_norm": 5.119669690320454, "learning_rate": 9.39849995570197e-06, "loss": 0.6525, "step": 2559 }, { "epoch": 0.18, "grad_norm": 1.465976325894581, "learning_rate": 9.397948982671237e-06, "loss": 0.27, "step": 2560 }, { "epoch": 0.18, "grad_norm": 8.08568928988736, "learning_rate": 9.397397773576074e-06, "loss": 0.6543, "step": 2561 }, { "epoch": 0.18, "grad_norm": 1.5189782675403085, "learning_rate": 9.39684632844607e-06, "loss": 0.242, "step": 2562 }, { "epoch": 0.18, "grad_norm": 1.4071735096660711, "learning_rate": 9.396294647310822e-06, "loss": 0.19, "step": 2563 }, { "epoch": 0.18, "grad_norm": 1.4370055552296672, "learning_rate": 9.395742730199942e-06, "loss": 0.1654, "step": 2564 }, { "epoch": 0.18, "grad_norm": 5.970414512865405, "learning_rate": 9.395190577143057e-06, "loss": 0.749, "step": 2565 }, { "epoch": 0.18, "grad_norm": 1.484066099046267, "learning_rate": 9.394638188169803e-06, "loss": 0.2086, "step": 2566 }, { "epoch": 0.18, "grad_norm": 1.486353336139996, "learning_rate": 9.394085563309827e-06, "loss": 0.2414, "step": 2567 }, { "epoch": 0.18, "grad_norm": 1.4810219803692295, "learning_rate": 9.393532702592798e-06, "loss": 0.2556, "step": 2568 }, { "epoch": 0.18, "grad_norm": 1.4140330510795132, "learning_rate": 9.392979606048386e-06, "loss": 0.2422, "step": 2569 }, { "epoch": 0.18, "grad_norm": 1.3740066127090602, "learning_rate": 9.392426273706284e-06, "loss": 0.2165, "step": 2570 }, { "epoch": 0.18, "grad_norm": 1.3848248012270683, "learning_rate": 9.391872705596186e-06, "loss": 0.2233, "step": 2571 }, { "epoch": 0.18, "grad_norm": 1.238710975646007, "learning_rate": 9.391318901747812e-06, "loss": 0.1966, "step": 2572 }, { "epoch": 0.18, "grad_norm": 1.467387934226765, "learning_rate": 9.390764862190885e-06, "loss": 0.2275, "step": 2573 }, { "epoch": 0.18, "grad_norm": 1.3381964919802931, "learning_rate": 9.390210586955143e-06, "loss": 0.2334, "step": 2574 }, { "epoch": 0.18, "grad_norm": 4.150413931274772, "learning_rate": 9.389656076070337e-06, "loss": 0.3801, "step": 2575 }, { "epoch": 0.18, "grad_norm": 1.6358963938446924, "learning_rate": 9.389101329566234e-06, "loss": 0.238, "step": 2576 }, { "epoch": 0.18, "grad_norm": 1.3506830108362546, "learning_rate": 9.388546347472608e-06, "loss": 0.185, "step": 2577 }, { "epoch": 0.18, "grad_norm": 1.395977514191067, "learning_rate": 9.387991129819249e-06, "loss": 0.2077, "step": 2578 }, { "epoch": 0.18, "grad_norm": 1.4421513975374125, "learning_rate": 9.387435676635958e-06, "loss": 0.2438, "step": 2579 }, { "epoch": 0.18, "grad_norm": 1.5283789095280744, "learning_rate": 9.386879987952549e-06, "loss": 0.271, "step": 2580 }, { "epoch": 0.18, "grad_norm": 1.5573160095369922, "learning_rate": 9.386324063798853e-06, "loss": 0.2826, "step": 2581 }, { "epoch": 0.18, "grad_norm": 1.6341628290442851, "learning_rate": 9.385767904204705e-06, "loss": 0.2357, "step": 2582 }, { "epoch": 0.18, "grad_norm": 1.2991602293304791, "learning_rate": 9.385211509199961e-06, "loss": 0.2001, "step": 2583 }, { "epoch": 0.18, "grad_norm": 1.3820289229756992, "learning_rate": 9.384654878814485e-06, "loss": 0.194, "step": 2584 }, { "epoch": 0.18, "grad_norm": 1.5704484318379797, "learning_rate": 9.384098013078153e-06, "loss": 0.2075, "step": 2585 }, { "epoch": 0.18, "grad_norm": 1.4569078715468275, "learning_rate": 9.38354091202086e-06, "loss": 0.2265, "step": 2586 }, { "epoch": 0.19, "grad_norm": 1.366742029026146, "learning_rate": 9.382983575672502e-06, "loss": 0.2244, "step": 2587 }, { "epoch": 0.19, "grad_norm": 1.4285443750863607, "learning_rate": 9.382426004063e-06, "loss": 0.2129, "step": 2588 }, { "epoch": 0.19, "grad_norm": 1.4159106749452377, "learning_rate": 9.381868197222279e-06, "loss": 0.1942, "step": 2589 }, { "epoch": 0.19, "grad_norm": 1.47367493253568, "learning_rate": 9.381310155180283e-06, "loss": 0.2815, "step": 2590 }, { "epoch": 0.19, "grad_norm": 1.5011937629642231, "learning_rate": 9.380751877966964e-06, "loss": 0.2164, "step": 2591 }, { "epoch": 0.19, "grad_norm": 1.465458499746767, "learning_rate": 9.380193365612288e-06, "loss": 0.2406, "step": 2592 }, { "epoch": 0.19, "grad_norm": 7.220150935569685, "learning_rate": 9.379634618146234e-06, "loss": 0.657, "step": 2593 }, { "epoch": 0.19, "grad_norm": 1.3745933997369417, "learning_rate": 9.379075635598792e-06, "loss": 0.1996, "step": 2594 }, { "epoch": 0.19, "grad_norm": 1.5565525687039836, "learning_rate": 9.378516417999967e-06, "loss": 0.2278, "step": 2595 }, { "epoch": 0.19, "grad_norm": 1.3656471769207637, "learning_rate": 9.377956965379776e-06, "loss": 0.21, "step": 2596 }, { "epoch": 0.19, "grad_norm": 1.485108983419183, "learning_rate": 9.377397277768247e-06, "loss": 0.2394, "step": 2597 }, { "epoch": 0.19, "grad_norm": 1.5186262861840791, "learning_rate": 9.376837355195424e-06, "loss": 0.2379, "step": 2598 }, { "epoch": 0.19, "grad_norm": 1.3456319799277998, "learning_rate": 9.37627719769136e-06, "loss": 0.2136, "step": 2599 }, { "epoch": 0.19, "grad_norm": 1.4797305119689288, "learning_rate": 9.375716805286122e-06, "loss": 0.2223, "step": 2600 }, { "epoch": 0.19, "grad_norm": 1.3784933414834113, "learning_rate": 9.375156178009787e-06, "loss": 0.2112, "step": 2601 }, { "epoch": 0.19, "grad_norm": 1.362812963726172, "learning_rate": 9.374595315892453e-06, "loss": 0.2467, "step": 2602 }, { "epoch": 0.19, "grad_norm": 1.5282408902874565, "learning_rate": 9.37403421896422e-06, "loss": 0.2369, "step": 2603 }, { "epoch": 0.19, "grad_norm": 1.4662727727613907, "learning_rate": 9.373472887255209e-06, "loss": 0.2563, "step": 2604 }, { "epoch": 0.19, "grad_norm": 5.392805867502153, "learning_rate": 9.372911320795548e-06, "loss": 0.6633, "step": 2605 }, { "epoch": 0.19, "grad_norm": 7.411979198973745, "learning_rate": 9.372349519615378e-06, "loss": 0.6402, "step": 2606 }, { "epoch": 0.19, "grad_norm": 1.3296598634771524, "learning_rate": 9.371787483744858e-06, "loss": 0.2245, "step": 2607 }, { "epoch": 0.19, "grad_norm": 1.2074160163078957, "learning_rate": 9.371225213214155e-06, "loss": 0.1935, "step": 2608 }, { "epoch": 0.19, "grad_norm": 1.2345187064089502, "learning_rate": 9.370662708053446e-06, "loss": 0.2026, "step": 2609 }, { "epoch": 0.19, "grad_norm": 1.5670469034123782, "learning_rate": 9.370099968292928e-06, "loss": 0.3105, "step": 2610 }, { "epoch": 0.19, "grad_norm": 1.2736884433859044, "learning_rate": 9.369536993962805e-06, "loss": 0.1929, "step": 2611 }, { "epoch": 0.19, "grad_norm": 1.552111752667459, "learning_rate": 9.368973785093294e-06, "loss": 0.2218, "step": 2612 }, { "epoch": 0.19, "grad_norm": 1.2903047429914232, "learning_rate": 9.36841034171463e-06, "loss": 0.1725, "step": 2613 }, { "epoch": 0.19, "grad_norm": 5.316133816347541, "learning_rate": 9.367846663857052e-06, "loss": 0.6253, "step": 2614 }, { "epoch": 0.19, "grad_norm": 1.5343751127083, "learning_rate": 9.367282751550818e-06, "loss": 0.2547, "step": 2615 }, { "epoch": 0.19, "grad_norm": 1.3955394327221744, "learning_rate": 9.366718604826196e-06, "loss": 0.2411, "step": 2616 }, { "epoch": 0.19, "grad_norm": 1.310393300970099, "learning_rate": 9.366154223713468e-06, "loss": 0.1985, "step": 2617 }, { "epoch": 0.19, "grad_norm": 1.3973836522775696, "learning_rate": 9.365589608242925e-06, "loss": 0.2059, "step": 2618 }, { "epoch": 0.19, "grad_norm": 1.3238252565612803, "learning_rate": 9.365024758444876e-06, "loss": 0.211, "step": 2619 }, { "epoch": 0.19, "grad_norm": 5.608485287603873, "learning_rate": 9.364459674349642e-06, "loss": 0.7084, "step": 2620 }, { "epoch": 0.19, "grad_norm": 1.54372780776565, "learning_rate": 9.363894355987548e-06, "loss": 0.2312, "step": 2621 }, { "epoch": 0.19, "grad_norm": 1.4399688844193446, "learning_rate": 9.363328803388945e-06, "loss": 0.2341, "step": 2622 }, { "epoch": 0.19, "grad_norm": 1.555308097709079, "learning_rate": 9.362763016584185e-06, "loss": 0.2128, "step": 2623 }, { "epoch": 0.19, "grad_norm": 1.6054150228149366, "learning_rate": 9.362196995603638e-06, "loss": 0.2671, "step": 2624 }, { "epoch": 0.19, "grad_norm": 6.7441345999217495, "learning_rate": 9.361630740477687e-06, "loss": 0.748, "step": 2625 }, { "epoch": 0.19, "grad_norm": 1.3898886491048759, "learning_rate": 9.361064251236727e-06, "loss": 0.238, "step": 2626 }, { "epoch": 0.19, "grad_norm": 1.7681876860381698, "learning_rate": 9.360497527911161e-06, "loss": 0.2653, "step": 2627 }, { "epoch": 0.19, "grad_norm": 1.3830518890928463, "learning_rate": 9.359930570531412e-06, "loss": 0.1947, "step": 2628 }, { "epoch": 0.19, "grad_norm": 4.721124030620209, "learning_rate": 9.359363379127913e-06, "loss": 0.6674, "step": 2629 }, { "epoch": 0.19, "grad_norm": 1.3715638671589498, "learning_rate": 9.358795953731105e-06, "loss": 0.2118, "step": 2630 }, { "epoch": 0.19, "grad_norm": 5.300925773108132, "learning_rate": 9.358228294371447e-06, "loss": 0.574, "step": 2631 }, { "epoch": 0.19, "grad_norm": 1.470608078411285, "learning_rate": 9.357660401079407e-06, "loss": 0.1929, "step": 2632 }, { "epoch": 0.19, "grad_norm": 1.2712053947491184, "learning_rate": 9.357092273885471e-06, "loss": 0.2207, "step": 2633 }, { "epoch": 0.19, "grad_norm": 1.343966932686822, "learning_rate": 9.35652391282013e-06, "loss": 0.2021, "step": 2634 }, { "epoch": 0.19, "grad_norm": 1.4397501971606592, "learning_rate": 9.355955317913895e-06, "loss": 0.2335, "step": 2635 }, { "epoch": 0.19, "grad_norm": 1.3173720124582087, "learning_rate": 9.35538648919728e-06, "loss": 0.167, "step": 2636 }, { "epoch": 0.19, "grad_norm": 1.3250624739124315, "learning_rate": 9.354817426700823e-06, "loss": 0.1982, "step": 2637 }, { "epoch": 0.19, "grad_norm": 1.4240700305697667, "learning_rate": 9.35424813045507e-06, "loss": 0.2177, "step": 2638 }, { "epoch": 0.19, "grad_norm": 1.3009564031440501, "learning_rate": 9.353678600490574e-06, "loss": 0.2176, "step": 2639 }, { "epoch": 0.19, "grad_norm": 1.1809864114445163, "learning_rate": 9.353108836837907e-06, "loss": 0.1814, "step": 2640 }, { "epoch": 0.19, "grad_norm": 1.6284671722401147, "learning_rate": 9.35253883952765e-06, "loss": 0.2631, "step": 2641 }, { "epoch": 0.19, "grad_norm": 1.584967221164042, "learning_rate": 9.351968608590403e-06, "loss": 0.2671, "step": 2642 }, { "epoch": 0.19, "grad_norm": 1.6393939829993323, "learning_rate": 9.351398144056766e-06, "loss": 0.1816, "step": 2643 }, { "epoch": 0.19, "grad_norm": 1.2956942014534252, "learning_rate": 9.350827445957369e-06, "loss": 0.1869, "step": 2644 }, { "epoch": 0.19, "grad_norm": 1.3632702962493306, "learning_rate": 9.350256514322836e-06, "loss": 0.2217, "step": 2645 }, { "epoch": 0.19, "grad_norm": 5.605739316717559, "learning_rate": 9.349685349183817e-06, "loss": 0.5691, "step": 2646 }, { "epoch": 0.19, "grad_norm": 1.3461219548050736, "learning_rate": 9.349113950570969e-06, "loss": 0.1882, "step": 2647 }, { "epoch": 0.19, "grad_norm": 1.41257908296118, "learning_rate": 9.348542318514963e-06, "loss": 0.2178, "step": 2648 }, { "epoch": 0.19, "grad_norm": 5.683448129892471, "learning_rate": 9.34797045304648e-06, "loss": 0.7057, "step": 2649 }, { "epoch": 0.19, "grad_norm": 1.540053669782913, "learning_rate": 9.347398354196218e-06, "loss": 0.1963, "step": 2650 }, { "epoch": 0.19, "grad_norm": 1.3848077156393017, "learning_rate": 9.346826021994882e-06, "loss": 0.1656, "step": 2651 }, { "epoch": 0.19, "grad_norm": 1.7088747074613753, "learning_rate": 9.346253456473196e-06, "loss": 0.1953, "step": 2652 }, { "epoch": 0.19, "grad_norm": 1.356861984625284, "learning_rate": 9.345680657661889e-06, "loss": 0.2016, "step": 2653 }, { "epoch": 0.19, "grad_norm": 1.3702899839318514, "learning_rate": 9.34510762559171e-06, "loss": 0.1937, "step": 2654 }, { "epoch": 0.19, "grad_norm": 1.5373665955594087, "learning_rate": 9.344534360293416e-06, "loss": 0.2262, "step": 2655 }, { "epoch": 0.19, "grad_norm": 1.3508817893590057, "learning_rate": 9.343960861797776e-06, "loss": 0.2074, "step": 2656 }, { "epoch": 0.19, "grad_norm": 1.668990845109574, "learning_rate": 9.343387130135578e-06, "loss": 0.2209, "step": 2657 }, { "epoch": 0.19, "grad_norm": 1.2624700293295148, "learning_rate": 9.34281316533761e-06, "loss": 0.1696, "step": 2658 }, { "epoch": 0.19, "grad_norm": 1.371125337285241, "learning_rate": 9.342238967434687e-06, "loss": 0.2321, "step": 2659 }, { "epoch": 0.19, "grad_norm": 4.3154912091480195, "learning_rate": 9.341664536457626e-06, "loss": 0.6463, "step": 2660 }, { "epoch": 0.19, "grad_norm": 5.228365937805923, "learning_rate": 9.34108987243726e-06, "loss": 0.6574, "step": 2661 }, { "epoch": 0.19, "grad_norm": 1.5222433283254058, "learning_rate": 9.34051497540444e-06, "loss": 0.2486, "step": 2662 }, { "epoch": 0.19, "grad_norm": 1.186250406634907, "learning_rate": 9.339939845390017e-06, "loss": 0.1595, "step": 2663 }, { "epoch": 0.19, "grad_norm": 1.2586567404213749, "learning_rate": 9.339364482424865e-06, "loss": 0.1787, "step": 2664 }, { "epoch": 0.19, "grad_norm": 1.4708089042739207, "learning_rate": 9.338788886539867e-06, "loss": 0.2319, "step": 2665 }, { "epoch": 0.19, "grad_norm": 1.4473028547669557, "learning_rate": 9.33821305776592e-06, "loss": 0.2114, "step": 2666 }, { "epoch": 0.19, "grad_norm": 5.008082694813588, "learning_rate": 9.337636996133928e-06, "loss": 0.6501, "step": 2667 }, { "epoch": 0.19, "grad_norm": 1.3928793156903505, "learning_rate": 9.337060701674817e-06, "loss": 0.1992, "step": 2668 }, { "epoch": 0.19, "grad_norm": 7.127217601180208, "learning_rate": 9.336484174419517e-06, "loss": 0.7187, "step": 2669 }, { "epoch": 0.19, "grad_norm": 1.3621883220535445, "learning_rate": 9.335907414398974e-06, "loss": 0.2167, "step": 2670 }, { "epoch": 0.19, "grad_norm": 1.5112950897491666, "learning_rate": 9.335330421644149e-06, "loss": 0.268, "step": 2671 }, { "epoch": 0.19, "grad_norm": 6.6009733635284285, "learning_rate": 9.334753196186008e-06, "loss": 0.6594, "step": 2672 }, { "epoch": 0.19, "grad_norm": 1.4341769174052572, "learning_rate": 9.334175738055536e-06, "loss": 0.2215, "step": 2673 }, { "epoch": 0.19, "grad_norm": 1.537329764551531, "learning_rate": 9.333598047283732e-06, "loss": 0.2354, "step": 2674 }, { "epoch": 0.19, "grad_norm": 1.4103874617643597, "learning_rate": 9.333020123901598e-06, "loss": 0.2131, "step": 2675 }, { "epoch": 0.19, "grad_norm": 1.4867447840007968, "learning_rate": 9.332441967940161e-06, "loss": 0.2322, "step": 2676 }, { "epoch": 0.19, "grad_norm": 1.422852635270182, "learning_rate": 9.331863579430448e-06, "loss": 0.2232, "step": 2677 }, { "epoch": 0.19, "grad_norm": 1.5005484131513953, "learning_rate": 9.331284958403508e-06, "loss": 0.2768, "step": 2678 }, { "epoch": 0.19, "grad_norm": 1.6300623814846578, "learning_rate": 9.3307061048904e-06, "loss": 0.2521, "step": 2679 }, { "epoch": 0.19, "grad_norm": 1.3535516016391143, "learning_rate": 9.330127018922195e-06, "loss": 0.2567, "step": 2680 }, { "epoch": 0.19, "grad_norm": 9.134663939258145, "learning_rate": 9.32954770052997e-06, "loss": 0.4074, "step": 2681 }, { "epoch": 0.19, "grad_norm": 1.4924927869139148, "learning_rate": 9.328968149744828e-06, "loss": 0.2501, "step": 2682 }, { "epoch": 0.19, "grad_norm": 1.6153668058356008, "learning_rate": 9.328388366597873e-06, "loss": 0.2364, "step": 2683 }, { "epoch": 0.19, "grad_norm": 1.247401935794908, "learning_rate": 9.327808351120226e-06, "loss": 0.1843, "step": 2684 }, { "epoch": 0.19, "grad_norm": 1.2616115513981114, "learning_rate": 9.327228103343021e-06, "loss": 0.2139, "step": 2685 }, { "epoch": 0.19, "grad_norm": 1.5528563534981583, "learning_rate": 9.326647623297402e-06, "loss": 0.1905, "step": 2686 }, { "epoch": 0.19, "grad_norm": 1.3720780262954007, "learning_rate": 9.326066911014527e-06, "loss": 0.2023, "step": 2687 }, { "epoch": 0.19, "grad_norm": 1.3948592966233728, "learning_rate": 9.325485966525567e-06, "loss": 0.2033, "step": 2688 }, { "epoch": 0.19, "grad_norm": 1.4118822665499795, "learning_rate": 9.324904789861707e-06, "loss": 0.1953, "step": 2689 }, { "epoch": 0.19, "grad_norm": 1.457205728546168, "learning_rate": 9.324323381054137e-06, "loss": 0.2092, "step": 2690 }, { "epoch": 0.19, "grad_norm": 1.521618661494352, "learning_rate": 9.323741740134066e-06, "loss": 0.2268, "step": 2691 }, { "epoch": 0.19, "grad_norm": 1.5302387460297349, "learning_rate": 9.32315986713272e-06, "loss": 0.2808, "step": 2692 }, { "epoch": 0.19, "grad_norm": 1.4056407809955174, "learning_rate": 9.322577762081324e-06, "loss": 0.2364, "step": 2693 }, { "epoch": 0.19, "grad_norm": 1.5812229698886506, "learning_rate": 9.321995425011127e-06, "loss": 0.2411, "step": 2694 }, { "epoch": 0.19, "grad_norm": 1.4778914911432797, "learning_rate": 9.321412855953386e-06, "loss": 0.2259, "step": 2695 }, { "epoch": 0.19, "grad_norm": 4.5953610546342505, "learning_rate": 9.320830054939373e-06, "loss": 0.4609, "step": 2696 }, { "epoch": 0.19, "grad_norm": 1.3463001817745606, "learning_rate": 9.320247022000366e-06, "loss": 0.1609, "step": 2697 }, { "epoch": 0.19, "grad_norm": 1.4620690972729318, "learning_rate": 9.319663757167663e-06, "loss": 0.2232, "step": 2698 }, { "epoch": 0.19, "grad_norm": 1.571207864020747, "learning_rate": 9.319080260472571e-06, "loss": 0.2458, "step": 2699 }, { "epoch": 0.19, "grad_norm": 7.594287483968256, "learning_rate": 9.318496531946411e-06, "loss": 0.5159, "step": 2700 }, { "epoch": 0.19, "grad_norm": 1.5639165793111875, "learning_rate": 9.31791257162051e-06, "loss": 0.2586, "step": 2701 }, { "epoch": 0.19, "grad_norm": 1.6678175206244428, "learning_rate": 9.317328379526219e-06, "loss": 0.2199, "step": 2702 }, { "epoch": 0.19, "grad_norm": 1.411568760918074, "learning_rate": 9.316743955694892e-06, "loss": 0.1969, "step": 2703 }, { "epoch": 0.19, "grad_norm": 1.6006313297847388, "learning_rate": 9.316159300157899e-06, "loss": 0.2492, "step": 2704 }, { "epoch": 0.19, "grad_norm": 1.439379058915103, "learning_rate": 9.31557441294662e-06, "loss": 0.2225, "step": 2705 }, { "epoch": 0.19, "grad_norm": 1.375470478177647, "learning_rate": 9.314989294092453e-06, "loss": 0.2047, "step": 2706 }, { "epoch": 0.19, "grad_norm": 1.432408221958686, "learning_rate": 9.314403943626804e-06, "loss": 0.246, "step": 2707 }, { "epoch": 0.19, "grad_norm": 1.424167195985073, "learning_rate": 9.31381836158109e-06, "loss": 0.1968, "step": 2708 }, { "epoch": 0.19, "grad_norm": 1.5602232022364202, "learning_rate": 9.313232547986744e-06, "loss": 0.2459, "step": 2709 }, { "epoch": 0.19, "grad_norm": 6.430354091928059, "learning_rate": 9.312646502875213e-06, "loss": 0.7736, "step": 2710 }, { "epoch": 0.19, "grad_norm": 1.5224049519185867, "learning_rate": 9.312060226277948e-06, "loss": 0.2555, "step": 2711 }, { "epoch": 0.19, "grad_norm": 1.5217102315763986, "learning_rate": 9.311473718226422e-06, "loss": 0.2181, "step": 2712 }, { "epoch": 0.19, "grad_norm": 1.3757371671563503, "learning_rate": 9.310886978752116e-06, "loss": 0.247, "step": 2713 }, { "epoch": 0.19, "grad_norm": 1.4554764511425178, "learning_rate": 9.310300007886523e-06, "loss": 0.2572, "step": 2714 }, { "epoch": 0.19, "grad_norm": 1.4223498539004633, "learning_rate": 9.30971280566115e-06, "loss": 0.257, "step": 2715 }, { "epoch": 0.19, "grad_norm": 1.4603009274575902, "learning_rate": 9.309125372107514e-06, "loss": 0.198, "step": 2716 }, { "epoch": 0.19, "grad_norm": 1.3885932419394922, "learning_rate": 9.308537707257148e-06, "loss": 0.2471, "step": 2717 }, { "epoch": 0.19, "grad_norm": 1.3665536893236059, "learning_rate": 9.307949811141595e-06, "loss": 0.1904, "step": 2718 }, { "epoch": 0.19, "grad_norm": 1.7724286761669077, "learning_rate": 9.30736168379241e-06, "loss": 0.2754, "step": 2719 }, { "epoch": 0.19, "grad_norm": 4.468305458607527, "learning_rate": 9.306773325241161e-06, "loss": 0.4609, "step": 2720 }, { "epoch": 0.19, "grad_norm": 1.5025273147906886, "learning_rate": 9.306184735519432e-06, "loss": 0.2285, "step": 2721 }, { "epoch": 0.19, "grad_norm": 5.866954256090008, "learning_rate": 9.305595914658813e-06, "loss": 0.5086, "step": 2722 }, { "epoch": 0.19, "grad_norm": 1.4961517640542386, "learning_rate": 9.30500686269091e-06, "loss": 0.2187, "step": 2723 }, { "epoch": 0.19, "grad_norm": 1.4009562373951943, "learning_rate": 9.304417579647343e-06, "loss": 0.2207, "step": 2724 }, { "epoch": 0.19, "grad_norm": 1.52417488568084, "learning_rate": 9.303828065559739e-06, "loss": 0.2299, "step": 2725 }, { "epoch": 0.19, "grad_norm": 1.3922647711060259, "learning_rate": 9.303238320459743e-06, "loss": 0.2243, "step": 2726 }, { "epoch": 0.2, "grad_norm": 1.4158905057994566, "learning_rate": 9.302648344379011e-06, "loss": 0.2448, "step": 2727 }, { "epoch": 0.2, "grad_norm": 1.4007514263350775, "learning_rate": 9.302058137349208e-06, "loss": 0.224, "step": 2728 }, { "epoch": 0.2, "grad_norm": 1.4415310237479846, "learning_rate": 9.301467699402018e-06, "loss": 0.1995, "step": 2729 }, { "epoch": 0.2, "grad_norm": 1.547873263596923, "learning_rate": 9.300877030569129e-06, "loss": 0.2651, "step": 2730 }, { "epoch": 0.2, "grad_norm": 1.3112016602667436, "learning_rate": 9.300286130882247e-06, "loss": 0.2253, "step": 2731 }, { "epoch": 0.2, "grad_norm": 1.3200207159178472, "learning_rate": 9.29969500037309e-06, "loss": 0.1865, "step": 2732 }, { "epoch": 0.2, "grad_norm": 1.484029897637912, "learning_rate": 9.299103639073388e-06, "loss": 0.1804, "step": 2733 }, { "epoch": 0.2, "grad_norm": 1.3514755449299012, "learning_rate": 9.298512047014881e-06, "loss": 0.2162, "step": 2734 }, { "epoch": 0.2, "grad_norm": 1.3133749155000263, "learning_rate": 9.297920224229324e-06, "loss": 0.1811, "step": 2735 }, { "epoch": 0.2, "grad_norm": 1.6680523931386122, "learning_rate": 9.297328170748484e-06, "loss": 0.2187, "step": 2736 }, { "epoch": 0.2, "grad_norm": 1.3641498118584952, "learning_rate": 9.29673588660414e-06, "loss": 0.1825, "step": 2737 }, { "epoch": 0.2, "grad_norm": 1.3983912375089784, "learning_rate": 9.296143371828085e-06, "loss": 0.1868, "step": 2738 }, { "epoch": 0.2, "grad_norm": 1.4709086835658272, "learning_rate": 9.295550626452121e-06, "loss": 0.2417, "step": 2739 }, { "epoch": 0.2, "grad_norm": 1.4447720380731979, "learning_rate": 9.294957650508065e-06, "loss": 0.2407, "step": 2740 }, { "epoch": 0.2, "grad_norm": 1.351024778018581, "learning_rate": 9.294364444027746e-06, "loss": 0.2056, "step": 2741 }, { "epoch": 0.2, "grad_norm": 1.4556672287236467, "learning_rate": 9.293771007043002e-06, "loss": 0.2441, "step": 2742 }, { "epoch": 0.2, "grad_norm": 1.4059252715979254, "learning_rate": 9.293177339585691e-06, "loss": 0.2102, "step": 2743 }, { "epoch": 0.2, "grad_norm": 1.393370760501809, "learning_rate": 9.292583441687675e-06, "loss": 0.211, "step": 2744 }, { "epoch": 0.2, "grad_norm": 1.3850214815757624, "learning_rate": 9.291989313380833e-06, "loss": 0.2089, "step": 2745 }, { "epoch": 0.2, "grad_norm": 1.5452408158663233, "learning_rate": 9.291394954697058e-06, "loss": 0.2431, "step": 2746 }, { "epoch": 0.2, "grad_norm": 1.54379478589557, "learning_rate": 9.290800365668246e-06, "loss": 0.2444, "step": 2747 }, { "epoch": 0.2, "grad_norm": 1.4235216445906556, "learning_rate": 9.29020554632632e-06, "loss": 0.2208, "step": 2748 }, { "epoch": 0.2, "grad_norm": 1.5615086986420386, "learning_rate": 9.289610496703206e-06, "loss": 0.1903, "step": 2749 }, { "epoch": 0.2, "grad_norm": 4.980866950673275, "learning_rate": 9.289015216830839e-06, "loss": 0.4589, "step": 2750 }, { "epoch": 0.2, "grad_norm": 1.4709741365835498, "learning_rate": 9.288419706741177e-06, "loss": 0.2732, "step": 2751 }, { "epoch": 0.2, "grad_norm": 1.4944737467566327, "learning_rate": 9.28782396646618e-06, "loss": 0.247, "step": 2752 }, { "epoch": 0.2, "grad_norm": 1.2533161569754208, "learning_rate": 9.287227996037829e-06, "loss": 0.2166, "step": 2753 }, { "epoch": 0.2, "grad_norm": 1.5398154363558294, "learning_rate": 9.28663179548811e-06, "loss": 0.2799, "step": 2754 }, { "epoch": 0.2, "grad_norm": 1.366561861880614, "learning_rate": 9.286035364849027e-06, "loss": 0.2144, "step": 2755 }, { "epoch": 0.2, "grad_norm": 1.302162884394884, "learning_rate": 9.285438704152594e-06, "loss": 0.1653, "step": 2756 }, { "epoch": 0.2, "grad_norm": 1.3622804246308315, "learning_rate": 9.284841813430834e-06, "loss": 0.212, "step": 2757 }, { "epoch": 0.2, "grad_norm": 7.824199291912395, "learning_rate": 9.28424469271579e-06, "loss": 0.6597, "step": 2758 }, { "epoch": 0.2, "grad_norm": 3.8210438744484754, "learning_rate": 9.28364734203951e-06, "loss": 0.6027, "step": 2759 }, { "epoch": 0.2, "grad_norm": 1.3327106212448299, "learning_rate": 9.283049761434059e-06, "loss": 0.2064, "step": 2760 }, { "epoch": 0.2, "grad_norm": 1.564747013139255, "learning_rate": 9.282451950931515e-06, "loss": 0.2123, "step": 2761 }, { "epoch": 0.2, "grad_norm": 1.4655292731330085, "learning_rate": 9.28185391056396e-06, "loss": 0.2672, "step": 2762 }, { "epoch": 0.2, "grad_norm": 4.734441567397226, "learning_rate": 9.2812556403635e-06, "loss": 0.6468, "step": 2763 }, { "epoch": 0.2, "grad_norm": 5.035917829056862, "learning_rate": 9.280657140362246e-06, "loss": 0.7761, "step": 2764 }, { "epoch": 0.2, "grad_norm": 1.3411146166064634, "learning_rate": 9.28005841059232e-06, "loss": 0.1654, "step": 2765 }, { "epoch": 0.2, "grad_norm": 1.130826316965804, "learning_rate": 9.279459451085866e-06, "loss": 0.1416, "step": 2766 }, { "epoch": 0.2, "grad_norm": 1.4208572742343444, "learning_rate": 9.278860261875029e-06, "loss": 0.252, "step": 2767 }, { "epoch": 0.2, "grad_norm": 13.110058786456102, "learning_rate": 9.278260842991971e-06, "loss": 0.5862, "step": 2768 }, { "epoch": 0.2, "grad_norm": 5.109746481413057, "learning_rate": 9.277661194468867e-06, "loss": 0.5593, "step": 2769 }, { "epoch": 0.2, "grad_norm": 1.267914966286527, "learning_rate": 9.277061316337908e-06, "loss": 0.2155, "step": 2770 }, { "epoch": 0.2, "grad_norm": 1.5116837340734872, "learning_rate": 9.276461208631286e-06, "loss": 0.2528, "step": 2771 }, { "epoch": 0.2, "grad_norm": 1.3129335946906224, "learning_rate": 9.275860871381217e-06, "loss": 0.1985, "step": 2772 }, { "epoch": 0.2, "grad_norm": 1.7569515488462752, "learning_rate": 9.275260304619922e-06, "loss": 0.2886, "step": 2773 }, { "epoch": 0.2, "grad_norm": 1.5389205055054571, "learning_rate": 9.27465950837964e-06, "loss": 0.2095, "step": 2774 }, { "epoch": 0.2, "grad_norm": 1.3729770457397474, "learning_rate": 9.274058482692618e-06, "loss": 0.1806, "step": 2775 }, { "epoch": 0.2, "grad_norm": 1.457133959654698, "learning_rate": 9.273457227591117e-06, "loss": 0.1955, "step": 2776 }, { "epoch": 0.2, "grad_norm": 1.3837547158127559, "learning_rate": 9.272855743107408e-06, "loss": 0.2296, "step": 2777 }, { "epoch": 0.2, "grad_norm": 1.4770409181636541, "learning_rate": 9.272254029273779e-06, "loss": 0.246, "step": 2778 }, { "epoch": 0.2, "grad_norm": 1.50953406094558, "learning_rate": 9.271652086122527e-06, "loss": 0.1948, "step": 2779 }, { "epoch": 0.2, "grad_norm": 1.4094134858899505, "learning_rate": 9.27104991368596e-06, "loss": 0.206, "step": 2780 }, { "epoch": 0.2, "grad_norm": 5.743157530124243, "learning_rate": 9.270447511996402e-06, "loss": 0.5884, "step": 2781 }, { "epoch": 0.2, "grad_norm": 1.4585970236953467, "learning_rate": 9.269844881086187e-06, "loss": 0.2232, "step": 2782 }, { "epoch": 0.2, "grad_norm": 1.2420764128207955, "learning_rate": 9.269242020987663e-06, "loss": 0.1763, "step": 2783 }, { "epoch": 0.2, "grad_norm": 1.6389088435882875, "learning_rate": 9.268638931733188e-06, "loss": 0.2664, "step": 2784 }, { "epoch": 0.2, "grad_norm": 1.4173813235755677, "learning_rate": 9.268035613355133e-06, "loss": 0.2072, "step": 2785 }, { "epoch": 0.2, "grad_norm": 1.5163241281216688, "learning_rate": 9.267432065885881e-06, "loss": 0.2257, "step": 2786 }, { "epoch": 0.2, "grad_norm": 5.913317759367872, "learning_rate": 9.266828289357831e-06, "loss": 0.6326, "step": 2787 }, { "epoch": 0.2, "grad_norm": 1.3921344945068963, "learning_rate": 9.266224283803388e-06, "loss": 0.2147, "step": 2788 }, { "epoch": 0.2, "grad_norm": 1.3765929934884522, "learning_rate": 9.265620049254977e-06, "loss": 0.1904, "step": 2789 }, { "epoch": 0.2, "grad_norm": 1.4151804306094544, "learning_rate": 9.265015585745028e-06, "loss": 0.1913, "step": 2790 }, { "epoch": 0.2, "grad_norm": 5.730188153764662, "learning_rate": 9.264410893305984e-06, "loss": 0.4471, "step": 2791 }, { "epoch": 0.2, "grad_norm": 1.4275896451456935, "learning_rate": 9.263805971970305e-06, "loss": 0.2021, "step": 2792 }, { "epoch": 0.2, "grad_norm": 9.400286947194806, "learning_rate": 9.263200821770462e-06, "loss": 0.5824, "step": 2793 }, { "epoch": 0.2, "grad_norm": 7.525565134603284, "learning_rate": 9.262595442738936e-06, "loss": 0.6484, "step": 2794 }, { "epoch": 0.2, "grad_norm": 1.4376229811971102, "learning_rate": 9.26198983490822e-06, "loss": 0.2188, "step": 2795 }, { "epoch": 0.2, "grad_norm": 1.5562788641439695, "learning_rate": 9.261383998310822e-06, "loss": 0.2453, "step": 2796 }, { "epoch": 0.2, "grad_norm": 1.4095990593929484, "learning_rate": 9.26077793297926e-06, "loss": 0.2341, "step": 2797 }, { "epoch": 0.2, "grad_norm": 1.3918150164673484, "learning_rate": 9.260171638946065e-06, "loss": 0.207, "step": 2798 }, { "epoch": 0.2, "grad_norm": 1.383177749722103, "learning_rate": 9.259565116243782e-06, "loss": 0.2418, "step": 2799 }, { "epoch": 0.2, "grad_norm": 1.5421088406739354, "learning_rate": 9.258958364904966e-06, "loss": 0.2294, "step": 2800 }, { "epoch": 0.2, "grad_norm": 1.4101165407802647, "learning_rate": 9.258351384962187e-06, "loss": 0.1934, "step": 2801 }, { "epoch": 0.2, "grad_norm": 6.808644490871377, "learning_rate": 9.25774417644802e-06, "loss": 0.5176, "step": 2802 }, { "epoch": 0.2, "grad_norm": 1.2975173269739633, "learning_rate": 9.257136739395063e-06, "loss": 0.1771, "step": 2803 }, { "epoch": 0.2, "grad_norm": 1.301827000773767, "learning_rate": 9.256529073835917e-06, "loss": 0.2105, "step": 2804 }, { "epoch": 0.2, "grad_norm": 1.5622858386793308, "learning_rate": 9.255921179803202e-06, "loss": 0.241, "step": 2805 }, { "epoch": 0.2, "grad_norm": 1.4943515687214168, "learning_rate": 9.255313057329544e-06, "loss": 0.2104, "step": 2806 }, { "epoch": 0.2, "grad_norm": 1.4273896313848267, "learning_rate": 9.254704706447588e-06, "loss": 0.2469, "step": 2807 }, { "epoch": 0.2, "grad_norm": 1.3350369604624892, "learning_rate": 9.254096127189987e-06, "loss": 0.1907, "step": 2808 }, { "epoch": 0.2, "grad_norm": 6.22600804276605, "learning_rate": 9.253487319589405e-06, "loss": 0.6303, "step": 2809 }, { "epoch": 0.2, "grad_norm": 1.199166773009833, "learning_rate": 9.252878283678524e-06, "loss": 0.1561, "step": 2810 }, { "epoch": 0.2, "grad_norm": 1.4541966938026452, "learning_rate": 9.25226901949003e-06, "loss": 0.2152, "step": 2811 }, { "epoch": 0.2, "grad_norm": 1.418355352415606, "learning_rate": 9.251659527056629e-06, "loss": 0.2227, "step": 2812 }, { "epoch": 0.2, "grad_norm": 1.3098082635392974, "learning_rate": 9.251049806411037e-06, "loss": 0.2175, "step": 2813 }, { "epoch": 0.2, "grad_norm": 1.4981322906904728, "learning_rate": 9.250439857585977e-06, "loss": 0.2571, "step": 2814 }, { "epoch": 0.2, "grad_norm": 1.4681854896439621, "learning_rate": 9.249829680614195e-06, "loss": 0.2471, "step": 2815 }, { "epoch": 0.2, "grad_norm": 1.3584326514908769, "learning_rate": 9.249219275528438e-06, "loss": 0.1892, "step": 2816 }, { "epoch": 0.2, "grad_norm": 1.43939624957332, "learning_rate": 9.248608642361471e-06, "loss": 0.2305, "step": 2817 }, { "epoch": 0.2, "grad_norm": 1.6473957806283077, "learning_rate": 9.247997781146071e-06, "loss": 0.2225, "step": 2818 }, { "epoch": 0.2, "grad_norm": 1.3492870624845066, "learning_rate": 9.247386691915027e-06, "loss": 0.168, "step": 2819 }, { "epoch": 0.2, "grad_norm": 1.4929351976082126, "learning_rate": 9.246775374701139e-06, "loss": 0.2252, "step": 2820 }, { "epoch": 0.2, "grad_norm": 1.415205746908608, "learning_rate": 9.246163829537221e-06, "loss": 0.2468, "step": 2821 }, { "epoch": 0.2, "grad_norm": 1.5294410991062586, "learning_rate": 9.245552056456098e-06, "loss": 0.22, "step": 2822 }, { "epoch": 0.2, "grad_norm": 1.5060850201811555, "learning_rate": 9.244940055490605e-06, "loss": 0.2207, "step": 2823 }, { "epoch": 0.2, "grad_norm": 1.5442673706591972, "learning_rate": 9.244327826673597e-06, "loss": 0.2992, "step": 2824 }, { "epoch": 0.2, "grad_norm": 1.3735292757117905, "learning_rate": 9.24371537003793e-06, "loss": 0.2383, "step": 2825 }, { "epoch": 0.2, "grad_norm": 1.4279895383303036, "learning_rate": 9.243102685616486e-06, "loss": 0.1851, "step": 2826 }, { "epoch": 0.2, "grad_norm": 1.3736542694389804, "learning_rate": 9.242489773442142e-06, "loss": 0.1692, "step": 2827 }, { "epoch": 0.2, "grad_norm": 1.4050323736809658, "learning_rate": 9.241876633547803e-06, "loss": 0.2119, "step": 2828 }, { "epoch": 0.2, "grad_norm": 1.6507496021649768, "learning_rate": 9.24126326596638e-06, "loss": 0.2093, "step": 2829 }, { "epoch": 0.2, "grad_norm": 1.3120092849258171, "learning_rate": 9.240649670730793e-06, "loss": 0.1951, "step": 2830 }, { "epoch": 0.2, "grad_norm": 6.484924776215179, "learning_rate": 9.240035847873979e-06, "loss": 0.7142, "step": 2831 }, { "epoch": 0.2, "grad_norm": 4.982837490805566, "learning_rate": 9.239421797428884e-06, "loss": 0.6435, "step": 2832 }, { "epoch": 0.2, "grad_norm": 1.5965601464560937, "learning_rate": 9.238807519428472e-06, "loss": 0.2262, "step": 2833 }, { "epoch": 0.2, "grad_norm": 7.295069840397293, "learning_rate": 9.238193013905713e-06, "loss": 0.6014, "step": 2834 }, { "epoch": 0.2, "grad_norm": 1.4283721998912922, "learning_rate": 9.237578280893587e-06, "loss": 0.2141, "step": 2835 }, { "epoch": 0.2, "grad_norm": 1.526082744237695, "learning_rate": 9.236963320425097e-06, "loss": 0.2357, "step": 2836 }, { "epoch": 0.2, "grad_norm": 4.189494667861111, "learning_rate": 9.236348132533246e-06, "loss": 0.5008, "step": 2837 }, { "epoch": 0.2, "grad_norm": 1.4055438900908386, "learning_rate": 9.235732717251058e-06, "loss": 0.2041, "step": 2838 }, { "epoch": 0.2, "grad_norm": 1.460036773370276, "learning_rate": 9.235117074611565e-06, "loss": 0.1947, "step": 2839 }, { "epoch": 0.2, "grad_norm": 1.4335338202480654, "learning_rate": 9.234501204647814e-06, "loss": 0.2658, "step": 2840 }, { "epoch": 0.2, "grad_norm": 1.4684474814845387, "learning_rate": 9.233885107392862e-06, "loss": 0.2263, "step": 2841 }, { "epoch": 0.2, "grad_norm": 6.151358695531919, "learning_rate": 9.233268782879776e-06, "loss": 0.6527, "step": 2842 }, { "epoch": 0.2, "grad_norm": 1.4589339251718731, "learning_rate": 9.232652231141638e-06, "loss": 0.1888, "step": 2843 }, { "epoch": 0.2, "grad_norm": 1.5247053127117112, "learning_rate": 9.232035452211546e-06, "loss": 0.2671, "step": 2844 }, { "epoch": 0.2, "grad_norm": 1.5587615120413763, "learning_rate": 9.231418446122604e-06, "loss": 0.2018, "step": 2845 }, { "epoch": 0.2, "grad_norm": 1.2545277438154867, "learning_rate": 9.230801212907929e-06, "loss": 0.2125, "step": 2846 }, { "epoch": 0.2, "grad_norm": 5.124026859415719, "learning_rate": 9.230183752600654e-06, "loss": 0.5483, "step": 2847 }, { "epoch": 0.2, "grad_norm": 1.3049019603588219, "learning_rate": 9.229566065233921e-06, "loss": 0.1826, "step": 2848 }, { "epoch": 0.2, "grad_norm": 1.4311304964041343, "learning_rate": 9.228948150840885e-06, "loss": 0.2233, "step": 2849 }, { "epoch": 0.2, "grad_norm": 1.4595755941107131, "learning_rate": 9.228330009454712e-06, "loss": 0.2417, "step": 2850 }, { "epoch": 0.2, "grad_norm": 1.3195441124832552, "learning_rate": 9.227711641108584e-06, "loss": 0.1847, "step": 2851 }, { "epoch": 0.2, "grad_norm": 1.2680987731607882, "learning_rate": 9.22709304583569e-06, "loss": 0.1779, "step": 2852 }, { "epoch": 0.2, "grad_norm": 1.406926919757446, "learning_rate": 9.226474223669232e-06, "loss": 0.2046, "step": 2853 }, { "epoch": 0.2, "grad_norm": 1.4824867011955372, "learning_rate": 9.225855174642432e-06, "loss": 0.2482, "step": 2854 }, { "epoch": 0.2, "grad_norm": 1.3195907163082892, "learning_rate": 9.225235898788514e-06, "loss": 0.1813, "step": 2855 }, { "epoch": 0.2, "grad_norm": 1.423444453793779, "learning_rate": 9.224616396140718e-06, "loss": 0.2536, "step": 2856 }, { "epoch": 0.2, "grad_norm": 1.490820136277582, "learning_rate": 9.223996666732297e-06, "loss": 0.225, "step": 2857 }, { "epoch": 0.2, "grad_norm": 1.3734782446205307, "learning_rate": 9.223376710596517e-06, "loss": 0.1677, "step": 2858 }, { "epoch": 0.2, "grad_norm": 1.320047194461662, "learning_rate": 9.222756527766655e-06, "loss": 0.2177, "step": 2859 }, { "epoch": 0.2, "grad_norm": 5.724840604611111, "learning_rate": 9.222136118275996e-06, "loss": 0.565, "step": 2860 }, { "epoch": 0.2, "grad_norm": 1.5647166395838488, "learning_rate": 9.221515482157846e-06, "loss": 0.2416, "step": 2861 }, { "epoch": 0.2, "grad_norm": 1.5138161696762844, "learning_rate": 9.220894619445515e-06, "loss": 0.219, "step": 2862 }, { "epoch": 0.2, "grad_norm": 1.6641828045243603, "learning_rate": 9.22027353017233e-06, "loss": 0.2114, "step": 2863 }, { "epoch": 0.2, "grad_norm": 1.5068638410549247, "learning_rate": 9.219652214371628e-06, "loss": 0.2412, "step": 2864 }, { "epoch": 0.2, "grad_norm": 1.4088980677678526, "learning_rate": 9.219030672076759e-06, "loss": 0.1803, "step": 2865 }, { "epoch": 0.21, "grad_norm": 1.445658100962736, "learning_rate": 9.218408903321083e-06, "loss": 0.2682, "step": 2866 }, { "epoch": 0.21, "grad_norm": 1.2967510704170178, "learning_rate": 9.217786908137977e-06, "loss": 0.2097, "step": 2867 }, { "epoch": 0.21, "grad_norm": 7.2703517903031045, "learning_rate": 9.21716468656083e-06, "loss": 0.6153, "step": 2868 }, { "epoch": 0.21, "grad_norm": 1.4876279649956823, "learning_rate": 9.216542238623032e-06, "loss": 0.1947, "step": 2869 }, { "epoch": 0.21, "grad_norm": 1.4269034370104228, "learning_rate": 9.215919564358e-06, "loss": 0.218, "step": 2870 }, { "epoch": 0.21, "grad_norm": 1.305710200853404, "learning_rate": 9.215296663799154e-06, "loss": 0.2031, "step": 2871 }, { "epoch": 0.21, "grad_norm": 7.739863981747492, "learning_rate": 9.21467353697993e-06, "loss": 0.5491, "step": 2872 }, { "epoch": 0.21, "grad_norm": 1.5706629578445788, "learning_rate": 9.214050183933774e-06, "loss": 0.2676, "step": 2873 }, { "epoch": 0.21, "grad_norm": 5.754683199847275, "learning_rate": 9.213426604694144e-06, "loss": 0.6966, "step": 2874 }, { "epoch": 0.21, "grad_norm": 1.487998160727917, "learning_rate": 9.212802799294516e-06, "loss": 0.2161, "step": 2875 }, { "epoch": 0.21, "grad_norm": 1.294755276412573, "learning_rate": 9.21217876776837e-06, "loss": 0.2157, "step": 2876 }, { "epoch": 0.21, "grad_norm": 1.4278918743110487, "learning_rate": 9.2115545101492e-06, "loss": 0.2308, "step": 2877 }, { "epoch": 0.21, "grad_norm": 1.5712930003039205, "learning_rate": 9.210930026470515e-06, "loss": 0.2741, "step": 2878 }, { "epoch": 0.21, "grad_norm": 1.1750212499757744, "learning_rate": 9.210305316765837e-06, "loss": 0.1516, "step": 2879 }, { "epoch": 0.21, "grad_norm": 1.5418971011845954, "learning_rate": 9.209680381068698e-06, "loss": 0.2803, "step": 2880 }, { "epoch": 0.21, "grad_norm": 1.5855413746728415, "learning_rate": 9.209055219412636e-06, "loss": 0.2345, "step": 2881 }, { "epoch": 0.21, "grad_norm": 1.4665870339453904, "learning_rate": 9.208429831831216e-06, "loss": 0.2554, "step": 2882 }, { "epoch": 0.21, "grad_norm": 6.596048384415915, "learning_rate": 9.207804218357998e-06, "loss": 0.5125, "step": 2883 }, { "epoch": 0.21, "grad_norm": 1.3744818120320499, "learning_rate": 9.207178379026568e-06, "loss": 0.2148, "step": 2884 }, { "epoch": 0.21, "grad_norm": 1.391466740522246, "learning_rate": 9.206552313870518e-06, "loss": 0.2165, "step": 2885 }, { "epoch": 0.21, "grad_norm": 1.2245404745787873, "learning_rate": 9.20592602292345e-06, "loss": 0.2041, "step": 2886 }, { "epoch": 0.21, "grad_norm": 1.3303813780140332, "learning_rate": 9.205299506218984e-06, "loss": 0.1737, "step": 2887 }, { "epoch": 0.21, "grad_norm": 8.491862920075917, "learning_rate": 9.204672763790746e-06, "loss": 0.8073, "step": 2888 }, { "epoch": 0.21, "grad_norm": 1.324213034678787, "learning_rate": 9.204045795672378e-06, "loss": 0.2126, "step": 2889 }, { "epoch": 0.21, "grad_norm": 1.4647247173433193, "learning_rate": 9.203418601897533e-06, "loss": 0.2243, "step": 2890 }, { "epoch": 0.21, "grad_norm": 1.4114448082721172, "learning_rate": 9.202791182499877e-06, "loss": 0.2025, "step": 2891 }, { "epoch": 0.21, "grad_norm": 1.4730769243053359, "learning_rate": 9.202163537513088e-06, "loss": 0.2568, "step": 2892 }, { "epoch": 0.21, "grad_norm": 1.510226858794718, "learning_rate": 9.201535666970853e-06, "loss": 0.2336, "step": 2893 }, { "epoch": 0.21, "grad_norm": 1.6217562039263327, "learning_rate": 9.200907570906879e-06, "loss": 0.2609, "step": 2894 }, { "epoch": 0.21, "grad_norm": 4.187671247670847, "learning_rate": 9.200279249354871e-06, "loss": 0.5835, "step": 2895 }, { "epoch": 0.21, "grad_norm": 1.468417463546273, "learning_rate": 9.199650702348564e-06, "loss": 0.1782, "step": 2896 }, { "epoch": 0.21, "grad_norm": 1.494521617811187, "learning_rate": 9.19902192992169e-06, "loss": 0.2289, "step": 2897 }, { "epoch": 0.21, "grad_norm": 1.3300920353043386, "learning_rate": 9.198392932108e-06, "loss": 0.2353, "step": 2898 }, { "epoch": 0.21, "grad_norm": 1.3033020956747912, "learning_rate": 9.197763708941256e-06, "loss": 0.2116, "step": 2899 }, { "epoch": 0.21, "grad_norm": 1.2522625146157715, "learning_rate": 9.197134260455233e-06, "loss": 0.1925, "step": 2900 }, { "epoch": 0.21, "grad_norm": 1.3614017231829951, "learning_rate": 9.196504586683719e-06, "loss": 0.249, "step": 2901 }, { "epoch": 0.21, "grad_norm": 1.6164602045948744, "learning_rate": 9.195874687660512e-06, "loss": 0.2349, "step": 2902 }, { "epoch": 0.21, "grad_norm": 1.3772405635756395, "learning_rate": 9.195244563419418e-06, "loss": 0.2351, "step": 2903 }, { "epoch": 0.21, "grad_norm": 1.205161490884727, "learning_rate": 9.194614213994263e-06, "loss": 0.1672, "step": 2904 }, { "epoch": 0.21, "grad_norm": 1.3001416974410283, "learning_rate": 9.193983639418882e-06, "loss": 0.2357, "step": 2905 }, { "epoch": 0.21, "grad_norm": 3.8481986565466895, "learning_rate": 9.193352839727122e-06, "loss": 0.5973, "step": 2906 }, { "epoch": 0.21, "grad_norm": 1.347811544501337, "learning_rate": 9.192721814952838e-06, "loss": 0.2281, "step": 2907 }, { "epoch": 0.21, "grad_norm": 1.6103013379328681, "learning_rate": 9.192090565129907e-06, "loss": 0.2011, "step": 2908 }, { "epoch": 0.21, "grad_norm": 1.5320720317510643, "learning_rate": 9.191459090292207e-06, "loss": 0.265, "step": 2909 }, { "epoch": 0.21, "grad_norm": 1.524764620528247, "learning_rate": 9.190827390473636e-06, "loss": 0.3148, "step": 2910 }, { "epoch": 0.21, "grad_norm": 1.4294818226140642, "learning_rate": 9.190195465708098e-06, "loss": 0.2371, "step": 2911 }, { "epoch": 0.21, "grad_norm": 1.305243088148744, "learning_rate": 9.189563316029515e-06, "loss": 0.1709, "step": 2912 }, { "epoch": 0.21, "grad_norm": 1.3934715579723185, "learning_rate": 9.188930941471818e-06, "loss": 0.1621, "step": 2913 }, { "epoch": 0.21, "grad_norm": 1.2784602542474226, "learning_rate": 9.18829834206895e-06, "loss": 0.2014, "step": 2914 }, { "epoch": 0.21, "grad_norm": 1.449436754164341, "learning_rate": 9.187665517854866e-06, "loss": 0.227, "step": 2915 }, { "epoch": 0.21, "grad_norm": 1.396498785691227, "learning_rate": 9.187032468863532e-06, "loss": 0.2509, "step": 2916 }, { "epoch": 0.21, "grad_norm": 1.4228358880798186, "learning_rate": 9.186399195128932e-06, "loss": 0.2181, "step": 2917 }, { "epoch": 0.21, "grad_norm": 1.4272453857546465, "learning_rate": 9.185765696685053e-06, "loss": 0.2256, "step": 2918 }, { "epoch": 0.21, "grad_norm": 1.4407259463789588, "learning_rate": 9.185131973565901e-06, "loss": 0.1922, "step": 2919 }, { "epoch": 0.21, "grad_norm": 1.4248450625479225, "learning_rate": 9.184498025805493e-06, "loss": 0.2231, "step": 2920 }, { "epoch": 0.21, "grad_norm": 1.497140119027999, "learning_rate": 9.183863853437854e-06, "loss": 0.179, "step": 2921 }, { "epoch": 0.21, "grad_norm": 1.8461566417740474, "learning_rate": 9.183229456497024e-06, "loss": 0.219, "step": 2922 }, { "epoch": 0.21, "grad_norm": 1.367967088702171, "learning_rate": 9.182594835017056e-06, "loss": 0.2146, "step": 2923 }, { "epoch": 0.21, "grad_norm": 1.4650821723213976, "learning_rate": 9.181959989032014e-06, "loss": 0.2203, "step": 2924 }, { "epoch": 0.21, "grad_norm": 1.2576111863158799, "learning_rate": 9.181324918575974e-06, "loss": 0.1856, "step": 2925 }, { "epoch": 0.21, "grad_norm": 1.6514945340486962, "learning_rate": 9.180689623683024e-06, "loss": 0.2462, "step": 2926 }, { "epoch": 0.21, "grad_norm": 1.4877699404389335, "learning_rate": 9.180054104387264e-06, "loss": 0.1958, "step": 2927 }, { "epoch": 0.21, "grad_norm": 1.3572596692971515, "learning_rate": 9.179418360722807e-06, "loss": 0.188, "step": 2928 }, { "epoch": 0.21, "grad_norm": 1.4844144616604722, "learning_rate": 9.178782392723775e-06, "loss": 0.1828, "step": 2929 }, { "epoch": 0.21, "grad_norm": 1.464110868713132, "learning_rate": 9.178146200424306e-06, "loss": 0.2248, "step": 2930 }, { "epoch": 0.21, "grad_norm": 1.382279362877647, "learning_rate": 9.177509783858548e-06, "loss": 0.1875, "step": 2931 }, { "epoch": 0.21, "grad_norm": 1.3265848300436374, "learning_rate": 9.176873143060661e-06, "loss": 0.1784, "step": 2932 }, { "epoch": 0.21, "grad_norm": 1.560245132429068, "learning_rate": 9.176236278064818e-06, "loss": 0.2118, "step": 2933 }, { "epoch": 0.21, "grad_norm": 1.480926863352633, "learning_rate": 9.175599188905203e-06, "loss": 0.2001, "step": 2934 }, { "epoch": 0.21, "grad_norm": 1.5788512760334066, "learning_rate": 9.17496187561601e-06, "loss": 0.2392, "step": 2935 }, { "epoch": 0.21, "grad_norm": 1.603343800948272, "learning_rate": 9.174324338231452e-06, "loss": 0.2286, "step": 2936 }, { "epoch": 0.21, "grad_norm": 1.4771134268020356, "learning_rate": 9.173686576785747e-06, "loss": 0.2529, "step": 2937 }, { "epoch": 0.21, "grad_norm": 1.51347460813198, "learning_rate": 9.173048591313128e-06, "loss": 0.235, "step": 2938 }, { "epoch": 0.21, "grad_norm": 1.405034339120005, "learning_rate": 9.172410381847838e-06, "loss": 0.2068, "step": 2939 }, { "epoch": 0.21, "grad_norm": 8.930089226392536, "learning_rate": 9.171771948424138e-06, "loss": 0.5473, "step": 2940 }, { "epoch": 0.21, "grad_norm": 1.3219373958911942, "learning_rate": 9.171133291076289e-06, "loss": 0.2156, "step": 2941 }, { "epoch": 0.21, "grad_norm": 1.5606383088826374, "learning_rate": 9.170494409838579e-06, "loss": 0.2023, "step": 2942 }, { "epoch": 0.21, "grad_norm": 1.3482254729619318, "learning_rate": 9.169855304745297e-06, "loss": 0.1606, "step": 2943 }, { "epoch": 0.21, "grad_norm": 1.4099946923179147, "learning_rate": 9.169215975830747e-06, "loss": 0.2064, "step": 2944 }, { "epoch": 0.21, "grad_norm": 1.4014200171900193, "learning_rate": 9.168576423129247e-06, "loss": 0.2339, "step": 2945 }, { "epoch": 0.21, "grad_norm": 1.5054583925355143, "learning_rate": 9.167936646675126e-06, "loss": 0.2391, "step": 2946 }, { "epoch": 0.21, "grad_norm": 1.3604018423659765, "learning_rate": 9.167296646502725e-06, "loss": 0.2, "step": 2947 }, { "epoch": 0.21, "grad_norm": 6.545591963722861, "learning_rate": 9.166656422646396e-06, "loss": 0.7984, "step": 2948 }, { "epoch": 0.21, "grad_norm": 1.4563237314064592, "learning_rate": 9.166015975140502e-06, "loss": 0.1872, "step": 2949 }, { "epoch": 0.21, "grad_norm": 1.3093818155297914, "learning_rate": 9.165375304019422e-06, "loss": 0.2204, "step": 2950 }, { "epoch": 0.21, "grad_norm": 1.3301106967671248, "learning_rate": 9.164734409317542e-06, "loss": 0.2359, "step": 2951 }, { "epoch": 0.21, "grad_norm": 1.394982635646508, "learning_rate": 9.164093291069267e-06, "loss": 0.2334, "step": 2952 }, { "epoch": 0.21, "grad_norm": 1.523469364770285, "learning_rate": 9.163451949309005e-06, "loss": 0.2252, "step": 2953 }, { "epoch": 0.21, "grad_norm": 1.4400366451853166, "learning_rate": 9.162810384071186e-06, "loss": 0.2073, "step": 2954 }, { "epoch": 0.21, "grad_norm": 1.5001236193416456, "learning_rate": 9.16216859539024e-06, "loss": 0.228, "step": 2955 }, { "epoch": 0.21, "grad_norm": 1.5024858258321905, "learning_rate": 9.161526583300621e-06, "loss": 0.2313, "step": 2956 }, { "epoch": 0.21, "grad_norm": 1.3305208293726116, "learning_rate": 9.160884347836789e-06, "loss": 0.1792, "step": 2957 }, { "epoch": 0.21, "grad_norm": 1.1188431289538487, "learning_rate": 9.160241889033213e-06, "loss": 0.1732, "step": 2958 }, { "epoch": 0.21, "grad_norm": 1.4341096989102657, "learning_rate": 9.159599206924383e-06, "loss": 0.2159, "step": 2959 }, { "epoch": 0.21, "grad_norm": 1.5016750056550827, "learning_rate": 9.158956301544791e-06, "loss": 0.2743, "step": 2960 }, { "epoch": 0.21, "grad_norm": 1.2940109056668865, "learning_rate": 9.158313172928948e-06, "loss": 0.1973, "step": 2961 }, { "epoch": 0.21, "grad_norm": 1.2937943032426407, "learning_rate": 9.157669821111373e-06, "loss": 0.1955, "step": 2962 }, { "epoch": 0.21, "grad_norm": 1.5408680491649607, "learning_rate": 9.157026246126599e-06, "loss": 0.2481, "step": 2963 }, { "epoch": 0.21, "grad_norm": 1.5393186923767896, "learning_rate": 9.156382448009173e-06, "loss": 0.1958, "step": 2964 }, { "epoch": 0.21, "grad_norm": 1.4027958718970357, "learning_rate": 9.15573842679365e-06, "loss": 0.2148, "step": 2965 }, { "epoch": 0.21, "grad_norm": 8.453649674160241, "learning_rate": 9.155094182514595e-06, "loss": 0.7086, "step": 2966 }, { "epoch": 0.21, "grad_norm": 1.4662910818257089, "learning_rate": 9.154449715206595e-06, "loss": 0.1844, "step": 2967 }, { "epoch": 0.21, "grad_norm": 1.3489516261763703, "learning_rate": 9.153805024904236e-06, "loss": 0.239, "step": 2968 }, { "epoch": 0.21, "grad_norm": 1.2137091547192778, "learning_rate": 9.153160111642127e-06, "loss": 0.1561, "step": 2969 }, { "epoch": 0.21, "grad_norm": 1.4749059306062013, "learning_rate": 9.152514975454884e-06, "loss": 0.2293, "step": 2970 }, { "epoch": 0.21, "grad_norm": 1.393611101433033, "learning_rate": 9.151869616377132e-06, "loss": 0.212, "step": 2971 }, { "epoch": 0.21, "grad_norm": 1.6345451446085344, "learning_rate": 9.151224034443516e-06, "loss": 0.3214, "step": 2972 }, { "epoch": 0.21, "grad_norm": 1.7474050805689048, "learning_rate": 9.150578229688684e-06, "loss": 0.2856, "step": 2973 }, { "epoch": 0.21, "grad_norm": 1.415333897237682, "learning_rate": 9.149932202147302e-06, "loss": 0.2072, "step": 2974 }, { "epoch": 0.21, "grad_norm": 1.620666572430044, "learning_rate": 9.149285951854047e-06, "loss": 0.2297, "step": 2975 }, { "epoch": 0.21, "grad_norm": 1.483948549363042, "learning_rate": 9.148639478843606e-06, "loss": 0.2149, "step": 2976 }, { "epoch": 0.21, "grad_norm": 1.5429731254719918, "learning_rate": 9.147992783150679e-06, "loss": 0.2402, "step": 2977 }, { "epoch": 0.21, "grad_norm": 1.2824180752272012, "learning_rate": 9.14734586480998e-06, "loss": 0.2279, "step": 2978 }, { "epoch": 0.21, "grad_norm": 1.406260435718829, "learning_rate": 9.146698723856228e-06, "loss": 0.2296, "step": 2979 }, { "epoch": 0.21, "grad_norm": 1.4512049244634375, "learning_rate": 9.146051360324166e-06, "loss": 0.2134, "step": 2980 }, { "epoch": 0.21, "grad_norm": 1.4146936651181095, "learning_rate": 9.145403774248536e-06, "loss": 0.1947, "step": 2981 }, { "epoch": 0.21, "grad_norm": 1.521867269846185, "learning_rate": 9.144755965664102e-06, "loss": 0.2491, "step": 2982 }, { "epoch": 0.21, "grad_norm": 1.2943458397664256, "learning_rate": 9.144107934605634e-06, "loss": 0.2182, "step": 2983 }, { "epoch": 0.21, "grad_norm": 1.4757967065460242, "learning_rate": 9.143459681107915e-06, "loss": 0.2219, "step": 2984 }, { "epoch": 0.21, "grad_norm": 1.3770888511956474, "learning_rate": 9.142811205205742e-06, "loss": 0.1957, "step": 2985 }, { "epoch": 0.21, "grad_norm": 1.3424948673508654, "learning_rate": 9.142162506933921e-06, "loss": 0.2403, "step": 2986 }, { "epoch": 0.21, "grad_norm": 1.4421371902080553, "learning_rate": 9.141513586327273e-06, "loss": 0.2366, "step": 2987 }, { "epoch": 0.21, "grad_norm": 1.4306553360803718, "learning_rate": 9.140864443420629e-06, "loss": 0.227, "step": 2988 }, { "epoch": 0.21, "grad_norm": 5.139658732723522, "learning_rate": 9.140215078248833e-06, "loss": 0.7777, "step": 2989 }, { "epoch": 0.21, "grad_norm": 1.4169625821803593, "learning_rate": 9.13956549084674e-06, "loss": 0.2259, "step": 2990 }, { "epoch": 0.21, "grad_norm": 1.34830775179381, "learning_rate": 9.138915681249217e-06, "loss": 0.2523, "step": 2991 }, { "epoch": 0.21, "grad_norm": 1.5019153909923473, "learning_rate": 9.138265649491142e-06, "loss": 0.2718, "step": 2992 }, { "epoch": 0.21, "grad_norm": 5.461254835728262, "learning_rate": 9.137615395607409e-06, "loss": 0.5928, "step": 2993 }, { "epoch": 0.21, "grad_norm": 1.418195033785992, "learning_rate": 9.136964919632918e-06, "loss": 0.2262, "step": 2994 }, { "epoch": 0.21, "grad_norm": 1.2327639140404898, "learning_rate": 9.136314221602585e-06, "loss": 0.2206, "step": 2995 }, { "epoch": 0.21, "grad_norm": 1.3451593453030868, "learning_rate": 9.135663301551339e-06, "loss": 0.1796, "step": 2996 }, { "epoch": 0.21, "grad_norm": 1.2715335402641175, "learning_rate": 9.135012159514116e-06, "loss": 0.1835, "step": 2997 }, { "epoch": 0.21, "grad_norm": 1.4349084599292932, "learning_rate": 9.134360795525869e-06, "loss": 0.225, "step": 2998 }, { "epoch": 0.21, "grad_norm": 1.4063227352155494, "learning_rate": 9.133709209621559e-06, "loss": 0.2413, "step": 2999 }, { "epoch": 0.21, "grad_norm": 1.3298396166778672, "learning_rate": 9.13305740183616e-06, "loss": 0.1999, "step": 3000 }, { "epoch": 0.21, "grad_norm": 1.4280543500594016, "learning_rate": 9.13240537220466e-06, "loss": 0.2132, "step": 3001 }, { "epoch": 0.21, "grad_norm": 1.7267700938759818, "learning_rate": 9.131753120762057e-06, "loss": 0.2595, "step": 3002 }, { "epoch": 0.21, "grad_norm": 1.4551700898693578, "learning_rate": 9.13110064754336e-06, "loss": 0.2482, "step": 3003 }, { "epoch": 0.21, "grad_norm": 4.894896547989317, "learning_rate": 9.130447952583595e-06, "loss": 0.4624, "step": 3004 }, { "epoch": 0.21, "grad_norm": 1.455237938140936, "learning_rate": 9.12979503591779e-06, "loss": 0.1876, "step": 3005 }, { "epoch": 0.22, "grad_norm": 1.3889517326581236, "learning_rate": 9.129141897580995e-06, "loss": 0.2361, "step": 3006 }, { "epoch": 0.22, "grad_norm": 1.4740452259439505, "learning_rate": 9.128488537608269e-06, "loss": 0.2019, "step": 3007 }, { "epoch": 0.22, "grad_norm": 1.3870506617078244, "learning_rate": 9.127834956034679e-06, "loss": 0.1916, "step": 3008 }, { "epoch": 0.22, "grad_norm": 1.2196918840690139, "learning_rate": 9.127181152895308e-06, "loss": 0.19, "step": 3009 }, { "epoch": 0.22, "grad_norm": 1.3911461653887733, "learning_rate": 9.126527128225247e-06, "loss": 0.2276, "step": 3010 }, { "epoch": 0.22, "grad_norm": 1.2707836670071562, "learning_rate": 9.125872882059605e-06, "loss": 0.2115, "step": 3011 }, { "epoch": 0.22, "grad_norm": 1.3710191784202057, "learning_rate": 9.125218414433498e-06, "loss": 0.1987, "step": 3012 }, { "epoch": 0.22, "grad_norm": 1.3219643107282224, "learning_rate": 9.124563725382056e-06, "loss": 0.1467, "step": 3013 }, { "epoch": 0.22, "grad_norm": 1.3106134955741449, "learning_rate": 9.123908814940418e-06, "loss": 0.2091, "step": 3014 }, { "epoch": 0.22, "grad_norm": 1.4328400388253453, "learning_rate": 9.123253683143738e-06, "loss": 0.2315, "step": 3015 }, { "epoch": 0.22, "grad_norm": 1.5183517507411208, "learning_rate": 9.122598330027183e-06, "loss": 0.2285, "step": 3016 }, { "epoch": 0.22, "grad_norm": 1.6896997381230359, "learning_rate": 9.121942755625925e-06, "loss": 0.2802, "step": 3017 }, { "epoch": 0.22, "grad_norm": 1.748622611984189, "learning_rate": 9.121286959975159e-06, "loss": 0.24, "step": 3018 }, { "epoch": 0.22, "grad_norm": 1.4388623471874564, "learning_rate": 9.120630943110078e-06, "loss": 0.2264, "step": 3019 }, { "epoch": 0.22, "grad_norm": 1.534226535168014, "learning_rate": 9.119974705065902e-06, "loss": 0.235, "step": 3020 }, { "epoch": 0.22, "grad_norm": 1.458656626281266, "learning_rate": 9.119318245877849e-06, "loss": 0.2491, "step": 3021 }, { "epoch": 0.22, "grad_norm": 1.5062738683850154, "learning_rate": 9.11866156558116e-06, "loss": 0.2161, "step": 3022 }, { "epoch": 0.22, "grad_norm": 1.4615222759233526, "learning_rate": 9.118004664211078e-06, "loss": 0.2307, "step": 3023 }, { "epoch": 0.22, "grad_norm": 1.3951390025222694, "learning_rate": 9.117347541802868e-06, "loss": 0.2393, "step": 3024 }, { "epoch": 0.22, "grad_norm": 1.562445744050878, "learning_rate": 9.116690198391797e-06, "loss": 0.2183, "step": 3025 }, { "epoch": 0.22, "grad_norm": 1.5446008757300853, "learning_rate": 9.116032634013154e-06, "loss": 0.2656, "step": 3026 }, { "epoch": 0.22, "grad_norm": 1.3613353560372505, "learning_rate": 9.11537484870223e-06, "loss": 0.1816, "step": 3027 }, { "epoch": 0.22, "grad_norm": 1.4850661023273644, "learning_rate": 9.114716842494333e-06, "loss": 0.2423, "step": 3028 }, { "epoch": 0.22, "grad_norm": 5.963642580207885, "learning_rate": 9.114058615424783e-06, "loss": 0.567, "step": 3029 }, { "epoch": 0.22, "grad_norm": 1.4947142503305477, "learning_rate": 9.11340016752891e-06, "loss": 0.2161, "step": 3030 }, { "epoch": 0.22, "grad_norm": 1.4055485963174663, "learning_rate": 9.112741498842059e-06, "loss": 0.238, "step": 3031 }, { "epoch": 0.22, "grad_norm": 1.3717743001773361, "learning_rate": 9.112082609399585e-06, "loss": 0.1733, "step": 3032 }, { "epoch": 0.22, "grad_norm": 1.5924545538273276, "learning_rate": 9.111423499236849e-06, "loss": 0.2551, "step": 3033 }, { "epoch": 0.22, "grad_norm": 1.4718584536406234, "learning_rate": 9.110764168389236e-06, "loss": 0.2111, "step": 3034 }, { "epoch": 0.22, "grad_norm": 1.475152152205069, "learning_rate": 9.110104616892132e-06, "loss": 0.2064, "step": 3035 }, { "epoch": 0.22, "grad_norm": 1.4302155103564766, "learning_rate": 9.109444844780942e-06, "loss": 0.1805, "step": 3036 }, { "epoch": 0.22, "grad_norm": 1.3017397429582174, "learning_rate": 9.108784852091079e-06, "loss": 0.2121, "step": 3037 }, { "epoch": 0.22, "grad_norm": 1.517405486868771, "learning_rate": 9.108124638857967e-06, "loss": 0.1772, "step": 3038 }, { "epoch": 0.22, "grad_norm": 1.341607582504751, "learning_rate": 9.107464205117047e-06, "loss": 0.2122, "step": 3039 }, { "epoch": 0.22, "grad_norm": 6.165663380071189, "learning_rate": 9.106803550903765e-06, "loss": 0.4884, "step": 3040 }, { "epoch": 0.22, "grad_norm": 1.5200212390450671, "learning_rate": 9.106142676253584e-06, "loss": 0.2276, "step": 3041 }, { "epoch": 0.22, "grad_norm": 1.4380004076505872, "learning_rate": 9.105481581201978e-06, "loss": 0.1894, "step": 3042 }, { "epoch": 0.22, "grad_norm": 1.2954721613290607, "learning_rate": 9.10482026578443e-06, "loss": 0.1952, "step": 3043 }, { "epoch": 0.22, "grad_norm": 1.551039069988955, "learning_rate": 9.104158730036439e-06, "loss": 0.2213, "step": 3044 }, { "epoch": 0.22, "grad_norm": 1.372035457146638, "learning_rate": 9.10349697399351e-06, "loss": 0.2493, "step": 3045 }, { "epoch": 0.22, "grad_norm": 1.5605644629039561, "learning_rate": 9.102834997691167e-06, "loss": 0.2607, "step": 3046 }, { "epoch": 0.22, "grad_norm": 1.43958960478813, "learning_rate": 9.102172801164942e-06, "loss": 0.2387, "step": 3047 }, { "epoch": 0.22, "grad_norm": 1.5586279069795774, "learning_rate": 9.101510384450377e-06, "loss": 0.2438, "step": 3048 }, { "epoch": 0.22, "grad_norm": 1.570852692586721, "learning_rate": 9.10084774758303e-06, "loss": 0.24, "step": 3049 }, { "epoch": 0.22, "grad_norm": 1.4738828643951447, "learning_rate": 9.100184890598467e-06, "loss": 0.2331, "step": 3050 }, { "epoch": 0.22, "grad_norm": 1.5054705197867058, "learning_rate": 9.099521813532268e-06, "loss": 0.2253, "step": 3051 }, { "epoch": 0.22, "grad_norm": 1.5182933584250808, "learning_rate": 9.098858516420023e-06, "loss": 0.2501, "step": 3052 }, { "epoch": 0.22, "grad_norm": 8.095098728044864, "learning_rate": 9.098194999297339e-06, "loss": 0.7189, "step": 3053 }, { "epoch": 0.22, "grad_norm": 1.3866920682300161, "learning_rate": 9.097531262199828e-06, "loss": 0.2272, "step": 3054 }, { "epoch": 0.22, "grad_norm": 1.4153347174841637, "learning_rate": 9.09686730516312e-06, "loss": 0.2542, "step": 3055 }, { "epoch": 0.22, "grad_norm": 1.4393718472581118, "learning_rate": 9.096203128222847e-06, "loss": 0.2262, "step": 3056 }, { "epoch": 0.22, "grad_norm": 1.3993008788829686, "learning_rate": 9.095538731414666e-06, "loss": 0.2188, "step": 3057 }, { "epoch": 0.22, "grad_norm": 1.3805482638289766, "learning_rate": 9.094874114774235e-06, "loss": 0.2086, "step": 3058 }, { "epoch": 0.22, "grad_norm": 1.575546014928864, "learning_rate": 9.094209278337232e-06, "loss": 0.238, "step": 3059 }, { "epoch": 0.22, "grad_norm": 1.302214325231813, "learning_rate": 9.093544222139338e-06, "loss": 0.1954, "step": 3060 }, { "epoch": 0.22, "grad_norm": 1.375681807825601, "learning_rate": 9.092878946216252e-06, "loss": 0.2398, "step": 3061 }, { "epoch": 0.22, "grad_norm": 1.2959780151658282, "learning_rate": 9.092213450603687e-06, "loss": 0.214, "step": 3062 }, { "epoch": 0.22, "grad_norm": 1.479116322897022, "learning_rate": 9.09154773533736e-06, "loss": 0.1788, "step": 3063 }, { "epoch": 0.22, "grad_norm": 1.3763315893801809, "learning_rate": 9.090881800453006e-06, "loss": 0.2154, "step": 3064 }, { "epoch": 0.22, "grad_norm": 1.298242616584281, "learning_rate": 9.09021564598637e-06, "loss": 0.204, "step": 3065 }, { "epoch": 0.22, "grad_norm": 1.4167164560861139, "learning_rate": 9.089549271973207e-06, "loss": 0.2173, "step": 3066 }, { "epoch": 0.22, "grad_norm": 1.2812439930119288, "learning_rate": 9.088882678449285e-06, "loss": 0.2037, "step": 3067 }, { "epoch": 0.22, "grad_norm": 1.4978058371200331, "learning_rate": 9.088215865450386e-06, "loss": 0.2375, "step": 3068 }, { "epoch": 0.22, "grad_norm": 1.392670287565296, "learning_rate": 9.087548833012301e-06, "loss": 0.2198, "step": 3069 }, { "epoch": 0.22, "grad_norm": 1.6134189057385846, "learning_rate": 9.086881581170834e-06, "loss": 0.2559, "step": 3070 }, { "epoch": 0.22, "grad_norm": 1.4402453810896767, "learning_rate": 9.0862141099618e-06, "loss": 0.1733, "step": 3071 }, { "epoch": 0.22, "grad_norm": 1.583529395198567, "learning_rate": 9.085546419421026e-06, "loss": 0.2457, "step": 3072 }, { "epoch": 0.22, "grad_norm": 4.862349412477856, "learning_rate": 9.08487850958435e-06, "loss": 0.6775, "step": 3073 }, { "epoch": 0.22, "grad_norm": 1.3777231530134035, "learning_rate": 9.084210380487624e-06, "loss": 0.188, "step": 3074 }, { "epoch": 0.22, "grad_norm": 1.3611501690441554, "learning_rate": 9.083542032166712e-06, "loss": 0.2019, "step": 3075 }, { "epoch": 0.22, "grad_norm": 1.4694260224150932, "learning_rate": 9.082873464657486e-06, "loss": 0.2426, "step": 3076 }, { "epoch": 0.22, "grad_norm": 1.2860551361951889, "learning_rate": 9.082204677995832e-06, "loss": 0.1779, "step": 3077 }, { "epoch": 0.22, "grad_norm": 1.423730286399062, "learning_rate": 9.081535672217651e-06, "loss": 0.1782, "step": 3078 }, { "epoch": 0.22, "grad_norm": 4.6547524252785575, "learning_rate": 9.080866447358849e-06, "loss": 0.6702, "step": 3079 }, { "epoch": 0.22, "grad_norm": 1.343235357299407, "learning_rate": 9.080197003455347e-06, "loss": 0.2333, "step": 3080 }, { "epoch": 0.22, "grad_norm": 1.3404968854374828, "learning_rate": 9.079527340543082e-06, "loss": 0.2761, "step": 3081 }, { "epoch": 0.22, "grad_norm": 1.5419329235591352, "learning_rate": 9.078857458657995e-06, "loss": 0.22, "step": 3082 }, { "epoch": 0.22, "grad_norm": 1.5020062033996753, "learning_rate": 9.078187357836043e-06, "loss": 0.2159, "step": 3083 }, { "epoch": 0.22, "grad_norm": 1.2933799428719461, "learning_rate": 9.077517038113197e-06, "loss": 0.1797, "step": 3084 }, { "epoch": 0.22, "grad_norm": 1.5265142388755737, "learning_rate": 9.076846499525436e-06, "loss": 0.2151, "step": 3085 }, { "epoch": 0.22, "grad_norm": 1.3892483603033727, "learning_rate": 9.07617574210875e-06, "loss": 0.2239, "step": 3086 }, { "epoch": 0.22, "grad_norm": 1.3471434123066444, "learning_rate": 9.075504765899145e-06, "loss": 0.2144, "step": 3087 }, { "epoch": 0.22, "grad_norm": 1.6247715660885467, "learning_rate": 9.074833570932634e-06, "loss": 0.2442, "step": 3088 }, { "epoch": 0.22, "grad_norm": 1.615856520882401, "learning_rate": 9.074162157245248e-06, "loss": 0.2323, "step": 3089 }, { "epoch": 0.22, "grad_norm": 1.4738229671936398, "learning_rate": 9.07349052487302e-06, "loss": 0.278, "step": 3090 }, { "epoch": 0.22, "grad_norm": 1.3576676047963705, "learning_rate": 9.072818673852006e-06, "loss": 0.1989, "step": 3091 }, { "epoch": 0.22, "grad_norm": 1.4049716898845126, "learning_rate": 9.072146604218266e-06, "loss": 0.2074, "step": 3092 }, { "epoch": 0.22, "grad_norm": 1.399466741478571, "learning_rate": 9.071474316007874e-06, "loss": 0.2408, "step": 3093 }, { "epoch": 0.22, "grad_norm": 1.6050994153710094, "learning_rate": 9.070801809256915e-06, "loss": 0.2086, "step": 3094 }, { "epoch": 0.22, "grad_norm": 1.634783221499501, "learning_rate": 9.070129084001486e-06, "loss": 0.2265, "step": 3095 }, { "epoch": 0.22, "grad_norm": 4.820896781602804, "learning_rate": 9.0694561402777e-06, "loss": 0.8011, "step": 3096 }, { "epoch": 0.22, "grad_norm": 1.550301927650956, "learning_rate": 9.068782978121675e-06, "loss": 0.2557, "step": 3097 }, { "epoch": 0.22, "grad_norm": 1.5067359719988793, "learning_rate": 9.068109597569544e-06, "loss": 0.2249, "step": 3098 }, { "epoch": 0.22, "grad_norm": 4.313580573758077, "learning_rate": 9.06743599865745e-06, "loss": 0.5505, "step": 3099 }, { "epoch": 0.22, "grad_norm": 1.391490799551258, "learning_rate": 9.066762181421552e-06, "loss": 0.1928, "step": 3100 }, { "epoch": 0.22, "grad_norm": 1.3587038662699662, "learning_rate": 9.066088145898017e-06, "loss": 0.2139, "step": 3101 }, { "epoch": 0.22, "grad_norm": 1.393420355064994, "learning_rate": 9.065413892123024e-06, "loss": 0.2286, "step": 3102 }, { "epoch": 0.22, "grad_norm": 1.4738503317613607, "learning_rate": 9.064739420132761e-06, "loss": 0.2028, "step": 3103 }, { "epoch": 0.22, "grad_norm": 1.3833652833574654, "learning_rate": 9.06406472996344e-06, "loss": 0.2424, "step": 3104 }, { "epoch": 0.22, "grad_norm": 6.982873133522027, "learning_rate": 9.063389821651265e-06, "loss": 0.7066, "step": 3105 }, { "epoch": 0.22, "grad_norm": 1.6506590188069235, "learning_rate": 9.06271469523247e-06, "loss": 0.2525, "step": 3106 }, { "epoch": 0.22, "grad_norm": 1.4261458714564441, "learning_rate": 9.06203935074329e-06, "loss": 0.2693, "step": 3107 }, { "epoch": 0.22, "grad_norm": 1.461768602089681, "learning_rate": 9.061363788219975e-06, "loss": 0.2425, "step": 3108 }, { "epoch": 0.22, "grad_norm": 4.909424870684166, "learning_rate": 9.060688007698787e-06, "loss": 0.7189, "step": 3109 }, { "epoch": 0.22, "grad_norm": 1.3541743352047728, "learning_rate": 9.060012009215999e-06, "loss": 0.2186, "step": 3110 }, { "epoch": 0.22, "grad_norm": 7.860032310828481, "learning_rate": 9.059335792807896e-06, "loss": 0.6798, "step": 3111 }, { "epoch": 0.22, "grad_norm": 1.4382055948299062, "learning_rate": 9.058659358510774e-06, "loss": 0.2274, "step": 3112 }, { "epoch": 0.22, "grad_norm": 1.4517122439149788, "learning_rate": 9.057982706360942e-06, "loss": 0.2398, "step": 3113 }, { "epoch": 0.22, "grad_norm": 1.528432676062184, "learning_rate": 9.05730583639472e-06, "loss": 0.2427, "step": 3114 }, { "epoch": 0.22, "grad_norm": 1.4671933443084157, "learning_rate": 9.05662874864844e-06, "loss": 0.1921, "step": 3115 }, { "epoch": 0.22, "grad_norm": 1.3892865370872574, "learning_rate": 9.055951443158445e-06, "loss": 0.207, "step": 3116 }, { "epoch": 0.22, "grad_norm": 1.2271431427823836, "learning_rate": 9.055273919961089e-06, "loss": 0.1942, "step": 3117 }, { "epoch": 0.22, "grad_norm": 1.4506253933662845, "learning_rate": 9.054596179092739e-06, "loss": 0.2402, "step": 3118 }, { "epoch": 0.22, "grad_norm": 1.283514446520111, "learning_rate": 9.053918220589776e-06, "loss": 0.2037, "step": 3119 }, { "epoch": 0.22, "grad_norm": 1.4125661677551773, "learning_rate": 9.053240044488587e-06, "loss": 0.2184, "step": 3120 }, { "epoch": 0.22, "grad_norm": 1.3426478385223595, "learning_rate": 9.052561650825575e-06, "loss": 0.1755, "step": 3121 }, { "epoch": 0.22, "grad_norm": 1.304310556183331, "learning_rate": 9.051883039637155e-06, "loss": 0.2106, "step": 3122 }, { "epoch": 0.22, "grad_norm": 1.4008051832117994, "learning_rate": 9.051204210959749e-06, "loss": 0.1821, "step": 3123 }, { "epoch": 0.22, "grad_norm": 1.2700401304587063, "learning_rate": 9.050525164829797e-06, "loss": 0.2093, "step": 3124 }, { "epoch": 0.22, "grad_norm": 1.3075651981892595, "learning_rate": 9.049845901283746e-06, "loss": 0.17, "step": 3125 }, { "epoch": 0.22, "grad_norm": 1.6028507093490816, "learning_rate": 9.049166420358056e-06, "loss": 0.2927, "step": 3126 }, { "epoch": 0.22, "grad_norm": 1.4419739490653496, "learning_rate": 9.048486722089197e-06, "loss": 0.1981, "step": 3127 }, { "epoch": 0.22, "grad_norm": 1.4834209522926776, "learning_rate": 9.047806806513657e-06, "loss": 0.228, "step": 3128 }, { "epoch": 0.22, "grad_norm": 1.2671611224554695, "learning_rate": 9.047126673667929e-06, "loss": 0.18, "step": 3129 }, { "epoch": 0.22, "grad_norm": 1.6589943518084107, "learning_rate": 9.046446323588518e-06, "loss": 0.2433, "step": 3130 }, { "epoch": 0.22, "grad_norm": 1.3639326500262245, "learning_rate": 9.045765756311944e-06, "loss": 0.2189, "step": 3131 }, { "epoch": 0.22, "grad_norm": 5.579941761001767, "learning_rate": 9.045084971874738e-06, "loss": 0.5439, "step": 3132 }, { "epoch": 0.22, "grad_norm": 1.4443011167960502, "learning_rate": 9.04440397031344e-06, "loss": 0.2119, "step": 3133 }, { "epoch": 0.22, "grad_norm": 5.408106286512159, "learning_rate": 9.043722751664606e-06, "loss": 0.5594, "step": 3134 }, { "epoch": 0.22, "grad_norm": 1.455799147315538, "learning_rate": 9.043041315964797e-06, "loss": 0.1983, "step": 3135 }, { "epoch": 0.22, "grad_norm": 1.4318257825930656, "learning_rate": 9.042359663250595e-06, "loss": 0.2296, "step": 3136 }, { "epoch": 0.22, "grad_norm": 1.496274749187072, "learning_rate": 9.041677793558586e-06, "loss": 0.189, "step": 3137 }, { "epoch": 0.22, "grad_norm": 1.4598534225806843, "learning_rate": 9.040995706925368e-06, "loss": 0.2001, "step": 3138 }, { "epoch": 0.22, "grad_norm": 4.56329828889496, "learning_rate": 9.040313403387555e-06, "loss": 0.5713, "step": 3139 }, { "epoch": 0.22, "grad_norm": 1.4594863013418782, "learning_rate": 9.039630882981769e-06, "loss": 0.2085, "step": 3140 }, { "epoch": 0.22, "grad_norm": 1.410552904152116, "learning_rate": 9.038948145744647e-06, "loss": 0.1698, "step": 3141 }, { "epoch": 0.22, "grad_norm": 1.4609510355601452, "learning_rate": 9.038265191712833e-06, "loss": 0.187, "step": 3142 }, { "epoch": 0.22, "grad_norm": 1.4617049072236215, "learning_rate": 9.037582020922988e-06, "loss": 0.2167, "step": 3143 }, { "epoch": 0.22, "grad_norm": 1.5031952306224532, "learning_rate": 9.036898633411781e-06, "loss": 0.2145, "step": 3144 }, { "epoch": 0.22, "grad_norm": 1.5017250537407745, "learning_rate": 9.036215029215892e-06, "loss": 0.248, "step": 3145 }, { "epoch": 0.23, "grad_norm": 1.481497369269224, "learning_rate": 9.035531208372016e-06, "loss": 0.2096, "step": 3146 }, { "epoch": 0.23, "grad_norm": 1.3088068408624856, "learning_rate": 9.034847170916857e-06, "loss": 0.1892, "step": 3147 }, { "epoch": 0.23, "grad_norm": 1.5167000697990123, "learning_rate": 9.034162916887131e-06, "loss": 0.2303, "step": 3148 }, { "epoch": 0.23, "grad_norm": 1.3008429143131708, "learning_rate": 9.033478446319565e-06, "loss": 0.2114, "step": 3149 }, { "epoch": 0.23, "grad_norm": 1.3185034965123557, "learning_rate": 9.032793759250903e-06, "loss": 0.2099, "step": 3150 }, { "epoch": 0.23, "grad_norm": 1.4200025049597909, "learning_rate": 9.03210885571789e-06, "loss": 0.2076, "step": 3151 }, { "epoch": 0.23, "grad_norm": 1.4441790099506742, "learning_rate": 9.031423735757296e-06, "loss": 0.2449, "step": 3152 }, { "epoch": 0.23, "grad_norm": 1.4408584758246343, "learning_rate": 9.030738399405891e-06, "loss": 0.2127, "step": 3153 }, { "epoch": 0.23, "grad_norm": 1.383397977199835, "learning_rate": 9.030052846700462e-06, "loss": 0.2482, "step": 3154 }, { "epoch": 0.23, "grad_norm": 1.4172496286710512, "learning_rate": 9.029367077677807e-06, "loss": 0.1669, "step": 3155 }, { "epoch": 0.23, "grad_norm": 1.3080856995265862, "learning_rate": 9.028681092374733e-06, "loss": 0.1898, "step": 3156 }, { "epoch": 0.23, "grad_norm": 1.3740473249753982, "learning_rate": 9.027994890828065e-06, "loss": 0.2183, "step": 3157 }, { "epoch": 0.23, "grad_norm": 5.540079901954493, "learning_rate": 9.027308473074633e-06, "loss": 0.6943, "step": 3158 }, { "epoch": 0.23, "grad_norm": 1.309360876920952, "learning_rate": 9.026621839151282e-06, "loss": 0.1587, "step": 3159 }, { "epoch": 0.23, "grad_norm": 1.5587679645842738, "learning_rate": 9.025934989094866e-06, "loss": 0.2262, "step": 3160 }, { "epoch": 0.23, "grad_norm": 1.316631589828942, "learning_rate": 9.025247922942256e-06, "loss": 0.2196, "step": 3161 }, { "epoch": 0.23, "grad_norm": 1.4270498299951493, "learning_rate": 9.024560640730328e-06, "loss": 0.1895, "step": 3162 }, { "epoch": 0.23, "grad_norm": 1.4038980039618878, "learning_rate": 9.023873142495973e-06, "loss": 0.1801, "step": 3163 }, { "epoch": 0.23, "grad_norm": 1.3255108023193858, "learning_rate": 9.023185428276095e-06, "loss": 0.2354, "step": 3164 }, { "epoch": 0.23, "grad_norm": 1.4758133652302277, "learning_rate": 9.022497498107607e-06, "loss": 0.2417, "step": 3165 }, { "epoch": 0.23, "grad_norm": 1.3752511682461506, "learning_rate": 9.021809352027433e-06, "loss": 0.1927, "step": 3166 }, { "epoch": 0.23, "grad_norm": 1.479281693261229, "learning_rate": 9.02112099007251e-06, "loss": 0.243, "step": 3167 }, { "epoch": 0.23, "grad_norm": 1.3087628232523358, "learning_rate": 9.020432412279789e-06, "loss": 0.184, "step": 3168 }, { "epoch": 0.23, "grad_norm": 1.4489142467153464, "learning_rate": 9.019743618686226e-06, "loss": 0.2348, "step": 3169 }, { "epoch": 0.23, "grad_norm": 1.4677376178577999, "learning_rate": 9.019054609328796e-06, "loss": 0.1848, "step": 3170 }, { "epoch": 0.23, "grad_norm": 1.273660363404849, "learning_rate": 9.018365384244483e-06, "loss": 0.229, "step": 3171 }, { "epoch": 0.23, "grad_norm": 1.4744876308426846, "learning_rate": 9.01767594347028e-06, "loss": 0.2179, "step": 3172 }, { "epoch": 0.23, "grad_norm": 1.545305547174912, "learning_rate": 9.016986287043193e-06, "loss": 0.2116, "step": 3173 }, { "epoch": 0.23, "grad_norm": 1.4915769035904196, "learning_rate": 9.016296415000241e-06, "loss": 0.2265, "step": 3174 }, { "epoch": 0.23, "grad_norm": 1.3687396335099447, "learning_rate": 9.015606327378453e-06, "loss": 0.2256, "step": 3175 }, { "epoch": 0.23, "grad_norm": 1.7114140537035492, "learning_rate": 9.01491602421487e-06, "loss": 0.2084, "step": 3176 }, { "epoch": 0.23, "grad_norm": 1.5365783826583845, "learning_rate": 9.014225505546545e-06, "loss": 0.2273, "step": 3177 }, { "epoch": 0.23, "grad_norm": 1.5898198881183163, "learning_rate": 9.013534771410544e-06, "loss": 0.2341, "step": 3178 }, { "epoch": 0.23, "grad_norm": 1.3571622408935828, "learning_rate": 9.012843821843941e-06, "loss": 0.2113, "step": 3179 }, { "epoch": 0.23, "grad_norm": 1.697027857439972, "learning_rate": 9.012152656883824e-06, "loss": 0.2421, "step": 3180 }, { "epoch": 0.23, "grad_norm": 1.5212639457440849, "learning_rate": 9.01146127656729e-06, "loss": 0.1923, "step": 3181 }, { "epoch": 0.23, "grad_norm": 1.374267377501372, "learning_rate": 9.010769680931454e-06, "loss": 0.2611, "step": 3182 }, { "epoch": 0.23, "grad_norm": 1.3577588440248787, "learning_rate": 9.010077870013435e-06, "loss": 0.2476, "step": 3183 }, { "epoch": 0.23, "grad_norm": 1.4891901492574346, "learning_rate": 9.009385843850366e-06, "loss": 0.2274, "step": 3184 }, { "epoch": 0.23, "grad_norm": 1.451760575657211, "learning_rate": 9.008693602479392e-06, "loss": 0.225, "step": 3185 }, { "epoch": 0.23, "grad_norm": 2.74594614702044, "learning_rate": 9.008001145937675e-06, "loss": 0.2439, "step": 3186 }, { "epoch": 0.23, "grad_norm": 1.4256278661675348, "learning_rate": 9.007308474262378e-06, "loss": 0.2215, "step": 3187 }, { "epoch": 0.23, "grad_norm": 1.3514782376673284, "learning_rate": 9.00661558749068e-06, "loss": 0.2308, "step": 3188 }, { "epoch": 0.23, "grad_norm": 1.2809703659553242, "learning_rate": 9.005922485659777e-06, "loss": 0.213, "step": 3189 }, { "epoch": 0.23, "grad_norm": 1.2160509839341365, "learning_rate": 9.005229168806869e-06, "loss": 0.1637, "step": 3190 }, { "epoch": 0.23, "grad_norm": 4.133087376721232, "learning_rate": 9.00453563696917e-06, "loss": 0.6876, "step": 3191 }, { "epoch": 0.23, "grad_norm": 1.5037172568326707, "learning_rate": 9.003841890183908e-06, "loss": 0.2004, "step": 3192 }, { "epoch": 0.23, "grad_norm": 1.3167217856544602, "learning_rate": 9.00314792848832e-06, "loss": 0.1832, "step": 3193 }, { "epoch": 0.23, "grad_norm": 1.3041244312590856, "learning_rate": 9.002453751919655e-06, "loss": 0.1984, "step": 3194 }, { "epoch": 0.23, "grad_norm": 1.3127729295467228, "learning_rate": 9.001759360515175e-06, "loss": 0.2126, "step": 3195 }, { "epoch": 0.23, "grad_norm": 1.2492951653987943, "learning_rate": 9.001064754312148e-06, "loss": 0.1755, "step": 3196 }, { "epoch": 0.23, "grad_norm": 1.4802191001341758, "learning_rate": 9.00036993334786e-06, "loss": 0.2563, "step": 3197 }, { "epoch": 0.23, "grad_norm": 8.110759055604152, "learning_rate": 8.999674897659609e-06, "loss": 0.6189, "step": 3198 }, { "epoch": 0.23, "grad_norm": 1.6213529442893517, "learning_rate": 8.998979647284698e-06, "loss": 0.2187, "step": 3199 }, { "epoch": 0.23, "grad_norm": 1.4059377209471042, "learning_rate": 8.998284182260448e-06, "loss": 0.2536, "step": 3200 }, { "epoch": 0.23, "grad_norm": 1.395358619219628, "learning_rate": 8.997588502624186e-06, "loss": 0.2077, "step": 3201 }, { "epoch": 0.23, "grad_norm": 2.080373494390886, "learning_rate": 8.996892608413254e-06, "loss": 0.2668, "step": 3202 }, { "epoch": 0.23, "grad_norm": 1.7763310644364663, "learning_rate": 8.99619649966501e-06, "loss": 0.2101, "step": 3203 }, { "epoch": 0.23, "grad_norm": 1.5822614452434214, "learning_rate": 8.995500176416809e-06, "loss": 0.2281, "step": 3204 }, { "epoch": 0.23, "grad_norm": 1.4259913307625192, "learning_rate": 8.994803638706033e-06, "loss": 0.2234, "step": 3205 }, { "epoch": 0.23, "grad_norm": 5.731753048678132, "learning_rate": 8.994106886570069e-06, "loss": 0.674, "step": 3206 }, { "epoch": 0.23, "grad_norm": 1.4493100939504822, "learning_rate": 8.993409920046315e-06, "loss": 0.2885, "step": 3207 }, { "epoch": 0.23, "grad_norm": 1.4136468104460167, "learning_rate": 8.99271273917218e-06, "loss": 0.227, "step": 3208 }, { "epoch": 0.23, "grad_norm": 1.214134518468838, "learning_rate": 8.99201534398509e-06, "loss": 0.1936, "step": 3209 }, { "epoch": 0.23, "grad_norm": 5.161569171056517, "learning_rate": 8.991317734522475e-06, "loss": 0.6661, "step": 3210 }, { "epoch": 0.23, "grad_norm": 1.3443046610451856, "learning_rate": 8.990619910821779e-06, "loss": 0.2172, "step": 3211 }, { "epoch": 0.23, "grad_norm": 1.4391187234569074, "learning_rate": 8.989921872920462e-06, "loss": 0.1777, "step": 3212 }, { "epoch": 0.23, "grad_norm": 1.5870561010254274, "learning_rate": 8.989223620855989e-06, "loss": 0.2353, "step": 3213 }, { "epoch": 0.23, "grad_norm": 1.3162845902071516, "learning_rate": 8.988525154665842e-06, "loss": 0.1524, "step": 3214 }, { "epoch": 0.23, "grad_norm": 1.4499813871002178, "learning_rate": 8.987826474387507e-06, "loss": 0.2116, "step": 3215 }, { "epoch": 0.23, "grad_norm": 1.3898886209879526, "learning_rate": 8.987127580058492e-06, "loss": 0.2197, "step": 3216 }, { "epoch": 0.23, "grad_norm": 1.3683138112216187, "learning_rate": 8.986428471716308e-06, "loss": 0.1906, "step": 3217 }, { "epoch": 0.23, "grad_norm": 1.3725151241405702, "learning_rate": 8.98572914939848e-06, "loss": 0.1904, "step": 3218 }, { "epoch": 0.23, "grad_norm": 1.5775040302906564, "learning_rate": 8.985029613142548e-06, "loss": 0.26, "step": 3219 }, { "epoch": 0.23, "grad_norm": 1.333948626162973, "learning_rate": 8.984329862986056e-06, "loss": 0.1987, "step": 3220 }, { "epoch": 0.23, "grad_norm": 1.223090017271359, "learning_rate": 8.983629898966566e-06, "loss": 0.1752, "step": 3221 }, { "epoch": 0.23, "grad_norm": 7.748776699790595, "learning_rate": 8.98292972112165e-06, "loss": 0.4016, "step": 3222 }, { "epoch": 0.23, "grad_norm": 1.3089830235128994, "learning_rate": 8.982229329488889e-06, "loss": 0.1943, "step": 3223 }, { "epoch": 0.23, "grad_norm": 1.4425652085101002, "learning_rate": 8.98152872410588e-06, "loss": 0.2473, "step": 3224 }, { "epoch": 0.23, "grad_norm": 1.3641048051139864, "learning_rate": 8.980827905010226e-06, "loss": 0.2242, "step": 3225 }, { "epoch": 0.23, "grad_norm": 1.493343081710168, "learning_rate": 8.980126872239543e-06, "loss": 0.2482, "step": 3226 }, { "epoch": 0.23, "grad_norm": 1.6677565606161084, "learning_rate": 8.979425625831465e-06, "loss": 0.2223, "step": 3227 }, { "epoch": 0.23, "grad_norm": 1.441680509273604, "learning_rate": 8.978724165823626e-06, "loss": 0.2414, "step": 3228 }, { "epoch": 0.23, "grad_norm": 1.2203954117430933, "learning_rate": 8.978022492253681e-06, "loss": 0.2068, "step": 3229 }, { "epoch": 0.23, "grad_norm": 1.446225807207349, "learning_rate": 8.977320605159294e-06, "loss": 0.2245, "step": 3230 }, { "epoch": 0.23, "grad_norm": 1.3899562931464085, "learning_rate": 8.976618504578136e-06, "loss": 0.2392, "step": 3231 }, { "epoch": 0.23, "grad_norm": 1.3958861514752925, "learning_rate": 8.975916190547897e-06, "loss": 0.2091, "step": 3232 }, { "epoch": 0.23, "grad_norm": 1.3705652383212996, "learning_rate": 8.97521366310627e-06, "loss": 0.203, "step": 3233 }, { "epoch": 0.23, "grad_norm": 1.4331444789057235, "learning_rate": 8.974510922290968e-06, "loss": 0.2751, "step": 3234 }, { "epoch": 0.23, "grad_norm": 4.500506293702792, "learning_rate": 8.97380796813971e-06, "loss": 0.71, "step": 3235 }, { "epoch": 0.23, "grad_norm": 1.2615768578871394, "learning_rate": 8.973104800690226e-06, "loss": 0.1716, "step": 3236 }, { "epoch": 0.23, "grad_norm": 1.23751656026381, "learning_rate": 8.972401419980262e-06, "loss": 0.166, "step": 3237 }, { "epoch": 0.23, "grad_norm": 1.4511289385426775, "learning_rate": 8.97169782604757e-06, "loss": 0.2312, "step": 3238 }, { "epoch": 0.23, "grad_norm": 1.282322235389946, "learning_rate": 8.970994018929917e-06, "loss": 0.1902, "step": 3239 }, { "epoch": 0.23, "grad_norm": 1.3983098449734295, "learning_rate": 8.970289998665083e-06, "loss": 0.2387, "step": 3240 }, { "epoch": 0.23, "grad_norm": 1.405867915681503, "learning_rate": 8.969585765290853e-06, "loss": 0.2505, "step": 3241 }, { "epoch": 0.23, "grad_norm": 1.5374575437133131, "learning_rate": 8.96888131884503e-06, "loss": 0.2027, "step": 3242 }, { "epoch": 0.23, "grad_norm": 14.947900236737672, "learning_rate": 8.968176659365426e-06, "loss": 0.6052, "step": 3243 }, { "epoch": 0.23, "grad_norm": 1.334367057617126, "learning_rate": 8.967471786889863e-06, "loss": 0.157, "step": 3244 }, { "epoch": 0.23, "grad_norm": 1.394586182238013, "learning_rate": 8.966766701456177e-06, "loss": 0.2278, "step": 3245 }, { "epoch": 0.23, "grad_norm": 1.4403049322476118, "learning_rate": 8.966061403102213e-06, "loss": 0.225, "step": 3246 }, { "epoch": 0.23, "grad_norm": 1.3759637517081997, "learning_rate": 8.96535589186583e-06, "loss": 0.2144, "step": 3247 }, { "epoch": 0.23, "grad_norm": 1.475982869310369, "learning_rate": 8.964650167784895e-06, "loss": 0.2369, "step": 3248 }, { "epoch": 0.23, "grad_norm": 1.3847048798412704, "learning_rate": 8.96394423089729e-06, "loss": 0.2176, "step": 3249 }, { "epoch": 0.23, "grad_norm": 1.511481528160134, "learning_rate": 8.963238081240908e-06, "loss": 0.2209, "step": 3250 }, { "epoch": 0.23, "grad_norm": 1.3985750723212627, "learning_rate": 8.96253171885365e-06, "loss": 0.2128, "step": 3251 }, { "epoch": 0.23, "grad_norm": 1.2752337880192919, "learning_rate": 8.96182514377343e-06, "loss": 0.187, "step": 3252 }, { "epoch": 0.23, "grad_norm": 1.3249883079945826, "learning_rate": 8.961118356038179e-06, "loss": 0.1973, "step": 3253 }, { "epoch": 0.23, "grad_norm": 1.6044797164587958, "learning_rate": 8.96041135568583e-06, "loss": 0.2278, "step": 3254 }, { "epoch": 0.23, "grad_norm": 1.3220423255461502, "learning_rate": 8.959704142754331e-06, "loss": 0.2038, "step": 3255 }, { "epoch": 0.23, "grad_norm": 1.4906115572189955, "learning_rate": 8.958996717281648e-06, "loss": 0.2829, "step": 3256 }, { "epoch": 0.23, "grad_norm": 1.445090684175075, "learning_rate": 8.958289079305747e-06, "loss": 0.2269, "step": 3257 }, { "epoch": 0.23, "grad_norm": 1.2387680616871306, "learning_rate": 8.957581228864616e-06, "loss": 0.2081, "step": 3258 }, { "epoch": 0.23, "grad_norm": 1.4255887515723153, "learning_rate": 8.956873165996245e-06, "loss": 0.1826, "step": 3259 }, { "epoch": 0.23, "grad_norm": 1.2772500349247426, "learning_rate": 8.956164890738643e-06, "loss": 0.2256, "step": 3260 }, { "epoch": 0.23, "grad_norm": 1.4934700634627525, "learning_rate": 8.955456403129828e-06, "loss": 0.2279, "step": 3261 }, { "epoch": 0.23, "grad_norm": 1.4457790992712745, "learning_rate": 8.954747703207826e-06, "loss": 0.2417, "step": 3262 }, { "epoch": 0.23, "grad_norm": 1.3033396036548048, "learning_rate": 8.95403879101068e-06, "loss": 0.1714, "step": 3263 }, { "epoch": 0.23, "grad_norm": 1.4536092736551012, "learning_rate": 8.953329666576439e-06, "loss": 0.2011, "step": 3264 }, { "epoch": 0.23, "grad_norm": 1.5226812385381971, "learning_rate": 8.952620329943168e-06, "loss": 0.2723, "step": 3265 }, { "epoch": 0.23, "grad_norm": 8.983647940918482, "learning_rate": 8.95191078114894e-06, "loss": 0.4387, "step": 3266 }, { "epoch": 0.23, "grad_norm": 1.3554911560700096, "learning_rate": 8.95120102023184e-06, "loss": 0.2106, "step": 3267 }, { "epoch": 0.23, "grad_norm": 1.5086663228110664, "learning_rate": 8.95049104722997e-06, "loss": 0.2534, "step": 3268 }, { "epoch": 0.23, "grad_norm": 1.3854082469218247, "learning_rate": 8.949780862181432e-06, "loss": 0.2278, "step": 3269 }, { "epoch": 0.23, "grad_norm": 1.3804243408483254, "learning_rate": 8.949070465124352e-06, "loss": 0.2119, "step": 3270 }, { "epoch": 0.23, "grad_norm": 1.4561605996855538, "learning_rate": 8.948359856096855e-06, "loss": 0.1916, "step": 3271 }, { "epoch": 0.23, "grad_norm": 1.5314212914012642, "learning_rate": 8.94764903513709e-06, "loss": 0.2236, "step": 3272 }, { "epoch": 0.23, "grad_norm": 1.4282994510349154, "learning_rate": 8.946938002283206e-06, "loss": 0.2426, "step": 3273 }, { "epoch": 0.23, "grad_norm": 1.2951682280409567, "learning_rate": 8.946226757573371e-06, "loss": 0.2087, "step": 3274 }, { "epoch": 0.23, "grad_norm": 1.3099381063232904, "learning_rate": 8.945515301045759e-06, "loss": 0.2211, "step": 3275 }, { "epoch": 0.23, "grad_norm": 1.3063613448461173, "learning_rate": 8.944803632738563e-06, "loss": 0.2165, "step": 3276 }, { "epoch": 0.23, "grad_norm": 1.4192931220108251, "learning_rate": 8.944091752689979e-06, "loss": 0.2261, "step": 3277 }, { "epoch": 0.23, "grad_norm": 5.307477581721814, "learning_rate": 8.943379660938219e-06, "loss": 0.6033, "step": 3278 }, { "epoch": 0.23, "grad_norm": 1.5015387143512329, "learning_rate": 8.942667357521505e-06, "loss": 0.2724, "step": 3279 }, { "epoch": 0.23, "grad_norm": 1.224459884286943, "learning_rate": 8.941954842478071e-06, "loss": 0.1929, "step": 3280 }, { "epoch": 0.23, "grad_norm": 1.3979436724812062, "learning_rate": 8.94124211584616e-06, "loss": 0.219, "step": 3281 }, { "epoch": 0.23, "grad_norm": 6.125885975141405, "learning_rate": 8.940529177664032e-06, "loss": 0.5862, "step": 3282 }, { "epoch": 0.23, "grad_norm": 1.3555718616648973, "learning_rate": 8.939816027969952e-06, "loss": 0.2132, "step": 3283 }, { "epoch": 0.23, "grad_norm": 1.3012619267263932, "learning_rate": 8.9391026668022e-06, "loss": 0.2212, "step": 3284 }, { "epoch": 0.23, "grad_norm": 1.4156025698408934, "learning_rate": 8.938389094199065e-06, "loss": 0.2666, "step": 3285 }, { "epoch": 0.24, "grad_norm": 5.770113895552139, "learning_rate": 8.93767531019885e-06, "loss": 0.7606, "step": 3286 }, { "epoch": 0.24, "grad_norm": 7.384838394540693, "learning_rate": 8.936961314839869e-06, "loss": 0.5548, "step": 3287 }, { "epoch": 0.24, "grad_norm": 3.7104024976241554, "learning_rate": 8.936247108160444e-06, "loss": 0.4813, "step": 3288 }, { "epoch": 0.24, "grad_norm": 1.3202732823741723, "learning_rate": 8.935532690198912e-06, "loss": 0.216, "step": 3289 }, { "epoch": 0.24, "grad_norm": 1.4286467883584035, "learning_rate": 8.934818060993622e-06, "loss": 0.2235, "step": 3290 }, { "epoch": 0.24, "grad_norm": 5.437572706600563, "learning_rate": 8.934103220582929e-06, "loss": 0.5074, "step": 3291 }, { "epoch": 0.24, "grad_norm": 1.4469278564531205, "learning_rate": 8.933388169005205e-06, "loss": 0.2296, "step": 3292 }, { "epoch": 0.24, "grad_norm": 1.3784087009010173, "learning_rate": 8.93267290629883e-06, "loss": 0.2095, "step": 3293 }, { "epoch": 0.24, "grad_norm": 1.3863299577825723, "learning_rate": 8.931957432502198e-06, "loss": 0.1965, "step": 3294 }, { "epoch": 0.24, "grad_norm": 1.4180808522187207, "learning_rate": 8.931241747653711e-06, "loss": 0.205, "step": 3295 }, { "epoch": 0.24, "grad_norm": 6.736384856857584, "learning_rate": 8.930525851791787e-06, "loss": 0.6212, "step": 3296 }, { "epoch": 0.24, "grad_norm": 11.049262037118591, "learning_rate": 8.929809744954849e-06, "loss": 0.5757, "step": 3297 }, { "epoch": 0.24, "grad_norm": 7.717374439126013, "learning_rate": 8.929093427181336e-06, "loss": 0.7092, "step": 3298 }, { "epoch": 0.24, "grad_norm": 1.472558876669526, "learning_rate": 8.928376898509698e-06, "loss": 0.2056, "step": 3299 }, { "epoch": 0.24, "grad_norm": 1.4783267715022645, "learning_rate": 8.927660158978392e-06, "loss": 0.2225, "step": 3300 }, { "epoch": 0.24, "grad_norm": 1.376954662635892, "learning_rate": 8.926943208625896e-06, "loss": 0.2319, "step": 3301 }, { "epoch": 0.24, "grad_norm": 1.5247660656231647, "learning_rate": 8.92622604749069e-06, "loss": 0.2208, "step": 3302 }, { "epoch": 0.24, "grad_norm": 1.357453395494915, "learning_rate": 8.925508675611264e-06, "loss": 0.2316, "step": 3303 }, { "epoch": 0.24, "grad_norm": 1.3519253453378373, "learning_rate": 8.92479109302613e-06, "loss": 0.2177, "step": 3304 }, { "epoch": 0.24, "grad_norm": 1.4200443074562479, "learning_rate": 8.9240732997738e-06, "loss": 0.1984, "step": 3305 }, { "epoch": 0.24, "grad_norm": 1.3521377155229775, "learning_rate": 8.923355295892808e-06, "loss": 0.2172, "step": 3306 }, { "epoch": 0.24, "grad_norm": 1.4058697888257647, "learning_rate": 8.92263708142169e-06, "loss": 0.2038, "step": 3307 }, { "epoch": 0.24, "grad_norm": 7.733111647857325, "learning_rate": 8.921918656398995e-06, "loss": 0.6956, "step": 3308 }, { "epoch": 0.24, "grad_norm": 1.3219963398035541, "learning_rate": 8.921200020863289e-06, "loss": 0.1989, "step": 3309 }, { "epoch": 0.24, "grad_norm": 1.502684391473349, "learning_rate": 8.920481174853144e-06, "loss": 0.2224, "step": 3310 }, { "epoch": 0.24, "grad_norm": 1.3099365778310341, "learning_rate": 8.919762118407146e-06, "loss": 0.1821, "step": 3311 }, { "epoch": 0.24, "grad_norm": 1.3467531667799149, "learning_rate": 8.919042851563888e-06, "loss": 0.2041, "step": 3312 }, { "epoch": 0.24, "grad_norm": 5.611390824704519, "learning_rate": 8.91832337436198e-06, "loss": 0.5028, "step": 3313 }, { "epoch": 0.24, "grad_norm": 1.3578157689251673, "learning_rate": 8.917603686840039e-06, "loss": 0.2835, "step": 3314 }, { "epoch": 0.24, "grad_norm": 5.567274345310595, "learning_rate": 8.916883789036697e-06, "loss": 0.6578, "step": 3315 }, { "epoch": 0.24, "grad_norm": 1.3639732581018305, "learning_rate": 8.916163680990593e-06, "loss": 0.1818, "step": 3316 }, { "epoch": 0.24, "grad_norm": 1.223463220252573, "learning_rate": 8.915443362740383e-06, "loss": 0.2021, "step": 3317 }, { "epoch": 0.24, "grad_norm": 1.3470178564671258, "learning_rate": 8.914722834324725e-06, "loss": 0.1793, "step": 3318 }, { "epoch": 0.24, "grad_norm": 1.4195466440564755, "learning_rate": 8.914002095782301e-06, "loss": 0.2106, "step": 3319 }, { "epoch": 0.24, "grad_norm": 1.4707969756725279, "learning_rate": 8.913281147151793e-06, "loss": 0.2352, "step": 3320 }, { "epoch": 0.24, "grad_norm": 4.0827904437108975, "learning_rate": 8.912559988471899e-06, "loss": 0.4202, "step": 3321 }, { "epoch": 0.24, "grad_norm": 1.4672794497351787, "learning_rate": 8.911838619781328e-06, "loss": 0.2628, "step": 3322 }, { "epoch": 0.24, "grad_norm": 1.32036419332378, "learning_rate": 8.911117041118803e-06, "loss": 0.2029, "step": 3323 }, { "epoch": 0.24, "grad_norm": 1.3099676791027253, "learning_rate": 8.910395252523053e-06, "loss": 0.202, "step": 3324 }, { "epoch": 0.24, "grad_norm": 1.3370408628498869, "learning_rate": 8.909673254032818e-06, "loss": 0.2099, "step": 3325 }, { "epoch": 0.24, "grad_norm": 1.4901321999777943, "learning_rate": 8.908951045686858e-06, "loss": 0.2331, "step": 3326 }, { "epoch": 0.24, "grad_norm": 1.5409107704084781, "learning_rate": 8.908228627523934e-06, "loss": 0.2058, "step": 3327 }, { "epoch": 0.24, "grad_norm": 1.2499717556253962, "learning_rate": 8.907505999582823e-06, "loss": 0.2271, "step": 3328 }, { "epoch": 0.24, "grad_norm": 1.5495622911412996, "learning_rate": 8.906783161902316e-06, "loss": 0.2139, "step": 3329 }, { "epoch": 0.24, "grad_norm": 9.188244342530435, "learning_rate": 8.906060114521207e-06, "loss": 0.6686, "step": 3330 }, { "epoch": 0.24, "grad_norm": 1.5192486021521592, "learning_rate": 8.905336857478311e-06, "loss": 0.2679, "step": 3331 }, { "epoch": 0.24, "grad_norm": 1.2846258833251372, "learning_rate": 8.904613390812449e-06, "loss": 0.1586, "step": 3332 }, { "epoch": 0.24, "grad_norm": 1.443928316235056, "learning_rate": 8.903889714562449e-06, "loss": 0.1837, "step": 3333 }, { "epoch": 0.24, "grad_norm": 1.2734650330900155, "learning_rate": 8.90316582876716e-06, "loss": 0.2363, "step": 3334 }, { "epoch": 0.24, "grad_norm": 1.3793810358070389, "learning_rate": 8.902441733465436e-06, "loss": 0.2155, "step": 3335 }, { "epoch": 0.24, "grad_norm": 1.359097273332423, "learning_rate": 8.901717428696142e-06, "loss": 0.2322, "step": 3336 }, { "epoch": 0.24, "grad_norm": 1.4164999258689257, "learning_rate": 8.900992914498159e-06, "loss": 0.1832, "step": 3337 }, { "epoch": 0.24, "grad_norm": 1.3130728001093355, "learning_rate": 8.900268190910373e-06, "loss": 0.1762, "step": 3338 }, { "epoch": 0.24, "grad_norm": 1.430435786055622, "learning_rate": 8.899543257971686e-06, "loss": 0.2217, "step": 3339 }, { "epoch": 0.24, "grad_norm": 1.5286284222806887, "learning_rate": 8.898818115721009e-06, "loss": 0.2095, "step": 3340 }, { "epoch": 0.24, "grad_norm": 1.2846597620659843, "learning_rate": 8.898092764197264e-06, "loss": 0.1841, "step": 3341 }, { "epoch": 0.24, "grad_norm": 1.468208556285966, "learning_rate": 8.897367203439386e-06, "loss": 0.1916, "step": 3342 }, { "epoch": 0.24, "grad_norm": 1.5392152150071345, "learning_rate": 8.89664143348632e-06, "loss": 0.1649, "step": 3343 }, { "epoch": 0.24, "grad_norm": 1.4236655158906615, "learning_rate": 8.895915454377021e-06, "loss": 0.2132, "step": 3344 }, { "epoch": 0.24, "grad_norm": 1.6583361987145053, "learning_rate": 8.895189266150462e-06, "loss": 0.2067, "step": 3345 }, { "epoch": 0.24, "grad_norm": 1.4962372209160975, "learning_rate": 8.894462868845615e-06, "loss": 0.1987, "step": 3346 }, { "epoch": 0.24, "grad_norm": 1.3378925360554126, "learning_rate": 8.893736262501473e-06, "loss": 0.2316, "step": 3347 }, { "epoch": 0.24, "grad_norm": 1.625326675998737, "learning_rate": 8.893009447157039e-06, "loss": 0.2326, "step": 3348 }, { "epoch": 0.24, "grad_norm": 1.4716486335374517, "learning_rate": 8.892282422851321e-06, "loss": 0.195, "step": 3349 }, { "epoch": 0.24, "grad_norm": 1.6032885833363117, "learning_rate": 8.891555189623348e-06, "loss": 0.2324, "step": 3350 }, { "epoch": 0.24, "grad_norm": 1.427129227724154, "learning_rate": 8.890827747512155e-06, "loss": 0.1912, "step": 3351 }, { "epoch": 0.24, "grad_norm": 1.2777478492271028, "learning_rate": 8.890100096556782e-06, "loss": 0.1619, "step": 3352 }, { "epoch": 0.24, "grad_norm": 1.2665148255007546, "learning_rate": 8.889372236796292e-06, "loss": 0.1998, "step": 3353 }, { "epoch": 0.24, "grad_norm": 8.266448138690524, "learning_rate": 8.888644168269751e-06, "loss": 0.5737, "step": 3354 }, { "epoch": 0.24, "grad_norm": 7.48207732269246, "learning_rate": 8.88791589101624e-06, "loss": 0.7817, "step": 3355 }, { "epoch": 0.24, "grad_norm": 1.3590775535492565, "learning_rate": 8.887187405074852e-06, "loss": 0.2068, "step": 3356 }, { "epoch": 0.24, "grad_norm": 1.3326477877855927, "learning_rate": 8.886458710484684e-06, "loss": 0.214, "step": 3357 }, { "epoch": 0.24, "grad_norm": 1.4733779770393356, "learning_rate": 8.885729807284855e-06, "loss": 0.2076, "step": 3358 }, { "epoch": 0.24, "grad_norm": 1.4531546610230743, "learning_rate": 8.885000695514486e-06, "loss": 0.2564, "step": 3359 }, { "epoch": 0.24, "grad_norm": 1.3412051609871583, "learning_rate": 8.884271375212714e-06, "loss": 0.2443, "step": 3360 }, { "epoch": 0.24, "grad_norm": 1.472158654976976, "learning_rate": 8.883541846418687e-06, "loss": 0.2399, "step": 3361 }, { "epoch": 0.24, "grad_norm": 1.2940120326712834, "learning_rate": 8.882812109171561e-06, "loss": 0.2358, "step": 3362 }, { "epoch": 0.24, "grad_norm": 1.1251913401646718, "learning_rate": 8.882082163510507e-06, "loss": 0.1874, "step": 3363 }, { "epoch": 0.24, "grad_norm": 1.4993218690469448, "learning_rate": 8.881352009474704e-06, "loss": 0.2439, "step": 3364 }, { "epoch": 0.24, "grad_norm": 1.2372557034541531, "learning_rate": 8.880621647103346e-06, "loss": 0.2236, "step": 3365 }, { "epoch": 0.24, "grad_norm": 1.2943184727519619, "learning_rate": 8.879891076435636e-06, "loss": 0.193, "step": 3366 }, { "epoch": 0.24, "grad_norm": 1.4587366929940457, "learning_rate": 8.879160297510785e-06, "loss": 0.2473, "step": 3367 }, { "epoch": 0.24, "grad_norm": 4.877472926600581, "learning_rate": 8.878429310368022e-06, "loss": 0.5652, "step": 3368 }, { "epoch": 0.24, "grad_norm": 1.510777669951402, "learning_rate": 8.87769811504658e-06, "loss": 0.2325, "step": 3369 }, { "epoch": 0.24, "grad_norm": 1.2286085051030744, "learning_rate": 8.876966711585711e-06, "loss": 0.2018, "step": 3370 }, { "epoch": 0.24, "grad_norm": 1.381130732552162, "learning_rate": 8.876235100024668e-06, "loss": 0.1819, "step": 3371 }, { "epoch": 0.24, "grad_norm": 1.3573986351631284, "learning_rate": 8.875503280402727e-06, "loss": 0.1972, "step": 3372 }, { "epoch": 0.24, "grad_norm": 4.680697514470033, "learning_rate": 8.874771252759164e-06, "loss": 0.6843, "step": 3373 }, { "epoch": 0.24, "grad_norm": 1.3950776241986975, "learning_rate": 8.874039017133276e-06, "loss": 0.2052, "step": 3374 }, { "epoch": 0.24, "grad_norm": 1.510031245656418, "learning_rate": 8.873306573564364e-06, "loss": 0.2211, "step": 3375 }, { "epoch": 0.24, "grad_norm": 1.5140672736192597, "learning_rate": 8.872573922091743e-06, "loss": 0.2164, "step": 3376 }, { "epoch": 0.24, "grad_norm": 1.4161333050129712, "learning_rate": 8.871841062754737e-06, "loss": 0.1943, "step": 3377 }, { "epoch": 0.24, "grad_norm": 1.3117065041013924, "learning_rate": 8.871107995592687e-06, "loss": 0.206, "step": 3378 }, { "epoch": 0.24, "grad_norm": 1.5516234389644101, "learning_rate": 8.870374720644936e-06, "loss": 0.2486, "step": 3379 }, { "epoch": 0.24, "grad_norm": 1.5271342069400944, "learning_rate": 8.86964123795085e-06, "loss": 0.2344, "step": 3380 }, { "epoch": 0.24, "grad_norm": 1.3655716960852164, "learning_rate": 8.868907547549793e-06, "loss": 0.2191, "step": 3381 }, { "epoch": 0.24, "grad_norm": 1.3224030483687998, "learning_rate": 8.86817364948115e-06, "loss": 0.2011, "step": 3382 }, { "epoch": 0.24, "grad_norm": 1.459774043191652, "learning_rate": 8.867439543784313e-06, "loss": 0.2237, "step": 3383 }, { "epoch": 0.24, "grad_norm": 1.430488206111277, "learning_rate": 8.866705230498686e-06, "loss": 0.2403, "step": 3384 }, { "epoch": 0.24, "grad_norm": 1.5137601063390582, "learning_rate": 8.865970709663682e-06, "loss": 0.2259, "step": 3385 }, { "epoch": 0.24, "grad_norm": 6.487166428897814, "learning_rate": 8.865235981318732e-06, "loss": 0.5559, "step": 3386 }, { "epoch": 0.24, "grad_norm": 1.373772738971246, "learning_rate": 8.864501045503268e-06, "loss": 0.1924, "step": 3387 }, { "epoch": 0.24, "grad_norm": 1.5039193835440003, "learning_rate": 8.863765902256742e-06, "loss": 0.2222, "step": 3388 }, { "epoch": 0.24, "grad_norm": 1.2738683093008236, "learning_rate": 8.863030551618613e-06, "loss": 0.1886, "step": 3389 }, { "epoch": 0.24, "grad_norm": 1.367442924436874, "learning_rate": 8.862294993628348e-06, "loss": 0.215, "step": 3390 }, { "epoch": 0.24, "grad_norm": 1.551663676117261, "learning_rate": 8.861559228325433e-06, "loss": 0.232, "step": 3391 }, { "epoch": 0.24, "grad_norm": 1.3999192675201675, "learning_rate": 8.860823255749362e-06, "loss": 0.2101, "step": 3392 }, { "epoch": 0.24, "grad_norm": 8.632011962390878, "learning_rate": 8.860087075939636e-06, "loss": 0.5768, "step": 3393 }, { "epoch": 0.24, "grad_norm": 1.4214775226450687, "learning_rate": 8.859350688935769e-06, "loss": 0.2165, "step": 3394 }, { "epoch": 0.24, "grad_norm": 1.4747611725371281, "learning_rate": 8.858614094777292e-06, "loss": 0.1972, "step": 3395 }, { "epoch": 0.24, "grad_norm": 1.3661368245966141, "learning_rate": 8.857877293503739e-06, "loss": 0.2336, "step": 3396 }, { "epoch": 0.24, "grad_norm": 4.531646155795514, "learning_rate": 8.857140285154658e-06, "loss": 0.6816, "step": 3397 }, { "epoch": 0.24, "grad_norm": 1.4566310621944527, "learning_rate": 8.85640306976961e-06, "loss": 0.2398, "step": 3398 }, { "epoch": 0.24, "grad_norm": 1.4758597182222741, "learning_rate": 8.855665647388167e-06, "loss": 0.2151, "step": 3399 }, { "epoch": 0.24, "grad_norm": 1.372055257216578, "learning_rate": 8.85492801804991e-06, "loss": 0.2229, "step": 3400 }, { "epoch": 0.24, "grad_norm": 1.3958624833322675, "learning_rate": 8.854190181794433e-06, "loss": 0.1906, "step": 3401 }, { "epoch": 0.24, "grad_norm": 1.3913785077084155, "learning_rate": 8.853452138661338e-06, "loss": 0.2214, "step": 3402 }, { "epoch": 0.24, "grad_norm": 1.5901520433175405, "learning_rate": 8.852713888690239e-06, "loss": 0.2964, "step": 3403 }, { "epoch": 0.24, "grad_norm": 1.4859277916241667, "learning_rate": 8.851975431920768e-06, "loss": 0.2441, "step": 3404 }, { "epoch": 0.24, "grad_norm": 1.446421761868939, "learning_rate": 8.851236768392556e-06, "loss": 0.1935, "step": 3405 }, { "epoch": 0.24, "grad_norm": 1.3639746948539013, "learning_rate": 8.850497898145256e-06, "loss": 0.2109, "step": 3406 }, { "epoch": 0.24, "grad_norm": 5.398064511201901, "learning_rate": 8.849758821218525e-06, "loss": 0.7553, "step": 3407 }, { "epoch": 0.24, "grad_norm": 1.3380611537756595, "learning_rate": 8.849019537652035e-06, "loss": 0.1734, "step": 3408 }, { "epoch": 0.24, "grad_norm": 1.3633752856538814, "learning_rate": 8.848280047485468e-06, "loss": 0.2514, "step": 3409 }, { "epoch": 0.24, "grad_norm": 1.1902055027075862, "learning_rate": 8.847540350758516e-06, "loss": 0.1932, "step": 3410 }, { "epoch": 0.24, "grad_norm": 1.5142521791319392, "learning_rate": 8.846800447510884e-06, "loss": 0.2642, "step": 3411 }, { "epoch": 0.24, "grad_norm": 1.4314296013668832, "learning_rate": 8.846060337782288e-06, "loss": 0.2242, "step": 3412 }, { "epoch": 0.24, "grad_norm": 1.4353990399411127, "learning_rate": 8.845320021612448e-06, "loss": 0.2143, "step": 3413 }, { "epoch": 0.24, "grad_norm": 1.592767234174408, "learning_rate": 8.84457949904111e-06, "loss": 0.1966, "step": 3414 }, { "epoch": 0.24, "grad_norm": 1.3498548696683323, "learning_rate": 8.843838770108018e-06, "loss": 0.185, "step": 3415 }, { "epoch": 0.24, "grad_norm": 1.6315400671663203, "learning_rate": 8.843097834852929e-06, "loss": 0.216, "step": 3416 }, { "epoch": 0.24, "grad_norm": 1.7189920891855792, "learning_rate": 8.842356693315619e-06, "loss": 0.2362, "step": 3417 }, { "epoch": 0.24, "grad_norm": 1.3331114834417002, "learning_rate": 8.841615345535865e-06, "loss": 0.2149, "step": 3418 }, { "epoch": 0.24, "grad_norm": 4.083665803095692, "learning_rate": 8.840873791553461e-06, "loss": 0.5762, "step": 3419 }, { "epoch": 0.24, "grad_norm": 1.3692720174880115, "learning_rate": 8.84013203140821e-06, "loss": 0.1802, "step": 3420 }, { "epoch": 0.24, "grad_norm": 1.388798759333109, "learning_rate": 8.83939006513993e-06, "loss": 0.2108, "step": 3421 }, { "epoch": 0.24, "grad_norm": 1.5061799058694654, "learning_rate": 8.838647892788443e-06, "loss": 0.2296, "step": 3422 }, { "epoch": 0.24, "grad_norm": 1.348011868078559, "learning_rate": 8.837905514393587e-06, "loss": 0.1465, "step": 3423 }, { "epoch": 0.24, "grad_norm": 1.341854065561041, "learning_rate": 8.837162929995212e-06, "loss": 0.1952, "step": 3424 }, { "epoch": 0.24, "grad_norm": 1.424947633923621, "learning_rate": 8.836420139633173e-06, "loss": 0.2096, "step": 3425 }, { "epoch": 0.25, "grad_norm": 1.3189746719081146, "learning_rate": 8.835677143347343e-06, "loss": 0.2267, "step": 3426 }, { "epoch": 0.25, "grad_norm": 1.289344867034444, "learning_rate": 8.834933941177604e-06, "loss": 0.2576, "step": 3427 }, { "epoch": 0.25, "grad_norm": 1.3352373183998387, "learning_rate": 8.834190533163844e-06, "loss": 0.2333, "step": 3428 }, { "epoch": 0.25, "grad_norm": 1.4239783625387485, "learning_rate": 8.83344691934597e-06, "loss": 0.1843, "step": 3429 }, { "epoch": 0.25, "grad_norm": 1.490486275697981, "learning_rate": 8.832703099763894e-06, "loss": 0.2041, "step": 3430 }, { "epoch": 0.25, "grad_norm": 1.3367185460177247, "learning_rate": 8.83195907445754e-06, "loss": 0.2076, "step": 3431 }, { "epoch": 0.25, "grad_norm": 1.2489545011586032, "learning_rate": 8.83121484346685e-06, "loss": 0.1763, "step": 3432 }, { "epoch": 0.25, "grad_norm": 1.5202513292352233, "learning_rate": 8.830470406831767e-06, "loss": 0.208, "step": 3433 }, { "epoch": 0.25, "grad_norm": 1.3826391280267434, "learning_rate": 8.82972576459225e-06, "loss": 0.2348, "step": 3434 }, { "epoch": 0.25, "grad_norm": 1.420107967857565, "learning_rate": 8.828980916788269e-06, "loss": 0.2403, "step": 3435 }, { "epoch": 0.25, "grad_norm": 1.4183016175140915, "learning_rate": 8.828235863459801e-06, "loss": 0.2097, "step": 3436 }, { "epoch": 0.25, "grad_norm": 1.4181374571751888, "learning_rate": 8.827490604646845e-06, "loss": 0.2666, "step": 3437 }, { "epoch": 0.25, "grad_norm": 1.337114228016009, "learning_rate": 8.826745140389397e-06, "loss": 0.2364, "step": 3438 }, { "epoch": 0.25, "grad_norm": 1.4444529161124489, "learning_rate": 8.825999470727473e-06, "loss": 0.2402, "step": 3439 }, { "epoch": 0.25, "grad_norm": 1.377432885848545, "learning_rate": 8.825253595701097e-06, "loss": 0.2087, "step": 3440 }, { "epoch": 0.25, "grad_norm": 1.42755092706177, "learning_rate": 8.824507515350304e-06, "loss": 0.2286, "step": 3441 }, { "epoch": 0.25, "grad_norm": 1.3310633888039973, "learning_rate": 8.823761229715144e-06, "loss": 0.1924, "step": 3442 }, { "epoch": 0.25, "grad_norm": 1.3212561112899646, "learning_rate": 8.82301473883567e-06, "loss": 0.1703, "step": 3443 }, { "epoch": 0.25, "grad_norm": 1.597161415635961, "learning_rate": 8.822268042751956e-06, "loss": 0.2608, "step": 3444 }, { "epoch": 0.25, "grad_norm": 1.3073723924360938, "learning_rate": 8.821521141504075e-06, "loss": 0.2312, "step": 3445 }, { "epoch": 0.25, "grad_norm": 1.4643042870741099, "learning_rate": 8.820774035132122e-06, "loss": 0.1982, "step": 3446 }, { "epoch": 0.25, "grad_norm": 5.6647324206655885, "learning_rate": 8.8200267236762e-06, "loss": 0.5116, "step": 3447 }, { "epoch": 0.25, "grad_norm": 1.2681844338721362, "learning_rate": 8.819279207176418e-06, "loss": 0.1913, "step": 3448 }, { "epoch": 0.25, "grad_norm": 1.3590712286494593, "learning_rate": 8.818531485672902e-06, "loss": 0.2607, "step": 3449 }, { "epoch": 0.25, "grad_norm": 4.269856942907231, "learning_rate": 8.817783559205784e-06, "loss": 0.6841, "step": 3450 }, { "epoch": 0.25, "grad_norm": 1.537286679057048, "learning_rate": 8.817035427815214e-06, "loss": 0.1967, "step": 3451 }, { "epoch": 0.25, "grad_norm": 1.5691797491446957, "learning_rate": 8.816287091541346e-06, "loss": 0.2444, "step": 3452 }, { "epoch": 0.25, "grad_norm": 1.245704647399468, "learning_rate": 8.815538550424347e-06, "loss": 0.1844, "step": 3453 }, { "epoch": 0.25, "grad_norm": 1.2022984775696763, "learning_rate": 8.8147898045044e-06, "loss": 0.1851, "step": 3454 }, { "epoch": 0.25, "grad_norm": 6.7347282375961, "learning_rate": 8.814040853821688e-06, "loss": 0.5588, "step": 3455 }, { "epoch": 0.25, "grad_norm": 1.5063059529951168, "learning_rate": 8.813291698416417e-06, "loss": 0.2428, "step": 3456 }, { "epoch": 0.25, "grad_norm": 1.431643797650711, "learning_rate": 8.812542338328797e-06, "loss": 0.2319, "step": 3457 }, { "epoch": 0.25, "grad_norm": 1.330074686441079, "learning_rate": 8.81179277359905e-06, "loss": 0.2666, "step": 3458 }, { "epoch": 0.25, "grad_norm": 1.1856959380764205, "learning_rate": 8.811043004267411e-06, "loss": 0.1953, "step": 3459 }, { "epoch": 0.25, "grad_norm": 1.3265089760846611, "learning_rate": 8.810293030374126e-06, "loss": 0.2326, "step": 3460 }, { "epoch": 0.25, "grad_norm": 1.401036447124268, "learning_rate": 8.809542851959448e-06, "loss": 0.2372, "step": 3461 }, { "epoch": 0.25, "grad_norm": 1.2740193197283487, "learning_rate": 8.80879246906364e-06, "loss": 0.2115, "step": 3462 }, { "epoch": 0.25, "grad_norm": 1.2834841775473325, "learning_rate": 8.808041881726988e-06, "loss": 0.2202, "step": 3463 }, { "epoch": 0.25, "grad_norm": 1.3812463078224733, "learning_rate": 8.807291089989776e-06, "loss": 0.2374, "step": 3464 }, { "epoch": 0.25, "grad_norm": 1.3341292725043825, "learning_rate": 8.806540093892302e-06, "loss": 0.1854, "step": 3465 }, { "epoch": 0.25, "grad_norm": 1.3143854799170787, "learning_rate": 8.80578889347488e-06, "loss": 0.2616, "step": 3466 }, { "epoch": 0.25, "grad_norm": 1.3732514738130441, "learning_rate": 8.805037488777827e-06, "loss": 0.2282, "step": 3467 }, { "epoch": 0.25, "grad_norm": 1.4522727476685582, "learning_rate": 8.804285879841481e-06, "loss": 0.2021, "step": 3468 }, { "epoch": 0.25, "grad_norm": 1.3822539442064339, "learning_rate": 8.803534066706183e-06, "loss": 0.1906, "step": 3469 }, { "epoch": 0.25, "grad_norm": 6.097700997986065, "learning_rate": 8.802782049412286e-06, "loss": 0.7269, "step": 3470 }, { "epoch": 0.25, "grad_norm": 1.4175055475082383, "learning_rate": 8.802029828000157e-06, "loss": 0.237, "step": 3471 }, { "epoch": 0.25, "grad_norm": 1.4099038172743792, "learning_rate": 8.801277402510169e-06, "loss": 0.2475, "step": 3472 }, { "epoch": 0.25, "grad_norm": 6.148999200392265, "learning_rate": 8.800524772982712e-06, "loss": 0.5292, "step": 3473 }, { "epoch": 0.25, "grad_norm": 1.5572007957258802, "learning_rate": 8.799771939458186e-06, "loss": 0.1956, "step": 3474 }, { "epoch": 0.25, "grad_norm": 1.3784975137607194, "learning_rate": 8.799018901976997e-06, "loss": 0.2341, "step": 3475 }, { "epoch": 0.25, "grad_norm": 1.3444255154790965, "learning_rate": 8.798265660579567e-06, "loss": 0.1815, "step": 3476 }, { "epoch": 0.25, "grad_norm": 7.058409218546766, "learning_rate": 8.797512215306325e-06, "loss": 0.5875, "step": 3477 }, { "epoch": 0.25, "grad_norm": 1.3672000201131083, "learning_rate": 8.796758566197714e-06, "loss": 0.2122, "step": 3478 }, { "epoch": 0.25, "grad_norm": 1.4561403041767271, "learning_rate": 8.796004713294188e-06, "loss": 0.2684, "step": 3479 }, { "epoch": 0.25, "grad_norm": 1.4520918203577586, "learning_rate": 8.795250656636207e-06, "loss": 0.2735, "step": 3480 }, { "epoch": 0.25, "grad_norm": 1.4306970925535512, "learning_rate": 8.794496396264252e-06, "loss": 0.2121, "step": 3481 }, { "epoch": 0.25, "grad_norm": 1.5271108907038105, "learning_rate": 8.793741932218802e-06, "loss": 0.2709, "step": 3482 }, { "epoch": 0.25, "grad_norm": 1.4997989167028307, "learning_rate": 8.79298726454036e-06, "loss": 0.2511, "step": 3483 }, { "epoch": 0.25, "grad_norm": 1.319760205957678, "learning_rate": 8.792232393269428e-06, "loss": 0.2119, "step": 3484 }, { "epoch": 0.25, "grad_norm": 1.5213111616613597, "learning_rate": 8.791477318446527e-06, "loss": 0.2202, "step": 3485 }, { "epoch": 0.25, "grad_norm": 1.5133731246783466, "learning_rate": 8.790722040112188e-06, "loss": 0.1967, "step": 3486 }, { "epoch": 0.25, "grad_norm": 1.240615219142938, "learning_rate": 8.789966558306948e-06, "loss": 0.1795, "step": 3487 }, { "epoch": 0.25, "grad_norm": 1.4681425346219017, "learning_rate": 8.78921087307136e-06, "loss": 0.2428, "step": 3488 }, { "epoch": 0.25, "grad_norm": 1.613626956732254, "learning_rate": 8.788454984445987e-06, "loss": 0.1999, "step": 3489 }, { "epoch": 0.25, "grad_norm": 1.318662714152473, "learning_rate": 8.7876988924714e-06, "loss": 0.1884, "step": 3490 }, { "epoch": 0.25, "grad_norm": 1.1983018161825039, "learning_rate": 8.786942597188184e-06, "loss": 0.1824, "step": 3491 }, { "epoch": 0.25, "grad_norm": 5.5697451605491075, "learning_rate": 8.786186098636935e-06, "loss": 0.6824, "step": 3492 }, { "epoch": 0.25, "grad_norm": 1.416611687143626, "learning_rate": 8.785429396858258e-06, "loss": 0.2142, "step": 3493 }, { "epoch": 0.25, "grad_norm": 1.191946226057742, "learning_rate": 8.784672491892769e-06, "loss": 0.1896, "step": 3494 }, { "epoch": 0.25, "grad_norm": 4.831688127119905, "learning_rate": 8.7839153837811e-06, "loss": 0.7268, "step": 3495 }, { "epoch": 0.25, "grad_norm": 6.957040585244163, "learning_rate": 8.78315807256388e-06, "loss": 0.66, "step": 3496 }, { "epoch": 0.25, "grad_norm": 1.4176036084956738, "learning_rate": 8.782400558281768e-06, "loss": 0.2006, "step": 3497 }, { "epoch": 0.25, "grad_norm": 1.3765226812644953, "learning_rate": 8.781642840975419e-06, "loss": 0.1959, "step": 3498 }, { "epoch": 0.25, "grad_norm": 1.3748192531774663, "learning_rate": 8.780884920685507e-06, "loss": 0.2404, "step": 3499 }, { "epoch": 0.25, "grad_norm": 1.482517065468356, "learning_rate": 8.780126797452713e-06, "loss": 0.2586, "step": 3500 }, { "epoch": 0.25, "grad_norm": 1.4693765640052256, "learning_rate": 8.779368471317731e-06, "loss": 0.2431, "step": 3501 }, { "epoch": 0.25, "grad_norm": 1.521710983542316, "learning_rate": 8.778609942321263e-06, "loss": 0.2367, "step": 3502 }, { "epoch": 0.25, "grad_norm": 1.2999981556714992, "learning_rate": 8.777851210504025e-06, "loss": 0.1912, "step": 3503 }, { "epoch": 0.25, "grad_norm": 1.513404657769363, "learning_rate": 8.777092275906743e-06, "loss": 0.2516, "step": 3504 }, { "epoch": 0.25, "grad_norm": 1.4413557320617025, "learning_rate": 8.776333138570153e-06, "loss": 0.2428, "step": 3505 }, { "epoch": 0.25, "grad_norm": 1.488152051116762, "learning_rate": 8.775573798535002e-06, "loss": 0.1921, "step": 3506 }, { "epoch": 0.25, "grad_norm": 1.5782741742482813, "learning_rate": 8.77481425584205e-06, "loss": 0.2566, "step": 3507 }, { "epoch": 0.25, "grad_norm": 1.4682532410223015, "learning_rate": 8.774054510532064e-06, "loss": 0.2286, "step": 3508 }, { "epoch": 0.25, "grad_norm": 1.241297650818194, "learning_rate": 8.773294562645826e-06, "loss": 0.2226, "step": 3509 }, { "epoch": 0.25, "grad_norm": 1.371851516830798, "learning_rate": 8.772534412224128e-06, "loss": 0.1948, "step": 3510 }, { "epoch": 0.25, "grad_norm": 7.933212082311394, "learning_rate": 8.77177405930777e-06, "loss": 0.5866, "step": 3511 }, { "epoch": 0.25, "grad_norm": 1.4595417380909443, "learning_rate": 8.771013503937563e-06, "loss": 0.2091, "step": 3512 }, { "epoch": 0.25, "grad_norm": 1.3787810166214245, "learning_rate": 8.770252746154334e-06, "loss": 0.2297, "step": 3513 }, { "epoch": 0.25, "grad_norm": 4.244046545621324, "learning_rate": 8.769491785998914e-06, "loss": 0.4104, "step": 3514 }, { "epoch": 0.25, "grad_norm": 1.55061552776363, "learning_rate": 8.768730623512152e-06, "loss": 0.2498, "step": 3515 }, { "epoch": 0.25, "grad_norm": 1.2535863797730447, "learning_rate": 8.767969258734903e-06, "loss": 0.1893, "step": 3516 }, { "epoch": 0.25, "grad_norm": 1.4915819811911442, "learning_rate": 8.767207691708032e-06, "loss": 0.2239, "step": 3517 }, { "epoch": 0.25, "grad_norm": 1.7525037171060562, "learning_rate": 8.76644592247242e-06, "loss": 0.2258, "step": 3518 }, { "epoch": 0.25, "grad_norm": 1.4875652090202376, "learning_rate": 8.765683951068952e-06, "loss": 0.2525, "step": 3519 }, { "epoch": 0.25, "grad_norm": 1.306116531299578, "learning_rate": 8.764921777538533e-06, "loss": 0.2255, "step": 3520 }, { "epoch": 0.25, "grad_norm": 1.3299102580440942, "learning_rate": 8.764159401922068e-06, "loss": 0.2067, "step": 3521 }, { "epoch": 0.25, "grad_norm": 1.531280167049909, "learning_rate": 8.763396824260482e-06, "loss": 0.2168, "step": 3522 }, { "epoch": 0.25, "grad_norm": 1.4439434464232521, "learning_rate": 8.762634044594704e-06, "loss": 0.2309, "step": 3523 }, { "epoch": 0.25, "grad_norm": 1.2875234620153186, "learning_rate": 8.761871062965679e-06, "loss": 0.2047, "step": 3524 }, { "epoch": 0.25, "grad_norm": 1.5089507667914641, "learning_rate": 8.761107879414362e-06, "loss": 0.2599, "step": 3525 }, { "epoch": 0.25, "grad_norm": 1.4437356679462925, "learning_rate": 8.760344493981714e-06, "loss": 0.2205, "step": 3526 }, { "epoch": 0.25, "grad_norm": 1.5392915086275918, "learning_rate": 8.759580906708714e-06, "loss": 0.2419, "step": 3527 }, { "epoch": 0.25, "grad_norm": 38.905439270924006, "learning_rate": 8.758817117636345e-06, "loss": 0.5083, "step": 3528 }, { "epoch": 0.25, "grad_norm": 1.5192325288704667, "learning_rate": 8.758053126805608e-06, "loss": 0.2059, "step": 3529 }, { "epoch": 0.25, "grad_norm": 1.4430968894093856, "learning_rate": 8.757288934257509e-06, "loss": 0.2689, "step": 3530 }, { "epoch": 0.25, "grad_norm": 1.3798408298197458, "learning_rate": 8.756524540033066e-06, "loss": 0.2031, "step": 3531 }, { "epoch": 0.25, "grad_norm": 1.3896759524097035, "learning_rate": 8.75575994417331e-06, "loss": 0.2395, "step": 3532 }, { "epoch": 0.25, "grad_norm": 1.5436255189263635, "learning_rate": 8.754995146719281e-06, "loss": 0.2554, "step": 3533 }, { "epoch": 0.25, "grad_norm": 1.4341038003876296, "learning_rate": 8.754230147712031e-06, "loss": 0.2222, "step": 3534 }, { "epoch": 0.25, "grad_norm": 1.319592719778914, "learning_rate": 8.75346494719262e-06, "loss": 0.2154, "step": 3535 }, { "epoch": 0.25, "grad_norm": 1.316911936261718, "learning_rate": 8.752699545202124e-06, "loss": 0.2106, "step": 3536 }, { "epoch": 0.25, "grad_norm": 1.338061016540739, "learning_rate": 8.751933941781624e-06, "loss": 0.1823, "step": 3537 }, { "epoch": 0.25, "grad_norm": 1.4650919989159568, "learning_rate": 8.751168136972217e-06, "loss": 0.2157, "step": 3538 }, { "epoch": 0.25, "grad_norm": 1.5580500699771689, "learning_rate": 8.750402130815005e-06, "loss": 0.2473, "step": 3539 }, { "epoch": 0.25, "grad_norm": 1.4852966582571063, "learning_rate": 8.749635923351108e-06, "loss": 0.242, "step": 3540 }, { "epoch": 0.25, "grad_norm": 1.632942814709578, "learning_rate": 8.748869514621649e-06, "loss": 0.1588, "step": 3541 }, { "epoch": 0.25, "grad_norm": 1.2845291303522697, "learning_rate": 8.74810290466777e-06, "loss": 0.1745, "step": 3542 }, { "epoch": 0.25, "grad_norm": 1.2507709830571156, "learning_rate": 8.747336093530617e-06, "loss": 0.1844, "step": 3543 }, { "epoch": 0.25, "grad_norm": 1.5686949868723838, "learning_rate": 8.74656908125135e-06, "loss": 0.2736, "step": 3544 }, { "epoch": 0.25, "grad_norm": 1.5002574777247166, "learning_rate": 8.745801867871138e-06, "loss": 0.2052, "step": 3545 }, { "epoch": 0.25, "grad_norm": 1.6505551768471391, "learning_rate": 8.745034453431165e-06, "loss": 0.2729, "step": 3546 }, { "epoch": 0.25, "grad_norm": 5.1081811186792425, "learning_rate": 8.74426683797262e-06, "loss": 0.5648, "step": 3547 }, { "epoch": 0.25, "grad_norm": 4.609825386496396, "learning_rate": 8.743499021536705e-06, "loss": 0.6667, "step": 3548 }, { "epoch": 0.25, "grad_norm": 1.3764682796928078, "learning_rate": 8.742731004164636e-06, "loss": 0.2186, "step": 3549 }, { "epoch": 0.25, "grad_norm": 12.094134099366917, "learning_rate": 8.741962785897634e-06, "loss": 0.6248, "step": 3550 }, { "epoch": 0.25, "grad_norm": 6.602519796392337, "learning_rate": 8.741194366776937e-06, "loss": 0.6259, "step": 3551 }, { "epoch": 0.25, "grad_norm": 1.3250663290820373, "learning_rate": 8.74042574684379e-06, "loss": 0.2046, "step": 3552 }, { "epoch": 0.25, "grad_norm": 1.4679756514498974, "learning_rate": 8.739656926139448e-06, "loss": 0.1866, "step": 3553 }, { "epoch": 0.25, "grad_norm": 1.6160587197491099, "learning_rate": 8.738887904705179e-06, "loss": 0.2818, "step": 3554 }, { "epoch": 0.25, "grad_norm": 1.262920849537033, "learning_rate": 8.738118682582262e-06, "loss": 0.1805, "step": 3555 }, { "epoch": 0.25, "grad_norm": 1.2790353290893421, "learning_rate": 8.737349259811984e-06, "loss": 0.1693, "step": 3556 }, { "epoch": 0.25, "grad_norm": 1.3421431264287502, "learning_rate": 8.736579636435645e-06, "loss": 0.1964, "step": 3557 }, { "epoch": 0.25, "grad_norm": 1.5073213647446664, "learning_rate": 8.735809812494557e-06, "loss": 0.234, "step": 3558 }, { "epoch": 0.25, "grad_norm": 1.4391649603308385, "learning_rate": 8.73503978803004e-06, "loss": 0.2187, "step": 3559 }, { "epoch": 0.25, "grad_norm": 4.9150450311019975, "learning_rate": 8.734269563083424e-06, "loss": 0.5778, "step": 3560 }, { "epoch": 0.25, "grad_norm": 1.363786761957576, "learning_rate": 8.733499137696054e-06, "loss": 0.2208, "step": 3561 }, { "epoch": 0.25, "grad_norm": 1.4334337370598316, "learning_rate": 8.732728511909283e-06, "loss": 0.2136, "step": 3562 }, { "epoch": 0.25, "grad_norm": 1.358821096864716, "learning_rate": 8.731957685764474e-06, "loss": 0.21, "step": 3563 }, { "epoch": 0.25, "grad_norm": 1.3011599558892513, "learning_rate": 8.731186659303004e-06, "loss": 0.2087, "step": 3564 }, { "epoch": 0.26, "grad_norm": 1.4095166472010363, "learning_rate": 8.730415432566256e-06, "loss": 0.2044, "step": 3565 }, { "epoch": 0.26, "grad_norm": 1.5020613358153465, "learning_rate": 8.72964400559563e-06, "loss": 0.2468, "step": 3566 }, { "epoch": 0.26, "grad_norm": 1.3211988481952324, "learning_rate": 8.728872378432529e-06, "loss": 0.1804, "step": 3567 }, { "epoch": 0.26, "grad_norm": 4.783224591029421, "learning_rate": 8.728100551118372e-06, "loss": 0.5846, "step": 3568 }, { "epoch": 0.26, "grad_norm": 1.3736052175835756, "learning_rate": 8.727328523694591e-06, "loss": 0.1927, "step": 3569 }, { "epoch": 0.26, "grad_norm": 1.6062623185466405, "learning_rate": 8.726556296202622e-06, "loss": 0.2628, "step": 3570 }, { "epoch": 0.26, "grad_norm": 1.4839620595685727, "learning_rate": 8.725783868683915e-06, "loss": 0.2434, "step": 3571 }, { "epoch": 0.26, "grad_norm": 1.3043290153160276, "learning_rate": 8.72501124117993e-06, "loss": 0.1823, "step": 3572 }, { "epoch": 0.26, "grad_norm": 1.4121528772844587, "learning_rate": 8.724238413732143e-06, "loss": 0.2251, "step": 3573 }, { "epoch": 0.26, "grad_norm": 1.3927881020252728, "learning_rate": 8.723465386382032e-06, "loss": 0.2016, "step": 3574 }, { "epoch": 0.26, "grad_norm": 1.2834058695364785, "learning_rate": 8.722692159171093e-06, "loss": 0.2115, "step": 3575 }, { "epoch": 0.26, "grad_norm": 1.5176101395336725, "learning_rate": 8.721918732140829e-06, "loss": 0.2324, "step": 3576 }, { "epoch": 0.26, "grad_norm": 1.4058202994508628, "learning_rate": 8.721145105332752e-06, "loss": 0.2002, "step": 3577 }, { "epoch": 0.26, "grad_norm": 1.513263119324629, "learning_rate": 8.720371278788388e-06, "loss": 0.208, "step": 3578 }, { "epoch": 0.26, "grad_norm": 1.4361083208445464, "learning_rate": 8.719597252549277e-06, "loss": 0.2137, "step": 3579 }, { "epoch": 0.26, "grad_norm": 1.5770053410631122, "learning_rate": 8.71882302665696e-06, "loss": 0.2305, "step": 3580 }, { "epoch": 0.26, "grad_norm": 1.3455963477308486, "learning_rate": 8.718048601153e-06, "loss": 0.1903, "step": 3581 }, { "epoch": 0.26, "grad_norm": 1.5694022795297426, "learning_rate": 8.717273976078959e-06, "loss": 0.269, "step": 3582 }, { "epoch": 0.26, "grad_norm": 1.4442507099849007, "learning_rate": 8.71649915147642e-06, "loss": 0.23, "step": 3583 }, { "epoch": 0.26, "grad_norm": 1.2673091790275623, "learning_rate": 8.715724127386971e-06, "loss": 0.2193, "step": 3584 }, { "epoch": 0.26, "grad_norm": 1.312924200354133, "learning_rate": 8.714948903852214e-06, "loss": 0.1711, "step": 3585 }, { "epoch": 0.26, "grad_norm": 1.3324604288521895, "learning_rate": 8.714173480913759e-06, "loss": 0.1964, "step": 3586 }, { "epoch": 0.26, "grad_norm": 1.520153053960255, "learning_rate": 8.713397858613225e-06, "loss": 0.1928, "step": 3587 }, { "epoch": 0.26, "grad_norm": 1.4012050594272891, "learning_rate": 8.712622036992248e-06, "loss": 0.2133, "step": 3588 }, { "epoch": 0.26, "grad_norm": 1.4365188702349088, "learning_rate": 8.711846016092468e-06, "loss": 0.1973, "step": 3589 }, { "epoch": 0.26, "grad_norm": 1.5822814622006793, "learning_rate": 8.711069795955543e-06, "loss": 0.2402, "step": 3590 }, { "epoch": 0.26, "grad_norm": 7.6111822559081395, "learning_rate": 8.710293376623132e-06, "loss": 0.7092, "step": 3591 }, { "epoch": 0.26, "grad_norm": 1.2479384625692431, "learning_rate": 8.709516758136913e-06, "loss": 0.1959, "step": 3592 }, { "epoch": 0.26, "grad_norm": 1.448073854697756, "learning_rate": 8.708739940538573e-06, "loss": 0.1782, "step": 3593 }, { "epoch": 0.26, "grad_norm": 1.3371933544543355, "learning_rate": 8.707962923869806e-06, "loss": 0.1807, "step": 3594 }, { "epoch": 0.26, "grad_norm": 1.2765175158409978, "learning_rate": 8.707185708172322e-06, "loss": 0.2316, "step": 3595 }, { "epoch": 0.26, "grad_norm": 1.3677570962487962, "learning_rate": 8.706408293487833e-06, "loss": 0.221, "step": 3596 }, { "epoch": 0.26, "grad_norm": 1.4546737844569366, "learning_rate": 8.705630679858075e-06, "loss": 0.2258, "step": 3597 }, { "epoch": 0.26, "grad_norm": 1.5599792406944677, "learning_rate": 8.704852867324783e-06, "loss": 0.2349, "step": 3598 }, { "epoch": 0.26, "grad_norm": 1.471731278074859, "learning_rate": 8.704074855929708e-06, "loss": 0.2492, "step": 3599 }, { "epoch": 0.26, "grad_norm": 8.110888301431983, "learning_rate": 8.70329664571461e-06, "loss": 0.6398, "step": 3600 }, { "epoch": 0.26, "grad_norm": 1.504036494668173, "learning_rate": 8.70251823672126e-06, "loss": 0.2173, "step": 3601 }, { "epoch": 0.26, "grad_norm": 1.2426616378513164, "learning_rate": 8.701739628991442e-06, "loss": 0.2061, "step": 3602 }, { "epoch": 0.26, "grad_norm": 1.5465672950536882, "learning_rate": 8.700960822566943e-06, "loss": 0.2459, "step": 3603 }, { "epoch": 0.26, "grad_norm": 1.355312231546318, "learning_rate": 8.700181817489575e-06, "loss": 0.2552, "step": 3604 }, { "epoch": 0.26, "grad_norm": 5.078648726272848, "learning_rate": 8.699402613801145e-06, "loss": 0.6406, "step": 3605 }, { "epoch": 0.26, "grad_norm": 1.2930906564775058, "learning_rate": 8.698623211543478e-06, "loss": 0.2034, "step": 3606 }, { "epoch": 0.26, "grad_norm": 1.3808716774983836, "learning_rate": 8.697843610758413e-06, "loss": 0.2015, "step": 3607 }, { "epoch": 0.26, "grad_norm": 1.4714266271669127, "learning_rate": 8.697063811487793e-06, "loss": 0.2068, "step": 3608 }, { "epoch": 0.26, "grad_norm": 1.3882492151808599, "learning_rate": 8.696283813773475e-06, "loss": 0.1994, "step": 3609 }, { "epoch": 0.26, "grad_norm": 1.3311433421636816, "learning_rate": 8.695503617657328e-06, "loss": 0.2012, "step": 3610 }, { "epoch": 0.26, "grad_norm": 1.2732421567547827, "learning_rate": 8.694723223181226e-06, "loss": 0.1993, "step": 3611 }, { "epoch": 0.26, "grad_norm": 1.2649671765374781, "learning_rate": 8.69394263038706e-06, "loss": 0.1857, "step": 3612 }, { "epoch": 0.26, "grad_norm": 1.3614550487609511, "learning_rate": 8.693161839316731e-06, "loss": 0.2375, "step": 3613 }, { "epoch": 0.26, "grad_norm": 1.2190247167958133, "learning_rate": 8.692380850012145e-06, "loss": 0.1748, "step": 3614 }, { "epoch": 0.26, "grad_norm": 1.5641275002238877, "learning_rate": 8.691599662515224e-06, "loss": 0.2282, "step": 3615 }, { "epoch": 0.26, "grad_norm": 1.3916844818149299, "learning_rate": 8.6908182768679e-06, "loss": 0.2516, "step": 3616 }, { "epoch": 0.26, "grad_norm": 5.18483380444172, "learning_rate": 8.690036693112112e-06, "loss": 0.5917, "step": 3617 }, { "epoch": 0.26, "grad_norm": 5.609677420408956, "learning_rate": 8.689254911289816e-06, "loss": 0.5138, "step": 3618 }, { "epoch": 0.26, "grad_norm": 1.4479070490845398, "learning_rate": 8.688472931442972e-06, "loss": 0.2426, "step": 3619 }, { "epoch": 0.26, "grad_norm": 1.5123666783855032, "learning_rate": 8.687690753613554e-06, "loss": 0.2158, "step": 3620 }, { "epoch": 0.26, "grad_norm": 1.4300847273683959, "learning_rate": 8.686908377843547e-06, "loss": 0.2301, "step": 3621 }, { "epoch": 0.26, "grad_norm": 1.3080294668417833, "learning_rate": 8.686125804174947e-06, "loss": 0.1772, "step": 3622 }, { "epoch": 0.26, "grad_norm": 1.4053402814324312, "learning_rate": 8.685343032649758e-06, "loss": 0.2181, "step": 3623 }, { "epoch": 0.26, "grad_norm": 1.41175785513272, "learning_rate": 8.684560063309994e-06, "loss": 0.2017, "step": 3624 }, { "epoch": 0.26, "grad_norm": 1.3821512422026803, "learning_rate": 8.683776896197686e-06, "loss": 0.2071, "step": 3625 }, { "epoch": 0.26, "grad_norm": 1.3406631132475026, "learning_rate": 8.682993531354868e-06, "loss": 0.1721, "step": 3626 }, { "epoch": 0.26, "grad_norm": 1.4294988814561305, "learning_rate": 8.682209968823589e-06, "loss": 0.1923, "step": 3627 }, { "epoch": 0.26, "grad_norm": 1.4578902780291605, "learning_rate": 8.681426208645906e-06, "loss": 0.2414, "step": 3628 }, { "epoch": 0.26, "grad_norm": 1.246032363260343, "learning_rate": 8.680642250863892e-06, "loss": 0.1851, "step": 3629 }, { "epoch": 0.26, "grad_norm": 5.015773133168359, "learning_rate": 8.679858095519624e-06, "loss": 0.5859, "step": 3630 }, { "epoch": 0.26, "grad_norm": 1.4557244153862854, "learning_rate": 8.679073742655192e-06, "loss": 0.1992, "step": 3631 }, { "epoch": 0.26, "grad_norm": 1.180161468560182, "learning_rate": 8.6782891923127e-06, "loss": 0.1967, "step": 3632 }, { "epoch": 0.26, "grad_norm": 1.4885763535034882, "learning_rate": 8.677504444534253e-06, "loss": 0.2484, "step": 3633 }, { "epoch": 0.26, "grad_norm": 1.3773598582017972, "learning_rate": 8.67671949936198e-06, "loss": 0.235, "step": 3634 }, { "epoch": 0.26, "grad_norm": 1.3469819475814317, "learning_rate": 8.675934356838012e-06, "loss": 0.2165, "step": 3635 }, { "epoch": 0.26, "grad_norm": 1.5024048532435206, "learning_rate": 8.67514901700449e-06, "loss": 0.2716, "step": 3636 }, { "epoch": 0.26, "grad_norm": 1.3458938941602556, "learning_rate": 8.674363479903569e-06, "loss": 0.2058, "step": 3637 }, { "epoch": 0.26, "grad_norm": 1.4001320233741732, "learning_rate": 8.673577745577414e-06, "loss": 0.1935, "step": 3638 }, { "epoch": 0.26, "grad_norm": 1.4269731070102218, "learning_rate": 8.6727918140682e-06, "loss": 0.2306, "step": 3639 }, { "epoch": 0.26, "grad_norm": 1.3699289145976898, "learning_rate": 8.672005685418115e-06, "loss": 0.1926, "step": 3640 }, { "epoch": 0.26, "grad_norm": 1.4512164382661474, "learning_rate": 8.671219359669349e-06, "loss": 0.2386, "step": 3641 }, { "epoch": 0.26, "grad_norm": 1.2605249228203246, "learning_rate": 8.670432836864115e-06, "loss": 0.1791, "step": 3642 }, { "epoch": 0.26, "grad_norm": 1.5438693182045768, "learning_rate": 8.669646117044627e-06, "loss": 0.2365, "step": 3643 }, { "epoch": 0.26, "grad_norm": 10.170209911578782, "learning_rate": 8.668859200253116e-06, "loss": 0.6741, "step": 3644 }, { "epoch": 0.26, "grad_norm": 6.599932155038986, "learning_rate": 8.668072086531818e-06, "loss": 0.67, "step": 3645 }, { "epoch": 0.26, "grad_norm": 1.410254857501086, "learning_rate": 8.667284775922982e-06, "loss": 0.2435, "step": 3646 }, { "epoch": 0.26, "grad_norm": 1.3536109860503263, "learning_rate": 8.666497268468869e-06, "loss": 0.2142, "step": 3647 }, { "epoch": 0.26, "grad_norm": 1.2680493215714415, "learning_rate": 8.66570956421175e-06, "loss": 0.2125, "step": 3648 }, { "epoch": 0.26, "grad_norm": 1.5084335267763196, "learning_rate": 8.664921663193904e-06, "loss": 0.2126, "step": 3649 }, { "epoch": 0.26, "grad_norm": 1.528444591957655, "learning_rate": 8.664133565457623e-06, "loss": 0.2436, "step": 3650 }, { "epoch": 0.26, "grad_norm": 1.3133912476408447, "learning_rate": 8.663345271045207e-06, "loss": 0.1905, "step": 3651 }, { "epoch": 0.26, "grad_norm": 1.4326530666856625, "learning_rate": 8.662556779998974e-06, "loss": 0.1796, "step": 3652 }, { "epoch": 0.26, "grad_norm": 5.552917381106129, "learning_rate": 8.661768092361241e-06, "loss": 0.7356, "step": 3653 }, { "epoch": 0.26, "grad_norm": 1.6112582334380086, "learning_rate": 8.660979208174345e-06, "loss": 0.2218, "step": 3654 }, { "epoch": 0.26, "grad_norm": 1.5935572663030553, "learning_rate": 8.66019012748063e-06, "loss": 0.2762, "step": 3655 }, { "epoch": 0.26, "grad_norm": 1.2720226479046026, "learning_rate": 8.659400850322451e-06, "loss": 0.1911, "step": 3656 }, { "epoch": 0.26, "grad_norm": 1.1992858958944708, "learning_rate": 8.65861137674217e-06, "loss": 0.1841, "step": 3657 }, { "epoch": 0.26, "grad_norm": 1.3055061879240084, "learning_rate": 8.657821706782166e-06, "loss": 0.1751, "step": 3658 }, { "epoch": 0.26, "grad_norm": 1.4092479659674235, "learning_rate": 8.657031840484825e-06, "loss": 0.2339, "step": 3659 }, { "epoch": 0.26, "grad_norm": 4.620787342646206, "learning_rate": 8.656241777892544e-06, "loss": 0.5402, "step": 3660 }, { "epoch": 0.26, "grad_norm": 1.2306326391241493, "learning_rate": 8.655451519047729e-06, "loss": 0.2107, "step": 3661 }, { "epoch": 0.26, "grad_norm": 1.3136535876307098, "learning_rate": 8.654661063992799e-06, "loss": 0.2341, "step": 3662 }, { "epoch": 0.26, "grad_norm": 1.3190342215891908, "learning_rate": 8.653870412770182e-06, "loss": 0.2082, "step": 3663 }, { "epoch": 0.26, "grad_norm": 1.4399647918708154, "learning_rate": 8.653079565422318e-06, "loss": 0.2067, "step": 3664 }, { "epoch": 0.26, "grad_norm": 1.5822197544676122, "learning_rate": 8.652288521991656e-06, "loss": 0.2411, "step": 3665 }, { "epoch": 0.26, "grad_norm": 1.3809179381008752, "learning_rate": 8.651497282520654e-06, "loss": 0.2495, "step": 3666 }, { "epoch": 0.26, "grad_norm": 25.83720614132753, "learning_rate": 8.650705847051786e-06, "loss": 0.5793, "step": 3667 }, { "epoch": 0.26, "grad_norm": 1.6028630306315217, "learning_rate": 8.64991421562753e-06, "loss": 0.2486, "step": 3668 }, { "epoch": 0.26, "grad_norm": 1.174026891905283, "learning_rate": 8.649122388290383e-06, "loss": 0.1731, "step": 3669 }, { "epoch": 0.26, "grad_norm": 1.3008370950526544, "learning_rate": 8.64833036508284e-06, "loss": 0.1766, "step": 3670 }, { "epoch": 0.26, "grad_norm": 1.4591698283135168, "learning_rate": 8.647538146047418e-06, "loss": 0.2722, "step": 3671 }, { "epoch": 0.26, "grad_norm": 1.37903803953325, "learning_rate": 8.64674573122664e-06, "loss": 0.2429, "step": 3672 }, { "epoch": 0.26, "grad_norm": 1.4321179853388204, "learning_rate": 8.645953120663038e-06, "loss": 0.2259, "step": 3673 }, { "epoch": 0.26, "grad_norm": 1.3084897474827066, "learning_rate": 8.645160314399157e-06, "loss": 0.1617, "step": 3674 }, { "epoch": 0.26, "grad_norm": 1.2886844080760904, "learning_rate": 8.644367312477552e-06, "loss": 0.1928, "step": 3675 }, { "epoch": 0.26, "grad_norm": 1.4204960197469758, "learning_rate": 8.643574114940789e-06, "loss": 0.2532, "step": 3676 }, { "epoch": 0.26, "grad_norm": 1.4537882977779275, "learning_rate": 8.64278072183144e-06, "loss": 0.2447, "step": 3677 }, { "epoch": 0.26, "grad_norm": 1.2579570039529764, "learning_rate": 8.641987133192096e-06, "loss": 0.2121, "step": 3678 }, { "epoch": 0.26, "grad_norm": 1.2745975737245494, "learning_rate": 8.641193349065351e-06, "loss": 0.1722, "step": 3679 }, { "epoch": 0.26, "grad_norm": 1.2725434275095413, "learning_rate": 8.640399369493813e-06, "loss": 0.1685, "step": 3680 }, { "epoch": 0.26, "grad_norm": 1.3696478668492316, "learning_rate": 8.6396051945201e-06, "loss": 0.2366, "step": 3681 }, { "epoch": 0.26, "grad_norm": 1.4125743610898953, "learning_rate": 8.638810824186839e-06, "loss": 0.215, "step": 3682 }, { "epoch": 0.26, "grad_norm": 1.264651651921664, "learning_rate": 8.638016258536668e-06, "loss": 0.157, "step": 3683 }, { "epoch": 0.26, "grad_norm": 1.3164292254863632, "learning_rate": 8.637221497612238e-06, "loss": 0.231, "step": 3684 }, { "epoch": 0.26, "grad_norm": 1.4171474106663915, "learning_rate": 8.636426541456208e-06, "loss": 0.2242, "step": 3685 }, { "epoch": 0.26, "grad_norm": 1.25277934153627, "learning_rate": 8.635631390111248e-06, "loss": 0.2237, "step": 3686 }, { "epoch": 0.26, "grad_norm": 1.181973643974844, "learning_rate": 8.634836043620038e-06, "loss": 0.1661, "step": 3687 }, { "epoch": 0.26, "grad_norm": 1.6979088828056528, "learning_rate": 8.63404050202527e-06, "loss": 0.2863, "step": 3688 }, { "epoch": 0.26, "grad_norm": 1.39466882472605, "learning_rate": 8.633244765369648e-06, "loss": 0.2102, "step": 3689 }, { "epoch": 0.26, "grad_norm": 5.755946383717729, "learning_rate": 8.632448833695878e-06, "loss": 0.6949, "step": 3690 }, { "epoch": 0.26, "grad_norm": 8.641083154816055, "learning_rate": 8.631652707046686e-06, "loss": 0.5644, "step": 3691 }, { "epoch": 0.26, "grad_norm": 1.3395622409000634, "learning_rate": 8.630856385464805e-06, "loss": 0.2019, "step": 3692 }, { "epoch": 0.26, "grad_norm": 1.2762870830118762, "learning_rate": 8.630059868992978e-06, "loss": 0.2112, "step": 3693 }, { "epoch": 0.26, "grad_norm": 1.1789605845016022, "learning_rate": 8.629263157673958e-06, "loss": 0.2029, "step": 3694 }, { "epoch": 0.26, "grad_norm": 1.4282583141589658, "learning_rate": 8.628466251550509e-06, "loss": 0.2056, "step": 3695 }, { "epoch": 0.26, "grad_norm": 1.434177671039088, "learning_rate": 8.627669150665407e-06, "loss": 0.2407, "step": 3696 }, { "epoch": 0.26, "grad_norm": 1.4519080142263323, "learning_rate": 8.626871855061438e-06, "loss": 0.19, "step": 3697 }, { "epoch": 0.26, "grad_norm": 1.276463931167107, "learning_rate": 8.626074364781399e-06, "loss": 0.187, "step": 3698 }, { "epoch": 0.26, "grad_norm": 1.243157383330987, "learning_rate": 8.62527667986809e-06, "loss": 0.1821, "step": 3699 }, { "epoch": 0.26, "grad_norm": 4.447528236571992, "learning_rate": 8.624478800364332e-06, "loss": 0.6182, "step": 3700 }, { "epoch": 0.26, "grad_norm": 1.1717162837881208, "learning_rate": 8.623680726312953e-06, "loss": 0.1882, "step": 3701 }, { "epoch": 0.26, "grad_norm": 1.2427571157597763, "learning_rate": 8.622882457756787e-06, "loss": 0.199, "step": 3702 }, { "epoch": 0.26, "grad_norm": 1.3857724677518324, "learning_rate": 8.622083994738683e-06, "loss": 0.2161, "step": 3703 }, { "epoch": 0.26, "grad_norm": 1.3714289612765016, "learning_rate": 8.621285337301502e-06, "loss": 0.2465, "step": 3704 }, { "epoch": 0.27, "grad_norm": 1.448574625903545, "learning_rate": 8.62048648548811e-06, "loss": 0.2557, "step": 3705 }, { "epoch": 0.27, "grad_norm": 4.551615220918818, "learning_rate": 8.619687439341386e-06, "loss": 0.5106, "step": 3706 }, { "epoch": 0.27, "grad_norm": 1.1928727255980196, "learning_rate": 8.61888819890422e-06, "loss": 0.1928, "step": 3707 }, { "epoch": 0.27, "grad_norm": 4.138206198845635, "learning_rate": 8.618088764219514e-06, "loss": 0.6126, "step": 3708 }, { "epoch": 0.27, "grad_norm": 1.4238892807408703, "learning_rate": 8.617289135330177e-06, "loss": 0.2019, "step": 3709 }, { "epoch": 0.27, "grad_norm": 4.572065335113266, "learning_rate": 8.616489312279131e-06, "loss": 0.6101, "step": 3710 }, { "epoch": 0.27, "grad_norm": 1.386879721499365, "learning_rate": 8.615689295109304e-06, "loss": 0.2759, "step": 3711 }, { "epoch": 0.27, "grad_norm": 1.35698005446649, "learning_rate": 8.614889083863642e-06, "loss": 0.1763, "step": 3712 }, { "epoch": 0.27, "grad_norm": 1.6688673039010682, "learning_rate": 8.614088678585092e-06, "loss": 0.2658, "step": 3713 }, { "epoch": 0.27, "grad_norm": 1.3855174880737342, "learning_rate": 8.613288079316624e-06, "loss": 0.1845, "step": 3714 }, { "epoch": 0.27, "grad_norm": 1.3408652673693586, "learning_rate": 8.612487286101204e-06, "loss": 0.2083, "step": 3715 }, { "epoch": 0.27, "grad_norm": 1.3903959511207398, "learning_rate": 8.61168629898182e-06, "loss": 0.2563, "step": 3716 }, { "epoch": 0.27, "grad_norm": 1.4767600595492247, "learning_rate": 8.610885118001462e-06, "loss": 0.2308, "step": 3717 }, { "epoch": 0.27, "grad_norm": 1.4589461411253029, "learning_rate": 8.610083743203136e-06, "loss": 0.2081, "step": 3718 }, { "epoch": 0.27, "grad_norm": 1.373707232171107, "learning_rate": 8.609282174629859e-06, "loss": 0.2257, "step": 3719 }, { "epoch": 0.27, "grad_norm": 1.3736717254822854, "learning_rate": 8.608480412324652e-06, "loss": 0.2303, "step": 3720 }, { "epoch": 0.27, "grad_norm": 1.405861347386478, "learning_rate": 8.607678456330552e-06, "loss": 0.2539, "step": 3721 }, { "epoch": 0.27, "grad_norm": 1.4048214408132158, "learning_rate": 8.606876306690606e-06, "loss": 0.2004, "step": 3722 }, { "epoch": 0.27, "grad_norm": 1.402885719250304, "learning_rate": 8.606073963447868e-06, "loss": 0.2296, "step": 3723 }, { "epoch": 0.27, "grad_norm": 3.8891248047445655, "learning_rate": 8.605271426645408e-06, "loss": 0.2609, "step": 3724 }, { "epoch": 0.27, "grad_norm": 1.5029992380872932, "learning_rate": 8.604468696326299e-06, "loss": 0.291, "step": 3725 }, { "epoch": 0.27, "grad_norm": 1.2944434907898885, "learning_rate": 8.603665772533631e-06, "loss": 0.2098, "step": 3726 }, { "epoch": 0.27, "grad_norm": 1.7292187874233913, "learning_rate": 8.6028626553105e-06, "loss": 0.2723, "step": 3727 }, { "epoch": 0.27, "grad_norm": 1.3332799470322534, "learning_rate": 8.602059344700015e-06, "loss": 0.2403, "step": 3728 }, { "epoch": 0.27, "grad_norm": 1.2545676697316803, "learning_rate": 8.601255840745297e-06, "loss": 0.1767, "step": 3729 }, { "epoch": 0.27, "grad_norm": 4.362349085751693, "learning_rate": 8.60045214348947e-06, "loss": 0.5523, "step": 3730 }, { "epoch": 0.27, "grad_norm": 1.2701766759145263, "learning_rate": 8.599648252975676e-06, "loss": 0.1965, "step": 3731 }, { "epoch": 0.27, "grad_norm": 1.3268017744971359, "learning_rate": 8.598844169247064e-06, "loss": 0.216, "step": 3732 }, { "epoch": 0.27, "grad_norm": 1.0730807262687505, "learning_rate": 8.598039892346797e-06, "loss": 0.2062, "step": 3733 }, { "epoch": 0.27, "grad_norm": 1.319315926696157, "learning_rate": 8.597235422318041e-06, "loss": 0.2005, "step": 3734 }, { "epoch": 0.27, "grad_norm": 1.3362468249150978, "learning_rate": 8.596430759203979e-06, "loss": 0.1597, "step": 3735 }, { "epoch": 0.27, "grad_norm": 1.3671083974152785, "learning_rate": 8.595625903047801e-06, "loss": 0.2248, "step": 3736 }, { "epoch": 0.27, "grad_norm": 1.311381333957636, "learning_rate": 8.59482085389271e-06, "loss": 0.2341, "step": 3737 }, { "epoch": 0.27, "grad_norm": 1.4146299287788768, "learning_rate": 8.594015611781917e-06, "loss": 0.2287, "step": 3738 }, { "epoch": 0.27, "grad_norm": 4.608191484225012, "learning_rate": 8.593210176758644e-06, "loss": 0.5675, "step": 3739 }, { "epoch": 0.27, "grad_norm": 4.231463584323397, "learning_rate": 8.592404548866123e-06, "loss": 0.8225, "step": 3740 }, { "epoch": 0.27, "grad_norm": 1.3022457972067576, "learning_rate": 8.5915987281476e-06, "loss": 0.183, "step": 3741 }, { "epoch": 0.27, "grad_norm": 5.3679706087039785, "learning_rate": 8.590792714646323e-06, "loss": 0.5342, "step": 3742 }, { "epoch": 0.27, "grad_norm": 1.508856184978745, "learning_rate": 8.58998650840556e-06, "loss": 0.1893, "step": 3743 }, { "epoch": 0.27, "grad_norm": 1.319264800140163, "learning_rate": 8.589180109468584e-06, "loss": 0.1988, "step": 3744 }, { "epoch": 0.27, "grad_norm": 1.3207946987624866, "learning_rate": 8.588373517878678e-06, "loss": 0.1889, "step": 3745 }, { "epoch": 0.27, "grad_norm": 1.4605806029169857, "learning_rate": 8.587566733679137e-06, "loss": 0.1976, "step": 3746 }, { "epoch": 0.27, "grad_norm": 1.3846173005812834, "learning_rate": 8.586759756913268e-06, "loss": 0.2017, "step": 3747 }, { "epoch": 0.27, "grad_norm": 1.351304120329717, "learning_rate": 8.585952587624382e-06, "loss": 0.2075, "step": 3748 }, { "epoch": 0.27, "grad_norm": 1.4661103954883927, "learning_rate": 8.58514522585581e-06, "loss": 0.2267, "step": 3749 }, { "epoch": 0.27, "grad_norm": 1.3720702358694754, "learning_rate": 8.584337671650884e-06, "loss": 0.258, "step": 3750 }, { "epoch": 0.27, "grad_norm": 1.3758286068019823, "learning_rate": 8.583529925052952e-06, "loss": 0.2424, "step": 3751 }, { "epoch": 0.27, "grad_norm": 1.5776159633652178, "learning_rate": 8.58272198610537e-06, "loss": 0.2213, "step": 3752 }, { "epoch": 0.27, "grad_norm": 1.3607929834835275, "learning_rate": 8.581913854851506e-06, "loss": 0.2355, "step": 3753 }, { "epoch": 0.27, "grad_norm": 1.4698516044360939, "learning_rate": 8.581105531334735e-06, "loss": 0.2059, "step": 3754 }, { "epoch": 0.27, "grad_norm": 1.4508342304163457, "learning_rate": 8.580297015598447e-06, "loss": 0.2491, "step": 3755 }, { "epoch": 0.27, "grad_norm": 1.4339251429230815, "learning_rate": 8.57948830768604e-06, "loss": 0.2089, "step": 3756 }, { "epoch": 0.27, "grad_norm": 1.415239555826219, "learning_rate": 8.578679407640919e-06, "loss": 0.1714, "step": 3757 }, { "epoch": 0.27, "grad_norm": 1.2813124770634041, "learning_rate": 8.577870315506506e-06, "loss": 0.1666, "step": 3758 }, { "epoch": 0.27, "grad_norm": 1.4988480636735053, "learning_rate": 8.577061031326227e-06, "loss": 0.2331, "step": 3759 }, { "epoch": 0.27, "grad_norm": 4.773984018679814, "learning_rate": 8.576251555143524e-06, "loss": 0.5543, "step": 3760 }, { "epoch": 0.27, "grad_norm": 1.4032698870309013, "learning_rate": 8.575441887001847e-06, "loss": 0.2414, "step": 3761 }, { "epoch": 0.27, "grad_norm": 1.5419698548067005, "learning_rate": 8.574632026944652e-06, "loss": 0.2585, "step": 3762 }, { "epoch": 0.27, "grad_norm": 1.319388853697744, "learning_rate": 8.57382197501541e-06, "loss": 0.2225, "step": 3763 }, { "epoch": 0.27, "grad_norm": 1.344821102616439, "learning_rate": 8.573011731257605e-06, "loss": 0.1944, "step": 3764 }, { "epoch": 0.27, "grad_norm": 1.3392339595516822, "learning_rate": 8.572201295714723e-06, "loss": 0.1805, "step": 3765 }, { "epoch": 0.27, "grad_norm": 1.5048278098849377, "learning_rate": 8.571390668430268e-06, "loss": 0.2611, "step": 3766 }, { "epoch": 0.27, "grad_norm": 1.255493267859696, "learning_rate": 8.570579849447752e-06, "loss": 0.1779, "step": 3767 }, { "epoch": 0.27, "grad_norm": 1.5282593137374865, "learning_rate": 8.569768838810691e-06, "loss": 0.183, "step": 3768 }, { "epoch": 0.27, "grad_norm": 1.622052437571073, "learning_rate": 8.568957636562622e-06, "loss": 0.2793, "step": 3769 }, { "epoch": 0.27, "grad_norm": 1.3267230791955964, "learning_rate": 8.568146242747089e-06, "loss": 0.2423, "step": 3770 }, { "epoch": 0.27, "grad_norm": 1.2599102267646691, "learning_rate": 8.567334657407637e-06, "loss": 0.1887, "step": 3771 }, { "epoch": 0.27, "grad_norm": 1.6804956079989428, "learning_rate": 8.566522880587836e-06, "loss": 0.2724, "step": 3772 }, { "epoch": 0.27, "grad_norm": 21.36524085662909, "learning_rate": 8.565710912331253e-06, "loss": 0.5501, "step": 3773 }, { "epoch": 0.27, "grad_norm": 1.2720655163227121, "learning_rate": 8.564898752681476e-06, "loss": 0.2139, "step": 3774 }, { "epoch": 0.27, "grad_norm": 1.3441824566373441, "learning_rate": 8.564086401682095e-06, "loss": 0.1782, "step": 3775 }, { "epoch": 0.27, "grad_norm": 1.4209196093178265, "learning_rate": 8.563273859376717e-06, "loss": 0.2276, "step": 3776 }, { "epoch": 0.27, "grad_norm": 1.3154652703848242, "learning_rate": 8.562461125808955e-06, "loss": 0.1879, "step": 3777 }, { "epoch": 0.27, "grad_norm": 1.3878768452410848, "learning_rate": 8.56164820102243e-06, "loss": 0.2146, "step": 3778 }, { "epoch": 0.27, "grad_norm": 4.8679886368950696, "learning_rate": 8.56083508506078e-06, "loss": 0.4543, "step": 3779 }, { "epoch": 0.27, "grad_norm": 1.5101079599711065, "learning_rate": 8.56002177796765e-06, "loss": 0.253, "step": 3780 }, { "epoch": 0.27, "grad_norm": 1.352043590167883, "learning_rate": 8.559208279786693e-06, "loss": 0.1802, "step": 3781 }, { "epoch": 0.27, "grad_norm": 1.5023990757872405, "learning_rate": 8.558394590561575e-06, "loss": 0.2388, "step": 3782 }, { "epoch": 0.27, "grad_norm": 1.4019970606974428, "learning_rate": 8.557580710335976e-06, "loss": 0.2222, "step": 3783 }, { "epoch": 0.27, "grad_norm": 1.4235292783458622, "learning_rate": 8.556766639153575e-06, "loss": 0.2383, "step": 3784 }, { "epoch": 0.27, "grad_norm": 7.622177677072024, "learning_rate": 8.555952377058071e-06, "loss": 0.6352, "step": 3785 }, { "epoch": 0.27, "grad_norm": 1.255773833922842, "learning_rate": 8.55513792409317e-06, "loss": 0.1712, "step": 3786 }, { "epoch": 0.27, "grad_norm": 1.4176935537783162, "learning_rate": 8.55432328030259e-06, "loss": 0.2251, "step": 3787 }, { "epoch": 0.27, "grad_norm": 1.2872261987272458, "learning_rate": 8.553508445730059e-06, "loss": 0.2025, "step": 3788 }, { "epoch": 0.27, "grad_norm": 1.2901634942000337, "learning_rate": 8.552693420419309e-06, "loss": 0.2289, "step": 3789 }, { "epoch": 0.27, "grad_norm": 1.46746626514602, "learning_rate": 8.551878204414092e-06, "loss": 0.181, "step": 3790 }, { "epoch": 0.27, "grad_norm": 1.296549302443851, "learning_rate": 8.551062797758162e-06, "loss": 0.228, "step": 3791 }, { "epoch": 0.27, "grad_norm": 1.214529774209249, "learning_rate": 8.550247200495289e-06, "loss": 0.1886, "step": 3792 }, { "epoch": 0.27, "grad_norm": 1.3398654654605422, "learning_rate": 8.54943141266925e-06, "loss": 0.2401, "step": 3793 }, { "epoch": 0.27, "grad_norm": 1.4301536041908605, "learning_rate": 8.548615434323837e-06, "loss": 0.2369, "step": 3794 }, { "epoch": 0.27, "grad_norm": 1.4160362103673751, "learning_rate": 8.547799265502844e-06, "loss": 0.1808, "step": 3795 }, { "epoch": 0.27, "grad_norm": 1.6080012519209934, "learning_rate": 8.546982906250082e-06, "loss": 0.2171, "step": 3796 }, { "epoch": 0.27, "grad_norm": 1.5789634131715335, "learning_rate": 8.546166356609366e-06, "loss": 0.1684, "step": 3797 }, { "epoch": 0.27, "grad_norm": 1.3864888188164757, "learning_rate": 8.545349616624529e-06, "loss": 0.2195, "step": 3798 }, { "epoch": 0.27, "grad_norm": 1.2981800397987413, "learning_rate": 8.544532686339411e-06, "loss": 0.1681, "step": 3799 }, { "epoch": 0.27, "grad_norm": 1.3493144257686176, "learning_rate": 8.543715565797861e-06, "loss": 0.2276, "step": 3800 }, { "epoch": 0.27, "grad_norm": 1.175495981955503, "learning_rate": 8.542898255043735e-06, "loss": 0.1414, "step": 3801 }, { "epoch": 0.27, "grad_norm": 1.284617242951081, "learning_rate": 8.542080754120907e-06, "loss": 0.2513, "step": 3802 }, { "epoch": 0.27, "grad_norm": 1.4711552359422857, "learning_rate": 8.541263063073256e-06, "loss": 0.2724, "step": 3803 }, { "epoch": 0.27, "grad_norm": 5.712727661415676, "learning_rate": 8.540445181944673e-06, "loss": 0.6565, "step": 3804 }, { "epoch": 0.27, "grad_norm": 1.2021653482011387, "learning_rate": 8.53962711077906e-06, "loss": 0.2064, "step": 3805 }, { "epoch": 0.27, "grad_norm": 1.742796825476432, "learning_rate": 8.538808849620322e-06, "loss": 0.2273, "step": 3806 }, { "epoch": 0.27, "grad_norm": 1.2357270266781555, "learning_rate": 8.537990398512386e-06, "loss": 0.1718, "step": 3807 }, { "epoch": 0.27, "grad_norm": 1.3326391721760527, "learning_rate": 8.53717175749918e-06, "loss": 0.2318, "step": 3808 }, { "epoch": 0.27, "grad_norm": 1.421780249659735, "learning_rate": 8.536352926624648e-06, "loss": 0.2467, "step": 3809 }, { "epoch": 0.27, "grad_norm": 1.4640046874206998, "learning_rate": 8.535533905932739e-06, "loss": 0.2336, "step": 3810 }, { "epoch": 0.27, "grad_norm": 1.4162658461869917, "learning_rate": 8.534714695467416e-06, "loss": 0.1892, "step": 3811 }, { "epoch": 0.27, "grad_norm": 1.2946165276881276, "learning_rate": 8.53389529527265e-06, "loss": 0.1864, "step": 3812 }, { "epoch": 0.27, "grad_norm": 1.3520973820619984, "learning_rate": 8.533075705392426e-06, "loss": 0.2101, "step": 3813 }, { "epoch": 0.27, "grad_norm": 1.404255472017041, "learning_rate": 8.532255925870733e-06, "loss": 0.2708, "step": 3814 }, { "epoch": 0.27, "grad_norm": 1.5344333505848957, "learning_rate": 8.531435956751575e-06, "loss": 0.2146, "step": 3815 }, { "epoch": 0.27, "grad_norm": 1.5185512805731054, "learning_rate": 8.530615798078965e-06, "loss": 0.2285, "step": 3816 }, { "epoch": 0.27, "grad_norm": 1.2809346535080042, "learning_rate": 8.529795449896926e-06, "loss": 0.1944, "step": 3817 }, { "epoch": 0.27, "grad_norm": 1.2635046554670997, "learning_rate": 8.528974912249486e-06, "loss": 0.2213, "step": 3818 }, { "epoch": 0.27, "grad_norm": 1.5520902591388555, "learning_rate": 8.528154185180698e-06, "loss": 0.2765, "step": 3819 }, { "epoch": 0.27, "grad_norm": 1.4399614897339739, "learning_rate": 8.527333268734607e-06, "loss": 0.1828, "step": 3820 }, { "epoch": 0.27, "grad_norm": 5.282272758232697, "learning_rate": 8.52651216295528e-06, "loss": 0.5017, "step": 3821 }, { "epoch": 0.27, "grad_norm": 1.5083858400591068, "learning_rate": 8.525690867886791e-06, "loss": 0.1753, "step": 3822 }, { "epoch": 0.27, "grad_norm": 1.4134577679998788, "learning_rate": 8.524869383573223e-06, "loss": 0.1769, "step": 3823 }, { "epoch": 0.27, "grad_norm": 1.3260585276355787, "learning_rate": 8.524047710058669e-06, "loss": 0.1849, "step": 3824 }, { "epoch": 0.27, "grad_norm": 1.3412548012624133, "learning_rate": 8.523225847387234e-06, "loss": 0.1809, "step": 3825 }, { "epoch": 0.27, "grad_norm": 1.464972920229026, "learning_rate": 8.522403795603034e-06, "loss": 0.2464, "step": 3826 }, { "epoch": 0.27, "grad_norm": 1.4615100189581633, "learning_rate": 8.52158155475019e-06, "loss": 0.2194, "step": 3827 }, { "epoch": 0.27, "grad_norm": 1.4330635049973262, "learning_rate": 8.52075912487284e-06, "loss": 0.2399, "step": 3828 }, { "epoch": 0.27, "grad_norm": 1.4276667147227575, "learning_rate": 8.519936506015128e-06, "loss": 0.2479, "step": 3829 }, { "epoch": 0.27, "grad_norm": 1.5392773679120029, "learning_rate": 8.519113698221207e-06, "loss": 0.2757, "step": 3830 }, { "epoch": 0.27, "grad_norm": 5.888178509053642, "learning_rate": 8.518290701535244e-06, "loss": 0.53, "step": 3831 }, { "epoch": 0.27, "grad_norm": 1.2449269402585341, "learning_rate": 8.517467516001413e-06, "loss": 0.1912, "step": 3832 }, { "epoch": 0.27, "grad_norm": 1.464634182315767, "learning_rate": 8.516644141663902e-06, "loss": 0.233, "step": 3833 }, { "epoch": 0.27, "grad_norm": 1.420062555196312, "learning_rate": 8.515820578566902e-06, "loss": 0.1994, "step": 3834 }, { "epoch": 0.27, "grad_norm": 1.3744203931568093, "learning_rate": 8.514996826754622e-06, "loss": 0.2382, "step": 3835 }, { "epoch": 0.27, "grad_norm": 1.2427035375392246, "learning_rate": 8.514172886271277e-06, "loss": 0.2193, "step": 3836 }, { "epoch": 0.27, "grad_norm": 1.1799765071536565, "learning_rate": 8.513348757161091e-06, "loss": 0.175, "step": 3837 }, { "epoch": 0.27, "grad_norm": 1.4261323105009915, "learning_rate": 8.512524439468304e-06, "loss": 0.239, "step": 3838 }, { "epoch": 0.27, "grad_norm": 1.5825680546863101, "learning_rate": 8.511699933237158e-06, "loss": 0.2139, "step": 3839 }, { "epoch": 0.27, "grad_norm": 1.3602769139965654, "learning_rate": 8.510875238511911e-06, "loss": 0.2345, "step": 3840 }, { "epoch": 0.27, "grad_norm": 5.7587656928016475, "learning_rate": 8.510050355336828e-06, "loss": 0.5329, "step": 3841 }, { "epoch": 0.27, "grad_norm": 1.2989006186480576, "learning_rate": 8.509225283756188e-06, "loss": 0.2343, "step": 3842 }, { "epoch": 0.27, "grad_norm": 1.399484368227784, "learning_rate": 8.508400023814277e-06, "loss": 0.1919, "step": 3843 }, { "epoch": 0.27, "grad_norm": 1.5173475415398716, "learning_rate": 8.50757457555539e-06, "loss": 0.2408, "step": 3844 }, { "epoch": 0.28, "grad_norm": 1.3936609217596883, "learning_rate": 8.506748939023835e-06, "loss": 0.2065, "step": 3845 }, { "epoch": 0.28, "grad_norm": 1.3999993171933802, "learning_rate": 8.505923114263927e-06, "loss": 0.2354, "step": 3846 }, { "epoch": 0.28, "grad_norm": 1.2728624786418958, "learning_rate": 8.505097101319995e-06, "loss": 0.2071, "step": 3847 }, { "epoch": 0.28, "grad_norm": 1.4683537834395277, "learning_rate": 8.504270900236376e-06, "loss": 0.245, "step": 3848 }, { "epoch": 0.28, "grad_norm": 1.464388523583066, "learning_rate": 8.503444511057418e-06, "loss": 0.2549, "step": 3849 }, { "epoch": 0.28, "grad_norm": 1.2209656308717327, "learning_rate": 8.502617933827475e-06, "loss": 0.21, "step": 3850 }, { "epoch": 0.28, "grad_norm": 1.2676298409385023, "learning_rate": 8.501791168590918e-06, "loss": 0.282, "step": 3851 }, { "epoch": 0.28, "grad_norm": 1.3578261812017063, "learning_rate": 8.500964215392122e-06, "loss": 0.2441, "step": 3852 }, { "epoch": 0.28, "grad_norm": 1.3049482188584087, "learning_rate": 8.500137074275477e-06, "loss": 0.1885, "step": 3853 }, { "epoch": 0.28, "grad_norm": 1.2871134037657361, "learning_rate": 8.499309745285377e-06, "loss": 0.1702, "step": 3854 }, { "epoch": 0.28, "grad_norm": 1.2872539540030306, "learning_rate": 8.498482228466234e-06, "loss": 0.2068, "step": 3855 }, { "epoch": 0.28, "grad_norm": 5.429545612292519, "learning_rate": 8.497654523862462e-06, "loss": 0.6066, "step": 3856 }, { "epoch": 0.28, "grad_norm": 1.3587465498432072, "learning_rate": 8.496826631518493e-06, "loss": 0.221, "step": 3857 }, { "epoch": 0.28, "grad_norm": 1.29942182571122, "learning_rate": 8.495998551478758e-06, "loss": 0.2487, "step": 3858 }, { "epoch": 0.28, "grad_norm": 1.2344268686084001, "learning_rate": 8.495170283787714e-06, "loss": 0.2086, "step": 3859 }, { "epoch": 0.28, "grad_norm": 9.696172534643765, "learning_rate": 8.494341828489812e-06, "loss": 0.7293, "step": 3860 }, { "epoch": 0.28, "grad_norm": 1.4480304811526563, "learning_rate": 8.493513185629525e-06, "loss": 0.2508, "step": 3861 }, { "epoch": 0.28, "grad_norm": 6.061888987038116, "learning_rate": 8.492684355251328e-06, "loss": 0.722, "step": 3862 }, { "epoch": 0.28, "grad_norm": 1.3827564700987485, "learning_rate": 8.49185533739971e-06, "loss": 0.2013, "step": 3863 }, { "epoch": 0.28, "grad_norm": 1.4893528144147292, "learning_rate": 8.491026132119172e-06, "loss": 0.2027, "step": 3864 }, { "epoch": 0.28, "grad_norm": 1.4478083118091474, "learning_rate": 8.49019673945422e-06, "loss": 0.2181, "step": 3865 }, { "epoch": 0.28, "grad_norm": 1.4115218975626034, "learning_rate": 8.489367159449372e-06, "loss": 0.1628, "step": 3866 }, { "epoch": 0.28, "grad_norm": 1.4403719132612645, "learning_rate": 8.488537392149156e-06, "loss": 0.2033, "step": 3867 }, { "epoch": 0.28, "grad_norm": 1.2623247936867024, "learning_rate": 8.487707437598114e-06, "loss": 0.1924, "step": 3868 }, { "epoch": 0.28, "grad_norm": 1.3373492947847219, "learning_rate": 8.486877295840792e-06, "loss": 0.2243, "step": 3869 }, { "epoch": 0.28, "grad_norm": 1.3100192214626967, "learning_rate": 8.48604696692175e-06, "loss": 0.2112, "step": 3870 }, { "epoch": 0.28, "grad_norm": 5.014827214460886, "learning_rate": 8.485216450885555e-06, "loss": 0.641, "step": 3871 }, { "epoch": 0.28, "grad_norm": 1.3585463397851476, "learning_rate": 8.484385747776788e-06, "loss": 0.2012, "step": 3872 }, { "epoch": 0.28, "grad_norm": 4.0348356668256145, "learning_rate": 8.483554857640036e-06, "loss": 0.6327, "step": 3873 }, { "epoch": 0.28, "grad_norm": 10.45168813633487, "learning_rate": 8.482723780519899e-06, "loss": 0.7452, "step": 3874 }, { "epoch": 0.28, "grad_norm": 1.4578224311653685, "learning_rate": 8.481892516460986e-06, "loss": 0.2459, "step": 3875 }, { "epoch": 0.28, "grad_norm": 1.472959944912008, "learning_rate": 8.481061065507915e-06, "loss": 0.2345, "step": 3876 }, { "epoch": 0.28, "grad_norm": 1.3325024143997988, "learning_rate": 8.480229427705316e-06, "loss": 0.2297, "step": 3877 }, { "epoch": 0.28, "grad_norm": 1.331005411627358, "learning_rate": 8.479397603097827e-06, "loss": 0.2099, "step": 3878 }, { "epoch": 0.28, "grad_norm": 1.5341739560655858, "learning_rate": 8.478565591730096e-06, "loss": 0.237, "step": 3879 }, { "epoch": 0.28, "grad_norm": 4.729469243657141, "learning_rate": 8.477733393646787e-06, "loss": 0.6163, "step": 3880 }, { "epoch": 0.28, "grad_norm": 1.3142664877759358, "learning_rate": 8.476901008892565e-06, "loss": 0.2142, "step": 3881 }, { "epoch": 0.28, "grad_norm": 1.2290010762016657, "learning_rate": 8.476068437512108e-06, "loss": 0.2068, "step": 3882 }, { "epoch": 0.28, "grad_norm": 1.283988312428219, "learning_rate": 8.475235679550107e-06, "loss": 0.201, "step": 3883 }, { "epoch": 0.28, "grad_norm": 3.690341955628392, "learning_rate": 8.474402735051263e-06, "loss": 0.5353, "step": 3884 }, { "epoch": 0.28, "grad_norm": 1.2425303793971396, "learning_rate": 8.473569604060282e-06, "loss": 0.1889, "step": 3885 }, { "epoch": 0.28, "grad_norm": 6.772102796295855, "learning_rate": 8.472736286621885e-06, "loss": 0.6061, "step": 3886 }, { "epoch": 0.28, "grad_norm": 1.3859004863526245, "learning_rate": 8.4719027827808e-06, "loss": 0.1711, "step": 3887 }, { "epoch": 0.28, "grad_norm": 1.3036121916582706, "learning_rate": 8.471069092581768e-06, "loss": 0.1641, "step": 3888 }, { "epoch": 0.28, "grad_norm": 1.4343413663823026, "learning_rate": 8.470235216069536e-06, "loss": 0.2033, "step": 3889 }, { "epoch": 0.28, "grad_norm": 1.400341220731093, "learning_rate": 8.469401153288864e-06, "loss": 0.1985, "step": 3890 }, { "epoch": 0.28, "grad_norm": 1.4007034078501162, "learning_rate": 8.468566904284522e-06, "loss": 0.1698, "step": 3891 }, { "epoch": 0.28, "grad_norm": 5.841480205283238, "learning_rate": 8.467732469101286e-06, "loss": 0.637, "step": 3892 }, { "epoch": 0.28, "grad_norm": 1.3687822350774363, "learning_rate": 8.46689784778395e-06, "loss": 0.1963, "step": 3893 }, { "epoch": 0.28, "grad_norm": 1.2347683492206658, "learning_rate": 8.46606304037731e-06, "loss": 0.1626, "step": 3894 }, { "epoch": 0.28, "grad_norm": 1.3249510926380665, "learning_rate": 8.465228046926176e-06, "loss": 0.1445, "step": 3895 }, { "epoch": 0.28, "grad_norm": 1.5296602802178285, "learning_rate": 8.464392867475367e-06, "loss": 0.2097, "step": 3896 }, { "epoch": 0.28, "grad_norm": 1.2198063049719832, "learning_rate": 8.463557502069713e-06, "loss": 0.1823, "step": 3897 }, { "epoch": 0.28, "grad_norm": 1.5790548855219289, "learning_rate": 8.462721950754051e-06, "loss": 0.237, "step": 3898 }, { "epoch": 0.28, "grad_norm": 1.429830998163152, "learning_rate": 8.461886213573233e-06, "loss": 0.2051, "step": 3899 }, { "epoch": 0.28, "grad_norm": 1.3713118195382008, "learning_rate": 8.461050290572114e-06, "loss": 0.1962, "step": 3900 }, { "epoch": 0.28, "grad_norm": 1.4333873339597056, "learning_rate": 8.460214181795567e-06, "loss": 0.2371, "step": 3901 }, { "epoch": 0.28, "grad_norm": 1.445855041528906, "learning_rate": 8.45937788728847e-06, "loss": 0.202, "step": 3902 }, { "epoch": 0.28, "grad_norm": 1.373078371649321, "learning_rate": 8.458541407095711e-06, "loss": 0.215, "step": 3903 }, { "epoch": 0.28, "grad_norm": 1.448175548528807, "learning_rate": 8.45770474126219e-06, "loss": 0.2797, "step": 3904 }, { "epoch": 0.28, "grad_norm": 1.1102170609217346, "learning_rate": 8.456867889832814e-06, "loss": 0.1593, "step": 3905 }, { "epoch": 0.28, "grad_norm": 6.259627632966343, "learning_rate": 8.456030852852503e-06, "loss": 0.6566, "step": 3906 }, { "epoch": 0.28, "grad_norm": 1.3658408605205474, "learning_rate": 8.455193630366186e-06, "loss": 0.2265, "step": 3907 }, { "epoch": 0.28, "grad_norm": 1.2570739895655239, "learning_rate": 8.454356222418802e-06, "loss": 0.1879, "step": 3908 }, { "epoch": 0.28, "grad_norm": 1.1864070260657922, "learning_rate": 8.453518629055299e-06, "loss": 0.1758, "step": 3909 }, { "epoch": 0.28, "grad_norm": 1.3751739745230591, "learning_rate": 8.452680850320636e-06, "loss": 0.1963, "step": 3910 }, { "epoch": 0.28, "grad_norm": 1.3987720685250922, "learning_rate": 8.451842886259783e-06, "loss": 0.2156, "step": 3911 }, { "epoch": 0.28, "grad_norm": 1.5015606420035643, "learning_rate": 8.451004736917716e-06, "loss": 0.2151, "step": 3912 }, { "epoch": 0.28, "grad_norm": 1.3559298919557874, "learning_rate": 8.450166402339426e-06, "loss": 0.1961, "step": 3913 }, { "epoch": 0.28, "grad_norm": 1.4349465025694126, "learning_rate": 8.449327882569909e-06, "loss": 0.2584, "step": 3914 }, { "epoch": 0.28, "grad_norm": 1.6226511532980332, "learning_rate": 8.448489177654175e-06, "loss": 0.2284, "step": 3915 }, { "epoch": 0.28, "grad_norm": 1.326578529274266, "learning_rate": 8.447650287637244e-06, "loss": 0.1839, "step": 3916 }, { "epoch": 0.28, "grad_norm": 1.2379393649266521, "learning_rate": 8.446811212564138e-06, "loss": 0.1666, "step": 3917 }, { "epoch": 0.28, "grad_norm": 1.2820510903813804, "learning_rate": 8.445971952479903e-06, "loss": 0.2055, "step": 3918 }, { "epoch": 0.28, "grad_norm": 1.2197137820686106, "learning_rate": 8.445132507429584e-06, "loss": 0.1793, "step": 3919 }, { "epoch": 0.28, "grad_norm": 1.3335263545967568, "learning_rate": 8.444292877458238e-06, "loss": 0.2609, "step": 3920 }, { "epoch": 0.28, "grad_norm": 1.3724820740311976, "learning_rate": 8.443453062610933e-06, "loss": 0.2073, "step": 3921 }, { "epoch": 0.28, "grad_norm": 1.507924337360983, "learning_rate": 8.442613062932747e-06, "loss": 0.2397, "step": 3922 }, { "epoch": 0.28, "grad_norm": 1.3178126037265236, "learning_rate": 8.44177287846877e-06, "loss": 0.2052, "step": 3923 }, { "epoch": 0.28, "grad_norm": 5.128395941676882, "learning_rate": 8.440932509264099e-06, "loss": 0.5071, "step": 3924 }, { "epoch": 0.28, "grad_norm": 1.3306959169845087, "learning_rate": 8.440091955363841e-06, "loss": 0.1978, "step": 3925 }, { "epoch": 0.28, "grad_norm": 1.5267136285115483, "learning_rate": 8.439251216813111e-06, "loss": 0.2397, "step": 3926 }, { "epoch": 0.28, "grad_norm": 1.4499652822798768, "learning_rate": 8.438410293657042e-06, "loss": 0.2243, "step": 3927 }, { "epoch": 0.28, "grad_norm": 1.3330598220414203, "learning_rate": 8.437569185940766e-06, "loss": 0.2292, "step": 3928 }, { "epoch": 0.28, "grad_norm": 1.1362551629083406, "learning_rate": 8.436727893709435e-06, "loss": 0.182, "step": 3929 }, { "epoch": 0.28, "grad_norm": 1.403654715046327, "learning_rate": 8.435886417008203e-06, "loss": 0.2238, "step": 3930 }, { "epoch": 0.28, "grad_norm": 1.3015050504774932, "learning_rate": 8.43504475588224e-06, "loss": 0.209, "step": 3931 }, { "epoch": 0.28, "grad_norm": 1.3913390229002132, "learning_rate": 8.434202910376719e-06, "loss": 0.1967, "step": 3932 }, { "epoch": 0.28, "grad_norm": 1.3760598461389233, "learning_rate": 8.433360880536829e-06, "loss": 0.2246, "step": 3933 }, { "epoch": 0.28, "grad_norm": 6.535414410452314, "learning_rate": 8.432518666407767e-06, "loss": 0.5979, "step": 3934 }, { "epoch": 0.28, "grad_norm": 1.589676027287786, "learning_rate": 8.43167626803474e-06, "loss": 0.2068, "step": 3935 }, { "epoch": 0.28, "grad_norm": 6.549994991659285, "learning_rate": 8.430833685462965e-06, "loss": 0.5945, "step": 3936 }, { "epoch": 0.28, "grad_norm": 1.4517409351147652, "learning_rate": 8.42999091873767e-06, "loss": 0.204, "step": 3937 }, { "epoch": 0.28, "grad_norm": 5.426627335584929, "learning_rate": 8.429147967904085e-06, "loss": 0.5697, "step": 3938 }, { "epoch": 0.28, "grad_norm": 1.5473129845939642, "learning_rate": 8.428304833007462e-06, "loss": 0.2538, "step": 3939 }, { "epoch": 0.28, "grad_norm": 1.3095140515001482, "learning_rate": 8.427461514093056e-06, "loss": 0.2514, "step": 3940 }, { "epoch": 0.28, "grad_norm": 1.4118972722141407, "learning_rate": 8.426618011206132e-06, "loss": 0.2364, "step": 3941 }, { "epoch": 0.28, "grad_norm": 1.254087021132135, "learning_rate": 8.425774324391966e-06, "loss": 0.1934, "step": 3942 }, { "epoch": 0.28, "grad_norm": 1.3343530492923716, "learning_rate": 8.424930453695844e-06, "loss": 0.2093, "step": 3943 }, { "epoch": 0.28, "grad_norm": 1.4404064392691347, "learning_rate": 8.424086399163063e-06, "loss": 0.2047, "step": 3944 }, { "epoch": 0.28, "grad_norm": 5.103972321061653, "learning_rate": 8.423242160838927e-06, "loss": 0.6356, "step": 3945 }, { "epoch": 0.28, "grad_norm": 1.4499695427441652, "learning_rate": 8.42239773876875e-06, "loss": 0.2039, "step": 3946 }, { "epoch": 0.28, "grad_norm": 1.2965724784268746, "learning_rate": 8.42155313299786e-06, "loss": 0.2264, "step": 3947 }, { "epoch": 0.28, "grad_norm": 1.358675380966728, "learning_rate": 8.42070834357159e-06, "loss": 0.2468, "step": 3948 }, { "epoch": 0.28, "grad_norm": 1.417970703054247, "learning_rate": 8.419863370535287e-06, "loss": 0.1817, "step": 3949 }, { "epoch": 0.28, "grad_norm": 1.4114583450157883, "learning_rate": 8.419018213934305e-06, "loss": 0.1947, "step": 3950 }, { "epoch": 0.28, "grad_norm": 1.4615083198859138, "learning_rate": 8.418172873814008e-06, "loss": 0.2651, "step": 3951 }, { "epoch": 0.28, "grad_norm": 1.3036228315040475, "learning_rate": 8.41732735021977e-06, "loss": 0.2003, "step": 3952 }, { "epoch": 0.28, "grad_norm": 1.4132115684762423, "learning_rate": 8.416481643196977e-06, "loss": 0.2499, "step": 3953 }, { "epoch": 0.28, "grad_norm": 1.3731397025275016, "learning_rate": 8.415635752791022e-06, "loss": 0.191, "step": 3954 }, { "epoch": 0.28, "grad_norm": 1.3145367733790236, "learning_rate": 8.414789679047307e-06, "loss": 0.221, "step": 3955 }, { "epoch": 0.28, "grad_norm": 1.4392843679164615, "learning_rate": 8.413943422011252e-06, "loss": 0.2514, "step": 3956 }, { "epoch": 0.28, "grad_norm": 1.3917665737365188, "learning_rate": 8.413096981728275e-06, "loss": 0.2377, "step": 3957 }, { "epoch": 0.28, "grad_norm": 1.426169340192788, "learning_rate": 8.412250358243814e-06, "loss": 0.1959, "step": 3958 }, { "epoch": 0.28, "grad_norm": 1.3286828057417015, "learning_rate": 8.411403551603307e-06, "loss": 0.1512, "step": 3959 }, { "epoch": 0.28, "grad_norm": 4.969693749666775, "learning_rate": 8.410556561852212e-06, "loss": 0.5898, "step": 3960 }, { "epoch": 0.28, "grad_norm": 1.5039687568481517, "learning_rate": 8.40970938903599e-06, "loss": 0.2345, "step": 3961 }, { "epoch": 0.28, "grad_norm": 5.000501851715352, "learning_rate": 8.408862033200113e-06, "loss": 0.6636, "step": 3962 }, { "epoch": 0.28, "grad_norm": 1.369195458880927, "learning_rate": 8.408014494390065e-06, "loss": 0.2314, "step": 3963 }, { "epoch": 0.28, "grad_norm": 1.2710644482352162, "learning_rate": 8.407166772651339e-06, "loss": 0.1675, "step": 3964 }, { "epoch": 0.28, "grad_norm": 1.3830886185178681, "learning_rate": 8.406318868029436e-06, "loss": 0.2041, "step": 3965 }, { "epoch": 0.28, "grad_norm": 1.3826428304776857, "learning_rate": 8.405470780569869e-06, "loss": 0.2607, "step": 3966 }, { "epoch": 0.28, "grad_norm": 6.0361764970270695, "learning_rate": 8.40462251031816e-06, "loss": 0.5131, "step": 3967 }, { "epoch": 0.28, "grad_norm": 1.308915712136961, "learning_rate": 8.40377405731984e-06, "loss": 0.2263, "step": 3968 }, { "epoch": 0.28, "grad_norm": 1.3666572708447056, "learning_rate": 8.402925421620452e-06, "loss": 0.2252, "step": 3969 }, { "epoch": 0.28, "grad_norm": 1.3411335296611422, "learning_rate": 8.402076603265543e-06, "loss": 0.2613, "step": 3970 }, { "epoch": 0.28, "grad_norm": 1.3195904899125672, "learning_rate": 8.40122760230068e-06, "loss": 0.2104, "step": 3971 }, { "epoch": 0.28, "grad_norm": 1.4385890010815705, "learning_rate": 8.400378418771434e-06, "loss": 0.2371, "step": 3972 }, { "epoch": 0.28, "grad_norm": 10.533849862908287, "learning_rate": 8.39952905272338e-06, "loss": 0.6887, "step": 3973 }, { "epoch": 0.28, "grad_norm": 1.4129014984986605, "learning_rate": 8.398679504202114e-06, "loss": 0.2268, "step": 3974 }, { "epoch": 0.28, "grad_norm": 1.2877143380856004, "learning_rate": 8.397829773253234e-06, "loss": 0.2112, "step": 3975 }, { "epoch": 0.28, "grad_norm": 1.5128662918305495, "learning_rate": 8.396979859922351e-06, "loss": 0.2451, "step": 3976 }, { "epoch": 0.28, "grad_norm": 1.3974072800413495, "learning_rate": 8.396129764255086e-06, "loss": 0.2172, "step": 3977 }, { "epoch": 0.28, "grad_norm": 1.2761549189162609, "learning_rate": 8.395279486297065e-06, "loss": 0.1825, "step": 3978 }, { "epoch": 0.28, "grad_norm": 1.4451198547929118, "learning_rate": 8.39442902609393e-06, "loss": 0.2595, "step": 3979 }, { "epoch": 0.28, "grad_norm": 1.4259788487331106, "learning_rate": 8.39357838369133e-06, "loss": 0.2161, "step": 3980 }, { "epoch": 0.28, "grad_norm": 1.4916417807469637, "learning_rate": 8.392727559134926e-06, "loss": 0.2029, "step": 3981 }, { "epoch": 0.28, "grad_norm": 1.3259760193844965, "learning_rate": 8.391876552470383e-06, "loss": 0.2248, "step": 3982 }, { "epoch": 0.28, "grad_norm": 1.331501075800485, "learning_rate": 8.391025363743383e-06, "loss": 0.1621, "step": 3983 }, { "epoch": 0.28, "grad_norm": 1.3001003618940754, "learning_rate": 8.390173992999613e-06, "loss": 0.2164, "step": 3984 }, { "epoch": 0.29, "grad_norm": 1.449375766916586, "learning_rate": 8.389322440284773e-06, "loss": 0.1974, "step": 3985 }, { "epoch": 0.29, "grad_norm": 1.5474298612342725, "learning_rate": 8.388470705644569e-06, "loss": 0.2091, "step": 3986 }, { "epoch": 0.29, "grad_norm": 1.5474226228068124, "learning_rate": 8.387618789124718e-06, "loss": 0.2486, "step": 3987 }, { "epoch": 0.29, "grad_norm": 1.3917040139502261, "learning_rate": 8.386766690770948e-06, "loss": 0.2056, "step": 3988 }, { "epoch": 0.29, "grad_norm": 1.4054707048144732, "learning_rate": 8.385914410629e-06, "loss": 0.216, "step": 3989 }, { "epoch": 0.29, "grad_norm": 5.2084614527088675, "learning_rate": 8.385061948744615e-06, "loss": 0.5791, "step": 3990 }, { "epoch": 0.29, "grad_norm": 1.3425730483106462, "learning_rate": 8.384209305163555e-06, "loss": 0.2164, "step": 3991 }, { "epoch": 0.29, "grad_norm": 1.5519198070654316, "learning_rate": 8.383356479931583e-06, "loss": 0.2659, "step": 3992 }, { "epoch": 0.29, "grad_norm": 6.553094479680816, "learning_rate": 8.382503473094477e-06, "loss": 0.6156, "step": 3993 }, { "epoch": 0.29, "grad_norm": 1.4812023814302289, "learning_rate": 8.381650284698022e-06, "loss": 0.2035, "step": 3994 }, { "epoch": 0.29, "grad_norm": 1.3103359528506466, "learning_rate": 8.380796914788014e-06, "loss": 0.2329, "step": 3995 }, { "epoch": 0.29, "grad_norm": 1.3242123138762312, "learning_rate": 8.379943363410259e-06, "loss": 0.217, "step": 3996 }, { "epoch": 0.29, "grad_norm": 1.5208913474887478, "learning_rate": 8.379089630610573e-06, "loss": 0.2829, "step": 3997 }, { "epoch": 0.29, "grad_norm": 7.950024749502722, "learning_rate": 8.37823571643478e-06, "loss": 0.5987, "step": 3998 }, { "epoch": 0.29, "grad_norm": 1.6180169697676572, "learning_rate": 8.377381620928713e-06, "loss": 0.2772, "step": 3999 }, { "epoch": 0.29, "grad_norm": 7.959187466917736, "learning_rate": 8.376527344138222e-06, "loss": 0.6952, "step": 4000 }, { "epoch": 0.29, "grad_norm": 1.4346031586590502, "learning_rate": 8.375672886109152e-06, "loss": 0.2078, "step": 4001 }, { "epoch": 0.29, "grad_norm": 1.2049168639495935, "learning_rate": 8.374818246887376e-06, "loss": 0.1918, "step": 4002 }, { "epoch": 0.29, "grad_norm": 1.2708455010806656, "learning_rate": 8.373963426518764e-06, "loss": 0.2081, "step": 4003 }, { "epoch": 0.29, "grad_norm": 1.3758884523250772, "learning_rate": 8.373108425049198e-06, "loss": 0.1992, "step": 4004 }, { "epoch": 0.29, "grad_norm": 1.5098540920827337, "learning_rate": 8.372253242524573e-06, "loss": 0.2115, "step": 4005 }, { "epoch": 0.29, "grad_norm": 1.4306602309199932, "learning_rate": 8.37139787899079e-06, "loss": 0.2435, "step": 4006 }, { "epoch": 0.29, "grad_norm": 1.2386801952455295, "learning_rate": 8.370542334493764e-06, "loss": 0.1873, "step": 4007 }, { "epoch": 0.29, "grad_norm": 1.4603135799433573, "learning_rate": 8.369686609079416e-06, "loss": 0.2319, "step": 4008 }, { "epoch": 0.29, "grad_norm": 1.4519361273828328, "learning_rate": 8.368830702793678e-06, "loss": 0.1775, "step": 4009 }, { "epoch": 0.29, "grad_norm": 7.51288341908173, "learning_rate": 8.36797461568249e-06, "loss": 0.48, "step": 4010 }, { "epoch": 0.29, "grad_norm": 1.411305600834009, "learning_rate": 8.367118347791807e-06, "loss": 0.2236, "step": 4011 }, { "epoch": 0.29, "grad_norm": 1.388958296412984, "learning_rate": 8.366261899167588e-06, "loss": 0.1782, "step": 4012 }, { "epoch": 0.29, "grad_norm": 1.3416384476774192, "learning_rate": 8.365405269855801e-06, "loss": 0.1475, "step": 4013 }, { "epoch": 0.29, "grad_norm": 1.3328969301100053, "learning_rate": 8.364548459902431e-06, "loss": 0.2147, "step": 4014 }, { "epoch": 0.29, "grad_norm": 1.4370870661313566, "learning_rate": 8.363691469353465e-06, "loss": 0.2139, "step": 4015 }, { "epoch": 0.29, "grad_norm": 1.2281535786795532, "learning_rate": 8.362834298254906e-06, "loss": 0.1752, "step": 4016 }, { "epoch": 0.29, "grad_norm": 1.3232472634924308, "learning_rate": 8.361976946652761e-06, "loss": 0.1694, "step": 4017 }, { "epoch": 0.29, "grad_norm": 1.4420436049989904, "learning_rate": 8.361119414593048e-06, "loss": 0.1826, "step": 4018 }, { "epoch": 0.29, "grad_norm": 1.2933491624861813, "learning_rate": 8.360261702121802e-06, "loss": 0.1806, "step": 4019 }, { "epoch": 0.29, "grad_norm": 1.4301124568227406, "learning_rate": 8.359403809285054e-06, "loss": 0.2127, "step": 4020 }, { "epoch": 0.29, "grad_norm": 1.2892218668830984, "learning_rate": 8.358545736128856e-06, "loss": 0.1963, "step": 4021 }, { "epoch": 0.29, "grad_norm": 1.2638291220447115, "learning_rate": 8.357687482699268e-06, "loss": 0.191, "step": 4022 }, { "epoch": 0.29, "grad_norm": 1.281366995641839, "learning_rate": 8.356829049042352e-06, "loss": 0.2055, "step": 4023 }, { "epoch": 0.29, "grad_norm": 1.4527996120495386, "learning_rate": 8.355970435204192e-06, "loss": 0.2383, "step": 4024 }, { "epoch": 0.29, "grad_norm": 1.2730141165369886, "learning_rate": 8.355111641230869e-06, "loss": 0.1957, "step": 4025 }, { "epoch": 0.29, "grad_norm": 5.5983967912750545, "learning_rate": 8.354252667168483e-06, "loss": 0.4839, "step": 4026 }, { "epoch": 0.29, "grad_norm": 1.2326326377365737, "learning_rate": 8.353393513063141e-06, "loss": 0.1788, "step": 4027 }, { "epoch": 0.29, "grad_norm": 5.177843654907134, "learning_rate": 8.352534178960957e-06, "loss": 0.5307, "step": 4028 }, { "epoch": 0.29, "grad_norm": 1.550671996642033, "learning_rate": 8.351674664908055e-06, "loss": 0.2046, "step": 4029 }, { "epoch": 0.29, "grad_norm": 1.501404411655909, "learning_rate": 8.350814970950576e-06, "loss": 0.226, "step": 4030 }, { "epoch": 0.29, "grad_norm": 1.4630369680517794, "learning_rate": 8.349955097134658e-06, "loss": 0.2063, "step": 4031 }, { "epoch": 0.29, "grad_norm": 1.32373364183077, "learning_rate": 8.34909504350646e-06, "loss": 0.2391, "step": 4032 }, { "epoch": 0.29, "grad_norm": 1.2710467936835261, "learning_rate": 8.348234810112147e-06, "loss": 0.1915, "step": 4033 }, { "epoch": 0.29, "grad_norm": 1.403759358507858, "learning_rate": 8.347374396997891e-06, "loss": 0.2808, "step": 4034 }, { "epoch": 0.29, "grad_norm": 1.3486054279354456, "learning_rate": 8.346513804209873e-06, "loss": 0.2261, "step": 4035 }, { "epoch": 0.29, "grad_norm": 1.3535582327268165, "learning_rate": 8.345653031794292e-06, "loss": 0.2399, "step": 4036 }, { "epoch": 0.29, "grad_norm": 5.31878277785827, "learning_rate": 8.344792079797346e-06, "loss": 0.6265, "step": 4037 }, { "epoch": 0.29, "grad_norm": 1.4701425065115221, "learning_rate": 8.343930948265252e-06, "loss": 0.2574, "step": 4038 }, { "epoch": 0.29, "grad_norm": 1.5054124948717251, "learning_rate": 8.343069637244225e-06, "loss": 0.2032, "step": 4039 }, { "epoch": 0.29, "grad_norm": 1.3014033636689692, "learning_rate": 8.342208146780504e-06, "loss": 0.1629, "step": 4040 }, { "epoch": 0.29, "grad_norm": 1.4758254822502992, "learning_rate": 8.341346476920327e-06, "loss": 0.2242, "step": 4041 }, { "epoch": 0.29, "grad_norm": 1.4187252546423248, "learning_rate": 8.340484627709946e-06, "loss": 0.2029, "step": 4042 }, { "epoch": 0.29, "grad_norm": 1.5190637174147896, "learning_rate": 8.33962259919562e-06, "loss": 0.2247, "step": 4043 }, { "epoch": 0.29, "grad_norm": 1.3729387479960766, "learning_rate": 8.338760391423623e-06, "loss": 0.2412, "step": 4044 }, { "epoch": 0.29, "grad_norm": 1.4997378865254147, "learning_rate": 8.337898004440231e-06, "loss": 0.265, "step": 4045 }, { "epoch": 0.29, "grad_norm": 1.380536199730992, "learning_rate": 8.337035438291736e-06, "loss": 0.179, "step": 4046 }, { "epoch": 0.29, "grad_norm": 1.1338894273050752, "learning_rate": 8.336172693024434e-06, "loss": 0.1637, "step": 4047 }, { "epoch": 0.29, "grad_norm": 1.4326135358289709, "learning_rate": 8.335309768684637e-06, "loss": 0.2251, "step": 4048 }, { "epoch": 0.29, "grad_norm": 5.932507727142624, "learning_rate": 8.334446665318661e-06, "loss": 0.4825, "step": 4049 }, { "epoch": 0.29, "grad_norm": 1.233574299792511, "learning_rate": 8.333583382972834e-06, "loss": 0.1734, "step": 4050 }, { "epoch": 0.29, "grad_norm": 1.5577222441950687, "learning_rate": 8.332719921693497e-06, "loss": 0.2392, "step": 4051 }, { "epoch": 0.29, "grad_norm": 1.342977682785485, "learning_rate": 8.331856281526994e-06, "loss": 0.1966, "step": 4052 }, { "epoch": 0.29, "grad_norm": 1.5102325061533506, "learning_rate": 8.330992462519683e-06, "loss": 0.17, "step": 4053 }, { "epoch": 0.29, "grad_norm": 1.3776971260277344, "learning_rate": 8.33012846471793e-06, "loss": 0.1949, "step": 4054 }, { "epoch": 0.29, "grad_norm": 1.3187182366377714, "learning_rate": 8.329264288168112e-06, "loss": 0.2348, "step": 4055 }, { "epoch": 0.29, "grad_norm": 1.4538909032723915, "learning_rate": 8.328399932916612e-06, "loss": 0.1979, "step": 4056 }, { "epoch": 0.29, "grad_norm": 1.5400064914165799, "learning_rate": 8.327535399009825e-06, "loss": 0.228, "step": 4057 }, { "epoch": 0.29, "grad_norm": 1.3382103896943622, "learning_rate": 8.326670686494158e-06, "loss": 0.2037, "step": 4058 }, { "epoch": 0.29, "grad_norm": 19.52068815925955, "learning_rate": 8.325805795416026e-06, "loss": 0.5157, "step": 4059 }, { "epoch": 0.29, "grad_norm": 1.3112256934612216, "learning_rate": 8.324940725821853e-06, "loss": 0.1944, "step": 4060 }, { "epoch": 0.29, "grad_norm": 1.3049338751172164, "learning_rate": 8.324075477758069e-06, "loss": 0.1836, "step": 4061 }, { "epoch": 0.29, "grad_norm": 1.4500275111610934, "learning_rate": 8.32321005127112e-06, "loss": 0.2391, "step": 4062 }, { "epoch": 0.29, "grad_norm": 1.419033641146294, "learning_rate": 8.322344446407458e-06, "loss": 0.22, "step": 4063 }, { "epoch": 0.29, "grad_norm": 1.3499290967145556, "learning_rate": 8.321478663213545e-06, "loss": 0.234, "step": 4064 }, { "epoch": 0.29, "grad_norm": 4.753539244912161, "learning_rate": 8.32061270173585e-06, "loss": 0.6655, "step": 4065 }, { "epoch": 0.29, "grad_norm": 1.2452613712248535, "learning_rate": 8.31974656202086e-06, "loss": 0.1633, "step": 4066 }, { "epoch": 0.29, "grad_norm": 1.4274079751012592, "learning_rate": 8.318880244115064e-06, "loss": 0.2086, "step": 4067 }, { "epoch": 0.29, "grad_norm": 1.4469898126182499, "learning_rate": 8.318013748064962e-06, "loss": 0.2201, "step": 4068 }, { "epoch": 0.29, "grad_norm": 1.4065476551753204, "learning_rate": 8.317147073917061e-06, "loss": 0.2172, "step": 4069 }, { "epoch": 0.29, "grad_norm": 1.3487391492003584, "learning_rate": 8.316280221717885e-06, "loss": 0.2126, "step": 4070 }, { "epoch": 0.29, "grad_norm": 1.3739442032285805, "learning_rate": 8.315413191513962e-06, "loss": 0.1873, "step": 4071 }, { "epoch": 0.29, "grad_norm": 1.3566982024593732, "learning_rate": 8.314545983351829e-06, "loss": 0.2468, "step": 4072 }, { "epoch": 0.29, "grad_norm": 1.356501681831554, "learning_rate": 8.313678597278037e-06, "loss": 0.2224, "step": 4073 }, { "epoch": 0.29, "grad_norm": 1.2210012286000975, "learning_rate": 8.312811033339142e-06, "loss": 0.201, "step": 4074 }, { "epoch": 0.29, "grad_norm": 7.1509725616504305, "learning_rate": 8.311943291581712e-06, "loss": 0.7771, "step": 4075 }, { "epoch": 0.29, "grad_norm": 1.291329544504811, "learning_rate": 8.311075372052323e-06, "loss": 0.1863, "step": 4076 }, { "epoch": 0.29, "grad_norm": 1.1936210728746244, "learning_rate": 8.310207274797562e-06, "loss": 0.2061, "step": 4077 }, { "epoch": 0.29, "grad_norm": 1.579251319348325, "learning_rate": 8.309338999864028e-06, "loss": 0.249, "step": 4078 }, { "epoch": 0.29, "grad_norm": 1.3274821216987136, "learning_rate": 8.30847054729832e-06, "loss": 0.2101, "step": 4079 }, { "epoch": 0.29, "grad_norm": 1.3685723584451246, "learning_rate": 8.30760191714706e-06, "loss": 0.2118, "step": 4080 }, { "epoch": 0.29, "grad_norm": 1.4376530406426933, "learning_rate": 8.306733109456867e-06, "loss": 0.204, "step": 4081 }, { "epoch": 0.29, "grad_norm": 1.2200599052393177, "learning_rate": 8.305864124274378e-06, "loss": 0.2041, "step": 4082 }, { "epoch": 0.29, "grad_norm": 5.222004212985217, "learning_rate": 8.304994961646237e-06, "loss": 0.6477, "step": 4083 }, { "epoch": 0.29, "grad_norm": 1.3348909077177873, "learning_rate": 8.304125621619097e-06, "loss": 0.2147, "step": 4084 }, { "epoch": 0.29, "grad_norm": 1.4905274395269266, "learning_rate": 8.303256104239619e-06, "loss": 0.2135, "step": 4085 }, { "epoch": 0.29, "grad_norm": 8.22422801618588, "learning_rate": 8.302386409554478e-06, "loss": 0.5331, "step": 4086 }, { "epoch": 0.29, "grad_norm": 1.184461587209587, "learning_rate": 8.301516537610352e-06, "loss": 0.1987, "step": 4087 }, { "epoch": 0.29, "grad_norm": 7.407086091293792, "learning_rate": 8.300646488453937e-06, "loss": 0.4933, "step": 4088 }, { "epoch": 0.29, "grad_norm": 1.3883815705156846, "learning_rate": 8.29977626213193e-06, "loss": 0.2346, "step": 4089 }, { "epoch": 0.29, "grad_norm": 1.431575290629615, "learning_rate": 8.298905858691043e-06, "loss": 0.2121, "step": 4090 }, { "epoch": 0.29, "grad_norm": 1.2854725192766772, "learning_rate": 8.298035278177995e-06, "loss": 0.2244, "step": 4091 }, { "epoch": 0.29, "grad_norm": 5.554042431356747, "learning_rate": 8.297164520639515e-06, "loss": 0.5459, "step": 4092 }, { "epoch": 0.29, "grad_norm": 1.290859982180356, "learning_rate": 8.296293586122344e-06, "loss": 0.2481, "step": 4093 }, { "epoch": 0.29, "grad_norm": 1.405049609277019, "learning_rate": 8.295422474673229e-06, "loss": 0.2254, "step": 4094 }, { "epoch": 0.29, "grad_norm": 1.4252413609951429, "learning_rate": 8.294551186338927e-06, "loss": 0.2677, "step": 4095 }, { "epoch": 0.29, "grad_norm": 1.591828484511815, "learning_rate": 8.293679721166203e-06, "loss": 0.2345, "step": 4096 }, { "epoch": 0.29, "grad_norm": 1.3012073390371965, "learning_rate": 8.292808079201841e-06, "loss": 0.2349, "step": 4097 }, { "epoch": 0.29, "grad_norm": 1.354907016075446, "learning_rate": 8.29193626049262e-06, "loss": 0.2316, "step": 4098 }, { "epoch": 0.29, "grad_norm": 1.344608516474486, "learning_rate": 8.29106426508534e-06, "loss": 0.2126, "step": 4099 }, { "epoch": 0.29, "grad_norm": 1.51590703523331, "learning_rate": 8.290192093026805e-06, "loss": 0.2012, "step": 4100 }, { "epoch": 0.29, "grad_norm": 1.4320061458719695, "learning_rate": 8.28931974436383e-06, "loss": 0.2346, "step": 4101 }, { "epoch": 0.29, "grad_norm": 6.255925565059469, "learning_rate": 8.288447219143238e-06, "loss": 0.7, "step": 4102 }, { "epoch": 0.29, "grad_norm": 1.314163445815323, "learning_rate": 8.287574517411864e-06, "loss": 0.1997, "step": 4103 }, { "epoch": 0.29, "grad_norm": 8.83466197437151, "learning_rate": 8.28670163921655e-06, "loss": 0.5624, "step": 4104 }, { "epoch": 0.29, "grad_norm": 1.4770252516989408, "learning_rate": 8.28582858460415e-06, "loss": 0.2237, "step": 4105 }, { "epoch": 0.29, "grad_norm": 1.1830303643747915, "learning_rate": 8.284955353621527e-06, "loss": 0.2199, "step": 4106 }, { "epoch": 0.29, "grad_norm": 1.2878836431173761, "learning_rate": 8.284081946315548e-06, "loss": 0.2009, "step": 4107 }, { "epoch": 0.29, "grad_norm": 1.311095538461595, "learning_rate": 8.283208362733099e-06, "loss": 0.1736, "step": 4108 }, { "epoch": 0.29, "grad_norm": 1.29895157642666, "learning_rate": 8.282334602921069e-06, "loss": 0.1482, "step": 4109 }, { "epoch": 0.29, "grad_norm": 1.4466117098491575, "learning_rate": 8.281460666926357e-06, "loss": 0.2376, "step": 4110 }, { "epoch": 0.29, "grad_norm": 1.4990334342385714, "learning_rate": 8.280586554795872e-06, "loss": 0.2249, "step": 4111 }, { "epoch": 0.29, "grad_norm": 1.385249887752725, "learning_rate": 8.279712266576535e-06, "loss": 0.244, "step": 4112 }, { "epoch": 0.29, "grad_norm": 1.2791768468954543, "learning_rate": 8.278837802315273e-06, "loss": 0.2307, "step": 4113 }, { "epoch": 0.29, "grad_norm": 1.514441783731017, "learning_rate": 8.277963162059022e-06, "loss": 0.2851, "step": 4114 }, { "epoch": 0.29, "grad_norm": 1.3623507782184394, "learning_rate": 8.277088345854733e-06, "loss": 0.2606, "step": 4115 }, { "epoch": 0.29, "grad_norm": 1.5372309526673857, "learning_rate": 8.27621335374936e-06, "loss": 0.2487, "step": 4116 }, { "epoch": 0.29, "grad_norm": 1.4027257833471676, "learning_rate": 8.275338185789871e-06, "loss": 0.2432, "step": 4117 }, { "epoch": 0.29, "grad_norm": 1.2316506492789152, "learning_rate": 8.274462842023242e-06, "loss": 0.1781, "step": 4118 }, { "epoch": 0.29, "grad_norm": 1.374250892893682, "learning_rate": 8.273587322496452e-06, "loss": 0.2012, "step": 4119 }, { "epoch": 0.29, "grad_norm": 1.22973823962307, "learning_rate": 8.272711627256501e-06, "loss": 0.1926, "step": 4120 }, { "epoch": 0.29, "grad_norm": 1.31771661945902, "learning_rate": 8.271835756350394e-06, "loss": 0.217, "step": 4121 }, { "epoch": 0.29, "grad_norm": 1.3528779786346508, "learning_rate": 8.27095970982514e-06, "loss": 0.2179, "step": 4122 }, { "epoch": 0.29, "grad_norm": 1.3347701668028291, "learning_rate": 8.270083487727765e-06, "loss": 0.2385, "step": 4123 }, { "epoch": 0.29, "grad_norm": 1.3508483193699328, "learning_rate": 8.269207090105298e-06, "loss": 0.2441, "step": 4124 }, { "epoch": 0.3, "grad_norm": 1.331376672002768, "learning_rate": 8.268330517004783e-06, "loss": 0.2366, "step": 4125 }, { "epoch": 0.3, "grad_norm": 1.227092700701747, "learning_rate": 8.26745376847327e-06, "loss": 0.1749, "step": 4126 }, { "epoch": 0.3, "grad_norm": 1.3760598933805466, "learning_rate": 8.266576844557821e-06, "loss": 0.2193, "step": 4127 }, { "epoch": 0.3, "grad_norm": 1.2654406922659633, "learning_rate": 8.265699745305503e-06, "loss": 0.18, "step": 4128 }, { "epoch": 0.3, "grad_norm": 1.3369645934186092, "learning_rate": 8.264822470763398e-06, "loss": 0.2395, "step": 4129 }, { "epoch": 0.3, "grad_norm": 1.2639163215814413, "learning_rate": 8.263945020978592e-06, "loss": 0.1928, "step": 4130 }, { "epoch": 0.3, "grad_norm": 1.374377401477458, "learning_rate": 8.263067395998184e-06, "loss": 0.224, "step": 4131 }, { "epoch": 0.3, "grad_norm": 1.2924690761330049, "learning_rate": 8.262189595869283e-06, "loss": 0.2049, "step": 4132 }, { "epoch": 0.3, "grad_norm": 7.003057329625412, "learning_rate": 8.261311620639003e-06, "loss": 0.59, "step": 4133 }, { "epoch": 0.3, "grad_norm": 1.4397745786883145, "learning_rate": 8.260433470354472e-06, "loss": 0.2087, "step": 4134 }, { "epoch": 0.3, "grad_norm": 1.1862558652117978, "learning_rate": 8.259555145062827e-06, "loss": 0.216, "step": 4135 }, { "epoch": 0.3, "grad_norm": 1.4587167794676854, "learning_rate": 8.25867664481121e-06, "loss": 0.2237, "step": 4136 }, { "epoch": 0.3, "grad_norm": 1.3210007316834875, "learning_rate": 8.257797969646776e-06, "loss": 0.2035, "step": 4137 }, { "epoch": 0.3, "grad_norm": 1.66442182008484, "learning_rate": 8.25691911961669e-06, "loss": 0.2371, "step": 4138 }, { "epoch": 0.3, "grad_norm": 1.4066213884903278, "learning_rate": 8.256040094768124e-06, "loss": 0.2208, "step": 4139 }, { "epoch": 0.3, "grad_norm": 1.355214208992719, "learning_rate": 8.255160895148263e-06, "loss": 0.228, "step": 4140 }, { "epoch": 0.3, "grad_norm": 5.885406363426969, "learning_rate": 8.254281520804297e-06, "loss": 0.6964, "step": 4141 }, { "epoch": 0.3, "grad_norm": 1.4345638631716455, "learning_rate": 8.253401971783427e-06, "loss": 0.1567, "step": 4142 }, { "epoch": 0.3, "grad_norm": 7.661640609282136, "learning_rate": 8.252522248132863e-06, "loss": 0.6906, "step": 4143 }, { "epoch": 0.3, "grad_norm": 1.4560462853892395, "learning_rate": 8.25164234989983e-06, "loss": 0.178, "step": 4144 }, { "epoch": 0.3, "grad_norm": 1.2758352610351231, "learning_rate": 8.25076227713155e-06, "loss": 0.1822, "step": 4145 }, { "epoch": 0.3, "grad_norm": 6.118832600812816, "learning_rate": 8.249882029875267e-06, "loss": 0.4905, "step": 4146 }, { "epoch": 0.3, "grad_norm": 1.2449732874571167, "learning_rate": 8.249001608178226e-06, "loss": 0.1648, "step": 4147 }, { "epoch": 0.3, "grad_norm": 6.981695559494868, "learning_rate": 8.24812101208769e-06, "loss": 0.5962, "step": 4148 }, { "epoch": 0.3, "grad_norm": 11.09725375725208, "learning_rate": 8.247240241650918e-06, "loss": 0.6791, "step": 4149 }, { "epoch": 0.3, "grad_norm": 1.396648796654216, "learning_rate": 8.246359296915195e-06, "loss": 0.1778, "step": 4150 }, { "epoch": 0.3, "grad_norm": 1.2587879433554447, "learning_rate": 8.245478177927798e-06, "loss": 0.1634, "step": 4151 }, { "epoch": 0.3, "grad_norm": 1.346562167063866, "learning_rate": 8.244596884736027e-06, "loss": 0.1886, "step": 4152 }, { "epoch": 0.3, "grad_norm": 1.4176954502337669, "learning_rate": 8.243715417387185e-06, "loss": 0.2251, "step": 4153 }, { "epoch": 0.3, "grad_norm": 5.427726581021854, "learning_rate": 8.242833775928587e-06, "loss": 0.512, "step": 4154 }, { "epoch": 0.3, "grad_norm": 1.3579630019102151, "learning_rate": 8.241951960407556e-06, "loss": 0.1832, "step": 4155 }, { "epoch": 0.3, "grad_norm": 1.4056614544293775, "learning_rate": 8.241069970871422e-06, "loss": 0.1953, "step": 4156 }, { "epoch": 0.3, "grad_norm": 1.4421010053705154, "learning_rate": 8.240187807367528e-06, "loss": 0.1758, "step": 4157 }, { "epoch": 0.3, "grad_norm": 1.4562499241366997, "learning_rate": 8.239305469943227e-06, "loss": 0.167, "step": 4158 }, { "epoch": 0.3, "grad_norm": 1.3508505309317442, "learning_rate": 8.238422958645875e-06, "loss": 0.268, "step": 4159 }, { "epoch": 0.3, "grad_norm": 1.2752807956393788, "learning_rate": 8.237540273522844e-06, "loss": 0.2052, "step": 4160 }, { "epoch": 0.3, "grad_norm": 7.181102513331286, "learning_rate": 8.236657414621516e-06, "loss": 0.5543, "step": 4161 }, { "epoch": 0.3, "grad_norm": 1.3618381320220463, "learning_rate": 8.235774381989273e-06, "loss": 0.226, "step": 4162 }, { "epoch": 0.3, "grad_norm": 1.377872855858945, "learning_rate": 8.234891175673519e-06, "loss": 0.2175, "step": 4163 }, { "epoch": 0.3, "grad_norm": 1.1717665740809595, "learning_rate": 8.234007795721657e-06, "loss": 0.1697, "step": 4164 }, { "epoch": 0.3, "grad_norm": 1.333651846452711, "learning_rate": 8.233124242181104e-06, "loss": 0.1883, "step": 4165 }, { "epoch": 0.3, "grad_norm": 1.2068486466799073, "learning_rate": 8.232240515099287e-06, "loss": 0.1865, "step": 4166 }, { "epoch": 0.3, "grad_norm": 1.2385403227931173, "learning_rate": 8.23135661452364e-06, "loss": 0.2125, "step": 4167 }, { "epoch": 0.3, "grad_norm": 1.4272564301174362, "learning_rate": 8.230472540501607e-06, "loss": 0.2323, "step": 4168 }, { "epoch": 0.3, "grad_norm": 1.3905431500471723, "learning_rate": 8.229588293080644e-06, "loss": 0.2249, "step": 4169 }, { "epoch": 0.3, "grad_norm": 1.3037801599855574, "learning_rate": 8.228703872308208e-06, "loss": 0.2058, "step": 4170 }, { "epoch": 0.3, "grad_norm": 1.3885224331416648, "learning_rate": 8.22781927823178e-06, "loss": 0.2046, "step": 4171 }, { "epoch": 0.3, "grad_norm": 1.6111665645795197, "learning_rate": 8.226934510898832e-06, "loss": 0.2054, "step": 4172 }, { "epoch": 0.3, "grad_norm": 1.196500569041797, "learning_rate": 8.226049570356861e-06, "loss": 0.1782, "step": 4173 }, { "epoch": 0.3, "grad_norm": 1.3690664924664329, "learning_rate": 8.225164456653367e-06, "loss": 0.1905, "step": 4174 }, { "epoch": 0.3, "grad_norm": 1.461886179277451, "learning_rate": 8.224279169835857e-06, "loss": 0.2135, "step": 4175 }, { "epoch": 0.3, "grad_norm": 1.1594626760721105, "learning_rate": 8.22339370995185e-06, "loss": 0.1837, "step": 4176 }, { "epoch": 0.3, "grad_norm": 19.656856543079726, "learning_rate": 8.222508077048875e-06, "loss": 0.4676, "step": 4177 }, { "epoch": 0.3, "grad_norm": 1.3163229086835568, "learning_rate": 8.221622271174467e-06, "loss": 0.2012, "step": 4178 }, { "epoch": 0.3, "grad_norm": 1.3506962459335055, "learning_rate": 8.220736292376177e-06, "loss": 0.1955, "step": 4179 }, { "epoch": 0.3, "grad_norm": 5.888344368709606, "learning_rate": 8.219850140701557e-06, "loss": 0.6493, "step": 4180 }, { "epoch": 0.3, "grad_norm": 1.3622246846187454, "learning_rate": 8.218963816198174e-06, "loss": 0.2163, "step": 4181 }, { "epoch": 0.3, "grad_norm": 1.3470868804006588, "learning_rate": 8.2180773189136e-06, "loss": 0.1978, "step": 4182 }, { "epoch": 0.3, "grad_norm": 1.4109959877858365, "learning_rate": 8.217190648895421e-06, "loss": 0.2278, "step": 4183 }, { "epoch": 0.3, "grad_norm": 1.5902993487680914, "learning_rate": 8.21630380619123e-06, "loss": 0.2428, "step": 4184 }, { "epoch": 0.3, "grad_norm": 1.4217594417580592, "learning_rate": 8.215416790848626e-06, "loss": 0.2097, "step": 4185 }, { "epoch": 0.3, "grad_norm": 1.2774782747611606, "learning_rate": 8.214529602915226e-06, "loss": 0.1925, "step": 4186 }, { "epoch": 0.3, "grad_norm": 1.8001253571969031, "learning_rate": 8.213642242438646e-06, "loss": 0.2202, "step": 4187 }, { "epoch": 0.3, "grad_norm": 1.3165653675659297, "learning_rate": 8.212754709466519e-06, "loss": 0.229, "step": 4188 }, { "epoch": 0.3, "grad_norm": 1.4130863602956856, "learning_rate": 8.211867004046481e-06, "loss": 0.1852, "step": 4189 }, { "epoch": 0.3, "grad_norm": 1.2663213058231941, "learning_rate": 8.210979126226182e-06, "loss": 0.1919, "step": 4190 }, { "epoch": 0.3, "grad_norm": 1.322538712326694, "learning_rate": 8.21009107605328e-06, "loss": 0.1965, "step": 4191 }, { "epoch": 0.3, "grad_norm": 1.5072965848132547, "learning_rate": 8.209202853575445e-06, "loss": 0.2, "step": 4192 }, { "epoch": 0.3, "grad_norm": 6.731414517865259, "learning_rate": 8.208314458840347e-06, "loss": 0.5875, "step": 4193 }, { "epoch": 0.3, "grad_norm": 11.496024076905181, "learning_rate": 8.207425891895677e-06, "loss": 0.6528, "step": 4194 }, { "epoch": 0.3, "grad_norm": 1.3141460334551134, "learning_rate": 8.206537152789126e-06, "loss": 0.1871, "step": 4195 }, { "epoch": 0.3, "grad_norm": 1.3463192778368729, "learning_rate": 8.2056482415684e-06, "loss": 0.1703, "step": 4196 }, { "epoch": 0.3, "grad_norm": 1.4307792241044381, "learning_rate": 8.20475915828121e-06, "loss": 0.2128, "step": 4197 }, { "epoch": 0.3, "grad_norm": 1.5008272082930627, "learning_rate": 8.203869902975284e-06, "loss": 0.2647, "step": 4198 }, { "epoch": 0.3, "grad_norm": 1.3479888952877352, "learning_rate": 8.202980475698348e-06, "loss": 0.231, "step": 4199 }, { "epoch": 0.3, "grad_norm": 1.4640267857162308, "learning_rate": 8.202090876498144e-06, "loss": 0.2171, "step": 4200 }, { "epoch": 0.3, "grad_norm": 1.4754167364489201, "learning_rate": 8.201201105422423e-06, "loss": 0.2347, "step": 4201 }, { "epoch": 0.3, "grad_norm": 1.191654897703027, "learning_rate": 8.200311162518946e-06, "loss": 0.183, "step": 4202 }, { "epoch": 0.3, "grad_norm": 3.9070146062481172, "learning_rate": 8.199421047835478e-06, "loss": 0.5862, "step": 4203 }, { "epoch": 0.3, "grad_norm": 1.2632172450141754, "learning_rate": 8.1985307614198e-06, "loss": 0.2247, "step": 4204 }, { "epoch": 0.3, "grad_norm": 1.4556998963357939, "learning_rate": 8.197640303319698e-06, "loss": 0.2251, "step": 4205 }, { "epoch": 0.3, "grad_norm": 1.6009423598214585, "learning_rate": 8.19674967358297e-06, "loss": 0.2623, "step": 4206 }, { "epoch": 0.3, "grad_norm": 1.376526142247953, "learning_rate": 8.195858872257416e-06, "loss": 0.2283, "step": 4207 }, { "epoch": 0.3, "grad_norm": 1.409289209727124, "learning_rate": 8.194967899390856e-06, "loss": 0.198, "step": 4208 }, { "epoch": 0.3, "grad_norm": 1.2201765701022809, "learning_rate": 8.194076755031111e-06, "loss": 0.1498, "step": 4209 }, { "epoch": 0.3, "grad_norm": 7.8405006723402275, "learning_rate": 8.193185439226015e-06, "loss": 0.7434, "step": 4210 }, { "epoch": 0.3, "grad_norm": 1.4568870718594595, "learning_rate": 8.192293952023412e-06, "loss": 0.1851, "step": 4211 }, { "epoch": 0.3, "grad_norm": 1.3328075707104006, "learning_rate": 8.191402293471151e-06, "loss": 0.1986, "step": 4212 }, { "epoch": 0.3, "grad_norm": 1.5489508345812042, "learning_rate": 8.190510463617093e-06, "loss": 0.261, "step": 4213 }, { "epoch": 0.3, "grad_norm": 1.4107171572549215, "learning_rate": 8.18961846250911e-06, "loss": 0.1796, "step": 4214 }, { "epoch": 0.3, "grad_norm": 1.1747404360071292, "learning_rate": 8.18872629019508e-06, "loss": 0.1952, "step": 4215 }, { "epoch": 0.3, "grad_norm": 1.3887880768437932, "learning_rate": 8.187833946722889e-06, "loss": 0.1736, "step": 4216 }, { "epoch": 0.3, "grad_norm": 3.958813848424642, "learning_rate": 8.186941432140438e-06, "loss": 0.6375, "step": 4217 }, { "epoch": 0.3, "grad_norm": 1.3813345824735783, "learning_rate": 8.18604874649563e-06, "loss": 0.2122, "step": 4218 }, { "epoch": 0.3, "grad_norm": 1.4270433205816915, "learning_rate": 8.185155889836384e-06, "loss": 0.2226, "step": 4219 }, { "epoch": 0.3, "grad_norm": 1.390120132128045, "learning_rate": 8.184262862210624e-06, "loss": 0.2549, "step": 4220 }, { "epoch": 0.3, "grad_norm": 1.2861797264493036, "learning_rate": 8.183369663666285e-06, "loss": 0.1872, "step": 4221 }, { "epoch": 0.3, "grad_norm": 1.1685513436165265, "learning_rate": 8.182476294251307e-06, "loss": 0.2081, "step": 4222 }, { "epoch": 0.3, "grad_norm": 1.372084099564511, "learning_rate": 8.181582754013646e-06, "loss": 0.2063, "step": 4223 }, { "epoch": 0.3, "grad_norm": 1.4088541537318346, "learning_rate": 8.180689043001262e-06, "loss": 0.2232, "step": 4224 }, { "epoch": 0.3, "grad_norm": 1.475689886526372, "learning_rate": 8.179795161262128e-06, "loss": 0.228, "step": 4225 }, { "epoch": 0.3, "grad_norm": 1.3221744112747513, "learning_rate": 8.17890110884422e-06, "loss": 0.1875, "step": 4226 }, { "epoch": 0.3, "grad_norm": 1.3833247216758258, "learning_rate": 8.178006885795531e-06, "loss": 0.246, "step": 4227 }, { "epoch": 0.3, "grad_norm": 1.4077368289885681, "learning_rate": 8.177112492164058e-06, "loss": 0.2531, "step": 4228 }, { "epoch": 0.3, "grad_norm": 1.3879596969444226, "learning_rate": 8.176217927997807e-06, "loss": 0.2028, "step": 4229 }, { "epoch": 0.3, "grad_norm": 1.497118082063742, "learning_rate": 8.175323193344797e-06, "loss": 0.2079, "step": 4230 }, { "epoch": 0.3, "grad_norm": 1.1110221012685797, "learning_rate": 8.174428288253053e-06, "loss": 0.1755, "step": 4231 }, { "epoch": 0.3, "grad_norm": 1.4784721831246894, "learning_rate": 8.173533212770611e-06, "loss": 0.2288, "step": 4232 }, { "epoch": 0.3, "grad_norm": 7.765726509256648, "learning_rate": 8.172637966945513e-06, "loss": 0.6413, "step": 4233 }, { "epoch": 0.3, "grad_norm": 1.6041143094542212, "learning_rate": 8.171742550825814e-06, "loss": 0.2261, "step": 4234 }, { "epoch": 0.3, "grad_norm": 1.3054804726043723, "learning_rate": 8.170846964459573e-06, "loss": 0.2422, "step": 4235 }, { "epoch": 0.3, "grad_norm": 1.456502236131684, "learning_rate": 8.169951207894866e-06, "loss": 0.2463, "step": 4236 }, { "epoch": 0.3, "grad_norm": 1.377579537608348, "learning_rate": 8.169055281179771e-06, "loss": 0.222, "step": 4237 }, { "epoch": 0.3, "grad_norm": 1.3209362108055605, "learning_rate": 8.16815918436238e-06, "loss": 0.1945, "step": 4238 }, { "epoch": 0.3, "grad_norm": 6.494896256215283, "learning_rate": 8.167262917490788e-06, "loss": 0.7202, "step": 4239 }, { "epoch": 0.3, "grad_norm": 1.3678659303048917, "learning_rate": 8.166366480613107e-06, "loss": 0.1867, "step": 4240 }, { "epoch": 0.3, "grad_norm": 1.3718732679223735, "learning_rate": 8.165469873777452e-06, "loss": 0.2378, "step": 4241 }, { "epoch": 0.3, "grad_norm": 1.3878805365798734, "learning_rate": 8.16457309703195e-06, "loss": 0.229, "step": 4242 }, { "epoch": 0.3, "grad_norm": 1.3234508201567718, "learning_rate": 8.163676150424736e-06, "loss": 0.2414, "step": 4243 }, { "epoch": 0.3, "grad_norm": 1.4080048019737115, "learning_rate": 8.162779034003955e-06, "loss": 0.2135, "step": 4244 }, { "epoch": 0.3, "grad_norm": 1.2611355383410465, "learning_rate": 8.161881747817761e-06, "loss": 0.1635, "step": 4245 }, { "epoch": 0.3, "grad_norm": 5.08627799068848, "learning_rate": 8.160984291914316e-06, "loss": 0.6177, "step": 4246 }, { "epoch": 0.3, "grad_norm": 1.6488115455466497, "learning_rate": 8.160086666341794e-06, "loss": 0.2247, "step": 4247 }, { "epoch": 0.3, "grad_norm": 1.3016815243402677, "learning_rate": 8.159188871148372e-06, "loss": 0.1971, "step": 4248 }, { "epoch": 0.3, "grad_norm": 1.472141529986145, "learning_rate": 8.15829090638224e-06, "loss": 0.2493, "step": 4249 }, { "epoch": 0.3, "grad_norm": 1.386682010068181, "learning_rate": 8.157392772091603e-06, "loss": 0.1765, "step": 4250 }, { "epoch": 0.3, "grad_norm": 1.388635899272824, "learning_rate": 8.156494468324665e-06, "loss": 0.202, "step": 4251 }, { "epoch": 0.3, "grad_norm": 5.21510549038626, "learning_rate": 8.155595995129643e-06, "loss": 0.6997, "step": 4252 }, { "epoch": 0.3, "grad_norm": 4.8853480843177355, "learning_rate": 8.154697352554766e-06, "loss": 0.525, "step": 4253 }, { "epoch": 0.3, "grad_norm": 1.6264553729219897, "learning_rate": 8.153798540648266e-06, "loss": 0.2556, "step": 4254 }, { "epoch": 0.3, "grad_norm": 1.3589438318867064, "learning_rate": 8.152899559458392e-06, "loss": 0.2011, "step": 4255 }, { "epoch": 0.3, "grad_norm": 1.495531570310373, "learning_rate": 8.152000409033394e-06, "loss": 0.2154, "step": 4256 }, { "epoch": 0.3, "grad_norm": 1.3819337328695815, "learning_rate": 8.151101089421536e-06, "loss": 0.2226, "step": 4257 }, { "epoch": 0.3, "grad_norm": 1.3092968125303006, "learning_rate": 8.15020160067109e-06, "loss": 0.1944, "step": 4258 }, { "epoch": 0.3, "grad_norm": 1.5690378284594118, "learning_rate": 8.149301942830338e-06, "loss": 0.2143, "step": 4259 }, { "epoch": 0.3, "grad_norm": 1.601018913250708, "learning_rate": 8.14840211594757e-06, "loss": 0.235, "step": 4260 }, { "epoch": 0.3, "grad_norm": 4.972127047551957, "learning_rate": 8.147502120071084e-06, "loss": 0.6078, "step": 4261 }, { "epoch": 0.3, "grad_norm": 1.4222056146831985, "learning_rate": 8.146601955249187e-06, "loss": 0.2132, "step": 4262 }, { "epoch": 0.3, "grad_norm": 1.4553903088468665, "learning_rate": 8.1457016215302e-06, "loss": 0.2131, "step": 4263 }, { "epoch": 0.31, "grad_norm": 1.628269829543279, "learning_rate": 8.144801118962447e-06, "loss": 0.2044, "step": 4264 }, { "epoch": 0.31, "grad_norm": 1.4375679569378084, "learning_rate": 8.143900447594261e-06, "loss": 0.2592, "step": 4265 }, { "epoch": 0.31, "grad_norm": 1.3060112939564996, "learning_rate": 8.142999607473992e-06, "loss": 0.2025, "step": 4266 }, { "epoch": 0.31, "grad_norm": 1.2252729190796254, "learning_rate": 8.142098598649987e-06, "loss": 0.1931, "step": 4267 }, { "epoch": 0.31, "grad_norm": 1.4526668673570355, "learning_rate": 8.141197421170617e-06, "loss": 0.2692, "step": 4268 }, { "epoch": 0.31, "grad_norm": 1.2920917421539715, "learning_rate": 8.140296075084246e-06, "loss": 0.196, "step": 4269 }, { "epoch": 0.31, "grad_norm": 1.2275237203838303, "learning_rate": 8.139394560439257e-06, "loss": 0.1951, "step": 4270 }, { "epoch": 0.31, "grad_norm": 1.493707734851143, "learning_rate": 8.138492877284041e-06, "loss": 0.1878, "step": 4271 }, { "epoch": 0.31, "grad_norm": 1.3760960348368563, "learning_rate": 8.137591025666996e-06, "loss": 0.1898, "step": 4272 }, { "epoch": 0.31, "grad_norm": 5.988827822730528, "learning_rate": 8.13668900563653e-06, "loss": 0.6292, "step": 4273 }, { "epoch": 0.31, "grad_norm": 1.50941137823024, "learning_rate": 8.135786817241056e-06, "loss": 0.2468, "step": 4274 }, { "epoch": 0.31, "grad_norm": 1.5295258525481512, "learning_rate": 8.134884460529007e-06, "loss": 0.261, "step": 4275 }, { "epoch": 0.31, "grad_norm": 5.591735840225546, "learning_rate": 8.133981935548814e-06, "loss": 0.6078, "step": 4276 }, { "epoch": 0.31, "grad_norm": 1.334206505646596, "learning_rate": 8.13307924234892e-06, "loss": 0.1894, "step": 4277 }, { "epoch": 0.31, "grad_norm": 1.3394397827002784, "learning_rate": 8.13217638097778e-06, "loss": 0.1871, "step": 4278 }, { "epoch": 0.31, "grad_norm": 1.2901583074830638, "learning_rate": 8.131273351483857e-06, "loss": 0.2088, "step": 4279 }, { "epoch": 0.31, "grad_norm": 1.382496241929535, "learning_rate": 8.13037015391562e-06, "loss": 0.2066, "step": 4280 }, { "epoch": 0.31, "grad_norm": 1.3771757940572398, "learning_rate": 8.129466788321547e-06, "loss": 0.2298, "step": 4281 }, { "epoch": 0.31, "grad_norm": 1.5392102482960015, "learning_rate": 8.128563254750132e-06, "loss": 0.2738, "step": 4282 }, { "epoch": 0.31, "grad_norm": 1.447652591214568, "learning_rate": 8.127659553249869e-06, "loss": 0.206, "step": 4283 }, { "epoch": 0.31, "grad_norm": 1.3635992729973379, "learning_rate": 8.126755683869267e-06, "loss": 0.2077, "step": 4284 }, { "epoch": 0.31, "grad_norm": 1.2060758837323673, "learning_rate": 8.125851646656843e-06, "loss": 0.2036, "step": 4285 }, { "epoch": 0.31, "grad_norm": 1.4861654512601996, "learning_rate": 8.12494744166112e-06, "loss": 0.2661, "step": 4286 }, { "epoch": 0.31, "grad_norm": 7.05700923836143, "learning_rate": 8.124043068930633e-06, "loss": 0.563, "step": 4287 }, { "epoch": 0.31, "grad_norm": 1.5468862518324757, "learning_rate": 8.123138528513926e-06, "loss": 0.2324, "step": 4288 }, { "epoch": 0.31, "grad_norm": 1.3193142074907194, "learning_rate": 8.12223382045955e-06, "loss": 0.2004, "step": 4289 }, { "epoch": 0.31, "grad_norm": 1.38394573017972, "learning_rate": 8.121328944816067e-06, "loss": 0.2252, "step": 4290 }, { "epoch": 0.31, "grad_norm": 1.2461375197015399, "learning_rate": 8.120423901632046e-06, "loss": 0.1681, "step": 4291 }, { "epoch": 0.31, "grad_norm": 1.4568844544160655, "learning_rate": 8.119518690956066e-06, "loss": 0.2025, "step": 4292 }, { "epoch": 0.31, "grad_norm": 1.530716963461995, "learning_rate": 8.118613312836717e-06, "loss": 0.2379, "step": 4293 }, { "epoch": 0.31, "grad_norm": 1.3471392189116826, "learning_rate": 8.117707767322594e-06, "loss": 0.2382, "step": 4294 }, { "epoch": 0.31, "grad_norm": 1.253390141022074, "learning_rate": 8.116802054462305e-06, "loss": 0.2125, "step": 4295 }, { "epoch": 0.31, "grad_norm": 1.247934787101151, "learning_rate": 8.115896174304465e-06, "loss": 0.195, "step": 4296 }, { "epoch": 0.31, "grad_norm": 1.2581168812556756, "learning_rate": 8.114990126897694e-06, "loss": 0.1834, "step": 4297 }, { "epoch": 0.31, "grad_norm": 1.2644714288631789, "learning_rate": 8.114083912290631e-06, "loss": 0.1927, "step": 4298 }, { "epoch": 0.31, "grad_norm": 1.2128161930807477, "learning_rate": 8.113177530531915e-06, "loss": 0.2015, "step": 4299 }, { "epoch": 0.31, "grad_norm": 1.3381978625508204, "learning_rate": 8.112270981670196e-06, "loss": 0.1946, "step": 4300 }, { "epoch": 0.31, "grad_norm": 1.7883791587152686, "learning_rate": 8.111364265754136e-06, "loss": 0.3014, "step": 4301 }, { "epoch": 0.31, "grad_norm": 1.2386293146835143, "learning_rate": 8.110457382832402e-06, "loss": 0.2171, "step": 4302 }, { "epoch": 0.31, "grad_norm": 4.272227394407068, "learning_rate": 8.109550332953674e-06, "loss": 0.5293, "step": 4303 }, { "epoch": 0.31, "grad_norm": 1.435415085467868, "learning_rate": 8.108643116166637e-06, "loss": 0.2429, "step": 4304 }, { "epoch": 0.31, "grad_norm": 1.3248174984390693, "learning_rate": 8.107735732519988e-06, "loss": 0.1872, "step": 4305 }, { "epoch": 0.31, "grad_norm": 1.456474350496373, "learning_rate": 8.106828182062429e-06, "loss": 0.2241, "step": 4306 }, { "epoch": 0.31, "grad_norm": 1.5007446081463225, "learning_rate": 8.105920464842679e-06, "loss": 0.2472, "step": 4307 }, { "epoch": 0.31, "grad_norm": 1.6132215064587982, "learning_rate": 8.105012580909457e-06, "loss": 0.2207, "step": 4308 }, { "epoch": 0.31, "grad_norm": 1.4299033977950637, "learning_rate": 8.104104530311494e-06, "loss": 0.1958, "step": 4309 }, { "epoch": 0.31, "grad_norm": 1.4576235584286805, "learning_rate": 8.103196313097533e-06, "loss": 0.1843, "step": 4310 }, { "epoch": 0.31, "grad_norm": 1.3576010659856075, "learning_rate": 8.102287929316321e-06, "loss": 0.2277, "step": 4311 }, { "epoch": 0.31, "grad_norm": 1.4070119321783041, "learning_rate": 8.101379379016618e-06, "loss": 0.2218, "step": 4312 }, { "epoch": 0.31, "grad_norm": 1.3107927926177785, "learning_rate": 8.100470662247192e-06, "loss": 0.201, "step": 4313 }, { "epoch": 0.31, "grad_norm": 1.8606425157144113, "learning_rate": 8.099561779056816e-06, "loss": 0.1997, "step": 4314 }, { "epoch": 0.31, "grad_norm": 1.443892050419447, "learning_rate": 8.098652729494281e-06, "loss": 0.2171, "step": 4315 }, { "epoch": 0.31, "grad_norm": 1.3595994519217165, "learning_rate": 8.097743513608377e-06, "loss": 0.215, "step": 4316 }, { "epoch": 0.31, "grad_norm": 1.338811580758125, "learning_rate": 8.096834131447905e-06, "loss": 0.245, "step": 4317 }, { "epoch": 0.31, "grad_norm": 2.748082379088702, "learning_rate": 8.095924583061682e-06, "loss": 0.2103, "step": 4318 }, { "epoch": 0.31, "grad_norm": 1.4114121941161277, "learning_rate": 8.095014868498529e-06, "loss": 0.1814, "step": 4319 }, { "epoch": 0.31, "grad_norm": 2.4748395918483106, "learning_rate": 8.09410498780727e-06, "loss": 0.2016, "step": 4320 }, { "epoch": 0.31, "grad_norm": 1.5878975618580913, "learning_rate": 8.093194941036751e-06, "loss": 0.2284, "step": 4321 }, { "epoch": 0.31, "grad_norm": 1.535523107075687, "learning_rate": 8.092284728235812e-06, "loss": 0.2302, "step": 4322 }, { "epoch": 0.31, "grad_norm": 1.4614526758990578, "learning_rate": 8.091374349453318e-06, "loss": 0.2008, "step": 4323 }, { "epoch": 0.31, "grad_norm": 1.4074784083936163, "learning_rate": 8.090463804738126e-06, "loss": 0.2143, "step": 4324 }, { "epoch": 0.31, "grad_norm": 1.509964347716413, "learning_rate": 8.089553094139117e-06, "loss": 0.2223, "step": 4325 }, { "epoch": 0.31, "grad_norm": 1.4374751270016897, "learning_rate": 8.088642217705174e-06, "loss": 0.2135, "step": 4326 }, { "epoch": 0.31, "grad_norm": 1.38759590397608, "learning_rate": 8.087731175485184e-06, "loss": 0.2104, "step": 4327 }, { "epoch": 0.31, "grad_norm": 1.4020177497597461, "learning_rate": 8.086819967528053e-06, "loss": 0.1779, "step": 4328 }, { "epoch": 0.31, "grad_norm": 1.4408923995659428, "learning_rate": 8.08590859388269e-06, "loss": 0.2091, "step": 4329 }, { "epoch": 0.31, "grad_norm": 1.5119556082885228, "learning_rate": 8.084997054598012e-06, "loss": 0.1954, "step": 4330 }, { "epoch": 0.31, "grad_norm": 1.2391208604655684, "learning_rate": 8.084085349722948e-06, "loss": 0.1726, "step": 4331 }, { "epoch": 0.31, "grad_norm": 1.3950546255052358, "learning_rate": 8.083173479306436e-06, "loss": 0.2474, "step": 4332 }, { "epoch": 0.31, "grad_norm": 1.372392646739044, "learning_rate": 8.08226144339742e-06, "loss": 0.2225, "step": 4333 }, { "epoch": 0.31, "grad_norm": 8.287458644525628, "learning_rate": 8.081349242044854e-06, "loss": 0.8016, "step": 4334 }, { "epoch": 0.31, "grad_norm": 1.5096815544220206, "learning_rate": 8.080436875297703e-06, "loss": 0.2277, "step": 4335 }, { "epoch": 0.31, "grad_norm": 1.5331512870551067, "learning_rate": 8.07952434320494e-06, "loss": 0.2211, "step": 4336 }, { "epoch": 0.31, "grad_norm": 1.411875731148359, "learning_rate": 8.078611645815543e-06, "loss": 0.2585, "step": 4337 }, { "epoch": 0.31, "grad_norm": 4.648258019095411, "learning_rate": 8.077698783178503e-06, "loss": 0.4517, "step": 4338 }, { "epoch": 0.31, "grad_norm": 1.3103017860123887, "learning_rate": 8.07678575534282e-06, "loss": 0.229, "step": 4339 }, { "epoch": 0.31, "grad_norm": 8.029352018863754, "learning_rate": 8.075872562357502e-06, "loss": 0.71, "step": 4340 }, { "epoch": 0.31, "grad_norm": 1.389703688882895, "learning_rate": 8.074959204271563e-06, "loss": 0.2061, "step": 4341 }, { "epoch": 0.31, "grad_norm": 1.3985193573074026, "learning_rate": 8.07404568113403e-06, "loss": 0.2061, "step": 4342 }, { "epoch": 0.31, "grad_norm": 1.4570942289517306, "learning_rate": 8.07313199299394e-06, "loss": 0.1781, "step": 4343 }, { "epoch": 0.31, "grad_norm": 5.753879136287685, "learning_rate": 8.07221813990033e-06, "loss": 0.6069, "step": 4344 }, { "epoch": 0.31, "grad_norm": 1.3547257297122248, "learning_rate": 8.071304121902257e-06, "loss": 0.166, "step": 4345 }, { "epoch": 0.31, "grad_norm": 1.3039872159357055, "learning_rate": 8.070389939048782e-06, "loss": 0.2142, "step": 4346 }, { "epoch": 0.31, "grad_norm": 1.7021591994034695, "learning_rate": 8.06947559138897e-06, "loss": 0.2479, "step": 4347 }, { "epoch": 0.31, "grad_norm": 1.4644880783973198, "learning_rate": 8.068561078971905e-06, "loss": 0.245, "step": 4348 }, { "epoch": 0.31, "grad_norm": 6.198600718998808, "learning_rate": 8.067646401846672e-06, "loss": 0.6469, "step": 4349 }, { "epoch": 0.31, "grad_norm": 1.4479598256047939, "learning_rate": 8.066731560062366e-06, "loss": 0.243, "step": 4350 }, { "epoch": 0.31, "grad_norm": 1.6751343075158596, "learning_rate": 8.065816553668094e-06, "loss": 0.2016, "step": 4351 }, { "epoch": 0.31, "grad_norm": 1.2865171264850355, "learning_rate": 8.064901382712968e-06, "loss": 0.2335, "step": 4352 }, { "epoch": 0.31, "grad_norm": 4.379490490655004, "learning_rate": 8.063986047246113e-06, "loss": 0.6172, "step": 4353 }, { "epoch": 0.31, "grad_norm": 1.367335215695112, "learning_rate": 8.06307054731666e-06, "loss": 0.2506, "step": 4354 }, { "epoch": 0.31, "grad_norm": 1.3726092380062855, "learning_rate": 8.06215488297375e-06, "loss": 0.1656, "step": 4355 }, { "epoch": 0.31, "grad_norm": 1.4612872418740448, "learning_rate": 8.06123905426653e-06, "loss": 0.2115, "step": 4356 }, { "epoch": 0.31, "grad_norm": 1.3516581278007367, "learning_rate": 8.060323061244158e-06, "loss": 0.2455, "step": 4357 }, { "epoch": 0.31, "grad_norm": 1.5130941615553144, "learning_rate": 8.059406903955803e-06, "loss": 0.253, "step": 4358 }, { "epoch": 0.31, "grad_norm": 1.2986153397418696, "learning_rate": 8.05849058245064e-06, "loss": 0.1944, "step": 4359 }, { "epoch": 0.31, "grad_norm": 6.240657581779312, "learning_rate": 8.057574096777854e-06, "loss": 0.5496, "step": 4360 }, { "epoch": 0.31, "grad_norm": 1.4123866434694117, "learning_rate": 8.056657446986636e-06, "loss": 0.2001, "step": 4361 }, { "epoch": 0.31, "grad_norm": 1.4678821623436953, "learning_rate": 8.05574063312619e-06, "loss": 0.2681, "step": 4362 }, { "epoch": 0.31, "grad_norm": 1.1632388589280016, "learning_rate": 8.054823655245729e-06, "loss": 0.1684, "step": 4363 }, { "epoch": 0.31, "grad_norm": 1.4516975383265291, "learning_rate": 8.053906513394468e-06, "loss": 0.2378, "step": 4364 }, { "epoch": 0.31, "grad_norm": 1.3513223222369897, "learning_rate": 8.052989207621637e-06, "loss": 0.1845, "step": 4365 }, { "epoch": 0.31, "grad_norm": 7.645830662841758, "learning_rate": 8.052071737976477e-06, "loss": 0.5025, "step": 4366 }, { "epoch": 0.31, "grad_norm": 1.2491152825534535, "learning_rate": 8.05115410450823e-06, "loss": 0.1452, "step": 4367 }, { "epoch": 0.31, "grad_norm": 6.032912973155827, "learning_rate": 8.05023630726615e-06, "loss": 0.5316, "step": 4368 }, { "epoch": 0.31, "grad_norm": 1.5954177990220197, "learning_rate": 8.049318346299505e-06, "loss": 0.2024, "step": 4369 }, { "epoch": 0.31, "grad_norm": 1.33378192565435, "learning_rate": 8.048400221657566e-06, "loss": 0.2095, "step": 4370 }, { "epoch": 0.31, "grad_norm": 1.6174045257043435, "learning_rate": 8.047481933389613e-06, "loss": 0.2282, "step": 4371 }, { "epoch": 0.31, "grad_norm": 1.587260066239751, "learning_rate": 8.046563481544938e-06, "loss": 0.2182, "step": 4372 }, { "epoch": 0.31, "grad_norm": 1.506557481235486, "learning_rate": 8.045644866172838e-06, "loss": 0.2541, "step": 4373 }, { "epoch": 0.31, "grad_norm": 1.2557971615331542, "learning_rate": 8.044726087322621e-06, "loss": 0.1778, "step": 4374 }, { "epoch": 0.31, "grad_norm": 1.2884354900647808, "learning_rate": 8.043807145043604e-06, "loss": 0.2031, "step": 4375 }, { "epoch": 0.31, "grad_norm": 1.4256522171814492, "learning_rate": 8.042888039385112e-06, "loss": 0.2385, "step": 4376 }, { "epoch": 0.31, "grad_norm": 1.4482712939344824, "learning_rate": 8.041968770396477e-06, "loss": 0.2087, "step": 4377 }, { "epoch": 0.31, "grad_norm": 1.4235077321191418, "learning_rate": 8.041049338127045e-06, "loss": 0.2584, "step": 4378 }, { "epoch": 0.31, "grad_norm": 1.1395763654669604, "learning_rate": 8.040129742626167e-06, "loss": 0.1867, "step": 4379 }, { "epoch": 0.31, "grad_norm": 1.4914504486091855, "learning_rate": 8.039209983943201e-06, "loss": 0.2077, "step": 4380 }, { "epoch": 0.31, "grad_norm": 1.2168371645015115, "learning_rate": 8.038290062127517e-06, "loss": 0.1904, "step": 4381 }, { "epoch": 0.31, "grad_norm": 1.464070512158537, "learning_rate": 8.037369977228495e-06, "loss": 0.2286, "step": 4382 }, { "epoch": 0.31, "grad_norm": 1.3269698833704942, "learning_rate": 8.036449729295517e-06, "loss": 0.2199, "step": 4383 }, { "epoch": 0.31, "grad_norm": 1.552754738543002, "learning_rate": 8.035529318377981e-06, "loss": 0.2402, "step": 4384 }, { "epoch": 0.31, "grad_norm": 1.470719604985931, "learning_rate": 8.034608744525292e-06, "loss": 0.1935, "step": 4385 }, { "epoch": 0.31, "grad_norm": 1.521670332806466, "learning_rate": 8.03368800778686e-06, "loss": 0.2012, "step": 4386 }, { "epoch": 0.31, "grad_norm": 1.25690874284925, "learning_rate": 8.032767108212108e-06, "loss": 0.1617, "step": 4387 }, { "epoch": 0.31, "grad_norm": 1.2821137179733408, "learning_rate": 8.031846045850468e-06, "loss": 0.1724, "step": 4388 }, { "epoch": 0.31, "grad_norm": 5.8731199406179995, "learning_rate": 8.030924820751375e-06, "loss": 0.61, "step": 4389 }, { "epoch": 0.31, "grad_norm": 1.509132344249142, "learning_rate": 8.03000343296428e-06, "loss": 0.2107, "step": 4390 }, { "epoch": 0.31, "grad_norm": 1.3624206430473076, "learning_rate": 8.02908188253864e-06, "loss": 0.1738, "step": 4391 }, { "epoch": 0.31, "grad_norm": 1.28185480192736, "learning_rate": 8.028160169523915e-06, "loss": 0.209, "step": 4392 }, { "epoch": 0.31, "grad_norm": 1.326127703574001, "learning_rate": 8.027238293969583e-06, "loss": 0.1967, "step": 4393 }, { "epoch": 0.31, "grad_norm": 1.3773325555259772, "learning_rate": 8.026316255925127e-06, "loss": 0.192, "step": 4394 }, { "epoch": 0.31, "grad_norm": 1.2817350600827935, "learning_rate": 8.025394055440037e-06, "loss": 0.2089, "step": 4395 }, { "epoch": 0.31, "grad_norm": 1.337795880724, "learning_rate": 8.024471692563814e-06, "loss": 0.2009, "step": 4396 }, { "epoch": 0.31, "grad_norm": 1.447831148723988, "learning_rate": 8.023549167345966e-06, "loss": 0.2442, "step": 4397 }, { "epoch": 0.31, "grad_norm": 1.460251885683025, "learning_rate": 8.02262647983601e-06, "loss": 0.228, "step": 4398 }, { "epoch": 0.31, "grad_norm": 1.4368620433986492, "learning_rate": 8.021703630083472e-06, "loss": 0.1974, "step": 4399 }, { "epoch": 0.31, "grad_norm": 1.4290927471456292, "learning_rate": 8.020780618137889e-06, "loss": 0.2137, "step": 4400 }, { "epoch": 0.31, "grad_norm": 5.030068389467755, "learning_rate": 8.019857444048803e-06, "loss": 0.6593, "step": 4401 }, { "epoch": 0.31, "grad_norm": 1.2845583880557232, "learning_rate": 8.018934107865765e-06, "loss": 0.2239, "step": 4402 }, { "epoch": 0.31, "grad_norm": 6.1816193331842175, "learning_rate": 8.018010609638341e-06, "loss": 0.6666, "step": 4403 }, { "epoch": 0.32, "grad_norm": 1.4196583890055376, "learning_rate": 8.017086949416095e-06, "loss": 0.2304, "step": 4404 }, { "epoch": 0.32, "grad_norm": 1.4869634830644582, "learning_rate": 8.01616312724861e-06, "loss": 0.2426, "step": 4405 }, { "epoch": 0.32, "grad_norm": 1.3900479989047305, "learning_rate": 8.015239143185469e-06, "loss": 0.1895, "step": 4406 }, { "epoch": 0.32, "grad_norm": 1.366312103455014, "learning_rate": 8.01431499727627e-06, "loss": 0.2252, "step": 4407 }, { "epoch": 0.32, "grad_norm": 1.3988855544535694, "learning_rate": 8.013390689570616e-06, "loss": 0.1734, "step": 4408 }, { "epoch": 0.32, "grad_norm": 1.3595451562610792, "learning_rate": 8.012466220118125e-06, "loss": 0.2083, "step": 4409 }, { "epoch": 0.32, "grad_norm": 4.593861433496469, "learning_rate": 8.011541588968412e-06, "loss": 0.572, "step": 4410 }, { "epoch": 0.32, "grad_norm": 1.3863799945928819, "learning_rate": 8.010616796171112e-06, "loss": 0.1874, "step": 4411 }, { "epoch": 0.32, "grad_norm": 1.2195654185749925, "learning_rate": 8.009691841775864e-06, "loss": 0.1855, "step": 4412 }, { "epoch": 0.32, "grad_norm": 1.3872269768531935, "learning_rate": 8.008766725832313e-06, "loss": 0.2477, "step": 4413 }, { "epoch": 0.32, "grad_norm": 1.4138598883588593, "learning_rate": 8.007841448390116e-06, "loss": 0.2256, "step": 4414 }, { "epoch": 0.32, "grad_norm": 1.242908211430397, "learning_rate": 8.006916009498943e-06, "loss": 0.1791, "step": 4415 }, { "epoch": 0.32, "grad_norm": 1.4630881752965528, "learning_rate": 8.005990409208463e-06, "loss": 0.215, "step": 4416 }, { "epoch": 0.32, "grad_norm": 1.235703887416425, "learning_rate": 8.005064647568359e-06, "loss": 0.1845, "step": 4417 }, { "epoch": 0.32, "grad_norm": 1.4957633828146948, "learning_rate": 8.004138724628324e-06, "loss": 0.2361, "step": 4418 }, { "epoch": 0.32, "grad_norm": 1.1913757851587805, "learning_rate": 8.003212640438057e-06, "loss": 0.157, "step": 4419 }, { "epoch": 0.32, "grad_norm": 1.2943225304644, "learning_rate": 8.002286395047267e-06, "loss": 0.1635, "step": 4420 }, { "epoch": 0.32, "grad_norm": 1.6563952861503435, "learning_rate": 8.00135998850567e-06, "loss": 0.1996, "step": 4421 }, { "epoch": 0.32, "grad_norm": 1.6545270968008416, "learning_rate": 8.000433420862995e-06, "loss": 0.1775, "step": 4422 }, { "epoch": 0.32, "grad_norm": 1.6186880325413322, "learning_rate": 7.99950669216897e-06, "loss": 0.2464, "step": 4423 }, { "epoch": 0.32, "grad_norm": 1.5681421963143796, "learning_rate": 7.998579802473345e-06, "loss": 0.224, "step": 4424 }, { "epoch": 0.32, "grad_norm": 1.2641915288411145, "learning_rate": 7.997652751825868e-06, "loss": 0.2296, "step": 4425 }, { "epoch": 0.32, "grad_norm": 1.5499752972234897, "learning_rate": 7.9967255402763e-06, "loss": 0.2158, "step": 4426 }, { "epoch": 0.32, "grad_norm": 1.3982138383879292, "learning_rate": 7.995798167874412e-06, "loss": 0.2203, "step": 4427 }, { "epoch": 0.32, "grad_norm": 1.4403325618617224, "learning_rate": 7.994870634669978e-06, "loss": 0.2534, "step": 4428 }, { "epoch": 0.32, "grad_norm": 1.4993248958797845, "learning_rate": 7.993942940712789e-06, "loss": 0.2509, "step": 4429 }, { "epoch": 0.32, "grad_norm": 1.5977269404897865, "learning_rate": 7.993015086052634e-06, "loss": 0.2619, "step": 4430 }, { "epoch": 0.32, "grad_norm": 1.3053874858221115, "learning_rate": 7.99208707073932e-06, "loss": 0.2075, "step": 4431 }, { "epoch": 0.32, "grad_norm": 3.9933211059830316, "learning_rate": 7.991158894822662e-06, "loss": 0.5616, "step": 4432 }, { "epoch": 0.32, "grad_norm": 5.025397648166224, "learning_rate": 7.990230558352476e-06, "loss": 0.5474, "step": 4433 }, { "epoch": 0.32, "grad_norm": 1.3890792692972895, "learning_rate": 7.989302061378593e-06, "loss": 0.2156, "step": 4434 }, { "epoch": 0.32, "grad_norm": 1.5062850658802256, "learning_rate": 7.98837340395085e-06, "loss": 0.2149, "step": 4435 }, { "epoch": 0.32, "grad_norm": 1.4609356968777376, "learning_rate": 7.987444586119098e-06, "loss": 0.1916, "step": 4436 }, { "epoch": 0.32, "grad_norm": 1.2760646227945385, "learning_rate": 7.986515607933186e-06, "loss": 0.1852, "step": 4437 }, { "epoch": 0.32, "grad_norm": 1.3883782457986629, "learning_rate": 7.985586469442983e-06, "loss": 0.2497, "step": 4438 }, { "epoch": 0.32, "grad_norm": 1.3294698931618316, "learning_rate": 7.984657170698358e-06, "loss": 0.1991, "step": 4439 }, { "epoch": 0.32, "grad_norm": 1.2418933256847227, "learning_rate": 7.983727711749194e-06, "loss": 0.1883, "step": 4440 }, { "epoch": 0.32, "grad_norm": 1.3218579735000666, "learning_rate": 7.982798092645381e-06, "loss": 0.2025, "step": 4441 }, { "epoch": 0.32, "grad_norm": 1.3669601194605174, "learning_rate": 7.981868313436815e-06, "loss": 0.2249, "step": 4442 }, { "epoch": 0.32, "grad_norm": 1.6158819205955401, "learning_rate": 7.980938374173405e-06, "loss": 0.2049, "step": 4443 }, { "epoch": 0.32, "grad_norm": 1.178648280313349, "learning_rate": 7.980008274905067e-06, "loss": 0.1892, "step": 4444 }, { "epoch": 0.32, "grad_norm": 1.5124556239064182, "learning_rate": 7.979078015681723e-06, "loss": 0.1968, "step": 4445 }, { "epoch": 0.32, "grad_norm": 6.060394445214173, "learning_rate": 7.978147596553305e-06, "loss": 0.6798, "step": 4446 }, { "epoch": 0.32, "grad_norm": 1.2979858984801371, "learning_rate": 7.977217017569756e-06, "loss": 0.1698, "step": 4447 }, { "epoch": 0.32, "grad_norm": 1.4453727436678074, "learning_rate": 7.976286278781027e-06, "loss": 0.2332, "step": 4448 }, { "epoch": 0.32, "grad_norm": 1.2491791254477551, "learning_rate": 7.975355380237073e-06, "loss": 0.2006, "step": 4449 }, { "epoch": 0.32, "grad_norm": 4.552121570871094, "learning_rate": 7.974424321987864e-06, "loss": 0.4388, "step": 4450 }, { "epoch": 0.32, "grad_norm": 1.1382705219898712, "learning_rate": 7.973493104083373e-06, "loss": 0.1635, "step": 4451 }, { "epoch": 0.32, "grad_norm": 1.344768779122071, "learning_rate": 7.972561726573584e-06, "loss": 0.2652, "step": 4452 }, { "epoch": 0.32, "grad_norm": 7.906376285964811, "learning_rate": 7.971630189508494e-06, "loss": 0.6874, "step": 4453 }, { "epoch": 0.32, "grad_norm": 1.3952139362505775, "learning_rate": 7.970698492938099e-06, "loss": 0.2032, "step": 4454 }, { "epoch": 0.32, "grad_norm": 1.5678841917122002, "learning_rate": 7.969766636912411e-06, "loss": 0.2017, "step": 4455 }, { "epoch": 0.32, "grad_norm": 1.3956741388414295, "learning_rate": 7.96883462148145e-06, "loss": 0.2545, "step": 4456 }, { "epoch": 0.32, "grad_norm": 1.4088872109774793, "learning_rate": 7.96790244669524e-06, "loss": 0.2637, "step": 4457 }, { "epoch": 0.32, "grad_norm": 1.3093415022609916, "learning_rate": 7.966970112603816e-06, "loss": 0.2268, "step": 4458 }, { "epoch": 0.32, "grad_norm": 1.295417617695109, "learning_rate": 7.966037619257225e-06, "loss": 0.1831, "step": 4459 }, { "epoch": 0.32, "grad_norm": 1.1115885543034163, "learning_rate": 7.965104966705518e-06, "loss": 0.1778, "step": 4460 }, { "epoch": 0.32, "grad_norm": 1.4145462216345506, "learning_rate": 7.964172154998755e-06, "loss": 0.2022, "step": 4461 }, { "epoch": 0.32, "grad_norm": 1.4518580632109828, "learning_rate": 7.963239184187008e-06, "loss": 0.2346, "step": 4462 }, { "epoch": 0.32, "grad_norm": 1.364393664812921, "learning_rate": 7.962306054320353e-06, "loss": 0.2088, "step": 4463 }, { "epoch": 0.32, "grad_norm": 1.3584626301828335, "learning_rate": 7.96137276544888e-06, "loss": 0.235, "step": 4464 }, { "epoch": 0.32, "grad_norm": 1.4934977840503671, "learning_rate": 7.960439317622679e-06, "loss": 0.1944, "step": 4465 }, { "epoch": 0.32, "grad_norm": 1.3521829875729687, "learning_rate": 7.959505710891857e-06, "loss": 0.2037, "step": 4466 }, { "epoch": 0.32, "grad_norm": 1.2833183485163702, "learning_rate": 7.958571945306526e-06, "loss": 0.1858, "step": 4467 }, { "epoch": 0.32, "grad_norm": 1.4047627421962094, "learning_rate": 7.957638020916807e-06, "loss": 0.222, "step": 4468 }, { "epoch": 0.32, "grad_norm": 1.391299484235729, "learning_rate": 7.95670393777283e-06, "loss": 0.1888, "step": 4469 }, { "epoch": 0.32, "grad_norm": 6.419979642048531, "learning_rate": 7.95576969592473e-06, "loss": 0.6293, "step": 4470 }, { "epoch": 0.32, "grad_norm": 1.623453409076198, "learning_rate": 7.954835295422659e-06, "loss": 0.276, "step": 4471 }, { "epoch": 0.32, "grad_norm": 1.4137906497530288, "learning_rate": 7.953900736316766e-06, "loss": 0.1908, "step": 4472 }, { "epoch": 0.32, "grad_norm": 1.3033307833561416, "learning_rate": 7.952966018657217e-06, "loss": 0.1881, "step": 4473 }, { "epoch": 0.32, "grad_norm": 1.2493001610096695, "learning_rate": 7.952031142494184e-06, "loss": 0.2532, "step": 4474 }, { "epoch": 0.32, "grad_norm": 1.2457485934423147, "learning_rate": 7.951096107877845e-06, "loss": 0.2027, "step": 4475 }, { "epoch": 0.32, "grad_norm": 1.3660009618879005, "learning_rate": 7.950160914858392e-06, "loss": 0.215, "step": 4476 }, { "epoch": 0.32, "grad_norm": 1.2726755001039265, "learning_rate": 7.949225563486022e-06, "loss": 0.2347, "step": 4477 }, { "epoch": 0.32, "grad_norm": 1.3641903138221871, "learning_rate": 7.94829005381094e-06, "loss": 0.2108, "step": 4478 }, { "epoch": 0.32, "grad_norm": 1.3290355546439336, "learning_rate": 7.947354385883363e-06, "loss": 0.2056, "step": 4479 }, { "epoch": 0.32, "grad_norm": 1.3146752574988225, "learning_rate": 7.946418559753509e-06, "loss": 0.1862, "step": 4480 }, { "epoch": 0.32, "grad_norm": 32.4022285563669, "learning_rate": 7.945482575471614e-06, "loss": 0.6941, "step": 4481 }, { "epoch": 0.32, "grad_norm": 1.281183678234258, "learning_rate": 7.944546433087915e-06, "loss": 0.1883, "step": 4482 }, { "epoch": 0.32, "grad_norm": 1.3384358782648709, "learning_rate": 7.943610132652661e-06, "loss": 0.2398, "step": 4483 }, { "epoch": 0.32, "grad_norm": 1.2019106210608, "learning_rate": 7.94267367421611e-06, "loss": 0.1882, "step": 4484 }, { "epoch": 0.32, "grad_norm": 1.5677833275684363, "learning_rate": 7.941737057828528e-06, "loss": 0.2568, "step": 4485 }, { "epoch": 0.32, "grad_norm": 1.3380521824873923, "learning_rate": 7.940800283540187e-06, "loss": 0.2321, "step": 4486 }, { "epoch": 0.32, "grad_norm": 1.293598817422054, "learning_rate": 7.93986335140137e-06, "loss": 0.2249, "step": 4487 }, { "epoch": 0.32, "grad_norm": 1.3043607263182484, "learning_rate": 7.938926261462366e-06, "loss": 0.2352, "step": 4488 }, { "epoch": 0.32, "grad_norm": 1.452477829001722, "learning_rate": 7.937989013773478e-06, "loss": 0.2357, "step": 4489 }, { "epoch": 0.32, "grad_norm": 1.2167689531023806, "learning_rate": 7.93705160838501e-06, "loss": 0.2094, "step": 4490 }, { "epoch": 0.32, "grad_norm": 6.145762137923544, "learning_rate": 7.936114045347283e-06, "loss": 0.5817, "step": 4491 }, { "epoch": 0.32, "grad_norm": 1.4686505885351229, "learning_rate": 7.935176324710617e-06, "loss": 0.2028, "step": 4492 }, { "epoch": 0.32, "grad_norm": 1.342718985548421, "learning_rate": 7.934238446525346e-06, "loss": 0.1552, "step": 4493 }, { "epoch": 0.32, "grad_norm": 1.3295684792728466, "learning_rate": 7.933300410841812e-06, "loss": 0.2243, "step": 4494 }, { "epoch": 0.32, "grad_norm": 8.53041440632651, "learning_rate": 7.932362217710366e-06, "loss": 0.754, "step": 4495 }, { "epoch": 0.32, "grad_norm": 5.386632453836793, "learning_rate": 7.931423867181365e-06, "loss": 0.6938, "step": 4496 }, { "epoch": 0.32, "grad_norm": 1.2008989402072303, "learning_rate": 7.93048535930518e-06, "loss": 0.1883, "step": 4497 }, { "epoch": 0.32, "grad_norm": 1.375885058889189, "learning_rate": 7.929546694132179e-06, "loss": 0.2413, "step": 4498 }, { "epoch": 0.32, "grad_norm": 4.5219274529473505, "learning_rate": 7.928607871712753e-06, "loss": 0.6843, "step": 4499 }, { "epoch": 0.32, "grad_norm": 1.319016846670825, "learning_rate": 7.927668892097288e-06, "loss": 0.1827, "step": 4500 }, { "epoch": 0.32, "grad_norm": 1.3139024377211619, "learning_rate": 7.92672975533619e-06, "loss": 0.2161, "step": 4501 }, { "epoch": 0.32, "grad_norm": 4.628506686462941, "learning_rate": 7.925790461479866e-06, "loss": 0.5458, "step": 4502 }, { "epoch": 0.32, "grad_norm": 1.5399219850173202, "learning_rate": 7.924851010578734e-06, "loss": 0.2258, "step": 4503 }, { "epoch": 0.32, "grad_norm": 1.2731638508450254, "learning_rate": 7.92391140268322e-06, "loss": 0.187, "step": 4504 }, { "epoch": 0.32, "grad_norm": 6.7069782511242755, "learning_rate": 7.922971637843755e-06, "loss": 0.5683, "step": 4505 }, { "epoch": 0.32, "grad_norm": 1.5213544284024967, "learning_rate": 7.922031716110788e-06, "loss": 0.1985, "step": 4506 }, { "epoch": 0.32, "grad_norm": 7.547754680437872, "learning_rate": 7.921091637534765e-06, "loss": 0.6847, "step": 4507 }, { "epoch": 0.32, "grad_norm": 5.657259229431092, "learning_rate": 7.920151402166148e-06, "loss": 0.6026, "step": 4508 }, { "epoch": 0.32, "grad_norm": 5.944326504417041, "learning_rate": 7.919211010055406e-06, "loss": 0.7102, "step": 4509 }, { "epoch": 0.32, "grad_norm": 1.2567871898564404, "learning_rate": 7.918270461253014e-06, "loss": 0.1745, "step": 4510 }, { "epoch": 0.32, "grad_norm": 1.290050534445731, "learning_rate": 7.917329755809458e-06, "loss": 0.1733, "step": 4511 }, { "epoch": 0.32, "grad_norm": 1.5044430412753775, "learning_rate": 7.916388893775229e-06, "loss": 0.2878, "step": 4512 }, { "epoch": 0.32, "grad_norm": 1.599422334813816, "learning_rate": 7.91544787520083e-06, "loss": 0.2655, "step": 4513 }, { "epoch": 0.32, "grad_norm": 1.2565495235343223, "learning_rate": 7.914506700136775e-06, "loss": 0.2354, "step": 4514 }, { "epoch": 0.32, "grad_norm": 1.3105938408365039, "learning_rate": 7.913565368633576e-06, "loss": 0.1971, "step": 4515 }, { "epoch": 0.32, "grad_norm": 1.3045700943891678, "learning_rate": 7.912623880741762e-06, "loss": 0.1938, "step": 4516 }, { "epoch": 0.32, "grad_norm": 1.331556175888798, "learning_rate": 7.911682236511873e-06, "loss": 0.2158, "step": 4517 }, { "epoch": 0.32, "grad_norm": 1.4155291192537862, "learning_rate": 7.910740435994446e-06, "loss": 0.2161, "step": 4518 }, { "epoch": 0.32, "grad_norm": 1.309813488037048, "learning_rate": 7.909798479240038e-06, "loss": 0.2254, "step": 4519 }, { "epoch": 0.32, "grad_norm": 1.3126323049373394, "learning_rate": 7.908856366299206e-06, "loss": 0.1611, "step": 4520 }, { "epoch": 0.32, "grad_norm": 6.34334380472764, "learning_rate": 7.907914097222522e-06, "loss": 0.4557, "step": 4521 }, { "epoch": 0.32, "grad_norm": 1.2643675430235892, "learning_rate": 7.906971672060562e-06, "loss": 0.1811, "step": 4522 }, { "epoch": 0.32, "grad_norm": 1.3109301655779633, "learning_rate": 7.906029090863912e-06, "loss": 0.1686, "step": 4523 }, { "epoch": 0.32, "grad_norm": 1.3316416981307055, "learning_rate": 7.905086353683162e-06, "loss": 0.1974, "step": 4524 }, { "epoch": 0.32, "grad_norm": 1.2334280157535553, "learning_rate": 7.90414346056892e-06, "loss": 0.1724, "step": 4525 }, { "epoch": 0.32, "grad_norm": 1.386982235604832, "learning_rate": 7.903200411571795e-06, "loss": 0.1995, "step": 4526 }, { "epoch": 0.32, "grad_norm": 1.5810861345088132, "learning_rate": 7.902257206742405e-06, "loss": 0.2221, "step": 4527 }, { "epoch": 0.32, "grad_norm": 1.4632766554285028, "learning_rate": 7.90131384613138e-06, "loss": 0.2758, "step": 4528 }, { "epoch": 0.32, "grad_norm": 1.2969663486403764, "learning_rate": 7.900370329789352e-06, "loss": 0.1631, "step": 4529 }, { "epoch": 0.32, "grad_norm": 1.361265045295215, "learning_rate": 7.899426657766966e-06, "loss": 0.2102, "step": 4530 }, { "epoch": 0.32, "grad_norm": 1.3465527966386448, "learning_rate": 7.898482830114878e-06, "loss": 0.2478, "step": 4531 }, { "epoch": 0.32, "grad_norm": 6.586004046349359, "learning_rate": 7.897538846883748e-06, "loss": 0.7201, "step": 4532 }, { "epoch": 0.32, "grad_norm": 1.435943251686969, "learning_rate": 7.896594708124242e-06, "loss": 0.2208, "step": 4533 }, { "epoch": 0.32, "grad_norm": 1.4133963762648394, "learning_rate": 7.89565041388704e-06, "loss": 0.1993, "step": 4534 }, { "epoch": 0.32, "grad_norm": 1.533070930780637, "learning_rate": 7.894705964222826e-06, "loss": 0.2286, "step": 4535 }, { "epoch": 0.32, "grad_norm": 1.3409986235406122, "learning_rate": 7.893761359182297e-06, "loss": 0.1788, "step": 4536 }, { "epoch": 0.32, "grad_norm": 11.048763170674597, "learning_rate": 7.892816598816155e-06, "loss": 0.5688, "step": 4537 }, { "epoch": 0.32, "grad_norm": 1.2466152956002763, "learning_rate": 7.891871683175112e-06, "loss": 0.1801, "step": 4538 }, { "epoch": 0.32, "grad_norm": 1.2224506125160157, "learning_rate": 7.890926612309884e-06, "loss": 0.1761, "step": 4539 }, { "epoch": 0.32, "grad_norm": 1.6167614418535203, "learning_rate": 7.889981386271202e-06, "loss": 0.2408, "step": 4540 }, { "epoch": 0.32, "grad_norm": 1.3278833914792667, "learning_rate": 7.889036005109798e-06, "loss": 0.2077, "step": 4541 }, { "epoch": 0.32, "grad_norm": 1.5177657304852616, "learning_rate": 7.888090468876422e-06, "loss": 0.2343, "step": 4542 }, { "epoch": 0.32, "grad_norm": 1.4306884377432565, "learning_rate": 7.88714477762182e-06, "loss": 0.2323, "step": 4543 }, { "epoch": 0.33, "grad_norm": 1.2736924401106264, "learning_rate": 7.886198931396758e-06, "loss": 0.1468, "step": 4544 }, { "epoch": 0.33, "grad_norm": 1.3046940231608952, "learning_rate": 7.885252930252002e-06, "loss": 0.2075, "step": 4545 }, { "epoch": 0.33, "grad_norm": 1.3195738101274577, "learning_rate": 7.884306774238336e-06, "loss": 0.1682, "step": 4546 }, { "epoch": 0.33, "grad_norm": 1.4643083128801333, "learning_rate": 7.883360463406535e-06, "loss": 0.2307, "step": 4547 }, { "epoch": 0.33, "grad_norm": 1.3938845011705756, "learning_rate": 7.882413997807404e-06, "loss": 0.2056, "step": 4548 }, { "epoch": 0.33, "grad_norm": 5.979216308183173, "learning_rate": 7.881467377491738e-06, "loss": 0.5259, "step": 4549 }, { "epoch": 0.33, "grad_norm": 1.3866044063845284, "learning_rate": 7.880520602510353e-06, "loss": 0.1798, "step": 4550 }, { "epoch": 0.33, "grad_norm": 1.5197792969642114, "learning_rate": 7.879573672914062e-06, "loss": 0.2407, "step": 4551 }, { "epoch": 0.33, "grad_norm": 1.4897730321258797, "learning_rate": 7.8786265887537e-06, "loss": 0.2742, "step": 4552 }, { "epoch": 0.33, "grad_norm": 1.420884554330261, "learning_rate": 7.877679350080096e-06, "loss": 0.1812, "step": 4553 }, { "epoch": 0.33, "grad_norm": 1.4602262454738961, "learning_rate": 7.876731956944098e-06, "loss": 0.233, "step": 4554 }, { "epoch": 0.33, "grad_norm": 1.4039803370357302, "learning_rate": 7.87578440939656e-06, "loss": 0.2128, "step": 4555 }, { "epoch": 0.33, "grad_norm": 1.5613483760983986, "learning_rate": 7.874836707488336e-06, "loss": 0.2309, "step": 4556 }, { "epoch": 0.33, "grad_norm": 4.964179051533017, "learning_rate": 7.8738888512703e-06, "loss": 0.6407, "step": 4557 }, { "epoch": 0.33, "grad_norm": 1.2420390396839507, "learning_rate": 7.872940840793328e-06, "loss": 0.1863, "step": 4558 }, { "epoch": 0.33, "grad_norm": 1.3584788758460387, "learning_rate": 7.871992676108305e-06, "loss": 0.2115, "step": 4559 }, { "epoch": 0.33, "grad_norm": 1.2545314082817982, "learning_rate": 7.871044357266124e-06, "loss": 0.1917, "step": 4560 }, { "epoch": 0.33, "grad_norm": 6.016830104946275, "learning_rate": 7.87009588431769e-06, "loss": 0.6992, "step": 4561 }, { "epoch": 0.33, "grad_norm": 1.3867711001481327, "learning_rate": 7.869147257313909e-06, "loss": 0.205, "step": 4562 }, { "epoch": 0.33, "grad_norm": 1.3366054887906549, "learning_rate": 7.868198476305705e-06, "loss": 0.212, "step": 4563 }, { "epoch": 0.33, "grad_norm": 1.313308469373452, "learning_rate": 7.867249541343997e-06, "loss": 0.1852, "step": 4564 }, { "epoch": 0.33, "grad_norm": 1.4770904645777039, "learning_rate": 7.866300452479728e-06, "loss": 0.2484, "step": 4565 }, { "epoch": 0.33, "grad_norm": 1.2539209550778694, "learning_rate": 7.865351209763837e-06, "loss": 0.1497, "step": 4566 }, { "epoch": 0.33, "grad_norm": 1.5019930504895755, "learning_rate": 7.864401813247276e-06, "loss": 0.1874, "step": 4567 }, { "epoch": 0.33, "grad_norm": 6.836866351635279, "learning_rate": 7.863452262981006e-06, "loss": 0.6518, "step": 4568 }, { "epoch": 0.33, "grad_norm": 1.3652888718031775, "learning_rate": 7.862502559015994e-06, "loss": 0.2377, "step": 4569 }, { "epoch": 0.33, "grad_norm": 1.324201279005588, "learning_rate": 7.861552701403218e-06, "loss": 0.1833, "step": 4570 }, { "epoch": 0.33, "grad_norm": 1.2785338879288153, "learning_rate": 7.86060269019366e-06, "loss": 0.2494, "step": 4571 }, { "epoch": 0.33, "grad_norm": 1.3962329492726893, "learning_rate": 7.859652525438314e-06, "loss": 0.1974, "step": 4572 }, { "epoch": 0.33, "grad_norm": 1.4065724187551483, "learning_rate": 7.858702207188183e-06, "loss": 0.2068, "step": 4573 }, { "epoch": 0.33, "grad_norm": 7.4585713462910705, "learning_rate": 7.857751735494272e-06, "loss": 0.6454, "step": 4574 }, { "epoch": 0.33, "grad_norm": 1.630148422752979, "learning_rate": 7.856801110407602e-06, "loss": 0.2458, "step": 4575 }, { "epoch": 0.33, "grad_norm": 1.4243636421074484, "learning_rate": 7.855850331979199e-06, "loss": 0.2192, "step": 4576 }, { "epoch": 0.33, "grad_norm": 1.4326285195979387, "learning_rate": 7.854899400260094e-06, "loss": 0.1993, "step": 4577 }, { "epoch": 0.33, "grad_norm": 1.3265202734027455, "learning_rate": 7.853948315301334e-06, "loss": 0.2432, "step": 4578 }, { "epoch": 0.33, "grad_norm": 1.2278220732696887, "learning_rate": 7.852997077153964e-06, "loss": 0.1889, "step": 4579 }, { "epoch": 0.33, "grad_norm": 1.4881727868391827, "learning_rate": 7.852045685869046e-06, "loss": 0.2092, "step": 4580 }, { "epoch": 0.33, "grad_norm": 1.5693306002705179, "learning_rate": 7.851094141497645e-06, "loss": 0.189, "step": 4581 }, { "epoch": 0.33, "grad_norm": 1.502471023510673, "learning_rate": 7.85014244409084e-06, "loss": 0.2488, "step": 4582 }, { "epoch": 0.33, "grad_norm": 1.280772540551916, "learning_rate": 7.84919059369971e-06, "loss": 0.2087, "step": 4583 }, { "epoch": 0.33, "grad_norm": 5.220497804121692, "learning_rate": 7.848238590375348e-06, "loss": 0.6535, "step": 4584 }, { "epoch": 0.33, "grad_norm": 1.4399212367337306, "learning_rate": 7.847286434168853e-06, "loss": 0.2293, "step": 4585 }, { "epoch": 0.33, "grad_norm": 1.479232273595728, "learning_rate": 7.846334125131336e-06, "loss": 0.2363, "step": 4586 }, { "epoch": 0.33, "grad_norm": 1.2934676487345758, "learning_rate": 7.84538166331391e-06, "loss": 0.1556, "step": 4587 }, { "epoch": 0.33, "grad_norm": 1.275788358162262, "learning_rate": 7.844429048767702e-06, "loss": 0.1907, "step": 4588 }, { "epoch": 0.33, "grad_norm": 1.22902838957051, "learning_rate": 7.843476281543841e-06, "loss": 0.1996, "step": 4589 }, { "epoch": 0.33, "grad_norm": 1.105408356676575, "learning_rate": 7.84252336169347e-06, "loss": 0.1606, "step": 4590 }, { "epoch": 0.33, "grad_norm": 7.337862630638302, "learning_rate": 7.841570289267738e-06, "loss": 0.4815, "step": 4591 }, { "epoch": 0.33, "grad_norm": 1.276848463714875, "learning_rate": 7.8406170643178e-06, "loss": 0.2029, "step": 4592 }, { "epoch": 0.33, "grad_norm": 7.153834564616883, "learning_rate": 7.839663686894825e-06, "loss": 0.7063, "step": 4593 }, { "epoch": 0.33, "grad_norm": 1.432277600197333, "learning_rate": 7.838710157049985e-06, "loss": 0.1743, "step": 4594 }, { "epoch": 0.33, "grad_norm": 1.3261365291246432, "learning_rate": 7.83775647483446e-06, "loss": 0.1693, "step": 4595 }, { "epoch": 0.33, "grad_norm": 1.3728823721554628, "learning_rate": 7.836802640299442e-06, "loss": 0.2287, "step": 4596 }, { "epoch": 0.33, "grad_norm": 1.3941663022899486, "learning_rate": 7.835848653496129e-06, "loss": 0.2308, "step": 4597 }, { "epoch": 0.33, "grad_norm": 1.2712959913348558, "learning_rate": 7.834894514475725e-06, "loss": 0.1847, "step": 4598 }, { "epoch": 0.33, "grad_norm": 1.1684241777978779, "learning_rate": 7.833940223289448e-06, "loss": 0.1804, "step": 4599 }, { "epoch": 0.33, "grad_norm": 1.3654626862260337, "learning_rate": 7.832985779988518e-06, "loss": 0.1906, "step": 4600 }, { "epoch": 0.33, "grad_norm": 1.4105204397528188, "learning_rate": 7.832031184624165e-06, "loss": 0.2074, "step": 4601 }, { "epoch": 0.33, "grad_norm": 1.1032821708609535, "learning_rate": 7.83107643724763e-06, "loss": 0.1545, "step": 4602 }, { "epoch": 0.33, "grad_norm": 1.3644410751507485, "learning_rate": 7.830121537910158e-06, "loss": 0.2136, "step": 4603 }, { "epoch": 0.33, "grad_norm": 1.4604238950197626, "learning_rate": 7.829166486663005e-06, "loss": 0.2198, "step": 4604 }, { "epoch": 0.33, "grad_norm": 1.2273127316468662, "learning_rate": 7.828211283557436e-06, "loss": 0.1811, "step": 4605 }, { "epoch": 0.33, "grad_norm": 1.3262955536888117, "learning_rate": 7.827255928644721e-06, "loss": 0.2259, "step": 4606 }, { "epoch": 0.33, "grad_norm": 1.1904641910498672, "learning_rate": 7.82630042197614e-06, "loss": 0.1425, "step": 4607 }, { "epoch": 0.33, "grad_norm": 1.4375485519569122, "learning_rate": 7.825344763602982e-06, "loss": 0.2344, "step": 4608 }, { "epoch": 0.33, "grad_norm": 1.419910912165066, "learning_rate": 7.824388953576539e-06, "loss": 0.2269, "step": 4609 }, { "epoch": 0.33, "grad_norm": 1.4266323744991785, "learning_rate": 7.823432991948119e-06, "loss": 0.2204, "step": 4610 }, { "epoch": 0.33, "grad_norm": 7.4004901939796905, "learning_rate": 7.822476878769032e-06, "loss": 0.641, "step": 4611 }, { "epoch": 0.33, "grad_norm": 5.096382652051679, "learning_rate": 7.8215206140906e-06, "loss": 0.7195, "step": 4612 }, { "epoch": 0.33, "grad_norm": 1.4505301139004232, "learning_rate": 7.82056419796415e-06, "loss": 0.2221, "step": 4613 }, { "epoch": 0.33, "grad_norm": 1.4106869332709613, "learning_rate": 7.81960763044102e-06, "loss": 0.2102, "step": 4614 }, { "epoch": 0.33, "grad_norm": 1.3495953342102984, "learning_rate": 7.818650911572553e-06, "loss": 0.1709, "step": 4615 }, { "epoch": 0.33, "grad_norm": 1.4223874091694961, "learning_rate": 7.817694041410106e-06, "loss": 0.2692, "step": 4616 }, { "epoch": 0.33, "grad_norm": 1.120940129272454, "learning_rate": 7.816737020005035e-06, "loss": 0.1582, "step": 4617 }, { "epoch": 0.33, "grad_norm": 1.4745310663883042, "learning_rate": 7.815779847408711e-06, "loss": 0.218, "step": 4618 }, { "epoch": 0.33, "grad_norm": 1.3128387347753592, "learning_rate": 7.81482252367251e-06, "loss": 0.2088, "step": 4619 }, { "epoch": 0.33, "grad_norm": 1.2424369624303544, "learning_rate": 7.81386504884782e-06, "loss": 0.1734, "step": 4620 }, { "epoch": 0.33, "grad_norm": 1.411049224954434, "learning_rate": 7.812907422986033e-06, "loss": 0.1643, "step": 4621 }, { "epoch": 0.33, "grad_norm": 1.2902778313369658, "learning_rate": 7.81194964613855e-06, "loss": 0.2288, "step": 4622 }, { "epoch": 0.33, "grad_norm": 1.3890905780208214, "learning_rate": 7.810991718356781e-06, "loss": 0.2134, "step": 4623 }, { "epoch": 0.33, "grad_norm": 1.3872279940987733, "learning_rate": 7.810033639692147e-06, "loss": 0.2421, "step": 4624 }, { "epoch": 0.33, "grad_norm": 1.4755240719102296, "learning_rate": 7.809075410196067e-06, "loss": 0.2016, "step": 4625 }, { "epoch": 0.33, "grad_norm": 1.2844593142517982, "learning_rate": 7.80811702991998e-06, "loss": 0.2184, "step": 4626 }, { "epoch": 0.33, "grad_norm": 1.188589907562974, "learning_rate": 7.807158498915329e-06, "loss": 0.1861, "step": 4627 }, { "epoch": 0.33, "grad_norm": 1.3521513491386523, "learning_rate": 7.80619981723356e-06, "loss": 0.2325, "step": 4628 }, { "epoch": 0.33, "grad_norm": 1.2807056943208444, "learning_rate": 7.805240984926134e-06, "loss": 0.2034, "step": 4629 }, { "epoch": 0.33, "grad_norm": 1.6474080972030807, "learning_rate": 7.804282002044517e-06, "loss": 0.2607, "step": 4630 }, { "epoch": 0.33, "grad_norm": 1.5412601689299397, "learning_rate": 7.803322868640182e-06, "loss": 0.2523, "step": 4631 }, { "epoch": 0.33, "grad_norm": 1.822059582538499, "learning_rate": 7.802363584764613e-06, "loss": 0.2321, "step": 4632 }, { "epoch": 0.33, "grad_norm": 1.3025015177536587, "learning_rate": 7.8014041504693e-06, "loss": 0.1839, "step": 4633 }, { "epoch": 0.33, "grad_norm": 1.3294454676960608, "learning_rate": 7.80044456580574e-06, "loss": 0.1626, "step": 4634 }, { "epoch": 0.33, "grad_norm": 1.347201365237797, "learning_rate": 7.799484830825443e-06, "loss": 0.2122, "step": 4635 }, { "epoch": 0.33, "grad_norm": 4.91421616385361, "learning_rate": 7.798524945579922e-06, "loss": 0.4876, "step": 4636 }, { "epoch": 0.33, "grad_norm": 1.346536420197388, "learning_rate": 7.7975649101207e-06, "loss": 0.2262, "step": 4637 }, { "epoch": 0.33, "grad_norm": 1.3037138629132499, "learning_rate": 7.796604724499306e-06, "loss": 0.1921, "step": 4638 }, { "epoch": 0.33, "grad_norm": 1.3549954527577046, "learning_rate": 7.795644388767283e-06, "loss": 0.2078, "step": 4639 }, { "epoch": 0.33, "grad_norm": 1.5127267773769109, "learning_rate": 7.794683902976175e-06, "loss": 0.2413, "step": 4640 }, { "epoch": 0.33, "grad_norm": 1.4248220359700412, "learning_rate": 7.793723267177538e-06, "loss": 0.2114, "step": 4641 }, { "epoch": 0.33, "grad_norm": 6.277380730332711, "learning_rate": 7.792762481422933e-06, "loss": 0.5926, "step": 4642 }, { "epoch": 0.33, "grad_norm": 4.44824350460334, "learning_rate": 7.791801545763934e-06, "loss": 0.7087, "step": 4643 }, { "epoch": 0.33, "grad_norm": 1.495388866003692, "learning_rate": 7.790840460252121e-06, "loss": 0.2244, "step": 4644 }, { "epoch": 0.33, "grad_norm": 1.4371861560000003, "learning_rate": 7.789879224939078e-06, "loss": 0.246, "step": 4645 }, { "epoch": 0.33, "grad_norm": 1.3935489834158172, "learning_rate": 7.788917839876402e-06, "loss": 0.2361, "step": 4646 }, { "epoch": 0.33, "grad_norm": 1.416779449322759, "learning_rate": 7.787956305115696e-06, "loss": 0.1843, "step": 4647 }, { "epoch": 0.33, "grad_norm": 1.4125387282742232, "learning_rate": 7.786994620708572e-06, "loss": 0.2139, "step": 4648 }, { "epoch": 0.33, "grad_norm": 1.1033852742707633, "learning_rate": 7.786032786706648e-06, "loss": 0.1402, "step": 4649 }, { "epoch": 0.33, "grad_norm": 1.3597511933142212, "learning_rate": 7.785070803161552e-06, "loss": 0.1829, "step": 4650 }, { "epoch": 0.33, "grad_norm": 1.1907186733056092, "learning_rate": 7.784108670124921e-06, "loss": 0.1561, "step": 4651 }, { "epoch": 0.33, "grad_norm": 1.30763833288827, "learning_rate": 7.783146387648397e-06, "loss": 0.2175, "step": 4652 }, { "epoch": 0.33, "grad_norm": 1.2270376205196245, "learning_rate": 7.782183955783633e-06, "loss": 0.1577, "step": 4653 }, { "epoch": 0.33, "grad_norm": 1.5646695070806511, "learning_rate": 7.781221374582284e-06, "loss": 0.2265, "step": 4654 }, { "epoch": 0.33, "grad_norm": 1.2820686465104565, "learning_rate": 7.780258644096024e-06, "loss": 0.2394, "step": 4655 }, { "epoch": 0.33, "grad_norm": 1.4805879866501594, "learning_rate": 7.779295764376524e-06, "loss": 0.2045, "step": 4656 }, { "epoch": 0.33, "grad_norm": 1.439745454232342, "learning_rate": 7.77833273547547e-06, "loss": 0.1985, "step": 4657 }, { "epoch": 0.33, "grad_norm": 1.4349080124112894, "learning_rate": 7.77736955744455e-06, "loss": 0.237, "step": 4658 }, { "epoch": 0.33, "grad_norm": 1.2474915637670567, "learning_rate": 7.776406230335469e-06, "loss": 0.183, "step": 4659 }, { "epoch": 0.33, "grad_norm": 1.5339912123169865, "learning_rate": 7.775442754199929e-06, "loss": 0.2519, "step": 4660 }, { "epoch": 0.33, "grad_norm": 1.3489278063475347, "learning_rate": 7.77447912908965e-06, "loss": 0.2419, "step": 4661 }, { "epoch": 0.33, "grad_norm": 1.3182120957097267, "learning_rate": 7.773515355056354e-06, "loss": 0.2106, "step": 4662 }, { "epoch": 0.33, "grad_norm": 1.3401895304346232, "learning_rate": 7.772551432151771e-06, "loss": 0.1999, "step": 4663 }, { "epoch": 0.33, "grad_norm": 1.5535310494308856, "learning_rate": 7.771587360427642e-06, "loss": 0.1939, "step": 4664 }, { "epoch": 0.33, "grad_norm": 1.3822008916216864, "learning_rate": 7.770623139935716e-06, "loss": 0.2231, "step": 4665 }, { "epoch": 0.33, "grad_norm": 1.3795615224891262, "learning_rate": 7.769658770727745e-06, "loss": 0.238, "step": 4666 }, { "epoch": 0.33, "grad_norm": 1.4620561520826667, "learning_rate": 7.768694252855495e-06, "loss": 0.2123, "step": 4667 }, { "epoch": 0.33, "grad_norm": 1.3518772674923205, "learning_rate": 7.767729586370738e-06, "loss": 0.187, "step": 4668 }, { "epoch": 0.33, "grad_norm": 1.1649761230185158, "learning_rate": 7.76676477132525e-06, "loss": 0.1477, "step": 4669 }, { "epoch": 0.33, "grad_norm": 1.465256565603295, "learning_rate": 7.765799807770824e-06, "loss": 0.2208, "step": 4670 }, { "epoch": 0.33, "grad_norm": 1.386945854291886, "learning_rate": 7.764834695759251e-06, "loss": 0.2204, "step": 4671 }, { "epoch": 0.33, "grad_norm": 1.2121196514853203, "learning_rate": 7.763869435342335e-06, "loss": 0.1797, "step": 4672 }, { "epoch": 0.33, "grad_norm": 1.5221129127495716, "learning_rate": 7.762904026571889e-06, "loss": 0.2141, "step": 4673 }, { "epoch": 0.33, "grad_norm": 1.5241753922235284, "learning_rate": 7.76193846949973e-06, "loss": 0.2699, "step": 4674 }, { "epoch": 0.33, "grad_norm": 5.778930221261666, "learning_rate": 7.760972764177688e-06, "loss": 0.6438, "step": 4675 }, { "epoch": 0.33, "grad_norm": 1.5107383821748674, "learning_rate": 7.760006910657597e-06, "loss": 0.24, "step": 4676 }, { "epoch": 0.33, "grad_norm": 1.5143363602372204, "learning_rate": 7.759040908991297e-06, "loss": 0.2391, "step": 4677 }, { "epoch": 0.33, "grad_norm": 5.4099940483474835, "learning_rate": 7.758074759230645e-06, "loss": 0.7578, "step": 4678 }, { "epoch": 0.33, "grad_norm": 1.3891058934766316, "learning_rate": 7.757108461427496e-06, "loss": 0.2182, "step": 4679 }, { "epoch": 0.33, "grad_norm": 8.116495119673267, "learning_rate": 7.75614201563372e-06, "loss": 0.6878, "step": 4680 }, { "epoch": 0.33, "grad_norm": 1.3542336247691311, "learning_rate": 7.755175421901187e-06, "loss": 0.1645, "step": 4681 }, { "epoch": 0.33, "grad_norm": 1.277862209020888, "learning_rate": 7.754208680281784e-06, "loss": 0.2081, "step": 4682 }, { "epoch": 0.33, "grad_norm": 1.467077621457075, "learning_rate": 7.753241790827401e-06, "loss": 0.2276, "step": 4683 }, { "epoch": 0.34, "grad_norm": 1.4178612377713071, "learning_rate": 7.752274753589939e-06, "loss": 0.2288, "step": 4684 }, { "epoch": 0.34, "grad_norm": 1.2707158965681835, "learning_rate": 7.7513075686213e-06, "loss": 0.1798, "step": 4685 }, { "epoch": 0.34, "grad_norm": 1.4861475442673708, "learning_rate": 7.750340235973401e-06, "loss": 0.2341, "step": 4686 }, { "epoch": 0.34, "grad_norm": 1.4580276865070532, "learning_rate": 7.749372755698166e-06, "loss": 0.2155, "step": 4687 }, { "epoch": 0.34, "grad_norm": 1.313186658018327, "learning_rate": 7.748405127847525e-06, "loss": 0.1932, "step": 4688 }, { "epoch": 0.34, "grad_norm": 1.708257396340362, "learning_rate": 7.747437352473414e-06, "loss": 0.2348, "step": 4689 }, { "epoch": 0.34, "grad_norm": 1.2572847058322358, "learning_rate": 7.74646942962778e-06, "loss": 0.2156, "step": 4690 }, { "epoch": 0.34, "grad_norm": 1.3831811530875395, "learning_rate": 7.74550135936258e-06, "loss": 0.2012, "step": 4691 }, { "epoch": 0.34, "grad_norm": 1.4782713571556692, "learning_rate": 7.744533141729773e-06, "loss": 0.1928, "step": 4692 }, { "epoch": 0.34, "grad_norm": 1.6087742949685924, "learning_rate": 7.743564776781332e-06, "loss": 0.2297, "step": 4693 }, { "epoch": 0.34, "grad_norm": 1.319235132797053, "learning_rate": 7.742596264569232e-06, "loss": 0.1819, "step": 4694 }, { "epoch": 0.34, "grad_norm": 1.5307696053948605, "learning_rate": 7.74162760514546e-06, "loss": 0.206, "step": 4695 }, { "epoch": 0.34, "grad_norm": 5.290977741433551, "learning_rate": 7.74065879856201e-06, "loss": 0.6766, "step": 4696 }, { "epoch": 0.34, "grad_norm": 1.5126675736856585, "learning_rate": 7.739689844870885e-06, "loss": 0.204, "step": 4697 }, { "epoch": 0.34, "grad_norm": 1.4653002924408978, "learning_rate": 7.738720744124093e-06, "loss": 0.2388, "step": 4698 }, { "epoch": 0.34, "grad_norm": 5.90522809641348, "learning_rate": 7.737751496373652e-06, "loss": 0.7529, "step": 4699 }, { "epoch": 0.34, "grad_norm": 5.747649106380913, "learning_rate": 7.736782101671587e-06, "loss": 0.5075, "step": 4700 }, { "epoch": 0.34, "grad_norm": 1.422438147751286, "learning_rate": 7.73581256006993e-06, "loss": 0.1962, "step": 4701 }, { "epoch": 0.34, "grad_norm": 1.332620916718465, "learning_rate": 7.734842871620725e-06, "loss": 0.1919, "step": 4702 }, { "epoch": 0.34, "grad_norm": 1.5814793775956724, "learning_rate": 7.733873036376021e-06, "loss": 0.234, "step": 4703 }, { "epoch": 0.34, "grad_norm": 1.2915242772272948, "learning_rate": 7.732903054387872e-06, "loss": 0.2102, "step": 4704 }, { "epoch": 0.34, "grad_norm": 1.3007399495509682, "learning_rate": 7.731932925708343e-06, "loss": 0.1809, "step": 4705 }, { "epoch": 0.34, "grad_norm": 11.620738510847799, "learning_rate": 7.73096265038951e-06, "loss": 0.4606, "step": 4706 }, { "epoch": 0.34, "grad_norm": 1.5156201529372535, "learning_rate": 7.72999222848345e-06, "loss": 0.2464, "step": 4707 }, { "epoch": 0.34, "grad_norm": 1.5030260185893738, "learning_rate": 7.729021660042254e-06, "loss": 0.2317, "step": 4708 }, { "epoch": 0.34, "grad_norm": 1.423551542191018, "learning_rate": 7.728050945118017e-06, "loss": 0.2266, "step": 4709 }, { "epoch": 0.34, "grad_norm": 1.3631395992829514, "learning_rate": 7.727080083762844e-06, "loss": 0.2004, "step": 4710 }, { "epoch": 0.34, "grad_norm": 5.7113638193465475, "learning_rate": 7.726109076028846e-06, "loss": 0.5942, "step": 4711 }, { "epoch": 0.34, "grad_norm": 1.3866158352213205, "learning_rate": 7.72513792196814e-06, "loss": 0.1941, "step": 4712 }, { "epoch": 0.34, "grad_norm": 1.321018366837503, "learning_rate": 7.724166621632859e-06, "loss": 0.2031, "step": 4713 }, { "epoch": 0.34, "grad_norm": 1.344041606929519, "learning_rate": 7.723195175075136e-06, "loss": 0.2091, "step": 4714 }, { "epoch": 0.34, "grad_norm": 1.313191087104459, "learning_rate": 7.722223582347115e-06, "loss": 0.2147, "step": 4715 }, { "epoch": 0.34, "grad_norm": 6.181209704056137, "learning_rate": 7.721251843500948e-06, "loss": 0.5853, "step": 4716 }, { "epoch": 0.34, "grad_norm": 1.3420655405288728, "learning_rate": 7.720279958588791e-06, "loss": 0.219, "step": 4717 }, { "epoch": 0.34, "grad_norm": 1.4515739332436, "learning_rate": 7.719307927662813e-06, "loss": 0.2124, "step": 4718 }, { "epoch": 0.34, "grad_norm": 1.2212428798120076, "learning_rate": 7.71833575077519e-06, "loss": 0.1587, "step": 4719 }, { "epoch": 0.34, "grad_norm": 1.4844113039597002, "learning_rate": 7.717363427978103e-06, "loss": 0.2062, "step": 4720 }, { "epoch": 0.34, "grad_norm": 1.410865759148572, "learning_rate": 7.716390959323743e-06, "loss": 0.2498, "step": 4721 }, { "epoch": 0.34, "grad_norm": 1.2130805585552855, "learning_rate": 7.715418344864306e-06, "loss": 0.1841, "step": 4722 }, { "epoch": 0.34, "grad_norm": 1.438362118314582, "learning_rate": 7.714445584652001e-06, "loss": 0.2043, "step": 4723 }, { "epoch": 0.34, "grad_norm": 3.99252549522735, "learning_rate": 7.713472678739042e-06, "loss": 0.4975, "step": 4724 }, { "epoch": 0.34, "grad_norm": 1.3662144993163154, "learning_rate": 7.712499627177648e-06, "loss": 0.199, "step": 4725 }, { "epoch": 0.34, "grad_norm": 1.471292440398072, "learning_rate": 7.711526430020052e-06, "loss": 0.2291, "step": 4726 }, { "epoch": 0.34, "grad_norm": 4.868256489088615, "learning_rate": 7.710553087318489e-06, "loss": 0.5817, "step": 4727 }, { "epoch": 0.34, "grad_norm": 1.3954135172494164, "learning_rate": 7.709579599125205e-06, "loss": 0.2368, "step": 4728 }, { "epoch": 0.34, "grad_norm": 1.463593919186997, "learning_rate": 7.708605965492451e-06, "loss": 0.2477, "step": 4729 }, { "epoch": 0.34, "grad_norm": 1.428317301164114, "learning_rate": 7.70763218647249e-06, "loss": 0.2102, "step": 4730 }, { "epoch": 0.34, "grad_norm": 1.3068161816616652, "learning_rate": 7.706658262117592e-06, "loss": 0.2075, "step": 4731 }, { "epoch": 0.34, "grad_norm": 1.4412562126913349, "learning_rate": 7.705684192480029e-06, "loss": 0.201, "step": 4732 }, { "epoch": 0.34, "grad_norm": 1.4737874705637126, "learning_rate": 7.70470997761209e-06, "loss": 0.1997, "step": 4733 }, { "epoch": 0.34, "grad_norm": 1.523453737076902, "learning_rate": 7.703735617566063e-06, "loss": 0.2544, "step": 4734 }, { "epoch": 0.34, "grad_norm": 1.3015823117521228, "learning_rate": 7.70276111239425e-06, "loss": 0.1964, "step": 4735 }, { "epoch": 0.34, "grad_norm": 1.2820393677384432, "learning_rate": 7.701786462148958e-06, "loss": 0.1702, "step": 4736 }, { "epoch": 0.34, "grad_norm": 1.4204634617817746, "learning_rate": 7.700811666882501e-06, "loss": 0.192, "step": 4737 }, { "epoch": 0.34, "grad_norm": 1.4655529705751784, "learning_rate": 7.699836726647206e-06, "loss": 0.2383, "step": 4738 }, { "epoch": 0.34, "grad_norm": 1.3828304068236836, "learning_rate": 7.6988616414954e-06, "loss": 0.2036, "step": 4739 }, { "epoch": 0.34, "grad_norm": 1.3099561276681655, "learning_rate": 7.697886411479422e-06, "loss": 0.1854, "step": 4740 }, { "epoch": 0.34, "grad_norm": 1.5053759040935586, "learning_rate": 7.696911036651621e-06, "loss": 0.2496, "step": 4741 }, { "epoch": 0.34, "grad_norm": 1.5182686435592552, "learning_rate": 7.695935517064352e-06, "loss": 0.1943, "step": 4742 }, { "epoch": 0.34, "grad_norm": 1.4235657024967352, "learning_rate": 7.694959852769972e-06, "loss": 0.2272, "step": 4743 }, { "epoch": 0.34, "grad_norm": 1.2952225674386235, "learning_rate": 7.693984043820852e-06, "loss": 0.2159, "step": 4744 }, { "epoch": 0.34, "grad_norm": 1.2622448496007699, "learning_rate": 7.693008090269374e-06, "loss": 0.173, "step": 4745 }, { "epoch": 0.34, "grad_norm": 1.2482007353236018, "learning_rate": 7.692031992167921e-06, "loss": 0.1932, "step": 4746 }, { "epoch": 0.34, "grad_norm": 1.328284774859831, "learning_rate": 7.691055749568885e-06, "loss": 0.211, "step": 4747 }, { "epoch": 0.34, "grad_norm": 1.2402447991480978, "learning_rate": 7.690079362524666e-06, "loss": 0.1489, "step": 4748 }, { "epoch": 0.34, "grad_norm": 1.4901467009292249, "learning_rate": 7.689102831087673e-06, "loss": 0.2525, "step": 4749 }, { "epoch": 0.34, "grad_norm": 5.878067706926046, "learning_rate": 7.688126155310326e-06, "loss": 0.6885, "step": 4750 }, { "epoch": 0.34, "grad_norm": 1.3817010171751853, "learning_rate": 7.687149335245046e-06, "loss": 0.2032, "step": 4751 }, { "epoch": 0.34, "grad_norm": 1.3808639596573962, "learning_rate": 7.686172370944262e-06, "loss": 0.2068, "step": 4752 }, { "epoch": 0.34, "grad_norm": 1.3069442104023203, "learning_rate": 7.68519526246042e-06, "loss": 0.2101, "step": 4753 }, { "epoch": 0.34, "grad_norm": 1.3325687667717678, "learning_rate": 7.684218009845963e-06, "loss": 0.1952, "step": 4754 }, { "epoch": 0.34, "grad_norm": 1.5643501164981082, "learning_rate": 7.683240613153346e-06, "loss": 0.2434, "step": 4755 }, { "epoch": 0.34, "grad_norm": 5.5634315246853765, "learning_rate": 7.682263072435035e-06, "loss": 0.5861, "step": 4756 }, { "epoch": 0.34, "grad_norm": 1.338700688562351, "learning_rate": 7.681285387743495e-06, "loss": 0.1885, "step": 4757 }, { "epoch": 0.34, "grad_norm": 1.2621647667930365, "learning_rate": 7.680307559131211e-06, "loss": 0.1917, "step": 4758 }, { "epoch": 0.34, "grad_norm": 5.284168673309812, "learning_rate": 7.679329586650666e-06, "loss": 0.5075, "step": 4759 }, { "epoch": 0.34, "grad_norm": 1.5555761774869115, "learning_rate": 7.67835147035435e-06, "loss": 0.2592, "step": 4760 }, { "epoch": 0.34, "grad_norm": 1.4112778162698283, "learning_rate": 7.677373210294769e-06, "loss": 0.2775, "step": 4761 }, { "epoch": 0.34, "grad_norm": 1.227435460165308, "learning_rate": 7.676394806524432e-06, "loss": 0.1941, "step": 4762 }, { "epoch": 0.34, "grad_norm": 1.3303070283760783, "learning_rate": 7.675416259095854e-06, "loss": 0.2043, "step": 4763 }, { "epoch": 0.34, "grad_norm": 1.359128076090484, "learning_rate": 7.674437568061559e-06, "loss": 0.2013, "step": 4764 }, { "epoch": 0.34, "grad_norm": 1.2868546486487915, "learning_rate": 7.673458733474081e-06, "loss": 0.1952, "step": 4765 }, { "epoch": 0.34, "grad_norm": 1.4590554233493203, "learning_rate": 7.672479755385958e-06, "loss": 0.2081, "step": 4766 }, { "epoch": 0.34, "grad_norm": 1.418259566522473, "learning_rate": 7.67150063384974e-06, "loss": 0.1737, "step": 4767 }, { "epoch": 0.34, "grad_norm": 1.5189724354844958, "learning_rate": 7.670521368917983e-06, "loss": 0.1758, "step": 4768 }, { "epoch": 0.34, "grad_norm": 5.958703495896905, "learning_rate": 7.669541960643246e-06, "loss": 0.5713, "step": 4769 }, { "epoch": 0.34, "grad_norm": 1.3350124994230945, "learning_rate": 7.668562409078102e-06, "loss": 0.2308, "step": 4770 }, { "epoch": 0.34, "grad_norm": 1.3516542855167446, "learning_rate": 7.66758271427513e-06, "loss": 0.1947, "step": 4771 }, { "epoch": 0.34, "grad_norm": 1.2413588946742415, "learning_rate": 7.666602876286917e-06, "loss": 0.1946, "step": 4772 }, { "epoch": 0.34, "grad_norm": 1.213847339434656, "learning_rate": 7.665622895166054e-06, "loss": 0.1356, "step": 4773 }, { "epoch": 0.34, "grad_norm": 1.3719635367607725, "learning_rate": 7.664642770965144e-06, "loss": 0.2083, "step": 4774 }, { "epoch": 0.34, "grad_norm": 1.4746489253933357, "learning_rate": 7.663662503736796e-06, "loss": 0.2234, "step": 4775 }, { "epoch": 0.34, "grad_norm": 1.4277292171734157, "learning_rate": 7.662682093533628e-06, "loss": 0.1991, "step": 4776 }, { "epoch": 0.34, "grad_norm": 1.2535939652339498, "learning_rate": 7.661701540408263e-06, "loss": 0.1693, "step": 4777 }, { "epoch": 0.34, "grad_norm": 5.291477683063257, "learning_rate": 7.660720844413333e-06, "loss": 0.5333, "step": 4778 }, { "epoch": 0.34, "grad_norm": 1.4080758817313526, "learning_rate": 7.65974000560148e-06, "loss": 0.2139, "step": 4779 }, { "epoch": 0.34, "grad_norm": 1.4034870364239027, "learning_rate": 7.658759024025349e-06, "loss": 0.2346, "step": 4780 }, { "epoch": 0.34, "grad_norm": 1.2476731993490076, "learning_rate": 7.657777899737596e-06, "loss": 0.1847, "step": 4781 }, { "epoch": 0.34, "grad_norm": 6.353810377694333, "learning_rate": 7.656796632790885e-06, "loss": 0.6565, "step": 4782 }, { "epoch": 0.34, "grad_norm": 1.3285412345757042, "learning_rate": 7.655815223237884e-06, "loss": 0.1629, "step": 4783 }, { "epoch": 0.34, "grad_norm": 1.380001667541261, "learning_rate": 7.654833671131274e-06, "loss": 0.2283, "step": 4784 }, { "epoch": 0.34, "grad_norm": 1.3471407908316917, "learning_rate": 7.65385197652374e-06, "loss": 0.2059, "step": 4785 }, { "epoch": 0.34, "grad_norm": 1.4080061119496534, "learning_rate": 7.652870139467975e-06, "loss": 0.2357, "step": 4786 }, { "epoch": 0.34, "grad_norm": 1.3394925069781158, "learning_rate": 7.65188816001668e-06, "loss": 0.2184, "step": 4787 }, { "epoch": 0.34, "grad_norm": 1.4593354599939896, "learning_rate": 7.650906038222563e-06, "loss": 0.207, "step": 4788 }, { "epoch": 0.34, "grad_norm": 7.812776945242237, "learning_rate": 7.649923774138344e-06, "loss": 0.6408, "step": 4789 }, { "epoch": 0.34, "grad_norm": 1.3236409211252762, "learning_rate": 7.648941367816742e-06, "loss": 0.1794, "step": 4790 }, { "epoch": 0.34, "grad_norm": 1.545015814424405, "learning_rate": 7.647958819310491e-06, "loss": 0.2179, "step": 4791 }, { "epoch": 0.34, "grad_norm": 1.3692024652124706, "learning_rate": 7.646976128672332e-06, "loss": 0.1866, "step": 4792 }, { "epoch": 0.34, "grad_norm": 1.5617845371835737, "learning_rate": 7.64599329595501e-06, "loss": 0.253, "step": 4793 }, { "epoch": 0.34, "grad_norm": 1.2905704416991175, "learning_rate": 7.64501032121128e-06, "loss": 0.1779, "step": 4794 }, { "epoch": 0.34, "grad_norm": 1.3308460365962715, "learning_rate": 7.644027204493902e-06, "loss": 0.2042, "step": 4795 }, { "epoch": 0.34, "grad_norm": 1.3124957898029632, "learning_rate": 7.643043945855648e-06, "loss": 0.2094, "step": 4796 }, { "epoch": 0.34, "grad_norm": 1.5019655768267048, "learning_rate": 7.642060545349296e-06, "loss": 0.2519, "step": 4797 }, { "epoch": 0.34, "grad_norm": 4.962778050704384, "learning_rate": 7.641077003027632e-06, "loss": 0.5218, "step": 4798 }, { "epoch": 0.34, "grad_norm": 1.3518032245550393, "learning_rate": 7.640093318943445e-06, "loss": 0.2273, "step": 4799 }, { "epoch": 0.34, "grad_norm": 1.3206915323461959, "learning_rate": 7.639109493149537e-06, "loss": 0.191, "step": 4800 }, { "epoch": 0.34, "grad_norm": 1.4623275872791461, "learning_rate": 7.638125525698717e-06, "loss": 0.2079, "step": 4801 }, { "epoch": 0.34, "grad_norm": 1.2915364454995029, "learning_rate": 7.637141416643801e-06, "loss": 0.1795, "step": 4802 }, { "epoch": 0.34, "grad_norm": 5.445577362855475, "learning_rate": 7.636157166037608e-06, "loss": 0.4998, "step": 4803 }, { "epoch": 0.34, "grad_norm": 1.4853517079752392, "learning_rate": 7.635172773932972e-06, "loss": 0.2161, "step": 4804 }, { "epoch": 0.34, "grad_norm": 6.349138158557549, "learning_rate": 7.63418824038273e-06, "loss": 0.6782, "step": 4805 }, { "epoch": 0.34, "grad_norm": 1.4365065464431361, "learning_rate": 7.633203565439729e-06, "loss": 0.2246, "step": 4806 }, { "epoch": 0.34, "grad_norm": 1.2390027699622674, "learning_rate": 7.632218749156822e-06, "loss": 0.1683, "step": 4807 }, { "epoch": 0.34, "grad_norm": 1.290893284934911, "learning_rate": 7.63123379158687e-06, "loss": 0.1888, "step": 4808 }, { "epoch": 0.34, "grad_norm": 1.4570974272516766, "learning_rate": 7.63024869278274e-06, "loss": 0.2488, "step": 4809 }, { "epoch": 0.34, "grad_norm": 1.375698858780777, "learning_rate": 7.629263452797311e-06, "loss": 0.2316, "step": 4810 }, { "epoch": 0.34, "grad_norm": 1.5072108475588435, "learning_rate": 7.628278071683465e-06, "loss": 0.1793, "step": 4811 }, { "epoch": 0.34, "grad_norm": 5.526017044000886, "learning_rate": 7.627292549494092e-06, "loss": 0.713, "step": 4812 }, { "epoch": 0.34, "grad_norm": 1.3093291958536024, "learning_rate": 7.626306886282096e-06, "loss": 0.19, "step": 4813 }, { "epoch": 0.34, "grad_norm": 5.662294652638992, "learning_rate": 7.6253210821003765e-06, "loss": 0.5742, "step": 4814 }, { "epoch": 0.34, "grad_norm": 1.563646496477359, "learning_rate": 7.624335137001854e-06, "loss": 0.1871, "step": 4815 }, { "epoch": 0.34, "grad_norm": 1.2678218628667255, "learning_rate": 7.623349051039446e-06, "loss": 0.1952, "step": 4816 }, { "epoch": 0.34, "grad_norm": 1.4776598187788588, "learning_rate": 7.622362824266083e-06, "loss": 0.1878, "step": 4817 }, { "epoch": 0.34, "grad_norm": 1.3459846639995374, "learning_rate": 7.621376456734702e-06, "loss": 0.1907, "step": 4818 }, { "epoch": 0.34, "grad_norm": 1.5095567454306926, "learning_rate": 7.620389948498245e-06, "loss": 0.2214, "step": 4819 }, { "epoch": 0.34, "grad_norm": 1.3728387013238597, "learning_rate": 7.6194032996096685e-06, "loss": 0.1911, "step": 4820 }, { "epoch": 0.34, "grad_norm": 6.558272752468376, "learning_rate": 7.618416510121928e-06, "loss": 0.6037, "step": 4821 }, { "epoch": 0.34, "grad_norm": 1.4598006541076647, "learning_rate": 7.617429580087992e-06, "loss": 0.2251, "step": 4822 }, { "epoch": 0.34, "grad_norm": 1.2905467003056512, "learning_rate": 7.616442509560835e-06, "loss": 0.2051, "step": 4823 }, { "epoch": 0.35, "grad_norm": 1.2464350907294492, "learning_rate": 7.6154552985934385e-06, "loss": 0.2223, "step": 4824 }, { "epoch": 0.35, "grad_norm": 1.2923541791153246, "learning_rate": 7.614467947238791e-06, "loss": 0.187, "step": 4825 }, { "epoch": 0.35, "grad_norm": 1.2937071697459692, "learning_rate": 7.613480455549892e-06, "loss": 0.181, "step": 4826 }, { "epoch": 0.35, "grad_norm": 1.3702656641539492, "learning_rate": 7.612492823579744e-06, "loss": 0.1843, "step": 4827 }, { "epoch": 0.35, "grad_norm": 1.3470974453348925, "learning_rate": 7.611505051381363e-06, "loss": 0.2037, "step": 4828 }, { "epoch": 0.35, "grad_norm": 1.4379575616220603, "learning_rate": 7.610517139007763e-06, "loss": 0.2116, "step": 4829 }, { "epoch": 0.35, "grad_norm": 4.681876417899939, "learning_rate": 7.609529086511974e-06, "loss": 0.487, "step": 4830 }, { "epoch": 0.35, "grad_norm": 1.303871482302147, "learning_rate": 7.608540893947032e-06, "loss": 0.195, "step": 4831 }, { "epoch": 0.35, "grad_norm": 1.4187441867158963, "learning_rate": 7.6075525613659775e-06, "loss": 0.2202, "step": 4832 }, { "epoch": 0.35, "grad_norm": 1.4298521316600983, "learning_rate": 7.6065640888218595e-06, "loss": 0.1985, "step": 4833 }, { "epoch": 0.35, "grad_norm": 5.6041872257246235, "learning_rate": 7.605575476367739e-06, "loss": 0.5279, "step": 4834 }, { "epoch": 0.35, "grad_norm": 1.2670522491520646, "learning_rate": 7.604586724056677e-06, "loss": 0.1873, "step": 4835 }, { "epoch": 0.35, "grad_norm": 1.5206554895647546, "learning_rate": 7.603597831941747e-06, "loss": 0.2176, "step": 4836 }, { "epoch": 0.35, "grad_norm": 1.4900278091852655, "learning_rate": 7.6026088000760276e-06, "loss": 0.2839, "step": 4837 }, { "epoch": 0.35, "grad_norm": 1.3689246018042849, "learning_rate": 7.601619628512611e-06, "loss": 0.2406, "step": 4838 }, { "epoch": 0.35, "grad_norm": 4.130549102392845, "learning_rate": 7.600630317304586e-06, "loss": 0.6326, "step": 4839 }, { "epoch": 0.35, "grad_norm": 1.4184896893429064, "learning_rate": 7.599640866505058e-06, "loss": 0.2299, "step": 4840 }, { "epoch": 0.35, "grad_norm": 12.325807647443925, "learning_rate": 7.598651276167135e-06, "loss": 0.6177, "step": 4841 }, { "epoch": 0.35, "grad_norm": 1.344380938165053, "learning_rate": 7.597661546343936e-06, "loss": 0.1916, "step": 4842 }, { "epoch": 0.35, "grad_norm": 1.47371052658536, "learning_rate": 7.596671677088585e-06, "loss": 0.1702, "step": 4843 }, { "epoch": 0.35, "grad_norm": 3.760704218726979, "learning_rate": 7.5956816684542154e-06, "loss": 0.2053, "step": 4844 }, { "epoch": 0.35, "grad_norm": 1.5055142206570484, "learning_rate": 7.594691520493965e-06, "loss": 0.2568, "step": 4845 }, { "epoch": 0.35, "grad_norm": 1.459961605910439, "learning_rate": 7.593701233260983e-06, "loss": 0.1919, "step": 4846 }, { "epoch": 0.35, "grad_norm": 1.468617622570324, "learning_rate": 7.592710806808422e-06, "loss": 0.2301, "step": 4847 }, { "epoch": 0.35, "grad_norm": 1.2709344896206303, "learning_rate": 7.591720241189446e-06, "loss": 0.1909, "step": 4848 }, { "epoch": 0.35, "grad_norm": 1.2501506566761205, "learning_rate": 7.590729536457222e-06, "loss": 0.1912, "step": 4849 }, { "epoch": 0.35, "grad_norm": 1.271986900844157, "learning_rate": 7.58973869266493e-06, "loss": 0.1704, "step": 4850 }, { "epoch": 0.35, "grad_norm": 1.528409270726896, "learning_rate": 7.588747709865753e-06, "loss": 0.2483, "step": 4851 }, { "epoch": 0.35, "grad_norm": 1.316591437389964, "learning_rate": 7.587756588112884e-06, "loss": 0.2133, "step": 4852 }, { "epoch": 0.35, "grad_norm": 1.3276773688122625, "learning_rate": 7.586765327459523e-06, "loss": 0.222, "step": 4853 }, { "epoch": 0.35, "grad_norm": 1.175065077542618, "learning_rate": 7.585773927958875e-06, "loss": 0.2033, "step": 4854 }, { "epoch": 0.35, "grad_norm": 1.171127420129231, "learning_rate": 7.5847823896641545e-06, "loss": 0.1816, "step": 4855 }, { "epoch": 0.35, "grad_norm": 1.3604538157774908, "learning_rate": 7.583790712628585e-06, "loss": 0.2668, "step": 4856 }, { "epoch": 0.35, "grad_norm": 1.2119980473902272, "learning_rate": 7.582798896905396e-06, "loss": 0.1755, "step": 4857 }, { "epoch": 0.35, "grad_norm": 1.454400368314169, "learning_rate": 7.58180694254782e-06, "loss": 0.1672, "step": 4858 }, { "epoch": 0.35, "grad_norm": 1.368637701492504, "learning_rate": 7.580814849609107e-06, "loss": 0.2037, "step": 4859 }, { "epoch": 0.35, "grad_norm": 4.79332036474255, "learning_rate": 7.579822618142505e-06, "loss": 0.7615, "step": 4860 }, { "epoch": 0.35, "grad_norm": 1.3204940761501438, "learning_rate": 7.578830248201274e-06, "loss": 0.2147, "step": 4861 }, { "epoch": 0.35, "grad_norm": 1.2068251132945396, "learning_rate": 7.5778377398386815e-06, "loss": 0.1884, "step": 4862 }, { "epoch": 0.35, "grad_norm": 4.962422720932961, "learning_rate": 7.576845093107998e-06, "loss": 0.5353, "step": 4863 }, { "epoch": 0.35, "grad_norm": 1.3713051258115976, "learning_rate": 7.575852308062509e-06, "loss": 0.24, "step": 4864 }, { "epoch": 0.35, "grad_norm": 1.3252313777225955, "learning_rate": 7.574859384755502e-06, "loss": 0.1787, "step": 4865 }, { "epoch": 0.35, "grad_norm": 1.5692679421811029, "learning_rate": 7.5738663232402706e-06, "loss": 0.2674, "step": 4866 }, { "epoch": 0.35, "grad_norm": 1.3828773822083427, "learning_rate": 7.572873123570122e-06, "loss": 0.202, "step": 4867 }, { "epoch": 0.35, "grad_norm": 1.7611567492044915, "learning_rate": 7.571879785798364e-06, "loss": 0.2849, "step": 4868 }, { "epoch": 0.35, "grad_norm": 1.2841391409561893, "learning_rate": 7.570886309978318e-06, "loss": 0.21, "step": 4869 }, { "epoch": 0.35, "grad_norm": 1.2021566397634278, "learning_rate": 7.569892696163308e-06, "loss": 0.1678, "step": 4870 }, { "epoch": 0.35, "grad_norm": 1.3162514003607486, "learning_rate": 7.568898944406667e-06, "loss": 0.195, "step": 4871 }, { "epoch": 0.35, "grad_norm": 1.507754483745572, "learning_rate": 7.5679050547617375e-06, "loss": 0.1845, "step": 4872 }, { "epoch": 0.35, "grad_norm": 1.2782065369795192, "learning_rate": 7.566911027281867e-06, "loss": 0.1817, "step": 4873 }, { "epoch": 0.35, "grad_norm": 1.3476634111381744, "learning_rate": 7.565916862020408e-06, "loss": 0.1991, "step": 4874 }, { "epoch": 0.35, "grad_norm": 8.068654419698149, "learning_rate": 7.564922559030727e-06, "loss": 0.6658, "step": 4875 }, { "epoch": 0.35, "grad_norm": 1.3277446711224548, "learning_rate": 7.563928118366193e-06, "loss": 0.1878, "step": 4876 }, { "epoch": 0.35, "grad_norm": 5.226406401741414, "learning_rate": 7.562933540080184e-06, "loss": 0.6546, "step": 4877 }, { "epoch": 0.35, "grad_norm": 1.2223421587364767, "learning_rate": 7.561938824226085e-06, "loss": 0.1844, "step": 4878 }, { "epoch": 0.35, "grad_norm": 1.5586431550918087, "learning_rate": 7.560943970857286e-06, "loss": 0.2188, "step": 4879 }, { "epoch": 0.35, "grad_norm": 4.991888915391443, "learning_rate": 7.559948980027189e-06, "loss": 0.4627, "step": 4880 }, { "epoch": 0.35, "grad_norm": 1.2109543725805108, "learning_rate": 7.558953851789202e-06, "loss": 0.1307, "step": 4881 }, { "epoch": 0.35, "grad_norm": 1.5019326719899215, "learning_rate": 7.5579585861967384e-06, "loss": 0.2221, "step": 4882 }, { "epoch": 0.35, "grad_norm": 1.4867330949771582, "learning_rate": 7.55696318330322e-06, "loss": 0.2136, "step": 4883 }, { "epoch": 0.35, "grad_norm": 1.3573556113783107, "learning_rate": 7.5559676431620745e-06, "loss": 0.1958, "step": 4884 }, { "epoch": 0.35, "grad_norm": 1.351894349261488, "learning_rate": 7.554971965826743e-06, "loss": 0.2218, "step": 4885 }, { "epoch": 0.35, "grad_norm": 1.1705583896548382, "learning_rate": 7.553976151350666e-06, "loss": 0.1662, "step": 4886 }, { "epoch": 0.35, "grad_norm": 1.4584942278928406, "learning_rate": 7.5529801997872945e-06, "loss": 0.2114, "step": 4887 }, { "epoch": 0.35, "grad_norm": 1.3951963575091393, "learning_rate": 7.551984111190088e-06, "loss": 0.2144, "step": 4888 }, { "epoch": 0.35, "grad_norm": 1.4317257168999753, "learning_rate": 7.550987885612514e-06, "loss": 0.2462, "step": 4889 }, { "epoch": 0.35, "grad_norm": 1.3874513555107229, "learning_rate": 7.5499915231080445e-06, "loss": 0.1742, "step": 4890 }, { "epoch": 0.35, "grad_norm": 1.4657648796551586, "learning_rate": 7.548995023730161e-06, "loss": 0.2518, "step": 4891 }, { "epoch": 0.35, "grad_norm": 1.1510718228769996, "learning_rate": 7.5479983875323515e-06, "loss": 0.193, "step": 4892 }, { "epoch": 0.35, "grad_norm": 1.4221449394956425, "learning_rate": 7.54700161456811e-06, "loss": 0.2088, "step": 4893 }, { "epoch": 0.35, "grad_norm": 1.4121771519367425, "learning_rate": 7.546004704890941e-06, "loss": 0.1969, "step": 4894 }, { "epoch": 0.35, "grad_norm": 1.3725266277942887, "learning_rate": 7.545007658554355e-06, "loss": 0.1933, "step": 4895 }, { "epoch": 0.35, "grad_norm": 1.3565179177323987, "learning_rate": 7.544010475611868e-06, "loss": 0.1809, "step": 4896 }, { "epoch": 0.35, "grad_norm": 1.43429813419383, "learning_rate": 7.543013156117005e-06, "loss": 0.234, "step": 4897 }, { "epoch": 0.35, "grad_norm": 1.4277102171831735, "learning_rate": 7.5420157001233e-06, "loss": 0.2495, "step": 4898 }, { "epoch": 0.35, "grad_norm": 1.2805273655553038, "learning_rate": 7.54101810768429e-06, "loss": 0.1747, "step": 4899 }, { "epoch": 0.35, "grad_norm": 1.2491244302454414, "learning_rate": 7.540020378853523e-06, "loss": 0.2092, "step": 4900 }, { "epoch": 0.35, "grad_norm": 1.4530146738132, "learning_rate": 7.539022513684554e-06, "loss": 0.2364, "step": 4901 }, { "epoch": 0.35, "grad_norm": 1.1580658916765, "learning_rate": 7.538024512230942e-06, "loss": 0.1536, "step": 4902 }, { "epoch": 0.35, "grad_norm": 1.2738472211245093, "learning_rate": 7.537026374546259e-06, "loss": 0.2019, "step": 4903 }, { "epoch": 0.35, "grad_norm": 1.27540036808571, "learning_rate": 7.536028100684078e-06, "loss": 0.2428, "step": 4904 }, { "epoch": 0.35, "grad_norm": 1.3213004179854244, "learning_rate": 7.535029690697984e-06, "loss": 0.2214, "step": 4905 }, { "epoch": 0.35, "grad_norm": 1.4292236653344172, "learning_rate": 7.534031144641567e-06, "loss": 0.2201, "step": 4906 }, { "epoch": 0.35, "grad_norm": 1.3656069957100705, "learning_rate": 7.533032462568426e-06, "loss": 0.2183, "step": 4907 }, { "epoch": 0.35, "grad_norm": 1.3169538418677602, "learning_rate": 7.532033644532166e-06, "loss": 0.2135, "step": 4908 }, { "epoch": 0.35, "grad_norm": 6.842073703047555, "learning_rate": 7.531034690586397e-06, "loss": 0.6519, "step": 4909 }, { "epoch": 0.35, "grad_norm": 1.6822239306366573, "learning_rate": 7.530035600784743e-06, "loss": 0.2638, "step": 4910 }, { "epoch": 0.35, "grad_norm": 1.1971238627600487, "learning_rate": 7.529036375180829e-06, "loss": 0.159, "step": 4911 }, { "epoch": 0.35, "grad_norm": 1.4499414260005141, "learning_rate": 7.528037013828289e-06, "loss": 0.2585, "step": 4912 }, { "epoch": 0.35, "grad_norm": 1.3732065734740053, "learning_rate": 7.527037516780766e-06, "loss": 0.2526, "step": 4913 }, { "epoch": 0.35, "grad_norm": 1.1866339609962981, "learning_rate": 7.5260378840919075e-06, "loss": 0.1833, "step": 4914 }, { "epoch": 0.35, "grad_norm": 2.2923380895358165, "learning_rate": 7.525038115815372e-06, "loss": 0.1872, "step": 4915 }, { "epoch": 0.35, "grad_norm": 1.4692185236673518, "learning_rate": 7.524038212004822e-06, "loss": 0.1853, "step": 4916 }, { "epoch": 0.35, "grad_norm": 1.366637234095254, "learning_rate": 7.523038172713928e-06, "loss": 0.2053, "step": 4917 }, { "epoch": 0.35, "grad_norm": 1.2856167046513394, "learning_rate": 7.522037997996367e-06, "loss": 0.2188, "step": 4918 }, { "epoch": 0.35, "grad_norm": 4.594769744316143, "learning_rate": 7.521037687905828e-06, "loss": 0.5826, "step": 4919 }, { "epoch": 0.35, "grad_norm": 1.3259501384597088, "learning_rate": 7.520037242496e-06, "loss": 0.1832, "step": 4920 }, { "epoch": 0.35, "grad_norm": 1.4562357405237034, "learning_rate": 7.519036661820585e-06, "loss": 0.2462, "step": 4921 }, { "epoch": 0.35, "grad_norm": 1.3075853379762876, "learning_rate": 7.518035945933289e-06, "loss": 0.1945, "step": 4922 }, { "epoch": 0.35, "grad_norm": 1.353088020037506, "learning_rate": 7.5170350948878276e-06, "loss": 0.2283, "step": 4923 }, { "epoch": 0.35, "grad_norm": 1.274677746850781, "learning_rate": 7.516034108737922e-06, "loss": 0.1893, "step": 4924 }, { "epoch": 0.35, "grad_norm": 1.3695201697438923, "learning_rate": 7.5150329875373005e-06, "loss": 0.2687, "step": 4925 }, { "epoch": 0.35, "grad_norm": 1.1606398021160842, "learning_rate": 7.5140317313397004e-06, "loss": 0.157, "step": 4926 }, { "epoch": 0.35, "grad_norm": 1.4315485020922114, "learning_rate": 7.513030340198865e-06, "loss": 0.2356, "step": 4927 }, { "epoch": 0.35, "grad_norm": 1.163736619001254, "learning_rate": 7.512028814168543e-06, "loss": 0.1776, "step": 4928 }, { "epoch": 0.35, "grad_norm": 1.4943158448350171, "learning_rate": 7.511027153302497e-06, "loss": 0.234, "step": 4929 }, { "epoch": 0.35, "grad_norm": 1.291155598321692, "learning_rate": 7.510025357654487e-06, "loss": 0.2008, "step": 4930 }, { "epoch": 0.35, "grad_norm": 1.4704581664077705, "learning_rate": 7.509023427278288e-06, "loss": 0.1824, "step": 4931 }, { "epoch": 0.35, "grad_norm": 1.3906552140004318, "learning_rate": 7.50802136222768e-06, "loss": 0.2197, "step": 4932 }, { "epoch": 0.35, "grad_norm": 1.435666578778386, "learning_rate": 7.507019162556448e-06, "loss": 0.1937, "step": 4933 }, { "epoch": 0.35, "grad_norm": 1.519805774388088, "learning_rate": 7.506016828318387e-06, "loss": 0.2248, "step": 4934 }, { "epoch": 0.35, "grad_norm": 1.3163819304563045, "learning_rate": 7.5050143595673e-06, "loss": 0.1942, "step": 4935 }, { "epoch": 0.35, "grad_norm": 1.2351041680437558, "learning_rate": 7.504011756356993e-06, "loss": 0.1595, "step": 4936 }, { "epoch": 0.35, "grad_norm": 1.346938297916784, "learning_rate": 7.503009018741284e-06, "loss": 0.2252, "step": 4937 }, { "epoch": 0.35, "grad_norm": 1.3211131714154514, "learning_rate": 7.502006146773994e-06, "loss": 0.1969, "step": 4938 }, { "epoch": 0.35, "grad_norm": 1.563772423638981, "learning_rate": 7.501003140508953e-06, "loss": 0.2431, "step": 4939 }, { "epoch": 0.35, "grad_norm": 1.3086536119191392, "learning_rate": 7.500000000000001e-06, "loss": 0.231, "step": 4940 }, { "epoch": 0.35, "grad_norm": 1.2805202486805651, "learning_rate": 7.498996725300981e-06, "loss": 0.1908, "step": 4941 }, { "epoch": 0.35, "grad_norm": 1.4735548834670145, "learning_rate": 7.4979933164657435e-06, "loss": 0.2256, "step": 4942 }, { "epoch": 0.35, "grad_norm": 1.3386817786513228, "learning_rate": 7.496989773548149e-06, "loss": 0.1723, "step": 4943 }, { "epoch": 0.35, "grad_norm": 1.32719791301756, "learning_rate": 7.495986096602064e-06, "loss": 0.2302, "step": 4944 }, { "epoch": 0.35, "grad_norm": 1.3953854996827593, "learning_rate": 7.494982285681361e-06, "loss": 0.179, "step": 4945 }, { "epoch": 0.35, "grad_norm": 1.4277518925399062, "learning_rate": 7.493978340839922e-06, "loss": 0.2028, "step": 4946 }, { "epoch": 0.35, "grad_norm": 1.3257678355133768, "learning_rate": 7.4929742621316316e-06, "loss": 0.2195, "step": 4947 }, { "epoch": 0.35, "grad_norm": 1.391522824989136, "learning_rate": 7.491970049610388e-06, "loss": 0.2035, "step": 4948 }, { "epoch": 0.35, "grad_norm": 9.302298565299465, "learning_rate": 7.490965703330092e-06, "loss": 0.6846, "step": 4949 }, { "epoch": 0.35, "grad_norm": 1.2947533257862585, "learning_rate": 7.489961223344654e-06, "loss": 0.2165, "step": 4950 }, { "epoch": 0.35, "grad_norm": 5.812313190245047, "learning_rate": 7.488956609707988e-06, "loss": 0.585, "step": 4951 }, { "epoch": 0.35, "grad_norm": 1.338333064193312, "learning_rate": 7.487951862474021e-06, "loss": 0.2193, "step": 4952 }, { "epoch": 0.35, "grad_norm": 4.801694758161189, "learning_rate": 7.486946981696681e-06, "loss": 0.6365, "step": 4953 }, { "epoch": 0.35, "grad_norm": 1.2531530747128843, "learning_rate": 7.485941967429908e-06, "loss": 0.1742, "step": 4954 }, { "epoch": 0.35, "grad_norm": 1.30309347691331, "learning_rate": 7.484936819727646e-06, "loss": 0.1511, "step": 4955 }, { "epoch": 0.35, "grad_norm": 1.4588277513293393, "learning_rate": 7.483931538643847e-06, "loss": 0.1658, "step": 4956 }, { "epoch": 0.35, "grad_norm": 1.482204001242645, "learning_rate": 7.482926124232473e-06, "loss": 0.2778, "step": 4957 }, { "epoch": 0.35, "grad_norm": 1.4228124615462518, "learning_rate": 7.481920576547488e-06, "loss": 0.2163, "step": 4958 }, { "epoch": 0.35, "grad_norm": 1.5064408305912127, "learning_rate": 7.480914895642866e-06, "loss": 0.1719, "step": 4959 }, { "epoch": 0.35, "grad_norm": 1.4639589919492935, "learning_rate": 7.479909081572587e-06, "loss": 0.1989, "step": 4960 }, { "epoch": 0.35, "grad_norm": 1.3416977824803633, "learning_rate": 7.4789031343906425e-06, "loss": 0.1786, "step": 4961 }, { "epoch": 0.35, "grad_norm": 1.5187715510673072, "learning_rate": 7.4778970541510265e-06, "loss": 0.2705, "step": 4962 }, { "epoch": 0.36, "grad_norm": 1.3792651359763892, "learning_rate": 7.47689084090774e-06, "loss": 0.232, "step": 4963 }, { "epoch": 0.36, "grad_norm": 5.329243693926206, "learning_rate": 7.475884494714794e-06, "loss": 0.7428, "step": 4964 }, { "epoch": 0.36, "grad_norm": 1.4654588554639674, "learning_rate": 7.474878015626204e-06, "loss": 0.2442, "step": 4965 }, { "epoch": 0.36, "grad_norm": 1.4817947980953543, "learning_rate": 7.473871403695994e-06, "loss": 0.2063, "step": 4966 }, { "epoch": 0.36, "grad_norm": 1.4288554842016112, "learning_rate": 7.472864658978195e-06, "loss": 0.2141, "step": 4967 }, { "epoch": 0.36, "grad_norm": 1.4722007821868395, "learning_rate": 7.471857781526846e-06, "loss": 0.19, "step": 4968 }, { "epoch": 0.36, "grad_norm": 1.2411222914807758, "learning_rate": 7.47085077139599e-06, "loss": 0.2233, "step": 4969 }, { "epoch": 0.36, "grad_norm": 1.3518056905884852, "learning_rate": 7.469843628639682e-06, "loss": 0.2266, "step": 4970 }, { "epoch": 0.36, "grad_norm": 1.2749149974591274, "learning_rate": 7.468836353311981e-06, "loss": 0.1832, "step": 4971 }, { "epoch": 0.36, "grad_norm": 1.3128472700591396, "learning_rate": 7.467828945466951e-06, "loss": 0.2163, "step": 4972 }, { "epoch": 0.36, "grad_norm": 5.783606297757659, "learning_rate": 7.466821405158669e-06, "loss": 0.5181, "step": 4973 }, { "epoch": 0.36, "grad_norm": 1.3840725379667707, "learning_rate": 7.465813732441213e-06, "loss": 0.2244, "step": 4974 }, { "epoch": 0.36, "grad_norm": 1.204090294273979, "learning_rate": 7.464805927368672e-06, "loss": 0.1766, "step": 4975 }, { "epoch": 0.36, "grad_norm": 1.3995168247664902, "learning_rate": 7.463797989995141e-06, "loss": 0.2371, "step": 4976 }, { "epoch": 0.36, "grad_norm": 1.2351626883583433, "learning_rate": 7.462789920374722e-06, "loss": 0.2327, "step": 4977 }, { "epoch": 0.36, "grad_norm": 1.3104804963697543, "learning_rate": 7.461781718561524e-06, "loss": 0.2225, "step": 4978 }, { "epoch": 0.36, "grad_norm": 1.5308322627951092, "learning_rate": 7.4607733846096645e-06, "loss": 0.1825, "step": 4979 }, { "epoch": 0.36, "grad_norm": 5.090044475468448, "learning_rate": 7.459764918573264e-06, "loss": 0.5416, "step": 4980 }, { "epoch": 0.36, "grad_norm": 1.3605899422455203, "learning_rate": 7.4587563205064575e-06, "loss": 0.2072, "step": 4981 }, { "epoch": 0.36, "grad_norm": 1.5297551866753034, "learning_rate": 7.457747590463378e-06, "loss": 0.2346, "step": 4982 }, { "epoch": 0.36, "grad_norm": 1.516775900207488, "learning_rate": 7.456738728498171e-06, "loss": 0.1988, "step": 4983 }, { "epoch": 0.36, "grad_norm": 1.4080765249319216, "learning_rate": 7.455729734664993e-06, "loss": 0.2006, "step": 4984 }, { "epoch": 0.36, "grad_norm": 1.359848000505531, "learning_rate": 7.454720609017996e-06, "loss": 0.2065, "step": 4985 }, { "epoch": 0.36, "grad_norm": 1.4725352821496327, "learning_rate": 7.453711351611349e-06, "loss": 0.2247, "step": 4986 }, { "epoch": 0.36, "grad_norm": 1.326877550427805, "learning_rate": 7.452701962499225e-06, "loss": 0.211, "step": 4987 }, { "epoch": 0.36, "grad_norm": 1.302379900783371, "learning_rate": 7.451692441735804e-06, "loss": 0.2093, "step": 4988 }, { "epoch": 0.36, "grad_norm": 1.4113435272542305, "learning_rate": 7.450682789375272e-06, "loss": 0.1995, "step": 4989 }, { "epoch": 0.36, "grad_norm": 1.2861232622733887, "learning_rate": 7.449673005471825e-06, "loss": 0.1867, "step": 4990 }, { "epoch": 0.36, "grad_norm": 1.2516408707609386, "learning_rate": 7.448663090079662e-06, "loss": 0.206, "step": 4991 }, { "epoch": 0.36, "grad_norm": 1.3681929554335204, "learning_rate": 7.447653043252993e-06, "loss": 0.2437, "step": 4992 }, { "epoch": 0.36, "grad_norm": 1.4781755994456356, "learning_rate": 7.446642865046032e-06, "loss": 0.1872, "step": 4993 }, { "epoch": 0.36, "grad_norm": 1.2963522326464425, "learning_rate": 7.445632555513003e-06, "loss": 0.1986, "step": 4994 }, { "epoch": 0.36, "grad_norm": 1.3966555867825088, "learning_rate": 7.444622114708134e-06, "loss": 0.171, "step": 4995 }, { "epoch": 0.36, "grad_norm": 1.2734060457982501, "learning_rate": 7.443611542685663e-06, "loss": 0.1551, "step": 4996 }, { "epoch": 0.36, "grad_norm": 1.4455191339960127, "learning_rate": 7.44260083949983e-06, "loss": 0.2435, "step": 4997 }, { "epoch": 0.36, "grad_norm": 1.254516001195114, "learning_rate": 7.441590005204888e-06, "loss": 0.1769, "step": 4998 }, { "epoch": 0.36, "grad_norm": 1.2699722072751873, "learning_rate": 7.440579039855097e-06, "loss": 0.201, "step": 4999 }, { "epoch": 0.36, "grad_norm": 1.2785232134454927, "learning_rate": 7.4395679435047175e-06, "loss": 0.1789, "step": 5000 }, { "epoch": 0.36, "grad_norm": 1.2685419615302125, "learning_rate": 7.4385567162080215e-06, "loss": 0.2495, "step": 5001 }, { "epoch": 0.36, "grad_norm": 1.4486776913062192, "learning_rate": 7.437545358019291e-06, "loss": 0.2326, "step": 5002 }, { "epoch": 0.36, "grad_norm": 1.3121029817118894, "learning_rate": 7.436533868992808e-06, "loss": 0.2147, "step": 5003 }, { "epoch": 0.36, "grad_norm": 1.0753230726305032, "learning_rate": 7.435522249182868e-06, "loss": 0.1657, "step": 5004 }, { "epoch": 0.36, "grad_norm": 4.116535998391369, "learning_rate": 7.434510498643769e-06, "loss": 0.5774, "step": 5005 }, { "epoch": 0.36, "grad_norm": 1.4567701753845885, "learning_rate": 7.433498617429817e-06, "loss": 0.2316, "step": 5006 }, { "epoch": 0.36, "grad_norm": 1.254519145806552, "learning_rate": 7.432486605595328e-06, "loss": 0.1963, "step": 5007 }, { "epoch": 0.36, "grad_norm": 1.2047559223408137, "learning_rate": 7.431474463194624e-06, "loss": 0.1915, "step": 5008 }, { "epoch": 0.36, "grad_norm": 1.5299436975800416, "learning_rate": 7.430462190282027e-06, "loss": 0.2719, "step": 5009 }, { "epoch": 0.36, "grad_norm": 1.367391509733238, "learning_rate": 7.429449786911876e-06, "loss": 0.204, "step": 5010 }, { "epoch": 0.36, "grad_norm": 1.4626050551548924, "learning_rate": 7.428437253138512e-06, "loss": 0.2344, "step": 5011 }, { "epoch": 0.36, "grad_norm": 1.3667750570565864, "learning_rate": 7.4274245890162854e-06, "loss": 0.2626, "step": 5012 }, { "epoch": 0.36, "grad_norm": 1.3496837911785873, "learning_rate": 7.426411794599551e-06, "loss": 0.2079, "step": 5013 }, { "epoch": 0.36, "grad_norm": 1.604965542930624, "learning_rate": 7.425398869942669e-06, "loss": 0.1951, "step": 5014 }, { "epoch": 0.36, "grad_norm": 1.4626663462069416, "learning_rate": 7.424385815100011e-06, "loss": 0.2113, "step": 5015 }, { "epoch": 0.36, "grad_norm": 1.3737570541114905, "learning_rate": 7.4233726301259555e-06, "loss": 0.1906, "step": 5016 }, { "epoch": 0.36, "grad_norm": 1.2279924735078325, "learning_rate": 7.4223593150748855e-06, "loss": 0.2047, "step": 5017 }, { "epoch": 0.36, "grad_norm": 1.3911663855664134, "learning_rate": 7.421345870001191e-06, "loss": 0.2609, "step": 5018 }, { "epoch": 0.36, "grad_norm": 1.316822990250142, "learning_rate": 7.420332294959267e-06, "loss": 0.2206, "step": 5019 }, { "epoch": 0.36, "grad_norm": 1.62683803517904, "learning_rate": 7.419318590003524e-06, "loss": 0.2201, "step": 5020 }, { "epoch": 0.36, "grad_norm": 1.2661009154476908, "learning_rate": 7.418304755188368e-06, "loss": 0.1816, "step": 5021 }, { "epoch": 0.36, "grad_norm": 1.4826038741199117, "learning_rate": 7.417290790568221e-06, "loss": 0.2125, "step": 5022 }, { "epoch": 0.36, "grad_norm": 1.3029836311122123, "learning_rate": 7.416276696197508e-06, "loss": 0.1898, "step": 5023 }, { "epoch": 0.36, "grad_norm": 1.4352710852586592, "learning_rate": 7.415262472130662e-06, "loss": 0.2133, "step": 5024 }, { "epoch": 0.36, "grad_norm": 1.4075647467950272, "learning_rate": 7.41424811842212e-06, "loss": 0.208, "step": 5025 }, { "epoch": 0.36, "grad_norm": 1.3499260887375084, "learning_rate": 7.413233635126332e-06, "loss": 0.1969, "step": 5026 }, { "epoch": 0.36, "grad_norm": 1.1583077733819755, "learning_rate": 7.412219022297749e-06, "loss": 0.1915, "step": 5027 }, { "epoch": 0.36, "grad_norm": 1.3369953391197438, "learning_rate": 7.411204279990832e-06, "loss": 0.2444, "step": 5028 }, { "epoch": 0.36, "grad_norm": 6.159860390306262, "learning_rate": 7.410189408260049e-06, "loss": 0.594, "step": 5029 }, { "epoch": 0.36, "grad_norm": 1.3597841296002897, "learning_rate": 7.409174407159873e-06, "loss": 0.2075, "step": 5030 }, { "epoch": 0.36, "grad_norm": 1.197193250817317, "learning_rate": 7.4081592767447865e-06, "loss": 0.173, "step": 5031 }, { "epoch": 0.36, "grad_norm": 1.2284514099036052, "learning_rate": 7.407144017069278e-06, "loss": 0.1967, "step": 5032 }, { "epoch": 0.36, "grad_norm": 1.3039656277051226, "learning_rate": 7.406128628187841e-06, "loss": 0.2295, "step": 5033 }, { "epoch": 0.36, "grad_norm": 1.3152298045258015, "learning_rate": 7.405113110154978e-06, "loss": 0.2023, "step": 5034 }, { "epoch": 0.36, "grad_norm": 1.29135420448655, "learning_rate": 7.404097463025197e-06, "loss": 0.1686, "step": 5035 }, { "epoch": 0.36, "grad_norm": 8.064973389530389, "learning_rate": 7.403081686853017e-06, "loss": 0.6202, "step": 5036 }, { "epoch": 0.36, "grad_norm": 1.4050148428714593, "learning_rate": 7.4020657816929594e-06, "loss": 0.2047, "step": 5037 }, { "epoch": 0.36, "grad_norm": 1.682322779335673, "learning_rate": 7.401049747599552e-06, "loss": 0.2331, "step": 5038 }, { "epoch": 0.36, "grad_norm": 1.4284679347685394, "learning_rate": 7.400033584627333e-06, "loss": 0.2463, "step": 5039 }, { "epoch": 0.36, "grad_norm": 5.514762780384517, "learning_rate": 7.399017292830848e-06, "loss": 0.5583, "step": 5040 }, { "epoch": 0.36, "grad_norm": 1.3018475750798593, "learning_rate": 7.3980008722646435e-06, "loss": 0.2189, "step": 5041 }, { "epoch": 0.36, "grad_norm": 1.1931038158556406, "learning_rate": 7.396984322983279e-06, "loss": 0.1665, "step": 5042 }, { "epoch": 0.36, "grad_norm": 1.2562281002231366, "learning_rate": 7.39596764504132e-06, "loss": 0.1892, "step": 5043 }, { "epoch": 0.36, "grad_norm": 1.4741267738891257, "learning_rate": 7.394950838493334e-06, "loss": 0.2071, "step": 5044 }, { "epoch": 0.36, "grad_norm": 3.572257998058601, "learning_rate": 7.393933903393904e-06, "loss": 0.362, "step": 5045 }, { "epoch": 0.36, "grad_norm": 1.1954920667259894, "learning_rate": 7.3929168397976126e-06, "loss": 0.1574, "step": 5046 }, { "epoch": 0.36, "grad_norm": 1.5499893582186544, "learning_rate": 7.391899647759052e-06, "loss": 0.2428, "step": 5047 }, { "epoch": 0.36, "grad_norm": 5.8688060894010246, "learning_rate": 7.390882327332819e-06, "loss": 0.5788, "step": 5048 }, { "epoch": 0.36, "grad_norm": 1.408068472863165, "learning_rate": 7.389864878573523e-06, "loss": 0.2189, "step": 5049 }, { "epoch": 0.36, "grad_norm": 1.145050207407742, "learning_rate": 7.3888473015357744e-06, "loss": 0.1393, "step": 5050 }, { "epoch": 0.36, "grad_norm": 1.3516528938735064, "learning_rate": 7.387829596274192e-06, "loss": 0.2101, "step": 5051 }, { "epoch": 0.36, "grad_norm": 1.5577477638591355, "learning_rate": 7.386811762843404e-06, "loss": 0.2137, "step": 5052 }, { "epoch": 0.36, "grad_norm": 1.353687514234991, "learning_rate": 7.3857938012980425e-06, "loss": 0.2129, "step": 5053 }, { "epoch": 0.36, "grad_norm": 1.3785065904693468, "learning_rate": 7.384775711692749e-06, "loss": 0.2153, "step": 5054 }, { "epoch": 0.36, "grad_norm": 1.3118505601478194, "learning_rate": 7.383757494082169e-06, "loss": 0.2231, "step": 5055 }, { "epoch": 0.36, "grad_norm": 1.441600778334505, "learning_rate": 7.3827391485209555e-06, "loss": 0.2646, "step": 5056 }, { "epoch": 0.36, "grad_norm": 5.197618172506486, "learning_rate": 7.381720675063772e-06, "loss": 0.5099, "step": 5057 }, { "epoch": 0.36, "grad_norm": 1.4236147305355868, "learning_rate": 7.380702073765285e-06, "loss": 0.1943, "step": 5058 }, { "epoch": 0.36, "grad_norm": 1.4851612016128881, "learning_rate": 7.379683344680169e-06, "loss": 0.2379, "step": 5059 }, { "epoch": 0.36, "grad_norm": 1.3235883095300325, "learning_rate": 7.3786644878631035e-06, "loss": 0.2027, "step": 5060 }, { "epoch": 0.36, "grad_norm": 1.244484782361441, "learning_rate": 7.377645503368778e-06, "loss": 0.151, "step": 5061 }, { "epoch": 0.36, "grad_norm": 1.1174083810085311, "learning_rate": 7.3766263912518885e-06, "loss": 0.1237, "step": 5062 }, { "epoch": 0.36, "grad_norm": 5.018050125766409, "learning_rate": 7.375607151567137e-06, "loss": 0.4748, "step": 5063 }, { "epoch": 0.36, "grad_norm": 4.218435133616388, "learning_rate": 7.374587784369231e-06, "loss": 0.5892, "step": 5064 }, { "epoch": 0.36, "grad_norm": 9.64271679144367, "learning_rate": 7.3735682897128854e-06, "loss": 0.5502, "step": 5065 }, { "epoch": 0.36, "grad_norm": 1.3555756495555293, "learning_rate": 7.372548667652826e-06, "loss": 0.229, "step": 5066 }, { "epoch": 0.36, "grad_norm": 1.48899415805759, "learning_rate": 7.3715289182437775e-06, "loss": 0.2458, "step": 5067 }, { "epoch": 0.36, "grad_norm": 10.8270945351268, "learning_rate": 7.37050904154048e-06, "loss": 0.6074, "step": 5068 }, { "epoch": 0.36, "grad_norm": 5.529923760001668, "learning_rate": 7.369489037597673e-06, "loss": 0.8744, "step": 5069 }, { "epoch": 0.36, "grad_norm": 1.3148280549662714, "learning_rate": 7.368468906470109e-06, "loss": 0.1528, "step": 5070 }, { "epoch": 0.36, "grad_norm": 1.3451109039714237, "learning_rate": 7.367448648212543e-06, "loss": 0.1729, "step": 5071 }, { "epoch": 0.36, "grad_norm": 7.477737095536321, "learning_rate": 7.366428262879739e-06, "loss": 0.5628, "step": 5072 }, { "epoch": 0.36, "grad_norm": 1.4162625413971304, "learning_rate": 7.365407750526468e-06, "loss": 0.1858, "step": 5073 }, { "epoch": 0.36, "grad_norm": 1.4037819036938215, "learning_rate": 7.364387111207504e-06, "loss": 0.2041, "step": 5074 }, { "epoch": 0.36, "grad_norm": 1.4081080941664654, "learning_rate": 7.363366344977635e-06, "loss": 0.2325, "step": 5075 }, { "epoch": 0.36, "grad_norm": 1.3169872743863023, "learning_rate": 7.362345451891649e-06, "loss": 0.2137, "step": 5076 }, { "epoch": 0.36, "grad_norm": 5.967329823486751, "learning_rate": 7.361324432004345e-06, "loss": 0.4909, "step": 5077 }, { "epoch": 0.36, "grad_norm": 1.2649711289107375, "learning_rate": 7.360303285370526e-06, "loss": 0.1927, "step": 5078 }, { "epoch": 0.36, "grad_norm": 1.4192690443747504, "learning_rate": 7.359282012045004e-06, "loss": 0.204, "step": 5079 }, { "epoch": 0.36, "grad_norm": 1.298050556452496, "learning_rate": 7.358260612082596e-06, "loss": 0.2232, "step": 5080 }, { "epoch": 0.36, "grad_norm": 5.869120480193776, "learning_rate": 7.357239085538128e-06, "loss": 0.5494, "step": 5081 }, { "epoch": 0.36, "grad_norm": 1.4819430223275167, "learning_rate": 7.356217432466429e-06, "loss": 0.2633, "step": 5082 }, { "epoch": 0.36, "grad_norm": 1.3298582142137947, "learning_rate": 7.355195652922341e-06, "loss": 0.2124, "step": 5083 }, { "epoch": 0.36, "grad_norm": 1.2909246133832941, "learning_rate": 7.3541737469607066e-06, "loss": 0.191, "step": 5084 }, { "epoch": 0.36, "grad_norm": 1.7024423232529686, "learning_rate": 7.353151714636378e-06, "loss": 0.2213, "step": 5085 }, { "epoch": 0.36, "grad_norm": 1.3654462733634802, "learning_rate": 7.352129556004213e-06, "loss": 0.2088, "step": 5086 }, { "epoch": 0.36, "grad_norm": 1.4398679995804622, "learning_rate": 7.351107271119079e-06, "loss": 0.2176, "step": 5087 }, { "epoch": 0.36, "grad_norm": 8.907328569565744, "learning_rate": 7.350084860035846e-06, "loss": 0.575, "step": 5088 }, { "epoch": 0.36, "grad_norm": 1.339368158161762, "learning_rate": 7.349062322809396e-06, "loss": 0.2181, "step": 5089 }, { "epoch": 0.36, "grad_norm": 1.481302468033255, "learning_rate": 7.34803965949461e-06, "loss": 0.2312, "step": 5090 }, { "epoch": 0.36, "grad_norm": 1.2469803170214144, "learning_rate": 7.3470168701463865e-06, "loss": 0.1908, "step": 5091 }, { "epoch": 0.36, "grad_norm": 1.4101426289432533, "learning_rate": 7.34599395481962e-06, "loss": 0.2301, "step": 5092 }, { "epoch": 0.36, "grad_norm": 1.4102732734700403, "learning_rate": 7.344970913569218e-06, "loss": 0.2305, "step": 5093 }, { "epoch": 0.36, "grad_norm": 1.3134751850961166, "learning_rate": 7.343947746450093e-06, "loss": 0.1995, "step": 5094 }, { "epoch": 0.36, "grad_norm": 1.4417431845009951, "learning_rate": 7.342924453517166e-06, "loss": 0.2053, "step": 5095 }, { "epoch": 0.36, "grad_norm": 1.468886601041917, "learning_rate": 7.3419010348253626e-06, "loss": 0.1979, "step": 5096 }, { "epoch": 0.36, "grad_norm": 1.3699813739498823, "learning_rate": 7.340877490429616e-06, "loss": 0.2274, "step": 5097 }, { "epoch": 0.36, "grad_norm": 1.3513958122951433, "learning_rate": 7.339853820384863e-06, "loss": 0.2256, "step": 5098 }, { "epoch": 0.36, "grad_norm": 1.6218953351697176, "learning_rate": 7.338830024746054e-06, "loss": 0.2863, "step": 5099 }, { "epoch": 0.36, "grad_norm": 1.366117596342071, "learning_rate": 7.3378061035681415e-06, "loss": 0.1755, "step": 5100 }, { "epoch": 0.36, "grad_norm": 1.4907351789004128, "learning_rate": 7.336782056906085e-06, "loss": 0.2415, "step": 5101 }, { "epoch": 0.36, "grad_norm": 4.83784183925294, "learning_rate": 7.33575788481485e-06, "loss": 0.6952, "step": 5102 }, { "epoch": 0.37, "grad_norm": 1.3915068076300086, "learning_rate": 7.3347335873494115e-06, "loss": 0.2049, "step": 5103 }, { "epoch": 0.37, "grad_norm": 7.853785239163227, "learning_rate": 7.33370916456475e-06, "loss": 0.62, "step": 5104 }, { "epoch": 0.37, "grad_norm": 4.653635022474864, "learning_rate": 7.332684616515853e-06, "loss": 0.6255, "step": 5105 }, { "epoch": 0.37, "grad_norm": 1.2317747594837243, "learning_rate": 7.331659943257712e-06, "loss": 0.1988, "step": 5106 }, { "epoch": 0.37, "grad_norm": 1.3242076092525958, "learning_rate": 7.330635144845326e-06, "loss": 0.2096, "step": 5107 }, { "epoch": 0.37, "grad_norm": 1.3359612460151886, "learning_rate": 7.329610221333707e-06, "loss": 0.2327, "step": 5108 }, { "epoch": 0.37, "grad_norm": 1.362601045135596, "learning_rate": 7.328585172777866e-06, "loss": 0.206, "step": 5109 }, { "epoch": 0.37, "grad_norm": 1.2320770293602212, "learning_rate": 7.327559999232826e-06, "loss": 0.1625, "step": 5110 }, { "epoch": 0.37, "grad_norm": 1.416593582190875, "learning_rate": 7.3265347007536105e-06, "loss": 0.2249, "step": 5111 }, { "epoch": 0.37, "grad_norm": 1.3491679041254274, "learning_rate": 7.3255092773952545e-06, "loss": 0.1808, "step": 5112 }, { "epoch": 0.37, "grad_norm": 1.3289184341102411, "learning_rate": 7.324483729212799e-06, "loss": 0.1716, "step": 5113 }, { "epoch": 0.37, "grad_norm": 1.3547225452016027, "learning_rate": 7.3234580562612925e-06, "loss": 0.205, "step": 5114 }, { "epoch": 0.37, "grad_norm": 1.2992165382839085, "learning_rate": 7.322432258595789e-06, "loss": 0.1617, "step": 5115 }, { "epoch": 0.37, "grad_norm": 1.3429405705094573, "learning_rate": 7.321406336271346e-06, "loss": 0.2288, "step": 5116 }, { "epoch": 0.37, "grad_norm": 1.5744046977208355, "learning_rate": 7.320380289343034e-06, "loss": 0.2784, "step": 5117 }, { "epoch": 0.37, "grad_norm": 1.2475813397793685, "learning_rate": 7.319354117865928e-06, "loss": 0.2135, "step": 5118 }, { "epoch": 0.37, "grad_norm": 1.5539987133417845, "learning_rate": 7.318327821895106e-06, "loss": 0.2094, "step": 5119 }, { "epoch": 0.37, "grad_norm": 1.395083805081527, "learning_rate": 7.317301401485657e-06, "loss": 0.2186, "step": 5120 }, { "epoch": 0.37, "grad_norm": 1.6799260528271898, "learning_rate": 7.316274856692676e-06, "loss": 0.2524, "step": 5121 }, { "epoch": 0.37, "grad_norm": 1.3154543855523566, "learning_rate": 7.315248187571261e-06, "loss": 0.2151, "step": 5122 }, { "epoch": 0.37, "grad_norm": 1.3493201538042643, "learning_rate": 7.314221394176521e-06, "loss": 0.1744, "step": 5123 }, { "epoch": 0.37, "grad_norm": 1.3188766631770004, "learning_rate": 7.313194476563572e-06, "loss": 0.1962, "step": 5124 }, { "epoch": 0.37, "grad_norm": 1.1916186652604503, "learning_rate": 7.312167434787533e-06, "loss": 0.1423, "step": 5125 }, { "epoch": 0.37, "grad_norm": 1.3734549448807392, "learning_rate": 7.311140268903532e-06, "loss": 0.2174, "step": 5126 }, { "epoch": 0.37, "grad_norm": 25.814919012522356, "learning_rate": 7.310112978966703e-06, "loss": 0.7954, "step": 5127 }, { "epoch": 0.37, "grad_norm": 1.354703031760036, "learning_rate": 7.309085565032186e-06, "loss": 0.1984, "step": 5128 }, { "epoch": 0.37, "grad_norm": 1.209679246354488, "learning_rate": 7.30805802715513e-06, "loss": 0.2166, "step": 5129 }, { "epoch": 0.37, "grad_norm": 1.4148499414399502, "learning_rate": 7.3070303653906885e-06, "loss": 0.2083, "step": 5130 }, { "epoch": 0.37, "grad_norm": 4.680965311972458, "learning_rate": 7.306002579794022e-06, "loss": 0.5361, "step": 5131 }, { "epoch": 0.37, "grad_norm": 4.293801887529542, "learning_rate": 7.304974670420299e-06, "loss": 0.5016, "step": 5132 }, { "epoch": 0.37, "grad_norm": 1.3298934060847296, "learning_rate": 7.3039466373246924e-06, "loss": 0.1938, "step": 5133 }, { "epoch": 0.37, "grad_norm": 1.4031811427091343, "learning_rate": 7.302918480562384e-06, "loss": 0.2408, "step": 5134 }, { "epoch": 0.37, "grad_norm": 1.3155849047084702, "learning_rate": 7.301890200188559e-06, "loss": 0.2167, "step": 5135 }, { "epoch": 0.37, "grad_norm": 1.3504393618522972, "learning_rate": 7.300861796258414e-06, "loss": 0.1803, "step": 5136 }, { "epoch": 0.37, "grad_norm": 1.3594597575281275, "learning_rate": 7.299833268827149e-06, "loss": 0.2355, "step": 5137 }, { "epoch": 0.37, "grad_norm": 1.2982296238885702, "learning_rate": 7.298804617949971e-06, "loss": 0.1852, "step": 5138 }, { "epoch": 0.37, "grad_norm": 4.63364757781956, "learning_rate": 7.297775843682092e-06, "loss": 0.5781, "step": 5139 }, { "epoch": 0.37, "grad_norm": 1.434353908119797, "learning_rate": 7.296746946078737e-06, "loss": 0.2157, "step": 5140 }, { "epoch": 0.37, "grad_norm": 1.4063621249688407, "learning_rate": 7.295717925195127e-06, "loss": 0.2054, "step": 5141 }, { "epoch": 0.37, "grad_norm": 1.3270107107523625, "learning_rate": 7.294688781086502e-06, "loss": 0.2167, "step": 5142 }, { "epoch": 0.37, "grad_norm": 1.355016926810944, "learning_rate": 7.293659513808099e-06, "loss": 0.2151, "step": 5143 }, { "epoch": 0.37, "grad_norm": 5.3304327626455645, "learning_rate": 7.292630123415165e-06, "loss": 0.5147, "step": 5144 }, { "epoch": 0.37, "grad_norm": 1.4360423980230157, "learning_rate": 7.291600609962954e-06, "loss": 0.2479, "step": 5145 }, { "epoch": 0.37, "grad_norm": 1.4132294470869529, "learning_rate": 7.290570973506728e-06, "loss": 0.2067, "step": 5146 }, { "epoch": 0.37, "grad_norm": 1.3341659203066594, "learning_rate": 7.289541214101751e-06, "loss": 0.1688, "step": 5147 }, { "epoch": 0.37, "grad_norm": 1.258479962165081, "learning_rate": 7.288511331803296e-06, "loss": 0.2207, "step": 5148 }, { "epoch": 0.37, "grad_norm": 1.3573728195272545, "learning_rate": 7.287481326666646e-06, "loss": 0.2329, "step": 5149 }, { "epoch": 0.37, "grad_norm": 1.4824645746943406, "learning_rate": 7.286451198747086e-06, "loss": 0.1872, "step": 5150 }, { "epoch": 0.37, "grad_norm": 1.4317296056318793, "learning_rate": 7.285420948099909e-06, "loss": 0.2185, "step": 5151 }, { "epoch": 0.37, "grad_norm": 1.4843550812587933, "learning_rate": 7.2843905747804155e-06, "loss": 0.2202, "step": 5152 }, { "epoch": 0.37, "grad_norm": 1.3528765347796194, "learning_rate": 7.283360078843911e-06, "loss": 0.2115, "step": 5153 }, { "epoch": 0.37, "grad_norm": 1.108385858553031, "learning_rate": 7.282329460345708e-06, "loss": 0.1663, "step": 5154 }, { "epoch": 0.37, "grad_norm": 1.3827773904075156, "learning_rate": 7.2812987193411275e-06, "loss": 0.172, "step": 5155 }, { "epoch": 0.37, "grad_norm": 1.2629200472943685, "learning_rate": 7.280267855885495e-06, "loss": 0.18, "step": 5156 }, { "epoch": 0.37, "grad_norm": 1.3035043518947358, "learning_rate": 7.279236870034143e-06, "loss": 0.1644, "step": 5157 }, { "epoch": 0.37, "grad_norm": 1.2148600687824793, "learning_rate": 7.278205761842411e-06, "loss": 0.1629, "step": 5158 }, { "epoch": 0.37, "grad_norm": 1.4951490784323807, "learning_rate": 7.277174531365644e-06, "loss": 0.2304, "step": 5159 }, { "epoch": 0.37, "grad_norm": 1.3028294533111069, "learning_rate": 7.276143178659195e-06, "loss": 0.216, "step": 5160 }, { "epoch": 0.37, "grad_norm": 1.3263851490994678, "learning_rate": 7.275111703778424e-06, "loss": 0.1586, "step": 5161 }, { "epoch": 0.37, "grad_norm": 1.2576312475354818, "learning_rate": 7.274080106778693e-06, "loss": 0.1633, "step": 5162 }, { "epoch": 0.37, "grad_norm": 1.2577592983473234, "learning_rate": 7.273048387715378e-06, "loss": 0.182, "step": 5163 }, { "epoch": 0.37, "grad_norm": 1.3001077297600752, "learning_rate": 7.272016546643856e-06, "loss": 0.2038, "step": 5164 }, { "epoch": 0.37, "grad_norm": 1.2678935690023225, "learning_rate": 7.270984583619511e-06, "loss": 0.1884, "step": 5165 }, { "epoch": 0.37, "grad_norm": 1.3723187033295654, "learning_rate": 7.269952498697734e-06, "loss": 0.2079, "step": 5166 }, { "epoch": 0.37, "grad_norm": 1.3042531944184668, "learning_rate": 7.2689202919339275e-06, "loss": 0.2059, "step": 5167 }, { "epoch": 0.37, "grad_norm": 1.606630411940769, "learning_rate": 7.267887963383493e-06, "loss": 0.2616, "step": 5168 }, { "epoch": 0.37, "grad_norm": 1.463249577946215, "learning_rate": 7.2668555131018436e-06, "loss": 0.2197, "step": 5169 }, { "epoch": 0.37, "grad_norm": 1.3729679382122428, "learning_rate": 7.265822941144394e-06, "loss": 0.1936, "step": 5170 }, { "epoch": 0.37, "grad_norm": 1.2525458466183756, "learning_rate": 7.264790247566571e-06, "loss": 0.1573, "step": 5171 }, { "epoch": 0.37, "grad_norm": 1.5284074909251033, "learning_rate": 7.263757432423806e-06, "loss": 0.2175, "step": 5172 }, { "epoch": 0.37, "grad_norm": 1.334218645271567, "learning_rate": 7.262724495771535e-06, "loss": 0.212, "step": 5173 }, { "epoch": 0.37, "grad_norm": 4.614828006929252, "learning_rate": 7.2616914376652025e-06, "loss": 0.7485, "step": 5174 }, { "epoch": 0.37, "grad_norm": 4.9766280174403414, "learning_rate": 7.260658258160258e-06, "loss": 0.6285, "step": 5175 }, { "epoch": 0.37, "grad_norm": 1.1761404133082543, "learning_rate": 7.259624957312161e-06, "loss": 0.1793, "step": 5176 }, { "epoch": 0.37, "grad_norm": 1.5528252059853986, "learning_rate": 7.258591535176372e-06, "loss": 0.1793, "step": 5177 }, { "epoch": 0.37, "grad_norm": 1.43773186882904, "learning_rate": 7.257557991808364e-06, "loss": 0.2338, "step": 5178 }, { "epoch": 0.37, "grad_norm": 1.293354125614021, "learning_rate": 7.256524327263609e-06, "loss": 0.1839, "step": 5179 }, { "epoch": 0.37, "grad_norm": 1.3773617489408085, "learning_rate": 7.255490541597594e-06, "loss": 0.2221, "step": 5180 }, { "epoch": 0.37, "grad_norm": 8.693038900622433, "learning_rate": 7.254456634865809e-06, "loss": 0.5299, "step": 5181 }, { "epoch": 0.37, "grad_norm": 1.4212298092844184, "learning_rate": 7.253422607123747e-06, "loss": 0.1883, "step": 5182 }, { "epoch": 0.37, "grad_norm": 1.196121250077038, "learning_rate": 7.252388458426911e-06, "loss": 0.1595, "step": 5183 }, { "epoch": 0.37, "grad_norm": 1.4078144708601932, "learning_rate": 7.251354188830811e-06, "loss": 0.2341, "step": 5184 }, { "epoch": 0.37, "grad_norm": 1.2453539056806038, "learning_rate": 7.250319798390963e-06, "loss": 0.1738, "step": 5185 }, { "epoch": 0.37, "grad_norm": 1.3614467982557197, "learning_rate": 7.249285287162889e-06, "loss": 0.2094, "step": 5186 }, { "epoch": 0.37, "grad_norm": 11.46518890857137, "learning_rate": 7.2482506552021156e-06, "loss": 0.4895, "step": 5187 }, { "epoch": 0.37, "grad_norm": 4.804094126042983, "learning_rate": 7.24721590256418e-06, "loss": 0.6617, "step": 5188 }, { "epoch": 0.37, "grad_norm": 1.4887594601316252, "learning_rate": 7.246181029304621e-06, "loss": 0.2113, "step": 5189 }, { "epoch": 0.37, "grad_norm": 5.4328976786609635, "learning_rate": 7.245146035478989e-06, "loss": 0.5084, "step": 5190 }, { "epoch": 0.37, "grad_norm": 1.3998945603461392, "learning_rate": 7.244110921142836e-06, "loss": 0.2193, "step": 5191 }, { "epoch": 0.37, "grad_norm": 1.390308470260549, "learning_rate": 7.243075686351725e-06, "loss": 0.1643, "step": 5192 }, { "epoch": 0.37, "grad_norm": 5.361451300515782, "learning_rate": 7.242040331161223e-06, "loss": 0.7866, "step": 5193 }, { "epoch": 0.37, "grad_norm": 1.3752222453678717, "learning_rate": 7.241004855626903e-06, "loss": 0.209, "step": 5194 }, { "epoch": 0.37, "grad_norm": 1.4297853317980782, "learning_rate": 7.239969259804344e-06, "loss": 0.1841, "step": 5195 }, { "epoch": 0.37, "grad_norm": 1.415871817163078, "learning_rate": 7.238933543749135e-06, "loss": 0.2279, "step": 5196 }, { "epoch": 0.37, "grad_norm": 1.3726852462029897, "learning_rate": 7.237897707516869e-06, "loss": 0.1966, "step": 5197 }, { "epoch": 0.37, "grad_norm": 1.3050725056091215, "learning_rate": 7.236861751163144e-06, "loss": 0.1535, "step": 5198 }, { "epoch": 0.37, "grad_norm": 1.3298373560564996, "learning_rate": 7.235825674743566e-06, "loss": 0.1961, "step": 5199 }, { "epoch": 0.37, "grad_norm": 1.350616649716634, "learning_rate": 7.2347894783137485e-06, "loss": 0.1691, "step": 5200 }, { "epoch": 0.37, "grad_norm": 1.4097265758634332, "learning_rate": 7.233753161929311e-06, "loss": 0.1901, "step": 5201 }, { "epoch": 0.37, "grad_norm": 1.3850589952964674, "learning_rate": 7.232716725645878e-06, "loss": 0.2317, "step": 5202 }, { "epoch": 0.37, "grad_norm": 1.189507890619141, "learning_rate": 7.2316801695190805e-06, "loss": 0.1611, "step": 5203 }, { "epoch": 0.37, "grad_norm": 1.3547494435062506, "learning_rate": 7.230643493604557e-06, "loss": 0.1927, "step": 5204 }, { "epoch": 0.37, "grad_norm": 1.3850060634800934, "learning_rate": 7.229606697957954e-06, "loss": 0.2457, "step": 5205 }, { "epoch": 0.37, "grad_norm": 1.4276054498014128, "learning_rate": 7.228569782634921e-06, "loss": 0.2012, "step": 5206 }, { "epoch": 0.37, "grad_norm": 5.909443532007172, "learning_rate": 7.227532747691116e-06, "loss": 0.7191, "step": 5207 }, { "epoch": 0.37, "grad_norm": 1.556164160010817, "learning_rate": 7.2264955931822025e-06, "loss": 0.2227, "step": 5208 }, { "epoch": 0.37, "grad_norm": 1.324432853537748, "learning_rate": 7.22545831916385e-06, "loss": 0.184, "step": 5209 }, { "epoch": 0.37, "grad_norm": 1.2981119903414269, "learning_rate": 7.2244209256917365e-06, "loss": 0.2007, "step": 5210 }, { "epoch": 0.37, "grad_norm": 1.3081605669408995, "learning_rate": 7.2233834128215454e-06, "loss": 0.2103, "step": 5211 }, { "epoch": 0.37, "grad_norm": 1.4321957307194026, "learning_rate": 7.222345780608966e-06, "loss": 0.2192, "step": 5212 }, { "epoch": 0.37, "grad_norm": 1.3353790485087782, "learning_rate": 7.2213080291096925e-06, "loss": 0.212, "step": 5213 }, { "epoch": 0.37, "grad_norm": 1.118043416651116, "learning_rate": 7.22027015837943e-06, "loss": 0.146, "step": 5214 }, { "epoch": 0.37, "grad_norm": 1.4286772145067639, "learning_rate": 7.219232168473886e-06, "loss": 0.1961, "step": 5215 }, { "epoch": 0.37, "grad_norm": 1.3224601038674204, "learning_rate": 7.218194059448775e-06, "loss": 0.2374, "step": 5216 }, { "epoch": 0.37, "grad_norm": 1.2536616816707513, "learning_rate": 7.21715583135982e-06, "loss": 0.1675, "step": 5217 }, { "epoch": 0.37, "grad_norm": 13.416246544080483, "learning_rate": 7.216117484262748e-06, "loss": 0.5001, "step": 5218 }, { "epoch": 0.37, "grad_norm": 1.4573004238493596, "learning_rate": 7.215079018213294e-06, "loss": 0.2159, "step": 5219 }, { "epoch": 0.37, "grad_norm": 1.4739864880204603, "learning_rate": 7.2140404332671986e-06, "loss": 0.2388, "step": 5220 }, { "epoch": 0.37, "grad_norm": 1.5674819679957297, "learning_rate": 7.213001729480207e-06, "loss": 0.2596, "step": 5221 }, { "epoch": 0.37, "grad_norm": 1.554984172802207, "learning_rate": 7.2119629069080766e-06, "loss": 0.2518, "step": 5222 }, { "epoch": 0.37, "grad_norm": 1.242590750946218, "learning_rate": 7.210923965606564e-06, "loss": 0.1768, "step": 5223 }, { "epoch": 0.37, "grad_norm": 1.5459015741480204, "learning_rate": 7.209884905631437e-06, "loss": 0.257, "step": 5224 }, { "epoch": 0.37, "grad_norm": 4.73546361333772, "learning_rate": 7.208845727038466e-06, "loss": 0.5381, "step": 5225 }, { "epoch": 0.37, "grad_norm": 1.1787149298978972, "learning_rate": 7.207806429883433e-06, "loss": 0.1362, "step": 5226 }, { "epoch": 0.37, "grad_norm": 1.2675783787813169, "learning_rate": 7.206767014222121e-06, "loss": 0.1912, "step": 5227 }, { "epoch": 0.37, "grad_norm": 1.3112684966441426, "learning_rate": 7.2057274801103235e-06, "loss": 0.1946, "step": 5228 }, { "epoch": 0.37, "grad_norm": 1.227659156923823, "learning_rate": 7.204687827603838e-06, "loss": 0.159, "step": 5229 }, { "epoch": 0.37, "grad_norm": 1.1808104269558863, "learning_rate": 7.203648056758468e-06, "loss": 0.1622, "step": 5230 }, { "epoch": 0.37, "grad_norm": 1.3114681554794212, "learning_rate": 7.202608167630026e-06, "loss": 0.1721, "step": 5231 }, { "epoch": 0.37, "grad_norm": 1.285441981582618, "learning_rate": 7.201568160274327e-06, "loss": 0.1705, "step": 5232 }, { "epoch": 0.37, "grad_norm": 1.4060944873286094, "learning_rate": 7.2005280347471955e-06, "loss": 0.2034, "step": 5233 }, { "epoch": 0.37, "grad_norm": 1.323400990736908, "learning_rate": 7.19948779110446e-06, "loss": 0.2173, "step": 5234 }, { "epoch": 0.37, "grad_norm": 1.4023750873886776, "learning_rate": 7.19844742940196e-06, "loss": 0.1865, "step": 5235 }, { "epoch": 0.37, "grad_norm": 1.426244152049011, "learning_rate": 7.1974069496955365e-06, "loss": 0.2052, "step": 5236 }, { "epoch": 0.37, "grad_norm": 1.412262846832108, "learning_rate": 7.196366352041037e-06, "loss": 0.2137, "step": 5237 }, { "epoch": 0.37, "grad_norm": 1.4667088460909592, "learning_rate": 7.195325636494317e-06, "loss": 0.2022, "step": 5238 }, { "epoch": 0.37, "grad_norm": 1.3802805101133935, "learning_rate": 7.194284803111239e-06, "loss": 0.2336, "step": 5239 }, { "epoch": 0.37, "grad_norm": 1.3926828310978259, "learning_rate": 7.19324385194767e-06, "loss": 0.2349, "step": 5240 }, { "epoch": 0.37, "grad_norm": 1.2199814686947814, "learning_rate": 7.1922027830594846e-06, "loss": 0.135, "step": 5241 }, { "epoch": 0.37, "grad_norm": 1.4158870466344178, "learning_rate": 7.1911615965025615e-06, "loss": 0.1884, "step": 5242 }, { "epoch": 0.38, "grad_norm": 1.2549814342067525, "learning_rate": 7.19012029233279e-06, "loss": 0.2112, "step": 5243 }, { "epoch": 0.38, "grad_norm": 1.333600611200091, "learning_rate": 7.189078870606063e-06, "loss": 0.206, "step": 5244 }, { "epoch": 0.38, "grad_norm": 4.796546380376461, "learning_rate": 7.188037331378278e-06, "loss": 0.6358, "step": 5245 }, { "epoch": 0.38, "grad_norm": 1.2673757158726637, "learning_rate": 7.1869956747053414e-06, "loss": 0.1917, "step": 5246 }, { "epoch": 0.38, "grad_norm": 5.0815786492865875, "learning_rate": 7.185953900643165e-06, "loss": 0.5622, "step": 5247 }, { "epoch": 0.38, "grad_norm": 1.727191632389053, "learning_rate": 7.1849120092476685e-06, "loss": 0.2434, "step": 5248 }, { "epoch": 0.38, "grad_norm": 1.2395147299528873, "learning_rate": 7.183870000574777e-06, "loss": 0.1758, "step": 5249 }, { "epoch": 0.38, "grad_norm": 1.4297513270738393, "learning_rate": 7.182827874680417e-06, "loss": 0.2116, "step": 5250 }, { "epoch": 0.38, "grad_norm": 1.2794268923172978, "learning_rate": 7.18178563162053e-06, "loss": 0.2004, "step": 5251 }, { "epoch": 0.38, "grad_norm": 1.3069182235896368, "learning_rate": 7.180743271451058e-06, "loss": 0.2113, "step": 5252 }, { "epoch": 0.38, "grad_norm": 1.2648456227397045, "learning_rate": 7.179700794227952e-06, "loss": 0.1927, "step": 5253 }, { "epoch": 0.38, "grad_norm": 1.294624020087715, "learning_rate": 7.178658200007165e-06, "loss": 0.2005, "step": 5254 }, { "epoch": 0.38, "grad_norm": 1.4407690305450147, "learning_rate": 7.177615488844661e-06, "loss": 0.2539, "step": 5255 }, { "epoch": 0.38, "grad_norm": 8.29352991089323, "learning_rate": 7.17657266079641e-06, "loss": 0.7696, "step": 5256 }, { "epoch": 0.38, "grad_norm": 1.4421601640421118, "learning_rate": 7.175529715918386e-06, "loss": 0.1713, "step": 5257 }, { "epoch": 0.38, "grad_norm": 4.251778709842344, "learning_rate": 7.1744866542665695e-06, "loss": 0.5524, "step": 5258 }, { "epoch": 0.38, "grad_norm": 1.3505063954947236, "learning_rate": 7.173443475896946e-06, "loss": 0.1916, "step": 5259 }, { "epoch": 0.38, "grad_norm": 4.985356346821925, "learning_rate": 7.172400180865514e-06, "loss": 0.6054, "step": 5260 }, { "epoch": 0.38, "grad_norm": 1.4297995671407833, "learning_rate": 7.17135676922827e-06, "loss": 0.2121, "step": 5261 }, { "epoch": 0.38, "grad_norm": 1.297481235706431, "learning_rate": 7.1703132410412215e-06, "loss": 0.1685, "step": 5262 }, { "epoch": 0.38, "grad_norm": 1.274773710041479, "learning_rate": 7.169269596360379e-06, "loss": 0.1991, "step": 5263 }, { "epoch": 0.38, "grad_norm": 1.2977346617550918, "learning_rate": 7.168225835241764e-06, "loss": 0.1615, "step": 5264 }, { "epoch": 0.38, "grad_norm": 1.258783013672195, "learning_rate": 7.167181957741398e-06, "loss": 0.1726, "step": 5265 }, { "epoch": 0.38, "grad_norm": 1.325941785774385, "learning_rate": 7.166137963915316e-06, "loss": 0.1958, "step": 5266 }, { "epoch": 0.38, "grad_norm": 1.3681856554132266, "learning_rate": 7.165093853819553e-06, "loss": 0.1977, "step": 5267 }, { "epoch": 0.38, "grad_norm": 6.911135145452195, "learning_rate": 7.164049627510154e-06, "loss": 0.7471, "step": 5268 }, { "epoch": 0.38, "grad_norm": 1.4731608476626972, "learning_rate": 7.163005285043168e-06, "loss": 0.2396, "step": 5269 }, { "epoch": 0.38, "grad_norm": 1.415189789601481, "learning_rate": 7.161960826474651e-06, "loss": 0.2088, "step": 5270 }, { "epoch": 0.38, "grad_norm": 1.355802769028095, "learning_rate": 7.160916251860667e-06, "loss": 0.2392, "step": 5271 }, { "epoch": 0.38, "grad_norm": 1.4139478354102717, "learning_rate": 7.159871561257282e-06, "loss": 0.2008, "step": 5272 }, { "epoch": 0.38, "grad_norm": 1.3553352440241615, "learning_rate": 7.158826754720574e-06, "loss": 0.224, "step": 5273 }, { "epoch": 0.38, "grad_norm": 1.299653602677173, "learning_rate": 7.1577818323066205e-06, "loss": 0.2476, "step": 5274 }, { "epoch": 0.38, "grad_norm": 1.4214623239103048, "learning_rate": 7.156736794071512e-06, "loss": 0.2407, "step": 5275 }, { "epoch": 0.38, "grad_norm": 1.3338898738432636, "learning_rate": 7.155691640071341e-06, "loss": 0.175, "step": 5276 }, { "epoch": 0.38, "grad_norm": 1.3545367785801545, "learning_rate": 7.154646370362206e-06, "loss": 0.2039, "step": 5277 }, { "epoch": 0.38, "grad_norm": 1.3833067920256379, "learning_rate": 7.153600985000213e-06, "loss": 0.2379, "step": 5278 }, { "epoch": 0.38, "grad_norm": 1.2123594881388255, "learning_rate": 7.1525554840414765e-06, "loss": 0.1682, "step": 5279 }, { "epoch": 0.38, "grad_norm": 1.2718267930716303, "learning_rate": 7.1515098675421125e-06, "loss": 0.1545, "step": 5280 }, { "epoch": 0.38, "grad_norm": 1.470272208182743, "learning_rate": 7.150464135558246e-06, "loss": 0.1835, "step": 5281 }, { "epoch": 0.38, "grad_norm": 1.3271528128938859, "learning_rate": 7.14941828814601e-06, "loss": 0.1789, "step": 5282 }, { "epoch": 0.38, "grad_norm": 1.4291837746951697, "learning_rate": 7.148372325361538e-06, "loss": 0.2197, "step": 5283 }, { "epoch": 0.38, "grad_norm": 1.280551710575636, "learning_rate": 7.147326247260973e-06, "loss": 0.1947, "step": 5284 }, { "epoch": 0.38, "grad_norm": 1.5145956947959534, "learning_rate": 7.146280053900468e-06, "loss": 0.2038, "step": 5285 }, { "epoch": 0.38, "grad_norm": 5.996376436278586, "learning_rate": 7.145233745336176e-06, "loss": 0.5867, "step": 5286 }, { "epoch": 0.38, "grad_norm": 1.1775284987475882, "learning_rate": 7.144187321624259e-06, "loss": 0.1727, "step": 5287 }, { "epoch": 0.38, "grad_norm": 4.476337945416581, "learning_rate": 7.143140782820885e-06, "loss": 0.5064, "step": 5288 }, { "epoch": 0.38, "grad_norm": 1.3488345043328493, "learning_rate": 7.142094128982228e-06, "loss": 0.2399, "step": 5289 }, { "epoch": 0.38, "grad_norm": 1.401625436230305, "learning_rate": 7.141047360164469e-06, "loss": 0.1985, "step": 5290 }, { "epoch": 0.38, "grad_norm": 1.3370891656015966, "learning_rate": 7.140000476423793e-06, "loss": 0.1926, "step": 5291 }, { "epoch": 0.38, "grad_norm": 1.43850975081692, "learning_rate": 7.138953477816393e-06, "loss": 0.2138, "step": 5292 }, { "epoch": 0.38, "grad_norm": 1.2688690255232633, "learning_rate": 7.137906364398468e-06, "loss": 0.2017, "step": 5293 }, { "epoch": 0.38, "grad_norm": 1.4473672598169522, "learning_rate": 7.136859136226223e-06, "loss": 0.2312, "step": 5294 }, { "epoch": 0.38, "grad_norm": 1.5216866236271294, "learning_rate": 7.135811793355869e-06, "loss": 0.2202, "step": 5295 }, { "epoch": 0.38, "grad_norm": 1.4016946341096583, "learning_rate": 7.1347643358436226e-06, "loss": 0.2069, "step": 5296 }, { "epoch": 0.38, "grad_norm": 1.3453228880814028, "learning_rate": 7.133716763745707e-06, "loss": 0.1776, "step": 5297 }, { "epoch": 0.38, "grad_norm": 1.4409939984676357, "learning_rate": 7.132669077118352e-06, "loss": 0.2084, "step": 5298 }, { "epoch": 0.38, "grad_norm": 1.334354563803864, "learning_rate": 7.131621276017796e-06, "loss": 0.2318, "step": 5299 }, { "epoch": 0.38, "grad_norm": 6.820991565415937, "learning_rate": 7.130573360500277e-06, "loss": 0.6317, "step": 5300 }, { "epoch": 0.38, "grad_norm": 1.378037039414726, "learning_rate": 7.1295253306220435e-06, "loss": 0.1945, "step": 5301 }, { "epoch": 0.38, "grad_norm": 1.165590661477633, "learning_rate": 7.128477186439352e-06, "loss": 0.1617, "step": 5302 }, { "epoch": 0.38, "grad_norm": 1.3848916198514492, "learning_rate": 7.127428928008461e-06, "loss": 0.2173, "step": 5303 }, { "epoch": 0.38, "grad_norm": 1.3587961795281016, "learning_rate": 7.126380555385637e-06, "loss": 0.2179, "step": 5304 }, { "epoch": 0.38, "grad_norm": 1.4334153696673626, "learning_rate": 7.125332068627151e-06, "loss": 0.2367, "step": 5305 }, { "epoch": 0.38, "grad_norm": 1.2331758708682101, "learning_rate": 7.124283467789284e-06, "loss": 0.1777, "step": 5306 }, { "epoch": 0.38, "grad_norm": 1.3629330195691678, "learning_rate": 7.12323475292832e-06, "loss": 0.2468, "step": 5307 }, { "epoch": 0.38, "grad_norm": 1.2608640531997062, "learning_rate": 7.12218592410055e-06, "loss": 0.2012, "step": 5308 }, { "epoch": 0.38, "grad_norm": 1.2878358055964931, "learning_rate": 7.12113698136227e-06, "loss": 0.1885, "step": 5309 }, { "epoch": 0.38, "grad_norm": 4.7216822643518075, "learning_rate": 7.120087924769783e-06, "loss": 0.6202, "step": 5310 }, { "epoch": 0.38, "grad_norm": 1.4701954639598007, "learning_rate": 7.119038754379399e-06, "loss": 0.1826, "step": 5311 }, { "epoch": 0.38, "grad_norm": 1.3926629610644652, "learning_rate": 7.117989470247434e-06, "loss": 0.2239, "step": 5312 }, { "epoch": 0.38, "grad_norm": 4.861720419306191, "learning_rate": 7.116940072430208e-06, "loss": 0.5447, "step": 5313 }, { "epoch": 0.38, "grad_norm": 1.4597522882713152, "learning_rate": 7.1158905609840486e-06, "loss": 0.244, "step": 5314 }, { "epoch": 0.38, "grad_norm": 1.406652505729443, "learning_rate": 7.1148409359652914e-06, "loss": 0.2269, "step": 5315 }, { "epoch": 0.38, "grad_norm": 1.6689972780200897, "learning_rate": 7.113791197430275e-06, "loss": 0.2185, "step": 5316 }, { "epoch": 0.38, "grad_norm": 1.3922548995863004, "learning_rate": 7.112741345435342e-06, "loss": 0.2114, "step": 5317 }, { "epoch": 0.38, "grad_norm": 1.1602190523344074, "learning_rate": 7.111691380036848e-06, "loss": 0.1707, "step": 5318 }, { "epoch": 0.38, "grad_norm": 1.3844979994689708, "learning_rate": 7.1106413012911514e-06, "loss": 0.2481, "step": 5319 }, { "epoch": 0.38, "grad_norm": 1.2174944224488164, "learning_rate": 7.109591109254614e-06, "loss": 0.148, "step": 5320 }, { "epoch": 0.38, "grad_norm": 1.3159546984056836, "learning_rate": 7.108540803983608e-06, "loss": 0.1736, "step": 5321 }, { "epoch": 0.38, "grad_norm": 1.2458224018515767, "learning_rate": 7.107490385534506e-06, "loss": 0.2056, "step": 5322 }, { "epoch": 0.38, "grad_norm": 1.328504744734417, "learning_rate": 7.106439853963694e-06, "loss": 0.2347, "step": 5323 }, { "epoch": 0.38, "grad_norm": 1.2488937313417505, "learning_rate": 7.10538920932756e-06, "loss": 0.2115, "step": 5324 }, { "epoch": 0.38, "grad_norm": 1.3151866389456683, "learning_rate": 7.1043384516824965e-06, "loss": 0.2091, "step": 5325 }, { "epoch": 0.38, "grad_norm": 1.3148738407977136, "learning_rate": 7.103287581084905e-06, "loss": 0.227, "step": 5326 }, { "epoch": 0.38, "grad_norm": 1.1227393727508552, "learning_rate": 7.102236597591193e-06, "loss": 0.1129, "step": 5327 }, { "epoch": 0.38, "grad_norm": 1.341605284158933, "learning_rate": 7.1011855012577705e-06, "loss": 0.1787, "step": 5328 }, { "epoch": 0.38, "grad_norm": 1.4239463000165027, "learning_rate": 7.100134292141058e-06, "loss": 0.2186, "step": 5329 }, { "epoch": 0.38, "grad_norm": 1.2798347196932893, "learning_rate": 7.099082970297481e-06, "loss": 0.1981, "step": 5330 }, { "epoch": 0.38, "grad_norm": 1.3222489756026916, "learning_rate": 7.098031535783468e-06, "loss": 0.2075, "step": 5331 }, { "epoch": 0.38, "grad_norm": 1.543461247996678, "learning_rate": 7.096979988655459e-06, "loss": 0.2504, "step": 5332 }, { "epoch": 0.38, "grad_norm": 1.4116409393313618, "learning_rate": 7.095928328969895e-06, "loss": 0.199, "step": 5333 }, { "epoch": 0.38, "grad_norm": 1.2768246204064162, "learning_rate": 7.094876556783225e-06, "loss": 0.1485, "step": 5334 }, { "epoch": 0.38, "grad_norm": 1.3235101589700204, "learning_rate": 7.093824672151901e-06, "loss": 0.2023, "step": 5335 }, { "epoch": 0.38, "grad_norm": 1.2038491894455357, "learning_rate": 7.09277267513239e-06, "loss": 0.1972, "step": 5336 }, { "epoch": 0.38, "grad_norm": 1.199967832284442, "learning_rate": 7.091720565781156e-06, "loss": 0.1607, "step": 5337 }, { "epoch": 0.38, "grad_norm": 1.2800986957899048, "learning_rate": 7.090668344154671e-06, "loss": 0.182, "step": 5338 }, { "epoch": 0.38, "grad_norm": 1.2598876949846325, "learning_rate": 7.0896160103094145e-06, "loss": 0.2104, "step": 5339 }, { "epoch": 0.38, "grad_norm": 1.3473038713994927, "learning_rate": 7.088563564301874e-06, "loss": 0.2539, "step": 5340 }, { "epoch": 0.38, "grad_norm": 1.3985245043743806, "learning_rate": 7.0875110061885375e-06, "loss": 0.1926, "step": 5341 }, { "epoch": 0.38, "grad_norm": 1.3564563019131808, "learning_rate": 7.086458336025904e-06, "loss": 0.2008, "step": 5342 }, { "epoch": 0.38, "grad_norm": 1.3250471927629286, "learning_rate": 7.085405553870475e-06, "loss": 0.2039, "step": 5343 }, { "epoch": 0.38, "grad_norm": 1.481494999284909, "learning_rate": 7.084352659778762e-06, "loss": 0.2043, "step": 5344 }, { "epoch": 0.38, "grad_norm": 1.4642368292048393, "learning_rate": 7.083299653807278e-06, "loss": 0.1954, "step": 5345 }, { "epoch": 0.38, "grad_norm": 1.5944475816441144, "learning_rate": 7.082246536012546e-06, "loss": 0.2429, "step": 5346 }, { "epoch": 0.38, "grad_norm": 1.1162649820923438, "learning_rate": 7.081193306451091e-06, "loss": 0.1749, "step": 5347 }, { "epoch": 0.38, "grad_norm": 1.6472149481192184, "learning_rate": 7.080139965179449e-06, "loss": 0.2093, "step": 5348 }, { "epoch": 0.38, "grad_norm": 1.4379596979127667, "learning_rate": 7.079086512254156e-06, "loss": 0.2052, "step": 5349 }, { "epoch": 0.38, "grad_norm": 1.2858700292192091, "learning_rate": 7.078032947731761e-06, "loss": 0.1583, "step": 5350 }, { "epoch": 0.38, "grad_norm": 1.4068751127747166, "learning_rate": 7.076979271668811e-06, "loss": 0.2762, "step": 5351 }, { "epoch": 0.38, "grad_norm": 1.4107479290003986, "learning_rate": 7.075925484121866e-06, "loss": 0.2243, "step": 5352 }, { "epoch": 0.38, "grad_norm": 1.4382586138466011, "learning_rate": 7.074871585147488e-06, "loss": 0.2122, "step": 5353 }, { "epoch": 0.38, "grad_norm": 1.2980422364784499, "learning_rate": 7.0738175748022464e-06, "loss": 0.1896, "step": 5354 }, { "epoch": 0.38, "grad_norm": 1.5171936244275739, "learning_rate": 7.0727634531427155e-06, "loss": 0.2524, "step": 5355 }, { "epoch": 0.38, "grad_norm": 1.346182385629131, "learning_rate": 7.071709220225478e-06, "loss": 0.1979, "step": 5356 }, { "epoch": 0.38, "grad_norm": 1.4956888475605887, "learning_rate": 7.070654876107119e-06, "loss": 0.2067, "step": 5357 }, { "epoch": 0.38, "grad_norm": 1.3268758107537053, "learning_rate": 7.069600420844234e-06, "loss": 0.1832, "step": 5358 }, { "epoch": 0.38, "grad_norm": 1.3839862518756176, "learning_rate": 7.06854585449342e-06, "loss": 0.2191, "step": 5359 }, { "epoch": 0.38, "grad_norm": 7.494683041128527, "learning_rate": 7.067491177111282e-06, "loss": 0.7203, "step": 5360 }, { "epoch": 0.38, "grad_norm": 1.2594638487488858, "learning_rate": 7.066436388754432e-06, "loss": 0.2007, "step": 5361 }, { "epoch": 0.38, "grad_norm": 1.4111417080392896, "learning_rate": 7.0653814894794836e-06, "loss": 0.1662, "step": 5362 }, { "epoch": 0.38, "grad_norm": 1.264770813746771, "learning_rate": 7.064326479343065e-06, "loss": 0.2008, "step": 5363 }, { "epoch": 0.38, "grad_norm": 1.447296836396725, "learning_rate": 7.063271358401802e-06, "loss": 0.1952, "step": 5364 }, { "epoch": 0.38, "grad_norm": 1.3848085938776455, "learning_rate": 7.062216126712328e-06, "loss": 0.2218, "step": 5365 }, { "epoch": 0.38, "grad_norm": 1.2776837965233663, "learning_rate": 7.061160784331286e-06, "loss": 0.1752, "step": 5366 }, { "epoch": 0.38, "grad_norm": 1.342066014003571, "learning_rate": 7.060105331315321e-06, "loss": 0.2284, "step": 5367 }, { "epoch": 0.38, "grad_norm": 1.3254528944533246, "learning_rate": 7.059049767721087e-06, "loss": 0.1757, "step": 5368 }, { "epoch": 0.38, "grad_norm": 1.2195873163623143, "learning_rate": 7.05799409360524e-06, "loss": 0.1852, "step": 5369 }, { "epoch": 0.38, "grad_norm": 4.560337500701452, "learning_rate": 7.056938309024448e-06, "loss": 0.6138, "step": 5370 }, { "epoch": 0.38, "grad_norm": 1.3259779319632197, "learning_rate": 7.05588241403538e-06, "loss": 0.1852, "step": 5371 }, { "epoch": 0.38, "grad_norm": 1.3521330819516233, "learning_rate": 7.054826408694711e-06, "loss": 0.1674, "step": 5372 }, { "epoch": 0.38, "grad_norm": 1.4023064735465687, "learning_rate": 7.053770293059121e-06, "loss": 0.1865, "step": 5373 }, { "epoch": 0.38, "grad_norm": 1.2892231521775988, "learning_rate": 7.052714067185303e-06, "loss": 0.2264, "step": 5374 }, { "epoch": 0.38, "grad_norm": 1.3557590815462028, "learning_rate": 7.051657731129949e-06, "loss": 0.2476, "step": 5375 }, { "epoch": 0.38, "grad_norm": 1.3966054176055451, "learning_rate": 7.050601284949759e-06, "loss": 0.2258, "step": 5376 }, { "epoch": 0.38, "grad_norm": 1.321655033038373, "learning_rate": 7.049544728701439e-06, "loss": 0.1782, "step": 5377 }, { "epoch": 0.38, "grad_norm": 1.2085789231078083, "learning_rate": 7.0484880624416985e-06, "loss": 0.1836, "step": 5378 }, { "epoch": 0.38, "grad_norm": 5.017034507951786, "learning_rate": 7.047431286227259e-06, "loss": 0.5426, "step": 5379 }, { "epoch": 0.38, "grad_norm": 1.4172221890454402, "learning_rate": 7.046374400114842e-06, "loss": 0.1927, "step": 5380 }, { "epoch": 0.38, "grad_norm": 1.20038033563381, "learning_rate": 7.045317404161175e-06, "loss": 0.1884, "step": 5381 }, { "epoch": 0.38, "grad_norm": 1.5367987925486606, "learning_rate": 7.044260298422997e-06, "loss": 0.2141, "step": 5382 }, { "epoch": 0.39, "grad_norm": 1.3449502243111315, "learning_rate": 7.043203082957046e-06, "loss": 0.1729, "step": 5383 }, { "epoch": 0.39, "grad_norm": 1.414854392595381, "learning_rate": 7.042145757820073e-06, "loss": 0.2144, "step": 5384 }, { "epoch": 0.39, "grad_norm": 1.3245857028642467, "learning_rate": 7.041088323068827e-06, "loss": 0.2018, "step": 5385 }, { "epoch": 0.39, "grad_norm": 1.1095325233540685, "learning_rate": 7.04003077876007e-06, "loss": 0.1691, "step": 5386 }, { "epoch": 0.39, "grad_norm": 1.4051072252962102, "learning_rate": 7.038973124950564e-06, "loss": 0.2538, "step": 5387 }, { "epoch": 0.39, "grad_norm": 1.2041851912153743, "learning_rate": 7.037915361697082e-06, "loss": 0.2134, "step": 5388 }, { "epoch": 0.39, "grad_norm": 1.5227259467220857, "learning_rate": 7.036857489056399e-06, "loss": 0.1876, "step": 5389 }, { "epoch": 0.39, "grad_norm": 1.4615992206052881, "learning_rate": 7.035799507085299e-06, "loss": 0.2376, "step": 5390 }, { "epoch": 0.39, "grad_norm": 1.4843549189083187, "learning_rate": 7.034741415840568e-06, "loss": 0.2813, "step": 5391 }, { "epoch": 0.39, "grad_norm": 1.3731244884684177, "learning_rate": 7.033683215379002e-06, "loss": 0.1952, "step": 5392 }, { "epoch": 0.39, "grad_norm": 1.3711030997553977, "learning_rate": 7.0326249057574e-06, "loss": 0.2118, "step": 5393 }, { "epoch": 0.39, "grad_norm": 1.3241887600159117, "learning_rate": 7.031566487032568e-06, "loss": 0.2089, "step": 5394 }, { "epoch": 0.39, "grad_norm": 9.17915184631606, "learning_rate": 7.030507959261319e-06, "loss": 0.7093, "step": 5395 }, { "epoch": 0.39, "grad_norm": 1.34254517195933, "learning_rate": 7.029449322500469e-06, "loss": 0.1908, "step": 5396 }, { "epoch": 0.39, "grad_norm": 1.280853949312569, "learning_rate": 7.028390576806843e-06, "loss": 0.191, "step": 5397 }, { "epoch": 0.39, "grad_norm": 1.3771367768269365, "learning_rate": 7.027331722237268e-06, "loss": 0.1945, "step": 5398 }, { "epoch": 0.39, "grad_norm": 1.3636017755883405, "learning_rate": 7.02627275884858e-06, "loss": 0.2113, "step": 5399 }, { "epoch": 0.39, "grad_norm": 1.3583402332203054, "learning_rate": 7.0252136866976205e-06, "loss": 0.1936, "step": 5400 }, { "epoch": 0.39, "grad_norm": 1.229436370670822, "learning_rate": 7.024154505841237e-06, "loss": 0.204, "step": 5401 }, { "epoch": 0.39, "grad_norm": 5.979406682598009, "learning_rate": 7.02309521633628e-06, "loss": 0.7524, "step": 5402 }, { "epoch": 0.39, "grad_norm": 6.475900757305683, "learning_rate": 7.0220358182396095e-06, "loss": 0.5348, "step": 5403 }, { "epoch": 0.39, "grad_norm": 9.415485383395827, "learning_rate": 7.0209763116080895e-06, "loss": 0.5551, "step": 5404 }, { "epoch": 0.39, "grad_norm": 1.4152115843077044, "learning_rate": 7.019916696498591e-06, "loss": 0.2432, "step": 5405 }, { "epoch": 0.39, "grad_norm": 1.5153297551955902, "learning_rate": 7.018856972967988e-06, "loss": 0.2284, "step": 5406 }, { "epoch": 0.39, "grad_norm": 1.3783458263792205, "learning_rate": 7.017797141073163e-06, "loss": 0.2103, "step": 5407 }, { "epoch": 0.39, "grad_norm": 1.3352340767878301, "learning_rate": 7.0167372008710035e-06, "loss": 0.173, "step": 5408 }, { "epoch": 0.39, "grad_norm": 5.453324817894412, "learning_rate": 7.015677152418404e-06, "loss": 0.7066, "step": 5409 }, { "epoch": 0.39, "grad_norm": 1.381443235572736, "learning_rate": 7.014616995772263e-06, "loss": 0.2343, "step": 5410 }, { "epoch": 0.39, "grad_norm": 1.2977153085166835, "learning_rate": 7.013556730989484e-06, "loss": 0.2387, "step": 5411 }, { "epoch": 0.39, "grad_norm": 1.406639170604141, "learning_rate": 7.012496358126979e-06, "loss": 0.2136, "step": 5412 }, { "epoch": 0.39, "grad_norm": 1.7038903414323416, "learning_rate": 7.011435877241666e-06, "loss": 0.2292, "step": 5413 }, { "epoch": 0.39, "grad_norm": 1.292494944421032, "learning_rate": 7.010375288390466e-06, "loss": 0.2011, "step": 5414 }, { "epoch": 0.39, "grad_norm": 1.5602545772142864, "learning_rate": 7.009314591630306e-06, "loss": 0.2444, "step": 5415 }, { "epoch": 0.39, "grad_norm": 1.4289953442673606, "learning_rate": 7.008253787018124e-06, "loss": 0.1684, "step": 5416 }, { "epoch": 0.39, "grad_norm": 1.3671991359227755, "learning_rate": 7.0071928746108545e-06, "loss": 0.2221, "step": 5417 }, { "epoch": 0.39, "grad_norm": 1.2581705815559647, "learning_rate": 7.006131854465447e-06, "loss": 0.2291, "step": 5418 }, { "epoch": 0.39, "grad_norm": 1.2499704238689193, "learning_rate": 7.005070726638849e-06, "loss": 0.1659, "step": 5419 }, { "epoch": 0.39, "grad_norm": 1.1549711068570299, "learning_rate": 7.004009491188023e-06, "loss": 0.1751, "step": 5420 }, { "epoch": 0.39, "grad_norm": 1.4573035289690772, "learning_rate": 7.002948148169929e-06, "loss": 0.2338, "step": 5421 }, { "epoch": 0.39, "grad_norm": 1.4643026458633595, "learning_rate": 7.0018866976415345e-06, "loss": 0.1832, "step": 5422 }, { "epoch": 0.39, "grad_norm": 1.3546310321858084, "learning_rate": 7.000825139659815e-06, "loss": 0.3, "step": 5423 }, { "epoch": 0.39, "grad_norm": 1.3087462235220766, "learning_rate": 6.999763474281751e-06, "loss": 0.1996, "step": 5424 }, { "epoch": 0.39, "grad_norm": 1.3650633768781233, "learning_rate": 6.99870170156433e-06, "loss": 0.2212, "step": 5425 }, { "epoch": 0.39, "grad_norm": 1.3461634538583682, "learning_rate": 6.997639821564539e-06, "loss": 0.2282, "step": 5426 }, { "epoch": 0.39, "grad_norm": 1.5369389855175586, "learning_rate": 6.996577834339381e-06, "loss": 0.25, "step": 5427 }, { "epoch": 0.39, "grad_norm": 1.3633347594031406, "learning_rate": 6.995515739945855e-06, "loss": 0.1982, "step": 5428 }, { "epoch": 0.39, "grad_norm": 1.3766699480481996, "learning_rate": 6.994453538440974e-06, "loss": 0.1664, "step": 5429 }, { "epoch": 0.39, "grad_norm": 1.126979028576072, "learning_rate": 6.9933912298817495e-06, "loss": 0.1305, "step": 5430 }, { "epoch": 0.39, "grad_norm": 1.2982718915641807, "learning_rate": 6.9923288143252036e-06, "loss": 0.1994, "step": 5431 }, { "epoch": 0.39, "grad_norm": 1.4822797868836177, "learning_rate": 6.99126629182836e-06, "loss": 0.2401, "step": 5432 }, { "epoch": 0.39, "grad_norm": 1.5061680226185863, "learning_rate": 6.990203662448256e-06, "loss": 0.2316, "step": 5433 }, { "epoch": 0.39, "grad_norm": 1.0945477657779545, "learning_rate": 6.989140926241924e-06, "loss": 0.1613, "step": 5434 }, { "epoch": 0.39, "grad_norm": 4.049624372936082, "learning_rate": 6.988078083266411e-06, "loss": 0.5779, "step": 5435 }, { "epoch": 0.39, "grad_norm": 1.3504998789719542, "learning_rate": 6.987015133578763e-06, "loss": 0.2016, "step": 5436 }, { "epoch": 0.39, "grad_norm": 5.509246475289816, "learning_rate": 6.985952077236037e-06, "loss": 0.5983, "step": 5437 }, { "epoch": 0.39, "grad_norm": 1.3638594343592807, "learning_rate": 6.9848889142952935e-06, "loss": 0.2302, "step": 5438 }, { "epoch": 0.39, "grad_norm": 1.3449807096523152, "learning_rate": 6.983825644813599e-06, "loss": 0.2057, "step": 5439 }, { "epoch": 0.39, "grad_norm": 1.3459576211057953, "learning_rate": 6.982762268848024e-06, "loss": 0.2333, "step": 5440 }, { "epoch": 0.39, "grad_norm": 1.3806908566081453, "learning_rate": 6.981698786455649e-06, "loss": 0.2349, "step": 5441 }, { "epoch": 0.39, "grad_norm": 1.3828263332108028, "learning_rate": 6.980635197693556e-06, "loss": 0.2067, "step": 5442 }, { "epoch": 0.39, "grad_norm": 1.3596307606629263, "learning_rate": 6.979571502618834e-06, "loss": 0.193, "step": 5443 }, { "epoch": 0.39, "grad_norm": 1.2764381400960554, "learning_rate": 6.978507701288577e-06, "loss": 0.2017, "step": 5444 }, { "epoch": 0.39, "grad_norm": 1.3404962934387084, "learning_rate": 6.977443793759887e-06, "loss": 0.1938, "step": 5445 }, { "epoch": 0.39, "grad_norm": 1.2246279123538737, "learning_rate": 6.976379780089872e-06, "loss": 0.2148, "step": 5446 }, { "epoch": 0.39, "grad_norm": 1.4996250744722766, "learning_rate": 6.975315660335641e-06, "loss": 0.1708, "step": 5447 }, { "epoch": 0.39, "grad_norm": 1.4649974659846794, "learning_rate": 6.974251434554311e-06, "loss": 0.2041, "step": 5448 }, { "epoch": 0.39, "grad_norm": 1.267520895710077, "learning_rate": 6.97318710280301e-06, "loss": 0.202, "step": 5449 }, { "epoch": 0.39, "grad_norm": 6.228808055712223, "learning_rate": 6.972122665138863e-06, "loss": 0.6716, "step": 5450 }, { "epoch": 0.39, "grad_norm": 1.5637211150013883, "learning_rate": 6.9710581216190075e-06, "loss": 0.2184, "step": 5451 }, { "epoch": 0.39, "grad_norm": 1.329835555251607, "learning_rate": 6.96999347230058e-06, "loss": 0.1842, "step": 5452 }, { "epoch": 0.39, "grad_norm": 1.3091830818548698, "learning_rate": 6.968928717240731e-06, "loss": 0.2013, "step": 5453 }, { "epoch": 0.39, "grad_norm": 1.6450888668766965, "learning_rate": 6.967863856496612e-06, "loss": 0.2997, "step": 5454 }, { "epoch": 0.39, "grad_norm": 1.4801640725200875, "learning_rate": 6.966798890125377e-06, "loss": 0.2033, "step": 5455 }, { "epoch": 0.39, "grad_norm": 1.4044223724676201, "learning_rate": 6.965733818184193e-06, "loss": 0.2347, "step": 5456 }, { "epoch": 0.39, "grad_norm": 1.3380437086676884, "learning_rate": 6.964668640730225e-06, "loss": 0.2336, "step": 5457 }, { "epoch": 0.39, "grad_norm": 1.2845347581958746, "learning_rate": 6.9636033578206495e-06, "loss": 0.1472, "step": 5458 }, { "epoch": 0.39, "grad_norm": 1.3182752854169142, "learning_rate": 6.962537969512648e-06, "loss": 0.1622, "step": 5459 }, { "epoch": 0.39, "grad_norm": 1.3649039153082538, "learning_rate": 6.961472475863406e-06, "loss": 0.2531, "step": 5460 }, { "epoch": 0.39, "grad_norm": 1.3216202290703494, "learning_rate": 6.960406876930113e-06, "loss": 0.2433, "step": 5461 }, { "epoch": 0.39, "grad_norm": 1.2192405156866233, "learning_rate": 6.959341172769966e-06, "loss": 0.2079, "step": 5462 }, { "epoch": 0.39, "grad_norm": 1.4646986136743858, "learning_rate": 6.9582753634401704e-06, "loss": 0.2305, "step": 5463 }, { "epoch": 0.39, "grad_norm": 1.215859986299084, "learning_rate": 6.957209448997932e-06, "loss": 0.1838, "step": 5464 }, { "epoch": 0.39, "grad_norm": 1.3856115455902704, "learning_rate": 6.956143429500467e-06, "loss": 0.2376, "step": 5465 }, { "epoch": 0.39, "grad_norm": 1.3726881407635687, "learning_rate": 6.955077305004992e-06, "loss": 0.2003, "step": 5466 }, { "epoch": 0.39, "grad_norm": 1.3922409624360077, "learning_rate": 6.954011075568737e-06, "loss": 0.2354, "step": 5467 }, { "epoch": 0.39, "grad_norm": 1.3932986112609804, "learning_rate": 6.952944741248929e-06, "loss": 0.2302, "step": 5468 }, { "epoch": 0.39, "grad_norm": 1.4091143853419041, "learning_rate": 6.951878302102807e-06, "loss": 0.174, "step": 5469 }, { "epoch": 0.39, "grad_norm": 1.3591130073679345, "learning_rate": 6.95081175818761e-06, "loss": 0.1643, "step": 5470 }, { "epoch": 0.39, "grad_norm": 1.3231129772971173, "learning_rate": 6.949745109560591e-06, "loss": 0.2075, "step": 5471 }, { "epoch": 0.39, "grad_norm": 1.3997652853893072, "learning_rate": 6.948678356278998e-06, "loss": 0.2142, "step": 5472 }, { "epoch": 0.39, "grad_norm": 1.2336107661111204, "learning_rate": 6.9476114984000945e-06, "loss": 0.1821, "step": 5473 }, { "epoch": 0.39, "grad_norm": 1.4804027467445875, "learning_rate": 6.946544535981141e-06, "loss": 0.1901, "step": 5474 }, { "epoch": 0.39, "grad_norm": 1.4334816150192307, "learning_rate": 6.945477469079411e-06, "loss": 0.1985, "step": 5475 }, { "epoch": 0.39, "grad_norm": 1.3239666120461713, "learning_rate": 6.94441029775218e-06, "loss": 0.1998, "step": 5476 }, { "epoch": 0.39, "grad_norm": 1.3778692413027909, "learning_rate": 6.943343022056729e-06, "loss": 0.218, "step": 5477 }, { "epoch": 0.39, "grad_norm": 1.3891162039987508, "learning_rate": 6.942275642050343e-06, "loss": 0.1768, "step": 5478 }, { "epoch": 0.39, "grad_norm": 1.126610743877066, "learning_rate": 6.9412081577903186e-06, "loss": 0.1515, "step": 5479 }, { "epoch": 0.39, "grad_norm": 5.872071658548364, "learning_rate": 6.940140569333953e-06, "loss": 0.5451, "step": 5480 }, { "epoch": 0.39, "grad_norm": 1.2669710216213697, "learning_rate": 6.939072876738549e-06, "loss": 0.2255, "step": 5481 }, { "epoch": 0.39, "grad_norm": 1.4534831120477456, "learning_rate": 6.938005080061414e-06, "loss": 0.1918, "step": 5482 }, { "epoch": 0.39, "grad_norm": 1.3807755411599583, "learning_rate": 6.936937179359869e-06, "loss": 0.2168, "step": 5483 }, { "epoch": 0.39, "grad_norm": 1.4334375026592392, "learning_rate": 6.935869174691229e-06, "loss": 0.2379, "step": 5484 }, { "epoch": 0.39, "grad_norm": 1.4130295307485325, "learning_rate": 6.934801066112824e-06, "loss": 0.2176, "step": 5485 }, { "epoch": 0.39, "grad_norm": 1.6787814601493884, "learning_rate": 6.9337328536819825e-06, "loss": 0.191, "step": 5486 }, { "epoch": 0.39, "grad_norm": 1.7966309013808608, "learning_rate": 6.932664537456045e-06, "loss": 0.1946, "step": 5487 }, { "epoch": 0.39, "grad_norm": 6.746680268168713, "learning_rate": 6.931596117492353e-06, "loss": 0.5163, "step": 5488 }, { "epoch": 0.39, "grad_norm": 1.4681701638679818, "learning_rate": 6.930527593848256e-06, "loss": 0.2214, "step": 5489 }, { "epoch": 0.39, "grad_norm": 1.3769314701402668, "learning_rate": 6.929458966581106e-06, "loss": 0.2113, "step": 5490 }, { "epoch": 0.39, "grad_norm": 1.2690878350082115, "learning_rate": 6.928390235748264e-06, "loss": 0.2087, "step": 5491 }, { "epoch": 0.39, "grad_norm": 1.2910906935960704, "learning_rate": 6.927321401407096e-06, "loss": 0.1765, "step": 5492 }, { "epoch": 0.39, "grad_norm": 1.327513611497493, "learning_rate": 6.926252463614972e-06, "loss": 0.1932, "step": 5493 }, { "epoch": 0.39, "grad_norm": 1.4762999936203332, "learning_rate": 6.925183422429268e-06, "loss": 0.232, "step": 5494 }, { "epoch": 0.39, "grad_norm": 1.3630700420103667, "learning_rate": 6.924114277907366e-06, "loss": 0.1736, "step": 5495 }, { "epoch": 0.39, "grad_norm": 1.3630609119162944, "learning_rate": 6.923045030106655e-06, "loss": 0.2032, "step": 5496 }, { "epoch": 0.39, "grad_norm": 1.4056445452757549, "learning_rate": 6.921975679084525e-06, "loss": 0.1619, "step": 5497 }, { "epoch": 0.39, "grad_norm": 1.3221458634519583, "learning_rate": 6.920906224898377e-06, "loss": 0.1909, "step": 5498 }, { "epoch": 0.39, "grad_norm": 1.3958109637965124, "learning_rate": 6.919836667605614e-06, "loss": 0.193, "step": 5499 }, { "epoch": 0.39, "grad_norm": 1.302085135385465, "learning_rate": 6.918767007263646e-06, "loss": 0.1571, "step": 5500 }, { "epoch": 0.39, "grad_norm": 1.3132205881081422, "learning_rate": 6.917697243929887e-06, "loss": 0.2357, "step": 5501 }, { "epoch": 0.39, "grad_norm": 1.3380872205262417, "learning_rate": 6.91662737766176e-06, "loss": 0.183, "step": 5502 }, { "epoch": 0.39, "grad_norm": 1.35832643457043, "learning_rate": 6.915557408516688e-06, "loss": 0.2139, "step": 5503 }, { "epoch": 0.39, "grad_norm": 1.4724196446921758, "learning_rate": 6.914487336552106e-06, "loss": 0.2205, "step": 5504 }, { "epoch": 0.39, "grad_norm": 1.2336497878247246, "learning_rate": 6.913417161825449e-06, "loss": 0.1612, "step": 5505 }, { "epoch": 0.39, "grad_norm": 1.3607706184220547, "learning_rate": 6.912346884394161e-06, "loss": 0.1695, "step": 5506 }, { "epoch": 0.39, "grad_norm": 1.4079452052457617, "learning_rate": 6.91127650431569e-06, "loss": 0.2263, "step": 5507 }, { "epoch": 0.39, "grad_norm": 1.4843204641975927, "learning_rate": 6.910206021647487e-06, "loss": 0.2341, "step": 5508 }, { "epoch": 0.39, "grad_norm": 1.5306619981529381, "learning_rate": 6.9091354364470156e-06, "loss": 0.1943, "step": 5509 }, { "epoch": 0.39, "grad_norm": 1.4043490780015915, "learning_rate": 6.908064748771738e-06, "loss": 0.2259, "step": 5510 }, { "epoch": 0.39, "grad_norm": 1.2845860301656613, "learning_rate": 6.906993958679126e-06, "loss": 0.1925, "step": 5511 }, { "epoch": 0.39, "grad_norm": 4.587546955656274, "learning_rate": 6.9059230662266515e-06, "loss": 0.5324, "step": 5512 }, { "epoch": 0.39, "grad_norm": 1.416903463322816, "learning_rate": 6.904852071471802e-06, "loss": 0.2003, "step": 5513 }, { "epoch": 0.39, "grad_norm": 1.3531753268997397, "learning_rate": 6.903780974472059e-06, "loss": 0.1796, "step": 5514 }, { "epoch": 0.39, "grad_norm": 6.996130215595993, "learning_rate": 6.902709775284917e-06, "loss": 0.7079, "step": 5515 }, { "epoch": 0.39, "grad_norm": 1.3704174787830783, "learning_rate": 6.9016384739678735e-06, "loss": 0.1737, "step": 5516 }, { "epoch": 0.39, "grad_norm": 1.416586084777156, "learning_rate": 6.900567070578431e-06, "loss": 0.2119, "step": 5517 }, { "epoch": 0.39, "grad_norm": 9.963018669013438, "learning_rate": 6.899495565174099e-06, "loss": 0.4257, "step": 5518 }, { "epoch": 0.39, "grad_norm": 1.5199529408285737, "learning_rate": 6.898423957812392e-06, "loss": 0.1975, "step": 5519 }, { "epoch": 0.39, "grad_norm": 1.4478822221640517, "learning_rate": 6.897352248550828e-06, "loss": 0.2744, "step": 5520 }, { "epoch": 0.39, "grad_norm": 1.427538214887589, "learning_rate": 6.8962804374469314e-06, "loss": 0.211, "step": 5521 }, { "epoch": 0.39, "grad_norm": 1.3044939390893289, "learning_rate": 6.8952085245582376e-06, "loss": 0.1981, "step": 5522 }, { "epoch": 0.4, "grad_norm": 1.4431798240896716, "learning_rate": 6.894136509942278e-06, "loss": 0.1824, "step": 5523 }, { "epoch": 0.4, "grad_norm": 1.421751206974571, "learning_rate": 6.893064393656596e-06, "loss": 0.2322, "step": 5524 }, { "epoch": 0.4, "grad_norm": 1.4173752264386053, "learning_rate": 6.891992175758739e-06, "loss": 0.2112, "step": 5525 }, { "epoch": 0.4, "grad_norm": 1.1231312836131055, "learning_rate": 6.890919856306257e-06, "loss": 0.1576, "step": 5526 }, { "epoch": 0.4, "grad_norm": 3.7020199159754856, "learning_rate": 6.88984743535671e-06, "loss": 0.5266, "step": 5527 }, { "epoch": 0.4, "grad_norm": 5.785817339048343, "learning_rate": 6.88877491296766e-06, "loss": 0.5494, "step": 5528 }, { "epoch": 0.4, "grad_norm": 1.489519780110349, "learning_rate": 6.887702289196677e-06, "loss": 0.2483, "step": 5529 }, { "epoch": 0.4, "grad_norm": 1.5012710029410992, "learning_rate": 6.886629564101335e-06, "loss": 0.2485, "step": 5530 }, { "epoch": 0.4, "grad_norm": 1.315510356681793, "learning_rate": 6.8855567377392114e-06, "loss": 0.1983, "step": 5531 }, { "epoch": 0.4, "grad_norm": 1.34327927053197, "learning_rate": 6.884483810167896e-06, "loss": 0.2165, "step": 5532 }, { "epoch": 0.4, "grad_norm": 1.374256567675251, "learning_rate": 6.883410781444974e-06, "loss": 0.2331, "step": 5533 }, { "epoch": 0.4, "grad_norm": 1.5501373669937877, "learning_rate": 6.882337651628044e-06, "loss": 0.2694, "step": 5534 }, { "epoch": 0.4, "grad_norm": 1.4138076226603336, "learning_rate": 6.881264420774707e-06, "loss": 0.2489, "step": 5535 }, { "epoch": 0.4, "grad_norm": 1.3696030268079102, "learning_rate": 6.88019108894257e-06, "loss": 0.1817, "step": 5536 }, { "epoch": 0.4, "grad_norm": 1.4388805252758328, "learning_rate": 6.8791176561892445e-06, "loss": 0.2189, "step": 5537 }, { "epoch": 0.4, "grad_norm": 1.255780910094656, "learning_rate": 6.878044122572349e-06, "loss": 0.1761, "step": 5538 }, { "epoch": 0.4, "grad_norm": 1.464304972671263, "learning_rate": 6.876970488149507e-06, "loss": 0.1901, "step": 5539 }, { "epoch": 0.4, "grad_norm": 1.4026020197478846, "learning_rate": 6.875896752978345e-06, "loss": 0.1867, "step": 5540 }, { "epoch": 0.4, "grad_norm": 1.3417660176466784, "learning_rate": 6.874822917116497e-06, "loss": 0.1707, "step": 5541 }, { "epoch": 0.4, "grad_norm": 4.87636241256386, "learning_rate": 6.873748980621604e-06, "loss": 0.6684, "step": 5542 }, { "epoch": 0.4, "grad_norm": 5.635198967281351, "learning_rate": 6.872674943551311e-06, "loss": 0.511, "step": 5543 }, { "epoch": 0.4, "grad_norm": 1.4265213957806706, "learning_rate": 6.871600805963265e-06, "loss": 0.1896, "step": 5544 }, { "epoch": 0.4, "grad_norm": 1.3990489038233138, "learning_rate": 6.870526567915123e-06, "loss": 0.2034, "step": 5545 }, { "epoch": 0.4, "grad_norm": 4.6926395423565115, "learning_rate": 6.869452229464547e-06, "loss": 0.5113, "step": 5546 }, { "epoch": 0.4, "grad_norm": 1.224148616305966, "learning_rate": 6.868377790669203e-06, "loss": 0.1821, "step": 5547 }, { "epoch": 0.4, "grad_norm": 1.3059819063753604, "learning_rate": 6.867303251586761e-06, "loss": 0.187, "step": 5548 }, { "epoch": 0.4, "grad_norm": 1.407287523803495, "learning_rate": 6.8662286122748965e-06, "loss": 0.2338, "step": 5549 }, { "epoch": 0.4, "grad_norm": 9.649997347402051, "learning_rate": 6.865153872791297e-06, "loss": 0.6762, "step": 5550 }, { "epoch": 0.4, "grad_norm": 1.267629056663605, "learning_rate": 6.864079033193648e-06, "loss": 0.1811, "step": 5551 }, { "epoch": 0.4, "grad_norm": 1.3098261226588972, "learning_rate": 6.863004093539641e-06, "loss": 0.2184, "step": 5552 }, { "epoch": 0.4, "grad_norm": 1.508380338709397, "learning_rate": 6.861929053886975e-06, "loss": 0.2215, "step": 5553 }, { "epoch": 0.4, "grad_norm": 1.5479807821706266, "learning_rate": 6.860853914293354e-06, "loss": 0.2122, "step": 5554 }, { "epoch": 0.4, "grad_norm": 1.1717678582352498, "learning_rate": 6.859778674816488e-06, "loss": 0.184, "step": 5555 }, { "epoch": 0.4, "grad_norm": 1.3997633658897108, "learning_rate": 6.858703335514093e-06, "loss": 0.2505, "step": 5556 }, { "epoch": 0.4, "grad_norm": 1.1880016976382144, "learning_rate": 6.8576278964438865e-06, "loss": 0.1548, "step": 5557 }, { "epoch": 0.4, "grad_norm": 1.5156639715768516, "learning_rate": 6.856552357663595e-06, "loss": 0.1811, "step": 5558 }, { "epoch": 0.4, "grad_norm": 1.4042744538306366, "learning_rate": 6.855476719230947e-06, "loss": 0.2125, "step": 5559 }, { "epoch": 0.4, "grad_norm": 5.7597815200588975, "learning_rate": 6.85440098120368e-06, "loss": 0.7087, "step": 5560 }, { "epoch": 0.4, "grad_norm": 4.859582509334346, "learning_rate": 6.853325143639537e-06, "loss": 0.4197, "step": 5561 }, { "epoch": 0.4, "grad_norm": 6.591397920738184, "learning_rate": 6.852249206596263e-06, "loss": 0.5732, "step": 5562 }, { "epoch": 0.4, "grad_norm": 1.1116464862503945, "learning_rate": 6.851173170131609e-06, "loss": 0.1248, "step": 5563 }, { "epoch": 0.4, "grad_norm": 1.3032199753861151, "learning_rate": 6.850097034303334e-06, "loss": 0.1781, "step": 5564 }, { "epoch": 0.4, "grad_norm": 1.2526028623669665, "learning_rate": 6.849020799169201e-06, "loss": 0.2041, "step": 5565 }, { "epoch": 0.4, "grad_norm": 1.599648992525517, "learning_rate": 6.847944464786977e-06, "loss": 0.2251, "step": 5566 }, { "epoch": 0.4, "grad_norm": 1.3575918628520225, "learning_rate": 6.846868031214435e-06, "loss": 0.1866, "step": 5567 }, { "epoch": 0.4, "grad_norm": 1.1710525756292172, "learning_rate": 6.845791498509354e-06, "loss": 0.1807, "step": 5568 }, { "epoch": 0.4, "grad_norm": 1.3301206328347441, "learning_rate": 6.84471486672952e-06, "loss": 0.2128, "step": 5569 }, { "epoch": 0.4, "grad_norm": 1.2307937675753777, "learning_rate": 6.84363813593272e-06, "loss": 0.1941, "step": 5570 }, { "epoch": 0.4, "grad_norm": 1.4485222151481953, "learning_rate": 6.842561306176748e-06, "loss": 0.2173, "step": 5571 }, { "epoch": 0.4, "grad_norm": 1.4337920419380417, "learning_rate": 6.8414843775194066e-06, "loss": 0.1882, "step": 5572 }, { "epoch": 0.4, "grad_norm": 1.4296636398082319, "learning_rate": 6.840407350018499e-06, "loss": 0.2026, "step": 5573 }, { "epoch": 0.4, "grad_norm": 1.4097176286519408, "learning_rate": 6.839330223731837e-06, "loss": 0.2469, "step": 5574 }, { "epoch": 0.4, "grad_norm": 1.4105148026192633, "learning_rate": 6.838252998717234e-06, "loss": 0.1928, "step": 5575 }, { "epoch": 0.4, "grad_norm": 1.4587265494013628, "learning_rate": 6.8371756750325145e-06, "loss": 0.2201, "step": 5576 }, { "epoch": 0.4, "grad_norm": 1.3402577320249627, "learning_rate": 6.836098252735504e-06, "loss": 0.224, "step": 5577 }, { "epoch": 0.4, "grad_norm": 1.3594176110604488, "learning_rate": 6.835020731884034e-06, "loss": 0.2413, "step": 5578 }, { "epoch": 0.4, "grad_norm": 1.3143514713280868, "learning_rate": 6.833943112535939e-06, "loss": 0.2131, "step": 5579 }, { "epoch": 0.4, "grad_norm": 1.459414761332634, "learning_rate": 6.832865394749065e-06, "loss": 0.1876, "step": 5580 }, { "epoch": 0.4, "grad_norm": 4.617409977829471, "learning_rate": 6.831787578581259e-06, "loss": 0.4895, "step": 5581 }, { "epoch": 0.4, "grad_norm": 1.4607782064684072, "learning_rate": 6.830709664090372e-06, "loss": 0.1918, "step": 5582 }, { "epoch": 0.4, "grad_norm": 1.8059841837737545, "learning_rate": 6.829631651334263e-06, "loss": 0.2749, "step": 5583 }, { "epoch": 0.4, "grad_norm": 5.023705694200093, "learning_rate": 6.828553540370796e-06, "loss": 0.7598, "step": 5584 }, { "epoch": 0.4, "grad_norm": 1.1538320438027967, "learning_rate": 6.827475331257839e-06, "loss": 0.1662, "step": 5585 }, { "epoch": 0.4, "grad_norm": 1.4485756229003985, "learning_rate": 6.826397024053267e-06, "loss": 0.2315, "step": 5586 }, { "epoch": 0.4, "grad_norm": 1.226058775726674, "learning_rate": 6.825318618814959e-06, "loss": 0.1556, "step": 5587 }, { "epoch": 0.4, "grad_norm": 1.363053413472059, "learning_rate": 6.824240115600798e-06, "loss": 0.2029, "step": 5588 }, { "epoch": 0.4, "grad_norm": 1.3321334761367272, "learning_rate": 6.823161514468676e-06, "loss": 0.1963, "step": 5589 }, { "epoch": 0.4, "grad_norm": 1.3763765335929299, "learning_rate": 6.822082815476486e-06, "loss": 0.1846, "step": 5590 }, { "epoch": 0.4, "grad_norm": 1.263566843650097, "learning_rate": 6.82100401868213e-06, "loss": 0.1639, "step": 5591 }, { "epoch": 0.4, "grad_norm": 1.4228771044736253, "learning_rate": 6.819925124143513e-06, "loss": 0.1874, "step": 5592 }, { "epoch": 0.4, "grad_norm": 1.4247225358616686, "learning_rate": 6.818846131918545e-06, "loss": 0.2197, "step": 5593 }, { "epoch": 0.4, "grad_norm": 1.390647622950931, "learning_rate": 6.817767042065142e-06, "loss": 0.1853, "step": 5594 }, { "epoch": 0.4, "grad_norm": 1.3395484757380494, "learning_rate": 6.816687854641227e-06, "loss": 0.1952, "step": 5595 }, { "epoch": 0.4, "grad_norm": 5.850204089984936, "learning_rate": 6.8156085697047234e-06, "loss": 0.622, "step": 5596 }, { "epoch": 0.4, "grad_norm": 1.620831558801329, "learning_rate": 6.814529187313568e-06, "loss": 0.225, "step": 5597 }, { "epoch": 0.4, "grad_norm": 1.3770183157654075, "learning_rate": 6.813449707525692e-06, "loss": 0.1756, "step": 5598 }, { "epoch": 0.4, "grad_norm": 1.5029098051648315, "learning_rate": 6.812370130399041e-06, "loss": 0.2018, "step": 5599 }, { "epoch": 0.4, "grad_norm": 1.4312658274499137, "learning_rate": 6.811290455991561e-06, "loss": 0.1681, "step": 5600 }, { "epoch": 0.4, "grad_norm": 1.3536552620855886, "learning_rate": 6.810210684361206e-06, "loss": 0.226, "step": 5601 }, { "epoch": 0.4, "grad_norm": 1.307051465859498, "learning_rate": 6.809130815565932e-06, "loss": 0.1979, "step": 5602 }, { "epoch": 0.4, "grad_norm": 1.2819170194886846, "learning_rate": 6.8080508496637045e-06, "loss": 0.1659, "step": 5603 }, { "epoch": 0.4, "grad_norm": 1.2715534584094037, "learning_rate": 6.806970786712489e-06, "loss": 0.2147, "step": 5604 }, { "epoch": 0.4, "grad_norm": 1.2244668718626839, "learning_rate": 6.8058906267702604e-06, "loss": 0.1865, "step": 5605 }, { "epoch": 0.4, "grad_norm": 5.854578091455639, "learning_rate": 6.804810369894998e-06, "loss": 0.6661, "step": 5606 }, { "epoch": 0.4, "grad_norm": 1.5679196216002247, "learning_rate": 6.803730016144686e-06, "loss": 0.2461, "step": 5607 }, { "epoch": 0.4, "grad_norm": 1.2946201530421633, "learning_rate": 6.802649565577311e-06, "loss": 0.1775, "step": 5608 }, { "epoch": 0.4, "grad_norm": 1.457927186726712, "learning_rate": 6.8015690182508694e-06, "loss": 0.1969, "step": 5609 }, { "epoch": 0.4, "grad_norm": 1.4387634054821987, "learning_rate": 6.800488374223361e-06, "loss": 0.1888, "step": 5610 }, { "epoch": 0.4, "grad_norm": 1.2634647040079436, "learning_rate": 6.7994076335527895e-06, "loss": 0.1804, "step": 5611 }, { "epoch": 0.4, "grad_norm": 1.2913501710486008, "learning_rate": 6.7983267962971646e-06, "loss": 0.221, "step": 5612 }, { "epoch": 0.4, "grad_norm": 1.4463424421840554, "learning_rate": 6.7972458625145e-06, "loss": 0.2387, "step": 5613 }, { "epoch": 0.4, "grad_norm": 1.1865963361719756, "learning_rate": 6.79616483226282e-06, "loss": 0.2194, "step": 5614 }, { "epoch": 0.4, "grad_norm": 1.333945270110938, "learning_rate": 6.795083705600147e-06, "loss": 0.2099, "step": 5615 }, { "epoch": 0.4, "grad_norm": 1.5012291318917845, "learning_rate": 6.794002482584511e-06, "loss": 0.1558, "step": 5616 }, { "epoch": 0.4, "grad_norm": 5.303196211375126, "learning_rate": 6.792921163273949e-06, "loss": 0.5494, "step": 5617 }, { "epoch": 0.4, "grad_norm": 1.2807506122899042, "learning_rate": 6.7918397477265e-06, "loss": 0.1928, "step": 5618 }, { "epoch": 0.4, "grad_norm": 1.3710794262638142, "learning_rate": 6.790758236000217e-06, "loss": 0.2087, "step": 5619 }, { "epoch": 0.4, "grad_norm": 1.2816986408208557, "learning_rate": 6.7896766281531435e-06, "loss": 0.1958, "step": 5620 }, { "epoch": 0.4, "grad_norm": 1.30825114772741, "learning_rate": 6.788594924243339e-06, "loss": 0.1722, "step": 5621 }, { "epoch": 0.4, "grad_norm": 1.3143265759851857, "learning_rate": 6.787513124328863e-06, "loss": 0.219, "step": 5622 }, { "epoch": 0.4, "grad_norm": 1.244290675604723, "learning_rate": 6.786431228467786e-06, "loss": 0.1723, "step": 5623 }, { "epoch": 0.4, "grad_norm": 1.3061247617568577, "learning_rate": 6.785349236718178e-06, "loss": 0.1745, "step": 5624 }, { "epoch": 0.4, "grad_norm": 1.4454913451299987, "learning_rate": 6.784267149138114e-06, "loss": 0.2168, "step": 5625 }, { "epoch": 0.4, "grad_norm": 1.346377383351855, "learning_rate": 6.783184965785678e-06, "loss": 0.1954, "step": 5626 }, { "epoch": 0.4, "grad_norm": 1.4333145754571348, "learning_rate": 6.782102686718959e-06, "loss": 0.2161, "step": 5627 }, { "epoch": 0.4, "grad_norm": 1.3016011363404663, "learning_rate": 6.781020311996046e-06, "loss": 0.2111, "step": 5628 }, { "epoch": 0.4, "grad_norm": 5.668174190513239, "learning_rate": 6.779937841675039e-06, "loss": 0.6214, "step": 5629 }, { "epoch": 0.4, "grad_norm": 1.7338934574235783, "learning_rate": 6.77885527581404e-06, "loss": 0.1882, "step": 5630 }, { "epoch": 0.4, "grad_norm": 1.4228248548587812, "learning_rate": 6.777772614471157e-06, "loss": 0.1925, "step": 5631 }, { "epoch": 0.4, "grad_norm": 1.4672302890507156, "learning_rate": 6.7766898577045025e-06, "loss": 0.2483, "step": 5632 }, { "epoch": 0.4, "grad_norm": 1.2541623101007504, "learning_rate": 6.775607005572196e-06, "loss": 0.2088, "step": 5633 }, { "epoch": 0.4, "grad_norm": 4.275631519261516, "learning_rate": 6.7745240581323564e-06, "loss": 0.5352, "step": 5634 }, { "epoch": 0.4, "grad_norm": 1.3223278163794383, "learning_rate": 6.773441015443118e-06, "loss": 0.2313, "step": 5635 }, { "epoch": 0.4, "grad_norm": 1.7273539639985924, "learning_rate": 6.772357877562611e-06, "loss": 0.1882, "step": 5636 }, { "epoch": 0.4, "grad_norm": 1.1681766348586549, "learning_rate": 6.771274644548974e-06, "loss": 0.1408, "step": 5637 }, { "epoch": 0.4, "grad_norm": 1.340093170375682, "learning_rate": 6.770191316460351e-06, "loss": 0.2237, "step": 5638 }, { "epoch": 0.4, "grad_norm": 1.2664269241767787, "learning_rate": 6.7691078933548906e-06, "loss": 0.2034, "step": 5639 }, { "epoch": 0.4, "grad_norm": 1.5414294694474973, "learning_rate": 6.768024375290747e-06, "loss": 0.2414, "step": 5640 }, { "epoch": 0.4, "grad_norm": 1.4095764473750485, "learning_rate": 6.7669407623260795e-06, "loss": 0.205, "step": 5641 }, { "epoch": 0.4, "grad_norm": 1.6811992346321818, "learning_rate": 6.765857054519049e-06, "loss": 0.2113, "step": 5642 }, { "epoch": 0.4, "grad_norm": 1.3459242541542007, "learning_rate": 6.764773251927829e-06, "loss": 0.1846, "step": 5643 }, { "epoch": 0.4, "grad_norm": 1.252840276206209, "learning_rate": 6.763689354610593e-06, "loss": 0.1494, "step": 5644 }, { "epoch": 0.4, "grad_norm": 1.40161470572911, "learning_rate": 6.762605362625519e-06, "loss": 0.2016, "step": 5645 }, { "epoch": 0.4, "grad_norm": 1.284078616107917, "learning_rate": 6.761521276030789e-06, "loss": 0.1993, "step": 5646 }, { "epoch": 0.4, "grad_norm": 1.362782444433252, "learning_rate": 6.760437094884598e-06, "loss": 0.2081, "step": 5647 }, { "epoch": 0.4, "grad_norm": 1.4501700912105644, "learning_rate": 6.7593528192451365e-06, "loss": 0.2136, "step": 5648 }, { "epoch": 0.4, "grad_norm": 8.472602136339102, "learning_rate": 6.758268449170604e-06, "loss": 0.7433, "step": 5649 }, { "epoch": 0.4, "grad_norm": 1.2875915689205593, "learning_rate": 6.757183984719208e-06, "loss": 0.1697, "step": 5650 }, { "epoch": 0.4, "grad_norm": 1.2809689059353946, "learning_rate": 6.756099425949155e-06, "loss": 0.1538, "step": 5651 }, { "epoch": 0.4, "grad_norm": 1.3446215599964646, "learning_rate": 6.7550147729186635e-06, "loss": 0.2251, "step": 5652 }, { "epoch": 0.4, "grad_norm": 1.2453541675547508, "learning_rate": 6.753930025685951e-06, "loss": 0.211, "step": 5653 }, { "epoch": 0.4, "grad_norm": 1.2107568020733361, "learning_rate": 6.752845184309241e-06, "loss": 0.1778, "step": 5654 }, { "epoch": 0.4, "grad_norm": 1.242823192084643, "learning_rate": 6.751760248846766e-06, "loss": 0.1985, "step": 5655 }, { "epoch": 0.4, "grad_norm": 1.285560873477995, "learning_rate": 6.7506752193567605e-06, "loss": 0.1797, "step": 5656 }, { "epoch": 0.4, "grad_norm": 1.294694911178069, "learning_rate": 6.749590095897464e-06, "loss": 0.1829, "step": 5657 }, { "epoch": 0.4, "grad_norm": 1.4382540341219394, "learning_rate": 6.748504878527124e-06, "loss": 0.2331, "step": 5658 }, { "epoch": 0.4, "grad_norm": 5.490172429199845, "learning_rate": 6.747419567303985e-06, "loss": 0.6948, "step": 5659 }, { "epoch": 0.4, "grad_norm": 1.5703790270565599, "learning_rate": 6.7463341622863074e-06, "loss": 0.2278, "step": 5660 }, { "epoch": 0.4, "grad_norm": 1.3390537479883828, "learning_rate": 6.745248663532351e-06, "loss": 0.19, "step": 5661 }, { "epoch": 0.41, "grad_norm": 1.4671708164590929, "learning_rate": 6.744163071100378e-06, "loss": 0.2003, "step": 5662 }, { "epoch": 0.41, "grad_norm": 1.2791333480531302, "learning_rate": 6.743077385048662e-06, "loss": 0.2, "step": 5663 }, { "epoch": 0.41, "grad_norm": 1.3189994027736311, "learning_rate": 6.741991605435475e-06, "loss": 0.2095, "step": 5664 }, { "epoch": 0.41, "grad_norm": 1.2622877259411815, "learning_rate": 6.7409057323191e-06, "loss": 0.2167, "step": 5665 }, { "epoch": 0.41, "grad_norm": 1.2798733050476965, "learning_rate": 6.7398197657578224e-06, "loss": 0.1964, "step": 5666 }, { "epoch": 0.41, "grad_norm": 1.3298721564993314, "learning_rate": 6.738733705809932e-06, "loss": 0.2021, "step": 5667 }, { "epoch": 0.41, "grad_norm": 1.4185326127884075, "learning_rate": 6.737647552533722e-06, "loss": 0.2189, "step": 5668 }, { "epoch": 0.41, "grad_norm": 5.825427042928148, "learning_rate": 6.736561305987495e-06, "loss": 0.6682, "step": 5669 }, { "epoch": 0.41, "grad_norm": 1.4556261443779515, "learning_rate": 6.735474966229558e-06, "loss": 0.253, "step": 5670 }, { "epoch": 0.41, "grad_norm": 1.2609450123145123, "learning_rate": 6.734388533318219e-06, "loss": 0.1584, "step": 5671 }, { "epoch": 0.41, "grad_norm": 1.1865214803102657, "learning_rate": 6.733302007311792e-06, "loss": 0.1879, "step": 5672 }, { "epoch": 0.41, "grad_norm": 1.3307829453079547, "learning_rate": 6.7322153882686e-06, "loss": 0.1891, "step": 5673 }, { "epoch": 0.41, "grad_norm": 1.1971321588825528, "learning_rate": 6.7311286762469695e-06, "loss": 0.1632, "step": 5674 }, { "epoch": 0.41, "grad_norm": 1.306003062447459, "learning_rate": 6.730041871305227e-06, "loss": 0.2139, "step": 5675 }, { "epoch": 0.41, "grad_norm": 1.5119779248431573, "learning_rate": 6.72895497350171e-06, "loss": 0.2152, "step": 5676 }, { "epoch": 0.41, "grad_norm": 1.3437741442558313, "learning_rate": 6.72786798289476e-06, "loss": 0.2398, "step": 5677 }, { "epoch": 0.41, "grad_norm": 1.3986115793524223, "learning_rate": 6.726780899542721e-06, "loss": 0.1945, "step": 5678 }, { "epoch": 0.41, "grad_norm": 1.4754978141769395, "learning_rate": 6.725693723503943e-06, "loss": 0.2211, "step": 5679 }, { "epoch": 0.41, "grad_norm": 1.289503148716925, "learning_rate": 6.724606454836782e-06, "loss": 0.1946, "step": 5680 }, { "epoch": 0.41, "grad_norm": 1.2160744511196684, "learning_rate": 6.723519093599597e-06, "loss": 0.1879, "step": 5681 }, { "epoch": 0.41, "grad_norm": 1.3508942105590303, "learning_rate": 6.722431639850755e-06, "loss": 0.1852, "step": 5682 }, { "epoch": 0.41, "grad_norm": 1.4369667744563324, "learning_rate": 6.721344093648625e-06, "loss": 0.2123, "step": 5683 }, { "epoch": 0.41, "grad_norm": 1.5170552888975881, "learning_rate": 6.720256455051583e-06, "loss": 0.227, "step": 5684 }, { "epoch": 0.41, "grad_norm": 1.4636048633477217, "learning_rate": 6.719168724118008e-06, "loss": 0.1767, "step": 5685 }, { "epoch": 0.41, "grad_norm": 1.2595801185377093, "learning_rate": 6.718080900906288e-06, "loss": 0.1974, "step": 5686 }, { "epoch": 0.41, "grad_norm": 1.2092727413352022, "learning_rate": 6.716992985474808e-06, "loss": 0.1826, "step": 5687 }, { "epoch": 0.41, "grad_norm": 1.3672481777544003, "learning_rate": 6.715904977881968e-06, "loss": 0.2308, "step": 5688 }, { "epoch": 0.41, "grad_norm": 1.1813708068878224, "learning_rate": 6.714816878186164e-06, "loss": 0.185, "step": 5689 }, { "epoch": 0.41, "grad_norm": 1.5669579216180372, "learning_rate": 6.713728686445804e-06, "loss": 0.2299, "step": 5690 }, { "epoch": 0.41, "grad_norm": 1.4903699159583168, "learning_rate": 6.712640402719296e-06, "loss": 0.2031, "step": 5691 }, { "epoch": 0.41, "grad_norm": 1.2527154817357304, "learning_rate": 6.7115520270650555e-06, "loss": 0.2142, "step": 5692 }, { "epoch": 0.41, "grad_norm": 1.3861229545733509, "learning_rate": 6.7104635595415005e-06, "loss": 0.2038, "step": 5693 }, { "epoch": 0.41, "grad_norm": 1.3582388168366353, "learning_rate": 6.709375000207058e-06, "loss": 0.2402, "step": 5694 }, { "epoch": 0.41, "grad_norm": 10.378207215998074, "learning_rate": 6.708286349120157e-06, "loss": 0.7471, "step": 5695 }, { "epoch": 0.41, "grad_norm": 1.3097393518637221, "learning_rate": 6.707197606339231e-06, "loss": 0.229, "step": 5696 }, { "epoch": 0.41, "grad_norm": 1.4454186492776733, "learning_rate": 6.706108771922718e-06, "loss": 0.1992, "step": 5697 }, { "epoch": 0.41, "grad_norm": 1.4415416407224941, "learning_rate": 6.7050198459290665e-06, "loss": 0.1991, "step": 5698 }, { "epoch": 0.41, "grad_norm": 1.3622344353971196, "learning_rate": 6.7039308284167215e-06, "loss": 0.1746, "step": 5699 }, { "epoch": 0.41, "grad_norm": 1.5682298830567587, "learning_rate": 6.702841719444141e-06, "loss": 0.1946, "step": 5700 }, { "epoch": 0.41, "grad_norm": 1.4176130913484848, "learning_rate": 6.70175251906978e-06, "loss": 0.1868, "step": 5701 }, { "epoch": 0.41, "grad_norm": 1.5217291404470525, "learning_rate": 6.7006632273521054e-06, "loss": 0.2398, "step": 5702 }, { "epoch": 0.41, "grad_norm": 1.3771729268140283, "learning_rate": 6.699573844349585e-06, "loss": 0.1914, "step": 5703 }, { "epoch": 0.41, "grad_norm": 1.298716872644517, "learning_rate": 6.698484370120694e-06, "loss": 0.2126, "step": 5704 }, { "epoch": 0.41, "grad_norm": 1.3101969439906083, "learning_rate": 6.697394804723909e-06, "loss": 0.1904, "step": 5705 }, { "epoch": 0.41, "grad_norm": 1.3257733140367038, "learning_rate": 6.696305148217712e-06, "loss": 0.234, "step": 5706 }, { "epoch": 0.41, "grad_norm": 1.3260888015245609, "learning_rate": 6.695215400660597e-06, "loss": 0.2436, "step": 5707 }, { "epoch": 0.41, "grad_norm": 1.293972584864335, "learning_rate": 6.694125562111052e-06, "loss": 0.1883, "step": 5708 }, { "epoch": 0.41, "grad_norm": 1.301660574142384, "learning_rate": 6.693035632627578e-06, "loss": 0.2161, "step": 5709 }, { "epoch": 0.41, "grad_norm": 1.4970658983587233, "learning_rate": 6.6919456122686764e-06, "loss": 0.1842, "step": 5710 }, { "epoch": 0.41, "grad_norm": 1.2451956593961513, "learning_rate": 6.690855501092858e-06, "loss": 0.1904, "step": 5711 }, { "epoch": 0.41, "grad_norm": 1.2251760310389341, "learning_rate": 6.689765299158633e-06, "loss": 0.1505, "step": 5712 }, { "epoch": 0.41, "grad_norm": 1.3432077675301248, "learning_rate": 6.688675006524518e-06, "loss": 0.2385, "step": 5713 }, { "epoch": 0.41, "grad_norm": 5.70178491501225, "learning_rate": 6.6875846232490395e-06, "loss": 0.5519, "step": 5714 }, { "epoch": 0.41, "grad_norm": 4.803092414746553, "learning_rate": 6.686494149390721e-06, "loss": 0.6494, "step": 5715 }, { "epoch": 0.41, "grad_norm": 1.3847034323477567, "learning_rate": 6.685403585008098e-06, "loss": 0.1738, "step": 5716 }, { "epoch": 0.41, "grad_norm": 1.5130012561071726, "learning_rate": 6.684312930159706e-06, "loss": 0.1969, "step": 5717 }, { "epoch": 0.41, "grad_norm": 1.3469290735108792, "learning_rate": 6.683222184904088e-06, "loss": 0.2216, "step": 5718 }, { "epoch": 0.41, "grad_norm": 1.552237356884769, "learning_rate": 6.682131349299789e-06, "loss": 0.2605, "step": 5719 }, { "epoch": 0.41, "grad_norm": 1.3784043334802591, "learning_rate": 6.681040423405363e-06, "loss": 0.2073, "step": 5720 }, { "epoch": 0.41, "grad_norm": 1.1621381142808658, "learning_rate": 6.6799494072793646e-06, "loss": 0.156, "step": 5721 }, { "epoch": 0.41, "grad_norm": 1.3307904565468147, "learning_rate": 6.678858300980356e-06, "loss": 0.169, "step": 5722 }, { "epoch": 0.41, "grad_norm": 1.2790254033946793, "learning_rate": 6.677767104566904e-06, "loss": 0.1715, "step": 5723 }, { "epoch": 0.41, "grad_norm": 1.3824441325089205, "learning_rate": 6.67667581809758e-06, "loss": 0.1919, "step": 5724 }, { "epoch": 0.41, "grad_norm": 1.280187317096119, "learning_rate": 6.675584441630958e-06, "loss": 0.1869, "step": 5725 }, { "epoch": 0.41, "grad_norm": 1.4456989285244566, "learning_rate": 6.6744929752256215e-06, "loss": 0.2211, "step": 5726 }, { "epoch": 0.41, "grad_norm": 1.2500894170526489, "learning_rate": 6.673401418940152e-06, "loss": 0.1908, "step": 5727 }, { "epoch": 0.41, "grad_norm": 1.357103916117228, "learning_rate": 6.672309772833144e-06, "loss": 0.2045, "step": 5728 }, { "epoch": 0.41, "grad_norm": 1.2699251316660984, "learning_rate": 6.671218036963192e-06, "loss": 0.209, "step": 5729 }, { "epoch": 0.41, "grad_norm": 1.3158350854769476, "learning_rate": 6.670126211388894e-06, "loss": 0.197, "step": 5730 }, { "epoch": 0.41, "grad_norm": 1.4958896619074076, "learning_rate": 6.669034296168855e-06, "loss": 0.1965, "step": 5731 }, { "epoch": 0.41, "grad_norm": 1.4591919793204364, "learning_rate": 6.667942291361687e-06, "loss": 0.2558, "step": 5732 }, { "epoch": 0.41, "grad_norm": 1.4072871061940093, "learning_rate": 6.666850197026003e-06, "loss": 0.1704, "step": 5733 }, { "epoch": 0.41, "grad_norm": 1.110998594507066, "learning_rate": 6.665758013220422e-06, "loss": 0.18, "step": 5734 }, { "epoch": 0.41, "grad_norm": 1.2012612616825202, "learning_rate": 6.66466574000357e-06, "loss": 0.1495, "step": 5735 }, { "epoch": 0.41, "grad_norm": 1.3198798354609544, "learning_rate": 6.663573377434074e-06, "loss": 0.2195, "step": 5736 }, { "epoch": 0.41, "grad_norm": 1.2347107647484115, "learning_rate": 6.662480925570567e-06, "loss": 0.1923, "step": 5737 }, { "epoch": 0.41, "grad_norm": 1.2981680362855919, "learning_rate": 6.6613883844716894e-06, "loss": 0.1684, "step": 5738 }, { "epoch": 0.41, "grad_norm": 1.3114960686925083, "learning_rate": 6.660295754196082e-06, "loss": 0.193, "step": 5739 }, { "epoch": 0.41, "grad_norm": 1.4874291814930212, "learning_rate": 6.659203034802397e-06, "loss": 0.261, "step": 5740 }, { "epoch": 0.41, "grad_norm": 1.2083904418057052, "learning_rate": 6.658110226349284e-06, "loss": 0.1605, "step": 5741 }, { "epoch": 0.41, "grad_norm": 1.362892995611294, "learning_rate": 6.657017328895401e-06, "loss": 0.1785, "step": 5742 }, { "epoch": 0.41, "grad_norm": 1.306355006571541, "learning_rate": 6.65592434249941e-06, "loss": 0.1919, "step": 5743 }, { "epoch": 0.41, "grad_norm": 1.4577068238341349, "learning_rate": 6.6548312672199815e-06, "loss": 0.243, "step": 5744 }, { "epoch": 0.41, "grad_norm": 1.4651108479679416, "learning_rate": 6.653738103115781e-06, "loss": 0.2136, "step": 5745 }, { "epoch": 0.41, "grad_norm": 1.259625944798431, "learning_rate": 6.6526448502454925e-06, "loss": 0.1883, "step": 5746 }, { "epoch": 0.41, "grad_norm": 1.5029829739662313, "learning_rate": 6.651551508667793e-06, "loss": 0.2011, "step": 5747 }, { "epoch": 0.41, "grad_norm": 1.4200453788687677, "learning_rate": 6.650458078441368e-06, "loss": 0.227, "step": 5748 }, { "epoch": 0.41, "grad_norm": 1.3398797259999171, "learning_rate": 6.649364559624912e-06, "loss": 0.2113, "step": 5749 }, { "epoch": 0.41, "grad_norm": 1.2671121825600271, "learning_rate": 6.6482709522771196e-06, "loss": 0.1869, "step": 5750 }, { "epoch": 0.41, "grad_norm": 1.3590177332533206, "learning_rate": 6.647177256456691e-06, "loss": 0.2686, "step": 5751 }, { "epoch": 0.41, "grad_norm": 1.3245832044176813, "learning_rate": 6.646083472222329e-06, "loss": 0.198, "step": 5752 }, { "epoch": 0.41, "grad_norm": 1.457116661497489, "learning_rate": 6.644989599632747e-06, "loss": 0.1844, "step": 5753 }, { "epoch": 0.41, "grad_norm": 1.4003388410510098, "learning_rate": 6.643895638746658e-06, "loss": 0.2018, "step": 5754 }, { "epoch": 0.41, "grad_norm": 1.4306379403620073, "learning_rate": 6.6428015896227826e-06, "loss": 0.2297, "step": 5755 }, { "epoch": 0.41, "grad_norm": 1.2520424959493313, "learning_rate": 6.641707452319845e-06, "loss": 0.1669, "step": 5756 }, { "epoch": 0.41, "grad_norm": 1.458495847628931, "learning_rate": 6.6406132268965716e-06, "loss": 0.1905, "step": 5757 }, { "epoch": 0.41, "grad_norm": 1.4327577804334437, "learning_rate": 6.6395189134117e-06, "loss": 0.2105, "step": 5758 }, { "epoch": 0.41, "grad_norm": 1.5175703626853256, "learning_rate": 6.638424511923965e-06, "loss": 0.2459, "step": 5759 }, { "epoch": 0.41, "grad_norm": 1.30495049377198, "learning_rate": 6.637330022492112e-06, "loss": 0.1746, "step": 5760 }, { "epoch": 0.41, "grad_norm": 1.4175972507024075, "learning_rate": 6.6362354451748876e-06, "loss": 0.2281, "step": 5761 }, { "epoch": 0.41, "grad_norm": 1.6614805020605328, "learning_rate": 6.635140780031047e-06, "loss": 0.2368, "step": 5762 }, { "epoch": 0.41, "grad_norm": 4.860031903398821, "learning_rate": 6.634046027119343e-06, "loss": 0.7272, "step": 5763 }, { "epoch": 0.41, "grad_norm": 1.1892387029159839, "learning_rate": 6.632951186498542e-06, "loss": 0.168, "step": 5764 }, { "epoch": 0.41, "grad_norm": 1.354577102683608, "learning_rate": 6.631856258227406e-06, "loss": 0.2307, "step": 5765 }, { "epoch": 0.41, "grad_norm": 1.434940027389757, "learning_rate": 6.63076124236471e-06, "loss": 0.2427, "step": 5766 }, { "epoch": 0.41, "grad_norm": 1.6738306611269618, "learning_rate": 6.629666138969231e-06, "loss": 0.2289, "step": 5767 }, { "epoch": 0.41, "grad_norm": 1.2407976139071275, "learning_rate": 6.628570948099746e-06, "loss": 0.1705, "step": 5768 }, { "epoch": 0.41, "grad_norm": 1.2711675218171379, "learning_rate": 6.627475669815043e-06, "loss": 0.1932, "step": 5769 }, { "epoch": 0.41, "grad_norm": 1.2918210848770273, "learning_rate": 6.626380304173913e-06, "loss": 0.2035, "step": 5770 }, { "epoch": 0.41, "grad_norm": 1.3993038185093003, "learning_rate": 6.6252848512351475e-06, "loss": 0.2013, "step": 5771 }, { "epoch": 0.41, "grad_norm": 1.481145230863057, "learning_rate": 6.62418931105755e-06, "loss": 0.2196, "step": 5772 }, { "epoch": 0.41, "grad_norm": 1.566701943275558, "learning_rate": 6.623093683699921e-06, "loss": 0.2832, "step": 5773 }, { "epoch": 0.41, "grad_norm": 1.3180289505129423, "learning_rate": 6.621997969221072e-06, "loss": 0.1873, "step": 5774 }, { "epoch": 0.41, "grad_norm": 1.2334155726627893, "learning_rate": 6.620902167679817e-06, "loss": 0.2202, "step": 5775 }, { "epoch": 0.41, "grad_norm": 1.3969341302125762, "learning_rate": 6.6198062791349715e-06, "loss": 0.235, "step": 5776 }, { "epoch": 0.41, "grad_norm": 1.4117737649966786, "learning_rate": 6.618710303645361e-06, "loss": 0.2173, "step": 5777 }, { "epoch": 0.41, "grad_norm": 1.339795476748654, "learning_rate": 6.617614241269811e-06, "loss": 0.2019, "step": 5778 }, { "epoch": 0.41, "grad_norm": 1.2367667482086984, "learning_rate": 6.616518092067155e-06, "loss": 0.1569, "step": 5779 }, { "epoch": 0.41, "grad_norm": 1.4824745373651798, "learning_rate": 6.615421856096231e-06, "loss": 0.2413, "step": 5780 }, { "epoch": 0.41, "grad_norm": 1.5265366193080834, "learning_rate": 6.614325533415879e-06, "loss": 0.218, "step": 5781 }, { "epoch": 0.41, "grad_norm": 4.1946566010934205, "learning_rate": 6.613229124084947e-06, "loss": 0.5893, "step": 5782 }, { "epoch": 0.41, "grad_norm": 1.2337097864822355, "learning_rate": 6.612132628162283e-06, "loss": 0.1676, "step": 5783 }, { "epoch": 0.41, "grad_norm": 1.4905616945451448, "learning_rate": 6.611036045706746e-06, "loss": 0.2212, "step": 5784 }, { "epoch": 0.41, "grad_norm": 4.125834108847531, "learning_rate": 6.609939376777192e-06, "loss": 0.5499, "step": 5785 }, { "epoch": 0.41, "grad_norm": 1.2510642605702738, "learning_rate": 6.60884262143249e-06, "loss": 0.1854, "step": 5786 }, { "epoch": 0.41, "grad_norm": 1.2998781303620168, "learning_rate": 6.607745779731507e-06, "loss": 0.1792, "step": 5787 }, { "epoch": 0.41, "grad_norm": 1.2044164390816559, "learning_rate": 6.606648851733119e-06, "loss": 0.2096, "step": 5788 }, { "epoch": 0.41, "grad_norm": 1.4204717989247857, "learning_rate": 6.6055518374962025e-06, "loss": 0.1722, "step": 5789 }, { "epoch": 0.41, "grad_norm": 1.37736387519301, "learning_rate": 6.604454737079639e-06, "loss": 0.1922, "step": 5790 }, { "epoch": 0.41, "grad_norm": 1.4526356734509236, "learning_rate": 6.603357550542324e-06, "loss": 0.1842, "step": 5791 }, { "epoch": 0.41, "grad_norm": 1.206607547875254, "learning_rate": 6.602260277943143e-06, "loss": 0.1975, "step": 5792 }, { "epoch": 0.41, "grad_norm": 1.2891778931473696, "learning_rate": 6.601162919340998e-06, "loss": 0.2219, "step": 5793 }, { "epoch": 0.41, "grad_norm": 1.3345675469457485, "learning_rate": 6.600065474794786e-06, "loss": 0.226, "step": 5794 }, { "epoch": 0.41, "grad_norm": 1.3385439372006158, "learning_rate": 6.5989679443634165e-06, "loss": 0.1717, "step": 5795 }, { "epoch": 0.41, "grad_norm": 1.2162891827977884, "learning_rate": 6.597870328105801e-06, "loss": 0.1972, "step": 5796 }, { "epoch": 0.41, "grad_norm": 1.3114315083343318, "learning_rate": 6.596772626080854e-06, "loss": 0.1994, "step": 5797 }, { "epoch": 0.41, "grad_norm": 1.3799082848016855, "learning_rate": 6.595674838347496e-06, "loss": 0.2021, "step": 5798 }, { "epoch": 0.41, "grad_norm": 1.3339837257477711, "learning_rate": 6.594576964964652e-06, "loss": 0.2069, "step": 5799 }, { "epoch": 0.41, "grad_norm": 4.373644131742913, "learning_rate": 6.593479005991251e-06, "loss": 0.498, "step": 5800 }, { "epoch": 0.41, "grad_norm": 1.4698861948278468, "learning_rate": 6.592380961486228e-06, "loss": 0.2246, "step": 5801 }, { "epoch": 0.42, "grad_norm": 1.4385713560968467, "learning_rate": 6.591282831508521e-06, "loss": 0.1958, "step": 5802 }, { "epoch": 0.42, "grad_norm": 1.4490409354008162, "learning_rate": 6.590184616117073e-06, "loss": 0.2473, "step": 5803 }, { "epoch": 0.42, "grad_norm": 1.3637450520681709, "learning_rate": 6.5890863153708325e-06, "loss": 0.1981, "step": 5804 }, { "epoch": 0.42, "grad_norm": 1.1372947335089096, "learning_rate": 6.587987929328753e-06, "loss": 0.1718, "step": 5805 }, { "epoch": 0.42, "grad_norm": 1.2763116248491544, "learning_rate": 6.586889458049789e-06, "loss": 0.1752, "step": 5806 }, { "epoch": 0.42, "grad_norm": 1.3777730365982443, "learning_rate": 6.585790901592903e-06, "loss": 0.187, "step": 5807 }, { "epoch": 0.42, "grad_norm": 1.400989115968126, "learning_rate": 6.584692260017062e-06, "loss": 0.2244, "step": 5808 }, { "epoch": 0.42, "grad_norm": 1.455140337920268, "learning_rate": 6.583593533381235e-06, "loss": 0.2154, "step": 5809 }, { "epoch": 0.42, "grad_norm": 6.028481766449438, "learning_rate": 6.582494721744399e-06, "loss": 0.7127, "step": 5810 }, { "epoch": 0.42, "grad_norm": 3.828653839167092, "learning_rate": 6.581395825165534e-06, "loss": 0.4907, "step": 5811 }, { "epoch": 0.42, "grad_norm": 1.2944491358521215, "learning_rate": 6.580296843703623e-06, "loss": 0.1676, "step": 5812 }, { "epoch": 0.42, "grad_norm": 1.288376814289655, "learning_rate": 6.579197777417656e-06, "loss": 0.1917, "step": 5813 }, { "epoch": 0.42, "grad_norm": 1.4308751371931998, "learning_rate": 6.578098626366627e-06, "loss": 0.2248, "step": 5814 }, { "epoch": 0.42, "grad_norm": 1.3307813109395805, "learning_rate": 6.576999390609532e-06, "loss": 0.1887, "step": 5815 }, { "epoch": 0.42, "grad_norm": 1.404066257320106, "learning_rate": 6.575900070205375e-06, "loss": 0.2622, "step": 5816 }, { "epoch": 0.42, "grad_norm": 4.363946171834274, "learning_rate": 6.574800665213162e-06, "loss": 0.4892, "step": 5817 }, { "epoch": 0.42, "grad_norm": 1.1342565562742273, "learning_rate": 6.5737011756919065e-06, "loss": 0.1559, "step": 5818 }, { "epoch": 0.42, "grad_norm": 1.3390629374145653, "learning_rate": 6.572601601700626e-06, "loss": 0.2015, "step": 5819 }, { "epoch": 0.42, "grad_norm": 1.3552363009522392, "learning_rate": 6.571501943298335e-06, "loss": 0.2059, "step": 5820 }, { "epoch": 0.42, "grad_norm": 1.413615809833346, "learning_rate": 6.570402200544066e-06, "loss": 0.2244, "step": 5821 }, { "epoch": 0.42, "grad_norm": 1.726952335686051, "learning_rate": 6.569302373496844e-06, "loss": 0.2532, "step": 5822 }, { "epoch": 0.42, "grad_norm": 1.385709602737147, "learning_rate": 6.568202462215707e-06, "loss": 0.2194, "step": 5823 }, { "epoch": 0.42, "grad_norm": 1.3777678397484279, "learning_rate": 6.567102466759689e-06, "loss": 0.1682, "step": 5824 }, { "epoch": 0.42, "grad_norm": 1.4669834801301234, "learning_rate": 6.5660023871878395e-06, "loss": 0.1959, "step": 5825 }, { "epoch": 0.42, "grad_norm": 6.437592375258112, "learning_rate": 6.5649022235592016e-06, "loss": 0.5079, "step": 5826 }, { "epoch": 0.42, "grad_norm": 1.3516275588362223, "learning_rate": 6.563801975932831e-06, "loss": 0.2358, "step": 5827 }, { "epoch": 0.42, "grad_norm": 1.3645295875037928, "learning_rate": 6.56270164436778e-06, "loss": 0.2029, "step": 5828 }, { "epoch": 0.42, "grad_norm": 1.4078522411267933, "learning_rate": 6.561601228923116e-06, "loss": 0.2348, "step": 5829 }, { "epoch": 0.42, "grad_norm": 1.3644431828728858, "learning_rate": 6.5605007296579e-06, "loss": 0.214, "step": 5830 }, { "epoch": 0.42, "grad_norm": 1.2750978268468776, "learning_rate": 6.559400146631205e-06, "loss": 0.2012, "step": 5831 }, { "epoch": 0.42, "grad_norm": 1.3505032187284072, "learning_rate": 6.558299479902105e-06, "loss": 0.2188, "step": 5832 }, { "epoch": 0.42, "grad_norm": 1.1041574253780873, "learning_rate": 6.55719872952968e-06, "loss": 0.1777, "step": 5833 }, { "epoch": 0.42, "grad_norm": 1.3513393518954229, "learning_rate": 6.556097895573015e-06, "loss": 0.2246, "step": 5834 }, { "epoch": 0.42, "grad_norm": 1.522512949439286, "learning_rate": 6.554996978091195e-06, "loss": 0.1811, "step": 5835 }, { "epoch": 0.42, "grad_norm": 1.2674060529948155, "learning_rate": 6.553895977143316e-06, "loss": 0.22, "step": 5836 }, { "epoch": 0.42, "grad_norm": 1.168520078217555, "learning_rate": 6.552794892788473e-06, "loss": 0.18, "step": 5837 }, { "epoch": 0.42, "grad_norm": 1.4865238283399682, "learning_rate": 6.55169372508577e-06, "loss": 0.237, "step": 5838 }, { "epoch": 0.42, "grad_norm": 1.4676534859000676, "learning_rate": 6.550592474094313e-06, "loss": 0.2596, "step": 5839 }, { "epoch": 0.42, "grad_norm": 1.1856485615316945, "learning_rate": 6.549491139873211e-06, "loss": 0.1789, "step": 5840 }, { "epoch": 0.42, "grad_norm": 1.1907948669397783, "learning_rate": 6.548389722481579e-06, "loss": 0.1832, "step": 5841 }, { "epoch": 0.42, "grad_norm": 1.2804324033510017, "learning_rate": 6.54728822197854e-06, "loss": 0.2482, "step": 5842 }, { "epoch": 0.42, "grad_norm": 1.395626716107591, "learning_rate": 6.546186638423218e-06, "loss": 0.2046, "step": 5843 }, { "epoch": 0.42, "grad_norm": 1.3416283520985277, "learning_rate": 6.545084971874738e-06, "loss": 0.208, "step": 5844 }, { "epoch": 0.42, "grad_norm": 1.2694107596100175, "learning_rate": 6.543983222392234e-06, "loss": 0.1903, "step": 5845 }, { "epoch": 0.42, "grad_norm": 1.1308645061090474, "learning_rate": 6.542881390034846e-06, "loss": 0.176, "step": 5846 }, { "epoch": 0.42, "grad_norm": 1.2070393017771712, "learning_rate": 6.541779474861715e-06, "loss": 0.1758, "step": 5847 }, { "epoch": 0.42, "grad_norm": 1.2109682595934381, "learning_rate": 6.540677476931987e-06, "loss": 0.1598, "step": 5848 }, { "epoch": 0.42, "grad_norm": 1.3516036121659398, "learning_rate": 6.53957539630481e-06, "loss": 0.1809, "step": 5849 }, { "epoch": 0.42, "grad_norm": 1.263059345262111, "learning_rate": 6.5384732330393455e-06, "loss": 0.204, "step": 5850 }, { "epoch": 0.42, "grad_norm": 4.737474395910261, "learning_rate": 6.537370987194749e-06, "loss": 0.4954, "step": 5851 }, { "epoch": 0.42, "grad_norm": 1.3643204377707794, "learning_rate": 6.536268658830185e-06, "loss": 0.2165, "step": 5852 }, { "epoch": 0.42, "grad_norm": 1.396008763518398, "learning_rate": 6.535166248004821e-06, "loss": 0.1951, "step": 5853 }, { "epoch": 0.42, "grad_norm": 1.1608353835904177, "learning_rate": 6.534063754777835e-06, "loss": 0.1839, "step": 5854 }, { "epoch": 0.42, "grad_norm": 1.3736640493188175, "learning_rate": 6.532961179208399e-06, "loss": 0.2163, "step": 5855 }, { "epoch": 0.42, "grad_norm": 1.4782450295109213, "learning_rate": 6.531858521355699e-06, "loss": 0.1798, "step": 5856 }, { "epoch": 0.42, "grad_norm": 1.4002589477576388, "learning_rate": 6.530755781278918e-06, "loss": 0.2173, "step": 5857 }, { "epoch": 0.42, "grad_norm": 4.793937893111318, "learning_rate": 6.529652959037247e-06, "loss": 0.567, "step": 5858 }, { "epoch": 0.42, "grad_norm": 1.2277525259011342, "learning_rate": 6.528550054689884e-06, "loss": 0.1657, "step": 5859 }, { "epoch": 0.42, "grad_norm": 1.4775171224782773, "learning_rate": 6.527447068296026e-06, "loss": 0.2176, "step": 5860 }, { "epoch": 0.42, "grad_norm": 1.3015551534841745, "learning_rate": 6.526343999914877e-06, "loss": 0.2016, "step": 5861 }, { "epoch": 0.42, "grad_norm": 1.5000661439196052, "learning_rate": 6.525240849605646e-06, "loss": 0.1896, "step": 5862 }, { "epoch": 0.42, "grad_norm": 1.2988463326285618, "learning_rate": 6.524137617427546e-06, "loss": 0.2214, "step": 5863 }, { "epoch": 0.42, "grad_norm": 1.463932839366527, "learning_rate": 6.523034303439794e-06, "loss": 0.2005, "step": 5864 }, { "epoch": 0.42, "grad_norm": 1.4553030842357515, "learning_rate": 6.521930907701612e-06, "loss": 0.2342, "step": 5865 }, { "epoch": 0.42, "grad_norm": 6.7450200298018315, "learning_rate": 6.520827430272223e-06, "loss": 0.625, "step": 5866 }, { "epoch": 0.42, "grad_norm": 1.179887560028545, "learning_rate": 6.5197238712108616e-06, "loss": 0.1577, "step": 5867 }, { "epoch": 0.42, "grad_norm": 1.5002743010074722, "learning_rate": 6.51862023057676e-06, "loss": 0.2469, "step": 5868 }, { "epoch": 0.42, "grad_norm": 1.3344591606642755, "learning_rate": 6.517516508429157e-06, "loss": 0.1892, "step": 5869 }, { "epoch": 0.42, "grad_norm": 1.2166895454585263, "learning_rate": 6.5164127048272966e-06, "loss": 0.1681, "step": 5870 }, { "epoch": 0.42, "grad_norm": 1.361521785623413, "learning_rate": 6.515308819830426e-06, "loss": 0.2114, "step": 5871 }, { "epoch": 0.42, "grad_norm": 5.154625262597219, "learning_rate": 6.514204853497799e-06, "loss": 0.6385, "step": 5872 }, { "epoch": 0.42, "grad_norm": 1.2436883536414283, "learning_rate": 6.51310080588867e-06, "loss": 0.1713, "step": 5873 }, { "epoch": 0.42, "grad_norm": 1.38674664660239, "learning_rate": 6.511996677062302e-06, "loss": 0.1874, "step": 5874 }, { "epoch": 0.42, "grad_norm": 1.2431111317813046, "learning_rate": 6.510892467077958e-06, "loss": 0.1966, "step": 5875 }, { "epoch": 0.42, "grad_norm": 1.3116195008222193, "learning_rate": 6.5097881759949096e-06, "loss": 0.177, "step": 5876 }, { "epoch": 0.42, "grad_norm": 1.291090736803952, "learning_rate": 6.508683803872431e-06, "loss": 0.1968, "step": 5877 }, { "epoch": 0.42, "grad_norm": 1.5268017496508894, "learning_rate": 6.5075793507698e-06, "loss": 0.2454, "step": 5878 }, { "epoch": 0.42, "grad_norm": 1.4881959677282703, "learning_rate": 6.506474816746296e-06, "loss": 0.2634, "step": 5879 }, { "epoch": 0.42, "grad_norm": 1.366364070181591, "learning_rate": 6.50537020186121e-06, "loss": 0.2164, "step": 5880 }, { "epoch": 0.42, "grad_norm": 1.327254732488619, "learning_rate": 6.504265506173832e-06, "loss": 0.2157, "step": 5881 }, { "epoch": 0.42, "grad_norm": 1.5096418726092107, "learning_rate": 6.503160729743459e-06, "loss": 0.2195, "step": 5882 }, { "epoch": 0.42, "grad_norm": 1.344942739348234, "learning_rate": 6.502055872629387e-06, "loss": 0.1703, "step": 5883 }, { "epoch": 0.42, "grad_norm": 1.6419840901517802, "learning_rate": 6.500950934890925e-06, "loss": 0.2398, "step": 5884 }, { "epoch": 0.42, "grad_norm": 1.4539572219982062, "learning_rate": 6.499845916587379e-06, "loss": 0.2198, "step": 5885 }, { "epoch": 0.42, "grad_norm": 1.479323759121893, "learning_rate": 6.4987408177780644e-06, "loss": 0.2044, "step": 5886 }, { "epoch": 0.42, "grad_norm": 1.4279422505801262, "learning_rate": 6.497635638522295e-06, "loss": 0.211, "step": 5887 }, { "epoch": 0.42, "grad_norm": 1.2626392367876296, "learning_rate": 6.496530378879396e-06, "loss": 0.1684, "step": 5888 }, { "epoch": 0.42, "grad_norm": 5.426438435200346, "learning_rate": 6.495425038908691e-06, "loss": 0.5226, "step": 5889 }, { "epoch": 0.42, "grad_norm": 1.3809045050154767, "learning_rate": 6.494319618669511e-06, "loss": 0.2114, "step": 5890 }, { "epoch": 0.42, "grad_norm": 1.1858061325324465, "learning_rate": 6.49321411822119e-06, "loss": 0.1758, "step": 5891 }, { "epoch": 0.42, "grad_norm": 1.6024961671804752, "learning_rate": 6.492108537623067e-06, "loss": 0.2311, "step": 5892 }, { "epoch": 0.42, "grad_norm": 1.1899911538664314, "learning_rate": 6.491002876934487e-06, "loss": 0.1906, "step": 5893 }, { "epoch": 0.42, "grad_norm": 1.3188653112616997, "learning_rate": 6.489897136214796e-06, "loss": 0.2242, "step": 5894 }, { "epoch": 0.42, "grad_norm": 1.3973454325963925, "learning_rate": 6.488791315523343e-06, "loss": 0.2072, "step": 5895 }, { "epoch": 0.42, "grad_norm": 1.4019710859358498, "learning_rate": 6.487685414919489e-06, "loss": 0.2449, "step": 5896 }, { "epoch": 0.42, "grad_norm": 1.4006508241178328, "learning_rate": 6.486579434462592e-06, "loss": 0.2021, "step": 5897 }, { "epoch": 0.42, "grad_norm": 1.3857691016848122, "learning_rate": 6.485473374212017e-06, "loss": 0.2022, "step": 5898 }, { "epoch": 0.42, "grad_norm": 1.2222022273123703, "learning_rate": 6.4843672342271315e-06, "loss": 0.2194, "step": 5899 }, { "epoch": 0.42, "grad_norm": 1.2047049380664667, "learning_rate": 6.483261014567311e-06, "loss": 0.1688, "step": 5900 }, { "epoch": 0.42, "grad_norm": 1.227358422999814, "learning_rate": 6.482154715291933e-06, "loss": 0.2142, "step": 5901 }, { "epoch": 0.42, "grad_norm": 1.3329649469811904, "learning_rate": 6.481048336460377e-06, "loss": 0.2104, "step": 5902 }, { "epoch": 0.42, "grad_norm": 1.3695443403163936, "learning_rate": 6.47994187813203e-06, "loss": 0.1804, "step": 5903 }, { "epoch": 0.42, "grad_norm": 1.3203471656623678, "learning_rate": 6.478835340366283e-06, "loss": 0.1864, "step": 5904 }, { "epoch": 0.42, "grad_norm": 1.2227725143288974, "learning_rate": 6.477728723222528e-06, "loss": 0.1873, "step": 5905 }, { "epoch": 0.42, "grad_norm": 1.4888314018696718, "learning_rate": 6.476622026760169e-06, "loss": 0.202, "step": 5906 }, { "epoch": 0.42, "grad_norm": 1.3135846646724263, "learning_rate": 6.475515251038604e-06, "loss": 0.1802, "step": 5907 }, { "epoch": 0.42, "grad_norm": 1.4025241675095252, "learning_rate": 6.4744083961172435e-06, "loss": 0.2086, "step": 5908 }, { "epoch": 0.42, "grad_norm": 4.448966500528644, "learning_rate": 6.473301462055496e-06, "loss": 0.5136, "step": 5909 }, { "epoch": 0.42, "grad_norm": 1.2523462139898633, "learning_rate": 6.4721944489127795e-06, "loss": 0.1761, "step": 5910 }, { "epoch": 0.42, "grad_norm": 1.5759270520669146, "learning_rate": 6.4710873567485154e-06, "loss": 0.21, "step": 5911 }, { "epoch": 0.42, "grad_norm": 1.440067354064942, "learning_rate": 6.469980185622126e-06, "loss": 0.2248, "step": 5912 }, { "epoch": 0.42, "grad_norm": 4.784287688240427, "learning_rate": 6.468872935593038e-06, "loss": 0.5357, "step": 5913 }, { "epoch": 0.42, "grad_norm": 1.2234199934316157, "learning_rate": 6.467765606720689e-06, "loss": 0.2182, "step": 5914 }, { "epoch": 0.42, "grad_norm": 1.3489742132603246, "learning_rate": 6.466658199064512e-06, "loss": 0.1849, "step": 5915 }, { "epoch": 0.42, "grad_norm": 1.2976974628135605, "learning_rate": 6.465550712683949e-06, "loss": 0.2062, "step": 5916 }, { "epoch": 0.42, "grad_norm": 5.173882356417723, "learning_rate": 6.464443147638446e-06, "loss": 0.4705, "step": 5917 }, { "epoch": 0.42, "grad_norm": 1.4845354867133451, "learning_rate": 6.463335503987453e-06, "loss": 0.2432, "step": 5918 }, { "epoch": 0.42, "grad_norm": 4.646708847517072, "learning_rate": 6.462227781790425e-06, "loss": 0.6802, "step": 5919 }, { "epoch": 0.42, "grad_norm": 1.457177906182861, "learning_rate": 6.4611199811068196e-06, "loss": 0.2446, "step": 5920 }, { "epoch": 0.42, "grad_norm": 1.428018628917624, "learning_rate": 6.460012101996095e-06, "loss": 0.1994, "step": 5921 }, { "epoch": 0.42, "grad_norm": 1.326422509604255, "learning_rate": 6.458904144517723e-06, "loss": 0.1991, "step": 5922 }, { "epoch": 0.42, "grad_norm": 1.5753864955116637, "learning_rate": 6.457796108731173e-06, "loss": 0.2182, "step": 5923 }, { "epoch": 0.42, "grad_norm": 1.4946401195482777, "learning_rate": 6.456687994695918e-06, "loss": 0.2192, "step": 5924 }, { "epoch": 0.42, "grad_norm": 1.4298647431191143, "learning_rate": 6.455579802471439e-06, "loss": 0.2256, "step": 5925 }, { "epoch": 0.42, "grad_norm": 1.441258178748346, "learning_rate": 6.4544715321172195e-06, "loss": 0.1882, "step": 5926 }, { "epoch": 0.42, "grad_norm": 5.543330786728296, "learning_rate": 6.453363183692747e-06, "loss": 0.6414, "step": 5927 }, { "epoch": 0.42, "grad_norm": 1.410805166736668, "learning_rate": 6.4522547572575126e-06, "loss": 0.2366, "step": 5928 }, { "epoch": 0.42, "grad_norm": 1.3467241157606815, "learning_rate": 6.45114625287101e-06, "loss": 0.197, "step": 5929 }, { "epoch": 0.42, "grad_norm": 1.5686855818906367, "learning_rate": 6.450037670592744e-06, "loss": 0.1962, "step": 5930 }, { "epoch": 0.42, "grad_norm": 1.2514219546097685, "learning_rate": 6.448929010482215e-06, "loss": 0.1641, "step": 5931 }, { "epoch": 0.42, "grad_norm": 1.3434494673188755, "learning_rate": 6.447820272598935e-06, "loss": 0.2298, "step": 5932 }, { "epoch": 0.42, "grad_norm": 1.192009584164867, "learning_rate": 6.446711457002412e-06, "loss": 0.1944, "step": 5933 }, { "epoch": 0.42, "grad_norm": 1.4215603166585253, "learning_rate": 6.445602563752167e-06, "loss": 0.2271, "step": 5934 }, { "epoch": 0.42, "grad_norm": 1.2040112411460901, "learning_rate": 6.444493592907719e-06, "loss": 0.1744, "step": 5935 }, { "epoch": 0.42, "grad_norm": 1.294529589000353, "learning_rate": 6.443384544528595e-06, "loss": 0.1771, "step": 5936 }, { "epoch": 0.42, "grad_norm": 1.542482111671868, "learning_rate": 6.442275418674321e-06, "loss": 0.2107, "step": 5937 }, { "epoch": 0.42, "grad_norm": 1.4649221220498023, "learning_rate": 6.441166215404432e-06, "loss": 0.253, "step": 5938 }, { "epoch": 0.42, "grad_norm": 1.4157801939047272, "learning_rate": 6.440056934778468e-06, "loss": 0.2253, "step": 5939 }, { "epoch": 0.42, "grad_norm": 1.402721685432459, "learning_rate": 6.4389475768559675e-06, "loss": 0.2394, "step": 5940 }, { "epoch": 0.42, "grad_norm": 1.2252310072851156, "learning_rate": 6.4378381416964785e-06, "loss": 0.1749, "step": 5941 }, { "epoch": 0.43, "grad_norm": 1.4038690190908032, "learning_rate": 6.4367286293595495e-06, "loss": 0.2193, "step": 5942 }, { "epoch": 0.43, "grad_norm": 5.048032419441468, "learning_rate": 6.435619039904737e-06, "loss": 0.6367, "step": 5943 }, { "epoch": 0.43, "grad_norm": 1.3364933807635349, "learning_rate": 6.434509373391596e-06, "loss": 0.2181, "step": 5944 }, { "epoch": 0.43, "grad_norm": 1.2656920943798071, "learning_rate": 6.433399629879693e-06, "loss": 0.2042, "step": 5945 }, { "epoch": 0.43, "grad_norm": 1.3399756767361302, "learning_rate": 6.432289809428589e-06, "loss": 0.218, "step": 5946 }, { "epoch": 0.43, "grad_norm": 1.1010395013712686, "learning_rate": 6.4311799120978614e-06, "loss": 0.1406, "step": 5947 }, { "epoch": 0.43, "grad_norm": 1.2604562260316823, "learning_rate": 6.430069937947081e-06, "loss": 0.1512, "step": 5948 }, { "epoch": 0.43, "grad_norm": 1.3259867563499246, "learning_rate": 6.428959887035828e-06, "loss": 0.185, "step": 5949 }, { "epoch": 0.43, "grad_norm": 1.2893931797495104, "learning_rate": 6.427849759423684e-06, "loss": 0.2268, "step": 5950 }, { "epoch": 0.43, "grad_norm": 1.2955594250160696, "learning_rate": 6.426739555170239e-06, "loss": 0.2291, "step": 5951 }, { "epoch": 0.43, "grad_norm": 1.2122340386448756, "learning_rate": 6.425629274335081e-06, "loss": 0.1915, "step": 5952 }, { "epoch": 0.43, "grad_norm": 1.2990168098314965, "learning_rate": 6.4245189169778086e-06, "loss": 0.1912, "step": 5953 }, { "epoch": 0.43, "grad_norm": 1.2669679137435874, "learning_rate": 6.4234084831580205e-06, "loss": 0.1654, "step": 5954 }, { "epoch": 0.43, "grad_norm": 1.3821924819216012, "learning_rate": 6.4222979729353185e-06, "loss": 0.2187, "step": 5955 }, { "epoch": 0.43, "grad_norm": 1.4880833905000703, "learning_rate": 6.421187386369313e-06, "loss": 0.3013, "step": 5956 }, { "epoch": 0.43, "grad_norm": 1.2788119359227417, "learning_rate": 6.420076723519615e-06, "loss": 0.2234, "step": 5957 }, { "epoch": 0.43, "grad_norm": 1.4379247842890428, "learning_rate": 6.4189659844458385e-06, "loss": 0.226, "step": 5958 }, { "epoch": 0.43, "grad_norm": 1.135747896149007, "learning_rate": 6.4178551692076056e-06, "loss": 0.1781, "step": 5959 }, { "epoch": 0.43, "grad_norm": 1.1950873021654478, "learning_rate": 6.416744277864541e-06, "loss": 0.1814, "step": 5960 }, { "epoch": 0.43, "grad_norm": 1.3915629970627863, "learning_rate": 6.415633310476271e-06, "loss": 0.201, "step": 5961 }, { "epoch": 0.43, "grad_norm": 1.1916977228600398, "learning_rate": 6.41452226710243e-06, "loss": 0.1858, "step": 5962 }, { "epoch": 0.43, "grad_norm": 1.347095480190013, "learning_rate": 6.413411147802652e-06, "loss": 0.1762, "step": 5963 }, { "epoch": 0.43, "grad_norm": 1.3051788588075717, "learning_rate": 6.41229995263658e-06, "loss": 0.1982, "step": 5964 }, { "epoch": 0.43, "grad_norm": 1.2144848564630306, "learning_rate": 6.411188681663857e-06, "loss": 0.2122, "step": 5965 }, { "epoch": 0.43, "grad_norm": 1.2269403931337466, "learning_rate": 6.410077334944132e-06, "loss": 0.2016, "step": 5966 }, { "epoch": 0.43, "grad_norm": 1.2310353015325373, "learning_rate": 6.408965912537056e-06, "loss": 0.1701, "step": 5967 }, { "epoch": 0.43, "grad_norm": 1.3501885632216366, "learning_rate": 6.407854414502289e-06, "loss": 0.1841, "step": 5968 }, { "epoch": 0.43, "grad_norm": 1.266179408155581, "learning_rate": 6.406742840899489e-06, "loss": 0.1945, "step": 5969 }, { "epoch": 0.43, "grad_norm": 1.3041677477887543, "learning_rate": 6.405631191788323e-06, "loss": 0.2229, "step": 5970 }, { "epoch": 0.43, "grad_norm": 1.4609152780374801, "learning_rate": 6.404519467228458e-06, "loss": 0.2135, "step": 5971 }, { "epoch": 0.43, "grad_norm": 1.1262734460938701, "learning_rate": 6.403407667279569e-06, "loss": 0.1441, "step": 5972 }, { "epoch": 0.43, "grad_norm": 1.3759527012886266, "learning_rate": 6.402295792001332e-06, "loss": 0.1999, "step": 5973 }, { "epoch": 0.43, "grad_norm": 1.3399992536264542, "learning_rate": 6.401183841453427e-06, "loss": 0.2162, "step": 5974 }, { "epoch": 0.43, "grad_norm": 1.1887821337767206, "learning_rate": 6.400071815695541e-06, "loss": 0.1759, "step": 5975 }, { "epoch": 0.43, "grad_norm": 1.1884530497356576, "learning_rate": 6.3989597147873606e-06, "loss": 0.1474, "step": 5976 }, { "epoch": 0.43, "grad_norm": 1.2275994458889348, "learning_rate": 6.397847538788581e-06, "loss": 0.164, "step": 5977 }, { "epoch": 0.43, "grad_norm": 1.2987352699326156, "learning_rate": 6.3967352877589e-06, "loss": 0.1868, "step": 5978 }, { "epoch": 0.43, "grad_norm": 1.266851966614568, "learning_rate": 6.3956229617580165e-06, "loss": 0.1767, "step": 5979 }, { "epoch": 0.43, "grad_norm": 1.2813758808086664, "learning_rate": 6.394510560845637e-06, "loss": 0.2219, "step": 5980 }, { "epoch": 0.43, "grad_norm": 1.3552096037884132, "learning_rate": 6.39339808508147e-06, "loss": 0.177, "step": 5981 }, { "epoch": 0.43, "grad_norm": 1.4254453694214366, "learning_rate": 6.392285534525231e-06, "loss": 0.219, "step": 5982 }, { "epoch": 0.43, "grad_norm": 1.206071177567004, "learning_rate": 6.391172909236636e-06, "loss": 0.1727, "step": 5983 }, { "epoch": 0.43, "grad_norm": 1.3689629623887476, "learning_rate": 6.3900602092754034e-06, "loss": 0.1949, "step": 5984 }, { "epoch": 0.43, "grad_norm": 1.3283682590450712, "learning_rate": 6.388947434701265e-06, "loss": 0.223, "step": 5985 }, { "epoch": 0.43, "grad_norm": 1.4366286017005132, "learning_rate": 6.387834585573944e-06, "loss": 0.2099, "step": 5986 }, { "epoch": 0.43, "grad_norm": 1.364550249291559, "learning_rate": 6.386721661953177e-06, "loss": 0.2109, "step": 5987 }, { "epoch": 0.43, "grad_norm": 1.300147407512584, "learning_rate": 6.3856086638986995e-06, "loss": 0.2132, "step": 5988 }, { "epoch": 0.43, "grad_norm": 1.4007614831246213, "learning_rate": 6.3844955914702534e-06, "loss": 0.2088, "step": 5989 }, { "epoch": 0.43, "grad_norm": 1.6843347474779486, "learning_rate": 6.383382444727585e-06, "loss": 0.1897, "step": 5990 }, { "epoch": 0.43, "grad_norm": 5.52461273967348, "learning_rate": 6.382269223730443e-06, "loss": 0.5295, "step": 5991 }, { "epoch": 0.43, "grad_norm": 1.3506725592827098, "learning_rate": 6.381155928538579e-06, "loss": 0.1582, "step": 5992 }, { "epoch": 0.43, "grad_norm": 1.3187605434614629, "learning_rate": 6.380042559211753e-06, "loss": 0.1788, "step": 5993 }, { "epoch": 0.43, "grad_norm": 1.4852857438599727, "learning_rate": 6.378929115809725e-06, "loss": 0.1937, "step": 5994 }, { "epoch": 0.43, "grad_norm": 1.3559121462875783, "learning_rate": 6.37781559839226e-06, "loss": 0.1909, "step": 5995 }, { "epoch": 0.43, "grad_norm": 1.3457770800742648, "learning_rate": 6.376702007019127e-06, "loss": 0.1756, "step": 5996 }, { "epoch": 0.43, "grad_norm": 4.54786787786061, "learning_rate": 6.3755883417501006e-06, "loss": 0.4815, "step": 5997 }, { "epoch": 0.43, "grad_norm": 1.1066497192571227, "learning_rate": 6.374474602644958e-06, "loss": 0.1448, "step": 5998 }, { "epoch": 0.43, "grad_norm": 1.2577465797862053, "learning_rate": 6.373360789763477e-06, "loss": 0.189, "step": 5999 }, { "epoch": 0.43, "grad_norm": 1.2788970309672827, "learning_rate": 6.372246903165445e-06, "loss": 0.2042, "step": 6000 }, { "epoch": 0.43, "grad_norm": 1.3972279532730183, "learning_rate": 6.371132942910652e-06, "loss": 0.2302, "step": 6001 }, { "epoch": 0.43, "grad_norm": 1.503823317905543, "learning_rate": 6.370018909058889e-06, "loss": 0.2363, "step": 6002 }, { "epoch": 0.43, "grad_norm": 1.370060516102682, "learning_rate": 6.368904801669955e-06, "loss": 0.1895, "step": 6003 }, { "epoch": 0.43, "grad_norm": 1.7386443131796494, "learning_rate": 6.3677906208036485e-06, "loss": 0.2125, "step": 6004 }, { "epoch": 0.43, "grad_norm": 5.039571077501851, "learning_rate": 6.366676366519775e-06, "loss": 0.6157, "step": 6005 }, { "epoch": 0.43, "grad_norm": 5.665452647494954, "learning_rate": 6.365562038878145e-06, "loss": 0.5618, "step": 6006 }, { "epoch": 0.43, "grad_norm": 1.5390271453761908, "learning_rate": 6.364447637938569e-06, "loss": 0.2201, "step": 6007 }, { "epoch": 0.43, "grad_norm": 1.3523826531813135, "learning_rate": 6.363333163760865e-06, "loss": 0.2091, "step": 6008 }, { "epoch": 0.43, "grad_norm": 1.4781381140183742, "learning_rate": 6.362218616404854e-06, "loss": 0.2064, "step": 6009 }, { "epoch": 0.43, "grad_norm": 1.1729211448378916, "learning_rate": 6.361103995930357e-06, "loss": 0.1736, "step": 6010 }, { "epoch": 0.43, "grad_norm": 5.8514395666538705, "learning_rate": 6.3599893023972056e-06, "loss": 0.5345, "step": 6011 }, { "epoch": 0.43, "grad_norm": 1.2529744461132255, "learning_rate": 6.358874535865233e-06, "loss": 0.2249, "step": 6012 }, { "epoch": 0.43, "grad_norm": 1.2752063477226305, "learning_rate": 6.3577596963942725e-06, "loss": 0.182, "step": 6013 }, { "epoch": 0.43, "grad_norm": 1.3158678118337033, "learning_rate": 6.356644784044164e-06, "loss": 0.16, "step": 6014 }, { "epoch": 0.43, "grad_norm": 1.3945931081818745, "learning_rate": 6.355529798874757e-06, "loss": 0.1989, "step": 6015 }, { "epoch": 0.43, "grad_norm": 1.3473817810635123, "learning_rate": 6.354414740945894e-06, "loss": 0.2294, "step": 6016 }, { "epoch": 0.43, "grad_norm": 1.2162395444267329, "learning_rate": 6.353299610317427e-06, "loss": 0.1779, "step": 6017 }, { "epoch": 0.43, "grad_norm": 1.4712114430649834, "learning_rate": 6.352184407049214e-06, "loss": 0.2113, "step": 6018 }, { "epoch": 0.43, "grad_norm": 1.4251986494226967, "learning_rate": 6.351069131201115e-06, "loss": 0.2152, "step": 6019 }, { "epoch": 0.43, "grad_norm": 1.3055438377445368, "learning_rate": 6.349953782832991e-06, "loss": 0.1581, "step": 6020 }, { "epoch": 0.43, "grad_norm": 1.1280743247758633, "learning_rate": 6.348838362004713e-06, "loss": 0.1541, "step": 6021 }, { "epoch": 0.43, "grad_norm": 1.3732847305084301, "learning_rate": 6.347722868776148e-06, "loss": 0.1814, "step": 6022 }, { "epoch": 0.43, "grad_norm": 1.3943803096345815, "learning_rate": 6.346607303207176e-06, "loss": 0.2308, "step": 6023 }, { "epoch": 0.43, "grad_norm": 1.2452937287808699, "learning_rate": 6.345491665357673e-06, "loss": 0.1704, "step": 6024 }, { "epoch": 0.43, "grad_norm": 1.3581319366717806, "learning_rate": 6.344375955287523e-06, "loss": 0.2247, "step": 6025 }, { "epoch": 0.43, "grad_norm": 1.491029662219368, "learning_rate": 6.343260173056612e-06, "loss": 0.2472, "step": 6026 }, { "epoch": 0.43, "grad_norm": 1.309496566040985, "learning_rate": 6.34214431872483e-06, "loss": 0.2111, "step": 6027 }, { "epoch": 0.43, "grad_norm": 1.3164834825764118, "learning_rate": 6.341028392352076e-06, "loss": 0.1957, "step": 6028 }, { "epoch": 0.43, "grad_norm": 4.8440888161367095, "learning_rate": 6.339912393998244e-06, "loss": 0.6598, "step": 6029 }, { "epoch": 0.43, "grad_norm": 1.272016747790002, "learning_rate": 6.338796323723236e-06, "loss": 0.1969, "step": 6030 }, { "epoch": 0.43, "grad_norm": 1.3960289170959554, "learning_rate": 6.337680181586963e-06, "loss": 0.2064, "step": 6031 }, { "epoch": 0.43, "grad_norm": 1.4520561618171652, "learning_rate": 6.336563967649332e-06, "loss": 0.237, "step": 6032 }, { "epoch": 0.43, "grad_norm": 4.35469442577264, "learning_rate": 6.335447681970255e-06, "loss": 0.6443, "step": 6033 }, { "epoch": 0.43, "grad_norm": 1.4395958848140078, "learning_rate": 6.334331324609654e-06, "loss": 0.2034, "step": 6034 }, { "epoch": 0.43, "grad_norm": 1.367556922101954, "learning_rate": 6.3332148956274466e-06, "loss": 0.1539, "step": 6035 }, { "epoch": 0.43, "grad_norm": 1.206265415641196, "learning_rate": 6.332098395083562e-06, "loss": 0.1566, "step": 6036 }, { "epoch": 0.43, "grad_norm": 1.1895328825014628, "learning_rate": 6.330981823037926e-06, "loss": 0.1875, "step": 6037 }, { "epoch": 0.43, "grad_norm": 1.3440759949805046, "learning_rate": 6.329865179550475e-06, "loss": 0.1783, "step": 6038 }, { "epoch": 0.43, "grad_norm": 1.376585951856802, "learning_rate": 6.328748464681143e-06, "loss": 0.1949, "step": 6039 }, { "epoch": 0.43, "grad_norm": 1.1629048279951808, "learning_rate": 6.327631678489874e-06, "loss": 0.153, "step": 6040 }, { "epoch": 0.43, "grad_norm": 1.3623472418213103, "learning_rate": 6.32651482103661e-06, "loss": 0.2241, "step": 6041 }, { "epoch": 0.43, "grad_norm": 1.2903805178978347, "learning_rate": 6.325397892381301e-06, "loss": 0.1918, "step": 6042 }, { "epoch": 0.43, "grad_norm": 1.3277787323291077, "learning_rate": 6.324280892583896e-06, "loss": 0.1947, "step": 6043 }, { "epoch": 0.43, "grad_norm": 1.188570791239475, "learning_rate": 6.323163821704357e-06, "loss": 0.1688, "step": 6044 }, { "epoch": 0.43, "grad_norm": 1.3774756657643852, "learning_rate": 6.322046679802639e-06, "loss": 0.2389, "step": 6045 }, { "epoch": 0.43, "grad_norm": 1.5206647968224447, "learning_rate": 6.320929466938709e-06, "loss": 0.2466, "step": 6046 }, { "epoch": 0.43, "grad_norm": 1.5675209041491782, "learning_rate": 6.319812183172531e-06, "loss": 0.2493, "step": 6047 }, { "epoch": 0.43, "grad_norm": 1.5112712767834573, "learning_rate": 6.3186948285640795e-06, "loss": 0.1908, "step": 6048 }, { "epoch": 0.43, "grad_norm": 1.346116175191913, "learning_rate": 6.317577403173328e-06, "loss": 0.1958, "step": 6049 }, { "epoch": 0.43, "grad_norm": 5.479828707900571, "learning_rate": 6.316459907060257e-06, "loss": 0.7712, "step": 6050 }, { "epoch": 0.43, "grad_norm": 1.3213781213243325, "learning_rate": 6.315342340284847e-06, "loss": 0.2467, "step": 6051 }, { "epoch": 0.43, "grad_norm": 1.5808386537163082, "learning_rate": 6.3142247029070855e-06, "loss": 0.1988, "step": 6052 }, { "epoch": 0.43, "grad_norm": 1.3449799290707891, "learning_rate": 6.313106994986965e-06, "loss": 0.174, "step": 6053 }, { "epoch": 0.43, "grad_norm": 1.5131614861257754, "learning_rate": 6.311989216584476e-06, "loss": 0.2302, "step": 6054 }, { "epoch": 0.43, "grad_norm": 1.145758856149474, "learning_rate": 6.310871367759619e-06, "loss": 0.1351, "step": 6055 }, { "epoch": 0.43, "grad_norm": 1.3241803729449049, "learning_rate": 6.309753448572394e-06, "loss": 0.2086, "step": 6056 }, { "epoch": 0.43, "grad_norm": 1.3726814861839929, "learning_rate": 6.308635459082808e-06, "loss": 0.2234, "step": 6057 }, { "epoch": 0.43, "grad_norm": 1.3933392060361727, "learning_rate": 6.3075173993508684e-06, "loss": 0.2267, "step": 6058 }, { "epoch": 0.43, "grad_norm": 1.2764532389853296, "learning_rate": 6.306399269436591e-06, "loss": 0.2342, "step": 6059 }, { "epoch": 0.43, "grad_norm": 5.168912319889708, "learning_rate": 6.305281069399989e-06, "loss": 0.8818, "step": 6060 }, { "epoch": 0.43, "grad_norm": 1.3707749990702702, "learning_rate": 6.3041627993010855e-06, "loss": 0.2187, "step": 6061 }, { "epoch": 0.43, "grad_norm": 1.4589728663563595, "learning_rate": 6.303044459199905e-06, "loss": 0.27, "step": 6062 }, { "epoch": 0.43, "grad_norm": 1.4452196504646069, "learning_rate": 6.301926049156475e-06, "loss": 0.2253, "step": 6063 }, { "epoch": 0.43, "grad_norm": 1.4641511553846807, "learning_rate": 6.300807569230825e-06, "loss": 0.1943, "step": 6064 }, { "epoch": 0.43, "grad_norm": 5.137920974039288, "learning_rate": 6.299689019482993e-06, "loss": 0.5305, "step": 6065 }, { "epoch": 0.43, "grad_norm": 4.217361603368205, "learning_rate": 6.298570399973017e-06, "loss": 0.4738, "step": 6066 }, { "epoch": 0.43, "grad_norm": 1.758242731517963, "learning_rate": 6.297451710760944e-06, "loss": 0.2013, "step": 6067 }, { "epoch": 0.43, "grad_norm": 8.158738996363695, "learning_rate": 6.296332951906815e-06, "loss": 0.7065, "step": 6068 }, { "epoch": 0.43, "grad_norm": 1.4595291321970978, "learning_rate": 6.2952141234706835e-06, "loss": 0.2189, "step": 6069 }, { "epoch": 0.43, "grad_norm": 1.2934768057145665, "learning_rate": 6.294095225512604e-06, "loss": 0.2, "step": 6070 }, { "epoch": 0.43, "grad_norm": 1.4605990474584731, "learning_rate": 6.292976258092635e-06, "loss": 0.1858, "step": 6071 }, { "epoch": 0.43, "grad_norm": 1.4418245200721458, "learning_rate": 6.291857221270836e-06, "loss": 0.2087, "step": 6072 }, { "epoch": 0.43, "grad_norm": 1.3168402606578098, "learning_rate": 6.290738115107273e-06, "loss": 0.2103, "step": 6073 }, { "epoch": 0.43, "grad_norm": 1.390301597819154, "learning_rate": 6.2896189396620165e-06, "loss": 0.2359, "step": 6074 }, { "epoch": 0.43, "grad_norm": 1.658191379068836, "learning_rate": 6.288499694995139e-06, "loss": 0.2555, "step": 6075 }, { "epoch": 0.43, "grad_norm": 1.4754928777384115, "learning_rate": 6.2873803811667164e-06, "loss": 0.1822, "step": 6076 }, { "epoch": 0.43, "grad_norm": 1.480901366231782, "learning_rate": 6.286260998236829e-06, "loss": 0.2364, "step": 6077 }, { "epoch": 0.43, "grad_norm": 1.3909388935933629, "learning_rate": 6.2851415462655625e-06, "loss": 0.2456, "step": 6078 }, { "epoch": 0.43, "grad_norm": 1.5305797781282102, "learning_rate": 6.284022025313003e-06, "loss": 0.2374, "step": 6079 }, { "epoch": 0.43, "grad_norm": 1.457662784771119, "learning_rate": 6.282902435439242e-06, "loss": 0.2442, "step": 6080 }, { "epoch": 0.43, "grad_norm": 1.3925048699359455, "learning_rate": 6.281782776704375e-06, "loss": 0.2241, "step": 6081 }, { "epoch": 0.44, "grad_norm": 3.7416384937177707, "learning_rate": 6.2806630491685e-06, "loss": 0.7541, "step": 6082 }, { "epoch": 0.44, "grad_norm": 1.408815079810787, "learning_rate": 6.279543252891722e-06, "loss": 0.2092, "step": 6083 }, { "epoch": 0.44, "grad_norm": 1.396629402432976, "learning_rate": 6.278423387934145e-06, "loss": 0.2411, "step": 6084 }, { "epoch": 0.44, "grad_norm": 1.2451831326037093, "learning_rate": 6.277303454355878e-06, "loss": 0.1866, "step": 6085 }, { "epoch": 0.44, "grad_norm": 1.0819483604290343, "learning_rate": 6.276183452217038e-06, "loss": 0.1477, "step": 6086 }, { "epoch": 0.44, "grad_norm": 1.5559081183368948, "learning_rate": 6.2750633815777395e-06, "loss": 0.2522, "step": 6087 }, { "epoch": 0.44, "grad_norm": 1.4025796562908368, "learning_rate": 6.273943242498105e-06, "loss": 0.2384, "step": 6088 }, { "epoch": 0.44, "grad_norm": 1.3885648019350323, "learning_rate": 6.272823035038257e-06, "loss": 0.2022, "step": 6089 }, { "epoch": 0.44, "grad_norm": 1.4706824483661423, "learning_rate": 6.2717027592583255e-06, "loss": 0.2, "step": 6090 }, { "epoch": 0.44, "grad_norm": 1.4267343312064267, "learning_rate": 6.270582415218442e-06, "loss": 0.1925, "step": 6091 }, { "epoch": 0.44, "grad_norm": 1.172331016913087, "learning_rate": 6.269462002978743e-06, "loss": 0.1802, "step": 6092 }, { "epoch": 0.44, "grad_norm": 1.261669885250662, "learning_rate": 6.268341522599366e-06, "loss": 0.1751, "step": 6093 }, { "epoch": 0.44, "grad_norm": 1.1600762773277573, "learning_rate": 6.267220974140456e-06, "loss": 0.1419, "step": 6094 }, { "epoch": 0.44, "grad_norm": 1.4545003234020253, "learning_rate": 6.266100357662158e-06, "loss": 0.2117, "step": 6095 }, { "epoch": 0.44, "grad_norm": 4.126817450825488, "learning_rate": 6.2649796732246226e-06, "loss": 0.6336, "step": 6096 }, { "epoch": 0.44, "grad_norm": 5.981721024362856, "learning_rate": 6.263858920888003e-06, "loss": 0.6032, "step": 6097 }, { "epoch": 0.44, "grad_norm": 1.3231552146704568, "learning_rate": 6.262738100712458e-06, "loss": 0.1853, "step": 6098 }, { "epoch": 0.44, "grad_norm": 1.4078309506930746, "learning_rate": 6.261617212758148e-06, "loss": 0.1914, "step": 6099 }, { "epoch": 0.44, "grad_norm": 1.4980321479181489, "learning_rate": 6.26049625708524e-06, "loss": 0.2265, "step": 6100 }, { "epoch": 0.44, "grad_norm": 1.3244830573180066, "learning_rate": 6.259375233753901e-06, "loss": 0.2024, "step": 6101 }, { "epoch": 0.44, "grad_norm": 1.679345222315573, "learning_rate": 6.258254142824299e-06, "loss": 0.2545, "step": 6102 }, { "epoch": 0.44, "grad_norm": 1.3277166982723225, "learning_rate": 6.257132984356616e-06, "loss": 0.1807, "step": 6103 }, { "epoch": 0.44, "grad_norm": 1.1656771841958482, "learning_rate": 6.256011758411029e-06, "loss": 0.1536, "step": 6104 }, { "epoch": 0.44, "grad_norm": 1.4484315046107583, "learning_rate": 6.254890465047721e-06, "loss": 0.2322, "step": 6105 }, { "epoch": 0.44, "grad_norm": 1.333120438361033, "learning_rate": 6.253769104326877e-06, "loss": 0.1772, "step": 6106 }, { "epoch": 0.44, "grad_norm": 1.3073745304785707, "learning_rate": 6.252647676308687e-06, "loss": 0.1582, "step": 6107 }, { "epoch": 0.44, "grad_norm": 1.4194973406503713, "learning_rate": 6.251526181053349e-06, "loss": 0.2508, "step": 6108 }, { "epoch": 0.44, "grad_norm": 1.40369981239689, "learning_rate": 6.250404618621058e-06, "loss": 0.2187, "step": 6109 }, { "epoch": 0.44, "grad_norm": 1.359033950143654, "learning_rate": 6.249282989072013e-06, "loss": 0.1966, "step": 6110 }, { "epoch": 0.44, "grad_norm": 1.3879305257460344, "learning_rate": 6.248161292466419e-06, "loss": 0.2077, "step": 6111 }, { "epoch": 0.44, "grad_norm": 1.2953763457714846, "learning_rate": 6.247039528864488e-06, "loss": 0.1887, "step": 6112 }, { "epoch": 0.44, "grad_norm": 1.301919732019173, "learning_rate": 6.245917698326429e-06, "loss": 0.1953, "step": 6113 }, { "epoch": 0.44, "grad_norm": 1.2076293467702066, "learning_rate": 6.244795800912458e-06, "loss": 0.189, "step": 6114 }, { "epoch": 0.44, "grad_norm": 4.350264703496325, "learning_rate": 6.243673836682791e-06, "loss": 0.5526, "step": 6115 }, { "epoch": 0.44, "grad_norm": 1.2103954099887813, "learning_rate": 6.242551805697655e-06, "loss": 0.1607, "step": 6116 }, { "epoch": 0.44, "grad_norm": 1.329998748756925, "learning_rate": 6.241429708017276e-06, "loss": 0.1592, "step": 6117 }, { "epoch": 0.44, "grad_norm": 1.425673407956938, "learning_rate": 6.2403075437018804e-06, "loss": 0.2315, "step": 6118 }, { "epoch": 0.44, "grad_norm": 1.2871733217204477, "learning_rate": 6.239185312811703e-06, "loss": 0.1977, "step": 6119 }, { "epoch": 0.44, "grad_norm": 1.3576584400873155, "learning_rate": 6.238063015406982e-06, "loss": 0.2047, "step": 6120 }, { "epoch": 0.44, "grad_norm": 4.646365886454608, "learning_rate": 6.236940651547957e-06, "loss": 0.4971, "step": 6121 }, { "epoch": 0.44, "grad_norm": 5.673272147120256, "learning_rate": 6.235818221294871e-06, "loss": 0.6871, "step": 6122 }, { "epoch": 0.44, "grad_norm": 1.3710451409448157, "learning_rate": 6.234695724707973e-06, "loss": 0.215, "step": 6123 }, { "epoch": 0.44, "grad_norm": 10.24275454631224, "learning_rate": 6.233573161847515e-06, "loss": 0.6136, "step": 6124 }, { "epoch": 0.44, "grad_norm": 1.3173447842887431, "learning_rate": 6.232450532773749e-06, "loss": 0.2116, "step": 6125 }, { "epoch": 0.44, "grad_norm": 1.223449819969378, "learning_rate": 6.231327837546935e-06, "loss": 0.1865, "step": 6126 }, { "epoch": 0.44, "grad_norm": 4.206800843035456, "learning_rate": 6.230205076227334e-06, "loss": 0.572, "step": 6127 }, { "epoch": 0.44, "grad_norm": 1.546131691032404, "learning_rate": 6.229082248875212e-06, "loss": 0.2109, "step": 6128 }, { "epoch": 0.44, "grad_norm": 1.2287647396310375, "learning_rate": 6.227959355550838e-06, "loss": 0.1658, "step": 6129 }, { "epoch": 0.44, "grad_norm": 1.3238274378728416, "learning_rate": 6.226836396314484e-06, "loss": 0.1658, "step": 6130 }, { "epoch": 0.44, "grad_norm": 1.8058051582682093, "learning_rate": 6.225713371226427e-06, "loss": 0.254, "step": 6131 }, { "epoch": 0.44, "grad_norm": 1.5235175658254225, "learning_rate": 6.224590280346944e-06, "loss": 0.2128, "step": 6132 }, { "epoch": 0.44, "grad_norm": 1.5450292786024251, "learning_rate": 6.223467123736321e-06, "loss": 0.2081, "step": 6133 }, { "epoch": 0.44, "grad_norm": 1.4204993746034242, "learning_rate": 6.222343901454844e-06, "loss": 0.2162, "step": 6134 }, { "epoch": 0.44, "grad_norm": 1.2087701624518392, "learning_rate": 6.221220613562802e-06, "loss": 0.1741, "step": 6135 }, { "epoch": 0.44, "grad_norm": 1.3617109027969951, "learning_rate": 6.220097260120488e-06, "loss": 0.1869, "step": 6136 }, { "epoch": 0.44, "grad_norm": 1.289692806054038, "learning_rate": 6.218973841188202e-06, "loss": 0.1996, "step": 6137 }, { "epoch": 0.44, "grad_norm": 1.21172292445162, "learning_rate": 6.217850356826242e-06, "loss": 0.1946, "step": 6138 }, { "epoch": 0.44, "grad_norm": 1.4087152832187702, "learning_rate": 6.216726807094914e-06, "loss": 0.2207, "step": 6139 }, { "epoch": 0.44, "grad_norm": 1.362987376426746, "learning_rate": 6.215603192054523e-06, "loss": 0.1787, "step": 6140 }, { "epoch": 0.44, "grad_norm": 1.3603964348432152, "learning_rate": 6.2144795117653835e-06, "loss": 0.2139, "step": 6141 }, { "epoch": 0.44, "grad_norm": 1.3756707903231142, "learning_rate": 6.213355766287808e-06, "loss": 0.171, "step": 6142 }, { "epoch": 0.44, "grad_norm": 1.1973063848281436, "learning_rate": 6.212231955682115e-06, "loss": 0.1818, "step": 6143 }, { "epoch": 0.44, "grad_norm": 1.5103461390608002, "learning_rate": 6.211108080008627e-06, "loss": 0.2559, "step": 6144 }, { "epoch": 0.44, "grad_norm": 3.954328025773266, "learning_rate": 6.20998413932767e-06, "loss": 0.6369, "step": 6145 }, { "epoch": 0.44, "grad_norm": 5.765131265325706, "learning_rate": 6.2088601336995705e-06, "loss": 0.5469, "step": 6146 }, { "epoch": 0.44, "grad_norm": 1.2826158108561603, "learning_rate": 6.207736063184661e-06, "loss": 0.1886, "step": 6147 }, { "epoch": 0.44, "grad_norm": 5.443263937670081, "learning_rate": 6.206611927843277e-06, "loss": 0.6505, "step": 6148 }, { "epoch": 0.44, "grad_norm": 1.3189733136217026, "learning_rate": 6.205487727735759e-06, "loss": 0.2324, "step": 6149 }, { "epoch": 0.44, "grad_norm": 1.241002664653362, "learning_rate": 6.20436346292245e-06, "loss": 0.1907, "step": 6150 }, { "epoch": 0.44, "grad_norm": 1.8471270765250267, "learning_rate": 6.203239133463695e-06, "loss": 0.1875, "step": 6151 }, { "epoch": 0.44, "grad_norm": 1.4086430969684023, "learning_rate": 6.202114739419843e-06, "loss": 0.2146, "step": 6152 }, { "epoch": 0.44, "grad_norm": 1.1660663473024138, "learning_rate": 6.200990280851247e-06, "loss": 0.1915, "step": 6153 }, { "epoch": 0.44, "grad_norm": 1.3683259942529886, "learning_rate": 6.199865757818264e-06, "loss": 0.203, "step": 6154 }, { "epoch": 0.44, "grad_norm": 1.1616775809018711, "learning_rate": 6.198741170381255e-06, "loss": 0.1775, "step": 6155 }, { "epoch": 0.44, "grad_norm": 1.4802348407405437, "learning_rate": 6.1976165186005825e-06, "loss": 0.1963, "step": 6156 }, { "epoch": 0.44, "grad_norm": 1.405109664397268, "learning_rate": 6.19649180253661e-06, "loss": 0.199, "step": 6157 }, { "epoch": 0.44, "grad_norm": 1.1960728828482798, "learning_rate": 6.195367022249714e-06, "loss": 0.1956, "step": 6158 }, { "epoch": 0.44, "grad_norm": 1.424632469178128, "learning_rate": 6.194242177800265e-06, "loss": 0.2227, "step": 6159 }, { "epoch": 0.44, "grad_norm": 1.3099422770056406, "learning_rate": 6.1931172692486405e-06, "loss": 0.1807, "step": 6160 }, { "epoch": 0.44, "grad_norm": 1.3783965786067665, "learning_rate": 6.191992296655219e-06, "loss": 0.1773, "step": 6161 }, { "epoch": 0.44, "grad_norm": 1.4164351520790772, "learning_rate": 6.190867260080387e-06, "loss": 0.2208, "step": 6162 }, { "epoch": 0.44, "grad_norm": 1.2331184457132873, "learning_rate": 6.189742159584533e-06, "loss": 0.1925, "step": 6163 }, { "epoch": 0.44, "grad_norm": 1.2115048258332202, "learning_rate": 6.188616995228046e-06, "loss": 0.2144, "step": 6164 }, { "epoch": 0.44, "grad_norm": 1.2972706704868169, "learning_rate": 6.187491767071321e-06, "loss": 0.2259, "step": 6165 }, { "epoch": 0.44, "grad_norm": 1.403564026534955, "learning_rate": 6.186366475174754e-06, "loss": 0.2168, "step": 6166 }, { "epoch": 0.44, "grad_norm": 1.36688384725241, "learning_rate": 6.185241119598749e-06, "loss": 0.1912, "step": 6167 }, { "epoch": 0.44, "grad_norm": 1.5636020618240198, "learning_rate": 6.18411570040371e-06, "loss": 0.2447, "step": 6168 }, { "epoch": 0.44, "grad_norm": 1.4533881277320841, "learning_rate": 6.182990217650045e-06, "loss": 0.2063, "step": 6169 }, { "epoch": 0.44, "grad_norm": 1.4702121720775534, "learning_rate": 6.1818646713981626e-06, "loss": 0.1927, "step": 6170 }, { "epoch": 0.44, "grad_norm": 1.4412977528362718, "learning_rate": 6.180739061708481e-06, "loss": 0.2275, "step": 6171 }, { "epoch": 0.44, "grad_norm": 1.3760067997333865, "learning_rate": 6.179613388641419e-06, "loss": 0.2098, "step": 6172 }, { "epoch": 0.44, "grad_norm": 1.3991209815623915, "learning_rate": 6.178487652257395e-06, "loss": 0.2252, "step": 6173 }, { "epoch": 0.44, "grad_norm": 5.851941936343868, "learning_rate": 6.177361852616834e-06, "loss": 0.5485, "step": 6174 }, { "epoch": 0.44, "grad_norm": 6.036157039773243, "learning_rate": 6.176235989780169e-06, "loss": 0.5849, "step": 6175 }, { "epoch": 0.44, "grad_norm": 1.3042836319967661, "learning_rate": 6.175110063807829e-06, "loss": 0.2073, "step": 6176 }, { "epoch": 0.44, "grad_norm": 1.2279721196528364, "learning_rate": 6.173984074760249e-06, "loss": 0.2118, "step": 6177 }, { "epoch": 0.44, "grad_norm": 1.4152578089982346, "learning_rate": 6.1728580226978656e-06, "loss": 0.1845, "step": 6178 }, { "epoch": 0.44, "grad_norm": 1.2402171604683756, "learning_rate": 6.171731907681124e-06, "loss": 0.1738, "step": 6179 }, { "epoch": 0.44, "grad_norm": 1.358663751974876, "learning_rate": 6.17060572977047e-06, "loss": 0.2181, "step": 6180 }, { "epoch": 0.44, "grad_norm": 1.2218877796487795, "learning_rate": 6.169479489026351e-06, "loss": 0.1481, "step": 6181 }, { "epoch": 0.44, "grad_norm": 1.3014885595741474, "learning_rate": 6.168353185509217e-06, "loss": 0.199, "step": 6182 }, { "epoch": 0.44, "grad_norm": 1.3369567669595106, "learning_rate": 6.1672268192795285e-06, "loss": 0.1955, "step": 6183 }, { "epoch": 0.44, "grad_norm": 1.2670428442030366, "learning_rate": 6.1661003903977405e-06, "loss": 0.1896, "step": 6184 }, { "epoch": 0.44, "grad_norm": 1.4355153720215206, "learning_rate": 6.164973898924315e-06, "loss": 0.1974, "step": 6185 }, { "epoch": 0.44, "grad_norm": 1.324647936278329, "learning_rate": 6.1638473449197185e-06, "loss": 0.2015, "step": 6186 }, { "epoch": 0.44, "grad_norm": 1.20759406732928, "learning_rate": 6.162720728444422e-06, "loss": 0.1575, "step": 6187 }, { "epoch": 0.44, "grad_norm": 1.2511800565545457, "learning_rate": 6.1615940495588955e-06, "loss": 0.2126, "step": 6188 }, { "epoch": 0.44, "grad_norm": 1.5014285751933938, "learning_rate": 6.160467308323616e-06, "loss": 0.2076, "step": 6189 }, { "epoch": 0.44, "grad_norm": 1.340871336596143, "learning_rate": 6.159340504799059e-06, "loss": 0.2168, "step": 6190 }, { "epoch": 0.44, "grad_norm": 1.2616066255345741, "learning_rate": 6.158213639045712e-06, "loss": 0.2553, "step": 6191 }, { "epoch": 0.44, "grad_norm": 1.3510305597489163, "learning_rate": 6.157086711124057e-06, "loss": 0.1799, "step": 6192 }, { "epoch": 0.44, "grad_norm": 1.4910693978490932, "learning_rate": 6.155959721094585e-06, "loss": 0.1985, "step": 6193 }, { "epoch": 0.44, "grad_norm": 1.379994434569168, "learning_rate": 6.154832669017787e-06, "loss": 0.245, "step": 6194 }, { "epoch": 0.44, "grad_norm": 1.4038194192086932, "learning_rate": 6.153705554954158e-06, "loss": 0.2156, "step": 6195 }, { "epoch": 0.44, "grad_norm": 1.3806045759435384, "learning_rate": 6.1525783789642e-06, "loss": 0.2084, "step": 6196 }, { "epoch": 0.44, "grad_norm": 1.3250138340182083, "learning_rate": 6.1514511411084134e-06, "loss": 0.1962, "step": 6197 }, { "epoch": 0.44, "grad_norm": 1.4094631144210183, "learning_rate": 6.150323841447304e-06, "loss": 0.2415, "step": 6198 }, { "epoch": 0.44, "grad_norm": 1.421841611223444, "learning_rate": 6.1491964800413795e-06, "loss": 0.2101, "step": 6199 }, { "epoch": 0.44, "grad_norm": 1.3832341057798752, "learning_rate": 6.1480690569511545e-06, "loss": 0.2275, "step": 6200 }, { "epoch": 0.44, "grad_norm": 1.392889850897278, "learning_rate": 6.146941572237144e-06, "loss": 0.2147, "step": 6201 }, { "epoch": 0.44, "grad_norm": 1.217405760422023, "learning_rate": 6.145814025959866e-06, "loss": 0.1822, "step": 6202 }, { "epoch": 0.44, "grad_norm": 1.5699487203119262, "learning_rate": 6.144686418179844e-06, "loss": 0.2452, "step": 6203 }, { "epoch": 0.44, "grad_norm": 1.366077704621606, "learning_rate": 6.143558748957601e-06, "loss": 0.2108, "step": 6204 }, { "epoch": 0.44, "grad_norm": 1.4732859544911876, "learning_rate": 6.14243101835367e-06, "loss": 0.2477, "step": 6205 }, { "epoch": 0.44, "grad_norm": 1.3315202482635133, "learning_rate": 6.141303226428579e-06, "loss": 0.191, "step": 6206 }, { "epoch": 0.44, "grad_norm": 1.2482294060093437, "learning_rate": 6.140175373242865e-06, "loss": 0.1996, "step": 6207 }, { "epoch": 0.44, "grad_norm": 1.3569112411065822, "learning_rate": 6.139047458857066e-06, "loss": 0.1976, "step": 6208 }, { "epoch": 0.44, "grad_norm": 1.4052322355071327, "learning_rate": 6.137919483331725e-06, "loss": 0.1721, "step": 6209 }, { "epoch": 0.44, "grad_norm": 1.263342237411041, "learning_rate": 6.136791446727387e-06, "loss": 0.1834, "step": 6210 }, { "epoch": 0.44, "grad_norm": 1.311912214696059, "learning_rate": 6.135663349104601e-06, "loss": 0.1904, "step": 6211 }, { "epoch": 0.44, "grad_norm": 1.2401868278387478, "learning_rate": 6.134535190523917e-06, "loss": 0.1895, "step": 6212 }, { "epoch": 0.44, "grad_norm": 4.32775518689021, "learning_rate": 6.133406971045892e-06, "loss": 0.5603, "step": 6213 }, { "epoch": 0.44, "grad_norm": 1.382364531410594, "learning_rate": 6.132278690731084e-06, "loss": 0.2349, "step": 6214 }, { "epoch": 0.44, "grad_norm": 1.0386007285000776, "learning_rate": 6.131150349640053e-06, "loss": 0.1463, "step": 6215 }, { "epoch": 0.44, "grad_norm": 1.3906146274093631, "learning_rate": 6.130021947833364e-06, "loss": 0.1851, "step": 6216 }, { "epoch": 0.44, "grad_norm": 1.630223903710757, "learning_rate": 6.128893485371589e-06, "loss": 0.2271, "step": 6217 }, { "epoch": 0.44, "grad_norm": 1.2084793956096727, "learning_rate": 6.127764962315296e-06, "loss": 0.1484, "step": 6218 }, { "epoch": 0.44, "grad_norm": 1.4312937297665498, "learning_rate": 6.12663637872506e-06, "loss": 0.2123, "step": 6219 }, { "epoch": 0.44, "grad_norm": 1.3638214268796884, "learning_rate": 6.125507734661458e-06, "loss": 0.2116, "step": 6220 }, { "epoch": 0.44, "grad_norm": 1.2938095651235335, "learning_rate": 6.124379030185072e-06, "loss": 0.2341, "step": 6221 }, { "epoch": 0.45, "grad_norm": 4.885239796801945, "learning_rate": 6.123250265356489e-06, "loss": 0.7492, "step": 6222 }, { "epoch": 0.45, "grad_norm": 1.1906012301671285, "learning_rate": 6.122121440236292e-06, "loss": 0.1739, "step": 6223 }, { "epoch": 0.45, "grad_norm": 1.2731473919044556, "learning_rate": 6.1209925548850734e-06, "loss": 0.1658, "step": 6224 }, { "epoch": 0.45, "grad_norm": 1.2877739378374993, "learning_rate": 6.119863609363429e-06, "loss": 0.1921, "step": 6225 }, { "epoch": 0.45, "grad_norm": 1.3621938534785047, "learning_rate": 6.118734603731955e-06, "loss": 0.1817, "step": 6226 }, { "epoch": 0.45, "grad_norm": 1.3648345183442678, "learning_rate": 6.117605538051252e-06, "loss": 0.2033, "step": 6227 }, { "epoch": 0.45, "grad_norm": 1.138755521519348, "learning_rate": 6.116476412381926e-06, "loss": 0.1467, "step": 6228 }, { "epoch": 0.45, "grad_norm": 1.1088150409775865, "learning_rate": 6.115347226784578e-06, "loss": 0.1651, "step": 6229 }, { "epoch": 0.45, "grad_norm": 4.330296853056125, "learning_rate": 6.114217981319823e-06, "loss": 0.6135, "step": 6230 }, { "epoch": 0.45, "grad_norm": 1.2721725444224916, "learning_rate": 6.113088676048274e-06, "loss": 0.1946, "step": 6231 }, { "epoch": 0.45, "grad_norm": 1.5357647293942507, "learning_rate": 6.111959311030548e-06, "loss": 0.2226, "step": 6232 }, { "epoch": 0.45, "grad_norm": 1.3967966416842132, "learning_rate": 6.1108298863272616e-06, "loss": 0.2105, "step": 6233 }, { "epoch": 0.45, "grad_norm": 4.076899093344789, "learning_rate": 6.109700401999042e-06, "loss": 0.55, "step": 6234 }, { "epoch": 0.45, "grad_norm": 1.2311866535673874, "learning_rate": 6.108570858106512e-06, "loss": 0.1161, "step": 6235 }, { "epoch": 0.45, "grad_norm": 1.3166317583633051, "learning_rate": 6.1074412547103025e-06, "loss": 0.1784, "step": 6236 }, { "epoch": 0.45, "grad_norm": 1.2704777627249038, "learning_rate": 6.106311591871046e-06, "loss": 0.1776, "step": 6237 }, { "epoch": 0.45, "grad_norm": 1.19983985382999, "learning_rate": 6.105181869649379e-06, "loss": 0.2002, "step": 6238 }, { "epoch": 0.45, "grad_norm": 1.4677889375537017, "learning_rate": 6.10405208810594e-06, "loss": 0.1827, "step": 6239 }, { "epoch": 0.45, "grad_norm": 4.390272710189704, "learning_rate": 6.1029222473013705e-06, "loss": 0.5498, "step": 6240 }, { "epoch": 0.45, "grad_norm": 1.293239891737135, "learning_rate": 6.101792347296316e-06, "loss": 0.2385, "step": 6241 }, { "epoch": 0.45, "grad_norm": 5.443916025669607, "learning_rate": 6.100662388151427e-06, "loss": 0.7363, "step": 6242 }, { "epoch": 0.45, "grad_norm": 1.5122152655578487, "learning_rate": 6.099532369927353e-06, "loss": 0.2285, "step": 6243 }, { "epoch": 0.45, "grad_norm": 1.336912865585975, "learning_rate": 6.09840229268475e-06, "loss": 0.1723, "step": 6244 }, { "epoch": 0.45, "grad_norm": 1.2734663966435864, "learning_rate": 6.097272156484274e-06, "loss": 0.1879, "step": 6245 }, { "epoch": 0.45, "grad_norm": 1.448569829089818, "learning_rate": 6.096141961386589e-06, "loss": 0.212, "step": 6246 }, { "epoch": 0.45, "grad_norm": 5.614573599409491, "learning_rate": 6.095011707452358e-06, "loss": 0.6934, "step": 6247 }, { "epoch": 0.45, "grad_norm": 1.4237615740702574, "learning_rate": 6.09388139474225e-06, "loss": 0.1955, "step": 6248 }, { "epoch": 0.45, "grad_norm": 1.2836320754134385, "learning_rate": 6.092751023316933e-06, "loss": 0.1798, "step": 6249 }, { "epoch": 0.45, "grad_norm": 1.243663407298675, "learning_rate": 6.091620593237083e-06, "loss": 0.2014, "step": 6250 }, { "epoch": 0.45, "grad_norm": 1.2637810131072553, "learning_rate": 6.090490104563377e-06, "loss": 0.1807, "step": 6251 }, { "epoch": 0.45, "grad_norm": 1.2507542335233655, "learning_rate": 6.0893595573564935e-06, "loss": 0.186, "step": 6252 }, { "epoch": 0.45, "grad_norm": 1.2434477247734192, "learning_rate": 6.088228951677117e-06, "loss": 0.1597, "step": 6253 }, { "epoch": 0.45, "grad_norm": 1.3929016069730167, "learning_rate": 6.087098287585933e-06, "loss": 0.2329, "step": 6254 }, { "epoch": 0.45, "grad_norm": 1.574675666920025, "learning_rate": 6.085967565143632e-06, "loss": 0.2048, "step": 6255 }, { "epoch": 0.45, "grad_norm": 1.392765196853635, "learning_rate": 6.084836784410908e-06, "loss": 0.1806, "step": 6256 }, { "epoch": 0.45, "grad_norm": 4.2100092271209295, "learning_rate": 6.083705945448453e-06, "loss": 0.5043, "step": 6257 }, { "epoch": 0.45, "grad_norm": 1.3240589149003132, "learning_rate": 6.08257504831697e-06, "loss": 0.179, "step": 6258 }, { "epoch": 0.45, "grad_norm": 1.4255817514024363, "learning_rate": 6.081444093077157e-06, "loss": 0.1835, "step": 6259 }, { "epoch": 0.45, "grad_norm": 1.374730721722942, "learning_rate": 6.080313079789723e-06, "loss": 0.194, "step": 6260 }, { "epoch": 0.45, "grad_norm": 1.4282025574862633, "learning_rate": 6.079182008515376e-06, "loss": 0.2235, "step": 6261 }, { "epoch": 0.45, "grad_norm": 1.5389624388632812, "learning_rate": 6.078050879314824e-06, "loss": 0.2101, "step": 6262 }, { "epoch": 0.45, "grad_norm": 5.192906115255055, "learning_rate": 6.076919692248784e-06, "loss": 0.6677, "step": 6263 }, { "epoch": 0.45, "grad_norm": 1.2621826272550847, "learning_rate": 6.075788447377974e-06, "loss": 0.1909, "step": 6264 }, { "epoch": 0.45, "grad_norm": 1.3613137196387608, "learning_rate": 6.074657144763114e-06, "loss": 0.1984, "step": 6265 }, { "epoch": 0.45, "grad_norm": 1.5738135038837997, "learning_rate": 6.073525784464928e-06, "loss": 0.2065, "step": 6266 }, { "epoch": 0.45, "grad_norm": 1.5311593565023167, "learning_rate": 6.072394366544141e-06, "loss": 0.2259, "step": 6267 }, { "epoch": 0.45, "grad_norm": 1.2303994063072423, "learning_rate": 6.071262891061487e-06, "loss": 0.2171, "step": 6268 }, { "epoch": 0.45, "grad_norm": 6.149972000965288, "learning_rate": 6.070131358077695e-06, "loss": 0.5325, "step": 6269 }, { "epoch": 0.45, "grad_norm": 1.2961106972910443, "learning_rate": 6.068999767653505e-06, "loss": 0.1794, "step": 6270 }, { "epoch": 0.45, "grad_norm": 3.5074673461155017, "learning_rate": 6.067868119849654e-06, "loss": 0.5808, "step": 6271 }, { "epoch": 0.45, "grad_norm": 1.5227394000854955, "learning_rate": 6.066736414726884e-06, "loss": 0.1943, "step": 6272 }, { "epoch": 0.45, "grad_norm": 1.4007077133053332, "learning_rate": 6.065604652345942e-06, "loss": 0.2008, "step": 6273 }, { "epoch": 0.45, "grad_norm": 1.3788116505384536, "learning_rate": 6.064472832767575e-06, "loss": 0.215, "step": 6274 }, { "epoch": 0.45, "grad_norm": 1.2167009064908902, "learning_rate": 6.063340956052535e-06, "loss": 0.1746, "step": 6275 }, { "epoch": 0.45, "grad_norm": 1.3587374269621821, "learning_rate": 6.062209022261577e-06, "loss": 0.1911, "step": 6276 }, { "epoch": 0.45, "grad_norm": 1.302926753246791, "learning_rate": 6.06107703145546e-06, "loss": 0.2233, "step": 6277 }, { "epoch": 0.45, "grad_norm": 1.3926403236691507, "learning_rate": 6.059944983694942e-06, "loss": 0.2215, "step": 6278 }, { "epoch": 0.45, "grad_norm": 1.4597420568222197, "learning_rate": 6.058812879040788e-06, "loss": 0.2114, "step": 6279 }, { "epoch": 0.45, "grad_norm": 1.370211512995902, "learning_rate": 6.0576807175537654e-06, "loss": 0.2237, "step": 6280 }, { "epoch": 0.45, "grad_norm": 4.500930703748208, "learning_rate": 6.056548499294646e-06, "loss": 0.4779, "step": 6281 }, { "epoch": 0.45, "grad_norm": 1.4633484928762333, "learning_rate": 6.055416224324198e-06, "loss": 0.2114, "step": 6282 }, { "epoch": 0.45, "grad_norm": 1.4013170605867908, "learning_rate": 6.0542838927032e-06, "loss": 0.2321, "step": 6283 }, { "epoch": 0.45, "grad_norm": 1.3518412157419437, "learning_rate": 6.053151504492431e-06, "loss": 0.1731, "step": 6284 }, { "epoch": 0.45, "grad_norm": 1.2443941602769666, "learning_rate": 6.052019059752675e-06, "loss": 0.1609, "step": 6285 }, { "epoch": 0.45, "grad_norm": 1.2183093941072516, "learning_rate": 6.0508865585447156e-06, "loss": 0.1587, "step": 6286 }, { "epoch": 0.45, "grad_norm": 1.2435163329780377, "learning_rate": 6.0497540009293375e-06, "loss": 0.1905, "step": 6287 }, { "epoch": 0.45, "grad_norm": 1.236734889156336, "learning_rate": 6.048621386967337e-06, "loss": 0.2169, "step": 6288 }, { "epoch": 0.45, "grad_norm": 1.2936016621117368, "learning_rate": 6.0474887167195065e-06, "loss": 0.1714, "step": 6289 }, { "epoch": 0.45, "grad_norm": 1.3117060899185808, "learning_rate": 6.0463559902466425e-06, "loss": 0.1963, "step": 6290 }, { "epoch": 0.45, "grad_norm": 1.441185049728224, "learning_rate": 6.045223207609547e-06, "loss": 0.2242, "step": 6291 }, { "epoch": 0.45, "grad_norm": 1.4909393207291666, "learning_rate": 6.044090368869021e-06, "loss": 0.1856, "step": 6292 }, { "epoch": 0.45, "grad_norm": 1.4453673900207722, "learning_rate": 6.042957474085873e-06, "loss": 0.2356, "step": 6293 }, { "epoch": 0.45, "grad_norm": 1.1441891228733112, "learning_rate": 6.041824523320911e-06, "loss": 0.1875, "step": 6294 }, { "epoch": 0.45, "grad_norm": 7.568945226444918, "learning_rate": 6.0406915166349475e-06, "loss": 0.6374, "step": 6295 }, { "epoch": 0.45, "grad_norm": 1.5429572550344108, "learning_rate": 6.039558454088796e-06, "loss": 0.2178, "step": 6296 }, { "epoch": 0.45, "grad_norm": 1.3936526079781395, "learning_rate": 6.038425335743279e-06, "loss": 0.1977, "step": 6297 }, { "epoch": 0.45, "grad_norm": 1.3292338981763416, "learning_rate": 6.037292161659215e-06, "loss": 0.2133, "step": 6298 }, { "epoch": 0.45, "grad_norm": 1.4828141189545134, "learning_rate": 6.036158931897429e-06, "loss": 0.2228, "step": 6299 }, { "epoch": 0.45, "grad_norm": 1.4020798827656242, "learning_rate": 6.035025646518747e-06, "loss": 0.2271, "step": 6300 }, { "epoch": 0.45, "grad_norm": 5.131310637340404, "learning_rate": 6.033892305584001e-06, "loss": 0.7952, "step": 6301 }, { "epoch": 0.45, "grad_norm": 1.3079993540816803, "learning_rate": 6.032758909154023e-06, "loss": 0.1964, "step": 6302 }, { "epoch": 0.45, "grad_norm": 1.237006692797655, "learning_rate": 6.031625457289649e-06, "loss": 0.1886, "step": 6303 }, { "epoch": 0.45, "grad_norm": 1.338943704898319, "learning_rate": 6.0304919500517214e-06, "loss": 0.223, "step": 6304 }, { "epoch": 0.45, "grad_norm": 1.3342799236949077, "learning_rate": 6.029358387501076e-06, "loss": 0.2181, "step": 6305 }, { "epoch": 0.45, "grad_norm": 1.3623897368278908, "learning_rate": 6.0282247696985655e-06, "loss": 0.1875, "step": 6306 }, { "epoch": 0.45, "grad_norm": 1.5052215264138904, "learning_rate": 6.027091096705032e-06, "loss": 0.2224, "step": 6307 }, { "epoch": 0.45, "grad_norm": 1.3118463019171482, "learning_rate": 6.02595736858133e-06, "loss": 0.1778, "step": 6308 }, { "epoch": 0.45, "grad_norm": 1.2686771197138902, "learning_rate": 6.024823585388311e-06, "loss": 0.2018, "step": 6309 }, { "epoch": 0.45, "grad_norm": 1.1575823445280766, "learning_rate": 6.023689747186835e-06, "loss": 0.1493, "step": 6310 }, { "epoch": 0.45, "grad_norm": 1.4153949398860801, "learning_rate": 6.022555854037759e-06, "loss": 0.1896, "step": 6311 }, { "epoch": 0.45, "grad_norm": 4.982957771772166, "learning_rate": 6.021421906001948e-06, "loss": 0.7722, "step": 6312 }, { "epoch": 0.45, "grad_norm": 1.2853598883875035, "learning_rate": 6.020287903140266e-06, "loss": 0.1959, "step": 6313 }, { "epoch": 0.45, "grad_norm": 1.315768973307454, "learning_rate": 6.019153845513583e-06, "loss": 0.2492, "step": 6314 }, { "epoch": 0.45, "grad_norm": 1.3328433004418445, "learning_rate": 6.018019733182772e-06, "loss": 0.1857, "step": 6315 }, { "epoch": 0.45, "grad_norm": 1.5841571644699906, "learning_rate": 6.016885566208706e-06, "loss": 0.2486, "step": 6316 }, { "epoch": 0.45, "grad_norm": 1.1870924265019922, "learning_rate": 6.01575134465226e-06, "loss": 0.1482, "step": 6317 }, { "epoch": 0.45, "grad_norm": 6.488639556326491, "learning_rate": 6.0146170685743185e-06, "loss": 0.5919, "step": 6318 }, { "epoch": 0.45, "grad_norm": 1.336588295500303, "learning_rate": 6.013482738035765e-06, "loss": 0.1923, "step": 6319 }, { "epoch": 0.45, "grad_norm": 1.374509516732174, "learning_rate": 6.012348353097484e-06, "loss": 0.2108, "step": 6320 }, { "epoch": 0.45, "grad_norm": 1.4314863576343633, "learning_rate": 6.011213913820364e-06, "loss": 0.1782, "step": 6321 }, { "epoch": 0.45, "grad_norm": 1.483039803388748, "learning_rate": 6.010079420265298e-06, "loss": 0.2066, "step": 6322 }, { "epoch": 0.45, "grad_norm": 1.2134817967379525, "learning_rate": 6.0089448724931836e-06, "loss": 0.1716, "step": 6323 }, { "epoch": 0.45, "grad_norm": 1.2544916123895111, "learning_rate": 6.007810270564916e-06, "loss": 0.1593, "step": 6324 }, { "epoch": 0.45, "grad_norm": 1.3369852528936066, "learning_rate": 6.006675614541397e-06, "loss": 0.2108, "step": 6325 }, { "epoch": 0.45, "grad_norm": 1.3928096542961756, "learning_rate": 6.005540904483529e-06, "loss": 0.1911, "step": 6326 }, { "epoch": 0.45, "grad_norm": 5.488883859381624, "learning_rate": 6.004406140452221e-06, "loss": 0.7346, "step": 6327 }, { "epoch": 0.45, "grad_norm": 1.289322667574908, "learning_rate": 6.003271322508381e-06, "loss": 0.1857, "step": 6328 }, { "epoch": 0.45, "grad_norm": 1.352329087691462, "learning_rate": 6.002136450712924e-06, "loss": 0.2203, "step": 6329 }, { "epoch": 0.45, "grad_norm": 1.3021521239738627, "learning_rate": 6.001001525126762e-06, "loss": 0.2233, "step": 6330 }, { "epoch": 0.45, "grad_norm": 1.2048483133621708, "learning_rate": 5.9998665458108155e-06, "loss": 0.2254, "step": 6331 }, { "epoch": 0.45, "grad_norm": 1.2440829804628666, "learning_rate": 5.998731512826006e-06, "loss": 0.1708, "step": 6332 }, { "epoch": 0.45, "grad_norm": 7.737432064359035, "learning_rate": 5.9975964262332554e-06, "loss": 0.4908, "step": 6333 }, { "epoch": 0.45, "grad_norm": 1.4185981452451342, "learning_rate": 5.996461286093491e-06, "loss": 0.1882, "step": 6334 }, { "epoch": 0.45, "grad_norm": 1.5086443231035676, "learning_rate": 5.9953260924676446e-06, "loss": 0.2472, "step": 6335 }, { "epoch": 0.45, "grad_norm": 1.310336954815626, "learning_rate": 5.994190845416649e-06, "loss": 0.2043, "step": 6336 }, { "epoch": 0.45, "grad_norm": 1.388052397772874, "learning_rate": 5.993055545001437e-06, "loss": 0.1975, "step": 6337 }, { "epoch": 0.45, "grad_norm": 1.4282144729464996, "learning_rate": 5.991920191282948e-06, "loss": 0.1752, "step": 6338 }, { "epoch": 0.45, "grad_norm": 1.4173446922685864, "learning_rate": 5.990784784322126e-06, "loss": 0.1945, "step": 6339 }, { "epoch": 0.45, "grad_norm": 1.400586578613333, "learning_rate": 5.9896493241799115e-06, "loss": 0.2114, "step": 6340 }, { "epoch": 0.45, "grad_norm": 1.3621881375185332, "learning_rate": 5.988513810917254e-06, "loss": 0.2026, "step": 6341 }, { "epoch": 0.45, "grad_norm": 1.1921816035416763, "learning_rate": 5.9873782445951e-06, "loss": 0.2189, "step": 6342 }, { "epoch": 0.45, "grad_norm": 1.323916164001303, "learning_rate": 5.986242625274407e-06, "loss": 0.2143, "step": 6343 }, { "epoch": 0.45, "grad_norm": 1.2273759572975567, "learning_rate": 5.9851069530161265e-06, "loss": 0.2084, "step": 6344 }, { "epoch": 0.45, "grad_norm": 1.6739554039408258, "learning_rate": 5.983971227881219e-06, "loss": 0.2101, "step": 6345 }, { "epoch": 0.45, "grad_norm": 1.3402818777437433, "learning_rate": 5.982835449930644e-06, "loss": 0.1547, "step": 6346 }, { "epoch": 0.45, "grad_norm": 1.3433283134082625, "learning_rate": 5.981699619225368e-06, "loss": 0.1898, "step": 6347 }, { "epoch": 0.45, "grad_norm": 1.4108050089036783, "learning_rate": 5.980563735826355e-06, "loss": 0.2186, "step": 6348 }, { "epoch": 0.45, "grad_norm": 1.1332072821247325, "learning_rate": 5.979427799794579e-06, "loss": 0.1605, "step": 6349 }, { "epoch": 0.45, "grad_norm": 4.847725651219658, "learning_rate": 5.9782918111910085e-06, "loss": 0.5338, "step": 6350 }, { "epoch": 0.45, "grad_norm": 1.2091786517248881, "learning_rate": 5.977155770076618e-06, "loss": 0.2137, "step": 6351 }, { "epoch": 0.45, "grad_norm": 1.1839504407795458, "learning_rate": 5.97601967651239e-06, "loss": 0.1899, "step": 6352 }, { "epoch": 0.45, "grad_norm": 1.1579553877660862, "learning_rate": 5.974883530559302e-06, "loss": 0.1595, "step": 6353 }, { "epoch": 0.45, "grad_norm": 1.1934046565649217, "learning_rate": 5.97374733227834e-06, "loss": 0.1813, "step": 6354 }, { "epoch": 0.45, "grad_norm": 4.522164194237041, "learning_rate": 5.97261108173049e-06, "loss": 0.5392, "step": 6355 }, { "epoch": 0.45, "grad_norm": 5.769575816204579, "learning_rate": 5.97147477897674e-06, "loss": 0.6495, "step": 6356 }, { "epoch": 0.45, "grad_norm": 1.2036694679804498, "learning_rate": 5.970338424078084e-06, "loss": 0.1724, "step": 6357 }, { "epoch": 0.45, "grad_norm": 1.2452484179167644, "learning_rate": 5.969202017095517e-06, "loss": 0.1861, "step": 6358 }, { "epoch": 0.45, "grad_norm": 1.2018314197124143, "learning_rate": 5.968065558090036e-06, "loss": 0.1867, "step": 6359 }, { "epoch": 0.45, "grad_norm": 1.356294461046957, "learning_rate": 5.966929047122641e-06, "loss": 0.18, "step": 6360 }, { "epoch": 0.46, "grad_norm": 1.3255124937647749, "learning_rate": 5.965792484254336e-06, "loss": 0.2095, "step": 6361 }, { "epoch": 0.46, "grad_norm": 6.2302897521075975, "learning_rate": 5.964655869546128e-06, "loss": 0.3118, "step": 6362 }, { "epoch": 0.46, "grad_norm": 1.291048859009271, "learning_rate": 5.963519203059025e-06, "loss": 0.1627, "step": 6363 }, { "epoch": 0.46, "grad_norm": 1.2439628346359013, "learning_rate": 5.962382484854038e-06, "loss": 0.1975, "step": 6364 }, { "epoch": 0.46, "grad_norm": 1.1378770205241788, "learning_rate": 5.961245714992184e-06, "loss": 0.1457, "step": 6365 }, { "epoch": 0.46, "grad_norm": 1.1367603185012025, "learning_rate": 5.9601088935344785e-06, "loss": 0.1675, "step": 6366 }, { "epoch": 0.46, "grad_norm": 1.2476079926971129, "learning_rate": 5.9589720205419425e-06, "loss": 0.1429, "step": 6367 }, { "epoch": 0.46, "grad_norm": 1.4171734097597941, "learning_rate": 5.957835096075597e-06, "loss": 0.222, "step": 6368 }, { "epoch": 0.46, "grad_norm": 1.176774758143685, "learning_rate": 5.956698120196469e-06, "loss": 0.1813, "step": 6369 }, { "epoch": 0.46, "grad_norm": 1.382647891989676, "learning_rate": 5.955561092965588e-06, "loss": 0.2267, "step": 6370 }, { "epoch": 0.46, "grad_norm": 1.3247987986788528, "learning_rate": 5.9544240144439835e-06, "loss": 0.228, "step": 6371 }, { "epoch": 0.46, "grad_norm": 1.3269420621580488, "learning_rate": 5.953286884692688e-06, "loss": 0.2056, "step": 6372 }, { "epoch": 0.46, "grad_norm": 1.4553216205781438, "learning_rate": 5.95214970377274e-06, "loss": 0.2063, "step": 6373 }, { "epoch": 0.46, "grad_norm": 1.3425910116135131, "learning_rate": 5.951012471745181e-06, "loss": 0.2347, "step": 6374 }, { "epoch": 0.46, "grad_norm": 1.3962498087779134, "learning_rate": 5.949875188671049e-06, "loss": 0.2357, "step": 6375 }, { "epoch": 0.46, "grad_norm": 1.442870179887074, "learning_rate": 5.948737854611389e-06, "loss": 0.1731, "step": 6376 }, { "epoch": 0.46, "grad_norm": 1.5360169247097335, "learning_rate": 5.947600469627253e-06, "loss": 0.2285, "step": 6377 }, { "epoch": 0.46, "grad_norm": 1.1387305531954393, "learning_rate": 5.946463033779687e-06, "loss": 0.1554, "step": 6378 }, { "epoch": 0.46, "grad_norm": 5.824312749942107, "learning_rate": 5.945325547129745e-06, "loss": 0.65, "step": 6379 }, { "epoch": 0.46, "grad_norm": 1.3442747503456838, "learning_rate": 5.944188009738483e-06, "loss": 0.2047, "step": 6380 }, { "epoch": 0.46, "grad_norm": 1.5116092199632625, "learning_rate": 5.943050421666962e-06, "loss": 0.2073, "step": 6381 }, { "epoch": 0.46, "grad_norm": 1.3126078406247952, "learning_rate": 5.941912782976239e-06, "loss": 0.2028, "step": 6382 }, { "epoch": 0.46, "grad_norm": 1.2668469679867702, "learning_rate": 5.94077509372738e-06, "loss": 0.1658, "step": 6383 }, { "epoch": 0.46, "grad_norm": 1.2331608070574458, "learning_rate": 5.939637353981451e-06, "loss": 0.174, "step": 6384 }, { "epoch": 0.46, "grad_norm": 1.3844469717760677, "learning_rate": 5.938499563799521e-06, "loss": 0.2148, "step": 6385 }, { "epoch": 0.46, "grad_norm": 1.3783196853090651, "learning_rate": 5.937361723242665e-06, "loss": 0.1829, "step": 6386 }, { "epoch": 0.46, "grad_norm": 1.3181039909223589, "learning_rate": 5.936223832371956e-06, "loss": 0.1985, "step": 6387 }, { "epoch": 0.46, "grad_norm": 1.4296927282769185, "learning_rate": 5.93508589124847e-06, "loss": 0.2021, "step": 6388 }, { "epoch": 0.46, "grad_norm": 1.4129004479727896, "learning_rate": 5.9339478999332874e-06, "loss": 0.2301, "step": 6389 }, { "epoch": 0.46, "grad_norm": 1.24118835363352, "learning_rate": 5.932809858487493e-06, "loss": 0.1598, "step": 6390 }, { "epoch": 0.46, "grad_norm": 1.1243158269688118, "learning_rate": 5.931671766972172e-06, "loss": 0.1635, "step": 6391 }, { "epoch": 0.46, "grad_norm": 1.4752454909314463, "learning_rate": 5.9305336254484105e-06, "loss": 0.2243, "step": 6392 }, { "epoch": 0.46, "grad_norm": 1.2603702706787374, "learning_rate": 5.929395433977301e-06, "loss": 0.2454, "step": 6393 }, { "epoch": 0.46, "grad_norm": 1.1428968817615206, "learning_rate": 5.928257192619937e-06, "loss": 0.1684, "step": 6394 }, { "epoch": 0.46, "grad_norm": 1.387450367807098, "learning_rate": 5.927118901437416e-06, "loss": 0.2049, "step": 6395 }, { "epoch": 0.46, "grad_norm": 1.423292449416241, "learning_rate": 5.925980560490834e-06, "loss": 0.2093, "step": 6396 }, { "epoch": 0.46, "grad_norm": 1.2822988742082448, "learning_rate": 5.924842169841294e-06, "loss": 0.1937, "step": 6397 }, { "epoch": 0.46, "grad_norm": 1.3155132368791596, "learning_rate": 5.923703729549901e-06, "loss": 0.1819, "step": 6398 }, { "epoch": 0.46, "grad_norm": 1.5626142861090788, "learning_rate": 5.922565239677762e-06, "loss": 0.2641, "step": 6399 }, { "epoch": 0.46, "grad_norm": 1.308320508740407, "learning_rate": 5.921426700285986e-06, "loss": 0.1748, "step": 6400 }, { "epoch": 0.46, "grad_norm": 1.2876023378190788, "learning_rate": 5.920288111435685e-06, "loss": 0.2238, "step": 6401 }, { "epoch": 0.46, "grad_norm": 1.2336669247018972, "learning_rate": 5.9191494731879736e-06, "loss": 0.1851, "step": 6402 }, { "epoch": 0.46, "grad_norm": 6.1882990397510005, "learning_rate": 5.918010785603971e-06, "loss": 0.7286, "step": 6403 }, { "epoch": 0.46, "grad_norm": 1.3885533454512806, "learning_rate": 5.916872048744797e-06, "loss": 0.1919, "step": 6404 }, { "epoch": 0.46, "grad_norm": 1.2347547274853918, "learning_rate": 5.915733262671573e-06, "loss": 0.2041, "step": 6405 }, { "epoch": 0.46, "grad_norm": 1.4240502757995508, "learning_rate": 5.914594427445426e-06, "loss": 0.2152, "step": 6406 }, { "epoch": 0.46, "grad_norm": 1.2365012189078137, "learning_rate": 5.913455543127484e-06, "loss": 0.1982, "step": 6407 }, { "epoch": 0.46, "grad_norm": 1.2404379751449344, "learning_rate": 5.912316609778876e-06, "loss": 0.2051, "step": 6408 }, { "epoch": 0.46, "grad_norm": 1.3768777308964077, "learning_rate": 5.911177627460739e-06, "loss": 0.2185, "step": 6409 }, { "epoch": 0.46, "grad_norm": 1.3143196388988154, "learning_rate": 5.910038596234205e-06, "loss": 0.1495, "step": 6410 }, { "epoch": 0.46, "grad_norm": 1.2938354765102151, "learning_rate": 5.908899516160414e-06, "loss": 0.168, "step": 6411 }, { "epoch": 0.46, "grad_norm": 1.4387654948685327, "learning_rate": 5.9077603873005095e-06, "loss": 0.1971, "step": 6412 }, { "epoch": 0.46, "grad_norm": 1.2633516864995231, "learning_rate": 5.906621209715634e-06, "loss": 0.1665, "step": 6413 }, { "epoch": 0.46, "grad_norm": 1.3178462276839193, "learning_rate": 5.905481983466932e-06, "loss": 0.196, "step": 6414 }, { "epoch": 0.46, "grad_norm": 5.434962758979205, "learning_rate": 5.904342708615556e-06, "loss": 0.5096, "step": 6415 }, { "epoch": 0.46, "grad_norm": 6.3323461858176815, "learning_rate": 5.903203385222656e-06, "loss": 0.6354, "step": 6416 }, { "epoch": 0.46, "grad_norm": 1.3684449014669653, "learning_rate": 5.902064013349385e-06, "loss": 0.2103, "step": 6417 }, { "epoch": 0.46, "grad_norm": 1.3605920854798437, "learning_rate": 5.900924593056902e-06, "loss": 0.2166, "step": 6418 }, { "epoch": 0.46, "grad_norm": 1.3185792753387833, "learning_rate": 5.899785124406366e-06, "loss": 0.1688, "step": 6419 }, { "epoch": 0.46, "grad_norm": 1.535848228670038, "learning_rate": 5.898645607458941e-06, "loss": 0.1933, "step": 6420 }, { "epoch": 0.46, "grad_norm": 1.3999551608078555, "learning_rate": 5.897506042275789e-06, "loss": 0.183, "step": 6421 }, { "epoch": 0.46, "grad_norm": 1.2242529210332216, "learning_rate": 5.896366428918078e-06, "loss": 0.1419, "step": 6422 }, { "epoch": 0.46, "grad_norm": 1.4836206187691554, "learning_rate": 5.895226767446978e-06, "loss": 0.2209, "step": 6423 }, { "epoch": 0.46, "grad_norm": 1.3573684868770453, "learning_rate": 5.894087057923661e-06, "loss": 0.1915, "step": 6424 }, { "epoch": 0.46, "grad_norm": 1.494923519387455, "learning_rate": 5.892947300409304e-06, "loss": 0.2321, "step": 6425 }, { "epoch": 0.46, "grad_norm": 1.2687737264928087, "learning_rate": 5.891807494965083e-06, "loss": 0.1679, "step": 6426 }, { "epoch": 0.46, "grad_norm": 1.4795524925076713, "learning_rate": 5.890667641652178e-06, "loss": 0.1795, "step": 6427 }, { "epoch": 0.46, "grad_norm": 1.312366472918141, "learning_rate": 5.889527740531773e-06, "loss": 0.1524, "step": 6428 }, { "epoch": 0.46, "grad_norm": 5.005535262918279, "learning_rate": 5.8883877916650535e-06, "loss": 0.4854, "step": 6429 }, { "epoch": 0.46, "grad_norm": 1.412068786406388, "learning_rate": 5.887247795113206e-06, "loss": 0.2051, "step": 6430 }, { "epoch": 0.46, "grad_norm": 1.3485782267298134, "learning_rate": 5.886107750937421e-06, "loss": 0.1849, "step": 6431 }, { "epoch": 0.46, "grad_norm": 5.0660343015209515, "learning_rate": 5.884967659198893e-06, "loss": 0.6338, "step": 6432 }, { "epoch": 0.46, "grad_norm": 1.283252719212704, "learning_rate": 5.883827519958817e-06, "loss": 0.1945, "step": 6433 }, { "epoch": 0.46, "grad_norm": 1.383096483958068, "learning_rate": 5.882687333278391e-06, "loss": 0.201, "step": 6434 }, { "epoch": 0.46, "grad_norm": 1.304577357921389, "learning_rate": 5.881547099218815e-06, "loss": 0.1616, "step": 6435 }, { "epoch": 0.46, "grad_norm": 1.2478015838822425, "learning_rate": 5.8804068178412936e-06, "loss": 0.1688, "step": 6436 }, { "epoch": 0.46, "grad_norm": 1.323507064769264, "learning_rate": 5.8792664892070314e-06, "loss": 0.2196, "step": 6437 }, { "epoch": 0.46, "grad_norm": 1.3980612651983149, "learning_rate": 5.878126113377239e-06, "loss": 0.1881, "step": 6438 }, { "epoch": 0.46, "grad_norm": 1.3814027750282512, "learning_rate": 5.876985690413122e-06, "loss": 0.2173, "step": 6439 }, { "epoch": 0.46, "grad_norm": 1.295807002156964, "learning_rate": 5.8758452203758995e-06, "loss": 0.1659, "step": 6440 }, { "epoch": 0.46, "grad_norm": 1.578053999684828, "learning_rate": 5.874704703326786e-06, "loss": 0.204, "step": 6441 }, { "epoch": 0.46, "grad_norm": 1.3166363777251926, "learning_rate": 5.8735641393269975e-06, "loss": 0.1593, "step": 6442 }, { "epoch": 0.46, "grad_norm": 1.4103537034339437, "learning_rate": 5.8724235284377565e-06, "loss": 0.2189, "step": 6443 }, { "epoch": 0.46, "grad_norm": 1.3600438782692628, "learning_rate": 5.871282870720286e-06, "loss": 0.2375, "step": 6444 }, { "epoch": 0.46, "grad_norm": 1.371705733346872, "learning_rate": 5.870142166235814e-06, "loss": 0.2158, "step": 6445 }, { "epoch": 0.46, "grad_norm": 1.2388547779423673, "learning_rate": 5.869001415045567e-06, "loss": 0.1721, "step": 6446 }, { "epoch": 0.46, "grad_norm": 1.3007379947510882, "learning_rate": 5.867860617210775e-06, "loss": 0.176, "step": 6447 }, { "epoch": 0.46, "grad_norm": 1.2484623578651421, "learning_rate": 5.866719772792674e-06, "loss": 0.1837, "step": 6448 }, { "epoch": 0.46, "grad_norm": 1.155756294626106, "learning_rate": 5.865578881852498e-06, "loss": 0.1617, "step": 6449 }, { "epoch": 0.46, "grad_norm": 3.198978912643704, "learning_rate": 5.864437944451488e-06, "loss": 0.431, "step": 6450 }, { "epoch": 0.46, "grad_norm": 1.3208647477769846, "learning_rate": 5.8632969606508815e-06, "loss": 0.195, "step": 6451 }, { "epoch": 0.46, "grad_norm": 1.4377743757926795, "learning_rate": 5.862155930511924e-06, "loss": 0.2322, "step": 6452 }, { "epoch": 0.46, "grad_norm": 1.2712935847919, "learning_rate": 5.861014854095863e-06, "loss": 0.2069, "step": 6453 }, { "epoch": 0.46, "grad_norm": 1.4941703778333344, "learning_rate": 5.859873731463945e-06, "loss": 0.2027, "step": 6454 }, { "epoch": 0.46, "grad_norm": 1.3204050486221552, "learning_rate": 5.85873256267742e-06, "loss": 0.174, "step": 6455 }, { "epoch": 0.46, "grad_norm": 1.4286429509496164, "learning_rate": 5.857591347797545e-06, "loss": 0.2206, "step": 6456 }, { "epoch": 0.46, "grad_norm": 1.051661885850453, "learning_rate": 5.85645008688557e-06, "loss": 0.1427, "step": 6457 }, { "epoch": 0.46, "grad_norm": 1.3302224970221388, "learning_rate": 5.8553087800027585e-06, "loss": 0.184, "step": 6458 }, { "epoch": 0.46, "grad_norm": 1.2557404886319494, "learning_rate": 5.85416742721037e-06, "loss": 0.1575, "step": 6459 }, { "epoch": 0.46, "grad_norm": 1.4176027702175606, "learning_rate": 5.8530260285696674e-06, "loss": 0.2094, "step": 6460 }, { "epoch": 0.46, "grad_norm": 1.140488907024391, "learning_rate": 5.851884584141916e-06, "loss": 0.1615, "step": 6461 }, { "epoch": 0.46, "grad_norm": 1.4692581979450128, "learning_rate": 5.850743093988385e-06, "loss": 0.2118, "step": 6462 }, { "epoch": 0.46, "grad_norm": 1.3168929573000587, "learning_rate": 5.849601558170343e-06, "loss": 0.1726, "step": 6463 }, { "epoch": 0.46, "grad_norm": 1.2374304751719958, "learning_rate": 5.8484599767490665e-06, "loss": 0.2145, "step": 6464 }, { "epoch": 0.46, "grad_norm": 1.327025197307282, "learning_rate": 5.847318349785829e-06, "loss": 0.1798, "step": 6465 }, { "epoch": 0.46, "grad_norm": 1.2473517908221359, "learning_rate": 5.846176677341908e-06, "loss": 0.1788, "step": 6466 }, { "epoch": 0.46, "grad_norm": 1.4175127651531947, "learning_rate": 5.845034959478585e-06, "loss": 0.2146, "step": 6467 }, { "epoch": 0.46, "grad_norm": 1.1074090292812042, "learning_rate": 5.8438931962571435e-06, "loss": 0.1606, "step": 6468 }, { "epoch": 0.46, "grad_norm": 1.5104794569956375, "learning_rate": 5.842751387738865e-06, "loss": 0.223, "step": 6469 }, { "epoch": 0.46, "grad_norm": 1.2197224016780748, "learning_rate": 5.841609533985042e-06, "loss": 0.2253, "step": 6470 }, { "epoch": 0.46, "grad_norm": 1.1662703105674344, "learning_rate": 5.840467635056964e-06, "loss": 0.1654, "step": 6471 }, { "epoch": 0.46, "grad_norm": 1.281069834174271, "learning_rate": 5.83932569101592e-06, "loss": 0.1994, "step": 6472 }, { "epoch": 0.46, "grad_norm": 1.3414520922450812, "learning_rate": 5.838183701923208e-06, "loss": 0.2052, "step": 6473 }, { "epoch": 0.46, "grad_norm": 1.2867824577668747, "learning_rate": 5.837041667840125e-06, "loss": 0.1953, "step": 6474 }, { "epoch": 0.46, "grad_norm": 1.274664089341185, "learning_rate": 5.835899588827971e-06, "loss": 0.1897, "step": 6475 }, { "epoch": 0.46, "grad_norm": 1.247652038364852, "learning_rate": 5.834757464948049e-06, "loss": 0.1978, "step": 6476 }, { "epoch": 0.46, "grad_norm": 5.111805810154478, "learning_rate": 5.83361529626166e-06, "loss": 0.5317, "step": 6477 }, { "epoch": 0.46, "grad_norm": 1.3386962283526556, "learning_rate": 5.832473082830116e-06, "loss": 0.1698, "step": 6478 }, { "epoch": 0.46, "grad_norm": 1.4294715056398621, "learning_rate": 5.8313308247147226e-06, "loss": 0.1985, "step": 6479 }, { "epoch": 0.46, "grad_norm": 1.3744150466120613, "learning_rate": 5.830188521976794e-06, "loss": 0.2055, "step": 6480 }, { "epoch": 0.46, "grad_norm": 1.5797107604026737, "learning_rate": 5.829046174677643e-06, "loss": 0.2563, "step": 6481 }, { "epoch": 0.46, "grad_norm": 1.3072715248108897, "learning_rate": 5.827903782878587e-06, "loss": 0.1862, "step": 6482 }, { "epoch": 0.46, "grad_norm": 1.3226750607331805, "learning_rate": 5.826761346640946e-06, "loss": 0.2098, "step": 6483 }, { "epoch": 0.46, "grad_norm": 1.5039041127818567, "learning_rate": 5.82561886602604e-06, "loss": 0.2061, "step": 6484 }, { "epoch": 0.46, "grad_norm": 1.3639965472850113, "learning_rate": 5.824476341095193e-06, "loss": 0.2079, "step": 6485 }, { "epoch": 0.46, "grad_norm": 1.3268587125144686, "learning_rate": 5.823333771909731e-06, "loss": 0.1853, "step": 6486 }, { "epoch": 0.46, "grad_norm": 1.3486487300756063, "learning_rate": 5.822191158530983e-06, "loss": 0.214, "step": 6487 }, { "epoch": 0.46, "grad_norm": 1.3417341654137487, "learning_rate": 5.8210485010202825e-06, "loss": 0.1677, "step": 6488 }, { "epoch": 0.46, "grad_norm": 1.2829756437540412, "learning_rate": 5.819905799438958e-06, "loss": 0.2039, "step": 6489 }, { "epoch": 0.46, "grad_norm": 1.2608536338619447, "learning_rate": 5.818763053848347e-06, "loss": 0.1934, "step": 6490 }, { "epoch": 0.46, "grad_norm": 1.6073396735482781, "learning_rate": 5.81762026430979e-06, "loss": 0.2208, "step": 6491 }, { "epoch": 0.46, "grad_norm": 1.2741602671703798, "learning_rate": 5.816477430884625e-06, "loss": 0.156, "step": 6492 }, { "epoch": 0.46, "grad_norm": 1.2968001085064529, "learning_rate": 5.815334553634194e-06, "loss": 0.2001, "step": 6493 }, { "epoch": 0.46, "grad_norm": 1.437122287301326, "learning_rate": 5.814191632619843e-06, "loss": 0.194, "step": 6494 }, { "epoch": 0.46, "grad_norm": 1.6382713465825602, "learning_rate": 5.8130486679029216e-06, "loss": 0.2679, "step": 6495 }, { "epoch": 0.46, "grad_norm": 4.46536819673083, "learning_rate": 5.811905659544777e-06, "loss": 0.414, "step": 6496 }, { "epoch": 0.46, "grad_norm": 1.391566702455359, "learning_rate": 5.8107626076067635e-06, "loss": 0.186, "step": 6497 }, { "epoch": 0.46, "grad_norm": 1.3325702424872043, "learning_rate": 5.809619512150232e-06, "loss": 0.208, "step": 6498 }, { "epoch": 0.46, "grad_norm": 1.2099104541239016, "learning_rate": 5.8084763732365425e-06, "loss": 0.1571, "step": 6499 }, { "epoch": 0.46, "grad_norm": 1.5583130284005118, "learning_rate": 5.807333190927054e-06, "loss": 0.2273, "step": 6500 }, { "epoch": 0.47, "grad_norm": 1.3537215103713915, "learning_rate": 5.806189965283125e-06, "loss": 0.2143, "step": 6501 }, { "epoch": 0.47, "grad_norm": 1.3913161315149758, "learning_rate": 5.8050466963661235e-06, "loss": 0.2378, "step": 6502 }, { "epoch": 0.47, "grad_norm": 1.2780095081635476, "learning_rate": 5.803903384237412e-06, "loss": 0.1583, "step": 6503 }, { "epoch": 0.47, "grad_norm": 1.3197158481725357, "learning_rate": 5.802760028958362e-06, "loss": 0.1951, "step": 6504 }, { "epoch": 0.47, "grad_norm": 1.3062029036510774, "learning_rate": 5.801616630590342e-06, "loss": 0.1549, "step": 6505 }, { "epoch": 0.47, "grad_norm": 1.5011822094458633, "learning_rate": 5.800473189194727e-06, "loss": 0.1997, "step": 6506 }, { "epoch": 0.47, "grad_norm": 1.280410093862145, "learning_rate": 5.79932970483289e-06, "loss": 0.1766, "step": 6507 }, { "epoch": 0.47, "grad_norm": 1.324915390740993, "learning_rate": 5.798186177566211e-06, "loss": 0.1946, "step": 6508 }, { "epoch": 0.47, "grad_norm": 1.2260447282313718, "learning_rate": 5.797042607456068e-06, "loss": 0.2061, "step": 6509 }, { "epoch": 0.47, "grad_norm": 1.3943975520058736, "learning_rate": 5.7958989945638455e-06, "loss": 0.1993, "step": 6510 }, { "epoch": 0.47, "grad_norm": 1.382449279495089, "learning_rate": 5.794755338950926e-06, "loss": 0.207, "step": 6511 }, { "epoch": 0.47, "grad_norm": 1.3461185249876617, "learning_rate": 5.793611640678697e-06, "loss": 0.207, "step": 6512 }, { "epoch": 0.47, "grad_norm": 1.362568649297016, "learning_rate": 5.792467899808549e-06, "loss": 0.1988, "step": 6513 }, { "epoch": 0.47, "grad_norm": 1.3492548212186104, "learning_rate": 5.791324116401871e-06, "loss": 0.179, "step": 6514 }, { "epoch": 0.47, "grad_norm": 1.3090815463032532, "learning_rate": 5.79018029052006e-06, "loss": 0.1872, "step": 6515 }, { "epoch": 0.47, "grad_norm": 1.5097649231636865, "learning_rate": 5.789036422224508e-06, "loss": 0.1959, "step": 6516 }, { "epoch": 0.47, "grad_norm": 1.2995178332281412, "learning_rate": 5.787892511576617e-06, "loss": 0.178, "step": 6517 }, { "epoch": 0.47, "grad_norm": 1.3567649910791422, "learning_rate": 5.786748558637787e-06, "loss": 0.186, "step": 6518 }, { "epoch": 0.47, "grad_norm": 1.2600833120858175, "learning_rate": 5.785604563469419e-06, "loss": 0.155, "step": 6519 }, { "epoch": 0.47, "grad_norm": 1.5433940458167406, "learning_rate": 5.784460526132918e-06, "loss": 0.1966, "step": 6520 }, { "epoch": 0.47, "grad_norm": 1.4848559604976856, "learning_rate": 5.783316446689694e-06, "loss": 0.182, "step": 6521 }, { "epoch": 0.47, "grad_norm": 1.3022873825124526, "learning_rate": 5.782172325201155e-06, "loss": 0.2118, "step": 6522 }, { "epoch": 0.47, "grad_norm": 1.3458818653693136, "learning_rate": 5.781028161728714e-06, "loss": 0.1834, "step": 6523 }, { "epoch": 0.47, "grad_norm": 1.2499798888822795, "learning_rate": 5.77988395633378e-06, "loss": 0.1805, "step": 6524 }, { "epoch": 0.47, "grad_norm": 1.3723680168278265, "learning_rate": 5.778739709077777e-06, "loss": 0.2063, "step": 6525 }, { "epoch": 0.47, "grad_norm": 1.5032534056326532, "learning_rate": 5.7775954200221206e-06, "loss": 0.2136, "step": 6526 }, { "epoch": 0.47, "grad_norm": 1.5117166346725868, "learning_rate": 5.77645108922823e-06, "loss": 0.2272, "step": 6527 }, { "epoch": 0.47, "grad_norm": 1.2499605548875892, "learning_rate": 5.775306716757529e-06, "loss": 0.1723, "step": 6528 }, { "epoch": 0.47, "grad_norm": 1.2172999915430145, "learning_rate": 5.774162302671444e-06, "loss": 0.2073, "step": 6529 }, { "epoch": 0.47, "grad_norm": 1.3838191904059878, "learning_rate": 5.773017847031403e-06, "loss": 0.1605, "step": 6530 }, { "epoch": 0.47, "grad_norm": 1.2005138673340463, "learning_rate": 5.771873349898835e-06, "loss": 0.1747, "step": 6531 }, { "epoch": 0.47, "grad_norm": 1.2074800256054974, "learning_rate": 5.770728811335171e-06, "loss": 0.1542, "step": 6532 }, { "epoch": 0.47, "grad_norm": 1.4086544387965998, "learning_rate": 5.769584231401847e-06, "loss": 0.1723, "step": 6533 }, { "epoch": 0.47, "grad_norm": 1.4471187224720532, "learning_rate": 5.768439610160299e-06, "loss": 0.1801, "step": 6534 }, { "epoch": 0.47, "grad_norm": 1.4321299981649063, "learning_rate": 5.767294947671967e-06, "loss": 0.2127, "step": 6535 }, { "epoch": 0.47, "grad_norm": 1.395255678701889, "learning_rate": 5.766150243998288e-06, "loss": 0.2329, "step": 6536 }, { "epoch": 0.47, "grad_norm": 1.207618633862699, "learning_rate": 5.7650054992007074e-06, "loss": 0.2048, "step": 6537 }, { "epoch": 0.47, "grad_norm": 1.1460654610544796, "learning_rate": 5.7638607133406725e-06, "loss": 0.1565, "step": 6538 }, { "epoch": 0.47, "grad_norm": 1.3829574436665721, "learning_rate": 5.762715886479629e-06, "loss": 0.228, "step": 6539 }, { "epoch": 0.47, "grad_norm": 1.2864625718187568, "learning_rate": 5.761571018679025e-06, "loss": 0.1876, "step": 6540 }, { "epoch": 0.47, "grad_norm": 1.4898457334327468, "learning_rate": 5.760426110000317e-06, "loss": 0.2363, "step": 6541 }, { "epoch": 0.47, "grad_norm": 1.362393062545094, "learning_rate": 5.759281160504955e-06, "loss": 0.1571, "step": 6542 }, { "epoch": 0.47, "grad_norm": 1.4242618532825995, "learning_rate": 5.758136170254397e-06, "loss": 0.2033, "step": 6543 }, { "epoch": 0.47, "grad_norm": 1.245624717784355, "learning_rate": 5.756991139310101e-06, "loss": 0.2083, "step": 6544 }, { "epoch": 0.47, "grad_norm": 1.3930637078334769, "learning_rate": 5.755846067733527e-06, "loss": 0.2049, "step": 6545 }, { "epoch": 0.47, "grad_norm": 1.454082084339547, "learning_rate": 5.754700955586141e-06, "loss": 0.2181, "step": 6546 }, { "epoch": 0.47, "grad_norm": 1.326006153677617, "learning_rate": 5.753555802929405e-06, "loss": 0.2113, "step": 6547 }, { "epoch": 0.47, "grad_norm": 1.2632819620314486, "learning_rate": 5.752410609824788e-06, "loss": 0.1682, "step": 6548 }, { "epoch": 0.47, "grad_norm": 6.781802814930023, "learning_rate": 5.7512653763337565e-06, "loss": 0.5712, "step": 6549 }, { "epoch": 0.47, "grad_norm": 1.3219037170566765, "learning_rate": 5.750120102517787e-06, "loss": 0.2033, "step": 6550 }, { "epoch": 0.47, "grad_norm": 1.474044600550226, "learning_rate": 5.748974788438349e-06, "loss": 0.1974, "step": 6551 }, { "epoch": 0.47, "grad_norm": 1.5234361024017575, "learning_rate": 5.74782943415692e-06, "loss": 0.2367, "step": 6552 }, { "epoch": 0.47, "grad_norm": 1.4046211089672842, "learning_rate": 5.746684039734976e-06, "loss": 0.2021, "step": 6553 }, { "epoch": 0.47, "grad_norm": 1.4051430098137947, "learning_rate": 5.745538605234003e-06, "loss": 0.2246, "step": 6554 }, { "epoch": 0.47, "grad_norm": 1.202109561029295, "learning_rate": 5.744393130715478e-06, "loss": 0.1746, "step": 6555 }, { "epoch": 0.47, "grad_norm": 1.392074710822377, "learning_rate": 5.7432476162408854e-06, "loss": 0.2, "step": 6556 }, { "epoch": 0.47, "grad_norm": 1.6035897987328618, "learning_rate": 5.742102061871716e-06, "loss": 0.197, "step": 6557 }, { "epoch": 0.47, "grad_norm": 1.2301113007281486, "learning_rate": 5.740956467669454e-06, "loss": 0.1687, "step": 6558 }, { "epoch": 0.47, "grad_norm": 12.496434456399223, "learning_rate": 5.739810833695594e-06, "loss": 0.5074, "step": 6559 }, { "epoch": 0.47, "grad_norm": 1.3882148171153095, "learning_rate": 5.738665160011627e-06, "loss": 0.2081, "step": 6560 }, { "epoch": 0.47, "grad_norm": 1.493534877243792, "learning_rate": 5.7375194466790495e-06, "loss": 0.207, "step": 6561 }, { "epoch": 0.47, "grad_norm": 1.3523957007748022, "learning_rate": 5.736373693759357e-06, "loss": 0.1435, "step": 6562 }, { "epoch": 0.47, "grad_norm": 3.203909406807871, "learning_rate": 5.73522790131405e-06, "loss": 0.6458, "step": 6563 }, { "epoch": 0.47, "grad_norm": 1.302314193502512, "learning_rate": 5.734082069404631e-06, "loss": 0.1939, "step": 6564 }, { "epoch": 0.47, "grad_norm": 1.3508130527036732, "learning_rate": 5.732936198092603e-06, "loss": 0.2098, "step": 6565 }, { "epoch": 0.47, "grad_norm": 6.021243716312969, "learning_rate": 5.7317902874394694e-06, "loss": 0.6822, "step": 6566 }, { "epoch": 0.47, "grad_norm": 1.3980451685602266, "learning_rate": 5.730644337506743e-06, "loss": 0.1566, "step": 6567 }, { "epoch": 0.47, "grad_norm": 1.4976865161057045, "learning_rate": 5.729498348355931e-06, "loss": 0.2238, "step": 6568 }, { "epoch": 0.47, "grad_norm": 7.906146507194042, "learning_rate": 5.728352320048545e-06, "loss": 0.6332, "step": 6569 }, { "epoch": 0.47, "grad_norm": 1.4098158614209872, "learning_rate": 5.7272062526461e-06, "loss": 0.2076, "step": 6570 }, { "epoch": 0.47, "grad_norm": 1.317533695252874, "learning_rate": 5.726060146210113e-06, "loss": 0.1774, "step": 6571 }, { "epoch": 0.47, "grad_norm": 1.2906586447583197, "learning_rate": 5.724914000802102e-06, "loss": 0.1951, "step": 6572 }, { "epoch": 0.47, "grad_norm": 1.414917475059135, "learning_rate": 5.723767816483587e-06, "loss": 0.1741, "step": 6573 }, { "epoch": 0.47, "grad_norm": 1.5828806073634103, "learning_rate": 5.72262159331609e-06, "loss": 0.191, "step": 6574 }, { "epoch": 0.47, "grad_norm": 1.529656325700105, "learning_rate": 5.721475331361137e-06, "loss": 0.2253, "step": 6575 }, { "epoch": 0.47, "grad_norm": 1.3483928013740227, "learning_rate": 5.720329030680256e-06, "loss": 0.1583, "step": 6576 }, { "epoch": 0.47, "grad_norm": 4.356213511425819, "learning_rate": 5.719182691334972e-06, "loss": 0.6198, "step": 6577 }, { "epoch": 0.47, "grad_norm": 1.4159356903603015, "learning_rate": 5.718036313386821e-06, "loss": 0.184, "step": 6578 }, { "epoch": 0.47, "grad_norm": 1.298559359192132, "learning_rate": 5.716889896897331e-06, "loss": 0.1663, "step": 6579 }, { "epoch": 0.47, "grad_norm": 11.235786677172939, "learning_rate": 5.715743441928041e-06, "loss": 0.7425, "step": 6580 }, { "epoch": 0.47, "grad_norm": 1.2350710716526738, "learning_rate": 5.7145969485404865e-06, "loss": 0.1666, "step": 6581 }, { "epoch": 0.47, "grad_norm": 1.4163438485808915, "learning_rate": 5.713450416796206e-06, "loss": 0.204, "step": 6582 }, { "epoch": 0.47, "grad_norm": 1.354082948875915, "learning_rate": 5.71230384675674e-06, "loss": 0.1989, "step": 6583 }, { "epoch": 0.47, "grad_norm": 1.4846696665257617, "learning_rate": 5.711157238483635e-06, "loss": 0.2071, "step": 6584 }, { "epoch": 0.47, "grad_norm": 1.4616357037774714, "learning_rate": 5.7100105920384365e-06, "loss": 0.2028, "step": 6585 }, { "epoch": 0.47, "grad_norm": 1.5201786333393195, "learning_rate": 5.708863907482688e-06, "loss": 0.2288, "step": 6586 }, { "epoch": 0.47, "grad_norm": 1.237476820167331, "learning_rate": 5.707717184877941e-06, "loss": 0.1873, "step": 6587 }, { "epoch": 0.47, "grad_norm": 1.4882630525263791, "learning_rate": 5.706570424285747e-06, "loss": 0.2235, "step": 6588 }, { "epoch": 0.47, "grad_norm": 1.5193640327634763, "learning_rate": 5.705423625767661e-06, "loss": 0.1962, "step": 6589 }, { "epoch": 0.47, "grad_norm": 1.488793686929965, "learning_rate": 5.704276789385237e-06, "loss": 0.2321, "step": 6590 }, { "epoch": 0.47, "grad_norm": 1.211678914986396, "learning_rate": 5.703129915200032e-06, "loss": 0.1306, "step": 6591 }, { "epoch": 0.47, "grad_norm": 1.4452476328485626, "learning_rate": 5.701983003273607e-06, "loss": 0.2044, "step": 6592 }, { "epoch": 0.47, "grad_norm": 5.941880668226676, "learning_rate": 5.700836053667524e-06, "loss": 0.6017, "step": 6593 }, { "epoch": 0.47, "grad_norm": 1.3334533712039534, "learning_rate": 5.6996890664433444e-06, "loss": 0.1733, "step": 6594 }, { "epoch": 0.47, "grad_norm": 1.4157998865403114, "learning_rate": 5.698542041662636e-06, "loss": 0.1813, "step": 6595 }, { "epoch": 0.47, "grad_norm": 1.3426195883272436, "learning_rate": 5.6973949793869665e-06, "loss": 0.2308, "step": 6596 }, { "epoch": 0.47, "grad_norm": 1.3432609085054619, "learning_rate": 5.696247879677905e-06, "loss": 0.1976, "step": 6597 }, { "epoch": 0.47, "grad_norm": 1.465047681545308, "learning_rate": 5.695100742597023e-06, "loss": 0.2107, "step": 6598 }, { "epoch": 0.47, "grad_norm": 1.351271092964941, "learning_rate": 5.693953568205894e-06, "loss": 0.2075, "step": 6599 }, { "epoch": 0.47, "grad_norm": 1.4097710561011356, "learning_rate": 5.6928063565660955e-06, "loss": 0.1871, "step": 6600 }, { "epoch": 0.47, "grad_norm": 1.3803266064713453, "learning_rate": 5.691659107739205e-06, "loss": 0.2166, "step": 6601 }, { "epoch": 0.47, "grad_norm": 4.301797639221462, "learning_rate": 5.690511821786799e-06, "loss": 0.6561, "step": 6602 }, { "epoch": 0.47, "grad_norm": 1.3030832555992136, "learning_rate": 5.689364498770464e-06, "loss": 0.1911, "step": 6603 }, { "epoch": 0.47, "grad_norm": 1.2376309669081882, "learning_rate": 5.688217138751779e-06, "loss": 0.1801, "step": 6604 }, { "epoch": 0.47, "grad_norm": 4.063643900205768, "learning_rate": 5.687069741792334e-06, "loss": 0.5719, "step": 6605 }, { "epoch": 0.47, "grad_norm": 1.1588282811234394, "learning_rate": 5.685922307953714e-06, "loss": 0.1588, "step": 6606 }, { "epoch": 0.47, "grad_norm": 1.2998763576833958, "learning_rate": 5.684774837297511e-06, "loss": 0.2183, "step": 6607 }, { "epoch": 0.47, "grad_norm": 1.2433683507958764, "learning_rate": 5.6836273298853125e-06, "loss": 0.2105, "step": 6608 }, { "epoch": 0.47, "grad_norm": 1.3256238480455282, "learning_rate": 5.682479785778716e-06, "loss": 0.1579, "step": 6609 }, { "epoch": 0.47, "grad_norm": 1.416507335296526, "learning_rate": 5.681332205039316e-06, "loss": 0.2062, "step": 6610 }, { "epoch": 0.47, "grad_norm": 1.1959167561202941, "learning_rate": 5.680184587728711e-06, "loss": 0.1261, "step": 6611 }, { "epoch": 0.47, "grad_norm": 1.3383093427406567, "learning_rate": 5.6790369339085e-06, "loss": 0.2549, "step": 6612 }, { "epoch": 0.47, "grad_norm": 1.3447069054150522, "learning_rate": 5.677889243640281e-06, "loss": 0.1911, "step": 6613 }, { "epoch": 0.47, "grad_norm": 1.3073870274351875, "learning_rate": 5.676741516985662e-06, "loss": 0.2171, "step": 6614 }, { "epoch": 0.47, "grad_norm": 1.2179887755105836, "learning_rate": 5.675593754006248e-06, "loss": 0.2135, "step": 6615 }, { "epoch": 0.47, "grad_norm": 1.4375101013690217, "learning_rate": 5.674445954763646e-06, "loss": 0.2053, "step": 6616 }, { "epoch": 0.47, "grad_norm": 1.4484395951201523, "learning_rate": 5.673298119319461e-06, "loss": 0.1374, "step": 6617 }, { "epoch": 0.47, "grad_norm": 1.5721007717575335, "learning_rate": 5.672150247735311e-06, "loss": 0.2196, "step": 6618 }, { "epoch": 0.47, "grad_norm": 1.5482159371704185, "learning_rate": 5.671002340072805e-06, "loss": 0.2191, "step": 6619 }, { "epoch": 0.47, "grad_norm": 1.375198427149108, "learning_rate": 5.669854396393559e-06, "loss": 0.2374, "step": 6620 }, { "epoch": 0.47, "grad_norm": 1.3284557340254821, "learning_rate": 5.66870641675919e-06, "loss": 0.241, "step": 6621 }, { "epoch": 0.47, "grad_norm": 1.5456241729458258, "learning_rate": 5.667558401231318e-06, "loss": 0.1554, "step": 6622 }, { "epoch": 0.47, "grad_norm": 1.514184569796053, "learning_rate": 5.666410349871563e-06, "loss": 0.1735, "step": 6623 }, { "epoch": 0.47, "grad_norm": 5.709034238782593, "learning_rate": 5.665262262741548e-06, "loss": 0.7653, "step": 6624 }, { "epoch": 0.47, "grad_norm": 1.3817859441044498, "learning_rate": 5.664114139902897e-06, "loss": 0.1734, "step": 6625 }, { "epoch": 0.47, "grad_norm": 1.4096745830497825, "learning_rate": 5.6629659814172365e-06, "loss": 0.1697, "step": 6626 }, { "epoch": 0.47, "grad_norm": 1.3168424168755262, "learning_rate": 5.661817787346198e-06, "loss": 0.2086, "step": 6627 }, { "epoch": 0.47, "grad_norm": 1.3622785181946422, "learning_rate": 5.660669557751409e-06, "loss": 0.2058, "step": 6628 }, { "epoch": 0.47, "grad_norm": 3.2315474394610164, "learning_rate": 5.659521292694502e-06, "loss": 0.3605, "step": 6629 }, { "epoch": 0.47, "grad_norm": 1.2399285585743323, "learning_rate": 5.6583729922371135e-06, "loss": 0.1819, "step": 6630 }, { "epoch": 0.47, "grad_norm": 1.4703244971557394, "learning_rate": 5.657224656440878e-06, "loss": 0.2211, "step": 6631 }, { "epoch": 0.47, "grad_norm": 1.4120333186098457, "learning_rate": 5.656076285367434e-06, "loss": 0.1898, "step": 6632 }, { "epoch": 0.47, "grad_norm": 1.3875502650553995, "learning_rate": 5.65492787907842e-06, "loss": 0.2044, "step": 6633 }, { "epoch": 0.47, "grad_norm": 1.3045646050508035, "learning_rate": 5.653779437635481e-06, "loss": 0.1763, "step": 6634 }, { "epoch": 0.47, "grad_norm": 1.4478781748780905, "learning_rate": 5.65263096110026e-06, "loss": 0.2167, "step": 6635 }, { "epoch": 0.47, "grad_norm": 1.393015761074465, "learning_rate": 5.6514824495344e-06, "loss": 0.1909, "step": 6636 }, { "epoch": 0.47, "grad_norm": 1.1692813776182762, "learning_rate": 5.65033390299955e-06, "loss": 0.15, "step": 6637 }, { "epoch": 0.47, "grad_norm": 1.5013667525119645, "learning_rate": 5.64918532155736e-06, "loss": 0.2549, "step": 6638 }, { "epoch": 0.47, "grad_norm": 1.1807121184803353, "learning_rate": 5.6480367052694814e-06, "loss": 0.1872, "step": 6639 }, { "epoch": 0.47, "grad_norm": 1.447065956867901, "learning_rate": 5.646888054197568e-06, "loss": 0.1996, "step": 6640 }, { "epoch": 0.48, "grad_norm": 1.540889333290801, "learning_rate": 5.645739368403272e-06, "loss": 0.2144, "step": 6641 }, { "epoch": 0.48, "grad_norm": 1.4663105084313814, "learning_rate": 5.644590647948252e-06, "loss": 0.2349, "step": 6642 }, { "epoch": 0.48, "grad_norm": 1.4120831257870394, "learning_rate": 5.643441892894168e-06, "loss": 0.2338, "step": 6643 }, { "epoch": 0.48, "grad_norm": 1.423531552999105, "learning_rate": 5.642293103302681e-06, "loss": 0.2014, "step": 6644 }, { "epoch": 0.48, "grad_norm": 1.3576783681898044, "learning_rate": 5.64114427923545e-06, "loss": 0.1745, "step": 6645 }, { "epoch": 0.48, "grad_norm": 1.2271112875772985, "learning_rate": 5.639995420754141e-06, "loss": 0.1794, "step": 6646 }, { "epoch": 0.48, "grad_norm": 1.284576286012348, "learning_rate": 5.638846527920421e-06, "loss": 0.2049, "step": 6647 }, { "epoch": 0.48, "grad_norm": 4.735140367055312, "learning_rate": 5.637697600795957e-06, "loss": 0.5307, "step": 6648 }, { "epoch": 0.48, "grad_norm": 1.5159081046847358, "learning_rate": 5.63654863944242e-06, "loss": 0.1797, "step": 6649 }, { "epoch": 0.48, "grad_norm": 4.23526647937402, "learning_rate": 5.635399643921479e-06, "loss": 0.5596, "step": 6650 }, { "epoch": 0.48, "grad_norm": 1.467072851912935, "learning_rate": 5.634250614294811e-06, "loss": 0.2073, "step": 6651 }, { "epoch": 0.48, "grad_norm": 1.3815219896405229, "learning_rate": 5.633101550624089e-06, "loss": 0.2072, "step": 6652 }, { "epoch": 0.48, "grad_norm": 1.260196760246533, "learning_rate": 5.631952452970992e-06, "loss": 0.1917, "step": 6653 }, { "epoch": 0.48, "grad_norm": 1.4720139244592239, "learning_rate": 5.630803321397198e-06, "loss": 0.2407, "step": 6654 }, { "epoch": 0.48, "grad_norm": 1.3286458796307956, "learning_rate": 5.6296541559643854e-06, "loss": 0.1979, "step": 6655 }, { "epoch": 0.48, "grad_norm": 1.306094194100853, "learning_rate": 5.628504956734242e-06, "loss": 0.2366, "step": 6656 }, { "epoch": 0.48, "grad_norm": 1.319116897884429, "learning_rate": 5.627355723768449e-06, "loss": 0.2167, "step": 6657 }, { "epoch": 0.48, "grad_norm": 1.272662775051772, "learning_rate": 5.626206457128692e-06, "loss": 0.1859, "step": 6658 }, { "epoch": 0.48, "grad_norm": 1.2047439538386733, "learning_rate": 5.62505715687666e-06, "loss": 0.1743, "step": 6659 }, { "epoch": 0.48, "grad_norm": 1.2344994679699708, "learning_rate": 5.623907823074044e-06, "loss": 0.1813, "step": 6660 }, { "epoch": 0.48, "grad_norm": 1.3093303281663546, "learning_rate": 5.622758455782534e-06, "loss": 0.1957, "step": 6661 }, { "epoch": 0.48, "grad_norm": 1.3582004498814646, "learning_rate": 5.621609055063826e-06, "loss": 0.1726, "step": 6662 }, { "epoch": 0.48, "grad_norm": 1.176726358488571, "learning_rate": 5.62045962097961e-06, "loss": 0.1608, "step": 6663 }, { "epoch": 0.48, "grad_norm": 1.2183205294078407, "learning_rate": 5.6193101535915894e-06, "loss": 0.1742, "step": 6664 }, { "epoch": 0.48, "grad_norm": 1.265218178169771, "learning_rate": 5.618160652961459e-06, "loss": 0.1311, "step": 6665 }, { "epoch": 0.48, "grad_norm": 1.2435751219275146, "learning_rate": 5.617011119150922e-06, "loss": 0.1777, "step": 6666 }, { "epoch": 0.48, "grad_norm": 7.914470400501476, "learning_rate": 5.615861552221677e-06, "loss": 0.7304, "step": 6667 }, { "epoch": 0.48, "grad_norm": 1.1829092287141503, "learning_rate": 5.614711952235432e-06, "loss": 0.2057, "step": 6668 }, { "epoch": 0.48, "grad_norm": 1.2986090537159352, "learning_rate": 5.6135623192538915e-06, "loss": 0.1649, "step": 6669 }, { "epoch": 0.48, "grad_norm": 1.3058567665023229, "learning_rate": 5.612412653338763e-06, "loss": 0.1851, "step": 6670 }, { "epoch": 0.48, "grad_norm": 1.4467702225431087, "learning_rate": 5.6112629545517565e-06, "loss": 0.2026, "step": 6671 }, { "epoch": 0.48, "grad_norm": 1.2249025798770863, "learning_rate": 5.610113222954583e-06, "loss": 0.1815, "step": 6672 }, { "epoch": 0.48, "grad_norm": 1.2688443562901717, "learning_rate": 5.608963458608953e-06, "loss": 0.1762, "step": 6673 }, { "epoch": 0.48, "grad_norm": 8.985408878215837, "learning_rate": 5.607813661576587e-06, "loss": 0.649, "step": 6674 }, { "epoch": 0.48, "grad_norm": 1.2681637172408171, "learning_rate": 5.606663831919198e-06, "loss": 0.1903, "step": 6675 }, { "epoch": 0.48, "grad_norm": 1.8481052922301722, "learning_rate": 5.605513969698504e-06, "loss": 0.21, "step": 6676 }, { "epoch": 0.48, "grad_norm": 1.2842823473237996, "learning_rate": 5.604364074976227e-06, "loss": 0.1769, "step": 6677 }, { "epoch": 0.48, "grad_norm": 1.3543061435568093, "learning_rate": 5.603214147814086e-06, "loss": 0.2237, "step": 6678 }, { "epoch": 0.48, "grad_norm": 1.4139731288564583, "learning_rate": 5.602064188273806e-06, "loss": 0.2254, "step": 6679 }, { "epoch": 0.48, "grad_norm": 1.4527351608229153, "learning_rate": 5.600914196417112e-06, "loss": 0.2305, "step": 6680 }, { "epoch": 0.48, "grad_norm": 1.2851283704251248, "learning_rate": 5.599764172305732e-06, "loss": 0.17, "step": 6681 }, { "epoch": 0.48, "grad_norm": 1.3838102091405438, "learning_rate": 5.598614116001394e-06, "loss": 0.1657, "step": 6682 }, { "epoch": 0.48, "grad_norm": 1.5208474089354278, "learning_rate": 5.5974640275658284e-06, "loss": 0.1863, "step": 6683 }, { "epoch": 0.48, "grad_norm": 1.4431061962676712, "learning_rate": 5.596313907060766e-06, "loss": 0.2286, "step": 6684 }, { "epoch": 0.48, "grad_norm": 1.301210709072611, "learning_rate": 5.595163754547945e-06, "loss": 0.1946, "step": 6685 }, { "epoch": 0.48, "grad_norm": 1.3528715438233607, "learning_rate": 5.594013570089096e-06, "loss": 0.1714, "step": 6686 }, { "epoch": 0.48, "grad_norm": 5.863473383864885, "learning_rate": 5.592863353745959e-06, "loss": 0.6037, "step": 6687 }, { "epoch": 0.48, "grad_norm": 1.5296372419376616, "learning_rate": 5.591713105580272e-06, "loss": 0.2185, "step": 6688 }, { "epoch": 0.48, "grad_norm": 1.3475353144390334, "learning_rate": 5.590562825653777e-06, "loss": 0.1708, "step": 6689 }, { "epoch": 0.48, "grad_norm": 1.3621160200499347, "learning_rate": 5.589412514028215e-06, "loss": 0.2149, "step": 6690 }, { "epoch": 0.48, "grad_norm": 1.2343770963578025, "learning_rate": 5.588262170765332e-06, "loss": 0.1475, "step": 6691 }, { "epoch": 0.48, "grad_norm": 1.226253359094388, "learning_rate": 5.587111795926871e-06, "loss": 0.1955, "step": 6692 }, { "epoch": 0.48, "grad_norm": 4.665249904996852, "learning_rate": 5.585961389574582e-06, "loss": 0.6404, "step": 6693 }, { "epoch": 0.48, "grad_norm": 1.4663346383017628, "learning_rate": 5.584810951770215e-06, "loss": 0.2036, "step": 6694 }, { "epoch": 0.48, "grad_norm": 1.4705378305401333, "learning_rate": 5.5836604825755165e-06, "loss": 0.2304, "step": 6695 }, { "epoch": 0.48, "grad_norm": 1.328203328595461, "learning_rate": 5.582509982052244e-06, "loss": 0.1689, "step": 6696 }, { "epoch": 0.48, "grad_norm": 1.4327974602094515, "learning_rate": 5.58135945026215e-06, "loss": 0.1966, "step": 6697 }, { "epoch": 0.48, "grad_norm": 1.5586132079801607, "learning_rate": 5.580208887266989e-06, "loss": 0.2036, "step": 6698 }, { "epoch": 0.48, "grad_norm": 1.2520196199064095, "learning_rate": 5.579058293128522e-06, "loss": 0.1665, "step": 6699 }, { "epoch": 0.48, "grad_norm": 1.3531001361487087, "learning_rate": 5.577907667908505e-06, "loss": 0.1832, "step": 6700 }, { "epoch": 0.48, "grad_norm": 1.2592648229797585, "learning_rate": 5.5767570116687e-06, "loss": 0.1551, "step": 6701 }, { "epoch": 0.48, "grad_norm": 1.391433623850729, "learning_rate": 5.57560632447087e-06, "loss": 0.2143, "step": 6702 }, { "epoch": 0.48, "grad_norm": 1.289281399178544, "learning_rate": 5.5744556063767804e-06, "loss": 0.2076, "step": 6703 }, { "epoch": 0.48, "grad_norm": 4.466612044841786, "learning_rate": 5.573304857448195e-06, "loss": 0.5673, "step": 6704 }, { "epoch": 0.48, "grad_norm": 1.322530237319094, "learning_rate": 5.572154077746882e-06, "loss": 0.1739, "step": 6705 }, { "epoch": 0.48, "grad_norm": 1.2205176424784763, "learning_rate": 5.571003267334612e-06, "loss": 0.1744, "step": 6706 }, { "epoch": 0.48, "grad_norm": 1.3967933641228538, "learning_rate": 5.569852426273154e-06, "loss": 0.228, "step": 6707 }, { "epoch": 0.48, "grad_norm": 1.2940529143495834, "learning_rate": 5.568701554624284e-06, "loss": 0.2075, "step": 6708 }, { "epoch": 0.48, "grad_norm": 1.3132503436465324, "learning_rate": 5.567550652449773e-06, "loss": 0.1854, "step": 6709 }, { "epoch": 0.48, "grad_norm": 1.1810895547714022, "learning_rate": 5.566399719811396e-06, "loss": 0.1499, "step": 6710 }, { "epoch": 0.48, "grad_norm": 1.3570662722160278, "learning_rate": 5.565248756770934e-06, "loss": 0.2078, "step": 6711 }, { "epoch": 0.48, "grad_norm": 6.132891668764976, "learning_rate": 5.564097763390164e-06, "loss": 0.5219, "step": 6712 }, { "epoch": 0.48, "grad_norm": 1.4552138400814318, "learning_rate": 5.5629467397308666e-06, "loss": 0.2003, "step": 6713 }, { "epoch": 0.48, "grad_norm": 1.4386516292199851, "learning_rate": 5.561795685854825e-06, "loss": 0.1968, "step": 6714 }, { "epoch": 0.48, "grad_norm": 3.4208210831262718, "learning_rate": 5.560644601823823e-06, "loss": 0.5845, "step": 6715 }, { "epoch": 0.48, "grad_norm": 1.2243283906933147, "learning_rate": 5.5594934876996455e-06, "loss": 0.1686, "step": 6716 }, { "epoch": 0.48, "grad_norm": 1.5637156771632874, "learning_rate": 5.558342343544081e-06, "loss": 0.2178, "step": 6717 }, { "epoch": 0.48, "grad_norm": 8.023920571981453, "learning_rate": 5.5571911694189175e-06, "loss": 0.404, "step": 6718 }, { "epoch": 0.48, "grad_norm": 1.475071302654246, "learning_rate": 5.556039965385945e-06, "loss": 0.2102, "step": 6719 }, { "epoch": 0.48, "grad_norm": 3.7532609839625657, "learning_rate": 5.5548887315069575e-06, "loss": 0.5653, "step": 6720 }, { "epoch": 0.48, "grad_norm": 1.3775939142712832, "learning_rate": 5.553737467843747e-06, "loss": 0.202, "step": 6721 }, { "epoch": 0.48, "grad_norm": 10.951403106549975, "learning_rate": 5.552586174458108e-06, "loss": 0.6949, "step": 6722 }, { "epoch": 0.48, "grad_norm": 1.3748090969676616, "learning_rate": 5.5514348514118395e-06, "loss": 0.1988, "step": 6723 }, { "epoch": 0.48, "grad_norm": 1.3024851074778925, "learning_rate": 5.5502834987667386e-06, "loss": 0.1384, "step": 6724 }, { "epoch": 0.48, "grad_norm": 1.5291363573830403, "learning_rate": 5.549132116584606e-06, "loss": 0.1884, "step": 6725 }, { "epoch": 0.48, "grad_norm": 1.45020599412656, "learning_rate": 5.547980704927241e-06, "loss": 0.2131, "step": 6726 }, { "epoch": 0.48, "grad_norm": 1.4983111458375615, "learning_rate": 5.546829263856451e-06, "loss": 0.2094, "step": 6727 }, { "epoch": 0.48, "grad_norm": 1.2516208235667108, "learning_rate": 5.545677793434039e-06, "loss": 0.1676, "step": 6728 }, { "epoch": 0.48, "grad_norm": 1.5275846375975488, "learning_rate": 5.5445262937218106e-06, "loss": 0.253, "step": 6729 }, { "epoch": 0.48, "grad_norm": 4.445212578479364, "learning_rate": 5.543374764781571e-06, "loss": 0.6099, "step": 6730 }, { "epoch": 0.48, "grad_norm": 1.4068861590880137, "learning_rate": 5.542223206675137e-06, "loss": 0.1808, "step": 6731 }, { "epoch": 0.48, "grad_norm": 1.2113251939943535, "learning_rate": 5.541071619464314e-06, "loss": 0.1632, "step": 6732 }, { "epoch": 0.48, "grad_norm": 1.2824024101668492, "learning_rate": 5.5399200032109145e-06, "loss": 0.1804, "step": 6733 }, { "epoch": 0.48, "grad_norm": 1.2874919486512382, "learning_rate": 5.538768357976755e-06, "loss": 0.2087, "step": 6734 }, { "epoch": 0.48, "grad_norm": 3.729622441398244, "learning_rate": 5.537616683823651e-06, "loss": 0.5009, "step": 6735 }, { "epoch": 0.48, "grad_norm": 1.1965131187862261, "learning_rate": 5.536464980813418e-06, "loss": 0.157, "step": 6736 }, { "epoch": 0.48, "grad_norm": 1.31904448568832, "learning_rate": 5.535313249007878e-06, "loss": 0.1714, "step": 6737 }, { "epoch": 0.48, "grad_norm": 1.3645902524097202, "learning_rate": 5.5341614884688475e-06, "loss": 0.2174, "step": 6738 }, { "epoch": 0.48, "grad_norm": 1.4646830743059327, "learning_rate": 5.533009699258152e-06, "loss": 0.2073, "step": 6739 }, { "epoch": 0.48, "grad_norm": 1.4699739579948878, "learning_rate": 5.531857881437612e-06, "loss": 0.1898, "step": 6740 }, { "epoch": 0.48, "grad_norm": 4.049197321903939, "learning_rate": 5.5307060350690554e-06, "loss": 0.3951, "step": 6741 }, { "epoch": 0.48, "grad_norm": 1.387503532780477, "learning_rate": 5.529554160214306e-06, "loss": 0.1745, "step": 6742 }, { "epoch": 0.48, "grad_norm": 1.3764467731007737, "learning_rate": 5.528402256935194e-06, "loss": 0.2002, "step": 6743 }, { "epoch": 0.48, "grad_norm": 1.136569721889636, "learning_rate": 5.5272503252935474e-06, "loss": 0.1671, "step": 6744 }, { "epoch": 0.48, "grad_norm": 1.253399986912368, "learning_rate": 5.526098365351198e-06, "loss": 0.1576, "step": 6745 }, { "epoch": 0.48, "grad_norm": 5.835596509172331, "learning_rate": 5.524946377169979e-06, "loss": 0.6145, "step": 6746 }, { "epoch": 0.48, "grad_norm": 1.2477404081621708, "learning_rate": 5.523794360811723e-06, "loss": 0.1937, "step": 6747 }, { "epoch": 0.48, "grad_norm": 1.3502021143467564, "learning_rate": 5.522642316338268e-06, "loss": 0.1722, "step": 6748 }, { "epoch": 0.48, "grad_norm": 1.1249319624186798, "learning_rate": 5.521490243811449e-06, "loss": 0.1437, "step": 6749 }, { "epoch": 0.48, "grad_norm": 1.4134308181069786, "learning_rate": 5.520338143293106e-06, "loss": 0.1702, "step": 6750 }, { "epoch": 0.48, "grad_norm": 1.5058031024348788, "learning_rate": 5.519186014845077e-06, "loss": 0.2021, "step": 6751 }, { "epoch": 0.48, "grad_norm": 1.3451481541968922, "learning_rate": 5.518033858529207e-06, "loss": 0.1839, "step": 6752 }, { "epoch": 0.48, "grad_norm": 1.2538438229051385, "learning_rate": 5.516881674407337e-06, "loss": 0.1942, "step": 6753 }, { "epoch": 0.48, "grad_norm": 1.1579813727682793, "learning_rate": 5.515729462541312e-06, "loss": 0.158, "step": 6754 }, { "epoch": 0.48, "grad_norm": 1.331516309303722, "learning_rate": 5.51457722299298e-06, "loss": 0.1993, "step": 6755 }, { "epoch": 0.48, "grad_norm": 1.279591499033453, "learning_rate": 5.513424955824185e-06, "loss": 0.1928, "step": 6756 }, { "epoch": 0.48, "grad_norm": 1.1928069592518773, "learning_rate": 5.512272661096779e-06, "loss": 0.1674, "step": 6757 }, { "epoch": 0.48, "grad_norm": 1.4181095732284703, "learning_rate": 5.511120338872611e-06, "loss": 0.2564, "step": 6758 }, { "epoch": 0.48, "grad_norm": 1.310888891244863, "learning_rate": 5.509967989213535e-06, "loss": 0.1885, "step": 6759 }, { "epoch": 0.48, "grad_norm": 1.3813179269948987, "learning_rate": 5.508815612181401e-06, "loss": 0.1641, "step": 6760 }, { "epoch": 0.48, "grad_norm": 1.5016381790547977, "learning_rate": 5.507663207838069e-06, "loss": 0.2606, "step": 6761 }, { "epoch": 0.48, "grad_norm": 1.564429076716475, "learning_rate": 5.506510776245392e-06, "loss": 0.2573, "step": 6762 }, { "epoch": 0.48, "grad_norm": 1.469400090289019, "learning_rate": 5.505358317465228e-06, "loss": 0.2177, "step": 6763 }, { "epoch": 0.48, "grad_norm": 1.5118141701378751, "learning_rate": 5.504205831559437e-06, "loss": 0.2163, "step": 6764 }, { "epoch": 0.48, "grad_norm": 1.337947196177064, "learning_rate": 5.5030533185898815e-06, "loss": 0.1411, "step": 6765 }, { "epoch": 0.48, "grad_norm": 1.3890752819568728, "learning_rate": 5.501900778618422e-06, "loss": 0.1775, "step": 6766 }, { "epoch": 0.48, "grad_norm": 1.290413323372114, "learning_rate": 5.500748211706922e-06, "loss": 0.1754, "step": 6767 }, { "epoch": 0.48, "grad_norm": 1.2812686906137314, "learning_rate": 5.499595617917247e-06, "loss": 0.1961, "step": 6768 }, { "epoch": 0.48, "grad_norm": 1.3361976560245312, "learning_rate": 5.498442997311264e-06, "loss": 0.1819, "step": 6769 }, { "epoch": 0.48, "grad_norm": 1.4551005583589307, "learning_rate": 5.497290349950842e-06, "loss": 0.251, "step": 6770 }, { "epoch": 0.48, "grad_norm": 1.1775914443063273, "learning_rate": 5.49613767589785e-06, "loss": 0.1828, "step": 6771 }, { "epoch": 0.48, "grad_norm": 1.3957315529181746, "learning_rate": 5.494984975214158e-06, "loss": 0.2393, "step": 6772 }, { "epoch": 0.48, "grad_norm": 1.2548407835443514, "learning_rate": 5.493832247961638e-06, "loss": 0.1992, "step": 6773 }, { "epoch": 0.48, "grad_norm": 1.1888279045100618, "learning_rate": 5.492679494202167e-06, "loss": 0.1977, "step": 6774 }, { "epoch": 0.48, "grad_norm": 1.229916681884861, "learning_rate": 5.491526713997617e-06, "loss": 0.1912, "step": 6775 }, { "epoch": 0.48, "grad_norm": 1.483293591571265, "learning_rate": 5.490373907409866e-06, "loss": 0.2101, "step": 6776 }, { "epoch": 0.48, "grad_norm": 1.3174297839789983, "learning_rate": 5.48922107450079e-06, "loss": 0.1762, "step": 6777 }, { "epoch": 0.48, "grad_norm": 1.4859410262083592, "learning_rate": 5.488068215332273e-06, "loss": 0.2503, "step": 6778 }, { "epoch": 0.48, "grad_norm": 1.3900691398471186, "learning_rate": 5.486915329966192e-06, "loss": 0.2292, "step": 6779 }, { "epoch": 0.48, "grad_norm": 1.338193212251261, "learning_rate": 5.48576241846443e-06, "loss": 0.2071, "step": 6780 }, { "epoch": 0.49, "grad_norm": 1.3424836903990778, "learning_rate": 5.484609480888872e-06, "loss": 0.2058, "step": 6781 }, { "epoch": 0.49, "grad_norm": 1.3353165304076882, "learning_rate": 5.483456517301401e-06, "loss": 0.193, "step": 6782 }, { "epoch": 0.49, "grad_norm": 1.281846483294925, "learning_rate": 5.482303527763906e-06, "loss": 0.1815, "step": 6783 }, { "epoch": 0.49, "grad_norm": 1.5155167360178363, "learning_rate": 5.481150512338275e-06, "loss": 0.1975, "step": 6784 }, { "epoch": 0.49, "grad_norm": 1.2260338966621829, "learning_rate": 5.479997471086394e-06, "loss": 0.1657, "step": 6785 }, { "epoch": 0.49, "grad_norm": 1.2471540592974437, "learning_rate": 5.478844404070155e-06, "loss": 0.2368, "step": 6786 }, { "epoch": 0.49, "grad_norm": 1.1602960211615858, "learning_rate": 5.477691311351453e-06, "loss": 0.1769, "step": 6787 }, { "epoch": 0.49, "grad_norm": 1.5165083929741874, "learning_rate": 5.476538192992178e-06, "loss": 0.2244, "step": 6788 }, { "epoch": 0.49, "grad_norm": 1.3051802629646676, "learning_rate": 5.475385049054225e-06, "loss": 0.1849, "step": 6789 }, { "epoch": 0.49, "grad_norm": 1.2823513404076694, "learning_rate": 5.474231879599493e-06, "loss": 0.2212, "step": 6790 }, { "epoch": 0.49, "grad_norm": 1.3062268530636707, "learning_rate": 5.4730786846898765e-06, "loss": 0.1611, "step": 6791 }, { "epoch": 0.49, "grad_norm": 4.351821537968671, "learning_rate": 5.471925464387276e-06, "loss": 0.567, "step": 6792 }, { "epoch": 0.49, "grad_norm": 1.3518153230617713, "learning_rate": 5.470772218753592e-06, "loss": 0.1883, "step": 6793 }, { "epoch": 0.49, "grad_norm": 1.417485558053931, "learning_rate": 5.4696189478507255e-06, "loss": 0.2116, "step": 6794 }, { "epoch": 0.49, "grad_norm": 1.3634475709227498, "learning_rate": 5.468465651740579e-06, "loss": 0.1773, "step": 6795 }, { "epoch": 0.49, "grad_norm": 1.2240665757903137, "learning_rate": 5.467312330485058e-06, "loss": 0.1618, "step": 6796 }, { "epoch": 0.49, "grad_norm": 1.8658802132960561, "learning_rate": 5.466158984146066e-06, "loss": 0.1846, "step": 6797 }, { "epoch": 0.49, "grad_norm": 1.4045855747908262, "learning_rate": 5.465005612785514e-06, "loss": 0.1969, "step": 6798 }, { "epoch": 0.49, "grad_norm": 1.155989318442908, "learning_rate": 5.463852216465307e-06, "loss": 0.1877, "step": 6799 }, { "epoch": 0.49, "grad_norm": 1.3539566726968721, "learning_rate": 5.462698795247357e-06, "loss": 0.2181, "step": 6800 }, { "epoch": 0.49, "grad_norm": 7.748510638540412, "learning_rate": 5.461545349193571e-06, "loss": 0.5437, "step": 6801 }, { "epoch": 0.49, "grad_norm": 1.3186992652514298, "learning_rate": 5.460391878365865e-06, "loss": 0.1642, "step": 6802 }, { "epoch": 0.49, "grad_norm": 1.2401163653155394, "learning_rate": 5.4592383828261534e-06, "loss": 0.1781, "step": 6803 }, { "epoch": 0.49, "grad_norm": 1.2487002072660474, "learning_rate": 5.45808486263635e-06, "loss": 0.1845, "step": 6804 }, { "epoch": 0.49, "grad_norm": 1.3288613206864088, "learning_rate": 5.456931317858371e-06, "loss": 0.1541, "step": 6805 }, { "epoch": 0.49, "grad_norm": 1.2787722773924046, "learning_rate": 5.455777748554134e-06, "loss": 0.158, "step": 6806 }, { "epoch": 0.49, "grad_norm": 1.193116551333469, "learning_rate": 5.454624154785557e-06, "loss": 0.1942, "step": 6807 }, { "epoch": 0.49, "grad_norm": 1.3163562959430897, "learning_rate": 5.4534705366145624e-06, "loss": 0.2113, "step": 6808 }, { "epoch": 0.49, "grad_norm": 1.3153432053486518, "learning_rate": 5.452316894103071e-06, "loss": 0.1727, "step": 6809 }, { "epoch": 0.49, "grad_norm": 6.166680895264921, "learning_rate": 5.451163227313006e-06, "loss": 0.6397, "step": 6810 }, { "epoch": 0.49, "grad_norm": 1.2726459140397102, "learning_rate": 5.45000953630629e-06, "loss": 0.1813, "step": 6811 }, { "epoch": 0.49, "grad_norm": 1.3199381108460457, "learning_rate": 5.448855821144851e-06, "loss": 0.1934, "step": 6812 }, { "epoch": 0.49, "grad_norm": 1.2631007381920603, "learning_rate": 5.447702081890614e-06, "loss": 0.2148, "step": 6813 }, { "epoch": 0.49, "grad_norm": 1.2805316866182044, "learning_rate": 5.446548318605509e-06, "loss": 0.1934, "step": 6814 }, { "epoch": 0.49, "grad_norm": 1.6003247685395632, "learning_rate": 5.445394531351463e-06, "loss": 0.2113, "step": 6815 }, { "epoch": 0.49, "grad_norm": 1.0813379121308828, "learning_rate": 5.444240720190409e-06, "loss": 0.1212, "step": 6816 }, { "epoch": 0.49, "grad_norm": 1.1900432749014551, "learning_rate": 5.443086885184279e-06, "loss": 0.1821, "step": 6817 }, { "epoch": 0.49, "grad_norm": 5.852085547568391, "learning_rate": 5.441933026395003e-06, "loss": 0.6501, "step": 6818 }, { "epoch": 0.49, "grad_norm": 1.2468105134590544, "learning_rate": 5.440779143884518e-06, "loss": 0.1545, "step": 6819 }, { "epoch": 0.49, "grad_norm": 1.4776502847286042, "learning_rate": 5.4396252377147615e-06, "loss": 0.236, "step": 6820 }, { "epoch": 0.49, "grad_norm": 1.5009812213307152, "learning_rate": 5.438471307947668e-06, "loss": 0.225, "step": 6821 }, { "epoch": 0.49, "grad_norm": 1.4038094267466799, "learning_rate": 5.437317354645177e-06, "loss": 0.1819, "step": 6822 }, { "epoch": 0.49, "grad_norm": 1.1683216835146955, "learning_rate": 5.436163377869225e-06, "loss": 0.134, "step": 6823 }, { "epoch": 0.49, "grad_norm": 1.2999795014448476, "learning_rate": 5.435009377681759e-06, "loss": 0.1737, "step": 6824 }, { "epoch": 0.49, "grad_norm": 1.3251914868544477, "learning_rate": 5.433855354144717e-06, "loss": 0.2024, "step": 6825 }, { "epoch": 0.49, "grad_norm": 5.426699190203773, "learning_rate": 5.432701307320043e-06, "loss": 0.4185, "step": 6826 }, { "epoch": 0.49, "grad_norm": 1.335181480267974, "learning_rate": 5.431547237269681e-06, "loss": 0.2167, "step": 6827 }, { "epoch": 0.49, "grad_norm": 5.912729742178636, "learning_rate": 5.430393144055579e-06, "loss": 0.4096, "step": 6828 }, { "epoch": 0.49, "grad_norm": 6.996352912267826, "learning_rate": 5.429239027739683e-06, "loss": 0.4474, "step": 6829 }, { "epoch": 0.49, "grad_norm": 1.2818577794654593, "learning_rate": 5.428084888383941e-06, "loss": 0.1916, "step": 6830 }, { "epoch": 0.49, "grad_norm": 1.368241629993963, "learning_rate": 5.426930726050301e-06, "loss": 0.1853, "step": 6831 }, { "epoch": 0.49, "grad_norm": 1.4627782578378192, "learning_rate": 5.425776540800718e-06, "loss": 0.2162, "step": 6832 }, { "epoch": 0.49, "grad_norm": 1.4066559397733949, "learning_rate": 5.42462233269714e-06, "loss": 0.2014, "step": 6833 }, { "epoch": 0.49, "grad_norm": 1.4254521665605342, "learning_rate": 5.423468101801524e-06, "loss": 0.2122, "step": 6834 }, { "epoch": 0.49, "grad_norm": 1.4675478161898972, "learning_rate": 5.422313848175822e-06, "loss": 0.2148, "step": 6835 }, { "epoch": 0.49, "grad_norm": 1.1566236627923878, "learning_rate": 5.42115957188199e-06, "loss": 0.1408, "step": 6836 }, { "epoch": 0.49, "grad_norm": 1.3084436902699248, "learning_rate": 5.420005272981985e-06, "loss": 0.1894, "step": 6837 }, { "epoch": 0.49, "grad_norm": 1.459146106449613, "learning_rate": 5.418850951537766e-06, "loss": 0.1978, "step": 6838 }, { "epoch": 0.49, "grad_norm": 1.1352391865180778, "learning_rate": 5.417696607611293e-06, "loss": 0.1332, "step": 6839 }, { "epoch": 0.49, "grad_norm": 1.3241699706533734, "learning_rate": 5.416542241264524e-06, "loss": 0.1756, "step": 6840 }, { "epoch": 0.49, "grad_norm": 1.4338074572884234, "learning_rate": 5.415387852559423e-06, "loss": 0.2379, "step": 6841 }, { "epoch": 0.49, "grad_norm": 1.4625409549017454, "learning_rate": 5.414233441557952e-06, "loss": 0.231, "step": 6842 }, { "epoch": 0.49, "grad_norm": 5.821278361849323, "learning_rate": 5.413079008322077e-06, "loss": 0.5212, "step": 6843 }, { "epoch": 0.49, "grad_norm": 1.394559279673393, "learning_rate": 5.41192455291376e-06, "loss": 0.2031, "step": 6844 }, { "epoch": 0.49, "grad_norm": 1.3980311142225494, "learning_rate": 5.41077007539497e-06, "loss": 0.1725, "step": 6845 }, { "epoch": 0.49, "grad_norm": 1.4455303301307307, "learning_rate": 5.409615575827675e-06, "loss": 0.1838, "step": 6846 }, { "epoch": 0.49, "grad_norm": 1.3000556821255083, "learning_rate": 5.408461054273843e-06, "loss": 0.1493, "step": 6847 }, { "epoch": 0.49, "grad_norm": 1.262575224506312, "learning_rate": 5.407306510795443e-06, "loss": 0.1784, "step": 6848 }, { "epoch": 0.49, "grad_norm": 1.323117468141364, "learning_rate": 5.406151945454449e-06, "loss": 0.1752, "step": 6849 }, { "epoch": 0.49, "grad_norm": 1.2848735054760394, "learning_rate": 5.404997358312831e-06, "loss": 0.1995, "step": 6850 }, { "epoch": 0.49, "grad_norm": 1.492760115222576, "learning_rate": 5.403842749432565e-06, "loss": 0.1844, "step": 6851 }, { "epoch": 0.49, "grad_norm": 1.4224989711900202, "learning_rate": 5.402688118875624e-06, "loss": 0.1973, "step": 6852 }, { "epoch": 0.49, "grad_norm": 1.352826740035777, "learning_rate": 5.401533466703984e-06, "loss": 0.206, "step": 6853 }, { "epoch": 0.49, "grad_norm": 1.360718796004018, "learning_rate": 5.400378792979623e-06, "loss": 0.223, "step": 6854 }, { "epoch": 0.49, "grad_norm": 1.2441070913734704, "learning_rate": 5.3992240977645205e-06, "loss": 0.1958, "step": 6855 }, { "epoch": 0.49, "grad_norm": 1.3684218888807818, "learning_rate": 5.398069381120654e-06, "loss": 0.2145, "step": 6856 }, { "epoch": 0.49, "grad_norm": 3.9437922931801097, "learning_rate": 5.396914643110003e-06, "loss": 0.5525, "step": 6857 }, { "epoch": 0.49, "grad_norm": 1.4578418294020825, "learning_rate": 5.395759883794552e-06, "loss": 0.2529, "step": 6858 }, { "epoch": 0.49, "grad_norm": 1.3429557470919053, "learning_rate": 5.394605103236282e-06, "loss": 0.2009, "step": 6859 }, { "epoch": 0.49, "grad_norm": 1.3486119063612603, "learning_rate": 5.39345030149718e-06, "loss": 0.194, "step": 6860 }, { "epoch": 0.49, "grad_norm": 1.2981974073177407, "learning_rate": 5.392295478639226e-06, "loss": 0.1854, "step": 6861 }, { "epoch": 0.49, "grad_norm": 1.1880723233403436, "learning_rate": 5.39114063472441e-06, "loss": 0.1407, "step": 6862 }, { "epoch": 0.49, "grad_norm": 1.184610553066995, "learning_rate": 5.38998576981472e-06, "loss": 0.2021, "step": 6863 }, { "epoch": 0.49, "grad_norm": 4.850625546616521, "learning_rate": 5.388830883972142e-06, "loss": 0.6059, "step": 6864 }, { "epoch": 0.49, "grad_norm": 1.2536190233682296, "learning_rate": 5.387675977258667e-06, "loss": 0.1758, "step": 6865 }, { "epoch": 0.49, "grad_norm": 1.3615340293904863, "learning_rate": 5.386521049736284e-06, "loss": 0.1969, "step": 6866 }, { "epoch": 0.49, "grad_norm": 1.2418907660541736, "learning_rate": 5.385366101466989e-06, "loss": 0.1481, "step": 6867 }, { "epoch": 0.49, "grad_norm": 1.199297659947035, "learning_rate": 5.384211132512772e-06, "loss": 0.2011, "step": 6868 }, { "epoch": 0.49, "grad_norm": 1.3400687511898048, "learning_rate": 5.383056142935627e-06, "loss": 0.2109, "step": 6869 }, { "epoch": 0.49, "grad_norm": 1.3918692645164261, "learning_rate": 5.381901132797551e-06, "loss": 0.2039, "step": 6870 }, { "epoch": 0.49, "grad_norm": 4.694929636188387, "learning_rate": 5.3807461021605404e-06, "loss": 0.5441, "step": 6871 }, { "epoch": 0.49, "grad_norm": 1.4078895202731236, "learning_rate": 5.3795910510865904e-06, "loss": 0.2142, "step": 6872 }, { "epoch": 0.49, "grad_norm": 3.065199216341531, "learning_rate": 5.378435979637701e-06, "loss": 0.3409, "step": 6873 }, { "epoch": 0.49, "grad_norm": 1.1539222015472128, "learning_rate": 5.377280887875871e-06, "loss": 0.1562, "step": 6874 }, { "epoch": 0.49, "grad_norm": 4.592742425504455, "learning_rate": 5.376125775863102e-06, "loss": 0.4414, "step": 6875 }, { "epoch": 0.49, "grad_norm": 1.344416840980456, "learning_rate": 5.374970643661397e-06, "loss": 0.1899, "step": 6876 }, { "epoch": 0.49, "grad_norm": 6.725309334488556, "learning_rate": 5.3738154913327566e-06, "loss": 0.6448, "step": 6877 }, { "epoch": 0.49, "grad_norm": 1.204632970642909, "learning_rate": 5.372660318939185e-06, "loss": 0.2217, "step": 6878 }, { "epoch": 0.49, "grad_norm": 1.4521894238228432, "learning_rate": 5.371505126542688e-06, "loss": 0.2271, "step": 6879 }, { "epoch": 0.49, "grad_norm": 1.3690083233734283, "learning_rate": 5.370349914205273e-06, "loss": 0.2101, "step": 6880 }, { "epoch": 0.49, "grad_norm": 1.2626558170234456, "learning_rate": 5.369194681988946e-06, "loss": 0.2326, "step": 6881 }, { "epoch": 0.49, "grad_norm": 1.3904146465182194, "learning_rate": 5.368039429955714e-06, "loss": 0.2108, "step": 6882 }, { "epoch": 0.49, "grad_norm": 4.752489433358283, "learning_rate": 5.366884158167588e-06, "loss": 0.746, "step": 6883 }, { "epoch": 0.49, "grad_norm": 1.3799612514551323, "learning_rate": 5.365728866686578e-06, "loss": 0.1798, "step": 6884 }, { "epoch": 0.49, "grad_norm": 1.2959051595514177, "learning_rate": 5.364573555574696e-06, "loss": 0.159, "step": 6885 }, { "epoch": 0.49, "grad_norm": 1.4767672741729594, "learning_rate": 5.363418224893953e-06, "loss": 0.2122, "step": 6886 }, { "epoch": 0.49, "grad_norm": 1.3577137054221575, "learning_rate": 5.362262874706363e-06, "loss": 0.2116, "step": 6887 }, { "epoch": 0.49, "grad_norm": 1.5070744292955682, "learning_rate": 5.361107505073942e-06, "loss": 0.1797, "step": 6888 }, { "epoch": 0.49, "grad_norm": 1.3814161611560107, "learning_rate": 5.359952116058705e-06, "loss": 0.2478, "step": 6889 }, { "epoch": 0.49, "grad_norm": 1.2749135343725408, "learning_rate": 5.358796707722668e-06, "loss": 0.1836, "step": 6890 }, { "epoch": 0.49, "grad_norm": 1.290265885306727, "learning_rate": 5.357641280127849e-06, "loss": 0.1746, "step": 6891 }, { "epoch": 0.49, "grad_norm": 1.3955214226735753, "learning_rate": 5.356485833336267e-06, "loss": 0.1964, "step": 6892 }, { "epoch": 0.49, "grad_norm": 1.3305364451393777, "learning_rate": 5.355330367409942e-06, "loss": 0.2232, "step": 6893 }, { "epoch": 0.49, "grad_norm": 1.2208846958712158, "learning_rate": 5.3541748824108945e-06, "loss": 0.1893, "step": 6894 }, { "epoch": 0.49, "grad_norm": 1.3603845632106566, "learning_rate": 5.3530193784011455e-06, "loss": 0.2293, "step": 6895 }, { "epoch": 0.49, "grad_norm": 7.800738808515453, "learning_rate": 5.351863855442719e-06, "loss": 0.7261, "step": 6896 }, { "epoch": 0.49, "grad_norm": 1.2834949531715418, "learning_rate": 5.350708313597639e-06, "loss": 0.1975, "step": 6897 }, { "epoch": 0.49, "grad_norm": 1.3520939986193152, "learning_rate": 5.34955275292793e-06, "loss": 0.1537, "step": 6898 }, { "epoch": 0.49, "grad_norm": 1.3723173599808838, "learning_rate": 5.348397173495618e-06, "loss": 0.1846, "step": 6899 }, { "epoch": 0.49, "grad_norm": 1.1476418856636816, "learning_rate": 5.347241575362729e-06, "loss": 0.1404, "step": 6900 }, { "epoch": 0.49, "grad_norm": 1.2702689076544331, "learning_rate": 5.346085958591294e-06, "loss": 0.1821, "step": 6901 }, { "epoch": 0.49, "grad_norm": 1.3500271506732693, "learning_rate": 5.3449303232433384e-06, "loss": 0.2059, "step": 6902 }, { "epoch": 0.49, "grad_norm": 1.2900776653928712, "learning_rate": 5.343774669380894e-06, "loss": 0.1758, "step": 6903 }, { "epoch": 0.49, "grad_norm": 1.1916150179732805, "learning_rate": 5.342618997065988e-06, "loss": 0.1657, "step": 6904 }, { "epoch": 0.49, "grad_norm": 6.484467974452162, "learning_rate": 5.341463306360658e-06, "loss": 0.6843, "step": 6905 }, { "epoch": 0.49, "grad_norm": 1.4559229356402914, "learning_rate": 5.340307597326935e-06, "loss": 0.2419, "step": 6906 }, { "epoch": 0.49, "grad_norm": 1.4558134333716426, "learning_rate": 5.339151870026851e-06, "loss": 0.2036, "step": 6907 }, { "epoch": 0.49, "grad_norm": 1.3192070564716802, "learning_rate": 5.337996124522441e-06, "loss": 0.1911, "step": 6908 }, { "epoch": 0.49, "grad_norm": 1.4431564962778856, "learning_rate": 5.336840360875743e-06, "loss": 0.191, "step": 6909 }, { "epoch": 0.49, "grad_norm": 5.4956812204791055, "learning_rate": 5.335684579148793e-06, "loss": 0.6524, "step": 6910 }, { "epoch": 0.49, "grad_norm": 1.3257691893566066, "learning_rate": 5.334528779403628e-06, "loss": 0.2044, "step": 6911 }, { "epoch": 0.49, "grad_norm": 1.3651487555684232, "learning_rate": 5.333372961702287e-06, "loss": 0.2291, "step": 6912 }, { "epoch": 0.49, "grad_norm": 1.3676981633956835, "learning_rate": 5.33221712610681e-06, "loss": 0.2122, "step": 6913 }, { "epoch": 0.49, "grad_norm": 1.3421180363013006, "learning_rate": 5.331061272679238e-06, "loss": 0.1786, "step": 6914 }, { "epoch": 0.49, "grad_norm": 1.3973527291268202, "learning_rate": 5.329905401481612e-06, "loss": 0.2173, "step": 6915 }, { "epoch": 0.49, "grad_norm": 1.396112863515519, "learning_rate": 5.3287495125759734e-06, "loss": 0.1914, "step": 6916 }, { "epoch": 0.49, "grad_norm": 1.4514155692232678, "learning_rate": 5.327593606024369e-06, "loss": 0.2066, "step": 6917 }, { "epoch": 0.49, "grad_norm": 1.128054667522911, "learning_rate": 5.32643768188884e-06, "loss": 0.1616, "step": 6918 }, { "epoch": 0.49, "grad_norm": 1.299898500672018, "learning_rate": 5.325281740231433e-06, "loss": 0.2013, "step": 6919 }, { "epoch": 0.49, "grad_norm": 5.272469977229793, "learning_rate": 5.324125781114193e-06, "loss": 0.5872, "step": 6920 }, { "epoch": 0.5, "grad_norm": 1.2542762514476613, "learning_rate": 5.3229698045991715e-06, "loss": 0.203, "step": 6921 }, { "epoch": 0.5, "grad_norm": 4.170224660158486, "learning_rate": 5.321813810748413e-06, "loss": 0.5297, "step": 6922 }, { "epoch": 0.5, "grad_norm": 1.3475238184530494, "learning_rate": 5.320657799623968e-06, "loss": 0.1883, "step": 6923 }, { "epoch": 0.5, "grad_norm": 1.1954081109023742, "learning_rate": 5.319501771287885e-06, "loss": 0.1599, "step": 6924 }, { "epoch": 0.5, "grad_norm": 1.3056892781408058, "learning_rate": 5.318345725802217e-06, "loss": 0.1767, "step": 6925 }, { "epoch": 0.5, "grad_norm": 4.666525736250797, "learning_rate": 5.317189663229016e-06, "loss": 0.5874, "step": 6926 }, { "epoch": 0.5, "grad_norm": 1.5332223871057418, "learning_rate": 5.316033583630333e-06, "loss": 0.214, "step": 6927 }, { "epoch": 0.5, "grad_norm": 1.5811909891696048, "learning_rate": 5.314877487068223e-06, "loss": 0.2209, "step": 6928 }, { "epoch": 0.5, "grad_norm": 4.30206773777624, "learning_rate": 5.313721373604739e-06, "loss": 0.5329, "step": 6929 }, { "epoch": 0.5, "grad_norm": 6.105534250946762, "learning_rate": 5.312565243301939e-06, "loss": 0.6328, "step": 6930 }, { "epoch": 0.5, "grad_norm": 1.3536912276683108, "learning_rate": 5.31140909622188e-06, "loss": 0.1913, "step": 6931 }, { "epoch": 0.5, "grad_norm": 1.1950031128266467, "learning_rate": 5.310252932426617e-06, "loss": 0.1731, "step": 6932 }, { "epoch": 0.5, "grad_norm": 1.498715990170105, "learning_rate": 5.309096751978207e-06, "loss": 0.2314, "step": 6933 }, { "epoch": 0.5, "grad_norm": 1.260478479196862, "learning_rate": 5.307940554938715e-06, "loss": 0.2002, "step": 6934 }, { "epoch": 0.5, "grad_norm": 1.336780929062254, "learning_rate": 5.306784341370196e-06, "loss": 0.1989, "step": 6935 }, { "epoch": 0.5, "grad_norm": 4.0520937222232885, "learning_rate": 5.3056281113347115e-06, "loss": 0.3559, "step": 6936 }, { "epoch": 0.5, "grad_norm": 1.3642359387185994, "learning_rate": 5.304471864894324e-06, "loss": 0.2, "step": 6937 }, { "epoch": 0.5, "grad_norm": 1.2210056089928485, "learning_rate": 5.303315602111098e-06, "loss": 0.1728, "step": 6938 }, { "epoch": 0.5, "grad_norm": 1.384300097852221, "learning_rate": 5.302159323047095e-06, "loss": 0.1813, "step": 6939 }, { "epoch": 0.5, "grad_norm": 1.3997601349736764, "learning_rate": 5.30100302776438e-06, "loss": 0.2112, "step": 6940 }, { "epoch": 0.5, "grad_norm": 1.301123689449632, "learning_rate": 5.299846716325017e-06, "loss": 0.1593, "step": 6941 }, { "epoch": 0.5, "grad_norm": 1.1918243835388032, "learning_rate": 5.298690388791075e-06, "loss": 0.1741, "step": 6942 }, { "epoch": 0.5, "grad_norm": 1.133828752410014, "learning_rate": 5.297534045224618e-06, "loss": 0.1802, "step": 6943 }, { "epoch": 0.5, "grad_norm": 1.219529933202826, "learning_rate": 5.296377685687717e-06, "loss": 0.1675, "step": 6944 }, { "epoch": 0.5, "grad_norm": 1.312904182970714, "learning_rate": 5.295221310242438e-06, "loss": 0.1953, "step": 6945 }, { "epoch": 0.5, "grad_norm": 1.2828822917969591, "learning_rate": 5.294064918950852e-06, "loss": 0.1437, "step": 6946 }, { "epoch": 0.5, "grad_norm": 1.1921141517366456, "learning_rate": 5.2929085118750314e-06, "loss": 0.167, "step": 6947 }, { "epoch": 0.5, "grad_norm": 1.6084642096365014, "learning_rate": 5.291752089077044e-06, "loss": 0.2363, "step": 6948 }, { "epoch": 0.5, "grad_norm": 1.3404605033511738, "learning_rate": 5.2905956506189624e-06, "loss": 0.2265, "step": 6949 }, { "epoch": 0.5, "grad_norm": 1.1686624605572, "learning_rate": 5.289439196562862e-06, "loss": 0.1546, "step": 6950 }, { "epoch": 0.5, "grad_norm": 1.2927540260027461, "learning_rate": 5.288282726970816e-06, "loss": 0.1807, "step": 6951 }, { "epoch": 0.5, "grad_norm": 1.3429634620110584, "learning_rate": 5.287126241904898e-06, "loss": 0.1805, "step": 6952 }, { "epoch": 0.5, "grad_norm": 1.099768949498347, "learning_rate": 5.285969741427185e-06, "loss": 0.1556, "step": 6953 }, { "epoch": 0.5, "grad_norm": 5.215707948192235, "learning_rate": 5.28481322559975e-06, "loss": 0.4682, "step": 6954 }, { "epoch": 0.5, "grad_norm": 1.4053041347325612, "learning_rate": 5.2836566944846745e-06, "loss": 0.1862, "step": 6955 }, { "epoch": 0.5, "grad_norm": 1.2802057930304818, "learning_rate": 5.2825001481440345e-06, "loss": 0.1709, "step": 6956 }, { "epoch": 0.5, "grad_norm": 1.2007798873765443, "learning_rate": 5.281343586639908e-06, "loss": 0.1832, "step": 6957 }, { "epoch": 0.5, "grad_norm": 2.087866708648857, "learning_rate": 5.280187010034375e-06, "loss": 0.2304, "step": 6958 }, { "epoch": 0.5, "grad_norm": 1.1988287776468347, "learning_rate": 5.279030418389517e-06, "loss": 0.1647, "step": 6959 }, { "epoch": 0.5, "grad_norm": 1.383176448583845, "learning_rate": 5.277873811767415e-06, "loss": 0.1515, "step": 6960 }, { "epoch": 0.5, "grad_norm": 1.3608641872017129, "learning_rate": 5.27671719023015e-06, "loss": 0.1662, "step": 6961 }, { "epoch": 0.5, "grad_norm": 1.3549062675690828, "learning_rate": 5.275560553839807e-06, "loss": 0.2089, "step": 6962 }, { "epoch": 0.5, "grad_norm": 1.3829462886193433, "learning_rate": 5.274403902658466e-06, "loss": 0.1884, "step": 6963 }, { "epoch": 0.5, "grad_norm": 1.2074995673104718, "learning_rate": 5.273247236748216e-06, "loss": 0.1277, "step": 6964 }, { "epoch": 0.5, "grad_norm": 5.871830434045196, "learning_rate": 5.272090556171139e-06, "loss": 0.5336, "step": 6965 }, { "epoch": 0.5, "grad_norm": 3.6855446302255834, "learning_rate": 5.270933860989322e-06, "loss": 0.7287, "step": 6966 }, { "epoch": 0.5, "grad_norm": 1.3946988138142102, "learning_rate": 5.269777151264851e-06, "loss": 0.2073, "step": 6967 }, { "epoch": 0.5, "grad_norm": 1.357318475599064, "learning_rate": 5.268620427059815e-06, "loss": 0.1827, "step": 6968 }, { "epoch": 0.5, "grad_norm": 1.4805490001588364, "learning_rate": 5.267463688436302e-06, "loss": 0.2241, "step": 6969 }, { "epoch": 0.5, "grad_norm": 1.201551357368844, "learning_rate": 5.2663069354564e-06, "loss": 0.1711, "step": 6970 }, { "epoch": 0.5, "grad_norm": 1.4327573573096604, "learning_rate": 5.265150168182199e-06, "loss": 0.1994, "step": 6971 }, { "epoch": 0.5, "grad_norm": 1.2832779482491845, "learning_rate": 5.263993386675792e-06, "loss": 0.2089, "step": 6972 }, { "epoch": 0.5, "grad_norm": 1.3975638935301886, "learning_rate": 5.262836590999267e-06, "loss": 0.2297, "step": 6973 }, { "epoch": 0.5, "grad_norm": 1.4179187780677898, "learning_rate": 5.2616797812147205e-06, "loss": 0.2539, "step": 6974 }, { "epoch": 0.5, "grad_norm": 4.398053235881588, "learning_rate": 5.260522957384241e-06, "loss": 0.5396, "step": 6975 }, { "epoch": 0.5, "grad_norm": 1.2574885714448814, "learning_rate": 5.259366119569924e-06, "loss": 0.1864, "step": 6976 }, { "epoch": 0.5, "grad_norm": 4.737646301992324, "learning_rate": 5.2582092678338656e-06, "loss": 0.6419, "step": 6977 }, { "epoch": 0.5, "grad_norm": 1.1046360742955015, "learning_rate": 5.2570524022381585e-06, "loss": 0.1492, "step": 6978 }, { "epoch": 0.5, "grad_norm": 1.398534291552053, "learning_rate": 5.2558955228449e-06, "loss": 0.1754, "step": 6979 }, { "epoch": 0.5, "grad_norm": 1.352348505318937, "learning_rate": 5.254738629716186e-06, "loss": 0.194, "step": 6980 }, { "epoch": 0.5, "grad_norm": 1.319624161414183, "learning_rate": 5.253581722914115e-06, "loss": 0.1629, "step": 6981 }, { "epoch": 0.5, "grad_norm": 1.373644358565873, "learning_rate": 5.252424802500784e-06, "loss": 0.1765, "step": 6982 }, { "epoch": 0.5, "grad_norm": 1.3272613268584128, "learning_rate": 5.251267868538292e-06, "loss": 0.1958, "step": 6983 }, { "epoch": 0.5, "grad_norm": 1.344822600053962, "learning_rate": 5.25011092108874e-06, "loss": 0.1823, "step": 6984 }, { "epoch": 0.5, "grad_norm": 1.37170821096451, "learning_rate": 5.248953960214227e-06, "loss": 0.2124, "step": 6985 }, { "epoch": 0.5, "grad_norm": 19.26135577532696, "learning_rate": 5.2477969859768555e-06, "loss": 0.5871, "step": 6986 }, { "epoch": 0.5, "grad_norm": 1.3361000936955896, "learning_rate": 5.246639998438724e-06, "loss": 0.186, "step": 6987 }, { "epoch": 0.5, "grad_norm": 1.3360560542393998, "learning_rate": 5.245482997661939e-06, "loss": 0.1855, "step": 6988 }, { "epoch": 0.5, "grad_norm": 1.4722810098348993, "learning_rate": 5.244325983708601e-06, "loss": 0.2202, "step": 6989 }, { "epoch": 0.5, "grad_norm": 1.3993748967527266, "learning_rate": 5.2431689566408165e-06, "loss": 0.2029, "step": 6990 }, { "epoch": 0.5, "grad_norm": 1.3615873275828976, "learning_rate": 5.242011916520686e-06, "loss": 0.1852, "step": 6991 }, { "epoch": 0.5, "grad_norm": 1.2744668123237122, "learning_rate": 5.240854863410319e-06, "loss": 0.1577, "step": 6992 }, { "epoch": 0.5, "grad_norm": 1.4264109569810484, "learning_rate": 5.239697797371819e-06, "loss": 0.1703, "step": 6993 }, { "epoch": 0.5, "grad_norm": 1.2506961273297414, "learning_rate": 5.238540718467294e-06, "loss": 0.1703, "step": 6994 }, { "epoch": 0.5, "grad_norm": 1.265950406133173, "learning_rate": 5.237383626758851e-06, "loss": 0.191, "step": 6995 }, { "epoch": 0.5, "grad_norm": 1.2953092896525331, "learning_rate": 5.236226522308596e-06, "loss": 0.1572, "step": 6996 }, { "epoch": 0.5, "grad_norm": 1.1351216479714892, "learning_rate": 5.235069405178642e-06, "loss": 0.1433, "step": 6997 }, { "epoch": 0.5, "grad_norm": 1.2207629120414003, "learning_rate": 5.233912275431096e-06, "loss": 0.2124, "step": 6998 }, { "epoch": 0.5, "grad_norm": 1.3174142058167035, "learning_rate": 5.232755133128069e-06, "loss": 0.1772, "step": 6999 }, { "epoch": 0.5, "grad_norm": 1.5438121558763236, "learning_rate": 5.231597978331669e-06, "loss": 0.2281, "step": 7000 }, { "epoch": 0.5, "grad_norm": 1.2985162404889248, "learning_rate": 5.230440811104012e-06, "loss": 0.1713, "step": 7001 }, { "epoch": 0.5, "grad_norm": 1.2827631169196971, "learning_rate": 5.229283631507206e-06, "loss": 0.2186, "step": 7002 }, { "epoch": 0.5, "grad_norm": 1.1565063249181857, "learning_rate": 5.228126439603367e-06, "loss": 0.1378, "step": 7003 }, { "epoch": 0.5, "grad_norm": 1.1641651947723026, "learning_rate": 5.226969235454606e-06, "loss": 0.1754, "step": 7004 }, { "epoch": 0.5, "grad_norm": 1.3260369658131836, "learning_rate": 5.225812019123038e-06, "loss": 0.2013, "step": 7005 }, { "epoch": 0.5, "grad_norm": 1.3861039585408548, "learning_rate": 5.224654790670778e-06, "loss": 0.1968, "step": 7006 }, { "epoch": 0.5, "grad_norm": 1.2949928363136323, "learning_rate": 5.223497550159942e-06, "loss": 0.1988, "step": 7007 }, { "epoch": 0.5, "grad_norm": 1.2485435084200702, "learning_rate": 5.222340297652645e-06, "loss": 0.2012, "step": 7008 }, { "epoch": 0.5, "grad_norm": 1.3913081818240074, "learning_rate": 5.221183033211003e-06, "loss": 0.2029, "step": 7009 }, { "epoch": 0.5, "grad_norm": 1.3168410976478224, "learning_rate": 5.220025756897136e-06, "loss": 0.1806, "step": 7010 }, { "epoch": 0.5, "grad_norm": 1.401583369278531, "learning_rate": 5.21886846877316e-06, "loss": 0.1883, "step": 7011 }, { "epoch": 0.5, "grad_norm": 1.317320049903733, "learning_rate": 5.217711168901195e-06, "loss": 0.2076, "step": 7012 }, { "epoch": 0.5, "grad_norm": 1.4421780401820896, "learning_rate": 5.216553857343358e-06, "loss": 0.1944, "step": 7013 }, { "epoch": 0.5, "grad_norm": 1.635311612326544, "learning_rate": 5.215396534161771e-06, "loss": 0.2123, "step": 7014 }, { "epoch": 0.5, "grad_norm": 1.516834044415225, "learning_rate": 5.214239199418554e-06, "loss": 0.2019, "step": 7015 }, { "epoch": 0.5, "grad_norm": 1.4399606499390736, "learning_rate": 5.213081853175827e-06, "loss": 0.2138, "step": 7016 }, { "epoch": 0.5, "grad_norm": 1.1339955683637923, "learning_rate": 5.211924495495713e-06, "loss": 0.1839, "step": 7017 }, { "epoch": 0.5, "grad_norm": 1.2956875074473657, "learning_rate": 5.210767126440335e-06, "loss": 0.1628, "step": 7018 }, { "epoch": 0.5, "grad_norm": 1.1934055159897403, "learning_rate": 5.209609746071813e-06, "loss": 0.2093, "step": 7019 }, { "epoch": 0.5, "grad_norm": 1.1273795600006267, "learning_rate": 5.208452354452275e-06, "loss": 0.1267, "step": 7020 }, { "epoch": 0.5, "grad_norm": 1.5470143033170909, "learning_rate": 5.20729495164384e-06, "loss": 0.2313, "step": 7021 }, { "epoch": 0.5, "grad_norm": 1.4267961746927944, "learning_rate": 5.206137537708637e-06, "loss": 0.2218, "step": 7022 }, { "epoch": 0.5, "grad_norm": 1.2137288029382431, "learning_rate": 5.20498011270879e-06, "loss": 0.2052, "step": 7023 }, { "epoch": 0.5, "grad_norm": 1.2822849107152563, "learning_rate": 5.203822676706426e-06, "loss": 0.1873, "step": 7024 }, { "epoch": 0.5, "grad_norm": 1.3115825239060672, "learning_rate": 5.202665229763668e-06, "loss": 0.2035, "step": 7025 }, { "epoch": 0.5, "grad_norm": 1.4396129248174863, "learning_rate": 5.2015077719426465e-06, "loss": 0.1905, "step": 7026 }, { "epoch": 0.5, "grad_norm": 1.4868518646980733, "learning_rate": 5.200350303305489e-06, "loss": 0.2282, "step": 7027 }, { "epoch": 0.5, "grad_norm": 1.2709610230821584, "learning_rate": 5.199192823914323e-06, "loss": 0.1561, "step": 7028 }, { "epoch": 0.5, "grad_norm": 1.339942110143628, "learning_rate": 5.1980353338312775e-06, "loss": 0.1489, "step": 7029 }, { "epoch": 0.5, "grad_norm": 1.2329534340646224, "learning_rate": 5.196877833118481e-06, "loss": 0.1856, "step": 7030 }, { "epoch": 0.5, "grad_norm": 1.1645231827208187, "learning_rate": 5.195720321838066e-06, "loss": 0.1459, "step": 7031 }, { "epoch": 0.5, "grad_norm": 4.764728955111807, "learning_rate": 5.194562800052162e-06, "loss": 0.4393, "step": 7032 }, { "epoch": 0.5, "grad_norm": 1.3027468360760934, "learning_rate": 5.1934052678229e-06, "loss": 0.1875, "step": 7033 }, { "epoch": 0.5, "grad_norm": 1.3435743058686787, "learning_rate": 5.192247725212409e-06, "loss": 0.2083, "step": 7034 }, { "epoch": 0.5, "grad_norm": 1.335528018493987, "learning_rate": 5.191090172282826e-06, "loss": 0.1683, "step": 7035 }, { "epoch": 0.5, "grad_norm": 1.2898255348556065, "learning_rate": 5.189932609096281e-06, "loss": 0.206, "step": 7036 }, { "epoch": 0.5, "grad_norm": 1.490030857060649, "learning_rate": 5.188775035714909e-06, "loss": 0.1855, "step": 7037 }, { "epoch": 0.5, "grad_norm": 1.300653683151527, "learning_rate": 5.187617452200842e-06, "loss": 0.1765, "step": 7038 }, { "epoch": 0.5, "grad_norm": 4.874356721541645, "learning_rate": 5.1864598586162166e-06, "loss": 0.5751, "step": 7039 }, { "epoch": 0.5, "grad_norm": 1.3160138141432036, "learning_rate": 5.185302255023166e-06, "loss": 0.1784, "step": 7040 }, { "epoch": 0.5, "grad_norm": 1.4020593781714124, "learning_rate": 5.1841446414838274e-06, "loss": 0.1772, "step": 7041 }, { "epoch": 0.5, "grad_norm": 1.3068384059456177, "learning_rate": 5.182987018060334e-06, "loss": 0.1972, "step": 7042 }, { "epoch": 0.5, "grad_norm": 1.4934326714937047, "learning_rate": 5.181829384814826e-06, "loss": 0.2209, "step": 7043 }, { "epoch": 0.5, "grad_norm": 1.3126916178357773, "learning_rate": 5.180671741809439e-06, "loss": 0.1889, "step": 7044 }, { "epoch": 0.5, "grad_norm": 1.1300865424387185, "learning_rate": 5.17951408910631e-06, "loss": 0.1489, "step": 7045 }, { "epoch": 0.5, "grad_norm": 1.2875430695330707, "learning_rate": 5.178356426767578e-06, "loss": 0.2111, "step": 7046 }, { "epoch": 0.5, "grad_norm": 1.2520092563962444, "learning_rate": 5.177198754855382e-06, "loss": 0.2065, "step": 7047 }, { "epoch": 0.5, "grad_norm": 1.269322156897904, "learning_rate": 5.17604107343186e-06, "loss": 0.2094, "step": 7048 }, { "epoch": 0.5, "grad_norm": 1.2796420565569904, "learning_rate": 5.174883382559154e-06, "loss": 0.1892, "step": 7049 }, { "epoch": 0.5, "grad_norm": 1.2834589851512133, "learning_rate": 5.173725682299402e-06, "loss": 0.2245, "step": 7050 }, { "epoch": 0.5, "grad_norm": 1.3795351525805337, "learning_rate": 5.172567972714745e-06, "loss": 0.1888, "step": 7051 }, { "epoch": 0.5, "grad_norm": 4.866081714136038, "learning_rate": 5.171410253867325e-06, "loss": 0.4929, "step": 7052 }, { "epoch": 0.5, "grad_norm": 1.641775236051953, "learning_rate": 5.170252525819285e-06, "loss": 0.2264, "step": 7053 }, { "epoch": 0.5, "grad_norm": 1.25455871205736, "learning_rate": 5.1690947886327645e-06, "loss": 0.1686, "step": 7054 }, { "epoch": 0.5, "grad_norm": 1.5455810236713847, "learning_rate": 5.167937042369907e-06, "loss": 0.1882, "step": 7055 }, { "epoch": 0.5, "grad_norm": 1.4186789268967792, "learning_rate": 5.166779287092857e-06, "loss": 0.161, "step": 7056 }, { "epoch": 0.5, "grad_norm": 1.3049888042387654, "learning_rate": 5.165621522863756e-06, "loss": 0.1994, "step": 7057 }, { "epoch": 0.5, "grad_norm": 1.3146143827330297, "learning_rate": 5.164463749744751e-06, "loss": 0.2199, "step": 7058 }, { "epoch": 0.5, "grad_norm": 1.3194439567219534, "learning_rate": 5.163305967797986e-06, "loss": 0.1889, "step": 7059 }, { "epoch": 0.51, "grad_norm": 1.343740174539173, "learning_rate": 5.162148177085604e-06, "loss": 0.1947, "step": 7060 }, { "epoch": 0.51, "grad_norm": 1.304934655096607, "learning_rate": 5.1609903776697535e-06, "loss": 0.2038, "step": 7061 }, { "epoch": 0.51, "grad_norm": 1.1889128165698772, "learning_rate": 5.1598325696125785e-06, "loss": 0.1801, "step": 7062 }, { "epoch": 0.51, "grad_norm": 1.5597980706662804, "learning_rate": 5.158674752976226e-06, "loss": 0.2298, "step": 7063 }, { "epoch": 0.51, "grad_norm": 1.2082773574681906, "learning_rate": 5.157516927822842e-06, "loss": 0.1834, "step": 7064 }, { "epoch": 0.51, "grad_norm": 1.3492537362478436, "learning_rate": 5.156359094214577e-06, "loss": 0.1668, "step": 7065 }, { "epoch": 0.51, "grad_norm": 1.2461915119225135, "learning_rate": 5.155201252213576e-06, "loss": 0.1865, "step": 7066 }, { "epoch": 0.51, "grad_norm": 1.2273496009884375, "learning_rate": 5.1540434018819895e-06, "loss": 0.1813, "step": 7067 }, { "epoch": 0.51, "grad_norm": 1.4732433039028856, "learning_rate": 5.152885543281964e-06, "loss": 0.2226, "step": 7068 }, { "epoch": 0.51, "grad_norm": 1.342413607381237, "learning_rate": 5.15172767647565e-06, "loss": 0.1821, "step": 7069 }, { "epoch": 0.51, "grad_norm": 1.3368474612813426, "learning_rate": 5.150569801525197e-06, "loss": 0.2137, "step": 7070 }, { "epoch": 0.51, "grad_norm": 1.3045391482715343, "learning_rate": 5.149411918492756e-06, "loss": 0.2033, "step": 7071 }, { "epoch": 0.51, "grad_norm": 1.5681584252122862, "learning_rate": 5.1482540274404755e-06, "loss": 0.1987, "step": 7072 }, { "epoch": 0.51, "grad_norm": 1.3403861875833958, "learning_rate": 5.1470961284305085e-06, "loss": 0.2026, "step": 7073 }, { "epoch": 0.51, "grad_norm": 1.2343605542430138, "learning_rate": 5.145938221525006e-06, "loss": 0.1678, "step": 7074 }, { "epoch": 0.51, "grad_norm": 1.157920229430441, "learning_rate": 5.14478030678612e-06, "loss": 0.1879, "step": 7075 }, { "epoch": 0.51, "grad_norm": 1.5097109678860694, "learning_rate": 5.1436223842760015e-06, "loss": 0.214, "step": 7076 }, { "epoch": 0.51, "grad_norm": 1.283303493299244, "learning_rate": 5.142464454056803e-06, "loss": 0.206, "step": 7077 }, { "epoch": 0.51, "grad_norm": 6.148093922148823, "learning_rate": 5.14130651619068e-06, "loss": 0.603, "step": 7078 }, { "epoch": 0.51, "grad_norm": 1.3134949394077244, "learning_rate": 5.140148570739786e-06, "loss": 0.1713, "step": 7079 }, { "epoch": 0.51, "grad_norm": 1.3070542790122428, "learning_rate": 5.1389906177662705e-06, "loss": 0.2009, "step": 7080 }, { "epoch": 0.51, "grad_norm": 1.4085656614458386, "learning_rate": 5.137832657332292e-06, "loss": 0.163, "step": 7081 }, { "epoch": 0.51, "grad_norm": 1.5965888074933976, "learning_rate": 5.1366746895000055e-06, "loss": 0.2107, "step": 7082 }, { "epoch": 0.51, "grad_norm": 4.689250647979224, "learning_rate": 5.135516714331563e-06, "loss": 0.4845, "step": 7083 }, { "epoch": 0.51, "grad_norm": 1.4793749217638605, "learning_rate": 5.134358731889122e-06, "loss": 0.2084, "step": 7084 }, { "epoch": 0.51, "grad_norm": 1.3366911326784332, "learning_rate": 5.133200742234837e-06, "loss": 0.1832, "step": 7085 }, { "epoch": 0.51, "grad_norm": 1.3293063402347323, "learning_rate": 5.1320427454308676e-06, "loss": 0.1967, "step": 7086 }, { "epoch": 0.51, "grad_norm": 1.3780726807443109, "learning_rate": 5.130884741539367e-06, "loss": 0.2029, "step": 7087 }, { "epoch": 0.51, "grad_norm": 1.3542279544229225, "learning_rate": 5.1297267306224925e-06, "loss": 0.1783, "step": 7088 }, { "epoch": 0.51, "grad_norm": 1.3952434370688405, "learning_rate": 5.1285687127424035e-06, "loss": 0.17, "step": 7089 }, { "epoch": 0.51, "grad_norm": 1.2430523442778054, "learning_rate": 5.127410687961256e-06, "loss": 0.1224, "step": 7090 }, { "epoch": 0.51, "grad_norm": 1.3472701139484442, "learning_rate": 5.126252656341211e-06, "loss": 0.1638, "step": 7091 }, { "epoch": 0.51, "grad_norm": 1.4235821670685538, "learning_rate": 5.125094617944424e-06, "loss": 0.1955, "step": 7092 }, { "epoch": 0.51, "grad_norm": 1.4102468753273756, "learning_rate": 5.123936572833053e-06, "loss": 0.2421, "step": 7093 }, { "epoch": 0.51, "grad_norm": 1.3748186155844737, "learning_rate": 5.122778521069261e-06, "loss": 0.2139, "step": 7094 }, { "epoch": 0.51, "grad_norm": 1.2313035212758103, "learning_rate": 5.121620462715205e-06, "loss": 0.1587, "step": 7095 }, { "epoch": 0.51, "grad_norm": 7.526335250573845, "learning_rate": 5.120462397833047e-06, "loss": 0.5863, "step": 7096 }, { "epoch": 0.51, "grad_norm": 1.5514627619664212, "learning_rate": 5.119304326484944e-06, "loss": 0.2621, "step": 7097 }, { "epoch": 0.51, "grad_norm": 1.4356120022720127, "learning_rate": 5.11814624873306e-06, "loss": 0.1902, "step": 7098 }, { "epoch": 0.51, "grad_norm": 1.3353845638519553, "learning_rate": 5.116988164639556e-06, "loss": 0.1931, "step": 7099 }, { "epoch": 0.51, "grad_norm": 1.2916993189894708, "learning_rate": 5.115830074266592e-06, "loss": 0.1845, "step": 7100 }, { "epoch": 0.51, "grad_norm": 1.5881740908852107, "learning_rate": 5.114671977676328e-06, "loss": 0.2238, "step": 7101 }, { "epoch": 0.51, "grad_norm": 1.3445272740206737, "learning_rate": 5.113513874930928e-06, "loss": 0.2097, "step": 7102 }, { "epoch": 0.51, "grad_norm": 1.2974752678916706, "learning_rate": 5.112355766092554e-06, "loss": 0.1679, "step": 7103 }, { "epoch": 0.51, "grad_norm": 1.5682372204030228, "learning_rate": 5.11119765122337e-06, "loss": 0.1951, "step": 7104 }, { "epoch": 0.51, "grad_norm": 1.3558619730624946, "learning_rate": 5.1100395303855374e-06, "loss": 0.1661, "step": 7105 }, { "epoch": 0.51, "grad_norm": 1.313818720959302, "learning_rate": 5.108881403641218e-06, "loss": 0.1479, "step": 7106 }, { "epoch": 0.51, "grad_norm": 1.2969136904250302, "learning_rate": 5.10772327105258e-06, "loss": 0.1995, "step": 7107 }, { "epoch": 0.51, "grad_norm": 1.4036773331390153, "learning_rate": 5.106565132681784e-06, "loss": 0.1822, "step": 7108 }, { "epoch": 0.51, "grad_norm": 1.424345880653183, "learning_rate": 5.105406988590996e-06, "loss": 0.2049, "step": 7109 }, { "epoch": 0.51, "grad_norm": 1.4638574021270017, "learning_rate": 5.104248838842377e-06, "loss": 0.1925, "step": 7110 }, { "epoch": 0.51, "grad_norm": 1.3579398012646053, "learning_rate": 5.1030906834980965e-06, "loss": 0.1757, "step": 7111 }, { "epoch": 0.51, "grad_norm": 1.4209363850392511, "learning_rate": 5.101932522620317e-06, "loss": 0.2374, "step": 7112 }, { "epoch": 0.51, "grad_norm": 1.2636129932211884, "learning_rate": 5.1007743562712065e-06, "loss": 0.2344, "step": 7113 }, { "epoch": 0.51, "grad_norm": 1.3922972458480978, "learning_rate": 5.099616184512926e-06, "loss": 0.1788, "step": 7114 }, { "epoch": 0.51, "grad_norm": 1.2923905677860181, "learning_rate": 5.098458007407646e-06, "loss": 0.1716, "step": 7115 }, { "epoch": 0.51, "grad_norm": 1.4472064003350025, "learning_rate": 5.097299825017532e-06, "loss": 0.1873, "step": 7116 }, { "epoch": 0.51, "grad_norm": 1.197994884601184, "learning_rate": 5.096141637404749e-06, "loss": 0.2006, "step": 7117 }, { "epoch": 0.51, "grad_norm": 1.3430428059975343, "learning_rate": 5.094983444631464e-06, "loss": 0.1684, "step": 7118 }, { "epoch": 0.51, "grad_norm": 1.4511034025138099, "learning_rate": 5.093825246759846e-06, "loss": 0.2206, "step": 7119 }, { "epoch": 0.51, "grad_norm": 1.347489751604239, "learning_rate": 5.092667043852062e-06, "loss": 0.198, "step": 7120 }, { "epoch": 0.51, "grad_norm": 1.3615646558004235, "learning_rate": 5.091508835970278e-06, "loss": 0.2063, "step": 7121 }, { "epoch": 0.51, "grad_norm": 1.354780935240967, "learning_rate": 5.0903506231766645e-06, "loss": 0.1891, "step": 7122 }, { "epoch": 0.51, "grad_norm": 1.2621190988596216, "learning_rate": 5.089192405533387e-06, "loss": 0.1956, "step": 7123 }, { "epoch": 0.51, "grad_norm": 1.5609273907338883, "learning_rate": 5.088034183102617e-06, "loss": 0.2045, "step": 7124 }, { "epoch": 0.51, "grad_norm": 1.3846848133681071, "learning_rate": 5.086875955946521e-06, "loss": 0.2378, "step": 7125 }, { "epoch": 0.51, "grad_norm": 1.2634008727311616, "learning_rate": 5.085717724127269e-06, "loss": 0.1843, "step": 7126 }, { "epoch": 0.51, "grad_norm": 1.501295664398636, "learning_rate": 5.084559487707029e-06, "loss": 0.2247, "step": 7127 }, { "epoch": 0.51, "grad_norm": 1.232901951792052, "learning_rate": 5.083401246747973e-06, "loss": 0.2268, "step": 7128 }, { "epoch": 0.51, "grad_norm": 1.3292853066079058, "learning_rate": 5.08224300131227e-06, "loss": 0.1728, "step": 7129 }, { "epoch": 0.51, "grad_norm": 1.31585657939233, "learning_rate": 5.0810847514620884e-06, "loss": 0.1953, "step": 7130 }, { "epoch": 0.51, "grad_norm": 1.482503384818558, "learning_rate": 5.0799264972596e-06, "loss": 0.2119, "step": 7131 }, { "epoch": 0.51, "grad_norm": 1.4076306674495631, "learning_rate": 5.0787682387669745e-06, "loss": 0.1974, "step": 7132 }, { "epoch": 0.51, "grad_norm": 1.4125891884076005, "learning_rate": 5.077609976046383e-06, "loss": 0.1998, "step": 7133 }, { "epoch": 0.51, "grad_norm": 1.567041027090421, "learning_rate": 5.0764517091599965e-06, "loss": 0.2124, "step": 7134 }, { "epoch": 0.51, "grad_norm": 1.4465693417770462, "learning_rate": 5.075293438169985e-06, "loss": 0.2062, "step": 7135 }, { "epoch": 0.51, "grad_norm": 1.3230522438494927, "learning_rate": 5.074135163138521e-06, "loss": 0.1689, "step": 7136 }, { "epoch": 0.51, "grad_norm": 1.5847703677739586, "learning_rate": 5.0729768841277774e-06, "loss": 0.2245, "step": 7137 }, { "epoch": 0.51, "grad_norm": 1.4372068170293133, "learning_rate": 5.071818601199924e-06, "loss": 0.2214, "step": 7138 }, { "epoch": 0.51, "grad_norm": 8.888439022591472, "learning_rate": 5.070660314417132e-06, "loss": 0.577, "step": 7139 }, { "epoch": 0.51, "grad_norm": 1.2882563938288163, "learning_rate": 5.069502023841576e-06, "loss": 0.202, "step": 7140 }, { "epoch": 0.51, "grad_norm": 5.725048368243982, "learning_rate": 5.068343729535427e-06, "loss": 0.6147, "step": 7141 }, { "epoch": 0.51, "grad_norm": 1.2285588875399995, "learning_rate": 5.06718543156086e-06, "loss": 0.1554, "step": 7142 }, { "epoch": 0.51, "grad_norm": 1.3146404492714245, "learning_rate": 5.066027129980043e-06, "loss": 0.1832, "step": 7143 }, { "epoch": 0.51, "grad_norm": 5.093121969165497, "learning_rate": 5.064868824855155e-06, "loss": 0.6943, "step": 7144 }, { "epoch": 0.51, "grad_norm": 1.328526264128496, "learning_rate": 5.063710516248364e-06, "loss": 0.201, "step": 7145 }, { "epoch": 0.51, "grad_norm": 1.311193202854565, "learning_rate": 5.0625522042218475e-06, "loss": 0.1916, "step": 7146 }, { "epoch": 0.51, "grad_norm": 1.3329668612325984, "learning_rate": 5.061393888837776e-06, "loss": 0.1718, "step": 7147 }, { "epoch": 0.51, "grad_norm": 1.2082439717078732, "learning_rate": 5.0602355701583235e-06, "loss": 0.1706, "step": 7148 }, { "epoch": 0.51, "grad_norm": 1.1234271194178975, "learning_rate": 5.059077248245666e-06, "loss": 0.1452, "step": 7149 }, { "epoch": 0.51, "grad_norm": 1.1654082305902267, "learning_rate": 5.057918923161978e-06, "loss": 0.1275, "step": 7150 }, { "epoch": 0.51, "grad_norm": 1.4855142455069617, "learning_rate": 5.056760594969431e-06, "loss": 0.2153, "step": 7151 }, { "epoch": 0.51, "grad_norm": 1.2988362945869183, "learning_rate": 5.0556022637302e-06, "loss": 0.1868, "step": 7152 }, { "epoch": 0.51, "grad_norm": 1.3063296163157463, "learning_rate": 5.0544439295064595e-06, "loss": 0.1704, "step": 7153 }, { "epoch": 0.51, "grad_norm": 1.4118721325138428, "learning_rate": 5.053285592360386e-06, "loss": 0.199, "step": 7154 }, { "epoch": 0.51, "grad_norm": 5.603240108008233, "learning_rate": 5.052127252354155e-06, "loss": 0.5054, "step": 7155 }, { "epoch": 0.51, "grad_norm": 1.16939967707264, "learning_rate": 5.050968909549939e-06, "loss": 0.1772, "step": 7156 }, { "epoch": 0.51, "grad_norm": 1.268540087210407, "learning_rate": 5.049810564009915e-06, "loss": 0.1781, "step": 7157 }, { "epoch": 0.51, "grad_norm": 1.3227016862269099, "learning_rate": 5.048652215796257e-06, "loss": 0.1825, "step": 7158 }, { "epoch": 0.51, "grad_norm": 1.4365322812761614, "learning_rate": 5.047493864971142e-06, "loss": 0.2385, "step": 7159 }, { "epoch": 0.51, "grad_norm": 1.19419135517295, "learning_rate": 5.046335511596746e-06, "loss": 0.1495, "step": 7160 }, { "epoch": 0.51, "grad_norm": 1.1207966817976098, "learning_rate": 5.045177155735241e-06, "loss": 0.1871, "step": 7161 }, { "epoch": 0.51, "grad_norm": 1.443122081696332, "learning_rate": 5.044018797448809e-06, "loss": 0.238, "step": 7162 }, { "epoch": 0.51, "grad_norm": 4.753371015677805, "learning_rate": 5.04286043679962e-06, "loss": 0.6796, "step": 7163 }, { "epoch": 0.51, "grad_norm": 4.976693357369042, "learning_rate": 5.041702073849856e-06, "loss": 0.6184, "step": 7164 }, { "epoch": 0.51, "grad_norm": 1.3731540395904565, "learning_rate": 5.040543708661687e-06, "loss": 0.1974, "step": 7165 }, { "epoch": 0.51, "grad_norm": 1.3299295373038282, "learning_rate": 5.039385341297295e-06, "loss": 0.2264, "step": 7166 }, { "epoch": 0.51, "grad_norm": 1.46403157349107, "learning_rate": 5.038226971818854e-06, "loss": 0.2155, "step": 7167 }, { "epoch": 0.51, "grad_norm": 1.2648680629874816, "learning_rate": 5.037068600288542e-06, "loss": 0.1817, "step": 7168 }, { "epoch": 0.51, "grad_norm": 1.0977994613283544, "learning_rate": 5.0359102267685324e-06, "loss": 0.1653, "step": 7169 }, { "epoch": 0.51, "grad_norm": 1.365319919825025, "learning_rate": 5.0347518513210056e-06, "loss": 0.2083, "step": 7170 }, { "epoch": 0.51, "grad_norm": 1.268268619293284, "learning_rate": 5.0335934740081385e-06, "loss": 0.1704, "step": 7171 }, { "epoch": 0.51, "grad_norm": 1.2645990387115449, "learning_rate": 5.032435094892106e-06, "loss": 0.1677, "step": 7172 }, { "epoch": 0.51, "grad_norm": 1.289443906840907, "learning_rate": 5.031276714035086e-06, "loss": 0.1899, "step": 7173 }, { "epoch": 0.51, "grad_norm": 1.287302540760077, "learning_rate": 5.030118331499257e-06, "loss": 0.1671, "step": 7174 }, { "epoch": 0.51, "grad_norm": 5.527273755572574, "learning_rate": 5.028959947346797e-06, "loss": 0.654, "step": 7175 }, { "epoch": 0.51, "grad_norm": 1.3105406569884996, "learning_rate": 5.0278015616398805e-06, "loss": 0.2069, "step": 7176 }, { "epoch": 0.51, "grad_norm": 1.4233773858241843, "learning_rate": 5.026643174440686e-06, "loss": 0.2172, "step": 7177 }, { "epoch": 0.51, "grad_norm": 4.0603740676183655, "learning_rate": 5.025484785811393e-06, "loss": 0.4947, "step": 7178 }, { "epoch": 0.51, "grad_norm": 1.200089658853904, "learning_rate": 5.024326395814178e-06, "loss": 0.1493, "step": 7179 }, { "epoch": 0.51, "grad_norm": 1.5707208950544234, "learning_rate": 5.0231680045112174e-06, "loss": 0.2106, "step": 7180 }, { "epoch": 0.51, "grad_norm": 1.275835884655264, "learning_rate": 5.022009611964691e-06, "loss": 0.1904, "step": 7181 }, { "epoch": 0.51, "grad_norm": 1.3321657911195346, "learning_rate": 5.0208512182367766e-06, "loss": 0.199, "step": 7182 }, { "epoch": 0.51, "grad_norm": 1.318506769226695, "learning_rate": 5.0196928233896516e-06, "loss": 0.2072, "step": 7183 }, { "epoch": 0.51, "grad_norm": 1.487178949015922, "learning_rate": 5.018534427485494e-06, "loss": 0.2013, "step": 7184 }, { "epoch": 0.51, "grad_norm": 1.4446371097399218, "learning_rate": 5.017376030586481e-06, "loss": 0.2523, "step": 7185 }, { "epoch": 0.51, "grad_norm": 1.39385896214302, "learning_rate": 5.016217632754792e-06, "loss": 0.164, "step": 7186 }, { "epoch": 0.51, "grad_norm": 4.227147006811772, "learning_rate": 5.015059234052605e-06, "loss": 0.5688, "step": 7187 }, { "epoch": 0.51, "grad_norm": 1.4184924267552947, "learning_rate": 5.013900834542099e-06, "loss": 0.201, "step": 7188 }, { "epoch": 0.51, "grad_norm": 1.2737997741165885, "learning_rate": 5.012742434285452e-06, "loss": 0.1964, "step": 7189 }, { "epoch": 0.51, "grad_norm": 1.2762622751733355, "learning_rate": 5.0115840333448386e-06, "loss": 0.1876, "step": 7190 }, { "epoch": 0.51, "grad_norm": 1.4243683003054242, "learning_rate": 5.010425631782443e-06, "loss": 0.1955, "step": 7191 }, { "epoch": 0.51, "grad_norm": 1.3048485420763423, "learning_rate": 5.00926722966044e-06, "loss": 0.1689, "step": 7192 }, { "epoch": 0.51, "grad_norm": 1.3571129157436526, "learning_rate": 5.00810882704101e-06, "loss": 0.1641, "step": 7193 }, { "epoch": 0.51, "grad_norm": 1.3497116202850905, "learning_rate": 5.006950423986328e-06, "loss": 0.1854, "step": 7194 }, { "epoch": 0.51, "grad_norm": 1.5095096263051992, "learning_rate": 5.005792020558577e-06, "loss": 0.239, "step": 7195 }, { "epoch": 0.51, "grad_norm": 1.3080272985335348, "learning_rate": 5.004633616819934e-06, "loss": 0.1866, "step": 7196 }, { "epoch": 0.51, "grad_norm": 1.6997148461561171, "learning_rate": 5.003475212832576e-06, "loss": 0.1781, "step": 7197 }, { "epoch": 0.51, "grad_norm": 1.172788469801866, "learning_rate": 5.00231680865868e-06, "loss": 0.1648, "step": 7198 }, { "epoch": 0.51, "grad_norm": 1.2767903494474397, "learning_rate": 5.00115840436043e-06, "loss": 0.1986, "step": 7199 }, { "epoch": 0.52, "grad_norm": 4.041474753827072, "learning_rate": 5e-06, "loss": 0.4518, "step": 7200 }, { "epoch": 0.52, "grad_norm": 1.2536997121175588, "learning_rate": 4.9988415956395706e-06, "loss": 0.1821, "step": 7201 }, { "epoch": 0.52, "grad_norm": 1.3672113597070907, "learning_rate": 4.9976831913413205e-06, "loss": 0.1916, "step": 7202 }, { "epoch": 0.52, "grad_norm": 1.582090708222644, "learning_rate": 4.996524787167425e-06, "loss": 0.2201, "step": 7203 }, { "epoch": 0.52, "grad_norm": 1.3620995852998068, "learning_rate": 4.995366383180068e-06, "loss": 0.228, "step": 7204 }, { "epoch": 0.52, "grad_norm": 1.5879134785385052, "learning_rate": 4.9942079794414226e-06, "loss": 0.213, "step": 7205 }, { "epoch": 0.52, "grad_norm": 1.2148935314774743, "learning_rate": 4.9930495760136725e-06, "loss": 0.2066, "step": 7206 }, { "epoch": 0.52, "grad_norm": 1.3081760021480335, "learning_rate": 4.991891172958991e-06, "loss": 0.183, "step": 7207 }, { "epoch": 0.52, "grad_norm": 1.2331301385897968, "learning_rate": 4.990732770339561e-06, "loss": 0.1481, "step": 7208 }, { "epoch": 0.52, "grad_norm": 1.279907423070733, "learning_rate": 4.989574368217558e-06, "loss": 0.1944, "step": 7209 }, { "epoch": 0.52, "grad_norm": 1.3597232937832466, "learning_rate": 4.988415966655162e-06, "loss": 0.1914, "step": 7210 }, { "epoch": 0.52, "grad_norm": 1.5107577266807566, "learning_rate": 4.987257565714551e-06, "loss": 0.2282, "step": 7211 }, { "epoch": 0.52, "grad_norm": 1.445690571469777, "learning_rate": 4.9860991654579025e-06, "loss": 0.2215, "step": 7212 }, { "epoch": 0.52, "grad_norm": 1.17942560246018, "learning_rate": 4.984940765947395e-06, "loss": 0.1598, "step": 7213 }, { "epoch": 0.52, "grad_norm": 1.3621631167789856, "learning_rate": 4.98378236724521e-06, "loss": 0.1944, "step": 7214 }, { "epoch": 0.52, "grad_norm": 1.4129475082639673, "learning_rate": 4.982623969413521e-06, "loss": 0.2072, "step": 7215 }, { "epoch": 0.52, "grad_norm": 1.2199817422306318, "learning_rate": 4.981465572514509e-06, "loss": 0.2154, "step": 7216 }, { "epoch": 0.52, "grad_norm": 1.267152765163097, "learning_rate": 4.98030717661035e-06, "loss": 0.1766, "step": 7217 }, { "epoch": 0.52, "grad_norm": 1.3291759934935892, "learning_rate": 4.979148781763226e-06, "loss": 0.1622, "step": 7218 }, { "epoch": 0.52, "grad_norm": 1.3953027974581143, "learning_rate": 4.97799038803531e-06, "loss": 0.2287, "step": 7219 }, { "epoch": 0.52, "grad_norm": 1.4787157786553355, "learning_rate": 4.976831995488784e-06, "loss": 0.2537, "step": 7220 }, { "epoch": 0.52, "grad_norm": 1.452085840695506, "learning_rate": 4.975673604185824e-06, "loss": 0.195, "step": 7221 }, { "epoch": 0.52, "grad_norm": 1.3237866763462198, "learning_rate": 4.9745152141886096e-06, "loss": 0.2108, "step": 7222 }, { "epoch": 0.52, "grad_norm": 5.289557695544683, "learning_rate": 4.973356825559315e-06, "loss": 0.6423, "step": 7223 }, { "epoch": 0.52, "grad_norm": 1.3837522135104725, "learning_rate": 4.972198438360122e-06, "loss": 0.1959, "step": 7224 }, { "epoch": 0.52, "grad_norm": 1.4240604515708708, "learning_rate": 4.971040052653205e-06, "loss": 0.2155, "step": 7225 }, { "epoch": 0.52, "grad_norm": 1.0929029495203233, "learning_rate": 4.969881668500745e-06, "loss": 0.141, "step": 7226 }, { "epoch": 0.52, "grad_norm": 1.2183924010946645, "learning_rate": 4.968723285964915e-06, "loss": 0.1635, "step": 7227 }, { "epoch": 0.52, "grad_norm": 1.2381034566037676, "learning_rate": 4.9675649051078965e-06, "loss": 0.177, "step": 7228 }, { "epoch": 0.52, "grad_norm": 1.2324970197661276, "learning_rate": 4.966406525991863e-06, "loss": 0.1812, "step": 7229 }, { "epoch": 0.52, "grad_norm": 12.67712826366447, "learning_rate": 4.965248148678997e-06, "loss": 0.7243, "step": 7230 }, { "epoch": 0.52, "grad_norm": 1.3704204329014857, "learning_rate": 4.964089773231469e-06, "loss": 0.2183, "step": 7231 }, { "epoch": 0.52, "grad_norm": 1.1366786738316605, "learning_rate": 4.962931399711461e-06, "loss": 0.1471, "step": 7232 }, { "epoch": 0.52, "grad_norm": 1.358856610583483, "learning_rate": 4.961773028181147e-06, "loss": 0.1786, "step": 7233 }, { "epoch": 0.52, "grad_norm": 1.2722444780062026, "learning_rate": 4.960614658702705e-06, "loss": 0.2235, "step": 7234 }, { "epoch": 0.52, "grad_norm": 1.2684994142282349, "learning_rate": 4.959456291338314e-06, "loss": 0.2169, "step": 7235 }, { "epoch": 0.52, "grad_norm": 1.4585472899932999, "learning_rate": 4.958297926150146e-06, "loss": 0.1974, "step": 7236 }, { "epoch": 0.52, "grad_norm": 1.2536169354652384, "learning_rate": 4.957139563200381e-06, "loss": 0.1816, "step": 7237 }, { "epoch": 0.52, "grad_norm": 1.2679151195075582, "learning_rate": 4.955981202551192e-06, "loss": 0.1848, "step": 7238 }, { "epoch": 0.52, "grad_norm": 1.2961624324536725, "learning_rate": 4.95482284426476e-06, "loss": 0.1885, "step": 7239 }, { "epoch": 0.52, "grad_norm": 1.5136766882175596, "learning_rate": 4.953664488403256e-06, "loss": 0.1699, "step": 7240 }, { "epoch": 0.52, "grad_norm": 1.4258857279984767, "learning_rate": 4.952506135028859e-06, "loss": 0.184, "step": 7241 }, { "epoch": 0.52, "grad_norm": 1.3284345979884045, "learning_rate": 4.951347784203744e-06, "loss": 0.197, "step": 7242 }, { "epoch": 0.52, "grad_norm": 5.021895950883319, "learning_rate": 4.950189435990087e-06, "loss": 0.5727, "step": 7243 }, { "epoch": 0.52, "grad_norm": 1.383968723727223, "learning_rate": 4.949031090450062e-06, "loss": 0.2434, "step": 7244 }, { "epoch": 0.52, "grad_norm": 1.5153094862349283, "learning_rate": 4.9478727476458465e-06, "loss": 0.247, "step": 7245 }, { "epoch": 0.52, "grad_norm": 1.446523160530515, "learning_rate": 4.946714407639614e-06, "loss": 0.2202, "step": 7246 }, { "epoch": 0.52, "grad_norm": 1.2291567626918671, "learning_rate": 4.945556070493542e-06, "loss": 0.1501, "step": 7247 }, { "epoch": 0.52, "grad_norm": 1.3395754633905068, "learning_rate": 4.944397736269803e-06, "loss": 0.1936, "step": 7248 }, { "epoch": 0.52, "grad_norm": 1.7101356585680272, "learning_rate": 4.943239405030572e-06, "loss": 0.1979, "step": 7249 }, { "epoch": 0.52, "grad_norm": 1.251054021132169, "learning_rate": 4.942081076838024e-06, "loss": 0.1662, "step": 7250 }, { "epoch": 0.52, "grad_norm": 1.2864146345263114, "learning_rate": 4.940922751754336e-06, "loss": 0.1824, "step": 7251 }, { "epoch": 0.52, "grad_norm": 1.4474859665105393, "learning_rate": 4.939764429841677e-06, "loss": 0.2224, "step": 7252 }, { "epoch": 0.52, "grad_norm": 6.806272501150447, "learning_rate": 4.9386061111622266e-06, "loss": 0.5632, "step": 7253 }, { "epoch": 0.52, "grad_norm": 1.1174045090648597, "learning_rate": 4.937447795778154e-06, "loss": 0.1268, "step": 7254 }, { "epoch": 0.52, "grad_norm": 1.3445036050985604, "learning_rate": 4.936289483751638e-06, "loss": 0.185, "step": 7255 }, { "epoch": 0.52, "grad_norm": 1.3899334386654179, "learning_rate": 4.9351311751448465e-06, "loss": 0.1586, "step": 7256 }, { "epoch": 0.52, "grad_norm": 1.3582388646262364, "learning_rate": 4.9339728700199575e-06, "loss": 0.1742, "step": 7257 }, { "epoch": 0.52, "grad_norm": 1.4797610692058374, "learning_rate": 4.932814568439142e-06, "loss": 0.2222, "step": 7258 }, { "epoch": 0.52, "grad_norm": 1.6336743400438396, "learning_rate": 4.931656270464574e-06, "loss": 0.1898, "step": 7259 }, { "epoch": 0.52, "grad_norm": 1.4547344512481002, "learning_rate": 4.9304979761584256e-06, "loss": 0.2186, "step": 7260 }, { "epoch": 0.52, "grad_norm": 1.1713156718117477, "learning_rate": 4.92933968558287e-06, "loss": 0.1807, "step": 7261 }, { "epoch": 0.52, "grad_norm": 1.4692113224133134, "learning_rate": 4.928181398800078e-06, "loss": 0.1757, "step": 7262 }, { "epoch": 0.52, "grad_norm": 1.1235768391195224, "learning_rate": 4.927023115872225e-06, "loss": 0.1663, "step": 7263 }, { "epoch": 0.52, "grad_norm": 1.5541117011835093, "learning_rate": 4.9258648368614796e-06, "loss": 0.2246, "step": 7264 }, { "epoch": 0.52, "grad_norm": 1.2904602747690472, "learning_rate": 4.924706561830015e-06, "loss": 0.2148, "step": 7265 }, { "epoch": 0.52, "grad_norm": 13.952410729401024, "learning_rate": 4.923548290840005e-06, "loss": 0.4815, "step": 7266 }, { "epoch": 0.52, "grad_norm": 4.936815840751015, "learning_rate": 4.922390023953618e-06, "loss": 0.5943, "step": 7267 }, { "epoch": 0.52, "grad_norm": 1.32528501384882, "learning_rate": 4.921231761233027e-06, "loss": 0.1896, "step": 7268 }, { "epoch": 0.52, "grad_norm": 1.4407976403259906, "learning_rate": 4.9200735027404e-06, "loss": 0.2312, "step": 7269 }, { "epoch": 0.52, "grad_norm": 1.4380365246466567, "learning_rate": 4.918915248537912e-06, "loss": 0.2366, "step": 7270 }, { "epoch": 0.52, "grad_norm": 1.0975541839221759, "learning_rate": 4.91775699868773e-06, "loss": 0.1797, "step": 7271 }, { "epoch": 0.52, "grad_norm": 1.2486216966963446, "learning_rate": 4.9165987532520276e-06, "loss": 0.1758, "step": 7272 }, { "epoch": 0.52, "grad_norm": 1.501037422263749, "learning_rate": 4.915440512292971e-06, "loss": 0.2041, "step": 7273 }, { "epoch": 0.52, "grad_norm": 1.2208790924061084, "learning_rate": 4.914282275872732e-06, "loss": 0.1662, "step": 7274 }, { "epoch": 0.52, "grad_norm": 3.9215426657282593, "learning_rate": 4.913124044053479e-06, "loss": 0.5136, "step": 7275 }, { "epoch": 0.52, "grad_norm": 1.3753466016961633, "learning_rate": 4.911965816897385e-06, "loss": 0.161, "step": 7276 }, { "epoch": 0.52, "grad_norm": 1.2745614531755465, "learning_rate": 4.910807594466613e-06, "loss": 0.1475, "step": 7277 }, { "epoch": 0.52, "grad_norm": 1.19092527556746, "learning_rate": 4.909649376823337e-06, "loss": 0.155, "step": 7278 }, { "epoch": 0.52, "grad_norm": 1.4481344305296573, "learning_rate": 4.908491164029723e-06, "loss": 0.2063, "step": 7279 }, { "epoch": 0.52, "grad_norm": 1.329560080587342, "learning_rate": 4.90733295614794e-06, "loss": 0.2525, "step": 7280 }, { "epoch": 0.52, "grad_norm": 1.2523304727835112, "learning_rate": 4.906174753240155e-06, "loss": 0.19, "step": 7281 }, { "epoch": 0.52, "grad_norm": 1.4429945406171445, "learning_rate": 4.905016555368538e-06, "loss": 0.2218, "step": 7282 }, { "epoch": 0.52, "grad_norm": 1.3177743814041725, "learning_rate": 4.9038583625952525e-06, "loss": 0.204, "step": 7283 }, { "epoch": 0.52, "grad_norm": 1.276489494284318, "learning_rate": 4.902700174982471e-06, "loss": 0.1768, "step": 7284 }, { "epoch": 0.52, "grad_norm": 1.37507967873485, "learning_rate": 4.901541992592355e-06, "loss": 0.1652, "step": 7285 }, { "epoch": 0.52, "grad_norm": 1.3298418072268594, "learning_rate": 4.900383815487075e-06, "loss": 0.1957, "step": 7286 }, { "epoch": 0.52, "grad_norm": 1.2971847884593566, "learning_rate": 4.899225643728795e-06, "loss": 0.1912, "step": 7287 }, { "epoch": 0.52, "grad_norm": 1.1235143231379767, "learning_rate": 4.8980674773796845e-06, "loss": 0.1456, "step": 7288 }, { "epoch": 0.52, "grad_norm": 1.5511653697656087, "learning_rate": 4.896909316501904e-06, "loss": 0.2052, "step": 7289 }, { "epoch": 0.52, "grad_norm": 1.2497891335025184, "learning_rate": 4.895751161157624e-06, "loss": 0.2156, "step": 7290 }, { "epoch": 0.52, "grad_norm": 1.2982925137909171, "learning_rate": 4.894593011409006e-06, "loss": 0.1841, "step": 7291 }, { "epoch": 0.52, "grad_norm": 6.609937657003099, "learning_rate": 4.893434867318218e-06, "loss": 0.6731, "step": 7292 }, { "epoch": 0.52, "grad_norm": 1.337492621747176, "learning_rate": 4.892276728947421e-06, "loss": 0.2329, "step": 7293 }, { "epoch": 0.52, "grad_norm": 1.2141356776287322, "learning_rate": 4.8911185963587834e-06, "loss": 0.1669, "step": 7294 }, { "epoch": 0.52, "grad_norm": 1.5056853718097494, "learning_rate": 4.889960469614464e-06, "loss": 0.243, "step": 7295 }, { "epoch": 0.52, "grad_norm": 1.3035038475868024, "learning_rate": 4.88880234877663e-06, "loss": 0.1248, "step": 7296 }, { "epoch": 0.52, "grad_norm": 1.390247196683685, "learning_rate": 4.887644233907448e-06, "loss": 0.2051, "step": 7297 }, { "epoch": 0.52, "grad_norm": 1.3209611805997716, "learning_rate": 4.886486125069073e-06, "loss": 0.177, "step": 7298 }, { "epoch": 0.52, "grad_norm": 1.2989256119094814, "learning_rate": 4.885328022323674e-06, "loss": 0.1899, "step": 7299 }, { "epoch": 0.52, "grad_norm": 1.2629127564680402, "learning_rate": 4.884169925733409e-06, "loss": 0.2017, "step": 7300 }, { "epoch": 0.52, "grad_norm": 1.141562905397782, "learning_rate": 4.883011835360446e-06, "loss": 0.1262, "step": 7301 }, { "epoch": 0.52, "grad_norm": 1.2695280126944313, "learning_rate": 4.881853751266939e-06, "loss": 0.1835, "step": 7302 }, { "epoch": 0.52, "grad_norm": 1.2108592159030154, "learning_rate": 4.880695673515057e-06, "loss": 0.164, "step": 7303 }, { "epoch": 0.52, "grad_norm": 1.2205562621302926, "learning_rate": 4.879537602166953e-06, "loss": 0.1844, "step": 7304 }, { "epoch": 0.52, "grad_norm": 1.3908186249477168, "learning_rate": 4.878379537284796e-06, "loss": 0.1812, "step": 7305 }, { "epoch": 0.52, "grad_norm": 1.3569030537246227, "learning_rate": 4.877221478930739e-06, "loss": 0.1674, "step": 7306 }, { "epoch": 0.52, "grad_norm": 1.280329524705964, "learning_rate": 4.876063427166948e-06, "loss": 0.2007, "step": 7307 }, { "epoch": 0.52, "grad_norm": 1.3008768496869145, "learning_rate": 4.874905382055578e-06, "loss": 0.1788, "step": 7308 }, { "epoch": 0.52, "grad_norm": 1.3407500633903797, "learning_rate": 4.873747343658791e-06, "loss": 0.2049, "step": 7309 }, { "epoch": 0.52, "grad_norm": 1.390948046598325, "learning_rate": 4.872589312038744e-06, "loss": 0.1898, "step": 7310 }, { "epoch": 0.52, "grad_norm": 1.2537742403987546, "learning_rate": 4.871431287257599e-06, "loss": 0.1903, "step": 7311 }, { "epoch": 0.52, "grad_norm": 1.4346444093672632, "learning_rate": 4.870273269377508e-06, "loss": 0.1695, "step": 7312 }, { "epoch": 0.52, "grad_norm": 1.354917485961351, "learning_rate": 4.869115258460636e-06, "loss": 0.1927, "step": 7313 }, { "epoch": 0.52, "grad_norm": 1.2857880213701396, "learning_rate": 4.867957254569134e-06, "loss": 0.1861, "step": 7314 }, { "epoch": 0.52, "grad_norm": 1.15796483859027, "learning_rate": 4.8667992577651646e-06, "loss": 0.1625, "step": 7315 }, { "epoch": 0.52, "grad_norm": 1.4327851727478282, "learning_rate": 4.86564126811088e-06, "loss": 0.1738, "step": 7316 }, { "epoch": 0.52, "grad_norm": 1.2862982121576751, "learning_rate": 4.8644832856684394e-06, "loss": 0.2144, "step": 7317 }, { "epoch": 0.52, "grad_norm": 1.6023695815945183, "learning_rate": 4.863325310499996e-06, "loss": 0.1993, "step": 7318 }, { "epoch": 0.52, "grad_norm": 1.5543733946376874, "learning_rate": 4.862167342667709e-06, "loss": 0.2116, "step": 7319 }, { "epoch": 0.52, "grad_norm": 9.339510360605741, "learning_rate": 4.86100938223373e-06, "loss": 0.586, "step": 7320 }, { "epoch": 0.52, "grad_norm": 1.3631627320827617, "learning_rate": 4.859851429260217e-06, "loss": 0.1683, "step": 7321 }, { "epoch": 0.52, "grad_norm": 1.2872997094989862, "learning_rate": 4.8586934838093204e-06, "loss": 0.187, "step": 7322 }, { "epoch": 0.52, "grad_norm": 1.3033670413965044, "learning_rate": 4.8575355459431985e-06, "loss": 0.1505, "step": 7323 }, { "epoch": 0.52, "grad_norm": 1.3982163016026572, "learning_rate": 4.856377615724001e-06, "loss": 0.2444, "step": 7324 }, { "epoch": 0.52, "grad_norm": 6.541083080057253, "learning_rate": 4.8552196932138825e-06, "loss": 0.3898, "step": 7325 }, { "epoch": 0.52, "grad_norm": 1.4586587922520873, "learning_rate": 4.854061778474995e-06, "loss": 0.2184, "step": 7326 }, { "epoch": 0.52, "grad_norm": 1.2563978131545368, "learning_rate": 4.852903871569494e-06, "loss": 0.1896, "step": 7327 }, { "epoch": 0.52, "grad_norm": 1.3299224526319926, "learning_rate": 4.851745972559526e-06, "loss": 0.2022, "step": 7328 }, { "epoch": 0.52, "grad_norm": 1.206617621403913, "learning_rate": 4.850588081507246e-06, "loss": 0.1611, "step": 7329 }, { "epoch": 0.52, "grad_norm": 1.224242849284024, "learning_rate": 4.8494301984748045e-06, "loss": 0.2086, "step": 7330 }, { "epoch": 0.52, "grad_norm": 1.269434723831549, "learning_rate": 4.848272323524351e-06, "loss": 0.1818, "step": 7331 }, { "epoch": 0.52, "grad_norm": 1.3987682151947491, "learning_rate": 4.847114456718039e-06, "loss": 0.2254, "step": 7332 }, { "epoch": 0.52, "grad_norm": 1.0772069578775725, "learning_rate": 4.845956598118012e-06, "loss": 0.1523, "step": 7333 }, { "epoch": 0.52, "grad_norm": 11.949436907086621, "learning_rate": 4.844798747786425e-06, "loss": 0.5902, "step": 7334 }, { "epoch": 0.52, "grad_norm": 1.3276774537208853, "learning_rate": 4.843640905785423e-06, "loss": 0.1978, "step": 7335 }, { "epoch": 0.52, "grad_norm": 1.6061625362681544, "learning_rate": 4.84248307217716e-06, "loss": 0.2129, "step": 7336 }, { "epoch": 0.52, "grad_norm": 1.4096761993167843, "learning_rate": 4.841325247023776e-06, "loss": 0.2032, "step": 7337 }, { "epoch": 0.52, "grad_norm": 1.0976466420717723, "learning_rate": 4.840167430387423e-06, "loss": 0.1471, "step": 7338 }, { "epoch": 0.52, "grad_norm": 6.966879966219873, "learning_rate": 4.839009622330247e-06, "loss": 0.6256, "step": 7339 }, { "epoch": 0.53, "grad_norm": 1.1759283384509396, "learning_rate": 4.837851822914397e-06, "loss": 0.2016, "step": 7340 }, { "epoch": 0.53, "grad_norm": 1.0655314852737376, "learning_rate": 4.836694032202015e-06, "loss": 0.1386, "step": 7341 }, { "epoch": 0.53, "grad_norm": 1.285416307320226, "learning_rate": 4.83553625025525e-06, "loss": 0.1709, "step": 7342 }, { "epoch": 0.53, "grad_norm": 1.3097383374586917, "learning_rate": 4.834378477136244e-06, "loss": 0.1742, "step": 7343 }, { "epoch": 0.53, "grad_norm": 1.2942769505797476, "learning_rate": 4.8332207129071455e-06, "loss": 0.1936, "step": 7344 }, { "epoch": 0.53, "grad_norm": 1.2984396213558214, "learning_rate": 4.832062957630095e-06, "loss": 0.2043, "step": 7345 }, { "epoch": 0.53, "grad_norm": 1.3171272321483678, "learning_rate": 4.830905211367238e-06, "loss": 0.2066, "step": 7346 }, { "epoch": 0.53, "grad_norm": 1.2074312636259634, "learning_rate": 4.829747474180717e-06, "loss": 0.176, "step": 7347 }, { "epoch": 0.53, "grad_norm": 6.025901396791245, "learning_rate": 4.828589746132677e-06, "loss": 0.6538, "step": 7348 }, { "epoch": 0.53, "grad_norm": 2.036414668709515, "learning_rate": 4.827432027285257e-06, "loss": 0.1872, "step": 7349 }, { "epoch": 0.53, "grad_norm": 1.5767917376482379, "learning_rate": 4.8262743177006e-06, "loss": 0.2192, "step": 7350 }, { "epoch": 0.53, "grad_norm": 1.5482291273528677, "learning_rate": 4.825116617440848e-06, "loss": 0.2522, "step": 7351 }, { "epoch": 0.53, "grad_norm": 1.4126738962314416, "learning_rate": 4.823958926568142e-06, "loss": 0.2297, "step": 7352 }, { "epoch": 0.53, "grad_norm": 1.3682289194408515, "learning_rate": 4.82280124514462e-06, "loss": 0.1847, "step": 7353 }, { "epoch": 0.53, "grad_norm": 1.2850062988418043, "learning_rate": 4.821643573232424e-06, "loss": 0.1714, "step": 7354 }, { "epoch": 0.53, "grad_norm": 1.6083594227127784, "learning_rate": 4.8204859108936915e-06, "loss": 0.2067, "step": 7355 }, { "epoch": 0.53, "grad_norm": 1.5074176420267948, "learning_rate": 4.819328258190564e-06, "loss": 0.2392, "step": 7356 }, { "epoch": 0.53, "grad_norm": 1.4540648071303213, "learning_rate": 4.818170615185176e-06, "loss": 0.219, "step": 7357 }, { "epoch": 0.53, "grad_norm": 1.422685529173656, "learning_rate": 4.817012981939668e-06, "loss": 0.2197, "step": 7358 }, { "epoch": 0.53, "grad_norm": 1.5068611539789891, "learning_rate": 4.815855358516175e-06, "loss": 0.1801, "step": 7359 }, { "epoch": 0.53, "grad_norm": 5.44486765944861, "learning_rate": 4.814697744976835e-06, "loss": 0.6532, "step": 7360 }, { "epoch": 0.53, "grad_norm": 1.3196680337249584, "learning_rate": 4.813540141383786e-06, "loss": 0.1932, "step": 7361 }, { "epoch": 0.53, "grad_norm": 1.3848005487820005, "learning_rate": 4.812382547799159e-06, "loss": 0.1772, "step": 7362 }, { "epoch": 0.53, "grad_norm": 1.403622958238679, "learning_rate": 4.811224964285092e-06, "loss": 0.2088, "step": 7363 }, { "epoch": 0.53, "grad_norm": 1.3377788197977927, "learning_rate": 4.8100673909037185e-06, "loss": 0.1793, "step": 7364 }, { "epoch": 0.53, "grad_norm": 1.4195117443644716, "learning_rate": 4.808909827717175e-06, "loss": 0.1896, "step": 7365 }, { "epoch": 0.53, "grad_norm": 1.2542331270645244, "learning_rate": 4.807752274787592e-06, "loss": 0.1919, "step": 7366 }, { "epoch": 0.53, "grad_norm": 1.2640004220954202, "learning_rate": 4.8065947321771025e-06, "loss": 0.1916, "step": 7367 }, { "epoch": 0.53, "grad_norm": 8.994655482737985, "learning_rate": 4.805437199947838e-06, "loss": 0.5996, "step": 7368 }, { "epoch": 0.53, "grad_norm": 1.5101926574229056, "learning_rate": 4.804279678161935e-06, "loss": 0.1927, "step": 7369 }, { "epoch": 0.53, "grad_norm": 4.307332046894558, "learning_rate": 4.803122166881519e-06, "loss": 0.5202, "step": 7370 }, { "epoch": 0.53, "grad_norm": 1.445615989769837, "learning_rate": 4.801964666168723e-06, "loss": 0.2136, "step": 7371 }, { "epoch": 0.53, "grad_norm": 8.072966814033737, "learning_rate": 4.800807176085677e-06, "loss": 0.3656, "step": 7372 }, { "epoch": 0.53, "grad_norm": 1.355198698443477, "learning_rate": 4.799649696694512e-06, "loss": 0.1735, "step": 7373 }, { "epoch": 0.53, "grad_norm": 1.3406970666226732, "learning_rate": 4.7984922280573535e-06, "loss": 0.2022, "step": 7374 }, { "epoch": 0.53, "grad_norm": 1.4860760816275518, "learning_rate": 4.7973347702363336e-06, "loss": 0.2404, "step": 7375 }, { "epoch": 0.53, "grad_norm": 1.305830879975686, "learning_rate": 4.796177323293576e-06, "loss": 0.1722, "step": 7376 }, { "epoch": 0.53, "grad_norm": 1.4354173743669323, "learning_rate": 4.795019887291212e-06, "loss": 0.2449, "step": 7377 }, { "epoch": 0.53, "grad_norm": 1.2934662271546125, "learning_rate": 4.793862462291364e-06, "loss": 0.1762, "step": 7378 }, { "epoch": 0.53, "grad_norm": 1.216034262255652, "learning_rate": 4.792705048356161e-06, "loss": 0.1899, "step": 7379 }, { "epoch": 0.53, "grad_norm": 1.3691484553183713, "learning_rate": 4.791547645547727e-06, "loss": 0.2153, "step": 7380 }, { "epoch": 0.53, "grad_norm": 1.2679985149349193, "learning_rate": 4.790390253928189e-06, "loss": 0.1903, "step": 7381 }, { "epoch": 0.53, "grad_norm": 1.2434538104662756, "learning_rate": 4.789232873559667e-06, "loss": 0.1468, "step": 7382 }, { "epoch": 0.53, "grad_norm": 1.373274718606192, "learning_rate": 4.788075504504289e-06, "loss": 0.1627, "step": 7383 }, { "epoch": 0.53, "grad_norm": 10.26386636235655, "learning_rate": 4.7869181468241735e-06, "loss": 0.5167, "step": 7384 }, { "epoch": 0.53, "grad_norm": 1.5483747842385602, "learning_rate": 4.785760800581448e-06, "loss": 0.2335, "step": 7385 }, { "epoch": 0.53, "grad_norm": 1.50393022660587, "learning_rate": 4.784603465838231e-06, "loss": 0.25, "step": 7386 }, { "epoch": 0.53, "grad_norm": 1.3695507054958678, "learning_rate": 4.783446142656643e-06, "loss": 0.2185, "step": 7387 }, { "epoch": 0.53, "grad_norm": 1.3315442409394023, "learning_rate": 4.782288831098806e-06, "loss": 0.1834, "step": 7388 }, { "epoch": 0.53, "grad_norm": 1.3363698480693038, "learning_rate": 4.781131531226842e-06, "loss": 0.1948, "step": 7389 }, { "epoch": 0.53, "grad_norm": 1.400715007782383, "learning_rate": 4.779974243102865e-06, "loss": 0.2007, "step": 7390 }, { "epoch": 0.53, "grad_norm": 1.3040043148159026, "learning_rate": 4.778816966788998e-06, "loss": 0.1652, "step": 7391 }, { "epoch": 0.53, "grad_norm": 1.3135387225702022, "learning_rate": 4.777659702347356e-06, "loss": 0.1936, "step": 7392 }, { "epoch": 0.53, "grad_norm": 1.4537390399218433, "learning_rate": 4.776502449840058e-06, "loss": 0.168, "step": 7393 }, { "epoch": 0.53, "grad_norm": 1.3480107666932997, "learning_rate": 4.775345209329224e-06, "loss": 0.1763, "step": 7394 }, { "epoch": 0.53, "grad_norm": 1.2992857960915318, "learning_rate": 4.774187980876963e-06, "loss": 0.2042, "step": 7395 }, { "epoch": 0.53, "grad_norm": 1.4740279415729771, "learning_rate": 4.773030764545395e-06, "loss": 0.2113, "step": 7396 }, { "epoch": 0.53, "grad_norm": 1.3182544174688373, "learning_rate": 4.771873560396634e-06, "loss": 0.2, "step": 7397 }, { "epoch": 0.53, "grad_norm": 1.3711383785423088, "learning_rate": 4.770716368492795e-06, "loss": 0.2283, "step": 7398 }, { "epoch": 0.53, "grad_norm": 1.3289072097781582, "learning_rate": 4.769559188895989e-06, "loss": 0.1949, "step": 7399 }, { "epoch": 0.53, "grad_norm": 1.3983638592501968, "learning_rate": 4.768402021668332e-06, "loss": 0.174, "step": 7400 }, { "epoch": 0.53, "grad_norm": 1.2628474933458138, "learning_rate": 4.767244866871931e-06, "loss": 0.1843, "step": 7401 }, { "epoch": 0.53, "grad_norm": 5.380312232625276, "learning_rate": 4.7660877245689045e-06, "loss": 0.5109, "step": 7402 }, { "epoch": 0.53, "grad_norm": 1.235837644737068, "learning_rate": 4.764930594821357e-06, "loss": 0.1482, "step": 7403 }, { "epoch": 0.53, "grad_norm": 6.0942579645975785, "learning_rate": 4.7637734776914045e-06, "loss": 0.7299, "step": 7404 }, { "epoch": 0.53, "grad_norm": 1.258313857088213, "learning_rate": 4.762616373241151e-06, "loss": 0.1932, "step": 7405 }, { "epoch": 0.53, "grad_norm": 1.3604786415206325, "learning_rate": 4.7614592815327076e-06, "loss": 0.161, "step": 7406 }, { "epoch": 0.53, "grad_norm": 1.323515850043093, "learning_rate": 4.760302202628182e-06, "loss": 0.1998, "step": 7407 }, { "epoch": 0.53, "grad_norm": 1.2450979967927167, "learning_rate": 4.759145136589683e-06, "loss": 0.179, "step": 7408 }, { "epoch": 0.53, "grad_norm": 1.3952837437189276, "learning_rate": 4.757988083479315e-06, "loss": 0.1935, "step": 7409 }, { "epoch": 0.53, "grad_norm": 1.1107711498467083, "learning_rate": 4.756831043359186e-06, "loss": 0.1306, "step": 7410 }, { "epoch": 0.53, "grad_norm": 1.283819317095004, "learning_rate": 4.7556740162913995e-06, "loss": 0.1669, "step": 7411 }, { "epoch": 0.53, "grad_norm": 1.4586722670483339, "learning_rate": 4.754517002338064e-06, "loss": 0.1994, "step": 7412 }, { "epoch": 0.53, "grad_norm": 1.3961467076645901, "learning_rate": 4.753360001561277e-06, "loss": 0.2379, "step": 7413 }, { "epoch": 0.53, "grad_norm": 1.3582956062301503, "learning_rate": 4.752203014023147e-06, "loss": 0.2069, "step": 7414 }, { "epoch": 0.53, "grad_norm": 1.1938817043450436, "learning_rate": 4.7510460397857736e-06, "loss": 0.1645, "step": 7415 }, { "epoch": 0.53, "grad_norm": 1.3700288286196967, "learning_rate": 4.749889078911262e-06, "loss": 0.196, "step": 7416 }, { "epoch": 0.53, "grad_norm": 1.3045945122636486, "learning_rate": 4.748732131461709e-06, "loss": 0.2054, "step": 7417 }, { "epoch": 0.53, "grad_norm": 1.2976636552735104, "learning_rate": 4.747575197499218e-06, "loss": 0.188, "step": 7418 }, { "epoch": 0.53, "grad_norm": 1.4570696624844335, "learning_rate": 4.746418277085887e-06, "loss": 0.229, "step": 7419 }, { "epoch": 0.53, "grad_norm": 7.17924883019989, "learning_rate": 4.7452613702838166e-06, "loss": 0.6187, "step": 7420 }, { "epoch": 0.53, "grad_norm": 1.3237423818177605, "learning_rate": 4.744104477155102e-06, "loss": 0.256, "step": 7421 }, { "epoch": 0.53, "grad_norm": 1.3302607452389341, "learning_rate": 4.742947597761844e-06, "loss": 0.2334, "step": 7422 }, { "epoch": 0.53, "grad_norm": 1.3270104082507541, "learning_rate": 4.741790732166136e-06, "loss": 0.1957, "step": 7423 }, { "epoch": 0.53, "grad_norm": 1.2403488693908353, "learning_rate": 4.740633880430076e-06, "loss": 0.1545, "step": 7424 }, { "epoch": 0.53, "grad_norm": 1.3007745766263163, "learning_rate": 4.739477042615761e-06, "loss": 0.2103, "step": 7425 }, { "epoch": 0.53, "grad_norm": 1.4512589295588125, "learning_rate": 4.738320218785281e-06, "loss": 0.2166, "step": 7426 }, { "epoch": 0.53, "grad_norm": 1.264973534188588, "learning_rate": 4.737163409000733e-06, "loss": 0.1884, "step": 7427 }, { "epoch": 0.53, "grad_norm": 1.288248777721767, "learning_rate": 4.736006613324209e-06, "loss": 0.1572, "step": 7428 }, { "epoch": 0.53, "grad_norm": 1.5302413702590902, "learning_rate": 4.734849831817803e-06, "loss": 0.2239, "step": 7429 }, { "epoch": 0.53, "grad_norm": 1.3954710446929588, "learning_rate": 4.7336930645436015e-06, "loss": 0.2184, "step": 7430 }, { "epoch": 0.53, "grad_norm": 4.415124432577513, "learning_rate": 4.7325363115637005e-06, "loss": 0.621, "step": 7431 }, { "epoch": 0.53, "grad_norm": 5.123139573331286, "learning_rate": 4.7313795729401855e-06, "loss": 0.5965, "step": 7432 }, { "epoch": 0.53, "grad_norm": 1.4131847385151435, "learning_rate": 4.730222848735151e-06, "loss": 0.2261, "step": 7433 }, { "epoch": 0.53, "grad_norm": 1.376843979404407, "learning_rate": 4.72906613901068e-06, "loss": 0.1794, "step": 7434 }, { "epoch": 0.53, "grad_norm": 1.5843620467800101, "learning_rate": 4.727909443828862e-06, "loss": 0.1976, "step": 7435 }, { "epoch": 0.53, "grad_norm": 1.3093756956624991, "learning_rate": 4.726752763251785e-06, "loss": 0.1942, "step": 7436 }, { "epoch": 0.53, "grad_norm": 1.5223379067965448, "learning_rate": 4.7255960973415345e-06, "loss": 0.2039, "step": 7437 }, { "epoch": 0.53, "grad_norm": 1.4044720436427227, "learning_rate": 4.7244394461601944e-06, "loss": 0.2192, "step": 7438 }, { "epoch": 0.53, "grad_norm": 1.3230537537598956, "learning_rate": 4.723282809769851e-06, "loss": 0.1693, "step": 7439 }, { "epoch": 0.53, "grad_norm": 1.352622555753637, "learning_rate": 4.722126188232586e-06, "loss": 0.1913, "step": 7440 }, { "epoch": 0.53, "grad_norm": 1.4375877015157081, "learning_rate": 4.720969581610485e-06, "loss": 0.1867, "step": 7441 }, { "epoch": 0.53, "grad_norm": 1.147654026772895, "learning_rate": 4.7198129899656266e-06, "loss": 0.173, "step": 7442 }, { "epoch": 0.53, "grad_norm": 1.1345156910842074, "learning_rate": 4.7186564133600945e-06, "loss": 0.1657, "step": 7443 }, { "epoch": 0.53, "grad_norm": 1.3109128805295256, "learning_rate": 4.717499851855967e-06, "loss": 0.2205, "step": 7444 }, { "epoch": 0.53, "grad_norm": 1.211770489295749, "learning_rate": 4.716343305515329e-06, "loss": 0.2163, "step": 7445 }, { "epoch": 0.53, "grad_norm": 1.4094523703247277, "learning_rate": 4.715186774400251e-06, "loss": 0.1874, "step": 7446 }, { "epoch": 0.53, "grad_norm": 1.3395440935286418, "learning_rate": 4.714030258572818e-06, "loss": 0.1841, "step": 7447 }, { "epoch": 0.53, "grad_norm": 1.121677750773963, "learning_rate": 4.7128737580951024e-06, "loss": 0.1791, "step": 7448 }, { "epoch": 0.53, "grad_norm": 1.407790279044471, "learning_rate": 4.711717273029187e-06, "loss": 0.2071, "step": 7449 }, { "epoch": 0.53, "grad_norm": 1.2893210635600285, "learning_rate": 4.710560803437139e-06, "loss": 0.1854, "step": 7450 }, { "epoch": 0.53, "grad_norm": 1.3638126877601455, "learning_rate": 4.709404349381038e-06, "loss": 0.1856, "step": 7451 }, { "epoch": 0.53, "grad_norm": 1.307991108455758, "learning_rate": 4.708247910922958e-06, "loss": 0.1723, "step": 7452 }, { "epoch": 0.53, "grad_norm": 4.142350626110311, "learning_rate": 4.707091488124972e-06, "loss": 0.465, "step": 7453 }, { "epoch": 0.53, "grad_norm": 1.1069841203677042, "learning_rate": 4.705935081049149e-06, "loss": 0.1912, "step": 7454 }, { "epoch": 0.53, "grad_norm": 1.498612101800132, "learning_rate": 4.704778689757564e-06, "loss": 0.208, "step": 7455 }, { "epoch": 0.53, "grad_norm": 1.280886250263835, "learning_rate": 4.7036223143122845e-06, "loss": 0.2169, "step": 7456 }, { "epoch": 0.53, "grad_norm": 1.1795375075721113, "learning_rate": 4.702465954775382e-06, "loss": 0.1969, "step": 7457 }, { "epoch": 0.53, "grad_norm": 5.142871764078737, "learning_rate": 4.701309611208927e-06, "loss": 0.7533, "step": 7458 }, { "epoch": 0.53, "grad_norm": 6.948718341608626, "learning_rate": 4.700153283674984e-06, "loss": 0.6144, "step": 7459 }, { "epoch": 0.53, "grad_norm": 1.4117807233681705, "learning_rate": 4.698996972235622e-06, "loss": 0.1671, "step": 7460 }, { "epoch": 0.53, "grad_norm": 1.3606036414712264, "learning_rate": 4.697840676952905e-06, "loss": 0.2165, "step": 7461 }, { "epoch": 0.53, "grad_norm": 1.2644966230161345, "learning_rate": 4.696684397888904e-06, "loss": 0.1832, "step": 7462 }, { "epoch": 0.53, "grad_norm": 1.3744329374141788, "learning_rate": 4.695528135105676e-06, "loss": 0.1898, "step": 7463 }, { "epoch": 0.53, "grad_norm": 1.6040346584296856, "learning_rate": 4.694371888665289e-06, "loss": 0.1852, "step": 7464 }, { "epoch": 0.53, "grad_norm": 1.1906228712447857, "learning_rate": 4.693215658629805e-06, "loss": 0.1744, "step": 7465 }, { "epoch": 0.53, "grad_norm": 1.3656986106250484, "learning_rate": 4.692059445061287e-06, "loss": 0.2176, "step": 7466 }, { "epoch": 0.53, "grad_norm": 1.3854840961978976, "learning_rate": 4.690903248021792e-06, "loss": 0.2009, "step": 7467 }, { "epoch": 0.53, "grad_norm": 1.318983167426717, "learning_rate": 4.6897470675733845e-06, "loss": 0.1753, "step": 7468 }, { "epoch": 0.53, "grad_norm": 1.421237380734001, "learning_rate": 4.68859090377812e-06, "loss": 0.2127, "step": 7469 }, { "epoch": 0.53, "grad_norm": 1.4008717756530908, "learning_rate": 4.6874347566980615e-06, "loss": 0.2463, "step": 7470 }, { "epoch": 0.53, "grad_norm": 1.343637960279099, "learning_rate": 4.686278626395262e-06, "loss": 0.1773, "step": 7471 }, { "epoch": 0.53, "grad_norm": 1.3895064173217797, "learning_rate": 4.685122512931779e-06, "loss": 0.187, "step": 7472 }, { "epoch": 0.53, "grad_norm": 1.3697850465019976, "learning_rate": 4.683966416369668e-06, "loss": 0.2068, "step": 7473 }, { "epoch": 0.53, "grad_norm": 1.3948972938503301, "learning_rate": 4.682810336770987e-06, "loss": 0.2518, "step": 7474 }, { "epoch": 0.53, "grad_norm": 1.5889777664355842, "learning_rate": 4.681654274197784e-06, "loss": 0.2258, "step": 7475 }, { "epoch": 0.53, "grad_norm": 1.311569480262528, "learning_rate": 4.680498228712116e-06, "loss": 0.2146, "step": 7476 }, { "epoch": 0.53, "grad_norm": 1.3034001980187484, "learning_rate": 4.6793422003760335e-06, "loss": 0.1844, "step": 7477 }, { "epoch": 0.53, "grad_norm": 1.232291608204105, "learning_rate": 4.678186189251589e-06, "loss": 0.1741, "step": 7478 }, { "epoch": 0.53, "grad_norm": 1.377896008808318, "learning_rate": 4.67703019540083e-06, "loss": 0.2015, "step": 7479 }, { "epoch": 0.54, "grad_norm": 1.2794902669522432, "learning_rate": 4.6758742188858074e-06, "loss": 0.1934, "step": 7480 }, { "epoch": 0.54, "grad_norm": 1.3800895729808818, "learning_rate": 4.674718259768569e-06, "loss": 0.2113, "step": 7481 }, { "epoch": 0.54, "grad_norm": 1.3367197074514325, "learning_rate": 4.673562318111163e-06, "loss": 0.2051, "step": 7482 }, { "epoch": 0.54, "grad_norm": 1.5832329453571012, "learning_rate": 4.672406393975634e-06, "loss": 0.2527, "step": 7483 }, { "epoch": 0.54, "grad_norm": 1.3684726914153058, "learning_rate": 4.671250487424028e-06, "loss": 0.1914, "step": 7484 }, { "epoch": 0.54, "grad_norm": 1.3536269044892348, "learning_rate": 4.67009459851839e-06, "loss": 0.1867, "step": 7485 }, { "epoch": 0.54, "grad_norm": 1.4262834924989731, "learning_rate": 4.6689387273207645e-06, "loss": 0.1721, "step": 7486 }, { "epoch": 0.54, "grad_norm": 1.2255674605852958, "learning_rate": 4.667782873893192e-06, "loss": 0.1861, "step": 7487 }, { "epoch": 0.54, "grad_norm": 1.4113078698314363, "learning_rate": 4.666627038297713e-06, "loss": 0.1686, "step": 7488 }, { "epoch": 0.54, "grad_norm": 1.1850875320488745, "learning_rate": 4.665471220596373e-06, "loss": 0.1526, "step": 7489 }, { "epoch": 0.54, "grad_norm": 1.3635934995601118, "learning_rate": 4.664315420851207e-06, "loss": 0.1939, "step": 7490 }, { "epoch": 0.54, "grad_norm": 1.2169037576271426, "learning_rate": 4.6631596391242575e-06, "loss": 0.1639, "step": 7491 }, { "epoch": 0.54, "grad_norm": 1.3870638583559538, "learning_rate": 4.662003875477559e-06, "loss": 0.1849, "step": 7492 }, { "epoch": 0.54, "grad_norm": 1.3929222477791734, "learning_rate": 4.66084812997315e-06, "loss": 0.1716, "step": 7493 }, { "epoch": 0.54, "grad_norm": 1.5004389583288535, "learning_rate": 4.659692402673066e-06, "loss": 0.2015, "step": 7494 }, { "epoch": 0.54, "grad_norm": 1.2789358029101445, "learning_rate": 4.6585366936393426e-06, "loss": 0.1674, "step": 7495 }, { "epoch": 0.54, "grad_norm": 1.251974506077591, "learning_rate": 4.657381002934011e-06, "loss": 0.1939, "step": 7496 }, { "epoch": 0.54, "grad_norm": 1.2939403466858708, "learning_rate": 4.656225330619109e-06, "loss": 0.1747, "step": 7497 }, { "epoch": 0.54, "grad_norm": 1.3076814191177963, "learning_rate": 4.655069676756662e-06, "loss": 0.1888, "step": 7498 }, { "epoch": 0.54, "grad_norm": 1.4023440936505025, "learning_rate": 4.653914041408708e-06, "loss": 0.1917, "step": 7499 }, { "epoch": 0.54, "grad_norm": 1.4275234864810196, "learning_rate": 4.652758424637271e-06, "loss": 0.2178, "step": 7500 }, { "epoch": 0.54, "grad_norm": 1.3259454583573873, "learning_rate": 4.651602826504384e-06, "loss": 0.2261, "step": 7501 }, { "epoch": 0.54, "grad_norm": 1.4680337762354, "learning_rate": 4.65044724707207e-06, "loss": 0.2325, "step": 7502 }, { "epoch": 0.54, "grad_norm": 1.4337010140557995, "learning_rate": 4.649291686402362e-06, "loss": 0.1967, "step": 7503 }, { "epoch": 0.54, "grad_norm": 1.3486810938849187, "learning_rate": 4.648136144557282e-06, "loss": 0.2117, "step": 7504 }, { "epoch": 0.54, "grad_norm": 1.109743908957312, "learning_rate": 4.646980621598857e-06, "loss": 0.1537, "step": 7505 }, { "epoch": 0.54, "grad_norm": 1.4549351604558525, "learning_rate": 4.645825117589108e-06, "loss": 0.2045, "step": 7506 }, { "epoch": 0.54, "grad_norm": 1.2968438773634885, "learning_rate": 4.644669632590061e-06, "loss": 0.1714, "step": 7507 }, { "epoch": 0.54, "grad_norm": 4.686711346071377, "learning_rate": 4.643514166663735e-06, "loss": 0.5402, "step": 7508 }, { "epoch": 0.54, "grad_norm": 1.2988118235537465, "learning_rate": 4.642358719872154e-06, "loss": 0.1872, "step": 7509 }, { "epoch": 0.54, "grad_norm": 1.1427461215374288, "learning_rate": 4.641203292277334e-06, "loss": 0.1632, "step": 7510 }, { "epoch": 0.54, "grad_norm": 1.2831724595970118, "learning_rate": 4.640047883941297e-06, "loss": 0.172, "step": 7511 }, { "epoch": 0.54, "grad_norm": 5.865323521539073, "learning_rate": 4.638892494926059e-06, "loss": 0.6696, "step": 7512 }, { "epoch": 0.54, "grad_norm": 1.4393047060717468, "learning_rate": 4.63773712529364e-06, "loss": 0.1904, "step": 7513 }, { "epoch": 0.54, "grad_norm": 1.3759243712168225, "learning_rate": 4.63658177510605e-06, "loss": 0.1896, "step": 7514 }, { "epoch": 0.54, "grad_norm": 1.3481478225622276, "learning_rate": 4.6354264444253074e-06, "loss": 0.1842, "step": 7515 }, { "epoch": 0.54, "grad_norm": 1.3884466004113205, "learning_rate": 4.634271133313423e-06, "loss": 0.1802, "step": 7516 }, { "epoch": 0.54, "grad_norm": 1.2918102157741091, "learning_rate": 4.633115841832415e-06, "loss": 0.2138, "step": 7517 }, { "epoch": 0.54, "grad_norm": 1.3310691111340116, "learning_rate": 4.631960570044289e-06, "loss": 0.1508, "step": 7518 }, { "epoch": 0.54, "grad_norm": 5.264819766548765, "learning_rate": 4.630805318011057e-06, "loss": 0.5573, "step": 7519 }, { "epoch": 0.54, "grad_norm": 1.1631998281480738, "learning_rate": 4.629650085794728e-06, "loss": 0.1952, "step": 7520 }, { "epoch": 0.54, "grad_norm": 1.3439238083948173, "learning_rate": 4.628494873457312e-06, "loss": 0.1699, "step": 7521 }, { "epoch": 0.54, "grad_norm": 5.842345628272662, "learning_rate": 4.627339681060817e-06, "loss": 0.5393, "step": 7522 }, { "epoch": 0.54, "grad_norm": 1.0914234065820243, "learning_rate": 4.626184508667245e-06, "loss": 0.16, "step": 7523 }, { "epoch": 0.54, "grad_norm": 1.3198296337502753, "learning_rate": 4.625029356338605e-06, "loss": 0.1945, "step": 7524 }, { "epoch": 0.54, "grad_norm": 1.240577267266923, "learning_rate": 4.623874224136899e-06, "loss": 0.1719, "step": 7525 }, { "epoch": 0.54, "grad_norm": 6.3020901308010515, "learning_rate": 4.622719112124131e-06, "loss": 0.4178, "step": 7526 }, { "epoch": 0.54, "grad_norm": 5.019397175830867, "learning_rate": 4.621564020362301e-06, "loss": 0.5541, "step": 7527 }, { "epoch": 0.54, "grad_norm": 1.101645737169648, "learning_rate": 4.620408948913411e-06, "loss": 0.1543, "step": 7528 }, { "epoch": 0.54, "grad_norm": 1.4374458817863476, "learning_rate": 4.61925389783946e-06, "loss": 0.1734, "step": 7529 }, { "epoch": 0.54, "grad_norm": 1.3755195847278285, "learning_rate": 4.61809886720245e-06, "loss": 0.1906, "step": 7530 }, { "epoch": 0.54, "grad_norm": 1.30963836133564, "learning_rate": 4.616943857064374e-06, "loss": 0.2104, "step": 7531 }, { "epoch": 0.54, "grad_norm": 1.2075327034494507, "learning_rate": 4.61578886748723e-06, "loss": 0.1746, "step": 7532 }, { "epoch": 0.54, "grad_norm": 6.779955664580308, "learning_rate": 4.614633898533012e-06, "loss": 0.6518, "step": 7533 }, { "epoch": 0.54, "grad_norm": 1.2242157545446113, "learning_rate": 4.613478950263718e-06, "loss": 0.2192, "step": 7534 }, { "epoch": 0.54, "grad_norm": 1.3555237232100728, "learning_rate": 4.612324022741336e-06, "loss": 0.1843, "step": 7535 }, { "epoch": 0.54, "grad_norm": 1.4840891814848796, "learning_rate": 4.611169116027861e-06, "loss": 0.2418, "step": 7536 }, { "epoch": 0.54, "grad_norm": 1.306316589654052, "learning_rate": 4.610014230185283e-06, "loss": 0.2046, "step": 7537 }, { "epoch": 0.54, "grad_norm": 1.249244277568884, "learning_rate": 4.608859365275592e-06, "loss": 0.2019, "step": 7538 }, { "epoch": 0.54, "grad_norm": 3.4920018052575506, "learning_rate": 4.6077045213607765e-06, "loss": 0.6069, "step": 7539 }, { "epoch": 0.54, "grad_norm": 1.4129609254472701, "learning_rate": 4.606549698502824e-06, "loss": 0.1677, "step": 7540 }, { "epoch": 0.54, "grad_norm": 1.2423768338269527, "learning_rate": 4.605394896763719e-06, "loss": 0.1848, "step": 7541 }, { "epoch": 0.54, "grad_norm": 1.2695356553963164, "learning_rate": 4.6042401162054505e-06, "loss": 0.1698, "step": 7542 }, { "epoch": 0.54, "grad_norm": 5.096200604788515, "learning_rate": 4.6030853568899985e-06, "loss": 0.6798, "step": 7543 }, { "epoch": 0.54, "grad_norm": 1.3630152253698309, "learning_rate": 4.601930618879349e-06, "loss": 0.1722, "step": 7544 }, { "epoch": 0.54, "grad_norm": 1.2593746356112543, "learning_rate": 4.600775902235481e-06, "loss": 0.1632, "step": 7545 }, { "epoch": 0.54, "grad_norm": 4.027903368709246, "learning_rate": 4.599621207020378e-06, "loss": 0.4485, "step": 7546 }, { "epoch": 0.54, "grad_norm": 1.374625255429694, "learning_rate": 4.598466533296017e-06, "loss": 0.2033, "step": 7547 }, { "epoch": 0.54, "grad_norm": 1.332674177182077, "learning_rate": 4.597311881124378e-06, "loss": 0.1971, "step": 7548 }, { "epoch": 0.54, "grad_norm": 1.34482185464182, "learning_rate": 4.5961572505674366e-06, "loss": 0.2033, "step": 7549 }, { "epoch": 0.54, "grad_norm": 1.300902800842386, "learning_rate": 4.595002641687171e-06, "loss": 0.1764, "step": 7550 }, { "epoch": 0.54, "grad_norm": 1.2058640786208512, "learning_rate": 4.593848054545554e-06, "loss": 0.1452, "step": 7551 }, { "epoch": 0.54, "grad_norm": 1.2849828629062505, "learning_rate": 4.592693489204558e-06, "loss": 0.1982, "step": 7552 }, { "epoch": 0.54, "grad_norm": 1.4008869405139501, "learning_rate": 4.591538945726159e-06, "loss": 0.1973, "step": 7553 }, { "epoch": 0.54, "grad_norm": 1.304243118961976, "learning_rate": 4.590384424172326e-06, "loss": 0.2125, "step": 7554 }, { "epoch": 0.54, "grad_norm": 1.342644413558855, "learning_rate": 4.589229924605031e-06, "loss": 0.1614, "step": 7555 }, { "epoch": 0.54, "grad_norm": 1.308308718935779, "learning_rate": 4.588075447086241e-06, "loss": 0.1505, "step": 7556 }, { "epoch": 0.54, "grad_norm": 1.265498558405239, "learning_rate": 4.586920991677925e-06, "loss": 0.1833, "step": 7557 }, { "epoch": 0.54, "grad_norm": 1.2584525625248717, "learning_rate": 4.585766558442048e-06, "loss": 0.1877, "step": 7558 }, { "epoch": 0.54, "grad_norm": 1.309145710531472, "learning_rate": 4.5846121474405785e-06, "loss": 0.1564, "step": 7559 }, { "epoch": 0.54, "grad_norm": 1.3969622179243657, "learning_rate": 4.583457758735477e-06, "loss": 0.1888, "step": 7560 }, { "epoch": 0.54, "grad_norm": 1.2793479149031906, "learning_rate": 4.582303392388708e-06, "loss": 0.1566, "step": 7561 }, { "epoch": 0.54, "grad_norm": 4.823089829673408, "learning_rate": 4.581149048462234e-06, "loss": 0.4043, "step": 7562 }, { "epoch": 0.54, "grad_norm": 1.2684776190768705, "learning_rate": 4.579994727018016e-06, "loss": 0.1395, "step": 7563 }, { "epoch": 0.54, "grad_norm": 1.2576126783819088, "learning_rate": 4.578840428118011e-06, "loss": 0.183, "step": 7564 }, { "epoch": 0.54, "grad_norm": 1.2702637318743508, "learning_rate": 4.57768615182418e-06, "loss": 0.1948, "step": 7565 }, { "epoch": 0.54, "grad_norm": 1.4805358862960174, "learning_rate": 4.576531898198476e-06, "loss": 0.1965, "step": 7566 }, { "epoch": 0.54, "grad_norm": 1.26378113181666, "learning_rate": 4.575377667302861e-06, "loss": 0.2153, "step": 7567 }, { "epoch": 0.54, "grad_norm": 1.6633196945167759, "learning_rate": 4.574223459199284e-06, "loss": 0.2223, "step": 7568 }, { "epoch": 0.54, "grad_norm": 1.376264941339721, "learning_rate": 4.573069273949699e-06, "loss": 0.194, "step": 7569 }, { "epoch": 0.54, "grad_norm": 1.2850630917590276, "learning_rate": 4.57191511161606e-06, "loss": 0.1705, "step": 7570 }, { "epoch": 0.54, "grad_norm": 1.511314043309628, "learning_rate": 4.570760972260319e-06, "loss": 0.1983, "step": 7571 }, { "epoch": 0.54, "grad_norm": 1.5023694424470495, "learning_rate": 4.5696068559444225e-06, "loss": 0.1911, "step": 7572 }, { "epoch": 0.54, "grad_norm": 1.254463503366863, "learning_rate": 4.56845276273032e-06, "loss": 0.2016, "step": 7573 }, { "epoch": 0.54, "grad_norm": 1.6821706405477255, "learning_rate": 4.567298692679958e-06, "loss": 0.2431, "step": 7574 }, { "epoch": 0.54, "grad_norm": 1.198669834798029, "learning_rate": 4.566144645855285e-06, "loss": 0.1715, "step": 7575 }, { "epoch": 0.54, "grad_norm": 1.4474491698567924, "learning_rate": 4.5649906223182425e-06, "loss": 0.1791, "step": 7576 }, { "epoch": 0.54, "grad_norm": 1.2769803923089442, "learning_rate": 4.5638366221307754e-06, "loss": 0.1705, "step": 7577 }, { "epoch": 0.54, "grad_norm": 1.3908191385630573, "learning_rate": 4.562682645354825e-06, "loss": 0.2149, "step": 7578 }, { "epoch": 0.54, "grad_norm": 1.2633180333909195, "learning_rate": 4.5615286920523345e-06, "loss": 0.189, "step": 7579 }, { "epoch": 0.54, "grad_norm": 1.2960606295975712, "learning_rate": 4.56037476228524e-06, "loss": 0.1673, "step": 7580 }, { "epoch": 0.54, "grad_norm": 1.2738513537287657, "learning_rate": 4.5592208561154825e-06, "loss": 0.2171, "step": 7581 }, { "epoch": 0.54, "grad_norm": 1.2724456045668626, "learning_rate": 4.5580669736049975e-06, "loss": 0.2003, "step": 7582 }, { "epoch": 0.54, "grad_norm": 1.1839303717154426, "learning_rate": 4.556913114815724e-06, "loss": 0.1692, "step": 7583 }, { "epoch": 0.54, "grad_norm": 1.4657497201831253, "learning_rate": 4.555759279809591e-06, "loss": 0.1638, "step": 7584 }, { "epoch": 0.54, "grad_norm": 5.563836419321114, "learning_rate": 4.554605468648537e-06, "loss": 0.5793, "step": 7585 }, { "epoch": 0.54, "grad_norm": 1.2695155758587455, "learning_rate": 4.553451681394492e-06, "loss": 0.2016, "step": 7586 }, { "epoch": 0.54, "grad_norm": 1.44041059713771, "learning_rate": 4.552297918109385e-06, "loss": 0.1619, "step": 7587 }, { "epoch": 0.54, "grad_norm": 1.5426582928806007, "learning_rate": 4.55114417885515e-06, "loss": 0.2542, "step": 7588 }, { "epoch": 0.54, "grad_norm": 1.2512214925917666, "learning_rate": 4.54999046369371e-06, "loss": 0.1885, "step": 7589 }, { "epoch": 0.54, "grad_norm": 1.278180505609115, "learning_rate": 4.548836772686995e-06, "loss": 0.1848, "step": 7590 }, { "epoch": 0.54, "grad_norm": 1.485658284115507, "learning_rate": 4.54768310589693e-06, "loss": 0.1983, "step": 7591 }, { "epoch": 0.54, "grad_norm": 1.2676608549655977, "learning_rate": 4.546529463385439e-06, "loss": 0.1561, "step": 7592 }, { "epoch": 0.54, "grad_norm": 1.2340633501414175, "learning_rate": 4.545375845214443e-06, "loss": 0.1747, "step": 7593 }, { "epoch": 0.54, "grad_norm": 1.2386382547596009, "learning_rate": 4.544222251445868e-06, "loss": 0.1839, "step": 7594 }, { "epoch": 0.54, "grad_norm": 1.6249778921223967, "learning_rate": 4.54306868214163e-06, "loss": 0.1797, "step": 7595 }, { "epoch": 0.54, "grad_norm": 1.436637589173101, "learning_rate": 4.541915137363651e-06, "loss": 0.1863, "step": 7596 }, { "epoch": 0.54, "grad_norm": 1.3826570737009853, "learning_rate": 4.5407616171738465e-06, "loss": 0.2105, "step": 7597 }, { "epoch": 0.54, "grad_norm": 1.392578278075935, "learning_rate": 4.539608121634136e-06, "loss": 0.1852, "step": 7598 }, { "epoch": 0.54, "grad_norm": 1.266900368224982, "learning_rate": 4.538454650806429e-06, "loss": 0.1965, "step": 7599 }, { "epoch": 0.54, "grad_norm": 1.253535870652874, "learning_rate": 4.537301204752647e-06, "loss": 0.2074, "step": 7600 }, { "epoch": 0.54, "grad_norm": 1.294760603910289, "learning_rate": 4.5361477835346945e-06, "loss": 0.1667, "step": 7601 }, { "epoch": 0.54, "grad_norm": 1.2931409383464647, "learning_rate": 4.534994387214489e-06, "loss": 0.2008, "step": 7602 }, { "epoch": 0.54, "grad_norm": 1.361316797668219, "learning_rate": 4.5338410158539345e-06, "loss": 0.1645, "step": 7603 }, { "epoch": 0.54, "grad_norm": 1.2760861034435151, "learning_rate": 4.532687669514944e-06, "loss": 0.1299, "step": 7604 }, { "epoch": 0.54, "grad_norm": 1.2325380237400623, "learning_rate": 4.531534348259422e-06, "loss": 0.1746, "step": 7605 }, { "epoch": 0.54, "grad_norm": 1.5145411503839208, "learning_rate": 4.530381052149278e-06, "loss": 0.1769, "step": 7606 }, { "epoch": 0.54, "grad_norm": 1.2809818861273423, "learning_rate": 4.52922778124641e-06, "loss": 0.1699, "step": 7607 }, { "epoch": 0.54, "grad_norm": 1.3025998291888405, "learning_rate": 4.528074535612725e-06, "loss": 0.2047, "step": 7608 }, { "epoch": 0.54, "grad_norm": 7.7381682788340695, "learning_rate": 4.526921315310124e-06, "loss": 0.7048, "step": 7609 }, { "epoch": 0.54, "grad_norm": 1.5126814182320447, "learning_rate": 4.52576812040051e-06, "loss": 0.1934, "step": 7610 }, { "epoch": 0.54, "grad_norm": 1.2100335473942259, "learning_rate": 4.524614950945776e-06, "loss": 0.1834, "step": 7611 }, { "epoch": 0.54, "grad_norm": 5.4140016368185035, "learning_rate": 4.523461807007825e-06, "loss": 0.7099, "step": 7612 }, { "epoch": 0.54, "grad_norm": 1.1806095883348924, "learning_rate": 4.522308688648549e-06, "loss": 0.1752, "step": 7613 }, { "epoch": 0.54, "grad_norm": 1.2539706363369176, "learning_rate": 4.521155595929847e-06, "loss": 0.2141, "step": 7614 }, { "epoch": 0.54, "grad_norm": 1.4794814984763651, "learning_rate": 4.520002528913609e-06, "loss": 0.2121, "step": 7615 }, { "epoch": 0.54, "grad_norm": 1.2805312934785509, "learning_rate": 4.518849487661727e-06, "loss": 0.1746, "step": 7616 }, { "epoch": 0.54, "grad_norm": 1.3741800419092545, "learning_rate": 4.517696472236095e-06, "loss": 0.1752, "step": 7617 }, { "epoch": 0.54, "grad_norm": 1.432854104065085, "learning_rate": 4.516543482698599e-06, "loss": 0.1679, "step": 7618 }, { "epoch": 0.54, "grad_norm": 1.257439467499808, "learning_rate": 4.51539051911113e-06, "loss": 0.2049, "step": 7619 }, { "epoch": 0.55, "grad_norm": 1.3339940107768395, "learning_rate": 4.514237581535571e-06, "loss": 0.2032, "step": 7620 }, { "epoch": 0.55, "grad_norm": 4.852518096986699, "learning_rate": 4.51308467003381e-06, "loss": 0.6243, "step": 7621 }, { "epoch": 0.55, "grad_norm": 1.3656007815350302, "learning_rate": 4.511931784667728e-06, "loss": 0.2076, "step": 7622 }, { "epoch": 0.55, "grad_norm": 1.5777214263900718, "learning_rate": 4.510778925499211e-06, "loss": 0.2184, "step": 7623 }, { "epoch": 0.55, "grad_norm": 1.2257423908898823, "learning_rate": 4.509626092590136e-06, "loss": 0.1719, "step": 7624 }, { "epoch": 0.55, "grad_norm": 1.385941713122498, "learning_rate": 4.508473286002385e-06, "loss": 0.1949, "step": 7625 }, { "epoch": 0.55, "grad_norm": 1.3782898211041559, "learning_rate": 4.507320505797833e-06, "loss": 0.21, "step": 7626 }, { "epoch": 0.55, "grad_norm": 1.3109291431526804, "learning_rate": 4.506167752038363e-06, "loss": 0.159, "step": 7627 }, { "epoch": 0.55, "grad_norm": 1.3410471853579888, "learning_rate": 4.505015024785843e-06, "loss": 0.1748, "step": 7628 }, { "epoch": 0.55, "grad_norm": 1.3847826119841864, "learning_rate": 4.503862324102152e-06, "loss": 0.1889, "step": 7629 }, { "epoch": 0.55, "grad_norm": 1.3148862569750293, "learning_rate": 4.502709650049158e-06, "loss": 0.1607, "step": 7630 }, { "epoch": 0.55, "grad_norm": 7.540855081073755, "learning_rate": 4.501557002688738e-06, "loss": 0.5838, "step": 7631 }, { "epoch": 0.55, "grad_norm": 1.570811409071723, "learning_rate": 4.5004043820827544e-06, "loss": 0.2174, "step": 7632 }, { "epoch": 0.55, "grad_norm": 1.276373187646043, "learning_rate": 4.4992517882930805e-06, "loss": 0.1573, "step": 7633 }, { "epoch": 0.55, "grad_norm": 1.3324442199690818, "learning_rate": 4.49809922138158e-06, "loss": 0.1836, "step": 7634 }, { "epoch": 0.55, "grad_norm": 1.2190857705188312, "learning_rate": 4.496946681410121e-06, "loss": 0.1587, "step": 7635 }, { "epoch": 0.55, "grad_norm": 1.2294148997243335, "learning_rate": 4.495794168440564e-06, "loss": 0.1709, "step": 7636 }, { "epoch": 0.55, "grad_norm": 1.2031429042361435, "learning_rate": 4.494641682534774e-06, "loss": 0.1809, "step": 7637 }, { "epoch": 0.55, "grad_norm": 1.2322880093262887, "learning_rate": 4.49348922375461e-06, "loss": 0.1682, "step": 7638 }, { "epoch": 0.55, "grad_norm": 1.2378188690572838, "learning_rate": 4.492336792161934e-06, "loss": 0.1555, "step": 7639 }, { "epoch": 0.55, "grad_norm": 4.178347627996018, "learning_rate": 4.4911843878186e-06, "loss": 0.6494, "step": 7640 }, { "epoch": 0.55, "grad_norm": 4.618804740303724, "learning_rate": 4.490032010786468e-06, "loss": 0.4912, "step": 7641 }, { "epoch": 0.55, "grad_norm": 1.3047088388363703, "learning_rate": 4.48887966112739e-06, "loss": 0.2243, "step": 7642 }, { "epoch": 0.55, "grad_norm": 5.332952914112738, "learning_rate": 4.4877273389032235e-06, "loss": 0.5274, "step": 7643 }, { "epoch": 0.55, "grad_norm": 1.396317172745299, "learning_rate": 4.486575044175817e-06, "loss": 0.2087, "step": 7644 }, { "epoch": 0.55, "grad_norm": 4.83831864704029, "learning_rate": 4.485422777007022e-06, "loss": 0.6053, "step": 7645 }, { "epoch": 0.55, "grad_norm": 1.2944023438719365, "learning_rate": 4.484270537458688e-06, "loss": 0.2014, "step": 7646 }, { "epoch": 0.55, "grad_norm": 1.3810259300977725, "learning_rate": 4.4831183255926645e-06, "loss": 0.1871, "step": 7647 }, { "epoch": 0.55, "grad_norm": 1.4064722014692068, "learning_rate": 4.481966141470794e-06, "loss": 0.2015, "step": 7648 }, { "epoch": 0.55, "grad_norm": 5.528260472698978, "learning_rate": 4.480813985154923e-06, "loss": 0.6004, "step": 7649 }, { "epoch": 0.55, "grad_norm": 1.194047236025288, "learning_rate": 4.479661856706896e-06, "loss": 0.1484, "step": 7650 }, { "epoch": 0.55, "grad_norm": 1.3495871399599806, "learning_rate": 4.478509756188551e-06, "loss": 0.2414, "step": 7651 }, { "epoch": 0.55, "grad_norm": 1.2853243161128571, "learning_rate": 4.477357683661734e-06, "loss": 0.1789, "step": 7652 }, { "epoch": 0.55, "grad_norm": 1.3553009704959187, "learning_rate": 4.476205639188277e-06, "loss": 0.1771, "step": 7653 }, { "epoch": 0.55, "grad_norm": 5.385990867737515, "learning_rate": 4.475053622830022e-06, "loss": 0.6294, "step": 7654 }, { "epoch": 0.55, "grad_norm": 1.1536496408226689, "learning_rate": 4.473901634648802e-06, "loss": 0.1575, "step": 7655 }, { "epoch": 0.55, "grad_norm": 1.3129452913116322, "learning_rate": 4.472749674706454e-06, "loss": 0.1753, "step": 7656 }, { "epoch": 0.55, "grad_norm": 1.7466696746898132, "learning_rate": 4.471597743064807e-06, "loss": 0.1916, "step": 7657 }, { "epoch": 0.55, "grad_norm": 1.2692794703412886, "learning_rate": 4.470445839785695e-06, "loss": 0.1549, "step": 7658 }, { "epoch": 0.55, "grad_norm": 1.2739099082518146, "learning_rate": 4.469293964930945e-06, "loss": 0.1983, "step": 7659 }, { "epoch": 0.55, "grad_norm": 1.1100022327362709, "learning_rate": 4.468142118562389e-06, "loss": 0.1641, "step": 7660 }, { "epoch": 0.55, "grad_norm": 1.5158421991195783, "learning_rate": 4.466990300741849e-06, "loss": 0.2573, "step": 7661 }, { "epoch": 0.55, "grad_norm": 1.3749292982859778, "learning_rate": 4.465838511531153e-06, "loss": 0.2253, "step": 7662 }, { "epoch": 0.55, "grad_norm": 1.3289800539419694, "learning_rate": 4.464686750992123e-06, "loss": 0.1992, "step": 7663 }, { "epoch": 0.55, "grad_norm": 1.2374468961601377, "learning_rate": 4.463535019186583e-06, "loss": 0.1941, "step": 7664 }, { "epoch": 0.55, "grad_norm": 1.2065443527052542, "learning_rate": 4.4623833161763504e-06, "loss": 0.1776, "step": 7665 }, { "epoch": 0.55, "grad_norm": 1.3738863431450612, "learning_rate": 4.461231642023247e-06, "loss": 0.1824, "step": 7666 }, { "epoch": 0.55, "grad_norm": 1.2259418504058657, "learning_rate": 4.460079996789086e-06, "loss": 0.1918, "step": 7667 }, { "epoch": 0.55, "grad_norm": 5.130615184541869, "learning_rate": 4.458928380535689e-06, "loss": 0.5615, "step": 7668 }, { "epoch": 0.55, "grad_norm": 1.2043430761350364, "learning_rate": 4.457776793324865e-06, "loss": 0.1846, "step": 7669 }, { "epoch": 0.55, "grad_norm": 1.3500739793228955, "learning_rate": 4.4566252352184295e-06, "loss": 0.2588, "step": 7670 }, { "epoch": 0.55, "grad_norm": 1.375942822650095, "learning_rate": 4.455473706278191e-06, "loss": 0.2346, "step": 7671 }, { "epoch": 0.55, "grad_norm": 1.4918395470846977, "learning_rate": 4.454322206565964e-06, "loss": 0.2179, "step": 7672 }, { "epoch": 0.55, "grad_norm": 1.4866011387046485, "learning_rate": 4.45317073614355e-06, "loss": 0.2003, "step": 7673 }, { "epoch": 0.55, "grad_norm": 4.775925502580284, "learning_rate": 4.452019295072759e-06, "loss": 0.5148, "step": 7674 }, { "epoch": 0.55, "grad_norm": 1.1478124023425795, "learning_rate": 4.450867883415395e-06, "loss": 0.1391, "step": 7675 }, { "epoch": 0.55, "grad_norm": 1.3199512792412464, "learning_rate": 4.449716501233264e-06, "loss": 0.2183, "step": 7676 }, { "epoch": 0.55, "grad_norm": 1.5675912938937444, "learning_rate": 4.448565148588162e-06, "loss": 0.2414, "step": 7677 }, { "epoch": 0.55, "grad_norm": 1.2442251995241251, "learning_rate": 4.447413825541893e-06, "loss": 0.1486, "step": 7678 }, { "epoch": 0.55, "grad_norm": 1.3126986408180328, "learning_rate": 4.446262532156255e-06, "loss": 0.2054, "step": 7679 }, { "epoch": 0.55, "grad_norm": 1.1866863748913334, "learning_rate": 4.4451112684930424e-06, "loss": 0.1577, "step": 7680 }, { "epoch": 0.55, "grad_norm": 1.2554538463734985, "learning_rate": 4.443960034614056e-06, "loss": 0.1682, "step": 7681 }, { "epoch": 0.55, "grad_norm": 1.091769917561093, "learning_rate": 4.442808830581083e-06, "loss": 0.1736, "step": 7682 }, { "epoch": 0.55, "grad_norm": 1.3551510520020444, "learning_rate": 4.44165765645592e-06, "loss": 0.2204, "step": 7683 }, { "epoch": 0.55, "grad_norm": 1.3215884841792749, "learning_rate": 4.4405065123003545e-06, "loss": 0.1812, "step": 7684 }, { "epoch": 0.55, "grad_norm": 1.486951679518656, "learning_rate": 4.439355398176179e-06, "loss": 0.1765, "step": 7685 }, { "epoch": 0.55, "grad_norm": 1.357060061908719, "learning_rate": 4.438204314145176e-06, "loss": 0.1813, "step": 7686 }, { "epoch": 0.55, "grad_norm": 1.35465376411796, "learning_rate": 4.437053260269134e-06, "loss": 0.1777, "step": 7687 }, { "epoch": 0.55, "grad_norm": 1.4573037479351156, "learning_rate": 4.435902236609837e-06, "loss": 0.2284, "step": 7688 }, { "epoch": 0.55, "grad_norm": 1.4884091768505359, "learning_rate": 4.434751243229068e-06, "loss": 0.2153, "step": 7689 }, { "epoch": 0.55, "grad_norm": 1.2969874536365351, "learning_rate": 4.433600280188605e-06, "loss": 0.1948, "step": 7690 }, { "epoch": 0.55, "grad_norm": 1.1516612674513016, "learning_rate": 4.432449347550229e-06, "loss": 0.1574, "step": 7691 }, { "epoch": 0.55, "grad_norm": 1.3876325812048094, "learning_rate": 4.431298445375717e-06, "loss": 0.225, "step": 7692 }, { "epoch": 0.55, "grad_norm": 1.283482116884178, "learning_rate": 4.430147573726846e-06, "loss": 0.1547, "step": 7693 }, { "epoch": 0.55, "grad_norm": 1.5369157680070666, "learning_rate": 4.428996732665389e-06, "loss": 0.2003, "step": 7694 }, { "epoch": 0.55, "grad_norm": 1.3606996292738782, "learning_rate": 4.4278459222531186e-06, "loss": 0.1775, "step": 7695 }, { "epoch": 0.55, "grad_norm": 1.3572762170629935, "learning_rate": 4.426695142551806e-06, "loss": 0.2054, "step": 7696 }, { "epoch": 0.55, "grad_norm": 1.199715928021808, "learning_rate": 4.425544393623222e-06, "loss": 0.1577, "step": 7697 }, { "epoch": 0.55, "grad_norm": 1.3356654721674448, "learning_rate": 4.424393675529131e-06, "loss": 0.1517, "step": 7698 }, { "epoch": 0.55, "grad_norm": 1.212667352098118, "learning_rate": 4.4232429883313025e-06, "loss": 0.1446, "step": 7699 }, { "epoch": 0.55, "grad_norm": 1.5512404322431161, "learning_rate": 4.422092332091497e-06, "loss": 0.2168, "step": 7700 }, { "epoch": 0.55, "grad_norm": 1.3176614496514911, "learning_rate": 4.4209417068714815e-06, "loss": 0.2102, "step": 7701 }, { "epoch": 0.55, "grad_norm": 1.3230262363448326, "learning_rate": 4.419791112733012e-06, "loss": 0.2017, "step": 7702 }, { "epoch": 0.55, "grad_norm": 1.3877718872339417, "learning_rate": 4.418640549737853e-06, "loss": 0.1845, "step": 7703 }, { "epoch": 0.55, "grad_norm": 1.1812623000476177, "learning_rate": 4.417490017947757e-06, "loss": 0.1626, "step": 7704 }, { "epoch": 0.55, "grad_norm": 1.339011679846205, "learning_rate": 4.416339517424485e-06, "loss": 0.1712, "step": 7705 }, { "epoch": 0.55, "grad_norm": 1.456604196950525, "learning_rate": 4.415189048229788e-06, "loss": 0.1765, "step": 7706 }, { "epoch": 0.55, "grad_norm": 1.5838390171455192, "learning_rate": 4.414038610425421e-06, "loss": 0.2243, "step": 7707 }, { "epoch": 0.55, "grad_norm": 7.648551389807109, "learning_rate": 4.412888204073131e-06, "loss": 0.6188, "step": 7708 }, { "epoch": 0.55, "grad_norm": 1.3215850028136427, "learning_rate": 4.411737829234671e-06, "loss": 0.193, "step": 7709 }, { "epoch": 0.55, "grad_norm": 1.398305488628446, "learning_rate": 4.4105874859717865e-06, "loss": 0.2211, "step": 7710 }, { "epoch": 0.55, "grad_norm": 1.2868866505526957, "learning_rate": 4.4094371743462265e-06, "loss": 0.1759, "step": 7711 }, { "epoch": 0.55, "grad_norm": 1.3518860568322544, "learning_rate": 4.40828689441973e-06, "loss": 0.2202, "step": 7712 }, { "epoch": 0.55, "grad_norm": 1.3471251694251827, "learning_rate": 4.407136646254042e-06, "loss": 0.2406, "step": 7713 }, { "epoch": 0.55, "grad_norm": 1.2948724788396258, "learning_rate": 4.405986429910906e-06, "loss": 0.1811, "step": 7714 }, { "epoch": 0.55, "grad_norm": 1.2456302034219648, "learning_rate": 4.404836245452056e-06, "loss": 0.1546, "step": 7715 }, { "epoch": 0.55, "grad_norm": 1.1099816376976996, "learning_rate": 4.403686092939235e-06, "loss": 0.1794, "step": 7716 }, { "epoch": 0.55, "grad_norm": 1.378698276222049, "learning_rate": 4.402535972434172e-06, "loss": 0.2076, "step": 7717 }, { "epoch": 0.55, "grad_norm": 1.3467082509390003, "learning_rate": 4.4013858839986074e-06, "loss": 0.1444, "step": 7718 }, { "epoch": 0.55, "grad_norm": 1.4001862795204687, "learning_rate": 4.400235827694268e-06, "loss": 0.1816, "step": 7719 }, { "epoch": 0.55, "grad_norm": 4.204061581459019, "learning_rate": 4.399085803582889e-06, "loss": 0.5724, "step": 7720 }, { "epoch": 0.55, "grad_norm": 1.479328599941078, "learning_rate": 4.397935811726195e-06, "loss": 0.2071, "step": 7721 }, { "epoch": 0.55, "grad_norm": 1.290825754864189, "learning_rate": 4.396785852185916e-06, "loss": 0.1622, "step": 7722 }, { "epoch": 0.55, "grad_norm": 1.2936411470425098, "learning_rate": 4.395635925023774e-06, "loss": 0.1625, "step": 7723 }, { "epoch": 0.55, "grad_norm": 5.3736115250499425, "learning_rate": 4.3944860303014975e-06, "loss": 0.6355, "step": 7724 }, { "epoch": 0.55, "grad_norm": 1.42249702890177, "learning_rate": 4.3933361680808025e-06, "loss": 0.1811, "step": 7725 }, { "epoch": 0.55, "grad_norm": 1.3360099910398058, "learning_rate": 4.392186338423413e-06, "loss": 0.2064, "step": 7726 }, { "epoch": 0.55, "grad_norm": 1.2890383273613972, "learning_rate": 4.391036541391046e-06, "loss": 0.1537, "step": 7727 }, { "epoch": 0.55, "grad_norm": 1.3937289595335949, "learning_rate": 4.38988677704542e-06, "loss": 0.1767, "step": 7728 }, { "epoch": 0.55, "grad_norm": 1.3668161056545798, "learning_rate": 4.388737045448245e-06, "loss": 0.1605, "step": 7729 }, { "epoch": 0.55, "grad_norm": 1.3630058465619108, "learning_rate": 4.387587346661239e-06, "loss": 0.1607, "step": 7730 }, { "epoch": 0.55, "grad_norm": 1.1323630667342475, "learning_rate": 4.386437680746109e-06, "loss": 0.169, "step": 7731 }, { "epoch": 0.55, "grad_norm": 1.347872470377837, "learning_rate": 4.385288047764571e-06, "loss": 0.2148, "step": 7732 }, { "epoch": 0.55, "grad_norm": 1.3327594204550057, "learning_rate": 4.3841384477783245e-06, "loss": 0.1572, "step": 7733 }, { "epoch": 0.55, "grad_norm": 1.3378062657827072, "learning_rate": 4.382988880849081e-06, "loss": 0.2009, "step": 7734 }, { "epoch": 0.55, "grad_norm": 1.1823208737770983, "learning_rate": 4.3818393470385415e-06, "loss": 0.1612, "step": 7735 }, { "epoch": 0.55, "grad_norm": 1.4422132982690001, "learning_rate": 4.380689846408413e-06, "loss": 0.1764, "step": 7736 }, { "epoch": 0.55, "grad_norm": 1.1332765231931248, "learning_rate": 4.379540379020391e-06, "loss": 0.1631, "step": 7737 }, { "epoch": 0.55, "grad_norm": 1.1555485927762972, "learning_rate": 4.378390944936177e-06, "loss": 0.1563, "step": 7738 }, { "epoch": 0.55, "grad_norm": 1.337241472251638, "learning_rate": 4.3772415442174665e-06, "loss": 0.182, "step": 7739 }, { "epoch": 0.55, "grad_norm": 1.1483837410748234, "learning_rate": 4.3760921769259585e-06, "loss": 0.1672, "step": 7740 }, { "epoch": 0.55, "grad_norm": 1.4549152001977281, "learning_rate": 4.374942843123341e-06, "loss": 0.2017, "step": 7741 }, { "epoch": 0.55, "grad_norm": 1.3961642380538595, "learning_rate": 4.37379354287131e-06, "loss": 0.2127, "step": 7742 }, { "epoch": 0.55, "grad_norm": 1.3451628456972229, "learning_rate": 4.372644276231553e-06, "loss": 0.1704, "step": 7743 }, { "epoch": 0.55, "grad_norm": 1.415441103104087, "learning_rate": 4.371495043265759e-06, "loss": 0.2084, "step": 7744 }, { "epoch": 0.55, "grad_norm": 1.330154176818541, "learning_rate": 4.370345844035615e-06, "loss": 0.257, "step": 7745 }, { "epoch": 0.55, "grad_norm": 1.254171297260292, "learning_rate": 4.369196678602803e-06, "loss": 0.2006, "step": 7746 }, { "epoch": 0.55, "grad_norm": 1.2749663368075461, "learning_rate": 4.368047547029009e-06, "loss": 0.1961, "step": 7747 }, { "epoch": 0.55, "grad_norm": 2.105323084636687, "learning_rate": 4.366898449375911e-06, "loss": 0.1929, "step": 7748 }, { "epoch": 0.55, "grad_norm": 1.2488997881719994, "learning_rate": 4.365749385705191e-06, "loss": 0.1454, "step": 7749 }, { "epoch": 0.55, "grad_norm": 1.2795606981307899, "learning_rate": 4.364600356078521e-06, "loss": 0.1682, "step": 7750 }, { "epoch": 0.55, "grad_norm": 1.2876196983727848, "learning_rate": 4.363451360557583e-06, "loss": 0.1636, "step": 7751 }, { "epoch": 0.55, "grad_norm": 1.1734999732280171, "learning_rate": 4.362302399204044e-06, "loss": 0.1787, "step": 7752 }, { "epoch": 0.55, "grad_norm": 1.326313846416316, "learning_rate": 4.361153472079581e-06, "loss": 0.1969, "step": 7753 }, { "epoch": 0.55, "grad_norm": 6.8639253908071165, "learning_rate": 4.36000457924586e-06, "loss": 0.5312, "step": 7754 }, { "epoch": 0.55, "grad_norm": 1.3786000634223605, "learning_rate": 4.3588557207645525e-06, "loss": 0.164, "step": 7755 }, { "epoch": 0.55, "grad_norm": 1.2963185734271767, "learning_rate": 4.35770689669732e-06, "loss": 0.225, "step": 7756 }, { "epoch": 0.55, "grad_norm": 1.2929559467648828, "learning_rate": 4.356558107105833e-06, "loss": 0.1952, "step": 7757 }, { "epoch": 0.55, "grad_norm": 1.215835579170114, "learning_rate": 4.355409352051748e-06, "loss": 0.1226, "step": 7758 }, { "epoch": 0.56, "grad_norm": 1.2669085324837415, "learning_rate": 4.354260631596729e-06, "loss": 0.1759, "step": 7759 }, { "epoch": 0.56, "grad_norm": 1.2025736754959084, "learning_rate": 4.353111945802433e-06, "loss": 0.2027, "step": 7760 }, { "epoch": 0.56, "grad_norm": 1.3456872739614776, "learning_rate": 4.35196329473052e-06, "loss": 0.2108, "step": 7761 }, { "epoch": 0.56, "grad_norm": 1.3532575574840335, "learning_rate": 4.350814678442641e-06, "loss": 0.2267, "step": 7762 }, { "epoch": 0.56, "grad_norm": 23.413922799173346, "learning_rate": 4.349666097000452e-06, "loss": 0.6456, "step": 7763 }, { "epoch": 0.56, "grad_norm": 1.4268280648441312, "learning_rate": 4.348517550465602e-06, "loss": 0.1927, "step": 7764 }, { "epoch": 0.56, "grad_norm": 1.5365798544856561, "learning_rate": 4.347369038899744e-06, "loss": 0.1907, "step": 7765 }, { "epoch": 0.56, "grad_norm": 1.327622353099601, "learning_rate": 4.3462205623645205e-06, "loss": 0.1696, "step": 7766 }, { "epoch": 0.56, "grad_norm": 1.402951663062515, "learning_rate": 4.345072120921581e-06, "loss": 0.192, "step": 7767 }, { "epoch": 0.56, "grad_norm": 1.2576826856790329, "learning_rate": 4.343923714632567e-06, "loss": 0.1675, "step": 7768 }, { "epoch": 0.56, "grad_norm": 1.5436845533994012, "learning_rate": 4.342775343559125e-06, "loss": 0.1949, "step": 7769 }, { "epoch": 0.56, "grad_norm": 1.2834247045929448, "learning_rate": 4.341627007762888e-06, "loss": 0.2001, "step": 7770 }, { "epoch": 0.56, "grad_norm": 1.3750610398448708, "learning_rate": 4.3404787073054995e-06, "loss": 0.1624, "step": 7771 }, { "epoch": 0.56, "grad_norm": 1.309295656554821, "learning_rate": 4.3393304422485925e-06, "loss": 0.1844, "step": 7772 }, { "epoch": 0.56, "grad_norm": 1.186387836871521, "learning_rate": 4.338182212653805e-06, "loss": 0.1632, "step": 7773 }, { "epoch": 0.56, "grad_norm": 5.150770753829921, "learning_rate": 4.337034018582764e-06, "loss": 0.6821, "step": 7774 }, { "epoch": 0.56, "grad_norm": 1.2610148343072487, "learning_rate": 4.335885860097106e-06, "loss": 0.2073, "step": 7775 }, { "epoch": 0.56, "grad_norm": 1.2447074238968894, "learning_rate": 4.334737737258454e-06, "loss": 0.1487, "step": 7776 }, { "epoch": 0.56, "grad_norm": 1.5440712397083656, "learning_rate": 4.333589650128437e-06, "loss": 0.1988, "step": 7777 }, { "epoch": 0.56, "grad_norm": 1.1871687601520655, "learning_rate": 4.332441598768684e-06, "loss": 0.1262, "step": 7778 }, { "epoch": 0.56, "grad_norm": 5.1565367206625226, "learning_rate": 4.33129358324081e-06, "loss": 0.7563, "step": 7779 }, { "epoch": 0.56, "grad_norm": 1.2738855503154205, "learning_rate": 4.3301456036064415e-06, "loss": 0.1503, "step": 7780 }, { "epoch": 0.56, "grad_norm": 1.4075016927803767, "learning_rate": 4.328997659927196e-06, "loss": 0.1526, "step": 7781 }, { "epoch": 0.56, "grad_norm": 1.4648039679916371, "learning_rate": 4.3278497522646905e-06, "loss": 0.2169, "step": 7782 }, { "epoch": 0.56, "grad_norm": 1.2573430201257563, "learning_rate": 4.326701880680539e-06, "loss": 0.2135, "step": 7783 }, { "epoch": 0.56, "grad_norm": 1.3898256534259048, "learning_rate": 4.325554045236357e-06, "loss": 0.2042, "step": 7784 }, { "epoch": 0.56, "grad_norm": 1.3543516767008708, "learning_rate": 4.324406245993752e-06, "loss": 0.2101, "step": 7785 }, { "epoch": 0.56, "grad_norm": 5.226943921383533, "learning_rate": 4.323258483014339e-06, "loss": 0.575, "step": 7786 }, { "epoch": 0.56, "grad_norm": 1.4314763589505322, "learning_rate": 4.322110756359719e-06, "loss": 0.1576, "step": 7787 }, { "epoch": 0.56, "grad_norm": 1.409908566610597, "learning_rate": 4.320963066091503e-06, "loss": 0.2192, "step": 7788 }, { "epoch": 0.56, "grad_norm": 1.2814594217472992, "learning_rate": 4.3198154122712895e-06, "loss": 0.185, "step": 7789 }, { "epoch": 0.56, "grad_norm": 1.122784697364961, "learning_rate": 4.318667794960685e-06, "loss": 0.173, "step": 7790 }, { "epoch": 0.56, "grad_norm": 1.3435530736413335, "learning_rate": 4.317520214221284e-06, "loss": 0.1928, "step": 7791 }, { "epoch": 0.56, "grad_norm": 4.393642304912983, "learning_rate": 4.316372670114689e-06, "loss": 0.5051, "step": 7792 }, { "epoch": 0.56, "grad_norm": 1.1967346924798075, "learning_rate": 4.315225162702491e-06, "loss": 0.1717, "step": 7793 }, { "epoch": 0.56, "grad_norm": 1.2881015550384576, "learning_rate": 4.314077692046288e-06, "loss": 0.194, "step": 7794 }, { "epoch": 0.56, "grad_norm": 1.5559467502100368, "learning_rate": 4.312930258207668e-06, "loss": 0.2058, "step": 7795 }, { "epoch": 0.56, "grad_norm": 6.694538469782157, "learning_rate": 4.311782861248223e-06, "loss": 0.6859, "step": 7796 }, { "epoch": 0.56, "grad_norm": 1.2535198148762579, "learning_rate": 4.310635501229537e-06, "loss": 0.1765, "step": 7797 }, { "epoch": 0.56, "grad_norm": 1.367019054527165, "learning_rate": 4.3094881782132025e-06, "loss": 0.1704, "step": 7798 }, { "epoch": 0.56, "grad_norm": 1.3147584899923683, "learning_rate": 4.308340892260798e-06, "loss": 0.2129, "step": 7799 }, { "epoch": 0.56, "grad_norm": 1.3463262510839447, "learning_rate": 4.307193643433907e-06, "loss": 0.1665, "step": 7800 }, { "epoch": 0.56, "grad_norm": 1.3796274317058435, "learning_rate": 4.306046431794107e-06, "loss": 0.1914, "step": 7801 }, { "epoch": 0.56, "grad_norm": 1.1781154971752665, "learning_rate": 4.304899257402979e-06, "loss": 0.1393, "step": 7802 }, { "epoch": 0.56, "grad_norm": 1.170996813416576, "learning_rate": 4.303752120322096e-06, "loss": 0.155, "step": 7803 }, { "epoch": 0.56, "grad_norm": 1.3239728894785163, "learning_rate": 4.302605020613036e-06, "loss": 0.1902, "step": 7804 }, { "epoch": 0.56, "grad_norm": 1.5258568606904732, "learning_rate": 4.301457958337365e-06, "loss": 0.2113, "step": 7805 }, { "epoch": 0.56, "grad_norm": 1.2684662439207885, "learning_rate": 4.300310933556657e-06, "loss": 0.1863, "step": 7806 }, { "epoch": 0.56, "grad_norm": 1.1479658599919356, "learning_rate": 4.299163946332477e-06, "loss": 0.1695, "step": 7807 }, { "epoch": 0.56, "grad_norm": 1.3493721063311346, "learning_rate": 4.298016996726393e-06, "loss": 0.172, "step": 7808 }, { "epoch": 0.56, "grad_norm": 1.1644324805896755, "learning_rate": 4.29687008479997e-06, "loss": 0.1933, "step": 7809 }, { "epoch": 0.56, "grad_norm": 1.3667846801454973, "learning_rate": 4.295723210614764e-06, "loss": 0.1986, "step": 7810 }, { "epoch": 0.56, "grad_norm": 1.388402057871557, "learning_rate": 4.29457637423234e-06, "loss": 0.2285, "step": 7811 }, { "epoch": 0.56, "grad_norm": 1.1189473036902835, "learning_rate": 4.2934295757142526e-06, "loss": 0.1391, "step": 7812 }, { "epoch": 0.56, "grad_norm": 1.4363010927709214, "learning_rate": 4.29228281512206e-06, "loss": 0.226, "step": 7813 }, { "epoch": 0.56, "grad_norm": 1.405228917955243, "learning_rate": 4.291136092517313e-06, "loss": 0.1985, "step": 7814 }, { "epoch": 0.56, "grad_norm": 1.3095548882209582, "learning_rate": 4.289989407961566e-06, "loss": 0.1808, "step": 7815 }, { "epoch": 0.56, "grad_norm": 1.2926219544885713, "learning_rate": 4.288842761516364e-06, "loss": 0.1778, "step": 7816 }, { "epoch": 0.56, "grad_norm": 1.449507286740925, "learning_rate": 4.287696153243261e-06, "loss": 0.1971, "step": 7817 }, { "epoch": 0.56, "grad_norm": 1.3323738032018198, "learning_rate": 4.286549583203796e-06, "loss": 0.1796, "step": 7818 }, { "epoch": 0.56, "grad_norm": 1.3987228609516833, "learning_rate": 4.285403051459516e-06, "loss": 0.2065, "step": 7819 }, { "epoch": 0.56, "grad_norm": 1.4951066627375353, "learning_rate": 4.28425655807196e-06, "loss": 0.2353, "step": 7820 }, { "epoch": 0.56, "grad_norm": 1.3420235913424998, "learning_rate": 4.283110103102671e-06, "loss": 0.188, "step": 7821 }, { "epoch": 0.56, "grad_norm": 1.6448780035576838, "learning_rate": 4.281963686613181e-06, "loss": 0.2538, "step": 7822 }, { "epoch": 0.56, "grad_norm": 1.5207459318515872, "learning_rate": 4.280817308665029e-06, "loss": 0.2122, "step": 7823 }, { "epoch": 0.56, "grad_norm": 1.3138608424173452, "learning_rate": 4.2796709693197455e-06, "loss": 0.1841, "step": 7824 }, { "epoch": 0.56, "grad_norm": 1.306407149009988, "learning_rate": 4.278524668638865e-06, "loss": 0.2036, "step": 7825 }, { "epoch": 0.56, "grad_norm": 1.318060315295124, "learning_rate": 4.277378406683912e-06, "loss": 0.1834, "step": 7826 }, { "epoch": 0.56, "grad_norm": 1.3468972692614767, "learning_rate": 4.276232183516416e-06, "loss": 0.1377, "step": 7827 }, { "epoch": 0.56, "grad_norm": 6.544812529316365, "learning_rate": 4.2750859991979e-06, "loss": 0.5618, "step": 7828 }, { "epoch": 0.56, "grad_norm": 1.2590485770516662, "learning_rate": 4.27393985378989e-06, "loss": 0.1929, "step": 7829 }, { "epoch": 0.56, "grad_norm": 5.169790051404965, "learning_rate": 4.272793747353902e-06, "loss": 0.6183, "step": 7830 }, { "epoch": 0.56, "grad_norm": 1.7547877431539443, "learning_rate": 4.2716476799514574e-06, "loss": 0.2129, "step": 7831 }, { "epoch": 0.56, "grad_norm": 1.4132170230477592, "learning_rate": 4.2705016516440705e-06, "loss": 0.1819, "step": 7832 }, { "epoch": 0.56, "grad_norm": 1.450838731543012, "learning_rate": 4.26935566249326e-06, "loss": 0.2044, "step": 7833 }, { "epoch": 0.56, "grad_norm": 1.24003940348491, "learning_rate": 4.268209712560531e-06, "loss": 0.1764, "step": 7834 }, { "epoch": 0.56, "grad_norm": 1.4179993261516857, "learning_rate": 4.2670638019074e-06, "loss": 0.2479, "step": 7835 }, { "epoch": 0.56, "grad_norm": 1.4821622761917188, "learning_rate": 4.265917930595371e-06, "loss": 0.2044, "step": 7836 }, { "epoch": 0.56, "grad_norm": 1.2765371592707864, "learning_rate": 4.264772098685952e-06, "loss": 0.1788, "step": 7837 }, { "epoch": 0.56, "grad_norm": 1.3862953257444357, "learning_rate": 4.263626306240645e-06, "loss": 0.2, "step": 7838 }, { "epoch": 0.56, "grad_norm": 1.1633404260035345, "learning_rate": 4.262480553320954e-06, "loss": 0.1488, "step": 7839 }, { "epoch": 0.56, "grad_norm": 1.241820596330925, "learning_rate": 4.261334839988375e-06, "loss": 0.1776, "step": 7840 }, { "epoch": 0.56, "grad_norm": 1.3396847449718594, "learning_rate": 4.260189166304407e-06, "loss": 0.199, "step": 7841 }, { "epoch": 0.56, "grad_norm": 1.2939209144666226, "learning_rate": 4.259043532330547e-06, "loss": 0.2097, "step": 7842 }, { "epoch": 0.56, "grad_norm": 1.4467526616411694, "learning_rate": 4.257897938128286e-06, "loss": 0.1672, "step": 7843 }, { "epoch": 0.56, "grad_norm": 1.1315376499932335, "learning_rate": 4.256752383759115e-06, "loss": 0.13, "step": 7844 }, { "epoch": 0.56, "grad_norm": 1.2743685374316147, "learning_rate": 4.255606869284523e-06, "loss": 0.1548, "step": 7845 }, { "epoch": 0.56, "grad_norm": 1.5428863183943617, "learning_rate": 4.254461394765999e-06, "loss": 0.1879, "step": 7846 }, { "epoch": 0.56, "grad_norm": 1.1800055995918612, "learning_rate": 4.253315960265024e-06, "loss": 0.176, "step": 7847 }, { "epoch": 0.56, "grad_norm": 1.5319742339025262, "learning_rate": 4.252170565843082e-06, "loss": 0.1715, "step": 7848 }, { "epoch": 0.56, "grad_norm": 1.2124224970404862, "learning_rate": 4.251025211561652e-06, "loss": 0.1654, "step": 7849 }, { "epoch": 0.56, "grad_norm": 1.3708294611253513, "learning_rate": 4.249879897482215e-06, "loss": 0.1508, "step": 7850 }, { "epoch": 0.56, "grad_norm": 1.3850634602537883, "learning_rate": 4.2487346236662435e-06, "loss": 0.1941, "step": 7851 }, { "epoch": 0.56, "grad_norm": 1.3717528097252316, "learning_rate": 4.247589390175214e-06, "loss": 0.1897, "step": 7852 }, { "epoch": 0.56, "grad_norm": 1.3860924625725142, "learning_rate": 4.246444197070596e-06, "loss": 0.1916, "step": 7853 }, { "epoch": 0.56, "grad_norm": 1.4362901853748111, "learning_rate": 4.2452990444138605e-06, "loss": 0.192, "step": 7854 }, { "epoch": 0.56, "grad_norm": 1.3173784577725747, "learning_rate": 4.244153932266473e-06, "loss": 0.1908, "step": 7855 }, { "epoch": 0.56, "grad_norm": 1.337733009455502, "learning_rate": 4.243008860689901e-06, "loss": 0.1968, "step": 7856 }, { "epoch": 0.56, "grad_norm": 1.8255030076008663, "learning_rate": 4.241863829745604e-06, "loss": 0.2001, "step": 7857 }, { "epoch": 0.56, "grad_norm": 1.3257029755817045, "learning_rate": 4.240718839495047e-06, "loss": 0.1626, "step": 7858 }, { "epoch": 0.56, "grad_norm": 1.4366838151419672, "learning_rate": 4.239573889999685e-06, "loss": 0.2299, "step": 7859 }, { "epoch": 0.56, "grad_norm": 1.3306153270602634, "learning_rate": 4.2384289813209754e-06, "loss": 0.1845, "step": 7860 }, { "epoch": 0.56, "grad_norm": 1.393184694831203, "learning_rate": 4.2372841135203725e-06, "loss": 0.193, "step": 7861 }, { "epoch": 0.56, "grad_norm": 1.2091667154866548, "learning_rate": 4.23613928665933e-06, "loss": 0.153, "step": 7862 }, { "epoch": 0.56, "grad_norm": 1.5236115475384011, "learning_rate": 4.234994500799293e-06, "loss": 0.2106, "step": 7863 }, { "epoch": 0.56, "grad_norm": 1.1456291973797892, "learning_rate": 4.233849756001714e-06, "loss": 0.1351, "step": 7864 }, { "epoch": 0.56, "grad_norm": 1.4553273025869284, "learning_rate": 4.232705052328036e-06, "loss": 0.191, "step": 7865 }, { "epoch": 0.56, "grad_norm": 1.184378848796924, "learning_rate": 4.231560389839703e-06, "loss": 0.1749, "step": 7866 }, { "epoch": 0.56, "grad_norm": 1.19820089275979, "learning_rate": 4.230415768598154e-06, "loss": 0.1729, "step": 7867 }, { "epoch": 0.56, "grad_norm": 1.2446325652360506, "learning_rate": 4.229271188664831e-06, "loss": 0.1654, "step": 7868 }, { "epoch": 0.56, "grad_norm": 1.2761419371451115, "learning_rate": 4.228126650101166e-06, "loss": 0.1694, "step": 7869 }, { "epoch": 0.56, "grad_norm": 4.407126955601497, "learning_rate": 4.226982152968599e-06, "loss": 0.5106, "step": 7870 }, { "epoch": 0.56, "grad_norm": 1.41692687117347, "learning_rate": 4.2258376973285566e-06, "loss": 0.1767, "step": 7871 }, { "epoch": 0.56, "grad_norm": 1.3069620435077771, "learning_rate": 4.2246932832424715e-06, "loss": 0.1804, "step": 7872 }, { "epoch": 0.56, "grad_norm": 1.2661105537629833, "learning_rate": 4.223548910771772e-06, "loss": 0.1603, "step": 7873 }, { "epoch": 0.56, "grad_norm": 1.3133761922692821, "learning_rate": 4.22240457997788e-06, "loss": 0.1699, "step": 7874 }, { "epoch": 0.56, "grad_norm": 1.3405997983644282, "learning_rate": 4.2212602909222235e-06, "loss": 0.2206, "step": 7875 }, { "epoch": 0.56, "grad_norm": 1.3197282998639068, "learning_rate": 4.220116043666219e-06, "loss": 0.1965, "step": 7876 }, { "epoch": 0.56, "grad_norm": 1.1809412372634818, "learning_rate": 4.218971838271289e-06, "loss": 0.1452, "step": 7877 }, { "epoch": 0.56, "grad_norm": 1.5855816284552255, "learning_rate": 4.217827674798845e-06, "loss": 0.2504, "step": 7878 }, { "epoch": 0.56, "grad_norm": 1.3674772086218194, "learning_rate": 4.216683553310308e-06, "loss": 0.192, "step": 7879 }, { "epoch": 0.56, "grad_norm": 1.2389672126651905, "learning_rate": 4.2155394738670814e-06, "loss": 0.175, "step": 7880 }, { "epoch": 0.56, "grad_norm": 1.3784447823987964, "learning_rate": 4.214395436530582e-06, "loss": 0.1678, "step": 7881 }, { "epoch": 0.56, "grad_norm": 6.769303510929099, "learning_rate": 4.213251441362214e-06, "loss": 0.5595, "step": 7882 }, { "epoch": 0.56, "grad_norm": 1.5267078788007482, "learning_rate": 4.212107488423383e-06, "loss": 0.2294, "step": 7883 }, { "epoch": 0.56, "grad_norm": 1.3939647565274438, "learning_rate": 4.210963577775492e-06, "loss": 0.1863, "step": 7884 }, { "epoch": 0.56, "grad_norm": 1.2255321542013293, "learning_rate": 4.209819709479942e-06, "loss": 0.1586, "step": 7885 }, { "epoch": 0.56, "grad_norm": 1.3430518485877314, "learning_rate": 4.208675883598129e-06, "loss": 0.2046, "step": 7886 }, { "epoch": 0.56, "grad_norm": 1.3353008012062375, "learning_rate": 4.207532100191454e-06, "loss": 0.1894, "step": 7887 }, { "epoch": 0.56, "grad_norm": 1.0338987165288436, "learning_rate": 4.206388359321304e-06, "loss": 0.0915, "step": 7888 }, { "epoch": 0.56, "grad_norm": 6.364735435305999, "learning_rate": 4.205244661049076e-06, "loss": 0.5883, "step": 7889 }, { "epoch": 0.56, "grad_norm": 1.3327251070751325, "learning_rate": 4.204101005436156e-06, "loss": 0.1891, "step": 7890 }, { "epoch": 0.56, "grad_norm": 1.3704925212680386, "learning_rate": 4.202957392543934e-06, "loss": 0.1904, "step": 7891 }, { "epoch": 0.56, "grad_norm": 1.1906147218188272, "learning_rate": 4.2018138224337915e-06, "loss": 0.1583, "step": 7892 }, { "epoch": 0.56, "grad_norm": 1.277297639707543, "learning_rate": 4.200670295167112e-06, "loss": 0.1768, "step": 7893 }, { "epoch": 0.56, "grad_norm": 1.3005763464178688, "learning_rate": 4.199526810805274e-06, "loss": 0.2023, "step": 7894 }, { "epoch": 0.56, "grad_norm": 1.3231971534917217, "learning_rate": 4.1983833694096595e-06, "loss": 0.1778, "step": 7895 }, { "epoch": 0.56, "grad_norm": 1.3513145261692916, "learning_rate": 4.19723997104164e-06, "loss": 0.1629, "step": 7896 }, { "epoch": 0.56, "grad_norm": 1.1766817696503347, "learning_rate": 4.19609661576259e-06, "loss": 0.1388, "step": 7897 }, { "epoch": 0.56, "grad_norm": 1.3214014213663674, "learning_rate": 4.194953303633877e-06, "loss": 0.1861, "step": 7898 }, { "epoch": 0.57, "grad_norm": 1.5039734449338713, "learning_rate": 4.193810034716877e-06, "loss": 0.2223, "step": 7899 }, { "epoch": 0.57, "grad_norm": 1.1293393454396783, "learning_rate": 4.192666809072948e-06, "loss": 0.156, "step": 7900 }, { "epoch": 0.57, "grad_norm": 1.2753459212792047, "learning_rate": 4.19152362676346e-06, "loss": 0.1708, "step": 7901 }, { "epoch": 0.57, "grad_norm": 1.4984188778770722, "learning_rate": 4.19038048784977e-06, "loss": 0.1941, "step": 7902 }, { "epoch": 0.57, "grad_norm": 1.4545772199429274, "learning_rate": 4.18923739239324e-06, "loss": 0.2005, "step": 7903 }, { "epoch": 0.57, "grad_norm": 3.4479870673565096, "learning_rate": 4.1880943404552234e-06, "loss": 0.6561, "step": 7904 }, { "epoch": 0.57, "grad_norm": 1.280202969892627, "learning_rate": 4.186951332097078e-06, "loss": 0.1854, "step": 7905 }, { "epoch": 0.57, "grad_norm": 1.4854053724482144, "learning_rate": 4.185808367380158e-06, "loss": 0.2417, "step": 7906 }, { "epoch": 0.57, "grad_norm": 1.3396469893932181, "learning_rate": 4.184665446365807e-06, "loss": 0.1784, "step": 7907 }, { "epoch": 0.57, "grad_norm": 1.3376596206176443, "learning_rate": 4.183522569115377e-06, "loss": 0.2057, "step": 7908 }, { "epoch": 0.57, "grad_norm": 1.2703552540015433, "learning_rate": 4.1823797356902106e-06, "loss": 0.197, "step": 7909 }, { "epoch": 0.57, "grad_norm": 1.4920973790879564, "learning_rate": 4.181236946151654e-06, "loss": 0.1911, "step": 7910 }, { "epoch": 0.57, "grad_norm": 1.2324281286720502, "learning_rate": 4.1800942005610434e-06, "loss": 0.1991, "step": 7911 }, { "epoch": 0.57, "grad_norm": 1.2509754125826382, "learning_rate": 4.17895149897972e-06, "loss": 0.1887, "step": 7912 }, { "epoch": 0.57, "grad_norm": 1.52545610270568, "learning_rate": 4.177808841469017e-06, "loss": 0.1904, "step": 7913 }, { "epoch": 0.57, "grad_norm": 1.241717214721998, "learning_rate": 4.176666228090271e-06, "loss": 0.166, "step": 7914 }, { "epoch": 0.57, "grad_norm": 1.433130260215562, "learning_rate": 4.175523658904808e-06, "loss": 0.2296, "step": 7915 }, { "epoch": 0.57, "grad_norm": 1.4693810354116887, "learning_rate": 4.174381133973962e-06, "loss": 0.1594, "step": 7916 }, { "epoch": 0.57, "grad_norm": 1.2478683249709333, "learning_rate": 4.173238653359055e-06, "loss": 0.198, "step": 7917 }, { "epoch": 0.57, "grad_norm": 1.7727414218985758, "learning_rate": 4.172096217121415e-06, "loss": 0.2117, "step": 7918 }, { "epoch": 0.57, "grad_norm": 1.2256931107201692, "learning_rate": 4.170953825322359e-06, "loss": 0.1873, "step": 7919 }, { "epoch": 0.57, "grad_norm": 1.2817622715348236, "learning_rate": 4.1698114780232085e-06, "loss": 0.1991, "step": 7920 }, { "epoch": 0.57, "grad_norm": 1.4205988934077478, "learning_rate": 4.168669175285279e-06, "loss": 0.2008, "step": 7921 }, { "epoch": 0.57, "grad_norm": 4.817551560208983, "learning_rate": 4.167526917169887e-06, "loss": 0.5361, "step": 7922 }, { "epoch": 0.57, "grad_norm": 5.6471396835925685, "learning_rate": 4.166384703738341e-06, "loss": 0.6133, "step": 7923 }, { "epoch": 0.57, "grad_norm": 1.4071471051372368, "learning_rate": 4.165242535051955e-06, "loss": 0.204, "step": 7924 }, { "epoch": 0.57, "grad_norm": 1.4788873020805953, "learning_rate": 4.164100411172029e-06, "loss": 0.2295, "step": 7925 }, { "epoch": 0.57, "grad_norm": 4.5855535231841245, "learning_rate": 4.162958332159877e-06, "loss": 0.4392, "step": 7926 }, { "epoch": 0.57, "grad_norm": 1.404526040877423, "learning_rate": 4.161816298076794e-06, "loss": 0.1915, "step": 7927 }, { "epoch": 0.57, "grad_norm": 12.796283605702229, "learning_rate": 4.160674308984081e-06, "loss": 0.502, "step": 7928 }, { "epoch": 0.57, "grad_norm": 1.3115609994851167, "learning_rate": 4.159532364943038e-06, "loss": 0.2264, "step": 7929 }, { "epoch": 0.57, "grad_norm": 1.3253801997827823, "learning_rate": 4.15839046601496e-06, "loss": 0.2208, "step": 7930 }, { "epoch": 0.57, "grad_norm": 1.1734463179433283, "learning_rate": 4.157248612261136e-06, "loss": 0.1474, "step": 7931 }, { "epoch": 0.57, "grad_norm": 1.483046417977614, "learning_rate": 4.15610680374286e-06, "loss": 0.1966, "step": 7932 }, { "epoch": 0.57, "grad_norm": 1.313854712878686, "learning_rate": 4.154965040521416e-06, "loss": 0.203, "step": 7933 }, { "epoch": 0.57, "grad_norm": 1.2791878050048069, "learning_rate": 4.153823322658094e-06, "loss": 0.183, "step": 7934 }, { "epoch": 0.57, "grad_norm": 1.2983990141004578, "learning_rate": 4.152681650214173e-06, "loss": 0.1822, "step": 7935 }, { "epoch": 0.57, "grad_norm": 1.406445226434695, "learning_rate": 4.151540023250934e-06, "loss": 0.1849, "step": 7936 }, { "epoch": 0.57, "grad_norm": 1.251403758514166, "learning_rate": 4.150398441829658e-06, "loss": 0.1879, "step": 7937 }, { "epoch": 0.57, "grad_norm": 1.434084048647608, "learning_rate": 4.149256906011616e-06, "loss": 0.213, "step": 7938 }, { "epoch": 0.57, "grad_norm": 1.54561678851188, "learning_rate": 4.148115415858086e-06, "loss": 0.1986, "step": 7939 }, { "epoch": 0.57, "grad_norm": 1.3178779597857702, "learning_rate": 4.146973971430333e-06, "loss": 0.1786, "step": 7940 }, { "epoch": 0.57, "grad_norm": 1.5637452029816388, "learning_rate": 4.1458325727896305e-06, "loss": 0.2245, "step": 7941 }, { "epoch": 0.57, "grad_norm": 1.5246863484283584, "learning_rate": 4.1446912199972414e-06, "loss": 0.2243, "step": 7942 }, { "epoch": 0.57, "grad_norm": 1.2428416470249841, "learning_rate": 4.143549913114431e-06, "loss": 0.1641, "step": 7943 }, { "epoch": 0.57, "grad_norm": 1.5906078498177274, "learning_rate": 4.142408652202458e-06, "loss": 0.2085, "step": 7944 }, { "epoch": 0.57, "grad_norm": 4.268357267014983, "learning_rate": 4.141267437322581e-06, "loss": 0.5253, "step": 7945 }, { "epoch": 0.57, "grad_norm": 1.3334862992255274, "learning_rate": 4.140126268536055e-06, "loss": 0.1982, "step": 7946 }, { "epoch": 0.57, "grad_norm": 1.2054809623702682, "learning_rate": 4.1389851459041385e-06, "loss": 0.1556, "step": 7947 }, { "epoch": 0.57, "grad_norm": 1.3285097239945738, "learning_rate": 4.137844069488076e-06, "loss": 0.2055, "step": 7948 }, { "epoch": 0.57, "grad_norm": 1.3500727721884291, "learning_rate": 4.136703039349119e-06, "loss": 0.1687, "step": 7949 }, { "epoch": 0.57, "grad_norm": 1.3049991646383885, "learning_rate": 4.135562055548513e-06, "loss": 0.1816, "step": 7950 }, { "epoch": 0.57, "grad_norm": 1.4075096808638043, "learning_rate": 4.1344211181475035e-06, "loss": 0.1868, "step": 7951 }, { "epoch": 0.57, "grad_norm": 1.3382121044835877, "learning_rate": 4.133280227207328e-06, "loss": 0.1757, "step": 7952 }, { "epoch": 0.57, "grad_norm": 4.69681926087116, "learning_rate": 4.132139382789226e-06, "loss": 0.3713, "step": 7953 }, { "epoch": 0.57, "grad_norm": 1.2100555266042954, "learning_rate": 4.130998584954435e-06, "loss": 0.1843, "step": 7954 }, { "epoch": 0.57, "grad_norm": 6.700673045415729, "learning_rate": 4.129857833764189e-06, "loss": 0.4342, "step": 7955 }, { "epoch": 0.57, "grad_norm": 1.5162515140724764, "learning_rate": 4.128717129279715e-06, "loss": 0.2077, "step": 7956 }, { "epoch": 0.57, "grad_norm": 1.321707210297209, "learning_rate": 4.127576471562246e-06, "loss": 0.1821, "step": 7957 }, { "epoch": 0.57, "grad_norm": 1.494633770501327, "learning_rate": 4.126435860673004e-06, "loss": 0.2127, "step": 7958 }, { "epoch": 0.57, "grad_norm": 1.392136285695771, "learning_rate": 4.1252952966732176e-06, "loss": 0.1852, "step": 7959 }, { "epoch": 0.57, "grad_norm": 1.9601008314685433, "learning_rate": 4.124154779624101e-06, "loss": 0.1874, "step": 7960 }, { "epoch": 0.57, "grad_norm": 1.1478828898684157, "learning_rate": 4.123014309586879e-06, "loss": 0.1701, "step": 7961 }, { "epoch": 0.57, "grad_norm": 1.175464910557783, "learning_rate": 4.121873886622763e-06, "loss": 0.1606, "step": 7962 }, { "epoch": 0.57, "grad_norm": 1.523393907111342, "learning_rate": 4.120733510792971e-06, "loss": 0.234, "step": 7963 }, { "epoch": 0.57, "grad_norm": 1.2525333325543262, "learning_rate": 4.119593182158707e-06, "loss": 0.1563, "step": 7964 }, { "epoch": 0.57, "grad_norm": 1.6693110910218578, "learning_rate": 4.118452900781187e-06, "loss": 0.1715, "step": 7965 }, { "epoch": 0.57, "grad_norm": 1.4175473564526528, "learning_rate": 4.11731266672161e-06, "loss": 0.1551, "step": 7966 }, { "epoch": 0.57, "grad_norm": 5.0092382877351485, "learning_rate": 4.116172480041185e-06, "loss": 0.5857, "step": 7967 }, { "epoch": 0.57, "grad_norm": 8.87387150018845, "learning_rate": 4.115032340801108e-06, "loss": 0.6907, "step": 7968 }, { "epoch": 0.57, "grad_norm": 1.3489692214501614, "learning_rate": 4.1138922490625795e-06, "loss": 0.1971, "step": 7969 }, { "epoch": 0.57, "grad_norm": 1.2338561327277546, "learning_rate": 4.112752204886796e-06, "loss": 0.1704, "step": 7970 }, { "epoch": 0.57, "grad_norm": 1.0519680459001135, "learning_rate": 4.111612208334947e-06, "loss": 0.1503, "step": 7971 }, { "epoch": 0.57, "grad_norm": 1.2340214520677595, "learning_rate": 4.110472259468228e-06, "loss": 0.1712, "step": 7972 }, { "epoch": 0.57, "grad_norm": 1.3488628829857716, "learning_rate": 4.109332358347823e-06, "loss": 0.158, "step": 7973 }, { "epoch": 0.57, "grad_norm": 1.2202628139469027, "learning_rate": 4.108192505034918e-06, "loss": 0.1622, "step": 7974 }, { "epoch": 0.57, "grad_norm": 1.4482880537063456, "learning_rate": 4.107052699590697e-06, "loss": 0.2027, "step": 7975 }, { "epoch": 0.57, "grad_norm": 1.2679028821731093, "learning_rate": 4.10591294207634e-06, "loss": 0.1843, "step": 7976 }, { "epoch": 0.57, "grad_norm": 1.1605989004383006, "learning_rate": 4.104773232553023e-06, "loss": 0.1754, "step": 7977 }, { "epoch": 0.57, "grad_norm": 1.2354085821261411, "learning_rate": 4.103633571081923e-06, "loss": 0.2066, "step": 7978 }, { "epoch": 0.57, "grad_norm": 1.3146859083101374, "learning_rate": 4.102493957724212e-06, "loss": 0.1649, "step": 7979 }, { "epoch": 0.57, "grad_norm": 1.3926103391711135, "learning_rate": 4.101354392541061e-06, "loss": 0.1752, "step": 7980 }, { "epoch": 0.57, "grad_norm": 1.4399301264220867, "learning_rate": 4.100214875593634e-06, "loss": 0.1985, "step": 7981 }, { "epoch": 0.57, "grad_norm": 10.434037916203813, "learning_rate": 4.0990754069430986e-06, "loss": 0.4859, "step": 7982 }, { "epoch": 0.57, "grad_norm": 6.7090365227332995, "learning_rate": 4.097935986650615e-06, "loss": 0.5872, "step": 7983 }, { "epoch": 0.57, "grad_norm": 1.2298491572833825, "learning_rate": 4.096796614777347e-06, "loss": 0.1535, "step": 7984 }, { "epoch": 0.57, "grad_norm": 1.2930728204229172, "learning_rate": 4.095657291384445e-06, "loss": 0.1666, "step": 7985 }, { "epoch": 0.57, "grad_norm": 1.3141402802806195, "learning_rate": 4.094518016533069e-06, "loss": 0.1949, "step": 7986 }, { "epoch": 0.57, "grad_norm": 1.2227180620732314, "learning_rate": 4.093378790284368e-06, "loss": 0.22, "step": 7987 }, { "epoch": 0.57, "grad_norm": 1.1940926963020462, "learning_rate": 4.092239612699492e-06, "loss": 0.1886, "step": 7988 }, { "epoch": 0.57, "grad_norm": 1.2099501678305673, "learning_rate": 4.0911004838395865e-06, "loss": 0.2041, "step": 7989 }, { "epoch": 0.57, "grad_norm": 1.5610292457570594, "learning_rate": 4.089961403765797e-06, "loss": 0.1967, "step": 7990 }, { "epoch": 0.57, "grad_norm": 1.4452804821195513, "learning_rate": 4.088822372539263e-06, "loss": 0.1708, "step": 7991 }, { "epoch": 0.57, "grad_norm": 1.4017633775219112, "learning_rate": 4.0876833902211255e-06, "loss": 0.1946, "step": 7992 }, { "epoch": 0.57, "grad_norm": 1.3602119672908588, "learning_rate": 4.086544456872518e-06, "loss": 0.2035, "step": 7993 }, { "epoch": 0.57, "grad_norm": 1.2365785098726378, "learning_rate": 4.085405572554576e-06, "loss": 0.1783, "step": 7994 }, { "epoch": 0.57, "grad_norm": 1.3170550956466496, "learning_rate": 4.084266737328428e-06, "loss": 0.2118, "step": 7995 }, { "epoch": 0.57, "grad_norm": 1.2162572244941738, "learning_rate": 4.083127951255205e-06, "loss": 0.151, "step": 7996 }, { "epoch": 0.57, "grad_norm": 1.3777326752473358, "learning_rate": 4.0819892143960295e-06, "loss": 0.2027, "step": 7997 }, { "epoch": 0.57, "grad_norm": 5.389027794930302, "learning_rate": 4.080850526812028e-06, "loss": 0.5152, "step": 7998 }, { "epoch": 0.57, "grad_norm": 1.5275908080749763, "learning_rate": 4.079711888564317e-06, "loss": 0.2105, "step": 7999 }, { "epoch": 0.57, "grad_norm": 1.4858716107749628, "learning_rate": 4.078573299714014e-06, "loss": 0.2131, "step": 8000 }, { "epoch": 0.57, "grad_norm": 1.3348155744430208, "learning_rate": 4.07743476032224e-06, "loss": 0.1812, "step": 8001 }, { "epoch": 0.57, "grad_norm": 1.3260179942010921, "learning_rate": 4.076296270450099e-06, "loss": 0.1723, "step": 8002 }, { "epoch": 0.57, "grad_norm": 1.3173780937817325, "learning_rate": 4.075157830158708e-06, "loss": 0.192, "step": 8003 }, { "epoch": 0.57, "grad_norm": 1.2736999158418605, "learning_rate": 4.074019439509168e-06, "loss": 0.1812, "step": 8004 }, { "epoch": 0.57, "grad_norm": 1.5648938691796492, "learning_rate": 4.072881098562587e-06, "loss": 0.2136, "step": 8005 }, { "epoch": 0.57, "grad_norm": 8.375060297619061, "learning_rate": 4.071742807380064e-06, "loss": 0.7304, "step": 8006 }, { "epoch": 0.57, "grad_norm": 1.2253732040821348, "learning_rate": 4.070604566022701e-06, "loss": 0.1421, "step": 8007 }, { "epoch": 0.57, "grad_norm": 1.2496259566027623, "learning_rate": 4.06946637455159e-06, "loss": 0.2016, "step": 8008 }, { "epoch": 0.57, "grad_norm": 1.5253626822798474, "learning_rate": 4.06832823302783e-06, "loss": 0.1992, "step": 8009 }, { "epoch": 0.57, "grad_norm": 1.349783521995507, "learning_rate": 4.067190141512507e-06, "loss": 0.1908, "step": 8010 }, { "epoch": 0.57, "grad_norm": 6.585683000418182, "learning_rate": 4.066052100066714e-06, "loss": 0.7002, "step": 8011 }, { "epoch": 0.57, "grad_norm": 1.343799989317747, "learning_rate": 4.0649141087515314e-06, "loss": 0.2479, "step": 8012 }, { "epoch": 0.57, "grad_norm": 1.4227749973611183, "learning_rate": 4.0637761676280466e-06, "loss": 0.1886, "step": 8013 }, { "epoch": 0.57, "grad_norm": 1.353880241541804, "learning_rate": 4.062638276757335e-06, "loss": 0.2051, "step": 8014 }, { "epoch": 0.57, "grad_norm": 1.4603702696167313, "learning_rate": 4.0615004362004794e-06, "loss": 0.1788, "step": 8015 }, { "epoch": 0.57, "grad_norm": 1.2036066407916044, "learning_rate": 4.06036264601855e-06, "loss": 0.1668, "step": 8016 }, { "epoch": 0.57, "grad_norm": 1.3950374878702316, "learning_rate": 4.059224906272622e-06, "loss": 0.1853, "step": 8017 }, { "epoch": 0.57, "grad_norm": 1.4212078004498507, "learning_rate": 4.0580872170237625e-06, "loss": 0.1665, "step": 8018 }, { "epoch": 0.57, "grad_norm": 1.4003850997340643, "learning_rate": 4.056949578333042e-06, "loss": 0.2202, "step": 8019 }, { "epoch": 0.57, "grad_norm": 1.4151876329871242, "learning_rate": 4.055811990261518e-06, "loss": 0.1626, "step": 8020 }, { "epoch": 0.57, "grad_norm": 1.2139987561570011, "learning_rate": 4.054674452870257e-06, "loss": 0.1488, "step": 8021 }, { "epoch": 0.57, "grad_norm": 1.1906911161957223, "learning_rate": 4.053536966220315e-06, "loss": 0.1832, "step": 8022 }, { "epoch": 0.57, "grad_norm": 1.3537434961775903, "learning_rate": 4.05239953037275e-06, "loss": 0.2104, "step": 8023 }, { "epoch": 0.57, "grad_norm": 1.2421183430747667, "learning_rate": 4.051262145388612e-06, "loss": 0.184, "step": 8024 }, { "epoch": 0.57, "grad_norm": 6.085963051612594, "learning_rate": 4.050124811328954e-06, "loss": 0.4591, "step": 8025 }, { "epoch": 0.57, "grad_norm": 1.3012486911818981, "learning_rate": 4.048987528254821e-06, "loss": 0.1789, "step": 8026 }, { "epoch": 0.57, "grad_norm": 1.1895873176699898, "learning_rate": 4.047850296227261e-06, "loss": 0.1828, "step": 8027 }, { "epoch": 0.57, "grad_norm": 1.1096143505769158, "learning_rate": 4.046713115307314e-06, "loss": 0.1436, "step": 8028 }, { "epoch": 0.57, "grad_norm": 1.2968522325621459, "learning_rate": 4.04557598555602e-06, "loss": 0.1793, "step": 8029 }, { "epoch": 0.57, "grad_norm": 1.4138172044396728, "learning_rate": 4.044438907034414e-06, "loss": 0.1963, "step": 8030 }, { "epoch": 0.57, "grad_norm": 4.897422582296905, "learning_rate": 4.043301879803533e-06, "loss": 0.6714, "step": 8031 }, { "epoch": 0.57, "grad_norm": 1.1567264509301127, "learning_rate": 4.042164903924405e-06, "loss": 0.1605, "step": 8032 }, { "epoch": 0.57, "grad_norm": 1.2698959271277395, "learning_rate": 4.041027979458058e-06, "loss": 0.1757, "step": 8033 }, { "epoch": 0.57, "grad_norm": 1.3540541364026093, "learning_rate": 4.039891106465522e-06, "loss": 0.1885, "step": 8034 }, { "epoch": 0.57, "grad_norm": 1.363769317103242, "learning_rate": 4.038754285007816e-06, "loss": 0.1741, "step": 8035 }, { "epoch": 0.57, "grad_norm": 1.2921362979153503, "learning_rate": 4.037617515145963e-06, "loss": 0.2098, "step": 8036 }, { "epoch": 0.57, "grad_norm": 1.0576040317485342, "learning_rate": 4.036480796940976e-06, "loss": 0.152, "step": 8037 }, { "epoch": 0.57, "grad_norm": 1.2185878074217285, "learning_rate": 4.035344130453874e-06, "loss": 0.2024, "step": 8038 }, { "epoch": 0.58, "grad_norm": 1.281999057338327, "learning_rate": 4.034207515745665e-06, "loss": 0.2045, "step": 8039 }, { "epoch": 0.58, "grad_norm": 4.653684805842123, "learning_rate": 4.033070952877362e-06, "loss": 0.6067, "step": 8040 }, { "epoch": 0.58, "grad_norm": 1.2625739425973685, "learning_rate": 4.031934441909966e-06, "loss": 0.1998, "step": 8041 }, { "epoch": 0.58, "grad_norm": 1.1989897813654178, "learning_rate": 4.030797982904485e-06, "loss": 0.158, "step": 8042 }, { "epoch": 0.58, "grad_norm": 1.2853595408482956, "learning_rate": 4.029661575921916e-06, "loss": 0.159, "step": 8043 }, { "epoch": 0.58, "grad_norm": 1.269686840951969, "learning_rate": 4.028525221023262e-06, "loss": 0.1937, "step": 8044 }, { "epoch": 0.58, "grad_norm": 1.3806153845902618, "learning_rate": 4.027388918269511e-06, "loss": 0.1678, "step": 8045 }, { "epoch": 0.58, "grad_norm": 3.862354304764796, "learning_rate": 4.026252667721661e-06, "loss": 0.6053, "step": 8046 }, { "epoch": 0.58, "grad_norm": 1.1862540278605633, "learning_rate": 4.025116469440698e-06, "loss": 0.186, "step": 8047 }, { "epoch": 0.58, "grad_norm": 1.2753619649190167, "learning_rate": 4.023980323487612e-06, "loss": 0.1542, "step": 8048 }, { "epoch": 0.58, "grad_norm": 1.0770854449796035, "learning_rate": 4.022844229923383e-06, "loss": 0.1578, "step": 8049 }, { "epoch": 0.58, "grad_norm": 7.169596180769709, "learning_rate": 4.021708188808994e-06, "loss": 0.7305, "step": 8050 }, { "epoch": 0.58, "grad_norm": 1.3479432669443594, "learning_rate": 4.0205722002054225e-06, "loss": 0.179, "step": 8051 }, { "epoch": 0.58, "grad_norm": 1.4178520896072546, "learning_rate": 4.019436264173646e-06, "loss": 0.202, "step": 8052 }, { "epoch": 0.58, "grad_norm": 1.3796429959079035, "learning_rate": 4.018300380774633e-06, "loss": 0.1599, "step": 8053 }, { "epoch": 0.58, "grad_norm": 6.027281622390009, "learning_rate": 4.017164550069357e-06, "loss": 0.6118, "step": 8054 }, { "epoch": 0.58, "grad_norm": 1.2433805319592304, "learning_rate": 4.016028772118783e-06, "loss": 0.1677, "step": 8055 }, { "epoch": 0.58, "grad_norm": 1.293486956367564, "learning_rate": 4.014893046983876e-06, "loss": 0.171, "step": 8056 }, { "epoch": 0.58, "grad_norm": 1.3338623027846672, "learning_rate": 4.013757374725596e-06, "loss": 0.186, "step": 8057 }, { "epoch": 0.58, "grad_norm": 1.4656626020410528, "learning_rate": 4.012621755404901e-06, "loss": 0.1975, "step": 8058 }, { "epoch": 0.58, "grad_norm": 1.2284020301944203, "learning_rate": 4.011486189082749e-06, "loss": 0.1739, "step": 8059 }, { "epoch": 0.58, "grad_norm": 1.3087009547084283, "learning_rate": 4.010350675820091e-06, "loss": 0.1886, "step": 8060 }, { "epoch": 0.58, "grad_norm": 1.200274072139302, "learning_rate": 4.009215215677876e-06, "loss": 0.1684, "step": 8061 }, { "epoch": 0.58, "grad_norm": 1.3350982159954299, "learning_rate": 4.0080798087170534e-06, "loss": 0.209, "step": 8062 }, { "epoch": 0.58, "grad_norm": 1.2955833773718224, "learning_rate": 4.006944454998564e-06, "loss": 0.179, "step": 8063 }, { "epoch": 0.58, "grad_norm": 1.252919418797698, "learning_rate": 4.005809154583351e-06, "loss": 0.1873, "step": 8064 }, { "epoch": 0.58, "grad_norm": 1.296132965140545, "learning_rate": 4.004673907532356e-06, "loss": 0.1716, "step": 8065 }, { "epoch": 0.58, "grad_norm": 1.3375059727991874, "learning_rate": 4.003538713906509e-06, "loss": 0.1872, "step": 8066 }, { "epoch": 0.58, "grad_norm": 1.4604908562601489, "learning_rate": 4.002403573766746e-06, "loss": 0.1937, "step": 8067 }, { "epoch": 0.58, "grad_norm": 1.225260264180561, "learning_rate": 4.001268487173995e-06, "loss": 0.1508, "step": 8068 }, { "epoch": 0.58, "grad_norm": 1.523383920068594, "learning_rate": 4.000133454189185e-06, "loss": 0.2159, "step": 8069 }, { "epoch": 0.58, "grad_norm": 1.3131023549424223, "learning_rate": 3.998998474873239e-06, "loss": 0.1708, "step": 8070 }, { "epoch": 0.58, "grad_norm": 1.182638511348339, "learning_rate": 3.997863549287077e-06, "loss": 0.1372, "step": 8071 }, { "epoch": 0.58, "grad_norm": 1.4261854727241108, "learning_rate": 3.996728677491619e-06, "loss": 0.1828, "step": 8072 }, { "epoch": 0.58, "grad_norm": 1.4427880523211525, "learning_rate": 3.995593859547781e-06, "loss": 0.2306, "step": 8073 }, { "epoch": 0.58, "grad_norm": 5.727590997230231, "learning_rate": 3.9944590955164716e-06, "loss": 0.5859, "step": 8074 }, { "epoch": 0.58, "grad_norm": 1.5250499473045989, "learning_rate": 3.993324385458605e-06, "loss": 0.1999, "step": 8075 }, { "epoch": 0.58, "grad_norm": 1.3766032782187008, "learning_rate": 3.992189729435085e-06, "loss": 0.1819, "step": 8076 }, { "epoch": 0.58, "grad_norm": 1.3010079284543583, "learning_rate": 3.991055127506819e-06, "loss": 0.1575, "step": 8077 }, { "epoch": 0.58, "grad_norm": 1.2302796495627735, "learning_rate": 3.989920579734703e-06, "loss": 0.1766, "step": 8078 }, { "epoch": 0.58, "grad_norm": 1.2718770805178725, "learning_rate": 3.988786086179638e-06, "loss": 0.189, "step": 8079 }, { "epoch": 0.58, "grad_norm": 1.1623334072052143, "learning_rate": 3.987651646902518e-06, "loss": 0.1333, "step": 8080 }, { "epoch": 0.58, "grad_norm": 4.783985448173852, "learning_rate": 3.9865172619642374e-06, "loss": 0.6099, "step": 8081 }, { "epoch": 0.58, "grad_norm": 1.2905100679672514, "learning_rate": 3.985382931425682e-06, "loss": 0.188, "step": 8082 }, { "epoch": 0.58, "grad_norm": 1.4167772549320934, "learning_rate": 3.984248655347741e-06, "loss": 0.2053, "step": 8083 }, { "epoch": 0.58, "grad_norm": 4.625763945399266, "learning_rate": 3.983114433791297e-06, "loss": 0.6303, "step": 8084 }, { "epoch": 0.58, "grad_norm": 1.5901993326178454, "learning_rate": 3.9819802668172305e-06, "loss": 0.2685, "step": 8085 }, { "epoch": 0.58, "grad_norm": 1.4726484418808994, "learning_rate": 3.9808461544864176e-06, "loss": 0.2064, "step": 8086 }, { "epoch": 0.58, "grad_norm": 1.4604875050312616, "learning_rate": 3.979712096859735e-06, "loss": 0.1383, "step": 8087 }, { "epoch": 0.58, "grad_norm": 1.1755201492600569, "learning_rate": 3.9785780939980525e-06, "loss": 0.1239, "step": 8088 }, { "epoch": 0.58, "grad_norm": 1.0476284442546069, "learning_rate": 3.977444145962243e-06, "loss": 0.1468, "step": 8089 }, { "epoch": 0.58, "grad_norm": 5.536942637421289, "learning_rate": 3.976310252813167e-06, "loss": 0.6333, "step": 8090 }, { "epoch": 0.58, "grad_norm": 1.3793193177642775, "learning_rate": 3.97517641461169e-06, "loss": 0.2024, "step": 8091 }, { "epoch": 0.58, "grad_norm": 1.4296887192598908, "learning_rate": 3.974042631418671e-06, "loss": 0.1646, "step": 8092 }, { "epoch": 0.58, "grad_norm": 1.4151675425742891, "learning_rate": 3.97290890329497e-06, "loss": 0.2279, "step": 8093 }, { "epoch": 0.58, "grad_norm": 4.47985618961368, "learning_rate": 3.971775230301437e-06, "loss": 0.4685, "step": 8094 }, { "epoch": 0.58, "grad_norm": 3.3574708188182463, "learning_rate": 3.970641612498926e-06, "loss": 0.4977, "step": 8095 }, { "epoch": 0.58, "grad_norm": 1.2936997172404536, "learning_rate": 3.969508049948281e-06, "loss": 0.1601, "step": 8096 }, { "epoch": 0.58, "grad_norm": 1.2838259096078106, "learning_rate": 3.968374542710351e-06, "loss": 0.1886, "step": 8097 }, { "epoch": 0.58, "grad_norm": 1.4768638704511474, "learning_rate": 3.967241090845979e-06, "loss": 0.1918, "step": 8098 }, { "epoch": 0.58, "grad_norm": 1.3538735998815188, "learning_rate": 3.966107694416e-06, "loss": 0.1917, "step": 8099 }, { "epoch": 0.58, "grad_norm": 1.2488067948452406, "learning_rate": 3.964974353481254e-06, "loss": 0.2131, "step": 8100 }, { "epoch": 0.58, "grad_norm": 1.353988965058894, "learning_rate": 3.963841068102571e-06, "loss": 0.2121, "step": 8101 }, { "epoch": 0.58, "grad_norm": 4.653108048282838, "learning_rate": 3.962707838340786e-06, "loss": 0.5144, "step": 8102 }, { "epoch": 0.58, "grad_norm": 4.117135606476216, "learning_rate": 3.961574664256721e-06, "loss": 0.5411, "step": 8103 }, { "epoch": 0.58, "grad_norm": 1.4892678489001696, "learning_rate": 3.960441545911205e-06, "loss": 0.2044, "step": 8104 }, { "epoch": 0.58, "grad_norm": 1.0843689378064763, "learning_rate": 3.959308483365054e-06, "loss": 0.146, "step": 8105 }, { "epoch": 0.58, "grad_norm": 1.2234665936952536, "learning_rate": 3.95817547667909e-06, "loss": 0.1346, "step": 8106 }, { "epoch": 0.58, "grad_norm": 1.5504714741171117, "learning_rate": 3.957042525914128e-06, "loss": 0.2416, "step": 8107 }, { "epoch": 0.58, "grad_norm": 1.3269004982761294, "learning_rate": 3.9559096311309805e-06, "loss": 0.1574, "step": 8108 }, { "epoch": 0.58, "grad_norm": 5.542983227846797, "learning_rate": 3.9547767923904546e-06, "loss": 0.7287, "step": 8109 }, { "epoch": 0.58, "grad_norm": 1.2982295122789667, "learning_rate": 3.953644009753358e-06, "loss": 0.2008, "step": 8110 }, { "epoch": 0.58, "grad_norm": 1.3836076696045754, "learning_rate": 3.952511283280494e-06, "loss": 0.1621, "step": 8111 }, { "epoch": 0.58, "grad_norm": 1.3206727494925798, "learning_rate": 3.951378613032665e-06, "loss": 0.199, "step": 8112 }, { "epoch": 0.58, "grad_norm": 1.33267583373924, "learning_rate": 3.950245999070664e-06, "loss": 0.2091, "step": 8113 }, { "epoch": 0.58, "grad_norm": 1.3856570162995032, "learning_rate": 3.949113441455288e-06, "loss": 0.1954, "step": 8114 }, { "epoch": 0.58, "grad_norm": 1.2798782490966174, "learning_rate": 3.947980940247326e-06, "loss": 0.1779, "step": 8115 }, { "epoch": 0.58, "grad_norm": 1.2752247404567465, "learning_rate": 3.94684849550757e-06, "loss": 0.1585, "step": 8116 }, { "epoch": 0.58, "grad_norm": 1.3078870380164824, "learning_rate": 3.9457161072968015e-06, "loss": 0.1768, "step": 8117 }, { "epoch": 0.58, "grad_norm": 1.431809603015343, "learning_rate": 3.944583775675804e-06, "loss": 0.1829, "step": 8118 }, { "epoch": 0.58, "grad_norm": 12.636393874353214, "learning_rate": 3.943451500705357e-06, "loss": 0.6949, "step": 8119 }, { "epoch": 0.58, "grad_norm": 1.3824909787490212, "learning_rate": 3.942319282446236e-06, "loss": 0.2046, "step": 8120 }, { "epoch": 0.58, "grad_norm": 1.3500255504885168, "learning_rate": 3.9411871209592135e-06, "loss": 0.1837, "step": 8121 }, { "epoch": 0.58, "grad_norm": 1.5083475443041952, "learning_rate": 3.94005501630506e-06, "loss": 0.2156, "step": 8122 }, { "epoch": 0.58, "grad_norm": 1.2872850366754804, "learning_rate": 3.938922968544542e-06, "loss": 0.1778, "step": 8123 }, { "epoch": 0.58, "grad_norm": 1.255649672017287, "learning_rate": 3.937790977738425e-06, "loss": 0.1632, "step": 8124 }, { "epoch": 0.58, "grad_norm": 4.412161209826929, "learning_rate": 3.936659043947467e-06, "loss": 0.5399, "step": 8125 }, { "epoch": 0.58, "grad_norm": 1.145448645996042, "learning_rate": 3.935527167232427e-06, "loss": 0.1592, "step": 8126 }, { "epoch": 0.58, "grad_norm": 1.4601783110998678, "learning_rate": 3.9343953476540595e-06, "loss": 0.2123, "step": 8127 }, { "epoch": 0.58, "grad_norm": 1.2931321748575118, "learning_rate": 3.933263585273117e-06, "loss": 0.1938, "step": 8128 }, { "epoch": 0.58, "grad_norm": 4.3386220986459625, "learning_rate": 3.932131880150348e-06, "loss": 0.4618, "step": 8129 }, { "epoch": 0.58, "grad_norm": 1.3137978154310215, "learning_rate": 3.931000232346496e-06, "loss": 0.1815, "step": 8130 }, { "epoch": 0.58, "grad_norm": 1.1551659926038138, "learning_rate": 3.929868641922306e-06, "loss": 0.1598, "step": 8131 }, { "epoch": 0.58, "grad_norm": 1.2788923538545707, "learning_rate": 3.928737108938514e-06, "loss": 0.1423, "step": 8132 }, { "epoch": 0.58, "grad_norm": 1.1302259426939052, "learning_rate": 3.92760563345586e-06, "loss": 0.1649, "step": 8133 }, { "epoch": 0.58, "grad_norm": 1.1969739945177365, "learning_rate": 3.926474215535074e-06, "loss": 0.1768, "step": 8134 }, { "epoch": 0.58, "grad_norm": 1.3167420710310629, "learning_rate": 3.925342855236888e-06, "loss": 0.1901, "step": 8135 }, { "epoch": 0.58, "grad_norm": 1.3594382073605924, "learning_rate": 3.924211552622026e-06, "loss": 0.2088, "step": 8136 }, { "epoch": 0.58, "grad_norm": 1.3043423941741843, "learning_rate": 3.923080307751218e-06, "loss": 0.1939, "step": 8137 }, { "epoch": 0.58, "grad_norm": 1.3022019456897431, "learning_rate": 3.921949120685178e-06, "loss": 0.1767, "step": 8138 }, { "epoch": 0.58, "grad_norm": 7.171416154249515, "learning_rate": 3.920817991484627e-06, "loss": 0.513, "step": 8139 }, { "epoch": 0.58, "grad_norm": 1.315203169807279, "learning_rate": 3.9196869202102775e-06, "loss": 0.1853, "step": 8140 }, { "epoch": 0.58, "grad_norm": 1.2123076747104098, "learning_rate": 3.918555906922845e-06, "loss": 0.1472, "step": 8141 }, { "epoch": 0.58, "grad_norm": 1.4227486995343832, "learning_rate": 3.917424951683032e-06, "loss": 0.1741, "step": 8142 }, { "epoch": 0.58, "grad_norm": 1.2909050526561214, "learning_rate": 3.916294054551548e-06, "loss": 0.1845, "step": 8143 }, { "epoch": 0.58, "grad_norm": 1.3135556641506279, "learning_rate": 3.915163215589094e-06, "loss": 0.1476, "step": 8144 }, { "epoch": 0.58, "grad_norm": 1.1899592895624993, "learning_rate": 3.91403243485637e-06, "loss": 0.1551, "step": 8145 }, { "epoch": 0.58, "grad_norm": 1.3856053710386913, "learning_rate": 3.912901712414068e-06, "loss": 0.1994, "step": 8146 }, { "epoch": 0.58, "grad_norm": 1.4223187317432422, "learning_rate": 3.911771048322885e-06, "loss": 0.2103, "step": 8147 }, { "epoch": 0.58, "grad_norm": 1.3563099429245546, "learning_rate": 3.910640442643508e-06, "loss": 0.1977, "step": 8148 }, { "epoch": 0.58, "grad_norm": 1.2706505057831987, "learning_rate": 3.9095098954366264e-06, "loss": 0.2196, "step": 8149 }, { "epoch": 0.58, "grad_norm": 1.3622788213792312, "learning_rate": 3.908379406762918e-06, "loss": 0.1818, "step": 8150 }, { "epoch": 0.58, "grad_norm": 1.3292331972810614, "learning_rate": 3.907248976683069e-06, "loss": 0.1989, "step": 8151 }, { "epoch": 0.58, "grad_norm": 1.2430086877384752, "learning_rate": 3.9061186052577515e-06, "loss": 0.2086, "step": 8152 }, { "epoch": 0.58, "grad_norm": 1.4004852508407137, "learning_rate": 3.904988292547643e-06, "loss": 0.2334, "step": 8153 }, { "epoch": 0.58, "grad_norm": 1.183495840610173, "learning_rate": 3.903858038613412e-06, "loss": 0.2382, "step": 8154 }, { "epoch": 0.58, "grad_norm": 1.4325619158460001, "learning_rate": 3.902727843515727e-06, "loss": 0.241, "step": 8155 }, { "epoch": 0.58, "grad_norm": 1.3760137453176498, "learning_rate": 3.9015977073152516e-06, "loss": 0.2231, "step": 8156 }, { "epoch": 0.58, "grad_norm": 1.341312526403342, "learning_rate": 3.90046763007265e-06, "loss": 0.1946, "step": 8157 }, { "epoch": 0.58, "grad_norm": 1.407829326775763, "learning_rate": 3.899337611848575e-06, "loss": 0.1913, "step": 8158 }, { "epoch": 0.58, "grad_norm": 1.326656399944762, "learning_rate": 3.898207652703685e-06, "loss": 0.2296, "step": 8159 }, { "epoch": 0.58, "grad_norm": 1.5429949108067782, "learning_rate": 3.89707775269863e-06, "loss": 0.2112, "step": 8160 }, { "epoch": 0.58, "grad_norm": 1.3854076500714492, "learning_rate": 3.89594791189406e-06, "loss": 0.1646, "step": 8161 }, { "epoch": 0.58, "grad_norm": 1.3322226829916661, "learning_rate": 3.894818130350622e-06, "loss": 0.1451, "step": 8162 }, { "epoch": 0.58, "grad_norm": 1.5979229346416453, "learning_rate": 3.893688408128955e-06, "loss": 0.2058, "step": 8163 }, { "epoch": 0.58, "grad_norm": 1.435527348301019, "learning_rate": 3.892558745289698e-06, "loss": 0.1887, "step": 8164 }, { "epoch": 0.58, "grad_norm": 1.4488540473470253, "learning_rate": 3.891429141893489e-06, "loss": 0.2043, "step": 8165 }, { "epoch": 0.58, "grad_norm": 1.3039421615118558, "learning_rate": 3.890299598000961e-06, "loss": 0.1732, "step": 8166 }, { "epoch": 0.58, "grad_norm": 1.2043196690173117, "learning_rate": 3.889170113672739e-06, "loss": 0.1574, "step": 8167 }, { "epoch": 0.58, "grad_norm": 1.4651840568526107, "learning_rate": 3.888040688969454e-06, "loss": 0.1603, "step": 8168 }, { "epoch": 0.58, "grad_norm": 1.3565189895583047, "learning_rate": 3.886911323951725e-06, "loss": 0.1856, "step": 8169 }, { "epoch": 0.58, "grad_norm": 1.2905263478886697, "learning_rate": 3.8857820186801775e-06, "loss": 0.2003, "step": 8170 }, { "epoch": 0.58, "grad_norm": 1.2994892590047724, "learning_rate": 3.8846527732154224e-06, "loss": 0.1421, "step": 8171 }, { "epoch": 0.58, "grad_norm": 1.244924802065644, "learning_rate": 3.883523587618077e-06, "loss": 0.1607, "step": 8172 }, { "epoch": 0.58, "grad_norm": 4.669071457381142, "learning_rate": 3.882394461948748e-06, "loss": 0.439, "step": 8173 }, { "epoch": 0.58, "grad_norm": 1.3135493277674954, "learning_rate": 3.881265396268046e-06, "loss": 0.1908, "step": 8174 }, { "epoch": 0.58, "grad_norm": 1.2933195709072633, "learning_rate": 3.880136390636571e-06, "loss": 0.1873, "step": 8175 }, { "epoch": 0.58, "grad_norm": 1.6118211658697195, "learning_rate": 3.879007445114927e-06, "loss": 0.2261, "step": 8176 }, { "epoch": 0.58, "grad_norm": 1.3922521239012664, "learning_rate": 3.877878559763709e-06, "loss": 0.227, "step": 8177 }, { "epoch": 0.58, "grad_norm": 1.3678209399132972, "learning_rate": 3.8767497346435145e-06, "loss": 0.1847, "step": 8178 }, { "epoch": 0.59, "grad_norm": 1.3268655274217813, "learning_rate": 3.875620969814929e-06, "loss": 0.1658, "step": 8179 }, { "epoch": 0.59, "grad_norm": 1.3334428915797054, "learning_rate": 3.874492265338544e-06, "loss": 0.189, "step": 8180 }, { "epoch": 0.59, "grad_norm": 1.3608532815525396, "learning_rate": 3.873363621274942e-06, "loss": 0.1405, "step": 8181 }, { "epoch": 0.59, "grad_norm": 1.4123860902121381, "learning_rate": 3.872235037684707e-06, "loss": 0.1899, "step": 8182 }, { "epoch": 0.59, "grad_norm": 1.2087985097031808, "learning_rate": 3.871106514628412e-06, "loss": 0.1778, "step": 8183 }, { "epoch": 0.59, "grad_norm": 1.3389011035323852, "learning_rate": 3.869978052166637e-06, "loss": 0.1583, "step": 8184 }, { "epoch": 0.59, "grad_norm": 1.2086628760258693, "learning_rate": 3.868849650359948e-06, "loss": 0.1756, "step": 8185 }, { "epoch": 0.59, "grad_norm": 1.5768497302293405, "learning_rate": 3.867721309268919e-06, "loss": 0.2028, "step": 8186 }, { "epoch": 0.59, "grad_norm": 6.648931620027212, "learning_rate": 3.86659302895411e-06, "loss": 0.6985, "step": 8187 }, { "epoch": 0.59, "grad_norm": 1.5278640646907544, "learning_rate": 3.865464809476085e-06, "loss": 0.1961, "step": 8188 }, { "epoch": 0.59, "grad_norm": 1.1234800719551359, "learning_rate": 3.8643366508954e-06, "loss": 0.1751, "step": 8189 }, { "epoch": 0.59, "grad_norm": 1.335651297310427, "learning_rate": 3.8632085532726156e-06, "loss": 0.1703, "step": 8190 }, { "epoch": 0.59, "grad_norm": 1.601863663293131, "learning_rate": 3.862080516668277e-06, "loss": 0.2074, "step": 8191 }, { "epoch": 0.59, "grad_norm": 1.1625932326851998, "learning_rate": 3.860952541142935e-06, "loss": 0.1591, "step": 8192 }, { "epoch": 0.59, "grad_norm": 1.2244072588594317, "learning_rate": 3.859824626757137e-06, "loss": 0.1819, "step": 8193 }, { "epoch": 0.59, "grad_norm": 1.4139756552210125, "learning_rate": 3.858696773571422e-06, "loss": 0.217, "step": 8194 }, { "epoch": 0.59, "grad_norm": 1.378263873819433, "learning_rate": 3.857568981646332e-06, "loss": 0.1936, "step": 8195 }, { "epoch": 0.59, "grad_norm": 1.4455239076067516, "learning_rate": 3.856441251042399e-06, "loss": 0.2101, "step": 8196 }, { "epoch": 0.59, "grad_norm": 1.3058299826787094, "learning_rate": 3.8553135818201585e-06, "loss": 0.193, "step": 8197 }, { "epoch": 0.59, "grad_norm": 1.3418131299208313, "learning_rate": 3.854185974040133e-06, "loss": 0.1978, "step": 8198 }, { "epoch": 0.59, "grad_norm": 1.3306553579379938, "learning_rate": 3.853058427762857e-06, "loss": 0.1874, "step": 8199 }, { "epoch": 0.59, "grad_norm": 1.468056985017301, "learning_rate": 3.851930943048845e-06, "loss": 0.2002, "step": 8200 }, { "epoch": 0.59, "grad_norm": 1.2723241790375805, "learning_rate": 3.850803519958621e-06, "loss": 0.1964, "step": 8201 }, { "epoch": 0.59, "grad_norm": 1.4376556542227754, "learning_rate": 3.849676158552697e-06, "loss": 0.2231, "step": 8202 }, { "epoch": 0.59, "grad_norm": 1.2356535534939614, "learning_rate": 3.848548858891588e-06, "loss": 0.1682, "step": 8203 }, { "epoch": 0.59, "grad_norm": 1.4308626690880544, "learning_rate": 3.847421621035801e-06, "loss": 0.1854, "step": 8204 }, { "epoch": 0.59, "grad_norm": 1.384868146765296, "learning_rate": 3.846294445045843e-06, "loss": 0.233, "step": 8205 }, { "epoch": 0.59, "grad_norm": 1.5551709903763036, "learning_rate": 3.845167330982215e-06, "loss": 0.2407, "step": 8206 }, { "epoch": 0.59, "grad_norm": 14.730020667441869, "learning_rate": 3.844040278905418e-06, "loss": 0.7133, "step": 8207 }, { "epoch": 0.59, "grad_norm": 1.4934493691673087, "learning_rate": 3.842913288875944e-06, "loss": 0.2118, "step": 8208 }, { "epoch": 0.59, "grad_norm": 1.2922259669969103, "learning_rate": 3.841786360954291e-06, "loss": 0.1812, "step": 8209 }, { "epoch": 0.59, "grad_norm": 11.701881204586385, "learning_rate": 3.840659495200942e-06, "loss": 0.5567, "step": 8210 }, { "epoch": 0.59, "grad_norm": 1.4461140283267908, "learning_rate": 3.8395326916763875e-06, "loss": 0.1885, "step": 8211 }, { "epoch": 0.59, "grad_norm": 10.267828768474322, "learning_rate": 3.838405950441106e-06, "loss": 0.7477, "step": 8212 }, { "epoch": 0.59, "grad_norm": 1.229406198385201, "learning_rate": 3.83727927155558e-06, "loss": 0.1641, "step": 8213 }, { "epoch": 0.59, "grad_norm": 1.4619759754412416, "learning_rate": 3.836152655080282e-06, "loss": 0.2127, "step": 8214 }, { "epoch": 0.59, "grad_norm": 3.99694058346617, "learning_rate": 3.835026101075687e-06, "loss": 0.5915, "step": 8215 }, { "epoch": 0.59, "grad_norm": 1.2396661704370746, "learning_rate": 3.833899609602262e-06, "loss": 0.1874, "step": 8216 }, { "epoch": 0.59, "grad_norm": 5.080170500603185, "learning_rate": 3.832773180720475e-06, "loss": 0.3854, "step": 8217 }, { "epoch": 0.59, "grad_norm": 1.3525940250399073, "learning_rate": 3.831646814490784e-06, "loss": 0.1734, "step": 8218 }, { "epoch": 0.59, "grad_norm": 1.2768447025790666, "learning_rate": 3.830520510973652e-06, "loss": 0.1735, "step": 8219 }, { "epoch": 0.59, "grad_norm": 1.2940805380524523, "learning_rate": 3.829394270229531e-06, "loss": 0.1896, "step": 8220 }, { "epoch": 0.59, "grad_norm": 1.253648582268682, "learning_rate": 3.8282680923188775e-06, "loss": 0.1625, "step": 8221 }, { "epoch": 0.59, "grad_norm": 1.0743483650338528, "learning_rate": 3.827141977302135e-06, "loss": 0.148, "step": 8222 }, { "epoch": 0.59, "grad_norm": 1.3210407251528782, "learning_rate": 3.826015925239755e-06, "loss": 0.1851, "step": 8223 }, { "epoch": 0.59, "grad_norm": 1.279716320114616, "learning_rate": 3.824889936192173e-06, "loss": 0.1926, "step": 8224 }, { "epoch": 0.59, "grad_norm": 1.3006319973903484, "learning_rate": 3.823764010219831e-06, "loss": 0.1757, "step": 8225 }, { "epoch": 0.59, "grad_norm": 1.427344672363482, "learning_rate": 3.822638147383166e-06, "loss": 0.2296, "step": 8226 }, { "epoch": 0.59, "grad_norm": 1.277319151260448, "learning_rate": 3.821512347742606e-06, "loss": 0.1778, "step": 8227 }, { "epoch": 0.59, "grad_norm": 4.847510883770891, "learning_rate": 3.820386611358583e-06, "loss": 0.6468, "step": 8228 }, { "epoch": 0.59, "grad_norm": 1.399461133991848, "learning_rate": 3.8192609382915195e-06, "loss": 0.2005, "step": 8229 }, { "epoch": 0.59, "grad_norm": 1.2967583961834626, "learning_rate": 3.818135328601838e-06, "loss": 0.1913, "step": 8230 }, { "epoch": 0.59, "grad_norm": 1.413063792608432, "learning_rate": 3.817009782349957e-06, "loss": 0.1684, "step": 8231 }, { "epoch": 0.59, "grad_norm": 1.482659523859138, "learning_rate": 3.815884299596291e-06, "loss": 0.23, "step": 8232 }, { "epoch": 0.59, "grad_norm": 1.4470971548160143, "learning_rate": 3.81475888040125e-06, "loss": 0.207, "step": 8233 }, { "epoch": 0.59, "grad_norm": 1.3435603059391588, "learning_rate": 3.813633524825247e-06, "loss": 0.1617, "step": 8234 }, { "epoch": 0.59, "grad_norm": 1.3268370068650563, "learning_rate": 3.8125082329286807e-06, "loss": 0.1749, "step": 8235 }, { "epoch": 0.59, "grad_norm": 1.3797434450555608, "learning_rate": 3.8113830047719556e-06, "loss": 0.1712, "step": 8236 }, { "epoch": 0.59, "grad_norm": 1.4655184210881325, "learning_rate": 3.8102578404154677e-06, "loss": 0.1528, "step": 8237 }, { "epoch": 0.59, "grad_norm": 1.2641358308136652, "learning_rate": 3.8091327399196143e-06, "loss": 0.1854, "step": 8238 }, { "epoch": 0.59, "grad_norm": 1.399825429243039, "learning_rate": 3.808007703344782e-06, "loss": 0.1862, "step": 8239 }, { "epoch": 0.59, "grad_norm": 1.2113498066344248, "learning_rate": 3.8068827307513624e-06, "loss": 0.1424, "step": 8240 }, { "epoch": 0.59, "grad_norm": 1.2298689145993762, "learning_rate": 3.805757822199736e-06, "loss": 0.1749, "step": 8241 }, { "epoch": 0.59, "grad_norm": 1.3571604770707844, "learning_rate": 3.8046329777502883e-06, "loss": 0.2002, "step": 8242 }, { "epoch": 0.59, "grad_norm": 1.3819176978148626, "learning_rate": 3.80350819746339e-06, "loss": 0.2058, "step": 8243 }, { "epoch": 0.59, "grad_norm": 1.2866728311936917, "learning_rate": 3.802383481399421e-06, "loss": 0.1507, "step": 8244 }, { "epoch": 0.59, "grad_norm": 1.386009820351826, "learning_rate": 3.8012588296187465e-06, "loss": 0.199, "step": 8245 }, { "epoch": 0.59, "grad_norm": 1.227706633242937, "learning_rate": 3.800134242181738e-06, "loss": 0.2003, "step": 8246 }, { "epoch": 0.59, "grad_norm": 1.3693890357411833, "learning_rate": 3.7990097191487544e-06, "loss": 0.1971, "step": 8247 }, { "epoch": 0.59, "grad_norm": 1.3765619997111092, "learning_rate": 3.797885260580159e-06, "loss": 0.2002, "step": 8248 }, { "epoch": 0.59, "grad_norm": 1.3113474115542139, "learning_rate": 3.7967608665363065e-06, "loss": 0.1698, "step": 8249 }, { "epoch": 0.59, "grad_norm": 1.4765323183380141, "learning_rate": 3.7956365370775522e-06, "loss": 0.2076, "step": 8250 }, { "epoch": 0.59, "grad_norm": 1.2571400634408958, "learning_rate": 3.7945122722642416e-06, "loss": 0.1974, "step": 8251 }, { "epoch": 0.59, "grad_norm": 1.2273369888422752, "learning_rate": 3.793388072156724e-06, "loss": 0.1649, "step": 8252 }, { "epoch": 0.59, "grad_norm": 1.4006924757587365, "learning_rate": 3.79226393681534e-06, "loss": 0.1835, "step": 8253 }, { "epoch": 0.59, "grad_norm": 1.3007943997750702, "learning_rate": 3.791139866300433e-06, "loss": 0.1567, "step": 8254 }, { "epoch": 0.59, "grad_norm": 7.344181333223399, "learning_rate": 3.790015860672332e-06, "loss": 0.3917, "step": 8255 }, { "epoch": 0.59, "grad_norm": 1.5594539304519044, "learning_rate": 3.788891919991373e-06, "loss": 0.2237, "step": 8256 }, { "epoch": 0.59, "grad_norm": 1.3925451739585688, "learning_rate": 3.7877680443178856e-06, "loss": 0.1659, "step": 8257 }, { "epoch": 0.59, "grad_norm": 1.2943944826471174, "learning_rate": 3.7866442337121922e-06, "loss": 0.2113, "step": 8258 }, { "epoch": 0.59, "grad_norm": 1.3931469883195335, "learning_rate": 3.7855204882346186e-06, "loss": 0.1529, "step": 8259 }, { "epoch": 0.59, "grad_norm": 1.4331546500990469, "learning_rate": 3.7843968079454773e-06, "loss": 0.2091, "step": 8260 }, { "epoch": 0.59, "grad_norm": 1.1256385703064187, "learning_rate": 3.783273192905088e-06, "loss": 0.1394, "step": 8261 }, { "epoch": 0.59, "grad_norm": 1.3498903273146463, "learning_rate": 3.782149643173758e-06, "loss": 0.1967, "step": 8262 }, { "epoch": 0.59, "grad_norm": 1.2764352541584139, "learning_rate": 3.7810261588118e-06, "loss": 0.1746, "step": 8263 }, { "epoch": 0.59, "grad_norm": 1.350285025648923, "learning_rate": 3.7799027398795123e-06, "loss": 0.1859, "step": 8264 }, { "epoch": 0.59, "grad_norm": 1.3502985092897761, "learning_rate": 3.778779386437199e-06, "loss": 0.1892, "step": 8265 }, { "epoch": 0.59, "grad_norm": 1.406906182444263, "learning_rate": 3.7776560985451564e-06, "loss": 0.1865, "step": 8266 }, { "epoch": 0.59, "grad_norm": 1.130643488150585, "learning_rate": 3.77653287626368e-06, "loss": 0.1516, "step": 8267 }, { "epoch": 0.59, "grad_norm": 1.363674506252413, "learning_rate": 3.7754097196530566e-06, "loss": 0.2292, "step": 8268 }, { "epoch": 0.59, "grad_norm": 1.342491485070118, "learning_rate": 3.7742866287735747e-06, "loss": 0.2078, "step": 8269 }, { "epoch": 0.59, "grad_norm": 1.343696181476595, "learning_rate": 3.7731636036855164e-06, "loss": 0.2149, "step": 8270 }, { "epoch": 0.59, "grad_norm": 1.3334790869628896, "learning_rate": 3.7720406444491643e-06, "loss": 0.1951, "step": 8271 }, { "epoch": 0.59, "grad_norm": 1.2842693707122037, "learning_rate": 3.770917751124789e-06, "loss": 0.1767, "step": 8272 }, { "epoch": 0.59, "grad_norm": 1.3212106337797853, "learning_rate": 3.769794923772668e-06, "loss": 0.1851, "step": 8273 }, { "epoch": 0.59, "grad_norm": 1.2031926842590743, "learning_rate": 3.7686721624530665e-06, "loss": 0.1156, "step": 8274 }, { "epoch": 0.59, "grad_norm": 1.4597403771905013, "learning_rate": 3.7675494672262534e-06, "loss": 0.1718, "step": 8275 }, { "epoch": 0.59, "grad_norm": 1.51198559035681, "learning_rate": 3.766426838152487e-06, "loss": 0.2135, "step": 8276 }, { "epoch": 0.59, "grad_norm": 1.3487929063786075, "learning_rate": 3.765304275292028e-06, "loss": 0.1811, "step": 8277 }, { "epoch": 0.59, "grad_norm": 1.4632126581909304, "learning_rate": 3.764181778705129e-06, "loss": 0.1727, "step": 8278 }, { "epoch": 0.59, "grad_norm": 1.2671296547199031, "learning_rate": 3.763059348452045e-06, "loss": 0.1739, "step": 8279 }, { "epoch": 0.59, "grad_norm": 1.3622540923506359, "learning_rate": 3.7619369845930195e-06, "loss": 0.1927, "step": 8280 }, { "epoch": 0.59, "grad_norm": 1.3850389467070323, "learning_rate": 3.7608146871882985e-06, "loss": 0.2098, "step": 8281 }, { "epoch": 0.59, "grad_norm": 1.419122698963883, "learning_rate": 3.759692456298121e-06, "loss": 0.1995, "step": 8282 }, { "epoch": 0.59, "grad_norm": 1.1817374957157123, "learning_rate": 3.758570291982727e-06, "loss": 0.183, "step": 8283 }, { "epoch": 0.59, "grad_norm": 4.2418702738125456, "learning_rate": 3.7574481943023454e-06, "loss": 0.7239, "step": 8284 }, { "epoch": 0.59, "grad_norm": 1.3127347361271424, "learning_rate": 3.7563261633172105e-06, "loss": 0.2035, "step": 8285 }, { "epoch": 0.59, "grad_norm": 1.23905080016415, "learning_rate": 3.7552041990875444e-06, "loss": 0.1781, "step": 8286 }, { "epoch": 0.59, "grad_norm": 1.2627608692663168, "learning_rate": 3.754082301673574e-06, "loss": 0.1993, "step": 8287 }, { "epoch": 0.59, "grad_norm": 1.294428821791146, "learning_rate": 3.752960471135513e-06, "loss": 0.181, "step": 8288 }, { "epoch": 0.59, "grad_norm": 1.3239653083357747, "learning_rate": 3.7518387075335805e-06, "loss": 0.2374, "step": 8289 }, { "epoch": 0.59, "grad_norm": 1.2991920327128375, "learning_rate": 3.750717010927989e-06, "loss": 0.1855, "step": 8290 }, { "epoch": 0.59, "grad_norm": 1.1021347738004053, "learning_rate": 3.749595381378943e-06, "loss": 0.1408, "step": 8291 }, { "epoch": 0.59, "grad_norm": 1.31935867882752, "learning_rate": 3.748473818946652e-06, "loss": 0.1656, "step": 8292 }, { "epoch": 0.59, "grad_norm": 1.0977527496575792, "learning_rate": 3.747352323691312e-06, "loss": 0.1511, "step": 8293 }, { "epoch": 0.59, "grad_norm": 1.4393089833922683, "learning_rate": 3.746230895673124e-06, "loss": 0.198, "step": 8294 }, { "epoch": 0.59, "grad_norm": 5.7235182633734585, "learning_rate": 3.7451095349522798e-06, "loss": 0.5684, "step": 8295 }, { "epoch": 0.59, "grad_norm": 1.3175745563104093, "learning_rate": 3.743988241588972e-06, "loss": 0.183, "step": 8296 }, { "epoch": 0.59, "grad_norm": 1.323830107527644, "learning_rate": 3.742867015643384e-06, "loss": 0.1791, "step": 8297 }, { "epoch": 0.59, "grad_norm": 1.3924303324238536, "learning_rate": 3.7417458571757016e-06, "loss": 0.1893, "step": 8298 }, { "epoch": 0.59, "grad_norm": 4.70187732558381, "learning_rate": 3.7406247662461004e-06, "loss": 0.6204, "step": 8299 }, { "epoch": 0.59, "grad_norm": 1.3370869566911594, "learning_rate": 3.7395037429147615e-06, "loss": 0.1813, "step": 8300 }, { "epoch": 0.59, "grad_norm": 1.3304972038375855, "learning_rate": 3.738382787241851e-06, "loss": 0.1958, "step": 8301 }, { "epoch": 0.59, "grad_norm": 1.1666797289930977, "learning_rate": 3.7372618992875433e-06, "loss": 0.1303, "step": 8302 }, { "epoch": 0.59, "grad_norm": 1.5048103632722376, "learning_rate": 3.736141079111998e-06, "loss": 0.2026, "step": 8303 }, { "epoch": 0.59, "grad_norm": 1.244507243592505, "learning_rate": 3.73502032677538e-06, "loss": 0.1374, "step": 8304 }, { "epoch": 0.59, "grad_norm": 1.415973427973317, "learning_rate": 3.733899642337844e-06, "loss": 0.1637, "step": 8305 }, { "epoch": 0.59, "grad_norm": 8.466779997603522, "learning_rate": 3.732779025859547e-06, "loss": 0.5694, "step": 8306 }, { "epoch": 0.59, "grad_norm": 1.4688324912054063, "learning_rate": 3.7316584774006354e-06, "loss": 0.2409, "step": 8307 }, { "epoch": 0.59, "grad_norm": 1.5237183546471513, "learning_rate": 3.7305379970212595e-06, "loss": 0.1996, "step": 8308 }, { "epoch": 0.59, "grad_norm": 1.242250545782007, "learning_rate": 3.729417584781559e-06, "loss": 0.1798, "step": 8309 }, { "epoch": 0.59, "grad_norm": 1.282090258760859, "learning_rate": 3.7282972407416774e-06, "loss": 0.187, "step": 8310 }, { "epoch": 0.59, "grad_norm": 1.3401399295235474, "learning_rate": 3.727176964961745e-06, "loss": 0.1722, "step": 8311 }, { "epoch": 0.59, "grad_norm": 1.1523268483665714, "learning_rate": 3.726056757501898e-06, "loss": 0.1352, "step": 8312 }, { "epoch": 0.59, "grad_norm": 1.3578040352652172, "learning_rate": 3.7249366184222613e-06, "loss": 0.1695, "step": 8313 }, { "epoch": 0.59, "grad_norm": 1.123123848221532, "learning_rate": 3.7238165477829645e-06, "loss": 0.1876, "step": 8314 }, { "epoch": 0.59, "grad_norm": 1.469065363856272, "learning_rate": 3.7226965456441233e-06, "loss": 0.2102, "step": 8315 }, { "epoch": 0.59, "grad_norm": 1.322311888156187, "learning_rate": 3.7215766120658568e-06, "loss": 0.1589, "step": 8316 }, { "epoch": 0.59, "grad_norm": 1.521963998706819, "learning_rate": 3.7204567471082793e-06, "loss": 0.2071, "step": 8317 }, { "epoch": 0.59, "grad_norm": 1.2645379441833726, "learning_rate": 3.7193369508315014e-06, "loss": 0.1754, "step": 8318 }, { "epoch": 0.6, "grad_norm": 1.375301815340607, "learning_rate": 3.718217223295627e-06, "loss": 0.205, "step": 8319 }, { "epoch": 0.6, "grad_norm": 1.4260287784795684, "learning_rate": 3.7170975645607587e-06, "loss": 0.187, "step": 8320 }, { "epoch": 0.6, "grad_norm": 1.3546069926375115, "learning_rate": 3.7159779746869984e-06, "loss": 0.2156, "step": 8321 }, { "epoch": 0.6, "grad_norm": 1.3559037643080034, "learning_rate": 3.714858453734438e-06, "loss": 0.2266, "step": 8322 }, { "epoch": 0.6, "grad_norm": 1.3393984147838809, "learning_rate": 3.7137390017631725e-06, "loss": 0.2072, "step": 8323 }, { "epoch": 0.6, "grad_norm": 1.5607051803207834, "learning_rate": 3.712619618833284e-06, "loss": 0.2208, "step": 8324 }, { "epoch": 0.6, "grad_norm": 6.182263001545025, "learning_rate": 3.7115003050048625e-06, "loss": 0.7286, "step": 8325 }, { "epoch": 0.6, "grad_norm": 1.3822935717263325, "learning_rate": 3.7103810603379843e-06, "loss": 0.1631, "step": 8326 }, { "epoch": 0.6, "grad_norm": 1.3903897878007845, "learning_rate": 3.7092618848927287e-06, "loss": 0.1907, "step": 8327 }, { "epoch": 0.6, "grad_norm": 1.4111078928387744, "learning_rate": 3.708142778729166e-06, "loss": 0.1957, "step": 8328 }, { "epoch": 0.6, "grad_norm": 1.300768296528793, "learning_rate": 3.7070237419073674e-06, "loss": 0.1786, "step": 8329 }, { "epoch": 0.6, "grad_norm": 1.4548948432457116, "learning_rate": 3.705904774487396e-06, "loss": 0.2106, "step": 8330 }, { "epoch": 0.6, "grad_norm": 1.4597409202330278, "learning_rate": 3.7047858765293178e-06, "loss": 0.2023, "step": 8331 }, { "epoch": 0.6, "grad_norm": 1.2621307227237388, "learning_rate": 3.7036670480931856e-06, "loss": 0.1591, "step": 8332 }, { "epoch": 0.6, "grad_norm": 1.2036017489465787, "learning_rate": 3.7025482892390584e-06, "loss": 0.1593, "step": 8333 }, { "epoch": 0.6, "grad_norm": 1.3054145603719276, "learning_rate": 3.7014296000269823e-06, "loss": 0.1675, "step": 8334 }, { "epoch": 0.6, "grad_norm": 1.3319224901742184, "learning_rate": 3.700310980517009e-06, "loss": 0.2204, "step": 8335 }, { "epoch": 0.6, "grad_norm": 1.2939423546900832, "learning_rate": 3.6991924307691766e-06, "loss": 0.1688, "step": 8336 }, { "epoch": 0.6, "grad_norm": 1.3024415426511768, "learning_rate": 3.6980739508435283e-06, "loss": 0.1621, "step": 8337 }, { "epoch": 0.6, "grad_norm": 1.3918656068426778, "learning_rate": 3.6969555408000955e-06, "loss": 0.1564, "step": 8338 }, { "epoch": 0.6, "grad_norm": 1.3700234727487073, "learning_rate": 3.695837200698916e-06, "loss": 0.1896, "step": 8339 }, { "epoch": 0.6, "grad_norm": 1.518091278615905, "learning_rate": 3.694718930600012e-06, "loss": 0.1954, "step": 8340 }, { "epoch": 0.6, "grad_norm": 1.2516303827194313, "learning_rate": 3.6936007305634116e-06, "loss": 0.1792, "step": 8341 }, { "epoch": 0.6, "grad_norm": 1.342132593428376, "learning_rate": 3.692482600649132e-06, "loss": 0.168, "step": 8342 }, { "epoch": 0.6, "grad_norm": 1.4798619422975465, "learning_rate": 3.691364540917195e-06, "loss": 0.2215, "step": 8343 }, { "epoch": 0.6, "grad_norm": 1.3107462316622562, "learning_rate": 3.6902465514276075e-06, "loss": 0.1929, "step": 8344 }, { "epoch": 0.6, "grad_norm": 9.736391622133606, "learning_rate": 3.689128632240383e-06, "loss": 0.5382, "step": 8345 }, { "epoch": 0.6, "grad_norm": 1.4084280297576446, "learning_rate": 3.688010783415525e-06, "loss": 0.19, "step": 8346 }, { "epoch": 0.6, "grad_norm": 1.4844754195241587, "learning_rate": 3.686893005013038e-06, "loss": 0.2245, "step": 8347 }, { "epoch": 0.6, "grad_norm": 1.2736474432936669, "learning_rate": 3.685775297092915e-06, "loss": 0.1631, "step": 8348 }, { "epoch": 0.6, "grad_norm": 6.584847713461996, "learning_rate": 3.6846576597151545e-06, "loss": 0.6822, "step": 8349 }, { "epoch": 0.6, "grad_norm": 1.283399436266776, "learning_rate": 3.6835400929397447e-06, "loss": 0.1773, "step": 8350 }, { "epoch": 0.6, "grad_norm": 1.261610272042946, "learning_rate": 3.6824225968266736e-06, "loss": 0.1605, "step": 8351 }, { "epoch": 0.6, "grad_norm": 1.2391470324038933, "learning_rate": 3.6813051714359217e-06, "loss": 0.1729, "step": 8352 }, { "epoch": 0.6, "grad_norm": 1.5542715073205327, "learning_rate": 3.6801878168274697e-06, "loss": 0.2074, "step": 8353 }, { "epoch": 0.6, "grad_norm": 1.183661611213176, "learning_rate": 3.6790705330612924e-06, "loss": 0.1635, "step": 8354 }, { "epoch": 0.6, "grad_norm": 1.40952360089313, "learning_rate": 3.6779533201973606e-06, "loss": 0.1667, "step": 8355 }, { "epoch": 0.6, "grad_norm": 4.843695765403123, "learning_rate": 3.676836178295645e-06, "loss": 0.4557, "step": 8356 }, { "epoch": 0.6, "grad_norm": 5.6487129470323385, "learning_rate": 3.6757191074161034e-06, "loss": 0.6148, "step": 8357 }, { "epoch": 0.6, "grad_norm": 1.2997819865333031, "learning_rate": 3.674602107618701e-06, "loss": 0.1742, "step": 8358 }, { "epoch": 0.6, "grad_norm": 1.2680921027929253, "learning_rate": 3.6734851789633906e-06, "loss": 0.1452, "step": 8359 }, { "epoch": 0.6, "grad_norm": 1.4087280760901224, "learning_rate": 3.672368321510128e-06, "loss": 0.1761, "step": 8360 }, { "epoch": 0.6, "grad_norm": 1.345190441285059, "learning_rate": 3.671251535318857e-06, "loss": 0.1971, "step": 8361 }, { "epoch": 0.6, "grad_norm": 1.393749018793386, "learning_rate": 3.6701348204495262e-06, "loss": 0.1936, "step": 8362 }, { "epoch": 0.6, "grad_norm": 1.5118403122003532, "learning_rate": 3.669018176962074e-06, "loss": 0.1826, "step": 8363 }, { "epoch": 0.6, "grad_norm": 1.3187411805744231, "learning_rate": 3.66790160491644e-06, "loss": 0.1787, "step": 8364 }, { "epoch": 0.6, "grad_norm": 4.8516701564271285, "learning_rate": 3.6667851043725543e-06, "loss": 0.5359, "step": 8365 }, { "epoch": 0.6, "grad_norm": 1.233558914855478, "learning_rate": 3.6656686753903482e-06, "loss": 0.1353, "step": 8366 }, { "epoch": 0.6, "grad_norm": 1.3787787973060996, "learning_rate": 3.6645523180297453e-06, "loss": 0.2266, "step": 8367 }, { "epoch": 0.6, "grad_norm": 1.1780168016353185, "learning_rate": 3.6634360323506714e-06, "loss": 0.18, "step": 8368 }, { "epoch": 0.6, "grad_norm": 1.3633533956493566, "learning_rate": 3.662319818413038e-06, "loss": 0.2268, "step": 8369 }, { "epoch": 0.6, "grad_norm": 1.2731965549745274, "learning_rate": 3.6612036762767646e-06, "loss": 0.1537, "step": 8370 }, { "epoch": 0.6, "grad_norm": 1.3693526121301356, "learning_rate": 3.6600876060017576e-06, "loss": 0.1484, "step": 8371 }, { "epoch": 0.6, "grad_norm": 9.235004671785468, "learning_rate": 3.658971607647927e-06, "loss": 0.4584, "step": 8372 }, { "epoch": 0.6, "grad_norm": 1.3027019794553836, "learning_rate": 3.6578556812751707e-06, "loss": 0.15, "step": 8373 }, { "epoch": 0.6, "grad_norm": 1.4334972852586056, "learning_rate": 3.656739826943391e-06, "loss": 0.1887, "step": 8374 }, { "epoch": 0.6, "grad_norm": 1.2783856344129527, "learning_rate": 3.655624044712479e-06, "loss": 0.1966, "step": 8375 }, { "epoch": 0.6, "grad_norm": 1.3145390929201037, "learning_rate": 3.6545083346423295e-06, "loss": 0.1891, "step": 8376 }, { "epoch": 0.6, "grad_norm": 1.193447655064492, "learning_rate": 3.6533926967928256e-06, "loss": 0.1442, "step": 8377 }, { "epoch": 0.6, "grad_norm": 1.2793217052561896, "learning_rate": 3.6522771312238525e-06, "loss": 0.1954, "step": 8378 }, { "epoch": 0.6, "grad_norm": 1.4348332898916734, "learning_rate": 3.6511616379952886e-06, "loss": 0.1977, "step": 8379 }, { "epoch": 0.6, "grad_norm": 10.050732560496598, "learning_rate": 3.6500462171670104e-06, "loss": 0.6795, "step": 8380 }, { "epoch": 0.6, "grad_norm": 1.4192554461856675, "learning_rate": 3.648930868798887e-06, "loss": 0.1922, "step": 8381 }, { "epoch": 0.6, "grad_norm": 1.2903795974408132, "learning_rate": 3.6478155929507876e-06, "loss": 0.1997, "step": 8382 }, { "epoch": 0.6, "grad_norm": 1.3919959264618358, "learning_rate": 3.6467003896825737e-06, "loss": 0.2132, "step": 8383 }, { "epoch": 0.6, "grad_norm": 1.2785309645998517, "learning_rate": 3.6455852590541075e-06, "loss": 0.159, "step": 8384 }, { "epoch": 0.6, "grad_norm": 1.388438417768651, "learning_rate": 3.6444702011252455e-06, "loss": 0.2019, "step": 8385 }, { "epoch": 0.6, "grad_norm": 1.614838264291656, "learning_rate": 3.643355215955835e-06, "loss": 0.2023, "step": 8386 }, { "epoch": 0.6, "grad_norm": 1.5325605824165516, "learning_rate": 3.6422403036057287e-06, "loss": 0.1623, "step": 8387 }, { "epoch": 0.6, "grad_norm": 1.551157103412736, "learning_rate": 3.641125464134768e-06, "loss": 0.2302, "step": 8388 }, { "epoch": 0.6, "grad_norm": 1.2898447666256956, "learning_rate": 3.640010697602795e-06, "loss": 0.1643, "step": 8389 }, { "epoch": 0.6, "grad_norm": 1.243238201094228, "learning_rate": 3.638896004069644e-06, "loss": 0.1546, "step": 8390 }, { "epoch": 0.6, "grad_norm": 1.3250652271781276, "learning_rate": 3.6377813835951483e-06, "loss": 0.2234, "step": 8391 }, { "epoch": 0.6, "grad_norm": 1.3113914745820412, "learning_rate": 3.636666836239135e-06, "loss": 0.1629, "step": 8392 }, { "epoch": 0.6, "grad_norm": 1.5349374102385616, "learning_rate": 3.635552362061432e-06, "loss": 0.1941, "step": 8393 }, { "epoch": 0.6, "grad_norm": 1.2642342546498229, "learning_rate": 3.6344379611218557e-06, "loss": 0.1877, "step": 8394 }, { "epoch": 0.6, "grad_norm": 1.3365056072929644, "learning_rate": 3.633323633480226e-06, "loss": 0.1673, "step": 8395 }, { "epoch": 0.6, "grad_norm": 1.3971651059673489, "learning_rate": 3.632209379196352e-06, "loss": 0.2178, "step": 8396 }, { "epoch": 0.6, "grad_norm": 1.4960848921120704, "learning_rate": 3.631095198330047e-06, "loss": 0.2026, "step": 8397 }, { "epoch": 0.6, "grad_norm": 1.380448071203612, "learning_rate": 3.6299810909411114e-06, "loss": 0.1791, "step": 8398 }, { "epoch": 0.6, "grad_norm": 4.785850953093162, "learning_rate": 3.6288670570893505e-06, "loss": 0.5208, "step": 8399 }, { "epoch": 0.6, "grad_norm": 1.2476096016344955, "learning_rate": 3.6277530968345552e-06, "loss": 0.1757, "step": 8400 }, { "epoch": 0.6, "grad_norm": 1.3908193244569391, "learning_rate": 3.6266392102365245e-06, "loss": 0.1934, "step": 8401 }, { "epoch": 0.6, "grad_norm": 4.790317570983507, "learning_rate": 3.625525397355044e-06, "loss": 0.5287, "step": 8402 }, { "epoch": 0.6, "grad_norm": 1.513729924869061, "learning_rate": 3.624411658249901e-06, "loss": 0.2312, "step": 8403 }, { "epoch": 0.6, "grad_norm": 3.8320252004375126, "learning_rate": 3.6232979929808735e-06, "loss": 0.3684, "step": 8404 }, { "epoch": 0.6, "grad_norm": 1.2894707673000105, "learning_rate": 3.6221844016077414e-06, "loss": 0.1443, "step": 8405 }, { "epoch": 0.6, "grad_norm": 1.2214325665922086, "learning_rate": 3.621070884190276e-06, "loss": 0.1726, "step": 8406 }, { "epoch": 0.6, "grad_norm": 1.31295387530887, "learning_rate": 3.619957440788249e-06, "loss": 0.1964, "step": 8407 }, { "epoch": 0.6, "grad_norm": 1.3191468058107259, "learning_rate": 3.6188440714614228e-06, "loss": 0.1734, "step": 8408 }, { "epoch": 0.6, "grad_norm": 4.665724523521432, "learning_rate": 3.61773077626956e-06, "loss": 0.5279, "step": 8409 }, { "epoch": 0.6, "grad_norm": 1.3785646298616203, "learning_rate": 3.6166175552724165e-06, "loss": 0.188, "step": 8410 }, { "epoch": 0.6, "grad_norm": 1.555284487043558, "learning_rate": 3.615504408529749e-06, "loss": 0.1838, "step": 8411 }, { "epoch": 0.6, "grad_norm": 1.2856872267074997, "learning_rate": 3.6143913361013026e-06, "loss": 0.2211, "step": 8412 }, { "epoch": 0.6, "grad_norm": 1.6786345925203463, "learning_rate": 3.6132783380468263e-06, "loss": 0.2247, "step": 8413 }, { "epoch": 0.6, "grad_norm": 1.5993502112216864, "learning_rate": 3.6121654144260576e-06, "loss": 0.208, "step": 8414 }, { "epoch": 0.6, "grad_norm": 6.689764043236425, "learning_rate": 3.611052565298738e-06, "loss": 0.4547, "step": 8415 }, { "epoch": 0.6, "grad_norm": 1.3706206541146488, "learning_rate": 3.609939790724597e-06, "loss": 0.1606, "step": 8416 }, { "epoch": 0.6, "grad_norm": 1.4037807889252851, "learning_rate": 3.6088270907633654e-06, "loss": 0.1632, "step": 8417 }, { "epoch": 0.6, "grad_norm": 1.369250976882622, "learning_rate": 3.6077144654747696e-06, "loss": 0.1856, "step": 8418 }, { "epoch": 0.6, "grad_norm": 1.3894419463153862, "learning_rate": 3.6066019149185296e-06, "loss": 0.1974, "step": 8419 }, { "epoch": 0.6, "grad_norm": 1.1476116302668742, "learning_rate": 3.605489439154365e-06, "loss": 0.1515, "step": 8420 }, { "epoch": 0.6, "grad_norm": 1.353712757508189, "learning_rate": 3.604377038241985e-06, "loss": 0.2001, "step": 8421 }, { "epoch": 0.6, "grad_norm": 1.549066734720979, "learning_rate": 3.6032647122411018e-06, "loss": 0.1914, "step": 8422 }, { "epoch": 0.6, "grad_norm": 1.5528898029659586, "learning_rate": 3.602152461211419e-06, "loss": 0.2029, "step": 8423 }, { "epoch": 0.6, "grad_norm": 1.3803578173852133, "learning_rate": 3.6010402852126415e-06, "loss": 0.1916, "step": 8424 }, { "epoch": 0.6, "grad_norm": 4.575511380649091, "learning_rate": 3.5999281843044607e-06, "loss": 0.5809, "step": 8425 }, { "epoch": 0.6, "grad_norm": 1.2146737696212906, "learning_rate": 3.598816158546574e-06, "loss": 0.1653, "step": 8426 }, { "epoch": 0.6, "grad_norm": 1.29875464830727, "learning_rate": 3.597704207998669e-06, "loss": 0.1751, "step": 8427 }, { "epoch": 0.6, "grad_norm": 3.9294187641129517, "learning_rate": 3.5965923327204326e-06, "loss": 0.6068, "step": 8428 }, { "epoch": 0.6, "grad_norm": 1.547543362719488, "learning_rate": 3.5954805327715426e-06, "loss": 0.1821, "step": 8429 }, { "epoch": 0.6, "grad_norm": 1.2397043860833754, "learning_rate": 3.5943688082116784e-06, "loss": 0.1594, "step": 8430 }, { "epoch": 0.6, "grad_norm": 1.2944129480680122, "learning_rate": 3.5932571591005116e-06, "loss": 0.1551, "step": 8431 }, { "epoch": 0.6, "grad_norm": 5.426555619890655, "learning_rate": 3.592145585497713e-06, "loss": 0.5961, "step": 8432 }, { "epoch": 0.6, "grad_norm": 1.6747670621203012, "learning_rate": 3.591034087462945e-06, "loss": 0.2073, "step": 8433 }, { "epoch": 0.6, "grad_norm": 1.4303967749279372, "learning_rate": 3.5899226650558704e-06, "loss": 0.1904, "step": 8434 }, { "epoch": 0.6, "grad_norm": 1.4788903749098592, "learning_rate": 3.588811318336145e-06, "loss": 0.204, "step": 8435 }, { "epoch": 0.6, "grad_norm": 1.3598727880587567, "learning_rate": 3.5877000473634227e-06, "loss": 0.2277, "step": 8436 }, { "epoch": 0.6, "grad_norm": 1.5147111709789234, "learning_rate": 3.5865888521973493e-06, "loss": 0.1692, "step": 8437 }, { "epoch": 0.6, "grad_norm": 1.1423775609675448, "learning_rate": 3.585477732897572e-06, "loss": 0.1564, "step": 8438 }, { "epoch": 0.6, "grad_norm": 1.222204772320094, "learning_rate": 3.58436668952373e-06, "loss": 0.1738, "step": 8439 }, { "epoch": 0.6, "grad_norm": 1.252503334126405, "learning_rate": 3.583255722135462e-06, "loss": 0.172, "step": 8440 }, { "epoch": 0.6, "grad_norm": 4.9193106987262265, "learning_rate": 3.5821448307923957e-06, "loss": 0.6111, "step": 8441 }, { "epoch": 0.6, "grad_norm": 1.3137855379053673, "learning_rate": 3.5810340155541636e-06, "loss": 0.1738, "step": 8442 }, { "epoch": 0.6, "grad_norm": 1.5445580979958053, "learning_rate": 3.579923276480387e-06, "loss": 0.2087, "step": 8443 }, { "epoch": 0.6, "grad_norm": 4.681052262333492, "learning_rate": 3.57881261363069e-06, "loss": 0.5736, "step": 8444 }, { "epoch": 0.6, "grad_norm": 1.3926166746148214, "learning_rate": 3.5777020270646827e-06, "loss": 0.1871, "step": 8445 }, { "epoch": 0.6, "grad_norm": 1.2756241716841266, "learning_rate": 3.576591516841982e-06, "loss": 0.1498, "step": 8446 }, { "epoch": 0.6, "grad_norm": 1.1738736732685149, "learning_rate": 3.575481083022192e-06, "loss": 0.1728, "step": 8447 }, { "epoch": 0.6, "grad_norm": 1.4167673744009057, "learning_rate": 3.5743707256649184e-06, "loss": 0.1805, "step": 8448 }, { "epoch": 0.6, "grad_norm": 1.3696861528187663, "learning_rate": 3.573260444829763e-06, "loss": 0.2219, "step": 8449 }, { "epoch": 0.6, "grad_norm": 1.423873178920901, "learning_rate": 3.5721502405763163e-06, "loss": 0.2212, "step": 8450 }, { "epoch": 0.6, "grad_norm": 5.2514934794189445, "learning_rate": 3.571040112964174e-06, "loss": 0.7691, "step": 8451 }, { "epoch": 0.6, "grad_norm": 1.5438970613979808, "learning_rate": 3.569930062052919e-06, "loss": 0.1896, "step": 8452 }, { "epoch": 0.6, "grad_norm": 1.2103593409757405, "learning_rate": 3.5688200879021402e-06, "loss": 0.161, "step": 8453 }, { "epoch": 0.6, "grad_norm": 1.2014437946414354, "learning_rate": 3.5677101905714106e-06, "loss": 0.1626, "step": 8454 }, { "epoch": 0.6, "grad_norm": 4.164408108726043, "learning_rate": 3.566600370120309e-06, "loss": 0.3726, "step": 8455 }, { "epoch": 0.6, "grad_norm": 1.3764834229506135, "learning_rate": 3.565490626608404e-06, "loss": 0.1938, "step": 8456 }, { "epoch": 0.6, "grad_norm": 1.4079927529388696, "learning_rate": 3.564380960095265e-06, "loss": 0.2147, "step": 8457 }, { "epoch": 0.61, "grad_norm": 1.2339033085450002, "learning_rate": 3.563271370640451e-06, "loss": 0.1644, "step": 8458 }, { "epoch": 0.61, "grad_norm": 1.4137453845990362, "learning_rate": 3.5621618583035223e-06, "loss": 0.1815, "step": 8459 }, { "epoch": 0.61, "grad_norm": 1.3358613037355063, "learning_rate": 3.5610524231440324e-06, "loss": 0.1691, "step": 8460 }, { "epoch": 0.61, "grad_norm": 1.4217867775110316, "learning_rate": 3.5599430652215337e-06, "loss": 0.2288, "step": 8461 }, { "epoch": 0.61, "grad_norm": 1.2527569521361974, "learning_rate": 3.5588337845955678e-06, "loss": 0.1835, "step": 8462 }, { "epoch": 0.61, "grad_norm": 12.05132144997392, "learning_rate": 3.5577245813256807e-06, "loss": 0.7908, "step": 8463 }, { "epoch": 0.61, "grad_norm": 1.2856943258653033, "learning_rate": 3.5566154554714067e-06, "loss": 0.175, "step": 8464 }, { "epoch": 0.61, "grad_norm": 1.545320405462178, "learning_rate": 3.5555064070922827e-06, "loss": 0.2211, "step": 8465 }, { "epoch": 0.61, "grad_norm": 1.3599588398325186, "learning_rate": 3.5543974362478345e-06, "loss": 0.163, "step": 8466 }, { "epoch": 0.61, "grad_norm": 1.3744884093304237, "learning_rate": 3.5532885429975893e-06, "loss": 0.2166, "step": 8467 }, { "epoch": 0.61, "grad_norm": 1.31987026837196, "learning_rate": 3.552179727401067e-06, "loss": 0.2, "step": 8468 }, { "epoch": 0.61, "grad_norm": 1.1821937905733866, "learning_rate": 3.5510709895177873e-06, "loss": 0.127, "step": 8469 }, { "epoch": 0.61, "grad_norm": 1.386302636031659, "learning_rate": 3.5499623294072585e-06, "loss": 0.176, "step": 8470 }, { "epoch": 0.61, "grad_norm": 1.203115265671527, "learning_rate": 3.5488537471289917e-06, "loss": 0.1587, "step": 8471 }, { "epoch": 0.61, "grad_norm": 1.2823763253514968, "learning_rate": 3.5477452427424895e-06, "loss": 0.1925, "step": 8472 }, { "epoch": 0.61, "grad_norm": 1.2766545067833932, "learning_rate": 3.5466368163072563e-06, "loss": 0.1645, "step": 8473 }, { "epoch": 0.61, "grad_norm": 1.359090712367458, "learning_rate": 3.5455284678827818e-06, "loss": 0.1818, "step": 8474 }, { "epoch": 0.61, "grad_norm": 1.3844395691989348, "learning_rate": 3.544420197528562e-06, "loss": 0.1664, "step": 8475 }, { "epoch": 0.61, "grad_norm": 1.3977142316994342, "learning_rate": 3.5433120053040826e-06, "loss": 0.224, "step": 8476 }, { "epoch": 0.61, "grad_norm": 1.3129763370128653, "learning_rate": 3.5422038912688296e-06, "loss": 0.1801, "step": 8477 }, { "epoch": 0.61, "grad_norm": 5.097706968772192, "learning_rate": 3.5410958554822783e-06, "loss": 0.441, "step": 8478 }, { "epoch": 0.61, "grad_norm": 1.4618236398664612, "learning_rate": 3.539987898003906e-06, "loss": 0.1994, "step": 8479 }, { "epoch": 0.61, "grad_norm": 1.243414385200963, "learning_rate": 3.5388800188931825e-06, "loss": 0.2155, "step": 8480 }, { "epoch": 0.61, "grad_norm": 1.1963692063450893, "learning_rate": 3.5377722182095746e-06, "loss": 0.1787, "step": 8481 }, { "epoch": 0.61, "grad_norm": 1.2201024520052748, "learning_rate": 3.5366644960125474e-06, "loss": 0.1603, "step": 8482 }, { "epoch": 0.61, "grad_norm": 1.2555192276633345, "learning_rate": 3.535556852361554e-06, "loss": 0.1947, "step": 8483 }, { "epoch": 0.61, "grad_norm": 1.3147341562824693, "learning_rate": 3.534449287316052e-06, "loss": 0.1803, "step": 8484 }, { "epoch": 0.61, "grad_norm": 1.3494254478569783, "learning_rate": 3.533341800935489e-06, "loss": 0.2184, "step": 8485 }, { "epoch": 0.61, "grad_norm": 1.3230171056413007, "learning_rate": 3.5322343932793133e-06, "loss": 0.1777, "step": 8486 }, { "epoch": 0.61, "grad_norm": 1.271745427557579, "learning_rate": 3.5311270644069624e-06, "loss": 0.1615, "step": 8487 }, { "epoch": 0.61, "grad_norm": 1.1149804083740633, "learning_rate": 3.5300198143778764e-06, "loss": 0.1443, "step": 8488 }, { "epoch": 0.61, "grad_norm": 1.4583341359880058, "learning_rate": 3.5289126432514854e-06, "loss": 0.1959, "step": 8489 }, { "epoch": 0.61, "grad_norm": 1.3441474074986868, "learning_rate": 3.527805551087221e-06, "loss": 0.1883, "step": 8490 }, { "epoch": 0.61, "grad_norm": 3.6319334664071325, "learning_rate": 3.526698537944505e-06, "loss": 0.5411, "step": 8491 }, { "epoch": 0.61, "grad_norm": 1.3184478594161924, "learning_rate": 3.5255916038827586e-06, "loss": 0.1672, "step": 8492 }, { "epoch": 0.61, "grad_norm": 1.3189433111837376, "learning_rate": 3.5244847489613964e-06, "loss": 0.1977, "step": 8493 }, { "epoch": 0.61, "grad_norm": 1.4611787463512182, "learning_rate": 3.523377973239833e-06, "loss": 0.2262, "step": 8494 }, { "epoch": 0.61, "grad_norm": 1.1760501416446874, "learning_rate": 3.5222712767774718e-06, "loss": 0.16, "step": 8495 }, { "epoch": 0.61, "grad_norm": 1.3337550589986982, "learning_rate": 3.52116465963372e-06, "loss": 0.1706, "step": 8496 }, { "epoch": 0.61, "grad_norm": 1.5720199669906814, "learning_rate": 3.5200581218679708e-06, "loss": 0.2146, "step": 8497 }, { "epoch": 0.61, "grad_norm": 1.4128661548772858, "learning_rate": 3.5189516635396254e-06, "loss": 0.1749, "step": 8498 }, { "epoch": 0.61, "grad_norm": 1.4092287292457755, "learning_rate": 3.517845284708069e-06, "loss": 0.1964, "step": 8499 }, { "epoch": 0.61, "grad_norm": 7.310898891779096, "learning_rate": 3.5167389854326907e-06, "loss": 0.4652, "step": 8500 }, { "epoch": 0.61, "grad_norm": 1.3307790284839118, "learning_rate": 3.5156327657728693e-06, "loss": 0.1867, "step": 8501 }, { "epoch": 0.61, "grad_norm": 1.4215756111294309, "learning_rate": 3.514526625787985e-06, "loss": 0.1982, "step": 8502 }, { "epoch": 0.61, "grad_norm": 4.420069258325338, "learning_rate": 3.5134205655374094e-06, "loss": 0.413, "step": 8503 }, { "epoch": 0.61, "grad_norm": 1.4413048153707648, "learning_rate": 3.5123145850805135e-06, "loss": 0.1947, "step": 8504 }, { "epoch": 0.61, "grad_norm": 16.33850354796122, "learning_rate": 3.5112086844766586e-06, "loss": 0.5862, "step": 8505 }, { "epoch": 0.61, "grad_norm": 6.161032185173859, "learning_rate": 3.5101028637852076e-06, "loss": 0.4368, "step": 8506 }, { "epoch": 0.61, "grad_norm": 1.239626846076969, "learning_rate": 3.508997123065515e-06, "loss": 0.1809, "step": 8507 }, { "epoch": 0.61, "grad_norm": 1.2122490677260336, "learning_rate": 3.5078914623769357e-06, "loss": 0.1885, "step": 8508 }, { "epoch": 0.61, "grad_norm": 1.3576104378297111, "learning_rate": 3.5067858817788124e-06, "loss": 0.2293, "step": 8509 }, { "epoch": 0.61, "grad_norm": 1.2182292740448817, "learning_rate": 3.505680381330492e-06, "loss": 0.1358, "step": 8510 }, { "epoch": 0.61, "grad_norm": 4.64415605720798, "learning_rate": 3.5045749610913106e-06, "loss": 0.5023, "step": 8511 }, { "epoch": 0.61, "grad_norm": 1.374680624330204, "learning_rate": 3.503469621120604e-06, "loss": 0.1818, "step": 8512 }, { "epoch": 0.61, "grad_norm": 1.4244841651917741, "learning_rate": 3.5023643614777057e-06, "loss": 0.1718, "step": 8513 }, { "epoch": 0.61, "grad_norm": 1.5219431564764838, "learning_rate": 3.501259182221937e-06, "loss": 0.2314, "step": 8514 }, { "epoch": 0.61, "grad_norm": 4.38404273856023, "learning_rate": 3.5001540834126215e-06, "loss": 0.6031, "step": 8515 }, { "epoch": 0.61, "grad_norm": 1.3256221189615416, "learning_rate": 3.499049065109075e-06, "loss": 0.1961, "step": 8516 }, { "epoch": 0.61, "grad_norm": 6.468821265520208, "learning_rate": 3.497944127370614e-06, "loss": 0.5287, "step": 8517 }, { "epoch": 0.61, "grad_norm": 1.387071117558416, "learning_rate": 3.4968392702565435e-06, "loss": 0.1961, "step": 8518 }, { "epoch": 0.61, "grad_norm": 1.455914613523176, "learning_rate": 3.4957344938261696e-06, "loss": 0.1697, "step": 8519 }, { "epoch": 0.61, "grad_norm": 1.2260581014359513, "learning_rate": 3.4946297981387913e-06, "loss": 0.1469, "step": 8520 }, { "epoch": 0.61, "grad_norm": 1.1562137496669744, "learning_rate": 3.4935251832537063e-06, "loss": 0.1586, "step": 8521 }, { "epoch": 0.61, "grad_norm": 1.1620544698785438, "learning_rate": 3.4924206492302024e-06, "loss": 0.1697, "step": 8522 }, { "epoch": 0.61, "grad_norm": 1.3978741885194694, "learning_rate": 3.4913161961275706e-06, "loss": 0.2127, "step": 8523 }, { "epoch": 0.61, "grad_norm": 1.233596701668589, "learning_rate": 3.4902118240050904e-06, "loss": 0.1766, "step": 8524 }, { "epoch": 0.61, "grad_norm": 1.3796451018382179, "learning_rate": 3.4891075329220436e-06, "loss": 0.1693, "step": 8525 }, { "epoch": 0.61, "grad_norm": 1.2404593049090307, "learning_rate": 3.4880033229376997e-06, "loss": 0.1482, "step": 8526 }, { "epoch": 0.61, "grad_norm": 1.3813321550992355, "learning_rate": 3.4868991941113317e-06, "loss": 0.2027, "step": 8527 }, { "epoch": 0.61, "grad_norm": 1.286488521359285, "learning_rate": 3.485795146502202e-06, "loss": 0.1755, "step": 8528 }, { "epoch": 0.61, "grad_norm": 1.387550183080702, "learning_rate": 3.484691180169576e-06, "loss": 0.2206, "step": 8529 }, { "epoch": 0.61, "grad_norm": 1.2831500810976135, "learning_rate": 3.4835872951727055e-06, "loss": 0.216, "step": 8530 }, { "epoch": 0.61, "grad_norm": 1.0867328869360358, "learning_rate": 3.4824834915708456e-06, "loss": 0.1325, "step": 8531 }, { "epoch": 0.61, "grad_norm": 1.3583162473875354, "learning_rate": 3.481379769423242e-06, "loss": 0.1856, "step": 8532 }, { "epoch": 0.61, "grad_norm": 1.478942780581958, "learning_rate": 3.4802761287891405e-06, "loss": 0.1933, "step": 8533 }, { "epoch": 0.61, "grad_norm": 1.2106054381316174, "learning_rate": 3.479172569727778e-06, "loss": 0.1619, "step": 8534 }, { "epoch": 0.61, "grad_norm": 1.2199101890595334, "learning_rate": 3.4780690922983905e-06, "loss": 0.2098, "step": 8535 }, { "epoch": 0.61, "grad_norm": 1.3208745282789323, "learning_rate": 3.4769656965602073e-06, "loss": 0.1857, "step": 8536 }, { "epoch": 0.61, "grad_norm": 1.2997287844962955, "learning_rate": 3.475862382572456e-06, "loss": 0.1539, "step": 8537 }, { "epoch": 0.61, "grad_norm": 1.3925049095390296, "learning_rate": 3.4747591503943557e-06, "loss": 0.1751, "step": 8538 }, { "epoch": 0.61, "grad_norm": 1.2870860018118642, "learning_rate": 3.473656000085125e-06, "loss": 0.1762, "step": 8539 }, { "epoch": 0.61, "grad_norm": 1.2750415302241722, "learning_rate": 3.472552931703975e-06, "loss": 0.1694, "step": 8540 }, { "epoch": 0.61, "grad_norm": 1.1774431810087114, "learning_rate": 3.4714499453101192e-06, "loss": 0.14, "step": 8541 }, { "epoch": 0.61, "grad_norm": 1.302893428721198, "learning_rate": 3.470347040962754e-06, "loss": 0.1974, "step": 8542 }, { "epoch": 0.61, "grad_norm": 1.3447498927341335, "learning_rate": 3.4692442187210845e-06, "loss": 0.1932, "step": 8543 }, { "epoch": 0.61, "grad_norm": 1.3665477688404022, "learning_rate": 3.4681414786443026e-06, "loss": 0.2049, "step": 8544 }, { "epoch": 0.61, "grad_norm": 1.4784932319332114, "learning_rate": 3.467038820791601e-06, "loss": 0.1989, "step": 8545 }, { "epoch": 0.61, "grad_norm": 1.2612465475073615, "learning_rate": 3.465936245222167e-06, "loss": 0.1869, "step": 8546 }, { "epoch": 0.61, "grad_norm": 1.3226690536907344, "learning_rate": 3.464833751995178e-06, "loss": 0.176, "step": 8547 }, { "epoch": 0.61, "grad_norm": 1.199645469378118, "learning_rate": 3.4637313411698166e-06, "loss": 0.1309, "step": 8548 }, { "epoch": 0.61, "grad_norm": 1.283174495951623, "learning_rate": 3.462629012805252e-06, "loss": 0.1925, "step": 8549 }, { "epoch": 0.61, "grad_norm": 1.2491659858308428, "learning_rate": 3.461526766960656e-06, "loss": 0.1648, "step": 8550 }, { "epoch": 0.61, "grad_norm": 1.368293977817649, "learning_rate": 3.4604246036951895e-06, "loss": 0.1881, "step": 8551 }, { "epoch": 0.61, "grad_norm": 1.2143614843020516, "learning_rate": 3.4593225230680148e-06, "loss": 0.1681, "step": 8552 }, { "epoch": 0.61, "grad_norm": 1.3904016015035026, "learning_rate": 3.4582205251382853e-06, "loss": 0.1871, "step": 8553 }, { "epoch": 0.61, "grad_norm": 1.3847496043895973, "learning_rate": 3.457118609965155e-06, "loss": 0.1719, "step": 8554 }, { "epoch": 0.61, "grad_norm": 5.394336909992013, "learning_rate": 3.456016777607766e-06, "loss": 0.4762, "step": 8555 }, { "epoch": 0.61, "grad_norm": 1.1876925014720798, "learning_rate": 3.4549150281252635e-06, "loss": 0.1466, "step": 8556 }, { "epoch": 0.61, "grad_norm": 1.1284980289732123, "learning_rate": 3.4538133615767832e-06, "loss": 0.1487, "step": 8557 }, { "epoch": 0.61, "grad_norm": 1.3749375526821372, "learning_rate": 3.45271177802146e-06, "loss": 0.1898, "step": 8558 }, { "epoch": 0.61, "grad_norm": 1.551599736672071, "learning_rate": 3.4516102775184206e-06, "loss": 0.1954, "step": 8559 }, { "epoch": 0.61, "grad_norm": 5.314450120501177, "learning_rate": 3.4505088601267913e-06, "loss": 0.5061, "step": 8560 }, { "epoch": 0.61, "grad_norm": 1.2392908694576763, "learning_rate": 3.4494075259056888e-06, "loss": 0.1457, "step": 8561 }, { "epoch": 0.61, "grad_norm": 1.4333085023945549, "learning_rate": 3.448306274914232e-06, "loss": 0.1876, "step": 8562 }, { "epoch": 0.61, "grad_norm": 1.2106012820665915, "learning_rate": 3.4472051072115285e-06, "loss": 0.1581, "step": 8563 }, { "epoch": 0.61, "grad_norm": 4.474993049547695, "learning_rate": 3.4461040228566865e-06, "loss": 0.4627, "step": 8564 }, { "epoch": 0.61, "grad_norm": 1.4390453027233296, "learning_rate": 3.445003021908806e-06, "loss": 0.1912, "step": 8565 }, { "epoch": 0.61, "grad_norm": 1.3546882474831952, "learning_rate": 3.4439021044269882e-06, "loss": 0.1652, "step": 8566 }, { "epoch": 0.61, "grad_norm": 5.299947803263112, "learning_rate": 3.442801270470321e-06, "loss": 0.7075, "step": 8567 }, { "epoch": 0.61, "grad_norm": 1.4338853630270099, "learning_rate": 3.4417005200978965e-06, "loss": 0.2169, "step": 8568 }, { "epoch": 0.61, "grad_norm": 5.374269909410642, "learning_rate": 3.4405998533687955e-06, "loss": 0.6094, "step": 8569 }, { "epoch": 0.61, "grad_norm": 1.330783331304729, "learning_rate": 3.4394992703421026e-06, "loss": 0.2077, "step": 8570 }, { "epoch": 0.61, "grad_norm": 1.362972339875966, "learning_rate": 3.4383987710768863e-06, "loss": 0.1908, "step": 8571 }, { "epoch": 0.61, "grad_norm": 1.2567323106023296, "learning_rate": 3.4372983556322213e-06, "loss": 0.1745, "step": 8572 }, { "epoch": 0.61, "grad_norm": 1.3441258617137768, "learning_rate": 3.4361980240671713e-06, "loss": 0.1963, "step": 8573 }, { "epoch": 0.61, "grad_norm": 1.3541682223384297, "learning_rate": 3.4350977764408005e-06, "loss": 0.1875, "step": 8574 }, { "epoch": 0.61, "grad_norm": 1.2827997444277321, "learning_rate": 3.433997612812162e-06, "loss": 0.2051, "step": 8575 }, { "epoch": 0.61, "grad_norm": 1.485093874537023, "learning_rate": 3.4328975332403105e-06, "loss": 0.2006, "step": 8576 }, { "epoch": 0.61, "grad_norm": 1.2770275800610154, "learning_rate": 3.431797537784295e-06, "loss": 0.1588, "step": 8577 }, { "epoch": 0.61, "grad_norm": 1.1916715099261135, "learning_rate": 3.4306976265031555e-06, "loss": 0.1652, "step": 8578 }, { "epoch": 0.61, "grad_norm": 1.0942575710213116, "learning_rate": 3.429597799455936e-06, "loss": 0.1474, "step": 8579 }, { "epoch": 0.61, "grad_norm": 1.170145344610598, "learning_rate": 3.428498056701665e-06, "loss": 0.173, "step": 8580 }, { "epoch": 0.61, "grad_norm": 1.1989052366242785, "learning_rate": 3.4273983982993765e-06, "loss": 0.1261, "step": 8581 }, { "epoch": 0.61, "grad_norm": 1.3334679788573782, "learning_rate": 3.426298824308093e-06, "loss": 0.158, "step": 8582 }, { "epoch": 0.61, "grad_norm": 1.3749311868200442, "learning_rate": 3.4251993347868396e-06, "loss": 0.1645, "step": 8583 }, { "epoch": 0.61, "grad_norm": 1.270133050243536, "learning_rate": 3.4240999297946266e-06, "loss": 0.1919, "step": 8584 }, { "epoch": 0.61, "grad_norm": 1.088293758333263, "learning_rate": 3.42300060939047e-06, "loss": 0.1448, "step": 8585 }, { "epoch": 0.61, "grad_norm": 1.3454301371616595, "learning_rate": 3.421901373633374e-06, "loss": 0.1789, "step": 8586 }, { "epoch": 0.61, "grad_norm": 1.3861888472450776, "learning_rate": 3.4208022225823455e-06, "loss": 0.1912, "step": 8587 }, { "epoch": 0.61, "grad_norm": 1.5241479967175833, "learning_rate": 3.4197031562963773e-06, "loss": 0.2261, "step": 8588 }, { "epoch": 0.61, "grad_norm": 1.354415413622633, "learning_rate": 3.4186041748344667e-06, "loss": 0.2034, "step": 8589 }, { "epoch": 0.61, "grad_norm": 1.3415751734943744, "learning_rate": 3.4175052782556004e-06, "loss": 0.1938, "step": 8590 }, { "epoch": 0.61, "grad_norm": 1.1854241162055552, "learning_rate": 3.416406466618767e-06, "loss": 0.1482, "step": 8591 }, { "epoch": 0.61, "grad_norm": 1.4344052298001966, "learning_rate": 3.4153077399829393e-06, "loss": 0.1928, "step": 8592 }, { "epoch": 0.61, "grad_norm": 1.446903210475065, "learning_rate": 3.4142090984070997e-06, "loss": 0.1816, "step": 8593 }, { "epoch": 0.61, "grad_norm": 1.2650657161103647, "learning_rate": 3.413110541950212e-06, "loss": 0.1924, "step": 8594 }, { "epoch": 0.61, "grad_norm": 1.34275825572772, "learning_rate": 3.41201207067125e-06, "loss": 0.1774, "step": 8595 }, { "epoch": 0.61, "grad_norm": 1.1968725439007109, "learning_rate": 3.4109136846291684e-06, "loss": 0.1655, "step": 8596 }, { "epoch": 0.61, "grad_norm": 1.3342252411708815, "learning_rate": 3.4098153838829297e-06, "loss": 0.1922, "step": 8597 }, { "epoch": 0.62, "grad_norm": 1.2585929207151196, "learning_rate": 3.4087171684914812e-06, "loss": 0.1854, "step": 8598 }, { "epoch": 0.62, "grad_norm": 1.2843500409183648, "learning_rate": 3.407619038513774e-06, "loss": 0.2041, "step": 8599 }, { "epoch": 0.62, "grad_norm": 1.3102132790374756, "learning_rate": 3.4065209940087507e-06, "loss": 0.164, "step": 8600 }, { "epoch": 0.62, "grad_norm": 1.2022603341184783, "learning_rate": 3.405423035035351e-06, "loss": 0.1469, "step": 8601 }, { "epoch": 0.62, "grad_norm": 1.2165682177119461, "learning_rate": 3.404325161652506e-06, "loss": 0.1853, "step": 8602 }, { "epoch": 0.62, "grad_norm": 1.3976224667382484, "learning_rate": 3.4032273739191484e-06, "loss": 0.1832, "step": 8603 }, { "epoch": 0.62, "grad_norm": 1.2073596262191757, "learning_rate": 3.4021296718942006e-06, "loss": 0.1825, "step": 8604 }, { "epoch": 0.62, "grad_norm": 1.5514805707547807, "learning_rate": 3.401032055636586e-06, "loss": 0.2075, "step": 8605 }, { "epoch": 0.62, "grad_norm": 1.3860784010625111, "learning_rate": 3.3999345252052162e-06, "loss": 0.1832, "step": 8606 }, { "epoch": 0.62, "grad_norm": 6.025647968863036, "learning_rate": 3.3988370806590054e-06, "loss": 0.6009, "step": 8607 }, { "epoch": 0.62, "grad_norm": 1.6252164696348075, "learning_rate": 3.3977397220568575e-06, "loss": 0.1905, "step": 8608 }, { "epoch": 0.62, "grad_norm": 1.2903631407541527, "learning_rate": 3.396642449457677e-06, "loss": 0.1854, "step": 8609 }, { "epoch": 0.62, "grad_norm": 1.2459143873617287, "learning_rate": 3.395545262920361e-06, "loss": 0.2184, "step": 8610 }, { "epoch": 0.62, "grad_norm": 1.364433543838583, "learning_rate": 3.3944481625037996e-06, "loss": 0.2071, "step": 8611 }, { "epoch": 0.62, "grad_norm": 1.2409111497032799, "learning_rate": 3.3933511482668834e-06, "loss": 0.1307, "step": 8612 }, { "epoch": 0.62, "grad_norm": 1.300492055125391, "learning_rate": 3.3922542202684934e-06, "loss": 0.1837, "step": 8613 }, { "epoch": 0.62, "grad_norm": 6.937449676392841, "learning_rate": 3.3911573785675117e-06, "loss": 0.6464, "step": 8614 }, { "epoch": 0.62, "grad_norm": 1.3565271169299857, "learning_rate": 3.3900606232228086e-06, "loss": 0.1765, "step": 8615 }, { "epoch": 0.62, "grad_norm": 1.276844932018481, "learning_rate": 3.3889639542932563e-06, "loss": 0.1798, "step": 8616 }, { "epoch": 0.62, "grad_norm": 1.3132890154316432, "learning_rate": 3.387867371837717e-06, "loss": 0.1746, "step": 8617 }, { "epoch": 0.62, "grad_norm": 1.374320787321818, "learning_rate": 3.3867708759150554e-06, "loss": 0.1991, "step": 8618 }, { "epoch": 0.62, "grad_norm": 1.3112896799821265, "learning_rate": 3.385674466584121e-06, "loss": 0.1821, "step": 8619 }, { "epoch": 0.62, "grad_norm": 1.6715102859251778, "learning_rate": 3.3845781439037695e-06, "loss": 0.2184, "step": 8620 }, { "epoch": 0.62, "grad_norm": 1.340800479331654, "learning_rate": 3.3834819079328446e-06, "loss": 0.1812, "step": 8621 }, { "epoch": 0.62, "grad_norm": 1.3325544478535725, "learning_rate": 3.3823857587301913e-06, "loss": 0.177, "step": 8622 }, { "epoch": 0.62, "grad_norm": 1.339146441401021, "learning_rate": 3.381289696354641e-06, "loss": 0.213, "step": 8623 }, { "epoch": 0.62, "grad_norm": 1.340317293636075, "learning_rate": 3.38019372086503e-06, "loss": 0.1515, "step": 8624 }, { "epoch": 0.62, "grad_norm": 1.4349589869108337, "learning_rate": 3.379097832320185e-06, "loss": 0.2018, "step": 8625 }, { "epoch": 0.62, "grad_norm": 1.4228090997176617, "learning_rate": 3.3780020307789303e-06, "loss": 0.1666, "step": 8626 }, { "epoch": 0.62, "grad_norm": 1.164790870175347, "learning_rate": 3.3769063163000803e-06, "loss": 0.1584, "step": 8627 }, { "epoch": 0.62, "grad_norm": 1.1100530915661122, "learning_rate": 3.3758106889424526e-06, "loss": 0.1557, "step": 8628 }, { "epoch": 0.62, "grad_norm": 1.5657948720855377, "learning_rate": 3.3747151487648533e-06, "loss": 0.1978, "step": 8629 }, { "epoch": 0.62, "grad_norm": 4.4045824313513, "learning_rate": 3.3736196958260902e-06, "loss": 0.6103, "step": 8630 }, { "epoch": 0.62, "grad_norm": 1.383326993430047, "learning_rate": 3.372524330184958e-06, "loss": 0.1916, "step": 8631 }, { "epoch": 0.62, "grad_norm": 1.3498457385592173, "learning_rate": 3.371429051900256e-06, "loss": 0.1817, "step": 8632 }, { "epoch": 0.62, "grad_norm": 1.2339949154542118, "learning_rate": 3.3703338610307707e-06, "loss": 0.1685, "step": 8633 }, { "epoch": 0.62, "grad_norm": 1.10639255004049, "learning_rate": 3.3692387576352914e-06, "loss": 0.1515, "step": 8634 }, { "epoch": 0.62, "grad_norm": 1.398060173340639, "learning_rate": 3.3681437417725954e-06, "loss": 0.1881, "step": 8635 }, { "epoch": 0.62, "grad_norm": 17.782464745225116, "learning_rate": 3.3670488135014613e-06, "loss": 0.5967, "step": 8636 }, { "epoch": 0.62, "grad_norm": 1.175138701246457, "learning_rate": 3.365953972880658e-06, "loss": 0.184, "step": 8637 }, { "epoch": 0.62, "grad_norm": 1.494031360321678, "learning_rate": 3.3648592199689567e-06, "loss": 0.195, "step": 8638 }, { "epoch": 0.62, "grad_norm": 1.3148881394671816, "learning_rate": 3.3637645548251137e-06, "loss": 0.1788, "step": 8639 }, { "epoch": 0.62, "grad_norm": 1.3346393964753769, "learning_rate": 3.3626699775078884e-06, "loss": 0.1711, "step": 8640 }, { "epoch": 0.62, "grad_norm": 1.2853281487364556, "learning_rate": 3.3615754880760358e-06, "loss": 0.1833, "step": 8641 }, { "epoch": 0.62, "grad_norm": 1.3891862089959266, "learning_rate": 3.360481086588301e-06, "loss": 0.1794, "step": 8642 }, { "epoch": 0.62, "grad_norm": 1.3623478850126345, "learning_rate": 3.3593867731034297e-06, "loss": 0.1668, "step": 8643 }, { "epoch": 0.62, "grad_norm": 1.3867393329906643, "learning_rate": 3.3582925476801563e-06, "loss": 0.1928, "step": 8644 }, { "epoch": 0.62, "grad_norm": 1.3041854721246153, "learning_rate": 3.3571984103772183e-06, "loss": 0.1728, "step": 8645 }, { "epoch": 0.62, "grad_norm": 1.1347276111334628, "learning_rate": 3.3561043612533416e-06, "loss": 0.1496, "step": 8646 }, { "epoch": 0.62, "grad_norm": 5.738822657597331, "learning_rate": 3.3550104003672545e-06, "loss": 0.6189, "step": 8647 }, { "epoch": 0.62, "grad_norm": 1.3465038860276453, "learning_rate": 3.353916527777672e-06, "loss": 0.1718, "step": 8648 }, { "epoch": 0.62, "grad_norm": 1.1751736808380955, "learning_rate": 3.3528227435433114e-06, "loss": 0.1571, "step": 8649 }, { "epoch": 0.62, "grad_norm": 1.2187797434548493, "learning_rate": 3.3517290477228804e-06, "loss": 0.1634, "step": 8650 }, { "epoch": 0.62, "grad_norm": 1.2430212514361907, "learning_rate": 3.3506354403750886e-06, "loss": 0.1888, "step": 8651 }, { "epoch": 0.62, "grad_norm": 1.4736642082777354, "learning_rate": 3.3495419215586324e-06, "loss": 0.1964, "step": 8652 }, { "epoch": 0.62, "grad_norm": 1.2958159435682504, "learning_rate": 3.348448491332209e-06, "loss": 0.1774, "step": 8653 }, { "epoch": 0.62, "grad_norm": 1.40179239589177, "learning_rate": 3.3473551497545087e-06, "loss": 0.2551, "step": 8654 }, { "epoch": 0.62, "grad_norm": 1.1538580964288585, "learning_rate": 3.3462618968842197e-06, "loss": 0.1865, "step": 8655 }, { "epoch": 0.62, "grad_norm": 1.364450028534952, "learning_rate": 3.345168732780021e-06, "loss": 0.18, "step": 8656 }, { "epoch": 0.62, "grad_norm": 1.3086104426993366, "learning_rate": 3.3440756575005905e-06, "loss": 0.1646, "step": 8657 }, { "epoch": 0.62, "grad_norm": 1.407745359289409, "learning_rate": 3.3429826711046e-06, "loss": 0.1852, "step": 8658 }, { "epoch": 0.62, "grad_norm": 1.2063331389397043, "learning_rate": 3.3418897736507184e-06, "loss": 0.1844, "step": 8659 }, { "epoch": 0.62, "grad_norm": 1.3126321331279238, "learning_rate": 3.3407969651976045e-06, "loss": 0.1606, "step": 8660 }, { "epoch": 0.62, "grad_norm": 5.789337471685698, "learning_rate": 3.3397042458039186e-06, "loss": 0.7628, "step": 8661 }, { "epoch": 0.62, "grad_norm": 1.3567526927523057, "learning_rate": 3.3386116155283122e-06, "loss": 0.1744, "step": 8662 }, { "epoch": 0.62, "grad_norm": 1.3635409368629408, "learning_rate": 3.3375190744294357e-06, "loss": 0.201, "step": 8663 }, { "epoch": 0.62, "grad_norm": 1.4102701693184723, "learning_rate": 3.3364266225659283e-06, "loss": 0.1877, "step": 8664 }, { "epoch": 0.62, "grad_norm": 1.3239105153571677, "learning_rate": 3.335334259996432e-06, "loss": 0.1661, "step": 8665 }, { "epoch": 0.62, "grad_norm": 7.037669634476579, "learning_rate": 3.3342419867795785e-06, "loss": 0.6535, "step": 8666 }, { "epoch": 0.62, "grad_norm": 1.258586904227841, "learning_rate": 3.333149802973999e-06, "loss": 0.1748, "step": 8667 }, { "epoch": 0.62, "grad_norm": 1.2303489922391702, "learning_rate": 3.3320577086383144e-06, "loss": 0.1665, "step": 8668 }, { "epoch": 0.62, "grad_norm": 6.951440224966758, "learning_rate": 3.330965703831146e-06, "loss": 0.6723, "step": 8669 }, { "epoch": 0.62, "grad_norm": 1.3095504626776195, "learning_rate": 3.3298737886111075e-06, "loss": 0.1728, "step": 8670 }, { "epoch": 0.62, "grad_norm": 1.412882263641033, "learning_rate": 3.3287819630368113e-06, "loss": 0.1597, "step": 8671 }, { "epoch": 0.62, "grad_norm": 1.3659364983316864, "learning_rate": 3.327690227166857e-06, "loss": 0.1822, "step": 8672 }, { "epoch": 0.62, "grad_norm": 1.3249393799676707, "learning_rate": 3.326598581059848e-06, "loss": 0.1927, "step": 8673 }, { "epoch": 0.62, "grad_norm": 1.3610729051344845, "learning_rate": 3.3255070247743802e-06, "loss": 0.1794, "step": 8674 }, { "epoch": 0.62, "grad_norm": 1.4451311227039474, "learning_rate": 3.324415558369042e-06, "loss": 0.1971, "step": 8675 }, { "epoch": 0.62, "grad_norm": 1.4213584204350374, "learning_rate": 3.323324181902422e-06, "loss": 0.1619, "step": 8676 }, { "epoch": 0.62, "grad_norm": 1.3907900192148737, "learning_rate": 3.322232895433096e-06, "loss": 0.1667, "step": 8677 }, { "epoch": 0.62, "grad_norm": 1.4097187806244211, "learning_rate": 3.3211416990196444e-06, "loss": 0.171, "step": 8678 }, { "epoch": 0.62, "grad_norm": 1.5090879791713012, "learning_rate": 3.3200505927206363e-06, "loss": 0.2063, "step": 8679 }, { "epoch": 0.62, "grad_norm": 1.336233840270013, "learning_rate": 3.3189595765946394e-06, "loss": 0.187, "step": 8680 }, { "epoch": 0.62, "grad_norm": 1.3438193215760068, "learning_rate": 3.3178686507002117e-06, "loss": 0.1925, "step": 8681 }, { "epoch": 0.62, "grad_norm": 1.5187105238626142, "learning_rate": 3.316777815095914e-06, "loss": 0.2203, "step": 8682 }, { "epoch": 0.62, "grad_norm": 1.1577282388810495, "learning_rate": 3.3156870698402942e-06, "loss": 0.1566, "step": 8683 }, { "epoch": 0.62, "grad_norm": 1.2500257738216274, "learning_rate": 3.3145964149919034e-06, "loss": 0.1431, "step": 8684 }, { "epoch": 0.62, "grad_norm": 1.3162291838810491, "learning_rate": 3.31350585060928e-06, "loss": 0.1863, "step": 8685 }, { "epoch": 0.62, "grad_norm": 1.2031501120221884, "learning_rate": 3.3124153767509626e-06, "loss": 0.1749, "step": 8686 }, { "epoch": 0.62, "grad_norm": 6.352768427778713, "learning_rate": 3.3113249934754823e-06, "loss": 0.5964, "step": 8687 }, { "epoch": 0.62, "grad_norm": 5.001087248451887, "learning_rate": 3.3102347008413703e-06, "loss": 0.6842, "step": 8688 }, { "epoch": 0.62, "grad_norm": 4.597846118714508, "learning_rate": 3.309144498907144e-06, "loss": 0.5868, "step": 8689 }, { "epoch": 0.62, "grad_norm": 4.566956700977014, "learning_rate": 3.3080543877313244e-06, "loss": 0.4193, "step": 8690 }, { "epoch": 0.62, "grad_norm": 5.827586582477459, "learning_rate": 3.306964367372423e-06, "loss": 0.6278, "step": 8691 }, { "epoch": 0.62, "grad_norm": 1.2530552735413754, "learning_rate": 3.30587443788895e-06, "loss": 0.1519, "step": 8692 }, { "epoch": 0.62, "grad_norm": 1.175532372256806, "learning_rate": 3.3047845993394047e-06, "loss": 0.1467, "step": 8693 }, { "epoch": 0.62, "grad_norm": 1.3687162231977779, "learning_rate": 3.30369485178229e-06, "loss": 0.1955, "step": 8694 }, { "epoch": 0.62, "grad_norm": 1.3478984479182248, "learning_rate": 3.3026051952760928e-06, "loss": 0.1635, "step": 8695 }, { "epoch": 0.62, "grad_norm": 1.3179963864441426, "learning_rate": 3.301515629879309e-06, "loss": 0.1966, "step": 8696 }, { "epoch": 0.62, "grad_norm": 1.293834634343991, "learning_rate": 3.3004261556504157e-06, "loss": 0.1319, "step": 8697 }, { "epoch": 0.62, "grad_norm": 5.408175906736706, "learning_rate": 3.299336772647897e-06, "loss": 0.5648, "step": 8698 }, { "epoch": 0.62, "grad_norm": 1.4764772893137355, "learning_rate": 3.2982474809302216e-06, "loss": 0.1832, "step": 8699 }, { "epoch": 0.62, "grad_norm": 1.3388818864796406, "learning_rate": 3.2971582805558622e-06, "loss": 0.1996, "step": 8700 }, { "epoch": 0.62, "grad_norm": 1.3668078187000845, "learning_rate": 3.2960691715832793e-06, "loss": 0.1624, "step": 8701 }, { "epoch": 0.62, "grad_norm": 1.3104181352541406, "learning_rate": 3.294980154070937e-06, "loss": 0.1753, "step": 8702 }, { "epoch": 0.62, "grad_norm": 1.4032214548601905, "learning_rate": 3.293891228077284e-06, "loss": 0.16, "step": 8703 }, { "epoch": 0.62, "grad_norm": 6.334905072313115, "learning_rate": 3.292802393660771e-06, "loss": 0.5789, "step": 8704 }, { "epoch": 0.62, "grad_norm": 1.2919248886937025, "learning_rate": 3.2917136508798452e-06, "loss": 0.1479, "step": 8705 }, { "epoch": 0.62, "grad_norm": 1.6307168402080758, "learning_rate": 3.2906249997929427e-06, "loss": 0.2102, "step": 8706 }, { "epoch": 0.62, "grad_norm": 1.338258011451746, "learning_rate": 3.289536440458501e-06, "loss": 0.1802, "step": 8707 }, { "epoch": 0.62, "grad_norm": 1.529269840966855, "learning_rate": 3.288447972934946e-06, "loss": 0.1868, "step": 8708 }, { "epoch": 0.62, "grad_norm": 1.4843884856604834, "learning_rate": 3.2873595972807054e-06, "loss": 0.1805, "step": 8709 }, { "epoch": 0.62, "grad_norm": 1.3975063464460078, "learning_rate": 3.286271313554196e-06, "loss": 0.2047, "step": 8710 }, { "epoch": 0.62, "grad_norm": 1.3884893827525335, "learning_rate": 3.2851831218138373e-06, "loss": 0.2142, "step": 8711 }, { "epoch": 0.62, "grad_norm": 1.3780908452096792, "learning_rate": 3.2840950221180336e-06, "loss": 0.1927, "step": 8712 }, { "epoch": 0.62, "grad_norm": 1.1841892506778477, "learning_rate": 3.2830070145251926e-06, "loss": 0.1427, "step": 8713 }, { "epoch": 0.62, "grad_norm": 1.2846183383816219, "learning_rate": 3.2819190990937134e-06, "loss": 0.1898, "step": 8714 }, { "epoch": 0.62, "grad_norm": 4.448947175387091, "learning_rate": 3.2808312758819926e-06, "loss": 0.5468, "step": 8715 }, { "epoch": 0.62, "grad_norm": 1.2113134088730473, "learning_rate": 3.2797435449484182e-06, "loss": 0.1835, "step": 8716 }, { "epoch": 0.62, "grad_norm": 1.2880639486876093, "learning_rate": 3.2786559063513763e-06, "loss": 0.1825, "step": 8717 }, { "epoch": 0.62, "grad_norm": 6.950657622118116, "learning_rate": 3.2775683601492457e-06, "loss": 0.4313, "step": 8718 }, { "epoch": 0.62, "grad_norm": 1.431133487427325, "learning_rate": 3.2764809064004055e-06, "loss": 0.2174, "step": 8719 }, { "epoch": 0.62, "grad_norm": 1.3023604295307964, "learning_rate": 3.27539354516322e-06, "loss": 0.1718, "step": 8720 }, { "epoch": 0.62, "grad_norm": 1.3631230861078312, "learning_rate": 3.2743062764960594e-06, "loss": 0.1796, "step": 8721 }, { "epoch": 0.62, "grad_norm": 1.4696938709677243, "learning_rate": 3.2732191004572806e-06, "loss": 0.2179, "step": 8722 }, { "epoch": 0.62, "grad_norm": 1.2570828363237014, "learning_rate": 3.272132017105242e-06, "loss": 0.1763, "step": 8723 }, { "epoch": 0.62, "grad_norm": 1.5099290606694302, "learning_rate": 3.2710450264982906e-06, "loss": 0.1718, "step": 8724 }, { "epoch": 0.62, "grad_norm": 1.276085479829146, "learning_rate": 3.2699581286947747e-06, "loss": 0.1577, "step": 8725 }, { "epoch": 0.62, "grad_norm": 1.253008245438098, "learning_rate": 3.268871323753032e-06, "loss": 0.1708, "step": 8726 }, { "epoch": 0.62, "grad_norm": 1.260099351378492, "learning_rate": 3.2677846117314016e-06, "loss": 0.179, "step": 8727 }, { "epoch": 0.62, "grad_norm": 1.287806581986106, "learning_rate": 3.266697992688209e-06, "loss": 0.1715, "step": 8728 }, { "epoch": 0.62, "grad_norm": 1.4057828993213628, "learning_rate": 3.265611466681784e-06, "loss": 0.2027, "step": 8729 }, { "epoch": 0.62, "grad_norm": 1.5198456269055516, "learning_rate": 3.2645250337704437e-06, "loss": 0.2214, "step": 8730 }, { "epoch": 0.62, "grad_norm": 5.544071585132708, "learning_rate": 3.2634386940125064e-06, "loss": 0.7532, "step": 8731 }, { "epoch": 0.62, "grad_norm": 1.3841289846779117, "learning_rate": 3.2623524474662792e-06, "loss": 0.1755, "step": 8732 }, { "epoch": 0.62, "grad_norm": 1.3763059404463562, "learning_rate": 3.2612662941900707e-06, "loss": 0.1913, "step": 8733 }, { "epoch": 0.62, "grad_norm": 1.2745271305190085, "learning_rate": 3.2601802342421784e-06, "loss": 0.1881, "step": 8734 }, { "epoch": 0.62, "grad_norm": 1.4221743138015968, "learning_rate": 3.2590942676809017e-06, "loss": 0.1853, "step": 8735 }, { "epoch": 0.62, "grad_norm": 1.1745237432487898, "learning_rate": 3.2580083945645264e-06, "loss": 0.1426, "step": 8736 }, { "epoch": 0.62, "grad_norm": 1.3106647306835637, "learning_rate": 3.2569226149513393e-06, "loss": 0.1523, "step": 8737 }, { "epoch": 0.63, "grad_norm": 1.2036955580314888, "learning_rate": 3.2558369288996226e-06, "loss": 0.1834, "step": 8738 }, { "epoch": 0.63, "grad_norm": 1.4457982528688325, "learning_rate": 3.2547513364676497e-06, "loss": 0.2283, "step": 8739 }, { "epoch": 0.63, "grad_norm": 1.1927022607588957, "learning_rate": 3.253665837713694e-06, "loss": 0.1373, "step": 8740 }, { "epoch": 0.63, "grad_norm": 1.4644863457255979, "learning_rate": 3.252580432696015e-06, "loss": 0.1791, "step": 8741 }, { "epoch": 0.63, "grad_norm": 1.2411593019173135, "learning_rate": 3.2514951214728787e-06, "loss": 0.1775, "step": 8742 }, { "epoch": 0.63, "grad_norm": 1.6312403767158474, "learning_rate": 3.250409904102536e-06, "loss": 0.2337, "step": 8743 }, { "epoch": 0.63, "grad_norm": 5.397134312988982, "learning_rate": 3.2493247806432403e-06, "loss": 0.7022, "step": 8744 }, { "epoch": 0.63, "grad_norm": 1.2072114269845415, "learning_rate": 3.248239751153235e-06, "loss": 0.1655, "step": 8745 }, { "epoch": 0.63, "grad_norm": 1.2428445910544852, "learning_rate": 3.24715481569076e-06, "loss": 0.1563, "step": 8746 }, { "epoch": 0.63, "grad_norm": 5.229613614904168, "learning_rate": 3.24606997431405e-06, "loss": 0.5724, "step": 8747 }, { "epoch": 0.63, "grad_norm": 1.4130183422064464, "learning_rate": 3.2449852270813386e-06, "loss": 0.158, "step": 8748 }, { "epoch": 0.63, "grad_norm": 1.2522743976032682, "learning_rate": 3.243900574050845e-06, "loss": 0.1919, "step": 8749 }, { "epoch": 0.63, "grad_norm": 1.3657312217598976, "learning_rate": 3.2428160152807934e-06, "loss": 0.183, "step": 8750 }, { "epoch": 0.63, "grad_norm": 1.2258537032764811, "learning_rate": 3.241731550829396e-06, "loss": 0.1916, "step": 8751 }, { "epoch": 0.63, "grad_norm": 1.284224399031125, "learning_rate": 3.240647180754866e-06, "loss": 0.1295, "step": 8752 }, { "epoch": 0.63, "grad_norm": 1.304655458964003, "learning_rate": 3.239562905115404e-06, "loss": 0.1539, "step": 8753 }, { "epoch": 0.63, "grad_norm": 1.0721800562513246, "learning_rate": 3.2384787239692115e-06, "loss": 0.1605, "step": 8754 }, { "epoch": 0.63, "grad_norm": 1.225840395998788, "learning_rate": 3.2373946373744826e-06, "loss": 0.1968, "step": 8755 }, { "epoch": 0.63, "grad_norm": 1.4181937716000355, "learning_rate": 3.236310645389409e-06, "loss": 0.1724, "step": 8756 }, { "epoch": 0.63, "grad_norm": 1.446415193838621, "learning_rate": 3.235226748072171e-06, "loss": 0.183, "step": 8757 }, { "epoch": 0.63, "grad_norm": 1.3321038927362396, "learning_rate": 3.2341429454809516e-06, "loss": 0.1614, "step": 8758 }, { "epoch": 0.63, "grad_norm": 1.2657181253538574, "learning_rate": 3.233059237673922e-06, "loss": 0.1661, "step": 8759 }, { "epoch": 0.63, "grad_norm": 1.3646892380370144, "learning_rate": 3.2319756247092552e-06, "loss": 0.1895, "step": 8760 }, { "epoch": 0.63, "grad_norm": 1.361351781035548, "learning_rate": 3.230892106645111e-06, "loss": 0.1674, "step": 8761 }, { "epoch": 0.63, "grad_norm": 1.199405732815531, "learning_rate": 3.229808683539651e-06, "loss": 0.1511, "step": 8762 }, { "epoch": 0.63, "grad_norm": 1.2769623701382478, "learning_rate": 3.2287253554510267e-06, "loss": 0.1514, "step": 8763 }, { "epoch": 0.63, "grad_norm": 1.5259361861573943, "learning_rate": 3.2276421224373912e-06, "loss": 0.1808, "step": 8764 }, { "epoch": 0.63, "grad_norm": 1.387472104348445, "learning_rate": 3.2265589845568835e-06, "loss": 0.1902, "step": 8765 }, { "epoch": 0.63, "grad_norm": 1.6739096738981114, "learning_rate": 3.225475941867644e-06, "loss": 0.1907, "step": 8766 }, { "epoch": 0.63, "grad_norm": 1.4426136569362213, "learning_rate": 3.2243929944278065e-06, "loss": 0.1627, "step": 8767 }, { "epoch": 0.63, "grad_norm": 1.365149131194602, "learning_rate": 3.2233101422954975e-06, "loss": 0.177, "step": 8768 }, { "epoch": 0.63, "grad_norm": 1.4075727220349132, "learning_rate": 3.2222273855288446e-06, "loss": 0.2027, "step": 8769 }, { "epoch": 0.63, "grad_norm": 1.4576376140699303, "learning_rate": 3.2211447241859606e-06, "loss": 0.2046, "step": 8770 }, { "epoch": 0.63, "grad_norm": 1.24032341729528, "learning_rate": 3.2200621583249613e-06, "loss": 0.1636, "step": 8771 }, { "epoch": 0.63, "grad_norm": 1.306733781425664, "learning_rate": 3.2189796880039535e-06, "loss": 0.1922, "step": 8772 }, { "epoch": 0.63, "grad_norm": 1.28233687522276, "learning_rate": 3.217897313281043e-06, "loss": 0.1702, "step": 8773 }, { "epoch": 0.63, "grad_norm": 1.3472196666251426, "learning_rate": 3.216815034214322e-06, "loss": 0.1856, "step": 8774 }, { "epoch": 0.63, "grad_norm": 1.563654025155306, "learning_rate": 3.2157328508618873e-06, "loss": 0.2125, "step": 8775 }, { "epoch": 0.63, "grad_norm": 1.3637711791766904, "learning_rate": 3.2146507632818237e-06, "loss": 0.1681, "step": 8776 }, { "epoch": 0.63, "grad_norm": 1.5331361845218694, "learning_rate": 3.213568771532216e-06, "loss": 0.2084, "step": 8777 }, { "epoch": 0.63, "grad_norm": 1.3370895010188608, "learning_rate": 3.212486875671137e-06, "loss": 0.1673, "step": 8778 }, { "epoch": 0.63, "grad_norm": 1.4181346824719638, "learning_rate": 3.2114050757566627e-06, "loss": 0.2232, "step": 8779 }, { "epoch": 0.63, "grad_norm": 1.160070264111628, "learning_rate": 3.2103233718468574e-06, "loss": 0.1576, "step": 8780 }, { "epoch": 0.63, "grad_norm": 1.2071853737726663, "learning_rate": 3.2092417639997853e-06, "loss": 0.1783, "step": 8781 }, { "epoch": 0.63, "grad_norm": 1.3269724623284693, "learning_rate": 3.2081602522734987e-06, "loss": 0.2258, "step": 8782 }, { "epoch": 0.63, "grad_norm": 1.5556833384652586, "learning_rate": 3.207078836726053e-06, "loss": 0.2007, "step": 8783 }, { "epoch": 0.63, "grad_norm": 4.779378576324171, "learning_rate": 3.20599751741549e-06, "loss": 0.5426, "step": 8784 }, { "epoch": 0.63, "grad_norm": 1.2270274943483026, "learning_rate": 3.2049162943998563e-06, "loss": 0.1742, "step": 8785 }, { "epoch": 0.63, "grad_norm": 4.729921937651621, "learning_rate": 3.203835167737182e-06, "loss": 0.5412, "step": 8786 }, { "epoch": 0.63, "grad_norm": 1.253131238926825, "learning_rate": 3.202754137485501e-06, "loss": 0.1575, "step": 8787 }, { "epoch": 0.63, "grad_norm": 1.2806861899280495, "learning_rate": 3.2016732037028375e-06, "loss": 0.1808, "step": 8788 }, { "epoch": 0.63, "grad_norm": 1.3960081756921914, "learning_rate": 3.2005923664472134e-06, "loss": 0.2011, "step": 8789 }, { "epoch": 0.63, "grad_norm": 5.287404793138487, "learning_rate": 3.19951162577664e-06, "loss": 0.5753, "step": 8790 }, { "epoch": 0.63, "grad_norm": 1.420283866307087, "learning_rate": 3.1984309817491327e-06, "loss": 0.191, "step": 8791 }, { "epoch": 0.63, "grad_norm": 1.2611759945066752, "learning_rate": 3.197350434422689e-06, "loss": 0.1842, "step": 8792 }, { "epoch": 0.63, "grad_norm": 1.5605813478108956, "learning_rate": 3.1962699838553164e-06, "loss": 0.2045, "step": 8793 }, { "epoch": 0.63, "grad_norm": 1.3841780143079696, "learning_rate": 3.1951896301050022e-06, "loss": 0.2224, "step": 8794 }, { "epoch": 0.63, "grad_norm": 1.3826302719958257, "learning_rate": 3.1941093732297412e-06, "loss": 0.2181, "step": 8795 }, { "epoch": 0.63, "grad_norm": 6.0323033810152245, "learning_rate": 3.193029213287513e-06, "loss": 0.5028, "step": 8796 }, { "epoch": 0.63, "grad_norm": 1.4614444225235361, "learning_rate": 3.191949150336298e-06, "loss": 0.1926, "step": 8797 }, { "epoch": 0.63, "grad_norm": 1.2397674469683848, "learning_rate": 3.190869184434069e-06, "loss": 0.1651, "step": 8798 }, { "epoch": 0.63, "grad_norm": 1.2515749926056632, "learning_rate": 3.189789315638797e-06, "loss": 0.1915, "step": 8799 }, { "epoch": 0.63, "grad_norm": 1.2018222616851735, "learning_rate": 3.1887095440084402e-06, "loss": 0.1902, "step": 8800 }, { "epoch": 0.63, "grad_norm": 1.2495030326546441, "learning_rate": 3.187629869600959e-06, "loss": 0.2035, "step": 8801 }, { "epoch": 0.63, "grad_norm": 1.485802813696995, "learning_rate": 3.18655029247431e-06, "loss": 0.1884, "step": 8802 }, { "epoch": 0.63, "grad_norm": 1.2097727020580316, "learning_rate": 3.185470812686433e-06, "loss": 0.1844, "step": 8803 }, { "epoch": 0.63, "grad_norm": 7.424383783767484, "learning_rate": 3.1843914302952774e-06, "loss": 0.7474, "step": 8804 }, { "epoch": 0.63, "grad_norm": 1.2873612395061638, "learning_rate": 3.183312145358774e-06, "loss": 0.1994, "step": 8805 }, { "epoch": 0.63, "grad_norm": 1.6001685767150289, "learning_rate": 3.1822329579348586e-06, "loss": 0.2102, "step": 8806 }, { "epoch": 0.63, "grad_norm": 1.371527594517568, "learning_rate": 3.181153868081456e-06, "loss": 0.1673, "step": 8807 }, { "epoch": 0.63, "grad_norm": 1.1593608057837652, "learning_rate": 3.180074875856489e-06, "loss": 0.1435, "step": 8808 }, { "epoch": 0.63, "grad_norm": 6.182849996743878, "learning_rate": 3.1789959813178704e-06, "loss": 0.6055, "step": 8809 }, { "epoch": 0.63, "grad_norm": 1.2528606345333098, "learning_rate": 3.1779171845235144e-06, "loss": 0.1618, "step": 8810 }, { "epoch": 0.63, "grad_norm": 1.3444459482260436, "learning_rate": 3.176838485531325e-06, "loss": 0.1766, "step": 8811 }, { "epoch": 0.63, "grad_norm": 1.4907913264859274, "learning_rate": 3.175759884399203e-06, "loss": 0.1859, "step": 8812 }, { "epoch": 0.63, "grad_norm": 1.210942553801653, "learning_rate": 3.1746813811850424e-06, "loss": 0.1732, "step": 8813 }, { "epoch": 0.63, "grad_norm": 1.4356574807055744, "learning_rate": 3.1736029759467345e-06, "loss": 0.2085, "step": 8814 }, { "epoch": 0.63, "grad_norm": 1.274379200816446, "learning_rate": 3.1725246687421616e-06, "loss": 0.1634, "step": 8815 }, { "epoch": 0.63, "grad_norm": 1.398396826892128, "learning_rate": 3.1714464596292065e-06, "loss": 0.1837, "step": 8816 }, { "epoch": 0.63, "grad_norm": 1.2668053272079471, "learning_rate": 3.170368348665739e-06, "loss": 0.1708, "step": 8817 }, { "epoch": 0.63, "grad_norm": 1.430912822381253, "learning_rate": 3.1692903359096305e-06, "loss": 0.1973, "step": 8818 }, { "epoch": 0.63, "grad_norm": 1.2718128796216763, "learning_rate": 3.1682124214187427e-06, "loss": 0.1467, "step": 8819 }, { "epoch": 0.63, "grad_norm": 1.2482868574170807, "learning_rate": 3.167134605250938e-06, "loss": 0.1706, "step": 8820 }, { "epoch": 0.63, "grad_norm": 4.452240409976205, "learning_rate": 3.1660568874640627e-06, "loss": 0.4341, "step": 8821 }, { "epoch": 0.63, "grad_norm": 5.8710785984880625, "learning_rate": 3.164979268115969e-06, "loss": 0.6081, "step": 8822 }, { "epoch": 0.63, "grad_norm": 1.4718850313100704, "learning_rate": 3.1639017472644975e-06, "loss": 0.1868, "step": 8823 }, { "epoch": 0.63, "grad_norm": 1.460272002991077, "learning_rate": 3.162824324967487e-06, "loss": 0.2029, "step": 8824 }, { "epoch": 0.63, "grad_norm": 1.3050281730843296, "learning_rate": 3.1617470012827674e-06, "loss": 0.2015, "step": 8825 }, { "epoch": 0.63, "grad_norm": 1.3304566485991751, "learning_rate": 3.160669776268166e-06, "loss": 0.1735, "step": 8826 }, { "epoch": 0.63, "grad_norm": 1.2428681803698354, "learning_rate": 3.159592649981502e-06, "loss": 0.2113, "step": 8827 }, { "epoch": 0.63, "grad_norm": 1.2991718587101573, "learning_rate": 3.1585156224805964e-06, "loss": 0.1869, "step": 8828 }, { "epoch": 0.63, "grad_norm": 1.1840332850568884, "learning_rate": 3.157438693823254e-06, "loss": 0.1346, "step": 8829 }, { "epoch": 0.63, "grad_norm": 1.4375418467726195, "learning_rate": 3.156361864067283e-06, "loss": 0.2102, "step": 8830 }, { "epoch": 0.63, "grad_norm": 1.2793589348710266, "learning_rate": 3.1552851332704816e-06, "loss": 0.1572, "step": 8831 }, { "epoch": 0.63, "grad_norm": 1.2140866881572394, "learning_rate": 3.1542085014906456e-06, "loss": 0.1692, "step": 8832 }, { "epoch": 0.63, "grad_norm": 5.3033030527223115, "learning_rate": 3.1531319687855666e-06, "loss": 0.5462, "step": 8833 }, { "epoch": 0.63, "grad_norm": 1.2811081884191098, "learning_rate": 3.1520555352130246e-06, "loss": 0.1791, "step": 8834 }, { "epoch": 0.63, "grad_norm": 1.2849381569596956, "learning_rate": 3.1509792008308003e-06, "loss": 0.1473, "step": 8835 }, { "epoch": 0.63, "grad_norm": 1.1820783798089056, "learning_rate": 3.149902965696666e-06, "loss": 0.1445, "step": 8836 }, { "epoch": 0.63, "grad_norm": 1.495352450514506, "learning_rate": 3.148826829868392e-06, "loss": 0.1934, "step": 8837 }, { "epoch": 0.63, "grad_norm": 1.4403379489320527, "learning_rate": 3.147750793403739e-06, "loss": 0.2087, "step": 8838 }, { "epoch": 0.63, "grad_norm": 1.5206568211527367, "learning_rate": 3.1466748563604643e-06, "loss": 0.1702, "step": 8839 }, { "epoch": 0.63, "grad_norm": 1.301550706919972, "learning_rate": 3.14559901879632e-06, "loss": 0.1461, "step": 8840 }, { "epoch": 0.63, "grad_norm": 1.372285156932272, "learning_rate": 3.144523280769055e-06, "loss": 0.1758, "step": 8841 }, { "epoch": 0.63, "grad_norm": 1.2554276442076213, "learning_rate": 3.143447642336407e-06, "loss": 0.1796, "step": 8842 }, { "epoch": 0.63, "grad_norm": 1.4685929166706162, "learning_rate": 3.142372103556115e-06, "loss": 0.177, "step": 8843 }, { "epoch": 0.63, "grad_norm": 1.4564830910823923, "learning_rate": 3.1412966644859073e-06, "loss": 0.2193, "step": 8844 }, { "epoch": 0.63, "grad_norm": 1.3098117723453786, "learning_rate": 3.1402213251835124e-06, "loss": 0.1788, "step": 8845 }, { "epoch": 0.63, "grad_norm": 1.422002744513389, "learning_rate": 3.1391460857066465e-06, "loss": 0.1999, "step": 8846 }, { "epoch": 0.63, "grad_norm": 1.3161119472032723, "learning_rate": 3.1380709461130267e-06, "loss": 0.1653, "step": 8847 }, { "epoch": 0.63, "grad_norm": 1.1830774487804931, "learning_rate": 3.1369959064603607e-06, "loss": 0.1839, "step": 8848 }, { "epoch": 0.63, "grad_norm": 1.3196350012228997, "learning_rate": 3.135920966806355e-06, "loss": 0.2055, "step": 8849 }, { "epoch": 0.63, "grad_norm": 1.260750789462727, "learning_rate": 3.134846127208704e-06, "loss": 0.151, "step": 8850 }, { "epoch": 0.63, "grad_norm": 1.1997800540004422, "learning_rate": 3.1337713877251043e-06, "loss": 0.1551, "step": 8851 }, { "epoch": 0.63, "grad_norm": 5.3036874147466, "learning_rate": 3.132696748413241e-06, "loss": 0.5738, "step": 8852 }, { "epoch": 0.63, "grad_norm": 1.155824888330129, "learning_rate": 3.1316222093308e-06, "loss": 0.17, "step": 8853 }, { "epoch": 0.63, "grad_norm": 1.3491935745093266, "learning_rate": 3.1305477705354546e-06, "loss": 0.1626, "step": 8854 }, { "epoch": 0.63, "grad_norm": 6.725674005346951, "learning_rate": 3.1294734320848787e-06, "loss": 0.4747, "step": 8855 }, { "epoch": 0.63, "grad_norm": 1.3706054944490205, "learning_rate": 3.1283991940367365e-06, "loss": 0.1614, "step": 8856 }, { "epoch": 0.63, "grad_norm": 1.3957512667871042, "learning_rate": 3.1273250564486924e-06, "loss": 0.2073, "step": 8857 }, { "epoch": 0.63, "grad_norm": 1.1989228552822873, "learning_rate": 3.1262510193783966e-06, "loss": 0.154, "step": 8858 }, { "epoch": 0.63, "grad_norm": 1.387621023597622, "learning_rate": 3.1251770828835043e-06, "loss": 0.1983, "step": 8859 }, { "epoch": 0.63, "grad_norm": 1.2702175449365516, "learning_rate": 3.1241032470216564e-06, "loss": 0.2008, "step": 8860 }, { "epoch": 0.63, "grad_norm": 1.3295065603780214, "learning_rate": 3.123029511850496e-06, "loss": 0.2183, "step": 8861 }, { "epoch": 0.63, "grad_norm": 1.4504848165717281, "learning_rate": 3.121955877427652e-06, "loss": 0.198, "step": 8862 }, { "epoch": 0.63, "grad_norm": 1.3593693491593615, "learning_rate": 3.1208823438107567e-06, "loss": 0.1876, "step": 8863 }, { "epoch": 0.63, "grad_norm": 5.426057254864369, "learning_rate": 3.119808911057431e-06, "loss": 0.5131, "step": 8864 }, { "epoch": 0.63, "grad_norm": 1.1970410910589355, "learning_rate": 3.118735579225293e-06, "loss": 0.1796, "step": 8865 }, { "epoch": 0.63, "grad_norm": 1.3472276644147088, "learning_rate": 3.1176623483719576e-06, "loss": 0.2035, "step": 8866 }, { "epoch": 0.63, "grad_norm": 1.5903488520738773, "learning_rate": 3.1165892185550272e-06, "loss": 0.1749, "step": 8867 }, { "epoch": 0.63, "grad_norm": 5.340465416073398, "learning_rate": 3.1155161898321064e-06, "loss": 0.573, "step": 8868 }, { "epoch": 0.63, "grad_norm": 1.2482834591975278, "learning_rate": 3.1144432622607877e-06, "loss": 0.1697, "step": 8869 }, { "epoch": 0.63, "grad_norm": 1.3581271568406463, "learning_rate": 3.1133704358986674e-06, "loss": 0.1806, "step": 8870 }, { "epoch": 0.63, "grad_norm": 1.3667894662750992, "learning_rate": 3.1122977108033237e-06, "loss": 0.19, "step": 8871 }, { "epoch": 0.63, "grad_norm": 1.2702473398470158, "learning_rate": 3.1112250870323406e-06, "loss": 0.1988, "step": 8872 }, { "epoch": 0.63, "grad_norm": 1.1920508736353224, "learning_rate": 3.110152564643291e-06, "loss": 0.1595, "step": 8873 }, { "epoch": 0.63, "grad_norm": 1.3842444809170014, "learning_rate": 3.1090801436937446e-06, "loss": 0.1713, "step": 8874 }, { "epoch": 0.63, "grad_norm": 5.170675181142841, "learning_rate": 3.1080078242412627e-06, "loss": 0.6643, "step": 8875 }, { "epoch": 0.63, "grad_norm": 1.3145728982252256, "learning_rate": 3.106935606343404e-06, "loss": 0.1493, "step": 8876 }, { "epoch": 0.63, "grad_norm": 1.3405440973322789, "learning_rate": 3.1058634900577216e-06, "loss": 0.1683, "step": 8877 }, { "epoch": 0.64, "grad_norm": 1.3068131161695118, "learning_rate": 3.104791475441764e-06, "loss": 0.1933, "step": 8878 }, { "epoch": 0.64, "grad_norm": 1.3068759017012703, "learning_rate": 3.103719562553068e-06, "loss": 0.1948, "step": 8879 }, { "epoch": 0.64, "grad_norm": 1.6389880783838642, "learning_rate": 3.102647751449174e-06, "loss": 0.2163, "step": 8880 }, { "epoch": 0.64, "grad_norm": 1.2874779699311212, "learning_rate": 3.1015760421876095e-06, "loss": 0.1876, "step": 8881 }, { "epoch": 0.64, "grad_norm": 1.1211522667827096, "learning_rate": 3.100504434825903e-06, "loss": 0.1754, "step": 8882 }, { "epoch": 0.64, "grad_norm": 1.30324175203119, "learning_rate": 3.0994329294215707e-06, "loss": 0.1571, "step": 8883 }, { "epoch": 0.64, "grad_norm": 1.2801131400738004, "learning_rate": 3.0983615260321286e-06, "loss": 0.1778, "step": 8884 }, { "epoch": 0.64, "grad_norm": 1.3919939624156619, "learning_rate": 3.097290224715084e-06, "loss": 0.2051, "step": 8885 }, { "epoch": 0.64, "grad_norm": 1.4459454853178553, "learning_rate": 3.0962190255279433e-06, "loss": 0.1754, "step": 8886 }, { "epoch": 0.64, "grad_norm": 1.246065239189784, "learning_rate": 3.0951479285282005e-06, "loss": 0.1826, "step": 8887 }, { "epoch": 0.64, "grad_norm": 1.2788513383978244, "learning_rate": 3.094076933773349e-06, "loss": 0.1759, "step": 8888 }, { "epoch": 0.64, "grad_norm": 1.2755330168232222, "learning_rate": 3.093006041320876e-06, "loss": 0.1957, "step": 8889 }, { "epoch": 0.64, "grad_norm": 1.2536842737368088, "learning_rate": 3.091935251228264e-06, "loss": 0.1815, "step": 8890 }, { "epoch": 0.64, "grad_norm": 1.348033848778666, "learning_rate": 3.0908645635529857e-06, "loss": 0.1638, "step": 8891 }, { "epoch": 0.64, "grad_norm": 1.2771167337999285, "learning_rate": 3.0897939783525156e-06, "loss": 0.1625, "step": 8892 }, { "epoch": 0.64, "grad_norm": 1.297569785907774, "learning_rate": 3.0887234956843115e-06, "loss": 0.1791, "step": 8893 }, { "epoch": 0.64, "grad_norm": 7.348054492099745, "learning_rate": 3.0876531156058407e-06, "loss": 0.631, "step": 8894 }, { "epoch": 0.64, "grad_norm": 1.3612267561762736, "learning_rate": 3.0865828381745515e-06, "loss": 0.1778, "step": 8895 }, { "epoch": 0.64, "grad_norm": 1.4527640516674205, "learning_rate": 3.085512663447894e-06, "loss": 0.2174, "step": 8896 }, { "epoch": 0.64, "grad_norm": 1.3765184528476222, "learning_rate": 3.0844425914833133e-06, "loss": 0.1893, "step": 8897 }, { "epoch": 0.64, "grad_norm": 4.798712977944269, "learning_rate": 3.0833726223382397e-06, "loss": 0.712, "step": 8898 }, { "epoch": 0.64, "grad_norm": 1.2591647315777175, "learning_rate": 3.0823027560701135e-06, "loss": 0.2076, "step": 8899 }, { "epoch": 0.64, "grad_norm": 5.487563793603447, "learning_rate": 3.081232992736355e-06, "loss": 0.5308, "step": 8900 }, { "epoch": 0.64, "grad_norm": 1.3901669595080166, "learning_rate": 3.080163332394388e-06, "loss": 0.2091, "step": 8901 }, { "epoch": 0.64, "grad_norm": 1.342217962347694, "learning_rate": 3.0790937751016243e-06, "loss": 0.1731, "step": 8902 }, { "epoch": 0.64, "grad_norm": 1.5637813921643717, "learning_rate": 3.0780243209154763e-06, "loss": 0.1932, "step": 8903 }, { "epoch": 0.64, "grad_norm": 1.317242390607728, "learning_rate": 3.0769549698933465e-06, "loss": 0.2203, "step": 8904 }, { "epoch": 0.64, "grad_norm": 1.3878957543756512, "learning_rate": 3.075885722092635e-06, "loss": 0.1745, "step": 8905 }, { "epoch": 0.64, "grad_norm": 1.1348643222694037, "learning_rate": 3.0748165775707334e-06, "loss": 0.151, "step": 8906 }, { "epoch": 0.64, "grad_norm": 1.1849760355659502, "learning_rate": 3.07374753638503e-06, "loss": 0.1486, "step": 8907 }, { "epoch": 0.64, "grad_norm": 1.377211838259763, "learning_rate": 3.0726785985929048e-06, "loss": 0.186, "step": 8908 }, { "epoch": 0.64, "grad_norm": 1.5084746512590044, "learning_rate": 3.071609764251738e-06, "loss": 0.1906, "step": 8909 }, { "epoch": 0.64, "grad_norm": 1.3346883548232737, "learning_rate": 3.0705410334188957e-06, "loss": 0.1808, "step": 8910 }, { "epoch": 0.64, "grad_norm": 1.3721942395043114, "learning_rate": 3.069472406151747e-06, "loss": 0.1798, "step": 8911 }, { "epoch": 0.64, "grad_norm": 1.3603723918144288, "learning_rate": 3.068403882507648e-06, "loss": 0.1841, "step": 8912 }, { "epoch": 0.64, "grad_norm": 1.3566573889379783, "learning_rate": 3.0673354625439577e-06, "loss": 0.1915, "step": 8913 }, { "epoch": 0.64, "grad_norm": 1.3970179252006574, "learning_rate": 3.066267146318019e-06, "loss": 0.2056, "step": 8914 }, { "epoch": 0.64, "grad_norm": 1.3502037299082505, "learning_rate": 3.065198933887179e-06, "loss": 0.1861, "step": 8915 }, { "epoch": 0.64, "grad_norm": 1.3451761324297506, "learning_rate": 3.0641308253087722e-06, "loss": 0.2042, "step": 8916 }, { "epoch": 0.64, "grad_norm": 4.2358359870780635, "learning_rate": 3.0630628206401347e-06, "loss": 0.4907, "step": 8917 }, { "epoch": 0.64, "grad_norm": 1.2881979292596126, "learning_rate": 3.0619949199385866e-06, "loss": 0.1797, "step": 8918 }, { "epoch": 0.64, "grad_norm": 1.1577687135728536, "learning_rate": 3.0609271232614545e-06, "loss": 0.1747, "step": 8919 }, { "epoch": 0.64, "grad_norm": 1.2601056733692304, "learning_rate": 3.059859430666049e-06, "loss": 0.1951, "step": 8920 }, { "epoch": 0.64, "grad_norm": 1.5329657652634363, "learning_rate": 3.0587918422096835e-06, "loss": 0.1786, "step": 8921 }, { "epoch": 0.64, "grad_norm": 1.242628099751199, "learning_rate": 3.0577243579496583e-06, "loss": 0.1623, "step": 8922 }, { "epoch": 0.64, "grad_norm": 1.6859495916837106, "learning_rate": 3.056656977943274e-06, "loss": 0.2316, "step": 8923 }, { "epoch": 0.64, "grad_norm": 1.3299118202892588, "learning_rate": 3.0555897022478217e-06, "loss": 0.1731, "step": 8924 }, { "epoch": 0.64, "grad_norm": 1.1911641552700072, "learning_rate": 3.0545225309205918e-06, "loss": 0.1615, "step": 8925 }, { "epoch": 0.64, "grad_norm": 1.2296697063326794, "learning_rate": 3.0534554640188608e-06, "loss": 0.1558, "step": 8926 }, { "epoch": 0.64, "grad_norm": 1.3038618299436948, "learning_rate": 3.0523885015999076e-06, "loss": 0.1616, "step": 8927 }, { "epoch": 0.64, "grad_norm": 1.2461747968884576, "learning_rate": 3.051321643721003e-06, "loss": 0.1729, "step": 8928 }, { "epoch": 0.64, "grad_norm": 1.3000685290478593, "learning_rate": 3.05025489043941e-06, "loss": 0.1548, "step": 8929 }, { "epoch": 0.64, "grad_norm": 1.209858723154252, "learning_rate": 3.049188241812391e-06, "loss": 0.1698, "step": 8930 }, { "epoch": 0.64, "grad_norm": 1.3707010463983653, "learning_rate": 3.0481216978971937e-06, "loss": 0.214, "step": 8931 }, { "epoch": 0.64, "grad_norm": 1.3565091191272802, "learning_rate": 3.0470552587510714e-06, "loss": 0.1824, "step": 8932 }, { "epoch": 0.64, "grad_norm": 8.86158429004687, "learning_rate": 3.0459889244312636e-06, "loss": 0.6688, "step": 8933 }, { "epoch": 0.64, "grad_norm": 1.2400955520526493, "learning_rate": 3.0449226949950086e-06, "loss": 0.1771, "step": 8934 }, { "epoch": 0.64, "grad_norm": 1.4074892990993473, "learning_rate": 3.0438565704995343e-06, "loss": 0.2203, "step": 8935 }, { "epoch": 0.64, "grad_norm": 1.2586931432721307, "learning_rate": 3.0427905510020693e-06, "loss": 0.1606, "step": 8936 }, { "epoch": 0.64, "grad_norm": 8.64271705484394, "learning_rate": 3.041724636559831e-06, "loss": 0.5625, "step": 8937 }, { "epoch": 0.64, "grad_norm": 1.35176895919859, "learning_rate": 3.0406588272300357e-06, "loss": 0.1713, "step": 8938 }, { "epoch": 0.64, "grad_norm": 1.2083590681503207, "learning_rate": 3.0395931230698893e-06, "loss": 0.1327, "step": 8939 }, { "epoch": 0.64, "grad_norm": 1.2007659454264854, "learning_rate": 3.0385275241365965e-06, "loss": 0.1602, "step": 8940 }, { "epoch": 0.64, "grad_norm": 1.5824947839303294, "learning_rate": 3.0374620304873525e-06, "loss": 0.2264, "step": 8941 }, { "epoch": 0.64, "grad_norm": 1.3038507737673648, "learning_rate": 3.0363966421793518e-06, "loss": 0.1803, "step": 8942 }, { "epoch": 0.64, "grad_norm": 1.4668400931674868, "learning_rate": 3.0353313592697765e-06, "loss": 0.1921, "step": 8943 }, { "epoch": 0.64, "grad_norm": 1.3165143140182756, "learning_rate": 3.03426618181581e-06, "loss": 0.1989, "step": 8944 }, { "epoch": 0.64, "grad_norm": 1.4032072675005498, "learning_rate": 3.033201109874624e-06, "loss": 0.1965, "step": 8945 }, { "epoch": 0.64, "grad_norm": 1.1787711933468785, "learning_rate": 3.032136143503391e-06, "loss": 0.1651, "step": 8946 }, { "epoch": 0.64, "grad_norm": 5.207891212759981, "learning_rate": 3.0310712827592696e-06, "loss": 0.5839, "step": 8947 }, { "epoch": 0.64, "grad_norm": 1.432004812184872, "learning_rate": 3.030006527699421e-06, "loss": 0.2086, "step": 8948 }, { "epoch": 0.64, "grad_norm": 1.3290496851023192, "learning_rate": 3.028941878380994e-06, "loss": 0.1378, "step": 8949 }, { "epoch": 0.64, "grad_norm": 1.317892079568536, "learning_rate": 3.027877334861139e-06, "loss": 0.1445, "step": 8950 }, { "epoch": 0.64, "grad_norm": 1.2476046313781441, "learning_rate": 3.026812897196992e-06, "loss": 0.1833, "step": 8951 }, { "epoch": 0.64, "grad_norm": 1.3325306475077288, "learning_rate": 3.0257485654456897e-06, "loss": 0.1937, "step": 8952 }, { "epoch": 0.64, "grad_norm": 1.3724534163945883, "learning_rate": 3.024684339664361e-06, "loss": 0.1922, "step": 8953 }, { "epoch": 0.64, "grad_norm": 1.341381314770079, "learning_rate": 3.0236202199101313e-06, "loss": 0.1858, "step": 8954 }, { "epoch": 0.64, "grad_norm": 1.260299981318571, "learning_rate": 3.0225562062401138e-06, "loss": 0.152, "step": 8955 }, { "epoch": 0.64, "grad_norm": 1.4059760816998603, "learning_rate": 3.0214922987114247e-06, "loss": 0.1754, "step": 8956 }, { "epoch": 0.64, "grad_norm": 5.527796990144992, "learning_rate": 3.020428497381168e-06, "loss": 0.5417, "step": 8957 }, { "epoch": 0.64, "grad_norm": 1.242402686040406, "learning_rate": 3.019364802306447e-06, "loss": 0.1606, "step": 8958 }, { "epoch": 0.64, "grad_norm": 1.2048144410810482, "learning_rate": 3.0183012135443525e-06, "loss": 0.1609, "step": 8959 }, { "epoch": 0.64, "grad_norm": 1.3517809032625816, "learning_rate": 3.017237731151976e-06, "loss": 0.1691, "step": 8960 }, { "epoch": 0.64, "grad_norm": 1.2806943695435553, "learning_rate": 3.016174355186402e-06, "loss": 0.1759, "step": 8961 }, { "epoch": 0.64, "grad_norm": 1.2983962180398247, "learning_rate": 3.015111085704707e-06, "loss": 0.1902, "step": 8962 }, { "epoch": 0.64, "grad_norm": 1.4696046077114788, "learning_rate": 3.0140479227639643e-06, "loss": 0.2091, "step": 8963 }, { "epoch": 0.64, "grad_norm": 1.273598698202574, "learning_rate": 3.012984866421238e-06, "loss": 0.1679, "step": 8964 }, { "epoch": 0.64, "grad_norm": 1.2531709262376356, "learning_rate": 3.0119219167335913e-06, "loss": 0.1624, "step": 8965 }, { "epoch": 0.64, "grad_norm": 1.314800932334077, "learning_rate": 3.010859073758076e-06, "loss": 0.1699, "step": 8966 }, { "epoch": 0.64, "grad_norm": 1.346193728445068, "learning_rate": 3.009796337551746e-06, "loss": 0.1526, "step": 8967 }, { "epoch": 0.64, "grad_norm": 1.36693171164487, "learning_rate": 3.0087337081716393e-06, "loss": 0.1807, "step": 8968 }, { "epoch": 0.64, "grad_norm": 5.5239613128363265, "learning_rate": 3.0076711856747977e-06, "loss": 0.4946, "step": 8969 }, { "epoch": 0.64, "grad_norm": 1.2577052715276438, "learning_rate": 3.0066087701182513e-06, "loss": 0.1412, "step": 8970 }, { "epoch": 0.64, "grad_norm": 1.1530185894253566, "learning_rate": 3.005546461559028e-06, "loss": 0.1667, "step": 8971 }, { "epoch": 0.64, "grad_norm": 1.4711722568069157, "learning_rate": 3.0044842600541447e-06, "loss": 0.129, "step": 8972 }, { "epoch": 0.64, "grad_norm": 1.5519954306432573, "learning_rate": 3.00342216566062e-06, "loss": 0.1496, "step": 8973 }, { "epoch": 0.64, "grad_norm": 1.3178250918891812, "learning_rate": 3.0023601784354605e-06, "loss": 0.181, "step": 8974 }, { "epoch": 0.64, "grad_norm": 1.3073732517174987, "learning_rate": 3.001298298435673e-06, "loss": 0.2126, "step": 8975 }, { "epoch": 0.64, "grad_norm": 1.4359627401075035, "learning_rate": 3.0002365257182496e-06, "loss": 0.1924, "step": 8976 }, { "epoch": 0.64, "grad_norm": 1.3799339486399935, "learning_rate": 2.999174860340186e-06, "loss": 0.1562, "step": 8977 }, { "epoch": 0.64, "grad_norm": 1.2088711192617874, "learning_rate": 2.9981133023584664e-06, "loss": 0.1718, "step": 8978 }, { "epoch": 0.64, "grad_norm": 1.3244857076038692, "learning_rate": 2.997051851830074e-06, "loss": 0.199, "step": 8979 }, { "epoch": 0.64, "grad_norm": 1.2427599981070234, "learning_rate": 2.9959905088119777e-06, "loss": 0.1566, "step": 8980 }, { "epoch": 0.64, "grad_norm": 1.4249666536763284, "learning_rate": 2.9949292733611512e-06, "loss": 0.1968, "step": 8981 }, { "epoch": 0.64, "grad_norm": 1.2100262496124679, "learning_rate": 2.9938681455345542e-06, "loss": 0.1577, "step": 8982 }, { "epoch": 0.64, "grad_norm": 1.3810841964683973, "learning_rate": 2.9928071253891476e-06, "loss": 0.1722, "step": 8983 }, { "epoch": 0.64, "grad_norm": 1.3688845375925671, "learning_rate": 2.9917462129818785e-06, "loss": 0.1841, "step": 8984 }, { "epoch": 0.64, "grad_norm": 1.2647315028134762, "learning_rate": 2.9906854083696947e-06, "loss": 0.1625, "step": 8985 }, { "epoch": 0.64, "grad_norm": 1.2471020906403547, "learning_rate": 2.989624711609535e-06, "loss": 0.1847, "step": 8986 }, { "epoch": 0.64, "grad_norm": 1.2833137779985297, "learning_rate": 2.988564122758336e-06, "loss": 0.1822, "step": 8987 }, { "epoch": 0.64, "grad_norm": 1.2538857701666606, "learning_rate": 2.9875036418730218e-06, "loss": 0.2055, "step": 8988 }, { "epoch": 0.64, "grad_norm": 1.4175681266325126, "learning_rate": 2.986443269010519e-06, "loss": 0.2021, "step": 8989 }, { "epoch": 0.64, "grad_norm": 1.4247700466053956, "learning_rate": 2.985383004227739e-06, "loss": 0.1613, "step": 8990 }, { "epoch": 0.64, "grad_norm": 1.3569114641050322, "learning_rate": 2.9843228475815965e-06, "loss": 0.1875, "step": 8991 }, { "epoch": 0.64, "grad_norm": 1.4549816871284327, "learning_rate": 2.9832627991289973e-06, "loss": 0.185, "step": 8992 }, { "epoch": 0.64, "grad_norm": 1.2768614512045962, "learning_rate": 2.9822028589268383e-06, "loss": 0.1786, "step": 8993 }, { "epoch": 0.64, "grad_norm": 1.31323651838234, "learning_rate": 2.981143027032013e-06, "loss": 0.1996, "step": 8994 }, { "epoch": 0.64, "grad_norm": 1.2041169749711287, "learning_rate": 2.9800833035014098e-06, "loss": 0.2167, "step": 8995 }, { "epoch": 0.64, "grad_norm": 1.2477360425933492, "learning_rate": 2.979023688391911e-06, "loss": 0.1434, "step": 8996 }, { "epoch": 0.64, "grad_norm": 1.2769529395078603, "learning_rate": 2.977964181760391e-06, "loss": 0.2005, "step": 8997 }, { "epoch": 0.64, "grad_norm": 1.4065839791151251, "learning_rate": 2.976904783663722e-06, "loss": 0.1961, "step": 8998 }, { "epoch": 0.64, "grad_norm": 7.95403982604042, "learning_rate": 2.9758454941587632e-06, "loss": 0.5831, "step": 8999 }, { "epoch": 0.64, "grad_norm": 1.116017101658174, "learning_rate": 2.9747863133023803e-06, "loss": 0.1446, "step": 9000 }, { "epoch": 0.64, "grad_norm": 1.2536273900457988, "learning_rate": 2.9737272411514207e-06, "loss": 0.1553, "step": 9001 }, { "epoch": 0.64, "grad_norm": 1.322677442441434, "learning_rate": 2.972668277762735e-06, "loss": 0.1822, "step": 9002 }, { "epoch": 0.64, "grad_norm": 1.3925139595523468, "learning_rate": 2.971609423193159e-06, "loss": 0.1786, "step": 9003 }, { "epoch": 0.64, "grad_norm": 9.047854841153445, "learning_rate": 2.970550677499532e-06, "loss": 0.6008, "step": 9004 }, { "epoch": 0.64, "grad_norm": 1.5273823418147014, "learning_rate": 2.969492040738682e-06, "loss": 0.2036, "step": 9005 }, { "epoch": 0.64, "grad_norm": 1.3815385345536884, "learning_rate": 2.9684335129674334e-06, "loss": 0.1894, "step": 9006 }, { "epoch": 0.64, "grad_norm": 1.5948521083294915, "learning_rate": 2.9673750942426015e-06, "loss": 0.1929, "step": 9007 }, { "epoch": 0.64, "grad_norm": 1.1879092678524734, "learning_rate": 2.966316784621e-06, "loss": 0.1244, "step": 9008 }, { "epoch": 0.64, "grad_norm": 1.1778206912142821, "learning_rate": 2.9652585841594327e-06, "loss": 0.1611, "step": 9009 }, { "epoch": 0.64, "grad_norm": 1.334322094811316, "learning_rate": 2.9642004929147037e-06, "loss": 0.1662, "step": 9010 }, { "epoch": 0.64, "grad_norm": 1.3680612768090985, "learning_rate": 2.9631425109436022e-06, "loss": 0.1372, "step": 9011 }, { "epoch": 0.64, "grad_norm": 1.299897327149625, "learning_rate": 2.96208463830292e-06, "loss": 0.1636, "step": 9012 }, { "epoch": 0.64, "grad_norm": 1.1906746982821015, "learning_rate": 2.961026875049437e-06, "loss": 0.1291, "step": 9013 }, { "epoch": 0.64, "grad_norm": 1.502762352376142, "learning_rate": 2.959969221239933e-06, "loss": 0.191, "step": 9014 }, { "epoch": 0.64, "grad_norm": 1.2899640855085186, "learning_rate": 2.9589116769311744e-06, "loss": 0.1581, "step": 9015 }, { "epoch": 0.64, "grad_norm": 1.430895570738022, "learning_rate": 2.9578542421799296e-06, "loss": 0.2104, "step": 9016 }, { "epoch": 0.64, "grad_norm": 1.3954055199146087, "learning_rate": 2.9567969170429543e-06, "loss": 0.1579, "step": 9017 }, { "epoch": 0.65, "grad_norm": 1.4727090079464, "learning_rate": 2.9557397015770063e-06, "loss": 0.2301, "step": 9018 }, { "epoch": 0.65, "grad_norm": 1.2900463999664125, "learning_rate": 2.954682595838827e-06, "loss": 0.1566, "step": 9019 }, { "epoch": 0.65, "grad_norm": 1.3134563279302998, "learning_rate": 2.9536255998851615e-06, "loss": 0.149, "step": 9020 }, { "epoch": 0.65, "grad_norm": 1.2630352005867114, "learning_rate": 2.952568713772743e-06, "loss": 0.161, "step": 9021 }, { "epoch": 0.65, "grad_norm": 1.3921550032241177, "learning_rate": 2.9515119375583036e-06, "loss": 0.1977, "step": 9022 }, { "epoch": 0.65, "grad_norm": 1.3444820193727198, "learning_rate": 2.9504552712985635e-06, "loss": 0.2174, "step": 9023 }, { "epoch": 0.65, "grad_norm": 1.2562094881397003, "learning_rate": 2.949398715050242e-06, "loss": 0.165, "step": 9024 }, { "epoch": 0.65, "grad_norm": 1.2811385299677427, "learning_rate": 2.9483422688700513e-06, "loss": 0.1741, "step": 9025 }, { "epoch": 0.65, "grad_norm": 6.921001978261553, "learning_rate": 2.947285932814697e-06, "loss": 0.5729, "step": 9026 }, { "epoch": 0.65, "grad_norm": 1.3143679551853817, "learning_rate": 2.9462297069408798e-06, "loss": 0.1907, "step": 9027 }, { "epoch": 0.65, "grad_norm": 1.3492723006377716, "learning_rate": 2.9451735913052916e-06, "loss": 0.177, "step": 9028 }, { "epoch": 0.65, "grad_norm": 5.138793696274279, "learning_rate": 2.944117585964622e-06, "loss": 0.589, "step": 9029 }, { "epoch": 0.65, "grad_norm": 1.3779049601013813, "learning_rate": 2.9430616909755515e-06, "loss": 0.2178, "step": 9030 }, { "epoch": 0.65, "grad_norm": 1.1290230378198411, "learning_rate": 2.9420059063947605e-06, "loss": 0.1613, "step": 9031 }, { "epoch": 0.65, "grad_norm": 1.2745577473478662, "learning_rate": 2.940950232278914e-06, "loss": 0.1617, "step": 9032 }, { "epoch": 0.65, "grad_norm": 1.3087596392562098, "learning_rate": 2.9398946686846797e-06, "loss": 0.1752, "step": 9033 }, { "epoch": 0.65, "grad_norm": 1.506951213251745, "learning_rate": 2.9388392156687145e-06, "loss": 0.2133, "step": 9034 }, { "epoch": 0.65, "grad_norm": 1.350795108913614, "learning_rate": 2.937783873287674e-06, "loss": 0.2114, "step": 9035 }, { "epoch": 0.65, "grad_norm": 1.1806920529447555, "learning_rate": 2.9367286415982e-06, "loss": 0.1663, "step": 9036 }, { "epoch": 0.65, "grad_norm": 1.3584325124931473, "learning_rate": 2.9356735206569365e-06, "loss": 0.1392, "step": 9037 }, { "epoch": 0.65, "grad_norm": 1.1690467394505801, "learning_rate": 2.934618510520516e-06, "loss": 0.1405, "step": 9038 }, { "epoch": 0.65, "grad_norm": 1.3011541070499342, "learning_rate": 2.9335636112455714e-06, "loss": 0.1717, "step": 9039 }, { "epoch": 0.65, "grad_norm": 1.2464153596213308, "learning_rate": 2.93250882288872e-06, "loss": 0.1418, "step": 9040 }, { "epoch": 0.65, "grad_norm": 1.6590000467747565, "learning_rate": 2.9314541455065827e-06, "loss": 0.1878, "step": 9041 }, { "epoch": 0.65, "grad_norm": 1.2129390096594173, "learning_rate": 2.930399579155767e-06, "loss": 0.181, "step": 9042 }, { "epoch": 0.65, "grad_norm": 1.4688911300417744, "learning_rate": 2.9293451238928828e-06, "loss": 0.2074, "step": 9043 }, { "epoch": 0.65, "grad_norm": 1.2847687418643121, "learning_rate": 2.9282907797745236e-06, "loss": 0.1448, "step": 9044 }, { "epoch": 0.65, "grad_norm": 1.3190927229070775, "learning_rate": 2.927236546857286e-06, "loss": 0.1724, "step": 9045 }, { "epoch": 0.65, "grad_norm": 1.1006115878611313, "learning_rate": 2.926182425197755e-06, "loss": 0.129, "step": 9046 }, { "epoch": 0.65, "grad_norm": 0.9919982585784483, "learning_rate": 2.925128414852515e-06, "loss": 0.1058, "step": 9047 }, { "epoch": 0.65, "grad_norm": 1.4020618619342518, "learning_rate": 2.9240745158781357e-06, "loss": 0.2155, "step": 9048 }, { "epoch": 0.65, "grad_norm": 1.383480464557657, "learning_rate": 2.9230207283311908e-06, "loss": 0.2162, "step": 9049 }, { "epoch": 0.65, "grad_norm": 1.4240563442996321, "learning_rate": 2.921967052268241e-06, "loss": 0.1918, "step": 9050 }, { "epoch": 0.65, "grad_norm": 1.4480067377313433, "learning_rate": 2.9209134877458457e-06, "loss": 0.179, "step": 9051 }, { "epoch": 0.65, "grad_norm": 5.728263846645501, "learning_rate": 2.919860034820553e-06, "loss": 0.6956, "step": 9052 }, { "epoch": 0.65, "grad_norm": 6.312509461221239, "learning_rate": 2.91880669354891e-06, "loss": 0.5956, "step": 9053 }, { "epoch": 0.65, "grad_norm": 1.2563410389005534, "learning_rate": 2.917753463987455e-06, "loss": 0.1447, "step": 9054 }, { "epoch": 0.65, "grad_norm": 1.6161052401027491, "learning_rate": 2.916700346192722e-06, "loss": 0.1818, "step": 9055 }, { "epoch": 0.65, "grad_norm": 1.4791989778215424, "learning_rate": 2.9156473402212394e-06, "loss": 0.1974, "step": 9056 }, { "epoch": 0.65, "grad_norm": 1.4014265023359394, "learning_rate": 2.914594446129525e-06, "loss": 0.1728, "step": 9057 }, { "epoch": 0.65, "grad_norm": 1.2746216672457429, "learning_rate": 2.9135416639740987e-06, "loss": 0.1781, "step": 9058 }, { "epoch": 0.65, "grad_norm": 1.3753562975862914, "learning_rate": 2.912488993811464e-06, "loss": 0.2074, "step": 9059 }, { "epoch": 0.65, "grad_norm": 1.2971239740076461, "learning_rate": 2.9114364356981274e-06, "loss": 0.1685, "step": 9060 }, { "epoch": 0.65, "grad_norm": 1.537578315723535, "learning_rate": 2.910383989690585e-06, "loss": 0.1862, "step": 9061 }, { "epoch": 0.65, "grad_norm": 1.296001205770126, "learning_rate": 2.90933165584533e-06, "loss": 0.1657, "step": 9062 }, { "epoch": 0.65, "grad_norm": 1.187497057145445, "learning_rate": 2.908279434218845e-06, "loss": 0.1431, "step": 9063 }, { "epoch": 0.65, "grad_norm": 1.3406397043285756, "learning_rate": 2.9072273248676107e-06, "loss": 0.1961, "step": 9064 }, { "epoch": 0.65, "grad_norm": 1.2837856155197962, "learning_rate": 2.9061753278480987e-06, "loss": 0.1689, "step": 9065 }, { "epoch": 0.65, "grad_norm": 1.177606595634527, "learning_rate": 2.9051234432167786e-06, "loss": 0.1936, "step": 9066 }, { "epoch": 0.65, "grad_norm": 1.1449900086891058, "learning_rate": 2.904071671030107e-06, "loss": 0.1611, "step": 9067 }, { "epoch": 0.65, "grad_norm": 1.39725572436349, "learning_rate": 2.9030200113445415e-06, "loss": 0.1912, "step": 9068 }, { "epoch": 0.65, "grad_norm": 4.690426718452229, "learning_rate": 2.901968464216531e-06, "loss": 0.6479, "step": 9069 }, { "epoch": 0.65, "grad_norm": 1.2779487289619416, "learning_rate": 2.900917029702521e-06, "loss": 0.1517, "step": 9070 }, { "epoch": 0.65, "grad_norm": 1.3601654246466315, "learning_rate": 2.8998657078589424e-06, "loss": 0.2039, "step": 9071 }, { "epoch": 0.65, "grad_norm": 1.5263630373187766, "learning_rate": 2.8988144987422325e-06, "loss": 0.2035, "step": 9072 }, { "epoch": 0.65, "grad_norm": 1.2719445102531923, "learning_rate": 2.89776340240881e-06, "loss": 0.1824, "step": 9073 }, { "epoch": 0.65, "grad_norm": 1.909376100349962, "learning_rate": 2.8967124189150985e-06, "loss": 0.2139, "step": 9074 }, { "epoch": 0.65, "grad_norm": 5.749218754821759, "learning_rate": 2.895661548317506e-06, "loss": 0.5513, "step": 9075 }, { "epoch": 0.65, "grad_norm": 1.2558987075772945, "learning_rate": 2.894610790672442e-06, "loss": 0.1612, "step": 9076 }, { "epoch": 0.65, "grad_norm": 1.4662531947988435, "learning_rate": 2.893560146036306e-06, "loss": 0.2098, "step": 9077 }, { "epoch": 0.65, "grad_norm": 1.4402046517144116, "learning_rate": 2.8925096144654956e-06, "loss": 0.1849, "step": 9078 }, { "epoch": 0.65, "grad_norm": 1.2424667475979911, "learning_rate": 2.891459196016394e-06, "loss": 0.1827, "step": 9079 }, { "epoch": 0.65, "grad_norm": 1.1925889059076282, "learning_rate": 2.8904088907453887e-06, "loss": 0.1557, "step": 9080 }, { "epoch": 0.65, "grad_norm": 1.5071488789489207, "learning_rate": 2.8893586987088502e-06, "loss": 0.1963, "step": 9081 }, { "epoch": 0.65, "grad_norm": 1.4459193823704106, "learning_rate": 2.888308619963154e-06, "loss": 0.1829, "step": 9082 }, { "epoch": 0.65, "grad_norm": 1.4476566453499513, "learning_rate": 2.8872586545646597e-06, "loss": 0.1851, "step": 9083 }, { "epoch": 0.65, "grad_norm": 1.3245811867298163, "learning_rate": 2.886208802569728e-06, "loss": 0.1841, "step": 9084 }, { "epoch": 0.65, "grad_norm": 1.4738373197165826, "learning_rate": 2.8851590640347094e-06, "loss": 0.2039, "step": 9085 }, { "epoch": 0.65, "grad_norm": 1.2732079501974507, "learning_rate": 2.884109439015952e-06, "loss": 0.2151, "step": 9086 }, { "epoch": 0.65, "grad_norm": 1.2783253958145018, "learning_rate": 2.883059927569793e-06, "loss": 0.1564, "step": 9087 }, { "epoch": 0.65, "grad_norm": 1.2752686393749104, "learning_rate": 2.8820105297525664e-06, "loss": 0.1549, "step": 9088 }, { "epoch": 0.65, "grad_norm": 1.3687513795261879, "learning_rate": 2.880961245620602e-06, "loss": 0.1876, "step": 9089 }, { "epoch": 0.65, "grad_norm": 1.4022633759405605, "learning_rate": 2.879912075230218e-06, "loss": 0.1781, "step": 9090 }, { "epoch": 0.65, "grad_norm": 1.307045836353741, "learning_rate": 2.878863018637733e-06, "loss": 0.1985, "step": 9091 }, { "epoch": 0.65, "grad_norm": 1.3820230734764989, "learning_rate": 2.877814075899452e-06, "loss": 0.1791, "step": 9092 }, { "epoch": 0.65, "grad_norm": 1.2296235227739558, "learning_rate": 2.8767652470716805e-06, "loss": 0.1497, "step": 9093 }, { "epoch": 0.65, "grad_norm": 1.3645595110393514, "learning_rate": 2.8757165322107156e-06, "loss": 0.1732, "step": 9094 }, { "epoch": 0.65, "grad_norm": 1.232719967045591, "learning_rate": 2.87466793137285e-06, "loss": 0.1538, "step": 9095 }, { "epoch": 0.65, "grad_norm": 1.3449034208731279, "learning_rate": 2.873619444614364e-06, "loss": 0.1837, "step": 9096 }, { "epoch": 0.65, "grad_norm": 1.3438380185872054, "learning_rate": 2.8725710719915406e-06, "loss": 0.208, "step": 9097 }, { "epoch": 0.65, "grad_norm": 1.6930454655398925, "learning_rate": 2.871522813560648e-06, "loss": 0.1669, "step": 9098 }, { "epoch": 0.65, "grad_norm": 6.112427403316681, "learning_rate": 2.8704746693779573e-06, "loss": 0.5536, "step": 9099 }, { "epoch": 0.65, "grad_norm": 1.2130588102525823, "learning_rate": 2.8694266394997238e-06, "loss": 0.1514, "step": 9100 }, { "epoch": 0.65, "grad_norm": 1.4631426558518188, "learning_rate": 2.868378723982206e-06, "loss": 0.1789, "step": 9101 }, { "epoch": 0.65, "grad_norm": 1.614652285387004, "learning_rate": 2.867330922881648e-06, "loss": 0.2069, "step": 9102 }, { "epoch": 0.65, "grad_norm": 1.2544767784979112, "learning_rate": 2.8662832362542934e-06, "loss": 0.1725, "step": 9103 }, { "epoch": 0.65, "grad_norm": 1.583331225964863, "learning_rate": 2.8652356641563783e-06, "loss": 0.2405, "step": 9104 }, { "epoch": 0.65, "grad_norm": 1.2748638191999488, "learning_rate": 2.8641882066441336e-06, "loss": 0.1553, "step": 9105 }, { "epoch": 0.65, "grad_norm": 1.3181552108068055, "learning_rate": 2.8631408637737784e-06, "loss": 0.1817, "step": 9106 }, { "epoch": 0.65, "grad_norm": 1.2913770516319223, "learning_rate": 2.862093635601535e-06, "loss": 0.1582, "step": 9107 }, { "epoch": 0.65, "grad_norm": 6.310057142195116, "learning_rate": 2.8610465221836094e-06, "loss": 0.6924, "step": 9108 }, { "epoch": 0.65, "grad_norm": 1.3511973881496233, "learning_rate": 2.85999952357621e-06, "loss": 0.1821, "step": 9109 }, { "epoch": 0.65, "grad_norm": 1.3148709285229379, "learning_rate": 2.858952639835534e-06, "loss": 0.1987, "step": 9110 }, { "epoch": 0.65, "grad_norm": 1.24549234908076, "learning_rate": 2.8579058710177733e-06, "loss": 0.1253, "step": 9111 }, { "epoch": 0.65, "grad_norm": 4.733868103061433, "learning_rate": 2.8568592171791156e-06, "loss": 0.5707, "step": 9112 }, { "epoch": 0.65, "grad_norm": 1.1403666249966755, "learning_rate": 2.8558126783757424e-06, "loss": 0.1475, "step": 9113 }, { "epoch": 0.65, "grad_norm": 1.5130716516099876, "learning_rate": 2.8547662546638248e-06, "loss": 0.1922, "step": 9114 }, { "epoch": 0.65, "grad_norm": 1.3140950004612044, "learning_rate": 2.853719946099534e-06, "loss": 0.1616, "step": 9115 }, { "epoch": 0.65, "grad_norm": 1.2622954116082894, "learning_rate": 2.852673752739028e-06, "loss": 0.1759, "step": 9116 }, { "epoch": 0.65, "grad_norm": 1.218053777558217, "learning_rate": 2.8516276746384656e-06, "loss": 0.1535, "step": 9117 }, { "epoch": 0.65, "grad_norm": 1.222865556753346, "learning_rate": 2.850581711853993e-06, "loss": 0.1332, "step": 9118 }, { "epoch": 0.65, "grad_norm": 1.1454488605550988, "learning_rate": 2.849535864441755e-06, "loss": 0.154, "step": 9119 }, { "epoch": 0.65, "grad_norm": 1.4973720088212403, "learning_rate": 2.8484901324578883e-06, "loss": 0.221, "step": 9120 }, { "epoch": 0.65, "grad_norm": 10.722205533689605, "learning_rate": 2.8474445159585235e-06, "loss": 0.5296, "step": 9121 }, { "epoch": 0.65, "grad_norm": 1.3466521190940732, "learning_rate": 2.8463990149997877e-06, "loss": 0.1998, "step": 9122 }, { "epoch": 0.65, "grad_norm": 1.5686678226019848, "learning_rate": 2.8453536296377948e-06, "loss": 0.2467, "step": 9123 }, { "epoch": 0.65, "grad_norm": 1.4203469811446856, "learning_rate": 2.8443083599286615e-06, "loss": 0.1818, "step": 9124 }, { "epoch": 0.65, "grad_norm": 1.3290866520438898, "learning_rate": 2.8432632059284893e-06, "loss": 0.178, "step": 9125 }, { "epoch": 0.65, "grad_norm": 1.5623409548955918, "learning_rate": 2.842218167693381e-06, "loss": 0.2708, "step": 9126 }, { "epoch": 0.65, "grad_norm": 1.4246139366756787, "learning_rate": 2.8411732452794284e-06, "loss": 0.1496, "step": 9127 }, { "epoch": 0.65, "grad_norm": 1.2972264778860447, "learning_rate": 2.8401284387427185e-06, "loss": 0.1708, "step": 9128 }, { "epoch": 0.65, "grad_norm": 1.3209367647959487, "learning_rate": 2.8390837481393335e-06, "loss": 0.136, "step": 9129 }, { "epoch": 0.65, "grad_norm": 1.2561911045433416, "learning_rate": 2.83803917352535e-06, "loss": 0.178, "step": 9130 }, { "epoch": 0.65, "grad_norm": 1.174680667521737, "learning_rate": 2.836994714956833e-06, "loss": 0.1475, "step": 9131 }, { "epoch": 0.65, "grad_norm": 5.732567710659345, "learning_rate": 2.8359503724898485e-06, "loss": 0.5529, "step": 9132 }, { "epoch": 0.65, "grad_norm": 1.4348805872214954, "learning_rate": 2.8349061461804483e-06, "loss": 0.1601, "step": 9133 }, { "epoch": 0.65, "grad_norm": 1.4528042419704412, "learning_rate": 2.833862036084687e-06, "loss": 0.1781, "step": 9134 }, { "epoch": 0.65, "grad_norm": 1.2746644933427882, "learning_rate": 2.8328180422586036e-06, "loss": 0.1695, "step": 9135 }, { "epoch": 0.65, "grad_norm": 1.4331759495519956, "learning_rate": 2.8317741647582385e-06, "loss": 0.1553, "step": 9136 }, { "epoch": 0.65, "grad_norm": 3.9043904215217404, "learning_rate": 2.8307304036396222e-06, "loss": 0.3483, "step": 9137 }, { "epoch": 0.65, "grad_norm": 1.374350982287738, "learning_rate": 2.829686758958782e-06, "loss": 0.1613, "step": 9138 }, { "epoch": 0.65, "grad_norm": 8.390443321937747, "learning_rate": 2.8286432307717315e-06, "loss": 0.5265, "step": 9139 }, { "epoch": 0.65, "grad_norm": 1.2727971354479681, "learning_rate": 2.827599819134489e-06, "loss": 0.1713, "step": 9140 }, { "epoch": 0.65, "grad_norm": 1.285165735343625, "learning_rate": 2.8265565241030547e-06, "loss": 0.1679, "step": 9141 }, { "epoch": 0.65, "grad_norm": 1.440593107010905, "learning_rate": 2.8255133457334343e-06, "loss": 0.1953, "step": 9142 }, { "epoch": 0.65, "grad_norm": 1.446254804985315, "learning_rate": 2.8244702840816163e-06, "loss": 0.1885, "step": 9143 }, { "epoch": 0.65, "grad_norm": 1.1311931396842339, "learning_rate": 2.823427339203591e-06, "loss": 0.1357, "step": 9144 }, { "epoch": 0.65, "grad_norm": 1.531695091559079, "learning_rate": 2.8223845111553388e-06, "loss": 0.2032, "step": 9145 }, { "epoch": 0.65, "grad_norm": 1.340532428074617, "learning_rate": 2.821341799992837e-06, "loss": 0.1908, "step": 9146 }, { "epoch": 0.65, "grad_norm": 1.213585702827845, "learning_rate": 2.82029920577205e-06, "loss": 0.124, "step": 9147 }, { "epoch": 0.65, "grad_norm": 1.2553022926886956, "learning_rate": 2.8192567285489434e-06, "loss": 0.1536, "step": 9148 }, { "epoch": 0.65, "grad_norm": 1.4772475540394465, "learning_rate": 2.818214368379471e-06, "loss": 0.2383, "step": 9149 }, { "epoch": 0.65, "grad_norm": 1.4770072465137571, "learning_rate": 2.817172125319585e-06, "loss": 0.1643, "step": 9150 }, { "epoch": 0.65, "grad_norm": 1.300381001786014, "learning_rate": 2.8161299994252255e-06, "loss": 0.157, "step": 9151 }, { "epoch": 0.65, "grad_norm": 1.2452223147923527, "learning_rate": 2.815087990752332e-06, "loss": 0.1561, "step": 9152 }, { "epoch": 0.65, "grad_norm": 1.1776467473936596, "learning_rate": 2.8140460993568348e-06, "loss": 0.139, "step": 9153 }, { "epoch": 0.65, "grad_norm": 7.578830121928663, "learning_rate": 2.813004325294658e-06, "loss": 0.6898, "step": 9154 }, { "epoch": 0.65, "grad_norm": 1.308938250782106, "learning_rate": 2.8119626686217235e-06, "loss": 0.1578, "step": 9155 }, { "epoch": 0.65, "grad_norm": 1.2630429962404797, "learning_rate": 2.8109211293939376e-06, "loss": 0.2092, "step": 9156 }, { "epoch": 0.66, "grad_norm": 1.346525557804274, "learning_rate": 2.8098797076672114e-06, "loss": 0.168, "step": 9157 }, { "epoch": 0.66, "grad_norm": 1.435339094381345, "learning_rate": 2.8088384034974385e-06, "loss": 0.1855, "step": 9158 }, { "epoch": 0.66, "grad_norm": 1.4088798707848207, "learning_rate": 2.8077972169405175e-06, "loss": 0.2067, "step": 9159 }, { "epoch": 0.66, "grad_norm": 1.177345398638196, "learning_rate": 2.8067561480523315e-06, "loss": 0.167, "step": 9160 }, { "epoch": 0.66, "grad_norm": 7.696041322348422, "learning_rate": 2.8057151968887618e-06, "loss": 0.6083, "step": 9161 }, { "epoch": 0.66, "grad_norm": 1.265258120934531, "learning_rate": 2.8046743635056828e-06, "loss": 0.1888, "step": 9162 }, { "epoch": 0.66, "grad_norm": 1.3942502139023836, "learning_rate": 2.8036336479589644e-06, "loss": 0.1653, "step": 9163 }, { "epoch": 0.66, "grad_norm": 1.384709645024829, "learning_rate": 2.802593050304464e-06, "loss": 0.1835, "step": 9164 }, { "epoch": 0.66, "grad_norm": 1.212484651119731, "learning_rate": 2.8015525705980405e-06, "loss": 0.1737, "step": 9165 }, { "epoch": 0.66, "grad_norm": 1.286298711861579, "learning_rate": 2.8005122088955395e-06, "loss": 0.183, "step": 9166 }, { "epoch": 0.66, "grad_norm": 1.4490900745105453, "learning_rate": 2.799471965252807e-06, "loss": 0.2291, "step": 9167 }, { "epoch": 0.66, "grad_norm": 1.3634770143179573, "learning_rate": 2.798431839725675e-06, "loss": 0.1945, "step": 9168 }, { "epoch": 0.66, "grad_norm": 1.2895287728446805, "learning_rate": 2.7973918323699756e-06, "loss": 0.1488, "step": 9169 }, { "epoch": 0.66, "grad_norm": 1.4185321326779774, "learning_rate": 2.796351943241532e-06, "loss": 0.2199, "step": 9170 }, { "epoch": 0.66, "grad_norm": 1.3432723184361033, "learning_rate": 2.795312172396164e-06, "loss": 0.1527, "step": 9171 }, { "epoch": 0.66, "grad_norm": 1.412181900464206, "learning_rate": 2.794272519889677e-06, "loss": 0.1634, "step": 9172 }, { "epoch": 0.66, "grad_norm": 1.3334300851699827, "learning_rate": 2.7932329857778807e-06, "loss": 0.1874, "step": 9173 }, { "epoch": 0.66, "grad_norm": 1.4641846326819397, "learning_rate": 2.7921935701165686e-06, "loss": 0.1524, "step": 9174 }, { "epoch": 0.66, "grad_norm": 1.360484250754988, "learning_rate": 2.791154272961537e-06, "loss": 0.1512, "step": 9175 }, { "epoch": 0.66, "grad_norm": 1.5298355751829058, "learning_rate": 2.790115094368566e-06, "loss": 0.1794, "step": 9176 }, { "epoch": 0.66, "grad_norm": 1.6649370543017161, "learning_rate": 2.7890760343934377e-06, "loss": 0.1544, "step": 9177 }, { "epoch": 0.66, "grad_norm": 1.2767568785959826, "learning_rate": 2.7880370930919243e-06, "loss": 0.1533, "step": 9178 }, { "epoch": 0.66, "grad_norm": 1.3520757758417188, "learning_rate": 2.786998270519794e-06, "loss": 0.2153, "step": 9179 }, { "epoch": 0.66, "grad_norm": 1.4642949250540587, "learning_rate": 2.7859595667328027e-06, "loss": 0.1927, "step": 9180 }, { "epoch": 0.66, "grad_norm": 1.5527659305911454, "learning_rate": 2.7849209817867085e-06, "loss": 0.1869, "step": 9181 }, { "epoch": 0.66, "grad_norm": 1.2480577221849667, "learning_rate": 2.7838825157372527e-06, "loss": 0.1688, "step": 9182 }, { "epoch": 0.66, "grad_norm": 1.3030919870143174, "learning_rate": 2.7828441686401807e-06, "loss": 0.1707, "step": 9183 }, { "epoch": 0.66, "grad_norm": 1.4262679733622055, "learning_rate": 2.7818059405512267e-06, "loss": 0.1884, "step": 9184 }, { "epoch": 0.66, "grad_norm": 1.2281805717888794, "learning_rate": 2.7807678315261155e-06, "loss": 0.1805, "step": 9185 }, { "epoch": 0.66, "grad_norm": 1.3617106125711933, "learning_rate": 2.77972984162057e-06, "loss": 0.1759, "step": 9186 }, { "epoch": 0.66, "grad_norm": 1.3542284249040688, "learning_rate": 2.778691970890307e-06, "loss": 0.1989, "step": 9187 }, { "epoch": 0.66, "grad_norm": 1.2899810532209837, "learning_rate": 2.777654219391036e-06, "loss": 0.1654, "step": 9188 }, { "epoch": 0.66, "grad_norm": 1.426899245331698, "learning_rate": 2.776616587178455e-06, "loss": 0.1804, "step": 9189 }, { "epoch": 0.66, "grad_norm": 1.3285160228805808, "learning_rate": 2.775579074308265e-06, "loss": 0.2092, "step": 9190 }, { "epoch": 0.66, "grad_norm": 1.4613867719211835, "learning_rate": 2.774541680836151e-06, "loss": 0.1712, "step": 9191 }, { "epoch": 0.66, "grad_norm": 1.1779294228018107, "learning_rate": 2.7735044068178e-06, "loss": 0.138, "step": 9192 }, { "epoch": 0.66, "grad_norm": 1.1790873204804098, "learning_rate": 2.7724672523088857e-06, "loss": 0.1298, "step": 9193 }, { "epoch": 0.66, "grad_norm": 1.3240577487046459, "learning_rate": 2.771430217365081e-06, "loss": 0.1978, "step": 9194 }, { "epoch": 0.66, "grad_norm": 1.3361666719078507, "learning_rate": 2.7703933020420453e-06, "loss": 0.1552, "step": 9195 }, { "epoch": 0.66, "grad_norm": 1.449438649050825, "learning_rate": 2.769356506395443e-06, "loss": 0.1892, "step": 9196 }, { "epoch": 0.66, "grad_norm": 1.396391739398802, "learning_rate": 2.76831983048092e-06, "loss": 0.1726, "step": 9197 }, { "epoch": 0.66, "grad_norm": 1.3060722383166927, "learning_rate": 2.767283274354124e-06, "loss": 0.1446, "step": 9198 }, { "epoch": 0.66, "grad_norm": 8.89992247238176, "learning_rate": 2.7662468380706897e-06, "loss": 0.5954, "step": 9199 }, { "epoch": 0.66, "grad_norm": 1.1793376818854633, "learning_rate": 2.7652105216862536e-06, "loss": 0.1465, "step": 9200 }, { "epoch": 0.66, "grad_norm": 4.834673135848603, "learning_rate": 2.7641743252564355e-06, "loss": 0.5286, "step": 9201 }, { "epoch": 0.66, "grad_norm": 1.3412421253854436, "learning_rate": 2.7631382488368598e-06, "loss": 0.1694, "step": 9202 }, { "epoch": 0.66, "grad_norm": 1.487649754527599, "learning_rate": 2.7621022924831337e-06, "loss": 0.2221, "step": 9203 }, { "epoch": 0.66, "grad_norm": 1.1986840182196448, "learning_rate": 2.761066456250866e-06, "loss": 0.1717, "step": 9204 }, { "epoch": 0.66, "grad_norm": 1.5059505157009987, "learning_rate": 2.7600307401956565e-06, "loss": 0.2336, "step": 9205 }, { "epoch": 0.66, "grad_norm": 1.346391640069979, "learning_rate": 2.7589951443731e-06, "loss": 0.1972, "step": 9206 }, { "epoch": 0.66, "grad_norm": 1.5097187839877142, "learning_rate": 2.757959668838779e-06, "loss": 0.2404, "step": 9207 }, { "epoch": 0.66, "grad_norm": 1.3159413497478796, "learning_rate": 2.756924313648277e-06, "loss": 0.1851, "step": 9208 }, { "epoch": 0.66, "grad_norm": 1.4321556356958238, "learning_rate": 2.7558890788571657e-06, "loss": 0.1942, "step": 9209 }, { "epoch": 0.66, "grad_norm": 4.461317887693462, "learning_rate": 2.754853964521015e-06, "loss": 0.4871, "step": 9210 }, { "epoch": 0.66, "grad_norm": 1.3715587534979456, "learning_rate": 2.7538189706953812e-06, "loss": 0.2329, "step": 9211 }, { "epoch": 0.66, "grad_norm": 1.4394626112279234, "learning_rate": 2.752784097435822e-06, "loss": 0.1903, "step": 9212 }, { "epoch": 0.66, "grad_norm": 5.616960026516626, "learning_rate": 2.751749344797885e-06, "loss": 0.4282, "step": 9213 }, { "epoch": 0.66, "grad_norm": 1.2756494240633804, "learning_rate": 2.750714712837113e-06, "loss": 0.1727, "step": 9214 }, { "epoch": 0.66, "grad_norm": 1.3693924493472256, "learning_rate": 2.749680201609037e-06, "loss": 0.1667, "step": 9215 }, { "epoch": 0.66, "grad_norm": 1.3591515202456343, "learning_rate": 2.748645811169189e-06, "loss": 0.169, "step": 9216 }, { "epoch": 0.66, "grad_norm": 1.2740030145018901, "learning_rate": 2.7476115415730906e-06, "loss": 0.207, "step": 9217 }, { "epoch": 0.66, "grad_norm": 1.5061819690401377, "learning_rate": 2.7465773928762546e-06, "loss": 0.146, "step": 9218 }, { "epoch": 0.66, "grad_norm": 1.3130471229954106, "learning_rate": 2.745543365134194e-06, "loss": 0.1776, "step": 9219 }, { "epoch": 0.66, "grad_norm": 1.3722361122194406, "learning_rate": 2.7445094584024067e-06, "loss": 0.1792, "step": 9220 }, { "epoch": 0.66, "grad_norm": 1.3214313491408873, "learning_rate": 2.743475672736391e-06, "loss": 0.1562, "step": 9221 }, { "epoch": 0.66, "grad_norm": 1.216806364554223, "learning_rate": 2.742442008191637e-06, "loss": 0.1566, "step": 9222 }, { "epoch": 0.66, "grad_norm": 1.5274689212737964, "learning_rate": 2.741408464823629e-06, "loss": 0.1697, "step": 9223 }, { "epoch": 0.66, "grad_norm": 1.3116436787574242, "learning_rate": 2.7403750426878394e-06, "loss": 0.1733, "step": 9224 }, { "epoch": 0.66, "grad_norm": 1.400038341916177, "learning_rate": 2.739341741839743e-06, "loss": 0.2095, "step": 9225 }, { "epoch": 0.66, "grad_norm": 1.3957557592259002, "learning_rate": 2.7383085623347983e-06, "loss": 0.178, "step": 9226 }, { "epoch": 0.66, "grad_norm": 1.4805199707811998, "learning_rate": 2.737275504228467e-06, "loss": 0.1885, "step": 9227 }, { "epoch": 0.66, "grad_norm": 1.409596295065184, "learning_rate": 2.7362425675761955e-06, "loss": 0.1808, "step": 9228 }, { "epoch": 0.66, "grad_norm": 1.208954979052102, "learning_rate": 2.735209752433429e-06, "loss": 0.122, "step": 9229 }, { "epoch": 0.66, "grad_norm": 1.4108146234348289, "learning_rate": 2.734177058855606e-06, "loss": 0.1874, "step": 9230 }, { "epoch": 0.66, "grad_norm": 1.2695486863214673, "learning_rate": 2.733144486898159e-06, "loss": 0.1549, "step": 9231 }, { "epoch": 0.66, "grad_norm": 1.2980650207421978, "learning_rate": 2.7321120366165075e-06, "loss": 0.1871, "step": 9232 }, { "epoch": 0.66, "grad_norm": 1.39149004498744, "learning_rate": 2.7310797080660746e-06, "loss": 0.1848, "step": 9233 }, { "epoch": 0.66, "grad_norm": 1.3966372159945377, "learning_rate": 2.7300475013022666e-06, "loss": 0.1968, "step": 9234 }, { "epoch": 0.66, "grad_norm": 1.2854290403330828, "learning_rate": 2.7290154163804928e-06, "loss": 0.189, "step": 9235 }, { "epoch": 0.66, "grad_norm": 5.27864775455434, "learning_rate": 2.7279834533561477e-06, "loss": 0.6036, "step": 9236 }, { "epoch": 0.66, "grad_norm": 1.281735888003445, "learning_rate": 2.726951612284624e-06, "loss": 0.1967, "step": 9237 }, { "epoch": 0.66, "grad_norm": 1.2513717817598022, "learning_rate": 2.7259198932213078e-06, "loss": 0.1764, "step": 9238 }, { "epoch": 0.66, "grad_norm": 5.373489243001319, "learning_rate": 2.7248882962215794e-06, "loss": 0.4761, "step": 9239 }, { "epoch": 0.66, "grad_norm": 1.318781985113303, "learning_rate": 2.723856821340806e-06, "loss": 0.1839, "step": 9240 }, { "epoch": 0.66, "grad_norm": 1.2637637233589352, "learning_rate": 2.722825468634358e-06, "loss": 0.1879, "step": 9241 }, { "epoch": 0.66, "grad_norm": 5.517800726371777, "learning_rate": 2.7217942381575913e-06, "loss": 0.7799, "step": 9242 }, { "epoch": 0.66, "grad_norm": 1.526346952652297, "learning_rate": 2.72076312996586e-06, "loss": 0.1882, "step": 9243 }, { "epoch": 0.66, "grad_norm": 1.3204087250759695, "learning_rate": 2.719732144114507e-06, "loss": 0.2194, "step": 9244 }, { "epoch": 0.66, "grad_norm": 1.163283456153252, "learning_rate": 2.718701280658873e-06, "loss": 0.1904, "step": 9245 }, { "epoch": 0.66, "grad_norm": 1.4052492410158965, "learning_rate": 2.7176705396542925e-06, "loss": 0.2031, "step": 9246 }, { "epoch": 0.66, "grad_norm": 1.104724609128661, "learning_rate": 2.716639921156089e-06, "loss": 0.1386, "step": 9247 }, { "epoch": 0.66, "grad_norm": 5.034995552793996, "learning_rate": 2.7156094252195858e-06, "loss": 0.572, "step": 9248 }, { "epoch": 0.66, "grad_norm": 1.490733837958997, "learning_rate": 2.7145790519000912e-06, "loss": 0.1751, "step": 9249 }, { "epoch": 0.66, "grad_norm": 1.329690949967403, "learning_rate": 2.7135488012529154e-06, "loss": 0.1545, "step": 9250 }, { "epoch": 0.66, "grad_norm": 1.1595356314013299, "learning_rate": 2.712518673333354e-06, "loss": 0.154, "step": 9251 }, { "epoch": 0.66, "grad_norm": 1.3548915620337294, "learning_rate": 2.711488668196706e-06, "loss": 0.1801, "step": 9252 }, { "epoch": 0.66, "grad_norm": 1.3175768296350248, "learning_rate": 2.7104587858982514e-06, "loss": 0.1944, "step": 9253 }, { "epoch": 0.66, "grad_norm": 1.2982247766862691, "learning_rate": 2.7094290264932733e-06, "loss": 0.172, "step": 9254 }, { "epoch": 0.66, "grad_norm": 1.2446071473635107, "learning_rate": 2.7083993900370453e-06, "loss": 0.143, "step": 9255 }, { "epoch": 0.66, "grad_norm": 1.1977745423597743, "learning_rate": 2.7073698765848357e-06, "loss": 0.1374, "step": 9256 }, { "epoch": 0.66, "grad_norm": 1.3248679409289712, "learning_rate": 2.706340486191901e-06, "loss": 0.2029, "step": 9257 }, { "epoch": 0.66, "grad_norm": 1.3913776645603948, "learning_rate": 2.7053112189134988e-06, "loss": 0.187, "step": 9258 }, { "epoch": 0.66, "grad_norm": 1.4605588310662811, "learning_rate": 2.7042820748048724e-06, "loss": 0.1859, "step": 9259 }, { "epoch": 0.66, "grad_norm": 1.4480777026998808, "learning_rate": 2.703253053921266e-06, "loss": 0.1926, "step": 9260 }, { "epoch": 0.66, "grad_norm": 1.224398025382799, "learning_rate": 2.7022241563179085e-06, "loss": 0.1946, "step": 9261 }, { "epoch": 0.66, "grad_norm": 1.4120986783646317, "learning_rate": 2.7011953820500305e-06, "loss": 0.2026, "step": 9262 }, { "epoch": 0.66, "grad_norm": 1.3533241482151315, "learning_rate": 2.7001667311728507e-06, "loss": 0.1797, "step": 9263 }, { "epoch": 0.66, "grad_norm": 1.405525446702556, "learning_rate": 2.699138203741587e-06, "loss": 0.1892, "step": 9264 }, { "epoch": 0.66, "grad_norm": 1.4180960069873771, "learning_rate": 2.6981097998114416e-06, "loss": 0.1887, "step": 9265 }, { "epoch": 0.66, "grad_norm": 1.3051652773211828, "learning_rate": 2.6970815194376187e-06, "loss": 0.1638, "step": 9266 }, { "epoch": 0.66, "grad_norm": 1.3653319811806104, "learning_rate": 2.696053362675309e-06, "loss": 0.1749, "step": 9267 }, { "epoch": 0.66, "grad_norm": 1.5026885968292292, "learning_rate": 2.6950253295797033e-06, "loss": 0.1877, "step": 9268 }, { "epoch": 0.66, "grad_norm": 1.512445884946505, "learning_rate": 2.6939974202059793e-06, "loss": 0.2127, "step": 9269 }, { "epoch": 0.66, "grad_norm": 1.3343622463117142, "learning_rate": 2.6929696346093128e-06, "loss": 0.1737, "step": 9270 }, { "epoch": 0.66, "grad_norm": 1.4869490897346702, "learning_rate": 2.6919419728448703e-06, "loss": 0.2147, "step": 9271 }, { "epoch": 0.66, "grad_norm": 1.6034803164105245, "learning_rate": 2.690914434967815e-06, "loss": 0.1741, "step": 9272 }, { "epoch": 0.66, "grad_norm": 1.205290952666303, "learning_rate": 2.6898870210332982e-06, "loss": 0.1607, "step": 9273 }, { "epoch": 0.66, "grad_norm": 1.2927306173376851, "learning_rate": 2.68885973109647e-06, "loss": 0.1783, "step": 9274 }, { "epoch": 0.66, "grad_norm": 1.1995964389815783, "learning_rate": 2.6878325652124683e-06, "loss": 0.1788, "step": 9275 }, { "epoch": 0.66, "grad_norm": 1.1697118062243048, "learning_rate": 2.6868055234364304e-06, "loss": 0.1586, "step": 9276 }, { "epoch": 0.66, "grad_norm": 2.817803226555485, "learning_rate": 2.6857786058234803e-06, "loss": 0.1664, "step": 9277 }, { "epoch": 0.66, "grad_norm": 1.4316280401644745, "learning_rate": 2.6847518124287403e-06, "loss": 0.1878, "step": 9278 }, { "epoch": 0.66, "grad_norm": 1.5599982382612807, "learning_rate": 2.6837251433073252e-06, "loss": 0.1649, "step": 9279 }, { "epoch": 0.66, "grad_norm": 3.967772934417691, "learning_rate": 2.682698598514343e-06, "loss": 0.4092, "step": 9280 }, { "epoch": 0.66, "grad_norm": 1.3190403475864327, "learning_rate": 2.681672178104895e-06, "loss": 0.1636, "step": 9281 }, { "epoch": 0.66, "grad_norm": 1.3481183260083802, "learning_rate": 2.6806458821340726e-06, "loss": 0.172, "step": 9282 }, { "epoch": 0.66, "grad_norm": 1.4273857136394643, "learning_rate": 2.6796197106569667e-06, "loss": 0.1904, "step": 9283 }, { "epoch": 0.66, "grad_norm": 1.355000729326456, "learning_rate": 2.6785936637286546e-06, "loss": 0.1675, "step": 9284 }, { "epoch": 0.66, "grad_norm": 8.566463122407972, "learning_rate": 2.677567741404214e-06, "loss": 0.5335, "step": 9285 }, { "epoch": 0.66, "grad_norm": 1.6682991609050168, "learning_rate": 2.6765419437387088e-06, "loss": 0.2171, "step": 9286 }, { "epoch": 0.66, "grad_norm": 1.332531661889876, "learning_rate": 2.675516270787201e-06, "loss": 0.168, "step": 9287 }, { "epoch": 0.66, "grad_norm": 1.299136020006505, "learning_rate": 2.6744907226047455e-06, "loss": 0.1454, "step": 9288 }, { "epoch": 0.66, "grad_norm": 1.225761706161005, "learning_rate": 2.673465299246391e-06, "loss": 0.1576, "step": 9289 }, { "epoch": 0.66, "grad_norm": 1.0842168558994822, "learning_rate": 2.6724400007671745e-06, "loss": 0.1326, "step": 9290 }, { "epoch": 0.66, "grad_norm": 1.2244833517813778, "learning_rate": 2.671414827222134e-06, "loss": 0.1614, "step": 9291 }, { "epoch": 0.66, "grad_norm": 1.4513025346771953, "learning_rate": 2.6703897786662925e-06, "loss": 0.1556, "step": 9292 }, { "epoch": 0.66, "grad_norm": 1.3402095259989195, "learning_rate": 2.669364855154675e-06, "loss": 0.1906, "step": 9293 }, { "epoch": 0.66, "grad_norm": 1.310623714205806, "learning_rate": 2.66834005674229e-06, "loss": 0.1679, "step": 9294 }, { "epoch": 0.66, "grad_norm": 1.337539931906442, "learning_rate": 2.6673153834841503e-06, "loss": 0.1953, "step": 9295 }, { "epoch": 0.66, "grad_norm": 1.3903304616172176, "learning_rate": 2.666290835435249e-06, "loss": 0.1688, "step": 9296 }, { "epoch": 0.67, "grad_norm": 1.2773407558574614, "learning_rate": 2.665266412650589e-06, "loss": 0.1838, "step": 9297 }, { "epoch": 0.67, "grad_norm": 1.3843137570338333, "learning_rate": 2.6642421151851506e-06, "loss": 0.1789, "step": 9298 }, { "epoch": 0.67, "grad_norm": 1.254203113204737, "learning_rate": 2.6632179430939175e-06, "loss": 0.1675, "step": 9299 }, { "epoch": 0.67, "grad_norm": 1.341905399106123, "learning_rate": 2.6621938964318593e-06, "loss": 0.1515, "step": 9300 }, { "epoch": 0.67, "grad_norm": 1.2534036113592772, "learning_rate": 2.6611699752539477e-06, "loss": 0.1755, "step": 9301 }, { "epoch": 0.67, "grad_norm": 1.2606640330360908, "learning_rate": 2.6601461796151383e-06, "loss": 0.1517, "step": 9302 }, { "epoch": 0.67, "grad_norm": 1.3065428778443706, "learning_rate": 2.6591225095703876e-06, "loss": 0.1971, "step": 9303 }, { "epoch": 0.67, "grad_norm": 1.4171440044672396, "learning_rate": 2.6580989651746395e-06, "loss": 0.1725, "step": 9304 }, { "epoch": 0.67, "grad_norm": 1.2898160533609875, "learning_rate": 2.6570755464828346e-06, "loss": 0.1811, "step": 9305 }, { "epoch": 0.67, "grad_norm": 1.1624951769933016, "learning_rate": 2.6560522535499067e-06, "loss": 0.1482, "step": 9306 }, { "epoch": 0.67, "grad_norm": 1.467477688349392, "learning_rate": 2.655029086430784e-06, "loss": 0.203, "step": 9307 }, { "epoch": 0.67, "grad_norm": 1.4449126070963914, "learning_rate": 2.6540060451803817e-06, "loss": 0.2012, "step": 9308 }, { "epoch": 0.67, "grad_norm": 1.314717863675286, "learning_rate": 2.6529831298536165e-06, "loss": 0.1709, "step": 9309 }, { "epoch": 0.67, "grad_norm": 1.378840632807368, "learning_rate": 2.6519603405053906e-06, "loss": 0.2039, "step": 9310 }, { "epoch": 0.67, "grad_norm": 1.2368441788263926, "learning_rate": 2.650937677190606e-06, "loss": 0.1497, "step": 9311 }, { "epoch": 0.67, "grad_norm": 1.2516041645441565, "learning_rate": 2.6499151399641554e-06, "loss": 0.1595, "step": 9312 }, { "epoch": 0.67, "grad_norm": 1.1908802864332015, "learning_rate": 2.6488927288809228e-06, "loss": 0.1716, "step": 9313 }, { "epoch": 0.67, "grad_norm": 1.190018255022369, "learning_rate": 2.647870443995788e-06, "loss": 0.1604, "step": 9314 }, { "epoch": 0.67, "grad_norm": 3.7783849537471483, "learning_rate": 2.6468482853636223e-06, "loss": 0.4645, "step": 9315 }, { "epoch": 0.67, "grad_norm": 1.388480098679166, "learning_rate": 2.645826253039295e-06, "loss": 0.1917, "step": 9316 }, { "epoch": 0.67, "grad_norm": 1.290163946696938, "learning_rate": 2.6448043470776596e-06, "loss": 0.1578, "step": 9317 }, { "epoch": 0.67, "grad_norm": 1.3711633249526303, "learning_rate": 2.6437825675335714e-06, "loss": 0.1716, "step": 9318 }, { "epoch": 0.67, "grad_norm": 5.130156475775542, "learning_rate": 2.6427609144618737e-06, "loss": 0.6032, "step": 9319 }, { "epoch": 0.67, "grad_norm": 1.352036236926486, "learning_rate": 2.6417393879174056e-06, "loss": 0.1949, "step": 9320 }, { "epoch": 0.67, "grad_norm": 1.2439730595921765, "learning_rate": 2.6407179879549976e-06, "loss": 0.2039, "step": 9321 }, { "epoch": 0.67, "grad_norm": 3.847093245452757, "learning_rate": 2.639696714629475e-06, "loss": 0.3558, "step": 9322 }, { "epoch": 0.67, "grad_norm": 1.5186189482682566, "learning_rate": 2.6386755679956557e-06, "loss": 0.1724, "step": 9323 }, { "epoch": 0.67, "grad_norm": 1.4583920556492413, "learning_rate": 2.637654548108352e-06, "loss": 0.1894, "step": 9324 }, { "epoch": 0.67, "grad_norm": 1.4507341880288265, "learning_rate": 2.636633655022366e-06, "loss": 0.2165, "step": 9325 }, { "epoch": 0.67, "grad_norm": 5.818422412431467, "learning_rate": 2.6356128887924975e-06, "loss": 0.5487, "step": 9326 }, { "epoch": 0.67, "grad_norm": 6.152994506410259, "learning_rate": 2.6345922494735345e-06, "loss": 0.5725, "step": 9327 }, { "epoch": 0.67, "grad_norm": 1.3785943050660376, "learning_rate": 2.6335717371202642e-06, "loss": 0.1931, "step": 9328 }, { "epoch": 0.67, "grad_norm": 1.2858839415292656, "learning_rate": 2.632551351787459e-06, "loss": 0.1242, "step": 9329 }, { "epoch": 0.67, "grad_norm": 1.3391311034556699, "learning_rate": 2.6315310935298925e-06, "loss": 0.1697, "step": 9330 }, { "epoch": 0.67, "grad_norm": 1.5412038893174917, "learning_rate": 2.6305109624023273e-06, "loss": 0.2047, "step": 9331 }, { "epoch": 0.67, "grad_norm": 1.1845457482871438, "learning_rate": 2.629490958459523e-06, "loss": 0.1446, "step": 9332 }, { "epoch": 0.67, "grad_norm": 1.236969026655756, "learning_rate": 2.6284710817562233e-06, "loss": 0.1569, "step": 9333 }, { "epoch": 0.67, "grad_norm": 1.5001832513724567, "learning_rate": 2.627451332347177e-06, "loss": 0.2038, "step": 9334 }, { "epoch": 0.67, "grad_norm": 1.2993687079211058, "learning_rate": 2.626431710287115e-06, "loss": 0.1858, "step": 9335 }, { "epoch": 0.67, "grad_norm": 1.3976779735873606, "learning_rate": 2.6254122156307717e-06, "loss": 0.2116, "step": 9336 }, { "epoch": 0.67, "grad_norm": 4.825096375892765, "learning_rate": 2.6243928484328653e-06, "loss": 0.4562, "step": 9337 }, { "epoch": 0.67, "grad_norm": 1.2199147986963674, "learning_rate": 2.623373608748112e-06, "loss": 0.1721, "step": 9338 }, { "epoch": 0.67, "grad_norm": 1.3550745283656216, "learning_rate": 2.622354496631222e-06, "loss": 0.175, "step": 9339 }, { "epoch": 0.67, "grad_norm": 1.2927934456195045, "learning_rate": 2.621335512136899e-06, "loss": 0.1991, "step": 9340 }, { "epoch": 0.67, "grad_norm": 1.3467794721091668, "learning_rate": 2.6203166553198334e-06, "loss": 0.1871, "step": 9341 }, { "epoch": 0.67, "grad_norm": 1.3932350941373284, "learning_rate": 2.6192979262347174e-06, "loss": 0.1741, "step": 9342 }, { "epoch": 0.67, "grad_norm": 1.2323234385641546, "learning_rate": 2.6182793249362293e-06, "loss": 0.1971, "step": 9343 }, { "epoch": 0.67, "grad_norm": 1.4026438291440717, "learning_rate": 2.617260851479045e-06, "loss": 0.211, "step": 9344 }, { "epoch": 0.67, "grad_norm": 1.2435716531540053, "learning_rate": 2.616242505917834e-06, "loss": 0.1501, "step": 9345 }, { "epoch": 0.67, "grad_norm": 1.1901959717746668, "learning_rate": 2.615224288307253e-06, "loss": 0.1505, "step": 9346 }, { "epoch": 0.67, "grad_norm": 1.366117603031097, "learning_rate": 2.614206198701958e-06, "loss": 0.1989, "step": 9347 }, { "epoch": 0.67, "grad_norm": 5.627917647099022, "learning_rate": 2.613188237156596e-06, "loss": 0.5708, "step": 9348 }, { "epoch": 0.67, "grad_norm": 1.1522649440190307, "learning_rate": 2.612170403725809e-06, "loss": 0.1571, "step": 9349 }, { "epoch": 0.67, "grad_norm": 1.402475462827547, "learning_rate": 2.6111526984642264e-06, "loss": 0.1915, "step": 9350 }, { "epoch": 0.67, "grad_norm": 1.1711313286606628, "learning_rate": 2.6101351214264786e-06, "loss": 0.1524, "step": 9351 }, { "epoch": 0.67, "grad_norm": 1.3257106917788217, "learning_rate": 2.6091176726671815e-06, "loss": 0.1779, "step": 9352 }, { "epoch": 0.67, "grad_norm": 1.2419111184467833, "learning_rate": 2.608100352240951e-06, "loss": 0.1603, "step": 9353 }, { "epoch": 0.67, "grad_norm": 1.430704688108737, "learning_rate": 2.6070831602023882e-06, "loss": 0.2167, "step": 9354 }, { "epoch": 0.67, "grad_norm": 1.2324689143075884, "learning_rate": 2.606066096606096e-06, "loss": 0.1482, "step": 9355 }, { "epoch": 0.67, "grad_norm": 5.313772675883521, "learning_rate": 2.6050491615066646e-06, "loss": 0.5332, "step": 9356 }, { "epoch": 0.67, "grad_norm": 1.4623855661703495, "learning_rate": 2.6040323549586814e-06, "loss": 0.154, "step": 9357 }, { "epoch": 0.67, "grad_norm": 1.321971314757792, "learning_rate": 2.6030156770167216e-06, "loss": 0.1889, "step": 9358 }, { "epoch": 0.67, "grad_norm": 1.4218177859606302, "learning_rate": 2.6019991277353586e-06, "loss": 0.1759, "step": 9359 }, { "epoch": 0.67, "grad_norm": 1.2714487148094773, "learning_rate": 2.600982707169154e-06, "loss": 0.158, "step": 9360 }, { "epoch": 0.67, "grad_norm": 1.194443203620762, "learning_rate": 2.599966415372669e-06, "loss": 0.155, "step": 9361 }, { "epoch": 0.67, "grad_norm": 1.4351956499202008, "learning_rate": 2.5989502524004496e-06, "loss": 0.1839, "step": 9362 }, { "epoch": 0.67, "grad_norm": 1.3973737761653315, "learning_rate": 2.5979342183070422e-06, "loss": 0.1772, "step": 9363 }, { "epoch": 0.67, "grad_norm": 1.326328677963942, "learning_rate": 2.5969183131469834e-06, "loss": 0.2161, "step": 9364 }, { "epoch": 0.67, "grad_norm": 1.5916166484798997, "learning_rate": 2.5959025369748035e-06, "loss": 0.2114, "step": 9365 }, { "epoch": 0.67, "grad_norm": 1.4282663435773646, "learning_rate": 2.594886889845023e-06, "loss": 0.1848, "step": 9366 }, { "epoch": 0.67, "grad_norm": 1.2776917314592318, "learning_rate": 2.5938713718121617e-06, "loss": 0.1712, "step": 9367 }, { "epoch": 0.67, "grad_norm": 1.4994098307899435, "learning_rate": 2.5928559829307236e-06, "loss": 0.2216, "step": 9368 }, { "epoch": 0.67, "grad_norm": 1.2203642787089797, "learning_rate": 2.5918407232552156e-06, "loss": 0.1763, "step": 9369 }, { "epoch": 0.67, "grad_norm": 1.6569940859029384, "learning_rate": 2.5908255928401283e-06, "loss": 0.1887, "step": 9370 }, { "epoch": 0.67, "grad_norm": 1.531836052188907, "learning_rate": 2.589810591739952e-06, "loss": 0.2132, "step": 9371 }, { "epoch": 0.67, "grad_norm": 1.2740067404076434, "learning_rate": 2.588795720009168e-06, "loss": 0.203, "step": 9372 }, { "epoch": 0.67, "grad_norm": 1.2295950964849394, "learning_rate": 2.587780977702252e-06, "loss": 0.164, "step": 9373 }, { "epoch": 0.67, "grad_norm": 1.4588205683030269, "learning_rate": 2.5867663648736687e-06, "loss": 0.2045, "step": 9374 }, { "epoch": 0.67, "grad_norm": 4.217350171890381, "learning_rate": 2.5857518815778795e-06, "loss": 0.607, "step": 9375 }, { "epoch": 0.67, "grad_norm": 1.2405239398352612, "learning_rate": 2.58473752786934e-06, "loss": 0.1541, "step": 9376 }, { "epoch": 0.67, "grad_norm": 1.604736293080333, "learning_rate": 2.5837233038024924e-06, "loss": 0.2058, "step": 9377 }, { "epoch": 0.67, "grad_norm": 1.3667483378536727, "learning_rate": 2.5827092094317807e-06, "loss": 0.1783, "step": 9378 }, { "epoch": 0.67, "grad_norm": 1.3664999839484415, "learning_rate": 2.581695244811633e-06, "loss": 0.1571, "step": 9379 }, { "epoch": 0.67, "grad_norm": 1.1894738211791513, "learning_rate": 2.580681409996477e-06, "loss": 0.1799, "step": 9380 }, { "epoch": 0.67, "grad_norm": 4.597504834857048, "learning_rate": 2.5796677050407327e-06, "loss": 0.4943, "step": 9381 }, { "epoch": 0.67, "grad_norm": 1.3622817014108628, "learning_rate": 2.578654129998811e-06, "loss": 0.1668, "step": 9382 }, { "epoch": 0.67, "grad_norm": 1.2763570083498477, "learning_rate": 2.5776406849251144e-06, "loss": 0.1925, "step": 9383 }, { "epoch": 0.67, "grad_norm": 1.3678536993405142, "learning_rate": 2.5766273698740454e-06, "loss": 0.1758, "step": 9384 }, { "epoch": 0.67, "grad_norm": 1.1801426475082042, "learning_rate": 2.5756141848999883e-06, "loss": 0.1577, "step": 9385 }, { "epoch": 0.67, "grad_norm": 1.3299629284589918, "learning_rate": 2.5746011300573327e-06, "loss": 0.1484, "step": 9386 }, { "epoch": 0.67, "grad_norm": 7.383337167043854, "learning_rate": 2.5735882054004515e-06, "loss": 0.556, "step": 9387 }, { "epoch": 0.67, "grad_norm": 1.192455243772222, "learning_rate": 2.572575410983715e-06, "loss": 0.1532, "step": 9388 }, { "epoch": 0.67, "grad_norm": 1.2823850473967455, "learning_rate": 2.571562746861487e-06, "loss": 0.1766, "step": 9389 }, { "epoch": 0.67, "grad_norm": 1.333794744793949, "learning_rate": 2.570550213088125e-06, "loss": 0.1693, "step": 9390 }, { "epoch": 0.67, "grad_norm": 1.345812001483703, "learning_rate": 2.5695378097179735e-06, "loss": 0.1972, "step": 9391 }, { "epoch": 0.67, "grad_norm": 1.3587442780064092, "learning_rate": 2.5685255368053796e-06, "loss": 0.1814, "step": 9392 }, { "epoch": 0.67, "grad_norm": 1.1889564598425788, "learning_rate": 2.5675133944046726e-06, "loss": 0.1495, "step": 9393 }, { "epoch": 0.67, "grad_norm": 5.193778657595414, "learning_rate": 2.5665013825701845e-06, "loss": 0.569, "step": 9394 }, { "epoch": 0.67, "grad_norm": 1.4228530323214668, "learning_rate": 2.565489501356233e-06, "loss": 0.1919, "step": 9395 }, { "epoch": 0.67, "grad_norm": 1.2441754907306442, "learning_rate": 2.564477750817135e-06, "loss": 0.1645, "step": 9396 }, { "epoch": 0.67, "grad_norm": 1.1577420526330335, "learning_rate": 2.563466131007194e-06, "loss": 0.1331, "step": 9397 }, { "epoch": 0.67, "grad_norm": 1.7258741505990391, "learning_rate": 2.5624546419807105e-06, "loss": 0.1927, "step": 9398 }, { "epoch": 0.67, "grad_norm": 1.303096822534713, "learning_rate": 2.5614432837919785e-06, "loss": 0.1867, "step": 9399 }, { "epoch": 0.67, "grad_norm": 1.4182391584554086, "learning_rate": 2.5604320564952846e-06, "loss": 0.2092, "step": 9400 }, { "epoch": 0.67, "grad_norm": 1.3879500152722326, "learning_rate": 2.559420960144905e-06, "loss": 0.1555, "step": 9401 }, { "epoch": 0.67, "grad_norm": 1.3574713096735207, "learning_rate": 2.5584099947951136e-06, "loss": 0.1849, "step": 9402 }, { "epoch": 0.67, "grad_norm": 4.762614528303701, "learning_rate": 2.5573991605001715e-06, "loss": 0.6438, "step": 9403 }, { "epoch": 0.67, "grad_norm": 1.4038797438286432, "learning_rate": 2.556388457314341e-06, "loss": 0.1769, "step": 9404 }, { "epoch": 0.67, "grad_norm": 5.912980337263262, "learning_rate": 2.5553778852918683e-06, "loss": 0.5079, "step": 9405 }, { "epoch": 0.67, "grad_norm": 1.3184240667136915, "learning_rate": 2.5543674444869983e-06, "loss": 0.1728, "step": 9406 }, { "epoch": 0.67, "grad_norm": 1.2286490769659115, "learning_rate": 2.553357134953968e-06, "loss": 0.1362, "step": 9407 }, { "epoch": 0.67, "grad_norm": 1.2661342113241145, "learning_rate": 2.552346956747007e-06, "loss": 0.1402, "step": 9408 }, { "epoch": 0.67, "grad_norm": 1.3023213503530433, "learning_rate": 2.551336909920339e-06, "loss": 0.1787, "step": 9409 }, { "epoch": 0.67, "grad_norm": 1.2676728376641704, "learning_rate": 2.550326994528176e-06, "loss": 0.144, "step": 9410 }, { "epoch": 0.67, "grad_norm": 1.2470361119800326, "learning_rate": 2.54931721062473e-06, "loss": 0.2013, "step": 9411 }, { "epoch": 0.67, "grad_norm": 1.3686661924701848, "learning_rate": 2.548307558264197e-06, "loss": 0.1922, "step": 9412 }, { "epoch": 0.67, "grad_norm": 8.895753112918506, "learning_rate": 2.547298037500777e-06, "loss": 0.5998, "step": 9413 }, { "epoch": 0.67, "grad_norm": 1.3691302868122082, "learning_rate": 2.546288648388653e-06, "loss": 0.1922, "step": 9414 }, { "epoch": 0.67, "grad_norm": 1.327826954883381, "learning_rate": 2.545279390982005e-06, "loss": 0.2045, "step": 9415 }, { "epoch": 0.67, "grad_norm": 1.1196954692619139, "learning_rate": 2.5442702653350083e-06, "loss": 0.1256, "step": 9416 }, { "epoch": 0.67, "grad_norm": 1.3256193739894035, "learning_rate": 2.543261271501829e-06, "loss": 0.2431, "step": 9417 }, { "epoch": 0.67, "grad_norm": 1.251508021641177, "learning_rate": 2.5422524095366223e-06, "loss": 0.1342, "step": 9418 }, { "epoch": 0.67, "grad_norm": 1.636520434693917, "learning_rate": 2.5412436794935446e-06, "loss": 0.1899, "step": 9419 }, { "epoch": 0.67, "grad_norm": 1.2532637964310225, "learning_rate": 2.5402350814267364e-06, "loss": 0.1432, "step": 9420 }, { "epoch": 0.67, "grad_norm": 1.160802048867107, "learning_rate": 2.5392266153903376e-06, "loss": 0.1725, "step": 9421 }, { "epoch": 0.67, "grad_norm": 1.222153408678953, "learning_rate": 2.538218281438477e-06, "loss": 0.1584, "step": 9422 }, { "epoch": 0.67, "grad_norm": 1.332858945629204, "learning_rate": 2.537210079625279e-06, "loss": 0.1517, "step": 9423 }, { "epoch": 0.67, "grad_norm": 1.205923606162704, "learning_rate": 2.53620201000486e-06, "loss": 0.1791, "step": 9424 }, { "epoch": 0.67, "grad_norm": 1.285492880336232, "learning_rate": 2.53519407263133e-06, "loss": 0.2022, "step": 9425 }, { "epoch": 0.67, "grad_norm": 5.441178238393899, "learning_rate": 2.5341862675587886e-06, "loss": 0.7059, "step": 9426 }, { "epoch": 0.67, "grad_norm": 1.184285262087191, "learning_rate": 2.5331785948413348e-06, "loss": 0.1601, "step": 9427 }, { "epoch": 0.67, "grad_norm": 6.009035267654339, "learning_rate": 2.5321710545330504e-06, "loss": 0.6278, "step": 9428 }, { "epoch": 0.67, "grad_norm": 1.2901917690526674, "learning_rate": 2.531163646688023e-06, "loss": 0.1895, "step": 9429 }, { "epoch": 0.67, "grad_norm": 1.4617353265688011, "learning_rate": 2.5301563713603195e-06, "loss": 0.2099, "step": 9430 }, { "epoch": 0.67, "grad_norm": 1.2331575803259158, "learning_rate": 2.529149228604011e-06, "loss": 0.1689, "step": 9431 }, { "epoch": 0.67, "grad_norm": 1.1666139802057132, "learning_rate": 2.528142218473155e-06, "loss": 0.1801, "step": 9432 }, { "epoch": 0.67, "grad_norm": 1.3805029973256087, "learning_rate": 2.527135341021807e-06, "loss": 0.1832, "step": 9433 }, { "epoch": 0.67, "grad_norm": 1.31696521585725, "learning_rate": 2.5261285963040076e-06, "loss": 0.2019, "step": 9434 }, { "epoch": 0.67, "grad_norm": 5.96895352628261, "learning_rate": 2.5251219843737993e-06, "loss": 0.5936, "step": 9435 }, { "epoch": 0.67, "grad_norm": 1.1252506865319964, "learning_rate": 2.5241155052852086e-06, "loss": 0.1388, "step": 9436 }, { "epoch": 0.68, "grad_norm": 1.3938873326305772, "learning_rate": 2.523109159092263e-06, "loss": 0.2086, "step": 9437 }, { "epoch": 0.68, "grad_norm": 1.2916593597002277, "learning_rate": 2.5221029458489756e-06, "loss": 0.1718, "step": 9438 }, { "epoch": 0.68, "grad_norm": 1.2036627619734719, "learning_rate": 2.5210968656093583e-06, "loss": 0.1698, "step": 9439 }, { "epoch": 0.68, "grad_norm": 1.4072654144888759, "learning_rate": 2.5200909184274125e-06, "loss": 0.1922, "step": 9440 }, { "epoch": 0.68, "grad_norm": 4.573798950341566, "learning_rate": 2.5190851043571344e-06, "loss": 0.6186, "step": 9441 }, { "epoch": 0.68, "grad_norm": 1.424787531461755, "learning_rate": 2.5180794234525143e-06, "loss": 0.1912, "step": 9442 }, { "epoch": 0.68, "grad_norm": 1.1986551870897442, "learning_rate": 2.5170738757675277e-06, "loss": 0.1593, "step": 9443 }, { "epoch": 0.68, "grad_norm": 1.5058013828757564, "learning_rate": 2.516068461356154e-06, "loss": 0.1886, "step": 9444 }, { "epoch": 0.68, "grad_norm": 1.255133515063899, "learning_rate": 2.5150631802723547e-06, "loss": 0.1374, "step": 9445 }, { "epoch": 0.68, "grad_norm": 1.4423207873489587, "learning_rate": 2.5140580325700934e-06, "loss": 0.1939, "step": 9446 }, { "epoch": 0.68, "grad_norm": 1.2951851515705977, "learning_rate": 2.5130530183033197e-06, "loss": 0.1673, "step": 9447 }, { "epoch": 0.68, "grad_norm": 1.494209470562147, "learning_rate": 2.51204813752598e-06, "loss": 0.1891, "step": 9448 }, { "epoch": 0.68, "grad_norm": 1.2655596634464201, "learning_rate": 2.5110433902920106e-06, "loss": 0.1789, "step": 9449 }, { "epoch": 0.68, "grad_norm": 1.388063477107571, "learning_rate": 2.510038776655347e-06, "loss": 0.1962, "step": 9450 }, { "epoch": 0.68, "grad_norm": 1.5563532591953824, "learning_rate": 2.5090342966699077e-06, "loss": 0.1764, "step": 9451 }, { "epoch": 0.68, "grad_norm": 7.220676935674887, "learning_rate": 2.508029950389613e-06, "loss": 0.5474, "step": 9452 }, { "epoch": 0.68, "grad_norm": 1.329741177054563, "learning_rate": 2.5070257378683692e-06, "loss": 0.1586, "step": 9453 }, { "epoch": 0.68, "grad_norm": 1.2589800126763218, "learning_rate": 2.5060216591600817e-06, "loss": 0.1458, "step": 9454 }, { "epoch": 0.68, "grad_norm": 1.3146463159371862, "learning_rate": 2.5050177143186405e-06, "loss": 0.1659, "step": 9455 }, { "epoch": 0.68, "grad_norm": 1.3628922224618658, "learning_rate": 2.504013903397937e-06, "loss": 0.1938, "step": 9456 }, { "epoch": 0.68, "grad_norm": 1.3852549875067506, "learning_rate": 2.5030102264518515e-06, "loss": 0.1728, "step": 9457 }, { "epoch": 0.68, "grad_norm": 1.3744170065686803, "learning_rate": 2.502006683534258e-06, "loss": 0.1984, "step": 9458 }, { "epoch": 0.68, "grad_norm": 1.3613976128515801, "learning_rate": 2.501003274699021e-06, "loss": 0.174, "step": 9459 }, { "epoch": 0.68, "grad_norm": 1.455684614279227, "learning_rate": 2.5000000000000015e-06, "loss": 0.1973, "step": 9460 }, { "epoch": 0.68, "grad_norm": 1.4092578066508759, "learning_rate": 2.498996859491048e-06, "loss": 0.1685, "step": 9461 }, { "epoch": 0.68, "grad_norm": 1.2687481141064756, "learning_rate": 2.497993853226009e-06, "loss": 0.1816, "step": 9462 }, { "epoch": 0.68, "grad_norm": 1.360109787545344, "learning_rate": 2.4969909812587184e-06, "loss": 0.1436, "step": 9463 }, { "epoch": 0.68, "grad_norm": 1.2735829582039413, "learning_rate": 2.495988243643008e-06, "loss": 0.1448, "step": 9464 }, { "epoch": 0.68, "grad_norm": 1.3937288717018805, "learning_rate": 2.4949856404327007e-06, "loss": 0.1952, "step": 9465 }, { "epoch": 0.68, "grad_norm": 1.3140238149532244, "learning_rate": 2.4939831716816136e-06, "loss": 0.1959, "step": 9466 }, { "epoch": 0.68, "grad_norm": 1.444659848827377, "learning_rate": 2.4929808374435526e-06, "loss": 0.2006, "step": 9467 }, { "epoch": 0.68, "grad_norm": 1.3392593441187322, "learning_rate": 2.4919786377723225e-06, "loss": 0.1875, "step": 9468 }, { "epoch": 0.68, "grad_norm": 1.3340744555736854, "learning_rate": 2.4909765727217132e-06, "loss": 0.1585, "step": 9469 }, { "epoch": 0.68, "grad_norm": 1.2504621477337763, "learning_rate": 2.4899746423455158e-06, "loss": 0.156, "step": 9470 }, { "epoch": 0.68, "grad_norm": 1.4548149456950323, "learning_rate": 2.4889728466975056e-06, "loss": 0.1317, "step": 9471 }, { "epoch": 0.68, "grad_norm": 1.2499324520767812, "learning_rate": 2.487971185831457e-06, "loss": 0.1518, "step": 9472 }, { "epoch": 0.68, "grad_norm": 1.2749843872325894, "learning_rate": 2.4869696598011354e-06, "loss": 0.1869, "step": 9473 }, { "epoch": 0.68, "grad_norm": 1.3900200163774987, "learning_rate": 2.485968268660299e-06, "loss": 0.1517, "step": 9474 }, { "epoch": 0.68, "grad_norm": 1.3032265095461188, "learning_rate": 2.4849670124627e-06, "loss": 0.1726, "step": 9475 }, { "epoch": 0.68, "grad_norm": 1.294659071149979, "learning_rate": 2.4839658912620782e-06, "loss": 0.2013, "step": 9476 }, { "epoch": 0.68, "grad_norm": 1.3896783519645675, "learning_rate": 2.482964905112174e-06, "loss": 0.1868, "step": 9477 }, { "epoch": 0.68, "grad_norm": 1.4051772761160892, "learning_rate": 2.481964054066712e-06, "loss": 0.1577, "step": 9478 }, { "epoch": 0.68, "grad_norm": 1.5351161988158686, "learning_rate": 2.4809633381794175e-06, "loss": 0.1997, "step": 9479 }, { "epoch": 0.68, "grad_norm": 1.421929577193885, "learning_rate": 2.4799627575040014e-06, "loss": 0.1847, "step": 9480 }, { "epoch": 0.68, "grad_norm": 4.632105163448895, "learning_rate": 2.478962312094173e-06, "loss": 0.577, "step": 9481 }, { "epoch": 0.68, "grad_norm": 1.3233472911457882, "learning_rate": 2.4779620020036328e-06, "loss": 0.212, "step": 9482 }, { "epoch": 0.68, "grad_norm": 1.4495694547483886, "learning_rate": 2.4769618272860742e-06, "loss": 0.1898, "step": 9483 }, { "epoch": 0.68, "grad_norm": 1.251711329981804, "learning_rate": 2.475961787995179e-06, "loss": 0.1467, "step": 9484 }, { "epoch": 0.68, "grad_norm": 1.5035248994312191, "learning_rate": 2.4749618841846297e-06, "loss": 0.1829, "step": 9485 }, { "epoch": 0.68, "grad_norm": 1.219082891583727, "learning_rate": 2.473962115908093e-06, "loss": 0.1504, "step": 9486 }, { "epoch": 0.68, "grad_norm": 1.2129967224136002, "learning_rate": 2.4729624832192365e-06, "loss": 0.1311, "step": 9487 }, { "epoch": 0.68, "grad_norm": 1.5828077011642536, "learning_rate": 2.4719629861717126e-06, "loss": 0.2106, "step": 9488 }, { "epoch": 0.68, "grad_norm": 1.3868597614783982, "learning_rate": 2.4709636248191727e-06, "loss": 0.2075, "step": 9489 }, { "epoch": 0.68, "grad_norm": 1.4235150300827484, "learning_rate": 2.469964399215258e-06, "loss": 0.1897, "step": 9490 }, { "epoch": 0.68, "grad_norm": 1.3344444004687213, "learning_rate": 2.468965309413604e-06, "loss": 0.1742, "step": 9491 }, { "epoch": 0.68, "grad_norm": 1.3688543567069344, "learning_rate": 2.4679663554678357e-06, "loss": 0.1759, "step": 9492 }, { "epoch": 0.68, "grad_norm": 1.3834593693099588, "learning_rate": 2.466967537431576e-06, "loss": 0.1664, "step": 9493 }, { "epoch": 0.68, "grad_norm": 6.336560301208897, "learning_rate": 2.4659688553584342e-06, "loss": 0.6521, "step": 9494 }, { "epoch": 0.68, "grad_norm": 1.2037183356606593, "learning_rate": 2.464970309302019e-06, "loss": 0.1566, "step": 9495 }, { "epoch": 0.68, "grad_norm": 1.220859189978877, "learning_rate": 2.463971899315924e-06, "loss": 0.1756, "step": 9496 }, { "epoch": 0.68, "grad_norm": 1.2568516985268094, "learning_rate": 2.462973625453744e-06, "loss": 0.1647, "step": 9497 }, { "epoch": 0.68, "grad_norm": 5.688822089006416, "learning_rate": 2.4619754877690593e-06, "loss": 0.7172, "step": 9498 }, { "epoch": 0.68, "grad_norm": 1.394205855984081, "learning_rate": 2.4609774863154474e-06, "loss": 0.2194, "step": 9499 }, { "epoch": 0.68, "grad_norm": 1.3164653331753404, "learning_rate": 2.4599796211464772e-06, "loss": 0.1605, "step": 9500 }, { "epoch": 0.68, "grad_norm": 1.4060312633532654, "learning_rate": 2.4589818923157117e-06, "loss": 0.1648, "step": 9501 }, { "epoch": 0.68, "grad_norm": 1.2162788429382967, "learning_rate": 2.457984299876701e-06, "loss": 0.1402, "step": 9502 }, { "epoch": 0.68, "grad_norm": 1.3076689569243973, "learning_rate": 2.4569868438829948e-06, "loss": 0.1575, "step": 9503 }, { "epoch": 0.68, "grad_norm": 1.2764283887880365, "learning_rate": 2.4559895243881333e-06, "loss": 0.1542, "step": 9504 }, { "epoch": 0.68, "grad_norm": 1.3877067994408514, "learning_rate": 2.4549923414456455e-06, "loss": 0.1633, "step": 9505 }, { "epoch": 0.68, "grad_norm": 1.3024964980712612, "learning_rate": 2.4539952951090607e-06, "loss": 0.2202, "step": 9506 }, { "epoch": 0.68, "grad_norm": 1.4326962381366886, "learning_rate": 2.452998385431891e-06, "loss": 0.1804, "step": 9507 }, { "epoch": 0.68, "grad_norm": 1.445550988927256, "learning_rate": 2.4520016124676493e-06, "loss": 0.2032, "step": 9508 }, { "epoch": 0.68, "grad_norm": 1.3695328356604932, "learning_rate": 2.451004976269839e-06, "loss": 0.2041, "step": 9509 }, { "epoch": 0.68, "grad_norm": 1.2906646431967768, "learning_rate": 2.4500084768919563e-06, "loss": 0.1888, "step": 9510 }, { "epoch": 0.68, "grad_norm": 1.3497308458581203, "learning_rate": 2.449012114387486e-06, "loss": 0.19, "step": 9511 }, { "epoch": 0.68, "grad_norm": 1.1930495781368502, "learning_rate": 2.448015888809913e-06, "loss": 0.1641, "step": 9512 }, { "epoch": 0.68, "grad_norm": 1.33863872982848, "learning_rate": 2.447019800212707e-06, "loss": 0.1662, "step": 9513 }, { "epoch": 0.68, "grad_norm": 1.4377090155090906, "learning_rate": 2.4460238486493372e-06, "loss": 0.1769, "step": 9514 }, { "epoch": 0.68, "grad_norm": 1.6028935901016272, "learning_rate": 2.445028034173259e-06, "loss": 0.1802, "step": 9515 }, { "epoch": 0.68, "grad_norm": 3.7922137598579253, "learning_rate": 2.4440323568379255e-06, "loss": 0.4954, "step": 9516 }, { "epoch": 0.68, "grad_norm": 1.182783380660088, "learning_rate": 2.4430368166967806e-06, "loss": 0.1387, "step": 9517 }, { "epoch": 0.68, "grad_norm": 1.4545432327652243, "learning_rate": 2.4420414138032632e-06, "loss": 0.194, "step": 9518 }, { "epoch": 0.68, "grad_norm": 1.258249588613014, "learning_rate": 2.441046148210799e-06, "loss": 0.1686, "step": 9519 }, { "epoch": 0.68, "grad_norm": 8.75849137008114, "learning_rate": 2.4400510199728123e-06, "loss": 0.5279, "step": 9520 }, { "epoch": 0.68, "grad_norm": 1.258812033061077, "learning_rate": 2.439056029142716e-06, "loss": 0.1658, "step": 9521 }, { "epoch": 0.68, "grad_norm": 1.4257034995983668, "learning_rate": 2.4380611757739186e-06, "loss": 0.169, "step": 9522 }, { "epoch": 0.68, "grad_norm": 1.5325296600330616, "learning_rate": 2.437066459919818e-06, "loss": 0.1687, "step": 9523 }, { "epoch": 0.68, "grad_norm": 7.292259345136729, "learning_rate": 2.436071881633808e-06, "loss": 0.4338, "step": 9524 }, { "epoch": 0.68, "grad_norm": 1.1769596361936145, "learning_rate": 2.435077440969273e-06, "loss": 0.1798, "step": 9525 }, { "epoch": 0.68, "grad_norm": 1.489933317507412, "learning_rate": 2.434083137979593e-06, "loss": 0.2304, "step": 9526 }, { "epoch": 0.68, "grad_norm": 1.3552780428631421, "learning_rate": 2.433088972718135e-06, "loss": 0.1513, "step": 9527 }, { "epoch": 0.68, "grad_norm": 4.283262168029897, "learning_rate": 2.4320949452382646e-06, "loss": 0.5374, "step": 9528 }, { "epoch": 0.68, "grad_norm": 1.4900735313325197, "learning_rate": 2.431101055593334e-06, "loss": 0.2067, "step": 9529 }, { "epoch": 0.68, "grad_norm": 1.334086340807652, "learning_rate": 2.430107303836695e-06, "loss": 0.2008, "step": 9530 }, { "epoch": 0.68, "grad_norm": 1.2883223805723385, "learning_rate": 2.4291136900216845e-06, "loss": 0.1515, "step": 9531 }, { "epoch": 0.68, "grad_norm": 1.5280252776473957, "learning_rate": 2.428120214201637e-06, "loss": 0.2167, "step": 9532 }, { "epoch": 0.68, "grad_norm": 1.2621615766897027, "learning_rate": 2.4271268764298795e-06, "loss": 0.1889, "step": 9533 }, { "epoch": 0.68, "grad_norm": 1.224430695059103, "learning_rate": 2.426133676759731e-06, "loss": 0.1298, "step": 9534 }, { "epoch": 0.68, "grad_norm": 1.2327989639630421, "learning_rate": 2.4251406152445002e-06, "loss": 0.1662, "step": 9535 }, { "epoch": 0.68, "grad_norm": 1.274958052224102, "learning_rate": 2.424147691937491e-06, "loss": 0.1647, "step": 9536 }, { "epoch": 0.68, "grad_norm": 1.4628545205932186, "learning_rate": 2.423154906892003e-06, "loss": 0.1916, "step": 9537 }, { "epoch": 0.68, "grad_norm": 1.2578554589432847, "learning_rate": 2.4221622601613197e-06, "loss": 0.1792, "step": 9538 }, { "epoch": 0.68, "grad_norm": 1.4167442112575475, "learning_rate": 2.4211697517987278e-06, "loss": 0.177, "step": 9539 }, { "epoch": 0.68, "grad_norm": 1.393326625321518, "learning_rate": 2.4201773818574956e-06, "loss": 0.1719, "step": 9540 }, { "epoch": 0.68, "grad_norm": 1.231525474208533, "learning_rate": 2.419185150390893e-06, "loss": 0.1508, "step": 9541 }, { "epoch": 0.68, "grad_norm": 1.360380482853282, "learning_rate": 2.418193057452179e-06, "loss": 0.1716, "step": 9542 }, { "epoch": 0.68, "grad_norm": 1.2789153732114065, "learning_rate": 2.4172011030946057e-06, "loss": 0.1634, "step": 9543 }, { "epoch": 0.68, "grad_norm": 6.481508343520224, "learning_rate": 2.416209287371415e-06, "loss": 0.5845, "step": 9544 }, { "epoch": 0.68, "grad_norm": 1.4085161678270786, "learning_rate": 2.4152176103358468e-06, "loss": 0.1948, "step": 9545 }, { "epoch": 0.68, "grad_norm": 1.2984463025381572, "learning_rate": 2.414226072041126e-06, "loss": 0.2037, "step": 9546 }, { "epoch": 0.68, "grad_norm": 1.259660295186739, "learning_rate": 2.4132346725404795e-06, "loss": 0.1794, "step": 9547 }, { "epoch": 0.68, "grad_norm": 1.2880896301145979, "learning_rate": 2.4122434118871162e-06, "loss": 0.1659, "step": 9548 }, { "epoch": 0.68, "grad_norm": 1.2507517360154965, "learning_rate": 2.4112522901342467e-06, "loss": 0.1793, "step": 9549 }, { "epoch": 0.68, "grad_norm": 1.2664580496144495, "learning_rate": 2.41026130733507e-06, "loss": 0.1667, "step": 9550 }, { "epoch": 0.68, "grad_norm": 1.18301868968222, "learning_rate": 2.409270463542779e-06, "loss": 0.1518, "step": 9551 }, { "epoch": 0.68, "grad_norm": 1.574399271597619, "learning_rate": 2.4082797588105554e-06, "loss": 0.1907, "step": 9552 }, { "epoch": 0.68, "grad_norm": 1.323093302716809, "learning_rate": 2.4072891931915805e-06, "loss": 0.1622, "step": 9553 }, { "epoch": 0.68, "grad_norm": 1.371821637289227, "learning_rate": 2.4062987667390186e-06, "loss": 0.2031, "step": 9554 }, { "epoch": 0.68, "grad_norm": 1.349611035479337, "learning_rate": 2.405308479506037e-06, "loss": 0.1971, "step": 9555 }, { "epoch": 0.68, "grad_norm": 1.4158294566891614, "learning_rate": 2.4043183315457862e-06, "loss": 0.1797, "step": 9556 }, { "epoch": 0.68, "grad_norm": 1.2103979872541717, "learning_rate": 2.403328322911416e-06, "loss": 0.1622, "step": 9557 }, { "epoch": 0.68, "grad_norm": 1.295627483288364, "learning_rate": 2.402338453656064e-06, "loss": 0.1909, "step": 9558 }, { "epoch": 0.68, "grad_norm": 1.5097208732141465, "learning_rate": 2.4013487238328663e-06, "loss": 0.1812, "step": 9559 }, { "epoch": 0.68, "grad_norm": 1.358160288033, "learning_rate": 2.400359133494944e-06, "loss": 0.1854, "step": 9560 }, { "epoch": 0.68, "grad_norm": 1.4850782248528784, "learning_rate": 2.3993696826954168e-06, "loss": 0.1809, "step": 9561 }, { "epoch": 0.68, "grad_norm": 1.451460854456984, "learning_rate": 2.398380371487391e-06, "loss": 0.1809, "step": 9562 }, { "epoch": 0.68, "grad_norm": 1.3840279145599783, "learning_rate": 2.3973911999239737e-06, "loss": 0.1737, "step": 9563 }, { "epoch": 0.68, "grad_norm": 1.381715306101897, "learning_rate": 2.396402168058255e-06, "loss": 0.1703, "step": 9564 }, { "epoch": 0.68, "grad_norm": 1.382419570520723, "learning_rate": 2.3954132759433244e-06, "loss": 0.2302, "step": 9565 }, { "epoch": 0.68, "grad_norm": 5.459974944879367, "learning_rate": 2.3944245236322623e-06, "loss": 0.5708, "step": 9566 }, { "epoch": 0.68, "grad_norm": 1.4032251710027135, "learning_rate": 2.3934359111781397e-06, "loss": 0.2099, "step": 9567 }, { "epoch": 0.68, "grad_norm": 1.2887263859508393, "learning_rate": 2.392447438634024e-06, "loss": 0.1829, "step": 9568 }, { "epoch": 0.68, "grad_norm": 1.2733876959471513, "learning_rate": 2.3914591060529683e-06, "loss": 0.1804, "step": 9569 }, { "epoch": 0.68, "grad_norm": 1.4487286894472564, "learning_rate": 2.390470913488027e-06, "loss": 0.1932, "step": 9570 }, { "epoch": 0.68, "grad_norm": 6.061827390708526, "learning_rate": 2.3894828609922384e-06, "loss": 0.7468, "step": 9571 }, { "epoch": 0.68, "grad_norm": 4.56720031848221, "learning_rate": 2.38849494861864e-06, "loss": 0.5525, "step": 9572 }, { "epoch": 0.68, "grad_norm": 1.2348093869460817, "learning_rate": 2.387507176420256e-06, "loss": 0.1843, "step": 9573 }, { "epoch": 0.68, "grad_norm": 1.340631015273686, "learning_rate": 2.3865195444501083e-06, "loss": 0.2131, "step": 9574 }, { "epoch": 0.68, "grad_norm": 1.347413371841547, "learning_rate": 2.3855320527612087e-06, "loss": 0.2193, "step": 9575 }, { "epoch": 0.68, "grad_norm": 1.5813625051119005, "learning_rate": 2.384544701406563e-06, "loss": 0.2122, "step": 9576 }, { "epoch": 0.69, "grad_norm": 1.3994996041294225, "learning_rate": 2.3835574904391655e-06, "loss": 0.1887, "step": 9577 }, { "epoch": 0.69, "grad_norm": 1.2557062125241178, "learning_rate": 2.382570419912009e-06, "loss": 0.1153, "step": 9578 }, { "epoch": 0.69, "grad_norm": 1.1901274711006653, "learning_rate": 2.381583489878073e-06, "loss": 0.1597, "step": 9579 }, { "epoch": 0.69, "grad_norm": 1.2554220870604667, "learning_rate": 2.3805967003903336e-06, "loss": 0.1592, "step": 9580 }, { "epoch": 0.69, "grad_norm": 6.242064754122138, "learning_rate": 2.3796100515017554e-06, "loss": 0.7462, "step": 9581 }, { "epoch": 0.69, "grad_norm": 1.4938901903656598, "learning_rate": 2.3786235432653e-06, "loss": 0.2004, "step": 9582 }, { "epoch": 0.69, "grad_norm": 1.2941732877084384, "learning_rate": 2.377637175733918e-06, "loss": 0.1683, "step": 9583 }, { "epoch": 0.69, "grad_norm": 1.2226125232845264, "learning_rate": 2.376650948960556e-06, "loss": 0.1641, "step": 9584 }, { "epoch": 0.69, "grad_norm": 1.1464644357026295, "learning_rate": 2.3756648629981476e-06, "loss": 0.151, "step": 9585 }, { "epoch": 0.69, "grad_norm": 1.2913757222535163, "learning_rate": 2.3746789178996248e-06, "loss": 0.1614, "step": 9586 }, { "epoch": 0.69, "grad_norm": 4.310951053771698, "learning_rate": 2.3736931137179063e-06, "loss": 0.5001, "step": 9587 }, { "epoch": 0.69, "grad_norm": 1.3945321557538508, "learning_rate": 2.37270745050591e-06, "loss": 0.1654, "step": 9588 }, { "epoch": 0.69, "grad_norm": 1.4449032529699466, "learning_rate": 2.3717219283165375e-06, "loss": 0.219, "step": 9589 }, { "epoch": 0.69, "grad_norm": 1.315662133987693, "learning_rate": 2.3707365472026926e-06, "loss": 0.1736, "step": 9590 }, { "epoch": 0.69, "grad_norm": 1.2897628579930753, "learning_rate": 2.36975130721726e-06, "loss": 0.1695, "step": 9591 }, { "epoch": 0.69, "grad_norm": 1.5824439239115817, "learning_rate": 2.3687662084131326e-06, "loss": 0.146, "step": 9592 }, { "epoch": 0.69, "grad_norm": 1.2913828139769141, "learning_rate": 2.367781250843179e-06, "loss": 0.1701, "step": 9593 }, { "epoch": 0.69, "grad_norm": 1.276603265066647, "learning_rate": 2.366796434560273e-06, "loss": 0.135, "step": 9594 }, { "epoch": 0.69, "grad_norm": 1.486992602464422, "learning_rate": 2.365811759617271e-06, "loss": 0.1921, "step": 9595 }, { "epoch": 0.69, "grad_norm": 1.4428170710564467, "learning_rate": 2.364827226067031e-06, "loss": 0.2036, "step": 9596 }, { "epoch": 0.69, "grad_norm": 1.3725179037986874, "learning_rate": 2.363842833962394e-06, "loss": 0.1904, "step": 9597 }, { "epoch": 0.69, "grad_norm": 1.1572152585931663, "learning_rate": 2.362858583356203e-06, "loss": 0.1424, "step": 9598 }, { "epoch": 0.69, "grad_norm": 1.4370164608425777, "learning_rate": 2.3618744743012843e-06, "loss": 0.1811, "step": 9599 }, { "epoch": 0.69, "grad_norm": 1.6166589740705166, "learning_rate": 2.360890506850464e-06, "loss": 0.1698, "step": 9600 }, { "epoch": 0.69, "grad_norm": 1.4120658181921466, "learning_rate": 2.3599066810565556e-06, "loss": 0.2117, "step": 9601 }, { "epoch": 0.69, "grad_norm": 1.166276234696767, "learning_rate": 2.358922996972369e-06, "loss": 0.1582, "step": 9602 }, { "epoch": 0.69, "grad_norm": 1.3990501716524772, "learning_rate": 2.3579394546507043e-06, "loss": 0.1719, "step": 9603 }, { "epoch": 0.69, "grad_norm": 1.2322491575843164, "learning_rate": 2.356956054144352e-06, "loss": 0.1554, "step": 9604 }, { "epoch": 0.69, "grad_norm": 1.4300032138994128, "learning_rate": 2.3559727955061e-06, "loss": 0.2179, "step": 9605 }, { "epoch": 0.69, "grad_norm": 1.271275713294997, "learning_rate": 2.3549896787887227e-06, "loss": 0.2032, "step": 9606 }, { "epoch": 0.69, "grad_norm": 4.3170387518029685, "learning_rate": 2.354006704044993e-06, "loss": 0.4816, "step": 9607 }, { "epoch": 0.69, "grad_norm": 4.19384141499077, "learning_rate": 2.3530238713276697e-06, "loss": 0.5959, "step": 9608 }, { "epoch": 0.69, "grad_norm": 1.4020350853909755, "learning_rate": 2.3520411806895094e-06, "loss": 0.2032, "step": 9609 }, { "epoch": 0.69, "grad_norm": 1.408594129093204, "learning_rate": 2.3510586321832586e-06, "loss": 0.185, "step": 9610 }, { "epoch": 0.69, "grad_norm": 1.319491006252254, "learning_rate": 2.3500762258616585e-06, "loss": 0.1617, "step": 9611 }, { "epoch": 0.69, "grad_norm": 1.4119304945685256, "learning_rate": 2.349093961777437e-06, "loss": 0.1668, "step": 9612 }, { "epoch": 0.69, "grad_norm": 1.3828278393312037, "learning_rate": 2.348111839983322e-06, "loss": 0.1974, "step": 9613 }, { "epoch": 0.69, "grad_norm": 1.3350745849245143, "learning_rate": 2.3471298605320263e-06, "loss": 0.1834, "step": 9614 }, { "epoch": 0.69, "grad_norm": 1.3318689998534465, "learning_rate": 2.346148023476262e-06, "loss": 0.1735, "step": 9615 }, { "epoch": 0.69, "grad_norm": 1.3889895134448862, "learning_rate": 2.345166328868727e-06, "loss": 0.1885, "step": 9616 }, { "epoch": 0.69, "grad_norm": 1.0415049470454416, "learning_rate": 2.344184776762117e-06, "loss": 0.1226, "step": 9617 }, { "epoch": 0.69, "grad_norm": 6.614843922460169, "learning_rate": 2.3432033672091167e-06, "loss": 0.483, "step": 9618 }, { "epoch": 0.69, "grad_norm": 1.361517812561361, "learning_rate": 2.342222100262406e-06, "loss": 0.1679, "step": 9619 }, { "epoch": 0.69, "grad_norm": 1.3337294422224266, "learning_rate": 2.341240975974653e-06, "loss": 0.1421, "step": 9620 }, { "epoch": 0.69, "grad_norm": 1.2618537815658224, "learning_rate": 2.3402599943985233e-06, "loss": 0.1687, "step": 9621 }, { "epoch": 0.69, "grad_norm": 1.3071918743921869, "learning_rate": 2.3392791555866682e-06, "loss": 0.1446, "step": 9622 }, { "epoch": 0.69, "grad_norm": 1.5100228688968884, "learning_rate": 2.3382984595917403e-06, "loss": 0.2021, "step": 9623 }, { "epoch": 0.69, "grad_norm": 1.2537668357837448, "learning_rate": 2.3373179064663742e-06, "loss": 0.1619, "step": 9624 }, { "epoch": 0.69, "grad_norm": 1.3725846921721856, "learning_rate": 2.3363374962632054e-06, "loss": 0.1994, "step": 9625 }, { "epoch": 0.69, "grad_norm": 1.2877026098755515, "learning_rate": 2.335357229034857e-06, "loss": 0.1697, "step": 9626 }, { "epoch": 0.69, "grad_norm": 1.7060020295720955, "learning_rate": 2.334377104833948e-06, "loss": 0.166, "step": 9627 }, { "epoch": 0.69, "grad_norm": 1.3044122683292672, "learning_rate": 2.3333971237130847e-06, "loss": 0.207, "step": 9628 }, { "epoch": 0.69, "grad_norm": 1.2975771909108422, "learning_rate": 2.3324172857248715e-06, "loss": 0.1692, "step": 9629 }, { "epoch": 0.69, "grad_norm": 1.3663624162254207, "learning_rate": 2.3314375909218997e-06, "loss": 0.1988, "step": 9630 }, { "epoch": 0.69, "grad_norm": 1.106730218126169, "learning_rate": 2.3304580393567554e-06, "loss": 0.1743, "step": 9631 }, { "epoch": 0.69, "grad_norm": 4.572069082973457, "learning_rate": 2.32947863108202e-06, "loss": 0.6817, "step": 9632 }, { "epoch": 0.69, "grad_norm": 1.4511487074092666, "learning_rate": 2.328499366150261e-06, "loss": 0.1923, "step": 9633 }, { "epoch": 0.69, "grad_norm": 4.953096068918021, "learning_rate": 2.327520244614042e-06, "loss": 0.6517, "step": 9634 }, { "epoch": 0.69, "grad_norm": 1.3088291430614793, "learning_rate": 2.3265412665259194e-06, "loss": 0.1671, "step": 9635 }, { "epoch": 0.69, "grad_norm": 1.2518362639334584, "learning_rate": 2.325562431938442e-06, "loss": 0.1825, "step": 9636 }, { "epoch": 0.69, "grad_norm": 1.3953291939815788, "learning_rate": 2.324583740904147e-06, "loss": 0.188, "step": 9637 }, { "epoch": 0.69, "grad_norm": 1.3208537437516812, "learning_rate": 2.32360519347557e-06, "loss": 0.1747, "step": 9638 }, { "epoch": 0.69, "grad_norm": 1.4094861164622756, "learning_rate": 2.3226267897052313e-06, "loss": 0.1972, "step": 9639 }, { "epoch": 0.69, "grad_norm": 1.2997457753032087, "learning_rate": 2.3216485296456514e-06, "loss": 0.1778, "step": 9640 }, { "epoch": 0.69, "grad_norm": 1.1383595761387246, "learning_rate": 2.320670413349336e-06, "loss": 0.1519, "step": 9641 }, { "epoch": 0.69, "grad_norm": 1.34202827430766, "learning_rate": 2.3196924408687887e-06, "loss": 0.1947, "step": 9642 }, { "epoch": 0.69, "grad_norm": 1.3386712008370174, "learning_rate": 2.318714612256503e-06, "loss": 0.1821, "step": 9643 }, { "epoch": 0.69, "grad_norm": 1.5602744842561822, "learning_rate": 2.3177369275649664e-06, "loss": 0.1659, "step": 9644 }, { "epoch": 0.69, "grad_norm": 1.4499245076413771, "learning_rate": 2.3167593868466537e-06, "loss": 0.1735, "step": 9645 }, { "epoch": 0.69, "grad_norm": 1.3265672274413594, "learning_rate": 2.315781990154039e-06, "loss": 0.1631, "step": 9646 }, { "epoch": 0.69, "grad_norm": 1.4210072248178438, "learning_rate": 2.3148047375395816e-06, "loss": 0.1911, "step": 9647 }, { "epoch": 0.69, "grad_norm": 1.3034695677690444, "learning_rate": 2.3138276290557394e-06, "loss": 0.1634, "step": 9648 }, { "epoch": 0.69, "grad_norm": 1.3722130315334324, "learning_rate": 2.312850664754957e-06, "loss": 0.2091, "step": 9649 }, { "epoch": 0.69, "grad_norm": 1.3035920861902845, "learning_rate": 2.3118738446896756e-06, "loss": 0.165, "step": 9650 }, { "epoch": 0.69, "grad_norm": 1.4044459720042444, "learning_rate": 2.3108971689123266e-06, "loss": 0.2282, "step": 9651 }, { "epoch": 0.69, "grad_norm": 1.480192140490272, "learning_rate": 2.309920637475336e-06, "loss": 0.2281, "step": 9652 }, { "epoch": 0.69, "grad_norm": 1.2182601778361872, "learning_rate": 2.308944250431117e-06, "loss": 0.1736, "step": 9653 }, { "epoch": 0.69, "grad_norm": 1.4367435704557008, "learning_rate": 2.3079680078320814e-06, "loss": 0.1933, "step": 9654 }, { "epoch": 0.69, "grad_norm": 1.2192072422236415, "learning_rate": 2.3069919097306264e-06, "loss": 0.1446, "step": 9655 }, { "epoch": 0.69, "grad_norm": 4.739784062486623, "learning_rate": 2.306015956179149e-06, "loss": 0.5005, "step": 9656 }, { "epoch": 0.69, "grad_norm": 1.3365742581605828, "learning_rate": 2.3050401472300304e-06, "loss": 0.1823, "step": 9657 }, { "epoch": 0.69, "grad_norm": 1.3917417583636542, "learning_rate": 2.30406448293565e-06, "loss": 0.1923, "step": 9658 }, { "epoch": 0.69, "grad_norm": 1.4665081824002415, "learning_rate": 2.303088963348378e-06, "loss": 0.1815, "step": 9659 }, { "epoch": 0.69, "grad_norm": 5.934251264734299, "learning_rate": 2.302113588520578e-06, "loss": 0.4464, "step": 9660 }, { "epoch": 0.69, "grad_norm": 1.2269594425932628, "learning_rate": 2.3011383585046003e-06, "loss": 0.144, "step": 9661 }, { "epoch": 0.69, "grad_norm": 1.4242193818502173, "learning_rate": 2.3001632733527957e-06, "loss": 0.1796, "step": 9662 }, { "epoch": 0.69, "grad_norm": 1.2978960983933665, "learning_rate": 2.299188333117499e-06, "loss": 0.1625, "step": 9663 }, { "epoch": 0.69, "grad_norm": 1.3927477868875529, "learning_rate": 2.298213537851042e-06, "loss": 0.1784, "step": 9664 }, { "epoch": 0.69, "grad_norm": 1.1932552959588192, "learning_rate": 2.2972388876057517e-06, "loss": 0.155, "step": 9665 }, { "epoch": 0.69, "grad_norm": 1.3922745081488688, "learning_rate": 2.296264382433938e-06, "loss": 0.2045, "step": 9666 }, { "epoch": 0.69, "grad_norm": 1.120035771211254, "learning_rate": 2.295290022387911e-06, "loss": 0.1283, "step": 9667 }, { "epoch": 0.69, "grad_norm": 1.4698697809323575, "learning_rate": 2.29431580751997e-06, "loss": 0.2104, "step": 9668 }, { "epoch": 0.69, "grad_norm": 1.4000771729505515, "learning_rate": 2.293341737882409e-06, "loss": 0.1723, "step": 9669 }, { "epoch": 0.69, "grad_norm": 1.3278408255016803, "learning_rate": 2.2923678135275096e-06, "loss": 0.1928, "step": 9670 }, { "epoch": 0.69, "grad_norm": 1.2506308987349293, "learning_rate": 2.29139403450755e-06, "loss": 0.1432, "step": 9671 }, { "epoch": 0.69, "grad_norm": 1.3173164957948924, "learning_rate": 2.2904204008747966e-06, "loss": 0.1832, "step": 9672 }, { "epoch": 0.69, "grad_norm": 1.232586956895497, "learning_rate": 2.2894469126815134e-06, "loss": 0.1921, "step": 9673 }, { "epoch": 0.69, "grad_norm": 1.2640227449292543, "learning_rate": 2.2884735699799493e-06, "loss": 0.1395, "step": 9674 }, { "epoch": 0.69, "grad_norm": 1.180722217334474, "learning_rate": 2.2875003728223515e-06, "loss": 0.1597, "step": 9675 }, { "epoch": 0.69, "grad_norm": 1.433887040145558, "learning_rate": 2.286527321260958e-06, "loss": 0.1967, "step": 9676 }, { "epoch": 0.69, "grad_norm": 1.2583215096976526, "learning_rate": 2.2855544153479998e-06, "loss": 0.1685, "step": 9677 }, { "epoch": 0.69, "grad_norm": 1.299870474557435, "learning_rate": 2.284581655135694e-06, "loss": 0.1812, "step": 9678 }, { "epoch": 0.69, "grad_norm": 1.4802137282370857, "learning_rate": 2.28360904067626e-06, "loss": 0.2006, "step": 9679 }, { "epoch": 0.69, "grad_norm": 1.3653653252914038, "learning_rate": 2.2826365720218984e-06, "loss": 0.2183, "step": 9680 }, { "epoch": 0.69, "grad_norm": 1.3538937561986935, "learning_rate": 2.2816642492248126e-06, "loss": 0.1857, "step": 9681 }, { "epoch": 0.69, "grad_norm": 1.1532971580799278, "learning_rate": 2.2806920723371882e-06, "loss": 0.1477, "step": 9682 }, { "epoch": 0.69, "grad_norm": 1.367533812790115, "learning_rate": 2.2797200414112105e-06, "loss": 0.1751, "step": 9683 }, { "epoch": 0.69, "grad_norm": 6.6011990748603315, "learning_rate": 2.2787481564990533e-06, "loss": 0.5811, "step": 9684 }, { "epoch": 0.69, "grad_norm": 5.478295520358733, "learning_rate": 2.277776417652886e-06, "loss": 0.6629, "step": 9685 }, { "epoch": 0.69, "grad_norm": 1.4054731692061906, "learning_rate": 2.2768048249248648e-06, "loss": 0.2177, "step": 9686 }, { "epoch": 0.69, "grad_norm": 1.2453904173837793, "learning_rate": 2.2758333783671427e-06, "loss": 0.1397, "step": 9687 }, { "epoch": 0.69, "grad_norm": 1.300086568585657, "learning_rate": 2.274862078031861e-06, "loss": 0.1844, "step": 9688 }, { "epoch": 0.69, "grad_norm": 1.166774210259524, "learning_rate": 2.2738909239711586e-06, "loss": 0.1422, "step": 9689 }, { "epoch": 0.69, "grad_norm": 1.3262475475546804, "learning_rate": 2.272919916237159e-06, "loss": 0.1578, "step": 9690 }, { "epoch": 0.69, "grad_norm": 1.3283178285061432, "learning_rate": 2.2719490548819863e-06, "loss": 0.1521, "step": 9691 }, { "epoch": 0.69, "grad_norm": 1.2811957066415476, "learning_rate": 2.2709783399577463e-06, "loss": 0.1736, "step": 9692 }, { "epoch": 0.69, "grad_norm": 1.4105127662138184, "learning_rate": 2.270007771516551e-06, "loss": 0.2067, "step": 9693 }, { "epoch": 0.69, "grad_norm": 8.513005955007769, "learning_rate": 2.2690373496104906e-06, "loss": 0.5899, "step": 9694 }, { "epoch": 0.69, "grad_norm": 8.930753585509171, "learning_rate": 2.2680670742916565e-06, "loss": 0.5254, "step": 9695 }, { "epoch": 0.69, "grad_norm": 1.332373171802525, "learning_rate": 2.26709694561213e-06, "loss": 0.1779, "step": 9696 }, { "epoch": 0.69, "grad_norm": 1.3162728915981785, "learning_rate": 2.2661269636239802e-06, "loss": 0.1543, "step": 9697 }, { "epoch": 0.69, "grad_norm": 1.3838313522337564, "learning_rate": 2.2651571283792758e-06, "loss": 0.1437, "step": 9698 }, { "epoch": 0.69, "grad_norm": 1.333960544850729, "learning_rate": 2.2641874399300703e-06, "loss": 0.2292, "step": 9699 }, { "epoch": 0.69, "grad_norm": 1.2234946377382132, "learning_rate": 2.263217898328415e-06, "loss": 0.1697, "step": 9700 }, { "epoch": 0.69, "grad_norm": 1.5535356202117834, "learning_rate": 2.26224850362635e-06, "loss": 0.1778, "step": 9701 }, { "epoch": 0.69, "grad_norm": 1.4305158718920306, "learning_rate": 2.261279255875908e-06, "loss": 0.2066, "step": 9702 }, { "epoch": 0.69, "grad_norm": 1.398051605771974, "learning_rate": 2.2603101551291144e-06, "loss": 0.1543, "step": 9703 }, { "epoch": 0.69, "grad_norm": 1.2080128727622723, "learning_rate": 2.2593412014379905e-06, "loss": 0.174, "step": 9704 }, { "epoch": 0.69, "grad_norm": 1.3103284854668777, "learning_rate": 2.2583723948545405e-06, "loss": 0.1655, "step": 9705 }, { "epoch": 0.69, "grad_norm": 1.3848654478525144, "learning_rate": 2.2574037354307703e-06, "loss": 0.1801, "step": 9706 }, { "epoch": 0.69, "grad_norm": 1.2605202889970932, "learning_rate": 2.2564352232186705e-06, "loss": 0.1783, "step": 9707 }, { "epoch": 0.69, "grad_norm": 1.2914575689928482, "learning_rate": 2.2554668582702294e-06, "loss": 0.1633, "step": 9708 }, { "epoch": 0.69, "grad_norm": 1.3747841020060527, "learning_rate": 2.2544986406374227e-06, "loss": 0.1896, "step": 9709 }, { "epoch": 0.69, "grad_norm": 1.4251162239176987, "learning_rate": 2.253530570372221e-06, "loss": 0.1921, "step": 9710 }, { "epoch": 0.69, "grad_norm": 1.554411122996993, "learning_rate": 2.2525626475265872e-06, "loss": 0.1882, "step": 9711 }, { "epoch": 0.69, "grad_norm": 1.4508543175292883, "learning_rate": 2.2515948721524776e-06, "loss": 0.1696, "step": 9712 }, { "epoch": 0.69, "grad_norm": 1.4938149483985999, "learning_rate": 2.2506272443018345e-06, "loss": 0.1745, "step": 9713 }, { "epoch": 0.69, "grad_norm": 1.3205219015522827, "learning_rate": 2.2496597640266006e-06, "loss": 0.188, "step": 9714 }, { "epoch": 0.69, "grad_norm": 1.303149261159802, "learning_rate": 2.2486924313787016e-06, "loss": 0.1834, "step": 9715 }, { "epoch": 0.69, "grad_norm": 1.3436937129281168, "learning_rate": 2.2477252464100647e-06, "loss": 0.1934, "step": 9716 }, { "epoch": 0.7, "grad_norm": 5.757359500335642, "learning_rate": 2.2467582091726005e-06, "loss": 0.546, "step": 9717 }, { "epoch": 0.7, "grad_norm": 1.276507786716258, "learning_rate": 2.2457913197182176e-06, "loss": 0.1815, "step": 9718 }, { "epoch": 0.7, "grad_norm": 1.2724609407467427, "learning_rate": 2.244824578098814e-06, "loss": 0.1684, "step": 9719 }, { "epoch": 0.7, "grad_norm": 1.3120618546666403, "learning_rate": 2.243857984366284e-06, "loss": 0.1853, "step": 9720 }, { "epoch": 0.7, "grad_norm": 1.43249676899009, "learning_rate": 2.242891538572506e-06, "loss": 0.2002, "step": 9721 }, { "epoch": 0.7, "grad_norm": 1.5567195632798496, "learning_rate": 2.241925240769358e-06, "loss": 0.2131, "step": 9722 }, { "epoch": 0.7, "grad_norm": 1.390862661319984, "learning_rate": 2.2409590910087043e-06, "loss": 0.193, "step": 9723 }, { "epoch": 0.7, "grad_norm": 1.442157007922685, "learning_rate": 2.2399930893424075e-06, "loss": 0.1677, "step": 9724 }, { "epoch": 0.7, "grad_norm": 1.5379227399043693, "learning_rate": 2.2390272358223147e-06, "loss": 0.2169, "step": 9725 }, { "epoch": 0.7, "grad_norm": 1.4466667868936998, "learning_rate": 2.238061530500271e-06, "loss": 0.1782, "step": 9726 }, { "epoch": 0.7, "grad_norm": 1.3192603645555172, "learning_rate": 2.2370959734281123e-06, "loss": 0.1781, "step": 9727 }, { "epoch": 0.7, "grad_norm": 1.3863468411308855, "learning_rate": 2.236130564657665e-06, "loss": 0.17, "step": 9728 }, { "epoch": 0.7, "grad_norm": 1.1720704759578813, "learning_rate": 2.2351653042407506e-06, "loss": 0.1601, "step": 9729 }, { "epoch": 0.7, "grad_norm": 1.4201437717355005, "learning_rate": 2.2342001922291766e-06, "loss": 0.1919, "step": 9730 }, { "epoch": 0.7, "grad_norm": 1.3056523221471235, "learning_rate": 2.2332352286747506e-06, "loss": 0.1707, "step": 9731 }, { "epoch": 0.7, "grad_norm": 1.433928092347942, "learning_rate": 2.2322704136292632e-06, "loss": 0.2172, "step": 9732 }, { "epoch": 0.7, "grad_norm": 1.3068410167898366, "learning_rate": 2.2313057471445067e-06, "loss": 0.1615, "step": 9733 }, { "epoch": 0.7, "grad_norm": 1.2976581236634164, "learning_rate": 2.2303412292722563e-06, "loss": 0.1755, "step": 9734 }, { "epoch": 0.7, "grad_norm": 1.363446725123765, "learning_rate": 2.2293768600642857e-06, "loss": 0.2049, "step": 9735 }, { "epoch": 0.7, "grad_norm": 1.2796484475894485, "learning_rate": 2.2284126395723574e-06, "loss": 0.175, "step": 9736 }, { "epoch": 0.7, "grad_norm": 1.4633672223582117, "learning_rate": 2.2274485678482304e-06, "loss": 0.2145, "step": 9737 }, { "epoch": 0.7, "grad_norm": 1.532321659074599, "learning_rate": 2.226484644943647e-06, "loss": 0.2231, "step": 9738 }, { "epoch": 0.7, "grad_norm": 1.499386988206639, "learning_rate": 2.2255208709103514e-06, "loss": 0.1848, "step": 9739 }, { "epoch": 0.7, "grad_norm": 1.3196621951517866, "learning_rate": 2.2245572458000714e-06, "loss": 0.1861, "step": 9740 }, { "epoch": 0.7, "grad_norm": 1.2874800065123078, "learning_rate": 2.2235937696645337e-06, "loss": 0.1965, "step": 9741 }, { "epoch": 0.7, "grad_norm": 1.4868735999903773, "learning_rate": 2.2226304425554505e-06, "loss": 0.1956, "step": 9742 }, { "epoch": 0.7, "grad_norm": 5.248187980098897, "learning_rate": 2.221667264524532e-06, "loss": 0.5484, "step": 9743 }, { "epoch": 0.7, "grad_norm": 1.271445456407996, "learning_rate": 2.220704235623476e-06, "loss": 0.1615, "step": 9744 }, { "epoch": 0.7, "grad_norm": 1.5059273015754457, "learning_rate": 2.2197413559039776e-06, "loss": 0.2196, "step": 9745 }, { "epoch": 0.7, "grad_norm": 6.702554128704514, "learning_rate": 2.2187786254177157e-06, "loss": 0.5361, "step": 9746 }, { "epoch": 0.7, "grad_norm": 1.3330002534738616, "learning_rate": 2.21781604421637e-06, "loss": 0.1557, "step": 9747 }, { "epoch": 0.7, "grad_norm": 1.1802860971891644, "learning_rate": 2.216853612351604e-06, "loss": 0.1865, "step": 9748 }, { "epoch": 0.7, "grad_norm": 1.2008768382834334, "learning_rate": 2.2158913298750808e-06, "loss": 0.1528, "step": 9749 }, { "epoch": 0.7, "grad_norm": 1.209721038773094, "learning_rate": 2.214929196838449e-06, "loss": 0.1912, "step": 9750 }, { "epoch": 0.7, "grad_norm": 1.2997292422817348, "learning_rate": 2.213967213293353e-06, "loss": 0.1606, "step": 9751 }, { "epoch": 0.7, "grad_norm": 1.138940089917714, "learning_rate": 2.2130053792914293e-06, "loss": 0.1854, "step": 9752 }, { "epoch": 0.7, "grad_norm": 1.2929227307368167, "learning_rate": 2.2120436948843054e-06, "loss": 0.1564, "step": 9753 }, { "epoch": 0.7, "grad_norm": 1.2777302272797602, "learning_rate": 2.2110821601235994e-06, "loss": 0.1798, "step": 9754 }, { "epoch": 0.7, "grad_norm": 1.046816416246104, "learning_rate": 2.2101207750609244e-06, "loss": 0.1493, "step": 9755 }, { "epoch": 0.7, "grad_norm": 1.4664400815897602, "learning_rate": 2.209159539747881e-06, "loss": 0.1948, "step": 9756 }, { "epoch": 0.7, "grad_norm": 1.3631804036081288, "learning_rate": 2.208198454236068e-06, "loss": 0.1864, "step": 9757 }, { "epoch": 0.7, "grad_norm": 1.349327172156145, "learning_rate": 2.2072375185770685e-06, "loss": 0.1669, "step": 9758 }, { "epoch": 0.7, "grad_norm": 1.3543415064924562, "learning_rate": 2.206276732822464e-06, "loss": 0.2043, "step": 9759 }, { "epoch": 0.7, "grad_norm": 6.789483604727544, "learning_rate": 2.205316097023826e-06, "loss": 0.5836, "step": 9760 }, { "epoch": 0.7, "grad_norm": 1.1719311132301335, "learning_rate": 2.204355611232717e-06, "loss": 0.1951, "step": 9761 }, { "epoch": 0.7, "grad_norm": 1.493055191698878, "learning_rate": 2.203395275500694e-06, "loss": 0.2191, "step": 9762 }, { "epoch": 0.7, "grad_norm": 1.3140231440065886, "learning_rate": 2.2024350898793007e-06, "loss": 0.1704, "step": 9763 }, { "epoch": 0.7, "grad_norm": 1.228806126251975, "learning_rate": 2.201475054420079e-06, "loss": 0.1789, "step": 9764 }, { "epoch": 0.7, "grad_norm": 1.3956869046372913, "learning_rate": 2.2005151691745576e-06, "loss": 0.1751, "step": 9765 }, { "epoch": 0.7, "grad_norm": 1.271396321940789, "learning_rate": 2.199555434194261e-06, "loss": 0.1606, "step": 9766 }, { "epoch": 0.7, "grad_norm": 1.3897115554178203, "learning_rate": 2.198595849530702e-06, "loss": 0.2011, "step": 9767 }, { "epoch": 0.7, "grad_norm": 1.2003788274107248, "learning_rate": 2.197636415235388e-06, "loss": 0.1534, "step": 9768 }, { "epoch": 0.7, "grad_norm": 1.2540910878332205, "learning_rate": 2.1966771313598177e-06, "loss": 0.1924, "step": 9769 }, { "epoch": 0.7, "grad_norm": 1.284108328479996, "learning_rate": 2.1957179979554845e-06, "loss": 0.1714, "step": 9770 }, { "epoch": 0.7, "grad_norm": 1.340130576373397, "learning_rate": 2.1947590150738657e-06, "loss": 0.1803, "step": 9771 }, { "epoch": 0.7, "grad_norm": 1.3238317064201461, "learning_rate": 2.193800182766441e-06, "loss": 0.1933, "step": 9772 }, { "epoch": 0.7, "grad_norm": 1.4028670337036626, "learning_rate": 2.1928415010846716e-06, "loss": 0.1714, "step": 9773 }, { "epoch": 0.7, "grad_norm": 1.3364499395157738, "learning_rate": 2.1918829700800205e-06, "loss": 0.2072, "step": 9774 }, { "epoch": 0.7, "grad_norm": 1.3260300115673664, "learning_rate": 2.190924589803933e-06, "loss": 0.1924, "step": 9775 }, { "epoch": 0.7, "grad_norm": 1.3889342063246712, "learning_rate": 2.1899663603078547e-06, "loss": 0.1712, "step": 9776 }, { "epoch": 0.7, "grad_norm": 5.845651577365227, "learning_rate": 2.1890082816432178e-06, "loss": 0.5323, "step": 9777 }, { "epoch": 0.7, "grad_norm": 1.5081977872128027, "learning_rate": 2.188050353861451e-06, "loss": 0.1837, "step": 9778 }, { "epoch": 0.7, "grad_norm": 1.3551426045966632, "learning_rate": 2.187092577013968e-06, "loss": 0.1806, "step": 9779 }, { "epoch": 0.7, "grad_norm": 1.1858690375513175, "learning_rate": 2.1861349511521817e-06, "loss": 0.1596, "step": 9780 }, { "epoch": 0.7, "grad_norm": 1.6670239326283454, "learning_rate": 2.185177476327491e-06, "loss": 0.2124, "step": 9781 }, { "epoch": 0.7, "grad_norm": 1.356287409109955, "learning_rate": 2.1842201525912926e-06, "loss": 0.1789, "step": 9782 }, { "epoch": 0.7, "grad_norm": 1.3372966736255054, "learning_rate": 2.1832629799949677e-06, "loss": 0.1799, "step": 9783 }, { "epoch": 0.7, "grad_norm": 8.179881864475025, "learning_rate": 2.182305958589896e-06, "loss": 0.4768, "step": 9784 }, { "epoch": 0.7, "grad_norm": 1.4942260252605708, "learning_rate": 2.1813490884274462e-06, "loss": 0.1978, "step": 9785 }, { "epoch": 0.7, "grad_norm": 1.230835077684288, "learning_rate": 2.1803923695589813e-06, "loss": 0.1808, "step": 9786 }, { "epoch": 0.7, "grad_norm": 1.4082042883415542, "learning_rate": 2.179435802035851e-06, "loss": 0.1563, "step": 9787 }, { "epoch": 0.7, "grad_norm": 1.1355504886815047, "learning_rate": 2.178479385909402e-06, "loss": 0.1257, "step": 9788 }, { "epoch": 0.7, "grad_norm": 1.2908015201681449, "learning_rate": 2.1775231212309696e-06, "loss": 0.1408, "step": 9789 }, { "epoch": 0.7, "grad_norm": 1.2311623759404684, "learning_rate": 2.176567008051884e-06, "loss": 0.1815, "step": 9790 }, { "epoch": 0.7, "grad_norm": 1.2706656662120428, "learning_rate": 2.175611046423463e-06, "loss": 0.1367, "step": 9791 }, { "epoch": 0.7, "grad_norm": 1.4923360541063972, "learning_rate": 2.1746552363970204e-06, "loss": 0.1905, "step": 9792 }, { "epoch": 0.7, "grad_norm": 6.363760267275215, "learning_rate": 2.1736995780238618e-06, "loss": 0.8033, "step": 9793 }, { "epoch": 0.7, "grad_norm": 1.4925938809631258, "learning_rate": 2.1727440713552785e-06, "loss": 0.1987, "step": 9794 }, { "epoch": 0.7, "grad_norm": 1.4424911442143913, "learning_rate": 2.171788716442564e-06, "loss": 0.196, "step": 9795 }, { "epoch": 0.7, "grad_norm": 1.5236799445345468, "learning_rate": 2.170833513336994e-06, "loss": 0.2111, "step": 9796 }, { "epoch": 0.7, "grad_norm": 1.2513693692354264, "learning_rate": 2.1698784620898433e-06, "loss": 0.1525, "step": 9797 }, { "epoch": 0.7, "grad_norm": 1.199117341283263, "learning_rate": 2.1689235627523713e-06, "loss": 0.1627, "step": 9798 }, { "epoch": 0.7, "grad_norm": 1.2359381822172406, "learning_rate": 2.1679688153758373e-06, "loss": 0.1302, "step": 9799 }, { "epoch": 0.7, "grad_norm": 1.3557157055208306, "learning_rate": 2.1670142200114837e-06, "loss": 0.1824, "step": 9800 }, { "epoch": 0.7, "grad_norm": 1.1336371203771571, "learning_rate": 2.166059776710554e-06, "loss": 0.185, "step": 9801 }, { "epoch": 0.7, "grad_norm": 1.4009504690460342, "learning_rate": 2.1651054855242754e-06, "loss": 0.1655, "step": 9802 }, { "epoch": 0.7, "grad_norm": 1.1914045534611415, "learning_rate": 2.1641513465038714e-06, "loss": 0.1293, "step": 9803 }, { "epoch": 0.7, "grad_norm": 1.3707046480957197, "learning_rate": 2.1631973597005574e-06, "loss": 0.1887, "step": 9804 }, { "epoch": 0.7, "grad_norm": 1.2457669192725758, "learning_rate": 2.162243525165541e-06, "loss": 0.1487, "step": 9805 }, { "epoch": 0.7, "grad_norm": 1.2884176430997798, "learning_rate": 2.1612898429500162e-06, "loss": 0.153, "step": 9806 }, { "epoch": 0.7, "grad_norm": 1.2165153869360554, "learning_rate": 2.1603363131051767e-06, "loss": 0.1519, "step": 9807 }, { "epoch": 0.7, "grad_norm": 1.196684562667685, "learning_rate": 2.159382935682201e-06, "loss": 0.1554, "step": 9808 }, { "epoch": 0.7, "grad_norm": 6.035757256589133, "learning_rate": 2.158429710732266e-06, "loss": 0.6991, "step": 9809 }, { "epoch": 0.7, "grad_norm": 1.386838360759269, "learning_rate": 2.157476638306533e-06, "loss": 0.1738, "step": 9810 }, { "epoch": 0.7, "grad_norm": 1.4564464976376614, "learning_rate": 2.1565237184561614e-06, "loss": 0.1973, "step": 9811 }, { "epoch": 0.7, "grad_norm": 5.371689022823875, "learning_rate": 2.1555709512323004e-06, "loss": 0.5556, "step": 9812 }, { "epoch": 0.7, "grad_norm": 1.6310505582621118, "learning_rate": 2.1546183366860917e-06, "loss": 0.2048, "step": 9813 }, { "epoch": 0.7, "grad_norm": 1.271839509677533, "learning_rate": 2.1536658748686652e-06, "loss": 0.1893, "step": 9814 }, { "epoch": 0.7, "grad_norm": 1.2292143440068781, "learning_rate": 2.1527135658311482e-06, "loss": 0.1775, "step": 9815 }, { "epoch": 0.7, "grad_norm": 1.399743386058442, "learning_rate": 2.1517614096246537e-06, "loss": 0.214, "step": 9816 }, { "epoch": 0.7, "grad_norm": 5.108654972731843, "learning_rate": 2.1508094063002933e-06, "loss": 0.4864, "step": 9817 }, { "epoch": 0.7, "grad_norm": 1.3752551184363477, "learning_rate": 2.1498575559091626e-06, "loss": 0.1887, "step": 9818 }, { "epoch": 0.7, "grad_norm": 1.4917364082271154, "learning_rate": 2.1489058585023555e-06, "loss": 0.1832, "step": 9819 }, { "epoch": 0.7, "grad_norm": 1.3879291609473101, "learning_rate": 2.147954314130955e-06, "loss": 0.1881, "step": 9820 }, { "epoch": 0.7, "grad_norm": 1.3730433904924315, "learning_rate": 2.1470029228460383e-06, "loss": 0.1681, "step": 9821 }, { "epoch": 0.7, "grad_norm": 1.2250160081962989, "learning_rate": 2.1460516846986677e-06, "loss": 0.1732, "step": 9822 }, { "epoch": 0.7, "grad_norm": 1.3869059515505648, "learning_rate": 2.1451005997399055e-06, "loss": 0.1977, "step": 9823 }, { "epoch": 0.7, "grad_norm": 1.2458557758400128, "learning_rate": 2.144149668020803e-06, "loss": 0.1735, "step": 9824 }, { "epoch": 0.7, "grad_norm": 1.269771610125317, "learning_rate": 2.1431988895923985e-06, "loss": 0.1826, "step": 9825 }, { "epoch": 0.7, "grad_norm": 1.414956748198051, "learning_rate": 2.1422482645057297e-06, "loss": 0.1994, "step": 9826 }, { "epoch": 0.7, "grad_norm": 1.4139962881941133, "learning_rate": 2.1412977928118194e-06, "loss": 0.168, "step": 9827 }, { "epoch": 0.7, "grad_norm": 1.4817229597478334, "learning_rate": 2.1403474745616863e-06, "loss": 0.1578, "step": 9828 }, { "epoch": 0.7, "grad_norm": 1.3136500060251255, "learning_rate": 2.1393973098063403e-06, "loss": 0.1607, "step": 9829 }, { "epoch": 0.7, "grad_norm": 1.376707416839989, "learning_rate": 2.1384472985967834e-06, "loss": 0.1611, "step": 9830 }, { "epoch": 0.7, "grad_norm": 1.4227680260801439, "learning_rate": 2.137497440984006e-06, "loss": 0.1892, "step": 9831 }, { "epoch": 0.7, "grad_norm": 1.3010916342822003, "learning_rate": 2.136547737018995e-06, "loss": 0.1818, "step": 9832 }, { "epoch": 0.7, "grad_norm": 1.1882299843679442, "learning_rate": 2.1355981867527242e-06, "loss": 0.1553, "step": 9833 }, { "epoch": 0.7, "grad_norm": 1.3918362860062254, "learning_rate": 2.134648790236165e-06, "loss": 0.1777, "step": 9834 }, { "epoch": 0.7, "grad_norm": 1.177337817612947, "learning_rate": 2.133699547520273e-06, "loss": 0.1623, "step": 9835 }, { "epoch": 0.7, "grad_norm": 1.2146118372014378, "learning_rate": 2.1327504586560027e-06, "loss": 0.1656, "step": 9836 }, { "epoch": 0.7, "grad_norm": 5.859983861030236, "learning_rate": 2.131801523694296e-06, "loss": 0.6272, "step": 9837 }, { "epoch": 0.7, "grad_norm": 7.086489384171177, "learning_rate": 2.1308527426860913e-06, "loss": 0.7281, "step": 9838 }, { "epoch": 0.7, "grad_norm": 1.358152726796936, "learning_rate": 2.129904115682311e-06, "loss": 0.1933, "step": 9839 }, { "epoch": 0.7, "grad_norm": 14.402683899193852, "learning_rate": 2.128955642733877e-06, "loss": 0.559, "step": 9840 }, { "epoch": 0.7, "grad_norm": 1.2552467660847846, "learning_rate": 2.1280073238916963e-06, "loss": 0.1353, "step": 9841 }, { "epoch": 0.7, "grad_norm": 1.4164923735629824, "learning_rate": 2.127059159206675e-06, "loss": 0.1872, "step": 9842 }, { "epoch": 0.7, "grad_norm": 1.3616961451551473, "learning_rate": 2.1261111487297016e-06, "loss": 0.1901, "step": 9843 }, { "epoch": 0.7, "grad_norm": 1.4764420279718353, "learning_rate": 2.125163292511665e-06, "loss": 0.2138, "step": 9844 }, { "epoch": 0.7, "grad_norm": 1.4524883720008324, "learning_rate": 2.124215590603442e-06, "loss": 0.1892, "step": 9845 }, { "epoch": 0.7, "grad_norm": 1.6297056938074792, "learning_rate": 2.1232680430559023e-06, "loss": 0.1628, "step": 9846 }, { "epoch": 0.7, "grad_norm": 1.3378764285105262, "learning_rate": 2.122320649919904e-06, "loss": 0.2065, "step": 9847 }, { "epoch": 0.7, "grad_norm": 1.328894460021943, "learning_rate": 2.1213734112463023e-06, "loss": 0.1826, "step": 9848 }, { "epoch": 0.7, "grad_norm": 1.3626453592908885, "learning_rate": 2.1204263270859382e-06, "loss": 0.189, "step": 9849 }, { "epoch": 0.7, "grad_norm": 1.3305004126096318, "learning_rate": 2.119479397489651e-06, "loss": 0.1843, "step": 9850 }, { "epoch": 0.7, "grad_norm": 5.092708529617387, "learning_rate": 2.1185326225082635e-06, "loss": 0.4826, "step": 9851 }, { "epoch": 0.7, "grad_norm": 5.24977364229167, "learning_rate": 2.117586002192598e-06, "loss": 0.5784, "step": 9852 }, { "epoch": 0.7, "grad_norm": 1.1703383260185363, "learning_rate": 2.1166395365934644e-06, "loss": 0.175, "step": 9853 }, { "epoch": 0.7, "grad_norm": 1.4133920836005898, "learning_rate": 2.1156932257616675e-06, "loss": 0.181, "step": 9854 }, { "epoch": 0.7, "grad_norm": 1.5406109202978615, "learning_rate": 2.1147470697479977e-06, "loss": 0.2006, "step": 9855 }, { "epoch": 0.71, "grad_norm": 1.2641386257074096, "learning_rate": 2.113801068603242e-06, "loss": 0.1567, "step": 9856 }, { "epoch": 0.71, "grad_norm": 1.199845695098481, "learning_rate": 2.1128552223781813e-06, "loss": 0.178, "step": 9857 }, { "epoch": 0.71, "grad_norm": 1.4346026560783243, "learning_rate": 2.11190953112358e-06, "loss": 0.1845, "step": 9858 }, { "epoch": 0.71, "grad_norm": 5.435736365003967, "learning_rate": 2.1109639948902035e-06, "loss": 0.6163, "step": 9859 }, { "epoch": 0.71, "grad_norm": 7.493198960772928, "learning_rate": 2.1100186137288005e-06, "loss": 0.6731, "step": 9860 }, { "epoch": 0.71, "grad_norm": 1.558155063149377, "learning_rate": 2.1090733876901167e-06, "loss": 0.1645, "step": 9861 }, { "epoch": 0.71, "grad_norm": 1.2368114440515035, "learning_rate": 2.1081283168248877e-06, "loss": 0.171, "step": 9862 }, { "epoch": 0.71, "grad_norm": 5.707143597347296, "learning_rate": 2.107183401183845e-06, "loss": 0.5725, "step": 9863 }, { "epoch": 0.71, "grad_norm": 1.4476322191348134, "learning_rate": 2.106238640817702e-06, "loss": 0.1465, "step": 9864 }, { "epoch": 0.71, "grad_norm": 1.3857148022505723, "learning_rate": 2.1052940357771744e-06, "loss": 0.1534, "step": 9865 }, { "epoch": 0.71, "grad_norm": 1.1512504628727052, "learning_rate": 2.1043495861129616e-06, "loss": 0.1542, "step": 9866 }, { "epoch": 0.71, "grad_norm": 1.3643399260164875, "learning_rate": 2.103405291875761e-06, "loss": 0.179, "step": 9867 }, { "epoch": 0.71, "grad_norm": 1.3144717942036308, "learning_rate": 2.102461153116254e-06, "loss": 0.1745, "step": 9868 }, { "epoch": 0.71, "grad_norm": 1.3178520653149153, "learning_rate": 2.1015171698851224e-06, "loss": 0.1922, "step": 9869 }, { "epoch": 0.71, "grad_norm": 1.2049150813341212, "learning_rate": 2.1005733422330327e-06, "loss": 0.1842, "step": 9870 }, { "epoch": 0.71, "grad_norm": 1.3238706714513198, "learning_rate": 2.09962967021065e-06, "loss": 0.1645, "step": 9871 }, { "epoch": 0.71, "grad_norm": 6.052272989707731, "learning_rate": 2.0986861538686217e-06, "loss": 0.5756, "step": 9872 }, { "epoch": 0.71, "grad_norm": 1.1180484463234799, "learning_rate": 2.0977427932575962e-06, "loss": 0.1774, "step": 9873 }, { "epoch": 0.71, "grad_norm": 1.3877540826058457, "learning_rate": 2.0967995884282063e-06, "loss": 0.2093, "step": 9874 }, { "epoch": 0.71, "grad_norm": 1.1723618846940806, "learning_rate": 2.0958565394310815e-06, "loss": 0.1538, "step": 9875 }, { "epoch": 0.71, "grad_norm": 6.711517928748774, "learning_rate": 2.094913646316839e-06, "loss": 0.7216, "step": 9876 }, { "epoch": 0.71, "grad_norm": 8.44195770017684, "learning_rate": 2.0939709091360903e-06, "loss": 0.6406, "step": 9877 }, { "epoch": 0.71, "grad_norm": 1.2670440421966707, "learning_rate": 2.093028327939439e-06, "loss": 0.1575, "step": 9878 }, { "epoch": 0.71, "grad_norm": 1.7101381262791193, "learning_rate": 2.0920859027774786e-06, "loss": 0.2087, "step": 9879 }, { "epoch": 0.71, "grad_norm": 1.2059864574999795, "learning_rate": 2.0911436337007935e-06, "loss": 0.1519, "step": 9880 }, { "epoch": 0.71, "grad_norm": 1.2782775145517447, "learning_rate": 2.0902015207599634e-06, "loss": 0.1547, "step": 9881 }, { "epoch": 0.71, "grad_norm": 1.2985311050471064, "learning_rate": 2.0892595640055548e-06, "loss": 0.16, "step": 9882 }, { "epoch": 0.71, "grad_norm": 1.447111342126725, "learning_rate": 2.0883177634881296e-06, "loss": 0.2169, "step": 9883 }, { "epoch": 0.71, "grad_norm": 1.3203283399814725, "learning_rate": 2.0873761192582383e-06, "loss": 0.1533, "step": 9884 }, { "epoch": 0.71, "grad_norm": 1.1696047562410419, "learning_rate": 2.086434631366425e-06, "loss": 0.1726, "step": 9885 }, { "epoch": 0.71, "grad_norm": 1.325639447985463, "learning_rate": 2.0854932998632267e-06, "loss": 0.2013, "step": 9886 }, { "epoch": 0.71, "grad_norm": 1.3861177849146418, "learning_rate": 2.0845521247991683e-06, "loss": 0.2155, "step": 9887 }, { "epoch": 0.71, "grad_norm": 4.909888771878877, "learning_rate": 2.0836111062247717e-06, "loss": 0.5115, "step": 9888 }, { "epoch": 0.71, "grad_norm": 1.4270173560107637, "learning_rate": 2.0826702441905426e-06, "loss": 0.1888, "step": 9889 }, { "epoch": 0.71, "grad_norm": 5.63940189521589, "learning_rate": 2.081729538746987e-06, "loss": 0.4169, "step": 9890 }, { "epoch": 0.71, "grad_norm": 1.1639412240224096, "learning_rate": 2.0807889899445944e-06, "loss": 0.1279, "step": 9891 }, { "epoch": 0.71, "grad_norm": 1.4223063733194534, "learning_rate": 2.0798485978338527e-06, "loss": 0.1761, "step": 9892 }, { "epoch": 0.71, "grad_norm": 1.1386563403704233, "learning_rate": 2.0789083624652364e-06, "loss": 0.1352, "step": 9893 }, { "epoch": 0.71, "grad_norm": 1.3067980433377382, "learning_rate": 2.0779682838892153e-06, "loss": 0.1748, "step": 9894 }, { "epoch": 0.71, "grad_norm": 5.438303718230723, "learning_rate": 2.077028362156244e-06, "loss": 0.6173, "step": 9895 }, { "epoch": 0.71, "grad_norm": 1.3225269162277258, "learning_rate": 2.0760885973167822e-06, "loss": 0.1789, "step": 9896 }, { "epoch": 0.71, "grad_norm": 1.2403730206078722, "learning_rate": 2.0751489894212667e-06, "loss": 0.1529, "step": 9897 }, { "epoch": 0.71, "grad_norm": 1.4962382933372795, "learning_rate": 2.074209538520135e-06, "loss": 0.177, "step": 9898 }, { "epoch": 0.71, "grad_norm": 1.6072497697934938, "learning_rate": 2.07327024466381e-06, "loss": 0.2006, "step": 9899 }, { "epoch": 0.71, "grad_norm": 1.1065627984714854, "learning_rate": 2.072331107902713e-06, "loss": 0.1608, "step": 9900 }, { "epoch": 0.71, "grad_norm": 1.4269701400323827, "learning_rate": 2.0713921282872492e-06, "loss": 0.1861, "step": 9901 }, { "epoch": 0.71, "grad_norm": 1.2948558793292697, "learning_rate": 2.070453305867823e-06, "loss": 0.1614, "step": 9902 }, { "epoch": 0.71, "grad_norm": 1.3884971562191146, "learning_rate": 2.069514640694823e-06, "loss": 0.1466, "step": 9903 }, { "epoch": 0.71, "grad_norm": 1.3290346527325099, "learning_rate": 2.0685761328186347e-06, "loss": 0.1626, "step": 9904 }, { "epoch": 0.71, "grad_norm": 1.346545724483843, "learning_rate": 2.0676377822896344e-06, "loss": 0.1606, "step": 9905 }, { "epoch": 0.71, "grad_norm": 1.3760334978048923, "learning_rate": 2.06669958915819e-06, "loss": 0.1768, "step": 9906 }, { "epoch": 0.71, "grad_norm": 1.3879009732809675, "learning_rate": 2.0657615534746556e-06, "loss": 0.1708, "step": 9907 }, { "epoch": 0.71, "grad_norm": 1.3111919220892199, "learning_rate": 2.0648236752893864e-06, "loss": 0.1564, "step": 9908 }, { "epoch": 0.71, "grad_norm": 1.4068572242027364, "learning_rate": 2.0638859546527197e-06, "loss": 0.1577, "step": 9909 }, { "epoch": 0.71, "grad_norm": 1.3082797608506729, "learning_rate": 2.0629483916149917e-06, "loss": 0.159, "step": 9910 }, { "epoch": 0.71, "grad_norm": 1.3662036495798913, "learning_rate": 2.0620109862265243e-06, "loss": 0.181, "step": 9911 }, { "epoch": 0.71, "grad_norm": 1.4756062145129323, "learning_rate": 2.061073738537635e-06, "loss": 0.165, "step": 9912 }, { "epoch": 0.71, "grad_norm": 1.4654128865018698, "learning_rate": 2.060136648598632e-06, "loss": 0.2105, "step": 9913 }, { "epoch": 0.71, "grad_norm": 1.3417563355846003, "learning_rate": 2.0591997164598156e-06, "loss": 0.1395, "step": 9914 }, { "epoch": 0.71, "grad_norm": 5.37793674815471, "learning_rate": 2.0582629421714732e-06, "loss": 0.5017, "step": 9915 }, { "epoch": 0.71, "grad_norm": 1.4523592614749674, "learning_rate": 2.0573263257838916e-06, "loss": 0.2006, "step": 9916 }, { "epoch": 0.71, "grad_norm": 4.667190535424932, "learning_rate": 2.0563898673473404e-06, "loss": 0.5101, "step": 9917 }, { "epoch": 0.71, "grad_norm": 1.4519457092220538, "learning_rate": 2.0554535669120884e-06, "loss": 0.1938, "step": 9918 }, { "epoch": 0.71, "grad_norm": 1.3950910565768653, "learning_rate": 2.054517424528389e-06, "loss": 0.1709, "step": 9919 }, { "epoch": 0.71, "grad_norm": 1.3277775889218504, "learning_rate": 2.0535814402464922e-06, "loss": 0.1455, "step": 9920 }, { "epoch": 0.71, "grad_norm": 1.292053353997159, "learning_rate": 2.0526456141166383e-06, "loss": 0.2162, "step": 9921 }, { "epoch": 0.71, "grad_norm": 1.278502844299361, "learning_rate": 2.0517099461890595e-06, "loss": 0.1805, "step": 9922 }, { "epoch": 0.71, "grad_norm": 1.2475328412925255, "learning_rate": 2.0507744365139787e-06, "loss": 0.1343, "step": 9923 }, { "epoch": 0.71, "grad_norm": 1.2698321236183767, "learning_rate": 2.049839085141608e-06, "loss": 0.141, "step": 9924 }, { "epoch": 0.71, "grad_norm": 1.3500628775322452, "learning_rate": 2.0489038921221567e-06, "loss": 0.1625, "step": 9925 }, { "epoch": 0.71, "grad_norm": 1.3754737956966643, "learning_rate": 2.047968857505818e-06, "loss": 0.1704, "step": 9926 }, { "epoch": 0.71, "grad_norm": 1.1611271725569305, "learning_rate": 2.0470339813427857e-06, "loss": 0.1801, "step": 9927 }, { "epoch": 0.71, "grad_norm": 1.4097202962325988, "learning_rate": 2.046099263683236e-06, "loss": 0.1768, "step": 9928 }, { "epoch": 0.71, "grad_norm": 4.30380353723763, "learning_rate": 2.045164704577342e-06, "loss": 0.5791, "step": 9929 }, { "epoch": 0.71, "grad_norm": 1.2612467596651904, "learning_rate": 2.0442303040752687e-06, "loss": 0.1638, "step": 9930 }, { "epoch": 0.71, "grad_norm": 1.3172675483473377, "learning_rate": 2.0432960622271707e-06, "loss": 0.1713, "step": 9931 }, { "epoch": 0.71, "grad_norm": 1.2776405878156374, "learning_rate": 2.0423619790831926e-06, "loss": 0.1655, "step": 9932 }, { "epoch": 0.71, "grad_norm": 1.4325661991318008, "learning_rate": 2.041428054693475e-06, "loss": 0.1958, "step": 9933 }, { "epoch": 0.71, "grad_norm": 1.3905024627115765, "learning_rate": 2.0404942891081442e-06, "loss": 0.1588, "step": 9934 }, { "epoch": 0.71, "grad_norm": 1.3750492736772684, "learning_rate": 2.0395606823773238e-06, "loss": 0.1925, "step": 9935 }, { "epoch": 0.71, "grad_norm": 1.2091818285473133, "learning_rate": 2.038627234551123e-06, "loss": 0.1733, "step": 9936 }, { "epoch": 0.71, "grad_norm": 1.2428074399341464, "learning_rate": 2.0376939456796473e-06, "loss": 0.1837, "step": 9937 }, { "epoch": 0.71, "grad_norm": 1.2315659125835232, "learning_rate": 2.0367608158129924e-06, "loss": 0.1678, "step": 9938 }, { "epoch": 0.71, "grad_norm": 1.3487241133819814, "learning_rate": 2.0358278450012466e-06, "loss": 0.1976, "step": 9939 }, { "epoch": 0.71, "grad_norm": 1.2408577807948782, "learning_rate": 2.034895033294483e-06, "loss": 0.1895, "step": 9940 }, { "epoch": 0.71, "grad_norm": 1.294963169940782, "learning_rate": 2.033962380742777e-06, "loss": 0.1765, "step": 9941 }, { "epoch": 0.71, "grad_norm": 1.177799872035875, "learning_rate": 2.033029887396185e-06, "loss": 0.1402, "step": 9942 }, { "epoch": 0.71, "grad_norm": 1.3166406745042563, "learning_rate": 2.0320975533047636e-06, "loss": 0.1815, "step": 9943 }, { "epoch": 0.71, "grad_norm": 1.3162591720066166, "learning_rate": 2.0311653785185524e-06, "loss": 0.1309, "step": 9944 }, { "epoch": 0.71, "grad_norm": 1.3865209658904454, "learning_rate": 2.0302333630875896e-06, "loss": 0.2024, "step": 9945 }, { "epoch": 0.71, "grad_norm": 1.4070486787694327, "learning_rate": 2.0293015070619014e-06, "loss": 0.1492, "step": 9946 }, { "epoch": 0.71, "grad_norm": 1.3413435702578953, "learning_rate": 2.0283698104915077e-06, "loss": 0.1734, "step": 9947 }, { "epoch": 0.71, "grad_norm": 1.2040091017762542, "learning_rate": 2.027438273426416e-06, "loss": 0.1314, "step": 9948 }, { "epoch": 0.71, "grad_norm": 1.3255688061352577, "learning_rate": 2.0265068959166295e-06, "loss": 0.1637, "step": 9949 }, { "epoch": 0.71, "grad_norm": 4.577348153990678, "learning_rate": 2.025575678012138e-06, "loss": 0.5839, "step": 9950 }, { "epoch": 0.71, "grad_norm": 1.3592618493514435, "learning_rate": 2.024644619762928e-06, "loss": 0.1713, "step": 9951 }, { "epoch": 0.71, "grad_norm": 1.4135629960848357, "learning_rate": 2.0237137212189757e-06, "loss": 0.2316, "step": 9952 }, { "epoch": 0.71, "grad_norm": 4.660838090743961, "learning_rate": 2.0227829824302447e-06, "loss": 0.5356, "step": 9953 }, { "epoch": 0.71, "grad_norm": 1.5376454522037042, "learning_rate": 2.021852403446695e-06, "loss": 0.1626, "step": 9954 }, { "epoch": 0.71, "grad_norm": 1.220637733301534, "learning_rate": 2.0209219843182774e-06, "loss": 0.1588, "step": 9955 }, { "epoch": 0.71, "grad_norm": 1.231993916316084, "learning_rate": 2.0199917250949343e-06, "loss": 0.1846, "step": 9956 }, { "epoch": 0.71, "grad_norm": 1.253543716851911, "learning_rate": 2.0190616258265944e-06, "loss": 0.1699, "step": 9957 }, { "epoch": 0.71, "grad_norm": 1.2599842700727308, "learning_rate": 2.0181316865631856e-06, "loss": 0.1527, "step": 9958 }, { "epoch": 0.71, "grad_norm": 1.2121234553443256, "learning_rate": 2.0172019073546194e-06, "loss": 0.1428, "step": 9959 }, { "epoch": 0.71, "grad_norm": 6.604587799419003, "learning_rate": 2.0162722882508072e-06, "loss": 0.4892, "step": 9960 }, { "epoch": 0.71, "grad_norm": 1.321906946177831, "learning_rate": 2.015342829301643e-06, "loss": 0.1524, "step": 9961 }, { "epoch": 0.71, "grad_norm": 1.3314032643648415, "learning_rate": 2.0144135305570176e-06, "loss": 0.1813, "step": 9962 }, { "epoch": 0.71, "grad_norm": 1.391198732418022, "learning_rate": 2.0134843920668135e-06, "loss": 0.1856, "step": 9963 }, { "epoch": 0.71, "grad_norm": 1.2551270398026033, "learning_rate": 2.012555413880904e-06, "loss": 0.1462, "step": 9964 }, { "epoch": 0.71, "grad_norm": 1.637962012578639, "learning_rate": 2.0116265960491498e-06, "loss": 0.1778, "step": 9965 }, { "epoch": 0.71, "grad_norm": 1.4563355136386023, "learning_rate": 2.0106979386214092e-06, "loss": 0.1746, "step": 9966 }, { "epoch": 0.71, "grad_norm": 1.5458919543662657, "learning_rate": 2.0097694416475256e-06, "loss": 0.1896, "step": 9967 }, { "epoch": 0.71, "grad_norm": 1.3830086251858307, "learning_rate": 2.008841105177341e-06, "loss": 0.2009, "step": 9968 }, { "epoch": 0.71, "grad_norm": 1.320823688855238, "learning_rate": 2.0079129292606804e-06, "loss": 0.2028, "step": 9969 }, { "epoch": 0.71, "grad_norm": 1.3374183490147276, "learning_rate": 2.0069849139473668e-06, "loss": 0.1859, "step": 9970 }, { "epoch": 0.71, "grad_norm": 1.2062755336057875, "learning_rate": 2.006057059287213e-06, "loss": 0.1643, "step": 9971 }, { "epoch": 0.71, "grad_norm": 1.517620449608032, "learning_rate": 2.005129365330023e-06, "loss": 0.2083, "step": 9972 }, { "epoch": 0.71, "grad_norm": 5.500973331345831, "learning_rate": 2.0042018321255897e-06, "loss": 0.5806, "step": 9973 }, { "epoch": 0.71, "grad_norm": 1.5311301816314118, "learning_rate": 2.0032744597237013e-06, "loss": 0.1849, "step": 9974 }, { "epoch": 0.71, "grad_norm": 5.288556996109179, "learning_rate": 2.0023472481741336e-06, "loss": 0.6284, "step": 9975 }, { "epoch": 0.71, "grad_norm": 5.034290350631877, "learning_rate": 2.001420197526658e-06, "loss": 0.5797, "step": 9976 }, { "epoch": 0.71, "grad_norm": 3.830220074581334, "learning_rate": 2.0004933078310314e-06, "loss": 0.4121, "step": 9977 }, { "epoch": 0.71, "grad_norm": 1.275727810170509, "learning_rate": 1.999566579137008e-06, "loss": 0.1713, "step": 9978 }, { "epoch": 0.71, "grad_norm": 1.5029710576558715, "learning_rate": 1.99864001149433e-06, "loss": 0.2002, "step": 9979 }, { "epoch": 0.71, "grad_norm": 1.295772751036187, "learning_rate": 1.9977136049527348e-06, "loss": 0.1541, "step": 9980 }, { "epoch": 0.71, "grad_norm": 1.4987024650526921, "learning_rate": 1.9967873595619438e-06, "loss": 0.1784, "step": 9981 }, { "epoch": 0.71, "grad_norm": 1.3964645802324291, "learning_rate": 1.9958612753716777e-06, "loss": 0.1934, "step": 9982 }, { "epoch": 0.71, "grad_norm": 5.787899154429594, "learning_rate": 1.994935352431642e-06, "loss": 0.6112, "step": 9983 }, { "epoch": 0.71, "grad_norm": 1.3635541754658314, "learning_rate": 1.9940095907915385e-06, "loss": 0.1792, "step": 9984 }, { "epoch": 0.71, "grad_norm": 1.1980851306409523, "learning_rate": 1.993083990501059e-06, "loss": 0.1544, "step": 9985 }, { "epoch": 0.71, "grad_norm": 1.3459564980767784, "learning_rate": 1.992158551609884e-06, "loss": 0.2406, "step": 9986 }, { "epoch": 0.71, "grad_norm": 1.5303919745846444, "learning_rate": 1.9912332741676883e-06, "loss": 0.2167, "step": 9987 }, { "epoch": 0.71, "grad_norm": 5.555531857659759, "learning_rate": 1.9903081582241368e-06, "loss": 0.5453, "step": 9988 }, { "epoch": 0.71, "grad_norm": 1.4371404332673614, "learning_rate": 1.9893832038288884e-06, "loss": 0.187, "step": 9989 }, { "epoch": 0.71, "grad_norm": 1.4627928288988505, "learning_rate": 1.9884584110315872e-06, "loss": 0.1917, "step": 9990 }, { "epoch": 0.71, "grad_norm": 1.279006588744145, "learning_rate": 1.9875337798818768e-06, "loss": 0.1713, "step": 9991 }, { "epoch": 0.71, "grad_norm": 9.294157588002328, "learning_rate": 1.9866093104293825e-06, "loss": 0.627, "step": 9992 }, { "epoch": 0.71, "grad_norm": 1.1187815813718136, "learning_rate": 1.985685002723731e-06, "loss": 0.1534, "step": 9993 }, { "epoch": 0.71, "grad_norm": 1.344608284221575, "learning_rate": 1.9847608568145322e-06, "loss": 0.1708, "step": 9994 }, { "epoch": 0.71, "grad_norm": 1.4110959257349243, "learning_rate": 1.9838368727513922e-06, "loss": 0.1923, "step": 9995 }, { "epoch": 0.72, "grad_norm": 1.4722265504927314, "learning_rate": 1.9829130505839058e-06, "loss": 0.1786, "step": 9996 }, { "epoch": 0.72, "grad_norm": 6.169097784340746, "learning_rate": 1.98198939036166e-06, "loss": 0.5308, "step": 9997 }, { "epoch": 0.72, "grad_norm": 1.3565470534271693, "learning_rate": 1.9810658921342335e-06, "loss": 0.1539, "step": 9998 }, { "epoch": 0.72, "grad_norm": 1.2084401665284408, "learning_rate": 1.9801425559511983e-06, "loss": 0.1362, "step": 9999 }, { "epoch": 0.72, "grad_norm": 1.308036139495296, "learning_rate": 1.9792193818621118e-06, "loss": 0.178, "step": 10000 }, { "epoch": 0.72, "grad_norm": 1.3002703300010954, "learning_rate": 1.97829636991653e-06, "loss": 0.181, "step": 10001 }, { "epoch": 0.72, "grad_norm": 1.3506514018375053, "learning_rate": 1.977373520163992e-06, "loss": 0.1588, "step": 10002 }, { "epoch": 0.72, "grad_norm": 1.258429056924509, "learning_rate": 1.9764508326540373e-06, "loss": 0.1675, "step": 10003 }, { "epoch": 0.72, "grad_norm": 1.3616523183499503, "learning_rate": 1.975528307436188e-06, "loss": 0.1957, "step": 10004 }, { "epoch": 0.72, "grad_norm": 1.515419103999382, "learning_rate": 1.9746059445599642e-06, "loss": 0.2022, "step": 10005 }, { "epoch": 0.72, "grad_norm": 1.3622111500585605, "learning_rate": 1.9736837440748736e-06, "loss": 0.2195, "step": 10006 }, { "epoch": 0.72, "grad_norm": 1.3516561393619462, "learning_rate": 1.9727617060304184e-06, "loss": 0.1945, "step": 10007 }, { "epoch": 0.72, "grad_norm": 1.4203374848910484, "learning_rate": 1.9718398304760865e-06, "loss": 0.1546, "step": 10008 }, { "epoch": 0.72, "grad_norm": 1.319376882198309, "learning_rate": 1.970918117461364e-06, "loss": 0.2016, "step": 10009 }, { "epoch": 0.72, "grad_norm": 1.2041856623861882, "learning_rate": 1.969996567035722e-06, "loss": 0.1732, "step": 10010 }, { "epoch": 0.72, "grad_norm": 1.2950772399705848, "learning_rate": 1.9690751792486272e-06, "loss": 0.1745, "step": 10011 }, { "epoch": 0.72, "grad_norm": 1.299377524873502, "learning_rate": 1.968153954149535e-06, "loss": 0.1824, "step": 10012 }, { "epoch": 0.72, "grad_norm": 1.2051540026024092, "learning_rate": 1.967232891787893e-06, "loss": 0.1451, "step": 10013 }, { "epoch": 0.72, "grad_norm": 1.3265857607816367, "learning_rate": 1.9663119922131406e-06, "loss": 0.1888, "step": 10014 }, { "epoch": 0.72, "grad_norm": 1.3483994333627227, "learning_rate": 1.965391255474709e-06, "loss": 0.195, "step": 10015 }, { "epoch": 0.72, "grad_norm": 1.4647502373067502, "learning_rate": 1.9644706816220198e-06, "loss": 0.1469, "step": 10016 }, { "epoch": 0.72, "grad_norm": 1.2521323132240383, "learning_rate": 1.963550270704484e-06, "loss": 0.1608, "step": 10017 }, { "epoch": 0.72, "grad_norm": 1.2955625198668186, "learning_rate": 1.962630022771508e-06, "loss": 0.1975, "step": 10018 }, { "epoch": 0.72, "grad_norm": 5.574089501170558, "learning_rate": 1.9617099378724835e-06, "loss": 0.4704, "step": 10019 }, { "epoch": 0.72, "grad_norm": 1.3241325547782847, "learning_rate": 1.960790016056801e-06, "loss": 0.1613, "step": 10020 }, { "epoch": 0.72, "grad_norm": 1.2642620553053199, "learning_rate": 1.9598702573738343e-06, "loss": 0.1469, "step": 10021 }, { "epoch": 0.72, "grad_norm": 1.3660707676889527, "learning_rate": 1.958950661872955e-06, "loss": 0.1619, "step": 10022 }, { "epoch": 0.72, "grad_norm": 1.2421609850287287, "learning_rate": 1.958031229603522e-06, "loss": 0.1747, "step": 10023 }, { "epoch": 0.72, "grad_norm": 1.2945827617360746, "learning_rate": 1.9571119606148896e-06, "loss": 0.1774, "step": 10024 }, { "epoch": 0.72, "grad_norm": 6.174257667911944, "learning_rate": 1.956192854956397e-06, "loss": 0.5525, "step": 10025 }, { "epoch": 0.72, "grad_norm": 1.35846071902805, "learning_rate": 1.9552739126773807e-06, "loss": 0.1821, "step": 10026 }, { "epoch": 0.72, "grad_norm": 1.3905322218653868, "learning_rate": 1.9543551338271633e-06, "loss": 0.1413, "step": 10027 }, { "epoch": 0.72, "grad_norm": 1.3412091760095721, "learning_rate": 1.953436518455064e-06, "loss": 0.1479, "step": 10028 }, { "epoch": 0.72, "grad_norm": 6.002538100300138, "learning_rate": 1.9525180666103877e-06, "loss": 0.469, "step": 10029 }, { "epoch": 0.72, "grad_norm": 1.229061561538334, "learning_rate": 1.9515997783424346e-06, "loss": 0.1552, "step": 10030 }, { "epoch": 0.72, "grad_norm": 1.1380014851653208, "learning_rate": 1.950681653700494e-06, "loss": 0.1382, "step": 10031 }, { "epoch": 0.72, "grad_norm": 1.2582997770057984, "learning_rate": 1.9497636927338502e-06, "loss": 0.1662, "step": 10032 }, { "epoch": 0.72, "grad_norm": 1.3257997003881854, "learning_rate": 1.948845895491772e-06, "loss": 0.1454, "step": 10033 }, { "epoch": 0.72, "grad_norm": 1.353889464227676, "learning_rate": 1.947928262023526e-06, "loss": 0.1798, "step": 10034 }, { "epoch": 0.72, "grad_norm": 1.3721545573066014, "learning_rate": 1.9470107923783636e-06, "loss": 0.1905, "step": 10035 }, { "epoch": 0.72, "grad_norm": 1.333359598590068, "learning_rate": 1.946093486605535e-06, "loss": 0.172, "step": 10036 }, { "epoch": 0.72, "grad_norm": 1.2988439125189775, "learning_rate": 1.945176344754274e-06, "loss": 0.1612, "step": 10037 }, { "epoch": 0.72, "grad_norm": 1.381404634904194, "learning_rate": 1.944259366873811e-06, "loss": 0.1757, "step": 10038 }, { "epoch": 0.72, "grad_norm": 1.3110011432744135, "learning_rate": 1.9433425530133643e-06, "loss": 0.1436, "step": 10039 }, { "epoch": 0.72, "grad_norm": 1.4265984553693554, "learning_rate": 1.9424259032221482e-06, "loss": 0.1945, "step": 10040 }, { "epoch": 0.72, "grad_norm": 1.4711115379390938, "learning_rate": 1.941509417549361e-06, "loss": 0.1783, "step": 10041 }, { "epoch": 0.72, "grad_norm": 1.371635070062476, "learning_rate": 1.9405930960441987e-06, "loss": 0.2102, "step": 10042 }, { "epoch": 0.72, "grad_norm": 1.2427339482459354, "learning_rate": 1.9396769387558433e-06, "loss": 0.1684, "step": 10043 }, { "epoch": 0.72, "grad_norm": 1.3658652040803707, "learning_rate": 1.9387609457334734e-06, "loss": 0.1768, "step": 10044 }, { "epoch": 0.72, "grad_norm": 5.37624817996891, "learning_rate": 1.9378451170262525e-06, "loss": 0.4902, "step": 10045 }, { "epoch": 0.72, "grad_norm": 1.2122479629862002, "learning_rate": 1.9369294526833406e-06, "loss": 0.1843, "step": 10046 }, { "epoch": 0.72, "grad_norm": 1.2193864298399844, "learning_rate": 1.936013952753887e-06, "loss": 0.1483, "step": 10047 }, { "epoch": 0.72, "grad_norm": 1.2996167821346771, "learning_rate": 1.9350986172870314e-06, "loss": 0.1823, "step": 10048 }, { "epoch": 0.72, "grad_norm": 1.3273127957719435, "learning_rate": 1.9341834463319077e-06, "loss": 0.1754, "step": 10049 }, { "epoch": 0.72, "grad_norm": 1.3914272874714133, "learning_rate": 1.9332684399376343e-06, "loss": 0.1589, "step": 10050 }, { "epoch": 0.72, "grad_norm": 1.2809711411306326, "learning_rate": 1.9323535981533303e-06, "loss": 0.1812, "step": 10051 }, { "epoch": 0.72, "grad_norm": 1.2215052659139345, "learning_rate": 1.931438921028096e-06, "loss": 0.1534, "step": 10052 }, { "epoch": 0.72, "grad_norm": 1.3799929602418441, "learning_rate": 1.9305244086110314e-06, "loss": 0.1851, "step": 10053 }, { "epoch": 0.72, "grad_norm": 1.4136260567362546, "learning_rate": 1.9296100609512203e-06, "loss": 0.1576, "step": 10054 }, { "epoch": 0.72, "grad_norm": 1.428626801621541, "learning_rate": 1.9286958780977433e-06, "loss": 0.1944, "step": 10055 }, { "epoch": 0.72, "grad_norm": 1.5161120801102852, "learning_rate": 1.92778186009967e-06, "loss": 0.1945, "step": 10056 }, { "epoch": 0.72, "grad_norm": 1.2799630540918274, "learning_rate": 1.9268680070060624e-06, "loss": 0.1671, "step": 10057 }, { "epoch": 0.72, "grad_norm": 1.598641089843731, "learning_rate": 1.92595431886597e-06, "loss": 0.169, "step": 10058 }, { "epoch": 0.72, "grad_norm": 1.1255589237568593, "learning_rate": 1.925040795728439e-06, "loss": 0.1561, "step": 10059 }, { "epoch": 0.72, "grad_norm": 1.3564080449031184, "learning_rate": 1.9241274376425e-06, "loss": 0.1946, "step": 10060 }, { "epoch": 0.72, "grad_norm": 1.3266289835649838, "learning_rate": 1.923214244657182e-06, "loss": 0.1802, "step": 10061 }, { "epoch": 0.72, "grad_norm": 1.2245294203361756, "learning_rate": 1.9223012168214987e-06, "loss": 0.1624, "step": 10062 }, { "epoch": 0.72, "grad_norm": 1.3565029340392736, "learning_rate": 1.9213883541844586e-06, "loss": 0.1463, "step": 10063 }, { "epoch": 0.72, "grad_norm": 1.4228776587705079, "learning_rate": 1.920475656795061e-06, "loss": 0.1809, "step": 10064 }, { "epoch": 0.72, "grad_norm": 1.473325571278964, "learning_rate": 1.9195631247022977e-06, "loss": 0.1778, "step": 10065 }, { "epoch": 0.72, "grad_norm": 1.224706257875385, "learning_rate": 1.918650757955146e-06, "loss": 0.1319, "step": 10066 }, { "epoch": 0.72, "grad_norm": 1.3103477211728565, "learning_rate": 1.9177385566025817e-06, "loss": 0.2033, "step": 10067 }, { "epoch": 0.72, "grad_norm": 1.2267250095222508, "learning_rate": 1.9168265206935655e-06, "loss": 0.1439, "step": 10068 }, { "epoch": 0.72, "grad_norm": 1.3295437827514043, "learning_rate": 1.9159146502770536e-06, "loss": 0.1582, "step": 10069 }, { "epoch": 0.72, "grad_norm": 1.3746214633304938, "learning_rate": 1.91500294540199e-06, "loss": 0.1777, "step": 10070 }, { "epoch": 0.72, "grad_norm": 1.275820945195789, "learning_rate": 1.9140914061173115e-06, "loss": 0.1917, "step": 10071 }, { "epoch": 0.72, "grad_norm": 4.225934904213671, "learning_rate": 1.9131800324719474e-06, "loss": 0.5913, "step": 10072 }, { "epoch": 0.72, "grad_norm": 1.1598667659136688, "learning_rate": 1.9122688245148168e-06, "loss": 0.1885, "step": 10073 }, { "epoch": 0.72, "grad_norm": 1.37660095914232, "learning_rate": 1.9113577822948276e-06, "loss": 0.1605, "step": 10074 }, { "epoch": 0.72, "grad_norm": 1.3510985525150097, "learning_rate": 1.910446905860884e-06, "loss": 0.1836, "step": 10075 }, { "epoch": 0.72, "grad_norm": 1.458284947029599, "learning_rate": 1.909536195261874e-06, "loss": 0.181, "step": 10076 }, { "epoch": 0.72, "grad_norm": 1.3453052074973277, "learning_rate": 1.9086256505466856e-06, "loss": 0.156, "step": 10077 }, { "epoch": 0.72, "grad_norm": 1.293418025993272, "learning_rate": 1.907715271764189e-06, "loss": 0.1715, "step": 10078 }, { "epoch": 0.72, "grad_norm": 1.4331204823445411, "learning_rate": 1.9068050589632514e-06, "loss": 0.2183, "step": 10079 }, { "epoch": 0.72, "grad_norm": 1.2970533702774103, "learning_rate": 1.90589501219273e-06, "loss": 0.1485, "step": 10080 }, { "epoch": 0.72, "grad_norm": 1.4101586248907372, "learning_rate": 1.9049851315014716e-06, "loss": 0.1953, "step": 10081 }, { "epoch": 0.72, "grad_norm": 1.448633265910556, "learning_rate": 1.9040754169383174e-06, "loss": 0.1752, "step": 10082 }, { "epoch": 0.72, "grad_norm": 1.3671429913082793, "learning_rate": 1.9031658685520936e-06, "loss": 0.1934, "step": 10083 }, { "epoch": 0.72, "grad_norm": 1.195816997470254, "learning_rate": 1.902256486391625e-06, "loss": 0.1597, "step": 10084 }, { "epoch": 0.72, "grad_norm": 1.2902389125960165, "learning_rate": 1.9013472705057196e-06, "loss": 0.159, "step": 10085 }, { "epoch": 0.72, "grad_norm": 1.4564688015064238, "learning_rate": 1.9004382209431843e-06, "loss": 0.2074, "step": 10086 }, { "epoch": 0.72, "grad_norm": 1.186170901211127, "learning_rate": 1.8995293377528096e-06, "loss": 0.1682, "step": 10087 }, { "epoch": 0.72, "grad_norm": 1.262405906817386, "learning_rate": 1.8986206209833823e-06, "loss": 0.1334, "step": 10088 }, { "epoch": 0.72, "grad_norm": 1.1288114109466896, "learning_rate": 1.8977120706836788e-06, "loss": 0.1705, "step": 10089 }, { "epoch": 0.72, "grad_norm": 1.4114182890723526, "learning_rate": 1.8968036869024687e-06, "loss": 0.166, "step": 10090 }, { "epoch": 0.72, "grad_norm": 1.3364538906088588, "learning_rate": 1.8958954696885068e-06, "loss": 0.1829, "step": 10091 }, { "epoch": 0.72, "grad_norm": 1.2493150823516865, "learning_rate": 1.8949874190905453e-06, "loss": 0.1431, "step": 10092 }, { "epoch": 0.72, "grad_norm": 1.3950656544124114, "learning_rate": 1.8940795351573222e-06, "loss": 0.1957, "step": 10093 }, { "epoch": 0.72, "grad_norm": 1.4991913687742395, "learning_rate": 1.8931718179375724e-06, "loss": 0.2117, "step": 10094 }, { "epoch": 0.72, "grad_norm": 1.3383928984238893, "learning_rate": 1.8922642674800146e-06, "loss": 0.1675, "step": 10095 }, { "epoch": 0.72, "grad_norm": 1.2874503051690889, "learning_rate": 1.8913568838333663e-06, "loss": 0.1795, "step": 10096 }, { "epoch": 0.72, "grad_norm": 1.4104368280808777, "learning_rate": 1.8904496670463285e-06, "loss": 0.2047, "step": 10097 }, { "epoch": 0.72, "grad_norm": 6.546129141814203, "learning_rate": 1.8895426171675996e-06, "loss": 0.3746, "step": 10098 }, { "epoch": 0.72, "grad_norm": 1.3119353317967886, "learning_rate": 1.8886357342458655e-06, "loss": 0.1825, "step": 10099 }, { "epoch": 0.72, "grad_norm": 1.4225633111052087, "learning_rate": 1.8877290183298058e-06, "loss": 0.1913, "step": 10100 }, { "epoch": 0.72, "grad_norm": 1.3444380409394994, "learning_rate": 1.886822469468087e-06, "loss": 0.1703, "step": 10101 }, { "epoch": 0.72, "grad_norm": 1.408869452331511, "learning_rate": 1.8859160877093713e-06, "loss": 0.1577, "step": 10102 }, { "epoch": 0.72, "grad_norm": 1.3767288644558326, "learning_rate": 1.8850098731023064e-06, "loss": 0.1988, "step": 10103 }, { "epoch": 0.72, "grad_norm": 1.62319917502275, "learning_rate": 1.8841038256955385e-06, "loss": 0.1714, "step": 10104 }, { "epoch": 0.72, "grad_norm": 1.1138993934121302, "learning_rate": 1.883197945537697e-06, "loss": 0.1558, "step": 10105 }, { "epoch": 0.72, "grad_norm": 1.317572076631177, "learning_rate": 1.8822922326774067e-06, "loss": 0.1655, "step": 10106 }, { "epoch": 0.72, "grad_norm": 1.2694553512991862, "learning_rate": 1.8813866871632836e-06, "loss": 0.1539, "step": 10107 }, { "epoch": 0.72, "grad_norm": 4.685160417849182, "learning_rate": 1.8804813090439355e-06, "loss": 0.5128, "step": 10108 }, { "epoch": 0.72, "grad_norm": 1.1326918715153882, "learning_rate": 1.8795760983679556e-06, "loss": 0.1602, "step": 10109 }, { "epoch": 0.72, "grad_norm": 6.348760452413663, "learning_rate": 1.8786710551839354e-06, "loss": 0.578, "step": 10110 }, { "epoch": 0.72, "grad_norm": 1.3409790320427564, "learning_rate": 1.8777661795404511e-06, "loss": 0.1728, "step": 10111 }, { "epoch": 0.72, "grad_norm": 1.5593283660153445, "learning_rate": 1.8768614714860744e-06, "loss": 0.1908, "step": 10112 }, { "epoch": 0.72, "grad_norm": 1.3625331474009448, "learning_rate": 1.8759569310693682e-06, "loss": 0.1762, "step": 10113 }, { "epoch": 0.72, "grad_norm": 1.1796226418325513, "learning_rate": 1.8750525583388806e-06, "loss": 0.1437, "step": 10114 }, { "epoch": 0.72, "grad_norm": 1.4031245734410154, "learning_rate": 1.8741483533431575e-06, "loss": 0.1767, "step": 10115 }, { "epoch": 0.72, "grad_norm": 1.3855889811739275, "learning_rate": 1.8732443161307323e-06, "loss": 0.181, "step": 10116 }, { "epoch": 0.72, "grad_norm": 1.4070907333663598, "learning_rate": 1.8723404467501316e-06, "loss": 0.2023, "step": 10117 }, { "epoch": 0.72, "grad_norm": 1.1809365915931191, "learning_rate": 1.8714367452498688e-06, "loss": 0.1865, "step": 10118 }, { "epoch": 0.72, "grad_norm": 1.2647378106793603, "learning_rate": 1.8705332116784541e-06, "loss": 0.1453, "step": 10119 }, { "epoch": 0.72, "grad_norm": 1.4861448590844877, "learning_rate": 1.869629846084382e-06, "loss": 0.2352, "step": 10120 }, { "epoch": 0.72, "grad_norm": 1.308040265671357, "learning_rate": 1.868726648516145e-06, "loss": 0.1626, "step": 10121 }, { "epoch": 0.72, "grad_norm": 1.356088441059376, "learning_rate": 1.8678236190222204e-06, "loss": 0.1749, "step": 10122 }, { "epoch": 0.72, "grad_norm": 1.338235114393291, "learning_rate": 1.86692075765108e-06, "loss": 0.2018, "step": 10123 }, { "epoch": 0.72, "grad_norm": 1.519494095366238, "learning_rate": 1.8660180644511866e-06, "loss": 0.1716, "step": 10124 }, { "epoch": 0.72, "grad_norm": 1.5659262640062315, "learning_rate": 1.865115539470994e-06, "loss": 0.2107, "step": 10125 }, { "epoch": 0.72, "grad_norm": 1.4422404911992923, "learning_rate": 1.8642131827589439e-06, "loss": 0.1924, "step": 10126 }, { "epoch": 0.72, "grad_norm": 1.299048714053764, "learning_rate": 1.8633109943634742e-06, "loss": 0.1812, "step": 10127 }, { "epoch": 0.72, "grad_norm": 1.132101581496522, "learning_rate": 1.8624089743330064e-06, "loss": 0.1539, "step": 10128 }, { "epoch": 0.72, "grad_norm": 1.2850524368248895, "learning_rate": 1.8615071227159625e-06, "loss": 0.1531, "step": 10129 }, { "epoch": 0.72, "grad_norm": 6.958026559012103, "learning_rate": 1.8606054395607453e-06, "loss": 0.6237, "step": 10130 }, { "epoch": 0.72, "grad_norm": 1.3975829041797732, "learning_rate": 1.8597039249157561e-06, "loss": 0.171, "step": 10131 }, { "epoch": 0.72, "grad_norm": 1.1390250326226639, "learning_rate": 1.8588025788293845e-06, "loss": 0.1767, "step": 10132 }, { "epoch": 0.72, "grad_norm": 1.4798546512367434, "learning_rate": 1.857901401350013e-06, "loss": 0.2272, "step": 10133 }, { "epoch": 0.72, "grad_norm": 1.5335107269031858, "learning_rate": 1.8570003925260095e-06, "loss": 0.2172, "step": 10134 }, { "epoch": 0.72, "grad_norm": 4.837374540889827, "learning_rate": 1.8560995524057402e-06, "loss": 0.5395, "step": 10135 }, { "epoch": 0.73, "grad_norm": 1.3124938832903987, "learning_rate": 1.8551988810375554e-06, "loss": 0.1732, "step": 10136 }, { "epoch": 0.73, "grad_norm": 1.3556871507220654, "learning_rate": 1.8542983784698026e-06, "loss": 0.1452, "step": 10137 }, { "epoch": 0.73, "grad_norm": 1.2348658665079344, "learning_rate": 1.8533980447508138e-06, "loss": 0.1554, "step": 10138 }, { "epoch": 0.73, "grad_norm": 1.334170734017181, "learning_rate": 1.8524978799289172e-06, "loss": 0.1478, "step": 10139 }, { "epoch": 0.73, "grad_norm": 1.3286826535945606, "learning_rate": 1.8515978840524302e-06, "loss": 0.1476, "step": 10140 }, { "epoch": 0.73, "grad_norm": 1.3020618273552271, "learning_rate": 1.8506980571696626e-06, "loss": 0.1841, "step": 10141 }, { "epoch": 0.73, "grad_norm": 1.307529982436838, "learning_rate": 1.8497983993289097e-06, "loss": 0.176, "step": 10142 }, { "epoch": 0.73, "grad_norm": 4.431564165757675, "learning_rate": 1.8488989105784638e-06, "loss": 0.5799, "step": 10143 }, { "epoch": 0.73, "grad_norm": 1.215077135855464, "learning_rate": 1.8479995909666077e-06, "loss": 0.1453, "step": 10144 }, { "epoch": 0.73, "grad_norm": 1.4488265682353514, "learning_rate": 1.8471004405416094e-06, "loss": 0.1952, "step": 10145 }, { "epoch": 0.73, "grad_norm": 3.958197397085143, "learning_rate": 1.8462014593517352e-06, "loss": 0.4985, "step": 10146 }, { "epoch": 0.73, "grad_norm": 4.281644466615377, "learning_rate": 1.8453026474452358e-06, "loss": 0.5803, "step": 10147 }, { "epoch": 0.73, "grad_norm": 1.4590389105898813, "learning_rate": 1.844404004870357e-06, "loss": 0.1759, "step": 10148 }, { "epoch": 0.73, "grad_norm": 1.22702914860479, "learning_rate": 1.8435055316753354e-06, "loss": 0.1676, "step": 10149 }, { "epoch": 0.73, "grad_norm": 1.1970796784903657, "learning_rate": 1.842607227908398e-06, "loss": 0.1422, "step": 10150 }, { "epoch": 0.73, "grad_norm": 1.2685443051548926, "learning_rate": 1.841709093617759e-06, "loss": 0.1712, "step": 10151 }, { "epoch": 0.73, "grad_norm": 4.179842531562926, "learning_rate": 1.8408111288516306e-06, "loss": 0.6358, "step": 10152 }, { "epoch": 0.73, "grad_norm": 1.3342834562183434, "learning_rate": 1.8399133336582087e-06, "loss": 0.1837, "step": 10153 }, { "epoch": 0.73, "grad_norm": 1.320163091674366, "learning_rate": 1.8390157080856858e-06, "loss": 0.1897, "step": 10154 }, { "epoch": 0.73, "grad_norm": 1.2221635757058384, "learning_rate": 1.8381182521822406e-06, "loss": 0.1526, "step": 10155 }, { "epoch": 0.73, "grad_norm": 1.1959551568915936, "learning_rate": 1.8372209659960454e-06, "loss": 0.1647, "step": 10156 }, { "epoch": 0.73, "grad_norm": 1.234257269349216, "learning_rate": 1.8363238495752638e-06, "loss": 0.1568, "step": 10157 }, { "epoch": 0.73, "grad_norm": 1.5872728174744868, "learning_rate": 1.8354269029680515e-06, "loss": 0.1989, "step": 10158 }, { "epoch": 0.73, "grad_norm": 1.304962824728764, "learning_rate": 1.834530126222549e-06, "loss": 0.1979, "step": 10159 }, { "epoch": 0.73, "grad_norm": 1.6567830139336046, "learning_rate": 1.8336335193868955e-06, "loss": 0.1862, "step": 10160 }, { "epoch": 0.73, "grad_norm": 1.2959701347169124, "learning_rate": 1.8327370825092134e-06, "loss": 0.1549, "step": 10161 }, { "epoch": 0.73, "grad_norm": 1.3733814263508257, "learning_rate": 1.8318408156376239e-06, "loss": 0.1722, "step": 10162 }, { "epoch": 0.73, "grad_norm": 1.4055393765905368, "learning_rate": 1.8309447188202306e-06, "loss": 0.1734, "step": 10163 }, { "epoch": 0.73, "grad_norm": 1.413165966634706, "learning_rate": 1.8300487921051352e-06, "loss": 0.1848, "step": 10164 }, { "epoch": 0.73, "grad_norm": 1.3056398235970395, "learning_rate": 1.8291530355404275e-06, "loss": 0.1628, "step": 10165 }, { "epoch": 0.73, "grad_norm": 1.432863663775086, "learning_rate": 1.828257449174189e-06, "loss": 0.165, "step": 10166 }, { "epoch": 0.73, "grad_norm": 1.2300735644118348, "learning_rate": 1.8273620330544882e-06, "loss": 0.1602, "step": 10167 }, { "epoch": 0.73, "grad_norm": 1.3721071519293426, "learning_rate": 1.8264667872293913e-06, "loss": 0.1648, "step": 10168 }, { "epoch": 0.73, "grad_norm": 1.562851287250568, "learning_rate": 1.8255717117469474e-06, "loss": 0.176, "step": 10169 }, { "epoch": 0.73, "grad_norm": 1.3343953322580107, "learning_rate": 1.8246768066552046e-06, "loss": 0.1549, "step": 10170 }, { "epoch": 0.73, "grad_norm": 1.189925218306639, "learning_rate": 1.8237820720021938e-06, "loss": 0.1564, "step": 10171 }, { "epoch": 0.73, "grad_norm": 1.1885149890326123, "learning_rate": 1.8228875078359432e-06, "loss": 0.1554, "step": 10172 }, { "epoch": 0.73, "grad_norm": 5.974332899028695, "learning_rate": 1.8219931142044694e-06, "loss": 0.5856, "step": 10173 }, { "epoch": 0.73, "grad_norm": 1.5225721656793263, "learning_rate": 1.8210988911557813e-06, "loss": 0.1968, "step": 10174 }, { "epoch": 0.73, "grad_norm": 1.3441867967910932, "learning_rate": 1.820204838737874e-06, "loss": 0.163, "step": 10175 }, { "epoch": 0.73, "grad_norm": 4.677084440811978, "learning_rate": 1.8193109569987376e-06, "loss": 0.554, "step": 10176 }, { "epoch": 0.73, "grad_norm": 1.3507115870748871, "learning_rate": 1.8184172459863553e-06, "loss": 0.197, "step": 10177 }, { "epoch": 0.73, "grad_norm": 1.1846633423280752, "learning_rate": 1.8175237057486938e-06, "loss": 0.171, "step": 10178 }, { "epoch": 0.73, "grad_norm": 1.429224370159057, "learning_rate": 1.8166303363337179e-06, "loss": 0.1381, "step": 10179 }, { "epoch": 0.73, "grad_norm": 4.798969255098325, "learning_rate": 1.8157371377893769e-06, "loss": 0.5402, "step": 10180 }, { "epoch": 0.73, "grad_norm": 1.2308471350396217, "learning_rate": 1.814844110163616e-06, "loss": 0.1574, "step": 10181 }, { "epoch": 0.73, "grad_norm": 1.274521083760135, "learning_rate": 1.8139512535043696e-06, "loss": 0.1383, "step": 10182 }, { "epoch": 0.73, "grad_norm": 1.3055894021065348, "learning_rate": 1.8130585678595635e-06, "loss": 0.1549, "step": 10183 }, { "epoch": 0.73, "grad_norm": 7.731425241716922, "learning_rate": 1.8121660532771113e-06, "loss": 0.6181, "step": 10184 }, { "epoch": 0.73, "grad_norm": 5.920393490046399, "learning_rate": 1.811273709804922e-06, "loss": 0.6822, "step": 10185 }, { "epoch": 0.73, "grad_norm": 4.5731593412723806, "learning_rate": 1.8103815374908906e-06, "loss": 0.5495, "step": 10186 }, { "epoch": 0.73, "grad_norm": 1.3462488704074438, "learning_rate": 1.809489536382908e-06, "loss": 0.1576, "step": 10187 }, { "epoch": 0.73, "grad_norm": 6.913074934248999, "learning_rate": 1.8085977065288502e-06, "loss": 0.5428, "step": 10188 }, { "epoch": 0.73, "grad_norm": 1.243961251542498, "learning_rate": 1.8077060479765908e-06, "loss": 0.1733, "step": 10189 }, { "epoch": 0.73, "grad_norm": 1.1952351373615455, "learning_rate": 1.8068145607739845e-06, "loss": 0.1589, "step": 10190 }, { "epoch": 0.73, "grad_norm": 1.2572704555585423, "learning_rate": 1.8059232449688903e-06, "loss": 0.1781, "step": 10191 }, { "epoch": 0.73, "grad_norm": 5.140822276025719, "learning_rate": 1.8050321006091453e-06, "loss": 0.6907, "step": 10192 }, { "epoch": 0.73, "grad_norm": 1.4172125173299168, "learning_rate": 1.8041411277425858e-06, "loss": 0.1829, "step": 10193 }, { "epoch": 0.73, "grad_norm": 1.2121386475466875, "learning_rate": 1.8032503264170326e-06, "loss": 0.162, "step": 10194 }, { "epoch": 0.73, "grad_norm": 1.4813177471663996, "learning_rate": 1.802359696680303e-06, "loss": 0.1564, "step": 10195 }, { "epoch": 0.73, "grad_norm": 1.2338853544864892, "learning_rate": 1.8014692385802002e-06, "loss": 0.12, "step": 10196 }, { "epoch": 0.73, "grad_norm": 1.6424419343561885, "learning_rate": 1.8005789521645233e-06, "loss": 0.1956, "step": 10197 }, { "epoch": 0.73, "grad_norm": 1.2682472111428265, "learning_rate": 1.7996888374810556e-06, "loss": 0.1623, "step": 10198 }, { "epoch": 0.73, "grad_norm": 8.26763806701586, "learning_rate": 1.798798894577577e-06, "loss": 0.6783, "step": 10199 }, { "epoch": 0.73, "grad_norm": 7.802546050912952, "learning_rate": 1.7979091235018564e-06, "loss": 0.685, "step": 10200 }, { "epoch": 0.73, "grad_norm": 1.2986642922285885, "learning_rate": 1.797019524301654e-06, "loss": 0.1834, "step": 10201 }, { "epoch": 0.73, "grad_norm": 1.2055159528192068, "learning_rate": 1.7961300970247175e-06, "loss": 0.1515, "step": 10202 }, { "epoch": 0.73, "grad_norm": 1.3422415577262934, "learning_rate": 1.7952408417187905e-06, "loss": 0.1693, "step": 10203 }, { "epoch": 0.73, "grad_norm": 7.386323278877843, "learning_rate": 1.7943517584316017e-06, "loss": 0.5714, "step": 10204 }, { "epoch": 0.73, "grad_norm": 1.270325730975223, "learning_rate": 1.7934628472108768e-06, "loss": 0.1443, "step": 10205 }, { "epoch": 0.73, "grad_norm": 1.2933545340037202, "learning_rate": 1.7925741081043258e-06, "loss": 0.1603, "step": 10206 }, { "epoch": 0.73, "grad_norm": 1.4005850176619432, "learning_rate": 1.791685541159654e-06, "loss": 0.1674, "step": 10207 }, { "epoch": 0.73, "grad_norm": 1.2570882752983858, "learning_rate": 1.7907971464245565e-06, "loss": 0.1758, "step": 10208 }, { "epoch": 0.73, "grad_norm": 1.3387111047330447, "learning_rate": 1.7899089239467189e-06, "loss": 0.1663, "step": 10209 }, { "epoch": 0.73, "grad_norm": 1.3072043788948535, "learning_rate": 1.789020873773819e-06, "loss": 0.1898, "step": 10210 }, { "epoch": 0.73, "grad_norm": 7.2616081088417435, "learning_rate": 1.7881329959535198e-06, "loss": 0.6354, "step": 10211 }, { "epoch": 0.73, "grad_norm": 1.6794633571536743, "learning_rate": 1.7872452905334836e-06, "loss": 0.2101, "step": 10212 }, { "epoch": 0.73, "grad_norm": 1.5223137599746444, "learning_rate": 1.7863577575613545e-06, "loss": 0.1795, "step": 10213 }, { "epoch": 0.73, "grad_norm": 1.4134285695549014, "learning_rate": 1.785470397084776e-06, "loss": 0.1967, "step": 10214 }, { "epoch": 0.73, "grad_norm": 1.3932791744452067, "learning_rate": 1.7845832091513737e-06, "loss": 0.2277, "step": 10215 }, { "epoch": 0.73, "grad_norm": 1.3050702155085232, "learning_rate": 1.783696193808771e-06, "loss": 0.1538, "step": 10216 }, { "epoch": 0.73, "grad_norm": 1.4841508095381029, "learning_rate": 1.7828093511045785e-06, "loss": 0.2247, "step": 10217 }, { "epoch": 0.73, "grad_norm": 1.4203334846316993, "learning_rate": 1.781922681086401e-06, "loss": 0.1835, "step": 10218 }, { "epoch": 0.73, "grad_norm": 1.3346839146909562, "learning_rate": 1.781036183801827e-06, "loss": 0.1846, "step": 10219 }, { "epoch": 0.73, "grad_norm": 6.022095595227784, "learning_rate": 1.7801498592984445e-06, "loss": 0.5531, "step": 10220 }, { "epoch": 0.73, "grad_norm": 1.280632275722684, "learning_rate": 1.779263707623824e-06, "loss": 0.1779, "step": 10221 }, { "epoch": 0.73, "grad_norm": 1.4004560787761884, "learning_rate": 1.7783777288255339e-06, "loss": 0.1925, "step": 10222 }, { "epoch": 0.73, "grad_norm": 1.3471469258233124, "learning_rate": 1.7774919229511268e-06, "loss": 0.1818, "step": 10223 }, { "epoch": 0.73, "grad_norm": 1.2006745154325504, "learning_rate": 1.7766062900481513e-06, "loss": 0.1669, "step": 10224 }, { "epoch": 0.73, "grad_norm": 1.7076920140567544, "learning_rate": 1.7757208301641437e-06, "loss": 0.135, "step": 10225 }, { "epoch": 0.73, "grad_norm": 1.3702522174339256, "learning_rate": 1.7748355433466347e-06, "loss": 0.1951, "step": 10226 }, { "epoch": 0.73, "grad_norm": 1.3612757438023597, "learning_rate": 1.7739504296431387e-06, "loss": 0.2028, "step": 10227 }, { "epoch": 0.73, "grad_norm": 1.2369234177538446, "learning_rate": 1.7730654891011695e-06, "loss": 0.1586, "step": 10228 }, { "epoch": 0.73, "grad_norm": 1.2500904942204571, "learning_rate": 1.7721807217682229e-06, "loss": 0.1514, "step": 10229 }, { "epoch": 0.73, "grad_norm": 1.2483830189244156, "learning_rate": 1.7712961276917933e-06, "loss": 0.2105, "step": 10230 }, { "epoch": 0.73, "grad_norm": 1.4024952363947432, "learning_rate": 1.7704117069193593e-06, "loss": 0.1788, "step": 10231 }, { "epoch": 0.73, "grad_norm": 1.5028497593640122, "learning_rate": 1.7695274594983941e-06, "loss": 0.2083, "step": 10232 }, { "epoch": 0.73, "grad_norm": 1.2171607507142124, "learning_rate": 1.7686433854763608e-06, "loss": 0.153, "step": 10233 }, { "epoch": 0.73, "grad_norm": 1.3672689977834445, "learning_rate": 1.7677594849007152e-06, "loss": 0.1743, "step": 10234 }, { "epoch": 0.73, "grad_norm": 1.459238715373597, "learning_rate": 1.7668757578188973e-06, "loss": 0.1956, "step": 10235 }, { "epoch": 0.73, "grad_norm": 1.2664668718024144, "learning_rate": 1.7659922042783463e-06, "loss": 0.2008, "step": 10236 }, { "epoch": 0.73, "grad_norm": 1.317961079422185, "learning_rate": 1.7651088243264836e-06, "loss": 0.1668, "step": 10237 }, { "epoch": 0.73, "grad_norm": 1.3724015700051369, "learning_rate": 1.7642256180107293e-06, "loss": 0.2199, "step": 10238 }, { "epoch": 0.73, "grad_norm": 1.2756975090785245, "learning_rate": 1.7633425853784875e-06, "loss": 0.1761, "step": 10239 }, { "epoch": 0.73, "grad_norm": 1.4800122003649414, "learning_rate": 1.762459726477157e-06, "loss": 0.1714, "step": 10240 }, { "epoch": 0.73, "grad_norm": 1.6054926673612844, "learning_rate": 1.761577041354126e-06, "loss": 0.1955, "step": 10241 }, { "epoch": 0.73, "grad_norm": 1.1309432146348073, "learning_rate": 1.760694530056774e-06, "loss": 0.1327, "step": 10242 }, { "epoch": 0.73, "grad_norm": 1.3520163506884577, "learning_rate": 1.759812192632473e-06, "loss": 0.1822, "step": 10243 }, { "epoch": 0.73, "grad_norm": 1.235679869079916, "learning_rate": 1.7589300291285783e-06, "loss": 0.1644, "step": 10244 }, { "epoch": 0.73, "grad_norm": 1.1850099330109805, "learning_rate": 1.7580480395924454e-06, "loss": 0.166, "step": 10245 }, { "epoch": 0.73, "grad_norm": 1.609668807186098, "learning_rate": 1.7571662240714127e-06, "loss": 0.1914, "step": 10246 }, { "epoch": 0.73, "grad_norm": 1.327687833021984, "learning_rate": 1.7562845826128156e-06, "loss": 0.1855, "step": 10247 }, { "epoch": 0.73, "grad_norm": 1.295100459348844, "learning_rate": 1.755403115263974e-06, "loss": 0.1584, "step": 10248 }, { "epoch": 0.73, "grad_norm": 3.555737462437905, "learning_rate": 1.7545218220722026e-06, "loss": 0.5094, "step": 10249 }, { "epoch": 0.73, "grad_norm": 1.4959608456843099, "learning_rate": 1.7536407030848068e-06, "loss": 0.1806, "step": 10250 }, { "epoch": 0.73, "grad_norm": 10.645441765357326, "learning_rate": 1.7527597583490825e-06, "loss": 0.5271, "step": 10251 }, { "epoch": 0.73, "grad_norm": 1.6885021420052595, "learning_rate": 1.7518789879123117e-06, "loss": 0.1845, "step": 10252 }, { "epoch": 0.73, "grad_norm": 1.3448838005997752, "learning_rate": 1.7509983918217742e-06, "loss": 0.1801, "step": 10253 }, { "epoch": 0.73, "grad_norm": 4.617342288056361, "learning_rate": 1.7501179701247344e-06, "loss": 0.502, "step": 10254 }, { "epoch": 0.73, "grad_norm": 1.3615924050057526, "learning_rate": 1.7492377228684527e-06, "loss": 0.1553, "step": 10255 }, { "epoch": 0.73, "grad_norm": 1.3772740404287978, "learning_rate": 1.748357650100173e-06, "loss": 0.1886, "step": 10256 }, { "epoch": 0.73, "grad_norm": 1.4321862567055832, "learning_rate": 1.7474777518671371e-06, "loss": 0.1886, "step": 10257 }, { "epoch": 0.73, "grad_norm": 4.964062069815653, "learning_rate": 1.7465980282165734e-06, "loss": 0.5338, "step": 10258 }, { "epoch": 0.73, "grad_norm": 1.1828580178038595, "learning_rate": 1.745718479195705e-06, "loss": 0.1659, "step": 10259 }, { "epoch": 0.73, "grad_norm": 1.3839481715607915, "learning_rate": 1.7448391048517378e-06, "loss": 0.1728, "step": 10260 }, { "epoch": 0.73, "grad_norm": 1.3523519249224243, "learning_rate": 1.7439599052318768e-06, "loss": 0.1995, "step": 10261 }, { "epoch": 0.73, "grad_norm": 1.3163517500543693, "learning_rate": 1.743080880383311e-06, "loss": 0.1745, "step": 10262 }, { "epoch": 0.73, "grad_norm": 1.3080790255940928, "learning_rate": 1.7422020303532267e-06, "loss": 0.1713, "step": 10263 }, { "epoch": 0.73, "grad_norm": 1.3830825654508476, "learning_rate": 1.7413233551887925e-06, "loss": 0.1707, "step": 10264 }, { "epoch": 0.73, "grad_norm": 1.1891848071236455, "learning_rate": 1.740444854937175e-06, "loss": 0.1425, "step": 10265 }, { "epoch": 0.73, "grad_norm": 4.423678319557384, "learning_rate": 1.7395665296455283e-06, "loss": 0.3815, "step": 10266 }, { "epoch": 0.73, "grad_norm": 1.327568538456378, "learning_rate": 1.7386883793609983e-06, "loss": 0.175, "step": 10267 }, { "epoch": 0.73, "grad_norm": 1.4586768676139563, "learning_rate": 1.7378104041307187e-06, "loss": 0.1935, "step": 10268 }, { "epoch": 0.73, "grad_norm": 1.3120126249001447, "learning_rate": 1.7369326040018175e-06, "loss": 0.2049, "step": 10269 }, { "epoch": 0.73, "grad_norm": 1.362670309884819, "learning_rate": 1.7360549790214092e-06, "loss": 0.1769, "step": 10270 }, { "epoch": 0.73, "grad_norm": 1.4106546786951808, "learning_rate": 1.735177529236603e-06, "loss": 0.1479, "step": 10271 }, { "epoch": 0.73, "grad_norm": 1.1786536575392577, "learning_rate": 1.7343002546944981e-06, "loss": 0.1667, "step": 10272 }, { "epoch": 0.73, "grad_norm": 1.5386550224797546, "learning_rate": 1.7334231554421798e-06, "loss": 0.1896, "step": 10273 }, { "epoch": 0.73, "grad_norm": 1.383199666625297, "learning_rate": 1.7325462315267294e-06, "loss": 0.179, "step": 10274 }, { "epoch": 0.73, "grad_norm": 1.3630335475403168, "learning_rate": 1.7316694829952164e-06, "loss": 0.1631, "step": 10275 }, { "epoch": 0.74, "grad_norm": 7.068085931777952, "learning_rate": 1.7307929098947023e-06, "loss": 0.5857, "step": 10276 }, { "epoch": 0.74, "grad_norm": 1.3700467327454295, "learning_rate": 1.7299165122722355e-06, "loss": 0.1553, "step": 10277 }, { "epoch": 0.74, "grad_norm": 7.356427422847231, "learning_rate": 1.729040290174861e-06, "loss": 0.5547, "step": 10278 }, { "epoch": 0.74, "grad_norm": 1.443538100790687, "learning_rate": 1.7281642436496065e-06, "loss": 0.2025, "step": 10279 }, { "epoch": 0.74, "grad_norm": 1.3940254264241456, "learning_rate": 1.7272883727434996e-06, "loss": 0.1807, "step": 10280 }, { "epoch": 0.74, "grad_norm": 1.367920886966645, "learning_rate": 1.7264126775035488e-06, "loss": 0.1897, "step": 10281 }, { "epoch": 0.74, "grad_norm": 1.1746517986915292, "learning_rate": 1.7255371579767605e-06, "loss": 0.1418, "step": 10282 }, { "epoch": 0.74, "grad_norm": 1.155317162675417, "learning_rate": 1.7246618142101285e-06, "loss": 0.1505, "step": 10283 }, { "epoch": 0.74, "grad_norm": 1.1749151156217477, "learning_rate": 1.7237866462506398e-06, "loss": 0.1626, "step": 10284 }, { "epoch": 0.74, "grad_norm": 1.3900181844576949, "learning_rate": 1.7229116541452668e-06, "loss": 0.1701, "step": 10285 }, { "epoch": 0.74, "grad_norm": 1.218431596613596, "learning_rate": 1.7220368379409785e-06, "loss": 0.1238, "step": 10286 }, { "epoch": 0.74, "grad_norm": 1.3487715981494197, "learning_rate": 1.7211621976847286e-06, "loss": 0.1929, "step": 10287 }, { "epoch": 0.74, "grad_norm": 1.1902952843571097, "learning_rate": 1.7202877334234675e-06, "loss": 0.1649, "step": 10288 }, { "epoch": 0.74, "grad_norm": 1.3341946151862414, "learning_rate": 1.7194134452041294e-06, "loss": 0.1798, "step": 10289 }, { "epoch": 0.74, "grad_norm": 1.3274603843341255, "learning_rate": 1.7185393330736461e-06, "loss": 0.1809, "step": 10290 }, { "epoch": 0.74, "grad_norm": 1.1435499605767787, "learning_rate": 1.7176653970789314e-06, "loss": 0.1289, "step": 10291 }, { "epoch": 0.74, "grad_norm": 1.3726085536415662, "learning_rate": 1.7167916372669018e-06, "loss": 0.1851, "step": 10292 }, { "epoch": 0.74, "grad_norm": 1.4476079097881336, "learning_rate": 1.7159180536844523e-06, "loss": 0.1861, "step": 10293 }, { "epoch": 0.74, "grad_norm": 1.4678839805969595, "learning_rate": 1.7150446463784753e-06, "loss": 0.1574, "step": 10294 }, { "epoch": 0.74, "grad_norm": 1.4512821041850994, "learning_rate": 1.7141714153958505e-06, "loss": 0.2025, "step": 10295 }, { "epoch": 0.74, "grad_norm": 1.2341635450104858, "learning_rate": 1.7132983607834514e-06, "loss": 0.1466, "step": 10296 }, { "epoch": 0.74, "grad_norm": 1.4207675079157591, "learning_rate": 1.7124254825881375e-06, "loss": 0.188, "step": 10297 }, { "epoch": 0.74, "grad_norm": 1.336059133559886, "learning_rate": 1.7115527808567644e-06, "loss": 0.1707, "step": 10298 }, { "epoch": 0.74, "grad_norm": 1.1717707869613851, "learning_rate": 1.7106802556361724e-06, "loss": 0.1781, "step": 10299 }, { "epoch": 0.74, "grad_norm": 1.3610610741309637, "learning_rate": 1.709807906973196e-06, "loss": 0.1915, "step": 10300 }, { "epoch": 0.74, "grad_norm": 1.311107582636797, "learning_rate": 1.7089357349146601e-06, "loss": 0.1548, "step": 10301 }, { "epoch": 0.74, "grad_norm": 1.2550265407593348, "learning_rate": 1.7080637395073813e-06, "loss": 0.1864, "step": 10302 }, { "epoch": 0.74, "grad_norm": 1.608291989965493, "learning_rate": 1.7071919207981607e-06, "loss": 0.2147, "step": 10303 }, { "epoch": 0.74, "grad_norm": 1.2457981051884262, "learning_rate": 1.7063202788337963e-06, "loss": 0.187, "step": 10304 }, { "epoch": 0.74, "grad_norm": 1.6505542947461438, "learning_rate": 1.705448813661076e-06, "loss": 0.199, "step": 10305 }, { "epoch": 0.74, "grad_norm": 1.402698715295302, "learning_rate": 1.7045775253267727e-06, "loss": 0.2253, "step": 10306 }, { "epoch": 0.74, "grad_norm": 1.3944299251041152, "learning_rate": 1.7037064138776576e-06, "loss": 0.1697, "step": 10307 }, { "epoch": 0.74, "grad_norm": 1.320449494207843, "learning_rate": 1.7028354793604857e-06, "loss": 0.1978, "step": 10308 }, { "epoch": 0.74, "grad_norm": 4.2040076986967385, "learning_rate": 1.7019647218220059e-06, "loss": 0.4548, "step": 10309 }, { "epoch": 0.74, "grad_norm": 1.0962414813264134, "learning_rate": 1.701094141308957e-06, "loss": 0.1384, "step": 10310 }, { "epoch": 0.74, "grad_norm": 1.4099362471718222, "learning_rate": 1.7002237378680708e-06, "loss": 0.1734, "step": 10311 }, { "epoch": 0.74, "grad_norm": 1.208247842365986, "learning_rate": 1.6993535115460635e-06, "loss": 0.1575, "step": 10312 }, { "epoch": 0.74, "grad_norm": 1.3832252743388607, "learning_rate": 1.6984834623896483e-06, "loss": 0.1902, "step": 10313 }, { "epoch": 0.74, "grad_norm": 1.3786921913391152, "learning_rate": 1.6976135904455233e-06, "loss": 0.1759, "step": 10314 }, { "epoch": 0.74, "grad_norm": 1.4218006978677453, "learning_rate": 1.6967438957603826e-06, "loss": 0.1885, "step": 10315 }, { "epoch": 0.74, "grad_norm": 1.2412303311716457, "learning_rate": 1.6958743783809052e-06, "loss": 0.1555, "step": 10316 }, { "epoch": 0.74, "grad_norm": 1.3721081873400627, "learning_rate": 1.6950050383537637e-06, "loss": 0.189, "step": 10317 }, { "epoch": 0.74, "grad_norm": 1.2036364596083167, "learning_rate": 1.694135875725622e-06, "loss": 0.1299, "step": 10318 }, { "epoch": 0.74, "grad_norm": 1.555898266235163, "learning_rate": 1.6932668905431348e-06, "loss": 0.2032, "step": 10319 }, { "epoch": 0.74, "grad_norm": 1.3598769649801314, "learning_rate": 1.6923980828529424e-06, "loss": 0.1924, "step": 10320 }, { "epoch": 0.74, "grad_norm": 1.2599647475452873, "learning_rate": 1.6915294527016818e-06, "loss": 0.1445, "step": 10321 }, { "epoch": 0.74, "grad_norm": 1.44036931483922, "learning_rate": 1.6906610001359752e-06, "loss": 0.1777, "step": 10322 }, { "epoch": 0.74, "grad_norm": 1.4131806100990976, "learning_rate": 1.68979272520244e-06, "loss": 0.1599, "step": 10323 }, { "epoch": 0.74, "grad_norm": 1.3798080389938432, "learning_rate": 1.688924627947679e-06, "loss": 0.2041, "step": 10324 }, { "epoch": 0.74, "grad_norm": 1.253565151013493, "learning_rate": 1.6880567084182897e-06, "loss": 0.167, "step": 10325 }, { "epoch": 0.74, "grad_norm": 1.329094527169436, "learning_rate": 1.6871889666608588e-06, "loss": 0.1661, "step": 10326 }, { "epoch": 0.74, "grad_norm": 1.3676643715970154, "learning_rate": 1.686321402721965e-06, "loss": 0.1678, "step": 10327 }, { "epoch": 0.74, "grad_norm": 1.270827932603055, "learning_rate": 1.6854540166481715e-06, "loss": 0.1737, "step": 10328 }, { "epoch": 0.74, "grad_norm": 1.3367757327490406, "learning_rate": 1.6845868084860406e-06, "loss": 0.2021, "step": 10329 }, { "epoch": 0.74, "grad_norm": 1.4222947180171932, "learning_rate": 1.6837197782821162e-06, "loss": 0.1991, "step": 10330 }, { "epoch": 0.74, "grad_norm": 1.3507261696911916, "learning_rate": 1.6828529260829412e-06, "loss": 0.1653, "step": 10331 }, { "epoch": 0.74, "grad_norm": 1.3268775317838808, "learning_rate": 1.681986251935041e-06, "loss": 0.2294, "step": 10332 }, { "epoch": 0.74, "grad_norm": 1.2657716807797372, "learning_rate": 1.6811197558849373e-06, "loss": 0.1466, "step": 10333 }, { "epoch": 0.74, "grad_norm": 1.2538562372520918, "learning_rate": 1.6802534379791396e-06, "loss": 0.162, "step": 10334 }, { "epoch": 0.74, "grad_norm": 1.2921727358750732, "learning_rate": 1.679387298264149e-06, "loss": 0.1629, "step": 10335 }, { "epoch": 0.74, "grad_norm": 1.3598057348711288, "learning_rate": 1.6785213367864572e-06, "loss": 0.1929, "step": 10336 }, { "epoch": 0.74, "grad_norm": 1.4359303726512576, "learning_rate": 1.6776555535925432e-06, "loss": 0.1891, "step": 10337 }, { "epoch": 0.74, "grad_norm": 1.3769709236279324, "learning_rate": 1.676789948728882e-06, "loss": 0.1732, "step": 10338 }, { "epoch": 0.74, "grad_norm": 5.2005371134326, "learning_rate": 1.6759245222419324e-06, "loss": 0.4455, "step": 10339 }, { "epoch": 0.74, "grad_norm": 1.3280111772057075, "learning_rate": 1.6750592741781496e-06, "loss": 0.1836, "step": 10340 }, { "epoch": 0.74, "grad_norm": 1.1821331067811789, "learning_rate": 1.6741942045839743e-06, "loss": 0.1546, "step": 10341 }, { "epoch": 0.74, "grad_norm": 1.2079398834993706, "learning_rate": 1.6733293135058416e-06, "loss": 0.1673, "step": 10342 }, { "epoch": 0.74, "grad_norm": 1.4146461877872094, "learning_rate": 1.6724646009901752e-06, "loss": 0.1928, "step": 10343 }, { "epoch": 0.74, "grad_norm": 1.211824029428611, "learning_rate": 1.6716000670833903e-06, "loss": 0.1573, "step": 10344 }, { "epoch": 0.74, "grad_norm": 1.4332298482835029, "learning_rate": 1.6707357118318901e-06, "loss": 0.184, "step": 10345 }, { "epoch": 0.74, "grad_norm": 1.473332589404938, "learning_rate": 1.6698715352820716e-06, "loss": 0.2126, "step": 10346 }, { "epoch": 0.74, "grad_norm": 1.5675267670883708, "learning_rate": 1.6690075374803178e-06, "loss": 0.1753, "step": 10347 }, { "epoch": 0.74, "grad_norm": 1.5720496604507306, "learning_rate": 1.6681437184730077e-06, "loss": 0.1912, "step": 10348 }, { "epoch": 0.74, "grad_norm": 1.3617973574152942, "learning_rate": 1.667280078306504e-06, "loss": 0.149, "step": 10349 }, { "epoch": 0.74, "grad_norm": 1.2774610620335403, "learning_rate": 1.6664166170271657e-06, "loss": 0.152, "step": 10350 }, { "epoch": 0.74, "grad_norm": 8.489654349738053, "learning_rate": 1.6655533346813397e-06, "loss": 0.5743, "step": 10351 }, { "epoch": 0.74, "grad_norm": 1.6098959885164814, "learning_rate": 1.6646902313153651e-06, "loss": 0.2221, "step": 10352 }, { "epoch": 0.74, "grad_norm": 1.2932019864784865, "learning_rate": 1.6638273069755672e-06, "loss": 0.2279, "step": 10353 }, { "epoch": 0.74, "grad_norm": 4.820790118026856, "learning_rate": 1.6629645617082668e-06, "loss": 0.6432, "step": 10354 }, { "epoch": 0.74, "grad_norm": 1.4448411440971078, "learning_rate": 1.66210199555977e-06, "loss": 0.1692, "step": 10355 }, { "epoch": 0.74, "grad_norm": 1.362823644086111, "learning_rate": 1.6612396085763794e-06, "loss": 0.2045, "step": 10356 }, { "epoch": 0.74, "grad_norm": 1.274987769968265, "learning_rate": 1.66037740080438e-06, "loss": 0.1654, "step": 10357 }, { "epoch": 0.74, "grad_norm": 1.3074232325559954, "learning_rate": 1.6595153722900542e-06, "loss": 0.1772, "step": 10358 }, { "epoch": 0.74, "grad_norm": 4.994467196821839, "learning_rate": 1.6586535230796725e-06, "loss": 0.5328, "step": 10359 }, { "epoch": 0.74, "grad_norm": 1.3008356056869474, "learning_rate": 1.657791853219497e-06, "loss": 0.1399, "step": 10360 }, { "epoch": 0.74, "grad_norm": 1.4040576030812788, "learning_rate": 1.6569303627557749e-06, "loss": 0.195, "step": 10361 }, { "epoch": 0.74, "grad_norm": 1.3179984547509473, "learning_rate": 1.656069051734751e-06, "loss": 0.1665, "step": 10362 }, { "epoch": 0.74, "grad_norm": 1.33828010953948, "learning_rate": 1.655207920202655e-06, "loss": 0.1868, "step": 10363 }, { "epoch": 0.74, "grad_norm": 1.5160046960303568, "learning_rate": 1.6543469682057105e-06, "loss": 0.185, "step": 10364 }, { "epoch": 0.74, "grad_norm": 5.638459050556768, "learning_rate": 1.6534861957901277e-06, "loss": 0.74, "step": 10365 }, { "epoch": 0.74, "grad_norm": 1.2348844798402445, "learning_rate": 1.6526256030021109e-06, "loss": 0.1411, "step": 10366 }, { "epoch": 0.74, "grad_norm": 1.374223300756354, "learning_rate": 1.6517651898878534e-06, "loss": 0.209, "step": 10367 }, { "epoch": 0.74, "grad_norm": 1.261322820189383, "learning_rate": 1.6509049564935387e-06, "loss": 0.1642, "step": 10368 }, { "epoch": 0.74, "grad_norm": 4.642022633800516, "learning_rate": 1.6500449028653425e-06, "loss": 0.4681, "step": 10369 }, { "epoch": 0.74, "grad_norm": 5.258272432285171, "learning_rate": 1.6491850290494255e-06, "loss": 0.705, "step": 10370 }, { "epoch": 0.74, "grad_norm": 1.424837576465882, "learning_rate": 1.6483253350919453e-06, "loss": 0.1635, "step": 10371 }, { "epoch": 0.74, "grad_norm": 7.793462889141143, "learning_rate": 1.6474658210390443e-06, "loss": 0.5877, "step": 10372 }, { "epoch": 0.74, "grad_norm": 1.4163291508486673, "learning_rate": 1.6466064869368609e-06, "loss": 0.173, "step": 10373 }, { "epoch": 0.74, "grad_norm": 1.4869653565432819, "learning_rate": 1.6457473328315171e-06, "loss": 0.1665, "step": 10374 }, { "epoch": 0.74, "grad_norm": 1.330769568187334, "learning_rate": 1.6448883587691311e-06, "loss": 0.1768, "step": 10375 }, { "epoch": 0.74, "grad_norm": 1.2075519557305774, "learning_rate": 1.6440295647958081e-06, "loss": 0.175, "step": 10376 }, { "epoch": 0.74, "grad_norm": 7.599522185607976, "learning_rate": 1.6431709509576477e-06, "loss": 0.5564, "step": 10377 }, { "epoch": 0.74, "grad_norm": 1.2939894682175557, "learning_rate": 1.6423125173007332e-06, "loss": 0.2022, "step": 10378 }, { "epoch": 0.74, "grad_norm": 1.2238600316114605, "learning_rate": 1.6414542638711445e-06, "loss": 0.1737, "step": 10379 }, { "epoch": 0.74, "grad_norm": 1.2107266696746273, "learning_rate": 1.640596190714947e-06, "loss": 0.1615, "step": 10380 }, { "epoch": 0.74, "grad_norm": 1.3345972625107967, "learning_rate": 1.639738297878201e-06, "loss": 0.177, "step": 10381 }, { "epoch": 0.74, "grad_norm": 4.981025231046975, "learning_rate": 1.6388805854069522e-06, "loss": 0.7038, "step": 10382 }, { "epoch": 0.74, "grad_norm": 1.4495144098321755, "learning_rate": 1.6380230533472402e-06, "loss": 0.1616, "step": 10383 }, { "epoch": 0.74, "grad_norm": 1.1676309135795562, "learning_rate": 1.6371657017450948e-06, "loss": 0.1523, "step": 10384 }, { "epoch": 0.74, "grad_norm": 7.23665370651643, "learning_rate": 1.636308530646536e-06, "loss": 0.4602, "step": 10385 }, { "epoch": 0.74, "grad_norm": 4.075335500028317, "learning_rate": 1.6354515400975702e-06, "loss": 0.6194, "step": 10386 }, { "epoch": 0.74, "grad_norm": 1.264501645165069, "learning_rate": 1.6345947301442011e-06, "loss": 0.1777, "step": 10387 }, { "epoch": 0.74, "grad_norm": 1.3274838100671293, "learning_rate": 1.633738100832415e-06, "loss": 0.1549, "step": 10388 }, { "epoch": 0.74, "grad_norm": 1.229808024579589, "learning_rate": 1.6328816522081958e-06, "loss": 0.1597, "step": 10389 }, { "epoch": 0.74, "grad_norm": 1.2499662371302345, "learning_rate": 1.632025384317511e-06, "loss": 0.1423, "step": 10390 }, { "epoch": 0.74, "grad_norm": 1.275561075677841, "learning_rate": 1.6311692972063254e-06, "loss": 0.1485, "step": 10391 }, { "epoch": 0.74, "grad_norm": 1.3483622186384292, "learning_rate": 1.6303133909205859e-06, "loss": 0.158, "step": 10392 }, { "epoch": 0.74, "grad_norm": 5.173664842272985, "learning_rate": 1.6294576655062372e-06, "loss": 0.536, "step": 10393 }, { "epoch": 0.74, "grad_norm": 1.140890022534197, "learning_rate": 1.6286021210092101e-06, "loss": 0.1635, "step": 10394 }, { "epoch": 0.74, "grad_norm": 1.48083233380804, "learning_rate": 1.627746757475429e-06, "loss": 0.2016, "step": 10395 }, { "epoch": 0.74, "grad_norm": 1.4528764647578005, "learning_rate": 1.6268915749508036e-06, "loss": 0.1957, "step": 10396 }, { "epoch": 0.74, "grad_norm": 1.4743110767665601, "learning_rate": 1.626036573481239e-06, "loss": 0.2009, "step": 10397 }, { "epoch": 0.74, "grad_norm": 1.2925835214429204, "learning_rate": 1.6251817531126252e-06, "loss": 0.14, "step": 10398 }, { "epoch": 0.74, "grad_norm": 1.2749657033878894, "learning_rate": 1.6243271138908479e-06, "loss": 0.1618, "step": 10399 }, { "epoch": 0.74, "grad_norm": 1.4880029236402061, "learning_rate": 1.623472655861782e-06, "loss": 0.2059, "step": 10400 }, { "epoch": 0.74, "grad_norm": 1.4890311812433514, "learning_rate": 1.6226183790712874e-06, "loss": 0.2206, "step": 10401 }, { "epoch": 0.74, "grad_norm": 1.407354880385227, "learning_rate": 1.6217642835652215e-06, "loss": 0.1858, "step": 10402 }, { "epoch": 0.74, "grad_norm": 1.2731865191454874, "learning_rate": 1.6209103693894275e-06, "loss": 0.1547, "step": 10403 }, { "epoch": 0.74, "grad_norm": 1.1739939690333454, "learning_rate": 1.6200566365897414e-06, "loss": 0.1573, "step": 10404 }, { "epoch": 0.74, "grad_norm": 1.2864779234869765, "learning_rate": 1.619203085211986e-06, "loss": 0.1636, "step": 10405 }, { "epoch": 0.74, "grad_norm": 1.3716415650105631, "learning_rate": 1.61834971530198e-06, "loss": 0.2044, "step": 10406 }, { "epoch": 0.74, "grad_norm": 1.1904280828200353, "learning_rate": 1.6174965269055242e-06, "loss": 0.1591, "step": 10407 }, { "epoch": 0.74, "grad_norm": 1.1772613245249952, "learning_rate": 1.6166435200684193e-06, "loss": 0.1407, "step": 10408 }, { "epoch": 0.74, "grad_norm": 1.381330590843818, "learning_rate": 1.6157906948364471e-06, "loss": 0.2043, "step": 10409 }, { "epoch": 0.74, "grad_norm": 1.2767343679928234, "learning_rate": 1.6149380512553854e-06, "loss": 0.1561, "step": 10410 }, { "epoch": 0.74, "grad_norm": 1.272197621078242, "learning_rate": 1.6140855893710005e-06, "loss": 0.1446, "step": 10411 }, { "epoch": 0.74, "grad_norm": 1.3346564994831331, "learning_rate": 1.613233309229052e-06, "loss": 0.1729, "step": 10412 }, { "epoch": 0.74, "grad_norm": 1.2638352032189408, "learning_rate": 1.6123812108752828e-06, "loss": 0.1919, "step": 10413 }, { "epoch": 0.74, "grad_norm": 1.2074662517025514, "learning_rate": 1.6115292943554334e-06, "loss": 0.1455, "step": 10414 }, { "epoch": 0.74, "grad_norm": 1.265967151498462, "learning_rate": 1.610677559715228e-06, "loss": 0.1365, "step": 10415 }, { "epoch": 0.75, "grad_norm": 4.7277266145406465, "learning_rate": 1.6098260070003884e-06, "loss": 0.5701, "step": 10416 }, { "epoch": 0.75, "grad_norm": 1.3827494922084411, "learning_rate": 1.6089746362566182e-06, "loss": 0.1782, "step": 10417 }, { "epoch": 0.75, "grad_norm": 1.2560205681383758, "learning_rate": 1.6081234475296177e-06, "loss": 0.1853, "step": 10418 }, { "epoch": 0.75, "grad_norm": 1.1818173436187212, "learning_rate": 1.6072724408650752e-06, "loss": 0.1345, "step": 10419 }, { "epoch": 0.75, "grad_norm": 1.3522643277313509, "learning_rate": 1.6064216163086716e-06, "loss": 0.1495, "step": 10420 }, { "epoch": 0.75, "grad_norm": 1.6086104272864234, "learning_rate": 1.6055709739060715e-06, "loss": 0.2019, "step": 10421 }, { "epoch": 0.75, "grad_norm": 1.2061845364093557, "learning_rate": 1.604720513702938e-06, "loss": 0.1495, "step": 10422 }, { "epoch": 0.75, "grad_norm": 1.2344255580758596, "learning_rate": 1.6038702357449171e-06, "loss": 0.1545, "step": 10423 }, { "epoch": 0.75, "grad_norm": 1.383317539515382, "learning_rate": 1.6030201400776513e-06, "loss": 0.1577, "step": 10424 }, { "epoch": 0.75, "grad_norm": 5.422823544165772, "learning_rate": 1.602170226746767e-06, "loss": 0.5171, "step": 10425 }, { "epoch": 0.75, "grad_norm": 1.1964492058917868, "learning_rate": 1.6013204957978868e-06, "loss": 0.1762, "step": 10426 }, { "epoch": 0.75, "grad_norm": 1.2923217032177687, "learning_rate": 1.6004709472766195e-06, "loss": 0.1564, "step": 10427 }, { "epoch": 0.75, "grad_norm": 1.388750144845829, "learning_rate": 1.5996215812285682e-06, "loss": 0.1675, "step": 10428 }, { "epoch": 0.75, "grad_norm": 1.482674110094329, "learning_rate": 1.598772397699319e-06, "loss": 0.1701, "step": 10429 }, { "epoch": 0.75, "grad_norm": 1.2153599147872247, "learning_rate": 1.5979233967344575e-06, "loss": 0.1948, "step": 10430 }, { "epoch": 0.75, "grad_norm": 1.5015225829071586, "learning_rate": 1.5970745783795504e-06, "loss": 0.1913, "step": 10431 }, { "epoch": 0.75, "grad_norm": 1.260102406718699, "learning_rate": 1.5962259426801613e-06, "loss": 0.1497, "step": 10432 }, { "epoch": 0.75, "grad_norm": 1.3919105355351218, "learning_rate": 1.5953774896818424e-06, "loss": 0.1771, "step": 10433 }, { "epoch": 0.75, "grad_norm": 6.3817853805002125, "learning_rate": 1.5945292194301326e-06, "loss": 0.5207, "step": 10434 }, { "epoch": 0.75, "grad_norm": 1.309524487955066, "learning_rate": 1.5936811319705648e-06, "loss": 0.1847, "step": 10435 }, { "epoch": 0.75, "grad_norm": 1.4364693382993523, "learning_rate": 1.5928332273486614e-06, "loss": 0.2253, "step": 10436 }, { "epoch": 0.75, "grad_norm": 1.4914262836960828, "learning_rate": 1.591985505609936e-06, "loss": 0.1748, "step": 10437 }, { "epoch": 0.75, "grad_norm": 1.3144699714126866, "learning_rate": 1.5911379667998877e-06, "loss": 0.1688, "step": 10438 }, { "epoch": 0.75, "grad_norm": 5.863829595753437, "learning_rate": 1.590290610964012e-06, "loss": 0.4636, "step": 10439 }, { "epoch": 0.75, "grad_norm": 1.4717114721475364, "learning_rate": 1.589443438147789e-06, "loss": 0.1958, "step": 10440 }, { "epoch": 0.75, "grad_norm": 1.3174063206379354, "learning_rate": 1.5885964483966943e-06, "loss": 0.1531, "step": 10441 }, { "epoch": 0.75, "grad_norm": 1.3179528723446643, "learning_rate": 1.5877496417561876e-06, "loss": 0.1474, "step": 10442 }, { "epoch": 0.75, "grad_norm": 1.5442161100087044, "learning_rate": 1.5869030182717243e-06, "loss": 0.1988, "step": 10443 }, { "epoch": 0.75, "grad_norm": 6.037153163028543, "learning_rate": 1.5860565779887478e-06, "loss": 0.6036, "step": 10444 }, { "epoch": 0.75, "grad_norm": 1.1890610143495717, "learning_rate": 1.585210320952692e-06, "loss": 0.153, "step": 10445 }, { "epoch": 0.75, "grad_norm": 1.2866077585034341, "learning_rate": 1.5843642472089794e-06, "loss": 0.1791, "step": 10446 }, { "epoch": 0.75, "grad_norm": 1.1838220178068828, "learning_rate": 1.5835183568030254e-06, "loss": 0.1586, "step": 10447 }, { "epoch": 0.75, "grad_norm": 1.2007826506998083, "learning_rate": 1.5826726497802314e-06, "loss": 0.1601, "step": 10448 }, { "epoch": 0.75, "grad_norm": 1.3296838973276808, "learning_rate": 1.5818271261859946e-06, "loss": 0.1498, "step": 10449 }, { "epoch": 0.75, "grad_norm": 1.4753703581185218, "learning_rate": 1.5809817860656967e-06, "loss": 0.1605, "step": 10450 }, { "epoch": 0.75, "grad_norm": 5.542361186576464, "learning_rate": 1.5801366294647135e-06, "loss": 0.4413, "step": 10451 }, { "epoch": 0.75, "grad_norm": 1.2720675829811852, "learning_rate": 1.57929165642841e-06, "loss": 0.1525, "step": 10452 }, { "epoch": 0.75, "grad_norm": 1.493673222914656, "learning_rate": 1.5784468670021418e-06, "loss": 0.1571, "step": 10453 }, { "epoch": 0.75, "grad_norm": 6.923016990517658, "learning_rate": 1.5776022612312514e-06, "loss": 0.4507, "step": 10454 }, { "epoch": 0.75, "grad_norm": 1.4485115577611092, "learning_rate": 1.576757839161076e-06, "loss": 0.1601, "step": 10455 }, { "epoch": 0.75, "grad_norm": 1.3753279095893713, "learning_rate": 1.575913600836939e-06, "loss": 0.1764, "step": 10456 }, { "epoch": 0.75, "grad_norm": 1.3441596059211236, "learning_rate": 1.5750695463041581e-06, "loss": 0.1552, "step": 10457 }, { "epoch": 0.75, "grad_norm": 1.3578550107654128, "learning_rate": 1.574225675608036e-06, "loss": 0.1516, "step": 10458 }, { "epoch": 0.75, "grad_norm": 1.2782773816434025, "learning_rate": 1.5733819887938701e-06, "loss": 0.1706, "step": 10459 }, { "epoch": 0.75, "grad_norm": 1.270020650996983, "learning_rate": 1.5725384859069454e-06, "loss": 0.151, "step": 10460 }, { "epoch": 0.75, "grad_norm": 1.4095682848122812, "learning_rate": 1.5716951669925396e-06, "loss": 0.1764, "step": 10461 }, { "epoch": 0.75, "grad_norm": 1.2710444153630065, "learning_rate": 1.5708520320959158e-06, "loss": 0.1885, "step": 10462 }, { "epoch": 0.75, "grad_norm": 1.5947326455532704, "learning_rate": 1.570009081262332e-06, "loss": 0.1948, "step": 10463 }, { "epoch": 0.75, "grad_norm": 1.4312996145327896, "learning_rate": 1.5691663145370351e-06, "loss": 0.213, "step": 10464 }, { "epoch": 0.75, "grad_norm": 1.2940615763119767, "learning_rate": 1.5683237319652594e-06, "loss": 0.1972, "step": 10465 }, { "epoch": 0.75, "grad_norm": 1.4541562881732073, "learning_rate": 1.567481333592234e-06, "loss": 0.1939, "step": 10466 }, { "epoch": 0.75, "grad_norm": 1.4593776333657507, "learning_rate": 1.566639119463172e-06, "loss": 0.1807, "step": 10467 }, { "epoch": 0.75, "grad_norm": 7.2427895766204, "learning_rate": 1.5657970896232822e-06, "loss": 0.5081, "step": 10468 }, { "epoch": 0.75, "grad_norm": 1.2644237451651799, "learning_rate": 1.5649552441177612e-06, "loss": 0.1191, "step": 10469 }, { "epoch": 0.75, "grad_norm": 1.37473850002062, "learning_rate": 1.5641135829917976e-06, "loss": 0.1958, "step": 10470 }, { "epoch": 0.75, "grad_norm": 1.4481417203664921, "learning_rate": 1.5632721062905653e-06, "loss": 0.2231, "step": 10471 }, { "epoch": 0.75, "grad_norm": 1.3610888233221126, "learning_rate": 1.5624308140592347e-06, "loss": 0.1284, "step": 10472 }, { "epoch": 0.75, "grad_norm": 1.2721491562571627, "learning_rate": 1.5615897063429591e-06, "loss": 0.1354, "step": 10473 }, { "epoch": 0.75, "grad_norm": 1.4442291585209646, "learning_rate": 1.56074878318689e-06, "loss": 0.1504, "step": 10474 }, { "epoch": 0.75, "grad_norm": 1.5515207498420296, "learning_rate": 1.5599080446361614e-06, "loss": 0.1822, "step": 10475 }, { "epoch": 0.75, "grad_norm": 1.3109183797055048, "learning_rate": 1.559067490735902e-06, "loss": 0.1728, "step": 10476 }, { "epoch": 0.75, "grad_norm": 1.2683373792990822, "learning_rate": 1.5582271215312294e-06, "loss": 0.154, "step": 10477 }, { "epoch": 0.75, "grad_norm": 4.618002043239327, "learning_rate": 1.5573869370672534e-06, "loss": 0.6047, "step": 10478 }, { "epoch": 0.75, "grad_norm": 1.4837638178701218, "learning_rate": 1.556546937389068e-06, "loss": 0.1554, "step": 10479 }, { "epoch": 0.75, "grad_norm": 1.3536498608784664, "learning_rate": 1.5557071225417648e-06, "loss": 0.1473, "step": 10480 }, { "epoch": 0.75, "grad_norm": 1.4980320228970405, "learning_rate": 1.5548674925704178e-06, "loss": 0.1948, "step": 10481 }, { "epoch": 0.75, "grad_norm": 1.439382566889818, "learning_rate": 1.5540280475200986e-06, "loss": 0.172, "step": 10482 }, { "epoch": 0.75, "grad_norm": 1.2071957810521379, "learning_rate": 1.5531887874358625e-06, "loss": 0.1409, "step": 10483 }, { "epoch": 0.75, "grad_norm": 1.3297630809915868, "learning_rate": 1.5523497123627584e-06, "loss": 0.1879, "step": 10484 }, { "epoch": 0.75, "grad_norm": 4.760363322440483, "learning_rate": 1.5515108223458253e-06, "loss": 0.4804, "step": 10485 }, { "epoch": 0.75, "grad_norm": 1.3020760278978654, "learning_rate": 1.5506721174300926e-06, "loss": 0.1717, "step": 10486 }, { "epoch": 0.75, "grad_norm": 6.050288118512901, "learning_rate": 1.5498335976605756e-06, "loss": 0.6893, "step": 10487 }, { "epoch": 0.75, "grad_norm": 1.2991265476024951, "learning_rate": 1.5489952630822858e-06, "loss": 0.1798, "step": 10488 }, { "epoch": 0.75, "grad_norm": 1.1791477022383148, "learning_rate": 1.5481571137402185e-06, "loss": 0.1594, "step": 10489 }, { "epoch": 0.75, "grad_norm": 1.2620432561465613, "learning_rate": 1.547319149679366e-06, "loss": 0.1463, "step": 10490 }, { "epoch": 0.75, "grad_norm": 1.415792197423433, "learning_rate": 1.5464813709447023e-06, "loss": 0.1966, "step": 10491 }, { "epoch": 0.75, "grad_norm": 1.3312179585275428, "learning_rate": 1.5456437775812011e-06, "loss": 0.1569, "step": 10492 }, { "epoch": 0.75, "grad_norm": 1.476917509438233, "learning_rate": 1.544806369633816e-06, "loss": 0.2036, "step": 10493 }, { "epoch": 0.75, "grad_norm": 1.342948707354769, "learning_rate": 1.5439691471474989e-06, "loss": 0.1675, "step": 10494 }, { "epoch": 0.75, "grad_norm": 1.3633698369517413, "learning_rate": 1.5431321101671875e-06, "loss": 0.1877, "step": 10495 }, { "epoch": 0.75, "grad_norm": 1.411179893935012, "learning_rate": 1.5422952587378114e-06, "loss": 0.1906, "step": 10496 }, { "epoch": 0.75, "grad_norm": 1.3983821057002455, "learning_rate": 1.5414585929042902e-06, "loss": 0.1592, "step": 10497 }, { "epoch": 0.75, "grad_norm": 1.5457220171447756, "learning_rate": 1.5406221127115306e-06, "loss": 0.1709, "step": 10498 }, { "epoch": 0.75, "grad_norm": 1.59649189888967, "learning_rate": 1.539785818204434e-06, "loss": 0.188, "step": 10499 }, { "epoch": 0.75, "grad_norm": 1.3857419513300129, "learning_rate": 1.5389497094278861e-06, "loss": 0.1785, "step": 10500 }, { "epoch": 0.75, "grad_norm": 1.453495273364983, "learning_rate": 1.5381137864267693e-06, "loss": 0.189, "step": 10501 }, { "epoch": 0.75, "grad_norm": 1.3588141733003223, "learning_rate": 1.53727804924595e-06, "loss": 0.202, "step": 10502 }, { "epoch": 0.75, "grad_norm": 1.2225155290765854, "learning_rate": 1.5364424979302877e-06, "loss": 0.1453, "step": 10503 }, { "epoch": 0.75, "grad_norm": 1.4257958323614714, "learning_rate": 1.535607132524633e-06, "loss": 0.1739, "step": 10504 }, { "epoch": 0.75, "grad_norm": 1.8002157627992004, "learning_rate": 1.5347719530738254e-06, "loss": 0.2025, "step": 10505 }, { "epoch": 0.75, "grad_norm": 1.1187456986922084, "learning_rate": 1.5339369596226905e-06, "loss": 0.1709, "step": 10506 }, { "epoch": 0.75, "grad_norm": 1.092893006757683, "learning_rate": 1.5331021522160522e-06, "loss": 0.1246, "step": 10507 }, { "epoch": 0.75, "grad_norm": 1.3410714646742177, "learning_rate": 1.532267530898715e-06, "loss": 0.1605, "step": 10508 }, { "epoch": 0.75, "grad_norm": 1.264595904732522, "learning_rate": 1.5314330957154817e-06, "loss": 0.1605, "step": 10509 }, { "epoch": 0.75, "grad_norm": 1.262591230676526, "learning_rate": 1.5305988467111388e-06, "loss": 0.1793, "step": 10510 }, { "epoch": 0.75, "grad_norm": 1.4103210248021236, "learning_rate": 1.5297647839304663e-06, "loss": 0.172, "step": 10511 }, { "epoch": 0.75, "grad_norm": 1.3350509223522926, "learning_rate": 1.5289309074182334e-06, "loss": 0.1702, "step": 10512 }, { "epoch": 0.75, "grad_norm": 1.4154796423185076, "learning_rate": 1.5280972172192015e-06, "loss": 0.1882, "step": 10513 }, { "epoch": 0.75, "grad_norm": 7.650132589344743, "learning_rate": 1.5272637133781159e-06, "loss": 0.6434, "step": 10514 }, { "epoch": 0.75, "grad_norm": 1.3873754900927415, "learning_rate": 1.5264303959397197e-06, "loss": 0.1687, "step": 10515 }, { "epoch": 0.75, "grad_norm": 1.190551666666915, "learning_rate": 1.5255972649487388e-06, "loss": 0.1408, "step": 10516 }, { "epoch": 0.75, "grad_norm": 1.1721007522357072, "learning_rate": 1.5247643204498947e-06, "loss": 0.1942, "step": 10517 }, { "epoch": 0.75, "grad_norm": 1.3432918007970127, "learning_rate": 1.5239315624878942e-06, "loss": 0.1623, "step": 10518 }, { "epoch": 0.75, "grad_norm": 1.3498411248222175, "learning_rate": 1.5230989911074374e-06, "loss": 0.1597, "step": 10519 }, { "epoch": 0.75, "grad_norm": 1.3232695452513834, "learning_rate": 1.5222666063532138e-06, "loss": 0.185, "step": 10520 }, { "epoch": 0.75, "grad_norm": 1.3858168196914384, "learning_rate": 1.5214344082699045e-06, "loss": 0.1722, "step": 10521 }, { "epoch": 0.75, "grad_norm": 1.4316784861820746, "learning_rate": 1.5206023969021743e-06, "loss": 0.1655, "step": 10522 }, { "epoch": 0.75, "grad_norm": 1.2812976406998857, "learning_rate": 1.5197705722946866e-06, "loss": 0.1169, "step": 10523 }, { "epoch": 0.75, "grad_norm": 4.812609794364607, "learning_rate": 1.518938934492087e-06, "loss": 0.5593, "step": 10524 }, { "epoch": 0.75, "grad_norm": 1.549235289607002, "learning_rate": 1.5181074835390169e-06, "loss": 0.1642, "step": 10525 }, { "epoch": 0.75, "grad_norm": 7.605472880724829, "learning_rate": 1.5172762194801028e-06, "loss": 0.6602, "step": 10526 }, { "epoch": 0.75, "grad_norm": 4.831231461324541, "learning_rate": 1.5164451423599651e-06, "loss": 0.6236, "step": 10527 }, { "epoch": 0.75, "grad_norm": 1.5725697854023923, "learning_rate": 1.515614252223213e-06, "loss": 0.158, "step": 10528 }, { "epoch": 0.75, "grad_norm": 1.2503000998528866, "learning_rate": 1.5147835491144448e-06, "loss": 0.1592, "step": 10529 }, { "epoch": 0.75, "grad_norm": 1.370360427676737, "learning_rate": 1.5139530330782515e-06, "loss": 0.1722, "step": 10530 }, { "epoch": 0.75, "grad_norm": 1.2886671493011164, "learning_rate": 1.5131227041592083e-06, "loss": 0.1418, "step": 10531 }, { "epoch": 0.75, "grad_norm": 1.3157957867042849, "learning_rate": 1.5122925624018875e-06, "loss": 0.1596, "step": 10532 }, { "epoch": 0.75, "grad_norm": 1.4251297502743585, "learning_rate": 1.5114626078508442e-06, "loss": 0.1993, "step": 10533 }, { "epoch": 0.75, "grad_norm": 1.1608913134696162, "learning_rate": 1.5106328405506304e-06, "loss": 0.1451, "step": 10534 }, { "epoch": 0.75, "grad_norm": 1.46126676309415, "learning_rate": 1.5098032605457818e-06, "loss": 0.1524, "step": 10535 }, { "epoch": 0.75, "grad_norm": 1.5662206507480947, "learning_rate": 1.5089738678808285e-06, "loss": 0.1986, "step": 10536 }, { "epoch": 0.75, "grad_norm": 1.2158037939564321, "learning_rate": 1.5081446626002889e-06, "loss": 0.1318, "step": 10537 }, { "epoch": 0.75, "grad_norm": 1.6869950033723824, "learning_rate": 1.5073156447486725e-06, "loss": 0.194, "step": 10538 }, { "epoch": 0.75, "grad_norm": 1.1966204025155878, "learning_rate": 1.5064868143704752e-06, "loss": 0.1765, "step": 10539 }, { "epoch": 0.75, "grad_norm": 1.45471694700186, "learning_rate": 1.5056581715101887e-06, "loss": 0.2049, "step": 10540 }, { "epoch": 0.75, "grad_norm": 1.3567814666740656, "learning_rate": 1.5048297162122872e-06, "loss": 0.1567, "step": 10541 }, { "epoch": 0.75, "grad_norm": 1.3143219045746624, "learning_rate": 1.5040014485212428e-06, "loss": 0.1848, "step": 10542 }, { "epoch": 0.75, "grad_norm": 1.4288827634511303, "learning_rate": 1.5031733684815103e-06, "loss": 0.1936, "step": 10543 }, { "epoch": 0.75, "grad_norm": 1.330689146820718, "learning_rate": 1.5023454761375389e-06, "loss": 0.188, "step": 10544 }, { "epoch": 0.75, "grad_norm": 1.5496218307316194, "learning_rate": 1.501517771533767e-06, "loss": 0.1906, "step": 10545 }, { "epoch": 0.75, "grad_norm": 4.712509488714706, "learning_rate": 1.500690254714624e-06, "loss": 0.6921, "step": 10546 }, { "epoch": 0.75, "grad_norm": 1.5104770457518657, "learning_rate": 1.4998629257245246e-06, "loss": 0.1742, "step": 10547 }, { "epoch": 0.75, "grad_norm": 1.213206683677835, "learning_rate": 1.4990357846078795e-06, "loss": 0.1569, "step": 10548 }, { "epoch": 0.75, "grad_norm": 1.3278586200749611, "learning_rate": 1.498208831409083e-06, "loss": 0.1623, "step": 10549 }, { "epoch": 0.75, "grad_norm": 4.374103606124417, "learning_rate": 1.4973820661725269e-06, "loss": 0.5857, "step": 10550 }, { "epoch": 0.75, "grad_norm": 1.3249415078196536, "learning_rate": 1.4965554889425843e-06, "loss": 0.1547, "step": 10551 }, { "epoch": 0.75, "grad_norm": 1.4391646263227567, "learning_rate": 1.4957290997636247e-06, "loss": 0.2013, "step": 10552 }, { "epoch": 0.75, "grad_norm": 1.2289012474443257, "learning_rate": 1.4949028986800053e-06, "loss": 0.1462, "step": 10553 }, { "epoch": 0.75, "grad_norm": 1.4348550957699242, "learning_rate": 1.4940768857360743e-06, "loss": 0.1647, "step": 10554 }, { "epoch": 0.76, "grad_norm": 1.567119717207712, "learning_rate": 1.4932510609761668e-06, "loss": 0.2338, "step": 10555 }, { "epoch": 0.76, "grad_norm": 1.4374220379260532, "learning_rate": 1.4924254244446123e-06, "loss": 0.1661, "step": 10556 }, { "epoch": 0.76, "grad_norm": 1.2225683188446723, "learning_rate": 1.4915999761857246e-06, "loss": 0.1622, "step": 10557 }, { "epoch": 0.76, "grad_norm": 1.207388201889036, "learning_rate": 1.4907747162438135e-06, "loss": 0.1722, "step": 10558 }, { "epoch": 0.76, "grad_norm": 1.3134720054135238, "learning_rate": 1.4899496446631728e-06, "loss": 0.1469, "step": 10559 }, { "epoch": 0.76, "grad_norm": 1.3548453884450626, "learning_rate": 1.4891247614880904e-06, "loss": 0.1413, "step": 10560 }, { "epoch": 0.76, "grad_norm": 1.246687918094449, "learning_rate": 1.4883000667628428e-06, "loss": 0.197, "step": 10561 }, { "epoch": 0.76, "grad_norm": 1.5803243777354625, "learning_rate": 1.4874755605316966e-06, "loss": 0.1817, "step": 10562 }, { "epoch": 0.76, "grad_norm": 1.5371401992368383, "learning_rate": 1.486651242838909e-06, "loss": 0.1596, "step": 10563 }, { "epoch": 0.76, "grad_norm": 1.3902733442039896, "learning_rate": 1.4858271137287234e-06, "loss": 0.1818, "step": 10564 }, { "epoch": 0.76, "grad_norm": 1.3611509376914028, "learning_rate": 1.485003173245379e-06, "loss": 0.1703, "step": 10565 }, { "epoch": 0.76, "grad_norm": 1.3726066295824362, "learning_rate": 1.4841794214330984e-06, "loss": 0.1546, "step": 10566 }, { "epoch": 0.76, "grad_norm": 1.2509577715646216, "learning_rate": 1.4833558583361002e-06, "loss": 0.1719, "step": 10567 }, { "epoch": 0.76, "grad_norm": 1.3060200520189311, "learning_rate": 1.4825324839985872e-06, "loss": 0.1803, "step": 10568 }, { "epoch": 0.76, "grad_norm": 1.1927775072944458, "learning_rate": 1.4817092984647564e-06, "loss": 0.1313, "step": 10569 }, { "epoch": 0.76, "grad_norm": 1.391061581669175, "learning_rate": 1.4808863017787928e-06, "loss": 0.1509, "step": 10570 }, { "epoch": 0.76, "grad_norm": 1.1670381189334535, "learning_rate": 1.4800634939848734e-06, "loss": 0.1616, "step": 10571 }, { "epoch": 0.76, "grad_norm": 1.3584365611697602, "learning_rate": 1.4792408751271603e-06, "loss": 0.1499, "step": 10572 }, { "epoch": 0.76, "grad_norm": 1.2825827495983173, "learning_rate": 1.4784184452498112e-06, "loss": 0.1795, "step": 10573 }, { "epoch": 0.76, "grad_norm": 5.316344244296368, "learning_rate": 1.4775962043969678e-06, "loss": 0.4989, "step": 10574 }, { "epoch": 0.76, "grad_norm": 6.370991733439597, "learning_rate": 1.4767741526127682e-06, "loss": 0.6052, "step": 10575 }, { "epoch": 0.76, "grad_norm": 8.636132891095267, "learning_rate": 1.475952289941333e-06, "loss": 0.4593, "step": 10576 }, { "epoch": 0.76, "grad_norm": 1.3060210390871034, "learning_rate": 1.4751306164267792e-06, "loss": 0.1702, "step": 10577 }, { "epoch": 0.76, "grad_norm": 1.3664071629583912, "learning_rate": 1.4743091321132103e-06, "loss": 0.1965, "step": 10578 }, { "epoch": 0.76, "grad_norm": 1.1782094955392424, "learning_rate": 1.4734878370447214e-06, "loss": 0.122, "step": 10579 }, { "epoch": 0.76, "grad_norm": 1.2213035165067083, "learning_rate": 1.472666731265394e-06, "loss": 0.1713, "step": 10580 }, { "epoch": 0.76, "grad_norm": 1.4007137884037673, "learning_rate": 1.471845814819305e-06, "loss": 0.1357, "step": 10581 }, { "epoch": 0.76, "grad_norm": 1.398630310099093, "learning_rate": 1.471025087750514e-06, "loss": 0.1759, "step": 10582 }, { "epoch": 0.76, "grad_norm": 5.374612275955994, "learning_rate": 1.470204550103078e-06, "loss": 0.4612, "step": 10583 }, { "epoch": 0.76, "grad_norm": 1.3737434647516604, "learning_rate": 1.4693842019210374e-06, "loss": 0.1987, "step": 10584 }, { "epoch": 0.76, "grad_norm": 1.4919742294220693, "learning_rate": 1.4685640432484283e-06, "loss": 0.179, "step": 10585 }, { "epoch": 0.76, "grad_norm": 1.2144052809144879, "learning_rate": 1.4677440741292682e-06, "loss": 0.1604, "step": 10586 }, { "epoch": 0.76, "grad_norm": 1.230132487344419, "learning_rate": 1.4669242946075762e-06, "loss": 0.1593, "step": 10587 }, { "epoch": 0.76, "grad_norm": 1.2755902067388574, "learning_rate": 1.4661047047273503e-06, "loss": 0.1782, "step": 10588 }, { "epoch": 0.76, "grad_norm": 5.2034993819031525, "learning_rate": 1.4652853045325866e-06, "loss": 0.6184, "step": 10589 }, { "epoch": 0.76, "grad_norm": 1.2600635159021931, "learning_rate": 1.4644660940672628e-06, "loss": 0.1534, "step": 10590 }, { "epoch": 0.76, "grad_norm": 1.3559647041955567, "learning_rate": 1.4636470733753534e-06, "loss": 0.198, "step": 10591 }, { "epoch": 0.76, "grad_norm": 1.4056354987457755, "learning_rate": 1.4628282425008217e-06, "loss": 0.178, "step": 10592 }, { "epoch": 0.76, "grad_norm": 1.3898302578419468, "learning_rate": 1.4620096014876156e-06, "loss": 0.1866, "step": 10593 }, { "epoch": 0.76, "grad_norm": 1.382559226409949, "learning_rate": 1.4611911503796806e-06, "loss": 0.1899, "step": 10594 }, { "epoch": 0.76, "grad_norm": 1.2286515797178907, "learning_rate": 1.4603728892209418e-06, "loss": 0.1618, "step": 10595 }, { "epoch": 0.76, "grad_norm": 1.1764502219829687, "learning_rate": 1.4595548180553275e-06, "loss": 0.1398, "step": 10596 }, { "epoch": 0.76, "grad_norm": 1.317049685017896, "learning_rate": 1.4587369369267435e-06, "loss": 0.188, "step": 10597 }, { "epoch": 0.76, "grad_norm": 1.2055789463948723, "learning_rate": 1.4579192458790937e-06, "loss": 0.1763, "step": 10598 }, { "epoch": 0.76, "grad_norm": 1.401903892202455, "learning_rate": 1.4571017449562652e-06, "loss": 0.1449, "step": 10599 }, { "epoch": 0.76, "grad_norm": 1.1100904088419075, "learning_rate": 1.456284434202142e-06, "loss": 0.1224, "step": 10600 }, { "epoch": 0.76, "grad_norm": 1.2103193996723656, "learning_rate": 1.4554673136605896e-06, "loss": 0.1722, "step": 10601 }, { "epoch": 0.76, "grad_norm": 1.3886078014208094, "learning_rate": 1.454650383375472e-06, "loss": 0.1972, "step": 10602 }, { "epoch": 0.76, "grad_norm": 1.4483303317922087, "learning_rate": 1.4538336433906352e-06, "loss": 0.1956, "step": 10603 }, { "epoch": 0.76, "grad_norm": 1.5776078396865971, "learning_rate": 1.4530170937499206e-06, "loss": 0.2079, "step": 10604 }, { "epoch": 0.76, "grad_norm": 6.532278756700247, "learning_rate": 1.4522007344971566e-06, "loss": 0.7007, "step": 10605 }, { "epoch": 0.76, "grad_norm": 1.274398674317184, "learning_rate": 1.4513845656761644e-06, "loss": 0.1568, "step": 10606 }, { "epoch": 0.76, "grad_norm": 1.3929696665481452, "learning_rate": 1.450568587330749e-06, "loss": 0.2015, "step": 10607 }, { "epoch": 0.76, "grad_norm": 6.05913217738532, "learning_rate": 1.4497527995047123e-06, "loss": 0.7864, "step": 10608 }, { "epoch": 0.76, "grad_norm": 1.2535824518215695, "learning_rate": 1.448937202241839e-06, "loss": 0.178, "step": 10609 }, { "epoch": 0.76, "grad_norm": 1.4345734708213747, "learning_rate": 1.4481217955859112e-06, "loss": 0.1528, "step": 10610 }, { "epoch": 0.76, "grad_norm": 1.4706450842209635, "learning_rate": 1.447306579580693e-06, "loss": 0.2059, "step": 10611 }, { "epoch": 0.76, "grad_norm": 1.2951598554853498, "learning_rate": 1.4464915542699432e-06, "loss": 0.157, "step": 10612 }, { "epoch": 0.76, "grad_norm": 1.3911767722779842, "learning_rate": 1.4456767196974098e-06, "loss": 0.1437, "step": 10613 }, { "epoch": 0.76, "grad_norm": 1.1517692208504924, "learning_rate": 1.4448620759068304e-06, "loss": 0.1106, "step": 10614 }, { "epoch": 0.76, "grad_norm": 1.2026303945974282, "learning_rate": 1.4440476229419299e-06, "loss": 0.1763, "step": 10615 }, { "epoch": 0.76, "grad_norm": 1.274013434380245, "learning_rate": 1.4432333608464276e-06, "loss": 0.1538, "step": 10616 }, { "epoch": 0.76, "grad_norm": 1.1760165045887663, "learning_rate": 1.4424192896640265e-06, "loss": 0.169, "step": 10617 }, { "epoch": 0.76, "grad_norm": 1.4586743250582557, "learning_rate": 1.4416054094384257e-06, "loss": 0.1859, "step": 10618 }, { "epoch": 0.76, "grad_norm": 1.245373465617723, "learning_rate": 1.4407917202133088e-06, "loss": 0.1449, "step": 10619 }, { "epoch": 0.76, "grad_norm": 1.4222419465369949, "learning_rate": 1.4399782220323515e-06, "loss": 0.1741, "step": 10620 }, { "epoch": 0.76, "grad_norm": 1.2488694661936237, "learning_rate": 1.4391649149392205e-06, "loss": 0.1859, "step": 10621 }, { "epoch": 0.76, "grad_norm": 1.3624180212075656, "learning_rate": 1.4383517989775708e-06, "loss": 0.1906, "step": 10622 }, { "epoch": 0.76, "grad_norm": 1.267535233288876, "learning_rate": 1.4375388741910478e-06, "loss": 0.1584, "step": 10623 }, { "epoch": 0.76, "grad_norm": 9.323832012700867, "learning_rate": 1.436726140623284e-06, "loss": 0.4138, "step": 10624 }, { "epoch": 0.76, "grad_norm": 1.2061636025153357, "learning_rate": 1.4359135983179062e-06, "loss": 0.1426, "step": 10625 }, { "epoch": 0.76, "grad_norm": 1.455849778896278, "learning_rate": 1.4351012473185255e-06, "loss": 0.1575, "step": 10626 }, { "epoch": 0.76, "grad_norm": 1.6485764862807495, "learning_rate": 1.4342890876687487e-06, "loss": 0.2042, "step": 10627 }, { "epoch": 0.76, "grad_norm": 1.4016511792544684, "learning_rate": 1.4334771194121667e-06, "loss": 0.1816, "step": 10628 }, { "epoch": 0.76, "grad_norm": 1.3714802513009687, "learning_rate": 1.4326653425923637e-06, "loss": 0.1567, "step": 10629 }, { "epoch": 0.76, "grad_norm": 1.3169689483132603, "learning_rate": 1.4318537572529129e-06, "loss": 0.1821, "step": 10630 }, { "epoch": 0.76, "grad_norm": 1.3282215000613666, "learning_rate": 1.4310423634373781e-06, "loss": 0.1587, "step": 10631 }, { "epoch": 0.76, "grad_norm": 1.22594977702295, "learning_rate": 1.4302311611893095e-06, "loss": 0.1319, "step": 10632 }, { "epoch": 0.76, "grad_norm": 1.458736479077391, "learning_rate": 1.4294201505522516e-06, "loss": 0.1896, "step": 10633 }, { "epoch": 0.76, "grad_norm": 1.2752821605354525, "learning_rate": 1.428609331569733e-06, "loss": 0.1363, "step": 10634 }, { "epoch": 0.76, "grad_norm": 1.3078509392709274, "learning_rate": 1.4277987042852787e-06, "loss": 0.1518, "step": 10635 }, { "epoch": 0.76, "grad_norm": 1.3532817483186756, "learning_rate": 1.426988268742397e-06, "loss": 0.1921, "step": 10636 }, { "epoch": 0.76, "grad_norm": 1.3522630572229293, "learning_rate": 1.4261780249845902e-06, "loss": 0.1619, "step": 10637 }, { "epoch": 0.76, "grad_norm": 1.4884331423372616, "learning_rate": 1.4253679730553489e-06, "loss": 0.185, "step": 10638 }, { "epoch": 0.76, "grad_norm": 1.2350711450728686, "learning_rate": 1.424558112998155e-06, "loss": 0.1362, "step": 10639 }, { "epoch": 0.76, "grad_norm": 1.4189969768197646, "learning_rate": 1.4237484448564759e-06, "loss": 0.1591, "step": 10640 }, { "epoch": 0.76, "grad_norm": 1.342769001404274, "learning_rate": 1.4229389686737737e-06, "loss": 0.1933, "step": 10641 }, { "epoch": 0.76, "grad_norm": 1.690512209122188, "learning_rate": 1.4221296844934956e-06, "loss": 0.2197, "step": 10642 }, { "epoch": 0.76, "grad_norm": 1.3615159008118802, "learning_rate": 1.4213205923590833e-06, "loss": 0.1795, "step": 10643 }, { "epoch": 0.76, "grad_norm": 1.3460883783826403, "learning_rate": 1.4205116923139628e-06, "loss": 0.1586, "step": 10644 }, { "epoch": 0.76, "grad_norm": 1.4570477207600203, "learning_rate": 1.4197029844015541e-06, "loss": 0.1852, "step": 10645 }, { "epoch": 0.76, "grad_norm": 1.3259620844575377, "learning_rate": 1.4188944686652651e-06, "loss": 0.1681, "step": 10646 }, { "epoch": 0.76, "grad_norm": 1.3606126696906709, "learning_rate": 1.4180861451484962e-06, "loss": 0.1834, "step": 10647 }, { "epoch": 0.76, "grad_norm": 1.3751019903512172, "learning_rate": 1.4172780138946306e-06, "loss": 0.1626, "step": 10648 }, { "epoch": 0.76, "grad_norm": 1.3395099325671, "learning_rate": 1.41647007494705e-06, "loss": 0.165, "step": 10649 }, { "epoch": 0.76, "grad_norm": 1.4955340846064766, "learning_rate": 1.4156623283491171e-06, "loss": 0.1692, "step": 10650 }, { "epoch": 0.76, "grad_norm": 1.3156014401931164, "learning_rate": 1.4148547741441926e-06, "loss": 0.1725, "step": 10651 }, { "epoch": 0.76, "grad_norm": 1.4296605039360266, "learning_rate": 1.4140474123756186e-06, "loss": 0.1787, "step": 10652 }, { "epoch": 0.76, "grad_norm": 4.6238803795863435, "learning_rate": 1.4132402430867343e-06, "loss": 0.6098, "step": 10653 }, { "epoch": 0.76, "grad_norm": 1.2443303865267252, "learning_rate": 1.4124332663208635e-06, "loss": 0.1592, "step": 10654 }, { "epoch": 0.76, "grad_norm": 1.2419548980365436, "learning_rate": 1.4116264821213221e-06, "loss": 0.1412, "step": 10655 }, { "epoch": 0.76, "grad_norm": 1.2839150273379119, "learning_rate": 1.4108198905314175e-06, "loss": 0.1334, "step": 10656 }, { "epoch": 0.76, "grad_norm": 1.463591575678856, "learning_rate": 1.4100134915944403e-06, "loss": 0.2239, "step": 10657 }, { "epoch": 0.76, "grad_norm": 6.255725157559926, "learning_rate": 1.409207285353678e-06, "loss": 0.6076, "step": 10658 }, { "epoch": 0.76, "grad_norm": 1.4751197645117728, "learning_rate": 1.408401271852402e-06, "loss": 0.1589, "step": 10659 }, { "epoch": 0.76, "grad_norm": 1.4230251284826614, "learning_rate": 1.4075954511338784e-06, "loss": 0.2015, "step": 10660 }, { "epoch": 0.76, "grad_norm": 8.678180395079142, "learning_rate": 1.4067898232413574e-06, "loss": 0.5524, "step": 10661 }, { "epoch": 0.76, "grad_norm": 1.3069921696200468, "learning_rate": 1.405984388218084e-06, "loss": 0.18, "step": 10662 }, { "epoch": 0.76, "grad_norm": 1.321398413399983, "learning_rate": 1.40517914610729e-06, "loss": 0.1776, "step": 10663 }, { "epoch": 0.76, "grad_norm": 1.3581919204301052, "learning_rate": 1.4043740969521996e-06, "loss": 0.1864, "step": 10664 }, { "epoch": 0.76, "grad_norm": 1.3715465049394573, "learning_rate": 1.4035692407960217e-06, "loss": 0.1939, "step": 10665 }, { "epoch": 0.76, "grad_norm": 1.3221011476093758, "learning_rate": 1.4027645776819605e-06, "loss": 0.1708, "step": 10666 }, { "epoch": 0.76, "grad_norm": 1.2498033559009347, "learning_rate": 1.4019601076532042e-06, "loss": 0.1749, "step": 10667 }, { "epoch": 0.76, "grad_norm": 1.1392541844568327, "learning_rate": 1.4011558307529366e-06, "loss": 0.1712, "step": 10668 }, { "epoch": 0.76, "grad_norm": 1.3490334017490764, "learning_rate": 1.400351747024325e-06, "loss": 0.1682, "step": 10669 }, { "epoch": 0.76, "grad_norm": 1.2581572199371105, "learning_rate": 1.3995478565105308e-06, "loss": 0.149, "step": 10670 }, { "epoch": 0.76, "grad_norm": 1.3912356378844166, "learning_rate": 1.3987441592547041e-06, "loss": 0.1837, "step": 10671 }, { "epoch": 0.76, "grad_norm": 4.996208750013756, "learning_rate": 1.3979406552999852e-06, "loss": 0.5232, "step": 10672 }, { "epoch": 0.76, "grad_norm": 1.0483495864350354, "learning_rate": 1.3971373446895004e-06, "loss": 0.1332, "step": 10673 }, { "epoch": 0.76, "grad_norm": 1.2255004310564488, "learning_rate": 1.396334227466371e-06, "loss": 0.1024, "step": 10674 }, { "epoch": 0.76, "grad_norm": 6.030652704900943, "learning_rate": 1.3955313036737022e-06, "loss": 0.7205, "step": 10675 }, { "epoch": 0.76, "grad_norm": 1.3234941592276754, "learning_rate": 1.3947285733545946e-06, "loss": 0.2008, "step": 10676 }, { "epoch": 0.76, "grad_norm": 1.2332184681005414, "learning_rate": 1.3939260365521328e-06, "loss": 0.1915, "step": 10677 }, { "epoch": 0.76, "grad_norm": 1.3201239693417748, "learning_rate": 1.393123693309395e-06, "loss": 0.1663, "step": 10678 }, { "epoch": 0.76, "grad_norm": 1.3791539649097073, "learning_rate": 1.3923215436694482e-06, "loss": 0.1367, "step": 10679 }, { "epoch": 0.76, "grad_norm": 1.2655939352799814, "learning_rate": 1.3915195876753495e-06, "loss": 0.1552, "step": 10680 }, { "epoch": 0.76, "grad_norm": 1.4942336063625583, "learning_rate": 1.3907178253701427e-06, "loss": 0.2201, "step": 10681 }, { "epoch": 0.76, "grad_norm": 1.3332296802201882, "learning_rate": 1.389916256796865e-06, "loss": 0.1563, "step": 10682 }, { "epoch": 0.76, "grad_norm": 4.337693818909043, "learning_rate": 1.3891148819985395e-06, "loss": 0.4015, "step": 10683 }, { "epoch": 0.76, "grad_norm": 1.49005665195222, "learning_rate": 1.388313701018183e-06, "loss": 0.1844, "step": 10684 }, { "epoch": 0.76, "grad_norm": 8.953453429530292, "learning_rate": 1.3875127138987972e-06, "loss": 0.5978, "step": 10685 }, { "epoch": 0.76, "grad_norm": 1.3498551898418027, "learning_rate": 1.3867119206833773e-06, "loss": 0.1469, "step": 10686 }, { "epoch": 0.76, "grad_norm": 1.559914919221235, "learning_rate": 1.3859113214149066e-06, "loss": 0.1717, "step": 10687 }, { "epoch": 0.76, "grad_norm": 1.4020726669669477, "learning_rate": 1.3851109161363584e-06, "loss": 0.1884, "step": 10688 }, { "epoch": 0.76, "grad_norm": 6.136795463745805, "learning_rate": 1.3843107048906961e-06, "loss": 0.6171, "step": 10689 }, { "epoch": 0.76, "grad_norm": 4.956703608838789, "learning_rate": 1.3835106877208698e-06, "loss": 0.453, "step": 10690 }, { "epoch": 0.76, "grad_norm": 1.3806030281590127, "learning_rate": 1.3827108646698234e-06, "loss": 0.1569, "step": 10691 }, { "epoch": 0.76, "grad_norm": 1.5446572164495918, "learning_rate": 1.3819112357804859e-06, "loss": 0.1968, "step": 10692 }, { "epoch": 0.76, "grad_norm": 1.3457324067460188, "learning_rate": 1.3811118010957803e-06, "loss": 0.161, "step": 10693 }, { "epoch": 0.76, "grad_norm": 1.3644741713327855, "learning_rate": 1.3803125606586149e-06, "loss": 0.1852, "step": 10694 }, { "epoch": 0.77, "grad_norm": 1.4531227434780998, "learning_rate": 1.3795135145118927e-06, "loss": 0.1499, "step": 10695 }, { "epoch": 0.77, "grad_norm": 1.3160971956684657, "learning_rate": 1.3787146626984999e-06, "loss": 0.1703, "step": 10696 }, { "epoch": 0.77, "grad_norm": 1.3509052832404558, "learning_rate": 1.3779160052613172e-06, "loss": 0.1431, "step": 10697 }, { "epoch": 0.77, "grad_norm": 1.2145492347667803, "learning_rate": 1.3771175422432137e-06, "loss": 0.161, "step": 10698 }, { "epoch": 0.77, "grad_norm": 1.3310841235838138, "learning_rate": 1.3763192736870496e-06, "loss": 0.181, "step": 10699 }, { "epoch": 0.77, "grad_norm": 1.298008941856758, "learning_rate": 1.3755211996356687e-06, "loss": 0.1535, "step": 10700 }, { "epoch": 0.77, "grad_norm": 1.5573491298206854, "learning_rate": 1.374723320131912e-06, "loss": 0.1949, "step": 10701 }, { "epoch": 0.77, "grad_norm": 1.4205135782840004, "learning_rate": 1.3739256352186038e-06, "loss": 0.185, "step": 10702 }, { "epoch": 0.77, "grad_norm": 1.2957289293795626, "learning_rate": 1.373128144938563e-06, "loss": 0.1542, "step": 10703 }, { "epoch": 0.77, "grad_norm": 1.2427103455394968, "learning_rate": 1.3723308493345939e-06, "loss": 0.1424, "step": 10704 }, { "epoch": 0.77, "grad_norm": 1.2340094317129295, "learning_rate": 1.3715337484494917e-06, "loss": 0.1847, "step": 10705 }, { "epoch": 0.77, "grad_norm": 1.1743704986028893, "learning_rate": 1.3707368423260436e-06, "loss": 0.1663, "step": 10706 }, { "epoch": 0.77, "grad_norm": 1.1215933133294034, "learning_rate": 1.3699401310070249e-06, "loss": 0.1375, "step": 10707 }, { "epoch": 0.77, "grad_norm": 1.2938966493683683, "learning_rate": 1.3691436145351967e-06, "loss": 0.1907, "step": 10708 }, { "epoch": 0.77, "grad_norm": 4.8701422556617615, "learning_rate": 1.3683472929533164e-06, "loss": 0.5278, "step": 10709 }, { "epoch": 0.77, "grad_norm": 1.4582423101502153, "learning_rate": 1.3675511663041235e-06, "loss": 0.2116, "step": 10710 }, { "epoch": 0.77, "grad_norm": 4.051575536793547, "learning_rate": 1.3667552346303554e-06, "loss": 0.4116, "step": 10711 }, { "epoch": 0.77, "grad_norm": 1.2820486866287786, "learning_rate": 1.3659594979747304e-06, "loss": 0.1709, "step": 10712 }, { "epoch": 0.77, "grad_norm": 1.6009099370694422, "learning_rate": 1.3651639563799618e-06, "loss": 0.1589, "step": 10713 }, { "epoch": 0.77, "grad_norm": 5.265859931306252, "learning_rate": 1.364368609888752e-06, "loss": 0.5951, "step": 10714 }, { "epoch": 0.77, "grad_norm": 1.266262211605513, "learning_rate": 1.3635734585437933e-06, "loss": 0.1512, "step": 10715 }, { "epoch": 0.77, "grad_norm": 1.1850726031690055, "learning_rate": 1.3627785023877633e-06, "loss": 0.131, "step": 10716 }, { "epoch": 0.77, "grad_norm": 3.190135136857808, "learning_rate": 1.361983741463334e-06, "loss": 0.4616, "step": 10717 }, { "epoch": 0.77, "grad_norm": 1.3671267463752237, "learning_rate": 1.3611891758131635e-06, "loss": 0.1679, "step": 10718 }, { "epoch": 0.77, "grad_norm": 1.2270367238849111, "learning_rate": 1.3603948054799016e-06, "loss": 0.1538, "step": 10719 }, { "epoch": 0.77, "grad_norm": 1.4798486614313762, "learning_rate": 1.3596006305061888e-06, "loss": 0.1802, "step": 10720 }, { "epoch": 0.77, "grad_norm": 1.4661646421872494, "learning_rate": 1.3588066509346498e-06, "loss": 0.2048, "step": 10721 }, { "epoch": 0.77, "grad_norm": 1.2889687781356651, "learning_rate": 1.3580128668079045e-06, "loss": 0.184, "step": 10722 }, { "epoch": 0.77, "grad_norm": 4.985053012435174, "learning_rate": 1.3572192781685595e-06, "loss": 0.5419, "step": 10723 }, { "epoch": 0.77, "grad_norm": 1.212364342184223, "learning_rate": 1.3564258850592131e-06, "loss": 0.1626, "step": 10724 }, { "epoch": 0.77, "grad_norm": 1.4136611149222236, "learning_rate": 1.3556326875224485e-06, "loss": 0.1638, "step": 10725 }, { "epoch": 0.77, "grad_norm": 1.3802477216946167, "learning_rate": 1.3548396856008445e-06, "loss": 0.1646, "step": 10726 }, { "epoch": 0.77, "grad_norm": 1.1029452967288487, "learning_rate": 1.3540468793369632e-06, "loss": 0.1415, "step": 10727 }, { "epoch": 0.77, "grad_norm": 1.4875400136410541, "learning_rate": 1.3532542687733624e-06, "loss": 0.1554, "step": 10728 }, { "epoch": 0.77, "grad_norm": 1.4135323257621524, "learning_rate": 1.3524618539525836e-06, "loss": 0.1946, "step": 10729 }, { "epoch": 0.77, "grad_norm": 1.4147146601308604, "learning_rate": 1.351669634917161e-06, "loss": 0.1788, "step": 10730 }, { "epoch": 0.77, "grad_norm": 1.312406363220769, "learning_rate": 1.3508776117096184e-06, "loss": 0.1385, "step": 10731 }, { "epoch": 0.77, "grad_norm": 1.1981369969407047, "learning_rate": 1.3500857843724701e-06, "loss": 0.1407, "step": 10732 }, { "epoch": 0.77, "grad_norm": 1.3281657345711524, "learning_rate": 1.3492941529482145e-06, "loss": 0.1767, "step": 10733 }, { "epoch": 0.77, "grad_norm": 6.007834809378267, "learning_rate": 1.3485027174793475e-06, "loss": 0.5169, "step": 10734 }, { "epoch": 0.77, "grad_norm": 1.2993655934141313, "learning_rate": 1.3477114780083462e-06, "loss": 0.1524, "step": 10735 }, { "epoch": 0.77, "grad_norm": 1.4490315968916014, "learning_rate": 1.3469204345776843e-06, "loss": 0.1752, "step": 10736 }, { "epoch": 0.77, "grad_norm": 1.2174064564698628, "learning_rate": 1.3461295872298197e-06, "loss": 0.1604, "step": 10737 }, { "epoch": 0.77, "grad_norm": 1.5923419265567251, "learning_rate": 1.3453389360072022e-06, "loss": 0.1876, "step": 10738 }, { "epoch": 0.77, "grad_norm": 1.3310326597531117, "learning_rate": 1.3445484809522714e-06, "loss": 0.1729, "step": 10739 }, { "epoch": 0.77, "grad_norm": 1.3161242769628008, "learning_rate": 1.3437582221074574e-06, "loss": 0.1407, "step": 10740 }, { "epoch": 0.77, "grad_norm": 1.3062713388224785, "learning_rate": 1.3429681595151755e-06, "loss": 0.1662, "step": 10741 }, { "epoch": 0.77, "grad_norm": 1.2212507405389408, "learning_rate": 1.342178293217835e-06, "loss": 0.1821, "step": 10742 }, { "epoch": 0.77, "grad_norm": 1.5155222247509201, "learning_rate": 1.341388623257831e-06, "loss": 0.1813, "step": 10743 }, { "epoch": 0.77, "grad_norm": 1.2930395148305247, "learning_rate": 1.3405991496775521e-06, "loss": 0.1843, "step": 10744 }, { "epoch": 0.77, "grad_norm": 1.293207003362307, "learning_rate": 1.3398098725193715e-06, "loss": 0.1334, "step": 10745 }, { "epoch": 0.77, "grad_norm": 1.2403723215333968, "learning_rate": 1.3390207918256555e-06, "loss": 0.1543, "step": 10746 }, { "epoch": 0.77, "grad_norm": 1.2677875162831829, "learning_rate": 1.338231907638759e-06, "loss": 0.1838, "step": 10747 }, { "epoch": 0.77, "grad_norm": 1.2821863605813173, "learning_rate": 1.3374432200010278e-06, "loss": 0.1824, "step": 10748 }, { "epoch": 0.77, "grad_norm": 1.2432247652092665, "learning_rate": 1.3366547289547933e-06, "loss": 0.1468, "step": 10749 }, { "epoch": 0.77, "grad_norm": 1.290581399909029, "learning_rate": 1.3358664345423783e-06, "loss": 0.1435, "step": 10750 }, { "epoch": 0.77, "grad_norm": 1.2478340865014668, "learning_rate": 1.3350783368060983e-06, "loss": 0.1537, "step": 10751 }, { "epoch": 0.77, "grad_norm": 1.3428385808940781, "learning_rate": 1.3342904357882514e-06, "loss": 0.1549, "step": 10752 }, { "epoch": 0.77, "grad_norm": 5.759993489006135, "learning_rate": 1.333502731531132e-06, "loss": 0.6973, "step": 10753 }, { "epoch": 0.77, "grad_norm": 1.2335013416393081, "learning_rate": 1.3327152240770191e-06, "loss": 0.1641, "step": 10754 }, { "epoch": 0.77, "grad_norm": 1.2221054484779326, "learning_rate": 1.3319279134681829e-06, "loss": 0.1378, "step": 10755 }, { "epoch": 0.77, "grad_norm": 5.197778683165155, "learning_rate": 1.331140799746884e-06, "loss": 0.6157, "step": 10756 }, { "epoch": 0.77, "grad_norm": 1.4217412452795737, "learning_rate": 1.3303538829553731e-06, "loss": 0.2072, "step": 10757 }, { "epoch": 0.77, "grad_norm": 1.2247622852655058, "learning_rate": 1.3295671631358852e-06, "loss": 0.1881, "step": 10758 }, { "epoch": 0.77, "grad_norm": 1.4264226701317393, "learning_rate": 1.328780640330652e-06, "loss": 0.1802, "step": 10759 }, { "epoch": 0.77, "grad_norm": 7.909014684575279, "learning_rate": 1.3279943145818874e-06, "loss": 0.5543, "step": 10760 }, { "epoch": 0.77, "grad_norm": 1.289576134396907, "learning_rate": 1.3272081859318014e-06, "loss": 0.1712, "step": 10761 }, { "epoch": 0.77, "grad_norm": 1.3161224002137772, "learning_rate": 1.3264222544225874e-06, "loss": 0.1726, "step": 10762 }, { "epoch": 0.77, "grad_norm": 1.4577136684418077, "learning_rate": 1.3256365200964321e-06, "loss": 0.1934, "step": 10763 }, { "epoch": 0.77, "grad_norm": 5.453773862187874, "learning_rate": 1.324850982995511e-06, "loss": 0.4802, "step": 10764 }, { "epoch": 0.77, "grad_norm": 6.202690816933377, "learning_rate": 1.3240656431619903e-06, "loss": 0.5093, "step": 10765 }, { "epoch": 0.77, "grad_norm": 1.3131469273857477, "learning_rate": 1.32328050063802e-06, "loss": 0.1896, "step": 10766 }, { "epoch": 0.77, "grad_norm": 1.241394643224215, "learning_rate": 1.3224955554657475e-06, "loss": 0.163, "step": 10767 }, { "epoch": 0.77, "grad_norm": 1.2424292059084432, "learning_rate": 1.3217108076873026e-06, "loss": 0.1712, "step": 10768 }, { "epoch": 0.77, "grad_norm": 1.2235669730371863, "learning_rate": 1.3209262573448095e-06, "loss": 0.1124, "step": 10769 }, { "epoch": 0.77, "grad_norm": 1.3948656746150816, "learning_rate": 1.320141904480377e-06, "loss": 0.1842, "step": 10770 }, { "epoch": 0.77, "grad_norm": 1.3406788555407718, "learning_rate": 1.3193577491361082e-06, "loss": 0.1678, "step": 10771 }, { "epoch": 0.77, "grad_norm": 1.1699421257821243, "learning_rate": 1.318573791354093e-06, "loss": 0.1496, "step": 10772 }, { "epoch": 0.77, "grad_norm": 1.3627037525290404, "learning_rate": 1.3177900311764125e-06, "loss": 0.1815, "step": 10773 }, { "epoch": 0.77, "grad_norm": 1.3279741362380528, "learning_rate": 1.317006468645133e-06, "loss": 0.156, "step": 10774 }, { "epoch": 0.77, "grad_norm": 1.2818253928538257, "learning_rate": 1.316223103802316e-06, "loss": 0.1659, "step": 10775 }, { "epoch": 0.77, "grad_norm": 1.212814788672113, "learning_rate": 1.3154399366900068e-06, "loss": 0.1547, "step": 10776 }, { "epoch": 0.77, "grad_norm": 1.3640529959116054, "learning_rate": 1.3146569673502447e-06, "loss": 0.1842, "step": 10777 }, { "epoch": 0.77, "grad_norm": 1.233836640167664, "learning_rate": 1.3138741958250545e-06, "loss": 0.1375, "step": 10778 }, { "epoch": 0.77, "grad_norm": 1.291666631101557, "learning_rate": 1.3130916221564528e-06, "loss": 0.1533, "step": 10779 }, { "epoch": 0.77, "grad_norm": 1.42101048046062, "learning_rate": 1.3123092463864456e-06, "loss": 0.1917, "step": 10780 }, { "epoch": 0.77, "grad_norm": 1.2101729503749248, "learning_rate": 1.3115270685570292e-06, "loss": 0.1339, "step": 10781 }, { "epoch": 0.77, "grad_norm": 1.0872991727571533, "learning_rate": 1.3107450887101847e-06, "loss": 0.1341, "step": 10782 }, { "epoch": 0.77, "grad_norm": 1.2492778709917987, "learning_rate": 1.3099633068878875e-06, "loss": 0.1361, "step": 10783 }, { "epoch": 0.77, "grad_norm": 1.4926090007333466, "learning_rate": 1.3091817231321013e-06, "loss": 0.1931, "step": 10784 }, { "epoch": 0.77, "grad_norm": 1.398321310675229, "learning_rate": 1.3084003374847765e-06, "loss": 0.2076, "step": 10785 }, { "epoch": 0.77, "grad_norm": 1.3068888023636955, "learning_rate": 1.3076191499878565e-06, "loss": 0.1791, "step": 10786 }, { "epoch": 0.77, "grad_norm": 1.437667974651349, "learning_rate": 1.3068381606832702e-06, "loss": 0.1769, "step": 10787 }, { "epoch": 0.77, "grad_norm": 1.3661340203824657, "learning_rate": 1.3060573696129396e-06, "loss": 0.176, "step": 10788 }, { "epoch": 0.77, "grad_norm": 1.2177434541263317, "learning_rate": 1.3052767768187734e-06, "loss": 0.1511, "step": 10789 }, { "epoch": 0.77, "grad_norm": 12.481775094810473, "learning_rate": 1.3044963823426737e-06, "loss": 0.6787, "step": 10790 }, { "epoch": 0.77, "grad_norm": 1.3808560970726922, "learning_rate": 1.3037161862265252e-06, "loss": 0.2009, "step": 10791 }, { "epoch": 0.77, "grad_norm": 1.2720916362864194, "learning_rate": 1.3029361885122083e-06, "loss": 0.1844, "step": 10792 }, { "epoch": 0.77, "grad_norm": 1.431056799062906, "learning_rate": 1.3021563892415879e-06, "loss": 0.1363, "step": 10793 }, { "epoch": 0.77, "grad_norm": 5.674698553392927, "learning_rate": 1.3013767884565232e-06, "loss": 0.5085, "step": 10794 }, { "epoch": 0.77, "grad_norm": 1.7782155019521222, "learning_rate": 1.3005973861988574e-06, "loss": 0.2079, "step": 10795 }, { "epoch": 0.77, "grad_norm": 1.5463308552469406, "learning_rate": 1.2998181825104283e-06, "loss": 0.1791, "step": 10796 }, { "epoch": 0.77, "grad_norm": 1.3091103574733114, "learning_rate": 1.2990391774330573e-06, "loss": 0.1695, "step": 10797 }, { "epoch": 0.77, "grad_norm": 1.353386788623521, "learning_rate": 1.2982603710085606e-06, "loss": 0.1644, "step": 10798 }, { "epoch": 0.77, "grad_norm": 1.4341121999679103, "learning_rate": 1.2974817632787406e-06, "loss": 0.1683, "step": 10799 }, { "epoch": 0.77, "grad_norm": 1.297081967123346, "learning_rate": 1.2967033542853918e-06, "loss": 0.1585, "step": 10800 }, { "epoch": 0.77, "grad_norm": 1.3371651256172636, "learning_rate": 1.295925144070293e-06, "loss": 0.1878, "step": 10801 }, { "epoch": 0.77, "grad_norm": 1.4074227028619066, "learning_rate": 1.2951471326752184e-06, "loss": 0.2135, "step": 10802 }, { "epoch": 0.77, "grad_norm": 1.443389796182147, "learning_rate": 1.294369320141926e-06, "loss": 0.167, "step": 10803 }, { "epoch": 0.77, "grad_norm": 1.4247181315889128, "learning_rate": 1.2935917065121684e-06, "loss": 0.2057, "step": 10804 }, { "epoch": 0.77, "grad_norm": 1.3429033729282496, "learning_rate": 1.292814291827681e-06, "loss": 0.1952, "step": 10805 }, { "epoch": 0.77, "grad_norm": 1.2981290072483687, "learning_rate": 1.2920370761301948e-06, "loss": 0.1744, "step": 10806 }, { "epoch": 0.77, "grad_norm": 1.3093861256219763, "learning_rate": 1.2912600594614276e-06, "loss": 0.1556, "step": 10807 }, { "epoch": 0.77, "grad_norm": 1.5418434318989682, "learning_rate": 1.290483241863088e-06, "loss": 0.1746, "step": 10808 }, { "epoch": 0.77, "grad_norm": 1.599394965787028, "learning_rate": 1.289706623376869e-06, "loss": 0.1806, "step": 10809 }, { "epoch": 0.77, "grad_norm": 6.1614939022579795, "learning_rate": 1.28893020404446e-06, "loss": 0.6115, "step": 10810 }, { "epoch": 0.77, "grad_norm": 1.1757367453635346, "learning_rate": 1.288153983907533e-06, "loss": 0.1286, "step": 10811 }, { "epoch": 0.77, "grad_norm": 1.6069544364596753, "learning_rate": 1.287377963007755e-06, "loss": 0.2062, "step": 10812 }, { "epoch": 0.77, "grad_norm": 1.3247923791551692, "learning_rate": 1.2866021413867768e-06, "loss": 0.159, "step": 10813 }, { "epoch": 0.77, "grad_norm": 1.2720482341489399, "learning_rate": 1.2858265190862434e-06, "loss": 0.1712, "step": 10814 }, { "epoch": 0.77, "grad_norm": 1.543794340047884, "learning_rate": 1.2850510961477863e-06, "loss": 0.1826, "step": 10815 }, { "epoch": 0.77, "grad_norm": 10.422743007194265, "learning_rate": 1.2842758726130283e-06, "loss": 0.6393, "step": 10816 }, { "epoch": 0.77, "grad_norm": 1.26988826346006, "learning_rate": 1.2835008485235811e-06, "loss": 0.1465, "step": 10817 }, { "epoch": 0.77, "grad_norm": 1.2215915476933914, "learning_rate": 1.2827260239210415e-06, "loss": 0.1514, "step": 10818 }, { "epoch": 0.77, "grad_norm": 7.251385506681329, "learning_rate": 1.2819513988470027e-06, "loss": 0.5144, "step": 10819 }, { "epoch": 0.77, "grad_norm": 1.2703000025537514, "learning_rate": 1.2811769733430406e-06, "loss": 0.1469, "step": 10820 }, { "epoch": 0.77, "grad_norm": 5.034792231837164, "learning_rate": 1.2804027474507259e-06, "loss": 0.5539, "step": 10821 }, { "epoch": 0.77, "grad_norm": 1.373248559033873, "learning_rate": 1.2796287212116126e-06, "loss": 0.1991, "step": 10822 }, { "epoch": 0.77, "grad_norm": 1.3005342086617848, "learning_rate": 1.2788548946672496e-06, "loss": 0.1845, "step": 10823 }, { "epoch": 0.77, "grad_norm": 10.027266465669893, "learning_rate": 1.2780812678591724e-06, "loss": 0.5943, "step": 10824 }, { "epoch": 0.77, "grad_norm": 1.4741953311200033, "learning_rate": 1.277307840828908e-06, "loss": 0.1789, "step": 10825 }, { "epoch": 0.77, "grad_norm": 5.195056417417213, "learning_rate": 1.276534613617968e-06, "loss": 0.6181, "step": 10826 }, { "epoch": 0.77, "grad_norm": 1.209736903782424, "learning_rate": 1.2757615862678584e-06, "loss": 0.118, "step": 10827 }, { "epoch": 0.77, "grad_norm": 1.316525220252832, "learning_rate": 1.27498875882007e-06, "loss": 0.1601, "step": 10828 }, { "epoch": 0.77, "grad_norm": 1.4922975863139818, "learning_rate": 1.2742161313160877e-06, "loss": 0.1983, "step": 10829 }, { "epoch": 0.77, "grad_norm": 1.26735752829659, "learning_rate": 1.27344370379738e-06, "loss": 0.1792, "step": 10830 }, { "epoch": 0.77, "grad_norm": 1.2246445151973904, "learning_rate": 1.2726714763054104e-06, "loss": 0.1401, "step": 10831 }, { "epoch": 0.77, "grad_norm": 1.3422349553176591, "learning_rate": 1.2718994488816272e-06, "loss": 0.177, "step": 10832 }, { "epoch": 0.77, "grad_norm": 1.1797925820374757, "learning_rate": 1.2711276215674722e-06, "loss": 0.1416, "step": 10833 }, { "epoch": 0.77, "grad_norm": 5.641156412222506, "learning_rate": 1.2703559944043714e-06, "loss": 0.3846, "step": 10834 }, { "epoch": 0.78, "grad_norm": 1.3225248956186373, "learning_rate": 1.2695845674337447e-06, "loss": 0.1421, "step": 10835 }, { "epoch": 0.78, "grad_norm": 1.2884608915474698, "learning_rate": 1.268813340696997e-06, "loss": 0.1572, "step": 10836 }, { "epoch": 0.78, "grad_norm": 8.594848801598252, "learning_rate": 1.2680423142355274e-06, "loss": 0.733, "step": 10837 }, { "epoch": 0.78, "grad_norm": 1.2734102607225462, "learning_rate": 1.267271488090719e-06, "loss": 0.1652, "step": 10838 }, { "epoch": 0.78, "grad_norm": 1.4268329557703527, "learning_rate": 1.266500862303947e-06, "loss": 0.1796, "step": 10839 }, { "epoch": 0.78, "grad_norm": 1.2390693092215115, "learning_rate": 1.2657304369165768e-06, "loss": 0.1632, "step": 10840 }, { "epoch": 0.78, "grad_norm": 1.3174904981271964, "learning_rate": 1.2649602119699628e-06, "loss": 0.1996, "step": 10841 }, { "epoch": 0.78, "grad_norm": 1.257970487634104, "learning_rate": 1.264190187505444e-06, "loss": 0.1792, "step": 10842 }, { "epoch": 0.78, "grad_norm": 1.3376028391109658, "learning_rate": 1.2634203635643566e-06, "loss": 0.2356, "step": 10843 }, { "epoch": 0.78, "grad_norm": 6.779189018283161, "learning_rate": 1.2626507401880178e-06, "loss": 0.5841, "step": 10844 }, { "epoch": 0.78, "grad_norm": 1.71826309729734, "learning_rate": 1.2618813174177408e-06, "loss": 0.1671, "step": 10845 }, { "epoch": 0.78, "grad_norm": 1.2294011185593903, "learning_rate": 1.2611120952948225e-06, "loss": 0.1719, "step": 10846 }, { "epoch": 0.78, "grad_norm": 1.3706348574669802, "learning_rate": 1.2603430738605532e-06, "loss": 0.1719, "step": 10847 }, { "epoch": 0.78, "grad_norm": 1.562828388910144, "learning_rate": 1.2595742531562112e-06, "loss": 0.159, "step": 10848 }, { "epoch": 0.78, "grad_norm": 1.165477515659987, "learning_rate": 1.2588056332230624e-06, "loss": 0.1452, "step": 10849 }, { "epoch": 0.78, "grad_norm": 1.3470305613664593, "learning_rate": 1.2580372141023666e-06, "loss": 0.1567, "step": 10850 }, { "epoch": 0.78, "grad_norm": 1.4298814770524364, "learning_rate": 1.2572689958353652e-06, "loss": 0.1645, "step": 10851 }, { "epoch": 0.78, "grad_norm": 1.1394094001619999, "learning_rate": 1.256500978463297e-06, "loss": 0.1454, "step": 10852 }, { "epoch": 0.78, "grad_norm": 1.4439355287298474, "learning_rate": 1.2557331620273822e-06, "loss": 0.1641, "step": 10853 }, { "epoch": 0.78, "grad_norm": 1.3018782675310325, "learning_rate": 1.2549655465688377e-06, "loss": 0.1571, "step": 10854 }, { "epoch": 0.78, "grad_norm": 1.2178248179763735, "learning_rate": 1.2541981321288627e-06, "loss": 0.1379, "step": 10855 }, { "epoch": 0.78, "grad_norm": 1.248986747754545, "learning_rate": 1.253430918748651e-06, "loss": 0.1613, "step": 10856 }, { "epoch": 0.78, "grad_norm": 1.1738555423077708, "learning_rate": 1.252663906469383e-06, "loss": 0.1293, "step": 10857 }, { "epoch": 0.78, "grad_norm": 1.2671663491080725, "learning_rate": 1.2518970953322307e-06, "loss": 0.1754, "step": 10858 }, { "epoch": 0.78, "grad_norm": 1.292379662548495, "learning_rate": 1.2511304853783507e-06, "loss": 0.1528, "step": 10859 }, { "epoch": 0.78, "grad_norm": 1.5040790519795335, "learning_rate": 1.250364076648894e-06, "loss": 0.2087, "step": 10860 }, { "epoch": 0.78, "grad_norm": 5.626455555642887, "learning_rate": 1.2495978691849952e-06, "loss": 0.5038, "step": 10861 }, { "epoch": 0.78, "grad_norm": 1.2478144347957785, "learning_rate": 1.2488318630277857e-06, "loss": 0.1579, "step": 10862 }, { "epoch": 0.78, "grad_norm": 1.3496675002007257, "learning_rate": 1.2480660582183768e-06, "loss": 0.1635, "step": 10863 }, { "epoch": 0.78, "grad_norm": 1.379713333188431, "learning_rate": 1.2473004547978767e-06, "loss": 0.1547, "step": 10864 }, { "epoch": 0.78, "grad_norm": 1.3782187428241317, "learning_rate": 1.2465350528073795e-06, "loss": 0.1948, "step": 10865 }, { "epoch": 0.78, "grad_norm": 1.3063237183790244, "learning_rate": 1.2457698522879708e-06, "loss": 0.2007, "step": 10866 }, { "epoch": 0.78, "grad_norm": 1.2587602041541603, "learning_rate": 1.2450048532807197e-06, "loss": 0.1636, "step": 10867 }, { "epoch": 0.78, "grad_norm": 1.347123677399065, "learning_rate": 1.2442400558266915e-06, "loss": 0.1646, "step": 10868 }, { "epoch": 0.78, "grad_norm": 1.5743149012516464, "learning_rate": 1.2434754599669352e-06, "loss": 0.1786, "step": 10869 }, { "epoch": 0.78, "grad_norm": 1.3615329254520898, "learning_rate": 1.2427110657424934e-06, "loss": 0.1503, "step": 10870 }, { "epoch": 0.78, "grad_norm": 1.2555263759081152, "learning_rate": 1.241946873194394e-06, "loss": 0.1559, "step": 10871 }, { "epoch": 0.78, "grad_norm": 1.387864910690997, "learning_rate": 1.2411828823636556e-06, "loss": 0.1784, "step": 10872 }, { "epoch": 0.78, "grad_norm": 1.4576703889354408, "learning_rate": 1.240419093291288e-06, "loss": 0.1873, "step": 10873 }, { "epoch": 0.78, "grad_norm": 1.604596962934677, "learning_rate": 1.239655506018288e-06, "loss": 0.1913, "step": 10874 }, { "epoch": 0.78, "grad_norm": 1.3784569633327155, "learning_rate": 1.2388921205856402e-06, "loss": 0.1751, "step": 10875 }, { "epoch": 0.78, "grad_norm": 1.5264233469757673, "learning_rate": 1.238128937034323e-06, "loss": 0.137, "step": 10876 }, { "epoch": 0.78, "grad_norm": 1.2066734265868877, "learning_rate": 1.237365955405298e-06, "loss": 0.155, "step": 10877 }, { "epoch": 0.78, "grad_norm": 1.316622575817147, "learning_rate": 1.2366031757395196e-06, "loss": 0.1661, "step": 10878 }, { "epoch": 0.78, "grad_norm": 1.460600512919755, "learning_rate": 1.2358405980779336e-06, "loss": 0.1923, "step": 10879 }, { "epoch": 0.78, "grad_norm": 1.2185596180850247, "learning_rate": 1.2350782224614689e-06, "loss": 0.1684, "step": 10880 }, { "epoch": 0.78, "grad_norm": 4.163605919937378, "learning_rate": 1.2343160489310474e-06, "loss": 0.2819, "step": 10881 }, { "epoch": 0.78, "grad_norm": 1.4604222302466239, "learning_rate": 1.2335540775275805e-06, "loss": 0.1576, "step": 10882 }, { "epoch": 0.78, "grad_norm": 6.177485692583051, "learning_rate": 1.2327923082919686e-06, "loss": 0.5795, "step": 10883 }, { "epoch": 0.78, "grad_norm": 1.3418368450868547, "learning_rate": 1.2320307412650978e-06, "loss": 0.1696, "step": 10884 }, { "epoch": 0.78, "grad_norm": 5.274821050036068, "learning_rate": 1.231269376487849e-06, "loss": 0.5121, "step": 10885 }, { "epoch": 0.78, "grad_norm": 1.2545933036410872, "learning_rate": 1.2305082140010866e-06, "loss": 0.1599, "step": 10886 }, { "epoch": 0.78, "grad_norm": 1.4646050978552463, "learning_rate": 1.2297472538456689e-06, "loss": 0.1458, "step": 10887 }, { "epoch": 0.78, "grad_norm": 1.4085044292407272, "learning_rate": 1.2289864960624388e-06, "loss": 0.1493, "step": 10888 }, { "epoch": 0.78, "grad_norm": 1.2075088285832922, "learning_rate": 1.2282259406922337e-06, "loss": 0.1613, "step": 10889 }, { "epoch": 0.78, "grad_norm": 5.20321872976204, "learning_rate": 1.227465587775873e-06, "loss": 0.4779, "step": 10890 }, { "epoch": 0.78, "grad_norm": 4.881576955854875, "learning_rate": 1.2267054373541743e-06, "loss": 0.5566, "step": 10891 }, { "epoch": 0.78, "grad_norm": 1.5304905032403282, "learning_rate": 1.2259454894679363e-06, "loss": 0.1637, "step": 10892 }, { "epoch": 0.78, "grad_norm": 1.3540299318797002, "learning_rate": 1.2251857441579518e-06, "loss": 0.1705, "step": 10893 }, { "epoch": 0.78, "grad_norm": 1.1621470967677188, "learning_rate": 1.2244262014649994e-06, "loss": 0.1147, "step": 10894 }, { "epoch": 0.78, "grad_norm": 1.3169760199568337, "learning_rate": 1.2236668614298497e-06, "loss": 0.1769, "step": 10895 }, { "epoch": 0.78, "grad_norm": 6.09762923319316, "learning_rate": 1.222907724093259e-06, "loss": 0.5675, "step": 10896 }, { "epoch": 0.78, "grad_norm": 1.360619120709428, "learning_rate": 1.2221487894959778e-06, "loss": 0.1857, "step": 10897 }, { "epoch": 0.78, "grad_norm": 1.1287142915909105, "learning_rate": 1.221390057678739e-06, "loss": 0.1079, "step": 10898 }, { "epoch": 0.78, "grad_norm": 1.4101895248522336, "learning_rate": 1.2206315286822707e-06, "loss": 0.1937, "step": 10899 }, { "epoch": 0.78, "grad_norm": 1.3269436171358282, "learning_rate": 1.2198732025472876e-06, "loss": 0.1714, "step": 10900 }, { "epoch": 0.78, "grad_norm": 1.1989913590412595, "learning_rate": 1.219115079314494e-06, "loss": 0.1535, "step": 10901 }, { "epoch": 0.78, "grad_norm": 1.1847195826871226, "learning_rate": 1.2183571590245813e-06, "loss": 0.1476, "step": 10902 }, { "epoch": 0.78, "grad_norm": 1.2460408635768618, "learning_rate": 1.2175994417182342e-06, "loss": 0.1632, "step": 10903 }, { "epoch": 0.78, "grad_norm": 4.6970286332008815, "learning_rate": 1.216841927436121e-06, "loss": 0.4323, "step": 10904 }, { "epoch": 0.78, "grad_norm": 1.3083750472280593, "learning_rate": 1.2160846162189044e-06, "loss": 0.176, "step": 10905 }, { "epoch": 0.78, "grad_norm": 1.211950787332152, "learning_rate": 1.215327508107232e-06, "loss": 0.1522, "step": 10906 }, { "epoch": 0.78, "grad_norm": 1.2951256769106785, "learning_rate": 1.2145706031417427e-06, "loss": 0.1834, "step": 10907 }, { "epoch": 0.78, "grad_norm": 1.5386536060807698, "learning_rate": 1.213813901363065e-06, "loss": 0.183, "step": 10908 }, { "epoch": 0.78, "grad_norm": 1.3964815659951433, "learning_rate": 1.2130574028118168e-06, "loss": 0.1764, "step": 10909 }, { "epoch": 0.78, "grad_norm": 1.359976872244731, "learning_rate": 1.2123011075286012e-06, "loss": 0.1852, "step": 10910 }, { "epoch": 0.78, "grad_norm": 1.335506216544898, "learning_rate": 1.2115450155540143e-06, "loss": 0.1479, "step": 10911 }, { "epoch": 0.78, "grad_norm": 1.290646557587444, "learning_rate": 1.2107891269286415e-06, "loss": 0.1651, "step": 10912 }, { "epoch": 0.78, "grad_norm": 1.5120755787944296, "learning_rate": 1.210033441693053e-06, "loss": 0.1768, "step": 10913 }, { "epoch": 0.78, "grad_norm": 1.5747317261744758, "learning_rate": 1.2092779598878146e-06, "loss": 0.1834, "step": 10914 }, { "epoch": 0.78, "grad_norm": 4.588502352649904, "learning_rate": 1.208522681553474e-06, "loss": 0.4306, "step": 10915 }, { "epoch": 0.78, "grad_norm": 1.2427522956813504, "learning_rate": 1.2077676067305726e-06, "loss": 0.1694, "step": 10916 }, { "epoch": 0.78, "grad_norm": 5.409532477575555, "learning_rate": 1.2070127354596411e-06, "loss": 0.494, "step": 10917 }, { "epoch": 0.78, "grad_norm": 1.3406072820409283, "learning_rate": 1.2062580677811986e-06, "loss": 0.1599, "step": 10918 }, { "epoch": 0.78, "grad_norm": 1.3208482030664774, "learning_rate": 1.2055036037357493e-06, "loss": 0.1831, "step": 10919 }, { "epoch": 0.78, "grad_norm": 4.547293881378643, "learning_rate": 1.2047493433637935e-06, "loss": 0.5855, "step": 10920 }, { "epoch": 0.78, "grad_norm": 1.4690520122105315, "learning_rate": 1.2039952867058136e-06, "loss": 0.167, "step": 10921 }, { "epoch": 0.78, "grad_norm": 1.3842997818030294, "learning_rate": 1.203241433802288e-06, "loss": 0.1748, "step": 10922 }, { "epoch": 0.78, "grad_norm": 1.29114389934993, "learning_rate": 1.2024877846936767e-06, "loss": 0.1671, "step": 10923 }, { "epoch": 0.78, "grad_norm": 1.3196215655203563, "learning_rate": 1.201734339420434e-06, "loss": 0.1637, "step": 10924 }, { "epoch": 0.78, "grad_norm": 1.2819160789914978, "learning_rate": 1.200981098023003e-06, "loss": 0.1343, "step": 10925 }, { "epoch": 0.78, "grad_norm": 1.2842097934757721, "learning_rate": 1.2002280605418149e-06, "loss": 0.1468, "step": 10926 }, { "epoch": 0.78, "grad_norm": 4.777021201743696, "learning_rate": 1.1994752270172877e-06, "loss": 0.4906, "step": 10927 }, { "epoch": 0.78, "grad_norm": 1.4700286300938474, "learning_rate": 1.1987225974898326e-06, "loss": 0.1488, "step": 10928 }, { "epoch": 0.78, "grad_norm": 1.224188443943246, "learning_rate": 1.1979701719998454e-06, "loss": 0.1574, "step": 10929 }, { "epoch": 0.78, "grad_norm": 1.338422247971015, "learning_rate": 1.1972179505877168e-06, "loss": 0.1685, "step": 10930 }, { "epoch": 0.78, "grad_norm": 1.2497246407815572, "learning_rate": 1.1964659332938189e-06, "loss": 0.1397, "step": 10931 }, { "epoch": 0.78, "grad_norm": 1.1449469990354073, "learning_rate": 1.1957141201585193e-06, "loss": 0.1216, "step": 10932 }, { "epoch": 0.78, "grad_norm": 1.3536989041127743, "learning_rate": 1.194962511222172e-06, "loss": 0.1502, "step": 10933 }, { "epoch": 0.78, "grad_norm": 1.3061859697305618, "learning_rate": 1.194211106525122e-06, "loss": 0.165, "step": 10934 }, { "epoch": 0.78, "grad_norm": 1.35080872445764, "learning_rate": 1.193459906107699e-06, "loss": 0.1876, "step": 10935 }, { "epoch": 0.78, "grad_norm": 1.4377018365144416, "learning_rate": 1.1927089100102268e-06, "loss": 0.1638, "step": 10936 }, { "epoch": 0.78, "grad_norm": 1.2835555119446052, "learning_rate": 1.1919581182730134e-06, "loss": 0.1316, "step": 10937 }, { "epoch": 0.78, "grad_norm": 1.4375372783979672, "learning_rate": 1.1912075309363607e-06, "loss": 0.1915, "step": 10938 }, { "epoch": 0.78, "grad_norm": 1.2576594981208595, "learning_rate": 1.1904571480405552e-06, "loss": 0.1809, "step": 10939 }, { "epoch": 0.78, "grad_norm": 1.1909520427187312, "learning_rate": 1.1897069696258756e-06, "loss": 0.121, "step": 10940 }, { "epoch": 0.78, "grad_norm": 1.3492986099296709, "learning_rate": 1.1889569957325881e-06, "loss": 0.1651, "step": 10941 }, { "epoch": 0.78, "grad_norm": 1.360638553551535, "learning_rate": 1.1882072264009486e-06, "loss": 0.157, "step": 10942 }, { "epoch": 0.78, "grad_norm": 9.269837929133121, "learning_rate": 1.1874576616712036e-06, "loss": 0.5576, "step": 10943 }, { "epoch": 0.78, "grad_norm": 1.3857224228903668, "learning_rate": 1.1867083015835828e-06, "loss": 0.1545, "step": 10944 }, { "epoch": 0.78, "grad_norm": 1.224328313978514, "learning_rate": 1.1859591461783131e-06, "loss": 0.151, "step": 10945 }, { "epoch": 0.78, "grad_norm": 1.4989331932872134, "learning_rate": 1.1852101954956019e-06, "loss": 0.2055, "step": 10946 }, { "epoch": 0.78, "grad_norm": 8.036791669584762, "learning_rate": 1.184461449575654e-06, "loss": 0.6189, "step": 10947 }, { "epoch": 0.78, "grad_norm": 1.5474920700449022, "learning_rate": 1.1837129084586557e-06, "loss": 0.1768, "step": 10948 }, { "epoch": 0.78, "grad_norm": 1.241620725124109, "learning_rate": 1.1829645721847872e-06, "loss": 0.1487, "step": 10949 }, { "epoch": 0.78, "grad_norm": 5.8920319927479605, "learning_rate": 1.1822164407942156e-06, "loss": 0.5676, "step": 10950 }, { "epoch": 0.78, "grad_norm": 7.426617961786165, "learning_rate": 1.1814685143271004e-06, "loss": 0.4035, "step": 10951 }, { "epoch": 0.78, "grad_norm": 5.895290640409777, "learning_rate": 1.180720792823583e-06, "loss": 0.5889, "step": 10952 }, { "epoch": 0.78, "grad_norm": 1.0963924067989739, "learning_rate": 1.179973276323802e-06, "loss": 0.1496, "step": 10953 }, { "epoch": 0.78, "grad_norm": 1.277924117986559, "learning_rate": 1.1792259648678782e-06, "loss": 0.1605, "step": 10954 }, { "epoch": 0.78, "grad_norm": 1.2597152640325617, "learning_rate": 1.1784788584959268e-06, "loss": 0.1645, "step": 10955 }, { "epoch": 0.78, "grad_norm": 1.4568895569386944, "learning_rate": 1.1777319572480468e-06, "loss": 0.172, "step": 10956 }, { "epoch": 0.78, "grad_norm": 1.407177695676701, "learning_rate": 1.1769852611643296e-06, "loss": 0.1434, "step": 10957 }, { "epoch": 0.78, "grad_norm": 1.2817150566782245, "learning_rate": 1.176238770284856e-06, "loss": 0.1491, "step": 10958 }, { "epoch": 0.78, "grad_norm": 1.5240994665309255, "learning_rate": 1.1754924846496957e-06, "loss": 0.1746, "step": 10959 }, { "epoch": 0.78, "grad_norm": 1.458375019058329, "learning_rate": 1.1747464042989037e-06, "loss": 0.1697, "step": 10960 }, { "epoch": 0.78, "grad_norm": 1.3178840320139094, "learning_rate": 1.1740005292725286e-06, "loss": 0.1732, "step": 10961 }, { "epoch": 0.78, "grad_norm": 1.4279350471352732, "learning_rate": 1.1732548596106041e-06, "loss": 0.1437, "step": 10962 }, { "epoch": 0.78, "grad_norm": 1.1519674419501997, "learning_rate": 1.1725093953531573e-06, "loss": 0.135, "step": 10963 }, { "epoch": 0.78, "grad_norm": 1.2798063383622331, "learning_rate": 1.1717641365401994e-06, "loss": 0.1342, "step": 10964 }, { "epoch": 0.78, "grad_norm": 1.2441209933602742, "learning_rate": 1.171019083211733e-06, "loss": 0.1738, "step": 10965 }, { "epoch": 0.78, "grad_norm": 1.5427944260905202, "learning_rate": 1.1702742354077511e-06, "loss": 0.1493, "step": 10966 }, { "epoch": 0.78, "grad_norm": 1.33790118895171, "learning_rate": 1.1695295931682348e-06, "loss": 0.1411, "step": 10967 }, { "epoch": 0.78, "grad_norm": 1.1839384806564794, "learning_rate": 1.168785156533151e-06, "loss": 0.1471, "step": 10968 }, { "epoch": 0.78, "grad_norm": 1.3485268163845912, "learning_rate": 1.1680409255424606e-06, "loss": 0.1997, "step": 10969 }, { "epoch": 0.78, "grad_norm": 1.3813885208552825, "learning_rate": 1.1672969002361083e-06, "loss": 0.1696, "step": 10970 }, { "epoch": 0.78, "grad_norm": 1.274772352696011, "learning_rate": 1.1665530806540331e-06, "loss": 0.1722, "step": 10971 }, { "epoch": 0.78, "grad_norm": 1.0982590317808154, "learning_rate": 1.165809466836158e-06, "loss": 0.1162, "step": 10972 }, { "epoch": 0.78, "grad_norm": 1.3796921745862174, "learning_rate": 1.1650660588223982e-06, "loss": 0.1721, "step": 10973 }, { "epoch": 0.78, "grad_norm": 1.6604738275298139, "learning_rate": 1.164322856652657e-06, "loss": 0.2042, "step": 10974 }, { "epoch": 0.79, "grad_norm": 1.3010538100470785, "learning_rate": 1.1635798603668264e-06, "loss": 0.1611, "step": 10975 }, { "epoch": 0.79, "grad_norm": 1.3016453362634859, "learning_rate": 1.1628370700047892e-06, "loss": 0.1516, "step": 10976 }, { "epoch": 0.79, "grad_norm": 1.4682939908293164, "learning_rate": 1.1620944856064126e-06, "loss": 0.1821, "step": 10977 }, { "epoch": 0.79, "grad_norm": 1.3005040209427214, "learning_rate": 1.161352107211558e-06, "loss": 0.1868, "step": 10978 }, { "epoch": 0.79, "grad_norm": 1.3997474192724004, "learning_rate": 1.1606099348600703e-06, "loss": 0.1641, "step": 10979 }, { "epoch": 0.79, "grad_norm": 6.087893926523309, "learning_rate": 1.1598679685917901e-06, "loss": 0.5316, "step": 10980 }, { "epoch": 0.79, "grad_norm": 1.1928035700434374, "learning_rate": 1.15912620844654e-06, "loss": 0.1259, "step": 10981 }, { "epoch": 0.79, "grad_norm": 1.380211240870162, "learning_rate": 1.1583846544641357e-06, "loss": 0.2123, "step": 10982 }, { "epoch": 0.79, "grad_norm": 1.1997649445909777, "learning_rate": 1.1576433066843818e-06, "loss": 0.1583, "step": 10983 }, { "epoch": 0.79, "grad_norm": 1.6457831230459785, "learning_rate": 1.1569021651470714e-06, "loss": 0.2054, "step": 10984 }, { "epoch": 0.79, "grad_norm": 1.5439557982592125, "learning_rate": 1.1561612298919834e-06, "loss": 0.2149, "step": 10985 }, { "epoch": 0.79, "grad_norm": 1.3450096149097845, "learning_rate": 1.1554205009588909e-06, "loss": 0.1614, "step": 10986 }, { "epoch": 0.79, "grad_norm": 6.6552913173139245, "learning_rate": 1.1546799783875512e-06, "loss": 0.5728, "step": 10987 }, { "epoch": 0.79, "grad_norm": 1.4442767736012718, "learning_rate": 1.1539396622177152e-06, "loss": 0.1911, "step": 10988 }, { "epoch": 0.79, "grad_norm": 1.292966122348718, "learning_rate": 1.1531995524891172e-06, "loss": 0.1881, "step": 10989 }, { "epoch": 0.79, "grad_norm": 1.4765604588833408, "learning_rate": 1.1524596492414857e-06, "loss": 0.1637, "step": 10990 }, { "epoch": 0.79, "grad_norm": 1.5081912897601593, "learning_rate": 1.1517199525145317e-06, "loss": 0.226, "step": 10991 }, { "epoch": 0.79, "grad_norm": 1.1510794035893048, "learning_rate": 1.150980462347966e-06, "loss": 0.1674, "step": 10992 }, { "epoch": 0.79, "grad_norm": 1.2851822223676088, "learning_rate": 1.1502411787814754e-06, "loss": 0.1578, "step": 10993 }, { "epoch": 0.79, "grad_norm": 1.3681006712494275, "learning_rate": 1.1495021018547459e-06, "loss": 0.1787, "step": 10994 }, { "epoch": 0.79, "grad_norm": 5.039762524276647, "learning_rate": 1.1487632316074449e-06, "loss": 0.5763, "step": 10995 }, { "epoch": 0.79, "grad_norm": 1.257111125667077, "learning_rate": 1.148024568079235e-06, "loss": 0.1777, "step": 10996 }, { "epoch": 0.79, "grad_norm": 1.1806158417239778, "learning_rate": 1.1472861113097616e-06, "loss": 0.1375, "step": 10997 }, { "epoch": 0.79, "grad_norm": 1.1166604448672575, "learning_rate": 1.1465478613386654e-06, "loss": 0.1201, "step": 10998 }, { "epoch": 0.79, "grad_norm": 1.2059095530530801, "learning_rate": 1.1458098182055693e-06, "loss": 0.1489, "step": 10999 }, { "epoch": 0.79, "grad_norm": 1.4598640185367813, "learning_rate": 1.1450719819500906e-06, "loss": 0.138, "step": 11000 }, { "epoch": 0.79, "grad_norm": 1.3073839265477671, "learning_rate": 1.1443343526118327e-06, "loss": 0.1769, "step": 11001 }, { "epoch": 0.79, "grad_norm": 1.2853183071958543, "learning_rate": 1.1435969302303907e-06, "loss": 0.1641, "step": 11002 }, { "epoch": 0.79, "grad_norm": 1.3743770635188666, "learning_rate": 1.1428597148453436e-06, "loss": 0.1906, "step": 11003 }, { "epoch": 0.79, "grad_norm": 1.4398085978963613, "learning_rate": 1.1421227064962641e-06, "loss": 0.2057, "step": 11004 }, { "epoch": 0.79, "grad_norm": 1.406245580981124, "learning_rate": 1.1413859052227106e-06, "loss": 0.2087, "step": 11005 }, { "epoch": 0.79, "grad_norm": 1.2956258926391493, "learning_rate": 1.1406493110642313e-06, "loss": 0.1776, "step": 11006 }, { "epoch": 0.79, "grad_norm": 1.3252337636594103, "learning_rate": 1.1399129240603667e-06, "loss": 0.1653, "step": 11007 }, { "epoch": 0.79, "grad_norm": 1.2198248288399403, "learning_rate": 1.1391767442506395e-06, "loss": 0.1437, "step": 11008 }, { "epoch": 0.79, "grad_norm": 1.2558935264984894, "learning_rate": 1.1384407716745666e-06, "loss": 0.1528, "step": 11009 }, { "epoch": 0.79, "grad_norm": 5.122582565894138, "learning_rate": 1.1377050063716517e-06, "loss": 0.4582, "step": 11010 }, { "epoch": 0.79, "grad_norm": 1.2766171705690863, "learning_rate": 1.1369694483813893e-06, "loss": 0.1483, "step": 11011 }, { "epoch": 0.79, "grad_norm": 16.524030106707816, "learning_rate": 1.1362340977432584e-06, "loss": 0.6287, "step": 11012 }, { "epoch": 0.79, "grad_norm": 1.5272315422516722, "learning_rate": 1.1354989544967331e-06, "loss": 0.1953, "step": 11013 }, { "epoch": 0.79, "grad_norm": 4.719434985355951, "learning_rate": 1.134764018681269e-06, "loss": 0.4772, "step": 11014 }, { "epoch": 0.79, "grad_norm": 1.5824412095678229, "learning_rate": 1.1340292903363187e-06, "loss": 0.2163, "step": 11015 }, { "epoch": 0.79, "grad_norm": 1.2391756504547187, "learning_rate": 1.1332947695013158e-06, "loss": 0.151, "step": 11016 }, { "epoch": 0.79, "grad_norm": 1.3345650914400444, "learning_rate": 1.1325604562156877e-06, "loss": 0.1584, "step": 11017 }, { "epoch": 0.79, "grad_norm": 1.244820217111886, "learning_rate": 1.1318263505188504e-06, "loss": 0.1525, "step": 11018 }, { "epoch": 0.79, "grad_norm": 12.945752358335145, "learning_rate": 1.1310924524502082e-06, "loss": 0.6471, "step": 11019 }, { "epoch": 0.79, "grad_norm": 6.178713029249838, "learning_rate": 1.1303587620491513e-06, "loss": 0.6066, "step": 11020 }, { "epoch": 0.79, "grad_norm": 1.3204515922289943, "learning_rate": 1.1296252793550644e-06, "loss": 0.1691, "step": 11021 }, { "epoch": 0.79, "grad_norm": 1.3606917110714518, "learning_rate": 1.1288920044073148e-06, "loss": 0.1978, "step": 11022 }, { "epoch": 0.79, "grad_norm": 1.2969465212871056, "learning_rate": 1.1281589372452644e-06, "loss": 0.1532, "step": 11023 }, { "epoch": 0.79, "grad_norm": 1.1970237613391201, "learning_rate": 1.1274260779082597e-06, "loss": 0.1601, "step": 11024 }, { "epoch": 0.79, "grad_norm": 1.1617692053364916, "learning_rate": 1.1266934264356378e-06, "loss": 0.1424, "step": 11025 }, { "epoch": 0.79, "grad_norm": 1.3346372964204707, "learning_rate": 1.1259609828667246e-06, "loss": 0.1489, "step": 11026 }, { "epoch": 0.79, "grad_norm": 1.3926830046872551, "learning_rate": 1.125228747240837e-06, "loss": 0.1599, "step": 11027 }, { "epoch": 0.79, "grad_norm": 1.8564013714541625, "learning_rate": 1.1244967195972745e-06, "loss": 0.1906, "step": 11028 }, { "epoch": 0.79, "grad_norm": 1.3837937602297297, "learning_rate": 1.1237648999753336e-06, "loss": 0.1689, "step": 11029 }, { "epoch": 0.79, "grad_norm": 1.198689690643117, "learning_rate": 1.1230332884142914e-06, "loss": 0.1326, "step": 11030 }, { "epoch": 0.79, "grad_norm": 1.2956544546503657, "learning_rate": 1.1223018849534217e-06, "loss": 0.2057, "step": 11031 }, { "epoch": 0.79, "grad_norm": 1.4489668151242507, "learning_rate": 1.12157068963198e-06, "loss": 0.1462, "step": 11032 }, { "epoch": 0.79, "grad_norm": 1.201700960788802, "learning_rate": 1.120839702489216e-06, "loss": 0.1626, "step": 11033 }, { "epoch": 0.79, "grad_norm": 1.4109223765732992, "learning_rate": 1.1201089235643653e-06, "loss": 0.1758, "step": 11034 }, { "epoch": 0.79, "grad_norm": 8.202113185419405, "learning_rate": 1.1193783528966552e-06, "loss": 0.6505, "step": 11035 }, { "epoch": 0.79, "grad_norm": 1.3690935597088953, "learning_rate": 1.1186479905252962e-06, "loss": 0.1842, "step": 11036 }, { "epoch": 0.79, "grad_norm": 1.1359073666882549, "learning_rate": 1.1179178364894949e-06, "loss": 0.1423, "step": 11037 }, { "epoch": 0.79, "grad_norm": 1.4926833009133986, "learning_rate": 1.1171878908284406e-06, "loss": 0.1831, "step": 11038 }, { "epoch": 0.79, "grad_norm": 1.2176937435874768, "learning_rate": 1.116458153581314e-06, "loss": 0.1428, "step": 11039 }, { "epoch": 0.79, "grad_norm": 1.3570133071606425, "learning_rate": 1.1157286247872873e-06, "loss": 0.174, "step": 11040 }, { "epoch": 0.79, "grad_norm": 1.3043834952177298, "learning_rate": 1.114999304485515e-06, "loss": 0.1889, "step": 11041 }, { "epoch": 0.79, "grad_norm": 1.4409855169941834, "learning_rate": 1.1142701927151456e-06, "loss": 0.2024, "step": 11042 }, { "epoch": 0.79, "grad_norm": 1.5580958977041677, "learning_rate": 1.1135412895153153e-06, "loss": 0.2257, "step": 11043 }, { "epoch": 0.79, "grad_norm": 1.5224752770619159, "learning_rate": 1.1128125949251494e-06, "loss": 0.1832, "step": 11044 }, { "epoch": 0.79, "grad_norm": 1.3456512075204285, "learning_rate": 1.1120841089837592e-06, "loss": 0.1542, "step": 11045 }, { "epoch": 0.79, "grad_norm": 69.65170279983215, "learning_rate": 1.1113558317302498e-06, "loss": 0.605, "step": 11046 }, { "epoch": 0.79, "grad_norm": 1.4040364388772975, "learning_rate": 1.1106277632037093e-06, "loss": 0.1803, "step": 11047 }, { "epoch": 0.79, "grad_norm": 1.157671243520093, "learning_rate": 1.10989990344322e-06, "loss": 0.1836, "step": 11048 }, { "epoch": 0.79, "grad_norm": 1.278594547886093, "learning_rate": 1.1091722524878478e-06, "loss": 0.1497, "step": 11049 }, { "epoch": 0.79, "grad_norm": 1.2050138216783188, "learning_rate": 1.108444810376652e-06, "loss": 0.1509, "step": 11050 }, { "epoch": 0.79, "grad_norm": 1.3429566855091466, "learning_rate": 1.1077175771486781e-06, "loss": 0.1546, "step": 11051 }, { "epoch": 0.79, "grad_norm": 1.446702395408079, "learning_rate": 1.1069905528429631e-06, "loss": 0.1879, "step": 11052 }, { "epoch": 0.79, "grad_norm": 1.4333995907421395, "learning_rate": 1.1062637374985273e-06, "loss": 0.1997, "step": 11053 }, { "epoch": 0.79, "grad_norm": 1.3774328351601197, "learning_rate": 1.1055371311543872e-06, "loss": 0.198, "step": 11054 }, { "epoch": 0.79, "grad_norm": 1.411625333503697, "learning_rate": 1.10481073384954e-06, "loss": 0.1795, "step": 11055 }, { "epoch": 0.79, "grad_norm": 1.4608589328073691, "learning_rate": 1.1040845456229793e-06, "loss": 0.1963, "step": 11056 }, { "epoch": 0.79, "grad_norm": 1.1713571222911876, "learning_rate": 1.1033585665136814e-06, "loss": 0.1318, "step": 11057 }, { "epoch": 0.79, "grad_norm": 5.3446533914960135, "learning_rate": 1.102632796560615e-06, "loss": 0.6952, "step": 11058 }, { "epoch": 0.79, "grad_norm": 1.22123189629881, "learning_rate": 1.1019072358027366e-06, "loss": 0.1452, "step": 11059 }, { "epoch": 0.79, "grad_norm": 1.4352019369823124, "learning_rate": 1.1011818842789928e-06, "loss": 0.1701, "step": 11060 }, { "epoch": 0.79, "grad_norm": 1.387554273654965, "learning_rate": 1.1004567420283151e-06, "loss": 0.1616, "step": 11061 }, { "epoch": 0.79, "grad_norm": 1.2397862876273045, "learning_rate": 1.0997318090896291e-06, "loss": 0.1504, "step": 11062 }, { "epoch": 0.79, "grad_norm": 1.3753208188851656, "learning_rate": 1.0990070855018426e-06, "loss": 0.1811, "step": 11063 }, { "epoch": 0.79, "grad_norm": 7.638540343584256, "learning_rate": 1.0982825713038596e-06, "loss": 0.6794, "step": 11064 }, { "epoch": 0.79, "grad_norm": 1.323078266007812, "learning_rate": 1.0975582665345664e-06, "loss": 0.1787, "step": 11065 }, { "epoch": 0.79, "grad_norm": 1.270777338925711, "learning_rate": 1.096834171232841e-06, "loss": 0.1659, "step": 11066 }, { "epoch": 0.79, "grad_norm": 1.357225567824931, "learning_rate": 1.0961102854375515e-06, "loss": 0.2232, "step": 11067 }, { "epoch": 0.79, "grad_norm": 1.48766266445047, "learning_rate": 1.0953866091875542e-06, "loss": 0.1977, "step": 11068 }, { "epoch": 0.79, "grad_norm": 1.4504211063509296, "learning_rate": 1.0946631425216897e-06, "loss": 0.1835, "step": 11069 }, { "epoch": 0.79, "grad_norm": 1.3918549185331648, "learning_rate": 1.0939398854787925e-06, "loss": 0.1959, "step": 11070 }, { "epoch": 0.79, "grad_norm": 1.3988013896047897, "learning_rate": 1.093216838097686e-06, "loss": 0.1873, "step": 11071 }, { "epoch": 0.79, "grad_norm": 1.4320590180105313, "learning_rate": 1.0924940004171775e-06, "loss": 0.1955, "step": 11072 }, { "epoch": 0.79, "grad_norm": 1.3737704532253179, "learning_rate": 1.091771372476068e-06, "loss": 0.1582, "step": 11073 }, { "epoch": 0.79, "grad_norm": 1.3656059511609862, "learning_rate": 1.0910489543131442e-06, "loss": 0.2115, "step": 11074 }, { "epoch": 0.79, "grad_norm": 1.3918198500801657, "learning_rate": 1.0903267459671823e-06, "loss": 0.1998, "step": 11075 }, { "epoch": 0.79, "grad_norm": 1.3087138198987212, "learning_rate": 1.0896047474769489e-06, "loss": 0.163, "step": 11076 }, { "epoch": 0.79, "grad_norm": 1.4585104126550326, "learning_rate": 1.088882958881199e-06, "loss": 0.1929, "step": 11077 }, { "epoch": 0.79, "grad_norm": 1.4827821821431055, "learning_rate": 1.0881613802186718e-06, "loss": 0.1761, "step": 11078 }, { "epoch": 0.79, "grad_norm": 5.633022216853365, "learning_rate": 1.0874400115281025e-06, "loss": 0.5729, "step": 11079 }, { "epoch": 0.79, "grad_norm": 1.4014813406281752, "learning_rate": 1.0867188528482087e-06, "loss": 0.1515, "step": 11080 }, { "epoch": 0.79, "grad_norm": 1.345593055023765, "learning_rate": 1.085997904217701e-06, "loss": 0.1966, "step": 11081 }, { "epoch": 0.79, "grad_norm": 1.4623345013369717, "learning_rate": 1.0852771656752753e-06, "loss": 0.1353, "step": 11082 }, { "epoch": 0.79, "grad_norm": 1.181801533691612, "learning_rate": 1.0845566372596185e-06, "loss": 0.1493, "step": 11083 }, { "epoch": 0.79, "grad_norm": 1.5503009645819674, "learning_rate": 1.0838363190094065e-06, "loss": 0.1989, "step": 11084 }, { "epoch": 0.79, "grad_norm": 1.380675386245187, "learning_rate": 1.0831162109633043e-06, "loss": 0.165, "step": 11085 }, { "epoch": 0.79, "grad_norm": 1.4117785828975489, "learning_rate": 1.0823963131599614e-06, "loss": 0.2174, "step": 11086 }, { "epoch": 0.79, "grad_norm": 1.2076115031588082, "learning_rate": 1.0816766256380218e-06, "loss": 0.1603, "step": 11087 }, { "epoch": 0.79, "grad_norm": 1.3552259275329406, "learning_rate": 1.080957148436113e-06, "loss": 0.1364, "step": 11088 }, { "epoch": 0.79, "grad_norm": 1.321282782357995, "learning_rate": 1.0802378815928565e-06, "loss": 0.1669, "step": 11089 }, { "epoch": 0.79, "grad_norm": 1.1991648110365258, "learning_rate": 1.0795188251468569e-06, "loss": 0.1421, "step": 11090 }, { "epoch": 0.79, "grad_norm": 1.2779882611443147, "learning_rate": 1.0787999791367126e-06, "loss": 0.1689, "step": 11091 }, { "epoch": 0.79, "grad_norm": 1.2745288583651153, "learning_rate": 1.0780813436010057e-06, "loss": 0.1631, "step": 11092 }, { "epoch": 0.79, "grad_norm": 1.2890327840751468, "learning_rate": 1.077362918578312e-06, "loss": 0.2025, "step": 11093 }, { "epoch": 0.79, "grad_norm": 1.2252457671139332, "learning_rate": 1.076644704107192e-06, "loss": 0.1836, "step": 11094 }, { "epoch": 0.79, "grad_norm": 5.128840446846303, "learning_rate": 1.0759267002261997e-06, "loss": 0.4893, "step": 11095 }, { "epoch": 0.79, "grad_norm": 1.3650289528342299, "learning_rate": 1.0752089069738713e-06, "loss": 0.1574, "step": 11096 }, { "epoch": 0.79, "grad_norm": 1.405440071365288, "learning_rate": 1.0744913243887379e-06, "loss": 0.149, "step": 11097 }, { "epoch": 0.79, "grad_norm": 1.3214571934044328, "learning_rate": 1.0737739525093132e-06, "loss": 0.1634, "step": 11098 }, { "epoch": 0.79, "grad_norm": 1.219696713589709, "learning_rate": 1.0730567913741063e-06, "loss": 0.1502, "step": 11099 }, { "epoch": 0.79, "grad_norm": 4.478619192817674, "learning_rate": 1.0723398410216085e-06, "loss": 0.4887, "step": 11100 }, { "epoch": 0.79, "grad_norm": 1.444602108478056, "learning_rate": 1.0716231014903038e-06, "loss": 0.162, "step": 11101 }, { "epoch": 0.79, "grad_norm": 1.3337560119010048, "learning_rate": 1.070906572818665e-06, "loss": 0.1863, "step": 11102 }, { "epoch": 0.79, "grad_norm": 1.3724028626408107, "learning_rate": 1.0701902550451516e-06, "loss": 0.1607, "step": 11103 }, { "epoch": 0.79, "grad_norm": 1.3776270531710393, "learning_rate": 1.0694741482082144e-06, "loss": 0.1654, "step": 11104 }, { "epoch": 0.79, "grad_norm": 1.2827369049576012, "learning_rate": 1.0687582523462887e-06, "loss": 0.1689, "step": 11105 }, { "epoch": 0.79, "grad_norm": 5.843595525987163, "learning_rate": 1.0680425674978029e-06, "loss": 0.5041, "step": 11106 }, { "epoch": 0.79, "grad_norm": 6.59237598848215, "learning_rate": 1.0673270937011703e-06, "loss": 0.5283, "step": 11107 }, { "epoch": 0.79, "grad_norm": 1.1169470440021918, "learning_rate": 1.066611830994797e-06, "loss": 0.1491, "step": 11108 }, { "epoch": 0.79, "grad_norm": 1.3723220396336342, "learning_rate": 1.0658967794170721e-06, "loss": 0.197, "step": 11109 }, { "epoch": 0.79, "grad_norm": 1.459623452660205, "learning_rate": 1.0651819390063794e-06, "loss": 0.188, "step": 11110 }, { "epoch": 0.79, "grad_norm": 1.1663476028625093, "learning_rate": 1.0644673098010878e-06, "loss": 0.169, "step": 11111 }, { "epoch": 0.79, "grad_norm": 1.2179626154463086, "learning_rate": 1.0637528918395572e-06, "loss": 0.156, "step": 11112 }, { "epoch": 0.79, "grad_norm": 1.292982170832504, "learning_rate": 1.0630386851601327e-06, "loss": 0.1779, "step": 11113 }, { "epoch": 0.79, "grad_norm": 4.99203663331111, "learning_rate": 1.0623246898011514e-06, "loss": 0.6062, "step": 11114 }, { "epoch": 0.8, "grad_norm": 1.3551704713085768, "learning_rate": 1.0616109058009361e-06, "loss": 0.1375, "step": 11115 }, { "epoch": 0.8, "grad_norm": 1.3534551921940863, "learning_rate": 1.0608973331978028e-06, "loss": 0.169, "step": 11116 }, { "epoch": 0.8, "grad_norm": 1.0725766827610275, "learning_rate": 1.0601839720300495e-06, "loss": 0.1332, "step": 11117 }, { "epoch": 0.8, "grad_norm": 1.28929409911985, "learning_rate": 1.059470822335969e-06, "loss": 0.1583, "step": 11118 }, { "epoch": 0.8, "grad_norm": 1.5052511454209667, "learning_rate": 1.0587578841538392e-06, "loss": 0.2082, "step": 11119 }, { "epoch": 0.8, "grad_norm": 1.2006494247886867, "learning_rate": 1.0580451575219304e-06, "loss": 0.1316, "step": 11120 }, { "epoch": 0.8, "grad_norm": 1.3878190942866944, "learning_rate": 1.0573326424784957e-06, "loss": 0.1848, "step": 11121 }, { "epoch": 0.8, "grad_norm": 1.3941424850751762, "learning_rate": 1.0566203390617823e-06, "loss": 0.1956, "step": 11122 }, { "epoch": 0.8, "grad_norm": 1.477315206105067, "learning_rate": 1.0559082473100218e-06, "loss": 0.2041, "step": 11123 }, { "epoch": 0.8, "grad_norm": 1.3398934024864189, "learning_rate": 1.0551963672614385e-06, "loss": 0.1624, "step": 11124 }, { "epoch": 0.8, "grad_norm": 1.101342489803704, "learning_rate": 1.0544846989542413e-06, "loss": 0.1433, "step": 11125 }, { "epoch": 0.8, "grad_norm": 1.5011513538676027, "learning_rate": 1.0537732424266311e-06, "loss": 0.2143, "step": 11126 }, { "epoch": 0.8, "grad_norm": 1.230979814473399, "learning_rate": 1.0530619977167949e-06, "loss": 0.126, "step": 11127 }, { "epoch": 0.8, "grad_norm": 1.435994810853229, "learning_rate": 1.0523509648629126e-06, "loss": 0.1897, "step": 11128 }, { "epoch": 0.8, "grad_norm": 1.1856642966476643, "learning_rate": 1.0516401439031453e-06, "loss": 0.158, "step": 11129 }, { "epoch": 0.8, "grad_norm": 1.4782973294687232, "learning_rate": 1.0509295348756503e-06, "loss": 0.2386, "step": 11130 }, { "epoch": 0.8, "grad_norm": 1.5197555220442451, "learning_rate": 1.0502191378185684e-06, "loss": 0.1847, "step": 11131 }, { "epoch": 0.8, "grad_norm": 1.3630595073131238, "learning_rate": 1.0495089527700325e-06, "loss": 0.1901, "step": 11132 }, { "epoch": 0.8, "grad_norm": 6.015952216179803, "learning_rate": 1.0487989797681602e-06, "loss": 0.5251, "step": 11133 }, { "epoch": 0.8, "grad_norm": 6.386678523195115, "learning_rate": 1.048089218851061e-06, "loss": 0.4171, "step": 11134 }, { "epoch": 0.8, "grad_norm": 1.2609467831012091, "learning_rate": 1.0473796700568328e-06, "loss": 0.15, "step": 11135 }, { "epoch": 0.8, "grad_norm": 1.3926719380068098, "learning_rate": 1.046670333423561e-06, "loss": 0.1668, "step": 11136 }, { "epoch": 0.8, "grad_norm": 1.2421880562324485, "learning_rate": 1.0459612089893217e-06, "loss": 0.1724, "step": 11137 }, { "epoch": 0.8, "grad_norm": 1.5600975396715222, "learning_rate": 1.0452522967921742e-06, "loss": 0.1893, "step": 11138 }, { "epoch": 0.8, "grad_norm": 1.5979140395794278, "learning_rate": 1.0445435968701733e-06, "loss": 0.1927, "step": 11139 }, { "epoch": 0.8, "grad_norm": 1.3501946618131464, "learning_rate": 1.043835109261357e-06, "loss": 0.175, "step": 11140 }, { "epoch": 0.8, "grad_norm": 1.2399340308141287, "learning_rate": 1.0431268340037565e-06, "loss": 0.1642, "step": 11141 }, { "epoch": 0.8, "grad_norm": 1.4959960914038462, "learning_rate": 1.0424187711353861e-06, "loss": 0.1607, "step": 11142 }, { "epoch": 0.8, "grad_norm": 1.5200543089606962, "learning_rate": 1.0417109206942533e-06, "loss": 0.198, "step": 11143 }, { "epoch": 0.8, "grad_norm": 1.3721799523865998, "learning_rate": 1.041003282718353e-06, "loss": 0.184, "step": 11144 }, { "epoch": 0.8, "grad_norm": 1.3275147752498082, "learning_rate": 1.0402958572456695e-06, "loss": 0.1808, "step": 11145 }, { "epoch": 0.8, "grad_norm": 1.4089808414788942, "learning_rate": 1.0395886443141718e-06, "loss": 0.1857, "step": 11146 }, { "epoch": 0.8, "grad_norm": 1.74194148913727, "learning_rate": 1.0388816439618232e-06, "loss": 0.2455, "step": 11147 }, { "epoch": 0.8, "grad_norm": 1.2131626198331975, "learning_rate": 1.0381748562265704e-06, "loss": 0.1161, "step": 11148 }, { "epoch": 0.8, "grad_norm": 1.2857279175384386, "learning_rate": 1.0374682811463527e-06, "loss": 0.1653, "step": 11149 }, { "epoch": 0.8, "grad_norm": 1.3249149161539553, "learning_rate": 1.0367619187590939e-06, "loss": 0.1363, "step": 11150 }, { "epoch": 0.8, "grad_norm": 1.4291198725742187, "learning_rate": 1.0360557691027107e-06, "loss": 0.1885, "step": 11151 }, { "epoch": 0.8, "grad_norm": 3.5946607914372644, "learning_rate": 1.0353498322151057e-06, "loss": 0.4811, "step": 11152 }, { "epoch": 0.8, "grad_norm": 1.2488817922310351, "learning_rate": 1.0346441081341719e-06, "loss": 0.1577, "step": 11153 }, { "epoch": 0.8, "grad_norm": 1.184795198872465, "learning_rate": 1.0339385968977878e-06, "loss": 0.1585, "step": 11154 }, { "epoch": 0.8, "grad_norm": 1.4709592961050704, "learning_rate": 1.0332332985438248e-06, "loss": 0.2251, "step": 11155 }, { "epoch": 0.8, "grad_norm": 1.166193031134425, "learning_rate": 1.0325282131101382e-06, "loss": 0.1486, "step": 11156 }, { "epoch": 0.8, "grad_norm": 1.4222976848670972, "learning_rate": 1.0318233406345763e-06, "loss": 0.1652, "step": 11157 }, { "epoch": 0.8, "grad_norm": 1.3450064486780235, "learning_rate": 1.031118681154971e-06, "loss": 0.1901, "step": 11158 }, { "epoch": 0.8, "grad_norm": 1.5101419180734172, "learning_rate": 1.0304142347091477e-06, "loss": 0.1747, "step": 11159 }, { "epoch": 0.8, "grad_norm": 1.250474933122123, "learning_rate": 1.0297100013349181e-06, "loss": 0.1483, "step": 11160 }, { "epoch": 0.8, "grad_norm": 1.347034506360916, "learning_rate": 1.0290059810700841e-06, "loss": 0.1706, "step": 11161 }, { "epoch": 0.8, "grad_norm": 1.3282864529891576, "learning_rate": 1.0283021739524313e-06, "loss": 0.1812, "step": 11162 }, { "epoch": 0.8, "grad_norm": 1.3180546110488653, "learning_rate": 1.0275985800197408e-06, "loss": 0.1522, "step": 11163 }, { "epoch": 0.8, "grad_norm": 6.674186260902461, "learning_rate": 1.0268951993097753e-06, "loss": 0.5349, "step": 11164 }, { "epoch": 0.8, "grad_norm": 1.4010563696600737, "learning_rate": 1.0261920318602924e-06, "loss": 0.1947, "step": 11165 }, { "epoch": 0.8, "grad_norm": 1.2543933809436032, "learning_rate": 1.0254890777090331e-06, "loss": 0.1673, "step": 11166 }, { "epoch": 0.8, "grad_norm": 1.4006686664352774, "learning_rate": 1.0247863368937306e-06, "loss": 0.1905, "step": 11167 }, { "epoch": 0.8, "grad_norm": 1.259052746900076, "learning_rate": 1.024083809452104e-06, "loss": 0.1598, "step": 11168 }, { "epoch": 0.8, "grad_norm": 1.3489387450139074, "learning_rate": 1.0233814954218634e-06, "loss": 0.182, "step": 11169 }, { "epoch": 0.8, "grad_norm": 1.3618575760577665, "learning_rate": 1.022679394840707e-06, "loss": 0.1602, "step": 11170 }, { "epoch": 0.8, "grad_norm": 1.2222195453749545, "learning_rate": 1.021977507746319e-06, "loss": 0.154, "step": 11171 }, { "epoch": 0.8, "grad_norm": 1.2776335246185315, "learning_rate": 1.0212758341763752e-06, "loss": 0.1625, "step": 11172 }, { "epoch": 0.8, "grad_norm": 1.3360590491440565, "learning_rate": 1.020574374168537e-06, "loss": 0.1679, "step": 11173 }, { "epoch": 0.8, "grad_norm": 1.401117924397328, "learning_rate": 1.019873127760458e-06, "loss": 0.1708, "step": 11174 }, { "epoch": 0.8, "grad_norm": 1.0886023085883567, "learning_rate": 1.019172094989776e-06, "loss": 0.1457, "step": 11175 }, { "epoch": 0.8, "grad_norm": 4.932938684432314, "learning_rate": 1.0184712758941212e-06, "loss": 0.6062, "step": 11176 }, { "epoch": 0.8, "grad_norm": 1.3562633741420573, "learning_rate": 1.0177706705111106e-06, "loss": 0.1602, "step": 11177 }, { "epoch": 0.8, "grad_norm": 6.662255864120727, "learning_rate": 1.0170702788783509e-06, "loss": 0.5371, "step": 11178 }, { "epoch": 0.8, "grad_norm": 1.2162021078672893, "learning_rate": 1.0163701010334343e-06, "loss": 0.1283, "step": 11179 }, { "epoch": 0.8, "grad_norm": 1.4955097136095414, "learning_rate": 1.0156701370139454e-06, "loss": 0.1915, "step": 11180 }, { "epoch": 0.8, "grad_norm": 1.2831355204712909, "learning_rate": 1.0149703868574533e-06, "loss": 0.1446, "step": 11181 }, { "epoch": 0.8, "grad_norm": 1.470990025490076, "learning_rate": 1.0142708506015208e-06, "loss": 0.158, "step": 11182 }, { "epoch": 0.8, "grad_norm": 1.3280912515408867, "learning_rate": 1.0135715282836933e-06, "loss": 0.1816, "step": 11183 }, { "epoch": 0.8, "grad_norm": 1.370992289139157, "learning_rate": 1.0128724199415092e-06, "loss": 0.1427, "step": 11184 }, { "epoch": 0.8, "grad_norm": 4.35704111104011, "learning_rate": 1.0121735256124925e-06, "loss": 0.5628, "step": 11185 }, { "epoch": 0.8, "grad_norm": 1.2724302697056273, "learning_rate": 1.0114748453341605e-06, "loss": 0.1295, "step": 11186 }, { "epoch": 0.8, "grad_norm": 1.3611937819286883, "learning_rate": 1.0107763791440118e-06, "loss": 0.1524, "step": 11187 }, { "epoch": 0.8, "grad_norm": 1.4750807001387294, "learning_rate": 1.0100781270795396e-06, "loss": 0.2131, "step": 11188 }, { "epoch": 0.8, "grad_norm": 1.4206421569999124, "learning_rate": 1.0093800891782214e-06, "loss": 0.181, "step": 11189 }, { "epoch": 0.8, "grad_norm": 1.2959495947946629, "learning_rate": 1.0086822654775274e-06, "loss": 0.1459, "step": 11190 }, { "epoch": 0.8, "grad_norm": 1.4925786311774873, "learning_rate": 1.0079846560149115e-06, "loss": 0.1727, "step": 11191 }, { "epoch": 0.8, "grad_norm": 1.3296792006644287, "learning_rate": 1.0072872608278206e-06, "loss": 0.1878, "step": 11192 }, { "epoch": 0.8, "grad_norm": 1.3846123085232542, "learning_rate": 1.0065900799536866e-06, "loss": 0.209, "step": 11193 }, { "epoch": 0.8, "grad_norm": 1.4740125596429086, "learning_rate": 1.0058931134299322e-06, "loss": 0.1693, "step": 11194 }, { "epoch": 0.8, "grad_norm": 6.276414210041604, "learning_rate": 1.005196361293967e-06, "loss": 0.5279, "step": 11195 }, { "epoch": 0.8, "grad_norm": 1.2576006925643655, "learning_rate": 1.0044998235831927e-06, "loss": 0.1605, "step": 11196 }, { "epoch": 0.8, "grad_norm": 1.749871280992744, "learning_rate": 1.0038035003349927e-06, "loss": 0.2544, "step": 11197 }, { "epoch": 0.8, "grad_norm": 1.1688265932300483, "learning_rate": 1.003107391586745e-06, "loss": 0.1449, "step": 11198 }, { "epoch": 0.8, "grad_norm": 1.3952724008586497, "learning_rate": 1.0024114973758154e-06, "loss": 0.1621, "step": 11199 }, { "epoch": 0.8, "grad_norm": 1.1960062898824, "learning_rate": 1.0017158177395531e-06, "loss": 0.1468, "step": 11200 }, { "epoch": 0.8, "grad_norm": 1.429594431442411, "learning_rate": 1.0010203527153034e-06, "loss": 0.204, "step": 11201 }, { "epoch": 0.8, "grad_norm": 1.5348639876914003, "learning_rate": 1.000325102340392e-06, "loss": 0.2015, "step": 11202 }, { "epoch": 0.8, "grad_norm": 1.3688547526533104, "learning_rate": 9.996300666521397e-07, "loss": 0.205, "step": 11203 }, { "epoch": 0.8, "grad_norm": 1.4346566499001738, "learning_rate": 9.989352456878525e-07, "loss": 0.1699, "step": 11204 }, { "epoch": 0.8, "grad_norm": 1.3940865477919593, "learning_rate": 9.982406394848276e-07, "loss": 0.1598, "step": 11205 }, { "epoch": 0.8, "grad_norm": 1.3621843413009298, "learning_rate": 9.975462480803456e-07, "loss": 0.1691, "step": 11206 }, { "epoch": 0.8, "grad_norm": 1.1117209091608427, "learning_rate": 9.968520715116808e-07, "loss": 0.1443, "step": 11207 }, { "epoch": 0.8, "grad_norm": 1.3576463957783502, "learning_rate": 9.961581098160928e-07, "loss": 0.1776, "step": 11208 }, { "epoch": 0.8, "grad_norm": 1.407979831960541, "learning_rate": 9.954643630308318e-07, "loss": 0.1924, "step": 11209 }, { "epoch": 0.8, "grad_norm": 1.378835060706294, "learning_rate": 9.947708311931332e-07, "loss": 0.1883, "step": 11210 }, { "epoch": 0.8, "grad_norm": 1.225115649224681, "learning_rate": 9.940775143402249e-07, "loss": 0.1461, "step": 11211 }, { "epoch": 0.8, "grad_norm": 5.83665480730355, "learning_rate": 9.933844125093206e-07, "loss": 0.5009, "step": 11212 }, { "epoch": 0.8, "grad_norm": 1.29434453618675, "learning_rate": 9.926915257376252e-07, "loss": 0.1538, "step": 11213 }, { "epoch": 0.8, "grad_norm": 1.3769176590798835, "learning_rate": 9.919988540623264e-07, "loss": 0.1624, "step": 11214 }, { "epoch": 0.8, "grad_norm": 1.2728529246022755, "learning_rate": 9.913063975206082e-07, "loss": 0.1814, "step": 11215 }, { "epoch": 0.8, "grad_norm": 1.3294890564974637, "learning_rate": 9.906141561496358e-07, "loss": 0.1757, "step": 11216 }, { "epoch": 0.8, "grad_norm": 1.309861456377614, "learning_rate": 9.89922129986568e-07, "loss": 0.1602, "step": 11217 }, { "epoch": 0.8, "grad_norm": 1.5564195564796117, "learning_rate": 9.892303190685476e-07, "loss": 0.2072, "step": 11218 }, { "epoch": 0.8, "grad_norm": 1.3078303661955493, "learning_rate": 9.885387234327099e-07, "loss": 0.1883, "step": 11219 }, { "epoch": 0.8, "grad_norm": 1.3431925986799484, "learning_rate": 9.878473431161767e-07, "loss": 0.1252, "step": 11220 }, { "epoch": 0.8, "grad_norm": 1.4352237959940475, "learning_rate": 9.871561781560606e-07, "loss": 0.1584, "step": 11221 }, { "epoch": 0.8, "grad_norm": 1.3538200219329861, "learning_rate": 9.864652285894567e-07, "loss": 0.1889, "step": 11222 }, { "epoch": 0.8, "grad_norm": 1.4755556689624574, "learning_rate": 9.857744944534559e-07, "loss": 0.2228, "step": 11223 }, { "epoch": 0.8, "grad_norm": 1.4599485264010472, "learning_rate": 9.85083975785131e-07, "loss": 0.1875, "step": 11224 }, { "epoch": 0.8, "grad_norm": 1.33836799638902, "learning_rate": 9.843936726215498e-07, "loss": 0.1946, "step": 11225 }, { "epoch": 0.8, "grad_norm": 1.151640439349557, "learning_rate": 9.837035849997611e-07, "loss": 0.1372, "step": 11226 }, { "epoch": 0.8, "grad_norm": 1.2368020370743107, "learning_rate": 9.830137129568086e-07, "loss": 0.149, "step": 11227 }, { "epoch": 0.8, "grad_norm": 1.5137469651441258, "learning_rate": 9.82324056529721e-07, "loss": 0.2038, "step": 11228 }, { "epoch": 0.8, "grad_norm": 1.4635798524396362, "learning_rate": 9.816346157555184e-07, "loss": 0.1866, "step": 11229 }, { "epoch": 0.8, "grad_norm": 1.3918238980125193, "learning_rate": 9.80945390671204e-07, "loss": 0.1758, "step": 11230 }, { "epoch": 0.8, "grad_norm": 1.3695709245431835, "learning_rate": 9.802563813137738e-07, "loss": 0.1501, "step": 11231 }, { "epoch": 0.8, "grad_norm": 1.4978861349992698, "learning_rate": 9.795675877202132e-07, "loss": 0.2177, "step": 11232 }, { "epoch": 0.8, "grad_norm": 4.548952476180618, "learning_rate": 9.78879009927491e-07, "loss": 0.346, "step": 11233 }, { "epoch": 0.8, "grad_norm": 1.2739197322761147, "learning_rate": 9.781906479725688e-07, "loss": 0.1628, "step": 11234 }, { "epoch": 0.8, "grad_norm": 1.4346648445233583, "learning_rate": 9.775025018923946e-07, "loss": 0.1784, "step": 11235 }, { "epoch": 0.8, "grad_norm": 1.1356241160994607, "learning_rate": 9.768145717239052e-07, "loss": 0.1552, "step": 11236 }, { "epoch": 0.8, "grad_norm": 1.4521472336792134, "learning_rate": 9.76126857504026e-07, "loss": 0.1795, "step": 11237 }, { "epoch": 0.8, "grad_norm": 1.335878244633931, "learning_rate": 9.754393592696732e-07, "loss": 0.1864, "step": 11238 }, { "epoch": 0.8, "grad_norm": 1.110333520921348, "learning_rate": 9.747520770577447e-07, "loss": 0.1605, "step": 11239 }, { "epoch": 0.8, "grad_norm": 1.1232923553732563, "learning_rate": 9.740650109051348e-07, "loss": 0.1665, "step": 11240 }, { "epoch": 0.8, "grad_norm": 1.264057476381941, "learning_rate": 9.733781608487196e-07, "loss": 0.166, "step": 11241 }, { "epoch": 0.8, "grad_norm": 1.1511081259839442, "learning_rate": 9.726915269253695e-07, "loss": 0.1377, "step": 11242 }, { "epoch": 0.8, "grad_norm": 1.3396523826975002, "learning_rate": 9.720051091719369e-07, "loss": 0.1467, "step": 11243 }, { "epoch": 0.8, "grad_norm": 1.3083494937943594, "learning_rate": 9.713189076252676e-07, "loss": 0.1532, "step": 11244 }, { "epoch": 0.8, "grad_norm": 1.3724053453700986, "learning_rate": 9.706329223221945e-07, "loss": 0.1816, "step": 11245 }, { "epoch": 0.8, "grad_norm": 1.3597827551168278, "learning_rate": 9.699471532995397e-07, "loss": 0.1976, "step": 11246 }, { "epoch": 0.8, "grad_norm": 1.2291124590731324, "learning_rate": 9.692616005941098e-07, "loss": 0.1569, "step": 11247 }, { "epoch": 0.8, "grad_norm": 1.3197276477827402, "learning_rate": 9.685762642427048e-07, "loss": 0.1861, "step": 11248 }, { "epoch": 0.8, "grad_norm": 1.4975893590509837, "learning_rate": 9.678911442821087e-07, "loss": 0.1603, "step": 11249 }, { "epoch": 0.8, "grad_norm": 1.402325256491505, "learning_rate": 9.67206240749099e-07, "loss": 0.1542, "step": 11250 }, { "epoch": 0.8, "grad_norm": 1.2314936044579967, "learning_rate": 9.66521553680435e-07, "loss": 0.1485, "step": 11251 }, { "epoch": 0.8, "grad_norm": 1.362892631002434, "learning_rate": 9.658370831128704e-07, "loss": 0.1634, "step": 11252 }, { "epoch": 0.8, "grad_norm": 9.579815020615234, "learning_rate": 9.65152829083144e-07, "loss": 0.5307, "step": 11253 }, { "epoch": 0.81, "grad_norm": 1.305906845943606, "learning_rate": 9.644687916279854e-07, "loss": 0.147, "step": 11254 }, { "epoch": 0.81, "grad_norm": 1.3556620853025283, "learning_rate": 9.63784970784109e-07, "loss": 0.1523, "step": 11255 }, { "epoch": 0.81, "grad_norm": 1.4651132255342085, "learning_rate": 9.63101366588221e-07, "loss": 0.1841, "step": 11256 }, { "epoch": 0.81, "grad_norm": 1.40862887362798, "learning_rate": 9.624179790770126e-07, "loss": 0.1672, "step": 11257 }, { "epoch": 0.81, "grad_norm": 1.282911274062553, "learning_rate": 9.61734808287168e-07, "loss": 0.142, "step": 11258 }, { "epoch": 0.81, "grad_norm": 1.352214193267794, "learning_rate": 9.610518542553549e-07, "loss": 0.1798, "step": 11259 }, { "epoch": 0.81, "grad_norm": 1.2273075770141646, "learning_rate": 9.603691170182316e-07, "loss": 0.1727, "step": 11260 }, { "epoch": 0.81, "grad_norm": 1.3260003560402982, "learning_rate": 9.596865966124463e-07, "loss": 0.1816, "step": 11261 }, { "epoch": 0.81, "grad_norm": 1.2908776147626249, "learning_rate": 9.590042930746323e-07, "loss": 0.1493, "step": 11262 }, { "epoch": 0.81, "grad_norm": 1.331236886220097, "learning_rate": 9.58322206441416e-07, "loss": 0.2001, "step": 11263 }, { "epoch": 0.81, "grad_norm": 1.3262849837999742, "learning_rate": 9.576403367494054e-07, "loss": 0.1562, "step": 11264 }, { "epoch": 0.81, "grad_norm": 1.4402669358174165, "learning_rate": 9.56958684035203e-07, "loss": 0.1695, "step": 11265 }, { "epoch": 0.81, "grad_norm": 1.382882190497153, "learning_rate": 9.562772483353949e-07, "loss": 0.1925, "step": 11266 }, { "epoch": 0.81, "grad_norm": 1.3225653988588857, "learning_rate": 9.555960296865614e-07, "loss": 0.1717, "step": 11267 }, { "epoch": 0.81, "grad_norm": 1.4205788326676936, "learning_rate": 9.549150281252633e-07, "loss": 0.2058, "step": 11268 }, { "epoch": 0.81, "grad_norm": 1.4707299829483176, "learning_rate": 9.542342436880564e-07, "loss": 0.1964, "step": 11269 }, { "epoch": 0.81, "grad_norm": 1.2425641157829774, "learning_rate": 9.535536764114827e-07, "loss": 0.1819, "step": 11270 }, { "epoch": 0.81, "grad_norm": 1.3950987545298927, "learning_rate": 9.528733263320727e-07, "loss": 0.1512, "step": 11271 }, { "epoch": 0.81, "grad_norm": 1.1862549024993225, "learning_rate": 9.521931934863432e-07, "loss": 0.1455, "step": 11272 }, { "epoch": 0.81, "grad_norm": 1.4972549067220295, "learning_rate": 9.515132779108033e-07, "loss": 0.1728, "step": 11273 }, { "epoch": 0.81, "grad_norm": 1.3870091575373642, "learning_rate": 9.508335796419454e-07, "loss": 0.198, "step": 11274 }, { "epoch": 0.81, "grad_norm": 1.3137581405691436, "learning_rate": 9.501540987162561e-07, "loss": 0.176, "step": 11275 }, { "epoch": 0.81, "grad_norm": 1.3449511853136782, "learning_rate": 9.494748351702038e-07, "loss": 0.1858, "step": 11276 }, { "epoch": 0.81, "grad_norm": 7.4978959231228535, "learning_rate": 9.48795789040251e-07, "loss": 0.4166, "step": 11277 }, { "epoch": 0.81, "grad_norm": 1.2963616011920063, "learning_rate": 9.481169603628454e-07, "loss": 0.1482, "step": 11278 }, { "epoch": 0.81, "grad_norm": 1.1360199419642687, "learning_rate": 9.474383491744255e-07, "loss": 0.1395, "step": 11279 }, { "epoch": 0.81, "grad_norm": 1.3867549606824945, "learning_rate": 9.467599555114137e-07, "loss": 0.1863, "step": 11280 }, { "epoch": 0.81, "grad_norm": 1.4013565827116152, "learning_rate": 9.460817794102262e-07, "loss": 0.1515, "step": 11281 }, { "epoch": 0.81, "grad_norm": 1.215970843830103, "learning_rate": 9.454038209072619e-07, "loss": 0.141, "step": 11282 }, { "epoch": 0.81, "grad_norm": 1.1888310603015435, "learning_rate": 9.447260800389135e-07, "loss": 0.1565, "step": 11283 }, { "epoch": 0.81, "grad_norm": 8.44893397468075, "learning_rate": 9.440485568415575e-07, "loss": 0.5472, "step": 11284 }, { "epoch": 0.81, "grad_norm": 1.3624428742966304, "learning_rate": 9.433712513515624e-07, "loss": 0.1868, "step": 11285 }, { "epoch": 0.81, "grad_norm": 1.2468534220331826, "learning_rate": 9.426941636052805e-07, "loss": 0.1527, "step": 11286 }, { "epoch": 0.81, "grad_norm": 1.308190581144608, "learning_rate": 9.420172936390592e-07, "loss": 0.1533, "step": 11287 }, { "epoch": 0.81, "grad_norm": 1.4393027433743304, "learning_rate": 9.413406414892268e-07, "loss": 0.1708, "step": 11288 }, { "epoch": 0.81, "grad_norm": 1.2942717252069884, "learning_rate": 9.406642071921062e-07, "loss": 0.2029, "step": 11289 }, { "epoch": 0.81, "grad_norm": 1.526307482877949, "learning_rate": 9.39987990784002e-07, "loss": 0.1905, "step": 11290 }, { "epoch": 0.81, "grad_norm": 1.3918439999240453, "learning_rate": 9.39311992301215e-07, "loss": 0.1627, "step": 11291 }, { "epoch": 0.81, "grad_norm": 1.2534505998061605, "learning_rate": 9.386362117800262e-07, "loss": 0.1401, "step": 11292 }, { "epoch": 0.81, "grad_norm": 1.3368209071596542, "learning_rate": 9.379606492567122e-07, "loss": 0.1587, "step": 11293 }, { "epoch": 0.81, "grad_norm": 1.27340979791407, "learning_rate": 9.372853047675318e-07, "loss": 0.203, "step": 11294 }, { "epoch": 0.81, "grad_norm": 1.102416515403265, "learning_rate": 9.366101783487352e-07, "loss": 0.1283, "step": 11295 }, { "epoch": 0.81, "grad_norm": 1.4825521752301705, "learning_rate": 9.35935270036562e-07, "loss": 0.1869, "step": 11296 }, { "epoch": 0.81, "grad_norm": 1.3643023559245617, "learning_rate": 9.352605798672377e-07, "loss": 0.2059, "step": 11297 }, { "epoch": 0.81, "grad_norm": 1.4065346423057608, "learning_rate": 9.34586107876978e-07, "loss": 0.1707, "step": 11298 }, { "epoch": 0.81, "grad_norm": 1.4342465639184196, "learning_rate": 9.339118541019837e-07, "loss": 0.1836, "step": 11299 }, { "epoch": 0.81, "grad_norm": 1.2939700053869756, "learning_rate": 9.332378185784491e-07, "loss": 0.2016, "step": 11300 }, { "epoch": 0.81, "grad_norm": 1.4438399744559105, "learning_rate": 9.325640013425502e-07, "loss": 0.183, "step": 11301 }, { "epoch": 0.81, "grad_norm": 1.4061487188463309, "learning_rate": 9.318904024304587e-07, "loss": 0.1844, "step": 11302 }, { "epoch": 0.81, "grad_norm": 1.5700146340377061, "learning_rate": 9.312170218783268e-07, "loss": 0.251, "step": 11303 }, { "epoch": 0.81, "grad_norm": 1.2820168252746016, "learning_rate": 9.30543859722301e-07, "loss": 0.1501, "step": 11304 }, { "epoch": 0.81, "grad_norm": 6.6487025389185614, "learning_rate": 9.298709159985131e-07, "loss": 0.591, "step": 11305 }, { "epoch": 0.81, "grad_norm": 1.4385275887805435, "learning_rate": 9.291981907430864e-07, "loss": 0.185, "step": 11306 }, { "epoch": 0.81, "grad_norm": 1.4779126918065233, "learning_rate": 9.285256839921275e-07, "loss": 0.2095, "step": 11307 }, { "epoch": 0.81, "grad_norm": 1.3975371211484617, "learning_rate": 9.278533957817354e-07, "loss": 0.1601, "step": 11308 }, { "epoch": 0.81, "grad_norm": 1.3515387290452283, "learning_rate": 9.271813261479945e-07, "loss": 0.1824, "step": 11309 }, { "epoch": 0.81, "grad_norm": 1.3197301239519237, "learning_rate": 9.265094751269804e-07, "loss": 0.1637, "step": 11310 }, { "epoch": 0.81, "grad_norm": 1.4255502567030556, "learning_rate": 9.258378427547532e-07, "loss": 0.1857, "step": 11311 }, { "epoch": 0.81, "grad_norm": 5.734510015504786, "learning_rate": 9.251664290673651e-07, "loss": 0.5718, "step": 11312 }, { "epoch": 0.81, "grad_norm": 1.1925586206748733, "learning_rate": 9.244952341008545e-07, "loss": 0.1304, "step": 11313 }, { "epoch": 0.81, "grad_norm": 1.3335609167361322, "learning_rate": 9.238242578912505e-07, "loss": 0.1987, "step": 11314 }, { "epoch": 0.81, "grad_norm": 1.239442218519999, "learning_rate": 9.231535004745645e-07, "loss": 0.1804, "step": 11315 }, { "epoch": 0.81, "grad_norm": 1.1431423655919553, "learning_rate": 9.224829618868037e-07, "loss": 0.1191, "step": 11316 }, { "epoch": 0.81, "grad_norm": 1.3757847730555146, "learning_rate": 9.218126421639572e-07, "loss": 0.1862, "step": 11317 }, { "epoch": 0.81, "grad_norm": 5.275887073863003, "learning_rate": 9.211425413420072e-07, "loss": 0.5805, "step": 11318 }, { "epoch": 0.81, "grad_norm": 1.3749864353155, "learning_rate": 9.204726594569202e-07, "loss": 0.1439, "step": 11319 }, { "epoch": 0.81, "grad_norm": 1.3130767003706008, "learning_rate": 9.198029965446537e-07, "loss": 0.1541, "step": 11320 }, { "epoch": 0.81, "grad_norm": 1.4830886576452098, "learning_rate": 9.191335526411527e-07, "loss": 0.2086, "step": 11321 }, { "epoch": 0.81, "grad_norm": 1.545185840944322, "learning_rate": 9.184643277823512e-07, "loss": 0.1933, "step": 11322 }, { "epoch": 0.81, "grad_norm": 1.186782484786305, "learning_rate": 9.17795322004168e-07, "loss": 0.1504, "step": 11323 }, { "epoch": 0.81, "grad_norm": 1.4537303817645482, "learning_rate": 9.171265353425158e-07, "loss": 0.1636, "step": 11324 }, { "epoch": 0.81, "grad_norm": 1.399512774088883, "learning_rate": 9.164579678332897e-07, "loss": 0.1431, "step": 11325 }, { "epoch": 0.81, "grad_norm": 1.3082206393433835, "learning_rate": 9.157896195123767e-07, "loss": 0.1426, "step": 11326 }, { "epoch": 0.81, "grad_norm": 1.108821635545509, "learning_rate": 9.151214904156525e-07, "loss": 0.1407, "step": 11327 }, { "epoch": 0.81, "grad_norm": 1.330332453553592, "learning_rate": 9.144535805789767e-07, "loss": 0.1744, "step": 11328 }, { "epoch": 0.81, "grad_norm": 1.436775348103306, "learning_rate": 9.137858900382018e-07, "loss": 0.1825, "step": 11329 }, { "epoch": 0.81, "grad_norm": 1.3685097889302205, "learning_rate": 9.13118418829167e-07, "loss": 0.1814, "step": 11330 }, { "epoch": 0.81, "grad_norm": 4.7935673906164675, "learning_rate": 9.124511669877001e-07, "loss": 0.5478, "step": 11331 }, { "epoch": 0.81, "grad_norm": 1.1650898738466133, "learning_rate": 9.117841345496142e-07, "loss": 0.1319, "step": 11332 }, { "epoch": 0.81, "grad_norm": 1.2585985478773416, "learning_rate": 9.111173215507164e-07, "loss": 0.1899, "step": 11333 }, { "epoch": 0.81, "grad_norm": 1.4357708877913766, "learning_rate": 9.104507280267944e-07, "loss": 0.154, "step": 11334 }, { "epoch": 0.81, "grad_norm": 1.3207506081304636, "learning_rate": 9.09784354013632e-07, "loss": 0.1913, "step": 11335 }, { "epoch": 0.81, "grad_norm": 1.5386178379533013, "learning_rate": 9.091181995469945e-07, "loss": 0.1521, "step": 11336 }, { "epoch": 0.81, "grad_norm": 1.4545662678168756, "learning_rate": 9.084522646626398e-07, "loss": 0.158, "step": 11337 }, { "epoch": 0.81, "grad_norm": 7.75007569615903, "learning_rate": 9.077865493963129e-07, "loss": 0.5576, "step": 11338 }, { "epoch": 0.81, "grad_norm": 1.3659008162949513, "learning_rate": 9.071210537837477e-07, "loss": 0.1825, "step": 11339 }, { "epoch": 0.81, "grad_norm": 5.067957807273403, "learning_rate": 9.064557778606631e-07, "loss": 0.4836, "step": 11340 }, { "epoch": 0.81, "grad_norm": 1.1863284559765537, "learning_rate": 9.057907216627703e-07, "loss": 0.1659, "step": 11341 }, { "epoch": 0.81, "grad_norm": 1.329821303008681, "learning_rate": 9.051258852257655e-07, "loss": 0.1701, "step": 11342 }, { "epoch": 0.81, "grad_norm": 1.3625375559348458, "learning_rate": 9.04461268585336e-07, "loss": 0.1865, "step": 11343 }, { "epoch": 0.81, "grad_norm": 1.5490187725445175, "learning_rate": 9.037968717771539e-07, "loss": 0.2056, "step": 11344 }, { "epoch": 0.81, "grad_norm": 1.3031422435958897, "learning_rate": 9.031326948368824e-07, "loss": 0.1886, "step": 11345 }, { "epoch": 0.81, "grad_norm": 1.5742537878036162, "learning_rate": 9.024687378001712e-07, "loss": 0.1866, "step": 11346 }, { "epoch": 0.81, "grad_norm": 1.197228067563145, "learning_rate": 9.018050007026613e-07, "loss": 0.1239, "step": 11347 }, { "epoch": 0.81, "grad_norm": 1.3414003794383405, "learning_rate": 9.011414835799764e-07, "loss": 0.1841, "step": 11348 }, { "epoch": 0.81, "grad_norm": 1.5361535567940363, "learning_rate": 9.004781864677342e-07, "loss": 0.2063, "step": 11349 }, { "epoch": 0.81, "grad_norm": 1.4741188672701016, "learning_rate": 8.99815109401535e-07, "loss": 0.1833, "step": 11350 }, { "epoch": 0.81, "grad_norm": 1.3197866264161884, "learning_rate": 8.991522524169726e-07, "loss": 0.1592, "step": 11351 }, { "epoch": 0.81, "grad_norm": 1.5314598928140957, "learning_rate": 8.984896155496242e-07, "loss": 0.211, "step": 11352 }, { "epoch": 0.81, "grad_norm": 10.295104709775284, "learning_rate": 8.97827198835059e-07, "loss": 0.5759, "step": 11353 }, { "epoch": 0.81, "grad_norm": 1.2320781253601287, "learning_rate": 8.971650023088329e-07, "loss": 0.1293, "step": 11354 }, { "epoch": 0.81, "grad_norm": 19.50510936719855, "learning_rate": 8.965030260064906e-07, "loss": 0.4975, "step": 11355 }, { "epoch": 0.81, "grad_norm": 4.72883765476052, "learning_rate": 8.958412699635627e-07, "loss": 0.5195, "step": 11356 }, { "epoch": 0.81, "grad_norm": 1.377114862755084, "learning_rate": 8.951797342155716e-07, "loss": 0.1801, "step": 11357 }, { "epoch": 0.81, "grad_norm": 1.2830103403679676, "learning_rate": 8.945184187980233e-07, "loss": 0.1922, "step": 11358 }, { "epoch": 0.81, "grad_norm": 1.2304614256782398, "learning_rate": 8.93857323746416e-07, "loss": 0.1532, "step": 11359 }, { "epoch": 0.81, "grad_norm": 1.3953649092550644, "learning_rate": 8.931964490962364e-07, "loss": 0.1463, "step": 11360 }, { "epoch": 0.81, "grad_norm": 1.2127193347416148, "learning_rate": 8.925357948829544e-07, "loss": 0.1451, "step": 11361 }, { "epoch": 0.81, "grad_norm": 1.3575276153551967, "learning_rate": 8.918753611420328e-07, "loss": 0.1868, "step": 11362 }, { "epoch": 0.81, "grad_norm": 12.052468043288988, "learning_rate": 8.912151479089215e-07, "loss": 0.5023, "step": 11363 }, { "epoch": 0.81, "grad_norm": 1.2873481785687018, "learning_rate": 8.905551552190589e-07, "loss": 0.1368, "step": 11364 }, { "epoch": 0.81, "grad_norm": 1.3328600272095767, "learning_rate": 8.898953831078678e-07, "loss": 0.1892, "step": 11365 }, { "epoch": 0.81, "grad_norm": 1.1123152633721056, "learning_rate": 8.892358316107658e-07, "loss": 0.1217, "step": 11366 }, { "epoch": 0.81, "grad_norm": 1.3636489877997258, "learning_rate": 8.885765007631514e-07, "loss": 0.1891, "step": 11367 }, { "epoch": 0.81, "grad_norm": 1.1376326174490177, "learning_rate": 8.879173906004184e-07, "loss": 0.1352, "step": 11368 }, { "epoch": 0.81, "grad_norm": 1.395818120418276, "learning_rate": 8.872585011579416e-07, "loss": 0.1755, "step": 11369 }, { "epoch": 0.81, "grad_norm": 1.281478242564968, "learning_rate": 8.8659983247109e-07, "loss": 0.1797, "step": 11370 }, { "epoch": 0.81, "grad_norm": 1.3361140416471964, "learning_rate": 8.859413845752174e-07, "loss": 0.1534, "step": 11371 }, { "epoch": 0.81, "grad_norm": 1.4126760701955117, "learning_rate": 8.852831575056681e-07, "loss": 0.1629, "step": 11372 }, { "epoch": 0.81, "grad_norm": 1.3519387220846326, "learning_rate": 8.846251512977711e-07, "loss": 0.1957, "step": 11373 }, { "epoch": 0.81, "grad_norm": 10.412770170307391, "learning_rate": 8.839673659868475e-07, "loss": 0.5328, "step": 11374 }, { "epoch": 0.81, "grad_norm": 1.7326474782113603, "learning_rate": 8.833098016082025e-07, "loss": 0.1764, "step": 11375 }, { "epoch": 0.81, "grad_norm": 1.3436427884261881, "learning_rate": 8.826524581971335e-07, "loss": 0.1684, "step": 11376 }, { "epoch": 0.81, "grad_norm": 1.396479269852451, "learning_rate": 8.819953357889221e-07, "loss": 0.1467, "step": 11377 }, { "epoch": 0.81, "grad_norm": 1.5507408245191352, "learning_rate": 8.813384344188414e-07, "loss": 0.1943, "step": 11378 }, { "epoch": 0.81, "grad_norm": 1.3410763796504985, "learning_rate": 8.806817541221507e-07, "loss": 0.1861, "step": 11379 }, { "epoch": 0.81, "grad_norm": 1.3681216249588533, "learning_rate": 8.800252949340998e-07, "loss": 0.1628, "step": 11380 }, { "epoch": 0.81, "grad_norm": 1.2029895125939711, "learning_rate": 8.793690568899216e-07, "loss": 0.1412, "step": 11381 }, { "epoch": 0.81, "grad_norm": 1.3565488484412431, "learning_rate": 8.787130400248434e-07, "loss": 0.154, "step": 11382 }, { "epoch": 0.81, "grad_norm": 1.3403941249537674, "learning_rate": 8.780572443740754e-07, "loss": 0.1762, "step": 11383 }, { "epoch": 0.81, "grad_norm": 1.2628129983635457, "learning_rate": 8.774016699728194e-07, "loss": 0.1664, "step": 11384 }, { "epoch": 0.81, "grad_norm": 1.45450465953174, "learning_rate": 8.767463168562628e-07, "loss": 0.1761, "step": 11385 }, { "epoch": 0.81, "grad_norm": 1.1698539011781106, "learning_rate": 8.760911850595844e-07, "loss": 0.1363, "step": 11386 }, { "epoch": 0.81, "grad_norm": 1.2393681878220268, "learning_rate": 8.754362746179445e-07, "loss": 0.1608, "step": 11387 }, { "epoch": 0.81, "grad_norm": 1.786102518694291, "learning_rate": 8.747815855665026e-07, "loss": 0.1773, "step": 11388 }, { "epoch": 0.81, "grad_norm": 1.5191709697516351, "learning_rate": 8.741271179403954e-07, "loss": 0.2134, "step": 11389 }, { "epoch": 0.81, "grad_norm": 1.5989698275939244, "learning_rate": 8.734728717747531e-07, "loss": 0.2261, "step": 11390 }, { "epoch": 0.81, "grad_norm": 1.4320000167371147, "learning_rate": 8.728188471046944e-07, "loss": 0.2141, "step": 11391 }, { "epoch": 0.81, "grad_norm": 1.4061933115162406, "learning_rate": 8.721650439653223e-07, "loss": 0.182, "step": 11392 }, { "epoch": 0.81, "grad_norm": 1.3411206071685393, "learning_rate": 8.715114623917325e-07, "loss": 0.1829, "step": 11393 }, { "epoch": 0.82, "grad_norm": 1.4047648766427407, "learning_rate": 8.708581024190049e-07, "loss": 0.1807, "step": 11394 }, { "epoch": 0.82, "grad_norm": 1.3997925347838647, "learning_rate": 8.702049640822119e-07, "loss": 0.1723, "step": 11395 }, { "epoch": 0.82, "grad_norm": 1.329303012777994, "learning_rate": 8.695520474164077e-07, "loss": 0.1983, "step": 11396 }, { "epoch": 0.82, "grad_norm": 1.2566335286900716, "learning_rate": 8.688993524566403e-07, "loss": 0.1417, "step": 11397 }, { "epoch": 0.82, "grad_norm": 1.3755171497958136, "learning_rate": 8.68246879237944e-07, "loss": 0.1527, "step": 11398 }, { "epoch": 0.82, "grad_norm": 5.902837798432318, "learning_rate": 8.675946277953418e-07, "loss": 0.4532, "step": 11399 }, { "epoch": 0.82, "grad_norm": 1.2897325512104756, "learning_rate": 8.669425981638413e-07, "loss": 0.1325, "step": 11400 }, { "epoch": 0.82, "grad_norm": 1.3546675765651925, "learning_rate": 8.662907903784434e-07, "loss": 0.1756, "step": 11401 }, { "epoch": 0.82, "grad_norm": 8.437645662695946, "learning_rate": 8.65639204474133e-07, "loss": 0.5031, "step": 11402 }, { "epoch": 0.82, "grad_norm": 1.3563981453144178, "learning_rate": 8.649878404858858e-07, "loss": 0.1851, "step": 11403 }, { "epoch": 0.82, "grad_norm": 1.3611351621977739, "learning_rate": 8.643366984486628e-07, "loss": 0.1683, "step": 11404 }, { "epoch": 0.82, "grad_norm": 1.5114863224056716, "learning_rate": 8.636857783974156e-07, "loss": 0.1811, "step": 11405 }, { "epoch": 0.82, "grad_norm": 1.3436726027838803, "learning_rate": 8.630350803670828e-07, "loss": 0.1947, "step": 11406 }, { "epoch": 0.82, "grad_norm": 1.291571610785287, "learning_rate": 8.623846043925931e-07, "loss": 0.155, "step": 11407 }, { "epoch": 0.82, "grad_norm": 1.472627137613963, "learning_rate": 8.617343505088593e-07, "loss": 0.1946, "step": 11408 }, { "epoch": 0.82, "grad_norm": 1.36291889844303, "learning_rate": 8.610843187507856e-07, "loss": 0.1504, "step": 11409 }, { "epoch": 0.82, "grad_norm": 1.355327121725816, "learning_rate": 8.604345091532618e-07, "loss": 0.1441, "step": 11410 }, { "epoch": 0.82, "grad_norm": 5.854175717794461, "learning_rate": 8.597849217511689e-07, "loss": 0.5641, "step": 11411 }, { "epoch": 0.82, "grad_norm": 1.4962333643174324, "learning_rate": 8.591355565793724e-07, "loss": 0.1652, "step": 11412 }, { "epoch": 0.82, "grad_norm": 1.331070670423646, "learning_rate": 8.584864136727278e-07, "loss": 0.1686, "step": 11413 }, { "epoch": 0.82, "grad_norm": 1.3814311996215893, "learning_rate": 8.578374930660793e-07, "loss": 0.1807, "step": 11414 }, { "epoch": 0.82, "grad_norm": 1.2960419642529422, "learning_rate": 8.571887947942598e-07, "loss": 0.1971, "step": 11415 }, { "epoch": 0.82, "grad_norm": 1.3386559675980279, "learning_rate": 8.565403188920862e-07, "loss": 0.1791, "step": 11416 }, { "epoch": 0.82, "grad_norm": 1.4456846623644037, "learning_rate": 8.558920653943681e-07, "loss": 0.1765, "step": 11417 }, { "epoch": 0.82, "grad_norm": 1.1894196749349266, "learning_rate": 8.552440343358987e-07, "loss": 0.1444, "step": 11418 }, { "epoch": 0.82, "grad_norm": 1.2940086993018718, "learning_rate": 8.545962257514651e-07, "loss": 0.1991, "step": 11419 }, { "epoch": 0.82, "grad_norm": 1.191183798086964, "learning_rate": 8.539486396758357e-07, "loss": 0.1662, "step": 11420 }, { "epoch": 0.82, "grad_norm": 1.433385408384829, "learning_rate": 8.53301276143772e-07, "loss": 0.1646, "step": 11421 }, { "epoch": 0.82, "grad_norm": 1.6347912932052613, "learning_rate": 8.52654135190022e-07, "loss": 0.1902, "step": 11422 }, { "epoch": 0.82, "grad_norm": 1.5317605837676436, "learning_rate": 8.520072168493215e-07, "loss": 0.2058, "step": 11423 }, { "epoch": 0.82, "grad_norm": 1.3151030158179964, "learning_rate": 8.513605211563958e-07, "loss": 0.167, "step": 11424 }, { "epoch": 0.82, "grad_norm": 1.3456674562440323, "learning_rate": 8.507140481459541e-07, "loss": 0.1543, "step": 11425 }, { "epoch": 0.82, "grad_norm": 1.2698463288012949, "learning_rate": 8.500677978526995e-07, "loss": 0.1284, "step": 11426 }, { "epoch": 0.82, "grad_norm": 1.4545367473438622, "learning_rate": 8.494217703113173e-07, "loss": 0.1603, "step": 11427 }, { "epoch": 0.82, "grad_norm": 1.3828906073421843, "learning_rate": 8.487759655564864e-07, "loss": 0.1912, "step": 11428 }, { "epoch": 0.82, "grad_norm": 1.3783875459732904, "learning_rate": 8.481303836228683e-07, "loss": 0.173, "step": 11429 }, { "epoch": 0.82, "grad_norm": 1.1927576599979075, "learning_rate": 8.474850245451172e-07, "loss": 0.1461, "step": 11430 }, { "epoch": 0.82, "grad_norm": 1.4555194343619704, "learning_rate": 8.468398883578726e-07, "loss": 0.2197, "step": 11431 }, { "epoch": 0.82, "grad_norm": 1.3264473940926609, "learning_rate": 8.461949750957643e-07, "loss": 0.1856, "step": 11432 }, { "epoch": 0.82, "grad_norm": 83.21352343716754, "learning_rate": 8.455502847934066e-07, "loss": 0.582, "step": 11433 }, { "epoch": 0.82, "grad_norm": 1.4432548050672056, "learning_rate": 8.449058174854058e-07, "loss": 0.1459, "step": 11434 }, { "epoch": 0.82, "grad_norm": 1.3589038430181848, "learning_rate": 8.442615732063525e-07, "loss": 0.1932, "step": 11435 }, { "epoch": 0.82, "grad_norm": 1.3136081518102787, "learning_rate": 8.436175519908291e-07, "loss": 0.1693, "step": 11436 }, { "epoch": 0.82, "grad_norm": 1.2946457571192662, "learning_rate": 8.429737538734017e-07, "loss": 0.1502, "step": 11437 }, { "epoch": 0.82, "grad_norm": 1.3921904235005222, "learning_rate": 8.423301788886285e-07, "loss": 0.1847, "step": 11438 }, { "epoch": 0.82, "grad_norm": 1.227592518715334, "learning_rate": 8.416868270710538e-07, "loss": 0.1757, "step": 11439 }, { "epoch": 0.82, "grad_norm": 1.417611348627177, "learning_rate": 8.410436984552112e-07, "loss": 0.1935, "step": 11440 }, { "epoch": 0.82, "grad_norm": 1.2878340989843002, "learning_rate": 8.404007930756186e-07, "loss": 0.1759, "step": 11441 }, { "epoch": 0.82, "grad_norm": 7.416962663817964, "learning_rate": 8.397581109667879e-07, "loss": 0.5042, "step": 11442 }, { "epoch": 0.82, "grad_norm": 6.219356507567168, "learning_rate": 8.391156521632126e-07, "loss": 0.4531, "step": 11443 }, { "epoch": 0.82, "grad_norm": 1.4151476458946368, "learning_rate": 8.384734166993802e-07, "loss": 0.1722, "step": 11444 }, { "epoch": 0.82, "grad_norm": 1.3489144496479875, "learning_rate": 8.378314046097607e-07, "loss": 0.1832, "step": 11445 }, { "epoch": 0.82, "grad_norm": 1.261660195567686, "learning_rate": 8.371896159288162e-07, "loss": 0.1889, "step": 11446 }, { "epoch": 0.82, "grad_norm": 1.1899621055589107, "learning_rate": 8.365480506909945e-07, "loss": 0.1372, "step": 11447 }, { "epoch": 0.82, "grad_norm": 1.5048603569251995, "learning_rate": 8.359067089307349e-07, "loss": 0.1814, "step": 11448 }, { "epoch": 0.82, "grad_norm": 1.3390024617764553, "learning_rate": 8.35265590682458e-07, "loss": 0.1733, "step": 11449 }, { "epoch": 0.82, "grad_norm": 1.2799014281780703, "learning_rate": 8.346246959805804e-07, "loss": 0.1541, "step": 11450 }, { "epoch": 0.82, "grad_norm": 1.232379939509497, "learning_rate": 8.339840248594999e-07, "loss": 0.1323, "step": 11451 }, { "epoch": 0.82, "grad_norm": 5.540355309579994, "learning_rate": 8.33343577353607e-07, "loss": 0.6364, "step": 11452 }, { "epoch": 0.82, "grad_norm": 1.532081170578114, "learning_rate": 8.327033534972767e-07, "loss": 0.1493, "step": 11453 }, { "epoch": 0.82, "grad_norm": 1.687825726883838, "learning_rate": 8.320633533248745e-07, "loss": 0.2218, "step": 11454 }, { "epoch": 0.82, "grad_norm": 1.5075911252993697, "learning_rate": 8.314235768707529e-07, "loss": 0.2012, "step": 11455 }, { "epoch": 0.82, "grad_norm": 1.2120210753157823, "learning_rate": 8.307840241692533e-07, "loss": 0.1615, "step": 11456 }, { "epoch": 0.82, "grad_norm": 1.2334533982612899, "learning_rate": 8.301446952547049e-07, "loss": 0.1422, "step": 11457 }, { "epoch": 0.82, "grad_norm": 1.150364470744097, "learning_rate": 8.295055901614218e-07, "loss": 0.1577, "step": 11458 }, { "epoch": 0.82, "grad_norm": 1.2932311573561976, "learning_rate": 8.288667089237118e-07, "loss": 0.1462, "step": 11459 }, { "epoch": 0.82, "grad_norm": 6.481512813055017, "learning_rate": 8.282280515758639e-07, "loss": 0.4908, "step": 11460 }, { "epoch": 0.82, "grad_norm": 1.1685834832873647, "learning_rate": 8.275896181521625e-07, "loss": 0.129, "step": 11461 }, { "epoch": 0.82, "grad_norm": 1.2564310152375446, "learning_rate": 8.26951408686873e-07, "loss": 0.1377, "step": 11462 }, { "epoch": 0.82, "grad_norm": 1.3100266148914808, "learning_rate": 8.263134232142533e-07, "loss": 0.1606, "step": 11463 }, { "epoch": 0.82, "grad_norm": 4.924961285467801, "learning_rate": 8.256756617685474e-07, "loss": 0.5518, "step": 11464 }, { "epoch": 0.82, "grad_norm": 1.3099353995253473, "learning_rate": 8.2503812438399e-07, "loss": 0.1477, "step": 11465 }, { "epoch": 0.82, "grad_norm": 1.404820815623232, "learning_rate": 8.244008110947982e-07, "loss": 0.1657, "step": 11466 }, { "epoch": 0.82, "grad_norm": 1.410008435631488, "learning_rate": 8.237637219351835e-07, "loss": 0.1636, "step": 11467 }, { "epoch": 0.82, "grad_norm": 1.3521792707814833, "learning_rate": 8.231268569393396e-07, "loss": 0.1794, "step": 11468 }, { "epoch": 0.82, "grad_norm": 1.274283928033697, "learning_rate": 8.224902161414538e-07, "loss": 0.1284, "step": 11469 }, { "epoch": 0.82, "grad_norm": 1.4471274992141288, "learning_rate": 8.218537995756953e-07, "loss": 0.1982, "step": 11470 }, { "epoch": 0.82, "grad_norm": 1.2723078407743786, "learning_rate": 8.212176072762257e-07, "loss": 0.1623, "step": 11471 }, { "epoch": 0.82, "grad_norm": 1.232020983422353, "learning_rate": 8.205816392771937e-07, "loss": 0.1886, "step": 11472 }, { "epoch": 0.82, "grad_norm": 1.3133249356227799, "learning_rate": 8.199458956127366e-07, "loss": 0.1552, "step": 11473 }, { "epoch": 0.82, "grad_norm": 1.3989406792886945, "learning_rate": 8.193103763169763e-07, "loss": 0.1972, "step": 11474 }, { "epoch": 0.82, "grad_norm": 1.488434652451793, "learning_rate": 8.186750814240268e-07, "loss": 0.1814, "step": 11475 }, { "epoch": 0.82, "grad_norm": 1.2927563889891591, "learning_rate": 8.180400109679864e-07, "loss": 0.1434, "step": 11476 }, { "epoch": 0.82, "grad_norm": 1.378222540491481, "learning_rate": 8.174051649829456e-07, "loss": 0.1669, "step": 11477 }, { "epoch": 0.82, "grad_norm": 1.4021134040163132, "learning_rate": 8.167705435029777e-07, "loss": 0.1628, "step": 11478 }, { "epoch": 0.82, "grad_norm": 1.2863336310940636, "learning_rate": 8.161361465621481e-07, "loss": 0.169, "step": 11479 }, { "epoch": 0.82, "grad_norm": 1.4294140842320746, "learning_rate": 8.15501974194508e-07, "loss": 0.1831, "step": 11480 }, { "epoch": 0.82, "grad_norm": 1.3911381286819142, "learning_rate": 8.148680264340997e-07, "loss": 0.1802, "step": 11481 }, { "epoch": 0.82, "grad_norm": 1.2343342185221675, "learning_rate": 8.142343033149475e-07, "loss": 0.1347, "step": 11482 }, { "epoch": 0.82, "grad_norm": 1.3439496996293885, "learning_rate": 8.136008048710697e-07, "loss": 0.1663, "step": 11483 }, { "epoch": 0.82, "grad_norm": 1.4269009778769877, "learning_rate": 8.129675311364682e-07, "loss": 0.1954, "step": 11484 }, { "epoch": 0.82, "grad_norm": 1.3086128953887586, "learning_rate": 8.123344821451362e-07, "loss": 0.1455, "step": 11485 }, { "epoch": 0.82, "grad_norm": 1.3170625609030797, "learning_rate": 8.117016579310516e-07, "loss": 0.1468, "step": 11486 }, { "epoch": 0.82, "grad_norm": 7.022690193465774, "learning_rate": 8.110690585281827e-07, "loss": 0.5532, "step": 11487 }, { "epoch": 0.82, "grad_norm": 1.4332464655896164, "learning_rate": 8.104366839704864e-07, "loss": 0.1994, "step": 11488 }, { "epoch": 0.82, "grad_norm": 1.3252524370680863, "learning_rate": 8.09804534291902e-07, "loss": 0.1807, "step": 11489 }, { "epoch": 0.82, "grad_norm": 1.4290117914058154, "learning_rate": 8.091726095263658e-07, "loss": 0.1639, "step": 11490 }, { "epoch": 0.82, "grad_norm": 1.3559999589836476, "learning_rate": 8.085409097077935e-07, "loss": 0.154, "step": 11491 }, { "epoch": 0.82, "grad_norm": 1.2169854693587618, "learning_rate": 8.07909434870095e-07, "loss": 0.131, "step": 11492 }, { "epoch": 0.82, "grad_norm": 1.4983154037756823, "learning_rate": 8.072781850471617e-07, "loss": 0.166, "step": 11493 }, { "epoch": 0.82, "grad_norm": 1.215137305195108, "learning_rate": 8.066471602728804e-07, "loss": 0.1309, "step": 11494 }, { "epoch": 0.82, "grad_norm": 1.2600736785724975, "learning_rate": 8.060163605811189e-07, "loss": 0.1619, "step": 11495 }, { "epoch": 0.82, "grad_norm": 1.4411261810835667, "learning_rate": 8.05385786005739e-07, "loss": 0.2178, "step": 11496 }, { "epoch": 0.82, "grad_norm": 1.4487999820197899, "learning_rate": 8.047554365805837e-07, "loss": 0.1956, "step": 11497 }, { "epoch": 0.82, "grad_norm": 1.5063790981367113, "learning_rate": 8.041253123394905e-07, "loss": 0.24, "step": 11498 }, { "epoch": 0.82, "grad_norm": 1.466884247536365, "learning_rate": 8.034954133162809e-07, "loss": 0.19, "step": 11499 }, { "epoch": 0.82, "grad_norm": 1.3924120962871684, "learning_rate": 8.02865739544767e-07, "loss": 0.1866, "step": 11500 }, { "epoch": 0.82, "grad_norm": 1.3057473474268322, "learning_rate": 8.022362910587444e-07, "loss": 0.167, "step": 11501 }, { "epoch": 0.82, "grad_norm": 1.41886909693053, "learning_rate": 8.016070678920023e-07, "loss": 0.1563, "step": 11502 }, { "epoch": 0.82, "grad_norm": 1.2455875720942915, "learning_rate": 8.009780700783126e-07, "loss": 0.14, "step": 11503 }, { "epoch": 0.82, "grad_norm": 1.2819670123112297, "learning_rate": 8.003492976514387e-07, "loss": 0.1556, "step": 11504 }, { "epoch": 0.82, "grad_norm": 1.2170296388803648, "learning_rate": 7.997207506451293e-07, "loss": 0.1504, "step": 11505 }, { "epoch": 0.82, "grad_norm": 1.4207155604639734, "learning_rate": 7.990924290931235e-07, "loss": 0.1839, "step": 11506 }, { "epoch": 0.82, "grad_norm": 1.6266247095023547, "learning_rate": 7.984643330291464e-07, "loss": 0.1973, "step": 11507 }, { "epoch": 0.82, "grad_norm": 1.400452828264983, "learning_rate": 7.978364624869134e-07, "loss": 0.2009, "step": 11508 }, { "epoch": 0.82, "grad_norm": 1.1847663780948023, "learning_rate": 7.972088175001236e-07, "loss": 0.1569, "step": 11509 }, { "epoch": 0.82, "grad_norm": 1.3898018615701242, "learning_rate": 7.96581398102469e-07, "loss": 0.171, "step": 11510 }, { "epoch": 0.82, "grad_norm": 1.3423827400338622, "learning_rate": 7.959542043276242e-07, "loss": 0.1795, "step": 11511 }, { "epoch": 0.82, "grad_norm": 1.3889643900998607, "learning_rate": 7.953272362092573e-07, "loss": 0.1763, "step": 11512 }, { "epoch": 0.82, "grad_norm": 4.158905902456587, "learning_rate": 7.947004937810188e-07, "loss": 0.4405, "step": 11513 }, { "epoch": 0.82, "grad_norm": 1.298443666662164, "learning_rate": 7.94073977076551e-07, "loss": 0.1529, "step": 11514 }, { "epoch": 0.82, "grad_norm": 1.4650860255634925, "learning_rate": 7.934476861294831e-07, "loss": 0.1795, "step": 11515 }, { "epoch": 0.82, "grad_norm": 1.2192932766570668, "learning_rate": 7.92821620973433e-07, "loss": 0.148, "step": 11516 }, { "epoch": 0.82, "grad_norm": 1.383336794134288, "learning_rate": 7.921957816420023e-07, "loss": 0.1807, "step": 11517 }, { "epoch": 0.82, "grad_norm": 1.279964189088151, "learning_rate": 7.915701681687854e-07, "loss": 0.1879, "step": 11518 }, { "epoch": 0.82, "grad_norm": 1.1165254041883554, "learning_rate": 7.909447805873643e-07, "loss": 0.1454, "step": 11519 }, { "epoch": 0.82, "grad_norm": 1.3521343890275785, "learning_rate": 7.903196189313039e-07, "loss": 0.1488, "step": 11520 }, { "epoch": 0.82, "grad_norm": 1.229123036957462, "learning_rate": 7.89694683234164e-07, "loss": 0.1589, "step": 11521 }, { "epoch": 0.82, "grad_norm": 1.4916578370647167, "learning_rate": 7.890699735294849e-07, "loss": 0.2255, "step": 11522 }, { "epoch": 0.82, "grad_norm": 1.3684949267094058, "learning_rate": 7.88445489850801e-07, "loss": 0.1718, "step": 11523 }, { "epoch": 0.82, "grad_norm": 1.5259462181532597, "learning_rate": 7.878212322316315e-07, "loss": 0.2103, "step": 11524 }, { "epoch": 0.82, "grad_norm": 1.2530013089782075, "learning_rate": 7.871972007054857e-07, "loss": 0.1651, "step": 11525 }, { "epoch": 0.82, "grad_norm": 1.2965918925973268, "learning_rate": 7.865733953058557e-07, "loss": 0.1707, "step": 11526 }, { "epoch": 0.82, "grad_norm": 1.3543305548312021, "learning_rate": 7.859498160662288e-07, "loss": 0.1622, "step": 11527 }, { "epoch": 0.82, "grad_norm": 5.461249997521325, "learning_rate": 7.853264630200724e-07, "loss": 0.7479, "step": 11528 }, { "epoch": 0.82, "grad_norm": 1.2334353141045586, "learning_rate": 7.847033362008488e-07, "loss": 0.1688, "step": 11529 }, { "epoch": 0.82, "grad_norm": 1.5638901386218238, "learning_rate": 7.840804356420023e-07, "loss": 0.1906, "step": 11530 }, { "epoch": 0.82, "grad_norm": 1.3085516759229985, "learning_rate": 7.834577613769695e-07, "loss": 0.1449, "step": 11531 }, { "epoch": 0.82, "grad_norm": 1.3983067791455244, "learning_rate": 7.82835313439172e-07, "loss": 0.1496, "step": 11532 }, { "epoch": 0.82, "grad_norm": 1.3649651066379158, "learning_rate": 7.822130918620229e-07, "loss": 0.1668, "step": 11533 }, { "epoch": 0.83, "grad_norm": 1.2842317132142549, "learning_rate": 7.81591096678917e-07, "loss": 0.1615, "step": 11534 }, { "epoch": 0.83, "grad_norm": 1.4081704303421252, "learning_rate": 7.809693279232433e-07, "loss": 0.1598, "step": 11535 }, { "epoch": 0.83, "grad_norm": 1.3008653200642926, "learning_rate": 7.803477856283737e-07, "loss": 0.1661, "step": 11536 }, { "epoch": 0.83, "grad_norm": 1.6934171084701195, "learning_rate": 7.797264698276724e-07, "loss": 0.1928, "step": 11537 }, { "epoch": 0.83, "grad_norm": 1.2801505967998739, "learning_rate": 7.791053805544868e-07, "loss": 0.1525, "step": 11538 }, { "epoch": 0.83, "grad_norm": 1.4122828018364577, "learning_rate": 7.784845178421557e-07, "loss": 0.1849, "step": 11539 }, { "epoch": 0.83, "grad_norm": 4.3762601828516114, "learning_rate": 7.778638817240042e-07, "loss": 0.4396, "step": 11540 }, { "epoch": 0.83, "grad_norm": 4.293272809696471, "learning_rate": 7.772434722333472e-07, "loss": 0.4673, "step": 11541 }, { "epoch": 0.83, "grad_norm": 1.3378789806836056, "learning_rate": 7.766232894034836e-07, "loss": 0.1927, "step": 11542 }, { "epoch": 0.83, "grad_norm": 1.2215449572499835, "learning_rate": 7.760033332677042e-07, "loss": 0.193, "step": 11543 }, { "epoch": 0.83, "grad_norm": 6.278849011623813, "learning_rate": 7.753836038592832e-07, "loss": 0.5433, "step": 11544 }, { "epoch": 0.83, "grad_norm": 1.2552225211964956, "learning_rate": 7.747641012114888e-07, "loss": 0.167, "step": 11545 }, { "epoch": 0.83, "grad_norm": 1.4000809638611953, "learning_rate": 7.741448253575696e-07, "loss": 0.1888, "step": 11546 }, { "epoch": 0.83, "grad_norm": 1.3722678543938278, "learning_rate": 7.73525776330768e-07, "loss": 0.1433, "step": 11547 }, { "epoch": 0.83, "grad_norm": 1.2435592500271755, "learning_rate": 7.72906954164312e-07, "loss": 0.1449, "step": 11548 }, { "epoch": 0.83, "grad_norm": 1.4152898672306937, "learning_rate": 7.722883588914182e-07, "loss": 0.1773, "step": 11549 }, { "epoch": 0.83, "grad_norm": 1.381987639110904, "learning_rate": 7.71669990545289e-07, "loss": 0.1694, "step": 11550 }, { "epoch": 0.83, "grad_norm": 1.42479961541026, "learning_rate": 7.710518491591157e-07, "loss": 0.1659, "step": 11551 }, { "epoch": 0.83, "grad_norm": 1.4462711227043956, "learning_rate": 7.7043393476608e-07, "loss": 0.1914, "step": 11552 }, { "epoch": 0.83, "grad_norm": 1.34193221328926, "learning_rate": 7.69816247399346e-07, "loss": 0.1551, "step": 11553 }, { "epoch": 0.83, "grad_norm": 1.2681811340099547, "learning_rate": 7.691987870920714e-07, "loss": 0.1432, "step": 11554 }, { "epoch": 0.83, "grad_norm": 1.3866136376429892, "learning_rate": 7.685815538773972e-07, "loss": 0.1789, "step": 11555 }, { "epoch": 0.83, "grad_norm": 1.3636563466687117, "learning_rate": 7.67964547788454e-07, "loss": 0.1692, "step": 11556 }, { "epoch": 0.83, "grad_norm": 1.1881460364550938, "learning_rate": 7.673477688583614e-07, "loss": 0.1423, "step": 11557 }, { "epoch": 0.83, "grad_norm": 1.326759707480179, "learning_rate": 7.667312171202262e-07, "loss": 0.1655, "step": 11558 }, { "epoch": 0.83, "grad_norm": 1.253416671078902, "learning_rate": 7.661148926071399e-07, "loss": 0.1467, "step": 11559 }, { "epoch": 0.83, "grad_norm": 1.1965902275491853, "learning_rate": 7.654987953521875e-07, "loss": 0.1366, "step": 11560 }, { "epoch": 0.83, "grad_norm": 1.3389678297848768, "learning_rate": 7.648829253884354e-07, "loss": 0.201, "step": 11561 }, { "epoch": 0.83, "grad_norm": 1.3517352503918176, "learning_rate": 7.642672827489439e-07, "loss": 0.1834, "step": 11562 }, { "epoch": 0.83, "grad_norm": 1.611422802926312, "learning_rate": 7.636518674667559e-07, "loss": 0.174, "step": 11563 }, { "epoch": 0.83, "grad_norm": 1.4253466716311418, "learning_rate": 7.63036679574905e-07, "loss": 0.1715, "step": 11564 }, { "epoch": 0.83, "grad_norm": 1.2376411307084338, "learning_rate": 7.624217191064132e-07, "loss": 0.1559, "step": 11565 }, { "epoch": 0.83, "grad_norm": 1.3446571342551363, "learning_rate": 7.618069860942895e-07, "loss": 0.1549, "step": 11566 }, { "epoch": 0.83, "grad_norm": 1.5191627813968365, "learning_rate": 7.611924805715281e-07, "loss": 0.1809, "step": 11567 }, { "epoch": 0.83, "grad_norm": 1.437873724648136, "learning_rate": 7.605782025711162e-07, "loss": 0.1921, "step": 11568 }, { "epoch": 0.83, "grad_norm": 1.3331462797618217, "learning_rate": 7.59964152126022e-07, "loss": 0.1624, "step": 11569 }, { "epoch": 0.83, "grad_norm": 1.4792015457858434, "learning_rate": 7.593503292692089e-07, "loss": 0.1671, "step": 11570 }, { "epoch": 0.83, "grad_norm": 1.2777413872750198, "learning_rate": 7.587367340336221e-07, "loss": 0.1429, "step": 11571 }, { "epoch": 0.83, "grad_norm": 1.0861017482570525, "learning_rate": 7.581233664521975e-07, "loss": 0.1182, "step": 11572 }, { "epoch": 0.83, "grad_norm": 1.2408880862072895, "learning_rate": 7.575102265578582e-07, "loss": 0.1383, "step": 11573 }, { "epoch": 0.83, "grad_norm": 1.4634111106649268, "learning_rate": 7.568973143835167e-07, "loss": 0.1628, "step": 11574 }, { "epoch": 0.83, "grad_norm": 1.2217449727468324, "learning_rate": 7.562846299620697e-07, "loss": 0.1654, "step": 11575 }, { "epoch": 0.83, "grad_norm": 1.249319849310265, "learning_rate": 7.55672173326405e-07, "loss": 0.1638, "step": 11576 }, { "epoch": 0.83, "grad_norm": 1.291951851251199, "learning_rate": 7.550599445093954e-07, "loss": 0.1581, "step": 11577 }, { "epoch": 0.83, "grad_norm": 1.257928676082074, "learning_rate": 7.54447943543905e-07, "loss": 0.1491, "step": 11578 }, { "epoch": 0.83, "grad_norm": 1.5349822952156822, "learning_rate": 7.538361704627806e-07, "loss": 0.1936, "step": 11579 }, { "epoch": 0.83, "grad_norm": 1.4003094983468192, "learning_rate": 7.532246252988617e-07, "loss": 0.1936, "step": 11580 }, { "epoch": 0.83, "grad_norm": 1.284220739888052, "learning_rate": 7.526133080849735e-07, "loss": 0.1298, "step": 11581 }, { "epoch": 0.83, "grad_norm": 1.2832934872079642, "learning_rate": 7.520022188539289e-07, "loss": 0.1623, "step": 11582 }, { "epoch": 0.83, "grad_norm": 7.512801460576542, "learning_rate": 7.513913576385301e-07, "loss": 0.525, "step": 11583 }, { "epoch": 0.83, "grad_norm": 1.2630420408284937, "learning_rate": 7.507807244715632e-07, "loss": 0.1568, "step": 11584 }, { "epoch": 0.83, "grad_norm": 1.3029361632584875, "learning_rate": 7.501703193858067e-07, "loss": 0.1817, "step": 11585 }, { "epoch": 0.83, "grad_norm": 1.564370039556058, "learning_rate": 7.495601424140225e-07, "loss": 0.2096, "step": 11586 }, { "epoch": 0.83, "grad_norm": 1.3706090738354475, "learning_rate": 7.489501935889654e-07, "loss": 0.1609, "step": 11587 }, { "epoch": 0.83, "grad_norm": 1.1932657385144498, "learning_rate": 7.483404729433719e-07, "loss": 0.1306, "step": 11588 }, { "epoch": 0.83, "grad_norm": 7.945988227655147, "learning_rate": 7.47730980509972e-07, "loss": 0.6897, "step": 11589 }, { "epoch": 0.83, "grad_norm": 1.4562936630667112, "learning_rate": 7.471217163214772e-07, "loss": 0.1649, "step": 11590 }, { "epoch": 0.83, "grad_norm": 1.2408819525654768, "learning_rate": 7.46512680410596e-07, "loss": 0.1516, "step": 11591 }, { "epoch": 0.83, "grad_norm": 7.679494843254499, "learning_rate": 7.459038728100138e-07, "loss": 0.6117, "step": 11592 }, { "epoch": 0.83, "grad_norm": 1.5048268288352258, "learning_rate": 7.452952935524127e-07, "loss": 0.1623, "step": 11593 }, { "epoch": 0.83, "grad_norm": 1.3704584312833803, "learning_rate": 7.446869426704561e-07, "loss": 0.1882, "step": 11594 }, { "epoch": 0.83, "grad_norm": 1.087557321193094, "learning_rate": 7.440788201968002e-07, "loss": 0.139, "step": 11595 }, { "epoch": 0.83, "grad_norm": 1.302819372526447, "learning_rate": 7.434709261640838e-07, "loss": 0.1648, "step": 11596 }, { "epoch": 0.83, "grad_norm": 1.199608237312297, "learning_rate": 7.428632606049391e-07, "loss": 0.1387, "step": 11597 }, { "epoch": 0.83, "grad_norm": 1.330136663119566, "learning_rate": 7.422558235519806e-07, "loss": 0.17, "step": 11598 }, { "epoch": 0.83, "grad_norm": 1.3711883535313825, "learning_rate": 7.416486150378144e-07, "loss": 0.1619, "step": 11599 }, { "epoch": 0.83, "grad_norm": 1.4004435705121814, "learning_rate": 7.410416350950333e-07, "loss": 0.1789, "step": 11600 }, { "epoch": 0.83, "grad_norm": 1.2076941643903294, "learning_rate": 7.40434883756218e-07, "loss": 0.1443, "step": 11601 }, { "epoch": 0.83, "grad_norm": 1.2884450094197355, "learning_rate": 7.398283610539353e-07, "loss": 0.1622, "step": 11602 }, { "epoch": 0.83, "grad_norm": 1.4814363222316966, "learning_rate": 7.392220670207423e-07, "loss": 0.2145, "step": 11603 }, { "epoch": 0.83, "grad_norm": 1.2472243685304822, "learning_rate": 7.386160016891802e-07, "loss": 0.1816, "step": 11604 }, { "epoch": 0.83, "grad_norm": 1.3212437636954517, "learning_rate": 7.380101650917826e-07, "loss": 0.1584, "step": 11605 }, { "epoch": 0.83, "grad_norm": 1.5248995669252374, "learning_rate": 7.374045572610666e-07, "loss": 0.1774, "step": 11606 }, { "epoch": 0.83, "grad_norm": 9.708464933852696, "learning_rate": 7.367991782295392e-07, "loss": 0.6073, "step": 11607 }, { "epoch": 0.83, "grad_norm": 1.5007342902331788, "learning_rate": 7.361940280296953e-07, "loss": 0.176, "step": 11608 }, { "epoch": 0.83, "grad_norm": 1.3013840924045739, "learning_rate": 7.355891066940179e-07, "loss": 0.1711, "step": 11609 }, { "epoch": 0.83, "grad_norm": 1.196484274712304, "learning_rate": 7.349844142549744e-07, "loss": 0.1679, "step": 11610 }, { "epoch": 0.83, "grad_norm": 1.4110754991383962, "learning_rate": 7.34379950745025e-07, "loss": 0.1842, "step": 11611 }, { "epoch": 0.83, "grad_norm": 1.3294933767706165, "learning_rate": 7.337757161966119e-07, "loss": 0.1533, "step": 11612 }, { "epoch": 0.83, "grad_norm": 1.3295565377974825, "learning_rate": 7.331717106421709e-07, "loss": 0.1866, "step": 11613 }, { "epoch": 0.83, "grad_norm": 30.98738037654596, "learning_rate": 7.3256793411412e-07, "loss": 0.5968, "step": 11614 }, { "epoch": 0.83, "grad_norm": 1.393754771874616, "learning_rate": 7.319643866448689e-07, "loss": 0.2062, "step": 11615 }, { "epoch": 0.83, "grad_norm": 1.455280162807254, "learning_rate": 7.313610682668132e-07, "loss": 0.1984, "step": 11616 }, { "epoch": 0.83, "grad_norm": 1.3775703371519914, "learning_rate": 7.307579790123376e-07, "loss": 0.1582, "step": 11617 }, { "epoch": 0.83, "grad_norm": 1.3831351045595033, "learning_rate": 7.301551189138134e-07, "loss": 0.1708, "step": 11618 }, { "epoch": 0.83, "grad_norm": 1.339201170953808, "learning_rate": 7.295524880035987e-07, "loss": 0.2, "step": 11619 }, { "epoch": 0.83, "grad_norm": 1.3418064995382029, "learning_rate": 7.289500863140414e-07, "loss": 0.1836, "step": 11620 }, { "epoch": 0.83, "grad_norm": 1.3326598824030378, "learning_rate": 7.283479138774746e-07, "loss": 0.1667, "step": 11621 }, { "epoch": 0.83, "grad_norm": 1.3655307260463638, "learning_rate": 7.277459707262225e-07, "loss": 0.1581, "step": 11622 }, { "epoch": 0.83, "grad_norm": 1.4331386375969262, "learning_rate": 7.27144256892593e-07, "loss": 0.2059, "step": 11623 }, { "epoch": 0.83, "grad_norm": 1.2804462693553644, "learning_rate": 7.265427724088842e-07, "loss": 0.2018, "step": 11624 }, { "epoch": 0.83, "grad_norm": 1.2914246183466542, "learning_rate": 7.25941517307382e-07, "loss": 0.1333, "step": 11625 }, { "epoch": 0.83, "grad_norm": 1.5763593066306556, "learning_rate": 7.253404916203605e-07, "loss": 0.1827, "step": 11626 }, { "epoch": 0.83, "grad_norm": 4.588771475788876, "learning_rate": 7.247396953800778e-07, "loss": 0.5215, "step": 11627 }, { "epoch": 0.83, "grad_norm": 1.6097079485031904, "learning_rate": 7.241391286187849e-07, "loss": 0.2276, "step": 11628 }, { "epoch": 0.83, "grad_norm": 1.3581610147676522, "learning_rate": 7.235387913687153e-07, "loss": 0.1778, "step": 11629 }, { "epoch": 0.83, "grad_norm": 1.439366639484726, "learning_rate": 7.22938683662095e-07, "loss": 0.1459, "step": 11630 }, { "epoch": 0.83, "grad_norm": 1.421754298246453, "learning_rate": 7.223388055311331e-07, "loss": 0.154, "step": 11631 }, { "epoch": 0.83, "grad_norm": 1.4037248082670815, "learning_rate": 7.217391570080302e-07, "loss": 0.1811, "step": 11632 }, { "epoch": 0.83, "grad_norm": 7.458678308573073, "learning_rate": 7.211397381249724e-07, "loss": 0.4446, "step": 11633 }, { "epoch": 0.83, "grad_norm": 1.4752498990753813, "learning_rate": 7.205405489141354e-07, "loss": 0.1646, "step": 11634 }, { "epoch": 0.83, "grad_norm": 1.388908443341257, "learning_rate": 7.199415894076795e-07, "loss": 0.1582, "step": 11635 }, { "epoch": 0.83, "grad_norm": 1.2895418286990967, "learning_rate": 7.193428596377561e-07, "loss": 0.1305, "step": 11636 }, { "epoch": 0.83, "grad_norm": 5.01856345789147, "learning_rate": 7.187443596365013e-07, "loss": 0.4766, "step": 11637 }, { "epoch": 0.83, "grad_norm": 1.3089173390219897, "learning_rate": 7.181460894360415e-07, "loss": 0.1635, "step": 11638 }, { "epoch": 0.83, "grad_norm": 1.3765236494637514, "learning_rate": 7.175480490684872e-07, "loss": 0.1729, "step": 11639 }, { "epoch": 0.83, "grad_norm": 1.7509183075420982, "learning_rate": 7.16950238565941e-07, "loss": 0.2087, "step": 11640 }, { "epoch": 0.83, "grad_norm": 1.1854208294479573, "learning_rate": 7.163526579604902e-07, "loss": 0.1328, "step": 11641 }, { "epoch": 0.83, "grad_norm": 1.450009021457237, "learning_rate": 7.157553072842116e-07, "loss": 0.1619, "step": 11642 }, { "epoch": 0.83, "grad_norm": 1.3579177639365734, "learning_rate": 7.151581865691671e-07, "loss": 0.1712, "step": 11643 }, { "epoch": 0.83, "grad_norm": 1.3798007825921677, "learning_rate": 7.14561295847409e-07, "loss": 0.1619, "step": 11644 }, { "epoch": 0.83, "grad_norm": 1.3706440834218203, "learning_rate": 7.139646351509744e-07, "loss": 0.2247, "step": 11645 }, { "epoch": 0.83, "grad_norm": 1.489584081379273, "learning_rate": 7.133682045118906e-07, "loss": 0.2412, "step": 11646 }, { "epoch": 0.83, "grad_norm": 1.303560384052622, "learning_rate": 7.127720039621727e-07, "loss": 0.1595, "step": 11647 }, { "epoch": 0.83, "grad_norm": 1.2322530008983776, "learning_rate": 7.121760335338201e-07, "loss": 0.1594, "step": 11648 }, { "epoch": 0.83, "grad_norm": 1.3701177571012602, "learning_rate": 7.115802932588239e-07, "loss": 0.183, "step": 11649 }, { "epoch": 0.83, "grad_norm": 1.3448546900148413, "learning_rate": 7.109847831691608e-07, "loss": 0.1647, "step": 11650 }, { "epoch": 0.83, "grad_norm": 1.249504772027403, "learning_rate": 7.103895032967955e-07, "loss": 0.1523, "step": 11651 }, { "epoch": 0.83, "grad_norm": 5.719241041882766, "learning_rate": 7.097944536736795e-07, "loss": 0.5115, "step": 11652 }, { "epoch": 0.83, "grad_norm": 6.105762148973235, "learning_rate": 7.091996343317537e-07, "loss": 0.7344, "step": 11653 }, { "epoch": 0.83, "grad_norm": 1.3814628572246865, "learning_rate": 7.086050453029442e-07, "loss": 0.1923, "step": 11654 }, { "epoch": 0.83, "grad_norm": 1.371654667850567, "learning_rate": 7.080106866191683e-07, "loss": 0.1568, "step": 11655 }, { "epoch": 0.83, "grad_norm": 1.4375467628545004, "learning_rate": 7.074165583123266e-07, "loss": 0.1862, "step": 11656 }, { "epoch": 0.83, "grad_norm": 5.911442313692837, "learning_rate": 7.068226604143103e-07, "loss": 0.6756, "step": 11657 }, { "epoch": 0.83, "grad_norm": 1.381787643863725, "learning_rate": 7.062289929569977e-07, "loss": 0.1875, "step": 11658 }, { "epoch": 0.83, "grad_norm": 1.3426209192917509, "learning_rate": 7.056355559722555e-07, "loss": 0.1663, "step": 11659 }, { "epoch": 0.83, "grad_norm": 1.4536859530141466, "learning_rate": 7.05042349491935e-07, "loss": 0.1737, "step": 11660 }, { "epoch": 0.83, "grad_norm": 6.6978744719980305, "learning_rate": 7.044493735478797e-07, "loss": 0.472, "step": 11661 }, { "epoch": 0.83, "grad_norm": 1.2818340837972102, "learning_rate": 7.03856628171915e-07, "loss": 0.1673, "step": 11662 }, { "epoch": 0.83, "grad_norm": 1.4849495835658328, "learning_rate": 7.032641133958607e-07, "loss": 0.169, "step": 11663 }, { "epoch": 0.83, "grad_norm": 1.2451367941156322, "learning_rate": 7.026718292515166e-07, "loss": 0.1336, "step": 11664 }, { "epoch": 0.83, "grad_norm": 1.3027510813242822, "learning_rate": 7.020797757706771e-07, "loss": 0.1567, "step": 11665 }, { "epoch": 0.83, "grad_norm": 1.4526465700969229, "learning_rate": 7.014879529851204e-07, "loss": 0.1703, "step": 11666 }, { "epoch": 0.83, "grad_norm": 1.3267610438653357, "learning_rate": 7.008963609266145e-07, "loss": 0.1804, "step": 11667 }, { "epoch": 0.83, "grad_norm": 1.3754745718316033, "learning_rate": 7.003049996269112e-07, "loss": 0.1514, "step": 11668 }, { "epoch": 0.83, "grad_norm": 5.606624661694305, "learning_rate": 6.997138691177546e-07, "loss": 0.6555, "step": 11669 }, { "epoch": 0.83, "grad_norm": 1.5229793942128156, "learning_rate": 6.991229694308726e-07, "loss": 0.1924, "step": 11670 }, { "epoch": 0.83, "grad_norm": 1.3983102002880965, "learning_rate": 6.985323005979844e-07, "loss": 0.1975, "step": 11671 }, { "epoch": 0.83, "grad_norm": 1.0278462527220869, "learning_rate": 6.979418626507922e-07, "loss": 0.1003, "step": 11672 }, { "epoch": 0.83, "grad_norm": 1.273742050539887, "learning_rate": 6.973516556209898e-07, "loss": 0.1642, "step": 11673 }, { "epoch": 0.84, "grad_norm": 1.4212775476674828, "learning_rate": 6.967616795402565e-07, "loss": 0.1677, "step": 11674 }, { "epoch": 0.84, "grad_norm": 1.5959808353107412, "learning_rate": 6.961719344402617e-07, "loss": 0.1854, "step": 11675 }, { "epoch": 0.84, "grad_norm": 1.3091535942384105, "learning_rate": 6.955824203526585e-07, "loss": 0.1653, "step": 11676 }, { "epoch": 0.84, "grad_norm": 1.3134927942392767, "learning_rate": 6.949931373090912e-07, "loss": 0.1409, "step": 11677 }, { "epoch": 0.84, "grad_norm": 1.3652037895055094, "learning_rate": 6.944040853411882e-07, "loss": 0.1762, "step": 11678 }, { "epoch": 0.84, "grad_norm": 1.2766694701772208, "learning_rate": 6.93815264480569e-07, "loss": 0.1542, "step": 11679 }, { "epoch": 0.84, "grad_norm": 1.335967256640693, "learning_rate": 6.932266747588395e-07, "loss": 0.1578, "step": 11680 }, { "epoch": 0.84, "grad_norm": 6.481941753227402, "learning_rate": 6.926383162075917e-07, "loss": 0.4685, "step": 11681 }, { "epoch": 0.84, "grad_norm": 1.3858531760525818, "learning_rate": 6.920501888584063e-07, "loss": 0.1516, "step": 11682 }, { "epoch": 0.84, "grad_norm": 1.2770421713865137, "learning_rate": 6.914622927428527e-07, "loss": 0.1561, "step": 11683 }, { "epoch": 0.84, "grad_norm": 6.329417357024852, "learning_rate": 6.908746278924872e-07, "loss": 0.6285, "step": 11684 }, { "epoch": 0.84, "grad_norm": 1.2641289409351533, "learning_rate": 6.902871943388512e-07, "loss": 0.1421, "step": 11685 }, { "epoch": 0.84, "grad_norm": 1.3261687506803588, "learning_rate": 6.896999921134784e-07, "loss": 0.1836, "step": 11686 }, { "epoch": 0.84, "grad_norm": 1.5977296438011017, "learning_rate": 6.891130212478847e-07, "loss": 0.2024, "step": 11687 }, { "epoch": 0.84, "grad_norm": 1.3346254901050132, "learning_rate": 6.885262817735789e-07, "loss": 0.1428, "step": 11688 }, { "epoch": 0.84, "grad_norm": 1.3739951000940909, "learning_rate": 6.879397737220528e-07, "loss": 0.1453, "step": 11689 }, { "epoch": 0.84, "grad_norm": 1.347399895127108, "learning_rate": 6.873534971247897e-07, "loss": 0.1751, "step": 11690 }, { "epoch": 0.84, "grad_norm": 1.2898639620122447, "learning_rate": 6.867674520132567e-07, "loss": 0.1539, "step": 11691 }, { "epoch": 0.84, "grad_norm": 1.3188204756353004, "learning_rate": 6.861816384189107e-07, "loss": 0.1597, "step": 11692 }, { "epoch": 0.84, "grad_norm": 1.5334764211286867, "learning_rate": 6.855960563731972e-07, "loss": 0.2061, "step": 11693 }, { "epoch": 0.84, "grad_norm": 1.3828502166818224, "learning_rate": 6.850107059075478e-07, "loss": 0.189, "step": 11694 }, { "epoch": 0.84, "grad_norm": 1.4380249797393059, "learning_rate": 6.844255870533806e-07, "loss": 0.1833, "step": 11695 }, { "epoch": 0.84, "grad_norm": 1.3844340246367486, "learning_rate": 6.838406998421038e-07, "loss": 0.1819, "step": 11696 }, { "epoch": 0.84, "grad_norm": 1.6103480951829856, "learning_rate": 6.8325604430511e-07, "loss": 0.2018, "step": 11697 }, { "epoch": 0.84, "grad_norm": 1.3346237743896638, "learning_rate": 6.826716204737832e-07, "loss": 0.158, "step": 11698 }, { "epoch": 0.84, "grad_norm": 1.3466762863298858, "learning_rate": 6.820874283794909e-07, "loss": 0.1735, "step": 11699 }, { "epoch": 0.84, "grad_norm": 5.558454454418486, "learning_rate": 6.815034680535915e-07, "loss": 0.442, "step": 11700 }, { "epoch": 0.84, "grad_norm": 1.2728376002677624, "learning_rate": 6.809197395274291e-07, "loss": 0.1763, "step": 11701 }, { "epoch": 0.84, "grad_norm": 5.036437312375747, "learning_rate": 6.803362428323379e-07, "loss": 0.5571, "step": 11702 }, { "epoch": 0.84, "grad_norm": 1.2178136934159807, "learning_rate": 6.797529779996342e-07, "loss": 0.1582, "step": 11703 }, { "epoch": 0.84, "grad_norm": 1.2617567877666496, "learning_rate": 6.791699450606293e-07, "loss": 0.1724, "step": 11704 }, { "epoch": 0.84, "grad_norm": 1.26442055427084, "learning_rate": 6.78587144046614e-07, "loss": 0.1921, "step": 11705 }, { "epoch": 0.84, "grad_norm": 1.371830499006445, "learning_rate": 6.780045749888742e-07, "loss": 0.159, "step": 11706 }, { "epoch": 0.84, "grad_norm": 1.3599719811081583, "learning_rate": 6.774222379186774e-07, "loss": 0.1722, "step": 11707 }, { "epoch": 0.84, "grad_norm": 5.349665489670711, "learning_rate": 6.768401328672825e-07, "loss": 0.5265, "step": 11708 }, { "epoch": 0.84, "grad_norm": 1.4471261458332878, "learning_rate": 6.762582598659334e-07, "loss": 0.1626, "step": 11709 }, { "epoch": 0.84, "grad_norm": 1.4713515872406844, "learning_rate": 6.756766189458642e-07, "loss": 0.1582, "step": 11710 }, { "epoch": 0.84, "grad_norm": 1.4358802364193017, "learning_rate": 6.750952101382957e-07, "loss": 0.2113, "step": 11711 }, { "epoch": 0.84, "grad_norm": 1.2107552455140782, "learning_rate": 6.745140334744327e-07, "loss": 0.1566, "step": 11712 }, { "epoch": 0.84, "grad_norm": 1.2892818200222695, "learning_rate": 6.739330889854739e-07, "loss": 0.1567, "step": 11713 }, { "epoch": 0.84, "grad_norm": 1.4724895935737239, "learning_rate": 6.73352376702599e-07, "loss": 0.166, "step": 11714 }, { "epoch": 0.84, "grad_norm": 5.337973964978723, "learning_rate": 6.727718966569807e-07, "loss": 0.6202, "step": 11715 }, { "epoch": 0.84, "grad_norm": 1.1445821298792591, "learning_rate": 6.721916488797748e-07, "loss": 0.1605, "step": 11716 }, { "epoch": 0.84, "grad_norm": 1.2882209120008221, "learning_rate": 6.716116334021272e-07, "loss": 0.2137, "step": 11717 }, { "epoch": 0.84, "grad_norm": 6.321049666032275, "learning_rate": 6.710318502551721e-07, "loss": 0.5727, "step": 11718 }, { "epoch": 0.84, "grad_norm": 1.329302298124122, "learning_rate": 6.704522994700296e-07, "loss": 0.14, "step": 11719 }, { "epoch": 0.84, "grad_norm": 1.3388685316716111, "learning_rate": 6.698729810778065e-07, "loss": 0.1589, "step": 11720 }, { "epoch": 0.84, "grad_norm": 9.88308495341784, "learning_rate": 6.692938951096006e-07, "loss": 0.7317, "step": 11721 }, { "epoch": 0.84, "grad_norm": 1.4206784227754827, "learning_rate": 6.687150415964916e-07, "loss": 0.2065, "step": 11722 }, { "epoch": 0.84, "grad_norm": 1.2779763353784492, "learning_rate": 6.681364205695534e-07, "loss": 0.1706, "step": 11723 }, { "epoch": 0.84, "grad_norm": 1.3476026400011756, "learning_rate": 6.675580320598418e-07, "loss": 0.1743, "step": 11724 }, { "epoch": 0.84, "grad_norm": 1.4393112906738605, "learning_rate": 6.669798760984026e-07, "loss": 0.1803, "step": 11725 }, { "epoch": 0.84, "grad_norm": 1.4368406290964721, "learning_rate": 6.664019527162697e-07, "loss": 0.1482, "step": 11726 }, { "epoch": 0.84, "grad_norm": 1.1262472278577211, "learning_rate": 6.658242619444644e-07, "loss": 0.158, "step": 11727 }, { "epoch": 0.84, "grad_norm": 1.1935727083902938, "learning_rate": 6.652468038139931e-07, "loss": 0.1325, "step": 11728 }, { "epoch": 0.84, "grad_norm": 1.464242741678958, "learning_rate": 6.646695783558532e-07, "loss": 0.1756, "step": 11729 }, { "epoch": 0.84, "grad_norm": 1.3428680940062958, "learning_rate": 6.640925856010261e-07, "loss": 0.1557, "step": 11730 }, { "epoch": 0.84, "grad_norm": 4.137829852659806, "learning_rate": 6.635158255804841e-07, "loss": 0.5337, "step": 11731 }, { "epoch": 0.84, "grad_norm": 1.1385992638106823, "learning_rate": 6.629392983251842e-07, "loss": 0.1209, "step": 11732 }, { "epoch": 0.84, "grad_norm": 1.5452992999716484, "learning_rate": 6.623630038660722e-07, "loss": 0.18, "step": 11733 }, { "epoch": 0.84, "grad_norm": 1.7144292028908832, "learning_rate": 6.617869422340812e-07, "loss": 0.1693, "step": 11734 }, { "epoch": 0.84, "grad_norm": 1.47553365268462, "learning_rate": 6.61211113460134e-07, "loss": 0.1634, "step": 11735 }, { "epoch": 0.84, "grad_norm": 1.2106834650915264, "learning_rate": 6.606355175751361e-07, "loss": 0.1737, "step": 11736 }, { "epoch": 0.84, "grad_norm": 1.4289696172642374, "learning_rate": 6.60060154609985e-07, "loss": 0.148, "step": 11737 }, { "epoch": 0.84, "grad_norm": 1.1332711007831808, "learning_rate": 6.59485024595562e-07, "loss": 0.1117, "step": 11738 }, { "epoch": 0.84, "grad_norm": 1.5044072373214703, "learning_rate": 6.589101275627402e-07, "loss": 0.1818, "step": 11739 }, { "epoch": 0.84, "grad_norm": 1.3228327735703826, "learning_rate": 6.583354635423755e-07, "loss": 0.1487, "step": 11740 }, { "epoch": 0.84, "grad_norm": 1.3225351863819077, "learning_rate": 6.577610325653144e-07, "loss": 0.1598, "step": 11741 }, { "epoch": 0.84, "grad_norm": 1.3295954408994202, "learning_rate": 6.5718683466239e-07, "loss": 0.1768, "step": 11742 }, { "epoch": 0.84, "grad_norm": 1.3735337339069924, "learning_rate": 6.566128698644237e-07, "loss": 0.1521, "step": 11743 }, { "epoch": 0.84, "grad_norm": 6.778216073936575, "learning_rate": 6.560391382022241e-07, "loss": 0.6154, "step": 11744 }, { "epoch": 0.84, "grad_norm": 1.4864768870777096, "learning_rate": 6.55465639706585e-07, "loss": 0.1512, "step": 11745 }, { "epoch": 0.84, "grad_norm": 1.4979391761733065, "learning_rate": 6.548923744082913e-07, "loss": 0.1879, "step": 11746 }, { "epoch": 0.84, "grad_norm": 1.4561052640747052, "learning_rate": 6.543193423381117e-07, "loss": 0.1855, "step": 11747 }, { "epoch": 0.84, "grad_norm": 1.3658502460776525, "learning_rate": 6.537465435268065e-07, "loss": 0.1541, "step": 11748 }, { "epoch": 0.84, "grad_norm": 1.0913594619378433, "learning_rate": 6.531739780051194e-07, "loss": 0.1452, "step": 11749 }, { "epoch": 0.84, "grad_norm": 1.5195907251245484, "learning_rate": 6.526016458037837e-07, "loss": 0.1784, "step": 11750 }, { "epoch": 0.84, "grad_norm": 5.146031419662332, "learning_rate": 6.520295469535204e-07, "loss": 0.6037, "step": 11751 }, { "epoch": 0.84, "grad_norm": 4.206777256735477, "learning_rate": 6.514576814850382e-07, "loss": 0.4465, "step": 11752 }, { "epoch": 0.84, "grad_norm": 1.4179645020141463, "learning_rate": 6.508860494290309e-07, "loss": 0.207, "step": 11753 }, { "epoch": 0.84, "grad_norm": 7.882637412159327, "learning_rate": 6.503146508161839e-07, "loss": 0.6348, "step": 11754 }, { "epoch": 0.84, "grad_norm": 1.2418436042677932, "learning_rate": 6.497434856771645e-07, "loss": 0.1226, "step": 11755 }, { "epoch": 0.84, "grad_norm": 1.4257577774940373, "learning_rate": 6.491725540426335e-07, "loss": 0.1765, "step": 11756 }, { "epoch": 0.84, "grad_norm": 1.4515318733357008, "learning_rate": 6.486018559432338e-07, "loss": 0.2055, "step": 11757 }, { "epoch": 0.84, "grad_norm": 1.3886170265344622, "learning_rate": 6.480313914095992e-07, "loss": 0.1504, "step": 11758 }, { "epoch": 0.84, "grad_norm": 1.3806203570120676, "learning_rate": 6.474611604723496e-07, "loss": 0.162, "step": 11759 }, { "epoch": 0.84, "grad_norm": 1.3017799672656005, "learning_rate": 6.46891163162095e-07, "loss": 0.1545, "step": 11760 }, { "epoch": 0.84, "grad_norm": 1.4955182263148958, "learning_rate": 6.463213995094275e-07, "loss": 0.1737, "step": 11761 }, { "epoch": 0.84, "grad_norm": 1.3447545243109928, "learning_rate": 6.457518695449316e-07, "loss": 0.1845, "step": 11762 }, { "epoch": 0.84, "grad_norm": 1.2131151573920949, "learning_rate": 6.451825732991762e-07, "loss": 0.1249, "step": 11763 }, { "epoch": 0.84, "grad_norm": 1.279126757749715, "learning_rate": 6.446135108027208e-07, "loss": 0.1442, "step": 11764 }, { "epoch": 0.84, "grad_norm": 1.238124636974609, "learning_rate": 6.440446820861073e-07, "loss": 0.1293, "step": 11765 }, { "epoch": 0.84, "grad_norm": 1.2678413577697123, "learning_rate": 6.434760871798707e-07, "loss": 0.1635, "step": 11766 }, { "epoch": 0.84, "grad_norm": 1.3596953656098216, "learning_rate": 6.4290772611453e-07, "loss": 0.1562, "step": 11767 }, { "epoch": 0.84, "grad_norm": 1.3308912794158525, "learning_rate": 6.423395989205938e-07, "loss": 0.1536, "step": 11768 }, { "epoch": 0.84, "grad_norm": 1.4022258646513146, "learning_rate": 6.417717056285544e-07, "loss": 0.1771, "step": 11769 }, { "epoch": 0.84, "grad_norm": 1.3232166314615519, "learning_rate": 6.412040462688973e-07, "loss": 0.1634, "step": 11770 }, { "epoch": 0.84, "grad_norm": 1.411760500873118, "learning_rate": 6.406366208720887e-07, "loss": 0.1476, "step": 11771 }, { "epoch": 0.84, "grad_norm": 1.257320733796225, "learning_rate": 6.400694294685889e-07, "loss": 0.1864, "step": 11772 }, { "epoch": 0.84, "grad_norm": 1.4213901649618883, "learning_rate": 6.395024720888398e-07, "loss": 0.1709, "step": 11773 }, { "epoch": 0.84, "grad_norm": 5.474540886126251, "learning_rate": 6.389357487632747e-07, "loss": 0.551, "step": 11774 }, { "epoch": 0.84, "grad_norm": 1.3676068144540694, "learning_rate": 6.383692595223129e-07, "loss": 0.1468, "step": 11775 }, { "epoch": 0.84, "grad_norm": 1.3631988095529393, "learning_rate": 6.378030043963618e-07, "loss": 0.1951, "step": 11776 }, { "epoch": 0.84, "grad_norm": 1.3231786675784036, "learning_rate": 6.37236983415816e-07, "loss": 0.1551, "step": 11777 }, { "epoch": 0.84, "grad_norm": 1.30640779582237, "learning_rate": 6.366711966110556e-07, "loss": 0.1621, "step": 11778 }, { "epoch": 0.84, "grad_norm": 1.3270666637225543, "learning_rate": 6.361056440124519e-07, "loss": 0.1676, "step": 11779 }, { "epoch": 0.84, "grad_norm": 1.3213712133721982, "learning_rate": 6.355403256503595e-07, "loss": 0.1721, "step": 11780 }, { "epoch": 0.84, "grad_norm": 5.974781036389823, "learning_rate": 6.349752415551241e-07, "loss": 0.47, "step": 11781 }, { "epoch": 0.84, "grad_norm": 5.979692414166524, "learning_rate": 6.34410391757076e-07, "loss": 0.5925, "step": 11782 }, { "epoch": 0.84, "grad_norm": 1.39539996003602, "learning_rate": 6.338457762865336e-07, "loss": 0.1687, "step": 11783 }, { "epoch": 0.84, "grad_norm": 1.281130222513427, "learning_rate": 6.332813951738048e-07, "loss": 0.1439, "step": 11784 }, { "epoch": 0.84, "grad_norm": 1.3886407096596323, "learning_rate": 6.327172484491834e-07, "loss": 0.1656, "step": 11785 }, { "epoch": 0.84, "grad_norm": 1.347948408407155, "learning_rate": 6.321533361429489e-07, "loss": 0.1344, "step": 11786 }, { "epoch": 0.84, "grad_norm": 1.2493491195629831, "learning_rate": 6.315896582853714e-07, "loss": 0.131, "step": 11787 }, { "epoch": 0.84, "grad_norm": 1.3098562943588363, "learning_rate": 6.310262149067059e-07, "loss": 0.1596, "step": 11788 }, { "epoch": 0.84, "grad_norm": 1.1956999952490035, "learning_rate": 6.304630060371969e-07, "loss": 0.1739, "step": 11789 }, { "epoch": 0.84, "grad_norm": 1.4462340067272852, "learning_rate": 6.29900031707073e-07, "loss": 0.192, "step": 11790 }, { "epoch": 0.84, "grad_norm": 1.231425066252052, "learning_rate": 6.293372919465557e-07, "loss": 0.1492, "step": 11791 }, { "epoch": 0.84, "grad_norm": 1.645304568722303, "learning_rate": 6.287747867858473e-07, "loss": 0.1794, "step": 11792 }, { "epoch": 0.84, "grad_norm": 1.3705834856034897, "learning_rate": 6.282125162551428e-07, "loss": 0.1552, "step": 11793 }, { "epoch": 0.84, "grad_norm": 1.294808462813357, "learning_rate": 6.276504803846218e-07, "loss": 0.1947, "step": 11794 }, { "epoch": 0.84, "grad_norm": 1.2945688230608996, "learning_rate": 6.27088679204454e-07, "loss": 0.1524, "step": 11795 }, { "epoch": 0.84, "grad_norm": 1.4997974318618315, "learning_rate": 6.26527112744792e-07, "loss": 0.183, "step": 11796 }, { "epoch": 0.84, "grad_norm": 1.2443360259166645, "learning_rate": 6.259657810357806e-07, "loss": 0.1475, "step": 11797 }, { "epoch": 0.84, "grad_norm": 1.3192035620163904, "learning_rate": 6.254046841075484e-07, "loss": 0.1762, "step": 11798 }, { "epoch": 0.84, "grad_norm": 5.178712571124446, "learning_rate": 6.248438219902142e-07, "loss": 0.4251, "step": 11799 }, { "epoch": 0.84, "grad_norm": 5.8792583565370435, "learning_rate": 6.242831947138806e-07, "loss": 0.559, "step": 11800 }, { "epoch": 0.84, "grad_norm": 1.4683693918250638, "learning_rate": 6.237228023086417e-07, "loss": 0.1632, "step": 11801 }, { "epoch": 0.84, "grad_norm": 1.1669918420301704, "learning_rate": 6.231626448045768e-07, "loss": 0.163, "step": 11802 }, { "epoch": 0.84, "grad_norm": 1.4678020617138623, "learning_rate": 6.226027222317537e-07, "loss": 0.1769, "step": 11803 }, { "epoch": 0.84, "grad_norm": 1.2121870294760644, "learning_rate": 6.22043034620225e-07, "loss": 0.1398, "step": 11804 }, { "epoch": 0.84, "grad_norm": 6.606257562154117, "learning_rate": 6.214835820000348e-07, "loss": 0.6958, "step": 11805 }, { "epoch": 0.84, "grad_norm": 1.3514621994016, "learning_rate": 6.209243644012097e-07, "loss": 0.1626, "step": 11806 }, { "epoch": 0.84, "grad_norm": 1.5014267658838967, "learning_rate": 6.20365381853768e-07, "loss": 0.1681, "step": 11807 }, { "epoch": 0.84, "grad_norm": 1.3409474272230335, "learning_rate": 6.198066343877135e-07, "loss": 0.1615, "step": 11808 }, { "epoch": 0.84, "grad_norm": 1.376892335599446, "learning_rate": 6.19248122033037e-07, "loss": 0.1503, "step": 11809 }, { "epoch": 0.84, "grad_norm": 1.418224130834307, "learning_rate": 6.186898448197171e-07, "loss": 0.1827, "step": 11810 }, { "epoch": 0.84, "grad_norm": 1.136715036733706, "learning_rate": 6.181318027777206e-07, "loss": 0.1487, "step": 11811 }, { "epoch": 0.84, "grad_norm": 1.3966931557076556, "learning_rate": 6.175739959370014e-07, "loss": 0.1885, "step": 11812 }, { "epoch": 0.84, "grad_norm": 1.2910033251439874, "learning_rate": 6.170164243274985e-07, "loss": 0.1969, "step": 11813 }, { "epoch": 0.85, "grad_norm": 1.4004184911286062, "learning_rate": 6.164590879791426e-07, "loss": 0.175, "step": 11814 }, { "epoch": 0.85, "grad_norm": 1.1986470757325758, "learning_rate": 6.159019869218469e-07, "loss": 0.1524, "step": 11815 }, { "epoch": 0.85, "grad_norm": 1.3452678226203771, "learning_rate": 6.153451211855166e-07, "loss": 0.1474, "step": 11816 }, { "epoch": 0.85, "grad_norm": 1.3621701275636258, "learning_rate": 6.147884908000395e-07, "loss": 0.1852, "step": 11817 }, { "epoch": 0.85, "grad_norm": 12.68012984037672, "learning_rate": 6.142320957952952e-07, "loss": 0.6479, "step": 11818 }, { "epoch": 0.85, "grad_norm": 6.176518127917164, "learning_rate": 6.13675936201148e-07, "loss": 0.6383, "step": 11819 }, { "epoch": 0.85, "grad_norm": 1.4058398596729822, "learning_rate": 6.131200120474512e-07, "loss": 0.2144, "step": 11820 }, { "epoch": 0.85, "grad_norm": 1.4397957447801177, "learning_rate": 6.125643233640433e-07, "loss": 0.1505, "step": 11821 }, { "epoch": 0.85, "grad_norm": 1.3713555901524892, "learning_rate": 6.120088701807536e-07, "loss": 0.2013, "step": 11822 }, { "epoch": 0.85, "grad_norm": 1.2689658787351874, "learning_rate": 6.114536525273934e-07, "loss": 0.1697, "step": 11823 }, { "epoch": 0.85, "grad_norm": 5.84563822572434, "learning_rate": 6.108986704337683e-07, "loss": 0.6613, "step": 11824 }, { "epoch": 0.85, "grad_norm": 1.2877933397532546, "learning_rate": 6.103439239296638e-07, "loss": 0.1528, "step": 11825 }, { "epoch": 0.85, "grad_norm": 1.5016383123625436, "learning_rate": 6.097894130448584e-07, "loss": 0.1843, "step": 11826 }, { "epoch": 0.85, "grad_norm": 1.3230757480386068, "learning_rate": 6.092351378091161e-07, "loss": 0.1588, "step": 11827 }, { "epoch": 0.85, "grad_norm": 1.404649833155314, "learning_rate": 6.086810982521891e-07, "loss": 0.1943, "step": 11828 }, { "epoch": 0.85, "grad_norm": 1.5628874625177338, "learning_rate": 6.081272944038141e-07, "loss": 0.1943, "step": 11829 }, { "epoch": 0.85, "grad_norm": 1.2319735528470148, "learning_rate": 6.075737262937187e-07, "loss": 0.1379, "step": 11830 }, { "epoch": 0.85, "grad_norm": 1.3462818246507613, "learning_rate": 6.070203939516145e-07, "loss": 0.1864, "step": 11831 }, { "epoch": 0.85, "grad_norm": 1.2711814688721346, "learning_rate": 6.064672974072044e-07, "loss": 0.1337, "step": 11832 }, { "epoch": 0.85, "grad_norm": 1.4983709367505482, "learning_rate": 6.059144366901737e-07, "loss": 0.1966, "step": 11833 }, { "epoch": 0.85, "grad_norm": 1.3814432226186442, "learning_rate": 6.053618118301996e-07, "loss": 0.1765, "step": 11834 }, { "epoch": 0.85, "grad_norm": 1.234577445202359, "learning_rate": 6.048094228569445e-07, "loss": 0.1254, "step": 11835 }, { "epoch": 0.85, "grad_norm": 1.4391204341033825, "learning_rate": 6.042572698000592e-07, "loss": 0.1679, "step": 11836 }, { "epoch": 0.85, "grad_norm": 1.404540471495477, "learning_rate": 6.037053526891795e-07, "loss": 0.1732, "step": 11837 }, { "epoch": 0.85, "grad_norm": 1.4652753241611205, "learning_rate": 6.03153671553931e-07, "loss": 0.1885, "step": 11838 }, { "epoch": 0.85, "grad_norm": 1.2230926520136514, "learning_rate": 6.026022264239262e-07, "loss": 0.1817, "step": 11839 }, { "epoch": 0.85, "grad_norm": 7.009650851859441, "learning_rate": 6.020510173287636e-07, "loss": 0.5332, "step": 11840 }, { "epoch": 0.85, "grad_norm": 6.5444583602084005, "learning_rate": 6.015000442980307e-07, "loss": 0.6531, "step": 11841 }, { "epoch": 0.85, "grad_norm": 1.1929639206071763, "learning_rate": 6.009493073613004e-07, "loss": 0.1538, "step": 11842 }, { "epoch": 0.85, "grad_norm": 1.3628521254901884, "learning_rate": 6.003988065481347e-07, "loss": 0.1963, "step": 11843 }, { "epoch": 0.85, "grad_norm": 1.361486919745974, "learning_rate": 5.998485418880822e-07, "loss": 0.1623, "step": 11844 }, { "epoch": 0.85, "grad_norm": 1.344447512503299, "learning_rate": 5.992985134106805e-07, "loss": 0.154, "step": 11845 }, { "epoch": 0.85, "grad_norm": 1.337377575003357, "learning_rate": 5.987487211454502e-07, "loss": 0.1953, "step": 11846 }, { "epoch": 0.85, "grad_norm": 1.5198571468397921, "learning_rate": 5.981991651219043e-07, "loss": 0.1819, "step": 11847 }, { "epoch": 0.85, "grad_norm": 1.2846263944982605, "learning_rate": 5.97649845369539e-07, "loss": 0.1727, "step": 11848 }, { "epoch": 0.85, "grad_norm": 1.3924642609089055, "learning_rate": 5.971007619178415e-07, "loss": 0.2059, "step": 11849 }, { "epoch": 0.85, "grad_norm": 1.4255315287521797, "learning_rate": 5.965519147962823e-07, "loss": 0.151, "step": 11850 }, { "epoch": 0.85, "grad_norm": 1.488422612454247, "learning_rate": 5.960033040343222e-07, "loss": 0.1699, "step": 11851 }, { "epoch": 0.85, "grad_norm": 1.3336444806956729, "learning_rate": 5.954549296614082e-07, "loss": 0.1725, "step": 11852 }, { "epoch": 0.85, "grad_norm": 1.0855948361912515, "learning_rate": 5.949067917069768e-07, "loss": 0.111, "step": 11853 }, { "epoch": 0.85, "grad_norm": 6.6050991328470054, "learning_rate": 5.943588902004471e-07, "loss": 0.6256, "step": 11854 }, { "epoch": 0.85, "grad_norm": 10.750108234176988, "learning_rate": 5.938112251712308e-07, "loss": 0.5607, "step": 11855 }, { "epoch": 0.85, "grad_norm": 6.424404622560542, "learning_rate": 5.932637966487214e-07, "loss": 0.527, "step": 11856 }, { "epoch": 0.85, "grad_norm": 1.5139715575552102, "learning_rate": 5.927166046623062e-07, "loss": 0.2156, "step": 11857 }, { "epoch": 0.85, "grad_norm": 1.4870681731113684, "learning_rate": 5.921696492413525e-07, "loss": 0.166, "step": 11858 }, { "epoch": 0.85, "grad_norm": 1.3826070631527563, "learning_rate": 5.916229304152215e-07, "loss": 0.2079, "step": 11859 }, { "epoch": 0.85, "grad_norm": 1.2969889297987252, "learning_rate": 5.910764482132575e-07, "loss": 0.1788, "step": 11860 }, { "epoch": 0.85, "grad_norm": 1.519378427113825, "learning_rate": 5.905302026647953e-07, "loss": 0.1551, "step": 11861 }, { "epoch": 0.85, "grad_norm": 1.3762181285482564, "learning_rate": 5.899841937991529e-07, "loss": 0.1719, "step": 11862 }, { "epoch": 0.85, "grad_norm": 1.2133694256920355, "learning_rate": 5.8943842164564e-07, "loss": 0.1544, "step": 11863 }, { "epoch": 0.85, "grad_norm": 7.155096680459281, "learning_rate": 5.888928862335491e-07, "loss": 0.5704, "step": 11864 }, { "epoch": 0.85, "grad_norm": 1.1743679456871643, "learning_rate": 5.883475875921646e-07, "loss": 0.1289, "step": 11865 }, { "epoch": 0.85, "grad_norm": 1.382454117439039, "learning_rate": 5.878025257507547e-07, "loss": 0.1557, "step": 11866 }, { "epoch": 0.85, "grad_norm": 1.3101415689207183, "learning_rate": 5.87257700738576e-07, "loss": 0.1904, "step": 11867 }, { "epoch": 0.85, "grad_norm": 1.3378263236375791, "learning_rate": 5.867131125848729e-07, "loss": 0.1817, "step": 11868 }, { "epoch": 0.85, "grad_norm": 1.3912503329505317, "learning_rate": 5.861687613188782e-07, "loss": 0.1682, "step": 11869 }, { "epoch": 0.85, "grad_norm": 1.2552846866997394, "learning_rate": 5.856246469698079e-07, "loss": 0.1674, "step": 11870 }, { "epoch": 0.85, "grad_norm": 1.3633125848770604, "learning_rate": 5.850807695668692e-07, "loss": 0.1914, "step": 11871 }, { "epoch": 0.85, "grad_norm": 1.291762433235719, "learning_rate": 5.845371291392565e-07, "loss": 0.1907, "step": 11872 }, { "epoch": 0.85, "grad_norm": 1.3103926316467114, "learning_rate": 5.839937257161477e-07, "loss": 0.142, "step": 11873 }, { "epoch": 0.85, "grad_norm": 1.41188789321237, "learning_rate": 5.834505593267131e-07, "loss": 0.221, "step": 11874 }, { "epoch": 0.85, "grad_norm": 1.5025406468682707, "learning_rate": 5.829076300001052e-07, "loss": 0.1781, "step": 11875 }, { "epoch": 0.85, "grad_norm": 1.4440595148580095, "learning_rate": 5.823649377654678e-07, "loss": 0.1873, "step": 11876 }, { "epoch": 0.85, "grad_norm": 1.4167769567823612, "learning_rate": 5.818224826519297e-07, "loss": 0.1724, "step": 11877 }, { "epoch": 0.85, "grad_norm": 1.4548155939555418, "learning_rate": 5.812802646886095e-07, "loss": 0.1742, "step": 11878 }, { "epoch": 0.85, "grad_norm": 1.3277806643217567, "learning_rate": 5.807382839046094e-07, "loss": 0.1885, "step": 11879 }, { "epoch": 0.85, "grad_norm": 1.3327495249796568, "learning_rate": 5.801965403290221e-07, "loss": 0.1364, "step": 11880 }, { "epoch": 0.85, "grad_norm": 1.5758238212909672, "learning_rate": 5.796550339909245e-07, "loss": 0.1929, "step": 11881 }, { "epoch": 0.85, "grad_norm": 1.3367845920743415, "learning_rate": 5.791137649193851e-07, "loss": 0.1671, "step": 11882 }, { "epoch": 0.85, "grad_norm": 1.3365213228439465, "learning_rate": 5.785727331434543e-07, "loss": 0.1397, "step": 11883 }, { "epoch": 0.85, "grad_norm": 1.3254073485998006, "learning_rate": 5.780319386921745e-07, "loss": 0.2032, "step": 11884 }, { "epoch": 0.85, "grad_norm": 1.3542488455385189, "learning_rate": 5.77491381594571e-07, "loss": 0.1794, "step": 11885 }, { "epoch": 0.85, "grad_norm": 1.2519627811573155, "learning_rate": 5.76951061879662e-07, "loss": 0.1554, "step": 11886 }, { "epoch": 0.85, "grad_norm": 1.5007446306421866, "learning_rate": 5.764109795764478e-07, "loss": 0.2027, "step": 11887 }, { "epoch": 0.85, "grad_norm": 1.5364241678623267, "learning_rate": 5.758711347139189e-07, "loss": 0.1873, "step": 11888 }, { "epoch": 0.85, "grad_norm": 1.2885868043790423, "learning_rate": 5.753315273210502e-07, "loss": 0.1566, "step": 11889 }, { "epoch": 0.85, "grad_norm": 1.3040430826874798, "learning_rate": 5.747921574268078e-07, "loss": 0.1877, "step": 11890 }, { "epoch": 0.85, "grad_norm": 1.3496253365252762, "learning_rate": 5.742530250601414e-07, "loss": 0.1865, "step": 11891 }, { "epoch": 0.85, "grad_norm": 1.1181022245888634, "learning_rate": 5.73714130249991e-07, "loss": 0.1205, "step": 11892 }, { "epoch": 0.85, "grad_norm": 1.427943808137582, "learning_rate": 5.731754730252797e-07, "loss": 0.1966, "step": 11893 }, { "epoch": 0.85, "grad_norm": 1.641824724978169, "learning_rate": 5.726370534149228e-07, "loss": 0.2237, "step": 11894 }, { "epoch": 0.85, "grad_norm": 1.3459842522768013, "learning_rate": 5.720988714478199e-07, "loss": 0.1399, "step": 11895 }, { "epoch": 0.85, "grad_norm": 1.4086366166429791, "learning_rate": 5.715609271528594e-07, "loss": 0.1783, "step": 11896 }, { "epoch": 0.85, "grad_norm": 1.3691981343784334, "learning_rate": 5.710232205589139e-07, "loss": 0.1722, "step": 11897 }, { "epoch": 0.85, "grad_norm": 1.1988263420978127, "learning_rate": 5.704857516948476e-07, "loss": 0.1427, "step": 11898 }, { "epoch": 0.85, "grad_norm": 1.4724141375899162, "learning_rate": 5.699485205895078e-07, "loss": 0.1796, "step": 11899 }, { "epoch": 0.85, "grad_norm": 1.308025678307626, "learning_rate": 5.694115272717326e-07, "loss": 0.1654, "step": 11900 }, { "epoch": 0.85, "grad_norm": 1.1472654075812014, "learning_rate": 5.688747717703436e-07, "loss": 0.1401, "step": 11901 }, { "epoch": 0.85, "grad_norm": 1.5054923709289376, "learning_rate": 5.68338254114153e-07, "loss": 0.1842, "step": 11902 }, { "epoch": 0.85, "grad_norm": 1.3527434717453437, "learning_rate": 5.67801974331959e-07, "loss": 0.1592, "step": 11903 }, { "epoch": 0.85, "grad_norm": 1.1855312617240594, "learning_rate": 5.67265932452547e-07, "loss": 0.152, "step": 11904 }, { "epoch": 0.85, "grad_norm": 1.3820501160084664, "learning_rate": 5.6673012850469e-07, "loss": 0.2014, "step": 11905 }, { "epoch": 0.85, "grad_norm": 1.3302475707209809, "learning_rate": 5.661945625171467e-07, "loss": 0.1841, "step": 11906 }, { "epoch": 0.85, "grad_norm": 1.2097195984335025, "learning_rate": 5.656592345186657e-07, "loss": 0.161, "step": 11907 }, { "epoch": 0.85, "grad_norm": 1.4124420566582243, "learning_rate": 5.65124144537979e-07, "loss": 0.1736, "step": 11908 }, { "epoch": 0.85, "grad_norm": 1.5064478562026378, "learning_rate": 5.645892926038105e-07, "loss": 0.1689, "step": 11909 }, { "epoch": 0.85, "grad_norm": 1.4037128965418295, "learning_rate": 5.64054678744867e-07, "loss": 0.1953, "step": 11910 }, { "epoch": 0.85, "grad_norm": 1.1588590428448058, "learning_rate": 5.635203029898451e-07, "loss": 0.1258, "step": 11911 }, { "epoch": 0.85, "grad_norm": 1.4009223522939578, "learning_rate": 5.629861653674279e-07, "loss": 0.1888, "step": 11912 }, { "epoch": 0.85, "grad_norm": 1.239482489537595, "learning_rate": 5.624522659062875e-07, "loss": 0.1608, "step": 11913 }, { "epoch": 0.85, "grad_norm": 1.8389237057585537, "learning_rate": 5.619186046350788e-07, "loss": 0.1542, "step": 11914 }, { "epoch": 0.85, "grad_norm": 1.3317044109873473, "learning_rate": 5.613851815824489e-07, "loss": 0.143, "step": 11915 }, { "epoch": 0.85, "grad_norm": 1.2578699671643352, "learning_rate": 5.608519967770276e-07, "loss": 0.1716, "step": 11916 }, { "epoch": 0.85, "grad_norm": 1.1035110092075342, "learning_rate": 5.603190502474365e-07, "loss": 0.1357, "step": 11917 }, { "epoch": 0.85, "grad_norm": 1.3423718411181578, "learning_rate": 5.597863420222793e-07, "loss": 0.1727, "step": 11918 }, { "epoch": 0.85, "grad_norm": 1.3441268516481935, "learning_rate": 5.592538721301522e-07, "loss": 0.1302, "step": 11919 }, { "epoch": 0.85, "grad_norm": 1.4640847294429042, "learning_rate": 5.587216405996343e-07, "loss": 0.1807, "step": 11920 }, { "epoch": 0.85, "grad_norm": 1.5212014573704855, "learning_rate": 5.581896474592957e-07, "loss": 0.2031, "step": 11921 }, { "epoch": 0.85, "grad_norm": 1.215648286912998, "learning_rate": 5.576578927376897e-07, "loss": 0.1542, "step": 11922 }, { "epoch": 0.85, "grad_norm": 1.1912643265601766, "learning_rate": 5.571263764633606e-07, "loss": 0.1372, "step": 11923 }, { "epoch": 0.85, "grad_norm": 1.4463365351938686, "learning_rate": 5.565950986648355e-07, "loss": 0.1731, "step": 11924 }, { "epoch": 0.85, "grad_norm": 4.008005449135428, "learning_rate": 5.56064059370634e-07, "loss": 0.3738, "step": 11925 }, { "epoch": 0.85, "grad_norm": 1.2596222522530265, "learning_rate": 5.555332586092583e-07, "loss": 0.2166, "step": 11926 }, { "epoch": 0.85, "grad_norm": 1.2374533258730707, "learning_rate": 5.550026964091998e-07, "loss": 0.1487, "step": 11927 }, { "epoch": 0.85, "grad_norm": 1.2301517607904666, "learning_rate": 5.544723727989377e-07, "loss": 0.18, "step": 11928 }, { "epoch": 0.85, "grad_norm": 1.2324063556645592, "learning_rate": 5.539422878069389e-07, "loss": 0.1503, "step": 11929 }, { "epoch": 0.85, "grad_norm": 7.970387239803195, "learning_rate": 5.534124414616532e-07, "loss": 0.6601, "step": 11930 }, { "epoch": 0.85, "grad_norm": 8.668238456586904, "learning_rate": 5.528828337915237e-07, "loss": 0.5707, "step": 11931 }, { "epoch": 0.85, "grad_norm": 6.241390059160808, "learning_rate": 5.523534648249751e-07, "loss": 0.6603, "step": 11932 }, { "epoch": 0.85, "grad_norm": 1.3193667062433727, "learning_rate": 5.518243345904239e-07, "loss": 0.1603, "step": 11933 }, { "epoch": 0.85, "grad_norm": 1.2033244935184737, "learning_rate": 5.512954431162698e-07, "loss": 0.1925, "step": 11934 }, { "epoch": 0.85, "grad_norm": 7.1683665110223105, "learning_rate": 5.507667904309027e-07, "loss": 0.38, "step": 11935 }, { "epoch": 0.85, "grad_norm": 1.3531119292411646, "learning_rate": 5.502383765626984e-07, "loss": 0.1875, "step": 11936 }, { "epoch": 0.85, "grad_norm": 1.5142867063195167, "learning_rate": 5.497102015400197e-07, "loss": 0.1644, "step": 11937 }, { "epoch": 0.85, "grad_norm": 1.2215437916349061, "learning_rate": 5.491822653912188e-07, "loss": 0.126, "step": 11938 }, { "epoch": 0.85, "grad_norm": 1.3326128268066109, "learning_rate": 5.486545681446303e-07, "loss": 0.2022, "step": 11939 }, { "epoch": 0.85, "grad_norm": 4.926809551267746, "learning_rate": 5.481271098285818e-07, "loss": 0.3574, "step": 11940 }, { "epoch": 0.85, "grad_norm": 1.2702723643377511, "learning_rate": 5.475998904713825e-07, "loss": 0.149, "step": 11941 }, { "epoch": 0.85, "grad_norm": 9.330307227865044, "learning_rate": 5.470729101013333e-07, "loss": 0.5359, "step": 11942 }, { "epoch": 0.85, "grad_norm": 1.4493380738552404, "learning_rate": 5.465461687467194e-07, "loss": 0.1957, "step": 11943 }, { "epoch": 0.85, "grad_norm": 1.4294740018377265, "learning_rate": 5.460196664358141e-07, "loss": 0.1504, "step": 11944 }, { "epoch": 0.85, "grad_norm": 1.3496267056338718, "learning_rate": 5.454934031968784e-07, "loss": 0.1467, "step": 11945 }, { "epoch": 0.85, "grad_norm": 1.1514274541346865, "learning_rate": 5.449673790581611e-07, "loss": 0.1405, "step": 11946 }, { "epoch": 0.85, "grad_norm": 1.40909698283343, "learning_rate": 5.444415940478948e-07, "loss": 0.1658, "step": 11947 }, { "epoch": 0.85, "grad_norm": 1.3922497019313413, "learning_rate": 5.43916048194304e-07, "loss": 0.1525, "step": 11948 }, { "epoch": 0.85, "grad_norm": 5.2525178338145695, "learning_rate": 5.433907415255951e-07, "loss": 0.5715, "step": 11949 }, { "epoch": 0.85, "grad_norm": 1.4388383242469989, "learning_rate": 5.428656740699678e-07, "loss": 0.1558, "step": 11950 }, { "epoch": 0.85, "grad_norm": 1.1839678015650463, "learning_rate": 5.423408458556023e-07, "loss": 0.1806, "step": 11951 }, { "epoch": 0.85, "grad_norm": 1.3729480158384668, "learning_rate": 5.418162569106711e-07, "loss": 0.1988, "step": 11952 }, { "epoch": 0.86, "grad_norm": 7.322635509044266, "learning_rate": 5.412919072633316e-07, "loss": 0.4371, "step": 11953 }, { "epoch": 0.86, "grad_norm": 4.062667417592494, "learning_rate": 5.407677969417302e-07, "loss": 0.427, "step": 11954 }, { "epoch": 0.86, "grad_norm": 1.6011492676449068, "learning_rate": 5.402439259739972e-07, "loss": 0.1656, "step": 11955 }, { "epoch": 0.86, "grad_norm": 1.4969033099173448, "learning_rate": 5.397202943882529e-07, "loss": 0.1851, "step": 11956 }, { "epoch": 0.86, "grad_norm": 1.328170343331863, "learning_rate": 5.391969022126031e-07, "loss": 0.171, "step": 11957 }, { "epoch": 0.86, "grad_norm": 1.2773728805842328, "learning_rate": 5.386737494751421e-07, "loss": 0.1395, "step": 11958 }, { "epoch": 0.86, "grad_norm": 4.5357603853826465, "learning_rate": 5.381508362039495e-07, "loss": 0.507, "step": 11959 }, { "epoch": 0.86, "grad_norm": 1.329689928153197, "learning_rate": 5.376281624270946e-07, "loss": 0.1701, "step": 11960 }, { "epoch": 0.86, "grad_norm": 1.3175595070852766, "learning_rate": 5.371057281726311e-07, "loss": 0.1808, "step": 11961 }, { "epoch": 0.86, "grad_norm": 1.4276482219442137, "learning_rate": 5.365835334686037e-07, "loss": 0.1807, "step": 11962 }, { "epoch": 0.86, "grad_norm": 1.3182528099412956, "learning_rate": 5.360615783430384e-07, "loss": 0.1837, "step": 11963 }, { "epoch": 0.86, "grad_norm": 1.2327930372495695, "learning_rate": 5.355398628239544e-07, "loss": 0.1477, "step": 11964 }, { "epoch": 0.86, "grad_norm": 1.533575832596085, "learning_rate": 5.350183869393532e-07, "loss": 0.1644, "step": 11965 }, { "epoch": 0.86, "grad_norm": 1.377566068246399, "learning_rate": 5.34497150717227e-07, "loss": 0.1947, "step": 11966 }, { "epoch": 0.86, "grad_norm": 1.271684749098478, "learning_rate": 5.339761541855537e-07, "loss": 0.1498, "step": 11967 }, { "epoch": 0.86, "grad_norm": 1.2599098109710354, "learning_rate": 5.334553973722973e-07, "loss": 0.1541, "step": 11968 }, { "epoch": 0.86, "grad_norm": 1.1781500964320193, "learning_rate": 5.329348803054101e-07, "loss": 0.1351, "step": 11969 }, { "epoch": 0.86, "grad_norm": 1.3612329107125847, "learning_rate": 5.324146030128325e-07, "loss": 0.1537, "step": 11970 }, { "epoch": 0.86, "grad_norm": 1.3714680404078448, "learning_rate": 5.318945655224905e-07, "loss": 0.1806, "step": 11971 }, { "epoch": 0.86, "grad_norm": 1.4767216159160663, "learning_rate": 5.313747678622965e-07, "loss": 0.1611, "step": 11972 }, { "epoch": 0.86, "grad_norm": 1.1490626778464939, "learning_rate": 5.308552100601533e-07, "loss": 0.1363, "step": 11973 }, { "epoch": 0.86, "grad_norm": 1.2054626630330978, "learning_rate": 5.303358921439467e-07, "loss": 0.1527, "step": 11974 }, { "epoch": 0.86, "grad_norm": 1.1832835314945667, "learning_rate": 5.298168141415528e-07, "loss": 0.1268, "step": 11975 }, { "epoch": 0.86, "grad_norm": 6.835606836554857, "learning_rate": 5.292979760808326e-07, "loss": 0.6086, "step": 11976 }, { "epoch": 0.86, "grad_norm": 10.747572637787231, "learning_rate": 5.287793779896356e-07, "loss": 0.5843, "step": 11977 }, { "epoch": 0.86, "grad_norm": 5.675132587305952, "learning_rate": 5.282610198957988e-07, "loss": 0.6761, "step": 11978 }, { "epoch": 0.86, "grad_norm": 1.2127876735036949, "learning_rate": 5.277429018271457e-07, "loss": 0.1555, "step": 11979 }, { "epoch": 0.86, "grad_norm": 1.2852387539900954, "learning_rate": 5.272250238114857e-07, "loss": 0.1607, "step": 11980 }, { "epoch": 0.86, "grad_norm": 1.3277913063459401, "learning_rate": 5.267073858766176e-07, "loss": 0.1896, "step": 11981 }, { "epoch": 0.86, "grad_norm": 1.223891402652373, "learning_rate": 5.261899880503246e-07, "loss": 0.1525, "step": 11982 }, { "epoch": 0.86, "grad_norm": 1.2899044232379844, "learning_rate": 5.256728303603809e-07, "loss": 0.1627, "step": 11983 }, { "epoch": 0.86, "grad_norm": 8.286517861900291, "learning_rate": 5.251559128345429e-07, "loss": 0.5475, "step": 11984 }, { "epoch": 0.86, "grad_norm": 1.4455629094842346, "learning_rate": 5.246392355005591e-07, "loss": 0.1665, "step": 11985 }, { "epoch": 0.86, "grad_norm": 1.3091123115345589, "learning_rate": 5.241227983861591e-07, "loss": 0.1447, "step": 11986 }, { "epoch": 0.86, "grad_norm": 1.3418773987432282, "learning_rate": 5.236066015190677e-07, "loss": 0.1754, "step": 11987 }, { "epoch": 0.86, "grad_norm": 1.2661347586246718, "learning_rate": 5.230906449269895e-07, "loss": 0.1606, "step": 11988 }, { "epoch": 0.86, "grad_norm": 1.3424979374870976, "learning_rate": 5.225749286376202e-07, "loss": 0.1891, "step": 11989 }, { "epoch": 0.86, "grad_norm": 6.213095258539563, "learning_rate": 5.220594526786405e-07, "loss": 0.5789, "step": 11990 }, { "epoch": 0.86, "grad_norm": 1.2379775644078874, "learning_rate": 5.215442170777196e-07, "loss": 0.1811, "step": 11991 }, { "epoch": 0.86, "grad_norm": 1.132940374364009, "learning_rate": 5.210292218625129e-07, "loss": 0.133, "step": 11992 }, { "epoch": 0.86, "grad_norm": 1.320505003258881, "learning_rate": 5.205144670606643e-07, "loss": 0.1855, "step": 11993 }, { "epoch": 0.86, "grad_norm": 1.3812496057614005, "learning_rate": 5.199999526998017e-07, "loss": 0.1828, "step": 11994 }, { "epoch": 0.86, "grad_norm": 1.4366874102412701, "learning_rate": 5.194856788075442e-07, "loss": 0.1678, "step": 11995 }, { "epoch": 0.86, "grad_norm": 1.5433334628647148, "learning_rate": 5.189716454114946e-07, "loss": 0.1797, "step": 11996 }, { "epoch": 0.86, "grad_norm": 1.283487865577448, "learning_rate": 5.184578525392464e-07, "loss": 0.181, "step": 11997 }, { "epoch": 0.86, "grad_norm": 1.5930113738339486, "learning_rate": 5.179443002183754e-07, "loss": 0.1579, "step": 11998 }, { "epoch": 0.86, "grad_norm": 6.9101051832294145, "learning_rate": 5.174309884764478e-07, "loss": 0.5179, "step": 11999 }, { "epoch": 0.86, "grad_norm": 1.3638396987401733, "learning_rate": 5.169179173410178e-07, "loss": 0.1742, "step": 12000 }, { "epoch": 0.86, "grad_norm": 1.3041162873666514, "learning_rate": 5.164050868396225e-07, "loss": 0.1462, "step": 12001 }, { "epoch": 0.86, "grad_norm": 1.5386301177121489, "learning_rate": 5.158924969997908e-07, "loss": 0.1603, "step": 12002 }, { "epoch": 0.86, "grad_norm": 1.3901668082452658, "learning_rate": 5.153801478490344e-07, "loss": 0.1696, "step": 12003 }, { "epoch": 0.86, "grad_norm": 1.109902231292228, "learning_rate": 5.148680394148553e-07, "loss": 0.1343, "step": 12004 }, { "epoch": 0.86, "grad_norm": 1.410708097142772, "learning_rate": 5.143561717247414e-07, "loss": 0.1729, "step": 12005 }, { "epoch": 0.86, "grad_norm": 1.1395484611839373, "learning_rate": 5.138445448061686e-07, "loss": 0.1286, "step": 12006 }, { "epoch": 0.86, "grad_norm": 1.323453445405225, "learning_rate": 5.13333158686597e-07, "loss": 0.1292, "step": 12007 }, { "epoch": 0.86, "grad_norm": 1.3288784757288983, "learning_rate": 5.12822013393478e-07, "loss": 0.1591, "step": 12008 }, { "epoch": 0.86, "grad_norm": 1.232465708975618, "learning_rate": 5.123111089542454e-07, "loss": 0.1396, "step": 12009 }, { "epoch": 0.86, "grad_norm": 1.1555252943829946, "learning_rate": 5.118004453963255e-07, "loss": 0.1474, "step": 12010 }, { "epoch": 0.86, "grad_norm": 1.3660256158512312, "learning_rate": 5.112900227471257e-07, "loss": 0.1821, "step": 12011 }, { "epoch": 0.86, "grad_norm": 6.988651188083765, "learning_rate": 5.107798410340442e-07, "loss": 0.6405, "step": 12012 }, { "epoch": 0.86, "grad_norm": 1.2369970791967717, "learning_rate": 5.102699002844669e-07, "loss": 0.1626, "step": 12013 }, { "epoch": 0.86, "grad_norm": 1.3417701095247825, "learning_rate": 5.097602005257651e-07, "loss": 0.1947, "step": 12014 }, { "epoch": 0.86, "grad_norm": 1.340964331175679, "learning_rate": 5.09250741785296e-07, "loss": 0.1329, "step": 12015 }, { "epoch": 0.86, "grad_norm": 1.4358438432050118, "learning_rate": 5.087415240904076e-07, "loss": 0.1638, "step": 12016 }, { "epoch": 0.86, "grad_norm": 1.4028479394363496, "learning_rate": 5.082325474684303e-07, "loss": 0.1605, "step": 12017 }, { "epoch": 0.86, "grad_norm": 1.5407047053725136, "learning_rate": 5.07723811946686e-07, "loss": 0.2027, "step": 12018 }, { "epoch": 0.86, "grad_norm": 1.3479755638075555, "learning_rate": 5.072153175524791e-07, "loss": 0.1564, "step": 12019 }, { "epoch": 0.86, "grad_norm": 1.3777309483966722, "learning_rate": 5.067070643131056e-07, "loss": 0.1539, "step": 12020 }, { "epoch": 0.86, "grad_norm": 1.1516838456526879, "learning_rate": 5.061990522558457e-07, "loss": 0.1724, "step": 12021 }, { "epoch": 0.86, "grad_norm": 1.1735812211725698, "learning_rate": 5.05691281407969e-07, "loss": 0.1746, "step": 12022 }, { "epoch": 0.86, "grad_norm": 1.2816709672405695, "learning_rate": 5.051837517967284e-07, "loss": 0.1886, "step": 12023 }, { "epoch": 0.86, "grad_norm": 1.3285365148746178, "learning_rate": 5.046764634493678e-07, "loss": 0.1607, "step": 12024 }, { "epoch": 0.86, "grad_norm": 1.4605427995706401, "learning_rate": 5.041694163931149e-07, "loss": 0.1641, "step": 12025 }, { "epoch": 0.86, "grad_norm": 1.4480998193182022, "learning_rate": 5.036626106551879e-07, "loss": 0.208, "step": 12026 }, { "epoch": 0.86, "grad_norm": 1.5182937714709108, "learning_rate": 5.031560462627876e-07, "loss": 0.2011, "step": 12027 }, { "epoch": 0.86, "grad_norm": 10.622260698591672, "learning_rate": 5.026497232431055e-07, "loss": 0.4959, "step": 12028 }, { "epoch": 0.86, "grad_norm": 1.3042977109013212, "learning_rate": 5.021436416233199e-07, "loss": 0.184, "step": 12029 }, { "epoch": 0.86, "grad_norm": 1.2791144721053482, "learning_rate": 5.01637801430594e-07, "loss": 0.176, "step": 12030 }, { "epoch": 0.86, "grad_norm": 5.3087952987783895, "learning_rate": 5.011322026920812e-07, "loss": 0.4897, "step": 12031 }, { "epoch": 0.86, "grad_norm": 1.2479862515079598, "learning_rate": 5.006268454349178e-07, "loss": 0.1312, "step": 12032 }, { "epoch": 0.86, "grad_norm": 1.5527835678744886, "learning_rate": 5.00121729686231e-07, "loss": 0.1611, "step": 12033 }, { "epoch": 0.86, "grad_norm": 1.227772118329657, "learning_rate": 4.996168554731317e-07, "loss": 0.1465, "step": 12034 }, { "epoch": 0.86, "grad_norm": 1.3805144787449908, "learning_rate": 4.991122228227213e-07, "loss": 0.1985, "step": 12035 }, { "epoch": 0.86, "grad_norm": 1.2272353356267884, "learning_rate": 4.986078317620852e-07, "loss": 0.1445, "step": 12036 }, { "epoch": 0.86, "grad_norm": 1.407514163288947, "learning_rate": 4.981036823182972e-07, "loss": 0.1679, "step": 12037 }, { "epoch": 0.86, "grad_norm": 1.4485648221990761, "learning_rate": 4.975997745184191e-07, "loss": 0.2106, "step": 12038 }, { "epoch": 0.86, "grad_norm": 1.469848568371563, "learning_rate": 4.970961083894982e-07, "loss": 0.1835, "step": 12039 }, { "epoch": 0.86, "grad_norm": 1.3033805816871606, "learning_rate": 4.965926839585688e-07, "loss": 0.1628, "step": 12040 }, { "epoch": 0.86, "grad_norm": 1.3073392957575978, "learning_rate": 4.960895012526534e-07, "loss": 0.1491, "step": 12041 }, { "epoch": 0.86, "grad_norm": 1.4108953663455581, "learning_rate": 4.955865602987598e-07, "loss": 0.187, "step": 12042 }, { "epoch": 0.86, "grad_norm": 1.3368221979909678, "learning_rate": 4.950838611238856e-07, "loss": 0.1658, "step": 12043 }, { "epoch": 0.86, "grad_norm": 1.2549017462884158, "learning_rate": 4.945814037550117e-07, "loss": 0.1591, "step": 12044 }, { "epoch": 0.86, "grad_norm": 1.5265264799438054, "learning_rate": 4.940791882191087e-07, "loss": 0.1773, "step": 12045 }, { "epoch": 0.86, "grad_norm": 1.3065146281181967, "learning_rate": 4.935772145431344e-07, "loss": 0.1983, "step": 12046 }, { "epoch": 0.86, "grad_norm": 1.4288383301541023, "learning_rate": 4.930754827540324e-07, "loss": 0.2015, "step": 12047 }, { "epoch": 0.86, "grad_norm": 1.3259444683598571, "learning_rate": 4.925739928787332e-07, "loss": 0.1618, "step": 12048 }, { "epoch": 0.86, "grad_norm": 1.2555773763850806, "learning_rate": 4.920727449441554e-07, "loss": 0.176, "step": 12049 }, { "epoch": 0.86, "grad_norm": 1.2929795110541837, "learning_rate": 4.915717389772029e-07, "loss": 0.1584, "step": 12050 }, { "epoch": 0.86, "grad_norm": 1.4207443334974492, "learning_rate": 4.910709750047693e-07, "loss": 0.1801, "step": 12051 }, { "epoch": 0.86, "grad_norm": 1.3263765222855792, "learning_rate": 4.905704530537314e-07, "loss": 0.1303, "step": 12052 }, { "epoch": 0.86, "grad_norm": 9.52274707143638, "learning_rate": 4.900701731509571e-07, "loss": 0.6399, "step": 12053 }, { "epoch": 0.86, "grad_norm": 1.279330411279231, "learning_rate": 4.895701353232984e-07, "loss": 0.127, "step": 12054 }, { "epoch": 0.86, "grad_norm": 1.2767071359998512, "learning_rate": 4.890703395975976e-07, "loss": 0.183, "step": 12055 }, { "epoch": 0.86, "grad_norm": 7.333287718600134, "learning_rate": 4.885707860006783e-07, "loss": 0.4976, "step": 12056 }, { "epoch": 0.86, "grad_norm": 1.2324529264422999, "learning_rate": 4.880714745593574e-07, "loss": 0.1745, "step": 12057 }, { "epoch": 0.86, "grad_norm": 1.1560019255779528, "learning_rate": 4.875724053004339e-07, "loss": 0.1137, "step": 12058 }, { "epoch": 0.86, "grad_norm": 1.4088661842911823, "learning_rate": 4.87073578250698e-07, "loss": 0.1639, "step": 12059 }, { "epoch": 0.86, "grad_norm": 1.3968088815528998, "learning_rate": 4.865749934369224e-07, "loss": 0.1548, "step": 12060 }, { "epoch": 0.86, "grad_norm": 1.5418992476934186, "learning_rate": 4.860766508858699e-07, "loss": 0.1819, "step": 12061 }, { "epoch": 0.86, "grad_norm": 1.384114995152617, "learning_rate": 4.855785506242899e-07, "loss": 0.2188, "step": 12062 }, { "epoch": 0.86, "grad_norm": 1.4839897990197475, "learning_rate": 4.850806926789192e-07, "loss": 0.1792, "step": 12063 }, { "epoch": 0.86, "grad_norm": 1.363981383431486, "learning_rate": 4.845830770764803e-07, "loss": 0.16, "step": 12064 }, { "epoch": 0.86, "grad_norm": 6.300397110449409, "learning_rate": 4.840857038436825e-07, "loss": 0.4779, "step": 12065 }, { "epoch": 0.86, "grad_norm": 1.272100988282035, "learning_rate": 4.835885730072243e-07, "loss": 0.1577, "step": 12066 }, { "epoch": 0.86, "grad_norm": 1.3094773393893258, "learning_rate": 4.830916845937872e-07, "loss": 0.1715, "step": 12067 }, { "epoch": 0.86, "grad_norm": 1.3326900119217566, "learning_rate": 4.825950386300454e-07, "loss": 0.161, "step": 12068 }, { "epoch": 0.86, "grad_norm": 1.3655936580907897, "learning_rate": 4.820986351426537e-07, "loss": 0.1886, "step": 12069 }, { "epoch": 0.86, "grad_norm": 1.2175696161197458, "learning_rate": 4.816024741582593e-07, "loss": 0.1542, "step": 12070 }, { "epoch": 0.86, "grad_norm": 1.2580143874975975, "learning_rate": 4.811065557034927e-07, "loss": 0.1672, "step": 12071 }, { "epoch": 0.86, "grad_norm": 1.4382745973561353, "learning_rate": 4.806108798049752e-07, "loss": 0.1982, "step": 12072 }, { "epoch": 0.86, "grad_norm": 1.1003844880306466, "learning_rate": 4.801154464893098e-07, "loss": 0.1419, "step": 12073 }, { "epoch": 0.86, "grad_norm": 1.3682629936777522, "learning_rate": 4.796202557830914e-07, "loss": 0.1543, "step": 12074 }, { "epoch": 0.86, "grad_norm": 1.3061279550821403, "learning_rate": 4.791253077128988e-07, "loss": 0.1545, "step": 12075 }, { "epoch": 0.86, "grad_norm": 5.872187046039576, "learning_rate": 4.786306023053e-07, "loss": 0.5414, "step": 12076 }, { "epoch": 0.86, "grad_norm": 6.034567788464287, "learning_rate": 4.781361395868467e-07, "loss": 0.4311, "step": 12077 }, { "epoch": 0.86, "grad_norm": 1.2660147875931582, "learning_rate": 4.776419195840809e-07, "loss": 0.1669, "step": 12078 }, { "epoch": 0.86, "grad_norm": 1.2303544936107844, "learning_rate": 4.771479423235309e-07, "loss": 0.1619, "step": 12079 }, { "epoch": 0.86, "grad_norm": 1.4749718448657627, "learning_rate": 4.766542078317121e-07, "loss": 0.2042, "step": 12080 }, { "epoch": 0.86, "grad_norm": 1.3076443156159239, "learning_rate": 4.7616071613512394e-07, "loss": 0.1586, "step": 12081 }, { "epoch": 0.86, "grad_norm": 1.388036453829973, "learning_rate": 4.756674672602574e-07, "loss": 0.1478, "step": 12082 }, { "epoch": 0.86, "grad_norm": 1.251372578781535, "learning_rate": 4.7517446123358566e-07, "loss": 0.1719, "step": 12083 }, { "epoch": 0.86, "grad_norm": 1.1686357435779169, "learning_rate": 4.746816980815738e-07, "loss": 0.1228, "step": 12084 }, { "epoch": 0.86, "grad_norm": 1.2481857885447187, "learning_rate": 4.7418917783066956e-07, "loss": 0.1815, "step": 12085 }, { "epoch": 0.86, "grad_norm": 1.4546438282829435, "learning_rate": 4.736969005073105e-07, "loss": 0.1735, "step": 12086 }, { "epoch": 0.86, "grad_norm": 1.2393235951392096, "learning_rate": 4.7320486613791904e-07, "loss": 0.1515, "step": 12087 }, { "epoch": 0.86, "grad_norm": 1.4670446776653145, "learning_rate": 4.727130747489067e-07, "loss": 0.1651, "step": 12088 }, { "epoch": 0.86, "grad_norm": 1.346087553083383, "learning_rate": 4.722215263666702e-07, "loss": 0.1577, "step": 12089 }, { "epoch": 0.86, "grad_norm": 1.4215527901831406, "learning_rate": 4.717302210175956e-07, "loss": 0.1732, "step": 12090 }, { "epoch": 0.86, "grad_norm": 1.2358803005810628, "learning_rate": 4.712391587280518e-07, "loss": 0.1483, "step": 12091 }, { "epoch": 0.86, "grad_norm": 1.212986776261093, "learning_rate": 4.707483395243989e-07, "loss": 0.1601, "step": 12092 }, { "epoch": 0.87, "grad_norm": 1.188765331400923, "learning_rate": 4.7025776343298023e-07, "loss": 0.1384, "step": 12093 }, { "epoch": 0.87, "grad_norm": 1.4146078484558355, "learning_rate": 4.697674304801297e-07, "loss": 0.2004, "step": 12094 }, { "epoch": 0.87, "grad_norm": 1.2555503325213395, "learning_rate": 4.6927734069216633e-07, "loss": 0.1341, "step": 12095 }, { "epoch": 0.87, "grad_norm": 1.3997454774395999, "learning_rate": 4.6878749409539515e-07, "loss": 0.2178, "step": 12096 }, { "epoch": 0.87, "grad_norm": 1.3440107209857164, "learning_rate": 4.6829789071610955e-07, "loss": 0.1355, "step": 12097 }, { "epoch": 0.87, "grad_norm": 1.3818965012018785, "learning_rate": 4.678085305805896e-07, "loss": 0.1654, "step": 12098 }, { "epoch": 0.87, "grad_norm": 1.2401974217328104, "learning_rate": 4.6731941371510324e-07, "loss": 0.2002, "step": 12099 }, { "epoch": 0.87, "grad_norm": 1.3624157940766028, "learning_rate": 4.668305401459022e-07, "loss": 0.1627, "step": 12100 }, { "epoch": 0.87, "grad_norm": 4.141679817638267, "learning_rate": 4.6634190989922947e-07, "loss": 0.537, "step": 12101 }, { "epoch": 0.87, "grad_norm": 1.5026080738426155, "learning_rate": 4.6585352300131113e-07, "loss": 0.1778, "step": 12102 }, { "epoch": 0.87, "grad_norm": 1.193631947611065, "learning_rate": 4.6536537947836356e-07, "loss": 0.1548, "step": 12103 }, { "epoch": 0.87, "grad_norm": 1.4887071873815263, "learning_rate": 4.6487747935658576e-07, "loss": 0.1846, "step": 12104 }, { "epoch": 0.87, "grad_norm": 1.5976807457233757, "learning_rate": 4.6438982266216793e-07, "loss": 0.1856, "step": 12105 }, { "epoch": 0.87, "grad_norm": 1.4101931936809016, "learning_rate": 4.6390240942128573e-07, "loss": 0.2156, "step": 12106 }, { "epoch": 0.87, "grad_norm": 1.3742170074788973, "learning_rate": 4.634152396601016e-07, "loss": 0.1803, "step": 12107 }, { "epoch": 0.87, "grad_norm": 1.352882633235185, "learning_rate": 4.6292831340476406e-07, "loss": 0.1483, "step": 12108 }, { "epoch": 0.87, "grad_norm": 1.4546025549351913, "learning_rate": 4.6244163068141057e-07, "loss": 0.1453, "step": 12109 }, { "epoch": 0.87, "grad_norm": 1.33421094366108, "learning_rate": 4.61955191516163e-07, "loss": 0.1662, "step": 12110 }, { "epoch": 0.87, "grad_norm": 1.4363937926170578, "learning_rate": 4.614689959351326e-07, "loss": 0.1947, "step": 12111 }, { "epoch": 0.87, "grad_norm": 1.4674549401563293, "learning_rate": 4.6098304396441474e-07, "loss": 0.2094, "step": 12112 }, { "epoch": 0.87, "grad_norm": 1.5929176435326713, "learning_rate": 4.6049733563009515e-07, "loss": 0.1854, "step": 12113 }, { "epoch": 0.87, "grad_norm": 1.3610287350878238, "learning_rate": 4.600118709582435e-07, "loss": 0.1604, "step": 12114 }, { "epoch": 0.87, "grad_norm": 1.3249234947239992, "learning_rate": 4.595266499749196e-07, "loss": 0.1686, "step": 12115 }, { "epoch": 0.87, "grad_norm": 6.822404599675312, "learning_rate": 4.5904167270616527e-07, "loss": 0.4698, "step": 12116 }, { "epoch": 0.87, "grad_norm": 1.2964428425537264, "learning_rate": 4.5855693917801537e-07, "loss": 0.1649, "step": 12117 }, { "epoch": 0.87, "grad_norm": 1.2477238355064513, "learning_rate": 4.580724494164851e-07, "loss": 0.1611, "step": 12118 }, { "epoch": 0.87, "grad_norm": 1.3805966659786733, "learning_rate": 4.5758820344758317e-07, "loss": 0.1944, "step": 12119 }, { "epoch": 0.87, "grad_norm": 1.4177783647063877, "learning_rate": 4.571042012972993e-07, "loss": 0.1699, "step": 12120 }, { "epoch": 0.87, "grad_norm": 1.3519721452161142, "learning_rate": 4.566204429916138e-07, "loss": 0.1789, "step": 12121 }, { "epoch": 0.87, "grad_norm": 1.2065207392224284, "learning_rate": 4.5613692855649316e-07, "loss": 0.138, "step": 12122 }, { "epoch": 0.87, "grad_norm": 1.5021114733730614, "learning_rate": 4.556536580178911e-07, "loss": 0.1909, "step": 12123 }, { "epoch": 0.87, "grad_norm": 1.2686956146474246, "learning_rate": 4.551706314017462e-07, "loss": 0.1824, "step": 12124 }, { "epoch": 0.87, "grad_norm": 1.3342065932239426, "learning_rate": 4.5468784873398733e-07, "loss": 0.1406, "step": 12125 }, { "epoch": 0.87, "grad_norm": 1.1378828874411688, "learning_rate": 4.5420531004052584e-07, "loss": 0.156, "step": 12126 }, { "epoch": 0.87, "grad_norm": 1.4866697701096736, "learning_rate": 4.537230153472638e-07, "loss": 0.1755, "step": 12127 }, { "epoch": 0.87, "grad_norm": 1.2237092356301258, "learning_rate": 4.5324096468009006e-07, "loss": 0.1278, "step": 12128 }, { "epoch": 0.87, "grad_norm": 1.2340765133303346, "learning_rate": 4.527591580648771e-07, "loss": 0.1543, "step": 12129 }, { "epoch": 0.87, "grad_norm": 1.175850339966986, "learning_rate": 4.5227759552748716e-07, "loss": 0.1296, "step": 12130 }, { "epoch": 0.87, "grad_norm": 1.2948577160880215, "learning_rate": 4.5179627709376893e-07, "loss": 0.1857, "step": 12131 }, { "epoch": 0.87, "grad_norm": 1.157772732853982, "learning_rate": 4.5131520278955785e-07, "loss": 0.156, "step": 12132 }, { "epoch": 0.87, "grad_norm": 1.1985947240732575, "learning_rate": 4.5083437264067555e-07, "loss": 0.1598, "step": 12133 }, { "epoch": 0.87, "grad_norm": 1.3342352506877397, "learning_rate": 4.503537866729318e-07, "loss": 0.1823, "step": 12134 }, { "epoch": 0.87, "grad_norm": 1.1872918917230555, "learning_rate": 4.4987344491212117e-07, "loss": 0.0958, "step": 12135 }, { "epoch": 0.87, "grad_norm": 5.798849286541649, "learning_rate": 4.493933473840284e-07, "loss": 0.4926, "step": 12136 }, { "epoch": 0.87, "grad_norm": 1.2694586966163712, "learning_rate": 4.4891349411442074e-07, "loss": 0.1614, "step": 12137 }, { "epoch": 0.87, "grad_norm": 1.2484985522759429, "learning_rate": 4.484338851290565e-07, "loss": 0.1433, "step": 12138 }, { "epoch": 0.87, "grad_norm": 1.4386659174564058, "learning_rate": 4.479545204536789e-07, "loss": 0.1483, "step": 12139 }, { "epoch": 0.87, "grad_norm": 1.4166666368515797, "learning_rate": 4.4747540011401913e-07, "loss": 0.1854, "step": 12140 }, { "epoch": 0.87, "grad_norm": 1.198211637051815, "learning_rate": 4.469965241357932e-07, "loss": 0.1526, "step": 12141 }, { "epoch": 0.87, "grad_norm": 1.3860905054221737, "learning_rate": 4.465178925447061e-07, "loss": 0.1931, "step": 12142 }, { "epoch": 0.87, "grad_norm": 5.246253008483393, "learning_rate": 4.460395053664479e-07, "loss": 0.469, "step": 12143 }, { "epoch": 0.87, "grad_norm": 1.4453350166592382, "learning_rate": 4.45561362626698e-07, "loss": 0.1603, "step": 12144 }, { "epoch": 0.87, "grad_norm": 5.169594590585564, "learning_rate": 4.450834643511193e-07, "loss": 0.7001, "step": 12145 }, { "epoch": 0.87, "grad_norm": 7.666100207694078, "learning_rate": 4.446058105653644e-07, "loss": 0.7823, "step": 12146 }, { "epoch": 0.87, "grad_norm": 1.6227531398697075, "learning_rate": 4.4412840129507194e-07, "loss": 0.167, "step": 12147 }, { "epoch": 0.87, "grad_norm": 1.2583613161113556, "learning_rate": 4.4365123656586853e-07, "loss": 0.1375, "step": 12148 }, { "epoch": 0.87, "grad_norm": 1.2330254891650425, "learning_rate": 4.431743164033647e-07, "loss": 0.179, "step": 12149 }, { "epoch": 0.87, "grad_norm": 1.3854823515773331, "learning_rate": 4.426976408331607e-07, "loss": 0.1765, "step": 12150 }, { "epoch": 0.87, "grad_norm": 1.2045150808063168, "learning_rate": 4.422212098808415e-07, "loss": 0.1384, "step": 12151 }, { "epoch": 0.87, "grad_norm": 1.2721965538461268, "learning_rate": 4.417450235719811e-07, "loss": 0.1426, "step": 12152 }, { "epoch": 0.87, "grad_norm": 1.2991215730071053, "learning_rate": 4.4126908193213856e-07, "loss": 0.1698, "step": 12153 }, { "epoch": 0.87, "grad_norm": 1.323325836148321, "learning_rate": 4.4079338498686063e-07, "loss": 0.1383, "step": 12154 }, { "epoch": 0.87, "grad_norm": 1.4289252337746643, "learning_rate": 4.403179327616808e-07, "loss": 0.1432, "step": 12155 }, { "epoch": 0.87, "grad_norm": 13.01925522703635, "learning_rate": 4.3984272528212077e-07, "loss": 0.6366, "step": 12156 }, { "epoch": 0.87, "grad_norm": 1.2177147900210736, "learning_rate": 4.3936776257368586e-07, "loss": 0.1409, "step": 12157 }, { "epoch": 0.87, "grad_norm": 1.5411701783366438, "learning_rate": 4.3889304466187055e-07, "loss": 0.1927, "step": 12158 }, { "epoch": 0.87, "grad_norm": 1.2327111905230808, "learning_rate": 4.384185715721578e-07, "loss": 0.1541, "step": 12159 }, { "epoch": 0.87, "grad_norm": 1.306575622650174, "learning_rate": 4.379443433300129e-07, "loss": 0.1449, "step": 12160 }, { "epoch": 0.87, "grad_norm": 7.03960602568097, "learning_rate": 4.3747035996089206e-07, "loss": 0.4088, "step": 12161 }, { "epoch": 0.87, "grad_norm": 14.100764281235646, "learning_rate": 4.36996621490236e-07, "loss": 0.6425, "step": 12162 }, { "epoch": 0.87, "grad_norm": 1.550904632861906, "learning_rate": 4.3652312794347284e-07, "loss": 0.1977, "step": 12163 }, { "epoch": 0.87, "grad_norm": 1.2751742127626762, "learning_rate": 4.360498793460188e-07, "loss": 0.1552, "step": 12164 }, { "epoch": 0.87, "grad_norm": 13.055919294893094, "learning_rate": 4.3557687572327644e-07, "loss": 0.5961, "step": 12165 }, { "epoch": 0.87, "grad_norm": 1.4806812405444445, "learning_rate": 4.3510411710063316e-07, "loss": 0.1692, "step": 12166 }, { "epoch": 0.87, "grad_norm": 4.998049834843293, "learning_rate": 4.3463160350346644e-07, "loss": 0.4372, "step": 12167 }, { "epoch": 0.87, "grad_norm": 1.3366537001986256, "learning_rate": 4.341593349571366e-07, "loss": 0.1555, "step": 12168 }, { "epoch": 0.87, "grad_norm": 1.4205419887384076, "learning_rate": 4.3368731148699604e-07, "loss": 0.1699, "step": 12169 }, { "epoch": 0.87, "grad_norm": 1.4155081990278664, "learning_rate": 4.3321553311837847e-07, "loss": 0.1567, "step": 12170 }, { "epoch": 0.87, "grad_norm": 1.3087785368213412, "learning_rate": 4.327439998766081e-07, "loss": 0.1866, "step": 12171 }, { "epoch": 0.87, "grad_norm": 1.4719709901256772, "learning_rate": 4.322727117869951e-07, "loss": 0.1528, "step": 12172 }, { "epoch": 0.87, "grad_norm": 1.329574533977938, "learning_rate": 4.3180166887483723e-07, "loss": 0.1544, "step": 12173 }, { "epoch": 0.87, "grad_norm": 1.351566962012528, "learning_rate": 4.313308711654163e-07, "loss": 0.1771, "step": 12174 }, { "epoch": 0.87, "grad_norm": 7.748832165787509, "learning_rate": 4.308603186840049e-07, "loss": 0.5493, "step": 12175 }, { "epoch": 0.87, "grad_norm": 1.328431481581902, "learning_rate": 4.3039001145585843e-07, "loss": 0.1639, "step": 12176 }, { "epoch": 0.87, "grad_norm": 16.261083758165544, "learning_rate": 4.299199495062234e-07, "loss": 0.366, "step": 12177 }, { "epoch": 0.87, "grad_norm": 1.2546935342771806, "learning_rate": 4.294501328603279e-07, "loss": 0.1681, "step": 12178 }, { "epoch": 0.87, "grad_norm": 1.24761627420577, "learning_rate": 4.2898056154339174e-07, "loss": 0.1658, "step": 12179 }, { "epoch": 0.87, "grad_norm": 1.4273639524174486, "learning_rate": 4.2851123558061927e-07, "loss": 0.2006, "step": 12180 }, { "epoch": 0.87, "grad_norm": 1.2367723589706219, "learning_rate": 4.2804215499720304e-07, "loss": 0.1719, "step": 12181 }, { "epoch": 0.87, "grad_norm": 9.776615709933504, "learning_rate": 4.275733198183196e-07, "loss": 0.4312, "step": 12182 }, { "epoch": 0.87, "grad_norm": 1.1599368773771237, "learning_rate": 4.271047300691361e-07, "loss": 0.1534, "step": 12183 }, { "epoch": 0.87, "grad_norm": 1.2594040577272472, "learning_rate": 4.266363857748024e-07, "loss": 0.151, "step": 12184 }, { "epoch": 0.87, "grad_norm": 1.4163832549097195, "learning_rate": 4.2616828696045943e-07, "loss": 0.1687, "step": 12185 }, { "epoch": 0.87, "grad_norm": 1.2516124972585618, "learning_rate": 4.2570043365123105e-07, "loss": 0.1704, "step": 12186 }, { "epoch": 0.87, "grad_norm": 1.2640239092462386, "learning_rate": 4.2523282587223216e-07, "loss": 0.1232, "step": 12187 }, { "epoch": 0.87, "grad_norm": 1.200582172097428, "learning_rate": 4.2476546364855875e-07, "loss": 0.1745, "step": 12188 }, { "epoch": 0.87, "grad_norm": 1.3052542189004455, "learning_rate": 4.2429834700529914e-07, "loss": 0.1988, "step": 12189 }, { "epoch": 0.87, "grad_norm": 1.3091665092389297, "learning_rate": 4.238314759675266e-07, "loss": 0.1619, "step": 12190 }, { "epoch": 0.87, "grad_norm": 1.3700322187550933, "learning_rate": 4.233648505602994e-07, "loss": 0.1994, "step": 12191 }, { "epoch": 0.87, "grad_norm": 1.7742948656314352, "learning_rate": 4.228984708086664e-07, "loss": 0.1447, "step": 12192 }, { "epoch": 0.87, "grad_norm": 1.2091943714488227, "learning_rate": 4.224323367376587e-07, "loss": 0.1689, "step": 12193 }, { "epoch": 0.87, "grad_norm": 5.5959588072957525, "learning_rate": 4.219664483722979e-07, "loss": 0.738, "step": 12194 }, { "epoch": 0.87, "grad_norm": 1.1875479298262428, "learning_rate": 4.215008057375902e-07, "loss": 0.1281, "step": 12195 }, { "epoch": 0.87, "grad_norm": 1.2140445212908904, "learning_rate": 4.2103540885853055e-07, "loss": 0.1431, "step": 12196 }, { "epoch": 0.87, "grad_norm": 1.4146790789458403, "learning_rate": 4.2057025776009787e-07, "loss": 0.1467, "step": 12197 }, { "epoch": 0.87, "grad_norm": 1.3520571174870055, "learning_rate": 4.20105352467261e-07, "loss": 0.1601, "step": 12198 }, { "epoch": 0.87, "grad_norm": 1.2280093935850074, "learning_rate": 4.1964069300497345e-07, "loss": 0.1563, "step": 12199 }, { "epoch": 0.87, "grad_norm": 1.4581271619862008, "learning_rate": 4.1917627939817793e-07, "loss": 0.1659, "step": 12200 }, { "epoch": 0.87, "grad_norm": 1.557918726414298, "learning_rate": 4.1871211167180015e-07, "loss": 0.1643, "step": 12201 }, { "epoch": 0.87, "grad_norm": 1.251951033704364, "learning_rate": 4.182481898507562e-07, "loss": 0.158, "step": 12202 }, { "epoch": 0.87, "grad_norm": 1.3338568506650184, "learning_rate": 4.1778451395994677e-07, "loss": 0.1909, "step": 12203 }, { "epoch": 0.87, "grad_norm": 1.175865882056589, "learning_rate": 4.1732108402426087e-07, "loss": 0.1349, "step": 12204 }, { "epoch": 0.87, "grad_norm": 1.3227403096633223, "learning_rate": 4.168579000685724e-07, "loss": 0.1537, "step": 12205 }, { "epoch": 0.87, "grad_norm": 1.4735030953649493, "learning_rate": 4.1639496211774434e-07, "loss": 0.1797, "step": 12206 }, { "epoch": 0.87, "grad_norm": 1.2156417629751475, "learning_rate": 4.1593227019662453e-07, "loss": 0.1553, "step": 12207 }, { "epoch": 0.87, "grad_norm": 1.373289203466091, "learning_rate": 4.1546982433005036e-07, "loss": 0.1936, "step": 12208 }, { "epoch": 0.87, "grad_norm": 1.3709082679424485, "learning_rate": 4.1500762454284136e-07, "loss": 0.1421, "step": 12209 }, { "epoch": 0.87, "grad_norm": 1.416322985715025, "learning_rate": 4.145456708598089e-07, "loss": 0.1563, "step": 12210 }, { "epoch": 0.87, "grad_norm": 1.3632375154018952, "learning_rate": 4.1408396330574643e-07, "loss": 0.1587, "step": 12211 }, { "epoch": 0.87, "grad_norm": 1.3615236367905432, "learning_rate": 4.136225019054391e-07, "loss": 0.1705, "step": 12212 }, { "epoch": 0.87, "grad_norm": 1.323329634548248, "learning_rate": 4.1316128668365483e-07, "loss": 0.1623, "step": 12213 }, { "epoch": 0.87, "grad_norm": 5.285195743266254, "learning_rate": 4.1270031766514953e-07, "loss": 0.4025, "step": 12214 }, { "epoch": 0.87, "grad_norm": 1.244070687686029, "learning_rate": 4.1223959487466713e-07, "loss": 0.1362, "step": 12215 }, { "epoch": 0.87, "grad_norm": 1.200025885286412, "learning_rate": 4.117791183369374e-07, "loss": 0.1452, "step": 12216 }, { "epoch": 0.87, "grad_norm": 1.2431502237774237, "learning_rate": 4.1131888807667607e-07, "loss": 0.1436, "step": 12217 }, { "epoch": 0.87, "grad_norm": 1.2384751176492876, "learning_rate": 4.1085890411858785e-07, "loss": 0.1558, "step": 12218 }, { "epoch": 0.87, "grad_norm": 1.3940139174592552, "learning_rate": 4.1039916648736133e-07, "loss": 0.1792, "step": 12219 }, { "epoch": 0.87, "grad_norm": 1.3904645131808844, "learning_rate": 4.0993967520767455e-07, "loss": 0.1792, "step": 12220 }, { "epoch": 0.87, "grad_norm": 12.457294363268492, "learning_rate": 4.094804303041899e-07, "loss": 0.7482, "step": 12221 }, { "epoch": 0.87, "grad_norm": 1.5941108175601084, "learning_rate": 4.090214318015584e-07, "loss": 0.204, "step": 12222 }, { "epoch": 0.87, "grad_norm": 1.3024176362045392, "learning_rate": 4.0856267972441797e-07, "loss": 0.1675, "step": 12223 }, { "epoch": 0.87, "grad_norm": 1.4440827087074304, "learning_rate": 4.081041740973918e-07, "loss": 0.1441, "step": 12224 }, { "epoch": 0.87, "grad_norm": 1.2829166128395488, "learning_rate": 4.0764591494509176e-07, "loss": 0.1669, "step": 12225 }, { "epoch": 0.87, "grad_norm": 1.2300453122373085, "learning_rate": 4.071879022921138e-07, "loss": 0.1433, "step": 12226 }, { "epoch": 0.87, "grad_norm": 1.4664215629290163, "learning_rate": 4.0673013616304326e-07, "loss": 0.1513, "step": 12227 }, { "epoch": 0.87, "grad_norm": 1.416449360875045, "learning_rate": 4.062726165824504e-07, "loss": 0.19, "step": 12228 }, { "epoch": 0.87, "grad_norm": 1.3411980862560648, "learning_rate": 4.058153435748946e-07, "loss": 0.1809, "step": 12229 }, { "epoch": 0.87, "grad_norm": 6.892496377199309, "learning_rate": 4.053583171649189e-07, "loss": 0.6597, "step": 12230 }, { "epoch": 0.87, "grad_norm": 11.590427617530558, "learning_rate": 4.0490153737705486e-07, "loss": 0.5039, "step": 12231 }, { "epoch": 0.87, "grad_norm": 7.205068764583334, "learning_rate": 4.0444500423582057e-07, "loss": 0.5764, "step": 12232 }, { "epoch": 0.88, "grad_norm": 1.331187991214813, "learning_rate": 4.0398871776572256e-07, "loss": 0.175, "step": 12233 }, { "epoch": 0.88, "grad_norm": 1.5120773480108451, "learning_rate": 4.035326779912502e-07, "loss": 0.1504, "step": 12234 }, { "epoch": 0.88, "grad_norm": 1.3599119941773794, "learning_rate": 4.0307688493688377e-07, "loss": 0.1944, "step": 12235 }, { "epoch": 0.88, "grad_norm": 1.3876240506926127, "learning_rate": 4.026213386270866e-07, "loss": 0.1893, "step": 12236 }, { "epoch": 0.88, "grad_norm": 8.298959646081002, "learning_rate": 4.021660390863125e-07, "loss": 0.5796, "step": 12237 }, { "epoch": 0.88, "grad_norm": 1.231696624177417, "learning_rate": 4.017109863389984e-07, "loss": 0.1365, "step": 12238 }, { "epoch": 0.88, "grad_norm": 10.476024619507395, "learning_rate": 4.012561804095705e-07, "loss": 0.6054, "step": 12239 }, { "epoch": 0.88, "grad_norm": 1.3826891841009685, "learning_rate": 4.008016213224408e-07, "loss": 0.1911, "step": 12240 }, { "epoch": 0.88, "grad_norm": 1.3798521154519907, "learning_rate": 4.003473091020094e-07, "loss": 0.172, "step": 12241 }, { "epoch": 0.88, "grad_norm": 1.1508323425007327, "learning_rate": 3.9989324377266046e-07, "loss": 0.1457, "step": 12242 }, { "epoch": 0.88, "grad_norm": 1.2192145747985534, "learning_rate": 3.9943942535876744e-07, "loss": 0.157, "step": 12243 }, { "epoch": 0.88, "grad_norm": 1.1033435724988767, "learning_rate": 3.98985853884688e-07, "loss": 0.1199, "step": 12244 }, { "epoch": 0.88, "grad_norm": 1.393878738043813, "learning_rate": 3.985325293747705e-07, "loss": 0.1764, "step": 12245 }, { "epoch": 0.88, "grad_norm": 1.4144657662132667, "learning_rate": 3.980794518533448e-07, "loss": 0.208, "step": 12246 }, { "epoch": 0.88, "grad_norm": 1.392002203773193, "learning_rate": 3.9762662134473163e-07, "loss": 0.1839, "step": 12247 }, { "epoch": 0.88, "grad_norm": 1.2444680437920506, "learning_rate": 3.971740378732375e-07, "loss": 0.1409, "step": 12248 }, { "epoch": 0.88, "grad_norm": 1.4910329498955077, "learning_rate": 3.967217014631558e-07, "loss": 0.1797, "step": 12249 }, { "epoch": 0.88, "grad_norm": 1.6977258899730152, "learning_rate": 3.9626961213876433e-07, "loss": 0.1954, "step": 12250 }, { "epoch": 0.88, "grad_norm": 7.676296993636367, "learning_rate": 3.9581776992433096e-07, "loss": 0.6332, "step": 12251 }, { "epoch": 0.88, "grad_norm": 1.3785641666925728, "learning_rate": 3.953661748441079e-07, "loss": 0.1758, "step": 12252 }, { "epoch": 0.88, "grad_norm": 1.2439100582036597, "learning_rate": 3.949148269223363e-07, "loss": 0.1449, "step": 12253 }, { "epoch": 0.88, "grad_norm": 1.2186158784700016, "learning_rate": 3.9446372618324014e-07, "loss": 0.13, "step": 12254 }, { "epoch": 0.88, "grad_norm": 7.493630704725571, "learning_rate": 3.9401287265103504e-07, "loss": 0.6286, "step": 12255 }, { "epoch": 0.88, "grad_norm": 5.647808266787209, "learning_rate": 3.9356226634992e-07, "loss": 0.5098, "step": 12256 }, { "epoch": 0.88, "grad_norm": 1.1761827044722613, "learning_rate": 3.9311190730408233e-07, "loss": 0.1508, "step": 12257 }, { "epoch": 0.88, "grad_norm": 1.4434775606417454, "learning_rate": 3.92661795537696e-07, "loss": 0.1656, "step": 12258 }, { "epoch": 0.88, "grad_norm": 1.3342629015040315, "learning_rate": 3.922119310749195e-07, "loss": 0.1635, "step": 12259 }, { "epoch": 0.88, "grad_norm": 1.3305860297153327, "learning_rate": 3.9176231393990183e-07, "loss": 0.1972, "step": 12260 }, { "epoch": 0.88, "grad_norm": 1.3772213508150388, "learning_rate": 3.913129441567748e-07, "loss": 0.1617, "step": 12261 }, { "epoch": 0.88, "grad_norm": 4.948450420518056, "learning_rate": 3.908638217496602e-07, "loss": 0.3538, "step": 12262 }, { "epoch": 0.88, "grad_norm": 1.262322033590518, "learning_rate": 3.904149467426638e-07, "loss": 0.1616, "step": 12263 }, { "epoch": 0.88, "grad_norm": 6.632738746918453, "learning_rate": 3.8996631915988015e-07, "loss": 0.647, "step": 12264 }, { "epoch": 0.88, "grad_norm": 1.3390360530271688, "learning_rate": 3.895179390253895e-07, "loss": 0.1816, "step": 12265 }, { "epoch": 0.88, "grad_norm": 1.433933385951657, "learning_rate": 3.8906980636326084e-07, "loss": 0.1772, "step": 12266 }, { "epoch": 0.88, "grad_norm": 1.4605461032137121, "learning_rate": 3.886219211975456e-07, "loss": 0.178, "step": 12267 }, { "epoch": 0.88, "grad_norm": 1.3999869830628127, "learning_rate": 3.8817428355228617e-07, "loss": 0.1605, "step": 12268 }, { "epoch": 0.88, "grad_norm": 1.3918101029623047, "learning_rate": 3.8772689345150885e-07, "loss": 0.1365, "step": 12269 }, { "epoch": 0.88, "grad_norm": 1.2761329244596857, "learning_rate": 3.8727975091922885e-07, "loss": 0.1666, "step": 12270 }, { "epoch": 0.88, "grad_norm": 1.2871996959620147, "learning_rate": 3.8683285597944597e-07, "loss": 0.1684, "step": 12271 }, { "epoch": 0.88, "grad_norm": 1.131599000441692, "learning_rate": 3.8638620865614816e-07, "loss": 0.1772, "step": 12272 }, { "epoch": 0.88, "grad_norm": 1.2263705961535334, "learning_rate": 3.8593980897330964e-07, "loss": 0.1502, "step": 12273 }, { "epoch": 0.88, "grad_norm": 4.5382787792814065, "learning_rate": 3.854936569548923e-07, "loss": 0.5408, "step": 12274 }, { "epoch": 0.88, "grad_norm": 1.2366509309891942, "learning_rate": 3.850477526248425e-07, "loss": 0.1883, "step": 12275 }, { "epoch": 0.88, "grad_norm": 1.4034854012144553, "learning_rate": 3.846020960070956e-07, "loss": 0.1532, "step": 12276 }, { "epoch": 0.88, "grad_norm": 1.4540425842688196, "learning_rate": 3.8415668712557144e-07, "loss": 0.2122, "step": 12277 }, { "epoch": 0.88, "grad_norm": 1.4070341664050143, "learning_rate": 3.837115260041796e-07, "loss": 0.1591, "step": 12278 }, { "epoch": 0.88, "grad_norm": 1.2806264333576889, "learning_rate": 3.832666126668122e-07, "loss": 0.1637, "step": 12279 }, { "epoch": 0.88, "grad_norm": 1.4815025974692486, "learning_rate": 3.8282194713735286e-07, "loss": 0.1688, "step": 12280 }, { "epoch": 0.88, "grad_norm": 1.5266397018111026, "learning_rate": 3.823775294396664e-07, "loss": 0.1791, "step": 12281 }, { "epoch": 0.88, "grad_norm": 1.5880528232541373, "learning_rate": 3.819333595976116e-07, "loss": 0.2031, "step": 12282 }, { "epoch": 0.88, "grad_norm": 1.228769227001511, "learning_rate": 3.814894376350259e-07, "loss": 0.143, "step": 12283 }, { "epoch": 0.88, "grad_norm": 1.5483599195029787, "learning_rate": 3.8104576357573975e-07, "loss": 0.1913, "step": 12284 }, { "epoch": 0.88, "grad_norm": 1.5119780247374408, "learning_rate": 3.8060233744356634e-07, "loss": 0.181, "step": 12285 }, { "epoch": 0.88, "grad_norm": 11.331125935441687, "learning_rate": 3.8015915926230774e-07, "loss": 0.4936, "step": 12286 }, { "epoch": 0.88, "grad_norm": 1.3576696371095958, "learning_rate": 3.797162290557521e-07, "loss": 0.1656, "step": 12287 }, { "epoch": 0.88, "grad_norm": 9.668643520280467, "learning_rate": 3.7927354684767323e-07, "loss": 0.4131, "step": 12288 }, { "epoch": 0.88, "grad_norm": 1.4170356259058676, "learning_rate": 3.7883111266183435e-07, "loss": 0.1774, "step": 12289 }, { "epoch": 0.88, "grad_norm": 7.103271131856995, "learning_rate": 3.7838892652198036e-07, "loss": 0.568, "step": 12290 }, { "epoch": 0.88, "grad_norm": 1.2053350626947632, "learning_rate": 3.7794698845184994e-07, "loss": 0.1617, "step": 12291 }, { "epoch": 0.88, "grad_norm": 1.3965087454268106, "learning_rate": 3.77505298475162e-07, "loss": 0.1764, "step": 12292 }, { "epoch": 0.88, "grad_norm": 1.2970032203006339, "learning_rate": 3.7706385661562596e-07, "loss": 0.1693, "step": 12293 }, { "epoch": 0.88, "grad_norm": 1.4355161302530306, "learning_rate": 3.766226628969355e-07, "loss": 0.1895, "step": 12294 }, { "epoch": 0.88, "grad_norm": 1.442672401575189, "learning_rate": 3.7618171734277396e-07, "loss": 0.1612, "step": 12295 }, { "epoch": 0.88, "grad_norm": 1.5211361710003917, "learning_rate": 3.757410199768069e-07, "loss": 0.2211, "step": 12296 }, { "epoch": 0.88, "grad_norm": 1.1370303397238015, "learning_rate": 3.75300570822692e-07, "loss": 0.137, "step": 12297 }, { "epoch": 0.88, "grad_norm": 1.3717828731318171, "learning_rate": 3.7486036990406873e-07, "loss": 0.152, "step": 12298 }, { "epoch": 0.88, "grad_norm": 9.13957036649532, "learning_rate": 3.744204172445653e-07, "loss": 0.5733, "step": 12299 }, { "epoch": 0.88, "grad_norm": 1.3861206028774824, "learning_rate": 3.739807128677986e-07, "loss": 0.1615, "step": 12300 }, { "epoch": 0.88, "grad_norm": 1.3022870386754126, "learning_rate": 3.7354125679736897e-07, "loss": 0.157, "step": 12301 }, { "epoch": 0.88, "grad_norm": 1.1157176515128262, "learning_rate": 3.7310204905686433e-07, "loss": 0.1351, "step": 12302 }, { "epoch": 0.88, "grad_norm": 1.3918472910864714, "learning_rate": 3.7266308966986074e-07, "loss": 0.1835, "step": 12303 }, { "epoch": 0.88, "grad_norm": 1.4909868167616538, "learning_rate": 3.722243786599178e-07, "loss": 0.1998, "step": 12304 }, { "epoch": 0.88, "grad_norm": 1.280873808077874, "learning_rate": 3.717859160505866e-07, "loss": 0.1512, "step": 12305 }, { "epoch": 0.88, "grad_norm": 1.418460877360948, "learning_rate": 3.7134770186539935e-07, "loss": 0.1757, "step": 12306 }, { "epoch": 0.88, "grad_norm": 4.9956190497297035, "learning_rate": 3.709097361278785e-07, "loss": 0.5067, "step": 12307 }, { "epoch": 0.88, "grad_norm": 1.276270539250999, "learning_rate": 3.7047201886153296e-07, "loss": 0.1484, "step": 12308 }, { "epoch": 0.88, "grad_norm": 4.73469025182058, "learning_rate": 3.700345500898583e-07, "loss": 0.4954, "step": 12309 }, { "epoch": 0.88, "grad_norm": 1.2986115732855688, "learning_rate": 3.695973298363337e-07, "loss": 0.1285, "step": 12310 }, { "epoch": 0.88, "grad_norm": 5.263406964778152, "learning_rate": 3.691603581244302e-07, "loss": 0.5711, "step": 12311 }, { "epoch": 0.88, "grad_norm": 1.9668219167265268, "learning_rate": 3.687236349776002e-07, "loss": 0.1906, "step": 12312 }, { "epoch": 0.88, "grad_norm": 1.3439456633990847, "learning_rate": 3.6828716041928723e-07, "loss": 0.1873, "step": 12313 }, { "epoch": 0.88, "grad_norm": 1.2352730566493975, "learning_rate": 3.678509344729181e-07, "loss": 0.1593, "step": 12314 }, { "epoch": 0.88, "grad_norm": 1.2483539040552933, "learning_rate": 3.674149571619079e-07, "loss": 0.1687, "step": 12315 }, { "epoch": 0.88, "grad_norm": 7.635784358384336, "learning_rate": 3.6697922850965906e-07, "loss": 0.5316, "step": 12316 }, { "epoch": 0.88, "grad_norm": 1.3312229282458148, "learning_rate": 3.6654374853955844e-07, "loss": 0.1733, "step": 12317 }, { "epoch": 0.88, "grad_norm": 1.4175391552464351, "learning_rate": 3.6610851727498343e-07, "loss": 0.1654, "step": 12318 }, { "epoch": 0.88, "grad_norm": 1.3066140238651576, "learning_rate": 3.65673534739292e-07, "loss": 0.141, "step": 12319 }, { "epoch": 0.88, "grad_norm": 1.0928493779892046, "learning_rate": 3.6523880095583554e-07, "loss": 0.1452, "step": 12320 }, { "epoch": 0.88, "grad_norm": 1.358086193285075, "learning_rate": 3.648043159479459e-07, "loss": 0.1878, "step": 12321 }, { "epoch": 0.88, "grad_norm": 1.373036638653411, "learning_rate": 3.643700797389471e-07, "loss": 0.1705, "step": 12322 }, { "epoch": 0.88, "grad_norm": 1.294043813270681, "learning_rate": 3.6393609235214513e-07, "loss": 0.2, "step": 12323 }, { "epoch": 0.88, "grad_norm": 1.3459247005783943, "learning_rate": 3.6350235381083563e-07, "loss": 0.1836, "step": 12324 }, { "epoch": 0.88, "grad_norm": 1.4733115237766263, "learning_rate": 3.630688641383001e-07, "loss": 0.1675, "step": 12325 }, { "epoch": 0.88, "grad_norm": 1.2128790661257387, "learning_rate": 3.626356233578071e-07, "loss": 0.1361, "step": 12326 }, { "epoch": 0.88, "grad_norm": 1.49515009735866, "learning_rate": 3.622026314926097e-07, "loss": 0.1649, "step": 12327 }, { "epoch": 0.88, "grad_norm": 1.3636222685705321, "learning_rate": 3.6176988856595095e-07, "loss": 0.1702, "step": 12328 }, { "epoch": 0.88, "grad_norm": 1.348972954270638, "learning_rate": 3.6133739460105675e-07, "loss": 0.1772, "step": 12329 }, { "epoch": 0.88, "grad_norm": 1.2885129841539755, "learning_rate": 3.609051496211441e-07, "loss": 0.1929, "step": 12330 }, { "epoch": 0.88, "grad_norm": 1.4773227816967902, "learning_rate": 3.604731536494116e-07, "loss": 0.2028, "step": 12331 }, { "epoch": 0.88, "grad_norm": 1.4681410774497143, "learning_rate": 3.6004140670904853e-07, "loss": 0.1661, "step": 12332 }, { "epoch": 0.88, "grad_norm": 1.2093552741892046, "learning_rate": 3.596099088232291e-07, "loss": 0.1414, "step": 12333 }, { "epoch": 0.88, "grad_norm": 1.459888258865435, "learning_rate": 3.591786600151154e-07, "loss": 0.1731, "step": 12334 }, { "epoch": 0.88, "grad_norm": 1.47142068635466, "learning_rate": 3.5874766030785327e-07, "loss": 0.1858, "step": 12335 }, { "epoch": 0.88, "grad_norm": 6.239522511716702, "learning_rate": 3.583169097245792e-07, "loss": 0.7484, "step": 12336 }, { "epoch": 0.88, "grad_norm": 6.539485995845976, "learning_rate": 3.5788640828841205e-07, "loss": 0.5582, "step": 12337 }, { "epoch": 0.88, "grad_norm": 1.295000261721078, "learning_rate": 3.57456156022461e-07, "loss": 0.1604, "step": 12338 }, { "epoch": 0.88, "grad_norm": 1.2865617902982753, "learning_rate": 3.570261529498187e-07, "loss": 0.1805, "step": 12339 }, { "epoch": 0.88, "grad_norm": 1.2564835669419616, "learning_rate": 3.5659639909356725e-07, "loss": 0.1433, "step": 12340 }, { "epoch": 0.88, "grad_norm": 1.388334398910148, "learning_rate": 3.561668944767738e-07, "loss": 0.1787, "step": 12341 }, { "epoch": 0.88, "grad_norm": 1.196559703772662, "learning_rate": 3.5573763912249313e-07, "loss": 0.1585, "step": 12342 }, { "epoch": 0.88, "grad_norm": 7.342659361808913, "learning_rate": 3.553086330537647e-07, "loss": 0.5447, "step": 12343 }, { "epoch": 0.88, "grad_norm": 1.3256640899410945, "learning_rate": 3.5487987629361676e-07, "loss": 0.1379, "step": 12344 }, { "epoch": 0.88, "grad_norm": 1.3964545508681, "learning_rate": 3.544513688650625e-07, "loss": 0.1668, "step": 12345 }, { "epoch": 0.88, "grad_norm": 1.1967479338941163, "learning_rate": 3.540231107911041e-07, "loss": 0.1419, "step": 12346 }, { "epoch": 0.88, "grad_norm": 1.2371701858481159, "learning_rate": 3.5359510209472646e-07, "loss": 0.1421, "step": 12347 }, { "epoch": 0.88, "grad_norm": 1.3382407833683225, "learning_rate": 3.531673427989046e-07, "loss": 0.1747, "step": 12348 }, { "epoch": 0.88, "grad_norm": 1.5690017815328545, "learning_rate": 3.527398329265991e-07, "loss": 0.24, "step": 12349 }, { "epoch": 0.88, "grad_norm": 1.2353217629407596, "learning_rate": 3.523125725007565e-07, "loss": 0.1932, "step": 12350 }, { "epoch": 0.88, "grad_norm": 1.5311822924626806, "learning_rate": 3.5188556154431174e-07, "loss": 0.1802, "step": 12351 }, { "epoch": 0.88, "grad_norm": 1.2659777934358274, "learning_rate": 3.514588000801833e-07, "loss": 0.1538, "step": 12352 }, { "epoch": 0.88, "grad_norm": 1.2078433176260877, "learning_rate": 3.5103228813127944e-07, "loss": 0.1584, "step": 12353 }, { "epoch": 0.88, "grad_norm": 1.2115604846805677, "learning_rate": 3.506060257204924e-07, "loss": 0.1332, "step": 12354 }, { "epoch": 0.88, "grad_norm": 1.3624529911697794, "learning_rate": 3.5018001287070336e-07, "loss": 0.1769, "step": 12355 }, { "epoch": 0.88, "grad_norm": 1.273772827219816, "learning_rate": 3.497542496047779e-07, "loss": 0.1443, "step": 12356 }, { "epoch": 0.88, "grad_norm": 1.4465989168929785, "learning_rate": 3.493287359455694e-07, "loss": 0.233, "step": 12357 }, { "epoch": 0.88, "grad_norm": 7.912982676040247, "learning_rate": 3.4890347191591843e-07, "loss": 0.5574, "step": 12358 }, { "epoch": 0.88, "grad_norm": 1.4832753212448433, "learning_rate": 3.484784575386524e-07, "loss": 0.1867, "step": 12359 }, { "epoch": 0.88, "grad_norm": 1.4330048010668202, "learning_rate": 3.480536928365824e-07, "loss": 0.1871, "step": 12360 }, { "epoch": 0.88, "grad_norm": 1.3703808225425294, "learning_rate": 3.476291778325097e-07, "loss": 0.1639, "step": 12361 }, { "epoch": 0.88, "grad_norm": 1.2772496372971203, "learning_rate": 3.472049125492188e-07, "loss": 0.1357, "step": 12362 }, { "epoch": 0.88, "grad_norm": 1.149637745702199, "learning_rate": 3.4678089700948434e-07, "loss": 0.1473, "step": 12363 }, { "epoch": 0.88, "grad_norm": 13.172816609200712, "learning_rate": 3.4635713123606475e-07, "loss": 0.5569, "step": 12364 }, { "epoch": 0.88, "grad_norm": 4.9600074701916546, "learning_rate": 3.4593361525170623e-07, "loss": 0.5062, "step": 12365 }, { "epoch": 0.88, "grad_norm": 1.1533178814690603, "learning_rate": 3.455103490791417e-07, "loss": 0.1259, "step": 12366 }, { "epoch": 0.88, "grad_norm": 9.597160896139815, "learning_rate": 3.4508733274109087e-07, "loss": 0.6245, "step": 12367 }, { "epoch": 0.88, "grad_norm": 1.2706524349221564, "learning_rate": 3.4466456626025824e-07, "loss": 0.162, "step": 12368 }, { "epoch": 0.88, "grad_norm": 1.4204386749026654, "learning_rate": 3.4424204965933795e-07, "loss": 0.1549, "step": 12369 }, { "epoch": 0.88, "grad_norm": 1.4020773809171883, "learning_rate": 3.438197829610074e-07, "loss": 0.1515, "step": 12370 }, { "epoch": 0.88, "grad_norm": 1.3773032257163889, "learning_rate": 3.4339776618793344e-07, "loss": 0.1732, "step": 12371 }, { "epoch": 0.88, "grad_norm": 1.323601648147998, "learning_rate": 3.429759993627674e-07, "loss": 0.1584, "step": 12372 }, { "epoch": 0.89, "grad_norm": 1.3434001656029488, "learning_rate": 3.4255448250814784e-07, "loss": 0.1811, "step": 12373 }, { "epoch": 0.89, "grad_norm": 5.051427180575806, "learning_rate": 3.421332156467005e-07, "loss": 0.6618, "step": 12374 }, { "epoch": 0.89, "grad_norm": 1.2132310916298552, "learning_rate": 3.4171219880103857e-07, "loss": 0.1462, "step": 12375 }, { "epoch": 0.89, "grad_norm": 1.3358766688809016, "learning_rate": 3.4129143199375823e-07, "loss": 0.1476, "step": 12376 }, { "epoch": 0.89, "grad_norm": 1.4334694027367691, "learning_rate": 3.4087091524744706e-07, "loss": 0.1986, "step": 12377 }, { "epoch": 0.89, "grad_norm": 1.477152860097439, "learning_rate": 3.4045064858467426e-07, "loss": 0.2167, "step": 12378 }, { "epoch": 0.89, "grad_norm": 1.1574427847598912, "learning_rate": 3.40030632028e-07, "loss": 0.1471, "step": 12379 }, { "epoch": 0.89, "grad_norm": 1.1979415157825948, "learning_rate": 3.39610865599968e-07, "loss": 0.1523, "step": 12380 }, { "epoch": 0.89, "grad_norm": 1.3489180679672341, "learning_rate": 3.391913493231097e-07, "loss": 0.1605, "step": 12381 }, { "epoch": 0.89, "grad_norm": 6.203006222351786, "learning_rate": 3.3877208321994316e-07, "loss": 0.4289, "step": 12382 }, { "epoch": 0.89, "grad_norm": 1.348096782614331, "learning_rate": 3.3835306731297313e-07, "loss": 0.1652, "step": 12383 }, { "epoch": 0.89, "grad_norm": 5.765061639221064, "learning_rate": 3.3793430162469166e-07, "loss": 0.5733, "step": 12384 }, { "epoch": 0.89, "grad_norm": 1.2827393879554507, "learning_rate": 3.375157861775746e-07, "loss": 0.1691, "step": 12385 }, { "epoch": 0.89, "grad_norm": 1.5540421573430727, "learning_rate": 3.37097520994088e-07, "loss": 0.166, "step": 12386 }, { "epoch": 0.89, "grad_norm": 1.4090949388514569, "learning_rate": 3.36679506096681e-07, "loss": 0.1397, "step": 12387 }, { "epoch": 0.89, "grad_norm": 1.2611459387350195, "learning_rate": 3.362617415077923e-07, "loss": 0.1515, "step": 12388 }, { "epoch": 0.89, "grad_norm": 1.3037005367631884, "learning_rate": 3.3584422724984465e-07, "loss": 0.167, "step": 12389 }, { "epoch": 0.89, "grad_norm": 1.3567670838458108, "learning_rate": 3.3542696334525e-07, "loss": 0.1662, "step": 12390 }, { "epoch": 0.89, "grad_norm": 1.439067013285684, "learning_rate": 3.3500994981640387e-07, "loss": 0.1825, "step": 12391 }, { "epoch": 0.89, "grad_norm": 1.4258390338582885, "learning_rate": 3.3459318668569053e-07, "loss": 0.214, "step": 12392 }, { "epoch": 0.89, "grad_norm": 1.3264378498860108, "learning_rate": 3.341766739754798e-07, "loss": 0.1536, "step": 12393 }, { "epoch": 0.89, "grad_norm": 1.2356530717824874, "learning_rate": 3.3376041170813003e-07, "loss": 0.1426, "step": 12394 }, { "epoch": 0.89, "grad_norm": 1.479249458709933, "learning_rate": 3.3334439990598264e-07, "loss": 0.2046, "step": 12395 }, { "epoch": 0.89, "grad_norm": 1.3094962782956234, "learning_rate": 3.329286385913688e-07, "loss": 0.1829, "step": 12396 }, { "epoch": 0.89, "grad_norm": 1.067125408659858, "learning_rate": 3.3251312778660394e-07, "loss": 0.1222, "step": 12397 }, { "epoch": 0.89, "grad_norm": 1.2422038297816622, "learning_rate": 3.320978675139919e-07, "loss": 0.1768, "step": 12398 }, { "epoch": 0.89, "grad_norm": 1.3766425132407423, "learning_rate": 3.3168285779582144e-07, "loss": 0.2059, "step": 12399 }, { "epoch": 0.89, "grad_norm": 7.75046398306239, "learning_rate": 3.3126809865436817e-07, "loss": 0.5133, "step": 12400 }, { "epoch": 0.89, "grad_norm": 1.367680696745612, "learning_rate": 3.308535901118959e-07, "loss": 0.2001, "step": 12401 }, { "epoch": 0.89, "grad_norm": 1.3004700846865003, "learning_rate": 3.3043933219065404e-07, "loss": 0.1758, "step": 12402 }, { "epoch": 0.89, "grad_norm": 1.54481663369894, "learning_rate": 3.300253249128771e-07, "loss": 0.1982, "step": 12403 }, { "epoch": 0.89, "grad_norm": 1.3224219478602848, "learning_rate": 3.2961156830078836e-07, "loss": 0.1884, "step": 12404 }, { "epoch": 0.89, "grad_norm": 1.2267261418082813, "learning_rate": 3.291980623765956e-07, "loss": 0.1285, "step": 12405 }, { "epoch": 0.89, "grad_norm": 1.3056063682518488, "learning_rate": 3.287848071624955e-07, "loss": 0.1837, "step": 12406 }, { "epoch": 0.89, "grad_norm": 1.3657374754509184, "learning_rate": 3.283718026806687e-07, "loss": 0.2021, "step": 12407 }, { "epoch": 0.89, "grad_norm": 1.3441918433307374, "learning_rate": 3.2795904895328356e-07, "loss": 0.1691, "step": 12408 }, { "epoch": 0.89, "grad_norm": 5.59482928195363, "learning_rate": 3.275465460024957e-07, "loss": 0.5248, "step": 12409 }, { "epoch": 0.89, "grad_norm": 1.335134705645474, "learning_rate": 3.271342938504479e-07, "loss": 0.1436, "step": 12410 }, { "epoch": 0.89, "grad_norm": 1.3406152387376735, "learning_rate": 3.267222925192659e-07, "loss": 0.189, "step": 12411 }, { "epoch": 0.89, "grad_norm": 1.413920952316013, "learning_rate": 3.263105420310658e-07, "loss": 0.1599, "step": 12412 }, { "epoch": 0.89, "grad_norm": 1.3959980563531655, "learning_rate": 3.258990424079472e-07, "loss": 0.1451, "step": 12413 }, { "epoch": 0.89, "grad_norm": 1.2718813359317076, "learning_rate": 3.2548779367199965e-07, "loss": 0.1419, "step": 12414 }, { "epoch": 0.89, "grad_norm": 1.3769974828851124, "learning_rate": 3.250767958452966e-07, "loss": 0.1918, "step": 12415 }, { "epoch": 0.89, "grad_norm": 1.1817027545956562, "learning_rate": 3.246660489498982e-07, "loss": 0.1452, "step": 12416 }, { "epoch": 0.89, "grad_norm": 1.3086829248549918, "learning_rate": 3.242555530078517e-07, "loss": 0.1847, "step": 12417 }, { "epoch": 0.89, "grad_norm": 1.2626044260275835, "learning_rate": 3.2384530804119185e-07, "loss": 0.1683, "step": 12418 }, { "epoch": 0.89, "grad_norm": 1.5208098424671461, "learning_rate": 3.234353140719393e-07, "loss": 0.2158, "step": 12419 }, { "epoch": 0.89, "grad_norm": 5.6812279838177036, "learning_rate": 3.230255711220992e-07, "loss": 0.6642, "step": 12420 }, { "epoch": 0.89, "grad_norm": 6.848773243185552, "learning_rate": 3.226160792136662e-07, "loss": 0.624, "step": 12421 }, { "epoch": 0.89, "grad_norm": 1.4532321794848184, "learning_rate": 3.222068383686194e-07, "loss": 0.2123, "step": 12422 }, { "epoch": 0.89, "grad_norm": 1.3661002088818466, "learning_rate": 3.217978486089263e-07, "loss": 0.1875, "step": 12423 }, { "epoch": 0.89, "grad_norm": 1.4092594637819513, "learning_rate": 3.2138910995653863e-07, "loss": 0.2075, "step": 12424 }, { "epoch": 0.89, "grad_norm": 1.2910739822320736, "learning_rate": 3.209806224333961e-07, "loss": 0.1241, "step": 12425 }, { "epoch": 0.89, "grad_norm": 1.3783685604183196, "learning_rate": 3.205723860614252e-07, "loss": 0.1776, "step": 12426 }, { "epoch": 0.89, "grad_norm": 1.0438665324735945, "learning_rate": 3.2016440086253873e-07, "loss": 0.117, "step": 12427 }, { "epoch": 0.89, "grad_norm": 1.195459556146762, "learning_rate": 3.1975666685863494e-07, "loss": 0.1312, "step": 12428 }, { "epoch": 0.89, "grad_norm": 1.3882636245512612, "learning_rate": 3.193491840716001e-07, "loss": 0.1765, "step": 12429 }, { "epoch": 0.89, "grad_norm": 1.364730778036446, "learning_rate": 3.1894195252330506e-07, "loss": 0.1315, "step": 12430 }, { "epoch": 0.89, "grad_norm": 1.4064565023172253, "learning_rate": 3.185349722356101e-07, "loss": 0.1445, "step": 12431 }, { "epoch": 0.89, "grad_norm": 1.340913204651421, "learning_rate": 3.1812824323035843e-07, "loss": 0.1634, "step": 12432 }, { "epoch": 0.89, "grad_norm": 7.3619176645894555, "learning_rate": 3.1772176552938237e-07, "loss": 0.5294, "step": 12433 }, { "epoch": 0.89, "grad_norm": 1.3168796741115623, "learning_rate": 3.1731553915450077e-07, "loss": 0.1465, "step": 12434 }, { "epoch": 0.89, "grad_norm": 1.3897909799715649, "learning_rate": 3.1690956412751774e-07, "loss": 0.1595, "step": 12435 }, { "epoch": 0.89, "grad_norm": 1.3792811248485628, "learning_rate": 3.165038404702242e-07, "loss": 0.151, "step": 12436 }, { "epoch": 0.89, "grad_norm": 1.3494970740900445, "learning_rate": 3.160983682043983e-07, "loss": 0.1913, "step": 12437 }, { "epoch": 0.89, "grad_norm": 1.3835658274110567, "learning_rate": 3.156931473518038e-07, "loss": 0.1576, "step": 12438 }, { "epoch": 0.89, "grad_norm": 5.6748384678749755, "learning_rate": 3.1528817793419154e-07, "loss": 0.4537, "step": 12439 }, { "epoch": 0.89, "grad_norm": 1.3404998653831366, "learning_rate": 3.1488345997329806e-07, "loss": 0.1405, "step": 12440 }, { "epoch": 0.89, "grad_norm": 5.401175191074733, "learning_rate": 3.144789934908471e-07, "loss": 0.5149, "step": 12441 }, { "epoch": 0.89, "grad_norm": 1.3316726052927388, "learning_rate": 3.140747785085496e-07, "loss": 0.1837, "step": 12442 }, { "epoch": 0.89, "grad_norm": 1.4888731829927793, "learning_rate": 3.136708150481022e-07, "loss": 0.2071, "step": 12443 }, { "epoch": 0.89, "grad_norm": 1.4279632262611879, "learning_rate": 3.132671031311868e-07, "loss": 0.1426, "step": 12444 }, { "epoch": 0.89, "grad_norm": 1.4498420793390037, "learning_rate": 3.12863642779474e-07, "loss": 0.1647, "step": 12445 }, { "epoch": 0.89, "grad_norm": 1.3946268108917923, "learning_rate": 3.124604340146209e-07, "loss": 0.1826, "step": 12446 }, { "epoch": 0.89, "grad_norm": 1.1673593961733186, "learning_rate": 3.120574768582685e-07, "loss": 0.1214, "step": 12447 }, { "epoch": 0.89, "grad_norm": 1.3578403325048316, "learning_rate": 3.1165477133204667e-07, "loss": 0.1721, "step": 12448 }, { "epoch": 0.89, "grad_norm": 1.563163070820605, "learning_rate": 3.1125231745757045e-07, "loss": 0.1877, "step": 12449 }, { "epoch": 0.89, "grad_norm": 1.377470444069363, "learning_rate": 3.108501152564425e-07, "loss": 0.1571, "step": 12450 }, { "epoch": 0.89, "grad_norm": 1.279067843436056, "learning_rate": 3.1044816475025106e-07, "loss": 0.194, "step": 12451 }, { "epoch": 0.89, "grad_norm": 1.6144859106476261, "learning_rate": 3.100464659605729e-07, "loss": 0.2168, "step": 12452 }, { "epoch": 0.89, "grad_norm": 1.253188148166966, "learning_rate": 3.096450189089667e-07, "loss": 0.1624, "step": 12453 }, { "epoch": 0.89, "grad_norm": 1.488688662069539, "learning_rate": 3.092438236169837e-07, "loss": 0.1603, "step": 12454 }, { "epoch": 0.89, "grad_norm": 1.3393158505834106, "learning_rate": 3.088428801061555e-07, "loss": 0.1395, "step": 12455 }, { "epoch": 0.89, "grad_norm": 1.2418327503431368, "learning_rate": 3.0844218839800545e-07, "loss": 0.1383, "step": 12456 }, { "epoch": 0.89, "grad_norm": 1.194457668720862, "learning_rate": 3.0804174851403914e-07, "loss": 0.1461, "step": 12457 }, { "epoch": 0.89, "grad_norm": 8.920598916824305, "learning_rate": 3.076415604757521e-07, "loss": 0.6875, "step": 12458 }, { "epoch": 0.89, "grad_norm": 1.4053592451727894, "learning_rate": 3.072416243046239e-07, "loss": 0.1586, "step": 12459 }, { "epoch": 0.89, "grad_norm": 1.2897200257329038, "learning_rate": 3.0684194002212287e-07, "loss": 0.1695, "step": 12460 }, { "epoch": 0.89, "grad_norm": 5.7529251989028, "learning_rate": 3.064425076497007e-07, "loss": 0.5261, "step": 12461 }, { "epoch": 0.89, "grad_norm": 1.433009955482248, "learning_rate": 3.0604332720879924e-07, "loss": 0.1553, "step": 12462 }, { "epoch": 0.89, "grad_norm": 1.4001146457754277, "learning_rate": 3.0564439872084293e-07, "loss": 0.1791, "step": 12463 }, { "epoch": 0.89, "grad_norm": 4.142190149933078, "learning_rate": 3.052457222072469e-07, "loss": 0.4043, "step": 12464 }, { "epoch": 0.89, "grad_norm": 1.3443850312846948, "learning_rate": 3.048472976894079e-07, "loss": 0.1638, "step": 12465 }, { "epoch": 0.89, "grad_norm": 1.2442280654811086, "learning_rate": 3.044491251887133e-07, "loss": 0.1642, "step": 12466 }, { "epoch": 0.89, "grad_norm": 1.3138916736364092, "learning_rate": 3.0405120472653483e-07, "loss": 0.1702, "step": 12467 }, { "epoch": 0.89, "grad_norm": 1.2591893810302524, "learning_rate": 3.036535363242327e-07, "loss": 0.1768, "step": 12468 }, { "epoch": 0.89, "grad_norm": 1.2499275416547715, "learning_rate": 3.0325612000315084e-07, "loss": 0.159, "step": 12469 }, { "epoch": 0.89, "grad_norm": 1.3831302487444126, "learning_rate": 3.028589557846218e-07, "loss": 0.1692, "step": 12470 }, { "epoch": 0.89, "grad_norm": 1.4390109897537258, "learning_rate": 3.0246204368996225e-07, "loss": 0.1711, "step": 12471 }, { "epoch": 0.89, "grad_norm": 1.304005878619856, "learning_rate": 3.0206538374047857e-07, "loss": 0.1645, "step": 12472 }, { "epoch": 0.89, "grad_norm": 1.2519095276043892, "learning_rate": 3.016689759574604e-07, "loss": 0.1325, "step": 12473 }, { "epoch": 0.89, "grad_norm": 1.3715711453842334, "learning_rate": 3.012728203621862e-07, "loss": 0.1935, "step": 12474 }, { "epoch": 0.89, "grad_norm": 1.263307314409951, "learning_rate": 3.0087691697592014e-07, "loss": 0.1607, "step": 12475 }, { "epoch": 0.89, "grad_norm": 1.3312634813017985, "learning_rate": 3.0048126581991353e-07, "loss": 0.1322, "step": 12476 }, { "epoch": 0.89, "grad_norm": 1.261362369410002, "learning_rate": 3.000858669154011e-07, "loss": 0.172, "step": 12477 }, { "epoch": 0.89, "grad_norm": 1.3611724398878218, "learning_rate": 2.9969072028360804e-07, "loss": 0.1351, "step": 12478 }, { "epoch": 0.89, "grad_norm": 1.5007552151191472, "learning_rate": 2.9929582594574403e-07, "loss": 0.1987, "step": 12479 }, { "epoch": 0.89, "grad_norm": 9.559493919121897, "learning_rate": 2.9890118392300493e-07, "loss": 0.4451, "step": 12480 }, { "epoch": 0.89, "grad_norm": 1.3368832252066805, "learning_rate": 2.985067942365738e-07, "loss": 0.1782, "step": 12481 }, { "epoch": 0.89, "grad_norm": 1.32638849344922, "learning_rate": 2.981126569076198e-07, "loss": 0.1814, "step": 12482 }, { "epoch": 0.89, "grad_norm": 7.837124975944917, "learning_rate": 2.9771877195729883e-07, "loss": 0.5101, "step": 12483 }, { "epoch": 0.89, "grad_norm": 13.22147042289944, "learning_rate": 2.9732513940675276e-07, "loss": 0.5297, "step": 12484 }, { "epoch": 0.89, "grad_norm": 1.1666480087203432, "learning_rate": 2.9693175927711093e-07, "loss": 0.1313, "step": 12485 }, { "epoch": 0.89, "grad_norm": 7.684537497491347, "learning_rate": 2.96538631589488e-07, "loss": 0.6459, "step": 12486 }, { "epoch": 0.89, "grad_norm": 1.4085592456680074, "learning_rate": 2.9614575636498557e-07, "loss": 0.1806, "step": 12487 }, { "epoch": 0.89, "grad_norm": 1.3629126695917761, "learning_rate": 2.9575313362469105e-07, "loss": 0.1523, "step": 12488 }, { "epoch": 0.89, "grad_norm": 1.5276165162006734, "learning_rate": 2.9536076338967987e-07, "loss": 0.2187, "step": 12489 }, { "epoch": 0.89, "grad_norm": 1.2869717020421376, "learning_rate": 2.949686456810119e-07, "loss": 0.1621, "step": 12490 }, { "epoch": 0.89, "grad_norm": 6.209368106952642, "learning_rate": 2.945767805197353e-07, "loss": 0.5035, "step": 12491 }, { "epoch": 0.89, "grad_norm": 1.3687655137467514, "learning_rate": 2.941851679268831e-07, "loss": 0.1702, "step": 12492 }, { "epoch": 0.89, "grad_norm": 1.2776005894333442, "learning_rate": 2.937938079234753e-07, "loss": 0.1747, "step": 12493 }, { "epoch": 0.89, "grad_norm": 1.3555054933350938, "learning_rate": 2.934027005305201e-07, "loss": 0.1554, "step": 12494 }, { "epoch": 0.89, "grad_norm": 1.5697083387951232, "learning_rate": 2.9301184576900954e-07, "loss": 0.2058, "step": 12495 }, { "epoch": 0.89, "grad_norm": 1.3318681327793407, "learning_rate": 2.9262124365992294e-07, "loss": 0.1759, "step": 12496 }, { "epoch": 0.89, "grad_norm": 1.5419473085227686, "learning_rate": 2.922308942242269e-07, "loss": 0.1948, "step": 12497 }, { "epoch": 0.89, "grad_norm": 1.5323509408420828, "learning_rate": 2.9184079748287297e-07, "loss": 0.1516, "step": 12498 }, { "epoch": 0.89, "grad_norm": 1.393141092397405, "learning_rate": 2.9145095345680105e-07, "loss": 0.1776, "step": 12499 }, { "epoch": 0.89, "grad_norm": 5.193024294597759, "learning_rate": 2.910613621669356e-07, "loss": 0.5073, "step": 12500 }, { "epoch": 0.89, "grad_norm": 1.3108665252595617, "learning_rate": 2.906720236341881e-07, "loss": 0.1403, "step": 12501 }, { "epoch": 0.89, "grad_norm": 1.1726288023083062, "learning_rate": 2.902829378794575e-07, "loss": 0.1391, "step": 12502 }, { "epoch": 0.89, "grad_norm": 1.3738962089123448, "learning_rate": 2.898941049236287e-07, "loss": 0.1967, "step": 12503 }, { "epoch": 0.89, "grad_norm": 6.5844703246899, "learning_rate": 2.895055247875717e-07, "loss": 0.6976, "step": 12504 }, { "epoch": 0.89, "grad_norm": 1.2556502697049867, "learning_rate": 2.891171974921447e-07, "loss": 0.1452, "step": 12505 }, { "epoch": 0.89, "grad_norm": 1.301032256629275, "learning_rate": 2.887291230581901e-07, "loss": 0.2031, "step": 12506 }, { "epoch": 0.89, "grad_norm": 1.4341399008956541, "learning_rate": 2.883413015065406e-07, "loss": 0.1787, "step": 12507 }, { "epoch": 0.89, "grad_norm": 1.3590387892800633, "learning_rate": 2.8795373285801067e-07, "loss": 0.1708, "step": 12508 }, { "epoch": 0.89, "grad_norm": 1.3139818584917724, "learning_rate": 2.875664171334047e-07, "loss": 0.1835, "step": 12509 }, { "epoch": 0.89, "grad_norm": 1.3777335355500027, "learning_rate": 2.8717935435351174e-07, "loss": 0.1949, "step": 12510 }, { "epoch": 0.89, "grad_norm": 1.3364779053597309, "learning_rate": 2.867925445391079e-07, "loss": 0.1696, "step": 12511 }, { "epoch": 0.89, "grad_norm": 1.6997505026451962, "learning_rate": 2.864059877109565e-07, "loss": 0.1937, "step": 12512 }, { "epoch": 0.9, "grad_norm": 1.4251723104736207, "learning_rate": 2.860196838898044e-07, "loss": 0.1785, "step": 12513 }, { "epoch": 0.9, "grad_norm": 1.336513246438921, "learning_rate": 2.856336330963894e-07, "loss": 0.1632, "step": 12514 }, { "epoch": 0.9, "grad_norm": 1.1555101125920735, "learning_rate": 2.85247835351431e-07, "loss": 0.1618, "step": 12515 }, { "epoch": 0.9, "grad_norm": 10.964823186769275, "learning_rate": 2.848622906756382e-07, "loss": 0.509, "step": 12516 }, { "epoch": 0.9, "grad_norm": 1.3465780416192414, "learning_rate": 2.8447699908970506e-07, "loss": 0.203, "step": 12517 }, { "epoch": 0.9, "grad_norm": 1.252086372121904, "learning_rate": 2.840919606143133e-07, "loss": 0.1646, "step": 12518 }, { "epoch": 0.9, "grad_norm": 1.307387248344108, "learning_rate": 2.8370717527012925e-07, "loss": 0.1521, "step": 12519 }, { "epoch": 0.9, "grad_norm": 1.4588793145150496, "learning_rate": 2.83322643077808e-07, "loss": 0.1685, "step": 12520 }, { "epoch": 0.9, "grad_norm": 1.4584636313246884, "learning_rate": 2.8293836405798805e-07, "loss": 0.2085, "step": 12521 }, { "epoch": 0.9, "grad_norm": 1.2486457371904838, "learning_rate": 2.8255433823129787e-07, "loss": 0.1453, "step": 12522 }, { "epoch": 0.9, "grad_norm": 7.042719641988343, "learning_rate": 2.821705656183482e-07, "loss": 0.4748, "step": 12523 }, { "epoch": 0.9, "grad_norm": 1.338185186893009, "learning_rate": 2.8178704623974094e-07, "loss": 0.1717, "step": 12524 }, { "epoch": 0.9, "grad_norm": 1.4081695583690395, "learning_rate": 2.8140378011605904e-07, "loss": 0.1796, "step": 12525 }, { "epoch": 0.9, "grad_norm": 1.227685634090469, "learning_rate": 2.810207672678772e-07, "loss": 0.1512, "step": 12526 }, { "epoch": 0.9, "grad_norm": 1.4557141248542993, "learning_rate": 2.8063800771575224e-07, "loss": 0.1511, "step": 12527 }, { "epoch": 0.9, "grad_norm": 1.321647147921995, "learning_rate": 2.802555014802311e-07, "loss": 0.1591, "step": 12528 }, { "epoch": 0.9, "grad_norm": 1.3627614152594545, "learning_rate": 2.798732485818434e-07, "loss": 0.1591, "step": 12529 }, { "epoch": 0.9, "grad_norm": 1.3983385210594073, "learning_rate": 2.7949124904110836e-07, "loss": 0.1863, "step": 12530 }, { "epoch": 0.9, "grad_norm": 1.5056646731198842, "learning_rate": 2.7910950287852903e-07, "loss": 0.164, "step": 12531 }, { "epoch": 0.9, "grad_norm": 1.3238623817356592, "learning_rate": 2.787280101145967e-07, "loss": 0.2073, "step": 12532 }, { "epoch": 0.9, "grad_norm": 1.2687150559654465, "learning_rate": 2.7834677076978787e-07, "loss": 0.1814, "step": 12533 }, { "epoch": 0.9, "grad_norm": 1.272581854399197, "learning_rate": 2.779657848645656e-07, "loss": 0.1471, "step": 12534 }, { "epoch": 0.9, "grad_norm": 1.171771105732968, "learning_rate": 2.775850524193807e-07, "loss": 0.1523, "step": 12535 }, { "epoch": 0.9, "grad_norm": 1.4164942157037956, "learning_rate": 2.772045734546702e-07, "loss": 0.1686, "step": 12536 }, { "epoch": 0.9, "grad_norm": 1.339114043030958, "learning_rate": 2.7682434799085444e-07, "loss": 0.1356, "step": 12537 }, { "epoch": 0.9, "grad_norm": 1.4290427416069456, "learning_rate": 2.764443760483443e-07, "loss": 0.1466, "step": 12538 }, { "epoch": 0.9, "grad_norm": 1.2918517412285264, "learning_rate": 2.7606465764753345e-07, "loss": 0.1673, "step": 12539 }, { "epoch": 0.9, "grad_norm": 1.2736457837521273, "learning_rate": 2.756851928088056e-07, "loss": 0.1137, "step": 12540 }, { "epoch": 0.9, "grad_norm": 1.3602898770286478, "learning_rate": 2.7530598155252666e-07, "loss": 0.187, "step": 12541 }, { "epoch": 0.9, "grad_norm": 1.4249664016452768, "learning_rate": 2.7492702389905256e-07, "loss": 0.1952, "step": 12542 }, { "epoch": 0.9, "grad_norm": 1.3048012602183272, "learning_rate": 2.745483198687243e-07, "loss": 0.173, "step": 12543 }, { "epoch": 0.9, "grad_norm": 1.3374903248710193, "learning_rate": 2.7416986948186897e-07, "loss": 0.1781, "step": 12544 }, { "epoch": 0.9, "grad_norm": 5.341796236212721, "learning_rate": 2.737916727588014e-07, "loss": 0.4947, "step": 12545 }, { "epoch": 0.9, "grad_norm": 1.4704387419518583, "learning_rate": 2.7341372971981917e-07, "loss": 0.2267, "step": 12546 }, { "epoch": 0.9, "grad_norm": 4.364986000832111, "learning_rate": 2.730360403852117e-07, "loss": 0.5715, "step": 12547 }, { "epoch": 0.9, "grad_norm": 1.2590148768463814, "learning_rate": 2.7265860477524884e-07, "loss": 0.1869, "step": 12548 }, { "epoch": 0.9, "grad_norm": 1.184235382412954, "learning_rate": 2.722814229101928e-07, "loss": 0.1426, "step": 12549 }, { "epoch": 0.9, "grad_norm": 1.4046107910298908, "learning_rate": 2.719044948102867e-07, "loss": 0.16, "step": 12550 }, { "epoch": 0.9, "grad_norm": 1.3447899315524046, "learning_rate": 2.7152782049576333e-07, "loss": 0.1569, "step": 12551 }, { "epoch": 0.9, "grad_norm": 1.4537091126228423, "learning_rate": 2.711513999868415e-07, "loss": 0.1751, "step": 12552 }, { "epoch": 0.9, "grad_norm": 1.289879925660578, "learning_rate": 2.707752333037267e-07, "loss": 0.1703, "step": 12553 }, { "epoch": 0.9, "grad_norm": 1.3761626848003403, "learning_rate": 2.7039932046660833e-07, "loss": 0.1559, "step": 12554 }, { "epoch": 0.9, "grad_norm": 1.229135617513562, "learning_rate": 2.700236614956653e-07, "loss": 0.1694, "step": 12555 }, { "epoch": 0.9, "grad_norm": 1.355308492082857, "learning_rate": 2.696482564110603e-07, "loss": 0.1715, "step": 12556 }, { "epoch": 0.9, "grad_norm": 1.4285438957038499, "learning_rate": 2.6927310523294503e-07, "loss": 0.1789, "step": 12557 }, { "epoch": 0.9, "grad_norm": 4.021920864952697, "learning_rate": 2.688982079814545e-07, "loss": 0.4165, "step": 12558 }, { "epoch": 0.9, "grad_norm": 1.246655775470798, "learning_rate": 2.685235646767126e-07, "loss": 0.1437, "step": 12559 }, { "epoch": 0.9, "grad_norm": 1.8937377906460031, "learning_rate": 2.681491753388282e-07, "loss": 0.2287, "step": 12560 }, { "epoch": 0.9, "grad_norm": 1.363767789495913, "learning_rate": 2.677750399878987e-07, "loss": 0.1924, "step": 12561 }, { "epoch": 0.9, "grad_norm": 1.3829496459611403, "learning_rate": 2.6740115864400396e-07, "loss": 0.1996, "step": 12562 }, { "epoch": 0.9, "grad_norm": 1.3203493974584029, "learning_rate": 2.6702753132721414e-07, "loss": 0.1776, "step": 12563 }, { "epoch": 0.9, "grad_norm": 1.1857892031064974, "learning_rate": 2.6665415805758264e-07, "loss": 0.1285, "step": 12564 }, { "epoch": 0.9, "grad_norm": 1.3542116209574886, "learning_rate": 2.662810388551518e-07, "loss": 0.2093, "step": 12565 }, { "epoch": 0.9, "grad_norm": 1.17659787964073, "learning_rate": 2.6590817373994826e-07, "loss": 0.1233, "step": 12566 }, { "epoch": 0.9, "grad_norm": 1.438280177912832, "learning_rate": 2.655355627319861e-07, "loss": 0.1243, "step": 12567 }, { "epoch": 0.9, "grad_norm": 1.3023538663901006, "learning_rate": 2.6516320585126653e-07, "loss": 0.188, "step": 12568 }, { "epoch": 0.9, "grad_norm": 1.238514189539441, "learning_rate": 2.6479110311777576e-07, "loss": 0.1545, "step": 12569 }, { "epoch": 0.9, "grad_norm": 1.411360869702086, "learning_rate": 2.644192545514862e-07, "loss": 0.1769, "step": 12570 }, { "epoch": 0.9, "grad_norm": 1.5939163650426575, "learning_rate": 2.6404766017235794e-07, "loss": 0.1798, "step": 12571 }, { "epoch": 0.9, "grad_norm": 1.2626736258315665, "learning_rate": 2.636763200003356e-07, "loss": 0.1879, "step": 12572 }, { "epoch": 0.9, "grad_norm": 1.2500473212776095, "learning_rate": 2.6330523405535267e-07, "loss": 0.1413, "step": 12573 }, { "epoch": 0.9, "grad_norm": 1.2682458474736469, "learning_rate": 2.6293440235732717e-07, "loss": 0.1551, "step": 12574 }, { "epoch": 0.9, "grad_norm": 1.2972426190502278, "learning_rate": 2.625638249261625e-07, "loss": 0.1674, "step": 12575 }, { "epoch": 0.9, "grad_norm": 1.3334537112900413, "learning_rate": 2.621935017817512e-07, "loss": 0.1661, "step": 12576 }, { "epoch": 0.9, "grad_norm": 1.3876207690264928, "learning_rate": 2.6182343294397063e-07, "loss": 0.2015, "step": 12577 }, { "epoch": 0.9, "grad_norm": 1.3558097484135483, "learning_rate": 2.614536184326849e-07, "loss": 0.1615, "step": 12578 }, { "epoch": 0.9, "grad_norm": 1.3438174049366505, "learning_rate": 2.610840582677426e-07, "loss": 0.1822, "step": 12579 }, { "epoch": 0.9, "grad_norm": 1.3193474495388444, "learning_rate": 2.607147524689829e-07, "loss": 0.1766, "step": 12580 }, { "epoch": 0.9, "grad_norm": 1.2161152651702318, "learning_rate": 2.6034570105622604e-07, "loss": 0.1247, "step": 12581 }, { "epoch": 0.9, "grad_norm": 1.2819576709581733, "learning_rate": 2.599769040492828e-07, "loss": 0.1763, "step": 12582 }, { "epoch": 0.9, "grad_norm": 1.361645412362775, "learning_rate": 2.5960836146794734e-07, "loss": 0.1604, "step": 12583 }, { "epoch": 0.9, "grad_norm": 1.376547176144151, "learning_rate": 2.5924007333200387e-07, "loss": 0.1495, "step": 12584 }, { "epoch": 0.9, "grad_norm": 1.2778420966634354, "learning_rate": 2.588720396612171e-07, "loss": 0.1741, "step": 12585 }, { "epoch": 0.9, "grad_norm": 1.4570081720208612, "learning_rate": 2.585042604753457e-07, "loss": 0.1673, "step": 12586 }, { "epoch": 0.9, "grad_norm": 1.3521435576405392, "learning_rate": 2.5813673579412724e-07, "loss": 0.1996, "step": 12587 }, { "epoch": 0.9, "grad_norm": 1.416755350975266, "learning_rate": 2.57769465637292e-07, "loss": 0.1778, "step": 12588 }, { "epoch": 0.9, "grad_norm": 1.2967252316492108, "learning_rate": 2.574024500245503e-07, "loss": 0.1435, "step": 12589 }, { "epoch": 0.9, "grad_norm": 5.152672295231197, "learning_rate": 2.5703568897560535e-07, "loss": 0.4505, "step": 12590 }, { "epoch": 0.9, "grad_norm": 1.4130034582149782, "learning_rate": 2.566691825101403e-07, "loss": 0.196, "step": 12591 }, { "epoch": 0.9, "grad_norm": 1.316328585841141, "learning_rate": 2.563029306478304e-07, "loss": 0.1072, "step": 12592 }, { "epoch": 0.9, "grad_norm": 1.381329074036769, "learning_rate": 2.559369334083328e-07, "loss": 0.1501, "step": 12593 }, { "epoch": 0.9, "grad_norm": 6.13616648166765, "learning_rate": 2.5557119081129343e-07, "loss": 0.6089, "step": 12594 }, { "epoch": 0.9, "grad_norm": 1.2667537866810106, "learning_rate": 2.5520570287634383e-07, "loss": 0.1715, "step": 12595 }, { "epoch": 0.9, "grad_norm": 1.3751099081153582, "learning_rate": 2.548404696231027e-07, "loss": 0.1911, "step": 12596 }, { "epoch": 0.9, "grad_norm": 1.214955755228897, "learning_rate": 2.544754910711733e-07, "loss": 0.1575, "step": 12597 }, { "epoch": 0.9, "grad_norm": 1.2761027544301025, "learning_rate": 2.5411076724014715e-07, "loss": 0.1935, "step": 12598 }, { "epoch": 0.9, "grad_norm": 1.4992574399977376, "learning_rate": 2.537462981495997e-07, "loss": 0.1735, "step": 12599 }, { "epoch": 0.9, "grad_norm": 1.2901441287426927, "learning_rate": 2.533820838190959e-07, "loss": 0.1676, "step": 12600 }, { "epoch": 0.9, "grad_norm": 1.3562405522916454, "learning_rate": 2.530181242681834e-07, "loss": 0.1528, "step": 12601 }, { "epoch": 0.9, "grad_norm": 1.1891191079285433, "learning_rate": 2.526544195163999e-07, "loss": 0.1317, "step": 12602 }, { "epoch": 0.9, "grad_norm": 1.1358061463714986, "learning_rate": 2.522909695832665e-07, "loss": 0.1646, "step": 12603 }, { "epoch": 0.9, "grad_norm": 1.4563545500093997, "learning_rate": 2.519277744882931e-07, "loss": 0.1795, "step": 12604 }, { "epoch": 0.9, "grad_norm": 1.3757213132838086, "learning_rate": 2.51564834250973e-07, "loss": 0.1664, "step": 12605 }, { "epoch": 0.9, "grad_norm": 1.3615375691049343, "learning_rate": 2.5120214889078785e-07, "loss": 0.1404, "step": 12606 }, { "epoch": 0.9, "grad_norm": 8.115128303924921, "learning_rate": 2.5083971842720654e-07, "loss": 0.6011, "step": 12607 }, { "epoch": 0.9, "grad_norm": 1.3777547141643542, "learning_rate": 2.5047754287968075e-07, "loss": 0.1835, "step": 12608 }, { "epoch": 0.9, "grad_norm": 1.4550370281847165, "learning_rate": 2.5011562226765205e-07, "loss": 0.1729, "step": 12609 }, { "epoch": 0.9, "grad_norm": 5.772965394498877, "learning_rate": 2.4975395661054614e-07, "loss": 0.5983, "step": 12610 }, { "epoch": 0.9, "grad_norm": 7.452928597215935, "learning_rate": 2.4939254592777573e-07, "loss": 0.5357, "step": 12611 }, { "epoch": 0.9, "grad_norm": 1.2980012496121758, "learning_rate": 2.490313902387409e-07, "loss": 0.1391, "step": 12612 }, { "epoch": 0.9, "grad_norm": 1.3526996219450143, "learning_rate": 2.486704895628267e-07, "loss": 0.1504, "step": 12613 }, { "epoch": 0.9, "grad_norm": 1.3600802230817879, "learning_rate": 2.483098439194037e-07, "loss": 0.1849, "step": 12614 }, { "epoch": 0.9, "grad_norm": 6.323109597117248, "learning_rate": 2.4794945332783205e-07, "loss": 0.4746, "step": 12615 }, { "epoch": 0.9, "grad_norm": 1.2898502608589355, "learning_rate": 2.47589317807454e-07, "loss": 0.1806, "step": 12616 }, { "epoch": 0.9, "grad_norm": 1.4890459380942849, "learning_rate": 2.472294373776013e-07, "loss": 0.1567, "step": 12617 }, { "epoch": 0.9, "grad_norm": 1.2869522820229289, "learning_rate": 2.468698120575902e-07, "loss": 0.1382, "step": 12618 }, { "epoch": 0.9, "grad_norm": 1.374671484906084, "learning_rate": 2.465104418667247e-07, "loss": 0.1589, "step": 12619 }, { "epoch": 0.9, "grad_norm": 1.465379589483349, "learning_rate": 2.461513268242938e-07, "loss": 0.1438, "step": 12620 }, { "epoch": 0.9, "grad_norm": 1.268249103254443, "learning_rate": 2.4579246694957426e-07, "loss": 0.1795, "step": 12621 }, { "epoch": 0.9, "grad_norm": 1.2648286021344564, "learning_rate": 2.4543386226182686e-07, "loss": 0.1856, "step": 12622 }, { "epoch": 0.9, "grad_norm": 1.1864698921227268, "learning_rate": 2.4507551278030173e-07, "loss": 0.1345, "step": 12623 }, { "epoch": 0.9, "grad_norm": 1.4440622907300216, "learning_rate": 2.447174185242324e-07, "loss": 0.1621, "step": 12624 }, { "epoch": 0.9, "grad_norm": 4.798272506632053, "learning_rate": 2.443595795128406e-07, "loss": 0.4578, "step": 12625 }, { "epoch": 0.9, "grad_norm": 1.3258373802044958, "learning_rate": 2.440019957653328e-07, "loss": 0.1712, "step": 12626 }, { "epoch": 0.9, "grad_norm": 1.3808373471207442, "learning_rate": 2.4364466730090355e-07, "loss": 0.219, "step": 12627 }, { "epoch": 0.9, "grad_norm": 1.3594122353703182, "learning_rate": 2.4328759413873194e-07, "loss": 0.1769, "step": 12628 }, { "epoch": 0.9, "grad_norm": 1.2489932798227668, "learning_rate": 2.42930776297986e-07, "loss": 0.1419, "step": 12629 }, { "epoch": 0.9, "grad_norm": 1.3075258853304108, "learning_rate": 2.4257421379781654e-07, "loss": 0.1391, "step": 12630 }, { "epoch": 0.9, "grad_norm": 1.5263125558689432, "learning_rate": 2.4221790665736325e-07, "loss": 0.2145, "step": 12631 }, { "epoch": 0.9, "grad_norm": 1.362030507587289, "learning_rate": 2.4186185489575076e-07, "loss": 0.1567, "step": 12632 }, { "epoch": 0.9, "grad_norm": 1.2511353758378456, "learning_rate": 2.4150605853209164e-07, "loss": 0.1594, "step": 12633 }, { "epoch": 0.9, "grad_norm": 1.287665169396671, "learning_rate": 2.411505175854817e-07, "loss": 0.1951, "step": 12634 }, { "epoch": 0.9, "grad_norm": 1.2157894373998381, "learning_rate": 2.407952320750062e-07, "loss": 0.1753, "step": 12635 }, { "epoch": 0.9, "grad_norm": 1.4326108665458335, "learning_rate": 2.404402020197355e-07, "loss": 0.1756, "step": 12636 }, { "epoch": 0.9, "grad_norm": 1.4382546042767963, "learning_rate": 2.4008542743872597e-07, "loss": 0.1896, "step": 12637 }, { "epoch": 0.9, "grad_norm": 1.180132108354165, "learning_rate": 2.397309083510213e-07, "loss": 0.1233, "step": 12638 }, { "epoch": 0.9, "grad_norm": 1.2945141482590476, "learning_rate": 2.3937664477564905e-07, "loss": 0.1647, "step": 12639 }, { "epoch": 0.9, "grad_norm": 1.1998086146363998, "learning_rate": 2.390226367316262e-07, "loss": 0.1615, "step": 12640 }, { "epoch": 0.9, "grad_norm": 1.367934762975672, "learning_rate": 2.386688842379531e-07, "loss": 0.1894, "step": 12641 }, { "epoch": 0.9, "grad_norm": 1.361737265873703, "learning_rate": 2.383153873136196e-07, "loss": 0.1412, "step": 12642 }, { "epoch": 0.9, "grad_norm": 1.4918394390969996, "learning_rate": 2.3796214597759825e-07, "loss": 0.1641, "step": 12643 }, { "epoch": 0.9, "grad_norm": 1.4099074226139585, "learning_rate": 2.3760916024885007e-07, "loss": 0.1919, "step": 12644 }, { "epoch": 0.9, "grad_norm": 1.6334725342104919, "learning_rate": 2.3725643014632205e-07, "loss": 0.2573, "step": 12645 }, { "epoch": 0.9, "grad_norm": 1.4623518045673327, "learning_rate": 2.3690395568894853e-07, "loss": 0.1873, "step": 12646 }, { "epoch": 0.9, "grad_norm": 1.2134875684352853, "learning_rate": 2.3655173689564714e-07, "loss": 0.1618, "step": 12647 }, { "epoch": 0.9, "grad_norm": 1.2261628665144881, "learning_rate": 2.36199773785325e-07, "loss": 0.1381, "step": 12648 }, { "epoch": 0.9, "grad_norm": 1.3269781824631393, "learning_rate": 2.3584806637687252e-07, "loss": 0.1367, "step": 12649 }, { "epoch": 0.9, "grad_norm": 1.385156350246033, "learning_rate": 2.3549661468916963e-07, "loss": 0.1514, "step": 12650 }, { "epoch": 0.9, "grad_norm": 1.2972375857117198, "learning_rate": 2.3514541874107954e-07, "loss": 0.1677, "step": 12651 }, { "epoch": 0.91, "grad_norm": 5.088758649983839, "learning_rate": 2.3479447855145388e-07, "loss": 0.611, "step": 12652 }, { "epoch": 0.91, "grad_norm": 1.2714985403771593, "learning_rate": 2.3444379413912865e-07, "loss": 0.1595, "step": 12653 }, { "epoch": 0.91, "grad_norm": 1.3952634400063104, "learning_rate": 2.3409336552292937e-07, "loss": 0.2054, "step": 12654 }, { "epoch": 0.91, "grad_norm": 1.2878854804319144, "learning_rate": 2.3374319272166324e-07, "loss": 0.1565, "step": 12655 }, { "epoch": 0.91, "grad_norm": 1.390845028815124, "learning_rate": 2.3339327575412852e-07, "loss": 0.1471, "step": 12656 }, { "epoch": 0.91, "grad_norm": 1.3455684222514772, "learning_rate": 2.3304361463910463e-07, "loss": 0.1444, "step": 12657 }, { "epoch": 0.91, "grad_norm": 1.7130207936078783, "learning_rate": 2.3269420939536213e-07, "loss": 0.1845, "step": 12658 }, { "epoch": 0.91, "grad_norm": 1.2157470666127261, "learning_rate": 2.3234506004165492e-07, "loss": 0.149, "step": 12659 }, { "epoch": 0.91, "grad_norm": 5.210971755825889, "learning_rate": 2.3199616659672352e-07, "loss": 0.5211, "step": 12660 }, { "epoch": 0.91, "grad_norm": 1.275369519662811, "learning_rate": 2.3164752907929522e-07, "loss": 0.1495, "step": 12661 }, { "epoch": 0.91, "grad_norm": 1.3570798167669922, "learning_rate": 2.3129914750808503e-07, "loss": 0.1345, "step": 12662 }, { "epoch": 0.91, "grad_norm": 1.355340402076586, "learning_rate": 2.309510219017913e-07, "loss": 0.1689, "step": 12663 }, { "epoch": 0.91, "grad_norm": 1.4251630076528905, "learning_rate": 2.3060315227910023e-07, "loss": 0.1748, "step": 12664 }, { "epoch": 0.91, "grad_norm": 1.2901020450337168, "learning_rate": 2.3025553865868412e-07, "loss": 0.1731, "step": 12665 }, { "epoch": 0.91, "grad_norm": 1.3455511759679402, "learning_rate": 2.2990818105920132e-07, "loss": 0.1597, "step": 12666 }, { "epoch": 0.91, "grad_norm": 1.5308538427693263, "learning_rate": 2.2956107949929696e-07, "loss": 0.2, "step": 12667 }, { "epoch": 0.91, "grad_norm": 1.3524891383994382, "learning_rate": 2.2921423399760168e-07, "loss": 0.1624, "step": 12668 }, { "epoch": 0.91, "grad_norm": 1.1494135250024697, "learning_rate": 2.288676445727328e-07, "loss": 0.1094, "step": 12669 }, { "epoch": 0.91, "grad_norm": 1.3464442488383974, "learning_rate": 2.285213112432938e-07, "loss": 0.1502, "step": 12670 }, { "epoch": 0.91, "grad_norm": 4.074477184718176, "learning_rate": 2.281752340278759e-07, "loss": 0.4917, "step": 12671 }, { "epoch": 0.91, "grad_norm": 1.7279358846116963, "learning_rate": 2.2782941294505312e-07, "loss": 0.2067, "step": 12672 }, { "epoch": 0.91, "grad_norm": 1.1919272580318094, "learning_rate": 2.2748384801338953e-07, "loss": 0.1452, "step": 12673 }, { "epoch": 0.91, "grad_norm": 10.72844635430164, "learning_rate": 2.271385392514319e-07, "loss": 0.6493, "step": 12674 }, { "epoch": 0.91, "grad_norm": 1.5019772434597523, "learning_rate": 2.2679348667771662e-07, "loss": 0.1417, "step": 12675 }, { "epoch": 0.91, "grad_norm": 1.391474200074532, "learning_rate": 2.264486903107632e-07, "loss": 0.2061, "step": 12676 }, { "epoch": 0.91, "grad_norm": 5.1715158813366795, "learning_rate": 2.2610415016907973e-07, "loss": 0.6574, "step": 12677 }, { "epoch": 0.91, "grad_norm": 1.2851411197759597, "learning_rate": 2.257598662711602e-07, "loss": 0.1671, "step": 12678 }, { "epoch": 0.91, "grad_norm": 1.489362709641089, "learning_rate": 2.2541583863548433e-07, "loss": 0.1735, "step": 12679 }, { "epoch": 0.91, "grad_norm": 1.4497275435218506, "learning_rate": 2.2507206728051732e-07, "loss": 0.1777, "step": 12680 }, { "epoch": 0.91, "grad_norm": 1.2438421670019086, "learning_rate": 2.2472855222471225e-07, "loss": 0.1335, "step": 12681 }, { "epoch": 0.91, "grad_norm": 1.3333968076745413, "learning_rate": 2.2438529348650707e-07, "loss": 0.1608, "step": 12682 }, { "epoch": 0.91, "grad_norm": 1.3080865554542618, "learning_rate": 2.2404229108432762e-07, "loss": 0.1746, "step": 12683 }, { "epoch": 0.91, "grad_norm": 1.3560911245380336, "learning_rate": 2.2369954503658308e-07, "loss": 0.1493, "step": 12684 }, { "epoch": 0.91, "grad_norm": 1.34216046791445, "learning_rate": 2.2335705536167263e-07, "loss": 0.1697, "step": 12685 }, { "epoch": 0.91, "grad_norm": 1.2421169726672066, "learning_rate": 2.2301482207797765e-07, "loss": 0.1602, "step": 12686 }, { "epoch": 0.91, "grad_norm": 1.4440396982952837, "learning_rate": 2.226728452038701e-07, "loss": 0.1917, "step": 12687 }, { "epoch": 0.91, "grad_norm": 1.4842486618204693, "learning_rate": 2.2233112475770425e-07, "loss": 0.1508, "step": 12688 }, { "epoch": 0.91, "grad_norm": 6.31412708120763, "learning_rate": 2.2198966075782368e-07, "loss": 0.4765, "step": 12689 }, { "epoch": 0.91, "grad_norm": 1.401507444196246, "learning_rate": 2.2164845322255545e-07, "loss": 0.1554, "step": 12690 }, { "epoch": 0.91, "grad_norm": 1.593578750664929, "learning_rate": 2.2130750217021546e-07, "loss": 0.1843, "step": 12691 }, { "epoch": 0.91, "grad_norm": 6.387124870363444, "learning_rate": 2.2096680761910349e-07, "loss": 0.5165, "step": 12692 }, { "epoch": 0.91, "grad_norm": 1.1720870993437176, "learning_rate": 2.206263695875077e-07, "loss": 0.1271, "step": 12693 }, { "epoch": 0.91, "grad_norm": 6.29065390664548, "learning_rate": 2.2028618809370073e-07, "loss": 0.5026, "step": 12694 }, { "epoch": 0.91, "grad_norm": 1.3341181283120755, "learning_rate": 2.1994626315594181e-07, "loss": 0.1362, "step": 12695 }, { "epoch": 0.91, "grad_norm": 1.166459844987831, "learning_rate": 2.1960659479247749e-07, "loss": 0.1416, "step": 12696 }, { "epoch": 0.91, "grad_norm": 1.3180970280588613, "learning_rate": 2.1926718302154037e-07, "loss": 0.1438, "step": 12697 }, { "epoch": 0.91, "grad_norm": 1.205945768344295, "learning_rate": 2.189280278613476e-07, "loss": 0.1575, "step": 12698 }, { "epoch": 0.91, "grad_norm": 1.5407504146541176, "learning_rate": 2.1858912933010456e-07, "loss": 0.1715, "step": 12699 }, { "epoch": 0.91, "grad_norm": 1.240102079430423, "learning_rate": 2.1825048744600062e-07, "loss": 0.1398, "step": 12700 }, { "epoch": 0.91, "grad_norm": 1.3946866416980106, "learning_rate": 2.1791210222721405e-07, "loss": 0.1655, "step": 12701 }, { "epoch": 0.91, "grad_norm": 1.3432019843021943, "learning_rate": 2.1757397369190802e-07, "loss": 0.1773, "step": 12702 }, { "epoch": 0.91, "grad_norm": 4.848624915876966, "learning_rate": 2.1723610185823085e-07, "loss": 0.4256, "step": 12703 }, { "epoch": 0.91, "grad_norm": 1.3644903532423924, "learning_rate": 2.1689848674431912e-07, "loss": 0.2049, "step": 12704 }, { "epoch": 0.91, "grad_norm": 1.2659687446524321, "learning_rate": 2.165611283682939e-07, "loss": 0.1793, "step": 12705 }, { "epoch": 0.91, "grad_norm": 1.2604631340377983, "learning_rate": 2.162240267482646e-07, "loss": 0.158, "step": 12706 }, { "epoch": 0.91, "grad_norm": 1.2062277517590558, "learning_rate": 2.1588718190232395e-07, "loss": 0.1449, "step": 12707 }, { "epoch": 0.91, "grad_norm": 8.0812775361243, "learning_rate": 2.1555059384855358e-07, "loss": 0.5837, "step": 12708 }, { "epoch": 0.91, "grad_norm": 1.2652800820562675, "learning_rate": 2.1521426260501965e-07, "loss": 0.1547, "step": 12709 }, { "epoch": 0.91, "grad_norm": 1.292272420480728, "learning_rate": 2.1487818818977545e-07, "loss": 0.1642, "step": 12710 }, { "epoch": 0.91, "grad_norm": 1.4300264731553758, "learning_rate": 2.145423706208588e-07, "loss": 0.1786, "step": 12711 }, { "epoch": 0.91, "grad_norm": 12.608328991227296, "learning_rate": 2.142068099162964e-07, "loss": 0.6462, "step": 12712 }, { "epoch": 0.91, "grad_norm": 1.2223192099129898, "learning_rate": 2.1387150609409991e-07, "loss": 0.1537, "step": 12713 }, { "epoch": 0.91, "grad_norm": 1.2811856241231114, "learning_rate": 2.135364591722666e-07, "loss": 0.1405, "step": 12714 }, { "epoch": 0.91, "grad_norm": 1.2919615344656545, "learning_rate": 2.132016691687805e-07, "loss": 0.1696, "step": 12715 }, { "epoch": 0.91, "grad_norm": 1.4143280558702906, "learning_rate": 2.1286713610161215e-07, "loss": 0.1627, "step": 12716 }, { "epoch": 0.91, "grad_norm": 1.3171704607557675, "learning_rate": 2.1253285998871719e-07, "loss": 0.1483, "step": 12717 }, { "epoch": 0.91, "grad_norm": 1.3888484234119711, "learning_rate": 2.121988408480391e-07, "loss": 0.1935, "step": 12718 }, { "epoch": 0.91, "grad_norm": 1.389201521309338, "learning_rate": 2.1186507869750573e-07, "loss": 0.1567, "step": 12719 }, { "epoch": 0.91, "grad_norm": 6.939276780240387, "learning_rate": 2.1153157355503274e-07, "loss": 0.4929, "step": 12720 }, { "epoch": 0.91, "grad_norm": 1.2849870808185841, "learning_rate": 2.1119832543852137e-07, "loss": 0.1423, "step": 12721 }, { "epoch": 0.91, "grad_norm": 1.4064810631375324, "learning_rate": 2.108653343658601e-07, "loss": 0.1909, "step": 12722 }, { "epoch": 0.91, "grad_norm": 11.933941021940372, "learning_rate": 2.1053260035492017e-07, "loss": 0.5475, "step": 12723 }, { "epoch": 0.91, "grad_norm": 1.400823236097534, "learning_rate": 2.1020012342356344e-07, "loss": 0.1914, "step": 12724 }, { "epoch": 0.91, "grad_norm": 1.194602002841027, "learning_rate": 2.0986790358963449e-07, "loss": 0.1618, "step": 12725 }, { "epoch": 0.91, "grad_norm": 1.4286086054600553, "learning_rate": 2.095359408709674e-07, "loss": 0.1613, "step": 12726 }, { "epoch": 0.91, "grad_norm": 1.2315076608409024, "learning_rate": 2.0920423528537848e-07, "loss": 0.1501, "step": 12727 }, { "epoch": 0.91, "grad_norm": 1.3306083422762518, "learning_rate": 2.0887278685067348e-07, "loss": 0.1787, "step": 12728 }, { "epoch": 0.91, "grad_norm": 1.0917576309404418, "learning_rate": 2.0854159558464316e-07, "loss": 0.1468, "step": 12729 }, { "epoch": 0.91, "grad_norm": 1.4355306159506678, "learning_rate": 2.0821066150506497e-07, "loss": 0.1825, "step": 12730 }, { "epoch": 0.91, "grad_norm": 1.4181918270894824, "learning_rate": 2.078799846297014e-07, "loss": 0.1582, "step": 12731 }, { "epoch": 0.91, "grad_norm": 1.3544895023806705, "learning_rate": 2.0754956497630262e-07, "loss": 0.1685, "step": 12732 }, { "epoch": 0.91, "grad_norm": 1.3791696139548792, "learning_rate": 2.072194025626034e-07, "loss": 0.191, "step": 12733 }, { "epoch": 0.91, "grad_norm": 1.1845903456790687, "learning_rate": 2.068894974063257e-07, "loss": 0.1266, "step": 12734 }, { "epoch": 0.91, "grad_norm": 1.4915368865117113, "learning_rate": 2.0655984952517806e-07, "loss": 0.2032, "step": 12735 }, { "epoch": 0.91, "grad_norm": 1.244248709550514, "learning_rate": 2.0623045893685413e-07, "loss": 0.1474, "step": 12736 }, { "epoch": 0.91, "grad_norm": 1.3279174530285651, "learning_rate": 2.0590132565903475e-07, "loss": 0.1817, "step": 12737 }, { "epoch": 0.91, "grad_norm": 1.5801327434120815, "learning_rate": 2.0557244970938583e-07, "loss": 0.1975, "step": 12738 }, { "epoch": 0.91, "grad_norm": 1.4573533212946175, "learning_rate": 2.0524383110556156e-07, "loss": 0.1518, "step": 12739 }, { "epoch": 0.91, "grad_norm": 1.302924610356093, "learning_rate": 2.0491546986519896e-07, "loss": 0.1998, "step": 12740 }, { "epoch": 0.91, "grad_norm": 1.2155938743662618, "learning_rate": 2.04587366005925e-07, "loss": 0.1548, "step": 12741 }, { "epoch": 0.91, "grad_norm": 1.4973755541503786, "learning_rate": 2.0425951954534896e-07, "loss": 0.1731, "step": 12742 }, { "epoch": 0.91, "grad_norm": 1.2550542012626813, "learning_rate": 2.0393193050107064e-07, "loss": 0.1497, "step": 12743 }, { "epoch": 0.91, "grad_norm": 4.9313190551921675, "learning_rate": 2.0360459889067153e-07, "loss": 0.4868, "step": 12744 }, { "epoch": 0.91, "grad_norm": 1.3243342929166981, "learning_rate": 2.0327752473172256e-07, "loss": 0.1713, "step": 12745 }, { "epoch": 0.91, "grad_norm": 1.176956660120435, "learning_rate": 2.0295070804177974e-07, "loss": 0.1319, "step": 12746 }, { "epoch": 0.91, "grad_norm": 1.3982550048429367, "learning_rate": 2.0262414883838565e-07, "loss": 0.158, "step": 12747 }, { "epoch": 0.91, "grad_norm": 1.4570055716443895, "learning_rate": 2.0229784713906796e-07, "loss": 0.1825, "step": 12748 }, { "epoch": 0.91, "grad_norm": 1.1312398118018703, "learning_rate": 2.019718029613421e-07, "loss": 0.1762, "step": 12749 }, { "epoch": 0.91, "grad_norm": 1.4268647707584796, "learning_rate": 2.0164601632270797e-07, "loss": 0.195, "step": 12750 }, { "epoch": 0.91, "grad_norm": 1.3915743114432548, "learning_rate": 2.0132048724065323e-07, "loss": 0.1792, "step": 12751 }, { "epoch": 0.91, "grad_norm": 1.222932772242943, "learning_rate": 2.0099521573265e-07, "loss": 0.1559, "step": 12752 }, { "epoch": 0.91, "grad_norm": 1.5233677766233629, "learning_rate": 2.006702018161577e-07, "loss": 0.1866, "step": 12753 }, { "epoch": 0.91, "grad_norm": 1.3666646012117158, "learning_rate": 2.003454455086229e-07, "loss": 0.1668, "step": 12754 }, { "epoch": 0.91, "grad_norm": 1.3783722747849652, "learning_rate": 2.0002094682747663e-07, "loss": 0.1447, "step": 12755 }, { "epoch": 0.91, "grad_norm": 1.261074088016899, "learning_rate": 1.996967057901361e-07, "loss": 0.1507, "step": 12756 }, { "epoch": 0.91, "grad_norm": 1.2987466133276455, "learning_rate": 1.9937272241400685e-07, "loss": 0.2055, "step": 12757 }, { "epoch": 0.91, "grad_norm": 1.2160074800438412, "learning_rate": 1.9904899671647715e-07, "loss": 0.1551, "step": 12758 }, { "epoch": 0.91, "grad_norm": 1.2916984396955824, "learning_rate": 1.9872552871492478e-07, "loss": 0.158, "step": 12759 }, { "epoch": 0.91, "grad_norm": 1.4241466276475483, "learning_rate": 1.9840231842671087e-07, "loss": 0.194, "step": 12760 }, { "epoch": 0.91, "grad_norm": 1.3674066382133252, "learning_rate": 1.9807936586918485e-07, "loss": 0.167, "step": 12761 }, { "epoch": 0.91, "grad_norm": 14.339278699785922, "learning_rate": 1.977566710596812e-07, "loss": 0.478, "step": 12762 }, { "epoch": 0.91, "grad_norm": 1.3730802100404944, "learning_rate": 1.974342340155222e-07, "loss": 0.1869, "step": 12763 }, { "epoch": 0.91, "grad_norm": 1.393961963912052, "learning_rate": 1.9711205475401286e-07, "loss": 0.2109, "step": 12764 }, { "epoch": 0.91, "grad_norm": 1.3612032996038137, "learning_rate": 1.9679013329244766e-07, "loss": 0.1632, "step": 12765 }, { "epoch": 0.91, "grad_norm": 1.37050486797022, "learning_rate": 1.9646846964810672e-07, "loss": 0.1549, "step": 12766 }, { "epoch": 0.91, "grad_norm": 1.2643573707724556, "learning_rate": 1.9614706383825454e-07, "loss": 0.1857, "step": 12767 }, { "epoch": 0.91, "grad_norm": 1.5286755492706403, "learning_rate": 1.9582591588014344e-07, "loss": 0.1764, "step": 12768 }, { "epoch": 0.91, "grad_norm": 1.3630176807988492, "learning_rate": 1.955050257910107e-07, "loss": 0.1762, "step": 12769 }, { "epoch": 0.91, "grad_norm": 1.4368104550676726, "learning_rate": 1.951843935880815e-07, "loss": 0.1868, "step": 12770 }, { "epoch": 0.91, "grad_norm": 1.4312590684300504, "learning_rate": 1.948640192885648e-07, "loss": 0.1897, "step": 12771 }, { "epoch": 0.91, "grad_norm": 1.3363325117674558, "learning_rate": 1.9454390290965853e-07, "loss": 0.1665, "step": 12772 }, { "epoch": 0.91, "grad_norm": 1.1983137451812842, "learning_rate": 1.9422404446854393e-07, "loss": 0.1426, "step": 12773 }, { "epoch": 0.91, "grad_norm": 1.364344406207192, "learning_rate": 1.9390444398239117e-07, "loss": 0.1422, "step": 12774 }, { "epoch": 0.91, "grad_norm": 1.2735006406516576, "learning_rate": 1.9358510146835318e-07, "loss": 0.1645, "step": 12775 }, { "epoch": 0.91, "grad_norm": 6.42555570748825, "learning_rate": 1.9326601694357296e-07, "loss": 0.4525, "step": 12776 }, { "epoch": 0.91, "grad_norm": 1.3865371850103634, "learning_rate": 1.9294719042517617e-07, "loss": 0.1404, "step": 12777 }, { "epoch": 0.91, "grad_norm": 1.2844491232657287, "learning_rate": 1.92628621930277e-07, "loss": 0.1591, "step": 12778 }, { "epoch": 0.91, "grad_norm": 1.5943022315536373, "learning_rate": 1.9231031147597446e-07, "loss": 0.1891, "step": 12779 }, { "epoch": 0.91, "grad_norm": 1.1856540736482608, "learning_rate": 1.9199225907935492e-07, "loss": 0.1619, "step": 12780 }, { "epoch": 0.91, "grad_norm": 1.384493799844268, "learning_rate": 1.916744647574892e-07, "loss": 0.124, "step": 12781 }, { "epoch": 0.91, "grad_norm": 1.5264940808139795, "learning_rate": 1.9135692852743637e-07, "loss": 0.1865, "step": 12782 }, { "epoch": 0.91, "grad_norm": 1.3477894799329293, "learning_rate": 1.910396504062395e-07, "loss": 0.1455, "step": 12783 }, { "epoch": 0.91, "grad_norm": 1.4738063550447547, "learning_rate": 1.9072263041092998e-07, "loss": 0.1654, "step": 12784 }, { "epoch": 0.91, "grad_norm": 1.273004954345331, "learning_rate": 1.904058685585225e-07, "loss": 0.1499, "step": 12785 }, { "epoch": 0.91, "grad_norm": 1.3647244268406307, "learning_rate": 1.900893648660207e-07, "loss": 0.1918, "step": 12786 }, { "epoch": 0.91, "grad_norm": 1.3147723721912674, "learning_rate": 1.897731193504132e-07, "loss": 0.1733, "step": 12787 }, { "epoch": 0.91, "grad_norm": 1.5137023341174125, "learning_rate": 1.894571320286742e-07, "loss": 0.1452, "step": 12788 }, { "epoch": 0.91, "grad_norm": 1.2739711704495216, "learning_rate": 1.8914140291776516e-07, "loss": 0.184, "step": 12789 }, { "epoch": 0.91, "grad_norm": 1.317344106338329, "learning_rate": 1.888259320346336e-07, "loss": 0.189, "step": 12790 }, { "epoch": 0.91, "grad_norm": 1.3675545493382846, "learning_rate": 1.8851071939621157e-07, "loss": 0.1554, "step": 12791 }, { "epoch": 0.92, "grad_norm": 1.3532705112489385, "learning_rate": 1.8819576501941995e-07, "loss": 0.1442, "step": 12792 }, { "epoch": 0.92, "grad_norm": 1.320639052149005, "learning_rate": 1.8788106892116243e-07, "loss": 0.1368, "step": 12793 }, { "epoch": 0.92, "grad_norm": 1.4273980400305475, "learning_rate": 1.8756663111833274e-07, "loss": 0.1602, "step": 12794 }, { "epoch": 0.92, "grad_norm": 1.2804584621244166, "learning_rate": 1.8725245162780625e-07, "loss": 0.15, "step": 12795 }, { "epoch": 0.92, "grad_norm": 1.5179165168103461, "learning_rate": 1.8693853046644838e-07, "loss": 0.1871, "step": 12796 }, { "epoch": 0.92, "grad_norm": 1.5290061771363928, "learning_rate": 1.86624867651109e-07, "loss": 0.1785, "step": 12797 }, { "epoch": 0.92, "grad_norm": 1.3524513085804102, "learning_rate": 1.8631146319862459e-07, "loss": 0.1447, "step": 12798 }, { "epoch": 0.92, "grad_norm": 1.195468698768072, "learning_rate": 1.8599831712581728e-07, "loss": 0.1433, "step": 12799 }, { "epoch": 0.92, "grad_norm": 1.1825816397123174, "learning_rate": 1.8568542944949474e-07, "loss": 0.1728, "step": 12800 }, { "epoch": 0.92, "grad_norm": 4.882215261329323, "learning_rate": 1.8537280018645243e-07, "loss": 0.559, "step": 12801 }, { "epoch": 0.92, "grad_norm": 1.3086463135748683, "learning_rate": 1.8506042935347025e-07, "loss": 0.1493, "step": 12802 }, { "epoch": 0.92, "grad_norm": 8.433916015712827, "learning_rate": 1.8474831696731588e-07, "loss": 0.4228, "step": 12803 }, { "epoch": 0.92, "grad_norm": 1.4149473796727927, "learning_rate": 1.8443646304474206e-07, "loss": 0.2147, "step": 12804 }, { "epoch": 0.92, "grad_norm": 6.378455823490417, "learning_rate": 1.8412486760248705e-07, "loss": 0.5911, "step": 12805 }, { "epoch": 0.92, "grad_norm": 1.3333284540094448, "learning_rate": 1.8381353065727636e-07, "loss": 0.1965, "step": 12806 }, { "epoch": 0.92, "grad_norm": 1.3424792342914524, "learning_rate": 1.8350245222582274e-07, "loss": 0.1687, "step": 12807 }, { "epoch": 0.92, "grad_norm": 1.3298522610183259, "learning_rate": 1.831916323248223e-07, "loss": 0.1667, "step": 12808 }, { "epoch": 0.92, "grad_norm": 1.354789719890319, "learning_rate": 1.8288107097095887e-07, "loss": 0.1641, "step": 12809 }, { "epoch": 0.92, "grad_norm": 1.3871104225019073, "learning_rate": 1.8257076818090192e-07, "loss": 0.1926, "step": 12810 }, { "epoch": 0.92, "grad_norm": 1.2672795185829975, "learning_rate": 1.8226072397130755e-07, "loss": 0.1344, "step": 12811 }, { "epoch": 0.92, "grad_norm": 1.4348601102312597, "learning_rate": 1.819509383588175e-07, "loss": 0.1654, "step": 12812 }, { "epoch": 0.92, "grad_norm": 1.5196719280027633, "learning_rate": 1.816414113600601e-07, "loss": 0.1881, "step": 12813 }, { "epoch": 0.92, "grad_norm": 1.3362287003305124, "learning_rate": 1.8133214299164925e-07, "loss": 0.1777, "step": 12814 }, { "epoch": 0.92, "grad_norm": 1.2797063776665907, "learning_rate": 1.8102313327018617e-07, "loss": 0.2087, "step": 12815 }, { "epoch": 0.92, "grad_norm": 1.4723686324634557, "learning_rate": 1.807143822122559e-07, "loss": 0.1877, "step": 12816 }, { "epoch": 0.92, "grad_norm": 1.4246960496045753, "learning_rate": 1.8040588983443185e-07, "loss": 0.1662, "step": 12817 }, { "epoch": 0.92, "grad_norm": 1.568065016145491, "learning_rate": 1.800976561532719e-07, "loss": 0.1817, "step": 12818 }, { "epoch": 0.92, "grad_norm": 1.1955192728492101, "learning_rate": 1.7978968118532226e-07, "loss": 0.1448, "step": 12819 }, { "epoch": 0.92, "grad_norm": 1.4478351914573695, "learning_rate": 1.794819649471119e-07, "loss": 0.2073, "step": 12820 }, { "epoch": 0.92, "grad_norm": 1.4682813705173308, "learning_rate": 1.7917450745515874e-07, "loss": 0.2157, "step": 12821 }, { "epoch": 0.92, "grad_norm": 1.378405298776126, "learning_rate": 1.7886730872596625e-07, "loss": 0.1707, "step": 12822 }, { "epoch": 0.92, "grad_norm": 1.4207753909467031, "learning_rate": 1.7856036877602344e-07, "loss": 0.1865, "step": 12823 }, { "epoch": 0.92, "grad_norm": 1.5527661084262234, "learning_rate": 1.7825368762180496e-07, "loss": 0.1582, "step": 12824 }, { "epoch": 0.92, "grad_norm": 1.3120611712944024, "learning_rate": 1.779472652797737e-07, "loss": 0.1528, "step": 12825 }, { "epoch": 0.92, "grad_norm": 1.3689170371802326, "learning_rate": 1.776411017663754e-07, "loss": 0.1731, "step": 12826 }, { "epoch": 0.92, "grad_norm": 1.2977832575145447, "learning_rate": 1.7733519709804525e-07, "loss": 0.1533, "step": 12827 }, { "epoch": 0.92, "grad_norm": 1.5177522975788869, "learning_rate": 1.7702955129120125e-07, "loss": 0.1631, "step": 12828 }, { "epoch": 0.92, "grad_norm": 1.5444551598888603, "learning_rate": 1.7672416436225082e-07, "loss": 0.2274, "step": 12829 }, { "epoch": 0.92, "grad_norm": 1.3082135917193725, "learning_rate": 1.764190363275853e-07, "loss": 0.1905, "step": 12830 }, { "epoch": 0.92, "grad_norm": 1.2353417737363328, "learning_rate": 1.7611416720358266e-07, "loss": 0.1112, "step": 12831 }, { "epoch": 0.92, "grad_norm": 1.376544398794052, "learning_rate": 1.7580955700660762e-07, "loss": 0.1965, "step": 12832 }, { "epoch": 0.92, "grad_norm": 1.2546797642941734, "learning_rate": 1.755052057530099e-07, "loss": 0.1611, "step": 12833 }, { "epoch": 0.92, "grad_norm": 1.3318230988802007, "learning_rate": 1.7520111345912637e-07, "loss": 0.1901, "step": 12834 }, { "epoch": 0.92, "grad_norm": 1.1471092268359164, "learning_rate": 1.7489728014127905e-07, "loss": 0.1308, "step": 12835 }, { "epoch": 0.92, "grad_norm": 1.2609314058257748, "learning_rate": 1.745937058157765e-07, "loss": 0.1285, "step": 12836 }, { "epoch": 0.92, "grad_norm": 1.3912497761656284, "learning_rate": 1.742903904989135e-07, "loss": 0.1809, "step": 12837 }, { "epoch": 0.92, "grad_norm": 1.34030083386807, "learning_rate": 1.7398733420697033e-07, "loss": 0.175, "step": 12838 }, { "epoch": 0.92, "grad_norm": 1.3641488291375587, "learning_rate": 1.7368453695621456e-07, "loss": 0.1559, "step": 12839 }, { "epoch": 0.92, "grad_norm": 1.52920484075741, "learning_rate": 1.7338199876289984e-07, "loss": 0.2003, "step": 12840 }, { "epoch": 0.92, "grad_norm": 3.691867749837039, "learning_rate": 1.730797196432632e-07, "loss": 0.5169, "step": 12841 }, { "epoch": 0.92, "grad_norm": 1.5431078769645992, "learning_rate": 1.7277769961353165e-07, "loss": 0.1891, "step": 12842 }, { "epoch": 0.92, "grad_norm": 4.538845839325577, "learning_rate": 1.7247593868991496e-07, "loss": 0.4731, "step": 12843 }, { "epoch": 0.92, "grad_norm": 1.2867288000206403, "learning_rate": 1.721744368886119e-07, "loss": 0.1478, "step": 12844 }, { "epoch": 0.92, "grad_norm": 1.3257499301183564, "learning_rate": 1.7187319422580451e-07, "loss": 0.1589, "step": 12845 }, { "epoch": 0.92, "grad_norm": 4.905120048463217, "learning_rate": 1.7157221071766316e-07, "loss": 0.4166, "step": 12846 }, { "epoch": 0.92, "grad_norm": 1.2254771028913514, "learning_rate": 1.7127148638034274e-07, "loss": 0.1323, "step": 12847 }, { "epoch": 0.92, "grad_norm": 1.4773835885105777, "learning_rate": 1.7097102122998588e-07, "loss": 0.2281, "step": 12848 }, { "epoch": 0.92, "grad_norm": 1.2417969338478139, "learning_rate": 1.7067081528271966e-07, "loss": 0.1393, "step": 12849 }, { "epoch": 0.92, "grad_norm": 1.1834486968188433, "learning_rate": 1.7037086855465902e-07, "loss": 0.1433, "step": 12850 }, { "epoch": 0.92, "grad_norm": 1.3465693673928576, "learning_rate": 1.7007118106190213e-07, "loss": 0.1823, "step": 12851 }, { "epoch": 0.92, "grad_norm": 6.626746650203718, "learning_rate": 1.6977175282053672e-07, "loss": 0.5873, "step": 12852 }, { "epoch": 0.92, "grad_norm": 1.437169898472335, "learning_rate": 1.6947258384663322e-07, "loss": 0.1736, "step": 12853 }, { "epoch": 0.92, "grad_norm": 1.38632346129207, "learning_rate": 1.6917367415625107e-07, "loss": 0.1793, "step": 12854 }, { "epoch": 0.92, "grad_norm": 1.246604364869626, "learning_rate": 1.6887502376543407e-07, "loss": 0.1614, "step": 12855 }, { "epoch": 0.92, "grad_norm": 1.4662093112879977, "learning_rate": 1.685766326902133e-07, "loss": 0.1781, "step": 12856 }, { "epoch": 0.92, "grad_norm": 1.5890817347206132, "learning_rate": 1.6827850094660424e-07, "loss": 0.2021, "step": 12857 }, { "epoch": 0.92, "grad_norm": 1.3837649229609714, "learning_rate": 1.6798062855061025e-07, "loss": 0.1832, "step": 12858 }, { "epoch": 0.92, "grad_norm": 1.367038361183578, "learning_rate": 1.6768301551821908e-07, "loss": 0.1491, "step": 12859 }, { "epoch": 0.92, "grad_norm": 1.3541996635276008, "learning_rate": 1.6738566186540628e-07, "loss": 0.1538, "step": 12860 }, { "epoch": 0.92, "grad_norm": 1.2440045619088251, "learning_rate": 1.6708856760813187e-07, "loss": 0.1324, "step": 12861 }, { "epoch": 0.92, "grad_norm": 1.5797297998894513, "learning_rate": 1.667917327623425e-07, "loss": 0.1566, "step": 12862 }, { "epoch": 0.92, "grad_norm": 1.2573092740785436, "learning_rate": 1.6649515734397214e-07, "loss": 0.1422, "step": 12863 }, { "epoch": 0.92, "grad_norm": 11.432367036544106, "learning_rate": 1.6619884136893915e-07, "loss": 0.5574, "step": 12864 }, { "epoch": 0.92, "grad_norm": 1.163731921719996, "learning_rate": 1.6590278485314914e-07, "loss": 0.161, "step": 12865 }, { "epoch": 0.92, "grad_norm": 1.3408682819827338, "learning_rate": 1.6560698781249218e-07, "loss": 0.1515, "step": 12866 }, { "epoch": 0.92, "grad_norm": 1.245681832770281, "learning_rate": 1.6531145026284667e-07, "loss": 0.1698, "step": 12867 }, { "epoch": 0.92, "grad_norm": 1.4037636132878548, "learning_rate": 1.6501617222007492e-07, "loss": 0.1935, "step": 12868 }, { "epoch": 0.92, "grad_norm": 1.5000233103807037, "learning_rate": 1.6472115370002705e-07, "loss": 0.1645, "step": 12869 }, { "epoch": 0.92, "grad_norm": 3.916082454841228, "learning_rate": 1.6442639471853762e-07, "loss": 0.4796, "step": 12870 }, { "epoch": 0.92, "grad_norm": 4.611564504019174, "learning_rate": 1.6413189529142838e-07, "loss": 0.6148, "step": 12871 }, { "epoch": 0.92, "grad_norm": 1.5505699569923206, "learning_rate": 1.6383765543450724e-07, "loss": 0.1863, "step": 12872 }, { "epoch": 0.92, "grad_norm": 1.4012195447185236, "learning_rate": 1.635436751635683e-07, "loss": 0.2008, "step": 12873 }, { "epoch": 0.92, "grad_norm": 1.5143417399237455, "learning_rate": 1.6324995449439053e-07, "loss": 0.1381, "step": 12874 }, { "epoch": 0.92, "grad_norm": 6.732242935702377, "learning_rate": 1.6295649344273968e-07, "loss": 0.4083, "step": 12875 }, { "epoch": 0.92, "grad_norm": 1.4580546232263518, "learning_rate": 1.6266329202436758e-07, "loss": 0.2346, "step": 12876 }, { "epoch": 0.92, "grad_norm": 1.1821854518223598, "learning_rate": 1.623703502550128e-07, "loss": 0.1464, "step": 12877 }, { "epoch": 0.92, "grad_norm": 1.6275301531509294, "learning_rate": 1.620776681503977e-07, "loss": 0.2043, "step": 12878 }, { "epoch": 0.92, "grad_norm": 1.4274674232524396, "learning_rate": 1.6178524572623367e-07, "loss": 0.137, "step": 12879 }, { "epoch": 0.92, "grad_norm": 1.2871975339199262, "learning_rate": 1.6149308299821643e-07, "loss": 0.1903, "step": 12880 }, { "epoch": 0.92, "grad_norm": 1.4624109661668023, "learning_rate": 1.61201179982029e-07, "loss": 0.1624, "step": 12881 }, { "epoch": 0.92, "grad_norm": 1.4062327411793167, "learning_rate": 1.6090953669333775e-07, "loss": 0.138, "step": 12882 }, { "epoch": 0.92, "grad_norm": 1.2791369612992154, "learning_rate": 1.606181531477985e-07, "loss": 0.1568, "step": 12883 }, { "epoch": 0.92, "grad_norm": 1.3992412508669223, "learning_rate": 1.603270293610504e-07, "loss": 0.1435, "step": 12884 }, { "epoch": 0.92, "grad_norm": 1.3364573951294005, "learning_rate": 1.600361653487209e-07, "loss": 0.1704, "step": 12885 }, { "epoch": 0.92, "grad_norm": 1.4880208469135259, "learning_rate": 1.5974556112642147e-07, "loss": 0.1942, "step": 12886 }, { "epoch": 0.92, "grad_norm": 1.3665210421700824, "learning_rate": 1.5945521670975128e-07, "loss": 0.1728, "step": 12887 }, { "epoch": 0.92, "grad_norm": 1.3651330828997572, "learning_rate": 1.591651321142945e-07, "loss": 0.192, "step": 12888 }, { "epoch": 0.92, "grad_norm": 7.3734949761380655, "learning_rate": 1.5887530735562152e-07, "loss": 0.5842, "step": 12889 }, { "epoch": 0.92, "grad_norm": 1.39587384754957, "learning_rate": 1.5858574244928926e-07, "loss": 0.19, "step": 12890 }, { "epoch": 0.92, "grad_norm": 1.083456310151006, "learning_rate": 1.582964374108409e-07, "loss": 0.1289, "step": 12891 }, { "epoch": 0.92, "grad_norm": 1.2269041243617134, "learning_rate": 1.5800739225580452e-07, "loss": 0.1427, "step": 12892 }, { "epoch": 0.92, "grad_norm": 1.3823273105647054, "learning_rate": 1.5771860699969445e-07, "loss": 0.1472, "step": 12893 }, { "epoch": 0.92, "grad_norm": 1.6096440171397919, "learning_rate": 1.574300816580132e-07, "loss": 0.1607, "step": 12894 }, { "epoch": 0.92, "grad_norm": 1.2169577171172035, "learning_rate": 1.5714181624624625e-07, "loss": 0.1522, "step": 12895 }, { "epoch": 0.92, "grad_norm": 1.2899281018294328, "learning_rate": 1.5685381077986727e-07, "loss": 0.1369, "step": 12896 }, { "epoch": 0.92, "grad_norm": 1.3504956597421125, "learning_rate": 1.565660652743345e-07, "loss": 0.1625, "step": 12897 }, { "epoch": 0.92, "grad_norm": 1.4600053487346356, "learning_rate": 1.562785797450933e-07, "loss": 0.1768, "step": 12898 }, { "epoch": 0.92, "grad_norm": 1.350876291909039, "learning_rate": 1.559913542075747e-07, "loss": 0.1684, "step": 12899 }, { "epoch": 0.92, "grad_norm": 1.492894935337053, "learning_rate": 1.5570438867719695e-07, "loss": 0.1697, "step": 12900 }, { "epoch": 0.92, "grad_norm": 1.4524982054585263, "learning_rate": 1.5541768316936157e-07, "loss": 0.1975, "step": 12901 }, { "epoch": 0.92, "grad_norm": 6.148422425918754, "learning_rate": 1.5513123769945847e-07, "loss": 0.5124, "step": 12902 }, { "epoch": 0.92, "grad_norm": 1.3452216940275108, "learning_rate": 1.5484505228286317e-07, "loss": 0.1688, "step": 12903 }, { "epoch": 0.92, "grad_norm": 1.3194292911203649, "learning_rate": 1.5455912693493668e-07, "loss": 0.1784, "step": 12904 }, { "epoch": 0.92, "grad_norm": 1.3770426527454505, "learning_rate": 1.5427346167102564e-07, "loss": 0.1713, "step": 12905 }, { "epoch": 0.92, "grad_norm": 1.3357997850634453, "learning_rate": 1.5398805650646443e-07, "loss": 0.1716, "step": 12906 }, { "epoch": 0.92, "grad_norm": 1.3538916176121085, "learning_rate": 1.5370291145657247e-07, "loss": 0.1634, "step": 12907 }, { "epoch": 0.92, "grad_norm": 1.367050146082185, "learning_rate": 1.5341802653665527e-07, "loss": 0.1646, "step": 12908 }, { "epoch": 0.92, "grad_norm": 1.3863426338217126, "learning_rate": 1.5313340176200285e-07, "loss": 0.1965, "step": 12909 }, { "epoch": 0.92, "grad_norm": 1.3606178826830897, "learning_rate": 1.5284903714789523e-07, "loss": 0.1785, "step": 12910 }, { "epoch": 0.92, "grad_norm": 1.4613775082693043, "learning_rate": 1.525649327095935e-07, "loss": 0.1677, "step": 12911 }, { "epoch": 0.92, "grad_norm": 1.5540744289976354, "learning_rate": 1.522810884623488e-07, "loss": 0.1972, "step": 12912 }, { "epoch": 0.92, "grad_norm": 1.2192449938417365, "learning_rate": 1.5199750442139615e-07, "loss": 0.1488, "step": 12913 }, { "epoch": 0.92, "grad_norm": 12.703233971197784, "learning_rate": 1.517141806019573e-07, "loss": 0.5076, "step": 12914 }, { "epoch": 0.92, "grad_norm": 1.330231492019149, "learning_rate": 1.5143111701924006e-07, "loss": 0.1505, "step": 12915 }, { "epoch": 0.92, "grad_norm": 11.610203280691792, "learning_rate": 1.511483136884384e-07, "loss": 0.7352, "step": 12916 }, { "epoch": 0.92, "grad_norm": 1.2303204031852368, "learning_rate": 1.5086577062473128e-07, "loss": 0.1521, "step": 12917 }, { "epoch": 0.92, "grad_norm": 1.311486357413655, "learning_rate": 1.5058348784328547e-07, "loss": 0.146, "step": 12918 }, { "epoch": 0.92, "grad_norm": 1.3313614159749305, "learning_rate": 1.5030146535925217e-07, "loss": 0.1771, "step": 12919 }, { "epoch": 0.92, "grad_norm": 1.7519745892173373, "learning_rate": 1.500197031877698e-07, "loss": 0.1779, "step": 12920 }, { "epoch": 0.92, "grad_norm": 1.3438452715000122, "learning_rate": 1.4973820134396079e-07, "loss": 0.1651, "step": 12921 }, { "epoch": 0.92, "grad_norm": 1.5677381084250293, "learning_rate": 1.4945695984293685e-07, "loss": 0.1817, "step": 12922 }, { "epoch": 0.92, "grad_norm": 3.8272386030734693, "learning_rate": 1.491759786997926e-07, "loss": 0.4609, "step": 12923 }, { "epoch": 0.92, "grad_norm": 1.4328138791713092, "learning_rate": 1.4889525792961103e-07, "loss": 0.1798, "step": 12924 }, { "epoch": 0.92, "grad_norm": 5.339541059012164, "learning_rate": 1.4861479754745943e-07, "loss": 0.4788, "step": 12925 }, { "epoch": 0.92, "grad_norm": 1.350559882043178, "learning_rate": 1.483345975683914e-07, "loss": 0.1603, "step": 12926 }, { "epoch": 0.92, "grad_norm": 8.379839200113693, "learning_rate": 1.4805465800744877e-07, "loss": 0.4229, "step": 12927 }, { "epoch": 0.92, "grad_norm": 1.3792472209690887, "learning_rate": 1.4777497887965508e-07, "loss": 0.1896, "step": 12928 }, { "epoch": 0.92, "grad_norm": 1.36359365528784, "learning_rate": 1.4749556020002443e-07, "loss": 0.1766, "step": 12929 }, { "epoch": 0.92, "grad_norm": 1.326540428353443, "learning_rate": 1.4721640198355424e-07, "loss": 0.2028, "step": 12930 }, { "epoch": 0.92, "grad_norm": 1.4913169154942265, "learning_rate": 1.4693750424522757e-07, "loss": 0.1711, "step": 12931 }, { "epoch": 0.93, "grad_norm": 1.39884134525124, "learning_rate": 1.4665886700001632e-07, "loss": 0.1852, "step": 12932 }, { "epoch": 0.93, "grad_norm": 1.323525708225339, "learning_rate": 1.4638049026287572e-07, "loss": 0.1414, "step": 12933 }, { "epoch": 0.93, "grad_norm": 1.0932652740903774, "learning_rate": 1.4610237404874773e-07, "loss": 0.1265, "step": 12934 }, { "epoch": 0.93, "grad_norm": 1.4207941555917307, "learning_rate": 1.4582451837256095e-07, "loss": 0.1636, "step": 12935 }, { "epoch": 0.93, "grad_norm": 1.40865501645935, "learning_rate": 1.4554692324922903e-07, "loss": 0.193, "step": 12936 }, { "epoch": 0.93, "grad_norm": 1.5067522653309107, "learning_rate": 1.4526958869365338e-07, "loss": 0.1843, "step": 12937 }, { "epoch": 0.93, "grad_norm": 1.3997914897268995, "learning_rate": 1.4499251472071873e-07, "loss": 0.1569, "step": 12938 }, { "epoch": 0.93, "grad_norm": 1.5249571025971718, "learning_rate": 1.4471570134529768e-07, "loss": 0.1998, "step": 12939 }, { "epoch": 0.93, "grad_norm": 1.4433698743624355, "learning_rate": 1.4443914858224938e-07, "loss": 0.1479, "step": 12940 }, { "epoch": 0.93, "grad_norm": 1.5523992832047322, "learning_rate": 1.441628564464176e-07, "loss": 0.1948, "step": 12941 }, { "epoch": 0.93, "grad_norm": 1.6378516115813424, "learning_rate": 1.438868249526315e-07, "loss": 0.2051, "step": 12942 }, { "epoch": 0.93, "grad_norm": 1.2128528613426908, "learning_rate": 1.436110541157093e-07, "loss": 0.1391, "step": 12943 }, { "epoch": 0.93, "grad_norm": 1.9841802211260668, "learning_rate": 1.433355439504519e-07, "loss": 0.1794, "step": 12944 }, { "epoch": 0.93, "grad_norm": 1.3325814272476917, "learning_rate": 1.4306029447164858e-07, "loss": 0.1966, "step": 12945 }, { "epoch": 0.93, "grad_norm": 1.347697119996661, "learning_rate": 1.4278530569407257e-07, "loss": 0.1622, "step": 12946 }, { "epoch": 0.93, "grad_norm": 1.2773133500884537, "learning_rate": 1.425105776324842e-07, "loss": 0.1969, "step": 12947 }, { "epoch": 0.93, "grad_norm": 1.356618109075472, "learning_rate": 1.4223611030163064e-07, "loss": 0.1892, "step": 12948 }, { "epoch": 0.93, "grad_norm": 1.4095478445827514, "learning_rate": 1.4196190371624397e-07, "loss": 0.1879, "step": 12949 }, { "epoch": 0.93, "grad_norm": 7.097802976395776, "learning_rate": 1.4168795789104184e-07, "loss": 0.6606, "step": 12950 }, { "epoch": 0.93, "grad_norm": 12.31442018266902, "learning_rate": 1.4141427284072973e-07, "loss": 0.7216, "step": 12951 }, { "epoch": 0.93, "grad_norm": 1.2610130126756305, "learning_rate": 1.4114084857999698e-07, "loss": 0.1399, "step": 12952 }, { "epoch": 0.93, "grad_norm": 1.4560254381956967, "learning_rate": 1.4086768512352079e-07, "loss": 0.1583, "step": 12953 }, { "epoch": 0.93, "grad_norm": 1.3040342171574046, "learning_rate": 1.4059478248596215e-07, "loss": 0.1184, "step": 12954 }, { "epoch": 0.93, "grad_norm": 1.3316654826481873, "learning_rate": 1.4032214068197048e-07, "loss": 0.19, "step": 12955 }, { "epoch": 0.93, "grad_norm": 1.3402568257667655, "learning_rate": 1.4004975972617962e-07, "loss": 0.142, "step": 12956 }, { "epoch": 0.93, "grad_norm": 1.160935222611165, "learning_rate": 1.397776396332101e-07, "loss": 0.1529, "step": 12957 }, { "epoch": 0.93, "grad_norm": 1.5175251881684066, "learning_rate": 1.3950578041766917e-07, "loss": 0.1673, "step": 12958 }, { "epoch": 0.93, "grad_norm": 4.786461694698364, "learning_rate": 1.392341820941473e-07, "loss": 0.6165, "step": 12959 }, { "epoch": 0.93, "grad_norm": 1.3569909910398614, "learning_rate": 1.3896284467722398e-07, "loss": 0.1821, "step": 12960 }, { "epoch": 0.93, "grad_norm": 9.975597444480245, "learning_rate": 1.3869176818146314e-07, "loss": 0.5669, "step": 12961 }, { "epoch": 0.93, "grad_norm": 1.3533362659141104, "learning_rate": 1.3842095262141585e-07, "loss": 0.15, "step": 12962 }, { "epoch": 0.93, "grad_norm": 7.960541401546087, "learning_rate": 1.3815039801161723e-07, "loss": 0.3474, "step": 12963 }, { "epoch": 0.93, "grad_norm": 1.2680083681014855, "learning_rate": 1.3788010436659006e-07, "loss": 0.1714, "step": 12964 }, { "epoch": 0.93, "grad_norm": 5.8324390497248215, "learning_rate": 1.3761007170084273e-07, "loss": 0.5217, "step": 12965 }, { "epoch": 0.93, "grad_norm": 1.3337206639634662, "learning_rate": 1.3734030002886978e-07, "loss": 0.1924, "step": 12966 }, { "epoch": 0.93, "grad_norm": 1.3467838888854387, "learning_rate": 1.3707078936515071e-07, "loss": 0.1906, "step": 12967 }, { "epoch": 0.93, "grad_norm": 5.44934871803956, "learning_rate": 1.3680153972415343e-07, "loss": 0.5684, "step": 12968 }, { "epoch": 0.93, "grad_norm": 1.4529880658353362, "learning_rate": 1.3653255112032803e-07, "loss": 0.1908, "step": 12969 }, { "epoch": 0.93, "grad_norm": 1.3412948556016269, "learning_rate": 1.3626382356811408e-07, "loss": 0.1447, "step": 12970 }, { "epoch": 0.93, "grad_norm": 1.2658208042288468, "learning_rate": 1.35995357081935e-07, "loss": 0.1814, "step": 12971 }, { "epoch": 0.93, "grad_norm": 1.4584534496212038, "learning_rate": 1.357271516762021e-07, "loss": 0.1586, "step": 12972 }, { "epoch": 0.93, "grad_norm": 1.4108451912653879, "learning_rate": 1.354592073653105e-07, "loss": 0.2214, "step": 12973 }, { "epoch": 0.93, "grad_norm": 1.63946851920727, "learning_rate": 1.3519152416364312e-07, "loss": 0.2029, "step": 12974 }, { "epoch": 0.93, "grad_norm": 5.393962865895861, "learning_rate": 1.3492410208556795e-07, "loss": 0.6108, "step": 12975 }, { "epoch": 0.93, "grad_norm": 1.536757571289754, "learning_rate": 1.3465694114543903e-07, "loss": 0.1624, "step": 12976 }, { "epoch": 0.93, "grad_norm": 1.2069847101656013, "learning_rate": 1.343900413575966e-07, "loss": 0.1486, "step": 12977 }, { "epoch": 0.93, "grad_norm": 1.304099853911999, "learning_rate": 1.3412340273636636e-07, "loss": 0.1323, "step": 12978 }, { "epoch": 0.93, "grad_norm": 1.2626633941477252, "learning_rate": 1.3385702529606082e-07, "loss": 0.1864, "step": 12979 }, { "epoch": 0.93, "grad_norm": 1.3512974962584914, "learning_rate": 1.335909090509785e-07, "loss": 0.1931, "step": 12980 }, { "epoch": 0.93, "grad_norm": 1.3962725520776282, "learning_rate": 1.3332505401540186e-07, "loss": 0.1736, "step": 12981 }, { "epoch": 0.93, "grad_norm": 1.3317482919784012, "learning_rate": 1.3305946020360338e-07, "loss": 0.1753, "step": 12982 }, { "epoch": 0.93, "grad_norm": 1.3582307673986647, "learning_rate": 1.3279412762983668e-07, "loss": 0.1698, "step": 12983 }, { "epoch": 0.93, "grad_norm": 14.790905680492724, "learning_rate": 1.325290563083459e-07, "loss": 0.4909, "step": 12984 }, { "epoch": 0.93, "grad_norm": 1.5316658739383067, "learning_rate": 1.3226424625335686e-07, "loss": 0.1684, "step": 12985 }, { "epoch": 0.93, "grad_norm": 1.3640900131410525, "learning_rate": 1.3199969747908546e-07, "loss": 0.1767, "step": 12986 }, { "epoch": 0.93, "grad_norm": 1.0082411426812892, "learning_rate": 1.3173540999973034e-07, "loss": 0.0983, "step": 12987 }, { "epoch": 0.93, "grad_norm": 1.6212404149523039, "learning_rate": 1.3147138382947788e-07, "loss": 0.1743, "step": 12988 }, { "epoch": 0.93, "grad_norm": 1.3052263993813262, "learning_rate": 1.3120761898250013e-07, "loss": 0.153, "step": 12989 }, { "epoch": 0.93, "grad_norm": 1.388540708638378, "learning_rate": 1.3094411547295406e-07, "loss": 0.1475, "step": 12990 }, { "epoch": 0.93, "grad_norm": 1.3532659090054722, "learning_rate": 1.3068087331498447e-07, "loss": 0.1556, "step": 12991 }, { "epoch": 0.93, "grad_norm": 1.30881558198817, "learning_rate": 1.3041789252272063e-07, "loss": 0.1464, "step": 12992 }, { "epoch": 0.93, "grad_norm": 1.392115111204779, "learning_rate": 1.3015517311027904e-07, "loss": 0.1915, "step": 12993 }, { "epoch": 0.93, "grad_norm": 1.2764244182103412, "learning_rate": 1.2989271509176004e-07, "loss": 0.157, "step": 12994 }, { "epoch": 0.93, "grad_norm": 1.271604139215707, "learning_rate": 1.2963051848125352e-07, "loss": 0.1547, "step": 12995 }, { "epoch": 0.93, "grad_norm": 4.703866424221616, "learning_rate": 1.2936858329283043e-07, "loss": 0.6039, "step": 12996 }, { "epoch": 0.93, "grad_norm": 1.329588261496409, "learning_rate": 1.2910690954055282e-07, "loss": 0.1415, "step": 12997 }, { "epoch": 0.93, "grad_norm": 5.415173397206805, "learning_rate": 1.2884549723846451e-07, "loss": 0.6107, "step": 12998 }, { "epoch": 0.93, "grad_norm": 1.342267609363397, "learning_rate": 1.2858434640059814e-07, "loss": 0.1666, "step": 12999 }, { "epoch": 0.93, "grad_norm": 1.304897421337096, "learning_rate": 1.2832345704097082e-07, "loss": 0.1644, "step": 13000 }, { "epoch": 0.93, "grad_norm": 1.4212017982709633, "learning_rate": 1.2806282917358692e-07, "loss": 0.1751, "step": 13001 }, { "epoch": 0.93, "grad_norm": 1.3177373540991115, "learning_rate": 1.2780246281243414e-07, "loss": 0.1407, "step": 13002 }, { "epoch": 0.93, "grad_norm": 1.4478646716571963, "learning_rate": 1.275423579714896e-07, "loss": 0.1891, "step": 13003 }, { "epoch": 0.93, "grad_norm": 1.1882960944908658, "learning_rate": 1.2728251466471387e-07, "loss": 0.1588, "step": 13004 }, { "epoch": 0.93, "grad_norm": 1.399788493914658, "learning_rate": 1.270229329060546e-07, "loss": 0.1692, "step": 13005 }, { "epoch": 0.93, "grad_norm": 1.269340039774937, "learning_rate": 1.267636127094446e-07, "loss": 0.1525, "step": 13006 }, { "epoch": 0.93, "grad_norm": 1.2735409203175627, "learning_rate": 1.265045540888038e-07, "loss": 0.142, "step": 13007 }, { "epoch": 0.93, "grad_norm": 1.4037757526766568, "learning_rate": 1.2624575705803666e-07, "loss": 0.1597, "step": 13008 }, { "epoch": 0.93, "grad_norm": 7.883200284065154, "learning_rate": 1.2598722163103593e-07, "loss": 0.4594, "step": 13009 }, { "epoch": 0.93, "grad_norm": 1.400190402513513, "learning_rate": 1.2572894782167667e-07, "loss": 0.1355, "step": 13010 }, { "epoch": 0.93, "grad_norm": 1.4207291454800643, "learning_rate": 1.2547093564382384e-07, "loss": 0.1754, "step": 13011 }, { "epoch": 0.93, "grad_norm": 1.4315867529724424, "learning_rate": 1.252131851113253e-07, "loss": 0.1528, "step": 13012 }, { "epoch": 0.93, "grad_norm": 1.458978036337528, "learning_rate": 1.249556962380166e-07, "loss": 0.1596, "step": 13013 }, { "epoch": 0.93, "grad_norm": 1.3523558623896803, "learning_rate": 1.2469846903771843e-07, "loss": 0.1627, "step": 13014 }, { "epoch": 0.93, "grad_norm": 1.5271121669354406, "learning_rate": 1.24441503524238e-07, "loss": 0.1558, "step": 13015 }, { "epoch": 0.93, "grad_norm": 1.3453344696713097, "learning_rate": 1.2418479971136822e-07, "loss": 0.1551, "step": 13016 }, { "epoch": 0.93, "grad_norm": 1.332534543665476, "learning_rate": 1.2392835761288802e-07, "loss": 0.1338, "step": 13017 }, { "epoch": 0.93, "grad_norm": 1.2941825192215042, "learning_rate": 1.2367217724256141e-07, "loss": 0.1598, "step": 13018 }, { "epoch": 0.93, "grad_norm": 11.537320213274038, "learning_rate": 1.234162586141402e-07, "loss": 0.6438, "step": 13019 }, { "epoch": 0.93, "grad_norm": 1.4544338958215004, "learning_rate": 1.2316060174136e-07, "loss": 0.1787, "step": 13020 }, { "epoch": 0.93, "grad_norm": 4.747512231125022, "learning_rate": 1.2290520663794435e-07, "loss": 0.4764, "step": 13021 }, { "epoch": 0.93, "grad_norm": 7.420694078192601, "learning_rate": 1.226500733176017e-07, "loss": 0.5657, "step": 13022 }, { "epoch": 0.93, "grad_norm": 1.2658350758569819, "learning_rate": 1.223952017940261e-07, "loss": 0.1225, "step": 13023 }, { "epoch": 0.93, "grad_norm": 1.2537207318497374, "learning_rate": 1.2214059208089825e-07, "loss": 0.1565, "step": 13024 }, { "epoch": 0.93, "grad_norm": 1.3872971259085132, "learning_rate": 1.2188624419188499e-07, "loss": 0.1808, "step": 13025 }, { "epoch": 0.93, "grad_norm": 1.2658627968217848, "learning_rate": 1.216321581406388e-07, "loss": 0.1321, "step": 13026 }, { "epoch": 0.93, "grad_norm": 6.624023964992862, "learning_rate": 1.2137833394079701e-07, "loss": 0.583, "step": 13027 }, { "epoch": 0.93, "grad_norm": 1.3453485750899448, "learning_rate": 1.2112477160598546e-07, "loss": 0.162, "step": 13028 }, { "epoch": 0.93, "grad_norm": 1.4765884147791575, "learning_rate": 1.2087147114981214e-07, "loss": 0.1821, "step": 13029 }, { "epoch": 0.93, "grad_norm": 1.3711788968569627, "learning_rate": 1.2061843258587557e-07, "loss": 0.166, "step": 13030 }, { "epoch": 0.93, "grad_norm": 9.894817986312573, "learning_rate": 1.2036565592775606e-07, "loss": 0.5937, "step": 13031 }, { "epoch": 0.93, "grad_norm": 1.3342730369248716, "learning_rate": 1.2011314118902273e-07, "loss": 0.1907, "step": 13032 }, { "epoch": 0.93, "grad_norm": 1.3948150810909734, "learning_rate": 1.1986088838322919e-07, "loss": 0.1707, "step": 13033 }, { "epoch": 0.93, "grad_norm": 1.2974649541602692, "learning_rate": 1.1960889752391512e-07, "loss": 0.1683, "step": 13034 }, { "epoch": 0.93, "grad_norm": 1.2191271656826363, "learning_rate": 1.19357168624607e-07, "loss": 0.1342, "step": 13035 }, { "epoch": 0.93, "grad_norm": 1.50397157272611, "learning_rate": 1.1910570169881675e-07, "loss": 0.1661, "step": 13036 }, { "epoch": 0.93, "grad_norm": 1.2310629315843262, "learning_rate": 1.188544967600408e-07, "loss": 0.1559, "step": 13037 }, { "epoch": 0.93, "grad_norm": 1.2869831494558506, "learning_rate": 1.1860355382176447e-07, "loss": 0.1435, "step": 13038 }, { "epoch": 0.93, "grad_norm": 1.508035417979797, "learning_rate": 1.1835287289745589e-07, "loss": 0.2021, "step": 13039 }, { "epoch": 0.93, "grad_norm": 1.4614234319272879, "learning_rate": 1.1810245400057152e-07, "loss": 0.1846, "step": 13040 }, { "epoch": 0.93, "grad_norm": 9.24864616097747, "learning_rate": 1.1785229714455227e-07, "loss": 0.675, "step": 13041 }, { "epoch": 0.93, "grad_norm": 1.3413861124674022, "learning_rate": 1.1760240234282683e-07, "loss": 0.1278, "step": 13042 }, { "epoch": 0.93, "grad_norm": 1.3674041012899993, "learning_rate": 1.1735276960880726e-07, "loss": 0.1646, "step": 13043 }, { "epoch": 0.93, "grad_norm": 1.3581006225710643, "learning_rate": 1.171033989558934e-07, "loss": 0.1754, "step": 13044 }, { "epoch": 0.93, "grad_norm": 1.3013411966835415, "learning_rate": 1.1685429039747065e-07, "loss": 0.1612, "step": 13045 }, { "epoch": 0.93, "grad_norm": 1.478292729082162, "learning_rate": 1.1660544394690943e-07, "loss": 0.2076, "step": 13046 }, { "epoch": 0.93, "grad_norm": 1.2814085519438763, "learning_rate": 1.1635685961756737e-07, "loss": 0.1709, "step": 13047 }, { "epoch": 0.93, "grad_norm": 1.445485746596271, "learning_rate": 1.1610853742278716e-07, "loss": 0.1568, "step": 13048 }, { "epoch": 0.93, "grad_norm": 1.2202765683122585, "learning_rate": 1.1586047737589811e-07, "loss": 0.1518, "step": 13049 }, { "epoch": 0.93, "grad_norm": 1.314784589660231, "learning_rate": 1.156126794902157e-07, "loss": 0.1313, "step": 13050 }, { "epoch": 0.93, "grad_norm": 1.3029084909703432, "learning_rate": 1.1536514377903929e-07, "loss": 0.1637, "step": 13051 }, { "epoch": 0.93, "grad_norm": 1.4339914262765812, "learning_rate": 1.1511787025565657e-07, "loss": 0.1434, "step": 13052 }, { "epoch": 0.93, "grad_norm": 1.2844191853008895, "learning_rate": 1.1487085893333971e-07, "loss": 0.1312, "step": 13053 }, { "epoch": 0.93, "grad_norm": 8.029903553624552, "learning_rate": 1.1462410982534755e-07, "loss": 0.6679, "step": 13054 }, { "epoch": 0.93, "grad_norm": 1.2381020716243722, "learning_rate": 1.1437762294492505e-07, "loss": 0.1691, "step": 13055 }, { "epoch": 0.93, "grad_norm": 1.35257495098502, "learning_rate": 1.1413139830530218e-07, "loss": 0.1633, "step": 13056 }, { "epoch": 0.93, "grad_norm": 1.4669209733120736, "learning_rate": 1.1388543591969447e-07, "loss": 0.1659, "step": 13057 }, { "epoch": 0.93, "grad_norm": 1.461701080708909, "learning_rate": 1.1363973580130583e-07, "loss": 0.172, "step": 13058 }, { "epoch": 0.93, "grad_norm": 1.5460519686090854, "learning_rate": 1.1339429796332346e-07, "loss": 0.166, "step": 13059 }, { "epoch": 0.93, "grad_norm": 1.531885829681735, "learning_rate": 1.1314912241892184e-07, "loss": 0.1714, "step": 13060 }, { "epoch": 0.93, "grad_norm": 4.808847205768016, "learning_rate": 1.12904209181261e-07, "loss": 0.5302, "step": 13061 }, { "epoch": 0.93, "grad_norm": 1.3035162116687118, "learning_rate": 1.1265955826348651e-07, "loss": 0.1274, "step": 13062 }, { "epoch": 0.93, "grad_norm": 1.373753561739822, "learning_rate": 1.1241516967873123e-07, "loss": 0.1557, "step": 13063 }, { "epoch": 0.93, "grad_norm": 1.3210867360016143, "learning_rate": 1.1217104344011188e-07, "loss": 0.1594, "step": 13064 }, { "epoch": 0.93, "grad_norm": 1.2385621757311034, "learning_rate": 1.1192717956073185e-07, "loss": 0.1477, "step": 13065 }, { "epoch": 0.93, "grad_norm": 1.167584155431501, "learning_rate": 1.1168357805368235e-07, "loss": 0.1785, "step": 13066 }, { "epoch": 0.93, "grad_norm": 1.3436188375363232, "learning_rate": 1.1144023893203793e-07, "loss": 0.1818, "step": 13067 }, { "epoch": 0.93, "grad_norm": 1.3352287358052948, "learning_rate": 1.1119716220886034e-07, "loss": 0.1747, "step": 13068 }, { "epoch": 0.93, "grad_norm": 1.4915024396218557, "learning_rate": 1.1095434789719694e-07, "loss": 0.1707, "step": 13069 }, { "epoch": 0.93, "grad_norm": 1.3193940317117399, "learning_rate": 1.107117960100812e-07, "loss": 0.1335, "step": 13070 }, { "epoch": 0.93, "grad_norm": 1.2944119861840084, "learning_rate": 1.1046950656053213e-07, "loss": 0.1559, "step": 13071 }, { "epoch": 0.94, "grad_norm": 1.3431899582036462, "learning_rate": 1.1022747956155432e-07, "loss": 0.152, "step": 13072 }, { "epoch": 0.94, "grad_norm": 1.3819211347768412, "learning_rate": 1.0998571502613964e-07, "loss": 0.1694, "step": 13073 }, { "epoch": 0.94, "grad_norm": 1.2411462118299283, "learning_rate": 1.0974421296726435e-07, "loss": 0.1435, "step": 13074 }, { "epoch": 0.94, "grad_norm": 5.5324546334017, "learning_rate": 1.0950297339789252e-07, "loss": 0.5558, "step": 13075 }, { "epoch": 0.94, "grad_norm": 1.3834918229447388, "learning_rate": 1.0926199633097156e-07, "loss": 0.2221, "step": 13076 }, { "epoch": 0.94, "grad_norm": 4.5197715092846, "learning_rate": 1.0902128177943783e-07, "loss": 0.4249, "step": 13077 }, { "epoch": 0.94, "grad_norm": 1.4913011682842394, "learning_rate": 1.0878082975620985e-07, "loss": 0.1902, "step": 13078 }, { "epoch": 0.94, "grad_norm": 1.3248632542578571, "learning_rate": 1.0854064027419564e-07, "loss": 0.1698, "step": 13079 }, { "epoch": 0.94, "grad_norm": 8.030048043291224, "learning_rate": 1.0830071334628655e-07, "loss": 0.4913, "step": 13080 }, { "epoch": 0.94, "grad_norm": 1.5813138430132825, "learning_rate": 1.0806104898536229e-07, "loss": 0.1792, "step": 13081 }, { "epoch": 0.94, "grad_norm": 1.378566173273803, "learning_rate": 1.0782164720428479e-07, "loss": 0.1338, "step": 13082 }, { "epoch": 0.94, "grad_norm": 1.504559037991383, "learning_rate": 1.0758250801590709e-07, "loss": 0.2034, "step": 13083 }, { "epoch": 0.94, "grad_norm": 1.3848468434242853, "learning_rate": 1.0734363143306337e-07, "loss": 0.187, "step": 13084 }, { "epoch": 0.94, "grad_norm": 1.3329826609720354, "learning_rate": 1.071050174685756e-07, "loss": 0.1686, "step": 13085 }, { "epoch": 0.94, "grad_norm": 1.4426649824191622, "learning_rate": 1.0686666613525298e-07, "loss": 0.1842, "step": 13086 }, { "epoch": 0.94, "grad_norm": 1.395489446400578, "learning_rate": 1.0662857744588751e-07, "loss": 0.1446, "step": 13087 }, { "epoch": 0.94, "grad_norm": 1.2792686249295209, "learning_rate": 1.0639075141326061e-07, "loss": 0.1193, "step": 13088 }, { "epoch": 0.94, "grad_norm": 1.3984795156823275, "learning_rate": 1.0615318805013596e-07, "loss": 0.1508, "step": 13089 }, { "epoch": 0.94, "grad_norm": 1.4382458851959075, "learning_rate": 1.0591588736926672e-07, "loss": 0.1755, "step": 13090 }, { "epoch": 0.94, "grad_norm": 1.3533279641194829, "learning_rate": 1.0567884938338879e-07, "loss": 0.1698, "step": 13091 }, { "epoch": 0.94, "grad_norm": 1.2702161509420813, "learning_rate": 1.0544207410522644e-07, "loss": 0.1773, "step": 13092 }, { "epoch": 0.94, "grad_norm": 1.5583326868147065, "learning_rate": 1.0520556154748785e-07, "loss": 0.1861, "step": 13093 }, { "epoch": 0.94, "grad_norm": 1.5483130044613096, "learning_rate": 1.0496931172287006e-07, "loss": 0.184, "step": 13094 }, { "epoch": 0.94, "grad_norm": 9.708388351396467, "learning_rate": 1.0473332464405184e-07, "loss": 0.6168, "step": 13095 }, { "epoch": 0.94, "grad_norm": 1.2889044943779253, "learning_rate": 1.0449760032370082e-07, "loss": 0.1496, "step": 13096 }, { "epoch": 0.94, "grad_norm": 1.4123455661993207, "learning_rate": 1.0426213877447022e-07, "loss": 0.2114, "step": 13097 }, { "epoch": 0.94, "grad_norm": 1.7067079570069033, "learning_rate": 1.0402694000899827e-07, "loss": 0.2053, "step": 13098 }, { "epoch": 0.94, "grad_norm": 1.3205238824992984, "learning_rate": 1.037920040399093e-07, "loss": 0.1385, "step": 13099 }, { "epoch": 0.94, "grad_norm": 1.6385177648235962, "learning_rate": 1.035573308798138e-07, "loss": 0.2219, "step": 13100 }, { "epoch": 0.94, "grad_norm": 1.3263431913695964, "learning_rate": 1.0332292054130777e-07, "loss": 0.1548, "step": 13101 }, { "epoch": 0.94, "grad_norm": 1.159165278064142, "learning_rate": 1.0308877303697507e-07, "loss": 0.1278, "step": 13102 }, { "epoch": 0.94, "grad_norm": 5.595434251741846, "learning_rate": 1.0285488837938174e-07, "loss": 0.5559, "step": 13103 }, { "epoch": 0.94, "grad_norm": 1.3752630909287211, "learning_rate": 1.0262126658108329e-07, "loss": 0.1631, "step": 13104 }, { "epoch": 0.94, "grad_norm": 1.2840145265315719, "learning_rate": 1.0238790765461858e-07, "loss": 0.1285, "step": 13105 }, { "epoch": 0.94, "grad_norm": 5.107591290111134, "learning_rate": 1.0215481161251428e-07, "loss": 0.6798, "step": 13106 }, { "epoch": 0.94, "grad_norm": 1.3693171368534323, "learning_rate": 1.0192197846728147e-07, "loss": 0.2093, "step": 13107 }, { "epoch": 0.94, "grad_norm": 5.277469947011179, "learning_rate": 1.0168940823141737e-07, "loss": 0.6557, "step": 13108 }, { "epoch": 0.94, "grad_norm": 1.3852171817117205, "learning_rate": 1.014571009174059e-07, "loss": 0.1811, "step": 13109 }, { "epoch": 0.94, "grad_norm": 1.1329181902241747, "learning_rate": 1.0122505653771708e-07, "loss": 0.1242, "step": 13110 }, { "epoch": 0.94, "grad_norm": 1.353506512698889, "learning_rate": 1.0099327510480483e-07, "loss": 0.1876, "step": 13111 }, { "epoch": 0.94, "grad_norm": 1.4337532917646107, "learning_rate": 1.0076175663111087e-07, "loss": 0.1348, "step": 13112 }, { "epoch": 0.94, "grad_norm": 1.2477611070074937, "learning_rate": 1.0053050112906248e-07, "loss": 0.1321, "step": 13113 }, { "epoch": 0.94, "grad_norm": 1.3912910857740561, "learning_rate": 1.0029950861107196e-07, "loss": 0.1741, "step": 13114 }, { "epoch": 0.94, "grad_norm": 1.3512404767831534, "learning_rate": 1.0006877908953826e-07, "loss": 0.1386, "step": 13115 }, { "epoch": 0.94, "grad_norm": 8.110986870385391, "learning_rate": 9.983831257684651e-08, "loss": 0.5485, "step": 13116 }, { "epoch": 0.94, "grad_norm": 1.5483293940956269, "learning_rate": 9.960810908536622e-08, "loss": 0.1896, "step": 13117 }, { "epoch": 0.94, "grad_norm": 1.229605540318505, "learning_rate": 9.937816862745475e-08, "loss": 0.1407, "step": 13118 }, { "epoch": 0.94, "grad_norm": 1.3297474618040226, "learning_rate": 9.914849121545388e-08, "loss": 0.1843, "step": 13119 }, { "epoch": 0.94, "grad_norm": 1.5567899971390182, "learning_rate": 9.891907686169211e-08, "loss": 0.189, "step": 13120 }, { "epoch": 0.94, "grad_norm": 1.4312646734644974, "learning_rate": 9.868992557848401e-08, "loss": 0.1659, "step": 13121 }, { "epoch": 0.94, "grad_norm": 1.341316879436319, "learning_rate": 9.846103737812807e-08, "loss": 0.1651, "step": 13122 }, { "epoch": 0.94, "grad_norm": 14.065110660810747, "learning_rate": 9.823241227291113e-08, "loss": 0.5843, "step": 13123 }, { "epoch": 0.94, "grad_norm": 1.382805628650766, "learning_rate": 9.800405027510396e-08, "loss": 0.1682, "step": 13124 }, { "epoch": 0.94, "grad_norm": 1.585446288251537, "learning_rate": 9.777595139696506e-08, "loss": 0.1742, "step": 13125 }, { "epoch": 0.94, "grad_norm": 1.2576152579774451, "learning_rate": 9.754811565073796e-08, "loss": 0.1685, "step": 13126 }, { "epoch": 0.94, "grad_norm": 1.4499602127577298, "learning_rate": 9.73205430486518e-08, "loss": 0.1851, "step": 13127 }, { "epoch": 0.94, "grad_norm": 1.4824911039300384, "learning_rate": 9.709323360292067e-08, "loss": 0.1733, "step": 13128 }, { "epoch": 0.94, "grad_norm": 1.3227094630150207, "learning_rate": 9.68661873257476e-08, "loss": 0.1767, "step": 13129 }, { "epoch": 0.94, "grad_norm": 1.2209886216560952, "learning_rate": 9.663940422931783e-08, "loss": 0.159, "step": 13130 }, { "epoch": 0.94, "grad_norm": 1.337536080474091, "learning_rate": 9.641288432580498e-08, "loss": 0.1852, "step": 13131 }, { "epoch": 0.94, "grad_norm": 1.5227611830354462, "learning_rate": 9.618662762736764e-08, "loss": 0.1981, "step": 13132 }, { "epoch": 0.94, "grad_norm": 1.2972669414577704, "learning_rate": 9.596063414614998e-08, "loss": 0.1865, "step": 13133 }, { "epoch": 0.94, "grad_norm": 1.3156695845908088, "learning_rate": 9.573490389428286e-08, "loss": 0.1651, "step": 13134 }, { "epoch": 0.94, "grad_norm": 1.357873390491458, "learning_rate": 9.550943688388325e-08, "loss": 0.2129, "step": 13135 }, { "epoch": 0.94, "grad_norm": 1.2288554871393065, "learning_rate": 9.528423312705148e-08, "loss": 0.1716, "step": 13136 }, { "epoch": 0.94, "grad_norm": 1.2303274837130422, "learning_rate": 9.505929263587733e-08, "loss": 0.1337, "step": 13137 }, { "epoch": 0.94, "grad_norm": 1.2889038268444117, "learning_rate": 9.483461542243388e-08, "loss": 0.1568, "step": 13138 }, { "epoch": 0.94, "grad_norm": 1.521683357271531, "learning_rate": 9.461020149878153e-08, "loss": 0.1709, "step": 13139 }, { "epoch": 0.94, "grad_norm": 1.3005790355300526, "learning_rate": 9.43860508769645e-08, "loss": 0.189, "step": 13140 }, { "epoch": 0.94, "grad_norm": 1.2474637291205568, "learning_rate": 9.416216356901597e-08, "loss": 0.1988, "step": 13141 }, { "epoch": 0.94, "grad_norm": 1.4133639339308066, "learning_rate": 9.393853958695242e-08, "loss": 0.162, "step": 13142 }, { "epoch": 0.94, "grad_norm": 1.3785053020309725, "learning_rate": 9.37151789427776e-08, "loss": 0.1567, "step": 13143 }, { "epoch": 0.94, "grad_norm": 1.1666104222537332, "learning_rate": 9.349208164847967e-08, "loss": 0.1444, "step": 13144 }, { "epoch": 0.94, "grad_norm": 1.3307109058678352, "learning_rate": 9.326924771603518e-08, "loss": 0.1706, "step": 13145 }, { "epoch": 0.94, "grad_norm": 6.6088935611880295, "learning_rate": 9.304667715740345e-08, "loss": 0.6629, "step": 13146 }, { "epoch": 0.94, "grad_norm": 1.3319720250609979, "learning_rate": 9.282436998453215e-08, "loss": 0.1507, "step": 13147 }, { "epoch": 0.94, "grad_norm": 1.4114046449541982, "learning_rate": 9.26023262093534e-08, "loss": 0.2, "step": 13148 }, { "epoch": 0.94, "grad_norm": 1.2603244406999339, "learning_rate": 9.238054584378541e-08, "loss": 0.1785, "step": 13149 }, { "epoch": 0.94, "grad_norm": 1.4976525073715505, "learning_rate": 9.21590288997326e-08, "loss": 0.1987, "step": 13150 }, { "epoch": 0.94, "grad_norm": 1.2864213327436262, "learning_rate": 9.193777538908599e-08, "loss": 0.1713, "step": 13151 }, { "epoch": 0.94, "grad_norm": 1.2462573364831886, "learning_rate": 9.171678532372052e-08, "loss": 0.1589, "step": 13152 }, { "epoch": 0.94, "grad_norm": 1.26175273957759, "learning_rate": 9.149605871549838e-08, "loss": 0.1415, "step": 13153 }, { "epoch": 0.94, "grad_norm": 1.1182522289505583, "learning_rate": 9.127559557626786e-08, "loss": 0.1251, "step": 13154 }, { "epoch": 0.94, "grad_norm": 4.904515872118644, "learning_rate": 9.105539591786172e-08, "loss": 0.5161, "step": 13155 }, { "epoch": 0.94, "grad_norm": 1.357110053473666, "learning_rate": 9.083545975209995e-08, "loss": 0.1946, "step": 13156 }, { "epoch": 0.94, "grad_norm": 4.772578343180989, "learning_rate": 9.0615787090787e-08, "loss": 0.402, "step": 13157 }, { "epoch": 0.94, "grad_norm": 1.1188050197733188, "learning_rate": 9.03963779457151e-08, "loss": 0.1253, "step": 13158 }, { "epoch": 0.94, "grad_norm": 1.4707379574131976, "learning_rate": 9.017723232866039e-08, "loss": 0.1639, "step": 13159 }, { "epoch": 0.94, "grad_norm": 1.2273041047292015, "learning_rate": 8.995835025138677e-08, "loss": 0.1577, "step": 13160 }, { "epoch": 0.94, "grad_norm": 1.0926319135109355, "learning_rate": 8.97397317256421e-08, "loss": 0.1207, "step": 13161 }, { "epoch": 0.94, "grad_norm": 1.3867029982926145, "learning_rate": 8.952137676316141e-08, "loss": 0.1692, "step": 13162 }, { "epoch": 0.94, "grad_norm": 1.4240445258121894, "learning_rate": 8.93032853756648e-08, "loss": 0.1505, "step": 13163 }, { "epoch": 0.94, "grad_norm": 1.4157623775107033, "learning_rate": 8.908545757485843e-08, "loss": 0.1938, "step": 13164 }, { "epoch": 0.94, "grad_norm": 1.2080908690891115, "learning_rate": 8.886789337243462e-08, "loss": 0.1409, "step": 13165 }, { "epoch": 0.94, "grad_norm": 1.2990010372879015, "learning_rate": 8.865059278007182e-08, "loss": 0.1402, "step": 13166 }, { "epoch": 0.94, "grad_norm": 1.2229264905357322, "learning_rate": 8.843355580943291e-08, "loss": 0.1613, "step": 13167 }, { "epoch": 0.94, "grad_norm": 1.2919422351630574, "learning_rate": 8.821678247216858e-08, "loss": 0.1653, "step": 13168 }, { "epoch": 0.94, "grad_norm": 6.720174039662592, "learning_rate": 8.800027277991341e-08, "loss": 0.7503, "step": 13169 }, { "epoch": 0.94, "grad_norm": 1.2591293754937085, "learning_rate": 8.778402674429031e-08, "loss": 0.1202, "step": 13170 }, { "epoch": 0.94, "grad_norm": 1.6820175215716104, "learning_rate": 8.756804437690447e-08, "loss": 0.1728, "step": 13171 }, { "epoch": 0.94, "grad_norm": 1.2476043944204145, "learning_rate": 8.735232568935049e-08, "loss": 0.1819, "step": 13172 }, { "epoch": 0.94, "grad_norm": 1.28675531022988, "learning_rate": 8.713687069320631e-08, "loss": 0.1468, "step": 13173 }, { "epoch": 0.94, "grad_norm": 1.3475176998565799, "learning_rate": 8.692167940003715e-08, "loss": 0.1872, "step": 13174 }, { "epoch": 0.94, "grad_norm": 1.1563297716167171, "learning_rate": 8.670675182139321e-08, "loss": 0.1285, "step": 13175 }, { "epoch": 0.94, "grad_norm": 1.2527017363892579, "learning_rate": 8.649208796881248e-08, "loss": 0.1331, "step": 13176 }, { "epoch": 0.94, "grad_norm": 1.464795728271567, "learning_rate": 8.627768785381518e-08, "loss": 0.1875, "step": 13177 }, { "epoch": 0.94, "grad_norm": 1.3890714342467665, "learning_rate": 8.606355148791101e-08, "loss": 0.1699, "step": 13178 }, { "epoch": 0.94, "grad_norm": 1.3710217749381677, "learning_rate": 8.584967888259299e-08, "loss": 0.2052, "step": 13179 }, { "epoch": 0.94, "grad_norm": 1.424173515550315, "learning_rate": 8.563607004934193e-08, "loss": 0.1825, "step": 13180 }, { "epoch": 0.94, "grad_norm": 1.3525908163031133, "learning_rate": 8.5422724999622e-08, "loss": 0.1731, "step": 13181 }, { "epoch": 0.94, "grad_norm": 1.3889769580591278, "learning_rate": 8.520964374488627e-08, "loss": 0.1657, "step": 13182 }, { "epoch": 0.94, "grad_norm": 1.3917039236269346, "learning_rate": 8.499682629657114e-08, "loss": 0.1768, "step": 13183 }, { "epoch": 0.94, "grad_norm": 1.2745733567079207, "learning_rate": 8.478427266609968e-08, "loss": 0.1807, "step": 13184 }, { "epoch": 0.94, "grad_norm": 1.2221171778586912, "learning_rate": 8.457198286488222e-08, "loss": 0.1639, "step": 13185 }, { "epoch": 0.94, "grad_norm": 1.2338872376690642, "learning_rate": 8.435995690431242e-08, "loss": 0.1546, "step": 13186 }, { "epoch": 0.94, "grad_norm": 1.3188212572258886, "learning_rate": 8.414819479577118e-08, "loss": 0.1622, "step": 13187 }, { "epoch": 0.94, "grad_norm": 1.379511274167154, "learning_rate": 8.393669655062553e-08, "loss": 0.124, "step": 13188 }, { "epoch": 0.94, "grad_norm": 1.5580262105535592, "learning_rate": 8.372546218022747e-08, "loss": 0.1702, "step": 13189 }, { "epoch": 0.94, "grad_norm": 6.570275028084891, "learning_rate": 8.351449169591463e-08, "loss": 0.616, "step": 13190 }, { "epoch": 0.94, "grad_norm": 1.3419742066889624, "learning_rate": 8.330378510901238e-08, "loss": 0.1589, "step": 13191 }, { "epoch": 0.94, "grad_norm": 1.4778512504421364, "learning_rate": 8.309334243083e-08, "loss": 0.1426, "step": 13192 }, { "epoch": 0.94, "grad_norm": 1.268436198876092, "learning_rate": 8.288316367266291e-08, "loss": 0.1395, "step": 13193 }, { "epoch": 0.94, "grad_norm": 1.3528786332494176, "learning_rate": 8.267324884579264e-08, "loss": 0.1865, "step": 13194 }, { "epoch": 0.94, "grad_norm": 1.2932904597220947, "learning_rate": 8.24635979614874e-08, "loss": 0.137, "step": 13195 }, { "epoch": 0.94, "grad_norm": 4.411943736392523, "learning_rate": 8.225421103099929e-08, "loss": 0.5426, "step": 13196 }, { "epoch": 0.94, "grad_norm": 5.414172526109825, "learning_rate": 8.204508806556877e-08, "loss": 0.5679, "step": 13197 }, { "epoch": 0.94, "grad_norm": 1.3660109446424185, "learning_rate": 8.183622907641963e-08, "loss": 0.1765, "step": 13198 }, { "epoch": 0.94, "grad_norm": 1.3240350017822546, "learning_rate": 8.162763407476293e-08, "loss": 0.1685, "step": 13199 }, { "epoch": 0.94, "grad_norm": 1.2602024284896012, "learning_rate": 8.141930307179468e-08, "loss": 0.1734, "step": 13200 }, { "epoch": 0.94, "grad_norm": 1.4054850099068794, "learning_rate": 8.121123607869818e-08, "loss": 0.1495, "step": 13201 }, { "epoch": 0.94, "grad_norm": 1.3272310905770894, "learning_rate": 8.100343310664061e-08, "loss": 0.1457, "step": 13202 }, { "epoch": 0.94, "grad_norm": 1.4742246384552529, "learning_rate": 8.079589416677747e-08, "loss": 0.1761, "step": 13203 }, { "epoch": 0.94, "grad_norm": 1.3407467158577455, "learning_rate": 8.058861927024708e-08, "loss": 0.1699, "step": 13204 }, { "epoch": 0.94, "grad_norm": 1.2833551866324244, "learning_rate": 8.038160842817666e-08, "loss": 0.1673, "step": 13205 }, { "epoch": 0.94, "grad_norm": 1.4261462553203497, "learning_rate": 8.017486165167621e-08, "loss": 0.1407, "step": 13206 }, { "epoch": 0.94, "grad_norm": 1.1562713087317606, "learning_rate": 7.996837895184406e-08, "loss": 0.1265, "step": 13207 }, { "epoch": 0.94, "grad_norm": 1.4636628870217436, "learning_rate": 7.976216033976303e-08, "loss": 0.1919, "step": 13208 }, { "epoch": 0.94, "grad_norm": 1.3401967494704117, "learning_rate": 7.955620582650204e-08, "loss": 0.1413, "step": 13209 }, { "epoch": 0.94, "grad_norm": 1.2935427297686013, "learning_rate": 7.935051542311556e-08, "loss": 0.1695, "step": 13210 }, { "epoch": 0.94, "grad_norm": 1.5016092546494715, "learning_rate": 7.91450891406459e-08, "loss": 0.1721, "step": 13211 }, { "epoch": 0.95, "grad_norm": 1.4751814581786167, "learning_rate": 7.893992699011754e-08, "loss": 0.176, "step": 13212 }, { "epoch": 0.95, "grad_norm": 1.339140025214084, "learning_rate": 7.873502898254337e-08, "loss": 0.1601, "step": 13213 }, { "epoch": 0.95, "grad_norm": 1.2450203871782686, "learning_rate": 7.853039512892235e-08, "loss": 0.1422, "step": 13214 }, { "epoch": 0.95, "grad_norm": 6.862964689344508, "learning_rate": 7.832602544023737e-08, "loss": 0.6403, "step": 13215 }, { "epoch": 0.95, "grad_norm": 1.4564716105323856, "learning_rate": 7.812191992745854e-08, "loss": 0.1768, "step": 13216 }, { "epoch": 0.95, "grad_norm": 1.284168528774165, "learning_rate": 7.791807860154155e-08, "loss": 0.1395, "step": 13217 }, { "epoch": 0.95, "grad_norm": 1.0470929117950873, "learning_rate": 7.771450147342763e-08, "loss": 0.1026, "step": 13218 }, { "epoch": 0.95, "grad_norm": 1.4016408515146388, "learning_rate": 7.751118855404416e-08, "loss": 0.206, "step": 13219 }, { "epoch": 0.95, "grad_norm": 1.3856990364992687, "learning_rate": 7.730813985430407e-08, "loss": 0.1543, "step": 13220 }, { "epoch": 0.95, "grad_norm": 1.4213896760697655, "learning_rate": 7.710535538510589e-08, "loss": 0.2111, "step": 13221 }, { "epoch": 0.95, "grad_norm": 5.012461332937578, "learning_rate": 7.690283515733532e-08, "loss": 0.5598, "step": 13222 }, { "epoch": 0.95, "grad_norm": 6.560209140015982, "learning_rate": 7.670057918186147e-08, "loss": 0.6105, "step": 13223 }, { "epoch": 0.95, "grad_norm": 1.3324723368649825, "learning_rate": 7.649858746954176e-08, "loss": 0.1433, "step": 13224 }, { "epoch": 0.95, "grad_norm": 1.4768086776168086, "learning_rate": 7.62968600312175e-08, "loss": 0.1561, "step": 13225 }, { "epoch": 0.95, "grad_norm": 1.2798724942726052, "learning_rate": 7.609539687771727e-08, "loss": 0.1304, "step": 13226 }, { "epoch": 0.95, "grad_norm": 1.7855314924497487, "learning_rate": 7.589419801985409e-08, "loss": 0.1865, "step": 13227 }, { "epoch": 0.95, "grad_norm": 1.296955319019752, "learning_rate": 7.569326346842876e-08, "loss": 0.1718, "step": 13228 }, { "epoch": 0.95, "grad_norm": 1.4851335929835605, "learning_rate": 7.549259323422486e-08, "loss": 0.1884, "step": 13229 }, { "epoch": 0.95, "grad_norm": 1.3677643098864833, "learning_rate": 7.529218732801491e-08, "loss": 0.1738, "step": 13230 }, { "epoch": 0.95, "grad_norm": 1.2484973443639664, "learning_rate": 7.509204576055529e-08, "loss": 0.1725, "step": 13231 }, { "epoch": 0.95, "grad_norm": 1.2846549468903667, "learning_rate": 7.489216854258963e-08, "loss": 0.1445, "step": 13232 }, { "epoch": 0.95, "grad_norm": 1.2658728955844833, "learning_rate": 7.469255568484545e-08, "loss": 0.1443, "step": 13233 }, { "epoch": 0.95, "grad_norm": 1.2605448486544473, "learning_rate": 7.449320719803755e-08, "loss": 0.2019, "step": 13234 }, { "epoch": 0.95, "grad_norm": 1.429345167294272, "learning_rate": 7.429412309286676e-08, "loss": 0.1707, "step": 13235 }, { "epoch": 0.95, "grad_norm": 1.49159151752236, "learning_rate": 7.409530338001846e-08, "loss": 0.1815, "step": 13236 }, { "epoch": 0.95, "grad_norm": 1.2420970692119704, "learning_rate": 7.389674807016466e-08, "loss": 0.1207, "step": 13237 }, { "epoch": 0.95, "grad_norm": 1.3931910733036952, "learning_rate": 7.369845717396296e-08, "loss": 0.1632, "step": 13238 }, { "epoch": 0.95, "grad_norm": 1.3788759808527267, "learning_rate": 7.350043070205704e-08, "loss": 0.2028, "step": 13239 }, { "epoch": 0.95, "grad_norm": 1.3691547178452361, "learning_rate": 7.330266866507618e-08, "loss": 0.1773, "step": 13240 }, { "epoch": 0.95, "grad_norm": 1.1552786896711713, "learning_rate": 7.310517107363524e-08, "loss": 0.14, "step": 13241 }, { "epoch": 0.95, "grad_norm": 1.1923613077957342, "learning_rate": 7.290793793833517e-08, "loss": 0.1523, "step": 13242 }, { "epoch": 0.95, "grad_norm": 4.6902423487841105, "learning_rate": 7.271096926976251e-08, "loss": 0.5808, "step": 13243 }, { "epoch": 0.95, "grad_norm": 1.3509350546262486, "learning_rate": 7.25142650784899e-08, "loss": 0.1672, "step": 13244 }, { "epoch": 0.95, "grad_norm": 1.4719310867624726, "learning_rate": 7.231782537507614e-08, "loss": 0.1538, "step": 13245 }, { "epoch": 0.95, "grad_norm": 1.4686892159235314, "learning_rate": 7.212165017006445e-08, "loss": 0.1893, "step": 13246 }, { "epoch": 0.95, "grad_norm": 1.4935687993719293, "learning_rate": 7.192573947398529e-08, "loss": 0.203, "step": 13247 }, { "epoch": 0.95, "grad_norm": 1.3848343846267883, "learning_rate": 7.173009329735415e-08, "loss": 0.1637, "step": 13248 }, { "epoch": 0.95, "grad_norm": 1.460053041703305, "learning_rate": 7.153471165067316e-08, "loss": 0.1717, "step": 13249 }, { "epoch": 0.95, "grad_norm": 1.4102758891736003, "learning_rate": 7.13395945444284e-08, "loss": 0.183, "step": 13250 }, { "epoch": 0.95, "grad_norm": 1.3119573478711892, "learning_rate": 7.114474198909372e-08, "loss": 0.151, "step": 13251 }, { "epoch": 0.95, "grad_norm": 7.2454437782653445, "learning_rate": 7.095015399512795e-08, "loss": 0.6151, "step": 13252 }, { "epoch": 0.95, "grad_norm": 1.4925098994958246, "learning_rate": 7.075583057297664e-08, "loss": 0.1974, "step": 13253 }, { "epoch": 0.95, "grad_norm": 1.2899647110576729, "learning_rate": 7.056177173306866e-08, "loss": 0.1221, "step": 13254 }, { "epoch": 0.95, "grad_norm": 1.6524785038332643, "learning_rate": 7.03679774858218e-08, "loss": 0.206, "step": 13255 }, { "epoch": 0.95, "grad_norm": 1.4669521814104454, "learning_rate": 7.017444784163663e-08, "loss": 0.1706, "step": 13256 }, { "epoch": 0.95, "grad_norm": 1.2481972039563185, "learning_rate": 6.998118281090316e-08, "loss": 0.1214, "step": 13257 }, { "epoch": 0.95, "grad_norm": 1.371040592234891, "learning_rate": 6.978818240399309e-08, "loss": 0.1599, "step": 13258 }, { "epoch": 0.95, "grad_norm": 1.3964108047019743, "learning_rate": 6.95954466312665e-08, "loss": 0.1815, "step": 13259 }, { "epoch": 0.95, "grad_norm": 1.4784656137991061, "learning_rate": 6.940297550306895e-08, "loss": 0.1566, "step": 13260 }, { "epoch": 0.95, "grad_norm": 1.18117714834306, "learning_rate": 6.921076902973167e-08, "loss": 0.1418, "step": 13261 }, { "epoch": 0.95, "grad_norm": 1.238292358755321, "learning_rate": 6.901882722157138e-08, "loss": 0.1704, "step": 13262 }, { "epoch": 0.95, "grad_norm": 1.43456800477493, "learning_rate": 6.882715008889096e-08, "loss": 0.1551, "step": 13263 }, { "epoch": 0.95, "grad_norm": 1.4521725723769963, "learning_rate": 6.863573764197828e-08, "loss": 0.1766, "step": 13264 }, { "epoch": 0.95, "grad_norm": 1.2621969287598642, "learning_rate": 6.844458989110792e-08, "loss": 0.1443, "step": 13265 }, { "epoch": 0.95, "grad_norm": 1.328167882481714, "learning_rate": 6.825370684653998e-08, "loss": 0.1896, "step": 13266 }, { "epoch": 0.95, "grad_norm": 1.3201820530043873, "learning_rate": 6.806308851852017e-08, "loss": 0.1502, "step": 13267 }, { "epoch": 0.95, "grad_norm": 1.2808754072918478, "learning_rate": 6.78727349172803e-08, "loss": 0.184, "step": 13268 }, { "epoch": 0.95, "grad_norm": 1.3209254841369964, "learning_rate": 6.768264605303776e-08, "loss": 0.1727, "step": 13269 }, { "epoch": 0.95, "grad_norm": 1.572085168199533, "learning_rate": 6.749282193599549e-08, "loss": 0.1663, "step": 13270 }, { "epoch": 0.95, "grad_norm": 1.459129359779204, "learning_rate": 6.730326257634312e-08, "loss": 0.182, "step": 13271 }, { "epoch": 0.95, "grad_norm": 1.2546965359437803, "learning_rate": 6.711396798425473e-08, "loss": 0.1728, "step": 13272 }, { "epoch": 0.95, "grad_norm": 1.6376378247208394, "learning_rate": 6.692493816989165e-08, "loss": 0.2236, "step": 13273 }, { "epoch": 0.95, "grad_norm": 4.4782200603387725, "learning_rate": 6.673617314339964e-08, "loss": 0.3883, "step": 13274 }, { "epoch": 0.95, "grad_norm": 1.4372954261811717, "learning_rate": 6.65476729149106e-08, "loss": 0.1601, "step": 13275 }, { "epoch": 0.95, "grad_norm": 1.3228751621797696, "learning_rate": 6.63594374945431e-08, "loss": 0.1667, "step": 13276 }, { "epoch": 0.95, "grad_norm": 1.377892444882691, "learning_rate": 6.617146689240072e-08, "loss": 0.1839, "step": 13277 }, { "epoch": 0.95, "grad_norm": 6.976187994249105, "learning_rate": 6.598376111857319e-08, "loss": 0.694, "step": 13278 }, { "epoch": 0.95, "grad_norm": 6.115276875479222, "learning_rate": 6.579632018313519e-08, "loss": 0.6909, "step": 13279 }, { "epoch": 0.95, "grad_norm": 1.3358222860011357, "learning_rate": 6.560914409614872e-08, "loss": 0.1642, "step": 13280 }, { "epoch": 0.95, "grad_norm": 1.4880771477728123, "learning_rate": 6.542223286765959e-08, "loss": 0.1902, "step": 13281 }, { "epoch": 0.95, "grad_norm": 1.321907913587343, "learning_rate": 6.523558650770146e-08, "loss": 0.1681, "step": 13282 }, { "epoch": 0.95, "grad_norm": 1.3677737122692795, "learning_rate": 6.504920502629187e-08, "loss": 0.1463, "step": 13283 }, { "epoch": 0.95, "grad_norm": 1.5141921611143105, "learning_rate": 6.486308843343558e-08, "loss": 0.1935, "step": 13284 }, { "epoch": 0.95, "grad_norm": 1.3318243542536914, "learning_rate": 6.467723673912185e-08, "loss": 0.1732, "step": 13285 }, { "epoch": 0.95, "grad_norm": 1.3120647066274833, "learning_rate": 6.449164995332768e-08, "loss": 0.1641, "step": 13286 }, { "epoch": 0.95, "grad_norm": 1.4009255292805334, "learning_rate": 6.4306328086014e-08, "loss": 0.1521, "step": 13287 }, { "epoch": 0.95, "grad_norm": 1.48578473007008, "learning_rate": 6.41212711471284e-08, "loss": 0.1548, "step": 13288 }, { "epoch": 0.95, "grad_norm": 1.358401053210439, "learning_rate": 6.393647914660294e-08, "loss": 0.1492, "step": 13289 }, { "epoch": 0.95, "grad_norm": 1.3250508821516083, "learning_rate": 6.375195209435803e-08, "loss": 0.1689, "step": 13290 }, { "epoch": 0.95, "grad_norm": 1.4656157093677809, "learning_rate": 6.356769000029739e-08, "loss": 0.1903, "step": 13291 }, { "epoch": 0.95, "grad_norm": 1.3818822044955037, "learning_rate": 6.3383692874312e-08, "loss": 0.2122, "step": 13292 }, { "epoch": 0.95, "grad_norm": 1.4730473651680245, "learning_rate": 6.319996072627732e-08, "loss": 0.1918, "step": 13293 }, { "epoch": 0.95, "grad_norm": 1.2674267147016414, "learning_rate": 6.301649356605654e-08, "loss": 0.157, "step": 13294 }, { "epoch": 0.95, "grad_norm": 1.1830084310265918, "learning_rate": 6.283329140349626e-08, "loss": 0.1293, "step": 13295 }, { "epoch": 0.95, "grad_norm": 1.4218508529157183, "learning_rate": 6.26503542484308e-08, "loss": 0.164, "step": 13296 }, { "epoch": 0.95, "grad_norm": 5.222018035693397, "learning_rate": 6.2467682110679e-08, "loss": 0.5788, "step": 13297 }, { "epoch": 0.95, "grad_norm": 1.2132260342523824, "learning_rate": 6.228527500004689e-08, "loss": 0.145, "step": 13298 }, { "epoch": 0.95, "grad_norm": 1.2614549166426727, "learning_rate": 6.210313292632386e-08, "loss": 0.1696, "step": 13299 }, { "epoch": 0.95, "grad_norm": 1.5315587375111008, "learning_rate": 6.192125589928821e-08, "loss": 0.1832, "step": 13300 }, { "epoch": 0.95, "grad_norm": 1.401038314734205, "learning_rate": 6.173964392870102e-08, "loss": 0.1512, "step": 13301 }, { "epoch": 0.95, "grad_norm": 1.21109833931684, "learning_rate": 6.15582970243117e-08, "loss": 0.1685, "step": 13302 }, { "epoch": 0.95, "grad_norm": 1.297054989176573, "learning_rate": 6.137721519585304e-08, "loss": 0.192, "step": 13303 }, { "epoch": 0.95, "grad_norm": 5.395650862574453, "learning_rate": 6.119639845304559e-08, "loss": 0.579, "step": 13304 }, { "epoch": 0.95, "grad_norm": 7.147773363851346, "learning_rate": 6.101584680559435e-08, "loss": 0.4924, "step": 13305 }, { "epoch": 0.95, "grad_norm": 1.3199385825542405, "learning_rate": 6.08355602631916e-08, "loss": 0.1552, "step": 13306 }, { "epoch": 0.95, "grad_norm": 1.5480018070497346, "learning_rate": 6.065553883551289e-08, "loss": 0.1963, "step": 13307 }, { "epoch": 0.95, "grad_norm": 1.1936634452362769, "learning_rate": 6.047578253222274e-08, "loss": 0.1656, "step": 13308 }, { "epoch": 0.95, "grad_norm": 1.3050289600060894, "learning_rate": 6.029629136296844e-08, "loss": 0.1844, "step": 13309 }, { "epoch": 0.95, "grad_norm": 10.498520030951294, "learning_rate": 6.011706533738448e-08, "loss": 0.4027, "step": 13310 }, { "epoch": 0.95, "grad_norm": 1.4907994128497006, "learning_rate": 5.993810446509152e-08, "loss": 0.1937, "step": 13311 }, { "epoch": 0.95, "grad_norm": 5.439007954762212, "learning_rate": 5.97594087556952e-08, "loss": 0.6615, "step": 13312 }, { "epoch": 0.95, "grad_norm": 1.433832491403932, "learning_rate": 5.958097821878783e-08, "loss": 0.1299, "step": 13313 }, { "epoch": 0.95, "grad_norm": 1.750590433012544, "learning_rate": 5.940281286394622e-08, "loss": 0.2214, "step": 13314 }, { "epoch": 0.95, "grad_norm": 1.326044406318544, "learning_rate": 5.922491270073327e-08, "loss": 0.2027, "step": 13315 }, { "epoch": 0.95, "grad_norm": 1.2576458229959064, "learning_rate": 5.904727773869856e-08, "loss": 0.1369, "step": 13316 }, { "epoch": 0.95, "grad_norm": 1.4481887572809524, "learning_rate": 5.886990798737668e-08, "loss": 0.1642, "step": 13317 }, { "epoch": 0.95, "grad_norm": 1.331185790914141, "learning_rate": 5.86928034562878e-08, "loss": 0.1453, "step": 13318 }, { "epoch": 0.95, "grad_norm": 9.856244889250446, "learning_rate": 5.8515964154938765e-08, "loss": 0.6249, "step": 13319 }, { "epoch": 0.95, "grad_norm": 1.3697926286046078, "learning_rate": 5.833939009282086e-08, "loss": 0.1739, "step": 13320 }, { "epoch": 0.95, "grad_norm": 5.121631552967087, "learning_rate": 5.816308127941317e-08, "loss": 0.5855, "step": 13321 }, { "epoch": 0.95, "grad_norm": 1.380012654949571, "learning_rate": 5.798703772417758e-08, "loss": 0.1774, "step": 13322 }, { "epoch": 0.95, "grad_norm": 1.5785816694601777, "learning_rate": 5.781125943656485e-08, "loss": 0.1981, "step": 13323 }, { "epoch": 0.95, "grad_norm": 1.251334898223548, "learning_rate": 5.7635746426008556e-08, "loss": 0.1487, "step": 13324 }, { "epoch": 0.95, "grad_norm": 1.3370060453185597, "learning_rate": 5.7460498701931154e-08, "loss": 0.1528, "step": 13325 }, { "epoch": 0.95, "grad_norm": 1.0664712674479397, "learning_rate": 5.728551627373846e-08, "loss": 0.1421, "step": 13326 }, { "epoch": 0.95, "grad_norm": 1.256741311145682, "learning_rate": 5.71107991508224e-08, "loss": 0.1626, "step": 13327 }, { "epoch": 0.95, "grad_norm": 1.2580714492004215, "learning_rate": 5.693634734256215e-08, "loss": 0.1495, "step": 13328 }, { "epoch": 0.95, "grad_norm": 1.4378931754320057, "learning_rate": 5.676216085832076e-08, "loss": 0.1779, "step": 13329 }, { "epoch": 0.95, "grad_norm": 1.2078462543445339, "learning_rate": 5.6588239707447976e-08, "loss": 0.1348, "step": 13330 }, { "epoch": 0.95, "grad_norm": 1.3533986303394667, "learning_rate": 5.6414583899279675e-08, "loss": 0.1555, "step": 13331 }, { "epoch": 0.95, "grad_norm": 1.2627942575878692, "learning_rate": 5.624119344313672e-08, "loss": 0.1382, "step": 13332 }, { "epoch": 0.95, "grad_norm": 1.4373704264141196, "learning_rate": 5.606806834832612e-08, "loss": 0.1825, "step": 13333 }, { "epoch": 0.95, "grad_norm": 1.3456351830861386, "learning_rate": 5.5895208624139885e-08, "loss": 0.1834, "step": 13334 }, { "epoch": 0.95, "grad_norm": 1.3485527747737822, "learning_rate": 5.5722614279857255e-08, "loss": 0.1686, "step": 13335 }, { "epoch": 0.95, "grad_norm": 1.2105391216883623, "learning_rate": 5.5550285324741936e-08, "loss": 0.157, "step": 13336 }, { "epoch": 0.95, "grad_norm": 1.2652996262811467, "learning_rate": 5.53782217680443e-08, "loss": 0.1647, "step": 13337 }, { "epoch": 0.95, "grad_norm": 1.3463600148166348, "learning_rate": 5.520642361899975e-08, "loss": 0.1849, "step": 13338 }, { "epoch": 0.95, "grad_norm": 1.2036475762872985, "learning_rate": 5.5034890886829785e-08, "loss": 0.1313, "step": 13339 }, { "epoch": 0.95, "grad_norm": 1.3399224061386756, "learning_rate": 5.486362358074093e-08, "loss": 0.1678, "step": 13340 }, { "epoch": 0.95, "grad_norm": 1.2092158749392885, "learning_rate": 5.469262170992751e-08, "loss": 0.1283, "step": 13341 }, { "epoch": 0.95, "grad_norm": 1.3136108378510918, "learning_rate": 5.4521885283567165e-08, "loss": 0.1535, "step": 13342 }, { "epoch": 0.95, "grad_norm": 1.3558511872621661, "learning_rate": 5.43514143108248e-08, "loss": 0.1786, "step": 13343 }, { "epoch": 0.95, "grad_norm": 1.2285151000405505, "learning_rate": 5.4181208800850316e-08, "loss": 0.1393, "step": 13344 }, { "epoch": 0.95, "grad_norm": 1.2208502706003217, "learning_rate": 5.4011268762780286e-08, "loss": 0.1778, "step": 13345 }, { "epoch": 0.95, "grad_norm": 1.2159366049961524, "learning_rate": 5.3841594205735756e-08, "loss": 0.1491, "step": 13346 }, { "epoch": 0.95, "grad_norm": 1.52271175688257, "learning_rate": 5.367218513882444e-08, "loss": 0.2022, "step": 13347 }, { "epoch": 0.95, "grad_norm": 1.5162390871495097, "learning_rate": 5.3503041571139615e-08, "loss": 0.1809, "step": 13348 }, { "epoch": 0.95, "grad_norm": 1.3098647538696273, "learning_rate": 5.3334163511759574e-08, "loss": 0.1349, "step": 13349 }, { "epoch": 0.95, "grad_norm": 1.357042284406413, "learning_rate": 5.31655509697504e-08, "loss": 0.1542, "step": 13350 }, { "epoch": 0.96, "grad_norm": 1.37228549287707, "learning_rate": 5.299720395416097e-08, "loss": 0.1965, "step": 13351 }, { "epoch": 0.96, "grad_norm": 6.073265668477661, "learning_rate": 5.282912247402849e-08, "loss": 0.4443, "step": 13352 }, { "epoch": 0.96, "grad_norm": 1.3773165249115564, "learning_rate": 5.2661306538374647e-08, "loss": 0.1505, "step": 13353 }, { "epoch": 0.96, "grad_norm": 5.9919602453498335, "learning_rate": 5.2493756156207224e-08, "loss": 0.5429, "step": 13354 }, { "epoch": 0.96, "grad_norm": 7.105664864594872, "learning_rate": 5.232647133651958e-08, "loss": 0.5636, "step": 13355 }, { "epoch": 0.96, "grad_norm": 1.3579666109855832, "learning_rate": 5.2159452088290654e-08, "loss": 0.1699, "step": 13356 }, { "epoch": 0.96, "grad_norm": 1.3171739091207284, "learning_rate": 5.1992698420486045e-08, "loss": 0.1551, "step": 13357 }, { "epoch": 0.96, "grad_norm": 1.3148577970301574, "learning_rate": 5.182621034205582e-08, "loss": 0.1669, "step": 13358 }, { "epoch": 0.96, "grad_norm": 1.3866526739992016, "learning_rate": 5.1659987861936154e-08, "loss": 0.1748, "step": 13359 }, { "epoch": 0.96, "grad_norm": 1.2428152545721247, "learning_rate": 5.1494030989049926e-08, "loss": 0.1368, "step": 13360 }, { "epoch": 0.96, "grad_norm": 1.2402516349793913, "learning_rate": 5.1328339732304446e-08, "loss": 0.1662, "step": 13361 }, { "epoch": 0.96, "grad_norm": 1.4311569486596438, "learning_rate": 5.116291410059426e-08, "loss": 0.1553, "step": 13362 }, { "epoch": 0.96, "grad_norm": 1.2792495997952833, "learning_rate": 5.0997754102797836e-08, "loss": 0.1589, "step": 13363 }, { "epoch": 0.96, "grad_norm": 1.3390687671844013, "learning_rate": 5.083285974778085e-08, "loss": 0.181, "step": 13364 }, { "epoch": 0.96, "grad_norm": 1.2775621884755708, "learning_rate": 5.0668231044394e-08, "loss": 0.1754, "step": 13365 }, { "epoch": 0.96, "grad_norm": 1.3059576246723856, "learning_rate": 5.050386800147356e-08, "loss": 0.1812, "step": 13366 }, { "epoch": 0.96, "grad_norm": 1.310051122957858, "learning_rate": 5.033977062784245e-08, "loss": 0.1567, "step": 13367 }, { "epoch": 0.96, "grad_norm": 1.2069005617167312, "learning_rate": 5.01759389323081e-08, "loss": 0.1601, "step": 13368 }, { "epoch": 0.96, "grad_norm": 1.2388821618341201, "learning_rate": 5.0012372923664565e-08, "loss": 0.1657, "step": 13369 }, { "epoch": 0.96, "grad_norm": 1.383510345278049, "learning_rate": 4.9849072610692605e-08, "loss": 0.1436, "step": 13370 }, { "epoch": 0.96, "grad_norm": 5.3380576104167705, "learning_rate": 4.968603800215577e-08, "loss": 0.6283, "step": 13371 }, { "epoch": 0.96, "grad_norm": 4.690474436780941, "learning_rate": 4.952326910680594e-08, "loss": 0.555, "step": 13372 }, { "epoch": 0.96, "grad_norm": 1.3003011457676212, "learning_rate": 4.936076593338002e-08, "loss": 0.1753, "step": 13373 }, { "epoch": 0.96, "grad_norm": 1.4804120316538376, "learning_rate": 4.919852849059992e-08, "loss": 0.177, "step": 13374 }, { "epoch": 0.96, "grad_norm": 1.2709296202328781, "learning_rate": 4.9036556787174785e-08, "loss": 0.1533, "step": 13375 }, { "epoch": 0.96, "grad_norm": 1.3585750777153645, "learning_rate": 4.887485083179766e-08, "loss": 0.1773, "step": 13376 }, { "epoch": 0.96, "grad_norm": 1.4202582383668876, "learning_rate": 4.871341063314883e-08, "loss": 0.1625, "step": 13377 }, { "epoch": 0.96, "grad_norm": 1.3141249428230466, "learning_rate": 4.855223619989413e-08, "loss": 0.1478, "step": 13378 }, { "epoch": 0.96, "grad_norm": 1.5275567644878913, "learning_rate": 4.839132754068443e-08, "loss": 0.2305, "step": 13379 }, { "epoch": 0.96, "grad_norm": 1.4363399500901421, "learning_rate": 4.823068466415615e-08, "loss": 0.1512, "step": 13380 }, { "epoch": 0.96, "grad_norm": 1.3632251100023647, "learning_rate": 4.807030757893238e-08, "loss": 0.1442, "step": 13381 }, { "epoch": 0.96, "grad_norm": 1.2103501576223712, "learning_rate": 4.7910196293621814e-08, "loss": 0.1329, "step": 13382 }, { "epoch": 0.96, "grad_norm": 1.5332606217612996, "learning_rate": 4.775035081681812e-08, "loss": 0.1703, "step": 13383 }, { "epoch": 0.96, "grad_norm": 1.340850258744859, "learning_rate": 4.759077115710109e-08, "loss": 0.1649, "step": 13384 }, { "epoch": 0.96, "grad_norm": 1.3767552298241532, "learning_rate": 4.743145732303722e-08, "loss": 0.1717, "step": 13385 }, { "epoch": 0.96, "grad_norm": 1.2609956949348624, "learning_rate": 4.7272409323176894e-08, "loss": 0.1476, "step": 13386 }, { "epoch": 0.96, "grad_norm": 1.3904608078504561, "learning_rate": 4.711362716605716e-08, "loss": 0.1699, "step": 13387 }, { "epoch": 0.96, "grad_norm": 1.552395597100296, "learning_rate": 4.6955110860201214e-08, "loss": 0.1706, "step": 13388 }, { "epoch": 0.96, "grad_norm": 1.3650201046552604, "learning_rate": 4.679686041411835e-08, "loss": 0.1568, "step": 13389 }, { "epoch": 0.96, "grad_norm": 1.4027235500671005, "learning_rate": 4.663887583630122e-08, "loss": 0.1782, "step": 13390 }, { "epoch": 0.96, "grad_norm": 1.145779485786868, "learning_rate": 4.6481157135230827e-08, "loss": 0.1399, "step": 13391 }, { "epoch": 0.96, "grad_norm": 1.3665753861588434, "learning_rate": 4.632370431937261e-08, "loss": 0.1983, "step": 13392 }, { "epoch": 0.96, "grad_norm": 1.257564542521957, "learning_rate": 4.6166517397178144e-08, "loss": 0.1528, "step": 13393 }, { "epoch": 0.96, "grad_norm": 1.472840982699897, "learning_rate": 4.6009596377084023e-08, "loss": 0.157, "step": 13394 }, { "epoch": 0.96, "grad_norm": 1.1384737356892514, "learning_rate": 4.58529412675135e-08, "loss": 0.1554, "step": 13395 }, { "epoch": 0.96, "grad_norm": 1.32307034310547, "learning_rate": 4.5696552076875954e-08, "loss": 0.1938, "step": 13396 }, { "epoch": 0.96, "grad_norm": 1.136596042085612, "learning_rate": 4.5540428813564684e-08, "loss": 0.1231, "step": 13397 }, { "epoch": 0.96, "grad_norm": 1.264315099096542, "learning_rate": 4.5384571485960204e-08, "loss": 0.1546, "step": 13398 }, { "epoch": 0.96, "grad_norm": 1.5468919884633898, "learning_rate": 4.52289801024286e-08, "loss": 0.2084, "step": 13399 }, { "epoch": 0.96, "grad_norm": 1.2726377104857365, "learning_rate": 4.5073654671320965e-08, "loss": 0.158, "step": 13400 }, { "epoch": 0.96, "grad_norm": 5.964969621632054, "learning_rate": 4.491859520097452e-08, "loss": 0.589, "step": 13401 }, { "epoch": 0.96, "grad_norm": 1.19618936197148, "learning_rate": 4.476380169971262e-08, "loss": 0.1455, "step": 13402 }, { "epoch": 0.96, "grad_norm": 1.1565798383005856, "learning_rate": 4.46092741758436e-08, "loss": 0.1488, "step": 13403 }, { "epoch": 0.96, "grad_norm": 1.3095333949218393, "learning_rate": 4.445501263766194e-08, "loss": 0.1803, "step": 13404 }, { "epoch": 0.96, "grad_norm": 1.3758516805632601, "learning_rate": 4.4301017093447695e-08, "loss": 0.1738, "step": 13405 }, { "epoch": 0.96, "grad_norm": 1.3167378257075941, "learning_rate": 4.414728755146702e-08, "loss": 0.1422, "step": 13406 }, { "epoch": 0.96, "grad_norm": 1.5680713710979421, "learning_rate": 4.3993824019971654e-08, "loss": 0.1826, "step": 13407 }, { "epoch": 0.96, "grad_norm": 1.4868944384571994, "learning_rate": 4.3840626507198334e-08, "loss": 0.168, "step": 13408 }, { "epoch": 0.96, "grad_norm": 1.2962314810006972, "learning_rate": 4.368769502137049e-08, "loss": 0.1582, "step": 13409 }, { "epoch": 0.96, "grad_norm": 1.4072354396182396, "learning_rate": 4.353502957069711e-08, "loss": 0.1316, "step": 13410 }, { "epoch": 0.96, "grad_norm": 1.156987034701498, "learning_rate": 4.338263016337219e-08, "loss": 0.1236, "step": 13411 }, { "epoch": 0.96, "grad_norm": 6.132235860517753, "learning_rate": 4.323049680757585e-08, "loss": 0.3685, "step": 13412 }, { "epoch": 0.96, "grad_norm": 1.3470456979430232, "learning_rate": 4.307862951147434e-08, "loss": 0.1579, "step": 13413 }, { "epoch": 0.96, "grad_norm": 1.4361559686875618, "learning_rate": 4.292702828321948e-08, "loss": 0.1252, "step": 13414 }, { "epoch": 0.96, "grad_norm": 1.4610914571945541, "learning_rate": 4.2775693130948094e-08, "loss": 0.1861, "step": 13415 }, { "epoch": 0.96, "grad_norm": 1.2594491982909732, "learning_rate": 4.262462406278367e-08, "loss": 0.1574, "step": 13416 }, { "epoch": 0.96, "grad_norm": 1.4964065089956797, "learning_rate": 4.247382108683418e-08, "loss": 0.1889, "step": 13417 }, { "epoch": 0.96, "grad_norm": 1.4175671685548346, "learning_rate": 4.232328421119591e-08, "loss": 0.171, "step": 13418 }, { "epoch": 0.96, "grad_norm": 1.4713696020562725, "learning_rate": 4.2173013443946866e-08, "loss": 0.1568, "step": 13419 }, { "epoch": 0.96, "grad_norm": 1.3120353953799364, "learning_rate": 4.202300879315446e-08, "loss": 0.1384, "step": 13420 }, { "epoch": 0.96, "grad_norm": 1.1979885422368324, "learning_rate": 4.1873270266870046e-08, "loss": 0.1246, "step": 13421 }, { "epoch": 0.96, "grad_norm": 1.2084865364089519, "learning_rate": 4.1723797873130524e-08, "loss": 0.1445, "step": 13422 }, { "epoch": 0.96, "grad_norm": 5.106882124440699, "learning_rate": 4.1574591619959473e-08, "loss": 0.4468, "step": 13423 }, { "epoch": 0.96, "grad_norm": 1.3132370954953507, "learning_rate": 4.142565151536604e-08, "loss": 0.163, "step": 13424 }, { "epoch": 0.96, "grad_norm": 1.3737600751147108, "learning_rate": 4.1276977567343835e-08, "loss": 0.1542, "step": 13425 }, { "epoch": 0.96, "grad_norm": 1.4480994235103994, "learning_rate": 4.112856978387369e-08, "loss": 0.1643, "step": 13426 }, { "epoch": 0.96, "grad_norm": 1.3671886661168962, "learning_rate": 4.098042817292147e-08, "loss": 0.182, "step": 13427 }, { "epoch": 0.96, "grad_norm": 1.3451970784005587, "learning_rate": 4.083255274243858e-08, "loss": 0.1605, "step": 13428 }, { "epoch": 0.96, "grad_norm": 1.3075391320870409, "learning_rate": 4.0684943500362004e-08, "loss": 0.1493, "step": 13429 }, { "epoch": 0.96, "grad_norm": 1.4190269636146942, "learning_rate": 4.0537600454615964e-08, "loss": 0.1766, "step": 13430 }, { "epoch": 0.96, "grad_norm": 9.455965064516791, "learning_rate": 4.039052361310858e-08, "loss": 0.5239, "step": 13431 }, { "epoch": 0.96, "grad_norm": 1.3762837000499397, "learning_rate": 4.024371298373464e-08, "loss": 0.1615, "step": 13432 }, { "epoch": 0.96, "grad_norm": 1.311405070834158, "learning_rate": 4.0097168574373404e-08, "loss": 0.1694, "step": 13433 }, { "epoch": 0.96, "grad_norm": 6.647325873059729, "learning_rate": 3.9950890392892463e-08, "loss": 0.4556, "step": 13434 }, { "epoch": 0.96, "grad_norm": 1.195812367605104, "learning_rate": 3.980487844714165e-08, "loss": 0.1372, "step": 13435 }, { "epoch": 0.96, "grad_norm": 1.21639358457569, "learning_rate": 3.965913274495914e-08, "loss": 0.1578, "step": 13436 }, { "epoch": 0.96, "grad_norm": 1.5475300240448313, "learning_rate": 3.951365329416868e-08, "loss": 0.2478, "step": 13437 }, { "epoch": 0.96, "grad_norm": 1.3909560837744872, "learning_rate": 3.9368440102577344e-08, "loss": 0.1491, "step": 13438 }, { "epoch": 0.96, "grad_norm": 1.3906920920374972, "learning_rate": 3.922349317798169e-08, "loss": 0.1446, "step": 13439 }, { "epoch": 0.96, "grad_norm": 6.407279423094399, "learning_rate": 3.907881252816048e-08, "loss": 0.623, "step": 13440 }, { "epoch": 0.96, "grad_norm": 1.2883523489273276, "learning_rate": 3.893439816087974e-08, "loss": 0.1564, "step": 13441 }, { "epoch": 0.96, "grad_norm": 1.325358919754688, "learning_rate": 3.8790250083891035e-08, "loss": 0.2085, "step": 13442 }, { "epoch": 0.96, "grad_norm": 1.4168206996971229, "learning_rate": 3.864636830493263e-08, "loss": 0.1819, "step": 13443 }, { "epoch": 0.96, "grad_norm": 1.297821226690224, "learning_rate": 3.850275283172611e-08, "loss": 0.1557, "step": 13444 }, { "epoch": 0.96, "grad_norm": 1.3453988818866625, "learning_rate": 3.8359403671980876e-08, "loss": 0.1489, "step": 13445 }, { "epoch": 0.96, "grad_norm": 1.310748126225704, "learning_rate": 3.821632083339133e-08, "loss": 0.1756, "step": 13446 }, { "epoch": 0.96, "grad_norm": 1.3953551702796914, "learning_rate": 3.8073504323637434e-08, "loss": 0.1544, "step": 13447 }, { "epoch": 0.96, "grad_norm": 5.894984344402886, "learning_rate": 3.793095415038528e-08, "loss": 0.5836, "step": 13448 }, { "epoch": 0.96, "grad_norm": 1.2750278313666839, "learning_rate": 3.778867032128653e-08, "loss": 0.1588, "step": 13449 }, { "epoch": 0.96, "grad_norm": 1.6094053246130275, "learning_rate": 3.764665284397784e-08, "loss": 0.1746, "step": 13450 }, { "epoch": 0.96, "grad_norm": 1.238658067704647, "learning_rate": 3.750490172608257e-08, "loss": 0.1464, "step": 13451 }, { "epoch": 0.96, "grad_norm": 1.4034514801998699, "learning_rate": 3.7363416975209065e-08, "loss": 0.1544, "step": 13452 }, { "epoch": 0.96, "grad_norm": 1.4911247207287885, "learning_rate": 3.722219859895182e-08, "loss": 0.1969, "step": 13453 }, { "epoch": 0.96, "grad_norm": 1.3525103632612157, "learning_rate": 3.7081246604890876e-08, "loss": 0.1652, "step": 13454 }, { "epoch": 0.96, "grad_norm": 1.4092494223929755, "learning_rate": 3.69405610005924e-08, "loss": 0.1874, "step": 13455 }, { "epoch": 0.96, "grad_norm": 1.1959723883173319, "learning_rate": 3.6800141793607026e-08, "loss": 0.1484, "step": 13456 }, { "epoch": 0.96, "grad_norm": 1.17583171042287, "learning_rate": 3.665998899147261e-08, "loss": 0.1321, "step": 13457 }, { "epoch": 0.96, "grad_norm": 1.3453310996721675, "learning_rate": 3.652010260171146e-08, "loss": 0.1308, "step": 13458 }, { "epoch": 0.96, "grad_norm": 1.5268250521353204, "learning_rate": 3.638048263183258e-08, "loss": 0.1989, "step": 13459 }, { "epoch": 0.96, "grad_norm": 1.5151274014800573, "learning_rate": 3.6241129089329416e-08, "loss": 0.1882, "step": 13460 }, { "epoch": 0.96, "grad_norm": 1.4312937639909997, "learning_rate": 3.6102041981682656e-08, "loss": 0.1766, "step": 13461 }, { "epoch": 0.96, "grad_norm": 1.3018182090407715, "learning_rate": 3.596322131635799e-08, "loss": 0.1452, "step": 13462 }, { "epoch": 0.96, "grad_norm": 8.633747961327465, "learning_rate": 3.5824667100806676e-08, "loss": 0.5477, "step": 13463 }, { "epoch": 0.96, "grad_norm": 1.3934875907470676, "learning_rate": 3.5686379342464994e-08, "loss": 0.1936, "step": 13464 }, { "epoch": 0.96, "grad_norm": 1.3088132671013497, "learning_rate": 3.5548358048757007e-08, "loss": 0.1549, "step": 13465 }, { "epoch": 0.96, "grad_norm": 1.2805761724438518, "learning_rate": 3.541060322709011e-08, "loss": 0.156, "step": 13466 }, { "epoch": 0.96, "grad_norm": 1.3045256029709005, "learning_rate": 3.527311488485896e-08, "loss": 0.2026, "step": 13467 }, { "epoch": 0.96, "grad_norm": 1.386591364130476, "learning_rate": 3.513589302944265e-08, "loss": 0.1849, "step": 13468 }, { "epoch": 0.96, "grad_norm": 4.512860731630957, "learning_rate": 3.49989376682075e-08, "loss": 0.594, "step": 13469 }, { "epoch": 0.96, "grad_norm": 1.3971922711846156, "learning_rate": 3.486224880850431e-08, "loss": 0.1768, "step": 13470 }, { "epoch": 0.96, "grad_norm": 1.3534205229345293, "learning_rate": 3.472582645767053e-08, "loss": 0.1699, "step": 13471 }, { "epoch": 0.96, "grad_norm": 1.306108318680653, "learning_rate": 3.458967062302865e-08, "loss": 0.1426, "step": 13472 }, { "epoch": 0.96, "grad_norm": 1.1931731613072083, "learning_rate": 3.4453781311886145e-08, "loss": 0.1331, "step": 13473 }, { "epoch": 0.96, "grad_norm": 1.1795034432627136, "learning_rate": 3.4318158531537746e-08, "loss": 0.1205, "step": 13474 }, { "epoch": 0.96, "grad_norm": 1.4211815443932077, "learning_rate": 3.4182802289263186e-08, "loss": 0.177, "step": 13475 }, { "epoch": 0.96, "grad_norm": 1.2465314056057581, "learning_rate": 3.4047712592327753e-08, "loss": 0.1556, "step": 13476 }, { "epoch": 0.96, "grad_norm": 1.3795035586194502, "learning_rate": 3.3912889447981765e-08, "loss": 0.1764, "step": 13477 }, { "epoch": 0.96, "grad_norm": 1.520335697353306, "learning_rate": 3.377833286346333e-08, "loss": 0.2082, "step": 13478 }, { "epoch": 0.96, "grad_norm": 1.349788380273607, "learning_rate": 3.3644042845993876e-08, "loss": 0.1825, "step": 13479 }, { "epoch": 0.96, "grad_norm": 1.6004288411105598, "learning_rate": 3.351001940278209e-08, "loss": 0.1877, "step": 13480 }, { "epoch": 0.96, "grad_norm": 1.169855199066051, "learning_rate": 3.3376262541021667e-08, "loss": 0.1493, "step": 13481 }, { "epoch": 0.96, "grad_norm": 7.441778544750508, "learning_rate": 3.3242772267891853e-08, "loss": 0.5192, "step": 13482 }, { "epoch": 0.96, "grad_norm": 1.2088909576113247, "learning_rate": 3.3109548590558595e-08, "loss": 0.1252, "step": 13483 }, { "epoch": 0.96, "grad_norm": 4.7969975915167264, "learning_rate": 3.297659151617172e-08, "loss": 0.5821, "step": 13484 }, { "epoch": 0.96, "grad_norm": 1.3614781721951619, "learning_rate": 3.2843901051868854e-08, "loss": 0.1933, "step": 13485 }, { "epoch": 0.96, "grad_norm": 1.2482815065413153, "learning_rate": 3.271147720477208e-08, "loss": 0.1232, "step": 13486 }, { "epoch": 0.96, "grad_norm": 1.355364373719001, "learning_rate": 3.257931998198904e-08, "loss": 0.1911, "step": 13487 }, { "epoch": 0.96, "grad_norm": 7.4889812160759455, "learning_rate": 3.244742939061352e-08, "loss": 0.507, "step": 13488 }, { "epoch": 0.96, "grad_norm": 1.4028676514759573, "learning_rate": 3.231580543772483e-08, "loss": 0.1781, "step": 13489 }, { "epoch": 0.96, "grad_norm": 1.3188738616019953, "learning_rate": 3.2184448130388456e-08, "loss": 0.1356, "step": 13490 }, { "epoch": 0.97, "grad_norm": 1.3738877569673011, "learning_rate": 3.205335747565486e-08, "loss": 0.1908, "step": 13491 }, { "epoch": 0.97, "grad_norm": 1.5249259141448719, "learning_rate": 3.192253348056062e-08, "loss": 0.1746, "step": 13492 }, { "epoch": 0.97, "grad_norm": 1.5052533731646958, "learning_rate": 3.1791976152126816e-08, "loss": 0.1891, "step": 13493 }, { "epoch": 0.97, "grad_norm": 1.238036928738311, "learning_rate": 3.1661685497362816e-08, "loss": 0.1458, "step": 13494 }, { "epoch": 0.97, "grad_norm": 5.409437953025684, "learning_rate": 3.153166152326137e-08, "loss": 0.6771, "step": 13495 }, { "epoch": 0.97, "grad_norm": 1.3269830434880143, "learning_rate": 3.1401904236801336e-08, "loss": 0.1431, "step": 13496 }, { "epoch": 0.97, "grad_norm": 1.375706950459523, "learning_rate": 3.127241364494771e-08, "loss": 0.1382, "step": 13497 }, { "epoch": 0.97, "grad_norm": 1.2645535490570792, "learning_rate": 3.1143189754651584e-08, "loss": 0.1605, "step": 13498 }, { "epoch": 0.97, "grad_norm": 1.2799245748736396, "learning_rate": 3.1014232572848525e-08, "loss": 0.1312, "step": 13499 }, { "epoch": 0.97, "grad_norm": 1.136469198607651, "learning_rate": 3.088554210646133e-08, "loss": 0.1442, "step": 13500 }, { "epoch": 0.97, "grad_norm": 1.4319355551291426, "learning_rate": 3.0757118362396144e-08, "loss": 0.1684, "step": 13501 }, { "epoch": 0.97, "grad_norm": 1.6479174243125727, "learning_rate": 3.0628961347547445e-08, "loss": 0.2019, "step": 13502 }, { "epoch": 0.97, "grad_norm": 1.2836780399883283, "learning_rate": 3.0501071068794185e-08, "loss": 0.1845, "step": 13503 }, { "epoch": 0.97, "grad_norm": 1.285343074122367, "learning_rate": 3.037344753300031e-08, "loss": 0.1475, "step": 13504 }, { "epoch": 0.97, "grad_norm": 1.5010741709669615, "learning_rate": 3.0246090747016455e-08, "loss": 0.1653, "step": 13505 }, { "epoch": 0.97, "grad_norm": 1.2568853881008153, "learning_rate": 3.0119000717678814e-08, "loss": 0.1399, "step": 13506 }, { "epoch": 0.97, "grad_norm": 1.393717375410588, "learning_rate": 2.9992177451809156e-08, "loss": 0.1682, "step": 13507 }, { "epoch": 0.97, "grad_norm": 1.3677880404379226, "learning_rate": 2.986562095621426e-08, "loss": 0.1907, "step": 13508 }, { "epoch": 0.97, "grad_norm": 1.4056186342977315, "learning_rate": 2.9739331237687575e-08, "loss": 0.1635, "step": 13509 }, { "epoch": 0.97, "grad_norm": 1.3079993999593347, "learning_rate": 2.9613308303007572e-08, "loss": 0.1399, "step": 13510 }, { "epoch": 0.97, "grad_norm": 1.2760156072200008, "learning_rate": 2.948755215893939e-08, "loss": 0.1691, "step": 13511 }, { "epoch": 0.97, "grad_norm": 5.373478363788751, "learning_rate": 2.936206281223264e-08, "loss": 0.484, "step": 13512 }, { "epoch": 0.97, "grad_norm": 1.4733086704891554, "learning_rate": 2.923684026962248e-08, "loss": 0.1784, "step": 13513 }, { "epoch": 0.97, "grad_norm": 1.4061030306317441, "learning_rate": 2.9111884537831315e-08, "loss": 0.2032, "step": 13514 }, { "epoch": 0.97, "grad_norm": 1.3760417016050845, "learning_rate": 2.8987195623566e-08, "loss": 0.1589, "step": 13515 }, { "epoch": 0.97, "grad_norm": 1.2306461682771985, "learning_rate": 2.886277353351896e-08, "loss": 0.1757, "step": 13516 }, { "epoch": 0.97, "grad_norm": 1.5113202700567616, "learning_rate": 2.8738618274369302e-08, "loss": 0.1755, "step": 13517 }, { "epoch": 0.97, "grad_norm": 1.3946485198098404, "learning_rate": 2.861472985278002e-08, "loss": 0.2106, "step": 13518 }, { "epoch": 0.97, "grad_norm": 1.423962663122846, "learning_rate": 2.8491108275403023e-08, "loss": 0.1372, "step": 13519 }, { "epoch": 0.97, "grad_norm": 1.2973631005391462, "learning_rate": 2.8367753548871335e-08, "loss": 0.1771, "step": 13520 }, { "epoch": 0.97, "grad_norm": 1.3442053530083253, "learning_rate": 2.8244665679807992e-08, "loss": 0.1863, "step": 13521 }, { "epoch": 0.97, "grad_norm": 5.052238576094346, "learning_rate": 2.8121844674819375e-08, "loss": 0.5306, "step": 13522 }, { "epoch": 0.97, "grad_norm": 1.2926721786385134, "learning_rate": 2.799929054049799e-08, "loss": 0.1715, "step": 13523 }, { "epoch": 0.97, "grad_norm": 1.2991845001196516, "learning_rate": 2.7877003283421356e-08, "loss": 0.1665, "step": 13524 }, { "epoch": 0.97, "grad_norm": 1.1588570258818658, "learning_rate": 2.775498291015477e-08, "loss": 0.142, "step": 13525 }, { "epoch": 0.97, "grad_norm": 1.4322364013974507, "learning_rate": 2.7633229427246334e-08, "loss": 0.1483, "step": 13526 }, { "epoch": 0.97, "grad_norm": 1.294714964995783, "learning_rate": 2.751174284123248e-08, "loss": 0.1055, "step": 13527 }, { "epoch": 0.97, "grad_norm": 1.39709608110624, "learning_rate": 2.7390523158633552e-08, "loss": 0.1364, "step": 13528 }, { "epoch": 0.97, "grad_norm": 1.3136065560964818, "learning_rate": 2.7269570385956012e-08, "loss": 0.1412, "step": 13529 }, { "epoch": 0.97, "grad_norm": 1.3340868632646825, "learning_rate": 2.7148884529692444e-08, "loss": 0.1773, "step": 13530 }, { "epoch": 0.97, "grad_norm": 1.36558477984767, "learning_rate": 2.702846559632044e-08, "loss": 0.1767, "step": 13531 }, { "epoch": 0.97, "grad_norm": 1.393137723140857, "learning_rate": 2.690831359230428e-08, "loss": 0.139, "step": 13532 }, { "epoch": 0.97, "grad_norm": 1.503710022547652, "learning_rate": 2.6788428524092136e-08, "loss": 0.1695, "step": 13533 }, { "epoch": 0.97, "grad_norm": 1.493751394526797, "learning_rate": 2.6668810398120524e-08, "loss": 0.1842, "step": 13534 }, { "epoch": 0.97, "grad_norm": 1.386651815626172, "learning_rate": 2.6549459220808204e-08, "loss": 0.1685, "step": 13535 }, { "epoch": 0.97, "grad_norm": 1.3463293067683428, "learning_rate": 2.643037499856338e-08, "loss": 0.1578, "step": 13536 }, { "epoch": 0.97, "grad_norm": 1.3135651399226986, "learning_rate": 2.6311557737776495e-08, "loss": 0.178, "step": 13537 }, { "epoch": 0.97, "grad_norm": 1.1243493549468184, "learning_rate": 2.619300744482578e-08, "loss": 0.1297, "step": 13538 }, { "epoch": 0.97, "grad_norm": 1.269470891552126, "learning_rate": 2.607472412607448e-08, "loss": 0.1522, "step": 13539 }, { "epoch": 0.97, "grad_norm": 1.3015713698427231, "learning_rate": 2.595670778787196e-08, "loss": 0.1689, "step": 13540 }, { "epoch": 0.97, "grad_norm": 1.3901035013019694, "learning_rate": 2.5838958436552043e-08, "loss": 0.1863, "step": 13541 }, { "epoch": 0.97, "grad_norm": 1.3181246927638073, "learning_rate": 2.5721476078436335e-08, "loss": 0.1708, "step": 13542 }, { "epoch": 0.97, "grad_norm": 1.307648757868234, "learning_rate": 2.5604260719829244e-08, "loss": 0.1572, "step": 13543 }, { "epoch": 0.97, "grad_norm": 1.2966709292091905, "learning_rate": 2.5487312367024063e-08, "loss": 0.1583, "step": 13544 }, { "epoch": 0.97, "grad_norm": 1.2317757922946628, "learning_rate": 2.5370631026296334e-08, "loss": 0.1846, "step": 13545 }, { "epoch": 0.97, "grad_norm": 1.447115643116738, "learning_rate": 2.5254216703910483e-08, "loss": 0.1747, "step": 13546 }, { "epoch": 0.97, "grad_norm": 1.2540977062694127, "learning_rate": 2.5138069406114296e-08, "loss": 0.1591, "step": 13547 }, { "epoch": 0.97, "grad_norm": 1.140631595495422, "learning_rate": 2.5022189139143338e-08, "loss": 0.1201, "step": 13548 }, { "epoch": 0.97, "grad_norm": 4.737416834232494, "learning_rate": 2.4906575909215968e-08, "loss": 0.5465, "step": 13549 }, { "epoch": 0.97, "grad_norm": 1.380948013559843, "learning_rate": 2.479122972253889e-08, "loss": 0.1572, "step": 13550 }, { "epoch": 0.97, "grad_norm": 1.4641710164753734, "learning_rate": 2.4676150585302705e-08, "loss": 0.1943, "step": 13551 }, { "epoch": 0.97, "grad_norm": 1.211314833726081, "learning_rate": 2.456133850368525e-08, "loss": 0.1467, "step": 13552 }, { "epoch": 0.97, "grad_norm": 1.3583144272661467, "learning_rate": 2.4446793483848818e-08, "loss": 0.1889, "step": 13553 }, { "epoch": 0.97, "grad_norm": 1.2773645151015598, "learning_rate": 2.433251553194127e-08, "loss": 0.142, "step": 13554 }, { "epoch": 0.97, "grad_norm": 1.3066702279865525, "learning_rate": 2.4218504654097696e-08, "loss": 0.1842, "step": 13555 }, { "epoch": 0.97, "grad_norm": 1.2953732597589545, "learning_rate": 2.4104760856436538e-08, "loss": 0.1492, "step": 13556 }, { "epoch": 0.97, "grad_norm": 1.353893000896847, "learning_rate": 2.3991284145063465e-08, "loss": 0.1551, "step": 13557 }, { "epoch": 0.97, "grad_norm": 1.40933615701645, "learning_rate": 2.387807452607027e-08, "loss": 0.1677, "step": 13558 }, { "epoch": 0.97, "grad_norm": 1.1648837223117543, "learning_rate": 2.3765132005532654e-08, "loss": 0.1336, "step": 13559 }, { "epoch": 0.97, "grad_norm": 1.2435376472134816, "learning_rate": 2.3652456589512983e-08, "loss": 0.174, "step": 13560 }, { "epoch": 0.97, "grad_norm": 1.527629800209174, "learning_rate": 2.3540048284059758e-08, "loss": 0.1792, "step": 13561 }, { "epoch": 0.97, "grad_norm": 1.3682203102662622, "learning_rate": 2.3427907095205927e-08, "loss": 0.1337, "step": 13562 }, { "epoch": 0.97, "grad_norm": 1.4065334497810493, "learning_rate": 2.3316033028971672e-08, "loss": 0.1517, "step": 13563 }, { "epoch": 0.97, "grad_norm": 4.641842457643189, "learning_rate": 2.3204426091361087e-08, "loss": 0.5222, "step": 13564 }, { "epoch": 0.97, "grad_norm": 1.6858020574352042, "learning_rate": 2.3093086288365486e-08, "loss": 0.2067, "step": 13565 }, { "epoch": 0.97, "grad_norm": 1.5102376833356161, "learning_rate": 2.298201362596064e-08, "loss": 0.1865, "step": 13566 }, { "epoch": 0.97, "grad_norm": 1.3377594375223474, "learning_rate": 2.2871208110108457e-08, "loss": 0.1749, "step": 13567 }, { "epoch": 0.97, "grad_norm": 1.323008478608899, "learning_rate": 2.2760669746756947e-08, "loss": 0.142, "step": 13568 }, { "epoch": 0.97, "grad_norm": 1.2851556554603194, "learning_rate": 2.2650398541839147e-08, "loss": 0.1452, "step": 13569 }, { "epoch": 0.97, "grad_norm": 1.5506223029508786, "learning_rate": 2.254039450127421e-08, "loss": 0.2062, "step": 13570 }, { "epoch": 0.97, "grad_norm": 1.462420555545724, "learning_rate": 2.24306576309663e-08, "loss": 0.1888, "step": 13571 }, { "epoch": 0.97, "grad_norm": 1.4176010585166643, "learning_rate": 2.232118793680571e-08, "loss": 0.1737, "step": 13572 }, { "epoch": 0.97, "grad_norm": 1.370509904235416, "learning_rate": 2.2211985424668292e-08, "loss": 0.2076, "step": 13573 }, { "epoch": 0.97, "grad_norm": 1.2507334598061663, "learning_rate": 2.2103050100416023e-08, "loss": 0.1228, "step": 13574 }, { "epoch": 0.97, "grad_norm": 1.2975733931000102, "learning_rate": 2.199438196989645e-08, "loss": 0.1411, "step": 13575 }, { "epoch": 0.97, "grad_norm": 1.3106961880159813, "learning_rate": 2.188598103894102e-08, "loss": 0.1624, "step": 13576 }, { "epoch": 0.97, "grad_norm": 1.275428201541623, "learning_rate": 2.1777847313370072e-08, "loss": 0.1385, "step": 13577 }, { "epoch": 0.97, "grad_norm": 5.021685072078932, "learning_rate": 2.166998079898619e-08, "loss": 0.6221, "step": 13578 }, { "epoch": 0.97, "grad_norm": 1.6505374795838146, "learning_rate": 2.1562381501579742e-08, "loss": 0.231, "step": 13579 }, { "epoch": 0.97, "grad_norm": 1.2723782252385625, "learning_rate": 2.1455049426926666e-08, "loss": 0.1548, "step": 13580 }, { "epoch": 0.97, "grad_norm": 1.4079686129393176, "learning_rate": 2.1347984580787905e-08, "loss": 0.176, "step": 13581 }, { "epoch": 0.97, "grad_norm": 1.4057700186481081, "learning_rate": 2.1241186968909978e-08, "loss": 0.1799, "step": 13582 }, { "epoch": 0.97, "grad_norm": 1.5915269908275451, "learning_rate": 2.1134656597025515e-08, "loss": 0.1894, "step": 13583 }, { "epoch": 0.97, "grad_norm": 6.322932801834802, "learning_rate": 2.1028393470852726e-08, "loss": 0.4568, "step": 13584 }, { "epoch": 0.97, "grad_norm": 1.115610777201104, "learning_rate": 2.0922397596095934e-08, "loss": 0.12, "step": 13585 }, { "epoch": 0.97, "grad_norm": 1.4340244667987418, "learning_rate": 2.0816668978443366e-08, "loss": 0.1749, "step": 13586 }, { "epoch": 0.97, "grad_norm": 1.2748364596144908, "learning_rate": 2.071120762357104e-08, "loss": 0.1472, "step": 13587 }, { "epoch": 0.97, "grad_norm": 1.4396492205043323, "learning_rate": 2.0606013537139426e-08, "loss": 0.2077, "step": 13588 }, { "epoch": 0.97, "grad_norm": 1.3522635400638698, "learning_rate": 2.0501086724794562e-08, "loss": 0.1609, "step": 13589 }, { "epoch": 0.97, "grad_norm": 1.5862277454772007, "learning_rate": 2.0396427192169167e-08, "loss": 0.2063, "step": 13590 }, { "epoch": 0.97, "grad_norm": 1.1371611264516623, "learning_rate": 2.0292034944880967e-08, "loss": 0.1396, "step": 13591 }, { "epoch": 0.97, "grad_norm": 1.2887817169502267, "learning_rate": 2.018790998853215e-08, "loss": 0.1591, "step": 13592 }, { "epoch": 0.97, "grad_norm": 1.4724216636395278, "learning_rate": 2.0084052328713244e-08, "loss": 0.1678, "step": 13593 }, { "epoch": 0.97, "grad_norm": 1.166303051009998, "learning_rate": 1.998046197099812e-08, "loss": 0.1328, "step": 13594 }, { "epoch": 0.97, "grad_norm": 1.3836734178556176, "learning_rate": 1.9877138920947336e-08, "loss": 0.1661, "step": 13595 }, { "epoch": 0.97, "grad_norm": 1.3333959953656356, "learning_rate": 1.977408318410645e-08, "loss": 0.1908, "step": 13596 }, { "epoch": 0.97, "grad_norm": 1.4519152017831924, "learning_rate": 1.967129476600771e-08, "loss": 0.1864, "step": 13597 }, { "epoch": 0.97, "grad_norm": 1.2563387112455275, "learning_rate": 1.9568773672167808e-08, "loss": 0.1298, "step": 13598 }, { "epoch": 0.97, "grad_norm": 1.4986368972098423, "learning_rate": 1.946651990809012e-08, "loss": 0.1667, "step": 13599 }, { "epoch": 0.97, "grad_norm": 1.6021681789516407, "learning_rate": 1.9364533479263036e-08, "loss": 0.186, "step": 13600 }, { "epoch": 0.97, "grad_norm": 1.2442081121357813, "learning_rate": 1.9262814391160512e-08, "loss": 0.1705, "step": 13601 }, { "epoch": 0.97, "grad_norm": 1.2591588490206833, "learning_rate": 1.9161362649243175e-08, "loss": 0.1817, "step": 13602 }, { "epoch": 0.97, "grad_norm": 1.34088666759668, "learning_rate": 1.9060178258955565e-08, "loss": 0.1498, "step": 13603 }, { "epoch": 0.97, "grad_norm": 1.3504884940156416, "learning_rate": 1.8959261225729997e-08, "loss": 0.1376, "step": 13604 }, { "epoch": 0.97, "grad_norm": 1.4763908506576335, "learning_rate": 1.8858611554982143e-08, "loss": 0.1876, "step": 13605 }, { "epoch": 0.97, "grad_norm": 1.4522179504093689, "learning_rate": 1.8758229252114902e-08, "loss": 0.2006, "step": 13606 }, { "epoch": 0.97, "grad_norm": 1.3870830387503175, "learning_rate": 1.8658114322516742e-08, "loss": 0.154, "step": 13607 }, { "epoch": 0.97, "grad_norm": 1.2696722845007449, "learning_rate": 1.855826677156114e-08, "loss": 0.1837, "step": 13608 }, { "epoch": 0.97, "grad_norm": 1.4218807358969392, "learning_rate": 1.84586866046077e-08, "loss": 0.1683, "step": 13609 }, { "epoch": 0.97, "grad_norm": 1.3104689302336376, "learning_rate": 1.8359373827001037e-08, "loss": 0.1515, "step": 13610 }, { "epoch": 0.97, "grad_norm": 1.2619490552277752, "learning_rate": 1.826032844407244e-08, "loss": 0.1357, "step": 13611 }, { "epoch": 0.97, "grad_norm": 1.2678489252363436, "learning_rate": 1.8161550461137657e-08, "loss": 0.1521, "step": 13612 }, { "epoch": 0.97, "grad_norm": 1.3738406959803933, "learning_rate": 1.8063039883499668e-08, "loss": 0.1877, "step": 13613 }, { "epoch": 0.97, "grad_norm": 1.2103895848046795, "learning_rate": 1.79647967164448e-08, "loss": 0.1513, "step": 13614 }, { "epoch": 0.97, "grad_norm": 1.3599412761436713, "learning_rate": 1.7866820965247168e-08, "loss": 0.1392, "step": 13615 }, { "epoch": 0.97, "grad_norm": 1.6249835013724747, "learning_rate": 1.7769112635166453e-08, "loss": 0.1704, "step": 13616 }, { "epoch": 0.97, "grad_norm": 5.957354925186498, "learning_rate": 1.7671671731445682e-08, "loss": 0.6573, "step": 13617 }, { "epoch": 0.97, "grad_norm": 1.2429284440111148, "learning_rate": 1.757449825931623e-08, "loss": 0.1696, "step": 13618 }, { "epoch": 0.97, "grad_norm": 1.4260422732158284, "learning_rate": 1.7477592223992812e-08, "loss": 0.1883, "step": 13619 }, { "epoch": 0.97, "grad_norm": 1.3387779045222323, "learning_rate": 1.7380953630678488e-08, "loss": 0.1432, "step": 13620 }, { "epoch": 0.97, "grad_norm": 5.092783678754347, "learning_rate": 1.7284582484559665e-08, "loss": 0.5289, "step": 13621 }, { "epoch": 0.97, "grad_norm": 1.2509832713388866, "learning_rate": 1.718847879080887e-08, "loss": 0.1478, "step": 13622 }, { "epoch": 0.97, "grad_norm": 1.3551402677435755, "learning_rate": 1.709264255458476e-08, "loss": 0.1565, "step": 13623 }, { "epoch": 0.97, "grad_norm": 1.3422694794441525, "learning_rate": 1.6997073781032104e-08, "loss": 0.1787, "step": 13624 }, { "epoch": 0.97, "grad_norm": 7.607756781975114, "learning_rate": 1.6901772475279575e-08, "loss": 0.6996, "step": 13625 }, { "epoch": 0.97, "grad_norm": 1.398580887468457, "learning_rate": 1.680673864244309e-08, "loss": 0.1527, "step": 13626 }, { "epoch": 0.97, "grad_norm": 1.277015634211775, "learning_rate": 1.671197228762411e-08, "loss": 0.1448, "step": 13627 }, { "epoch": 0.97, "grad_norm": 1.482700690368156, "learning_rate": 1.661747341590858e-08, "loss": 0.194, "step": 13628 }, { "epoch": 0.97, "grad_norm": 1.5557842591039206, "learning_rate": 1.6523242032369658e-08, "loss": 0.1923, "step": 13629 }, { "epoch": 0.97, "grad_norm": 1.70891279574617, "learning_rate": 1.6429278142063853e-08, "loss": 0.1571, "step": 13630 }, { "epoch": 0.98, "grad_norm": 1.3731601703717145, "learning_rate": 1.6335581750036578e-08, "loss": 0.1778, "step": 13631 }, { "epoch": 0.98, "grad_norm": 1.3757931445094258, "learning_rate": 1.624215286131603e-08, "loss": 0.1448, "step": 13632 }, { "epoch": 0.98, "grad_norm": 1.2359218706321575, "learning_rate": 1.6148991480917086e-08, "loss": 0.1522, "step": 13633 }, { "epoch": 0.98, "grad_norm": 5.726410027981581, "learning_rate": 1.605609761384075e-08, "loss": 0.5658, "step": 13634 }, { "epoch": 0.98, "grad_norm": 1.5818416633369334, "learning_rate": 1.5963471265072472e-08, "loss": 0.1914, "step": 13635 }, { "epoch": 0.98, "grad_norm": 1.4887617997817009, "learning_rate": 1.5871112439584946e-08, "loss": 0.188, "step": 13636 }, { "epoch": 0.98, "grad_norm": 1.5783026153143933, "learning_rate": 1.5779021142335316e-08, "loss": 0.2248, "step": 13637 }, { "epoch": 0.98, "grad_norm": 1.28243782167815, "learning_rate": 1.5687197378266296e-08, "loss": 0.1547, "step": 13638 }, { "epoch": 0.98, "grad_norm": 1.491263138746027, "learning_rate": 1.559564115230672e-08, "loss": 0.2329, "step": 13639 }, { "epoch": 0.98, "grad_norm": 1.3412353556003431, "learning_rate": 1.5504352469371543e-08, "loss": 0.1698, "step": 13640 }, { "epoch": 0.98, "grad_norm": 7.01429030309989, "learning_rate": 1.541333133436018e-08, "loss": 0.5715, "step": 13641 }, { "epoch": 0.98, "grad_norm": 5.831156879732491, "learning_rate": 1.5322577752158175e-08, "loss": 0.5884, "step": 13642 }, { "epoch": 0.98, "grad_norm": 1.2958416136460544, "learning_rate": 1.5232091727637734e-08, "loss": 0.1613, "step": 13643 }, { "epoch": 0.98, "grad_norm": 1.2502241942707604, "learning_rate": 1.5141873265654973e-08, "loss": 0.1576, "step": 13644 }, { "epoch": 0.98, "grad_norm": 1.3086153672252985, "learning_rate": 1.5051922371052685e-08, "loss": 0.1669, "step": 13645 }, { "epoch": 0.98, "grad_norm": 1.4330177714146035, "learning_rate": 1.4962239048658677e-08, "loss": 0.1604, "step": 13646 }, { "epoch": 0.98, "grad_norm": 1.4866202884983841, "learning_rate": 1.4872823303287431e-08, "loss": 0.1837, "step": 13647 }, { "epoch": 0.98, "grad_norm": 1.4817413513364268, "learning_rate": 1.478367513973844e-08, "loss": 0.1731, "step": 13648 }, { "epoch": 0.98, "grad_norm": 1.3986636947498032, "learning_rate": 1.4694794562796211e-08, "loss": 0.181, "step": 13649 }, { "epoch": 0.98, "grad_norm": 1.4873203935920392, "learning_rate": 1.4606181577231927e-08, "loss": 0.1482, "step": 13650 }, { "epoch": 0.98, "grad_norm": 1.284585597745238, "learning_rate": 1.4517836187802337e-08, "loss": 0.1417, "step": 13651 }, { "epoch": 0.98, "grad_norm": 1.2932031321709383, "learning_rate": 1.4429758399248094e-08, "loss": 0.1441, "step": 13652 }, { "epoch": 0.98, "grad_norm": 1.2561672503447745, "learning_rate": 1.434194821629875e-08, "loss": 0.1676, "step": 13653 }, { "epoch": 0.98, "grad_norm": 1.3874813212949326, "learning_rate": 1.4254405643666092e-08, "loss": 0.2054, "step": 13654 }, { "epoch": 0.98, "grad_norm": 1.8281917446366627, "learning_rate": 1.416713068604969e-08, "loss": 0.1868, "step": 13655 }, { "epoch": 0.98, "grad_norm": 1.2495077626406412, "learning_rate": 1.4080123348134133e-08, "loss": 0.1484, "step": 13656 }, { "epoch": 0.98, "grad_norm": 1.3787498720016917, "learning_rate": 1.3993383634589575e-08, "loss": 0.1544, "step": 13657 }, { "epoch": 0.98, "grad_norm": 1.3549737262384072, "learning_rate": 1.3906911550071734e-08, "loss": 0.1648, "step": 13658 }, { "epoch": 0.98, "grad_norm": 1.2818011836952403, "learning_rate": 1.3820707099222453e-08, "loss": 0.1702, "step": 13659 }, { "epoch": 0.98, "grad_norm": 1.4021808740955757, "learning_rate": 1.373477028666803e-08, "loss": 0.1556, "step": 13660 }, { "epoch": 0.98, "grad_norm": 1.171152481828228, "learning_rate": 1.3649101117022002e-08, "loss": 0.1322, "step": 13661 }, { "epoch": 0.98, "grad_norm": 4.392952745961978, "learning_rate": 1.3563699594882907e-08, "loss": 0.5976, "step": 13662 }, { "epoch": 0.98, "grad_norm": 1.1817304071105879, "learning_rate": 1.3478565724833747e-08, "loss": 0.1381, "step": 13663 }, { "epoch": 0.98, "grad_norm": 5.802152022118856, "learning_rate": 1.3393699511445314e-08, "loss": 0.5644, "step": 13664 }, { "epoch": 0.98, "grad_norm": 1.2840154929253358, "learning_rate": 1.3309100959271737e-08, "loss": 0.1852, "step": 13665 }, { "epoch": 0.98, "grad_norm": 1.4299286498283428, "learning_rate": 1.3224770072854942e-08, "loss": 0.1513, "step": 13666 }, { "epoch": 0.98, "grad_norm": 1.5649447232021354, "learning_rate": 1.3140706856721307e-08, "loss": 0.2265, "step": 13667 }, { "epoch": 0.98, "grad_norm": 1.2365933082157086, "learning_rate": 1.3056911315382226e-08, "loss": 0.1586, "step": 13668 }, { "epoch": 0.98, "grad_norm": 1.2718908973097662, "learning_rate": 1.297338345333632e-08, "loss": 0.1768, "step": 13669 }, { "epoch": 0.98, "grad_norm": 1.2849420592441336, "learning_rate": 1.2890123275067224e-08, "loss": 0.1337, "step": 13670 }, { "epoch": 0.98, "grad_norm": 1.4040937369768771, "learning_rate": 1.2807130785043031e-08, "loss": 0.1943, "step": 13671 }, { "epoch": 0.98, "grad_norm": 1.2769576609827307, "learning_rate": 1.2724405987719069e-08, "loss": 0.1788, "step": 13672 }, { "epoch": 0.98, "grad_norm": 1.4783892473952724, "learning_rate": 1.264194888753567e-08, "loss": 0.173, "step": 13673 }, { "epoch": 0.98, "grad_norm": 1.4050780206677376, "learning_rate": 1.2559759488918743e-08, "loss": 0.1594, "step": 13674 }, { "epoch": 0.98, "grad_norm": 1.3470650374524797, "learning_rate": 1.2477837796280312e-08, "loss": 0.1628, "step": 13675 }, { "epoch": 0.98, "grad_norm": 1.4469025340298471, "learning_rate": 1.239618381401686e-08, "loss": 0.1834, "step": 13676 }, { "epoch": 0.98, "grad_norm": 5.439013780601163, "learning_rate": 1.2314797546510992e-08, "loss": 0.4704, "step": 13677 }, { "epoch": 0.98, "grad_norm": 1.4365031854155996, "learning_rate": 1.2233678998132548e-08, "loss": 0.1742, "step": 13678 }, { "epoch": 0.98, "grad_norm": 1.2454433950419583, "learning_rate": 1.2152828173234155e-08, "loss": 0.1527, "step": 13679 }, { "epoch": 0.98, "grad_norm": 1.30664595558892, "learning_rate": 1.2072245076156786e-08, "loss": 0.1675, "step": 13680 }, { "epoch": 0.98, "grad_norm": 1.3265562309851793, "learning_rate": 1.1991929711225314e-08, "loss": 0.1087, "step": 13681 }, { "epoch": 0.98, "grad_norm": 1.419309967487841, "learning_rate": 1.1911882082750736e-08, "loss": 0.1896, "step": 13682 }, { "epoch": 0.98, "grad_norm": 1.253065982253021, "learning_rate": 1.1832102195029615e-08, "loss": 0.1417, "step": 13683 }, { "epoch": 0.98, "grad_norm": 1.4424089615964648, "learning_rate": 1.1752590052344081e-08, "loss": 0.1847, "step": 13684 }, { "epoch": 0.98, "grad_norm": 1.4326565573057186, "learning_rate": 1.1673345658962388e-08, "loss": 0.1571, "step": 13685 }, { "epoch": 0.98, "grad_norm": 1.488183356945678, "learning_rate": 1.1594369019138352e-08, "loss": 0.154, "step": 13686 }, { "epoch": 0.98, "grad_norm": 1.4508986156085968, "learning_rate": 1.1515660137110252e-08, "loss": 0.1662, "step": 13687 }, { "epoch": 0.98, "grad_norm": 1.4102147912612923, "learning_rate": 1.1437219017103596e-08, "loss": 0.1705, "step": 13688 }, { "epoch": 0.98, "grad_norm": 1.2619362175231899, "learning_rate": 1.135904566332835e-08, "loss": 0.1623, "step": 13689 }, { "epoch": 0.98, "grad_norm": 1.1532689969027141, "learning_rate": 1.1281140079980602e-08, "loss": 0.1479, "step": 13690 }, { "epoch": 0.98, "grad_norm": 1.359846341065935, "learning_rate": 1.1203502271242006e-08, "loss": 0.1484, "step": 13691 }, { "epoch": 0.98, "grad_norm": 1.3963050551267666, "learning_rate": 1.1126132241280342e-08, "loss": 0.1922, "step": 13692 }, { "epoch": 0.98, "grad_norm": 5.538547415771429, "learning_rate": 1.1049029994248395e-08, "loss": 0.7705, "step": 13693 }, { "epoch": 0.98, "grad_norm": 1.412139762557859, "learning_rate": 1.097219553428397e-08, "loss": 0.1855, "step": 13694 }, { "epoch": 0.98, "grad_norm": 7.174411274526148, "learning_rate": 1.0895628865512099e-08, "loss": 0.6033, "step": 13695 }, { "epoch": 0.98, "grad_norm": 6.941993593021459, "learning_rate": 1.0819329992041715e-08, "loss": 0.5794, "step": 13696 }, { "epoch": 0.98, "grad_norm": 4.665516705063931, "learning_rate": 1.0743298917968992e-08, "loss": 0.6437, "step": 13697 }, { "epoch": 0.98, "grad_norm": 1.3171863462035416, "learning_rate": 1.066753564737455e-08, "loss": 0.1633, "step": 13698 }, { "epoch": 0.98, "grad_norm": 1.488037106473797, "learning_rate": 1.0592040184325691e-08, "loss": 0.2197, "step": 13699 }, { "epoch": 0.98, "grad_norm": 1.3146266555087547, "learning_rate": 1.0516812532873622e-08, "loss": 0.1828, "step": 13700 }, { "epoch": 0.98, "grad_norm": 1.8944955460657231, "learning_rate": 1.0441852697057331e-08, "loss": 0.2096, "step": 13701 }, { "epoch": 0.98, "grad_norm": 1.1950643756463852, "learning_rate": 1.0367160680899712e-08, "loss": 0.1191, "step": 13702 }, { "epoch": 0.98, "grad_norm": 1.0103163851092223, "learning_rate": 1.0292736488410338e-08, "loss": 0.1091, "step": 13703 }, { "epoch": 0.98, "grad_norm": 1.2977024662123242, "learning_rate": 1.0218580123583788e-08, "loss": 0.1816, "step": 13704 }, { "epoch": 0.98, "grad_norm": 1.333465346668352, "learning_rate": 1.0144691590400768e-08, "loss": 0.16, "step": 13705 }, { "epoch": 0.98, "grad_norm": 1.4560829074450061, "learning_rate": 1.007107089282644e-08, "loss": 0.1832, "step": 13706 }, { "epoch": 0.98, "grad_norm": 5.580717367412487, "learning_rate": 9.997718034813197e-09, "loss": 0.4536, "step": 13707 }, { "epoch": 0.98, "grad_norm": 1.6792400841646018, "learning_rate": 9.924633020298446e-09, "loss": 0.1925, "step": 13708 }, { "epoch": 0.98, "grad_norm": 1.1682522489313207, "learning_rate": 9.85181585320516e-09, "loss": 0.1474, "step": 13709 }, { "epoch": 0.98, "grad_norm": 1.275167987637848, "learning_rate": 9.779266537441323e-09, "loss": 0.1605, "step": 13710 }, { "epoch": 0.98, "grad_norm": 1.1430458150342195, "learning_rate": 9.706985076901043e-09, "loss": 0.1225, "step": 13711 }, { "epoch": 0.98, "grad_norm": 1.3836941751382688, "learning_rate": 9.63497147546455e-09, "loss": 0.1672, "step": 13712 }, { "epoch": 0.98, "grad_norm": 1.5474968334224275, "learning_rate": 9.563225736997638e-09, "loss": 0.2042, "step": 13713 }, { "epoch": 0.98, "grad_norm": 1.2862071068011836, "learning_rate": 9.491747865350564e-09, "loss": 0.1637, "step": 13714 }, { "epoch": 0.98, "grad_norm": 1.3945164789340625, "learning_rate": 9.420537864359703e-09, "loss": 0.1689, "step": 13715 }, { "epoch": 0.98, "grad_norm": 1.2973386921974353, "learning_rate": 9.349595737848105e-09, "loss": 0.1466, "step": 13716 }, { "epoch": 0.98, "grad_norm": 1.1836265285983647, "learning_rate": 9.278921489623838e-09, "loss": 0.1158, "step": 13717 }, { "epoch": 0.98, "grad_norm": 1.311131786681849, "learning_rate": 9.208515123479978e-09, "loss": 0.1411, "step": 13718 }, { "epoch": 0.98, "grad_norm": 1.1869810888984487, "learning_rate": 9.138376643195168e-09, "loss": 0.1644, "step": 13719 }, { "epoch": 0.98, "grad_norm": 1.4224385635842964, "learning_rate": 9.068506052534732e-09, "loss": 0.1632, "step": 13720 }, { "epoch": 0.98, "grad_norm": 1.393752525969081, "learning_rate": 8.998903355249556e-09, "loss": 0.1678, "step": 13721 }, { "epoch": 0.98, "grad_norm": 1.4300052794041918, "learning_rate": 8.92956855507443e-09, "loss": 0.1542, "step": 13722 }, { "epoch": 0.98, "grad_norm": 4.548638313269367, "learning_rate": 8.860501655731379e-09, "loss": 0.476, "step": 13723 }, { "epoch": 0.98, "grad_norm": 1.1525192944754574, "learning_rate": 8.791702660928547e-09, "loss": 0.1448, "step": 13724 }, { "epoch": 0.98, "grad_norm": 1.4760506281235386, "learning_rate": 8.723171574357426e-09, "loss": 0.1626, "step": 13725 }, { "epoch": 0.98, "grad_norm": 8.255571970163762, "learning_rate": 8.654908399697848e-09, "loss": 0.5562, "step": 13726 }, { "epoch": 0.98, "grad_norm": 1.3349079459426851, "learning_rate": 8.586913140612996e-09, "loss": 0.1928, "step": 13727 }, { "epoch": 0.98, "grad_norm": 1.3700897179093627, "learning_rate": 8.519185800752727e-09, "loss": 0.1699, "step": 13728 }, { "epoch": 0.98, "grad_norm": 1.3145531214857635, "learning_rate": 8.451726383752468e-09, "loss": 0.1523, "step": 13729 }, { "epoch": 0.98, "grad_norm": 1.3461762697053308, "learning_rate": 8.384534893233209e-09, "loss": 0.1554, "step": 13730 }, { "epoch": 0.98, "grad_norm": 1.2291045194725614, "learning_rate": 8.317611332801512e-09, "loss": 0.1517, "step": 13731 }, { "epoch": 0.98, "grad_norm": 1.3675255427154311, "learning_rate": 8.250955706049502e-09, "loss": 0.1602, "step": 13732 }, { "epoch": 0.98, "grad_norm": 1.289106167270459, "learning_rate": 8.184568016554872e-09, "loss": 0.1596, "step": 13733 }, { "epoch": 0.98, "grad_norm": 1.2813556293207777, "learning_rate": 8.11844826788144e-09, "loss": 0.166, "step": 13734 }, { "epoch": 0.98, "grad_norm": 1.2719586337953566, "learning_rate": 8.052596463577477e-09, "loss": 0.1513, "step": 13735 }, { "epoch": 0.98, "grad_norm": 5.227281161319752, "learning_rate": 7.987012607179046e-09, "loss": 0.396, "step": 13736 }, { "epoch": 0.98, "grad_norm": 1.2950775116329474, "learning_rate": 7.921696702204995e-09, "loss": 0.152, "step": 13737 }, { "epoch": 0.98, "grad_norm": 1.0928852801904914, "learning_rate": 7.856648752161966e-09, "loss": 0.1413, "step": 13738 }, { "epoch": 0.98, "grad_norm": 1.5318661272977012, "learning_rate": 7.791868760541055e-09, "loss": 0.1874, "step": 13739 }, { "epoch": 0.98, "grad_norm": 1.3761890682488607, "learning_rate": 7.727356730820035e-09, "loss": 0.1852, "step": 13740 }, { "epoch": 0.98, "grad_norm": 1.4822769714589838, "learning_rate": 7.66311266646058e-09, "loss": 0.2208, "step": 13741 }, { "epoch": 0.98, "grad_norm": 1.2489083369739473, "learning_rate": 7.599136570912713e-09, "loss": 0.1471, "step": 13742 }, { "epoch": 0.98, "grad_norm": 1.3381313753926616, "learning_rate": 7.535428447608683e-09, "loss": 0.156, "step": 13743 }, { "epoch": 0.98, "grad_norm": 1.596558459246474, "learning_rate": 7.47198829996909e-09, "loss": 0.2048, "step": 13744 }, { "epoch": 0.98, "grad_norm": 1.4135431346394731, "learning_rate": 7.408816131398988e-09, "loss": 0.1843, "step": 13745 }, { "epoch": 0.98, "grad_norm": 1.3830020049533036, "learning_rate": 7.345911945288997e-09, "loss": 0.1783, "step": 13746 }, { "epoch": 0.98, "grad_norm": 1.5241591561959147, "learning_rate": 7.283275745015861e-09, "loss": 0.1955, "step": 13747 }, { "epoch": 0.98, "grad_norm": 12.828411825211115, "learning_rate": 7.220907533941335e-09, "loss": 0.5722, "step": 13748 }, { "epoch": 0.98, "grad_norm": 1.2432314711460515, "learning_rate": 7.158807315412741e-09, "loss": 0.1618, "step": 13749 }, { "epoch": 0.98, "grad_norm": 1.2423568703940722, "learning_rate": 7.096975092764636e-09, "loss": 0.1339, "step": 13750 }, { "epoch": 0.98, "grad_norm": 1.245637178702311, "learning_rate": 7.035410869314918e-09, "loss": 0.1441, "step": 13751 }, { "epoch": 0.98, "grad_norm": 1.3596460615185115, "learning_rate": 6.974114648368169e-09, "loss": 0.1551, "step": 13752 }, { "epoch": 0.98, "grad_norm": 7.632297140446915, "learning_rate": 6.913086433215088e-09, "loss": 0.5309, "step": 13753 }, { "epoch": 0.98, "grad_norm": 1.3802793238812316, "learning_rate": 6.852326227130835e-09, "loss": 0.1928, "step": 13754 }, { "epoch": 0.98, "grad_norm": 1.3730332427384633, "learning_rate": 6.791834033376688e-09, "loss": 0.167, "step": 13755 }, { "epoch": 0.98, "grad_norm": 1.3396676914975802, "learning_rate": 6.7316098552000495e-09, "loss": 0.1711, "step": 13756 }, { "epoch": 0.98, "grad_norm": 1.335828160071708, "learning_rate": 6.6716536958338905e-09, "loss": 0.1537, "step": 13757 }, { "epoch": 0.98, "grad_norm": 1.3572897822043613, "learning_rate": 6.611965558495637e-09, "loss": 0.1557, "step": 13758 }, { "epoch": 0.98, "grad_norm": 1.3616984237363876, "learning_rate": 6.552545446389391e-09, "loss": 0.131, "step": 13759 }, { "epoch": 0.98, "grad_norm": 1.339915040423464, "learning_rate": 6.49339336270427e-09, "loss": 0.1804, "step": 13760 }, { "epoch": 0.98, "grad_norm": 7.488336141687242, "learning_rate": 6.434509310616066e-09, "loss": 0.5782, "step": 13761 }, { "epoch": 0.98, "grad_norm": 1.2834270414653823, "learning_rate": 6.375893293284474e-09, "loss": 0.1824, "step": 13762 }, { "epoch": 0.98, "grad_norm": 1.4529077352286064, "learning_rate": 6.3175453138569765e-09, "loss": 0.1757, "step": 13763 }, { "epoch": 0.98, "grad_norm": 1.0778138032486184, "learning_rate": 6.259465375464402e-09, "loss": 0.1414, "step": 13764 }, { "epoch": 0.98, "grad_norm": 1.2504729875014426, "learning_rate": 6.201653481224812e-09, "loss": 0.1778, "step": 13765 }, { "epoch": 0.98, "grad_norm": 1.524317093784593, "learning_rate": 6.14410963424128e-09, "loss": 0.1727, "step": 13766 }, { "epoch": 0.98, "grad_norm": 1.3002314894107676, "learning_rate": 6.086833837601891e-09, "loss": 0.1652, "step": 13767 }, { "epoch": 0.98, "grad_norm": 1.412010258639999, "learning_rate": 6.029826094381963e-09, "loss": 0.1682, "step": 13768 }, { "epoch": 0.98, "grad_norm": 1.604111773345135, "learning_rate": 5.9730864076407155e-09, "loss": 0.1827, "step": 13769 }, { "epoch": 0.98, "grad_norm": 1.441446528046009, "learning_rate": 5.916614780424046e-09, "loss": 0.1989, "step": 13770 }, { "epoch": 0.99, "grad_norm": 1.4225681161413641, "learning_rate": 5.8604112157634176e-09, "loss": 0.1664, "step": 13771 }, { "epoch": 0.99, "grad_norm": 1.257400145508779, "learning_rate": 5.8044757166747515e-09, "loss": 0.1355, "step": 13772 }, { "epoch": 0.99, "grad_norm": 1.5911716864761534, "learning_rate": 5.7488082861612005e-09, "loss": 0.1467, "step": 13773 }, { "epoch": 0.99, "grad_norm": 1.357925181368716, "learning_rate": 5.693408927210376e-09, "loss": 0.1566, "step": 13774 }, { "epoch": 0.99, "grad_norm": 1.3351715466857323, "learning_rate": 5.6382776427960085e-09, "loss": 0.1731, "step": 13775 }, { "epoch": 0.99, "grad_norm": 1.3736141821497354, "learning_rate": 5.583414435877399e-09, "loss": 0.1774, "step": 13776 }, { "epoch": 0.99, "grad_norm": 1.4220336920638375, "learning_rate": 5.5288193093999685e-09, "loss": 0.2062, "step": 13777 }, { "epoch": 0.99, "grad_norm": 1.2236400141963666, "learning_rate": 5.474492266292486e-09, "loss": 0.1441, "step": 13778 }, { "epoch": 0.99, "grad_norm": 1.7857313572670095, "learning_rate": 5.420433309472617e-09, "loss": 0.1771, "step": 13779 }, { "epoch": 0.99, "grad_norm": 1.469384947483464, "learning_rate": 5.366642441841374e-09, "loss": 0.1514, "step": 13780 }, { "epoch": 0.99, "grad_norm": 1.3329658960211284, "learning_rate": 5.313119666286448e-09, "loss": 0.1746, "step": 13781 }, { "epoch": 0.99, "grad_norm": 1.3244398817176808, "learning_rate": 5.259864985679985e-09, "loss": 0.1579, "step": 13782 }, { "epoch": 0.99, "grad_norm": 1.4639015529504087, "learning_rate": 5.20687840288081e-09, "loss": 0.179, "step": 13783 }, { "epoch": 0.99, "grad_norm": 1.452927532342113, "learning_rate": 5.15415992073387e-09, "loss": 0.1764, "step": 13784 }, { "epoch": 0.99, "grad_norm": 1.1497458903228337, "learning_rate": 5.101709542067457e-09, "loss": 0.1445, "step": 13785 }, { "epoch": 0.99, "grad_norm": 1.2946875451041968, "learning_rate": 5.0495272696976515e-09, "loss": 0.1703, "step": 13786 }, { "epoch": 0.99, "grad_norm": 1.341664595397723, "learning_rate": 4.9976131064255475e-09, "loss": 0.1771, "step": 13787 }, { "epoch": 0.99, "grad_norm": 1.5134442351925002, "learning_rate": 4.945967055037803e-09, "loss": 0.1581, "step": 13788 }, { "epoch": 0.99, "grad_norm": 1.3872128808359643, "learning_rate": 4.8945891183055374e-09, "loss": 0.1718, "step": 13789 }, { "epoch": 0.99, "grad_norm": 1.4643336406867975, "learning_rate": 4.843479298987652e-09, "loss": 0.1742, "step": 13790 }, { "epoch": 0.99, "grad_norm": 1.4236404793807012, "learning_rate": 4.792637599826954e-09, "loss": 0.2016, "step": 13791 }, { "epoch": 0.99, "grad_norm": 1.5668141598943903, "learning_rate": 4.742064023552928e-09, "loss": 0.1795, "step": 13792 }, { "epoch": 0.99, "grad_norm": 1.2328018099672282, "learning_rate": 4.691758572879512e-09, "loss": 0.1688, "step": 13793 }, { "epoch": 0.99, "grad_norm": 1.2962102341211879, "learning_rate": 4.64172125050677e-09, "loss": 0.1638, "step": 13794 }, { "epoch": 0.99, "grad_norm": 1.2411754179759507, "learning_rate": 4.591952059121441e-09, "loss": 0.1416, "step": 13795 }, { "epoch": 0.99, "grad_norm": 1.2895239430957919, "learning_rate": 4.542451001394721e-09, "loss": 0.1468, "step": 13796 }, { "epoch": 0.99, "grad_norm": 5.4972244028484365, "learning_rate": 4.49321807998282e-09, "loss": 0.3848, "step": 13797 }, { "epoch": 0.99, "grad_norm": 1.394042904006588, "learning_rate": 4.444253297528622e-09, "loss": 0.1618, "step": 13798 }, { "epoch": 0.99, "grad_norm": 1.3155024722609006, "learning_rate": 4.395556656661138e-09, "loss": 0.1686, "step": 13799 }, { "epoch": 0.99, "grad_norm": 1.5044844397560435, "learning_rate": 4.347128159993829e-09, "loss": 0.1547, "step": 13800 }, { "epoch": 0.99, "grad_norm": 1.3640647844849467, "learning_rate": 4.298967810125176e-09, "loss": 0.1709, "step": 13801 }, { "epoch": 0.99, "grad_norm": 1.3016381806809318, "learning_rate": 4.251075609641997e-09, "loss": 0.1808, "step": 13802 }, { "epoch": 0.99, "grad_norm": 1.2635510496716096, "learning_rate": 4.203451561113347e-09, "loss": 0.1338, "step": 13803 }, { "epoch": 0.99, "grad_norm": 1.3457264783451994, "learning_rate": 4.156095667096627e-09, "loss": 0.1609, "step": 13804 }, { "epoch": 0.99, "grad_norm": 1.3491570976060996, "learning_rate": 4.109007930133136e-09, "loss": 0.1322, "step": 13805 }, { "epoch": 0.99, "grad_norm": 7.276806054389065, "learning_rate": 4.062188352750296e-09, "loss": 0.4151, "step": 13806 }, { "epoch": 0.99, "grad_norm": 1.4233562060078815, "learning_rate": 4.015636937461654e-09, "loss": 0.1451, "step": 13807 }, { "epoch": 0.99, "grad_norm": 1.4262538640117528, "learning_rate": 3.9693536867652096e-09, "loss": 0.1628, "step": 13808 }, { "epoch": 0.99, "grad_norm": 1.3219953544239822, "learning_rate": 3.923338603146198e-09, "loss": 0.1525, "step": 13809 }, { "epoch": 0.99, "grad_norm": 1.239379911562932, "learning_rate": 3.877591689073756e-09, "loss": 0.1789, "step": 13810 }, { "epoch": 0.99, "grad_norm": 1.2982096725138397, "learning_rate": 3.832112947003697e-09, "loss": 0.1922, "step": 13811 }, { "epoch": 0.99, "grad_norm": 1.4406857720172193, "learning_rate": 3.786902379376844e-09, "loss": 0.2045, "step": 13812 }, { "epoch": 0.99, "grad_norm": 1.3810198627663803, "learning_rate": 3.741959988620147e-09, "loss": 0.187, "step": 13813 }, { "epoch": 0.99, "grad_norm": 6.565326891081775, "learning_rate": 3.697285777146675e-09, "loss": 0.5557, "step": 13814 }, { "epoch": 0.99, "grad_norm": 1.3634287249816766, "learning_rate": 3.6528797473528444e-09, "loss": 0.1538, "step": 13815 }, { "epoch": 0.99, "grad_norm": 1.180420957823594, "learning_rate": 3.6087419016234138e-09, "loss": 0.1623, "step": 13816 }, { "epoch": 0.99, "grad_norm": 1.3549995143170401, "learning_rate": 3.5648722423270443e-09, "loss": 0.1849, "step": 13817 }, { "epoch": 0.99, "grad_norm": 1.419438213368951, "learning_rate": 3.521270771818519e-09, "loss": 0.1618, "step": 13818 }, { "epoch": 0.99, "grad_norm": 1.3854194785045055, "learning_rate": 3.477937492438188e-09, "loss": 0.2154, "step": 13819 }, { "epoch": 0.99, "grad_norm": 5.857489542091022, "learning_rate": 3.4348724065119687e-09, "loss": 0.5328, "step": 13820 }, { "epoch": 0.99, "grad_norm": 1.4054028283122966, "learning_rate": 3.3920755163513452e-09, "loss": 0.2015, "step": 13821 }, { "epoch": 0.99, "grad_norm": 1.4328396991498686, "learning_rate": 3.349546824253924e-09, "loss": 0.1644, "step": 13822 }, { "epoch": 0.99, "grad_norm": 1.6048744891724689, "learning_rate": 3.307286332502324e-09, "loss": 0.1818, "step": 13823 }, { "epoch": 0.99, "grad_norm": 1.9169040545962999, "learning_rate": 3.2652940433647307e-09, "loss": 0.204, "step": 13824 }, { "epoch": 0.99, "grad_norm": 1.2049123804937716, "learning_rate": 3.2235699590948966e-09, "loss": 0.169, "step": 13825 }, { "epoch": 0.99, "grad_norm": 1.2746606210826756, "learning_rate": 3.1821140819326966e-09, "loss": 0.1965, "step": 13826 }, { "epoch": 0.99, "grad_norm": 1.3407719254261528, "learning_rate": 3.1409264141035733e-09, "loss": 0.1721, "step": 13827 }, { "epoch": 0.99, "grad_norm": 1.4506271364821373, "learning_rate": 3.1000069578179803e-09, "loss": 0.1884, "step": 13828 }, { "epoch": 0.99, "grad_norm": 1.4971809970222851, "learning_rate": 3.059355715272494e-09, "loss": 0.1954, "step": 13829 }, { "epoch": 0.99, "grad_norm": 7.978356842968884, "learning_rate": 3.0189726886487026e-09, "loss": 0.6222, "step": 13830 }, { "epoch": 0.99, "grad_norm": 1.4012262409401515, "learning_rate": 2.9788578801148717e-09, "loss": 0.1834, "step": 13831 }, { "epoch": 0.99, "grad_norm": 1.3882680803367355, "learning_rate": 2.9390112918231684e-09, "loss": 0.1603, "step": 13832 }, { "epoch": 0.99, "grad_norm": 1.440990790482852, "learning_rate": 2.8994329259135477e-09, "loss": 0.1478, "step": 13833 }, { "epoch": 0.99, "grad_norm": 1.304334709826213, "learning_rate": 2.8601227845098667e-09, "loss": 0.188, "step": 13834 }, { "epoch": 0.99, "grad_norm": 1.415511835647568, "learning_rate": 2.8210808697226586e-09, "loss": 0.2173, "step": 13835 }, { "epoch": 0.99, "grad_norm": 1.395973490321563, "learning_rate": 2.78230718364636e-09, "loss": 0.1599, "step": 13836 }, { "epoch": 0.99, "grad_norm": 1.2888119754190626, "learning_rate": 2.7438017283631936e-09, "loss": 0.15, "step": 13837 }, { "epoch": 0.99, "grad_norm": 4.630092657364098, "learning_rate": 2.7055645059398393e-09, "loss": 0.5132, "step": 13838 }, { "epoch": 0.99, "grad_norm": 1.3054035283267547, "learning_rate": 2.667595518428545e-09, "loss": 0.143, "step": 13839 }, { "epoch": 0.99, "grad_norm": 1.3884604685213746, "learning_rate": 2.62989476786768e-09, "loss": 0.1692, "step": 13840 }, { "epoch": 0.99, "grad_norm": 1.5262629410272037, "learning_rate": 2.5924622562800705e-09, "loss": 0.2212, "step": 13841 }, { "epoch": 0.99, "grad_norm": 1.3329465370982787, "learning_rate": 2.55529798567522e-09, "loss": 0.1485, "step": 13842 }, { "epoch": 0.99, "grad_norm": 5.562009305947579, "learning_rate": 2.518401958048755e-09, "loss": 0.4823, "step": 13843 }, { "epoch": 0.99, "grad_norm": 1.376370030540331, "learning_rate": 2.4817741753802027e-09, "loss": 0.2137, "step": 13844 }, { "epoch": 0.99, "grad_norm": 1.4222291116963806, "learning_rate": 2.445414639636323e-09, "loss": 0.2027, "step": 13845 }, { "epoch": 0.99, "grad_norm": 1.384628795485498, "learning_rate": 2.4093233527677785e-09, "loss": 0.1685, "step": 13846 }, { "epoch": 0.99, "grad_norm": 1.1898567976599435, "learning_rate": 2.373500316713018e-09, "loss": 0.1485, "step": 13847 }, { "epoch": 0.99, "grad_norm": 1.512665913106766, "learning_rate": 2.337945533393837e-09, "loss": 0.1757, "step": 13848 }, { "epoch": 0.99, "grad_norm": 1.4958055241781885, "learning_rate": 2.3026590047192652e-09, "loss": 0.1892, "step": 13849 }, { "epoch": 0.99, "grad_norm": 6.383501439427282, "learning_rate": 2.2676407325827875e-09, "loss": 0.6196, "step": 13850 }, { "epoch": 0.99, "grad_norm": 1.138526099515865, "learning_rate": 2.2328907188645664e-09, "loss": 0.111, "step": 13851 }, { "epoch": 0.99, "grad_norm": 6.122043276348766, "learning_rate": 2.198408965429777e-09, "loss": 0.4725, "step": 13852 }, { "epoch": 0.99, "grad_norm": 1.3747909989192764, "learning_rate": 2.1641954741297157e-09, "loss": 0.2071, "step": 13853 }, { "epoch": 0.99, "grad_norm": 1.3359711194513268, "learning_rate": 2.130250246799581e-09, "loss": 0.1808, "step": 13854 }, { "epoch": 0.99, "grad_norm": 1.3684408717022054, "learning_rate": 2.0965732852623598e-09, "loss": 0.1762, "step": 13855 }, { "epoch": 0.99, "grad_norm": 1.3177776065431566, "learning_rate": 2.063164591325495e-09, "loss": 0.1614, "step": 13856 }, { "epoch": 0.99, "grad_norm": 1.2239040329795832, "learning_rate": 2.0300241667819963e-09, "loss": 0.1577, "step": 13857 }, { "epoch": 0.99, "grad_norm": 1.411373262074015, "learning_rate": 1.9971520134109966e-09, "loss": 0.1701, "step": 13858 }, { "epoch": 0.99, "grad_norm": 1.3451912997335598, "learning_rate": 1.9645481329771954e-09, "loss": 0.1549, "step": 13859 }, { "epoch": 0.99, "grad_norm": 1.4628715625094626, "learning_rate": 1.9322125272297488e-09, "loss": 0.2052, "step": 13860 }, { "epoch": 0.99, "grad_norm": 1.3529642814259122, "learning_rate": 1.9001451979056006e-09, "loss": 0.1591, "step": 13861 }, { "epoch": 0.99, "grad_norm": 1.4468314423681135, "learning_rate": 1.868346146724487e-09, "loss": 0.1791, "step": 13862 }, { "epoch": 0.99, "grad_norm": 1.3347701353413681, "learning_rate": 1.8368153753944852e-09, "loss": 0.1665, "step": 13863 }, { "epoch": 0.99, "grad_norm": 1.418342642271985, "learning_rate": 1.8055528856075755e-09, "loss": 0.2077, "step": 13864 }, { "epoch": 0.99, "grad_norm": 1.401456031810564, "learning_rate": 1.77455867904186e-09, "loss": 0.2072, "step": 13865 }, { "epoch": 0.99, "grad_norm": 1.2801068683967731, "learning_rate": 1.743832757360453e-09, "loss": 0.1537, "step": 13866 }, { "epoch": 0.99, "grad_norm": 4.730572935216498, "learning_rate": 1.7133751222137007e-09, "loss": 0.4997, "step": 13867 }, { "epoch": 0.99, "grad_norm": 1.4688276040561234, "learning_rate": 1.6831857752352964e-09, "loss": 0.2097, "step": 13868 }, { "epoch": 0.99, "grad_norm": 1.3153144200890292, "learning_rate": 1.653264718046721e-09, "loss": 0.1587, "step": 13869 }, { "epoch": 0.99, "grad_norm": 1.3668286885821832, "learning_rate": 1.6236119522533566e-09, "loss": 0.1698, "step": 13870 }, { "epoch": 0.99, "grad_norm": 1.2400394192461697, "learning_rate": 1.5942274794467083e-09, "loss": 0.1433, "step": 13871 }, { "epoch": 0.99, "grad_norm": 1.445513603297033, "learning_rate": 1.5651113012044029e-09, "loss": 0.1596, "step": 13872 }, { "epoch": 0.99, "grad_norm": 1.3453218681076602, "learning_rate": 1.5362634190896341e-09, "loss": 0.1506, "step": 13873 }, { "epoch": 0.99, "grad_norm": 1.5851901722524955, "learning_rate": 1.5076838346500533e-09, "loss": 0.1384, "step": 13874 }, { "epoch": 0.99, "grad_norm": 1.438759733122165, "learning_rate": 1.4793725494199884e-09, "loss": 0.1684, "step": 13875 }, { "epoch": 0.99, "grad_norm": 1.4078784053755165, "learning_rate": 1.4513295649193348e-09, "loss": 0.161, "step": 13876 }, { "epoch": 0.99, "grad_norm": 1.1438857995315674, "learning_rate": 1.423554882653e-09, "loss": 0.1333, "step": 13877 }, { "epoch": 0.99, "grad_norm": 1.2479457009002595, "learning_rate": 1.3960485041120131e-09, "loss": 0.1593, "step": 13878 }, { "epoch": 0.99, "grad_norm": 5.014545653290866, "learning_rate": 1.3688104307729711e-09, "loss": 0.4985, "step": 13879 }, { "epoch": 0.99, "grad_norm": 1.2082669911508943, "learning_rate": 1.3418406640969272e-09, "loss": 0.1534, "step": 13880 }, { "epoch": 0.99, "grad_norm": 1.173697454548059, "learning_rate": 1.315139205533278e-09, "loss": 0.1485, "step": 13881 }, { "epoch": 0.99, "grad_norm": 1.2608928752143274, "learning_rate": 1.2887060565136556e-09, "loss": 0.1598, "step": 13882 }, { "epoch": 0.99, "grad_norm": 1.3193664366532913, "learning_rate": 1.2625412184574803e-09, "loss": 0.1723, "step": 13883 }, { "epoch": 0.99, "grad_norm": 1.302599573484244, "learning_rate": 1.2366446927691844e-09, "loss": 0.1784, "step": 13884 }, { "epoch": 0.99, "grad_norm": 1.4277753765424852, "learning_rate": 1.211016480838767e-09, "loss": 0.1668, "step": 13885 }, { "epoch": 0.99, "grad_norm": 1.3813149938073364, "learning_rate": 1.1856565840417944e-09, "loss": 0.1471, "step": 13886 }, { "epoch": 0.99, "grad_norm": 1.3540424568857683, "learning_rate": 1.1605650037394002e-09, "loss": 0.1735, "step": 13887 }, { "epoch": 0.99, "grad_norm": 1.295677276055178, "learning_rate": 1.1357417412788396e-09, "loss": 0.1827, "step": 13888 }, { "epoch": 0.99, "grad_norm": 1.4712042296682597, "learning_rate": 1.1111867979923807e-09, "loss": 0.1677, "step": 13889 }, { "epoch": 0.99, "grad_norm": 1.3207283262469796, "learning_rate": 1.086900175197303e-09, "loss": 0.1943, "step": 13890 }, { "epoch": 0.99, "grad_norm": 1.3408684139390483, "learning_rate": 1.0628818741981184e-09, "loss": 0.1387, "step": 13891 }, { "epoch": 0.99, "grad_norm": 1.378357771708658, "learning_rate": 1.0391318962837959e-09, "loss": 0.2034, "step": 13892 }, { "epoch": 0.99, "grad_norm": 1.3880528326760135, "learning_rate": 1.0156502427288717e-09, "loss": 0.1437, "step": 13893 }, { "epoch": 0.99, "grad_norm": 1.2072290797790888, "learning_rate": 9.924369147945589e-10, "loss": 0.1455, "step": 13894 }, { "epoch": 0.99, "grad_norm": 1.1302197174429163, "learning_rate": 9.694919137254177e-10, "loss": 0.1391, "step": 13895 }, { "epoch": 0.99, "grad_norm": 1.4240799404606188, "learning_rate": 9.468152407543508e-10, "loss": 0.1603, "step": 13896 }, { "epoch": 0.99, "grad_norm": 6.506016209679943, "learning_rate": 9.244068970976072e-10, "loss": 0.4413, "step": 13897 }, { "epoch": 0.99, "grad_norm": 6.126116456405039, "learning_rate": 9.022668839586691e-10, "loss": 0.6294, "step": 13898 }, { "epoch": 0.99, "grad_norm": 4.6041474639039635, "learning_rate": 8.803952025260299e-10, "loss": 0.5389, "step": 13899 }, { "epoch": 0.99, "grad_norm": 1.4050807767214724, "learning_rate": 8.587918539726403e-10, "loss": 0.1506, "step": 13900 }, { "epoch": 0.99, "grad_norm": 1.4972876465146017, "learning_rate": 8.374568394592386e-10, "loss": 0.1827, "step": 13901 }, { "epoch": 0.99, "grad_norm": 1.2036911774184678, "learning_rate": 8.163901601304647e-10, "loss": 0.1414, "step": 13902 }, { "epoch": 0.99, "grad_norm": 1.4557849792409847, "learning_rate": 7.955918171170806e-10, "loss": 0.2152, "step": 13903 }, { "epoch": 0.99, "grad_norm": 1.4505874176139495, "learning_rate": 7.750618115354159e-10, "loss": 0.1862, "step": 13904 }, { "epoch": 0.99, "grad_norm": 1.2189394103247244, "learning_rate": 7.548001444879216e-10, "loss": 0.1643, "step": 13905 }, { "epoch": 0.99, "grad_norm": 1.4786841351789568, "learning_rate": 7.348068170615064e-10, "loss": 0.2165, "step": 13906 }, { "epoch": 0.99, "grad_norm": 1.271113039067953, "learning_rate": 7.150818303297557e-10, "loss": 0.1695, "step": 13907 }, { "epoch": 0.99, "grad_norm": 1.2875075035786803, "learning_rate": 6.956251853512675e-10, "loss": 0.1775, "step": 13908 }, { "epoch": 0.99, "grad_norm": 1.3157150797300077, "learning_rate": 6.764368831707613e-10, "loss": 0.1924, "step": 13909 }, { "epoch": 0.99, "grad_norm": 1.4205952036871892, "learning_rate": 6.57516924817414e-10, "loss": 0.1931, "step": 13910 }, { "epoch": 1.0, "grad_norm": 1.511636514646889, "learning_rate": 6.388653113076348e-10, "loss": 0.164, "step": 13911 }, { "epoch": 1.0, "grad_norm": 1.2997966283205473, "learning_rate": 6.204820436417347e-10, "loss": 0.1731, "step": 13912 }, { "epoch": 1.0, "grad_norm": 1.2816180364063396, "learning_rate": 6.02367122807257e-10, "loss": 0.1468, "step": 13913 }, { "epoch": 1.0, "grad_norm": 1.4587143702967096, "learning_rate": 5.845205497756467e-10, "loss": 0.1793, "step": 13914 }, { "epoch": 1.0, "grad_norm": 1.2829631467376925, "learning_rate": 5.669423255055817e-10, "loss": 0.1791, "step": 13915 }, { "epoch": 1.0, "grad_norm": 1.3280606742170686, "learning_rate": 5.496324509407513e-10, "loss": 0.129, "step": 13916 }, { "epoch": 1.0, "grad_norm": 1.4183881083256866, "learning_rate": 5.325909270093022e-10, "loss": 0.1633, "step": 13917 }, { "epoch": 1.0, "grad_norm": 5.766115601224815, "learning_rate": 5.158177546271681e-10, "loss": 0.4822, "step": 13918 }, { "epoch": 1.0, "grad_norm": 1.3647217835434147, "learning_rate": 4.993129346936298e-10, "loss": 0.1768, "step": 13919 }, { "epoch": 1.0, "grad_norm": 7.188324187214796, "learning_rate": 4.830764680946453e-10, "loss": 0.5553, "step": 13920 }, { "epoch": 1.0, "grad_norm": 1.3739583052308526, "learning_rate": 4.6710835570285e-10, "loss": 0.1926, "step": 13921 }, { "epoch": 1.0, "grad_norm": 7.230569548273155, "learning_rate": 4.514085983742256e-10, "loss": 0.5354, "step": 13922 }, { "epoch": 1.0, "grad_norm": 1.346658279551643, "learning_rate": 4.359771969519866e-10, "loss": 0.1643, "step": 13923 }, { "epoch": 1.0, "grad_norm": 1.2420010751890922, "learning_rate": 4.2081415226380427e-10, "loss": 0.1814, "step": 13924 }, { "epoch": 1.0, "grad_norm": 1.4550993678193778, "learning_rate": 4.0591946512458237e-10, "loss": 0.1689, "step": 13925 }, { "epoch": 1.0, "grad_norm": 1.330650214688605, "learning_rate": 3.9129313633312626e-10, "loss": 0.1792, "step": 13926 }, { "epoch": 1.0, "grad_norm": 1.4145277310451065, "learning_rate": 3.769351666743637e-10, "loss": 0.1701, "step": 13927 }, { "epoch": 1.0, "grad_norm": 1.4874394205383876, "learning_rate": 3.6284555691989964e-10, "loss": 0.1481, "step": 13928 }, { "epoch": 1.0, "grad_norm": 1.4165259524333382, "learning_rate": 3.4902430782468577e-10, "loss": 0.1655, "step": 13929 }, { "epoch": 1.0, "grad_norm": 7.936843726406026, "learning_rate": 3.354714201314613e-10, "loss": 0.6275, "step": 13930 }, { "epoch": 1.0, "grad_norm": 1.283585215521246, "learning_rate": 3.2218689456797734e-10, "loss": 0.1462, "step": 13931 }, { "epoch": 1.0, "grad_norm": 5.30010841347557, "learning_rate": 3.091707318464421e-10, "loss": 0.6043, "step": 13932 }, { "epoch": 1.0, "grad_norm": 1.3424364324041096, "learning_rate": 2.9642293266574087e-10, "loss": 0.1836, "step": 13933 }, { "epoch": 1.0, "grad_norm": 1.348948289457645, "learning_rate": 2.8394349771088124e-10, "loss": 0.1765, "step": 13934 }, { "epoch": 1.0, "grad_norm": 1.221247548853169, "learning_rate": 2.717324276507727e-10, "loss": 0.1421, "step": 13935 }, { "epoch": 1.0, "grad_norm": 1.283334635554951, "learning_rate": 2.597897231410018e-10, "loss": 0.1518, "step": 13936 }, { "epoch": 1.0, "grad_norm": 1.457703061820408, "learning_rate": 2.4811538482272246e-10, "loss": 0.1922, "step": 13937 }, { "epoch": 1.0, "grad_norm": 1.5056527336136298, "learning_rate": 2.3670941332321063e-10, "loss": 0.1894, "step": 13938 }, { "epoch": 1.0, "grad_norm": 1.309650636904002, "learning_rate": 2.2557180925364408e-10, "loss": 0.1535, "step": 13939 }, { "epoch": 1.0, "grad_norm": 1.635154066566935, "learning_rate": 2.1470257321298815e-10, "loss": 0.2301, "step": 13940 }, { "epoch": 1.0, "grad_norm": 1.2666979728499836, "learning_rate": 2.041017057829997e-10, "loss": 0.1621, "step": 13941 }, { "epoch": 1.0, "grad_norm": 1.3344421000365319, "learning_rate": 1.9376920753433338e-10, "loss": 0.147, "step": 13942 }, { "epoch": 1.0, "grad_norm": 1.1543823983652604, "learning_rate": 1.8370507902099045e-10, "loss": 0.1352, "step": 13943 }, { "epoch": 1.0, "grad_norm": 6.445876798883753, "learning_rate": 1.739093207830944e-10, "loss": 0.5656, "step": 13944 }, { "epoch": 1.0, "grad_norm": 1.3932824054848654, "learning_rate": 1.643819333463359e-10, "loss": 0.1458, "step": 13945 }, { "epoch": 1.0, "grad_norm": 1.44860105327731, "learning_rate": 1.5512291722252772e-10, "loss": 0.1824, "step": 13946 }, { "epoch": 1.0, "grad_norm": 1.431555380354713, "learning_rate": 1.4613227290793953e-10, "loss": 0.1711, "step": 13947 }, { "epoch": 1.0, "grad_norm": 1.2979265552282082, "learning_rate": 1.374100008860735e-10, "loss": 0.146, "step": 13948 }, { "epoch": 1.0, "grad_norm": 1.2939053540419243, "learning_rate": 1.2895610162433347e-10, "loss": 0.1474, "step": 13949 }, { "epoch": 1.0, "grad_norm": 1.3326453605207067, "learning_rate": 1.207705755768007e-10, "loss": 0.1396, "step": 13950 }, { "epoch": 1.0, "grad_norm": 1.2714279412649456, "learning_rate": 1.1285342318312354e-10, "loss": 0.1828, "step": 13951 }, { "epoch": 1.0, "grad_norm": 1.5078356970200613, "learning_rate": 1.0520464486796222e-10, "loss": 0.1994, "step": 13952 }, { "epoch": 1.0, "grad_norm": 1.3478369737524984, "learning_rate": 9.782424104154419e-11, "loss": 0.1361, "step": 13953 }, { "epoch": 1.0, "grad_norm": 1.2866285511869955, "learning_rate": 9.071221210021908e-11, "loss": 0.1673, "step": 13954 }, { "epoch": 1.0, "grad_norm": 1.3372848353363207, "learning_rate": 8.386855842645869e-11, "loss": 0.1735, "step": 13955 }, { "epoch": 1.0, "grad_norm": 1.1863449345978516, "learning_rate": 7.729328038663663e-11, "loss": 0.1565, "step": 13956 }, { "epoch": 1.0, "grad_norm": 1.238621027931929, "learning_rate": 7.098637833435895e-11, "loss": 0.1389, "step": 13957 }, { "epoch": 1.0, "grad_norm": 1.3421719137941692, "learning_rate": 6.494785260768854e-11, "loss": 0.2053, "step": 13958 }, { "epoch": 1.0, "grad_norm": 1.2767878594519209, "learning_rate": 5.917770353081053e-11, "loss": 0.1573, "step": 13959 }, { "epoch": 1.0, "grad_norm": 1.514873896403063, "learning_rate": 5.3675931413477156e-11, "loss": 0.2187, "step": 13960 }, { "epoch": 1.0, "grad_norm": 1.250040612724496, "learning_rate": 4.8442536551562837e-11, "loss": 0.1793, "step": 13961 }, { "epoch": 1.0, "grad_norm": 1.5591863753386381, "learning_rate": 4.347751922539889e-11, "loss": 0.1735, "step": 13962 }, { "epoch": 1.0, "grad_norm": 1.5153531766594353, "learning_rate": 3.878087970143884e-11, "loss": 0.1795, "step": 13963 }, { "epoch": 1.0, "grad_norm": 1.4087817224091694, "learning_rate": 3.435261823170333e-11, "loss": 0.1428, "step": 13964 }, { "epoch": 1.0, "grad_norm": 4.547560325445944, "learning_rate": 3.019273505433518e-11, "loss": 0.4143, "step": 13965 }, { "epoch": 1.0, "grad_norm": 1.2956547619507537, "learning_rate": 2.6301230392489218e-11, "loss": 0.1594, "step": 13966 }, { "epoch": 1.0, "grad_norm": 1.438946622684075, "learning_rate": 2.2678104454887383e-11, "loss": 0.1757, "step": 13967 }, { "epoch": 1.0, "grad_norm": 1.35647642891667, "learning_rate": 1.93233574358187e-11, "loss": 0.1653, "step": 13968 }, { "epoch": 1.0, "grad_norm": 1.2487794628871496, "learning_rate": 1.6236989515694412e-11, "loss": 0.1473, "step": 13969 }, { "epoch": 1.0, "grad_norm": 1.314917513225307, "learning_rate": 1.341900086049286e-11, "loss": 0.1733, "step": 13970 }, { "epoch": 1.0, "grad_norm": 1.3471196497156284, "learning_rate": 1.0869391620649262e-11, "loss": 0.1766, "step": 13971 }, { "epoch": 1.0, "grad_norm": 1.234502324479324, "learning_rate": 8.588161933831273e-12, "loss": 0.1506, "step": 13972 }, { "epoch": 1.0, "grad_norm": 1.2963158923361942, "learning_rate": 6.5753119216083184e-12, "loss": 0.1415, "step": 13973 }, { "epoch": 1.0, "grad_norm": 1.3816031459170275, "learning_rate": 4.830841692782251e-12, "loss": 0.1777, "step": 13974 }, { "epoch": 1.0, "grad_norm": 1.5194552022525774, "learning_rate": 3.3547513406118058e-12, "loss": 0.1604, "step": 13975 }, { "epoch": 1.0, "grad_norm": 1.2988785591839587, "learning_rate": 2.1470409444779295e-12, "loss": 0.1909, "step": 13976 }, { "epoch": 1.0, "grad_norm": 1.3785665351758758, "learning_rate": 1.207710568773557e-12, "loss": 0.1628, "step": 13977 }, { "epoch": 1.0, "grad_norm": 1.2609014213578345, "learning_rate": 5.367602651240589e-13, "loss": 0.1749, "step": 13978 }, { "epoch": 1.0, "grad_norm": 1.3459893667371516, "learning_rate": 1.3419006794634925e-13, "loss": 0.1227, "step": 13979 }, { "epoch": 1.0, "grad_norm": 3.5220905301420165, "learning_rate": 0.0, "loss": 0.3333, "step": 13980 }, { "epoch": 1.0, "step": 13980, "total_flos": 6714736945350656.0, "train_loss": 0.23951792627871463, "train_runtime": 45097.9201, "train_samples_per_second": 4.96, "train_steps_per_second": 0.31 } ], "logging_steps": 1.0, "max_steps": 13980, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 6714736945350656.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }